From 00343a48d39d9ff74ceb662c5140048295f2610a Mon Sep 17 00:00:00 2001 From: TTrapper Date: Mon, 2 Oct 2017 17:51:09 -0300 Subject: [PATCH 0001/1801] sampled version of sparse_softmax_cross_entropy_with_logits --- tensorflow/python/ops/nn.py | 1 + tensorflow/python/ops/nn_impl.py | 98 ++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index a80662c8b5..f7edace5b1 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -90,6 +90,7 @@ See the @{$python/nn} guide. @@in_top_k @@nce_loss @@sampled_softmax_loss +@@sampled_sparse_softmax_loss @@uniform_candidate_sampler @@log_uniform_candidate_sampler @@learned_unigram_candidate_sampler diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index db8e92831e..b2b57a055f 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1258,3 +1258,101 @@ def sampled_softmax_loss(weights, labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. return sampled_losses + + +def sampled_sparse_softmax_loss(weights, + biases, + labels, + inputs, + num_sampled, + num_classes, + sampled_values=None, + remove_accidental_hits=True, + partition_strategy="mod", + name="sampled_sparse_softmax_loss"): + """Computes and returns the sampled sparse softmax training loss. + + This is a faster way to train a softmax classifier over a huge number of + classes. + + This operation is for training only. It is generally an underestimate of + the full softmax loss. + + A common use case is to use this method for training, and calculate the full + softmax loss for evaluation or inference. In this case, you must set + `partition_strategy="div"` for the two losses to be consistent, as in the + following example: + + ```python + if mode == "train": + loss = tf.nn.sampled_sparse_softmax_loss( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + ..., + partition_strategy="div") + elif mode == "eval": + logits = tf.matmul(inputs, tf.transpose(weights)) + logits = tf.nn.bias_add(logits, biases) + labels_one_hot = tf.one_hot(labels, n_classes) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels_one_hot, + logits=logits) + ``` + + See our [Candidate Sampling Algorithms Reference] + (https://www.tensorflow.org/extras/candidate_sampling.pdf) + + Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) + ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. + + Args: + weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` + objects whose concatenation along dimension 0 has shape + [num_classes, dim]. The (possibly-sharded) class embeddings. + biases: A `Tensor` of shape `[num_classes]`. The class biases. + labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`. + The index of the single target class for each row of logits. Note that + this format differs from the `labels` argument of + `nn.sparse_softmax_cross_entropy_with_logits`. + inputs: A `Tensor` of shape `[batch_size, dim]`. The forward + activations of the input network. + num_sampled: An `int`. The number of classes to randomly sample per batch. + num_classes: An `int`. The number of possible classes. + sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`, + `sampled_expected_count`) returned by a `*_candidate_sampler` function. + (if None, we default to `log_uniform_candidate_sampler`) + remove_accidental_hits: A `bool`. whether to remove "accidental hits" + where a sampled class equals one of the target classes. Default is + True. + partition_strategy: A string specifying the partitioning strategy, relevant + if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. + Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. + name: A name for the operation (optional). + + Returns: + A `batch_size` 1-D tensor of per-example sampled softmax losses. + + """ + logits, labels = _compute_sampled_logits( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + num_sampled=num_sampled, + num_classes=num_classes, + num_true=1, + sampled_values=sampled_values, + subtract_log_q=True, + remove_accidental_hits=remove_accidental_hits, + partition_strategy=partition_strategy, + name=name) + + # labels returned by _compute_sampled_logits are one_hot. Convert to indices. + labels = array_ops.reshape(math_ops.argmax(labels, axis=1), [-1]) + + sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + # sampled_losses is a [batch_size] tensor. + return sampled_losses -- GitLab From 499376eb38b6b5b991e330d87c91d879a6f7bbbe Mon Sep 17 00:00:00 2001 From: Daniyar Date: Mon, 2 Oct 2017 20:58:00 +0100 Subject: [PATCH 0002/1801] unpack for int64 tensors on gpu --- tensorflow/core/kernels/unpack_op.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc index 7fd1def1fe..7ece912557 100644 --- a/tensorflow/core/kernels/unpack_op.cc +++ b/tensorflow/core/kernels/unpack_op.cc @@ -153,6 +153,12 @@ REGISTER_KERNEL_BUILDER(Name("Unpack") .HostMemory("output") .TypeConstraint("T"), UnpackOp); +REGISTER_KERNEL_BUILDER(Name("Unpack") + .Device(DEVICE_GPU) + .HostMemory("value") + .HostMemory("output") + .TypeConstraint("T"), + UnpackOp); #endif // GOOGLE_CUDA @@ -170,6 +176,12 @@ REGISTER_KERNEL_BUILDER(Name("Unpack") .HostMemory("output") .TypeConstraint("T"), UnpackOp); +REGISTER_KERNEL_BUILDER(Name("Unpack") + .Device(DEVICE_SYCL) + .HostMemory("value") + .HostMemory("output") + .TypeConstraint("T"), + UnpackOp); #undef REGISTER_SYCL #endif // TENSORFLOW_USE_SYCL -- GitLab From 7fe8a6decd3b1c077de5a3cdedff198195b16ee1 Mon Sep 17 00:00:00 2001 From: Daniyar Date: Thu, 5 Oct 2017 14:34:12 +0100 Subject: [PATCH 0003/1801] unstack op tests for dtypes --- .../python/kernel_tests/unstack_op_test.py | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py index c2dcff978a..d937108599 100644 --- a/tensorflow/python/kernel_tests/unstack_op_test.py +++ b/tensorflow/python/kernel_tests/unstack_op_test.py @@ -22,6 +22,7 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.platform import test @@ -42,15 +43,33 @@ class UnstackOpTest(test.TestCase): np.random.seed(7) with self.test_session(use_gpu=True): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): - data = np.random.randn(*shape) - # Convert data to a single tensorflow tensor - x = constant_op.constant(data) - # Unpack into a list of tensors - cs = array_ops.unstack(x, num=shape[0]) - self.assertEqual(type(cs), list) - self.assertEqual(len(cs), shape[0]) - cs = [c.eval() for c in cs] - self.assertAllEqual(cs, data) + for dtype in [np.bool, np.float16, np.float32, np.float64, np.int32, np.int64]: + data = np.random.randn(*shape).astype(dtype) + # Convert data to a single tensorflow tensor + x = constant_op.constant(data) + # Unpack into a list of tensors + cs = array_ops.unstack(x, num=shape[0]) + self.assertEqual(type(cs), list) + self.assertEqual(len(cs), shape[0]) + cs = [c.eval() for c in cs] + self.assertAllEqual(cs, data) + + def testSimpleGpu(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + np.random.seed(7) + with self.test_session(use_gpu=True, force_gpu=True): + for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): + for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]: + data = np.random.randn(*shape).astype(dtype) + # Convert data to a single tensorflow tensor + x = constant_op.constant(data) + # Unpack into a list of tensors + cs = array_ops.unstack(x, num=shape[0]) + self.assertEqual(type(cs), list) + self.assertEqual(len(cs), shape[0]) + cs = [c.eval() for c in cs] + self.assertAllEqual(cs, data) def testGradientsAxis0(self): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): -- GitLab From 03233a04cf07d639d8d2b5f3fbcab479b267ac4e Mon Sep 17 00:00:00 2001 From: TTrapper Date: Fri, 6 Oct 2017 00:21:08 -0300 Subject: [PATCH 0004/1801] Adressed reviewer comments: moved to contrib, fixed erroneous doc, modified _compute_sampled_logits to optionally return target indices --- tensorflow/contrib/nn/__init__.py | 1 + .../contrib/nn/python/ops/sampling_ops.py | 97 +++++++++++++ tensorflow/python/ops/nn.py | 1 - tensorflow/python/ops/nn_impl.py | 128 +++--------------- 4 files changed, 120 insertions(+), 107 deletions(-) diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py index be0957f473..89b70ddfc2 100644 --- a/tensorflow/contrib/nn/__init__.py +++ b/tensorflow/contrib/nn/__init__.py @@ -19,6 +19,7 @@ @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits @@deprecated_flipped_sigmoid_cross_entropy_with_logits @@rank_sampled_softmax_loss +@@sampled_sparse_softmax_loss """ from __future__ import absolute_import diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 2ae529e015..b26da52f01 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -24,6 +24,8 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_impl +from tensorflow.python.ops import nn_ops def _rank_resample(weights, biases, inputs, sampled_values, num_resampled, @@ -240,3 +242,98 @@ def rank_sampled_softmax_loss(weights, remove_accidental_hits=remove_accidental_hits, partition_strategy=partition_strategy, name=name) + + +def sampled_sparse_softmax_loss(weights, + biases, + labels, + inputs, + num_sampled, + num_classes, + sampled_values=None, + remove_accidental_hits=True, + partition_strategy="mod", + name="sampled_sparse_softmax_loss"): + """Computes and returns the sampled sparse softmax training loss. + + This is a faster way to train a softmax classifier over a huge number of + classes. + + This operation is for training only. It is generally an underestimate of + the full softmax loss. + + A common use case is to use this method for training, and calculate the full + softmax loss for evaluation or inference. In this case, you must set + `partition_strategy="div"` for the two losses to be consistent, as in the + following example: + + ```python + if mode == "train": + loss = tf.nn.sampled_sparse_softmax_loss( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + ..., + partition_strategy="div") + elif mode == "eval": + logits = tf.matmul(inputs, tf.transpose(weights)) + logits = tf.nn.bias_add(logits, biases) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=tf.squeeze(labels), + logits=logits) + ``` + + See our [Candidate Sampling Algorithms Reference] + (https://www.tensorflow.org/extras/candidate_sampling.pdf) + + Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) + ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. + + Args: + weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` + objects whose concatenation along dimension 0 has shape + [num_classes, dim]. The (possibly-sharded) class embeddings. + biases: A `Tensor` of shape `[num_classes]`. The class biases. + labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`. + The index of the single target class for each row of logits. Note that + this format differs from the `labels` argument of + `nn.sparse_softmax_cross_entropy_with_logits`. + inputs: A `Tensor` of shape `[batch_size, dim]`. The forward + activations of the input network. + num_sampled: An `int`. The number of classes to randomly sample per batch. + num_classes: An `int`. The number of possible classes. + sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`, + `sampled_expected_count`) returned by a `*_candidate_sampler` function. + (if None, we default to `log_uniform_candidate_sampler`) + remove_accidental_hits: A `bool`. whether to remove "accidental hits" + where a sampled class equals one of the target classes. Default is + True. + partition_strategy: A string specifying the partitioning strategy, relevant + if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. + Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. + name: A name for the operation (optional). + + Returns: + A `batch_size` 1-D tensor of per-example sampled softmax losses. + + """ + logits, labels = nn_impl._compute_sampled_logits( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + num_sampled=num_sampled, + num_classes=num_classes, + num_true=1, + sampled_values=sampled_values, + subtract_log_q=True, + remove_accidental_hits=remove_accidental_hits, + partition_strategy=partition_strategy, + labels_as_indices=True, + name=name) + + sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( + labels=array_ops.squeeze(labels), logits=logits) + # sampled_losses is a [batch_size] tensor. + return sampled_losses diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index f7edace5b1..a80662c8b5 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -90,7 +90,6 @@ See the @{$python/nn} guide. @@in_top_k @@nce_loss @@sampled_softmax_loss -@@sampled_sparse_softmax_loss @@uniform_candidate_sampler @@log_uniform_candidate_sampler @@learned_unigram_candidate_sampler diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index b2b57a055f..ad18eedfb0 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import candidate_sampling_ops from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops @@ -893,6 +894,7 @@ def _compute_sampled_logits(weights, subtract_log_q=True, remove_accidental_hits=False, partition_strategy="mod", + labels_as_indices=False, name=None): """Helper function for nce_loss and sampled_softmax_loss functions. @@ -930,12 +932,18 @@ def _compute_sampled_logits(weights, partition_strategy: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. + labels_as_indices: A `bool`. Whether the returned labels represent the + indices of the true classes. Default is `False`. name: A name for the operation (optional). Returns: - out_logits, out_labels: `Tensor` objects each with shape + out_logits: `Tensor` object with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or `nn.softmax_cross_entropy_with_logits` (sampled softmax). + out_labels: If `labels_as_indices` is `False`, a Tensor object with the same + shape as `out_logits`. Otherwise a `Tensor` of shape + `[batch_size, num_true]` with the indices of the target classes for each + row of `out_logits`. """ if isinstance(weights, variables.PartitionedVariable): @@ -1046,13 +1054,19 @@ def _compute_sampled_logits(weights, # Construct output logits and labels. The true labels/logits start at col 0. out_logits = array_ops.concat([true_logits, sampled_logits], 1) - # true_logits is a float tensor, ones_like(true_logits) is a float tensor - # of ones. We then divide by num_true to ensure the per-example labels sum - # to 1.0, i.e. form a proper probability distribution. - out_labels = array_ops.concat([ - array_ops.ones_like(true_logits) / num_true, - array_ops.zeros_like(sampled_logits) - ], 1) + if labels_as_indices: + # We want each row of labels to be the indices of the targets, which + # start at col 0 and end at col num_true-1. + out_labels = gen_array_ops.tile( + [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) + else: + # true_logits is a float tensor, ones_like(true_logits) is a float + # tensor of ones. We then divide by num_true to ensure the per-example + # labels sum to 1.0, i.e. form a proper probability distribution. + out_labels = array_ops.concat([ + array_ops.ones_like(true_logits) / num_true, + array_ops.zeros_like(sampled_logits) + ], 1) return out_logits, out_labels @@ -1258,101 +1272,3 @@ def sampled_softmax_loss(weights, labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. return sampled_losses - - -def sampled_sparse_softmax_loss(weights, - biases, - labels, - inputs, - num_sampled, - num_classes, - sampled_values=None, - remove_accidental_hits=True, - partition_strategy="mod", - name="sampled_sparse_softmax_loss"): - """Computes and returns the sampled sparse softmax training loss. - - This is a faster way to train a softmax classifier over a huge number of - classes. - - This operation is for training only. It is generally an underestimate of - the full softmax loss. - - A common use case is to use this method for training, and calculate the full - softmax loss for evaluation or inference. In this case, you must set - `partition_strategy="div"` for the two losses to be consistent, as in the - following example: - - ```python - if mode == "train": - loss = tf.nn.sampled_sparse_softmax_loss( - weights=weights, - biases=biases, - labels=labels, - inputs=inputs, - ..., - partition_strategy="div") - elif mode == "eval": - logits = tf.matmul(inputs, tf.transpose(weights)) - logits = tf.nn.bias_add(logits, biases) - labels_one_hot = tf.one_hot(labels, n_classes) - loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=labels_one_hot, - logits=logits) - ``` - - See our [Candidate Sampling Algorithms Reference] - (https://www.tensorflow.org/extras/candidate_sampling.pdf) - - Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) - ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. - - Args: - weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` - objects whose concatenation along dimension 0 has shape - [num_classes, dim]. The (possibly-sharded) class embeddings. - biases: A `Tensor` of shape `[num_classes]`. The class biases. - labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`. - The index of the single target class for each row of logits. Note that - this format differs from the `labels` argument of - `nn.sparse_softmax_cross_entropy_with_logits`. - inputs: A `Tensor` of shape `[batch_size, dim]`. The forward - activations of the input network. - num_sampled: An `int`. The number of classes to randomly sample per batch. - num_classes: An `int`. The number of possible classes. - sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`, - `sampled_expected_count`) returned by a `*_candidate_sampler` function. - (if None, we default to `log_uniform_candidate_sampler`) - remove_accidental_hits: A `bool`. whether to remove "accidental hits" - where a sampled class equals one of the target classes. Default is - True. - partition_strategy: A string specifying the partitioning strategy, relevant - if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. - Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. - name: A name for the operation (optional). - - Returns: - A `batch_size` 1-D tensor of per-example sampled softmax losses. - - """ - logits, labels = _compute_sampled_logits( - weights=weights, - biases=biases, - labels=labels, - inputs=inputs, - num_sampled=num_sampled, - num_classes=num_classes, - num_true=1, - sampled_values=sampled_values, - subtract_log_q=True, - remove_accidental_hits=remove_accidental_hits, - partition_strategy=partition_strategy, - name=name) - - # labels returned by _compute_sampled_logits are one_hot. Convert to indices. - labels = array_ops.reshape(math_ops.argmax(labels, axis=1), [-1]) - - sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - # sampled_losses is a [batch_size] tensor. - return sampled_losses -- GitLab From 7680d8d00dec8897b64ea864da71537b7be957de Mon Sep 17 00:00:00 2001 From: TTrapper Date: Fri, 6 Oct 2017 00:47:54 -0300 Subject: [PATCH 0005/1801] checkstyle fix --- tensorflow/python/ops/nn_impl.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index ad18eedfb0..8e64259143 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1055,18 +1055,18 @@ def _compute_sampled_logits(weights, # Construct output logits and labels. The true labels/logits start at col 0. out_logits = array_ops.concat([true_logits, sampled_logits], 1) if labels_as_indices: - # We want each row of labels to be the indices of the targets, which - # start at col 0 and end at col num_true-1. - out_labels = gen_array_ops.tile( - [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) + # We want each row of labels to be the indices of the targets, which + # start at col 0 and end at col num_true-1. + out_labels = gen_array_ops.tile( + [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) else: - # true_logits is a float tensor, ones_like(true_logits) is a float - # tensor of ones. We then divide by num_true to ensure the per-example - # labels sum to 1.0, i.e. form a proper probability distribution. - out_labels = array_ops.concat([ - array_ops.ones_like(true_logits) / num_true, - array_ops.zeros_like(sampled_logits) - ], 1) + # true_logits is a float tensor, ones_like(true_logits) is a float + # tensor of ones. We then divide by num_true to ensure the per-example + # labels sum to 1.0, i.e. form a proper probability distribution. + out_labels = array_ops.concat([ + array_ops.ones_like(true_logits) / num_true, + array_ops.zeros_like(sampled_logits) + ], 1) return out_logits, out_labels -- GitLab From f300bcbb3419e7ad7130a84d5375ae53d92e1568 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Sun, 22 Oct 2017 21:36:25 +0800 Subject: [PATCH 0006/1801] Propagate -DPCRE_STATIC from pcre.BUILD to swig.BUILD To fix a build error on Windows: ERROR: C:/os/t/external/swig/BUILD.bazel:5:1: Linking of rule '@swig//:swig' failed (Exit 1120): link.exe failed: error executing command misc.o : error LNK2019: unresolved external symbol __imp_pcre_compile referenced in function Swig_string_regex ... --- third_party/pcre.BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/pcre.BUILD b/third_party/pcre.BUILD index 68aadd1d40..e2cdec4029 100644 --- a/third_party/pcre.BUILD +++ b/third_party/pcre.BUILD @@ -50,12 +50,12 @@ cc_library( "-DNEWLINE=10", "-DNO_RECURSE", "-DPARENS_NEST_LIMIT=50", - "-DPCRE_STATIC=1", "-DPOSIX_MALLOC_THRESHOLD=10", "-DSTDC_HEADERS=1", "-DSUPPORT_UCP", "-DSUPPORT_UTF", ], + defines = ["PCRE_STATIC=1"], includes = ["."], visibility = ["@swig//:__pkg__"], # Please use RE2 alwayslink = 1, -- GitLab From 40fc0cb0258352b5d00f25bab55a6991b06b959b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 5 Nov 2017 14:52:42 +0000 Subject: [PATCH 0007/1801] Fix issue in the `Defun` docs This fix fixes a couple of typos in the `Defun` docs: `tf.Constant` -> `tf.constant` Signed-off-by: Yong Tang --- tensorflow/python/framework/function.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index cef3f8d4c4..f55ee5b1e1 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -82,8 +82,8 @@ class Defun(object): return x + y, x - y # Building the graph. - a = tf.Constant([1.0]) - b = tf.Constant([2.0]) + a = tf.constant([1.0]) + b = tf.constant([2.0]) c, d = MyFunc(a, b, name='mycall') ``` """ -- GitLab From b739ee1734baf437f1925ea09d07647ec7602caf Mon Sep 17 00:00:00 2001 From: Viraj Date: Fri, 27 Oct 2017 20:38:45 -0700 Subject: [PATCH 0008/1801] svd backpropagation when full_matrices is False --- tensorflow/python/kernel_tests/svd_op_test.py | 32 +++++---- tensorflow/python/ops/linalg_grad.py | 69 +++++++++++-------- 2 files changed, 60 insertions(+), 41 deletions(-) diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py index 9871eacb03..d20567bf0e 100644 --- a/tensorflow/python/kernel_tests/svd_op_test.py +++ b/tensorflow/python/kernel_tests/svd_op_test.py @@ -190,10 +190,11 @@ class SvdGradOpTest(test.TestCase): pass # Filled in below -def _GetSvdGradOpTest(dtype_, shape_, compute_uv_): +def _GetSvdGradOpTest(dtype_, shape_, compute_uv_, full_matrices_): def _NormalizingSvd(tf_a): - tf_s, tf_u, tf_v = linalg_ops.svd(tf_a, compute_uv=True, full_matrices=True) + tf_s, tf_u, tf_v = linalg_ops.svd( + tf_a, compute_uv=True, full_matrices=full_matrices_) # Singular vectors are only unique up to an arbitrary phase. We normalize # the vectors such that the first component of u (if m >=n) or v (if n > m) # have phase 0. @@ -270,17 +271,20 @@ if __name__ == "__main__": _GetSvdOpTest(dtype, shape, use_static_shape, compute_uv, full_matrices)) for compute_uv in False, True: - dtypes = ([np.float32, np.float64] + [np.complex64, np.complex128] * - (not compute_uv)) - for dtype in dtypes: - mat_shapes = ([(10, 11), (11, 10), - (11, 11)] + [(5, 11), (11, 5)] * (not compute_uv)) - for mat_shape in mat_shapes: - for batch_dims in [(), (3,)]: - shape = batch_dims + mat_shape - name = "%s_%s_compute_uv_%s" % (dtype.__name__, - "_".join(map(str, shape)), compute_uv) - _AddTest(SvdGradOpTest, "SvdGrad", name, - _GetSvdGradOpTest(dtype, shape, compute_uv)) + for full_matrices in False, True: + dtypes = ([np.float32, np.float64] + + [np.complex64, np.complex128] * (not compute_uv)) + for dtype in dtypes: + mat_shapes = [(10, 11), (11, 10), (11, 11)] + if not full_matrices or not compute_uv: + mat_shapes += [(5, 11), (11, 5)] + for mat_shape in mat_shapes: + for batch_dims in [(), (3,)]: + shape = batch_dims + mat_shape + name = "%s_%s_compute_uv_%s_full_%s" % ( + dtype.__name__, "_".join(map(str, shape)), compute_uv, + full_matrices) + _AddTest(SvdGradOpTest, "SvdGrad", name, + _GetSvdGradOpTest(dtype, shape, compute_uv, full_matrices)) test.main() diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 8a76fe3ce5..1316ed5b54 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -268,13 +268,14 @@ def _SelfAdjointEigV2Grad(op, grad_e, grad_v): @ops.RegisterGradient("Svd") def _SvdGrad(op, grad_s, grad_u, grad_v): - """Gradient for Svd based on Giles' algorithm. Reference at top of file.""" - - if op.get_attr("compute_uv") and not op.get_attr("full_matrices"): - raise NotImplementedError( - "SVD gradient is not implemented for compute_uv=True and " - "full_matrices=False.") - + """Gradient for the singular value decomposition + + The derivation for the compute_uv=False case, and most of + the derivation for the full_matrices=True case, are in + Giles' paper (see reference at top of file). A derivation for + the full_matrices=False case is available at + https://j-towns.github.io/papers/svd-derivative.pdf + """ a = op.inputs[0] a_shape = a.get_shape().with_rank_at_least(2) @@ -300,7 +301,7 @@ def _SvdGrad(op, grad_s, grad_u, grad_v): "SVD gradient has not been implemented for input with unknown " "inner matrix shape.") - if not op.get_attr("full_matrices") or not op.get_attr("compute_uv"): + if not op.get_attr("compute_uv"): s, u, v = linalg_ops.svd(a, compute_uv=True, full_matrices=True) else: s = op.outputs[0] @@ -334,9 +335,10 @@ def _SvdGrad(op, grad_s, grad_u, grad_v): # multiple singular values with value zero. I am not sure if this is a true # instability or if it simply throws off the finite difference gradient # checker. - if abs(m - n) > 1: + if op.get_attr("full_matrices") and abs(m - n) > 1: raise NotImplementedError( - "svd gradient is not implemented for abs(m - n) > 1") + "svd gradient is not implemented for abs(m - n) > 1 " + "when full_matrices is True") s_mat = array_ops.matrix_diag(s) s2 = math_ops.square(s) @@ -352,32 +354,45 @@ def _SvdGrad(op, grad_s, grad_u, grad_v): array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)), array_ops.zeros_like(s)) s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s)) + + v1 = v[..., :, :m] + grad_v1 = grad_v[..., :, :m] + u_gu = math_ops.matmul(u, grad_u, adjoint_a=True) - v_gv = math_ops.matmul(v, grad_v, adjoint_a=True) + v_gv = math_ops.matmul(v1, grad_v1, adjoint_a=True) - if m == n: - f_u = f * u_gu - f_v = f * v_gv - else: - dv2 = array_ops.matrix_transpose(v_gv[..., m:n, :m]) - v_gv[..., :m, m:n] - f_u = f * u_gu - f_v = f * v_gv[..., :m, :m] + f_u = f * u_gu + f_v = f * v_gv - grad_a_nouv = ( + term1_nouv = ( grad_s_mat + math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) + math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v))) - if m != n: - grad_a_nouv = array_ops.concat( - [grad_a_nouv, math_ops.matmul(s_inv_mat, dv2)], -1) + term1 = math_ops.matmul(u, math_ops.matmul(term1_nouv, v1, adjoint_b=True)) + + if m == n: + grad_a_before_transpose = term1 + else: + proj_v1_perp = (linalg_ops.eye(n, dtype=v.dtype) + - math_ops.matmul(v1, v1, adjoint_b=True)) + term2_nous = math_ops.matmul(grad_v1, proj_v1_perp, adjoint_a=True) + + if op.get_attr("full_matrices"): + v2 = v[..., :, m:n] + grad_v2 = grad_v[..., :, m:n] + + v1t_gv2 = math_ops.matmul(v1, grad_v2, adjoint_a=True) + term2_nous -= math_ops.matmul(v1t_gv2, v2, adjoint_b=True) + + u_s_inv = math_ops.matmul(u, s_inv_mat) + term2 = math_ops.matmul(u_s_inv, term2_nous) + + grad_a_before_transpose = term1 + term2 if use_adjoint: - # Use (U X V^H)^H = V (U X)^H. - grad_a = math_ops.matmul( - v, math_ops.matmul(u, grad_a_nouv), adjoint_b=True) + grad_a = array_ops.matrix_transpose(grad_a_before_transpose) else: - grad_a = math_ops.matmul(u, - math_ops.matmul(grad_a_nouv, v, adjoint_b=True)) + grad_a = grad_a_before_transpose grad_a.set_shape(a_shape) return grad_a -- GitLab From 1a94310a14d073fbc80d55b211a85e47a2f9c9c6 Mon Sep 17 00:00:00 2001 From: dariavel Date: Thu, 26 Oct 2017 17:06:00 +0300 Subject: [PATCH 0009/1801] Add connectivity check Ping on each channel and count send+recv completions Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma.cc | 18 ++-- tensorflow/contrib/verbs/rdma.h | 1 + tensorflow/contrib/verbs/rdma_mgr.cc | 93 ++++++++++++++++++++ tensorflow/contrib/verbs/rdma_mgr.h | 7 +- tensorflow/contrib/verbs/verbs_server_lib.cc | 5 +- 5 files changed, 111 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 331943a3ef..d99cb34661 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -147,7 +147,7 @@ ibv_device* set_device() { // check validity of input device CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; } else { - // set default device + // set default device str_port_num = get_env_var("RDMA_DEVICE_PORT"); CHECK(str_port_num.empty()) << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; @@ -177,7 +177,7 @@ ibv_device* set_device() { // Returns: // port to use uint8_t set_port(ibv_context* context) { - uint8_t port_num = 0; //0 is illegal port number + uint8_t port_num = 0; // 0 is illegal port number string str_port_num; ibv_device_attr device_att; ibv_port_attr port_attr; @@ -419,9 +419,6 @@ RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env) 0); CHECK(cq_) << "Failed to create completion queue"; CHECK(!ibv_req_notify_cq(cq_, 0)) << "Failed to request CQ notification"; - polling_thread_.reset(Env::Default()->StartThread( - ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); })); - VLOG(2) << "Start RdmaAdapter: " << name(); } RdmaAdapter::~RdmaAdapter() { @@ -433,6 +430,12 @@ RdmaAdapter::~RdmaAdapter() { CHECK(!ibv_close_device(context_)) << "Failed to release context"; } +void RdmaAdapter::StartPolling() { + polling_thread_.reset(Env::Default()->StartThread( + ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); })); + VLOG(2) << "Start RdmaAdapter: " << name(); +} + string RdmaAdapter::name() const { return string(context_->device->name); } // Function to process incoming messages @@ -633,11 +636,6 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, buffer_index_name_table_.insert({index, buffer_names[i]}); buffer_name_index_table_.insert({buffer_names[i], index}); } - - // Initiate recv - for (int i = 0; i < 100; i++) { - Recv(); - } } } diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 52d92a7c5b..2e128961b6 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -107,6 +107,7 @@ class RdmaAdapter { ~RdmaAdapter(); // Adapter name, e.g. mlx5_0. string name() const; + void StartPolling(); void Process_CQ(); protected: diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index 09b878843f..b3b3c4f31d 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -115,6 +115,99 @@ void RdmaMgr::SetupChannels() { } } +#define PING_RECV_WRID 0 +#define PING_BUFF_SIZE 1024 + +int RdmaMgr::PostRecv(RdmaChannel* rc, struct ibv_sge list) { + struct ibv_recv_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.sg_list = &list; + wr.num_sge = 1; + wr.wr_id = PING_RECV_WRID; + + return ibv_post_recv(rc->qp_, &wr, &bad_wr); +} + +int RdmaMgr::PostSend(RdmaChannel* rc, struct ibv_sge list) { + struct ibv_send_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (uint64_t)rc; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.send_flags = IBV_SEND_SIGNALED; + + return ibv_post_send(rc->qp_, &wr, &bad_wr); +} + +// Check connectivity by pinging every channel +bool RdmaMgr::ConnectivityCheck() { + int i, rcnt = 0, scnt = 0; + void* buff; + struct ibv_sge list; + buff = malloc(PING_BUFF_SIZE); + CHECK(buff) << "Malloc failed!"; + struct ibv_mr* mr = ibv_reg_mr(rdma_adapter_->pd_, buff, PING_BUFF_SIZE, + IBV_ACCESS_LOCAL_WRITE); + CHECK(mr) << "Failed to register memory region"; + + memset(&list, 0, sizeof(list)); + list.addr = (uintptr_t)buff; + list.length = PING_BUFF_SIZE; + list.lkey = mr->lkey; + + for (const auto& p : channel_table_) { + string worker_name = p.first; + RdmaChannel* rc = p.second; + + VLOG(2) << "Ping to " << worker_name; + CHECK(PostRecv(rc, list) == 0) << "Couldn't post receive from " + << worker_name << " with error " + << std::strerror(errno); + CHECK(PostSend(rc, list) == 0) << "Couldn't post send to " << worker_name + << " with error: " << std::strerror(errno); + for (int i = 0; i < 100; i++) { + rc->Recv(); + } + } + + while (rcnt < num_remote_workers_ || scnt < num_remote_workers_) { + int ne; + do { + ne = ibv_poll_cq(rdma_adapter_->cq_, 2 * num_remote_workers_, + rdma_adapter_->wc_); + CHECK(ne >= 0) << "poll CQ failed " << ne << "with error" + << std::strerror(errno); + } while (ne < 1); + + for (i = 0; i < ne; ++i) { + ibv_wc_status s = rdma_adapter_->wc_[i].status; + // recv complete + if ((int)rdma_adapter_->wc_[i].wr_id == PING_RECV_WRID) { + CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( + rdma_adapter_->wc_[i].status) + << "(" << rdma_adapter_->wc_[i].status + << ") for PING_RECV_WRID"; + ++rcnt; + // send complete + } else { + RdmaChannel* rc = + reinterpret_cast(rdma_adapter_->wc_[i].wr_id); + CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( + rdma_adapter_->wc_[i].status) + << "(" << rdma_adapter_->wc_[i].status + << ") to " << rc->remote_name_; + ++scnt; + } + } // for + } // while + CHECK(rcnt == scnt) << "Connectivity check failed!"; + ibv_dereg_mr(mr); + free(buff); + rdma_adapter_->StartPolling(); + return (num_remote_workers_ == rcnt) && (num_remote_workers_ == scnt); +} + RdmaMgr::~RdmaMgr() { for (const auto& p : channel_table_) delete p.second; channel_table_.clear(); diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h index b156f64096..4ace70ba57 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.h +++ b/tensorflow/contrib/verbs/rdma_mgr.h @@ -28,12 +28,16 @@ limitations under the License. namespace tensorflow { class RdmaMgr { + friend class RdmaChannel; + friend class RdmaAdapter; + public: explicit RdmaMgr(const WorkerEnv* const worker_env, GrpcChannelCache* const channel_cache); ~RdmaMgr(); RdmaChannel* FindChannel(const string& key); void SetupChannels(); + bool ConnectivityCheck(); const string& local_worker() { return local_worker_; } private: @@ -44,7 +48,8 @@ class RdmaMgr { RdmaAdapter* rdma_adapter_; typedef std::unordered_map ChannelTable; ChannelTable channel_table_; - + int PostSend(RdmaChannel* rc, struct ibv_sge list); + int PostRecv(RdmaChannel* rc, struct ibv_sge list); TF_DISALLOW_COPY_AND_ASSIGN(RdmaMgr); }; diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc index 6d1c79c0fb..a606ef75a4 100644 --- a/tensorflow/contrib/verbs/verbs_server_lib.cc +++ b/tensorflow/contrib/verbs/verbs_server_lib.cc @@ -49,8 +49,8 @@ VerbsServer::~VerbsServer() { Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def, GrpcChannelCache** channel_cache) { string name_prefix = - strings::StrCat("/job:", server_def.job_name(), "/replica:0", - "/task:", server_def.task_index()); + strings::StrCat("/job:", server_def.job_name(), "/replica:0", "/task:", + server_def.task_index()); GrpcChannelSpec channel_spec; TF_RETURN_IF_ERROR(ParseChannelSpec(server_def, &channel_spec)); @@ -103,6 +103,7 @@ Status VerbsServer::Start() { ThreadOptions(), "TF_verbs_service", [this] { verbs_service_->HandleRPCsLoop(); })); rdma_mgr_->SetupChannels(); + CHECK(rdma_mgr_->ConnectivityCheck()) << "Connectivity check failed!"; verbs_state_ = CONNECTED; } } -- GitLab From 734237891314132631bdd8adf03b8d7827f9c4ae Mon Sep 17 00:00:00 2001 From: dariavel Date: Tue, 31 Oct 2017 14:11:14 +0200 Subject: [PATCH 0010/1801] Move PostSend and PostRecv from mgr to channel, PostRecv upon channel creation before connectivity check Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma.cc | 39 ++++++++++++++++++++++++ tensorflow/contrib/verbs/rdma.h | 9 ++++++ tensorflow/contrib/verbs/rdma_mgr.cc | 45 ++-------------------------- tensorflow/contrib/verbs/rdma_mgr.h | 2 -- 4 files changed, 50 insertions(+), 45 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index d99cb34661..55a8f20c29 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -561,9 +561,44 @@ void RdmaAdapter::Process_CQ() { } } +int RdmaChannel::PingPostRecv() { + struct ibv_recv_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.sg_list = &ping_sge_list_; + wr.num_sge = 1; + wr.wr_id = PingRecvWrid; + + return ibv_post_recv(qp_, &wr, &bad_wr); +} + +int RdmaChannel::PingPostSend() { + struct ibv_send_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (uint64_t)this; + wr.sg_list = &ping_sge_list_; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.send_flags = IBV_SEND_SIGNALED; + + return ibv_post_send(qp_, &wr, &bad_wr); +} + RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, const string remote_name) : adapter_(adapter), local_name_(local_name), remote_name_(remote_name) { + + struct ibv_sge list; + + mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize, + IBV_ACCESS_LOCAL_WRITE); + CHECK(mr_) << "Failed to register memory region"; + + memset(&list, 0, sizeof(list)); + list.addr = (uintptr_t)ping_buff_; + list.length = PingBuffSize; + list.lkey = mr_->lkey; + + ping_sge_list_ = list; // Create queue pair { struct ibv_qp_init_attr attr; @@ -637,9 +672,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, buffer_name_index_table_.insert({buffer_names[i], index}); } } + CHECK(PingPostRecv() == 0) << "Couldn't post receive from " + << remote_name_ << " with error " + << std::strerror(errno); } RdmaChannel::~RdmaChannel() { + ibv_dereg_mr(mr_); CHECK(!ibv_destroy_qp(qp_)) << "Failed to destroy QP"; delete tx_message_buffer_; delete rx_message_buffer_; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 2e128961b6..92391d6a57 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -162,6 +162,15 @@ class RdmaChannel { void RemoveRecvCallback(const string& key); void RunRecvCallback(const string& key); static const int kNumMessageBuffers = 4; + static const int PingRecvWrid = 0; + + private: + static const int PingBuffSize = 1024; + char ping_buff_[PingBuffSize]; + struct ibv_mr* mr_; + struct ibv_sge ping_sge_list_; + int PingPostRecv(); + int PingPostSend(); protected: const RdmaAdapter* adapter_; diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index b3b3c4f31d..3e2171f33d 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -115,56 +115,17 @@ void RdmaMgr::SetupChannels() { } } -#define PING_RECV_WRID 0 -#define PING_BUFF_SIZE 1024 - -int RdmaMgr::PostRecv(RdmaChannel* rc, struct ibv_sge list) { - struct ibv_recv_wr wr, *bad_wr; - memset(&wr, 0, sizeof(wr)); - wr.sg_list = &list; - wr.num_sge = 1; - wr.wr_id = PING_RECV_WRID; - - return ibv_post_recv(rc->qp_, &wr, &bad_wr); -} - -int RdmaMgr::PostSend(RdmaChannel* rc, struct ibv_sge list) { - struct ibv_send_wr wr, *bad_wr; - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)rc; - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IBV_WR_SEND; - wr.send_flags = IBV_SEND_SIGNALED; - - return ibv_post_send(rc->qp_, &wr, &bad_wr); -} // Check connectivity by pinging every channel bool RdmaMgr::ConnectivityCheck() { int i, rcnt = 0, scnt = 0; - void* buff; - struct ibv_sge list; - buff = malloc(PING_BUFF_SIZE); - CHECK(buff) << "Malloc failed!"; - struct ibv_mr* mr = ibv_reg_mr(rdma_adapter_->pd_, buff, PING_BUFF_SIZE, - IBV_ACCESS_LOCAL_WRITE); - CHECK(mr) << "Failed to register memory region"; - - memset(&list, 0, sizeof(list)); - list.addr = (uintptr_t)buff; - list.length = PING_BUFF_SIZE; - list.lkey = mr->lkey; for (const auto& p : channel_table_) { string worker_name = p.first; RdmaChannel* rc = p.second; VLOG(2) << "Ping to " << worker_name; - CHECK(PostRecv(rc, list) == 0) << "Couldn't post receive from " - << worker_name << " with error " - << std::strerror(errno); - CHECK(PostSend(rc, list) == 0) << "Couldn't post send to " << worker_name + CHECK(rc->PingPostSend() == 0) << "Couldn't post send to " << worker_name << " with error: " << std::strerror(errno); for (int i = 0; i < 100; i++) { rc->Recv(); @@ -183,7 +144,7 @@ bool RdmaMgr::ConnectivityCheck() { for (i = 0; i < ne; ++i) { ibv_wc_status s = rdma_adapter_->wc_[i].status; // recv complete - if ((int)rdma_adapter_->wc_[i].wr_id == PING_RECV_WRID) { + if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::PingRecvWrid) { CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( rdma_adapter_->wc_[i].status) << "(" << rdma_adapter_->wc_[i].status @@ -202,8 +163,6 @@ bool RdmaMgr::ConnectivityCheck() { } // for } // while CHECK(rcnt == scnt) << "Connectivity check failed!"; - ibv_dereg_mr(mr); - free(buff); rdma_adapter_->StartPolling(); return (num_remote_workers_ == rcnt) && (num_remote_workers_ == scnt); } diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h index 4ace70ba57..e711e60478 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.h +++ b/tensorflow/contrib/verbs/rdma_mgr.h @@ -48,8 +48,6 @@ class RdmaMgr { RdmaAdapter* rdma_adapter_; typedef std::unordered_map ChannelTable; ChannelTable channel_table_; - int PostSend(RdmaChannel* rc, struct ibv_sge list); - int PostRecv(RdmaChannel* rc, struct ibv_sge list); TF_DISALLOW_COPY_AND_ASSIGN(RdmaMgr); }; -- GitLab From 097d536c02d5e9f8ab0c2269161343471c2a00fe Mon Sep 17 00:00:00 2001 From: dariavel Date: Mon, 9 Oct 2017 15:54:32 +0300 Subject: [PATCH 0011/1801] Call done in case of not OK status fix + light code refactoring Signed-off-by: dariavel --- .../contrib/verbs/rdma_rendezvous_mgr.cc | 40 +++++++------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc index ce82ca2883..2bfa81c2ae 100644 --- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc @@ -58,20 +58,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( // parse src_name and dst_name string src_name, dst_name, unused; if (!DeviceNameUtils::SplitDeviceName(parsed.src_device, &src_name, + &unused) || + !DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name, &unused)) { s = errors::Internal("Could not parse src name."); } - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); - if (!s.ok()) { - done(s, Args(), recv_args, Tensor{}, false); - return; - } - if (!DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name, - &unused)) { - s = errors::Internal("Could not parse dst name."); - } - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); if (!s.ok()) { + LOG(ERROR) << "s is not ok, error code " << s.error_message(); done(s, Args(), recv_args, Tensor{}, false); return; } @@ -82,18 +75,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( // insert callback rc->InsertRecvCallback(key_with_step_id, [this, key, key_with_step_id, rc, recv_args, parsed, done]() { - Status s; - Device* src_dev; - s = env_->device_mgr->LookupDevice("CPU:0", &src_dev); - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); - if (!s.ok()) { - done(s, Args(), recv_args, Tensor(), true); - return; - } - Device* dst_dev; - s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_dev); - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); - if (!s.ok()) { + Status src_s, dst_s, s; + Device* src_dev, *dst_dev; + src_s = env_->device_mgr->LookupDevice("CPU:0", &src_dev); + dst_s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_dev); + if (!src_s.ok() || !dst_s.ok()) { + s = src_s.ok() ? dst_s : src_s; + LOG(ERROR) << "s is not ok, error code " << s.error_message(); done(s, Args(), recv_args, Tensor(), true); return; } @@ -111,8 +99,8 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( if (dst_dev->tensorflow_gpu_device_info() && (!recv_args.alloc_attrs.on_host())) { CHECK(recv_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); + << "send dev name: " << src_dev->name() + << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); Tensor copy(alloc, rm.data_type_, rm.tensor_shape_); memcpy(DMAHelper::base(©), input, rm.tensor_bytes_); @@ -122,8 +110,8 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( GPUUtil::CopyCPUTensorToGPU( ©, recv_args.device_context, dst_dev, &gpu_copy, - [this, gpu_copy, key, key_with_step_id, recv_args, done, rm, - rc](const Status& s) { + [this, gpu_copy, key, key_with_step_id, recv_args, done, rm, rc]( + const Status& s) { CHECK(s.ok()) << "copy tensor to gpu sync"; Tensor val; val = std::move(gpu_copy); -- GitLab From 7edaa93308d7b4d03dd32c009c1ffe5847b9a8b8 Mon Sep 17 00:00:00 2001 From: Noa Ezra Date: Wed, 18 Oct 2017 10:25:04 +0300 Subject: [PATCH 0012/1801] fix compilation error when working without cuda Signed-off-by: Noa Ezra adding cuda library to BUILD file in order to use GOOGLE_CUDA define Signed-off-by: Noa Ezra --- tensorflow/contrib/verbs/BUILD | 6 ++++-- tensorflow/contrib/verbs/rdma.cc | 5 +++++ tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD index 746ff38b37..8b38fc1e85 100644 --- a/tensorflow/contrib/verbs/BUILD +++ b/tensorflow/contrib/verbs/BUILD @@ -7,6 +7,8 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 +load("//tensorflow:tensorflow.bzl","tf_cuda_library") + exports_files(["LICENSE"]) filegroup( @@ -97,7 +99,7 @@ cc_library( alwayslink = 1, ) -cc_library( +tf_cuda_library( name = "rdma_rendezvous_mgr", srcs = ["rdma_rendezvous_mgr.cc"], hdrs = ["rdma_rendezvous_mgr.h"], @@ -130,7 +132,7 @@ cc_library( ], ) -cc_library( +tf_cuda_library( name = "rdma", srcs = ["rdma.cc"], hdrs = ["rdma.h"], diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 55a8f20c29..79c6c1ab07 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -21,8 +21,10 @@ limitations under the License. #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" +#if GOOGLE_CUDA #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu/process_state.h" +#endif #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h" #include "tensorflow/core/distributed_runtime/session_mgr.h" #include "tensorflow/core/framework/rendezvous.h" @@ -31,6 +33,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/core/threadpool.h" namespace tensorflow { @@ -1063,6 +1066,7 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { +#if GOOGLE_CUDA CHECK(send_args.device_context) << "send dev name: " << src_dev->name() << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); @@ -1101,6 +1105,7 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( &proto, NULL, send_args, recv_args); }); } +#endif // GOOGLE_CUDA } else { // tensor is in CPU memory. StringPiece copy_buf; diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc index 2bfa81c2ae..dbb3d25f45 100644 --- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc @@ -21,8 +21,10 @@ limitations under the License. #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" +#if GOOGLE_CUDA #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu/process_state.h" +#endif // GOOGLE_CUDA #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -98,6 +100,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( if (can_memcpy) { if (dst_dev->tensorflow_gpu_device_info() && (!recv_args.alloc_attrs.on_host())) { +#if GOOGLE_CUDA CHECK(recv_args.device_context) << "send dev name: " << src_dev->name() << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); @@ -118,6 +121,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( RecvPostCopyOps(key, key_with_step_id, recv_args, done, rm, rc, val, s); }); +#endif // GOOGLE_CUDA return; } else { AllocatorAttributes host_alloc_attrs; -- GitLab From d7dce09a100e29f63f2ac20740a061c9aaf27654 Mon Sep 17 00:00:00 2001 From: dariavel Date: Mon, 6 Nov 2017 11:38:15 +0200 Subject: [PATCH 0013/1801] Replace hardcoded 100 with RDMA_QP_QUEUE_DEPTH Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma_mgr.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index 3e2171f33d..8d26e022d0 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -127,7 +127,7 @@ bool RdmaMgr::ConnectivityCheck() { VLOG(2) << "Ping to " << worker_name; CHECK(rc->PingPostSend() == 0) << "Couldn't post send to " << worker_name << " with error: " << std::strerror(errno); - for (int i = 0; i < 100; i++) { + for (i = 0; i < rc->adapter_->params_.queue_depth - 1; i++) { rc->Recv(); } } -- GitLab From d6b267ac78fcb6a3250c24d466e8aa478c1fc783 Mon Sep 17 00:00:00 2001 From: dariavel Date: Tue, 7 Nov 2017 11:57:50 +0200 Subject: [PATCH 0014/1801] Clang formating Signed-off-by: dariavel --- tensorflow/contrib/verbs/BUILD | 2 +- tensorflow/contrib/verbs/rdma.cc | 9 ++++----- tensorflow/contrib/verbs/rdma_mgr.cc | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD index 8b38fc1e85..38a84ffb10 100644 --- a/tensorflow/contrib/verbs/BUILD +++ b/tensorflow/contrib/verbs/BUILD @@ -7,7 +7,7 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl","tf_cuda_library") +load("//tensorflow:tensorflow.bzl", "tf_cuda_library") exports_files(["LICENSE"]) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 79c6c1ab07..1fa98a1f01 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -577,7 +577,7 @@ int RdmaChannel::PingPostRecv() { int RdmaChannel::PingPostSend() { struct ibv_send_wr wr, *bad_wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; wr.sg_list = &ping_sge_list_; wr.num_sge = 1; wr.opcode = IBV_WR_SEND; @@ -593,7 +593,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_sge list; mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize, - IBV_ACCESS_LOCAL_WRITE); + IBV_ACCESS_LOCAL_WRITE); CHECK(mr_) << "Failed to register memory region"; memset(&list, 0, sizeof(list)); @@ -675,9 +675,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, buffer_name_index_table_.insert({buffer_names[i], index}); } } - CHECK(PingPostRecv() == 0) << "Couldn't post receive from " - << remote_name_ << " with error " - << std::strerror(errno); + CHECK(PingPostRecv() == 0) << "Couldn't post receive from " << remote_name_ + << " with error " << std::strerror(errno); } RdmaChannel::~RdmaChannel() { diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index 8d26e022d0..e7df0528b5 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -115,7 +115,6 @@ void RdmaMgr::SetupChannels() { } } - // Check connectivity by pinging every channel bool RdmaMgr::ConnectivityCheck() { int i, rcnt = 0, scnt = 0; -- GitLab From 1dec49ebb0e076a2ebb513a3f3aaa725714330db Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 7 Nov 2017 16:18:26 -0800 Subject: [PATCH 0015/1801] Automated g4 rollback of changelist 174708213 PiperOrigin-RevId: 174930262 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 060b3f9129..5a3b831429 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -66,7 +66,7 @@ _CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] # TODO(b/65703635): Flip the value and remove all dead code. -_WRAP_INPUT_FN_INTO_WHILE_LOOP = True +_WRAP_INPUT_FN_INTO_WHILE_LOOP = False def _create_global_step(graph): -- GitLab From 72e0355c498f6f4531ffdb9c40997cad40684da5 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Tue, 7 Nov 2017 16:24:37 -0800 Subject: [PATCH 0016/1801] Added profiler traces for GPU back-end operations. PiperOrigin-RevId: 174931093 --- .../compiler/xla/service/gpu/gpu_compiler.cc | 18 +++++++++++++----- .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 4 ++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index ceb0e530c1..187b4a705c 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -75,6 +75,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/platform/subprocess.h" +#include "tensorflow/core/platform/tracing.h" namespace se = ::perftools::gputools; @@ -87,6 +88,7 @@ namespace gpu { namespace { +using tensorflow::port::Tracing; using tensorflow::strings::StrCat; // Any address of a variable residing in global memory or returned by one of the @@ -231,6 +233,7 @@ tensorflow::Status PrepareHloModuleForIrEmitting( // code (i.e. a cubin) as a byte array. StatusOr> CompilePtx(const string& ptx, int cc_major, int cc_minor) { + Tracing::TraceMe annotation("Compile PTX", /*is_expensive=*/true); const string ptxas_path = tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas"); VLOG(2) << "Using ptxas at " << ptxas_path; @@ -295,11 +298,15 @@ StatusOr> GpuCompiler::Compile( std::unique_ptr module, se::StreamExecutor* stream_exec) { TF_RET_CHECK(stream_exec != nullptr); - TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), - stream_exec->GetDeviceDescription(), - ShapeSizeBytesFunction())); - TF_RETURN_IF_ERROR( - PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); + { + Tracing::TraceMe annotation("HLO Transforms", module->name(), + /*is_expensive=*/true); + TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), + stream_exec->GetDeviceDescription(), + ShapeSizeBytesFunction())); + TF_RETURN_IF_ERROR( + PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); + } llvm::LLVMContext llvm_context; std::string buffer; @@ -444,6 +451,7 @@ StatusOr> GpuCompiler::Compile( std::vector GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx, int cc_major, int cc_minor) { + Tracing::TraceMe annotation("PTX->CUBIN", /*is_expensive=*/true); bool inserted; decltype(compilation_cache_.begin()) iter; // Pointers into compilation_cache_ where the ptx and (optional) cubin are diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 817e95a31c..1cb963be61 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -60,6 +60,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/tracing.h" namespace xla { namespace gpu { @@ -488,6 +489,9 @@ StatusOr CompileToPtx(llvm::Module* module, string ptx; { + tensorflow::port::Tracing::TraceMe annotation( + "Compiling IR", llvm_ir::AsString(module->getName()), + /*is_expensive=*/true); ScopedLoggingTimer compilation_timer( "Compile module " + llvm_ir::AsString(module->getName()), /*vlog_level=*/2); -- GitLab From 2815673bcc5db2aa246083dc2fe08b0cc95711c4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 16:29:35 -0800 Subject: [PATCH 0017/1801] [tf.data] Saveable iterator for dataset.take(.), dataset.skip(.) and dataset.repeat(.). PiperOrigin-RevId: 174931742 --- .../contrib/data/python/kernel_tests/BUILD | 2 + .../dataset_serialization_test_base.py | 73 +++++++++++------ .../kernel_tests/sequence_dataset_op_test.py | 78 +++++++++++++++++++ tensorflow/core/kernels/repeat_dataset_op.cc | 32 ++++++++ tensorflow/core/kernels/skip_dataset_op.cc | 46 ++++++++++- tensorflow/core/kernels/take_dataset_op.cc | 46 ++++++++++- 6 files changed, 245 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 7283f0ff0a..c1f1d90c5d 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -365,6 +365,7 @@ py_test( srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -428,6 +429,7 @@ py_test( srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index df9147af6c..369b789a52 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -32,7 +32,7 @@ from tensorflow.python.util import nest class DatasetSerializationTestBase(test.TestCase): - """Base class for testing finite serializable datasets.""" + """Base class for testing serializable datasets.""" def tearDown(self): self._delete_ckpt() @@ -58,17 +58,19 @@ class DatasetSerializationTestBase(test.TestCase): if ds_fn2: self.verify_restore_in_modified_graph(ds_fn1, ds_fn2, num_outputs) - def verify_unused_iterator(self, ds_fn, num_outputs): + def verify_unused_iterator(self, ds_fn, num_outputs, verify_exhausted=True): """Verifies that saving and restoring an unused iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. """ - self.verify_run_with_breaks(ds_fn, [0], num_outputs) + self.verify_run_with_breaks( + ds_fn, [0], num_outputs, verify_exhausted=verify_exhausted) def verify_fully_used_iterator(self, ds_fn, num_outputs): """Verifies that saving and restoring a fully used iterator works. @@ -104,12 +106,16 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, [], 0, ckpt_saved=True, verify_exhausted=True) self.assertEqual(len(actual), 0) - def verify_init_before_restore(self, ds_fn, num_outputs): + def verify_init_before_restore(self, + ds_fn, + num_outputs, + verify_exhausted=True): """Verifies that retoring into an already initilized iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -118,9 +124,14 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, self.gen_break_points(num_outputs), num_outputs, - init_before_restore=True) + init_before_restore=True, + verify_exhausted=verify_exhausted) - def verify_multiple_breaks(self, ds_fn, num_outputs, num_breaks=10): + def verify_multiple_breaks(self, + ds_fn, + num_outputs, + num_breaks=10, + verify_exhausted=True): """Attempts to save/restore at multiple break points. Args: @@ -128,16 +139,22 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs: See `run_core_tests`. num_breaks: The number of break points. These are uniformly spread in [0, num_outputs] both inclusive. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. """ - self.verify_run_with_breaks(ds_fn, - self.gen_break_points(num_outputs, num_breaks), - num_outputs) + self.verify_run_with_breaks( + ds_fn, + self.gen_break_points(num_outputs), + num_outputs, + verify_exhausted=verify_exhausted) - def verify_reset_restored_iterator(self, ds_fn, num_outputs, - break_point=None): + def verify_reset_restored_iterator(self, + ds_fn, + num_outputs, + break_point=None, + verify_exhausted=True): """Attempts to re-initialize a restored iterator. This is useful when restoring a training checkpoint during validation. @@ -146,6 +163,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -153,7 +171,8 @@ class DatasetSerializationTestBase(test.TestCase): break_point = num_outputs // 2 if not break_point else break_point # Collect ground truth containing all outputs. - expected = self.gen_outputs(ds_fn, [], num_outputs, verify_exhausted=True) + expected = self.gen_outputs( + ds_fn, [], num_outputs, verify_exhausted=verify_exhausted) # Skip some items and save checkpoint. self.gen_outputs(ds_fn, [], break_point, verify_exhausted=False) @@ -168,15 +187,17 @@ class DatasetSerializationTestBase(test.TestCase): sess.run(init_op) for _ in range(num_outputs): actual.append(sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) + if verify_exhausted: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) self.match(expected, actual) def verify_restore_in_modified_graph(self, ds_fn1, ds_fn2, num_outputs, - break_point=None): + break_point=None, + verify_exhausted=True): """Attempts to restore an iterator in a modified graph. Builds an input pipeline using ds_fn1, runs it for `break_point` steps @@ -188,6 +209,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn2: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -196,15 +218,15 @@ class DatasetSerializationTestBase(test.TestCase): # Skip `break_point` items and store the remaining produced from ds_fn1 # in `expected`. - self.gen_outputs(ds_fn1, [], break_point) + self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) expected = self.gen_outputs( ds_fn1, [], num_outputs - break_point, ckpt_saved=True, - verify_exhausted=True) + verify_exhausted=verify_exhausted) # Generate `break_point` items from ds_fn1 and save checkpoint. - self.gen_outputs(ds_fn1, [], break_point) + self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) actual = [] # Build graph for ds_fn2 but load checkpoint for ds_fn1. @@ -214,8 +236,9 @@ class DatasetSerializationTestBase(test.TestCase): self._restore(saver, sess) for _ in range(num_outputs - break_point): actual.append(sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) + if verify_exhausted: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) self.match(expected, actual) @@ -223,6 +246,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, break_points, num_outputs, + verify_exhausted=True, init_before_restore=False): """Verifies that ds_fn() produces the same outputs with and without breaks. @@ -237,6 +261,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `gen_outputs`. break_points: See `gen_outputs`. num_outputs: See `gen_outputs`. + verify_exhausted: See `gen_outputs`. init_before_restore: See `gen_outputs`. Raises: @@ -245,13 +270,13 @@ class DatasetSerializationTestBase(test.TestCase): expected = self.gen_outputs( ds_fn, [], num_outputs, - verify_exhausted=True, + verify_exhausted=verify_exhausted, init_before_restore=init_before_restore) actual = self.gen_outputs( ds_fn, break_points, num_outputs, - verify_exhausted=True, + verify_exhausted=verify_exhausted, init_before_restore=init_before_restore) self.match(expected, actual) @@ -261,7 +286,7 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs, ckpt_saved=False, init_before_restore=False, - verify_exhausted=False): + verify_exhausted=True): """Generates elements from input dataset while stopping at break points. Produces `num_outputs` outputs and saves the state of the iterator in the @@ -285,7 +310,7 @@ class DatasetSerializationTestBase(test.TestCase): after producing `num_outputs` elements. Returns: - A list if `num_outputs` items. + A list of `num_outputs` items. """ outputs = [] diff --git a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py index 91615e9f62..1a26da82e5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -207,5 +208,82 @@ class SequenceDatasetTest(test.TestCase): sess.run(get_next) +class SequenceDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_skip_dataset(self, count): + components = (np.arange(10),) + return dataset_ops.Dataset.from_tensor_slices(components).skip(count) + + def testSkipFewerThanInputs(self): + count = 4 + num_outputs = 10 - count + self.run_core_tests(lambda: self._build_skip_dataset(count), + lambda: self._build_skip_dataset(count + 2), + num_outputs) + + def testSkipVarious(self): + # Skip more than inputs + self.run_core_tests(lambda: self._build_skip_dataset(20), None, 0) + # Skip exactly the input size + self.run_core_tests(lambda: self._build_skip_dataset(10), None, 0) + self.run_core_tests(lambda: self._build_skip_dataset(-1), None, 0) + # Skip nothing + self.run_core_tests(lambda: self._build_skip_dataset(0), None, 10) + + def _build_take_dataset(self, count): + components = (np.arange(10),) + return dataset_ops.Dataset.from_tensor_slices(components).take(count) + + def testTakeFewerThanInputs(self): + count = 4 + self.run_core_tests( + lambda: self._build_take_dataset(count), + lambda: self._build_take_dataset(count + 2), + count, + ) + + def testTakeVarious(self): + # Take more than inputs + self.run_core_tests(lambda: self._build_take_dataset(20), None, 10) + # Take exactly the input size + self.run_core_tests(lambda: self._build_take_dataset(10), None, 10) + # Take all + self.run_core_tests(lambda: self._build_take_dataset(-1), None, 10) + # Take nothing + self.run_core_tests(lambda: self._build_take_dataset(0), None, 0) + + def _build_repeat_dataset(self, count, take_count=3): + components = (np.arange(10),) + return dataset_ops.Dataset.from_tensor_slices(components).take( + take_count).repeat(count) + + def testFiniteRepeat(self): + count = 10 + self.run_core_tests(lambda: self._build_repeat_dataset(count), + lambda: self._build_repeat_dataset(count + 2), + 3 * count) + + def testEmptyRepeat(self): + self.run_core_tests(lambda: self._build_repeat_dataset(0), None, 0) + + def testInfiniteRepeat(self): + self.verify_unused_iterator( + lambda: self._build_repeat_dataset(-1), 10, verify_exhausted=False) + self.verify_init_before_restore( + lambda: self._build_repeat_dataset(-1), 10, verify_exhausted=False) + self.verify_multiple_breaks( + lambda: self._build_repeat_dataset(-1), 20, verify_exhausted=False) + self.verify_reset_restored_iterator( + lambda: self._build_repeat_dataset(-1), 20, verify_exhausted=False) + self.verify_restore_in_modified_graph( + lambda: self._build_repeat_dataset(-1), + lambda: self._build_repeat_dataset(2), + 20, + verify_exhausted=False) + # Test repeat empty dataset + self.run_core_tests(lambda: self._build_repeat_dataset(-1, 0), None, 0) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/repeat_dataset_op.cc index 9813e99a70..6c0f4118e6 100644 --- a/tensorflow/core/kernels/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/repeat_dataset_op.cc @@ -95,6 +95,15 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } }; class FiniteIterator : public DatasetIterator { @@ -183,6 +192,29 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { } while (true); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (input_impl_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + else + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("uninitialized"), "")); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (reader->Contains(full_name("uninitialized"))) { + input_impl_.reset(); + } else { + input_impl_ = dataset()->input_->MakeIterator(prefix()); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } + return Status::OK(); + } + private: mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/skip_dataset_op.cc index 52a6116a7c..05152db1ae 100644 --- a/tensorflow/core/kernels/skip_dataset_op.cc +++ b/tensorflow/core/kernels/skip_dataset_op.cc @@ -35,14 +35,14 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { int64 count; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "count", &count)); - *output = new Dataset(count, input); + *output = new Dataset(ctx, count, input); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(int64 count, const DatasetBase* input) - : count_(count), input_(input) { + Dataset(OpKernelContext* ctx, int64 count, const DatasetBase* input) + : GraphDatasetBase(ctx), count_(count), input_(input) { input_->Ref(); } @@ -71,6 +71,18 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "SkipDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + Node* count = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, count}, output)); + return Status::OK(); + } + private: class EmptyIterator : public DatasetIterator { public: @@ -82,6 +94,16 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } }; class FiniteIterator : public DatasetIterator { @@ -119,6 +141,22 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + return Status::OK(); + } + private: mutex mu_; int64 i_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/take_dataset_op.cc index c3f33d663c..f9f675abda 100644 --- a/tensorflow/core/kernels/take_dataset_op.cc +++ b/tensorflow/core/kernels/take_dataset_op.cc @@ -35,14 +35,14 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { // Create a new TakeDatasetOp::Dataset, and return it as the output. int64 count; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "count", &count)); - *output = new Dataset(count, input); + *output = new Dataset(ctx, count, input); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(int64 count, const DatasetBase* input) - : count_(count), input_(input) { + Dataset(OpKernelContext* ctx, int64 count, const DatasetBase* input) + : GraphDatasetBase(ctx), count_(count), input_(input) { input_->Ref(); } @@ -72,6 +72,18 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "TakeDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + Node* count = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, count}, output)); + return Status::OK(); + } + private: class EmptyIterator : public DatasetIterator { public: @@ -83,6 +95,16 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } }; class FiniteIterator : public DatasetIterator { @@ -110,6 +132,22 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + return Status::OK(); + } + private: mutex mu_; int64 i_ GUARDED_BY(mu_); -- GitLab From 71e279c0c567c700fd02ba7b0a7481b1c1462227 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Tue, 7 Nov 2017 16:30:33 -0800 Subject: [PATCH 0018/1801] Allow passing other global_steps to summaries. PiperOrigin-RevId: 174931874 --- tensorflow/contrib/summary/summary_ops.py | 43 +++++++++++-------- .../contrib/summary/summary_ops_test.py | 12 ++++++ 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index 56e3198593..9238671c4a 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -57,12 +57,14 @@ def should_record_summaries(): # TODO(apassos) consider how to handle local step here. @tf_contextlib.contextmanager -def record_summaries_every_n_global_steps(n): +def record_summaries_every_n_global_steps(n, global_step=None): """Sets the should_record_summaries Tensor to true if global_step % n == 0.""" + if global_step is None: + global_step = training_util.get_global_step() collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) old = collection_ref[:] with ops.device("cpu:0"): - collection_ref[:] = [math_ops.equal(training_util.get_global_step() % n, 0)] + collection_ref[:] = [math_ops.equal(global_step % n, 0)] yield collection_ref[:] = old @@ -204,68 +206,75 @@ def summary_writer_function(name, tensor, function, family=None): return op -def generic(name, tensor, metadata, family=None): +def generic(name, tensor, metadata, family=None, global_step=None): """Writes a tensor summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_summary( context.context().summary_writer_resource, - training_util.get_global_step(), array_ops.identity(tensor), + global_step, array_ops.identity(tensor), tag, metadata, name=scope) return summary_writer_function(name, tensor, function, family=family) -def scalar(name, tensor, family=None): +def scalar(name, tensor, family=None, global_step=None): """Writes a scalar summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_scalar_summary( context.context().summary_writer_resource, - training_util.get_global_step(), tag, array_ops.identity(tensor), + global_step, tag, array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def histogram(name, tensor, family=None): +def histogram(name, tensor, family=None, global_step=None): """Writes a histogram summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_histogram_summary( context.context().summary_writer_resource, - training_util.get_global_step(), tag, array_ops.identity(tensor), + global_step, tag, array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def image(name, tensor, bad_color=None, max_images=3, family=None): +def image(name, tensor, bad_color=None, max_images=3, family=None, + global_step=None): """Writes an image summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): bad_color_ = (constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8) if bad_color is None else bad_color) # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_image_summary( context.context().summary_writer_resource, - training_util.get_global_step(), tag, array_ops.identity(tensor), + global_step, tag, array_ops.identity(tensor), bad_color_, max_images, name=scope) return summary_writer_function(name, tensor, function, family=family) -def audio(name, tensor, sample_rate, max_outputs, family=None): +def audio(name, tensor, sample_rate, max_outputs, family=None, + global_step=None): """Writes an audio summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_audio_summary( context.context().summary_writer_resource, - training_util.get_global_step(), + global_step, tag, array_ops.identity(tensor), sample_rate=sample_rate, diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index de7ae6ec27..466e194096 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -86,6 +86,18 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') + def testSummaryGlobalStep(self): + global_step = training_util.get_or_create_global_step() + logdir = tempfile.mkdtemp() + with summary_ops.create_summary_file_writer( + logdir, max_queue=0, + name='t2').as_default(), summary_ops.always_record_summaries(): + + summary_ops.scalar('scalar', 2.0, global_step=global_step) + + events = summary_test_util.events_from_file(logdir) + self.assertEqual(len(events), 2) + self.assertEqual(events[1].summary.value[0].tag, 'scalar') if __name__ == '__main__': test.main() -- GitLab From c8530b907a686b92c94d13f854dc504fa10901db Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 7 Nov 2017 16:50:18 -0800 Subject: [PATCH 0019/1801] tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes. Fixes #14164. PiperOrigin-RevId: 174934769 --- tensorflow/contrib/eager/python/network.py | 63 ++++++---- .../contrib/eager/python/network_test.py | 108 ++++++++++++++++-- tensorflow/python/layers/base.py | 15 ++- 3 files changed, 148 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index c6e628b074..1a5c6e8aec 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -244,6 +244,12 @@ class Network(base.Layer): self._owned_layers = {} # The scope to use if we end up without a parent. self._default_parent_variable_scope = variable_scope.get_variable_scope() + # Hold on to the variable scope counts from init to check whether a scope + # with the name we want was ever created in our parent scope. Without this + # check we might have name collisions if the parent scope on init gets + # closed before build is called. + self._variable_scope_counts_on_init = ( + variable_scope._get_default_variable_store().variable_scopes_count) self._custom_getter, self._deferred_restorations = ( _make_custom_getter_for_deferred_restorations()) @@ -261,18 +267,29 @@ class Network(base.Layer): def _finalize_name(self, parent_network): if not self._name: - if not parent_network: - name_uid_map = base._get_default_graph_uid_map() - else: - name_uid_map = parent_network._sub_layer_name_uids # Were were not passed a name explicitly (or it was blank), so this is an # anonymous Network. We make up a unique name. if parent_network: avoid_names = parent_network._owned_layers + name_uid_map = parent_network._sub_layer_name_uids else: - avoid_names = None + name_uid_map = base._get_default_graph_uid_map() + # Figure out which names we have to avoid based on which variable scope + # we're nested in. + strip_name = self._default_parent_variable_scope.name + if strip_name: + strip_name += "/" + def _strip_on_init_scope(name): + if name.startswith(strip_name): + return name[len(strip_name):] + else: + return None + avoid_names = set( + _strip_on_init_scope(name) + for name in self._variable_scope_counts_on_init.keys() if name) self._name, self._base_name = self._make_unique_name( - name_uid_map=name_uid_map, avoid_names=avoid_names) + name_uid_map=name_uid_map, avoid_names=avoid_names, + namespace=self._default_parent_variable_scope.name) if self._first_parent is None or (self._first_parent # False = no parent and self._first_parent() is None): # Save a pointer to the parent Network so that we can later check that the @@ -302,7 +319,13 @@ class Network(base.Layer): parent_scope = first_parent._scope else: parent_scope = self._default_parent_variable_scope - with variable_scope.variable_scope(parent_scope): + with variable_scope.variable_scope(parent_scope) as parent_vs: + expected_scope_name = parent_vs.name + "/" + self._name + if expected_scope_name in self._variable_scope_counts_on_init: + raise ValueError( + ("A Network named '%s' already exists (or a variable_scope was " + "created with this name). Names must be unique.") % ( + self._name,)) # Make sure variables with this prefix will be unique. with variable_scope.variable_scope( None, use_resource=True, default_name=self._name) as scope: @@ -319,25 +342,22 @@ class Network(base.Layer): "created with this name). Names must be unique.") % ( self._name,)) if (first_parent - and scope_prefix[:-1] != first_parent._scope.name): + and scope_prefix[:-1] != first_parent.scope_name): raise ValueError( ("Network variable names must match a nesting of sub-Network " "names. Expected prefix '%s' from parent network, but got " "'%s' when attempting to create a variable_scope for Network " "'%s'. Likely an explicit variable_scope was inserted into " "the nesting.") % ( - first_parent._scope.name, + first_parent.scope_name, scope_prefix[:-1], self._name)) elif not first_parent and scope_prefix: # For the case when this Network is not nested inside any other - # Network, but is in a variable_scope. This is an error for now. - raise ValueError( - "Creating Networks inside named variable_scopes is currently " - "not supported (to ensure that variable names match the names " - "of Networks in which they were first created). To set " - "options, try `with tf.variable_scope(''):`. If this " - "limitation bothers you, please file a feature request.") + # Network, but is in a variable_scope. This Network's name takes on + # the full variable scope prefix. + self._name = scope_name + for non_network_sublayer in self._non_network_sublayers: self._set_scope_for_nonnetwork_sublayer(non_network_sublayer) @@ -355,8 +375,7 @@ class Network(base.Layer): raise ValueError( ("The parent of a Layer added to Network %s was garbage collected " "before the Layer was built. If this limitation bothers you " - "please, comment on " - "https://github.com/tensorflow/tensorflow/issues/14164.") % + "please file a feature request.") % (self.name,)) with variable_scope.variable_scope(parent_scope): # Horrid hack to make Layer variable names which are direct @@ -420,7 +439,9 @@ class Network(base.Layer): # name, and we should respect it (subject to error checking). layer._name, layer._base_name = layer._make_unique_name( name_uid_map=self._sub_layer_name_uids, - avoid_names=self._owned_layers) + avoid_names=self._owned_layers + # No namespace required, since we've specified our own UID map. + ) layer._first_parent = weakref.ref(self) self._non_network_sublayers.append(layer) if (not layer.built @@ -556,7 +577,7 @@ class Network(base.Layer): if os.path.isdir(save_path): # If we were passed a directory, default to naming based on the Network # name. - save_path = os.path.join(save_path, self.name) + save_path = os.path.join(save_path, self.name.replace("/", "_")) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) @@ -750,7 +771,7 @@ class Network(base.Layer): self._set_scope() # scope_name should be available to map_funcs if os.path.isdir(save_path): # If we don't have a name yet, set no parent. - save_path = os.path.join(save_path, self.name) + save_path = os.path.join(save_path, self.name.replace("/", "_")) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 14adbafe57..1127055c05 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -410,19 +410,103 @@ class NetworkTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testWrappingInVariableScope(self): + one = constant_op.constant([[1.]]) + # Naming happens in the order of first build rather than the order of + # construction, but for clarity they're the same here and construction is + # annotated. + outside_net_before = MyNetwork() # name=my_network_1 + outside_net_before(one) + captured_scope = variable_scope.get_variable_scope() with variable_scope.variable_scope("outside_scope"): - net = MyNetwork() - one = constant_op.constant([[1.]]) - with self.assertRaisesRegexp( - ValueError, - ("Creating Networks inside named variable_scopes is currently not " - "supported")): - net(one) - # Alternatively, we could re-name the Network to match the variable_scope: - # self.assertEqual("outside_scope/my_network_1", net.name) - # self.assertStartsWith( - # expected_start="outside_scope/my_network_1/dense/", - # actual=net.trainable_weights[0].name) + net1 = MyNetwork() # name=outside_scope/my_network_1 + net1(one) + name_conflict1 = MyNetwork(name="name_conflict") # fine, unique so far + name_conflict2 = MyNetwork(name="name_conflict") # error on build + with variable_scope.variable_scope("inside_scope"): + # No issue here since the name is unique within its scope. + name_conflict3 = MyNetwork(name="name_conflict") + net2 = MyNetwork() # name=outside_scope/my_network_3 to avoid the + # variable_scope my_network_2 below. + vs_name_conflict = MyNetwork(name="vs_name_conflict") # conflict below + with variable_scope.variable_scope("intervening_scope"): + with variable_scope.variable_scope(captured_scope): + with variable_scope.variable_scope("outside_scope"): + name_conflict4 = MyNetwork(name="name_conflict") # error on build + with variable_scope.variable_scope("my_network_2"): + pass + with variable_scope.variable_scope("vs_name_conflict"): + pass + net3 = MyNetwork() # name=outside_scope/my_network_4 + name_conflict1(one) + with self.assertRaisesRegexp( + ValueError, "named 'name_conflict' already exists"): + name_conflict2(one) + name_conflict3(one) + net2(one) + with self.assertRaisesRegexp( + ValueError, "or a variable_scope was created with this name"): + vs_name_conflict(one) + with self.assertRaisesRegexp( + ValueError, "named 'name_conflict' already exists"): + name_conflict4(one) + self.assertEqual("outside_scope/name_conflict", + name_conflict1.name) + self.assertStartsWith( + expected_start="outside_scope/name_conflict/dense_1/", + actual=name_conflict1.variables[0].name) + self.assertEqual("outside_scope/inside_scope/name_conflict", + name_conflict3.name) + self.assertStartsWith( + expected_start="outside_scope/inside_scope/name_conflict/dense_1/", + actual=name_conflict3.variables[0].name) + self.assertEqual("outside_scope/my_network_1", net1.name) + self.assertStartsWith( + expected_start="outside_scope/my_network_1/dense_1/", + actual=net1.trainable_weights[0].name) + self.assertEqual("outside_scope/my_network_3", net2.name) + self.assertStartsWith( + expected_start="outside_scope/my_network_3/dense_1/", + actual=net2.trainable_weights[0].name) + net3(one) + self.assertEqual("outside_scope/my_network_4", net3.name) + self.assertStartsWith( + expected_start="outside_scope/my_network_4/dense_1/", + actual=net3.trainable_weights[0].name) + outside_net_after = MyNetwork() + outside_net_after(one) + self.assertEqual("my_network_1", outside_net_before.name) + self.assertStartsWith( + expected_start="my_network_1/dense_1/", + actual=outside_net_before.trainable_weights[0].name) + self.assertEqual("my_network_2", outside_net_after.name) + self.assertStartsWith( + expected_start="my_network_2/dense_1/", + actual=outside_net_after.trainable_weights[0].name) + + @test_util.run_in_graph_and_eager_modes() + def testVariableScopeStripping(self): + with variable_scope.variable_scope("scope1"): + with variable_scope.variable_scope("scope2"): + net = MyNetwork() + net(constant_op.constant([[2.0]])) + self.evaluate(net.variables[0].assign([[42.]])) + self.assertEqual(net.name, "scope1/scope2/my_network_1") + self.assertStartsWith( + expected_start="scope1/scope2/my_network_1/dense_1/", + actual=net.trainable_weights[0].name) + save_path = net.save(self.get_temp_dir()) + self.assertIn("scope1_scope2_my_network_1", save_path) + restore_net = MyNetwork() + # Delayed restoration + restore_net.restore(save_path) + restore_net(constant_op.constant([[1.0]])) + self.assertAllEqual([[42.]], + self.evaluate(restore_net.variables[0])) + self.evaluate(restore_net.variables[0].assign([[-1.]])) + # Immediate restoration + restore_net.restore(save_path) + self.assertAllEqual([[42.]], + self.evaluate(restore_net.variables[0])) @test_util.run_in_graph_and_eager_modes() def testLayerNamesRespected(self): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 07b9d9b7a6..8c8d774b75 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -401,10 +401,11 @@ class Layer(object): """ return input_shape - def _make_unique_name(self, name_uid_map=None, avoid_names=None): + def _make_unique_name(self, name_uid_map=None, avoid_names=None, + namespace=''): base_name = _to_snake_case(self.__class__.__name__) name = _unique_layer_name(base_name, name_uid_map=name_uid_map, - avoid_names=avoid_names) + avoid_names=avoid_names, namespace=namespace) return (name, base_name) def _set_scope(self, scope=None): @@ -2370,7 +2371,7 @@ def _get_default_graph_uid_map(): return name_uid_map -def _unique_layer_name(name, name_uid_map=None, avoid_names=None): +def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): """Makes a layer name (or arbitrary string) unique within a TensorFlow graph. Arguments: @@ -2379,6 +2380,9 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None): names. If None (default), uses a per-Graph dictionary. avoid_names: An optional set or dict with names which should not be used. If None (default) does not avoid any names. + namespace: Gets a name which is unique within the (graph, namespace). Layers + which are not Networks use a blank namespace and so get graph-global + names. Returns: Unique string name. @@ -2396,6 +2400,7 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None): avoid_names = set() proposed_name = None while proposed_name is None or proposed_name in avoid_names: - name_uid_map[name] += 1 - proposed_name = name + '_' + str(name_uid_map[name]) + name_key = (namespace, name) + name_uid_map[name_key] += 1 + proposed_name = name + '_' + str(name_uid_map[name_key]) return proposed_name -- GitLab From 788344009ed1a9e550e980415be1d271bccb8bef Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 16:52:51 -0800 Subject: [PATCH 0020/1801] Fix FakeQuant to correctly set zero on CPU. PiperOrigin-RevId: 174935134 --- tensorflow/core/kernels/fake_quant_ops_functor.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h index b41b22d634..7aaad6e6c7 100644 --- a/tensorflow/core/kernels/fake_quant_ops_functor.h +++ b/tensorflow/core/kernels/fake_quant_ops_functor.h @@ -132,7 +132,7 @@ struct FakeQuantWithMinMaxVarsFunctor { const float max_val = max(); // If min and max are both zero, we should just return zero. if (min_val == 0.0f && max_val == 0.0f) { - outputs.setZero(); + outputs.device(d) = outputs.constant(0.0f); return; } float nudged_min, nudged_max, nudged_scale; @@ -163,8 +163,8 @@ struct FakeQuantWithMinMaxVarsGradientFunctor { // If min and max are both zero, we propagate everything to inputs. if (min_val == 0.0f && max_val == 0.0f) { backprops_wrt_input.device(d) = gradients; - backprop_wrt_min.setZero(); - backprop_wrt_max.setZero(); + backprop_wrt_min.device(d) = backprop_wrt_min.constant(0.0f); + backprop_wrt_max.device(d) = backprop_wrt_max.constant(0.0f); return; } float nudged_min, nudged_max, nudged_scale; @@ -205,7 +205,8 @@ struct FakeQuantWithMinMaxVarsPerChannelFunctor { const float max_val = max(i); // If min and max are both zero, we should just return zero. if (min_val == 0.0f && max_val == 0.0f) { - outputs.chip<1>(i).setZero(); + auto chip = outputs.chip<1>(i); + chip.device(d) = chip.constant(0.0f); continue; } float nudged_min, nudged_max, nudged_scale; @@ -242,8 +243,10 @@ struct FakeQuantWithMinMaxVarsPerChannelGradientFunctor { // If min and max are both zero, we propagate everything to inputs. if (min_val == 0.0f && max_val == 0.0f) { backprops_wrt_input.chip<1>(i).device(d) = gradients_chip; - backprop_wrt_min.chip<0>(i).setZero(); - backprop_wrt_max.chip<0>(i).setZero(); + auto min_chip = backprop_wrt_min.chip<0>(i); + auto max_chip = backprop_wrt_max.chip<0>(i); + min_chip.device(d) = min_chip.constant(0.0f); + max_chip.device(d) = max_chip.constant(0.0f); continue; } float nudged_min, nudged_max, nudged_scale; -- GitLab From aefb02c008f9870c6de6bb10c05725d89427dcb9 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Tue, 7 Nov 2017 17:08:18 -0800 Subject: [PATCH 0021/1801] [XLA] Add binary operation name to shape inference error message. PiperOrigin-RevId: 174937290 --- tensorflow/compiler/xla/service/shape_inference.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 791d17365b..9c7dc2185e 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" @@ -770,8 +771,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(rhs)); - TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of binary operation")); - TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of binary operation")); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( + lhs, tensorflow::strings::StrCat("lhs of binary operation ", + BinaryOperation_Name(operation)))); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( + rhs, tensorflow::strings::StrCat("rhs of binary operation ", + BinaryOperation_Name(operation)))); switch (operation) { case BINOP_DOT: return InferDotOpShape(lhs, rhs); -- GitLab From 5278fa03a9e703d1e414ccebd858f7fdf22dbba5 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 17:12:18 -0800 Subject: [PATCH 0022/1801] Make quant_delay work even if user didn't create global step. PiperOrigin-RevId: 174937793 --- tensorflow/contrib/quantize/python/quantize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 548e33663e..6382d3f7b4 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -387,7 +387,7 @@ class _QuantizeContext(object): if delay_requested and self.quant_delay and self.quant_delay > 0: activate_quant = math_ops.greater_equal( - training_util.get_global_step(), + training_util.get_or_create_global_step(), self.quant_delay, name=scope + '/activate_quant') quant = control_flow_ops.cond( -- GitLab From b4668cc0702a78a2195116c332ec63b743af274b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 17:12:57 -0800 Subject: [PATCH 0023/1801] Used tf.where to simplify conditional expression in div-sharding. PiperOrigin-RevId: 174937860 --- tensorflow/python/ops/embedding_ops.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 8c1ccc6840..f4561d1a83 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -191,12 +191,9 @@ def _embedding_lookup_and_transform(params, (flat_ids - extras) // ids_per_partition) # Emulate a conditional using a boolean indicator tensor - is_in_first_extras_partitions = math_ops.cast(p_assignments < extras, - flat_ids.dtype) - new_ids = (is_in_first_extras_partitions * (flat_ids % - (ids_per_partition + 1)) + - (1 - is_in_first_extras_partitions) * - ((flat_ids - extras) % ids_per_partition)) + new_ids = array_ops.where(p_assignments < extras, + flat_ids % (ids_per_partition + 1), + (flat_ids - extras) % ids_per_partition) else: raise ValueError("Unrecognized partition strategy: " + partition_strategy) -- GitLab From a6de80a90d10797279c950559eed5c101cee6030 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 17:16:55 -0800 Subject: [PATCH 0024/1801] Removed an unused temporary variable from DeviceNameUtils::ParseFullName. PiperOrigin-RevId: 174938299 --- tensorflow/core/util/device_name_utils.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/util/device_name_utils.cc b/tensorflow/core/util/device_name_utils.cc index 2d797c855a..90c3fed2e8 100644 --- a/tensorflow/core/util/device_name_utils.cc +++ b/tensorflow/core/util/device_name_utils.cc @@ -116,7 +116,6 @@ bool DeviceNameUtils::ParseFullName(StringPiece fullname, ParsedName* p) { if (fullname == "/") { return true; } - StringPiece tmp; while (!fullname.empty()) { bool progress = false; if (str_util::ConsumePrefix(&fullname, "/job:")) { -- GitLab From e32e74d0c350be042647b0cbef9e7a619832e7d5 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Tue, 7 Nov 2017 17:22:20 -0800 Subject: [PATCH 0025/1801] [XLA] Fix comments for arg_literals parameter in HloEvaluator::Evaluate. PiperOrigin-RevId: 174939009 --- tensorflow/compiler/xla/service/hlo_evaluator.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 67b6e215fc..7557aaa248 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -39,16 +39,18 @@ class HloEvaluator : public DfsHloVisitorWithDefault { HloEvaluator(); // Evaluates an HLO module and an array of pointers to literals. // Returns the evaluated result as a literal if successful. - // Precondition: argument literals correspond to each input computation's - // parameters in their post-ordering. See comment below for example. + // Precondition: The indices of arg_literals correspond to the parameter + // numbers of the HLO parameters in the computation. See comment below for an + // example. StatusOr> Evaluate( const HloModule& module, tensorflow::gtl::ArraySlice arg_literals); // Evaluates an HLO computation and an array of pointers to literals. // Returns the evaluated result as a literal if successful. - // Precondition: argument literals correspond to the input computation's - // parameters in their post-ordering. For e.g., consider the following graph: + // Precondition: The indices of arg_literals correspond to the parameter + // numbers of the HLO parameters in the computation. For e.g., consider the + // following graph: // // * // / \ @@ -57,8 +59,9 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // / \ // Parameter0 Constant // - // The input literals array will have its first literal map to Parameter0 and - // the second map to Parameter1. + // where Parameter0 has parameter_number 0 and Parameter1 has parameter_number + // 1 in this computation. The input literals array will then have its first + // literal map to Parameter0 and the second map to Parameter1. StatusOr> Evaluate( const HloComputation& computation, tensorflow::gtl::ArraySlice arg_literals); -- GitLab From 5fd3810acf0e22130491f300cb75cf450bc9d290 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 17:47:46 -0800 Subject: [PATCH 0026/1801] [tf.data] Saveable iterator for dataset.zip(..). PiperOrigin-RevId: 174941651 --- .../kernel_tests/zip_dataset_op_test.py | 27 +++++++++++++ tensorflow/core/kernels/zip_dataset_op.cc | 40 +++++++++++++++++-- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py index b0e7218301..5d34b0024c 100644 --- a/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -110,5 +111,31 @@ class ZipDatasetTest(test.TestCase): sess.run(get_next) +class ZipDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_dataset(self, arr): + components = [ + np.tile(np.array([[1], [2], [3], [4]]), 20), + np.tile(np.array([[12], [13], [14], [15]]), 22), + np.array(arr) + ] + datasets = [ + dataset_ops.Dataset.from_tensor_slices(component) + for component in components + ] + return dataset_ops.Dataset.zip((datasets[0], (datasets[1], datasets[2]))) + + def testCore(self): + # Equal length components + arr = [37.0, 38.0, 39.0, 40.0] + num_outputs = len(arr) + self.run_core_tests(lambda: self._build_dataset(arr), None, num_outputs) + # Variable length components + diff_size_arr = [1.0, 2.0] + self.run_core_tests(lambda: self._build_dataset(diff_size_arr), + lambda: self._build_dataset(arr), 2) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index a80b9edbe4..30d64ea634 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -35,14 +35,15 @@ class ZipDatasetOp : public DatasetOpKernel { OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input)); inputs.push_back(input); } - *output = new Dataset(inputs); + *output = new Dataset(ctx, inputs); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - explicit Dataset(const std::vector& inputs) - : inputs_(inputs) { + explicit Dataset(OpKernelContext* ctx, + const std::vector& inputs) + : GraphDatasetBase(ctx), inputs_(inputs) { for (const auto& input : inputs_) { input->Ref(); for (DataType dt : input->output_dtypes()) { @@ -76,6 +77,21 @@ class ZipDatasetOp : public DatasetOpKernel { string DebugString() override { return "ZipDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + std::vector input_graph_nodes; + input_graph_nodes.reserve(inputs_.size()); + for (const auto& input : inputs_) { + Node* input_node; + TF_RETURN_IF_ERROR(b->AddParentDataset(input, &input_node)); + input_graph_nodes.emplace_back(input_node); + } + TF_RETURN_IF_ERROR( + b->AddDatasetWithInputAsList(this, input_graph_nodes, output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: @@ -109,6 +125,22 @@ class ZipDatasetOp : public DatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl)); + return Status::OK(); + } + private: mutex mu_; std::vector> input_impls_ GUARDED_BY(mu_); -- GitLab From 980b74475f3674bd729d35dbc9b2de9f39a8dd6c Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 7 Nov 2017 18:16:47 -0800 Subject: [PATCH 0027/1801] Register int64 for GPU StridedSlice kernel PiperOrigin-RevId: 174944857 --- tensorflow/core/kernels/strided_slice_op.cc | 1 + tensorflow/core/kernels/strided_slice_op_impl.h | 2 ++ tensorflow/python/kernel_tests/array_ops_test.py | 13 ++++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 8fc40db3cc..73b6d4cf6a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index de65147572..afe3a051e6 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -284,6 +284,7 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU); TF_CALL_complex64(DECLARE_FOR_N_GPU); TF_CALL_complex128(DECLARE_FOR_N_GPU); DECLARE_FOR_N_GPU(int32); +DECLARE_FOR_N_GPU(int64); #endif // END GOOGLE_CUDA TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); @@ -299,6 +300,7 @@ DECLARE_FOR_N_CPU(bfloat16); TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL); DECLARE_FOR_N_SYCL(int32); +DECLARE_FOR_N_SYCL(int64); #undef DECLARE_FOR_N_SYCL #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 8f4c94f318..6eb9c66d06 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -486,7 +486,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): _ = checker2[...] _ = checker2[tuple()] - def testInt64GPU(self): + def testFloatSlicedArrayAndInt64IndicesGPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") with self.test_session(use_gpu=True, force_gpu=True): @@ -497,6 +497,17 @@ class StridedSliceTest(test_util.TensorFlowTestCase): s = array_ops.strided_slice(x, begin, end, strides) self.assertAllEqual([3.], self.evaluate(s)) + def testInt64SlicedArrayAndIndicesGPU(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + with self.test_session(use_gpu=True, force_gpu=True): + x = constant_op.constant([1, 2, 3], dtype=dtypes.int64) + begin = constant_op.constant([2], dtype=dtypes.int64) + end = constant_op.constant([3], dtype=dtypes.int64) + strides = constant_op.constant([1], dtype=dtypes.int64) + s = array_ops.strided_slice(x, begin, end, strides) + self.assertAllEqual([3], self.evaluate(s)) + def testDegenerateSlices(self): with self.test_session(use_gpu=True): checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR) -- GitLab From db85753667aa6eb52c2eefc0b9c5446c6b1a6cd7 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 18:38:12 -0800 Subject: [PATCH 0028/1801] Add functionality to perform training of additional fixed point layer on top of quantized base model. Also modify retrain_test to test creation of model info for fixed point mobilenet. PiperOrigin-RevId: 174946745 --- .../examples/image_retraining/retrain.py | 82 +++++++++++++++---- .../examples/image_retraining/retrain_test.py | 23 +++++- 2 files changed, 85 insertions(+), 20 deletions(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 3549891461..ebddfb20f4 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -69,11 +69,18 @@ to validate that you have gathered good training data, but if you want to deploy on resource-limited platforms, you can try the `--architecture` flag with a Mobilenet model. For example: +Run floating-point version of mobilenet: ```bash python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` +Run quantized version of mobilenet: +```bash +python tensorflow/examples/image_retraining/retrain.py \ + --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized +``` + There are 32 different Mobilenet models to choose from, with a variety of file size and latency options. The first number can be '1.0', '0.75', '0.50', or '0.25' to control the size, and the second controls the input image size, either @@ -107,6 +114,7 @@ import numpy as np from six.moves import urllib import tensorflow as tf +from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import gfile @@ -271,6 +279,7 @@ def create_model_graph(model_info): """ with tf.Graph().as_default() as graph: model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name']) + print('Model path: ', model_path) with gfile.FastGFile(model_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) @@ -337,7 +346,10 @@ def maybe_download_and_extract(data_url): statinfo = os.stat(filepath) tf.logging.info('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall(dest_directory) + print('Extracting file from ', filepath) + tarfile.open(filepath, 'r:gz').extractall(dest_directory) + else: + print('Not extracting or downloading files, model already present in disk') def ensure_dir_exists(dir_name): @@ -733,7 +745,7 @@ def variable_summaries(var): def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size): + bottleneck_tensor_size, quantize_layer): """Adds a new softmax and fully-connected layer for training. We need to retrain the top layer to identify our new classes, so this function @@ -745,10 +757,12 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, Args: class_count: Integer of how many categories of things we're trying to - recognize. + recognize. final_tensor_name: Name string for the new final node that produces results. bottleneck_tensor: The output of the main CNN graph. bottleneck_tensor_size: How many entries in the bottleneck vector. + quantize_layer: Boolean, specifying whether the newly added layer should be + quantized. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -771,18 +785,41 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, with tf.name_scope('weights'): initial_value = tf.truncated_normal( [bottleneck_tensor_size, class_count], stddev=0.001) - layer_weights = tf.Variable(initial_value, name='final_weights') + if quantize_layer: + quantized_layer_weights = quant_ops.MovingAvgQuantize( + layer_weights, is_training=True) + variable_summaries(quantized_layer_weights) variable_summaries(layer_weights) with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') + if quantize_layer: + quantized_layer_biases = quant_ops.MovingAvgQuantize( + layer_biases, is_training=True) + variable_summaries(quantized_layer_biases) + variable_summaries(layer_biases) + with tf.name_scope('Wx_plus_b'): - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) + if quantize_layer: + logits = tf.matmul(bottleneck_input, + quantized_layer_weights) + quantized_layer_biases + logits = quant_ops.MovingAvgQuantize( + logits, + init_min=-32.0, + init_max=32.0, + is_training=True, + num_bits=8, + narrow_range=False, + ema_decay=0.5) + tf.summary.histogram('pre_activations', logits) + else: + logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases + tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) + tf.summary.histogram('activations', final_tensor) with tf.name_scope('cross_entropy'): @@ -790,6 +827,7 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, labels=ground_truth_input, logits=logits) with tf.name_scope('total'): cross_entropy_mean = tf.reduce_mean(cross_entropy) + tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'): @@ -825,6 +863,7 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): def save_graph_to_file(sess, graph, graph_file_name): output_graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) + with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) return @@ -858,6 +897,7 @@ def create_model_info(architecture): ValueError: If architecture name is unknown. """ architecture = architecture.lower() + is_quantized = False if architecture == 'inception_v3': # pylint: disable=line-too-long data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' @@ -902,19 +942,28 @@ def create_model_info(architecture): architecture) return None is_quantized = True - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + + if is_quantized: + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' + data_url += version_string + '_' + size_string + '_quantized_frozen.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'Placeholder:0' + model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + + '_quantized_frozen') + model_base_name = 'quantized_frozen_graph.pb' + + else: + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' + data_url += version_string + '_' + size_string + '_frozen.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'input:0' + model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string + model_base_name = 'frozen_graph.pb' + bottleneck_tensor_size = 1001 input_width = int(size_string) input_height = int(size_string) input_depth = 3 - resized_input_tensor_name = 'input:0' - if is_quantized: - model_base_name = 'quantized_graph.pb' - else: - model_base_name = 'frozen_graph.pb' - model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string model_file_name = os.path.join(model_dir_name, model_base_name) input_mean = 127.5 input_std = 127.5 @@ -933,6 +982,7 @@ def create_model_info(architecture): 'model_file_name': model_file_name, 'input_mean': input_mean, 'input_std': input_std, + 'quantize_layer': is_quantized, } @@ -1028,7 +1078,7 @@ def main(_): (train_step, cross_entropy, bottleneck_input, ground_truth_input, final_tensor) = add_final_training_ops( len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size']) + model_info['bottleneck_tensor_size'], model_info['quantize_layer']) # Create the operations we need to evaluate the accuracy of our new layer. evaluation_step, prediction = add_evaluation_step( diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index c342a17dd8..2de4c4ec99 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -70,10 +70,18 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): def testAddFinalTrainingOps(self, flags_mock): with tf.Graph().as_default(): with tf.Session() as sess: - bottleneck = tf.placeholder( - tf.float32, [1, 1024], - name='bottleneck') - retrain.add_final_training_ops(5, 'final', bottleneck, 1024) + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization + retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) + self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + + @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) + def testAddFinalTrainingOpsQuantized(self, flags_mock): + with tf.Graph().as_default(): + with tf.Session() as sess: + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization + retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) def testAddEvaluationStep(self): @@ -99,5 +107,12 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): self.assertIsNotNone(model_info) self.assertEqual(299, model_info['input_width']) + def testCreateModelInfoQuantized(self): + # Test for mobilenet_quantized + model_info = retrain.create_model_info('mobilenet_1.0_224') + self.assertIsNotNone(model_info) + self.assertEqual(224, model_info['input_width']) + + if __name__ == '__main__': tf.test.main() -- GitLab From 5d35b03064268e05626d9a65348c1359e83ddcc2 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 7 Nov 2017 18:39:17 -0800 Subject: [PATCH 0029/1801] Fix Bazel builds for the TF Lite demo app Adds a new remote repository for the mobilenet tflite models necessary for running the TF Lite demo app. PiperOrigin-RevId: 174946867 --- tensorflow/workspace.bzl | 37 ++++++++++++++++-------------- third_party/tflite_mobilenet.BUILD | 13 +++++++++++ 2 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 third_party/tflite_mobilenet.BUILD diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index afcae6eade..3081a8d1dc 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -1,21 +1,24 @@ # TensorFlow external dependencies that can be loaded in WORKSPACE files. load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") + load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") -load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", - "java_import_external") +load( + "@io_bazel_rules_closure//closure/private:java_import_external.bzl", + "java_import_external", +) load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") load("//third_party/py:python_configure.bzl", "python_configure") -load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", - "arm_compiler_configure") - +load( + "//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", + "arm_compiler_configure", +) def _is_windows(repository_ctx): """Returns true if the host operating system is windows.""" return repository_ctx.os.name.lower().find("windows") != -1 - def _get_env_var(repository_ctx, name): """Find an environment variable.""" if name in repository_ctx.os.environ: @@ -23,7 +26,6 @@ def _get_env_var(repository_ctx, name): else: return None - # Parse the bazel version string from `native.bazel_version`. def _parse_bazel_version(bazel_version): # Remove commit from version. @@ -39,7 +41,6 @@ def _parse_bazel_version(bazel_version): version_tuple += (str(number),) return version_tuple - # Check that a specific bazel version is being used. def check_version(bazel_version): if "bazel_version" not in dir(native): @@ -56,11 +57,9 @@ def check_version(bazel_version): fail("\nCurrent Bazel version is {}, expected at least {}\n".format( native.bazel_version, bazel_version)) - def _repos_are_siblings(): return Label("@foo//bar").workspace_root.startswith("../") - # Temporary workaround to support including TensorFlow as a submodule until this # use-case is supported in the next Bazel release. def _temp_workaround_http_archive_impl(repo_ctx): @@ -73,9 +72,7 @@ def _temp_workaround_http_archive_impl(repo_ctx): if repo_ctx.attr.patch_file != None: _apply_patch(repo_ctx, repo_ctx.attr.patch_file) - temp_workaround_http_archive = repository_rule( - implementation = _temp_workaround_http_archive_impl, attrs = { "build_file": attr.label(), "repository": attr.string(), @@ -84,6 +81,7 @@ temp_workaround_http_archive = repository_rule( "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), }, + implementation = _temp_workaround_http_archive_impl, ) # Executes specified command with arguments and calls 'fail' if it exited with @@ -95,7 +93,6 @@ def _execute_and_check_ret_code(repo_ctx, cmd_and_args): + "Stderr: {3}").format(" ".join(cmd_and_args), result.return_code, result.stdout, result.stderr)) - # Apply a patch_file to the repository root directory # Runs 'patch -p1' def _apply_patch(repo_ctx, patch_file): @@ -113,7 +110,6 @@ def _apply_patch(repo_ctx, patch_file): cmd = [bazel_sh, "-c", " ".join(cmd)] _execute_and_check_ret_code(repo_ctx, cmd) - # Download the repository and apply a patch to its root def _patched_http_archive_impl(repo_ctx): repo_ctx.download_and_extract( @@ -122,9 +118,7 @@ def _patched_http_archive_impl(repo_ctx): stripPrefix=repo_ctx.attr.strip_prefix) _apply_patch(repo_ctx, repo_ctx.attr.patch_file) - patched_http_archive = repository_rule( - implementation = _patched_http_archive_impl, attrs = { "patch_file": attr.label(), "build_file": attr.label(), @@ -133,9 +127,9 @@ patched_http_archive = repository_rule( "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), }, + implementation = _patched_http_archive_impl, ) - # If TensorFlow is linked as a submodule. # path_prefix is no longer used. # tf_repo_name is thought to be under consideration. @@ -821,3 +815,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", ], ) + + native.new_http_archive( + name = "tflite_mobilenet", + build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), + sha256 = "eb71679d23a0cbdb173b36ea39f3d3096de0a9b0410d148a8237f20cc1157a61", + urls = [ + "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_1.0_224_quantized_2017_11_01.zip" + ], + ) diff --git a/third_party/tflite_mobilenet.BUILD b/third_party/tflite_mobilenet.BUILD new file mode 100644 index 0000000000..75663eff48 --- /dev/null +++ b/third_party/tflite_mobilenet.BUILD @@ -0,0 +1,13 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +filegroup( + name = "model_files", + srcs = glob( + ["**/*"], + exclude = [ + "BUILD", + ], + ), +) -- GitLab From 4476ea391fcca4f6af0994242e3453fe4a159bb3 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 18:45:36 -0800 Subject: [PATCH 0030/1801] MovingAvgQuantize and LastValueQuantize should use the updated value from the Assign op, otherwise min max variables never get updated. PiperOrigin-RevId: 174947421 --- tensorflow/contrib/quantize/BUILD | 18 +++- .../contrib/quantize/python/quant_ops.py | 57 +++++------- .../contrib/quantize/python/quant_ops_test.py | 87 +++++++++++++++++++ .../python/quantize_parameterized_test.py | 65 +++++++------- 4 files changed, 160 insertions(+), 67 deletions(-) create mode 100644 tensorflow/contrib/quantize/python/quant_ops_test.py diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD index 935af80e7a..45a98c7f85 100644 --- a/tensorflow/contrib/quantize/BUILD +++ b/tensorflow/contrib/quantize/BUILD @@ -133,7 +133,6 @@ py_library( deps = [ "//tensorflow/contrib/framework:framework_py", "//tensorflow/python:array_ops", - "//tensorflow/python:check_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:math_ops", @@ -143,6 +142,23 @@ py_library( ], ) +py_test( + name = "quant_ops_test", + size = "small", + srcs = ["python/quant_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":quant_ops", + "//tensorflow/python:array_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + "//tensorflow/python:session", + "//tensorflow/python:variables", + ], +) + py_library( name = "quantize", srcs = ["python/quantize.py"], diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index 0a38ef9fcd..f80d427ff0 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -22,15 +22,12 @@ from tensorflow.contrib.framework.python.ops import add_arg_scope from tensorflow.contrib.framework.python.ops import model_variable from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.training import moving_averages -EPSILON = 1e-5 - @add_arg_scope def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None): @@ -133,12 +130,10 @@ def LastValueQuantize(inputs, batch_min = inputs else: batch_min = math_ops.reduce_min(inputs, name='BatchMin') - batch_min -= EPSILON - # B-eng requires that 0.0 if always in the [min; max] range. + # TFLite requires that 0.0 if always in the [min; max] range. batch_min = math_ops.minimum(batch_min, 0.0) - assign_min_op = state_ops.assign( - min_var, batch_min, name='AssignMinLast').op - ops.add_to_collection(updates_collection, assign_min_op) + assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast') + ops.add_to_collection(updates_collection, assign_min.op) if per_channel: if input_dim >= 2: @@ -148,17 +143,15 @@ def LastValueQuantize(inputs, batch_max = inputs else: batch_max = math_ops.reduce_max(inputs, name='BatchMax') - batch_max += EPSILON - # B-eng requires that 0.0 if always in the [min; max] range. + # TFLite requires that 0.0 if always in the [min; max] range. batch_max = math_ops.maximum(batch_max, 0.0) - assign_max_op = state_ops.assign( - max_var, batch_max, name='AssignMaxLast').op - ops.add_to_collection(updates_collection, assign_max_op) + assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast') + ops.add_to_collection(updates_collection, assign_max.op) return _FakeQuantWithMinMaxVars( inputs, - batch_min, - batch_max, + assign_min, + assign_max, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range) @@ -251,9 +244,9 @@ def MovingAvgQuantize(inputs, batch_min = math_ops.reduce_min(inputs, name='BatchMin') # B-eng requires that 0.0 if always in the [min; max] range. batch_min = math_ops.minimum(batch_min, 0.0) - assign_min_op = moving_averages.assign_moving_average( - min_var, batch_min, ema_decay, name='AssignMinEma').op - ops.add_to_collection(updates_collection, assign_min_op) + assign_min = moving_averages.assign_moving_average( + min_var, batch_min, ema_decay, name='AssignMinEma') + ops.add_to_collection(updates_collection, assign_min.op) if per_channel: if input_dim >= 2: @@ -265,14 +258,14 @@ def MovingAvgQuantize(inputs, batch_max = math_ops.reduce_max(inputs, name='BatchMax') # B-eng requires that 0.0 if always in the [min; max] range. batch_max = math_ops.maximum(batch_max, 0.0) - assign_max_op = moving_averages.assign_moving_average( - max_var, batch_max, ema_decay, name='AssignMaxEma').op - ops.add_to_collection(updates_collection, assign_max_op) + assign_max = moving_averages.assign_moving_average( + max_var, batch_max, ema_decay, name='AssignMaxEma') + ops.add_to_collection(updates_collection, assign_max.op) return _FakeQuantWithMinMaxVars( inputs, - min_var, - max_var, + assign_min, + assign_max, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range) @@ -301,20 +294,10 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, if per_channel: assert len(min_var.get_shape()) == 1 assert len(max_var.get_shape()) == 1 - with ops.control_dependencies([check_ops.assert_less(min_var, max_var)]): - return array_ops.fake_quant_with_min_max_vars_per_channel( - inputs, - min_var, - max_var, - num_bits=num_bits, - narrow_range=narrow_range) + return array_ops.fake_quant_with_min_max_vars_per_channel( + inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range) else: assert min_var.get_shape() == [] # pylint: disable=g-explicit-bool-comparison assert max_var.get_shape() == [] # pylint: disable=g-explicit-bool-comparison - with ops.control_dependencies([check_ops.assert_less(min_var, max_var)]): - return array_ops.fake_quant_with_min_max_vars( - inputs, - min_var, - max_var, - num_bits=num_bits, - narrow_range=narrow_range) + return array_ops.fake_quant_with_min_max_vars( + inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range) diff --git a/tensorflow/contrib/quantize/python/quant_ops_test.py b/tensorflow/contrib/quantize/python/quant_ops_test.py new file mode 100644 index 0000000000..3884679602 --- /dev/null +++ b/tensorflow/contrib/quantize/python/quant_ops_test.py @@ -0,0 +1,87 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for third_party.tensorflow.contrib.quantize.python.quant_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.quantize.python import quant_ops +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + +_MIN_MAX_VARS = 'min_max_vars' + + +class QuantOpsTest(googletest.TestCase): + + def testLastValueQuantizeTrainingAssign(self): + g = ops.Graph() + with session.Session(graph=g) as sess: + x = array_ops.placeholder(dtypes.float32, shape=[2]) + y = quant_ops.LastValueQuantize( + x, + init_min=0.0, + init_max=0.0, + is_training=True, + vars_collection=_MIN_MAX_VARS) + + # Run the step. + sess.run(variables.global_variables_initializer()) + sess.run(y, feed_dict={x: [-1.0, 1.0]}) + # Now check that the min_max_vars were, in fact, updated. + min_value, max_value = self._GetMinMaxValues(sess) + self.assertEqual(min_value, -1.0) + self.assertEqual(max_value, 1.0) + + def testMovingAvgQuantizeTrainingAssign(self): + g = ops.Graph() + with session.Session(graph=g) as sess: + x = array_ops.placeholder(dtypes.float32, shape=[2]) + y = quant_ops.MovingAvgQuantize( + x, + init_min=0.0, + init_max=0.0, + is_training=True, + vars_collection=_MIN_MAX_VARS) + + # Run the step. + sess.run(variables.global_variables_initializer()) + # Do two runs to avoid zero debias. + sess.run(y, feed_dict={x: [-1.0, 1.0]}) + sess.run(y, feed_dict={x: [0.0, 0.0]}) + # Now check that the min_max_vars were, in fact, updated. + min_value, max_value = self._GetMinMaxValues(sess) + self.assertGreater(min_value, -1.0) + self.assertLess(min_value, 0.0) + self.assertGreater(max_value, 0.0) + self.assertLess(max_value, 1.0) + + def _GetMinMaxValues(self, sess): + min_max_vars = ops.get_collection(_MIN_MAX_VARS) + self.assertEqual(len(min_max_vars), 2) + min_idx = 0 if 'min' in min_max_vars[0].name else 1 + max_idx = (min_idx + 1) % 2 + min_var, max_var = min_max_vars[min_idx], min_max_vars[max_idx] + min_max_values = sess.run([min_var, max_var]) + return min_max_values[0], min_max_values[1] + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 3e62f95bd6..57dab03f16 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -97,8 +97,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', - scope + '/weights/read' + scope + '/weights_quant/AssignMinLast', + scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) output_op_name = scope + '/Conv2D' @@ -109,8 +109,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/BiasAdd' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -122,7 +122,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -172,8 +172,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', - scope + '/weights/read' + scope + '/weights_quant/AssignMinLast', + scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) output_op_name = scope + '/MatMul' @@ -184,8 +184,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/BiasAdd' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -196,7 +196,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -247,7 +247,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', + scope + '/weights_quant/AssignMinLast', + scope + '/weights_quant/AssignMaxLast', scope + '/depthwise_weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -259,8 +260,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/BiasAdd' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -271,7 +272,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -401,8 +402,10 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), - scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), + scope + '/weights_quant/' + ('AssignMinEma' + if use_ema else 'AssignMinLast'), + scope + '/weights_quant/' + ('AssignMaxEma' + if use_ema else 'AssignMaxLast'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -415,8 +418,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/add_fold' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -427,7 +430,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -518,8 +521,10 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), - scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), + scope + '/weights_quant/' + ('AssignMinEma' + if use_ema else 'AssignMinLast'), + scope + '/weights_quant/' + ('AssignMaxEma' + if use_ema else 'AssignMaxLast'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -532,8 +537,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/add_fold' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -544,7 +549,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -639,8 +644,10 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), - scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), + scope + '/weights_quant/' + ('AssignMinEma' + if use_ema else 'AssignMinLast'), + scope + '/weights_quant/' + ('AssignMaxEma' + if use_ema else 'AssignMaxLast'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -653,8 +660,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/add_fold' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -665,7 +672,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) -- GitLab From dc2636aa6c88f41e1aec2a367e341eb42ceead54 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 7 Nov 2017 18:46:10 -0800 Subject: [PATCH 0031/1801] Silenced an unnecessary warning PiperOrigin-RevId: 174947453 --- tensorflow/core/grappler/utils.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index d9f4cdb5ae..11bd8fa5cb 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -45,7 +45,6 @@ NodeDef* NodeMap::GetNode(const string& name) const { string node_name = NodeName(name); auto it = nodes_.find(node_name); if (it == nodes_.end()) { - LOG(WARNING) << "Node " << node_name << " is not in the graph."; return nullptr; } return it->second; -- GitLab From f552fb90e94ccfb72475327553c968412282eb26 Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Tue, 7 Nov 2017 22:04:16 -0500 Subject: [PATCH 0032/1801] update create_train_op to use get_global_step --- tensorflow/contrib/training/python/training/training.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py index 6a4d79796d..59f02fa38f 100644 --- a/tensorflow/contrib/training/python/training/training.py +++ b/tensorflow/contrib/training/python/training/training.py @@ -255,6 +255,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary from tensorflow.python.training import monitored_session from tensorflow.python.training import optimizer as tf_optimizer +from tensorflow.python.training import training_util # TODO(nsilberman): move add_gradients_summaries, clip_gradient_norms and # multiply_gradients into contrib/summaries and contrib/optimizers.py @@ -409,7 +410,7 @@ def create_train_op(total_loss, loss value. """ if global_step is _USE_GLOBAL_STEP: - global_step = variables.get_or_create_global_step() + global_step = training_util.get_global_step() # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None. global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) -- GitLab From d484522eb3d58aac70130f5c02a732c7442046bc Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 7 Nov 2017 19:06:01 -0800 Subject: [PATCH 0033/1801] Fix link (the link tool expects these to be on one line) PiperOrigin-RevId: 174948909 --- tensorflow/docs_src/mobile/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/mobile/index.md b/tensorflow/docs_src/mobile/index.md index a6f1422f6f..06ad47bc62 100644 --- a/tensorflow/docs_src/mobile/index.md +++ b/tensorflow/docs_src/mobile/index.md @@ -35,8 +35,8 @@ speech-driven interface, and many of these require on-device processing. Most of the time a user isn’t giving commands, and so streaming audio continuously to a remote server would be a waste of bandwidth, since it would mostly be silence or background noises. To solve this problem it’s common to have a small neural -network running on-device @{$tutorials/audio_recognition$listening out for a -particular keyword}. Once that keyword has been spotted, the rest of the +network running on-device @{$tutorials/audio_recognition$listening out for a particular keyword}. +Once that keyword has been spotted, the rest of the conversation can be transmitted over to the server for further processing if more computing power is needed. -- GitLab From fa5672bddf3f78283d7d1552a42ffc8708f863bb Mon Sep 17 00:00:00 2001 From: Colin Raffel Date: Tue, 7 Nov 2017 21:05:37 -0800 Subject: [PATCH 0034/1801] Fix tf.contrib.seq2seq._monotonic_probability_fn to use a hard sigmoid when mode='hard'. Also adds tests to make sure the attention probabilities are 0 or 1 when mode='hard'. PiperOrigin-RevId: 174956465 --- .../kernel_tests/attention_wrapper_test.py | 37 +++++++++++++++++++ .../seq2seq/python/ops/attention_wrapper.py | 6 ++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index 91493302b1..01a5540121 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell from tensorflow.python.ops import variables from tensorflow.python.ops import variable_scope as vs @@ -589,6 +590,24 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testBahdanauMonotonicNormalized') + def testBahdanauMonotonicHard(self): + # Run attention mechanism with mode='hard', make sure probabilities are hard + b, t, u, d = 10, 20, 30, 40 + with self.test_session(use_gpu=True) as sess: + a = wrapper.BahdanauMonotonicAttention( + d, + random_ops.random_normal((b, t, u)), + mode='hard') + # Just feed previous attention as [1, 0, 0, ...] + attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t)) + sess.run(variables.global_variables_initializer()) + attn_out = attn.eval() + # All values should be 0 or 1 + self.assertTrue(np.all(np.logical_or(attn_out == 0, attn_out == 1))) + # Sum of distributions should be 0 or 1 (0 when all p_choose_i are 0) + self.assertTrue(np.all(np.logical_or(attn_out.sum(axis=1) == 1, + attn_out.sum(axis=1) == 0))) + def testLuongMonotonicNotNormalized(self): create_attention_mechanism = functools.partial( wrapper.LuongMonotonicAttention, sigmoid_noise=1.0, @@ -695,6 +714,24 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testMultiAttention') + def testLuongMonotonicHard(self): + # Run attention mechanism with mode='hard', make sure probabilities are hard + b, t, u, d = 10, 20, 30, 40 + with self.test_session(use_gpu=True) as sess: + a = wrapper.LuongMonotonicAttention( + d, + random_ops.random_normal((b, t, u)), + mode='hard') + # Just feed previous attention as [1, 0, 0, ...] + attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t)) + sess.run(variables.global_variables_initializer()) + attn_out = attn.eval() + # All values should be 0 or 1 + self.assertTrue(np.all(np.logical_or(attn_out == 0, attn_out == 1))) + # Sum of distributions should be 0 or 1 (0 when all p_choose_i are 0) + self.assertTrue(np.all(np.logical_or(attn_out.sum(axis=1) == 1, + attn_out.sum(axis=1) == 0))) + def testMultiAttentionNoAttentionLayer(self): create_attention_mechanisms = ( wrapper.BahdanauAttention, wrapper.LuongAttention) diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 839df079ee..87230e3355 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -679,7 +679,11 @@ def _monotonic_probability_fn(score, previous_alignments, sigmoid_noise, mode, seed=seed) score += sigmoid_noise*noise # Compute "choosing" probabilities from the attention scores - p_choose_i = math_ops.sigmoid(score) + if mode == "hard": + # When mode is hard, use a hard sigmoid + p_choose_i = math_ops.cast(score > 0, score.dtype) + else: + p_choose_i = math_ops.sigmoid(score) # Convert from choosing probabilities to attention distribution return monotonic_attention(p_choose_i, previous_alignments, mode) -- GitLab From fff9b90a3f081b1dd0ca8ce5785f0e67c3557cce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 22:02:42 -0800 Subject: [PATCH 0035/1801] Update nsync version---allow compilation with bazel on x86_32 The new version of nsync has a BUILD file that detects x86_32 (which bazel currently calls piii). PiperOrigin-RevId: 174959924 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 3081a8d1dc..dfe332b091 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -442,11 +442,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "nsync", urls = [ - "https://mirror.bazel.build/github.com/google/nsync/archive/4fc8ff3e7626c5f24bc9674438d8257f0ffc226c.tar.gz", - # "https://github.com/google/nsync/archive/4fc8ff3e7626c5f24bc9674438d8257f0ffc226c.tar.gz", + "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", + # "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", ], - sha256 = "ffbbe828f3d0bef75462e34801de5cea31d10aa63eaa42a4ed74c46521bdfd58", - strip_prefix = "nsync-4fc8ff3e7626c5f24bc9674438d8257f0ffc226c", + sha256 = "e3bd4555415ace511338fc27e595351738eea4e9006f1612b76c82914770716b", + strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323", ) native.http_archive( -- GitLab From f0b1e65b0ac9e587c117485a96a0eaf40675c518 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 22:33:54 -0800 Subject: [PATCH 0036/1801] Automated g4 rollback of changelist 174912490 PiperOrigin-RevId: 174961746 --- .../contrib/cmake/tf_core_kernels.cmake | 1 - tensorflow/contrib/cmake/tf_core_ops.cmake | 1 - tensorflow/contrib/cmake/tf_python.cmake | 2 - tensorflow/contrib/data/BUILD | 13 +- tensorflow/contrib/data/__init__.py | 2 +- tensorflow/contrib/data/ops/dataset_ops.cc | 232 --------- .../python/kernel_tests/iterator_ops_test.py | 2 +- .../kernel_tests/range_dataset_op_test.py | 2 +- .../kernel_tests/reader_dataset_ops_test.py | 2 +- tensorflow/contrib/data/python/ops/BUILD | 40 +- .../contrib/data/python/ops/batching.py | 2 +- .../contrib/data/python/ops/dataset_ops.py | 8 +- .../contrib/data/python/ops/error_ops.py | 2 +- .../contrib/data/python/ops/grouping.py | 2 +- .../contrib/data/python/ops/interleave_ops.py | 2 +- .../contrib/data/python/ops/iterator_ops.py | 2 +- tensorflow/contrib/data/python/ops/readers.py | 2 +- .../contrib/data/python/ops/scan_ops.py | 2 +- .../core/ops/compat/ops_history.v1.pbtxt | 452 ++++++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 197 ++++++++ .../python/kernel_tests/iterator_ops_test.py | 62 +++ .../kernel_tests/range_dataset_op_test.py | 330 +++++++++++++ .../kernel_tests/reader_dataset_ops_test.py | 298 ++++++++++++ 23 files changed, 1366 insertions(+), 292 deletions(-) delete mode 100644 tensorflow/contrib/data/ops/dataset_ops.cc diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 5b62598aa5..f978c8ccd5 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -70,7 +70,6 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/prefetching_kernels.cc" - "${tensorflow_source_dir}/tensorflow/contrib/data/ops/dataset_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/ops/prefetching_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/factorization/kernels/clustering_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/factorization/kernels/masked_matmul_ops.cc" diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 03c168795c..4a61ed7a35 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -81,7 +81,6 @@ GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_prediction "${tensorflow_source_dir}/t GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_quantiles "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_stats_accumulator "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc") -GENERATE_CONTRIB_OP_LIBRARY(data_dataset "${tensorflow_source_dir}/tensorflow/contrib/data/ops/dataset_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(data_prefetching "${tensorflow_source_dir}/tensorflow/contrib/data/ops/prefetching_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc") diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index a14b733158..7636e9ba6e 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -776,8 +776,6 @@ GENERATE_PYTHON_OP_LIB("contrib_boosted_trees_stats_accumulator_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/boosted_trees/python/ops/gen_stats_accumulator_ops.py) GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py) -GENERATE_PYTHON_OP_LIB("contrib_data_dataset_ops" - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/data/python/ops/gen_dataset_ops.py) GENERATE_PYTHON_OP_LIB("contrib_data_prefetching_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/data/python/ops/gen_prefetching_ops.py) GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops" diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 7bcf5a5f4d..eaede0e00e 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -35,19 +35,8 @@ tf_custom_op_library( ], ) -# TODO(mrry): Move the kernels out of the core library into this library. -tf_custom_op_library( - name = "_dataset_ops.so", - srcs = [ - "ops/dataset_ops.cc", - ], -) - tf_gen_op_libs( - op_lib_names = [ - "dataset_ops", - "prefetching_ops", - ], + op_lib_names = ["prefetching_ops"], ) filegroup( diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 0c7e793689..824ac4298f 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -41,8 +41,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - # pylint: disable=unused-import + from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch from tensorflow.contrib.data.python.ops.batching import unbatch diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc deleted file mode 100644 index 1574384cb2..0000000000 --- a/tensorflow/contrib/data/ops/dataset_ops.cc +++ /dev/null @@ -1,232 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_def_builder.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -// -------------------------------------------------------------------------- - -// The ops in this section can be composed to define an input -// pipeline. Each op produces a DT_VARIANT tensor that represents -// a DAG of "dataset" objects. An "dataset" object can be converted -// to a stateful "iterator" by passing the "dataset" to the -// "MakeIterator" op. -// -// TODO(b/65524810): DT_VARIANT tensors that represent "dataset" objects are -// not presently serializable. To avoid issues with constant folding, ensure -// that any "source dataset" ops (i.e. ops that output a dataset and do not -// take one as input) are marked "stateful". - -REGISTER_OP("IgnoreErrorsDataset") - .Input("input_dataset: variant") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that contains the elements of `input_dataset` ignoring errors. -)doc"); - -REGISTER_OP("MapAndBatchDataset") - .Input("input_dataset: variant") - .Input("other_arguments: Targuments") - .Input("batch_size: int64") - .Input("num_parallel_batches: int64") - .Output("handle: variant") - .Attr("f: func") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that applies `f` to the outputs of `input_dataset` and then -batches `batch_size` of them. - -Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up -to `batch_size * num_parallel_batches` copies of `f` in parallel. - -batch_size: A scalar representing the number of elements to accumulate in a - batch. It determines the number of concurrent invocations of `f` that process - elements from `input_dataset` in parallel. -num_parallel_batches: A scalar representing the number of batches to create in - parallel. Processing multiple batches in parallel benefits workloads prone to - stragglers. -)doc"); - -REGISTER_OP("ScanDataset") - .Input("input_dataset: variant") - .Input("initial_state: Tstate") - .Input("other_arguments: Targuments") - .Output("handle: variant") - .Attr("f: func") - .Attr("Tstate: list(type) >= 1") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset successively reduces `f` over the elements of `input_dataset`. -)doc"); - -REGISTER_OP("ParallelInterleaveDataset") - .Input("input_dataset: variant") - .Input("other_arguments: Targuments") - .Input("cycle_length: int64") - .Input("block_length: int64") - .Input("sloppy: bool") - .Output("handle: variant") - .Attr("f: func") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that applies `f` to the outputs of `input_dataset`. - -The resulting dataset is similar to the `InterleaveDataset`, with the exception -that if retrieving the next value from a dataset would cause the requester to -block, it will skip that input dataset. This dataset is especially useful -when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it -allows the training step to proceed so long as some data is available. - -!! WARNING !! This dataset is not deterministic! - -f: A function mapping elements of `input_dataset`, concatenated with - `other_arguments`, to a Dataset variant that contains elements matching - `output_types` and `output_shapes`. -)doc"); - -REGISTER_OP("GroupByWindowDataset") - .Input("input_dataset: variant") - .Input("key_func_other_arguments: Tkey_func_other_arguments") - .Input("reduce_func_other_arguments: Treduce_func_other_arguments") - .Input( - "window_size_func_other_arguments: Twindow_size_func_other_arguments") - .Output("handle: variant") - .Attr("key_func: func") - .Attr("reduce_func: func") - .Attr("window_size_func: func") - .Attr("Tkey_func_other_arguments: list(type) >= 0") - .Attr("Treduce_func_other_arguments: list(type) >= 0") - .Attr("Twindow_size_func_other_arguments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that computes a windowed group-by on `input_dataset`. - -// TODO(mrry): Support non-int64 keys. - -key_func: A function mapping an element of `input_dataset`, concatenated - with `key_func_other_arguments` to a scalar value of type DT_INT64. -)doc"); - -REGISTER_OP("DenseToSparseBatchDataset") - .Input("input_dataset: variant") - .Input("batch_size: int64") - .Input("row_shape: int64") - .Output("handle: variant") - // NOTE(mrry): the 0th and 2nd elements will be DT_INT64. - .Attr("output_types: list(type) >= 1") - // NOTE(mrry): the 1st and 2nd elements will be vectors. - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that yields a SparseTensor for each element of the input. - -input_dataset: A handle to an input dataset. Must have a single component. -batch_size: A scalar representing the number of elements to accumulate in a - batch. -row_shape: A vector representing the dense shape of each row in the produced - SparseTensor. The shape may be partially specified, using `-1` to indicate - that a particular dimension should use the maximum size of all batch elements. -)doc"); - -REGISTER_OP("SqlDataset") - .Input("driver_name: string") - .Input("data_source_name: string") - .Input("query: string") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked - // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that executes a SQL query and emits rows of the result set. - -driver_name: The database type. Currently, the only supported type is 'sqlite'. -data_source_name: A connection string to connect to the database. -query: A SQL query to execute. -)doc"); - -REGISTER_OP("DatasetToSingleElement") - .Input("dataset: variant") - .Output("components: output_types") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn([](shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - std::vector output_shapes; - TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); - if (output_shapes.size() != c->num_outputs()) { - return errors::InvalidArgument( - "`output_shapes` must be the same length as `output_types` (", - output_shapes.size(), " vs. ", c->num_outputs()); - } - for (size_t i = 0; i < output_shapes.size(); ++i) { - shape_inference::ShapeHandle output_shape_handle; - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( - output_shapes[i], &output_shape_handle)); - c->set_output(static_cast(i), output_shape_handle); - } - return Status::OK(); - }) - .Doc(R"doc( -Outputs the single element from the given dataset. - -dataset: A handle to a dataset that contains a single element. -components: The components of the single element of `input`. -)doc"); - -REGISTER_OP("SerializeIterator") - .Input("resource_handle: resource") - .Output("serialized: variant") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Converts the given `resource_handle` representing an iterator to a variant tensor. - -resource_handle: A handle to an iterator resource. -serialized: A variant tensor storing the state of the iterator contained in the - resource. -)doc"); - -REGISTER_OP("DeserializeIterator") - .Input("resource_handle: resource") - .Input("serialized: variant") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Converts the given variant tensor to an iterator and stores it in the given resource. - -resource_handle: A handle to an iterator resource. -serialized: A variant tensor storing the state of the iterator contained in the - resource. -)doc"); - -} // namespace tensorflow diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py index 271d80a54b..bda9a2a4a3 100644 --- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py @@ -21,7 +21,6 @@ import os import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import readers from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session @@ -34,6 +33,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py index 329dc80ba5..f59ac760dc 100644 --- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py @@ -21,7 +21,6 @@ import os from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import enumerate_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op @@ -30,6 +29,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import variables diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index 8033f1d388..3ae8f71d77 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -21,7 +21,6 @@ import gzip import os import zlib -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.contrib.data.python.ops import readers from tensorflow.core.example import example_pb2 @@ -34,6 +33,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 727c5d1c38..1b81cf5be9 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -11,6 +11,20 @@ load( ) load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") +py_library( + name = "dataset_ops", + srcs = [ + "dataset_ops.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":transformation_ops", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + ], +) + py_library( name = "iterator_ops", srcs = [ @@ -59,7 +73,6 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - ":gen_dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dataset_ops_gen", @@ -115,31 +128,6 @@ tf_custom_op_py_library( ], ) -tf_gen_op_wrapper_py( - name = "gen_dataset_ops", - out = "gen_dataset_ops.py", - deps = ["//tensorflow/contrib/data:dataset_ops_op_lib"], -) - -tf_custom_op_py_library( - name = "dataset_ops", - srcs = ["dataset_ops.py"], - dso = ["//tensorflow/contrib/data:_dataset_ops.so"], - kernels = [ - "//tensorflow/contrib/data:dataset_ops_op_lib", - ], - srcs_version = "PY2AND3", - deps = [ - ":gen_dataset_ops", - ":transformation_ops", - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:platform", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index e6e5f716b6..abc9212a87 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -17,7 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes @@ -25,6 +24,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import math_ops diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index c4c4426809..45d6dbe743 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -20,21 +20,15 @@ from __future__ import print_function from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import enumerate_ops from tensorflow.contrib.data.python.ops import error_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import grouping -from tensorflow.contrib.util import loader from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gen_io_ops -from tensorflow.python.platform import resource_loader from tensorflow.python.util import deprecation -_dataset_ops = loader.load_op_library( - resource_loader.get_path_to_datafile("../../_dataset_ops.so")) - - class Dataset(dataset_ops.Dataset): """Represents a potentially large set of elements. diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 51a2791072..238bb52b02 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -17,9 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.ops import gen_dataset_ops def ignore_errors(): diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 1c7c94b3c8..6df7b22fb6 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -17,12 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops def group_by_window(key_func, diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index ce23e95697..74a919c1ff 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -17,12 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.util import deprecation diff --git a/tensorflow/contrib/data/python/ops/iterator_ops.py b/tensorflow/contrib/data/python/ops/iterator_ops.py index 32d2f42c93..d736029fb0 100644 --- a/tensorflow/contrib/data/python/ops/iterator_ops.py +++ b/tensorflow/contrib/data/python/ops/iterator_ops.py @@ -17,8 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.training import saver diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index f22298b757..2e1c3153ca 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.data.python.ops import dataset_ops as contrib_dataset_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers from tensorflow.python.data.util import nest @@ -26,6 +25,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.util import deprecation diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 87bbbb7d19..5acaed48a3 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -19,11 +19,11 @@ from __future__ import print_function import collections -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops class _ScanDataset(dataset_ops.Dataset): diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 8b8251f84b..a4b5ca16af 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -8270,6 +8270,29 @@ op { } } } +op { + name: "DatasetToSingleElement" + input_arg { + name: "dataset" + type: DT_VARIANT + } + output_arg { + name: "components" + type_list_attr: "output_types" + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "DebugGradientIdentity" input_arg { @@ -9248,6 +9271,69 @@ op { } } } +op { + name: "DenseToSparseBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "row_shape" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "DenseToSparseBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "row_shape" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "DenseToSparseSetOperation" input_arg { @@ -9741,6 +9827,18 @@ op { } } } +op { + name: "DeserializeIterator" + input_arg { + name: "resource_handle" + type: DT_RESOURCE + } + input_arg { + name: "serialized" + type: DT_VARIANT + } + is_stateful: true +} op { name: "DeserializeManySparse" input_arg { @@ -13494,6 +13592,131 @@ op { } } } +op { + name: "GroupByWindowDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "key_func_other_arguments" + type_list_attr: "Tkey_func_other_arguments" + } + input_arg { + name: "reduce_func_other_arguments" + type_list_attr: "Treduce_func_other_arguments" + } + input_arg { + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "key_func" + type: "func" + } + attr { + name: "reduce_func" + type: "func" + } + attr { + name: "window_size_func" + type: "func" + } + attr { + name: "Tkey_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Treduce_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "GroupByWindowDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "key_func_other_arguments" + type_list_attr: "Tkey_func_other_arguments" + } + input_arg { + name: "reduce_func_other_arguments" + type_list_attr: "Treduce_func_other_arguments" + } + input_arg { + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "key_func" + type: "func" + } + attr { + name: "reduce_func" + type: "func" + } + attr { + name: "window_size_func" + type: "func" + } + attr { + name: "Tkey_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Treduce_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "HSVToRGB" input_arg { @@ -13914,6 +14137,53 @@ op { } } } +op { + name: "IgnoreErrorsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "IgnoreErrorsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Imag" input_arg { @@ -15818,6 +16088,50 @@ op { } is_stateful: true } +op { + name: "MapAndBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "num_parallel_batches" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "MapClear" attr { @@ -20556,6 +20870,54 @@ op { type: "type" } } +op { + name: "ParallelInterleaveDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "cycle_length" + type: DT_INT64 + } + input_arg { + name: "block_length" + type: DT_INT64 + } + input_arg { + name: "sloppy" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "ParallelMapDataset" input_arg { @@ -30146,6 +30508,52 @@ op { } } } +op { + name: "ScanDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "ScatterAdd" input_arg { @@ -31861,6 +32269,18 @@ op { } } } +op { + name: "SerializeIterator" + input_arg { + name: "resource_handle" + type: DT_RESOURCE + } + output_arg { + name: "serialized" + type: DT_VARIANT + } + is_stateful: true +} op { name: "SerializeManySparse" input_arg { @@ -37265,6 +37685,38 @@ op { } } } +op { + name: "SqlDataset" + input_arg { + name: "driver_name" + type: DT_STRING + } + input_arg { + name: "data_source_name" + type: DT_STRING + } + input_arg { + name: "query" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} op { name: "Sqrt" input_arg { diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 8f5d8308a3..f512213964 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -141,6 +141,16 @@ count: A scalar representing the number of elements from the `input_dataset` that should be skipped. If count is -1, skips everything. )doc"); +REGISTER_OP("IgnoreErrorsDataset") + .Input("input_dataset: variant") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that contains the elements of `input_dataset` ignoring errors. +)doc"); + REGISTER_OP("MapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -174,6 +184,32 @@ num_parallel_calls: The number of concurrent invocations of `f` that process elements from `input_dataset` in parallel. )doc"); +REGISTER_OP("MapAndBatchDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Input("batch_size: int64") + .Input("num_parallel_batches: int64") + .Output("handle: variant") + .Attr("f: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset` and then +batches `batch_size` of them. + +Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up +to `batch_size * num_parallel_batches` copies of `f` in parallel. + +batch_size: A scalar representing the number of elements to accumulate in a + batch. It determines the number of concurrent invocations of `f` that process + elements from `input_dataset` in parallel. +num_parallel_batches: A scalar representing the number of batches to create in + parallel. Processing multiple batches in parallel benefits workloads prone to + stragglers. +)doc"); + REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") .Input("buffer_size: int64") @@ -188,6 +224,21 @@ buffer_size: The maximum number of elements to buffer in an iterator over this dataset. )doc"); +REGISTER_OP("ScanDataset") + .Input("input_dataset: variant") + .Input("initial_state: Tstate") + .Input("other_arguments: Targuments") + .Output("handle: variant") + .Attr("f: func") + .Attr("Tstate: list(type) >= 1") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset successively reduces `f` over the elements of `input_dataset`. +)doc"); + REGISTER_OP("FlatMapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -234,6 +285,59 @@ f: A function mapping elements of `input_dataset`, concatenated with `output_types` and `output_shapes`. )doc"); +REGISTER_OP("ParallelInterleaveDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Input("cycle_length: int64") + .Input("block_length: int64") + .Input("sloppy: bool") + .Output("handle: variant") + .Attr("f: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. + +The resulting dataset is similar to the `InterleaveDataset`, with the exception +that if retrieving the next value from a dataset would cause the requester to +block, it will skip that input dataset. This dataset is especially useful +when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it +allows the training step to proceed so long as some data is available. + +!! WARNING !! This dataset is not deterministic! + +f: A function mapping elements of `input_dataset`, concatenated with + `other_arguments`, to a Dataset variant that contains elements matching + `output_types` and `output_shapes`. +)doc"); + +REGISTER_OP("GroupByWindowDataset") + .Input("input_dataset: variant") + .Input("key_func_other_arguments: Tkey_func_other_arguments") + .Input("reduce_func_other_arguments: Treduce_func_other_arguments") + .Input( + "window_size_func_other_arguments: Twindow_size_func_other_arguments") + .Output("handle: variant") + .Attr("key_func: func") + .Attr("reduce_func: func") + .Attr("window_size_func: func") + .Attr("Tkey_func_other_arguments: list(type) >= 0") + .Attr("Treduce_func_other_arguments: list(type) >= 0") + .Attr("Twindow_size_func_other_arguments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that computes a windowed group-by on `input_dataset`. + +// TODO(mrry): Support non-int64 keys. + +key_func: A function mapping an element of `input_dataset`, concatenated + with `key_func_other_arguments` to a scalar value of type DT_INT64. +)doc"); + REGISTER_OP("FilterDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -304,6 +408,27 @@ padding_values: A list of scalars containing the padding value to use for each of the outputs. )doc"); +REGISTER_OP("DenseToSparseBatchDataset") + .Input("input_dataset: variant") + .Input("batch_size: int64") + .Input("row_shape: int64") + .Output("handle: variant") + // NOTE(mrry): the 0th and 2nd elements will be DT_INT64. + .Attr("output_types: list(type) >= 1") + // NOTE(mrry): the 1st and 2nd elements will be vectors. + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that yields a SparseTensor for each element of the input. + +input_dataset: A handle to an input dataset. Must have a single component. +batch_size: A scalar representing the number of elements to accumulate in a + batch. +row_shape: A vector representing the dense shape of each row in the produced + SparseTensor. The shape may be partially specified, using `-1` to indicate + that a particular dimension should use the maximum size of all batch elements. +)doc"); + REGISTER_OP("RangeDataset") .Input("start: int64") .Input("stop: int64") @@ -389,6 +514,24 @@ compression_type: A scalar containing either (i) the empty string (no buffer_size: A scalar containing the number of bytes to buffer. )doc"); +REGISTER_OP("SqlDataset") + .Input("driver_name: string") + .Input("data_source_name: string") + .Input("query: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked + // stateful to inhibit constant folding. + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that executes a SQL query and emits rows of the result set. + +driver_name: The database type. Currently, the only supported type is 'sqlite'. +data_source_name: A connection string to connect to the database. +query: A SQL query to execute. +)doc"); + REGISTER_OP("FixedLengthRecordDataset") .Input("filenames: string") .Input("header_bytes: int64") @@ -519,6 +662,36 @@ REGISTER_OP("IteratorGetNext") Gets the next output from the given iterator. )doc"); +REGISTER_OP("DatasetToSingleElement") + .Input("dataset: variant") + .Output("components: output_types") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); + std::vector output_shapes; + TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); + if (output_shapes.size() != c->num_outputs()) { + return errors::InvalidArgument( + "`output_shapes` must be the same length as `output_types` (", + output_shapes.size(), " vs. ", c->num_outputs()); + } + for (size_t i = 0; i < output_shapes.size(); ++i) { + shape_inference::ShapeHandle output_shape_handle; + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + output_shapes[i], &output_shape_handle)); + c->set_output(static_cast(i), output_shape_handle); + } + return Status::OK(); + }) + .Doc(R"doc( +Outputs the single element from the given dataset. + +dataset: A handle to a dataset that contains a single element. +components: The components of the single element of `input`. +)doc"); + REGISTER_OP("IteratorToStringHandle") .Input("resource_handle: resource") .Output("string_handle: string") @@ -547,4 +720,28 @@ output_shapes: If specified, defines the shape of each tuple component in an element produced by the resulting iterator. )doc"); +REGISTER_OP("SerializeIterator") + .Input("resource_handle: resource") + .Output("serialized: variant") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Converts the given `resource_handle` representing an iterator to a variant tensor. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. +)doc"); + +REGISTER_OP("DeserializeIterator") + .Input("resource_handle: resource") + .Input("serialized: variant") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Converts the given variant tensor to an iterator and stores it in the given resource. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. +)doc"); + } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/kernel_tests/iterator_ops_test.py index 60a44b5b14..2128ef4ae1 100644 --- a/tensorflow/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/python/kernel_tests/iterator_ops_test.py @@ -17,12 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -31,7 +33,9 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import script_ops @@ -533,6 +537,64 @@ class IteratorTest(test.TestCase): target_placeholder: "/job:localhost/replica:0/task:0/cpu:0" }) + def testIncorrectIteratorRestore(self): + + def _path(): + return os.path.join(self.get_temp_dir(), "iterator") + + def _save_op(iterator_resource): + iterator_state_variant = gen_dataset_ops.serialize_iterator( + iterator_resource) + save_op = io_ops.write_file( + _path(), parsing_ops.serialize_tensor(iterator_state_variant)) + return save_op + + def _restore_op(iterator_resource): + iterator_state_variant = parsing_ops.parse_tensor( + io_ops.read_file(_path()), dtypes.variant) + restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, + iterator_state_variant) + return restore_op + + def _build_range_dataset_graph(): + start = 1 + stop = 10 + iterator = dataset_ops.Dataset.range(start, + stop).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = _save_op(iterator._iterator_resource) + restore_op = _restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + def _build_reader_dataset_graph(): + filenames = ["test"] # Does not exist but we don't care in this test. + iterator = readers.FixedLengthRecordDataset( + filenames, 1, 0, 0).make_initializable_iterator() + init_op = iterator.initializer + get_next_op = iterator.get_next() + save_op = _save_op(iterator._iterator_resource) + restore_op = _restore_op(iterator._iterator_resource) + return init_op, get_next_op, save_op, restore_op + + # Saving iterator for RangeDataset graph. + with ops.Graph().as_default() as g: + init_op, _, save_op, _ = _build_range_dataset_graph() + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(save_op) + + # Attempt to restore the saved iterator into an IteratorResource of + # incompatible type. An iterator of RangeDataset has output type int64, + # while an iterator of FixedLengthRecordDataset has output type string. + # So an InvalidArgumentError should be raised by + # IteratorResource::set_iterator. + with ops.Graph().as_default() as g: + _, _, _, restore_op = _build_reader_dataset_graph() + with self.test_session(graph=g) as sess: + with self.assertRaises(errors.InvalidArgumentError): + sess.run(restore_op) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/range_dataset_op_test.py b/tensorflow/python/kernel_tests/range_dataset_op_test.py index 3c1685c951..0c530522b8 100644 --- a/tensorflow/python/kernel_tests/range_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/range_dataset_op_test.py @@ -17,15 +17,32 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile from tensorflow.python.platform import test class RangeDatasetTest(test.TestCase): + def tearDown(self): + # Remove all checkpoint files. + prefix = self._iterator_checkpoint_prefix() + pattern = prefix + "*" + files = gfile.Glob(pattern) + map(gfile.Remove, files) + def testStop(self): stop = array_ops.placeholder(dtypes.int64, shape=[]) iterator = dataset_ops.Dataset.range(stop).make_initializable_iterator() @@ -151,6 +168,319 @@ class RangeDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def _iterator_checkpoint_prefix(self): + return os.path.join(self.get_temp_dir(), "iterator") + + def _save_op(self, iterator_resource): + iterator_state_variant = gen_dataset_ops.serialize_iterator( + iterator_resource) + save_op = io_ops.write_file( + self._iterator_checkpoint_prefix(), + parsing_ops.serialize_tensor(iterator_state_variant)) + return save_op + + def _restore_op(self, iterator_resource): + iterator_state_variant = parsing_ops.parse_tensor( + io_ops.read_file(self._iterator_checkpoint_prefix()), dtypes.variant) + restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, + iterator_state_variant) + return restore_op + + def testSaveRestore(self): + + def _build_graph(start, stop): + iterator = dataset_ops.Dataset.range(start, + stop).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + break_point = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Saving and restoring in same session. + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testRestoreWithoutBuildingDatasetGraph(self): + + def _build_graph(start, stop, num_epochs): + dataset = dataset_ops.Dataset.range(start, stop).repeat(num_epochs) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + num_epochs = 5 + break_point = 5 + break_epoch = 3 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for _ in range(break_epoch): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + # Create an empty IteratorResource and restore the Iterator into it. + output_types = dtypes.int64 + output_shapes = tensor_shape.scalar() + iterator = iterator_ops.Iterator.from_structure(output_types, + output_shapes) + restore_op = self._restore_op(iterator._iterator_resource) + get_next = iterator.get_next() + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + for _ in range(break_epoch + 1, num_epochs): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testRestoreInModifiedGraph(self): + + def _build_graph(start, stop): + dataset = dataset_ops.Dataset.range(start, stop) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + stop_1 = 8 + break_point = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + # Intentionally build a graph with a different value for stop to make sure + # the original dataset graph is actually getting loaded. + init_op, get_next, _, restore_op = _build_graph(start, stop_1) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testInitThenRestore(self): + # Note: Calling init_op before restore_op is redundant. This test just makes + # sure we do not fail if restore is called on an already initialized + # iterator resource. + + def _build_graph(start, stop): + dataset = dataset_ops.Dataset.range(start, stop) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + break_point = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testMultipleSaves(self): + + def _build_graph(start, stop): + iterator = dataset_ops.Dataset.range(start, + stop).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + start = 2 + stop = 10 + break_point1 = 5 + break_point2 = 7 + + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point1): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point1, break_point2): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + break_point2 = 7 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point2, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSaveRestoreWithRepeat(self): + + def _build_graph(start, stop, num_epochs): + iterator = dataset_ops.Dataset.range( + start, stop).repeat(num_epochs).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + start = 2 + stop = 10 + num_epochs = 5 + break_range = 5 + break_epoch = 3 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph( + start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for _ in range(break_epoch - 1): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + for i in range(start, break_range): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_range, stop): + self.assertEqual(i, sess.run(get_next)) + for _ in range(break_epoch, num_epochs): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSaveRestoreExhaustedIterator(self): + + def _build_graph(start, stop, num_epochs): + iterator = dataset_ops.Dataset.range( + start, stop).repeat(num_epochs).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + start = 2 + stop = 10 + num_epochs = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph( + start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for _ in range(num_epochs): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/kernel_tests/reader_dataset_ops_test.py index 70b6ce442e..c8e7333b4b 100644 --- a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_dataset_ops_test.py @@ -26,8 +26,13 @@ from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -267,6 +272,299 @@ class FixedLengthRecordReaderTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(iterator.get_next()) + def _iterator_checkpoint_path(self): + return os.path.join(self.get_temp_dir(), "iterator") + + def _save_op(self, iterator_resource): + iterator_state_variant = gen_dataset_ops.serialize_iterator( + iterator_resource) + save_op = io_ops.write_file( + self._iterator_checkpoint_path(), + parsing_ops.serialize_tensor(iterator_state_variant)) + return save_op + + def _restore_op(self, iterator_resource): + iterator_state_variant = parsing_ops.parse_tensor( + io_ops.read_file(self._iterator_checkpoint_path()), dtypes.variant) + restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, + iterator_state_variant) + return restore_op + + def _build_iterator_graph(self, num_epochs): + filenames = self._createFiles() + dataset = (readers.FixedLengthRecordDataset( + filenames, self._record_bytes, self._header_bytes, self._footer_bytes) + .repeat(num_epochs)) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next_op = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next_op, save_op, restore_op + + def _restore_iterator(self): + output_types = dtypes.string + output_shapes = tensor_shape.scalar() + iterator = iterator_ops.Iterator.from_structure(output_types, output_shapes) + get_next = iterator.get_next() + restore_op = self._restore_op(iterator._iterator_resource) + return restore_op, get_next + + def testSaveRestore(self): + num_epochs = 10 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testInitThenRestore(self): + # Note: Calling init_op before restore_op is redundant. This test just makes + # sure we do not fail if restore is called on an already initialized + # iterator resource. + num_epochs = 10 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreInModifiedGraph(self): + num_epochs = 10 + num_epochs_1 = 20 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs_1) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreWithoutBuildingDatasetGraph(self): + num_epochs = 10 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + restore_op, get_next_op = self._restore_iterator() + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreUnusedIterator(self): + num_epochs = 10 + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + # Save unused iterator. + sess.run(save_op) + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for _ in range(num_epochs * self._num_files * self._num_records): + sess.run(get_next_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreExhaustedIterator(self): + num_epochs = 10 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for _ in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + class TFRecordDatasetTest(test.TestCase): -- GitLab From 5199923383856f9e3bdee40b6f7f976328b42e09 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 22:41:22 -0800 Subject: [PATCH 0037/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 174962378 --- tensorflow/go/op/wrappers.go | 411 +++++++++++++++++++++++------------ 1 file changed, 277 insertions(+), 134 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 4e5d17f76f..bdfad48567 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -3983,41 +3983,6 @@ func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value t return op.Output(0) } -// Identity op for gradient debugging. -// -// This op is hidden from public in Python. It is used by TensorFlow Debugger to -// register gradient tensors for gradient debugging. -func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DebugGradientIdentity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArrayGradV3 -func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"source": source} - opspec := tf.OpSpec{ - Type: "TensorArrayGradV2", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Get the current size of the TensorArray. // // Arguments: @@ -4551,31 +4516,6 @@ func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) return scope.AddOperation(opspec) } -// Concatenates tensors along one dimension. -// -// Arguments: -// values: List of `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// axis: 0-D. The dimension along which to concatenate. Must be in the -// range [-rank(values), rank(values)). -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConcatV2", - Input: []tf.Input{ - tf.OutputList(values), axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. type QueueDequeueUpToV2Attr func(optionalAttr) @@ -4992,80 +4932,6 @@ func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV return op.Output(0) } -// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. -type FIFOQueueV2Attr func(optionalAttr) - -// FIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// FIFOQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// FIFOQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func FIFOQueueV2Container(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// FIFOQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements in first-in first-out order. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FIFOQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // StridedSliceAttr is an optional argument to StridedSlice. type StridedSliceAttr func(optionalAttr) @@ -5445,6 +5311,101 @@ func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged return op.Output(0) } +// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. +type FIFOQueueV2Attr func(optionalAttr) + +// FIFOQueueV2Shapes sets the optional shapes attribute to value. +// +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. If the length of +// this attr is 0, the shapes of queue elements are not constrained, and +// only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["shapes"] = value + } +} + +// FIFOQueueV2Capacity sets the optional capacity attribute to value. +// +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// FIFOQueueV2Container sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func FIFOQueueV2Container(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// FIFOQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A queue that produces elements in first-in first-out order. +// +// Arguments: +// component_types: The type of each component in a value. +// +// Returns The handle to the queue. +func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FIFOQueueV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Converts the given `resource_handle` representing an iterator to a variant tensor. +// +// Arguments: +// resource_handle: A handle to an iterator resource. +// +// Returns A variant tensor storing the state of the iterator contained in the +// resource. +func SerializeIterator(scope *Scope, resource_handle tf.Output) (serialized tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SerializeIterator", + Input: []tf.Input{ + resource_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Return a tensor with the same shape and contents as the input tensor or value. func Identity(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { @@ -5576,6 +5537,39 @@ func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_han return op.Output(0) } +// Outputs the single element from the given dataset. +// +// Arguments: +// dataset: A handle to a dataset that contains a single element. +// +// +// +// Returns The components of the single element of `input`. +func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "DatasetToSingleElement", + Input: []tf.Input{ + dataset, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("DatasetToSingleElement", err) + return + } + return components +} + // Gets the next output from the given iterator. func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { @@ -5696,6 +5690,30 @@ func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf return op.Output(0) } +// Creates a dataset that executes a SQL query and emits rows of the result set. +// +// Arguments: +// driver_name: The database type. Currently, the only supported type is 'sqlite'. +// data_source_name: A connection string to connect to the database. +// query: A SQL query to execute. +// +// +func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "SqlDataset", + Input: []tf.Input{ + driver_name, data_source_name, query, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // PlaceholderAttr is an optional argument to Placeholder. type PlaceholderAttr func(optionalAttr) @@ -5766,6 +5784,68 @@ func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, out return op.Output(0) } +// Identity op for gradient debugging. +// +// This op is hidden from public in Python. It is used by TensorFlow Debugger to +// register gradient tensors for gradient debugging. +func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DebugGradientIdentity", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deprecated. Use TensorArrayGradV3 +func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"source": source} + opspec := tf.OpSpec{ + Type: "TensorArrayGradV2", + Input: []tf.Input{ + handle, flow_in, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that yields a SparseTensor for each element of the input. +// +// Arguments: +// input_dataset: A handle to an input dataset. Must have a single component. +// batch_size: A scalar representing the number of elements to accumulate in a +// batch. +// row_shape: A vector representing the dense shape of each row in the produced +// SparseTensor. The shape may be partially specified, using `-1` to indicate +// that a particular dimension should use the maximum size of all batch elements. +// +// +func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "DenseToSparseBatchDataset", + Input: []tf.Input{ + input_dataset, batch_size, row_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a dataset that batches and pads `batch_size` elements from the input. // // Arguments: @@ -5826,6 +5906,69 @@ func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtyp return op.Output(0), op.Output(1) } +// Converts the given variant tensor to an iterator and stores it in the given resource. +// +// Arguments: +// resource_handle: A handle to an iterator resource. +// serialized: A variant tensor storing the state of the iterator contained in the +// resource. +// +// Returns the created operation. +func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DeserializeIterator", + Input: []tf.Input{ + resource_handle, serialized, + }, + } + return scope.AddOperation(opspec) +} + +// Concatenates tensors along one dimension. +// +// Arguments: +// values: List of `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// axis: 0-D. The dimension along which to concatenate. Must be in the +// range [-rank(values), rank(values)). +// +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. +func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ConcatV2", + Input: []tf.Input{ + tf.OutputList(values), axis, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that contains the elements of `input_dataset` ignoring errors. +func IgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "IgnoreErrorsDataset", + Input: []tf.Input{ + input_dataset, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a dataset that concatenates `input_dataset` with `another_dataset`. func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { -- GitLab From eb49f78c38ef106f806f7698b374f4b28130025f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 23:19:03 -0800 Subject: [PATCH 0038/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 174964560 --- tensorflow/core/ops/ops.pbtxt | 362 ++++++++++++++++++++++++++++++++++ 1 file changed, 362 insertions(+) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index d35decc182..8353b45e22 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -6058,6 +6058,32 @@ op { summary: "Compute the cumulative sum of the tensor `x` along `axis`." description: "By default, this op performs an inclusive cumsum, which means that the first\nelement of the input is identical to the first element of the output:\n\n```python\ntf.cumsum([a, b, c]) # => [a, a + b, a + b + c]\n```\n\nBy setting the `exclusive` kwarg to `True`, an exclusive cumsum is\nperformed instead:\n\n```python\ntf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b]\n```\n\nBy setting the `reverse` kwarg to `True`, the cumsum is performed in the\nopposite direction:\n\n```python\ntf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c]\n```\n\nThis is more efficient than using separate `tf.reverse` ops.\n\nThe `reverse` and `exclusive` kwargs can also be combined:\n\n```python\ntf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0]\n```" } +op { + name: "DatasetToSingleElement" + input_arg { + name: "dataset" + description: "A handle to a dataset that contains a single element." + type: DT_VARIANT + } + output_arg { + name: "components" + description: "The components of the single element of `input`." + type_list_attr: "output_types" + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Outputs the single element from the given dataset." +} op { name: "DebugGradientIdentity" input_arg { @@ -6689,6 +6715,41 @@ op { summary: "Applies set operation along last dimension of 2 `Tensor` inputs." description: "See SetOperationOp::SetOperationFromContext for values of `set_operation`.\n\nOutput `result` is a `SparseTensor` represented by `result_indices`,\n`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this\nhas rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`\ndimension contains the result of `set_operation` applied to the corresponding\n`[0...n-1]` dimension of `set`." } +op { + name: "DenseToSparseBatchDataset" + input_arg { + name: "input_dataset" + description: "A handle to an input dataset. Must have a single component." + type: DT_VARIANT + } + input_arg { + name: "batch_size" + description: "A scalar representing the number of elements to accumulate in a\nbatch." + type: DT_INT64 + } + input_arg { + name: "row_shape" + description: "A vector representing the dense shape of each row in the produced\nSparseTensor. The shape may be partially specified, using `-1` to indicate\nthat a particular dimension should use the maximum size of all batch elements." + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that yields a SparseTensor for each element of the input." +} op { name: "DenseToSparseSetOperation" input_arg { @@ -7028,6 +7089,21 @@ op { summary: "Dequantize the \'input\' tensor into a float Tensor." description: "[min_range, max_range] are scalar floats that specify the range for\nthe \'input\' data. The \'mode\' attribute controls exactly which calculations are\nused to convert the float values to their quantized equivalents.\n\nIn \'MIN_COMBINED\' mode, each value of the tensor will undergo the following:\n\n```\nif T == qint8, in[i] += (range(T) + 1)/ 2.0\nout[i] = min_range + (in[i]* (max_range - min_range) / range(T))\n```\nhere `range(T) = numeric_limits::max() - numeric_limits::min()`\n\n*MIN_COMBINED Mode Example*\n\nIf the input comes from a QuantizedRelu6, the output type is\nquint8 (range of 0-255) but the possible range of QuantizedRelu6 is\n0-6. The min_range and max_range values are therefore 0.0 and 6.0.\nDequantize on quint8 will take each value, cast to float, and multiply\nby 6 / 255.\nNote that if quantizedtype is qint8, the operation will additionally add\neach value by 128 prior to casting.\n\nIf the mode is \'MIN_FIRST\', then this approach is used:\n\n```c++\nnum_discrete_values = 1 << (# of bits in T)\nrange_adjust = num_discrete_values / (num_discrete_values - 1)\nrange = (range_max - range_min) * range_adjust\nrange_scale = range / num_discrete_values\nconst double offset_input = static_cast(input) - lowest_quantized;\nresult = range_min + ((input - numeric_limits::min()) * range_scale)\n```\n\n*SCALED mode Example*\n\n`SCALED` mode matches the quantization approach used in\n`QuantizeAndDequantize{V2|V3}`.\n\nIf the mode is `SCALED`, we do not use the full range of the output type,\nchoosing to elide the lowest possible value for symmetry (e.g., output range is\n-127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to\n0.\n\nWe first find the range of values in our tensor. The\nrange we use is always centered on 0, so we find m such that\n```c++\n m = max(abs(input_min), abs(input_max))\n```\n\nOur input tensor range is then `[-m, m]`.\n\nNext, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.\nIf T is signed, this is\n```\n num_bits = sizeof(T) * 8\n [min_fixed, max_fixed] =\n [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]\n```\n\nOtherwise, if T is unsigned, the fixed-point range is\n```\n [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]\n```\n\nFrom this we compute our scaling factor, s:\n```c++\n s = (2 * m) / (max_fixed - min_fixed)\n```\n\nNow we can dequantize the elements of our tensor:\n```c++\nresult = input * s\n```" } +op { + name: "DeserializeIterator" + input_arg { + name: "resource_handle" + description: "A handle to an iterator resource." + type: DT_RESOURCE + } + input_arg { + name: "serialized" + description: "A variant tensor storing the state of the iterator contained in the\nresource." + type: DT_VARIANT + } + summary: "Converts the given variant tensor to an iterator and stores it in the given resource." + is_stateful: true +} op { name: "DeserializeManySparse" input_arg { @@ -10142,6 +10218,71 @@ op { summary: "Returns the truth value of (x >= y) element-wise." description: "*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } +op { + name: "GroupByWindowDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "key_func_other_arguments" + type_list_attr: "Tkey_func_other_arguments" + } + input_arg { + name: "reduce_func_other_arguments" + type_list_attr: "Treduce_func_other_arguments" + } + input_arg { + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "key_func" + type: "func" + description: "A function mapping an element of `input_dataset`, concatenated\nwith `key_func_other_arguments` to a scalar value of type DT_INT64." + } + attr { + name: "reduce_func" + type: "func" + } + attr { + name: "window_size_func" + type: "func" + } + attr { + name: "Tkey_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Treduce_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that computes a windowed group-by on `input_dataset`." + description: "// TODO(mrry): Support non-int64 keys." +} op { name: "HSVToRGB" input_arg { @@ -10602,6 +10743,30 @@ op { summary: "Compute the upper regularized incomplete Gamma function `Q(a, x)`." description: "The upper regularized incomplete Gamma function is defined as:\n\n\\\\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\\\)\n\nwhere\n\n\\\\(Gamma(a, x) = int_{x}^{\\infty} t^{a-1} exp(-t) dt\\\\)\n\nis the upper incomplete Gama function.\n\nNote, above `P(a, x)` (`Igamma`) is the lower regularized complete\nGamma function." } +op { + name: "IgnoreErrorsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that contains the elements of `input_dataset` ignoring errors." +} op { name: "Imag" input_arg { @@ -12373,6 +12538,54 @@ op { description: "This operation may be executed multiple times. Each execution will reset the\niterator in `iterator` to the first element of `dataset`." is_stateful: true } +op { + name: "MapAndBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "batch_size" + description: "A scalar representing the number of elements to accumulate in a\nbatch. It determines the number of concurrent invocations of `f` that process\nelements from `input_dataset` in parallel." + type: DT_INT64 + } + input_arg { + name: "num_parallel_batches" + description: "A scalar representing the number of batches to create in\nparallel. Processing multiple batches in parallel benefits workloads prone to\nstragglers." + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that applies `f` to the outputs of `input_dataset` and then" + description: "batches `batch_size` of them.\n\nUnlike a \"MapDataset\", which applies `f` sequentially, this dataset invokes up\nto `batch_size * num_parallel_batches` copies of `f` in parallel." +} op { name: "MapClear" attr { @@ -16043,6 +16256,57 @@ op { summary: "Interleave the values from the `data` tensors into a single tensor." description: "Builds a merged tensor such that\n\n```python\n merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]\n```\n\nFor example, if each `indices[m]` is scalar or vector, we have\n\n```python\n # Scalar indices:\n merged[indices[m], ...] = data[m][...]\n\n # Vector indices:\n merged[indices[m][i], ...] = data[m][i, ...]\n```\n\nEach `data[i].shape` must start with the corresponding `indices[i].shape`,\nand the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we\nmust have `data[i].shape = indices[i].shape + constant`. In terms of this\n`constant`, the output shape is\n\n merged.shape = [max(indices)] + constant\n\nValues may be merged in parallel, so if an index appears in both `indices[m][i]`\nand `indices[n][j]`, the result may be invalid. This differs from the normal\nDynamicStitch operator that defines the behavior in that case.\n\nFor example:\n\n```python\n indices[0] = 6\n indices[1] = [4, 1]\n indices[2] = [[5, 2], [0, 3]]\n data[0] = [61, 62]\n data[1] = [[41, 42], [11, 12]]\n data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]\n merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],\n [51, 52], [61, 62]]\n```\n\nThis method can be used to merge partitions created by `dynamic_partition`\nas illustrated on the following example:\n\n```python\n # Apply function (increments x_i) on elements for which a certain condition\n # apply (x_i != -1 in this example).\n x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])\n condition_mask=tf.not_equal(x,tf.constant(-1.))\n partitioned_data = tf.dynamic_partition(\n x, tf.cast(condition_mask, tf.int32) , 2)\n partitioned_data[1] = partitioned_data[1] + 1.0\n condition_indices = tf.dynamic_partition(\n tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)\n x = tf.dynamic_stitch(condition_indices, partitioned_data)\n # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain\n # unchanged.\n```\n\n
\n\n
" } +op { + name: "ParallelInterleaveDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "cycle_length" + type: DT_INT64 + } + input_arg { + name: "block_length" + type: DT_INT64 + } + input_arg { + name: "sloppy" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + description: "A function mapping elements of `input_dataset`, concatenated with\n`other_arguments`, to a Dataset variant that contains elements matching\n`output_types` and `output_shapes`." + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`." + description: "The resulting dataset is similar to the `InterleaveDataset`, with the exception\nthat if retrieving the next value from a dataset would cause the requester to\nblock, it will skip that input dataset. This dataset is especially useful\nwhen loading data from a variable-latency datastores (e.g. HDFS, GCS), as it\nallows the training step to proceed so long as some data is available.\n\n!! WARNING !! This dataset is not deterministic!" +} op { name: "ParallelMapDataset" input_arg { @@ -23850,6 +24114,53 @@ op { summary: "Outputs a `Summary` protocol buffer with scalar values." description: "The input `tags` and `values` must have the same shape. The generated summary\nhas a summary value for each tag-value pair in `tags` and `values`." } +op { + name: "ScanDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset successively reduces `f` over the elements of `input_dataset`." +} op { name: "ScatterAdd" input_arg { @@ -25044,6 +25355,21 @@ op { } summary: "Computes gradients for the scaled exponential linear (Selu) operation." } +op { + name: "SerializeIterator" + input_arg { + name: "resource_handle" + description: "A handle to an iterator resource." + type: DT_RESOURCE + } + output_arg { + name: "serialized" + description: "A variant tensor storing the state of the iterator contained in the\nresource." + type: DT_VARIANT + } + summary: "Converts the given `resource_handle` representing an iterator to a variant tensor." + is_stateful: true +} op { name: "SerializeManySparse" input_arg { @@ -28954,6 +29280,42 @@ op { } summary: "Splits a tensor into `num_split` tensors along one dimension." } +op { + name: "SqlDataset" + input_arg { + name: "driver_name" + description: "The database type. Currently, the only supported type is \'sqlite\'." + type: DT_STRING + } + input_arg { + name: "data_source_name" + description: "A connection string to connect to the database." + type: DT_STRING + } + input_arg { + name: "query" + description: "A SQL query to execute." + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that executes a SQL query and emits rows of the result set." + is_stateful: true +} op { name: "Sqrt" input_arg { -- GitLab From 56e0d5e0d8dab578f1c9ef723772ac79e9fc9583 Mon Sep 17 00:00:00 2001 From: Jay Young Date: Wed, 8 Nov 2017 16:22:59 +0800 Subject: [PATCH 0039/1801] [FIX]the estimator generate by tf.keras.model_to_estimator() cannot export saved_model because the model_fn provided by _create_keras_model_fn wasn't set export_outputs in the returned EstimatorSpec. Here I provide a default export_outputs with serve_default key and Predict API, and the result inside is same as predictions [FIX]_save_first_checkpoint call saver.save with only a path and without filename, that make the ckpt saved with name like `{model_dir}/.meta` and `{model_dir}/.index`, which can not be found by latest_checkpoint("{model_dir}"). As state by save method of Saver, save_path should be a path to the checkpoint name. So to fix this, I change the name to `{model_dir}/keras_model.ckpt` --- tensorflow/python/keras/_impl/keras/estimator.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 125e63e1b8..a2a2fe0ead 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -19,10 +19,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os from tensorflow.python.client import session from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator import export as export_lib from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib @@ -33,6 +35,9 @@ from tensorflow.python.ops import metrics as metrics_module from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import training_util +from tensorflow.python.saved_model import signature_constants + +_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY def _create_ordered_io(keras_model, estimator_io_dict, is_input=True): @@ -184,7 +189,10 @@ def _create_keras_model_fn(keras_model, custom_objects=None): predictions=predictions, loss=loss, train_op=train_op, - eval_metric_ops=eval_metric_ops) + eval_metric_ops=eval_metric_ops, + export_outputs={ + _DEFAULT_SERVING_KEY: export_lib.export_output.PredictOutput(predictions) + }) return model_fn @@ -222,7 +230,7 @@ def _save_first_checkpoint(keras_model, estimator, custom_objects, K._initialize_variables(sess) # pylint: enable=protected-access saver = saver_lib.Saver() - saver.save(sess, estimator.model_dir + '/') + saver.save(sess, os.path.join(estimator.model_dir, 'keras_model.ckpt')) def model_to_estimator(keras_model=None, -- GitLab From f901742e656c9959e7d8a82d5713d24d96122058 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 02:25:01 -0800 Subject: [PATCH 0040/1801] Also register string types if __ANDROID_TYPES_FULL__ is defined PiperOrigin-RevId: 174979678 --- tensorflow/contrib/makefile/tf_op_files.txt | 18 +++++++++++++++ tensorflow/core/framework/register_types.h | 5 +++-- tensorflow/core/kernels/BUILD | 25 +++++++++++++++++++++ tensorflow/core/kernels/concat_lib_cpu.cc | 9 +++++--- 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 8b77c99cb5..5f06106c1d 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -8,6 +8,7 @@ tensorflow/core/kernels/xent_op.cc tensorflow/core/kernels/where_op.cc tensorflow/core/kernels/variable_ops.cc tensorflow/core/kernels/unpack_op.cc +tensorflow/core/kernels/unique_op.cc tensorflow/core/kernels/transpose_op.cc tensorflow/core/kernels/transpose_functor_cpu.cc tensorflow/core/kernels/training_op_helpers.cc @@ -41,6 +42,9 @@ tensorflow/core/kernels/spectrogram_op.cc tensorflow/core/kernels/spectrogram.cc tensorflow/core/kernels/sparse_to_dense_op.cc tensorflow/core/kernels/sparse_matmul_op.cc +tensorflow/core/kernels/sparse_fill_empty_rows_op.cc +tensorflow/core/kernels/sparse_reshape_op.c +tensorflow/core/kernels/segment_reduction_ops.cc tensorflow/core/kernels/softsign_op.cc tensorflow/core/kernels/softplus_op.cc tensorflow/core/kernels/softmax_op.cc @@ -109,6 +113,10 @@ tensorflow/core/kernels/maxpooling_op.cc tensorflow/core/kernels/matmul_op.cc tensorflow/core/kernels/lrn_op.cc tensorflow/core/kernels/logging_ops.cc +tensorflow/core/kernels/initializable_lookup_table.c +tensorflow/core/kernels/lookup_table_init_op.cc +tensorflow/core/kernels/lookup_table_op.cc +tensorflow/core/kernels/lookup_util.cc tensorflow/core/kernels/inplace_ops.cc tensorflow/core/kernels/in_topk_op.cc tensorflow/core/kernels/immutable_constant_op.cc @@ -116,10 +124,18 @@ tensorflow/core/kernels/identity_op.cc tensorflow/core/kernels/identity_n_op.cc tensorflow/core/kernels/gather_op.cc tensorflow/core/kernels/gather_functor.cc +tensorflow/core/kernels/gather_nd_op.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_0.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_1.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_2.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_3.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_4.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_5.cc tensorflow/core/kernels/fused_batch_norm_op.cc tensorflow/core/kernels/function_ops.cc tensorflow/core/kernels/fill_functor.cc tensorflow/core/kernels/fifo_queue.cc +tensorflow/core/kernels/fifo_queue_op.cc tensorflow/core/kernels/fake_quant_ops.cc tensorflow/core/kernels/example_parsing_ops.cc tensorflow/core/kernels/encode_wav_op.cc @@ -166,6 +182,8 @@ tensorflow/core/kernels/cwise_op_floor.cc tensorflow/core/kernels/cwise_op_exp.cc tensorflow/core/kernels/cwise_op_equal_to_2.cc tensorflow/core/kernels/cwise_op_equal_to_1.cc +tensorflow/core/kernels/cwise_op_not_equal_to_2.cc +tensorflow/core/kernels/cwise_op_not_equal_to_1.cc tensorflow/core/kernels/cwise_op_div.cc tensorflow/core/kernels/cwise_op_bitwise_xor.cc tensorflow/core/kernels/cwise_op_bitwise_or.cc diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h index c31ab18cc1..4bb37e4f6e 100644 --- a/tensorflow/core/framework/register_types.h +++ b/tensorflow/core/framework/register_types.h @@ -87,7 +87,8 @@ limitations under the License. #elif defined(__ANDROID_TYPES_FULL__) -// Only half, float, int32, int64, bool, and quantized types are supported. +// Only string, half, float, int32, int64, bool, and quantized types +// supported. #define TF_CALL_float(m) m(float) #define TF_CALL_double(m) #define TF_CALL_int32(m) m(::tensorflow::int32) @@ -96,7 +97,7 @@ limitations under the License. #define TF_CALL_int16(m) #define TF_CALL_int8(m) -#define TF_CALL_string(m) +#define TF_CALL_string(m) m(string) #define TF_CALL_resource(m) #define TF_CALL_variant(m) #define TF_CALL_complex64(m) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 34cd51ba66..6206963251 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4420,6 +4420,15 @@ filegroup( "fill_functor.h", "function_ops.cc", "gather_functor.h", + "gather_nd_op.cc", + "gather_nd_op.h", + "gather_nd_op_cpu_impl.h", + "gather_nd_op_cpu_impl_0.cc", + "gather_nd_op_cpu_impl_1.cc", + "gather_nd_op_cpu_impl_2.cc", + "gather_nd_op_cpu_impl_3.cc", + "gather_nd_op_cpu_impl_4.cc", + "gather_nd_op_cpu_impl_5.cc", "gather_op.cc", "identity_n_op.cc", "identity_n_op.h", @@ -4513,6 +4522,10 @@ filegroup( "fused_batch_norm_op.h", "gemm_functors.h", "image_resizer_state.h", + "initializable_lookup_table.h", + "lookup_table_init_op.h", + "lookup_table_op.h", + "lookup_util.h", "maxpooling_op.h", "mfcc.h", "mfcc_dct.h", @@ -4529,6 +4542,7 @@ filegroup( "resize_nearest_neighbor_op.h", "reverse_op.h", "save_restore_tensor.h", + "segment_reduction_ops.h", "softplus_op.h", "softsign_op.h", "spacetobatch_functor.h", @@ -4578,6 +4592,8 @@ filegroup( "cwise_op_div.cc", "cwise_op_equal_to_1.cc", "cwise_op_equal_to_2.cc", + "cwise_op_not_equal_to_1.cc", + "cwise_op_not_equal_to_2.cc", "cwise_op_exp.cc", "cwise_op_floor.cc", "cwise_op_floor_div.cc", @@ -4619,6 +4635,7 @@ filegroup( "encode_wav_op.cc", "fake_quant_ops.cc", "fifo_queue.cc", + "fifo_queue_op.cc", "fused_batch_norm_op.cc", "population_count_op.cc", "population_count_op.h", @@ -4642,7 +4659,11 @@ filegroup( "depthtospace_op.cc", "dynamic_stitch_op.cc", "in_topk_op.cc", + "initializable_lookup_table.cc", "logging_ops.cc", + "lookup_table_init_op.cc", + "lookup_table_op.cc", + "lookup_util.cc", "lrn_op.cc", "maxpooling_op.cc", "mfcc.cc", @@ -4677,12 +4698,15 @@ filegroup( "save_op.cc", "save_restore_tensor.cc", "save_restore_v2_ops.cc", + "segment_reduction_ops.cc", "session_ops.cc", "softplus_op.cc", "softsign_op.cc", "spacetobatch_functor.cc", "spacetobatch_op.cc", "spacetodepth_op.cc", + "sparse_fill_empty_rows_op.cc", + "sparse_reshape_op.cc", "sparse_to_dense_op.cc", "spectrogram.cc", "spectrogram_op.cc", @@ -4705,6 +4729,7 @@ filegroup( "training_ops.cc", "transpose_functor_cpu.cc", "transpose_op.cc", + "unique_op.cc", "warn_about_ints.cc", "where_op.cc", "xent_op.cc", diff --git a/tensorflow/core/kernels/concat_lib_cpu.cc b/tensorflow/core/kernels/concat_lib_cpu.cc index 258ce15456..b0bec0c5dc 100644 --- a/tensorflow/core/kernels/concat_lib_cpu.cc +++ b/tensorflow/core/kernels/concat_lib_cpu.cc @@ -74,11 +74,14 @@ REGISTER(qint16) REGISTER(qint32) REGISTER(bfloat16) -#if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) -// Primarily used for SavedModel support on mobile. +#if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) && \ + !defined(__ANDROID_TYPES_FULL__) +// Primarily used for SavedModel support on mobile. Registering it here only if +// __ANDROID_TYPES_FULL__ is not defined, as that already register strings REGISTER(string); #endif // defined(IS_MOBILE_PLATFORM) && - // !defined(SUPPORT_SELECTIVE_REGISTRATION) + // !defined(SUPPORT_SELECTIVE_REGISTRATION) && + // !defined(__ANDROID_TYPES_FULL__) #ifdef TENSORFLOW_USE_SYCL template -- GitLab From af9c4ea6be5589cad66b8cb1159a58d7ec19ca7e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 03:09:46 -0800 Subject: [PATCH 0041/1801] Check GPU availability after creating test session. PiperOrigin-RevId: 174983466 --- tensorflow/contrib/nccl/python/ops/nccl_ops_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index 0b13e3595e..bad0abd44c 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -72,14 +72,15 @@ class NcclTestCase(test.TestCase): two. device_sets: Tuple of virtual devices to run test on. """ - if not test.is_gpu_available(): - return # Test requires access to a GPU - for dtype in [np.float32, np.int32, np.int64, np.float64]: # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: + # Check GPU availability *after* creating test session, see b/68975239. + if not test.is_gpu_available(): + return # Test requires access to a GPU + for devices in device_sets: shape = (3, 4) random = (np.random.random_sample(shape) - .5) * 1024 -- GitLab From 59ea341a1fd0a4badc6c3cfec7a578195a3bf623 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 8 Nov 2017 06:48:29 -0800 Subject: [PATCH 0042/1801] tfdbg: Add test for loading DebugDumpDir with a relative path PiperOrigin-RevId: 174999937 --- .../python/debug/wrappers/dumping_wrapper_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py index d987ba84b5..eda5ecf508 100644 --- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py @@ -111,6 +111,20 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase): self.assertEqual(repr(self.inc_v), dump.run_fetches_info) self.assertEqual(repr(None), dump.run_feed_keys_info) + def testDumpingOnASingleRunWorksWithRelativePathForDebugDumpDir(self): + sess = dumping_wrapper.DumpingDebugWrapperSession( + self.sess, session_root=self.session_root, log_usage=False) + sess.run(self.inc_v) + dump_dirs = glob.glob(os.path.join(self.session_root, "run_*")) + cwd = os.getcwd() + try: + os.chdir(self.session_root) + dump = debug_data.DebugDumpDir( + os.path.relpath(dump_dirs[0], self.session_root)) + self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity")) + finally: + os.chdir(cwd) + def testDumpingOnASingleRunWithFeedDictWorks(self): sess = dumping_wrapper.DumpingDebugWrapperSession( self.sess, session_root=self.session_root, log_usage=False) -- GitLab From ac0ba5bd041f3287bb2a4f12c2ef43a3264f6073 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 8 Nov 2017 07:36:56 -0800 Subject: [PATCH 0043/1801] tfdbg: Fix a test bug hidden in a child thread PiperOrigin-RevId: 175004323 --- tensorflow/python/debug/wrappers/dumping_wrapper_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py index eda5ecf508..acea9433e2 100644 --- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py @@ -364,12 +364,14 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase): thread_name_filter=r"MainThread$") self.assertAllClose(1.0, sess.run(self.delta)) + child_thread_result = [] def child_thread_job(): - sess.run(sess.run(self.eta)) + child_thread_result.append(sess.run(self.eta)) thread = threading.Thread(name="ChildThread", target=child_thread_job) thread.start() thread.join() + self.assertAllClose([-1.4], child_thread_result) dump_dirs = glob.glob(os.path.join(self.session_root, "run_*")) self.assertEqual(1, len(dump_dirs)) -- GitLab From 500e0aa5eb3fb0ed08b717fc34b8a8f2a2bd0907 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 10:09:23 -0800 Subject: [PATCH 0044/1801] Fix incomplete spec of EagerTensor.numpy() PiperOrigin-RevId: 175023039 --- tensorflow/python/framework/ops.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 6469aca3ec..b256af2182 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -617,15 +617,16 @@ class _EagerTensorBase(Tensor): return dtypes._INTERN_TABLE[self._datatype_enum()] # pylint: disable=protected-access def numpy(self): - """Returns a numpy array with the same contents as the Tensor. + """Returns a numpy array or a scalar with the same contents as the Tensor. TODO(ashankar,agarwal): Perhaps this should NOT reference the underlying buffer but instead always explicitly copy? Note that currently it may or may not copy based on whether the numpy data is properly aligned or not. Returns: - A numpy array that may share memory with the Tensor object. Any changes - to one may be reflected in the other. + A numpy array or a scalar. Numpy array may share memory with the + Tensor object. Any changes to one may be reflected in the other. A scalar + value is returned when self has rank 0. Raises: ValueError: if the type of this Tensor is not representable in numpy. -- GitLab From 0211cb2f83b620ff899b6876e6e11ac08bc853b2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 10:19:01 -0800 Subject: [PATCH 0045/1801] Do not return a mutable HloComputation* from a entry_computation() on a const HloModule*. PiperOrigin-RevId: 175024608 --- tensorflow/compiler/xla/service/buffer_assignment.cc | 10 +++++----- tensorflow/compiler/xla/service/hlo_module.h | 6 +++++- .../compiler/xla/service/interpreter/executable.cc | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index b422b22df9..c74f050f77 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -497,19 +497,19 @@ Status GatherComputationsByAllocationType( std::vector* global_computations) { // Create a worklist of computations paired with whether the allocation must // be thread-local. - std::deque> worklist; + std::deque> worklist; worklist.push_back(std::make_pair(module->entry_computation(), /*is_thread_local*/ false)); // Sets for quickly checking membership. Computations are returned in vectors // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; + FlatSet thread_local_set; + FlatSet global_set; while (!worklist.empty()) { auto worklist_front = worklist.front(); worklist.pop_front(); - HloComputation* computation = worklist_front.first; + const HloComputation* computation = worklist_front.first; bool is_thread_local = worklist_front.second; bool in_thread_local_set = thread_local_set.count(computation) > 0; bool in_global_set = global_set.count(computation) > 0; @@ -653,7 +653,7 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, } if (allow_input_output_aliasing_ && allocation->maybe_live_out()) { - HloComputation* entry_computation = + const HloComputation* entry_computation = assignment->module_->entry_computation(); for (auto param : entry_computation->parameter_instructions()) { for (auto& param_buffer : diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 6469851791..5141e7bc8d 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -85,7 +85,11 @@ class HloModule { std::unique_ptr Clone(const string& suffix = "clone") const; // Return a pointer to the entry computation of the module.. - HloComputation* entry_computation() const { + const HloComputation* entry_computation() const { + CHECK_NE(nullptr, entry_computation_); + return entry_computation_; + } + HloComputation* entry_computation() { CHECK_NE(nullptr, entry_computation_); return entry_computation_; } diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 86dee8462f..96f937caf9 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -89,7 +89,7 @@ StatusOr InterpreterExecutable::ExecuteOnStream( uint64 start_micros = tensorflow::Env::Default()->NowMicros(); - HloComputation* computation = module().entry_computation(); + const HloComputation* computation = module().entry_computation(); if (computation->num_parameters() != arguments.size()) { return tensorflow::errors::Internal( "Mismatch between argument count and graph parameter count."); -- GitLab From d97dcf04a889e5f75c5651ca994f499655564416 Mon Sep 17 00:00:00 2001 From: jfaath Date: Wed, 8 Nov 2017 11:38:59 -0700 Subject: [PATCH 0046/1801] fix indenting on cache check --- tensorflow/python/layers/base.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index db608aa79a..220450214c 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -2087,17 +2087,17 @@ class Network(Layer): # Store in cache. self._output_shape_cache[cache_key] = output_shapes - else: - # Cache hit. - output_shapes = self._output_shape_cache[cache_key] + else: + # Cache hit. + output_shapes = self._output_shape_cache[cache_key] - if isinstance(output_shapes, list): - if len(output_shapes) == 1: - return tensor_shape.TensorShape(output_shapes[0]) - else: - return [tensor_shape.TensorShape(shape) for shape in output_shapes] + if isinstance(output_shapes, list): + if len(output_shapes) == 1: + return tensor_shape.TensorShape(output_shapes[0]) else: - return tensor_shape.TensorShape(output_shapes) + return [tensor_shape.TensorShape(shape) for shape in output_shapes] + else: + return tensor_shape.TensorShape(output_shapes) def _run_internal_graph(self, inputs, masks=None): """Computes output tensors for new inputs. -- GitLab From a2853e3011bfae8a75fc04ed1ef2f4ff0fd7cf59 Mon Sep 17 00:00:00 2001 From: Thomas Schumm Date: Wed, 8 Nov 2017 10:43:48 -0800 Subject: [PATCH 0047/1801] HParams.set_hparam doesn't fully check types, contrary to its docstring. PiperOrigin-RevId: 175028981 --- .../training/python/training/hparam.py | 58 +++++++++++++++++-- .../training/python/training/hparam_test.py | 31 +++++++++- 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 391899b34f..7db625cdd5 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import json +import numbers import re import six @@ -76,7 +77,7 @@ def _process_scalar_value(name, parse_fn, var_type, m_dict, values, function. Raises: - ValueError: If the name has already been sued. + ValueError: If the name has already been used. """ try: parsed_value = parse_fn(m_dict['val']) @@ -138,6 +139,54 @@ def _process_list_value(name, parse_fn, var_type, m_dict, values, _parse_fail(name, var_type, m_dict['vals'], values) +def _cast_to_type_if_compatible(name, param_type, value): + """Cast hparam to the provided type, if compatible. + + Args: + name: Name of the hparam to be cast. + param_type: The type of the hparam. + value: The value to be cast, if compatible. + + Returns: + The result of casting `value` to `param_type`. + + Raises: + ValueError: If the type of `value` is not compatible with param_type. + * If `param_type` is a string type, but `value` is not. + * If `param_type` is a boolean, but `value` is not, or vice versa. + * If `param_type` is an integer type, but `value` is not. + * If `param_type` is a float type, but `value` is not a numeric type. + """ + fail_msg = ( + "Could not cast hparam '%s' of type '%s' from value %r" % + (name, param_type, value)) + + # Some callers use None, for which we can't do any casting/checking. :( + if issubclass(param_type, type(None)): + return value + + # Avoid converting a non-string type to a string. + if (issubclass(param_type, (six.string_types, six.binary_type)) and + not isinstance(value, (six.string_types, six.binary_type))): + raise ValueError(fail_msg) + + # Avoid converting a number or string type to a boolean or vice versa. + if issubclass(param_type, bool) != isinstance(value, bool): + raise ValueError(fail_msg) + + # Avoid converting float to an integer (the reverse is fine). + if (issubclass(param_type, numbers.Integral) and + not isinstance(value, numbers.Integral)): + raise ValueError(fail_msg) + + # Avoid converting a non-numeric type to a numeric type. + if (issubclass(param_type, numbers.Number) and + not isinstance(value, numbers.Number)): + raise ValueError(fail_msg) + + return param_type(value) + + def parse_values(values, type_map): """Parses hyperparameter values from a string into a python map. @@ -438,17 +487,18 @@ class HParams(object): Raises: ValueError: If there is a type mismatch. """ - _, is_list = self._hparam_types[name] + param_type, is_list = self._hparam_types[name] if isinstance(value, list): if not is_list: raise ValueError( 'Must not pass a list for single-valued parameter: %s' % name) - setattr(self, name, value) + setattr(self, name, [ + _cast_to_type_if_compatible(name, param_type, v) for v in value]) else: if is_list: raise ValueError( 'Must pass a list for multi-valued parameter: %s.' % name) - setattr(self, name, value) + setattr(self, name, _cast_to_type_if_compatible(name, param_type, value)) def parse(self, values): """Override hyperparameter values, parsing new values from a string. diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py index f54514cefd..949c262f5b 100644 --- a/tensorflow/contrib/training/python/training/hparam_test.py +++ b/tensorflow/contrib/training/python/training/hparam_test.py @@ -318,13 +318,42 @@ class HParamsTest(test.TestCase): self.assertEqual(3.0, hparams.b) self.assertEqual('relu4', hparams.c_c) - def testSetHParamTypeMismatch(self): + def testSetHParamListNonListMismatch(self): hparams = hparam.HParams(a=1, b=[2.0, 3.0]) with self.assertRaisesRegexp(ValueError, r'Must not pass a list'): hparams.set_hparam('a', [1.0]) with self.assertRaisesRegexp(ValueError, r'Must pass a list'): hparams.set_hparam('b', 1.0) + def testSetHParamTypeMismatch(self): + hparams = hparam.HParams( + int_=1, str_='str', bool_=True, float_=1.1, list_int=[1, 2], none=None) + + with self.assertRaises(ValueError): + hparams.set_hparam('str_', 2.2) + + with self.assertRaises(ValueError): + hparams.set_hparam('int_', False) + + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', 1) + + with self.assertRaises(ValueError): + hparams.set_hparam('int_', 2.2) + + with self.assertRaises(ValueError): + hparams.set_hparam('list_int', [2, 3.3]) + + with self.assertRaises(ValueError): + hparams.set_hparam('int_', '2') + + # Casting int to float is OK + hparams.set_hparam('float_', 1) + + # Getting stuck with NoneType :( + hparams.set_hparam('none', '1') + self.assertEqual('1', hparams.none) + def testNonProtoFails(self): with self.assertRaisesRegexp(AssertionError, ''): hparam.HParams(hparam_def=1) -- GitLab From 2884da93d7da90f7532643a8e3f1fa0f2a1d6bbe Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 8 Nov 2017 10:52:49 -0800 Subject: [PATCH 0048/1801] [XLA] Print constant literals of size <= 8 elements. Previously we'd only print scalars. But if you have a constant with just a few values, what the heck, show the whole thing. PiperOrigin-RevId: 175030210 --- .../compiler/xla/service/hlo_graph_dumper.cc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index fd162622ce..1c063c973d 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -761,12 +761,22 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { string HloDotDumper::GetInstructionNodeInlinedOperands( const HloInstruction* instr) { auto stringify_constant = [](const HloInstruction* constant) { - if (ShapeUtil::IsEffectiveScalar(constant->shape())) { - auto elem_idx = IndexUtil::LinearIndexToMultidimensionalIndex( - constant->shape(), /*linear_index=*/0); - return Printf("%s (%s)", constant->literal().GetAsString(elem_idx), + const auto& shape = constant->shape(); + + // Print the literal value of constants with <= K elements. + optional elem_count; + if (!ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)) { + elem_count = 1; + for (int64 dim : shape.dimensions()) { + *elem_count *= dim; + } + } + if (elem_count.has_value() && *elem_count <= 8) { + return Printf("%s (%s)", constant->literal().ToString(), ShapeUtil::HumanString(constant->shape())); } + + // Otherwise, print e.g. "%constant.42 (s32[100])". string constant_name; if (tensorflow::StringPiece(constant->name()).starts_with("%constant")) { constant_name = constant->name(); -- GitLab From 729c8c1bb36656c4528d7ff306fbbbd7856733ea Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 8 Nov 2017 10:55:48 -0800 Subject: [PATCH 0049/1801] Add database writer ops to contrib/summary PiperOrigin-RevId: 175030602 --- .../contrib/cmake/tf_core_framework.cmake | 3 + tensorflow/contrib/summary/BUILD | 6 + tensorflow/contrib/summary/summary.py | 2 + tensorflow/contrib/summary/summary_ops.py | 125 ++++++++++++++++-- .../contrib/summary/summary_ops_test.py | 110 +++++++++++++++ tensorflow/contrib/tensorboard/db/BUILD | 2 + .../tensorboard/db/summary_db_writer.cc | 34 ++++- .../tensorboard/db/summary_db_writer_test.cc | 56 +++++++- tensorflow/core/kernels/BUILD | 3 + tensorflow/core/kernels/summary_interface.cc | 4 +- tensorflow/core/kernels/summary_kernels.cc | 50 +++++++ tensorflow/core/ops/summary_ops.cc | 41 ++++++ 12 files changed, 419 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index c3dc8531bb..c607546f4a 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -301,6 +301,8 @@ file(GLOB_RECURSE tf_core_framework_srcs "${tensorflow_source_dir}/tensorflow/core/common_runtime/session.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_factory.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_options.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*.h" "${tensorflow_source_dir}/public/*.h" ) @@ -314,6 +316,7 @@ file(GLOB_RECURSE tf_core_framework_exclude_srcs "${tensorflow_source_dir}/tensorflow/core/util/*test*.h" "${tensorflow_source_dir}/tensorflow/core/util/*test*.cc" "${tensorflow_source_dir}/tensorflow/core/util/*main.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*test*.cc" ) list(REMOVE_ITEM tf_core_framework_srcs ${tf_core_framework_exclude_srcs}) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index da23f1c380..3c60d2bb56 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -26,12 +26,18 @@ py_test( deps = [ ":summary_ops", ":summary_test_util", + "//tensorflow/python:array_ops", "//tensorflow/python:errors", + "//tensorflow/python:framework", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:ops", "//tensorflow/python:platform", + "//tensorflow/python:state_ops", "//tensorflow/python:training", "//tensorflow/python/eager:function", "//tensorflow/python/eager:test", + "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index ca82ea094c..813e8b2b09 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -28,11 +28,13 @@ from __future__ import print_function from tensorflow.contrib.summary.summary_ops import all_summary_ops from tensorflow.contrib.summary.summary_ops import always_record_summaries from tensorflow.contrib.summary.summary_ops import audio +from tensorflow.contrib.summary.summary_ops import create_summary_db_writer from tensorflow.contrib.summary.summary_ops import create_summary_file_writer from tensorflow.contrib.summary.summary_ops import eval_dir from tensorflow.contrib.summary.summary_ops import generic from tensorflow.contrib.summary.summary_ops import histogram from tensorflow.contrib.summary.summary_ops import image +from tensorflow.contrib.summary.summary_ops import import_event from tensorflow.contrib.summary.summary_ops import never_record_summaries from tensorflow.contrib.summary.summary_ops import record_summaries_every_n_global_steps from tensorflow.contrib.summary.summary_ops import scalar diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index 9238671c4a..f6be99f6ae 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -19,7 +19,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import getpass import os +import re +import time + +import six from tensorflow.contrib.summary import gen_summary_ops from tensorflow.python.eager import context @@ -42,6 +47,10 @@ _SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries" _SUMMARY_COLLECTION_NAME = "_SUMMARY_V2" _SUMMARY_WRITER_INIT_COLLECTION_NAME = "_SUMMARY_WRITER_V2" +_EXPERIMENT_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,256}$") +_RUN_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,512}$") +_USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I) + def should_record_summaries(): """Returns boolean Tensor which is true if summaries should be recorded.""" @@ -132,7 +141,8 @@ def create_summary_file_writer(logdir, flush once the queue gets bigger than this. flush_millis: the largest interval between flushes. filename_suffix: optional suffix for the event file name. - name: name for the summary writer. + name: Shared name for this SummaryWriter resource stored to default + Graph. Returns: Either a summary writer or an empty object which can be used as a @@ -147,14 +157,81 @@ def create_summary_file_writer(logdir, flush_millis = constant_op.constant(2 * 60 * 1000) if filename_suffix is None: filename_suffix = constant_op.constant("") - resource = gen_summary_ops.summary_writer(shared_name=name) - # TODO(apassos) ensure the initialization op runs when in graph mode; - # consider calling session.run here. - ops.add_to_collection( - _SUMMARY_WRITER_INIT_COLLECTION_NAME, - gen_summary_ops.create_summary_file_writer( - resource, logdir, max_queue, flush_millis, filename_suffix)) - return SummaryWriter(resource) + return _make_summary_writer( + name, + gen_summary_ops.create_summary_file_writer, + logdir=logdir, + max_queue=max_queue, + flush_millis=flush_millis, + filename_suffix=filename_suffix) + + +def create_summary_db_writer(db_uri, + experiment_name=None, + run_name=None, + user_name=None, + name=None): + """Creates a summary database writer in the current context. + + This can be used to write tensors from the execution graph directly + to a database. Only SQLite is supported right now. This function + will create the schema if it doesn't exist. Entries in the Users, + Experiments, and Runs tables will be created automatically if they + don't already exist. + + Args: + db_uri: For example "file:/tmp/foo.sqlite". + experiment_name: Defaults to YYYY-MM-DD in local time if None. + Empty string means the Run will not be associated with an + Experiment. Can't contain ASCII control characters or <>. Case + sensitive. + run_name: Defaults to HH:MM:SS in local time if None. Empty string + means a Tag will not be associated with any Run. Can't contain + ASCII control characters or <>. Case sensitive. + user_name: Defaults to system username if None. Empty means the + Experiment will not be associated with a User. Must be valid as + both a DNS label and Linux username. + name: Shared name for this SummaryWriter resource stored to default + Graph. + + Returns: + A new SummaryWriter instance. + """ + with ops.device("cpu:0"): + if experiment_name is None: + experiment_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + if run_name is None: + run_name = time.strftime("%H:%M:%S", time.localtime(time.time())) + if user_name is None: + user_name = getpass.getuser() + experiment_name = _cleanse_string( + "experiment_name", _EXPERIMENT_NAME_PATTERNS, experiment_name) + run_name = _cleanse_string("run_name", _RUN_NAME_PATTERNS, run_name) + user_name = _cleanse_string("user_name", _USER_NAME_PATTERNS, user_name) + return _make_summary_writer( + name, + gen_summary_ops.create_summary_db_writer, + db_uri=db_uri, + experiment_name=experiment_name, + run_name=run_name, + user_name=user_name) + + +def _make_summary_writer(name, factory, **kwargs): + resource = gen_summary_ops.summary_writer(shared_name=name) + # TODO(apassos): Consider doing this instead. + # node = factory(resource, **kwargs) + # if not context.in_eager_mode(): + # ops.get_default_session().run(node) + ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, + factory(resource, **kwargs)) + return SummaryWriter(resource) + + +def _cleanse_string(name, pattern, value): + if isinstance(value, six.string_types) and pattern.search(value) is None: + raise ValueError("%s (%s) must match %s" % (name, value, pattern.pattern)) + return ops.convert_to_tensor(value, dtypes.string) def _nothing(): @@ -206,16 +283,22 @@ def summary_writer_function(name, tensor, function, family=None): return op -def generic(name, tensor, metadata, family=None, global_step=None): +def generic(name, tensor, metadata=None, family=None, global_step=None): """Writes a tensor summary if possible.""" if global_step is None: global_step = training_util.get_global_step() def function(tag, scope): + if metadata is None: + serialized_metadata = constant_op.constant("") + elif hasattr(metadata, "SerializeToString"): + serialized_metadata = constant_op.constant(metadata.SerializeToString()) + else: + serialized_metadata = metadata # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_summary( context.context().summary_writer_resource, global_step, array_ops.identity(tensor), - tag, metadata, name=scope) + tag, serialized_metadata, name=scope) return summary_writer_function(name, tensor, function, family=family) @@ -284,6 +367,26 @@ def audio(name, tensor, sample_rate, max_outputs, family=None, return summary_writer_function(name, tensor, function, family=family) +def import_event(tensor, name=None): + """Writes a tf.Event binary proto. + + When using create_summary_db_writer(), this can be used alongside + tf.TFRecordReader to load event logs into the database. Please note + that this is lower level than the other summary functions and will + ignore any conditions set by methods like should_record_summaries(). + + Args: + tensor: A `Tensor` of type `string` containing a serialized `Event` + proto. + name: A name for the operation (optional). + + Returns: + The created Operation. + """ + return gen_summary_ops.import_event( + context.context().summary_writer_resource, tensor, name=name) + + def eval_dir(model_dir, name=None): """Construct a logdir for an eval summary writer.""" return os.path.join(model_dir, "eval" if not name else "eval_" + name) diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index 466e194096..6e1a746815 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -17,14 +17,22 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools +import os import tempfile +import six +import sqlite3 + from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util from tensorflow.python.eager import function from tensorflow.python.eager import test +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import state_ops from tensorflow.python.platform import gfile from tensorflow.python.training import training_util @@ -99,5 +107,107 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') + +class DbTest(test_util.TensorFlowTestCase): + + def setUp(self): + self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite') + if os.path.exists(self.db_path): + os.unlink(self.db_path) + self.db = sqlite3.connect(self.db_path) + self.create_summary_db_writer = functools.partial( + summary_ops.create_summary_db_writer, + db_uri=self.db_path, + experiment_name='experiment', + run_name='run', + user_name='user') + + def tearDown(self): + self.db.close() + + def testIntegerSummaries(self): + step = training_util.create_global_step() + + def adder(x, y): + state_ops.assign_add(step, 1) + summary_ops.generic('x', x) + summary_ops.generic('y', y) + sum_ = x + y + summary_ops.generic('sum', sum_) + return sum_ + + with summary_ops.always_record_summaries(): + with self.create_summary_db_writer().as_default(): + self.assertEqual(5, adder(int64(2), int64(3)).numpy()) + + six.assertCountEqual(self, [1, 1, 1], + get_all(self.db, 'SELECT step FROM Tensors')) + six.assertCountEqual(self, ['x', 'y', 'sum'], + get_all(self.db, 'SELECT tag_name FROM Tags')) + x_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "x"') + y_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "y"') + sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"') + + with summary_ops.always_record_summaries(): + with self.create_summary_db_writer().as_default(): + self.assertEqual(9, adder(int64(4), int64(5)).numpy()) + + six.assertCountEqual(self, [1, 1, 1, 2, 2, 2], + get_all(self.db, 'SELECT step FROM Tensors')) + six.assertCountEqual(self, [x_id, y_id, sum_id], + get_all(self.db, 'SELECT tag_id FROM Tags')) + self.assertEqual(2, get_tensor(self.db, x_id, 1)) + self.assertEqual(3, get_tensor(self.db, y_id, 1)) + self.assertEqual(5, get_tensor(self.db, sum_id, 1)) + self.assertEqual(4, get_tensor(self.db, x_id, 2)) + self.assertEqual(5, get_tensor(self.db, y_id, 2)) + self.assertEqual(9, get_tensor(self.db, sum_id, 2)) + six.assertCountEqual( + self, ['experiment'], + get_all(self.db, 'SELECT experiment_name FROM Experiments')) + six.assertCountEqual(self, ['run'], + get_all(self.db, 'SELECT run_name FROM Runs')) + six.assertCountEqual(self, ['user'], + get_all(self.db, 'SELECT user_name FROM Users')) + + def testBadExperimentName(self): + with self.assertRaises(ValueError): + self.create_summary_db_writer(experiment_name='\0') + + def testBadRunName(self): + with self.assertRaises(ValueError): + self.create_summary_db_writer(run_name='\0') + + def testBadUserName(self): + with self.assertRaises(ValueError): + self.create_summary_db_writer(user_name='-hi') + with self.assertRaises(ValueError): + self.create_summary_db_writer(user_name='hi-') + with self.assertRaises(ValueError): + self.create_summary_db_writer(user_name='@') + + +def get_one(db, q, *p): + return db.execute(q, p).fetchone()[0] + + +def get_all(db, q, *p): + return unroll(db.execute(q, p).fetchall()) + + +def get_tensor(db, tag_id, step): + return get_one( + db, 'SELECT tensor FROM Tensors WHERE tag_id = ? AND step = ?', tag_id, + step) + + +def int64(x): + return array_ops.constant(x, dtypes.int64) + + +def unroll(list_of_tuples): + return sum(list_of_tuples, ()) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD index d8bbf87d2c..068e862650 100644 --- a/tensorflow/contrib/tensorboard/db/BUILD +++ b/tensorflow/contrib/tensorboard/db/BUILD @@ -45,10 +45,12 @@ cc_library( tf_cc_test( name = "summary_db_writer_test", + size = "small", srcs = ["summary_db_writer_test.cc"], deps = [ ":summary_db_writer", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/lib/db:sqlite", diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index df64e36305..a26ad61660 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -15,10 +15,12 @@ limitations under the License. #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" #include "tensorflow/contrib/tensorboard/db/schema.h" +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/db/sqlite.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/snappy.h" +#include "tensorflow/core/util/event.pb.h" namespace tensorflow { namespace { @@ -86,13 +88,19 @@ class SummaryDbWriter : public SummaryWriterInterface { TF_RETURN_IF_ERROR(BindTensor(t)); break; } - TF_RETURN_IF_ERROR(insert_tensor_.StepAndReset()); - return Status::OK(); + return insert_tensor_.StepAndReset(); } Status WriteEvent(std::unique_ptr e) override { - // TODO(@jart): This will be used to load event logs. - return errors::Unimplemented("WriteEvent"); + mutex_lock ml(mu_); + TF_RETURN_IF_ERROR(InitializeParents()); + if (e->what_case() == Event::WhatCase::kSummary) { + const Summary& summary = e->summary(); + for (int i = 0; i < summary.value_size(); ++i) { + TF_RETURN_IF_ERROR(WriteSummary(e.get(), summary.value(i))); + } + } + return Status::OK(); } Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { @@ -247,6 +255,24 @@ class SummaryDbWriter : public SummaryWriterInterface { return Status::OK(); } + Status WriteSummary(const Event* e, const Summary::Value& summary) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + int64 tag_id; + TF_RETURN_IF_ERROR(GetTagId(run_id_, summary.tag(), &tag_id)); + insert_tensor_.BindInt(1, tag_id); + insert_tensor_.BindInt(2, e->step()); + insert_tensor_.BindDouble(3, e->wall_time()); + switch (summary.value_case()) { + case Summary::Value::ValueCase::kSimpleValue: + insert_tensor_.BindDouble(4, summary.simple_value()); + break; + default: + // TODO(@jart): Handle the rest. + return Status::OK(); + } + return insert_tensor_.StepAndReset(); + } + mutex mu_; Env* env_; std::shared_ptr db_ GUARDED_BY(mu_); diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index d32904f97c..c1af51e7b7 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -14,14 +14,19 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/db/sqlite.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/util/event.pb.h" namespace tensorflow { namespace { +const float kTolerance = 1e-5; + Tensor MakeScalarInt64(int64 x) { Tensor t(DT_INT64, TensorShape({})); t.scalar()() = x; @@ -41,7 +46,7 @@ class FakeClockEnv : public EnvWrapper { class SummaryDbWriterTest : public ::testing::Test { protected: - void SetUp() override { db_ = Sqlite::Open("file::memory:").ValueOrDie(); } + void SetUp() override { db_ = Sqlite::Open(":memory:").ValueOrDie(); } void TearDown() override { if (writer_ != nullptr) { @@ -158,5 +163,54 @@ TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) { QueryString("SELECT tensor FROM Tensors WHERE step = 2").empty()); } +TEST_F(SummaryDbWriterTest, EmptyParentNames_NoParentsCreated) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy", + "this-is-metaaa")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Users")); + ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments")); + ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Runs")); + ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tags")); + ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tensors")); +} + +TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + std::unique_ptr e{new Event}; + e->set_step(7); + e->set_wall_time(123.456); + Summary::Value* s = e->mutable_summary()->add_value(); + s->set_tag("π"); + s->set_simple_value(3.14f); + s = e->mutable_summary()->add_value(); + s->set_tag("φ"); + s->set_simple_value(1.61f); + TF_ASSERT_OK(writer_->WriteEvent(std::move(e))); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tags")); + ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tensors")); + int64 tag1_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'π'"); + int64 tag2_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'φ'"); + EXPECT_GT(tag1_id, 0LL); + EXPECT_GT(tag2_id, 0LL); + EXPECT_EQ(123.456, QueryDouble(strings::StrCat( + "SELECT computed_time FROM Tensors WHERE tag_id = ", + tag1_id, " AND step = 7"))); + EXPECT_EQ(123.456, QueryDouble(strings::StrCat( + "SELECT computed_time FROM Tensors WHERE tag_id = ", + tag2_id, " AND step = 7"))); + EXPECT_NEAR(3.14, + QueryDouble(strings::StrCat( + "SELECT tensor FROM Tensors WHERE tag_id = ", tag1_id, + " AND step = 7")), + kTolerance); // Summary::simple_value is float + EXPECT_NEAR(1.61, + QueryDouble(strings::StrCat( + "SELECT tensor FROM Tensors WHERE tag_id = ", tag2_id, + " AND step = 7")), + kTolerance); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 6206963251..4169e842da 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6243,8 +6243,11 @@ tf_kernel_library( srcs = ["summary_kernels.cc"], deps = [ ":summary_interface", + "//tensorflow/contrib/tensorboard/db:summary_db_writer", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:summary_ops_op_lib", + "//tensorflow/core/lib/db:sqlite", ], ) diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc index 313137ae49..cd366f8c13 100644 --- a/tensorflow/core/kernels/summary_interface.cc +++ b/tensorflow/core/kernels/summary_interface.cc @@ -257,7 +257,9 @@ class SummaryWriterImpl : public SummaryWriterInterface { Summary::Value* v = e->mutable_summary()->add_value(); t.AsProtoTensorContent(v->mutable_tensor()); v->set_tag(tag); - v->mutable_metadata()->ParseFromString(serialized_metadata); + if (!serialized_metadata.empty()) { + v->mutable_metadata()->ParseFromString(serialized_metadata); + } return WriteEvent(std::move(e)); } diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index cfa707de71..1fe2fc5b66 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -13,9 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/kernels/summary_interface.h" +#include "tensorflow/core/lib/db/sqlite.h" +#include "tensorflow/core/platform/protobuf.h" namespace tensorflow { @@ -46,6 +49,32 @@ class CreateSummaryFileWriterOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("CreateSummaryFileWriter").Device(DEVICE_CPU), CreateSummaryFileWriterOp); +class CreateSummaryDbWriterOp : public OpKernel { + public: + explicit CreateSummaryDbWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("db_uri", &tmp)); + const string db_uri = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("experiment_name", &tmp)); + const string experiment_name = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("run_name", &tmp)); + const string run_name = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("user_name", &tmp)); + const string user_name = tmp->scalar()(); + SummaryWriterInterface* s; + auto db = Sqlite::Open(db_uri); + OP_REQUIRES_OK(ctx, db.status()); + OP_REQUIRES_OK( + ctx, CreateSummaryDbWriter(std::move(db.ValueOrDie()), experiment_name, + run_name, user_name, ctx->env(), &s)); + OP_REQUIRES_OK(ctx, CreateResource(ctx, HandleFromInput(ctx, 0), s)); + } +}; +REGISTER_KERNEL_BUILDER(Name("CreateSummaryDbWriter").Device(DEVICE_CPU), + CreateSummaryDbWriterOp); + class FlushSummaryWriterOp : public OpKernel { public: explicit FlushSummaryWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -98,6 +127,27 @@ class WriteSummaryOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU), WriteSummaryOp); +class ImportEventOp : public OpKernel { + public: + explicit ImportEventOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("event", &t)); + std::unique_ptr event{new Event}; + if (!ParseProtoUnlimited(event.get(), t->scalar()())) { + ctx->CtxFailureWithWarning( + errors::DataLoss("Bad tf.Event binary proto tensor string")); + return; + } + OP_REQUIRES_OK(ctx, s->WriteEvent(std::move(event))); + } +}; +REGISTER_KERNEL_BUILDER(Name("ImportEvent").Device(DEVICE_CPU), ImportEventOp); + class WriteScalarSummaryOp : public OpKernel { public: explicit WriteScalarSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc index f778b48797..5efbac7ad7 100644 --- a/tensorflow/core/ops/summary_ops.cc +++ b/tensorflow/core/ops/summary_ops.cc @@ -49,6 +49,33 @@ flush_millis: How often, in milliseconds, to flush the pending events and filename_suffix: Every event file's name is suffixed with this suffix. )doc"); +REGISTER_OP("CreateSummaryDbWriter") + .Input("writer: resource") + .Input("db_uri: string") + .Input("experiment_name: string") + .Input("run_name: string") + .Input("user_name: string") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Creates summary database writer accessible by given resource handle. + +This can be used to write tensors from the execution graph directly +to a database. Only SQLite is supported right now. This function +will create the schema if it doesn't exist. Entries in the Users, +Experiments, and Runs tables will be created automatically if they +don't already exist. + +writer: Handle to SummaryWriter resource to overwrite. +db_uri: For example "file:/tmp/foo.sqlite". +experiment_name: Can't contain ASCII control characters or <>. Case + sensitive. If empty, then the Run will not be associated with any + Experiment. +run_name: Can't contain ASCII control characters or <>. Case sensitive. + If empty, then each Tag will not be associated with any Run. +user_name: Must be valid as both a DNS label and Linux username. If + empty, then the Experiment will not be associated with any User. +)doc"); + REGISTER_OP("FlushSummaryWriter") .Input("writer: resource") .SetShapeFn(shape_inference::NoOutputs) @@ -89,6 +116,20 @@ summary_metadata: Serialized SummaryMetadata protocol buffer containing plugin-related metadata for this summary. )doc"); +REGISTER_OP("ImportEvent") + .Input("writer: resource") + .Input("event: string") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Outputs a `tf.Event` protocol buffer. + +When CreateSummaryDbWriter is being used, this op can be useful for +importing data from event logs. + +writer: A handle to a summary writer. +event: A string containing a binary-encoded tf.Event proto. +)doc"); + REGISTER_OP("WriteScalarSummary") .Input("writer: resource") .Input("global_step: int64") -- GitLab From 83d9635669c60fa75910999ceb0c18341a08843a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 11:27:04 -0800 Subject: [PATCH 0050/1801] Add comment describing how to get optimized builds in Dockerfile. PiperOrigin-RevId: 175036186 --- tensorflow/tools/docker/Dockerfile.devel | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 20e1dcd085..1a0145b078 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -83,6 +83,11 @@ ENV CI_BUILD_PYTHON python RUN tensorflow/tools/ci_build/builds/configured CPU \ bazel build -c opt --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + # For optimized builds appropriate for the hardware platform of your choosing, uncomment below... + # For ivy-bridge or sandy-bridge + # --copt=-march="ivybridge" \ + # for haswell, broadwell, or skylake + # --copt=-march="haswell" \ tensorflow/tools/pip_package:build_pip_package && \ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \ pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl && \ -- GitLab From 5856a5cef9cd9cfdf16add7024ba4910949c2604 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Wed, 8 Nov 2017 11:28:21 -0800 Subject: [PATCH 0051/1801] [XLA] More diagnostic information in the reshape shape inference error. PiperOrigin-RevId: 175036413 --- tensorflow/compiler/xla/service/shape_inference.cc | 5 ++++- tensorflow/compiler/xla/tests/reshape_test.cc | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 9c7dc2185e..dcd726f22c 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1948,7 +1948,10 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( !std::is_permutation(dimensions.begin(), dimensions.end(), indices.begin())) { return InvalidArgument( - "Reshape dimensions not a permutation of the operand dimensions."); + "Reshape dimensions [%s] are not a permutation of the operand " + "dimensions (operand shape is %s).", + tensorflow::str_util::Join(dimensions, ",").c_str(), + ShapeUtil::HumanString(operand).c_str()); } return inferred_shape; diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index 72c68f24a0..d235b9a158 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -431,8 +431,9 @@ XLA_TEST_F(ReshapeTest, ToScalar) { XLA_TEST_F(ReshapeTest, BadDimensions) { ComputationBuilder b(client_, TestName()); b.Reshape(b.ConstantR1({1}), {}, {}); - EXPECT_THAT(ExecuteToString(&b, {}), - ::testing::HasSubstr("dimensions not a permutation")); + EXPECT_THAT( + ExecuteToString(&b, {}), + ::testing::HasSubstr("not a permutation of the operand dimensions")); } XLA_TEST_F(ReshapeTest, BadNewSizes) { -- GitLab From f95d6a01d341231d18bb969b12e615a9cb066e00 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 11:30:23 -0800 Subject: [PATCH 0052/1801] Minor docstring fixes PiperOrigin-RevId: 175036743 --- tensorflow/python/ops/ctc_ops.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 477c0d1cb4..f037767cf4 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -22,8 +22,8 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import gen_ctc_ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_ctc_ops from tensorflow.python.ops.nn_grad import _BroadcastMul @@ -38,7 +38,8 @@ def ctc_loss(labels, inputs, sequence_length, [A. Graves, S. Fernandez, F. Gomez, J. Schmidhuber. Connectionist Temporal Classification: Labeling Unsegmented Sequence Data - with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf) + with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, + pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf) Input requirements: @@ -108,9 +109,9 @@ def ctc_loss(labels, inputs, sequence_length, See `core/ops/ctc_ops.cc` for more details. inputs: 3-D `float` `Tensor`. If time_major == False, this will be a `Tensor` shaped: - `[batch_size x max_time x num_classes]`. + `[batch_size, max_time, num_classes]`. If time_major == True (default), this will be a `Tensor` shaped: - `[max_time x batch_size x num_classes]`. + `[max_time, batch_size, num_classes]`. The logits. sequence_length: 1-D `int32` vector, size `[batch_size]`. The sequence lengths. @@ -120,15 +121,18 @@ def ctc_loss(labels, inputs, sequence_length, ignore_longer_outputs_than_inputs: Boolean. Default: False. If True, sequences with longer outputs than inputs will be ignored. time_major: The shape format of the `inputs` Tensors. - If True, these `Tensors` must be shaped `[max_time, batch_size, num_classes]`. - If False, these `Tensors` must be shaped `[batch_size, max_time, num_classes]`. - Using `time_major = True` (default) is a bit more efficient because it avoids - transposes at the beginning of the ctc_loss calculation. However, most - TensorFlow data is batch-major, so by this function also accepts inputs - in batch-major form. + If True, these `Tensors` must be shaped `[max_time, batch_size, + num_classes]`. + If False, these `Tensors` must be shaped `[batch_size, max_time, + num_classes]`. + Using `time_major = True` (default) is a bit more efficient because it + avoids transposes at the beginning of the ctc_loss calculation. However, + most TensorFlow data is batch-major, so by this function also accepts + inputs in batch-major form. Returns: - A 1-D `float` `Tensor`, size `[batch]`, containing the negative log probabilities. + A 1-D `float` `Tensor`, size `[batch]`, containing the negative log + probabilities. Raises: TypeError: if labels is not a `SparseTensor`. @@ -198,7 +202,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): Args: inputs: 3-D `float` `Tensor` sized - `[max_time x batch_size x num_classes]`. The logits. + `[max_time, batch_size, num_classes]`. The logits. sequence_length: 1-D `int32` vector containing sequence lengths, having size `[batch_size]`. merge_repeated: Boolean. Default: True. @@ -207,7 +211,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): A tuple `(decoded, neg_sum_logits)` where decoded: A single-element list. `decoded[0]` is an `SparseTensor` containing the decoded outputs s.t.: - `decoded.indices`: Indices matrix `(total_decoded_outputs x 2)`. + `decoded.indices`: Indices matrix `(total_decoded_outputs, 2)`. The rows store: `[batch, time]`. `decoded.values`: Values vector, size `(total_decoded_outputs)`. The vector stores the decoded classes. -- GitLab From 9c9dbe9740cb3ec385a3c9c6eb0ec57229486e90 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 8 Nov 2017 11:32:03 -0800 Subject: [PATCH 0053/1801] [XLA:CPU] Implement single threaded Matrix-Vector products in LLVM IR Right now we're always doing a 8x8 tiling on the matrix. This can probably be tuned further. There are some other follow-up items that I did not want to put in this already large CL: - Eigen has some smarts to avoid issuing unaligned vector loads and stores which the current CL does not. We need to investigate if being smart about alignment is worth it. - Prevent LLVM from vectorizing the epilogue. In fact we should disable loop vectorization for all the loops we've explicitly vectorized. - Cache the kernels by their shape to reduce code size impact. - Add aliasing information to the loads and stores emitted by the PacketSupportLibrary. This is probably not super critical since we've already vectorized the code, but we should do this for completeness. PiperOrigin-RevId: 175036991 --- tensorflow/compiler/xla/service/cpu/BUILD | 2 + .../xla/service/cpu/dot_op_emitter.cc | 564 +++++++++++++++++- .../compiler/xla/service/cpu/dot_op_emitter.h | 28 + .../xla/service/cpu/ir_emission_utils.cc | 17 +- .../xla/service/cpu/ir_emission_utils.h | 11 +- .../xla/service/cpu/layout_assignment.cc | 4 +- tensorflow/compiler/xla/service/llvm_ir/BUILD | 24 + .../service/llvm_ir/kernel_support_library.cc | 63 ++ .../service/llvm_ir/kernel_support_library.h | 124 ++++ .../compiler/xla/service/llvm_ir/llvm_util.cc | 8 + .../compiler/xla/service/llvm_ir/llvm_util.h | 2 + .../service/llvm_ir/vector_support_library.cc | 150 +++++ .../service/llvm_ir/vector_support_library.h | 174 ++++++ .../compiler/xla/tests/dot_operation_test.cc | 80 +++ 14 files changed, 1233 insertions(+), 18 deletions(-) create mode 100644 tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc create mode 100644 tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h create mode 100644 tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc create mode 100644 tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 6213baee2f..10ec677e2f 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -290,8 +290,10 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_module_config", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", + "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library", "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/compiler/xla/service/llvm_ir:vector_support_library", "//tensorflow/core:lib", "@llvm//:core", ], diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index e57d49172b..1cbd4094a3 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -25,7 +25,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" @@ -38,6 +40,450 @@ using llvm_ir::SetToFirstInsertPoint; namespace cpu { +namespace { +// Loads a tile of values from a 2D tensor. +class TileLoader { + public: + // Constructs a TileLoader that will load a tile consisting of + // `tile_size_along_major_dim` vectors from the matrix `matrix`, starting at + // `major_dim_offset` in the major dimension. The tile size along the minor + // dimension is the vector size, and that is implicitly determined by `vsl`. + TileLoader(VectorSupportLibrary* vsl, llvm::IRBuilder<>* ir_builder, + llvm::Value* matrix, int64 matrix_size_along_minor_dim, + llvm::Value* major_dim_offset, int64 tile_size_along_major_dim) + : vsl_(vsl) { + pointers_.reserve(tile_size_along_major_dim); + for (int64 i = 0; i < tile_size_along_major_dim; i++) { + llvm::Value* total_offset = ir_builder->CreateMul( + ir_builder->getInt64(matrix_size_along_minor_dim), + ir_builder->CreateAdd(ir_builder->getInt64(i), major_dim_offset)); + pointers_.push_back(vsl_->ComputeOffsetPointer(matrix, total_offset)); + } + } + + // Load a tile consisting of `tile_size_along_major_dim_` vectors starting at + // `major_dim_offset_` in the major dimension and `minor_dim_offset` in the + // minor dimension. + std::vector LoadTile(llvm::Value* minor_dim_offset) const { + std::vector result; + result.reserve(pointers_.size()); + for (const auto& pointer : pointers_) { + result.push_back(vsl_->LoadVector(pointer, minor_dim_offset)); + } + return result; + } + + private: + VectorSupportLibrary* vsl_; + std::vector pointers_; +}; + +// Computes a dot product between "[M,K]{0,1} lhs" with a [K,1] vector (the +// layout of the vector does not matter). This implementation uses a tiling +// scheme to improve performance. +// +// We logically separate the LHS matrix into four segments: +// +// +----------------------+---+ +// | | | +// | | | +// | A | B | +// | | | +// | | | +// | | | +// +----------------------+---+ +// | C | D | +// +----------------------+---+ +// +// where A is the largest submatrix of the LHS that can be evenly dividied into +// tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: +// +// +---+---+---+---+ +--+--+--+--+ +// |M00|M10|M20|M30| |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M01|M11|M21|M31| and |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M02|M12|M22|M32| |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M03|M13|M23|M33| |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// +// (Legend: rows are horizontal and columns are vertical; and each column is one +// llvm::Value of a vector type) +// +// where: +// +// a. The left tile is from the column major left matrix. +// b. The right tile is an elementwise broadcast of a [V0, V1, V2, V3] +// vector loaded from the RHS vector. +// +// As we iterate through the column dimension, we compute the change to the +// result vector by an elementwise multiplication between the two tiles above +// followed by a reduction along the major dimension: +// +// +-----------------------------------+ +// | M00*V0 + M10*V1 + M20*V2 + M30*V3 | +// +-----------------------------------+ +// | M01*V0 + M11*V1 + M21*V2 + M31*V3 | +// Result[R:R+4] += +-----------------------------------+ +// | M02*V0 + M12*V1 + M22*V2 + M32*V3 | +// +-----------------------------------+ +// | M03*V0 + M13*V1 + M23*V2 + M33*V3 | +// +-----------------------------------+ +// +// Where R is the starting row for the tile. +// +// We have an inner epilogue loop to deal with the "C" submatrix and an outer +// epilogue loop to deal with the B,D submarix. +// +// TODO(sanjoy): We should investigate if using gather loads and scatter stores +// can be used here have the same inner loop for both column-major and row-major +// matrix-vector products. +class ColumnMajorMatrixVectorProductEmitter { + public: + ColumnMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, + int64 tile_rows, int64 tile_cols, + int64 m, int64 k, llvm::Value* lhs, + llvm::Value* rhs, llvm::Value* result, + llvm::IRBuilder<>* ir_builder) + : scalar_type_(scalar_type), + tile_rows_(tile_rows), + tile_cols_(tile_cols), + m_(m), + k_(k), + lhs_(lhs), + rhs_(rhs), + result_(result), + ir_builder_(ir_builder), + ksl_(ir_builder_), + vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { + CHECK(tile_rows_ > 0 && IsPowerOfTwo(static_cast(tile_rows_))); + } + + void Emit(); + + private: + void EmitOuterLoopBody(llvm::Value* column, int64 column_count, + bool is_first_column); + + TileLoader GetLhsTileLoader(llvm::Value* column_start, int64 column_count) { + return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, + /*matrix_size_along_minor_dim=*/m_, + /*major_dim_offset=*/column_start, + /*tile_size_along_major_dim=*/column_count); + } + + // Load a tile of values from the RHS. For the RHS a "tile" is a contiguous + // sequnce of `count` values, each one broadcasted to the vector width. + std::vector LoadRhsTile(llvm::Value* offset, int64 count) { + llvm::Value* base_pointer = vsl_.ComputeOffsetPointer(rhs_, offset); + std::vector result; + result.reserve(count); + for (int64 i = 0; i < count; i++) { + result.push_back(vsl_.LoadBroadcast(base_pointer, i)); + } + return result; + } + + void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, + const std::vector& rhs_tile, + int64 columns, bool is_first_column); + + void EmitInnerLoopEpilogue(llvm::Value* current_tile_col, int64 columns, + bool is_first_tiled_column); + + PrimitiveType scalar_type_; + int64 tile_rows_; + int64 tile_cols_; + int64 m_; + int64 k_; + llvm::Value* lhs_; + llvm::Value* rhs_; + llvm::Value* result_; + llvm::IRBuilder<>* ir_builder_; + KernelSupportLibrary ksl_; + VectorSupportLibrary vsl_; +}; + +void ColumnMajorMatrixVectorProductEmitter::EmitOuterLoopBody( + llvm::Value* column, int64 column_count, bool is_first_column) { + TileLoader lhs_tile_loader = GetLhsTileLoader(/*column_start=*/column, + /*column_count=*/column_count); + + std::vector rhs_tile = + LoadRhsTile(column, /*count=*/column_count); + EmitInnerLoopTiled(&lhs_tile_loader, rhs_tile, + /*columns=*/column_count, is_first_column); + EmitInnerLoopEpilogue(column, /*columns=*/column_count, is_first_column); +} + +void ColumnMajorMatrixVectorProductEmitter::Emit() { + // See the comment on the class declaration for the algorithm used here. + int64 column_remainder = k_ % tile_cols_; + int64 column_limit = k_ - column_remainder; + + ksl_.For("dot.outer.tiled", + /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols_, + [&](llvm::Value* column, bool is_first_column) { + EmitOuterLoopBody(column, tile_cols_, is_first_column); + }); + + if (column_remainder != 0) { + EmitOuterLoopBody(ir_builder_->getInt64(column_limit), column_remainder, + column_limit == 0); + } +} + +void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( + TileLoader* lhs_tile_loader, const std::vector& rhs_tile, + int64 columns, bool is_first_column) { + int64 row_limit = m_ - (m_ % tile_rows_); + + ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/row_limit, + /*step=*/tile_rows_, [&](llvm::Value* row) { + std::vector lhs_tile = + lhs_tile_loader->LoadTile(/*minor_dim_offset=*/row); + llvm::Value* accumulator = is_first_column + ? vsl_.GetZeroVector() + : vsl_.LoadVector(result_, row); + for (int i = 0; i < columns; i++) { + accumulator = vsl_.MulAdd(lhs_tile[i], rhs_tile[i], accumulator); + } + vsl_.StoreVector(accumulator, result_, row); + }); +} + +void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( + llvm::Value* current_tile_col, int64 columns, bool is_first_tiled_column) { + int64 row_start = m_ - (m_ % tile_rows_); + if (row_start == m_) { + return; + } + + llvm::Value* columns_llvm = ir_builder_->getInt64(columns); + + // for (col = current_tile_col; col < (columns + current_tile_col); col++) + // for (row = row_start, row < m_; row++) { + // result[row] += lhs[row, col] * rhs[col] + // // Also take into account that if col is 0 then result[row] is not + // // initialized. + // } + + ksl_.For( + "dot.inner.epilg.outer", /*start=*/current_tile_col, + /*end=*/ir_builder_->CreateAdd(columns_llvm, current_tile_col), + /*step=*/1, /*peel_first_iteration=*/false, + [&](llvm::Value* col, llvm::Value* is_first_scalar_col) { + llvm::Value* rhs_element = vsl_.LoadScalar(rhs_, col); + llvm::Value* total_offset = + ir_builder_->CreateMul(col, ir_builder_->getInt64(m_)); + llvm::Value* lhs_base_pointer = + vsl_.ComputeOffsetPointer(lhs_, total_offset); + ksl_.For( + "dot.inner.epilg.inner", /*start=*/row_start, /*end=*/m_, + /*step=*/1, [&](llvm::Value* scalar_row) { + llvm::Value* product = vsl_.Mul( + vsl_.LoadScalar(lhs_base_pointer, scalar_row), rhs_element); + llvm::Value* setting_result_first_time = ir_builder_->CreateAnd( + is_first_scalar_col, + ir_builder_->getInt1(is_first_tiled_column)); + ksl_.If( + setting_result_first_time, + [&]() { vsl_.StoreScalar(product, result_, scalar_row); }, + [&]() { + vsl_.StoreScalar( + vsl_.Add(vsl_.LoadScalar(result_, scalar_row), product), + result_, scalar_row); + }); + }); + }); +} + +// Computes a dot product between "[M,K]{1,0} lhs" with a [K,1] vector (the +// layout of the vector does not matter). This implementation uses a tiling +// scheme to improve performance. +// +// We logically separate the LHS matrix into four segments: +// +// +----------------------+---+ +// | | | +// | | | +// | A | B | +// | | | +// | | | +// | | | +// +----------------------+---+ +// | C | D | +// +----------------------+---+ +// +// where A is the largest submatrix of the LHS that can be evenly dividied into +// tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: +// +// +---+---+---+---+ +// |M00|M10|M20|M30| +// +---+---+---+---+ +--+--+--+--+ +// |M01|M11|M21|M31| and |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M02|M12|M22|M32| +// +---+---+---+---+ +// |M03|M13|M23|M33| +// +---+---+---+---+ +// +// (Legend: rows are horizontal and columns are vertical; and each row is one +// llvm::Value of a vector type) +// +// where: +// +// a. The left tile is loaded from the row major left matrix. +// b. The right vector is loaded from the RHS vector. +// +// We keep 4 vector accumulators accumulating the following four vector +// expressions as we iterate over the row dimension: +// +// +------+------+------+------+ +// |M0I*V0|M1I*V1|M2I*V2|M3I*V3| for I in [0,4) +// +------+------+------+------+ +// +// In the end we do a horizontal reduction over these 4 vector accumulators to +// get 4 values in the result vector. +// +// We have an inner epilogue loop to deal with the "B" sub-matrix and an outer +// epilogue loop to deal with the C,D submatrix. +class RowMajorMatrixVectorProductEmitter { + public: + RowMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, int64 tile_rows, + int64 tile_cols, int64 m, int64 k, + llvm::Value* lhs, llvm::Value* rhs, + llvm::Value* result, + llvm::IRBuilder<>* ir_builder) + : scalar_type_(scalar_type), + tile_rows_(tile_rows), + tile_cols_(tile_cols), + m_(m), + k_(k), + lhs_(lhs), + rhs_(rhs), + result_(result), + ir_builder_(ir_builder), + ksl_(ir_builder_), + vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { + CHECK(tile_cols_ > 0 && IsPowerOfTwo(static_cast(tile_cols_))); + } + + void Emit(); + + private: + TileLoader GetLhsTileLoader(llvm::Value* row_start, int64 row_count) { + return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, + /*matrix_size_along_minor_dim=*/k_, + /*major_dim_offset=*/row_start, + /*tile_size_along_major_dim=*/row_count); + } + + void EmitOuterLoopBody(llvm::Value* row, int64 row_count); + + void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, int64 rows, + std::vector* vector_accumulators); + + void EmitInnerLoopEpilogue(llvm::Value* current_tile_row, int64 rows, + std::vector* scalar_accumulators); + + PrimitiveType scalar_type_; + int64 tile_rows_; + int64 tile_cols_; + int64 m_; + int64 k_; + llvm::Value* lhs_; + llvm::Value* rhs_; + llvm::Value* result_; + llvm::IRBuilder<>* ir_builder_; + KernelSupportLibrary ksl_; + VectorSupportLibrary vsl_; +}; + +void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row, + int64 row_count) { + TileLoader lhs_tile_loader = GetLhsTileLoader(/*row_start=*/row, + /*row_count=*/row_count); + std::vector vector_accumulators; + std::vector scalar_accumulators; + for (int i = 0; i < row_count; i++) { + vector_accumulators.emplace_back(&vsl_, vsl_.GetZeroVector()); + scalar_accumulators.emplace_back(&vsl_, vsl_.GetZeroScalar()); + } + EmitInnerLoopTiled(&lhs_tile_loader, /*rows=*/row_count, + &vector_accumulators); + EmitInnerLoopEpilogue(/*current_tile_row=*/row, /*rows=*/row_count, + &scalar_accumulators); + + for (int i = 0; i < row_count; i++) { + llvm::Value* result_value = + vsl_.Add(vsl_.AddReduce(vector_accumulators[i].Get()), + scalar_accumulators[i].Get()); + llvm::Value* offset = ir_builder_->CreateAdd(ir_builder_->getInt64(i), row); + vsl_.StoreScalar(result_value, result_, offset); + } +} + +void RowMajorMatrixVectorProductEmitter::Emit() { + // See the comment on the class declaration for the algorithm used here. + int64 row_remainder = m_ % tile_rows_; + int64 row_limit = m_ - row_remainder; + + ksl_.For("dot.outer.tiled", + /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows_, + [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows_); }); + + if (row_remainder != 0) { + EmitOuterLoopBody(ir_builder_->getInt64(row_limit), row_remainder); + } +} + +void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( + TileLoader* lhs_tile_loader, int64 rows, + std::vector* vector_accumulators) { + int64 column_limit = k_ - (k_ % tile_cols_); + + ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/column_limit, + /*step=*/tile_cols_, [&](llvm::Value* col) { + std::vector lhs_tile = + lhs_tile_loader->LoadTile(/*minor_dim_offset=*/col); + llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col); + for (int i = 0; i < rows; i++) { + llvm::Value* old_sum = (*vector_accumulators)[i].Get(); + (*vector_accumulators)[i].Set( + vsl_.Add(old_sum, vsl_.Mul(rhs_value, lhs_tile[i]))); + } + }); +} + +void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( + llvm::Value* current_tile_row, int64 rows, + std::vector* scalar_accumulators) { + int64 column_start = k_ - (k_ % tile_cols_); + if (column_start == k_) { + return; + } + + for (int r = 0; r < rows; r++) { + llvm::Value* total_offset = ir_builder_->CreateMul( + ir_builder_->CreateAdd(ir_builder_->getInt64(r), current_tile_row), + ir_builder_->getInt64(k_)); + llvm::Value* lhs_base_pointer = + vsl_.ComputeOffsetPointer(lhs_, total_offset); + ksl_.For("dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k_, + /*step=*/1, [&](llvm::Value* scalar_col) { + llvm::Value* product = + vsl_.Mul(vsl_.LoadScalar(lhs_base_pointer, scalar_col), + vsl_.LoadScalar(rhs_, scalar_col)); + llvm::Value* old_value = (*scalar_accumulators)[r].Get(); + (*scalar_accumulators)[r].Set(vsl_.Add(old_value, product)); + }); + } +} + +} // namespace + DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs, const llvm_ir::IrArray& target_array, @@ -72,6 +518,88 @@ DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, bool DotOpEmitter::ShapesAreLegalForRuntimeDot() const { return true; } +bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { + if (dot_.shape().dimensions_size() != 2 || + ProfitableToImplementDotInUntiledLlvmIr(dot_) == + DotInLlvmIrProfitable::kYes) { + return false; + } + + if (!primitive_util::IsFloatingPointType(dot_.shape().element_type()) && + !primitive_util::IsIntegralType(dot_.shape().element_type())) { + return false; + } + + MatMultDims mat_mult_dims = GetMatMultDims(); + bool is_column_major_matrix_vector = false; + bool is_row_major_matrix_vector = false; + + int64 m, k; + bool swap_operands; + + if (mat_mult_dims.m == 1) { + bool rhs_effectively_row_major = + transpose_rhs_ ^ !mat_mult_dims.rhs_column_major; + if (rhs_effectively_row_major) { + k = mat_mult_dims.k; + m = mat_mult_dims.n; + is_column_major_matrix_vector = true; + swap_operands = true; + } else { + k = mat_mult_dims.k; + m = mat_mult_dims.n; + is_row_major_matrix_vector = true; + swap_operands = true; + } + } + + if (mat_mult_dims.n == 1) { + bool lhs_effectively_column_major = + transpose_lhs_ ^ mat_mult_dims.lhs_column_major; + if (lhs_effectively_column_major) { + m = mat_mult_dims.m; + k = mat_mult_dims.k; + is_column_major_matrix_vector = true; + swap_operands = false; + } else { + m = mat_mult_dims.m; + k = mat_mult_dims.k; + is_row_major_matrix_vector = true; + swap_operands = false; + } + } + + if (!is_column_major_matrix_vector && !is_row_major_matrix_vector) { + return false; + } + + if (is_column_major_matrix_vector) { + VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m + << " and k = " << k; + ColumnMajorMatrixVectorProductEmitter emitter( + dot_.shape().element_type(), 8, 8, m, k, + swap_operands ? rhs_array_.GetBasePointer() + : lhs_array_.GetBasePointer(), + swap_operands ? lhs_array_.GetBasePointer() + : rhs_array_.GetBasePointer(), + target_array_.GetBasePointer(), ir_builder_); + emitter.Emit(); + } else { + VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m + << " and k = " << k; + RowMajorMatrixVectorProductEmitter emitter( + dot_.shape().element_type(), 8, 8, m, k, + swap_operands ? rhs_array_.GetBasePointer() + : lhs_array_.GetBasePointer(), + swap_operands ? lhs_array_.GetBasePointer() + : rhs_array_.GetBasePointer(), + target_array_.GetBasePointer(), ir_builder_); + emitter.Emit(); + } + + return true; +} + tensorflow::Status DotOpEmitter::Emit() { // The dot operation performs a sum of products over dimension 0 of the left // hand side operand and dimension 1 of the right hand side operand. @@ -105,6 +633,10 @@ tensorflow::Status DotOpEmitter::Emit() { return EmitScalarDot(); } + if (EmitLlvmIrDotIfProfitable()) { + return Status::OK(); + } + if (PotentiallyImplementedAsEigenDot(dot_)) { return EmitCallToRuntime(); } @@ -340,22 +872,17 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { // // Effectively this involves swapping the 'lhs' with 'rhs' and 'm' with 'n'. - const Shape& lhs_shape = lhs_array_.GetShape(); - const Shape& rhs_shape = rhs_array_.GetShape(); + MatMultDims mat_mult_dims = GetMatMultDims(); - CHECK(LayoutUtil::Equal(lhs_shape.layout(), rhs_shape.layout())); + CHECK_EQ(mat_mult_dims.lhs_column_major, mat_mult_dims.rhs_column_major); - int64 m = lhs_shape.dimensions(transpose_lhs_ ? 1 : 0); - int64 k = lhs_shape.dimensions(transpose_lhs_ ? 0 : 1); - int64 n = rhs_shape.dimensions(transpose_rhs_ ? 0 : 1); const llvm_ir::IrArray* lhs = &lhs_array_; const llvm_ir::IrArray* rhs = &rhs_array_; bool transpose_lhs = transpose_lhs_; bool transpose_rhs = transpose_rhs_; - bool is_column_major = lhs_shape.layout().minor_to_major(0) == 0; - if (!is_column_major) { - std::swap(m, n); + if (!mat_mult_dims.lhs_column_major) { + std::swap(mat_mult_dims.m, mat_mult_dims.n); std::swap(lhs, rhs); std::swap(transpose_lhs, transpose_rhs); } @@ -367,12 +894,27 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { float_ptr_type), ir_builder_->CreateBitCast(lhs->GetBasePointer(), float_ptr_type), ir_builder_->CreateBitCast(rhs->GetBasePointer(), float_ptr_type), - ir_builder_->getInt64(m), ir_builder_->getInt64(n), - ir_builder_->getInt64(k), ir_builder_->getInt32(transpose_lhs), + ir_builder_->getInt64(mat_mult_dims.m), + ir_builder_->getInt64(mat_mult_dims.n), + ir_builder_->getInt64(mat_mult_dims.k), + ir_builder_->getInt32(transpose_lhs), ir_builder_->getInt32(transpose_rhs)}); return tensorflow::Status::OK(); } +DotOpEmitter::MatMultDims DotOpEmitter::GetMatMultDims() const { + CHECK_EQ(dot_.shape().dimensions_size(), 2); + + const Shape& lhs_shape = lhs_array_.GetShape(); + const Shape& rhs_shape = rhs_array_.GetShape(); + + return {lhs_shape.dimensions(transpose_lhs_ ? 1 : 0), + lhs_shape.dimensions(transpose_lhs_ ? 0 : 1), + rhs_shape.dimensions(transpose_rhs_ ? 0 : 1), + lhs_shape.layout().minor_to_major(0) == 0, + rhs_shape.layout().minor_to_major(0) == 0}; +} + llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest( llvm_ir::ForLoopNest* loop_nest, const llvm_ir::IrArray& operand_array, int64 reduction_dimension, tensorflow::StringPiece name_suffix) { diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index cfc1066045..182e1b8c68 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -59,6 +59,10 @@ class DotOpEmitter { // LHS and RHS) and store the results in the target. tensorflow::Status EmitScalarDot(); + // Emit an LLVM IR implementation of the dot operation if we can. Returns + // true if an LLVM IR implementation was emitted. + bool EmitLlvmIrDotIfProfitable(); + // Emits a call to the CPU runtime to perform the matrix multiply. tensorflow::Status EmitCallToRuntime(); @@ -77,6 +81,30 @@ class DotOpEmitter { // no padding, and a rank of two. bool ShapesAreLegalForRuntimeDot() const; + // Represents the dimensions of a matrix-matrix multiply operation. + struct MatMultDims { + // The number of rows in the LHS. + int64 m; + + // The number of columns in the LHS, which is also must be equal to the + // number of rows in the RHS. + int64 k; + + // The number of columns on the RHS. + int64 n; + + // True if the LHS matrix column major. + bool lhs_column_major; + + // True if the RHS matrix column major. + bool rhs_column_major; + }; + + // Get the MatMultDims instance for the dot product this DotOpEmitter + // represents. Precondition: the dot is of rank 2 (and thus its operands are + // of rank 2 as well). + MatMultDims GetMatMultDims() const; + const HloInstruction& dot_; const bool transpose_lhs_; const bool transpose_rhs_; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc index b99b36a55e..7149a19310 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc @@ -105,7 +105,9 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } - if (ProfitableToImplementDotInLlvmIr(hlo) == DotInLlvmIrProfitable::kYes) { + if (ProfitableToImplementDotInUntiledLlvmIr(hlo) == + DotInLlvmIrProfitable::kYes || + ProfitableToImplementDotInTiledLlvmIr(hlo)) { return false; } @@ -136,7 +138,7 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } -DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( +DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( const HloInstruction& dot) { if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) { const Shape& result_shape = dot.shape(); @@ -178,5 +180,16 @@ DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( return DotInLlvmIrProfitable::kNo; } +bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) { + // Any Matrix-Vector product of floating point or integral type, or + // a transpose-dot fusion of the same can be lowered to a tiled LLVM + // IR implementation. + const Shape& shape = dot.shape(); + return shape.dimensions_size() == 2 && + (shape.dimensions(0) == 1 || shape.dimensions(1) == 1) && + (primitive_util::IsFloatingPointType(shape.element_type()) || + primitive_util::IsIntegralType(shape.element_type())); +} + } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h index 66656ed997..cbe07a7c2b 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h @@ -29,16 +29,21 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& dot); enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs }; // Returns a value to indicate if (and under what conditions) will lowering -// |dot| as a pure LLVM IR dot operation be profitable over calling into Eigen. -// Possible return values are: +// |dot| as a untiled LLVM IR dot operation be profitable over calling into +// Eigen or emitting a tiled LLVM IR implementation. Possible return values +// are: // // * DotInLlvmIrProfitable::kYes - always profitable. // * DotInLlvmIrProfitable::kNo - never profitable. // * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make // the Rhs layout column major. -DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( +DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( const HloInstruction& dot); +// Returns true to indicate that we can generate a tiled LLVM IR implementation +// for |dot|. +bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot); + } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc index c446b6b792..b75ca34e0a 100644 --- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc @@ -51,7 +51,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( tensorflow::gtl::FlatMap should_make_rhs_col_major_cache; auto should_make_rhs_col_major = [&](const HloInstruction& instruction) { - if (ProfitableToImplementDotInLlvmIr(instruction) != + if (ProfitableToImplementDotInUntiledLlvmIr(instruction) != DotInLlvmIrProfitable::kWithColumnMajorRhs) { return false; } @@ -68,7 +68,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( bool result = std::all_of( rhs->users().begin(), rhs->users().end(), [&](HloInstruction* user) { - return ProfitableToImplementDotInLlvmIr(*user) == + return ProfitableToImplementDotInUntiledLlvmIr(*user) == DotInLlvmIrProfitable::kWithColumnMajorRhs && user->operand(0) != rhs; }); diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 075d4a1ab5..8f24bb1718 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -155,6 +155,30 @@ cc_library( ], ) +cc_library( + name = "vector_support_library", + srcs = ["vector_support_library.cc"], + hdrs = ["vector_support_library.h"], + deps = [ + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "@llvm//:core", + ], +) + +cc_library( + name = "kernel_support_library", + srcs = ["kernel_support_library.cc"], + hdrs = ["kernel_support_library.h"], + deps = [ + ":llvm_loop", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/core:lib", + "@llvm//:core", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc new file mode 100644 index 0000000000..123a327d4d --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" + +namespace xla { +void KernelSupportLibrary::For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& for_body_generator) { + If(ir_builder_->CreateICmpSLT(start, end), [&]() { + for_body_generator(start, /*is_first_iteration=*/true); + For(name, ir_builder_->CreateAdd(start, step), end, step, + [&](llvm::Value* iv) { for_body_generator(iv, false); }); + }); +} + +void KernelSupportLibrary::For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, bool peel_first_iteration, + const std::function& for_body_generator) { + if (peel_first_iteration) { + For(name, start, end, step, true, + [&](llvm::Value* indvar, bool is_first_iteration) { + for_body_generator(indvar, ir_builder_->getInt1(is_first_iteration)); + }); + } else { + std::unique_ptr loop = llvm_ir::ForLoop::EmitForLoop( + name, start, end, step, ir_builder_, prevent_unrolling_); + ir_builder_->SetInsertPoint(&loop->GetBodyBasicBlock()->back()); + for_body_generator(loop->GetIndVarValue(), + /*is_first_iteration=*/ir_builder_->CreateICmpEQ( + loop->GetIndVarValue(), start)); + llvm_ir::SetToLastInsertPoint(loop->GetExitBasicBlock(), ir_builder_); + } +} + +void KernelSupportLibrary::If( + llvm::Value* condition, const std::function& true_block_generator, + const std::function& false_block_generator) { + llvm_ir::LlvmIfData if_data = + llvm_ir::EmitIfThenElse(condition, "", ir_builder_); + ir_builder_->SetInsertPoint(&if_data.true_block->back()); + true_block_generator(); + ir_builder_->SetInsertPoint(&if_data.false_block->back()); + false_block_generator(); + llvm_ir::SetToLastInsertPoint(if_data.after_block, ir_builder_); +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h new file mode 100644 index 0000000000..25aa2291a6 --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h @@ -0,0 +1,124 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ + +#include + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Value.h" +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace xla { +// A thin wrapper around llvm_loop.h to make code generating structured control +// flow more readable. +class KernelSupportLibrary { + public: + // `ir_builder` is the llvm::IRBuilder instance used to generate LLVM IR. + // If `prevent_unrolling` is true then unrolling is explicitly disabled on + // every loop generated by this instance of KernelSupportLibrary. + explicit KernelSupportLibrary(llvm::IRBuilder<>* ir_builder, + bool prevent_unrolling = true) + : ir_builder_(ir_builder), prevent_unrolling_(prevent_unrolling) {} + + // Generates the following control flow structure: + // + // if (`start` < `end`) { + // `for_body_generator(/*ind_var=*/start, /*is_first_iteration=*/true)`; + // for (i64 i = `start` + `step`; i s< `end`; i += `step`) + // `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/false)`; + // } + void For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& + for_body_generator); + + void For( + tensorflow::StringPiece name, int64 start, int64 end, int64 step, + const std::function& + for_body_generator) { + For(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), for_body_generator); + } + + // Generates the following control flow structure if `peel_first_iteration` is + // true: + // + // if (`start` < `end`) { + // `for_body_generator(/*ind_var=*/start, /*is_first_iteration=*/,true)`; + // for (i64 i = `start` + `step`; i s< `end`; i += `step`) + // `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/,false)`; + // } + // + // and the following if `peel_first_iteration` is false: + // + // for (i64 i = `start`; i s< `end`; i += `step`) + // `for_body_generator(/*ind_var=*/,i, + // /*is_first_iteration=*/,(i != `start`))`; + void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, bool peel_first_iteration, + const std::function& + for_body_generator); + + void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + int64 step, bool peel_first_iteration, + const std::function& + for_body_generator) { + For(name, /*start=*/start, /*end=*/end, + /*step=*/ir_builder_->getInt64(step), peel_first_iteration, + for_body_generator); + } + + void For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& for_body_generator) { + For(name, start, end, step, + /*peel_first_iteration=*/false, + [&](llvm::Value* indvar, llvm::Value*) { for_body_generator(indvar); }); + } + + void For( + tensorflow::StringPiece name, int64 start, int64 end, int64 step, + const std::function& for_body_generator) { + For(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), for_body_generator); + } + + // Generates the following control flow structure: + // + // if (`condition`) + // `true_block_generator()`; + // else + // `false_block_generator()`; + void If(llvm::Value* condition, + const std::function& true_block_generator, + const std::function& false_block_generator = []() {}); + + private: + llvm::IRBuilder<>* ir_builder_; + bool prevent_unrolling_; +}; +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 956c0d5f05..d95409e399 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -537,6 +537,14 @@ void SetToFirstInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder) { builder->SetInsertPoint(blk, blk->getFirstInsertionPt()); } +void SetToLastInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder) { + if (llvm::Instruction* terminator = blk->getTerminator()) { + builder->SetInsertPoint(terminator); + } else { + builder->SetInsertPoint(blk); + } +} + llvm::Value* CreateRor(llvm::Value* rotand, llvm::Value* rotor, llvm::IRBuilder<>* builder) { auto size = rotand->getType()->getPrimitiveSizeInBits(); diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index 304192b58e..f70d9f88b3 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -243,6 +243,8 @@ llvm::Instruction* AddRangeMetadata(int64 lower, int64 upper, void SetToFirstInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder); +void SetToLastInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder); + // Create a bitwise rotation of `rotand` by `rotor`. llvm::Value* CreateRor(llvm::Value* rotand, llvm::Value* rotor, llvm::IRBuilder<>* builder); diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc new file mode 100644 index 0000000000..e8c6a83618 --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc @@ -0,0 +1,150 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h" + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" + +namespace xla { +VectorSupportLibrary::VectorSupportLibrary(PrimitiveType primitive_type, + int64 vector_size, + llvm::IRBuilder<>* ir_builder, + std::string name) + : vector_size_(vector_size), + primitive_type_(primitive_type), + ir_builder_(ir_builder), + name_(std::move(name)) { + scalar_type_ = llvm_ir::PrimitiveTypeToIrType( + primitive_type, ir_builder_->GetInsertBlock()->getModule()); + scalar_pointer_type_ = llvm::PointerType::getUnqual(scalar_type_); + vector_type_ = llvm::VectorType::get(scalar_type_, vector_size); + vector_pointer_type_ = llvm::PointerType::getUnqual(vector_type_); +} + +llvm::Value* VectorSupportLibrary::Mul(llvm::Value* lhs, llvm::Value* rhs) { + if (scalar_type_->isFloatingPointTy()) { + return ir_builder()->CreateFMul(lhs, rhs, name()); + } else { + return ir_builder()->CreateMul(lhs, rhs, name()); + } +} + +llvm::Value* VectorSupportLibrary::Add(llvm::Value* lhs, llvm::Value* rhs) { + if (scalar_type_->isFloatingPointTy()) { + return ir_builder()->CreateFAdd(lhs, rhs, name()); + } else { + return ir_builder()->CreateAdd(lhs, rhs, name()); + } +} + +llvm::Value* VectorSupportLibrary::ComputeOffsetPointer( + llvm::Value* base_pointer, llvm::Value* offset_elements) { + if (base_pointer->getType() != scalar_pointer_type()) { + base_pointer = ir_builder()->CreateBitCast(base_pointer, + scalar_pointer_type(), name()); + } + return ir_builder()->CreateInBoundsGEP(base_pointer, {offset_elements}, + name()); +} + +llvm::Value* VectorSupportLibrary::LoadVector(llvm::Value* pointer) { + if (pointer->getType() != vector_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, vector_pointer_type(), name()); + } + return ir_builder()->CreateAlignedLoad( + pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_), name()); +} + +llvm::Value* VectorSupportLibrary::LoadScalar(llvm::Value* pointer) { + if (pointer->getType() != scalar_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); + } + return ir_builder()->CreateAlignedLoad( + pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_), name()); +} + +void VectorSupportLibrary::StoreVector(llvm::Value* value, + llvm::Value* pointer) { + if (pointer->getType() != vector_pointer_type()) { + pointer = ir_builder()->CreateBitCast(pointer, vector_pointer_type()); + } + ir_builder()->CreateAlignedStore( + value, pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_)); +} + +void VectorSupportLibrary::StoreScalar(llvm::Value* value, + llvm::Value* pointer) { + if (pointer->getType() != scalar_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); + } + ir_builder()->CreateAlignedStore( + value, pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_)); +} + +llvm::Value* VectorSupportLibrary::LoadBroadcast(llvm::Value* pointer) { + if (pointer->getType() != scalar_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); + } + return ir_builder()->CreateVectorSplat( + vector_size(), ir_builder()->CreateLoad(pointer), name()); +} + +llvm::Value* VectorSupportLibrary::AddReduce(llvm::Value* vector) { + llvm::SmallVector mask(vector_size(), nullptr); + for (unsigned i = vector_size(); i != 1; i >>= 1) { + // On every iteration, we shuffle half of the remaining lanes to the top + // half of shuffle, and add two old and the new vector. + + for (unsigned j = 0; j < vector_size(); ++j) { + if (j < (i / 2)) { + mask[j] = ir_builder()->getInt32(i / 2 + j); + } else { + mask[j] = llvm::UndefValue::get(ir_builder()->getInt32Ty()); + } + } + + llvm::Value* half_remaining_lanes = ir_builder()->CreateShuffleVector( + vector, llvm::UndefValue::get(vector_type()), + llvm::ConstantVector::get(mask), ""); + vector = Add(vector, half_remaining_lanes); + } + + return ir_builder()->CreateExtractElement(vector, ir_builder()->getInt32(0), + name()); +} + +llvm::Value* VectorSupportLibrary::GetZeroVector() { + return llvm::Constant::getNullValue(vector_type()); +} + +llvm::Value* VectorSupportLibrary::GetZeroScalar() { + return llvm::Constant::getNullValue(scalar_type()); +} + +LlvmVariable::LlvmVariable(llvm::Type* type, llvm::IRBuilder<>* ir_builder) + : ir_builder_(ir_builder) { + alloca_ = llvm_ir::EmitAllocaAtFunctionEntry(type, "", ir_builder_); +} + +llvm::Value* LlvmVariable::Get() { return ir_builder_->CreateLoad(alloca_); } + +void LlvmVariable::Set(llvm::Value* new_value) { + ir_builder_->CreateStore(new_value, alloca_); +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h new file mode 100644 index 0000000000..3072677ab0 --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h @@ -0,0 +1,174 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ + +#include + +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Value.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { +// A thin wrapper around llvm_util.h to make code generating vector math flow +// more readable. +class VectorSupportLibrary { + public: + // This VectorSupportLibrary instance remembers `primitive_type` and + // `vector_size`, and these are implicitly used by the methods on this + // instance (i.e. LoadVector will load a vector of type <`vector_size` x + // `primitive_type`>). + VectorSupportLibrary(PrimitiveType primitive_type, int64 vector_size, + llvm::IRBuilder<>* ir_builder, std::string name); + + llvm::Value* Mul(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* Mul(int64 lhs, llvm::Value* rhs) { + return Mul(ir_builder()->getInt64(lhs), rhs); + } + + llvm::Value* Add(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* Add(int64 lhs, llvm::Value* rhs) { + return Add(ir_builder()->getInt64(lhs), rhs); + } + + llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, llvm::Value* c) { + return Add(c, Mul(a, b)); + } + + llvm::Value* ComputeOffsetPointer(llvm::Value* base_pointer, + llvm::Value* offset_elements); + llvm::Value* ComputeOffsetPointer(llvm::Value* base_pointer, + int64 offset_elements) { + return ComputeOffsetPointer(base_pointer, + ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* LoadVector(llvm::Value* pointer); + + llvm::Value* LoadVector(llvm::Value* base_pointer, + llvm::Value* offset_elements) { + return LoadVector(ComputeOffsetPointer(base_pointer, offset_elements)); + } + + llvm::Value* LoadVector(llvm::Value* base_pointer, int64 offset_elements) { + return LoadVector(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* LoadScalar(llvm::Value* pointer); + + llvm::Value* LoadScalar(llvm::Value* base_pointer, + llvm::Value* offset_elements) { + return LoadScalar(ComputeOffsetPointer(base_pointer, offset_elements)); + } + + llvm::Value* LoadScalar(llvm::Value* base_pointer, int64 offset_elements) { + return LoadScalar(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + void StoreVector(llvm::Value* value, llvm::Value* pointer); + + void StoreVector(llvm::Value* value, llvm::Value* base_pointer, + llvm::Value* offset_elements) { + StoreVector(value, ComputeOffsetPointer(base_pointer, offset_elements)); + } + + void StoreVector(llvm::Value* value, llvm::Value* base_pointer, + int64 offset_elements) { + StoreVector(value, base_pointer, ir_builder()->getInt64(offset_elements)); + } + + void StoreScalar(llvm::Value* value, llvm::Value* pointer); + void StoreScalar(llvm::Value* value, llvm::Value* base_pointer, + llvm::Value* offset_elements) { + StoreScalar(value, ComputeOffsetPointer(base_pointer, offset_elements)); + } + + void StoreScalar(llvm::Value* value, llvm::Value* base_pointer, + int64 offset_elements) { + StoreScalar(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* LoadBroadcast(llvm::Value* pointer); + llvm::Value* LoadBroadcast(llvm::Value* base_pointer, + llvm::Value* offset_elements) { + return LoadBroadcast(ComputeOffsetPointer(base_pointer, offset_elements)); + } + llvm::Value* LoadBroadcast(llvm::Value* base_pointer, int64 offset_elements) { + return LoadBroadcast(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* AddReduce(llvm::Value* vector); + + llvm::Value* GetZeroVector(); + llvm::Value* GetZeroScalar(); + + llvm::IRBuilder<>* ir_builder() const { return ir_builder_; } + int64 vector_size() const { return vector_size_; } + llvm::Type* vector_type() const { return vector_type_; } + llvm::Type* vector_pointer_type() const { return vector_pointer_type_; } + llvm::Type* scalar_type() const { return scalar_type_; } + llvm::Type* scalar_pointer_type() const { return scalar_pointer_type_; } + + const std::string& name() const { return name_; } + + private: + int64 vector_size_; + PrimitiveType primitive_type_; + llvm::IRBuilder<>* ir_builder_; + llvm::Type* vector_type_; + llvm::Type* vector_pointer_type_; + llvm::Type* scalar_type_; + llvm::Type* scalar_pointer_type_; + std::string name_; +}; + +// This wraps an alloca-backed stack variable which LLVM's SSA construction pass +// can later convert to a SSA value. +class LlvmVariable { + public: + LlvmVariable(llvm::Type*, llvm::IRBuilder<>* ir_builder); + + llvm::Value* Get(); + void Set(llvm::Value* new_value); + + private: + llvm::AllocaInst* alloca_; + llvm::IRBuilder<>* ir_builder_; +}; + +class VectorVariable : public LlvmVariable { + public: + VectorVariable(VectorSupportLibrary* vector_support, + llvm::Value* initial_value) + : LlvmVariable(vector_support->vector_type(), + vector_support->ir_builder()) { + Set(initial_value); + } +}; + +class ScalarVariable : public LlvmVariable { + public: + ScalarVariable(VectorSupportLibrary* vector_support, + llvm::Value* initial_value) + : LlvmVariable(vector_support->scalar_type(), + vector_support->ir_builder()) { + Set(initial_value); + } +}; +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index cf089d748d..c4e422b506 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -277,6 +277,62 @@ XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorFF) { TestMatrixDot(260, 3, 520, false, false); } +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x8) { + TestMatrixDot(1, 8, 8, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x130x8) { + TestMatrixDot(1, 130, 8, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x130) { + TestMatrixDot(1, 8, 130, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x290x130) { + TestMatrixDot(1, 290, 130, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_2x1x1) { + TestMatrixDot(2, 1, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_8x8x1) { + TestMatrixDot(8, 8, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x1x1) { + TestMatrixDot(16, 1, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x3x1) { + TestMatrixDot(16, 3, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_3x3x1) { + TestMatrixDot(3, 3, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_29x29x1) { + TestMatrixDot(29, 29, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x2) { + TestMatrixDot(1, 8, 2, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x2x8) { + TestMatrixDot(1, 2, 8, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1) { + TestMatrixDot(259, 258, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1_FT) { + TestMatrixDot(259, 258, 1, false, true); +} + XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) { constexpr bool kLhsRowMajor = false; constexpr bool kRhsRowMajor = false; @@ -361,6 +417,30 @@ XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64) { TestNonsquareMatrixDot(); } +XLA_TEST_F(DotOperationTest, MatrixVectorC64) { + auto lhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + {{1.0, 2.0, 3.0, -4.0}}, {1, 0})) + .ConsumeValueOrDie(); + auto rhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + {{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}, {-4.0, 4.0}}, {1, 0})) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); + auto result = builder.Dot( + builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {1, 4}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {4, 2}), "rhs")); + + Array2D expected({{30.0, -2.0}}); + + ComputeAndCompareR2( + &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); +} + XLA_TEST_F(DotOperationTest, ConcurrentMatMul) { ComputationBuilder builder(client_, TestName()); auto matrix1 = builder.ConstantR2({{1.0, 2.0}, {3.0, 4.0}}); -- GitLab From 505cbf22813dbd17482170562eb91e09d652b835 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 11:35:53 -0800 Subject: [PATCH 0054/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 175037663 --- tensorflow/go/op/wrappers.go | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index bdfad48567..eb79da5384 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -62,6 +62,29 @@ func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, t return scope.AddOperation(opspec) } +// Outputs a `tf.Event` protocol buffer. +// +// When CreateSummaryDbWriter is being used, this op can be useful for +// importing data from event logs. +// +// Arguments: +// writer: A handle to a summary writer. +// event: A string containing a binary-encoded tf.Event proto. +// +// Returns the created operation. +func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ImportEvent", + Input: []tf.Input{ + writer, event, + }, + } + return scope.AddOperation(opspec) +} + // Outputs a `Summary` protocol buffer with a tensor. // // Arguments: @@ -22454,6 +22477,39 @@ func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input t return op.Output(0), op.Output(1), op.Output(2) } +// Creates summary database writer accessible by given resource handle. +// +// This can be used to write tensors from the execution graph directly +// to a database. Only SQLite is supported right now. This function +// will create the schema if it doesn't exist. Entries in the Users, +// Experiments, and Runs tables will be created automatically if they +// don't already exist. +// +// Arguments: +// writer: Handle to SummaryWriter resource to overwrite. +// db_uri: For example "file:/tmp/foo.sqlite". +// experiment_name: Can't contain ASCII control characters or <>. Case +// sensitive. If empty, then the Run will not be associated with any +// Experiment. +// run_name: Can't contain ASCII control characters or <>. Case sensitive. +// If empty, then each Tag will not be associated with any Run. +// user_name: Must be valid as both a DNS label and Linux username. If +// empty, then the Experiment will not be associated with any User. +// +// Returns the created operation. +func CreateSummaryDbWriter(scope *Scope, writer tf.Output, db_uri tf.Output, experiment_name tf.Output, run_name tf.Output, user_name tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "CreateSummaryDbWriter", + Input: []tf.Input{ + writer, db_uri, experiment_name, run_name, user_name, + }, + } + return scope.AddOperation(opspec) +} + // HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. type HistogramFixedWidthAttr func(optionalAttr) -- GitLab From 2eb8575a8d7bf7efcceb8283ba420c020ef35457 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 8 Nov 2017 11:54:14 -0800 Subject: [PATCH 0055/1801] Having with_gcp_support and windows causes build error. Multiple statements in a select statement should not be able to be true at the same time (unless one rule is more 'specific' than another). PiperOrigin-RevId: 175040618 --- tensorflow/BUILD | 91 ++++++++++++++++--- .../core/platform/default/build_config.bzl | 21 +++-- 2 files changed, 91 insertions(+), 21 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index f2cdf37dbf..5a408db94e 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -110,7 +110,7 @@ config_setting( config_setting( name = "no_tensorflow_py_deps", - values = {"define": "no_tensorflow_py_deps=true"}, + define_values = {"no_tensorflow_py_deps": "true"}, visibility = ["//visibility:public"], ) @@ -166,55 +166,116 @@ config_setting( # TODO(jhseu): Enable on other platforms other than Linux. config_setting( name = "with_jemalloc_linux_x86_64", - values = { - "cpu": "k8", - "define": "with_jemalloc=true", - }, + define_values = {"with_jemalloc": "true"}, + values = {"cpu": "k8"}, visibility = ["//visibility:public"], ) config_setting( name = "with_jemalloc_linux_ppc64le", - values = { - "cpu": "ppc", - "define": "with_jemalloc=true", - }, + define_values = {"with_jemalloc": "true"}, + values = {"cpu": "ppc"}, visibility = ["//visibility:public"], ) config_setting( name = "with_gcp_support", - values = {"define": "with_gcp_support=true"}, + define_values = {"with_gcp_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_hdfs_support", - values = {"define": "with_hdfs_support=true"}, + define_values = {"with_hdfs_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_s3_support", - values = {"define": "with_s3_support=true"}, + define_values = {"with_s3_support": "true"}, + visibility = ["//visibility:public"], +) + +# Crosses between platforms and file system libraries not supported on those +# platforms due to limitations in nested select() statements. +config_setting( + name = "with_gcp_support_windows_override", + define_values = {"with_gcp_support": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_hdfs_support_windows_override", + define_values = {"with_hdfs_support": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_s3_support_windows_override", + define_values = {"with_s3_support": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_gcp_support_android_override", + define_values = {"with_gcp_support": "true"}, + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_hdfs_support_android_override", + define_values = {"with_hdfs_support": "true"}, + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_s3_support_android_override", + define_values = {"with_s3_support": "true"}, + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_gcp_support_ios_override", + define_values = {"with_gcp_support": "true"}, + values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_hdfs_support_ios_override", + define_values = {"with_hdfs_support": "true"}, + values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_s3_support_ios_override", + define_values = {"with_s3_support": "true"}, + values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, visibility = ["//visibility:public"], ) config_setting( name = "with_xla_support", - values = {"define": "with_xla_support=true"}, + define_values = {"with_xla_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_gdr_support", - values = {"define": "with_gdr_support=true"}, + define_values = {"with_gdr_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_verbs_support", - values = {"define": "with_verbs_support=true"}, + define_values = {"with_verbs_support": "true"}, visibility = ["//visibility:public"], ) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 6225c2c705..5eeb861bdd 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -458,16 +458,25 @@ def tf_additional_lib_deps(): def tf_additional_core_deps(): return select({ + "//tensorflow:with_gcp_support_windows_override": [], + "//tensorflow:with_gcp_support_android_override": [], + "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/core/platform/cloud:gcs_file_system", ], "//conditions:default": [], }) + select({ + "//tensorflow:with_hdfs_support_windows_override": [], + "//tensorflow:with_hdfs_support_android_override": [], + "//tensorflow:with_hdfs_support_ios_override": [], "//tensorflow:with_hdfs_support": [ "//tensorflow/core/platform/hadoop:hadoop_file_system", ], "//conditions:default": [], }) + select({ + "//tensorflow:with_s3_support_windows_override": [], + "//tensorflow:with_s3_support_android_override": [], + "//tensorflow:with_s3_support_ios_override": [], "//tensorflow:with_s3_support": [ "//tensorflow/core/platform/s3:s3_file_system", ], @@ -477,9 +486,9 @@ def tf_additional_core_deps(): # TODO(jart, jhseu): Delete when GCP is default on. def tf_additional_cloud_op_deps(): return select({ - "//tensorflow:windows": [], - "//tensorflow:android": [], - "//tensorflow:ios": [], + "//tensorflow:with_gcp_support_windows_override": [], + "//tensorflow:with_gcp_support_android_override": [], + "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", ], @@ -489,9 +498,9 @@ def tf_additional_cloud_op_deps(): # TODO(jart, jhseu): Delete when GCP is default on. def tf_additional_cloud_kernel_deps(): return select({ - "//tensorflow:windows": [], - "//tensorflow:android": [], - "//tensorflow:ios": [], + "//tensorflow:with_gcp_support_windows_override": [], + "//tensorflow:with_gcp_support_android_override": [], + "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops", ], -- GitLab From b5634b5e071e94d876d52ce7837dae3c5f37c9ba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 12:03:44 -0800 Subject: [PATCH 0056/1801] Supports logits as a Tensor in MultiHead. PiperOrigin-RevId: 175042091 --- tensorflow/contrib/estimator/BUILD | 5 +- .../estimator/python/estimator/multi_head.py | 67 +++++-- .../python/estimator/multi_head_test.py | 188 +++++++++++++++++- 3 files changed, 244 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 6eb2cfdaca..bc67ef8354 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -204,10 +204,13 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:summary", "//tensorflow/python/estimator:head", + "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", "//tensorflow/python/saved_model:signature_constants", "@six_archive//:six", @@ -229,7 +232,7 @@ py_test( "//tensorflow/python:string_ops", "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", - "//tensorflow/python/ops/losses", + "//tensorflow/python/estimator:prediction_keys", "//tensorflow/python/saved_model:signature_constants", "//third_party/py/numpy", "@six_archive//:six", diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index 69dbfcee62..73bae5acf9 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -22,10 +22,13 @@ import six from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.saved_model import signature_constants +from tensorflow.python.summary import summary _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -72,6 +75,23 @@ def multi_head(heads, head_weights=None): estimator.train(input_fn=input_fn, steps=100) ``` + Also supports `logits` as a `Tensor` of shape + `[D0, D1, ... DN, logits_dimension]`. It will split the `Tensor` along the + last dimension and distribute it appropriately among the heads. E.g.: + + ```python + def model_fn(features, labels, mode): + # Create simple heads and specify head name. + head1 = multi_class_head(n_classes=3, name='head1') + head2 = binary_classification_head(name='head2') + # Create multi-head from two simple heads. + head = multi_head([head1, head2]) + # Create logits for the multihead. + logits = logit_fn(logits_dimension=head.logits_dimension) + # Return the merged EstimatorSpec + return head.create_estimator_spec(..., logits=logits, ...) + ``` + Args: heads: List or tuple of `_Head` instances. All heads must have `name` specified. The first head in the list is the default used at serving time. @@ -161,18 +181,17 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access def create_loss(self, features, mode, logits, labels): """See `Head`.""" - # TODO(roumposg): Add support for logits as single Tensor (with - # _split_logits utility). - if not isinstance(logits, dict): - raise ValueError('logits must be a dict. Single Tensor support coming ' - 'soon.') + if isinstance(logits, dict): + logits_dict = logits + else: + logits_dict = self._split_logits(logits) weighted_sum_losses = [] example_weight_sums = [] labels_by_head = {} for head in self._heads: (weighted_sum_loss, example_weight_sum, processed_labels) = head.create_loss( - features, mode, logits[head.name], labels[head.name]) + features, mode, logits_dict[head.name], labels[head.name]) weighted_sum_losses.append(weighted_sum_loss) example_weight_sums.append(example_weight_sum) labels_by_head[head.name] = processed_labels @@ -205,10 +224,10 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `_Head`.""" - # TODO(roumposg): Add support for logits as single Tensor (with - # _split_logits utility). - if not isinstance(logits, dict): - raise ValueError('logits must be a dict. Given: {}'.format(logits)) + if isinstance(logits, dict): + logits_dict = logits + else: + logits_dict = self._split_logits(logits) if labels and not isinstance(labels, dict): raise ValueError('labels must be a dict. Given: {}'.format(labels)) @@ -219,22 +238,42 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access head.create_estimator_spec( features=features, mode=mode, - logits=logits[head_name], + logits=logits_dict[head_name], labels=labels[head_name] if labels else None, train_op_fn=_no_op_train_fn)) - # TODO(roumposg): Add LOSS and LOSS_MEAN summaries for the total head- - # combined loss. if mode == model_fn.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError('train_op_fn can not be None in TRAIN mode.') - return self._merge_train(all_estimator_spec, train_op_fn) + spec = self._merge_train(all_estimator_spec, train_op_fn) + with ops.name_scope(''): + summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss) + return spec if mode == model_fn.ModeKeys.PREDICT: return self._merge_predict(all_estimator_spec) if mode == model_fn.ModeKeys.EVAL: return self._merge_eval(all_estimator_spec) raise ValueError('mode={} unrecognized'.format(mode)) + def _split_logits(self, logits): + """Splits logits along the last dimension and returns a dict.""" + logits_dict = {} + with ops.name_scope(None, 'split_logits', values=[logits]): + logits = ops.convert_to_tensor(logits) + batch_shape = array_ops.shape(logits)[:-1] + zeros_like_batch_shape = array_ops.zeros_like(batch_shape) + minus_ones_like_batch_shape = -1 * array_ops.ones_like(batch_shape) + begin_idx = 0 + for head in self._heads: + begin_tensor = array_ops.concat( + [zeros_like_batch_shape, [begin_idx]], axis=0) + size_tensor = array_ops.concat( + [minus_ones_like_batch_shape, [head.logits_dimension]], axis=0) + logits_dict[head.name] = array_ops.slice( + logits, begin=begin_tensor, size=size_tensor) + begin_idx += head.logits_dimension + return logits_dict + def _merge_train(self, all_estimator_spec, train_op_fn): """Merges list of `EstimatorSpec` for training. diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 16177aebd5..8d51a298b2 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -106,7 +106,8 @@ class MultiHeadTest(test.TestCase): multi_head = multi_head_lib.multi_head([head1, head2]) self.assertEqual('head1_head2', multi_head.name) - def test_predict_two_heads(self): + def test_predict_two_heads_logits_dict(self): + """Tests predict with logits as dict.""" head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') multi_head = multi_head_lib.multi_head([head1, head2]) @@ -158,6 +159,111 @@ class MultiHeadTest(test.TestCase): expected_probabilities['head2'], sess.run(spec.export_outputs['head2'].scores)) + def test_predict_two_heads_logits_tensor(self): + """Tests predict with logits as Tensor.""" + head1 = head_lib.multi_label_head(n_classes=2, name='head1') + head2 = head_lib.multi_label_head(n_classes=3, name='head2') + multi_head = multi_head_lib.multi_head([head1, head2]) + + logits = np.array( + [[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32) + expected_logits1 = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32) + expected_logits2 = np.array([[2., -2., 2.], [-3., 2., -2.]], + dtype=np.float32) + expected_probabilities = { + 'head1': _sigmoid(expected_logits1), + 'head2': _sigmoid(expected_logits2), + } + + spec = multi_head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) + + self.assertItemsEqual( + (_DEFAULT_SERVING_KEY, 'head1', 'classification/head1', 'predict/head1', + 'head2', 'classification/head2', 'predict/head2'), + spec.export_outputs.keys()) + + # Assert predictions and export_outputs. + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + self.assertIsNone(spec.scaffold.summary_op) + predictions = sess.run(spec.predictions) + self.assertAllClose( + expected_logits1, + predictions[('head1', prediction_keys.PredictionKeys.LOGITS)]) + self.assertAllClose( + expected_logits2, + predictions[('head2', prediction_keys.PredictionKeys.LOGITS)]) + self.assertAllClose( + expected_probabilities['head1'], + predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)]) + self.assertAllClose( + expected_probabilities['head2'], + predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)]) + + self.assertAllClose( + expected_probabilities['head1'], + sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores)) + self.assertAllClose( + expected_probabilities['head1'], + sess.run(spec.export_outputs['head1'].scores)) + self.assertAllClose( + expected_probabilities['head2'], + sess.run(spec.export_outputs['head2'].scores)) + + def test_predict_two_heads_logits_tensor_multi_dim(self): + """Tests predict with multi-dimensional logits of shape [2, 2, 5].""" + head1 = head_lib.regression_head(label_dimension=2, name='head1') + head2 = head_lib.regression_head(label_dimension=3, name='head2') + multi_head = multi_head_lib.multi_head([head1, head2]) + + logits = np.array( + [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], + [[-1.5, 1., -3., 2., -2.], [-1.5, 1., -3., 2., -2.]]], + dtype=np.float32) + expected_logits1 = np.array( + [[[-1., 1.], [-1., 1.]], + [[-1.5, 1.], [-1.5, 1.]]], + dtype=np.float32) + expected_logits2 = np.array( + [[[2., -2., 2.], [2., -2., 2.]], + [[-3., 2., -2.], [-3., 2., -2.]]], + dtype=np.float32) + + spec = multi_head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) + + self.assertItemsEqual( + (_DEFAULT_SERVING_KEY, 'head1', 'regression/head1', 'predict/head1', + 'head2', 'regression/head2', 'predict/head2'), + spec.export_outputs.keys()) + + # Assert predictions and export_outputs. + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + self.assertIsNone(spec.scaffold.summary_op) + predictions = sess.run(spec.predictions) + self.assertAllClose( + expected_logits1, + predictions[('head1', prediction_keys.PredictionKeys.PREDICTIONS)]) + self.assertAllClose( + expected_logits2, + predictions[('head2', prediction_keys.PredictionKeys.PREDICTIONS)]) + + self.assertAllClose( + expected_logits1, + sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].value)) + self.assertAllClose( + expected_logits1, + sess.run(spec.export_outputs['head1'].value)) + self.assertAllClose( + expected_logits2, + sess.run(spec.export_outputs['head2'].value)) + def test_eval_two_heads_with_weights(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') @@ -284,6 +390,84 @@ class MultiHeadTest(test.TestCase): # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13 self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol) + def test_train_create_loss_logits_tensor(self): + """Tests create_loss with logits Tensor.""" + weights1 = np.array([[1.], [2.]], dtype=np.float32) + weights2 = np.array([[2.], [3.]]) + head1 = head_lib.multi_label_head(n_classes=2, name='head1', + weight_column='weights1') + head2 = head_lib.multi_label_head(n_classes=3, name='head2', + weight_column='weights2') + multi_head = multi_head_lib.multi_head( + [head1, head2], head_weights=[1., 2.]) + + logits = np.array([[-10., 10., 20., -20., 20.], + [-15., 10., -30., 20., -20.]], dtype=np.float32) + labels = { + 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), + 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), + } + weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss( + features={ + 'x': np.array(((42,),), dtype=np.int32), + 'weights1': weights1, + 'weights2': weights2 + }, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels) + tol = 1e-3 + with self.test_session(): + # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]] + # = [10, 7.5] + # weighted_sum_loss = 1 * 10 + 2 * 7.5 = 25 + # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]] + # = [20, 10] + # weighted_sum_loss = 2 * 20 + 3 * 10 = 70 + # head-weighted merge = 1 * 25 + 2 * 70 = 165 + self.assertAllClose(165, weighted_sum_loss.eval(), rtol=tol, atol=tol) + # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13 + self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol) + + def test_train_create_loss_logits_tensor_multi_dim(self): + """Tests create_loss with multi-dimensional logits of shape [2, 2, 5].""" + head1 = head_lib.regression_head(label_dimension=2, name='head1') + head2 = head_lib.regression_head(label_dimension=3, name='head2') + multi_head = multi_head_lib.multi_head([head1, head2]) + + logits = np.array( + [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], + [[-1.5, 1.5, -2., 2., -2.], [-1.5, 1.5, -2., 2., -2.]]], + dtype=np.float32) + labels = { + 'head1': np.array([[[1., 0.], [1., 0.]], + [[1.5, 1.5], [1.5, 1.5]]], dtype=np.float32), + 'head2': np.array([[[0., 1., 0.], [0., 1., 0.]], + [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32), + } + # Loss for the first head: + # loss1 = (1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 + + # (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2 + # = 28 + # Loss for the second head: + # loss2 = (0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 + + # (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2 + # = 74 + expected_weighted_sum_loss = 28. + 74. + + weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss( + features={}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels) + tol = 1e-3 + with self.test_session(): + self.assertAllClose( + expected_weighted_sum_loss, weighted_sum_loss.eval(), + rtol=tol, atol=tol) + self.assertAllClose( + 2. * 2. * 5., example_weight_sum.eval(), rtol=tol, atol=tol) + def test_train_one_head(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') multi_head = multi_head_lib.multi_head([head1]) @@ -327,6 +511,7 @@ class MultiHeadTest(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries(self, { + metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss, # Average loss over examples. metric_keys.MetricKeys.LOSS_MEAN + '/head1': expected_loss / 2, @@ -387,6 +572,7 @@ class MultiHeadTest(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries(self, { + metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss_head1, metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2, # Average loss over examples. -- GitLab From aa3d321213acdbe3a2403c9081a14762b8e9bb36 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 12:21:40 -0800 Subject: [PATCH 0057/1801] Add padded_batch_and_drop_remainder and factor out shared filter_irregular_batches. PiperOrigin-RevId: 175045241 --- .../kernel_tests/batch_dataset_op_test.py | 225 +++++++++++------- .../contrib/data/python/ops/batching.py | 87 +++++-- 2 files changed, 207 insertions(+), 105 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 670f622c3c..951d4bb5f7 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -52,8 +52,9 @@ class BatchDatasetTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) - .repeat(count).batch(batch_size).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) + .repeat(count).batch(batch_size).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -69,7 +70,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(14): - self.assertAllEqual(component[(i*14 + j) % 7]**2, + self.assertAllEqual(component[(i * 14 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -84,12 +85,12 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(8): - self.assertAllEqual(component[(i*8 + j) % 7]**2, + self.assertAllEqual(component[(i * 8 + j) % 7]**2, result_component[j]) result = sess.run(get_next) for component, result_component in zip(components, result): for j in range((14 * 7) % 8): - self.assertAllEqual(component[((num_batches - 1)*8 + j) % 7]**2, + self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -107,10 +108,10 @@ class BatchDatasetTest(test.TestCase): seq_lens = array_ops.placeholder(dtypes.int32, shape=[None]) padded_shape = array_ops.placeholder(dtypes.int64, shape=[1]) - iterator = (dataset_ops.Dataset.from_tensor_slices(seq_lens) - .map(lambda x: array_ops.fill([x], x)).padded_batch( - 4, - padded_shapes=padded_shape).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(seq_lens) + .map(lambda x: array_ops.fill([x], x)).padded_batch( + 4, padded_shapes=padded_shape).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -118,35 +119,40 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: # Test with random sequence lengths, and max padding. random_seq_lens = np.random.randint(20, size=(32,)).astype(np.int32) - sess.run(init_op, feed_dict={padded_shape: [-1], - seq_lens: random_seq_lens}) + sess.run( + init_op, feed_dict={ + padded_shape: [-1], + seq_lens: random_seq_lens + }) for i in range(8): result = sess.run(get_next) padded_len = np.max(result) self.assertEqual((4, padded_len), result.shape) for j in range(4): - seq_len = random_seq_lens[(i*4)+j] + seq_len = random_seq_lens[(i * 4) + j] self.assertAllEqual(result[j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[j, seq_len:], [0] * (padded_len - seq_len)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) # Test with random sequence lengths, and constant padding. - sess.run(init_op, feed_dict={padded_shape: [25], - seq_lens: random_seq_lens}) + sess.run( + init_op, feed_dict={ + padded_shape: [25], + seq_lens: random_seq_lens + }) for i in range(8): result = sess.run(get_next) self.assertEqual((4, 25), result.shape) for j in range(4): - seq_len = random_seq_lens[(i*4)+j] + seq_len = random_seq_lens[(i * 4) + j] self.assertAllEqual(result[j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[j, seq_len:], [0] * (25 - seq_len)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) # Test correct handling of empty tensors. - sess.run(init_op, feed_dict={padded_shape: [-1], - seq_lens: [0, 0, 0, 0]}) + sess.run(init_op, feed_dict={padded_shape: [-1], seq_lens: [0, 0, 0, 0]}) result = sess.run(get_next) self.assertAllEqual([[], [], [], []], result) with self.assertRaises(errors.OutOfRangeError): @@ -154,8 +160,7 @@ class BatchDatasetTest(test.TestCase): # Test error handling with constant sequence lengths, and # too-short padding. - sess.run(init_op, feed_dict={padded_shape: [5], - seq_lens: [6, 5, 5, 5]}) + sess.run(init_op, feed_dict={padded_shape: [5], seq_lens: [6, 5, 5, 5]}) with self.assertRaises(errors.DataLossError): result = sess.run(get_next) @@ -166,11 +171,13 @@ class BatchDatasetTest(test.TestCase): def fill_tuple(x): filled = array_ops.fill([x], x) return (filled, string_ops.as_string(filled)) - iterator = (dataset_ops.Dataset.from_tensor_slices(seq_lens).map(fill_tuple) - .padded_batch( - 4, - padded_shapes=(padded_shape, padded_shape), - padding_values=(-1, "")).make_initializable_iterator()) + + iterator = ( + dataset_ops.Dataset.from_tensor_slices(seq_lens).map(fill_tuple) + .padded_batch( + 4, + padded_shapes=(padded_shape, padded_shape), + padding_values=(-1, "")).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -178,15 +185,18 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: # Test with random sequence lengths, and max padding. random_seq_lens = np.random.randint(20, size=(32,)).astype(np.int32) - sess.run(init_op, feed_dict={padded_shape: [-1], - seq_lens: random_seq_lens}) + sess.run( + init_op, feed_dict={ + padded_shape: [-1], + seq_lens: random_seq_lens + }) for i in range(8): result = sess.run(get_next) padded_len = np.max(result[0]) self.assertEqual((4, padded_len), result[0].shape) self.assertEqual((4, padded_len), result[1].shape) for j in range(4): - seq_len = random_seq_lens[(i*4)+j] + seq_len = random_seq_lens[(i * 4) + j] self.assertAllEqual(result[0][j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[0][j, seq_len:], [-1] * (padded_len - seq_len)) @@ -220,20 +230,21 @@ class BatchDatasetTest(test.TestCase): constant_op.constant([-1, -1], dtype=dtypes.int64), constant_op.constant([37], dtype=dtypes.int64))) - for dataset in [dynamic_padding_from_tensor_shapes, - dynamic_padding_from_lists, - dynamic_padding_from_lists_with_minus_one, - dynamic_padding_from_tensors]: + for dataset in [ + dynamic_padding_from_tensor_shapes, dynamic_padding_from_lists, + dynamic_padding_from_lists_with_minus_one, dynamic_padding_from_tensors + ]: self.assertEqual([None, None], dataset.output_shapes[0].as_list()) self.assertEqual([None, None, None], dataset.output_shapes[1].as_list()) self.assertEqual([None, 37], dataset.output_shapes[2].as_list()) def testDenseToSparseBatchDataset(self): components = np.random.randint(12, size=(100,)).astype(np.int32) - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x], x)).apply( - batching.dense_to_sparse_batch(4, [12])) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .map(lambda x: array_ops.fill([x], x)).apply( + batching.dense_to_sparse_batch(4, + [12])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -242,24 +253,26 @@ class BatchDatasetTest(test.TestCase): for start in range(0, len(components), 4): results = sess.run(get_next) + self.assertAllEqual([[i, j] + for i, c in enumerate(components[start:start + 4]) + for j in range(c)], results.indices) self.assertAllEqual( - [[i, j] for i, c in enumerate(components[start:start+4]) - for j in range(c)], results.indices) - self.assertAllEqual( - [c for c in components[start:start+4] for _ in range(c)], + [c for c in components[start:start + 4] for _ in range(c)], results.values) - self.assertAllEqual( - [min(4, len(components) - start), 12], results.dense_shape) + self.assertAllEqual([min(4, + len(components) - start), 12], + results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testDenseToSparseBatchDatasetWithUnknownShape(self): components = np.random.randint(5, size=(40,)).astype(np.int32) - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x, x], x)).apply( - batching.dense_to_sparse_batch( - 4, [5, -1])).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .map(lambda x: array_ops.fill([x, x], x)).apply( + batching.dense_to_sparse_batch( + 4, [5, -1])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -268,27 +281,30 @@ class BatchDatasetTest(test.TestCase): for start in range(0, len(components), 4): results = sess.run(get_next) - self.assertAllEqual( - [[i, j, z] for i, c in enumerate(components[start:start+4]) - for j in range(c) for z in range(c)], results.indices) - self.assertAllEqual( - [c for c in components[start:start+4] - for _ in range(c) for _ in range(c)], - results.values) - self.assertAllEqual( - [min(4, len(components) - start), - 5, - np.max(components[start:start+4])], - results.dense_shape) + self.assertAllEqual([[i, j, z] + for i, c in enumerate(components[start:start + 4]) + for j in range(c) + for z in range(c)], results.indices) + self.assertAllEqual([ + c + for c in components[start:start + 4] for _ in range(c) + for _ in range(c) + ], results.values) + self.assertAllEqual([ + min(4, + len(components) - start), 5, + np.max(components[start:start + 4]) + ], results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testDenseToSparseBatchDatasetWithInvalidShape(self): input_tensor = array_ops.constant([[1]]) - iterator = (dataset_ops.Dataset.from_tensors(input_tensor) - .apply(batching.dense_to_sparse_batch(4, [-2])) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensors(input_tensor).apply( + batching.dense_to_sparse_batch(4, [-2])) + .make_initializable_iterator()) init_op = iterator.initializer with self.test_session() as sess: @@ -298,8 +314,10 @@ class BatchDatasetTest(test.TestCase): def testDenseToSparseBatchDatasetShapeErrors(self): input_tensor = array_ops.placeholder(dtypes.int32) - iterator = (dataset_ops.Dataset.from_tensors(input_tensor).apply( - batching.dense_to_sparse_batch(4, [12])).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensors(input_tensor).apply( + batching.dense_to_sparse_batch(4, + [12])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -356,8 +374,7 @@ class BatchDatasetTest(test.TestCase): def testUnbatchMultiElementTupleDataset(self): data = tuple([(math_ops.range(10 * i, 10 * i + 10), - array_ops.fill([10], "hi")) - for i in range(3)]) + array_ops.fill([10], "hi")) for i in range(3)]) data = dataset_ops.Dataset.from_tensor_slices(data) expected_types = ((dtypes.int32, dtypes.string),) * 3 data = data.batch(2) @@ -370,9 +387,7 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: for i in range(10): - self.assertEqual(((i, b"hi"), - (10 + i, b"hi"), - (20 + i, b"hi")), + self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")), sess.run(op)) with self.assertRaises(errors.OutOfRangeError): @@ -385,9 +400,10 @@ class BatchDatasetTest(test.TestCase): batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = (dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size)) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(batch_size)) + .make_initializable_iterator()) next_element = iterator.get_next() @@ -404,14 +420,51 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testPaddedBatchAndDropRemainder(self): + els = [] + for length in [3, 6, 9, 4, 12, 10, 2]: + els.append((np.array(length), np.arange(length) + 1, + np.array(length * 2))) + + dataset = dataset_ops.Dataset.from_tensors(els[0]) + for el in els[1:]: + dataset = dataset.concatenate(dataset_ops.Dataset.from_tensors(el)) + + batch_size = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = ( + dataset.apply( + batching.padded_batch_and_drop_remainder( + batch_size, ([], [None], []))).make_initializable_iterator()) + + next_element = iterator.get_next() + + with self.test_session() as sess: + for test_batch_size in [1, 3, 7, 10]: + sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size}) + num_batches = 7 // test_batch_size + for i in range(num_batches): + result = sess.run(next_element) + for component_idx, result_component in enumerate(result): + for j in range(test_batch_size): + data_idx = i * test_batch_size + j + comp = result_component[j] + unpadded = comp[comp > 0] + if np.isscalar(comp): + # The boolean mask indexing above adds a dim back. Rm it. + unpadded = unpadded[0] + self.assertAllEqual(els[data_idx][component_idx], unpadded) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testBatchAndDropRemainderShapeInference(self): - components = (array_ops.placeholder(dtypes.int32), (array_ops.placeholder( - dtypes.int32, shape=[None]), array_ops.placeholder( - dtypes.int32, shape=[20, 30]))) + components = (array_ops.placeholder(dtypes.int32), + (array_ops.placeholder(dtypes.int32, shape=[None]), + array_ops.placeholder(dtypes.int32, shape=[20, 30]))) # Test with a statically known batch size. - dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(128))) + dataset = ( + dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(128))) self.assertIs(None, dataset.output_shapes[0].ndims) self.assertEqual([128], dataset.output_shapes[1][0].as_list()) @@ -420,8 +473,9 @@ class BatchDatasetTest(test.TestCase): # Test with a dynamic batch size: the static shape will be unknown, because # `batch_size` is a placeholder. batch_size = array_ops.placeholder(dtypes.int64) - dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size))) + dataset = ( + dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(batch_size))) self.assertIs(None, dataset.output_shapes[0].ndims) self.assertEqual([None], dataset.output_shapes[1][0].as_list()) @@ -441,9 +495,10 @@ class BatchDatasetTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = (dataset_ops.Dataset.from_tensor_slices(components).repeat(count) - .apply(batching.map_and_batch(_map_fn, batch_size)) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply( + batching.map_and_batch(_map_fn, batch_size)) + .make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -459,7 +514,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(14): - self.assertAllEqual(component[(i*14 + j) % 7]**2, + self.assertAllEqual(component[(i * 14 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -474,7 +529,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(8): - self.assertAllEqual(component[(i*8 + j) % 7]**2, + self.assertAllEqual(component[(i * 8 + j) % 7]**2, result_component[j]) # The last batch should fail with `OutOfRange`. with self.assertRaises(errors.OutOfRangeError): @@ -495,8 +550,9 @@ class BatchDatasetTest(test.TestCase): array_ops.check_numerics( constant_op.constant(1.0) / constant_op.constant(0.0), "oops")) batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = (dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) - .make_initializable_iterator()) + iterator = ( + dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) + .make_initializable_iterator()) init_op = iterator.initializer with self.test_session() as sess: with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"): @@ -504,6 +560,7 @@ class BatchDatasetTest(test.TestCase): def testBatchAndMapDatasetShapeMismatch(self): """Test a dataset that maps a TF function across its input elements.""" + def generator(): yield [1] yield [2] diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index abc9212a87..d4ade7adfd 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -103,6 +103,42 @@ def unbatch(): return _apply_fn +def filter_irregular_batches(batch_size): + """Transformation that filters out batches that are not of size batch_size.""" + + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + tensor_batch_size = ops.convert_to_tensor( + batch_size, dtype=dtypes.int64, name="batch_size") + + flattened = _RestructuredDataset(dataset, + tuple(nest.flatten(dataset.output_types))) + + def _predicate(*xs): + """Return `True` if this element is a full batch.""" + # Extract the dynamic batch size from the first component of the flattened + # batched element. + first_component = xs[0] + first_component_batch_size = array_ops.shape( + first_component, out_type=dtypes.int64)[0] + + return math_ops.equal(first_component_batch_size, tensor_batch_size) + + filtered = flattened.filter(_predicate) + + maybe_constant_batch_size = tensor_util.constant_value(tensor_batch_size) + + def _set_first_dimension(shape): + return shape.merge_with( + tensor_shape.vector(maybe_constant_batch_size).concatenate(shape[1:])) + + known_shapes = nest.map_structure(_set_first_dimension, + dataset.output_shapes) + return _RestructuredDataset(filtered, dataset.output_types, known_shapes) + + return _apply_fn + + def batch_and_drop_remainder(batch_size): """A batching transformation that omits the final small batch (if present). @@ -135,34 +171,43 @@ def batch_and_drop_remainder(batch_size): def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - tensor_batch_size = ops.convert_to_tensor( - batch_size, dtype=dtypes.int64, name="batch_size") + batched = dataset.batch(batch_size) + return filter_irregular_batches(batch_size)(batched) - batched = dataset.batch(tensor_batch_size) - flattened = _RestructuredDataset(batched, - tuple(nest.flatten(batched.output_types))) + return _apply_fn - def _predicate(*xs): - """Return `True` if this element is a full batch.""" - # Extract the dynamic batch size from the first component of the flattened - # batched element. - first_component = xs[0] - first_component_batch_size = array_ops.shape( - first_component, out_type=dtypes.int64)[0] - return math_ops.equal(first_component_batch_size, tensor_batch_size) +def padded_batch_and_drop_remainder(batch_size, + padded_shapes, + padding_values=None): + """A batching and padding transformation that omits the final small batch. - filtered = flattened.filter(_predicate) + Like @{tf.data.Dataset.padded_batch}, this transformation combines + consecutive elements of this dataset into batches. However, if the batch + size does not evenly divide the input dataset size, this transformation will + drop the final smaller element. - maybe_constant_batch_size = tensor_util.constant_value(tensor_batch_size) + See `@{tf.contrib.data.batch_and_drop_remainder}` for more details. - def _set_first_dimension(shape): - return shape.merge_with( - tensor_shape.vector(maybe_constant_batch_size).concatenate(shape[1:])) + Args: + batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of + consecutive elements of this dataset to combine in a single batch. + padded_shapes: A nested structure of `tf.TensorShape` or + `tf.int64` vector tensor-like objects. See + @{tf.data.Dataset.padded_batch} for details. + padding_values: (Optional.) A nested structure of scalar-shaped + `tf.Tensor`. See @{tf.data.Dataset.padded_batch} for details. - known_shapes = nest.map_structure(_set_first_dimension, - batched.output_shapes) - return _RestructuredDataset(filtered, batched.output_types, known_shapes) + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply} + """ + + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + batched = dataset.padded_batch( + batch_size, padded_shapes=padded_shapes, padding_values=padding_values) + return filter_irregular_batches(batch_size)(batched) return _apply_fn -- GitLab From 3a17aa5ea18e43e6974bbf6d5cef6d02edfada5c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 12:56:45 -0800 Subject: [PATCH 0058/1801] Support replacing tpu_config. PiperOrigin-RevId: 175049981 --- tensorflow/contrib/tpu/python/tpu/tpu_config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 3965c087a1..097acd5ee7 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -109,3 +109,12 @@ class RunConfig(run_config_lib.RunConfig): @property def tpu_config(self): return self._tpu_config + + def replace(self, **kwargs): + if 'tpu_config' not in kwargs: + return super(RunConfig, self).replace(**kwargs) + + tpu_config = kwargs.pop('tpu_config') + new_instance = super(RunConfig, self).replace(**kwargs) + new_instance._tpu_config = tpu_config # pylint: disable=protected-access + return new_instance -- GitLab From 8507c4a122c83fdad7b1855d5d43d51b6bd8009d Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 8 Nov 2017 13:00:12 -0800 Subject: [PATCH 0059/1801] Allow Operation._get_attr() to work with all attr types with C API enabled This is achieved by accessing the AttrValue directly and using the existing Python code instead of dispatching to the specific C API attr getter for every type. I started going down the dispatch path, but it turns out to be a lot of code (spread across Python, C, and SWIG), and this is likely good enough from a performance standpoint. We can optimize in the future if necessary. In addition, changes the colocation group logic to use _set_attr() and get_attr(), and makes _set_attr() work with the C API disabled. This allows the colocation tests to pass with both the C API enabled and disabled. Without these additional changes, the "_class" attribute would be set on the C NodeDef, and then it would try to retrieve it from the Python NodeDef. PiperOrigin-RevId: 175050473 --- tensorflow/c/c_api.cc | 4 +- tensorflow/c/c_api_test.cc | 4 +- tensorflow/python/client/tf_session.i | 10 --- tensorflow/python/framework/ops.py | 69 +++++++++++--------- tensorflow/python/framework/ops_test.py | 87 +++++++++++++------------ tensorflow/python/framework/test_ops.cc | 23 +++++++ 6 files changed, 109 insertions(+), 88 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 6dd1b99910..dd638de3c6 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -890,8 +890,8 @@ const tensorflow::AttrValue* GetAttrValue(TF_Operation* oper, TF_Status* status) { const tensorflow::AttrValue* attr = oper->node.attrs().Find(attr_name); if (attr == nullptr) { - status->status = - InvalidArgument("Operation has no attr named '", attr_name, "'."); + status->status = InvalidArgument("Operation '", oper->node.name(), + "' has no attr named '", attr_name, "'."); } return attr; } diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 05881e619b..e0057eb51c 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -383,7 +383,7 @@ TEST(CAPI, Graph) { EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s)); ASSERT_FALSE(GetAttrValue(feed, "missing", &attr_value, s)); - EXPECT_EQ(string("Operation has no attr named 'missing'."), + EXPECT_EQ(string("Operation 'feed' has no attr named 'missing'."), string(TF_Message(s))); // Make a constant oper with the scalar "3". @@ -1054,7 +1054,7 @@ class CApiColocationTest : public ::testing::Test { TF_OperationGetAttrMetadata(op, tensorflow::kColocationAttrName, s_); if (expected.empty()) { ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); - EXPECT_EQ(std::string("Operation has no attr named '_class'."), + EXPECT_EQ(std::string("Operation 'add' has no attr named '_class'."), std::string(TF_Message(s_))); return; } diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index f45bc13602..40731aba7d 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -344,16 +344,6 @@ bool PyTensorListToVector(PyObject* py_tensor_list, %rename("_TF_SetConfig") TF_SetConfig; %rename("_TF_NewSessionOptions") TF_NewSessionOptions; -// Create temporary int64_t to pass to TF_OperationGetAttrInt -%typemap(in, numinputs=0) int64_t* value (int64_t val) { - $1 = &val; -} - -// Convert value to Python int -%typemap(argout) int64_t* value { - $result = PyInt_FromLong(*$1); -} - %include "tensorflow/c/c_api.h" %include "tensorflow/c/python_api.h" diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index b256af2182..ad2e2993c1 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1641,13 +1641,15 @@ class Operation(object): default_colocation_group = [ compat.as_bytes("loc:@%s" % self._node_def.name) ] - if "_class" not in self._node_def.attr: + try: + class_attr = self.get_attr("_class") + except ValueError: # This op has no explicit colocation group, so it is itself its # own root of a colocation group. return default_colocation_group attr_groups = [ - class_name for class_name in self.get_attr("_class") + class_name for class_name in class_attr if class_name.startswith(b"loc:@") ] @@ -2062,16 +2064,19 @@ class Operation(object): def _set_attr(self, attr_name, attr_value): """Private method used to set an attribute in the node_def.""" - if not _USE_C_API: - assert "_set_attr not supported with _USE_C_API == False" - return - buf = c_api.TF_NewBufferFromString( - compat.as_bytes(attr_value.SerializeToString())) - try: - with errors.raise_exception_on_not_ok_status() as status: - c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf, status) # pylint: disable=protected-access - finally: - c_api.TF_DeleteBuffer(buf) + if _USE_C_API: + buf = c_api.TF_NewBufferFromString( + compat.as_bytes(attr_value.SerializeToString())) + try: + with errors.raise_exception_on_not_ok_status() as status: + # pylint: disable=protected-access + c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf, + status) + # pylint: enable=protected-access + finally: + c_api.TF_DeleteBuffer(buf) + else: + self._node_def.attr[attr_name].CopyFrom(attr_value) def get_attr(self, name): """Returns the value of the attr of this op with the given `name`. @@ -2085,25 +2090,24 @@ class Operation(object): Raises: ValueError: If this op does not have an attr with the given `name`. """ - if _USE_C_API: + fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] + if self._c_op: try: - # TODO(b/65162920): remove this try/except block when all attrs are - # implemented to use the _set_attr method instead of node_def.attr. - with errors.raise_exception_on_not_ok_status() as status: - metadata = c_api.TF_OperationGetAttrMetadata(self._c_op, name, status) - with errors.raise_exception_on_not_ok_status() as status: - if metadata.type == c_api.TF_ATTR_INT and metadata.is_list == 0: - return c_api.TF_OperationGetAttrInt(self._c_op, name, status) - except errors.InvalidArgumentError: - # Colocation ops are failing to find attrs begininning with "_*". They - # should fall through to the not-CAPI logic until the attribute is set - # via the C-API always. - pass + with c_api_util.tf_buffer() as buf: + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf, status) + data = c_api.TF_GetBuffer(buf) + except errors.InvalidArgumentError as e: + # Convert to ValueError for backwards compatibility. + raise ValueError(str(e)) + x = attr_value_pb2.AttrValue() + x.ParseFromString(data) + else: + if name not in self._node_def.attr: + raise ValueError( + "No attr named '" + name + "' in " + str(self._node_def)) + x = self._node_def.attr[name] - fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] - if name not in self._node_def.attr: - raise ValueError("No attr named '" + name + "' in " + str(self._node_def)) - x = self._node_def.attr[name] # Treat an empty oneof value as an empty list. if not x.WhichOneof("value"): return [] @@ -3103,9 +3107,10 @@ class Graph(object): ret._set_device(colocation_op.device) # pylint: disable=protected-access all_colocation_groups = sorted(set(all_colocation_groups)) - ret.node_def.attr["_class"].CopyFrom( - attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue( - s=all_colocation_groups))) + # pylint: disable=protected-access + ret._set_attr("_class", attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(s=all_colocation_groups))) + # pylint: enable=protected-access # Sets "container" attribute if # (1) self._container is not None diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 3087d6060b..4e931e00c5 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -31,9 +31,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_ops from tensorflow.python.framework import test_util from tensorflow.python.framework import versions @@ -357,54 +359,55 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual("", repr(op)) def testGetAttr(self): - # TODO(b/65162920): implement all tests for get_attr with C API + op = test_ops.default_attrs() + self.assertEqual(op.get_attr("string_val"), b"abc") + self.assertEqual(op.get_attr("string_list_val"), [b"abc", b""]) + self.assertEqual(op.get_attr("int_val"), 123) + self.assertEqual(op.get_attr("int_list_val"), [1, 2, 3]) + self.assertEqual(op.get_attr("float_val"), 10.0) + self.assertEqual(op.get_attr("float_list_val"), [10.0]) + self.assertEqual(op.get_attr("bool_val"), True) + self.assertEqual(op.get_attr("bool_list_val"), [True, False]) + self.assertEqual(op.get_attr("shape_val"), + tensor_shape.as_shape([2, 1]).as_proto()) + self.assertEqual(op.get_attr("shape_list_val"), + [tensor_shape.as_shape([]).as_proto(), + tensor_shape.as_shape([1]).as_proto()]) + self.assertEqual(op.get_attr("tensor_val"), + tensor_util.make_tensor_proto(1, dtypes.int32)) + self.assertEqual(op.get_attr("tensor_list_val"), + [tensor_util.make_tensor_proto(1, dtypes.int32)]) + + type_val = op.get_attr("type_val") + # First check that type_val is a DType, because the assertEquals will work + # no matter what since DType overrides __eq__ + self.assertIsInstance(type_val, dtypes.DType) + self.assertEqual(type_val, dtypes.int32) + + type_list_val = op.get_attr("type_list_val") + self.assertTrue(all(isinstance(x, dtypes.DType) for x in type_list_val)) + self.assertEqual(type_list_val, [dtypes.int32, dtypes.float32]) + + @function.Defun(dtypes.float32, func_name="MyFunc") + def func(x): + return x + + op = test_ops.func_attr(func) + self.assertEqual(op.get_attr("f"), + attr_value_pb2.NameAttrList(name="MyFunc")) + + # Try fetching missing attr if ops._USE_C_API: - op = test_ops.int_attr().op - self.assertEqual(op.get_attr("foo"), 1) - - op_str = test_ops.string_list_attr(a=["z"], b="y") - self.assertEqual(op_str.get_attr("a"), [b"z"]) - self.assertEqual(op_str.get_attr("b"), b"y") - + error_msg = "Operation 'FuncAttr' has no attr named 'FakeAttr'." else: - list_value = attr_value_pb2.AttrValue.ListValue() - - list_value.type.append(types_pb2.DT_STRING) - list_value.type.append(types_pb2.DT_DOUBLE) - op = ops.Operation( - ops._NodeDef( - "None", - "op1", - attrs={ - "value": - attr_value_pb2.AttrValue(i=32), - "dtype": - attr_value_pb2.AttrValue(type=types_pb2.DT_INT32), - "list": - attr_value_pb2.AttrValue(list=list_value), - "func": - attr_value_pb2.AttrValue( - func=attr_value_pb2.NameAttrList()) - }), ops.Graph(), [], [dtypes.int32]) - self.assertEqual(32, op.get_attr("value")) - self.assertEqual("", op.get_attr("func").name) - - d = op.get_attr("dtype") - # First check that d is a DType, because the assertEquals will - # work no matter what since DType overrides __eq__ - self.assertIsInstance(d, dtypes.DType) - self.assertEqual(dtypes.int32, d) - - l = op.get_attr("list") - for x in l: - self.assertIsInstance(x, dtypes.DType) - self.assertEqual([dtypes.string, dtypes.double], l) + error_msg = "No attr named 'FakeAttr' in name: \"FuncAttr\"" + + with self.assertRaisesRegexp(ValueError, error_msg): + op.get_attr("FakeAttr") # TODO(b/65162920): remove this test when users who are directly mutating the # node_def have been updated to proper usage. def testSetAttr(self): - if not ops._USE_C_API: - return op = test_ops.int_attr().op op._set_attr("foo", attr_value_pb2.AttrValue(i=2)) # TODO(skyewm): add node_def check diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc index a8b7fc543f..35e0167b26 100644 --- a/tensorflow/python/framework/test_ops.cc +++ b/tensorflow/python/framework/test_ops.cc @@ -341,4 +341,27 @@ REGISTER_OP("StringListAttr") .Attr("b: string") .SetShapeFn(shape_inference::UnknownShape); +REGISTER_OP("DefaultAttrs") + .Attr("string_val: string = 'abc'") + .Attr("string_list_val: list(string) = ['abc', '']") + .Attr("int_val: int = 123") + .Attr("int_list_val: list(int) = [1, 2, 3]") + .Attr("float_val: float = 10.0") + .Attr("float_list_val: list(float) = [10.0]") + .Attr("bool_val: bool = true") + .Attr("bool_list_val: list(bool) = [true, false]") + .Attr("type_val: type = DT_INT32") + .Attr("type_list_val: list(type) = [DT_INT32, DT_FLOAT]") + .Attr("shape_val: shape = { dim { size: 2 } dim { size: 1 } }") + .Attr("shape_list_val: list(shape) = [{}, { dim { size: 1} }]") + .Attr("tensor_val: tensor = { dtype: DT_INT32 tensor_shape: {} int_val: 1}") + .Attr( + "tensor_list_val: list(tensor) = " + "[{ dtype: DT_INT32 tensor_shape: {} int_val: 1}]") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("FuncAttr") + .Attr("f: func") + .SetShapeFn(shape_inference::UnknownShape); + } // end namespace tensorflow -- GitLab From 904abae95b2c88f4379e8133e5b8dfd2e2526ed0 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 13:19:27 -0800 Subject: [PATCH 0060/1801] Give a better error message when placeholders are used with eager PiperOrigin-RevId: 175053592 --- tensorflow/python/ops/array_ops.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 6b4919b16f..61bd41e7de 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1653,6 +1653,8 @@ def placeholder(dtype, shape=None, name=None): print(sess.run(y, feed_dict={x: rand_array})) # Will succeed. ``` + @compatibility{eager} Placeholders are not compatible with eager execution. + Args: dtype: The type of elements in the tensor to be fed. shape: The shape of the tensor to be fed (optional). If the shape is not @@ -1662,7 +1664,14 @@ def placeholder(dtype, shape=None, name=None): Returns: A `Tensor` that may be used as a handle for feeding a value, but not evaluated directly. + + Raises: + RuntimeError: if eager execution is enabled """ + if context.in_eager_mode(): + raise RuntimeError("tf.placeholder() is not compatible with " + "eager execution.") + return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name) @@ -1706,6 +1715,8 @@ def sparse_placeholder(dtype, shape=None, name=None): print(sess.run(y, feed_dict={x: sp_value})) # Will succeed. ``` + @compatibility{eager} Placeholders are not compatible with eager execution. + Args: dtype: The type of `values` elements in the tensor to be fed. shape: The shape of the tensor to be fed (optional). If the shape is not @@ -1715,7 +1726,14 @@ def sparse_placeholder(dtype, shape=None, name=None): Returns: A `SparseTensor` that may be used as a handle for feeding a value, but not evaluated directly. + + Raises: + RuntimeError: if eager execution is enabled """ + if context.in_eager_mode(): + raise RuntimeError("tf.placeholder() is not compatible with " + "eager execution.") + shape_name = (name + "/shape") if name is not None else None shape, rank = _normalize_sparse_shape(shape, shape_name) if shape is None: -- GitLab From fa318123adcf457f3ed92e617c6fa34a695d2279 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 8 Nov 2017 13:34:58 -0800 Subject: [PATCH 0061/1801] Optimize gradient subgraphs by taking advantage of symbolic shapes whenever possible. PiperOrigin-RevId: 175055770 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/constant_folding.cc | 100 ++++++++++++++++-- .../grappler/optimizers/constant_folding.h | 6 +- .../optimizers/constant_folding_test.cc | 53 ++++++++++ .../grappler/optimizers/meta_optimizer.cc | 4 +- 5 files changed, 153 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 669d02815c..54004a5e07 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -112,6 +112,7 @@ tf_cc_test( deps = [ ":constant_folding", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", "//tensorflow/core:all_kernels", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index cb02314183..a364ca487e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -95,11 +95,15 @@ class DeviceSimple : public DeviceBase { }; } // namespace -ConstantFolding::ConstantFolding(DeviceBase* cpu_device) - : cpu_device_(cpu_device) { +ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level, + DeviceBase* cpu_device) + : opt_level_(opt_level), cpu_device_(cpu_device) { resource_mgr_.reset(new ResourceMgr()); } +ConstantFolding::ConstantFolding(DeviceBase* cpu_device) + : ConstantFolding(RewriterConfig::ON, cpu_device) {} + // static string ConstantFolding::AddControlDependency(const string& input_name, GraphDef* graph, @@ -281,6 +285,84 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item, return Status::OK(); } +bool ShapesEqual(const TensorShapeProto& shape1, + const TensorShapeProto& shape2) { + if (shape1.unknown_rank() || shape2.unknown_rank()) { + return false; + } + if (shape1.dim_size() != shape2.dim_size()) { + return false; + } + for (int i = 0; i < shape1.dim_size(); ++i) { + if (shape1.dim(i).size() != shape2.dim(i).size()) { + return false; + } + } + return true; +} + +Status ConstantFolding::MaterializeConstants( + const GrapplerItem& item, const GraphProperties& properties) { + const int node_count = graph_.node_size(); + for (int i = 0; i < node_count; ++i) { + NodeDef& node = *graph_.mutable_node(i); + const string& op = node.op(); + if (op != "BroadcastGradientArgs") { + continue; + } + const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); + const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); + if (shape_node1 == nullptr || shape_node1->op() != "Shape" || + shape_node2 == nullptr || shape_node2->op() != "Shape") { + continue; + } + const std::vector& prop1 = + properties.GetInputProperties(shape_node1->name()); + const std::vector& prop2 = + properties.GetInputProperties(shape_node2->name()); + if (prop1.size() != 1 || prop2.size() != 1) { + continue; + } + const TensorShapeProto& shape1 = prop1[0].shape(); + const TensorShapeProto& shape2 = prop2[0].shape(); + if (ShapesEqual(shape1, shape2)) { + DataType type = node.attr().at("T").type(); + Tensor empty(type, TensorShape()); + NodeDef* out[2]; + for (int i = 0; i < 2; ++i) { + string const_name = AddPrefixToNodeName( + strings::StrCat(node.name(), "-", i), kConstantFoldingConst); + out[i] = node_map_->GetNode(const_name); + if (!out[i]) { + out[i] = graph_.add_node(); + *out[i] = CreateNodeDef(const_name, TensorValue(&empty)); + out[i]->set_device(node.device()); + node_map_->AddNode(const_name, out[i]); + string ctrl_dep = + AddControlDependency(node.name(), &graph_, node_map_.get()); + *out[i]->add_input() = ctrl_dep; + node_map_->AddOutput(NodeName(ctrl_dep), const_name); + } + } + + auto outputs = node_map_->GetOutputs(node.name()); + for (const auto& output : outputs) { + for (int k = 0; k < output->input_size(); ++k) { + int port; + string node_name = ParseNodeName(output->input(k), &port); + if (node_name == node.name() && port >= 0 && port < 2) { + *output->mutable_input(k) = out[port]->name(); + node_map_->UpdateInput(output->name(), node_name, + out[port]->name()); + } + } + } + } + } + + return Status::OK(); +} + bool ConstantFolding::IsFoldable(const NodeDef& node) const { // Folding not applicable to ops with no inputs. if (node.input().empty()) { @@ -921,23 +1003,25 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, } GraphProperties properties(item); + Status s = properties.InferStatically(); bool has_feed = !item.feed.empty(); - if (!has_feed) { + // bool has_feed = false; + if (!has_feed && s.ok()) { // Only use static shape information when there is no feed in the // graph. That's because it's possible to feed a placeholder with a tensor // of any shape, which could make the static information inconsistent with // the shapes actually fed. - Status s = properties.InferStatically(); - if (!s.ok()) { - VLOG(1) << "Failed to infer graph shapes: " << s; - } else { + if (s.ok()) { TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); } } + if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) { + TF_RETURN_IF_ERROR(MaterializeConstants(item, properties)); + } TF_RETURN_IF_ERROR(FoldGraph(output)); - if (!has_feed) { + if (!has_feed && s.ok()) { TF_RETURN_IF_ERROR(SimplifyGraph(output, properties)); } return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 30d778789a..dd988f336c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { namespace grappler { @@ -37,6 +38,7 @@ class ConstantFolding : public GraphOptimizer { NodeMap* node_map); ConstantFolding(DeviceBase* cpu_device); + ConstantFolding(RewriterConfig::Toggle opt_level, DeviceBase* cpu_device); ~ConstantFolding() override {} @@ -51,7 +53,8 @@ class ConstantFolding : public GraphOptimizer { private: Status MaterializeShapes(const GrapplerItem& item, const GraphProperties& properties); - + Status MaterializeConstants(const GrapplerItem& item, + const GraphProperties& properties); bool IsFoldable(const NodeDef& node) const; Status EvaluateNode(const NodeDef& node, @@ -74,6 +77,7 @@ class ConstantFolding : public GraphOptimizer { GraphDef* output); // Points to an externally provided device or to owned_device_; + RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; std::unique_ptr owned_device_; diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index a1dee6d2fb..17f9854b59 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/cc/ops/array_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -838,6 +839,58 @@ TEST_F(ConstantFoldingTest, Packing) { // size needed to naively encode 1000 floats folded twice). EXPECT_GT(8000, output.ByteSizeLong()); } + +TEST_F(ConstantFoldingTest, ConstantMaterialization) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = + ops::Placeholder(s.WithOpName("a"), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({-1, -1}))); + Output b = ops::Square(s.WithOpName("b"), a); + Output c = ops::Mul(s.WithOpName("c"), a, b); + Output d = ops::Shape(s.WithOpName("d"), a); + Output e = ops::Shape(s.WithOpName("e"), b); + auto f = ops::internal::BroadcastGradientArgs(s.WithOpName("f"), d, e); + Output o1 = ops::Identity(s.WithOpName("o1"), f.r0); + Output o2 = ops::Identity(s.WithOpName("o2"), f.r1); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + // Run a second time to make sure the optimization is idempotent. + item.graph.Swap(&output); + status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int found = 0; + for (const auto& node : output.node()) { + if (node.name() == "o1") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("ConstantFolding/f-0", node.input(0)); + } else if (node.name() == "o2") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("ConstantFolding/f-1", node.input(0)); + } else if (node.name() == "ConstantFolding/f-0") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^f", node.input(0)); + } else if (node.name() == "ConstantFolding/f-1") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^f", node.input(0)); + } + } + EXPECT_EQ(4, found); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index a9875c06d8..6204a81f80 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -64,8 +64,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr(new ModelPruner())); } if (cfg_.constant_folding() != RewriterConfig::OFF) { - optimizers.push_back( - std::unique_ptr(new ConstantFolding(cpu_device_))); + optimizers.push_back(std::unique_ptr( + new ConstantFolding(cfg_.constant_folding(), cpu_device_))); } if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( -- GitLab From 8f7aa84efea39b71b45040d89ef01fc15faa519b Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 8 Nov 2017 13:44:26 -0800 Subject: [PATCH 0062/1801] Moves imperative_grad to C Neutral-to-positive on all benchmarks. Also reduces overhead of should_record. PiperOrigin-RevId: 175057104 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/tape.cc | 312 +++++++++++++++++++- tensorflow/c/eager/tape.h | 58 +++- tensorflow/python/eager/BUILD | 7 +- tensorflow/python/eager/backprop.py | 14 +- tensorflow/python/eager/backprop_test.py | 57 +--- tensorflow/python/eager/imperative_grad.py | 194 +------------ tensorflow/python/eager/pywrap_tensor.cc | 8 +- tensorflow/python/eager/pywrap_tensor.h | 25 ++ tensorflow/python/eager/pywrap_tfe.h | 13 +- tensorflow/python/eager/pywrap_tfe_src.cc | 313 +++++++++++++++++---- tensorflow/python/eager/tape.py | 12 +- tensorflow/python/eager/tape_test.py | 20 -- tensorflow/python/pywrap_tfe.i | 4 +- 14 files changed, 702 insertions(+), 336 deletions(-) create mode 100644 tensorflow/python/eager/pywrap_tensor.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index c77896b80b..74e94be8d6 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -39,6 +39,7 @@ tf_cuda_library( tf_cuda_library( name = "c_api_internal", hdrs = ["c_api_internal.h"], + visibility = ["//tensorflow:internal"], deps = [ ":c_api", ":runtime", diff --git a/tensorflow/c/eager/tape.cc b/tensorflow/c/eager/tape.cc index 464612a81e..459499bb69 100644 --- a/tensorflow/c/eager/tape.cc +++ b/tensorflow/c/eager/tape.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include "tensorflow/c/eager/tape.h" namespace tensorflow { @@ -94,8 +96,314 @@ void GradientTape::DeleteTrace(int64 tensor_id) { op_tape_.erase(op_it); } -std::pair GradientTape::Export() { - return {std::move(tensor_tape_), std::move(op_tape_)}; +// Terminology: +// +// - op: a possibly composite operation, which has an entry in the tape +// - target: dy in dx/dy +// - source: dx in dx/dy +// - tensor: one of the many inputs or outputs of an operation +// +// Below here we do the gradient algorithm. It works as follows: +// +// First we filter the tape to just the subset of operations we want to +// differentiate. In the process of doing so we count how many times each Tensor +// is used as an input to an op (so we know when we're done computing gradients +// for that Tensor). We also count, for each tape entry, how many of its output +// Tensors need gradients to be computed (Tensors which are not used do not need +// any gradients to be computed). +// +// Finally, we start a backprop stack with a set of tape entries for which we +// have all gradients available. This set usually is a subset of the set of +// targets (not all since targets which have outputs in the tape will not have +// gradients available initially). +// +// Then we repeatedly pop an entry from the stack, run its backprop, and update +// the gradients of its inputs. Once we have computed all gradients for a single +// input we can mark this input as done, and this can trigger adding an entry to +// the stack if all outputs of that entry are now done. +// +// When the stack is empty we have gradients for all tensors we're interested +// in. + +struct BackpropInitialState { + OpTape op_tape; + + // Map from tensor ID to how many references still exist for this tensor in + // the tape. + std::unordered_map tensor_usage_counts; + + // Maps from op ID to how many output tensors of this op still need to have + // their gradients computed. + std::unordered_map op_missing_tensor; +}; + +BackpropInitialState PrepareBackprop( + gtl::ArraySlice target, const TensorTape& tensor_tape, + OpTape op_tape, const std::unordered_set& sources_set) { + std::vector tensor_stack; + tensor_stack.reserve(target.size()); + for (auto t : target) { + tensor_stack.push_back(t); + } + BackpropInitialState result; + while (!tensor_stack.empty()) { + int64 tensor_id = tensor_stack.back(); + tensor_stack.pop_back(); + auto op_id_it = tensor_tape.find(tensor_id); + if (op_id_it == tensor_tape.end()) { + continue; + } + int64 op_id = op_id_it->second; + auto op_it = op_tape.find(op_id); + auto result_op_it = result.op_tape.find(op_id); + if (op_id == -1 || op_it == op_tape.end() || + result_op_it != result.op_tape.end()) { + continue; + } + CHECK(result.op_tape.emplace(op_id, op_it->second).second); + for (auto it : op_it->second.input_tensor_id) { + auto count_it = result.tensor_usage_counts.find(it); + if (count_it != result.tensor_usage_counts.end()) { + count_it->second++; + } else { + result.tensor_usage_counts[it] = 1; + if (sources_set.find(it) == sources_set.end() && + tensor_tape.find(it) != tensor_tape.end()) { + tensor_stack.push_back(it); + } + } + } + op_tape.erase(op_it); + } + for (auto& pair : result.tensor_usage_counts) { + auto it = tensor_tape.find(pair.first); + if (it != tensor_tape.end() && it->second != -1) { + result.op_missing_tensor[it->second] += 1; + } + } + // Call destructors for all unneeded gradient functions. + for (const auto& op_pair : op_tape) { + op_pair.second.backward_function_deleter(); + } + return result; +} + +std::vector InitialStack( + const OpTape& op_tape, + const std::unordered_map& op_missing_tensor) { + std::vector result; + for (auto& op_entry : op_tape) { + if (op_missing_tensor.find(op_entry.first) == op_missing_tensor.end()) { + result.push_back(op_entry.first); + } + } + return result; +} + +Status InitialGradients(const VSpace& vspace, gtl::ArraySlice target, + gtl::ArraySlice output_gradients, + std::unordered_map tensor_usage_counts, + std::unordered_map>* result) { + for (int i = 0; i < target.size(); ++i) { + int64 id = vspace.TensorId(target[i]); + if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { + if (!output_gradients.empty() && output_gradients[i] != nullptr) { + // TODO(apassos) figure out how to print debugging information here. + return errors::InvalidArgument( + "A gradient was provided for a tensor which is used as part of the " + "computation."); + } + } else { + if (output_gradients.empty() || output_gradients[i] == nullptr) { + (*result)[id].push_back(vspace.OnesLike(target[i])); + } else { + (*result)[id].push_back(output_gradients[i]); + } + } + } + return Status::OK(); +} + +// If over kMinAggregateCount gradients are accumulated and the total +// memory consumption is over kMinAggregateBytes, do an early aggregation +// so as to release the gradient tensor to save memory. +static const int kMinAggregateCount = 4; +static const int kMinAggregateBytes = 128 * 1024 * 1024; + +Status GradientTape::Gradient(const VSpace& vspace, + gtl::ArraySlice target, + gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result) { + std::vector id_sources; + id_sources.reserve(sources.size()); + for (void* s : sources) { + id_sources.push_back(vspace.TensorId(s)); + } + std::unordered_set sources_set(id_sources.begin(), id_sources.end()); + std::vector id_targets; + id_sources.reserve(target.size()); + for (void* t : target) { + id_targets.push_back(vspace.TensorId(t)); + } + BackpropInitialState state = PrepareBackprop( + id_targets, tensor_tape_, std::move(op_tape_), sources_set); + std::vector op_stack = + InitialStack(state.op_tape, state.op_missing_tensor); + std::unordered_map> gradients; + Status s = InitialGradients(vspace, target, output_gradients, + state.tensor_usage_counts, &gradients); + auto cleanup = [&state]() { + // Release all backprop functions + for (const auto& pair : state.op_tape) { + pair.second.backward_function_deleter(); + } + }; + if (!s.ok()) { + cleanup(); + return s; + } + std::unordered_map gradients_size; + // TODO(apassos) multiple threads could be dequeuing from op_stack at the same + // time, for better CPU backprop performance. + VLOG(1) << "Initial stack:"; + if (VLOG_IS_ON(1)) { + for (auto t : op_stack) { + VLOG(1) << " " << t; + } + } + std::unordered_map> + functions_accept_none_for_indices({ + {"SoftmaxCrossEntropyWithLogits", {1}}, + {"FusedBatchNorm", {1, 2, 3, 4}}, + }); + while (!op_stack.empty()) { + const int64 op = op_stack.back(); + VLOG(1) << "Popped " << op; + op_stack.pop_back(); + auto op_it = state.op_tape.find(op); + if (op_it == state.op_tape.end()) { + // It is possible for ops to end up on the stack if they are unrelated to + // the target; we should just skip them. + continue; + } + auto trace = std::move(op_it->second); + state.op_tape.erase(op_it); + std::vector out_gradients; + out_gradients.reserve(trace.output_tensor_info.size()); + for (int i = 0; i < trace.output_tensor_info.size(); ++i) { + const int64 id = trace.output_tensor_info[i].id; + auto grad_it = gradients.find(id); + if (grad_it == gradients.end()) { + auto func_name_it = + functions_accept_none_for_indices.find(trace.op_type); + if (func_name_it != functions_accept_none_for_indices.end() && + func_name_it->second.find(i) != func_name_it->second.end()) { + out_gradients.push_back(nullptr); + } else { + out_gradients.push_back( + vspace.Zeros(trace.output_tensor_info[i].shape, + trace.output_tensor_info[i].dtype)); + } + } else { + out_gradients.push_back(vspace.AggregateGradients(grad_it->second)); + if (sources_set.find(grad_it->first) == sources_set.end()) { + gradients.erase(grad_it); + } + } + } + std::vector in_gradients; + Status s = vspace.CallBackwardFunction(trace.backward_function, + out_gradients, &in_gradients); + if (!s.ok()) { + VLOG(1) << "Gradient function failed."; + cleanup(); + return s; + } + VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " + << trace.input_tensor_id.size() << " sources"; + for (int i = 0; i < in_gradients.size(); ++i) { + const int64 id = trace.input_tensor_id[i]; + if (in_gradients[i] != nullptr) { + auto& unaggregated_grads = gradients[id]; + unaggregated_grads.push_back(in_gradients[i]); + if (unaggregated_grads.size() > kMinAggregateCount) { + auto size_it = gradients_size.find(id); + int64 size; + if (size_it == gradients_size.end()) { + size = vspace.NumElements(unaggregated_grads[0]); + gradients_size.emplace(id, size); + } else { + size = size_it->second; + } + if (unaggregated_grads.size() * size * 4 > kMinAggregateBytes) { + void* tensor = vspace.AggregateGradients(unaggregated_grads); + unaggregated_grads.clear(); + unaggregated_grads.push_back(tensor); + } + } + } + auto usage_count_it = state.tensor_usage_counts.find(id); + if (usage_count_it == state.tensor_usage_counts.end()) { + VLOG(1) << "Tensor " << id << " not used"; + continue; + } + usage_count_it->second--; + if (usage_count_it->second > 0) { + VLOG(1) << "Tensor " << id << " usage count " << usage_count_it->second; + continue; + } + auto tape_it = tensor_tape_.find(id); + if (tape_it == tensor_tape_.end()) { + VLOG(1) << "Tensor " << id + << " has no associated op. Deleting gradient"; + auto grad_it = gradients.find(id); + if (grad_it != gradients.end()) { + for (auto g : grad_it->second) { + vspace.DeleteTensor(g); + } + gradients.erase(grad_it); + } + continue; + } + const int64 op_id = tape_it->second; + if (op_id == -1) { + VLOG(1) << "Tensor " << id << " is source"; + continue; + } + auto missing_it = state.op_missing_tensor.find(op_id); + if (missing_it != state.op_missing_tensor.end()) { + missing_it->second--; + VLOG(1) << "Op " << op_id << " missing " << missing_it->second + << " output gradients"; + if (missing_it->second == 0) { + op_stack.push_back(op_id); + } + } + } + } + CHECK(state.op_tape.empty()); + result->reserve(sources.size()); + for (auto is : id_sources) { + auto grad_it = gradients.find(is); + if (grad_it == gradients.end()) { + result->push_back(nullptr); + } else { + if (grad_it->second.size() == 1) { + result->push_back(grad_it->second[0]); + } else { + result->push_back(vspace.AggregateGradients(grad_it->second)); + } + gradients.erase(grad_it); + } + } + VLOG(1) << "Final gradients size: " << gradients.size(); + for (auto grad_pair : gradients) { + for (const auto& g : grad_pair.second) { + vspace.DeleteTensor(g); + } + } + return Status::OK(); } } // namespace eager diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index df51f300eb..2bb62a7ab3 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -57,11 +57,57 @@ using TensorTape = std::unordered_map; // Map from operation-id to tape entry. using OpTape = std::unordered_map; +// Operations the tape needs to perform on tensors to do backpropagation. Named +// "vspace" because a subset of these are related to a vector space, such as +// adding gradients, getting zeroes, etc. Currently cannot be implemented +// without using tensorflow python code, hence left unspecified here. +// +// We currently use void* for tensors, backward functions, and gradients (which +// can be but are not required to be tensors). TODO(apassos) replace this first +// with templates to allow for pyobject specialization in the client followed by +// a TFE_TensorHandle specialization, which is blocked by quite a few things +// still. +class VSpace { + public: + virtual ~VSpace() {} + + // Returns the number of elements in the tensor. + virtual int64 NumElements(void* tensor) const = 0; + + // Consumes references to the tensors in the gradient_tensors list and returns + // a tensor with the result. + virtual void* AggregateGradients( + gtl::ArraySlice gradient_tensors) const = 0; + + // Returns a tensor of the right shape and dtype filled with zeros. + virtual void* Zeros(TensorShape shape, DataType dtype) const = 0; + + // Returns a Tensor which is filled with ones and like the input. + virtual void* OnesLike(void*) const = 0; + + // Returns an integer which is a unique-to-within-this-program handle for this + // tensor. + virtual int64 TensorId(void* tensor) const = 0; + + // Calls the passed-in backward function. + virtual Status CallBackwardFunction(void* backward_function, + gtl::ArraySlice output_gradients, + std::vector* result) const = 0; + + // Deletes the input tensor. + virtual void DeleteTensor(void* tensor) const = 0; +}; + // Traces the execution of operations, doing eager garbage collection, and // exporting a full trace so other code can do backpropagation. Not thread-safe. class GradientTape { public: GradientTape() {} + ~GradientTape() { + for (const auto& pair : op_tape_) { + pair.second.backward_function_deleter(); + } + } bool ShouldRecord(gtl::ArraySlice tensor_ids); @@ -75,10 +121,14 @@ class GradientTape { void DeleteTrace(int64 tensor_id); - // Note: it is only valid to call Export once per tape, and after calling - // export the tape is no longer valid (i.e. calls to ShouldRecord, Watch, - // Record, and Delete have undefined behavior). - std::pair Export(); + // Consumes the internal state of the tape (so cannot be called more than + // once) and produces the gradient of the target tensors with respect to the + // source tensors. The output gradients are used if not empty and not + // null. The result is populated with one tensor per target element. + Status Gradient(const VSpace& vspace, gtl::ArraySlice target, + gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result); private: TensorTape tensor_tape_; diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index bcd1e1d0dc..c36647b21c 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -14,11 +14,16 @@ cc_library( "pywrap_tensor.cc", "pywrap_tfe_src.cc", ], - hdrs = ["pywrap_tfe.h"], + hdrs = [ + "pywrap_tensor.h", + "pywrap_tfe.h", + ], visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/c:c_api", + "//tensorflow/c:c_api_internal", "//tensorflow/c/eager:c_api", + "//tensorflow/c/eager:c_api_internal", "//tensorflow/c/eager:tape", "//tensorflow/core:lib", "//tensorflow/python:ndarray_tensor", diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 86b3776b8c..111d7cef56 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -727,11 +727,23 @@ def _num_elements(grad): raise ValueError("`grad` not a Tensor or IndexedSlices.") +_last_shape_dtype = [None, None] +_last_zero = [None] + + +def _zeros(shape, dtype): + """Wraps array_ops.zeros to cache last zero for a given shape and dtype.""" + if [shape, dtype] != _last_shape_dtype: + _last_shape_dtype[:] = [shape, dtype] + _last_zero[0] = array_ops.zeros(shape, dtype) + return _last_zero[0] + + _default_vspace = imperative_grad.VSpace( num_elements_fn=_num_elements, aggregate_fn=_aggregate_grads, tensor_id=ops.tensor_id, - zeros=array_ops.zeros, + zeros=_zeros, ones_like=lambda x: ops.convert_to_tensor(array_ops.ones_like(x))) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index ed54b8e12e..ec9a185b73 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -24,11 +24,11 @@ from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient -from tensorflow.python.eager import imperative_grad from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops @@ -41,7 +41,6 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.training import training -from tensorflow.python.util import compat class BackpropTest(test.TestCase): @@ -103,6 +102,18 @@ class BackpropTest(test.TestCase): grad_fn = backprop.gradients_function(f) self.assertAllEqual(2., grad_fn(1., dy=2.)[0]) + def testErrors(self): + + @custom_gradient.custom_gradient + def f(x): + def grad(_): + raise RuntimeError('x') + return x, grad + + # TODO(apassos) raise the right error here + with self.assertRaises(errors_impl.InternalError): + backprop.gradients_function(f)(constant_op.constant(1.0)) + def testImplicitGradOverEmbeddingLookup(self): batch_size = 8 embedding_size = 512 @@ -483,48 +494,6 @@ class BackpropTest(test.TestCase): initial_value=1., name='testSameObjectForMultipleArguments.Variable') self.assertAllEqual([1., 1.], np_g(v, v)) - def testEarlyGradAggregation(self): - # Needs to be a list so mutations by the callback affect this function. - add_n = [] - def callback(op_type, unused_1, unused_2, unused_3, unused_4): - if compat.as_bytes(op_type) == compat.as_bytes('AddN'): - add_n.append(1) - context.context().add_post_execution_callback(callback) - - v = resource_variable_ops.ResourceVariable(constant_op.constant(2.0), - name='v') - def fn(): - outputs = [] - for _ in range(20): - outputs.append(v * constant_op.constant(2.0)) - return math_ops.add_n(outputs) - - # By default the aggregation count is 2. - _ = backprop.implicit_grad(fn)()[0][1] - self.assertEqual(len(add_n), 2) - del add_n[:] - - # Reduce the aggregation limit, cause the backprop to do some - # early aggregation. - # pylint: disable=protected-access - old_cnt = imperative_grad._MIN_AGGREGATE_COUNT - old_bytes = imperative_grad._MIN_AGGREGATE_BYTES - imperative_grad._MIN_AGGREGATE_COUNT = 10 - imperative_grad._MIN_AGGREGATE_BYTES = 1 - _ = backprop.implicit_grad(fn)() - self.assertEqual(len(add_n), 6) - del add_n[:] - - # Aggregation is also limited by the memory. - imperative_grad._MIN_AGGREGATE_BYTES = 10000 - _ = backprop.implicit_grad(fn)() - self.assertEqual(len(add_n), 2) - - imperative_grad._MIN_AGGREGATE_COUNT = old_cnt - imperative_grad._MIN_AGGREGATE_BYTES = old_bytes - # pylint: enable=protected-access - context.context().clear_post_execution_callbacks() - def testImplicitGradientsCustomGradientAndCachedVariableValue(self): @custom_gradient.custom_gradient diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index c87719f84a..8932b7157b 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -20,102 +20,8 @@ from __future__ import print_function import collections -from tensorflow.python.eager import tape as tape_module - - -# Terminology: -# -# - op: a possibly composite operation, which has an entry in the tape -# - target: dy in dx/dy -# - source: dx in dx/dy -# - tensor: one of the many inputs or outputs of an operation -# -# Below here we do the gradient algorithm. It works as follows: -# -# First we filter the tape to just the subset of operations we want to -# differentiate. In the process of doing so we count how many times each Tensor -# is used as an input to an op (so we know when we're done computing gradients -# for that Tensor). We also count, for each tape entry, how many of its output -# Tensors need gradients to be computed (Tensors which are not used do not need -# any gradients to be computed). -# -# Finally, we start a backprop stack with a set of tape entries for which we -# have all gradients available. This set usually is a subset of the set of -# targets (not all since targets which have outputs in the tape will not have -# gradients available initially). -# -# Then we repeatedly pop an entry from the stack, run its backprop, and update -# the gradients of its inputs. Once we have computed all gradients for a single -# input we can mark this input as done, and this can trigger adding an entry to -# the stack if all outputs of that entry are now done. -# -# When the stack is empty we have gradients for all tensors we're interested in. -def _prepare_backprop(vspace, target, tensor_to_op, op_to_entry, id_sources): - """Filters the tape to only include relevant entries and counts tensor usages. - - Args: - vspace: information about the space we're differentiating in. - target: the target to optimize. - tensor_to_op: Map from tensor id to key in op_to_entry that produced it. - op_to_entry: Map from op id to a tape.TapeEntry object - id_sources: the ids of the sources wrt the gradient is being taken. - - Returns: - usage counts (how many entries downstream from a tensor use it) - op_to_entry_map: entry map (a filtered tape, with only the relevant - entries), - missing: map from tensor id to how many downstream gradients still need - to be computed before this tensor's gradient can be computed. - """ - tensor_stack = [vspace.tensor_id(x) for x in target] - tensor_usage_counts = {} - o_to_e = {} # Copy of just the bits we need from op_to_entry - while tensor_stack: - t = tensor_stack.pop() - op = tensor_to_op.get(t, None) - # op is None or -1 if the tensor is a source (i.e. was watched directly) - if op is None or op == -1 or op in o_to_e: - continue - op_trace = tape_module.TapeEntry(*op_to_entry[op]) - o_to_e[op] = op_trace - for it in op_trace.input_ids: - if it in tensor_usage_counts: - tensor_usage_counts[it] += 1 - else: - tensor_usage_counts[it] = 1 - if it not in id_sources and it in tensor_to_op: - tensor_stack.append(it) - op_missing_tensor_counts = collections.defaultdict(int) - for t in tensor_usage_counts: - if t in tensor_to_op and tensor_to_op[t] is not None: - op_missing_tensor_counts[tensor_to_op[t]] += 1 - return tensor_usage_counts, o_to_e, op_missing_tensor_counts - - -def _initialize_backprop_stack(op_to_entry, op_missing_tensor): - """Returns the set of tape entries which are available for backprop.""" - ready_ops = [] - for op in op_to_entry: - if op not in op_missing_tensor: - ready_ops.append(op) - return ready_ops - - -def _initial_gradients(vspace, target, output_gradients, tensor_usage_counts): - """Computes the initial gradients for each Tensor.""" - # Initialize the backprop stack - gradients = collections.defaultdict(list) - for i, t in enumerate(target): - if vspace.tensor_id(t) in tensor_usage_counts: - # Can't provide a gradient of something we're trying to differentiate - assert output_gradients is None or output_gradients[i] is None - else: - if output_gradients is None or output_gradients[i] is None: - out_grad = vspace.ones_like(t) - else: - out_grad = output_gradients[i] - gradients[vspace.tensor_id(t)].append(out_grad) - return gradients +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.framework import errors VSpace = collections.namedtuple( @@ -123,13 +29,6 @@ VSpace = collections.namedtuple( ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones_like"]) -# If over MIN_AGGREGATE_COUNT gradients are accumulated and the total -# memory consumption is over MIN_AGGREGATE_BYTES, do an early aggregation -# so as to release the gradient tensor to save memory. -_MIN_AGGREGATE_COUNT = 4 -_MIN_AGGREGATE_BYTES = 128 * 1024 * 1024 - - def imperative_grad( vspace, tape, @@ -161,89 +60,6 @@ def imperative_grad( or if only non-differentiable functions of the source were used in the computation of target. """ - tensor_to_op, op_to_entry = tape.export() - # This overwrites the op_to_entry variable, which will release all memory used - # to keep traces that are irrelevant to the gradient computation we're doing - # here. - id_sources = [vspace.tensor_id(t) for t in sources] - tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop( - vspace, target, tensor_to_op, op_to_entry, id_sources) - ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor) - gradients = _initial_gradients(vspace, target, output_gradients, - tensor_usage_counts) - gradients_size = dict() - # Now exhaust the backprop stack - while ready_ops: - op = ready_ops.pop() - op_trace = op_to_entry.pop(op) - out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids] - - # Cache the last used zero tensor. We reuse it if the next one - # we need is of the same shape and dtype. This is very helpful in - # large splits and should have negligible overhead in other cases. - last_shape_and_dtype = None - last_zeros = None - for i in range(len(out_gradients)): - if out_gradients[i] is None: - # TODO(apassos) this should be in the right device - none_indices = _grad_fn_accepts_none_for_indices.get( - op_trace.op_type, None) - if none_indices is None or i not in none_indices: - shape_and_dtype = op_trace.output_shape_and_dtype[i] - if shape_and_dtype != last_shape_and_dtype: - last_shape_and_dtype = shape_and_dtype - last_zeros = vspace.zeros(*shape_and_dtype) - out_gradients[i] = last_zeros - else: - out_gradients[i] = vspace.aggregate_fn(out_gradients[i]) - - in_gradients = op_trace.backward_function(*(out_gradients)) - for i, t in enumerate(op_trace.input_ids): - if in_gradients[i] is not None: - t_grads = gradients.setdefault(t, []) - t_grads.append(in_gradients[i]) - if len(t_grads) >= _MIN_AGGREGATE_COUNT: - if t not in gradients_size: - gradients_size[t] = vspace.num_elements_fn(t_grads[-1]) - size = gradients_size[t] - - if len(t_grads) * size * 4 > _MIN_AGGREGATE_BYTES: - t_grads[:] = [vspace.aggregate_fn(t_grads)] - if tensor_usage_counts.get(t, 0) > 0: - tensor_usage_counts[t] -= 1 - if (t in tensor_to_op - and tensor_usage_counts[t] == 0 - and t not in id_sources): - in_op = tensor_to_op[t] - if in_op is None or in_op == -1: - continue - if op_missing_tensor.get(in_op, 0) > 0: - op_missing_tensor[in_op] -= 1 - if op_missing_tensor.get(in_op, 0) == 0: - ready_ops.append(in_op) - result = [] - for i, s in enumerate(sources): - g = gradients.get(vspace.tensor_id(s), None) - if g is None: - result.append(None) - else: - result.append(vspace.aggregate_fn(g)) - return result - - -# TODO(agarwal): use an automatic mechanism for handling None arguments to -# gradient functions. -# Some gradient functions can accept None arguments for gradients. The following -# maps the operation name to the indices at which the corresponding gradient -# function can accept None values. -# e.g. FusedBatchNorm outputs 5 values and hence receives 5 gradient values -# during backprop. However the gradient function uses only the first of those -# values and ignores the rest. The entry, "FusedBatchNorm": [1, 2, 3, 4], -# indicates that only the gradient corresponding to index 0 is used, and the -# gradient values at indices 1-4 are ignored (and hence can be None). The -# backprop algorithm can then leverage this by not constructing zeros to -# pass for those indices. -_grad_fn_accepts_none_for_indices = { - "SoftmaxCrossEntropyWithLogits": [1], - "FusedBatchNorm": [1, 2, 3, 4] -} + with errors.raise_exception_on_not_ok_status() as status: + return pywrap_tensorflow.TFE_Py_TapeGradient( + tape._tape, vspace, target, sources, output_gradients, status) # pylint: disable=protected-access diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index ca283862f9..653f3ef84e 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/python/lib/core/py_seq_tensor.h" #include "tensorflow/python/lib/core/safe_ptr.h" +#include "tensorflow/python/eager/pywrap_tensor.h" #include "tensorflow/python/eager/pywrap_tfe.h" #include "tensorflow/c/c_api.h" @@ -573,7 +574,7 @@ bool EagerTensor_CheckExact(const PyObject* o) { return Py_TYPE(o) == EagerTensorType; } -TFE_TensorHandle* EagerTensorHandle(const PyObject* o) { +TFE_TensorHandle* EagerTensor_Handle(const PyObject* o) { return reinterpret_cast(o)->handle; } @@ -594,6 +595,11 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { return reinterpret_cast(t); } +tensorflow::int64 EagerTensor_id(const PyObject* tensor) { + CHECK(EagerTensor_CheckExact(tensor)); + return reinterpret_cast(tensor)->id; +} + PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) { if (!PyType_Check(base_class)) { PyErr_SetString( diff --git a/tensorflow/python/eager/pywrap_tensor.h b/tensorflow/python/eager/pywrap_tensor.h new file mode 100644 index 0000000000..aa1efdd1b8 --- /dev/null +++ b/tensorflow/python/eager/pywrap_tensor.h @@ -0,0 +1,25 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ +#define TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/python/lib/core/numpy.h" + +bool EagerTensor_CheckExact(const PyObject* o); +tensorflow::int64 EagerTensor_id(const PyObject* tensor); + +#endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 1d03df2933..6705483f3b 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -81,7 +81,7 @@ bool EagerTensor_CheckExact(const PyObject* o); PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle); // Extracts the handle inside EagerTensor object `o`. Returns nullptr on error. -TFE_TensorHandle* EagerTensorHandle(const PyObject* o); +TFE_TensorHandle* EagerTensor_Handle(const PyObject* o); // Creates the `EagerTensor` class by subclassing `base_class` and returns the // newly created type, or nullptr on error. @@ -103,7 +103,16 @@ void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, PyObject* output_tensors, PyObject* input_tensor_ids, PyObject* backward_function); -PyObject* TFE_Py_TapeExport(PyObject* tape); + +// Computes a gradient based on information recorded on the tape.`tape` must +// have been produced by TFE_Py_NewTape. `vspace` must be a +// imperative_grad.py:VSpace named tuple. `target` and `sources` must be python +// lists of Tensor objects. `output_gradients` is either None or a python list +// of either Tensor or None, and if not None should have the same length as +// target. +PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, + PyObject* target, PyObject* sources, + PyObject* output_gradients, TF_Status* status); // Returns an EagerTensor of dimension [len(`tensor_list`)] containing // the `slice_dim`'th dimension of each tensor in `tensor_list`. In other words, diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 7456eb10f8..a00a7615d7 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -16,10 +16,13 @@ limitations under the License. #include "tensorflow/python/eager/pywrap_tfe.h" #include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/c_api_internal.h" #include "tensorflow/c/eager/tape.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/python/eager/pywrap_tensor.h" using tensorflow::string; @@ -515,18 +518,50 @@ static std::vector MakeIntList(PyObject* list) { } PyObject* TFE_Py_TapeShouldRecord(PyObject* py_tape, PyObject* tensors) { + if (tensors == Py_None) { + Py_RETURN_FALSE; + } + PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); + if (seq == nullptr) { + return nullptr; + } + int len = PySequence_Fast_GET_SIZE(seq); + // TODO(apassos) consider not building a list and changing the API to check + // each tensor individually. + std::vector tensor_ids; + tensor_ids.reserve(len); + for (int i = 0; i < len; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(seq, i); + if (EagerTensor_CheckExact(item)) { + tensor_ids.push_back(EagerTensor_id(item)); + } else { + PyObject* id_field = PyObject_GetAttrString(item, "_id"); + if (id_field == nullptr) { + return nullptr; + } + tensor_ids.push_back(MakeInt(id_field)); + Py_DECREF(id_field); + } + } + Py_DECREF(seq); TFE_Py_Tape* tape = reinterpret_cast(py_tape); - return PyBool_FromLong(tape->tape->ShouldRecord(MakeIntList(tensors))); + if (tape->tape->ShouldRecord(tensor_ids)) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } } void TFE_Py_TapeWatch(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->Watch(tensor_id); } -// TODO(apassos) have a fast path for eager tensors here which gets information -// from the handle instead of from the python object, and use this only for the -// case of graph tensors. static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { + if (EagerTensor_CheckExact(tensor)) { + TFE_TensorHandle* t = EagerTensor_Handle(tensor); + tensorflow::int64 id = EagerTensor_id(tensor); + return tensorflow::eager::TapeTensor{id, t->t.dtype(), t->t.shape()}; + } PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); tensorflow::int64 id = MakeInt(id_field); Py_DECREF(id_field); @@ -592,64 +627,224 @@ void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); } -// TODO(apassos) when backprop.py moves to C most of this exporting logic can -// disappear. -PyObject* TFE_Py_TapeExport(PyObject* tape) { - std::pair exported = - reinterpret_cast(tape)->tape->Export(); - PyObject* tensor_tape = PyDict_New(); - for (const auto& pair : exported.first) { - PyObject* tid = PyLong_FromLong(pair.first); - PyObject* opid = PyLong_FromLong(pair.second); - PyDict_SetItem(tensor_tape, tid, opid); - Py_DECREF(tid); - Py_DECREF(opid); - } - - PyObject* op_tape = PyDict_New(); - for (const auto& pair : exported.second) { - PyObject* opid = PyLong_FromLong(pair.first); - const auto& entry = pair.second; - PyObject* op_type = PyBytes_FromString(entry.op_type.c_str()); - PyObject* output_ids = PyList_New(entry.output_tensor_info.size()); - for (int i = 0; i < entry.output_tensor_info.size(); ++i) { - PyObject* tid = PyLong_FromLong(entry.output_tensor_info[i].id); - PyList_SET_ITEM(output_ids, i, tid); +// TODO(apassos): cache the attribute lookups as member variables and decref +// them in the destructor. +class PyVSpace : public tensorflow::eager::VSpace { + public: + explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} + + tensorflow::Status Initialize() { + num_elements_ = PyObject_GetAttrString(py_vspace_, "num_elements_fn"); + if (num_elements_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); + } + aggregate_fn_ = PyObject_GetAttrString(py_vspace_, "aggregate_fn"); + if (aggregate_fn_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); + } + zeros_ = PyObject_GetAttrString(py_vspace_, "zeros"); + if (zeros_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); } - PyObject* input_ids = PyList_New(entry.input_tensor_id.size()); - for (int i = 0; i < entry.input_tensor_id.size(); ++i) { - PyObject* tid = PyLong_FromLong(entry.input_tensor_id[i]); - PyList_SET_ITEM(input_ids, i, tid); + ones_like_ = PyObject_GetAttrString(reinterpret_cast(py_vspace_), + "ones_like"); + if (ones_like_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); } - PyObject* backward_function = - reinterpret_cast(entry.backward_function); - PyObject* output_shape_and_dtype = - PyList_New(entry.output_tensor_info.size()); - for (int i = 0; i < entry.output_tensor_info.size(); ++i) { - const tensorflow::TensorShape& shape = entry.output_tensor_info[i].shape; - PyObject* shape_list = PyList_New(shape.dims()); - for (int j = 0; j < shape.dims(); ++j) { - PyList_SET_ITEM(shape_list, j, PyLong_FromLong(shape.dim_size(j))); + return tensorflow::Status::OK(); + } + + ~PyVSpace() override { + Py_XDECREF(num_elements_); + Py_XDECREF(aggregate_fn_); + Py_XDECREF(zeros_); + Py_XDECREF(ones_like_); + } + + tensorflow::int64 NumElements(void* tensor) const final { + PyObject* arglist = + Py_BuildValue("(O)", reinterpret_cast(tensor)); + PyObject* result = PyEval_CallObject(num_elements_, arglist); + tensorflow::int64 r = MakeInt(result); + Py_DECREF(result); + Py_DECREF(arglist); + return r; + } + + void* AggregateGradients( + tensorflow::gtl::ArraySlice gradient_tensors) const final { + PyObject* list = PyList_New(gradient_tensors.size()); + for (int i = 0; i < gradient_tensors.size(); ++i) { + // Note: stealing a reference to the gradient tensors. + CHECK(gradient_tensors[i] != nullptr); + CHECK(gradient_tensors[i] != Py_None); + PyList_SET_ITEM(list, i, + reinterpret_cast(gradient_tensors[i])); + } + PyObject* arglist = Py_BuildValue("(O)", list); + CHECK(arglist != nullptr); + PyObject* result = PyEval_CallObject(aggregate_fn_, arglist); + Py_DECREF(arglist); + Py_DECREF(list); + return result; + } + + void* Zeros(tensorflow::TensorShape shape, + tensorflow::DataType dtype) const final { + PyObject* py_shape = PyTuple_New(shape.dims()); + for (int i = 0; i < shape.dims(); ++i) { + PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); + } + PyObject* py_dtype = PyLong_FromLong(static_cast(dtype)); + PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype); + PyObject* result = PyEval_CallObject(zeros_, arg_list); + Py_DECREF(arg_list); + Py_DECREF(py_dtype); + Py_DECREF(py_shape); + return reinterpret_cast(result); + } + + void* OnesLike(void* tensor) const final { + PyObject* arg_list = Py_BuildValue("(O)", tensor); + PyObject* result = PyEval_CallObject(ones_like_, arg_list); + if (result == nullptr) { + VLOG(1) << "Call to ones_like failed"; + } + Py_DECREF(arg_list); + return reinterpret_cast(result); + } + + tensorflow::int64 TensorId(void* tensor) const final { + PyObject* py_tensor = reinterpret_cast(tensor); + PyObject* id_field = PyObject_GetAttrString(py_tensor, "_id"); + tensorflow::int64 id = MakeInt(id_field); + Py_DECREF(id_field); + return id; + } + + tensorflow::Status CallBackwardFunction( + void* backward_function, + tensorflow::gtl::ArraySlice output_gradients, + std::vector* result) const final { + PyObject* grads = PyTuple_New(output_gradients.size()); + for (int i = 0; i < output_gradients.size(); ++i) { + if (output_gradients[i] == nullptr) { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(grads, i, Py_None); + } else { + PyTuple_SET_ITEM(grads, i, + reinterpret_cast(output_gradients[i])); } - PyObject* type_enum = PyLong_FromLong(entry.output_tensor_info[i].dtype); - PyObject* tuple = PyTuple_Pack(2, shape_list, type_enum); - Py_DECREF(shape_list); - Py_DECREF(type_enum); - PyList_SET_ITEM(output_shape_and_dtype, i, tuple); } - PyObject* opinfo = PyTuple_Pack(5, op_type, output_ids, input_ids, - backward_function, output_shape_and_dtype); - Py_DECREF(op_type); - Py_DECREF(output_ids); - Py_DECREF(input_ids); + PyObject* py_result = PyEval_CallObject( + reinterpret_cast(backward_function), grads); + Py_DECREF(grads); Py_DECREF(backward_function); - Py_DECREF(output_shape_and_dtype); - PyDict_SetItem(op_tape, opid, opinfo); - Py_DECREF(opid); - Py_DECREF(opinfo); - } - PyObject* retval = PyTuple_Pack(2, tensor_tape, op_tape); - Py_DECREF(tensor_tape); - Py_DECREF(op_tape); - return retval; + if (py_result == nullptr) { + VLOG(1) << "Gradient function threw exceptions"; + if (VLOG_IS_ON(1)) { + PyErr_Print(); + } + return tensorflow::errors::Internal("gradient function threw exceptions"); + } + result->clear(); + PyObject* seq = + PySequence_Fast(py_result, "expected a sequence of gradients"); + if (seq == nullptr) { + return tensorflow::errors::InvalidArgument( + "gradient function did not return a list"); + } + int len = PySequence_Fast_GET_SIZE(seq); + VLOG(1) << "Gradient length is " << len; + result->reserve(len); + for (int i = 0; i < len; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(seq, i); + if (item == Py_None) { + result->push_back(nullptr); + } else { + Py_INCREF(item); + result->push_back(item); + } + } + Py_DECREF(seq); + Py_DECREF(py_result); + return tensorflow::Status::OK(); + } + + void DeleteTensor(void* tensor) const final { + Py_XDECREF(reinterpret_cast(tensor)); + } + + private: + PyObject* py_vspace_; + + PyObject* num_elements_; + PyObject* aggregate_fn_; + PyObject* zeros_; + PyObject* ones_like_; +}; + +std::vector MakeTensorList(PyObject* tensors) { + PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); + if (seq == nullptr) { + return {}; + } + int len = PySequence_Fast_GET_SIZE(seq); + std::vector list; + list.reserve(len); + for (int i = 0; i < len; ++i) { + list.push_back(PySequence_Fast_GET_ITEM(seq, i)); + } + Py_DECREF(seq); + return list; +} + +PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, + PyObject* target, PyObject* sources, + PyObject* output_gradients, TF_Status* status) { + PyVSpace c_vspace(vspace); + if (!c_vspace.Initialize().ok()) { + return nullptr; + } + + std::vector target_vec = MakeTensorList(target); + if (PyErr_Occurred()) { + return nullptr; + } + std::vector sources_vec = MakeTensorList(sources); + if (PyErr_Occurred()) { + return nullptr; + } + std::vector outgrad_vec; + if (output_gradients != Py_None) { + outgrad_vec = MakeTensorList(output_gradients); + if (PyErr_Occurred()) { + return nullptr; + } + for (void* tensor : outgrad_vec) { + // Calling the backward function will eat a reference to the tensors in + // outgrad_vec, so we need to increase their reference count. + Py_INCREF(reinterpret_cast(tensor)); + } + } + TFE_Py_Tape* tape_obj = reinterpret_cast(tape); + std::vector result; + status->status = tape_obj->tape->Gradient(c_vspace, target_vec, sources_vec, + outgrad_vec, &result); + if (!status->status.ok()) { + return nullptr; + } + if (!result.empty()) { + PyObject* py_result = PyList_New(result.size()); + for (int i = 0; i < result.size(); ++i) { + if (result[i] == nullptr) { + Py_INCREF(Py_None); + result[i] = Py_None; + } + PyList_SET_ITEM(py_result, i, reinterpret_cast(result[i])); + } + return py_result; + } + Py_INCREF(Py_None); + return Py_None; } diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index c16aa8c2f7..a06f5e1a67 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -72,7 +72,7 @@ class Tape(object): True if any of the tensors is in the tape. """ return pywrap_tensorflow.TFE_Py_TapeShouldRecord( - self._tape, [x._id for x in tensors]) # pylint: disable=protected-access + self._tape, tensors) def watch(self, tensor): """Adds a tensor to the tape.""" @@ -99,16 +99,6 @@ class Tape(object): """Deletes any trace we have for this tensor.""" self._delete_tensor_id(tensor_id) - def export(self): - """Exports the internal state of this tape. - - Returns: - tensor_tape: a map from tensor_id(tensor) to - responsible for generating that tensor. - op_tape: a map from to TapeEntry for that op. - """ - return pywrap_tensorflow.TFE_Py_TapeExport(self._tape) - class _TapeStack(threading.local): diff --git a/tensorflow/python/eager/tape_test.py b/tensorflow/python/eager/tape_test.py index c97cb62125..b490bac66d 100644 --- a/tensorflow/python/eager/tape_test.py +++ b/tensorflow/python/eager/tape_test.py @@ -22,7 +22,6 @@ from __future__ import print_function from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient -from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -166,25 +165,6 @@ class TapeTest(test.TestCase): g, = backprop.gradients_function(fn, [0])(t) self.assertAllEqual(g, 1.0) - def testTapeGC(self): - # TODO(apassos) figure out how to test this without using tape internal - # APIs. - tape.push_new_tape() - - def f(): - x = constant_op.constant(1.0) - tape.watch(x) - x = gradient_is_constant(x) - x = gradient_is_constant(x) - x = gradient_is_constant(x) - - f() - t = tape.pop_tape() - tensor_tape, op_tape = t.export() - self.assertEqual(len(tensor_tape), 1) # The watched tensor will remain on - # the tape - self.assertEqual(len(op_tape), 0) # No operations should remain on the tape - def testCustomGradientGraphMode(self): with context.graph_mode(), self.test_session(): diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 637f738fed..cbacf458a0 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -29,7 +29,7 @@ limitations under the License. %rename("%s") TFE_Py_TapeWatch; %rename("%s") TFE_Py_TapeDeleteTrace; %rename("%s") TFE_Py_TapeRecordOperation; -%rename("%s") TFE_Py_TapeExport; +%rename("%s") TFE_Py_TapeGradient; %rename("%s") TFE_NewContextOptions; %rename("%s") TFE_ContextOptionsSetConfig; %rename("%s") TFE_ContextOptionsSetDevicePlacementPolicy; @@ -125,7 +125,7 @@ limitations under the License. SWIG_fail; } if (EagerTensor_CheckExact(elem)) { - (*$1)[i] = EagerTensorHandle(elem); + (*$1)[i] = EagerTensor_Handle(elem); } else { SWIG_exception_fail(SWIG_TypeError, "provided list of inputs contains objects other " -- GitLab From 2f796016426ada5346089111995a0bd64ee870e8 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 8 Nov 2017 13:49:36 -0800 Subject: [PATCH 0063/1801] [XLA:GPU] Add more logging to convolution autotuning. PiperOrigin-RevId: 175057863 --- .../xla/service/gpu/convolution_thunk.cc | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 536b96dcf6..e79d0a4c79 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" @@ -279,6 +280,13 @@ std::vector ConvolutionThunk::GetAlgorithms( return algorithms; } +static string AlgorithmToString(const se::dnn::AlgorithmDesc& algo) { + if (algo.tensor_ops_enabled()) { + return tensorflow::strings::StrCat(algo.algo_id(), "+TC"); + } + return tensorflow::strings::StrCat(algo.algo_id()); +} + tensorflow::Status ConvolutionThunk::ConvolveWithTune( const BatchDescriptor& input_descriptor, se::DeviceMemory input_data, const FilterDescriptor& filter_descriptor, @@ -303,6 +311,8 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( buffer_allocations.device_ordinal(), buffer_allocations.memory_allocator()); se::dnn::ProfileResult profile_result; + VLOG(3) << "Trying algorithm " << AlgorithmToString(algorithm) + << " for ConvolutionThunk: " << this; bool launch_ok = Convolve(input_descriptor, input_data, filter_descriptor, filter_data, output_descriptor, output_data, convolution_descriptor, @@ -310,6 +320,11 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( &scratch_allocator, &profile_result) .ok(); if (launch_ok && profile_result.is_valid()) { + VLOG(3) << "Run of algorithm " << AlgorithmToString(algorithm) + << " for ConvolutionThunk " << this << " succeeded, taking " + << profile_result.elapsed_time_in_ms() + << "ms. (Best result: " << best_result.elapsed_time_in_ms() + << "ms)"; if (profile_result.elapsed_time_in_ms() < best_result.elapsed_time_in_ms()) { best_result = profile_result; @@ -319,6 +334,9 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( best_result_without_scratch.elapsed_time_in_ms()) { best_result_without_scratch = profile_result; } + } else { + VLOG(3) << "Run of algorithm " << AlgorithmToString(algorithm) + << " for ConvolutionThunk " << this << " failed."; } } @@ -343,8 +361,8 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( { VLOG(2) << "Using convolution algorithm (" - << best_algorithm_.algorithm().algo_id() << ", " - << best_algorithm_.algorithm_no_scratch().algo_id() + << AlgorithmToString(best_algorithm_.algorithm()) << ", " + << AlgorithmToString(best_algorithm_.algorithm_no_scratch()) << ") for ConvolutionThunk: " << this; ConvolveScratchAllocator scratch_allocator( buffer_allocations.device_ordinal(), -- GitLab From e0046de7afa46199e11bb3aef823a55dfa6a0355 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 8 Nov 2017 14:10:50 -0800 Subject: [PATCH 0064/1801] Fix typo in tensorflow/python/client/session_clusterspec_prop_test.py PiperOrigin-RevId: 175061854 --- tensorflow/python/client/session_clusterspec_prop_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py index b77912b4f7..28a4dd27a7 100644 --- a/tensorflow/python/client/session_clusterspec_prop_test.py +++ b/tensorflow/python/client/session_clusterspec_prop_test.py @@ -169,7 +169,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase): # BaseRemoteRendezvous::SameWorkerRecvDone that means the test doesn't # actually capture the motivating bug unless run on a GPU machine. # - # Example error message (before bugfix -- linebreaks added because lint): + # Example error message (before bugfix -- line breaks added because lint): # # W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device: # /job:worker/replica:0/task:0/device:CPU:0 all devices: -- GitLab From 6c382bb5e8860eb786dd51f5af639549a468bfdf Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 8 Nov 2017 14:20:09 -0800 Subject: [PATCH 0065/1801] More idiomatic tests for defuns using variables. PiperOrigin-RevId: 175063558 --- tensorflow/python/eager/function_test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 243efccac4..209715894e 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -62,13 +62,21 @@ class FunctionTest(test.TestCase): @function.defun def step(): def inner(): - tape.watch_variable(v) return v * v return backprop.implicit_grad(inner)()[0][0] self.assertAllEqual(step(), 2.0) + def testDefunDifferentiable(self): + v = resource_variable_ops.ResourceVariable(1.0) + + @function.defun + def f(): + return v * v + + self.assertAllEqual(backprop.implicit_grad(f)()[0][0], 2.0) + def testGraphModeCaptureVariable(self): with context.graph_mode(), self.test_session() as sess: -- GitLab From ffe3636221ff8ecf93f9f78e19edf1419e20c67d Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 8 Nov 2017 14:21:28 -0800 Subject: [PATCH 0066/1801] Run Estimator.export_savedmodel with the user's TFSession config. Estimator assumes a particular config_pb2.ConfigProto that configures the underlying session. The config is either the default one or a user-supplied one. The default config has allow_soft_placement=True, the option that allows silent placement of operations on devices with kernels when the requested device doesn't have a kernel for that operation. Estimator's train(), eval() and predict() calls run with the underlying session configured in accordance to the ConfigProto. However, export_savedmodel runs without such a configuration. This appears to be a problem when the ModeKeys.PREDICT graph has an op that was placed on GPU but doesn't have a GPU kernel. The graph works for predict(), but when export_savedmodel() is trying to restore the corresponding variable, the code fails with "no kernel for the op" error. I attempted to show that in a test. To fix this issue, I am passing the ConfigProto to the session inside export_savedmodel. An alternative conservative and ugly fix is to pass a new instance ConfigProto with only allow_soft_placement=Estimator._session_config.allow_soft_placement. Passing the whole ConfigProto feels like the right thing to do. Here's what else is in ConfigProto: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/core/protobuf/config.proto#L280. I verified by running an internal pipeline. Here's allow_soft_placement logic: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/core/common_runtime/placer.cc#L322. PiperOrigin-RevId: 175063803 --- .../estimator/replicate_model_fn_test.py | 5 +- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/estimator/estimator_test.py | 66 +++++++++++++++++++ 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index 10b47fba5a..ce286c33b0 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -90,14 +90,11 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): def optimizer_fn(): return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05) - # TODO(isaprykin): Switch Estimator to use allow_soft_placement=True - # during export_savedmodel and then switch this test to replicate over - # GPUs instead of CPUs. estimator = estimator_lib.Estimator( model_fn=replicate_model_fn.replicate_model_fn( estimator.model_fn, optimizer_fn, - devices=['/cpu:0', '/cpu:0', '/cpu:0']), + devices=['/gpu:0', '/gpu:1', '/gpu:2']), model_dir=estimator.model_dir, config=estimator.config, params=estimator.params) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index a730e107ba..2d036e2cfb 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -537,7 +537,7 @@ class Estimator(object): temp_export_dir = get_temp_export_dir(export_dir) # TODO(soergel): Consider whether MonitoredSession makes sense here - with tf_session.Session() as session: + with tf_session.Session(config=self._session_config) as session: saver_for_restore = estimator_spec.scaffold.saver or saver.Saver( sharded=True) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 2b9b44523b..c1b773b8c4 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -50,6 +50,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses from tensorflow.python.platform import gfile @@ -1910,6 +1911,71 @@ class EstimatorExportTest(test.TestCase): est.train(dummy_input_fn, steps=1) est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) + def test_export_savedmodel_respects_soft_placement(self): + def model_fn_with_a_gpu_op_but_no_kernel(features, labels, mode): + _, _ = features, labels + table = saver_test_utils.CheckpointedOp(name='v2') + + update_global_step = state_ops.assign_add(training.get_global_step(), 1) + with ops.control_dependencies([update_global_step]): + train_op = table.insert('k1', 30.0) + + # In this test, there are no GPUs available. The goal is to verify that + # export_savedmodel executes nevertheless. + with ops.device('/gpu:0'): + string_op = string_ops.as_string(update_global_step) + + with ops.control_dependencies([string_op]): + prediction = table.lookup('k1', 0.0) + + return model_fn_lib.EstimatorSpec( + mode, + predictions=prediction, + loss=constant_op.constant(1.), + train_op=train_op, + export_outputs={ + 'test': export_output.PredictOutput({ + 'prediction': prediction + }) + }) + + tmpdir = tempfile.mkdtemp() + est = estimator.Estimator( + model_fn=model_fn_with_a_gpu_op_but_no_kernel) + est.train(input_fn=dummy_input_fn, steps=1) + feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64), + 'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)} + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir_base = os.path.join( + compat.as_bytes(tmpdir), compat.as_bytes('export')) + + export_dir = est.export_savedmodel( + export_dir_base, serving_input_receiver_fn) + + # At this point, if export_savedmodel executed with + # allow_soft_placement=True, then the GPU-assigned operation was silently + # placed on the CPU. Otherwise, an exception would have been raised + # related to the fact that the requested GPU device isn't available. + + # Expectations below assume that export_savedmodel has completed normally. + self.assertTrue(gfile.Exists(export_dir_base)) + self.assertTrue(gfile.Exists(export_dir)) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('saved_model.pb')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables/variables.index')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables/variables.data-00000-of-00001')))) + + gfile.DeleteRecursively(tmpdir) + class EstimatorHookOrderingTest(test.TestCase): -- GitLab From 544b47d5702787083445d64af4d4683141c0ffc9 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 8 Nov 2017 14:39:42 -0800 Subject: [PATCH 0067/1801] Fix tensorflow.org rendering of the example code for run_step_fn. Python code isn't indented correctly. PiperOrigin-RevId: 175067065 --- tensorflow/python/training/monitored_session.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index af9f11bb07..1f6016a91b 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -536,6 +536,7 @@ class _MonitoredSession(object): will return True. Example usage: + ```python with tf.Graph().as_default(): c = tf.placeholder(dtypes.float32) @@ -552,6 +553,7 @@ class _MonitoredSession(object): while not session.should_stop(): a = session.run_step_fn(step_fn) ``` + Hooks interact with the `run_with_hooks()` call inside the `step_fn` as they do with a `MonitoredSession.run` call. -- GitLab From 6488286b2678dddd7c8ed534d92f228bd4c532c9 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 8 Nov 2017 14:57:30 -0800 Subject: [PATCH 0068/1801] Update tf.keras RNNs to the Keras 2.0.9 API. Does not include cuDNN layers. Additionally, fix a bug with handling of activity_regularizer in tf.layers base Layer (and add test). PiperOrigin-RevId: 175070161 --- tensorflow/python/keras/BUILD | 12 + .../keras/_impl/keras/engine/topology.py | 9 +- .../keras/_impl/keras/integration_test.py | 2 +- .../keras/_impl/keras/layers/gru_test.py | 12 +- .../keras/_impl/keras/layers/lstm_test.py | 11 +- .../keras/_impl/keras/layers/recurrent.py | 2449 +++++++++++++---- .../_impl/keras/layers/recurrent_test.py | 378 +++ .../_impl/keras/layers/simplernn_test.py | 12 +- tensorflow/python/keras/layers/__init__.py | 5 + tensorflow/python/layers/base.py | 2 +- tensorflow/python/layers/base_test.py | 7 + .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 179 ++ .../tensorflow.keras.layers.-g-r-u.pbtxt | 86 +- ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 179 ++ .../tensorflow.keras.layers.-l-s-t-m.pbtxt | 90 +- .../tensorflow.keras.layers.-r-n-n.pbtxt | 191 ++ ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 179 ++ ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 78 +- ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 183 ++ .../api/golden/tensorflow.keras.layers.pbtxt | 20 + tensorflow/tools/ci_build/ci_sanity.sh | 3 +- 21 files changed, 3414 insertions(+), 673 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/layers/recurrent_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 4db48b45ed..6a762ee5d2 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -498,6 +498,18 @@ py_test( ], ) +py_test( + name = "recurrent_test", + size = "small", + srcs = ["_impl/keras/layers/recurrent_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":keras", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + py_test( name = "serialization_test", size = "small", diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index f9be782f85..2bcbabf19c 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -29,6 +29,9 @@ from six.moves import zip # pylint: disable=redefined-builtin from tensorflow.python.eager import context from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import constraints +from tensorflow.python.keras._impl.keras import initializers +from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary @@ -209,9 +212,9 @@ class Layer(tf_base_layers.Layer): dtype = K.floatx() weight = self.add_variable(name, shape, dtype=dtype, - initializer=initializer, - regularizer=regularizer, - constraint=constraint, + initializer=initializers.get(initializer), + regularizer=regularizers.get(regularizer), + constraint=constraints.get(constraint), trainable=trainable) return weight diff --git a/tensorflow/python/keras/_impl/keras/integration_test.py b/tensorflow/python/keras/_impl/keras/integration_test.py index 7110036848..871a8c7329 100644 --- a/tensorflow/python/keras/_impl/keras/integration_test.py +++ b/tensorflow/python/keras/_impl/keras/integration_test.py @@ -93,7 +93,7 @@ class KerasIntegrationTest(test.TestCase): y_test = keras.utils.to_categorical(y_test) model = keras.models.Sequential() - model.add(keras.layers.LSTM(3, return_sequences=True, + model.add(keras.layers.LSTM(5, return_sequences=True, input_shape=x_train.shape[1:])) model.add(keras.layers.GRU(y_train.shape[-1], activation='softmax')) model.compile(loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/_impl/keras/layers/gru_test.py b/tensorflow/python/keras/_impl/keras/layers/gru_test.py index 03f0736161..c57fbac41c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/gru_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/gru_test.py @@ -156,8 +156,10 @@ class GRULayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((2, 3, 2)))) - self.assertEqual(len(layer.losses), 4) + + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) def test_constraints_GRU(self): embedding_dim = 4 @@ -175,9 +177,9 @@ class GRULayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) def test_with_masking_layer_GRU(self): layer_class = keras.layers.GRU diff --git a/tensorflow/python/keras/_impl/keras/layers/lstm_test.py b/tensorflow/python/keras/_impl/keras/layers/lstm_test.py index f43d90fec8..8d359bf17c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/lstm_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/lstm_test.py @@ -156,8 +156,9 @@ class LSTMLayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((2, 3, 2)))) - self.assertEqual(len(layer.losses), 4) + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) def test_constraints_LSTM(self): embedding_dim = 4 @@ -175,9 +176,9 @@ class LSTMLayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) def test_with_masking_layer_LSTM(self): layer_class = keras.layers.LSTM diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 139523403c..2bc74d5f80 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,99 +29,209 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer +from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg +from tensorflow.python.platform import tf_logging as logging -# pylint: disable=access-member-before-definition +class StackedRNNCells(Layer): + """Wrapper allowing a stack of RNN cells to behave as a single cell. - -def _time_distributed_dense(x, - w, - b=None, - dropout=None, - input_dim=None, - output_dim=None, - timesteps=None, - training=None): - """Apply `y . w + b` for every temporal slice y of x. + Used to implement efficient stacked RNNs. Arguments: - x: input tensor. - w: weight matrix. - b: optional bias vector. - dropout: whether to apply dropout (same dropout mask - for every temporal slice of the input). - input_dim: integer; optional dimensionality of the input. - output_dim: integer; optional dimensionality of the output. - timesteps: integer; optional number of timesteps. - training: training phase tensor or boolean. - - Returns: - Output tensor. - """ - if not input_dim: - input_dim = K.shape(x)[2] - if not timesteps: - timesteps = K.shape(x)[1] - if not output_dim: - output_dim = K.shape(w)[1] - - if dropout is not None and 0. < dropout < 1.: - # apply the same dropout pattern at every timestep - ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) - dropout_matrix = K.dropout(ones, dropout) - expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) - x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) - - # collapse time dimension and batch dimension together - x = K.reshape(x, (-1, input_dim)) - x = K.dot(x, w) - if b is not None: - x = K.bias_add(x, b) - # reshape to 3D tensor - if K.backend() == 'tensorflow': - x = K.reshape(x, K.stack([-1, timesteps, output_dim])) - x.set_shape([None, None, output_dim]) - else: - x = K.reshape(x, (-1, timesteps, output_dim)) - return x + cells: List of RNN cell instances. + Examples: -class Recurrent(Layer): - """Abstract base class for recurrent layers. + ```python + cells = [ + keras.layers.LSTMCell(output_dim), + keras.layers.LSTMCell(output_dim), + keras.layers.LSTMCell(output_dim), + ] - Do not use in a model -- it's not a valid layer! - Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead. + inputs = keras.Input((timesteps, input_dim)) + x = keras.layers.RNN(cells)(inputs) + ``` + """ - All recurrent layers (`LSTM`, `GRU`, `SimpleRNN`) also - follow the specifications of this class and accept - the keyword arguments listed below. + def __init__(self, cells, **kwargs): + for cell in cells: + if not hasattr(cell, 'call'): + raise ValueError('All cells must have a `call` method. ' + 'received cells:', cells) + if not hasattr(cell, 'state_size'): + raise ValueError('All cells must have a ' + '`state_size` attribute. ' + 'received cells:', cells) + self.cells = cells + super(StackedRNNCells, self).__init__(**kwargs) + + @property + def state_size(self): + # States are a flat list + # in reverse order of the cell stack. + # This allows to preserve the requirement + # `stack.state_size[0] == output_dim`. + # e.g. states of a 2-layer LSTM would be + # `[h2, c2, h1, c1]` + # (assuming one LSTM has states [h, c]) + state_size = [] + for cell in self.cells[::-1]: + if hasattr(cell.state_size, '__len__'): + state_size += list(cell.state_size) + else: + state_size.append(cell.state_size) + return tuple(state_size) + + def call(self, inputs, states, **kwargs): + # Recover per-cell states. + nested_states = [] + for cell in self.cells[::-1]: + if hasattr(cell.state_size, '__len__'): + nested_states.append(states[:len(cell.state_size)]) + states = states[len(cell.state_size):] + else: + nested_states.append([states[0]]) + states = states[1:] + nested_states = nested_states[::-1] + + # Call the cells in order and store the returned states. + new_nested_states = [] + for cell, states in zip(self.cells, nested_states): + inputs, states = cell.call(inputs, states, **kwargs) + new_nested_states.append(states) + + # Format the new states as a flat list + # in reverse cell order. + states = [] + for cell_states in new_nested_states[::-1]: + states += cell_states + return inputs, states - Example: + def build(self, input_shape): + for cell in self.cells: + if isinstance(cell, Layer): + cell.build(input_shape) + if hasattr(cell.state_size, '__len__'): + output_dim = cell.state_size[0] + else: + output_dim = cell.state_size + input_shape = (input_shape[0], input_shape[1], output_dim) + self.built = True - ```python - # as the first layer in a Sequential model - model = Sequential() - model.add(LSTM(32, input_shape=(10, 64))) - # now model.output_shape == (None, 32) - # note: `None` is the batch dimension. - - # for subsequent layers, no need to specify the input size: - model.add(LSTM(16)) - - # to stack recurrent layers, you must use return_sequences=True - # on any recurrent layer that feeds into another recurrent layer. - # note that you only need to specify the input size on the first layer. - model = Sequential() - model.add(LSTM(64, input_dim=64, input_length=10, return_sequences=True)) - model.add(LSTM(32, return_sequences=True)) - model.add(LSTM(10)) - ``` + def get_config(self): + cells = [] + for cell in self.cells: + cells.append({ + 'class_name': cell.__class__.__name__, + 'config': cell.get_config() + }) + config = {'cells': cells} + base_config = super(StackedRNNCells, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + from tensorflow.python.keras._impl.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top + cells = [] + for cell_config in config.pop('cells'): + cells.append( + deserialize_layer(cell_config, custom_objects=custom_objects)) + return cls(cells, **config) + + @property + def trainable_weights(self): + if not self.trainable: + return [] + weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + weights += cell.trainable_weights + return weights + + @property + def non_trainable_weights(self): + weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + weights += cell.non_trainable_weights + if not self.trainable: + trainable_weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + trainable_weights += cell.trainable_weights + return trainable_weights + weights + return weights + + def get_weights(self): + """Retrieves the weights of the model. + + Returns: + A flat list of Numpy arrays. + """ + weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + weights += cell.weights + return K.batch_get_value(weights) + + def set_weights(self, weights): + """Sets the weights of the model. + + Arguments: + weights: A list of Numpy arrays with shapes and types matching + the output of `model.get_weights()`. + """ + tuples = [] + for cell in self.cells: + if isinstance(cell, Layer): + num_param = len(cell.weights) + weights = weights[:num_param] + for sw, w in zip(cell.weights, weights): + tuples.append((sw, w)) + weights = weights[num_param:] + K.batch_set_value(tuples) + + @property + def losses(self): + losses = [] + for cell in self.cells: + if isinstance(cell, Layer): + cell_losses = cell.losses + losses += cell_losses + return losses + + def get_losses_for(self, inputs=None): + losses = [] + for cell in self.cells: + if isinstance(cell, Layer): + cell_losses = cell.get_losses_for(inputs) + losses += cell_losses + return losses + + +class RNN(Layer): + """Base class for recurrent layers. Arguments: - weights: list of Numpy arrays to set as initial weights. - The list should have 3 elements, of shapes: - `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. - return_sequences: Boolean. Whether to return the last output + cell: A RNN cell instance. A RNN cell is a class that has: + - a `call(input_at_t, states_at_t)` method, returning + `(output_at_t, states_at_t_plus_1)`. The call method of the + cell can also take the optional argument `constants`, see + section "Note on passing external constants" below. + - a `state_size` attribute. This can be a single integer + (single state) in which case it is + the size of the recurrent state + (which should be the same as the size of the cell output). + This can also be a list/tuple of integers + (one size per state). In this case, the first entry + (`state_size[0]`) should be the same as + the size of the cell output. + It is also possible for `cell` to be a list of RNN cell instances, + in which cases the cells get stacked on after the other in the RNN, + implementing an efficient stacked RNN. + return_sequences: Boolean. Whether to return the last output. in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. @@ -137,21 +247,9 @@ class Recurrent(Layer): Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. - implementation: one of {0, 1, or 2}. - If set to 0, the RNN will use - an implementation that uses fewer, larger matrix products, - thus running faster on CPU but consuming more memory. - If set to 1, the RNN will use more matrix products, - but smaller ones, thus running slower - (may actually be faster on GPU) while consuming less memory. - If set to 2 (LSTM/GRU only), - the RNN will combine the input gate, - the forget gate and the output gate into a single matrix, - enabling more time-efficient parallelization on the GPU. - Note: RNN dropout must be shared for all gates, - resulting in a slightly reduced regularization. input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) + This argument (or alternatively, + the keyword argument `input_shape`) is required when using this layer as the first layer in a model. input_length: Length of input sequences, to be specified when it is constant. @@ -163,7 +261,7 @@ class Recurrent(Layer): at the level of the first layer (e.g. via the `input_shape` argument) - Input shape:s + Input shape: 3D tensor with shape `(batch_size, timesteps, input_dim)`, (Optional) 2D tensors with shape `(batch_size, output_dim)`. @@ -178,7 +276,7 @@ class Recurrent(Layer): # Masking This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, - use an `Embedding` layer with the `mask_zero` parameter + use an [Embedding](embeddings.md) layer with the `mask_zero` parameter set to `True`. # Note on using statefulness in RNNs @@ -212,42 +310,128 @@ class Recurrent(Layer): calling `reset_states` with the keyword argument `states`. The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. + + # Note on passing external constants to RNNs + You can pass "external" constants to the cell using the `constants` + keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This + requires that the `cell.call` method accepts the same keyword argument + `constants`. Such constants can be used to condition the cell + transformation on additional static inputs (not changing over time), + a.k.a. an attention mechanism. + + Examples: + + ```python + # First, let's define a RNN Cell, as a layer subclass. + + class MinimalRNNCell(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(MinimalRNNCell, self).__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = K.dot(inputs, self.kernel) + output = h + K.dot(prev_output, self.recurrent_kernel) + return output, [output] + + # Let's use this cell in a RNN layer: + + cell = MinimalRNNCell(32) + x = keras.Input((None, 5)) + layer = RNN(cell) + y = layer(x) + + # Here's how to use the cell to build a stacked RNN: + + cells = [MinimalRNNCell(32), MinimalRNNCell(64)] + x = keras.Input((None, 5)) + layer = RNN(cells) + y = layer(x) + ``` """ def __init__(self, + cell, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, - implementation=0, + activity_regularizer=None, **kwargs): - super(Recurrent, self).__init__(**kwargs) + if isinstance(cell, (list, tuple)): + cell = StackedRNNCells(cell) + if not hasattr(cell, 'call'): + raise ValueError('`cell` should have a `call` method. ' + 'The RNN was passed:', cell) + if not hasattr(cell, 'state_size'): + raise ValueError('The RNN cell should have ' + 'an attribute `state_size` ' + '(tuple of integers, ' + 'one integer per RNN state).') + super(RNN, self).__init__( + activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + self.cell = cell self.return_sequences = return_sequences self.return_state = return_state self.go_backwards = go_backwards self.stateful = stateful self.unroll = unroll - self.implementation = implementation + self.supports_masking = True self.input_spec = [InputSpec(ndim=3)] self.state_spec = None - self.dropout = 0 - self.recurrent_dropout = 0 + self._states = None + self.constants_spec = None + self._num_constants = None + + @property + def states(self): + if self._states is None: + if isinstance(self.cell.state_size, int): + num_states = 1 + else: + num_states = len(self.cell.state_size) + return [None for _ in range(num_states)] + return self._states + + @states.setter + def states(self, states): + self._states = states def _compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] input_shape = tensor_shape.TensorShape(input_shape).as_list() + + if hasattr(self.cell.state_size, '__len__'): + output_dim = self.cell.state_size[0] + else: + output_dim = self.cell.state_size + if self.return_sequences: - output_shape = (input_shape[0], input_shape[1], self.units) + output_shape = (input_shape[0], input_shape[1], output_dim) else: - output_shape = (input_shape[0], self.units) + output_shape = (input_shape[0], output_dim) if self.return_state: - state_shape = [tensor_shape.TensorShape( - (input_shape[0], self.units)) for _ in self.states] - return [tensor_shape.TensorShape(output_shape)] + state_shape + state_shape = [(input_shape[0], output_dim) for _ in self.states] + output_shape = [output_shape] + state_shape + else: + output_shape = output_shape return tensor_shape.TensorShape(output_shape) def compute_mask(self, inputs, mask): @@ -257,82 +441,123 @@ class Recurrent(Layer): if self.return_state: state_mask = [None for _ in self.states] return [output_mask] + state_mask - return output_mask + else: + return output_mask - def step(self, inputs, states): - raise NotImplementedError + def build(self, input_shape): + # Note input_shape will be list of shapes of initial states and + # constants if these are passed in __call__. + if self._num_constants is not None: + constants_shape = input_shape[-self._num_constants:] # pylint: disable=invalid-unary-operand-type + else: + constants_shape = None - def get_constants(self, inputs, training=None): - return [] + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) + + batch_size = input_shape[0] if self.stateful else None + input_dim = input_shape[-1] + self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) + + # allow cell (if layer) to build before we set or validate state_spec + if isinstance(self.cell, Layer): + step_input_shape = (input_shape[0],) + input_shape[2:] + if constants_shape is not None: + self.cell.build([step_input_shape] + constants_shape) + else: + self.cell.build(step_input_shape) + + # set or validate state_spec + if hasattr(self.cell.state_size, '__len__'): + state_size = list(self.cell.state_size) + else: + state_size = [self.cell.state_size] + + if self.state_spec is not None: + # initial_state was passed in call, check compatibility + if [spec.shape[-1] for spec in self.state_spec] != state_size: + raise ValueError( + 'An initial_state was passed that is not compatible with ' + '`cell.state_size`. Received `state_spec`={}; ' + 'However `cell.state_size` is ' + '{}'.format(self.state_spec, self.cell.state_size)) + else: + self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_size] + if self.stateful: + self.reset_states() def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) initial_state = K.expand_dims(initial_state) # (samples, 1) - initial_state = K.tile(initial_state, [1, - self.units]) # (samples, output_dim) - initial_state = [initial_state for _ in range(len(self.states))] - return initial_state - - def preprocess_input(self, inputs, training=None): - return inputs + if hasattr(self.cell.state_size, '__len__'): + return [K.tile(initial_state, [1, dim]) for dim in self.cell.state_size] + else: + return [K.tile(initial_state, [1, self.cell.state_size])] - def __call__(self, inputs, initial_state=None, **kwargs): - if (isinstance(inputs, (list, tuple)) and - len(inputs) > 1 - and initial_state is None): - initial_state = inputs[1:] - inputs = inputs[0] + def __call__(self, inputs, initial_state=None, constants=None, **kwargs): + inputs, initial_state, constants = self._standardize_args( + inputs, initial_state, constants) - # If `initial_state` is specified, - # and if it a Keras tensor, - # then add it to the inputs and temporarily - # modify the input spec to include the state. - if initial_state is None: - return super(Recurrent, self).__call__(inputs, **kwargs) + if initial_state is None and constants is None: + return super(RNN, self).__call__(inputs, **kwargs) - if not isinstance(initial_state, (list, tuple)): - initial_state = [initial_state] + # If any of `initial_state` or `constants` are specified and are Keras + # tensors, then add them to the inputs and temporarily modify the + # input_spec to include them. - is_keras_tensor = hasattr(initial_state[0], '_keras_history') - for tensor in initial_state: + additional_inputs = [] + additional_specs = [] + if initial_state is not None: + kwargs['initial_state'] = initial_state + additional_inputs += initial_state + self.state_spec = [ + InputSpec(shape=K.int_shape(state)) for state in initial_state + ] + additional_specs += self.state_spec + if constants is not None: + kwargs['constants'] = constants + additional_inputs += constants + self.constants_spec = [ + InputSpec(shape=K.int_shape(constant)) for constant in constants + ] + self._num_constants = len(constants) + additional_specs += self.constants_spec + # at this point additional_inputs cannot be empty + is_keras_tensor = hasattr(additional_inputs[0], '_keras_history') + for tensor in additional_inputs: if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state of an RNN layer cannot be' - ' specified with a mix of Keras tensors and' - ' non-Keras tensors') + raise ValueError('The initial state or constants of an RNN' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors') if is_keras_tensor: - # Compute the full input spec, including state - input_spec = self.input_spec - state_spec = self.state_spec - if not isinstance(input_spec, list): - input_spec = [input_spec] - if not isinstance(state_spec, list): - state_spec = [state_spec] - self.input_spec = input_spec + state_spec - - # Compute the full inputs, including state - inputs = [inputs] + list(initial_state) - - # Perform the call - output = super(Recurrent, self).__call__(inputs, **kwargs) - - # Restore original input spec - self.input_spec = input_spec + # Compute the full input spec, including state and constants + full_input = [inputs] + additional_inputs + full_input_spec = self.input_spec + additional_specs + # Perform the call with temporarily replaced input_spec + original_input_spec = self.input_spec + self.input_spec = full_input_spec + output = super(RNN, self).__call__(full_input, **kwargs) + self.input_spec = original_input_spec return output else: - kwargs['initial_state'] = initial_state - return super(Recurrent, self).__call__(inputs, **kwargs) - - def call(self, inputs, mask=None, training=None, initial_state=None): + return super(RNN, self).__call__(inputs, **kwargs) + + def call(self, + inputs, + mask=None, + training=None, + initial_state=None, + constants=None): # input shape: `(samples, time (padded with zeros), input_dim)` # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. if isinstance(inputs, list): - initial_state = inputs[1:] inputs = inputs[0] - elif initial_state is not None: + if initial_state is not None: pass elif self.stateful: initial_state = self.states @@ -343,13 +568,14 @@ class Recurrent(Layer): mask = mask[0] if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + str(len(initial_state)) + - ' initial states.') + raise ValueError( + 'Layer has ' + str(len(self.states)) + ' states but was passed ' + + str(len(initial_state)) + ' initial states.') input_shape = K.int_shape(inputs) - if self.unroll and input_shape[1] is None: + timesteps = input_shape[1] + if self.unroll and timesteps in [None, 1]: raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined. \n' + 'time dimension is undefined or equal to 1. \n' '- If using a Sequential model, ' 'specify the time dimension by passing ' 'an `input_shape` or `batch_input_shape` ' @@ -359,15 +585,31 @@ class Recurrent(Layer): '- If using the functional API, specify ' 'the time dimension by passing a `shape` ' 'or `batch_shape` argument to your Input layer.') - constants = self.get_constants(inputs, training=None) - preprocessed_input = self.preprocess_input(inputs, training=None) + + kwargs = {} + if has_arg(self.cell.call, 'training'): + kwargs['training'] = training + + if constants: + if not has_arg(self.cell.call, 'constants'): + raise ValueError('RNN cell does not support constants') + + def step(inputs, states): + constants = states[-self._num_constants:] # pylint: disable=invalid-unary-operand-type + states = states[:-self._num_constants] # pylint: disable=invalid-unary-operand-type + return self.cell.call(inputs, states, constants=constants, **kwargs) + else: + + def step(inputs, states): + return self.cell.call(inputs, states, **kwargs) + last_output, outputs, states = K.rnn( - self.step, - preprocessed_input, + step, + inputs, initial_state, + constants=constants, go_backwards=self.go_backwards, mask=mask, - constants=constants, unroll=self.unroll) if self.stateful: updates = [] @@ -375,21 +617,63 @@ class Recurrent(Layer): updates.append((self.states[i], states[i])) self.add_update(updates, inputs) - # Properly set learning phase - if 0 < self.dropout + self.recurrent_dropout: - last_output._uses_learning_phase = True - outputs._uses_learning_phase = True + if self.return_sequences: + output = outputs + else: + output = last_output - if not self.return_sequences: - outputs = last_output + # Properly set learning phase + if getattr(last_output, '_uses_learning_phase', False): + output._uses_learning_phase = True if self.return_state: if not isinstance(states, (list, tuple)): states = [states] else: states = list(states) - return [outputs] + states - return outputs + return [output] + states + else: + return output + + def _standardize_args(self, inputs, initial_state, constants): + """Standardize `__call__` arguments to a single list of tensor inputs. + + When running a model loaded from file, the input tensors + `initial_state` and `constants` can be passed to `RNN.__call__` as part + of `inputs` instead of by the dedicated keyword arguments. This method + makes sure the arguments are separated and that `initial_state` and + `constants` are lists of tensors (or None). + + Arguments: + inputs: tensor or list/tuple of tensors + initial_state: tensor or list of tensors or None + constants: tensor or list of tensors or None + + Returns: + inputs: tensor + initial_state: list of tensors or None + constants: list of tensors or None + """ + if isinstance(inputs, list): + assert initial_state is None and constants is None + if self._num_constants is not None: + constants = inputs[-self._num_constants:] # pylint: disable=invalid-unary-operand-type + inputs = inputs[:-self._num_constants] # pylint: disable=invalid-unary-operand-type + if len(inputs) > 1: + initial_state = inputs[1:] + inputs = inputs[0] + + def to_list_or_none(x): + if x is None or isinstance(x, list): + return x + if isinstance(x, tuple): + return list(x) + return [x] + + initial_state = to_list_or_none(initial_state) + constants = to_list_or_none(constants) + + return inputs, initial_state, constants def reset_states(self, states=None): if not self.stateful: @@ -408,10 +692,19 @@ class Recurrent(Layer): '`batch_shape` argument to your Input layer.') # initialize state if None if self.states[0] is None: - self.states = [K.zeros((batch_size, self.units)) for _ in self.states] + if hasattr(self.cell.state_size, '__len__'): + self.states = [ + K.zeros((batch_size, dim)) for dim in self.cell.state_size + ] + else: + self.states = [K.zeros((batch_size, self.cell.state_size))] elif states is None: - for state in self.states: - K.set_value(state, np.zeros((batch_size, self.units))) + if hasattr(self.cell.state_size, '__len__'): + for state, dim in zip(self.states, self.cell.state_size): + K.set_value(state, np.zeros((batch_size, dim))) + else: + K.set_value(self.states[0], np.zeros((batch_size, + self.cell.state_size))) else: if not isinstance(states, (list, tuple)): states = [states] @@ -421,11 +714,16 @@ class Recurrent(Layer): 'but it received ' + str(len(states)) + ' state values. Input received: ' + str(states)) for index, (value, state) in enumerate(zip(states, self.states)): - if value.shape != (batch_size, self.units): - raise ValueError('State ' + str(index) + - ' is incompatible with layer ' + self.name + - ': expected shape=' + str((batch_size, self.units)) + - ', found shape=' + str(value.shape)) + if hasattr(self.cell.state_size, '__len__'): + dim = self.cell.state_size[index] + else: + dim = self.cell.state_size + if value.shape != (batch_size, dim): + raise ValueError( + 'State ' + str(index) + ' is incompatible with layer ' + + self.name + ': expected shape=' + str( + (batch_size, dim)) + ', found shape=' + str(value.shape)) + # TODO(fchollet): consider batch calls to `set_value`. K.set_value(state, value) def get_config(self): @@ -434,51 +732,94 @@ class Recurrent(Layer): 'return_state': self.return_state, 'go_backwards': self.go_backwards, 'stateful': self.stateful, - 'unroll': self.unroll, - 'implementation': self.implementation + 'unroll': self.unroll } - base_config = super(Recurrent, self).get_config() + if self._num_constants is not None: + config['num_constants'] = self._num_constants + + cell_config = self.cell.get_config() + config['cell'] = { + 'class_name': self.cell.__class__.__name__, + 'config': cell_config + } + base_config = super(RNN, self).get_config() return dict(list(base_config.items()) + list(config.items())) + @classmethod + def from_config(cls, config, custom_objects=None): + from tensorflow.python.keras._impl.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top + cell = deserialize_layer(config.pop('cell'), custom_objects=custom_objects) + num_constants = config.pop('num_constants', None) + layer = cls(cell, **config) + layer._num_constants = num_constants + return layer + + @property + def trainable_weights(self): + if isinstance(self.cell, Layer): + return self.cell.trainable_weights + return [] + + @property + def non_trainable_weights(self): + if isinstance(self.cell, Layer): + return self.cell.non_trainable_weights + return [] -class SimpleRNN(Recurrent): - """Fully-connected RNN where the output is to be fed back to input. + @property + def losses(self): + if isinstance(self.cell, Layer): + return self.cell.losses + return [] + + def get_losses_for(self, inputs=None): + if isinstance(self.cell, Layer): + cell_losses = self.cell.get_losses_for(inputs) + return cell_losses + super(RNN, self).get_losses_for(inputs) + return super(RNN, self).get_losses_for(inputs) + + +class SimpleRNNCell(Layer): + """Cell class for SimpleRNN. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - If you don't specify anything, no activation is applied + activation: Activation function to use + (see [activations](../activations.md)). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state.. - bias_initializer: Initializer for the bias vector. + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - - References: - - [A Theoretically Grounded Application of Dropout in Recurrent Neural - Networks](http://arxiv.org/abs/1512.05287) """ def __init__(self, @@ -491,15 +832,13 @@ class SimpleRNN(Recurrent): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., **kwargs): - super(SimpleRNN, self).__init__( - activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + super(SimpleRNNCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.use_bias = use_bias @@ -518,23 +857,13 @@ class SimpleRNN(Recurrent): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_spec = InputSpec(shape=(None, self.units)) + self.state_size = self.units + self._dropout_mask = None + self._recurrent_dropout_mask = None def build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tensor_shape.TensorShape(input_shape).as_list() - - batch_size = input_shape[0] if self.stateful else None - self.input_dim = input_shape[2] - self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) - - self.states = [None] - if self.stateful: - self.reset_states() - self.kernel = self.add_weight( - shape=(self.input_dim, self.units), + shape=(input_shape[-1], self.units), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -556,152 +885,121 @@ class SimpleRNN(Recurrent): self.bias = None self.built = True - def preprocess_input(self, inputs, training=None): - if self.implementation > 0: - return inputs - else: - input_shape = inputs.get_shape().as_list() - input_dim = input_shape[2] - timesteps = input_shape[1] - return _time_distributed_dense( - inputs, - self.kernel, - self.bias, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) + def _generate_dropout_mask(self, inputs, training=None): + if 0 < self.dropout < 1: + ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) - def step(self, inputs, states): - if self.implementation == 0: - h = inputs + def dropped_inputs(): + return K.dropout(ones, self.dropout) + + self._dropout_mask = K.in_train_phase( + dropped_inputs, ones, training=training) else: - if 0 < self.dropout < 1: - h = K.dot(inputs * states[1], self.kernel) - else: - h = K.dot(inputs, self.kernel) - if self.bias is not None: - h = K.bias_add(h, self.bias) + self._dropout_mask = None - prev_output = states[0] + def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: - prev_output *= states[2] - output = h + K.dot(prev_output, self.recurrent_kernel) - if self.activation is not None: - output = self.activation(output) - - # Properly set learning phase on output tensor. - if 0 < self.dropout + self.recurrent_dropout: - output._uses_learning_phase = True - return output, [output] - - def get_constants(self, inputs, training=None): - constants = [] - if self.implementation != 0 and 0 < self.dropout < 1: - input_shape = K.int_shape(inputs) - input_dim = input_shape[-1] ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, int(input_dim))) + ones = K.tile(ones, (1, self.units)) def dropped_inputs(): return K.dropout(ones, self.dropout) - dp_mask = K.in_train_phase(dropped_inputs, ones, training=training) - constants.append(dp_mask) + self._recurrent_dropout_mask = K.in_train_phase( + dropped_inputs, ones, training=training) else: - constants.append(K.cast_to_floatx(1.)) - - if 0 < self.recurrent_dropout < 1: - ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, self.units)) + self._recurrent_dropout_mask = None - def dropped_inputs(): # pylint: disable=function-redefined - return K.dropout(ones, self.recurrent_dropout) + def call(self, inputs, states, training=None): + prev_output = states[0] + dp_mask = self._dropout_mask + rec_dp_mask = self._recurrent_dropout_mask - rec_dp_mask = K.in_train_phase(dropped_inputs, ones, training=training) - constants.append(rec_dp_mask) + if dp_mask is not None: + h = K.dot(inputs * dp_mask, self.kernel) else: - constants.append(K.cast_to_floatx(1.)) - return constants + h = K.dot(inputs, self.kernel) + if self.bias is not None: + h = K.bias_add(h, self.bias) - def get_config(self): - config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout - } - base_config = super(SimpleRNN, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + if rec_dp_mask is not None: + prev_output *= rec_dp_mask + output = h + K.dot(prev_output, self.recurrent_kernel) + if self.activation is not None: + output = self.activation(output) + # Properly set learning phase on output tensor. + if 0 < self.dropout + self.recurrent_dropout: + if training is None: + output._uses_learning_phase = True + return output, [output] -class GRU(Recurrent): - """Gated Recurrent Unit - Cho et al. - 2014. +class SimpleRNN(RNN): + """Fully-connected RNN where the output is to be fed back to input. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use. + activation: Activation function to use + (see [activations](../activations.md)). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step. use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state.. - bias_initializer: Initializer for the bias vector. + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. + the output of the layer (its "activation"). + (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - - References: - - [On the Properties of Neural Machine Translation: Encoder-Decoder - Approaches](https://arxiv.org/abs/1409.1259) - - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence - Modeling](http://arxiv.org/abs/1412.3555v1) - - [A Theoretically Grounded Application of Dropout in Recurrent Neural - Networks](http://arxiv.org/abs/1512.05287) - """ + return_sequences: Boolean. Whether to return the last output. + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + """ def __init__(self, units, activation='tanh', - recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', @@ -715,9 +1013,220 @@ class GRU(Recurrent): bias_constraint=None, dropout=0., recurrent_dropout=0., + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, **kwargs): - super(GRU, self).__init__( - activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + if 'implementation' in kwargs: + kwargs.pop('implementation') + logging.warning('The `implementation` argument ' + 'in `SimpleRNN` has been deprecated. ' + 'Please remove it from your layer call.') + cell = SimpleRNNCell( + units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout) + super(SimpleRNN, self).__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + activity_regularizer=regularizers.get(activity_regularizer), + **kwargs) + # self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + self.cell._generate_dropout_mask(inputs, training=training) + self.cell._generate_recurrent_dropout_mask(inputs, training=training) + return super(SimpleRNN, self).call( + inputs, mask=mask, training=training, initial_state=initial_state) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + def get_config(self): + config = { + 'units': + self.units, + 'activation': + activations.serialize(self.activation), + 'use_bias': + self.use_bias, + 'kernel_initializer': + initializers.serialize(self.kernel_initializer), + 'recurrent_initializer': + initializers.serialize(self.recurrent_initializer), + 'bias_initializer': + initializers.serialize(self.bias_initializer), + 'kernel_regularizer': + regularizers.serialize(self.kernel_regularizer), + 'recurrent_regularizer': + regularizers.serialize(self.recurrent_regularizer), + 'bias_regularizer': + regularizers.serialize(self.bias_regularizer), + 'activity_regularizer': + regularizers.serialize(self.activity_regularizer), + 'kernel_constraint': + constraints.serialize(self.kernel_constraint), + 'recurrent_constraint': + constraints.serialize(self.recurrent_constraint), + 'bias_constraint': + constraints.serialize(self.bias_constraint), + 'dropout': + self.dropout, + 'recurrent_dropout': + self.recurrent_dropout + } + base_config = super(SimpleRNN, self).get_config() + del base_config['cell'] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if 'implementation' in config: + config.pop('implementation') + return cls(**config) + + +class GRUCell(Layer): + """Cell class for the GRU layer. + + Arguments: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use + (see [activations](../activations.md)). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step + (see [activations](../activations.md)). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix + (see [constraints](../constraints.md)). + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. + """ + + def __init__(self, + units, + activation='tanh', + recurrent_activation='hard_sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0., + recurrent_dropout=0., + implementation=1, + **kwargs): + super(GRUCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) @@ -737,22 +1246,15 @@ class GRU(Recurrent): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_spec = InputSpec(shape=(None, self.units)) + self.implementation = implementation + self.state_size = self.units + self._dropout_mask = None + self._recurrent_dropout_mask = None def build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tensor_shape.TensorShape(input_shape).as_list() - batch_size = input_shape[0] if self.stateful else None - self.input_dim = input_shape[2] - self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) - - self.states = [None] - if self.stateful: - self.reset_states() - + input_dim = input_shape[-1] self.kernel = self.add_weight( - shape=(self.input_dim, self.units * 3), + shape=(input_dim, self.units * 3), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -792,89 +1294,83 @@ class GRU(Recurrent): self.bias_h = None self.built = True - def preprocess_input(self, inputs, training=None): - if self.implementation == 0: - input_shape = inputs.get_shape().as_list() - input_dim = input_shape[2] - timesteps = input_shape[1] - - x_z = _time_distributed_dense( - inputs, - self.kernel_z, - self.bias_z, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_r = _time_distributed_dense( - inputs, - self.kernel_r, - self.bias_r, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_h = _time_distributed_dense( - inputs, - self.kernel_h, - self.bias_h, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - return K.concatenate([x_z, x_r, x_h], axis=2) - else: - return inputs - - def get_constants(self, inputs, training=None): - constants = [] - if self.implementation != 0 and 0 < self.dropout < 1: - input_shape = K.int_shape(inputs) - input_dim = input_shape[-1] - ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, int(input_dim))) + def _generate_dropout_mask(self, inputs, training=None): + if 0 < self.dropout < 1: + ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) def dropped_inputs(): return K.dropout(ones, self.dropout) - dp_mask = [ + self._dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] - constants.append(dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(3)]) + self._dropout_mask = None + def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) - def dropped_inputs(): # pylint: disable=function-redefined - return K.dropout(ones, self.recurrent_dropout) + def dropped_inputs(): + return K.dropout(ones, self.dropout) - rec_dp_mask = [ + self._recurrent_dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] - constants.append(rec_dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(3)]) - return constants + self._recurrent_dropout_mask = None - def step(self, inputs, states): + def call(self, inputs, states, training=None): h_tm1 = states[0] # previous memory - dp_mask = states[1] # dropout matrices for recurrent units - rec_dp_mask = states[2] - if self.implementation == 2: - matrix_x = K.dot(inputs * dp_mask[0], self.kernel) + # dropout matrices for input units + dp_mask = self._dropout_mask + # dropout matrices for recurrent units + rec_dp_mask = self._recurrent_dropout_mask + + if self.implementation == 1: + if 0. < self.dropout < 1.: + inputs_z = inputs * dp_mask[0] + inputs_r = inputs * dp_mask[1] + inputs_h = inputs * dp_mask[2] + else: + inputs_z = inputs + inputs_r = inputs + inputs_h = inputs + x_z = K.dot(inputs_z, self.kernel_z) + x_r = K.dot(inputs_r, self.kernel_r) + x_h = K.dot(inputs_h, self.kernel_h) + if self.use_bias: + x_z = K.bias_add(x_z, self.bias_z) + x_r = K.bias_add(x_r, self.bias_r) + x_h = K.bias_add(x_h, self.bias_h) + + if 0. < self.recurrent_dropout < 1.: + h_tm1_z = h_tm1 * rec_dp_mask[0] + h_tm1_r = h_tm1 * rec_dp_mask[1] + h_tm1_h = h_tm1 * rec_dp_mask[2] + else: + h_tm1_z = h_tm1 + h_tm1_r = h_tm1 + h_tm1_h = h_tm1 + z = self.recurrent_activation( + x_z + K.dot(h_tm1_z, self.recurrent_kernel_z)) + r = self.recurrent_activation( + x_r + K.dot(h_tm1_r, self.recurrent_kernel_r)) + + hh = self.activation(x_h + K.dot(r * h_tm1_h, self.recurrent_kernel_h)) + else: + if 0. < self.dropout < 1.: + inputs *= dp_mask[0] + matrix_x = K.dot(inputs, self.kernel) if self.use_bias: matrix_x = K.bias_add(matrix_x, self.bias) - matrix_inner = K.dot(h_tm1 * rec_dp_mask[0], - self.recurrent_kernel[:, :2 * self.units]) + if 0. < self.recurrent_dropout < 1.: + h_tm1 *= rec_dp_mask[0] + matrix_inner = K.dot(h_tm1, self.recurrent_kernel[:, :2 * self.units]) x_z = matrix_x[:, :self.units] x_r = matrix_x[:, self.units:2 * self.units] @@ -885,116 +1381,323 @@ class GRU(Recurrent): r = self.recurrent_activation(x_r + recurrent_r) x_h = matrix_x[:, 2 * self.units:] - recurrent_h = K.dot(r * h_tm1 * rec_dp_mask[0], - self.recurrent_kernel[:, 2 * self.units:]) + recurrent_h = K.dot(r * h_tm1, self.recurrent_kernel[:, 2 * self.units:]) hh = self.activation(x_h + recurrent_h) - else: - if self.implementation == 0: - x_z = inputs[:, :self.units] - x_r = inputs[:, self.units:2 * self.units] - x_h = inputs[:, 2 * self.units:] - elif self.implementation == 1: - x_z = K.dot(inputs * dp_mask[0], self.kernel_z) - x_r = K.dot(inputs * dp_mask[1], self.kernel_r) - x_h = K.dot(inputs * dp_mask[2], self.kernel_h) - if self.use_bias: - x_z = K.bias_add(x_z, self.bias_z) - x_r = K.bias_add(x_r, self.bias_r) - x_h = K.bias_add(x_h, self.bias_h) - else: - raise ValueError('Unknown `implementation` mode.') - z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0], - self.recurrent_kernel_z)) - r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1], - self.recurrent_kernel_r)) - - hh = self.activation(x_h + K.dot(r * h_tm1 * rec_dp_mask[2], - self.recurrent_kernel_h)) h = z * h_tm1 + (1 - z) * hh if 0 < self.dropout + self.recurrent_dropout: - h._uses_learning_phase = True + if training is None: + h._uses_learning_phase = True return h, [h] + +class GRU(RNN): + # pylint: disable=line-too-long + """Gated Recurrent Unit - Cho et al. + + 2014. + + Arguments: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use + (see [activations](../activations.md)). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step + (see [activations](../activations.md)). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + (see [regularizer](../regularizers.md)). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix + (see [constraints](../constraints.md)). + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. + return_sequences: Boolean. Whether to return the last output. + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + + References: + - [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) + - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/abs/1412.3555v1) + - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) + """ + # pylint: enable=line-too-long + + def __init__(self, + units, + activation='tanh', + recurrent_activation='hard_sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0., + recurrent_dropout=0., + implementation=1, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + **kwargs): + if implementation == 0: + logging.warning('`implementation=0` has been deprecated, ' + 'and now defaults to `implementation=1`.' + 'Please update your layer call.') + cell = GRUCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation) + super(GRU, self).__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + **kwargs) + self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + self.cell._generate_dropout_mask(inputs, training=training) + self.cell._generate_recurrent_dropout_mask(inputs, training=training) + return super(GRU, self).call( + inputs, mask=mask, training=training, initial_state=initial_state) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + def get_config(self): config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), + 'units': + self.units, + 'activation': + activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), + 'use_bias': + self.use_bias, + 'kernel_initializer': + initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), + 'bias_initializer': + initializers.serialize(self.bias_initializer), + 'kernel_regularizer': + regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), + 'bias_regularizer': + regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), + 'kernel_constraint': + constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout + 'bias_constraint': + constraints.serialize(self.bias_constraint), + 'dropout': + self.dropout, + 'recurrent_dropout': + self.recurrent_dropout, + 'implementation': + self.implementation } base_config = super(GRU, self).get_config() + del base_config['cell'] return dict(list(base_config.items()) + list(config.items())) + @classmethod + def from_config(cls, config): + if 'implementation' in config and config['implementation'] == 0: + config['implementation'] = 1 + return cls(**config) -class LSTM(Recurrent): - """Long-Short Term Memory unit - Hochreiter 1997. - For a step-by-step description of the algorithm, see - [this tutorial](http://deeplearning.net/tutorial/lstm.html). +class LSTMCell(Layer): + """Cell class for the LSTM layer. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use. + activation: Activation function to use + (see [activations](../activations.md)). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use - for the recurrent step. + for the recurrent step + (see [activations](../activations.md)). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state.. - bias_initializer: Initializer for the bias vector. + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Setting it to true will also force `bias_initializer="zeros"`. This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - - References: - - [Long short-term - memory]((http://www.bioinf.jku.at/publications/older/2604.pdf) - (original 1997 paper) - - [Supervised sequence labeling with recurrent neural - networks](http://www.cs.toronto.edu/~graves/preprint.pdf) - - [A Theoretically Grounded Application of Dropout in Recurrent Neural - Networks](http://arxiv.org/abs/1512.05287) + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. """ def __init__(self, @@ -1009,15 +1712,14 @@ class LSTM(Recurrent): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., + implementation=1, **kwargs): - super(LSTM, self).__init__( - activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + super(LSTMCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) @@ -1038,25 +1740,15 @@ class LSTM(Recurrent): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_spec = [ - InputSpec(shape=(None, self.units)), - InputSpec(shape=(None, self.units)) - ] + self.implementation = implementation + self.state_size = (self.units, self.units) + self._dropout_mask = None + self._recurrent_dropout_mask = None def build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tensor_shape.TensorShape(input_shape).as_list() - batch_size = input_shape[0] if self.stateful else None - self.input_dim = input_shape[2] - self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) - - self.states = [None, None] - if self.stateful: - self.reset_states() - + input_dim = input_shape[-1] self.kernel = self.add_weight( - shape=(self.input_dim, self.units * 4), + shape=(input_dim, self.units * 4), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -1112,96 +1804,90 @@ class LSTM(Recurrent): self.bias_o = None self.built = True - def preprocess_input(self, inputs, training=None): - if self.implementation == 0: - input_shape = inputs.get_shape().as_list() - input_dim = input_shape[2] - timesteps = input_shape[1] - - x_i = _time_distributed_dense( - inputs, - self.kernel_i, - self.bias_i, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_f = _time_distributed_dense( - inputs, - self.kernel_f, - self.bias_f, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_c = _time_distributed_dense( - inputs, - self.kernel_c, - self.bias_c, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_o = _time_distributed_dense( - inputs, - self.kernel_o, - self.bias_o, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - return K.concatenate([x_i, x_f, x_c, x_o], axis=2) - else: - return inputs - - def get_constants(self, inputs, training=None): - constants = [] - if self.implementation != 0 and 0 < self.dropout < 1: - input_shape = K.int_shape(inputs) - input_dim = input_shape[-1] - ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, int(input_dim))) + def _generate_dropout_mask(self, inputs, training=None): + if 0 < self.dropout < 1: + ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) def dropped_inputs(): return K.dropout(ones, self.dropout) - dp_mask = [ + self._dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] - constants.append(dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(4)]) + self._dropout_mask = None + def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) - def dropped_inputs(): # pylint: disable=function-redefined - return K.dropout(ones, self.recurrent_dropout) + def dropped_inputs(): + return K.dropout(ones, self.dropout) - rec_dp_mask = [ + self._recurrent_dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] - constants.append(rec_dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(4)]) - return constants - - def step(self, inputs, states): - h_tm1 = states[0] - c_tm1 = states[1] - dp_mask = states[2] - rec_dp_mask = states[3] - - if self.implementation == 2: - z = K.dot(inputs * dp_mask[0], self.kernel) - z += K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel) + self._recurrent_dropout_mask = None + + def call(self, inputs, states, training=None): + # dropout matrices for input units + dp_mask = self._dropout_mask + # dropout matrices for recurrent units + rec_dp_mask = self._recurrent_dropout_mask + + h_tm1 = states[0] # previous memory state + c_tm1 = states[1] # previous carry state + + if self.implementation == 1: + if 0 < self.dropout < 1.: + inputs_i = inputs * dp_mask[0] + inputs_f = inputs * dp_mask[1] + inputs_c = inputs * dp_mask[2] + inputs_o = inputs * dp_mask[3] + else: + inputs_i = inputs + inputs_f = inputs + inputs_c = inputs + inputs_o = inputs + x_i = K.dot(inputs_i, self.kernel_i) + x_f = K.dot(inputs_f, self.kernel_f) + x_c = K.dot(inputs_c, self.kernel_c) + x_o = K.dot(inputs_o, self.kernel_o) + if self.use_bias: + x_i = K.bias_add(x_i, self.bias_i) + x_f = K.bias_add(x_f, self.bias_f) + x_c = K.bias_add(x_c, self.bias_c) + x_o = K.bias_add(x_o, self.bias_o) + + if 0 < self.recurrent_dropout < 1.: + h_tm1_i = h_tm1 * rec_dp_mask[0] + h_tm1_f = h_tm1 * rec_dp_mask[1] + h_tm1_c = h_tm1 * rec_dp_mask[2] + h_tm1_o = h_tm1 * rec_dp_mask[3] + else: + h_tm1_i = h_tm1 + h_tm1_f = h_tm1 + h_tm1_c = h_tm1 + h_tm1_o = h_tm1 + i = self.recurrent_activation( + x_i + K.dot(h_tm1_i, self.recurrent_kernel_i)) + f = self.recurrent_activation( + x_f + K.dot(h_tm1_f, self.recurrent_kernel_f)) + c = f * c_tm1 + i * self.activation( + x_c + K.dot(h_tm1_c, self.recurrent_kernel_c)) + o = self.recurrent_activation( + x_o + K.dot(h_tm1_o, self.recurrent_kernel_o)) + else: + if 0. < self.dropout < 1.: + inputs *= dp_mask[0] + z = K.dot(inputs, self.kernel) + if 0. < self.recurrent_dropout < 1.: + h_tm1 *= rec_dp_mask[0] + z += K.dot(h_tm1, self.recurrent_kernel) if self.use_bias: z = K.bias_add(z, self.bias) @@ -1214,57 +1900,606 @@ class LSTM(Recurrent): f = self.recurrent_activation(z1) c = f * c_tm1 + i * self.activation(z2) o = self.recurrent_activation(z3) - else: - if self.implementation == 0: - x_i = inputs[:, :self.units] - x_f = inputs[:, self.units:2 * self.units] - x_c = inputs[:, 2 * self.units:3 * self.units] - x_o = inputs[:, 3 * self.units:] - elif self.implementation == 1: - x_i = K.dot(inputs * dp_mask[0], self.kernel_i) + self.bias_i - x_f = K.dot(inputs * dp_mask[1], self.kernel_f) + self.bias_f - x_c = K.dot(inputs * dp_mask[2], self.kernel_c) + self.bias_c - x_o = K.dot(inputs * dp_mask[3], self.kernel_o) + self.bias_o - else: - raise ValueError('Unknown `implementation` mode.') - i = self.recurrent_activation(x_i + K.dot(h_tm1 * rec_dp_mask[0], - self.recurrent_kernel_i)) - f = self.recurrent_activation(x_f + K.dot(h_tm1 * rec_dp_mask[1], - self.recurrent_kernel_f)) - c = f * c_tm1 + i * self.activation( - x_c + K.dot(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c)) - o = self.recurrent_activation(x_o + K.dot(h_tm1 * rec_dp_mask[3], - self.recurrent_kernel_o)) h = o * self.activation(c) if 0 < self.dropout + self.recurrent_dropout: - h._uses_learning_phase = True + if training is None: + h._uses_learning_phase = True return h, [h, c] - def get_config(self): - config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout - } - base_config = super(LSTM, self).get_config() + +class LSTM(RNN): + # pylint: disable=line-too-long + """Long-Short Term Memory layer - Hochreiter 1997. + + Arguments: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use + (see [activations](../activations.md)). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step + (see [activations](../activations.md)). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). + unit_forget_bias: Boolean. + If True, add 1 to the bias of the forget gate at initialization. + Setting it to true will also force `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et + al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + (see [regularizer](../regularizers.md)). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix + (see [constraints](../constraints.md)). + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. + return_sequences: Boolean. Whether to return the last output. + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + + References: + - [Long short-term memory](http://www.bioinf.jku.at/publications/older/2604.pdf) + - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015) + - [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf) + - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) + """ + # pylint: enable=line-too-long + + def __init__(self, + units, + activation='tanh', + recurrent_activation='hard_sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0., + recurrent_dropout=0., + implementation=1, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + **kwargs): + if implementation == 0: + logging.warning('`implementation=0` has been deprecated, ' + 'and now defaults to `implementation=1`.' + 'Please update your layer call.') + cell = LSTMCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + unit_forget_bias=unit_forget_bias, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation) + super(LSTM, self).__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + **kwargs) + self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + self.cell._generate_dropout_mask(inputs, training=training) + self.cell._generate_recurrent_dropout_mask(inputs, training=training) + return super(LSTM, self).call( + inputs, mask=mask, training=training, initial_state=initial_state) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def unit_forget_bias(self): + return self.cell.unit_forget_bias + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + + def get_config(self): + config = { + 'units': + self.units, + 'activation': + activations.serialize(self.activation), + 'recurrent_activation': + activations.serialize(self.recurrent_activation), + 'use_bias': + self.use_bias, + 'kernel_initializer': + initializers.serialize(self.kernel_initializer), + 'recurrent_initializer': + initializers.serialize(self.recurrent_initializer), + 'bias_initializer': + initializers.serialize(self.bias_initializer), + 'unit_forget_bias': + self.unit_forget_bias, + 'kernel_regularizer': + regularizers.serialize(self.kernel_regularizer), + 'recurrent_regularizer': + regularizers.serialize(self.recurrent_regularizer), + 'bias_regularizer': + regularizers.serialize(self.bias_regularizer), + 'activity_regularizer': + regularizers.serialize(self.activity_regularizer), + 'kernel_constraint': + constraints.serialize(self.kernel_constraint), + 'recurrent_constraint': + constraints.serialize(self.recurrent_constraint), + 'bias_constraint': + constraints.serialize(self.bias_constraint), + 'dropout': + self.dropout, + 'recurrent_dropout': + self.recurrent_dropout, + 'implementation': + self.implementation + } + base_config = super(LSTM, self).get_config() + del base_config['cell'] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if 'implementation' in config and config['implementation'] == 0: + config['implementation'] = 1 + return cls(**config) + + +class Recurrent(Layer): + """Deprecated abstract base class for recurrent layers. + + It still exists because it is leveraged by the convolutional-recurrent layers. + It will be removed entirely in the future. + It was never part of the public API. + Do not use. + + Arguments: + weights: list of Numpy arrays to set as initial weights. + The list should have 3 elements, of shapes: + `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + implementation: one of {0, 1, or 2}. + If set to 0, the RNN will use + an implementation that uses fewer, larger matrix products, + thus running faster on CPU but consuming more memory. + If set to 1, the RNN will use more matrix products, + but smaller ones, thus running slower + (may actually be faster on GPU) while consuming less memory. + If set to 2 (LSTM/GRU only), + the RNN will combine the input gate, + the forget gate and the output gate into a single matrix, + enabling more time-efficient parallelization on the GPU. + Note: RNN dropout must be shared for all gates, + resulting in a slightly reduced regularization. + input_dim: dimensionality of the input (integer). + This argument (or alternatively, the keyword argument `input_shape`) + is required when using this layer as the first layer in a model. + input_length: Length of input sequences, to be specified + when it is constant. + This argument is required if you are going to connect + `Flatten` then `Dense` layers upstream + (without it, the shape of the dense outputs cannot be computed). + Note that if the recurrent layer is not the first layer + in your model, you would need to specify the input length + at the level of the first layer + (e.g. via the `input_shape` argument) + + Input shape: + 3D tensor with shape `(batch_size, timesteps, input_dim)`, + (Optional) 2D tensors with shape `(batch_size, output_dim)`. + + Output shape: + - if `return_state`: a list of tensors. The first tensor is + the output. The remaining tensors are the last states, + each with shape `(batch_size, units)`. + - if `return_sequences`: 3D tensor with shape + `(batch_size, timesteps, units)`. + - else, 2D tensor with shape `(batch_size, units)`. + + # Masking + This layer supports masking for input data with a variable number + of timesteps. To introduce masks to your data, + use an `Embedding` layer with the `mask_zero` parameter + set to `True`. + + # Note on using statefulness in RNNs + You can set RNN layers to be 'stateful', which means that the states + computed for the samples in one batch will be reused as initial states + for the samples in the next batch. This assumes a one-to-one mapping + between samples in different successive batches. + + To enable statefulness: + - specify `stateful=True` in the layer constructor. + - specify a fixed batch size for your model, by passing + if sequential model: + `batch_input_shape=(...)` to the first layer in your model. + else for functional model with 1 or more Input layers: + `batch_shape=(...)` to all the first layers in your model. + This is the expected shape of your inputs + *including the batch size*. + It should be a tuple of integers, e.g. `(32, 10, 100)`. + - specify `shuffle=False` when calling fit(). + + To reset the states of your model, call `.reset_states()` on either + a specific layer, or on your entire model. + + # Note on specifying the initial state of RNNs + You can specify the initial state of RNN layers symbolically by + calling them with the keyword argument `initial_state`. The value of + `initial_state` should be a tensor or list of tensors representing + the initial state of the RNN layer. + + You can specify the initial state of RNN layers numerically by + calling `reset_states` with the keyword argument `states`. The value of + `states` should be a numpy array or list of numpy arrays representing + the initial state of the RNN layer. + """ + + def __init__(self, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + implementation=0, + **kwargs): + super(Recurrent, self).__init__(**kwargs) + self.return_sequences = return_sequences + self.return_state = return_state + self.go_backwards = go_backwards + self.stateful = stateful + self.unroll = unroll + self.implementation = implementation + self.supports_masking = True + self.input_spec = [InputSpec(ndim=3)] + self.state_spec = None + self.dropout = 0 + self.recurrent_dropout = 0 + + def _compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_shape = tensor_shape.TensorShape(input_shape).as_list() + if self.return_sequences: + output_shape = (input_shape[0], input_shape[1], self.units) + else: + output_shape = (input_shape[0], self.units) + + if self.return_state: + state_shape = [tensor_shape.TensorShape( + (input_shape[0], self.units)) for _ in self.states] + return [tensor_shape.TensorShape(output_shape)] + state_shape + return tensor_shape.TensorShape(output_shape) + + def compute_mask(self, inputs, mask): + if isinstance(mask, list): + mask = mask[0] + output_mask = mask if self.return_sequences else None + if self.return_state: + state_mask = [None for _ in self.states] + return [output_mask] + state_mask + return output_mask + + def step(self, inputs, states): + raise NotImplementedError + + def get_constants(self, inputs, training=None): + return [] + + def get_initial_state(self, inputs): + # build an all-zero tensor of shape (samples, output_dim) + initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) + initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) + initial_state = K.expand_dims(initial_state) # (samples, 1) + initial_state = K.tile(initial_state, [1, + self.units]) # (samples, output_dim) + initial_state = [initial_state for _ in range(len(self.states))] + return initial_state + + def preprocess_input(self, inputs, training=None): + return inputs + + def __call__(self, inputs, initial_state=None, **kwargs): + if (isinstance(inputs, (list, tuple)) and + len(inputs) > 1 + and initial_state is None): + initial_state = inputs[1:] + inputs = inputs[0] + + # If `initial_state` is specified, + # and if it a Keras tensor, + # then add it to the inputs and temporarily + # modify the input spec to include the state. + if initial_state is None: + return super(Recurrent, self).__call__(inputs, **kwargs) + + if not isinstance(initial_state, (list, tuple)): + initial_state = [initial_state] + + is_keras_tensor = hasattr(initial_state[0], '_keras_history') + for tensor in initial_state: + if hasattr(tensor, '_keras_history') != is_keras_tensor: + raise ValueError('The initial state of an RNN layer cannot be' + ' specified with a mix of Keras tensors and' + ' non-Keras tensors') + + if is_keras_tensor: + # Compute the full input spec, including state + input_spec = self.input_spec + state_spec = self.state_spec + if not isinstance(input_spec, list): + input_spec = [input_spec] + if not isinstance(state_spec, list): + state_spec = [state_spec] + self.input_spec = input_spec + state_spec + + # Compute the full inputs, including state + inputs = [inputs] + list(initial_state) + + # Perform the call + output = super(Recurrent, self).__call__(inputs, **kwargs) + + # Restore original input spec + self.input_spec = input_spec + return output + else: + kwargs['initial_state'] = initial_state + return super(Recurrent, self).__call__(inputs, **kwargs) + + def call(self, inputs, mask=None, training=None, initial_state=None): + # input shape: `(samples, time (padded with zeros), input_dim)` + # note that the .build() method of subclasses MUST define + # self.input_spec and self.state_spec with complete input shapes. + if isinstance(inputs, list): + initial_state = inputs[1:] + inputs = inputs[0] + elif initial_state is not None: + pass + elif self.stateful: + initial_state = self.states + else: + initial_state = self.get_initial_state(inputs) + + if isinstance(mask, list): + mask = mask[0] + + if len(initial_state) != len(self.states): + raise ValueError('Layer has ' + str(len(self.states)) + + ' states but was passed ' + str(len(initial_state)) + + ' initial states.') + input_shape = K.int_shape(inputs) + if self.unroll and input_shape[1] is None: + raise ValueError('Cannot unroll a RNN if the ' + 'time dimension is undefined. \n' + '- If using a Sequential model, ' + 'specify the time dimension by passing ' + 'an `input_shape` or `batch_input_shape` ' + 'argument to your first layer. If your ' + 'first layer is an Embedding, you can ' + 'also use the `input_length` argument.\n' + '- If using the functional API, specify ' + 'the time dimension by passing a `shape` ' + 'or `batch_shape` argument to your Input layer.') + constants = self.get_constants(inputs, training=None) + preprocessed_input = self.preprocess_input(inputs, training=None) + last_output, outputs, states = K.rnn( + self.step, + preprocessed_input, + initial_state, + go_backwards=self.go_backwards, + mask=mask, + constants=constants, + unroll=self.unroll) + if self.stateful: + updates = [] + for i in range(len(states)): + updates.append((self.states[i], states[i])) + self.add_update(updates, inputs) + + # Properly set learning phase + if 0 < self.dropout + self.recurrent_dropout: + last_output._uses_learning_phase = True + outputs._uses_learning_phase = True + + if not self.return_sequences: + outputs = last_output + + if self.return_state: + if not isinstance(states, (list, tuple)): + states = [states] + else: + states = list(states) + return [outputs] + states + return outputs + + def reset_states(self, states=None): + if not self.stateful: + raise AttributeError('Layer must be stateful.') + batch_size = self.input_spec[0].shape[0] + if not batch_size: + raise ValueError('If a RNN is stateful, it needs to know ' + 'its batch size. Specify the batch size ' + 'of your input tensors: \n' + '- If using a Sequential model, ' + 'specify the batch size by passing ' + 'a `batch_input_shape` ' + 'argument to your first layer.\n' + '- If using the functional API, specify ' + 'the time dimension by passing a ' + '`batch_shape` argument to your Input layer.') + # initialize state if None + if self.states[0] is None: + self.states = [K.zeros((batch_size, self.units)) for _ in self.states] + elif states is None: + for state in self.states: + K.set_value(state, np.zeros((batch_size, self.units))) + else: + if not isinstance(states, (list, tuple)): + states = [states] + if len(states) != len(self.states): + raise ValueError('Layer ' + self.name + ' expects ' + + str(len(self.states)) + ' states, ' + 'but it received ' + str(len(states)) + + ' state values. Input received: ' + str(states)) + for index, (value, state) in enumerate(zip(states, self.states)): + if value.shape != (batch_size, self.units): + raise ValueError('State ' + str(index) + + ' is incompatible with layer ' + self.name + + ': expected shape=' + str((batch_size, self.units)) + + ', found shape=' + str(value.shape)) + K.set_value(state, value) + + def get_config(self): + config = { + 'return_sequences': self.return_sequences, + 'return_state': self.return_state, + 'go_backwards': self.go_backwards, + 'stateful': self.stateful, + 'unroll': self.unroll, + 'implementation': self.implementation + } + base_config = super(Recurrent, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py new file mode 100644 index 0000000000..b1f89a30bb --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py @@ -0,0 +1,378 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for recurrent layers functionality other than GRU, LSTM, SimpleRNN. + +See also: lstm_test.py, gru_test.py, simplernn_test.py. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras._impl import keras +from tensorflow.python.platform import test + + +class RNNTest(test.TestCase): + + def test_minimal_rnn_cell_non_layer(self): + + class MinimalRNNCell(object): + + def __init__(self, units, input_dim): + self.units = units + self.state_size = units + self.kernel = keras.backend.variable( + np.random.random((input_dim, units))) + + def call(self, inputs, states): + prev_output = states[0] + output = keras.backend.dot(inputs, self.kernel) + prev_output + return output, [output] + + with self.test_session(): + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [MinimalRNNCell(8, 5), + MinimalRNNCell(32, 8), + MinimalRNNCell(32, 32)] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_minimal_rnn_cell_non_layer_multiple_states(self): + + class MinimalRNNCell(object): + + def __init__(self, units, input_dim): + self.units = units + self.state_size = (units, units) + self.kernel = keras.backend.variable( + np.random.random((input_dim, units))) + + def call(self, inputs, states): + prev_output_1 = states[0] + prev_output_2 = states[1] + output = keras.backend.dot(inputs, self.kernel) + output += prev_output_1 + output -= prev_output_2 + return output, [output * 2, output * 3] + + with self.test_session(): + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [MinimalRNNCell(8, 5), + MinimalRNNCell(16, 8), + MinimalRNNCell(32, 16)] + layer = keras.layers.RNN(cells) + assert layer.cell.state_size == (32, 32, 16, 16, 8, 8) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_minimal_rnn_cell_layer(self): + + class MinimalRNNCell(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(MinimalRNNCell, self).__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = keras.backend.dot(inputs, self.kernel) + output = h + keras.backend.dot(prev_output, self.recurrent_kernel) + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(MinimalRNNCell, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + with self.test_session(): + # Test basic case. + x = keras.Input((None, 5)) + cell = MinimalRNNCell(32) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # Test stacking. + cells = [MinimalRNNCell(8), + MinimalRNNCell(12), + MinimalRNNCell(32)] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacked RNN serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + def test_rnn_cell_with_constants_layer(self): + + class RNNCellWithConstants(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(RNNCellWithConstants, self).__init__(**kwargs) + + def build(self, input_shape): + if not isinstance(input_shape, list): + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed + + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), + initializer='uniform', + name='constant_kernel') + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(RNNCellWithConstants, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + with self.test_session(): + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + cell = RNNCellWithConstants(32) + layer = keras.layers.RNN(cell) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + with self.test_session(): + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + with self.test_session(): + # test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer([x, c]) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + def test_rnn_cell_with_constants_layer_passing_initial_state(self): + + class RNNCellWithConstants(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(RNNCellWithConstants, self).__init__(**kwargs) + + def build(self, input_shape): + if not isinstance(input_shape, list): + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed + + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), + initializer='uniform', + name='constant_kernel') + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(RNNCellWithConstants, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + with self.test_session(): + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + s = keras.Input((32,)) + cell = RNNCellWithConstants(32) + layer = keras.layers.RNN(cell) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + with self.test_session(): + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + s_np = np.random.random((6, 32)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, s_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, s_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # verify that state is used + y_np_2_different_s = model.predict([x_np, s_np + 10., c_np]) + with self.assertRaises(AssertionError): + self.assertAllClose(y_np, y_np_2_different_s, atol=1e-4) + + with self.test_session(): + # test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer([x, s, c]) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, s_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + def test_stacked_rnn_attributes(self): + cells = [keras.layers.LSTMCell(3), + keras.layers.LSTMCell(3, kernel_regularizer='l2')] + layer = keras.layers.RNN(cells) + layer.build((None, None, 5)) + + # Test regularization losses + assert len(layer.losses) == 1 + + # Test weights + assert len(layer.trainable_weights) == 6 + cells[0].trainable = False + assert len(layer.trainable_weights) == 3 + assert len(layer.non_trainable_weights) == 3 + + # Test `get_losses_for` + x = keras.Input((None, 5)) + y = keras.backend.sum(x) + cells[0].add_loss(y, inputs=x) + assert layer.get_losses_for(x) == [y] + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py b/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py index 9833485236..7edebdacd0 100644 --- a/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py @@ -156,8 +156,10 @@ class SimpleRNNLayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((2, 3, 2)))) - self.assertEqual(len(layer.losses), 4) + + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) def test_constraints_SimpleRNN(self): embedding_dim = 4 @@ -175,9 +177,9 @@ class SimpleRNNLayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) def test_with_masking_layer_SimpleRNN(self): layer_class = keras.layers.SimpleRNN diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py index acf0a5e179..b94bf8f0f6 100644 --- a/tensorflow/python/keras/layers/__init__.py +++ b/tensorflow/python/keras/layers/__init__.py @@ -134,6 +134,11 @@ from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool2D from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool3D # Recurrent layers. +from tensorflow.python.keras._impl.keras.layers.recurrent import RNN +from tensorflow.python.keras._impl.keras.layers.recurrent import StackedRNNCells +from tensorflow.python.keras._impl.keras.layers.recurrent import SimpleRNNCell +from tensorflow.python.keras._impl.keras.layers.recurrent import GRUCell +from tensorflow.python.keras._impl.keras.layers.recurrent import LSTMCell from tensorflow.python.keras._impl.keras.layers.recurrent import SimpleRNN from tensorflow.python.keras._impl.keras.layers.recurrent import GRU from tensorflow.python.keras._impl.keras.layers.recurrent import LSTM diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 8c8d774b75..c71e8382e9 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -642,7 +642,7 @@ class Layer(object): for output in output_list: with ops.name_scope('ActivityRegularizer'): activity_regularization = self._activity_regularizer(output) - self.add_loss(activity_regularization) + self.add_loss(activity_regularization, inputs=inputs) if not in_deferred_mode: # TODO(fchollet): consider how masking will work with deferred mode. diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 71eff2f965..7ddfe37827 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -574,6 +574,13 @@ class BaseLayerTest(test.TestCase): self.assertEqual(3, result['label'].numpy()) self.assertEqual(4.0, result['logits'].numpy()) + def testActivityRegularizer(self): + regularizer = math_ops.reduce_sum + layer = base_layers.Layer(activity_regularizer=regularizer) + x = array_ops.placeholder('int32') + layer.apply(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) + class NetworkTest(test.TestCase): diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt new file mode 100644 index 0000000000..763184899c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -0,0 +1,179 @@ +path: "tensorflow.keras.layers.GRUCell" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt index 9237399254..889f2cbc23 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt @@ -1,14 +1,34 @@ path: "tensorflow.keras.layers.GRU" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" + member { + name: "activation" + mtype: "" + } member { name: "activity_regularizer" mtype: "" } + member { + name: "bias_constraint" + mtype: "" + } + member { + name: "bias_initializer" + mtype: "" + } + member { + name: "bias_regularizer" + mtype: "" + } + member { + name: "dropout" + mtype: "" + } member { name: "dtype" mtype: "" @@ -17,6 +37,10 @@ tf_class { name: "graph" mtype: "" } + member { + name: "implementation" + mtype: "" + } member { name: "inbound_nodes" mtype: "" @@ -33,6 +57,18 @@ tf_class { name: "input_shape" mtype: "" } + member { + name: "kernel_constraint" + mtype: "" + } + member { + name: "kernel_initializer" + mtype: "" + } + member { + name: "kernel_regularizer" + mtype: "" + } member { name: "losses" mtype: "" @@ -65,10 +101,34 @@ tf_class { name: "output_shape" mtype: "" } + member { + name: "recurrent_activation" + mtype: "" + } + member { + name: "recurrent_constraint" + mtype: "" + } + member { + name: "recurrent_dropout" + mtype: "" + } + member { + name: "recurrent_initializer" + mtype: "" + } + member { + name: "recurrent_regularizer" + mtype: "" + } member { name: "scope_name" mtype: "" } + member { + name: "states" + mtype: "" + } member { name: "trainable_variables" mtype: "" @@ -77,10 +137,18 @@ tf_class { name: "trainable_weights" mtype: "" } + member { + name: "units" + mtype: "" + } member { name: "updates" mtype: "" } + member { + name: "use_bias" + mtype: "" + } member { name: "variables" mtype: "" @@ -91,7 +159,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], " } member_method { name: "add_loss" @@ -137,10 +205,6 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_constants" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -159,7 +223,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_output_at" @@ -181,10 +245,6 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "preprocess_input" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -193,8 +253,4 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "step" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt new file mode 100644 index 0000000000..4ce7c34f6c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -0,0 +1,179 @@ +path: "tensorflow.keras.layers.LSTMCell" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt index 20935e2f99..e1a1d0d58e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -1,14 +1,34 @@ path: "tensorflow.keras.layers.LSTM" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" + member { + name: "activation" + mtype: "" + } member { name: "activity_regularizer" mtype: "" } + member { + name: "bias_constraint" + mtype: "" + } + member { + name: "bias_initializer" + mtype: "" + } + member { + name: "bias_regularizer" + mtype: "" + } + member { + name: "dropout" + mtype: "" + } member { name: "dtype" mtype: "" @@ -17,6 +37,10 @@ tf_class { name: "graph" mtype: "" } + member { + name: "implementation" + mtype: "" + } member { name: "inbound_nodes" mtype: "" @@ -33,6 +57,18 @@ tf_class { name: "input_shape" mtype: "" } + member { + name: "kernel_constraint" + mtype: "" + } + member { + name: "kernel_initializer" + mtype: "" + } + member { + name: "kernel_regularizer" + mtype: "" + } member { name: "losses" mtype: "" @@ -65,10 +101,34 @@ tf_class { name: "output_shape" mtype: "" } + member { + name: "recurrent_activation" + mtype: "" + } + member { + name: "recurrent_constraint" + mtype: "" + } + member { + name: "recurrent_dropout" + mtype: "" + } + member { + name: "recurrent_initializer" + mtype: "" + } + member { + name: "recurrent_regularizer" + mtype: "" + } member { name: "scope_name" mtype: "" } + member { + name: "states" + mtype: "" + } member { name: "trainable_variables" mtype: "" @@ -77,10 +137,22 @@ tf_class { name: "trainable_weights" mtype: "" } + member { + name: "unit_forget_bias" + mtype: "" + } + member { + name: "units" + mtype: "" + } member { name: "updates" mtype: "" } + member { + name: "use_bias" + mtype: "" + } member { name: "variables" mtype: "" @@ -91,7 +163,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], " } member_method { name: "add_loss" @@ -137,10 +209,6 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_constants" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -159,7 +227,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_output_at" @@ -181,10 +249,6 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "preprocess_input" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -193,8 +257,4 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "step" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt new file mode 100644 index 0000000000..c7c9b10f22 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt @@ -0,0 +1,191 @@ +path: "tensorflow.keras.layers.RNN" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "states" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'activity_regularizer\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\', \'constants\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_initial_state" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt new file mode 100644 index 0000000000..10c7f8867c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -0,0 +1,179 @@ +path: "tensorflow.keras.layers.SimpleRNNCell" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt index f4148fcc23..588df21088 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -1,14 +1,34 @@ path: "tensorflow.keras.layers.SimpleRNN" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" + member { + name: "activation" + mtype: "" + } member { name: "activity_regularizer" mtype: "" } + member { + name: "bias_constraint" + mtype: "" + } + member { + name: "bias_initializer" + mtype: "" + } + member { + name: "bias_regularizer" + mtype: "" + } + member { + name: "dropout" + mtype: "" + } member { name: "dtype" mtype: "" @@ -33,6 +53,18 @@ tf_class { name: "input_shape" mtype: "" } + member { + name: "kernel_constraint" + mtype: "" + } + member { + name: "kernel_initializer" + mtype: "" + } + member { + name: "kernel_regularizer" + mtype: "" + } member { name: "losses" mtype: "" @@ -65,10 +97,30 @@ tf_class { name: "output_shape" mtype: "" } + member { + name: "recurrent_constraint" + mtype: "" + } + member { + name: "recurrent_dropout" + mtype: "" + } + member { + name: "recurrent_initializer" + mtype: "" + } + member { + name: "recurrent_regularizer" + mtype: "" + } member { name: "scope_name" mtype: "" } + member { + name: "states" + mtype: "" + } member { name: "trainable_variables" mtype: "" @@ -77,10 +129,18 @@ tf_class { name: "trainable_weights" mtype: "" } + member { + name: "units" + mtype: "" + } member { name: "updates" mtype: "" } + member { + name: "use_bias" + mtype: "" + } member { name: "variables" mtype: "" @@ -91,7 +151,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'False\', \'False\', \'False\', \'False\', \'False\'], " } member_method { name: "add_loss" @@ -137,10 +197,6 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_constants" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -159,7 +215,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_output_at" @@ -181,10 +237,6 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "preprocess_input" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -193,8 +245,4 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "step" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt new file mode 100644 index 0000000000..5779e41342 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -0,0 +1,183 @@ +path: "tensorflow.keras.layers.StackedRNNCells" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "state_size" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'cells\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt index 8466c3e039..fe336c4be5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt @@ -140,6 +140,10 @@ tf_module { name: "GRU" mtype: "" } + member { + name: "GRUCell" + mtype: "" + } member { name: "GaussianDropout" mtype: "" @@ -208,6 +212,10 @@ tf_module { name: "LSTM" mtype: "" } + member { + name: "LSTMCell" + mtype: "" + } member { name: "Lambda" mtype: "" @@ -272,6 +280,10 @@ tf_module { name: "Permute" mtype: "" } + member { + name: "RNN" + mtype: "" + } member { name: "RepeatVector" mtype: "" @@ -292,6 +304,10 @@ tf_module { name: "SimpleRNN" mtype: "" } + member { + name: "SimpleRNNCell" + mtype: "" + } member { name: "SpatialDropout1D" mtype: "" @@ -304,6 +320,10 @@ tf_module { name: "SpatialDropout3D" mtype: "" } + member { + name: "StackedRNNCells" + mtype: "" + } member { name: "ThresholdedReLU" mtype: "" diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index f1c207f9b6..8d4e4c23dc 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -98,7 +98,8 @@ do_pylint() { "^tensorflow/contrib/eager/python/evaluator\.py.*\[E0202.*method-hidden "\ "^tensorflow/contrib/eager/python/metrics_impl\.py.*\[E0202.*method-hidden "\ "^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\ -"^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable" +"^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable "\ +"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition" echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\"" -- GitLab From 8bb665ae1c8f2aedd479b5bfe2403ac54e37319e Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 8 Nov 2017 15:19:12 -0800 Subject: [PATCH 0069/1801] Improve usability of TPUEstimator. 1) Log how many batches to enqueue. The old message is very confusing. 2) If input_pipeline has queue runner, generate a logging (legacy mode) or error out (new mode) 3) If input pipeline has summaries, generate a logging (legacy mode) or error out (new mode) PiperOrigin-RevId: 175073856 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 5a3b831429..16d712af9e 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -535,13 +535,15 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): session, self._dequeue_ops) def before_run(self, run_context): - logging.info('Enqueue next batch of data to infeed.') - iterations = run_context.session.run(self._iterations_per_loop_var) + + logging.info('Enqueue next (%d) batch(es) of data to infeed.', iterations) + self._infeed_thd_controller.send_next_batch_signal(iterations) if self._dequeue_ops is not None: # TODO(xiejw): Refactor the outfeed dequeue into tf.while_loop. - logging.info('Dequeue next batch of data from outfeed.') + logging.info( + 'Dequeue next (%d) batch(es) of data from outfeed.', iterations) self._outfeed_thd_controller.send_next_batch_signal(iterations) def end(self, session): @@ -842,6 +844,8 @@ class _InputPipeline(object): # structure is recorded. enqueue_ops = self._invoke_input_fn_and_record_structure() + self._validate_input_pipeline() + def dequeue_fn(): """dequeue_fn is used by TPU to retrieve the tensors.""" values = self._infeed_queue.generate_dequeue_op() @@ -920,6 +924,31 @@ class _InputPipeline(object): else: return enqueue_fn() + def _validate_input_pipeline(self): + # Perform some sanity checks to log user friendly information. We should + # error out to give users better error message. But, if + # _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break + # user code, so, log a warning. + if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS): + err_msg = ('Input pipeline contains one or more QueueRunners. ' + 'These are not supported via TPUEstimator. You must convert ' + 'your input pipeline to use `tf.data` instead (see ' + 'https://www.tensorflow.org/programmers_guide/datasets for ' + 'instructions.') + if _WRAP_INPUT_FN_INTO_WHILE_LOOP: + raise RuntimeError(err_msg) + else: + logging.warn(err_msg) + elif ops.get_default_graph().get_collection(ops.GraphKeys.SUMMARIES): + # Queue Runner has summary Ops by default. So here we use elif to do + # necessary checks for Dataset input pipeline only. + err_msg = ('Input pipeline contains `tf.summary` operations. ' + 'These are not currently supported.') + if _WRAP_INPUT_FN_INTO_WHILE_LOOP: + raise RuntimeError(err_msg) + else: + logging.warn(err_msg) + class _ModelFnWrapper(object): """A `model_fn` wrapper. -- GitLab From 12d6b450b2be345b3848efd8d623b1507a2c630f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 15:24:01 -0800 Subject: [PATCH 0070/1801] Hlo parser: support window and convolution. Also, to make the text format easier to write and unambiguous: - Print "window={}" around the window attribute; rename the "window" sub attribute to "size"; - Print the dim_lables in logical order, instead of physical order. PiperOrigin-RevId: 175074526 --- .../compiler/xla/service/hlo_instruction.cc | 10 +- .../compiler/xla/tools/parser/README.md | 16 +- .../compiler/xla/tools/parser/hlo_lexer.cc | 65 +- .../compiler/xla/tools/parser/hlo_lexer.h | 6 +- .../compiler/xla/tools/parser/hlo_parser.cc | 589 ++++++++++++++---- .../xla/tools/parser/hlo_parser_test.cc | 120 ++++ .../compiler/xla/tools/parser/hlo_token.h | 3 + tensorflow/compiler/xla/window_util.cc | 28 +- 8 files changed, 691 insertions(+), 146 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 5107ac782d..ee98c3fabc 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1850,7 +1850,7 @@ std::vector HloInstruction::ExtraAttributesToString() const { extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); } if (window_ != nullptr) { - extra.push_back(window_util::ToString(*window_)); + extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); } if (padding_config_ != nullptr) { extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); @@ -2856,13 +2856,7 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const { const auto append_dims = [&](const std::vector& dims, const Shape& shape) { CHECK_EQ(dims.size(), ShapeUtil::Rank(shape)); - for (int64 logical = 0; logical < dims.size(); ++logical) { - int64 physical = logical; - if (!shape.layout().minor_to_major().empty()) { - physical = LayoutUtil::Major(shape.layout(), logical); - } - result += dims[physical]; - } + StrAppend(&result, Join(dims, "")); }; // lhs_dims[i] is the symbol of the logical dimension i for the lhs diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md index 2c864d77a2..986041caf6 100644 --- a/tensorflow/compiler/xla/tools/parser/README.md +++ b/tensorflow/compiler/xla/tools/parser/README.md @@ -43,14 +43,22 @@ operand : shape name ; -extra_attributes +attributes : /*empty*/ - | ',' extra_attribute - | ',' extra_attribute extra_attributes + | ',' attribute + | ',' attribute attributes ; -extra_attribute +attribute : attribute_name attribute_value ; +attribute_value + : kInt + | kName + | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ + | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ + | [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* /*window_pad_pattern*/ + | '{' sub_attributes '}' + ; param_list : '(' param_list1 ')' diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index d104ff3460..f70386411c 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -122,7 +122,7 @@ TokKind HloLexer::LexToken() { current_ptr_++; return TokKind::kArrow; } - return LexDigitOrNegative(); + return LexNumberOrPattern(); case '=': return TokKind::kEqual; case ',': @@ -149,12 +149,15 @@ TokKind HloLexer::LexToken() { } } -// Lex a shape, name, keyword, or opcode. +// Lex a shape, name, keyword, opcode, attribute name, or the dim labels +// pattern. +// // shape ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})? // name ::= [a-zA-Z_][a-zA-Z0-9_.-]*: // keyword ::= HloModule, ENTRY, ... // opcode ::= add, greater-than, ... // attribute_name ::= condition, body, dimensions, ... +// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} TokKind HloLexer::LexIdentifier() { { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); @@ -220,6 +223,16 @@ TokKind HloLexer::LexIdentifier() { return TokKind::kOpcode; } + { + auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); + static LazyRE2 dim_labels_pattern = { + R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; + if (RE2::Consume(&consumable, *dim_labels_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kDimLabels; + } + } current_ptr_ = token_start_ + 1; return TokKind::kError; } @@ -240,15 +253,20 @@ TokKind HloLexer::LexPercent() { return TokKind::kError; } -// Lex integer and floating-point values, and -inf. -// int [-]?[0-9]+ -// fp with exp [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) -// fp without exp [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) -// negative inf -inf -TokKind HloLexer::LexDigitOrNegative() { +// Lex integer and floating-point values, -inf, and patterns for dim labels, +// dxd (e.g. 1x2x3), and window pad. +// +// fp with exp ::= [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) +// fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) +// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} +// dxd_pattern ::= [0-9]+(x[0-9]+)+ +// window_pad_pattern ::= [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* +// int ::= [-]?[0-9]+ +// negative inf ::= '-inf' +TokKind HloLexer::LexNumberOrPattern() { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); static LazyRE2 float_pattern = { - R"([-]?((\d+|\d+[.]\d*|\d*[.]\d+)([eE][+-]?\d+))|(\d+[.]\d*|\d*[.]\d+))"}; + R"([-]?((\d+|\d+[.]\d*|\d*[.]\d+)([eE][+-]?\d+))|[-]?(\d+[.]\d*|\d*[.]\d+))"}; if (RE2::Consume(&consumable, *float_pattern)) { current_ptr_ = consumable.begin(); tensorflow::strings::safe_strtod(string(token_start_, current_ptr_).c_str(), @@ -256,6 +274,29 @@ TokKind HloLexer::LexDigitOrNegative() { return TokKind::kDecimal; } + static LazyRE2 dim_labels_pattern = { + R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; + static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"}; + static LazyRE2 pad_pattern = {R"([0-9]+_[0-9]+(x[0-9]+_[0-9]+)*)"}; + + if (RE2::Consume(&consumable, *dim_labels_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kDimLabels; + } + + if (RE2::Consume(&consumable, *dxd_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kDxD; + } + + if (RE2::Consume(&consumable, *pad_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kWindowPad; + } + static LazyRE2 int_pattern = {R"([-]?\d+)"}; if (RE2::Consume(&consumable, *int_pattern)) { current_ptr_ = consumable.begin(); @@ -350,6 +391,12 @@ string TokKindToString(TokKind kind) { return "kName"; case TokKind::kAttributeName: return "kAttributeName"; + case TokKind::kDimLabels: + return "kDimLabels"; + case TokKind::kDxD: + return "kDxD"; + case TokKind::kWindowPad: + return "kWindowPad"; case TokKind::kShape: return "kShape"; case TokKind::kOpcode: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 3b9efcb92d..74e6829180 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -37,11 +37,15 @@ class HloLexer { } TokKind Lex() { return current_kind_ = LexToken(); } + TokKind GetKind() const { return current_kind_; } string GetStrVal() const { switch (GetKind()) { case TokKind::kName: case TokKind::kAttributeName: + case TokKind::kDimLabels: + case TokKind::kDxD: + case TokKind::kWindowPad: return str_val_; default: LOG(FATAL) << "This token does not have string value"; @@ -92,7 +96,7 @@ class HloLexer { TokKind LexPercent(); TokKind LexShape(); TokKind LexConstant(); - TokKind LexDigitOrNegative(); + TokKind LexNumberOrPattern(); TokKind LexComment(); const tensorflow::StringPiece buf_; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 6c2e37e3b5..f1e987cb15 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -28,6 +28,9 @@ namespace tools { namespace { using tensorflow::StringPiece; +using tensorflow::gtl::optional; +using tensorflow::str_util::Split; +using tensorflow::str_util::SplitAndParseAsInts; using tensorflow::strings::Printf; using tensorflow::strings::StrAppend; using tensorflow::strings::StrCat; @@ -57,8 +60,6 @@ class HloParser { bool ParseInstructionList(HloComputation::Builder* builder, string* root_name); bool ParseInstruction(HloComputation::Builder* builder, string* root_name); - bool ParseSharding(HloInstruction* instruction); - bool ParseControlPredecessors(HloInstruction* instruction); bool ParseLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseTupleLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseNonTupleLiteral(std::unique_ptr* literal, @@ -78,10 +79,55 @@ class HloParser { bool ParseOperands(std::vector* operands, const int expected_size); - template - bool ParseExtraAttribute(T* value, const string& expected_attribute); - template - bool ParseAttributeValue(T* value); + // Types of attributes. + enum class AttrTy { + kInt64, + kHloComputation, + kWindow, + kConvolutionDimensionNumbers, + kSharding, + kInstructionList, + }; + + struct AttrConfig { + bool required; // whether it's required or optional + AttrTy attr_type; // what type it is + void* result; // where to store the parsed result. + }; + + // Parses attributes given names and configs of the attributes. Each parsed + // result is passed back through the result pointer in corresponding + // AttrConfig. Note that the result pointer must point to a optional typed + // variable which outlives this function. Returns false on error. You should + // not use the any of the results if this function failed. + // + // Example usage: + // + // std::unordered_map attrs; + // optional foo; + // attrs["foo"] = {/*required=*/false, AttrTy::kInt64, &foo}; + // optional bar; + // attrs["bar"] = {/*required=*/true, AttrTy::kWindow, &bar}; + // if (!ParseAttribute(attrs)) { + // return false; // Do not use 'foo' 'bar' if failed. + // } + // // Do something with 'bar'. + // if (foo) { // If attr foo is seen, do something with 'foo'. } + // + bool ParseAttributes(const std::unordered_map& attrs); + + // Parses a name and finds the corresponding hlo computation. + bool ParseComputationName(HloComputation** value); + // Parses a list of names and finds the corresponding hlo instructions. + bool ParseInstructionNames(std::vector* instructions); + bool ParseWindow(Window* window); + bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); + bool ParseSharding(OpSharding* sharding); + + // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. + bool ParseDxD(const string& name, std::vector* result); + // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. + bool ParseWindowPad(std::vector>* pad); bool ParseParamList(); bool ParseName(string* result); @@ -214,7 +260,7 @@ bool HloParser::ParseInstructionList(HloComputation::Builder* builder, "expects '}' at the end of instruction list."); } -// instruction ::= ('ROOT')? name '=' shape opcode operands (extra_attribute)* +// instruction ::= ('ROOT')? name '=' shape opcode operands (attribute)* bool HloParser::ParseInstruction(HloComputation::Builder* builder, string* root_name) { string name; @@ -230,6 +276,15 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (is_root) { *root_name = name; } + + // Add optional attributes. + std::unordered_map attrs; + optional sharding; + attrs["sharding"] = {/*required=*/false, AttrTy::kSharding, &sharding}; + optional> predecessors; + attrs["control-predecessors"] = {/*required=*/false, AttrTy::kInstructionList, + &predecessors}; + HloInstruction* instruction; switch (opcode) { case HloOpcode::kParameter: { @@ -237,7 +292,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (!ParseToken(TokKind::kLparen, "expects '(' before parameter number") || !ParseInt64(¶meter_number) || - !ParseToken(TokKind::kRparen, "expects ')' after parameter number")) { + !ParseToken(TokKind::kRparen, "expects ')' after parameter number") || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -249,7 +305,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (!ParseToken(TokKind::kLparen, "expects '(' before constant literal") || !ParseLiteral(&literal, shape) || - !ParseToken(TokKind::kRparen, "expects ')' after constant literal")) { + !ParseToken(TokKind::kRparen, "expects ')' after constant literal") || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -275,7 +332,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kSin: case HloOpcode::kSort: case HloOpcode::kTanh: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -305,7 +363,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: { - if (!ParseOperands(&operands, /*expected_size=*/2)) { + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateBinary( @@ -315,7 +374,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, // Ternary ops. case HloOpcode::kClamp: case HloOpcode::kSelect: { - if (!ParseOperands(&operands, /*expected_size=*/3)) { + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateTernary( @@ -324,7 +384,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } // Other supported ops. case HloOpcode::kConvert: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -332,7 +393,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kCrossReplicaSum: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -340,7 +402,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kReshape: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -348,7 +411,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kTuple: { - if (!ParseOperands(&operands)) { + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } instruction = @@ -356,70 +419,99 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kWhile: { - HloComputation* condition; - HloComputation* body; + optional condition; + optional body; + attrs["condition"] = {/*required=*/true, AttrTy::kHloComputation, + &condition}; + attrs["body"] = {/*required=*/true, AttrTy::kHloComputation, &body}; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseExtraAttribute(&condition, - /*expected_attribute=*/"condition") || - !ParseExtraAttribute(&body, /*expected_attribute=*/"body")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateWhile( - shape, condition, body, /*init=*/operands[0])); + shape, *condition, *body, /*init=*/operands[0])); break; } case HloOpcode::kRecv: { - int64 channel_id; + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if (!ParseOperands(&operands, /*expected_size=*/0) || - !ParseExtraAttribute(&channel_id, - /*expected_attribute=*/"channel_id")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateRecv(shape, channel_id)); + HloInstruction::CreateRecv(shape, *channel_id)); break; } case HloOpcode::kSend: { - int64 channel_id; + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseExtraAttribute(&channel_id, - /*expected_attribute=*/"channel_id")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateSend(operands[0], channel_id)); + HloInstruction::CreateSend(operands[0], *channel_id)); break; } case HloOpcode::kGetTupleElement: { - int64 index; + optional index; + attrs["index"] = {/*required=*/true, AttrTy::kInt64, &index}; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseExtraAttribute(&index, /*expected_attribute=*/"index")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateGetTupleElement(shape, operands[0], index)); + HloInstruction::CreateGetTupleElement(shape, operands[0], *index)); break; } case HloOpcode::kCall: { - HloComputation* to_apply; - if (!ParseOperands(&operands) || - !ParseExtraAttribute(&to_apply, - /*expected_attribute=*/"to_apply")) { + optional to_apply; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &to_apply}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateCall(shape, operands, to_apply)); + HloInstruction::CreateCall(shape, operands, *to_apply)); + break; + } + case HloOpcode::kReduceWindow: { + optional reduce_computation; + optional window; + attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &reduce_computation}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateReduceWindow( + shape, /*operand=*/operands[0], /*init_value=*/operands[1], *window, + *reduce_computation)); + break; + } + case HloOpcode::kConvolution: { + optional window; + optional dnums; + attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["dim_labels"] = {/*required=*/true, + AttrTy::kConvolutionDimensionNumbers, &dnums}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateConvolve( + shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums)); break; } case HloOpcode::kBroadcast: case HloOpcode::kCustomCall: case HloOpcode::kConcatenate: case HloOpcode::kReducePrecision: - case HloOpcode::kConvolution: case HloOpcode::kMap: case HloOpcode::kPad: case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: case HloOpcode::kSelectAndScatter: case HloOpcode::kReverse: case HloOpcode::kRng: @@ -438,43 +530,27 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloOpcodeString(opcode))); } - bool has_sharding = false; - bool has_control = false; - while (EatIfPresent(TokKind::kComma)) { - string attribute_name; - if (!ParseAttributeName(&attribute_name)) { - return TokenError("expects ', sharding=' or ', control-predecessors='"); - } - - if (attribute_name == "sharding") { - // Parse "sharding=". - if (has_sharding) { - return TokenError("expects at most 1 'sharding='"); - } - has_sharding = true; - if (!ParseSharding(instruction)) { - return false; - } - } else if (attribute_name == "control-predecessors") { - // Parse "control-predecessors" - if (has_control) { - return TokenError("expects at most 1 'control-predecessors='"); - } - has_control = true; - if (!ParseControlPredecessors(instruction)) { - return false; + // Add common attrs (sharding, control predecessors) to the instruction, if + // they were seen. + if (sharding) { + instruction->set_sharding( + HloSharding::FromProto(sharding.value()).ValueOrDie()); + } + if (predecessors) { + for (auto* pre : *predecessors) { + Status status = pre->AddControlDependencyTo(instruction); + if (!status.ok()) { + return TokenError(StrCat("error adding control dependency for: ", name, + " status: ", status.ToString())); } - } else { - return TokenError(StrCat("unexpected attribute: ", attribute_name)); } } - return AddInstruction(name, instruction); } // ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? ('devices=' ('[' // dims ']')* device_list)? '}' dims ::= int_list device_list ::= int_list -bool HloParser::ParseSharding(HloInstruction* instruction) { +bool HloParser::ParseSharding(OpSharding* sharding) { if (!ParseToken(TokKind::kLbrace, "expected '{' to start sharding attribute")) { return false; @@ -545,7 +621,6 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { } } - OpSharding sharding; if (replicated) { if (!devices.empty()) { return TokenError( @@ -555,7 +630,7 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { return TokenError( "replicated shardings should not have any tile shape set"); } - sharding.set_type(OpSharding::Type::OpSharding_Type_REPLICATED); + sharding->set_type(OpSharding::Type::OpSharding_Type_REPLICATED); } else if (maximal) { if (devices.size() != 1) { return TokenError( @@ -564,8 +639,8 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { if (!ShapeUtil::Equal(tile_shape, Shape())) { return TokenError("maximal shardings should not have any tile shape set"); } - sharding.set_type(OpSharding::Type::OpSharding_Type_MAXIMAL); - sharding.add_tile_assignment_devices(devices[0]); + sharding->set_type(OpSharding::Type::OpSharding_Type_MAXIMAL); + sharding->add_tile_assignment_devices(devices[0]); } else { if (devices.size() <= 1) { return TokenError( @@ -579,47 +654,43 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { "non-maximal shardings must have a tile assignment list including " "dimensions"); } - sharding.set_type(OpSharding::Type::OpSharding_Type_OTHER); - *sharding.mutable_tile_shape() = tile_shape; + sharding->set_type(OpSharding::Type::OpSharding_Type_OTHER); + *sharding->mutable_tile_shape() = tile_shape; for (int64 dim : tile_assignment_dimensions) { - sharding.add_tile_assignment_dimensions(dim); + sharding->add_tile_assignment_dimensions(dim); } for (int64 device : devices) { - sharding.add_tile_assignment_devices(device); + sharding->add_tile_assignment_devices(device); } } - instruction->set_sharding(HloSharding::FromProto(sharding).ValueOrDie()); lexer_.Lex(); return true; } // '{' name+ '}' -bool HloParser::ParseControlPredecessors(HloInstruction* instruction) { +bool HloParser::ParseInstructionNames( + std::vector* instructions) { if (!ParseToken(TokKind::kLbrace, - "expects '{' at the beginning of control predecessors")) { + "expects '{' at the beginning of instruction name list")) { return false; } do { string name; if (!ParseName(&name)) { - return TokenError("expects a control predecessor"); + return TokenError("expects a instruction name"); } - HloInstruction* pre = + HloInstruction* instr = tensorflow::gtl::FindPtrOrNull(instruction_pool_, name); - if (!pre) { + if (!instr) { return TokenError( - StrCat("control predecessor ", name, " is not defined: ")); - } - Status status = pre->AddControlDependencyTo(instruction); - if (!status.ok()) { - return TokenError(StrCat("error adding control dependency for: ", name, - " status: ", status.ToString())); + Printf("instruction '%s' is not defined", name.c_str())); } + instructions->push_back(instr); } while (EatIfPresent(TokKind::kComma)); return ParseToken(TokKind::kRbrace, - "expects '}' at the end of control predecessors"); + "expects '}' at the end of control instructions"); } bool HloParser::SetValueInLiteral(int64 value, int64 linear_index, @@ -957,28 +1028,95 @@ bool HloParser::ParseOperands(std::vector* operands, return true; } -// extra_attribute ::= ',' attribute_name value -template -bool HloParser::ParseExtraAttribute(T* value, - const string& expected_attribute) { - if (!ParseToken(TokKind::kComma, - "expects ',' in front of an extra attribute")) { - return false; - } - string attribute_name; - if (!ParseAttributeName(&attribute_name) && - attribute_name != expected_attribute) { - return TokenError(StrCat("expects attribute name: ", expected_attribute)); +bool HloParser::ParseAttributes( + const std::unordered_map& attrs) { + std::unordered_set seen_attrs; + while (EatIfPresent(TokKind::kComma)) { + string name; + if (!ParseAttributeName(&name)) { + return TokenError("error parsing attributes"); + } + VLOG(1) << "Parsing attribute " << name; + if (!seen_attrs.insert(name).second) { + return TokenError(Printf("attribute %s already exists", name.c_str())); + } + auto attr_it = attrs.find(name); + if (attr_it == attrs.end()) { + return TokenError(Printf("unexpected attribute %s", name.c_str())); + } + AttrTy attr_type = attr_it->second.attr_type; + void* attr_out_ptr = attr_it->second.result; + bool success = [&] { + switch (attr_type) { + case AttrTy::kInt64: { + int64 result; + if (!ParseInt64(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kHloComputation: { + HloComputation* result; + if (!ParseComputationName(&result)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kWindow: { + Window result; + if (!ParseWindow(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kConvolutionDimensionNumbers: { + ConvolutionDimensionNumbers result; + if (!ParseConvolutionDimensionNumbers(&result)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kSharding: { + OpSharding sharding; + if (!ParseSharding(&sharding)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(sharding); + return true; + } + case AttrTy::kInstructionList: { + std::vector result; + if (!ParseInstructionNames(&result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } + } + }(); + if (!success) { + return TokenError(Printf("error parsing attribute %s", name.c_str())); + } } - if (!ParseAttributeValue(value)) { - return TokenError( - StrCat("expects value for attribute: ", expected_attribute)); + // Check that all required attrs were seen. + for (const auto& attr_it : attrs) { + if (attr_it.second.required && + seen_attrs.find(attr_it.first) == seen_attrs.end()) { + return TokenError(Printf("attribute %s is expected but not seen", + attr_it.first.c_str())); + } } return true; } -template <> -bool HloParser::ParseAttributeValue(HloComputation** value) { +bool HloParser::ParseComputationName(HloComputation** value) { string name; if (!ParseName(&name)) { return TokenError("expects computation name"); @@ -990,9 +1128,191 @@ bool HloParser::ParseAttributeValue(HloComputation** value) { return true; } -template <> -bool HloParser::ParseAttributeValue(int64* value) { - return ParseInt64(value); +// ::= '{' size stride? pad? lhs_dilate? rhs_dilate? '}' +// The subattributes can appear in any order. 'size=' is required, others are +// optional. +bool HloParser::ParseWindow(Window* window) { + if (!ParseToken(TokKind::kLbrace, "expected '{' to start window attribute")) { + return false; + } + + std::vector size; + std::vector stride; + std::vector> pad; + std::vector lhs_dilate; + std::vector rhs_dilate; + while (lexer_.GetKind() != TokKind::kRbrace) { + string field_name; + if (!ParseAttributeName(&field_name)) { + return TokenError("expects sub-attributes in window"); + } + bool ok = [&] { + if (field_name == "size") { + return ParseDxD("size", &size); + } + if (field_name == "stride") { + return ParseDxD("stride", &stride); + } + if (field_name == "lhs_dilate") { + return ParseDxD("lhs_dilate", &lhs_dilate); + } + if (field_name == "rhs_dilate") { + return ParseDxD("rls_dilate", &rhs_dilate); + } + if (field_name == "pad") { + return ParseWindowPad(&pad); + } + return TokenError(StrCat("unexpected attribute name: ", field_name)); + }(); + if (!ok) { + return false; + } + } + + if (size.empty()) { + return TokenError( + "sub-attribute 'size=' is required in the window attribute"); + } + if (!stride.empty() && stride.size() != size.size()) { + return TokenError("expects 'stride=' has the same size as 'size='"); + } + if (!lhs_dilate.empty() && lhs_dilate.size() != size.size()) { + return TokenError("expects 'lhs_dilate=' has the same size as 'size='"); + } + if (!rhs_dilate.empty() && rhs_dilate.size() != size.size()) { + return TokenError("expects 'rhs_dilate=' has the same size as 'size='"); + } + if (!pad.empty() && pad.size() != size.size()) { + return TokenError("expects 'pad=' has the same size as 'size='"); + } + + for (int i = 0; i < size.size(); i++) { + window->add_dimensions()->set_size(size[i]); + if (!pad.empty()) { + window->mutable_dimensions(i)->set_padding_low(pad[i][0]); + window->mutable_dimensions(i)->set_padding_high(pad[i][1]); + } + // If some field is not present, it has the default value. + window->mutable_dimensions(i)->set_stride(stride.empty() ? 1 : stride[i]); + window->mutable_dimensions(i)->set_base_dilation( + lhs_dilate.empty() ? 1 : lhs_dilate[i]); + window->mutable_dimensions(i)->set_window_dilation( + rhs_dilate.empty() ? 1 : rhs_dilate[i]); + } + return ParseToken(TokKind::kRbrace, "expected '}' to end window attribute"); +} + +// This is the inverse of HloInstruction::ConvolutionDimensionNumbersToString. +// The string looks like "dim_labels=0bf_0io->0bf". +bool HloParser::ParseConvolutionDimensionNumbers( + ConvolutionDimensionNumbers* dnums) { + if (lexer_.GetKind() != TokKind::kDimLabels) { + return TokenError("expects dim labels pattern, e.g., 'bf0_0io->0bf'"); + } + string str = lexer_.GetStrVal(); + + // The str is expected to have 3 items, lhs, rhs, out, and it must looks like + // lhs_rhs->out, that is, the first separator is "_" and the second is "->". + // So we replace the "->" with "_" and then split on "_". + str = tensorflow::str_util::StringReplace(str, /*oldsub=*/"->", + /*newsub=*/"_", + /*replace_all=*/false); + std::vector lhs_rhs_out = Split(str, "_"); + if (lhs_rhs_out.size() != 3) { + LOG(FATAL) << "expects 3 items: lhs, rhs, and output dims, but sees " + << str; + } + + const int64 rank = lhs_rhs_out[0].length(); + if (rank != lhs_rhs_out[1].length() || rank != lhs_rhs_out[2].length()) { + return TokenError( + "convolution lhs, rhs, and output must have the same rank"); + } + if (rank < 3) { + return TokenError("convolution rank must >=3"); + } + + auto is_unique = [](string str) -> bool { + std::sort(str.begin(), str.end()); + return std::unique(str.begin(), str.end()) == str.end(); + }; + + // lhs + { + const string& lhs = lhs_rhs_out[0]; + if (!is_unique(lhs)) { + return TokenError( + StrCat("expects unique lhs dimension numbers, but sees ", lhs)); + } + for (int i = 0; i < rank - 2; i++) { + dnums->add_spatial_dimensions(-1); + } + for (int i = 0; i < rank; i++) { + char c = lhs[i]; + if (c == 'b') { + dnums->set_input_batch_dimension(i); + } else if (c == 'f') { + dnums->set_input_feature_dimension(i); + } else if (c < '0' + rank && c >= '0') { + dnums->set_spatial_dimensions(c - '0', i); + } else { + return TokenError( + Printf("expects [0-%lldbf] in lhs dimension numbers", rank - 1)); + } + } + } + // rhs + { + const string& rhs = lhs_rhs_out[1]; + if (!is_unique(rhs)) { + return TokenError( + StrCat("expects unique rhs dimension numbers, but sees ", rhs)); + } + for (int i = 0; i < rank - 2; i++) { + dnums->add_kernel_spatial_dimensions(-1); + } + for (int i = 0; i < rank; i++) { + char c = rhs[i]; + if (c == 'i') { + dnums->set_kernel_input_feature_dimension(i); + } else if (c == 'o') { + dnums->set_kernel_output_feature_dimension(i); + } else if (c < '0' + rank && c >= '0') { + dnums->set_kernel_spatial_dimensions(c - '0', i); + } else { + return TokenError( + Printf("expects [0-%lldio] in rhs dimension numbers", rank - 1)); + } + } + } + // output + { + const string& out = lhs_rhs_out[2]; + if (!is_unique(out)) { + return TokenError( + StrCat("expects unique output dimension numbers, but sees ", out)); + } + for (int i = 0; i < rank; i++) { + char c = out[i]; + if (c == 'b') { + dnums->set_output_batch_dimension(i); + } else if (c == 'f') { + dnums->set_output_feature_dimension(i); + } else if (c < '0' + rank && c >= '0') { + if (dnums->spatial_dimensions(c - '0') != i) { + return TokenError( + "output spatial dimensions should be the same as input spatial " + "dimensions"); + } + } else { + return TokenError( + Printf("expects [0-%lldbf] in output dimension numbers", rank - 1)); + } + } + } + + lexer_.Lex(); + return true; } // param_list ::= '(' param_list1 ')' @@ -1070,6 +1390,55 @@ bool HloParser::ParseAttributeName(string* result) { return true; } +bool HloParser::ParseDxD(const string& name, std::vector* result) { + if (!result->empty()) { + return TokenError( + Printf("sub-attribute '%s=' already exists", name.c_str())); + } + // 1D + if (lexer_.GetKind() == TokKind::kInt) { + int64 number; + if (!ParseInt64(&number)) { + return TokenError(Printf("expects sub-attribute '%s=i'", name.c_str())); + } + result->push_back(number); + return true; + } + // 2D or higher. + if (lexer_.GetKind() == TokKind::kDxD) { + string str = lexer_.GetStrVal(); + if (!SplitAndParseAsInts(str, 'x', result)) { + return TokenError( + Printf("expects sub-attribute '%s=ixj...'", name.c_str())); + } + lexer_.Lex(); + return true; + } + return TokenError("expects token type kInt or kDxD"); +} + +bool HloParser::ParseWindowPad(std::vector>* pad) { + if (!pad->empty()) { + return TokenError("sub-attribute 'pad=' already exists"); + } + if (lexer_.GetKind() != TokKind::kWindowPad) { + return TokenError("expects window pad pattern, e.g., '0_0x3_3'"); + } + string str = lexer_.GetStrVal(); + std::vector padding_str = Split(str, 'x'); + for (int i = 0; i < padding_str.size(); i++) { + std::vector low_high; + if (!SplitAndParseAsInts(padding_str[i], '_', &low_high) || + low_high.size() != 2) { + return TokenError( + "expects padding_low and padding_high separated by '_'"); + } + pad->push_back(low_high); + } + lexer_.Lex(); + return true; +} + bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << "ParseOpcode"; if (lexer_.GetKind() != TokKind::kOpcode) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 359256f064..62b4385e76 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -25,6 +25,7 @@ namespace tools { namespace { using tensorflow::StringPiece; +using tensorflow::strings::StrCat; struct TestData { string test_name; @@ -247,6 +248,39 @@ ENTRY %CallR0F32IdentityScalar.v2 () -> f32[] { ROOT %call = f32[] call(f32[] %constant), to_apply=%Identity.v1 } +)" +}, +// reduce window +{ +"ReduceWindow", +R"(HloModule R4UnitWindow_module: + +%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) +} + +ENTRY %R4UnitWindow.v3 (operand: f32[13,12,8,15]) -> f32[13,3,8,15] { + %operand = f32[13,12,8,15]{0,3,2,1} parameter(0) + %constant = f32[] constant(0) + ROOT %reduce-window = f32[13,3,8,15]{0,3,2,1} reduce-window(f32[13,12,8,15]{0,3,2,1} %operand, f32[] %constant), window={size=1x1x7x1 stride=1x4x1x1 pad=0_0x0_0x3_3x0_0}, to_apply=%add_F32.v3 +} + +)" +}, +// convolution +{ +"Convolution", +R"(HloModule Convolve1D1Window_0_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f +} + )" } }); @@ -427,6 +461,92 @@ ENTRY %ConstantWithExp.v4 () -> f32[] { // printed as "300". } +TEST_F(HloParserTest, AttibutesAnyOrder) { + const string original = R"(HloModule any_order_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), sharding={maximal device=1}, dim_labels=b0f_0io->b0f, window={pad=1_1 size=2} +} + +)"; + TF_EXPECT_OK(Parse(original).status()); +} + +TEST_F(HloParserTest, InvalidDimLabels) { + string prefix = R"(HloModule invalid_dim_labels_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1} )"; + string suffix = R"( +} + +)"; + + ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=00_01_10", suffix)) + .status() + .error_message(), + "expects dim labels pattern"); + + ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=010_1100->010", suffix)) + .status() + .error_message(), + "must have the same rank"); + + ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=0bf_io0->b0f", suffix)) + .status() + .error_message(), + "output spatial dimensions should be the same as input " + "spatial dimensions"); +} + +TEST_F(HloParserTest, UnexpectedAttribute) { + const string original = R"(HloModule unexpected_attr_module: + +ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { + %recv = f32[] recv(), channel_id=15 + ROOT %constant = f32[] constant(2.1) + %send = () send(f32[] %constant), channel_id=16, calls=%recv +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "unexpected attribute calls"); +} + +TEST_F(HloParserTest, MissingAttribute) { + const string original = R"(HloModule missing_attr_module: + +ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { + %recv = f32[] recv(), channel_id=15 + ROOT %constant = f32[] constant(-2.1) + %send = () send(f32[] %constant) +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "attribute channel_id is expected but not seen"); +} + +TEST_F(HloParserTest, PredecessorUndefined) { + const string original = R"(HloModule pre_not_found_module: + +ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { + %recv = f32[] recv(), channel_id=15 + ROOT %constant = f32[] constant(2.1) + %send = () send(f32[] %constant), channel_id=16, control-predecessors={%done} +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "'done' is not defined"); +} + } // namespace } // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 9c2069e756..15ab8b1ccc 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -57,6 +57,9 @@ enum class TokKind { // Typed tokens. kName, // %foo kAttributeName, // dimensions= + kDimLabels, // [0-9bf]+_[0-9io]+->[0-9bf]+ + kDxD, // [0-9]+(x[0-9]+)+ + kWindowPad, // [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* kShape, // f32[2,3]{1,0} kOpcode, // add kInt, // 42 diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc index 23161873a0..6f7f1479b9 100644 --- a/tensorflow/compiler/xla/window_util.cc +++ b/tensorflow/compiler/xla/window_util.cc @@ -26,8 +26,8 @@ namespace xla { namespace window_util { /* static */ string ToString(const WindowDimension& dim) { - using tensorflow::strings::StrCat; using tensorflow::strings::StrAppend; + using tensorflow::strings::StrCat; string str = StrCat("(size=", dim.size()); if (dim.stride() != 1) { StrAppend(&str, ",stride=", dim.stride()); @@ -49,22 +49,22 @@ namespace window_util { } string ToString(const Window& window) { - using tensorflow::strings::StrCat; using tensorflow::strings::StrAppend; + using tensorflow::strings::StrCat; string str; - const auto add_field = [&]( - const char* heading, - std::function format) { - StrAppend(&str, heading, "="); - const char* prefix = ""; - for (const auto& window_dimension : window.dimensions()) { - StrAppend(&str, prefix, format(window_dimension)); - prefix = "x"; - } - }; - - add_field("window", + const auto add_field = + [&](const char* heading, + std::function format) { + StrAppend(&str, heading, "="); + const char* prefix = ""; + for (const auto& window_dimension : window.dimensions()) { + StrAppend(&str, prefix, format(window_dimension)); + prefix = "x"; + } + }; + + add_field("size", [](const WindowDimension& dim) { return StrCat(dim.size()); }); if (HasStride(window)) { add_field(" stride", -- GitLab From 35febc0cc9c27d57e574dc6a3bd634f9611feb60 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 15:24:05 -0800 Subject: [PATCH 0071/1801] Add a --all_tensor_names option, which is useful if I only want to know all tensor names. It is especially useful in cases whether some of the tensors has huge size. Also update the usage description. PiperOrigin-RevId: 175074541 --- tensorflow/python/tools/inspect_checkpoint.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/tools/inspect_checkpoint.py b/tensorflow/python/tools/inspect_checkpoint.py index 47a74e5abf..8716058e61 100644 --- a/tensorflow/python/tools/inspect_checkpoint.py +++ b/tensorflow/python/tools/inspect_checkpoint.py @@ -29,7 +29,8 @@ from tensorflow.python.platform import flags FLAGS = None -def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors): +def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors, + all_tensor_names): """Prints tensors in a checkpoint file. If no `tensor_name` is provided, prints the tensor names and shapes @@ -41,14 +42,16 @@ def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors): file_name: Name of the checkpoint file. tensor_name: Name of the tensor in the checkpoint file to print. all_tensors: Boolean indicating whether to print all tensors. + all_tensor_names: Boolean indicating whether to print all tensor names. """ try: reader = pywrap_tensorflow.NewCheckpointReader(file_name) - if all_tensors: + if all_tensors or all_tensor_names: var_to_shape_map = reader.get_variable_to_shape_map() for key in sorted(var_to_shape_map): print("tensor_name: ", key) - print(reader.get_tensor(key)) + if all_tensors: + print(reader.get_tensor(key)) elif not tensor_name: print(reader.debug_string().decode("utf-8")) else: @@ -104,11 +107,14 @@ def parse_numpy_printoption(kv_str): def main(unused_argv): if not FLAGS.file_name: print("Usage: inspect_checkpoint --file_name=checkpoint_file_name " - "[--tensor_name=tensor_to_print]") + "[--tensor_name=tensor_to_print] " + "[--all_tensors] " + "[--all_tensor_names] " + "[--printoptions]") sys.exit(1) else: print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name, - FLAGS.all_tensors) + FLAGS.all_tensors, FLAGS.all_tensor_names) if __name__ == "__main__": @@ -130,6 +136,13 @@ if __name__ == "__main__": type="bool", default=False, help="If True, print the values of all the tensors.") + parser.add_argument( + "--all_tensor_names", + nargs="?", + const=True, + type="bool", + default=False, + help="If True, print the names of all the tensors.") parser.add_argument( "--printoptions", nargs="*", -- GitLab From a6babd6a4f6462e805be946bf6b352b2e4248794 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Wed, 8 Nov 2017 15:35:27 -0800 Subject: [PATCH 0072/1801] Move MakeFakeLiteral from client/lib/testing.h to tests/test_utils.h. Also remove superfluous literal creation methods in that file, and replace them with the existing ones in the Literal class. Also, optionally print layout in Literal::ToString. PiperOrigin-RevId: 175076277 --- tensorflow/compiler/xla/client/lib/BUILD | 1 + tensorflow/compiler/xla/client/lib/testing.cc | 57 +-------- tensorflow/compiler/xla/client/lib/testing.h | 4 - tensorflow/compiler/xla/literal_util.cc | 22 +++- tensorflow/compiler/xla/literal_util.h | 2 +- tensorflow/compiler/xla/service/BUILD | 2 - .../compiler/xla/service/hlo_cse_test.cc | 24 ++-- .../xla/service/layout_assignment_test.cc | 32 ++--- tensorflow/compiler/xla/tests/BUILD | 3 +- .../xla/tests/client_library_test_base.h | 6 +- tensorflow/compiler/xla/tests/client_test.cc | 4 +- .../xla/tests/compilation_cache_test.cc | 8 +- .../xla/tests/compute_constant_test.cc | 4 +- .../compiler/xla/tests/dot_operation_test.cc | 25 ++-- .../xla/tests/local_client_execute_test.cc | 10 +- tensorflow/compiler/xla/tests/map_test.cc | 8 +- tensorflow/compiler/xla/tests/test_utils.cc | 120 ++++++++++++++++++ tensorflow/compiler/xla/tests/test_utils.h | 64 ++-------- tensorflow/compiler/xla/tools/BUILD | 1 + .../compiler/xla/tools/replay_computation.cc | 1 + 20 files changed, 209 insertions(+), 189 deletions(-) create mode 100644 tensorflow/compiler/xla/tests/test_utils.cc diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index ee34682087..fca2bf2688 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -44,6 +44,7 @@ cc_library( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index e6645e4941..d936bd870b 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -48,62 +49,6 @@ std::unique_ptr MakeFakeDataViaDeviceOrDie(const Shape& shape, } // namespace -StatusOr> MakeFakeLiteral(const Shape& shape) { - if (ShapeUtil::IsTuple(shape)) { - std::vector> elements; - for (const Shape& element_shape : shape.tuple_shapes()) { - TF_ASSIGN_OR_RETURN(std::unique_ptr element, - MakeFakeLiteral(element_shape)); - elements.push_back(std::move(element)); - } - return Literal::MakeTupleOwned(std::move(elements)); - } - std::unique_ptr literal = Literal::CreateFromShape(shape); - std::minstd_rand0 engine; - switch (shape.element_type()) { - case F32: { - std::uniform_real_distribution generator(0.0f, 1.0f); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - case S32: { - std::uniform_int_distribution generator( - std::numeric_limits::lowest(), - std::numeric_limits::max()); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - case S64: { - std::uniform_int_distribution generator( - std::numeric_limits::lowest(), - std::numeric_limits::max()); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - case PRED: { - std::uniform_int_distribution generator(0, 1); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - default: - return Unimplemented("Unsupported type for fake literal generation: %s", - ShapeUtil::HumanString(shape).c_str()); - } - return std::move(literal); -} - std::unique_ptr MakeFakeDataOrDie(const Shape& shape, Client* client) { if (ShapeUtil::ByteSizeOf(shape) < (1LL << 30)) { diff --git a/tensorflow/compiler/xla/client/lib/testing.h b/tensorflow/compiler/xla/client/lib/testing.h index b5c4393dcc..7e640d1307 100644 --- a/tensorflow/compiler/xla/client/lib/testing.h +++ b/tensorflow/compiler/xla/client/lib/testing.h @@ -26,10 +26,6 @@ limitations under the License. namespace xla { -// Generates fake data in a literal of the given shape, or returns an error -// status if the element type is currently unhandled for fake data generation. -StatusOr> MakeFakeLiteral(const Shape& shape); - // Generates fake data of the given shape on the device or dies. The fake data // is created by performing a computation on the device rather than transferring // data from the host to the device. diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index fda791401d..0cb2223ae5 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -569,9 +569,17 @@ int64 Literal::LinearIndex( return IndexUtil::MultidimensionalIndexToLinearIndex(shape(), multi_index); } -string Literal::ToString() const { +string Literal::ToString(bool print_layout) const { std::vector pieces; + auto shape_to_string = [print_layout](const Shape& shape) { + if (print_layout) { + return ShapeUtil::HumanStringWithLayout(shape); + } else { + return ShapeUtil::HumanString(shape); + } + }; + auto element_to_string = [this](tensorflow::gtl::ArraySlice indices) -> string { PrimitiveType element_type = shape().element_type(); @@ -585,7 +593,7 @@ string Literal::ToString() const { // TODO(b/32894291): refactor this code to reduce code duplication. if (ShapeUtil::IsTuple(shape())) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" (\n"); pieces.push_back(tensorflow::str_util::Join( tuple_literals(), ",\n", [](string* out, const Literal& element) { @@ -601,7 +609,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 2) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(" { "); @@ -613,7 +621,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 3) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(i0 > 0 ? ",\n{" : "{"); @@ -628,7 +636,7 @@ string Literal::ToString() const { } pieces.push_back("\n}"); } else if (ShapeUtil::Rank(shape()) == 4) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(tensorflow::strings::Printf(" { /*i0=%lld*/\n", i0)); @@ -649,7 +657,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 5) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(tensorflow::strings::Printf(" { /*i0=%lld*/\n", i0)); @@ -676,7 +684,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {...}"); } diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index a1e288829f..667f926c46 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -450,7 +450,7 @@ class Literal { tensorflow::Status ValidateLiteral() const; // Returns a string representation of the literal value. - string ToString() const; + string ToString(bool print_layout = false) const; // Invokes the "per cell" callback for each element in the provided // literal with the element's indices and a string representation of diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index c6f6c6c38b..7cf24641b5 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1780,7 +1780,6 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", ], ) @@ -1851,7 +1850,6 @@ tf_cc_test( "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/hlo_cse_test.cc b/tensorflow/compiler/xla/service/hlo_cse_test.cc index 7c4626e78a..3601a790c4 100644 --- a/tensorflow/compiler/xla/service/hlo_cse_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cse_test.cc @@ -79,12 +79,12 @@ TEST_F(HloCseTest, CombineTwoConstantsDifferentLayoutsAndInsensitive) { // Test that two identical constants with different layouts are commoned if // the pass is not layout sensitive. auto builder = HloComputation::Builder(TestName()); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{0, 1}))); - auto constant2 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{1, 0}))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant2 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto add = builder.AddInstruction(HloInstruction::CreateBinary( constant1->shape(), HloOpcode::kAdd, constant1, constant2)); @@ -111,12 +111,12 @@ TEST_F(HloCseTest, CombineTwoConstantsDifferentLayoutsAndSensitive) { // Test that two identical constants with different layouts are *not* commoned // if the pass is layout sensitive. auto builder = HloComputation::Builder(TestName()); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{0, 1}))); - auto constant2 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{1, 0}))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant2 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto add = builder.AddInstruction(HloInstruction::CreateBinary( constant1->shape(), HloOpcode::kAdd, constant1, constant2)); diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index c39ff52230..d51c0d1dfb 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -131,10 +131,10 @@ TEST_F(LayoutAssignmentTest, FusionInstruction) { std::vector> minor_to_majors = {{0, 1}, {1, 0}}; for (auto& minor_to_major : minor_to_majors) { auto builder = HloComputation::Builder(TestName()); - auto constant_literal1 = test_utils::CreateR2LiteralWithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, minor_to_major); - auto constant_literal2 = test_utils::CreateR2LiteralWithLayout( - {{5.0, 6.0}, {7.0, 8.0}}, minor_to_major); + auto constant_literal1 = Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout(minor_to_major)); + auto constant_literal2 = Literal::CreateR2WithLayout( + {{5.0, 6.0}, {7.0, 8.0}}, LayoutUtil::MakeLayout(minor_to_major)); Shape ashape = constant_literal1->shape(); auto constant1 = builder.AddInstruction( @@ -181,12 +181,12 @@ TEST_F(LayoutAssignmentTest, TupleLayout) { // Verify the layouts of a tuple are assigned properly (the element layouts // match their source). auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {0, 1}))); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {1, 0}))); + auto constant0 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto tuple = builder.AddInstruction( HloInstruction::CreateTuple({constant0, constant1})); @@ -218,12 +218,12 @@ TEST_F(LayoutAssignmentTest, TupleLayout) { TEST_F(LayoutAssignmentTest, TupleSelect) { // Verify layouts of a select with tuple operands is assigned properly. auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {0, 1}))); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {1, 0}))); + auto constant0 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto tuple0 = builder.AddInstruction( HloInstruction::CreateTuple({constant0, constant1})); auto tuple1 = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 4e1be24b61..2333a30ad5 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -61,13 +61,14 @@ generate_backend_test_macros() cc_library( name = "test_utils", - testonly = True, + srcs = ["test_utils.cc"], hdrs = ["test_utils.h"], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 7cfc276ec1..2c37466ff2 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -469,8 +469,7 @@ template std::vector ClientLibraryTestBase::CreatePseudorandomR1( const int width, NativeT min_value, NativeT max_value, uint32 seed) { std::vector result(width); - test_utils::PseudorandomGenerator generator(min_value, max_value, - seed); + PseudorandomGenerator generator(min_value, max_value, seed); for (int i = 0; i < width; ++i) { result[i] = generator.get(); } @@ -482,8 +481,7 @@ std::unique_ptr> ClientLibraryTestBase::CreatePseudorandomR2( const int rows, const int cols, NativeT min_value, NativeT max_value, uint32 seed) { auto result = MakeUnique>(rows, cols); - test_utils::PseudorandomGenerator generator(min_value, max_value, - seed); + PseudorandomGenerator generator(min_value, max_value, seed); for (int y = 0; y < rows; ++y) { for (int x = 0; x < cols; ++x) { (*result)(y, x) = generator.get(); diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc index 0853feeebd..183bcf1dd3 100644 --- a/tensorflow/compiler/xla/tests/client_test.cc +++ b/tensorflow/compiler/xla/tests/client_test.cc @@ -54,8 +54,8 @@ TEST_F(ClientTest, ExecuteWithLayout) { .ConsumeValueOrDie(); std::unique_ptr expected_literal = - test_utils::CreateR2LiteralWithLayout({{11, 22}, {33, 44}}, - transfer_layout); + Literal::CreateR2WithLayout( + {{11, 22}, {33, 44}}, LayoutUtil::MakeLayout(transfer_layout)); auto computed = client_->Transfer(*data, &expected_literal->shape()); diff --git a/tensorflow/compiler/xla/tests/compilation_cache_test.cc b/tensorflow/compiler/xla/tests/compilation_cache_test.cc index 707e439245..0f780fa87e 100644 --- a/tensorflow/compiler/xla/tests/compilation_cache_test.cc +++ b/tensorflow/compiler/xla/tests/compilation_cache_test.cc @@ -138,13 +138,13 @@ XLA_TEST_F(CompilationCacheTest, DifferentParameterLayouts) { // layouts. Use these arrays as parameters to a simple computation. If the // layout of the array changes then computation should be recompiled (cache // miss). - auto rowmaj_array = test_utils::CreateR2LiteralWithLayout( - {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{1, 0}); + auto rowmaj_array = Literal::CreateR2WithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({1, 0})); auto rowmaj_handle = client_->TransferToServer(*rowmaj_array).ConsumeValueOrDie(); - auto colmaj_array = test_utils::CreateR2LiteralWithLayout( - {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{0, 1}); + auto colmaj_array = Literal::CreateR2WithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1})); auto colmaj_handle = client_->TransferToServer(*colmaj_array).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/compute_constant_test.cc b/tensorflow/compiler/xla/tests/compute_constant_test.cc index d423c78476..5226a78386 100644 --- a/tensorflow/compiler/xla/tests/compute_constant_test.cc +++ b/tensorflow/compiler/xla/tests/compute_constant_test.cc @@ -264,8 +264,8 @@ XLA_TEST_F(ComputeConstantTest, Layout) { ASSERT_TRUE(computed.ok()) << computed.status(); std::unique_ptr expected_literal = - test_utils::CreateR2LiteralWithLayout({{11, 22}, {33, 44}}, - layout); + Literal::CreateR2WithLayout({{11, 22}, {33, 44}}, + LayoutUtil::MakeLayout(layout)); LiteralTestUtil::AssertEqualShapesAndLayouts( expected_literal->shape(), computed.ValueOrDie()->shape()); LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index c4e422b506..b72dd2707c 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -177,15 +177,15 @@ void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major, bool rhs_row_major) { auto lhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 2.0}, {3.0, -4.0}}, - MinorToMajorForIsRowMajor(lhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 6.0}, {7.0, -4.0}}, - MinorToMajorForIsRowMajor(rhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); @@ -362,15 +362,15 @@ void DotOperationTest::TestNonsquareMatrixDot(bool lhs_row_major, bool rhs_row_major) { auto lhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}}, - MinorToMajorForIsRowMajor(lhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}}, - MinorToMajorForIsRowMajor(rhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); @@ -420,13 +420,14 @@ XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64) { XLA_TEST_F(DotOperationTest, MatrixVectorC64) { auto lhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( - {{1.0, 2.0, 3.0, -4.0}}, {1, 0})) + ->TransferToServer(*Literal::CreateR2WithLayout( + {{1.0, 2.0, 3.0, -4.0}}, LayoutUtil::MakeLayout({1, 0}))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( - {{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}, {-4.0, 4.0}}, {1, 0})) + ->TransferToServer(*Literal::CreateR2WithLayout( + {{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}, {-4.0, 4.0}}, + LayoutUtil::MakeLayout({1, 0}))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc index 329b53012f..a196e250d1 100644 --- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc @@ -136,16 +136,14 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentInputLayouts) { auto computation = builder.Build().ConsumeValueOrDie(); // Create x as a col-major array. - auto x_array = LiteralToShapedBuffer( - *test_utils::CreateR2LiteralWithLayout({{1.0f, 2.0f}, {3.0f, 4.0f}}, - /*minor_to_major=*/{0, 1})); + auto x_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1}))); EXPECT_TRUE(LayoutUtil::Equal(x_array->shape().layout(), LayoutUtil::MakeLayout({0, 1}))); // Create y as a row-major array. - auto y_array = LiteralToShapedBuffer( - *test_utils::CreateR2LiteralWithLayout({{10.0f, 20.0f}, {30.0f, 40.0f}}, - /*minor_to_major=*/{1, 0})); + auto y_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( + {{10.0f, 20.0f}, {30.0f, 40.0f}}, LayoutUtil::MakeLayout({1, 0}))); EXPECT_TRUE(LayoutUtil::Equal(y_array->shape().layout(), LayoutUtil::MakeLayout({1, 0}))); diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 2ef392508d..2b0f7e6e80 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -405,13 +405,13 @@ TEST_F(MapTest, MapBinaryAdder) { // for Map that used to fail in shape inference (b/28989438). XLA_TEST_F(MapTest, AddWithMixedLayouts) { ComputationBuilder builder(client_, TestName()); - std::unique_ptr param0_literal = - test_utils::CreateR2LiteralWithLayout({{1, 2}, {3, 4}}, {1, 0}); + std::unique_ptr param0_literal = Literal::CreateR2WithLayout( + {{1, 2}, {3, 4}}, LayoutUtil::MakeLayout({1, 0})); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); - std::unique_ptr param1_literal = - test_utils::CreateR2LiteralWithLayout({{10, 20}, {30, 40}}, {0, 1}); + std::unique_ptr param1_literal = Literal::CreateR2WithLayout( + {{10, 20}, {30, 40}}, LayoutUtil::MakeLayout({0, 1})); std::unique_ptr param1_data = client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc new file mode 100644 index 0000000000..cdd3d66bbb --- /dev/null +++ b/tensorflow/compiler/xla/tests/test_utils.cc @@ -0,0 +1,120 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/test_utils.h" + +#include "tensorflow/compiler/xla/primitive_util.h" + +namespace xla { + +namespace { + +template +void PopulateWithRandomFloatingPointData(Literal* literal) { + CHECK_EQ(literal->shape().element_type(), + primitive_util::NativeToPrimitiveType()); + std::minstd_rand0 engine; + std::uniform_real_distribution generator(0.0f, 1.0f); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); +} + +template +void PopulateWithRandomIntegralData(Literal* literal) { + CHECK_EQ(literal->shape().element_type(), + primitive_util::NativeToPrimitiveType()); + std::minstd_rand0 engine; + std::uniform_int_distribution generator( + std::numeric_limits::lowest(), std::numeric_limits::max()); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); +} + +} // namespace + +StatusOr> MakeFakeLiteral(const Shape& shape) { + if (ShapeUtil::IsTuple(shape)) { + std::vector> elements; + for (const Shape& element_shape : shape.tuple_shapes()) { + TF_ASSIGN_OR_RETURN(std::unique_ptr element, + MakeFakeLiteral(element_shape)); + elements.push_back(std::move(element)); + } + return Literal::MakeTupleOwned(std::move(elements)); + } + std::unique_ptr literal = Literal::CreateFromShape(shape); + switch (shape.element_type()) { + case F32: + PopulateWithRandomFloatingPointData(literal.get()); + break; + case F64: + PopulateWithRandomFloatingPointData(literal.get()); + break; + case S8: + PopulateWithRandomIntegralData(literal.get()); + break; + case U8: + PopulateWithRandomIntegralData(literal.get()); + break; + case S16: + PopulateWithRandomIntegralData(literal.get()); + break; + case U16: + PopulateWithRandomIntegralData(literal.get()); + break; + case S32: + PopulateWithRandomIntegralData(literal.get()); + break; + case U32: + PopulateWithRandomIntegralData(literal.get()); + break; + case S64: + PopulateWithRandomIntegralData(literal.get()); + break; + case U64: + PopulateWithRandomIntegralData(literal.get()); + break; + case PRED: { + std::uniform_int_distribution generator(0, 1); + std::minstd_rand0 engine; + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); + break; + } + default: + return Unimplemented("Unsupported type for fake literal generation: %s", + ShapeUtil::HumanString(shape).c_str()); + } + return std::move(literal); +} + +StatusOr>> MakeFakeArguments( + const HloModule& module) { + std::vector> arguments; + for (const ShapeLayout& shape_layout : + module.config().entry_computation_layout().parameter_layouts()) { + TF_ASSIGN_OR_RETURN(auto literal, MakeFakeLiteral(shape_layout.shape())); + arguments.push_back(std::move(literal)); + } + return std::move(arguments); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h index f3a522b05e..12d5255fce 100644 --- a/tensorflow/compiler/xla/tests/test_utils.h +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -23,12 +23,12 @@ limitations under the License. #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/types.h" namespace xla { -namespace test_utils { // A class which generates pseudorandom numbers of a given type within a given // range. Not cryptographically secure and likely not perfectly evenly @@ -53,63 +53,15 @@ class PseudorandomGenerator { std::mt19937 generator_; }; -// Convenience function for creating a rank-2 array with arbitrary layout. -template -std::unique_ptr CreateR2LiteralWithLayout( - std::initializer_list> values, - tensorflow::gtl::ArraySlice minor_to_major) { - auto literal = MakeUnique(); - const int64 d0 = values.size(); - const int64 d1 = values.begin()->size(); - literal.get()->PopulateWithValue(0, {d0, d1}); - *literal->mutable_shape()->mutable_layout() = - LayoutUtil::MakeLayout(minor_to_major); - TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); - - int64 dim0 = 0; - for (auto inner_list : values) { - int64 dim1 = 0; - for (auto value : inner_list) { - literal.get()->Set({dim0, dim1}, value); - ++dim1; - } - ++dim0; - } - return literal; -} +// Generates fake data in a literal of the given shape, or returns an error +// status if the element type is currently unhandled for fake data generation. +StatusOr> MakeFakeLiteral(const Shape& shape); -// Convenience function for creating a rank-3 array with arbitrary layout. -template -std::unique_ptr CreateR3LiteralWithLayout( - std::initializer_list>> - values, - tensorflow::gtl::ArraySlice minor_to_major) { - auto literal = MakeUnique(); - const int64 d0 = values.size(); - const int64 d1 = values.begin()->size(); - const int64 d2 = values.begin()->begin()->size(); - literal.get()->PopulateWithValue(0, {d0, d1, d2}); - *literal->mutable_shape()->mutable_layout() = - LayoutUtil::MakeLayout(minor_to_major); - TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); - - int64 dim0 = 0; - for (auto inner_list : values) { - int64 dim1 = 0; - for (auto inner_inner_list : inner_list) { - int64 dim2 = 0; - for (auto value : inner_inner_list) { - literal.get()->Set({dim0, dim1, dim2}, value); - ++dim2; - } - ++dim1; - } - ++dim0; - } - return literal; -} +// Generates a vector of arguments containing fake data. The number, shape and +// layout of the arguments is appropriate for given HLO module. +StatusOr>> MakeFakeArguments( + const HloModule& module); -} // namespace test_utils } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_TESTS_TEST_UTILS_H_ diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index 759921dce5..091fa0c3ec 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -88,6 +88,7 @@ cc_library( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:testing", "//tensorflow/compiler/xla/service:session_proto", + "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index 89b26b8916..503e7d456e 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -45,6 +45,7 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/threadpool.h" -- GitLab From 481739daad1bc92225da29bb7a65ced6a9a52303 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Wed, 8 Nov 2017 15:57:27 -0800 Subject: [PATCH 0073/1801] allows tf.Print to print empty data list and changes a noop test in function_test.py to verify that it doesn't raise a ValueError as an empty list would have previously PiperOrigin-RevId: 175079527 --- tensorflow/core/ops/logging_ops.cc | 2 +- tensorflow/python/framework/function_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc index 11cb9861a3..e6995821df 100644 --- a/tensorflow/core/ops/logging_ops.cc +++ b/tensorflow/core/ops/logging_ops.cc @@ -43,7 +43,7 @@ REGISTER_OP("Print") .Output("output: T") .SetIsStateful() .Attr("T: type") - .Attr("U: list(type)") + .Attr("U: list(type) >= 0") .Attr("message: string = ''") .Attr("first_n: int = -1") .Attr("summarize: int = 3") diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 36b0737cfc..ba43e9199b 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -370,7 +370,7 @@ class FunctionTest(test.TestCase): @function.Defun(dtypes.float32) def Foo(x): - y = logging_ops.Print(x, [x], "Hello") + y = logging_ops.Print(x, [], "Hello") with ops.control_dependencies([y]): z = control_flow_ops.no_op() with ops.control_dependencies([z]): -- GitLab From 1ff2d1377753c1ae74eca7b0705fce2775195cbe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 15:59:24 -0800 Subject: [PATCH 0074/1801] Add Baseline Estimators to core TensorFlow PiperOrigin-RevId: 175079784 --- tensorflow/python/estimator/BUILD | 65 + .../python/estimator/canned/baseline.py | 349 ++++ .../python/estimator/canned/baseline_test.py | 1545 +++++++++++++++++ tensorflow/python/estimator/estimator_lib.py | 4 + ...rflow.estimator.-baseline-classifier.pbtxt | 54 + ...orflow.estimator.-baseline-regressor.pbtxt | 54 + .../api/golden/tensorflow.estimator.pbtxt | 8 + 7 files changed, 2079 insertions(+) create mode 100644 tensorflow/python/estimator/canned/baseline.py create mode 100644 tensorflow/python/estimator/canned/baseline_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 26f1fd888a..dba7761700 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -25,6 +25,7 @@ py_library( srcs = ["estimator_lib.py"], srcs_version = "PY2AND3", deps = [ + ":baseline", ":dnn", ":dnn_linear_combined", ":estimator", @@ -186,6 +187,70 @@ py_test( ], ) +py_library( + name = "baseline", + srcs = ["canned/baseline.py"], + srcs_version = "PY2AND3", + deps = [ + ":estimator", + ":head", + ":model_fn", + ":optimizers", + "//tensorflow/python:init_ops", + "//tensorflow/python:layers", + "//tensorflow/python:nn", + "//tensorflow/python:partitioned_variables", + "//tensorflow/python:summary", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python/feature_column", + "@six_archive//:six", + ], +) + +py_test( + name = "baseline_test", + size = "medium", + srcs = ["canned/baseline_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + "notsan", # b/67510291 + ], + deps = [ + ":baseline", + ":estimator", + ":export_export", + ":metric_keys", + ":numpy_io", + ":pandas_io", + ":run_config", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:data_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:platform", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:state_ops", + "//tensorflow/python:summary", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + "//third_party/py/pandas", + "@six_archive//:six", + ], +) + py_library( name = "dnn", srcs = ["canned/dnn.py"], diff --git a/tensorflow/python/estimator/canned/baseline.py b/tensorflow/python/estimator/canned/baseline.py new file mode 100644 index 0000000000..96e4ecd29f --- /dev/null +++ b/tensorflow/python/estimator/canned/baseline.py @@ -0,0 +1,349 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Baseline estimators. + +Baseline estimators are bias-only estimators that can be used for debugging +and as simple baselines. + +Example: + +``` +# Build BaselineClassifier +classifier = BaselineClassifier(n_classes=3) + +# Input builders +def input_fn_train: # returns x, y (where y represents label's class index). + pass + +def input_fn_eval: # returns x, y (where y represents label's class index). + pass + +# Fit model. +classifier.train(input_fn=input_fn_train) + +# Evaluate cross entropy between the test and train labels. +loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] + +# predict outputs the probability distribution of the classes as seen in +# training. +predictions = classifier.predict(new_samples) +``` +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six + +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.canned import optimizers +from tensorflow.python.feature_column import feature_column as feature_column_lib +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import training_util + +# The default learning rate of 0.3 is a historical artifact of the initial +# implementation, but seems a reasonable choice. +_LEARNING_RATE = 0.3 + + +def _get_weight_column_key(weight_column): + if weight_column is None: + return None + if isinstance(weight_column, six.string_types): + return weight_column + if not isinstance(weight_column, feature_column_lib._NumericColumn): # pylint: disable=protected-access + raise TypeError('Weight column must be either a string or _NumericColumn.' + ' Given type: {}.'.format(type(weight_column))) + return weight_column.key() + + +def _baseline_logit_fn_builder(num_outputs, weight_column=None): + """Function builder for a baseline logit_fn. + + Args: + num_outputs: Number of outputs for the model. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + Returns: + A logit_fn (see below). + """ + + def baseline_logit_fn(features): + """Baseline model logit_fn. + + The baseline model simply learns a bias, so the output logits are a + `Variable` with one weight for each output that learns the bias for the + corresponding output. + + Args: + features: The first item returned from the `input_fn` passed to `train`, + `evaluate`, and `predict`. This should be a single `Tensor` or dict with + `Tensor` values. + Returns: + A `Tensor` representing the logits. + """ + size_checks = [] + batch_size = None + + weight_column_key = _get_weight_column_key(weight_column) + + # The first dimension is assumed to be a batch size and must be consistent + # among all of the features. + for key, feature in features.items(): + # Skip weight_column to ensure we don't add size checks to it. + # These would introduce a dependency on the weight at serving time. + if key == weight_column_key: + continue + first_dim = array_ops.shape(feature)[0] + if batch_size is None: + batch_size = first_dim + else: + size_checks.append(check_ops.assert_equal(batch_size, first_dim)) + + with ops.control_dependencies(size_checks): + with variable_scope.variable_scope('baseline'): + bias = variable_scope.get_variable('bias', shape=[num_outputs], + initializer=init_ops.Zeros) + return math_ops.multiply(bias, array_ops.ones([batch_size, + num_outputs])) + + return baseline_logit_fn + + +def _baseline_model_fn(features, labels, mode, head, optimizer, + weight_column=None, config=None): + """Model_fn for baseline models. + + Args: + features: `Tensor` or dict of `Tensor` (depends on data passed to `train`). + labels: `Tensor` of labels that are compatible with the `Head` instance. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. + head: A `Head` instance. + optimizer: String, `tf.Optimizer` object, or callable that creates the + optimizer to use for training. If not specified, will use `FtrlOptimizer` + with a default learning rate of 0.3. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + config: `RunConfig` object to configure the runtime settings. + + Raises: + KeyError: If weight column is specified but not present. + ValueError: If features is an empty dictionary. + + Returns: + An `EstimatorSpec` instance. + """ + del config # Unused. + + logit_fn = _baseline_logit_fn_builder(head.logits_dimension, weight_column) + logits = logit_fn(features) + + def train_op_fn(loss): + opt = optimizers.get_optimizer_instance( + optimizer, learning_rate=_LEARNING_RATE) + return opt.minimize(loss, global_step=training_util.get_global_step()) + + return head.create_estimator_spec( + features=features, + mode=mode, + logits=logits, + labels=labels, + train_op_fn=train_op_fn) + + +class BaselineClassifier(estimator.Estimator): + """A classifier that can establish a simple baseline. + + This classifier ignores feature values and will learn to predict the average + value of each label. For single-label problems, this will predict the + probability distribution of the classes as seen in the labels. For multi-label + problems, this will predict the fraction of examples that are positive for + each class. + + Example: + + ```python + + # Build BaselineClassifier + classifier = BaselineClassifier(n_classes=3) + + # Input builders + def input_fn_train: # returns x, y (where y represents label's class index). + pass + + def input_fn_eval: # returns x, y (where y represents label's class index). + pass + + # Fit model. + classifier.train(input_fn=input_fn_train) + + # Evaluate cross entropy between the test and train labels. + loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] + + # predict outputs the probability distribution of the classes as seen in + # training. + predictions = classifier.predict(new_samples) + + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * if `weight_column` is not `None`, a feature with + `key=weight_column` whose value is a `Tensor`. + """ + + def __init__(self, + model_dir=None, + n_classes=2, + weight_column=None, + label_vocabulary=None, + optimizer='Ftrl', + config=None): + """Initializes a BaselineClassifier instance. + + Args: + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator to + continue training a previously saved model. + n_classes: number of label classes. Default is binary classification. + It must be greater than 1. Note: Class labels are integers representing + the class index (i.e. values from 0 to n_classes-1). For arbitrary + label values (e.g. string labels), convert to class indices first. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + label_vocabulary: Optional list of strings with size `[n_classes]` + defining the label vocabulary. Only supported for `n_classes` > 2. + optimizer: String, `tf.Optimizer` object, or callable that creates the + optimizer to use for training. If not specified, will use + `FtrlOptimizer` with a default learning rate of 0.3. + config: `RunConfig` object to configure the runtime settings. + Returns: + A `BaselineClassifier` estimator. + + Raises: + ValueError: If `n_classes` < 2. + """ + if n_classes == 2: + head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( # pylint: disable=protected-access + weight_column=weight_column, + label_vocabulary=label_vocabulary) + else: + head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( # pylint: disable=protected-access + n_classes, weight_column=weight_column, + label_vocabulary=label_vocabulary) + def _model_fn(features, labels, mode, config): + return _baseline_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + optimizer=optimizer, + weight_column=weight_column, + config=config) + super(BaselineClassifier, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config) + + +class BaselineRegressor(estimator.Estimator): + """A regressor that can establish a simple baseline. + + This regressor ignores feature values and will learn to predict the average + value of each label. + + Example: + + ```python + + # Build BaselineRegressor + regressor = BaselineRegressor() + + # Input builders + def input_fn_train: # returns x, y (where y is the label). + pass + + def input_fn_eval: # returns x, y (where y is the label). + pass + + # Fit model. + regressor.train(input_fn=input_fn_train) + + # Evaluate squared-loss between the test and train targets. + loss = regressor.evaluate(input_fn=input_fn_eval)["loss"] + + # predict outputs the mean value seen during training. + predictions = regressor.predict(new_samples) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * if `weight_column` is not `None`, a feature with + `key=weight_column` whose value is a `Tensor`. + """ + + def __init__(self, + model_dir=None, + label_dimension=1, + weight_column=None, + optimizer='Ftrl', + config=None): + """Initializes a BaselineRegressor instance. + + Args: + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator to + continue training a previously saved model. + label_dimension: Number of regression targets per example. This is the + size of the last dimension of the labels and logits `Tensor` objects + (typically, these have shape `[batch_size, label_dimension]`). + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + optimizer: String, `tf.Optimizer` object, or callable that creates the + optimizer to use for training. If not specified, will use + `FtrlOptimizer` with a default learning rate of 0.3. + config: `RunConfig` object to configure the runtime settings. + Returns: + A `BaselineRegressor` estimator. + """ + + head = head_lib._regression_head_with_mean_squared_error_loss( # pylint: disable=protected-access + label_dimension=label_dimension, + weight_column=weight_column) + def _model_fn(features, labels, mode, config): + return _baseline_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + optimizer=optimizer, + config=config) + super(BaselineRegressor, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config) diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py new file mode 100644 index 0000000000..96639e88ea --- /dev/null +++ b/tensorflow/python/estimator/canned/baseline_test.py @@ -0,0 +1,1545 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for baseline.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import shutil +import tempfile + +import numpy as np +import six + +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.client import session as tf_session +from tensorflow.python.estimator.canned import baseline +from tensorflow.python.estimator.canned import metric_keys +from tensorflow.python.estimator.export import export +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.estimator.inputs import pandas_io +from tensorflow.python.feature_column import feature_column as feature_column_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import checkpoint_utils +from tensorflow.python.training import input as input_lib +from tensorflow.python.training import optimizer +from tensorflow.python.training import queue_runner +from tensorflow.python.training import saver + + +try: + # pylint: disable=g-import-not-at-top + import pandas as pd + HAS_PANDAS = True +except IOError: + # Pandas writes a temporary file during import. If it fails, don't use pandas. + HAS_PANDAS = False +except ImportError: + HAS_PANDAS = False + +# pylint rules which are disabled by default for test files. +# pylint: disable=invalid-name,protected-access,missing-docstring + +# Names of variables created by model. +BIAS_NAME = 'baseline/bias' + + +def assert_close(expected, actual, rtol=1e-04, name='assert_close'): + with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: + expected = ops.convert_to_tensor(expected, name='expected') + actual = ops.convert_to_tensor(actual, name='actual') + rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected) + rtol = ops.convert_to_tensor(rtol, name='rtol') + return check_ops.assert_less( + rdiff, + rtol, + data=('Condition expected =~ actual did not hold element-wise:' + 'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff, + 'rtol = ', rtol,), + name=scope) + + +def save_variables_to_ckpt(model_dir): + init_all_op = [variables.global_variables_initializer()] + with tf_session.Session() as sess: + sess.run(init_all_op) + saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) + + +def queue_parsed_features(feature_map): + tensors_to_enqueue = [] + keys = [] + for key, tensor in six.iteritems(feature_map): + keys.append(key) + tensors_to_enqueue.append(tensor) + queue_dtypes = [x.dtype for x in tensors_to_enqueue] + input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes) + queue_runner.add_queue_runner( + queue_runner.QueueRunner(input_queue, + [input_queue.enqueue(tensors_to_enqueue)])) + dequeued_tensors = input_queue.dequeue() + return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))} + + +def sorted_key_dict(unsorted_dict): + return {k: unsorted_dict[k] for k in sorted(unsorted_dict)} + + +def sigmoid(x): + return 1 / (1 + np.exp(-1.0 * x)) + + +def _baseline_regressor_fn(*args, **kwargs): + return baseline.BaselineRegressor(*args, **kwargs) + + +def _baseline_classifier_fn(*args, **kwargs): + return baseline.BaselineClassifier(*args, **kwargs) + + +# Tests for Baseline Regressor. + + +# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders. +class BaselineRegressorEvaluationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def test_evaluation_for_simple_data(self): + with ops.Graph().as_default(): + variables.Variable([13.0], name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + eval_metrics = baseline_regressor.evaluate( + input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1) + + # Logit is bias = 13, while label is 10. Loss is 3**2 = 9. + self.assertDictEqual({ + metric_keys.MetricKeys.LOSS: 9., + metric_keys.MetricKeys.LOSS_MEAN: 9., + ops.GraphKeys.GLOBAL_STEP: 100 + }, eval_metrics) + + def test_evaluation_batch(self): + """Tests evaluation for batch_size==2.""" + with ops.Graph().as_default(): + variables.Variable([13.0], name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + eval_metrics = baseline_regressor.evaluate( + input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1) + + # Logit is bias = 13, while label is 10. + # Loss per example is 3**2 = 9. + # Training loss is the sum over batch = 9 + 9 = 18 + # Average loss is the average over batch = 9 + self.assertDictEqual({ + metric_keys.MetricKeys.LOSS: 18., + metric_keys.MetricKeys.LOSS_MEAN: 9., + ops.GraphKeys.GLOBAL_STEP: 100 + }, eval_metrics) + + def test_evaluation_weights(self): + """Tests evaluation with weights.""" + with ops.Graph().as_default(): + variables.Variable([13.0], name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + def _input_fn(): + features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))} + labels = ((10.,), (10.,)) + return features, labels + + baseline_regressor = _baseline_regressor_fn( + weight_column='weights', + model_dir=self._model_dir) + eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1) + + # Logit is bias = 13, while label is 10. + # Loss per example is 3**2 = 9. + # Training loss is the weighted sum over batch = 9 + 2*9 = 27 + # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9 + self.assertDictEqual({ + metric_keys.MetricKeys.LOSS: 27., + metric_keys.MetricKeys.LOSS_MEAN: 9., + ops.GraphKeys.GLOBAL_STEP: 100 + }, eval_metrics) + + def test_evaluation_for_multi_dimensions(self): + label_dim = 2 + with ops.Graph().as_default(): + variables.Variable([46.0, 58.0], name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn( + label_dimension=label_dim, + model_dir=self._model_dir) + input_fn = numpy_io.numpy_input_fn( + x={ + 'age': np.array([[2., 4., 5.]]), + }, + y=np.array([[46., 58.]]), + batch_size=1, + num_epochs=None, + shuffle=False) + eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1) + + self.assertItemsEqual( + (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, + ops.GraphKeys.GLOBAL_STEP), eval_metrics.keys()) + + # Logit is bias which is [46, 58] + self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) + + +class BaselineRegressorPredictTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def test_1d(self): + """Tests predict when all variables are one-dimensional.""" + with ops.Graph().as_default(): + variables.Variable([.2], name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': np.array([[2.]])}, + y=None, + batch_size=1, + num_epochs=1, + shuffle=False) + predictions = baseline_regressor.predict(input_fn=predict_input_fn) + predicted_scores = list([x['predictions'] for x in predictions]) + # x * weight + bias = 2. * 10. + .2 = 20.2 + self.assertAllClose([[.2]], predicted_scores) + + def testMultiDim(self): + """Tests predict when all variables are multi-dimenstional.""" + batch_size = 2 + label_dimension = 3 + with ops.Graph().as_default(): + variables.Variable( # shape=[label_dimension] + [.2, .4, .6], name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn( + label_dimension=label_dimension, + model_dir=self._model_dir) + + predict_input_fn = numpy_io.numpy_input_fn( + # x shape=[batch_size, x_dim] + x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])}, + y=None, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + predictions = baseline_regressor.predict(input_fn=predict_input_fn) + predicted_scores = list([x['predictions'] for x in predictions]) + # score = bias, shape=[batch_size, label_dimension] + self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]], + predicted_scores) + + +class BaselineRegressorIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, + input_dimension, label_dimension, prediction_length): + feature_columns = [ + feature_column_lib.numeric_column('x', shape=(input_dimension,)) + ] + est = _baseline_regressor_fn( + label_dimension=label_dimension, + model_dir=self._model_dir) + + # TRAIN + # learn y = x + est.train(train_input_fn, steps=200) + + # EVALUTE + scores = est.evaluate(eval_input_fn) + self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) + + # PREDICT + predictions = np.array( + [x['predictions'] for x in est.predict(predict_input_fn)]) + self.assertAllEqual((prediction_length, label_dimension), predictions.shape) + + # EXPORT + feature_spec = feature_column_lib.make_parse_example_spec(feature_columns) + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def test_numpy_input_fn(self): + """Tests complete flow with numpy_input_fn.""" + label_dimension = 2 + input_dimension = label_dimension + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + + train_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=None, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length) + + def test_pandas_input_fn(self): + """Tests complete flow with pandas_input_fn.""" + if not HAS_PANDAS: + return + + # Pandas DataFrame natually supports 1 dim data only. + label_dimension = 1 + input_dimension = label_dimension + batch_size = 10 + data = np.array([1., 2., 3., 4.], dtype=np.float32) + x = pd.DataFrame({'x': data}) + y = pd.Series(data) + prediction_length = 4 + + train_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) + eval_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, shuffle=False) + predict_input_fn = pandas_io.pandas_input_fn( + x=x, batch_size=batch_size, shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length) + + def test_input_fn_from_parse_example(self): + """Tests complete flow with input_fn constructed from parse_example.""" + label_dimension = 2 + input_dimension = label_dimension + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + + serialized_examples = [] + for datum in data: + example = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'x': + feature_pb2.Feature(float_list=feature_pb2.FloatList( + value=datum)), + 'y': + feature_pb2.Feature(float_list=feature_pb2.FloatList( + value=datum[:label_dimension])), + })) + serialized_examples.append(example.SerializeToString()) + + feature_spec = { + 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), + 'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32), + } + + def _train_input_fn(): + feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _eval_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _predict_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + features.pop('y') + return features, None + + self._test_complete_flow( + train_input_fn=_train_input_fn, + eval_input_fn=_eval_input_fn, + predict_input_fn=_predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length) + + +class BaselineRegressorTrainingTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _mock_optimizer(self, expected_loss=None): + expected_var_names = [ + '%s:0' % BIAS_NAME + ] + + def _minimize(loss, global_step=None, var_list=None): + trainable_vars = var_list or ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual(expected_var_names, + [var.name for var in trainable_vars]) + + # Verify loss. We can't check the value directly, so we add an assert op. + self.assertEquals(0, loss.shape.ndims) + if expected_loss is None: + if global_step is not None: + return state_ops.assign_add(global_step, 1).op + return control_flow_ops.no_op() + assert_loss = assert_close( + math_ops.to_float(expected_loss, name='expected'), + loss, + name='assert_loss') + with ops.control_dependencies((assert_loss,)): + if global_step is not None: + return state_ops.assign_add(global_step, 1).op + return control_flow_ops.no_op() + + mock_optimizer = test.mock.NonCallableMock( + spec=optimizer.Optimizer, + wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) + mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) + + # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. + # So, return mock_optimizer itself for deepcopy. + mock_optimizer.__deepcopy__ = lambda _: mock_optimizer + return mock_optimizer + + def _assert_checkpoint(self, + label_dimension, + expected_global_step, + expected_bias=None): + shapes = { + name: shape + for (name, shape) in checkpoint_utils.list_variables(self._model_dir) + } + + self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) + self.assertEqual(expected_global_step, + checkpoint_utils.load_variable(self._model_dir, + ops.GraphKeys.GLOBAL_STEP)) + + self.assertEqual([label_dimension], shapes[BIAS_NAME]) + if expected_bias is not None: + self.assertEqual(expected_bias, + checkpoint_utils.load_variable(self._model_dir, + BIAS_NAME)) + + def testFromScratchWithDefaultOptimizer(self): + # Create BaselineRegressor. + label = 5. + age = 17 + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + + # Train for a few steps, and validate final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps) + + def testTrainWithOneDimLabel(self): + label_dimension = 1 + batch_size = 20 + est = _baseline_regressor_fn( + label_dimension=label_dimension, + model_dir=self._model_dir) + data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) + self.assertEqual((batch_size,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1}, + y=data_rank_1, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(label_dimension=1, expected_global_step=200) + + def testTrainWithOneDimWeight(self): + label_dimension = 1 + batch_size = 20 + est = _baseline_regressor_fn( + label_dimension=label_dimension, + weight_column='w', + model_dir=self._model_dir) + + data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) + self.assertEqual((batch_size,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1, + 'w': data_rank_1}, + y=data_rank_1, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(label_dimension=1, expected_global_step=200) + + def testFromScratch(self): + # Create BaselineRegressor. + label = 5. + age = 17 + # loss = (logits - label)^2 = (0 - 5.)^2 = 25. + mock_optimizer = self._mock_optimizer(expected_loss=25.) + baseline_regressor = _baseline_regressor_fn( + model_dir=self._model_dir, + optimizer=mock_optimizer) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + label_dimension=1, + expected_global_step=num_steps, + expected_bias=[0.]) + + def testFromCheckpoint(self): + # Create initial checkpoint. + bias = 7.0 + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable([bias], name=BIAS_NAME) + variables.Variable( + initial_global_step, + name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # logits = bias = 6. + # loss = (logits - label)^2 = (7 - 5)^2 = 4 + mock_optimizer = self._mock_optimizer(expected_loss=4.) + baseline_regressor = _baseline_regressor_fn( + model_dir=self._model_dir, + optimizer=mock_optimizer) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + label_dimension=1, + expected_global_step=initial_global_step + num_steps, + expected_bias=[bias]) + + def testFromCheckpointMultiBatch(self): + # Create initial checkpoint. + bias = 5.0 + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable([bias], name=BIAS_NAME) + variables.Variable( + initial_global_step, + name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # logits = bias + # logits[0] = 5. + # logits[1] = 5. + # loss = sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2 = 4 + mock_optimizer = self._mock_optimizer(expected_loss=4.) + baseline_regressor = _baseline_regressor_fn( + model_dir=self._model_dir, + optimizer=mock_optimizer) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))), + steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + label_dimension=1, + expected_global_step=initial_global_step + num_steps, + expected_bias=bias) + + +# Tests for Baseline Classifier. + + +class BaselineClassifierTrainingTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _mock_optimizer(self, expected_loss=None): + expected_var_names = [ + '%s:0' % BIAS_NAME + ] + + def _minimize(loss, global_step): + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual( + expected_var_names, + [var.name for var in trainable_vars]) + + # Verify loss. We can't check the value directly, so we add an assert op. + self.assertEquals(0, loss.shape.ndims) + if expected_loss is None: + return state_ops.assign_add(global_step, 1).op + assert_loss = assert_close( + math_ops.to_float(expected_loss, name='expected'), + loss, + name='assert_loss') + with ops.control_dependencies((assert_loss,)): + return state_ops.assign_add(global_step, 1).op + + mock_optimizer = test.mock.NonCallableMock( + spec=optimizer.Optimizer, + wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) + mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) + + # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. + # So, return mock_optimizer itself for deepcopy. + mock_optimizer.__deepcopy__ = lambda _: mock_optimizer + return mock_optimizer + + def _assert_checkpoint( + self, n_classes, expected_global_step, expected_bias=None): + logits_dimension = n_classes if n_classes > 2 else 1 + + shapes = { + name: shape for (name, shape) in + checkpoint_utils.list_variables(self._model_dir) + } + + self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) + self.assertEqual( + expected_global_step, + checkpoint_utils.load_variable( + self._model_dir, ops.GraphKeys.GLOBAL_STEP)) + + self.assertEqual([logits_dimension], shapes[BIAS_NAME]) + if expected_bias is not None: + self.assertAllEqual(expected_bias, + checkpoint_utils.load_variable( + self._model_dir, BIAS_NAME)) + + def _testFromScratchWithDefaultOptimizer(self, n_classes): + label = 0 + age = 17 + est = baseline.BaselineClassifier( + n_classes=n_classes, + model_dir=self._model_dir) + + # Train for a few steps, and validate final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self._assert_checkpoint(n_classes, num_steps) + + def testBinaryClassesFromScratchWithDefaultOptimizer(self): + self._testFromScratchWithDefaultOptimizer(n_classes=2) + + def testMultiClassesFromScratchWithDefaultOptimizer(self): + self._testFromScratchWithDefaultOptimizer(n_classes=4) + + def _testTrainWithTwoDimsLabel(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + data_rank_2 = np.array([[0], [1]]) + self.assertEqual((2,), data_rank_1.shape) + self.assertEqual((2, 1), data_rank_2.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1}, + y=data_rank_2, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithTwoDimsLabel(self): + self._testTrainWithTwoDimsLabel(n_classes=2) + + def testMultiClassesTrainWithTwoDimsLabel(self): + self._testTrainWithTwoDimsLabel(n_classes=4) + + def _testTrainWithOneDimLabel(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + self.assertEqual((2,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1}, + y=data_rank_1, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithOneDimLabel(self): + self._testTrainWithOneDimLabel(n_classes=2) + + def testMultiClassesTrainWithOneDimLabel(self): + self._testTrainWithOneDimLabel(n_classes=4) + + def _testTrainWithTwoDimsWeight(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + weight_column='w', + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + data_rank_2 = np.array([[0], [1]]) + self.assertEqual((2,), data_rank_1.shape) + self.assertEqual((2, 1), data_rank_2.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1, + batch_size=batch_size, num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithTwoDimsWeight(self): + self._testTrainWithTwoDimsWeight(n_classes=2) + + def testMultiClassesTrainWithTwoDimsWeight(self): + self._testTrainWithTwoDimsWeight(n_classes=4) + + def _testTrainWithOneDimWeight(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + weight_column='w', + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + self.assertEqual((2,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1, + batch_size=batch_size, num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithOneDimWeight(self): + self._testTrainWithOneDimWeight(n_classes=2) + + def testMultiClassesTrainWithOneDimWeight(self): + self._testTrainWithOneDimWeight(n_classes=4) + + def _testFromScratch(self, n_classes): + label = 1 + age = 17 + # For binary classifier: + # loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are + # all zero initially) and label = 1 so, + # loss = 1 * -log ( sigmoid(logits) ) = 0.69315 + # For multi class classifier: + # loss = cross_entropy(logits, label) where logits are all 0s (weights are + # all zero initially) and label = 1 so, + # loss = 1 * -log ( 1.0 / n_classes ) + # For this particular test case, as logits are same, the formula + # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases. + mock_optimizer = self._mock_optimizer( + expected_loss=-1 * math.log(1.0/n_classes)) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + n_classes, + expected_global_step=num_steps, + expected_bias=[0.] if n_classes == 2 else [.0] * n_classes) + + def testBinaryClassesFromScratch(self): + self._testFromScratch(n_classes=2) + + def testMultiClassesFromScratch(self): + self._testFromScratch(n_classes=4) + + def _testFromCheckpoint(self, n_classes): + # Create initial checkpoint. + label = 1 + age = 17 + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # For binary classifier: + # logits = bias = -1. + # loss = sigmoid_cross_entropy(logits, label) + # so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133 + # For multi class classifier: + # loss = cross_entropy(logits, label) + # where logits = bias and label = 1 + # so, loss = 1 * -log ( softmax(logits)[1] ) + if n_classes == 2: + expected_loss = 1.3133 + else: + logits = bias + logits_exp = np.exp(logits) + softmax = logits_exp / logits_exp.sum() + expected_loss = -1 * math.log(softmax[label]) + + mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + n_classes, + expected_global_step=initial_global_step + num_steps, + expected_bias=bias) + + def testBinaryClassesFromCheckpoint(self): + self._testFromCheckpoint(n_classes=2) + + def testMultiClassesFromCheckpoint(self): + self._testFromCheckpoint(n_classes=4) + + def _testFromCheckpointFloatLabels(self, n_classes): + """Tests float labels for binary classification.""" + # Create initial checkpoint. + if n_classes > 2: + return + label = 0.8 + age = 17 + bias = [-1.0] + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # logits = bias = -1. + # loss = sigmoid_cross_entropy(logits, label) + # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617 + mock_optimizer = self._mock_optimizer(expected_loss=1.1132617) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + + def testBinaryClassesFromCheckpointFloatLabels(self): + self._testFromCheckpointFloatLabels(n_classes=2) + + def testMultiClassesFromCheckpointFloatLabels(self): + self._testFromCheckpointFloatLabels(n_classes=4) + + def _testFromCheckpointMultiBatch(self, n_classes): + # Create initial checkpoint. + label = [1, 0] + age = [17, 18.5] + # For binary case, the expected weight has shape (1,1). For multi class + # case, the shape is (1, n_classes). In order to test the weights, set + # weights as 2.0 * range(n_classes). + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # For binary classifier: + # logits = bias + # logits[0] = -1. + # logits[1] = -1. + # loss = sigmoid_cross_entropy(logits, label) + # so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133 + # loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132 + # For multi class classifier: + # loss = cross_entropy(logits, label) + # where logits = bias and label = [1, 0] + # so, loss = 1 * -log ( softmax(logits)[label] ) + if n_classes == 2: + expected_loss = (1.3133 + 0.3132) + else: + # Expand logits since batch_size=2 + logits = bias * np.ones(shape=(2, 1)) + logits_exp = np.exp(logits) + softmax_row_0 = logits_exp[0] / logits_exp[0].sum() + softmax_row_1 = logits_exp[1] / logits_exp[1].sum() + expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) + expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) + expected_loss = expected_loss_0 + expected_loss_1 + + mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': (age)}, (label)), + steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + n_classes, + expected_global_step=initial_global_step + num_steps, + expected_bias=bias) + + def testBinaryClassesFromCheckpointMultiBatch(self): + self._testFromCheckpointMultiBatch(n_classes=2) + + def testMultiClassesFromCheckpointMultiBatch(self): + self._testFromCheckpointMultiBatch(n_classes=4) + + +class BaselineClassifierEvaluationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _test_evaluation_for_simple_data(self, n_classes): + label = 1 + age = 1. + + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + n_classes=n_classes, + model_dir=self._model_dir) + eval_metrics = est.evaluate( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1) + + if n_classes == 2: + # Binary classes: loss = -log(sigmoid(-1)) = 1.3133 + # Prediction = sigmoid(-1) = 0.2689 + expected_metrics = { + metric_keys.MetricKeys.LOSS: 1.3133, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: 1.3133, + metric_keys.MetricKeys.ACCURACY: 0., + metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, + metric_keys.MetricKeys.LABEL_MEAN: 1., + metric_keys.MetricKeys.ACCURACY_BASELINE: 1, + metric_keys.MetricKeys.AUC: 0., + metric_keys.MetricKeys.AUC_PR: 1., + } + else: + # Multi classes: loss = 1 * -log ( softmax(logits)[label] ) + logits = bias + logits_exp = np.exp(logits) + softmax = logits_exp / logits_exp.sum() + expected_loss = -1 * math.log(softmax[label]) + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: expected_loss, + metric_keys.MetricKeys.ACCURACY: 0., + } + + self.assertAllClose(sorted_key_dict(expected_metrics), + sorted_key_dict(eval_metrics), rtol=1e-3) + + def test_binary_classes_evaluation_for_simple_data(self): + self._test_evaluation_for_simple_data(n_classes=2) + + def test_multi_classes_evaluation_for_simple_data(self): + self._test_evaluation_for_simple_data(n_classes=4) + + def _test_evaluation_batch(self, n_classes): + """Tests evaluation for batch_size==2.""" + label = [1, 0] + age = [17., 18.] + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + n_classes=n_classes, + model_dir=self._model_dir) + eval_metrics = est.evaluate( + input_fn=lambda: ({'age': (age)}, (label)), steps=1) + + if n_classes == 2: + # Logits are (-1., -1.) labels are (1, 0). + # Loss is + # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 + # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 + # Prediction = sigmoid(-1) = 0.2689 + expected_loss = 1.3133 + 0.3132 + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, + metric_keys.MetricKeys.ACCURACY: 0.5, + metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, + metric_keys.MetricKeys.LABEL_MEAN: 0.5, + metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, + metric_keys.MetricKeys.AUC: 0.5, + metric_keys.MetricKeys.AUC_PR: 0.75, + } + else: + # Expand logits since batch_size=2 + logits = bias * np.ones(shape=(2, 1)) + logits_exp = np.exp(logits) + softmax_row_0 = logits_exp[0] / logits_exp[0].sum() + softmax_row_1 = logits_exp[1] / logits_exp[1].sum() + expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) + expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) + expected_loss = expected_loss_0 + expected_loss_1 + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, + metric_keys.MetricKeys.ACCURACY: 0.5, + } + + self.assertAllClose(sorted_key_dict(expected_metrics), + sorted_key_dict(eval_metrics), rtol=1e-3) + + def test_binary_classes_evaluation_batch(self): + self._test_evaluation_batch(n_classes=2) + + def test_multi_classes_evaluation_batch(self): + self._test_evaluation_batch(n_classes=4) + + def _test_evaluation_weights(self, n_classes): + """Tests evaluation with weights.""" + + label = [1, 0] + age = [17., 18.] + weights = [1., 2.] + # For binary case, the expected weight has shape (1,1). For multi class + # case, the shape is (1, n_classes). In order to test the weights, set + # weights as 2.0 * range(n_classes). + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + n_classes=n_classes, + weight_column='w', + model_dir=self._model_dir) + eval_metrics = est.evaluate( + input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1) + + if n_classes == 2: + # Logits are (-1., -1.) labels are (1, 0). + # Loss is + # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 + # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 + # weights = [1., 2.] + expected_loss = 1.3133 * 1. + 0.3132 * 2. + loss_mean = expected_loss / (1.0 + 2.0) + label_mean = np.average(label, weights=weights) + logits = [-1, -1] + logistics = sigmoid(np.array(logits)) + predictions_mean = np.average(logistics, weights=weights) + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: loss_mean, + metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), + metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean, + metric_keys.MetricKeys.LABEL_MEAN: label_mean, + metric_keys.MetricKeys.ACCURACY_BASELINE: ( + max(label_mean, 1-label_mean)), + metric_keys.MetricKeys.AUC: 0.5, + metric_keys.MetricKeys.AUC_PR: 2. / (1. + 2.), + } + else: + # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] ) + # Expand logits since batch_size=2 + logits = bias * np.ones(shape=(2, 1)) + logits_exp = np.exp(logits) + softmax_row_0 = logits_exp[0] / logits_exp[0].sum() + softmax_row_1 = logits_exp[1] / logits_exp[1].sum() + expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) + expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) + loss_mean = np.average([expected_loss_0, expected_loss_1], + weights=weights) + expected_loss = loss_mean * np.sum(weights) + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: loss_mean, + metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), + } + + self.assertAllClose(sorted_key_dict(expected_metrics), + sorted_key_dict(eval_metrics), rtol=1e-3) + + def test_binary_classes_evaluation_weights(self): + self._test_evaluation_weights(n_classes=2) + + def test_multi_classes_evaluation_weights(self): + self._test_evaluation_weights(n_classes=4) + + +class BaselineClassifierPredictTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _testPredictions(self, n_classes, label_vocabulary, label_output_fn): + """Tests predict when all variables are one-dimensional.""" + age = 1. + + bias = [10.0] if n_classes == 2 else [10.0] * n_classes + + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + label_vocabulary=label_vocabulary, + n_classes=n_classes, + model_dir=self._model_dir) + + predict_input_fn = numpy_io.numpy_input_fn( + x={'age': np.array([[age]])}, + y=None, + batch_size=1, + num_epochs=1, + shuffle=False) + predictions = list(est.predict(input_fn=predict_input_fn)) + + if n_classes == 2: + scalar_logits = bias[0] + two_classes_logits = [0, scalar_logits] + two_classes_logits_exp = np.exp(two_classes_logits) + softmax = two_classes_logits_exp / two_classes_logits_exp.sum() + + expected_predictions = { + 'class_ids': [1], + 'classes': [label_output_fn(1)], + 'logistic': [sigmoid(np.array(scalar_logits))], + 'logits': [scalar_logits], + 'probabilities': softmax, + } + else: + onedim_logits = np.array(bias) + class_ids = onedim_logits.argmax() + logits_exp = np.exp(onedim_logits) + softmax = logits_exp / logits_exp.sum() + expected_predictions = { + 'class_ids': [class_ids], + 'classes': [label_output_fn(class_ids)], + 'logits': onedim_logits, + 'probabilities': softmax, + } + + self.assertEqual(1, len(predictions)) + # assertAllClose cannot handle byte type. + self.assertEqual(expected_predictions['classes'], predictions[0]['classes']) + expected_predictions.pop('classes') + predictions[0].pop('classes') + self.assertAllClose(sorted_key_dict(expected_predictions), + sorted_key_dict(predictions[0])) + + def testBinaryClassesWithoutLabelVocabulary(self): + n_classes = 2 + self._testPredictions(n_classes, + label_vocabulary=None, + label_output_fn=lambda x: ('%s' % x).encode()) + + def testBinaryClassesWithLabelVocabulary(self): + n_classes = 2 + self._testPredictions( + n_classes, + label_vocabulary=['class_vocab_{}'.format(i) + for i in range(n_classes)], + label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) + + def testMultiClassesWithoutLabelVocabulary(self): + n_classes = 4 + self._testPredictions( + n_classes, + label_vocabulary=None, + label_output_fn=lambda x: ('%s' % x).encode()) + + def testMultiClassesWithLabelVocabulary(self): + n_classes = 4 + self._testPredictions( + n_classes, + label_vocabulary=['class_vocab_{}'.format(i) + for i in range(n_classes)], + label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) + + +class BaselineClassifierIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn, + predict_input_fn, input_dimension, prediction_length): + feature_columns = [ + feature_column_lib.numeric_column('x', shape=(input_dimension,)) + ] + est = _baseline_classifier_fn( + n_classes=n_classes, + model_dir=self._model_dir) + + # TRAIN + # learn y = x + est.train(train_input_fn, steps=200) + + # EVALUTE + scores = est.evaluate(eval_input_fn) + self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) + + # PREDICT + predictions = np.array( + [x['classes'] for x in est.predict(predict_input_fn)]) + self.assertAllEqual((prediction_length, 1), predictions.shape) + + # EXPORT + feature_spec = feature_column_lib.make_parse_example_spec(feature_columns) + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def _test_numpy_input_fn(self, n_classes): + """Tests complete flow with numpy_input_fn.""" + input_dimension = 4 + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) + data = data.reshape(batch_size, input_dimension) + target = np.array([1] * batch_size) + + train_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=target, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=target, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=None, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + + self._test_complete_flow( + n_classes=n_classes, + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + prediction_length=prediction_length) + + def test_binary_classes_numpy_input_fn(self): + self._test_numpy_input_fn(n_classes=2) + + def test_multi_classes_numpy_input_fn(self): + self._test_numpy_input_fn(n_classes=4) + + def _test_pandas_input_fn(self, n_classes): + """Tests complete flow with pandas_input_fn.""" + if not HAS_PANDAS: + return + + # Pandas DataFrame natually supports 1 dim data only. + input_dimension = 1 + batch_size = 10 + data = np.array([1., 2., 3., 4.], dtype=np.float32) + target = np.array([1, 0, 1, 0], dtype=np.int32) + x = pd.DataFrame({'x': data}) + y = pd.Series(target) + prediction_length = 4 + + train_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) + eval_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, shuffle=False) + predict_input_fn = pandas_io.pandas_input_fn( + x=x, batch_size=batch_size, shuffle=False) + + self._test_complete_flow( + n_classes=n_classes, + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + prediction_length=prediction_length) + + def test_binary_classes_pandas_input_fn(self): + self._test_pandas_input_fn(n_classes=2) + + def test_multi_classes_pandas_input_fn(self): + self._test_pandas_input_fn(n_classes=4) + + def _test_input_fn_from_parse_example(self, n_classes): + """Tests complete flow with input_fn constructed from parse_example.""" + input_dimension = 2 + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) + data = data.reshape(batch_size, input_dimension) + target = np.array([1] * batch_size, dtype=np.int64) + + serialized_examples = [] + for x, y in zip(data, target): + example = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'x': + feature_pb2.Feature(float_list=feature_pb2.FloatList( + value=x)), + 'y': + feature_pb2.Feature(int64_list=feature_pb2.Int64List( + value=[y])), + })) + serialized_examples.append(example.SerializeToString()) + + feature_spec = { + 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), + 'y': parsing_ops.FixedLenFeature([1], dtypes.int64), + } + + def _train_input_fn(): + feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _eval_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _predict_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + features.pop('y') + return features, None + + self._test_complete_flow( + n_classes=n_classes, + train_input_fn=_train_input_fn, + eval_input_fn=_eval_input_fn, + predict_input_fn=_predict_input_fn, + input_dimension=input_dimension, + prediction_length=prediction_length) + + def test_binary_classes_input_fn_from_parse_example(self): + self._test_input_fn_from_parse_example(n_classes=2) + + def test_multi_classes_input_fn_from_parse_example(self): + self._test_input_fn_from_parse_example(n_classes=4) + + +# Tests for Baseline logit_fn. + + +class BaselineLogitFnTest(test.TestCase): + + def test_basic_logit_correctness(self): + """baseline_logit_fn simply returns the bias variable.""" + with ops.Graph().as_default(): + logit_fn = baseline._baseline_logit_fn_builder(num_outputs=2) + logits = logit_fn(features={'age': [[23.], [31.]]}) + with variable_scope.variable_scope('baseline', reuse=True): + bias_var = variable_scope.get_variable('bias') + with tf_session.Session() as sess: + sess.run([variables.global_variables_initializer()]) + self.assertAllClose([[0., 0.], [0., 0.]], logits.eval()) + sess.run(bias_var.assign([10., 5.])) + self.assertAllClose([[10., 5.], [10., 5.]], logits.eval()) + + +if __name__ == '__main__': + test.main() + diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py index 5b82fd75ff..bed2b67419 100644 --- a/tensorflow/python/estimator/estimator_lib.py +++ b/tensorflow/python/estimator/estimator_lib.py @@ -19,6 +19,8 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import +from tensorflow.python.estimator.canned.baseline import BaselineClassifier +from tensorflow.python.estimator.canned.baseline import BaselineRegressor from tensorflow.python.estimator.canned.dnn import DNNClassifier from tensorflow.python.estimator.canned.dnn import DNNRegressor from tensorflow.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedClassifier @@ -46,6 +48,8 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ # Canned Estimators + 'BaselineClassifier', + 'BaselineRegressor', 'DNNClassifier', 'DNNRegressor', 'DNNLinearCombinedClassifier', diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt new file mode 100644 index 0000000000..f5ed263f0e --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt @@ -0,0 +1,54 @@ +path: "tensorflow.estimator.BaselineClassifier" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "config" + mtype: "" + } + member { + name: "model_dir" + mtype: "" + } + member { + name: "model_fn" + mtype: "" + } + member { + name: "params" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'optimizer\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'2\', \'None\', \'None\', \'Ftrl\', \'None\'], " + } + member_method { + name: "evaluate" + argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "export_savedmodel" + argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " + } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "latest_checkpoint" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "predict" + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "train" + argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt new file mode 100644 index 0000000000..61a29942c5 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt @@ -0,0 +1,54 @@ +path: "tensorflow.estimator.BaselineRegressor" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "config" + mtype: "" + } + member { + name: "model_dir" + mtype: "" + } + member { + name: "model_fn" + mtype: "" + } + member { + name: "params" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'model_dir\', \'label_dimension\', \'weight_column\', \'optimizer\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'Ftrl\', \'None\'], " + } + member_method { + name: "evaluate" + argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "export_savedmodel" + argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " + } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "latest_checkpoint" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "predict" + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "train" + argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt index ef93a61bd8..cdc367b99e 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt @@ -1,5 +1,13 @@ path: "tensorflow.estimator" tf_module { + member { + name: "BaselineClassifier" + mtype: "" + } + member { + name: "BaselineRegressor" + mtype: "" + } member { name: "DNNClassifier" mtype: "" -- GitLab From 7db94de969662cfc83b7152d57b23d6c57da0784 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 16:36:22 -0800 Subject: [PATCH 0075/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 175085154 --- .../core/ops/compat/ops_history.v1.pbtxt | 46 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 1 - 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index a4b5ca16af..60f67543f1 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -21670,6 +21670,52 @@ op { } is_stateful: true } +op { + name: "Print" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "data" + type_list_attr: "U" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + } + attr { + name: "U" + type: "list(type)" + has_minimum: true + } + attr { + name: "message" + type: "string" + default_value { + s: "" + } + } + attr { + name: "first_n" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "summarize" + type: "int" + default_value { + i: 3 + } + } + is_stateful: true +} op { name: "PriorityQueue" output_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 8353b45e22..2a74c20707 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -16977,7 +16977,6 @@ op { name: "U" type: "list(type)" has_minimum: true - minimum: 1 } attr { name: "message" -- GitLab From ecb3557621229deaebec209629d154c37da7f9d3 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 16:44:37 -0800 Subject: [PATCH 0076/1801] Make assert_equal/_none_equal/_less ops work in eager mode Also, fix documentation of eager mode execute() method and make tf_should_use work with empty list returned by execute() RELNOTES: tf.assert_equal no longer raises ValueError. It now raises InvalidArgumentError, as documented. PiperOrigin-RevId: 175086223 --- tensorflow/python/eager/execute.py | 3 +- tensorflow/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/check_ops_test.py | 311 +++++++++++------- tensorflow/python/ops/check_ops.py | 79 ++++- tensorflow/python/ops/control_flow_ops.py | 41 ++- tensorflow/python/util/tf_should_use.py | 2 +- 6 files changed, 316 insertions(+), 121 deletions(-) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 983c1ea73e..c6457232e9 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -47,8 +47,7 @@ def execute(op_name, num_outputs, inputs, attrs, ctx, name=None): name: Customized name for the operation. Returns: - None if there are no outputs, a single Tensor object if there is one output - and a list of Tensor objects if there are multiple outputs. + List of output Tensor objects. The list is empty if there are no outputs Raises: An exception on error. diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 7fa504e85e..8d6f863a4c 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1186,6 +1186,7 @@ cuda_py_test( srcs = ["check_ops_test.py"], additional_deps = [ "//third_party/py/numpy", + "//tensorflow/python/eager:context", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index ed859e3774..43785adcee 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -20,10 +20,13 @@ from __future__ import print_function import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.platform import test @@ -71,110 +74,178 @@ class AssertProperIterableTest(test.TestCase): class AssertEqualTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal(self): - with self.test_session(): + small = constant_op.constant([1, 2], name="small") + with ops.control_dependencies([check_ops.assert_equal(small, small)]): + out = array_ops.identity(small) + self.evaluate(out) + + def test_returns_none_with_eager(self): + with context.eager_mode(): small = constant_op.constant([1, 2], name="small") - with ops.control_dependencies([check_ops.assert_equal(small, small)]): - out = array_ops.identity(small) - out.eval() + x = check_ops.assert_equal(small, small) + assert x is None + @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater(self): - with self.test_session(): - # Static check - static_small = constant_op.constant([1, 2], name="small") - static_big = constant_op.constant([3, 4], name="big") - with self.assertRaisesRegexp(ValueError, "fail"): - check_ops.assert_equal(static_big, static_small, message="fail") - # Dynamic check - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies( - [check_ops.assert_equal( - big, small, message="fail")]): - out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*big.*small"): - out.eval(feed_dict={small: [1, 2], big: [3, 4]}) - + # Static check + static_small = constant_op.constant([1, 2], name="small") + static_big = constant_op.constant([3, 4], name="big") + with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): + check_ops.assert_equal(static_big, static_small, message="fail") + + # Dynamic check + if context.in_graph_mode(): + with self.test_session(): + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies( + [check_ops.assert_equal( + big, small, message="fail")]): + out = array_ops.identity(small) + with self.assertRaisesOpError("fail.*big.*small"): + out.eval(feed_dict={small: [1, 2], big: [3, 4]}) + + def test_error_message_eager(self): + expected_error_msg_full = r"""big does not equal small +Condition x == y did not hold. +Indices of first 6 different values: +\[\[0 0\] + \[1 1\] + \[2 0\]\] +Corresponding x values: +\[2 3 6\] +Corresponding y values: +\[20 30 60\] +First 6 elements of x: +\[2 2 3 3 6 6\] +First 6 elements of y: +\[20 2 3 30 60 6\] +""" + expected_error_msg_short = r"""big does not equal small +Condition x == y did not hold. +Indices of first 2 different values: +\[\[0 0\] + \[1 1\]\] +Corresponding x values: +\[2 3\] +Corresponding y values: +\[20 30\] +First 2 elements of x: +\[2 2\] +First 2 elements of y: +\[20 2\] +""" + with context.eager_mode(): + big = constant_op.constant([[2, 2], [3, 3], [6, 6]]) + small = constant_op.constant([[20, 2], [3, 30], [60, 6]]) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_full): + check_ops.assert_equal(big, small, message="big does not equal small", + summarize=10) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_short): + check_ops.assert_equal(big, small, message="big does not equal small", + summarize=2) + + @test_util.run_in_graph_and_eager_modes() def test_raises_when_less(self): - with self.test_session(): - # Static check - static_small = constant_op.constant([3, 1], name="small") - static_big = constant_op.constant([4, 2], name="big") - with self.assertRaisesRegexp(ValueError, "fail"): - check_ops.assert_equal(static_big, static_small, message="fail") - # Dynamic check - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies([check_ops.assert_equal(small, big)]): - out = array_ops.identity(small) - with self.assertRaisesOpError("small.*big"): - out.eval(feed_dict={small: [3, 1], big: [4, 2]}) + # Static check + static_small = constant_op.constant([3, 1], name="small") + static_big = constant_op.constant([4, 2], name="big") + with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): + check_ops.assert_equal(static_big, static_small, message="fail") + + # Dynamic check + if context.in_graph_mode(): + with self.test_session(): + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies([check_ops.assert_equal(small, big)]): + out = array_ops.identity(small) + with self.assertRaisesOpError("small.*big"): + out.eval(feed_dict={small: [3, 1], big: [4, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - small_2 = constant_op.constant([1, 2], name="small_2") - with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([[1, 2], [1, 2]], name="small") + small_2 = constant_op.constant([1, 2], name="small_2") + with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal_but_non_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 1, 1], name="small") - small_2 = constant_op.constant([1, 1], name="small_2") - with self.assertRaisesRegexp(ValueError, "must be"): - with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 1, 1], name="small") + small_2 = constant_op.constant([1, 1], name="small_2") + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (errors.InvalidArgumentError, ValueError), + (r"Incompatible shapes: \[3\] vs. \[2\]|" + r"Dimensions must be equal, but are 3 and 2")): + with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - with self.test_session(): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies([check_ops.assert_equal(larry, curly)]): - out = array_ops.identity(larry) - out.eval() + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies([check_ops.assert_equal(larry, curly)]): + out = array_ops.identity(larry) + self.evaluate(out) class AssertNoneEqualTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_not_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([10, 20], name="small") - with ops.control_dependencies( - [check_ops.assert_none_equal(big, small)]): - out = array_ops.identity(small) - out.eval() - + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([10, 20], name="small") + with ops.control_dependencies( + [check_ops.assert_none_equal(big, small)]): + out = array_ops.identity(small) + self.evaluate(out) + + @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal(self): - with self.test_session(): - small = constant_op.constant([3, 1], name="small") + small = constant_op.constant([3, 1], name="small") + with self.assertRaisesOpError("x != y did not hold"): with ops.control_dependencies( [check_ops.assert_none_equal(small, small)]): out = array_ops.identity(small) - with self.assertRaisesOpError("x != y did not hold"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_not_equal_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3], name="big") - with ops.control_dependencies( - [check_ops.assert_none_equal(small, big)]): - out = array_ops.identity(small) - out.eval() - + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3], name="big") + with ops.control_dependencies( + [check_ops.assert_none_equal(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + + @test_util.run_in_graph_and_eager_modes() def test_raises_when_not_equal_but_non_broadcastable_shapes(self): with self.test_session(): small = constant_op.constant([1, 1, 1], name="small") big = constant_op.constant([10, 10], name="big") - with self.assertRaisesRegexp(ValueError, "must be"): + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (ValueError, errors.InvalidArgumentError), + (r"Incompatible shapes: \[3\] vs. \[2\]|" + r"Dimensions must be equal, but are 3 and 2")): with ops.control_dependencies( [check_ops.assert_none_equal(small, big)]): out = array_ops.identity(small) - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): with self.test_session(): larry = constant_op.constant([]) @@ -182,62 +253,82 @@ class AssertNoneEqualTest(test.TestCase): with ops.control_dependencies( [check_ops.assert_none_equal(larry, curly)]): out = array_ops.identity(larry) - out.eval() + self.evaluate(out) + + def test_returns_none_with_eager(self): + with context.eager_mode(): + t1 = constant_op.constant([1, 2]) + t2 = constant_op.constant([3, 4]) + x = check_ops.assert_none_equal(t1, t2) + assert x is None class AssertLessTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") + small = constant_op.constant([1, 2], name="small") + with self.assertRaisesOpError("failure message.*\n*.* x < y did not hold"): with ops.control_dependencies( [check_ops.assert_less( - small, small, message="fail")]): + small, small, message="failure message")]): out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*small.*small"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 4], name="big") + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 4], name="big") + with self.assertRaisesOpError("x < y did not hold"): with ops.control_dependencies([check_ops.assert_less(big, small)]): out = array_ops.identity(small) - with self.assertRaisesOpError("big.*small"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less(self): - with self.test_session(): - small = constant_op.constant([3, 1], name="small") - big = constant_op.constant([4, 2], name="big") - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([3, 1], name="small") + big = constant_op.constant([4, 2], name="big") + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1], name="small") - big = constant_op.constant([3, 2], name="big") - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1], name="small") + big = constant_op.constant([3, 2], name="big") + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_less_but_non_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 1, 1], name="small") - big = constant_op.constant([3, 2], name="big") - with self.assertRaisesRegexp(ValueError, "must be"): - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 1, 1], name="small") + big = constant_op.constant([3, 2], name="big") + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (ValueError, errors.InvalidArgumentError), + (r"Incompatible shapes: \[3\] vs. \[2\]|" + "Dimensions must be equal, but are 3 and 2")): + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - with self.test_session(): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies([check_ops.assert_less(larry, curly)]): - out = array_ops.identity(larry) - out.eval() + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies([check_ops.assert_less(larry, curly)]): + out = array_ops.identity(larry) + self.evaluate(out) + + def test_returns_none_with_eager(self): + with context.eager_mode(): + t1 = constant_op.constant([1, 2]) + t2 = constant_op.constant([3, 4]) + x = check_ops.assert_less(t1, t2) + assert x is None class AssertLessEqualTest(test.TestCase): diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index ceee009104..7e509f72c1 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -48,6 +48,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_util @@ -96,10 +97,11 @@ def _maybe_constant_value_string(t): def _assert_static(condition, data): - """Raises a static ValueError with as much information as possible.""" + """Raises a InvalidArgumentError with as much information as possible.""" if not condition: data_static = [_maybe_constant_value_string(x) for x in data] - raise ValueError('\n'.join(data_static)) + raise errors.InvalidArgumentError(node_def=None, op=None, + message='\n'.join(data_static)) def assert_proper_iterable(values): @@ -303,11 +305,60 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None): Returns: Op that raises `InvalidArgumentError` if `x == y` is False. + @compatibility{eager} returns None + + Raises: + InvalidArgumentError if the check can be performed immediately and + `x == y` is False. The check can be performed immediately during + eager execution or if `x` and `y` are statically known. """ message = message or '' with ops.name_scope(name, 'assert_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + + if context.in_eager_mode(): + eq = math_ops.equal(x, y) + condition = math_ops.reduce_all(eq) + if not condition: + # Prepare a message with first elements of x and y + summary_msg = '' + if summarize: + # reshape((-1,)) is the fastest way to get a flat array view. + x_np = x.numpy().reshape((-1,)) + y_np = y.numpy().reshape((-1,)) + x_sum = min(x_np.size, summarize) + y_sum = min(y_np.size, summarize) + summary_msg = ('First %d elements of x:\n%s\n' + 'First %d elements of y:\n%s\n' % + (x_sum, x_np[:x_sum], + y_sum, y_np[:y_sum])) + + # Get the values that actually differed and their indices + mask = math_ops.logical_not(eq) + indices = array_ops.where(mask) + indices_np = indices.numpy() + x_vals = array_ops.boolean_mask(x, mask) + y_vals = array_ops.boolean_mask(y, mask) + diff_to_print = 0 + if summarize: + diff_to_print = min(summarize, indices_np.size) + + raise errors.InvalidArgumentError( + node_def=None, op=None, + message=('%s\nCondition x == y did not hold.\n' + 'Indices of first %s different values:\n%s\n' + 'Corresponding x values:\n%s\n' + 'Corresponding y values:\n%s\n' + '%s' + % + (message or '', + diff_to_print, indices_np[:diff_to_print], + x_vals.numpy().reshape((-1,))[:diff_to_print], + y_vals.numpy().reshape((-1,))[:diff_to_print], + summary_msg))) + return + if data is None: data = [ message, @@ -356,12 +407,19 @@ def assert_none_equal( with ops.name_scope(name, 'assert_none_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + if context.in_eager_mode(): + x_name = 'x' + y_name = 'y' + else: + x_name = x.name + y_name = y.name + if data is None: data = [ message, - 'Condition x != y did not hold for every single element:' - 'x (%s) = ' % x.name, x, - 'y (%s) = ' % y.name, y + 'Condition x != y did not hold for every single element:', + 'x (%s) = ' % x_name, x, + 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.not_equal(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) @@ -397,11 +455,18 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_less', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + if context.in_eager_mode(): + x_name = 'x' + y_name = 'y' + else: + x_name = x.name + y_name = y.name + if data is None: data = [ message, - 'Condition x < y did not hold element-wise:' - 'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y + 'Condition x < y did not hold element-wise:', + 'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.less(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 10d8e01304..8afb079d20 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -60,6 +60,7 @@ from tensorflow.core.protobuf import control_flow_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape @@ -86,6 +87,29 @@ from tensorflow.python.util import tf_should_use _basetuple = tuple +def _summarize_eager(tensor, summarize=None): + """Returns a summarized string representation of eager `tensor`. + + Args: + tensor: EagerTensor to summarize + summarize: Include these many first elements of `array` + """ + # reshape((-1,)) is the fastest way to get a flat array view + if tensor._rank(): # pylint: disable=protected-access + flat = tensor.numpy().reshape((-1,)) + lst = [str(x) for x in flat[:summarize]] + if len(lst) < flat.size: + lst.append("...") + else: + # tensor.numpy() returns a scalar for zero dimensional arrays + if summarize != 0: + lst = [str(tensor.numpy())] + else: + lst = [] + + return ", ".join(lst) + + # pylint: disable=protected-access @@ -98,7 +122,8 @@ def Assert(condition, data, summarize=None, name=None): If `condition` evaluates to false, print the list of tensors in `data`. `summarize` determines how many entries of the tensors to print. - NOTE: To ensure that Assert executes, one usually attaches a dependency: + NOTE: In graph mode, to ensure that Assert executes, one usually attaches + a dependency: ```python # Ensure maximum element of x is smaller or equal to 1 @@ -117,7 +142,21 @@ def Assert(condition, data, summarize=None, name=None): assert_op: An `Operation` that, when executed, raises a `tf.errors.InvalidArgumentError` if `condition` is not true. @compatibility{eager} returns None. + + Raises: + @compatibility{eager} `tf.errors.InvalidArgumentError` if `condition` + is not true """ + if context.in_eager_mode(): + if not condition: + xs = ops.convert_n_to_tensor(data) + data_str = [_summarize_eager(x, summarize) for x in xs] + raise errors.InvalidArgumentError( + node_def=None, op=None, + message="Expected '%s' to be true. Summarized data: %s" % ( + condition, "\n".join(data_str))) + return + with ops.name_scope(name, "Assert", [condition, data]) as name: xs = ops.convert_n_to_tensor(data) if all([x.dtype in {dtypes.string, dtypes.int32} for x in xs]): diff --git a/tensorflow/python/util/tf_should_use.py b/tensorflow/python/util/tf_should_use.py index a576547d5f..37733152e8 100644 --- a/tensorflow/python/util/tf_should_use.py +++ b/tensorflow/python/util/tf_should_use.py @@ -44,7 +44,7 @@ def _add_should_use_warning(x, fatal_error=False): and is a very shallow wrapper for `x` which logs access into `x`. """ del fatal_error - if x is None: # special corner case where x is None + if x is None or x == []: # pylint: disable=g-explicit-bool-comparison return x if context.in_eager_mode(): -- GitLab From 04d3d4d3a70aed9a8a09c7c87765652fea38cbfd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 16:49:17 -0800 Subject: [PATCH 0077/1801] Changed gradient of GatherNd to use IndexedSlices when possible rather than producing a dense output. PiperOrigin-RevId: 175086874 --- tensorflow/python/kernel_tests/gather_nd_op_test.py | 10 ++++++++-- tensorflow/python/ops/array_grad.py | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py index af5e23c926..5109ed98c9 100644 --- a/tensorflow/python/kernel_tests/gather_nd_op_test.py +++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py @@ -25,6 +25,7 @@ import numpy as np from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import variables @@ -185,6 +186,9 @@ class GatherNdTest(test.TestCase): self.assertAllEqual(expected.reshape([10, 10, 20]), gather_nd_val) self.assertEqual([10, 10, 20], gather_nd_t.get_shape()) + def assertIndexedSlices(self, t): + self.assertIsInstance(t, ops.IndexedSlices) + def testUnknownIndices(self): params = constant_op.constant([[0, 1, 2]]) indices = array_ops.placeholder(dtypes.int32) @@ -233,7 +237,8 @@ class GatherNdTest(test.TestCase): grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0] expected_grads = np.array([[3, 4], [1, 2]], dtype=np.float64) with self.test_session(use_gpu=True): - self.assertAllEqual(expected_grads, grads.eval()) + self.assertIndexedSlices(grads) + self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval()) def testGradientsRank3Elements(self): indices = constant_op.constant( @@ -284,7 +289,8 @@ class GatherNdTest(test.TestCase): [0, 0, 0, 0, 0, 0, 0, 0, 0], [3, 3, 3, 3, 3, 3, 3, 3, 3]], dtype=np.float64) with self.test_session(use_gpu=True): - self.assertAllEqual(expected_grads, grads.eval()) + self.assertIndexedSlices(grads) + self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval()) class GatherNdOpBenchmark(test.Benchmark): diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 3c025881cb..87f8d14860 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -460,7 +460,11 @@ def _GatherNdGrad(op, grad): ref = op.inputs[0] indices = op.inputs[1] ref_shape = array_ops.shape(ref, out_type=indices.dtype) - ref_grad = array_ops.scatter_nd(indices, grad, ref_shape) + if indices.shape.ndims == 2 and indices.shape[-1].value == 1: + ref_grad = ops.IndexedSlices(grad, array_ops.squeeze(indices, axis=-1), + ref_shape) + else: + ref_grad = array_ops.scatter_nd(indices, grad, ref_shape) return [ref_grad, None] -- GitLab From d1dc152b5c97b5b58314a6959543311ced35deed Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 16:59:13 -0800 Subject: [PATCH 0078/1801] Improve error message for @graph_callable argument check PiperOrigin-RevId: 175088248 --- tensorflow/python/eager/graph_callable.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index a7f1061d18..ce51d17cfc 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -247,7 +247,9 @@ def _get_graph_callable_inputs(shape_and_dtypes): ret.append(_get_graph_callable_inputs(x)) else: raise errors.InvalidArgumentError( - None, None, "shape_and_dtypes not ShapeAndDtype, type: %s " % type(x)) + None, None, "Expected the argument to @graph_callable to be a " + "(possibly nested) list or tuple of ShapeAndDtype objects, " + "but got an object of type: %s" % type(x)) return tuple(ret) if isinstance(shape_and_dtypes, tuple) else ret @@ -267,7 +269,7 @@ def _graph_callable_internal(func, shape_and_dtypes): Args: func: The tfe Python function to compile. - shape_and_dtypes: A list of type ShapeAndDtype. + shape_and_dtypes: A possibly nested list or tuple of ShapeAndDtype objects. Raises: ValueError: If any one of func's outputs is not a Tensor. @@ -430,9 +432,10 @@ def graph_callable(shape_and_dtypes): ret = foo(tfe.Tensor(2.0)) # `ret` here now is a Tensor with value 9.0. ``` Args: - shape_and_dtypes: A list of type ShapeAndDtype that specifies shape and type - information for each of the callable's arguments. The length of this list - must be equal to the number of arguments accepted by the wrapped function. + shape_and_dtypes: A possibly nested list or tuple of ShapeAndDtype objects + that specifies shape and type information for each of the callable's + arguments. The length of this list must be equal to the number of + arguments accepted by the wrapped function. Returns: A callable graph object. -- GitLab From c58da5291a6b1344de8e3e7e7ea59d770701fc15 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 17:01:26 -0800 Subject: [PATCH 0079/1801] Remove extra copy of literal in client TransferToOutfeed PiperOrigin-RevId: 175088538 --- tensorflow/compiler/xla/client/client.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 92cd8e729d..66937d64af 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -142,8 +142,7 @@ StatusOr> Client::TransferFromOutfeed( "TransferToClient request"); } - Literal literal(response.literal()); - return MakeUnique(literal); + return MakeUnique(response.literal()); } Status Client::ResetDevice() { -- GitLab From 5de6f68848b8bc431e18a53fa03700820bcee57f Mon Sep 17 00:00:00 2001 From: Cameron Thomas Date: Thu, 9 Nov 2017 01:19:51 +0000 Subject: [PATCH 0080/1801] Forward declare condition_variable Necessary to enable friendship with mutex --- tensorflow/core/platform/default/mutex.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/platform/default/mutex.h b/tensorflow/core/platform/default/mutex.h index c3e44c42d9..044c754e80 100644 --- a/tensorflow/core/platform/default/mutex.h +++ b/tensorflow/core/platform/default/mutex.h @@ -31,6 +31,8 @@ namespace tensorflow { enum LinkerInitialized { LINKER_INITIALIZED }; +class condition_variable; + // Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized // constructor interface. This type is as fast as mutex, but is also a shared // lock. -- GitLab From 1c79da73c193944878025a6f49565c54f63da4f2 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 8 Nov 2017 19:01:15 -0800 Subject: [PATCH 0081/1801] Add hooks keyword argument to slim evaluate_once to enable TFDBG debugging of slim.evaluation.evaluate_once() Fixes: #13444 PiperOrigin-RevId: 175101022 --- tensorflow/contrib/slim/BUILD | 2 + .../contrib/slim/python/slim/evaluation.py | 15 ++++-- .../slim/python/slim/evaluation_test.py | 46 +++++++++++++++++-- .../docs_src/programmers_guide/debugger.md | 26 ++++++++++- 4 files changed, 77 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/slim/BUILD b/tensorflow/contrib/slim/BUILD index 23c23af2f4..c2f106c2b2 100644 --- a/tensorflow/contrib/slim/BUILD +++ b/tensorflow/contrib/slim/BUILD @@ -39,6 +39,8 @@ py_test( "//tensorflow/python:summary", "//tensorflow/python:training", "//tensorflow/python:variables", + "//tensorflow/python/debug:debug_data", + "//tensorflow/python/debug:hooks", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index 2d4b08df61..cdb720b36b 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -153,7 +153,8 @@ def evaluate_once(master, summary_op=_USE_DEFAULT, summary_op_feed_dict=None, variables_to_restore=None, - session_config=None): + session_config=None, + hooks=None): """Evaluates the model at the given checkpoint path. Args: @@ -177,6 +178,8 @@ def evaluate_once(master, slim.variables.GetVariablesToRestore() is used. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. If left as `None`, the default will be used. + hooks: A list of additional `SessionRunHook` objects to pass during the + evaluation. Returns: The value of `final_op` or `None` if `final_op` is `None`. @@ -184,11 +187,13 @@ def evaluate_once(master, if summary_op == _USE_DEFAULT: summary_op = summary.merge_all() - hooks = [evaluation.StopAfterNEvalsHook(num_evals),] + all_hooks = [evaluation.StopAfterNEvalsHook(num_evals),] if summary_op is not None: - hooks.append(evaluation.SummaryAtEndHook( + all_hooks.append(evaluation.SummaryAtEndHook( log_dir=logdir, summary_op=summary_op, feed_dict=summary_op_feed_dict)) + if hooks is not None: + all_hooks.extend(hooks) saver = None if variables_to_restore is not None: @@ -203,7 +208,7 @@ def evaluate_once(master, feed_dict=eval_op_feed_dict, final_ops=final_op, final_ops_feed_dict=final_op_feed_dict, - hooks=hooks, + hooks=all_hooks, config=session_config) @@ -256,7 +261,7 @@ def evaluation_loop(master, configure the `Session`. If left as `None`, the default will be used. timeout: The maximum amount of time to wait between checkpoints. If left as `None`, then the process will wait indefinitely. - hooks: A list of additional SessionRunHook objects to pass during + hooks: A list of additional `SessionRunHook` objects to pass during repeated evaluations. Returns: diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index d9e0f54b72..870f504d10 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import glob import os +import shutil import time import numpy as np @@ -29,6 +30,8 @@ from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib from tensorflow.core.protobuf import saver_pb2 +from tensorflow.python.debug.lib import debug_data +from tensorflow.python.debug.wrappers import hooks from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -230,11 +233,7 @@ class SingleEvaluationTest(test.TestCase): with self.assertRaises(errors.NotFoundError): evaluation.evaluate_once('', checkpoint_path, log_dir) - def testRestoredModelPerformance(self): - checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') - log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') - - # First, save out the current model to a checkpoint: + def _prepareCheckpoint(self, checkpoint_path): init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1) @@ -242,6 +241,13 @@ class SingleEvaluationTest(test.TestCase): sess.run(init_op) saver.save(sess, checkpoint_path) + def testRestoredModelPerformance(self): + checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') + log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') + + # First, save out the current model to a checkpoint: + self._prepareCheckpoint(checkpoint_path) + # Next, determine the metric to evaluate: value_op, update_op = metric_ops.streaming_accuracy(self._predictions, self._labels) @@ -251,6 +257,36 @@ class SingleEvaluationTest(test.TestCase): '', checkpoint_path, log_dir, eval_op=update_op, final_op=value_op) self.assertAlmostEqual(accuracy_value, self._expected_accuracy) + def testAdditionalHooks(self): + checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') + log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') + + # First, save out the current model to a checkpoint: + self._prepareCheckpoint(checkpoint_path) + + # Next, determine the metric to evaluate: + value_op, update_op = metric_ops.streaming_accuracy(self._predictions, + self._labels) + + dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir') + dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False) + try: + # Run the evaluation and verify the results: + accuracy_value = evaluation.evaluate_once( + '', checkpoint_path, log_dir, eval_op=update_op, final_op=value_op, + hooks=[dumping_hook]) + self.assertAlmostEqual(accuracy_value, self._expected_accuracy) + + dump = debug_data.DebugDumpDir( + glob.glob(os.path.join(dumping_root, 'run_*'))[0]) + # Here we simply assert that the dumped data has been loaded and is + # non-empty. We do not care about the detailed model-internal tensors or + # their values. + self.assertTrue(dump.dumped_tensor_data) + finally: + if os.path.isdir(dumping_root): + shutil.rmtree(dumping_root) + if __name__ == '__main__': test.main() diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 36a016e880..1f856bbf3f 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -509,8 +509,12 @@ model.fit(...) # This will break into the TFDBG CLI. ## Debugging tf-slim with TFDBG -TFDBG currently supports only training with +TFDBG supports debugging of training and evaluation with [tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). +As detailed below, training and evaluation require slightly different debugging +workflows. + +### Debugging training in tf-slim To debug the training process, provide `LocalCLIDebugWrapperSession` to the `session_wrapper` argument of `slim.learning.train()`. For example: @@ -519,13 +523,31 @@ import tensorflow as tf from tensorflow.python import debug as tf_debug # ... Code that creates the graph and the train_op ... -tf.contrib.slim.learning_train( +tf.contrib.slim.learning.train( train_op, logdir, number_of_steps=10, session_wrapper=tf_debug.LocalCLIDebugWrapperSession) ``` +### Debugging evaluation in tf-slim +To debug the evaluation process, provide `LocalCLIDebugHook` to the +`hooks` argument of `slim.evaluation.evaluate_once()`. For example: + +``` python +import tensorflow as tf +from tensorflow.python import debug as tf_debug + +# ... Code that creates the graph and the eval and final ops ... +tf.contrib.slim.evaluation.evaluate_once( + '', + checkpoint_path, + logdir, + eval_op=my_eval_op, + final_op=my_value_op, + hooks=[tf_debug.LocalCLIDebugHook()]) +``` + ## Offline Debugging of Remotely-Running Sessions Often, your model is running on a remote machine or a process that you don't -- GitLab From 29833cac91cb2f7c5016db9fc82f47124d2c94da Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Wed, 8 Nov 2017 22:23:01 -0800 Subject: [PATCH 0082/1801] Simplify graph construction with an option to not validate colocation constraints (for graph optimizations, colocation constraints are already validated previously and device placement of nodes has completed previously and there is no need to validate again). PiperOrigin-RevId: 175113956 --- tensorflow/core/graph/graph_constructor.cc | 10 +++++--- tensorflow/core/graph/graph_constructor.h | 3 +++ .../core/graph/graph_constructor_test.cc | 15 +++++++++++ .../core/grappler/costs/graph_properties.cc | 5 ++++ .../grappler/costs/graph_properties_test.cc | 25 +++++++++++++++++++ 5 files changed, 55 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 753cb260e5..2ee409768b 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -68,7 +68,8 @@ class GraphConstructor { Options(const GraphConstructorOptions& in) // NOLINT(runtime/explicit) : allow_internal_ops(in.allow_internal_ops), expect_device_spec(in.expect_device_spec), - importing(false) {} + importing(false), + validate_colocation_constraints(false) {} Options(const ImportGraphDefOptions& in) // NOLINT(runtime/explicit) : allow_internal_ops(false), expect_device_spec(false), @@ -81,7 +82,8 @@ class GraphConstructor { control_dependencies(in.control_dependencies), return_tensors(in.return_tensors), return_nodes(in.return_nodes), - importing(true) {} + importing(true), + validate_colocation_constraints(in.validate_colocation_constraints) {} bool allow_internal_ops; bool expect_device_spec; @@ -103,6 +105,7 @@ class GraphConstructor { // applicable to ConvertGraphDefToGraph as well, so make an attempt to // remove this. bool importing; + bool validate_colocation_constraints; }; typedef gtl::ArraySlice NodeDefSlice; @@ -492,7 +495,8 @@ Status GraphConstructor::InitFromEdges() { Status GraphConstructor::ValidateColocationConstraints( const NodeDef& node_def) { - if (!opts_.importing) return Status::OK(); + if (!opts_.validate_colocation_constraints || !opts_.importing) + return Status::OK(); const auto iter = node_def.attr().find(kColocationAttrName); if (iter == node_def.attr().end()) return Status::OK(); for (const string& c : iter->second.list().s()) { diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h index 416c0ee9ae..4b418b8622 100644 --- a/tensorflow/core/graph/graph_constructor.h +++ b/tensorflow/core/graph/graph_constructor.h @@ -119,6 +119,9 @@ struct ImportGraphDefOptions { // TODO(skyewm): make this work with `skip_mapped_nodes` if there's a need. std::vector return_nodes; + // If true, checks that all colocation constraints are nodes in the GraphDef. + bool validate_colocation_constraints = true; + // TODO(ashankar): Enable handling of GraphDefs produced by newer binaries // with ops that are not defined in the binary calling ImportGraphDef. // Similar to the producer_op_list argument to import_graph_def in the diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc index cd541c7d86..893826da3e 100644 --- a/tensorflow/core/graph/graph_constructor_test.cc +++ b/tensorflow/core/graph/graph_constructor_test.cc @@ -2978,5 +2978,20 @@ versions { EXPECT_EQ(17, refiner.graph_def_version()); } +TEST_F(GraphConstructorTest, ImportGraphDef_ValidateColationConstraints) { + GraphDef def; + ASSERT_TRUE(protobuf::TextFormat::ParseFromString( + "node { name: 'A' op: 'TestInput' attr { key: '_class' value { list { " + "s:'loc:@missing' } } } }", + &def)); + ImportGraphDefOptions options; + // TODO(yaozhang): Extend ExpectError to check error type and use ExpectError + // and ExpectOK to replace the code below. + Status s = ImportGraphDef(options, def, &graph_, nullptr); + EXPECT_TRUE(errors::IsInvalidArgument(s)) << s; + options.validate_colocation_constraints = false; + TF_EXPECT_OK(ImportGraphDef(options, def, &graph_, nullptr)); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index a59879f53c..8654a2a3ed 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -447,6 +447,11 @@ Status GraphProperties::InferStatically() { shape_refiner.set_disable_constant_propagation(true); shape_refiner.set_function_library_for_shape_inference(&function_library); ImportGraphDefOptions options; + // Graph optimization happens at the late stage of graph execution, + // when colocation constraints are already validated previously and + // the device placement of nodes has also completed, so there + // is no need to validate colocation constraints again. + options.validate_colocation_constraints = false; Status s = ImportGraphDef(options, item_.graph, &graph, &shape_refiner); TF_RETURN_IF_ERROR(s); diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index a33cdacc09..acd0b598ae 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" #include "tensorflow/core/grappler/inputs/utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/protobuf.h" @@ -784,6 +785,30 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { EXPECT_EQ(shape_f.dim(1).size(), shape_a.dim(1).size()); } +TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = ops::Const(s.WithOpName("a"), 1.0f, {1}); + Output b = ops::Const(s.WithOpName("b"), 2.0f, {1}); + Output c = ops::Const(s.WithOpName("c").ColocateWith(a), 3.0f, {1}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + // Create a graph with node a removed (say by some graph optimization + // pass), noting that node c is colocated with a. This is fine as it + // is in the late stage of graph execution, the colocation constraints have + // been validated previously and the device placement of nodes has completed. + GraphDef optimized_graph; + for (const auto& node : item.graph.node()) { + if (node.name() != "a") { + *optimized_graph.add_node() = node; + } + } + item.graph.Swap(&optimized_graph); + GraphProperties properties(item); + // This function should return OK, since it doesn't validate the colocation + // constraints internally. + TF_EXPECT_OK(properties.InferStatically()); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From b58ee215e631b9c2a0400cbd5b52ea7a3a8bfca0 Mon Sep 17 00:00:00 2001 From: PW486 Date: Thu, 9 Nov 2017 19:12:41 +0900 Subject: [PATCH 0083/1801] Fixed typos, comments --- tensorflow/core/public/session.h | 2 +- tensorflow/core/util/saved_tensor_slice.proto | 2 +- tensorflow/core/util/strided_slice_op.cc | 4 ++-- tensorflow/core/util/tensor_slice_reader.h | 1 - tensorflow/core/util/tensor_slice_reader_cache.h | 1 - tensorflow/core/util/tensor_slice_writer.h | 1 - 6 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/public/session.h b/tensorflow/core/public/session.h index bca384e59f..75ad50f6f2 100644 --- a/tensorflow/core/public/session.h +++ b/tensorflow/core/public/session.h @@ -186,7 +186,7 @@ class Session { /// the `SessionOptions::target` field). virtual Status Close() = 0; - // NOTE(ashankar): As of July 2017, this method was added to faciliate some + // NOTE(ashankar): As of July 2017, this method was added to facilitate some // experimentation. Reconsider/re-evaluate after September 2017. // // Sets `*output` to the `DeviceMgr` that owns accessible devices in the diff --git a/tensorflow/core/util/saved_tensor_slice.proto b/tensorflow/core/util/saved_tensor_slice.proto index 6278685957..8a6dd7bdb7 100644 --- a/tensorflow/core/util/saved_tensor_slice.proto +++ b/tensorflow/core/util/saved_tensor_slice.proto @@ -1,7 +1,7 @@ // Protocol buffers for saved tensor slices. It's used for the brain tensor // ops checkpoints and the V3 checkpoints in dist_belief. -// A checkpoint file is an sstable. The value for each record is a serialized +// A checkpoint file is a stable. The value for each record is a serialized // SavedTensorSlices message (defined below). // // Each checkpoint file has a record with the empty key (""), which corresponds diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index cfe9275a09..d5bc676a9a 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -218,8 +218,8 @@ Status ValidateStridedSliceOp( // Step 2: Make a sparse spec into a full index spec // - // The sparse spec does not corresopnds to the number of dimensions - // Make a dense spec that corresponds to thte number of dimensions + // The sparse spec does not corresponds to the number of dimensions + // Make a dense spec that corresponds to the number of dimensions // // For example suppose foo[...,3:] on foo.shape=(2,2,3) then // we need to produce the missing begin_mask for the first two diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h index 4bb2b24615..263f56c7fc 100644 --- a/tensorflow/core/util/tensor_slice_reader.h +++ b/tensorflow/core/util/tensor_slice_reader.h @@ -15,7 +15,6 @@ limitations under the License. // The utility to read checkpoints for google brain tensor ops and v3 // checkpoints for dist_belief. -// #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_ #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_ diff --git a/tensorflow/core/util/tensor_slice_reader_cache.h b/tensorflow/core/util/tensor_slice_reader_cache.h index bdd36a2791..63a8d0b068 100644 --- a/tensorflow/core/util/tensor_slice_reader_cache.h +++ b/tensorflow/core/util/tensor_slice_reader_cache.h @@ -15,7 +15,6 @@ limitations under the License. // The utility to read checkpoints for google brain tensor ops and v3 // checkpoints for dist_belief. -// #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_ #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_ diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h index 95d6384afe..bdb4921e1b 100644 --- a/tensorflow/core/util/tensor_slice_writer.h +++ b/tensorflow/core/util/tensor_slice_writer.h @@ -15,7 +15,6 @@ limitations under the License. // The utility to write checkpoints for google brain tensor ops and v3 // checkpoints for dist_belief. -// #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_ #define TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_ -- GitLab From c25cd200ddb2728aec1302f655ff220b08d60007 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Thu, 9 Nov 2017 19:23:07 +0900 Subject: [PATCH 0084/1801] CMake: configure default string values of options properly Because cmake configures defaults values as ON or OFF only, string values as default doesn't work. Thus, when it is set "OFF", we need to re-set the values. Fixes #14400 Signed-off-by: MyungJoo Ham --- tensorflow/contrib/cmake/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 77a3fc0c83..846daf3213 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -53,7 +53,15 @@ if (NOT WIN32) set(tensorflow_CUDNN_INCLUDE /usr/include) endif (NOT tensorflow_CUDNN_INCLUDE) option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB}) + if (NOT tensorflow_PATH_CUDNN_STATIC_LIB) + # option's default value is OFF. Fill it with real default values + set (tensorflow_PATH_CUDNN_STATIC_LIB ${tensorflow_PATH_STATIC_LIB}) + endif (NOT tensorflow_PATH_CUDNN_STATIC_LIB) option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB}) + if (NOT tensorflow_PATH_NCCL_STATIC_LIB) + # option's default value is OFF. Fill it with real default values + set (tensorflow_PATH_NCCL_STATIC_LIB ${tensorflow_PATH_STATIC_LIB}) + endif (NOT tensorflow_PATH_NCCL_STATIC_LIB) option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64) if (NOT tensorflow_CUDA_LIBRARY_PATH) # option's default value is OFF. Fill it with real default values -- GitLab From 71bd045af1ebe74c7e3b1b968b5a5b86e0a153c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 02:32:44 -0800 Subject: [PATCH 0085/1801] When sharding a tuple, we typically want to describe the data sharding of each individual subtensor individually. Tuples are essentially just containers - the tensors they contain should be able to be sharded differently. Tuples are hierarchically structured, but shardings were designed to not contain the sharded type (the sharded type is inferred from the output type of the instruction the sharding is applied to). Therefore, shardings for tuples contain shardings for each subtensor as a non-structured list. This list is ordered as a preorder walk of the tuple shape, and of course only the leaf nodes of the tuple shape are stored. The structure is reapplied when the sharded instruction's shape is known. PiperOrigin-RevId: 175132692 --- .../compiler/xla/service/hlo_sharding.cc | 71 +++++++++++++++- .../compiler/xla/service/hlo_sharding.h | 83 +++++++++++++++++-- .../compiler/xla/service/hlo_sharding_test.cc | 68 +++++++++++++++ tensorflow/compiler/xla/shape_tree.h | 3 + .../compiler/xla/tools/parser/hlo_parser.cc | 41 ++++++++- .../xla/tools/parser/hlo_parser_test.cc | 15 +++- tensorflow/compiler/xla/xla_data.proto | 13 ++- 7 files changed, 278 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 0d019d22f5..bc5663513b 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_sharding.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/str_util.h" namespace xla { @@ -38,6 +39,15 @@ HloSharding HloSharding::Tile1D(const Shape& input_shape, int64 num_tiles) { } string HloSharding::ToString() const { + if (IsTuple()) { + std::vector parts; + parts.reserve(tuple_elements_.size()); + for (const HloSharding& element : tuple_elements_) { + parts.push_back(element.ToString()); + } + return StrCat("{", tensorflow::str_util::Join(parts, ", "), "}"); + } + string result = StrCat("{", (replicated_ ? " replicated" : ""), (maximal_ ? " maximal" : "")); @@ -53,6 +63,11 @@ string HloSharding::ToString() const { } bool HloSharding::UsesDevice(int64 device) const { + if (IsTuple()) { + return std::any_of( + tuple_elements_.begin(), tuple_elements_.end(), + [&](const HloSharding& s) { return s.UsesDevice(device); }); + } const auto& devices = tile_assignment_; return replicated_ || std::find(devices.begin(), devices.end(), device) != devices.end(); @@ -61,6 +76,7 @@ bool HloSharding::UsesDevice(int64 device) const { std::vector HloSharding::TileIndexForDevice(int64 device) const { CHECK(!ShapeUtil::IsTuple(tile_shape_)); CHECK(!maximal_); + CHECK(!IsTuple()); std::vector ret_index; tile_assignment_.Each([&](tensorflow::gtl::ArraySlice index, int64 d) { if (d == device) { @@ -74,6 +90,7 @@ std::vector HloSharding::TileIndexForDevice(int64 device) const { int64 HloSharding::DeviceForTileIndex( tensorflow::gtl::ArraySlice index) const { CHECK(!replicated_); + CHECK(!IsTuple()); if (maximal_) { return *tile_assignment_.begin(); } @@ -82,7 +99,7 @@ int64 HloSharding::DeviceForTileIndex( } std::vector HloSharding::TileOffsetForDevice(int64 device) const { - CHECK(!ShapeUtil::IsTuple(tile_shape_)); + CHECK(!IsTuple()); std::vector index = TileIndexForDevice(device); if (maximal_) { @@ -97,7 +114,7 @@ std::vector HloSharding::TileOffsetForDevice(int64 device) const { } std::vector HloSharding::TileLimitForDevice(int64 device) const { - CHECK(!ShapeUtil::IsTuple(tile_shape_)); + CHECK(!IsTuple()); CHECK(!maximal_); // Maximal shardings do not have a valid tile shape. std::vector index = TileIndexForDevice(device); @@ -108,13 +125,41 @@ std::vector HloSharding::TileLimitForDevice(int64 device) const { } StatusOr HloSharding::UniqueDevice() const { - if (!replicated_ && maximal_) { + if (IsTuple()) { + if (tuple_elements_.empty()) { + return tensorflow::errors::InvalidArgument( + "UniqueDevice() called on empty tuple"); + } + std::vector> results; + std::transform(tuple_elements_.begin(), tuple_elements_.end(), + std::back_inserter(results), + [](const HloSharding& s) { return s.UniqueDevice(); }); + if (std::all_of(results.begin(), results.end(), + [&](const StatusOr& s) { + return s.ok() && results[0].ok() && + s.ValueOrDie() == results[0].ValueOrDie(); + })) { + return results[0]; + } else { + return tensorflow::errors::InvalidArgument( + "Tuple did not contain a unique device"); + } + } + if (!replicated_ && maximal_ && !IsTuple()) { return static_cast(*tile_assignment_.begin()); } return tensorflow::errors::InvalidArgument( "UniqueDevice() called on sharding that executes on multiple devices"); } +bool HloSharding::HasUniqueDevice() const { + if (IsTuple()) { + return UniqueDevice().status().ok(); + } else { + return !IsReplicated() && IsTileMaximal(); + } +} + Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { if (replicated_) { return Status::OK(); @@ -193,7 +238,16 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { /*static*/ StatusOr HloSharding::FromProto( const OpSharding& proto) { - if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { + if (proto.type() == OpSharding::Type::OpSharding_Type_TUPLE) { + std::vector tuple_shardings; + tuple_shardings.reserve(proto.tuple_shardings().size()); + for (const OpSharding& tuple_sharding_proto : proto.tuple_shardings()) { + TF_ASSIGN_OR_RETURN(HloSharding sharding, + HloSharding::FromProto(tuple_sharding_proto)); + tuple_shardings.push_back(sharding); + } + return HloSharding(tuple_shardings); + } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { return Replicate(); } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL) { return HloSharding(proto.tile_assignment_devices(0)); @@ -212,6 +266,15 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { OpSharding HloSharding::ToProto() const { OpSharding result; + + if (IsTuple()) { + for (const HloSharding& element : tuple_elements_) { + *result.add_tuple_shardings() = element.ToProto(); + } + result.set_type(OpSharding::Type::OpSharding_Type_TUPLE); + return result; + } + *result.mutable_tile_shape() = tile_shape_; for (int64 dim : tile_assignment_.dimensions()) { result.add_tile_assignment_dimensions(dim); diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index d7ada30c70..f8ef2a3d05 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/array.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/protobuf_util.h" +#include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/hash/hash.h" @@ -67,6 +68,18 @@ class HloSharding { // `num_tiles` tiles. static HloSharding Tile1D(const Shape& input_shape, int64 num_tiles); + // Creates a new sharding for a tuple type. The given ShapeTree must have + // elements for every leaf shape contained in the tuple. + static HloSharding Tuple(const ShapeTree& sub_shardings) { + std::vector flattened_list; + flattened_list.reserve( + std::distance(sub_shardings.leaf_begin(), sub_shardings.leaf_end())); + for (const auto& index_to_sharding : sub_shardings.leaves()) { + flattened_list.push_back(index_to_sharding.second); + } + return HloSharding(flattened_list); + } + // Create a new sharding from a protobuf OpSharding. static StatusOr FromProto(const OpSharding& proto); @@ -76,47 +89,89 @@ class HloSharding { // Validate that this sharding can be applied to a tensor with shape `shape`. Status Validate(const Shape& shape, int64 num_devices) const; + // Returns true if the sharding has tuple type. + bool IsTuple() const { return tuple_; } + // Returns true if the sharding is trivial: replicate on all devices. - bool IsReplicated() const { return replicated_; } + bool IsReplicated() const { + if (!IsTuple()) { + return replicated_; + } + return std::all_of(tuple_elements_.begin(), tuple_elements_.end(), + [](const HloSharding& s) { return s.IsReplicated(); }); + } // Returns true if the tile size is the same as the input size. - bool IsTileMaximal() const { return maximal_; } + bool IsTileMaximal() const { + if (!IsTuple()) { + return maximal_; + } + return std::all_of(tuple_elements_.begin(), tuple_elements_.end(), + [](const HloSharding& s) { return s.IsTileMaximal(); }); + } // Returns true if the sharding defines an operation on the given device. bool UsesDevice(int64 device) const; // Returns the tile that should be executed on the given device. + // REQUIRES: !IsTuple() std::vector TileIndexForDevice(int64 device) const; // Returns the device that should execute the given tile. // It is an error to call this if is_replicated() is true. + // REQUIRES: !IsTuple() int64 DeviceForTileIndex(tensorflow::gtl::ArraySlice index) const; // Given a device ID, returns the offset within the input space of the // tile that should be executed on the given core. This returns the lower // extent of the tile in the input space. + // REQUIRES: !IsTuple() std::vector TileOffsetForDevice(int64 device) const; // Given a device ID, returns the limit within the input space of the // tile that should be executed on the given core. This returns the upper // extent of the tile in the input space. + // REQUIRES: !IsTuple() std::vector TileLimitForDevice(int64 device) const; // Returns the single device this op operates on. - // Requires !Replicated() && IsTileMaximal(). + // REQUIRES: !IsTuple&& !Replicated() && IsTileMaximal() StatusOr UniqueDevice() const; // Returns true if this op only uses a single device. - bool HasUniqueDevice() const { return !IsReplicated() && IsTileMaximal(); } + bool HasUniqueDevice() const; + + // Returns the ShapeTree containing the shardings for each element of this + // tuple. Only the leaf elements are populated. This creates a new ShapeTree + // object so is not cheap. REQUIRES: IsTuple() + ShapeTree GetTupleShardingsAsShapeTree( + const Shape& tuple_shape) const { + ShapeTree result(tuple_shape, HloSharding::Replicate()); + CHECK_EQ(std::distance(result.leaf_begin(), result.leaf_end()), + tuple_elements_.size()); + auto it = tuple_elements_.begin(); + for (auto& index_to_sharding : result.leaves()) { + index_to_sharding.second = *it++; + } + return result; + } bool operator==(const HloSharding& other) const { return replicated_ == other.replicated_ && maximal_ == other.maximal_ && protobuf_util::ProtobufEquals(tile_shape_, other.tile_shape_) && - tile_assignment_ == other.tile_assignment_; + tile_assignment_ == other.tile_assignment_ && + tuple_elements_ == other.tuple_elements_; } bool operator!=(const HloSharding& other) const { return !(*this == other); } size_t Hash() const { + if (!tuple_) { + size_t h = 0; + for (const auto& element : tuple_elements_) { + h = tensorflow::Hash64Combine(h, element.Hash()); + } + return h; + } if (replicated_) { return 0; } @@ -131,33 +186,47 @@ class HloSharding { } // Gets the tile shape. - // It is an error to call this if IsTileMaximal() is true. + // REQUIRES: !IsTileMaximal() && !IsTuple() const Shape& tile_shape() const { return tile_shape_; } // Gets the tile assignment tensor. - // It is an error to call this if IsReplicated() is true. + // REQUIRES: !IsReplicated() && !IsTuple() const Array& tile_assignment() const { return tile_assignment_; } private: HloSharding() : replicated_(true), maximal_(true), + tuple_(false), tile_shape_(), tile_assignment_({0}) {} explicit HloSharding(int64 device_id) : replicated_(false), maximal_(true), + tuple_(false), tile_shape_(), tile_assignment_({1}, device_id) {} HloSharding(const Shape& tile_shape, const Array& tile_assignment) : replicated_(false), maximal_(false), + tuple_(false), tile_shape_(tile_shape), tile_assignment_(tile_assignment) {} + HloSharding(const std::vector& tuple_shardings) + : replicated_(false), + maximal_(false), + tuple_(true), + tile_assignment_({0}), + tuple_elements_(tuple_shardings) {} bool replicated_; bool maximal_; + bool tuple_; Shape tile_shape_; Array tile_assignment_; + // Only non-empty when tuple_ is true, but because empty tuples are allowed + // may also be empty even then. This is a flattened list of all the leaf + // shardings in a tuple shape, by pre-order walk (ShapeTree iterator order). + std::vector tuple_elements_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index d0a20471a0..00ea38480e 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -132,6 +132,29 @@ TEST_F(HloShardingTest, Tile) { } } +TEST_F(HloShardingTest, NestedTuple) { + // nested_tuple_shape = (f32[], (f32[3]), f32[4, 6]) + Shape nested_tuple_shape = ShapeUtil::MakeTupleShape({ + ShapeUtil::MakeShape(F32, {}), + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {3})}), + ShapeUtil::MakeShape(F32, {4, 6}), + }); + + OpSharding proto; + proto.set_type(OpSharding::Type::OpSharding_Type_TUPLE); + *proto.add_tuple_shardings() = HloSharding::Replicate().ToProto(); + *proto.add_tuple_shardings() = HloSharding::AssignDevice(0).ToProto(); + *proto.add_tuple_shardings() = HloSharding::AssignDevice(1).ToProto(); + HloSharding tuple_sharding = + HloSharding::FromProto(proto).ConsumeValueOrDie(); + + ShapeTree shape_tree = + tuple_sharding.GetTupleShardingsAsShapeTree(nested_tuple_shape); + EXPECT_EQ(shape_tree.element({0}), HloSharding::Replicate()); + EXPECT_EQ(shape_tree.element({1, 0}), HloSharding::AssignDevice(0)); + EXPECT_EQ(shape_tree.element({2}), HloSharding::AssignDevice(1)); +} + TEST_F(HloShardingTest, Hash) { auto hash_compare_equal = [](const HloSharding& a, const HloSharding& b) { if (a.Hash() != b.Hash()) { @@ -184,6 +207,51 @@ TEST_F(HloShardingTest, Hash) { MakeArray({2, 2}, {0, 3, 1, 2})); EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); } + + HloSharding default_sharding = HloSharding::Replicate(); + { + ShapeTree shape_tree(ShapeUtil::MakeTupleShape({}), + default_sharding); + HloSharding sharding1 = HloSharding::Replicate(); + HloSharding sharding2 = HloSharding::Tuple(shape_tree); + EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); + } + + { + ShapeTree shape_tree(ShapeUtil::MakeTupleShape({}), + default_sharding); + HloSharding sharding1 = HloSharding::Tuple(shape_tree); + HloSharding sharding2 = HloSharding::Tuple(shape_tree); + EXPECT_TRUE(hash_compare_equal(sharding1, sharding2)); + } + + { + ShapeTree shape_tree1( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree1.mutable_element({0}) = HloSharding::Replicate(); + ShapeTree shape_tree2( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree2.mutable_element({0}) = HloSharding::AssignDevice(0); + HloSharding sharding1 = HloSharding::Tuple(shape_tree1); + HloSharding sharding2 = HloSharding::Tuple(shape_tree2); + EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); + } + + { + ShapeTree shape_tree1( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree1.mutable_element({0}) = HloSharding::AssignDevice(0); + ShapeTree shape_tree2( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree2.mutable_element({0}) = HloSharding::AssignDevice(0); + HloSharding sharding1 = HloSharding::Tuple(shape_tree1); + HloSharding sharding2 = HloSharding::Tuple(shape_tree2); + EXPECT_TRUE(hash_compare_equal(sharding1, sharding2)); + } } } // namespace diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index 64a36471b9..a898a4d375 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -116,6 +116,7 @@ class ShapeTree { ShapeTree(const Shape* shape, const T& init_value); ShapeTree(const ShapeTree& other) { *this = other; } + ShapeTree(ShapeTree&&) = default; ShapeTree& operator=(const ShapeTree& other) { root_ = other.root_; @@ -132,6 +133,8 @@ class ShapeTree { return *this; } + ShapeTree& operator=(ShapeTree&& other) = default; + // Returns the data element associated with the array in the shape at the // given index (see ShapeUtil::GetSubshape for how indexes are defined). const T& element(const ShapeIndex& index) const; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index f1e987cb15..df07e069a0 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -60,6 +60,7 @@ class HloParser { bool ParseInstructionList(HloComputation::Builder* builder, string* root_name); bool ParseInstruction(HloComputation::Builder* builder, string* root_name); + bool ParseControlPredecessors(HloInstruction* instruction); bool ParseLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseTupleLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseNonTupleLiteral(std::unique_ptr* literal, @@ -123,6 +124,7 @@ class HloParser { bool ParseWindow(Window* window); bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); bool ParseSharding(OpSharding* sharding); + bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. bool ParseDxD(const string& name, std::vector* result); @@ -548,14 +550,49 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, return AddInstruction(name, instruction); } -// ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? ('devices=' ('[' -// dims ']')* device_list)? '}' dims ::= int_list device_list ::= int_list +// ::= '{' (single_sharding | tuple_sharding) '}' +// +// tuple_sharding ::= single_sharding* (',' single_sharding)* bool HloParser::ParseSharding(OpSharding* sharding) { + // A single sharding starts with '{' and is not followed by '{'. + // A tuple sharding starts with '{' and is followed by '{', or is '{''}' for + // an empty tuple. if (!ParseToken(TokKind::kLbrace, "expected '{' to start sharding attribute")) { return false; } + if (lexer_.GetKind() != TokKind::kLbrace && + lexer_.GetKind() != TokKind::kRbrace) { + return ParseSingleSharding(sharding, /*lbrace_pre_lexed=*/true); + } + + // Tuple sharding. + // Allow empty tuple shardings. + if (lexer_.GetKind() != TokKind::kRbrace) { + do { + if (!ParseSingleSharding(sharding->add_tuple_shardings(), + /*lbrace_pre_lexed=*/false)) { + return false; + } + } while (EatIfPresent(TokKind::kComma)); + } + sharding->set_type(OpSharding::Type::OpSharding_Type_TUPLE); + + return ParseToken(TokKind::kRbrace, "expected '}' to end sharding attribute"); +} + +// ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? +// ('devices=' ('[' dims ']')* device_list)? '}' +// dims ::= int_list device_list ::= int_list +bool HloParser::ParseSingleSharding(OpSharding* sharding, + bool lbrace_pre_lexed) { + if (!lbrace_pre_lexed && + !ParseToken(TokKind::kLbrace, + "expected '{' to start sharding attribute")) { + return false; + } + bool maximal = false; bool replicated = false; std::vector devices; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 62b4385e76..a9dc360978 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -152,7 +152,7 @@ ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f3 %v1 = f32[4]{0} parameter(0), sharding={maximal device=1} %v2 = f32[4]{0} parameter(1), sharding={maximal device=1} %greater-than = pred[4]{0} greater-than(f32[4]{0} %v1, f32[4]{0} %v2), sharding={replicated} - ROOT %select = f32[4]{0} select(pred[4]{0} %greater-than, f32[4]{0} %v1, f32[4]{0} %v2) + ROOT %select = f32[4]{0} select(pred[4]{0} %greater-than, f32[4]{0} %v1, f32[4]{0} %v2), sharding={} } )" @@ -180,6 +180,19 @@ ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3) } +)" +}, +{ +"ShardedTupleCreate", +R"(HloModule ShardedTupleCreate_module: + +ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) { + %v1 = f32[] parameter(0) + %v2 = f32[3]{0} parameter(1) + %v3 = f32[2,3]{1,0} parameter(2) + ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3), sharding={{replicated}, {maximal device=0}, {replicated}} +} + )" }, // int32 result = 0; diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 06987e0044..7146604708 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -825,8 +825,10 @@ message OpSharding { REPLICATED = 0; // This sharding is maximal - one device runs the entire operation. MAXIMAL = 1; - // Neither of the above; tile_shape and tile_assignment are both used. - OTHER = 2; + // This sharding is a tuple - only the tuple_shardings field is valid. + TUPLE = 2; + // None of the above; tile_shape and tile_assignment are both used. + OTHER = 3; } Type type = 1; // The shape of the sharded tile. @@ -838,6 +840,13 @@ message OpSharding { // Flattened list of device IDs. The order of flattening is the same as used // by IndexUtil::MultiToLinearIndex(tile_assignment_shape). repeated int64 tile_assignment_devices = 4; + // If type == TUPLE, the sub-shardings, one per leaf node in the tuple shape, + // in pre-order. The tuple shape could be nested; here we store just a + // flattened list of all leaves in the tuple shape. Note that the tuple shape + // is not stored here; shardings do not store the shapes to which they are + // applied, this is inferred from the instruction this sharding gets attached + // to. + repeated OpSharding tuple_shardings = 5; } message OpRequest { -- GitLab From 7a5041df41c5c98b175e57729a4976e78887d6fd Mon Sep 17 00:00:00 2001 From: Zhengsheng Wei Date: Thu, 9 Nov 2017 19:02:46 +0800 Subject: [PATCH 0086/1801] modified document --- tensorflow/python/layers/convolutional.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 8c327d7e27..6ad18a4e25 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -64,8 +64,8 @@ class _Conv(base.Layer): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. -- GitLab From 18d5c3e4cf1ea8459d4eb12eb741283263c1a065 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 05:36:43 -0800 Subject: [PATCH 0087/1801] Previously we had a large number of ComputeAndCompare* methods to run a computation and then compare the reuslt to a specified value (Array or Literal). The new method takes adventage of the recently added ComputeConstant method to calculate the expected value using the HloEvaluator eliminating the need for doing the calculation manually. As a usage example I converted the convolution tests to the new method what simplified them by quite a bit. If there is interest then we can migrate the other tests as well and then remove the old style ComputeAndCompare* methods. PiperOrigin-RevId: 175145596 --- .../xla/tests/client_library_test_base.cc | 54 ++++++ .../xla/tests/client_library_test_base.h | 17 ++ .../compiler/xla/tests/convolution_test.cc | 160 ++++++------------ 3 files changed, 126 insertions(+), 105 deletions(-) diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index 065bce7e31..ef54714e46 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -346,6 +346,60 @@ void ClientLibraryTestBase::ComputeAndCompareTuple( LiteralTestUtil::ExpectNearTuple(expected, *actual, error); } +void ClientLibraryTestBase::ComputeAndCompare( + ComputationBuilder* builder, const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments) { + auto status_or_data = ComputeValueAndReference(builder, operand, arguments); + EXPECT_IS_OK(status_or_data); + if (!status_or_data.ok()) { + return; + } + std::unique_ptr reference, result; + std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); + LiteralTestUtil::ExpectEqual(*reference, *result); +} + +void ClientLibraryTestBase::ComputeAndCompare( + ComputationBuilder* builder, const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { + auto status_or_data = ComputeValueAndReference(builder, operand, arguments); + EXPECT_IS_OK(status_or_data); + if (!status_or_data.ok()) { + return; + } + std::unique_ptr reference, result; + std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); + LiteralTestUtil::ExpectNear(*reference, *result, error); +} + +StatusOr, std::unique_ptr>> +ClientLibraryTestBase::ComputeValueAndReference( + ComputationBuilder* builder, const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments) { + // Transfer the arguments to the executor service. We put the unique_ptr's + // into a vector to keep the data alive on the service until the end of this + // function. + std::vector> argument_data; + for (const auto& arg : arguments) { + TF_ASSIGN_OR_RETURN(auto data, client_->TransferToServer(arg)); + argument_data.push_back(std::move(data)); + } + + // Create raw pointers to the GlobalData for the rest of the call stack. + std::vector argument_data_ptr; + std::transform( + argument_data.begin(), argument_data.end(), + std::back_inserter(argument_data_ptr), + [](const std::unique_ptr& data) { return data.get(); }); + + TF_ASSIGN_OR_RETURN( + auto reference, + builder->ComputeConstant(operand, /*output_layout=*/nullptr, arguments)); + TF_ASSIGN_OR_RETURN(auto result, + ExecuteAndTransfer(builder, argument_data_ptr)); + return std::make_pair(std::move(reference), std::move(result)); +} + Computation ClientLibraryTestBase::CreateScalarRelu() { ComputationBuilder builder(client_, "relu"); auto z_value = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "z_value"); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 2c37466ff2..b578667735 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -196,6 +196,16 @@ class ClientLibraryTestBase : public ::testing::Test { ComputationBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec abs_error); + // Convenience method for running a built computation and comparing the result + // with the HloEvaluator. + void ComputeAndCompare(ComputationBuilder* builder, + const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments); + void ComputeAndCompare(ComputationBuilder* builder, + const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments, + ErrorSpec error); + // Create scalar operations for use in reductions. Computation CreateScalarRelu(); Computation CreateScalarMax(); @@ -298,6 +308,13 @@ class ClientLibraryTestBase : public ::testing::Test { const std::function& verify_output, const Shape* output_with_layout = nullptr); + + // Executes the computation and calculates the expected reference value using + // the HloEvaluator. Returns two literal in the order of (expected, actual). + StatusOr, std::unique_ptr>> + ComputeValueAndReference(ComputationBuilder* builder, + const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments); }; template diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 0cc2e5fb7e..7425f778a6 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -82,177 +82,127 @@ XLA_TEST_F(ConvolutionTest, ForwardPassConvolution_3x3x256_256_OutputZ_Iota) { ComputationBuilder builder(client_, TestName()); auto lhs = builder.ConstantR4FromArray4D(*alhs); auto rhs = builder.ConstantR4FromArray4D(*arhs); - builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); + auto conv = builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(*alhs, *arhs, {1, 1}, Padding::kValid); - - ComputeAndCompareR4(&builder, *aexpected, {}, error_spec_); + ComputeAndCompare(&builder, conv, {}, error_spec_); } TEST_F(ConvolutionTest, Convolve_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kValid); - } - - Array4D input(1, 1, 1, 2); - input.FillWithYX(Array2D({ + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D input_data(1, 1, 1, 2); + input_data.FillWithYX(Array2D({ {1, 2}, })); - Array4D filter(1, 1, 1, 2); - filter.FillWithYX(Array2D({ + Array4D filter_data(1, 1, 1, 2); + filter_data.FillWithYX(Array2D({ {5, 6}, })); - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } // Tests valid padding for 2D convolution in raster space. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Valid) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kValid); - } + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); - Array4D input(1, 1, 4, 4); + Array4D input_data(1, 1, 4, 4); // clang-format off - input.FillWithYX(Array2D({ + input_data.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter(1, 1, 2, 2); + Array4D filter_data(1, 1, 2, 2); // clang-format off - filter.FillWithYX(Array2D({ + filter_data.FillWithYX(Array2D({ {5, 6}, {7, 8}, })); // clang-format on - - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } // Tests same padding for 2D convolution in raster space. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Same) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kSame); - } + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); - Array4D input(1, 1, 4, 4); + Array4D input_data(1, 1, 4, 4); // clang-format off - input.FillWithYX(Array2D({ + input_data.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter(1, 1, 2, 2); + Array4D filter_data(1, 1, 2, 2); // clang-format off - filter.FillWithYX(Array2D({ + filter_data.FillWithYX(Array2D({ {5, 6}, {7, 8}, })); // clang-format on - - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } // Tests same padding for 2D convolution in raster space with an odd sized // kernel. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x3x3_Same) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kSame); - } + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); - Array4D input(1, 1, 4, 4); + Array4D input_data(1, 1, 4, 4); // clang-format off - input.FillWithYX(Array2D({ + input_data.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter(1, 1, 3, 3); + Array4D filter_data(1, 1, 3, 3); // clang-format off - filter.FillWithYX(Array2D({ + filter_data.FillWithYX(Array2D({ { 5, 6, 7}, { 8, 9, 10}, {11, 12, 13}, })); // clang-format on - - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { -- GitLab From d45f27d4586ef2d2dcc405eaac97b1515dad9671 Mon Sep 17 00:00:00 2001 From: PW486 Date: Thu, 9 Nov 2017 23:05:38 +0900 Subject: [PATCH 0088/1801] Fixed typos, comments --- tensorflow/core/util/saved_tensor_slice.proto | 2 +- tensorflow/core/util/strided_slice_op.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/saved_tensor_slice.proto b/tensorflow/core/util/saved_tensor_slice.proto index 8a6dd7bdb7..6278685957 100644 --- a/tensorflow/core/util/saved_tensor_slice.proto +++ b/tensorflow/core/util/saved_tensor_slice.proto @@ -1,7 +1,7 @@ // Protocol buffers for saved tensor slices. It's used for the brain tensor // ops checkpoints and the V3 checkpoints in dist_belief. -// A checkpoint file is a stable. The value for each record is a serialized +// A checkpoint file is an sstable. The value for each record is a serialized // SavedTensorSlices message (defined below). // // Each checkpoint file has a record with the empty key (""), which corresponds diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index d5bc676a9a..f0264c0a9d 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -218,8 +218,8 @@ Status ValidateStridedSliceOp( // Step 2: Make a sparse spec into a full index spec // - // The sparse spec does not corresponds to the number of dimensions - // Make a dense spec that corresponds to the number of dimensions + // The sparse spec does not correspond to the number of dimensions + // Make a dense spec that correspond to the number of dimensions // // For example suppose foo[...,3:] on foo.shape=(2,2,3) then // we need to produce the missing begin_mask for the first two -- GitLab From 6f6eb52a89ec6e360d8604fa68516cf2d819207f Mon Sep 17 00:00:00 2001 From: PW486 Date: Thu, 9 Nov 2017 23:10:41 +0900 Subject: [PATCH 0089/1801] Fixed typos, comments --- tensorflow/core/util/strided_slice_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index f0264c0a9d..aca60b942d 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -219,7 +219,7 @@ Status ValidateStridedSliceOp( // Step 2: Make a sparse spec into a full index spec // // The sparse spec does not correspond to the number of dimensions - // Make a dense spec that correspond to the number of dimensions + // Make a dense spec that corresponds to the number of dimensions // // For example suppose foo[...,3:] on foo.shape=(2,2,3) then // we need to produce the missing begin_mask for the first two -- GitLab From 2c3f70192850a4eee3d190f796948f2324d66821 Mon Sep 17 00:00:00 2001 From: amilioto Date: Thu, 9 Nov 2017 15:28:45 +0100 Subject: [PATCH 0090/1801] added patch needed for jetson tx2/1 --- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 6c522264e1..a8759376e4 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -866,6 +866,9 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) { #elif defined(PLATFORM_WINDOWS) // Windows support for NUMA is not currently implemented. Return node 0. return 0; +#elif defined(__aarch64__) + LOG(INFO) << "ARM64 does not support NUMA - returning NUMA node zero"; + return 0; #else VLOG(2) << "trying to read NUMA node for device ordinal: " << device_ordinal; static const int kUnknownNumaNode = -1; -- GitLab From efab2e1d91507b948d545d14f942b15250c8bb92 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 07:37:15 -0800 Subject: [PATCH 0091/1801] Removes void*s from the tape gradient code, replacing with templates. PiperOrigin-RevId: 175155685 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/tape.cc | 410 ------------------- tensorflow/c/eager/tape.h | 473 ++++++++++++++++++++-- tensorflow/python/eager/pywrap_tfe_src.cc | 60 +-- 4 files changed, 479 insertions(+), 465 deletions(-) delete mode 100644 tensorflow/c/eager/tape.cc diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 74e94be8d6..d533758e36 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -106,7 +106,6 @@ tf_cc_test( cc_library( name = "tape", - srcs = ["tape.cc"], hdrs = ["tape.h"], visibility = ["//tensorflow:internal"], deps = [ diff --git a/tensorflow/c/eager/tape.cc b/tensorflow/c/eager/tape.cc deleted file mode 100644 index 459499bb69..0000000000 --- a/tensorflow/c/eager/tape.cc +++ /dev/null @@ -1,410 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include - -#include "tensorflow/c/eager/tape.h" - -namespace tensorflow { -namespace eager { - -bool GradientTape::ShouldRecord(gtl::ArraySlice tensor_ids) { - for (int64 i : tensor_ids) { - if (tensor_tape_.find(i) != tensor_tape_.end()) { - return true; - } - } - return false; -} - -void GradientTape::Watch(int64 tensor_id) { - tensor_tape_.emplace(tensor_id, -1); -} - -void GradientTape::RecordOperation( - const string& op_type, gtl::ArraySlice output_tensors, - gtl::ArraySlice input_tensor_id, void* backward_function, - const std::function& backward_function_deleter) { - if (!ShouldRecord(input_tensor_id)) { - backward_function_deleter(); - return; - } - std::vector ids; - ids.reserve(input_tensor_id.size()); - for (int64 i : input_tensor_id) { - tensor_usage_[i]++; - ids.push_back(i); - } - const int64 op_id = next_op_id_++; - std::vector tensors; - tensors.reserve(output_tensors.size()); - for (const TapeTensor& o : output_tensors) { - // Note: the tensor can have already been watched and hence be in the tape, - // so we cannot check that we're inserting it here. - tensor_tape_[o.id] = op_id; - tensor_usage_[o.id] = 1; - tensors.push_back(o); - } - op_tape_[op_id] = OpTapeEntry{op_type, tensors, ids, backward_function, - backward_function_deleter}; -} - -void GradientTape::DeleteTrace(int64 tensor_id) { - auto it = tensor_usage_.find(tensor_id); - if (it == tensor_usage_.end()) { - return; - } - it->second--; - if (it->second != 0) { - return; - } - tensor_usage_.erase(it); - auto tensor_op_it = tensor_tape_.find(tensor_id); - if (tensor_op_it == tensor_tape_.end()) { - return; - } - const int64 op_id = tensor_op_it->second; - if (op_id == -1) { - // Do not delete watched tensors. - return; - } - tensor_tape_.erase(tensor_op_it); - auto op_it = op_tape_.find(op_id); - CHECK(op_it != op_tape_.end()); - for (const auto& output : op_it->second.output_tensor_info) { - if (tensor_usage_.find(output.id) != tensor_usage_.end()) { - // Found a usage for an output, so cannot delete the op. - return; - } - } - for (int64 id : op_it->second.input_tensor_id) { - DeleteTrace(id); - } - op_it->second.backward_function_deleter(); - op_tape_.erase(op_it); -} - -// Terminology: -// -// - op: a possibly composite operation, which has an entry in the tape -// - target: dy in dx/dy -// - source: dx in dx/dy -// - tensor: one of the many inputs or outputs of an operation -// -// Below here we do the gradient algorithm. It works as follows: -// -// First we filter the tape to just the subset of operations we want to -// differentiate. In the process of doing so we count how many times each Tensor -// is used as an input to an op (so we know when we're done computing gradients -// for that Tensor). We also count, for each tape entry, how many of its output -// Tensors need gradients to be computed (Tensors which are not used do not need -// any gradients to be computed). -// -// Finally, we start a backprop stack with a set of tape entries for which we -// have all gradients available. This set usually is a subset of the set of -// targets (not all since targets which have outputs in the tape will not have -// gradients available initially). -// -// Then we repeatedly pop an entry from the stack, run its backprop, and update -// the gradients of its inputs. Once we have computed all gradients for a single -// input we can mark this input as done, and this can trigger adding an entry to -// the stack if all outputs of that entry are now done. -// -// When the stack is empty we have gradients for all tensors we're interested -// in. - -struct BackpropInitialState { - OpTape op_tape; - - // Map from tensor ID to how many references still exist for this tensor in - // the tape. - std::unordered_map tensor_usage_counts; - - // Maps from op ID to how many output tensors of this op still need to have - // their gradients computed. - std::unordered_map op_missing_tensor; -}; - -BackpropInitialState PrepareBackprop( - gtl::ArraySlice target, const TensorTape& tensor_tape, - OpTape op_tape, const std::unordered_set& sources_set) { - std::vector tensor_stack; - tensor_stack.reserve(target.size()); - for (auto t : target) { - tensor_stack.push_back(t); - } - BackpropInitialState result; - while (!tensor_stack.empty()) { - int64 tensor_id = tensor_stack.back(); - tensor_stack.pop_back(); - auto op_id_it = tensor_tape.find(tensor_id); - if (op_id_it == tensor_tape.end()) { - continue; - } - int64 op_id = op_id_it->second; - auto op_it = op_tape.find(op_id); - auto result_op_it = result.op_tape.find(op_id); - if (op_id == -1 || op_it == op_tape.end() || - result_op_it != result.op_tape.end()) { - continue; - } - CHECK(result.op_tape.emplace(op_id, op_it->second).second); - for (auto it : op_it->second.input_tensor_id) { - auto count_it = result.tensor_usage_counts.find(it); - if (count_it != result.tensor_usage_counts.end()) { - count_it->second++; - } else { - result.tensor_usage_counts[it] = 1; - if (sources_set.find(it) == sources_set.end() && - tensor_tape.find(it) != tensor_tape.end()) { - tensor_stack.push_back(it); - } - } - } - op_tape.erase(op_it); - } - for (auto& pair : result.tensor_usage_counts) { - auto it = tensor_tape.find(pair.first); - if (it != tensor_tape.end() && it->second != -1) { - result.op_missing_tensor[it->second] += 1; - } - } - // Call destructors for all unneeded gradient functions. - for (const auto& op_pair : op_tape) { - op_pair.second.backward_function_deleter(); - } - return result; -} - -std::vector InitialStack( - const OpTape& op_tape, - const std::unordered_map& op_missing_tensor) { - std::vector result; - for (auto& op_entry : op_tape) { - if (op_missing_tensor.find(op_entry.first) == op_missing_tensor.end()) { - result.push_back(op_entry.first); - } - } - return result; -} - -Status InitialGradients(const VSpace& vspace, gtl::ArraySlice target, - gtl::ArraySlice output_gradients, - std::unordered_map tensor_usage_counts, - std::unordered_map>* result) { - for (int i = 0; i < target.size(); ++i) { - int64 id = vspace.TensorId(target[i]); - if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { - if (!output_gradients.empty() && output_gradients[i] != nullptr) { - // TODO(apassos) figure out how to print debugging information here. - return errors::InvalidArgument( - "A gradient was provided for a tensor which is used as part of the " - "computation."); - } - } else { - if (output_gradients.empty() || output_gradients[i] == nullptr) { - (*result)[id].push_back(vspace.OnesLike(target[i])); - } else { - (*result)[id].push_back(output_gradients[i]); - } - } - } - return Status::OK(); -} - -// If over kMinAggregateCount gradients are accumulated and the total -// memory consumption is over kMinAggregateBytes, do an early aggregation -// so as to release the gradient tensor to save memory. -static const int kMinAggregateCount = 4; -static const int kMinAggregateBytes = 128 * 1024 * 1024; - -Status GradientTape::Gradient(const VSpace& vspace, - gtl::ArraySlice target, - gtl::ArraySlice sources, - gtl::ArraySlice output_gradients, - std::vector* result) { - std::vector id_sources; - id_sources.reserve(sources.size()); - for (void* s : sources) { - id_sources.push_back(vspace.TensorId(s)); - } - std::unordered_set sources_set(id_sources.begin(), id_sources.end()); - std::vector id_targets; - id_sources.reserve(target.size()); - for (void* t : target) { - id_targets.push_back(vspace.TensorId(t)); - } - BackpropInitialState state = PrepareBackprop( - id_targets, tensor_tape_, std::move(op_tape_), sources_set); - std::vector op_stack = - InitialStack(state.op_tape, state.op_missing_tensor); - std::unordered_map> gradients; - Status s = InitialGradients(vspace, target, output_gradients, - state.tensor_usage_counts, &gradients); - auto cleanup = [&state]() { - // Release all backprop functions - for (const auto& pair : state.op_tape) { - pair.second.backward_function_deleter(); - } - }; - if (!s.ok()) { - cleanup(); - return s; - } - std::unordered_map gradients_size; - // TODO(apassos) multiple threads could be dequeuing from op_stack at the same - // time, for better CPU backprop performance. - VLOG(1) << "Initial stack:"; - if (VLOG_IS_ON(1)) { - for (auto t : op_stack) { - VLOG(1) << " " << t; - } - } - std::unordered_map> - functions_accept_none_for_indices({ - {"SoftmaxCrossEntropyWithLogits", {1}}, - {"FusedBatchNorm", {1, 2, 3, 4}}, - }); - while (!op_stack.empty()) { - const int64 op = op_stack.back(); - VLOG(1) << "Popped " << op; - op_stack.pop_back(); - auto op_it = state.op_tape.find(op); - if (op_it == state.op_tape.end()) { - // It is possible for ops to end up on the stack if they are unrelated to - // the target; we should just skip them. - continue; - } - auto trace = std::move(op_it->second); - state.op_tape.erase(op_it); - std::vector out_gradients; - out_gradients.reserve(trace.output_tensor_info.size()); - for (int i = 0; i < trace.output_tensor_info.size(); ++i) { - const int64 id = trace.output_tensor_info[i].id; - auto grad_it = gradients.find(id); - if (grad_it == gradients.end()) { - auto func_name_it = - functions_accept_none_for_indices.find(trace.op_type); - if (func_name_it != functions_accept_none_for_indices.end() && - func_name_it->second.find(i) != func_name_it->second.end()) { - out_gradients.push_back(nullptr); - } else { - out_gradients.push_back( - vspace.Zeros(trace.output_tensor_info[i].shape, - trace.output_tensor_info[i].dtype)); - } - } else { - out_gradients.push_back(vspace.AggregateGradients(grad_it->second)); - if (sources_set.find(grad_it->first) == sources_set.end()) { - gradients.erase(grad_it); - } - } - } - std::vector in_gradients; - Status s = vspace.CallBackwardFunction(trace.backward_function, - out_gradients, &in_gradients); - if (!s.ok()) { - VLOG(1) << "Gradient function failed."; - cleanup(); - return s; - } - VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " - << trace.input_tensor_id.size() << " sources"; - for (int i = 0; i < in_gradients.size(); ++i) { - const int64 id = trace.input_tensor_id[i]; - if (in_gradients[i] != nullptr) { - auto& unaggregated_grads = gradients[id]; - unaggregated_grads.push_back(in_gradients[i]); - if (unaggregated_grads.size() > kMinAggregateCount) { - auto size_it = gradients_size.find(id); - int64 size; - if (size_it == gradients_size.end()) { - size = vspace.NumElements(unaggregated_grads[0]); - gradients_size.emplace(id, size); - } else { - size = size_it->second; - } - if (unaggregated_grads.size() * size * 4 > kMinAggregateBytes) { - void* tensor = vspace.AggregateGradients(unaggregated_grads); - unaggregated_grads.clear(); - unaggregated_grads.push_back(tensor); - } - } - } - auto usage_count_it = state.tensor_usage_counts.find(id); - if (usage_count_it == state.tensor_usage_counts.end()) { - VLOG(1) << "Tensor " << id << " not used"; - continue; - } - usage_count_it->second--; - if (usage_count_it->second > 0) { - VLOG(1) << "Tensor " << id << " usage count " << usage_count_it->second; - continue; - } - auto tape_it = tensor_tape_.find(id); - if (tape_it == tensor_tape_.end()) { - VLOG(1) << "Tensor " << id - << " has no associated op. Deleting gradient"; - auto grad_it = gradients.find(id); - if (grad_it != gradients.end()) { - for (auto g : grad_it->second) { - vspace.DeleteTensor(g); - } - gradients.erase(grad_it); - } - continue; - } - const int64 op_id = tape_it->second; - if (op_id == -1) { - VLOG(1) << "Tensor " << id << " is source"; - continue; - } - auto missing_it = state.op_missing_tensor.find(op_id); - if (missing_it != state.op_missing_tensor.end()) { - missing_it->second--; - VLOG(1) << "Op " << op_id << " missing " << missing_it->second - << " output gradients"; - if (missing_it->second == 0) { - op_stack.push_back(op_id); - } - } - } - } - CHECK(state.op_tape.empty()); - result->reserve(sources.size()); - for (auto is : id_sources) { - auto grad_it = gradients.find(is); - if (grad_it == gradients.end()) { - result->push_back(nullptr); - } else { - if (grad_it->second.size() == 1) { - result->push_back(grad_it->second[0]); - } else { - result->push_back(vspace.AggregateGradients(grad_it->second)); - } - gradients.erase(grad_it); - } - } - VLOG(1) << "Final gradients size: " << gradients.size(); - for (auto grad_pair : gradients) { - for (const auto& g : grad_pair.second) { - vspace.DeleteTensor(g); - } - } - return Status::OK(); -} - -} // namespace eager -} // namespace tensorflow diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 2bb62a7ab3..654ceb7bec 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -19,6 +19,7 @@ limitations under the License. // maintains the data structures required to do so. #include +#include #include #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" @@ -36,13 +37,14 @@ struct TapeTensor { }; // Represents an entry in the tape. +template struct OpTapeEntry { string op_type; std::vector output_tensor_info; std::vector input_tensor_id; // TODO(apassos) consider narrowing down this interface. - void* backward_function; + BackwardFunction* backward_function; // Should be called before deleting the backward function. TODO(apassos) use // unique_ptrs to ensure this happens. @@ -55,51 +57,67 @@ struct OpTapeEntry { using TensorTape = std::unordered_map; // Map from operation-id to tape entry. -using OpTape = std::unordered_map; +template +using OpTape = std::unordered_map>; // Operations the tape needs to perform on tensors to do backpropagation. Named // "vspace" because a subset of these are related to a vector space, such as // adding gradients, getting zeroes, etc. Currently cannot be implemented // without using tensorflow python code, hence left unspecified here. // -// We currently use void* for tensors, backward functions, and gradients (which -// can be but are not required to be tensors). TODO(apassos) replace this first -// with templates to allow for pyobject specialization in the client followed by -// a TFE_TensorHandle specialization, which is blocked by quite a few things -// still. +// Tensor is a representation of a tensor. We need to take its ID, and it needs +// to match IDs in the tape. +// +// Gradient is the type returned by gradient functions. In Python TF it's either +// Tensor or IndexedSlices or None, which here we map to nullptr. Gradients need +// to allow their size to be computed and they need to be passable to a backward +// function and deleted (as the backprop code creates lots of gradients the user +// is not interested in). +// +// BackwardFunction needs to be a closure which stores intermediate activations +// from the forward computation and calls a vector-jacobian product function +// (also known as adjoint function) to compute, given downstream gradients, +// upstream gradients. +// +// TODO(apassos) provide concrete template instantiations for TFE_TensorHandle +// specialization, which is blocked by quite a few things needing to loop back +// into python now. +template class VSpace { public: virtual ~VSpace() {} - // Returns the number of elements in the tensor. - virtual int64 NumElements(void* tensor) const = 0; + // Returns the number of elements in the gradient tensor. + virtual int64 NumElements(Gradient* tensor) const = 0; // Consumes references to the tensors in the gradient_tensors list and returns // a tensor with the result. - virtual void* AggregateGradients( - gtl::ArraySlice gradient_tensors) const = 0; + virtual Gradient* AggregateGradients( + gtl::ArraySlice gradient_tensors) const = 0; // Returns a tensor of the right shape and dtype filled with zeros. - virtual void* Zeros(TensorShape shape, DataType dtype) const = 0; + virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0; // Returns a Tensor which is filled with ones and like the input. - virtual void* OnesLike(void*) const = 0; + virtual Gradient* OnesLike(Tensor*) const = 0; // Returns an integer which is a unique-to-within-this-program handle for this // tensor. - virtual int64 TensorId(void* tensor) const = 0; + virtual int64 TensorId(Tensor* tensor) const = 0; // Calls the passed-in backward function. - virtual Status CallBackwardFunction(void* backward_function, - gtl::ArraySlice output_gradients, - std::vector* result) const = 0; + virtual Status CallBackwardFunction( + BackwardFunction* backward_function, + gtl::ArraySlice output_gradients, + std::vector* result) const = 0; // Deletes the input tensor. - virtual void DeleteTensor(void* tensor) const = 0; + virtual void DeleteGradient(Gradient* gradient) const = 0; }; // Traces the execution of operations, doing eager garbage collection, and // exporting a full trace so other code can do backpropagation. Not thread-safe. +template class GradientTape { public: GradientTape() {} @@ -116,7 +134,7 @@ class GradientTape { void RecordOperation(const string& op_type, gtl::ArraySlice output_tensors, gtl::ArraySlice input_tensor_id, - void* backward_function, + BackwardFunction* backward_function, const std::function& backward_function_deleter); void DeleteTrace(int64 tensor_id); @@ -125,14 +143,15 @@ class GradientTape { // once) and produces the gradient of the target tensors with respect to the // source tensors. The output gradients are used if not empty and not // null. The result is populated with one tensor per target element. - Status Gradient(const VSpace& vspace, gtl::ArraySlice target, - gtl::ArraySlice sources, - gtl::ArraySlice output_gradients, - std::vector* result); + Status ComputeGradient( + const VSpace& vspace, + gtl::ArraySlice target, gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result); private: TensorTape tensor_tape_; - OpTape op_tape_; + OpTape op_tape_; int64 next_op_id_{0}; // Map from tensor id to number of remaining usages (i.e. how many entries in @@ -140,6 +159,412 @@ class GradientTape { std::unordered_map tensor_usage_; }; +// Template instantiations here + +template +bool GradientTape::ShouldRecord( + gtl::ArraySlice tensor_ids) { + for (int64 i : tensor_ids) { + if (tensor_tape_.find(i) != tensor_tape_.end()) { + return true; + } + } + return false; +} + +template +void GradientTape::Watch(int64 tensor_id) { + tensor_tape_.emplace(tensor_id, -1); +} + +template +void GradientTape::RecordOperation( + const string& op_type, gtl::ArraySlice output_tensors, + gtl::ArraySlice input_tensor_id, BackwardFunction* backward_function, + const std::function& backward_function_deleter) { + if (!ShouldRecord(input_tensor_id)) { + backward_function_deleter(); + return; + } + std::vector ids; + ids.reserve(input_tensor_id.size()); + for (int64 i : input_tensor_id) { + tensor_usage_[i]++; + ids.push_back(i); + } + const int64 op_id = next_op_id_++; + std::vector tensors; + tensors.reserve(output_tensors.size()); + for (const TapeTensor& o : output_tensors) { + // Note: the tensor can have already been watched and hence be in the tape, + // so we cannot check that we're inserting it here. + tensor_tape_[o.id] = op_id; + tensor_usage_[o.id] = 1; + tensors.push_back(o); + } + op_tape_[op_id] = OpTapeEntry{ + op_type, tensors, ids, backward_function, backward_function_deleter}; +} + +template +void GradientTape::DeleteTrace( + int64 tensor_id) { + auto it = tensor_usage_.find(tensor_id); + if (it == tensor_usage_.end()) { + return; + } + it->second--; + if (it->second != 0) { + return; + } + tensor_usage_.erase(it); + auto tensor_op_it = tensor_tape_.find(tensor_id); + if (tensor_op_it == tensor_tape_.end()) { + return; + } + const int64 op_id = tensor_op_it->second; + if (op_id == -1) { + // Do not delete watched tensors. + return; + } + tensor_tape_.erase(tensor_op_it); + auto op_it = op_tape_.find(op_id); + CHECK(op_it != op_tape_.end()); + for (const auto& output : op_it->second.output_tensor_info) { + if (tensor_usage_.find(output.id) != tensor_usage_.end()) { + // Found a usage for an output, so cannot delete the op. + return; + } + } + for (int64 id : op_it->second.input_tensor_id) { + DeleteTrace(id); + } + op_it->second.backward_function_deleter(); + op_tape_.erase(op_it); +} + +// Terminology: +// +// - op: a possibly composite operation, which has an entry in the tape +// - target: dy in dx/dy +// - source: dx in dx/dy +// - tensor: one of the many inputs or outputs of an operation +// +// Below here we do the gradient algorithm. It works as follows: +// +// First we filter the tape to just the subset of operations we want to +// differentiate. In the process of doing so we count how many times each Tensor +// is used as an input to an op (so we know when we're done computing gradients +// for that Tensor). We also count, for each tape entry, how many of its output +// Tensors need gradients to be computed (Tensors which are not used do not need +// any gradients to be computed). +// +// Finally, we start a backprop stack with a set of tape entries for which we +// have all gradients available. This set usually is a subset of the set of +// targets (not all since targets which have outputs in the tape will not have +// gradients available initially). +// +// Then we repeatedly pop an entry from the stack, run its backprop, and update +// the gradients of its inputs. Once we have computed all gradients for a single +// input we can mark this input as done, and this can trigger adding an entry to +// the stack if all outputs of that entry are now done. +// +// When the stack is empty we have gradients for all tensors we're interested +// in. + +namespace { + +template +struct BackpropInitialState { + OpTape op_tape; + + // Map from tensor ID to how many references still exist for this tensor in + // the tape. + std::unordered_map tensor_usage_counts; + + // Maps from op ID to how many output tensors of this op still need to have + // their gradients computed. + std::unordered_map op_missing_tensor; +}; + +template +BackpropInitialState PrepareBackprop( + gtl::ArraySlice target, const TensorTape& tensor_tape, + OpTape op_tape, + const std::unordered_set& sources_set) { + std::vector tensor_stack; + tensor_stack.reserve(target.size()); + for (auto t : target) { + tensor_stack.push_back(t); + } + BackpropInitialState result; + while (!tensor_stack.empty()) { + int64 tensor_id = tensor_stack.back(); + tensor_stack.pop_back(); + auto op_id_it = tensor_tape.find(tensor_id); + if (op_id_it == tensor_tape.end()) { + continue; + } + int64 op_id = op_id_it->second; + auto op_it = op_tape.find(op_id); + auto result_op_it = result.op_tape.find(op_id); + if (op_id == -1 || op_it == op_tape.end() || + result_op_it != result.op_tape.end()) { + continue; + } + CHECK(result.op_tape.emplace(op_id, op_it->second).second); + for (auto it : op_it->second.input_tensor_id) { + auto count_it = result.tensor_usage_counts.find(it); + if (count_it != result.tensor_usage_counts.end()) { + count_it->second++; + } else { + result.tensor_usage_counts[it] = 1; + if (sources_set.find(it) == sources_set.end() && + tensor_tape.find(it) != tensor_tape.end()) { + tensor_stack.push_back(it); + } + } + } + op_tape.erase(op_it); + } + for (auto& pair : result.tensor_usage_counts) { + auto it = tensor_tape.find(pair.first); + if (it != tensor_tape.end() && it->second != -1) { + result.op_missing_tensor[it->second] += 1; + } + } + // Call destructors for all unneeded gradient functions. + for (const auto& op_pair : op_tape) { + op_pair.second.backward_function_deleter(); + } + return result; +} + +template +std::vector InitialStack( + const OpTape& op_tape, + const std::unordered_map& op_missing_tensor) { + std::vector result; + for (auto& op_entry : op_tape) { + if (op_missing_tensor.find(op_entry.first) == op_missing_tensor.end()) { + result.push_back(op_entry.first); + } + } + return result; +} + +template +Status InitialGradients( + const VSpace& vspace, + gtl::ArraySlice target, + gtl::ArraySlice output_gradients, + std::unordered_map tensor_usage_counts, + std::unordered_map>* result) { + for (int i = 0; i < target.size(); ++i) { + int64 id = vspace.TensorId(target[i]); + if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { + if (!output_gradients.empty() && output_gradients[i] != nullptr) { + // TODO(apassos) figure out how to print debugging information here. + return errors::InvalidArgument( + "A gradient was provided for a tensor which is used as part of the " + "computation."); + } + } else { + if (output_gradients.empty() || output_gradients[i] == nullptr) { + (*result)[id].push_back(vspace.OnesLike(target[i])); + } else { + (*result)[id].push_back(output_gradients[i]); + } + } + } + return Status::OK(); +} + +} // namespace + +// If over kMinAggregateCount gradients are accumulated and the total +// memory consumption is over kMinAggregateBytes, do an early aggregation +// so as to release the gradient tensor to save memory. +constexpr int kMinAggregateCount = 4; +constexpr int kMinAggregateBytes = 128 * 1024 * 1024; + +template +Status GradientTape::ComputeGradient( + const VSpace& vspace, + gtl::ArraySlice target, gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result) { + std::vector id_sources; + id_sources.reserve(sources.size()); + for (Tensor* s : sources) { + id_sources.push_back(vspace.TensorId(s)); + } + std::unordered_set sources_set(id_sources.begin(), id_sources.end()); + std::vector id_targets; + id_sources.reserve(target.size()); + for (Tensor* t : target) { + id_targets.push_back(vspace.TensorId(t)); + } + BackpropInitialState state = PrepareBackprop( + id_targets, tensor_tape_, std::move(op_tape_), sources_set); + std::vector op_stack = + InitialStack(state.op_tape, state.op_missing_tensor); + std::unordered_map> gradients; + Status s = InitialGradients(vspace, target, output_gradients, + state.tensor_usage_counts, &gradients); + auto cleanup = [&state]() { + // Release all backprop functions + for (const auto& pair : state.op_tape) { + pair.second.backward_function_deleter(); + } + }; + if (!s.ok()) { + cleanup(); + return s; + } + std::unordered_map gradients_size; + // TODO(apassos) multiple threads could be dequeuing from op_stack at the same + // time, for better CPU backprop performance. + VLOG(1) << "Initial stack:"; + if (VLOG_IS_ON(1)) { + for (auto t : op_stack) { + VLOG(1) << " " << t; + } + } + std::unordered_map> + functions_accept_none_for_indices({ + {"SoftmaxCrossEntropyWithLogits", {1}}, + {"FusedBatchNorm", {1, 2, 3, 4}}, + }); + while (!op_stack.empty()) { + const int64 op = op_stack.back(); + VLOG(1) << "Popped " << op; + op_stack.pop_back(); + auto op_it = state.op_tape.find(op); + if (op_it == state.op_tape.end()) { + // It is possible for ops to end up on the stack if they are unrelated to + // the target; we should just skip them. + continue; + } + auto trace = std::move(op_it->second); + state.op_tape.erase(op_it); + std::vector out_gradients; + out_gradients.reserve(trace.output_tensor_info.size()); + for (int i = 0; i < trace.output_tensor_info.size(); ++i) { + const int64 id = trace.output_tensor_info[i].id; + auto grad_it = gradients.find(id); + if (grad_it == gradients.end()) { + auto func_name_it = + functions_accept_none_for_indices.find(trace.op_type); + if (func_name_it != functions_accept_none_for_indices.end() && + func_name_it->second.find(i) != func_name_it->second.end()) { + out_gradients.push_back(nullptr); + } else { + out_gradients.push_back( + vspace.Zeros(trace.output_tensor_info[i].shape, + trace.output_tensor_info[i].dtype)); + } + } else { + out_gradients.push_back(vspace.AggregateGradients(grad_it->second)); + if (sources_set.find(grad_it->first) == sources_set.end()) { + gradients.erase(grad_it); + } + } + } + std::vector in_gradients; + Status s = vspace.CallBackwardFunction(trace.backward_function, + out_gradients, &in_gradients); + if (!s.ok()) { + VLOG(1) << "Gradient function failed."; + cleanup(); + return s; + } + VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " + << trace.input_tensor_id.size() << " sources"; + for (int i = 0; i < in_gradients.size(); ++i) { + const int64 id = trace.input_tensor_id[i]; + if (in_gradients[i] != nullptr) { + auto& unaggregated_grads = gradients[id]; + unaggregated_grads.push_back(in_gradients[i]); + if (unaggregated_grads.size() > kMinAggregateCount) { + auto size_it = gradients_size.find(id); + int64 size; + if (size_it == gradients_size.end()) { + size = vspace.NumElements(unaggregated_grads[0]); + gradients_size.emplace(id, size); + } else { + size = size_it->second; + } + if (unaggregated_grads.size() * size * 4 > kMinAggregateBytes) { + Gradient* grad = vspace.AggregateGradients(unaggregated_grads); + unaggregated_grads.clear(); + unaggregated_grads.push_back(grad); + } + } + } + auto usage_count_it = state.tensor_usage_counts.find(id); + if (usage_count_it == state.tensor_usage_counts.end()) { + VLOG(1) << "Tensor " << id << " not used"; + continue; + } + usage_count_it->second--; + if (usage_count_it->second > 0) { + VLOG(1) << "Tensor " << id << " usage count " << usage_count_it->second; + continue; + } + auto tape_it = tensor_tape_.find(id); + if (tape_it == tensor_tape_.end()) { + VLOG(1) << "Tensor " << id + << " has no associated op. Deleting gradient"; + auto grad_it = gradients.find(id); + if (grad_it != gradients.end()) { + for (auto g : grad_it->second) { + vspace.DeleteGradient(g); + } + gradients.erase(grad_it); + } + continue; + } + const int64 op_id = tape_it->second; + if (op_id == -1) { + VLOG(1) << "Tensor " << id << " is source"; + continue; + } + auto missing_it = state.op_missing_tensor.find(op_id); + if (missing_it != state.op_missing_tensor.end()) { + missing_it->second--; + VLOG(1) << "Op " << op_id << " missing " << missing_it->second + << " output gradients"; + if (missing_it->second == 0) { + op_stack.push_back(op_id); + } + } + } + } + CHECK(state.op_tape.empty()); + result->reserve(sources.size()); + for (auto is : id_sources) { + auto grad_it = gradients.find(is); + if (grad_it == gradients.end()) { + result->push_back(nullptr); + } else { + if (grad_it->second.size() == 1) { + result->push_back(grad_it->second[0]); + } else { + result->push_back(vspace.AggregateGradients(grad_it->second)); + } + gradients.erase(grad_it); + } + } + VLOG(1) << "Final gradients size: " << gradients.size(); + for (auto grad_pair : gradients) { + for (const auto& g : grad_pair.second) { + vspace.DeleteGradient(g); + } + } + return Status::OK(); +} + } // namespace eager } // namespace tensorflow diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index a00a7615d7..d67c3b18f7 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -443,10 +443,13 @@ void TFE_DeleteContextCapsule(PyObject* context) { TF_DeleteStatus(status); } +using GradientTape = + tensorflow::eager::GradientTape; + typedef struct { PyObject_HEAD /* Type-specific fields go here. */ - tensorflow::eager::GradientTape* tape; + GradientTape* tape; } TFE_Py_Tape; static void TFE_Py_Tape_Delete(PyObject* tape) { @@ -481,7 +484,7 @@ PyObject* TFE_Py_NewTape() { TFE_Py_Tape_Type.tp_new = PyType_GenericNew; if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return nullptr; TFE_Py_Tape* tape = PyObject_NEW(TFE_Py_Tape, &TFE_Py_Tape_Type); - tape->tape = new tensorflow::eager::GradientTape(); + tape->tape = new GradientTape(); return reinterpret_cast(tape); } @@ -627,9 +630,8 @@ void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); } -// TODO(apassos): cache the attribute lookups as member variables and decref -// them in the destructor. -class PyVSpace : public tensorflow::eager::VSpace { +class PyVSpace + : public tensorflow::eager::VSpace { public: explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} @@ -661,7 +663,7 @@ class PyVSpace : public tensorflow::eager::VSpace { Py_XDECREF(ones_like_); } - tensorflow::int64 NumElements(void* tensor) const final { + tensorflow::int64 NumElements(PyObject* tensor) const final { PyObject* arglist = Py_BuildValue("(O)", reinterpret_cast(tensor)); PyObject* result = PyEval_CallObject(num_elements_, arglist); @@ -671,8 +673,8 @@ class PyVSpace : public tensorflow::eager::VSpace { return r; } - void* AggregateGradients( - tensorflow::gtl::ArraySlice gradient_tensors) const final { + PyObject* AggregateGradients( + tensorflow::gtl::ArraySlice gradient_tensors) const final { PyObject* list = PyList_New(gradient_tensors.size()); for (int i = 0; i < gradient_tensors.size(); ++i) { // Note: stealing a reference to the gradient tensors. @@ -689,8 +691,8 @@ class PyVSpace : public tensorflow::eager::VSpace { return result; } - void* Zeros(tensorflow::TensorShape shape, - tensorflow::DataType dtype) const final { + PyObject* Zeros(tensorflow::TensorShape shape, + tensorflow::DataType dtype) const final { PyObject* py_shape = PyTuple_New(shape.dims()); for (int i = 0; i < shape.dims(); ++i) { PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); @@ -701,20 +703,20 @@ class PyVSpace : public tensorflow::eager::VSpace { Py_DECREF(arg_list); Py_DECREF(py_dtype); Py_DECREF(py_shape); - return reinterpret_cast(result); + return reinterpret_cast(result); } - void* OnesLike(void* tensor) const final { + PyObject* OnesLike(PyObject* tensor) const final { PyObject* arg_list = Py_BuildValue("(O)", tensor); PyObject* result = PyEval_CallObject(ones_like_, arg_list); if (result == nullptr) { VLOG(1) << "Call to ones_like failed"; } Py_DECREF(arg_list); - return reinterpret_cast(result); + return result; } - tensorflow::int64 TensorId(void* tensor) const final { + tensorflow::int64 TensorId(PyObject* tensor) const final { PyObject* py_tensor = reinterpret_cast(tensor); PyObject* id_field = PyObject_GetAttrString(py_tensor, "_id"); tensorflow::int64 id = MakeInt(id_field); @@ -723,9 +725,9 @@ class PyVSpace : public tensorflow::eager::VSpace { } tensorflow::Status CallBackwardFunction( - void* backward_function, - tensorflow::gtl::ArraySlice output_gradients, - std::vector* result) const final { + PyObject* backward_function, + tensorflow::gtl::ArraySlice output_gradients, + std::vector* result) const final { PyObject* grads = PyTuple_New(output_gradients.size()); for (int i = 0; i < output_gradients.size(); ++i) { if (output_gradients[i] == nullptr) { @@ -771,9 +773,7 @@ class PyVSpace : public tensorflow::eager::VSpace { return tensorflow::Status::OK(); } - void DeleteTensor(void* tensor) const final { - Py_XDECREF(reinterpret_cast(tensor)); - } + void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); } private: PyObject* py_vspace_; @@ -784,13 +784,13 @@ class PyVSpace : public tensorflow::eager::VSpace { PyObject* ones_like_; }; -std::vector MakeTensorList(PyObject* tensors) { +std::vector MakeTensorList(PyObject* tensors) { PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); if (seq == nullptr) { return {}; } int len = PySequence_Fast_GET_SIZE(seq); - std::vector list; + std::vector list; list.reserve(len); for (int i = 0; i < len; ++i) { list.push_back(PySequence_Fast_GET_ITEM(seq, i)); @@ -807,30 +807,30 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, return nullptr; } - std::vector target_vec = MakeTensorList(target); + std::vector target_vec = MakeTensorList(target); if (PyErr_Occurred()) { return nullptr; } - std::vector sources_vec = MakeTensorList(sources); + std::vector sources_vec = MakeTensorList(sources); if (PyErr_Occurred()) { return nullptr; } - std::vector outgrad_vec; + std::vector outgrad_vec; if (output_gradients != Py_None) { outgrad_vec = MakeTensorList(output_gradients); if (PyErr_Occurred()) { return nullptr; } - for (void* tensor : outgrad_vec) { + for (PyObject* tensor : outgrad_vec) { // Calling the backward function will eat a reference to the tensors in // outgrad_vec, so we need to increase their reference count. - Py_INCREF(reinterpret_cast(tensor)); + Py_INCREF(tensor); } } TFE_Py_Tape* tape_obj = reinterpret_cast(tape); - std::vector result; - status->status = tape_obj->tape->Gradient(c_vspace, target_vec, sources_vec, - outgrad_vec, &result); + std::vector result; + status->status = tape_obj->tape->ComputeGradient( + c_vspace, target_vec, sources_vec, outgrad_vec, &result); if (!status->status.ok()) { return nullptr; } -- GitLab From bcf2ce97591e0cf6b76148e64cf073dd122f41f6 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 08:30:44 -0800 Subject: [PATCH 0092/1801] Fix typo in tensorflow/python/client/timeline.py COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/14386 from yifeif:yifeif-patch-2 8391d3b0369f823fc94ea75aef2df04c611a1671 PiperOrigin-RevId: 175161296 --- tensorflow/python/client/timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/client/timeline.py b/tensorflow/python/client/timeline.py index f3ba4244ce..1e96ac5ed4 100644 --- a/tensorflow/python/client/timeline.py +++ b/tensorflow/python/client/timeline.py @@ -275,7 +275,7 @@ class _TensorTracker(object): name: The name of the Tensor as a string. object_id: Chrome Trace object identifier assigned for this Tensor. timestamp: The creation timestamp of this event as a long integer. - pid: Process identifier of the assicaiated device, as an integer. + pid: Process identifier of the associated device, as an integer. allocator: Name of the allocator used to create the Tensor. num_bytes: Number of bytes allocated (long integer). -- GitLab From f11999586914467f510de2fc3b33fac3c984e6d4 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 9 Nov 2017 08:46:31 -0800 Subject: [PATCH 0093/1801] Internal Change. PiperOrigin-RevId: 175163107 --- configure.py | 9 +++++---- tensorflow/BUILD | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/configure.py b/configure.py index 650541770a..e98367ef9f 100644 --- a/configure.py +++ b/configure.py @@ -487,10 +487,11 @@ def set_cc_opt_flags(environ_cp): cc_opt_flags = get_from_env_or_user_or_default(environ_cp, 'CC_OPT_FLAGS', question, default_cc_opt_flags) for opt in cc_opt_flags.split(): - host_opt = '-march=native' # It should be safe on the same build host. - write_to_bazelrc( - 'build:opt --cxxopt=%s --copt=%s' % (opt, opt) + - ' --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) + write_to_bazelrc('build:opt --cxxopt=%s --copt=%s' % (opt, opt)) + host_opt = '-march=native' # It should be safe on the same build host. + write_to_bazelrc( + 'build:opt --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) + write_to_bazelrc('build:opt --define with_default_optimizations=true') def set_tf_cuda_clang(environ_cp): diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 5a408db94e..8d3d38b5a1 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -178,6 +178,12 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "with_default_optimizations", + define_values = {"with_default_optimizations": "true"}, + visibility = ["//visibility:public"], +) + config_setting( name = "with_gcp_support", define_values = {"with_gcp_support": "true"}, -- GitLab From a11d99f2ff4b3022f615d07b142b73571ff93b20 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 09:11:36 -0800 Subject: [PATCH 0094/1801] Implemented Processor, which allows us to merge shapes of unknown rank with shapes of known rank. Made sure Processor::Merge doesn't erase previously inferred dimensions. PiperOrigin-RevId: 175166217 --- .../core/grappler/costs/graph_properties.cc | 103 +++++++----------- .../grappler/costs/graph_properties_test.cc | 7 +- 2 files changed, 45 insertions(+), 65 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 8654a2a3ed..151455778a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -50,13 +50,9 @@ template struct HandleToObject {}; template <> struct HandleToObject { - typedef TensorShapeProto Object; + typedef ShapeHandle Object; - static TensorShapeProto Unknown() { - TensorShapeProto result; - result.set_unknown_rank(true); - return result; - } + static ShapeHandle Unknown() { return ShapeHandle(); } }; template <> @@ -67,13 +63,24 @@ struct HandleToObject { }; template -struct Processor { +struct Processor {}; + +template <> +struct Processor { // Extract the shape or dim denoted by the handle. - void ExtractValue(Handle /*t1*/, - typename HandleToObject::Object* result) {} + void ExtractValue(ShapeHandle h, ShapeHandle* result) { *result = h; } // Merge the shapes or dims. - Status Merge(Handle /*t1*/, Handle /*t2*/, - typename HandleToObject::Object* result) { + Status Merge(ShapeHandle h1, ShapeHandle h2, ShapeHandle* result) { + if (InferenceContext::RankKnown(*result)) { + // The result was initialized in a previous merge to a shape of known + // rank, make sure we preserve that information. + return Status::OK(); + } + if (InferenceContext::RankKnown(h1)) { + *result = h1; + } else { + *result = h2; + } return Status::OK(); } }; @@ -101,24 +108,34 @@ struct Processor { if (dim1 >= 0 && dim2 >= 0) { CHECK_EQ(dim1, dim2); - *result = dim1; + RefineDim(dim1, result); } else if (dim1 >= 0 && dim2 < 0) { - *result = dim1; + RefineDim(dim1, result); } else if (dim1 < 0 && dim2 >= 0) { - *result = dim2; + RefineDim(dim2, result); } else if (dim1 < -1) { - *result = dim1; + RefineDim(dim1, result); } else if (dim2 < -1) { - *result = dim2; + RefineDim(dim2, result); } else { CHECK_EQ(dim1, dim2); CHECK_EQ(-1, dim1); - *result = -1; + RefineDim(-1, result); } return Status::OK(); } private: + void RefineDim(int64 dim, int64* result) { + if (*result >= 0) { + CHECK(*result == dim || dim < 0); + } else if (dim >= 0) { + *result = dim; + } else if (dim < *result) { + *result = dim; + } + } + int64 counter = 2; }; @@ -354,18 +371,17 @@ class SymbolicShapeManager { return dims_.Merge(d1, d2); } - int64 Value(DimensionHandle d) { return dims_.GetMergedValue(d); } - void AsTensorProperties(const ShapeHandle& shape, const DataType& type, - InferenceContext* ctx, OpInfo::TensorProperties* properties) { properties->set_dtype(type); - if (!ctx->RankKnown(shape)) { + ShapeHandle actual_shape = shapes_.GetMergedValue(shape); + if (!InferenceContext::RankKnown(actual_shape)) { properties->mutable_shape()->set_unknown_rank(true); } else { - for (int j = 0; j < ctx->Rank(shape); ++j) { - shape_inference::DimensionHandle dim = ctx->Dim(shape, j); - int64 d = Value(dim); + for (int j = 0; j < InferenceContext::Rank(actual_shape); ++j) { + shape_inference::DimensionHandle dim = + InferenceContext::DimKnownRank(actual_shape, j); + int64 d = dims_.GetMergedValue(dim); properties->mutable_shape()->add_dim()->set_size(d); } } @@ -477,41 +493,6 @@ Status GraphProperties::InferStatically() { } } } - - // Infer output shape for Restore op. - if (node->op_def().name() == "Restore" || - node->op_def().name() == "RestoreV2" || - node->op_def().name() == "RestoreSlice") { - auto ctx = shape_refiner.GetContext(node); - for (const Edge* out_edge : node->out_edges()) { - const Node* output = out_edge->dst(); - int output_idx = out_edge->src_output(); - if (output_idx < 0) { - continue; - } - if (!ctx->FullyDefined(ctx->output(output_idx)) && - output->op_def().name() == "Assign") { - if (!output->attrs().Find("validate_shape") || - !output->attrs().Find("validate_shape")->b()) { - continue; - } - auto output_ctx = shape_refiner.GetContext(output); - if (output_ctx->FullyDefined(output_ctx->output(0))) { - ctx->set_output(output_idx, output_ctx->output(0)); - output_ctx->MergeInput(1, output_ctx->output(0)); - } else { - const Node* var; - TF_CHECK_OK(node->input_node(0, &var)); - if (node->IsVariable()) { - auto var_ctx = shape_refiner.GetContext(var); - CHECK(var_ctx->FullyDefined(var_ctx->output(0))); - ctx->set_output(output_idx, var_ctx->output(0)); - output_ctx->MergeInput(1, var_ctx->output(0)); - } - } - } - } - } } // Propagate the initial shapes of Enter nodes manually (the Enter shape @@ -691,7 +672,7 @@ Status GraphProperties::InferStatically() { input_properties.resize(ctx->num_inputs()); for (int i = 0; i < ctx->num_inputs(); ++i) { shape_manager.AsTensorProperties(ctx->input(i), node->input_type(i), - ctx, &input_properties[i]); + &input_properties[i]); } for (const auto& edge : node->in_edges()) { if (!edge->src()->IsConstant()) { @@ -718,7 +699,7 @@ Status GraphProperties::InferStatically() { output_properties.resize(ctx->num_outputs()); for (int i = 0; i < ctx->num_outputs(); ++i) { shape_manager.AsTensorProperties(ctx->output(i), node->output_type(i), - ctx, &output_properties[i]); + &output_properties[i]); } } } diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index acd0b598ae..f785f627e1 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -296,10 +296,9 @@ TEST_F(GraphPropertiesTest, Queues) { ASSERT_EQ(1, props2.size()); EXPECT_EQ("float: [3,7]", PropToString(props2[0])); - // The dequeue3 op shape is unknown. const auto props3 = properties.GetOutputProperties("Dequeue3"); ASSERT_EQ(1, props3.size()); - EXPECT_EQ("float: ?", PropToString(props3[0])); + EXPECT_EQ("float: [3,7]", PropToString(props3[0])); // The dequeue3 op shape is unknown. The square2 op shape is known. Verify // that we merge the 2 properly to determine the shape of the data coming out @@ -678,8 +677,8 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape) { TEST_F(GraphPropertiesTest, InferRestoreOpShape_WithTwoNodesShareSameOutput) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output var = - ops::Variable(s.WithOpName("var"), TensorShape(), DataType::DT_FLOAT); + Output var = ops::Variable(s.WithOpName("var"), PartialTensorShape(), + DataType::DT_FLOAT); Output var2 = ops::Variable(s.WithOpName("var2"), TensorShape({128, 256}), DataType::DT_FLOAT); Output filename = -- GitLab From 2598f7b6b3770cafb4b047740bac6d53e33ea2f7 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 9 Nov 2017 09:24:00 -0800 Subject: [PATCH 0095/1801] Explicitly disable vectorization in the LLVM IR generated for Dot. In practice this does not seem to make a difference, but I did it anyway for completeness. PiperOrigin-RevId: 175167706 --- .../service/llvm_ir/kernel_support_library.cc | 4 +- .../service/llvm_ir/kernel_support_library.h | 8 ++- .../compiler/xla/service/llvm_ir/llvm_loop.cc | 68 +++++++++++++------ .../compiler/xla/service/llvm_ir/llvm_loop.h | 29 +++++--- 4 files changed, 75 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc index 123a327d4d..29cc0f81bd 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc @@ -40,7 +40,9 @@ void KernelSupportLibrary::For( }); } else { std::unique_ptr loop = llvm_ir::ForLoop::EmitForLoop( - name, start, end, step, ir_builder_, prevent_unrolling_); + name, start, end, step, ir_builder_, + /*prevent_unrolling=*/prevent_unrolling_, + /*prevent_vectorization=*/prevent_vectorization_); ir_builder_->SetInsertPoint(&loop->GetBodyBasicBlock()->back()); for_body_generator(loop->GetIndVarValue(), /*is_first_iteration=*/ir_builder_->CreateICmpEQ( diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h index 25aa2291a6..9bafb7b577 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h @@ -33,8 +33,11 @@ class KernelSupportLibrary { // If `prevent_unrolling` is true then unrolling is explicitly disabled on // every loop generated by this instance of KernelSupportLibrary. explicit KernelSupportLibrary(llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = true) - : ir_builder_(ir_builder), prevent_unrolling_(prevent_unrolling) {} + bool prevent_unrolling = true, + bool prevent_vectorization = true) + : ir_builder_(ir_builder), + prevent_unrolling_(prevent_unrolling), + prevent_vectorization_(prevent_vectorization) {} // Generates the following control flow structure: // @@ -118,6 +121,7 @@ class KernelSupportLibrary { private: llvm::IRBuilder<>* ir_builder_; bool prevent_unrolling_; + bool prevent_vectorization_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc index 83d35cb9ef..7b227ce294 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc @@ -34,21 +34,24 @@ namespace llvm_ir { ForLoop::ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - llvm::Value* step, bool prevent_unrolling) + llvm::Value* step, bool prevent_unrolling, + bool prevent_vectorization) : prefix_(prefix.ToString()), suffix_(suffix.ToString()), start_index_(start_index), end_index_(end_index), step_(step), insert_before_bb_(nullptr), - prevent_unrolling_(prevent_unrolling) {} + prevent_unrolling_(prevent_unrolling), + prevent_vectorization_(prevent_vectorization) {} /* static */ std::unique_ptr ForLoop::EmitForLoop( tensorflow::StringPiece prefix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling) { - std::unique_ptr loop(new ForLoop( - prefix, /*suffix=*/"", start_index, end_index, step, prevent_unrolling)); + bool prevent_unrolling, bool prevent_vectorization) { + std::unique_ptr loop(new ForLoop(prefix, /*suffix=*/"", start_index, + end_index, step, prevent_unrolling, + prevent_vectorization)); loop->Emit(ir_builder); return loop; } @@ -127,14 +130,12 @@ void ForLoop::Emit(llvm::IRBuilder<>* ir_builder) { ir_builder->CreateStore(indvar_inc, indvar_address); llvm::BranchInst* back_branch = ir_builder->CreateBr(header_bb_); - if (prevent_unrolling_) { - const char* const kLlvmLoopUnrollDisableMDName = "llvm.loop.unroll.disable"; - llvm::LLVMContext* ctx = &back_branch->getContext(); - + std::vector loop_metadata = GetLoopMetadata(ir_builder); + if (!loop_metadata.empty()) { + llvm::LLVMContext* ctx = &start_index_->getContext(); auto temp_node = llvm::MDNode::getTemporary(*ctx, llvm::None); - auto no_unroll_node = llvm::MDNode::get( - *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollDisableMDName)}); - auto loop_id = llvm::MDNode::get(*ctx, {temp_node.get(), no_unroll_node}); + loop_metadata.insert(loop_metadata.begin(), temp_node.get()); + auto loop_id = llvm::MDNode::get(*ctx, loop_metadata); loop_id->replaceOperandWith(0, loop_id); back_branch->setMetadata(llvm::LLVMContext::MD_loop, loop_id); } @@ -143,6 +144,27 @@ void ForLoop::Emit(llvm::IRBuilder<>* ir_builder) { ir_builder->SetInsertPoint(exit_bb_); } +std::vector ForLoop::GetLoopMetadata( + llvm::IRBuilder<>* ir_builder) { + const char* const kLlvmLoopUnrollDisableMDName = "llvm.loop.unroll.disable"; + const char* const kLlvmLoopVectorizeMDName = "llvm.loop.vectorize.enable"; + llvm::LLVMContext* ctx = &start_index_->getContext(); + + std::vector result; + if (prevent_unrolling_) { + result.push_back(llvm::MDNode::get( + *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollDisableMDName)})); + } + + if (prevent_vectorization_) { + result.push_back(llvm::MDNode::get( + *ctx, {llvm::MDString::get(*ctx, kLlvmLoopVectorizeMDName), + llvm::ConstantAsMetadata::get(ir_builder->getFalse())})); + } + + return result; +} + string ForLoop::GetQualifiedName(tensorflow::StringPiece name) { return llvm_ir::IrName(prefix_, llvm_ir::IrName(name, suffix_)); } @@ -156,23 +178,25 @@ llvm::BasicBlock* ForLoop::CreateLoopBB(tensorflow::StringPiece name, std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { return AddLoop(suffix, start_index, end_index, ir_builder_->getInt64(1), - prevent_unrolling); + prevent_unrolling, prevent_vectorization); } std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { if (inner_loop_body_bb_ != nullptr) { // Create this loop inside the previous one. ir_builder_->SetInsertPoint(&*inner_loop_body_bb_->getFirstInsertionPt()); } std::unique_ptr loop(new ForLoop( /*prefix=*/name_, suffix, start_index, end_index, stride, - prevent_unrolling)); + prevent_unrolling, prevent_vectorization)); loop->Emit(ir_builder_); if (outer_loop_preheader_bb_ == nullptr) { @@ -191,20 +215,24 @@ std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, tensorflow::StringPiece suffix, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), - ir_builder_->getInt64(end_index), prevent_unrolling); + ir_builder_->getInt64(end_index), prevent_unrolling, + prevent_vectorization); } std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), ir_builder_->getInt64(end_index), - ir_builder_->getInt64(stride), prevent_unrolling); + ir_builder_->getInt64(stride), prevent_unrolling, + prevent_vectorization); } IrArray::Index ForLoopNest::AddLoopsForShape(const Shape& shape, diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h index 90f7c7df9e..20069ce5a2 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h @@ -71,12 +71,10 @@ class ForLoop { // // If `prevent_unrolling` is true then emit metadata that directs LLVM to not // unroll the generated loop. - static std::unique_ptr EmitForLoop(tensorflow::StringPiece prefix, - llvm::Value* start_index, - llvm::Value* end_index, - llvm::Value* step, - llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = false); + static std::unique_ptr EmitForLoop( + tensorflow::StringPiece prefix, llvm::Value* start_index, + llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, + bool prevent_unrolling = false, bool prevent_vectorization = false); // The names of the blocks follow LLVM's conventions. Control flow amongst the // blocks for the example C code looks like: @@ -130,7 +128,7 @@ class ForLoop { ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, - bool prevent_unrolling); + bool prevent_unrolling, bool prevent_vectorization); // Emit the loop at the insert point of the builder. void Emit(llvm::IRBuilder<>* ir_builder); @@ -142,6 +140,10 @@ class ForLoop { // they are set. string GetQualifiedName(tensorflow::StringPiece name); + // Return a list of metadata nodes that should be associated with the + // llvm::Loop for this `ForLoop`. + std::vector GetLoopMetadata(llvm::IRBuilder<>* ir_builder); + string prefix_; string suffix_; llvm::Value* start_index_; @@ -160,6 +162,7 @@ class ForLoop { llvm::BasicBlock* exit_bb_; llvm::Value* indvar_; bool prevent_unrolling_; + bool prevent_vectorization_; TF_DISALLOW_COPY_AND_ASSIGN(ForLoop); }; @@ -185,24 +188,28 @@ class ForLoopNest { std::unique_ptr AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // Like the above, except that it defaults to a stride of one. std::unique_ptr AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // A convenient wrapper of the other flavor of AddLoop. The given start and // end index are constant. std::unique_ptr AddLoop(int64 start_index, int64 end_index, int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // Like the above, except that it defaults to a stride of one. std::unique_ptr AddLoop(int64 start_index, int64 end_index, tensorflow::StringPiece suffix, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // Add loops to iterate through the indices within the specified // shape. The returned index collects the induction variables of the -- GitLab From 86f723beca5e651af6f703a8f5720d0f038ae3f1 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 9 Nov 2017 09:25:54 -0800 Subject: [PATCH 0096/1801] Internal Change. PiperOrigin-RevId: 175167946 --- tensorflow/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 8d3d38b5a1..8cb7edcc50 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -355,7 +355,7 @@ config_setting( visibility = ["//visibility:public"], ) -# Make a dummy rule that we can chaqnge "default" in select statements to. +# Make a dummy rule that we can change "default" in select statements to. # to disable dependencies in copybara. config_setting( name = "dummy_disabled_internal", -- GitLab From 09f99427f96d96393e71a4bda378493e0a6817de Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 9 Nov 2017 10:12:58 -0800 Subject: [PATCH 0097/1801] Adds explicity docstring about TF version in examples. PiperOrigin-RevId: 175174326 --- tensorflow/examples/learn/iris.py | 5 ++++- tensorflow/examples/learn/wide_n_deep_tutorial.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 0a50b3ba87..03e60972aa 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -11,7 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Example of DNNClassifier for Iris plant dataset.""" +"""Example of DNNClassifier for Iris plant dataset. + +This example uses APIs in Tensorflow 1.4 or above. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/examples/learn/wide_n_deep_tutorial.py b/tensorflow/examples/learn/wide_n_deep_tutorial.py index e447b3e24e..072353392a 100644 --- a/tensorflow/examples/learn/wide_n_deep_tutorial.py +++ b/tensorflow/examples/learn/wide_n_deep_tutorial.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Example code for TensorFlow Wide & Deep Tutorial using TF.Learn API.""" +"""Example code for TensorFlow Wide & Deep Tutorial using TF High Level API. + +This example uses APIs in Tensorflow 1.4 or above. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -- GitLab From 17532a3c5fd671a59002fac83c92344c451f9936 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 10:27:15 -0800 Subject: [PATCH 0098/1801] Supports multi-dimensional logits and labels in multi_label head and some cleanup. PiperOrigin-RevId: 175176635 --- .../estimator/python/estimator/head.py | 143 ++++++++---- .../estimator/python/estimator/head_test.py | 206 +++++++++++++++++- tensorflow/python/estimator/canned/head.py | 131 +++++------ .../python/estimator/canned/head_test.py | 10 +- 4 files changed, 379 insertions(+), 111 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index e344ee3c3e..a9311a20f1 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops @@ -48,7 +49,20 @@ def multi_class_head(n_classes, Uses `sparse_softmax_cross_entropy` loss. - This head expects to be fed integer labels specifying the class index. + The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. + In many applications, the shape is `[batch_size, n_classes]`. + + `labels` must be a dense `Tensor` with shape matching `logits`, namely + `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string + `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, + `labels` must be an integer `Tensor` with values specifying the class index. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. + + The loss is the weighted sum over the input dimensions. Namely, if the input + labels have shape `[batch_size, 1]`, the loss is the weighted sum over + `batch_size`. Args: n_classes: Number of classes, must be greater than 2 (for 2 classes, use @@ -57,11 +71,11 @@ def multi_class_head(n_classes, `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. - label_vocabulary: A list of strings represents possible label values. If it - is not given, that means labels are already encoded as integer within - [0, n_classes). If given, labels must be string type and have any value in - `label_vocabulary`. Also there will be errors if vocabulary is not - provided and labels are string. + label_vocabulary: A list or tuple of strings representing possible label + values. If it is not given, that means labels are already encoded as an + integer within [0, n_classes). If given, labels must be of string type and + have any value in `label_vocabulary`. Note that errors will be raised if + `label_vocabulary` is not provided but labels are strings. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -84,7 +98,20 @@ def binary_classification_head( This head uses `sigmoid_cross_entropy_with_logits` loss. - This head expects to be fed float labels of shape `(batch_size, 1)`. + The head expects `logits` with shape `[D0, D1, ... DN, 1]`. + In many applications, the shape is `[batch_size, 1]`. + + `labels` must be a dense `Tensor` with shape matching `logits`, namely + `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string + `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, + `labels` must be float `Tensor` with values in the interval `[0, 1]`. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. + + The loss is the weighted sum over the input dimensions. Namely, if the input + labels have shape `[batch_size, 1]`, the loss is the weighted sum over + `batch_size`. Args: weight_column: A string or a `_NumericColumn` created by @@ -96,11 +123,11 @@ def binary_classification_head( generated for each threshold value. This threshold is applied to the logistic values to determine the binary classification (i.e., above the threshold is `true`, below is `false`. - label_vocabulary: A list of strings represents possible label values. If it - is not given, that means labels are already encoded within [0, 1]. If - given, labels must be string type and have any value in - `label_vocabulary`. Also there will be errors if vocabulary is not - provided and labels are string. + label_vocabulary: A list or tuple of strings representing possible label + values. If it is not given, labels must be float with values within + [0, 1]. If given, labels must be string type and have any value in + `label_vocabulary`. Note that errors will be raised if `label_vocabulary` + is not provided but labels are strings. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -120,9 +147,22 @@ def binary_classification_head( def regression_head(weight_column=None, label_dimension=1, name=None): - """Creates a `_Head` for regression using the mean squared loss. + """Creates a `_Head` for regression using the `mean_squared_error` loss. + + The loss is the weighted sum over all input dimensions. Namely, if the input + labels have shape `[batch_size, label_dimension]`, the loss is the weighted + sum over both `batch_size` and `label_dimension`. + + The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`. + In many applications, the shape is `[batch_size, label_dimension]`. + + The `labels` shape must match `logits`, namely + `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape + `[D0, D1, ... DN]` is also supported. - Uses `mean_squared_error` loss. + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or + `[D0, D1, ... DN, label_dimension]`. Args: weight_column: A string or a `_NumericColumn` created by @@ -156,15 +196,29 @@ def multi_label_head(n_classes, or more associated labels, from a discrete set. This is distinct from `multi_class_head` which has exactly one label per example. - Uses `sigmoid_cross_entropy` loss averaged over classes. Expects labels as a - multi-hot tensor of shape `[batch_size, n_classes]`, or as an integer - `SparseTensor` of class indices. + Uses `sigmoid_cross_entropy` loss average over classes and weighted sum over + the batch. Namely, if the input logits have shape `[batch_size, n_classes]`, + the loss is the average over `n_classes` and the weighted sum over + `batch_size`. + + The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many + applications, the shape is `[batch_size, label_n_classes]`. + + Labels can be: + * A multi-hot tensor of shape `[D0, D1, ... DN, n_classes]` + * An integer `SparseTensor` of class indices. The `dense_shape` must be + `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`. + * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape` + must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary`. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features)` as arguments and returns unreduced loss with - shape `[batch_size, 1]`. `loss_fn` must support indicator `labels` with shape - `[batch_size, n_classes]`. Namely, the head applies `label_vocabulary` to the - input labels before passing them to `loss_fn`. + shape `[D0, D1, ... DN, 1]`. `loss_fn` must support indicator `labels` with + shape `[D0, D1, ... DN, n_classes]`. Namely, the head applies + `label_vocabulary` to the input labels before passing them to `loss_fn`. Args: n_classes: Number of classes, must be greater than 1 (for 1 class, use @@ -191,7 +245,7 @@ def multi_label_head(n_classes, An instance of `_Head` for multi-label classification. Raises: - ValueError: if `n_classes` or `thresholds` is invalid. + ValueError: if `n_classes`, `thresholds`, or `loss_fn` is invalid. """ thresholds = tuple(thresholds) if thresholds else tuple() if n_classes is None or n_classes < 2: @@ -259,26 +313,36 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) + return math_ops.to_int64( + sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) else: - label_ids = labels - return math_ops.to_int64( - sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) - msg = ('labels shape must be [batch_size, {}]. ' - 'Given: ').format(self._n_classes) - labels_shape = array_ops.shape(labels) - check_rank_op = control_flow_ops.Assert( - math_ops.equal(array_ops.rank(labels), 2), - data=[msg, labels_shape]) - check_label_dim = control_flow_ops.Assert( - math_ops.equal(labels_shape[-1], self._n_classes), - data=[msg, labels_shape]) - with ops.control_dependencies([check_rank_op, check_label_dim]): - return array_ops.identity(labels) + err_msg = ( + r'labels must be an integer SparseTensor with values in ' + r'[0, {})'.format(self._n_classes)) + assert_int = check_ops.assert_integer( + labels.values, message=err_msg) + assert_less = check_ops.assert_less( + labels.values, + ops.convert_to_tensor(self._n_classes, dtype=labels.dtype), + message=err_msg) + assert_greater = check_ops.assert_non_negative( + labels.values, message=err_msg) + with ops.control_dependencies( + [assert_int, assert_less, assert_greater]): + return math_ops.to_int64( + sparse_ops.sparse_to_indicator(labels, self._n_classes)) + err_msg = ( + r'labels must be an integer indicator Tensor with values in [0, 1]') + return head_lib._assert_range(labels, 2, message=err_msg) # pylint:disable=protected-access, def create_loss(self, features, mode, logits, labels): """See `Head`.""" del mode # Unused for this head. + logits = ops.convert_to_tensor(logits) processed_labels = self._process_labels(labels) + processed_labels = head_lib._check_dense_labels_match_logits_and_reshape( # pylint:disable=protected-access + labels=processed_labels, logits=logits, + expected_labels_dimension=self.logits_dimension) if self._loss_fn: unweighted_loss = _call_loss_fn( loss_fn=self._loss_fn, labels=processed_labels, logits=logits, @@ -290,7 +354,8 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access # Averages loss over classes. unweighted_loss = math_ops.reduce_mean( unweighted_loss, axis=-1, keep_dims=True) - weights = head_lib._weights(features, self._weight_column) # pylint:disable=protected-access, + weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, + features=features, weight_column=self._weight_column, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -305,7 +370,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" with ops.name_scope(self._name, 'head'): - logits = head_lib._check_logits(logits, self.logits_dimension) # pylint:disable=protected-access + logits = head_lib._check_logits_final_dim(logits, self.logits_dimension) # pylint:disable=protected-access # Predict. pred_keys = prediction_keys.PredictionKeys @@ -335,6 +400,8 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access # Eval. if mode == model_fn.ModeKeys.EVAL: + weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, + features=features, weight_column=self._weight_column, logits=logits) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, @@ -342,7 +409,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access eval_metric_ops=self._eval_metric_ops( labels=processed_labels, probabilities=probabilities, - weights=head_lib._weights(features, self._weight_column), # pylint:disable=protected-access, + weights=weights, weighted_sum_loss=weighted_sum_loss, example_weight_sum=example_weight_sum)) diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index fd8c53f6a9..d1cf909004 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -316,13 +316,14 @@ class MultiLabelHead(test.TestCase): _initialize_variables(self, monitored_session.Scaffold()) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'labels shape must be \[batch_size, 2\]\. Given: \] \[2 1\]'): + r'\[expected_labels_shape: \] \[2 2\] \[labels_shape: \] \[2 1\]'): actual_weighted_sum_loss.eval({ labels_placeholder: np.array([[1], [1]], dtype=np.int64) }) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'labels shape must be \[batch_size, 2\]\. Given: \] \[2\]'): + r'labels shape must be \[D0, D1, ... DN, 2\]\..*' + r'\[Received shape: \] \[2\]'): actual_weighted_sum_loss.eval({ labels_placeholder: np.array([1, 1], dtype=np.int64) }) @@ -387,9 +388,11 @@ class MultiLabelHead(test.TestCase): logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=None) - def _test_eval(self, head, logits, labels, expected_loss, expected_metrics): + def _test_eval( + self, head, logits, labels, expected_loss, expected_metrics, + features=None): spec = head.create_estimator_spec( - features={'x': np.array(((42,),), dtype=np.int32)}, + features=features or {}, mode=model_fn.ModeKeys.EVAL, logits=logits, labels=labels) @@ -655,6 +658,54 @@ class MultiLabelHead(test.TestCase): labels=None, train_op_fn=_no_op_train_fn) + def test_train_invalid_indicator_labels(self): + head = head_lib.multi_label_head(n_classes=2) + logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) + # The value 2 is outside the allowed range. + labels = np.array([[2, 0], [1, 1]], dtype=np.int64) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'labels must be an integer indicator Tensor with values in ' + r'\[0, 1\]'): + sess.run(spec.loss) + + def test_train_invalid_sparse_labels(self): + head = head_lib.multi_label_head(n_classes=2) + logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) + # The value 2 is outside the allowed range. + labels = sparse_tensor.SparseTensor( + values=[2, 0, 1], + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'labels must be an integer SparseTensor with values in \[0, 2\)'): + sess.run(spec.loss) + def _test_train(self, head, logits, labels, expected_loss): expected_train_result = 'my_train_op' def _train_op_fn(loss): @@ -791,6 +842,153 @@ class MultiLabelHead(test.TestCase): metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 3, }, summary_str, tol) + def test_multi_dim_weighted_train_create_loss(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) + # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 + # = [[20/3, 10/3], [4, 8]] + # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 + expected_weighted_sum_loss = 39.6667 + expected_example_weight_sum = np.sum(weights) + actual_weighted_sum_loss, actual_example_weight_sum, _ = head.create_loss( + features={'weights': weights}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels) + atol = 1.e-3 + with self.test_session(): + _initialize_variables(self, monitored_session.Scaffold()) + self.assertAllClose( + expected_weighted_sum_loss, actual_weighted_sum_loss.eval(), + atol=atol) + self.assertAllClose( + expected_example_weight_sum, actual_example_weight_sum.eval(), + atol=atol) + + def test_multi_dim_weighted_train(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) + # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 + # = [[20/3, 10/3], [4, 8]] + # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 + expected_loss = 39.6667 + expected_train_result = 'my_train_op' + def _train_op_fn(loss): + return string_ops.string_join( + [constant_op.constant(expected_train_result), + string_ops.as_string(loss, precision=3)]) + + spec = head.create_estimator_spec( + features={'weights': weights}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + + atol = 1.e-3 + with self.test_session() as sess: + _initialize_variables(self, monitored_session.Scaffold()) + loss, train_result = sess.run((spec.loss, spec.train_op)) + self.assertAllClose(expected_loss, loss, atol=atol) + self.assertEqual( + six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), + train_result) + + def test_multi_dim_weights_wrong_inner_dim(self): + """Logits and labels of shape [2, 2, 3], weights [2, 1].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1.], [2.]], dtype=np.float32) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={'weights': weights}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session(): + _initialize_variables(self, monitored_session.Scaffold()) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'): + spec.loss.eval() + + def test_multi_dim_weights_wrong_outer_dim(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2, 3].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[[1., 1., 1.], [1.5, 1.5, 1.5]], + [[2., 2., 2.], [2.5, 2.5, 2.5]]], dtype=np.float32) + weights_placeholder = array_ops.placeholder(dtype=dtypes.float32) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={'weights': weights_placeholder}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session(): + _initialize_variables(self, monitored_session.Scaffold()) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 2 3\]'): + spec.loss.eval({weights_placeholder: weights}) + + def test_multi_dim_weighted_eval(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) + # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 + # = [[20/3, 10/3], [4, 8]] + # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 + expected_loss = 39.6667 + keys = metric_keys.MetricKeys + expected_metrics = { + keys.LOSS_MEAN: expected_loss / np.sum(weights), + # auc and auc_pr cannot be reliably calculated for only 4 samples, but + # this assert tests that the algorithm remains consistent. + keys.AUC: 0.4977, + keys.AUC_PR: 0.6645, + } + self._test_eval( + head=head, + features={'weights': weights}, + logits=logits, + labels=labels, + expected_loss=expected_loss, + expected_metrics=expected_metrics) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index 2c3e18cb12..eaed412c8b 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -264,26 +264,55 @@ def _check_dense_labels_match_logits_and_reshape( return array_ops.identity(labels, name=scope) -def _check_weights_match_logits_and_reshape(weights, logits): - """Checks that weights shape matches logits and reshapes if needed. +def _get_weights_and_check_match_logits( + features, weight_column, logits, allow_per_logit_weights=False): + """Fetches weights from features and checks that the shape matches logits. Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape can be either: - * [D0, D1, ... DN, logits_dimension] + * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`. * [D0, D1, ... DN, 1] * [D0, D1, ... DN]: In this case, weights is reshaped into [D0, D1, ... DN, 1] to work with weight broadcasting rules. Args: - weights: weights Tensor. + features: The features dict that contains weights. + weight_column: The weight column. If not given, this method returns 1. logits: logits Tensor. + allow_per_logit_weights: Boolean. Whether we allow weights along the logits + dimension, namely shape `[D0, D1, ... DN, logits_dimension]`. Returns: Validated and reshaped weights Tensor. + Raises: + ValueError: If the weights `Tensor` cannot be cast into float. """ - err_msg = ( - 'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or ' - '[D0, D1, ... DN, logits_dimension]') - with ops.name_scope(None, 'weights', (weights, logits)) as scope: + if allow_per_logit_weights: + err_msg = ( + 'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or ' + '[D0, D1, ... DN, logits_dimension]') + else: + err_msg = ( + 'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]') + with ops.name_scope( + None, 'weights', + values=tuple(six.itervalues(features)) + (logits,)) as scope: + # Fetch the weights. + if weight_column is None: + return 1. + if isinstance(weight_column, six.string_types): + weight_column = feature_column_lib.numeric_column( + key=weight_column, shape=(1,)) + if not isinstance(weight_column, feature_column_lib._NumericColumn): # pylint: disable=protected-access + raise TypeError('Weight column must be either a string or _NumericColumn.' + ' Given type: {}.'.format(type(weight_column))) + weights = weight_column._get_dense_tensor( # pylint: disable=protected-access + feature_column_lib._LazyBuilder(features)) # pylint: disable=protected-access + if not (weights.dtype.is_floating or weights.dtype.is_integer): + raise ValueError('Weight column should be castable to float. ' + 'Given dtype: {}'.format(weights.dtype)) + weights = math_ops.to_float(weights, name='weights') + + # Validate the weights shape. weights_shape = array_ops.shape(weights, name='weights_shape') logits_shape = array_ops.shape(logits, name='logits_shape') if (weights.shape.ndims is not None and logits.shape.ndims is not None and @@ -295,42 +324,24 @@ def _check_weights_match_logits_and_reshape(weights, logits): with ops.control_dependencies([assert_dimension]): return array_ops.expand_dims(weights, -1, name=scope) supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]], axis=0) - condition = math_ops.reduce_any( - [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)), - math_ops.reduce_all(math_ops.equal( - supported_weights_shape, weights_shape))]) - assert_dimension = control_flow_ops.Assert( - condition=condition, - data=[err_msg, 'logits_shape: ', logits_shape, - 'weights_shape: ', weights_shape]) + if allow_per_logit_weights: + condition = math_ops.reduce_any( + [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)), + math_ops.reduce_all(math_ops.equal( + supported_weights_shape, weights_shape))]) + assert_dimension = control_flow_ops.Assert( + condition=condition, + data=[err_msg, 'logits_shape: ', logits_shape, + 'weights_shape: ', weights_shape]) + else: + assert_dimension = check_ops.assert_equal( + supported_weights_shape, weights_shape, message=err_msg, + data=['logits_shape: ', logits_shape, + 'weights_shape: ', weights_shape]) with ops.control_dependencies([assert_dimension]): return array_ops.identity(weights, name=scope) -# TODO(roumposg): Delete once all heads support multi-dim input. -def _check_logits(logits, expected_logits_dimension): - """Check logits type and shape.""" - with ops.name_scope(None, 'logits', (logits,)) as scope: - logits = math_ops.to_float(logits) - logits_shape = array_ops.shape(logits) - assert_rank = check_ops.assert_rank( - logits, 2, data=[logits_shape], - message='logits shape must be [batch_size, logits_dimension]') - with ops.control_dependencies([assert_rank]): - static_shape = logits.shape - if static_shape is not None: - dim1 = static_shape[1] - if (dim1 is not None) and (dim1 != expected_logits_dimension): - raise ValueError( - 'logits shape must be [batch_size, logits_dimension], got %s.' % - (static_shape,)) - assert_dimension = check_ops.assert_equal( - expected_logits_dimension, logits_shape[1], data=[logits_shape], - message='logits shape must be [batch_size, logits_dimension]') - with ops.control_dependencies([assert_dimension]): - return array_ops.identity(logits, name=scope) - - def _check_logits_final_dim(logits, expected_logits_dimension): """Checks that logits shape is [D0, D1, ... DN, logits_dimension].""" with ops.name_scope(None, 'logits', (logits,)) as scope: @@ -575,10 +586,8 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): labels=label_ids, logits=logits, reduction=losses.Reduction.NONE) # Restore the squeezed dim, so unweighted_loss matches the weights shape. unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=-1) - weights = _weights(features, self._weight_column) - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -680,7 +689,7 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): def _binary_logistic_head_with_sigmoid_cross_entropy_loss( weight_column=None, thresholds=None, label_vocabulary=None, name=None): - """Creates a `Head` for single label binary classification. + """Creates a `_Head` for single label binary classification. This head uses `sigmoid_cross_entropy_with_logits` loss. @@ -718,7 +727,7 @@ def _binary_logistic_head_with_sigmoid_cross_entropy_loss( suffixed by `"/" + name`. Also used as `name_scope` when creating ops. Returns: - An instance of `Head` for binary classification. + An instance of `_Head` for binary classification. Raises: ValueError: if `thresholds` contains a value outside of `(0, 1)`. @@ -852,10 +861,8 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): labels = _assert_range(labels, 2) unweighted_loss = nn.sigmoid_cross_entropy_with_logits( labels=labels, logits=logits) - weights = _weights(features, self._weight_column) - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -918,12 +925,8 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): # Eval. if mode == model_fn.ModeKeys.EVAL: - weights = _weights(features, self._weight_column) - # TODO(roumposg): Merge this logic inside _weights once all heads - # support multi-dimensional inputs. - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, @@ -957,7 +960,7 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): def _regression_head_with_mean_squared_error_loss(weight_column=None, label_dimension=1, name=None): - """Creates a `_Head` for regression using the mean squared loss. + """Creates a `_Head` for regression using the `mean_squared_error` loss. The loss is the weighted sum over all input dimensions. Namely, if the input labels have shape `[batch_size, label_dimension]`, the loss is the weighted @@ -1023,10 +1026,9 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): labels = math_ops.to_float(labels) unweighted_loss = losses.mean_squared_error( labels=labels, predictions=logits, reduction=losses.Reduction.NONE) - weights = _weights(features, self._weight_column) - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits, + allow_per_logit_weights=True) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -1111,18 +1113,19 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): train_op=train_op_fn(weighted_sum_loss)) -def _assert_range(labels, n_classes): +def _assert_range(labels, n_classes, message=None): with ops.name_scope(None, 'assert_range', (labels,)): assert_less = check_ops.assert_less( labels, ops.convert_to_tensor(n_classes, dtype=labels.dtype), - message='Label IDs must < n_classes') + message=message or 'Label IDs must < n_classes') assert_greater = check_ops.assert_non_negative( - labels, message='Label IDs must >= 0') + labels, message=message or 'Label IDs must >= 0') with ops.control_dependencies((assert_less, assert_greater)): return array_ops.identity(labels) +# TODO(b/69000400): Delete this method. def _weights(features, weight_column): """Fetches weights from features.""" with ops.name_scope(None, 'weights', values=features.values()): diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 0a4ea7d81c..4497cd26f2 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -987,12 +987,14 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): spec.loss.eval() def test_multi_dim_train_weights_wrong_outer_dim(self): - """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 2].""" + """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 3].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) + weights = np.array([[[1., 1.1, 1.2], [1.5, 1.6, 1.7]], + [[2., 2.1, 2.2], [2.5, 2.6, 2.7]]]) weights_placeholder = array_ops.placeholder(dtype=dtypes.float32) def _no_op_train_fn(loss): del loss @@ -1008,10 +1010,8 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): _initialize_variables(self, monitored_session.Scaffold()) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 2\]'): - spec.loss.eval({ - weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]], - [[2., 2.1], [2.5, 2.6]]])}) + r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 3\]'): + spec.loss.eval({weights_placeholder: weights}) def test_multi_dim_weighted_eval(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2].""" -- GitLab From c4a2562dfcd8dd61f4d2c4ce88f3b72eeb888a5a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 10:35:13 -0800 Subject: [PATCH 0099/1801] Allow a key type without a constructor that takes an int in Squawd. PiperOrigin-RevId: 175178089 --- .../lib/quantiles/weighted_quantiles_buffer.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h index 5e316538ce..70037d5bd8 100644 --- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h +++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h @@ -33,9 +33,9 @@ template Date: Thu, 9 Nov 2017 12:01:53 -0800 Subject: [PATCH 0100/1801] Fix typo in tensorflow/python/layers/base_test.py COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/14412 from yifeif:yifeif-patch-3 4b91380c6fc1f995d48a5f184e7307f776541bd0 PiperOrigin-RevId: 175192097 --- tensorflow/python/estimator/BUILD | 2 -- tensorflow/python/layers/base_test.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index dba7761700..03f386e9cf 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -245,8 +245,6 @@ py_test( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/feature_column", - "//third_party/py/numpy", - "//third_party/py/pandas", "@six_archive//:six", ], ) diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 7ddfe37827..509ad5a7af 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -47,7 +47,7 @@ class BaseLayerTest(test.TestCase): self.assertEqual(layer.trainable_variables, []) self.assertEqual(layer.non_trainable_variables, []) if context.in_graph_mode(): - # updates, losses only suppported in GRAPH mode + # updates, losses only supported in GRAPH mode self.assertEqual(layer.updates, []) self.assertEqual(layer.losses, []) self.assertEqual(layer.built, False) -- GitLab From 534c6176f6b8704f0944ad17cc3fba5ff26784ed Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 9 Nov 2017 12:22:01 -0800 Subject: [PATCH 0101/1801] `replicate_model_fn` supports aggregating gradients in IndexedSlices. `gradients.gradients` may return computed gradients in IndexedSlices as opposed to a Tensor: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/gradients_impl.py#L881. `replicate_model_fn` currently uses math_ops.add_n to aggregate gradients from all towers. It doesn't work with IndexedSlices and thus needs to be handled separately. PiperOrigin-RevId: 175194893 --- .../python/estimator/replicate_model_fn.py | 25 +++++- .../estimator/replicate_model_fn_test.py | 87 +++++++++++++++++-- 2 files changed, 104 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index 7005a647db..421bf18c45 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -34,10 +34,12 @@ from tensorflow.python.estimator import util from tensorflow.python.estimator.export import export_output as export_output_lib from tensorflow.python.framework import device as framework_device from tensorflow.python.framework import ops as ops_lib +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients as gradients_lib from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib @@ -183,10 +185,17 @@ def _split_batch(features, labels, number_of_shards, device): """Split input features and labes into batches.""" def split_dictionary(dictionary): + """Split a dictionary into shards.""" shards = [{} for _ in range(number_of_shards)] for name, tensor in six.iteritems(dictionary): - for i, shard in enumerate(array_ops.split(tensor, number_of_shards)): - shards[i][name] = shard + if isinstance(tensor, sparse_tensor.SparseTensor): + for i, shard in enumerate( + sparse_ops.sparse_split( + sp_input=tensor, num_split=number_of_shards, axis=0)): + shards[i][name] = shard + else: + for i, shard in enumerate(array_ops.split(tensor, number_of_shards)): + shards[i][name] = shard return shards with ops_lib.name_scope('split_inputs'): @@ -313,7 +322,17 @@ def _call_optimizer_fn(optimizer_fn, params): def _compute_sum_on_device(values, device, name=None): with ops_lib.device(device): - return math_ops.add_n(values, name=name) + if isinstance(values[0], ops_lib.IndexedSlices): + if name: + raise ValueError('The name {} is not expected to be given to ' + 'IndexedSlices {}'.format(name, values)) + + values_concat = array_ops.concat([v.values for v in values], axis=0) + indices_concat = array_ops.concat([v.indices for v in values], axis=0) + return ops_lib.IndexedSlices(values_concat, indices_concat, + values[0].dense_shape) + else: + return math_ops.add_n(values, name=name) def _train_spec(tower_specs, diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index ce286c33b0..c90169af8c 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -65,20 +65,35 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): data = np.linspace( 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) x_data = data.reshape(batch_size, input_dimension) + categorical_data = np.random.random_integers( + 0, len(x_data), size=len(x_data)) y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) train_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data}, + x={'x': x_data, + 'categories': categorical_data}, y=y_data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False) + x={'x': x_data, + 'categories': categorical_data}, + y=y_data, + batch_size=batch_size, + shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data}, batch_size=batch_size, shuffle=False) + x={'x': x_data, + 'categories': categorical_data}, + batch_size=batch_size, + shuffle=False) feature_columns = [ - feature_column.numeric_column('x', shape=(input_dimension,)) + feature_column.numeric_column('x', shape=(input_dimension,)), + feature_column.indicator_column( + feature_column.categorical_column_with_vocabulary_list( + 'categories', + vocabulary_list=np.linspace( + 0., len(x_data), len(x_data), dtype=np.int64))) ] estimator = dnn.DNNClassifier( @@ -858,7 +873,7 @@ class LocalDeviceSetterTest(test_util.TensorFlowTestCase): class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): - def test_example(self): + def test_vectors(self): with self.test_session() as session: total = replicate_model_fn._compute_sum_on_device( [1.0, 2.0, 3.0, 4.0], device='/device:GPU:0', name='test_sum') @@ -867,6 +882,68 @@ class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): self.assertEqual('test_sum', total.op.name) self.assertEqual(10.0, session.run(total)) + def test_tensors(self): + with self.test_session() as session: + total = replicate_model_fn._compute_sum_on_device( + [[1.0, 2.0], [3.0, 4.0]], device='/device:GPU:0', name='test_sum') + + self.assertEqual('/device:GPU:0', total.device) + self.assertEqual('test_sum', total.op.name) + self.assertAllEqual([4.0, 6.0], session.run(total)) + + def test_indexedslices(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 1], + dense_shape=constant_op.constant([2])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([4.0, 6.0], + session.run(ops_lib.convert_to_tensor(total))) + + def test_indexedslices_higher_dimensions(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([[1.0, 5.0], [2.0, 6.0]]), [0, 1], + dense_shape=constant_op.constant([2, 4])) + b = ops_lib.IndexedSlices( + constant_op.constant([[3.0, 7.0], [4.0, 8.0]]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([[4.0, 12.0], [6.0, 14.0]], + session.run(ops_lib.convert_to_tensor(total))) + + def test_indexedslices_some_dont_overlap(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 3], + dense_shape=constant_op.constant([4])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([4.0, 4.0, 0.0, 2.0], + session.run(ops_lib.convert_to_tensor(total))) + + def test_no_name_for_indexslices(self): + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 1], + dense_shape=constant_op.constant([2])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + with self.assertRaisesRegexp(ValueError, ''): + _ = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0', name='cant_name_indexslices') + class ConcatTensorDictsTest(test_util.TensorFlowTestCase): -- GitLab From c51b3c301d60697bb498d19ea5068ddfb2525f95 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 12:24:28 -0800 Subject: [PATCH 0102/1801] Fix cmake build. PiperOrigin-RevId: 175195239 --- tensorflow/contrib/cmake/tf_c.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index f3882e8cf7..3ae28b7601 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -21,7 +21,6 @@ set(tf_c_srcs "${tensorflow_source_dir}/tensorflow/c/c_api_function.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.h" - "${tensorflow_source_dir}/tensorflow/c/eager/tape.cc" "${tensorflow_source_dir}/tensorflow/c/eager/tape.h" "${tensorflow_source_dir}/tensorflow/c/eager/runtime.cc" "${tensorflow_source_dir}/tensorflow/c/eager/runtime.h" -- GitLab From e384cf3822a95fad8a83d8b5e364321244a2c6dd Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 12:48:14 -0800 Subject: [PATCH 0103/1801] Use error status instead of assertions to ensure shape consistency PiperOrigin-RevId: 175198248 --- .../core/grappler/costs/graph_properties.cc | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 151455778a..35048a4fcf 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -108,32 +108,35 @@ struct Processor { if (dim1 >= 0 && dim2 >= 0) { CHECK_EQ(dim1, dim2); - RefineDim(dim1, result); + return RefineDim(dim1, result); } else if (dim1 >= 0 && dim2 < 0) { - RefineDim(dim1, result); + return RefineDim(dim1, result); } else if (dim1 < 0 && dim2 >= 0) { - RefineDim(dim2, result); + return RefineDim(dim2, result); } else if (dim1 < -1) { - RefineDim(dim1, result); + return RefineDim(dim1, result); } else if (dim2 < -1) { - RefineDim(dim2, result); + return RefineDim(dim2, result); } else { CHECK_EQ(dim1, dim2); CHECK_EQ(-1, dim1); - RefineDim(-1, result); + return RefineDim(-1, result); } return Status::OK(); } private: - void RefineDim(int64 dim, int64* result) { + Status RefineDim(int64 dim, int64* result) { if (*result >= 0) { - CHECK(*result == dim || dim < 0); + if (!(*result == dim || dim < 0)) { + return errors::InvalidArgument("Inconsistent dimensions detected"); + } } else if (dim >= 0) { *result = dim; } else if (dim < *result) { *result = dim; } + return Status::OK(); } int64 counter = 2; -- GitLab From c693b3130fabde91b09c160f36f3ac1eed6311f6 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 12:54:10 -0800 Subject: [PATCH 0104/1801] Disable tensorflow/contrib/data/python/kernel_tests:prefetching_ops_test. Flaky in open source build. PiperOrigin-RevId: 175199083 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index c1f1d90c5d..d811683ecd 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -448,6 +448,7 @@ py_test( size = "small", srcs = ["prefetching_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_oss"], # b/68785503 deps = [ "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:prefetching_py", -- GitLab From 10a2b450d26eca33b880fdc4887946d60064ef50 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:02:38 -0800 Subject: [PATCH 0105/1801] Add per-host input for multi-host setup. PiperOrigin-RevId: 175200199 --- .../contrib/tpu/python/tpu/tpu_config.py | 5 +- .../contrib/tpu/python/tpu/tpu_estimator.py | 111 ++++++++++-------- 2 files changed, 61 insertions(+), 55 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 097acd5ee7..916b9b3082 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -45,10 +45,7 @@ class TPUConfig( is invoked once on each host. To be precise, with a global batch size `train_batch_size` in `TPUEstimator` constructor, the batch size for each shard is `train_batch_size` // #hosts. With Per-Core input pipeline - deployment, the shard batch size is `train_batch_size` // #cores. Note - that this only works for single-host TPU training now (tracked in - b/67051042). For multi-host, please use Per-Core, i.e., `False` for - `per_host_input_for_training`. + deployment, the shard batch size is `train_batch_size` // #cores. tpu_job_name: The name of the TPU job. Typically, this name is auto-inferred within TPUEstimator, however when using ClusterSpec propagation in more esoteric cluster configurations, you may need to specify the job name as a diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 16d712af9e..07877fcc76 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -232,8 +232,10 @@ class _TPUContext(object): mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size) # On TPU - return (global_batch_size // self.num_cores - if self.is_input_sharded_per_core() else global_batch_size) + if self.is_input_sharded_per_core(): + return global_batch_size // self.num_cores + else: + return global_batch_size // self.num_hosts @property def batch_size_for_model_fn(self): @@ -682,6 +684,40 @@ def generate_per_core_enqueue_ops_fn_for_host( return enqueue_ops_fn, (lambda: infeed_queue_holder['instance']) +def generate_per_host_enqueue_ops_fn_for_host( + ctx, input_fn, inputs_structure_recorder, batch_axis, device): + """Generates infeed enqueue ops for per-host input_fn on a single host.""" + infeed_queue_holder = {'instance': None} + + def enqueue_ops_fn(): + with ops.device(device): + num_cores_per_host = ctx.num_of_cores_per_host + inputs = input_fn() + if isinstance(inputs, tuple): + features, labels = inputs + else: + features, labels = inputs, None + inputs_structure_recorder.validate_and_record_structure( + features, labels) + unsharded_tensor_list = ( + inputs_structure_recorder.flatten_features_and_labels( + features, labels)) + + infeed_queue = tpu_feed.InfeedQueue( + tuple_types=[t.dtype for t in unsharded_tensor_list], + tuple_shapes=[t.shape for t in unsharded_tensor_list], + shard_dimensions=batch_axis) + infeed_queue_holder['instance'] = infeed_queue + infeed_queue.set_number_of_shards(num_cores_per_host) + + per_host_enqueue_ops = ( + infeed_queue.split_inputs_and_generate_enqueue_ops( + unsharded_tensor_list, + placement_function=lambda x: device)) + return per_host_enqueue_ops + return enqueue_ops_fn, (lambda: infeed_queue_holder['instance']) + + class _InputPipeline(object): """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue. @@ -856,15 +892,15 @@ class _InputPipeline(object): return (enqueue_ops, dequeue_fn) def _invoke_input_fn_and_record_structure(self): + """Deploys the input pipeline and record input structure.""" + enqueue_ops = [] + infeed_queues = [] + num_hosts = self._ctx.num_hosts + tpu_host_placement_fn = self._ctx.tpu_host_placement_function if self._sharded_per_core: # Per-Core input pipeline deployment. - tpu_host_placement_fn = self._ctx.tpu_host_placement_function - enqueue_ops = [] - infeed_queues = [] - # Invoke input pipeline for each core and placed on the corresponding # host. - num_hosts = self._ctx.num_hosts for host_id in range(num_hosts): host_device = tpu_host_placement_fn(host_id=host_id) with ops.device(host_device): @@ -881,48 +917,27 @@ class _InputPipeline(object): # Infeed_queue_getter must be called after enqueue_ops_fn is called. infeed_queues.append(infeed_queue_getter()) - # infeed_queue is used to generate dequeue ops. The only thing it uses for - # dequeue is dtypes and types. So, any one can be used. Here, grab the - # first one. - self._infeed_queue = infeed_queues[0] - return enqueue_ops - else: - # TODO(b/67051042): Extend this to multi-host support. - host_id = 0 - host_device = self._ctx.tpu_host_placement_function(host_id=host_id) - def enqueue_fn(): + for host_id in range(num_hosts): + host_device = tpu_host_placement_fn(host_id=host_id) with ops.device(host_device): with ops.name_scope('input_pipeline_task%d' % (host_id)): - inputs = self._input_fn() - if isinstance(inputs, tuple): - features, labels = inputs - else: - features, labels = inputs, None - self._inputs_structure_recorder.validate_and_record_structure( - features, labels) - unsharded_tensor_list = ( - self._inputs_structure_recorder.flatten_features_and_labels( - features, labels)) - - self._infeed_queue = tpu_feed.InfeedQueue( - tuple_types=[t.dtype for t in unsharded_tensor_list], - tuple_shapes=[t.shape for t in unsharded_tensor_list], - shard_dimensions=self._batch_axis) - self._infeed_queue.set_number_of_shards(self._ctx.num_cores) - - def placement_fn(core_id): - return self._ctx.tpu_host_placement_function(core_id=core_id) - return ( - self._infeed_queue.split_inputs_and_generate_enqueue_ops( - unsharded_tensor_list, - placement_function=placement_fn)) + enqueue_ops_fn, infeed_queue_getter = ( + generate_per_host_enqueue_ops_fn_for_host( + self._ctx, self._input_fn, self._inputs_structure_recorder, + self._batch_axis, host_device)) - if _WRAP_INPUT_FN_INTO_WHILE_LOOP: - return _wrap_computation_in_while_loop(device=host_device, - op_fn=enqueue_fn) - else: - return enqueue_fn() + if _WRAP_INPUT_FN_INTO_WHILE_LOOP: + enqueue_ops.append(_wrap_computation_in_while_loop( + device=host_device, op_fn=enqueue_ops_fn)) + else: + enqueue_ops.append(enqueue_ops_fn()) + infeed_queues.append(infeed_queue_getter()) + # infeed_queue is used to generate dequeue ops. The only thing it uses for + # dequeue is dtypes and types. So, any one can be used. Here, grab the + # first one. + self._infeed_queue = infeed_queues[0] + return enqueue_ops def _validate_input_pipeline(self): # Perform some sanity checks to log user friendly information. We should @@ -1425,12 +1440,6 @@ class TPUEstimator(estimator_lib.Estimator): 'eval batch size {} must be divisible by number of shards {}' .format(eval_batch_size, config.tpu_config.num_shards)) - if (config.tpu_config.num_shards > 8 and - config.tpu_config.per_host_input_for_training): - # TODO(b/67051042): Support per_host input pipelines when num_shards > 8 - raise NotImplementedError( - 'Per-host input pipelines only available for num_shards <= 8') - # Verifies the model_fn signature according to Estimator framework. estimator_lib._verify_model_fn_args(model_fn, params) # pylint: disable=protected-access # We cannot store config and params in this constructor as parent -- GitLab From 73b7d47031dc53ef52ef028dc0a830de8ec18238 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 9 Nov 2017 13:10:02 -0800 Subject: [PATCH 0106/1801] Disable flaky tests in replicate_model_fn_test.py. I suspect that reducing local variables for eval metrics over more than one tower is flaky, but I haven't figured out why yet. PiperOrigin-RevId: 175201241 --- .../estimator/replicate_model_fn_test.py | 108 +++++++++--------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index c90169af8c..bb06700160 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -223,33 +223,34 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): features, labels, self.params) del estimator_spec - def test_eval(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn.replicate_model_fn( - self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, - labels, self.params) - session.run(variables.local_variables_initializer()) - session.run(variables.global_variables_initializer()) - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - # Accuracy is 0.0 (no match) in the first tower. - # Accuracy is 1.0 (match) in the second tower, since the feature - # times weight "c" happened to be equal to the label. - total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) - - self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) +# TODO(isaprykin): Resolve the source of flakinness. +# def test_eval(self): +# features = np.array([[0.01], [0.002]]) +# labels = np.array([[0.01], [0.02]]) +# +# with self.test_session() as session: +# replicated_model_fn = replicate_model_fn.replicate_model_fn( +# self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) +# estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, +# labels, self.params) +# session.run(variables.local_variables_initializer()) +# session.run(variables.global_variables_initializer()) +# +# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] +# auc, b = estimator_spec.eval_metric_ops['auc'] +# +# session.run([a, b]) +# accuracy = session.run(accuracy) +# auc = session.run(auc) +# +# # Accuracy is 0.0 (no match) in the first tower. +# # Accuracy is 1.0 (match) in the second tower, since the feature +# # times weight "c" happened to be equal to the label. +# total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) +# +# self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) +# self.assertEqual(0, auc) +# self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) def test_predict(self): features = np.array([[0.01], [0.002]]) @@ -523,32 +524,33 @@ class EvalSpecTest(test_util.TensorFlowTestCase): } return metrics - def test_example(self): - with self.test_session() as session: - tower_losses = map(self.create_constant_loss, [2, 4, 6]) - tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) - tower_specs = [ - self.create_estimator_spec(l, m) - for l, m in zip(tower_losses, tower_metrics) - ] - session.run(variables.local_variables_initializer()) - - estimator_spec = replicate_model_fn._eval_spec( - tower_specs, aggregation_device='/device:GPU:0') - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - self.assertEqual('/device:CPU:0', accuracy.device) - self.assertEqual('/device:CPU:0', auc.device) - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - self.assertNear((12 - 2) / 12, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) +# TODO(isaprykin): Resolve the source of flakinness. +# def test_example(self): +# with self.test_session() as session: +# tower_losses = map(self.create_constant_loss, [2, 4, 6]) +# tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) +# tower_specs = [ +# self.create_estimator_spec(l, m) +# for l, m in zip(tower_losses, tower_metrics) +# ] +# session.run(variables.local_variables_initializer()) +# +# estimator_spec = replicate_model_fn._eval_spec( +# tower_specs, aggregation_device='/device:GPU:0') +# +# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] +# auc, b = estimator_spec.eval_metric_ops['auc'] +# +# self.assertEqual('/device:CPU:0', accuracy.device) +# self.assertEqual('/device:CPU:0', auc.device) +# +# session.run([a, b]) +# accuracy = session.run(accuracy) +# auc = session.run(auc) +# +# self.assertNear((12 - 2) / 12, accuracy, 0.01) +# self.assertEqual(0, auc) +# self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) def test_handles_single_tower(self): with self.test_session() as session: -- GitLab From 11b9c430fac6a68972012d8b34b3f216a7b9e650 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 13:26:45 -0800 Subject: [PATCH 0107/1801] EagerVariableStore.trainable_variables() PiperOrigin-RevId: 175203593 --- .../python/kernel_tests/variable_scope_test.py | 12 ++++++++++++ tensorflow/python/ops/variable_scope.py | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index bd4b12b7e8..5396214956 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -117,6 +117,18 @@ class VariableScopeTest(test.TestCase): w = variable_scope.get_variable("w", []) self.assertEqual(w.dtype.base_dtype, dtypes.float16) + def testEagerVaribleStore(self): + with context.eager_mode(): + store = variable_scope.EagerVariableStore() + with store.as_default(): + v = variable_scope.get_variable("v", shape=(), trainable=True) + w = variable_scope.get_variable("w", shape=(), trainable=False) + + self.assertTrue(v in store.variables()) + self.assertTrue(w in store.variables()) + self.assertTrue(v in store.trainable_variables()) + self.assertFalse(w in store.trainable_variables()) + @test_util.run_in_graph_and_eager_modes() def testInitFromNonTensorValue(self): v = variable_scope.get_variable("v4", initializer=4, dtype=dtypes.int32) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 92fa928eed..9a0ff75594 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1227,6 +1227,11 @@ class EagerVariableStore(object): def variables(self): return self._store._vars.values() # pylint: disable=protected-access + def trainable_variables(self): + # pylint: disable=protected-access + return [x for x in self._store._vars.values() if x._trainable] + # pylint: enable=protected-access + def get_variable(name, shape=None, -- GitLab From e830e6ddcb20ff2f7391b7c896bdb5004d5dda88 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:29:56 -0800 Subject: [PATCH 0108/1801] Modify quantization to support add ops that occur after all quantizable types, not just Conv2D. PiperOrigin-RevId: 175204002 --- .../contrib/quantize/python/quantize.py | 4 +-- .../contrib/quantize/python/quantize_test.py | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 6382d3f7b4..7db2d863aa 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -89,8 +89,8 @@ def Quantize(graph, op.name[:-len('/depthwise')]) if separable_conv and separable_conv.type == 'Conv2D': continue - if op.type == 'Conv2D': - # Quantize add ops that come after Conv2D + # Quantize add ops that come after Conv2D or DepthwiseConv2dNative. + if op.type in ['Conv2D', 'DepthwiseConv2dNative']: add_context_re = re.search(r'^(.*)/[^/]+/', op.name) if add_context_re is not None: context.add_contexts.add(add_context_re.group(1)) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index eb141a21bd..1e4dd7cf67 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.platform import googletest conv2d = layers.conv2d +separable_conv2d = layers.separable_conv2d class QuantizeTest(test_util.TensorFlowTestCase): @@ -77,6 +78,30 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(add_quant.type, quantization_node_name) + def testInsertQuantOpForAddAfterSeparableConv2d(self): + graph = ops.Graph() + with graph.as_default(): + batch_size, height, width, depth = 5, 128, 128, 3 + input1 = array_ops.zeros((batch_size, height, width, depth)) + input2 = array_ops.zeros((batch_size, height / 2, width / 2, depth)) + conv = separable_conv2d(input1, None, [5, 5], stride=2, + depth_multiplier=1.0, padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, scope='test/test') + node = math_ops.add(conv, input2, name='test/add') + node = array_ops.identity(node, name='test/identity') + update_barrier = control_flow_ops.no_op(name='update_barrier') + with ops.control_dependencies([update_barrier]): + array_ops.identity(node, name='control_dependency') + + quantize.Quantize(graph=graph, weight_bits=8, weight_narrow_range=True, + activation_bits=8) + + quantization_node_name = 'FakeQuantWithMinMaxVars' + add_quant = graph.get_operation_by_name('test/add_quant/' + + quantization_node_name) + self.assertEqual(add_quant.type, quantization_node_name) + def _WeightInit(self, stddev): """Returns truncated normal variable initializer. -- GitLab From ea33185cc154bb80741bf4a8a7321aae4b5396cd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:30:25 -0800 Subject: [PATCH 0109/1801] Fix GANEstimator docstring. PiperOrigin-RevId: 175204075 --- .../contrib/gan/python/estimator/python/gan_estimator_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index e89993991a..0824ecf616 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -76,7 +76,7 @@ class GANEstimator(estimator.Estimator): return logits # Create GAN estimator. - gan_estimator = estimator.GANEstimator( + gan_estimator = tfgan.estimator.GANEstimator( model_dir, generator_fn=generator_fn, discriminator_fn=discriminator_fn, -- GitLab From b1ce37a0fe442162a0958510b146dcfc2507f27f Mon Sep 17 00:00:00 2001 From: Cem Eteke Date: Fri, 10 Nov 2017 01:19:32 +0300 Subject: [PATCH 0110/1801] Data type support for seq2seq attention mechanisms (#12007) * Dtype support for Attention Wrapper added * Reviews * Converting string dtype to tf.dtype --- .../seq2seq/python/ops/attention_wrapper.py | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 839df079ee..0c64c9caf1 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism): memory_sequence_length=None, memory_layer=None, check_inner_dims_defined=True, - score_mask_value=float("-inf"), + score_mask_value=None, name=None): """Construct base AttentionMechanism class. @@ -187,9 +187,12 @@ class _BaseAttentionMechanism(AttentionMechanism): "memory_layer is not a Layer: %s" % type(memory_layer).__name__) self._query_layer = query_layer self._memory_layer = memory_layer + self.dtype = memory_layer.dtype if not callable(probability_fn): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) + if score_mask_value is None: + score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -334,7 +337,8 @@ class LuongAttention(_BaseAttentionMechanism): memory_sequence_length=None, scale=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="LuongAttention"): """Construct the AttentionMechanism mechanism. @@ -353,17 +357,20 @@ class LuongAttention(_BaseAttentionMechanism): score_mask_value: (optional) The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the memory layer of the attention mechanism. name: Name to use when creating ops. """ # For LuongAttention, we only transform the memory layer; thus # num_units **must** match expected the query depth. if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(LuongAttention, self).__init__( query_layer=None, memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -475,7 +482,8 @@ class BahdanauAttention(_BaseAttentionMechanism): memory_sequence_length=None, normalize=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="BahdanauAttention"): """Construct the Attention mechanism. @@ -494,16 +502,20 @@ class BahdanauAttention(_BaseAttentionMechanism): score_mask_value: (optional): The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(BahdanauAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -734,11 +746,12 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, normalize=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="BahdanauMonotonicAttention"): """Construct the Attention mechanism. @@ -762,17 +775,21 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(BahdanauMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -830,11 +847,12 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, scale=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="LuongMonotonicAttention"): """Construct the Attention mechanism. @@ -858,17 +876,21 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(LuongMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -1119,8 +1141,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, name="attention_layer", use_bias=False) - for attention_layer_size in attention_layer_sizes) + attention_layer_size, name="attention_layer", use_bias=False, + dtype=attention_mechanisms[i].dtype) + for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) else: self._attention_layers = None -- GitLab From 96575f01df25005107fd1ce822b20bc6906baa3b Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 9 Nov 2017 14:45:51 -0800 Subject: [PATCH 0111/1801] Fixing master regression in slice op (#14329) --- tensorflow/core/kernels/slice_op.cc | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 4849818605..28a379774b 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -252,8 +252,25 @@ class MklSliceOp : public OpKernel { if (input_dims == 4) { HandleCase4D(context, begin, size, result); } else { - functor::Slice()( - context->eigen_device(), result, input, begin, size); +#define HANDLE_DIM(NDIM) \ + if (input_dims == NDIM) { \ + functor::Slice()( \ + context->eigen_device(), result, input, begin, size); \ + return; \ + } + + HANDLE_DIM(1); + HANDLE_DIM(2); + HANDLE_DIM(3); + HANDLE_DIM(4); + HANDLE_DIM(5); + HANDLE_DIM(6); + +#undef HANDLE_DIM + + // handle cases which dim >= 7 + functor::Slice()( + context->eigen_device(), result, input, begin, size); } } } @@ -375,7 +392,7 @@ class MklSliceOp : public OpKernel { } functor::Slice()( - context->eigen_device(), result, input, begin, size); + context->eigen_device(), result, context->input(0), begin, size); } }; #endif -- GitLab From 10cf65b48e1b2f16eaa826d2793cb67207a085d0 Mon Sep 17 00:00:00 2001 From: Ralph Tang Date: Thu, 9 Nov 2017 17:46:54 -0500 Subject: [PATCH 0112/1801] Clarify low-latency model stride length (#13704) --- tensorflow/examples/speech_commands/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py index 82d6a94ea1..ab611f414a 100644 --- a/tensorflow/examples/speech_commands/models.py +++ b/tensorflow/examples/speech_commands/models.py @@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, first_filter_height = input_time_size first_filter_count = 186 first_filter_stride_x = 1 - first_filter_stride_y = 4 + first_filter_stride_y = 1 first_weights = tf.Variable( tf.truncated_normal( [first_filter_height, first_filter_width, 1, first_filter_count], -- GitLab From 02dbaaa3a7063ea5ede4ddf47a6ef5df5a64518e Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 14:51:56 -0800 Subject: [PATCH 0113/1801] Fix typo Fix typo in tensorflow/python/framework/function.py --- tensorflow/python/framework/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index cef3f8d4c4..29cf223724 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -100,7 +100,7 @@ class Defun(object): grad_func - (optional). A function implementing the gradient of the function-to-register. This is must be a `_DefinedFunction` object. The gradient - function must satisify the criterion defined in + function must satisfy the criterion defined in function.proto:GradientDef. python_grad_func - (optional). A function implementing the -- GitLab From 954e8d6b134288195f54b8871ee9fcc432bf0aba Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 9 Nov 2017 13:42:15 -0800 Subject: [PATCH 0114/1801] eager: README title tweak. PiperOrigin-RevId: 175205782 --- tensorflow/contrib/eager/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md index ae4b07799f..dcc370cd00 100644 --- a/tensorflow/contrib/eager/README.md +++ b/tensorflow/contrib/eager/README.md @@ -1,4 +1,4 @@ -# TensorFlow Eager Execution +# Eager Execution > *WARNING*: This is a preview/pre-alpha version. The API and performance > characteristics are subject to change. -- GitLab From e930f0e072b8d67d9bf29d77babf071a3569615c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:49:12 -0800 Subject: [PATCH 0115/1801] Fix bug reported in b/69059093, by skipping rewrites that we can determine have already been applied. Make sure rewrites are idempotent by running the optimizer twice in unit tests. PiperOrigin-RevId: 175206742 --- .../optimizers/arithmetic_optimizer.cc | 12 +-- .../optimizers/arithmetic_optimizer_test.cc | 94 ++++++++++++++++++- 2 files changed, 96 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 14df3caebb..44d16e5a42 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -833,8 +833,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } - if (node->input_size() > 0 && IsAggregate(*node) && - !node_map->GetOutputs(node->name()).empty()) { + if (node->input_size() > 0 && IsAggregate(*node)) { // Discard aggregate nodes with a single input. if (node->input_size() == 1) { return node->input(0); @@ -855,7 +854,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( break; } } - if (all_equal) { + if (all_equal && node_map->GetNode(node->name() + "_const") == nullptr) { // 1. Create constant node with value N. const int N = node->input_size(); const auto type = GetDataTypeFromAttr(*node, "T"); @@ -898,7 +897,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // where all the inputs are Mul nodes. This pattern occurs frequently in // regularization terms for the gradients during training. if (node->input_size() > 1 && IsAggregate(*node) && - !node_map->GetOutputs(node->name()).empty()) { + node_map->GetNode(node->name() + "_hoist") == nullptr) { // Determine the set of common factors if the input nodes are all Mul nodes. std::set common_factors; int i = 0; @@ -1011,8 +1010,9 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } // Fold Conj into Transpose or ConjugateTranspose. - if (node->op() == "Conj" || node->op() == "Transpose" || - node->op() == "ConjugateTranspose") { + if ((node->op() == "Conj" || node->op() == "Transpose" || + node->op() == "ConjugateTranspose") && + node_map->GetNode(node->name() + "_fused") == nullptr) { const NodeDef* input = node_map->GetNode(node->input(0)); const NodeDef* transpose_op = node->op() == "Conj" ? input : node; const NodeDef* conj_op = node->op() == "Conj" ? node : input; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 9f471302c7..60fb47f51a 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -38,8 +38,8 @@ TEST_F(ArithmeticOptimizerTest, NoOp) { ArithmeticOptimizer optimizer; GraphDef output; - Status s = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(s); + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(item.graph.node_size(), output.node_size()); for (int i = 0; i < item.graph.node_size(); ++i) { @@ -66,6 +66,10 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(2, output.node_size()); const NodeDef& new_c1 = output.node(0); @@ -91,6 +95,10 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(4, output.node_size()); const NodeDef& new_c1 = output.node(0); @@ -146,13 +154,17 @@ TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsWithChain) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(6, output.node_size()); EXPECT_EQ("squeeze", output.node(5).input(0)); EXPECT_EQ("c", output.node(2).input(0)); } -TEST_F(ArithmeticOptimizerTest, SimplifyReplaceTrivialSums) { +TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); Output add = ops::Add(s.WithOpName("add"), x, x); @@ -165,6 +177,10 @@ TEST_F(ArithmeticOptimizerTest, SimplifyReplaceTrivialSums) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(5, output.node_size()); const NodeDef& new_const = output.node(3); @@ -178,7 +194,61 @@ TEST_F(ArithmeticOptimizerTest, SimplifyReplaceTrivialSums) { EXPECT_EQ("add_mul", new_id.input(0)); } -TEST_F(ArithmeticOptimizerTest, SimplifyHoistFactor) { +TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { + // Test case from b/69059093. + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output p = ops::Placeholder(s, DT_FLOAT, ops::Placeholder::Shape({10, 10})); + Output add = ops::Add(s.WithOpName("Add"), p, p); + Output add1 = ops::Add(s.WithOpName("Add_1"), p, p); + Output add4 = ops::Add(s.WithOpName("Add_4"), add, add1); + Output add5 = ops::Add(s.WithOpName("Add_5"), add, add1); + Output add6 = ops::Add(s.WithOpName("Add_6"), add4, add5); + Output id = ops::Identity(s.WithOpName("id"), add6); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(11, output.node_size()); + const NodeDef& new_id = output.node(4); + EXPECT_EQ("id", new_id.name()); + EXPECT_EQ("Add_6_mul", new_id.input(0)); + + // Add4 and add5 get deduped, and we rewrite each of the 3 remaining add nodes + // of the form Add(x,x) into Mul(Const(2), x). + const NodeDef& new_add_4_const = output.node(5); + EXPECT_EQ("Add_4_const", new_add_4_const.name()); + EXPECT_EQ("^Add", new_add_4_const.input(0)); + const NodeDef& new_add_4_mul = output.node(6); + EXPECT_EQ("Add_4_mul", new_add_4_mul.name()); + EXPECT_EQ("Add_4_const", new_add_4_mul.input(0)); + EXPECT_EQ("Add_mul", new_add_4_mul.input(1)); + + const NodeDef& new_add_6_const = output.node(7); + EXPECT_EQ("Add_6_const", new_add_6_const.name()); + EXPECT_EQ("^Add_4_mul", new_add_6_const.input(0)); + const NodeDef& new_add_6_mul = output.node(8); + EXPECT_EQ("Add_6_mul", new_add_6_mul.name()); + EXPECT_EQ("Add_6_const", new_add_6_mul.input(0)); + EXPECT_EQ("Add_4_mul", new_add_6_mul.input(1)); + + const NodeDef& new_add_const = output.node(9); + EXPECT_EQ("Add_const", new_add_const.name()); + EXPECT_EQ("^Placeholder", new_add_const.input(0)); + const NodeDef& new_add_mul = output.node(10); + EXPECT_EQ("Add_mul", new_add_mul.name()); + EXPECT_EQ("Add_const", new_add_mul.input(0)); + EXPECT_EQ("Placeholder", new_add_mul.input(1)); +} + +TEST_F(ArithmeticOptimizerTest, HoistFactor) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2}); @@ -195,6 +265,10 @@ TEST_F(ArithmeticOptimizerTest, SimplifyHoistFactor) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(9, output.node_size()); const NodeDef& new_add = output.node(8); @@ -225,6 +299,10 @@ TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("trans_fused", output.node(6).name()); @@ -272,6 +350,10 @@ TEST_F(ArithmeticOptimizerTest, FuseTransposeAndConj) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("conj_fused", output.node(6).name()); @@ -304,6 +386,10 @@ TEST_F(ArithmeticOptimizerTest, FoldTransposeIntoMatMul) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("matmul_fused", output.node(6).name()); -- GitLab From 9d5a6650ca2ad7068ee556c8dbce03b96ea22128 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 13:56:17 -0800 Subject: [PATCH 0116/1801] Instances per second in the eager microbenchmarks. PiperOrigin-RevId: 175207829 --- tensorflow/python/eager/benchmarks_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 26a70a617d..b555f16f1d 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -66,7 +66,8 @@ class MicroBenchmarks(test.Benchmark): func() end = time.time() mean_us = (end - start) * 1e6 / num_iters - self.report_benchmark(iters=num_iters, wall_time=mean_us) + self.report_benchmark(iters=num_iters, wall_time=mean_us, + extras={"examples_per_sec": num_iters/(end-start)}) def benchmark_create_np_array(self): func = lambda: np.array([3.0]) -- GitLab From 898b3486ab16fd2acc3d9f12f57a3be8d83d09ec Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 9 Nov 2017 14:10:11 -0800 Subject: [PATCH 0117/1801] Limit internal fragmentation in BFCAllocator to 128mb per allocation. Previously, if you had a very large allocation, it would round up to the next power of 2, and then, if this didn't fit in your GPU's available memory, eat all remaining memory in the device. Now we waste at most 128mb of memory in a large alloc. PiperOrigin-RevId: 175209995 --- tensorflow/core/common_runtime/bfc_allocator.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 38fe247521..6399b8cf55 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -296,12 +296,13 @@ void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes, // it from the free bin structure prior to using. RemoveFreeChunkIterFromBin(&b->free_chunks, citer); - // If we can break the size of the chunk into two reasonably - // large pieces, do so. - // - // TODO(vrv): What should be the criteria when deciding when - // to split? - if (chunk->size >= rounded_bytes * 2) { + // If we can break the size of the chunk into two reasonably large + // pieces, do so. In any case don't waste more than + // kMaxInternalFragmentation bytes on padding this alloc. + const int64 kMaxInternalFragmentation = 128 << 20; // 128mb + if (chunk->size >= rounded_bytes * 2 || + static_cast(chunk->size) - rounded_bytes >= + kMaxInternalFragmentation) { SplitChunk(h, rounded_bytes); chunk = ChunkFromHandle(h); // Update chunk pointer in case it moved } -- GitLab From e4cbba18dd0c04e5490997bc04c09a5269ce19e8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 14:14:30 -0800 Subject: [PATCH 0118/1801] Expose padded_batch_and_drop_remainder PiperOrigin-RevId: 175210678 --- tensorflow/contrib/data/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 824ac4298f..6e43ae0e63 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -23,6 +23,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@TextLineDataset @@batch_and_drop_remainder +@@padded_batch_and_drop_remainder @@dense_to_sparse_batch @@enumerate_dataset @@group_by_window @@ -45,6 +46,7 @@ from __future__ import print_function from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch +from tensorflow.contrib.data.python.ops.batching import padded_batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import unbatch from tensorflow.contrib.data.python.ops.dataset_ops import Dataset from tensorflow.contrib.data.python.ops.dataset_ops import get_single_element -- GitLab From 3de7349955b839edbd61fef7bac3db9e140ffd3d Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Thu, 9 Nov 2017 14:21:25 -0800 Subject: [PATCH 0119/1801] Add tf.nn.softmax_cross_entropy_with_logits_v2 which enables backprop wrt the labels. Clarify current backprop behavior. Original bugfix by Alexandre Passos. PiperOrigin-RevId: 175211803 --- .../python/kernel_tests/xent_op_test.py | 18 ++ tensorflow/python/ops/nn.py | 1 + tensorflow/python/ops/nn_grad.py | 5 +- tensorflow/python/ops/nn_ops.py | 164 +++++++++++++----- .../tools/api/golden/tensorflow.nn.pbtxt | 4 + 5 files changed, 143 insertions(+), 49 deletions(-) diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index 4b3dadc112..43be08f8a1 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -181,6 +181,24 @@ class XentTest(test.TestCase): print("cross entropy gradient err = ", err) self.assertLess(err, 5e-8) + def testGradientLabelWithV2(self): + with self.test_session(): + l = constant_op.constant( + [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5], + shape=[3, 4], + dtype=dtypes.float64, + name="l") + f = constant_op.constant( + [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4], + shape=[3, 4], + dtype=dtypes.float64, + name="f") + x = nn_ops.softmax_cross_entropy_with_logits_v2(labels=l, logits=f, + name="xent") + err = gradient_checker.compute_gradient_error(l, [3, 4], x, [3]) + + self.assertLess(err, 5e-8) + def testSecondGradient(self): with self.test_session() as sess: l = constant_op.constant([0.0, 0.0, 1.0/3, 0.0, diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index 79af3ac117..ee1a00623a 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -74,6 +74,7 @@ See the @{$python/nn} guide. @@softmax @@log_softmax @@softmax_cross_entropy_with_logits +@@softmax_cross_entropy_with_logits_v2 @@sparse_softmax_cross_entropy_with_logits @@weighted_cross_entropy_with_logits @@embedding_lookup diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 557f39fb42..4b406ba840 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -420,7 +420,6 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad): # grad_loss is the backprop for cost, and we multiply it with the gradients # (which is output[1]) # grad_grad is the backprop for softmax gradient. - # There is no gradient for the labels # # Second derivative is just softmax derivative w.r.t. logits. softmax_grad = op.outputs[1] @@ -436,15 +435,15 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad): const_fill_value = tensor_util.constant_value(g) return const_fill_value is not None and (const_fill_value == 0).all() + logits = op.inputs[0] if grad_grad is not None and not IsZero(grad_grad): - logits = op.inputs[0] softmax = nn_ops.softmax(logits) grad += ((grad_grad - array_ops.squeeze( math_ops.matmul(grad_grad[:, None, :], softmax[:, :, None]), axis=1)) * softmax) - return grad, None + return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits)) @ops.RegisterGradient("SparseSoftmaxCrossEntropyWithLogits") diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a37b68c6fa..bdaac65904 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -32,11 +32,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops + # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import +from tensorflow.python.util import deprecation # Aliases for some automatically-generated names. local_response_normalization = gen_nn_ops.lrn @@ -1700,9 +1702,9 @@ def _ensure_xent_args(name, sentinel, labels, logits): raise ValueError("Both labels and logits must be provided.") -def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name - labels=None, logits=None, - dim=-1, name=None): +def softmax_cross_entropy_with_logits_v2(_sentinel=None, # pylint: disable=invalid-name + labels=None, logits=None, + dim=-1, name=None): """Computes softmax cross entropy between `logits` and `labels`. Measures the probability error in discrete classification tasks in which the @@ -1726,6 +1728,10 @@ def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, or `float64`). + Backpropagation will happen into both `logits` and `labels`. To disallow + backpropagation into `labels`, pass label tensors through a `stop_gradients` + before feeding it to this function. + **Note that to avoid confusion, it is required to pass only named arguments to this function.** @@ -1747,57 +1753,123 @@ def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid # could break users who call this with bad labels, but disregard the bad # results. - logits = ops.convert_to_tensor(logits) - labels = ops.convert_to_tensor(labels) - precise_logits = math_ops.cast(logits, dtypes.float32) if ( - logits.dtype == dtypes.float16) else logits - # labels and logits must be of the same type - labels = math_ops.cast(labels, precise_logits.dtype) - input_rank = array_ops.rank(precise_logits) - # For shape inference. - shape = logits.get_shape() + with ops.name_scope( + name, "softmax_cross_entropy_with_logits", [logits, labels]) as name: + logits = ops.convert_to_tensor(logits, name="logits") + labels = ops.convert_to_tensor(labels, name="labels") + precise_logits = math_ops.cast(logits, dtypes.float32) if ( + logits.dtype == dtypes.float16) else logits + # labels and logits must be of the same type + labels = math_ops.cast(labels, precise_logits.dtype) + input_rank = array_ops.rank(precise_logits) + # For shape inference. + shape = logits.get_shape() - # Move the dim to the end if dim is not the last dimension. - if dim is not -1: - def _move_dim_to_end(tensor, dim_index, rank): - return array_ops.transpose(tensor, - array_ops.concat([ - math_ops.range(dim_index), - math_ops.range(dim_index + 1, rank), - [dim_index] - ], 0)) + # Move the dim to the end if dim is not the last dimension. + if dim is not -1: + def _move_dim_to_end(tensor, dim_index, rank): + return array_ops.transpose(tensor, + array_ops.concat([ + math_ops.range(dim_index), + math_ops.range(dim_index + 1, rank), + [dim_index] + ], 0)) - precise_logits = _move_dim_to_end(precise_logits, dim, input_rank) - labels = _move_dim_to_end(labels, dim, input_rank) + precise_logits = _move_dim_to_end(precise_logits, dim, input_rank) + labels = _move_dim_to_end(labels, dim, input_rank) - input_shape = array_ops.shape(precise_logits) + input_shape = array_ops.shape(precise_logits) - # Make precise_logits and labels into matrices. - precise_logits = _flatten_outer_dims(precise_logits) - labels = _flatten_outer_dims(labels) + # Make precise_logits and labels into matrices. + precise_logits = _flatten_outer_dims(precise_logits) + labels = _flatten_outer_dims(labels) - # Do the actual op computation. - # The second output tensor contains the gradients. We use it in - # _CrossEntropyGrad() in nn_grad but not here. - cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( - precise_logits, labels, name=name) + # Do the actual op computation. + # The second output tensor contains the gradients. We use it in + # _CrossEntropyGrad() in nn_grad but not here. + cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + precise_logits, labels, name=name) - # The output cost shape should be the input minus dim. - output_shape = array_ops.slice(input_shape, [0], - [math_ops.subtract(input_rank, 1)]) - cost = array_ops.reshape(cost, output_shape) + # The output cost shape should be the input minus dim. + output_shape = array_ops.slice(input_shape, [0], + [math_ops.subtract(input_rank, 1)]) + cost = array_ops.reshape(cost, output_shape) - # Make shape inference work since reshape and transpose may erase its static - # shape. - if context.in_graph_mode() and shape is not None and shape.dims is not None: - shape = shape.as_list() - del shape[dim] - cost.set_shape(shape) + # Make shape inference work since reshape and transpose may erase its static + # shape. + if context.in_graph_mode() and shape is not None and shape.dims is not None: + shape = shape.as_list() + del shape[dim] + cost.set_shape(shape) - if logits.dtype == dtypes.float16: - return math_ops.cast(cost, dtypes.float16) - else: - return cost + if logits.dtype == dtypes.float16: + return math_ops.cast(cost, dtypes.float16) + else: + return cost + + +_XENT_DEPRECATION = """ +Future major versions of TensorFlow will allow gradients to flow +into the labels input on backprop by default. + +See tf.nn.softmax_cross_entropy_with_logits_v2. +""" + + +@deprecation.deprecated(date=None, instructions=_XENT_DEPRECATION) +def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name + labels=None, logits=None, + dim=-1, name=None): + """Computes softmax cross entropy between `logits` and `labels`. + + Measures the probability error in discrete classification tasks in which the + classes are mutually exclusive (each entry is in exactly one class). For + example, each CIFAR-10 image is labeled with one and only one label: an image + can be a dog or a truck, but not both. + + **NOTE:** While the classes are mutually exclusive, their probabilities + need not be. All that is required is that each row of `labels` is + a valid probability distribution. If they are not, the computation of the + gradient will be incorrect. + + If using exclusive `labels` (wherein one and only + one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. + + **WARNING:** This op expects unscaled logits, since it performs a `softmax` + on `logits` internally for efficiency. Do not call this op with the + output of `softmax`, as it will produce incorrect results. + + `logits` and `labels` must have the same shape, e.g. + `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, + or `float64`). + + Backpropagation will happen only into `logits`. To calculate a cross entropy + loss that allows backpropagation into both `logits` and `labels`, see + @{tf.nn.softmax_cross_entropy_with_logits_v2}. + + **Note that to avoid confusion, it is required to pass only named arguments to + this function.** + + Args: + _sentinel: Used to prevent positional parameters. Internal, do not use. + labels: Each row `labels[i]` must be a valid probability distribution. + logits: Unscaled log probabilities. + dim: The class dimension. Defaulted to -1 which is the last dimension. + name: A name for the operation (optional). + + Returns: + A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the + softmax cross entropy loss. + """ + _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, + labels, logits) + + with ops.name_scope( + name, "softmax_cross_entropy_with_logits_sg", [logits, labels]) as name: + labels = array_ops.stop_gradient(labels, name="labels_stop_gradient") + + return softmax_cross_entropy_with_logits_v2( + labels=labels, logits=logits, dim=dim, name=name) def sparse_softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index 11637814a6..24c0448dea 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -288,6 +288,10 @@ tf_module { name: "softmax_cross_entropy_with_logits" argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'-1\', \'None\'], " } + member_method { + name: "softmax_cross_entropy_with_logits_v2" + argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'-1\', \'None\'], " + } member_method { name: "softplus" argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From a0e9c52921aef9eecbd358fa5f129328f0024ab9 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 9 Nov 2017 14:31:06 -0800 Subject: [PATCH 0120/1801] Internal change. PiperOrigin-RevId: 175213336 --- tensorflow/contrib/nccl/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index ed9fb64b95..df9dbb457a 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -48,8 +48,8 @@ tf_cuda_cc_test( # Disabled on jenkins until errors finding nvmlShutdown are found. tags = [ "manual", + "multi_gpu", "no_oss", - "noguitar", # note: is run manually there "notap", ], deps = if_cuda( @@ -138,8 +138,8 @@ cuda_py_test( # Disabled on jenkins until errors finding nvmlShutdown are found. tags = [ "manual", + "multi_gpu", "no_oss", - "noguitar", # note: is run manually there "notap", ], ) -- GitLab From f3f85e9aa0f6f26740d1da77e5bcc58ff70aa71c Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Thu, 9 Nov 2017 14:48:37 -0800 Subject: [PATCH 0121/1801] Change for asynchronous Send and Recv by splitting Send into {Send, SendDone} and Recv into {Recv, RecvDone}. See operation_semantics.md for the updated semantics. PiperOrigin-RevId: 175216012 --- .../compiler/xla/service/buffer_assignment.cc | 11 --- .../compiler/xla/service/cpu/ir_emitter.cc | 10 +++ .../compiler/xla/service/cpu/ir_emitter.h | 2 + .../compiler/xla/service/dfs_hlo_visitor.h | 6 +- .../service/dfs_hlo_visitor_with_default.h | 10 ++- .../compiler/xla/service/gpu/ir_emitter.cc | 8 ++ .../compiler/xla/service/gpu/ir_emitter.h | 2 + .../compiler/xla/service/hlo_cost_analysis.cc | 8 ++ .../compiler/xla/service/hlo_cost_analysis.h | 2 + .../xla/service/hlo_dataflow_analysis.cc | 65 ++++++++++++++ .../xla/service/hlo_dataflow_analysis.h | 2 + .../xla/service/hlo_dataflow_analysis_test.cc | 48 ++++++++++ .../compiler/xla/service/hlo_graph_dumper.cc | 4 + .../compiler/xla/service/hlo_instruction.cc | 57 ++++++++++-- .../compiler/xla/service/hlo_instruction.h | 22 +++-- .../compiler/xla/service/hlo_matchers.h | 2 + tensorflow/compiler/xla/service/hlo_opcode.h | 2 + .../xla/service/hlo_rematerialization.cc | 2 + .../compiler/xla/service/hlo_verifier.cc | 49 +++++++++- .../xla/service/instruction_fusion.cc | 2 + .../xla/service/logical_buffer_analysis.cc | 15 ++++ .../xla/service/logical_buffer_analysis.h | 2 + .../xla/service/tuple_points_to_analysis.cc | 58 ++++++++++++ .../xla/service/tuple_points_to_analysis.h | 2 + .../service/tuple_points_to_analysis_test.cc | 45 ++++++++++ .../compiler/xla/service/user_computation.cc | 6 +- .../xla/service/while_loop_simplifier.cc | 4 +- .../xla/service/while_loop_simplifier_test.cc | 6 +- .../compiler/xla/tools/parser/hlo_parser.cc | 30 ++++++- .../xla/tools/parser/hlo_parser_test.cc | 26 ++++-- .../performance/xla/operation_semantics.md | 89 +++++++++++++++++++ 31 files changed, 550 insertions(+), 47 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index c74f050f77..3c5b360c8e 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -819,17 +819,6 @@ Status BufferAssigner::AssignBuffersForComputation( continue; } - if (instruction->opcode() == HloOpcode::kRecv) { - // Make sure that recv operations get a new unique allocation so that - // don't share their buffer with any other operations. - BufferAllocation* allocation = assignment->NewAllocation( - *buffer, buffer_size, is_thread_local, /*is_reusable=*/false); - allocation_indices.push_back(allocation->index()); - VLOG(3) << "New allocation #" << allocation->index() - << " for recv: " << *buffer; - continue; - } - if (ShapeUtil::IsTuple(buffer->shape())) { // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend // assumes longer buffer liveness than indicated by the analysis. diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index a20ce6826c..e547f291b8 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1983,6 +1983,11 @@ Status IrEmitter::HandleSend(HloInstruction* send) { return Unimplemented("Send is not implemented on CPU. See b/33942983."); } +Status IrEmitter::HandleSendDone(HloInstruction* send_done) { + // TODO(b/33942983): Support Send/Recv on CPU. + return Unimplemented("Send-done is not implemented on CPU. See b/33942983."); +} + Status IrEmitter::HandleSlice(HloInstruction* slice) { VLOG(2) << "HandleSlice: " << slice->ToString(); auto operand = slice->operand(0); @@ -2148,6 +2153,11 @@ Status IrEmitter::HandleRecv(HloInstruction* recv) { return Unimplemented("Recv is not implemented on CPU. See b/33942983."); } +Status IrEmitter::HandleRecvDone(HloInstruction* recv_done) { + // TODO(b/33942983): Support Send/Recv on CPU. + return Unimplemented("Recv-done is not implemented on CPU. See b/33942983."); +} + Status IrEmitter::HandlePad(HloInstruction* pad) { // CPU backend does not properly handle negative padding but this is ok // because negative padding should be removed by the algebraic simplifier. diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 5d061e11e3..83eded5ad8 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -171,11 +171,13 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleReduceWindow(HloInstruction* reduce_window) override; Status HandleSelectAndScatter(HloInstruction* select_and_scatter) override; Status HandleSend(HloInstruction* send) override; + Status HandleSendDone(HloInstruction* send_done) override; Status HandleSlice(HloInstruction* slice) override; Status HandleDynamicSlice(HloInstruction* dynamic_slice) override; Status HandleDynamicUpdateSlice( HloInstruction* dynamic_update_slice) override; Status HandleRecv(HloInstruction* recv) override; + Status HandleRecvDone(HloInstruction* recv_done) override; Status HandlePad(HloInstruction* pad) override; Status HandleTuple(HloInstruction* tuple) override; Status HandleMap(HloInstruction* map) override; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index de3cd15440..bc73839a88 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -211,9 +211,11 @@ class DfsHloVisitorBase { virtual Status HandlePad(HloInstructionPtr hlo) = 0; - virtual Status HandleSend(HloInstructionPtr hlo) = 0; + virtual Status HandleSend(HloInstructionPtr send) = 0; + virtual Status HandleSendDone(HloInstructionPtr send_done) = 0; - virtual Status HandleRecv(HloInstructionPtr hlo) = 0; + virtual Status HandleRecv(HloInstructionPtr recv) = 0; + virtual Status HandleRecvDone(HloInstructionPtr recv_done) = 0; virtual Status HandleBatchNormTraining(HloInstructionPtr hlo) = 0; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index 7ce88be89d..5415bab5b3 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -167,11 +167,17 @@ class DfsHloVisitorWithDefaultBase Status HandleWhile(HloInstructionPtr xla_while) override { return DefaultAction(xla_while); } + Status HandleRecv(HloInstructionPtr recv) override { + return DefaultAction(recv); + } + Status HandleRecvDone(HloInstructionPtr recv_done) override { + return DefaultAction(recv_done); + } Status HandleSend(HloInstructionPtr send) override { return DefaultAction(send); } - Status HandleRecv(HloInstructionPtr recv) override { - return DefaultAction(recv); + Status HandleSendDone(HloInstructionPtr send_done) override { + return DefaultAction(send_done); } // Invoked to inform the visitor that the traversal has completed, and that diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index 57a3f713e3..9d55c7859d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -128,10 +128,18 @@ Status IrEmitter::HandleSend(HloInstruction*) { return Unimplemented("Send is not implemented on GPU"); } +Status IrEmitter::HandleSendDone(HloInstruction*) { + return Unimplemented("Send-Done is not implemented on GPU"); +} + Status IrEmitter::HandleRecv(HloInstruction*) { return Unimplemented("Recv is not implemented on GPU"); } +Status IrEmitter::HandleRecvDone(HloInstruction*) { + return Unimplemented("Recv-done is not implemented on GPU"); +} + Status IrEmitter::HandleTuple(HloInstruction* tuple) { std::vector base_ptrs; for (const HloInstruction* operand : tuple->operands()) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h index 263992d925..61fdeaa0ee 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h @@ -84,7 +84,9 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleOutfeed(HloInstruction* outfeed) override; Status HandleSort(HloInstruction* sort) override; Status HandleSend(HloInstruction* send) override; + Status HandleSendDone(HloInstruction* send_done) override; Status HandleRecv(HloInstruction* recv) override; + Status HandleRecvDone(HloInstruction* recv_done) override; Status HandleParameter(HloInstruction* parameter) override; Status HandleReduce(HloInstruction* reduce) override; Status HandleTuple(HloInstruction* tuple) override; diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 17ba2b673a..1877065f67 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -337,10 +337,18 @@ Status HloCostAnalysis::HandleSend(const HloInstruction*) { return Status::OK(); } +Status HloCostAnalysis::HandleSendDone(const HloInstruction*) { + return Status::OK(); +} + Status HloCostAnalysis::HandleRecv(const HloInstruction*) { return Status::OK(); } +Status HloCostAnalysis::HandleRecvDone(const HloInstruction*) { + return Status::OK(); +} + Status HloCostAnalysis::HandleReshape(const HloInstruction*) { return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 8074868e37..0f44775378 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -60,7 +60,9 @@ class HloCostAnalysis : public ConstDfsHloVisitor { Status HandleReducePrecision(const HloInstruction* hlo) override; Status HandleConcatenate(const HloInstruction* concatenate) override; Status HandleSend(const HloInstruction* send) override; + Status HandleSendDone(const HloInstruction* send_done) override; Status HandleRecv(const HloInstruction* recv) override; + Status HandleRecvDone(const HloInstruction* recv_done) override; Status HandleConvert(const HloInstruction* convert) override; Status HandleCopy(const HloInstruction* copy) override; Status HandleDot(const HloInstruction* dot) override; diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index 92261bce62..ff80f18bb5 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -242,6 +242,51 @@ bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) { return false; } +bool HloDataflowAnalysis::UpdateSendValueSet(HloInstruction* send) { + CHECK_EQ(send->opcode(), HloOpcode::kSend); + bool changed = false; + // Send forwards the operand value to the output tuple at {0}. + for (auto& pair : GetInstructionValueSet(send->operand(0))) { + const ShapeIndex& operand_index = pair.first; + const HloValueSet& operand_value_set = pair.second; + + ShapeIndex index = {0}; + for (int64 i : operand_index) { + index.push_back(i); + } + + HloValueSet& value_set = GetValueSet(send, index); + if (value_set != operand_value_set) { + value_set = operand_value_set; + changed = true; + } + } + return changed; +} + +bool HloDataflowAnalysis::UpdateRecvDoneValueSet(HloInstruction* recv_done) { + CHECK_EQ(recv_done->opcode(), HloOpcode::kRecvDone); + bool changed = false; + // RecvDone forwards the operand value at {0} to the output. + for (auto& pair : GetInstructionValueSet(recv_done)) { + ShapeIndex& index = pair.first; + HloValueSet& value_set = pair.second; + + ShapeIndex operand_index = {0}; + for (int64 i : index) { + operand_index.push_back(i); + } + + const HloValueSet& operand_value_set = + GetValueSet(recv_done->operand(0), operand_index); + if (value_set != operand_value_set) { + value_set = operand_value_set; + changed = true; + } + } + return changed; +} + bool HloDataflowAnalysis::UpdateCallValueSet(HloInstruction* call) { CHECK_EQ(call->opcode(), HloOpcode::kCall); InstructionValueSet& value_set = GetInstructionValueSet(call); @@ -429,6 +474,10 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet( return UpdateCallValueSet(instruction); case HloOpcode::kWhile: return UpdateWhileValueSet(instruction); + case HloOpcode::kSend: + return UpdateSendValueSet(instruction); + case HloOpcode::kRecvDone: + return UpdateRecvDoneValueSet(instruction); default: // Instruction does not forward HloValues (it defines all values in its // output). No update is necessary. @@ -537,6 +586,12 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { GetValueSet(instruction, /*index=*/{}).AddValue(value); }; + // Lambda to set the value set at the given index of the output. + auto define_value_at = [this, &instruction](const ShapeIndex& index) { + HloValue* value = NewHloValue(instruction, index, /*is_phi=*/false); + GetValueSet(instruction, index).AddValue(value); + }; + switch (instruction->opcode()) { case HloOpcode::kBitcast: if (bitcast_defines_value_) { @@ -577,6 +632,16 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { // values flow from their operands. define_top_level_only(); break; + case HloOpcode::kRecvDone: + // RecvDone aliases its input tuple element {0}, therefore does not + // define any values. + break; + case HloOpcode::kSend: + // Send produces a tuple of {aliased operand, U32 context}, therefore + // only defines the top-level tuple and the tuple element at {1}. + define_value_at(/*index=*/{}); + define_value_at(/*index=*/{1}); + break; default: define_all_values(); break; diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index 207e553bf7..63467f3206 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -146,7 +146,9 @@ class HloDataflowAnalysis { bool UpdateCopyValueSet(HloInstruction* copy); bool UpdateGetTupleElementValueSet(HloInstruction* gte); bool UpdateParameterValueSet(HloInstruction* parameter); + bool UpdateRecvDoneValueSet(HloInstruction* recv_done); bool UpdateSelectValueSet(HloInstruction* select); + bool UpdateSendValueSet(HloInstruction* send); bool UpdateTupleValueSet(HloInstruction* tuple); bool UpdateWhileValueSet(HloInstruction* xla_while); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 4b8eb237a6..66a538fc51 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -1139,6 +1139,54 @@ TEST_P(HloDataflowAnalysisTest, TupleCopy) { analysis.GetValueDefinedAt(copy, /*index=*/{}).live_out_of_module()); } +TEST_P(HloDataflowAnalysisTest, SendAndSendDone) { + // Test that a Send forwards its operand to the output tuple at {0}. + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param0")); + auto send = builder.AddInstruction( + HloInstruction::CreateSend(param, /*channel_id=*/0)); + auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send)); + module_->AddEntryComputation(builder.Build()); + + bool ssa_form = GetParam(); + const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + + EXPECT_EQ(analysis.values().size(), 4); + + EXPECT_TRUE(analysis.ValueIsDefinedAt(param)); + EXPECT_TRUE(analysis.ValueIsDefinedAt(send, /*index=*/{})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(send, /*index=*/{0})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(send, /*index=*/{1})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(send_done)); + EXPECT_THAT(HloValuesAt(send, /*index=*/{0}), + UnorderedElementsAre(analysis.GetValueDefinedAt(param))); +} + +TEST_P(HloDataflowAnalysisTest, RecvAndRecvDone) { + // Test that a RecvDone forwards its operand tuple element at {0} to the + // output. + auto builder = HloComputation::Builder(TestName()); + auto recv = builder.AddInstruction( + HloInstruction::CreateRecv(scalar_shape_, /*channel_id=*/0)); + auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv)); + module_->AddEntryComputation(builder.Build()); + + bool ssa_form = GetParam(); + const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + + EXPECT_EQ(analysis.values().size(), 3); + + EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{0})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{1})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(recv_done)); + EXPECT_THAT(HloValuesAt(recv_done), + UnorderedElementsAre(analysis.GetValueDefinedAt(recv, {0}))); + EXPECT_TRUE( + analysis.GetValueDefinedAt(recv, /*index=*/{0}).live_out_of_module()); +} + TEST_P(HloDataflowAnalysisTest, ElementwiseChainInterference) { // A simple chain of elementwise operations. No values should interfere. // diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 1c063c973d..67e0238c4a 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -943,7 +943,9 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kFusion: return kGray; case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kCrossReplicaSum: @@ -1037,7 +1039,9 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) { ? "" : StrCat("stride=", VectorString(instr->slice_strides())); case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: return StrCat("channel_id=", instr->channel_id()); default: return ""; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index ee98c3fabc..ffb933155f 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -371,20 +371,50 @@ HloInstruction::CreateCrossReplicaSum(const Shape& shape, /* static */ std::unique_ptr HloInstruction::CreateSend( HloInstruction* operand, int64 channel_id) { + // Send instruction produces a tuple of {aliased operand, U32 context}. + Shape output_shape = ShapeUtil::MakeTupleShape( + {operand->shape(), ShapeUtil::MakeShape(U32, {})}); auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kSend, ShapeUtil::MakeNil())); + WrapUnique(new HloInstruction(HloOpcode::kSend, output_shape)); instruction->AppendOperand(operand); instruction->channel_id_ = channel_id; return instruction; } +/* static */ std::unique_ptr HloInstruction::CreateSendDone( + HloInstruction* operand) { + CHECK(operand->opcode() == HloOpcode::kSend) + << "SendDone must take the context operand from Send"; + auto instruction = WrapUnique( + new HloInstruction(HloOpcode::kSendDone, ShapeUtil::MakeNil())); + instruction->AppendOperand(operand); + instruction->channel_id_ = operand->channel_id(); + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateRecv( const Shape& shape, int64 channel_id) { - auto instruction = WrapUnique(new HloInstruction(HloOpcode::kRecv, shape)); + // Recv instruction produces a tuple of {receive buffer, U32 context}. + Shape output_shape = + ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U32, {})}); + auto instruction = + WrapUnique(new HloInstruction(HloOpcode::kRecv, output_shape)); instruction->channel_id_ = channel_id; return instruction; } +/* static */ std::unique_ptr HloInstruction::CreateRecvDone( + HloInstruction* operand) { + CHECK(operand->opcode() == HloOpcode::kRecv) + << "RecvDone must take the context operand from Recv"; + Shape output_shape = ShapeUtil::GetTupleElementShape(operand->shape(), 0); + auto instruction = + WrapUnique(new HloInstruction(HloOpcode::kRecvDone, output_shape)); + instruction->AppendOperand(operand); + instruction->channel_id_ = operand->channel_id(); + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateReverse( const Shape& shape, HloInstruction* operand, tensorflow::gtl::ArraySlice dimensions) { @@ -908,7 +938,9 @@ RandomDistribution HloInstruction::random_distribution() const { bool HloInstruction::HasSideEffect() const { switch (opcode_) { case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kTrace: @@ -1164,7 +1196,9 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( new_operands[4], epsilon(), feature_index()); break; case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kTrace: LOG(FATAL) << "Not yet implemented, clone: " << HloOpcodeString(opcode_); } @@ -1557,8 +1591,10 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kSort: - case HloOpcode::kSend: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + case HloOpcode::kSend: + case HloOpcode::kSendDone: return false; } } @@ -1891,7 +1927,8 @@ std::vector HloInstruction::ExtraAttributesToString() const { }))); } - if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv) { + if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv || + opcode() == HloOpcode::kSendDone || opcode() == HloOpcode::kRecvDone) { extra.push_back(StrCat("channel_id=", channel_id_)); } @@ -2071,8 +2108,10 @@ bool HloInstruction::IsFusable() const { case HloOpcode::kOutfeed: case HloOpcode::kParameter: case HloOpcode::kTrace: - case HloOpcode::kSend: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + case HloOpcode::kSend: + case HloOpcode::kSendDone: return false; // Only fuse Rng if it is used once, otherwise the random numbers generated // will be different in each fusion. If it is the root (user count = 0) @@ -2279,10 +2318,14 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleCall(this); case HloOpcode::kCustomCall: return visitor->HandleCustomCall(this); - case HloOpcode::kSend: - return visitor->HandleSend(this); case HloOpcode::kRecv: return visitor->HandleRecv(this); + case HloOpcode::kRecvDone: + return visitor->HandleRecvDone(this); + case HloOpcode::kSend: + return visitor->HandleSend(this); + case HloOpcode::kSendDone: + return visitor->HandleSendDone(this); // These opcodes are not handled here. case HloOpcode::kTrace: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 4d8fe6bc10..974d43d89e 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -181,18 +181,28 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, tensorflow::StringPiece outfeed_config); - // Creates a send instruction with the given channel id, which sends the - // operand data to a unique receive instruction in another computation that - // has the same channel id. + // Creates an asynchronous send instruction with the given channel id, which + // initiates sending the operand data to a unique receive instruction in + // another computation that has the same channel id. static std::unique_ptr CreateSend(HloInstruction* operand, int64 channel_id); - // Creates a receive instruction with the given channel id, which receives - // data of the given shape from a unique send instruction in another - // computation that has the same channel id. + // Blocks until data transfer for the Send instruction (operand) is complete. + // The operand must be kSend. + static std::unique_ptr CreateSendDone( + HloInstruction* operand); + + // Creates an asynchronous receive instruction with the given channel id, + // which allocates resources to receive data of the given shape from a unique + // send instruction in another computation that has the same channel id. static std::unique_ptr CreateRecv(const Shape& shape, int64 channel_id); + // Blocks until data transfer for the Recv instruction (operand) is complete + // and returns the receive buffer. The operand must be kRecv. + static std::unique_ptr CreateRecvDone( + HloInstruction* operand); + // Creates a slice instruction, where the operand is sliced by the given // start/limit indices. static std::unique_ptr CreateSlice( diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index 4d4010b025..268fa0f632 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -121,6 +121,7 @@ HLO_MATCHER(Outfeed); HLO_MATCHER(Pad); HLO_MATCHER(Power); HLO_MATCHER(Recv); +HLO_MATCHER(RecvDone); HLO_MATCHER(Reduce); HLO_MATCHER(ReducePrecision); HLO_MATCHER(ReduceWindow); @@ -131,6 +132,7 @@ HLO_MATCHER(Rng); HLO_MATCHER(Select); HLO_MATCHER(SelectAndScatter); HLO_MATCHER(Send); +HLO_MATCHER(SendDone); HLO_MATCHER(ShiftLeft); HLO_MATCHER(ShiftRightLogical); HLO_MATCHER(ShiftRightArithmetic); diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index d68fc20321..e0d02e0665 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -97,6 +97,7 @@ namespace xla { V(kPower, "power") \ V(kReal, "real") \ V(kRecv, "recv") \ + V(kRecvDone, "recv-done") \ V(kReduce, "reduce") \ V(kReducePrecision, "reduce-precision") \ V(kReduceWindow, "reduce-window") \ @@ -108,6 +109,7 @@ namespace xla { V(kSelect, "select") \ V(kSelectAndScatter, "select-and-scatter") \ V(kSend, "send") \ + V(kSendDone, "send-done") \ V(kShiftLeft, "shift-left") \ V(kShiftRightArithmetic, "shift-right-arithmetic") \ V(kShiftRightLogical, "shift-right-logical") \ diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index c96df50e79..828be8490c 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -66,7 +66,9 @@ bool IsRematerializable(const HloInstruction* instruction) { case HloOpcode::kInfeed: case HloOpcode::kParameter: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kTrace: case HloOpcode::kWhile: return false; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index c1aa655401..c938450891 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -270,12 +270,40 @@ class ShapeVerifier : public DfsHloVisitor { pad->padding_config())); } - Status HandleSend(HloInstruction*) override { - return tensorflow::Status::OK(); + Status HandleSend(HloInstruction* send) override { + TF_RET_CHECK(send->users().size() == 1); + const HloInstruction* send_done = send->users()[0]; + TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone); + TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done)); + return CheckShape( + send, ShapeUtil::MakeTupleShape( + {send->operand(0)->shape(), ShapeUtil::MakeShape(U32, {})})); } - Status HandleRecv(HloInstruction*) override { - return tensorflow::Status::OK(); + Status HandleSendDone(HloInstruction* send_done) override { + TF_RET_CHECK(send_done->operands().size() == 1); + const HloInstruction* send = send_done->operand(0); + TF_RET_CHECK(send->opcode() == HloOpcode::kSend); + TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done)); + return CheckShape(send_done, ShapeUtil::MakeNil()); + } + + Status HandleRecv(HloInstruction* recv) override { + TF_RET_CHECK(recv->users().size() == 1); + const HloInstruction* recv_done = recv->users()[0]; + TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone); + TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done)); + return CheckShape(recv, + ShapeUtil::MakeTupleShape( + {recv_done->shape(), ShapeUtil::MakeShape(U32, {})})); + } + + Status HandleRecvDone(HloInstruction* recv_done) override { + TF_RET_CHECK(recv_done->operands().size() == 1); + const HloInstruction* recv = recv_done->operand(0); + TF_RET_CHECK(recv->opcode() == HloOpcode::kRecv); + TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done)); + return CheckShape(recv_done, recv->shape().tuple_shapes(0)); } Status HandleBatchNormTraining(HloInstruction* batch_norm_training) override { @@ -365,6 +393,19 @@ class ShapeVerifier : public DfsHloVisitor { instruction->opcode(), instruction->operands())); } + // Checks if the given two instructions shares the same channel id. + Status CheckSameChannel(const HloInstruction* instr1, + const HloInstruction* instr2) { + if (instr1->channel_id() != instr2->channel_id()) { + return FailedPrecondition( + "Expected to have the same channel id, actual channel ids are: %s " + "(%lld), %s (%lld)", + instr1->ToString().c_str(), instr1->channel_id(), + instr2->ToString().c_str(), instr2->channel_id()); + } + return tensorflow::Status::OK(); + } + // Returns the size of a Shape in bytes. const std::function shape_size_fn_; }; diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 0d1b7bc109..dea47b1fd7 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -113,7 +113,9 @@ namespace xla { case HloOpcode::kTrace: case HloOpcode::kWhile: case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: return true; } diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc index b92017c6cb..02dc49e78c 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc @@ -104,6 +104,21 @@ Status LogicalBufferAnalysis::HandleBitcast(HloInstruction*) { return Status::OK(); } +Status LogicalBufferAnalysis::HandleRecvDone(HloInstruction*) { + // RecvDone doesn't create a new buffer but rather aliases its input (Recv) + // tuple element at {0} to its output. + return Status::OK(); +} + +Status LogicalBufferAnalysis::HandleSend(HloInstruction* send) { + // Send creates new buffers for the top-level tuple and the context (tuple + // element at {1}). Tuple element at {0} is an alias of the Send operand, so + // we don't need to create a new Logical Buffer for that. + NewLogicalBuffer(send, /*index=*/{}); + NewLogicalBuffer(send, /*index=*/{1}); + return Status::OK(); +} + Status LogicalBufferAnalysis::HandleTuple(HloInstruction* tuple) { // A Tuple instruction only creates the top-level buffer. NewLogicalBuffer(tuple, /*index=*/{}); diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.h b/tensorflow/compiler/xla/service/logical_buffer_analysis.h index a82e83ec5c..598d08b720 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.h +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.h @@ -60,6 +60,8 @@ class LogicalBufferAnalysis : public DfsHloVisitorWithDefault { Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleCopy(HloInstruction* copy) override; + Status HandleRecvDone(HloInstruction* recv_done) override; + Status HandleSend(HloInstruction* send) override; Status HandleSelect(HloInstruction* select) override; // A map from the buffer ID to the logical buffer diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index df537bd7c1..a1f9451dd4 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -253,6 +253,64 @@ Status TuplePointsToAnalysis::HandleBitcast(HloInstruction* bitcast) { return Status::OK(); } +Status TuplePointsToAnalysis::HandleRecvDone(HloInstruction* recv_done) { + // RecvDone aliases its input (Recv) tuple element {0} to its output. + PointsToSet& points_to_set = CreateEmptyPointsToSet(recv_done); + const PointsToSet& operand_points_to_set = + GetPointsToSet(recv_done->operand(0)); + + // Recursively copy the points to set of the operand tuple {0}. + points_to_set.ForEachMutableElement( + [this, &points_to_set, &operand_points_to_set]( + const ShapeIndex& index, PointsToSet::BufferList* buffers) { + ShapeIndex src_index({0}); + for (auto element : index) { + src_index.push_back(element); + } + *buffers = operand_points_to_set.element(src_index); + for (auto& tuple_source : + operand_points_to_set.tuple_sources(src_index)) { + points_to_set.add_tuple_source(index, tuple_source); + } + }); + return Status::OK(); +} + +Status TuplePointsToAnalysis::HandleSend(HloInstruction* send) { + // Send creates a tuple of {aliased operand, U32 context}. + PointsToSet& points_to_set = CreateEmptyPointsToSet(send); + + // Creates the points to set for the tuple and its element at {1}. + auto top_buffer = points_to_set.mutable_element(ShapeIndex({})); + top_buffer->push_back( + &logical_buffer_analysis_->GetBuffer(send, ShapeIndex({}))); + points_to_set.add_tuple_source({}, send); + + auto context_buffer = points_to_set.mutable_element(ShapeIndex({1})); + context_buffer->push_back( + &logical_buffer_analysis_->GetBuffer(send, ShapeIndex({1}))); + + // Recursively copy the points to set of the operand to output tuple {0}. + const PointsToSet& operand_points_to_set = GetPointsToSet(send->operand(0)); + operand_points_to_set.ForEachElement( + [&points_to_set, &operand_points_to_set]( + const ShapeIndex& src_index, + const PointsToSet::BufferList& points_to) { + ShapeIndex target_index({0}); + for (auto element : src_index) { + target_index.push_back(element); + } + *points_to_set.mutable_element(target_index) = points_to; + + for (HloInstruction* tuple : + operand_points_to_set.tuple_sources(src_index)) { + points_to_set.add_tuple_source(target_index, tuple); + } + }); + + return Status::OK(); +} + Status TuplePointsToAnalysis::HandleTuple(HloInstruction* tuple) { tensorflow::gtl::ArraySlice operands(tuple->operands()); PointsToSet& points_to_set = CreateEmptyPointsToSet(tuple); diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index e6157a1ed1..8928de107e 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -251,6 +251,8 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleCopy(HloInstruction* copy) override; + Status HandleRecvDone(HloInstruction* recv_done) override; + Status HandleSend(HloInstruction* send) override; Status HandleSelect(HloInstruction* select) override; string ToString() const; diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index 694ed57fa2..dec446d4da 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -313,6 +313,51 @@ TEST_F(TuplePointsToAnalysisTest, TupleCopy) { {constant1, constant2, copy}); } +TEST_F(TuplePointsToAnalysisTest, SendAndSendDone) { + // Send forwards its operand to the output tuple at {0}. + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto send = builder.AddInstruction( + HloInstruction::CreateSend(constant, /*channel_id=*/0)); + auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send)); + + BuildModuleAndRunAnalysis(builder.Build()); + + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(send).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(send).IsDistinct()); + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(send_done).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(send_done).IsDistinct()); + + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(send).element({}), {send}); + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(send).element({0}), {constant}); + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(send_done).CreateFlattenedSet(), + {send_done}); + ExpectHasBufferAliases(constant, {}, {{constant, {}}, {send, {0}}}); +} + +TEST_F(TuplePointsToAnalysisTest, RecvAndRecvDone) { + // RecvDone forwards its operand tuple element at {0} to the output. + auto builder = HloComputation::Builder(TestName()); + auto recv = builder.AddInstruction(HloInstruction::CreateRecv( + ShapeUtil::MakeShape(F32, {1, 2, 3}), /*channel_id=*/0)); + auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv)); + + BuildModuleAndRunAnalysis(builder.Build()); + + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(recv).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(recv).IsDistinct()); + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(recv_done).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(recv_done).IsDistinct()); + + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(recv).element({}), {recv}); + ExpectHasBufferAliases(recv, {0}, {{recv, {0}}, {recv_done, {}}}); +} + TEST_F(TuplePointsToAnalysisTest, TupleSelect) { // Select from two different tuples. This should create an ambiguous points to // set containing the union of both sides. diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index e9d182509b..8d5bb08e51 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -2927,8 +2927,9 @@ void ComputationLowerer::Visit( case OpRequest::kRecvRequest: { const RecvRequest& recv_request = request.request().recv_request(); - hlo_instruction = add_instruction(HloInstruction::CreateRecv( + HloInstruction* recv = add_instruction(HloInstruction::CreateRecv( request.output_shape(), recv_request.channel_handle().handle())); + hlo_instruction = add_instruction(HloInstruction::CreateRecvDone(recv)); break; } @@ -3120,8 +3121,9 @@ void ComputationLowerer::Visit( case OpRequest::kSendRequest: { const SendRequest& send_request = request.request().send_request(); HloInstruction* operand = lookup_instruction(send_request.operand()); - hlo_instruction = add_instruction(HloInstruction::CreateSend( + HloInstruction* send = add_instruction(HloInstruction::CreateSend( operand, send_request.channel_handle().handle())); + hlo_instruction = add_instruction(HloInstruction::CreateSendDone(send)); break; } diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 65734f91bc..2fac914892 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -58,7 +58,9 @@ static bool ContainsSendOrRecv(const HloComputation* comp) { static bool IsOrContainsSendOrRecv(const HloInstruction* instr) { if (instr->opcode() == HloOpcode::kSend || - instr->opcode() == HloOpcode::kRecv) { + instr->opcode() == HloOpcode::kSendDone || + instr->opcode() == HloOpcode::kRecv || + instr->opcode() == HloOpcode::kRecvDone) { return true; } for (const auto& subcomp : instr->called_computations()) { diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index 8e1a2dcde1..d99b31dc00 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ -144,10 +144,11 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsSend) { auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); - while_body->AddInstruction(HloInstruction::CreateSend( + auto* send = while_body->AddInstruction(HloInstruction::CreateSend( while_body->AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(true))), /*channel_id=*/0)); + while_body->AddInstruction(HloInstruction::CreateSendDone(send)); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -156,9 +157,10 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsRecv) { auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); - while_body->AddInstruction( + auto* recv = while_body->AddInstruction( HloInstruction::CreateRecv(ShapeUtil::MakeShape(F32, {1}), /*channel_id=*/0)); + while_body->AddInstruction(HloInstruction::CreateRecvDone(recv)); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index df07e069a0..3741c3daac 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -442,7 +442,21 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, return false; } instruction = builder->AddInstruction( - HloInstruction::CreateRecv(shape, *channel_id)); + HloInstruction::CreateRecv(shape.tuple_shapes(0), *channel_id)); + break; + } + case HloOpcode::kRecvDone: { + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + if (channel_id != operands[0]->channel_id()) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateRecvDone(operands[0])); break; } case HloOpcode::kSend: { @@ -456,6 +470,20 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateSend(operands[0], *channel_id)); break; } + case HloOpcode::kSendDone: { + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + if (channel_id != operands[0]->channel_id()) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateSendDone(operands[0])); + break; + } case HloOpcode::kGetTupleElement: { optional index; attrs["index"] = {/*required=*/true, AttrTy::kInt64, &index}; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index a9dc360978..ca476a4bb7 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -226,9 +226,11 @@ ENTRY %WhileWithScalarS32Result.v2 () -> s32[] { R"(HloModule TwoSendRecvBothWayRecvFist_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15, sharding={maximal device=1} - ROOT %constant = f32[] constant(2.1), sharding={maximal device=0} - %send = () send(f32[] %constant), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv} + %recv = (f32[], u32[]) recv(), channel_id=15, sharding={maximal device=1} + ROOT %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15, sharding={maximal device=1} + %constant = f32[] constant(2.1), sharding={maximal device=0} + %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv} + %send-done = () send-done((f32[], u32[]) %send), channel_id=16, sharding={maximal device=0} } )" @@ -522,9 +524,11 @@ TEST_F(HloParserTest, UnexpectedAttribute) { const string original = R"(HloModule unexpected_attr_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15 + %recv = (f32[], u32[]) recv(), channel_id=15 + %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 ROOT %constant = f32[] constant(2.1) - %send = () send(f32[] %constant), channel_id=16, calls=%recv + %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, calls=%recv + %send-done = () send-done((f32[], u32[]) %send), channel_id=16 } )"; @@ -536,9 +540,11 @@ TEST_F(HloParserTest, MissingAttribute) { const string original = R"(HloModule missing_attr_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15 + %recv = (f32[], u32[]) recv(), channel_id=15 + %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 ROOT %constant = f32[] constant(-2.1) - %send = () send(f32[] %constant) + %send = (f32[], u32[]) send(f32[] %constant) + %send-done = () send-done((f32[], u32[]) %send), channel_id=16 } )"; @@ -550,9 +556,11 @@ TEST_F(HloParserTest, PredecessorUndefined) { const string original = R"(HloModule pre_not_found_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15 + %recv = (f32[], u32[]) recv(), channel_id=15 + %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 ROOT %constant = f32[] constant(2.1) - %send = () send(f32[] %constant), channel_id=16, control-predecessors={%done} + %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, control-predecessors={%done} + %send-done = () send-done((f32[], u32[]) %send), channel_id=16 } )"; diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 3ca3b51a5e..ccced8792e 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -901,6 +901,95 @@ are all 0. Figure below shows examples of different `edge_padding` and +## Recv + +See also +[`ComputationBuilder::Recv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). + + `Recv(shape, channel_handle)` + +| Arguments | Type | Semantics | +| ---------------- | --------------- | ------------------------------------ | +| `shape` | `Shape` | shape of the data to receive | +| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair | + +Receives data of the given shape from a `Send` instruction in another +computation that shares the same channel handle. Returns a +ComputationDataHandle for the received data. + +The client API of `Recv` operation represents synchronous communication. +However, the instruction is internally decomposed into 2 HLO instructions +(`Recv` and `RecvDone`) to enable asynchronous data transfers. See also +[`HloInstruction::CreateRecv` and `HloInstruction::CreateRecvDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h). + +`Recv(const Shape& shape, int64 channel_id)` + +Allocates resources required to receive data from a `Send` instruction with the +same channel_id. Returns a context for the allocated resources, which is used +by a following `RecvDone` instruction to wait for the completion of the data +transfer. The context is a tuple of {receive buffer (shape), request identifier +(U32)} and it can only be used by a `RecvDone` instruction. + + `RecvDone(HloInstruction context)` + +Given a context created by a `Recv` instruction, waits for the data transfer to +complete and returns the received data. + +## Send + +See also +[`ComputationBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). + + `Send(operand, channel_handle)` + +| Arguments | Type | Semantics | +| ---------------- | ----------------------- | -------------------------------- | +| `operand` | `ComputationDataHandle` | data to send (array of type T) | +| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair | + +Sends the given operand data to a `Recv` instruction in another computation +that shares the same channel handle. Does not return any data. + +Similar to the `Recv` operation, the client API of `Send` operation represents +synchronous communication, and is internally decomposed into 2 HLO instructions +(`Send` and `SendDone`) to enable asynchronous data transfers. See also +[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h). + +`Send(HloInstruction operand, int64 channel_id)` + +Initiates an asynchronous transfer of the operand to the resources allocated by +the `Recv` instruction with the same channel id. Returns a context, which is +used by a following `SendDone` instruction to wait for the completion of the +data transfer. The context is a tuple of {operand (shape), request identifier +(U32)} and it can only be used by a `SendDone` instruction. + + `SendDone(HloInstruction context)` + +Given a context created by a `Send` instruction, waits for the data transfer to +complete. The instruction does not return any data. + + Scheduling of channel instructions + +The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`, +`Send`, `SendDone`) is as below. + +
+ +
+ +* `Recv` happens before `Send` +* `Send` happens before `RecvDone` +* `Recv` happens before `RecvDone` +* `Send` happens before `SendDone` + +When the backend compilers generate a linear schedule for each computation that +communicates via channel instructions, there must not be cycles across the +computations. For example, below schedules lead to deadlocks. + +
+ +
+ ## Reduce See also -- GitLab From b11a79032856722d0e51ce421455af8a8610d965 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 14:55:09 -0800 Subject: [PATCH 0122/1801] Adds streaming_dynamic_auc to Tensorflow contrib metrics. This metric differs from streaming_auc because it uses every prediction as a threshold rather than linearly spaced fixed thresholds. PiperOrigin-RevId: 175217002 --- tensorflow/contrib/metrics/__init__.py | 2 + .../contrib/metrics/python/ops/metric_ops.py | 149 ++++++++++ .../metrics/python/ops/metric_ops_test.py | 262 ++++++++++++++++-- 3 files changed, 385 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index 302042c4dd..8eed45c4b3 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -27,6 +27,7 @@ See the @{$python/contrib.metrics} guide. @@streaming_false_negative_rate @@streaming_false_negative_rate_at_thresholds @@streaming_auc +@@streaming_dynamic_auc @@streaming_curve_points @@streaming_recall_at_k @@streaming_mean_absolute_error @@ -88,6 +89,7 @@ from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_auc from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_concat from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_covariance from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_curve_points +from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_dynamic_auc from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negative_rate from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negative_rate_at_thresholds from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negatives diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 3dd1f1a627..24692ff12f 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -1178,6 +1178,154 @@ def streaming_auc(predictions, name=name) +def _compute_dynamic_auc(labels, predictions, curve='ROC'): + """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds. + + Computes the area under the ROC or PR curve using each prediction as a + threshold. This could be slow for large batches, but has the advantage of not + having its results degrade depending on the distribution of predictions. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as + `predictions` with values of 0 or 1 and type `int64`. + predictions: A 1-D `Tensor` of predictions whose values are `float64`. + curve: The name of the curve to be computed, 'ROC' for the Receiving + Operating Characteristic or 'PR' for the Precision-Recall curve. + + Returns: + A scalar `Tensor` containing the area-under-curve value for the input. + """ + # Count the total number of positive and negative labels in the input. + size = array_ops.size(predictions) + total_positive = math_ops.cast(math_ops.reduce_sum(labels), dtypes.int32) + + def continue_computing_dynamic_auc(): + """Continues dynamic auc computation, entered if labels are not all equal. + + Returns: + A scalar `Tensor` containing the area-under-curve value. + """ + # Sort the predictions descending, and the corresponding labels as well. + ordered_predictions, indices = nn.top_k(predictions, k=size) + ordered_labels = array_ops.gather(labels, indices) + + # Get the counts of the unique ordered predictions. + _, _, counts = array_ops.unique_with_counts(ordered_predictions) + + # Compute the indices of the split points between different predictions. + splits = math_ops.cast( + array_ops.pad(math_ops.cumsum(counts), paddings=[[1, 0]]), dtypes.int32) + + # Count the positives to the left of the split indices. + positives = math_ops.cast( + array_ops.pad(math_ops.cumsum(ordered_labels), paddings=[[1, 0]]), + dtypes.int32) + true_positives = array_ops.gather(positives, splits) + if curve == 'ROC': + # Count the negatives to the left of every split point and the total + # number of negatives for computing the FPR. + false_positives = math_ops.subtract(splits, true_positives) + total_negative = size - total_positive + x_axis_values = math_ops.truediv(false_positives, total_negative) + y_axis_values = math_ops.truediv(true_positives, total_positive) + elif curve == 'PR': + x_axis_values = math_ops.truediv(true_positives, total_positive) + # For conformance, set precision to 1 when the number of positive + # classifications is 0. + y_axis_values = array_ops.where( + math_ops.greater(splits, 0), + math_ops.truediv(true_positives, splits), + array_ops.ones_like(true_positives, dtype=dtypes.float64)) + + # Calculate trapezoid areas. + heights = math_ops.add(y_axis_values[1:], y_axis_values[:-1]) / 2.0 + widths = math_ops.abs( + math_ops.subtract(x_axis_values[1:], x_axis_values[:-1])) + return math_ops.reduce_sum(math_ops.multiply(heights, widths)) + + # If all the labels are the same, AUC isn't well-defined (but raising an + # exception seems excessive) so we return 0, otherwise we finish computing. + return control_flow_ops.cond( + math_ops.logical_or( + math_ops.equal(total_positive, 0), + math_ops.equal(total_positive, size) + ), + true_fn=lambda: array_ops.constant(0, dtypes.float64), + false_fn=continue_computing_dynamic_auc) + + +def streaming_dynamic_auc(labels, + predictions, + curve='ROC', + metrics_collections=(), + updates_collections=(), + name=None): + """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds. + + USAGE NOTE: this approach requires storing all of the predictions and labels + for a single evaluation in memory, so it may not be usable when the evaluation + batch size and/or the number of evaluation steps is very large. + + Computes the area under the ROC or PR curve using each prediction as a + threshold. This has the advantage of being resilient to the distribution of + predictions by aggregating across batches, accumulating labels and predictions + and performing the final calculation using all of the concatenated values. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as `labels` + and with values of 0 or 1 whose values are castable to `int64`. + predictions: A `Tensor` of predictions whose values are castable to + `float64`. Will be flattened into a 1-D `Tensor`. + curve: The name of the curve for which to compute AUC, 'ROC' for the + Receiving Operating Characteristic or 'PR' for the Precision-Recall curve. + metrics_collections: An optional iterable of collections that `auc` should + be added to. + updates_collections: An optional iterable of collections that `update_op` + should be added to. + name: An optional name for the variable_scope that contains the metric + variables. + + Returns: + auc: A scalar `Tensor` containing the current area-under-curve value. + update_op: An operation that concatenates the input labels and predictions + to the accumulated values. + + Raises: + ValueError: If `labels` and `predictions` have mismatched shapes or if + `curve` isn't a recognized curve type. + """ + + if curve not in ['PR', 'ROC']: + raise ValueError('curve must be either ROC or PR, %s unknown' % curve) + + with variable_scope.variable_scope(name, default_name='dynamic_auc'): + labels.get_shape().assert_is_compatible_with(predictions.get_shape()) + predictions = array_ops.reshape( + math_ops.cast(predictions, dtypes.float64), [-1]) + labels = array_ops.reshape(math_ops.cast(labels, dtypes.int64), [-1]) + with ops.control_dependencies([ + check_ops.assert_greater_equal( + labels, + array_ops.zeros_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is <0'), + check_ops.assert_less_equal( + labels, + array_ops.ones_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is >1') + ]): + preds_accum, update_preds = streaming_concat(predictions, + name='concat_preds') + labels_accum, update_labels = streaming_concat(labels, + name='concat_labels') + update_op = control_flow_ops.group(update_labels, update_preds) + auc = _compute_dynamic_auc(labels_accum, preds_accum, curve=curve) + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + if metrics_collections: + ops.add_to_collections(metrics_collections, auc) + return auc, update_op + + def streaming_precision_recall_at_equal_thresholds(predictions, labels, num_thresholds=None, @@ -3285,6 +3433,7 @@ __all__ = [ 'streaming_accuracy', 'streaming_auc', 'streaming_curve_points', + 'streaming_dynamic_auc', 'streaming_false_negative_rate', 'streaming_false_negative_rate_at_thresholds', 'streaming_false_negatives', diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 6a8e58b4da..5d0463e1f7 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -1708,6 +1708,34 @@ class StreamingCurvePointsTest(test.TestCase): [[1.0, 4.0 / 6.0], [0.75, 1.0], [0.0, 1.0]]) +def _np_auc(predictions, labels, weights=None): + """Computes the AUC explicitly using Numpy. + + Args: + predictions: an ndarray with shape [N]. + labels: an ndarray with shape [N]. + weights: an ndarray with shape [N]. + + Returns: + the area under the ROC curve. + """ + if weights is None: + weights = np.ones(np.size(predictions)) + is_positive = labels > 0 + num_positives = np.sum(weights[is_positive]) + num_negatives = np.sum(weights[~is_positive]) + + # Sort descending: + inds = np.argsort(-predictions) + + sorted_labels = labels[inds] + sorted_weights = weights[inds] + is_positive = sorted_labels > 0 + + tp = np.cumsum(sorted_weights * is_positive) / num_positives + return np.sum((sorted_weights * tp)[~is_positive]) / num_negatives + + class StreamingAUCTest(test.TestCase): def setUp(self): @@ -1896,33 +1924,6 @@ class StreamingAUCTest(test.TestCase): self.assertAlmostEqual(1, auc.eval(), 6) - def np_auc(self, predictions, labels, weights): - """Computes the AUC explicitly using Numpy. - - Args: - predictions: an ndarray with shape [N]. - labels: an ndarray with shape [N]. - weights: an ndarray with shape [N]. - - Returns: - the area under the ROC curve. - """ - if weights is None: - weights = np.ones(np.size(predictions)) - is_positive = labels > 0 - num_positives = np.sum(weights[is_positive]) - num_negatives = np.sum(weights[~is_positive]) - - # Sort descending: - inds = np.argsort(-predictions) - - sorted_labels = labels[inds] - sorted_weights = weights[inds] - is_positive = sorted_labels > 0 - - tp = np.cumsum(sorted_weights * is_positive) / num_positives - return np.sum((sorted_weights * tp)[~is_positive]) / num_negatives - def testWithMultipleUpdates(self): num_samples = 1000 batch_size = 10 @@ -1945,7 +1946,7 @@ class StreamingAUCTest(test.TestCase): for weights in (None, np.ones(num_samples), np.random.exponential( scale=1.0, size=num_samples)): - expected_auc = self.np_auc(predictions, labels, weights) + expected_auc = _np_auc(predictions, labels, weights) with self.test_session() as sess: enqueue_ops = [[] for i in range(num_batches)] @@ -1974,6 +1975,211 @@ class StreamingAUCTest(test.TestCase): self.assertAlmostEqual(expected_auc, auc.eval(), 2) +class StreamingDynamicAUCTest(test.TestCase): + + def setUp(self): + super(StreamingDynamicAUCTest, self).setUp() + np.random.seed(1) + ops.reset_default_graph() + + def testUnknownCurve(self): + with self.assertRaisesRegexp( + ValueError, 'curve must be either ROC or PR, TEST_CURVE unknown'): + metrics.streaming_dynamic_auc(labels=array_ops.ones((10, 1)), + predictions=array_ops.ones((10, 1)), + curve='TEST_CURVE') + + def testVars(self): + metrics.streaming_dynamic_auc( + labels=array_ops.ones((10, 1)), predictions=array_ops.ones((10, 1))) + _assert_metric_variables(self, ['dynamic_auc/concat_labels/array:0', + 'dynamic_auc/concat_labels/size:0', + 'dynamic_auc/concat_preds/array:0', + 'dynamic_auc/concat_preds/size:0']) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + auc, _ = metrics.streaming_dynamic_auc( + labels=array_ops.ones((10, 1)), + predictions=array_ops.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertEqual(ops.get_collection(my_collection_name), [auc]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.streaming_dynamic_auc( + labels=array_ops.ones((10, 1)), + predictions=array_ops.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertEqual(ops.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = random_ops.random_uniform( + (10, 3), maxval=1, dtype=dtypes_lib.float32, seed=1) + labels = random_ops.random_uniform( + (10, 3), maxval=2, dtype=dtypes_lib.int64, seed=2) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + # Run several updates. + for _ in xrange(10): + sess.run(update_op) + # Then verify idempotency. + initial_auc = auc.eval() + for _ in xrange(10): + self.assertAlmostEqual(initial_auc, auc.eval(), 5) + + def testAllLabelsOnes(self): + with self.test_session() as sess: + predictions = constant_op.constant([1., 1., 1.]) + labels = constant_op.constant([1, 1, 1]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(0, auc.eval()) + + def testAllLabelsZeros(self): + with self.test_session() as sess: + predictions = constant_op.constant([1., 1., 1.]) + labels = constant_op.constant([0, 0, 0]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(0, auc.eval()) + + def testNonZeroOnePredictions(self): + with self.test_session() as sess: + predictions = constant_op.constant([2.5, -2.5, 2.5, -2.5], + dtype=dtypes_lib.float32) + labels = constant_op.constant([1, 0, 1, 0]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(auc.eval(), 1.0) + + def testAllCorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + with self.test_session() as sess: + predictions = constant_op.constant(inputs) + labels = constant_op.constant(inputs) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(1, auc.eval()) + + def testSomeCorrect(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0, 1, 0]) + labels = constant_op.constant([0, 1, 1, 0]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.5, auc.eval()) + + def testAllIncorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + with self.test_session() as sess: + predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) + labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0, auc.eval()) + + def testExceptionOnIncompatibleShapes(self): + with self.test_session() as sess: + predictions = array_ops.ones([5]) + labels = array_ops.zeros([6]) + with self.assertRaisesRegexp(ValueError, 'Shapes .* are incompatible'): + _, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + + def testExceptionOnGreaterThanOneLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0], dtypes_lib.float32) + labels = constant_op.constant([2, 1, 0]) + _, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is >1.*'): + sess.run(update_op) + + def testExceptionOnNegativeLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0], dtypes_lib.float32) + labels = constant_op.constant([1, 0, -1]) + _, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is <0.*'): + sess.run(update_op) + + def testWithMultipleUpdates(self): + batch_size = 10 + num_batches = 100 + labels = np.array([]) + predictions = np.array([]) + tf_labels = variables.Variable(array_ops.ones(batch_size, dtypes_lib.int32), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.int32) + tf_predictions = variables.Variable( + array_ops.ones(batch_size), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.float32) + auc, update_op = metrics.streaming_dynamic_auc(tf_labels, tf_predictions) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + for _ in xrange(num_batches): + new_labels = np.random.randint(0, 2, size=batch_size) + noise = np.random.normal(0.0, scale=0.2, size=batch_size) + new_predictions = 0.4 + 0.2 * new_labels + noise + labels = np.concatenate([labels, new_labels]) + predictions = np.concatenate([predictions, new_predictions]) + sess.run(tf_labels.assign(new_labels)) + sess.run(tf_predictions.assign(new_predictions)) + sess.run(update_op) + expected_auc = _np_auc(predictions, labels) + self.assertAlmostEqual(expected_auc, auc.eval()) + + def testAUCPRReverseIncreasingPredictions(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.1, 0.4, 0.35, 0.8], dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 0, 1, 1]) + auc, update_op = metrics.streaming_dynamic_auc( + labels, predictions, curve='PR') + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-5) + + def testAUCPRJumbledPredictions(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], dtypes_lib.float32) + labels = constant_op.constant([0, 0, 1, 0, 1, 0, 1]) + auc, update_op = metrics.streaming_dynamic_auc( + labels, predictions, curve='PR') + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-6) + + def testAUCPRPredictionsLessThanHalf(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], + shape=(1, 7), + dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 0, 0, 0, 1, 1, 1], shape=(1, 7)) + auc, update_op = metrics.streaming_dynamic_auc( + labels, predictions, curve='PR') + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-5) + + class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): def setUp(self): -- GitLab From 9abe08570ffe5e4aaa9bbd1f977455e8b0dd4491 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 15:00:15 -0800 Subject: [PATCH 0123/1801] [TF:XLA] Clean up unused XLA options and functions. PiperOrigin-RevId: 175217850 --- .../compiler/jit/kernels/xla_launch_op.cc | 1 - .../compiler/jit/xla_compilation_cache.cc | 3 --- tensorflow/compiler/tf2xla/xla_compiler.h | 6 ------ .../compiler/xla/client/local_client.cc | 20 ------------------- tensorflow/compiler/xla/client/local_client.h | 16 --------------- .../compiler/xla/service/hlo_module_config.cc | 4 ++-- .../compiler/xla/service/hlo_module_config.h | 10 ---------- .../compiler/xla/service/local_service.cc | 20 ------------------- tensorflow/compiler/xla/service/service.h | 2 -- 9 files changed, 2 insertions(+), 80 deletions(-) diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index 27c5da08c1..e481796d9e 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -257,7 +257,6 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); options.graph_def_version = ctx->function_library()->graph_def_version(); options.allow_cpu_custom_calls = (platform_id_ == gpu::host::kHostPlatformId); - options.local_executable_has_hybrid_result = true; const XlaCompiler::CompilationResult* kernel; xla::LocalExecutable* executable; diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 23368b6c76..bc2eccd277 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -227,10 +227,7 @@ Status XlaCompilationCache::BuildExecutable( } xla::ExecutableBuildOptions build_options; build_options.set_device_ordinal(client_->default_device_ordinal()); - build_options.set_platform(client_->platform()); build_options.set_result_layout(result.xla_output_shape); - build_options.set_has_hybrid_result( - options.local_executable_has_hybrid_result); auto compile_result = client_->Compile(*result.computation, argument_layouts, build_options); diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 4d40ca5825..ac7d4cfb12 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -236,12 +236,6 @@ class XlaCompiler { // to the computation. bool allow_cpu_custom_calls = false; - // If 'local_executable_has_hybrid_result', the top-level pointers of the - // result tuple of compiled programs are stored in host memory and the - // nested buffers in device memory, otherwise the whole result tuple is - // stored in device memory. - bool local_executable_has_hybrid_result = false; - // If not nullptr, populate_resource_manager is called with the // compilation device's resource manager when the compilation // device is created, and can be used to create metadata objects diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 15c744ecd3..b50425a09c 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -27,16 +27,6 @@ namespace se = ::perftools::gputools; namespace xla { -ExecutableBuildOptions& ExecutableBuildOptions::set_platform( - perftools::gputools::Platform* platform) { - platform_ = platform; - return *this; -} - -perftools::gputools::Platform* ExecutableBuildOptions::platform() const { - return platform_; -} - ExecutableBuildOptions& ExecutableBuildOptions::set_device_ordinal( int device_ordinal) { device_ordinal_ = device_ordinal; @@ -56,16 +46,6 @@ const Shape* ExecutableBuildOptions::result_layout() const { return result_layout_set_ ? &result_layout_ : nullptr; } -ExecutableBuildOptions& ExecutableBuildOptions::set_has_hybrid_result( - bool has_hybrid_result) { - has_hybrid_result_ = has_hybrid_result; - return *this; -} - -bool ExecutableBuildOptions::has_hybrid_result() const { - return has_hybrid_result_; -} - namespace { StatusOr BorrowStreamForDevice(int device_ordinal, Backend* backend) { diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index 9f985ed527..e9eeaa0aa2 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -37,14 +37,6 @@ namespace xla { // LocalClient::Compile. class ExecutableBuildOptions { public: - // If set, this is the platform to build the computation for. This must match - // the underlying platform of the service. A value of nullptr indicates the - // option has not been set. - // - // TODO(b/28616830): Support multiple platforms. - ExecutableBuildOptions& set_platform(perftools::gputools::Platform* platform); - perftools::gputools::Platform* platform() const; - // If set, this is the device to build the computation for. Valid // device_ordinal values are: 0 to # of devices - 1. These values are // identical to the device ordinal values used by StreamExecutor. The built @@ -61,18 +53,10 @@ class ExecutableBuildOptions { ExecutableBuildOptions& set_result_layout(const Shape& shape_with_layout); const Shape* result_layout() const; - // If set, the executable will be built to output a hybrid - // ShapedBuffer with top-level tuple pointers in host memory and - // result buffers in device memory. - ExecutableBuildOptions& set_has_hybrid_result(bool has_hybrid_result); - bool has_hybrid_result() const; - private: - perftools::gputools::Platform* platform_ = nullptr; int device_ordinal_ = -1; Shape result_layout_; bool result_layout_set_ = false; - bool has_hybrid_result_ = true; }; class LocalExecutable { diff --git a/tensorflow/compiler/xla/service/hlo_module_config.cc b/tensorflow/compiler/xla/service/hlo_module_config.cc index 8974deb530..822e2f1f53 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.cc +++ b/tensorflow/compiler/xla/service/hlo_module_config.cc @@ -39,8 +39,8 @@ void HloModuleConfig::SetDefaultComputationLayout( } string HloModuleConfig::compilation_cache_key() const { - string key = tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled_, - "::hybrid=", has_hybrid_result_); + string key = + tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled_); StrAppend(&key, "::("); std::vector params; for (const ShapeLayout& param_layout : diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index 4a7ead9c10..a5ee895e48 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -104,16 +104,6 @@ class HloModuleConfig { // Whether to enable HLO-level profiling. bool hlo_profiling_enabled_ = false; - // If this flag is true, the generated executable will return a ShapedBuffer - // holding the result of the computation. In a ShapedBuffer, tuples have their - // structure held in host memory and the element arrays (leaves of the tuple - // structure) stored in device memory. The ShapedBuffer is considered "hybrid" - // because its leaves are on device but its structure is stored on - // host. Otherwise, if this flag is false, the generated executable will - // return a DeviceMemoryBase where the result is held entirely in device - // memory. - bool has_hybrid_result_ = false; - // Module/graph-level seed handle. uint64 seed_ = 0; diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index d4d35da9d6..06f43bd3cb 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -68,26 +68,6 @@ LocalService::LocalService(const ServiceOptions& options, std::unique_ptr execute_backend) : Service(options, std::move(execute_backend)) {} -namespace { -// Returns the space required to allocate a shape. If -// allocate_space_for_deep_copy the space includes all sub-buffers of -// a tuple. -int64 RequiredSpace(const Shape& shape, bool allocate_space_for_deep_copy, - TransferManager* transfer_manager) { - int64 size = 0; - // TODO(b/33492279) remove once no devices represent result tuples as - // contiguous buffers. - if (allocate_space_for_deep_copy) { - ShapeUtil::ForEachSubshape( - shape, [&size, transfer_manager](const Shape& subshape, - const ShapeIndex& /*index*/) { - size += transfer_manager->GetByteSizeRequirement(subshape); - }); - } - return size; -} -} // namespace - StatusOr> LocalService::CompileExecutable( const ComputationHandle& computation, const tensorflow::gtl::ArraySlice argument_layouts, diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 6646be2e9a..47f4f0ade5 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -272,8 +272,6 @@ class Service : public ServiceInterface { // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. - // has_hybrid_result is used to initialize the same-named field in - // HloModuleConfig -- see that class for documentation. StatusOr> CreateModuleConfig( const ProgramShape& program_shape, tensorflow::gtl::ArraySlice argument_shapes, -- GitLab From b57e6aaa330a2354f2f4cd26f3ffc1fd11103ff0 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 9 Nov 2017 15:04:22 -0800 Subject: [PATCH 0124/1801] Make LLVMCompilerTest less stateful. Instead of assigning the pre and post optimization to a singleton xla::Compiler object, prefer creating a short-lived CpuCompiler or a GpuCompiler instance on the stack. Without this change, adding a second test case on the (Cpu|Gpu)Compiler in the same process triggers a use-after-free. (Btw, LLVMCompiler should really be spelled LlvmCompiler per Google C++ style, I'll do that rename shortly). PiperOrigin-RevId: 175218617 --- tensorflow/compiler/xla/tests/BUILD | 21 +-- .../compiler/xla/tests/llvm_compiler_test.cc | 143 +++++++++++++----- 2 files changed, 117 insertions(+), 47 deletions(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 2333a30ad5..3e62481629 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1344,22 +1344,23 @@ xla_test( ], ) -xla_test( +tf_cc_test( name = "llvm_compiler_test", srcs = ["llvm_compiler_test.cc"], - backends = [ - "cpu", - "gpu", - "cpu_parallel", - ], + tags = ["requires-gpu-sm35"], deps = [ - "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/service:backend", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/service:gpu_plugin", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:llvm_compiler", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/compiler/xla/service/gpu:gpu_compiler", "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor", "@llvm//:core", ], ) diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 458258e7ee..70d8b764a3 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -14,49 +14,118 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/llvm_compiler.h" +#include "tensorflow/compiler/xla/service/backend.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" +#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/service/platform_util.h" +#include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/stream_executor/stream_executor.h" namespace xla { namespace { -class LLVMCompilerTest : public HloTestBase {}; - -XLA_TEST_F(LLVMCompilerTest, CompilerHooks) { - int pre_opt_hook_call_count = 0; - int post_opt_hook_call_count = 0; - - auto pre_opt_hook = [&pre_opt_hook_call_count](const llvm::Module &) { - ++pre_opt_hook_call_count; - return Status::OK(); - }; - auto post_opt_hook = [&post_opt_hook_call_count](const llvm::Module &) { - ++post_opt_hook_call_count; - return Status::OK(); - }; - - // Create HLO module, and run the compiler. - auto builder = HloComputation::Builder(TestName()); - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(42.0))); - - auto hlo_module = CreateNewModule(); - hlo_module->AddEntryComputation(builder.Build()); - - auto compiler = static_cast(backend().compiler()); - compiler->SetPreOptimizationHook(pre_opt_hook); - compiler->SetPostOptimizationHook(post_opt_hook); - - ASSERT_TRUE( - compiler - ->Compile(std::move(hlo_module), backend().default_stream_executor()) - .ok()); - - // Test that hooks were called. - EXPECT_EQ(1, pre_opt_hook_call_count); - EXPECT_EQ(1, post_opt_hook_call_count); +class LLVMCompilerTest : public ::testing::Test { + public: + void SetUp() override { + Platform *platform = FindPlatform(); + ASSERT_NE(platform, nullptr); + + BackendOptions backend_options; + backend_options.set_platform(platform); + StatusOr> backend_or_status = + Backend::CreateBackend(backend_options); + ASSERT_IS_OK(backend_or_status.status()); + backend_ = backend_or_status.ConsumeValueOrDie(); + } + + ~LLVMCompilerTest() override {} + + protected: + using Platform = ::perftools::gputools::Platform; + + explicit LLVMCompilerTest(string platform_name) + : platform_name_(std::move(platform_name)) {} + + void TestCompilerHooks(LLVMCompiler *compiler) { + int pre_opt_hook_call_count = 0; + int post_opt_hook_call_count = 0; + + auto pre_opt_hook = [&pre_opt_hook_call_count](const llvm::Module &) { + ++pre_opt_hook_call_count; + return Status::OK(); + }; + auto post_opt_hook = [&post_opt_hook_call_count](const llvm::Module &) { + ++post_opt_hook_call_count; + return Status::OK(); + }; + + // Create HLO module, and run the compiler. + auto builder = HloComputation::Builder(TestName()); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42.0))); + + auto hlo_module = CreateNewModule(); + hlo_module->AddEntryComputation(builder.Build()); + + compiler->SetPreOptimizationHook(pre_opt_hook); + compiler->SetPostOptimizationHook(post_opt_hook); + + ASSERT_TRUE(compiler + ->Compile(std::move(hlo_module), + backend_->default_stream_executor()) + .ok()); + + // Test that hooks were called. + EXPECT_EQ(1, pre_opt_hook_call_count); + EXPECT_EQ(1, post_opt_hook_call_count); + } + + private: + Platform *FindPlatform() { + for (Platform *platform : + PlatformUtil::GetSupportedPlatforms().ConsumeValueOrDie()) { + if (platform->Name() == platform_name_) { + return platform; + } + } + return nullptr; + } + + string platform_name_; + std::unique_ptr backend_; + + static string TestName() { + return ::testing::UnitTest::GetInstance()->current_test_info()->name(); + } + + static std::unique_ptr CreateNewModule() { + HloModuleConfig config; + config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); + return MakeUnique(TestName(), VersionedComputationHandle(), + config); + } +}; + +class CpuCompilerTest : public LLVMCompilerTest { + public: + CpuCompilerTest() : LLVMCompilerTest("Host") {} +}; + +class GpuCompilerTest : public LLVMCompilerTest { + public: + GpuCompilerTest() : LLVMCompilerTest("CUDA") {} +}; + +TEST_F(CpuCompilerTest, HooksTest) { + cpu::CpuCompiler compiler; + TestCompilerHooks(&compiler); +} + +TEST_F(GpuCompilerTest, HooksTest) { + gpu::GpuCompiler compiler; + TestCompilerHooks(&compiler); } } // namespace -- GitLab From 67c3d9f7242df74492943c769719ffb863ca1af0 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 15:55:07 -0800 Subject: [PATCH 0125/1801] Tensor template argument to gradienttape was unnecessary. PiperOrigin-RevId: 175225805 --- tensorflow/c/eager/tape.h | 112 +++++++++++---------- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/imperative_grad.py | 2 +- tensorflow/python/eager/pywrap_tfe_src.cc | 65 +++++++----- 4 files changed, 104 insertions(+), 77 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 654ceb7bec..29d73c5ca4 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -65,9 +65,6 @@ using OpTape = std::unordered_map>; // adding gradients, getting zeroes, etc. Currently cannot be implemented // without using tensorflow python code, hence left unspecified here. // -// Tensor is a representation of a tensor. We need to take its ID, and it needs -// to match IDs in the tape. -// // Gradient is the type returned by gradient functions. In Python TF it's either // Tensor or IndexedSlices or None, which here we map to nullptr. Gradients need // to allow their size to be computed and they need to be passable to a backward @@ -82,7 +79,7 @@ using OpTape = std::unordered_map>; // TODO(apassos) provide concrete template instantiations for TFE_TensorHandle // specialization, which is blocked by quite a few things needing to loop back // into python now. -template +template class VSpace { public: virtual ~VSpace() {} @@ -99,11 +96,7 @@ class VSpace { virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0; // Returns a Tensor which is filled with ones and like the input. - virtual Gradient* OnesLike(Tensor*) const = 0; - - // Returns an integer which is a unique-to-within-this-program handle for this - // tensor. - virtual int64 TensorId(Tensor* tensor) const = 0; + virtual Gradient* Ones(TensorShape shape, DataType dtype) const = 0; // Calls the passed-in backward function. virtual Status CallBackwardFunction( @@ -117,7 +110,7 @@ class VSpace { // Traces the execution of operations, doing eager garbage collection, and // exporting a full trace so other code can do backpropagation. Not thread-safe. -template +template class GradientTape { public: GradientTape() {} @@ -143,11 +136,11 @@ class GradientTape { // once) and produces the gradient of the target tensors with respect to the // source tensors. The output gradients are used if not empty and not // null. The result is populated with one tensor per target element. - Status ComputeGradient( - const VSpace& vspace, - gtl::ArraySlice target, gtl::ArraySlice sources, - gtl::ArraySlice output_gradients, - std::vector* result); + Status ComputeGradient(const VSpace& vspace, + gtl::ArraySlice target_tensor_ids, + gtl::ArraySlice source_tensor_id, + gtl::ArraySlice output_gradients, + std::vector* result); private: TensorTape tensor_tape_; @@ -161,8 +154,8 @@ class GradientTape { // Template instantiations here -template -bool GradientTape::ShouldRecord( +template +bool GradientTape::ShouldRecord( gtl::ArraySlice tensor_ids) { for (int64 i : tensor_ids) { if (tensor_tape_.find(i) != tensor_tape_.end()) { @@ -172,13 +165,13 @@ bool GradientTape::ShouldRecord( return false; } -template -void GradientTape::Watch(int64 tensor_id) { +template +void GradientTape::Watch(int64 tensor_id) { tensor_tape_.emplace(tensor_id, -1); } -template -void GradientTape::RecordOperation( +template +void GradientTape::RecordOperation( const string& op_type, gtl::ArraySlice output_tensors, gtl::ArraySlice input_tensor_id, BackwardFunction* backward_function, const std::function& backward_function_deleter) { @@ -206,9 +199,8 @@ void GradientTape::RecordOperation( op_type, tensors, ids, backward_function, backward_function_deleter}; } -template -void GradientTape::DeleteTrace( - int64 tensor_id) { +template +void GradientTape::DeleteTrace(int64 tensor_id) { auto it = tensor_usage_.find(tensor_id); if (it == tensor_usage_.end()) { return; @@ -353,15 +345,16 @@ std::vector InitialStack( return result; } -template +template Status InitialGradients( - const VSpace& vspace, - gtl::ArraySlice target, - gtl::ArraySlice output_gradients, - std::unordered_map tensor_usage_counts, + const VSpace& vspace, + gtl::ArraySlice target_tensor_ids, + gtl::ArraySlice output_gradients, const TensorTape& tensor_tape, + const OpTape& op_tape, + const std::unordered_map& tensor_usage_counts, std::unordered_map>* result) { - for (int i = 0; i < target.size(); ++i) { - int64 id = vspace.TensorId(target[i]); + for (int i = 0; i < target_tensor_ids.size(); ++i) { + const int64 id = target_tensor_ids[i]; if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { if (!output_gradients.empty() && output_gradients[i] != nullptr) { // TODO(apassos) figure out how to print debugging information here. @@ -371,7 +364,31 @@ Status InitialGradients( } } else { if (output_gradients.empty() || output_gradients[i] == nullptr) { - (*result)[id].push_back(vspace.OnesLike(target[i])); + auto tensor_it = tensor_tape.find(id); + if (tensor_it != tensor_tape.end() && tensor_it->second != -1) { + auto op_it = op_tape.find(tensor_it->second); + if (op_it == op_tape.end()) { + return errors::Internal( + "Internal state of the gradient tape is invalid."); + } + bool found = false; + for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) { + if (op_it->second.output_tensor_info[j].id == id) { + found = true; + (*result)[id].push_back( + vspace.Ones(op_it->second.output_tensor_info[j].shape, + op_it->second.output_tensor_info[j].dtype)); + break; + } + } + if (!found) { + return errors::Internal( + "Internal state of the gradient tape is invalid."); + } + } else { + // No record of the target tensor found on the tape, so no gradient + // needs to be computed from it. Do nothing. + } } else { (*result)[id].push_back(output_gradients[i]); } @@ -388,29 +405,22 @@ Status InitialGradients( constexpr int kMinAggregateCount = 4; constexpr int kMinAggregateBytes = 128 * 1024 * 1024; -template -Status GradientTape::ComputeGradient( - const VSpace& vspace, - gtl::ArraySlice target, gtl::ArraySlice sources, +template +Status GradientTape::ComputeGradient( + const VSpace& vspace, + gtl::ArraySlice target_tensor_ids, + gtl::ArraySlice source_tensor_ids, gtl::ArraySlice output_gradients, std::vector* result) { - std::vector id_sources; - id_sources.reserve(sources.size()); - for (Tensor* s : sources) { - id_sources.push_back(vspace.TensorId(s)); - } - std::unordered_set sources_set(id_sources.begin(), id_sources.end()); - std::vector id_targets; - id_sources.reserve(target.size()); - for (Tensor* t : target) { - id_targets.push_back(vspace.TensorId(t)); - } + std::unordered_set sources_set(source_tensor_ids.begin(), + source_tensor_ids.end()); BackpropInitialState state = PrepareBackprop( - id_targets, tensor_tape_, std::move(op_tape_), sources_set); + target_tensor_ids, tensor_tape_, std::move(op_tape_), sources_set); std::vector op_stack = InitialStack(state.op_tape, state.op_missing_tensor); std::unordered_map> gradients; - Status s = InitialGradients(vspace, target, output_gradients, + Status s = InitialGradients(vspace, target_tensor_ids, output_gradients, + tensor_tape_, state.op_tape, state.tensor_usage_counts, &gradients); auto cleanup = [&state]() { // Release all backprop functions @@ -542,8 +552,8 @@ Status GradientTape::ComputeGradient( } } CHECK(state.op_tape.empty()); - result->reserve(sources.size()); - for (auto is : id_sources) { + result->reserve(source_tensor_ids.size()); + for (auto is : source_tensor_ids) { auto grad_it = gradients.find(is); if (grad_it == gradients.end()) { result->push_back(nullptr); diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 111d7cef56..0a92ab38a8 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -744,7 +744,7 @@ _default_vspace = imperative_grad.VSpace( aggregate_fn=_aggregate_grads, tensor_id=ops.tensor_id, zeros=_zeros, - ones_like=lambda x: ops.convert_to_tensor(array_ops.ones_like(x))) + ones=array_ops.ones) class GradientTape(object): diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index 8932b7157b..837cad974a 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -26,7 +26,7 @@ from tensorflow.python.framework import errors VSpace = collections.namedtuple( "VSpace", - ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones_like"]) + ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones"]) def imperative_grad( diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index d67c3b18f7..77b49be8f8 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -443,8 +443,7 @@ void TFE_DeleteContextCapsule(PyObject* context) { TF_DeleteStatus(status); } -using GradientTape = - tensorflow::eager::GradientTape; +using GradientTape = tensorflow::eager::GradientTape; typedef struct { PyObject_HEAD @@ -630,8 +629,7 @@ void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); } -class PyVSpace - : public tensorflow::eager::VSpace { +class PyVSpace : public tensorflow::eager::VSpace { public: explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} @@ -648,9 +646,9 @@ class PyVSpace if (zeros_ == nullptr) { return tensorflow::errors::InvalidArgument("invalid vspace"); } - ones_like_ = PyObject_GetAttrString(reinterpret_cast(py_vspace_), - "ones_like"); - if (ones_like_ == nullptr) { + ones_ = + PyObject_GetAttrString(reinterpret_cast(py_vspace_), "ones"); + if (ones_ == nullptr) { return tensorflow::errors::InvalidArgument("invalid vspace"); } return tensorflow::Status::OK(); @@ -660,7 +658,7 @@ class PyVSpace Py_XDECREF(num_elements_); Py_XDECREF(aggregate_fn_); Py_XDECREF(zeros_); - Py_XDECREF(ones_like_); + Py_XDECREF(ones_); } tensorflow::int64 NumElements(PyObject* tensor) const final { @@ -706,24 +704,21 @@ class PyVSpace return reinterpret_cast(result); } - PyObject* OnesLike(PyObject* tensor) const final { - PyObject* arg_list = Py_BuildValue("(O)", tensor); - PyObject* result = PyEval_CallObject(ones_like_, arg_list); - if (result == nullptr) { - VLOG(1) << "Call to ones_like failed"; + PyObject* Ones(tensorflow::TensorShape shape, + tensorflow::DataType dtype) const final { + PyObject* py_shape = PyTuple_New(shape.dims()); + for (int i = 0; i < shape.dims(); ++i) { + PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); } + PyObject* py_dtype = PyLong_FromLong(static_cast(dtype)); + PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype); + PyObject* result = PyEval_CallObject(ones_, arg_list); Py_DECREF(arg_list); + Py_DECREF(py_dtype); + Py_DECREF(py_shape); return result; } - tensorflow::int64 TensorId(PyObject* tensor) const final { - PyObject* py_tensor = reinterpret_cast(tensor); - PyObject* id_field = PyObject_GetAttrString(py_tensor, "_id"); - tensorflow::int64 id = MakeInt(id_field); - Py_DECREF(id_field); - return id; - } - tensorflow::Status CallBackwardFunction( PyObject* backward_function, tensorflow::gtl::ArraySlice output_gradients, @@ -781,7 +776,7 @@ class PyVSpace PyObject* num_elements_; PyObject* aggregate_fn_; PyObject* zeros_; - PyObject* ones_like_; + PyObject* ones_; }; std::vector MakeTensorList(PyObject* tensors) { @@ -799,6 +794,28 @@ std::vector MakeTensorList(PyObject* tensors) { return list; } +std::vector MakeTensorIDList(PyObject* tensors) { + PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); + if (seq == nullptr) { + return {}; + } + int len = PySequence_Fast_GET_SIZE(seq); + std::vector list; + list.reserve(len); + for (int i = 0; i < len; ++i) { + PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i); + if (EagerTensor_CheckExact(tensor)) { + list.push_back(EagerTensor_id(tensor)); + } else { + PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); + list.push_back(MakeInt(id_field)); + Py_DECREF(id_field); + } + } + Py_DECREF(seq); + return list; +} + PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, PyObject* target, PyObject* sources, PyObject* output_gradients, TF_Status* status) { @@ -807,11 +824,11 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, return nullptr; } - std::vector target_vec = MakeTensorList(target); + std::vector target_vec = MakeTensorIDList(target); if (PyErr_Occurred()) { return nullptr; } - std::vector sources_vec = MakeTensorList(sources); + std::vector sources_vec = MakeTensorIDList(sources); if (PyErr_Occurred()) { return nullptr; } -- GitLab From d7b22fbfdf707d6c6fc8df553242da36dab20e47 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 16:11:06 -0800 Subject: [PATCH 0126/1801] Materialize constants in more cases. PiperOrigin-RevId: 175228264 --- .../grappler/optimizers/constant_folding.cc | 140 +++++++++++++----- .../optimizers/constant_folding_test.cc | 29 +++- tensorflow/core/util/bcast.cc | 2 - 3 files changed, 131 insertions(+), 40 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index a364ca487e..02a732b092 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/bcast.h" namespace tensorflow { namespace grappler { @@ -301,6 +302,44 @@ bool ShapesEqual(const TensorShapeProto& shape1, return true; } +namespace { +bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties, + BCast::Vec* shape, int64* min_id) { + if (shape_node.op() == "Shape") { + const std::vector& prop1 = + properties.GetInputProperties(shape_node.name()); + if (prop1.size() != 1) { + return false; + } + const TensorShapeProto& shp = prop1[0].shape(); + if (shp.unknown_rank()) { + return false; + } + for (const auto& dim : shp.dim()) { + shape->push_back(dim.size()); + *min_id = std::min(*min_id, dim.size()); + } + } else { + const TensorProto& raw_val = shape_node.attr().at("value").tensor(); + if (raw_val.dtype() != DT_INT64 && raw_val.dtype() != DT_INT32) { + return false; + } + Tensor value(raw_val.dtype(), raw_val.tensor_shape()); + if (!value.FromProto(raw_val)) { + return false; + } + for (int j = 0; j < value.NumElements(); ++j) { + if (raw_val.dtype() == DT_INT64) { + shape->push_back(value.vec()(j)); + } else { + shape->push_back(value.vec()(j)); + } + } + } + return true; +} +} // namespace + Status ConstantFolding::MaterializeConstants( const GrapplerItem& item, const GraphProperties& properties) { const int node_count = graph_.node_size(); @@ -312,49 +351,76 @@ Status ConstantFolding::MaterializeConstants( } const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); - if (shape_node1 == nullptr || shape_node1->op() != "Shape" || - shape_node2 == nullptr || shape_node2->op() != "Shape") { + if (shape_node1 == nullptr || + (shape_node1->op() != "Shape" && shape_node1->op() != "Const") || + shape_node2 == nullptr || + (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) { continue; } - const std::vector& prop1 = - properties.GetInputProperties(shape_node1->name()); - const std::vector& prop2 = - properties.GetInputProperties(shape_node2->name()); - if (prop1.size() != 1 || prop2.size() != 1) { + int64 min_id = 0; + BCast::Vec shape1; + if (!ExtractShape(*shape_node1, properties, &shape1, &min_id)) { + continue; + } + BCast::Vec shape2; + if (!ExtractShape(*shape_node2, properties, &shape2, &min_id)) { + continue; + } + // A value of -1 means we don't known anything about the dimension. Replace + // the -1 values with unique dimension ids since we don't want two '-1' + // dimensions to be considered equal. + for (auto& id : shape1) { + if (id == -1) { + id = --min_id; + } + } + for (auto& id : shape2) { + if (id == -1) { + id = --min_id; + } + } + BCast bcast(shape1, shape2); + if (!bcast.IsValid()) { continue; } - const TensorShapeProto& shape1 = prop1[0].shape(); - const TensorShapeProto& shape2 = prop2[0].shape(); - if (ShapesEqual(shape1, shape2)) { - DataType type = node.attr().at("T").type(); - Tensor empty(type, TensorShape()); - NodeDef* out[2]; - for (int i = 0; i < 2; ++i) { + BCast::Vec reduce_dims[2]; + reduce_dims[0] = bcast.grad_x_reduce_idx(); + reduce_dims[1] = bcast.grad_y_reduce_idx(); + + const DataType type = node.attr().at("T").type(); + NodeDef* out[2]; + for (int j = 0; j < 2; ++j) { + if (!reduce_dims[j].empty()) { + // This is the case when a tensor dimension 1 is matched against an + // unknown dimension. The unknown dimension could also be equal to 1, in + // which case there would be no reduction. + out[j] = nullptr; + } else { + Tensor value(type, TensorShape({0})); string const_name = AddPrefixToNodeName( - strings::StrCat(node.name(), "-", i), kConstantFoldingConst); - out[i] = node_map_->GetNode(const_name); - if (!out[i]) { - out[i] = graph_.add_node(); - *out[i] = CreateNodeDef(const_name, TensorValue(&empty)); - out[i]->set_device(node.device()); - node_map_->AddNode(const_name, out[i]); + strings::StrCat(node.name(), "-", j), kConstantFoldingConst); + out[j] = node_map_->GetNode(const_name); + if (!out[j]) { + out[j] = graph_.add_node(); + *out[j] = CreateNodeDef(const_name, TensorValue(&value)); + out[j]->set_device(node.device()); + node_map_->AddNode(const_name, out[j]); string ctrl_dep = AddControlDependency(node.name(), &graph_, node_map_.get()); - *out[i]->add_input() = ctrl_dep; + *out[j]->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), const_name); } } + } - auto outputs = node_map_->GetOutputs(node.name()); - for (const auto& output : outputs) { - for (int k = 0; k < output->input_size(); ++k) { - int port; - string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port >= 0 && port < 2) { - *output->mutable_input(k) = out[port]->name(); - node_map_->UpdateInput(output->name(), node_name, - out[port]->name()); - } + auto outputs = node_map_->GetOutputs(node.name()); + for (const auto& output : outputs) { + for (int k = 0; k < output->input_size(); ++k) { + int port; + string node_name = ParseNodeName(output->input(k), &port); + if (node_name == node.name() && port >= 0 && port < 2 && out[port]) { + *output->mutable_input(k) = out[port]->name(); + node_map_->UpdateInput(output->name(), node_name, out[port]->name()); } } } @@ -1005,15 +1071,13 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, GraphProperties properties(item); Status s = properties.InferStatically(); bool has_feed = !item.feed.empty(); - // bool has_feed = false; + if (!has_feed && s.ok()) { // Only use static shape information when there is no feed in the // graph. That's because it's possible to feed a placeholder with a tensor // of any shape, which could make the static information inconsistent with // the shapes actually fed. - if (s.ok()) { - TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); - } + TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); } if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) { TF_RETURN_IF_ERROR(MaterializeConstants(item, properties)); @@ -1040,12 +1104,14 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GrapplerItem item_to_optimize = item; *output = item.graph; + int64 node_count; do { graph_.Swap(output); item_to_optimize.graph = graph_; *output = GraphDef(); + node_count = graph_.node_size(); TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output)); - } while (output->node_size() < graph_.node_size()); + } while (output->node_size() != node_count); *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 17f9854b59..43f84b1ddf 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -849,10 +849,18 @@ TEST_F(ConstantFoldingTest, ConstantMaterialization) { Output c = ops::Mul(s.WithOpName("c"), a, b); Output d = ops::Shape(s.WithOpName("d"), a); Output e = ops::Shape(s.WithOpName("e"), b); + auto f = ops::internal::BroadcastGradientArgs(s.WithOpName("f"), d, e); Output o1 = ops::Identity(s.WithOpName("o1"), f.r0); Output o2 = ops::Identity(s.WithOpName("o2"), f.r1); + Output g = ops::Placeholder(s.WithOpName("g"), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({1}))); + Output h = ops::Shape(s.WithOpName("h"), g); + auto i = ops::internal::BroadcastGradientArgs(s.WithOpName("i"), d, h); + Output p1 = ops::Identity(s.WithOpName("p1"), i.r0); + Output p2 = ops::Identity(s.WithOpName("p2"), i.r1); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); @@ -881,14 +889,33 @@ TEST_F(ConstantFoldingTest, ConstantMaterialization) { EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^f", node.input(0)); + EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) + .num_elements()); } else if (node.name() == "ConstantFolding/f-1") { ++found; EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^f", node.input(0)); + EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) + .num_elements()); + } else if (node.name() == "p1") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("ConstantFolding/i-0", node.input(0)); + } else if (node.name() == "p2") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("i:1", node.input(0)); + } else if (node.name() == "ConstantFolding/i-0") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^i", node.input(0)); + EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) + .num_elements()); } } - EXPECT_EQ(4, found); + EXPECT_EQ(7, found); } } // namespace diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc index 47e6ddb3d8..1eab7e3d02 100644 --- a/tensorflow/core/util/bcast.cc +++ b/tensorflow/core/util/bcast.cc @@ -68,9 +68,7 @@ BCast::BCast(const Vec& sx, const Vec& sy, const bool fewer_dims_optimization) { // Output shape. State curr = UNKNOWN; const int64 x_i = x[i]; // i-th dimension of x. - CHECK_GE(x_i, 0); const int64 y_i = y[i]; // i-th dimension of y. - CHECK_GE(y_i, 0); int64 o_i; // i-th dimension of the output. int64 bx_i; // i-th broadcast for x. int64 by_i; // i-th broadcast for y. -- GitLab From 9a5f4814bafdd53d574d3c8aabc859d8a06ba39d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 16:11:25 -0800 Subject: [PATCH 0127/1801] Added some additional documentation to the swish() function PiperOrigin-RevId: 175228315 --- tensorflow/python/ops/nn_impl.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 2c83e4e29f..431ea1186a 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -275,9 +275,6 @@ def _swish_shape(op): return [op.inputs[0].shape] -# Set noinline=True so that sigmoid(features) is re-computed during -# backprop, and we can free the sigmoid(features) expression immediately -# after use during the forward pass. @function.Defun(shape_func=_swish_shape, func_name="swish_grad", noinline=True) def _swish_grad(features, grad): """Gradient of Swish function defined below.""" @@ -287,6 +284,11 @@ def _swish_grad(features, grad): return grad * activation_grad +# Naively, x * tf.nn.sigmoid(x) requires keeping both x and sigmoid(x) around +# for backprop, effectively doubling the tensor's memory consumption. We use a +# @Defun decorator with noinline=True so that sigmoid(features) is re-computed +# during backprop, and we can free the sigmoid(features) expression immediately +# after use during the forward pass. @function.Defun( grad_func=_swish_grad, shape_func=_swish_shape, @@ -296,7 +298,7 @@ def swish(features): # pylint: disable=g-doc-args """Computes the Swish activation function: `x * sigmoid(x)`. - Source: "Swish: a Self-Gated Activation Function" (Ramachandran et al. 2017) + Source: "Searching for Activation Functions" (Ramachandran et al. 2017) https://arxiv.org/abs/1710.05941 Args: -- GitLab From 39cee098f01a56bd67be41648342f4008870b988 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 16:23:43 -0800 Subject: [PATCH 0128/1801] Always push updated nodes to the queue for possible further optimization. PiperOrigin-RevId: 175229944 --- .../optimizers/arithmetic_optimizer.cc | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 44d16e5a42..f2277a9b79 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -703,7 +703,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( node_map->AddOutput(new_transpose->name(), new_cast->name()); new_nodes->push_back(new_transpose); - new_nodes->push_back(new_cast); // Add frame dependencies that the original node might have had. AddFrameControlDeps(node, {new_transpose, new_cast}, new_transpose->input(0), {new_transpose}, @@ -880,7 +879,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( new_mul_node->set_device(node->device()); SetDataTypeToAttr(type, "T", new_mul_node); node_map->AddNode(new_mul_node->name(), new_mul_node); - new_nodes->push_back(new_mul_node); new_mul_node->add_input(new_const_node->name()); node_map->AddOutput(new_const_node->name(), new_mul_node->name()); new_mul_node->add_input(node->input(0)); @@ -945,7 +943,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( new_mul_node->set_name(new_mul_node->name() + "_hoist"); new_mul_node->set_input(0, common_factor); new_mul_node->set_input(1, new_add_node->name()); - new_nodes->push_back(new_mul_node); node_map->AddNode(new_mul_node->name(), new_mul_node); } } @@ -1045,10 +1042,14 @@ namespace { template class SetVector { public: - void PushBack(const T& value) { - CHECK(!Exists(value)) << "Value " << value << " is already in the set."; - set_.insert(value); + // Returns false if value already existed in the set, true otherwise. + bool PushBack(const T& value) { + if (!set_.insert(value).second) { + VLOG(2) << "Value " << value << " is already in the set."; + return false; + } vector_.push_back(value); + return true; } T PopBack() { @@ -1089,6 +1090,11 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( } if (NodeName(simplified_tensor) != node->name()) { + // Always consider simplified_tensor for further optimizations. + const NodeDef* simplified_node = node_map.GetNode(simplified_tensor); + if (simplified_node != nullptr) { + nodes_to_simplify.PushBack(simplified_node); + } // When `node` is simplifed to another node rather than in-place, the // consumers of `node` are already redirected to `simplified_tensor`. // Re-push the consumers into `nodes_to_simplify` for further -- GitLab From 0719d26b1e61d13af1754b28ae855ba094d944ea Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 9 Nov 2017 16:26:07 -0800 Subject: [PATCH 0129/1801] Increase tolerance in flaky multinomial test. PiperOrigin-RevId: 175230217 --- .../python/kernel_tests/distributions/multinomial_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py index 614a34f077..ebc89f15c5 100644 --- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py +++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py @@ -283,10 +283,10 @@ class MultinomialTest(test.TestCase): dist.variance(), dist.stddev(), ]) - self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.01) - self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.01) - self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.01) - self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.01) + self.assertAllClose(sample_mean_, analytic_mean, atol=0.01, rtol=0.01) + self.assertAllClose(sample_cov_, analytic_cov, atol=0.01, rtol=0.01) + self.assertAllClose(sample_var_, analytic_var, atol=0.01, rtol=0.01) + self.assertAllClose(sample_stddev_, analytic_stddev, atol=0.01, rtol=0.01) def testSampleUnbiasedNonScalarBatch(self): with self.test_session() as sess: -- GitLab From 47f8f08f0db5bb668d73993624691e8e9d064af4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 16:35:57 -0800 Subject: [PATCH 0130/1801] Support more instructions in Hlo parser: Broadcast, Concatenate, Map, Reduce, SelectAndScatter, Reverse, Slice, DynamicSlice, DynamicUpdateSlice, Transpose, BatchNormTraining, BatchNormInference, BatchNormGrad. PiperOrigin-RevId: 175231463 --- .../compiler/xla/service/hlo_instruction.cc | 12 +- .../compiler/xla/tools/parser/hlo_parser.cc | 314 +++++++++++++++++- .../xla/tools/parser/hlo_parser_test.cc | 231 ++++++++++++- 3 files changed, 540 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index ffb933155f..1b2161fc2e 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1891,7 +1891,7 @@ std::vector HloInstruction::ExtraAttributesToString() const { if (padding_config_ != nullptr) { extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); } - if (!slice_starts_.empty() && !slice_limits_.empty()) { + if (opcode() == HloOpcode::kSlice) { std::vector bounds; bounds.reserve(slice_starts_.size()); const bool omit_stride = @@ -1904,6 +1904,16 @@ std::vector HloInstruction::ExtraAttributesToString() const { } extra.push_back(StrCat("slice={", Join(bounds, ", "), "}")); } + if (opcode() == HloOpcode::kDynamicSlice) { + extra.push_back( + StrCat("dynamic_slice_sizes={", Join(dynamic_slice_sizes(), ","), "}")); + } + if (opcode() == HloOpcode::kBatchNormTraining || + opcode() == HloOpcode::kBatchNormInference || + opcode() == HloOpcode::kBatchNormGrad) { + extra.push_back(StrCat("epsilon=", epsilon())); + extra.push_back(StrCat("feature_index=", feature_index())); + } if (convolution_dimension_numbers_ != nullptr) { extra.push_back(ConvolutionDimensionNumbersToString()); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 3741c3daac..710877b4e0 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -80,14 +80,25 @@ class HloParser { bool ParseOperands(std::vector* operands, const int expected_size); + // Describes the start, limit, and stride on every dimension of the operand + // being sliced. + struct SliceRanges { + std::vector starts; + std::vector limits; + std::vector strides; + }; + // Types of attributes. enum class AttrTy { kInt64, + kFloat, + kBracedInt64List, kHloComputation, kWindow, kConvolutionDimensionNumbers, kSharding, kInstructionList, + kSliceRanges, }; struct AttrConfig { @@ -131,6 +142,10 @@ class HloParser { // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. bool ParseWindowPad(std::vector>* pad); + bool ParseSliceRanges(SliceRanges* result); + bool ParseInt64List(const TokKind start, const TokKind end, + const TokKind delim, std::vector* result); + bool ParseParamList(); bool ParseName(string* result); bool ParseAttributeName(string* result); @@ -535,26 +550,190 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums)); break; } - case HloOpcode::kBroadcast: + case HloOpcode::kBroadcast: { + optional> broadcast_dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &broadcast_dimensions}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateBroadcast( + shape, operands[0], *broadcast_dimensions)); + break; + } + case HloOpcode::kConcatenate: { + optional> dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs) || + dimensions->size() != 1) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateConcatenate( + shape, operands, dimensions->at(0))); + break; + } + case HloOpcode::kMap: { + optional to_apply; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &to_apply}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateMap(shape, operands, *to_apply)); + break; + } + case HloOpcode::kReduce: { + optional reduce_computation; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &reduce_computation}; + optional> dimensions_to_reduce; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions_to_reduce}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateReduce( + shape, /*operand=*/operands[0], /*init_value=*/operands[1], + *dimensions_to_reduce, *reduce_computation)); + break; + } + case HloOpcode::kReverse: { + optional> dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateReverse(shape, operands[0], *dimensions)); + break; + } + case HloOpcode::kSelectAndScatter: { + optional select; + attrs["select"] = {/*required=*/true, AttrTy::kHloComputation, &select}; + optional scatter; + attrs["scatter"] = {/*required=*/true, AttrTy::kHloComputation, &scatter}; + optional window; + attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateSelectAndScatter( + shape, /*operand=*/operands[0], *select, *window, + /*source=*/operands[1], /*init_value=*/operands[2], *scatter)); + break; + } + case HloOpcode::kSlice: { + optional slice_ranges; + attrs["slice"] = {/*required=*/true, AttrTy::kSliceRanges, &slice_ranges}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateSlice( + shape, operands[0], slice_ranges->starts, slice_ranges->limits, + slice_ranges->strides)); + break; + } + case HloOpcode::kDynamicSlice: { + optional> dynamic_slice_sizes; + attrs["dynamic_slice_sizes"] = { + /*required=*/true, AttrTy::kBracedInt64List, &dynamic_slice_sizes}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateDynamicSlice( + shape, /*operand=*/operands[0], /*start_indices=*/operands[1], + *dynamic_slice_sizes)); + break; + } + case HloOpcode::kDynamicUpdateSlice: { + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + shape, /*operand=*/operands[0], /*update=*/operands[1], + /*start_indices=*/operands[2])); + break; + } + case HloOpcode::kTranspose: { + optional> dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateTranspose(shape, operands[0], *dimensions)); + break; + } + case HloOpcode::kBatchNormTraining: { + optional epsilon; + attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; + optional feature_index; + attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, + &feature_index}; + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateBatchNormTraining( + shape, /*operand=*/operands[0], /*scale=*/operands[1], + /*offset=*/operands[2], *epsilon, *feature_index)); + break; + } + case HloOpcode::kBatchNormInference: { + optional epsilon; + attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; + optional feature_index; + attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, + &feature_index}; + if (!ParseOperands(&operands, /*expected_size=*/5) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateBatchNormInference( + shape, /*operand=*/operands[0], /*scale=*/operands[1], + /*offset=*/operands[2], /*mean=*/operands[3], + /*variance=*/operands[4], *epsilon, *feature_index)); + break; + } + case HloOpcode::kBatchNormGrad: { + optional epsilon; + attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; + optional feature_index; + attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, + &feature_index}; + if (!ParseOperands(&operands, /*expected_size=*/5) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateBatchNormGrad( + shape, /*operand=*/operands[0], /*scale=*/operands[1], + /*mean=*/operands[2], /*variance=*/operands[3], + /*grad_output=*/operands[4], *epsilon, *feature_index)); + break; + } case HloOpcode::kCustomCall: - case HloOpcode::kConcatenate: case HloOpcode::kReducePrecision: - case HloOpcode::kMap: case HloOpcode::kPad: - case HloOpcode::kReduce: - case HloOpcode::kSelectAndScatter: - case HloOpcode::kReverse: case HloOpcode::kRng: - case HloOpcode::kSlice: - case HloOpcode::kDynamicSlice: - case HloOpcode::kDynamicUpdateSlice: - case HloOpcode::kTranspose: case HloOpcode::kFusion: - case HloOpcode::kBatchNormTraining: - case HloOpcode::kBatchNormInference: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: - case HloOpcode::kBatchNormGrad: case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); @@ -1121,6 +1300,19 @@ bool HloParser::ParseAttributes( static_cast*>(attr_out_ptr)->emplace(result); return true; } + case AttrTy::kFloat: { + double result; + if (!ParseDouble(&result)) { + return false; + } + if (result > std::numeric_limits::max() || + result < std::numeric_limits::lowest()) { + return TokenError("value out of range for float"); + } + static_cast*>(attr_out_ptr) + ->emplace(static_cast(result)); + return true; + } case AttrTy::kHloComputation: { HloComputation* result; if (!ParseComputationName(&result)) { @@ -1164,6 +1356,24 @@ bool HloParser::ParseAttributes( ->emplace(result); return true; } + case AttrTy::kBracedInt64List: { + std::vector result; + if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, + TokKind::kComma, &result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kSliceRanges: { + SliceRanges result; + if (!ParseSliceRanges(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } } }(); if (!success) { @@ -1380,6 +1590,84 @@ bool HloParser::ParseConvolutionDimensionNumbers( return true; } +// ::= '{' ranges '}' +// ::= /*empty*/ +// ::= range (',' range)* +// range ::= '[' start ':' limit (':' stride)? ']' +// +// The slice ranges are printed as: +// +// {[dim0_start:dim0_limit:dim0stride], [dim1_start:dim1_limit], ...} +// +// This function extracts the starts, limits, and strides as 3 vectors to the +// result. If stride is not present, stride is 1. For example, if the slice +// ranges is printed as: +// +// {[2:3:4], [5:6:7], [8:9]} +// +// The the parsed result will be: +// +// {/*starts=*/{2, 5, 8}, /*limits=*/{3, 6, 9}, /*strides=*/{4, 7, 1}} +// +bool HloParser::ParseSliceRanges(SliceRanges* result) { + if (!ParseToken(TokKind::kLbrace, "expects '{' to start ranges")) { + return false; + } + std::vector> ranges; + if (lexer_.GetKind() == TokKind::kRbrace) { + // empty + return ParseToken(TokKind::kRbrace, "expects '}' to end ranges"); + } + do { + ranges.emplace_back(); + if (!ParseInt64List(TokKind::kLsquare, TokKind::kRsquare, TokKind::kColon, + &ranges.back())) { + return false; + } + } while (EatIfPresent(TokKind::kComma)); + + for (const auto& range : ranges) { + if (range.size() != 2 && range.size() != 3) { + return TokenError(Printf( + "expects [start:limit:step] or [start:limit], but sees %ld elements.", + range.size())); + } + } + + for (const auto& range : ranges) { + result->starts.push_back(range[0]); + result->limits.push_back(range[1]); + result->strides.push_back(range.size() == 3 ? range[2] : 1); + } + return ParseToken(TokKind::kRbrace, "expects '}' to end ranges"); +} + +// int64list ::= start int64_elements end +// int64_elements +// ::= /*empty*/ +// ::= int64_val (delim int64_val)* +bool HloParser::ParseInt64List(const TokKind start, const TokKind end, + const TokKind delim, + std::vector* result) { + if (!ParseToken(start, StrCat("expects an int64 list starting with ", + TokKindToString(start)))) { + return false; + } + if (lexer_.GetKind() == end) { + // empty + } else { + do { + int64 i; + if (!ParseInt64(&i)) { + return false; + } + result->push_back(i); + } while (EatIfPresent(delim)); + } + return ParseToken( + end, StrCat("expects an int64 list to end with ", TokKindToString(end))); +} + // param_list ::= '(' param_list1 ')' // param_list1 // ::= /*empty*/ diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index ca476a4bb7..fbe0409e3d 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -44,10 +44,11 @@ std::vector CreateTestCases() { "AxpyParam", R"(HloModule axpy_module: -ENTRY %axpy.v5 (alpha: f32[2,4], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { - %alpha = f32[2,4]{1,0} parameter(0) +ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { + %alpha = f32[] parameter(0) + %broadcast = f32[2,4]{1,0} broadcast(f32[] %alpha), dimensions={} %x = f32[2,4]{1,0} parameter(1) - %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %alpha, f32[2,4]{1,0} %x) + %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x) %y = f32[2,4]{1,0} parameter(2) ROOT %add = f32[2,4]{1,0} add(f32[2,4]{1,0} %multiply, f32[2,4]{1,0} %y) } @@ -296,6 +297,218 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f } +)" +}, +// reverse(constant) +{ +"Reverse4D", +R"(HloModule Reverse4DFloatArrayOnDim01_module: + +ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] { + %constant = f32[4,3,2,1]{0,1,2,3} constant(f32[4,3,2,1] { { /*i0=0*/ { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} }, { /*i1=2*/ {5}, {6} } }, { /*i0=1*/ { /*i1=0*/ {7}, {8} }, { /*i1=1*/ {9}, {10} }, { /*i1=2*/ {11}, {12} } }, { /*i0=2*/ { /*i1=0*/ {13}, {14} }, { /*i1=1*/ {15}, {16} }, { /*i1=2*/ {17}, {18} } }, { /*i0=3*/ { /*i1=0*/ {19}, {20} }, { /*i1=1*/ {21}, {22} }, { /*i1=2*/ {23}, {24} } } }) + ROOT %reverse = f32[4,3,2,1]{0,1,2,3} reverse(f32[4,3,2,1]{0,1,2,3} %constant), dimensions={0,1} +} + +)" +}, +// concat +{ +"Concat", +R"(HloModule Concat2x3With2x5_module: + +ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] { + %constant = f32[2,3]{1,0} constant(f32[2,3] { { 0, 1, 2 }, { 1000, 1001, 1002 } }) + %constant.1 = f32[2,5]{1,0} constant(f32[2,5] { { 64, 65, 66, 67, 68 }, { 1064, 1065, 1066, 1067, 1068 } }) + ROOT %concatenate = f32[2,8]{1,0} concatenate(f32[2,3]{1,0} %constant, f32[2,5]{1,0} %constant.1), dimensions={1} +} + +)" +}, +// map +{ +"Map", +R"(HloModule MapBinaryAdder_module: + +%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) +} + +ENTRY %MapBinaryAdder.v3 (param0: f32[4], param1: f32[4]) -> f32[4] { + %param0 = f32[4]{0} parameter(0) + %param1 = f32[4]{0} parameter(1) + ROOT %map = f32[4]{0} map(f32[4]{0} %param0, f32[4]{0} %param1), to_apply=%add_F32.v3 +} + +)" +}, +// reduce +{ +"Reduce", +R"(HloModule ReduceR3ToR2_module: + +%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) +} + +ENTRY %ReduceR3ToR2.v3 (input: f32[8,16,256]) -> f32[8,16] { + %input = f32[8,16,256]{2,1,0} parameter(0) + %constant = f32[] constant(0) + ROOT %reduce = f32[8,16]{1,0} reduce(f32[8,16,256]{2,1,0} %input, f32[] %constant), dimensions={2}, to_apply=%add_F32.v3 +} + +)" +}, +// select and scatter +{ +"SelectAndScatter", +R"(HloModule R4F32OverlapSmall_module: + +%ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %greater-than-or-equal-to = pred[] greater-than-or-equal-to(f32[] %lhs, f32[] %rhs) +} + +%add_F32.v3 (lhs.1: f32[], rhs.1: f32[]) -> f32[] { + %lhs.1 = f32[] parameter(0) + %rhs.1 = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs.1, f32[] %rhs.1) +} + +ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] { + %constant = f32[4,5,1,1]{3,2,1,0} constant(f32[4,5,1,1] { { /*i0=0*/ { /*i1=0*/ {7} }, { /*i1=1*/ {2} }, { /*i1=2*/ {5} }, { /*i1=3*/ {3} }, { /*i1=4*/ {8} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {8} }, { /*i1=2*/ {9} }, { /*i1=3*/ {3} }, { /*i1=4*/ {4} } }, { /*i0=2*/ { /*i1=0*/ {1} }, { /*i1=1*/ {5} }, { /*i1=2*/ {7} }, { /*i1=3*/ {5} }, { /*i1=4*/ {6} } }, { /*i0=3*/ { /*i1=0*/ {0} }, { /*i1=1*/ {6} }, { /*i1=2*/ {2} }, { /*i1=3*/ {10} }, { /*i1=4*/ {2} } } }) + %constant.1 = f32[2,2,1,1]{3,2,1,0} constant(f32[2,2,1,1] { { /*i0=0*/ { /*i1=0*/ {2} }, { /*i1=1*/ {6} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {1} } } }) + %constant.2 = f32[] constant(0) + ROOT %select-and-scatter = f32[4,5,1,1]{3,2,1,0} select-and-scatter(f32[4,5,1,1]{3,2,1,0} %constant, f32[2,2,1,1]{3,2,1,0} %constant.1, f32[] %constant.2), window={size=2x3x1x1 stride=2x2x1x1}, select=%ge_F32.v3, scatter=%add_F32.v3 +} + +)" +}, +// slice +{ +"Slice", +R"(HloModule slice_module: + +ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { + %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0) + ROOT %slice = f32[3,3,2,4]{3,2,1,0} slice(f32[3,3,4,4]{3,2,1,0} %p0), slice={[0:3:1], [0:3:1], [0:4:2], [0:4:1]} +} + +)" +}, +// slice, no stride +{ +"SliceNoStride", +R"(HloModule Slice3x3x3_To_1x3x3_F32_module: + +ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] { + %constant = f32[3,3,3]{2,1,0} constant(f32[3,3,3] { { { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 } }, { { 9, 10, 11 }, { 12, 13, 14 }, { 15, 16, 17 } }, { { 18, 19, 20 }, { 21, 22, 23 }, { 24, 25, 26 } } }) + ROOT %slice = f32[1,3,3]{2,1,0} slice(f32[3,3,3]{2,1,0} %constant), slice={[0:1], [0:3], [0:3]} +} + +)" +}, +// slice R0 +{ +"SliceR0", +R"(HloModule SliceR0_module: + +ENTRY %SliceR0.v2 () -> s32[] { + %constant = s32[] constant(1) + ROOT %slice = s32[] slice(s32[] %constant), slice={} +} + +)" +}, +// transpose +{ +"Transpose", +R"(HloModule Transpose_module: + +ENTRY %Transpose.v2 () -> s32[1,2,3] { + %constant = s32[1,2,3]{2,1,0} constant(s32[1,2,3] { { { 1, 2, 3 }, { 4, 5, 6 } } }) + ROOT %transpose = s32[1,2,3]{2,1,0} transpose(s32[1,2,3]{2,1,0} %constant), dimensions={0,1,2} +} + +)" +}, +// Dynamic slice +{ +"DynamicSlice", +R"(HloModule DynamicSlice_module: + +ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) -> s32[2,2,258] { + %original_parameter = s32[2,2,258]{2,1,0} parameter(0) + %constant = s32[1]{0} constant({0}) + %start_index = s32[1]{0} parameter(1) + %concatenate = s32[3]{0} concatenate(s32[1]{0} %constant, s32[1]{0} %constant, s32[1]{0} %start_index), dimensions={0} + ROOT %dynamic-slice = s32[2,2,258]{2,1,0} dynamic-slice(s32[2,2,258]{2,1,0} %original_parameter, s32[3]{0} %concatenate), dynamic_slice_sizes={2,2,258} +} + +)" +}, +// Dynamic update slice +{ +"DynamicUpdateSlice", +R"(HloModule DynamicUpdateSlice_module: + +ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_indices: s32[4]) -> s32[1,1,25,1] { + %input = s32[1,1,25,1]{3,2,1,0} parameter(0) + %update = s32[1,1,2,1]{3,2,1,0} parameter(1) + %start_indices = s32[4]{0} parameter(2) + ROOT %dynamic-update-slice = s32[1,1,25,1]{3,2,1,0} dynamic-update-slice(s32[1,1,25,1]{3,2,1,0} %input, s32[1,1,2,1]{3,2,1,0} %update, s32[4]{0} %start_indices) +} + +)" +}, +// batch norm training +{ +"BatchNormTraining", +R"(HloModule BasicTraining_module: + +ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) { + %constant = f32[2,2,1,2]{3,2,1,0} constant(f32[2,2,1,2] { { /*i0=0*/ { /*i1=0*/ {1, 2} }, { /*i1=1*/ {3, 4} } }, { /*i0=1*/ { /*i1=0*/ {5, 6} }, { /*i1=1*/ {7, 8} } } }) + %constant.1 = f32[2]{0} constant({2, 3}) + %constant.2 = f32[2]{0} constant({1, 2}) + ROOT %batch-norm-training = (f32[2,2,1,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-training(f32[2,2,1,2]{3,2,1,0} %constant, f32[2]{0} %constant.1, f32[2]{0} %constant.2), epsilon=0.001, feature_index=3 +} + +)" +}, +// batch norm inference +{ +"BatchNormInference", +R"(HloModule BatchNormInference_module: + +ENTRY %BatchNormInference.v6 (input: f32[2,2,2,2], offset: f32[2], scale: f32[2], mean: f32[2], variance: f32[2]) -> f32[2,2,2,2] { + %input = f32[2,2,2,2]{3,2,1,0} parameter(0) + %offset = f32[2]{0} parameter(1) + %scale = f32[2]{0} parameter(2) + %mean = f32[2]{0} parameter(3) + %variance = f32[2]{0} parameter(4) + ROOT %batch-norm-inference = f32[2,2,2,2]{3,2,1,0} batch-norm-inference(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %offset, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance), epsilon=0.001, feature_index=0 +} + +)" +}, +// batch norm grad +{ +"BatchNormGrad", +R"(HloModule BatchNormGrad_module: + +ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], variance: f32[2], grad_output: f32[2,2,2,2]) -> (f32[2,2,2,2], f32[2], f32[2]) { + %input = f32[2,2,2,2]{3,2,1,0} parameter(0) + %scale = f32[2]{0} parameter(1) + %mean = f32[2]{0} parameter(2) + %variance = f32[2]{0} parameter(3) + %grad_output = f32[2,2,2,2]{3,2,1,0} parameter(4) + ROOT %batch-norm-grad = (f32[2,2,2,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-grad(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance, f32[2,2,2,2]{3,2,1,0} %grad_output), epsilon=0.001, feature_index=0 +} + )" } }); @@ -568,6 +781,18 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { "'done' is not defined"); } +TEST_F(HloParserTest, SliceAllowOmitStride1) { + const string original = R"(HloModule slice_module: + +ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { + %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0) + ROOT %slice = f32[3,3,2,4]{3,2,1,0} slice(f32[3,3,4,4]{3,2,1,0} %p0), slice={[0:3], [0:3], [0:4:2], [0:4]} +} + +)"; + TF_EXPECT_OK(Parse(original).status()); +} + } // namespace } // namespace tools } // namespace xla -- GitLab From 2397537748552d8a7850b89d1f39dc1fc0b2a9f8 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 9 Nov 2017 16:40:22 -0800 Subject: [PATCH 0131/1801] De-flakify and re-enable tests in replicate_model_fn_test.py. "Reduce metric variables" operation is a single operation across all metric variables, which means it is across all eval metrics. Previously, an update op for every eval metric was conditioned on a copy of overall "reduce metric variables" op. The latter was meant to be idempotent and thus the end result was supposed to be correct. However, "reduce metric variables" op consists of a number of variable assignments and thus is not atomic. If execution of two "reduce metric variables" ops interleaves, then the end result might come out to be incorrect. This caused flakiness in replicate_model_fn_test.py. To fix the problem, there is now a single copy of the "reduce metric variables" and every eval metric is associated with that single instance. PiperOrigin-RevId: 175232016 --- .../python/estimator/replicate_model_fn.py | 18 +-- .../estimator/replicate_model_fn_test.py | 108 +++++++++--------- 2 files changed, 58 insertions(+), 68 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index 421bf18c45..0848c5f62f 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -357,25 +357,17 @@ def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'): [spec.loss for spec in tower_specs], aggregation_device, aggregated_loss_name) - eval_metric_ops_lists = {} + update_ops = [] for tower_spec in tower_specs: - metrics = tower_spec.eval_metric_ops or {} - for name, (_, update_op) in six.iteritems(metrics): - update_ops = eval_metric_ops_lists.setdefault(name, ([])) + for name, (_, update_op) in six.iteritems(tower_spec.eval_metric_ops): update_ops.append(update_op) + with ops_lib.control_dependencies(update_ops): + reduced_update_op = _reduce_metric_variables(len(tower_specs)) + eval_metric_ops = {} for name, (metric_tensor, _) in six.iteritems(tower_specs[0].eval_metric_ops): - with ops_lib.control_dependencies(eval_metric_ops_lists[name]): - # This operation reduces local variables across all metrics, yet is - # called for every metric. This is redundant and it's done because - # it is hard to know what local variables correspond to what metric. - # Estimator is going to execute all `reduced_update_op`s as part of - # a group inside a single `Session.run()` call, which will avoid duplicate - # computation. - reduced_update_op = _reduce_metric_variables(len(tower_specs)) eval_metric_ops[name] = (metric_tensor, reduced_update_op) - estimator_spec['eval_metric_ops'] = eval_metric_ops return model_fn_lib.EstimatorSpec(**estimator_spec) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index bb06700160..21d5a9c327 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -223,34 +223,34 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): features, labels, self.params) del estimator_spec -# TODO(isaprykin): Resolve the source of flakinness. -# def test_eval(self): -# features = np.array([[0.01], [0.002]]) -# labels = np.array([[0.01], [0.02]]) -# -# with self.test_session() as session: -# replicated_model_fn = replicate_model_fn.replicate_model_fn( -# self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) -# estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, -# labels, self.params) -# session.run(variables.local_variables_initializer()) -# session.run(variables.global_variables_initializer()) -# -# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] -# auc, b = estimator_spec.eval_metric_ops['auc'] -# -# session.run([a, b]) -# accuracy = session.run(accuracy) -# auc = session.run(auc) -# -# # Accuracy is 0.0 (no match) in the first tower. -# # Accuracy is 1.0 (match) in the second tower, since the feature -# # times weight "c" happened to be equal to the label. -# total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) -# -# self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) -# self.assertEqual(0, auc) -# self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) + def test_eval(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn.replicate_model_fn( + self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, + labels, self.params) + session.run(variables.local_variables_initializer()) + session.run(variables.global_variables_initializer()) + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + session.run([a, b]) + accuracy = session.run(accuracy) + auc = session.run(auc) + + # loss[i] = features[i] * 10 - labels[i]. + # Accuracy is 0.0 (no match) in the first tower. + # Accuracy is 1.0 (match) in the second tower, since the feature + # times weight "c" happened to be equal to the label. + total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) + + self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) def test_predict(self): features = np.array([[0.01], [0.002]]) @@ -524,33 +524,31 @@ class EvalSpecTest(test_util.TensorFlowTestCase): } return metrics -# TODO(isaprykin): Resolve the source of flakinness. -# def test_example(self): -# with self.test_session() as session: -# tower_losses = map(self.create_constant_loss, [2, 4, 6]) -# tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) -# tower_specs = [ -# self.create_estimator_spec(l, m) -# for l, m in zip(tower_losses, tower_metrics) -# ] -# session.run(variables.local_variables_initializer()) -# -# estimator_spec = replicate_model_fn._eval_spec( -# tower_specs, aggregation_device='/device:GPU:0') -# -# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] -# auc, b = estimator_spec.eval_metric_ops['auc'] -# -# self.assertEqual('/device:CPU:0', accuracy.device) -# self.assertEqual('/device:CPU:0', auc.device) -# -# session.run([a, b]) -# accuracy = session.run(accuracy) -# auc = session.run(auc) -# -# self.assertNear((12 - 2) / 12, accuracy, 0.01) -# self.assertEqual(0, auc) -# self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) + def test_example(self): + with self.test_session() as session: + tower_losses = map(self.create_constant_loss, [2, 4, 6]) + tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) + tower_specs = [ + self.create_estimator_spec(l, m) + for l, m in zip(tower_losses, tower_metrics) + ] + session.run(variables.local_variables_initializer()) + + estimator_spec = replicate_model_fn._eval_spec( + tower_specs, aggregation_device='/device:GPU:0') + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + self.assertEqual('/device:CPU:0', accuracy.device) + self.assertEqual('/device:CPU:0', auc.device) + + session.run([a, b]) + accuracy, auc = session.run([accuracy, auc]) + + self.assertNear((12 - 2) / 12, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) def test_handles_single_tower(self): with self.test_session() as session: -- GitLab From 70e5cf1c486b28579d960d191f957f869a160e34 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 16:45:14 -0800 Subject: [PATCH 0132/1801] Improved the reporting of dimensions PiperOrigin-RevId: 175232587 --- tensorflow/python/grappler/model_analyzer.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc index 4ec7620bce..7d365c3be9 100644 --- a/tensorflow/python/grappler/model_analyzer.cc +++ b/tensorflow/python/grappler/model_analyzer.cc @@ -59,10 +59,15 @@ void ModelAnalyzer::PrintNodeInfo(const NodeDef* node, if (i > 0) { os << ", "; } - if (prop.shape().dim(i).size() < 0) { + if (prop.shape().dim(i).size() >= 0) { + // Print the actual dimension. + os << prop.shape().dim(i).size(); + } else if (prop.shape().dim(i).size() == -1) { + // We don't know anything about the dimension. os << "?"; } else { - os << prop.shape().dim(i).size(); + // Symbolic dimension. + os << "x" << -prop.shape().dim(i).size(); } } os << "]"; -- GitLab From 589279dab6259e578a94d5eb7b2e7498d459c8ec Mon Sep 17 00:00:00 2001 From: Zhengsheng Wei Date: Fri, 10 Nov 2017 09:54:12 +0800 Subject: [PATCH 0133/1801] modify relevant docstring(s) of convolution. Include con2d, con3d and their relevant. --- .../python/layers/core_layers.py | 8 ++-- tensorflow/python/layers/convolutional.py | 48 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/model_pruning/python/layers/core_layers.py b/tensorflow/contrib/model_pruning/python/layers/core_layers.py index ae60d8b1e1..95dfd8f421 100644 --- a/tensorflow/contrib/model_pruning/python/layers/core_layers.py +++ b/tensorflow/contrib/model_pruning/python/layers/core_layers.py @@ -72,8 +72,8 @@ class _MaskedConv(base.Layer): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Regularizer function for the output. @@ -279,8 +279,8 @@ class MaskedConv2D(_MaskedConv): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Regularizer function for the output. diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 6ad18a4e25..ea3c0de5e1 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -254,8 +254,8 @@ class Conv1D(_Conv): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -362,8 +362,8 @@ def conv1d(inputs, linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -450,8 +450,8 @@ class Conv2D(_Conv): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -565,8 +565,8 @@ def conv2d(inputs, linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -654,8 +654,8 @@ class Conv3D(_Conv): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -770,8 +770,8 @@ def conv3d(inputs, linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -860,8 +860,8 @@ class SeparableConv2D(Conv2D): use_bias: Boolean, whether the layer uses a bias. depthwise_initializer: An initializer for the depthwise convolution kernel. pointwise_initializer: An initializer for the pointwise convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. depthwise_regularizer: Optional regularizer for the depthwise convolution kernel. pointwise_regularizer: Optional regularizer for the pointwise @@ -1088,8 +1088,8 @@ def separable_conv2d(inputs, use_bias: Boolean, whether the layer uses a bias. depthwise_initializer: An initializer for the depthwise convolution kernel. pointwise_initializer: An initializer for the pointwise convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. depthwise_regularizer: Optional regularizer for the depthwise convolution kernel. pointwise_regularizer: Optional regularizer for the pointwise @@ -1174,8 +1174,8 @@ class Conv2DTranspose(Conv2D): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -1391,8 +1391,8 @@ def conv2d_transpose(inputs, linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If `None`, then no - bias will be applied. + bias_initializer: An initializer for the bias vector. If `None`, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -1464,8 +1464,8 @@ class Conv3DTranspose(Conv3D): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If `None`, then no - bias will be applied. + bias_initializer: An initializer for the bias vector. If `None`, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. @@ -1705,8 +1705,8 @@ def conv3d_transpose(inputs, linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Optional regularizer function for the output. -- GitLab From 70c79c257c9f4a15252c678a402445bdaeceef2c Mon Sep 17 00:00:00 2001 From: Zhengsheng Wei Date: Fri, 10 Nov 2017 10:12:29 +0800 Subject: [PATCH 0134/1801] restore docstring of core_layers --- .../contrib/model_pruning/python/layers/core_layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/model_pruning/python/layers/core_layers.py b/tensorflow/contrib/model_pruning/python/layers/core_layers.py index 95dfd8f421..ae60d8b1e1 100644 --- a/tensorflow/contrib/model_pruning/python/layers/core_layers.py +++ b/tensorflow/contrib/model_pruning/python/layers/core_layers.py @@ -72,8 +72,8 @@ class _MaskedConv(base.Layer): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. + bias_initializer: An initializer for the bias vector. If None, no bias will + be applied. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Regularizer function for the output. @@ -279,8 +279,8 @@ class MaskedConv2D(_MaskedConv): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, the default - initializer will be used. + bias_initializer: An initializer for the bias vector. If None, no bias will + be applied. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Regularizer function for the output. -- GitLab From b31493889da917c9b78aeb00e23a00e398272c26 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 9 Nov 2017 17:10:33 -0800 Subject: [PATCH 0135/1801] [StreamExecutor] LOG(ERROR) the driver version when cudnnCreate fails. Previously we LOG(INFO)'ed the driver version, which meant it wouldn't be printed unless you passed --logtostderr. But this information is pretty important, especially since cudnnCreate failing is likely to be a fatal error. PiperOrigin-RevId: 175235628 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index a20334e40a..ad8164c7f9 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -390,8 +390,8 @@ port::Status CudnnSupport::Init() { << DriverVersionStatusToString(result); } else { const auto& version = result.ValueOrDie(); - LOG(INFO) << "possibly insufficient driver version: " - << DriverVersionToString(version); + LOG(ERROR) << "possibly insufficient driver version: " + << DriverVersionToString(version); // OS X kernel driver does not report version accurately #if !defined(__APPLE__) if (std::get<0>(version) < 340) { -- GitLab From 9268d1b471cc9f37011d145bc39d0b63d2125c1f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 17:55:08 -0800 Subject: [PATCH 0136/1801] Hlo parser: support padding. Also, give PaddingConfig its own ToString format. PiperOrigin-RevId: 175239832 --- .../compiler/xla/service/hlo_instruction.cc | 18 ++++++- .../compiler/xla/service/hlo_instruction.h | 2 + .../compiler/xla/tools/parser/README.md | 6 +-- .../compiler/xla/tools/parser/hlo_lexer.cc | 13 ++--- .../compiler/xla/tools/parser/hlo_lexer.h | 2 +- .../compiler/xla/tools/parser/hlo_parser.cc | 51 +++++++++++++++++- .../xla/tools/parser/hlo_parser_test.cc | 52 ++++++++++++++++++- .../compiler/xla/tools/parser/hlo_token.h | 2 +- 8 files changed, 130 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 1b2161fc2e..674d3e3836 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1889,7 +1889,8 @@ std::vector HloInstruction::ExtraAttributesToString() const { extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); } if (padding_config_ != nullptr) { - extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); + extra.push_back( + StrCat("padding=", xla::PaddingConfigToString(*padding_config_))); } if (opcode() == HloOpcode::kSlice) { std::vector bounds; @@ -2894,6 +2895,21 @@ StatusOr StringToFusionKind( return InvalidArgument("Unknown fusion kind: %s", kind_name.c_str()); } +string PaddingConfigToString(const PaddingConfig& padding) { + bool has_interior_padding = + std::any_of(padding.dimensions().begin(), padding.dimensions().end(), + [](const PaddingConfig::PaddingConfigDimension& dim) { + return dim.interior_padding() != 0; + }); + return Join( + padding.dimensions(), "x", + [&](string* out, const PaddingConfig::PaddingConfigDimension& dim) { + StrAppend( + out, dim.edge_padding_low(), "_", dim.edge_padding_high(), + has_interior_padding ? StrCat("_", dim.interior_padding()) : ""); + }); +} + std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) { return os << ToString(kind); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 974d43d89e..64a88164a7 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1234,6 +1234,8 @@ string ToString(HloInstruction::FusionKind kind); StatusOr StringToFusionKind( const string& kind_name); +string PaddingConfigToString(const PaddingConfig& padding); + std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind); // Map classes that guarantee a deterministic iteration order when the key is diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md index 986041caf6..b768b94e77 100644 --- a/tensorflow/compiler/xla/tools/parser/README.md +++ b/tensorflow/compiler/xla/tools/parser/README.md @@ -54,9 +54,9 @@ attribute attribute_value : kInt | kName - | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ - | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ - | [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* /*window_pad_pattern*/ + | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ + | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ + | [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* /*pad_pattern*/ | '{' sub_attributes '}' ; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index f70386411c..b5befbf58b 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -254,13 +254,13 @@ TokKind HloLexer::LexPercent() { } // Lex integer and floating-point values, -inf, and patterns for dim labels, -// dxd (e.g. 1x2x3), and window pad. +// dxd (e.g. 1x2x3), and pad. // // fp with exp ::= [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) // fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) // dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} // dxd_pattern ::= [0-9]+(x[0-9]+)+ -// window_pad_pattern ::= [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* +// pad_pattern ::= [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* // int ::= [-]?[0-9]+ // negative inf ::= '-inf' TokKind HloLexer::LexNumberOrPattern() { @@ -277,7 +277,8 @@ TokKind HloLexer::LexNumberOrPattern() { static LazyRE2 dim_labels_pattern = { R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"}; - static LazyRE2 pad_pattern = {R"([0-9]+_[0-9]+(x[0-9]+_[0-9]+)*)"}; + static LazyRE2 pad_pattern = { + R"([0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*)"}; if (RE2::Consume(&consumable, *dim_labels_pattern)) { current_ptr_ = consumable.begin(); @@ -294,7 +295,7 @@ TokKind HloLexer::LexNumberOrPattern() { if (RE2::Consume(&consumable, *pad_pattern)) { current_ptr_ = consumable.begin(); str_val_.assign(token_start_, current_ptr_); - return TokKind::kWindowPad; + return TokKind::kPad; } static LazyRE2 int_pattern = {R"([-]?\d+)"}; @@ -395,8 +396,8 @@ string TokKindToString(TokKind kind) { return "kDimLabels"; case TokKind::kDxD: return "kDxD"; - case TokKind::kWindowPad: - return "kWindowPad"; + case TokKind::kPad: + return "kPad"; case TokKind::kShape: return "kShape"; case TokKind::kOpcode: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 74e6829180..79c4f271a1 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -45,7 +45,7 @@ class HloLexer { case TokKind::kAttributeName: case TokKind::kDimLabels: case TokKind::kDxD: - case TokKind::kWindowPad: + case TokKind::kPad: return str_val_; default: LOG(FATAL) << "This token does not have string value"; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 710877b4e0..fed0492a54 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -99,6 +99,7 @@ class HloParser { kSharding, kInstructionList, kSliceRanges, + kPaddingConfig, }; struct AttrConfig { @@ -134,6 +135,7 @@ class HloParser { bool ParseInstructionNames(std::vector* instructions); bool ParseWindow(Window* window); bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); + bool ParsePaddingConfig(PaddingConfig* padding); bool ParseSharding(OpSharding* sharding); bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); @@ -727,9 +729,19 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, /*grad_output=*/operands[4], *epsilon, *feature_index)); break; } + case HloOpcode::kPad: { + optional padding; + attrs["padding"] = {/*required=*/true, AttrTy::kPaddingConfig, &padding}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreatePad( + shape, operands[0], /*padding_value=*/operands[1], *padding)); + break; + } case HloOpcode::kCustomCall: case HloOpcode::kReducePrecision: - case HloOpcode::kPad: case HloOpcode::kRng: case HloOpcode::kFusion: case HloOpcode::kInfeed: @@ -1374,6 +1386,14 @@ bool HloParser::ParseAttributes( static_cast*>(attr_out_ptr)->emplace(result); return true; } + case AttrTy::kPaddingConfig: { + PaddingConfig result; + if (!ParsePaddingConfig(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } } }(); if (!success) { @@ -1774,7 +1794,7 @@ bool HloParser::ParseWindowPad(std::vector>* pad) { if (!pad->empty()) { return TokenError("sub-attribute 'pad=' already exists"); } - if (lexer_.GetKind() != TokKind::kWindowPad) { + if (lexer_.GetKind() != TokKind::kPad) { return TokenError("expects window pad pattern, e.g., '0_0x3_3'"); } string str = lexer_.GetStrVal(); @@ -1792,6 +1812,33 @@ bool HloParser::ParseWindowPad(std::vector>* pad) { return true; } +// This is the inverse xla::ToString(PaddingConfig). The padding config string +// looks like "0_0_0x3_3_1". The string is first separated by 'x', each +// substring represents one PaddingConfigDimension. The substring is 3 (or 2) +// numbers joined by '_'. +bool HloParser::ParsePaddingConfig(PaddingConfig* padding) { + if (lexer_.GetKind() != TokKind::kPad) { + return TokenError("expects padding config, e.g., '0_0_0x3_3_1'"); + } + string str = lexer_.GetStrVal(); + std::vector padding_str = Split(str, 'x'); + for (const auto& padding_dim_str : padding_str) { + std::vector padding_dim; + if (!SplitAndParseAsInts(padding_dim_str, '_', &padding_dim) || + (padding_dim.size() != 2 && padding_dim.size() != 3)) { + return TokenError( + "expects padding config pattern like 'low_high_interior' or " + "'low_high'"); + } + auto* dim = padding->add_dimensions(); + dim->set_edge_padding_low(padding_dim[0]); + dim->set_edge_padding_high(padding_dim[1]); + dim->set_interior_padding(padding_dim.size() == 3 ? padding_dim[2] : 0); + } + lexer_.Lex(); + return true; +} + bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << "ParseOpcode"; if (lexer_.GetKind() != TokKind::kOpcode) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index fbe0409e3d..d19c6e1877 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -36,6 +36,10 @@ string TestDataToString(const ::testing::TestParamInfo& data) { return data.param.test_name; } +// For each string below, we check that: +// - we parse it to an HloModule successfully, and +// - the stringification of the resulting HloModule is equal to our original +// string. std::vector CreateTestCases() { // clang-format off return std::vector({ @@ -509,6 +513,32 @@ ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], varia ROOT %batch-norm-grad = (f32[2,2,2,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-grad(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance, f32[2,2,2,2]{3,2,1,0} %grad_output), epsilon=0.001, feature_index=0 } +)" +}, +// pad +{ +"Pad", +R"(HloModule Pad1DS3Array_module: + +ENTRY %Pad1DS3Array.v3 () -> f32[8] { + %constant = f32[3]{0} constant({1, 2, 3}) + %constant.1 = f32[] constant(0.1) + ROOT %pad = f32[8]{0} pad(f32[3]{0} %constant, f32[] %constant.1), padding=3_1 +} + +)" +}, +// pad has interior +{ +"PadHasInterior", +R"(HloModule PadHasInterior_module: + +ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] { + %input = f32[1,25,7,7]{3,2,1,0} parameter(0) + %constant = f32[] constant(-5.123) + ROOT %pad = f32[1,25,17,11]{3,2,1,0} pad(f32[1,25,7,7]{3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_0_0x2_2_1x2_2_0 +} + )" } }); @@ -523,7 +553,10 @@ class HloParserTest : public ::testing::Test, << "'" << s << "' does not contain '" << expected << "'"; } - void ExpectSuccess() { + // Expects "ToString(Parse(string)) == string", that is, parses the string, + // asserts that it succeeded, stringifies the parsed module, and checks that + // the it equals the original string. + void ExpectEqual() { const string& original = GetParam().module_string; auto result = Parse(original); TF_EXPECT_OK(result.status()); @@ -532,7 +565,7 @@ class HloParserTest : public ::testing::Test, } }; -TEST_P(HloParserTest, Run) { ExpectSuccess(); } +TEST_P(HloParserTest, Run) { ExpectEqual(); } INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest, ::testing::ValuesIn(CreateTestCases()), @@ -793,6 +826,21 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { TF_EXPECT_OK(Parse(original).status()); } +TEST_F(HloParserTest, PaddingConfigIsNotWindowPad) { + const string original = R"(HloModule window_pad_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), dim_labels=b0f_0io->b0f, window={pad=1_1_0 size=1} +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "expects padding_low and padding_high separated by '_'"); +} + } // namespace } // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 15ab8b1ccc..9afd2fac23 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -59,7 +59,7 @@ enum class TokKind { kAttributeName, // dimensions= kDimLabels, // [0-9bf]+_[0-9io]+->[0-9bf]+ kDxD, // [0-9]+(x[0-9]+)+ - kWindowPad, // [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* + kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* kShape, // f32[2,3]{1,0} kOpcode, // add kInt, // 42 -- GitLab From 17626168cb05e9edc6cbbd57d04c1da8a43ecfb2 Mon Sep 17 00:00:00 2001 From: PW486 Date: Fri, 10 Nov 2017 12:33:44 +0900 Subject: [PATCH 0137/1801] Fixed typos, comments --- tensorflow/contrib/batching/shared_batch_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h index 41a3f99137..1853827dc0 100644 --- a/tensorflow/contrib/batching/shared_batch_scheduler.h +++ b/tensorflow/contrib/batching/shared_batch_scheduler.h @@ -63,7 +63,7 @@ namespace serving { // instead of N independent ones, with their sharing deliberately coordinated. // // SharedBatchScheduler does not implement the BatchScheduler API; rather, it -// presents an abstraction of "queues", where each queue coresponds to one type +// presents an abstraction of "queues", where each queue corresponds to one type // of task. Tasks submitted to a given queue are placed in their own batches, // and cannot be mixed with other tasks. Queues can be added and deleted // dynamically, to accommodate e.g. versions of a model being brought up and -- GitLab From 80646b480fded909ec439e32165223046b445f1f Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 9 Nov 2017 19:27:20 -0800 Subject: [PATCH 0138/1801] [XLA] Don't deemphasize nodes inside of subcomputations in dumped XLA graphs. Nodes inside of subcomputations (e.g. fusion computations) are always printed by the HLO graph dumper. Before this change, the dumper was not fully aware of this fact, leading it to mark as "deemphasized" (i.e. draw as gray with a dashed outline) nodes that had no business of being deemphasized. PiperOrigin-RevId: 175247474 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 67e0238c4a..04b3059fb1 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1303,7 +1303,9 @@ NodeFilter MakeNodeFilter(const HloInstruction* root, int64 radius) { auto is_displayed = [&](const HloInstruction* instr) { // Constants are displayed inline with their users; they're never omitted. - return nodes.count(instr) > 0 || instr->opcode() == HloOpcode::kConstant; + // Nodes in subcomputations are always shown. + return nodes.count(instr) > 0 || instr->opcode() == HloOpcode::kConstant || + instr->parent() != root->parent(); }; // Make a second pass over 'nodes' to fix up the NodeFilterResults now that we -- GitLab From badd35648851c0b84fdbd997b1f6e9aa20122216 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Thu, 9 Nov 2017 20:45:39 -0800 Subject: [PATCH 0139/1801] Add bfloat support to XLA. This is necessary in providing bfloat support in GPU backend. RELNOTES: bfloat support is now added to XLA infra. PiperOrigin-RevId: 175252067 --- tensorflow/compiler/tf2xla/type_util.cc | 3 + tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/literal_util.cc | 99 ++++++- tensorflow/compiler/xla/literal_util.h | 23 ++ tensorflow/compiler/xla/literal_util_test.cc | 62 +++++ tensorflow/compiler/xla/primitive_util.cc | 8 +- tensorflow/compiler/xla/primitive_util.h | 7 + tensorflow/compiler/xla/service/backend.cc | 4 +- .../xla/service/cpu/cpu_runtime_test.cc | 4 +- .../compiler/xla/service/hlo_evaluator.cc | 4 + tensorflow/compiler/xla/service/hlo_runner.cc | 3 +- tensorflow/compiler/xla/shape_util.cc | 1 + .../compiler/xla/tests/literal_test_util.cc | 13 +- .../xla/tests/local_client_test_base.cc | 3 +- tensorflow/compiler/xla/types.h | 3 + tensorflow/compiler/xla/xla_data.proto | 13 +- tensorflow/core/framework/bfloat16.cc | 30 +-- tensorflow/core/framework/bfloat16_test.cc | 92 +++++++ tensorflow/core/framework/numeric_types.h | 251 +++++++++++++++++- 19 files changed, 580 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc index 1efbe0ffb1..c969212a1b 100644 --- a/tensorflow/compiler/tf2xla/type_util.cc +++ b/tensorflow/compiler/tf2xla/type_util.cc @@ -49,6 +49,9 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) { case tensorflow::DT_UINT64: *type = xla::U64; return Status::OK(); + case tensorflow::DT_BFLOAT16: + *type = xla::BF16; + return Status::OK(); case tensorflow::DT_HALF: *type = xla::F16; return Status::OK(); diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 660f419e46..f6e405744a 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -77,6 +77,7 @@ cc_library( hdrs = ["types.h"], visibility = [":friends"], deps = [ + "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//third_party/eigen3", ], diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 0cb2223ae5..93d3cd425f 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -33,6 +33,20 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +namespace { +using tensorflow::int64; + +constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; + +// Converts between little and big endian, assuming elements in the array are 16 +// bits long. +void ConvertEndianShort(char* bytes, int64 size) { + CHECK_EQ(size / 2, 0); + for (int64 i = 0; i < size; i += 2) { + std::swap(bytes[i], bytes[i + 1]); + } +} +} // namespace namespace xla { @@ -169,6 +183,8 @@ Status Literal::Copy(const Literal& src_literal, return CopyRange(src_literal, src_base, dest_base, copy_size); case F16: return CopyRange(src_literal, src_base, dest_base, copy_size); + case BF16: + return CopyRange(src_literal, src_base, dest_base, copy_size); case F32: return CopyRange(src_literal, src_base, dest_base, copy_size); case F64: @@ -200,6 +216,8 @@ Status Literal::Copy(const Literal& src_literal, return *Literal::CreateR0(0); case F16: return *Literal::CreateR0(static_cast(0.0f)); + case BF16: + return *Literal::CreateR0(static_cast(0.0f)); case F32: return *Literal::CreateR0(0); case F64: @@ -285,6 +303,9 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(-std::numeric_limits::infinity())); + case BF16: + return *Literal::CreateR0( + static_cast(-std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no minimum value"; case OPAQUE: @@ -321,6 +342,9 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(std::numeric_limits::infinity())); + case BF16: + return *Literal::CreateR0( + static_cast(std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no maximum value"; case OPAQUE: @@ -428,6 +452,7 @@ std::unique_ptr Literal::Transpose( // The shape with affine layout resulting from that operation will be // F32[8,11]{0,1}, since it leaves the original most minor (the 8 sized), the // most minor. + // // Essentially, given MinMaj(Di) the position of the Di dimension within the // minor to major vector, and given T(Di) the index that the original Di // dimension has within the transposed array, a layout is affine if @@ -536,6 +561,9 @@ string Literal::GetAsString( } case F16: return tensorflow::strings::StrCat(Get(multi_index)); + case BF16: + return tensorflow::strings::StrCat( + static_cast(Get(multi_index))); default: return tensorflow::strings::StrCat( "[", PrimitiveType_Name(shape().element_type()), "]"); @@ -743,6 +771,8 @@ void* Literal::MutableInternalData() { return reinterpret_cast(c64s_.data()); case F16: return reinterpret_cast(f16s_.data()); + case BF16: + return reinterpret_cast(bf16s_.data()); default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -785,6 +815,9 @@ void Literal::Reserve(int64 num_elements) { case F16: Resize(num_elements, static_cast(0.0f)); break; + case BF16: + Resize(num_elements, static_cast(0.0f)); + break; default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -824,6 +857,9 @@ tensorflow::Status Literal::ValidateLiteral() const { case F16: actual = f16s().size() / sizeof(half); break; + case BF16: + actual = bf16s().size(); + break; default: return tensorflow::errors::Unimplemented( "unhandled element type for literal validation: " + @@ -920,6 +956,7 @@ StatusOr> ConvertIfDestTypeMatches( CONVERT_IF_TYPES_MATCH(F16) CONVERT_IF_TYPES_MATCH(F32) CONVERT_IF_TYPES_MATCH(F64) + CONVERT_IF_TYPES_MATCH(BF16) #undef CONVERT_IF_TYPES_MATCH case C64: return ConvertToC64(src_literal); @@ -949,8 +986,9 @@ StatusOr> Literal::Convert( CONVERT_IF_DEST_TYPE_MATCHES(F16) CONVERT_IF_DEST_TYPE_MATCHES(F32) CONVERT_IF_DEST_TYPE_MATCHES(F64) + CONVERT_IF_DEST_TYPE_MATCHES(BF16) #undef CONVERT_IF_DEST_TYPE_MATCHES - // Other types are not yet supported. + // Other types are not yet supported. default: return InvalidArgument("Unimplemented: Convert from type %s to type %s", PrimitiveType_Name(shape().element_type()).c_str(), @@ -1019,6 +1057,8 @@ bool Literal::operator==(const Literal& other) const { return EqualElements(*this, other, 0, &multi_index); case F16: return EqualElements(*this, other, 0, &multi_index); + case BF16: + return EqualElements(*this, other, 0, &multi_index); case C64: return EqualElements(*this, other, 0, &multi_index); default: @@ -1128,13 +1168,18 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { - // TODO - there is an endianess problem here. fix it, or wait for uint16 - // support in protobuf auto values = mutable_f16s(); return tensorflow::gtl::MutableArraySlice(values->data(), values->size()); } +template <> +tensorflow::gtl::MutableArraySlice +Literal::GetMutableArraySlice() { + auto values = mutable_bf16s(); + return {values->data(), values->size()}; +} + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { CHECK_EQ(shape().element_type(), PRED); @@ -1205,6 +1250,12 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { f16s().size() / sizeof(half)); } +template <> +tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { + CHECK_EQ(shape().element_type(), BF16); + return {bf16s().data(), bf16s().size()}; +} + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { @@ -1253,6 +1304,9 @@ bool Literal::IsAll(int8 value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); + case BF16: + return AllElementsEqualValue(*this, + static_cast(value)); case PRED: if (value == 0) { return AllElementsEqualValue(*this, false); @@ -1274,6 +1328,9 @@ bool Literal::IsAllFloat(float value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); + case BF16: + return AllElementsEqualValue(*this, + static_cast(value)); default: return false; } @@ -1310,6 +1367,8 @@ bool Literal::IsZero(tensorflow::gtl::ArraySlice indices) const { return Get(indices) == complex64(0.0f, 0.0f); case F16: return Get(indices) == static_cast(0.0f); + case BF16: + return Get(indices) == static_cast(0.0f); case PRED: return Get(indices) == false; default: @@ -1377,6 +1436,12 @@ void Literal::Resize(int64 num_elements, half value) { mutable_f16s()->resize(num_elements, value); } +template <> +void Literal::Resize(int64 num_elements, bfloat16 value) { + CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); + mutable_bf16s()->resize(num_elements, value); +} + template <> void Literal::Resize(int64 num_elements, complex64 value) { CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); @@ -1425,6 +1490,19 @@ LiteralProto Literal::ToProto() const { *proto.mutable_f16s() = string(reinterpret_cast(f16s_.data()), f16s_.size() * sizeof(half)); + if (!kLittleEndian) { + ConvertEndianShort(const_cast(proto.mutable_f16s()->data()), + proto.f16s().size()); + } + break; + case BF16: + *proto.mutable_bf16s() = + string(reinterpret_cast(bf16s_.data()), + bf16s_.size() * sizeof(bfloat16)); + if (!kLittleEndian) { + ConvertEndianShort(const_cast(proto.mutable_bf16s()->data()), + proto.bf16s().size()); + } break; case F32: CopyToRepeatedField(proto.mutable_f32s(), f32s()); @@ -1493,6 +1571,21 @@ void Literal::CopyFromProto(const LiteralProto& literal_proto) { CHECK_EQ(0, s.size() % sizeof(half)); f16s_ = std::vector(s.size() / sizeof(half)); memcpy(f16s_.data(), s.data(), s.size()); + + if (!kLittleEndian) { + ConvertEndianShort(reinterpret_cast(f16s_.data()), s.size()); + } + break; + } + case BF16: { + const string& s(literal_proto.bf16s()); + CHECK_EQ(0, s.size() % sizeof(bfloat16)); + bf16s_ = std::vector(s.size() / sizeof(bfloat16)); + memcpy(bf16s_.data(), s.data(), s.size()); + + if (!kLittleEndian) { + ConvertEndianShort(reinterpret_cast(bf16s_.data()), s.size()); + } break; } case F32: diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 667f926c46..f37e529caf 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -163,6 +163,11 @@ class Literal { const std::vector& c64s() const { return c64s_; } std::vector* mutable_c64s() { return &c64s_; } + int bf16s_size() const { return bf16s().size(); } + bfloat16 bf16s(int i) const { return bf16s_[i]; } + const std::vector& bf16s() const { return bf16s_; } + std::vector* mutable_bf16s() { return &bf16s_; } + int tuple_literals_size() const { return tuple_literals().size(); } const Literal& tuple_literals(int i) const { return tuple_literals_[i]; } Literal* add_tuple_literals() { @@ -622,6 +627,7 @@ class Literal { std::vector u16s_; std::vector u32s_; std::vector u64s_; + std::vector bf16s_; std::vector f16s_; std::vector f32s_; std::vector f64s_; @@ -674,6 +680,9 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; +template <> +tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; @@ -714,6 +723,9 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); +template <> +tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); + template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); @@ -747,6 +759,9 @@ void Literal::Resize(int64 num_elements, double value); template <> void Literal::Resize(int64 num_elements, half value); +template <> +void Literal::Resize(int64 num_elements, bfloat16 value); + template <> void Literal::Resize(int64 num_elements, complex64 value); @@ -990,6 +1005,14 @@ inline half Literal::Get( return GetArraySlice()[linear_index]; } +template <> +inline bfloat16 Literal::Get( + tensorflow::gtl::ArraySlice multi_index) const { + CHECK(shape().element_type() == BF16); + int64 linear_index = LinearIndex(multi_index); + return GetArraySlice()[linear_index]; +} + template void Literal::Set(tensorflow::gtl::ArraySlice multi_index, NativeT value) { diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 6d596da4ad..1e08101759 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -110,6 +110,18 @@ TEST_F(LiteralUtilTest, LiteralScalarToString) { auto c64_lit = Literal::CreateR0({3.14f, 2.78f}); ASSERT_EQ("(3.14, 2.78)", c64_lit->ToString()); + + auto bf16_lit = Literal::CreateR0(static_cast(0.5f)); + ASSERT_EQ("0.5", bf16_lit->ToString()); + + // 3.14 will be rounded to 3.125 in bfloat16 format (Round to nearest even). + auto bf16_lit_truncated = + Literal::CreateR0(static_cast(3.14f)); + ASSERT_EQ("3.140625", bf16_lit_truncated->ToString()); + + auto bf16_lit_truncated2 = + Literal::CreateR0(static_cast(9.001f)); + ASSERT_EQ("9", bf16_lit_truncated2->ToString()); } TEST_F(LiteralUtilTest, LiteralVectorToString) { @@ -397,6 +409,18 @@ TEST_F(LiteralUtilTest, IsAll) { EXPECT_FALSE(Literal::CreateR2({{h8}, {h9}})->IsAll(8)); EXPECT_FALSE(Literal::CreateR2({{h9}, {h8}})->IsAll(8)); + bfloat16 b8(8.0f); + bfloat16 b9(9.0f); + + EXPECT_TRUE(Literal::CreateR2({{b8}, {b8}})->IsAll(8)); + EXPECT_FALSE(Literal::CreateR2({{b8}, {b9}})->IsAll(8)); + EXPECT_FALSE(Literal::CreateR2({{b9}, {b8}})->IsAll(8)); + + // 9.001 will be truncated to 9.0 + bfloat16 b91(9.001f); + bfloat16 b90(9.00f); + EXPECT_TRUE(Literal::CreateR2({{b91}, {b90}})->IsAll(9.0)); + complex64 c8_9 = {8, 9}; EXPECT_FALSE(Literal::CreateR2({{c8_9}, {c8_9}})->IsAll(8)); @@ -691,6 +715,30 @@ TEST_F(LiteralUtilTest, PopulateR2C64) { EXPECT_EQ(output, *expected); } +TEST_F(LiteralUtilTest, PopulateWithValueR0BF16) { + Literal output; + bfloat16 h(0.25f); + output.PopulateWithValue(h, {}); + auto expected = Literal::CreateR0(h); + EXPECT_EQ(output, *expected); +} + +TEST_F(LiteralUtilTest, PopulateWithValueR1BF16) { + Literal output; + bfloat16 h(0.5f); + output.PopulateWithValue(h, {3}); + auto expected = Literal::CreateR1({h, h, h}); + EXPECT_EQ(output, *expected); +} + +TEST_F(LiteralUtilTest, PopulateWithValueR2BF16) { + Literal output; + bfloat16 h(2.0f); + output.PopulateWithValue(h, {2, 2}); + auto expected = Literal::CreateR2({{h, h}, {h, h}}); + EXPECT_EQ(output, *expected); +} + TEST_F(LiteralUtilTest, PopulateWithValueR0F32) { Literal output; output.PopulateWithValue(2.5f, {}); @@ -975,6 +1023,14 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { {{half(26.0), half(0.0), half(28.0), half(0.0)}, {half(0.0), half(31.0), half(0.0), half(33.0)}}, }}, layout_r4_dim0major_); + auto bf16 = Literal::CreateR4WithLayout({{ + {{bfloat16(10.0), bfloat16(0.0), bfloat16(12.0), bfloat16(0.0)}, + {bfloat16(0.0), bfloat16(15.0), bfloat16(0.0), bfloat16(17.0)}}, + {{bfloat16(0.0), bfloat16(19.0), bfloat16(0.0), bfloat16(21.0)}, + {bfloat16(22.0), bfloat16(0.0), bfloat16(24.0), bfloat16(0.0)}}, + {{bfloat16(26.0), bfloat16(0.0), bfloat16(28.0), bfloat16(0.0)}, + {bfloat16(0.0), bfloat16(31.0), bfloat16(0.0), bfloat16(33.0)}}, + }}, layout_r4_dim0major_); auto f32 = Literal::CreateR4WithLayout({{ {{10.0f, 0.0f, 12.0f, 0.0f}, {0.0f, 15.0f, 0.0f, 17.0f}}, {{0.0f, 19.0f, 0.0f, 21.0f}, {22.0f, 0.0f, 24.0f, 0.0f}}, @@ -1008,6 +1064,12 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { conv = s8->Convert(PRED).ConsumeValueOrDie(); EXPECT_EQ(*conv, *pred); + conv = bf16->Convert(S32).ConsumeValueOrDie(); + EXPECT_EQ(*conv, *s32); + + conv = bf16->Convert(F32).ConsumeValueOrDie(); + EXPECT_EQ(*conv, *f32); + conv = pred->Convert(S32).ConsumeValueOrDie(); EXPECT_EQ(*conv, *int32_pred); diff --git a/tensorflow/compiler/xla/primitive_util.cc b/tensorflow/compiler/xla/primitive_util.cc index 2113b5e06f..2bce56b7bd 100644 --- a/tensorflow/compiler/xla/primitive_util.cc +++ b/tensorflow/compiler/xla/primitive_util.cc @@ -78,6 +78,11 @@ PrimitiveType NativeToPrimitiveType() { return F64; } +template <> +PrimitiveType NativeToPrimitiveType() { + return BF16; +} + template <> PrimitiveType NativeToPrimitiveType() { return F16; @@ -89,7 +94,7 @@ PrimitiveType NativeToPrimitiveType() { } bool IsFloatingPointType(PrimitiveType type) { - return type == F16 || type == F32 || type == F64; + return type == F16 || type == F32 || type == F64 || type == BF16; } bool IsComplexType(PrimitiveType type) { return type == C64; } @@ -118,6 +123,7 @@ int BitWidth(PrimitiveType type) { case S16: case U16: case F16: + case BF16: return 16; case U32: diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h index a49c8b86fc..19c6a13888 100644 --- a/tensorflow/compiler/xla/primitive_util.h +++ b/tensorflow/compiler/xla/primitive_util.h @@ -77,6 +77,8 @@ template <> PrimitiveType NativeToPrimitiveType(); template <> PrimitiveType NativeToPrimitiveType(); +template <> +PrimitiveType NativeToPrimitiveType(); // Complex template <> @@ -167,6 +169,11 @@ struct PrimitiveTypeToNative { using type = half; }; +template <> +struct PrimitiveTypeToNative { + using type = bfloat16; +}; + // Complex template <> struct PrimitiveTypeToNative { diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index 9abe30e3f3..05f2d06278 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS + #include "tensorflow/compiler/xla/service/backend.h" #include #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/platform_util.h" diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc index f8e260dd90..f385829cdf 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc @@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/client/local_client.h" diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 88b77ccdd0..a722d1b3d9 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1450,6 +1450,10 @@ HloEvaluator::HloEvaluator() { typed_visitors_[F32] = MakeUnique>(this); typed_visitors_[F64] = MakeUnique>(this); typed_visitors_[C64] = MakeUnique>(this); + + typed_visitors_[BF16] = MakeUnique([](HloInstruction*) { + return Unimplemented("HloEvaluator: unhandled primitive type: BF16."); + }); typed_visitors_[TUPLE] = MakeUnique([](HloInstruction*) { return Unimplemented("HloEvaluator: unhandled primitive type: TUPLE."); }); diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index f463e57d99..158fb9a546 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/hlo_runner.h" @@ -19,8 +20,6 @@ limitations under the License. #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index b5eb81dfc6..4d0bafa908 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -263,6 +263,7 @@ StatusOr MakeShapeWithLayoutInternal( case S32: case S64: case F16: + case BF16: case F32: case F64: return true; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 95a52ecd2f..75c9a0d3fb 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -116,16 +116,18 @@ template ::testing::AssertionResult CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs) { auto ulhs = tensorflow::bit_cast(lhs); auto urhs = tensorflow::bit_cast(rhs); + auto lhs_double = static_cast(lhs); + auto rhs_double = static_cast(rhs); if (ulhs != urhs) { return ::testing::AssertionFailure() << tensorflow::strings::Printf( "floating values are not bitwise-equal; and equality testing " "was requested: %s=%g=%a vs %s=%g=%a", tensorflow::strings::StrCat(tensorflow::strings::Hex(ulhs)) .c_str(), - lhs, lhs, + lhs_double, lhs_double, tensorflow::strings::StrCat(tensorflow::strings::Hex(urhs)) .c_str(), - rhs, rhs); + rhs_double, rhs_double); } return ::testing::AssertionSuccess(); } @@ -149,6 +151,10 @@ template // Specializations for floating types that do bitwise comparisons when equality // comparison is requested. template <> +::testing::AssertionResult CompareEqual(bfloat16 lhs, bfloat16 rhs) { + return CompareFloatsBitwiseEqual(lhs, rhs); +} +template <> ::testing::AssertionResult CompareEqual(float lhs, float rhs) { return CompareFloatsBitwiseEqual(lhs, rhs); } @@ -238,6 +244,9 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, case U64: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; + case BF16: + match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); + break; case F32: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index c11e1df0a7..d98875dbc2 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/tests/local_client_test_base.h" #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/map_util.h" diff --git a/tensorflow/compiler/xla/types.h b/tensorflow/compiler/xla/types.h index 3b19ca321c..9fa4297523 100644 --- a/tensorflow/compiler/xla/types.h +++ b/tensorflow/compiler/xla/types.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "third_party/eigen3/Eigen/Core" +#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/platform/types.h" #include @@ -32,6 +33,8 @@ using ::tensorflow::int16; using ::tensorflow::int32; using ::tensorflow::int64; +using ::tensorflow::bfloat16; + using ::tensorflow::uint8; using ::tensorflow::uint16; using ::tensorflow::uint32; diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 7146604708..eac8f2ff07 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -46,6 +46,12 @@ enum PrimitiveType { // converted to f16 from f32 at arbirary points in the computation. F16 = 10; F32 = 11; + + // Truncated 16 bit floating-point format. This is similar to IEEE's 16 bit + // floating-point format, but uses 1 bit for the sign, 8 bits for the exponent + // and 7 bits for the mantissa. + BF16 = 16; + F64 = 12; // Complex values of fixed width. @@ -63,6 +69,8 @@ enum PrimitiveType { // An opaque type used for passing context specific data to a custom // operation. OPAQUE = 14; + + // Next = 17 } // Describes the value held inside padding elements. @@ -310,7 +318,10 @@ message LiteralProto { repeated double f64s = 9; repeated float c64s = 12; // Stored as interleaved real, imag floats. repeated LiteralProto tuple_literals = 10; - bytes f16s = 11; // Note: the F16s are encoded in little endian byte order + // The F16s and BF16s are encoded in little endian byte order + bytes f16s = 11; + bytes bf16s = 13; + // Next = 14 } message WindowDimension { diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc index a5ac0e1a8d..1a6f355c77 100644 --- a/tensorflow/core/framework/bfloat16.cc +++ b/tensorflow/core/framework/bfloat16.cc @@ -18,32 +18,24 @@ limitations under the License. namespace tensorflow { void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) { - const uint16_t* p = reinterpret_cast(src); - uint16_t* q = reinterpret_cast(dst); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p += 2, q++, size--) { - *q = p[0]; - } -#else - for (; size != 0; p += 2, q++, size--) { - *q = p[1]; - } -#endif + for (int64 i = 0; i < size; ++i) { + dst[i] = bfloat16(src[i]); + } } void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) { const uint16_t* p = reinterpret_cast(src); uint16_t* q = reinterpret_cast(dst); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p++, q += 2, size--) { - q[0] = *p; - q[1] = 0; + for (; size != 0; p++, q += 2, size--) { + q[0] = *p; + q[1] = 0; } -#else - for (; size != 0; p++, q += 2, size--) { - q[0] = 0; - q[1] = *p; - } +#else + for (; size != 0; p++, q += 2, size--) { + q[0] = 0; + q[1] = *p; + } #endif } diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index af4e6a4411..a25b764ea2 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/bfloat16.h" +#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -27,6 +28,97 @@ TEST(Bfloat16Test, Simple) { EXPECT_EQ(0x4140, a.value); } +float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa, + uint32_t low_mantissa) { + return bit_cast((sign << 31) + (exponent << 23) + + (high_mantissa << 16) + low_mantissa); +} + +struct Bfloat16TestParam { + float input; + float expected; +}; + +class Bfloat16Test : public ::testing::Test, + public ::testing::WithParamInterface {}; + +TEST_P(Bfloat16Test, RoundOrTruncate) { + bfloat16 a(GetParam().input); + if (std::isnan(GetParam().input)) { + EXPECT_TRUE(std::isnan(float(a))); + return; + } + EXPECT_EQ(GetParam().expected, float(a)); +} + +INSTANTIATE_TEST_CASE_P( + Bfloat16Test_Instantiation, Bfloat16Test, + ::testing::Values( + // More than half. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1111010111000011), + BinaryToFloat(0, 0b10000000, 0b1001001, 0b0000000000000000)}, + + Bfloat16TestParam{ + BinaryToFloat(1, 0b10000000, 0b1001000, 0b1111010111000011), + BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, + + // Exact half. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // NaN stays at NaN. + Bfloat16TestParam{ + BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000001), + BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, + + // NaN stays at NaN -- no exponents overflow. + Bfloat16TestParam{ + BinaryToFloat(0, 0b11111111, 0b1111111, 0b1111111111111111), + BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, + + // More than half, round to an odd number. + Bfloat16TestParam{ + BinaryToFloat(1, 0b10000000, 0b1001000, 0b1100000000000000), + BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, + + // Less than half, truncate. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // Less than half, truncate. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0100000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // Exact at half, but result is already even. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // Denormal values. + Bfloat16TestParam{ + BinaryToFloat(0, 0b00000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b00000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b00000000, 0b1111111, 0b1100000000000000), + BinaryToFloat(0, 0b00000001, 0b0000000, 0b0000000000000000)})); +TEST(Bfloat16Test, RoundWithFractionOverflow) { + // Still works with fraction overflow -- round to 4./ + // + // Input 3.9960938: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1100000000000000 + // + // Should round to 4.0: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 + bfloat16 a(3.9960938f); + EXPECT_EQ(4.0, float(a)); +} + TEST(Bfloat16Test, Conversion) { float a[100]; for (int i = 0; i < 100; ++i) { diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index a630bee38d..d005de2af1 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -44,29 +44,262 @@ typedef Eigen::QUInt16 quint16; // see framework/bfloat16.h for description. struct bfloat16 { EIGEN_DEVICE_FUNC bfloat16() {} - EIGEN_DEVICE_FUNC explicit bfloat16(const float v) { - const uint16_t* p = reinterpret_cast(&v); + + explicit EIGEN_DEVICE_FUNC bfloat16(float v) { + uint32_t input; + memcpy(&input, &v, sizeof(uint32_t)); + + if ((~input & 0x7f800000) == 0 && (input & 0x007fffff) != 0) { + // If the value is a NaN, squash it to a qNaN with msb of fraction set, + // this makes sure after truncation we don't end up with an inf. + // + // qNaN magic: All exponent bits set + most significant bit of fraction + // set. + value = 0x7fc0; + } else { + // Fast rounding algorithm that rounds a half value to nearest even. This + // reduces expected error when we convert a large number of floats. Here + // is how it works: + // + // Definitions: + // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits + // with the following tags: + // + // Sign | Exp (8 bits) | Frac (23 bits) + // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT + // + // S: Sign bit. + // E: Exponent bits. + // F: First 6 bits of fraction. + // L: Least significant bit of resulting bfloat16 if we truncate away the + // rest of the float32. This is also the 7th bit of fraction + // R: Rounding bit, 8th bit of fraction. + // T: Sticky bits, rest of fraction, 15 bits. + // + // To round half to nearest even, there are 3 cases where we want to round + // down (simply truncate the result of the bits away, which consists of + // rounding bit and sticky bits) and two cases where we want to round up + // (truncate then add one to the result). + // + // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of + // 1s) as the rounding bias, adds the rounding bias to the input, then + // truncates the last 16 bits away. + // + // To understand how it works, we can analyze this algorithm case by case: + // + // 1. L = 0, R = 0: + // Expect: round down, this is less than half value. + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input may create any carry, depending on + // whether there is any value set to 1 in T bits. + // - R may be set to 1 if there is a carry. + // - L remains 0. + // - Note that this case also handles Inf and -Inf, where all fraction + // bits, including L, R and Ts are all 0. The output remains Inf after + // this algorithm. + // + // 2. L = 1, R = 0: + // Expect: round down, this is less than half value. + // + // Algorithm: + // - Rounding bias: 0x7fff + 1 = 0x8000 + // - Adding rounding bias to input doesn't change sticky bits but + // adds 1 to rounding bit. + // - L remains 1. + // + // 3. L = 0, R = 1, all of T are 0: + // Expect: round down, this is exactly at half, the result is already + // even (L=0). + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input sets all sticky bits to 1, but + // doesn't create a carry. + // - R remains 1. + // - L remains 0. + // + // 4. L = 1, R = 1: + // Expect: round up, this is exactly at half, the result needs to be + // round to the next even number. + // + // Algorithm: + // - Rounding bias: 0x7fff + 1 = 0x8000 + // - Adding rounding bias to input doesn't change sticky bits, but + // creates a carry from rounding bit. + // - The carry sets L to 0, creates another carry bit and propagate + // forward to F bits. + // - If all the F bits are 1, a carry then propagates to the exponent + // bits, which then creates the minimum value with the next exponent + // value. Note that we won't have the case where exponents are all 1, + // since that's either a NaN (handled in the other if condition) or inf + // (handled in case 1). + // + // 5. L = 0, R = 1, any of T is 1: + // Expect: round up, this is greater than half. + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input creates a carry from sticky bits, + // sets rounding bit to 0, then create another carry. + // - The second carry sets L to 1. + // + // Examples: + // + // Exact half value that is already even: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000 + // + // This falls into case 3. We truncate the rest of 16 bits and no + // carry is created into F and L: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 + // + // Exact half value, round to next even number: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000 + // + // This falls into case 4. We create a carry from R and T, + // which then propagates into L and F: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 + // + // + // Max denormal value round to min normal value: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111 + // + // This falls into case 4. We create a carry from R and T, + // propagate into L and F, which then propagates into exponent + // bits: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 + // + // Max normal value round to Inf: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111 + // + // This falls into case 4. We create a carry from R and T, + // propagate into L and F, which then propagates into exponent + // bits: + // + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 + // + // + // Least significant bit of resulting bfloat. + uint32_t lsb = (input >> 16) & 1; + uint32_t rounding_bias = 0x7fff + lsb; + input += rounding_bias; + value = static_cast(input >> 16); + } + } + + template + explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) + : bfloat16(static_cast(val)) {} + + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { + float result; + + uint16_t* q = reinterpret_cast(&result); + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - value = p[0]; + q[0] = value; + q[1] = 0; #else - value = p[1]; + q[0] = 0; + q[1] = value; #endif + return result; + } + + EIGEN_DEVICE_FUNC explicit operator bool() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator Eigen::half() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator short() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator int() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator signed char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned int() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned long long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator long long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator double() const { + return static_cast(float(*this)); } uint16_t value; }; +inline bool operator==(const bfloat16 a, const bfloat16 b) { + return a.value == b.value; +} + +inline bool operator!=(const bfloat16 a, const bfloat16 b) { + return a.value != b.value; +} + } // end namespace tensorflow namespace Eigen { template <> struct NumTraits : GenericNumTraits {}; -EIGEN_STRONG_INLINE bool operator==(const tensorflow::bfloat16 a, - const tensorflow::bfloat16 b) { - return a.value == b.value; -} - +using ::tensorflow::operator==; +using ::tensorflow::operator!=; } // namespace Eigen #ifdef COMPILER_MSVC -- GitLab From 3c41cb6bff409f37e35d2e1b2619d5dc6742dbe5 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Thu, 9 Nov 2017 21:01:00 -0800 Subject: [PATCH 0140/1801] Get rid of IteratorBase::is_exhausted flag since it is not possible to rely on it unless we lock each call to GetNext which is not preferable. Each iterator now handles saving/restoring exhausted state. As a guideline, we always reset the input_impl(s) when they get exhausted. This can be used as an indicator of exhausted-ness for non-terminal iterators. Also reduces memory overhead. Each iterator should also handle calls to GetNextInternal when it is exhausted. Fixed this for some datasets. Also fix a bug in dataset_serialization_test_base. We were not saving a checkpoint after exhausting the iterator so verify_exhausted_iterator was not really testing restoring an exhausted iterator. PiperOrigin-RevId: 175253023 --- .../dataset_serialization_test_base.py | 4 +-- tensorflow/core/kernels/batch_dataset_op.cc | 21 +++++++++++-- .../core/kernels/concatenate_dataset_op.cc | 8 ++++- tensorflow/core/kernels/dataset.cc | 1 - tensorflow/core/kernels/dataset.h | 23 ++------------ tensorflow/core/kernels/range_dataset_op.cc | 1 - tensorflow/core/kernels/reader_dataset_ops.cc | 1 - tensorflow/core/kernels/repeat_dataset_op.cc | 18 +++++++++-- tensorflow/core/kernels/shuffle_dataset_op.cc | 31 ++++++++++--------- tensorflow/core/kernels/skip_dataset_op.cc | 21 +++++++++++-- tensorflow/core/kernels/take_dataset_op.cc | 17 ++++++++-- tensorflow/core/kernels/zip_dataset_op.cc | 31 +++++++++++++++---- 12 files changed, 120 insertions(+), 57 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index 369b789a52..07fecf04fa 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -337,11 +337,11 @@ class DatasetSerializationTestBase(test.TestCase): num_iters = end - start for _ in range(num_iters): outputs.append(sess.run(get_next_op)) - self._save(sess, saver) - ckpt_saved = True if i == len(break_points) and verify_exhausted: with self.assertRaises(errors.OutOfRangeError): sess.run(get_next_op) + self._save(sess, saver) + ckpt_saved = True return outputs diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/batch_dataset_op.cc index 2e52ad39f8..6a5fd17a9e 100644 --- a/tensorflow/core/kernels/batch_dataset_op.cc +++ b/tensorflow/core/kernels/batch_dataset_op.cc @@ -143,9 +143,13 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { // Each row of `batch_elements` is a tuple of tensors from the // input iterator. std::vector> batch_elements; - batch_elements.reserve(dataset()->batch_size_); { mutex_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + batch_elements.reserve(dataset()->batch_size_); *end_of_sequence = false; for (int i = 0; i < dataset()->batch_size_ && !*end_of_sequence; ++i) { @@ -154,6 +158,8 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { end_of_sequence)); if (!*end_of_sequence) { batch_elements.emplace_back(std::move(batch_element_tuple)); + } else { + input_impl_.reset(); } } } @@ -194,14 +200,23 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { protected: Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } return Status::OK(); } Status RestoreInternal(OpKernelContext* ctx, IteratorStateReader* reader) override { mutex_lock l(mu_); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/concatenate_dataset_op.cc b/tensorflow/core/kernels/concatenate_dataset_op.cc index 711c234129..c3bd89c479 100644 --- a/tensorflow/core/kernels/concatenate_dataset_op.cc +++ b/tensorflow/core/kernels/concatenate_dataset_op.cc @@ -104,6 +104,10 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } while (i_ < 2) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -140,7 +144,9 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel { } else if (i_ == 2) { input_impl_.reset(); } - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (input_impl_) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/dataset.cc index 0414875a5d..fcfa2956f7 100644 --- a/tensorflow/core/kernels/dataset.cc +++ b/tensorflow/core/kernels/dataset.cc @@ -126,7 +126,6 @@ void BinaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx, MakeDataset(ctx, input, another_input, output); } -const char IteratorBase::kIteratorExhausted[] = "ITERATOR_EXHAUSTED"; const char GraphDatasetBase::kDatasetGraphKey[] = "_DATASET_GRAPH"; const char GraphDatasetBase::kDatasetGraphOutputNodeKey[] = "_DATASET_GRAPH_OUTPUT_NODE"; diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index 4a42ac80c3..aa4f436b39 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -306,27 +306,14 @@ class IteratorBase { // Saves the state of this iterator. virtual Status Save(IteratorStateWriter* writer) { - if (is_exhausted_) { - LOG(INFO) << "Iterator exhausted."; - return writer->WriteScalar(kIteratorExhausted, kIteratorExhausted); - } else { - return SaveInternal(writer); - } + return SaveInternal(writer); } // Restores the state of this iterator. virtual Status Restore(OpKernelContext* ctx, IteratorStateReader* reader) { - if (reader->Contains(kIteratorExhausted)) { - LOG(INFO) << "Iterator exhausted. Nothing to restore."; - is_exhausted_ = true; - return Status::OK(); - } else { - return RestoreInternal(ctx, reader); - } + return RestoreInternal(ctx, reader); } - static const char kIteratorExhausted[]; - protected: // This is needed so that sub-classes of IteratorBase can call // `SaveInternal` on their parent iterators, e.g., in @@ -354,8 +341,6 @@ class IteratorBase { IteratorStateReader* reader) { return errors::Unimplemented("RestoreInternal"); } - - bool is_exhausted_ = false; // Whether the iterator has been exhausted. }; // Represents a (potentially infinite) range of outputs, where each @@ -491,10 +476,6 @@ class DatasetIterator : public IteratorBase { Status GetNext(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) final { port::Tracing::TraceMe activity(params_.prefix); - if (is_exhausted_) { - *end_of_sequence = true; - return Status::OK(); - } return GetNextInternal(ctx, out_tensors, end_of_sequence); } diff --git a/tensorflow/core/kernels/range_dataset_op.cc b/tensorflow/core/kernels/range_dataset_op.cc index 7adfcc4f8d..e7ae840fc7 100644 --- a/tensorflow/core/kernels/range_dataset_op.cc +++ b/tensorflow/core/kernels/range_dataset_op.cc @@ -99,7 +99,6 @@ class RangeDatasetOp : public DatasetOpKernel { if ((dataset()->step_ > 0 && next_ >= dataset()->stop_) || (dataset()->step_ < 0 && next_ <= dataset()->stop_)) { *end_of_sequence = true; - is_exhausted_ = true; return Status::OK(); } Tensor value_tensor(cpu_allocator(), DT_INT64, {}); diff --git a/tensorflow/core/kernels/reader_dataset_ops.cc b/tensorflow/core/kernels/reader_dataset_ops.cc index 39ef92a5de..c08e42be1d 100644 --- a/tensorflow/core/kernels/reader_dataset_ops.cc +++ b/tensorflow/core/kernels/reader_dataset_ops.cc @@ -402,7 +402,6 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { // Iteration ends when there are no more files to process. if (current_file_index_ == dataset()->filenames_.size()) { *end_of_sequence = true; - is_exhausted_ = true; return Status::OK(); } diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/repeat_dataset_op.cc index 6c0f4118e6..0167b9ea64 100644 --- a/tensorflow/core/kernels/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/repeat_dataset_op.cc @@ -117,6 +117,10 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } while (i_ < dataset()->count_) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -127,7 +131,6 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { input_impl_ = dataset()->input_->MakeIterator(prefix()); } *end_of_sequence = true; - is_exhausted_ = true; input_impl_.reset(); return Status::OK(); } @@ -136,7 +139,12 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } return Status::OK(); } @@ -144,7 +152,11 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { IteratorStateReader* reader) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/shuffle_dataset_op.cc b/tensorflow/core/kernels/shuffle_dataset_op.cc index 2146ba2aa1..dd0ab57e9d 100644 --- a/tensorflow/core/kernels/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/shuffle_dataset_op.cc @@ -105,8 +105,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { mutex_lock l(mu_); int64 start_micros = ctx->env()->NowMicros(); int64 num_log_entries = 0; - while (!end_of_input_sequence_ && - buffer_.size() < dataset()->buffer_size_) { + while (input_impl_ && buffer_.size() < dataset()->buffer_size_) { if (ctx->env()->NowMicros() > ((num_log_entries + 1) * kLogIntervalMicros) + start_micros) { num_log_entries++; @@ -114,9 +113,10 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { << buffer_.size() << " of " << dataset()->buffer_size_; } std::vector input_element; + bool end_of_input_sequence; TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element, - &end_of_input_sequence_)); - if (!end_of_input_sequence_) { + &end_of_input_sequence)); + if (!end_of_input_sequence) { buffer_.emplace_back(std::move(input_element)); } else { input_impl_.reset(); @@ -135,7 +135,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { std::swap(buffer_[index], buffer_.back()); buffer_.pop_back(); } else { - DCHECK(end_of_input_sequence_); + DCHECK(input_impl_ == nullptr); *end_of_sequence = true; } return Status::OK(); @@ -148,11 +148,11 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { // Save the tensors in the buffer. TF_RETURN_IF_ERROR( writer->WriteScalar(full_name("buffer_size"), buffer_.size())); - for (int i = 0; i < buffer_.size(); i++) { + for (size_t i = 0; i < buffer_.size(); i++) { TF_RETURN_IF_ERROR(writer->WriteScalar( full_name(strings::StrCat("buffer_", i, "_size")), buffer_[i].size())); - for (int j = 0; j < buffer_[i].size(); j++) { + for (size_t j = 0; j < buffer_[i].size(); j++) { TF_RETURN_IF_ERROR(writer->WriteTensor( full_name(strings::StrCat("buffer_", i, "_", j)), buffer_[i][j])); @@ -165,7 +165,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { // Save input iterator if it hasn't been exhausted else write // "end_of_input_sequence". - if (end_of_input_sequence_) { + if (!input_impl_) { TF_RETURN_IF_ERROR( writer->WriteScalar(full_name("end_of_input_sequence"), "")); } else { @@ -180,10 +180,15 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { buffer_.clear(); // Restore the buffer. - int64 buffer_size; - TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("buffer_size"), &buffer_size)); - for (int i = 0; i < buffer_size; i++) { + size_t buffer_size; + { + int64 temp; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("buffer_size"), &temp)); + buffer_size = static_cast(temp); + } + buffer_.reserve(buffer_size); + for (size_t i = 0; i < buffer_size; i++) { int64 list_size; TF_RETURN_IF_ERROR(reader->ReadScalar( full_name(strings::StrCat("buffer_", i, "_size")), &list_size)); @@ -205,7 +210,6 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { input_impl_ = dataset()->input_->MakeIterator(prefix()); TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); } else { - end_of_input_sequence_ = true; input_impl_.reset(); } return Status::OK(); @@ -230,7 +234,6 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { mutex mu_; std::vector> buffer_ GUARDED_BY(mu_); std::unique_ptr input_impl_ GUARDED_BY(mu_); - bool end_of_input_sequence_ GUARDED_BY(mu_) = false; const int64 seed_ GUARDED_BY(mu_); const int64 seed2_ GUARDED_BY(mu_); random::PhiloxRandom parent_generator_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/skip_dataset_op.cc index 05152db1ae..7ee945dd4c 100644 --- a/tensorflow/core/kernels/skip_dataset_op.cc +++ b/tensorflow/core/kernels/skip_dataset_op.cc @@ -118,6 +118,11 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + // Keep calling GetNext(). TODO(vrv): Figure out a way to // skip records without reading, perhaps by adding an // interface to iterator. @@ -138,6 +143,9 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { // Return GetNext() on the underlying iterator. TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); + if (*end_of_sequence) { + input_impl_.reset(); + } return Status::OK(); } @@ -145,7 +153,12 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (input_impl_) { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } else { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } return Status::OK(); } @@ -153,7 +166,11 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { IteratorStateReader* reader) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/take_dataset_op.cc index f9f675abda..fb294a96b1 100644 --- a/tensorflow/core/kernels/take_dataset_op.cc +++ b/tensorflow/core/kernels/take_dataset_op.cc @@ -118,6 +118,10 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } while (i_ < dataset()->count_) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -136,7 +140,12 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (input_impl_) { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } else { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } return Status::OK(); } @@ -144,7 +153,11 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { IteratorStateReader* reader) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index 30d64ea634..f466c8b268 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -109,6 +109,10 @@ class ZipDatasetOp : public DatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); + if (input_impls_.empty()) { + *end_of_sequence = true; + return Status::OK(); + } out_tensors->clear(); out_tensors->reserve(dataset()->output_dtypes().size()); for (const auto& input_impl : input_impls_) { @@ -116,28 +120,43 @@ class ZipDatasetOp : public DatasetOpKernel { TF_RETURN_IF_ERROR( input_impl->GetNext(ctx, &input_tensors, end_of_sequence)); if (*end_of_sequence) { - return Status::OK(); + break; } out_tensors->insert(out_tensors->end(), input_tensors.begin(), input_tensors.end()); } - *end_of_sequence = false; + if (*end_of_sequence) { + out_tensors->clear(); + input_impls_.clear(); + } else { + *end_of_sequence = false; + } return Status::OK(); } protected: Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - for (auto& input_impl : input_impls_) - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl)); + if (input_impls_.empty()) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impls_empty"), "")); + } else { + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl)); + } return Status::OK(); } Status RestoreInternal(OpKernelContext* ctx, IteratorStateReader* reader) override { mutex_lock l(mu_); - for (auto& input_impl : input_impls_) - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl)); + if (reader->Contains(full_name("input_impls_empty"))) { + input_impls_.clear(); + } else { + DCHECK_EQ(input_impls_.size(), dataset()->inputs_.size()); + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl)); + } return Status::OK(); } -- GitLab From e058a030f88f19a60e3a4d5ed6b5cbcf85b1a5d6 Mon Sep 17 00:00:00 2001 From: PW486 Date: Fri, 10 Nov 2017 14:19:31 +0900 Subject: [PATCH 0141/1801] Fixed typos --- tensorflow/c/c_test_util.h | 2 +- tensorflow/compiler/xla/client/computation_builder.h | 2 +- tensorflow/contrib/boosted_trees/lib/utils/batch_features.h | 2 +- tensorflow/core/grappler/costs/virtual_placer.h | 2 +- tensorflow/python/util/util.cc | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index d547337492..bc44a7b840 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -74,7 +74,7 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s, TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s); -// Split `input` along the first dimention into 3 tensors +// Split `input` along the first dimension into 3 tensors TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s, const char* name = "split3"); diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 8e1b4be1f3..9159b26614 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -806,7 +806,7 @@ class ComputationBuilder { // The operand must represent a constant value, which in this case // means that it must not statically depend on any parameter of the // computation that is being built other then the ones specified on the - // paramtere list. The parameters in the list will be indexed by their + // parameter list. The parameters in the list will be indexed by their // parameter id property so the number of parameters specified should be at // least as many as the largest used parameter index. // diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index 7a550d6f73..badc629a11 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -56,7 +56,7 @@ class BatchFeatures { *num_sparse_int_features = sparse_int_feature_columns_.size(); if (*num_dense_float_features == 0 && *num_sparse_float_features == 0 && *num_sparse_int_features == 0) { - return errors::FailedPrecondition("Not intialized yet."); + return errors::FailedPrecondition("Not initialized yet."); } return Status::OK(); } diff --git a/tensorflow/core/grappler/costs/virtual_placer.h b/tensorflow/core/grappler/costs/virtual_placer.h index 7ccb1ebb99..fee5ce0f51 100644 --- a/tensorflow/core/grappler/costs/virtual_placer.h +++ b/tensorflow/core/grappler/costs/virtual_placer.h @@ -41,7 +41,7 @@ class VirtualPlacer { private: // Converts given device name to Lowercase Fully-Qualified Name (LFQN) string. // This helps us disambiguate device names internally and simplify matching. - // If device_name couldn't be parsed succesfully, returns empty string. + // If device_name couldn't be parsed successfully, returns empty string. string to_lfqn_or_empty(const string& device_name) const; // Map based on the cluster info: cluster device name -> device properties. diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index c3d7611ad4..a41fa7df25 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -29,7 +29,7 @@ bool WarnedThatSetIsNotSequence = false; // Returns 1 if `o` is considered a sequence for the purposes of Flatten(). // Returns 0 otherwise. -// Returns -1 if an error occured. +// Returns -1 if an error occurred. int IsSequenceHelper(PyObject* o) { if (PyDict_Check(o)) return true; if (PySet_Check(o) && !WarnedThatSetIsNotSequence) { -- GitLab From 23dc70389b3bf51886156de88fae6b922619a6ff Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 9 Nov 2017 22:39:33 -0800 Subject: [PATCH 0142/1801] [XLA:CPU] Make one of the tile dimensions in the LLVM IR GEMV tunable. The tiling dimension corresponding to the number of vector registers in the tile can be changed easily. Expose this value as a backend specific flag so that we can experiment with it to find a good default value. This CL also fixes a bug exposed by a variable tiling factor in the row major GEMV implementation. This wasn't caught before because having tile_rows == tile_cols hides the bug. PiperOrigin-RevId: 175258553 --- tensorflow/compiler/xla/service/cpu/BUILD | 2 ++ .../compiler/xla/service/cpu/cpu_options.cc | 16 ++++++++++++++++ .../compiler/xla/service/cpu/cpu_options.h | 2 ++ .../compiler/xla/service/cpu/dot_op_emitter.cc | 11 ++++++++--- .../compiler/xla/service/cpu/dot_op_emitter.h | 9 +++++++++ 5 files changed, 37 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 10ec677e2f..4f6e69ebd4 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -280,6 +280,7 @@ cc_library( srcs = ["dot_op_emitter.cc"], hdrs = ["dot_op_emitter.h"], deps = [ + ":cpu_options", ":cpu_runtime", ":ir_emission_utils", "//tensorflow/compiler/xla:shape_util", @@ -719,6 +720,7 @@ cc_library( hdrs = ["cpu_options.h"], deps = [ "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.cc b/tensorflow/compiler/xla/service/cpu/cpu_options.cc index dba140d112..09f028463a 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.cc @@ -15,11 +15,14 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_options.h" +#include "tensorflow/core/lib/strings/numbers.h" + namespace { const char* const kXlaParallelCpuOption = "xla_cpu_parallel"; const char* const kXlaOptimizeForSizeCpuOption = "xla_cpu_optimize_for_size"; const char* const kXlaDisableVectorizedReduce = "xla_disable_vectorized_reduce"; +const char* const kLlvmIrDotTilingFactor = "xla_llvm_dot_tiling_factor"; } // namespace @@ -45,6 +48,19 @@ bool VectorizedReduceDisabled(const HloModuleConfig& config) { return extra_options_map.count(kXlaOptimizeForSizeCpuOption) > 0; } +tensorflow::gtl::optional LlvmIrGemvTilingFactor( + const HloModuleConfig& config) { + const auto& extra_options_map = + config.debug_options().xla_backend_extra_options(); + auto it = extra_options_map.find(kLlvmIrDotTilingFactor); + int64 tiling_factor; + if (it != extra_options_map.end() && + tensorflow::strings::safe_strto64(it->second, &tiling_factor)) { + return tiling_factor; + } + return tensorflow::gtl::nullopt; +} + } // namespace options } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.h b/tensorflow/compiler/xla/service/cpu/cpu_options.h index 5dc24ebc7b..6ba0fd2453 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.h @@ -27,6 +27,8 @@ namespace options { bool CpuParallelBackendRequested(const HloModuleConfig& config); bool OptimizeForSizeRequested(const HloModuleConfig& config); bool VectorizedReduceDisabled(const HloModuleConfig& config); +tensorflow::gtl::optional LlvmIrGemvTilingFactor( + const HloModuleConfig& config); } // namespace options } // namespace cpu diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 1cbd4094a3..2a447a54b0 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -366,7 +366,7 @@ class RowMajorMatrixVectorProductEmitter { result_(result), ir_builder_(ir_builder), ksl_(ir_builder_), - vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { + vsl_(scalar_type_, /*vector_size=*/tile_cols_, ir_builder_, "") { CHECK(tile_cols_ > 0 && IsPowerOfTwo(static_cast(tile_cols_))); } @@ -573,11 +573,15 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { return false; } + int64 tiling_factor = GetGemvTilingFactor(); + CHECK_GT(tiling_factor, 0); + if (is_column_major_matrix_vector) { VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m << " and k = " << k; ColumnMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), 8, 8, m, k, + dot_.shape().element_type(), /*tile_rows=*/8, + /*tile_cols=*/tiling_factor, m, k, swap_operands ? rhs_array_.GetBasePointer() : lhs_array_.GetBasePointer(), swap_operands ? lhs_array_.GetBasePointer() @@ -588,7 +592,8 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m << " and k = " << k; RowMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), 8, 8, m, k, + dot_.shape().element_type(), /*tile_rows=*/tiling_factor, + /*tile_cols=*/8, m, k, swap_operands ? rhs_array_.GetBasePointer() : lhs_array_.GetBasePointer(), swap_operands ? lhs_array_.GetBasePointer() diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index 182e1b8c68..470bf6ffb4 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_DOT_OP_EMITTER_H_ #include "llvm/IR/IRBuilder.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_options.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" @@ -105,6 +106,14 @@ class DotOpEmitter { // of rank 2 as well). MatMultDims GetMatMultDims() const; + // When doing a tiled GEMV in LLVM IR, a "tile" consists of this many vector + // registers. + int64 GetGemvTilingFactor() const { + const int64 kDefaultTilingFactor = 8; + return options::LlvmIrGemvTilingFactor(hlo_module_config_) + .value_or(kDefaultTilingFactor); + } + const HloInstruction& dot_; const bool transpose_lhs_; const bool transpose_rhs_; -- GitLab From f6931a687874190bb6f5cbc927da2bdc97a18b38 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 9 Nov 2017 23:29:56 -0800 Subject: [PATCH 0143/1801] Java/OS X: Workaround for how the framework library is packaged in the .jar by the release process. See #13872 PiperOrigin-RevId: 175261983 --- .../java/org/tensorflow/NativeLibrary.java | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java index 2b431eebf5..499757e8cf 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java +++ b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java @@ -43,7 +43,6 @@ final class NativeLibrary { private static final boolean DEBUG = System.getProperty("org.tensorflow.NativeLibrary.DEBUG") != null; private static final String JNI_LIBNAME = "tensorflow_jni"; - private static final String FRAMEWORK_LIBNAME = "tensorflow_framework"; public static void load() { if (isLoaded() || tryLoadLibrary()) { @@ -59,12 +58,15 @@ final class NativeLibrary { } // Native code is not present, perhaps it has been packaged into the .jar file containing this. // Extract the JNI library itself - final String jniResourceName = makeResourceName(JNI_LIBNAME); + final String jniLibName = System.mapLibraryName(JNI_LIBNAME); + final String jniResourceName = makeResourceName(jniLibName); log("jniResourceName: " + jniResourceName); final InputStream jniResource = NativeLibrary.class.getClassLoader().getResourceAsStream(jniResourceName); // Extract the JNI's dependency - final String frameworkResourceName = makeResourceName(FRAMEWORK_LIBNAME); + final String frameworkLibName = + maybeAdjustForMacOS(System.mapLibraryName("tensorflow_framework")); + final String frameworkResourceName = makeResourceName(frameworkLibName); log("frameworkResourceName: " + frameworkResourceName); final InputStream frameworkResource = NativeLibrary.class.getClassLoader().getResourceAsStream(frameworkResourceName); @@ -88,12 +90,15 @@ final class NativeLibrary { tempPath.deleteOnExit(); final String tempDirectory = tempPath.toString(); if (frameworkResource != null) { - extractResource(frameworkResource, FRAMEWORK_LIBNAME, tempDirectory); + extractResource(frameworkResource, frameworkLibName, tempDirectory); } else { - log(frameworkResourceName + " not found. This is fine assuming " + jniResourceName - + " is not built to depend on it."); + log( + frameworkResourceName + + " not found. This is fine assuming " + + jniResourceName + + " is not built to depend on it."); } - System.load(extractResource(jniResource, JNI_LIBNAME, tempDirectory)); + System.load(extractResource(jniResource, jniLibName, tempDirectory)); } catch (IOException e) { throw new UnsatisfiedLinkError( String.format( @@ -121,9 +126,27 @@ final class NativeLibrary { } } + private static String maybeAdjustForMacOS(String libFilename) { + if (!System.getProperty("os.name").contains("OS X")) { + return libFilename; + } + // This is macOS, and the TensorFlow release process might have setup dependencies on + // libtensorflow_framework.so instead of libtensorflow_framework.dylib. Adjust for that. + final ClassLoader cl = NativeLibrary.class.getClassLoader(); + if (cl.getResource(makeResourceName(libFilename)) != null) { + return libFilename; + } + // liftensorflow_framework.dylib not found, try libtensorflow_framework.so + final String suffix = ".dylib"; + if (!libFilename.endsWith(suffix)) { + return libFilename; + } + return libFilename.substring(0, libFilename.length() - suffix.length()) + ".so"; + } + private static String extractResource( InputStream resource, String resourceName, String extractToDirectory) throws IOException { - final File dst = new File(extractToDirectory, System.mapLibraryName(resourceName)); + final File dst = new File(extractToDirectory, resourceName); dst.deleteOnExit(); final String dstPath = dst.toString(); log("extracting native library to: " + dstPath); @@ -157,9 +180,7 @@ final class NativeLibrary { } private static String makeResourceName(String baseName) { - return "org/tensorflow/native/" - + String.format("%s-%s/", os(), architecture()) - + System.mapLibraryName(baseName); + return "org/tensorflow/native/" + String.format("%s-%s/", os(), architecture()) + baseName; } private static long copy(InputStream src, File dstFile) throws IOException { -- GitLab From 8d46b72fdcf675245addb006aadcf358ddf7dd7d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 02:48:04 -0800 Subject: [PATCH 0144/1801] Correct comment in K-FAC's layer_collection PiperOrigin-RevId: 175275184 --- tensorflow/contrib/kfac/python/ops/layer_collection.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 4eabb59b3e..7300a7998c 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -448,10 +448,10 @@ class LayerCollection(object): tf.get_variable_scope().reuse. Raises: - ValueError: If reuse=True and name != None. - ValueError: If reuse=True and seed != None. - KeyError: If reuse=True and no existing LossFunction with 'name' found. - KeyError: If reuse=False and existing LossFunction with 'name' found. + ValueError: If reuse == True and name == None. + ValueError: If reuse == True and seed != None. + KeyError: If reuse == True and no existing LossFunction with 'name' found. + KeyError: If reuse == False and existing LossFunction with 'name' found. """ name = name or self._graph.unique_name( "register_categorical_predictive_distribution") -- GitLab From 593dfb6a340ed5348f935f725285c659b574327c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 03:30:53 -0800 Subject: [PATCH 0145/1801] Extend the Array class with more functionality. PiperOrigin-RevId: 175277161 --- tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/array.h | 159 +++++++++++++++++- tensorflow/compiler/xla/array_test.cc | 45 +++++ .../compiler/xla/client/computation_builder.h | 1 + .../compiler/xla/service/hlo_instruction.h | 5 + .../compiler/xla/service/hlo_sharding.cc | 3 +- 6 files changed, 205 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index f6e405744a..515b572b0e 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -340,6 +340,7 @@ cc_library( name = "array", hdrs = ["array.h"], deps = [ + ":status", ":types", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index ba898d1f4e..213e0bac6c 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -23,8 +23,10 @@ limitations under the License. #include #include #include +#include #include +#include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -35,10 +37,63 @@ limitations under the License. namespace xla { +namespace array_impl { + +// conjunction +// +// Performs a compile-time logical AND operation on the passed types (which +// must have `::value` members convertible to `bool`. Short-circuits if it +// encounters any `false` members (and does not compare the `::value` members +// of any remaining arguments). +// +// This metafunction is designed to be a drop-in replacement for the C++17 +// `std::conjunction` metafunction. +template +struct conjunction; + +template +struct conjunction + : std::conditional, T>::type {}; + +template <> +struct conjunction<> : std::true_type {}; + +// A type trait that is valid when all elements in a parameter pack are of +// integral type. +template +using pack_is_integral = conjunction...>; + +// Compares three same-sized vectors elementwise. For each item in `values`, +// returns false if any of values[i] is outside the half-open range [starts[i], +// ends[i]). +template +bool all_inside_range(const C1& values, const C2& range_starts, + const C3& range_ends) { + for (size_t i = 0, e = values.size(); i < e; ++i) { + if (values[i] < range_starts[i] || values[i] >= range_ends[i]) { + return false; + } + } + return true; +} + +} // namespace array_impl + // General N dimensional array class with arbitrary value type. template class Array { public: + // Type inference can have a hard time parsing very deep initializer list + // nests, especially if one or more dimensions is one as the compiler just + // sees a single-element integer initializer. These typedefs allow casting + // explicitly with less typing. + using InitializerList1D = std::initializer_list; + using InitializerList2D = std::initializer_list; + using InitializerList3D = std::initializer_list; + using InitializerList4D = std::initializer_list; + + using value_type = T; + // Creates a new array with the specified dimensions. explicit Array(tensorflow::gtl::ArraySlice sizes) : Array(sizes, T()) {} @@ -53,7 +108,7 @@ class Array { // Creates a 2D array from the given nested initializer list. The outer // initializer list is the first dimension, the inner is the second dimension. // For example, {{1, 2, 3}, {4, 5, 6}} results in an array with n1=2 and n2=3. - Array(std::initializer_list> values) + Array(InitializerList2D values) : Array(ToInt64Vector({values.size(), values.begin()->size()})) { int64 idx = 0; for (const auto& it1 : values) { @@ -67,8 +122,7 @@ class Array { // Creates a 3D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. - Array(std::initializer_list>> - values) + Array(InitializerList3D values) : Array(ToInt64Vector({values.size(), values.begin()->size(), values.begin()->begin()->size()})) { int64 idx = 0; @@ -85,9 +139,7 @@ class Array { // Creates a 4D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. - Array(std::initializer_list< - std::initializer_list>>> - values) + Array(InitializerList4D values) : Array(ToInt64Vector({values.size(), values.begin()->size(), values.begin()->begin()->size(), values.begin()->begin()->begin()->size()})) { @@ -173,10 +225,46 @@ class Array { } } + // Invokes a callback with the (indices, value_ptr) for each cell in the + // array. If a callback returns a non-OK status, returns that else returns + // Status::OK(). + Status EachStatus( + std::function, T*)> f) { + std::vector index(sizes_.size()); + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + Status s = f(index, &values_[i]); + if (!s.ok()) { + return s; + } + } + return Status::OK(); + } + + // Invokes a callback with the (indices, value) for each cell in the array. + // If a callback returns a non-OK status, returns that else returns + // Status::OK(). + Status EachStatus( + std::function, T)> f) const { + std::vector index(sizes_.size()); + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + Status s = f(index, values_[i]); + if (!s.ok()) { + return s; + } + } + return Status::OK(); + } + // Returns the value at the cell specified by the indexes. The number of // arguments have to match with the number of dimensions for the array. + // + // The type trait is required to avoid this overload participating too + // eagerly; a parameter pack can take zero or more elements, so we must + // restrict this to only parameter packs that are all of integral type. template - const T& operator()(Dims... dims) const { + typename std::enable_if::value, + const T&>::type + operator()(Dims... dims) const { // We are using a std::array to avoid having to allocate memory in this // function for performance reasons. std::array indexes{{static_cast(dims)...}}; @@ -186,7 +274,9 @@ class Array { // Returns the value at the cell specified by the indexes. The number of // arguments have to match with the number of dimensions for the array. template - T& operator()(Dims... dims) { + typename std::enable_if::value, + T&>::type + operator()(Dims... dims) { // We are using a std::array to avoid having to allocate memory in this // function for performance reasons. std::array indexes{{static_cast(dims)...}}; @@ -255,6 +345,59 @@ class Array { bool operator!=(const Array& other) const { return !(*this == other); } + // Performs the equivalent of a slice operation on this array. + Array Slice(tensorflow::gtl::ArraySlice starts, + tensorflow::gtl::ArraySlice limits) const { + CHECK_EQ(starts.size(), num_dimensions()); + CHECK_EQ(limits.size(), num_dimensions()); + + std::vector sizes; + std::transform(starts.begin(), starts.end(), limits.begin(), + std::back_inserter(sizes), + [](int64 start, int64 limit) { return limit - start; }); + Array result(sizes); + + std::vector index(sizes_.size()); + int64 slice_i = 0; + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + if (array_impl::all_inside_range(index, starts, limits)) { + // Even though the bounds of result are different to our bounds, we're + // iterating in the same order. So we can simply write successive linear + // indices instead of recalculating a multi-dimensional index. + result.values_[slice_i++] = values_[i]; + } + } + return result; + } + + // Performs the equivalent of a DynamicUpdateSlice in-place on this array. + void UpdateSlice(const Array& from, + tensorflow::gtl::ArraySlice start_indices) { + CHECK_EQ(from.num_dimensions(), num_dimensions()); + std::vector limit_indices; + std::transform(start_indices.begin(), start_indices.end(), + from.dimensions().begin(), std::back_inserter(limit_indices), + std::plus{}); + std::vector index(sizes_.size()); + int64 from_i = 0; + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + if (array_impl::all_inside_range(index, start_indices, limit_indices)) { + // Even though the bounds of from are different to our bounds, we're + // iterating in the same order. So we can simply write successive linear + // indices instead of recalculating a multi-dimensional index. + values_[i] = from.values_[from_i++]; + } + } + } + + // Performs an in-place reshape, modifying the dimensions but not the + // underlying data. + void Reshape(tensorflow::gtl::ArraySlice new_dimensions) { + int64 old_num_elements = num_elements(); + sizes_ = std::vector(new_dimensions.begin(), new_dimensions.end()); + CHECK_EQ(num_elements(), old_num_elements); + } + // Returns a string representation of the array suitable for debugging. string ToString() const { std::vector pieces; diff --git a/tensorflow/compiler/xla/array_test.cc b/tensorflow/compiler/xla/array_test.cc index 093784f541..8b94194774 100644 --- a/tensorflow/compiler/xla/array_test.cc +++ b/tensorflow/compiler/xla/array_test.cc @@ -71,6 +71,19 @@ TEST(ArrayTest, IndexingReadWrite) { EXPECT_EQ(arr(1, 2), 61); } +TEST(ArrayTest, DynamicIndexingReadWrite) { + Array arr({2, 3}); + + std::vector index1 = {1, 1}; + std::vector index2 = {1, 2}; + EXPECT_EQ(arr(index1), 0); + EXPECT_EQ(arr(index2), 0); + arr(index1) = 51; + arr(index2) = 61; + EXPECT_EQ(arr(1, 1), 51); + EXPECT_EQ(arr(1, 2), 61); +} + TEST(ArrayTest, IndexingReadWriteBool) { Array arr{{false, true, false}, {false, true, false}}; @@ -141,5 +154,37 @@ TEST(ArrayTest, Each) { EXPECT_EQ(arr.num_elements() * (arr.num_elements() - 1) / 2, each_sum); } +TEST(ArrayTest, Slice) { + Array arr({2, 4}); + arr.FillWithMultiples(1); + + Array identity_slice = arr.Slice({0, 0}, {2, 4}); + EXPECT_EQ(identity_slice.dimensions(), arr.dimensions()); + for (auto it1 = arr.begin(), it2 = identity_slice.begin(), e = arr.end(); + it1 != e; ++it1, ++it2) { + EXPECT_EQ(*it1, *it2); + } + + Array sub_slice = arr.Slice({1, 0}, {2, 2}); + EXPECT_EQ(sub_slice.dimensions(), (std::vector{1, 2})); + const string expected = R"([[4, 5]])"; + EXPECT_EQ(expected, sub_slice.ToString()); +} + +TEST(ArrayTest, UpdateSlice) { + Array arr({3, 4}); + arr.FillWithMultiples(1); + + Array sub_arr({2, 2}); + sub_arr.FillWithMultiples(3); + + arr.UpdateSlice(sub_arr, {1, 1}); + + const string expected = R"([[0, 1, 2, 3], + [4, 0, 3, 7], + [8, 6, 9, 11]])"; + EXPECT_EQ(expected, arr.ToString()); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 8e1b4be1f3..4c6e320557 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -68,6 +68,7 @@ class ShardingBuilder { const TileAssignment& tile_assignment) { OpSharding result; result.set_type(OpSharding::Type::OpSharding_Type_OTHER); + *result.mutable_tile_shape() = tile_shape; for (int64 dim : tile_assignment.dimensions()) { result.add_tile_assignment_dimensions(dim); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 64a88164a7..d174f05aa6 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -863,6 +863,11 @@ class HloInstruction { return *window_; } + // Sets the window data in a windowed operation such as convolution. + void set_window(const Window& window) { + window_ = MakeUnique(window); + } + // Returns the padding configuration for a pad node. // // Precondition: opcode() == HloOpcode::kPad diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index bc5663513b..7356663454 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -249,7 +249,8 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { return HloSharding(tuple_shardings); } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { return Replicate(); - } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL) { + } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL || + proto.tile_assignment_devices().size() == 1) { return HloSharding(proto.tile_assignment_devices(0)); } // Some versions of gcc cannot infer the TileAssignment constructor from a -- GitLab From e951547c9ba5f883f3be9bd9b1a79ccc85b29629 Mon Sep 17 00:00:00 2001 From: Paul Van Eck Date: Fri, 10 Nov 2017 05:53:27 -0800 Subject: [PATCH 0146/1801] Add curses install note for Windows in tfdbg docs (#14343) * Add curses install note for Windows in tfdbg docs * Adjust Windows note with warning --- .../docs_src/programmers_guide/debugger.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index a1496d26a9..dd5496b08e 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -9,11 +9,19 @@ lets you view the internal structure and states of running TensorFlow graphs during training and inference, which is difficult to debug with general-purpose debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm. -> NOTE: The system requirements of tfdbg on supported external platforms include -> the following. On Mac OS X, the `ncurses` library is required. It can be -> installed with `brew install homebrew/dupes/ncurses`. On Windows, `pyreadline` -> is required. If you use Anaconda3, you can install it with a command +> NOTE: TensorFlow debugger uses a +> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based +> text user interface. On Mac OS X, the `ncurses` library is required and can +> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses +> isn't as well supported, so a +> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can +> be used with tfdbg by installing `pyreadline` with pip. +> If you use Anaconda3, you can install it with a command > such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. +> Unofficial Windows curses packages can be downloaded +> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently +> installed using `pip install .whl`, however curses on Windows +> may not work as reliably as curses on Linux or Mac. This tutorial demonstrates how to use the **tfdbg** command-line interface (CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN) -- GitLab From 8cf98b7d0e9118dc45f06a5fed9bfc62b2a86c44 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Fri, 10 Nov 2017 08:19:04 -0800 Subject: [PATCH 0147/1801] Add "no_pip" to contrib/data/python/kernel_tests Add "no_pip" to tests under contrib/data/python/kernel_tests that depend on tensorflow.contrib.data.python.kernel_tests --- tensorflow/contrib/data/python/kernel_tests/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index d811683ecd..241fc2ab4f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -11,6 +11,7 @@ py_test( size = "small", srcs = ["batch_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -364,6 +365,7 @@ py_test( size = "small", srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -428,6 +430,7 @@ py_test( size = "small", srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", -- GitLab From d0a5d885d61b837018cb931a4d577289acc826fc Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Fri, 10 Nov 2017 12:26:11 -0800 Subject: [PATCH 0148/1801] Revert "Branch 175277161" --- CODE_OF_CONDUCT.md | 2 +- README.md | 4 +- configure.py | 47 +- tensorflow/BUILD | 124 +- tensorflow/c/c_api.cc | 4 +- tensorflow/c/c_api_test.cc | 4 +- tensorflow/c/eager/BUILD | 2 +- tensorflow/c/eager/tape.cc | 102 + tensorflow/c/eager/tape.h | 501 +--- tensorflow/compiler/aot/tfcompile.bzl | 15 +- .../compiler/jit/kernels/xla_launch_op.cc | 1 + .../compiler/jit/xla_compilation_cache.cc | 3 + tensorflow/compiler/tests/BUILD | 2 +- .../compiler/tests/fused_batchnorm_test.py | 25 +- tensorflow/compiler/tf2xla/type_util.cc | 3 - tensorflow/compiler/tf2xla/xla_compiler.h | 6 + tensorflow/compiler/xla/BUILD | 2 - tensorflow/compiler/xla/array.h | 159 +- tensorflow/compiler/xla/array_test.cc | 45 - tensorflow/compiler/xla/client/client.cc | 3 +- .../compiler/xla/client/computation_builder.h | 1 - tensorflow/compiler/xla/client/lib/BUILD | 1 - tensorflow/compiler/xla/client/lib/testing.cc | 57 +- tensorflow/compiler/xla/client/lib/testing.h | 4 + .../compiler/xla/client/local_client.cc | 20 + tensorflow/compiler/xla/client/local_client.h | 16 + tensorflow/compiler/xla/literal_util.cc | 121 +- tensorflow/compiler/xla/literal_util.h | 25 +- tensorflow/compiler/xla/literal_util_test.cc | 62 - tensorflow/compiler/xla/primitive_util.cc | 8 +- tensorflow/compiler/xla/primitive_util.h | 7 - tensorflow/compiler/xla/service/BUILD | 4 +- tensorflow/compiler/xla/service/backend.cc | 4 +- .../compiler/xla/service/buffer_assignment.cc | 21 +- tensorflow/compiler/xla/service/cpu/BUILD | 4 - .../compiler/xla/service/cpu/cpu_options.cc | 16 - .../compiler/xla/service/cpu/cpu_options.h | 2 - .../xla/service/cpu/cpu_runtime_test.cc | 4 +- .../xla/service/cpu/dot_op_emitter.cc | 569 +--- .../compiler/xla/service/cpu/dot_op_emitter.h | 37 - .../xla/service/cpu/ir_emission_utils.cc | 17 +- .../xla/service/cpu/ir_emission_utils.h | 11 +- .../compiler/xla/service/cpu/ir_emitter.cc | 10 - .../compiler/xla/service/cpu/ir_emitter.h | 2 - .../xla/service/cpu/layout_assignment.cc | 4 +- .../compiler/xla/service/dfs_hlo_visitor.h | 6 +- .../service/dfs_hlo_visitor_with_default.h | 10 +- .../xla/service/gpu/convolution_thunk.cc | 22 +- .../compiler/xla/service/gpu/gpu_compiler.cc | 18 +- .../compiler/xla/service/gpu/ir_emitter.cc | 8 - .../compiler/xla/service/gpu/ir_emitter.h | 2 - .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 4 - .../compiler/xla/service/hlo_cost_analysis.cc | 8 - .../compiler/xla/service/hlo_cost_analysis.h | 2 - .../compiler/xla/service/hlo_cse_test.cc | 24 +- .../xla/service/hlo_dataflow_analysis.cc | 65 - .../xla/service/hlo_dataflow_analysis.h | 2 - .../xla/service/hlo_dataflow_analysis_test.cc | 48 - .../compiler/xla/service/hlo_evaluator.cc | 4 - .../compiler/xla/service/hlo_evaluator.h | 15 +- .../compiler/xla/service/hlo_graph_dumper.cc | 26 +- .../compiler/xla/service/hlo_instruction.cc | 97 +- .../compiler/xla/service/hlo_instruction.h | 31 +- .../xla/service/hlo_instruction_test.cc | 4 +- .../compiler/xla/service/hlo_matchers.h | 2 - tensorflow/compiler/xla/service/hlo_module.h | 6 +- .../compiler/xla/service/hlo_module_config.cc | 4 +- .../compiler/xla/service/hlo_module_config.h | 10 + tensorflow/compiler/xla/service/hlo_opcode.h | 2 - .../xla/service/hlo_rematerialization.cc | 2 - tensorflow/compiler/xla/service/hlo_runner.cc | 3 +- .../compiler/xla/service/hlo_sharding.cc | 74 +- .../compiler/xla/service/hlo_sharding.h | 83 +- .../compiler/xla/service/hlo_sharding_test.cc | 68 - .../compiler/xla/service/hlo_verifier.cc | 49 +- .../xla/service/instruction_fusion.cc | 2 - .../xla/service/interpreter/executable.cc | 2 +- .../xla/service/layout_assignment_test.cc | 32 +- tensorflow/compiler/xla/service/llvm_ir/BUILD | 24 - .../service/llvm_ir/kernel_support_library.cc | 65 - .../service/llvm_ir/kernel_support_library.h | 128 - .../compiler/xla/service/llvm_ir/llvm_loop.cc | 68 +- .../compiler/xla/service/llvm_ir/llvm_loop.h | 29 +- .../compiler/xla/service/llvm_ir/llvm_util.cc | 8 - .../compiler/xla/service/llvm_ir/llvm_util.h | 2 - .../service/llvm_ir/vector_support_library.cc | 150 - .../service/llvm_ir/vector_support_library.h | 174 -- .../compiler/xla/service/local_service.cc | 20 + .../xla/service/logical_buffer_analysis.cc | 15 - .../xla/service/logical_buffer_analysis.h | 2 - tensorflow/compiler/xla/service/service.h | 2 + .../compiler/xla/service/shape_inference.cc | 14 +- .../xla/service/tuple_points_to_analysis.cc | 58 - .../xla/service/tuple_points_to_analysis.h | 2 - .../service/tuple_points_to_analysis_test.cc | 45 - .../compiler/xla/service/user_computation.cc | 6 +- .../xla/service/while_loop_simplifier.cc | 4 +- .../xla/service/while_loop_simplifier_test.cc | 6 +- tensorflow/compiler/xla/shape_tree.h | 3 - tensorflow/compiler/xla/shape_util.cc | 1 - tensorflow/compiler/xla/tests/BUILD | 24 +- .../xla/tests/client_library_test_base.cc | 54 - .../xla/tests/client_library_test_base.h | 23 +- tensorflow/compiler/xla/tests/client_test.cc | 4 +- .../xla/tests/compilation_cache_test.cc | 8 +- .../xla/tests/compute_constant_test.cc | 4 +- .../compiler/xla/tests/convolution_test.cc | 160 +- .../compiler/xla/tests/dot_operation_test.cc | 97 +- .../compiler/xla/tests/literal_test_util.cc | 13 +- .../compiler/xla/tests/llvm_compiler_test.cc | 143 +- .../xla/tests/local_client_execute_test.cc | 10 +- .../xla/tests/local_client_test_base.cc | 3 +- tensorflow/compiler/xla/tests/map_test.cc | 8 +- tensorflow/compiler/xla/tests/reshape_test.cc | 5 +- tensorflow/compiler/xla/tests/test_utils.cc | 120 - tensorflow/compiler/xla/tests/test_utils.h | 64 +- tensorflow/compiler/xla/tools/BUILD | 1 - .../compiler/xla/tools/parser/README.md | 16 +- .../compiler/xla/tools/parser/hlo_lexer.cc | 66 +- .../compiler/xla/tools/parser/hlo_lexer.h | 6 +- .../compiler/xla/tools/parser/hlo_parser.cc | 1017 +------ .../xla/tools/parser/hlo_parser_test.cc | 432 +-- .../compiler/xla/tools/parser/hlo_token.h | 3 - .../compiler/xla/tools/replay_computation.cc | 1 - tensorflow/compiler/xla/types.h | 3 - tensorflow/compiler/xla/window_util.cc | 28 +- tensorflow/compiler/xla/xla_data.proto | 26 +- tensorflow/contrib/batching/BUILD | 1 + .../contrib/batching/kernels/batch_kernels.cc | 2 +- .../kernel_tests/csiszar_divergence_test.py | 2 +- .../lib/quantiles/weighted_quantiles_buffer.h | 10 +- tensorflow/contrib/cmake/CMakeLists.txt | 147 +- .../contrib/cmake/external/boringssl.cmake | 6 +- .../contrib/cmake/external/jsoncpp.cmake | 6 +- tensorflow/contrib/cmake/external/lmdb.cmake | 6 +- tensorflow/contrib/cmake/external/png.cmake | 6 +- .../contrib/cmake/external/protobuf.cmake | 6 +- tensorflow/contrib/cmake/external/re2.cmake | 8 +- .../contrib/cmake/external/snappy.cmake | 8 +- .../contrib/cmake/external/sqlite.cmake | 6 +- tensorflow/contrib/cmake/external/zlib.cmake | 6 +- tensorflow/contrib/cmake/tf_c.cmake | 1 + tensorflow/contrib/cmake/tf_cc_ops.cmake | 36 +- .../contrib/cmake/tf_core_framework.cmake | 3 - .../contrib/cmake/tf_core_kernels.cmake | 24 +- tensorflow/contrib/cmake/tf_core_ops.cmake | 1 + .../cmake/tf_label_image_example.cmake | 5 + tensorflow/contrib/cmake/tf_python.cmake | 40 +- tensorflow/contrib/cmake/tf_shared_lib.cmake | 45 +- .../contrib/cmake/tf_stream_executor.cmake | 3 + tensorflow/contrib/cmake/tf_tools.cmake | 13 +- tensorflow/contrib/cmake/tf_tutorials.cmake | 5 + tensorflow/contrib/crf/python/ops/crf.py | 21 +- tensorflow/contrib/data/BUILD | 13 +- tensorflow/contrib/data/__init__.py | 4 +- tensorflow/contrib/data/ops/dataset_ops.cc | 232 ++ .../contrib/data/python/kernel_tests/BUILD | 9 +- .../kernel_tests/batch_dataset_op_test.py | 225 +- .../dataset_serialization_test_base.py | 77 +- .../python/kernel_tests/iterator_ops_test.py | 2 +- .../kernel_tests/range_dataset_op_test.py | 2 +- .../kernel_tests/reader_dataset_ops_test.py | 2 +- .../kernel_tests/sequence_dataset_op_test.py | 78 - .../kernel_tests/zip_dataset_op_test.py | 27 - tensorflow/contrib/data/python/ops/BUILD | 40 +- .../contrib/data/python/ops/batching.py | 89 +- .../contrib/data/python/ops/dataset_ops.py | 8 +- .../contrib/data/python/ops/error_ops.py | 2 +- .../contrib/data/python/ops/grouping.py | 2 +- .../contrib/data/python/ops/interleave_ops.py | 2 +- .../contrib/data/python/ops/iterator_ops.py | 2 +- tensorflow/contrib/data/python/ops/readers.py | 2 +- .../contrib/data/python/ops/scan_ops.py | 2 +- tensorflow/contrib/distributions/BUILD | 17 + tensorflow/contrib/distributions/__init__.py | 2 + .../python/kernel_tests/cauchy_test.py | 437 +++ .../distributions/python/ops/cauchy.py | 223 ++ tensorflow/contrib/eager/README.md | 2 +- .../python/examples/notebooks/1_basics.ipynb | 4 +- .../examples/notebooks/2_gradients.ipynb | 6 +- .../examples/notebooks/3_datasets.ipynb | 10 +- tensorflow/contrib/eager/python/network.py | 63 +- .../contrib/eager/python/network_test.py | 108 +- tensorflow/contrib/estimator/BUILD | 5 +- .../estimator/python/estimator/head.py | 143 +- .../estimator/python/estimator/head_test.py | 206 +- .../estimator/python/estimator/multi_head.py | 67 +- .../python/estimator/multi_head_test.py | 188 +- .../python/estimator/replicate_model_fn.py | 43 +- .../estimator/replicate_model_fn_test.py | 96 +- .../estimator/python/gan_estimator_impl.py | 2 +- .../kfac/python/ops/layer_collection.py | 8 +- .../contrib/layers/python/layers/layers.py | 18 +- .../layers/python/layers/layers_test.py | 73 +- .../learn/python/learn/estimators/head.py | 2 +- .../learn/python/learn/estimators/model_fn.py | 6 +- .../python/learn/learn_io/data_feeder.py | 12 +- .../linear_optimizer/python/ops/sdca_ops.py | 11 +- tensorflow/contrib/makefile/Makefile | 3 +- tensorflow/contrib/makefile/README.md | 41 +- tensorflow/contrib/makefile/build_all_ios.sh | 54 +- .../contrib/makefile/compile_ios_protobuf.sh | 369 +-- .../makefile/compile_ios_tensorflow.sh | 155 +- tensorflow/contrib/makefile/compile_nsync.sh | 5 +- tensorflow/contrib/makefile/tf_op_files.txt | 18 - tensorflow/contrib/metrics/__init__.py | 2 - .../contrib/metrics/python/ops/metric_ops.py | 149 - .../metrics/python/ops/metric_ops_test.py | 262 +- tensorflow/contrib/nccl/BUILD | 4 +- .../contrib/nccl/python/ops/nccl_ops_test.py | 7 +- tensorflow/contrib/nn/__init__.py | 2 + tensorflow/contrib/opt/BUILD | 18 + tensorflow/contrib/opt/__init__.py | 5 +- .../training/multitask_optimizer_wrapper.py | 138 + .../multitask_optimizer_wrapper_test.py | 119 + tensorflow/contrib/quantize/BUILD | 18 +- .../contrib/quantize/python/quant_ops.py | 57 +- .../contrib/quantize/python/quant_ops_test.py | 87 - .../contrib/quantize/python/quantize.py | 6 +- .../python/quantize_parameterized_test.py | 65 +- .../contrib/quantize/python/quantize_test.py | 25 - .../python/kernel_tests/core_rnn_cell_test.py | 42 + .../rnn/python/kernel_tests/rnn_cell_test.py | 44 + tensorflow/contrib/rnn/python/ops/rnn_cell.py | 344 ++- .../kernel_tests/attention_wrapper_test.py | 37 - .../seq2seq/python/ops/attention_wrapper.py | 57 +- tensorflow/contrib/slim/BUILD | 2 - tensorflow/contrib/slim/README.md | 2 +- .../contrib/slim/python/slim/evaluation.py | 15 +- .../slim/python/slim/evaluation_test.py | 46 +- .../slim/python/slim/nets/resnet_v1_test.py | 2 +- tensorflow/contrib/summary/BUILD | 6 - tensorflow/contrib/summary/summary.py | 2 - tensorflow/contrib/summary/summary_ops.py | 166 +- .../contrib/summary/summary_ops_test.py | 122 - tensorflow/contrib/tensorboard/db/BUILD | 2 - .../tensorboard/db/summary_db_writer.cc | 34 +- .../tensorboard/db/summary_db_writer_test.cc | 56 +- .../contrib/tpu/python/tpu/tpu_config.py | 14 +- .../contrib/tpu/python/tpu/tpu_estimator.py | 144 +- .../training/python/training/hparam.py | 58 +- .../training/python/training/hparam_test.py | 31 +- tensorflow/contrib/verbs/README.md | 14 +- tensorflow/contrib/verbs/rdma.cc | 413 ++- tensorflow/contrib/verbs/rdma.h | 40 +- tensorflow/core/BUILD | 1 + .../core/common_runtime/bfc_allocator.cc | 13 +- .../core/common_runtime/mkl_cpu_allocator.h | 2 +- .../core/common_runtime/sycl/sycl_device.h | 22 +- tensorflow/core/framework/bfloat16.cc | 30 +- tensorflow/core/framework/bfloat16_test.cc | 92 - tensorflow/core/framework/numeric_types.h | 251 +- tensorflow/core/framework/register_types.h | 5 +- tensorflow/core/graph/graph.cc | 15 + tensorflow/core/graph/graph.h | 5 + tensorflow/core/graph/graph_constructor.cc | 10 +- tensorflow/core/graph/graph_constructor.h | 3 - .../core/graph/graph_constructor_test.cc | 15 - tensorflow/core/graph/graph_partition.cc | 4 +- tensorflow/core/graph/graph_test.cc | 64 +- tensorflow/core/graph/mkl_graph_util.h | 179 +- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- .../core/graph/mkl_tfconversion_pass.cc | 4 +- .../core/grappler/costs/graph_properties.cc | 113 +- .../core/grappler/costs/graph_properties.h | 6 + .../grappler/costs/graph_properties_test.cc | 32 +- tensorflow/core/grappler/optimizers/BUILD | 1 - .../optimizers/arithmetic_optimizer.cc | 34 +- .../optimizers/arithmetic_optimizer_test.cc | 94 +- .../grappler/optimizers/constant_folding.cc | 172 +- .../grappler/optimizers/constant_folding.h | 6 +- .../optimizers/constant_folding_test.cc | 80 - .../grappler/optimizers/meta_optimizer.cc | 4 +- tensorflow/core/grappler/utils.cc | 3 +- tensorflow/core/kernels/BUILD | 59 +- tensorflow/core/kernels/avgpooling_op.cc | 7 +- tensorflow/core/kernels/batch_dataset_op.cc | 21 +- tensorflow/core/kernels/bincount_op.cc | 115 +- tensorflow/core/kernels/bincount_op.h | 41 + tensorflow/core/kernels/bincount_op_gpu.cu.cc | 114 + tensorflow/core/kernels/bincount_op_test.cc | 75 + tensorflow/core/kernels/bucketize_op.cc | 66 +- tensorflow/core/kernels/bucketize_op.h | 41 + .../core/kernels/bucketize_op_gpu.cu.cc | 101 + tensorflow/core/kernels/concat_lib_cpu.cc | 9 +- .../core/kernels/concatenate_dataset_op.cc | 8 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 42 +- tensorflow/core/kernels/conv_ops_3d.cc | 5 + tensorflow/core/kernels/cwise_op_acosh.cc | 12 +- tensorflow/core/kernels/cwise_op_asinh.cc | 14 +- tensorflow/core/kernels/cwise_op_atanh.cc | 14 +- tensorflow/core/kernels/cwise_ops.h | 12 + tensorflow/core/kernels/dataset.cc | 1 + tensorflow/core/kernels/dataset.h | 23 +- .../core/kernels/depthwise_conv_grad_op.cc | 10 +- tensorflow/core/kernels/depthwise_conv_op.cc | 10 +- tensorflow/core/kernels/depthwise_conv_op.h | 4 +- .../core/kernels/depthwise_conv_op_gpu.cu.cc | 19 +- .../kernels/dynamic_partition_op_gpu.cu.cc | 376 +++ .../core/kernels/dynamic_partition_op_test.cc | 58 + .../core/kernels/fake_quant_ops_functor.h | 15 +- .../core/kernels/fused_batch_norm_op.cc | 70 +- tensorflow/core/kernels/fused_batch_norm_op.h | 22 +- tensorflow/core/kernels/lmdb_reader_op.cc | 7 +- tensorflow/core/kernels/maxpooling_op.cc | 47 +- .../core/kernels/maxpooling_op_gpu.cu.cc | 40 +- tensorflow/core/kernels/maxpooling_op_gpu.h | 2 +- .../core/kernels/mkl_conv_grad_filter_ops.cc | 78 +- .../core/kernels/mkl_conv_grad_input_ops.cc | 86 +- tensorflow/core/kernels/mkl_conv_ops.cc | 82 +- tensorflow/core/kernels/mkl_conv_ops.h | 140 +- tensorflow/core/kernels/mkl_tfconv_op.cc | 124 - tensorflow/core/kernels/mkl_tfconv_op.h | 80 +- tensorflow/core/kernels/pooling_ops_common.cc | 10 +- .../core/kernels/pooling_ops_common_gpu.h | 4 +- tensorflow/core/kernels/quantized_add_op.cc | 2 +- tensorflow/core/kernels/random_op.cc | 4 +- tensorflow/core/kernels/range_dataset_op.cc | 1 + tensorflow/core/kernels/reader_dataset_ops.cc | 1 + tensorflow/core/kernels/repeat_dataset_op.cc | 50 +- .../core/kernels/segment_reduction_ops.cc | 3 + .../core/kernels/segment_reduction_ops.h | 36 +- tensorflow/core/kernels/shape_ops.cc | 43 +- tensorflow/core/kernels/shape_ops.h | 13 +- tensorflow/core/kernels/shuffle_dataset_op.cc | 31 +- tensorflow/core/kernels/skip_dataset_op.cc | 63 +- tensorflow/core/kernels/slice_op.cc | 116 +- tensorflow/core/kernels/slice_op.h | 109 +- tensorflow/core/kernels/slice_op_gpu.cu.cc | 56 + tensorflow/core/kernels/strided_slice_op.cc | 1 - .../core/kernels/strided_slice_op_impl.h | 25 +- .../core/kernels/strided_slice_op_test.cc | 49 + tensorflow/core/kernels/summary_interface.cc | 4 +- tensorflow/core/kernels/summary_kernels.cc | 50 - tensorflow/core/kernels/take_dataset_op.cc | 59 +- tensorflow/core/kernels/transpose_op.cc | 35 +- tensorflow/core/kernels/unique_op.cc | 113 +- tensorflow/core/kernels/zip_dataset_op.cc | 63 +- tensorflow/core/ops/array_ops.cc | 44 +- .../core/ops/compat/ops_history.v1.pbtxt | 498 ---- tensorflow/core/ops/dataset_ops.cc | 197 -- tensorflow/core/ops/logging_ops.cc | 2 +- tensorflow/core/ops/math_ops.cc | 2 + tensorflow/core/ops/nn_ops.cc | 12 +- tensorflow/core/ops/ops.pbtxt | 368 +-- tensorflow/core/ops/summary_ops.cc | 41 - .../core/platform/default/build_config.bzl | 21 +- .../core/platform/default/build_config/BUILD | 20 +- .../core/platform/default/notification.h | 2 +- tensorflow/core/platform/posix/error.cc | 11 +- tensorflow/core/platform/posix/port.cc | 6 +- .../core/platform/vmodule_benchmark_test.cc | 28 - tensorflow/core/platform/vmodule_test.cc | 117 - tensorflow/core/public/version.h | 2 +- tensorflow/core/util/bcast.cc | 2 + tensorflow/core/util/device_name_utils.cc | 1 + tensorflow/core/util/mkl_util.h | 691 ++++- tensorflow/core/util/mkl_util_test.cc | 92 + .../api_guides/python/threading_and_queues.md | 2 +- .../docs_src/get_started/get_started.md | 6 +- tensorflow/docs_src/get_started/input_fn.md | 6 +- tensorflow/docs_src/get_started/monitors.md | 406 --- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 18 +- tensorflow/docs_src/install/install_linux.md | 22 +- tensorflow/docs_src/install/install_mac.md | 10 +- .../docs_src/install/install_sources.md | 19 +- tensorflow/docs_src/mobile/index.md | 4 +- tensorflow/docs_src/mobile/prepare_models.md | 2 +- .../performance/xla/operation_semantics.md | 89 - .../docs_src/programmers_guide/debugger.md | 29 +- .../docs_src/programmers_guide/tensors.md | 12 +- tensorflow/docs_src/tutorials/deep_cnn.md | 14 +- tensorflow/docs_src/tutorials/word2vec.md | 10 +- .../examples/image_retraining/retrain.py | 82 +- .../examples/image_retraining/retrain_test.py | 23 +- tensorflow/examples/learn/iris.py | 5 +- .../examples/learn/wide_n_deep_tutorial.py | 5 +- tensorflow/examples/speech_commands/models.py | 2 +- tensorflow/go/android.go | 6 + tensorflow/go/op/wrappers.go | 467 +--- tensorflow/go/operation_test.go | 8 + tensorflow/go/tensor.go | 9 +- tensorflow/go/tensor_test.go | 9 +- .../java/org/tensorflow/NativeLibrary.java | 43 +- .../src/main/java/org/tensorflow/Shape.java | 32 + .../java/org/tensorflow/types/TFBool.java | 30 - .../java/org/tensorflow/types/TFDouble.java | 30 - .../java/org/tensorflow/types/TFFloat.java | 30 - .../java/org/tensorflow/types/TFInt32.java | 30 - .../java/org/tensorflow/types/TFInt64.java | 30 - .../java/org/tensorflow/types/TFString.java | 27 - .../java/org/tensorflow/types/TFType.java | 20 - .../java/org/tensorflow/types/TFUInt8.java | 30 - .../main/java/org/tensorflow/types/Types.java | 52 - .../test/java/org/tensorflow/ShapeTest.java | 26 + .../client/session_clusterspec_prop_test.py | 2 +- tensorflow/python/client/tf_session.i | 10 + tensorflow/python/client/timeline.py | 2 +- .../debug/wrappers/dumping_wrapper_test.py | 18 +- tensorflow/python/eager/BUILD | 7 +- tensorflow/python/eager/backprop.py | 16 +- tensorflow/python/eager/backprop_test.py | 57 +- tensorflow/python/eager/benchmarks_test.py | 3 +- tensorflow/python/eager/execute.py | 3 +- tensorflow/python/eager/function_test.py | 10 +- tensorflow/python/eager/graph_callable.py | 13 +- tensorflow/python/eager/imperative_grad.py | 196 +- tensorflow/python/eager/pywrap_tensor.cc | 8 +- tensorflow/python/eager/pywrap_tensor.h | 25 - tensorflow/python/eager/pywrap_tfe.h | 13 +- tensorflow/python/eager/pywrap_tfe_src.cc | 334 +-- tensorflow/python/eager/tape.py | 12 +- tensorflow/python/eager/tape_test.py | 20 + tensorflow/python/estimator/BUILD | 63 - .../python/estimator/canned/baseline.py | 349 --- .../python/estimator/canned/baseline_test.py | 1545 ----------- tensorflow/python/estimator/canned/head.py | 133 +- .../python/estimator/canned/head_test.py | 10 +- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/estimator/estimator_lib.py | 4 - tensorflow/python/estimator/estimator_test.py | 66 - .../python/estimator/inputs/numpy_io.py | 83 +- .../python/estimator/inputs/numpy_io_test.py | 87 + tensorflow/python/framework/function.py | 2 +- tensorflow/python/framework/function_test.py | 2 +- tensorflow/python/framework/ops.py | 80 +- tensorflow/python/framework/ops_test.py | 87 +- tensorflow/python/framework/tensor_util.py | 1 + tensorflow/python/framework/test_ops.cc | 23 - tensorflow/python/framework/test_util.py | 3 +- tensorflow/python/grappler/model_analyzer.cc | 9 +- tensorflow/python/keras/BUILD | 12 - .../keras/_impl/keras/engine/topology.py | 9 +- .../keras/_impl/keras/integration_test.py | 2 +- .../keras/_impl/keras/layers/gru_test.py | 12 +- .../keras/_impl/keras/layers/lstm_test.py | 11 +- .../keras/_impl/keras/layers/recurrent.py | 2417 ++++------------- .../_impl/keras/layers/recurrent_test.py | 378 --- .../_impl/keras/layers/simplernn_test.py | 12 +- tensorflow/python/keras/layers/__init__.py | 5 - tensorflow/python/kernel_tests/BUILD | 1 - .../python/kernel_tests/array_ops_test.py | 52 +- .../python/kernel_tests/bincount_op_test.py | 25 +- .../python/kernel_tests/bucketize_op_test.py | 8 +- .../python/kernel_tests/check_ops_test.py | 311 +-- .../python/kernel_tests/constant_op_test.py | 14 +- tensorflow/python/kernel_tests/conv1d_test.py | 43 + .../python/kernel_tests/conv_ops_3d_test.py | 267 +- .../kernel_tests/depthwise_conv_op_test.py | 20 +- .../python/kernel_tests/distributions/BUILD | 1 + .../distributions/multinomial_test.py | 20 +- .../kernel_tests/dynamic_partition_op_test.py | 106 +- .../python/kernel_tests/gather_nd_op_test.py | 10 +- .../python/kernel_tests/iterator_ops_test.py | 62 - .../python/kernel_tests/pooling_ops_test.py | 60 +- .../kernel_tests/range_dataset_op_test.py | 330 --- .../kernel_tests/reader_dataset_ops_test.py | 298 -- .../python/kernel_tests/reader_ops_test.py | 41 + .../segment_reduction_ops_test.py | 29 +- .../python/kernel_tests/shape_ops_test.py | 10 + .../python/kernel_tests/slice_op_test.py | 25 +- .../python/kernel_tests/unique_op_test.py | 26 + .../kernel_tests/variable_scope_test.py | 12 - .../python/kernel_tests/xent_op_test.py | 18 - tensorflow/python/layers/base.py | 25 +- tensorflow/python/layers/base_test.py | 9 +- tensorflow/python/layers/convolutional.py | 2 + tensorflow/python/layers/normalization.py | 22 +- .../python/layers/normalization_test.py | 98 +- tensorflow/python/ops/array_grad.py | 6 +- tensorflow/python/ops/array_ops.py | 56 +- tensorflow/python/ops/check_ops.py | 79 +- tensorflow/python/ops/control_flow_ops.py | 41 +- tensorflow/python/ops/ctc_ops.py | 30 +- .../python/ops/distributions/dirichlet.py | 2 +- .../python/ops/distributions/multinomial.py | 49 +- tensorflow/python/ops/embedding_ops.py | 9 +- tensorflow/python/ops/image_ops_impl.py | 23 +- tensorflow/python/ops/linalg_ops.py | 31 +- tensorflow/python/ops/math_grad_test.py | 17 + tensorflow/python/ops/math_ops.py | 220 +- tensorflow/python/ops/metrics_impl.py | 2 +- tensorflow/python/ops/nn.py | 1 - .../python/ops/nn_fused_batchnorm_test.py | 119 +- tensorflow/python/ops/nn_grad.py | 5 +- tensorflow/python/ops/nn_impl.py | 26 +- tensorflow/python/ops/nn_ops.py | 289 +- tensorflow/python/ops/variable_scope.py | 5 - tensorflow/python/ops/variables.py | 4 +- tensorflow/python/pywrap_tfe.i | 4 +- .../python/tools/import_pb_to_tensorboard.py | 0 tensorflow/python/tools/inspect_checkpoint.py | 23 +- .../python/training/monitored_session.py | 2 - tensorflow/python/util/tf_should_use.py | 2 +- tensorflow/stream_executor/cuda/cuda_dnn.cc | 13 +- tensorflow/stream_executor/dnn.cc | 16 +- tensorflow/stream_executor/dnn.h | 6 + tensorflow/tensorflow.bzl | 2 +- ...rflow.estimator.-baseline-classifier.pbtxt | 54 - ...orflow.estimator.-baseline-regressor.pbtxt | 54 - .../api/golden/tensorflow.estimator.pbtxt | 8 - .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 179 -- .../tensorflow.keras.layers.-g-r-u.pbtxt | 86 +- ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 179 -- .../tensorflow.keras.layers.-l-s-t-m.pbtxt | 90 +- .../tensorflow.keras.layers.-r-n-n.pbtxt | 191 -- ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 179 -- ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 78 +- ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 183 -- .../api/golden/tensorflow.keras.layers.pbtxt | 20 - .../tools/api/golden/tensorflow.linalg.pbtxt | 2 +- .../tools/api/golden/tensorflow.nn.pbtxt | 10 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 22 +- tensorflow/tools/ci_build/ci_sanity.sh | 3 +- .../tools/ci_build/install/install_golang.sh | 2 +- .../ci_build/linux/libtensorflow_docker.sh | 2 +- .../tools/ci_build/osx/libtensorflow_cpu.sh | 2 +- .../tools/ci_build/osx/libtensorflow_gpu.sh | 2 +- .../tools/ci_build/pi/build_raspberry_pi.sh | 6 + .../ci_build/windows/bazel/bazel_test_lib.sh | 4 +- tensorflow/tools/docker/Dockerfile.devel | 5 - .../docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 7 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- tensorflow/tools/docker/README.md | 14 + tensorflow/tools/graph_transforms/BUILD | 2 + .../tools/graph_transforms/quantize_nodes.cc | 2 + tensorflow/tools/pip_package/setup.py | 2 +- tensorflow/workspace.bzl | 45 +- third_party/aws.BUILD | 3 + .../boringssl/add_boringssl_s390x.patch | 133 - third_party/curl.BUILD | 1 - third_party/nanopb.BUILD | 23 - third_party/sycl/crosstool/CROSSTOOL.tpl | 8 +- third_party/sycl/crosstool/trisycl.tpl | 73 + third_party/sycl/sycl/BUILD.tpl | 17 +- third_party/sycl/sycl/build_defs.bzl.tpl | 17 +- third_party/sycl/sycl_configure.bzl | 86 +- third_party/tflite_mobilenet.BUILD | 13 - third_party/zlib.BUILD | 2 +- tools/bazel.rc | 7 +- 542 files changed, 10708 insertions(+), 20624 deletions(-) create mode 100644 tensorflow/c/eager/tape.cc delete mode 100644 tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc delete mode 100644 tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h delete mode 100644 tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc delete mode 100644 tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h delete mode 100644 tensorflow/compiler/xla/tests/test_utils.cc create mode 100644 tensorflow/contrib/data/ops/dataset_ops.cc create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/cauchy.py create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py delete mode 100644 tensorflow/contrib/quantize/python/quant_ops_test.py create mode 100644 tensorflow/core/kernels/bincount_op.h create mode 100644 tensorflow/core/kernels/bincount_op_gpu.cu.cc create mode 100644 tensorflow/core/kernels/bincount_op_test.cc create mode 100644 tensorflow/core/kernels/bucketize_op.h create mode 100644 tensorflow/core/kernels/bucketize_op_gpu.cu.cc create mode 100644 tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc delete mode 100644 tensorflow/core/kernels/mkl_tfconv_op.cc delete mode 100644 tensorflow/core/platform/vmodule_benchmark_test.cc delete mode 100644 tensorflow/core/platform/vmodule_test.cc create mode 100644 tensorflow/core/util/mkl_util_test.cc delete mode 100644 tensorflow/docs_src/get_started/monitors.md create mode 100644 tensorflow/go/android.go delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFBool.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFDouble.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFFloat.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFInt32.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFInt64.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFString.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFType.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/TFUInt8.java delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/Types.java delete mode 100644 tensorflow/python/eager/pywrap_tensor.h delete mode 100644 tensorflow/python/estimator/canned/baseline.py delete mode 100644 tensorflow/python/estimator/canned/baseline_test.py delete mode 100644 tensorflow/python/keras/_impl/keras/layers/recurrent_test.py mode change 100644 => 100755 tensorflow/python/tools/import_pb_to_tensorboard.py delete mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt delete mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt delete mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt delete mode 100644 third_party/boringssl/add_boringssl_s390x.patch delete mode 100644 third_party/nanopb.BUILD create mode 100644 third_party/sycl/crosstool/trisycl.tpl delete mode 100644 third_party/tflite_mobilenet.BUILD diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 10fd595fec..cfc45049f7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -42,7 +42,7 @@ The Code of Conduct also applies within project spaces and in public spaces when Conflicts in an open source project can take many forms, from someone having a bad day and using harsh and hurtful language in the issue queue, to more serious instances such as sexist/racist statements or threats of violence, and everything in between. -If the behaviour is threatening or harassing, or for other reasons requires immediate escalation, please see below. +If the behavior is threatening or harassing, or for other reasons requires immediate escalation, please see below. However, for the vast majority of issues, we aim to empower individuals to first resolve conflicts themselves, asking for help when needed, and only after that fails to escalate further. This approach gives people more control over the outcome of their dispute. diff --git a/README.md b/README.md index 24bbb6cec1..aff3427bdd 100644 --- a/README.md +++ b/README.md @@ -73,11 +73,11 @@ $ python ## For more information -* [TensorFlow website](https://www.tensorflow.org) +* [TensorFlow Website](https://www.tensorflow.org) * [TensorFlow White Papers](https://www.tensorflow.org/about/bib) * [TensorFlow Model Zoo](https://github.com/tensorflow/models) * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730) -* [TensorFlow course at Stanford](https://web.stanford.edu/class/cs20si) +* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si) Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate. diff --git a/configure.py b/configure.py index e98367ef9f..83ee01c630 100644 --- a/configure.py +++ b/configure.py @@ -43,6 +43,7 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing ' 'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION) _TF_OPENCL_VERSION = '1.2' _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp' +_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include' def is_windows(): @@ -487,11 +488,10 @@ def set_cc_opt_flags(environ_cp): cc_opt_flags = get_from_env_or_user_or_default(environ_cp, 'CC_OPT_FLAGS', question, default_cc_opt_flags) for opt in cc_opt_flags.split(): - write_to_bazelrc('build:opt --cxxopt=%s --copt=%s' % (opt, opt)) - host_opt = '-march=native' # It should be safe on the same build host. - write_to_bazelrc( - 'build:opt --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) - write_to_bazelrc('build:opt --define with_default_optimizations=true') + host_opt = '-march=native' # It should be safe on the same build host. + write_to_bazelrc( + 'build:opt --cxxopt=%s --copt=%s' % (opt, opt) + + ' --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) def set_tf_cuda_clang(environ_cp): @@ -641,7 +641,7 @@ def set_tf_cuda_version(environ_cp): write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) -def set_tf_cunn_version(environ_cp): +def set_tf_cudnn_version(environ_cp): """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" ask_cudnn_version = ( 'Please specify the cuDNN version you want to use. ' @@ -887,6 +887,27 @@ def set_computecpp_toolkit_path(environ_cp): write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', computecpp_toolkit_path) +def set_trisycl_include_dir(environ_cp): + """Set TRISYCL_INCLUDE_DIR""" + ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' + 'include directory. (Use --config=sycl_trisycl ' + 'when building with Bazel) ' + '[Default is %s]: ' + ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) + while True: + trisycl_include_dir = get_from_env_or_user_or_default( + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) + if os.path.exists(trisycl_include_dir): + break + + print('Invalid triSYCL include directory, %s cannot be found' + % (trisycl_include_dir)) + + # Set TRISYCL_INCLUDE_DIR + environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', + trisycl_include_dir) def set_mpi_home(environ_cp): """Set MPI_HOME.""" @@ -999,6 +1020,8 @@ def main(): environ_cp['TF_NEED_GCP'] = '0' environ_cp['TF_NEED_HDFS'] = '0' environ_cp['TF_NEED_JEMALLOC'] = '0' + environ_cp['TF_NEED_OPENCL_SYCL'] = '0' + environ_cp['TF_NEED_COMPUTECPP'] = '0' environ_cp['TF_NEED_OPENCL'] = '0' environ_cp['TF_NEED_S3'] = '0' environ_cp['TF_CUDA_CLANG'] = '0' @@ -1021,17 +1044,21 @@ def main(): set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', False, 'verbs') - set_action_env_var(environ_cp, 'TF_NEED_OPENCL', 'OpenCL', False) - if environ_cp.get('TF_NEED_OPENCL') == '1': + set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': set_host_cxx_compiler(environ_cp) set_host_c_compiler(environ_cp) - set_computecpp_toolkit_path(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True) + if environ_cp.get('TF_NEED_COMPUTECPP') == '1': + set_computecpp_toolkit_path(environ_cp) + else: + set_trisycl_include_dir(environ_cp) set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): set_tf_cuda_version(environ_cp) - set_tf_cunn_version(environ_cp) + set_tf_cudnn_version(environ_cp) set_tf_cuda_compute_capabilities(environ_cp) set_tf_cuda_clang(environ_cp) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 8cb7edcc50..9874f95ea3 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -54,6 +54,15 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "raspberry_pi_armeabi", + values = { + "crosstool_top": "@local_config_arm_compiler//:toolchain", + "cpu": "armeabi", + }, + visibility = ["//visibility:public"], +) + config_setting( name = "android_arm", values = { @@ -110,7 +119,7 @@ config_setting( config_setting( name = "no_tensorflow_py_deps", - define_values = {"no_tensorflow_py_deps": "true"}, + values = {"define": "no_tensorflow_py_deps=true"}, visibility = ["//visibility:public"], ) @@ -166,122 +175,55 @@ config_setting( # TODO(jhseu): Enable on other platforms other than Linux. config_setting( name = "with_jemalloc_linux_x86_64", - define_values = {"with_jemalloc": "true"}, - values = {"cpu": "k8"}, + values = { + "cpu": "k8", + "define": "with_jemalloc=true", + }, visibility = ["//visibility:public"], ) config_setting( name = "with_jemalloc_linux_ppc64le", - define_values = {"with_jemalloc": "true"}, - values = {"cpu": "ppc"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_default_optimizations", - define_values = {"with_default_optimizations": "true"}, + values = { + "cpu": "ppc", + "define": "with_jemalloc=true", + }, visibility = ["//visibility:public"], ) config_setting( name = "with_gcp_support", - define_values = {"with_gcp_support": "true"}, + values = {"define": "with_gcp_support=true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_hdfs_support", - define_values = {"with_hdfs_support": "true"}, + values = {"define": "with_hdfs_support=true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_s3_support", - define_values = {"with_s3_support": "true"}, - visibility = ["//visibility:public"], -) - -# Crosses between platforms and file system libraries not supported on those -# platforms due to limitations in nested select() statements. -config_setting( - name = "with_gcp_support_windows_override", - define_values = {"with_gcp_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_windows_override", - define_values = {"with_hdfs_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_s3_support_windows_override", - define_values = {"with_s3_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_gcp_support_android_override", - define_values = {"with_gcp_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_android_override", - define_values = {"with_hdfs_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_s3_support_android_override", - define_values = {"with_s3_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_gcp_support_ios_override", - define_values = {"with_gcp_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_ios_override", - define_values = {"with_hdfs_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_s3_support_ios_override", - define_values = {"with_s3_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, + values = {"define": "with_s3_support=true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_xla_support", - define_values = {"with_xla_support": "true"}, + values = {"define": "with_xla_support=true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_gdr_support", - define_values = {"with_gdr_support": "true"}, + values = {"define": "with_gdr_support=true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_verbs_support", - define_values = {"with_verbs_support": "true"}, + values = {"define": "with_verbs_support=true"}, visibility = ["//visibility:public"], ) @@ -355,7 +297,7 @@ config_setting( visibility = ["//visibility:public"], ) -# Make a dummy rule that we can change "default" in select statements to. +# Make a dummy rule that we can chaqnge "default" in select statements to. # to disable dependencies in copybara. config_setting( name = "dummy_disabled_internal", @@ -384,6 +326,14 @@ filegroup( visibility = ["//tensorflow:__subpackages__"], ) +py_library( + name = "tensorflow_py", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = ["//tensorflow/python"], +) + filegroup( name = "all_opensource_files", data = [ @@ -737,11 +687,3 @@ tf_cc_shared_object( "//tensorflow/core:tensorflow", ], ) - -py_library( - name = "tensorflow_py", - srcs = ["__init__.py"], - srcs_version = "PY2AND3", - visibility = ["//visibility:public"], - deps = ["//tensorflow/python"], -) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index dd638de3c6..6dd1b99910 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -890,8 +890,8 @@ const tensorflow::AttrValue* GetAttrValue(TF_Operation* oper, TF_Status* status) { const tensorflow::AttrValue* attr = oper->node.attrs().Find(attr_name); if (attr == nullptr) { - status->status = InvalidArgument("Operation '", oper->node.name(), - "' has no attr named '", attr_name, "'."); + status->status = + InvalidArgument("Operation has no attr named '", attr_name, "'."); } return attr; } diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index e0057eb51c..05881e619b 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -383,7 +383,7 @@ TEST(CAPI, Graph) { EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s)); ASSERT_FALSE(GetAttrValue(feed, "missing", &attr_value, s)); - EXPECT_EQ(string("Operation 'feed' has no attr named 'missing'."), + EXPECT_EQ(string("Operation has no attr named 'missing'."), string(TF_Message(s))); // Make a constant oper with the scalar "3". @@ -1054,7 +1054,7 @@ class CApiColocationTest : public ::testing::Test { TF_OperationGetAttrMetadata(op, tensorflow::kColocationAttrName, s_); if (expected.empty()) { ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); - EXPECT_EQ(std::string("Operation 'add' has no attr named '_class'."), + EXPECT_EQ(std::string("Operation has no attr named '_class'."), std::string(TF_Message(s_))); return; } diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index d533758e36..c77896b80b 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -39,7 +39,6 @@ tf_cuda_library( tf_cuda_library( name = "c_api_internal", hdrs = ["c_api_internal.h"], - visibility = ["//tensorflow:internal"], deps = [ ":c_api", ":runtime", @@ -106,6 +105,7 @@ tf_cc_test( cc_library( name = "tape", + srcs = ["tape.cc"], hdrs = ["tape.h"], visibility = ["//tensorflow:internal"], deps = [ diff --git a/tensorflow/c/eager/tape.cc b/tensorflow/c/eager/tape.cc new file mode 100644 index 0000000000..464612a81e --- /dev/null +++ b/tensorflow/c/eager/tape.cc @@ -0,0 +1,102 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/eager/tape.h" + +namespace tensorflow { +namespace eager { + +bool GradientTape::ShouldRecord(gtl::ArraySlice tensor_ids) { + for (int64 i : tensor_ids) { + if (tensor_tape_.find(i) != tensor_tape_.end()) { + return true; + } + } + return false; +} + +void GradientTape::Watch(int64 tensor_id) { + tensor_tape_.emplace(tensor_id, -1); +} + +void GradientTape::RecordOperation( + const string& op_type, gtl::ArraySlice output_tensors, + gtl::ArraySlice input_tensor_id, void* backward_function, + const std::function& backward_function_deleter) { + if (!ShouldRecord(input_tensor_id)) { + backward_function_deleter(); + return; + } + std::vector ids; + ids.reserve(input_tensor_id.size()); + for (int64 i : input_tensor_id) { + tensor_usage_[i]++; + ids.push_back(i); + } + const int64 op_id = next_op_id_++; + std::vector tensors; + tensors.reserve(output_tensors.size()); + for (const TapeTensor& o : output_tensors) { + // Note: the tensor can have already been watched and hence be in the tape, + // so we cannot check that we're inserting it here. + tensor_tape_[o.id] = op_id; + tensor_usage_[o.id] = 1; + tensors.push_back(o); + } + op_tape_[op_id] = OpTapeEntry{op_type, tensors, ids, backward_function, + backward_function_deleter}; +} + +void GradientTape::DeleteTrace(int64 tensor_id) { + auto it = tensor_usage_.find(tensor_id); + if (it == tensor_usage_.end()) { + return; + } + it->second--; + if (it->second != 0) { + return; + } + tensor_usage_.erase(it); + auto tensor_op_it = tensor_tape_.find(tensor_id); + if (tensor_op_it == tensor_tape_.end()) { + return; + } + const int64 op_id = tensor_op_it->second; + if (op_id == -1) { + // Do not delete watched tensors. + return; + } + tensor_tape_.erase(tensor_op_it); + auto op_it = op_tape_.find(op_id); + CHECK(op_it != op_tape_.end()); + for (const auto& output : op_it->second.output_tensor_info) { + if (tensor_usage_.find(output.id) != tensor_usage_.end()) { + // Found a usage for an output, so cannot delete the op. + return; + } + } + for (int64 id : op_it->second.input_tensor_id) { + DeleteTrace(id); + } + op_it->second.backward_function_deleter(); + op_tape_.erase(op_it); +} + +std::pair GradientTape::Export() { + return {std::move(tensor_tape_), std::move(op_tape_)}; +} + +} // namespace eager +} // namespace tensorflow diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 29d73c5ca4..df51f300eb 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -19,7 +19,6 @@ limitations under the License. // maintains the data structures required to do so. #include -#include #include #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" @@ -37,14 +36,13 @@ struct TapeTensor { }; // Represents an entry in the tape. -template struct OpTapeEntry { string op_type; std::vector output_tensor_info; std::vector input_tensor_id; // TODO(apassos) consider narrowing down this interface. - BackwardFunction* backward_function; + void* backward_function; // Should be called before deleting the backward function. TODO(apassos) use // unique_ptrs to ensure this happens. @@ -57,68 +55,13 @@ struct OpTapeEntry { using TensorTape = std::unordered_map; // Map from operation-id to tape entry. -template -using OpTape = std::unordered_map>; - -// Operations the tape needs to perform on tensors to do backpropagation. Named -// "vspace" because a subset of these are related to a vector space, such as -// adding gradients, getting zeroes, etc. Currently cannot be implemented -// without using tensorflow python code, hence left unspecified here. -// -// Gradient is the type returned by gradient functions. In Python TF it's either -// Tensor or IndexedSlices or None, which here we map to nullptr. Gradients need -// to allow their size to be computed and they need to be passable to a backward -// function and deleted (as the backprop code creates lots of gradients the user -// is not interested in). -// -// BackwardFunction needs to be a closure which stores intermediate activations -// from the forward computation and calls a vector-jacobian product function -// (also known as adjoint function) to compute, given downstream gradients, -// upstream gradients. -// -// TODO(apassos) provide concrete template instantiations for TFE_TensorHandle -// specialization, which is blocked by quite a few things needing to loop back -// into python now. -template -class VSpace { - public: - virtual ~VSpace() {} - - // Returns the number of elements in the gradient tensor. - virtual int64 NumElements(Gradient* tensor) const = 0; - - // Consumes references to the tensors in the gradient_tensors list and returns - // a tensor with the result. - virtual Gradient* AggregateGradients( - gtl::ArraySlice gradient_tensors) const = 0; - - // Returns a tensor of the right shape and dtype filled with zeros. - virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0; - - // Returns a Tensor which is filled with ones and like the input. - virtual Gradient* Ones(TensorShape shape, DataType dtype) const = 0; - - // Calls the passed-in backward function. - virtual Status CallBackwardFunction( - BackwardFunction* backward_function, - gtl::ArraySlice output_gradients, - std::vector* result) const = 0; - - // Deletes the input tensor. - virtual void DeleteGradient(Gradient* gradient) const = 0; -}; +using OpTape = std::unordered_map; // Traces the execution of operations, doing eager garbage collection, and // exporting a full trace so other code can do backpropagation. Not thread-safe. -template class GradientTape { public: GradientTape() {} - ~GradientTape() { - for (const auto& pair : op_tape_) { - pair.second.backward_function_deleter(); - } - } bool ShouldRecord(gtl::ArraySlice tensor_ids); @@ -127,24 +70,19 @@ class GradientTape { void RecordOperation(const string& op_type, gtl::ArraySlice output_tensors, gtl::ArraySlice input_tensor_id, - BackwardFunction* backward_function, + void* backward_function, const std::function& backward_function_deleter); void DeleteTrace(int64 tensor_id); - // Consumes the internal state of the tape (so cannot be called more than - // once) and produces the gradient of the target tensors with respect to the - // source tensors. The output gradients are used if not empty and not - // null. The result is populated with one tensor per target element. - Status ComputeGradient(const VSpace& vspace, - gtl::ArraySlice target_tensor_ids, - gtl::ArraySlice source_tensor_id, - gtl::ArraySlice output_gradients, - std::vector* result); + // Note: it is only valid to call Export once per tape, and after calling + // export the tape is no longer valid (i.e. calls to ShouldRecord, Watch, + // Record, and Delete have undefined behavior). + std::pair Export(); private: TensorTape tensor_tape_; - OpTape op_tape_; + OpTape op_tape_; int64 next_op_id_{0}; // Map from tensor id to number of remaining usages (i.e. how many entries in @@ -152,429 +90,6 @@ class GradientTape { std::unordered_map tensor_usage_; }; -// Template instantiations here - -template -bool GradientTape::ShouldRecord( - gtl::ArraySlice tensor_ids) { - for (int64 i : tensor_ids) { - if (tensor_tape_.find(i) != tensor_tape_.end()) { - return true; - } - } - return false; -} - -template -void GradientTape::Watch(int64 tensor_id) { - tensor_tape_.emplace(tensor_id, -1); -} - -template -void GradientTape::RecordOperation( - const string& op_type, gtl::ArraySlice output_tensors, - gtl::ArraySlice input_tensor_id, BackwardFunction* backward_function, - const std::function& backward_function_deleter) { - if (!ShouldRecord(input_tensor_id)) { - backward_function_deleter(); - return; - } - std::vector ids; - ids.reserve(input_tensor_id.size()); - for (int64 i : input_tensor_id) { - tensor_usage_[i]++; - ids.push_back(i); - } - const int64 op_id = next_op_id_++; - std::vector tensors; - tensors.reserve(output_tensors.size()); - for (const TapeTensor& o : output_tensors) { - // Note: the tensor can have already been watched and hence be in the tape, - // so we cannot check that we're inserting it here. - tensor_tape_[o.id] = op_id; - tensor_usage_[o.id] = 1; - tensors.push_back(o); - } - op_tape_[op_id] = OpTapeEntry{ - op_type, tensors, ids, backward_function, backward_function_deleter}; -} - -template -void GradientTape::DeleteTrace(int64 tensor_id) { - auto it = tensor_usage_.find(tensor_id); - if (it == tensor_usage_.end()) { - return; - } - it->second--; - if (it->second != 0) { - return; - } - tensor_usage_.erase(it); - auto tensor_op_it = tensor_tape_.find(tensor_id); - if (tensor_op_it == tensor_tape_.end()) { - return; - } - const int64 op_id = tensor_op_it->second; - if (op_id == -1) { - // Do not delete watched tensors. - return; - } - tensor_tape_.erase(tensor_op_it); - auto op_it = op_tape_.find(op_id); - CHECK(op_it != op_tape_.end()); - for (const auto& output : op_it->second.output_tensor_info) { - if (tensor_usage_.find(output.id) != tensor_usage_.end()) { - // Found a usage for an output, so cannot delete the op. - return; - } - } - for (int64 id : op_it->second.input_tensor_id) { - DeleteTrace(id); - } - op_it->second.backward_function_deleter(); - op_tape_.erase(op_it); -} - -// Terminology: -// -// - op: a possibly composite operation, which has an entry in the tape -// - target: dy in dx/dy -// - source: dx in dx/dy -// - tensor: one of the many inputs or outputs of an operation -// -// Below here we do the gradient algorithm. It works as follows: -// -// First we filter the tape to just the subset of operations we want to -// differentiate. In the process of doing so we count how many times each Tensor -// is used as an input to an op (so we know when we're done computing gradients -// for that Tensor). We also count, for each tape entry, how many of its output -// Tensors need gradients to be computed (Tensors which are not used do not need -// any gradients to be computed). -// -// Finally, we start a backprop stack with a set of tape entries for which we -// have all gradients available. This set usually is a subset of the set of -// targets (not all since targets which have outputs in the tape will not have -// gradients available initially). -// -// Then we repeatedly pop an entry from the stack, run its backprop, and update -// the gradients of its inputs. Once we have computed all gradients for a single -// input we can mark this input as done, and this can trigger adding an entry to -// the stack if all outputs of that entry are now done. -// -// When the stack is empty we have gradients for all tensors we're interested -// in. - -namespace { - -template -struct BackpropInitialState { - OpTape op_tape; - - // Map from tensor ID to how many references still exist for this tensor in - // the tape. - std::unordered_map tensor_usage_counts; - - // Maps from op ID to how many output tensors of this op still need to have - // their gradients computed. - std::unordered_map op_missing_tensor; -}; - -template -BackpropInitialState PrepareBackprop( - gtl::ArraySlice target, const TensorTape& tensor_tape, - OpTape op_tape, - const std::unordered_set& sources_set) { - std::vector tensor_stack; - tensor_stack.reserve(target.size()); - for (auto t : target) { - tensor_stack.push_back(t); - } - BackpropInitialState result; - while (!tensor_stack.empty()) { - int64 tensor_id = tensor_stack.back(); - tensor_stack.pop_back(); - auto op_id_it = tensor_tape.find(tensor_id); - if (op_id_it == tensor_tape.end()) { - continue; - } - int64 op_id = op_id_it->second; - auto op_it = op_tape.find(op_id); - auto result_op_it = result.op_tape.find(op_id); - if (op_id == -1 || op_it == op_tape.end() || - result_op_it != result.op_tape.end()) { - continue; - } - CHECK(result.op_tape.emplace(op_id, op_it->second).second); - for (auto it : op_it->second.input_tensor_id) { - auto count_it = result.tensor_usage_counts.find(it); - if (count_it != result.tensor_usage_counts.end()) { - count_it->second++; - } else { - result.tensor_usage_counts[it] = 1; - if (sources_set.find(it) == sources_set.end() && - tensor_tape.find(it) != tensor_tape.end()) { - tensor_stack.push_back(it); - } - } - } - op_tape.erase(op_it); - } - for (auto& pair : result.tensor_usage_counts) { - auto it = tensor_tape.find(pair.first); - if (it != tensor_tape.end() && it->second != -1) { - result.op_missing_tensor[it->second] += 1; - } - } - // Call destructors for all unneeded gradient functions. - for (const auto& op_pair : op_tape) { - op_pair.second.backward_function_deleter(); - } - return result; -} - -template -std::vector InitialStack( - const OpTape& op_tape, - const std::unordered_map& op_missing_tensor) { - std::vector result; - for (auto& op_entry : op_tape) { - if (op_missing_tensor.find(op_entry.first) == op_missing_tensor.end()) { - result.push_back(op_entry.first); - } - } - return result; -} - -template -Status InitialGradients( - const VSpace& vspace, - gtl::ArraySlice target_tensor_ids, - gtl::ArraySlice output_gradients, const TensorTape& tensor_tape, - const OpTape& op_tape, - const std::unordered_map& tensor_usage_counts, - std::unordered_map>* result) { - for (int i = 0; i < target_tensor_ids.size(); ++i) { - const int64 id = target_tensor_ids[i]; - if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { - if (!output_gradients.empty() && output_gradients[i] != nullptr) { - // TODO(apassos) figure out how to print debugging information here. - return errors::InvalidArgument( - "A gradient was provided for a tensor which is used as part of the " - "computation."); - } - } else { - if (output_gradients.empty() || output_gradients[i] == nullptr) { - auto tensor_it = tensor_tape.find(id); - if (tensor_it != tensor_tape.end() && tensor_it->second != -1) { - auto op_it = op_tape.find(tensor_it->second); - if (op_it == op_tape.end()) { - return errors::Internal( - "Internal state of the gradient tape is invalid."); - } - bool found = false; - for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) { - if (op_it->second.output_tensor_info[j].id == id) { - found = true; - (*result)[id].push_back( - vspace.Ones(op_it->second.output_tensor_info[j].shape, - op_it->second.output_tensor_info[j].dtype)); - break; - } - } - if (!found) { - return errors::Internal( - "Internal state of the gradient tape is invalid."); - } - } else { - // No record of the target tensor found on the tape, so no gradient - // needs to be computed from it. Do nothing. - } - } else { - (*result)[id].push_back(output_gradients[i]); - } - } - } - return Status::OK(); -} - -} // namespace - -// If over kMinAggregateCount gradients are accumulated and the total -// memory consumption is over kMinAggregateBytes, do an early aggregation -// so as to release the gradient tensor to save memory. -constexpr int kMinAggregateCount = 4; -constexpr int kMinAggregateBytes = 128 * 1024 * 1024; - -template -Status GradientTape::ComputeGradient( - const VSpace& vspace, - gtl::ArraySlice target_tensor_ids, - gtl::ArraySlice source_tensor_ids, - gtl::ArraySlice output_gradients, - std::vector* result) { - std::unordered_set sources_set(source_tensor_ids.begin(), - source_tensor_ids.end()); - BackpropInitialState state = PrepareBackprop( - target_tensor_ids, tensor_tape_, std::move(op_tape_), sources_set); - std::vector op_stack = - InitialStack(state.op_tape, state.op_missing_tensor); - std::unordered_map> gradients; - Status s = InitialGradients(vspace, target_tensor_ids, output_gradients, - tensor_tape_, state.op_tape, - state.tensor_usage_counts, &gradients); - auto cleanup = [&state]() { - // Release all backprop functions - for (const auto& pair : state.op_tape) { - pair.second.backward_function_deleter(); - } - }; - if (!s.ok()) { - cleanup(); - return s; - } - std::unordered_map gradients_size; - // TODO(apassos) multiple threads could be dequeuing from op_stack at the same - // time, for better CPU backprop performance. - VLOG(1) << "Initial stack:"; - if (VLOG_IS_ON(1)) { - for (auto t : op_stack) { - VLOG(1) << " " << t; - } - } - std::unordered_map> - functions_accept_none_for_indices({ - {"SoftmaxCrossEntropyWithLogits", {1}}, - {"FusedBatchNorm", {1, 2, 3, 4}}, - }); - while (!op_stack.empty()) { - const int64 op = op_stack.back(); - VLOG(1) << "Popped " << op; - op_stack.pop_back(); - auto op_it = state.op_tape.find(op); - if (op_it == state.op_tape.end()) { - // It is possible for ops to end up on the stack if they are unrelated to - // the target; we should just skip them. - continue; - } - auto trace = std::move(op_it->second); - state.op_tape.erase(op_it); - std::vector out_gradients; - out_gradients.reserve(trace.output_tensor_info.size()); - for (int i = 0; i < trace.output_tensor_info.size(); ++i) { - const int64 id = trace.output_tensor_info[i].id; - auto grad_it = gradients.find(id); - if (grad_it == gradients.end()) { - auto func_name_it = - functions_accept_none_for_indices.find(trace.op_type); - if (func_name_it != functions_accept_none_for_indices.end() && - func_name_it->second.find(i) != func_name_it->second.end()) { - out_gradients.push_back(nullptr); - } else { - out_gradients.push_back( - vspace.Zeros(trace.output_tensor_info[i].shape, - trace.output_tensor_info[i].dtype)); - } - } else { - out_gradients.push_back(vspace.AggregateGradients(grad_it->second)); - if (sources_set.find(grad_it->first) == sources_set.end()) { - gradients.erase(grad_it); - } - } - } - std::vector in_gradients; - Status s = vspace.CallBackwardFunction(trace.backward_function, - out_gradients, &in_gradients); - if (!s.ok()) { - VLOG(1) << "Gradient function failed."; - cleanup(); - return s; - } - VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " - << trace.input_tensor_id.size() << " sources"; - for (int i = 0; i < in_gradients.size(); ++i) { - const int64 id = trace.input_tensor_id[i]; - if (in_gradients[i] != nullptr) { - auto& unaggregated_grads = gradients[id]; - unaggregated_grads.push_back(in_gradients[i]); - if (unaggregated_grads.size() > kMinAggregateCount) { - auto size_it = gradients_size.find(id); - int64 size; - if (size_it == gradients_size.end()) { - size = vspace.NumElements(unaggregated_grads[0]); - gradients_size.emplace(id, size); - } else { - size = size_it->second; - } - if (unaggregated_grads.size() * size * 4 > kMinAggregateBytes) { - Gradient* grad = vspace.AggregateGradients(unaggregated_grads); - unaggregated_grads.clear(); - unaggregated_grads.push_back(grad); - } - } - } - auto usage_count_it = state.tensor_usage_counts.find(id); - if (usage_count_it == state.tensor_usage_counts.end()) { - VLOG(1) << "Tensor " << id << " not used"; - continue; - } - usage_count_it->second--; - if (usage_count_it->second > 0) { - VLOG(1) << "Tensor " << id << " usage count " << usage_count_it->second; - continue; - } - auto tape_it = tensor_tape_.find(id); - if (tape_it == tensor_tape_.end()) { - VLOG(1) << "Tensor " << id - << " has no associated op. Deleting gradient"; - auto grad_it = gradients.find(id); - if (grad_it != gradients.end()) { - for (auto g : grad_it->second) { - vspace.DeleteGradient(g); - } - gradients.erase(grad_it); - } - continue; - } - const int64 op_id = tape_it->second; - if (op_id == -1) { - VLOG(1) << "Tensor " << id << " is source"; - continue; - } - auto missing_it = state.op_missing_tensor.find(op_id); - if (missing_it != state.op_missing_tensor.end()) { - missing_it->second--; - VLOG(1) << "Op " << op_id << " missing " << missing_it->second - << " output gradients"; - if (missing_it->second == 0) { - op_stack.push_back(op_id); - } - } - } - } - CHECK(state.op_tape.empty()); - result->reserve(source_tensor_ids.size()); - for (auto is : source_tensor_ids) { - auto grad_it = gradients.find(is); - if (grad_it == gradients.end()) { - result->push_back(nullptr); - } else { - if (grad_it->second.size() == 1) { - result->push_back(grad_it->second[0]); - } else { - result->push_back(vspace.AggregateGradients(grad_it->second)); - } - gradients.erase(grad_it); - } - } - VLOG(1) << "Final gradients size: " << gradients.size(); - for (auto grad_pair : gradients) { - for (const auto& g : grad_pair.second) { - vspace.DeleteGradient(g); - } - } - return Status::OK(); -} - } // namespace eager } // namespace tensorflow diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index ee291c12d0..1e22b760b8 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -119,7 +119,7 @@ def tf_library(name, graph, config, out_nodes_file, ] + freeze_saver_srcs, outs=[freeze_file], - cmd=("$(location //tensorflow/python/tools:freeze_graph)" + + cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" + freeze_args), tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"], tags=tags, @@ -130,6 +130,10 @@ def tf_library(name, graph, config, header_file = name + ".h" object_file = name + ".o" ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_") + if type(tfcompile_flags) == type(""): + flags = tfcompile_flags + else: + flags = " ".join(["'" + arg.replace("'", "'\\''") + "'" for arg in (tfcompile_flags or [])]) native.genrule( name=("gen_" + name), srcs=[ @@ -148,7 +152,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - " " + (tfcompile_flags or "")), + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -185,7 +189,7 @@ def tf_library(name, graph, config, " --cpp_class=" + cpp_class + " --target_triple=" + target_llvm_triple() + " --out_session_module=$(@D)/" + session_module_pb + - " " + (tfcompile_flags or "")), + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -195,8 +199,7 @@ def tf_library(name, graph, config, # The cc_library rule packaging up the header and object file, and needed # kernel implementations. - need_xla_data_proto = (tfcompile_flags and - tfcompile_flags.find("--gen_program_shape") != -1) + need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1) native.cc_library( name=name, srcs=[object_file], @@ -253,7 +256,7 @@ def tf_library(name, graph, config, ], outs=[test_file], cmd=("sed " + sed_replace + - " $(location //tensorflow/compiler/aot:test.cc) " + + " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " + "> $(OUTS)"), tags=tags, ) diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index e481796d9e..27c5da08c1 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -257,6 +257,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); options.graph_def_version = ctx->function_library()->graph_def_version(); options.allow_cpu_custom_calls = (platform_id_ == gpu::host::kHostPlatformId); + options.local_executable_has_hybrid_result = true; const XlaCompiler::CompilationResult* kernel; xla::LocalExecutable* executable; diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index bc2eccd277..23368b6c76 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -227,7 +227,10 @@ Status XlaCompilationCache::BuildExecutable( } xla::ExecutableBuildOptions build_options; build_options.set_device_ordinal(client_->default_device_ordinal()); + build_options.set_platform(client_->platform()); build_options.set_result_layout(result.xla_output_shape); + build_options.set_has_hybrid_result( + options.local_executable_has_hybrid_result); auto compile_result = client_->Compile(*result.computation, argument_layouts, build_options); diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 21b8823944..284ecbf97d 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -657,7 +657,7 @@ tf_library( cpp_class = "LSTMLayerInference", graph = "lstm_layer_inference.pbtxt", tags = ["manual"], - tfcompile_flags = "--xla_cpu_multi_thread_eigen=false", + tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"], ) # ----------------------------------------------------------------------------- diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py index 936fcf8b6b..a773b5a947 100644 --- a/tensorflow/compiler/tests/fused_batchnorm_test.py +++ b/tensorflow/compiler/tests/fused_batchnorm_test.py @@ -36,7 +36,7 @@ class FusedBatchNormTest(XLATestCase): x_square = x * x x_square_sum = np.sum(x_square, (0, 1, 2)) x_sum = np.sum(x, axis=(0, 1, 2)) - element_count = np.size(x) / int(np.shape(x)[0]) + element_count = np.size(x) / int(np.shape(x)[-1]) mean = x_sum / element_count var = x_square_sum / element_count - mean * mean normalized = (x - mean) / np.sqrt(var + epsilon) @@ -64,8 +64,9 @@ class FusedBatchNormTest(XLATestCase): return grad_x, grad_scale, grad_offset def testInference(self): - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -74,8 +75,8 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=[2], name="scale") - offset = array_ops.placeholder(np.float32, shape=[2], name="offset") + scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") + offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y_ref, mean_ref, var_ref = self._reference_training( x_val, scale_val, offset_val, epsilon, data_format) @@ -97,8 +98,9 @@ class FusedBatchNormTest(XLATestCase): self.assertAllClose(y_val, y_ref, atol=1e-3) def _testLearning(self, use_gradient_checker): - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -109,8 +111,8 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=[2], name="scale") - offset = array_ops.placeholder(np.float32, shape=[2], name="offset") + scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") + offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y, mean, var = nn.fused_batch_norm( t_val, @@ -154,8 +156,9 @@ class FusedBatchNormTest(XLATestCase): def testGradient(self): # TODO(b/64270657): Use gradient_checker here in addition to comparing with # this reference implementation. - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] grad_val = np.random.random_sample(x_shape).astype(np.float32) x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc index c969212a1b..1efbe0ffb1 100644 --- a/tensorflow/compiler/tf2xla/type_util.cc +++ b/tensorflow/compiler/tf2xla/type_util.cc @@ -49,9 +49,6 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) { case tensorflow::DT_UINT64: *type = xla::U64; return Status::OK(); - case tensorflow::DT_BFLOAT16: - *type = xla::BF16; - return Status::OK(); case tensorflow::DT_HALF: *type = xla::F16; return Status::OK(); diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index ac7d4cfb12..4d40ca5825 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -236,6 +236,12 @@ class XlaCompiler { // to the computation. bool allow_cpu_custom_calls = false; + // If 'local_executable_has_hybrid_result', the top-level pointers of the + // result tuple of compiled programs are stored in host memory and the + // nested buffers in device memory, otherwise the whole result tuple is + // stored in device memory. + bool local_executable_has_hybrid_result = false; + // If not nullptr, populate_resource_manager is called with the // compilation device's resource manager when the compilation // device is created, and can be used to create metadata objects diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 515b572b0e..660f419e46 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -77,7 +77,6 @@ cc_library( hdrs = ["types.h"], visibility = [":friends"], deps = [ - "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//third_party/eigen3", ], @@ -340,7 +339,6 @@ cc_library( name = "array", hdrs = ["array.h"], deps = [ - ":status", ":types", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 213e0bac6c..ba898d1f4e 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -23,10 +23,8 @@ limitations under the License. #include #include #include -#include #include -#include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -37,63 +35,10 @@ limitations under the License. namespace xla { -namespace array_impl { - -// conjunction -// -// Performs a compile-time logical AND operation on the passed types (which -// must have `::value` members convertible to `bool`. Short-circuits if it -// encounters any `false` members (and does not compare the `::value` members -// of any remaining arguments). -// -// This metafunction is designed to be a drop-in replacement for the C++17 -// `std::conjunction` metafunction. -template -struct conjunction; - -template -struct conjunction - : std::conditional, T>::type {}; - -template <> -struct conjunction<> : std::true_type {}; - -// A type trait that is valid when all elements in a parameter pack are of -// integral type. -template -using pack_is_integral = conjunction...>; - -// Compares three same-sized vectors elementwise. For each item in `values`, -// returns false if any of values[i] is outside the half-open range [starts[i], -// ends[i]). -template -bool all_inside_range(const C1& values, const C2& range_starts, - const C3& range_ends) { - for (size_t i = 0, e = values.size(); i < e; ++i) { - if (values[i] < range_starts[i] || values[i] >= range_ends[i]) { - return false; - } - } - return true; -} - -} // namespace array_impl - // General N dimensional array class with arbitrary value type. template class Array { public: - // Type inference can have a hard time parsing very deep initializer list - // nests, especially if one or more dimensions is one as the compiler just - // sees a single-element integer initializer. These typedefs allow casting - // explicitly with less typing. - using InitializerList1D = std::initializer_list; - using InitializerList2D = std::initializer_list; - using InitializerList3D = std::initializer_list; - using InitializerList4D = std::initializer_list; - - using value_type = T; - // Creates a new array with the specified dimensions. explicit Array(tensorflow::gtl::ArraySlice sizes) : Array(sizes, T()) {} @@ -108,7 +53,7 @@ class Array { // Creates a 2D array from the given nested initializer list. The outer // initializer list is the first dimension, the inner is the second dimension. // For example, {{1, 2, 3}, {4, 5, 6}} results in an array with n1=2 and n2=3. - Array(InitializerList2D values) + Array(std::initializer_list> values) : Array(ToInt64Vector({values.size(), values.begin()->size()})) { int64 idx = 0; for (const auto& it1 : values) { @@ -122,7 +67,8 @@ class Array { // Creates a 3D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. - Array(InitializerList3D values) + Array(std::initializer_list>> + values) : Array(ToInt64Vector({values.size(), values.begin()->size(), values.begin()->begin()->size()})) { int64 idx = 0; @@ -139,7 +85,9 @@ class Array { // Creates a 4D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. - Array(InitializerList4D values) + Array(std::initializer_list< + std::initializer_list>>> + values) : Array(ToInt64Vector({values.size(), values.begin()->size(), values.begin()->begin()->size(), values.begin()->begin()->begin()->size()})) { @@ -225,46 +173,10 @@ class Array { } } - // Invokes a callback with the (indices, value_ptr) for each cell in the - // array. If a callback returns a non-OK status, returns that else returns - // Status::OK(). - Status EachStatus( - std::function, T*)> f) { - std::vector index(sizes_.size()); - for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { - Status s = f(index, &values_[i]); - if (!s.ok()) { - return s; - } - } - return Status::OK(); - } - - // Invokes a callback with the (indices, value) for each cell in the array. - // If a callback returns a non-OK status, returns that else returns - // Status::OK(). - Status EachStatus( - std::function, T)> f) const { - std::vector index(sizes_.size()); - for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { - Status s = f(index, values_[i]); - if (!s.ok()) { - return s; - } - } - return Status::OK(); - } - // Returns the value at the cell specified by the indexes. The number of // arguments have to match with the number of dimensions for the array. - // - // The type trait is required to avoid this overload participating too - // eagerly; a parameter pack can take zero or more elements, so we must - // restrict this to only parameter packs that are all of integral type. template - typename std::enable_if::value, - const T&>::type - operator()(Dims... dims) const { + const T& operator()(Dims... dims) const { // We are using a std::array to avoid having to allocate memory in this // function for performance reasons. std::array indexes{{static_cast(dims)...}}; @@ -274,9 +186,7 @@ class Array { // Returns the value at the cell specified by the indexes. The number of // arguments have to match with the number of dimensions for the array. template - typename std::enable_if::value, - T&>::type - operator()(Dims... dims) { + T& operator()(Dims... dims) { // We are using a std::array to avoid having to allocate memory in this // function for performance reasons. std::array indexes{{static_cast(dims)...}}; @@ -345,59 +255,6 @@ class Array { bool operator!=(const Array& other) const { return !(*this == other); } - // Performs the equivalent of a slice operation on this array. - Array Slice(tensorflow::gtl::ArraySlice starts, - tensorflow::gtl::ArraySlice limits) const { - CHECK_EQ(starts.size(), num_dimensions()); - CHECK_EQ(limits.size(), num_dimensions()); - - std::vector sizes; - std::transform(starts.begin(), starts.end(), limits.begin(), - std::back_inserter(sizes), - [](int64 start, int64 limit) { return limit - start; }); - Array result(sizes); - - std::vector index(sizes_.size()); - int64 slice_i = 0; - for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { - if (array_impl::all_inside_range(index, starts, limits)) { - // Even though the bounds of result are different to our bounds, we're - // iterating in the same order. So we can simply write successive linear - // indices instead of recalculating a multi-dimensional index. - result.values_[slice_i++] = values_[i]; - } - } - return result; - } - - // Performs the equivalent of a DynamicUpdateSlice in-place on this array. - void UpdateSlice(const Array& from, - tensorflow::gtl::ArraySlice start_indices) { - CHECK_EQ(from.num_dimensions(), num_dimensions()); - std::vector limit_indices; - std::transform(start_indices.begin(), start_indices.end(), - from.dimensions().begin(), std::back_inserter(limit_indices), - std::plus{}); - std::vector index(sizes_.size()); - int64 from_i = 0; - for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { - if (array_impl::all_inside_range(index, start_indices, limit_indices)) { - // Even though the bounds of from are different to our bounds, we're - // iterating in the same order. So we can simply write successive linear - // indices instead of recalculating a multi-dimensional index. - values_[i] = from.values_[from_i++]; - } - } - } - - // Performs an in-place reshape, modifying the dimensions but not the - // underlying data. - void Reshape(tensorflow::gtl::ArraySlice new_dimensions) { - int64 old_num_elements = num_elements(); - sizes_ = std::vector(new_dimensions.begin(), new_dimensions.end()); - CHECK_EQ(num_elements(), old_num_elements); - } - // Returns a string representation of the array suitable for debugging. string ToString() const { std::vector pieces; diff --git a/tensorflow/compiler/xla/array_test.cc b/tensorflow/compiler/xla/array_test.cc index 8b94194774..093784f541 100644 --- a/tensorflow/compiler/xla/array_test.cc +++ b/tensorflow/compiler/xla/array_test.cc @@ -71,19 +71,6 @@ TEST(ArrayTest, IndexingReadWrite) { EXPECT_EQ(arr(1, 2), 61); } -TEST(ArrayTest, DynamicIndexingReadWrite) { - Array arr({2, 3}); - - std::vector index1 = {1, 1}; - std::vector index2 = {1, 2}; - EXPECT_EQ(arr(index1), 0); - EXPECT_EQ(arr(index2), 0); - arr(index1) = 51; - arr(index2) = 61; - EXPECT_EQ(arr(1, 1), 51); - EXPECT_EQ(arr(1, 2), 61); -} - TEST(ArrayTest, IndexingReadWriteBool) { Array arr{{false, true, false}, {false, true, false}}; @@ -154,37 +141,5 @@ TEST(ArrayTest, Each) { EXPECT_EQ(arr.num_elements() * (arr.num_elements() - 1) / 2, each_sum); } -TEST(ArrayTest, Slice) { - Array arr({2, 4}); - arr.FillWithMultiples(1); - - Array identity_slice = arr.Slice({0, 0}, {2, 4}); - EXPECT_EQ(identity_slice.dimensions(), arr.dimensions()); - for (auto it1 = arr.begin(), it2 = identity_slice.begin(), e = arr.end(); - it1 != e; ++it1, ++it2) { - EXPECT_EQ(*it1, *it2); - } - - Array sub_slice = arr.Slice({1, 0}, {2, 2}); - EXPECT_EQ(sub_slice.dimensions(), (std::vector{1, 2})); - const string expected = R"([[4, 5]])"; - EXPECT_EQ(expected, sub_slice.ToString()); -} - -TEST(ArrayTest, UpdateSlice) { - Array arr({3, 4}); - arr.FillWithMultiples(1); - - Array sub_arr({2, 2}); - sub_arr.FillWithMultiples(3); - - arr.UpdateSlice(sub_arr, {1, 1}); - - const string expected = R"([[0, 1, 2, 3], - [4, 0, 3, 7], - [8, 6, 9, 11]])"; - EXPECT_EQ(expected, arr.ToString()); -} - } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 66937d64af..92cd8e729d 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -142,7 +142,8 @@ StatusOr> Client::TransferFromOutfeed( "TransferToClient request"); } - return MakeUnique(response.literal()); + Literal literal(response.literal()); + return MakeUnique(literal); } Status Client::ResetDevice() { diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 4c6e320557..8e1b4be1f3 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -68,7 +68,6 @@ class ShardingBuilder { const TileAssignment& tile_assignment) { OpSharding result; result.set_type(OpSharding::Type::OpSharding_Type_OTHER); - *result.mutable_tile_shape() = tile_shape; for (int64 dim : tile_assignment.dimensions()) { result.add_tile_assignment_dimensions(dim); } diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index fca2bf2688..ee34682087 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -44,7 +44,6 @@ cc_library( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", - "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index d936bd870b..e6645e4941 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -49,6 +48,62 @@ std::unique_ptr MakeFakeDataViaDeviceOrDie(const Shape& shape, } // namespace +StatusOr> MakeFakeLiteral(const Shape& shape) { + if (ShapeUtil::IsTuple(shape)) { + std::vector> elements; + for (const Shape& element_shape : shape.tuple_shapes()) { + TF_ASSIGN_OR_RETURN(std::unique_ptr element, + MakeFakeLiteral(element_shape)); + elements.push_back(std::move(element)); + } + return Literal::MakeTupleOwned(std::move(elements)); + } + std::unique_ptr literal = Literal::CreateFromShape(shape); + std::minstd_rand0 engine; + switch (shape.element_type()) { + case F32: { + std::uniform_real_distribution generator(0.0f, 1.0f); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); + break; + } + case S32: { + std::uniform_int_distribution generator( + std::numeric_limits::lowest(), + std::numeric_limits::max()); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); + break; + } + case S64: { + std::uniform_int_distribution generator( + std::numeric_limits::lowest(), + std::numeric_limits::max()); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); + break; + } + case PRED: { + std::uniform_int_distribution generator(0, 1); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); + break; + } + default: + return Unimplemented("Unsupported type for fake literal generation: %s", + ShapeUtil::HumanString(shape).c_str()); + } + return std::move(literal); +} + std::unique_ptr MakeFakeDataOrDie(const Shape& shape, Client* client) { if (ShapeUtil::ByteSizeOf(shape) < (1LL << 30)) { diff --git a/tensorflow/compiler/xla/client/lib/testing.h b/tensorflow/compiler/xla/client/lib/testing.h index 7e640d1307..b5c4393dcc 100644 --- a/tensorflow/compiler/xla/client/lib/testing.h +++ b/tensorflow/compiler/xla/client/lib/testing.h @@ -26,6 +26,10 @@ limitations under the License. namespace xla { +// Generates fake data in a literal of the given shape, or returns an error +// status if the element type is currently unhandled for fake data generation. +StatusOr> MakeFakeLiteral(const Shape& shape); + // Generates fake data of the given shape on the device or dies. The fake data // is created by performing a computation on the device rather than transferring // data from the host to the device. diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index b50425a09c..15c744ecd3 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -27,6 +27,16 @@ namespace se = ::perftools::gputools; namespace xla { +ExecutableBuildOptions& ExecutableBuildOptions::set_platform( + perftools::gputools::Platform* platform) { + platform_ = platform; + return *this; +} + +perftools::gputools::Platform* ExecutableBuildOptions::platform() const { + return platform_; +} + ExecutableBuildOptions& ExecutableBuildOptions::set_device_ordinal( int device_ordinal) { device_ordinal_ = device_ordinal; @@ -46,6 +56,16 @@ const Shape* ExecutableBuildOptions::result_layout() const { return result_layout_set_ ? &result_layout_ : nullptr; } +ExecutableBuildOptions& ExecutableBuildOptions::set_has_hybrid_result( + bool has_hybrid_result) { + has_hybrid_result_ = has_hybrid_result; + return *this; +} + +bool ExecutableBuildOptions::has_hybrid_result() const { + return has_hybrid_result_; +} + namespace { StatusOr BorrowStreamForDevice(int device_ordinal, Backend* backend) { diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index e9eeaa0aa2..9f985ed527 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -37,6 +37,14 @@ namespace xla { // LocalClient::Compile. class ExecutableBuildOptions { public: + // If set, this is the platform to build the computation for. This must match + // the underlying platform of the service. A value of nullptr indicates the + // option has not been set. + // + // TODO(b/28616830): Support multiple platforms. + ExecutableBuildOptions& set_platform(perftools::gputools::Platform* platform); + perftools::gputools::Platform* platform() const; + // If set, this is the device to build the computation for. Valid // device_ordinal values are: 0 to # of devices - 1. These values are // identical to the device ordinal values used by StreamExecutor. The built @@ -53,10 +61,18 @@ class ExecutableBuildOptions { ExecutableBuildOptions& set_result_layout(const Shape& shape_with_layout); const Shape* result_layout() const; + // If set, the executable will be built to output a hybrid + // ShapedBuffer with top-level tuple pointers in host memory and + // result buffers in device memory. + ExecutableBuildOptions& set_has_hybrid_result(bool has_hybrid_result); + bool has_hybrid_result() const; + private: + perftools::gputools::Platform* platform_ = nullptr; int device_ordinal_ = -1; Shape result_layout_; bool result_layout_set_ = false; + bool has_hybrid_result_ = true; }; class LocalExecutable { diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 93d3cd425f..fda791401d 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -33,20 +33,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -namespace { -using tensorflow::int64; - -constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; - -// Converts between little and big endian, assuming elements in the array are 16 -// bits long. -void ConvertEndianShort(char* bytes, int64 size) { - CHECK_EQ(size / 2, 0); - for (int64 i = 0; i < size; i += 2) { - std::swap(bytes[i], bytes[i + 1]); - } -} -} // namespace namespace xla { @@ -183,8 +169,6 @@ Status Literal::Copy(const Literal& src_literal, return CopyRange(src_literal, src_base, dest_base, copy_size); case F16: return CopyRange(src_literal, src_base, dest_base, copy_size); - case BF16: - return CopyRange(src_literal, src_base, dest_base, copy_size); case F32: return CopyRange(src_literal, src_base, dest_base, copy_size); case F64: @@ -216,8 +200,6 @@ Status Literal::Copy(const Literal& src_literal, return *Literal::CreateR0(0); case F16: return *Literal::CreateR0(static_cast(0.0f)); - case BF16: - return *Literal::CreateR0(static_cast(0.0f)); case F32: return *Literal::CreateR0(0); case F64: @@ -303,9 +285,6 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(-std::numeric_limits::infinity())); - case BF16: - return *Literal::CreateR0( - static_cast(-std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no minimum value"; case OPAQUE: @@ -342,9 +321,6 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(std::numeric_limits::infinity())); - case BF16: - return *Literal::CreateR0( - static_cast(std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no maximum value"; case OPAQUE: @@ -452,7 +428,6 @@ std::unique_ptr Literal::Transpose( // The shape with affine layout resulting from that operation will be // F32[8,11]{0,1}, since it leaves the original most minor (the 8 sized), the // most minor. - // // Essentially, given MinMaj(Di) the position of the Di dimension within the // minor to major vector, and given T(Di) the index that the original Di // dimension has within the transposed array, a layout is affine if @@ -561,9 +536,6 @@ string Literal::GetAsString( } case F16: return tensorflow::strings::StrCat(Get(multi_index)); - case BF16: - return tensorflow::strings::StrCat( - static_cast(Get(multi_index))); default: return tensorflow::strings::StrCat( "[", PrimitiveType_Name(shape().element_type()), "]"); @@ -597,17 +569,9 @@ int64 Literal::LinearIndex( return IndexUtil::MultidimensionalIndexToLinearIndex(shape(), multi_index); } -string Literal::ToString(bool print_layout) const { +string Literal::ToString() const { std::vector pieces; - auto shape_to_string = [print_layout](const Shape& shape) { - if (print_layout) { - return ShapeUtil::HumanStringWithLayout(shape); - } else { - return ShapeUtil::HumanString(shape); - } - }; - auto element_to_string = [this](tensorflow::gtl::ArraySlice indices) -> string { PrimitiveType element_type = shape().element_type(); @@ -621,7 +585,7 @@ string Literal::ToString(bool print_layout) const { // TODO(b/32894291): refactor this code to reduce code duplication. if (ShapeUtil::IsTuple(shape())) { - pieces.push_back(shape_to_string(shape())); + pieces.push_back(ShapeUtil::HumanString(shape())); pieces.push_back(" (\n"); pieces.push_back(tensorflow::str_util::Join( tuple_literals(), ",\n", [](string* out, const Literal& element) { @@ -637,7 +601,7 @@ string Literal::ToString(bool print_layout) const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 2) { - pieces.push_back(shape_to_string(shape())); + pieces.push_back(ShapeUtil::HumanString(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(" { "); @@ -649,7 +613,7 @@ string Literal::ToString(bool print_layout) const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 3) { - pieces.push_back(shape_to_string(shape())); + pieces.push_back(ShapeUtil::HumanString(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(i0 > 0 ? ",\n{" : "{"); @@ -664,7 +628,7 @@ string Literal::ToString(bool print_layout) const { } pieces.push_back("\n}"); } else if (ShapeUtil::Rank(shape()) == 4) { - pieces.push_back(shape_to_string(shape())); + pieces.push_back(ShapeUtil::HumanString(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(tensorflow::strings::Printf(" { /*i0=%lld*/\n", i0)); @@ -685,7 +649,7 @@ string Literal::ToString(bool print_layout) const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 5) { - pieces.push_back(shape_to_string(shape())); + pieces.push_back(ShapeUtil::HumanString(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(tensorflow::strings::Printf(" { /*i0=%lld*/\n", i0)); @@ -712,7 +676,7 @@ string Literal::ToString(bool print_layout) const { } pieces.push_back("}"); } else { - pieces.push_back(shape_to_string(shape())); + pieces.push_back(ShapeUtil::HumanString(shape())); pieces.push_back(" {...}"); } @@ -771,8 +735,6 @@ void* Literal::MutableInternalData() { return reinterpret_cast(c64s_.data()); case F16: return reinterpret_cast(f16s_.data()); - case BF16: - return reinterpret_cast(bf16s_.data()); default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -815,9 +777,6 @@ void Literal::Reserve(int64 num_elements) { case F16: Resize(num_elements, static_cast(0.0f)); break; - case BF16: - Resize(num_elements, static_cast(0.0f)); - break; default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -857,9 +816,6 @@ tensorflow::Status Literal::ValidateLiteral() const { case F16: actual = f16s().size() / sizeof(half); break; - case BF16: - actual = bf16s().size(); - break; default: return tensorflow::errors::Unimplemented( "unhandled element type for literal validation: " + @@ -956,7 +912,6 @@ StatusOr> ConvertIfDestTypeMatches( CONVERT_IF_TYPES_MATCH(F16) CONVERT_IF_TYPES_MATCH(F32) CONVERT_IF_TYPES_MATCH(F64) - CONVERT_IF_TYPES_MATCH(BF16) #undef CONVERT_IF_TYPES_MATCH case C64: return ConvertToC64(src_literal); @@ -986,9 +941,8 @@ StatusOr> Literal::Convert( CONVERT_IF_DEST_TYPE_MATCHES(F16) CONVERT_IF_DEST_TYPE_MATCHES(F32) CONVERT_IF_DEST_TYPE_MATCHES(F64) - CONVERT_IF_DEST_TYPE_MATCHES(BF16) #undef CONVERT_IF_DEST_TYPE_MATCHES - // Other types are not yet supported. + // Other types are not yet supported. default: return InvalidArgument("Unimplemented: Convert from type %s to type %s", PrimitiveType_Name(shape().element_type()).c_str(), @@ -1057,8 +1011,6 @@ bool Literal::operator==(const Literal& other) const { return EqualElements(*this, other, 0, &multi_index); case F16: return EqualElements(*this, other, 0, &multi_index); - case BF16: - return EqualElements(*this, other, 0, &multi_index); case C64: return EqualElements(*this, other, 0, &multi_index); default: @@ -1168,18 +1120,13 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { + // TODO - there is an endianess problem here. fix it, or wait for uint16 + // support in protobuf auto values = mutable_f16s(); return tensorflow::gtl::MutableArraySlice(values->data(), values->size()); } -template <> -tensorflow::gtl::MutableArraySlice -Literal::GetMutableArraySlice() { - auto values = mutable_bf16s(); - return {values->data(), values->size()}; -} - template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { CHECK_EQ(shape().element_type(), PRED); @@ -1250,12 +1197,6 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { f16s().size() / sizeof(half)); } -template <> -tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { - CHECK_EQ(shape().element_type(), BF16); - return {bf16s().data(), bf16s().size()}; -} - template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { @@ -1304,9 +1245,6 @@ bool Literal::IsAll(int8 value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); - case BF16: - return AllElementsEqualValue(*this, - static_cast(value)); case PRED: if (value == 0) { return AllElementsEqualValue(*this, false); @@ -1328,9 +1266,6 @@ bool Literal::IsAllFloat(float value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); - case BF16: - return AllElementsEqualValue(*this, - static_cast(value)); default: return false; } @@ -1367,8 +1302,6 @@ bool Literal::IsZero(tensorflow::gtl::ArraySlice indices) const { return Get(indices) == complex64(0.0f, 0.0f); case F16: return Get(indices) == static_cast(0.0f); - case BF16: - return Get(indices) == static_cast(0.0f); case PRED: return Get(indices) == false; default: @@ -1436,12 +1369,6 @@ void Literal::Resize(int64 num_elements, half value) { mutable_f16s()->resize(num_elements, value); } -template <> -void Literal::Resize(int64 num_elements, bfloat16 value) { - CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); - mutable_bf16s()->resize(num_elements, value); -} - template <> void Literal::Resize(int64 num_elements, complex64 value) { CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); @@ -1490,19 +1417,6 @@ LiteralProto Literal::ToProto() const { *proto.mutable_f16s() = string(reinterpret_cast(f16s_.data()), f16s_.size() * sizeof(half)); - if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto.mutable_f16s()->data()), - proto.f16s().size()); - } - break; - case BF16: - *proto.mutable_bf16s() = - string(reinterpret_cast(bf16s_.data()), - bf16s_.size() * sizeof(bfloat16)); - if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto.mutable_bf16s()->data()), - proto.bf16s().size()); - } break; case F32: CopyToRepeatedField(proto.mutable_f32s(), f32s()); @@ -1571,21 +1485,6 @@ void Literal::CopyFromProto(const LiteralProto& literal_proto) { CHECK_EQ(0, s.size() % sizeof(half)); f16s_ = std::vector(s.size() / sizeof(half)); memcpy(f16s_.data(), s.data(), s.size()); - - if (!kLittleEndian) { - ConvertEndianShort(reinterpret_cast(f16s_.data()), s.size()); - } - break; - } - case BF16: { - const string& s(literal_proto.bf16s()); - CHECK_EQ(0, s.size() % sizeof(bfloat16)); - bf16s_ = std::vector(s.size() / sizeof(bfloat16)); - memcpy(bf16s_.data(), s.data(), s.size()); - - if (!kLittleEndian) { - ConvertEndianShort(reinterpret_cast(bf16s_.data()), s.size()); - } break; } case F32: diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index f37e529caf..a1e288829f 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -163,11 +163,6 @@ class Literal { const std::vector& c64s() const { return c64s_; } std::vector* mutable_c64s() { return &c64s_; } - int bf16s_size() const { return bf16s().size(); } - bfloat16 bf16s(int i) const { return bf16s_[i]; } - const std::vector& bf16s() const { return bf16s_; } - std::vector* mutable_bf16s() { return &bf16s_; } - int tuple_literals_size() const { return tuple_literals().size(); } const Literal& tuple_literals(int i) const { return tuple_literals_[i]; } Literal* add_tuple_literals() { @@ -455,7 +450,7 @@ class Literal { tensorflow::Status ValidateLiteral() const; // Returns a string representation of the literal value. - string ToString(bool print_layout = false) const; + string ToString() const; // Invokes the "per cell" callback for each element in the provided // literal with the element's indices and a string representation of @@ -627,7 +622,6 @@ class Literal { std::vector u16s_; std::vector u32s_; std::vector u64s_; - std::vector bf16s_; std::vector f16s_; std::vector f32s_; std::vector f64s_; @@ -680,9 +674,6 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; -template <> -tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; - template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; @@ -723,9 +714,6 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); -template <> -tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); - template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); @@ -759,9 +747,6 @@ void Literal::Resize(int64 num_elements, double value); template <> void Literal::Resize(int64 num_elements, half value); -template <> -void Literal::Resize(int64 num_elements, bfloat16 value); - template <> void Literal::Resize(int64 num_elements, complex64 value); @@ -1005,14 +990,6 @@ inline half Literal::Get( return GetArraySlice()[linear_index]; } -template <> -inline bfloat16 Literal::Get( - tensorflow::gtl::ArraySlice multi_index) const { - CHECK(shape().element_type() == BF16); - int64 linear_index = LinearIndex(multi_index); - return GetArraySlice()[linear_index]; -} - template void Literal::Set(tensorflow::gtl::ArraySlice multi_index, NativeT value) { diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 1e08101759..6d596da4ad 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -110,18 +110,6 @@ TEST_F(LiteralUtilTest, LiteralScalarToString) { auto c64_lit = Literal::CreateR0({3.14f, 2.78f}); ASSERT_EQ("(3.14, 2.78)", c64_lit->ToString()); - - auto bf16_lit = Literal::CreateR0(static_cast(0.5f)); - ASSERT_EQ("0.5", bf16_lit->ToString()); - - // 3.14 will be rounded to 3.125 in bfloat16 format (Round to nearest even). - auto bf16_lit_truncated = - Literal::CreateR0(static_cast(3.14f)); - ASSERT_EQ("3.140625", bf16_lit_truncated->ToString()); - - auto bf16_lit_truncated2 = - Literal::CreateR0(static_cast(9.001f)); - ASSERT_EQ("9", bf16_lit_truncated2->ToString()); } TEST_F(LiteralUtilTest, LiteralVectorToString) { @@ -409,18 +397,6 @@ TEST_F(LiteralUtilTest, IsAll) { EXPECT_FALSE(Literal::CreateR2({{h8}, {h9}})->IsAll(8)); EXPECT_FALSE(Literal::CreateR2({{h9}, {h8}})->IsAll(8)); - bfloat16 b8(8.0f); - bfloat16 b9(9.0f); - - EXPECT_TRUE(Literal::CreateR2({{b8}, {b8}})->IsAll(8)); - EXPECT_FALSE(Literal::CreateR2({{b8}, {b9}})->IsAll(8)); - EXPECT_FALSE(Literal::CreateR2({{b9}, {b8}})->IsAll(8)); - - // 9.001 will be truncated to 9.0 - bfloat16 b91(9.001f); - bfloat16 b90(9.00f); - EXPECT_TRUE(Literal::CreateR2({{b91}, {b90}})->IsAll(9.0)); - complex64 c8_9 = {8, 9}; EXPECT_FALSE(Literal::CreateR2({{c8_9}, {c8_9}})->IsAll(8)); @@ -715,30 +691,6 @@ TEST_F(LiteralUtilTest, PopulateR2C64) { EXPECT_EQ(output, *expected); } -TEST_F(LiteralUtilTest, PopulateWithValueR0BF16) { - Literal output; - bfloat16 h(0.25f); - output.PopulateWithValue(h, {}); - auto expected = Literal::CreateR0(h); - EXPECT_EQ(output, *expected); -} - -TEST_F(LiteralUtilTest, PopulateWithValueR1BF16) { - Literal output; - bfloat16 h(0.5f); - output.PopulateWithValue(h, {3}); - auto expected = Literal::CreateR1({h, h, h}); - EXPECT_EQ(output, *expected); -} - -TEST_F(LiteralUtilTest, PopulateWithValueR2BF16) { - Literal output; - bfloat16 h(2.0f); - output.PopulateWithValue(h, {2, 2}); - auto expected = Literal::CreateR2({{h, h}, {h, h}}); - EXPECT_EQ(output, *expected); -} - TEST_F(LiteralUtilTest, PopulateWithValueR0F32) { Literal output; output.PopulateWithValue(2.5f, {}); @@ -1023,14 +975,6 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { {{half(26.0), half(0.0), half(28.0), half(0.0)}, {half(0.0), half(31.0), half(0.0), half(33.0)}}, }}, layout_r4_dim0major_); - auto bf16 = Literal::CreateR4WithLayout({{ - {{bfloat16(10.0), bfloat16(0.0), bfloat16(12.0), bfloat16(0.0)}, - {bfloat16(0.0), bfloat16(15.0), bfloat16(0.0), bfloat16(17.0)}}, - {{bfloat16(0.0), bfloat16(19.0), bfloat16(0.0), bfloat16(21.0)}, - {bfloat16(22.0), bfloat16(0.0), bfloat16(24.0), bfloat16(0.0)}}, - {{bfloat16(26.0), bfloat16(0.0), bfloat16(28.0), bfloat16(0.0)}, - {bfloat16(0.0), bfloat16(31.0), bfloat16(0.0), bfloat16(33.0)}}, - }}, layout_r4_dim0major_); auto f32 = Literal::CreateR4WithLayout({{ {{10.0f, 0.0f, 12.0f, 0.0f}, {0.0f, 15.0f, 0.0f, 17.0f}}, {{0.0f, 19.0f, 0.0f, 21.0f}, {22.0f, 0.0f, 24.0f, 0.0f}}, @@ -1064,12 +1008,6 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { conv = s8->Convert(PRED).ConsumeValueOrDie(); EXPECT_EQ(*conv, *pred); - conv = bf16->Convert(S32).ConsumeValueOrDie(); - EXPECT_EQ(*conv, *s32); - - conv = bf16->Convert(F32).ConsumeValueOrDie(); - EXPECT_EQ(*conv, *f32); - conv = pred->Convert(S32).ConsumeValueOrDie(); EXPECT_EQ(*conv, *int32_pred); diff --git a/tensorflow/compiler/xla/primitive_util.cc b/tensorflow/compiler/xla/primitive_util.cc index 2bce56b7bd..2113b5e06f 100644 --- a/tensorflow/compiler/xla/primitive_util.cc +++ b/tensorflow/compiler/xla/primitive_util.cc @@ -78,11 +78,6 @@ PrimitiveType NativeToPrimitiveType() { return F64; } -template <> -PrimitiveType NativeToPrimitiveType() { - return BF16; -} - template <> PrimitiveType NativeToPrimitiveType() { return F16; @@ -94,7 +89,7 @@ PrimitiveType NativeToPrimitiveType() { } bool IsFloatingPointType(PrimitiveType type) { - return type == F16 || type == F32 || type == F64 || type == BF16; + return type == F16 || type == F32 || type == F64; } bool IsComplexType(PrimitiveType type) { return type == C64; } @@ -123,7 +118,6 @@ int BitWidth(PrimitiveType type) { case S16: case U16: case F16: - case BF16: return 16; case U32: diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h index 19c6a13888..a49c8b86fc 100644 --- a/tensorflow/compiler/xla/primitive_util.h +++ b/tensorflow/compiler/xla/primitive_util.h @@ -77,8 +77,6 @@ template <> PrimitiveType NativeToPrimitiveType(); template <> PrimitiveType NativeToPrimitiveType(); -template <> -PrimitiveType NativeToPrimitiveType(); // Complex template <> @@ -169,11 +167,6 @@ struct PrimitiveTypeToNative { using type = half; }; -template <> -struct PrimitiveTypeToNative { - using type = bfloat16; -}; - // Complex template <> struct PrimitiveTypeToNative { diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 7cf24641b5..521fe411a4 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -90,8 +90,6 @@ cc_library( ":shape_inference", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:status", - "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", @@ -1780,6 +1778,7 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", ], ) @@ -1850,6 +1849,7 @@ tf_cc_test( "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index 05f2d06278..9abe30e3f3 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS - #include "tensorflow/compiler/xla/service/backend.h" #include #include #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/platform_util.h" diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 3c5b360c8e..b422b22df9 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -497,19 +497,19 @@ Status GatherComputationsByAllocationType( std::vector* global_computations) { // Create a worklist of computations paired with whether the allocation must // be thread-local. - std::deque> worklist; + std::deque> worklist; worklist.push_back(std::make_pair(module->entry_computation(), /*is_thread_local*/ false)); // Sets for quickly checking membership. Computations are returned in vectors // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; + FlatSet thread_local_set; + FlatSet global_set; while (!worklist.empty()) { auto worklist_front = worklist.front(); worklist.pop_front(); - const HloComputation* computation = worklist_front.first; + HloComputation* computation = worklist_front.first; bool is_thread_local = worklist_front.second; bool in_thread_local_set = thread_local_set.count(computation) > 0; bool in_global_set = global_set.count(computation) > 0; @@ -653,7 +653,7 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, } if (allow_input_output_aliasing_ && allocation->maybe_live_out()) { - const HloComputation* entry_computation = + HloComputation* entry_computation = assignment->module_->entry_computation(); for (auto param : entry_computation->parameter_instructions()) { for (auto& param_buffer : @@ -819,6 +819,17 @@ Status BufferAssigner::AssignBuffersForComputation( continue; } + if (instruction->opcode() == HloOpcode::kRecv) { + // Make sure that recv operations get a new unique allocation so that + // don't share their buffer with any other operations. + BufferAllocation* allocation = assignment->NewAllocation( + *buffer, buffer_size, is_thread_local, /*is_reusable=*/false); + allocation_indices.push_back(allocation->index()); + VLOG(3) << "New allocation #" << allocation->index() + << " for recv: " << *buffer; + continue; + } + if (ShapeUtil::IsTuple(buffer->shape())) { // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend // assumes longer buffer liveness than indicated by the analysis. diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 4f6e69ebd4..6213baee2f 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -280,7 +280,6 @@ cc_library( srcs = ["dot_op_emitter.cc"], hdrs = ["dot_op_emitter.h"], deps = [ - ":cpu_options", ":cpu_runtime", ":ir_emission_utils", "//tensorflow/compiler/xla:shape_util", @@ -291,10 +290,8 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_module_config", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", - "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library", "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", - "//tensorflow/compiler/xla/service/llvm_ir:vector_support_library", "//tensorflow/core:lib", "@llvm//:core", ], @@ -720,7 +717,6 @@ cc_library( hdrs = ["cpu_options.h"], deps = [ "//tensorflow/compiler/xla/service:hlo_module_config", - "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.cc b/tensorflow/compiler/xla/service/cpu/cpu_options.cc index 09f028463a..dba140d112 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.cc @@ -15,14 +15,11 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_options.h" -#include "tensorflow/core/lib/strings/numbers.h" - namespace { const char* const kXlaParallelCpuOption = "xla_cpu_parallel"; const char* const kXlaOptimizeForSizeCpuOption = "xla_cpu_optimize_for_size"; const char* const kXlaDisableVectorizedReduce = "xla_disable_vectorized_reduce"; -const char* const kLlvmIrDotTilingFactor = "xla_llvm_dot_tiling_factor"; } // namespace @@ -48,19 +45,6 @@ bool VectorizedReduceDisabled(const HloModuleConfig& config) { return extra_options_map.count(kXlaOptimizeForSizeCpuOption) > 0; } -tensorflow::gtl::optional LlvmIrGemvTilingFactor( - const HloModuleConfig& config) { - const auto& extra_options_map = - config.debug_options().xla_backend_extra_options(); - auto it = extra_options_map.find(kLlvmIrDotTilingFactor); - int64 tiling_factor; - if (it != extra_options_map.end() && - tensorflow::strings::safe_strto64(it->second, &tiling_factor)) { - return tiling_factor; - } - return tensorflow::gtl::nullopt; -} - } // namespace options } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.h b/tensorflow/compiler/xla/service/cpu/cpu_options.h index 6ba0fd2453..5dc24ebc7b 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.h @@ -27,8 +27,6 @@ namespace options { bool CpuParallelBackendRequested(const HloModuleConfig& config); bool OptimizeForSizeRequested(const HloModuleConfig& config); bool VectorizedReduceDisabled(const HloModuleConfig& config); -tensorflow::gtl::optional LlvmIrGemvTilingFactor( - const HloModuleConfig& config); } // namespace options } // namespace cpu diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc index f385829cdf..f8e260dd90 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc @@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS + #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include #include #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/client/local_client.h" diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 2a447a54b0..e57d49172b 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -25,9 +25,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_module.h" -#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" -#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" @@ -40,450 +38,6 @@ using llvm_ir::SetToFirstInsertPoint; namespace cpu { -namespace { -// Loads a tile of values from a 2D tensor. -class TileLoader { - public: - // Constructs a TileLoader that will load a tile consisting of - // `tile_size_along_major_dim` vectors from the matrix `matrix`, starting at - // `major_dim_offset` in the major dimension. The tile size along the minor - // dimension is the vector size, and that is implicitly determined by `vsl`. - TileLoader(VectorSupportLibrary* vsl, llvm::IRBuilder<>* ir_builder, - llvm::Value* matrix, int64 matrix_size_along_minor_dim, - llvm::Value* major_dim_offset, int64 tile_size_along_major_dim) - : vsl_(vsl) { - pointers_.reserve(tile_size_along_major_dim); - for (int64 i = 0; i < tile_size_along_major_dim; i++) { - llvm::Value* total_offset = ir_builder->CreateMul( - ir_builder->getInt64(matrix_size_along_minor_dim), - ir_builder->CreateAdd(ir_builder->getInt64(i), major_dim_offset)); - pointers_.push_back(vsl_->ComputeOffsetPointer(matrix, total_offset)); - } - } - - // Load a tile consisting of `tile_size_along_major_dim_` vectors starting at - // `major_dim_offset_` in the major dimension and `minor_dim_offset` in the - // minor dimension. - std::vector LoadTile(llvm::Value* minor_dim_offset) const { - std::vector result; - result.reserve(pointers_.size()); - for (const auto& pointer : pointers_) { - result.push_back(vsl_->LoadVector(pointer, minor_dim_offset)); - } - return result; - } - - private: - VectorSupportLibrary* vsl_; - std::vector pointers_; -}; - -// Computes a dot product between "[M,K]{0,1} lhs" with a [K,1] vector (the -// layout of the vector does not matter). This implementation uses a tiling -// scheme to improve performance. -// -// We logically separate the LHS matrix into four segments: -// -// +----------------------+---+ -// | | | -// | | | -// | A | B | -// | | | -// | | | -// | | | -// +----------------------+---+ -// | C | D | -// +----------------------+---+ -// -// where A is the largest submatrix of the LHS that can be evenly dividied into -// tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: -// -// +---+---+---+---+ +--+--+--+--+ -// |M00|M10|M20|M30| |V0|V1|V2|V3| -// +---+---+---+---+ +--+--+--+--+ -// |M01|M11|M21|M31| and |V0|V1|V2|V3| -// +---+---+---+---+ +--+--+--+--+ -// |M02|M12|M22|M32| |V0|V1|V2|V3| -// +---+---+---+---+ +--+--+--+--+ -// |M03|M13|M23|M33| |V0|V1|V2|V3| -// +---+---+---+---+ +--+--+--+--+ -// -// (Legend: rows are horizontal and columns are vertical; and each column is one -// llvm::Value of a vector type) -// -// where: -// -// a. The left tile is from the column major left matrix. -// b. The right tile is an elementwise broadcast of a [V0, V1, V2, V3] -// vector loaded from the RHS vector. -// -// As we iterate through the column dimension, we compute the change to the -// result vector by an elementwise multiplication between the two tiles above -// followed by a reduction along the major dimension: -// -// +-----------------------------------+ -// | M00*V0 + M10*V1 + M20*V2 + M30*V3 | -// +-----------------------------------+ -// | M01*V0 + M11*V1 + M21*V2 + M31*V3 | -// Result[R:R+4] += +-----------------------------------+ -// | M02*V0 + M12*V1 + M22*V2 + M32*V3 | -// +-----------------------------------+ -// | M03*V0 + M13*V1 + M23*V2 + M33*V3 | -// +-----------------------------------+ -// -// Where R is the starting row for the tile. -// -// We have an inner epilogue loop to deal with the "C" submatrix and an outer -// epilogue loop to deal with the B,D submarix. -// -// TODO(sanjoy): We should investigate if using gather loads and scatter stores -// can be used here have the same inner loop for both column-major and row-major -// matrix-vector products. -class ColumnMajorMatrixVectorProductEmitter { - public: - ColumnMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, - int64 tile_rows, int64 tile_cols, - int64 m, int64 k, llvm::Value* lhs, - llvm::Value* rhs, llvm::Value* result, - llvm::IRBuilder<>* ir_builder) - : scalar_type_(scalar_type), - tile_rows_(tile_rows), - tile_cols_(tile_cols), - m_(m), - k_(k), - lhs_(lhs), - rhs_(rhs), - result_(result), - ir_builder_(ir_builder), - ksl_(ir_builder_), - vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { - CHECK(tile_rows_ > 0 && IsPowerOfTwo(static_cast(tile_rows_))); - } - - void Emit(); - - private: - void EmitOuterLoopBody(llvm::Value* column, int64 column_count, - bool is_first_column); - - TileLoader GetLhsTileLoader(llvm::Value* column_start, int64 column_count) { - return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, - /*matrix_size_along_minor_dim=*/m_, - /*major_dim_offset=*/column_start, - /*tile_size_along_major_dim=*/column_count); - } - - // Load a tile of values from the RHS. For the RHS a "tile" is a contiguous - // sequnce of `count` values, each one broadcasted to the vector width. - std::vector LoadRhsTile(llvm::Value* offset, int64 count) { - llvm::Value* base_pointer = vsl_.ComputeOffsetPointer(rhs_, offset); - std::vector result; - result.reserve(count); - for (int64 i = 0; i < count; i++) { - result.push_back(vsl_.LoadBroadcast(base_pointer, i)); - } - return result; - } - - void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, - const std::vector& rhs_tile, - int64 columns, bool is_first_column); - - void EmitInnerLoopEpilogue(llvm::Value* current_tile_col, int64 columns, - bool is_first_tiled_column); - - PrimitiveType scalar_type_; - int64 tile_rows_; - int64 tile_cols_; - int64 m_; - int64 k_; - llvm::Value* lhs_; - llvm::Value* rhs_; - llvm::Value* result_; - llvm::IRBuilder<>* ir_builder_; - KernelSupportLibrary ksl_; - VectorSupportLibrary vsl_; -}; - -void ColumnMajorMatrixVectorProductEmitter::EmitOuterLoopBody( - llvm::Value* column, int64 column_count, bool is_first_column) { - TileLoader lhs_tile_loader = GetLhsTileLoader(/*column_start=*/column, - /*column_count=*/column_count); - - std::vector rhs_tile = - LoadRhsTile(column, /*count=*/column_count); - EmitInnerLoopTiled(&lhs_tile_loader, rhs_tile, - /*columns=*/column_count, is_first_column); - EmitInnerLoopEpilogue(column, /*columns=*/column_count, is_first_column); -} - -void ColumnMajorMatrixVectorProductEmitter::Emit() { - // See the comment on the class declaration for the algorithm used here. - int64 column_remainder = k_ % tile_cols_; - int64 column_limit = k_ - column_remainder; - - ksl_.For("dot.outer.tiled", - /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols_, - [&](llvm::Value* column, bool is_first_column) { - EmitOuterLoopBody(column, tile_cols_, is_first_column); - }); - - if (column_remainder != 0) { - EmitOuterLoopBody(ir_builder_->getInt64(column_limit), column_remainder, - column_limit == 0); - } -} - -void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( - TileLoader* lhs_tile_loader, const std::vector& rhs_tile, - int64 columns, bool is_first_column) { - int64 row_limit = m_ - (m_ % tile_rows_); - - ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/row_limit, - /*step=*/tile_rows_, [&](llvm::Value* row) { - std::vector lhs_tile = - lhs_tile_loader->LoadTile(/*minor_dim_offset=*/row); - llvm::Value* accumulator = is_first_column - ? vsl_.GetZeroVector() - : vsl_.LoadVector(result_, row); - for (int i = 0; i < columns; i++) { - accumulator = vsl_.MulAdd(lhs_tile[i], rhs_tile[i], accumulator); - } - vsl_.StoreVector(accumulator, result_, row); - }); -} - -void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( - llvm::Value* current_tile_col, int64 columns, bool is_first_tiled_column) { - int64 row_start = m_ - (m_ % tile_rows_); - if (row_start == m_) { - return; - } - - llvm::Value* columns_llvm = ir_builder_->getInt64(columns); - - // for (col = current_tile_col; col < (columns + current_tile_col); col++) - // for (row = row_start, row < m_; row++) { - // result[row] += lhs[row, col] * rhs[col] - // // Also take into account that if col is 0 then result[row] is not - // // initialized. - // } - - ksl_.For( - "dot.inner.epilg.outer", /*start=*/current_tile_col, - /*end=*/ir_builder_->CreateAdd(columns_llvm, current_tile_col), - /*step=*/1, /*peel_first_iteration=*/false, - [&](llvm::Value* col, llvm::Value* is_first_scalar_col) { - llvm::Value* rhs_element = vsl_.LoadScalar(rhs_, col); - llvm::Value* total_offset = - ir_builder_->CreateMul(col, ir_builder_->getInt64(m_)); - llvm::Value* lhs_base_pointer = - vsl_.ComputeOffsetPointer(lhs_, total_offset); - ksl_.For( - "dot.inner.epilg.inner", /*start=*/row_start, /*end=*/m_, - /*step=*/1, [&](llvm::Value* scalar_row) { - llvm::Value* product = vsl_.Mul( - vsl_.LoadScalar(lhs_base_pointer, scalar_row), rhs_element); - llvm::Value* setting_result_first_time = ir_builder_->CreateAnd( - is_first_scalar_col, - ir_builder_->getInt1(is_first_tiled_column)); - ksl_.If( - setting_result_first_time, - [&]() { vsl_.StoreScalar(product, result_, scalar_row); }, - [&]() { - vsl_.StoreScalar( - vsl_.Add(vsl_.LoadScalar(result_, scalar_row), product), - result_, scalar_row); - }); - }); - }); -} - -// Computes a dot product between "[M,K]{1,0} lhs" with a [K,1] vector (the -// layout of the vector does not matter). This implementation uses a tiling -// scheme to improve performance. -// -// We logically separate the LHS matrix into four segments: -// -// +----------------------+---+ -// | | | -// | | | -// | A | B | -// | | | -// | | | -// | | | -// +----------------------+---+ -// | C | D | -// +----------------------+---+ -// -// where A is the largest submatrix of the LHS that can be evenly dividied into -// tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: -// -// +---+---+---+---+ -// |M00|M10|M20|M30| -// +---+---+---+---+ +--+--+--+--+ -// |M01|M11|M21|M31| and |V0|V1|V2|V3| -// +---+---+---+---+ +--+--+--+--+ -// |M02|M12|M22|M32| -// +---+---+---+---+ -// |M03|M13|M23|M33| -// +---+---+---+---+ -// -// (Legend: rows are horizontal and columns are vertical; and each row is one -// llvm::Value of a vector type) -// -// where: -// -// a. The left tile is loaded from the row major left matrix. -// b. The right vector is loaded from the RHS vector. -// -// We keep 4 vector accumulators accumulating the following four vector -// expressions as we iterate over the row dimension: -// -// +------+------+------+------+ -// |M0I*V0|M1I*V1|M2I*V2|M3I*V3| for I in [0,4) -// +------+------+------+------+ -// -// In the end we do a horizontal reduction over these 4 vector accumulators to -// get 4 values in the result vector. -// -// We have an inner epilogue loop to deal with the "B" sub-matrix and an outer -// epilogue loop to deal with the C,D submatrix. -class RowMajorMatrixVectorProductEmitter { - public: - RowMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, int64 tile_rows, - int64 tile_cols, int64 m, int64 k, - llvm::Value* lhs, llvm::Value* rhs, - llvm::Value* result, - llvm::IRBuilder<>* ir_builder) - : scalar_type_(scalar_type), - tile_rows_(tile_rows), - tile_cols_(tile_cols), - m_(m), - k_(k), - lhs_(lhs), - rhs_(rhs), - result_(result), - ir_builder_(ir_builder), - ksl_(ir_builder_), - vsl_(scalar_type_, /*vector_size=*/tile_cols_, ir_builder_, "") { - CHECK(tile_cols_ > 0 && IsPowerOfTwo(static_cast(tile_cols_))); - } - - void Emit(); - - private: - TileLoader GetLhsTileLoader(llvm::Value* row_start, int64 row_count) { - return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, - /*matrix_size_along_minor_dim=*/k_, - /*major_dim_offset=*/row_start, - /*tile_size_along_major_dim=*/row_count); - } - - void EmitOuterLoopBody(llvm::Value* row, int64 row_count); - - void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, int64 rows, - std::vector* vector_accumulators); - - void EmitInnerLoopEpilogue(llvm::Value* current_tile_row, int64 rows, - std::vector* scalar_accumulators); - - PrimitiveType scalar_type_; - int64 tile_rows_; - int64 tile_cols_; - int64 m_; - int64 k_; - llvm::Value* lhs_; - llvm::Value* rhs_; - llvm::Value* result_; - llvm::IRBuilder<>* ir_builder_; - KernelSupportLibrary ksl_; - VectorSupportLibrary vsl_; -}; - -void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row, - int64 row_count) { - TileLoader lhs_tile_loader = GetLhsTileLoader(/*row_start=*/row, - /*row_count=*/row_count); - std::vector vector_accumulators; - std::vector scalar_accumulators; - for (int i = 0; i < row_count; i++) { - vector_accumulators.emplace_back(&vsl_, vsl_.GetZeroVector()); - scalar_accumulators.emplace_back(&vsl_, vsl_.GetZeroScalar()); - } - EmitInnerLoopTiled(&lhs_tile_loader, /*rows=*/row_count, - &vector_accumulators); - EmitInnerLoopEpilogue(/*current_tile_row=*/row, /*rows=*/row_count, - &scalar_accumulators); - - for (int i = 0; i < row_count; i++) { - llvm::Value* result_value = - vsl_.Add(vsl_.AddReduce(vector_accumulators[i].Get()), - scalar_accumulators[i].Get()); - llvm::Value* offset = ir_builder_->CreateAdd(ir_builder_->getInt64(i), row); - vsl_.StoreScalar(result_value, result_, offset); - } -} - -void RowMajorMatrixVectorProductEmitter::Emit() { - // See the comment on the class declaration for the algorithm used here. - int64 row_remainder = m_ % tile_rows_; - int64 row_limit = m_ - row_remainder; - - ksl_.For("dot.outer.tiled", - /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows_, - [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows_); }); - - if (row_remainder != 0) { - EmitOuterLoopBody(ir_builder_->getInt64(row_limit), row_remainder); - } -} - -void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( - TileLoader* lhs_tile_loader, int64 rows, - std::vector* vector_accumulators) { - int64 column_limit = k_ - (k_ % tile_cols_); - - ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/column_limit, - /*step=*/tile_cols_, [&](llvm::Value* col) { - std::vector lhs_tile = - lhs_tile_loader->LoadTile(/*minor_dim_offset=*/col); - llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col); - for (int i = 0; i < rows; i++) { - llvm::Value* old_sum = (*vector_accumulators)[i].Get(); - (*vector_accumulators)[i].Set( - vsl_.Add(old_sum, vsl_.Mul(rhs_value, lhs_tile[i]))); - } - }); -} - -void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( - llvm::Value* current_tile_row, int64 rows, - std::vector* scalar_accumulators) { - int64 column_start = k_ - (k_ % tile_cols_); - if (column_start == k_) { - return; - } - - for (int r = 0; r < rows; r++) { - llvm::Value* total_offset = ir_builder_->CreateMul( - ir_builder_->CreateAdd(ir_builder_->getInt64(r), current_tile_row), - ir_builder_->getInt64(k_)); - llvm::Value* lhs_base_pointer = - vsl_.ComputeOffsetPointer(lhs_, total_offset); - ksl_.For("dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k_, - /*step=*/1, [&](llvm::Value* scalar_col) { - llvm::Value* product = - vsl_.Mul(vsl_.LoadScalar(lhs_base_pointer, scalar_col), - vsl_.LoadScalar(rhs_, scalar_col)); - llvm::Value* old_value = (*scalar_accumulators)[r].Get(); - (*scalar_accumulators)[r].Set(vsl_.Add(old_value, product)); - }); - } -} - -} // namespace - DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs, const llvm_ir::IrArray& target_array, @@ -518,93 +72,6 @@ DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, bool DotOpEmitter::ShapesAreLegalForRuntimeDot() const { return true; } -bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { - if (dot_.shape().dimensions_size() != 2 || - ProfitableToImplementDotInUntiledLlvmIr(dot_) == - DotInLlvmIrProfitable::kYes) { - return false; - } - - if (!primitive_util::IsFloatingPointType(dot_.shape().element_type()) && - !primitive_util::IsIntegralType(dot_.shape().element_type())) { - return false; - } - - MatMultDims mat_mult_dims = GetMatMultDims(); - bool is_column_major_matrix_vector = false; - bool is_row_major_matrix_vector = false; - - int64 m, k; - bool swap_operands; - - if (mat_mult_dims.m == 1) { - bool rhs_effectively_row_major = - transpose_rhs_ ^ !mat_mult_dims.rhs_column_major; - if (rhs_effectively_row_major) { - k = mat_mult_dims.k; - m = mat_mult_dims.n; - is_column_major_matrix_vector = true; - swap_operands = true; - } else { - k = mat_mult_dims.k; - m = mat_mult_dims.n; - is_row_major_matrix_vector = true; - swap_operands = true; - } - } - - if (mat_mult_dims.n == 1) { - bool lhs_effectively_column_major = - transpose_lhs_ ^ mat_mult_dims.lhs_column_major; - if (lhs_effectively_column_major) { - m = mat_mult_dims.m; - k = mat_mult_dims.k; - is_column_major_matrix_vector = true; - swap_operands = false; - } else { - m = mat_mult_dims.m; - k = mat_mult_dims.k; - is_row_major_matrix_vector = true; - swap_operands = false; - } - } - - if (!is_column_major_matrix_vector && !is_row_major_matrix_vector) { - return false; - } - - int64 tiling_factor = GetGemvTilingFactor(); - CHECK_GT(tiling_factor, 0); - - if (is_column_major_matrix_vector) { - VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m - << " and k = " << k; - ColumnMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), /*tile_rows=*/8, - /*tile_cols=*/tiling_factor, m, k, - swap_operands ? rhs_array_.GetBasePointer() - : lhs_array_.GetBasePointer(), - swap_operands ? lhs_array_.GetBasePointer() - : rhs_array_.GetBasePointer(), - target_array_.GetBasePointer(), ir_builder_); - emitter.Emit(); - } else { - VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m - << " and k = " << k; - RowMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), /*tile_rows=*/tiling_factor, - /*tile_cols=*/8, m, k, - swap_operands ? rhs_array_.GetBasePointer() - : lhs_array_.GetBasePointer(), - swap_operands ? lhs_array_.GetBasePointer() - : rhs_array_.GetBasePointer(), - target_array_.GetBasePointer(), ir_builder_); - emitter.Emit(); - } - - return true; -} - tensorflow::Status DotOpEmitter::Emit() { // The dot operation performs a sum of products over dimension 0 of the left // hand side operand and dimension 1 of the right hand side operand. @@ -638,10 +105,6 @@ tensorflow::Status DotOpEmitter::Emit() { return EmitScalarDot(); } - if (EmitLlvmIrDotIfProfitable()) { - return Status::OK(); - } - if (PotentiallyImplementedAsEigenDot(dot_)) { return EmitCallToRuntime(); } @@ -877,17 +340,22 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { // // Effectively this involves swapping the 'lhs' with 'rhs' and 'm' with 'n'. - MatMultDims mat_mult_dims = GetMatMultDims(); + const Shape& lhs_shape = lhs_array_.GetShape(); + const Shape& rhs_shape = rhs_array_.GetShape(); - CHECK_EQ(mat_mult_dims.lhs_column_major, mat_mult_dims.rhs_column_major); + CHECK(LayoutUtil::Equal(lhs_shape.layout(), rhs_shape.layout())); + int64 m = lhs_shape.dimensions(transpose_lhs_ ? 1 : 0); + int64 k = lhs_shape.dimensions(transpose_lhs_ ? 0 : 1); + int64 n = rhs_shape.dimensions(transpose_rhs_ ? 0 : 1); const llvm_ir::IrArray* lhs = &lhs_array_; const llvm_ir::IrArray* rhs = &rhs_array_; bool transpose_lhs = transpose_lhs_; bool transpose_rhs = transpose_rhs_; - if (!mat_mult_dims.lhs_column_major) { - std::swap(mat_mult_dims.m, mat_mult_dims.n); + bool is_column_major = lhs_shape.layout().minor_to_major(0) == 0; + if (!is_column_major) { + std::swap(m, n); std::swap(lhs, rhs); std::swap(transpose_lhs, transpose_rhs); } @@ -899,27 +367,12 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { float_ptr_type), ir_builder_->CreateBitCast(lhs->GetBasePointer(), float_ptr_type), ir_builder_->CreateBitCast(rhs->GetBasePointer(), float_ptr_type), - ir_builder_->getInt64(mat_mult_dims.m), - ir_builder_->getInt64(mat_mult_dims.n), - ir_builder_->getInt64(mat_mult_dims.k), - ir_builder_->getInt32(transpose_lhs), + ir_builder_->getInt64(m), ir_builder_->getInt64(n), + ir_builder_->getInt64(k), ir_builder_->getInt32(transpose_lhs), ir_builder_->getInt32(transpose_rhs)}); return tensorflow::Status::OK(); } -DotOpEmitter::MatMultDims DotOpEmitter::GetMatMultDims() const { - CHECK_EQ(dot_.shape().dimensions_size(), 2); - - const Shape& lhs_shape = lhs_array_.GetShape(); - const Shape& rhs_shape = rhs_array_.GetShape(); - - return {lhs_shape.dimensions(transpose_lhs_ ? 1 : 0), - lhs_shape.dimensions(transpose_lhs_ ? 0 : 1), - rhs_shape.dimensions(transpose_rhs_ ? 0 : 1), - lhs_shape.layout().minor_to_major(0) == 0, - rhs_shape.layout().minor_to_major(0) == 0}; -} - llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest( llvm_ir::ForLoopNest* loop_nest, const llvm_ir::IrArray& operand_array, int64 reduction_dimension, tensorflow::StringPiece name_suffix) { diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index 470bf6ffb4..cfc1066045 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -17,7 +17,6 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_DOT_OP_EMITTER_H_ #include "llvm/IR/IRBuilder.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_options.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" @@ -60,10 +59,6 @@ class DotOpEmitter { // LHS and RHS) and store the results in the target. tensorflow::Status EmitScalarDot(); - // Emit an LLVM IR implementation of the dot operation if we can. Returns - // true if an LLVM IR implementation was emitted. - bool EmitLlvmIrDotIfProfitable(); - // Emits a call to the CPU runtime to perform the matrix multiply. tensorflow::Status EmitCallToRuntime(); @@ -82,38 +77,6 @@ class DotOpEmitter { // no padding, and a rank of two. bool ShapesAreLegalForRuntimeDot() const; - // Represents the dimensions of a matrix-matrix multiply operation. - struct MatMultDims { - // The number of rows in the LHS. - int64 m; - - // The number of columns in the LHS, which is also must be equal to the - // number of rows in the RHS. - int64 k; - - // The number of columns on the RHS. - int64 n; - - // True if the LHS matrix column major. - bool lhs_column_major; - - // True if the RHS matrix column major. - bool rhs_column_major; - }; - - // Get the MatMultDims instance for the dot product this DotOpEmitter - // represents. Precondition: the dot is of rank 2 (and thus its operands are - // of rank 2 as well). - MatMultDims GetMatMultDims() const; - - // When doing a tiled GEMV in LLVM IR, a "tile" consists of this many vector - // registers. - int64 GetGemvTilingFactor() const { - const int64 kDefaultTilingFactor = 8; - return options::LlvmIrGemvTilingFactor(hlo_module_config_) - .value_or(kDefaultTilingFactor); - } - const HloInstruction& dot_; const bool transpose_lhs_; const bool transpose_rhs_; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc index 7149a19310..b99b36a55e 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc @@ -105,9 +105,7 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } - if (ProfitableToImplementDotInUntiledLlvmIr(hlo) == - DotInLlvmIrProfitable::kYes || - ProfitableToImplementDotInTiledLlvmIr(hlo)) { + if (ProfitableToImplementDotInLlvmIr(hlo) == DotInLlvmIrProfitable::kYes) { return false; } @@ -138,7 +136,7 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } -DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( +DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( const HloInstruction& dot) { if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) { const Shape& result_shape = dot.shape(); @@ -180,16 +178,5 @@ DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( return DotInLlvmIrProfitable::kNo; } -bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) { - // Any Matrix-Vector product of floating point or integral type, or - // a transpose-dot fusion of the same can be lowered to a tiled LLVM - // IR implementation. - const Shape& shape = dot.shape(); - return shape.dimensions_size() == 2 && - (shape.dimensions(0) == 1 || shape.dimensions(1) == 1) && - (primitive_util::IsFloatingPointType(shape.element_type()) || - primitive_util::IsIntegralType(shape.element_type())); -} - } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h index cbe07a7c2b..66656ed997 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h @@ -29,21 +29,16 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& dot); enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs }; // Returns a value to indicate if (and under what conditions) will lowering -// |dot| as a untiled LLVM IR dot operation be profitable over calling into -// Eigen or emitting a tiled LLVM IR implementation. Possible return values -// are: +// |dot| as a pure LLVM IR dot operation be profitable over calling into Eigen. +// Possible return values are: // // * DotInLlvmIrProfitable::kYes - always profitable. // * DotInLlvmIrProfitable::kNo - never profitable. // * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make // the Rhs layout column major. -DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( +DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( const HloInstruction& dot); -// Returns true to indicate that we can generate a tiled LLVM IR implementation -// for |dot|. -bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot); - } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index e547f291b8..a20ce6826c 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1983,11 +1983,6 @@ Status IrEmitter::HandleSend(HloInstruction* send) { return Unimplemented("Send is not implemented on CPU. See b/33942983."); } -Status IrEmitter::HandleSendDone(HloInstruction* send_done) { - // TODO(b/33942983): Support Send/Recv on CPU. - return Unimplemented("Send-done is not implemented on CPU. See b/33942983."); -} - Status IrEmitter::HandleSlice(HloInstruction* slice) { VLOG(2) << "HandleSlice: " << slice->ToString(); auto operand = slice->operand(0); @@ -2153,11 +2148,6 @@ Status IrEmitter::HandleRecv(HloInstruction* recv) { return Unimplemented("Recv is not implemented on CPU. See b/33942983."); } -Status IrEmitter::HandleRecvDone(HloInstruction* recv_done) { - // TODO(b/33942983): Support Send/Recv on CPU. - return Unimplemented("Recv-done is not implemented on CPU. See b/33942983."); -} - Status IrEmitter::HandlePad(HloInstruction* pad) { // CPU backend does not properly handle negative padding but this is ok // because negative padding should be removed by the algebraic simplifier. diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 83eded5ad8..5d061e11e3 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -171,13 +171,11 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleReduceWindow(HloInstruction* reduce_window) override; Status HandleSelectAndScatter(HloInstruction* select_and_scatter) override; Status HandleSend(HloInstruction* send) override; - Status HandleSendDone(HloInstruction* send_done) override; Status HandleSlice(HloInstruction* slice) override; Status HandleDynamicSlice(HloInstruction* dynamic_slice) override; Status HandleDynamicUpdateSlice( HloInstruction* dynamic_update_slice) override; Status HandleRecv(HloInstruction* recv) override; - Status HandleRecvDone(HloInstruction* recv_done) override; Status HandlePad(HloInstruction* pad) override; Status HandleTuple(HloInstruction* tuple) override; Status HandleMap(HloInstruction* map) override; diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc index b75ca34e0a..c446b6b792 100644 --- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc @@ -51,7 +51,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( tensorflow::gtl::FlatMap should_make_rhs_col_major_cache; auto should_make_rhs_col_major = [&](const HloInstruction& instruction) { - if (ProfitableToImplementDotInUntiledLlvmIr(instruction) != + if (ProfitableToImplementDotInLlvmIr(instruction) != DotInLlvmIrProfitable::kWithColumnMajorRhs) { return false; } @@ -68,7 +68,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( bool result = std::all_of( rhs->users().begin(), rhs->users().end(), [&](HloInstruction* user) { - return ProfitableToImplementDotInUntiledLlvmIr(*user) == + return ProfitableToImplementDotInLlvmIr(*user) == DotInLlvmIrProfitable::kWithColumnMajorRhs && user->operand(0) != rhs; }); diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index bc73839a88..de3cd15440 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -211,11 +211,9 @@ class DfsHloVisitorBase { virtual Status HandlePad(HloInstructionPtr hlo) = 0; - virtual Status HandleSend(HloInstructionPtr send) = 0; - virtual Status HandleSendDone(HloInstructionPtr send_done) = 0; + virtual Status HandleSend(HloInstructionPtr hlo) = 0; - virtual Status HandleRecv(HloInstructionPtr recv) = 0; - virtual Status HandleRecvDone(HloInstructionPtr recv_done) = 0; + virtual Status HandleRecv(HloInstructionPtr hlo) = 0; virtual Status HandleBatchNormTraining(HloInstructionPtr hlo) = 0; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index 5415bab5b3..7ce88be89d 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -167,17 +167,11 @@ class DfsHloVisitorWithDefaultBase Status HandleWhile(HloInstructionPtr xla_while) override { return DefaultAction(xla_while); } - Status HandleRecv(HloInstructionPtr recv) override { - return DefaultAction(recv); - } - Status HandleRecvDone(HloInstructionPtr recv_done) override { - return DefaultAction(recv_done); - } Status HandleSend(HloInstructionPtr send) override { return DefaultAction(send); } - Status HandleSendDone(HloInstructionPtr send_done) override { - return DefaultAction(send_done); + Status HandleRecv(HloInstructionPtr recv) override { + return DefaultAction(recv); } // Invoked to inform the visitor that the traversal has completed, and that diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index e79d0a4c79..536b96dcf6 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -19,7 +19,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" @@ -280,13 +279,6 @@ std::vector ConvolutionThunk::GetAlgorithms( return algorithms; } -static string AlgorithmToString(const se::dnn::AlgorithmDesc& algo) { - if (algo.tensor_ops_enabled()) { - return tensorflow::strings::StrCat(algo.algo_id(), "+TC"); - } - return tensorflow::strings::StrCat(algo.algo_id()); -} - tensorflow::Status ConvolutionThunk::ConvolveWithTune( const BatchDescriptor& input_descriptor, se::DeviceMemory input_data, const FilterDescriptor& filter_descriptor, @@ -311,8 +303,6 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( buffer_allocations.device_ordinal(), buffer_allocations.memory_allocator()); se::dnn::ProfileResult profile_result; - VLOG(3) << "Trying algorithm " << AlgorithmToString(algorithm) - << " for ConvolutionThunk: " << this; bool launch_ok = Convolve(input_descriptor, input_data, filter_descriptor, filter_data, output_descriptor, output_data, convolution_descriptor, @@ -320,11 +310,6 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( &scratch_allocator, &profile_result) .ok(); if (launch_ok && profile_result.is_valid()) { - VLOG(3) << "Run of algorithm " << AlgorithmToString(algorithm) - << " for ConvolutionThunk " << this << " succeeded, taking " - << profile_result.elapsed_time_in_ms() - << "ms. (Best result: " << best_result.elapsed_time_in_ms() - << "ms)"; if (profile_result.elapsed_time_in_ms() < best_result.elapsed_time_in_ms()) { best_result = profile_result; @@ -334,9 +319,6 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( best_result_without_scratch.elapsed_time_in_ms()) { best_result_without_scratch = profile_result; } - } else { - VLOG(3) << "Run of algorithm " << AlgorithmToString(algorithm) - << " for ConvolutionThunk " << this << " failed."; } } @@ -361,8 +343,8 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( { VLOG(2) << "Using convolution algorithm (" - << AlgorithmToString(best_algorithm_.algorithm()) << ", " - << AlgorithmToString(best_algorithm_.algorithm_no_scratch()) + << best_algorithm_.algorithm().algo_id() << ", " + << best_algorithm_.algorithm_no_scratch().algo_id() << ") for ConvolutionThunk: " << this; ConvolveScratchAllocator scratch_allocator( buffer_allocations.device_ordinal(), diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 187b4a705c..ceb0e530c1 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -75,7 +75,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/platform/subprocess.h" -#include "tensorflow/core/platform/tracing.h" namespace se = ::perftools::gputools; @@ -88,7 +87,6 @@ namespace gpu { namespace { -using tensorflow::port::Tracing; using tensorflow::strings::StrCat; // Any address of a variable residing in global memory or returned by one of the @@ -233,7 +231,6 @@ tensorflow::Status PrepareHloModuleForIrEmitting( // code (i.e. a cubin) as a byte array. StatusOr> CompilePtx(const string& ptx, int cc_major, int cc_minor) { - Tracing::TraceMe annotation("Compile PTX", /*is_expensive=*/true); const string ptxas_path = tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas"); VLOG(2) << "Using ptxas at " << ptxas_path; @@ -298,15 +295,11 @@ StatusOr> GpuCompiler::Compile( std::unique_ptr module, se::StreamExecutor* stream_exec) { TF_RET_CHECK(stream_exec != nullptr); - { - Tracing::TraceMe annotation("HLO Transforms", module->name(), - /*is_expensive=*/true); - TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), - stream_exec->GetDeviceDescription(), - ShapeSizeBytesFunction())); - TF_RETURN_IF_ERROR( - PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); - } + TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), + stream_exec->GetDeviceDescription(), + ShapeSizeBytesFunction())); + TF_RETURN_IF_ERROR( + PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); llvm::LLVMContext llvm_context; std::string buffer; @@ -451,7 +444,6 @@ StatusOr> GpuCompiler::Compile( std::vector GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx, int cc_major, int cc_minor) { - Tracing::TraceMe annotation("PTX->CUBIN", /*is_expensive=*/true); bool inserted; decltype(compilation_cache_.begin()) iter; // Pointers into compilation_cache_ where the ptx and (optional) cubin are diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index 9d55c7859d..57a3f713e3 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -128,18 +128,10 @@ Status IrEmitter::HandleSend(HloInstruction*) { return Unimplemented("Send is not implemented on GPU"); } -Status IrEmitter::HandleSendDone(HloInstruction*) { - return Unimplemented("Send-Done is not implemented on GPU"); -} - Status IrEmitter::HandleRecv(HloInstruction*) { return Unimplemented("Recv is not implemented on GPU"); } -Status IrEmitter::HandleRecvDone(HloInstruction*) { - return Unimplemented("Recv-done is not implemented on GPU"); -} - Status IrEmitter::HandleTuple(HloInstruction* tuple) { std::vector base_ptrs; for (const HloInstruction* operand : tuple->operands()) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h index 61fdeaa0ee..263992d925 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h @@ -84,9 +84,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleOutfeed(HloInstruction* outfeed) override; Status HandleSort(HloInstruction* sort) override; Status HandleSend(HloInstruction* send) override; - Status HandleSendDone(HloInstruction* send_done) override; Status HandleRecv(HloInstruction* recv) override; - Status HandleRecvDone(HloInstruction* recv_done) override; Status HandleParameter(HloInstruction* parameter) override; Status HandleReduce(HloInstruction* reduce) override; Status HandleTuple(HloInstruction* tuple) override; diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 1cb963be61..817e95a31c 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -60,7 +60,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/tracing.h" namespace xla { namespace gpu { @@ -489,9 +488,6 @@ StatusOr CompileToPtx(llvm::Module* module, string ptx; { - tensorflow::port::Tracing::TraceMe annotation( - "Compiling IR", llvm_ir::AsString(module->getName()), - /*is_expensive=*/true); ScopedLoggingTimer compilation_timer( "Compile module " + llvm_ir::AsString(module->getName()), /*vlog_level=*/2); diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 1877065f67..17ba2b673a 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -337,18 +337,10 @@ Status HloCostAnalysis::HandleSend(const HloInstruction*) { return Status::OK(); } -Status HloCostAnalysis::HandleSendDone(const HloInstruction*) { - return Status::OK(); -} - Status HloCostAnalysis::HandleRecv(const HloInstruction*) { return Status::OK(); } -Status HloCostAnalysis::HandleRecvDone(const HloInstruction*) { - return Status::OK(); -} - Status HloCostAnalysis::HandleReshape(const HloInstruction*) { return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 0f44775378..8074868e37 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -60,9 +60,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { Status HandleReducePrecision(const HloInstruction* hlo) override; Status HandleConcatenate(const HloInstruction* concatenate) override; Status HandleSend(const HloInstruction* send) override; - Status HandleSendDone(const HloInstruction* send_done) override; Status HandleRecv(const HloInstruction* recv) override; - Status HandleRecvDone(const HloInstruction* recv_done) override; Status HandleConvert(const HloInstruction* convert) override; Status HandleCopy(const HloInstruction* copy) override; Status HandleDot(const HloInstruction* dot) override; diff --git a/tensorflow/compiler/xla/service/hlo_cse_test.cc b/tensorflow/compiler/xla/service/hlo_cse_test.cc index 3601a790c4..7c4626e78a 100644 --- a/tensorflow/compiler/xla/service/hlo_cse_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cse_test.cc @@ -79,12 +79,12 @@ TEST_F(HloCseTest, CombineTwoConstantsDifferentLayoutsAndInsensitive) { // Test that two identical constants with different layouts are commoned if // the pass is not layout sensitive. auto builder = HloComputation::Builder(TestName()); - auto constant1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); - auto constant2 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); + auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + /*minor_to_major=*/{0, 1}))); + auto constant2 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + /*minor_to_major=*/{1, 0}))); auto add = builder.AddInstruction(HloInstruction::CreateBinary( constant1->shape(), HloOpcode::kAdd, constant1, constant2)); @@ -111,12 +111,12 @@ TEST_F(HloCseTest, CombineTwoConstantsDifferentLayoutsAndSensitive) { // Test that two identical constants with different layouts are *not* commoned // if the pass is layout sensitive. auto builder = HloComputation::Builder(TestName()); - auto constant1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); - auto constant2 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); + auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + /*minor_to_major=*/{0, 1}))); + auto constant2 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + /*minor_to_major=*/{1, 0}))); auto add = builder.AddInstruction(HloInstruction::CreateBinary( constant1->shape(), HloOpcode::kAdd, constant1, constant2)); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index ff80f18bb5..92261bce62 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -242,51 +242,6 @@ bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) { return false; } -bool HloDataflowAnalysis::UpdateSendValueSet(HloInstruction* send) { - CHECK_EQ(send->opcode(), HloOpcode::kSend); - bool changed = false; - // Send forwards the operand value to the output tuple at {0}. - for (auto& pair : GetInstructionValueSet(send->operand(0))) { - const ShapeIndex& operand_index = pair.first; - const HloValueSet& operand_value_set = pair.second; - - ShapeIndex index = {0}; - for (int64 i : operand_index) { - index.push_back(i); - } - - HloValueSet& value_set = GetValueSet(send, index); - if (value_set != operand_value_set) { - value_set = operand_value_set; - changed = true; - } - } - return changed; -} - -bool HloDataflowAnalysis::UpdateRecvDoneValueSet(HloInstruction* recv_done) { - CHECK_EQ(recv_done->opcode(), HloOpcode::kRecvDone); - bool changed = false; - // RecvDone forwards the operand value at {0} to the output. - for (auto& pair : GetInstructionValueSet(recv_done)) { - ShapeIndex& index = pair.first; - HloValueSet& value_set = pair.second; - - ShapeIndex operand_index = {0}; - for (int64 i : index) { - operand_index.push_back(i); - } - - const HloValueSet& operand_value_set = - GetValueSet(recv_done->operand(0), operand_index); - if (value_set != operand_value_set) { - value_set = operand_value_set; - changed = true; - } - } - return changed; -} - bool HloDataflowAnalysis::UpdateCallValueSet(HloInstruction* call) { CHECK_EQ(call->opcode(), HloOpcode::kCall); InstructionValueSet& value_set = GetInstructionValueSet(call); @@ -474,10 +429,6 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet( return UpdateCallValueSet(instruction); case HloOpcode::kWhile: return UpdateWhileValueSet(instruction); - case HloOpcode::kSend: - return UpdateSendValueSet(instruction); - case HloOpcode::kRecvDone: - return UpdateRecvDoneValueSet(instruction); default: // Instruction does not forward HloValues (it defines all values in its // output). No update is necessary. @@ -586,12 +537,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { GetValueSet(instruction, /*index=*/{}).AddValue(value); }; - // Lambda to set the value set at the given index of the output. - auto define_value_at = [this, &instruction](const ShapeIndex& index) { - HloValue* value = NewHloValue(instruction, index, /*is_phi=*/false); - GetValueSet(instruction, index).AddValue(value); - }; - switch (instruction->opcode()) { case HloOpcode::kBitcast: if (bitcast_defines_value_) { @@ -632,16 +577,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { // values flow from their operands. define_top_level_only(); break; - case HloOpcode::kRecvDone: - // RecvDone aliases its input tuple element {0}, therefore does not - // define any values. - break; - case HloOpcode::kSend: - // Send produces a tuple of {aliased operand, U32 context}, therefore - // only defines the top-level tuple and the tuple element at {1}. - define_value_at(/*index=*/{}); - define_value_at(/*index=*/{1}); - break; default: define_all_values(); break; diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index 63467f3206..207e553bf7 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -146,9 +146,7 @@ class HloDataflowAnalysis { bool UpdateCopyValueSet(HloInstruction* copy); bool UpdateGetTupleElementValueSet(HloInstruction* gte); bool UpdateParameterValueSet(HloInstruction* parameter); - bool UpdateRecvDoneValueSet(HloInstruction* recv_done); bool UpdateSelectValueSet(HloInstruction* select); - bool UpdateSendValueSet(HloInstruction* send); bool UpdateTupleValueSet(HloInstruction* tuple); bool UpdateWhileValueSet(HloInstruction* xla_while); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 66a538fc51..4b8eb237a6 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -1139,54 +1139,6 @@ TEST_P(HloDataflowAnalysisTest, TupleCopy) { analysis.GetValueDefinedAt(copy, /*index=*/{}).live_out_of_module()); } -TEST_P(HloDataflowAnalysisTest, SendAndSendDone) { - // Test that a Send forwards its operand to the output tuple at {0}. - auto builder = HloComputation::Builder(TestName()); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param0")); - auto send = builder.AddInstruction( - HloInstruction::CreateSend(param, /*channel_id=*/0)); - auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send)); - module_->AddEntryComputation(builder.Build()); - - bool ssa_form = GetParam(); - const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - - EXPECT_EQ(analysis.values().size(), 4); - - EXPECT_TRUE(analysis.ValueIsDefinedAt(param)); - EXPECT_TRUE(analysis.ValueIsDefinedAt(send, /*index=*/{})); - EXPECT_FALSE(analysis.ValueIsDefinedAt(send, /*index=*/{0})); - EXPECT_TRUE(analysis.ValueIsDefinedAt(send, /*index=*/{1})); - EXPECT_TRUE(analysis.ValueIsDefinedAt(send_done)); - EXPECT_THAT(HloValuesAt(send, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(param))); -} - -TEST_P(HloDataflowAnalysisTest, RecvAndRecvDone) { - // Test that a RecvDone forwards its operand tuple element at {0} to the - // output. - auto builder = HloComputation::Builder(TestName()); - auto recv = builder.AddInstruction( - HloInstruction::CreateRecv(scalar_shape_, /*channel_id=*/0)); - auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv)); - module_->AddEntryComputation(builder.Build()); - - bool ssa_form = GetParam(); - const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - - EXPECT_EQ(analysis.values().size(), 3); - - EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{})); - EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{0})); - EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{1})); - EXPECT_FALSE(analysis.ValueIsDefinedAt(recv_done)); - EXPECT_THAT(HloValuesAt(recv_done), - UnorderedElementsAre(analysis.GetValueDefinedAt(recv, {0}))); - EXPECT_TRUE( - analysis.GetValueDefinedAt(recv, /*index=*/{0}).live_out_of_module()); -} - TEST_P(HloDataflowAnalysisTest, ElementwiseChainInterference) { // A simple chain of elementwise operations. No values should interfere. // diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index a722d1b3d9..88b77ccdd0 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1450,10 +1450,6 @@ HloEvaluator::HloEvaluator() { typed_visitors_[F32] = MakeUnique>(this); typed_visitors_[F64] = MakeUnique>(this); typed_visitors_[C64] = MakeUnique>(this); - - typed_visitors_[BF16] = MakeUnique([](HloInstruction*) { - return Unimplemented("HloEvaluator: unhandled primitive type: BF16."); - }); typed_visitors_[TUPLE] = MakeUnique([](HloInstruction*) { return Unimplemented("HloEvaluator: unhandled primitive type: TUPLE."); }); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 7557aaa248..67b6e215fc 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -39,18 +39,16 @@ class HloEvaluator : public DfsHloVisitorWithDefault { HloEvaluator(); // Evaluates an HLO module and an array of pointers to literals. // Returns the evaluated result as a literal if successful. - // Precondition: The indices of arg_literals correspond to the parameter - // numbers of the HLO parameters in the computation. See comment below for an - // example. + // Precondition: argument literals correspond to each input computation's + // parameters in their post-ordering. See comment below for example. StatusOr> Evaluate( const HloModule& module, tensorflow::gtl::ArraySlice arg_literals); // Evaluates an HLO computation and an array of pointers to literals. // Returns the evaluated result as a literal if successful. - // Precondition: The indices of arg_literals correspond to the parameter - // numbers of the HLO parameters in the computation. For e.g., consider the - // following graph: + // Precondition: argument literals correspond to the input computation's + // parameters in their post-ordering. For e.g., consider the following graph: // // * // / \ @@ -59,9 +57,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // / \ // Parameter0 Constant // - // where Parameter0 has parameter_number 0 and Parameter1 has parameter_number - // 1 in this computation. The input literals array will then have its first - // literal map to Parameter0 and the second map to Parameter1. + // The input literals array will have its first literal map to Parameter0 and + // the second map to Parameter1. StatusOr> Evaluate( const HloComputation& computation, tensorflow::gtl::ArraySlice arg_literals); diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 04b3059fb1..fd162622ce 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -761,22 +761,12 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { string HloDotDumper::GetInstructionNodeInlinedOperands( const HloInstruction* instr) { auto stringify_constant = [](const HloInstruction* constant) { - const auto& shape = constant->shape(); - - // Print the literal value of constants with <= K elements. - optional elem_count; - if (!ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)) { - elem_count = 1; - for (int64 dim : shape.dimensions()) { - *elem_count *= dim; - } - } - if (elem_count.has_value() && *elem_count <= 8) { - return Printf("%s (%s)", constant->literal().ToString(), + if (ShapeUtil::IsEffectiveScalar(constant->shape())) { + auto elem_idx = IndexUtil::LinearIndexToMultidimensionalIndex( + constant->shape(), /*linear_index=*/0); + return Printf("%s (%s)", constant->literal().GetAsString(elem_idx), ShapeUtil::HumanString(constant->shape())); } - - // Otherwise, print e.g. "%constant.42 (s32[100])". string constant_name; if (tensorflow::StringPiece(constant->name()).starts_with("%constant")) { constant_name = constant->name(); @@ -943,9 +933,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kFusion: return kGray; case HloOpcode::kSend: - case HloOpcode::kSendDone: case HloOpcode::kRecv: - case HloOpcode::kRecvDone: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kCrossReplicaSum: @@ -1039,9 +1027,7 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) { ? "" : StrCat("stride=", VectorString(instr->slice_strides())); case HloOpcode::kSend: - case HloOpcode::kSendDone: case HloOpcode::kRecv: - case HloOpcode::kRecvDone: return StrCat("channel_id=", instr->channel_id()); default: return ""; @@ -1303,9 +1289,7 @@ NodeFilter MakeNodeFilter(const HloInstruction* root, int64 radius) { auto is_displayed = [&](const HloInstruction* instr) { // Constants are displayed inline with their users; they're never omitted. - // Nodes in subcomputations are always shown. - return nodes.count(instr) > 0 || instr->opcode() == HloOpcode::kConstant || - instr->parent() != root->parent(); + return nodes.count(instr) > 0 || instr->opcode() == HloOpcode::kConstant; }; // Make a second pass over 'nodes' to fix up the NodeFilterResults now that we diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 674d3e3836..5107ac782d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -371,50 +371,20 @@ HloInstruction::CreateCrossReplicaSum(const Shape& shape, /* static */ std::unique_ptr HloInstruction::CreateSend( HloInstruction* operand, int64 channel_id) { - // Send instruction produces a tuple of {aliased operand, U32 context}. - Shape output_shape = ShapeUtil::MakeTupleShape( - {operand->shape(), ShapeUtil::MakeShape(U32, {})}); auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kSend, output_shape)); + WrapUnique(new HloInstruction(HloOpcode::kSend, ShapeUtil::MakeNil())); instruction->AppendOperand(operand); instruction->channel_id_ = channel_id; return instruction; } -/* static */ std::unique_ptr HloInstruction::CreateSendDone( - HloInstruction* operand) { - CHECK(operand->opcode() == HloOpcode::kSend) - << "SendDone must take the context operand from Send"; - auto instruction = WrapUnique( - new HloInstruction(HloOpcode::kSendDone, ShapeUtil::MakeNil())); - instruction->AppendOperand(operand); - instruction->channel_id_ = operand->channel_id(); - return instruction; -} - /* static */ std::unique_ptr HloInstruction::CreateRecv( const Shape& shape, int64 channel_id) { - // Recv instruction produces a tuple of {receive buffer, U32 context}. - Shape output_shape = - ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U32, {})}); - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kRecv, output_shape)); + auto instruction = WrapUnique(new HloInstruction(HloOpcode::kRecv, shape)); instruction->channel_id_ = channel_id; return instruction; } -/* static */ std::unique_ptr HloInstruction::CreateRecvDone( - HloInstruction* operand) { - CHECK(operand->opcode() == HloOpcode::kRecv) - << "RecvDone must take the context operand from Recv"; - Shape output_shape = ShapeUtil::GetTupleElementShape(operand->shape(), 0); - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kRecvDone, output_shape)); - instruction->AppendOperand(operand); - instruction->channel_id_ = operand->channel_id(); - return instruction; -} - /* static */ std::unique_ptr HloInstruction::CreateReverse( const Shape& shape, HloInstruction* operand, tensorflow::gtl::ArraySlice dimensions) { @@ -938,9 +908,7 @@ RandomDistribution HloInstruction::random_distribution() const { bool HloInstruction::HasSideEffect() const { switch (opcode_) { case HloOpcode::kSend: - case HloOpcode::kSendDone: case HloOpcode::kRecv: - case HloOpcode::kRecvDone: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kTrace: @@ -1196,9 +1164,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( new_operands[4], epsilon(), feature_index()); break; case HloOpcode::kRecv: - case HloOpcode::kRecvDone: case HloOpcode::kSend: - case HloOpcode::kSendDone: case HloOpcode::kTrace: LOG(FATAL) << "Not yet implemented, clone: " << HloOpcodeString(opcode_); } @@ -1591,10 +1557,8 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kSort: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: case HloOpcode::kSend: - case HloOpcode::kSendDone: + case HloOpcode::kRecv: return false; } } @@ -1886,13 +1850,12 @@ std::vector HloInstruction::ExtraAttributesToString() const { extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); } if (window_ != nullptr) { - extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); + extra.push_back(window_util::ToString(*window_)); } if (padding_config_ != nullptr) { - extra.push_back( - StrCat("padding=", xla::PaddingConfigToString(*padding_config_))); + extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); } - if (opcode() == HloOpcode::kSlice) { + if (!slice_starts_.empty() && !slice_limits_.empty()) { std::vector bounds; bounds.reserve(slice_starts_.size()); const bool omit_stride = @@ -1905,16 +1868,6 @@ std::vector HloInstruction::ExtraAttributesToString() const { } extra.push_back(StrCat("slice={", Join(bounds, ", "), "}")); } - if (opcode() == HloOpcode::kDynamicSlice) { - extra.push_back( - StrCat("dynamic_slice_sizes={", Join(dynamic_slice_sizes(), ","), "}")); - } - if (opcode() == HloOpcode::kBatchNormTraining || - opcode() == HloOpcode::kBatchNormInference || - opcode() == HloOpcode::kBatchNormGrad) { - extra.push_back(StrCat("epsilon=", epsilon())); - extra.push_back(StrCat("feature_index=", feature_index())); - } if (convolution_dimension_numbers_ != nullptr) { extra.push_back(ConvolutionDimensionNumbersToString()); @@ -1938,8 +1891,7 @@ std::vector HloInstruction::ExtraAttributesToString() const { }))); } - if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv || - opcode() == HloOpcode::kSendDone || opcode() == HloOpcode::kRecvDone) { + if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv) { extra.push_back(StrCat("channel_id=", channel_id_)); } @@ -2119,10 +2071,8 @@ bool HloInstruction::IsFusable() const { case HloOpcode::kOutfeed: case HloOpcode::kParameter: case HloOpcode::kTrace: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: case HloOpcode::kSend: - case HloOpcode::kSendDone: + case HloOpcode::kRecv: return false; // Only fuse Rng if it is used once, otherwise the random numbers generated // will be different in each fusion. If it is the root (user count = 0) @@ -2329,14 +2279,10 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleCall(this); case HloOpcode::kCustomCall: return visitor->HandleCustomCall(this); - case HloOpcode::kRecv: - return visitor->HandleRecv(this); - case HloOpcode::kRecvDone: - return visitor->HandleRecvDone(this); case HloOpcode::kSend: return visitor->HandleSend(this); - case HloOpcode::kSendDone: - return visitor->HandleSendDone(this); + case HloOpcode::kRecv: + return visitor->HandleRecv(this); // These opcodes are not handled here. case HloOpcode::kTrace: @@ -2895,21 +2841,6 @@ StatusOr StringToFusionKind( return InvalidArgument("Unknown fusion kind: %s", kind_name.c_str()); } -string PaddingConfigToString(const PaddingConfig& padding) { - bool has_interior_padding = - std::any_of(padding.dimensions().begin(), padding.dimensions().end(), - [](const PaddingConfig::PaddingConfigDimension& dim) { - return dim.interior_padding() != 0; - }); - return Join( - padding.dimensions(), "x", - [&](string* out, const PaddingConfig::PaddingConfigDimension& dim) { - StrAppend( - out, dim.edge_padding_low(), "_", dim.edge_padding_high(), - has_interior_padding ? StrCat("_", dim.interior_padding()) : ""); - }); -} - std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) { return os << ToString(kind); } @@ -2925,7 +2856,13 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const { const auto append_dims = [&](const std::vector& dims, const Shape& shape) { CHECK_EQ(dims.size(), ShapeUtil::Rank(shape)); - StrAppend(&result, Join(dims, "")); + for (int64 logical = 0; logical < dims.size(); ++logical) { + int64 physical = logical; + if (!shape.layout().minor_to_major().empty()) { + physical = LayoutUtil::Major(shape.layout(), logical); + } + result += dims[physical]; + } }; // lhs_dims[i] is the symbol of the logical dimension i for the lhs diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index d174f05aa6..5ff04a4888 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -181,28 +181,18 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, tensorflow::StringPiece outfeed_config); - // Creates an asynchronous send instruction with the given channel id, which - // initiates sending the operand data to a unique receive instruction in - // another computation that has the same channel id. + // Creates a send instruction with the given channel id, which sends the + // operand data to a unique receive instruction in another computation that + // has the same channel id. static std::unique_ptr CreateSend(HloInstruction* operand, int64 channel_id); - // Blocks until data transfer for the Send instruction (operand) is complete. - // The operand must be kSend. - static std::unique_ptr CreateSendDone( - HloInstruction* operand); - - // Creates an asynchronous receive instruction with the given channel id, - // which allocates resources to receive data of the given shape from a unique - // send instruction in another computation that has the same channel id. + // Creates a receive instruction with the given channel id, which receives + // data of the given shape from a unique send instruction in another + // computation that has the same channel id. static std::unique_ptr CreateRecv(const Shape& shape, int64 channel_id); - // Blocks until data transfer for the Recv instruction (operand) is complete - // and returns the receive buffer. The operand must be kRecv. - static std::unique_ptr CreateRecvDone( - HloInstruction* operand); - // Creates a slice instruction, where the operand is sliced by the given // start/limit indices. static std::unique_ptr CreateSlice( @@ -212,7 +202,7 @@ class HloInstruction { tensorflow::gtl::ArraySlice strides); // Creates a slice instruction, where the first operand is sliced by - // start indices specified in the second operand, and by size specfied in + // start indices specified in the second operand, and by size specified in // 'slice_sizes'. static std::unique_ptr CreateDynamicSlice( const Shape& shape, HloInstruction* operand, @@ -863,11 +853,6 @@ class HloInstruction { return *window_; } - // Sets the window data in a windowed operation such as convolution. - void set_window(const Window& window) { - window_ = MakeUnique(window); - } - // Returns the padding configuration for a pad node. // // Precondition: opcode() == HloOpcode::kPad @@ -1239,8 +1224,6 @@ string ToString(HloInstruction::FusionKind kind); StatusOr StringToFusionKind( const string& kind_name); -string PaddingConfigToString(const PaddingConfig& padding); - std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind); // Map classes that guarantee a deterministic iteration order when the key is diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 4ead64d997..ddb623332c 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -792,8 +792,8 @@ TEST_F(HloInstructionTest, ComplexFusionOp) { // sub = Sub(mul, clamp) // tuple = Tuple({sub, sub, mul, C1}) // - // Notable complexities are repeated operands in a same instruction, different - // shapes, use of value in different expressions. + // Notable complexities are repeated operands in the same instruction, + // different shapes, use of value in different expressions. auto c1 = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(1.1f))); auto c2 = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index 268fa0f632..4d4010b025 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -121,7 +121,6 @@ HLO_MATCHER(Outfeed); HLO_MATCHER(Pad); HLO_MATCHER(Power); HLO_MATCHER(Recv); -HLO_MATCHER(RecvDone); HLO_MATCHER(Reduce); HLO_MATCHER(ReducePrecision); HLO_MATCHER(ReduceWindow); @@ -132,7 +131,6 @@ HLO_MATCHER(Rng); HLO_MATCHER(Select); HLO_MATCHER(SelectAndScatter); HLO_MATCHER(Send); -HLO_MATCHER(SendDone); HLO_MATCHER(ShiftLeft); HLO_MATCHER(ShiftRightLogical); HLO_MATCHER(ShiftRightArithmetic); diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 5141e7bc8d..6469851791 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -85,11 +85,7 @@ class HloModule { std::unique_ptr Clone(const string& suffix = "clone") const; // Return a pointer to the entry computation of the module.. - const HloComputation* entry_computation() const { - CHECK_NE(nullptr, entry_computation_); - return entry_computation_; - } - HloComputation* entry_computation() { + HloComputation* entry_computation() const { CHECK_NE(nullptr, entry_computation_); return entry_computation_; } diff --git a/tensorflow/compiler/xla/service/hlo_module_config.cc b/tensorflow/compiler/xla/service/hlo_module_config.cc index 822e2f1f53..8974deb530 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.cc +++ b/tensorflow/compiler/xla/service/hlo_module_config.cc @@ -39,8 +39,8 @@ void HloModuleConfig::SetDefaultComputationLayout( } string HloModuleConfig::compilation_cache_key() const { - string key = - tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled_); + string key = tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled_, + "::hybrid=", has_hybrid_result_); StrAppend(&key, "::("); std::vector params; for (const ShapeLayout& param_layout : diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index a5ee895e48..4a7ead9c10 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -104,6 +104,16 @@ class HloModuleConfig { // Whether to enable HLO-level profiling. bool hlo_profiling_enabled_ = false; + // If this flag is true, the generated executable will return a ShapedBuffer + // holding the result of the computation. In a ShapedBuffer, tuples have their + // structure held in host memory and the element arrays (leaves of the tuple + // structure) stored in device memory. The ShapedBuffer is considered "hybrid" + // because its leaves are on device but its structure is stored on + // host. Otherwise, if this flag is false, the generated executable will + // return a DeviceMemoryBase where the result is held entirely in device + // memory. + bool has_hybrid_result_ = false; + // Module/graph-level seed handle. uint64 seed_ = 0; diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index e0d02e0665..d68fc20321 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -97,7 +97,6 @@ namespace xla { V(kPower, "power") \ V(kReal, "real") \ V(kRecv, "recv") \ - V(kRecvDone, "recv-done") \ V(kReduce, "reduce") \ V(kReducePrecision, "reduce-precision") \ V(kReduceWindow, "reduce-window") \ @@ -109,7 +108,6 @@ namespace xla { V(kSelect, "select") \ V(kSelectAndScatter, "select-and-scatter") \ V(kSend, "send") \ - V(kSendDone, "send-done") \ V(kShiftLeft, "shift-left") \ V(kShiftRightArithmetic, "shift-right-arithmetic") \ V(kShiftRightLogical, "shift-right-logical") \ diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 828be8490c..c96df50e79 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -66,9 +66,7 @@ bool IsRematerializable(const HloInstruction* instruction) { case HloOpcode::kInfeed: case HloOpcode::kParameter: case HloOpcode::kRecv: - case HloOpcode::kRecvDone: case HloOpcode::kSend: - case HloOpcode::kSendDone: case HloOpcode::kTrace: case HloOpcode::kWhile: return false; diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 158fb9a546..f463e57d99 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/hlo_runner.h" @@ -20,6 +19,8 @@ limitations under the License. #include #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 7356663454..0d019d22f5 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -16,7 +16,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_sharding.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/strings/str_util.h" namespace xla { @@ -39,15 +38,6 @@ HloSharding HloSharding::Tile1D(const Shape& input_shape, int64 num_tiles) { } string HloSharding::ToString() const { - if (IsTuple()) { - std::vector parts; - parts.reserve(tuple_elements_.size()); - for (const HloSharding& element : tuple_elements_) { - parts.push_back(element.ToString()); - } - return StrCat("{", tensorflow::str_util::Join(parts, ", "), "}"); - } - string result = StrCat("{", (replicated_ ? " replicated" : ""), (maximal_ ? " maximal" : "")); @@ -63,11 +53,6 @@ string HloSharding::ToString() const { } bool HloSharding::UsesDevice(int64 device) const { - if (IsTuple()) { - return std::any_of( - tuple_elements_.begin(), tuple_elements_.end(), - [&](const HloSharding& s) { return s.UsesDevice(device); }); - } const auto& devices = tile_assignment_; return replicated_ || std::find(devices.begin(), devices.end(), device) != devices.end(); @@ -76,7 +61,6 @@ bool HloSharding::UsesDevice(int64 device) const { std::vector HloSharding::TileIndexForDevice(int64 device) const { CHECK(!ShapeUtil::IsTuple(tile_shape_)); CHECK(!maximal_); - CHECK(!IsTuple()); std::vector ret_index; tile_assignment_.Each([&](tensorflow::gtl::ArraySlice index, int64 d) { if (d == device) { @@ -90,7 +74,6 @@ std::vector HloSharding::TileIndexForDevice(int64 device) const { int64 HloSharding::DeviceForTileIndex( tensorflow::gtl::ArraySlice index) const { CHECK(!replicated_); - CHECK(!IsTuple()); if (maximal_) { return *tile_assignment_.begin(); } @@ -99,7 +82,7 @@ int64 HloSharding::DeviceForTileIndex( } std::vector HloSharding::TileOffsetForDevice(int64 device) const { - CHECK(!IsTuple()); + CHECK(!ShapeUtil::IsTuple(tile_shape_)); std::vector index = TileIndexForDevice(device); if (maximal_) { @@ -114,7 +97,7 @@ std::vector HloSharding::TileOffsetForDevice(int64 device) const { } std::vector HloSharding::TileLimitForDevice(int64 device) const { - CHECK(!IsTuple()); + CHECK(!ShapeUtil::IsTuple(tile_shape_)); CHECK(!maximal_); // Maximal shardings do not have a valid tile shape. std::vector index = TileIndexForDevice(device); @@ -125,41 +108,13 @@ std::vector HloSharding::TileLimitForDevice(int64 device) const { } StatusOr HloSharding::UniqueDevice() const { - if (IsTuple()) { - if (tuple_elements_.empty()) { - return tensorflow::errors::InvalidArgument( - "UniqueDevice() called on empty tuple"); - } - std::vector> results; - std::transform(tuple_elements_.begin(), tuple_elements_.end(), - std::back_inserter(results), - [](const HloSharding& s) { return s.UniqueDevice(); }); - if (std::all_of(results.begin(), results.end(), - [&](const StatusOr& s) { - return s.ok() && results[0].ok() && - s.ValueOrDie() == results[0].ValueOrDie(); - })) { - return results[0]; - } else { - return tensorflow::errors::InvalidArgument( - "Tuple did not contain a unique device"); - } - } - if (!replicated_ && maximal_ && !IsTuple()) { + if (!replicated_ && maximal_) { return static_cast(*tile_assignment_.begin()); } return tensorflow::errors::InvalidArgument( "UniqueDevice() called on sharding that executes on multiple devices"); } -bool HloSharding::HasUniqueDevice() const { - if (IsTuple()) { - return UniqueDevice().status().ok(); - } else { - return !IsReplicated() && IsTileMaximal(); - } -} - Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { if (replicated_) { return Status::OK(); @@ -238,19 +193,9 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { /*static*/ StatusOr HloSharding::FromProto( const OpSharding& proto) { - if (proto.type() == OpSharding::Type::OpSharding_Type_TUPLE) { - std::vector tuple_shardings; - tuple_shardings.reserve(proto.tuple_shardings().size()); - for (const OpSharding& tuple_sharding_proto : proto.tuple_shardings()) { - TF_ASSIGN_OR_RETURN(HloSharding sharding, - HloSharding::FromProto(tuple_sharding_proto)); - tuple_shardings.push_back(sharding); - } - return HloSharding(tuple_shardings); - } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { + if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { return Replicate(); - } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL || - proto.tile_assignment_devices().size() == 1) { + } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL) { return HloSharding(proto.tile_assignment_devices(0)); } // Some versions of gcc cannot infer the TileAssignment constructor from a @@ -267,15 +212,6 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { OpSharding HloSharding::ToProto() const { OpSharding result; - - if (IsTuple()) { - for (const HloSharding& element : tuple_elements_) { - *result.add_tuple_shardings() = element.ToProto(); - } - result.set_type(OpSharding::Type::OpSharding_Type_TUPLE); - return result; - } - *result.mutable_tile_shape() = tile_shape_; for (int64 dim : tile_assignment_.dimensions()) { result.add_tile_assignment_dimensions(dim); diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index f8ef2a3d05..d7ada30c70 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -24,7 +24,6 @@ limitations under the License. #include "tensorflow/compiler/xla/array.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/protobuf_util.h" -#include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/hash/hash.h" @@ -68,18 +67,6 @@ class HloSharding { // `num_tiles` tiles. static HloSharding Tile1D(const Shape& input_shape, int64 num_tiles); - // Creates a new sharding for a tuple type. The given ShapeTree must have - // elements for every leaf shape contained in the tuple. - static HloSharding Tuple(const ShapeTree& sub_shardings) { - std::vector flattened_list; - flattened_list.reserve( - std::distance(sub_shardings.leaf_begin(), sub_shardings.leaf_end())); - for (const auto& index_to_sharding : sub_shardings.leaves()) { - flattened_list.push_back(index_to_sharding.second); - } - return HloSharding(flattened_list); - } - // Create a new sharding from a protobuf OpSharding. static StatusOr FromProto(const OpSharding& proto); @@ -89,89 +76,47 @@ class HloSharding { // Validate that this sharding can be applied to a tensor with shape `shape`. Status Validate(const Shape& shape, int64 num_devices) const; - // Returns true if the sharding has tuple type. - bool IsTuple() const { return tuple_; } - // Returns true if the sharding is trivial: replicate on all devices. - bool IsReplicated() const { - if (!IsTuple()) { - return replicated_; - } - return std::all_of(tuple_elements_.begin(), tuple_elements_.end(), - [](const HloSharding& s) { return s.IsReplicated(); }); - } + bool IsReplicated() const { return replicated_; } // Returns true if the tile size is the same as the input size. - bool IsTileMaximal() const { - if (!IsTuple()) { - return maximal_; - } - return std::all_of(tuple_elements_.begin(), tuple_elements_.end(), - [](const HloSharding& s) { return s.IsTileMaximal(); }); - } + bool IsTileMaximal() const { return maximal_; } // Returns true if the sharding defines an operation on the given device. bool UsesDevice(int64 device) const; // Returns the tile that should be executed on the given device. - // REQUIRES: !IsTuple() std::vector TileIndexForDevice(int64 device) const; // Returns the device that should execute the given tile. // It is an error to call this if is_replicated() is true. - // REQUIRES: !IsTuple() int64 DeviceForTileIndex(tensorflow::gtl::ArraySlice index) const; // Given a device ID, returns the offset within the input space of the // tile that should be executed on the given core. This returns the lower // extent of the tile in the input space. - // REQUIRES: !IsTuple() std::vector TileOffsetForDevice(int64 device) const; // Given a device ID, returns the limit within the input space of the // tile that should be executed on the given core. This returns the upper // extent of the tile in the input space. - // REQUIRES: !IsTuple() std::vector TileLimitForDevice(int64 device) const; // Returns the single device this op operates on. - // REQUIRES: !IsTuple&& !Replicated() && IsTileMaximal() + // Requires !Replicated() && IsTileMaximal(). StatusOr UniqueDevice() const; // Returns true if this op only uses a single device. - bool HasUniqueDevice() const; - - // Returns the ShapeTree containing the shardings for each element of this - // tuple. Only the leaf elements are populated. This creates a new ShapeTree - // object so is not cheap. REQUIRES: IsTuple() - ShapeTree GetTupleShardingsAsShapeTree( - const Shape& tuple_shape) const { - ShapeTree result(tuple_shape, HloSharding::Replicate()); - CHECK_EQ(std::distance(result.leaf_begin(), result.leaf_end()), - tuple_elements_.size()); - auto it = tuple_elements_.begin(); - for (auto& index_to_sharding : result.leaves()) { - index_to_sharding.second = *it++; - } - return result; - } + bool HasUniqueDevice() const { return !IsReplicated() && IsTileMaximal(); } bool operator==(const HloSharding& other) const { return replicated_ == other.replicated_ && maximal_ == other.maximal_ && protobuf_util::ProtobufEquals(tile_shape_, other.tile_shape_) && - tile_assignment_ == other.tile_assignment_ && - tuple_elements_ == other.tuple_elements_; + tile_assignment_ == other.tile_assignment_; } bool operator!=(const HloSharding& other) const { return !(*this == other); } size_t Hash() const { - if (!tuple_) { - size_t h = 0; - for (const auto& element : tuple_elements_) { - h = tensorflow::Hash64Combine(h, element.Hash()); - } - return h; - } if (replicated_) { return 0; } @@ -186,47 +131,33 @@ class HloSharding { } // Gets the tile shape. - // REQUIRES: !IsTileMaximal() && !IsTuple() + // It is an error to call this if IsTileMaximal() is true. const Shape& tile_shape() const { return tile_shape_; } // Gets the tile assignment tensor. - // REQUIRES: !IsReplicated() && !IsTuple() + // It is an error to call this if IsReplicated() is true. const Array& tile_assignment() const { return tile_assignment_; } private: HloSharding() : replicated_(true), maximal_(true), - tuple_(false), tile_shape_(), tile_assignment_({0}) {} explicit HloSharding(int64 device_id) : replicated_(false), maximal_(true), - tuple_(false), tile_shape_(), tile_assignment_({1}, device_id) {} HloSharding(const Shape& tile_shape, const Array& tile_assignment) : replicated_(false), maximal_(false), - tuple_(false), tile_shape_(tile_shape), tile_assignment_(tile_assignment) {} - HloSharding(const std::vector& tuple_shardings) - : replicated_(false), - maximal_(false), - tuple_(true), - tile_assignment_({0}), - tuple_elements_(tuple_shardings) {} bool replicated_; bool maximal_; - bool tuple_; Shape tile_shape_; Array tile_assignment_; - // Only non-empty when tuple_ is true, but because empty tuples are allowed - // may also be empty even then. This is a flattened list of all the leaf - // shardings in a tuple shape, by pre-order walk (ShapeTree iterator order). - std::vector tuple_elements_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index 00ea38480e..d0a20471a0 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -132,29 +132,6 @@ TEST_F(HloShardingTest, Tile) { } } -TEST_F(HloShardingTest, NestedTuple) { - // nested_tuple_shape = (f32[], (f32[3]), f32[4, 6]) - Shape nested_tuple_shape = ShapeUtil::MakeTupleShape({ - ShapeUtil::MakeShape(F32, {}), - ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {3})}), - ShapeUtil::MakeShape(F32, {4, 6}), - }); - - OpSharding proto; - proto.set_type(OpSharding::Type::OpSharding_Type_TUPLE); - *proto.add_tuple_shardings() = HloSharding::Replicate().ToProto(); - *proto.add_tuple_shardings() = HloSharding::AssignDevice(0).ToProto(); - *proto.add_tuple_shardings() = HloSharding::AssignDevice(1).ToProto(); - HloSharding tuple_sharding = - HloSharding::FromProto(proto).ConsumeValueOrDie(); - - ShapeTree shape_tree = - tuple_sharding.GetTupleShardingsAsShapeTree(nested_tuple_shape); - EXPECT_EQ(shape_tree.element({0}), HloSharding::Replicate()); - EXPECT_EQ(shape_tree.element({1, 0}), HloSharding::AssignDevice(0)); - EXPECT_EQ(shape_tree.element({2}), HloSharding::AssignDevice(1)); -} - TEST_F(HloShardingTest, Hash) { auto hash_compare_equal = [](const HloSharding& a, const HloSharding& b) { if (a.Hash() != b.Hash()) { @@ -207,51 +184,6 @@ TEST_F(HloShardingTest, Hash) { MakeArray({2, 2}, {0, 3, 1, 2})); EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); } - - HloSharding default_sharding = HloSharding::Replicate(); - { - ShapeTree shape_tree(ShapeUtil::MakeTupleShape({}), - default_sharding); - HloSharding sharding1 = HloSharding::Replicate(); - HloSharding sharding2 = HloSharding::Tuple(shape_tree); - EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); - } - - { - ShapeTree shape_tree(ShapeUtil::MakeTupleShape({}), - default_sharding); - HloSharding sharding1 = HloSharding::Tuple(shape_tree); - HloSharding sharding2 = HloSharding::Tuple(shape_tree); - EXPECT_TRUE(hash_compare_equal(sharding1, sharding2)); - } - - { - ShapeTree shape_tree1( - ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), - default_sharding); - *shape_tree1.mutable_element({0}) = HloSharding::Replicate(); - ShapeTree shape_tree2( - ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), - default_sharding); - *shape_tree2.mutable_element({0}) = HloSharding::AssignDevice(0); - HloSharding sharding1 = HloSharding::Tuple(shape_tree1); - HloSharding sharding2 = HloSharding::Tuple(shape_tree2); - EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); - } - - { - ShapeTree shape_tree1( - ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), - default_sharding); - *shape_tree1.mutable_element({0}) = HloSharding::AssignDevice(0); - ShapeTree shape_tree2( - ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), - default_sharding); - *shape_tree2.mutable_element({0}) = HloSharding::AssignDevice(0); - HloSharding sharding1 = HloSharding::Tuple(shape_tree1); - HloSharding sharding2 = HloSharding::Tuple(shape_tree2); - EXPECT_TRUE(hash_compare_equal(sharding1, sharding2)); - } } } // namespace diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index c938450891..c1aa655401 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -270,40 +270,12 @@ class ShapeVerifier : public DfsHloVisitor { pad->padding_config())); } - Status HandleSend(HloInstruction* send) override { - TF_RET_CHECK(send->users().size() == 1); - const HloInstruction* send_done = send->users()[0]; - TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone); - TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done)); - return CheckShape( - send, ShapeUtil::MakeTupleShape( - {send->operand(0)->shape(), ShapeUtil::MakeShape(U32, {})})); - } - - Status HandleSendDone(HloInstruction* send_done) override { - TF_RET_CHECK(send_done->operands().size() == 1); - const HloInstruction* send = send_done->operand(0); - TF_RET_CHECK(send->opcode() == HloOpcode::kSend); - TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done)); - return CheckShape(send_done, ShapeUtil::MakeNil()); - } - - Status HandleRecv(HloInstruction* recv) override { - TF_RET_CHECK(recv->users().size() == 1); - const HloInstruction* recv_done = recv->users()[0]; - TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone); - TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done)); - return CheckShape(recv, - ShapeUtil::MakeTupleShape( - {recv_done->shape(), ShapeUtil::MakeShape(U32, {})})); + Status HandleSend(HloInstruction*) override { + return tensorflow::Status::OK(); } - Status HandleRecvDone(HloInstruction* recv_done) override { - TF_RET_CHECK(recv_done->operands().size() == 1); - const HloInstruction* recv = recv_done->operand(0); - TF_RET_CHECK(recv->opcode() == HloOpcode::kRecv); - TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done)); - return CheckShape(recv_done, recv->shape().tuple_shapes(0)); + Status HandleRecv(HloInstruction*) override { + return tensorflow::Status::OK(); } Status HandleBatchNormTraining(HloInstruction* batch_norm_training) override { @@ -393,19 +365,6 @@ class ShapeVerifier : public DfsHloVisitor { instruction->opcode(), instruction->operands())); } - // Checks if the given two instructions shares the same channel id. - Status CheckSameChannel(const HloInstruction* instr1, - const HloInstruction* instr2) { - if (instr1->channel_id() != instr2->channel_id()) { - return FailedPrecondition( - "Expected to have the same channel id, actual channel ids are: %s " - "(%lld), %s (%lld)", - instr1->ToString().c_str(), instr1->channel_id(), - instr2->ToString().c_str(), instr2->channel_id()); - } - return tensorflow::Status::OK(); - } - // Returns the size of a Shape in bytes. const std::function shape_size_fn_; }; diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index dea47b1fd7..0d1b7bc109 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -113,9 +113,7 @@ namespace xla { case HloOpcode::kTrace: case HloOpcode::kWhile: case HloOpcode::kSend: - case HloOpcode::kSendDone: case HloOpcode::kRecv: - case HloOpcode::kRecvDone: return true; } diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 96f937caf9..86dee8462f 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -89,7 +89,7 @@ StatusOr InterpreterExecutable::ExecuteOnStream( uint64 start_micros = tensorflow::Env::Default()->NowMicros(); - const HloComputation* computation = module().entry_computation(); + HloComputation* computation = module().entry_computation(); if (computation->num_parameters() != arguments.size()) { return tensorflow::errors::Internal( "Mismatch between argument count and graph parameter count."); diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index d51c0d1dfb..c39ff52230 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -131,10 +131,10 @@ TEST_F(LayoutAssignmentTest, FusionInstruction) { std::vector> minor_to_majors = {{0, 1}, {1, 0}}; for (auto& minor_to_major : minor_to_majors) { auto builder = HloComputation::Builder(TestName()); - auto constant_literal1 = Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout(minor_to_major)); - auto constant_literal2 = Literal::CreateR2WithLayout( - {{5.0, 6.0}, {7.0, 8.0}}, LayoutUtil::MakeLayout(minor_to_major)); + auto constant_literal1 = test_utils::CreateR2LiteralWithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, minor_to_major); + auto constant_literal2 = test_utils::CreateR2LiteralWithLayout( + {{5.0, 6.0}, {7.0, 8.0}}, minor_to_major); Shape ashape = constant_literal1->shape(); auto constant1 = builder.AddInstruction( @@ -181,12 +181,12 @@ TEST_F(LayoutAssignmentTest, TupleLayout) { // Verify the layouts of a tuple are assigned properly (the element layouts // match their source). auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); - auto constant1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); + auto constant0 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + {0, 1}))); + auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + {1, 0}))); auto tuple = builder.AddInstruction( HloInstruction::CreateTuple({constant0, constant1})); @@ -218,12 +218,12 @@ TEST_F(LayoutAssignmentTest, TupleLayout) { TEST_F(LayoutAssignmentTest, TupleSelect) { // Verify layouts of a select with tuple operands is assigned properly. auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); - auto constant1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); + auto constant0 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + {0, 1}))); + auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( + test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, + {1, 0}))); auto tuple0 = builder.AddInstruction( HloInstruction::CreateTuple({constant0, constant1})); auto tuple1 = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 8f24bb1718..075d4a1ab5 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -155,30 +155,6 @@ cc_library( ], ) -cc_library( - name = "vector_support_library", - srcs = ["vector_support_library.cc"], - hdrs = ["vector_support_library.h"], - deps = [ - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", - "@llvm//:core", - ], -) - -cc_library( - name = "kernel_support_library", - srcs = ["kernel_support_library.cc"], - hdrs = ["kernel_support_library.h"], - deps = [ - ":llvm_loop", - "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", - "//tensorflow/core:lib", - "@llvm//:core", - ], -) - # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc deleted file mode 100644 index 29cc0f81bd..0000000000 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" - -#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" - -namespace xla { -void KernelSupportLibrary::For( - tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - llvm::Value* step, - const std::function& for_body_generator) { - If(ir_builder_->CreateICmpSLT(start, end), [&]() { - for_body_generator(start, /*is_first_iteration=*/true); - For(name, ir_builder_->CreateAdd(start, step), end, step, - [&](llvm::Value* iv) { for_body_generator(iv, false); }); - }); -} - -void KernelSupportLibrary::For( - tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - llvm::Value* step, bool peel_first_iteration, - const std::function& for_body_generator) { - if (peel_first_iteration) { - For(name, start, end, step, true, - [&](llvm::Value* indvar, bool is_first_iteration) { - for_body_generator(indvar, ir_builder_->getInt1(is_first_iteration)); - }); - } else { - std::unique_ptr loop = llvm_ir::ForLoop::EmitForLoop( - name, start, end, step, ir_builder_, - /*prevent_unrolling=*/prevent_unrolling_, - /*prevent_vectorization=*/prevent_vectorization_); - ir_builder_->SetInsertPoint(&loop->GetBodyBasicBlock()->back()); - for_body_generator(loop->GetIndVarValue(), - /*is_first_iteration=*/ir_builder_->CreateICmpEQ( - loop->GetIndVarValue(), start)); - llvm_ir::SetToLastInsertPoint(loop->GetExitBasicBlock(), ir_builder_); - } -} - -void KernelSupportLibrary::If( - llvm::Value* condition, const std::function& true_block_generator, - const std::function& false_block_generator) { - llvm_ir::LlvmIfData if_data = - llvm_ir::EmitIfThenElse(condition, "", ir_builder_); - ir_builder_->SetInsertPoint(&if_data.true_block->back()); - true_block_generator(); - ir_builder_->SetInsertPoint(&if_data.false_block->back()); - false_block_generator(); - llvm_ir::SetToLastInsertPoint(if_data.after_block, ir_builder_); -} -} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h deleted file mode 100644 index 9bafb7b577..0000000000 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h +++ /dev/null @@ -1,128 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ -#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ - -#include - -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Value.h" -#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" -#include "tensorflow/core/lib/core/stringpiece.h" - -namespace xla { -// A thin wrapper around llvm_loop.h to make code generating structured control -// flow more readable. -class KernelSupportLibrary { - public: - // `ir_builder` is the llvm::IRBuilder instance used to generate LLVM IR. - // If `prevent_unrolling` is true then unrolling is explicitly disabled on - // every loop generated by this instance of KernelSupportLibrary. - explicit KernelSupportLibrary(llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = true, - bool prevent_vectorization = true) - : ir_builder_(ir_builder), - prevent_unrolling_(prevent_unrolling), - prevent_vectorization_(prevent_vectorization) {} - - // Generates the following control flow structure: - // - // if (`start` < `end`) { - // `for_body_generator(/*ind_var=*/start, /*is_first_iteration=*/true)`; - // for (i64 i = `start` + `step`; i s< `end`; i += `step`) - // `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/false)`; - // } - void For( - tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - llvm::Value* step, - const std::function& - for_body_generator); - - void For( - tensorflow::StringPiece name, int64 start, int64 end, int64 step, - const std::function& - for_body_generator) { - For(name, /*start=*/ir_builder_->getInt64(start), - /*end=*/ir_builder_->getInt64(end), - /*step=*/ir_builder_->getInt64(step), for_body_generator); - } - - // Generates the following control flow structure if `peel_first_iteration` is - // true: - // - // if (`start` < `end`) { - // `for_body_generator(/*ind_var=*/start, /*is_first_iteration=*/,true)`; - // for (i64 i = `start` + `step`; i s< `end`; i += `step`) - // `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/,false)`; - // } - // - // and the following if `peel_first_iteration` is false: - // - // for (i64 i = `start`; i s< `end`; i += `step`) - // `for_body_generator(/*ind_var=*/,i, - // /*is_first_iteration=*/,(i != `start`))`; - void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - llvm::Value* step, bool peel_first_iteration, - const std::function& - for_body_generator); - - void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - int64 step, bool peel_first_iteration, - const std::function& - for_body_generator) { - For(name, /*start=*/start, /*end=*/end, - /*step=*/ir_builder_->getInt64(step), peel_first_iteration, - for_body_generator); - } - - void For( - tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - llvm::Value* step, - const std::function& for_body_generator) { - For(name, start, end, step, - /*peel_first_iteration=*/false, - [&](llvm::Value* indvar, llvm::Value*) { for_body_generator(indvar); }); - } - - void For( - tensorflow::StringPiece name, int64 start, int64 end, int64 step, - const std::function& for_body_generator) { - For(name, /*start=*/ir_builder_->getInt64(start), - /*end=*/ir_builder_->getInt64(end), - /*step=*/ir_builder_->getInt64(step), for_body_generator); - } - - // Generates the following control flow structure: - // - // if (`condition`) - // `true_block_generator()`; - // else - // `false_block_generator()`; - void If(llvm::Value* condition, - const std::function& true_block_generator, - const std::function& false_block_generator = []() {}); - - private: - llvm::IRBuilder<>* ir_builder_; - bool prevent_unrolling_; - bool prevent_vectorization_; -}; -} // namespace xla - -#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc index 7b227ce294..83d35cb9ef 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc @@ -34,24 +34,21 @@ namespace llvm_ir { ForLoop::ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - llvm::Value* step, bool prevent_unrolling, - bool prevent_vectorization) + llvm::Value* step, bool prevent_unrolling) : prefix_(prefix.ToString()), suffix_(suffix.ToString()), start_index_(start_index), end_index_(end_index), step_(step), insert_before_bb_(nullptr), - prevent_unrolling_(prevent_unrolling), - prevent_vectorization_(prevent_vectorization) {} + prevent_unrolling_(prevent_unrolling) {} /* static */ std::unique_ptr ForLoop::EmitForLoop( tensorflow::StringPiece prefix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling, bool prevent_vectorization) { - std::unique_ptr loop(new ForLoop(prefix, /*suffix=*/"", start_index, - end_index, step, prevent_unrolling, - prevent_vectorization)); + bool prevent_unrolling) { + std::unique_ptr loop(new ForLoop( + prefix, /*suffix=*/"", start_index, end_index, step, prevent_unrolling)); loop->Emit(ir_builder); return loop; } @@ -130,12 +127,14 @@ void ForLoop::Emit(llvm::IRBuilder<>* ir_builder) { ir_builder->CreateStore(indvar_inc, indvar_address); llvm::BranchInst* back_branch = ir_builder->CreateBr(header_bb_); - std::vector loop_metadata = GetLoopMetadata(ir_builder); - if (!loop_metadata.empty()) { - llvm::LLVMContext* ctx = &start_index_->getContext(); + if (prevent_unrolling_) { + const char* const kLlvmLoopUnrollDisableMDName = "llvm.loop.unroll.disable"; + llvm::LLVMContext* ctx = &back_branch->getContext(); + auto temp_node = llvm::MDNode::getTemporary(*ctx, llvm::None); - loop_metadata.insert(loop_metadata.begin(), temp_node.get()); - auto loop_id = llvm::MDNode::get(*ctx, loop_metadata); + auto no_unroll_node = llvm::MDNode::get( + *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollDisableMDName)}); + auto loop_id = llvm::MDNode::get(*ctx, {temp_node.get(), no_unroll_node}); loop_id->replaceOperandWith(0, loop_id); back_branch->setMetadata(llvm::LLVMContext::MD_loop, loop_id); } @@ -144,27 +143,6 @@ void ForLoop::Emit(llvm::IRBuilder<>* ir_builder) { ir_builder->SetInsertPoint(exit_bb_); } -std::vector ForLoop::GetLoopMetadata( - llvm::IRBuilder<>* ir_builder) { - const char* const kLlvmLoopUnrollDisableMDName = "llvm.loop.unroll.disable"; - const char* const kLlvmLoopVectorizeMDName = "llvm.loop.vectorize.enable"; - llvm::LLVMContext* ctx = &start_index_->getContext(); - - std::vector result; - if (prevent_unrolling_) { - result.push_back(llvm::MDNode::get( - *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollDisableMDName)})); - } - - if (prevent_vectorization_) { - result.push_back(llvm::MDNode::get( - *ctx, {llvm::MDString::get(*ctx, kLlvmLoopVectorizeMDName), - llvm::ConstantAsMetadata::get(ir_builder->getFalse())})); - } - - return result; -} - string ForLoop::GetQualifiedName(tensorflow::StringPiece name) { return llvm_ir::IrName(prefix_, llvm_ir::IrName(name, suffix_)); } @@ -178,25 +156,23 @@ llvm::BasicBlock* ForLoop::CreateLoopBB(tensorflow::StringPiece name, std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - bool prevent_unrolling, - bool prevent_vectorization) { + bool prevent_unrolling) { return AddLoop(suffix, start_index, end_index, ir_builder_->getInt64(1), - prevent_unrolling, prevent_vectorization); + prevent_unrolling); } std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling, - bool prevent_vectorization) { + bool prevent_unrolling) { if (inner_loop_body_bb_ != nullptr) { // Create this loop inside the previous one. ir_builder_->SetInsertPoint(&*inner_loop_body_bb_->getFirstInsertionPt()); } std::unique_ptr loop(new ForLoop( /*prefix=*/name_, suffix, start_index, end_index, stride, - prevent_unrolling, prevent_vectorization)); + prevent_unrolling)); loop->Emit(ir_builder_); if (outer_loop_preheader_bb_ == nullptr) { @@ -215,24 +191,20 @@ std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, tensorflow::StringPiece suffix, - bool prevent_unrolling, - bool prevent_vectorization) { + bool prevent_unrolling) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), - ir_builder_->getInt64(end_index), prevent_unrolling, - prevent_vectorization); + ir_builder_->getInt64(end_index), prevent_unrolling); } std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling, - bool prevent_vectorization) { + bool prevent_unrolling) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), ir_builder_->getInt64(end_index), - ir_builder_->getInt64(stride), prevent_unrolling, - prevent_vectorization); + ir_builder_->getInt64(stride), prevent_unrolling); } IrArray::Index ForLoopNest::AddLoopsForShape(const Shape& shape, diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h index 20069ce5a2..90f7c7df9e 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h @@ -71,10 +71,12 @@ class ForLoop { // // If `prevent_unrolling` is true then emit metadata that directs LLVM to not // unroll the generated loop. - static std::unique_ptr EmitForLoop( - tensorflow::StringPiece prefix, llvm::Value* start_index, - llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = false, bool prevent_vectorization = false); + static std::unique_ptr EmitForLoop(tensorflow::StringPiece prefix, + llvm::Value* start_index, + llvm::Value* end_index, + llvm::Value* step, + llvm::IRBuilder<>* ir_builder, + bool prevent_unrolling = false); // The names of the blocks follow LLVM's conventions. Control flow amongst the // blocks for the example C code looks like: @@ -128,7 +130,7 @@ class ForLoop { ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, - bool prevent_unrolling, bool prevent_vectorization); + bool prevent_unrolling); // Emit the loop at the insert point of the builder. void Emit(llvm::IRBuilder<>* ir_builder); @@ -140,10 +142,6 @@ class ForLoop { // they are set. string GetQualifiedName(tensorflow::StringPiece name); - // Return a list of metadata nodes that should be associated with the - // llvm::Loop for this `ForLoop`. - std::vector GetLoopMetadata(llvm::IRBuilder<>* ir_builder); - string prefix_; string suffix_; llvm::Value* start_index_; @@ -162,7 +160,6 @@ class ForLoop { llvm::BasicBlock* exit_bb_; llvm::Value* indvar_; bool prevent_unrolling_; - bool prevent_vectorization_; TF_DISALLOW_COPY_AND_ASSIGN(ForLoop); }; @@ -188,28 +185,24 @@ class ForLoopNest { std::unique_ptr AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + bool prevent_unrolling = false); // Like the above, except that it defaults to a stride of one. std::unique_ptr AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + bool prevent_unrolling = false); // A convenient wrapper of the other flavor of AddLoop. The given start and // end index are constant. std::unique_ptr AddLoop(int64 start_index, int64 end_index, int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + bool prevent_unrolling = false); // Like the above, except that it defaults to a stride of one. std::unique_ptr AddLoop(int64 start_index, int64 end_index, tensorflow::StringPiece suffix, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + bool prevent_unrolling = false); // Add loops to iterate through the indices within the specified // shape. The returned index collects the induction variables of the diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index d95409e399..956c0d5f05 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -537,14 +537,6 @@ void SetToFirstInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder) { builder->SetInsertPoint(blk, blk->getFirstInsertionPt()); } -void SetToLastInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder) { - if (llvm::Instruction* terminator = blk->getTerminator()) { - builder->SetInsertPoint(terminator); - } else { - builder->SetInsertPoint(blk); - } -} - llvm::Value* CreateRor(llvm::Value* rotand, llvm::Value* rotor, llvm::IRBuilder<>* builder) { auto size = rotand->getType()->getPrimitiveSizeInBits(); diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index f70d9f88b3..304192b58e 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -243,8 +243,6 @@ llvm::Instruction* AddRangeMetadata(int64 lower, int64 upper, void SetToFirstInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder); -void SetToLastInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder); - // Create a bitwise rotation of `rotand` by `rotor`. llvm::Value* CreateRor(llvm::Value* rotand, llvm::Value* rotor, llvm::IRBuilder<>* builder); diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc deleted file mode 100644 index e8c6a83618..0000000000 --- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h" - -#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" - -namespace xla { -VectorSupportLibrary::VectorSupportLibrary(PrimitiveType primitive_type, - int64 vector_size, - llvm::IRBuilder<>* ir_builder, - std::string name) - : vector_size_(vector_size), - primitive_type_(primitive_type), - ir_builder_(ir_builder), - name_(std::move(name)) { - scalar_type_ = llvm_ir::PrimitiveTypeToIrType( - primitive_type, ir_builder_->GetInsertBlock()->getModule()); - scalar_pointer_type_ = llvm::PointerType::getUnqual(scalar_type_); - vector_type_ = llvm::VectorType::get(scalar_type_, vector_size); - vector_pointer_type_ = llvm::PointerType::getUnqual(vector_type_); -} - -llvm::Value* VectorSupportLibrary::Mul(llvm::Value* lhs, llvm::Value* rhs) { - if (scalar_type_->isFloatingPointTy()) { - return ir_builder()->CreateFMul(lhs, rhs, name()); - } else { - return ir_builder()->CreateMul(lhs, rhs, name()); - } -} - -llvm::Value* VectorSupportLibrary::Add(llvm::Value* lhs, llvm::Value* rhs) { - if (scalar_type_->isFloatingPointTy()) { - return ir_builder()->CreateFAdd(lhs, rhs, name()); - } else { - return ir_builder()->CreateAdd(lhs, rhs, name()); - } -} - -llvm::Value* VectorSupportLibrary::ComputeOffsetPointer( - llvm::Value* base_pointer, llvm::Value* offset_elements) { - if (base_pointer->getType() != scalar_pointer_type()) { - base_pointer = ir_builder()->CreateBitCast(base_pointer, - scalar_pointer_type(), name()); - } - return ir_builder()->CreateInBoundsGEP(base_pointer, {offset_elements}, - name()); -} - -llvm::Value* VectorSupportLibrary::LoadVector(llvm::Value* pointer) { - if (pointer->getType() != vector_pointer_type()) { - pointer = - ir_builder()->CreateBitCast(pointer, vector_pointer_type(), name()); - } - return ir_builder()->CreateAlignedLoad( - pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_), name()); -} - -llvm::Value* VectorSupportLibrary::LoadScalar(llvm::Value* pointer) { - if (pointer->getType() != scalar_pointer_type()) { - pointer = - ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); - } - return ir_builder()->CreateAlignedLoad( - pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_), name()); -} - -void VectorSupportLibrary::StoreVector(llvm::Value* value, - llvm::Value* pointer) { - if (pointer->getType() != vector_pointer_type()) { - pointer = ir_builder()->CreateBitCast(pointer, vector_pointer_type()); - } - ir_builder()->CreateAlignedStore( - value, pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_)); -} - -void VectorSupportLibrary::StoreScalar(llvm::Value* value, - llvm::Value* pointer) { - if (pointer->getType() != scalar_pointer_type()) { - pointer = - ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); - } - ir_builder()->CreateAlignedStore( - value, pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_)); -} - -llvm::Value* VectorSupportLibrary::LoadBroadcast(llvm::Value* pointer) { - if (pointer->getType() != scalar_pointer_type()) { - pointer = - ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); - } - return ir_builder()->CreateVectorSplat( - vector_size(), ir_builder()->CreateLoad(pointer), name()); -} - -llvm::Value* VectorSupportLibrary::AddReduce(llvm::Value* vector) { - llvm::SmallVector mask(vector_size(), nullptr); - for (unsigned i = vector_size(); i != 1; i >>= 1) { - // On every iteration, we shuffle half of the remaining lanes to the top - // half of shuffle, and add two old and the new vector. - - for (unsigned j = 0; j < vector_size(); ++j) { - if (j < (i / 2)) { - mask[j] = ir_builder()->getInt32(i / 2 + j); - } else { - mask[j] = llvm::UndefValue::get(ir_builder()->getInt32Ty()); - } - } - - llvm::Value* half_remaining_lanes = ir_builder()->CreateShuffleVector( - vector, llvm::UndefValue::get(vector_type()), - llvm::ConstantVector::get(mask), ""); - vector = Add(vector, half_remaining_lanes); - } - - return ir_builder()->CreateExtractElement(vector, ir_builder()->getInt32(0), - name()); -} - -llvm::Value* VectorSupportLibrary::GetZeroVector() { - return llvm::Constant::getNullValue(vector_type()); -} - -llvm::Value* VectorSupportLibrary::GetZeroScalar() { - return llvm::Constant::getNullValue(scalar_type()); -} - -LlvmVariable::LlvmVariable(llvm::Type* type, llvm::IRBuilder<>* ir_builder) - : ir_builder_(ir_builder) { - alloca_ = llvm_ir::EmitAllocaAtFunctionEntry(type, "", ir_builder_); -} - -llvm::Value* LlvmVariable::Get() { return ir_builder_->CreateLoad(alloca_); } - -void LlvmVariable::Set(llvm::Value* new_value) { - ir_builder_->CreateStore(new_value, alloca_); -} -} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h deleted file mode 100644 index 3072677ab0..0000000000 --- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ -#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ - -#include - -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Value.h" -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" - -namespace xla { -// A thin wrapper around llvm_util.h to make code generating vector math flow -// more readable. -class VectorSupportLibrary { - public: - // This VectorSupportLibrary instance remembers `primitive_type` and - // `vector_size`, and these are implicitly used by the methods on this - // instance (i.e. LoadVector will load a vector of type <`vector_size` x - // `primitive_type`>). - VectorSupportLibrary(PrimitiveType primitive_type, int64 vector_size, - llvm::IRBuilder<>* ir_builder, std::string name); - - llvm::Value* Mul(llvm::Value* lhs, llvm::Value* rhs); - llvm::Value* Mul(int64 lhs, llvm::Value* rhs) { - return Mul(ir_builder()->getInt64(lhs), rhs); - } - - llvm::Value* Add(llvm::Value* lhs, llvm::Value* rhs); - llvm::Value* Add(int64 lhs, llvm::Value* rhs) { - return Add(ir_builder()->getInt64(lhs), rhs); - } - - llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, llvm::Value* c) { - return Add(c, Mul(a, b)); - } - - llvm::Value* ComputeOffsetPointer(llvm::Value* base_pointer, - llvm::Value* offset_elements); - llvm::Value* ComputeOffsetPointer(llvm::Value* base_pointer, - int64 offset_elements) { - return ComputeOffsetPointer(base_pointer, - ir_builder()->getInt64(offset_elements)); - } - - llvm::Value* LoadVector(llvm::Value* pointer); - - llvm::Value* LoadVector(llvm::Value* base_pointer, - llvm::Value* offset_elements) { - return LoadVector(ComputeOffsetPointer(base_pointer, offset_elements)); - } - - llvm::Value* LoadVector(llvm::Value* base_pointer, int64 offset_elements) { - return LoadVector(base_pointer, ir_builder()->getInt64(offset_elements)); - } - - llvm::Value* LoadScalar(llvm::Value* pointer); - - llvm::Value* LoadScalar(llvm::Value* base_pointer, - llvm::Value* offset_elements) { - return LoadScalar(ComputeOffsetPointer(base_pointer, offset_elements)); - } - - llvm::Value* LoadScalar(llvm::Value* base_pointer, int64 offset_elements) { - return LoadScalar(base_pointer, ir_builder()->getInt64(offset_elements)); - } - - void StoreVector(llvm::Value* value, llvm::Value* pointer); - - void StoreVector(llvm::Value* value, llvm::Value* base_pointer, - llvm::Value* offset_elements) { - StoreVector(value, ComputeOffsetPointer(base_pointer, offset_elements)); - } - - void StoreVector(llvm::Value* value, llvm::Value* base_pointer, - int64 offset_elements) { - StoreVector(value, base_pointer, ir_builder()->getInt64(offset_elements)); - } - - void StoreScalar(llvm::Value* value, llvm::Value* pointer); - void StoreScalar(llvm::Value* value, llvm::Value* base_pointer, - llvm::Value* offset_elements) { - StoreScalar(value, ComputeOffsetPointer(base_pointer, offset_elements)); - } - - void StoreScalar(llvm::Value* value, llvm::Value* base_pointer, - int64 offset_elements) { - StoreScalar(base_pointer, ir_builder()->getInt64(offset_elements)); - } - - llvm::Value* LoadBroadcast(llvm::Value* pointer); - llvm::Value* LoadBroadcast(llvm::Value* base_pointer, - llvm::Value* offset_elements) { - return LoadBroadcast(ComputeOffsetPointer(base_pointer, offset_elements)); - } - llvm::Value* LoadBroadcast(llvm::Value* base_pointer, int64 offset_elements) { - return LoadBroadcast(base_pointer, ir_builder()->getInt64(offset_elements)); - } - - llvm::Value* AddReduce(llvm::Value* vector); - - llvm::Value* GetZeroVector(); - llvm::Value* GetZeroScalar(); - - llvm::IRBuilder<>* ir_builder() const { return ir_builder_; } - int64 vector_size() const { return vector_size_; } - llvm::Type* vector_type() const { return vector_type_; } - llvm::Type* vector_pointer_type() const { return vector_pointer_type_; } - llvm::Type* scalar_type() const { return scalar_type_; } - llvm::Type* scalar_pointer_type() const { return scalar_pointer_type_; } - - const std::string& name() const { return name_; } - - private: - int64 vector_size_; - PrimitiveType primitive_type_; - llvm::IRBuilder<>* ir_builder_; - llvm::Type* vector_type_; - llvm::Type* vector_pointer_type_; - llvm::Type* scalar_type_; - llvm::Type* scalar_pointer_type_; - std::string name_; -}; - -// This wraps an alloca-backed stack variable which LLVM's SSA construction pass -// can later convert to a SSA value. -class LlvmVariable { - public: - LlvmVariable(llvm::Type*, llvm::IRBuilder<>* ir_builder); - - llvm::Value* Get(); - void Set(llvm::Value* new_value); - - private: - llvm::AllocaInst* alloca_; - llvm::IRBuilder<>* ir_builder_; -}; - -class VectorVariable : public LlvmVariable { - public: - VectorVariable(VectorSupportLibrary* vector_support, - llvm::Value* initial_value) - : LlvmVariable(vector_support->vector_type(), - vector_support->ir_builder()) { - Set(initial_value); - } -}; - -class ScalarVariable : public LlvmVariable { - public: - ScalarVariable(VectorSupportLibrary* vector_support, - llvm::Value* initial_value) - : LlvmVariable(vector_support->scalar_type(), - vector_support->ir_builder()) { - Set(initial_value); - } -}; -} // namespace xla - -#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 06f43bd3cb..d4d35da9d6 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -68,6 +68,26 @@ LocalService::LocalService(const ServiceOptions& options, std::unique_ptr execute_backend) : Service(options, std::move(execute_backend)) {} +namespace { +// Returns the space required to allocate a shape. If +// allocate_space_for_deep_copy the space includes all sub-buffers of +// a tuple. +int64 RequiredSpace(const Shape& shape, bool allocate_space_for_deep_copy, + TransferManager* transfer_manager) { + int64 size = 0; + // TODO(b/33492279) remove once no devices represent result tuples as + // contiguous buffers. + if (allocate_space_for_deep_copy) { + ShapeUtil::ForEachSubshape( + shape, [&size, transfer_manager](const Shape& subshape, + const ShapeIndex& /*index*/) { + size += transfer_manager->GetByteSizeRequirement(subshape); + }); + } + return size; +} +} // namespace + StatusOr> LocalService::CompileExecutable( const ComputationHandle& computation, const tensorflow::gtl::ArraySlice argument_layouts, diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc index 02dc49e78c..b92017c6cb 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc @@ -104,21 +104,6 @@ Status LogicalBufferAnalysis::HandleBitcast(HloInstruction*) { return Status::OK(); } -Status LogicalBufferAnalysis::HandleRecvDone(HloInstruction*) { - // RecvDone doesn't create a new buffer but rather aliases its input (Recv) - // tuple element at {0} to its output. - return Status::OK(); -} - -Status LogicalBufferAnalysis::HandleSend(HloInstruction* send) { - // Send creates new buffers for the top-level tuple and the context (tuple - // element at {1}). Tuple element at {0} is an alias of the Send operand, so - // we don't need to create a new Logical Buffer for that. - NewLogicalBuffer(send, /*index=*/{}); - NewLogicalBuffer(send, /*index=*/{1}); - return Status::OK(); -} - Status LogicalBufferAnalysis::HandleTuple(HloInstruction* tuple) { // A Tuple instruction only creates the top-level buffer. NewLogicalBuffer(tuple, /*index=*/{}); diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.h b/tensorflow/compiler/xla/service/logical_buffer_analysis.h index 598d08b720..a82e83ec5c 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.h +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.h @@ -60,8 +60,6 @@ class LogicalBufferAnalysis : public DfsHloVisitorWithDefault { Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleCopy(HloInstruction* copy) override; - Status HandleRecvDone(HloInstruction* recv_done) override; - Status HandleSend(HloInstruction* send) override; Status HandleSelect(HloInstruction* select) override; // A map from the buffer ID to the logical buffer diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 47f4f0ade5..6646be2e9a 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -272,6 +272,8 @@ class Service : public ServiceInterface { // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. + // has_hybrid_result is used to initialize the same-named field in + // HloModuleConfig -- see that class for documentation. StatusOr> CreateModuleConfig( const ProgramShape& program_shape, tensorflow::gtl::ArraySlice argument_shapes, diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index dcd726f22c..791d17365b 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -31,7 +31,6 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" @@ -771,12 +770,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(rhs)); - TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( - lhs, tensorflow::strings::StrCat("lhs of binary operation ", - BinaryOperation_Name(operation)))); - TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( - rhs, tensorflow::strings::StrCat("rhs of binary operation ", - BinaryOperation_Name(operation)))); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of binary operation")); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of binary operation")); switch (operation) { case BINOP_DOT: return InferDotOpShape(lhs, rhs); @@ -1948,10 +1943,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( !std::is_permutation(dimensions.begin(), dimensions.end(), indices.begin())) { return InvalidArgument( - "Reshape dimensions [%s] are not a permutation of the operand " - "dimensions (operand shape is %s).", - tensorflow::str_util::Join(dimensions, ",").c_str(), - ShapeUtil::HumanString(operand).c_str()); + "Reshape dimensions not a permutation of the operand dimensions."); } return inferred_shape; diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index a1f9451dd4..df537bd7c1 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -253,64 +253,6 @@ Status TuplePointsToAnalysis::HandleBitcast(HloInstruction* bitcast) { return Status::OK(); } -Status TuplePointsToAnalysis::HandleRecvDone(HloInstruction* recv_done) { - // RecvDone aliases its input (Recv) tuple element {0} to its output. - PointsToSet& points_to_set = CreateEmptyPointsToSet(recv_done); - const PointsToSet& operand_points_to_set = - GetPointsToSet(recv_done->operand(0)); - - // Recursively copy the points to set of the operand tuple {0}. - points_to_set.ForEachMutableElement( - [this, &points_to_set, &operand_points_to_set]( - const ShapeIndex& index, PointsToSet::BufferList* buffers) { - ShapeIndex src_index({0}); - for (auto element : index) { - src_index.push_back(element); - } - *buffers = operand_points_to_set.element(src_index); - for (auto& tuple_source : - operand_points_to_set.tuple_sources(src_index)) { - points_to_set.add_tuple_source(index, tuple_source); - } - }); - return Status::OK(); -} - -Status TuplePointsToAnalysis::HandleSend(HloInstruction* send) { - // Send creates a tuple of {aliased operand, U32 context}. - PointsToSet& points_to_set = CreateEmptyPointsToSet(send); - - // Creates the points to set for the tuple and its element at {1}. - auto top_buffer = points_to_set.mutable_element(ShapeIndex({})); - top_buffer->push_back( - &logical_buffer_analysis_->GetBuffer(send, ShapeIndex({}))); - points_to_set.add_tuple_source({}, send); - - auto context_buffer = points_to_set.mutable_element(ShapeIndex({1})); - context_buffer->push_back( - &logical_buffer_analysis_->GetBuffer(send, ShapeIndex({1}))); - - // Recursively copy the points to set of the operand to output tuple {0}. - const PointsToSet& operand_points_to_set = GetPointsToSet(send->operand(0)); - operand_points_to_set.ForEachElement( - [&points_to_set, &operand_points_to_set]( - const ShapeIndex& src_index, - const PointsToSet::BufferList& points_to) { - ShapeIndex target_index({0}); - for (auto element : src_index) { - target_index.push_back(element); - } - *points_to_set.mutable_element(target_index) = points_to; - - for (HloInstruction* tuple : - operand_points_to_set.tuple_sources(src_index)) { - points_to_set.add_tuple_source(target_index, tuple); - } - }); - - return Status::OK(); -} - Status TuplePointsToAnalysis::HandleTuple(HloInstruction* tuple) { tensorflow::gtl::ArraySlice operands(tuple->operands()); PointsToSet& points_to_set = CreateEmptyPointsToSet(tuple); diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index 8928de107e..e6157a1ed1 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -251,8 +251,6 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleCopy(HloInstruction* copy) override; - Status HandleRecvDone(HloInstruction* recv_done) override; - Status HandleSend(HloInstruction* send) override; Status HandleSelect(HloInstruction* select) override; string ToString() const; diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index dec446d4da..694ed57fa2 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -313,51 +313,6 @@ TEST_F(TuplePointsToAnalysisTest, TupleCopy) { {constant1, constant2, copy}); } -TEST_F(TuplePointsToAnalysisTest, SendAndSendDone) { - // Send forwards its operand to the output tuple at {0}. - auto builder = HloComputation::Builder(TestName()); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto send = builder.AddInstruction( - HloInstruction::CreateSend(constant, /*channel_id=*/0)); - auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send)); - - BuildModuleAndRunAnalysis(builder.Build()); - - EXPECT_FALSE(points_to_analysis_->GetPointsToSet(send).IsAmbiguous()); - EXPECT_TRUE(points_to_analysis_->GetPointsToSet(send).IsDistinct()); - EXPECT_FALSE(points_to_analysis_->GetPointsToSet(send_done).IsAmbiguous()); - EXPECT_TRUE(points_to_analysis_->GetPointsToSet(send_done).IsDistinct()); - - ExpectHasTopLevelBuffers( - points_to_analysis_->GetPointsToSet(send).element({}), {send}); - ExpectHasTopLevelBuffers( - points_to_analysis_->GetPointsToSet(send).element({0}), {constant}); - ExpectHasTopLevelBuffers( - points_to_analysis_->GetPointsToSet(send_done).CreateFlattenedSet(), - {send_done}); - ExpectHasBufferAliases(constant, {}, {{constant, {}}, {send, {0}}}); -} - -TEST_F(TuplePointsToAnalysisTest, RecvAndRecvDone) { - // RecvDone forwards its operand tuple element at {0} to the output. - auto builder = HloComputation::Builder(TestName()); - auto recv = builder.AddInstruction(HloInstruction::CreateRecv( - ShapeUtil::MakeShape(F32, {1, 2, 3}), /*channel_id=*/0)); - auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv)); - - BuildModuleAndRunAnalysis(builder.Build()); - - EXPECT_FALSE(points_to_analysis_->GetPointsToSet(recv).IsAmbiguous()); - EXPECT_TRUE(points_to_analysis_->GetPointsToSet(recv).IsDistinct()); - EXPECT_FALSE(points_to_analysis_->GetPointsToSet(recv_done).IsAmbiguous()); - EXPECT_TRUE(points_to_analysis_->GetPointsToSet(recv_done).IsDistinct()); - - ExpectHasTopLevelBuffers( - points_to_analysis_->GetPointsToSet(recv).element({}), {recv}); - ExpectHasBufferAliases(recv, {0}, {{recv, {0}}, {recv_done, {}}}); -} - TEST_F(TuplePointsToAnalysisTest, TupleSelect) { // Select from two different tuples. This should create an ambiguous points to // set containing the union of both sides. diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 8d5bb08e51..e9d182509b 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -2927,9 +2927,8 @@ void ComputationLowerer::Visit( case OpRequest::kRecvRequest: { const RecvRequest& recv_request = request.request().recv_request(); - HloInstruction* recv = add_instruction(HloInstruction::CreateRecv( + hlo_instruction = add_instruction(HloInstruction::CreateRecv( request.output_shape(), recv_request.channel_handle().handle())); - hlo_instruction = add_instruction(HloInstruction::CreateRecvDone(recv)); break; } @@ -3121,9 +3120,8 @@ void ComputationLowerer::Visit( case OpRequest::kSendRequest: { const SendRequest& send_request = request.request().send_request(); HloInstruction* operand = lookup_instruction(send_request.operand()); - HloInstruction* send = add_instruction(HloInstruction::CreateSend( + hlo_instruction = add_instruction(HloInstruction::CreateSend( operand, send_request.channel_handle().handle())); - hlo_instruction = add_instruction(HloInstruction::CreateSendDone(send)); break; } diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 2fac914892..65734f91bc 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -58,9 +58,7 @@ static bool ContainsSendOrRecv(const HloComputation* comp) { static bool IsOrContainsSendOrRecv(const HloInstruction* instr) { if (instr->opcode() == HloOpcode::kSend || - instr->opcode() == HloOpcode::kSendDone || - instr->opcode() == HloOpcode::kRecv || - instr->opcode() == HloOpcode::kRecvDone) { + instr->opcode() == HloOpcode::kRecv) { return true; } for (const auto& subcomp : instr->called_computations()) { diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index d99b31dc00..8e1a2dcde1 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ -144,11 +144,10 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsSend) { auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); - auto* send = while_body->AddInstruction(HloInstruction::CreateSend( + while_body->AddInstruction(HloInstruction::CreateSend( while_body->AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(true))), /*channel_id=*/0)); - while_body->AddInstruction(HloInstruction::CreateSendDone(send)); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -157,10 +156,9 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsRecv) { auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); - auto* recv = while_body->AddInstruction( + while_body->AddInstruction( HloInstruction::CreateRecv(ShapeUtil::MakeShape(F32, {1}), /*channel_id=*/0)); - while_body->AddInstruction(HloInstruction::CreateRecvDone(recv)); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index a898a4d375..64a36471b9 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -116,7 +116,6 @@ class ShapeTree { ShapeTree(const Shape* shape, const T& init_value); ShapeTree(const ShapeTree& other) { *this = other; } - ShapeTree(ShapeTree&&) = default; ShapeTree& operator=(const ShapeTree& other) { root_ = other.root_; @@ -133,8 +132,6 @@ class ShapeTree { return *this; } - ShapeTree& operator=(ShapeTree&& other) = default; - // Returns the data element associated with the array in the shape at the // given index (see ShapeUtil::GetSubshape for how indexes are defined). const T& element(const ShapeIndex& index) const; diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 4d0bafa908..b5eb81dfc6 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -263,7 +263,6 @@ StatusOr MakeShapeWithLayoutInternal( case S32: case S64: case F16: - case BF16: case F32: case F64: return true; diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 3e62481629..4e1be24b61 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -61,14 +61,13 @@ generate_backend_test_macros() cc_library( name = "test_utils", - srcs = ["test_utils.cc"], + testonly = True, hdrs = ["test_utils.h"], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/service:hlo", "//tensorflow/core:lib", ], ) @@ -1344,23 +1343,22 @@ xla_test( ], ) -tf_cc_test( +xla_test( name = "llvm_compiler_test", srcs = ["llvm_compiler_test.cc"], - tags = ["requires-gpu-sm35"], + backends = [ + "cpu", + "gpu", + "cpu_parallel", + ], deps = [ - "//tensorflow/compiler/xla:test_helpers", - "//tensorflow/compiler/xla/service:backend", - "//tensorflow/compiler/xla/service:cpu_plugin", - "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:llvm_compiler", - "//tensorflow/compiler/xla/service:platform_util", - "//tensorflow/compiler/xla/service/cpu:cpu_compiler", - "//tensorflow/compiler/xla/service/gpu:gpu_compiler", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/stream_executor", "@llvm//:core", ], ) diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index ef54714e46..065bce7e31 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -346,60 +346,6 @@ void ClientLibraryTestBase::ComputeAndCompareTuple( LiteralTestUtil::ExpectNearTuple(expected, *actual, error); } -void ClientLibraryTestBase::ComputeAndCompare( - ComputationBuilder* builder, const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments) { - auto status_or_data = ComputeValueAndReference(builder, operand, arguments); - EXPECT_IS_OK(status_or_data); - if (!status_or_data.ok()) { - return; - } - std::unique_ptr reference, result; - std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); - LiteralTestUtil::ExpectEqual(*reference, *result); -} - -void ClientLibraryTestBase::ComputeAndCompare( - ComputationBuilder* builder, const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { - auto status_or_data = ComputeValueAndReference(builder, operand, arguments); - EXPECT_IS_OK(status_or_data); - if (!status_or_data.ok()) { - return; - } - std::unique_ptr reference, result; - std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); - LiteralTestUtil::ExpectNear(*reference, *result, error); -} - -StatusOr, std::unique_ptr>> -ClientLibraryTestBase::ComputeValueAndReference( - ComputationBuilder* builder, const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments) { - // Transfer the arguments to the executor service. We put the unique_ptr's - // into a vector to keep the data alive on the service until the end of this - // function. - std::vector> argument_data; - for (const auto& arg : arguments) { - TF_ASSIGN_OR_RETURN(auto data, client_->TransferToServer(arg)); - argument_data.push_back(std::move(data)); - } - - // Create raw pointers to the GlobalData for the rest of the call stack. - std::vector argument_data_ptr; - std::transform( - argument_data.begin(), argument_data.end(), - std::back_inserter(argument_data_ptr), - [](const std::unique_ptr& data) { return data.get(); }); - - TF_ASSIGN_OR_RETURN( - auto reference, - builder->ComputeConstant(operand, /*output_layout=*/nullptr, arguments)); - TF_ASSIGN_OR_RETURN(auto result, - ExecuteAndTransfer(builder, argument_data_ptr)); - return std::make_pair(std::move(reference), std::move(result)); -} - Computation ClientLibraryTestBase::CreateScalarRelu() { ComputationBuilder builder(client_, "relu"); auto z_value = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "z_value"); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index b578667735..7cfc276ec1 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -196,16 +196,6 @@ class ClientLibraryTestBase : public ::testing::Test { ComputationBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec abs_error); - // Convenience method for running a built computation and comparing the result - // with the HloEvaluator. - void ComputeAndCompare(ComputationBuilder* builder, - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments); - void ComputeAndCompare(ComputationBuilder* builder, - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments, - ErrorSpec error); - // Create scalar operations for use in reductions. Computation CreateScalarRelu(); Computation CreateScalarMax(); @@ -308,13 +298,6 @@ class ClientLibraryTestBase : public ::testing::Test { const std::function& verify_output, const Shape* output_with_layout = nullptr); - - // Executes the computation and calculates the expected reference value using - // the HloEvaluator. Returns two literal in the order of (expected, actual). - StatusOr, std::unique_ptr>> - ComputeValueAndReference(ComputationBuilder* builder, - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments); }; template @@ -486,7 +469,8 @@ template std::vector ClientLibraryTestBase::CreatePseudorandomR1( const int width, NativeT min_value, NativeT max_value, uint32 seed) { std::vector result(width); - PseudorandomGenerator generator(min_value, max_value, seed); + test_utils::PseudorandomGenerator generator(min_value, max_value, + seed); for (int i = 0; i < width; ++i) { result[i] = generator.get(); } @@ -498,7 +482,8 @@ std::unique_ptr> ClientLibraryTestBase::CreatePseudorandomR2( const int rows, const int cols, NativeT min_value, NativeT max_value, uint32 seed) { auto result = MakeUnique>(rows, cols); - PseudorandomGenerator generator(min_value, max_value, seed); + test_utils::PseudorandomGenerator generator(min_value, max_value, + seed); for (int y = 0; y < rows; ++y) { for (int x = 0; x < cols; ++x) { (*result)(y, x) = generator.get(); diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc index 183bcf1dd3..0853feeebd 100644 --- a/tensorflow/compiler/xla/tests/client_test.cc +++ b/tensorflow/compiler/xla/tests/client_test.cc @@ -54,8 +54,8 @@ TEST_F(ClientTest, ExecuteWithLayout) { .ConsumeValueOrDie(); std::unique_ptr expected_literal = - Literal::CreateR2WithLayout( - {{11, 22}, {33, 44}}, LayoutUtil::MakeLayout(transfer_layout)); + test_utils::CreateR2LiteralWithLayout({{11, 22}, {33, 44}}, + transfer_layout); auto computed = client_->Transfer(*data, &expected_literal->shape()); diff --git a/tensorflow/compiler/xla/tests/compilation_cache_test.cc b/tensorflow/compiler/xla/tests/compilation_cache_test.cc index 0f780fa87e..707e439245 100644 --- a/tensorflow/compiler/xla/tests/compilation_cache_test.cc +++ b/tensorflow/compiler/xla/tests/compilation_cache_test.cc @@ -138,13 +138,13 @@ XLA_TEST_F(CompilationCacheTest, DifferentParameterLayouts) { // layouts. Use these arrays as parameters to a simple computation. If the // layout of the array changes then computation should be recompiled (cache // miss). - auto rowmaj_array = Literal::CreateR2WithLayout( - {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({1, 0})); + auto rowmaj_array = test_utils::CreateR2LiteralWithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{1, 0}); auto rowmaj_handle = client_->TransferToServer(*rowmaj_array).ConsumeValueOrDie(); - auto colmaj_array = Literal::CreateR2WithLayout( - {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1})); + auto colmaj_array = test_utils::CreateR2LiteralWithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{0, 1}); auto colmaj_handle = client_->TransferToServer(*colmaj_array).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/compute_constant_test.cc b/tensorflow/compiler/xla/tests/compute_constant_test.cc index 5226a78386..d423c78476 100644 --- a/tensorflow/compiler/xla/tests/compute_constant_test.cc +++ b/tensorflow/compiler/xla/tests/compute_constant_test.cc @@ -264,8 +264,8 @@ XLA_TEST_F(ComputeConstantTest, Layout) { ASSERT_TRUE(computed.ok()) << computed.status(); std::unique_ptr expected_literal = - Literal::CreateR2WithLayout({{11, 22}, {33, 44}}, - LayoutUtil::MakeLayout(layout)); + test_utils::CreateR2LiteralWithLayout({{11, 22}, {33, 44}}, + layout); LiteralTestUtil::AssertEqualShapesAndLayouts( expected_literal->shape(), computed.ValueOrDie()->shape()); LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 7425f778a6..0cc2e5fb7e 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -82,127 +82,177 @@ XLA_TEST_F(ConvolutionTest, ForwardPassConvolution_3x3x256_256_OutputZ_Iota) { ComputationBuilder builder(client_, TestName()); auto lhs = builder.ConstantR4FromArray4D(*alhs); auto rhs = builder.ConstantR4FromArray4D(*arhs); - auto conv = builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); + builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); - ComputeAndCompare(&builder, conv, {}, error_spec_); + std::unique_ptr> aexpected = + ReferenceUtil::ConvArray4D(*alhs, *arhs, {1, 1}, Padding::kValid); + + ComputeAndCompareR4(&builder, *aexpected, {}, error_spec_); } TEST_F(ConvolutionTest, Convolve_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); - - Array4D input_data(1, 1, 1, 2); - input_data.FillWithYX(Array2D({ + { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + } + + Array4D input(1, 1, 1, 2); + input.FillWithYX(Array2D({ {1, 2}, })); - Array4D filter_data(1, 1, 1, 2); - filter_data.FillWithYX(Array2D({ + Array4D filter(1, 1, 1, 2); + filter.FillWithYX(Array2D({ {5, 6}, })); - ComputeAndCompare(&builder, conv, - {*Literal::CreateFromArray(input_data), - *Literal::CreateFromArray(filter_data)}, - error_spec_); + std::unique_ptr> aexpected = + ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid); + + auto input_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR4(&builder, *aexpected, + {input_literal.get(), filter_literal.get()}, + error_spec_); } // Tests valid padding for 2D convolution in raster space. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Valid) { ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + } - Array4D input_data(1, 1, 4, 4); + Array4D input(1, 1, 4, 4); // clang-format off - input_data.FillWithYX(Array2D({ + input.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter_data(1, 1, 2, 2); + Array4D filter(1, 1, 2, 2); // clang-format off - filter_data.FillWithYX(Array2D({ + filter.FillWithYX(Array2D({ {5, 6}, {7, 8}, })); // clang-format on - ComputeAndCompare(&builder, conv, - {*Literal::CreateFromArray(input_data), - *Literal::CreateFromArray(filter_data)}, - error_spec_); + + std::unique_ptr> aexpected = + ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid); + + auto input_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR4(&builder, *aexpected, + {input_literal.get(), filter_literal.get()}, + error_spec_); } // Tests same padding for 2D convolution in raster space. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Same) { ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); + { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + builder.Conv(input, filter, {1, 1}, Padding::kSame); + } - Array4D input_data(1, 1, 4, 4); + Array4D input(1, 1, 4, 4); // clang-format off - input_data.FillWithYX(Array2D({ + input.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter_data(1, 1, 2, 2); + Array4D filter(1, 1, 2, 2); // clang-format off - filter_data.FillWithYX(Array2D({ + filter.FillWithYX(Array2D({ {5, 6}, {7, 8}, })); // clang-format on - ComputeAndCompare(&builder, conv, - {*Literal::CreateFromArray(input_data), - *Literal::CreateFromArray(filter_data)}, - error_spec_); + + std::unique_ptr> aexpected = + ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame); + + auto input_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR4(&builder, *aexpected, + {input_literal.get(), filter_literal.get()}, + error_spec_); } // Tests same padding for 2D convolution in raster space with an odd sized // kernel. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x3x3_Same) { ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); + { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + builder.Conv(input, filter, {1, 1}, Padding::kSame); + } - Array4D input_data(1, 1, 4, 4); + Array4D input(1, 1, 4, 4); // clang-format off - input_data.FillWithYX(Array2D({ + input.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter_data(1, 1, 3, 3); + Array4D filter(1, 1, 3, 3); // clang-format off - filter_data.FillWithYX(Array2D({ + filter.FillWithYX(Array2D({ { 5, 6, 7}, { 8, 9, 10}, {11, 12, 13}, })); // clang-format on - ComputeAndCompare(&builder, conv, - {*Literal::CreateFromArray(input_data), - *Literal::CreateFromArray(filter_data)}, - error_spec_); + + std::unique_ptr> aexpected = + ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame); + + auto input_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR4(&builder, *aexpected, + {input_literal.get(), filter_literal.get()}, + error_spec_); } XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index b72dd2707c..cf089d748d 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -177,15 +177,15 @@ void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major, bool rhs_row_major) { auto lhs_handle = client_ - ->TransferToServer(*Literal::CreateR2WithLayout( + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( {{1.0, 2.0}, {3.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) + MinorToMajorForIsRowMajor(lhs_row_major))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*Literal::CreateR2WithLayout( + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( {{1.0, 6.0}, {7.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) + MinorToMajorForIsRowMajor(rhs_row_major))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); @@ -277,62 +277,6 @@ XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorFF) { TestMatrixDot(260, 3, 520, false, false); } -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x8) { - TestMatrixDot(1, 8, 8, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x130x8) { - TestMatrixDot(1, 130, 8, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x130) { - TestMatrixDot(1, 8, 130, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x290x130) { - TestMatrixDot(1, 290, 130, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_2x1x1) { - TestMatrixDot(2, 1, 1, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_8x8x1) { - TestMatrixDot(8, 8, 1, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x1x1) { - TestMatrixDot(16, 1, 1, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x3x1) { - TestMatrixDot(16, 3, 1, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_3x3x1) { - TestMatrixDot(3, 3, 1, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_29x29x1) { - TestMatrixDot(29, 29, 1, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x2) { - TestMatrixDot(1, 8, 2, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x2x8) { - TestMatrixDot(1, 2, 8, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1) { - TestMatrixDot(259, 258, 1, true, true); -} - -XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1_FT) { - TestMatrixDot(259, 258, 1, false, true); -} - XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) { constexpr bool kLhsRowMajor = false; constexpr bool kRhsRowMajor = false; @@ -362,15 +306,15 @@ void DotOperationTest::TestNonsquareMatrixDot(bool lhs_row_major, bool rhs_row_major) { auto lhs_handle = client_ - ->TransferToServer(*Literal::CreateR2WithLayout( + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( {{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) + MinorToMajorForIsRowMajor(lhs_row_major))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*Literal::CreateR2WithLayout( + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( {{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) + MinorToMajorForIsRowMajor(rhs_row_major))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); @@ -417,31 +361,6 @@ XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64) { TestNonsquareMatrixDot(); } -XLA_TEST_F(DotOperationTest, MatrixVectorC64) { - auto lhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 2.0, 3.0, -4.0}}, LayoutUtil::MakeLayout({1, 0}))) - .ConsumeValueOrDie(); - auto rhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}, {-4.0, 4.0}}, - LayoutUtil::MakeLayout({1, 0}))) - .ConsumeValueOrDie(); - - ComputationBuilder builder(client_, TestName()); - auto prim_type = primitive_util::NativeToPrimitiveType(); - auto result = builder.Dot( - builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {1, 4}), "lhs"), - builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {4, 2}), "rhs")); - - Array2D expected({{30.0, -2.0}}); - - ComputeAndCompareR2( - &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); -} - XLA_TEST_F(DotOperationTest, ConcurrentMatMul) { ComputationBuilder builder(client_, TestName()); auto matrix1 = builder.ConstantR2({{1.0, 2.0}, {3.0, 4.0}}); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 75c9a0d3fb..95a52ecd2f 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -116,18 +116,16 @@ template ::testing::AssertionResult CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs) { auto ulhs = tensorflow::bit_cast(lhs); auto urhs = tensorflow::bit_cast(rhs); - auto lhs_double = static_cast(lhs); - auto rhs_double = static_cast(rhs); if (ulhs != urhs) { return ::testing::AssertionFailure() << tensorflow::strings::Printf( "floating values are not bitwise-equal; and equality testing " "was requested: %s=%g=%a vs %s=%g=%a", tensorflow::strings::StrCat(tensorflow::strings::Hex(ulhs)) .c_str(), - lhs_double, lhs_double, + lhs, lhs, tensorflow::strings::StrCat(tensorflow::strings::Hex(urhs)) .c_str(), - rhs_double, rhs_double); + rhs, rhs); } return ::testing::AssertionSuccess(); } @@ -151,10 +149,6 @@ template // Specializations for floating types that do bitwise comparisons when equality // comparison is requested. template <> -::testing::AssertionResult CompareEqual(bfloat16 lhs, bfloat16 rhs) { - return CompareFloatsBitwiseEqual(lhs, rhs); -} -template <> ::testing::AssertionResult CompareEqual(float lhs, float rhs) { return CompareFloatsBitwiseEqual(lhs, rhs); } @@ -244,9 +238,6 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, case U64: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; - case BF16: - match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); - break; case F32: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 70d8b764a3..458258e7ee 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -14,118 +14,49 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/llvm_compiler.h" -#include "tensorflow/compiler/xla/service/backend.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" -#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/platform_util.h" -#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/core/platform/test.h" -#include "tensorflow/stream_executor/stream_executor.h" namespace xla { namespace { -class LLVMCompilerTest : public ::testing::Test { - public: - void SetUp() override { - Platform *platform = FindPlatform(); - ASSERT_NE(platform, nullptr); - - BackendOptions backend_options; - backend_options.set_platform(platform); - StatusOr> backend_or_status = - Backend::CreateBackend(backend_options); - ASSERT_IS_OK(backend_or_status.status()); - backend_ = backend_or_status.ConsumeValueOrDie(); - } - - ~LLVMCompilerTest() override {} - - protected: - using Platform = ::perftools::gputools::Platform; - - explicit LLVMCompilerTest(string platform_name) - : platform_name_(std::move(platform_name)) {} - - void TestCompilerHooks(LLVMCompiler *compiler) { - int pre_opt_hook_call_count = 0; - int post_opt_hook_call_count = 0; - - auto pre_opt_hook = [&pre_opt_hook_call_count](const llvm::Module &) { - ++pre_opt_hook_call_count; - return Status::OK(); - }; - auto post_opt_hook = [&post_opt_hook_call_count](const llvm::Module &) { - ++post_opt_hook_call_count; - return Status::OK(); - }; - - // Create HLO module, and run the compiler. - auto builder = HloComputation::Builder(TestName()); - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(42.0))); - - auto hlo_module = CreateNewModule(); - hlo_module->AddEntryComputation(builder.Build()); - - compiler->SetPreOptimizationHook(pre_opt_hook); - compiler->SetPostOptimizationHook(post_opt_hook); - - ASSERT_TRUE(compiler - ->Compile(std::move(hlo_module), - backend_->default_stream_executor()) - .ok()); - - // Test that hooks were called. - EXPECT_EQ(1, pre_opt_hook_call_count); - EXPECT_EQ(1, post_opt_hook_call_count); - } - - private: - Platform *FindPlatform() { - for (Platform *platform : - PlatformUtil::GetSupportedPlatforms().ConsumeValueOrDie()) { - if (platform->Name() == platform_name_) { - return platform; - } - } - return nullptr; - } - - string platform_name_; - std::unique_ptr backend_; - - static string TestName() { - return ::testing::UnitTest::GetInstance()->current_test_info()->name(); - } - - static std::unique_ptr CreateNewModule() { - HloModuleConfig config; - config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); - return MakeUnique(TestName(), VersionedComputationHandle(), - config); - } -}; - -class CpuCompilerTest : public LLVMCompilerTest { - public: - CpuCompilerTest() : LLVMCompilerTest("Host") {} -}; - -class GpuCompilerTest : public LLVMCompilerTest { - public: - GpuCompilerTest() : LLVMCompilerTest("CUDA") {} -}; - -TEST_F(CpuCompilerTest, HooksTest) { - cpu::CpuCompiler compiler; - TestCompilerHooks(&compiler); -} - -TEST_F(GpuCompilerTest, HooksTest) { - gpu::GpuCompiler compiler; - TestCompilerHooks(&compiler); +class LLVMCompilerTest : public HloTestBase {}; + +XLA_TEST_F(LLVMCompilerTest, CompilerHooks) { + int pre_opt_hook_call_count = 0; + int post_opt_hook_call_count = 0; + + auto pre_opt_hook = [&pre_opt_hook_call_count](const llvm::Module &) { + ++pre_opt_hook_call_count; + return Status::OK(); + }; + auto post_opt_hook = [&post_opt_hook_call_count](const llvm::Module &) { + ++post_opt_hook_call_count; + return Status::OK(); + }; + + // Create HLO module, and run the compiler. + auto builder = HloComputation::Builder(TestName()); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42.0))); + + auto hlo_module = CreateNewModule(); + hlo_module->AddEntryComputation(builder.Build()); + + auto compiler = static_cast(backend().compiler()); + compiler->SetPreOptimizationHook(pre_opt_hook); + compiler->SetPostOptimizationHook(post_opt_hook); + + ASSERT_TRUE( + compiler + ->Compile(std::move(hlo_module), backend().default_stream_executor()) + .ok()); + + // Test that hooks were called. + EXPECT_EQ(1, pre_opt_hook_call_count); + EXPECT_EQ(1, post_opt_hook_call_count); } } // namespace diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc index a196e250d1..329b53012f 100644 --- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc @@ -136,14 +136,16 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentInputLayouts) { auto computation = builder.Build().ConsumeValueOrDie(); // Create x as a col-major array. - auto x_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( - {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1}))); + auto x_array = LiteralToShapedBuffer( + *test_utils::CreateR2LiteralWithLayout({{1.0f, 2.0f}, {3.0f, 4.0f}}, + /*minor_to_major=*/{0, 1})); EXPECT_TRUE(LayoutUtil::Equal(x_array->shape().layout(), LayoutUtil::MakeLayout({0, 1}))); // Create y as a row-major array. - auto y_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( - {{10.0f, 20.0f}, {30.0f, 40.0f}}, LayoutUtil::MakeLayout({1, 0}))); + auto y_array = LiteralToShapedBuffer( + *test_utils::CreateR2LiteralWithLayout({{10.0f, 20.0f}, {30.0f, 40.0f}}, + /*minor_to_major=*/{1, 0})); EXPECT_TRUE(LayoutUtil::Equal(y_array->shape().layout(), LayoutUtil::MakeLayout({1, 0}))); diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index d98875dbc2..c11e1df0a7 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -12,12 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/tests/local_client_test_base.h" #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/map_util.h" diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 2b0f7e6e80..2ef392508d 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -405,13 +405,13 @@ TEST_F(MapTest, MapBinaryAdder) { // for Map that used to fail in shape inference (b/28989438). XLA_TEST_F(MapTest, AddWithMixedLayouts) { ComputationBuilder builder(client_, TestName()); - std::unique_ptr param0_literal = Literal::CreateR2WithLayout( - {{1, 2}, {3, 4}}, LayoutUtil::MakeLayout({1, 0})); + std::unique_ptr param0_literal = + test_utils::CreateR2LiteralWithLayout({{1, 2}, {3, 4}}, {1, 0}); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); - std::unique_ptr param1_literal = Literal::CreateR2WithLayout( - {{10, 20}, {30, 40}}, LayoutUtil::MakeLayout({0, 1})); + std::unique_ptr param1_literal = + test_utils::CreateR2LiteralWithLayout({{10, 20}, {30, 40}}, {0, 1}); std::unique_ptr param1_data = client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index d235b9a158..72c68f24a0 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -431,9 +431,8 @@ XLA_TEST_F(ReshapeTest, ToScalar) { XLA_TEST_F(ReshapeTest, BadDimensions) { ComputationBuilder b(client_, TestName()); b.Reshape(b.ConstantR1({1}), {}, {}); - EXPECT_THAT( - ExecuteToString(&b, {}), - ::testing::HasSubstr("not a permutation of the operand dimensions")); + EXPECT_THAT(ExecuteToString(&b, {}), + ::testing::HasSubstr("dimensions not a permutation")); } XLA_TEST_F(ReshapeTest, BadNewSizes) { diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc deleted file mode 100644 index cdd3d66bbb..0000000000 --- a/tensorflow/compiler/xla/tests/test_utils.cc +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/tests/test_utils.h" - -#include "tensorflow/compiler/xla/primitive_util.h" - -namespace xla { - -namespace { - -template -void PopulateWithRandomFloatingPointData(Literal* literal) { - CHECK_EQ(literal->shape().element_type(), - primitive_util::NativeToPrimitiveType()); - std::minstd_rand0 engine; - std::uniform_real_distribution generator(0.0f, 1.0f); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); -} - -template -void PopulateWithRandomIntegralData(Literal* literal) { - CHECK_EQ(literal->shape().element_type(), - primitive_util::NativeToPrimitiveType()); - std::minstd_rand0 engine; - std::uniform_int_distribution generator( - std::numeric_limits::lowest(), std::numeric_limits::max()); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); -} - -} // namespace - -StatusOr> MakeFakeLiteral(const Shape& shape) { - if (ShapeUtil::IsTuple(shape)) { - std::vector> elements; - for (const Shape& element_shape : shape.tuple_shapes()) { - TF_ASSIGN_OR_RETURN(std::unique_ptr element, - MakeFakeLiteral(element_shape)); - elements.push_back(std::move(element)); - } - return Literal::MakeTupleOwned(std::move(elements)); - } - std::unique_ptr literal = Literal::CreateFromShape(shape); - switch (shape.element_type()) { - case F32: - PopulateWithRandomFloatingPointData(literal.get()); - break; - case F64: - PopulateWithRandomFloatingPointData(literal.get()); - break; - case S8: - PopulateWithRandomIntegralData(literal.get()); - break; - case U8: - PopulateWithRandomIntegralData(literal.get()); - break; - case S16: - PopulateWithRandomIntegralData(literal.get()); - break; - case U16: - PopulateWithRandomIntegralData(literal.get()); - break; - case S32: - PopulateWithRandomIntegralData(literal.get()); - break; - case U32: - PopulateWithRandomIntegralData(literal.get()); - break; - case S64: - PopulateWithRandomIntegralData(literal.get()); - break; - case U64: - PopulateWithRandomIntegralData(literal.get()); - break; - case PRED: { - std::uniform_int_distribution generator(0, 1); - std::minstd_rand0 engine; - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - default: - return Unimplemented("Unsupported type for fake literal generation: %s", - ShapeUtil::HumanString(shape).c_str()); - } - return std::move(literal); -} - -StatusOr>> MakeFakeArguments( - const HloModule& module) { - std::vector> arguments; - for (const ShapeLayout& shape_layout : - module.config().entry_computation_layout().parameter_layouts()) { - TF_ASSIGN_OR_RETURN(auto literal, MakeFakeLiteral(shape_layout.shape())); - arguments.push_back(std::move(literal)); - } - return std::move(arguments); -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h index 12d5255fce..f3a522b05e 100644 --- a/tensorflow/compiler/xla/tests/test_utils.h +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -23,12 +23,12 @@ limitations under the License. #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" -#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/types.h" namespace xla { +namespace test_utils { // A class which generates pseudorandom numbers of a given type within a given // range. Not cryptographically secure and likely not perfectly evenly @@ -53,15 +53,63 @@ class PseudorandomGenerator { std::mt19937 generator_; }; -// Generates fake data in a literal of the given shape, or returns an error -// status if the element type is currently unhandled for fake data generation. -StatusOr> MakeFakeLiteral(const Shape& shape); +// Convenience function for creating a rank-2 array with arbitrary layout. +template +std::unique_ptr CreateR2LiteralWithLayout( + std::initializer_list> values, + tensorflow::gtl::ArraySlice minor_to_major) { + auto literal = MakeUnique(); + const int64 d0 = values.size(); + const int64 d1 = values.begin()->size(); + literal.get()->PopulateWithValue(0, {d0, d1}); + *literal->mutable_shape()->mutable_layout() = + LayoutUtil::MakeLayout(minor_to_major); + TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); + + int64 dim0 = 0; + for (auto inner_list : values) { + int64 dim1 = 0; + for (auto value : inner_list) { + literal.get()->Set({dim0, dim1}, value); + ++dim1; + } + ++dim0; + } + return literal; +} -// Generates a vector of arguments containing fake data. The number, shape and -// layout of the arguments is appropriate for given HLO module. -StatusOr>> MakeFakeArguments( - const HloModule& module); +// Convenience function for creating a rank-3 array with arbitrary layout. +template +std::unique_ptr CreateR3LiteralWithLayout( + std::initializer_list>> + values, + tensorflow::gtl::ArraySlice minor_to_major) { + auto literal = MakeUnique(); + const int64 d0 = values.size(); + const int64 d1 = values.begin()->size(); + const int64 d2 = values.begin()->begin()->size(); + literal.get()->PopulateWithValue(0, {d0, d1, d2}); + *literal->mutable_shape()->mutable_layout() = + LayoutUtil::MakeLayout(minor_to_major); + TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); + + int64 dim0 = 0; + for (auto inner_list : values) { + int64 dim1 = 0; + for (auto inner_inner_list : inner_list) { + int64 dim2 = 0; + for (auto value : inner_inner_list) { + literal.get()->Set({dim0, dim1, dim2}, value); + ++dim2; + } + ++dim1; + } + ++dim0; + } + return literal; +} +} // namespace test_utils } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_TESTS_TEST_UTILS_H_ diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index 091fa0c3ec..759921dce5 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -88,7 +88,6 @@ cc_library( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:testing", "//tensorflow/compiler/xla/service:session_proto", - "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md index b768b94e77..2c864d77a2 100644 --- a/tensorflow/compiler/xla/tools/parser/README.md +++ b/tensorflow/compiler/xla/tools/parser/README.md @@ -43,22 +43,14 @@ operand : shape name ; -attributes +extra_attributes : /*empty*/ - | ',' attribute - | ',' attribute attributes + | ',' extra_attribute + | ',' extra_attribute extra_attributes ; -attribute +extra_attribute : attribute_name attribute_value ; -attribute_value - : kInt - | kName - | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ - | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ - | [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* /*pad_pattern*/ - | '{' sub_attributes '}' - ; param_list : '(' param_list1 ')' diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index b5befbf58b..d104ff3460 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -122,7 +122,7 @@ TokKind HloLexer::LexToken() { current_ptr_++; return TokKind::kArrow; } - return LexNumberOrPattern(); + return LexDigitOrNegative(); case '=': return TokKind::kEqual; case ',': @@ -149,15 +149,12 @@ TokKind HloLexer::LexToken() { } } -// Lex a shape, name, keyword, opcode, attribute name, or the dim labels -// pattern. -// +// Lex a shape, name, keyword, or opcode. // shape ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})? // name ::= [a-zA-Z_][a-zA-Z0-9_.-]*: // keyword ::= HloModule, ENTRY, ... // opcode ::= add, greater-than, ... // attribute_name ::= condition, body, dimensions, ... -// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} TokKind HloLexer::LexIdentifier() { { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); @@ -223,16 +220,6 @@ TokKind HloLexer::LexIdentifier() { return TokKind::kOpcode; } - { - auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); - static LazyRE2 dim_labels_pattern = { - R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; - if (RE2::Consume(&consumable, *dim_labels_pattern)) { - current_ptr_ = consumable.begin(); - str_val_.assign(token_start_, current_ptr_); - return TokKind::kDimLabels; - } - } current_ptr_ = token_start_ + 1; return TokKind::kError; } @@ -253,20 +240,15 @@ TokKind HloLexer::LexPercent() { return TokKind::kError; } -// Lex integer and floating-point values, -inf, and patterns for dim labels, -// dxd (e.g. 1x2x3), and pad. -// -// fp with exp ::= [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) -// fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) -// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} -// dxd_pattern ::= [0-9]+(x[0-9]+)+ -// pad_pattern ::= [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* -// int ::= [-]?[0-9]+ -// negative inf ::= '-inf' -TokKind HloLexer::LexNumberOrPattern() { +// Lex integer and floating-point values, and -inf. +// int [-]?[0-9]+ +// fp with exp [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) +// fp without exp [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) +// negative inf -inf +TokKind HloLexer::LexDigitOrNegative() { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); static LazyRE2 float_pattern = { - R"([-]?((\d+|\d+[.]\d*|\d*[.]\d+)([eE][+-]?\d+))|[-]?(\d+[.]\d*|\d*[.]\d+))"}; + R"([-]?((\d+|\d+[.]\d*|\d*[.]\d+)([eE][+-]?\d+))|(\d+[.]\d*|\d*[.]\d+))"}; if (RE2::Consume(&consumable, *float_pattern)) { current_ptr_ = consumable.begin(); tensorflow::strings::safe_strtod(string(token_start_, current_ptr_).c_str(), @@ -274,30 +256,6 @@ TokKind HloLexer::LexNumberOrPattern() { return TokKind::kDecimal; } - static LazyRE2 dim_labels_pattern = { - R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; - static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"}; - static LazyRE2 pad_pattern = { - R"([0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*)"}; - - if (RE2::Consume(&consumable, *dim_labels_pattern)) { - current_ptr_ = consumable.begin(); - str_val_.assign(token_start_, current_ptr_); - return TokKind::kDimLabels; - } - - if (RE2::Consume(&consumable, *dxd_pattern)) { - current_ptr_ = consumable.begin(); - str_val_.assign(token_start_, current_ptr_); - return TokKind::kDxD; - } - - if (RE2::Consume(&consumable, *pad_pattern)) { - current_ptr_ = consumable.begin(); - str_val_.assign(token_start_, current_ptr_); - return TokKind::kPad; - } - static LazyRE2 int_pattern = {R"([-]?\d+)"}; if (RE2::Consume(&consumable, *int_pattern)) { current_ptr_ = consumable.begin(); @@ -392,12 +350,6 @@ string TokKindToString(TokKind kind) { return "kName"; case TokKind::kAttributeName: return "kAttributeName"; - case TokKind::kDimLabels: - return "kDimLabels"; - case TokKind::kDxD: - return "kDxD"; - case TokKind::kPad: - return "kPad"; case TokKind::kShape: return "kShape"; case TokKind::kOpcode: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 79c4f271a1..3b9efcb92d 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -37,15 +37,11 @@ class HloLexer { } TokKind Lex() { return current_kind_ = LexToken(); } - TokKind GetKind() const { return current_kind_; } string GetStrVal() const { switch (GetKind()) { case TokKind::kName: case TokKind::kAttributeName: - case TokKind::kDimLabels: - case TokKind::kDxD: - case TokKind::kPad: return str_val_; default: LOG(FATAL) << "This token does not have string value"; @@ -96,7 +92,7 @@ class HloLexer { TokKind LexPercent(); TokKind LexShape(); TokKind LexConstant(); - TokKind LexNumberOrPattern(); + TokKind LexDigitOrNegative(); TokKind LexComment(); const tensorflow::StringPiece buf_; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index fed0492a54..6c2e37e3b5 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -28,9 +28,6 @@ namespace tools { namespace { using tensorflow::StringPiece; -using tensorflow::gtl::optional; -using tensorflow::str_util::Split; -using tensorflow::str_util::SplitAndParseAsInts; using tensorflow::strings::Printf; using tensorflow::strings::StrAppend; using tensorflow::strings::StrCat; @@ -60,6 +57,7 @@ class HloParser { bool ParseInstructionList(HloComputation::Builder* builder, string* root_name); bool ParseInstruction(HloComputation::Builder* builder, string* root_name); + bool ParseSharding(HloInstruction* instruction); bool ParseControlPredecessors(HloInstruction* instruction); bool ParseLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseTupleLiteral(std::unique_ptr* literal, const Shape& shape); @@ -80,73 +78,10 @@ class HloParser { bool ParseOperands(std::vector* operands, const int expected_size); - // Describes the start, limit, and stride on every dimension of the operand - // being sliced. - struct SliceRanges { - std::vector starts; - std::vector limits; - std::vector strides; - }; - - // Types of attributes. - enum class AttrTy { - kInt64, - kFloat, - kBracedInt64List, - kHloComputation, - kWindow, - kConvolutionDimensionNumbers, - kSharding, - kInstructionList, - kSliceRanges, - kPaddingConfig, - }; - - struct AttrConfig { - bool required; // whether it's required or optional - AttrTy attr_type; // what type it is - void* result; // where to store the parsed result. - }; - - // Parses attributes given names and configs of the attributes. Each parsed - // result is passed back through the result pointer in corresponding - // AttrConfig. Note that the result pointer must point to a optional typed - // variable which outlives this function. Returns false on error. You should - // not use the any of the results if this function failed. - // - // Example usage: - // - // std::unordered_map attrs; - // optional foo; - // attrs["foo"] = {/*required=*/false, AttrTy::kInt64, &foo}; - // optional bar; - // attrs["bar"] = {/*required=*/true, AttrTy::kWindow, &bar}; - // if (!ParseAttribute(attrs)) { - // return false; // Do not use 'foo' 'bar' if failed. - // } - // // Do something with 'bar'. - // if (foo) { // If attr foo is seen, do something with 'foo'. } - // - bool ParseAttributes(const std::unordered_map& attrs); - - // Parses a name and finds the corresponding hlo computation. - bool ParseComputationName(HloComputation** value); - // Parses a list of names and finds the corresponding hlo instructions. - bool ParseInstructionNames(std::vector* instructions); - bool ParseWindow(Window* window); - bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); - bool ParsePaddingConfig(PaddingConfig* padding); - bool ParseSharding(OpSharding* sharding); - bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); - - // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. - bool ParseDxD(const string& name, std::vector* result); - // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. - bool ParseWindowPad(std::vector>* pad); - - bool ParseSliceRanges(SliceRanges* result); - bool ParseInt64List(const TokKind start, const TokKind end, - const TokKind delim, std::vector* result); + template + bool ParseExtraAttribute(T* value, const string& expected_attribute); + template + bool ParseAttributeValue(T* value); bool ParseParamList(); bool ParseName(string* result); @@ -279,7 +214,7 @@ bool HloParser::ParseInstructionList(HloComputation::Builder* builder, "expects '}' at the end of instruction list."); } -// instruction ::= ('ROOT')? name '=' shape opcode operands (attribute)* +// instruction ::= ('ROOT')? name '=' shape opcode operands (extra_attribute)* bool HloParser::ParseInstruction(HloComputation::Builder* builder, string* root_name) { string name; @@ -295,15 +230,6 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (is_root) { *root_name = name; } - - // Add optional attributes. - std::unordered_map attrs; - optional sharding; - attrs["sharding"] = {/*required=*/false, AttrTy::kSharding, &sharding}; - optional> predecessors; - attrs["control-predecessors"] = {/*required=*/false, AttrTy::kInstructionList, - &predecessors}; - HloInstruction* instruction; switch (opcode) { case HloOpcode::kParameter: { @@ -311,8 +237,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (!ParseToken(TokKind::kLparen, "expects '(' before parameter number") || !ParseInt64(¶meter_number) || - !ParseToken(TokKind::kRparen, "expects ')' after parameter number") || - !ParseAttributes(attrs)) { + !ParseToken(TokKind::kRparen, "expects ')' after parameter number")) { return false; } instruction = builder->AddInstruction( @@ -324,8 +249,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (!ParseToken(TokKind::kLparen, "expects '(' before constant literal") || !ParseLiteral(&literal, shape) || - !ParseToken(TokKind::kRparen, "expects ')' after constant literal") || - !ParseAttributes(attrs)) { + !ParseToken(TokKind::kRparen, "expects ')' after constant literal")) { return false; } instruction = builder->AddInstruction( @@ -351,8 +275,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kSin: case HloOpcode::kSort: case HloOpcode::kTanh: { - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + if (!ParseOperands(&operands, /*expected_size=*/1)) { return false; } instruction = builder->AddInstruction( @@ -382,8 +305,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: { - if (!ParseOperands(&operands, /*expected_size=*/2) || - !ParseAttributes(attrs)) { + if (!ParseOperands(&operands, /*expected_size=*/2)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateBinary( @@ -393,8 +315,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, // Ternary ops. case HloOpcode::kClamp: case HloOpcode::kSelect: { - if (!ParseOperands(&operands, /*expected_size=*/3) || - !ParseAttributes(attrs)) { + if (!ParseOperands(&operands, /*expected_size=*/3)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateTernary( @@ -403,8 +324,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } // Other supported ops. case HloOpcode::kConvert: { - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + if (!ParseOperands(&operands, /*expected_size=*/1)) { return false; } instruction = builder->AddInstruction( @@ -412,8 +332,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kCrossReplicaSum: { - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + if (!ParseOperands(&operands, /*expected_size=*/1)) { return false; } instruction = builder->AddInstruction( @@ -421,8 +340,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kReshape: { - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + if (!ParseOperands(&operands, /*expected_size=*/1)) { return false; } instruction = builder->AddInstruction( @@ -430,7 +348,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kTuple: { - if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + if (!ParseOperands(&operands)) { return false; } instruction = @@ -438,376 +356,126 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kWhile: { - optional condition; - optional body; - attrs["condition"] = {/*required=*/true, AttrTy::kHloComputation, - &condition}; - attrs["body"] = {/*required=*/true, AttrTy::kHloComputation, &body}; + HloComputation* condition; + HloComputation* body; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + !ParseExtraAttribute(&condition, + /*expected_attribute=*/"condition") || + !ParseExtraAttribute(&body, /*expected_attribute=*/"body")) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateWhile( - shape, *condition, *body, /*init=*/operands[0])); + shape, condition, body, /*init=*/operands[0])); break; } case HloOpcode::kRecv: { - optional channel_id; - attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; + int64 channel_id; if (!ParseOperands(&operands, /*expected_size=*/0) || - !ParseAttributes(attrs)) { + !ParseExtraAttribute(&channel_id, + /*expected_attribute=*/"channel_id")) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateRecv(shape.tuple_shapes(0), *channel_id)); - break; - } - case HloOpcode::kRecvDone: { - optional channel_id; - attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { - return false; - } - if (channel_id != operands[0]->channel_id()) { - return false; - } - instruction = - builder->AddInstruction(HloInstruction::CreateRecvDone(operands[0])); + HloInstruction::CreateRecv(shape, channel_id)); break; } case HloOpcode::kSend: { - optional channel_id; - attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; + int64 channel_id; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + !ParseExtraAttribute(&channel_id, + /*expected_attribute=*/"channel_id")) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateSend(operands[0], *channel_id)); - break; - } - case HloOpcode::kSendDone: { - optional channel_id; - attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { - return false; - } - if (channel_id != operands[0]->channel_id()) { - return false; - } - instruction = - builder->AddInstruction(HloInstruction::CreateSendDone(operands[0])); + HloInstruction::CreateSend(operands[0], channel_id)); break; } case HloOpcode::kGetTupleElement: { - optional index; - attrs["index"] = {/*required=*/true, AttrTy::kInt64, &index}; + int64 index; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + !ParseExtraAttribute(&index, /*expected_attribute=*/"index")) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateGetTupleElement(shape, operands[0], *index)); + HloInstruction::CreateGetTupleElement(shape, operands[0], index)); break; } case HloOpcode::kCall: { - optional to_apply; - attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, - &to_apply}; - if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction( - HloInstruction::CreateCall(shape, operands, *to_apply)); - break; - } - case HloOpcode::kReduceWindow: { - optional reduce_computation; - optional window; - attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; - attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, - &reduce_computation}; - if (!ParseOperands(&operands, /*expected_size=*/2) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateReduceWindow( - shape, /*operand=*/operands[0], /*init_value=*/operands[1], *window, - *reduce_computation)); - break; - } - case HloOpcode::kConvolution: { - optional window; - optional dnums; - attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; - attrs["dim_labels"] = {/*required=*/true, - AttrTy::kConvolutionDimensionNumbers, &dnums}; - if (!ParseOperands(&operands, /*expected_size=*/2) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateConvolve( - shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums)); - break; - } - case HloOpcode::kBroadcast: { - optional> broadcast_dimensions; - attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, - &broadcast_dimensions}; - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateBroadcast( - shape, operands[0], *broadcast_dimensions)); - break; - } - case HloOpcode::kConcatenate: { - optional> dimensions; - attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, - &dimensions}; - if (!ParseOperands(&operands) || !ParseAttributes(attrs) || - dimensions->size() != 1) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateConcatenate( - shape, operands, dimensions->at(0))); - break; - } - case HloOpcode::kMap: { - optional to_apply; - attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, - &to_apply}; - if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction( - HloInstruction::CreateMap(shape, operands, *to_apply)); - break; - } - case HloOpcode::kReduce: { - optional reduce_computation; - attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, - &reduce_computation}; - optional> dimensions_to_reduce; - attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, - &dimensions_to_reduce}; - if (!ParseOperands(&operands, /*expected_size=*/2) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateReduce( - shape, /*operand=*/operands[0], /*init_value=*/operands[1], - *dimensions_to_reduce, *reduce_computation)); - break; - } - case HloOpcode::kReverse: { - optional> dimensions; - attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, - &dimensions}; - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction( - HloInstruction::CreateReverse(shape, operands[0], *dimensions)); - break; - } - case HloOpcode::kSelectAndScatter: { - optional select; - attrs["select"] = {/*required=*/true, AttrTy::kHloComputation, &select}; - optional scatter; - attrs["scatter"] = {/*required=*/true, AttrTy::kHloComputation, &scatter}; - optional window; - attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; - if (!ParseOperands(&operands, /*expected_size=*/3) || - !ParseAttributes(attrs)) { - return false; - } - instruction = - builder->AddInstruction(HloInstruction::CreateSelectAndScatter( - shape, /*operand=*/operands[0], *select, *window, - /*source=*/operands[1], /*init_value=*/operands[2], *scatter)); - break; - } - case HloOpcode::kSlice: { - optional slice_ranges; - attrs["slice"] = {/*required=*/true, AttrTy::kSliceRanges, &slice_ranges}; - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateSlice( - shape, operands[0], slice_ranges->starts, slice_ranges->limits, - slice_ranges->strides)); - break; - } - case HloOpcode::kDynamicSlice: { - optional> dynamic_slice_sizes; - attrs["dynamic_slice_sizes"] = { - /*required=*/true, AttrTy::kBracedInt64List, &dynamic_slice_sizes}; - if (!ParseOperands(&operands, /*expected_size=*/2) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateDynamicSlice( - shape, /*operand=*/operands[0], /*start_indices=*/operands[1], - *dynamic_slice_sizes)); - break; - } - case HloOpcode::kDynamicUpdateSlice: { - if (!ParseOperands(&operands, /*expected_size=*/3) || - !ParseAttributes(attrs)) { - return false; - } - instruction = - builder->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - shape, /*operand=*/operands[0], /*update=*/operands[1], - /*start_indices=*/operands[2])); - break; - } - case HloOpcode::kTranspose: { - optional> dimensions; - attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, - &dimensions}; - if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseAttributes(attrs)) { + HloComputation* to_apply; + if (!ParseOperands(&operands) || + !ParseExtraAttribute(&to_apply, + /*expected_attribute=*/"to_apply")) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateTranspose(shape, operands[0], *dimensions)); - break; - } - case HloOpcode::kBatchNormTraining: { - optional epsilon; - attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; - optional feature_index; - attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, - &feature_index}; - if (!ParseOperands(&operands, /*expected_size=*/3) || - !ParseAttributes(attrs)) { - return false; - } - instruction = - builder->AddInstruction(HloInstruction::CreateBatchNormTraining( - shape, /*operand=*/operands[0], /*scale=*/operands[1], - /*offset=*/operands[2], *epsilon, *feature_index)); - break; - } - case HloOpcode::kBatchNormInference: { - optional epsilon; - attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; - optional feature_index; - attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, - &feature_index}; - if (!ParseOperands(&operands, /*expected_size=*/5) || - !ParseAttributes(attrs)) { - return false; - } - instruction = - builder->AddInstruction(HloInstruction::CreateBatchNormInference( - shape, /*operand=*/operands[0], /*scale=*/operands[1], - /*offset=*/operands[2], /*mean=*/operands[3], - /*variance=*/operands[4], *epsilon, *feature_index)); - break; - } - case HloOpcode::kBatchNormGrad: { - optional epsilon; - attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; - optional feature_index; - attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, - &feature_index}; - if (!ParseOperands(&operands, /*expected_size=*/5) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreateBatchNormGrad( - shape, /*operand=*/operands[0], /*scale=*/operands[1], - /*mean=*/operands[2], /*variance=*/operands[3], - /*grad_output=*/operands[4], *epsilon, *feature_index)); - break; - } - case HloOpcode::kPad: { - optional padding; - attrs["padding"] = {/*required=*/true, AttrTy::kPaddingConfig, &padding}; - if (!ParseOperands(&operands, /*expected_size=*/2) || - !ParseAttributes(attrs)) { - return false; - } - instruction = builder->AddInstruction(HloInstruction::CreatePad( - shape, operands[0], /*padding_value=*/operands[1], *padding)); + HloInstruction::CreateCall(shape, operands, to_apply)); break; } + case HloOpcode::kBroadcast: case HloOpcode::kCustomCall: + case HloOpcode::kConcatenate: case HloOpcode::kReducePrecision: + case HloOpcode::kConvolution: + case HloOpcode::kMap: + case HloOpcode::kPad: + case HloOpcode::kReduce: + case HloOpcode::kReduceWindow: + case HloOpcode::kSelectAndScatter: + case HloOpcode::kReverse: case HloOpcode::kRng: + case HloOpcode::kSlice: + case HloOpcode::kDynamicSlice: + case HloOpcode::kDynamicUpdateSlice: + case HloOpcode::kTranspose: case HloOpcode::kFusion: + case HloOpcode::kBatchNormTraining: + case HloOpcode::kBatchNormInference: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: + case HloOpcode::kBatchNormGrad: case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); } - // Add common attrs (sharding, control predecessors) to the instruction, if - // they were seen. - if (sharding) { - instruction->set_sharding( - HloSharding::FromProto(sharding.value()).ValueOrDie()); - } - if (predecessors) { - for (auto* pre : *predecessors) { - Status status = pre->AddControlDependencyTo(instruction); - if (!status.ok()) { - return TokenError(StrCat("error adding control dependency for: ", name, - " status: ", status.ToString())); - } + bool has_sharding = false; + bool has_control = false; + while (EatIfPresent(TokKind::kComma)) { + string attribute_name; + if (!ParseAttributeName(&attribute_name)) { + return TokenError("expects ', sharding=' or ', control-predecessors='"); } - } - return AddInstruction(name, instruction); -} - -// ::= '{' (single_sharding | tuple_sharding) '}' -// -// tuple_sharding ::= single_sharding* (',' single_sharding)* -bool HloParser::ParseSharding(OpSharding* sharding) { - // A single sharding starts with '{' and is not followed by '{'. - // A tuple sharding starts with '{' and is followed by '{', or is '{''}' for - // an empty tuple. - if (!ParseToken(TokKind::kLbrace, - "expected '{' to start sharding attribute")) { - return false; - } - if (lexer_.GetKind() != TokKind::kLbrace && - lexer_.GetKind() != TokKind::kRbrace) { - return ParseSingleSharding(sharding, /*lbrace_pre_lexed=*/true); - } - - // Tuple sharding. - // Allow empty tuple shardings. - if (lexer_.GetKind() != TokKind::kRbrace) { - do { - if (!ParseSingleSharding(sharding->add_tuple_shardings(), - /*lbrace_pre_lexed=*/false)) { + if (attribute_name == "sharding") { + // Parse "sharding=". + if (has_sharding) { + return TokenError("expects at most 1 'sharding='"); + } + has_sharding = true; + if (!ParseSharding(instruction)) { return false; } - } while (EatIfPresent(TokKind::kComma)); + } else if (attribute_name == "control-predecessors") { + // Parse "control-predecessors" + if (has_control) { + return TokenError("expects at most 1 'control-predecessors='"); + } + has_control = true; + if (!ParseControlPredecessors(instruction)) { + return false; + } + } else { + return TokenError(StrCat("unexpected attribute: ", attribute_name)); + } } - sharding->set_type(OpSharding::Type::OpSharding_Type_TUPLE); - return ParseToken(TokKind::kRbrace, "expected '}' to end sharding attribute"); + return AddInstruction(name, instruction); } -// ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? -// ('devices=' ('[' dims ']')* device_list)? '}' -// dims ::= int_list device_list ::= int_list -bool HloParser::ParseSingleSharding(OpSharding* sharding, - bool lbrace_pre_lexed) { - if (!lbrace_pre_lexed && - !ParseToken(TokKind::kLbrace, +// ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? ('devices=' ('[' +// dims ']')* device_list)? '}' dims ::= int_list device_list ::= int_list +bool HloParser::ParseSharding(HloInstruction* instruction) { + if (!ParseToken(TokKind::kLbrace, "expected '{' to start sharding attribute")) { return false; } @@ -877,6 +545,7 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, } } + OpSharding sharding; if (replicated) { if (!devices.empty()) { return TokenError( @@ -886,7 +555,7 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, return TokenError( "replicated shardings should not have any tile shape set"); } - sharding->set_type(OpSharding::Type::OpSharding_Type_REPLICATED); + sharding.set_type(OpSharding::Type::OpSharding_Type_REPLICATED); } else if (maximal) { if (devices.size() != 1) { return TokenError( @@ -895,8 +564,8 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, if (!ShapeUtil::Equal(tile_shape, Shape())) { return TokenError("maximal shardings should not have any tile shape set"); } - sharding->set_type(OpSharding::Type::OpSharding_Type_MAXIMAL); - sharding->add_tile_assignment_devices(devices[0]); + sharding.set_type(OpSharding::Type::OpSharding_Type_MAXIMAL); + sharding.add_tile_assignment_devices(devices[0]); } else { if (devices.size() <= 1) { return TokenError( @@ -910,43 +579,47 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, "non-maximal shardings must have a tile assignment list including " "dimensions"); } - sharding->set_type(OpSharding::Type::OpSharding_Type_OTHER); - *sharding->mutable_tile_shape() = tile_shape; + sharding.set_type(OpSharding::Type::OpSharding_Type_OTHER); + *sharding.mutable_tile_shape() = tile_shape; for (int64 dim : tile_assignment_dimensions) { - sharding->add_tile_assignment_dimensions(dim); + sharding.add_tile_assignment_dimensions(dim); } for (int64 device : devices) { - sharding->add_tile_assignment_devices(device); + sharding.add_tile_assignment_devices(device); } } + instruction->set_sharding(HloSharding::FromProto(sharding).ValueOrDie()); lexer_.Lex(); return true; } // '{' name+ '}' -bool HloParser::ParseInstructionNames( - std::vector* instructions) { +bool HloParser::ParseControlPredecessors(HloInstruction* instruction) { if (!ParseToken(TokKind::kLbrace, - "expects '{' at the beginning of instruction name list")) { + "expects '{' at the beginning of control predecessors")) { return false; } do { string name; if (!ParseName(&name)) { - return TokenError("expects a instruction name"); + return TokenError("expects a control predecessor"); } - HloInstruction* instr = + HloInstruction* pre = tensorflow::gtl::FindPtrOrNull(instruction_pool_, name); - if (!instr) { + if (!pre) { return TokenError( - Printf("instruction '%s' is not defined", name.c_str())); + StrCat("control predecessor ", name, " is not defined: ")); + } + Status status = pre->AddControlDependencyTo(instruction); + if (!status.ok()) { + return TokenError(StrCat("error adding control dependency for: ", name, + " status: ", status.ToString())); } - instructions->push_back(instr); } while (EatIfPresent(TokKind::kComma)); return ParseToken(TokKind::kRbrace, - "expects '}' at the end of control instructions"); + "expects '}' at the end of control predecessors"); } bool HloParser::SetValueInLiteral(int64 value, int64 linear_index, @@ -1284,134 +957,28 @@ bool HloParser::ParseOperands(std::vector* operands, return true; } -bool HloParser::ParseAttributes( - const std::unordered_map& attrs) { - std::unordered_set seen_attrs; - while (EatIfPresent(TokKind::kComma)) { - string name; - if (!ParseAttributeName(&name)) { - return TokenError("error parsing attributes"); - } - VLOG(1) << "Parsing attribute " << name; - if (!seen_attrs.insert(name).second) { - return TokenError(Printf("attribute %s already exists", name.c_str())); - } - auto attr_it = attrs.find(name); - if (attr_it == attrs.end()) { - return TokenError(Printf("unexpected attribute %s", name.c_str())); - } - AttrTy attr_type = attr_it->second.attr_type; - void* attr_out_ptr = attr_it->second.result; - bool success = [&] { - switch (attr_type) { - case AttrTy::kInt64: { - int64 result; - if (!ParseInt64(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; - } - case AttrTy::kFloat: { - double result; - if (!ParseDouble(&result)) { - return false; - } - if (result > std::numeric_limits::max() || - result < std::numeric_limits::lowest()) { - return TokenError("value out of range for float"); - } - static_cast*>(attr_out_ptr) - ->emplace(static_cast(result)); - return true; - } - case AttrTy::kHloComputation: { - HloComputation* result; - if (!ParseComputationName(&result)) { - return false; - } - static_cast*>(attr_out_ptr) - ->emplace(result); - return true; - } - case AttrTy::kWindow: { - Window result; - if (!ParseWindow(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; - } - case AttrTy::kConvolutionDimensionNumbers: { - ConvolutionDimensionNumbers result; - if (!ParseConvolutionDimensionNumbers(&result)) { - return false; - } - static_cast*>(attr_out_ptr) - ->emplace(result); - return true; - } - case AttrTy::kSharding: { - OpSharding sharding; - if (!ParseSharding(&sharding)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(sharding); - return true; - } - case AttrTy::kInstructionList: { - std::vector result; - if (!ParseInstructionNames(&result)) { - return false; - } - static_cast>*>(attr_out_ptr) - ->emplace(result); - return true; - } - case AttrTy::kBracedInt64List: { - std::vector result; - if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, - TokKind::kComma, &result)) { - return false; - } - static_cast>*>(attr_out_ptr) - ->emplace(result); - return true; - } - case AttrTy::kSliceRanges: { - SliceRanges result; - if (!ParseSliceRanges(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; - } - case AttrTy::kPaddingConfig: { - PaddingConfig result; - if (!ParsePaddingConfig(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; - } - } - }(); - if (!success) { - return TokenError(Printf("error parsing attribute %s", name.c_str())); - } +// extra_attribute ::= ',' attribute_name value +template +bool HloParser::ParseExtraAttribute(T* value, + const string& expected_attribute) { + if (!ParseToken(TokKind::kComma, + "expects ',' in front of an extra attribute")) { + return false; } - // Check that all required attrs were seen. - for (const auto& attr_it : attrs) { - if (attr_it.second.required && - seen_attrs.find(attr_it.first) == seen_attrs.end()) { - return TokenError(Printf("attribute %s is expected but not seen", - attr_it.first.c_str())); - } + string attribute_name; + if (!ParseAttributeName(&attribute_name) && + attribute_name != expected_attribute) { + return TokenError(StrCat("expects attribute name: ", expected_attribute)); + } + if (!ParseAttributeValue(value)) { + return TokenError( + StrCat("expects value for attribute: ", expected_attribute)); } return true; } -bool HloParser::ParseComputationName(HloComputation** value) { +template <> +bool HloParser::ParseAttributeValue(HloComputation** value) { string name; if (!ParseName(&name)) { return TokenError("expects computation name"); @@ -1423,269 +990,9 @@ bool HloParser::ParseComputationName(HloComputation** value) { return true; } -// ::= '{' size stride? pad? lhs_dilate? rhs_dilate? '}' -// The subattributes can appear in any order. 'size=' is required, others are -// optional. -bool HloParser::ParseWindow(Window* window) { - if (!ParseToken(TokKind::kLbrace, "expected '{' to start window attribute")) { - return false; - } - - std::vector size; - std::vector stride; - std::vector> pad; - std::vector lhs_dilate; - std::vector rhs_dilate; - while (lexer_.GetKind() != TokKind::kRbrace) { - string field_name; - if (!ParseAttributeName(&field_name)) { - return TokenError("expects sub-attributes in window"); - } - bool ok = [&] { - if (field_name == "size") { - return ParseDxD("size", &size); - } - if (field_name == "stride") { - return ParseDxD("stride", &stride); - } - if (field_name == "lhs_dilate") { - return ParseDxD("lhs_dilate", &lhs_dilate); - } - if (field_name == "rhs_dilate") { - return ParseDxD("rls_dilate", &rhs_dilate); - } - if (field_name == "pad") { - return ParseWindowPad(&pad); - } - return TokenError(StrCat("unexpected attribute name: ", field_name)); - }(); - if (!ok) { - return false; - } - } - - if (size.empty()) { - return TokenError( - "sub-attribute 'size=' is required in the window attribute"); - } - if (!stride.empty() && stride.size() != size.size()) { - return TokenError("expects 'stride=' has the same size as 'size='"); - } - if (!lhs_dilate.empty() && lhs_dilate.size() != size.size()) { - return TokenError("expects 'lhs_dilate=' has the same size as 'size='"); - } - if (!rhs_dilate.empty() && rhs_dilate.size() != size.size()) { - return TokenError("expects 'rhs_dilate=' has the same size as 'size='"); - } - if (!pad.empty() && pad.size() != size.size()) { - return TokenError("expects 'pad=' has the same size as 'size='"); - } - - for (int i = 0; i < size.size(); i++) { - window->add_dimensions()->set_size(size[i]); - if (!pad.empty()) { - window->mutable_dimensions(i)->set_padding_low(pad[i][0]); - window->mutable_dimensions(i)->set_padding_high(pad[i][1]); - } - // If some field is not present, it has the default value. - window->mutable_dimensions(i)->set_stride(stride.empty() ? 1 : stride[i]); - window->mutable_dimensions(i)->set_base_dilation( - lhs_dilate.empty() ? 1 : lhs_dilate[i]); - window->mutable_dimensions(i)->set_window_dilation( - rhs_dilate.empty() ? 1 : rhs_dilate[i]); - } - return ParseToken(TokKind::kRbrace, "expected '}' to end window attribute"); -} - -// This is the inverse of HloInstruction::ConvolutionDimensionNumbersToString. -// The string looks like "dim_labels=0bf_0io->0bf". -bool HloParser::ParseConvolutionDimensionNumbers( - ConvolutionDimensionNumbers* dnums) { - if (lexer_.GetKind() != TokKind::kDimLabels) { - return TokenError("expects dim labels pattern, e.g., 'bf0_0io->0bf'"); - } - string str = lexer_.GetStrVal(); - - // The str is expected to have 3 items, lhs, rhs, out, and it must looks like - // lhs_rhs->out, that is, the first separator is "_" and the second is "->". - // So we replace the "->" with "_" and then split on "_". - str = tensorflow::str_util::StringReplace(str, /*oldsub=*/"->", - /*newsub=*/"_", - /*replace_all=*/false); - std::vector lhs_rhs_out = Split(str, "_"); - if (lhs_rhs_out.size() != 3) { - LOG(FATAL) << "expects 3 items: lhs, rhs, and output dims, but sees " - << str; - } - - const int64 rank = lhs_rhs_out[0].length(); - if (rank != lhs_rhs_out[1].length() || rank != lhs_rhs_out[2].length()) { - return TokenError( - "convolution lhs, rhs, and output must have the same rank"); - } - if (rank < 3) { - return TokenError("convolution rank must >=3"); - } - - auto is_unique = [](string str) -> bool { - std::sort(str.begin(), str.end()); - return std::unique(str.begin(), str.end()) == str.end(); - }; - - // lhs - { - const string& lhs = lhs_rhs_out[0]; - if (!is_unique(lhs)) { - return TokenError( - StrCat("expects unique lhs dimension numbers, but sees ", lhs)); - } - for (int i = 0; i < rank - 2; i++) { - dnums->add_spatial_dimensions(-1); - } - for (int i = 0; i < rank; i++) { - char c = lhs[i]; - if (c == 'b') { - dnums->set_input_batch_dimension(i); - } else if (c == 'f') { - dnums->set_input_feature_dimension(i); - } else if (c < '0' + rank && c >= '0') { - dnums->set_spatial_dimensions(c - '0', i); - } else { - return TokenError( - Printf("expects [0-%lldbf] in lhs dimension numbers", rank - 1)); - } - } - } - // rhs - { - const string& rhs = lhs_rhs_out[1]; - if (!is_unique(rhs)) { - return TokenError( - StrCat("expects unique rhs dimension numbers, but sees ", rhs)); - } - for (int i = 0; i < rank - 2; i++) { - dnums->add_kernel_spatial_dimensions(-1); - } - for (int i = 0; i < rank; i++) { - char c = rhs[i]; - if (c == 'i') { - dnums->set_kernel_input_feature_dimension(i); - } else if (c == 'o') { - dnums->set_kernel_output_feature_dimension(i); - } else if (c < '0' + rank && c >= '0') { - dnums->set_kernel_spatial_dimensions(c - '0', i); - } else { - return TokenError( - Printf("expects [0-%lldio] in rhs dimension numbers", rank - 1)); - } - } - } - // output - { - const string& out = lhs_rhs_out[2]; - if (!is_unique(out)) { - return TokenError( - StrCat("expects unique output dimension numbers, but sees ", out)); - } - for (int i = 0; i < rank; i++) { - char c = out[i]; - if (c == 'b') { - dnums->set_output_batch_dimension(i); - } else if (c == 'f') { - dnums->set_output_feature_dimension(i); - } else if (c < '0' + rank && c >= '0') { - if (dnums->spatial_dimensions(c - '0') != i) { - return TokenError( - "output spatial dimensions should be the same as input spatial " - "dimensions"); - } - } else { - return TokenError( - Printf("expects [0-%lldbf] in output dimension numbers", rank - 1)); - } - } - } - - lexer_.Lex(); - return true; -} - -// ::= '{' ranges '}' -// ::= /*empty*/ -// ::= range (',' range)* -// range ::= '[' start ':' limit (':' stride)? ']' -// -// The slice ranges are printed as: -// -// {[dim0_start:dim0_limit:dim0stride], [dim1_start:dim1_limit], ...} -// -// This function extracts the starts, limits, and strides as 3 vectors to the -// result. If stride is not present, stride is 1. For example, if the slice -// ranges is printed as: -// -// {[2:3:4], [5:6:7], [8:9]} -// -// The the parsed result will be: -// -// {/*starts=*/{2, 5, 8}, /*limits=*/{3, 6, 9}, /*strides=*/{4, 7, 1}} -// -bool HloParser::ParseSliceRanges(SliceRanges* result) { - if (!ParseToken(TokKind::kLbrace, "expects '{' to start ranges")) { - return false; - } - std::vector> ranges; - if (lexer_.GetKind() == TokKind::kRbrace) { - // empty - return ParseToken(TokKind::kRbrace, "expects '}' to end ranges"); - } - do { - ranges.emplace_back(); - if (!ParseInt64List(TokKind::kLsquare, TokKind::kRsquare, TokKind::kColon, - &ranges.back())) { - return false; - } - } while (EatIfPresent(TokKind::kComma)); - - for (const auto& range : ranges) { - if (range.size() != 2 && range.size() != 3) { - return TokenError(Printf( - "expects [start:limit:step] or [start:limit], but sees %ld elements.", - range.size())); - } - } - - for (const auto& range : ranges) { - result->starts.push_back(range[0]); - result->limits.push_back(range[1]); - result->strides.push_back(range.size() == 3 ? range[2] : 1); - } - return ParseToken(TokKind::kRbrace, "expects '}' to end ranges"); -} - -// int64list ::= start int64_elements end -// int64_elements -// ::= /*empty*/ -// ::= int64_val (delim int64_val)* -bool HloParser::ParseInt64List(const TokKind start, const TokKind end, - const TokKind delim, - std::vector* result) { - if (!ParseToken(start, StrCat("expects an int64 list starting with ", - TokKindToString(start)))) { - return false; - } - if (lexer_.GetKind() == end) { - // empty - } else { - do { - int64 i; - if (!ParseInt64(&i)) { - return false; - } - result->push_back(i); - } while (EatIfPresent(delim)); - } - return ParseToken( - end, StrCat("expects an int64 list to end with ", TokKindToString(end))); +template <> +bool HloParser::ParseAttributeValue(int64* value) { + return ParseInt64(value); } // param_list ::= '(' param_list1 ')' @@ -1763,82 +1070,6 @@ bool HloParser::ParseAttributeName(string* result) { return true; } -bool HloParser::ParseDxD(const string& name, std::vector* result) { - if (!result->empty()) { - return TokenError( - Printf("sub-attribute '%s=' already exists", name.c_str())); - } - // 1D - if (lexer_.GetKind() == TokKind::kInt) { - int64 number; - if (!ParseInt64(&number)) { - return TokenError(Printf("expects sub-attribute '%s=i'", name.c_str())); - } - result->push_back(number); - return true; - } - // 2D or higher. - if (lexer_.GetKind() == TokKind::kDxD) { - string str = lexer_.GetStrVal(); - if (!SplitAndParseAsInts(str, 'x', result)) { - return TokenError( - Printf("expects sub-attribute '%s=ixj...'", name.c_str())); - } - lexer_.Lex(); - return true; - } - return TokenError("expects token type kInt or kDxD"); -} - -bool HloParser::ParseWindowPad(std::vector>* pad) { - if (!pad->empty()) { - return TokenError("sub-attribute 'pad=' already exists"); - } - if (lexer_.GetKind() != TokKind::kPad) { - return TokenError("expects window pad pattern, e.g., '0_0x3_3'"); - } - string str = lexer_.GetStrVal(); - std::vector padding_str = Split(str, 'x'); - for (int i = 0; i < padding_str.size(); i++) { - std::vector low_high; - if (!SplitAndParseAsInts(padding_str[i], '_', &low_high) || - low_high.size() != 2) { - return TokenError( - "expects padding_low and padding_high separated by '_'"); - } - pad->push_back(low_high); - } - lexer_.Lex(); - return true; -} - -// This is the inverse xla::ToString(PaddingConfig). The padding config string -// looks like "0_0_0x3_3_1". The string is first separated by 'x', each -// substring represents one PaddingConfigDimension. The substring is 3 (or 2) -// numbers joined by '_'. -bool HloParser::ParsePaddingConfig(PaddingConfig* padding) { - if (lexer_.GetKind() != TokKind::kPad) { - return TokenError("expects padding config, e.g., '0_0_0x3_3_1'"); - } - string str = lexer_.GetStrVal(); - std::vector padding_str = Split(str, 'x'); - for (const auto& padding_dim_str : padding_str) { - std::vector padding_dim; - if (!SplitAndParseAsInts(padding_dim_str, '_', &padding_dim) || - (padding_dim.size() != 2 && padding_dim.size() != 3)) { - return TokenError( - "expects padding config pattern like 'low_high_interior' or " - "'low_high'"); - } - auto* dim = padding->add_dimensions(); - dim->set_edge_padding_low(padding_dim[0]); - dim->set_edge_padding_high(padding_dim[1]); - dim->set_interior_padding(padding_dim.size() == 3 ? padding_dim[2] : 0); - } - lexer_.Lex(); - return true; -} - bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << "ParseOpcode"; if (lexer_.GetKind() != TokKind::kOpcode) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index d19c6e1877..359256f064 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -25,7 +25,6 @@ namespace tools { namespace { using tensorflow::StringPiece; -using tensorflow::strings::StrCat; struct TestData { string test_name; @@ -36,10 +35,6 @@ string TestDataToString(const ::testing::TestParamInfo& data) { return data.param.test_name; } -// For each string below, we check that: -// - we parse it to an HloModule successfully, and -// - the stringification of the resulting HloModule is equal to our original -// string. std::vector CreateTestCases() { // clang-format off return std::vector({ @@ -48,11 +43,10 @@ std::vector CreateTestCases() { "AxpyParam", R"(HloModule axpy_module: -ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { - %alpha = f32[] parameter(0) - %broadcast = f32[2,4]{1,0} broadcast(f32[] %alpha), dimensions={} +ENTRY %axpy.v5 (alpha: f32[2,4], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { + %alpha = f32[2,4]{1,0} parameter(0) %x = f32[2,4]{1,0} parameter(1) - %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x) + %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %alpha, f32[2,4]{1,0} %x) %y = f32[2,4]{1,0} parameter(2) ROOT %add = f32[2,4]{1,0} add(f32[2,4]{1,0} %multiply, f32[2,4]{1,0} %y) } @@ -157,7 +151,7 @@ ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f3 %v1 = f32[4]{0} parameter(0), sharding={maximal device=1} %v2 = f32[4]{0} parameter(1), sharding={maximal device=1} %greater-than = pred[4]{0} greater-than(f32[4]{0} %v1, f32[4]{0} %v2), sharding={replicated} - ROOT %select = f32[4]{0} select(pred[4]{0} %greater-than, f32[4]{0} %v1, f32[4]{0} %v2), sharding={} + ROOT %select = f32[4]{0} select(pred[4]{0} %greater-than, f32[4]{0} %v1, f32[4]{0} %v2) } )" @@ -185,19 +179,6 @@ ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3) } -)" -}, -{ -"ShardedTupleCreate", -R"(HloModule ShardedTupleCreate_module: - -ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) { - %v1 = f32[] parameter(0) - %v2 = f32[3]{0} parameter(1) - %v3 = f32[2,3]{1,0} parameter(2) - ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3), sharding={{replicated}, {maximal device=0}, {replicated}} -} - )" }, // int32 result = 0; @@ -231,11 +212,9 @@ ENTRY %WhileWithScalarS32Result.v2 () -> s32[] { R"(HloModule TwoSendRecvBothWayRecvFist_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = (f32[], u32[]) recv(), channel_id=15, sharding={maximal device=1} - ROOT %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15, sharding={maximal device=1} - %constant = f32[] constant(2.1), sharding={maximal device=0} - %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv} - %send-done = () send-done((f32[], u32[]) %send), channel_id=16, sharding={maximal device=0} + %recv = f32[] recv(), channel_id=15, sharding={maximal device=1} + ROOT %constant = f32[] constant(2.1), sharding={maximal device=0} + %send = () send(f32[] %constant), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv} } )" @@ -268,277 +247,6 @@ ENTRY %CallR0F32IdentityScalar.v2 () -> f32[] { ROOT %call = f32[] call(f32[] %constant), to_apply=%Identity.v1 } -)" -}, -// reduce window -{ -"ReduceWindow", -R"(HloModule R4UnitWindow_module: - -%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { - %lhs = f32[] parameter(0) - %rhs = f32[] parameter(1) - ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) -} - -ENTRY %R4UnitWindow.v3 (operand: f32[13,12,8,15]) -> f32[13,3,8,15] { - %operand = f32[13,12,8,15]{0,3,2,1} parameter(0) - %constant = f32[] constant(0) - ROOT %reduce-window = f32[13,3,8,15]{0,3,2,1} reduce-window(f32[13,12,8,15]{0,3,2,1} %operand, f32[] %constant), window={size=1x1x7x1 stride=1x4x1x1 pad=0_0x0_0x3_3x0_0}, to_apply=%add_F32.v3 -} - -)" -}, -// convolution -{ -"Convolution", -R"(HloModule Convolve1D1Window_0_module: - -ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { - %input = f32[1,2,1]{2,1,0} parameter(0) - %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) - %filter = f32[1,1,1]{2,1,0} parameter(1) - ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f -} - -)" -}, -// reverse(constant) -{ -"Reverse4D", -R"(HloModule Reverse4DFloatArrayOnDim01_module: - -ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] { - %constant = f32[4,3,2,1]{0,1,2,3} constant(f32[4,3,2,1] { { /*i0=0*/ { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} }, { /*i1=2*/ {5}, {6} } }, { /*i0=1*/ { /*i1=0*/ {7}, {8} }, { /*i1=1*/ {9}, {10} }, { /*i1=2*/ {11}, {12} } }, { /*i0=2*/ { /*i1=0*/ {13}, {14} }, { /*i1=1*/ {15}, {16} }, { /*i1=2*/ {17}, {18} } }, { /*i0=3*/ { /*i1=0*/ {19}, {20} }, { /*i1=1*/ {21}, {22} }, { /*i1=2*/ {23}, {24} } } }) - ROOT %reverse = f32[4,3,2,1]{0,1,2,3} reverse(f32[4,3,2,1]{0,1,2,3} %constant), dimensions={0,1} -} - -)" -}, -// concat -{ -"Concat", -R"(HloModule Concat2x3With2x5_module: - -ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] { - %constant = f32[2,3]{1,0} constant(f32[2,3] { { 0, 1, 2 }, { 1000, 1001, 1002 } }) - %constant.1 = f32[2,5]{1,0} constant(f32[2,5] { { 64, 65, 66, 67, 68 }, { 1064, 1065, 1066, 1067, 1068 } }) - ROOT %concatenate = f32[2,8]{1,0} concatenate(f32[2,3]{1,0} %constant, f32[2,5]{1,0} %constant.1), dimensions={1} -} - -)" -}, -// map -{ -"Map", -R"(HloModule MapBinaryAdder_module: - -%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { - %lhs = f32[] parameter(0) - %rhs = f32[] parameter(1) - ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) -} - -ENTRY %MapBinaryAdder.v3 (param0: f32[4], param1: f32[4]) -> f32[4] { - %param0 = f32[4]{0} parameter(0) - %param1 = f32[4]{0} parameter(1) - ROOT %map = f32[4]{0} map(f32[4]{0} %param0, f32[4]{0} %param1), to_apply=%add_F32.v3 -} - -)" -}, -// reduce -{ -"Reduce", -R"(HloModule ReduceR3ToR2_module: - -%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { - %lhs = f32[] parameter(0) - %rhs = f32[] parameter(1) - ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) -} - -ENTRY %ReduceR3ToR2.v3 (input: f32[8,16,256]) -> f32[8,16] { - %input = f32[8,16,256]{2,1,0} parameter(0) - %constant = f32[] constant(0) - ROOT %reduce = f32[8,16]{1,0} reduce(f32[8,16,256]{2,1,0} %input, f32[] %constant), dimensions={2}, to_apply=%add_F32.v3 -} - -)" -}, -// select and scatter -{ -"SelectAndScatter", -R"(HloModule R4F32OverlapSmall_module: - -%ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] { - %lhs = f32[] parameter(0) - %rhs = f32[] parameter(1) - ROOT %greater-than-or-equal-to = pred[] greater-than-or-equal-to(f32[] %lhs, f32[] %rhs) -} - -%add_F32.v3 (lhs.1: f32[], rhs.1: f32[]) -> f32[] { - %lhs.1 = f32[] parameter(0) - %rhs.1 = f32[] parameter(1) - ROOT %add = f32[] add(f32[] %lhs.1, f32[] %rhs.1) -} - -ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] { - %constant = f32[4,5,1,1]{3,2,1,0} constant(f32[4,5,1,1] { { /*i0=0*/ { /*i1=0*/ {7} }, { /*i1=1*/ {2} }, { /*i1=2*/ {5} }, { /*i1=3*/ {3} }, { /*i1=4*/ {8} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {8} }, { /*i1=2*/ {9} }, { /*i1=3*/ {3} }, { /*i1=4*/ {4} } }, { /*i0=2*/ { /*i1=0*/ {1} }, { /*i1=1*/ {5} }, { /*i1=2*/ {7} }, { /*i1=3*/ {5} }, { /*i1=4*/ {6} } }, { /*i0=3*/ { /*i1=0*/ {0} }, { /*i1=1*/ {6} }, { /*i1=2*/ {2} }, { /*i1=3*/ {10} }, { /*i1=4*/ {2} } } }) - %constant.1 = f32[2,2,1,1]{3,2,1,0} constant(f32[2,2,1,1] { { /*i0=0*/ { /*i1=0*/ {2} }, { /*i1=1*/ {6} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {1} } } }) - %constant.2 = f32[] constant(0) - ROOT %select-and-scatter = f32[4,5,1,1]{3,2,1,0} select-and-scatter(f32[4,5,1,1]{3,2,1,0} %constant, f32[2,2,1,1]{3,2,1,0} %constant.1, f32[] %constant.2), window={size=2x3x1x1 stride=2x2x1x1}, select=%ge_F32.v3, scatter=%add_F32.v3 -} - -)" -}, -// slice -{ -"Slice", -R"(HloModule slice_module: - -ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { - %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0) - ROOT %slice = f32[3,3,2,4]{3,2,1,0} slice(f32[3,3,4,4]{3,2,1,0} %p0), slice={[0:3:1], [0:3:1], [0:4:2], [0:4:1]} -} - -)" -}, -// slice, no stride -{ -"SliceNoStride", -R"(HloModule Slice3x3x3_To_1x3x3_F32_module: - -ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] { - %constant = f32[3,3,3]{2,1,0} constant(f32[3,3,3] { { { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 } }, { { 9, 10, 11 }, { 12, 13, 14 }, { 15, 16, 17 } }, { { 18, 19, 20 }, { 21, 22, 23 }, { 24, 25, 26 } } }) - ROOT %slice = f32[1,3,3]{2,1,0} slice(f32[3,3,3]{2,1,0} %constant), slice={[0:1], [0:3], [0:3]} -} - -)" -}, -// slice R0 -{ -"SliceR0", -R"(HloModule SliceR0_module: - -ENTRY %SliceR0.v2 () -> s32[] { - %constant = s32[] constant(1) - ROOT %slice = s32[] slice(s32[] %constant), slice={} -} - -)" -}, -// transpose -{ -"Transpose", -R"(HloModule Transpose_module: - -ENTRY %Transpose.v2 () -> s32[1,2,3] { - %constant = s32[1,2,3]{2,1,0} constant(s32[1,2,3] { { { 1, 2, 3 }, { 4, 5, 6 } } }) - ROOT %transpose = s32[1,2,3]{2,1,0} transpose(s32[1,2,3]{2,1,0} %constant), dimensions={0,1,2} -} - -)" -}, -// Dynamic slice -{ -"DynamicSlice", -R"(HloModule DynamicSlice_module: - -ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) -> s32[2,2,258] { - %original_parameter = s32[2,2,258]{2,1,0} parameter(0) - %constant = s32[1]{0} constant({0}) - %start_index = s32[1]{0} parameter(1) - %concatenate = s32[3]{0} concatenate(s32[1]{0} %constant, s32[1]{0} %constant, s32[1]{0} %start_index), dimensions={0} - ROOT %dynamic-slice = s32[2,2,258]{2,1,0} dynamic-slice(s32[2,2,258]{2,1,0} %original_parameter, s32[3]{0} %concatenate), dynamic_slice_sizes={2,2,258} -} - -)" -}, -// Dynamic update slice -{ -"DynamicUpdateSlice", -R"(HloModule DynamicUpdateSlice_module: - -ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_indices: s32[4]) -> s32[1,1,25,1] { - %input = s32[1,1,25,1]{3,2,1,0} parameter(0) - %update = s32[1,1,2,1]{3,2,1,0} parameter(1) - %start_indices = s32[4]{0} parameter(2) - ROOT %dynamic-update-slice = s32[1,1,25,1]{3,2,1,0} dynamic-update-slice(s32[1,1,25,1]{3,2,1,0} %input, s32[1,1,2,1]{3,2,1,0} %update, s32[4]{0} %start_indices) -} - -)" -}, -// batch norm training -{ -"BatchNormTraining", -R"(HloModule BasicTraining_module: - -ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) { - %constant = f32[2,2,1,2]{3,2,1,0} constant(f32[2,2,1,2] { { /*i0=0*/ { /*i1=0*/ {1, 2} }, { /*i1=1*/ {3, 4} } }, { /*i0=1*/ { /*i1=0*/ {5, 6} }, { /*i1=1*/ {7, 8} } } }) - %constant.1 = f32[2]{0} constant({2, 3}) - %constant.2 = f32[2]{0} constant({1, 2}) - ROOT %batch-norm-training = (f32[2,2,1,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-training(f32[2,2,1,2]{3,2,1,0} %constant, f32[2]{0} %constant.1, f32[2]{0} %constant.2), epsilon=0.001, feature_index=3 -} - -)" -}, -// batch norm inference -{ -"BatchNormInference", -R"(HloModule BatchNormInference_module: - -ENTRY %BatchNormInference.v6 (input: f32[2,2,2,2], offset: f32[2], scale: f32[2], mean: f32[2], variance: f32[2]) -> f32[2,2,2,2] { - %input = f32[2,2,2,2]{3,2,1,0} parameter(0) - %offset = f32[2]{0} parameter(1) - %scale = f32[2]{0} parameter(2) - %mean = f32[2]{0} parameter(3) - %variance = f32[2]{0} parameter(4) - ROOT %batch-norm-inference = f32[2,2,2,2]{3,2,1,0} batch-norm-inference(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %offset, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance), epsilon=0.001, feature_index=0 -} - -)" -}, -// batch norm grad -{ -"BatchNormGrad", -R"(HloModule BatchNormGrad_module: - -ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], variance: f32[2], grad_output: f32[2,2,2,2]) -> (f32[2,2,2,2], f32[2], f32[2]) { - %input = f32[2,2,2,2]{3,2,1,0} parameter(0) - %scale = f32[2]{0} parameter(1) - %mean = f32[2]{0} parameter(2) - %variance = f32[2]{0} parameter(3) - %grad_output = f32[2,2,2,2]{3,2,1,0} parameter(4) - ROOT %batch-norm-grad = (f32[2,2,2,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-grad(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance, f32[2,2,2,2]{3,2,1,0} %grad_output), epsilon=0.001, feature_index=0 -} - -)" -}, -// pad -{ -"Pad", -R"(HloModule Pad1DS3Array_module: - -ENTRY %Pad1DS3Array.v3 () -> f32[8] { - %constant = f32[3]{0} constant({1, 2, 3}) - %constant.1 = f32[] constant(0.1) - ROOT %pad = f32[8]{0} pad(f32[3]{0} %constant, f32[] %constant.1), padding=3_1 -} - -)" -}, -// pad has interior -{ -"PadHasInterior", -R"(HloModule PadHasInterior_module: - -ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] { - %input = f32[1,25,7,7]{3,2,1,0} parameter(0) - %constant = f32[] constant(-5.123) - ROOT %pad = f32[1,25,17,11]{3,2,1,0} pad(f32[1,25,7,7]{3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_0_0x2_2_1x2_2_0 -} - )" } }); @@ -553,10 +261,7 @@ class HloParserTest : public ::testing::Test, << "'" << s << "' does not contain '" << expected << "'"; } - // Expects "ToString(Parse(string)) == string", that is, parses the string, - // asserts that it succeeded, stringifies the parsed module, and checks that - // the it equals the original string. - void ExpectEqual() { + void ExpectSuccess() { const string& original = GetParam().module_string; auto result = Parse(original); TF_EXPECT_OK(result.status()); @@ -565,7 +270,7 @@ class HloParserTest : public ::testing::Test, } }; -TEST_P(HloParserTest, Run) { ExpectEqual(); } +TEST_P(HloParserTest, Run) { ExpectSuccess(); } INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest, ::testing::ValuesIn(CreateTestCases()), @@ -722,125 +427,6 @@ ENTRY %ConstantWithExp.v4 () -> f32[] { // printed as "300". } -TEST_F(HloParserTest, AttibutesAnyOrder) { - const string original = R"(HloModule any_order_module: - -ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { - %input = f32[1,2,1]{2,1,0} parameter(0) - %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) - %filter = f32[1,1,1]{2,1,0} parameter(1) - ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), sharding={maximal device=1}, dim_labels=b0f_0io->b0f, window={pad=1_1 size=2} -} - -)"; - TF_EXPECT_OK(Parse(original).status()); -} - -TEST_F(HloParserTest, InvalidDimLabels) { - string prefix = R"(HloModule invalid_dim_labels_module: - -ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { - %input = f32[1,2,1]{2,1,0} parameter(0) - %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) - %filter = f32[1,1,1]{2,1,0} parameter(1) - ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1} )"; - string suffix = R"( -} - -)"; - - ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=00_01_10", suffix)) - .status() - .error_message(), - "expects dim labels pattern"); - - ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=010_1100->010", suffix)) - .status() - .error_message(), - "must have the same rank"); - - ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=0bf_io0->b0f", suffix)) - .status() - .error_message(), - "output spatial dimensions should be the same as input " - "spatial dimensions"); -} - -TEST_F(HloParserTest, UnexpectedAttribute) { - const string original = R"(HloModule unexpected_attr_module: - -ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = (f32[], u32[]) recv(), channel_id=15 - %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 - ROOT %constant = f32[] constant(2.1) - %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, calls=%recv - %send-done = () send-done((f32[], u32[]) %send), channel_id=16 -} - -)"; - ExpectHasSubstr(Parse(original).status().error_message(), - "unexpected attribute calls"); -} - -TEST_F(HloParserTest, MissingAttribute) { - const string original = R"(HloModule missing_attr_module: - -ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = (f32[], u32[]) recv(), channel_id=15 - %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 - ROOT %constant = f32[] constant(-2.1) - %send = (f32[], u32[]) send(f32[] %constant) - %send-done = () send-done((f32[], u32[]) %send), channel_id=16 -} - -)"; - ExpectHasSubstr(Parse(original).status().error_message(), - "attribute channel_id is expected but not seen"); -} - -TEST_F(HloParserTest, PredecessorUndefined) { - const string original = R"(HloModule pre_not_found_module: - -ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = (f32[], u32[]) recv(), channel_id=15 - %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 - ROOT %constant = f32[] constant(2.1) - %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, control-predecessors={%done} - %send-done = () send-done((f32[], u32[]) %send), channel_id=16 -} - -)"; - ExpectHasSubstr(Parse(original).status().error_message(), - "'done' is not defined"); -} - -TEST_F(HloParserTest, SliceAllowOmitStride1) { - const string original = R"(HloModule slice_module: - -ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { - %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0) - ROOT %slice = f32[3,3,2,4]{3,2,1,0} slice(f32[3,3,4,4]{3,2,1,0} %p0), slice={[0:3], [0:3], [0:4:2], [0:4]} -} - -)"; - TF_EXPECT_OK(Parse(original).status()); -} - -TEST_F(HloParserTest, PaddingConfigIsNotWindowPad) { - const string original = R"(HloModule window_pad_module: - -ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { - %input = f32[1,2,1]{2,1,0} parameter(0) - %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) - %filter = f32[1,1,1]{2,1,0} parameter(1) - ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), dim_labels=b0f_0io->b0f, window={pad=1_1_0 size=1} -} - -)"; - ExpectHasSubstr(Parse(original).status().error_message(), - "expects padding_low and padding_high separated by '_'"); -} - } // namespace } // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 9afd2fac23..9c2069e756 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -57,9 +57,6 @@ enum class TokKind { // Typed tokens. kName, // %foo kAttributeName, // dimensions= - kDimLabels, // [0-9bf]+_[0-9io]+->[0-9bf]+ - kDxD, // [0-9]+(x[0-9]+)+ - kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* kShape, // f32[2,3]{1,0} kOpcode, // add kInt, // 42 diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index 503e7d456e..89b26b8916 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -45,7 +45,6 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/threadpool.h" diff --git a/tensorflow/compiler/xla/types.h b/tensorflow/compiler/xla/types.h index 9fa4297523..3b19ca321c 100644 --- a/tensorflow/compiler/xla/types.h +++ b/tensorflow/compiler/xla/types.h @@ -19,7 +19,6 @@ limitations under the License. #include #include "third_party/eigen3/Eigen/Core" -#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/platform/types.h" #include @@ -33,8 +32,6 @@ using ::tensorflow::int16; using ::tensorflow::int32; using ::tensorflow::int64; -using ::tensorflow::bfloat16; - using ::tensorflow::uint8; using ::tensorflow::uint16; using ::tensorflow::uint32; diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc index 6f7f1479b9..23161873a0 100644 --- a/tensorflow/compiler/xla/window_util.cc +++ b/tensorflow/compiler/xla/window_util.cc @@ -26,8 +26,8 @@ namespace xla { namespace window_util { /* static */ string ToString(const WindowDimension& dim) { - using tensorflow::strings::StrAppend; using tensorflow::strings::StrCat; + using tensorflow::strings::StrAppend; string str = StrCat("(size=", dim.size()); if (dim.stride() != 1) { StrAppend(&str, ",stride=", dim.stride()); @@ -49,22 +49,22 @@ namespace window_util { } string ToString(const Window& window) { - using tensorflow::strings::StrAppend; using tensorflow::strings::StrCat; + using tensorflow::strings::StrAppend; string str; - const auto add_field = - [&](const char* heading, - std::function format) { - StrAppend(&str, heading, "="); - const char* prefix = ""; - for (const auto& window_dimension : window.dimensions()) { - StrAppend(&str, prefix, format(window_dimension)); - prefix = "x"; - } - }; - - add_field("size", + const auto add_field = [&]( + const char* heading, + std::function format) { + StrAppend(&str, heading, "="); + const char* prefix = ""; + for (const auto& window_dimension : window.dimensions()) { + StrAppend(&str, prefix, format(window_dimension)); + prefix = "x"; + } + }; + + add_field("window", [](const WindowDimension& dim) { return StrCat(dim.size()); }); if (HasStride(window)) { add_field(" stride", diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index eac8f2ff07..06987e0044 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -46,12 +46,6 @@ enum PrimitiveType { // converted to f16 from f32 at arbirary points in the computation. F16 = 10; F32 = 11; - - // Truncated 16 bit floating-point format. This is similar to IEEE's 16 bit - // floating-point format, but uses 1 bit for the sign, 8 bits for the exponent - // and 7 bits for the mantissa. - BF16 = 16; - F64 = 12; // Complex values of fixed width. @@ -69,8 +63,6 @@ enum PrimitiveType { // An opaque type used for passing context specific data to a custom // operation. OPAQUE = 14; - - // Next = 17 } // Describes the value held inside padding elements. @@ -318,10 +310,7 @@ message LiteralProto { repeated double f64s = 9; repeated float c64s = 12; // Stored as interleaved real, imag floats. repeated LiteralProto tuple_literals = 10; - // The F16s and BF16s are encoded in little endian byte order - bytes f16s = 11; - bytes bf16s = 13; - // Next = 14 + bytes f16s = 11; // Note: the F16s are encoded in little endian byte order } message WindowDimension { @@ -836,10 +825,8 @@ message OpSharding { REPLICATED = 0; // This sharding is maximal - one device runs the entire operation. MAXIMAL = 1; - // This sharding is a tuple - only the tuple_shardings field is valid. - TUPLE = 2; - // None of the above; tile_shape and tile_assignment are both used. - OTHER = 3; + // Neither of the above; tile_shape and tile_assignment are both used. + OTHER = 2; } Type type = 1; // The shape of the sharded tile. @@ -851,13 +838,6 @@ message OpSharding { // Flattened list of device IDs. The order of flattening is the same as used // by IndexUtil::MultiToLinearIndex(tile_assignment_shape). repeated int64 tile_assignment_devices = 4; - // If type == TUPLE, the sub-shardings, one per leaf node in the tuple shape, - // in pre-order. The tuple shape could be nested; here we store just a - // flattened list of all leaves in the tuple shape. Note that the tuple shape - // is not stored here; shardings do not store the shapes to which they are - // applied, this is inferred from the instruction this sharding gets attached - // to. - repeated OpSharding tuple_shardings = 5; } message OpRequest { diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index 8b7df4a84c..a111cfecb3 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -82,6 +82,7 @@ cc_library( tf_cc_test( name = "adaptive_shared_batch_scheduler_test", srcs = ["adaptive_shared_batch_scheduler_test.cc"], + tags = ["manual"], # b/69013768 deps = [ ":adaptive_shared_batch_scheduler", "//tensorflow/contrib/batching/test_util:fake_clock_env", diff --git a/tensorflow/contrib/batching/kernels/batch_kernels.cc b/tensorflow/contrib/batching/kernels/batch_kernels.cc index 3b7c538fcc..6041d8c9b2 100644 --- a/tensorflow/contrib/batching/kernels/batch_kernels.cc +++ b/tensorflow/contrib/batching/kernels/batch_kernels.cc @@ -461,7 +461,7 @@ class BatchResource : public ResourceBase { return Status::OK(); } - // Looks up the batcher queue for 'queue_name'. If it did't previously exist, + // Looks up the batcher queue for 'queue_name'. If it didn't previously exist, // creates it. Status LookupOrCreateBatcherQueue(const string& queue_name, BatcherQueue** queue) { diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py index 8c6a614beb..2e94b7206d 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py @@ -759,7 +759,7 @@ class CsiszarVIMCOTest(test.TestCase): def _csiszar_vimco_helper_grad(self, logu, delta): """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`.""" - # This code actually estimates the sum of the Jacobiab because thats what + # This code actually estimates the sum of the Jacobiab because that's what # TF's `gradients` does. np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper( logu[..., None] + np.diag([delta]*len(logu))) diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h index 70037d5bd8..5e316538ce 100644 --- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h +++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h @@ -33,9 +33,9 @@ template ${_AT_DISTCOPY}/) - endif() + if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES) + # some ops call out to cuda directly; need to link libs for the cuda dlls + target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES}) + endif() + if (_AT_DISTCOPY) + add_custom_command(TARGET ${_AT_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ ${_AT_DISTCOPY}/) endif() if (_AT_DEPENDS) add_dependencies(${_AT_TARGET} ${_AT_DEPENDS}) @@ -180,9 +182,19 @@ function(AddUserOps) # make sure TF_COMPILE_LIBRARY is not defined for this target get_target_property(target_compile_flags ${_AT_TARGET} COMPILE_FLAGS) if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND") - set(target_compile_flags "/UTF_COMPILE_LIBRARY") + if (WIN32) + set(target_compile_flags "/UTF_COMPILE_LIBRARY") + else (WIN32) + # gcc uses UTF as default + set(target_compile_flags "-finput-charset=UTF-8") + endif (WIN32) else() - set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") + if (WIN32) + set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") + else (WIN32) + # gcc uses UTF as default + set(target_compile_flags "${target_compile_flags} -finput-charset=UTF-8") + endif (WIN32) endif() set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags}) add_dependencies(tf_extension_ops ${_AT_TARGET}) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index c607546f4a..c3dc8531bb 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -301,8 +301,6 @@ file(GLOB_RECURSE tf_core_framework_srcs "${tensorflow_source_dir}/tensorflow/core/common_runtime/session.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_factory.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_options.cc" - "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*.cc" - "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*.h" "${tensorflow_source_dir}/public/*.h" ) @@ -316,7 +314,6 @@ file(GLOB_RECURSE tf_core_framework_exclude_srcs "${tensorflow_source_dir}/tensorflow/core/util/*test*.h" "${tensorflow_source_dir}/tensorflow/core/util/*test*.cc" "${tensorflow_source_dir}/tensorflow/core/util/*main.cc" - "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*test*.cc" ) list(REMOVE_ITEM tf_core_framework_srcs ${tf_core_framework_exclude_srcs}) diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index f978c8ccd5..a2ab4b9ae4 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -70,6 +70,7 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/prefetching_kernels.cc" + "${tensorflow_source_dir}/tensorflow/contrib/data/ops/dataset_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/ops/prefetching_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/factorization/kernels/clustering_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/factorization/kernels/masked_matmul_ops.cc" @@ -183,6 +184,7 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc" + "${tensorflow_source_dir}/tensorflow/contrib/resampler/kernels/*.cu.cc" ) if(WIN32 AND tensorflow_ENABLE_GPU) @@ -206,16 +208,16 @@ endif(WIN32 AND tensorflow_ENABLE_GPU) add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs}) add_dependencies(tf_core_kernels tf_core_cpu) -if(WIN32) +if (WIN32) target_compile_options(tf_core_kernels PRIVATE /MP) - if (tensorflow_ENABLE_GPU) - set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) - set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) - cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) - set_target_properties(${tf_core_gpu_kernels_lib} - PROPERTIES DEBUG_POSTFIX "" - COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" - ) - add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) - endif() +endif (WIN32) +if (tensorflow_ENABLE_GPU) + set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) + set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) + cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) + set_target_properties(${tf_core_gpu_kernels_lib} + PROPERTIES DEBUG_POSTFIX "" + COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" + ) + add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) endif() diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 4a61ed7a35..03c168795c 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -81,6 +81,7 @@ GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_prediction "${tensorflow_source_dir}/t GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_quantiles "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_stats_accumulator "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc") +GENERATE_CONTRIB_OP_LIBRARY(data_dataset "${tensorflow_source_dir}/tensorflow/contrib/data/ops/dataset_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(data_prefetching "${tensorflow_source_dir}/tensorflow/contrib/data/ops/prefetching_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc") diff --git a/tensorflow/contrib/cmake/tf_label_image_example.cmake b/tensorflow/contrib/cmake/tf_label_image_example.cmake index 0d3a4699eb..7f2f60b089 100644 --- a/tensorflow/contrib/cmake/tf_label_image_example.cmake +++ b/tensorflow/contrib/cmake/tf_label_image_example.cmake @@ -34,3 +34,8 @@ target_link_libraries(tf_label_image_example PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS tf_label_image_example + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 7636e9ba6e..43b98659e3 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -701,6 +701,9 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) set(require_shape_fn 1) endif() + get_filename_component(GENERATE_PYTHON_OP_LIB_MKDIRPATH ${GENERATE_PYTHON_OP_LIB_DESTINATION} PATH) + file(MAKE_DIRECTORY ${GENERATE_PYTHON_OP_LIB_MKDIRPATH}) + # Create a C++ executable that links in the appropriate op # registrations and generates Python wrapper code based on the # registered ops. @@ -729,6 +732,7 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE) endfunction() +GENERATE_PYTHON_OP_LIB("audio_ops") GENERATE_PYTHON_OP_LIB("array_ops") GENERATE_PYTHON_OP_LIB("bitwise_ops") GENERATE_PYTHON_OP_LIB("math_ops") @@ -776,6 +780,8 @@ GENERATE_PYTHON_OP_LIB("contrib_boosted_trees_stats_accumulator_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/boosted_trees/python/ops/gen_stats_accumulator_ops.py) GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py) +GENERATE_PYTHON_OP_LIB("contrib_data_dataset_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/data/python/ops/gen_dataset_ops.py) GENERATE_PYTHON_OP_LIB("contrib_data_prefetching_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/data/python/ops/gen_prefetching_ops.py) GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops" @@ -973,7 +979,7 @@ add_library(pywrap_tensorflow_internal SHARED $ $<$:$> $ - $<$:$> + $<$:$<$:$>> $<$:$> ${pywrap_tensorflow_deffile} ) @@ -1049,25 +1055,23 @@ if(WIN32) DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/) endif(WIN32) -if(WIN32) - # include contrib/seq2seq as .so - # - set(tf_beam_search_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" - ) +# include contrib/seq2seq as .so +# +set(tf_beam_search_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" +) - set(tf_beam_search_gpu_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" - ) +set(tf_beam_search_gpu_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" +) - AddUserOps(TARGET _beam_search_ops - SOURCES "${tf_beam_search_srcs}" - GPUSOURCES ${tf_beam_search_gpu_srcs} - DEPENDS pywrap_tensorflow_internal tf_python_ops - DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) -endif(WIN32) +AddUserOps(TARGET _beam_search_ops + SOURCES "${tf_beam_search_srcs}" + GPUSOURCES ${tf_beam_search_gpu_srcs} + DEPENDS pywrap_tensorflow_internal tf_python_ops + DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) ############################################################ # Build a PIP package containing the TensorFlow runtime. diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 9bf45bab30..3e3fe0cdfa 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -73,7 +73,7 @@ add_library(tensorflow SHARED $ $<$:$> $ - $<$:$> + $<$:$<$:$>> $<$:$> ${tensorflow_deffile} ) @@ -94,3 +94,46 @@ endif() if(WIN32) add_dependencies(tensorflow tensorflow_static) endif(WIN32) + +install(TARGETS tensorflow + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) + +# install necessary headers +# tensorflow headers +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/cc/ + DESTINATION include/tensorflow/cc + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/cc/ + DESTINATION include/tensorflow/cc + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/core/ + DESTINATION include/tensorflow/core + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/core/ + DESTINATION include/tensorflow/core + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/ + DESTINATION include/tensorflow/stream_executor + FILES_MATCHING PATTERN "*.h") +# google protobuf headers +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/ + DESTINATION include/google + FILES_MATCHING PATTERN "*.h") +# nsync headers +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/ + DESTINATION include/external/nsync + FILES_MATCHING PATTERN "*.h") +# Eigen directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/ + DESTINATION include/Eigen) +# external directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/ + DESTINATION include/external/eigen_archive) +# third_party eigen directory +install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/ + DESTINATION include/third_party/eigen3) +# unsupported Eigen directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/ + DESTINATION include/unsupported/Eigen) diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 3d84f1ebb9..8d95f0d3e8 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -74,6 +74,9 @@ endif() #) #list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) +if (NOT WIN32) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp") +endif (NOT WIN32) add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs}) add_dependencies(tf_stream_executor diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake index 6ef9598963..cb58a2e7df 100644 --- a/tensorflow/contrib/cmake/tf_tools.cmake +++ b/tensorflow/contrib/cmake/tf_tools.cmake @@ -73,7 +73,7 @@ add_executable(${transform_graph} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -95,7 +95,7 @@ add_executable(${summarize_graph} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -117,7 +117,7 @@ add_executable(${compare_graphs} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -138,7 +138,7 @@ add_executable(${benchmark_model} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -147,3 +147,8 @@ target_link_libraries(${benchmark_model} PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS ${transform_graph} ${summarize_graph} ${compare_graphs} ${benchmark_model} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake index 858e7dda92..e63fccc181 100644 --- a/tensorflow/contrib/cmake/tf_tutorials.cmake +++ b/tensorflow/contrib/cmake/tf_tutorials.cmake @@ -34,3 +34,8 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS tf_tutorials_example_trainer + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 7166e38b28..c8adb0369b 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -360,8 +360,8 @@ class CrfDecodeForwardRnnCell(rnn_cell.RNNCell): scope: Unused variable scope of this cell. Returns: - backpointers: [batch_size, num_tags], containing backpointers. - new_state: [batch_size, num_tags], containing new score values. + backpointers: A [batch_size, num_tags] matrix of backpointers. + new_state: A [batch_size, num_tags] matrix of new score values. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). @@ -385,7 +385,7 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell): """Initialize the CrfDecodeBackwardRnnCell. Args: - num_tags + num_tags: An integer. """ self._num_tags = num_tags @@ -401,8 +401,9 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell): """Build the CrfDecodeBackwardRnnCell. Args: - inputs: [batch_size, num_tags], backpointer of next step (in time order). - state: [batch_size, 1], next position's tag index. + inputs: A [batch_size, num_tags] matrix of + backpointer of next step (in time order). + state: A [batch_size, 1] matrix of tag index of next step. scope: Unused variable scope of this cell. Returns: @@ -426,16 +427,16 @@ def crf_decode(potentials, transition_params, sequence_length): This is a function for tensor. Args: - potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of + potentials: A [batch_size, max_seq_len, num_tags] tensor of unary potentials. - transition_params: A [num_tags, num_tags] tensor, matrix of + transition_params: A [num_tags, num_tags] matrix of binary potentials. - sequence_length: A [batch_size] tensor, containing sequence lengths. + sequence_length: A [batch_size] vector of true sequence lengths. Returns: - decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32. + decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`. Contains the highest scoring tag indicies. - best_score: A [batch_size] tensor, containing the score of decode_tags. + best_score: A [batch_size] vector, containing the score of `decode_tags`. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index eaede0e00e..7bcf5a5f4d 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -35,8 +35,19 @@ tf_custom_op_library( ], ) +# TODO(mrry): Move the kernels out of the core library into this library. +tf_custom_op_library( + name = "_dataset_ops.so", + srcs = [ + "ops/dataset_ops.cc", + ], +) + tf_gen_op_libs( - op_lib_names = ["prefetching_ops"], + op_lib_names = [ + "dataset_ops", + "prefetching_ops", + ], ) filegroup( diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 6e43ae0e63..0c7e793689 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -23,7 +23,6 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@TextLineDataset @@batch_and_drop_remainder -@@padded_batch_and_drop_remainder @@dense_to_sparse_batch @@enumerate_dataset @@group_by_window @@ -42,11 +41,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import +# pylint: disable=unused-import from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch -from tensorflow.contrib.data.python.ops.batching import padded_batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import unbatch from tensorflow.contrib.data.python.ops.dataset_ops import Dataset from tensorflow.contrib.data.python.ops.dataset_ops import get_single_element diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc new file mode 100644 index 0000000000..1574384cb2 --- /dev/null +++ b/tensorflow/contrib/data/ops/dataset_ops.cc @@ -0,0 +1,232 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_def_builder.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +// -------------------------------------------------------------------------- + +// The ops in this section can be composed to define an input +// pipeline. Each op produces a DT_VARIANT tensor that represents +// a DAG of "dataset" objects. An "dataset" object can be converted +// to a stateful "iterator" by passing the "dataset" to the +// "MakeIterator" op. +// +// TODO(b/65524810): DT_VARIANT tensors that represent "dataset" objects are +// not presently serializable. To avoid issues with constant folding, ensure +// that any "source dataset" ops (i.e. ops that output a dataset and do not +// take one as input) are marked "stateful". + +REGISTER_OP("IgnoreErrorsDataset") + .Input("input_dataset: variant") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that contains the elements of `input_dataset` ignoring errors. +)doc"); + +REGISTER_OP("MapAndBatchDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Input("batch_size: int64") + .Input("num_parallel_batches: int64") + .Output("handle: variant") + .Attr("f: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset` and then +batches `batch_size` of them. + +Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up +to `batch_size * num_parallel_batches` copies of `f` in parallel. + +batch_size: A scalar representing the number of elements to accumulate in a + batch. It determines the number of concurrent invocations of `f` that process + elements from `input_dataset` in parallel. +num_parallel_batches: A scalar representing the number of batches to create in + parallel. Processing multiple batches in parallel benefits workloads prone to + stragglers. +)doc"); + +REGISTER_OP("ScanDataset") + .Input("input_dataset: variant") + .Input("initial_state: Tstate") + .Input("other_arguments: Targuments") + .Output("handle: variant") + .Attr("f: func") + .Attr("Tstate: list(type) >= 1") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset successively reduces `f` over the elements of `input_dataset`. +)doc"); + +REGISTER_OP("ParallelInterleaveDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Input("cycle_length: int64") + .Input("block_length: int64") + .Input("sloppy: bool") + .Output("handle: variant") + .Attr("f: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. + +The resulting dataset is similar to the `InterleaveDataset`, with the exception +that if retrieving the next value from a dataset would cause the requester to +block, it will skip that input dataset. This dataset is especially useful +when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it +allows the training step to proceed so long as some data is available. + +!! WARNING !! This dataset is not deterministic! + +f: A function mapping elements of `input_dataset`, concatenated with + `other_arguments`, to a Dataset variant that contains elements matching + `output_types` and `output_shapes`. +)doc"); + +REGISTER_OP("GroupByWindowDataset") + .Input("input_dataset: variant") + .Input("key_func_other_arguments: Tkey_func_other_arguments") + .Input("reduce_func_other_arguments: Treduce_func_other_arguments") + .Input( + "window_size_func_other_arguments: Twindow_size_func_other_arguments") + .Output("handle: variant") + .Attr("key_func: func") + .Attr("reduce_func: func") + .Attr("window_size_func: func") + .Attr("Tkey_func_other_arguments: list(type) >= 0") + .Attr("Treduce_func_other_arguments: list(type) >= 0") + .Attr("Twindow_size_func_other_arguments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that computes a windowed group-by on `input_dataset`. + +// TODO(mrry): Support non-int64 keys. + +key_func: A function mapping an element of `input_dataset`, concatenated + with `key_func_other_arguments` to a scalar value of type DT_INT64. +)doc"); + +REGISTER_OP("DenseToSparseBatchDataset") + .Input("input_dataset: variant") + .Input("batch_size: int64") + .Input("row_shape: int64") + .Output("handle: variant") + // NOTE(mrry): the 0th and 2nd elements will be DT_INT64. + .Attr("output_types: list(type) >= 1") + // NOTE(mrry): the 1st and 2nd elements will be vectors. + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that yields a SparseTensor for each element of the input. + +input_dataset: A handle to an input dataset. Must have a single component. +batch_size: A scalar representing the number of elements to accumulate in a + batch. +row_shape: A vector representing the dense shape of each row in the produced + SparseTensor. The shape may be partially specified, using `-1` to indicate + that a particular dimension should use the maximum size of all batch elements. +)doc"); + +REGISTER_OP("SqlDataset") + .Input("driver_name: string") + .Input("data_source_name: string") + .Input("query: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked + // stateful to inhibit constant folding. + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that executes a SQL query and emits rows of the result set. + +driver_name: The database type. Currently, the only supported type is 'sqlite'. +data_source_name: A connection string to connect to the database. +query: A SQL query to execute. +)doc"); + +REGISTER_OP("DatasetToSingleElement") + .Input("dataset: variant") + .Output("components: output_types") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); + std::vector output_shapes; + TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); + if (output_shapes.size() != c->num_outputs()) { + return errors::InvalidArgument( + "`output_shapes` must be the same length as `output_types` (", + output_shapes.size(), " vs. ", c->num_outputs()); + } + for (size_t i = 0; i < output_shapes.size(); ++i) { + shape_inference::ShapeHandle output_shape_handle; + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + output_shapes[i], &output_shape_handle)); + c->set_output(static_cast(i), output_shape_handle); + } + return Status::OK(); + }) + .Doc(R"doc( +Outputs the single element from the given dataset. + +dataset: A handle to a dataset that contains a single element. +components: The components of the single element of `input`. +)doc"); + +REGISTER_OP("SerializeIterator") + .Input("resource_handle: resource") + .Output("serialized: variant") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Converts the given `resource_handle` representing an iterator to a variant tensor. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. +)doc"); + +REGISTER_OP("DeserializeIterator") + .Input("resource_handle: resource") + .Input("serialized: variant") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Converts the given variant tensor to an iterator and stores it in the given resource. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 241fc2ab4f..5877f42dcf 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -365,9 +365,7 @@ py_test( size = "small", srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], deps = [ - ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -430,9 +428,7 @@ py_test( size = "small", srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], deps = [ - ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/python:array_ops", @@ -451,7 +447,10 @@ py_test( size = "small", srcs = ["prefetching_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_oss"], # b/68785503 + tags = [ + "manual", + "no_oss", + ], deps = [ "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:prefetching_py", diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 951d4bb5f7..670f622c3c 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -52,9 +52,8 @@ class BatchDatasetTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) - .repeat(count).batch(batch_size).make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) + .repeat(count).batch(batch_size).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -70,7 +69,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(14): - self.assertAllEqual(component[(i * 14 + j) % 7]**2, + self.assertAllEqual(component[(i*14 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -85,12 +84,12 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(8): - self.assertAllEqual(component[(i * 8 + j) % 7]**2, + self.assertAllEqual(component[(i*8 + j) % 7]**2, result_component[j]) result = sess.run(get_next) for component, result_component in zip(components, result): for j in range((14 * 7) % 8): - self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2, + self.assertAllEqual(component[((num_batches - 1)*8 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -108,10 +107,10 @@ class BatchDatasetTest(test.TestCase): seq_lens = array_ops.placeholder(dtypes.int32, shape=[None]) padded_shape = array_ops.placeholder(dtypes.int64, shape=[1]) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(seq_lens) - .map(lambda x: array_ops.fill([x], x)).padded_batch( - 4, padded_shapes=padded_shape).make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensor_slices(seq_lens) + .map(lambda x: array_ops.fill([x], x)).padded_batch( + 4, + padded_shapes=padded_shape).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -119,40 +118,35 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: # Test with random sequence lengths, and max padding. random_seq_lens = np.random.randint(20, size=(32,)).astype(np.int32) - sess.run( - init_op, feed_dict={ - padded_shape: [-1], - seq_lens: random_seq_lens - }) + sess.run(init_op, feed_dict={padded_shape: [-1], + seq_lens: random_seq_lens}) for i in range(8): result = sess.run(get_next) padded_len = np.max(result) self.assertEqual((4, padded_len), result.shape) for j in range(4): - seq_len = random_seq_lens[(i * 4) + j] + seq_len = random_seq_lens[(i*4)+j] self.assertAllEqual(result[j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[j, seq_len:], [0] * (padded_len - seq_len)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) # Test with random sequence lengths, and constant padding. - sess.run( - init_op, feed_dict={ - padded_shape: [25], - seq_lens: random_seq_lens - }) + sess.run(init_op, feed_dict={padded_shape: [25], + seq_lens: random_seq_lens}) for i in range(8): result = sess.run(get_next) self.assertEqual((4, 25), result.shape) for j in range(4): - seq_len = random_seq_lens[(i * 4) + j] + seq_len = random_seq_lens[(i*4)+j] self.assertAllEqual(result[j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[j, seq_len:], [0] * (25 - seq_len)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) # Test correct handling of empty tensors. - sess.run(init_op, feed_dict={padded_shape: [-1], seq_lens: [0, 0, 0, 0]}) + sess.run(init_op, feed_dict={padded_shape: [-1], + seq_lens: [0, 0, 0, 0]}) result = sess.run(get_next) self.assertAllEqual([[], [], [], []], result) with self.assertRaises(errors.OutOfRangeError): @@ -160,7 +154,8 @@ class BatchDatasetTest(test.TestCase): # Test error handling with constant sequence lengths, and # too-short padding. - sess.run(init_op, feed_dict={padded_shape: [5], seq_lens: [6, 5, 5, 5]}) + sess.run(init_op, feed_dict={padded_shape: [5], + seq_lens: [6, 5, 5, 5]}) with self.assertRaises(errors.DataLossError): result = sess.run(get_next) @@ -171,13 +166,11 @@ class BatchDatasetTest(test.TestCase): def fill_tuple(x): filled = array_ops.fill([x], x) return (filled, string_ops.as_string(filled)) - - iterator = ( - dataset_ops.Dataset.from_tensor_slices(seq_lens).map(fill_tuple) - .padded_batch( - 4, - padded_shapes=(padded_shape, padded_shape), - padding_values=(-1, "")).make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensor_slices(seq_lens).map(fill_tuple) + .padded_batch( + 4, + padded_shapes=(padded_shape, padded_shape), + padding_values=(-1, "")).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -185,18 +178,15 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: # Test with random sequence lengths, and max padding. random_seq_lens = np.random.randint(20, size=(32,)).astype(np.int32) - sess.run( - init_op, feed_dict={ - padded_shape: [-1], - seq_lens: random_seq_lens - }) + sess.run(init_op, feed_dict={padded_shape: [-1], + seq_lens: random_seq_lens}) for i in range(8): result = sess.run(get_next) padded_len = np.max(result[0]) self.assertEqual((4, padded_len), result[0].shape) self.assertEqual((4, padded_len), result[1].shape) for j in range(4): - seq_len = random_seq_lens[(i * 4) + j] + seq_len = random_seq_lens[(i*4)+j] self.assertAllEqual(result[0][j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[0][j, seq_len:], [-1] * (padded_len - seq_len)) @@ -230,21 +220,20 @@ class BatchDatasetTest(test.TestCase): constant_op.constant([-1, -1], dtype=dtypes.int64), constant_op.constant([37], dtype=dtypes.int64))) - for dataset in [ - dynamic_padding_from_tensor_shapes, dynamic_padding_from_lists, - dynamic_padding_from_lists_with_minus_one, dynamic_padding_from_tensors - ]: + for dataset in [dynamic_padding_from_tensor_shapes, + dynamic_padding_from_lists, + dynamic_padding_from_lists_with_minus_one, + dynamic_padding_from_tensors]: self.assertEqual([None, None], dataset.output_shapes[0].as_list()) self.assertEqual([None, None, None], dataset.output_shapes[1].as_list()) self.assertEqual([None, 37], dataset.output_shapes[2].as_list()) def testDenseToSparseBatchDataset(self): components = np.random.randint(12, size=(100,)).astype(np.int32) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x], x)).apply( - batching.dense_to_sparse_batch(4, - [12])).make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensor_slices(components) + .map(lambda x: array_ops.fill([x], x)).apply( + batching.dense_to_sparse_batch(4, [12])) + .make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -253,26 +242,24 @@ class BatchDatasetTest(test.TestCase): for start in range(0, len(components), 4): results = sess.run(get_next) - self.assertAllEqual([[i, j] - for i, c in enumerate(components[start:start + 4]) - for j in range(c)], results.indices) self.assertAllEqual( - [c for c in components[start:start + 4] for _ in range(c)], + [[i, j] for i, c in enumerate(components[start:start+4]) + for j in range(c)], results.indices) + self.assertAllEqual( + [c for c in components[start:start+4] for _ in range(c)], results.values) - self.assertAllEqual([min(4, - len(components) - start), 12], - results.dense_shape) + self.assertAllEqual( + [min(4, len(components) - start), 12], results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testDenseToSparseBatchDatasetWithUnknownShape(self): components = np.random.randint(5, size=(40,)).astype(np.int32) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x, x], x)).apply( - batching.dense_to_sparse_batch( - 4, [5, -1])).make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensor_slices(components) + .map(lambda x: array_ops.fill([x, x], x)).apply( + batching.dense_to_sparse_batch( + 4, [5, -1])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -281,30 +268,27 @@ class BatchDatasetTest(test.TestCase): for start in range(0, len(components), 4): results = sess.run(get_next) - self.assertAllEqual([[i, j, z] - for i, c in enumerate(components[start:start + 4]) - for j in range(c) - for z in range(c)], results.indices) - self.assertAllEqual([ - c - for c in components[start:start + 4] for _ in range(c) - for _ in range(c) - ], results.values) - self.assertAllEqual([ - min(4, - len(components) - start), 5, - np.max(components[start:start + 4]) - ], results.dense_shape) + self.assertAllEqual( + [[i, j, z] for i, c in enumerate(components[start:start+4]) + for j in range(c) for z in range(c)], results.indices) + self.assertAllEqual( + [c for c in components[start:start+4] + for _ in range(c) for _ in range(c)], + results.values) + self.assertAllEqual( + [min(4, len(components) - start), + 5, + np.max(components[start:start+4])], + results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testDenseToSparseBatchDatasetWithInvalidShape(self): input_tensor = array_ops.constant([[1]]) - iterator = ( - dataset_ops.Dataset.from_tensors(input_tensor).apply( - batching.dense_to_sparse_batch(4, [-2])) - .make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensors(input_tensor) + .apply(batching.dense_to_sparse_batch(4, [-2])) + .make_initializable_iterator()) init_op = iterator.initializer with self.test_session() as sess: @@ -314,10 +298,8 @@ class BatchDatasetTest(test.TestCase): def testDenseToSparseBatchDatasetShapeErrors(self): input_tensor = array_ops.placeholder(dtypes.int32) - iterator = ( - dataset_ops.Dataset.from_tensors(input_tensor).apply( - batching.dense_to_sparse_batch(4, - [12])).make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensors(input_tensor).apply( + batching.dense_to_sparse_batch(4, [12])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -374,7 +356,8 @@ class BatchDatasetTest(test.TestCase): def testUnbatchMultiElementTupleDataset(self): data = tuple([(math_ops.range(10 * i, 10 * i + 10), - array_ops.fill([10], "hi")) for i in range(3)]) + array_ops.fill([10], "hi")) + for i in range(3)]) data = dataset_ops.Dataset.from_tensor_slices(data) expected_types = ((dtypes.int32, dtypes.string),) * 3 data = data.batch(2) @@ -387,7 +370,9 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: for i in range(10): - self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")), + self.assertEqual(((i, b"hi"), + (10 + i, b"hi"), + (20 + i, b"hi")), sess.run(op)) with self.assertRaises(errors.OutOfRangeError): @@ -400,10 +385,9 @@ class BatchDatasetTest(test.TestCase): batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size)) - .make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(batch_size)) + .make_initializable_iterator()) next_element = iterator.get_next() @@ -420,51 +404,14 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) - def testPaddedBatchAndDropRemainder(self): - els = [] - for length in [3, 6, 9, 4, 12, 10, 2]: - els.append((np.array(length), np.arange(length) + 1, - np.array(length * 2))) - - dataset = dataset_ops.Dataset.from_tensors(els[0]) - for el in els[1:]: - dataset = dataset.concatenate(dataset_ops.Dataset.from_tensors(el)) - - batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = ( - dataset.apply( - batching.padded_batch_and_drop_remainder( - batch_size, ([], [None], []))).make_initializable_iterator()) - - next_element = iterator.get_next() - - with self.test_session() as sess: - for test_batch_size in [1, 3, 7, 10]: - sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size}) - num_batches = 7 // test_batch_size - for i in range(num_batches): - result = sess.run(next_element) - for component_idx, result_component in enumerate(result): - for j in range(test_batch_size): - data_idx = i * test_batch_size + j - comp = result_component[j] - unpadded = comp[comp > 0] - if np.isscalar(comp): - # The boolean mask indexing above adds a dim back. Rm it. - unpadded = unpadded[0] - self.assertAllEqual(els[data_idx][component_idx], unpadded) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - def testBatchAndDropRemainderShapeInference(self): - components = (array_ops.placeholder(dtypes.int32), - (array_ops.placeholder(dtypes.int32, shape=[None]), - array_ops.placeholder(dtypes.int32, shape=[20, 30]))) + components = (array_ops.placeholder(dtypes.int32), (array_ops.placeholder( + dtypes.int32, shape=[None]), array_ops.placeholder( + dtypes.int32, shape=[20, 30]))) # Test with a statically known batch size. - dataset = ( - dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(128))) + dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(128))) self.assertIs(None, dataset.output_shapes[0].ndims) self.assertEqual([128], dataset.output_shapes[1][0].as_list()) @@ -473,9 +420,8 @@ class BatchDatasetTest(test.TestCase): # Test with a dynamic batch size: the static shape will be unknown, because # `batch_size` is a placeholder. batch_size = array_ops.placeholder(dtypes.int64) - dataset = ( - dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size))) + dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(batch_size))) self.assertIs(None, dataset.output_shapes[0].ndims) self.assertEqual([None], dataset.output_shapes[1][0].as_list()) @@ -495,10 +441,9 @@ class BatchDatasetTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply( - batching.map_and_batch(_map_fn, batch_size)) - .make_initializable_iterator()) + iterator = (dataset_ops.Dataset.from_tensor_slices(components).repeat(count) + .apply(batching.map_and_batch(_map_fn, batch_size)) + .make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -514,7 +459,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(14): - self.assertAllEqual(component[(i * 14 + j) % 7]**2, + self.assertAllEqual(component[(i*14 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -529,7 +474,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(8): - self.assertAllEqual(component[(i * 8 + j) % 7]**2, + self.assertAllEqual(component[(i*8 + j) % 7]**2, result_component[j]) # The last batch should fail with `OutOfRange`. with self.assertRaises(errors.OutOfRangeError): @@ -550,9 +495,8 @@ class BatchDatasetTest(test.TestCase): array_ops.check_numerics( constant_op.constant(1.0) / constant_op.constant(0.0), "oops")) batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = ( - dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) - .make_initializable_iterator()) + iterator = (dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) + .make_initializable_iterator()) init_op = iterator.initializer with self.test_session() as sess: with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"): @@ -560,7 +504,6 @@ class BatchDatasetTest(test.TestCase): def testBatchAndMapDatasetShapeMismatch(self): """Test a dataset that maps a TF function across its input elements.""" - def generator(): yield [1] yield [2] diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index 07fecf04fa..df9147af6c 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -32,7 +32,7 @@ from tensorflow.python.util import nest class DatasetSerializationTestBase(test.TestCase): - """Base class for testing serializable datasets.""" + """Base class for testing finite serializable datasets.""" def tearDown(self): self._delete_ckpt() @@ -58,19 +58,17 @@ class DatasetSerializationTestBase(test.TestCase): if ds_fn2: self.verify_restore_in_modified_graph(ds_fn1, ds_fn2, num_outputs) - def verify_unused_iterator(self, ds_fn, num_outputs, verify_exhausted=True): + def verify_unused_iterator(self, ds_fn, num_outputs): """Verifies that saving and restoring an unused iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. - verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. """ - self.verify_run_with_breaks( - ds_fn, [0], num_outputs, verify_exhausted=verify_exhausted) + self.verify_run_with_breaks(ds_fn, [0], num_outputs) def verify_fully_used_iterator(self, ds_fn, num_outputs): """Verifies that saving and restoring a fully used iterator works. @@ -106,16 +104,12 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, [], 0, ckpt_saved=True, verify_exhausted=True) self.assertEqual(len(actual), 0) - def verify_init_before_restore(self, - ds_fn, - num_outputs, - verify_exhausted=True): + def verify_init_before_restore(self, ds_fn, num_outputs): """Verifies that retoring into an already initilized iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. - verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -124,14 +118,9 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, self.gen_break_points(num_outputs), num_outputs, - init_before_restore=True, - verify_exhausted=verify_exhausted) + init_before_restore=True) - def verify_multiple_breaks(self, - ds_fn, - num_outputs, - num_breaks=10, - verify_exhausted=True): + def verify_multiple_breaks(self, ds_fn, num_outputs, num_breaks=10): """Attempts to save/restore at multiple break points. Args: @@ -139,22 +128,16 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs: See `run_core_tests`. num_breaks: The number of break points. These are uniformly spread in [0, num_outputs] both inclusive. - verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. """ - self.verify_run_with_breaks( - ds_fn, - self.gen_break_points(num_outputs), - num_outputs, - verify_exhausted=verify_exhausted) + self.verify_run_with_breaks(ds_fn, + self.gen_break_points(num_outputs, num_breaks), + num_outputs) - def verify_reset_restored_iterator(self, - ds_fn, - num_outputs, - break_point=None, - verify_exhausted=True): + def verify_reset_restored_iterator(self, ds_fn, num_outputs, + break_point=None): """Attempts to re-initialize a restored iterator. This is useful when restoring a training checkpoint during validation. @@ -163,7 +146,6 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. - verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -171,8 +153,7 @@ class DatasetSerializationTestBase(test.TestCase): break_point = num_outputs // 2 if not break_point else break_point # Collect ground truth containing all outputs. - expected = self.gen_outputs( - ds_fn, [], num_outputs, verify_exhausted=verify_exhausted) + expected = self.gen_outputs(ds_fn, [], num_outputs, verify_exhausted=True) # Skip some items and save checkpoint. self.gen_outputs(ds_fn, [], break_point, verify_exhausted=False) @@ -187,17 +168,15 @@ class DatasetSerializationTestBase(test.TestCase): sess.run(init_op) for _ in range(num_outputs): actual.append(sess.run(get_next_op)) - if verify_exhausted: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) self.match(expected, actual) def verify_restore_in_modified_graph(self, ds_fn1, ds_fn2, num_outputs, - break_point=None, - verify_exhausted=True): + break_point=None): """Attempts to restore an iterator in a modified graph. Builds an input pipeline using ds_fn1, runs it for `break_point` steps @@ -209,7 +188,6 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn2: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. - verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -218,15 +196,15 @@ class DatasetSerializationTestBase(test.TestCase): # Skip `break_point` items and store the remaining produced from ds_fn1 # in `expected`. - self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) + self.gen_outputs(ds_fn1, [], break_point) expected = self.gen_outputs( ds_fn1, [], num_outputs - break_point, ckpt_saved=True, - verify_exhausted=verify_exhausted) + verify_exhausted=True) # Generate `break_point` items from ds_fn1 and save checkpoint. - self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) + self.gen_outputs(ds_fn1, [], break_point) actual = [] # Build graph for ds_fn2 but load checkpoint for ds_fn1. @@ -236,9 +214,8 @@ class DatasetSerializationTestBase(test.TestCase): self._restore(saver, sess) for _ in range(num_outputs - break_point): actual.append(sess.run(get_next_op)) - if verify_exhausted: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) self.match(expected, actual) @@ -246,7 +223,6 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, break_points, num_outputs, - verify_exhausted=True, init_before_restore=False): """Verifies that ds_fn() produces the same outputs with and without breaks. @@ -261,7 +237,6 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `gen_outputs`. break_points: See `gen_outputs`. num_outputs: See `gen_outputs`. - verify_exhausted: See `gen_outputs`. init_before_restore: See `gen_outputs`. Raises: @@ -270,13 +245,13 @@ class DatasetSerializationTestBase(test.TestCase): expected = self.gen_outputs( ds_fn, [], num_outputs, - verify_exhausted=verify_exhausted, + verify_exhausted=True, init_before_restore=init_before_restore) actual = self.gen_outputs( ds_fn, break_points, num_outputs, - verify_exhausted=verify_exhausted, + verify_exhausted=True, init_before_restore=init_before_restore) self.match(expected, actual) @@ -286,7 +261,7 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs, ckpt_saved=False, init_before_restore=False, - verify_exhausted=True): + verify_exhausted=False): """Generates elements from input dataset while stopping at break points. Produces `num_outputs` outputs and saves the state of the iterator in the @@ -310,7 +285,7 @@ class DatasetSerializationTestBase(test.TestCase): after producing `num_outputs` elements. Returns: - A list of `num_outputs` items. + A list if `num_outputs` items. """ outputs = [] @@ -337,11 +312,11 @@ class DatasetSerializationTestBase(test.TestCase): num_iters = end - start for _ in range(num_iters): outputs.append(sess.run(get_next_op)) + self._save(sess, saver) + ckpt_saved = True if i == len(break_points) and verify_exhausted: with self.assertRaises(errors.OutOfRangeError): sess.run(get_next_op) - self._save(sess, saver) - ckpt_saved = True return outputs diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py index bda9a2a4a3..271d80a54b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py @@ -21,6 +21,7 @@ import os import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import readers from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session @@ -33,7 +34,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py index f59ac760dc..329dc80ba5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py @@ -21,6 +21,7 @@ import os from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import enumerate_ops +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op @@ -29,7 +30,6 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import variables diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index 3ae8f71d77..8033f1d388 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -21,6 +21,7 @@ import gzip import os import zlib +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.contrib.data.python.ops import readers from tensorflow.core.example import example_pb2 @@ -33,7 +34,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py index 1a26da82e5..91615e9f62 100644 --- a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py @@ -19,7 +19,6 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -208,82 +207,5 @@ class SequenceDatasetTest(test.TestCase): sess.run(get_next) -class SequenceDatasetSerializationTest( - dataset_serialization_test_base.DatasetSerializationTestBase): - - def _build_skip_dataset(self, count): - components = (np.arange(10),) - return dataset_ops.Dataset.from_tensor_slices(components).skip(count) - - def testSkipFewerThanInputs(self): - count = 4 - num_outputs = 10 - count - self.run_core_tests(lambda: self._build_skip_dataset(count), - lambda: self._build_skip_dataset(count + 2), - num_outputs) - - def testSkipVarious(self): - # Skip more than inputs - self.run_core_tests(lambda: self._build_skip_dataset(20), None, 0) - # Skip exactly the input size - self.run_core_tests(lambda: self._build_skip_dataset(10), None, 0) - self.run_core_tests(lambda: self._build_skip_dataset(-1), None, 0) - # Skip nothing - self.run_core_tests(lambda: self._build_skip_dataset(0), None, 10) - - def _build_take_dataset(self, count): - components = (np.arange(10),) - return dataset_ops.Dataset.from_tensor_slices(components).take(count) - - def testTakeFewerThanInputs(self): - count = 4 - self.run_core_tests( - lambda: self._build_take_dataset(count), - lambda: self._build_take_dataset(count + 2), - count, - ) - - def testTakeVarious(self): - # Take more than inputs - self.run_core_tests(lambda: self._build_take_dataset(20), None, 10) - # Take exactly the input size - self.run_core_tests(lambda: self._build_take_dataset(10), None, 10) - # Take all - self.run_core_tests(lambda: self._build_take_dataset(-1), None, 10) - # Take nothing - self.run_core_tests(lambda: self._build_take_dataset(0), None, 0) - - def _build_repeat_dataset(self, count, take_count=3): - components = (np.arange(10),) - return dataset_ops.Dataset.from_tensor_slices(components).take( - take_count).repeat(count) - - def testFiniteRepeat(self): - count = 10 - self.run_core_tests(lambda: self._build_repeat_dataset(count), - lambda: self._build_repeat_dataset(count + 2), - 3 * count) - - def testEmptyRepeat(self): - self.run_core_tests(lambda: self._build_repeat_dataset(0), None, 0) - - def testInfiniteRepeat(self): - self.verify_unused_iterator( - lambda: self._build_repeat_dataset(-1), 10, verify_exhausted=False) - self.verify_init_before_restore( - lambda: self._build_repeat_dataset(-1), 10, verify_exhausted=False) - self.verify_multiple_breaks( - lambda: self._build_repeat_dataset(-1), 20, verify_exhausted=False) - self.verify_reset_restored_iterator( - lambda: self._build_repeat_dataset(-1), 20, verify_exhausted=False) - self.verify_restore_in_modified_graph( - lambda: self._build_repeat_dataset(-1), - lambda: self._build_repeat_dataset(2), - 20, - verify_exhausted=False) - # Test repeat empty dataset - self.run_core_tests(lambda: self._build_repeat_dataset(-1, 0), None, 0) - - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py index 5d34b0024c..b0e7218301 100644 --- a/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py @@ -19,7 +19,6 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -111,31 +110,5 @@ class ZipDatasetTest(test.TestCase): sess.run(get_next) -class ZipDatasetSerializationTest( - dataset_serialization_test_base.DatasetSerializationTestBase): - - def _build_dataset(self, arr): - components = [ - np.tile(np.array([[1], [2], [3], [4]]), 20), - np.tile(np.array([[12], [13], [14], [15]]), 22), - np.array(arr) - ] - datasets = [ - dataset_ops.Dataset.from_tensor_slices(component) - for component in components - ] - return dataset_ops.Dataset.zip((datasets[0], (datasets[1], datasets[2]))) - - def testCore(self): - # Equal length components - arr = [37.0, 38.0, 39.0, 40.0] - num_outputs = len(arr) - self.run_core_tests(lambda: self._build_dataset(arr), None, num_outputs) - # Variable length components - diff_size_arr = [1.0, 2.0] - self.run_core_tests(lambda: self._build_dataset(diff_size_arr), - lambda: self._build_dataset(arr), 2) - - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 1b81cf5be9..727c5d1c38 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -11,20 +11,6 @@ load( ) load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") -py_library( - name = "dataset_ops", - srcs = [ - "dataset_ops.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":transformation_ops", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - ], -) - py_library( name = "iterator_ops", srcs = [ @@ -73,6 +59,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":gen_dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dataset_ops_gen", @@ -128,6 +115,31 @@ tf_custom_op_py_library( ], ) +tf_gen_op_wrapper_py( + name = "gen_dataset_ops", + out = "gen_dataset_ops.py", + deps = ["//tensorflow/contrib/data:dataset_ops_op_lib"], +) + +tf_custom_op_py_library( + name = "dataset_ops", + srcs = ["dataset_ops.py"], + dso = ["//tensorflow/contrib/data:_dataset_ops.so"], + kernels = [ + "//tensorflow/contrib/data:dataset_ops_op_lib", + ], + srcs_version = "PY2AND3", + deps = [ + ":gen_dataset_ops", + ":transformation_ops", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index d4ade7adfd..e6e5f716b6 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes @@ -24,7 +25,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import math_ops @@ -103,42 +103,6 @@ def unbatch(): return _apply_fn -def filter_irregular_batches(batch_size): - """Transformation that filters out batches that are not of size batch_size.""" - - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - tensor_batch_size = ops.convert_to_tensor( - batch_size, dtype=dtypes.int64, name="batch_size") - - flattened = _RestructuredDataset(dataset, - tuple(nest.flatten(dataset.output_types))) - - def _predicate(*xs): - """Return `True` if this element is a full batch.""" - # Extract the dynamic batch size from the first component of the flattened - # batched element. - first_component = xs[0] - first_component_batch_size = array_ops.shape( - first_component, out_type=dtypes.int64)[0] - - return math_ops.equal(first_component_batch_size, tensor_batch_size) - - filtered = flattened.filter(_predicate) - - maybe_constant_batch_size = tensor_util.constant_value(tensor_batch_size) - - def _set_first_dimension(shape): - return shape.merge_with( - tensor_shape.vector(maybe_constant_batch_size).concatenate(shape[1:])) - - known_shapes = nest.map_structure(_set_first_dimension, - dataset.output_shapes) - return _RestructuredDataset(filtered, dataset.output_types, known_shapes) - - return _apply_fn - - def batch_and_drop_remainder(batch_size): """A batching transformation that omits the final small batch (if present). @@ -171,43 +135,34 @@ def batch_and_drop_remainder(batch_size): def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - batched = dataset.batch(batch_size) - return filter_irregular_batches(batch_size)(batched) - - return _apply_fn + tensor_batch_size = ops.convert_to_tensor( + batch_size, dtype=dtypes.int64, name="batch_size") + batched = dataset.batch(tensor_batch_size) + flattened = _RestructuredDataset(batched, + tuple(nest.flatten(batched.output_types))) -def padded_batch_and_drop_remainder(batch_size, - padded_shapes, - padding_values=None): - """A batching and padding transformation that omits the final small batch. + def _predicate(*xs): + """Return `True` if this element is a full batch.""" + # Extract the dynamic batch size from the first component of the flattened + # batched element. + first_component = xs[0] + first_component_batch_size = array_ops.shape( + first_component, out_type=dtypes.int64)[0] - Like @{tf.data.Dataset.padded_batch}, this transformation combines - consecutive elements of this dataset into batches. However, if the batch - size does not evenly divide the input dataset size, this transformation will - drop the final smaller element. + return math_ops.equal(first_component_batch_size, tensor_batch_size) - See `@{tf.contrib.data.batch_and_drop_remainder}` for more details. + filtered = flattened.filter(_predicate) - Args: - batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of - consecutive elements of this dataset to combine in a single batch. - padded_shapes: A nested structure of `tf.TensorShape` or - `tf.int64` vector tensor-like objects. See - @{tf.data.Dataset.padded_batch} for details. - padding_values: (Optional.) A nested structure of scalar-shaped - `tf.Tensor`. See @{tf.data.Dataset.padded_batch} for details. + maybe_constant_batch_size = tensor_util.constant_value(tensor_batch_size) - Returns: - A `Dataset` transformation function, which can be passed to - @{tf.data.Dataset.apply} - """ + def _set_first_dimension(shape): + return shape.merge_with( + tensor_shape.vector(maybe_constant_batch_size).concatenate(shape[1:])) - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - batched = dataset.padded_batch( - batch_size, padded_shapes=padded_shapes, padding_values=padding_values) - return filter_irregular_batches(batch_size)(batched) + known_shapes = nest.map_structure(_set_first_dimension, + batched.output_shapes) + return _RestructuredDataset(filtered, batched.output_types, known_shapes) return _apply_fn diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 45d6dbe743..c4c4426809 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -20,15 +20,21 @@ from __future__ import print_function from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import enumerate_ops from tensorflow.contrib.data.python.ops import error_ops +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import grouping +from tensorflow.contrib.util import loader from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gen_io_ops +from tensorflow.python.platform import resource_loader from tensorflow.python.util import deprecation +_dataset_ops = loader.load_op_library( + resource_loader.get_path_to_datafile("../../_dataset_ops.so")) + + class Dataset(dataset_ops.Dataset): """Represents a potentially large set of elements. diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 238bb52b02..51a2791072 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -17,9 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest -from tensorflow.python.ops import gen_dataset_ops def ignore_errors(): diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 6df7b22fb6..1c7c94b3c8 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -17,12 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops def group_by_window(key_func, diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 74a919c1ff..ce23e95697 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -17,12 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.util import deprecation diff --git a/tensorflow/contrib/data/python/ops/iterator_ops.py b/tensorflow/contrib/data/python/ops/iterator_ops.py index d736029fb0..32d2f42c93 100644 --- a/tensorflow/contrib/data/python/ops/iterator_ops.py +++ b/tensorflow/contrib/data/python/ops/iterator_ops.py @@ -17,8 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.training import saver diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 2e1c3153ca..f22298b757 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.data.python.ops import dataset_ops as contrib_dataset_ops +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers from tensorflow.python.data.util import nest @@ -25,7 +26,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.util import deprecation diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 5acaed48a3..87bbbb7d19 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -19,11 +19,11 @@ from __future__ import print_function import collections +from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import function from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops class _ScanDataset(dataset_ops.Dataset): diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 2dc8ad9483..145b9495ff 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -140,6 +140,23 @@ cuda_py_test( ], ) +cuda_py_test( + name = "cauchy_test", + size = "medium", + srcs = ["python/kernel_tests/cauchy_test.py"], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + cuda_py_test( name = "chi2_test", srcs = ["python/kernel_tests/chi2_test.py"], diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 16f6533e57..0d12d83893 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -24,6 +24,7 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.contrib.distributions.python.ops.binomial import * +from tensorflow.contrib.distributions.python.ops.cauchy import * from tensorflow.contrib.distributions.python.ops.chi2 import * from tensorflow.contrib.distributions.python.ops.conditional_distribution import * from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * @@ -83,6 +84,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ 'bijectors', + 'Cauchy', 'ConditionalDistribution', 'ConditionalTransformedDistribution', 'FULLY_REPARAMETERIZED', diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py new file mode 100644 index 0000000000..7f7697357c --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py @@ -0,0 +1,437 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Cauchy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import importlib +import numpy as np + +from tensorflow.contrib.distributions.python.ops import cauchy as cauchy_lib +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + + +def try_import(name): # pylint: disable=invalid-name + module = None + try: + module = importlib.import_module(name) + except ImportError as e: + tf_logging.warning("Could not import %s: %s" % (name, str(e))) + return module + +stats = try_import("scipy.stats") + + +class CauchyTest(test.TestCase): + + def setUp(self): + self._rng = np.random.RandomState(123) + + def assertAllFinite(self, tensor): + is_finite = np.isfinite(tensor.eval()) + all_true = np.ones_like(is_finite, dtype=np.bool) + self.assertAllEqual(all_true, is_finite) + + def _testParamShapes(self, sample_shape, expected): + with self.test_session(): + param_shapes = cauchy_lib.Cauchy.param_shapes(sample_shape) + loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] + self.assertAllEqual(expected, loc_shape.eval()) + self.assertAllEqual(expected, scale_shape.eval()) + loc = array_ops.zeros(loc_shape) + scale = array_ops.ones(scale_shape) + self.assertAllEqual( + expected, + array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval()) + + def _testParamStaticShapes(self, sample_shape, expected): + param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape) + loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] + self.assertEqual(expected, loc_shape) + self.assertEqual(expected, scale_shape) + + def testParamShapes(self): + sample_shape = [10, 3, 4] + self._testParamShapes(sample_shape, sample_shape) + self._testParamShapes(constant_op.constant(sample_shape), sample_shape) + + def testParamStaticShapes(self): + sample_shape = [10, 3, 4] + self._testParamStaticShapes(sample_shape, sample_shape) + self._testParamStaticShapes( + tensor_shape.TensorShape(sample_shape), sample_shape) + + def testCauchyLogPDF(self): + with self.test_session(): + batch_size = 6 + loc = constant_op.constant([3.0] * batch_size) + scale = constant_op.constant([np.sqrt(10.0)] * batch_size) + x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + log_pdf = cauchy.log_prob(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) + + pdf = cauchy.prob(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, pdf.shape) + self.assertAllEqual(cauchy.batch_shape, pdf.eval().shape) + + if not stats: + return + expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf.eval()) + self.assertAllClose(np.exp(expected_log_pdf), pdf.eval()) + + def testCauchyLogPDFMultidimensional(self): + with self.test_session(): + batch_size = 6 + loc = constant_op.constant([[3.0, -3.0]] * batch_size) + scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] * + batch_size) + x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + log_pdf = cauchy.log_prob(x) + log_pdf_values = log_pdf.eval() + self.assertEqual(log_pdf.shape, (6, 2)) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) + + pdf = cauchy.prob(x) + pdf_values = pdf.eval() + self.assertEqual(pdf.shape, (6, 2)) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf_values.shape) + self.assertAllEqual(cauchy.batch_shape, pdf.shape) + self.assertAllEqual(cauchy.batch_shape, pdf_values.shape) + + if not stats: + return + expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf_values) + self.assertAllClose(np.exp(expected_log_pdf), pdf_values) + + def testCauchyCDF(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + cdf = cauchy.cdf(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, cdf.shape) + self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) + if not stats: + return + expected_cdf = stats.cauchy(loc, scale).cdf(x) + self.assertAllClose(expected_cdf, cdf.eval(), atol=0) + + def testCauchySurvivalFunction(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + sf = cauchy.survival_function(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, sf.shape) + self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) + if not stats: + return + expected_sf = stats.cauchy(loc, scale).sf(x) + self.assertAllClose(expected_sf, sf.eval(), atol=0) + + def testCauchyLogCDF(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + cdf = cauchy.log_cdf(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, cdf.shape) + self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) + + if not stats: + return + expected_cdf = stats.cauchy(loc, scale).logcdf(x) + self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5) + + def testFiniteGradientAtDifficultPoints(self): + for dtype in [np.float32, np.float64]: + g = ops.Graph() + with g.as_default(): + loc = variables.Variable(dtype(0.0)) + scale = variables.Variable(dtype(1.0)) + dist = cauchy_lib.Cauchy(loc=loc, scale=scale) + x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype) + for func in [ + dist.cdf, dist.log_cdf, dist.survival_function, + dist.log_survival_function, dist.log_prob, dist.prob + ]: + value = func(x) + grads = gradients_impl.gradients(value, [loc, scale]) + with self.test_session(graph=g): + variables.global_variables_initializer().run() + self.assertAllFinite(value) + self.assertAllFinite(grads[0]) + self.assertAllFinite(grads[1]) + + def testCauchyLogSurvivalFunction(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + sf = cauchy.log_survival_function(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, sf.shape) + self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) + + if not stats: + return + expected_sf = stats.cauchy(loc, scale).logsf(x) + self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5) + + def testCauchyEntropy(self): + with self.test_session(): + loc = np.array([1.0, 1.0, 1.0]) + scale = np.array([[1.0, 2.0, 3.0]]) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + entropy = cauchy.entropy() + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + entropy.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + entropy.eval().shape) + self.assertAllEqual(cauchy.batch_shape, entropy.shape) + self.assertAllEqual(cauchy.batch_shape, entropy.eval().shape) + + if not stats: + return + expected_entropy = stats.cauchy(loc, scale).entropy() + self.assertAllClose(expected_entropy, entropy.eval()) + + def testCauchyMode(self): + with self.test_session(): + # Mu will be broadcast to [7, 7, 7]. + loc = [7.] + scale = [11., 12., 13.] + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.mode().shape) + self.assertAllEqual([7., 7, 7], cauchy.mode().eval()) + + def testCauchyMean(self): + with self.test_session(): + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.mean().shape) + self.assertAllEqual([np.nan] * 3, cauchy.mean().eval()) + + def testCauchyNanMean(self): + with self.test_session(): + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.mean().eval() + + def testCauchyQuantile(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + p = np.linspace(0.000001, 0.999999, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + x = cauchy.quantile(p) + + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.eval().shape) + self.assertAllEqual(cauchy.batch_shape, x.shape) + self.assertAllEqual(cauchy.batch_shape, x.eval().shape) + + if not stats: + return + expected_x = stats.cauchy(loc, scale).ppf(p) + self.assertAllClose(expected_x, x.eval(), atol=0.) + + def testCauchyVariance(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.variance().shape) + self.assertAllEqual([np.nan] * 3, cauchy.variance().eval()) + + def testCauchyNanVariance(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.variance().eval() + + def testCauchyStandardDeviation(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.stddev().shape) + self.assertAllEqual([np.nan] * 3, cauchy.stddev().eval()) + + def testCauchyNanStandardDeviation(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.stddev().eval() + + def testCauchySample(self): + with self.test_session(): + loc = constant_op.constant(3.0) + scale = constant_op.constant(1.0) + loc_v = 3.0 + n = constant_op.constant(100000) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + samples = cauchy.sample(n) + sample_values = samples.eval() + + self.assertEqual(sample_values.shape, (100000,)) + self.assertAllClose(np.median(sample_values), loc_v, atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) + + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + expected_shape = (tensor_shape.TensorShape( + [n.eval()]).concatenate(cauchy.batch_shape)) + + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + def testCauchySampleMultiDimensional(self): + with self.test_session(): + batch_size = 2 + loc = constant_op.constant([[3.0, -3.0]] * batch_size) + scale = constant_op.constant([[0.5, 1.0]] * batch_size) + loc_v = [3.0, -3.0] + n = constant_op.constant(100000) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + samples = cauchy.sample(n) + sample_values = samples.eval() + self.assertEqual(samples.shape, (100000, batch_size, 2)) + self.assertAllClose(np.median(sample_values[:, 0, 0]), + loc_v[0], atol=1e-1) + self.assertAllClose(np.median(sample_values[:, 0, 1]), + loc_v[1], atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + expected_shape = (tensor_shape.TensorShape( + [n.eval()]).concatenate(cauchy.batch_shape)) + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + def testCauchyNegativeLocFails(self): + with self.test_session(): + cauchy = cauchy_lib.Cauchy(loc=[1.], scale=[-5.], validate_args=True) + with self.assertRaisesOpError("Condition x > 0 did not hold"): + cauchy.mode().eval() + + def testCauchyShape(self): + with self.test_session(): + loc = constant_op.constant([-3.0] * 5) + scale = constant_op.constant(11.0) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertEqual(cauchy.batch_shape_tensor().eval(), [5]) + self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape([5])) + self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) + self.assertEqual(cauchy.event_shape, tensor_shape.TensorShape([])) + + def testCauchyShapeWithPlaceholders(self): + loc = array_ops.placeholder(dtype=dtypes.float32) + scale = array_ops.placeholder(dtype=dtypes.float32) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + with self.test_session() as sess: + # get_batch_shape should return an "" tensor. + self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape(None)) + self.assertEqual(cauchy.event_shape, ()) + self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) + self.assertAllEqual( + sess.run(cauchy.batch_shape_tensor(), + feed_dict={loc: 5.0, + scale: [1.0, 2.0]}), [2]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py new file mode 100644 index 0000000000..a17bb091f6 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -0,0 +1,223 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""The Cauchy distribution class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops.distributions import distribution + + +__all__ = [ + "Cauchy", +] + + +class Cauchy(distribution.Distribution): + """The Cauchy distribution with location `loc` and scale `scale`. + + #### Mathematical details + + The probability density function (pdf) is, + + ```none + pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2)) + ``` + where `loc` is the location, and `scale` is the scale. + + The Cauchy distribution is a member of the [location-scale family]( + https://en.wikipedia.org/wiki/Location-scale_family), i.e. + + ```none + X ~ Cauchy(loc=0, scale=1) + Y ~ Cauchy(loc=loc, scale=scale) + Y = loc + scale * X + ``` + + #### Examples + + Examples of initialization of one or a batch of distributions. + + ```python + # Define a single scalar Cauchy distribution. + dist = Cauchy(loc=0., scale=3.) + + # Evaluate the cdf at 1, returning a scalar. + dist.cdf(1.) + + # Define a batch of two scalar valued Cauchy distributions. + dist = Cauchy(loc=[1, 2.], scale=[11, 22.]) + + # Evaluate the pdf of the first distribution on 0, and the second on 1.5, + # returning a length two tensor. + dist.prob([0, 1.5]) + + # Get 3 samples, returning a 3 x 2 tensor. + dist.sample([3]) + ``` + + Arguments are broadcast when possible. + + ```python + # Define a batch of two scalar valued Cauchy distributions. + # Both have median 1, but different scales. + dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.]) + # Evaluate the pdf of both distributions on the same point, 3.0, + # returning a length 2 tensor. + dist.prob(3.0) + ``` + """ + + def __init__(self, + loc, + scale, + validate_args=False, + allow_nan_stats=True, + name="Cauchy"): + """Construct Cauchy distributions with loc and and scale `loc` and `scale`. + + The parameters `loc` and `scale` must be shaped in a way that supports + broadcasting (e.g. `loc + scale` is a valid operation). + + Args: + loc: Floating point tensor; the modes of the distribution(s). + scale: Floating point tensor; the locations of the distribution(s). + Must contain only positive values. + validate_args: Python `bool`, default `False`. When `True` distribution + parameters are checked for validity despite possibly degrading runtime + performance. When `False` invalid inputs may silently render incorrect + outputs. + allow_nan_stats: Python `bool`, default `True`. When `True`, + statistics (e.g., mean, mode, variance) use the value "`NaN`" to + indicate the result is undefined. When `False`, an exception is raised + if one or more of the statistic's batch members are undefined. + name: Python `str` name prefixed to Ops created by this class. + + Raises: + TypeError: if `loc` and `scale` have different `dtype`. + """ + parameters = locals() + with ops.name_scope(name, values=[loc, scale]): + with ops.control_dependencies([check_ops.assert_positive(scale)] if + validate_args else []): + self._loc = array_ops.identity(loc, name="loc") + self._scale = array_ops.identity(scale, name="scale") + check_ops.assert_same_float_dtype([self._loc, self._scale]) + super(Cauchy, self).__init__( + dtype=self._scale.dtype, + reparameterization_type=distribution.FULLY_REPARAMETERIZED, + validate_args=validate_args, + allow_nan_stats=allow_nan_stats, + parameters=parameters, + graph_parents=[self._loc, self._scale], + name=name) + + @staticmethod + def _param_shapes(sample_shape): + return dict( + zip(("loc", "scale"), ([ops.convert_to_tensor( + sample_shape, dtype=dtypes.int32)] * 2))) + + @property + def loc(self): + """Distribution parameter for the mean.""" + return self._loc + + @property + def scale(self): + """Distribution parameter for standard deviation.""" + return self._scale + + def _batch_shape_tensor(self): + return array_ops.broadcast_dynamic_shape( + array_ops.shape(self.loc), + array_ops.shape(self.scale)) + + def _batch_shape(self): + return array_ops.broadcast_static_shape( + self.loc.shape, + self.scale.shape) + + def _event_shape_tensor(self): + return constant_op.constant([], dtype=dtypes.int32) + + def _event_shape(self): + return tensor_shape.scalar() + + def _sample_n(self, n, seed=None): + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) + probs = random_ops.random_uniform( + shape=shape, minval=0., maxval=1., dtype=self.dtype, seed=seed) + return self._quantile(probs) + + def _log_prob(self, x): + return self._log_unnormalized_prob(x) - self._log_normalization() + + def _cdf(self, x): + return math_ops.atan(self._z(x)) / np.pi + 0.5 + + def _log_cdf(self, x): + return math_ops.log1p(2 / np.pi * math_ops.atan(self._z(x))) - np.log(2) + + def _log_unnormalized_prob(self, x): + return -math_ops.log1p(math_ops.square(self._z(x))) + + def _log_normalization(self): + return np.log(np.pi) + math_ops.log(self.scale) + + def _entropy(self): + h = np.log(4 * np.pi) + math_ops.log(self.scale) + return h * array_ops.ones_like(self.loc) + + def _quantile(self, p): + return self.loc + self.scale * math_ops.tan(np.pi * (p - 0.5)) + + def _mode(self): + return self.loc * array_ops.ones_like(self.scale) + + def _z(self, x): + """Standardize input `x`.""" + with ops.name_scope("standardize", values=[x]): + return (x - self.loc) / self.scale + + def _inv_z(self, z): + """Reconstruct input `x` from a its normalized version.""" + with ops.name_scope("reconstruct", values=[z]): + return z * self.scale + self.loc + + def _mean(self): + if self.allow_nan_stats: + return array_ops.fill(self.batch_shape_tensor(), + self.dtype.as_numpy_dtype(np.nan)) + else: + raise ValueError("`mean` is undefined for Cauchy distribution.") + + def _stddev(self): + if self.allow_nan_stats: + return array_ops.fill(self.batch_shape_tensor(), + self.dtype.as_numpy_dtype(np.nan)) + else: + raise ValueError("`stddev` is undefined for Cauchy distribution.") diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md index dcc370cd00..ae4b07799f 100644 --- a/tensorflow/contrib/eager/README.md +++ b/tensorflow/contrib/eager/README.md @@ -1,4 +1,4 @@ -# Eager Execution +# TensorFlow Eager Execution > *WARNING*: This is a preview/pre-alpha version. The API and performance > characteristics are subject to change. diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb index 01616f2e7d..459f2f4a7d 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb @@ -429,7 +429,9 @@ "cpu_tensor = tf.random_normal([SIZE, SIZE])\n", "\n", "if is_gpu_available:\n", - " gpu_tensor = cpu_tensor.gpu()" + " gpu_tensor = cpu_tensor.gpu()\n", + "else:\n", + " print(\"GPU not available.\")" ] }, { diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb index 3b7e2cd435..e6c7c11733 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb @@ -383,7 +383,7 @@ "\n", "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n", "\n", - "1. the value returned by the function passed in (in this case, the loss calculated by `calculate_linear_model_loss()`), and\n", + "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n", "1. a list of tuples consisting of:\n", " 1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n", " 1. The corresponding variable (`tf.Variable`)\n", @@ -698,7 +698,7 @@ "source": [ "## Other Ways to Compute Gradients\n", "\n", - "Using our loss function as an example (`calculate_linear_model_loss()`), there are several other ways we could compute gradients:\n", + "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n", "\n", "1. `tfe.implicit_gradients()`\n", "1. `tfe.gradients_function()`\n", @@ -841,7 +841,7 @@ "# tfe.implicit_value_and_gradients() demo\n", "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n", "\n", - "# Returns only gradients:\n", + "# Returns the value returned by the function passed in, gradients, and variables:\n", "value_gradients_fn(inputs, labels, wb)" ] } diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb index ebcc7027c1..0088da5c4b 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb @@ -9,7 +9,7 @@ "source": [ "# Eager Execution Tutorial: Importing Data\n", "\n", - "This notebook demonstrates the use of the [`tf.contrib.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", + "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", "\n", "* Creating a `Dataset`.\n", "* Iteration over a `Dataset` with eager execution enabled.\n", @@ -64,7 +64,7 @@ "source": [ "# Step 1: Create a source `Dataset`\n", "\n", - "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." + "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." ] }, { @@ -83,7 +83,7 @@ }, "outputs": [], "source": [ - "ds_tensors = tf.contrib.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", + "ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", "\n", "# Create a CSV file\n", "import tempfile\n", @@ -93,7 +93,7 @@ "Line 2\n", "Line 3\n", " \"\"\")\n", - "ds_file = tf.contrib.data.TextLineDataset(filename)\n" + "ds_file = tf.data.TextLineDataset(filename)\n" ] }, { @@ -105,7 +105,7 @@ "source": [ "# Step 2: Apply transformations\n", "\n", - "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.contrib.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset) for details." + "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details." ] }, { diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index 1a5c6e8aec..c6e628b074 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -244,12 +244,6 @@ class Network(base.Layer): self._owned_layers = {} # The scope to use if we end up without a parent. self._default_parent_variable_scope = variable_scope.get_variable_scope() - # Hold on to the variable scope counts from init to check whether a scope - # with the name we want was ever created in our parent scope. Without this - # check we might have name collisions if the parent scope on init gets - # closed before build is called. - self._variable_scope_counts_on_init = ( - variable_scope._get_default_variable_store().variable_scopes_count) self._custom_getter, self._deferred_restorations = ( _make_custom_getter_for_deferred_restorations()) @@ -267,29 +261,18 @@ class Network(base.Layer): def _finalize_name(self, parent_network): if not self._name: + if not parent_network: + name_uid_map = base._get_default_graph_uid_map() + else: + name_uid_map = parent_network._sub_layer_name_uids # Were were not passed a name explicitly (or it was blank), so this is an # anonymous Network. We make up a unique name. if parent_network: avoid_names = parent_network._owned_layers - name_uid_map = parent_network._sub_layer_name_uids else: - name_uid_map = base._get_default_graph_uid_map() - # Figure out which names we have to avoid based on which variable scope - # we're nested in. - strip_name = self._default_parent_variable_scope.name - if strip_name: - strip_name += "/" - def _strip_on_init_scope(name): - if name.startswith(strip_name): - return name[len(strip_name):] - else: - return None - avoid_names = set( - _strip_on_init_scope(name) - for name in self._variable_scope_counts_on_init.keys() if name) + avoid_names = None self._name, self._base_name = self._make_unique_name( - name_uid_map=name_uid_map, avoid_names=avoid_names, - namespace=self._default_parent_variable_scope.name) + name_uid_map=name_uid_map, avoid_names=avoid_names) if self._first_parent is None or (self._first_parent # False = no parent and self._first_parent() is None): # Save a pointer to the parent Network so that we can later check that the @@ -319,13 +302,7 @@ class Network(base.Layer): parent_scope = first_parent._scope else: parent_scope = self._default_parent_variable_scope - with variable_scope.variable_scope(parent_scope) as parent_vs: - expected_scope_name = parent_vs.name + "/" + self._name - if expected_scope_name in self._variable_scope_counts_on_init: - raise ValueError( - ("A Network named '%s' already exists (or a variable_scope was " - "created with this name). Names must be unique.") % ( - self._name,)) + with variable_scope.variable_scope(parent_scope): # Make sure variables with this prefix will be unique. with variable_scope.variable_scope( None, use_resource=True, default_name=self._name) as scope: @@ -342,22 +319,25 @@ class Network(base.Layer): "created with this name). Names must be unique.") % ( self._name,)) if (first_parent - and scope_prefix[:-1] != first_parent.scope_name): + and scope_prefix[:-1] != first_parent._scope.name): raise ValueError( ("Network variable names must match a nesting of sub-Network " "names. Expected prefix '%s' from parent network, but got " "'%s' when attempting to create a variable_scope for Network " "'%s'. Likely an explicit variable_scope was inserted into " "the nesting.") % ( - first_parent.scope_name, + first_parent._scope.name, scope_prefix[:-1], self._name)) elif not first_parent and scope_prefix: # For the case when this Network is not nested inside any other - # Network, but is in a variable_scope. This Network's name takes on - # the full variable scope prefix. - self._name = scope_name - + # Network, but is in a variable_scope. This is an error for now. + raise ValueError( + "Creating Networks inside named variable_scopes is currently " + "not supported (to ensure that variable names match the names " + "of Networks in which they were first created). To set " + "options, try `with tf.variable_scope(''):`. If this " + "limitation bothers you, please file a feature request.") for non_network_sublayer in self._non_network_sublayers: self._set_scope_for_nonnetwork_sublayer(non_network_sublayer) @@ -375,7 +355,8 @@ class Network(base.Layer): raise ValueError( ("The parent of a Layer added to Network %s was garbage collected " "before the Layer was built. If this limitation bothers you " - "please file a feature request.") % + "please, comment on " + "https://github.com/tensorflow/tensorflow/issues/14164.") % (self.name,)) with variable_scope.variable_scope(parent_scope): # Horrid hack to make Layer variable names which are direct @@ -439,9 +420,7 @@ class Network(base.Layer): # name, and we should respect it (subject to error checking). layer._name, layer._base_name = layer._make_unique_name( name_uid_map=self._sub_layer_name_uids, - avoid_names=self._owned_layers - # No namespace required, since we've specified our own UID map. - ) + avoid_names=self._owned_layers) layer._first_parent = weakref.ref(self) self._non_network_sublayers.append(layer) if (not layer.built @@ -577,7 +556,7 @@ class Network(base.Layer): if os.path.isdir(save_path): # If we were passed a directory, default to naming based on the Network # name. - save_path = os.path.join(save_path, self.name.replace("/", "_")) + save_path = os.path.join(save_path, self.name) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) @@ -771,7 +750,7 @@ class Network(base.Layer): self._set_scope() # scope_name should be available to map_funcs if os.path.isdir(save_path): # If we don't have a name yet, set no parent. - save_path = os.path.join(save_path, self.name.replace("/", "_")) + save_path = os.path.join(save_path, self.name) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 1127055c05..14adbafe57 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -410,103 +410,19 @@ class NetworkTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testWrappingInVariableScope(self): - one = constant_op.constant([[1.]]) - # Naming happens in the order of first build rather than the order of - # construction, but for clarity they're the same here and construction is - # annotated. - outside_net_before = MyNetwork() # name=my_network_1 - outside_net_before(one) - captured_scope = variable_scope.get_variable_scope() with variable_scope.variable_scope("outside_scope"): - net1 = MyNetwork() # name=outside_scope/my_network_1 - net1(one) - name_conflict1 = MyNetwork(name="name_conflict") # fine, unique so far - name_conflict2 = MyNetwork(name="name_conflict") # error on build - with variable_scope.variable_scope("inside_scope"): - # No issue here since the name is unique within its scope. - name_conflict3 = MyNetwork(name="name_conflict") - net2 = MyNetwork() # name=outside_scope/my_network_3 to avoid the - # variable_scope my_network_2 below. - vs_name_conflict = MyNetwork(name="vs_name_conflict") # conflict below - with variable_scope.variable_scope("intervening_scope"): - with variable_scope.variable_scope(captured_scope): - with variable_scope.variable_scope("outside_scope"): - name_conflict4 = MyNetwork(name="name_conflict") # error on build - with variable_scope.variable_scope("my_network_2"): - pass - with variable_scope.variable_scope("vs_name_conflict"): - pass - net3 = MyNetwork() # name=outside_scope/my_network_4 - name_conflict1(one) - with self.assertRaisesRegexp( - ValueError, "named 'name_conflict' already exists"): - name_conflict2(one) - name_conflict3(one) - net2(one) - with self.assertRaisesRegexp( - ValueError, "or a variable_scope was created with this name"): - vs_name_conflict(one) - with self.assertRaisesRegexp( - ValueError, "named 'name_conflict' already exists"): - name_conflict4(one) - self.assertEqual("outside_scope/name_conflict", - name_conflict1.name) - self.assertStartsWith( - expected_start="outside_scope/name_conflict/dense_1/", - actual=name_conflict1.variables[0].name) - self.assertEqual("outside_scope/inside_scope/name_conflict", - name_conflict3.name) - self.assertStartsWith( - expected_start="outside_scope/inside_scope/name_conflict/dense_1/", - actual=name_conflict3.variables[0].name) - self.assertEqual("outside_scope/my_network_1", net1.name) - self.assertStartsWith( - expected_start="outside_scope/my_network_1/dense_1/", - actual=net1.trainable_weights[0].name) - self.assertEqual("outside_scope/my_network_3", net2.name) - self.assertStartsWith( - expected_start="outside_scope/my_network_3/dense_1/", - actual=net2.trainable_weights[0].name) - net3(one) - self.assertEqual("outside_scope/my_network_4", net3.name) - self.assertStartsWith( - expected_start="outside_scope/my_network_4/dense_1/", - actual=net3.trainable_weights[0].name) - outside_net_after = MyNetwork() - outside_net_after(one) - self.assertEqual("my_network_1", outside_net_before.name) - self.assertStartsWith( - expected_start="my_network_1/dense_1/", - actual=outside_net_before.trainable_weights[0].name) - self.assertEqual("my_network_2", outside_net_after.name) - self.assertStartsWith( - expected_start="my_network_2/dense_1/", - actual=outside_net_after.trainable_weights[0].name) - - @test_util.run_in_graph_and_eager_modes() - def testVariableScopeStripping(self): - with variable_scope.variable_scope("scope1"): - with variable_scope.variable_scope("scope2"): - net = MyNetwork() - net(constant_op.constant([[2.0]])) - self.evaluate(net.variables[0].assign([[42.]])) - self.assertEqual(net.name, "scope1/scope2/my_network_1") - self.assertStartsWith( - expected_start="scope1/scope2/my_network_1/dense_1/", - actual=net.trainable_weights[0].name) - save_path = net.save(self.get_temp_dir()) - self.assertIn("scope1_scope2_my_network_1", save_path) - restore_net = MyNetwork() - # Delayed restoration - restore_net.restore(save_path) - restore_net(constant_op.constant([[1.0]])) - self.assertAllEqual([[42.]], - self.evaluate(restore_net.variables[0])) - self.evaluate(restore_net.variables[0].assign([[-1.]])) - # Immediate restoration - restore_net.restore(save_path) - self.assertAllEqual([[42.]], - self.evaluate(restore_net.variables[0])) + net = MyNetwork() + one = constant_op.constant([[1.]]) + with self.assertRaisesRegexp( + ValueError, + ("Creating Networks inside named variable_scopes is currently not " + "supported")): + net(one) + # Alternatively, we could re-name the Network to match the variable_scope: + # self.assertEqual("outside_scope/my_network_1", net.name) + # self.assertStartsWith( + # expected_start="outside_scope/my_network_1/dense/", + # actual=net.trainable_weights[0].name) @test_util.run_in_graph_and_eager_modes() def testLayerNamesRespected(self): diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index bc67ef8354..6eb2cfdaca 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -204,13 +204,10 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", - "//tensorflow/python:summary", "//tensorflow/python/estimator:head", - "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", "//tensorflow/python/saved_model:signature_constants", "@six_archive//:six", @@ -232,7 +229,7 @@ py_test( "//tensorflow/python:string_ops", "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", - "//tensorflow/python/estimator:prediction_keys", + "//tensorflow/python/ops/losses", "//tensorflow/python/saved_model:signature_constants", "//third_party/py/numpy", "@six_archive//:six", diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index a9311a20f1..e344ee3c3e 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -28,7 +28,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops @@ -49,20 +48,7 @@ def multi_class_head(n_classes, Uses `sparse_softmax_cross_entropy` loss. - The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. - In many applications, the shape is `[batch_size, n_classes]`. - - `labels` must be a dense `Tensor` with shape matching `logits`, namely - `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string - `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, - `labels` must be an integer `Tensor` with values specifying the class index. - - If `weight_column` is specified, weights must be of shape - `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. - - The loss is the weighted sum over the input dimensions. Namely, if the input - labels have shape `[batch_size, 1]`, the loss is the weighted sum over - `batch_size`. + This head expects to be fed integer labels specifying the class index. Args: n_classes: Number of classes, must be greater than 2 (for 2 classes, use @@ -71,11 +57,11 @@ def multi_class_head(n_classes, `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. - label_vocabulary: A list or tuple of strings representing possible label - values. If it is not given, that means labels are already encoded as an - integer within [0, n_classes). If given, labels must be of string type and - have any value in `label_vocabulary`. Note that errors will be raised if - `label_vocabulary` is not provided but labels are strings. + label_vocabulary: A list of strings represents possible label values. If it + is not given, that means labels are already encoded as integer within + [0, n_classes). If given, labels must be string type and have any value in + `label_vocabulary`. Also there will be errors if vocabulary is not + provided and labels are string. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -98,20 +84,7 @@ def binary_classification_head( This head uses `sigmoid_cross_entropy_with_logits` loss. - The head expects `logits` with shape `[D0, D1, ... DN, 1]`. - In many applications, the shape is `[batch_size, 1]`. - - `labels` must be a dense `Tensor` with shape matching `logits`, namely - `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string - `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, - `labels` must be float `Tensor` with values in the interval `[0, 1]`. - - If `weight_column` is specified, weights must be of shape - `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. - - The loss is the weighted sum over the input dimensions. Namely, if the input - labels have shape `[batch_size, 1]`, the loss is the weighted sum over - `batch_size`. + This head expects to be fed float labels of shape `(batch_size, 1)`. Args: weight_column: A string or a `_NumericColumn` created by @@ -123,11 +96,11 @@ def binary_classification_head( generated for each threshold value. This threshold is applied to the logistic values to determine the binary classification (i.e., above the threshold is `true`, below is `false`. - label_vocabulary: A list or tuple of strings representing possible label - values. If it is not given, labels must be float with values within - [0, 1]. If given, labels must be string type and have any value in - `label_vocabulary`. Note that errors will be raised if `label_vocabulary` - is not provided but labels are strings. + label_vocabulary: A list of strings represents possible label values. If it + is not given, that means labels are already encoded within [0, 1]. If + given, labels must be string type and have any value in + `label_vocabulary`. Also there will be errors if vocabulary is not + provided and labels are string. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -147,22 +120,9 @@ def binary_classification_head( def regression_head(weight_column=None, label_dimension=1, name=None): - """Creates a `_Head` for regression using the `mean_squared_error` loss. - - The loss is the weighted sum over all input dimensions. Namely, if the input - labels have shape `[batch_size, label_dimension]`, the loss is the weighted - sum over both `batch_size` and `label_dimension`. - - The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`. - In many applications, the shape is `[batch_size, label_dimension]`. - - The `labels` shape must match `logits`, namely - `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape - `[D0, D1, ... DN]` is also supported. + """Creates a `_Head` for regression using the mean squared loss. - If `weight_column` is specified, weights must be of shape - `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or - `[D0, D1, ... DN, label_dimension]`. + Uses `mean_squared_error` loss. Args: weight_column: A string or a `_NumericColumn` created by @@ -196,29 +156,15 @@ def multi_label_head(n_classes, or more associated labels, from a discrete set. This is distinct from `multi_class_head` which has exactly one label per example. - Uses `sigmoid_cross_entropy` loss average over classes and weighted sum over - the batch. Namely, if the input logits have shape `[batch_size, n_classes]`, - the loss is the average over `n_classes` and the weighted sum over - `batch_size`. - - The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many - applications, the shape is `[batch_size, label_n_classes]`. - - Labels can be: - * A multi-hot tensor of shape `[D0, D1, ... DN, n_classes]` - * An integer `SparseTensor` of class indices. The `dense_shape` must be - `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`. - * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape` - must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary`. - - If `weight_column` is specified, weights must be of shape - `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. + Uses `sigmoid_cross_entropy` loss averaged over classes. Expects labels as a + multi-hot tensor of shape `[batch_size, n_classes]`, or as an integer + `SparseTensor` of class indices. Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features)` as arguments and returns unreduced loss with - shape `[D0, D1, ... DN, 1]`. `loss_fn` must support indicator `labels` with - shape `[D0, D1, ... DN, n_classes]`. Namely, the head applies - `label_vocabulary` to the input labels before passing them to `loss_fn`. + shape `[batch_size, 1]`. `loss_fn` must support indicator `labels` with shape + `[batch_size, n_classes]`. Namely, the head applies `label_vocabulary` to the + input labels before passing them to `loss_fn`. Args: n_classes: Number of classes, must be greater than 1 (for 1 class, use @@ -245,7 +191,7 @@ def multi_label_head(n_classes, An instance of `_Head` for multi-label classification. Raises: - ValueError: if `n_classes`, `thresholds`, or `loss_fn` is invalid. + ValueError: if `n_classes` or `thresholds` is invalid. """ thresholds = tuple(thresholds) if thresholds else tuple() if n_classes is None or n_classes < 2: @@ -313,36 +259,26 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) - return math_ops.to_int64( - sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) else: - err_msg = ( - r'labels must be an integer SparseTensor with values in ' - r'[0, {})'.format(self._n_classes)) - assert_int = check_ops.assert_integer( - labels.values, message=err_msg) - assert_less = check_ops.assert_less( - labels.values, - ops.convert_to_tensor(self._n_classes, dtype=labels.dtype), - message=err_msg) - assert_greater = check_ops.assert_non_negative( - labels.values, message=err_msg) - with ops.control_dependencies( - [assert_int, assert_less, assert_greater]): - return math_ops.to_int64( - sparse_ops.sparse_to_indicator(labels, self._n_classes)) - err_msg = ( - r'labels must be an integer indicator Tensor with values in [0, 1]') - return head_lib._assert_range(labels, 2, message=err_msg) # pylint:disable=protected-access, + label_ids = labels + return math_ops.to_int64( + sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) + msg = ('labels shape must be [batch_size, {}]. ' + 'Given: ').format(self._n_classes) + labels_shape = array_ops.shape(labels) + check_rank_op = control_flow_ops.Assert( + math_ops.equal(array_ops.rank(labels), 2), + data=[msg, labels_shape]) + check_label_dim = control_flow_ops.Assert( + math_ops.equal(labels_shape[-1], self._n_classes), + data=[msg, labels_shape]) + with ops.control_dependencies([check_rank_op, check_label_dim]): + return array_ops.identity(labels) def create_loss(self, features, mode, logits, labels): """See `Head`.""" del mode # Unused for this head. - logits = ops.convert_to_tensor(logits) processed_labels = self._process_labels(labels) - processed_labels = head_lib._check_dense_labels_match_logits_and_reshape( # pylint:disable=protected-access - labels=processed_labels, logits=logits, - expected_labels_dimension=self.logits_dimension) if self._loss_fn: unweighted_loss = _call_loss_fn( loss_fn=self._loss_fn, labels=processed_labels, logits=logits, @@ -354,8 +290,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access # Averages loss over classes. unweighted_loss = math_ops.reduce_mean( unweighted_loss, axis=-1, keep_dims=True) - weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, - features=features, weight_column=self._weight_column, logits=logits) + weights = head_lib._weights(features, self._weight_column) # pylint:disable=protected-access, weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -370,7 +305,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" with ops.name_scope(self._name, 'head'): - logits = head_lib._check_logits_final_dim(logits, self.logits_dimension) # pylint:disable=protected-access + logits = head_lib._check_logits(logits, self.logits_dimension) # pylint:disable=protected-access # Predict. pred_keys = prediction_keys.PredictionKeys @@ -400,8 +335,6 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access # Eval. if mode == model_fn.ModeKeys.EVAL: - weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, - features=features, weight_column=self._weight_column, logits=logits) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, @@ -409,7 +342,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access eval_metric_ops=self._eval_metric_ops( labels=processed_labels, probabilities=probabilities, - weights=weights, + weights=head_lib._weights(features, self._weight_column), # pylint:disable=protected-access, weighted_sum_loss=weighted_sum_loss, example_weight_sum=example_weight_sum)) diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index d1cf909004..fd8c53f6a9 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -316,14 +316,13 @@ class MultiLabelHead(test.TestCase): _initialize_variables(self, monitored_session.Scaffold()) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'\[expected_labels_shape: \] \[2 2\] \[labels_shape: \] \[2 1\]'): + r'labels shape must be \[batch_size, 2\]\. Given: \] \[2 1\]'): actual_weighted_sum_loss.eval({ labels_placeholder: np.array([[1], [1]], dtype=np.int64) }) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'labels shape must be \[D0, D1, ... DN, 2\]\..*' - r'\[Received shape: \] \[2\]'): + r'labels shape must be \[batch_size, 2\]\. Given: \] \[2\]'): actual_weighted_sum_loss.eval({ labels_placeholder: np.array([1, 1], dtype=np.int64) }) @@ -388,11 +387,9 @@ class MultiLabelHead(test.TestCase): logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=None) - def _test_eval( - self, head, logits, labels, expected_loss, expected_metrics, - features=None): + def _test_eval(self, head, logits, labels, expected_loss, expected_metrics): spec = head.create_estimator_spec( - features=features or {}, + features={'x': np.array(((42,),), dtype=np.int32)}, mode=model_fn.ModeKeys.EVAL, logits=logits, labels=labels) @@ -658,54 +655,6 @@ class MultiLabelHead(test.TestCase): labels=None, train_op_fn=_no_op_train_fn) - def test_train_invalid_indicator_labels(self): - head = head_lib.multi_label_head(n_classes=2) - logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) - # The value 2 is outside the allowed range. - labels = np.array([[2, 0], [1, 1]], dtype=np.int64) - def _train_op_fn(loss): - del loss - return control_flow_ops.no_op() - - spec = head.create_estimator_spec( - features={}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels, - train_op_fn=_train_op_fn) - with self.test_session() as sess: - _initialize_variables(self, spec.scaffold) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'labels must be an integer indicator Tensor with values in ' - r'\[0, 1\]'): - sess.run(spec.loss) - - def test_train_invalid_sparse_labels(self): - head = head_lib.multi_label_head(n_classes=2) - logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) - # The value 2 is outside the allowed range. - labels = sparse_tensor.SparseTensor( - values=[2, 0, 1], - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - def _train_op_fn(loss): - del loss - return control_flow_ops.no_op() - - spec = head.create_estimator_spec( - features={}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels, - train_op_fn=_train_op_fn) - with self.test_session() as sess: - _initialize_variables(self, spec.scaffold) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'labels must be an integer SparseTensor with values in \[0, 2\)'): - sess.run(spec.loss) - def _test_train(self, head, logits, labels, expected_loss): expected_train_result = 'my_train_op' def _train_op_fn(loss): @@ -842,153 +791,6 @@ class MultiLabelHead(test.TestCase): metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 3, }, summary_str, tol) - def test_multi_dim_weighted_train_create_loss(self): - """Logits and labels of shape [2, 2, 3], weights [2, 2].""" - head = head_lib.multi_label_head(n_classes=3, weight_column='weights') - - logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], - [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) - labels = np.array([[[1, 0, 0], [1, 0, 0]], - [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) - weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) - # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 - # = [[20/3, 10/3], [4, 8]] - # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 - expected_weighted_sum_loss = 39.6667 - expected_example_weight_sum = np.sum(weights) - actual_weighted_sum_loss, actual_example_weight_sum, _ = head.create_loss( - features={'weights': weights}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels) - atol = 1.e-3 - with self.test_session(): - _initialize_variables(self, monitored_session.Scaffold()) - self.assertAllClose( - expected_weighted_sum_loss, actual_weighted_sum_loss.eval(), - atol=atol) - self.assertAllClose( - expected_example_weight_sum, actual_example_weight_sum.eval(), - atol=atol) - - def test_multi_dim_weighted_train(self): - """Logits and labels of shape [2, 2, 3], weights [2, 2].""" - head = head_lib.multi_label_head(n_classes=3, weight_column='weights') - - logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], - [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) - labels = np.array([[[1, 0, 0], [1, 0, 0]], - [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) - weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) - # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 - # = [[20/3, 10/3], [4, 8]] - # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 - expected_loss = 39.6667 - expected_train_result = 'my_train_op' - def _train_op_fn(loss): - return string_ops.string_join( - [constant_op.constant(expected_train_result), - string_ops.as_string(loss, precision=3)]) - - spec = head.create_estimator_spec( - features={'weights': weights}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels, - train_op_fn=_train_op_fn) - - atol = 1.e-3 - with self.test_session() as sess: - _initialize_variables(self, monitored_session.Scaffold()) - loss, train_result = sess.run((spec.loss, spec.train_op)) - self.assertAllClose(expected_loss, loss, atol=atol) - self.assertEqual( - six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), - train_result) - - def test_multi_dim_weights_wrong_inner_dim(self): - """Logits and labels of shape [2, 2, 3], weights [2, 1].""" - head = head_lib.multi_label_head(n_classes=3, weight_column='weights') - - logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], - [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) - labels = np.array([[[1, 0, 0], [1, 0, 0]], - [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) - weights = np.array([[1.], [2.]], dtype=np.float32) - def _train_op_fn(loss): - del loss - return control_flow_ops.no_op() - - spec = head.create_estimator_spec( - features={'weights': weights}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels, - train_op_fn=_train_op_fn) - with self.test_session(): - _initialize_variables(self, monitored_session.Scaffold()) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'): - spec.loss.eval() - - def test_multi_dim_weights_wrong_outer_dim(self): - """Logits and labels of shape [2, 2, 3], weights [2, 2, 3].""" - head = head_lib.multi_label_head(n_classes=3, weight_column='weights') - - logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], - [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) - labels = np.array([[[1, 0, 0], [1, 0, 0]], - [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) - weights = np.array([[[1., 1., 1.], [1.5, 1.5, 1.5]], - [[2., 2., 2.], [2.5, 2.5, 2.5]]], dtype=np.float32) - weights_placeholder = array_ops.placeholder(dtype=dtypes.float32) - def _train_op_fn(loss): - del loss - return control_flow_ops.no_op() - - spec = head.create_estimator_spec( - features={'weights': weights_placeholder}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels, - train_op_fn=_train_op_fn) - with self.test_session(): - _initialize_variables(self, monitored_session.Scaffold()) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 2 3\]'): - spec.loss.eval({weights_placeholder: weights}) - - def test_multi_dim_weighted_eval(self): - """Logits and labels of shape [2, 2, 3], weights [2, 2].""" - head = head_lib.multi_label_head(n_classes=3, weight_column='weights') - - logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], - [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) - labels = np.array([[[1, 0, 0], [1, 0, 0]], - [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) - weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) - # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 - # = [[20/3, 10/3], [4, 8]] - # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 - expected_loss = 39.6667 - keys = metric_keys.MetricKeys - expected_metrics = { - keys.LOSS_MEAN: expected_loss / np.sum(weights), - # auc and auc_pr cannot be reliably calculated for only 4 samples, but - # this assert tests that the algorithm remains consistent. - keys.AUC: 0.4977, - keys.AUC_PR: 0.6645, - } - self._test_eval( - head=head, - features={'weights': weights}, - logits=logits, - labels=labels, - expected_loss=expected_loss, - expected_metrics=expected_metrics) - if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index 73bae5acf9..69dbfcee62 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -22,13 +22,10 @@ import six from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib -from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.saved_model import signature_constants -from tensorflow.python.summary import summary _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -75,23 +72,6 @@ def multi_head(heads, head_weights=None): estimator.train(input_fn=input_fn, steps=100) ``` - Also supports `logits` as a `Tensor` of shape - `[D0, D1, ... DN, logits_dimension]`. It will split the `Tensor` along the - last dimension and distribute it appropriately among the heads. E.g.: - - ```python - def model_fn(features, labels, mode): - # Create simple heads and specify head name. - head1 = multi_class_head(n_classes=3, name='head1') - head2 = binary_classification_head(name='head2') - # Create multi-head from two simple heads. - head = multi_head([head1, head2]) - # Create logits for the multihead. - logits = logit_fn(logits_dimension=head.logits_dimension) - # Return the merged EstimatorSpec - return head.create_estimator_spec(..., logits=logits, ...) - ``` - Args: heads: List or tuple of `_Head` instances. All heads must have `name` specified. The first head in the list is the default used at serving time. @@ -181,17 +161,18 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access def create_loss(self, features, mode, logits, labels): """See `Head`.""" - if isinstance(logits, dict): - logits_dict = logits - else: - logits_dict = self._split_logits(logits) + # TODO(roumposg): Add support for logits as single Tensor (with + # _split_logits utility). + if not isinstance(logits, dict): + raise ValueError('logits must be a dict. Single Tensor support coming ' + 'soon.') weighted_sum_losses = [] example_weight_sums = [] labels_by_head = {} for head in self._heads: (weighted_sum_loss, example_weight_sum, processed_labels) = head.create_loss( - features, mode, logits_dict[head.name], labels[head.name]) + features, mode, logits[head.name], labels[head.name]) weighted_sum_losses.append(weighted_sum_loss) example_weight_sums.append(example_weight_sum) labels_by_head[head.name] = processed_labels @@ -224,10 +205,10 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `_Head`.""" - if isinstance(logits, dict): - logits_dict = logits - else: - logits_dict = self._split_logits(logits) + # TODO(roumposg): Add support for logits as single Tensor (with + # _split_logits utility). + if not isinstance(logits, dict): + raise ValueError('logits must be a dict. Given: {}'.format(logits)) if labels and not isinstance(labels, dict): raise ValueError('labels must be a dict. Given: {}'.format(labels)) @@ -238,42 +219,22 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access head.create_estimator_spec( features=features, mode=mode, - logits=logits_dict[head_name], + logits=logits[head_name], labels=labels[head_name] if labels else None, train_op_fn=_no_op_train_fn)) + # TODO(roumposg): Add LOSS and LOSS_MEAN summaries for the total head- + # combined loss. if mode == model_fn.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError('train_op_fn can not be None in TRAIN mode.') - spec = self._merge_train(all_estimator_spec, train_op_fn) - with ops.name_scope(''): - summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss) - return spec + return self._merge_train(all_estimator_spec, train_op_fn) if mode == model_fn.ModeKeys.PREDICT: return self._merge_predict(all_estimator_spec) if mode == model_fn.ModeKeys.EVAL: return self._merge_eval(all_estimator_spec) raise ValueError('mode={} unrecognized'.format(mode)) - def _split_logits(self, logits): - """Splits logits along the last dimension and returns a dict.""" - logits_dict = {} - with ops.name_scope(None, 'split_logits', values=[logits]): - logits = ops.convert_to_tensor(logits) - batch_shape = array_ops.shape(logits)[:-1] - zeros_like_batch_shape = array_ops.zeros_like(batch_shape) - minus_ones_like_batch_shape = -1 * array_ops.ones_like(batch_shape) - begin_idx = 0 - for head in self._heads: - begin_tensor = array_ops.concat( - [zeros_like_batch_shape, [begin_idx]], axis=0) - size_tensor = array_ops.concat( - [minus_ones_like_batch_shape, [head.logits_dimension]], axis=0) - logits_dict[head.name] = array_ops.slice( - logits, begin=begin_tensor, size=size_tensor) - begin_idx += head.logits_dimension - return logits_dict - def _merge_train(self, all_estimator_spec, train_op_fn): """Merges list of `EstimatorSpec` for training. diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 8d51a298b2..16177aebd5 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -106,8 +106,7 @@ class MultiHeadTest(test.TestCase): multi_head = multi_head_lib.multi_head([head1, head2]) self.assertEqual('head1_head2', multi_head.name) - def test_predict_two_heads_logits_dict(self): - """Tests predict with logits as dict.""" + def test_predict_two_heads(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') multi_head = multi_head_lib.multi_head([head1, head2]) @@ -159,111 +158,6 @@ class MultiHeadTest(test.TestCase): expected_probabilities['head2'], sess.run(spec.export_outputs['head2'].scores)) - def test_predict_two_heads_logits_tensor(self): - """Tests predict with logits as Tensor.""" - head1 = head_lib.multi_label_head(n_classes=2, name='head1') - head2 = head_lib.multi_label_head(n_classes=3, name='head2') - multi_head = multi_head_lib.multi_head([head1, head2]) - - logits = np.array( - [[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32) - expected_logits1 = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32) - expected_logits2 = np.array([[2., -2., 2.], [-3., 2., -2.]], - dtype=np.float32) - expected_probabilities = { - 'head1': _sigmoid(expected_logits1), - 'head2': _sigmoid(expected_logits2), - } - - spec = multi_head.create_estimator_spec( - features={'x': np.array(((42,),), dtype=np.int32)}, - mode=model_fn.ModeKeys.PREDICT, - logits=logits) - - self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'head1', 'classification/head1', 'predict/head1', - 'head2', 'classification/head2', 'predict/head2'), - spec.export_outputs.keys()) - - # Assert predictions and export_outputs. - with self.test_session() as sess: - _initialize_variables(self, spec.scaffold) - self.assertIsNone(spec.scaffold.summary_op) - predictions = sess.run(spec.predictions) - self.assertAllClose( - expected_logits1, - predictions[('head1', prediction_keys.PredictionKeys.LOGITS)]) - self.assertAllClose( - expected_logits2, - predictions[('head2', prediction_keys.PredictionKeys.LOGITS)]) - self.assertAllClose( - expected_probabilities['head1'], - predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)]) - self.assertAllClose( - expected_probabilities['head2'], - predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)]) - - self.assertAllClose( - expected_probabilities['head1'], - sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores)) - self.assertAllClose( - expected_probabilities['head1'], - sess.run(spec.export_outputs['head1'].scores)) - self.assertAllClose( - expected_probabilities['head2'], - sess.run(spec.export_outputs['head2'].scores)) - - def test_predict_two_heads_logits_tensor_multi_dim(self): - """Tests predict with multi-dimensional logits of shape [2, 2, 5].""" - head1 = head_lib.regression_head(label_dimension=2, name='head1') - head2 = head_lib.regression_head(label_dimension=3, name='head2') - multi_head = multi_head_lib.multi_head([head1, head2]) - - logits = np.array( - [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], - [[-1.5, 1., -3., 2., -2.], [-1.5, 1., -3., 2., -2.]]], - dtype=np.float32) - expected_logits1 = np.array( - [[[-1., 1.], [-1., 1.]], - [[-1.5, 1.], [-1.5, 1.]]], - dtype=np.float32) - expected_logits2 = np.array( - [[[2., -2., 2.], [2., -2., 2.]], - [[-3., 2., -2.], [-3., 2., -2.]]], - dtype=np.float32) - - spec = multi_head.create_estimator_spec( - features={'x': np.array(((42,),), dtype=np.int32)}, - mode=model_fn.ModeKeys.PREDICT, - logits=logits) - - self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'head1', 'regression/head1', 'predict/head1', - 'head2', 'regression/head2', 'predict/head2'), - spec.export_outputs.keys()) - - # Assert predictions and export_outputs. - with self.test_session() as sess: - _initialize_variables(self, spec.scaffold) - self.assertIsNone(spec.scaffold.summary_op) - predictions = sess.run(spec.predictions) - self.assertAllClose( - expected_logits1, - predictions[('head1', prediction_keys.PredictionKeys.PREDICTIONS)]) - self.assertAllClose( - expected_logits2, - predictions[('head2', prediction_keys.PredictionKeys.PREDICTIONS)]) - - self.assertAllClose( - expected_logits1, - sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].value)) - self.assertAllClose( - expected_logits1, - sess.run(spec.export_outputs['head1'].value)) - self.assertAllClose( - expected_logits2, - sess.run(spec.export_outputs['head2'].value)) - def test_eval_two_heads_with_weights(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') @@ -390,84 +284,6 @@ class MultiHeadTest(test.TestCase): # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13 self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol) - def test_train_create_loss_logits_tensor(self): - """Tests create_loss with logits Tensor.""" - weights1 = np.array([[1.], [2.]], dtype=np.float32) - weights2 = np.array([[2.], [3.]]) - head1 = head_lib.multi_label_head(n_classes=2, name='head1', - weight_column='weights1') - head2 = head_lib.multi_label_head(n_classes=3, name='head2', - weight_column='weights2') - multi_head = multi_head_lib.multi_head( - [head1, head2], head_weights=[1., 2.]) - - logits = np.array([[-10., 10., 20., -20., 20.], - [-15., 10., -30., 20., -20.]], dtype=np.float32) - labels = { - 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), - 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), - } - weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss( - features={ - 'x': np.array(((42,),), dtype=np.int32), - 'weights1': weights1, - 'weights2': weights2 - }, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels) - tol = 1e-3 - with self.test_session(): - # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]] - # = [10, 7.5] - # weighted_sum_loss = 1 * 10 + 2 * 7.5 = 25 - # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]] - # = [20, 10] - # weighted_sum_loss = 2 * 20 + 3 * 10 = 70 - # head-weighted merge = 1 * 25 + 2 * 70 = 165 - self.assertAllClose(165, weighted_sum_loss.eval(), rtol=tol, atol=tol) - # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13 - self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol) - - def test_train_create_loss_logits_tensor_multi_dim(self): - """Tests create_loss with multi-dimensional logits of shape [2, 2, 5].""" - head1 = head_lib.regression_head(label_dimension=2, name='head1') - head2 = head_lib.regression_head(label_dimension=3, name='head2') - multi_head = multi_head_lib.multi_head([head1, head2]) - - logits = np.array( - [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], - [[-1.5, 1.5, -2., 2., -2.], [-1.5, 1.5, -2., 2., -2.]]], - dtype=np.float32) - labels = { - 'head1': np.array([[[1., 0.], [1., 0.]], - [[1.5, 1.5], [1.5, 1.5]]], dtype=np.float32), - 'head2': np.array([[[0., 1., 0.], [0., 1., 0.]], - [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32), - } - # Loss for the first head: - # loss1 = (1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 + - # (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2 - # = 28 - # Loss for the second head: - # loss2 = (0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 + - # (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2 - # = 74 - expected_weighted_sum_loss = 28. + 74. - - weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss( - features={}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels) - tol = 1e-3 - with self.test_session(): - self.assertAllClose( - expected_weighted_sum_loss, weighted_sum_loss.eval(), - rtol=tol, atol=tol) - self.assertAllClose( - 2. * 2. * 5., example_weight_sum.eval(), rtol=tol, atol=tol) - def test_train_one_head(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') multi_head = multi_head_lib.multi_head([head1]) @@ -511,7 +327,6 @@ class MultiHeadTest(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries(self, { - metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss, # Average loss over examples. metric_keys.MetricKeys.LOSS_MEAN + '/head1': expected_loss / 2, @@ -572,7 +387,6 @@ class MultiHeadTest(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries(self, { - metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss_head1, metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2, # Average loss over examples. diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index 0848c5f62f..7005a647db 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -34,12 +34,10 @@ from tensorflow.python.estimator import util from tensorflow.python.estimator.export import export_output as export_output_lib from tensorflow.python.framework import device as framework_device from tensorflow.python.framework import ops as ops_lib -from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients as gradients_lib from tensorflow.python.ops import math_ops -from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib @@ -185,17 +183,10 @@ def _split_batch(features, labels, number_of_shards, device): """Split input features and labes into batches.""" def split_dictionary(dictionary): - """Split a dictionary into shards.""" shards = [{} for _ in range(number_of_shards)] for name, tensor in six.iteritems(dictionary): - if isinstance(tensor, sparse_tensor.SparseTensor): - for i, shard in enumerate( - sparse_ops.sparse_split( - sp_input=tensor, num_split=number_of_shards, axis=0)): - shards[i][name] = shard - else: - for i, shard in enumerate(array_ops.split(tensor, number_of_shards)): - shards[i][name] = shard + for i, shard in enumerate(array_ops.split(tensor, number_of_shards)): + shards[i][name] = shard return shards with ops_lib.name_scope('split_inputs'): @@ -322,17 +313,7 @@ def _call_optimizer_fn(optimizer_fn, params): def _compute_sum_on_device(values, device, name=None): with ops_lib.device(device): - if isinstance(values[0], ops_lib.IndexedSlices): - if name: - raise ValueError('The name {} is not expected to be given to ' - 'IndexedSlices {}'.format(name, values)) - - values_concat = array_ops.concat([v.values for v in values], axis=0) - indices_concat = array_ops.concat([v.indices for v in values], axis=0) - return ops_lib.IndexedSlices(values_concat, indices_concat, - values[0].dense_shape) - else: - return math_ops.add_n(values, name=name) + return math_ops.add_n(values, name=name) def _train_spec(tower_specs, @@ -357,17 +338,25 @@ def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'): [spec.loss for spec in tower_specs], aggregation_device, aggregated_loss_name) - update_ops = [] + eval_metric_ops_lists = {} for tower_spec in tower_specs: - for name, (_, update_op) in six.iteritems(tower_spec.eval_metric_ops): + metrics = tower_spec.eval_metric_ops or {} + for name, (_, update_op) in six.iteritems(metrics): + update_ops = eval_metric_ops_lists.setdefault(name, ([])) update_ops.append(update_op) - with ops_lib.control_dependencies(update_ops): - reduced_update_op = _reduce_metric_variables(len(tower_specs)) - eval_metric_ops = {} for name, (metric_tensor, _) in six.iteritems(tower_specs[0].eval_metric_ops): + with ops_lib.control_dependencies(eval_metric_ops_lists[name]): + # This operation reduces local variables across all metrics, yet is + # called for every metric. This is redundant and it's done because + # it is hard to know what local variables correspond to what metric. + # Estimator is going to execute all `reduced_update_op`s as part of + # a group inside a single `Session.run()` call, which will avoid duplicate + # computation. + reduced_update_op = _reduce_metric_variables(len(tower_specs)) eval_metric_ops[name] = (metric_tensor, reduced_update_op) + estimator_spec['eval_metric_ops'] = eval_metric_ops return model_fn_lib.EstimatorSpec(**estimator_spec) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index 21d5a9c327..10b47fba5a 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -65,35 +65,20 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): data = np.linspace( 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) x_data = data.reshape(batch_size, input_dimension) - categorical_data = np.random.random_integers( - 0, len(x_data), size=len(x_data)) y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) train_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data, - 'categories': categorical_data}, + x={'x': x_data}, y=y_data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data, - 'categories': categorical_data}, - y=y_data, - batch_size=batch_size, - shuffle=False) + x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data, - 'categories': categorical_data}, - batch_size=batch_size, - shuffle=False) + x={'x': x_data}, batch_size=batch_size, shuffle=False) feature_columns = [ - feature_column.numeric_column('x', shape=(input_dimension,)), - feature_column.indicator_column( - feature_column.categorical_column_with_vocabulary_list( - 'categories', - vocabulary_list=np.linspace( - 0., len(x_data), len(x_data), dtype=np.int64))) + feature_column.numeric_column('x', shape=(input_dimension,)) ] estimator = dnn.DNNClassifier( @@ -105,11 +90,14 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): def optimizer_fn(): return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05) + # TODO(isaprykin): Switch Estimator to use allow_soft_placement=True + # during export_savedmodel and then switch this test to replicate over + # GPUs instead of CPUs. estimator = estimator_lib.Estimator( model_fn=replicate_model_fn.replicate_model_fn( estimator.model_fn, optimizer_fn, - devices=['/gpu:0', '/gpu:1', '/gpu:2']), + devices=['/cpu:0', '/cpu:0', '/cpu:0']), model_dir=estimator.model_dir, config=estimator.config, params=estimator.params) @@ -242,7 +230,6 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): accuracy = session.run(accuracy) auc = session.run(auc) - # loss[i] = features[i] * 10 - labels[i]. # Accuracy is 0.0 (no match) in the first tower. # Accuracy is 1.0 (match) in the second tower, since the feature # times weight "c" happened to be equal to the label. @@ -544,7 +531,8 @@ class EvalSpecTest(test_util.TensorFlowTestCase): self.assertEqual('/device:CPU:0', auc.device) session.run([a, b]) - accuracy, auc = session.run([accuracy, auc]) + accuracy = session.run(accuracy) + auc = session.run(auc) self.assertNear((12 - 2) / 12, accuracy, 0.01) self.assertEqual(0, auc) @@ -873,7 +861,7 @@ class LocalDeviceSetterTest(test_util.TensorFlowTestCase): class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): - def test_vectors(self): + def test_example(self): with self.test_session() as session: total = replicate_model_fn._compute_sum_on_device( [1.0, 2.0, 3.0, 4.0], device='/device:GPU:0', name='test_sum') @@ -882,68 +870,6 @@ class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): self.assertEqual('test_sum', total.op.name) self.assertEqual(10.0, session.run(total)) - def test_tensors(self): - with self.test_session() as session: - total = replicate_model_fn._compute_sum_on_device( - [[1.0, 2.0], [3.0, 4.0]], device='/device:GPU:0', name='test_sum') - - self.assertEqual('/device:GPU:0', total.device) - self.assertEqual('test_sum', total.op.name) - self.assertAllEqual([4.0, 6.0], session.run(total)) - - def test_indexedslices(self): - with self.test_session() as session: - a = ops_lib.IndexedSlices( - constant_op.constant([1.0, 2.0]), [0, 1], - dense_shape=constant_op.constant([2])) - b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) - - total = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0') - - self.assertEqual('/device:GPU:0', total.device) - self.assertAllEqual([4.0, 6.0], - session.run(ops_lib.convert_to_tensor(total))) - - def test_indexedslices_higher_dimensions(self): - with self.test_session() as session: - a = ops_lib.IndexedSlices( - constant_op.constant([[1.0, 5.0], [2.0, 6.0]]), [0, 1], - dense_shape=constant_op.constant([2, 4])) - b = ops_lib.IndexedSlices( - constant_op.constant([[3.0, 7.0], [4.0, 8.0]]), [0, 1]) - - total = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0') - - self.assertEqual('/device:GPU:0', total.device) - self.assertAllEqual([[4.0, 12.0], [6.0, 14.0]], - session.run(ops_lib.convert_to_tensor(total))) - - def test_indexedslices_some_dont_overlap(self): - with self.test_session() as session: - a = ops_lib.IndexedSlices( - constant_op.constant([1.0, 2.0]), [0, 3], - dense_shape=constant_op.constant([4])) - b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) - - total = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0') - - self.assertEqual('/device:GPU:0', total.device) - self.assertAllEqual([4.0, 4.0, 0.0, 2.0], - session.run(ops_lib.convert_to_tensor(total))) - - def test_no_name_for_indexslices(self): - a = ops_lib.IndexedSlices( - constant_op.constant([1.0, 2.0]), [0, 1], - dense_shape=constant_op.constant([2])) - b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) - - with self.assertRaisesRegexp(ValueError, ''): - _ = replicate_model_fn._compute_sum_on_device( - [a, b], device='/device:GPU:0', name='cant_name_indexslices') - class ConcatTensorDictsTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index 0824ecf616..e89993991a 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -76,7 +76,7 @@ class GANEstimator(estimator.Estimator): return logits # Create GAN estimator. - gan_estimator = tfgan.estimator.GANEstimator( + gan_estimator = estimator.GANEstimator( model_dir, generator_fn=generator_fn, discriminator_fn=discriminator_fn, diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 7300a7998c..4eabb59b3e 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -448,10 +448,10 @@ class LayerCollection(object): tf.get_variable_scope().reuse. Raises: - ValueError: If reuse == True and name == None. - ValueError: If reuse == True and seed != None. - KeyError: If reuse == True and no existing LossFunction with 'name' found. - KeyError: If reuse == False and existing LossFunction with 'name' found. + ValueError: If reuse=True and name != None. + ValueError: If reuse=True and seed != None. + KeyError: If reuse=True and no existing LossFunction with 'name' found. + KeyError: If reuse=False and existing LossFunction with 'name' found. """ name = name or self._graph.unique_name( "register_categorical_predictive_distribution") diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index ad4a0b302f..dab5a5297c 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -286,7 +286,6 @@ def _fused_batch_norm(inputs, ValueError: If the rank of `inputs` is neither 2 or 4. ValueError: If rank or `C` dimension of `inputs` is undefined. """ - # TODO(reedwm): Add support for fp16 inputs. if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): raise ValueError('data_format has to be either NCHW or NHWC.') with variable_scope.variable_scope( @@ -320,9 +319,10 @@ def _fused_batch_norm(inputs, (inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. - trainable_beta = trainable and center beta_collections = utils.get_variable_collections(variables_collections, 'beta') + # Float32 required to avoid precision-loss when using fp16 input/output + variable_dtype = dtypes.float32 if not param_initializers: param_initializers = {} if not param_regularizers: @@ -336,13 +336,13 @@ def _fused_batch_norm(inputs, beta = variables.model_variable( 'beta', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=beta_initializer, regularizer=beta_regularizer, collections=beta_collections, - trainable=trainable_beta) + trainable=trainable) else: - beta = array_ops.constant(0.0, shape=params_shape) + beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape) if scale: gamma_collections = utils.get_variable_collections( @@ -352,13 +352,13 @@ def _fused_batch_norm(inputs, gamma = variables.model_variable( 'gamma', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=gamma_initializer, regularizer=gamma_regularizer, collections=gamma_collections, trainable=trainable) else: - gamma = array_ops.constant(1.0, shape=params_shape) + gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. We disable variable partitioning while creating @@ -375,7 +375,7 @@ def _fused_batch_norm(inputs, moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=moving_mean_initializer, trainable=False, collections=moving_mean_collections) @@ -386,7 +386,7 @@ def _fused_batch_norm(inputs, moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=moving_variance_initializer, trainable=False, collections=moving_variance_collections) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 2837a3172d..7ccd9d8868 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1766,10 +1766,12 @@ class BatchNormTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'undefined'): _layers.batch_norm(inputs, data_format='NCHW') - def _testCreateOp(self, fused): + def _testCreateOp(self, fused, dtype=None): + if dtype is None: + dtype = dtypes.float32 height, width = 3, 3 with self.test_session(): - images = np.random.uniform(size=(5, height, width, 3)).astype('f') + images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype) output = _layers.batch_norm(images, fused=fused) expected_name = ('BatchNorm/FusedBatchNorm' if fused else 'BatchNorm/batchnorm') @@ -1784,6 +1786,9 @@ class BatchNormTest(test.TestCase): def testCreateOpFused(self): self._testCreateOp(True) + def testCreateOpFusedFloat16(self): + self._testCreateOp(True, dtypes.float16) + def _testCreateOpBetaRegularizer(self, fused=True): height, width = 3, 3 with self.test_session(): @@ -2651,10 +2656,68 @@ class BatchNormTest(test.TestCase): def testBatchNormBeta(self): # Test case for 11673 with self.test_session() as sess: - a = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) - b = _layers.batch_norm(a, center=False, data_format='NCHW', - zero_debias_moving_mean=True) + a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) + b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW', + zero_debias_moving_mean=True) + a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10)) + b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW', + zero_debias_moving_mean=True) + sess.run(variables_lib.global_variables_initializer()) + + def testVariablesAreFloat32(self): + height, width = 3, 3 + with self.test_session(): + images = random_ops.random_uniform((5, height, width, 3), + seed=1, dtype=dtypes.float16) + _layers.batch_norm(images, scale=True) + beta = variables.get_variables_by_name('beta')[0] + gamma = variables.get_variables_by_name('gamma')[0] + self.assertEqual(beta.dtype, dtypes.float32_ref) + self.assertEqual(gamma.dtype, dtypes.float32_ref) + moving_mean = variables.get_variables_by_name('moving_mean')[0] + moving_variance = variables.get_variables_by_name('moving_variance')[0] + self.assertEqual(moving_mean.dtype, dtypes.float32_ref) + self.assertEqual(moving_variance.dtype, dtypes.float32_ref) + + def _runFusedBatchNorm(self, shape, dtype): + channels = shape[1] + images = np.arange(np.product(shape), dtype=dtype).reshape(shape) + beta = init_ops.constant_initializer( + np.arange( + 2, channels + 2, dtype=np.float32)) + gamma = init_ops.constant_initializer( + np.arange( + 10, channels + 10, dtype=np.float32) * 2.0) + mean = init_ops.constant_initializer( + np.arange( + 3, channels + 3, dtype=np.float32) * 5.0) + variance = init_ops.constant_initializer( + np.arange( + 1, channels + 1, dtype=np.float32) * 4.0) + output = _layers.batch_norm( + images, + fused=True, + is_training=True, + scale=True, + epsilon=0.5, + param_initializers={ + 'beta': beta, + 'gamma': gamma, + 'moving_mean': mean, + 'moving_variance': variance, + }, + data_format='NCHW') + with self.test_session(use_gpu=True) as sess: sess.run(variables_lib.global_variables_initializer()) + return sess.run(output) + + def testFusedBatchNormFloat16MatchesFloat32(self): + if test.is_gpu_available(cuda_only=True): + shape = [5, 4, 2, 3] + res_32 = self._runFusedBatchNorm(shape, np.float32) + res_16 = self._runFusedBatchNorm(shape, np.float16) + self.assertAllClose(res_32, res_16, rtol=1e-3) + def testAdjustmentCreated(self): # Tests that the adjustment is appropriately passed to and used by the core diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 468d792a0d..bc0e6fc009 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -119,7 +119,7 @@ class Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=model_fn_ops.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... upate train_op and hooks in ModelFnOps and return + ... update train_op and hooks in ModelFnOps and return ``` """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 8be9c72adf..44e6c7c52d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -23,7 +23,6 @@ import collections import six -from tensorflow.contrib import framework as contrib_framework from tensorflow.contrib.framework import get_graph_from_inputs from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import metric_key @@ -32,6 +31,7 @@ from tensorflow.python.estimator import model_fn as core_model_fn_lib from tensorflow.python.estimator.export import export_output as core_export_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -156,11 +156,11 @@ class ModelFnOps( else: if isinstance(predictions, dict): predictions = { - k: contrib_framework.convert_to_tensor_or_sparse_tensor(v) + k: sparse_tensor.convert_to_tensor_or_sparse_tensor(v) for k, v in six.iteritems(predictions) } else: - predictions = contrib_framework.convert_to_tensor_or_sparse_tensor( + predictions = sparse_tensor.convert_to_tensor_or_sparse_tensor( predictions) # Validate eval_metric_ops diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 4c50d40aaa..db18ebf05d 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,13 +28,14 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels - # pylint: enable=g-multiple-import,g-bad-import-order @@ -365,8 +366,13 @@ class DataFeeder(object): self.random_state = np.random.RandomState( 42) if random_state is None else random_state - num_samples = list(self._x.values())[0].shape[ - 0] if x_is_dict else self._x.shape[0] + if x_is_dict: + num_samples = list(self._x.values())[0].shape[0] + elif tensor_util.is_tensor(self._x): + num_samples = self._x.shape[0].value # shape will be a Dimension, extract an int + else: + num_samples = self._x.shape[0] + if self._shuffle: self.indices = self.random_state.permutation(num_samples) else: diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 13f2f0f502..86d8484391 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -238,10 +238,10 @@ class SdcaModel(object): with name_scope('sdca/prediction'): sparse_variables = self._convert_n_to_tensor(self._variables[ 'sparse_features_weights']) - result = 0.0 + result_sparse = 0.0 for sfc, sv in zip(examples['sparse_features'], sparse_variables): # TODO(sibyl-Aix6ihai): following does not take care of missing features. - result += math_ops.segment_sum( + result_sparse += math_ops.segment_sum( math_ops.multiply( array_ops.gather(sv, sfc.feature_indices), sfc.feature_values), sfc.example_indices) @@ -249,12 +249,13 @@ class SdcaModel(object): dense_variables = self._convert_n_to_tensor(self._variables[ 'dense_features_weights']) + result_dense = 0.0 for i in range(len(dense_variables)): - result += math_ops.matmul(dense_features[i], - array_ops.expand_dims(dense_variables[i], -1)) + result_dense += math_ops.matmul( + dense_features[i], array_ops.expand_dims(dense_variables[i], -1)) # Reshaping to allow shape inference at graph construction time. - return array_ops.reshape(result, [-1]) + return array_ops.reshape(result_dense, [-1]) + result_sparse def predictions(self, examples): """Add operations to compute predictions by the model. diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index dba1464653..e2e6c05591 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -314,7 +314,8 @@ ifeq ($(TARGET),ANDROID) -Wno-narrowing \ -fomit-frame-pointer \ $(MARCH_OPTION) \ --fPIE +-fPIE \ +-fPIC INCLUDES = \ -I$(NDK_ROOT)/sources/android/support/include \ -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \ diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 715eb51577..65bd60c12a 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -174,10 +174,26 @@ tensorflow/contrib/makefile/build_all_ios.sh This process will take around twenty minutes on a modern MacBook Pro. -When it completes, you will have a library for a single architecture and the -benchmark program. Although successfully compiling the benchmark program is a +When it completes, you will have a unified library for all architectures +(i386sim, x86_64sim, armv7, armv7s and arm64) and the benchmark program. +Although successfully compiling the benchmark program is a sign of success, the program is not a complete iOS app. +If you would only like to build only one architecture to save time: +(iOS 11+ only supports 64bit so you can get away with arm64) + +```bash +tensorflow/contrib/makefile/build_all_ios.sh -a arm64 +``` + +After the first build if you would like to just build the tensorflow +library you can pass the -T flag to avoid a clean & rebuild. This should +take you just a few seconds to generate the library if you modified one file. + +```bash +tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -T +``` + To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -193,19 +209,18 @@ If you have not already, you will need to download dependencies: tensorflow/contrib/makefile/download_dependencies.sh ``` -Next, you will need to compile protobufs for iOS: +Next, you will need to compile protobufs for iOS (optionally takes the -a $ARCH flag): ```bash -tensorflow/contrib/makefile/compile_ios_protobuf.sh +tensorflow/contrib/makefile/compile_ios_protobuf.sh ``` -Then, you will need to compile the nsync library for iOS: +Then, you will need to compile the nsync library for iOS (optionally takes -a $ARCH flag): ```bash export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` ``` - Then, you can run the makefile specifying iOS as the target, along with the architecture you want to build for: @@ -219,10 +234,6 @@ This creates a library in `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any xcode project against. -At this point, you will have a library for a single architecture and the -benchmark program. Although successfully compiling the benchmark program is a -sign of success, the program is not a complete iOS app. - To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -237,6 +248,14 @@ time follow it with: compile_ios_tensorflow.sh ``` +`compile_ios_tensorflow.sh` takes the -a flag to build only for one architecture. +In case you run into issues with unresolved symbols with nsync you can also pass +-h ${HOST_NSYNC_LIB} and -n {TARGET_NSYNC_LIB} so it would look like: + +```bash +tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/contrib/makefile/downloads/nsync/builds/default.macos.c++11/nsync.a -n tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11/nsync.a -a arm64 +``` + In XCode, you will need to use -force_load in the linker flags section of the build settings to pull in the global constructors that are used to register ops and kernels. @@ -249,7 +268,7 @@ debug mode. If you are concerned about performance or are working on a release build, you would likely want a higher optimization setting, like so: ```bash -compile_ios_tensorflow.sh "-Os" +compile_ios_tensorflow.sh -f "-Os" ``` For other variations of valid optimization flags, see [clang optimization levels](http://stackoverflow.com/questions/15548023/clang-optimization-levels). diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index a49bbe4565..988e12b482 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -23,14 +23,29 @@ if [[ $(uname) != "Darwin" ]]; then exit 1 fi +usage() { + echo "Usage: $(basename "$0") [-a:T]" + echo "-a [build_arch] build only for specified arch x86_64 [default=all]" + echo "-T only build tensorflow (dont download other deps etc)" + exit 1 +} + +while getopts "a:T" opt_name; do + case "$opt_name" in + a) BUILD_ARCH="${OPTARG}";; + T) ONLY_MAKE_TENSORFLOW="true";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + + # Make sure we're in the correct directory, at the root of the source tree. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd ${SCRIPT_DIR}/../../../ - -# Remove any old files first. -make -f tensorflow/contrib/makefile/Makefile clean -rm -rf tensorflow/contrib/makefile/downloads +source "${SCRIPT_DIR}/build_helper.subr" +JOB_COUNT="${JOB_COUNT:-$(get_job_count)}" # Setting a deployment target is required for building with bitcode, # otherwise linking will fail with: @@ -41,20 +56,37 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -# Pull down the required versions of the frameworks we need. -tensorflow/contrib/makefile/download_dependencies.sh +if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then + # Remove any old files first. + make -f tensorflow/contrib/makefile/Makefile clean + rm -rf tensorflow/contrib/makefile/downloads -# Compile protobuf for the target iOS device architectures. -tensorflow/contrib/makefile/compile_ios_protobuf.sh + # Pull down the required versions of the frameworks we need. + tensorflow/contrib/makefile/download_dependencies.sh + + # Compile protobuf for the target iOS device architectures. + tensorflow/contrib/makefile/compile_ios_protobuf.sh +fi # Compile nsync for the target iOS device architectures. # Don't use export var=`something` syntax; it swallows the exit status. HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` -TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +if [[ -z "${BUILD_ARCH}" ]]; then + # No arch specified so build all architectures + TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +else + # arch specified so build just that + TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}` +fi export HOST_NSYNC_LIB TARGET_NSYNC_LIB -# Build the iOS TensorFlow libraries. -tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3" +if [[ -z "${BUILD_ARCH}" ]]; then + # build the ios tensorflow libraries. + tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB +else + # arch specified so build just that + tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB +fi # Creates a static universal library in # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh index 4056db18a7..43e5809dd2 100755 --- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh +++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh @@ -21,10 +21,28 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -SCRIPT_DIR=$(dirname $0) +usage() { + echo "Usage: $(basename "$0") [-a]" + echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" + echo "default arch i386, x86_64, armv7, armv7s, arm64" + exit 1 +} + +BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" +while getopts "a:" opt_name; do + case "$opt_name" in + a) BUILD_TARGET="${OPTARG}";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + +IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" + +SCRIPT_DIR=$(cd `dirname $0` && pwd) source "${SCRIPT_DIR}/build_helper.subr" -cd tensorflow/contrib/makefile +cd ${SCRIPT_DIR} HOST_GENDIR="$(pwd)/gen/protobuf-host" mkdir -p "${HOST_GENDIR}" @@ -64,6 +82,10 @@ else echo "protoc found. Skip building host tools." fi +# Remove old libs +rm -f ${LIBDIR}/libprotobuf.a +rm -f ${LIBDIR}/libprotobuf-lite.a + ./autogen.sh if [ $? -ne 0 ] then @@ -71,157 +93,192 @@ then exit 1 fi -make distclean -./configure \ ---host=i386-apple-${OSX_VERSION} \ ---disable-shared \ ---enable-cross-compile \ ---with-protoc="${PROTOC_PATH}" \ ---prefix=${LIBDIR}/iossim_386 \ ---exec-prefix=${LIBDIR}/iossim_386 \ -"CFLAGS=${CFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch i386 \ --fembed-bitcode \ --isysroot ${IPHONESIMULATOR_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch i386 \ --fembed-bitcode \ --isysroot \ -${IPHONESIMULATOR_SYSROOT}" \ -LDFLAGS="-arch i386 \ --fembed-bitcode \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS} \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=x86_64-apple-${OSX_VERSION} \ ---disable-shared \ ---enable-cross-compile \ ---with-protoc="${PROTOC_PATH}" \ ---prefix=${LIBDIR}/iossim_x86_64 \ ---exec-prefix=${LIBDIR}/iossim_x86_64 \ -"CFLAGS=${CFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch x86_64 \ --fembed-bitcode \ --isysroot ${IPHONESIMULATOR_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch x86_64 \ --fembed-bitcode \ --isysroot \ -${IPHONESIMULATOR_SYSROOT}" \ -LDFLAGS="-arch x86_64 \ --fembed-bitcode \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS} \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=armv7-apple-${OSX_VERSION} \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm7 \ ---exec-prefix=${LIBDIR}/ios_arm7 \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch armv7 \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=armv7s-apple-${OSX_VERSION} \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm7s \ ---exec-prefix=${LIBDIR}/ios_arm7s \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7s \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7s \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch armv7s \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=arm \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm64 \ ---exec-prefix=${LIBDIR}/ios_arm64 \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch arm64 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch arm64 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch arm64 \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -lipo \ -${LIBDIR}/iossim_386/lib/libprotobuf.a \ -${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \ -${LIBDIR}/ios_arm7/lib/libprotobuf.a \ -${LIBDIR}/ios_arm7s/lib/libprotobuf.a \ -${LIBDIR}/ios_arm64/lib/libprotobuf.a \ --create \ --output ${LIBDIR}/libprotobuf.a - -lipo \ -${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \ -${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \ --create \ --output ${LIBDIR}/libprotobuf-lite.a +package_pb_library() { + pb_libs="${LIBDIR}/${1}/lib/libprotobuf.a" + if [ -f "${LIBDIR}/libprotobuf.a" ]; then + pb_libs="$pb_libs ${LIBDIR}/libprotobuf.a" + fi + lipo \ + $pb_libs \ + -create \ + -output ${LIBDIR}/libprotobuf.a + + pblite_libs="${LIBDIR}/${1}/lib/libprotobuf-lite.a" + if [ -f "${LIBDIR}/libprotobuf-lite.a" ]; then + pblite_libs="$pblite_libs ${LIBDIR}/libprotobuf-lite.a" + fi + lipo \ + $pblite_libs \ + -create \ + -output ${LIBDIR}/libprotobuf-lite.a +} + +build_target() { +case "$1" in + i386) make distclean + ./configure \ + --host=i386-apple-${OSX_VERSION} \ + --disable-shared \ + --enable-cross-compile \ + --with-protoc="${PROTOC_PATH}" \ + --prefix=${LIBDIR}/iossim_386 \ + --exec-prefix=${LIBDIR}/iossim_386 \ + "CFLAGS=${CFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch i386 \ + -fembed-bitcode \ + -isysroot ${IPHONESIMULATOR_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch i386 \ + -fembed-bitcode \ + -isysroot \ + ${IPHONESIMULATOR_SYSROOT}" \ + LDFLAGS="-arch i386 \ + -fembed-bitcode \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS} \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "iossim_386" + ;; + + x86_64) make distclean + ./configure \ + --host=x86_64-apple-${OSX_VERSION} \ + --disable-shared \ + --enable-cross-compile \ + --with-protoc="${PROTOC_PATH}" \ + --prefix=${LIBDIR}/iossim_x86_64 \ + --exec-prefix=${LIBDIR}/iossim_x86_64 \ + "CFLAGS=${CFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch x86_64 \ + -fembed-bitcode \ + -isysroot ${IPHONESIMULATOR_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch x86_64 \ + -fembed-bitcode \ + -isysroot \ + ${IPHONESIMULATOR_SYSROOT}" \ + LDFLAGS="-arch x86_64 \ + -fembed-bitcode \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS} \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "iossim_x86_64" + ;; + + armv7) make distclean + ./configure \ + --host=armv7-apple-${OSX_VERSION} \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm7 \ + --exec-prefix=${LIBDIR}/ios_arm7 \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch armv7 \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm7" + ;; + + armv7s) make distclean + ./configure \ + --host=armv7s-apple-${OSX_VERSION} \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm7s \ + --exec-prefix=${LIBDIR}/ios_arm7s \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7s \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7s \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch armv7s \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm7s" + ;; + + arm64) make distclean + ./configure \ + --host=arm \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm64 \ + --exec-prefix=${LIBDIR}/ios_arm64 \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch arm64 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch arm64 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch arm64 \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm64" + ;; + *) + echo "Unknown ARCH" + exit 1 + ;; +esac +} + +for build_element in "${build_targets[@]}" +do + echo "$build_element" + build_target "$build_element" +done + +file ${LIBDIR}/libprotobuf.a +file ${LIBDIR}/libprotobuf-lite.a +echo "Done building and packaging the libraries" diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh index 5d1cc8b375..ae82163e11 100755 --- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh +++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh @@ -43,55 +43,124 @@ then exit 1 fi +usage() { + echo "Usage: $(basename "$0") [-a]" + echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" + echo "default is [i386, x86_64, armv7, armv7s, arm64]" + exit 1 +} + +BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" +while getopts "a:f:h:n:" opt_name; do + case "$opt_name" in + a) BUILD_TARGET="${OPTARG}";; + f) BUILD_OPT="${OPTARG}";; + h) NSYNC_HOST="${OPTARG}";; + n) NSYNC_TARGET="${OPTARG}";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + +IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" + +SCRIPT_DIR=$(cd `dirname $0` && pwd) +source "${SCRIPT_DIR}/build_helper.subr" + + GENDIR=tensorflow/contrib/makefile/gen/ LIBDIR=${GENDIR}lib LIB_PREFIX=libtensorflow-core -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "armv7 compilation failed." - exit 1 -fi +#remove any old artifacts +rm -rf ${LIBDIR}/${LIB_PREFIX}.a -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "arm7vs compilation failed." - exit 1 -fi +package_tf_library() { + CAP_DIR=`echo $1 | tr 'a-z' 'A-Z'` + tf_libs="${LIBDIR}/ios_${CAP_DIR}/${LIB_PREFIX}-${1}.a" + if [ -f "${LIBDIR}/${LIB_PREFIX}.a" ]; then + tf_libs="$tf_libs ${LIBDIR}/${LIB_PREFIX}.a" + fi + lipo \ + $tf_libs \ + -create \ + -output ${LIBDIR}/${LIB_PREFIX}.a +} -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "arm64 compilation failed." - exit 1 -fi +build_tf_target() { +case "$1" in + armv7) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "armv7 compilation failed." + exit 1 + fi + package_tf_library "armv7" + ;; + armv7s) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "i386 compilation failed." - exit 1 -fi + if [ $? -ne 0 ] + then + echo "arm7vs compilation failed." + exit 1 + fi + package_tf_library "armv7s" + ;; + arm64) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "arm64 compilation failed." + exit 1 + fi + package_tf_library "arm64" + ;; + i386) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "i386 compilation failed." + exit 1 + fi + package_tf_library "i386" + ;; + x86_64) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "x86_64 compilation failed." + exit 1 + fi + package_tf_library "x86_64" + ;; + *) + echo "Unknown ARCH" + exit 1 +esac +} -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "x86_64 compilation failed." - exit 1 -fi +for build_tf_element in "${build_targets[@]}" +do + echo "$build_tf_element" + build_tf_target "$build_tf_element" +done -lipo \ -${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \ -${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \ -${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \ -${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \ -${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \ --create \ --output ${LIBDIR}/${LIB_PREFIX}.a +echo "Done building and packaging TF" +file ${LIBDIR}/${LIB_PREFIX}.a diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index ecbd9bb825..930e6b8dea 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -265,7 +265,7 @@ for arch in $archs; do -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \ -I../../platform/c++11 -I../../platform/gcc \ -I../../platform/posix -pthread - PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE + PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE -fPIC PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ @@ -301,6 +301,9 @@ done case "$target_platform" in ios) nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11" + if [ -d "$nsync_platform_dir" ]; then + rm -rf "$nsync_platform_dir" + fi mkdir "$nsync_platform_dir" eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a' echo "$nsync_platform_dir/nsync.a" diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 5f06106c1d..8b77c99cb5 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -8,7 +8,6 @@ tensorflow/core/kernels/xent_op.cc tensorflow/core/kernels/where_op.cc tensorflow/core/kernels/variable_ops.cc tensorflow/core/kernels/unpack_op.cc -tensorflow/core/kernels/unique_op.cc tensorflow/core/kernels/transpose_op.cc tensorflow/core/kernels/transpose_functor_cpu.cc tensorflow/core/kernels/training_op_helpers.cc @@ -42,9 +41,6 @@ tensorflow/core/kernels/spectrogram_op.cc tensorflow/core/kernels/spectrogram.cc tensorflow/core/kernels/sparse_to_dense_op.cc tensorflow/core/kernels/sparse_matmul_op.cc -tensorflow/core/kernels/sparse_fill_empty_rows_op.cc -tensorflow/core/kernels/sparse_reshape_op.c -tensorflow/core/kernels/segment_reduction_ops.cc tensorflow/core/kernels/softsign_op.cc tensorflow/core/kernels/softplus_op.cc tensorflow/core/kernels/softmax_op.cc @@ -113,10 +109,6 @@ tensorflow/core/kernels/maxpooling_op.cc tensorflow/core/kernels/matmul_op.cc tensorflow/core/kernels/lrn_op.cc tensorflow/core/kernels/logging_ops.cc -tensorflow/core/kernels/initializable_lookup_table.c -tensorflow/core/kernels/lookup_table_init_op.cc -tensorflow/core/kernels/lookup_table_op.cc -tensorflow/core/kernels/lookup_util.cc tensorflow/core/kernels/inplace_ops.cc tensorflow/core/kernels/in_topk_op.cc tensorflow/core/kernels/immutable_constant_op.cc @@ -124,18 +116,10 @@ tensorflow/core/kernels/identity_op.cc tensorflow/core/kernels/identity_n_op.cc tensorflow/core/kernels/gather_op.cc tensorflow/core/kernels/gather_functor.cc -tensorflow/core/kernels/gather_nd_op.cc -tensorflow/core/kernels/gather_nd_op_cpu_impl_0.cc -tensorflow/core/kernels/gather_nd_op_cpu_impl_1.cc -tensorflow/core/kernels/gather_nd_op_cpu_impl_2.cc -tensorflow/core/kernels/gather_nd_op_cpu_impl_3.cc -tensorflow/core/kernels/gather_nd_op_cpu_impl_4.cc -tensorflow/core/kernels/gather_nd_op_cpu_impl_5.cc tensorflow/core/kernels/fused_batch_norm_op.cc tensorflow/core/kernels/function_ops.cc tensorflow/core/kernels/fill_functor.cc tensorflow/core/kernels/fifo_queue.cc -tensorflow/core/kernels/fifo_queue_op.cc tensorflow/core/kernels/fake_quant_ops.cc tensorflow/core/kernels/example_parsing_ops.cc tensorflow/core/kernels/encode_wav_op.cc @@ -182,8 +166,6 @@ tensorflow/core/kernels/cwise_op_floor.cc tensorflow/core/kernels/cwise_op_exp.cc tensorflow/core/kernels/cwise_op_equal_to_2.cc tensorflow/core/kernels/cwise_op_equal_to_1.cc -tensorflow/core/kernels/cwise_op_not_equal_to_2.cc -tensorflow/core/kernels/cwise_op_not_equal_to_1.cc tensorflow/core/kernels/cwise_op_div.cc tensorflow/core/kernels/cwise_op_bitwise_xor.cc tensorflow/core/kernels/cwise_op_bitwise_or.cc diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index 8eed45c4b3..302042c4dd 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -27,7 +27,6 @@ See the @{$python/contrib.metrics} guide. @@streaming_false_negative_rate @@streaming_false_negative_rate_at_thresholds @@streaming_auc -@@streaming_dynamic_auc @@streaming_curve_points @@streaming_recall_at_k @@streaming_mean_absolute_error @@ -89,7 +88,6 @@ from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_auc from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_concat from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_covariance from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_curve_points -from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_dynamic_auc from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negative_rate from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negative_rate_at_thresholds from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negatives diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 24692ff12f..3dd1f1a627 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -1178,154 +1178,6 @@ def streaming_auc(predictions, name=name) -def _compute_dynamic_auc(labels, predictions, curve='ROC'): - """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds. - - Computes the area under the ROC or PR curve using each prediction as a - threshold. This could be slow for large batches, but has the advantage of not - having its results degrade depending on the distribution of predictions. - - Args: - labels: A `Tensor` of ground truth labels with the same shape as - `predictions` with values of 0 or 1 and type `int64`. - predictions: A 1-D `Tensor` of predictions whose values are `float64`. - curve: The name of the curve to be computed, 'ROC' for the Receiving - Operating Characteristic or 'PR' for the Precision-Recall curve. - - Returns: - A scalar `Tensor` containing the area-under-curve value for the input. - """ - # Count the total number of positive and negative labels in the input. - size = array_ops.size(predictions) - total_positive = math_ops.cast(math_ops.reduce_sum(labels), dtypes.int32) - - def continue_computing_dynamic_auc(): - """Continues dynamic auc computation, entered if labels are not all equal. - - Returns: - A scalar `Tensor` containing the area-under-curve value. - """ - # Sort the predictions descending, and the corresponding labels as well. - ordered_predictions, indices = nn.top_k(predictions, k=size) - ordered_labels = array_ops.gather(labels, indices) - - # Get the counts of the unique ordered predictions. - _, _, counts = array_ops.unique_with_counts(ordered_predictions) - - # Compute the indices of the split points between different predictions. - splits = math_ops.cast( - array_ops.pad(math_ops.cumsum(counts), paddings=[[1, 0]]), dtypes.int32) - - # Count the positives to the left of the split indices. - positives = math_ops.cast( - array_ops.pad(math_ops.cumsum(ordered_labels), paddings=[[1, 0]]), - dtypes.int32) - true_positives = array_ops.gather(positives, splits) - if curve == 'ROC': - # Count the negatives to the left of every split point and the total - # number of negatives for computing the FPR. - false_positives = math_ops.subtract(splits, true_positives) - total_negative = size - total_positive - x_axis_values = math_ops.truediv(false_positives, total_negative) - y_axis_values = math_ops.truediv(true_positives, total_positive) - elif curve == 'PR': - x_axis_values = math_ops.truediv(true_positives, total_positive) - # For conformance, set precision to 1 when the number of positive - # classifications is 0. - y_axis_values = array_ops.where( - math_ops.greater(splits, 0), - math_ops.truediv(true_positives, splits), - array_ops.ones_like(true_positives, dtype=dtypes.float64)) - - # Calculate trapezoid areas. - heights = math_ops.add(y_axis_values[1:], y_axis_values[:-1]) / 2.0 - widths = math_ops.abs( - math_ops.subtract(x_axis_values[1:], x_axis_values[:-1])) - return math_ops.reduce_sum(math_ops.multiply(heights, widths)) - - # If all the labels are the same, AUC isn't well-defined (but raising an - # exception seems excessive) so we return 0, otherwise we finish computing. - return control_flow_ops.cond( - math_ops.logical_or( - math_ops.equal(total_positive, 0), - math_ops.equal(total_positive, size) - ), - true_fn=lambda: array_ops.constant(0, dtypes.float64), - false_fn=continue_computing_dynamic_auc) - - -def streaming_dynamic_auc(labels, - predictions, - curve='ROC', - metrics_collections=(), - updates_collections=(), - name=None): - """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds. - - USAGE NOTE: this approach requires storing all of the predictions and labels - for a single evaluation in memory, so it may not be usable when the evaluation - batch size and/or the number of evaluation steps is very large. - - Computes the area under the ROC or PR curve using each prediction as a - threshold. This has the advantage of being resilient to the distribution of - predictions by aggregating across batches, accumulating labels and predictions - and performing the final calculation using all of the concatenated values. - - Args: - labels: A `Tensor` of ground truth labels with the same shape as `labels` - and with values of 0 or 1 whose values are castable to `int64`. - predictions: A `Tensor` of predictions whose values are castable to - `float64`. Will be flattened into a 1-D `Tensor`. - curve: The name of the curve for which to compute AUC, 'ROC' for the - Receiving Operating Characteristic or 'PR' for the Precision-Recall curve. - metrics_collections: An optional iterable of collections that `auc` should - be added to. - updates_collections: An optional iterable of collections that `update_op` - should be added to. - name: An optional name for the variable_scope that contains the metric - variables. - - Returns: - auc: A scalar `Tensor` containing the current area-under-curve value. - update_op: An operation that concatenates the input labels and predictions - to the accumulated values. - - Raises: - ValueError: If `labels` and `predictions` have mismatched shapes or if - `curve` isn't a recognized curve type. - """ - - if curve not in ['PR', 'ROC']: - raise ValueError('curve must be either ROC or PR, %s unknown' % curve) - - with variable_scope.variable_scope(name, default_name='dynamic_auc'): - labels.get_shape().assert_is_compatible_with(predictions.get_shape()) - predictions = array_ops.reshape( - math_ops.cast(predictions, dtypes.float64), [-1]) - labels = array_ops.reshape(math_ops.cast(labels, dtypes.int64), [-1]) - with ops.control_dependencies([ - check_ops.assert_greater_equal( - labels, - array_ops.zeros_like(labels, dtypes.int64), - message='labels must be 0 or 1, at least one is <0'), - check_ops.assert_less_equal( - labels, - array_ops.ones_like(labels, dtypes.int64), - message='labels must be 0 or 1, at least one is >1') - ]): - preds_accum, update_preds = streaming_concat(predictions, - name='concat_preds') - labels_accum, update_labels = streaming_concat(labels, - name='concat_labels') - update_op = control_flow_ops.group(update_labels, update_preds) - auc = _compute_dynamic_auc(labels_accum, preds_accum, curve=curve) - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - if metrics_collections: - ops.add_to_collections(metrics_collections, auc) - return auc, update_op - - def streaming_precision_recall_at_equal_thresholds(predictions, labels, num_thresholds=None, @@ -3433,7 +3285,6 @@ __all__ = [ 'streaming_accuracy', 'streaming_auc', 'streaming_curve_points', - 'streaming_dynamic_auc', 'streaming_false_negative_rate', 'streaming_false_negative_rate_at_thresholds', 'streaming_false_negatives', diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 5d0463e1f7..6a8e58b4da 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -1708,34 +1708,6 @@ class StreamingCurvePointsTest(test.TestCase): [[1.0, 4.0 / 6.0], [0.75, 1.0], [0.0, 1.0]]) -def _np_auc(predictions, labels, weights=None): - """Computes the AUC explicitly using Numpy. - - Args: - predictions: an ndarray with shape [N]. - labels: an ndarray with shape [N]. - weights: an ndarray with shape [N]. - - Returns: - the area under the ROC curve. - """ - if weights is None: - weights = np.ones(np.size(predictions)) - is_positive = labels > 0 - num_positives = np.sum(weights[is_positive]) - num_negatives = np.sum(weights[~is_positive]) - - # Sort descending: - inds = np.argsort(-predictions) - - sorted_labels = labels[inds] - sorted_weights = weights[inds] - is_positive = sorted_labels > 0 - - tp = np.cumsum(sorted_weights * is_positive) / num_positives - return np.sum((sorted_weights * tp)[~is_positive]) / num_negatives - - class StreamingAUCTest(test.TestCase): def setUp(self): @@ -1924,6 +1896,33 @@ class StreamingAUCTest(test.TestCase): self.assertAlmostEqual(1, auc.eval(), 6) + def np_auc(self, predictions, labels, weights): + """Computes the AUC explicitly using Numpy. + + Args: + predictions: an ndarray with shape [N]. + labels: an ndarray with shape [N]. + weights: an ndarray with shape [N]. + + Returns: + the area under the ROC curve. + """ + if weights is None: + weights = np.ones(np.size(predictions)) + is_positive = labels > 0 + num_positives = np.sum(weights[is_positive]) + num_negatives = np.sum(weights[~is_positive]) + + # Sort descending: + inds = np.argsort(-predictions) + + sorted_labels = labels[inds] + sorted_weights = weights[inds] + is_positive = sorted_labels > 0 + + tp = np.cumsum(sorted_weights * is_positive) / num_positives + return np.sum((sorted_weights * tp)[~is_positive]) / num_negatives + def testWithMultipleUpdates(self): num_samples = 1000 batch_size = 10 @@ -1946,7 +1945,7 @@ class StreamingAUCTest(test.TestCase): for weights in (None, np.ones(num_samples), np.random.exponential( scale=1.0, size=num_samples)): - expected_auc = _np_auc(predictions, labels, weights) + expected_auc = self.np_auc(predictions, labels, weights) with self.test_session() as sess: enqueue_ops = [[] for i in range(num_batches)] @@ -1975,211 +1974,6 @@ class StreamingAUCTest(test.TestCase): self.assertAlmostEqual(expected_auc, auc.eval(), 2) -class StreamingDynamicAUCTest(test.TestCase): - - def setUp(self): - super(StreamingDynamicAUCTest, self).setUp() - np.random.seed(1) - ops.reset_default_graph() - - def testUnknownCurve(self): - with self.assertRaisesRegexp( - ValueError, 'curve must be either ROC or PR, TEST_CURVE unknown'): - metrics.streaming_dynamic_auc(labels=array_ops.ones((10, 1)), - predictions=array_ops.ones((10, 1)), - curve='TEST_CURVE') - - def testVars(self): - metrics.streaming_dynamic_auc( - labels=array_ops.ones((10, 1)), predictions=array_ops.ones((10, 1))) - _assert_metric_variables(self, ['dynamic_auc/concat_labels/array:0', - 'dynamic_auc/concat_labels/size:0', - 'dynamic_auc/concat_preds/array:0', - 'dynamic_auc/concat_preds/size:0']) - - def testMetricsCollection(self): - my_collection_name = '__metrics__' - auc, _ = metrics.streaming_dynamic_auc( - labels=array_ops.ones((10, 1)), - predictions=array_ops.ones((10, 1)), - metrics_collections=[my_collection_name]) - self.assertEqual(ops.get_collection(my_collection_name), [auc]) - - def testUpdatesCollection(self): - my_collection_name = '__updates__' - _, update_op = metrics.streaming_dynamic_auc( - labels=array_ops.ones((10, 1)), - predictions=array_ops.ones((10, 1)), - updates_collections=[my_collection_name]) - self.assertEqual(ops.get_collection(my_collection_name), [update_op]) - - def testValueTensorIsIdempotent(self): - predictions = random_ops.random_uniform( - (10, 3), maxval=1, dtype=dtypes_lib.float32, seed=1) - labels = random_ops.random_uniform( - (10, 3), maxval=2, dtype=dtypes_lib.int64, seed=2) - auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - # Run several updates. - for _ in xrange(10): - sess.run(update_op) - # Then verify idempotency. - initial_auc = auc.eval() - for _ in xrange(10): - self.assertAlmostEqual(initial_auc, auc.eval(), 5) - - def testAllLabelsOnes(self): - with self.test_session() as sess: - predictions = constant_op.constant([1., 1., 1.]) - labels = constant_op.constant([1, 1, 1]) - auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertEqual(0, auc.eval()) - - def testAllLabelsZeros(self): - with self.test_session() as sess: - predictions = constant_op.constant([1., 1., 1.]) - labels = constant_op.constant([0, 0, 0]) - auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertEqual(0, auc.eval()) - - def testNonZeroOnePredictions(self): - with self.test_session() as sess: - predictions = constant_op.constant([2.5, -2.5, 2.5, -2.5], - dtype=dtypes_lib.float32) - labels = constant_op.constant([1, 0, 1, 0]) - auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertAlmostEqual(auc.eval(), 1.0) - - def testAllCorrect(self): - inputs = np.random.randint(0, 2, size=(100, 1)) - with self.test_session() as sess: - predictions = constant_op.constant(inputs) - labels = constant_op.constant(inputs) - auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertEqual(1, auc.eval()) - - def testSomeCorrect(self): - with self.test_session() as sess: - predictions = constant_op.constant([1, 0, 1, 0]) - labels = constant_op.constant([0, 1, 1, 0]) - auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertAlmostEqual(0.5, auc.eval()) - - def testAllIncorrect(self): - inputs = np.random.randint(0, 2, size=(100, 1)) - with self.test_session() as sess: - predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) - labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32) - auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertAlmostEqual(0, auc.eval()) - - def testExceptionOnIncompatibleShapes(self): - with self.test_session() as sess: - predictions = array_ops.ones([5]) - labels = array_ops.zeros([6]) - with self.assertRaisesRegexp(ValueError, 'Shapes .* are incompatible'): - _, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - - def testExceptionOnGreaterThanOneLabel(self): - with self.test_session() as sess: - predictions = constant_op.constant([1, 0.5, 0], dtypes_lib.float32) - labels = constant_op.constant([2, 1, 0]) - _, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - with self.assertRaisesRegexp( - errors_impl.InvalidArgumentError, - '.*labels must be 0 or 1, at least one is >1.*'): - sess.run(update_op) - - def testExceptionOnNegativeLabel(self): - with self.test_session() as sess: - predictions = constant_op.constant([1, 0.5, 0], dtypes_lib.float32) - labels = constant_op.constant([1, 0, -1]) - _, update_op = metrics.streaming_dynamic_auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - with self.assertRaisesRegexp( - errors_impl.InvalidArgumentError, - '.*labels must be 0 or 1, at least one is <0.*'): - sess.run(update_op) - - def testWithMultipleUpdates(self): - batch_size = 10 - num_batches = 100 - labels = np.array([]) - predictions = np.array([]) - tf_labels = variables.Variable(array_ops.ones(batch_size, dtypes_lib.int32), - collections=[ops.GraphKeys.LOCAL_VARIABLES], - dtype=dtypes_lib.int32) - tf_predictions = variables.Variable( - array_ops.ones(batch_size), - collections=[ops.GraphKeys.LOCAL_VARIABLES], - dtype=dtypes_lib.float32) - auc, update_op = metrics.streaming_dynamic_auc(tf_labels, tf_predictions) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - for _ in xrange(num_batches): - new_labels = np.random.randint(0, 2, size=batch_size) - noise = np.random.normal(0.0, scale=0.2, size=batch_size) - new_predictions = 0.4 + 0.2 * new_labels + noise - labels = np.concatenate([labels, new_labels]) - predictions = np.concatenate([predictions, new_predictions]) - sess.run(tf_labels.assign(new_labels)) - sess.run(tf_predictions.assign(new_predictions)) - sess.run(update_op) - expected_auc = _np_auc(predictions, labels) - self.assertAlmostEqual(expected_auc, auc.eval()) - - def testAUCPRReverseIncreasingPredictions(self): - with self.test_session() as sess: - predictions = constant_op.constant( - [0.1, 0.4, 0.35, 0.8], dtype=dtypes_lib.float32) - labels = constant_op.constant([0, 0, 1, 1]) - auc, update_op = metrics.streaming_dynamic_auc( - labels, predictions, curve='PR') - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-5) - - def testAUCPRJumbledPredictions(self): - with self.test_session() as sess: - predictions = constant_op.constant( - [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], dtypes_lib.float32) - labels = constant_op.constant([0, 0, 1, 0, 1, 0, 1]) - auc, update_op = metrics.streaming_dynamic_auc( - labels, predictions, curve='PR') - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-6) - - def testAUCPRPredictionsLessThanHalf(self): - with self.test_session() as sess: - predictions = constant_op.constant( - [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], - shape=(1, 7), - dtype=dtypes_lib.float32) - labels = constant_op.constant([0, 0, 0, 0, 1, 1, 1], shape=(1, 7)) - auc, update_op = metrics.streaming_dynamic_auc( - labels, predictions, curve='PR') - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-5) - - class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): def setUp(self): diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index df9dbb457a..ed9fb64b95 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -48,8 +48,8 @@ tf_cuda_cc_test( # Disabled on jenkins until errors finding nvmlShutdown are found. tags = [ "manual", - "multi_gpu", "no_oss", + "noguitar", # note: is run manually there "notap", ], deps = if_cuda( @@ -138,8 +138,8 @@ cuda_py_test( # Disabled on jenkins until errors finding nvmlShutdown are found. tags = [ "manual", - "multi_gpu", "no_oss", + "noguitar", # note: is run manually there "notap", ], ) diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index bad0abd44c..0b13e3595e 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -72,15 +72,14 @@ class NcclTestCase(test.TestCase): two. device_sets: Tuple of virtual devices to run test on. """ + if not test.is_gpu_available(): + return # Test requires access to a GPU + for dtype in [np.float32, np.int32, np.int64, np.float64]: # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: - # Check GPU availability *after* creating test session, see b/68975239. - if not test.is_gpu_available(): - return # Test requires access to a GPU - for devices in device_sets: shape = (3, 4) random = (np.random.random_sample(shape) - .5) * 1024 diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py index 3bf795d19a..0bc133a00e 100644 --- a/tensorflow/contrib/nn/__init__.py +++ b/tensorflow/contrib/nn/__init__.py @@ -15,6 +15,7 @@ """Module for variants of ops in tf.nn. @@alpha_dropout +@@conv1d_transpose @@deprecated_flipped_softmax_cross_entropy_with_logits @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits @@deprecated_flipped_sigmoid_cross_entropy_with_logits @@ -32,6 +33,7 @@ from tensorflow.contrib.nn.python.ops.alpha_dropout import * from tensorflow.contrib.nn.python.ops.cross_entropy import * from tensorflow.contrib.nn.python.ops.sampling_ops import * from tensorflow.contrib.nn.python.ops.scaled_softplus import * +from tensorflow.python.ops.nn_ops import conv1d_transpose from tensorflow.python.ops.nn_ops import nth_element # pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 096d2270e4..022e5ab06f 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -18,6 +18,7 @@ py_library( "python/training/external_optimizer.py", "python/training/lazy_adam_optimizer.py", "python/training/moving_average_optimizer.py", + "python/training/multitask_optimizer_wrapper.py", "python/training/nadam_optimizer.py", "python/training/variable_clipping_optimizer.py", ], @@ -95,6 +96,23 @@ py_test( ], ) +py_test( + name = "multitask_optimizer_wrapper_test", + srcs = ["python/training/multitask_optimizer_wrapper_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:training", + "//tensorflow/python:variables", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) + py_test( name = "lazy_adam_optimizer_test", srcs = ["python/training/lazy_adam_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index e194fa2d4d..af47e3937a 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -24,7 +24,7 @@ from tensorflow.contrib.opt.python.training.external_optimizer import * from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * -from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * # pylint: enable=wildcard-import @@ -35,7 +35,8 @@ _allowed_symbols = [ 'DelayCompensatedGradientDescentOptimizer', 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', - 'ScipyOptimizerInterface', 'VariableClippingOptimizer' + 'ScipyOptimizerInterface', 'VariableClippingOptimizer', + 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py new file mode 100644 index 0000000000..c26037935d --- /dev/null +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py @@ -0,0 +1,138 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""An optimizer wrapper that ensures correct behaviour +of stateful optimizers with multitask loss.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import types +import six + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.training import optimizer + +__all__ = ["MultitaskOptimizerWrapper", + "clip_gradients_by_global_norm"] + +def _is_all_zeros(grad): + all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0) + return all_zeros + +def _get_wrapper(fn, opt): + def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument + all_zeros = _is_all_zeros(grad) + return control_flow_ops.cond( + all_zeros, + control_flow_ops.no_op, + lambda: fn(grad, *args, **kwargs)) + wrapper = types.MethodType(wrapper, opt) + return wrapper + +class MultitaskOptimizerWrapper(object): + """Optimizer wrapper that ensures that + all-zero gradients don't affect the optimizer state. + + This might be useful when a multi-task loss is used, + and some components of the loss might be + not present (e.g. masked out) in some training batches. + Technically their gradient would be zero, + which would normally affect the optimizer state + (e.g. push running average to zero). + However this is not the desired behaviour, + since the missing loss component + should be treated as unknown rather than zero. + + This wrapper filters out all-zero gradient tensors, + therefore preserving the optimizer state. + + If gradient clipping by global norm is used, + the provided function clip_gradients_by_global_norm + should be used (and specified explicitly by the user). + Otherwise the global norm would be underestimated + because of all-zero tensors that should be ignored. + + The gradient calculation and application + are delegated to an underlying optimizer. + The gradient application is altered only for all-zero tensors. + + Example: + ```python + momentum_optimizer = tf.train.MomentumOptimizer( + learning_rate, momentum=0.9) + multitask_momentum_optimizer = tf.contrib.opt.MultitaskOptimizerWrapper( + momentum_optimizer) + gradvars = multitask_momentum_optimizer.compute_gradients( + loss) + gradvars_clipped, _ = tf.contrib.opt.clip_gradients_by_global_norm( + gradvars, 15.0) + train_op = multitask_momentum_optimizer.apply_gradients( + gradvars_clipped, global_step=batch) + ``` + """ + def __init__(self, opt): + """ + Args: + opt: an instance of a class that implements tf.train.Optimizer. + """ + if not isinstance(opt, optimizer.Optimizer): + raise TypeError( + "Supplied optimizer must be an instance of tf.train.Optimizer") + self._opt = opt + overriden_methods = ('_apply_dense', + '_resource_apply_dense', + '_apply_sparse', + '_resource_apply_sparse') + for name in overriden_methods: + fn = getattr(self._opt, name) + wrapper = _get_wrapper(fn, self._opt) + setattr(self._opt, name, wrapper) + + def __getattr__(self, name): + return getattr(self._opt, name) + + +def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.): + """Clips gradients of a multitask loss by their global norm. + Ignores all-zero tensors when computing the global norm. + + Args: + gradients_variables: a list of pairs (gradient, variable). + clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. + + Returns: + list: A list of pairs of the same type as gradients_variables,. + fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. + """ + gradients, variables = six.moves.zip(*gradients_variables) + def _replace_nonexisting_grad(grad): + if grad is None: + return grad + all_zeros = _is_all_zeros(grad) + return control_flow_ops.cond(all_zeros, + lambda: array_ops.zeros( + [], dtype=dtypes.as_dtype(grad.dtype)), + lambda: grad) + nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients] + fixed_global_norm = clip_ops.global_norm(nonzero_gradients) + gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm, + use_norm=fixed_global_norm) + return list(six.moves.zip(gradients, variables)), fixed_global_norm diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py new file mode 100644 index 0000000000..b06213f715 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py @@ -0,0 +1,119 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for MultitaskOptimizerWrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import momentum + +import numpy as np +import six + +class MultitaskOptimizerWrapperTest(test.TestCase): + """ + Tests for the multitask optimizer wrapper. + """ + def testWrapper(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) + var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32) + grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) + mom_opt_impl = momentum.MomentumOptimizer( + learning_rate=2.0, momentum=0.9) + mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper( + mom_opt_impl) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + mom_update_partial = mom_opt.apply_gradients( + zip([grads_allzero, grads1], [var0, var1])) + mom_update_no_action = mom_opt.apply_gradients( + zip([grads_allzero, grads_allzero], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + + # Step 1: normal momentum update. + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), + self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + + # Step 2: momentum update that changes only slot1 but not slot0. + self.evaluate(mom_update_partial) + # Check that only the relevant momentum accumulator has been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + + # Step 3: momentum update that does not change anything. + self.evaluate(mom_update_no_action) + # Check that the momentum accumulators have *NOT* been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + + def testGradientClipping(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) + var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + var2 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + var3 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + grads0 = constant_op.constant([10.0, 15.0], dtype=dtypes.float32) + grads1 = constant_op.constant([0.0, 5.0], dtype=dtypes.float32) + grads2 = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) + grads3 = None + varlist = [var0, var1, var2, var3] + gradients = [grads0, grads1, grads2, grads3] + clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm( + six.moves.zip(gradients, varlist), clip_norm=1.0) + clipped_grads = list(six.moves.zip(*clipped_gradvars))[0] + reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0]))) + self.assertAllCloseAccordingToType( + self.evaluate(global_norm), reference_global_norm) + self.assertAllCloseAccordingToType( + self.evaluate(clipped_grads[2]), np.array([0., 0.])) + self.assertEqual(clipped_grads[3], None) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD index 45a98c7f85..935af80e7a 100644 --- a/tensorflow/contrib/quantize/BUILD +++ b/tensorflow/contrib/quantize/BUILD @@ -133,6 +133,7 @@ py_library( deps = [ "//tensorflow/contrib/framework:framework_py", "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:math_ops", @@ -142,23 +143,6 @@ py_library( ], ) -py_test( - name = "quant_ops_test", - size = "small", - srcs = ["python/quant_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":quant_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:platform_test", - "//tensorflow/python:session", - "//tensorflow/python:variables", - ], -) - py_library( name = "quantize", srcs = ["python/quantize.py"], diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index f80d427ff0..0a38ef9fcd 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -22,12 +22,15 @@ from tensorflow.contrib.framework.python.ops import add_arg_scope from tensorflow.contrib.framework.python.ops import model_variable from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.training import moving_averages +EPSILON = 1e-5 + @add_arg_scope def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None): @@ -130,10 +133,12 @@ def LastValueQuantize(inputs, batch_min = inputs else: batch_min = math_ops.reduce_min(inputs, name='BatchMin') - # TFLite requires that 0.0 if always in the [min; max] range. + batch_min -= EPSILON + # B-eng requires that 0.0 if always in the [min; max] range. batch_min = math_ops.minimum(batch_min, 0.0) - assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast') - ops.add_to_collection(updates_collection, assign_min.op) + assign_min_op = state_ops.assign( + min_var, batch_min, name='AssignMinLast').op + ops.add_to_collection(updates_collection, assign_min_op) if per_channel: if input_dim >= 2: @@ -143,15 +148,17 @@ def LastValueQuantize(inputs, batch_max = inputs else: batch_max = math_ops.reduce_max(inputs, name='BatchMax') - # TFLite requires that 0.0 if always in the [min; max] range. + batch_max += EPSILON + # B-eng requires that 0.0 if always in the [min; max] range. batch_max = math_ops.maximum(batch_max, 0.0) - assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast') - ops.add_to_collection(updates_collection, assign_max.op) + assign_max_op = state_ops.assign( + max_var, batch_max, name='AssignMaxLast').op + ops.add_to_collection(updates_collection, assign_max_op) return _FakeQuantWithMinMaxVars( inputs, - assign_min, - assign_max, + batch_min, + batch_max, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range) @@ -244,9 +251,9 @@ def MovingAvgQuantize(inputs, batch_min = math_ops.reduce_min(inputs, name='BatchMin') # B-eng requires that 0.0 if always in the [min; max] range. batch_min = math_ops.minimum(batch_min, 0.0) - assign_min = moving_averages.assign_moving_average( - min_var, batch_min, ema_decay, name='AssignMinEma') - ops.add_to_collection(updates_collection, assign_min.op) + assign_min_op = moving_averages.assign_moving_average( + min_var, batch_min, ema_decay, name='AssignMinEma').op + ops.add_to_collection(updates_collection, assign_min_op) if per_channel: if input_dim >= 2: @@ -258,14 +265,14 @@ def MovingAvgQuantize(inputs, batch_max = math_ops.reduce_max(inputs, name='BatchMax') # B-eng requires that 0.0 if always in the [min; max] range. batch_max = math_ops.maximum(batch_max, 0.0) - assign_max = moving_averages.assign_moving_average( - max_var, batch_max, ema_decay, name='AssignMaxEma') - ops.add_to_collection(updates_collection, assign_max.op) + assign_max_op = moving_averages.assign_moving_average( + max_var, batch_max, ema_decay, name='AssignMaxEma').op + ops.add_to_collection(updates_collection, assign_max_op) return _FakeQuantWithMinMaxVars( inputs, - assign_min, - assign_max, + min_var, + max_var, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range) @@ -294,10 +301,20 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, if per_channel: assert len(min_var.get_shape()) == 1 assert len(max_var.get_shape()) == 1 - return array_ops.fake_quant_with_min_max_vars_per_channel( - inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range) + with ops.control_dependencies([check_ops.assert_less(min_var, max_var)]): + return array_ops.fake_quant_with_min_max_vars_per_channel( + inputs, + min_var, + max_var, + num_bits=num_bits, + narrow_range=narrow_range) else: assert min_var.get_shape() == [] # pylint: disable=g-explicit-bool-comparison assert max_var.get_shape() == [] # pylint: disable=g-explicit-bool-comparison - return array_ops.fake_quant_with_min_max_vars( - inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range) + with ops.control_dependencies([check_ops.assert_less(min_var, max_var)]): + return array_ops.fake_quant_with_min_max_vars( + inputs, + min_var, + max_var, + num_bits=num_bits, + narrow_range=narrow_range) diff --git a/tensorflow/contrib/quantize/python/quant_ops_test.py b/tensorflow/contrib/quantize/python/quant_ops_test.py deleted file mode 100644 index 3884679602..0000000000 --- a/tensorflow/contrib/quantize/python/quant_ops_test.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for third_party.tensorflow.contrib.quantize.python.quant_ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.quantize.python import quant_ops -from tensorflow.python.client import session -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import googletest - -_MIN_MAX_VARS = 'min_max_vars' - - -class QuantOpsTest(googletest.TestCase): - - def testLastValueQuantizeTrainingAssign(self): - g = ops.Graph() - with session.Session(graph=g) as sess: - x = array_ops.placeholder(dtypes.float32, shape=[2]) - y = quant_ops.LastValueQuantize( - x, - init_min=0.0, - init_max=0.0, - is_training=True, - vars_collection=_MIN_MAX_VARS) - - # Run the step. - sess.run(variables.global_variables_initializer()) - sess.run(y, feed_dict={x: [-1.0, 1.0]}) - # Now check that the min_max_vars were, in fact, updated. - min_value, max_value = self._GetMinMaxValues(sess) - self.assertEqual(min_value, -1.0) - self.assertEqual(max_value, 1.0) - - def testMovingAvgQuantizeTrainingAssign(self): - g = ops.Graph() - with session.Session(graph=g) as sess: - x = array_ops.placeholder(dtypes.float32, shape=[2]) - y = quant_ops.MovingAvgQuantize( - x, - init_min=0.0, - init_max=0.0, - is_training=True, - vars_collection=_MIN_MAX_VARS) - - # Run the step. - sess.run(variables.global_variables_initializer()) - # Do two runs to avoid zero debias. - sess.run(y, feed_dict={x: [-1.0, 1.0]}) - sess.run(y, feed_dict={x: [0.0, 0.0]}) - # Now check that the min_max_vars were, in fact, updated. - min_value, max_value = self._GetMinMaxValues(sess) - self.assertGreater(min_value, -1.0) - self.assertLess(min_value, 0.0) - self.assertGreater(max_value, 0.0) - self.assertLess(max_value, 1.0) - - def _GetMinMaxValues(self, sess): - min_max_vars = ops.get_collection(_MIN_MAX_VARS) - self.assertEqual(len(min_max_vars), 2) - min_idx = 0 if 'min' in min_max_vars[0].name else 1 - max_idx = (min_idx + 1) % 2 - min_var, max_var = min_max_vars[min_idx], min_max_vars[max_idx] - min_max_values = sess.run([min_var, max_var]) - return min_max_values[0], min_max_values[1] - - -if __name__ == '__main__': - googletest.main() diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 7db2d863aa..548e33663e 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -89,8 +89,8 @@ def Quantize(graph, op.name[:-len('/depthwise')]) if separable_conv and separable_conv.type == 'Conv2D': continue - # Quantize add ops that come after Conv2D or DepthwiseConv2dNative. - if op.type in ['Conv2D', 'DepthwiseConv2dNative']: + if op.type == 'Conv2D': + # Quantize add ops that come after Conv2D add_context_re = re.search(r'^(.*)/[^/]+/', op.name) if add_context_re is not None: context.add_contexts.add(add_context_re.group(1)) @@ -387,7 +387,7 @@ class _QuantizeContext(object): if delay_requested and self.quant_delay and self.quant_delay > 0: activate_quant = math_ops.greater_equal( - training_util.get_or_create_global_step(), + training_util.get_global_step(), self.quant_delay, name=scope + '/activate_quant') quant = control_flow_ops.cond( diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 57dab03f16..3e62f95bd6 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -97,8 +97,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/AssignMinLast', - scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' + scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', + scope + '/weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) output_op_name = scope + '/Conv2D' @@ -109,8 +109,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' + scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', + scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -122,7 +122,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', + 'test/act_quant/min/read', 'test/act_quant/max/read', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -172,8 +172,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/AssignMinLast', - scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' + scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', + scope + '/weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) output_op_name = scope + '/MatMul' @@ -184,8 +184,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' + scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', + scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -196,7 +196,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', + 'test/act_quant/min/read', 'test/act_quant/max/read', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -247,8 +247,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/AssignMinLast', - scope + '/weights_quant/AssignMaxLast', + scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', scope + '/depthwise_weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -260,8 +259,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' + scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', + scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -272,7 +271,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', + 'test/act_quant/min/read', 'test/act_quant/max/read', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -402,10 +401,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('AssignMinEma' - if use_ema else 'AssignMinLast'), - scope + '/weights_quant/' + ('AssignMaxEma' - if use_ema else 'AssignMaxLast'), + scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), + scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -418,8 +415,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' + scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', + scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -430,7 +427,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', + 'test/act_quant/min/read', 'test/act_quant/max/read', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -521,10 +518,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('AssignMinEma' - if use_ema else 'AssignMinLast'), - scope + '/weights_quant/' + ('AssignMaxEma' - if use_ema else 'AssignMaxLast'), + scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), + scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -537,8 +532,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' + scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', + scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -549,7 +544,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', + 'test/act_quant/min/read', 'test/act_quant/max/read', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -644,10 +639,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('AssignMinEma' - if use_ema else 'AssignMinLast'), - scope + '/weights_quant/' + ('AssignMaxEma' - if use_ema else 'AssignMaxLast'), + scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), + scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -660,8 +653,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' + scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', + scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -672,7 +665,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', + 'test/act_quant/min/read', 'test/act_quant/max/read', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index 1e4dd7cf67..eb141a21bd 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -30,7 +30,6 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.platform import googletest conv2d = layers.conv2d -separable_conv2d = layers.separable_conv2d class QuantizeTest(test_util.TensorFlowTestCase): @@ -78,30 +77,6 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(add_quant.type, quantization_node_name) - def testInsertQuantOpForAddAfterSeparableConv2d(self): - graph = ops.Graph() - with graph.as_default(): - batch_size, height, width, depth = 5, 128, 128, 3 - input1 = array_ops.zeros((batch_size, height, width, depth)) - input2 = array_ops.zeros((batch_size, height / 2, width / 2, depth)) - conv = separable_conv2d(input1, None, [5, 5], stride=2, - depth_multiplier=1.0, padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=None, scope='test/test') - node = math_ops.add(conv, input2, name='test/add') - node = array_ops.identity(node, name='test/identity') - update_barrier = control_flow_ops.no_op(name='update_barrier') - with ops.control_dependencies([update_barrier]): - array_ops.identity(node, name='control_dependency') - - quantize.Quantize(graph=graph, weight_bits=8, weight_narrow_range=True, - activation_bits=8) - - quantization_node_name = 'FakeQuantWithMinMaxVars' - add_quant = graph.get_operation_by_name('test/add_quant/' + - quantization_node_name) - self.assertEqual(add_quant.type, quantization_node_name) - def _WeightInit(self, stddev): """Returns truncated normal variable initializer. diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 909c6aba2b..16b6d145e3 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -38,6 +38,9 @@ from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test +from tensorflow.python.framework import test_util +from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell + # pylint: enable=protected-access @@ -358,6 +361,45 @@ class RNNCellTest(test.TestCase): self.assertEquals(variables[2].op.name, "root/lstm_cell/projection/kernel") + def testLSTMCellLayerNorm(self): + with self.test_session() as sess: + num_units = 2 + num_proj = 3 + batch_size = 1 + input_size = 4 + with variable_scope.variable_scope( + "root", initializer=init_ops.constant_initializer(0.5)): + x = array_ops.zeros([batch_size, input_size]) + c = array_ops.zeros([batch_size, num_units]) + h = array_ops.zeros([batch_size, num_proj]) + state = rnn_cell_impl.LSTMStateTuple(c, h) + cell = contrib_rnn_cell.LayerNormLSTMCell( + num_units=num_units, + num_proj=num_proj, + forget_bias=1.0, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) + g, out_m = cell(x, state) + sess.run([variables_lib.global_variables_initializer()]) + res = sess.run([g, out_m], { + x.name: np.ones((batch_size, input_size)), + c.name: 0.1 * np.ones((batch_size, num_units)), + h.name: 0.1 * np.ones((batch_size, num_proj)) + }) + self.assertEqual(len(res), 2) + # The numbers in results were not calculated, this is mostly just a + # smoke test. + self.assertEqual(res[0].shape, (batch_size, num_proj)) + self.assertEqual(res[1][0].shape, (batch_size, num_units)) + self.assertEqual(res[1][1].shape, (batch_size, num_proj)) + # Different inputs so different outputs and states + for i in range(1, batch_size): + self.assertTrue( + float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) + self.assertTrue( + float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) + def testOutputProjectionWrapper(self): with self.test_session() as sess: with variable_scope.variable_scope( diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index ebd4564f12..b4a5f2d7eb 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -37,6 +37,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell +from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -1275,6 +1276,49 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5) + + def testBasicLSTMCellWithStateTupleLayerNorm(self): + """The results of LSTMCell and LayerNormBasicLSTMCell + should be same. """ + with self.test_session() as sess: + with variable_scope.variable_scope( + "root", initializer=init_ops.constant_initializer(0.5)): + x = array_ops.zeros([1, 2]) + c0 = array_ops.zeros([1, 2]) + h0 = array_ops.zeros([1, 2]) + state0 = rnn_cell_impl.LSTMStateTuple(c0, h0) + c1 = array_ops.zeros([1, 2]) + h1 = array_ops.zeros([1, 2]) + state1 = rnn_cell_impl.LSTMStateTuple(c1, h1) + cell = rnn_cell_impl.MultiRNNCell( + [contrib_rnn_cell.LayerNormLSTMCell( + 2, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) for _ in range(2)]) + h, (s0, s1) = cell(x, (state0, state1)) + sess.run([variables.global_variables_initializer()]) + res = sess.run([h, s0, s1], { + x.name: np.array([[1., 1.]]), + c0.name: 0.1 * np.asarray([[0, 1]]), + h0.name: 0.1 * np.asarray([[2, 3]]), + c1.name: 0.1 * np.asarray([[4, 5]]), + h1.name: 0.1 * np.asarray([[6, 7]]), + }) + + expected_h = np.array([[-0.38079708, 0.38079708]]) + expected_h0 = np.array([[-0.38079708, 0.38079708]]) + expected_c0 = np.array([[-1.0, 1.0]]) + expected_h1 = np.array([[-0.38079708, 0.38079708]]) + expected_c1 = np.array([[-1.0, 1.0]]) + + self.assertEqual(len(res), 3) + self.assertAllClose(res[0], expected_h, 1e-5) + self.assertAllClose(res[1].c, expected_c0, 1e-5) + self.assertAllClose(res[1].h, expected_h0, 1e-5) + self.assertAllClose(res[2].c, expected_c1, 1e-5) + self.assertAllClose(res[2].h, expected_h1, 1e-5) + def testBasicLSTMCellWithDropout(self): def _is_close(x, y, digits=4): diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index d4691f2c27..5e85c125df 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest @@ -76,6 +77,18 @@ def _get_sharded_variable(name, shape, dtype, num_shards): return shards +def _norm(g, b, inp, scope): + shape = inp.get_shape()[-1:] + gamma_init = init_ops.constant_initializer(g) + beta_init = init_ops.constant_initializer(b) + with vs.variable_scope(scope): + # Initialize beta and gamma for use by layer_norm. + vs.get_variable("gamma", shape=shape, initializer=gamma_init) + vs.get_variable("beta", shape=shape, initializer=beta_init) + normalized = layers.layer_norm(inp, reuse=True, scope=scope) + return normalized + + class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): """Long short-term memory unit (LSTM) recurrent network cell. @@ -102,13 +115,24 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. + + Layer normalization implementation is based on: + + https://arxiv.org/abs/1607.06450. + + "Layer Normalization" + Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton + + and is applied before the internal nonlinearities. + """ def __init__(self, num_units, use_peepholes=False, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=1, num_proj_shards=1, forget_bias=1.0, state_is_tuple=True, - activation=math_ops.tanh, reuse=None): + activation=math_ops.tanh, reuse=None, + layer_norm=False, norm_gain=1.0, norm_shift=0.0): """Initialize the parameters for an LSTM cell. Args: @@ -135,6 +159,13 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. + layer_norm: If `True`, layer normalization will be applied. + norm_gain: float, The layer normalization gain initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + norm_shift: float, The layer normalization shift initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + + """ super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: @@ -152,6 +183,9 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): self._state_is_tuple = state_is_tuple self._activation = activation self._reuse = reuse + self._layer_norm = layer_norm + self._norm_gain = norm_gain + self._norm_shift = norm_shift if num_proj: self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj) @@ -220,9 +254,20 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): # j = new_input, f = forget_gate, o = output_gate cell_inputs = array_ops.concat([inputs, m_prev], 1) - lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b) + lstm_matrix = math_ops.matmul(cell_inputs, concat_w) + + # If layer nomalization is applied, do not add bias + if not self._layer_norm: + lstm_matrix = nn_ops.bias_add(lstm_matrix, b) + j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=3, axis=1) + # Apply layer normalization + if self._layer_norm: + j = _norm(self._norm_gain, self._norm_shift, j, "transform") + f = _norm(self._norm_gain, self._norm_shift, f, "forget") + o = _norm(self._norm_gain, self._norm_shift, o, "output") + # Diagonal connections if self._use_peepholes: w_f_diag = vs.get_variable( @@ -236,6 +281,10 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): f_act = sigmoid(f + self._forget_bias) c = (f_act * c_prev + (1 - f_act) * self._activation(j)) + # Apply layer normalization + if self._layer_norm: + c = _norm(self._norm_gain, self._norm_shift, c, "state") + if self._use_peepholes: m = sigmoid(o + w_o_diag * c) * self._activation(c) else: @@ -1301,8 +1350,8 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): self._keep_prob = dropout_keep_prob self._seed = dropout_prob_seed self._layer_norm = layer_norm - self._g = norm_gain - self._b = norm_shift + self._norm_gain = norm_gain + self._norm_shift = norm_shift self._reuse = reuse @property @@ -1313,24 +1362,25 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): def output_size(self): return self._num_units - def _norm(self, inp, scope): + def _norm(self, inp, scope, dtype=dtypes.float32): shape = inp.get_shape()[-1:] - gamma_init = init_ops.constant_initializer(self._g) - beta_init = init_ops.constant_initializer(self._b) + gamma_init = init_ops.constant_initializer(self._norm_gain) + beta_init = init_ops.constant_initializer(self._norm_shift) with vs.variable_scope(scope): # Initialize beta and gamma for use by layer_norm. - vs.get_variable("gamma", shape=shape, initializer=gamma_init) - vs.get_variable("beta", shape=shape, initializer=beta_init) + vs.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype) + vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype) normalized = layers.layer_norm(inp, reuse=True, scope=scope) return normalized def _linear(self, args): out_size = 4 * self._num_units proj_size = args.get_shape()[-1] - weights = vs.get_variable("kernel", [proj_size, out_size]) + dtype = args.dtype + weights = vs.get_variable("kernel", [proj_size, out_size], dtype=dtype) out = math_ops.matmul(args, weights) if not self._layer_norm: - bias = vs.get_variable("bias", [out_size]) + bias = vs.get_variable("bias", [out_size], dtype=dtype) out = nn_ops.bias_add(out, bias) return out @@ -1339,13 +1389,14 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): c, h = state args = array_ops.concat([inputs, h], 1) concat = self._linear(args) + dtype = args.dtype i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1) if self._layer_norm: - i = self._norm(i, "input") - j = self._norm(j, "transform") - f = self._norm(f, "forget") - o = self._norm(o, "output") + i = self._norm(i, "input", dtype=dtype) + j = self._norm(j, "transform", dtype=dtype) + f = self._norm(f, "forget", dtype=dtype) + o = self._norm(o, "output", dtype=dtype) g = self._activation(j) if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1: @@ -1354,7 +1405,7 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): new_c = (c * math_ops.sigmoid(f + self._forget_bias) + math_ops.sigmoid(i) * g) if self._layer_norm: - new_c = self._norm(new_c, "state") + new_c = self._norm(new_c, "state", dtype=dtype) new_h = self._activation(new_c) * math_ops.sigmoid(o) new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h) @@ -2306,3 +2357,264 @@ class GLSTMCell(rnn_cell_impl.RNNCell): new_state = rnn_cell_impl.LSTMStateTuple(c, m) return m, new_state + + +class LayerNormLSTMCell(rnn_cell_impl.RNNCell): + """Long short-term memory unit (LSTM) recurrent network cell. + + The default non-peephole implementation is based on: + + http://www.bioinf.jku.at/publications/older/2604.pdf + + S. Hochreiter and J. Schmidhuber. + "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997. + + The peephole implementation is based on: + + https://research.google.com/pubs/archive/43905.pdf + + Hasim Sak, Andrew Senior, and Francoise Beaufays. + "Long short-term memory recurrent neural network architectures for + large scale acoustic modeling." INTERSPEECH, 2014. + + The class uses optional peep-hole connections, optional cell clipping, and + an optional projection layer. + + Layer normalization implementation is based on: + + https://arxiv.org/abs/1607.06450. + + "Layer Normalization" + Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton + + and is applied before the internal nonlinearities. + + """ + + def __init__(self, num_units, + use_peepholes=False, cell_clip=None, + initializer=None, num_proj=None, proj_clip=None, + forget_bias=1.0, + activation=None, layer_norm=False, + norm_gain=1.0, norm_shift=0.0, reuse=None): + """Initialize the parameters for an LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell + use_peepholes: bool, set True to enable diagonal/peephole connections. + cell_clip: (optional) A float value, if provided the cell state is clipped + by this value prior to the cell output activation. + initializer: (optional) The initializer to use for the weight and + projection matrices. + num_proj: (optional) int, The output dimensionality for the projection + matrices. If None, no projection is performed. + proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is + provided, then the projected values are clipped elementwise to within + `[-proj_clip, proj_clip]`. + forget_bias: Biases of the forget gate are initialized by default to 1 + in order to reduce the scale of forgetting at the beginning of + the training. Must set it manually to `0.0` when restoring from + CudnnLSTM trained checkpoints. + activation: Activation function of the inner states. Default: `tanh`. + layer_norm: If `True`, layer normalization will be applied. + norm_gain: float, The layer normalization gain initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + norm_shift: float, The layer normalization shift initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + + When restoring from CudnnLSTM-trained checkpoints, must use + CudnnCompatibleLSTMCell instead. + """ + super(LayerNormLSTMCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + self._use_peepholes = use_peepholes + self._cell_clip = cell_clip + self._initializer = initializer + self._num_proj = num_proj + self._proj_clip = proj_clip + self._forget_bias = forget_bias + self._activation = activation or math_ops.tanh + self._layer_norm = layer_norm + self._norm_gain = norm_gain + self._norm_shift = norm_shift + + if num_proj: + self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)) + self._output_size = num_proj + else: + self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units)) + self._output_size = num_units + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + + def _linear(self, + args, + output_size, + bias, + bias_initializer=None, + kernel_initializer=None, + layer_norm=False): + """Linear map: sum_i(args[i] * W[i]), where W[i] is a Variable. + + Args: + args: a 2D Tensor or a list of 2D, batch x n, Tensors. + output_size: int, second dimension of W[i]. + bias: boolean, whether to add a bias term or not. + bias_initializer: starting value to initialize the bias + (default is all zeros). + kernel_initializer: starting value to initialize the weight. + layer_norm: boolean, whether to apply layer normalization. + + + Returns: + A 2D Tensor with shape [batch x output_size] taking value + sum_i(args[i] * W[i]), where each W[i] is a newly created Variable. + + Raises: + ValueError: if some of the arguments has unspecified or wrong shape. + """ + if args is None or (nest.is_sequence(args) and not args): + raise ValueError("`args` must be specified") + if not nest.is_sequence(args): + args = [args] + + # Calculate the total size of arguments on dimension 1. + total_arg_size = 0 + shapes = [a.get_shape() for a in args] + for shape in shapes: + if shape.ndims != 2: + raise ValueError("linear is expecting 2D arguments: %s" % shapes) + if shape[1].value is None: + raise ValueError("linear expects shape[1] to be provided for shape %s, " + "but saw %s" % (shape, shape[1])) + else: + total_arg_size += shape[1].value + + dtype = [a.dtype for a in args][0] + + # Now the computation. + scope = vs.get_variable_scope() + with vs.variable_scope(scope) as outer_scope: + weights = vs.get_variable( + "kernel", [total_arg_size, output_size], + dtype=dtype, + initializer=kernel_initializer) + if len(args) == 1: + res = math_ops.matmul(args[0], weights) + else: + res = math_ops.matmul(array_ops.concat(args, 1), weights) + if not bias: + return res + with vs.variable_scope(outer_scope) as inner_scope: + inner_scope.set_partitioner(None) + if bias_initializer is None: + bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) + biases = vs.get_variable( + "bias", [output_size], + dtype=dtype, + initializer=bias_initializer) + + if not layer_norm: + res = nn_ops.bias_add(res, biases) + + return res + + def call(self, inputs, state): + """Run one step of LSTM. + + Args: + inputs: input Tensor, 2D, batch x num_units. + state: this must be a tuple of state Tensors, + both `2-D`, with column sizes `c_state` and + `m_state`. + + Returns: + A tuple containing: + + - A `2-D, [batch x output_dim]`, Tensor representing the output of the + LSTM after reading `inputs` when previous state was `state`. + Here output_dim is: + num_proj if num_proj was set, + num_units otherwise. + - Tensor(s) representing the new state of LSTM after reading `inputs` when + the previous state was `state`. Same type and shape(s) as `state`. + + Raises: + ValueError: If input size cannot be inferred from inputs via + static shape inference. + """ + num_proj = self._num_units if self._num_proj is None else self._num_proj + sigmoid = math_ops.sigmoid + + (c_prev, m_prev) = state + + dtype = inputs.dtype + input_size = inputs.get_shape().with_rank(2)[1] + if input_size.value is None: + raise ValueError("Could not infer input size from inputs.get_shape()[-1]") + scope = vs.get_variable_scope() + with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: + + # i = input_gate, j = new_input, f = forget_gate, o = output_gate + lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True, + bias_initializer=None, layer_norm=self._layer_norm) + i, j, f, o = array_ops.split( + value=lstm_matrix, num_or_size_splits=4, axis=1) + + if self._layer_norm: + i = _norm(self._norm_gain, self._norm_shift, i, "input") + j = _norm(self._norm_gain, self._norm_shift, j, "transform") + f = _norm(self._norm_gain, self._norm_shift, f, "forget") + o = _norm(self._norm_gain, self._norm_shift, o, "output") + + # Diagonal connections + if self._use_peepholes: + with vs.variable_scope(unit_scope) as projection_scope: + w_f_diag = vs.get_variable( + "w_f_diag", shape=[self._num_units], dtype=dtype) + w_i_diag = vs.get_variable( + "w_i_diag", shape=[self._num_units], dtype=dtype) + w_o_diag = vs.get_variable( + "w_o_diag", shape=[self._num_units], dtype=dtype) + + if self._use_peepholes: + c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) + else: + c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * + self._activation(j)) + + if self._layer_norm: + c = _norm(self._norm_gain, self._norm_shift, c, "state") + + if self._cell_clip is not None: + # pylint: disable=invalid-unary-operand-type + c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) + # pylint: enable=invalid-unary-operand-type + if self._use_peepholes: + m = sigmoid(o + w_o_diag * c) * self._activation(c) + else: + m = sigmoid(o) * self._activation(c) + + if self._num_proj is not None: + with vs.variable_scope("projection") as proj_scope: + m = self._linear(m, self._num_proj, bias=False) + + if self._proj_clip is not None: + # pylint: disable=invalid-unary-operand-type + m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) + # pylint: enable=invalid-unary-operand-type + + new_state = (rnn_cell_impl.LSTMStateTuple(c, m)) + return m, new_state diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index 01a5540121..91493302b1 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -33,7 +33,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell from tensorflow.python.ops import variables from tensorflow.python.ops import variable_scope as vs @@ -590,24 +589,6 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testBahdanauMonotonicNormalized') - def testBahdanauMonotonicHard(self): - # Run attention mechanism with mode='hard', make sure probabilities are hard - b, t, u, d = 10, 20, 30, 40 - with self.test_session(use_gpu=True) as sess: - a = wrapper.BahdanauMonotonicAttention( - d, - random_ops.random_normal((b, t, u)), - mode='hard') - # Just feed previous attention as [1, 0, 0, ...] - attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t)) - sess.run(variables.global_variables_initializer()) - attn_out = attn.eval() - # All values should be 0 or 1 - self.assertTrue(np.all(np.logical_or(attn_out == 0, attn_out == 1))) - # Sum of distributions should be 0 or 1 (0 when all p_choose_i are 0) - self.assertTrue(np.all(np.logical_or(attn_out.sum(axis=1) == 1, - attn_out.sum(axis=1) == 0))) - def testLuongMonotonicNotNormalized(self): create_attention_mechanism = functools.partial( wrapper.LuongMonotonicAttention, sigmoid_noise=1.0, @@ -714,24 +695,6 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testMultiAttention') - def testLuongMonotonicHard(self): - # Run attention mechanism with mode='hard', make sure probabilities are hard - b, t, u, d = 10, 20, 30, 40 - with self.test_session(use_gpu=True) as sess: - a = wrapper.LuongMonotonicAttention( - d, - random_ops.random_normal((b, t, u)), - mode='hard') - # Just feed previous attention as [1, 0, 0, ...] - attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t)) - sess.run(variables.global_variables_initializer()) - attn_out = attn.eval() - # All values should be 0 or 1 - self.assertTrue(np.all(np.logical_or(attn_out == 0, attn_out == 1))) - # Sum of distributions should be 0 or 1 (0 when all p_choose_i are 0) - self.assertTrue(np.all(np.logical_or(attn_out.sum(axis=1) == 1, - attn_out.sum(axis=1) == 0))) - def testMultiAttentionNoAttentionLayer(self): create_attention_mechanisms = ( wrapper.BahdanauAttention, wrapper.LuongAttention) diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 87230e3355..0c64c9caf1 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism): memory_sequence_length=None, memory_layer=None, check_inner_dims_defined=True, - score_mask_value=float("-inf"), + score_mask_value=None, name=None): """Construct base AttentionMechanism class. @@ -187,9 +187,12 @@ class _BaseAttentionMechanism(AttentionMechanism): "memory_layer is not a Layer: %s" % type(memory_layer).__name__) self._query_layer = query_layer self._memory_layer = memory_layer + self.dtype = memory_layer.dtype if not callable(probability_fn): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) + if score_mask_value is None: + score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -334,7 +337,8 @@ class LuongAttention(_BaseAttentionMechanism): memory_sequence_length=None, scale=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="LuongAttention"): """Construct the AttentionMechanism mechanism. @@ -353,17 +357,20 @@ class LuongAttention(_BaseAttentionMechanism): score_mask_value: (optional) The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the memory layer of the attention mechanism. name: Name to use when creating ops. """ # For LuongAttention, we only transform the memory layer; thus # num_units **must** match expected the query depth. if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(LuongAttention, self).__init__( query_layer=None, memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -475,7 +482,8 @@ class BahdanauAttention(_BaseAttentionMechanism): memory_sequence_length=None, normalize=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="BahdanauAttention"): """Construct the Attention mechanism. @@ -494,16 +502,20 @@ class BahdanauAttention(_BaseAttentionMechanism): score_mask_value: (optional): The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(BahdanauAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -679,11 +691,7 @@ def _monotonic_probability_fn(score, previous_alignments, sigmoid_noise, mode, seed=seed) score += sigmoid_noise*noise # Compute "choosing" probabilities from the attention scores - if mode == "hard": - # When mode is hard, use a hard sigmoid - p_choose_i = math_ops.cast(score > 0, score.dtype) - else: - p_choose_i = math_ops.sigmoid(score) + p_choose_i = math_ops.sigmoid(score) # Convert from choosing probabilities to attention distribution return monotonic_attention(p_choose_i, previous_alignments, mode) @@ -738,11 +746,12 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, normalize=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="BahdanauMonotonicAttention"): """Construct the Attention mechanism. @@ -766,17 +775,21 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(BahdanauMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -834,11 +847,12 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, scale=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="LuongMonotonicAttention"): """Construct the Attention mechanism. @@ -862,17 +876,21 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(LuongMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -1123,8 +1141,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, name="attention_layer", use_bias=False) - for attention_layer_size in attention_layer_sizes) + attention_layer_size, name="attention_layer", use_bias=False, + dtype=attention_mechanisms[i].dtype) + for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) else: self._attention_layers = None diff --git a/tensorflow/contrib/slim/BUILD b/tensorflow/contrib/slim/BUILD index c2f106c2b2..23c23af2f4 100644 --- a/tensorflow/contrib/slim/BUILD +++ b/tensorflow/contrib/slim/BUILD @@ -39,8 +39,6 @@ py_test( "//tensorflow/python:summary", "//tensorflow/python:training", "//tensorflow/python:variables", - "//tensorflow/python/debug:debug_data", - "//tensorflow/python/debug:hooks", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index 0bfd0801d5..f7a85557ca 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -237,7 +237,7 @@ One way to reduce this code duplication would be via a `for` loop: ```python net = ... for i in range(3): - net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1)) + net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1)) net = slim.max_pool2d(net, [2, 2], scope='pool2') ``` diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index cdb720b36b..2d4b08df61 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -153,8 +153,7 @@ def evaluate_once(master, summary_op=_USE_DEFAULT, summary_op_feed_dict=None, variables_to_restore=None, - session_config=None, - hooks=None): + session_config=None): """Evaluates the model at the given checkpoint path. Args: @@ -178,8 +177,6 @@ def evaluate_once(master, slim.variables.GetVariablesToRestore() is used. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. If left as `None`, the default will be used. - hooks: A list of additional `SessionRunHook` objects to pass during the - evaluation. Returns: The value of `final_op` or `None` if `final_op` is `None`. @@ -187,13 +184,11 @@ def evaluate_once(master, if summary_op == _USE_DEFAULT: summary_op = summary.merge_all() - all_hooks = [evaluation.StopAfterNEvalsHook(num_evals),] + hooks = [evaluation.StopAfterNEvalsHook(num_evals),] if summary_op is not None: - all_hooks.append(evaluation.SummaryAtEndHook( + hooks.append(evaluation.SummaryAtEndHook( log_dir=logdir, summary_op=summary_op, feed_dict=summary_op_feed_dict)) - if hooks is not None: - all_hooks.extend(hooks) saver = None if variables_to_restore is not None: @@ -208,7 +203,7 @@ def evaluate_once(master, feed_dict=eval_op_feed_dict, final_ops=final_op, final_ops_feed_dict=final_op_feed_dict, - hooks=all_hooks, + hooks=hooks, config=session_config) @@ -261,7 +256,7 @@ def evaluation_loop(master, configure the `Session`. If left as `None`, the default will be used. timeout: The maximum amount of time to wait between checkpoints. If left as `None`, then the process will wait indefinitely. - hooks: A list of additional `SessionRunHook` objects to pass during + hooks: A list of additional SessionRunHook objects to pass during repeated evaluations. Returns: diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index 870f504d10..d9e0f54b72 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import glob import os -import shutil import time import numpy as np @@ -30,8 +29,6 @@ from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib from tensorflow.core.protobuf import saver_pb2 -from tensorflow.python.debug.lib import debug_data -from tensorflow.python.debug.wrappers import hooks from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -233,7 +230,11 @@ class SingleEvaluationTest(test.TestCase): with self.assertRaises(errors.NotFoundError): evaluation.evaluate_once('', checkpoint_path, log_dir) - def _prepareCheckpoint(self, checkpoint_path): + def testRestoredModelPerformance(self): + checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') + log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') + + # First, save out the current model to a checkpoint: init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1) @@ -241,13 +242,6 @@ class SingleEvaluationTest(test.TestCase): sess.run(init_op) saver.save(sess, checkpoint_path) - def testRestoredModelPerformance(self): - checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') - log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') - - # First, save out the current model to a checkpoint: - self._prepareCheckpoint(checkpoint_path) - # Next, determine the metric to evaluate: value_op, update_op = metric_ops.streaming_accuracy(self._predictions, self._labels) @@ -257,36 +251,6 @@ class SingleEvaluationTest(test.TestCase): '', checkpoint_path, log_dir, eval_op=update_op, final_op=value_op) self.assertAlmostEqual(accuracy_value, self._expected_accuracy) - def testAdditionalHooks(self): - checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') - log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') - - # First, save out the current model to a checkpoint: - self._prepareCheckpoint(checkpoint_path) - - # Next, determine the metric to evaluate: - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) - - dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir') - dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False) - try: - # Run the evaluation and verify the results: - accuracy_value = evaluation.evaluate_once( - '', checkpoint_path, log_dir, eval_op=update_op, final_op=value_op, - hooks=[dumping_hook]) - self.assertAlmostEqual(accuracy_value, self._expected_accuracy) - - dump = debug_data.DebugDumpDir( - glob.glob(os.path.join(dumping_root, 'run_*'))[0]) - # Here we simply assert that the dumped data has been loaded and is - # non-empty. We do not care about the detailed model-internal tensors or - # their values. - self.assertTrue(dump.dumped_tensor_data) - finally: - if os.path.isdir(dumping_root): - shutil.rmtree(dumping_root) - if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py index b4fd2580c2..576444214d 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py @@ -386,7 +386,7 @@ class ResnetCompleteNetworkTest(test.TestCase): inputs, None, is_training=False, global_pool=False) sess.run(variables.global_variables_initializer()) self.assertAllClose( - output.eval(), expected.eval(), atol=1e-4, rtol=1e-4) + output.eval(), expected.eval(), atol=2e-4, rtol=1e-4) def testUnknownBatchSize(self): batch = 2 diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index 3c60d2bb56..da23f1c380 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -26,18 +26,12 @@ py_test( deps = [ ":summary_ops", ":summary_test_util", - "//tensorflow/python:array_ops", "//tensorflow/python:errors", - "//tensorflow/python:framework", "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:ops", "//tensorflow/python:platform", - "//tensorflow/python:state_ops", "//tensorflow/python:training", "//tensorflow/python/eager:function", "//tensorflow/python/eager:test", - "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index 813e8b2b09..ca82ea094c 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -28,13 +28,11 @@ from __future__ import print_function from tensorflow.contrib.summary.summary_ops import all_summary_ops from tensorflow.contrib.summary.summary_ops import always_record_summaries from tensorflow.contrib.summary.summary_ops import audio -from tensorflow.contrib.summary.summary_ops import create_summary_db_writer from tensorflow.contrib.summary.summary_ops import create_summary_file_writer from tensorflow.contrib.summary.summary_ops import eval_dir from tensorflow.contrib.summary.summary_ops import generic from tensorflow.contrib.summary.summary_ops import histogram from tensorflow.contrib.summary.summary_ops import image -from tensorflow.contrib.summary.summary_ops import import_event from tensorflow.contrib.summary.summary_ops import never_record_summaries from tensorflow.contrib.summary.summary_ops import record_summaries_every_n_global_steps from tensorflow.contrib.summary.summary_ops import scalar diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index f6be99f6ae..56e3198593 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -19,12 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import getpass import os -import re -import time - -import six from tensorflow.contrib.summary import gen_summary_ops from tensorflow.python.eager import context @@ -47,10 +42,6 @@ _SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries" _SUMMARY_COLLECTION_NAME = "_SUMMARY_V2" _SUMMARY_WRITER_INIT_COLLECTION_NAME = "_SUMMARY_WRITER_V2" -_EXPERIMENT_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,256}$") -_RUN_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,512}$") -_USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I) - def should_record_summaries(): """Returns boolean Tensor which is true if summaries should be recorded.""" @@ -66,14 +57,12 @@ def should_record_summaries(): # TODO(apassos) consider how to handle local step here. @tf_contextlib.contextmanager -def record_summaries_every_n_global_steps(n, global_step=None): +def record_summaries_every_n_global_steps(n): """Sets the should_record_summaries Tensor to true if global_step % n == 0.""" - if global_step is None: - global_step = training_util.get_global_step() collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) old = collection_ref[:] with ops.device("cpu:0"): - collection_ref[:] = [math_ops.equal(global_step % n, 0)] + collection_ref[:] = [math_ops.equal(training_util.get_global_step() % n, 0)] yield collection_ref[:] = old @@ -141,8 +130,7 @@ def create_summary_file_writer(logdir, flush once the queue gets bigger than this. flush_millis: the largest interval between flushes. filename_suffix: optional suffix for the event file name. - name: Shared name for this SummaryWriter resource stored to default - Graph. + name: name for the summary writer. Returns: Either a summary writer or an empty object which can be used as a @@ -157,81 +145,14 @@ def create_summary_file_writer(logdir, flush_millis = constant_op.constant(2 * 60 * 1000) if filename_suffix is None: filename_suffix = constant_op.constant("") - return _make_summary_writer( - name, - gen_summary_ops.create_summary_file_writer, - logdir=logdir, - max_queue=max_queue, - flush_millis=flush_millis, - filename_suffix=filename_suffix) - - -def create_summary_db_writer(db_uri, - experiment_name=None, - run_name=None, - user_name=None, - name=None): - """Creates a summary database writer in the current context. - - This can be used to write tensors from the execution graph directly - to a database. Only SQLite is supported right now. This function - will create the schema if it doesn't exist. Entries in the Users, - Experiments, and Runs tables will be created automatically if they - don't already exist. - - Args: - db_uri: For example "file:/tmp/foo.sqlite". - experiment_name: Defaults to YYYY-MM-DD in local time if None. - Empty string means the Run will not be associated with an - Experiment. Can't contain ASCII control characters or <>. Case - sensitive. - run_name: Defaults to HH:MM:SS in local time if None. Empty string - means a Tag will not be associated with any Run. Can't contain - ASCII control characters or <>. Case sensitive. - user_name: Defaults to system username if None. Empty means the - Experiment will not be associated with a User. Must be valid as - both a DNS label and Linux username. - name: Shared name for this SummaryWriter resource stored to default - Graph. - - Returns: - A new SummaryWriter instance. - """ - with ops.device("cpu:0"): - if experiment_name is None: - experiment_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) - if run_name is None: - run_name = time.strftime("%H:%M:%S", time.localtime(time.time())) - if user_name is None: - user_name = getpass.getuser() - experiment_name = _cleanse_string( - "experiment_name", _EXPERIMENT_NAME_PATTERNS, experiment_name) - run_name = _cleanse_string("run_name", _RUN_NAME_PATTERNS, run_name) - user_name = _cleanse_string("user_name", _USER_NAME_PATTERNS, user_name) - return _make_summary_writer( - name, - gen_summary_ops.create_summary_db_writer, - db_uri=db_uri, - experiment_name=experiment_name, - run_name=run_name, - user_name=user_name) - - -def _make_summary_writer(name, factory, **kwargs): - resource = gen_summary_ops.summary_writer(shared_name=name) - # TODO(apassos): Consider doing this instead. - # node = factory(resource, **kwargs) - # if not context.in_eager_mode(): - # ops.get_default_session().run(node) - ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, - factory(resource, **kwargs)) - return SummaryWriter(resource) - - -def _cleanse_string(name, pattern, value): - if isinstance(value, six.string_types) and pattern.search(value) is None: - raise ValueError("%s (%s) must match %s" % (name, value, pattern.pattern)) - return ops.convert_to_tensor(value, dtypes.string) + resource = gen_summary_ops.summary_writer(shared_name=name) + # TODO(apassos) ensure the initialization op runs when in graph mode; + # consider calling session.run here. + ops.add_to_collection( + _SUMMARY_WRITER_INIT_COLLECTION_NAME, + gen_summary_ops.create_summary_file_writer( + resource, logdir, max_queue, flush_millis, filename_suffix)) + return SummaryWriter(resource) def _nothing(): @@ -283,81 +204,68 @@ def summary_writer_function(name, tensor, function, family=None): return op -def generic(name, tensor, metadata=None, family=None, global_step=None): +def generic(name, tensor, metadata, family=None): """Writes a tensor summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): - if metadata is None: - serialized_metadata = constant_op.constant("") - elif hasattr(metadata, "SerializeToString"): - serialized_metadata = constant_op.constant(metadata.SerializeToString()) - else: - serialized_metadata = metadata # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_summary( context.context().summary_writer_resource, - global_step, array_ops.identity(tensor), - tag, serialized_metadata, name=scope) + training_util.get_global_step(), array_ops.identity(tensor), + tag, metadata, name=scope) return summary_writer_function(name, tensor, function, family=family) -def scalar(name, tensor, family=None, global_step=None): +def scalar(name, tensor, family=None): """Writes a scalar summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_scalar_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + training_util.get_global_step(), tag, array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def histogram(name, tensor, family=None, global_step=None): +def histogram(name, tensor, family=None): """Writes a histogram summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_histogram_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + training_util.get_global_step(), tag, array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def image(name, tensor, bad_color=None, max_images=3, family=None, - global_step=None): +def image(name, tensor, bad_color=None, max_images=3, family=None): """Writes an image summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): bad_color_ = (constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8) if bad_color is None else bad_color) # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_image_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + training_util.get_global_step(), tag, array_ops.identity(tensor), bad_color_, max_images, name=scope) return summary_writer_function(name, tensor, function, family=family) -def audio(name, tensor, sample_rate, max_outputs, family=None, - global_step=None): +def audio(name, tensor, sample_rate, max_outputs, family=None): """Writes an audio summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_audio_summary( context.context().summary_writer_resource, - global_step, + training_util.get_global_step(), tag, array_ops.identity(tensor), sample_rate=sample_rate, @@ -367,26 +275,6 @@ def audio(name, tensor, sample_rate, max_outputs, family=None, return summary_writer_function(name, tensor, function, family=family) -def import_event(tensor, name=None): - """Writes a tf.Event binary proto. - - When using create_summary_db_writer(), this can be used alongside - tf.TFRecordReader to load event logs into the database. Please note - that this is lower level than the other summary functions and will - ignore any conditions set by methods like should_record_summaries(). - - Args: - tensor: A `Tensor` of type `string` containing a serialized `Event` - proto. - name: A name for the operation (optional). - - Returns: - The created Operation. - """ - return gen_summary_ops.import_event( - context.context().summary_writer_resource, tensor, name=name) - - def eval_dir(model_dir, name=None): """Construct a logdir for an eval summary writer.""" return os.path.join(model_dir, "eval" if not name else "eval_" + name) diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index 6e1a746815..de7ae6ec27 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -17,22 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import functools -import os import tempfile -import six -import sqlite3 - from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util from tensorflow.python.eager import function from tensorflow.python.eager import test -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import state_ops from tensorflow.python.platform import gfile from tensorflow.python.training import training_util @@ -94,120 +86,6 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') - def testSummaryGlobalStep(self): - global_step = training_util.get_or_create_global_step() - logdir = tempfile.mkdtemp() - with summary_ops.create_summary_file_writer( - logdir, max_queue=0, - name='t2').as_default(), summary_ops.always_record_summaries(): - - summary_ops.scalar('scalar', 2.0, global_step=global_step) - - events = summary_test_util.events_from_file(logdir) - self.assertEqual(len(events), 2) - self.assertEqual(events[1].summary.value[0].tag, 'scalar') - - -class DbTest(test_util.TensorFlowTestCase): - - def setUp(self): - self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite') - if os.path.exists(self.db_path): - os.unlink(self.db_path) - self.db = sqlite3.connect(self.db_path) - self.create_summary_db_writer = functools.partial( - summary_ops.create_summary_db_writer, - db_uri=self.db_path, - experiment_name='experiment', - run_name='run', - user_name='user') - - def tearDown(self): - self.db.close() - - def testIntegerSummaries(self): - step = training_util.create_global_step() - - def adder(x, y): - state_ops.assign_add(step, 1) - summary_ops.generic('x', x) - summary_ops.generic('y', y) - sum_ = x + y - summary_ops.generic('sum', sum_) - return sum_ - - with summary_ops.always_record_summaries(): - with self.create_summary_db_writer().as_default(): - self.assertEqual(5, adder(int64(2), int64(3)).numpy()) - - six.assertCountEqual(self, [1, 1, 1], - get_all(self.db, 'SELECT step FROM Tensors')) - six.assertCountEqual(self, ['x', 'y', 'sum'], - get_all(self.db, 'SELECT tag_name FROM Tags')) - x_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "x"') - y_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "y"') - sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"') - - with summary_ops.always_record_summaries(): - with self.create_summary_db_writer().as_default(): - self.assertEqual(9, adder(int64(4), int64(5)).numpy()) - - six.assertCountEqual(self, [1, 1, 1, 2, 2, 2], - get_all(self.db, 'SELECT step FROM Tensors')) - six.assertCountEqual(self, [x_id, y_id, sum_id], - get_all(self.db, 'SELECT tag_id FROM Tags')) - self.assertEqual(2, get_tensor(self.db, x_id, 1)) - self.assertEqual(3, get_tensor(self.db, y_id, 1)) - self.assertEqual(5, get_tensor(self.db, sum_id, 1)) - self.assertEqual(4, get_tensor(self.db, x_id, 2)) - self.assertEqual(5, get_tensor(self.db, y_id, 2)) - self.assertEqual(9, get_tensor(self.db, sum_id, 2)) - six.assertCountEqual( - self, ['experiment'], - get_all(self.db, 'SELECT experiment_name FROM Experiments')) - six.assertCountEqual(self, ['run'], - get_all(self.db, 'SELECT run_name FROM Runs')) - six.assertCountEqual(self, ['user'], - get_all(self.db, 'SELECT user_name FROM Users')) - - def testBadExperimentName(self): - with self.assertRaises(ValueError): - self.create_summary_db_writer(experiment_name='\0') - - def testBadRunName(self): - with self.assertRaises(ValueError): - self.create_summary_db_writer(run_name='\0') - - def testBadUserName(self): - with self.assertRaises(ValueError): - self.create_summary_db_writer(user_name='-hi') - with self.assertRaises(ValueError): - self.create_summary_db_writer(user_name='hi-') - with self.assertRaises(ValueError): - self.create_summary_db_writer(user_name='@') - - -def get_one(db, q, *p): - return db.execute(q, p).fetchone()[0] - - -def get_all(db, q, *p): - return unroll(db.execute(q, p).fetchall()) - - -def get_tensor(db, tag_id, step): - return get_one( - db, 'SELECT tensor FROM Tensors WHERE tag_id = ? AND step = ?', tag_id, - step) - - -def int64(x): - return array_ops.constant(x, dtypes.int64) - - -def unroll(list_of_tuples): - return sum(list_of_tuples, ()) - if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD index 068e862650..d8bbf87d2c 100644 --- a/tensorflow/contrib/tensorboard/db/BUILD +++ b/tensorflow/contrib/tensorboard/db/BUILD @@ -45,12 +45,10 @@ cc_library( tf_cc_test( name = "summary_db_writer_test", - size = "small", srcs = ["summary_db_writer_test.cc"], deps = [ ":summary_db_writer", "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/lib/db:sqlite", diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index a26ad61660..df64e36305 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -15,12 +15,10 @@ limitations under the License. #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" #include "tensorflow/contrib/tensorboard/db/schema.h" -#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/db/sqlite.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/snappy.h" -#include "tensorflow/core/util/event.pb.h" namespace tensorflow { namespace { @@ -88,19 +86,13 @@ class SummaryDbWriter : public SummaryWriterInterface { TF_RETURN_IF_ERROR(BindTensor(t)); break; } - return insert_tensor_.StepAndReset(); + TF_RETURN_IF_ERROR(insert_tensor_.StepAndReset()); + return Status::OK(); } Status WriteEvent(std::unique_ptr e) override { - mutex_lock ml(mu_); - TF_RETURN_IF_ERROR(InitializeParents()); - if (e->what_case() == Event::WhatCase::kSummary) { - const Summary& summary = e->summary(); - for (int i = 0; i < summary.value_size(); ++i) { - TF_RETURN_IF_ERROR(WriteSummary(e.get(), summary.value(i))); - } - } - return Status::OK(); + // TODO(@jart): This will be used to load event logs. + return errors::Unimplemented("WriteEvent"); } Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { @@ -255,24 +247,6 @@ class SummaryDbWriter : public SummaryWriterInterface { return Status::OK(); } - Status WriteSummary(const Event* e, const Summary::Value& summary) - EXCLUSIVE_LOCKS_REQUIRED(mu_) { - int64 tag_id; - TF_RETURN_IF_ERROR(GetTagId(run_id_, summary.tag(), &tag_id)); - insert_tensor_.BindInt(1, tag_id); - insert_tensor_.BindInt(2, e->step()); - insert_tensor_.BindDouble(3, e->wall_time()); - switch (summary.value_case()) { - case Summary::Value::ValueCase::kSimpleValue: - insert_tensor_.BindDouble(4, summary.simple_value()); - break; - default: - // TODO(@jart): Handle the rest. - return Status::OK(); - } - return insert_tensor_.StepAndReset(); - } - mutex mu_; Env* env_; std::shared_ptr db_ GUARDED_BY(mu_); diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index c1af51e7b7..d32904f97c 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -14,19 +14,14 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" -#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/db/sqlite.h" -#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" -#include "tensorflow/core/util/event.pb.h" namespace tensorflow { namespace { -const float kTolerance = 1e-5; - Tensor MakeScalarInt64(int64 x) { Tensor t(DT_INT64, TensorShape({})); t.scalar()() = x; @@ -46,7 +41,7 @@ class FakeClockEnv : public EnvWrapper { class SummaryDbWriterTest : public ::testing::Test { protected: - void SetUp() override { db_ = Sqlite::Open(":memory:").ValueOrDie(); } + void SetUp() override { db_ = Sqlite::Open("file::memory:").ValueOrDie(); } void TearDown() override { if (writer_ != nullptr) { @@ -163,54 +158,5 @@ TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) { QueryString("SELECT tensor FROM Tensors WHERE step = 2").empty()); } -TEST_F(SummaryDbWriterTest, EmptyParentNames_NoParentsCreated) { - TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); - TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy", - "this-is-metaaa")); - TF_ASSERT_OK(writer_->Flush()); - ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Users")); - ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments")); - ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Runs")); - ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tags")); - ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tensors")); -} - -TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) { - TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); - std::unique_ptr e{new Event}; - e->set_step(7); - e->set_wall_time(123.456); - Summary::Value* s = e->mutable_summary()->add_value(); - s->set_tag("π"); - s->set_simple_value(3.14f); - s = e->mutable_summary()->add_value(); - s->set_tag("φ"); - s->set_simple_value(1.61f); - TF_ASSERT_OK(writer_->WriteEvent(std::move(e))); - TF_ASSERT_OK(writer_->Flush()); - ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tags")); - ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tensors")); - int64 tag1_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'π'"); - int64 tag2_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'φ'"); - EXPECT_GT(tag1_id, 0LL); - EXPECT_GT(tag2_id, 0LL); - EXPECT_EQ(123.456, QueryDouble(strings::StrCat( - "SELECT computed_time FROM Tensors WHERE tag_id = ", - tag1_id, " AND step = 7"))); - EXPECT_EQ(123.456, QueryDouble(strings::StrCat( - "SELECT computed_time FROM Tensors WHERE tag_id = ", - tag2_id, " AND step = 7"))); - EXPECT_NEAR(3.14, - QueryDouble(strings::StrCat( - "SELECT tensor FROM Tensors WHERE tag_id = ", tag1_id, - " AND step = 7")), - kTolerance); // Summary::simple_value is float - EXPECT_NEAR(1.61, - QueryDouble(strings::StrCat( - "SELECT tensor FROM Tensors WHERE tag_id = ", tag2_id, - " AND step = 7")), - kTolerance); -} - } // namespace } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 916b9b3082..3965c087a1 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -45,7 +45,10 @@ class TPUConfig( is invoked once on each host. To be precise, with a global batch size `train_batch_size` in `TPUEstimator` constructor, the batch size for each shard is `train_batch_size` // #hosts. With Per-Core input pipeline - deployment, the shard batch size is `train_batch_size` // #cores. + deployment, the shard batch size is `train_batch_size` // #cores. Note + that this only works for single-host TPU training now (tracked in + b/67051042). For multi-host, please use Per-Core, i.e., `False` for + `per_host_input_for_training`. tpu_job_name: The name of the TPU job. Typically, this name is auto-inferred within TPUEstimator, however when using ClusterSpec propagation in more esoteric cluster configurations, you may need to specify the job name as a @@ -106,12 +109,3 @@ class RunConfig(run_config_lib.RunConfig): @property def tpu_config(self): return self._tpu_config - - def replace(self, **kwargs): - if 'tpu_config' not in kwargs: - return super(RunConfig, self).replace(**kwargs) - - tpu_config = kwargs.pop('tpu_config') - new_instance = super(RunConfig, self).replace(**kwargs) - new_instance._tpu_config = tpu_config # pylint: disable=protected-access - return new_instance diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 07877fcc76..060b3f9129 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -66,7 +66,7 @@ _CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] # TODO(b/65703635): Flip the value and remove all dead code. -_WRAP_INPUT_FN_INTO_WHILE_LOOP = False +_WRAP_INPUT_FN_INTO_WHILE_LOOP = True def _create_global_step(graph): @@ -232,10 +232,8 @@ class _TPUContext(object): mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size) # On TPU - if self.is_input_sharded_per_core(): - return global_batch_size // self.num_cores - else: - return global_batch_size // self.num_hosts + return (global_batch_size // self.num_cores + if self.is_input_sharded_per_core() else global_batch_size) @property def batch_size_for_model_fn(self): @@ -537,15 +535,13 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): session, self._dequeue_ops) def before_run(self, run_context): - iterations = run_context.session.run(self._iterations_per_loop_var) - - logging.info('Enqueue next (%d) batch(es) of data to infeed.', iterations) + logging.info('Enqueue next batch of data to infeed.') + iterations = run_context.session.run(self._iterations_per_loop_var) self._infeed_thd_controller.send_next_batch_signal(iterations) if self._dequeue_ops is not None: # TODO(xiejw): Refactor the outfeed dequeue into tf.while_loop. - logging.info( - 'Dequeue next (%d) batch(es) of data from outfeed.', iterations) + logging.info('Dequeue next batch of data from outfeed.') self._outfeed_thd_controller.send_next_batch_signal(iterations) def end(self, session): @@ -684,40 +680,6 @@ def generate_per_core_enqueue_ops_fn_for_host( return enqueue_ops_fn, (lambda: infeed_queue_holder['instance']) -def generate_per_host_enqueue_ops_fn_for_host( - ctx, input_fn, inputs_structure_recorder, batch_axis, device): - """Generates infeed enqueue ops for per-host input_fn on a single host.""" - infeed_queue_holder = {'instance': None} - - def enqueue_ops_fn(): - with ops.device(device): - num_cores_per_host = ctx.num_of_cores_per_host - inputs = input_fn() - if isinstance(inputs, tuple): - features, labels = inputs - else: - features, labels = inputs, None - inputs_structure_recorder.validate_and_record_structure( - features, labels) - unsharded_tensor_list = ( - inputs_structure_recorder.flatten_features_and_labels( - features, labels)) - - infeed_queue = tpu_feed.InfeedQueue( - tuple_types=[t.dtype for t in unsharded_tensor_list], - tuple_shapes=[t.shape for t in unsharded_tensor_list], - shard_dimensions=batch_axis) - infeed_queue_holder['instance'] = infeed_queue - infeed_queue.set_number_of_shards(num_cores_per_host) - - per_host_enqueue_ops = ( - infeed_queue.split_inputs_and_generate_enqueue_ops( - unsharded_tensor_list, - placement_function=lambda x: device)) - return per_host_enqueue_ops - return enqueue_ops_fn, (lambda: infeed_queue_holder['instance']) - - class _InputPipeline(object): """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue. @@ -880,8 +842,6 @@ class _InputPipeline(object): # structure is recorded. enqueue_ops = self._invoke_input_fn_and_record_structure() - self._validate_input_pipeline() - def dequeue_fn(): """dequeue_fn is used by TPU to retrieve the tensors.""" values = self._infeed_queue.generate_dequeue_op() @@ -892,15 +852,15 @@ class _InputPipeline(object): return (enqueue_ops, dequeue_fn) def _invoke_input_fn_and_record_structure(self): - """Deploys the input pipeline and record input structure.""" - enqueue_ops = [] - infeed_queues = [] - num_hosts = self._ctx.num_hosts - tpu_host_placement_fn = self._ctx.tpu_host_placement_function if self._sharded_per_core: # Per-Core input pipeline deployment. + tpu_host_placement_fn = self._ctx.tpu_host_placement_function + enqueue_ops = [] + infeed_queues = [] + # Invoke input pipeline for each core and placed on the corresponding # host. + num_hosts = self._ctx.num_hosts for host_id in range(num_hosts): host_device = tpu_host_placement_fn(host_id=host_id) with ops.device(host_device): @@ -917,52 +877,48 @@ class _InputPipeline(object): # Infeed_queue_getter must be called after enqueue_ops_fn is called. infeed_queues.append(infeed_queue_getter()) + # infeed_queue is used to generate dequeue ops. The only thing it uses for + # dequeue is dtypes and types. So, any one can be used. Here, grab the + # first one. + self._infeed_queue = infeed_queues[0] + return enqueue_ops + else: - for host_id in range(num_hosts): - host_device = tpu_host_placement_fn(host_id=host_id) + # TODO(b/67051042): Extend this to multi-host support. + host_id = 0 + host_device = self._ctx.tpu_host_placement_function(host_id=host_id) + def enqueue_fn(): with ops.device(host_device): with ops.name_scope('input_pipeline_task%d' % (host_id)): - enqueue_ops_fn, infeed_queue_getter = ( - generate_per_host_enqueue_ops_fn_for_host( - self._ctx, self._input_fn, self._inputs_structure_recorder, - self._batch_axis, host_device)) - - if _WRAP_INPUT_FN_INTO_WHILE_LOOP: - enqueue_ops.append(_wrap_computation_in_while_loop( - device=host_device, op_fn=enqueue_ops_fn)) + inputs = self._input_fn() + if isinstance(inputs, tuple): + features, labels = inputs else: - enqueue_ops.append(enqueue_ops_fn()) - infeed_queues.append(infeed_queue_getter()) - # infeed_queue is used to generate dequeue ops. The only thing it uses for - # dequeue is dtypes and types. So, any one can be used. Here, grab the - # first one. - self._infeed_queue = infeed_queues[0] - return enqueue_ops - - def _validate_input_pipeline(self): - # Perform some sanity checks to log user friendly information. We should - # error out to give users better error message. But, if - # _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break - # user code, so, log a warning. - if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS): - err_msg = ('Input pipeline contains one or more QueueRunners. ' - 'These are not supported via TPUEstimator. You must convert ' - 'your input pipeline to use `tf.data` instead (see ' - 'https://www.tensorflow.org/programmers_guide/datasets for ' - 'instructions.') - if _WRAP_INPUT_FN_INTO_WHILE_LOOP: - raise RuntimeError(err_msg) - else: - logging.warn(err_msg) - elif ops.get_default_graph().get_collection(ops.GraphKeys.SUMMARIES): - # Queue Runner has summary Ops by default. So here we use elif to do - # necessary checks for Dataset input pipeline only. - err_msg = ('Input pipeline contains `tf.summary` operations. ' - 'These are not currently supported.') + features, labels = inputs, None + self._inputs_structure_recorder.validate_and_record_structure( + features, labels) + unsharded_tensor_list = ( + self._inputs_structure_recorder.flatten_features_and_labels( + features, labels)) + + self._infeed_queue = tpu_feed.InfeedQueue( + tuple_types=[t.dtype for t in unsharded_tensor_list], + tuple_shapes=[t.shape for t in unsharded_tensor_list], + shard_dimensions=self._batch_axis) + self._infeed_queue.set_number_of_shards(self._ctx.num_cores) + + def placement_fn(core_id): + return self._ctx.tpu_host_placement_function(core_id=core_id) + return ( + self._infeed_queue.split_inputs_and_generate_enqueue_ops( + unsharded_tensor_list, + placement_function=placement_fn)) + if _WRAP_INPUT_FN_INTO_WHILE_LOOP: - raise RuntimeError(err_msg) + return _wrap_computation_in_while_loop(device=host_device, + op_fn=enqueue_fn) else: - logging.warn(err_msg) + return enqueue_fn() class _ModelFnWrapper(object): @@ -1440,6 +1396,12 @@ class TPUEstimator(estimator_lib.Estimator): 'eval batch size {} must be divisible by number of shards {}' .format(eval_batch_size, config.tpu_config.num_shards)) + if (config.tpu_config.num_shards > 8 and + config.tpu_config.per_host_input_for_training): + # TODO(b/67051042): Support per_host input pipelines when num_shards > 8 + raise NotImplementedError( + 'Per-host input pipelines only available for num_shards <= 8') + # Verifies the model_fn signature according to Estimator framework. estimator_lib._verify_model_fn_args(model_fn, params) # pylint: disable=protected-access # We cannot store config and params in this constructor as parent diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 7db625cdd5..391899b34f 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function import json -import numbers import re import six @@ -77,7 +76,7 @@ def _process_scalar_value(name, parse_fn, var_type, m_dict, values, function. Raises: - ValueError: If the name has already been used. + ValueError: If the name has already been sued. """ try: parsed_value = parse_fn(m_dict['val']) @@ -139,54 +138,6 @@ def _process_list_value(name, parse_fn, var_type, m_dict, values, _parse_fail(name, var_type, m_dict['vals'], values) -def _cast_to_type_if_compatible(name, param_type, value): - """Cast hparam to the provided type, if compatible. - - Args: - name: Name of the hparam to be cast. - param_type: The type of the hparam. - value: The value to be cast, if compatible. - - Returns: - The result of casting `value` to `param_type`. - - Raises: - ValueError: If the type of `value` is not compatible with param_type. - * If `param_type` is a string type, but `value` is not. - * If `param_type` is a boolean, but `value` is not, or vice versa. - * If `param_type` is an integer type, but `value` is not. - * If `param_type` is a float type, but `value` is not a numeric type. - """ - fail_msg = ( - "Could not cast hparam '%s' of type '%s' from value %r" % - (name, param_type, value)) - - # Some callers use None, for which we can't do any casting/checking. :( - if issubclass(param_type, type(None)): - return value - - # Avoid converting a non-string type to a string. - if (issubclass(param_type, (six.string_types, six.binary_type)) and - not isinstance(value, (six.string_types, six.binary_type))): - raise ValueError(fail_msg) - - # Avoid converting a number or string type to a boolean or vice versa. - if issubclass(param_type, bool) != isinstance(value, bool): - raise ValueError(fail_msg) - - # Avoid converting float to an integer (the reverse is fine). - if (issubclass(param_type, numbers.Integral) and - not isinstance(value, numbers.Integral)): - raise ValueError(fail_msg) - - # Avoid converting a non-numeric type to a numeric type. - if (issubclass(param_type, numbers.Number) and - not isinstance(value, numbers.Number)): - raise ValueError(fail_msg) - - return param_type(value) - - def parse_values(values, type_map): """Parses hyperparameter values from a string into a python map. @@ -487,18 +438,17 @@ class HParams(object): Raises: ValueError: If there is a type mismatch. """ - param_type, is_list = self._hparam_types[name] + _, is_list = self._hparam_types[name] if isinstance(value, list): if not is_list: raise ValueError( 'Must not pass a list for single-valued parameter: %s' % name) - setattr(self, name, [ - _cast_to_type_if_compatible(name, param_type, v) for v in value]) + setattr(self, name, value) else: if is_list: raise ValueError( 'Must pass a list for multi-valued parameter: %s.' % name) - setattr(self, name, _cast_to_type_if_compatible(name, param_type, value)) + setattr(self, name, value) def parse(self, values): """Override hyperparameter values, parsing new values from a string. diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py index 949c262f5b..f54514cefd 100644 --- a/tensorflow/contrib/training/python/training/hparam_test.py +++ b/tensorflow/contrib/training/python/training/hparam_test.py @@ -318,42 +318,13 @@ class HParamsTest(test.TestCase): self.assertEqual(3.0, hparams.b) self.assertEqual('relu4', hparams.c_c) - def testSetHParamListNonListMismatch(self): + def testSetHParamTypeMismatch(self): hparams = hparam.HParams(a=1, b=[2.0, 3.0]) with self.assertRaisesRegexp(ValueError, r'Must not pass a list'): hparams.set_hparam('a', [1.0]) with self.assertRaisesRegexp(ValueError, r'Must pass a list'): hparams.set_hparam('b', 1.0) - def testSetHParamTypeMismatch(self): - hparams = hparam.HParams( - int_=1, str_='str', bool_=True, float_=1.1, list_int=[1, 2], none=None) - - with self.assertRaises(ValueError): - hparams.set_hparam('str_', 2.2) - - with self.assertRaises(ValueError): - hparams.set_hparam('int_', False) - - with self.assertRaises(ValueError): - hparams.set_hparam('bool_', 1) - - with self.assertRaises(ValueError): - hparams.set_hparam('int_', 2.2) - - with self.assertRaises(ValueError): - hparams.set_hparam('list_int', [2, 3.3]) - - with self.assertRaises(ValueError): - hparams.set_hparam('int_', '2') - - # Casting int to float is OK - hparams.set_hparam('float_', 1) - - # Getting stuck with NoneType :( - hparams.set_hparam('none', '1') - self.assertEqual('1', hparams.none) - def testNonProtoFails(self): with self.assertRaisesRegexp(AssertionError, ''): hparam.HParams(hparam_def=1) diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index da5f2b0223..dcb390b0a5 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -1,4 +1,4 @@ -## How to compile and use RDMA-enabled TensorFlow +## How to compile, use and configure RDMA-enabled TensorFlow 1. Follow the regular TF compilation instructions. During configure step, if you want ibverbs based RDMA support, answer yes to this question: ```Do you wish to build TensorFlow with VERBS-RDMA support [y/N]``` @@ -7,6 +7,18 @@ ```server = tf.train.Server(cluster, job_name="local", task_index=0, protocol='grpc+verbs') # default protocol is 'grpc'``` +3. RDMA configuration is done by setting the following environment variables: + * **RDMA_DEVICE**: The RDMA device name to be used. If not defined by user, a default device with an active port will be set if exists. + * **RDMA_DEVICE_PORT**: The port within the selected device. Not relevant if RDMA_DEVICE is not defined. If not defined by user, a default active port will be set if exists. + * **RDMA_GID_INDEX**: The GID index of the port. If not defined by user, a default suitable GID index will be set (RoCEV2 is favourable as default). + * **RDMA_QP_PKEY_INDEX**: The Pkey for the QP. If not defined by user, the default value is 0. + * **RDMA_QP_QUEUE_DEPTH**: TX/RX queue size for the QP. If not defined by user, the default value is 1024. + * **RDMA_QP_TIMEOUT**: The retransmission timeout for QPs. If not defined by user, the default value is 14. + * **RDMA_QP_RETRY_COUNT**: Number of retransmission for QPs. If not defined by user, the default value is 7. + * **RDMA_QP_SL**: Service level configuration for QOS and ECN, valid values are 0-7. If not defined by user, the default value is 0. + * **RDMA_QP_MTU**: MTU configuration for the QPs. If not defined by user, the default value is active MTU from query_port. + * **RDMA_TRAFFIC_CLASS**: Traffic class configuration for QP, in case of DSCP trust level QoS configuration. If not defined by user, the default value is 0. For more info see [HowTo Configure Trust state on Mellanox Adapters](https://community.mellanox.com/docs/DOC-2866). + ## Overview The design is based on TensorFlow r1.0. An RDMA path is added between servers for tensor transfer (weights, gradients, etc). The existing GRPC path remains and is responsible for "administrative" tasks, such as setting up the RDMA path, exchanging computation graphs, etc. diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 26e18b28aa..331943a3ef 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/contrib/verbs/rdma.h" #include +#include #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -33,6 +34,8 @@ limitations under the License. namespace tensorflow { +#define RoCE_V2 "RoCE v2" + namespace { // hash name to 32-bit integer uint32_t NameHash(const string& name) { @@ -66,16 +69,337 @@ string MessageTypeToString(RdmaMessageType rmt) { } } // namespace -ibv_context* open_default_device() { +// Function to get environment variable +// Args: +// var_name - the name of the environmental variable +// Returns: +// string with it's value or empty string if not set +string get_env_var(char const* var_name) { + char const* var_temp = getenv(var_name); + + return (var_temp == NULL) ? string() : string(var_temp); +} + +// Function to open device +// Args: +// ibv_dev device to open +// Returns: +// context of the opened device +ibv_context* open_device(ibv_device* ibv_dev) { + ibv_context* context = ibv_open_device(ibv_dev); + + CHECK(context) << "Open context failed for " << ibv_get_device_name(ibv_dev); + return context; +} + +// Function to count the number of active ports for device +// Args: +// device - to check active ports +// Returns: +// number of active ports of the given device +int get_dev_active_port_count(ibv_device* device) { + ibv_device_attr device_att; + ibv_port_attr port_attr; + ibv_context* context = NULL; + int rc, port_index, active_ports = 0; + + context = ibv_open_device(device); + CHECK(context) << "Open context failed for " << ibv_get_device_name(device); + rc = ibv_query_device(context, &device_att); + CHECK(!rc) << "Failed to query the device"; + + for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { + rc = ibv_query_port(context, port_index, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_index; + if (port_attr.state == IBV_PORT_ACTIVE) { + active_ports++; + } + } + ibv_close_device(context); + return active_ports; +} + +// Function to set device. If RDMA_DEVICE not set, search for device with active +// port. +// Fails if more than one device with active port was found. +// Returns: +// device to use +ibv_device* set_device() { ibv_device** dev_list; - ibv_device* ib_dev; - dev_list = ibv_get_device_list(NULL); + int dev_num, device_index, device_to_open = 0; + int num_devs_with_active_port = 0; + string env_p_rdma_device, str_port_num; + + dev_list = ibv_get_device_list(&dev_num); CHECK(dev_list) << "No InfiniBand device found"; - ib_dev = dev_list[0]; - CHECK(ib_dev) << "No InfiniBand device found"; - ibv_context* context = ibv_open_device(ib_dev); - CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev); - return context; + + env_p_rdma_device = get_env_var("RDMA_DEVICE"); + if (!env_p_rdma_device.empty()) { + for (device_index = 0; device_index < dev_num; device_index++) { + if (!env_p_rdma_device.compare( + ibv_get_device_name(dev_list[device_index]))) { + CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) + << "Device " << ibv_get_device_name(dev_list[device_index]) + << " has no active ports"; + return dev_list[device_index]; + } + } + // check validity of input device + CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; + } else { + // set default device + str_port_num = get_env_var("RDMA_DEVICE_PORT"); + CHECK(str_port_num.empty()) + << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; + for (device_index = 0; device_index < dev_num; device_index++) { + // get port_num + if (get_dev_active_port_count(dev_list[device_index]) > 0) { + num_devs_with_active_port++; + CHECK(num_devs_with_active_port <= 1) << ". More than one device with " + "active port in the system. " + "Please enter RDMA_DEVICE"; + // found device with at least 1 active port + device_to_open = device_index; + } + } + CHECK(num_devs_with_active_port > 0) + << "There is no active port in the system"; + return dev_list[device_to_open]; + } + CHECK(false) << "No device was set!"; + return NULL; // never happens +} + +// Function to set port for device. +// If RDMA_DEVICE_PORT not set, first active port of the device will be set. +// Args: +// context of the device +// Returns: +// port to use +uint8_t set_port(ibv_context* context) { + uint8_t port_num = 0; //0 is illegal port number + string str_port_num; + ibv_device_attr device_att; + ibv_port_attr port_attr; + int rc, port_index; + + rc = ibv_query_device(context, &device_att); + CHECK(!rc) << "Failed to query the device\n"; + + str_port_num = get_env_var("RDMA_DEVICE_PORT"); + // user defined port + if (!str_port_num.empty()) { + port_num = stoi(str_port_num); + CHECK(port_num > 0) << "RDMA_DEVICE_PORT should be positive"; + CHECK(port_num <= device_att.phys_port_cnt) << "RDMA_DEVICE_PORT should be " + "less or equal to amount of " + "available ports"; + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + // check if port id active + CHECK(port_attr.state == IBV_PORT_ACTIVE) + << "Selected RDMA_DEVICE_PORT is not active"; + } + // set default port + else { + for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { + rc = ibv_query_port(context, port_index, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_index; + if (port_attr.state == IBV_PORT_ACTIVE) { + port_num = port_index; + break; + } + } + CHECK_GT(port_num, 0) << "No active ports"; + } + return port_num; +} + +// Function read from sysfs file +// Args: +// dir - directory +// file - file +// buff - buffer for the result +// size - buffer size +// Returns: +// number of bytes were read or -1 if failed +int read_sysfs_file(const char* dir, const char* file, char* buf, size_t size) { + char* path; + int fd; + int len; + + if (asprintf(&path, "%s/%s", dir, file) < 0) return -1; + + fd = open(path, O_RDONLY); + if (fd < 0) { + free(path); + return -1; + } + + len = read(fd, buf, size); + + close(fd); + free(path); + + if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0'; + + return len; +} + +// Function to check if GID index support RoCE V2 +// Args: +// context - device context +// port_num - port number +// index - GID index +// Returns: +// if GID supports RoCE V2 - true, otherwise - false. +bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, + uint8_t index) { + char name[32]; + char buff[41]; + + snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index); + if (read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <= + 0) { + return false; + } + return !strcmp(buff, RoCE_V2); +} + +// Function to set GID index. +// If the port link is IB, no GID index should be selected. +// If Ethernet but RDMA_GID_INDEX not set gid index that supports +// RoCE V2 will be chosen(fails if more then one IP is configured) +// Args: +// context - device context +// port_num - port number +// Returns: +// GID index to use +uint8_t set_gid(uint8_t port_num, ibv_context* context) { + ibv_port_attr port_attr; + string gid_str; + int rc, i, gids_num = 0, v2_ip_num = 0; + union ibv_gid gid; + uint8_t gid_index = 0; + + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + + for (i = 0; i < port_attr.gid_tbl_len; i++) { + rc = ibv_query_gid(context, port_num, i, &gid); + CHECK(!rc) << "Failed to query gid to port " << (int)port_num << " index " + << i; + if (gid.global.interface_id) { + gids_num++; + if (gid.global.subnet_prefix == 0 && + is_gid_type_roce_v2(context, port_num, i)) { + if (v2_ip_num == 0) { + // can be overwritten by RDMA_GID_INDEX later + gid_index = i; + } + v2_ip_num++; + } + } + } + switch (port_attr.link_layer) { + case(IBV_LINK_LAYER_ETHERNET) : + gid_str = get_env_var("RDMA_GID_INDEX"); + if (!gid_str.empty()) { + gid_index = stoi(gid_str); + CHECK(gid_index < gids_num) + << "RDMA_GID_INDEX should be less than GIDs amount" << gids_num; + } else { + CHECK(v2_ip_num <= 1) + << "More than one IP is available, please specify GID_INDEX"; + } + break; + case(IBV_LINK_LAYER_INFINIBAND) : // no need in GID index + break; + default: + LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " + "InfiniBand only. "; + } + if (!is_gid_type_roce_v2(context, port_num, gid_index)) { + LOG(INFO) << "RoCE v2 is not configured for GID_INDEX " << (int)gid_index; + } + return gid_index; +} + +// set the default or environment value to the configuration parameter. +// Args: +// default_val- the default value for this parameter +// env_param- the environment parameter's name +// Returns: +// 32-bit value +uint32_t set_param(uint32_t default_val, const char* env_param) { + uint32_t val = default_val; + string val_s; + + val_s = get_env_var(env_param); + + if (!val_s.empty()) { + val = stoi(val_s); + } + return val; +} + +enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { + ibv_port_attr port_attr; + enum ibv_mtu mtu; + string mtu_s; + int rc, mtu_i; + + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + + mtu_s = get_env_var("RDMA_MTU"); + + if (!mtu_s.empty()) { + mtu_i = stoi(mtu_s); + switch (mtu_i) { + case 256: + mtu = IBV_MTU_256; + break; + case 512: + mtu = IBV_MTU_512; + break; + case 1024: + mtu = IBV_MTU_1024; + break; + case 2048: + mtu = IBV_MTU_2048; + break; + case 4096: + mtu = IBV_MTU_4096; + break; + default: + CHECK(0) << "Error: MTU input value must be one of the following: 256, " + "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n"; + break; + } + CHECK(mtu < port_attr.active_mtu) + << "MTU configuration for the QPs is larger than active MTU"; + } else { + mtu = port_attr.active_mtu; + } + return mtu; +} + +RdmaParams params_init(ibv_context* context) { + RdmaParams params; + + params.port_num = set_port(context); + params.sgid_index = set_gid(params.port_num, context); + params.pkey_index = (uint8_t)set_param(PKEY_DEFAULT, "RDMA_PKEY"); + params.queue_depth = set_param(QUEUE_DEPTH_DEFAULT, "RDMA_QUEUE_DEPTH"); + params.timeout = (uint8_t)set_param(TIMEOUT_DEFAULT, "RDMA_TIMEOUT"); + params.retry_cnt = (uint8_t)set_param(RETRY_CNT_DEFAULT, "RDMA_RETRY_CNT"); + params.sl = (uint8_t)set_param(SL_DEFAULT, "RDMA_SL"); + CHECK(params.sl <= 7) << "SL value is " << (int)params.sl + << ". Valid values are 0-7."; + params.mtu = set_mtu(params.port_num, context); + params.traffic_class = set_param(TRAFFIC_CLASS, "RDMA_TRAFFIC_CLASS"); + return params; } ibv_pd* alloc_protection_domain(ibv_context* context) { @@ -85,7 +409,8 @@ ibv_pd* alloc_protection_domain(ibv_context* context) { } RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env) - : context_(open_default_device()), + : context_(open_device(set_device())), + params_(params_init(context_)), pd_(alloc_protection_domain(context_)), worker_env_(worker_env) { event_channel_ = ibv_create_comp_channel(context_); @@ -128,9 +453,9 @@ void RdmaAdapter::Process_CQ() { CHECK_GE(ne, 0); for (int i = 0; i < ne; ++i) { CHECK(wc_[i].status == IBV_WC_SUCCESS) - << "Failed status \n" - << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " " - << static_cast(wc_[i].wr_id) << " " << wc_[i].vendor_err; + << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " " + << wc_[i].status << " " << static_cast(wc_[i].wr_id) << " " + << wc_[i].vendor_err; if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) { RdmaChannel* rc = reinterpret_cast(wc_[i].wr_id); // put back a recv wr. @@ -242,8 +567,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, memset(&attr, 0, sizeof(ibv_qp_init_attr)); attr.send_cq = adapter_->cq_; attr.recv_cq = adapter_->cq_; - attr.cap.max_send_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; - attr.cap.max_recv_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; + attr.cap.max_send_wr = adapter_->params_.queue_depth; + attr.cap.max_recv_wr = adapter_->params_.queue_depth; attr.cap.max_send_sge = 1; attr.cap.max_recv_sge = 1; attr.qp_type = IBV_QPT_RC; @@ -257,8 +582,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_INIT; - attr.pkey_index = 0; - attr.port_num = 1; + attr.pkey_index = adapter_->params_.pkey_index; + attr.port_num = adapter_->params_.port_num; attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; int mask = @@ -269,13 +594,15 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // Local address { struct ibv_port_attr attr; - CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &attr)) + CHECK( + !ibv_query_port(adapter_->context_, adapter_->params_.port_num, &attr)) << "Query port"; self_.lid = attr.lid; self_.qpn = qp_->qp_num; self_.psn = static_cast(random::New64()) & 0xffffff; union ibv_gid gid; - CHECK(!ibv_query_gid(adapter_->context_, (uint8_t)1, 0, &gid)) + CHECK(!ibv_query_gid(adapter_->context_, adapter_->params_.port_num, + adapter_->params_.sgid_index, &gid)) << "Query gid"; self_.snp = gid.global.subnet_prefix; self_.iid = gid.global.interface_id; @@ -284,7 +611,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // create message and ack buffers, then initialize the tables. { const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer", - "tx_ack_buffer", "rx_ack_buffer"}; + "tx_ack_buffer", "rx_ack_buffer"}; tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]); rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]); tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]); @@ -345,7 +672,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) { void RdmaChannel::Recv() { struct ibv_recv_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; struct ibv_recv_wr* bad_wr; CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv"; } @@ -479,11 +806,9 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTR; - struct ibv_port_attr port_attr; - CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &port_attr)) - << "Query port failed"; + // This assumes both QP's ports are configured with the same MTU - attr.path_mtu = port_attr.active_mtu; + attr.path_mtu = adapter_->params_.mtu; attr.dest_qp_num = remoteAddr.qpn; attr.rq_psn = remoteAddr.psn; attr.max_dest_rd_atomic = 1; @@ -494,30 +819,32 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.flow_label = 0; attr.ah_attr.grh.hop_limit = 255; attr.ah_attr.dlid = remoteAddr.lid; - attr.ah_attr.sl = 0; + attr.ah_attr.sl = adapter_->params_.sl; attr.ah_attr.src_path_bits = 0; - attr.ah_attr.port_num = 1; + attr.ah_attr.port_num = adapter_->params_.port_num; + attr.ah_attr.grh.sgid_index = adapter_->params_.sgid_index; + attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; int r; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTS; attr.sq_psn = self_.psn; - attr.timeout = 14; - attr.retry_cnt = 7; + attr.timeout = adapter_->params_.timeout; + attr.retry_cnt = adapter_->params_.retry_cnt; attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; connected_ = true; @@ -604,7 +931,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) { struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -699,9 +1026,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { - CHECK(send_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); + CHECK(send_args.device_context) << "send dev name: " << src_dev->name() + << " gpu_info: " + << src_dev->tensorflow_gpu_device_info(); if (can_memcpy) { AllocatorAttributes host_alloc_attrs; @@ -727,8 +1054,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( // aync instead GPUUtil::SetProtoFromGPU( in, src_dev, send_args.device_context, &proto, is_dead, - [this, proto, buffer_size, key, in, step_id, key_with_step_id, - is_dead, send_args, recv_args](const Status& s) mutable { + [this, proto, buffer_size, key, in, step_id, key_with_step_id, + is_dead, send_args, recv_args](const Status& s) mutable { CHECK(s.ok()) << "copy proto from gpu sync"; auto tensor_bytes = proto.ByteSize(); buffer_size += tensor_bytes; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index e1e07db776..52d92a7c5b 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -36,7 +36,24 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" namespace tensorflow { - +#define PKEY_DEFAULT 0 +#define QUEUE_DEPTH_DEFAULT 1024 +#define TIMEOUT_DEFAULT 14 +#define RETRY_CNT_DEFAULT 7 +#define SL_DEFAULT 0 +#define TRAFFIC_CLASS 0 + +struct RdmaParams { + uint8_t port_num; + uint8_t sgid_index; + uint8_t pkey_index; + uint32_t queue_depth; + uint8_t timeout; + uint8_t retry_cnt; + uint8_t sl; + enum ibv_mtu mtu; + uint8_t traffic_class; +}; // structure to save the address of remote channels. struct RdmaAddress { uint32_t lid; @@ -50,9 +67,20 @@ struct RemoteMR { uint64_t remote_addr; uint32_t rkey; }; -enum BufferStatus { none, idle, busy }; -enum Location { local, remote }; -enum BufferType { ACK, MESSAGE, TENSOR }; +enum BufferStatus { + none, + idle, + busy +}; +enum Location { + local, + remote +}; +enum BufferType { + ACK, + MESSAGE, + TENSOR +}; enum RdmaMessageType { RDMA_MESSAGE_ACK, RDMA_MESSAGE_BUFFER_IDLE, @@ -84,6 +112,8 @@ class RdmaAdapter { protected: static const int MAX_CONCURRENT_WRITES = 1000; ibv_context* context_; + // RDMA configuration parameters + RdmaParams params_; // ibverbs protection domain ibv_pd* pd_; // Completion event channel, to wait for work completions @@ -183,7 +213,7 @@ class RdmaBuffer { } void FreeBuffer(); void EnqueueItem(string Item); - virtual void SendNextItem(){}; + virtual void SendNextItem() {}; void CreateCPUBuffer(size_t size, bool lock = true); void SetRemoteMR(RemoteMR rmi, bool override); uint32_t LookupBufferIndex(const string& buffer_name) { diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 18d69fceb3..9530af637e 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2710,6 +2710,7 @@ tf_cc_test_mkl( srcs = [ "graph/mkl_layout_pass_test.cc", "graph/mkl_tfconversion_pass_test.cc", + "util/mkl_util_test.cc", ], linkstatic = 1, deps = [ diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 6399b8cf55..38fe247521 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -296,13 +296,12 @@ void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes, // it from the free bin structure prior to using. RemoveFreeChunkIterFromBin(&b->free_chunks, citer); - // If we can break the size of the chunk into two reasonably large - // pieces, do so. In any case don't waste more than - // kMaxInternalFragmentation bytes on padding this alloc. - const int64 kMaxInternalFragmentation = 128 << 20; // 128mb - if (chunk->size >= rounded_bytes * 2 || - static_cast(chunk->size) - rounded_bytes >= - kMaxInternalFragmentation) { + // If we can break the size of the chunk into two reasonably + // large pieces, do so. + // + // TODO(vrv): What should be the criteria when deciding when + // to split? + if (chunk->size >= rounded_bytes * 2) { SplitChunk(h, rounded_bytes); chunk = ChunkFromHandle(h); // Update chunk pointer in case it moved } diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 53e80b1ee3..63b74e8dbf 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -81,7 +81,7 @@ class MklCPUAllocator : public Allocator { } #if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) if (user_val > max_mem_bytes) { - LOG(WARNING) << "The user specifed a memory limit " << kMaxLimitStr + LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr << "=" << user_val << " greater than available physical memory: " << max_mem_bytes diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h index 9caa076c72..cc272d156e 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.h +++ b/tensorflow/core/common_runtime/sycl/sycl_device.h @@ -46,8 +46,8 @@ class GSYCLInterface { if (!found_device) { // Currently Intel GPU is not supported - LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, " - "trying OpenCL CPU"; + LOG(WARNING) << "No OpenCL GPU found that is supported by " + << "ComputeCpp/triSYCL, trying OpenCL CPU"; } for (const auto& device : device_list) { @@ -58,10 +58,24 @@ class GSYCLInterface { } } + if (!found_device) { + LOG(WARNING) << "No OpenCL CPU found that is supported by " + << "ComputeCpp/triSYCL, checking for host sycl device"; + } + + for (const auto& device : device_list) { + // triSYCL only supports the host device for now + if (device.is_host()) { + LOG(WARNING) << "Found SYCL host device"; + AddDevice(device); + found_device = true; + } + } + if (!found_device) { // Currently Intel GPU is not supported - LOG(FATAL) - << "No OpenCL GPU nor CPU found that is supported by ComputeCpp"; + LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU" + << " supported by ComputeCPP/triSYCL was found"; } else { LOG(INFO) << "Found following OpenCL devices:"; for (int i = 0; i < device_list.size(); i++) { diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc index 1a6f355c77..a5ac0e1a8d 100644 --- a/tensorflow/core/framework/bfloat16.cc +++ b/tensorflow/core/framework/bfloat16.cc @@ -18,24 +18,32 @@ limitations under the License. namespace tensorflow { void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) { - for (int64 i = 0; i < size; ++i) { - dst[i] = bfloat16(src[i]); - } + const uint16_t* p = reinterpret_cast(src); + uint16_t* q = reinterpret_cast(dst); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + for (; size != 0; p += 2, q++, size--) { + *q = p[0]; + } +#else + for (; size != 0; p += 2, q++, size--) { + *q = p[1]; + } +#endif } void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) { const uint16_t* p = reinterpret_cast(src); uint16_t* q = reinterpret_cast(dst); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p++, q += 2, size--) { - q[0] = *p; - q[1] = 0; + for (; size != 0; p++, q += 2, size--) { + q[0] = *p; + q[1] = 0; } -#else - for (; size != 0; p++, q += 2, size--) { - q[0] = 0; - q[1] = *p; - } +#else + for (; size != 0; p++, q += 2, size--) { + q[0] = 0; + q[1] = *p; + } #endif } diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index a25b764ea2..af4e6a4411 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/core/framework/bfloat16.h" -#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -28,97 +27,6 @@ TEST(Bfloat16Test, Simple) { EXPECT_EQ(0x4140, a.value); } -float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa, - uint32_t low_mantissa) { - return bit_cast((sign << 31) + (exponent << 23) + - (high_mantissa << 16) + low_mantissa); -} - -struct Bfloat16TestParam { - float input; - float expected; -}; - -class Bfloat16Test : public ::testing::Test, - public ::testing::WithParamInterface {}; - -TEST_P(Bfloat16Test, RoundOrTruncate) { - bfloat16 a(GetParam().input); - if (std::isnan(GetParam().input)) { - EXPECT_TRUE(std::isnan(float(a))); - return; - } - EXPECT_EQ(GetParam().expected, float(a)); -} - -INSTANTIATE_TEST_CASE_P( - Bfloat16Test_Instantiation, Bfloat16Test, - ::testing::Values( - // More than half. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b1111010111000011), - BinaryToFloat(0, 0b10000000, 0b1001001, 0b0000000000000000)}, - - Bfloat16TestParam{ - BinaryToFloat(1, 0b10000000, 0b1001000, 0b1111010111000011), - BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, - - // Exact half. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // NaN stays at NaN. - Bfloat16TestParam{ - BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000001), - BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, - - // NaN stays at NaN -- no exponents overflow. - Bfloat16TestParam{ - BinaryToFloat(0, 0b11111111, 0b1111111, 0b1111111111111111), - BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, - - // More than half, round to an odd number. - Bfloat16TestParam{ - BinaryToFloat(1, 0b10000000, 0b1001000, 0b1100000000000000), - BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, - - // Less than half, truncate. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // Less than half, truncate. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0100000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // Exact at half, but result is already even. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // Denormal values. - Bfloat16TestParam{ - BinaryToFloat(0, 0b00000000, 0b1001000, 0b1000000000000000), - BinaryToFloat(0, 0b00000000, 0b1001000, 0b0000000000000000)}, - Bfloat16TestParam{ - BinaryToFloat(0, 0b00000000, 0b1111111, 0b1100000000000000), - BinaryToFloat(0, 0b00000001, 0b0000000, 0b0000000000000000)})); -TEST(Bfloat16Test, RoundWithFractionOverflow) { - // Still works with fraction overflow -- round to 4./ - // - // Input 3.9960938: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1100000000000000 - // - // Should round to 4.0: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 - bfloat16 a(3.9960938f); - EXPECT_EQ(4.0, float(a)); -} - TEST(Bfloat16Test, Conversion) { float a[100]; for (int i = 0; i < 100; ++i) { diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index d005de2af1..a630bee38d 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -44,262 +44,29 @@ typedef Eigen::QUInt16 quint16; // see framework/bfloat16.h for description. struct bfloat16 { EIGEN_DEVICE_FUNC bfloat16() {} - - explicit EIGEN_DEVICE_FUNC bfloat16(float v) { - uint32_t input; - memcpy(&input, &v, sizeof(uint32_t)); - - if ((~input & 0x7f800000) == 0 && (input & 0x007fffff) != 0) { - // If the value is a NaN, squash it to a qNaN with msb of fraction set, - // this makes sure after truncation we don't end up with an inf. - // - // qNaN magic: All exponent bits set + most significant bit of fraction - // set. - value = 0x7fc0; - } else { - // Fast rounding algorithm that rounds a half value to nearest even. This - // reduces expected error when we convert a large number of floats. Here - // is how it works: - // - // Definitions: - // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits - // with the following tags: - // - // Sign | Exp (8 bits) | Frac (23 bits) - // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT - // - // S: Sign bit. - // E: Exponent bits. - // F: First 6 bits of fraction. - // L: Least significant bit of resulting bfloat16 if we truncate away the - // rest of the float32. This is also the 7th bit of fraction - // R: Rounding bit, 8th bit of fraction. - // T: Sticky bits, rest of fraction, 15 bits. - // - // To round half to nearest even, there are 3 cases where we want to round - // down (simply truncate the result of the bits away, which consists of - // rounding bit and sticky bits) and two cases where we want to round up - // (truncate then add one to the result). - // - // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of - // 1s) as the rounding bias, adds the rounding bias to the input, then - // truncates the last 16 bits away. - // - // To understand how it works, we can analyze this algorithm case by case: - // - // 1. L = 0, R = 0: - // Expect: round down, this is less than half value. - // - // Algorithm: - // - Rounding bias: 0x7fff + 0 = 0x7fff - // - Adding rounding bias to input may create any carry, depending on - // whether there is any value set to 1 in T bits. - // - R may be set to 1 if there is a carry. - // - L remains 0. - // - Note that this case also handles Inf and -Inf, where all fraction - // bits, including L, R and Ts are all 0. The output remains Inf after - // this algorithm. - // - // 2. L = 1, R = 0: - // Expect: round down, this is less than half value. - // - // Algorithm: - // - Rounding bias: 0x7fff + 1 = 0x8000 - // - Adding rounding bias to input doesn't change sticky bits but - // adds 1 to rounding bit. - // - L remains 1. - // - // 3. L = 0, R = 1, all of T are 0: - // Expect: round down, this is exactly at half, the result is already - // even (L=0). - // - // Algorithm: - // - Rounding bias: 0x7fff + 0 = 0x7fff - // - Adding rounding bias to input sets all sticky bits to 1, but - // doesn't create a carry. - // - R remains 1. - // - L remains 0. - // - // 4. L = 1, R = 1: - // Expect: round up, this is exactly at half, the result needs to be - // round to the next even number. - // - // Algorithm: - // - Rounding bias: 0x7fff + 1 = 0x8000 - // - Adding rounding bias to input doesn't change sticky bits, but - // creates a carry from rounding bit. - // - The carry sets L to 0, creates another carry bit and propagate - // forward to F bits. - // - If all the F bits are 1, a carry then propagates to the exponent - // bits, which then creates the minimum value with the next exponent - // value. Note that we won't have the case where exponents are all 1, - // since that's either a NaN (handled in the other if condition) or inf - // (handled in case 1). - // - // 5. L = 0, R = 1, any of T is 1: - // Expect: round up, this is greater than half. - // - // Algorithm: - // - Rounding bias: 0x7fff + 0 = 0x7fff - // - Adding rounding bias to input creates a carry from sticky bits, - // sets rounding bit to 0, then create another carry. - // - The second carry sets L to 1. - // - // Examples: - // - // Exact half value that is already even: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000 - // - // This falls into case 3. We truncate the rest of 16 bits and no - // carry is created into F and L: - // - // Output: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 - // - // Exact half value, round to next even number: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000 - // - // This falls into case 4. We create a carry from R and T, - // which then propagates into L and F: - // - // Output: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 - // - // - // Max denormal value round to min normal value: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111 - // - // This falls into case 4. We create a carry from R and T, - // propagate into L and F, which then propagates into exponent - // bits: - // - // Output: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 - // - // Max normal value round to Inf: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111 - // - // This falls into case 4. We create a carry from R and T, - // propagate into L and F, which then propagates into exponent - // bits: - // - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 - // - // - // Least significant bit of resulting bfloat. - uint32_t lsb = (input >> 16) & 1; - uint32_t rounding_bias = 0x7fff + lsb; - input += rounding_bias; - value = static_cast(input >> 16); - } - } - - template - explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) - : bfloat16(static_cast(val)) {} - - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { - float result; - - uint16_t* q = reinterpret_cast(&result); - + EIGEN_DEVICE_FUNC explicit bfloat16(const float v) { + const uint16_t* p = reinterpret_cast(&v); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - q[0] = value; - q[1] = 0; + value = p[0]; #else - q[0] = 0; - q[1] = value; + value = p[1]; #endif - return result; - } - - EIGEN_DEVICE_FUNC explicit operator bool() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator Eigen::half() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator short() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator int() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator char() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator signed char() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned char() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned int() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned long() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned long long() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator long long() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator double() const { - return static_cast(float(*this)); } uint16_t value; }; -inline bool operator==(const bfloat16 a, const bfloat16 b) { - return a.value == b.value; -} - -inline bool operator!=(const bfloat16 a, const bfloat16 b) { - return a.value != b.value; -} - } // end namespace tensorflow namespace Eigen { template <> struct NumTraits : GenericNumTraits {}; -using ::tensorflow::operator==; -using ::tensorflow::operator!=; +EIGEN_STRONG_INLINE bool operator==(const tensorflow::bfloat16 a, + const tensorflow::bfloat16 b) { + return a.value == b.value; +} + } // namespace Eigen #ifdef COMPILER_MSVC diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h index 4bb37e4f6e..c31ab18cc1 100644 --- a/tensorflow/core/framework/register_types.h +++ b/tensorflow/core/framework/register_types.h @@ -87,8 +87,7 @@ limitations under the License. #elif defined(__ANDROID_TYPES_FULL__) -// Only string, half, float, int32, int64, bool, and quantized types -// supported. +// Only half, float, int32, int64, bool, and quantized types are supported. #define TF_CALL_float(m) m(float) #define TF_CALL_double(m) #define TF_CALL_int32(m) m(::tensorflow::int32) @@ -97,7 +96,7 @@ limitations under the License. #define TF_CALL_int16(m) #define TF_CALL_int8(m) -#define TF_CALL_string(m) m(string) +#define TF_CALL_string(m) #define TF_CALL_resource(m) #define TF_CALL_variant(m) #define TF_CALL_complex64(m) diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 87c41186d5..fd1b5d33b9 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -453,6 +453,21 @@ const Edge* Graph::AddControlEdge(Node* source, Node* dest, return AddEdge(source, kControlSlot, dest, kControlSlot); } +void Graph::RemoveControlEdge(const Edge* e) { + if (!e->src_->IsSource() && !e->dst_->IsSink()) { + e->dst_->MaybeCopyOnWrite(); + std::string e_src_name = strings::StrCat("^", e->src_->name()); + auto* inputs = e->dst_->props_->node_def.mutable_input(); + for (auto it = inputs->begin(); it != inputs->end(); ++it) { + if (*it == e_src_name) { + inputs->erase(it); + break; + } + } + } + RemoveEdge(e); +} + Status Graph::UpdateEdge(Node* new_src, int new_src_index, Node* dst, int dst_index) { TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index)); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index c5dde722fa..d0dba6e1f0 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -451,6 +451,11 @@ class Graph { // REQUIRES: The edge must exist. void RemoveEdge(const Edge* edge); + // Removes control edge `edge` from the graph. Note that this also updates + // the corresponding NodeDef to reflect the change. + // REQUIRES: The control edge must exist. + void RemoveControlEdge(const Edge* e); + // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 2ee409768b..753cb260e5 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -68,8 +68,7 @@ class GraphConstructor { Options(const GraphConstructorOptions& in) // NOLINT(runtime/explicit) : allow_internal_ops(in.allow_internal_ops), expect_device_spec(in.expect_device_spec), - importing(false), - validate_colocation_constraints(false) {} + importing(false) {} Options(const ImportGraphDefOptions& in) // NOLINT(runtime/explicit) : allow_internal_ops(false), expect_device_spec(false), @@ -82,8 +81,7 @@ class GraphConstructor { control_dependencies(in.control_dependencies), return_tensors(in.return_tensors), return_nodes(in.return_nodes), - importing(true), - validate_colocation_constraints(in.validate_colocation_constraints) {} + importing(true) {} bool allow_internal_ops; bool expect_device_spec; @@ -105,7 +103,6 @@ class GraphConstructor { // applicable to ConvertGraphDefToGraph as well, so make an attempt to // remove this. bool importing; - bool validate_colocation_constraints; }; typedef gtl::ArraySlice NodeDefSlice; @@ -495,8 +492,7 @@ Status GraphConstructor::InitFromEdges() { Status GraphConstructor::ValidateColocationConstraints( const NodeDef& node_def) { - if (!opts_.validate_colocation_constraints || !opts_.importing) - return Status::OK(); + if (!opts_.importing) return Status::OK(); const auto iter = node_def.attr().find(kColocationAttrName); if (iter == node_def.attr().end()) return Status::OK(); for (const string& c : iter->second.list().s()) { diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h index 4b418b8622..416c0ee9ae 100644 --- a/tensorflow/core/graph/graph_constructor.h +++ b/tensorflow/core/graph/graph_constructor.h @@ -119,9 +119,6 @@ struct ImportGraphDefOptions { // TODO(skyewm): make this work with `skip_mapped_nodes` if there's a need. std::vector return_nodes; - // If true, checks that all colocation constraints are nodes in the GraphDef. - bool validate_colocation_constraints = true; - // TODO(ashankar): Enable handling of GraphDefs produced by newer binaries // with ops that are not defined in the binary calling ImportGraphDef. // Similar to the producer_op_list argument to import_graph_def in the diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc index 893826da3e..cd541c7d86 100644 --- a/tensorflow/core/graph/graph_constructor_test.cc +++ b/tensorflow/core/graph/graph_constructor_test.cc @@ -2978,20 +2978,5 @@ versions { EXPECT_EQ(17, refiner.graph_def_version()); } -TEST_F(GraphConstructorTest, ImportGraphDef_ValidateColationConstraints) { - GraphDef def; - ASSERT_TRUE(protobuf::TextFormat::ParseFromString( - "node { name: 'A' op: 'TestInput' attr { key: '_class' value { list { " - "s:'loc:@missing' } } } }", - &def)); - ImportGraphDefOptions options; - // TODO(yaozhang): Extend ExpectError to check error type and use ExpectError - // and ExpectOK to replace the code below. - Status s = ImportGraphDef(options, def, &graph_, nullptr); - EXPECT_TRUE(errors::IsInvalidArgument(s)) << s; - options.validate_colocation_constraints = false; - TF_EXPECT_OK(ImportGraphDef(options, def, &graph_, nullptr)); -} - } // namespace } // namespace tensorflow diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index b9e3cba035..1924c05d3d 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -117,7 +117,7 @@ DataType EdgeType(const Edge* e) { } } -// Return true iff we need to add a same device send/recv for 'edge'. +// Return true iff we need to add the same device send/recv for 'edge'. bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { if (edge->IsControlEdge()) { return false; @@ -1116,7 +1116,7 @@ Status Partition(const PartitionOptions& opts, Graph* g, // before the data is available. AddInput(real_recv, send->name(), Graph::kControlSlot); } else if (control_flow_edge != nullptr) { - // Redirect control edge to the real recv since this is not a same + // Redirect control edge to the real recv since this is not the same // device send/recv. --num_control_flow_edges; AddInput(real_recv, control_flow_edge->src()->name(), diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index e5d57facaa..d1c89a48bd 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -118,6 +118,25 @@ class GraphTest : public ::testing::Test { LOG(FATAL) << name; } + bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, + const Node* dst) { + for (const Edge *e : dst->in_edges()) { + if (e->IsControlEdge() && + e->src() == src && + e->src_output() == Graph::kControlSlot && + e->dst_input() == Graph::kControlSlot) { + return true; + } + } + std::string control_edge_name = strings::StrCat("^", src->name()); + for (int i = 0; i < dst->def().input_size(); ++i) { + if (dst->def().input(i) == control_edge_name) { + return true; + } + } + return false; + } + Graph graph_; private: @@ -458,8 +477,8 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_TRUE(edge == nullptr); EXPECT_EQ(b->def().input_size(), 2); - // Can add redundant control edge with create_duplicate. - edge = graph_.AddControlEdge(a, b, /*create_duplicate=*/true); + // Can add redundant control edge with allow_duplicates. + edge = graph_.AddControlEdge(a, b, /*allow_duplicates=*/true); EXPECT_TRUE(edge != nullptr); // create_duplicate causes the NodeDef not to be updated. ASSERT_EQ(b->def().input_size(), 2); @@ -477,6 +496,47 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_EQ(b->def().input_size(), 2); } +TEST_F(GraphTest, RemoveControlEdge) { + FromGraphDef( + "node { name: 'A' op: 'OneOutput' }" + "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }" + "node { name: 'C' op: 'NoOp' } "); + Node* a = FindNode("A"); + Node* b = FindNode("B"); + Node* c = FindNode("C"); + + // Add a control edge. + const Edge* edge_1 = graph_.AddControlEdge(c, a); + const Edge* edge_2 = graph_.AddControlEdge(a, b); + ASSERT_TRUE(edge_1 != nullptr); + ASSERT_TRUE(edge_2 != nullptr); + + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); + + graph_.RemoveControlEdge(edge_1); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); + + graph_.RemoveControlEdge(edge_2); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(a, b)); + + // Test removing a duplicate control edge. + // Note that unless allow_duplicates is true, the duplicate edge + // will not be added. That's why we expect edge_4 to be a null + // pointer. We are not testing with allow_duplicates set to true, + // as that is a highly unlikely use case that does not make much + // sense. + const Edge* edge_3 = graph_.AddControlEdge(c, a); + const Edge* edge_4 = graph_.AddControlEdge(c, a); + ASSERT_TRUE(edge_3 != nullptr); + ASSERT_TRUE(edge_4 == nullptr); + + graph_.RemoveControlEdge(edge_3); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); +} + TEST_F(GraphTest, UpdateEdge) { // Build a little graph Node* a = FromNodeDef("A", "OneOutput", 0); diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index cb32d64334..880e4e712e 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -21,107 +21,108 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { -// Since our ops are going to produce and also consume N addition tensors -// (Mkl) for N Tensorflow tensors, we can have following different -// orderings among these 2N tensors. -// -// E.g., for Tensorflow tensors A, B, and C, our ops will produce and -// consume A_m, B_m, and C_m additionally. -// -// INTERLEAVED: in this case 2N tensors are interleaved. So for above -// example, the ordering looks like: A, A_m, B, B_m, C, C_m. -// -// CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed -// by N Mkl tensors. So for above example, the ordering looks -// like: A, B, C, A_m, B_m, C_m -// -// Following APIs map index of original Tensorflow tensors to their -// appropriate position based on selected ordering. For contiguous ordering, -// we need to know the total number of tensors (parameter total). -// -typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; -// NOTE: Currently, we use contiguous ordering. If you change this, then you -// would need to change Mkl op definitions in nn_ops.cc. -static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; + // Since our ops are going to produce and also consume N addition tensors + // (Mkl) for N Tensorflow tensors, we can have following different + // orderings among these 2N tensors. + // + // E.g., for Tensorflow tensors A, B, and C, our ops will produce and + // consume A_m, B_m, and C_m additionally. + // + // INTERLEAVED: in this case 2N tensors are interleaved. So for above + // example, the ordering looks like: A, A_m, B, B_m, C, C_m. + // + // CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed + // by N Mkl tensors. So for above example, the ordering looks + // like: A, B, C, A_m, B_m, C_m + // + // Following APIs map index of original Tensorflow tensors to their + // appropriate position based on selected ordering. For contiguous ordering, + // we need to know the total number of tensors (parameter total). + // + typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; + // NOTE: Currently, we use contiguous ordering. If you change this, then you + // would need to change Mkl op definitions in nn_ops.cc. + static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; -// Get index of MetaData tensor from index 'n' of Data tensor. -inline int DataIndexToMetaDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - // For interleaved ordering, Mkl tensor follows immediately after - // Tensorflow tensor. - return n + 1; - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. - return n + total_tensors / 2; + // Get index of MetaData tensor from index 'n' of Data tensor. + inline int DataIndexToMetaDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + // For interleaved ordering, Mkl tensor follows immediately after + // Tensorflow tensor. + return n + 1; + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. + return n + total_tensors / 2; + } } -} -int inline GetTensorDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - return 2 * n; // index corresponding to nth input/output tensor - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - return n; - } -} + int inline GetTensorDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + return 2 * n; // index corresponding to nth input/output tensor + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + return n; + } + } -int inline GetTensorMetaDataIndex(int n, int total_tensors) { - // Get index for TensorData first and then use mapping function - // to get TensorMetaData index from TensorData index. - int tidx = GetTensorDataIndex(n, total_tensors); - return DataIndexToMetaDataIndex(tidx, total_tensors); -} + int inline GetTensorMetaDataIndex(int n, int total_tensors) { + // Get index for TensorData first and then use mapping function + // to get TensorMetaData index from TensorData index. + int tidx = GetTensorDataIndex(n, total_tensors); + return DataIndexToMetaDataIndex(tidx, total_tensors); + } namespace mkl_op_registry { -static const char* kMklOpLabel = "MklOp"; -static const char* kMklOpLabelPattern = "label='MklOp'"; - -// Get the name of Mkl op from original TensorFlow op -// We prefix 'Mkl' to the original op to get Mkl op. -inline string GetMklOpName(const string& name) { - // Prefix that we add to Tensorflow op name to construct Mkl op name. - const char* const kMklOpPrefix = "_Mkl"; - return string(kMklOpPrefix) + name; -} + static const char* kMklOpLabel = "MklOp"; + static const char* kMklOpLabelPattern = "label='MklOp'"; -// Check whether opname with type T is registered as MKL-compliant. -// -// @input: name of the op -// @input: T datatype to be used for checking op -// @return: true if opname is registered as Mkl op; false otherwise -static inline bool IsMklOp(const std::string& op_name, DataType T) { - string kernel = KernelsRegisteredForOp(op_name); - bool result = - kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); - if (result) { - VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; + // Get the name of Mkl op from original TensorFlow op + // We prefix 'Mkl' to the original op to get Mkl op. + inline string GetMklOpName(const string& name) { + // Prefix that we add to Tensorflow op name to construct Mkl op name. + const char* const kMklOpPrefix = "_Mkl"; + return string(kMklOpPrefix) + name; } - return result; -} -// Check whether opname with type T is registered as MKL-compliant and -// is element-wise. -// -// @input: name of the op -// @input: T datatype to be used for checking op -// @return: true if opname is registered as element-wise Mkl op; -// false otherwise -static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) { - if (!IsMklOp(op_name, T)) { - return false; + // Check whether opname with type T is registered as MKL-compliant. + // + // @input: name of the op + // @input: T datatype to be used for checking op + // @return: true if opname is registered as Mkl op; false otherwise + static inline bool IsMklOp(const std::string& op_name, DataType T) { + string kernel = KernelsRegisteredForOp(op_name); + bool result = + kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); + if (result) { + VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; + } + return result; } - bool result = (0 == op_name.compare(GetMklOpName("Add")) || - 0 == op_name.compare(GetMklOpName("Sub")) || - 0 == op_name.compare(GetMklOpName("Mul")) || - 0 == op_name.compare(GetMklOpName("Maximum")) || - 0 == op_name.compare(GetMklOpName("SquaredDifference"))); + // Check whether opname with type T is registered as MKL-compliant and + // is element-wise. + // + // @input: name of the op + // @input: T datatype to be used for checking op + // @return: true if opname is registered as element-wise Mkl op; + // false otherwise + static inline bool IsMklElementWiseOp(const std::string& op_name, + DataType T) { + if (!IsMklOp(op_name, T)) { + return false; + } - VLOG(1) << "mkl_op_registry::" << op_name - << " is elementwise MKL op: " << result; - return result; -} + bool result = (0 == op_name.compare(GetMklOpName("Add")) || + 0 == op_name.compare(GetMklOpName("Sub")) || + 0 == op_name.compare(GetMklOpName("Mul")) || + 0 == op_name.compare(GetMklOpName("Maximum")) || + 0 == op_name.compare(GetMklOpName("SquaredDifference"))); + + VLOG(1) << "mkl_op_registry::" << op_name + << " is elementwise MKL op: " << result; + return result; + } } // namespace mkl_op_registry } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index f4c9073dee..912075aa28 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -37,8 +37,8 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/tensor_format.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/graph/mkl_layout_pass.h" +#include "tensorflow/core/graph/mkl_graph_util.h" namespace tensorflow { diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index fe4588389e..599bb88f01 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -33,8 +33,8 @@ limitations under the License. #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/graph/mkl_tfconversion_pass.h" +#include "tensorflow/core/graph/mkl_graph_util.h" namespace tensorflow { @@ -68,7 +68,7 @@ namespace tensorflow { // take place before we hit the op. For this, we add a new op before each // element-wise MKL op to deal with the inputs, called _MklInputConversion. // This pass has been enhanced to add this capability. -// +// // The _MklInputConversion op will check the inputs to the elementwise op and // make sure that either both are in MKL format or both are in TF format, // depending on their initial state and whether broadcast is needed or not. diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 35048a4fcf..44322a2d8c 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -50,9 +50,13 @@ template struct HandleToObject {}; template <> struct HandleToObject { - typedef ShapeHandle Object; + typedef TensorShapeProto Object; - static ShapeHandle Unknown() { return ShapeHandle(); } + static TensorShapeProto Unknown() { + TensorShapeProto result; + result.set_unknown_rank(true); + return result; + } }; template <> @@ -63,24 +67,13 @@ struct HandleToObject { }; template -struct Processor {}; - -template <> -struct Processor { +struct Processor { // Extract the shape or dim denoted by the handle. - void ExtractValue(ShapeHandle h, ShapeHandle* result) { *result = h; } + void ExtractValue(Handle /*t1*/, + typename HandleToObject::Object* result) {} // Merge the shapes or dims. - Status Merge(ShapeHandle h1, ShapeHandle h2, ShapeHandle* result) { - if (InferenceContext::RankKnown(*result)) { - // The result was initialized in a previous merge to a shape of known - // rank, make sure we preserve that information. - return Status::OK(); - } - if (InferenceContext::RankKnown(h1)) { - *result = h1; - } else { - *result = h2; - } + Status Merge(Handle /*t1*/, Handle /*t2*/, + typename HandleToObject::Object* result) { return Status::OK(); } }; @@ -108,37 +101,24 @@ struct Processor { if (dim1 >= 0 && dim2 >= 0) { CHECK_EQ(dim1, dim2); - return RefineDim(dim1, result); + *result = dim1; } else if (dim1 >= 0 && dim2 < 0) { - return RefineDim(dim1, result); + *result = dim1; } else if (dim1 < 0 && dim2 >= 0) { - return RefineDim(dim2, result); + *result = dim2; } else if (dim1 < -1) { - return RefineDim(dim1, result); + *result = dim1; } else if (dim2 < -1) { - return RefineDim(dim2, result); + *result = dim2; } else { CHECK_EQ(dim1, dim2); CHECK_EQ(-1, dim1); - return RefineDim(-1, result); + *result = -1; } return Status::OK(); } private: - Status RefineDim(int64 dim, int64* result) { - if (*result >= 0) { - if (!(*result == dim || dim < 0)) { - return errors::InvalidArgument("Inconsistent dimensions detected"); - } - } else if (dim >= 0) { - *result = dim; - } else if (dim < *result) { - *result = dim; - } - return Status::OK(); - } - int64 counter = 2; }; @@ -374,17 +354,18 @@ class SymbolicShapeManager { return dims_.Merge(d1, d2); } + int64 Value(DimensionHandle d) { return dims_.GetMergedValue(d); } + void AsTensorProperties(const ShapeHandle& shape, const DataType& type, + InferenceContext* ctx, OpInfo::TensorProperties* properties) { properties->set_dtype(type); - ShapeHandle actual_shape = shapes_.GetMergedValue(shape); - if (!InferenceContext::RankKnown(actual_shape)) { + if (!ctx->RankKnown(shape)) { properties->mutable_shape()->set_unknown_rank(true); } else { - for (int j = 0; j < InferenceContext::Rank(actual_shape); ++j) { - shape_inference::DimensionHandle dim = - InferenceContext::DimKnownRank(actual_shape, j); - int64 d = dims_.GetMergedValue(dim); + for (int j = 0; j < ctx->Rank(shape); ++j) { + shape_inference::DimensionHandle dim = ctx->Dim(shape, j); + int64 d = Value(dim); properties->mutable_shape()->add_dim()->set_size(d); } } @@ -466,11 +447,6 @@ Status GraphProperties::InferStatically() { shape_refiner.set_disable_constant_propagation(true); shape_refiner.set_function_library_for_shape_inference(&function_library); ImportGraphDefOptions options; - // Graph optimization happens at the late stage of graph execution, - // when colocation constraints are already validated previously and - // the device placement of nodes has also completed, so there - // is no need to validate colocation constraints again. - options.validate_colocation_constraints = false; Status s = ImportGraphDef(options, item_.graph, &graph, &shape_refiner); TF_RETURN_IF_ERROR(s); @@ -496,6 +472,41 @@ Status GraphProperties::InferStatically() { } } } + + // Infer output shape for Restore op. + if (node->op_def().name() == "Restore" || + node->op_def().name() == "RestoreV2" || + node->op_def().name() == "RestoreSlice") { + auto ctx = shape_refiner.GetContext(node); + for (const Edge* out_edge : node->out_edges()) { + const Node* output = out_edge->dst(); + int output_idx = out_edge->src_output(); + if (output_idx < 0) { + continue; + } + if (!ctx->FullyDefined(ctx->output(output_idx)) && + output->op_def().name() == "Assign") { + if (!output->attrs().Find("validate_shape") || + !output->attrs().Find("validate_shape")->b()) { + continue; + } + auto output_ctx = shape_refiner.GetContext(output); + if (output_ctx->FullyDefined(output_ctx->output(0))) { + ctx->set_output(output_idx, output_ctx->output(0)); + output_ctx->MergeInput(1, output_ctx->output(0)); + } else { + const Node* var; + TF_CHECK_OK(node->input_node(0, &var)); + if (node->IsVariable()) { + auto var_ctx = shape_refiner.GetContext(var); + CHECK(var_ctx->FullyDefined(var_ctx->output(0))); + ctx->set_output(output_idx, var_ctx->output(0)); + output_ctx->MergeInput(1, var_ctx->output(0)); + } + } + } + } + } } // Propagate the initial shapes of Enter nodes manually (the Enter shape @@ -628,6 +639,8 @@ Status GraphProperties::InferStatically() { } while (!done); } + std::unordered_map dim_ids; + // Track shapes globally accross the graph. SymbolicShapeManager shape_manager; bool found_error = false; @@ -675,7 +688,7 @@ Status GraphProperties::InferStatically() { input_properties.resize(ctx->num_inputs()); for (int i = 0; i < ctx->num_inputs(); ++i) { shape_manager.AsTensorProperties(ctx->input(i), node->input_type(i), - &input_properties[i]); + ctx, &input_properties[i]); } for (const auto& edge : node->in_edges()) { if (!edge->src()->IsConstant()) { @@ -702,7 +715,7 @@ Status GraphProperties::InferStatically() { output_properties.resize(ctx->num_outputs()); for (int i = 0; i < ctx->num_outputs(); ++i) { shape_manager.AsTensorProperties(ctx->output(i), node->output_type(i), - &output_properties[i]); + ctx, &output_properties[i]); } } } diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index a6aed0bba6..e2fe9f9689 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -55,6 +55,12 @@ class GraphProperties { const std::vector& GetOutputProperties( const string& node_name) const; + static void FillTensorPropertiesFromContext( + const shape_inference::ShapeHandle&, const DataType&, + shape_inference::InferenceContext*, + std::unordered_map* dim_ids, + OpInfo::TensorProperties*); + private: // Inputs GrapplerItem item_; diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index f785f627e1..a33cdacc09 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -23,7 +23,6 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" #include "tensorflow/core/grappler/inputs/utils.h" -#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/protobuf.h" @@ -296,9 +295,10 @@ TEST_F(GraphPropertiesTest, Queues) { ASSERT_EQ(1, props2.size()); EXPECT_EQ("float: [3,7]", PropToString(props2[0])); + // The dequeue3 op shape is unknown. const auto props3 = properties.GetOutputProperties("Dequeue3"); ASSERT_EQ(1, props3.size()); - EXPECT_EQ("float: [3,7]", PropToString(props3[0])); + EXPECT_EQ("float: ?", PropToString(props3[0])); // The dequeue3 op shape is unknown. The square2 op shape is known. Verify // that we merge the 2 properly to determine the shape of the data coming out @@ -677,8 +677,8 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape) { TEST_F(GraphPropertiesTest, InferRestoreOpShape_WithTwoNodesShareSameOutput) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output var = ops::Variable(s.WithOpName("var"), PartialTensorShape(), - DataType::DT_FLOAT); + Output var = + ops::Variable(s.WithOpName("var"), TensorShape(), DataType::DT_FLOAT); Output var2 = ops::Variable(s.WithOpName("var2"), TensorShape({128, 256}), DataType::DT_FLOAT); Output filename = @@ -784,30 +784,6 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { EXPECT_EQ(shape_f.dim(1).size(), shape_a.dim(1).size()); } -TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = ops::Const(s.WithOpName("a"), 1.0f, {1}); - Output b = ops::Const(s.WithOpName("b"), 2.0f, {1}); - Output c = ops::Const(s.WithOpName("c").ColocateWith(a), 3.0f, {1}); - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - // Create a graph with node a removed (say by some graph optimization - // pass), noting that node c is colocated with a. This is fine as it - // is in the late stage of graph execution, the colocation constraints have - // been validated previously and the device placement of nodes has completed. - GraphDef optimized_graph; - for (const auto& node : item.graph.node()) { - if (node.name() != "a") { - *optimized_graph.add_node() = node; - } - } - item.graph.Swap(&optimized_graph); - GraphProperties properties(item); - // This function should return OK, since it doesn't validate the colocation - // constraints internally. - TF_EXPECT_OK(properties.InferStatically()); -} - } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 54004a5e07..669d02815c 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -112,7 +112,6 @@ tf_cc_test( deps = [ ":constant_folding", "//tensorflow/cc:cc_ops", - "//tensorflow/cc:cc_ops_internal", "//tensorflow/core:all_kernels", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index f2277a9b79..38af7170b5 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -185,6 +185,10 @@ bool IsInnerMatrixTransposeNode(const NodeDef& transpose_node, return false; } +bool SimplyReordersData(const NodeDef& node) { + return node.op() == "Transpose"; +} + // Follow a chain (through input(0)) of ops starting at `source->input(0)` as // long as they // 1. preserve the values of their first input, @@ -703,6 +707,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( node_map->AddOutput(new_transpose->name(), new_cast->name()); new_nodes->push_back(new_transpose); + new_nodes->push_back(new_cast); // Add frame dependencies that the original node might have had. AddFrameControlDeps(node, {new_transpose, new_cast}, new_transpose->input(0), {new_transpose}, @@ -832,7 +837,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } - if (node->input_size() > 0 && IsAggregate(*node)) { + if (node->input_size() > 0 && IsAggregate(*node) && + !node_map->GetOutputs(node->name()).empty()) { // Discard aggregate nodes with a single input. if (node->input_size() == 1) { return node->input(0); @@ -853,7 +859,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( break; } } - if (all_equal && node_map->GetNode(node->name() + "_const") == nullptr) { + if (all_equal) { // 1. Create constant node with value N. const int N = node->input_size(); const auto type = GetDataTypeFromAttr(*node, "T"); @@ -879,6 +885,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( new_mul_node->set_device(node->device()); SetDataTypeToAttr(type, "T", new_mul_node); node_map->AddNode(new_mul_node->name(), new_mul_node); + new_nodes->push_back(new_mul_node); new_mul_node->add_input(new_const_node->name()); node_map->AddOutput(new_const_node->name(), new_mul_node->name()); new_mul_node->add_input(node->input(0)); @@ -895,7 +902,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // where all the inputs are Mul nodes. This pattern occurs frequently in // regularization terms for the gradients during training. if (node->input_size() > 1 && IsAggregate(*node) && - node_map->GetNode(node->name() + "_hoist") == nullptr) { + !node_map->GetOutputs(node->name()).empty()) { // Determine the set of common factors if the input nodes are all Mul nodes. std::set common_factors; int i = 0; @@ -943,6 +950,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( new_mul_node->set_name(new_mul_node->name() + "_hoist"); new_mul_node->set_input(0, common_factor); new_mul_node->set_input(1, new_add_node->name()); + new_nodes->push_back(new_mul_node); node_map->AddNode(new_mul_node->name(), new_mul_node); } } @@ -1007,9 +1015,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } // Fold Conj into Transpose or ConjugateTranspose. - if ((node->op() == "Conj" || node->op() == "Transpose" || - node->op() == "ConjugateTranspose") && - node_map->GetNode(node->name() + "_fused") == nullptr) { + if (node->op() == "Conj" || node->op() == "Transpose" || + node->op() == "ConjugateTranspose") { const NodeDef* input = node_map->GetNode(node->input(0)); const NodeDef* transpose_op = node->op() == "Conj" ? input : node; const NodeDef* conj_op = node->op() == "Conj" ? node : input; @@ -1042,14 +1049,10 @@ namespace { template class SetVector { public: - // Returns false if value already existed in the set, true otherwise. - bool PushBack(const T& value) { - if (!set_.insert(value).second) { - VLOG(2) << "Value " << value << " is already in the set."; - return false; - } + void PushBack(const T& value) { + CHECK(!Exists(value)) << "Value " << value << " is already in the set."; + set_.insert(value); vector_.push_back(value); - return true; } T PopBack() { @@ -1090,11 +1093,6 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( } if (NodeName(simplified_tensor) != node->name()) { - // Always consider simplified_tensor for further optimizations. - const NodeDef* simplified_node = node_map.GetNode(simplified_tensor); - if (simplified_node != nullptr) { - nodes_to_simplify.PushBack(simplified_node); - } // When `node` is simplifed to another node rather than in-place, the // consumers of `node` are already redirected to `simplified_tensor`. // Re-push the consumers into `nodes_to_simplify` for further diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 60fb47f51a..9f471302c7 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -38,8 +38,8 @@ TEST_F(ArithmeticOptimizerTest, NoOp) { ArithmeticOptimizer optimizer; GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); + Status s = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(s); EXPECT_EQ(item.graph.node_size(), output.node_size()); for (int i = 0; i < item.graph.node_size(); ++i) { @@ -66,10 +66,6 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(2, output.node_size()); const NodeDef& new_c1 = output.node(0); @@ -95,10 +91,6 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(4, output.node_size()); const NodeDef& new_c1 = output.node(0); @@ -154,17 +146,13 @@ TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsWithChain) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(6, output.node_size()); EXPECT_EQ("squeeze", output.node(5).input(0)); EXPECT_EQ("c", output.node(2).input(0)); } -TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { +TEST_F(ArithmeticOptimizerTest, SimplifyReplaceTrivialSums) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); Output add = ops::Add(s.WithOpName("add"), x, x); @@ -177,10 +165,6 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(5, output.node_size()); const NodeDef& new_const = output.node(3); @@ -194,61 +178,7 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { EXPECT_EQ("add_mul", new_id.input(0)); } -TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { - // Test case from b/69059093. - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output p = ops::Placeholder(s, DT_FLOAT, ops::Placeholder::Shape({10, 10})); - Output add = ops::Add(s.WithOpName("Add"), p, p); - Output add1 = ops::Add(s.WithOpName("Add_1"), p, p); - Output add4 = ops::Add(s.WithOpName("Add_4"), add, add1); - Output add5 = ops::Add(s.WithOpName("Add_5"), add, add1); - Output add6 = ops::Add(s.WithOpName("Add_6"), add4, add5); - Output id = ops::Identity(s.WithOpName("id"), add6); - - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ArithmeticOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - - EXPECT_EQ(11, output.node_size()); - const NodeDef& new_id = output.node(4); - EXPECT_EQ("id", new_id.name()); - EXPECT_EQ("Add_6_mul", new_id.input(0)); - - // Add4 and add5 get deduped, and we rewrite each of the 3 remaining add nodes - // of the form Add(x,x) into Mul(Const(2), x). - const NodeDef& new_add_4_const = output.node(5); - EXPECT_EQ("Add_4_const", new_add_4_const.name()); - EXPECT_EQ("^Add", new_add_4_const.input(0)); - const NodeDef& new_add_4_mul = output.node(6); - EXPECT_EQ("Add_4_mul", new_add_4_mul.name()); - EXPECT_EQ("Add_4_const", new_add_4_mul.input(0)); - EXPECT_EQ("Add_mul", new_add_4_mul.input(1)); - - const NodeDef& new_add_6_const = output.node(7); - EXPECT_EQ("Add_6_const", new_add_6_const.name()); - EXPECT_EQ("^Add_4_mul", new_add_6_const.input(0)); - const NodeDef& new_add_6_mul = output.node(8); - EXPECT_EQ("Add_6_mul", new_add_6_mul.name()); - EXPECT_EQ("Add_6_const", new_add_6_mul.input(0)); - EXPECT_EQ("Add_4_mul", new_add_6_mul.input(1)); - - const NodeDef& new_add_const = output.node(9); - EXPECT_EQ("Add_const", new_add_const.name()); - EXPECT_EQ("^Placeholder", new_add_const.input(0)); - const NodeDef& new_add_mul = output.node(10); - EXPECT_EQ("Add_mul", new_add_mul.name()); - EXPECT_EQ("Add_const", new_add_mul.input(0)); - EXPECT_EQ("Placeholder", new_add_mul.input(1)); -} - -TEST_F(ArithmeticOptimizerTest, HoistFactor) { +TEST_F(ArithmeticOptimizerTest, SimplifyHoistFactor) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2}); @@ -265,10 +195,6 @@ TEST_F(ArithmeticOptimizerTest, HoistFactor) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(9, output.node_size()); const NodeDef& new_add = output.node(8); @@ -299,10 +225,6 @@ TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("trans_fused", output.node(6).name()); @@ -350,10 +272,6 @@ TEST_F(ArithmeticOptimizerTest, FuseTransposeAndConj) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("conj_fused", output.node(6).name()); @@ -386,10 +304,6 @@ TEST_F(ArithmeticOptimizerTest, FoldTransposeIntoMatMul) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("matmul_fused", output.node(6).name()); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 02a732b092..cb02314183 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -36,7 +36,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/public/version.h" -#include "tensorflow/core/util/bcast.h" namespace tensorflow { namespace grappler { @@ -96,15 +95,11 @@ class DeviceSimple : public DeviceBase { }; } // namespace -ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level, - DeviceBase* cpu_device) - : opt_level_(opt_level), cpu_device_(cpu_device) { +ConstantFolding::ConstantFolding(DeviceBase* cpu_device) + : cpu_device_(cpu_device) { resource_mgr_.reset(new ResourceMgr()); } -ConstantFolding::ConstantFolding(DeviceBase* cpu_device) - : ConstantFolding(RewriterConfig::ON, cpu_device) {} - // static string ConstantFolding::AddControlDependency(const string& input_name, GraphDef* graph, @@ -286,149 +281,6 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item, return Status::OK(); } -bool ShapesEqual(const TensorShapeProto& shape1, - const TensorShapeProto& shape2) { - if (shape1.unknown_rank() || shape2.unknown_rank()) { - return false; - } - if (shape1.dim_size() != shape2.dim_size()) { - return false; - } - for (int i = 0; i < shape1.dim_size(); ++i) { - if (shape1.dim(i).size() != shape2.dim(i).size()) { - return false; - } - } - return true; -} - -namespace { -bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties, - BCast::Vec* shape, int64* min_id) { - if (shape_node.op() == "Shape") { - const std::vector& prop1 = - properties.GetInputProperties(shape_node.name()); - if (prop1.size() != 1) { - return false; - } - const TensorShapeProto& shp = prop1[0].shape(); - if (shp.unknown_rank()) { - return false; - } - for (const auto& dim : shp.dim()) { - shape->push_back(dim.size()); - *min_id = std::min(*min_id, dim.size()); - } - } else { - const TensorProto& raw_val = shape_node.attr().at("value").tensor(); - if (raw_val.dtype() != DT_INT64 && raw_val.dtype() != DT_INT32) { - return false; - } - Tensor value(raw_val.dtype(), raw_val.tensor_shape()); - if (!value.FromProto(raw_val)) { - return false; - } - for (int j = 0; j < value.NumElements(); ++j) { - if (raw_val.dtype() == DT_INT64) { - shape->push_back(value.vec()(j)); - } else { - shape->push_back(value.vec()(j)); - } - } - } - return true; -} -} // namespace - -Status ConstantFolding::MaterializeConstants( - const GrapplerItem& item, const GraphProperties& properties) { - const int node_count = graph_.node_size(); - for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_.mutable_node(i); - const string& op = node.op(); - if (op != "BroadcastGradientArgs") { - continue; - } - const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); - const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); - if (shape_node1 == nullptr || - (shape_node1->op() != "Shape" && shape_node1->op() != "Const") || - shape_node2 == nullptr || - (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) { - continue; - } - int64 min_id = 0; - BCast::Vec shape1; - if (!ExtractShape(*shape_node1, properties, &shape1, &min_id)) { - continue; - } - BCast::Vec shape2; - if (!ExtractShape(*shape_node2, properties, &shape2, &min_id)) { - continue; - } - // A value of -1 means we don't known anything about the dimension. Replace - // the -1 values with unique dimension ids since we don't want two '-1' - // dimensions to be considered equal. - for (auto& id : shape1) { - if (id == -1) { - id = --min_id; - } - } - for (auto& id : shape2) { - if (id == -1) { - id = --min_id; - } - } - BCast bcast(shape1, shape2); - if (!bcast.IsValid()) { - continue; - } - BCast::Vec reduce_dims[2]; - reduce_dims[0] = bcast.grad_x_reduce_idx(); - reduce_dims[1] = bcast.grad_y_reduce_idx(); - - const DataType type = node.attr().at("T").type(); - NodeDef* out[2]; - for (int j = 0; j < 2; ++j) { - if (!reduce_dims[j].empty()) { - // This is the case when a tensor dimension 1 is matched against an - // unknown dimension. The unknown dimension could also be equal to 1, in - // which case there would be no reduction. - out[j] = nullptr; - } else { - Tensor value(type, TensorShape({0})); - string const_name = AddPrefixToNodeName( - strings::StrCat(node.name(), "-", j), kConstantFoldingConst); - out[j] = node_map_->GetNode(const_name); - if (!out[j]) { - out[j] = graph_.add_node(); - *out[j] = CreateNodeDef(const_name, TensorValue(&value)); - out[j]->set_device(node.device()); - node_map_->AddNode(const_name, out[j]); - string ctrl_dep = - AddControlDependency(node.name(), &graph_, node_map_.get()); - *out[j]->add_input() = ctrl_dep; - node_map_->AddOutput(NodeName(ctrl_dep), const_name); - } - } - } - - auto outputs = node_map_->GetOutputs(node.name()); - for (const auto& output : outputs) { - for (int k = 0; k < output->input_size(); ++k) { - int port; - string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port >= 0 && port < 2 && out[port]) { - *output->mutable_input(k) = out[port]->name(); - node_map_->UpdateInput(output->name(), node_name, out[port]->name()); - } - } - } - } - - return Status::OK(); -} - bool ConstantFolding::IsFoldable(const NodeDef& node) const { // Folding not applicable to ops with no inputs. if (node.input().empty()) { @@ -1069,23 +921,23 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, } GraphProperties properties(item); - Status s = properties.InferStatically(); bool has_feed = !item.feed.empty(); - - if (!has_feed && s.ok()) { + if (!has_feed) { // Only use static shape information when there is no feed in the // graph. That's because it's possible to feed a placeholder with a tensor // of any shape, which could make the static information inconsistent with // the shapes actually fed. - TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); - } - if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) { - TF_RETURN_IF_ERROR(MaterializeConstants(item, properties)); + Status s = properties.InferStatically(); + if (!s.ok()) { + VLOG(1) << "Failed to infer graph shapes: " << s; + } else { + TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); + } } TF_RETURN_IF_ERROR(FoldGraph(output)); - if (!has_feed && s.ok()) { + if (!has_feed) { TF_RETURN_IF_ERROR(SimplifyGraph(output, properties)); } return Status::OK(); @@ -1104,14 +956,12 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GrapplerItem item_to_optimize = item; *output = item.graph; - int64 node_count; do { graph_.Swap(output); item_to_optimize.graph = graph_; *output = GraphDef(); - node_count = graph_.node_size(); TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output)); - } while (output->node_size() != node_count); + } while (output->node_size() < graph_.node_size()); *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index dd988f336c..30d778789a 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/utils.h" -#include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { namespace grappler { @@ -38,7 +37,6 @@ class ConstantFolding : public GraphOptimizer { NodeMap* node_map); ConstantFolding(DeviceBase* cpu_device); - ConstantFolding(RewriterConfig::Toggle opt_level, DeviceBase* cpu_device); ~ConstantFolding() override {} @@ -53,8 +51,7 @@ class ConstantFolding : public GraphOptimizer { private: Status MaterializeShapes(const GrapplerItem& item, const GraphProperties& properties); - Status MaterializeConstants(const GrapplerItem& item, - const GraphProperties& properties); + bool IsFoldable(const NodeDef& node) const; Status EvaluateNode(const NodeDef& node, @@ -77,7 +74,6 @@ class ConstantFolding : public GraphOptimizer { GraphDef* output); // Points to an externally provided device or to owned_device_; - RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; std::unique_ptr owned_device_; diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 43f84b1ddf..a1dee6d2fb 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/constant_folding.h" -#include "tensorflow/cc/ops/array_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -839,85 +838,6 @@ TEST_F(ConstantFoldingTest, Packing) { // size needed to naively encode 1000 floats folded twice). EXPECT_GT(8000, output.ByteSizeLong()); } - -TEST_F(ConstantFoldingTest, ConstantMaterialization) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = - ops::Placeholder(s.WithOpName("a"), DT_FLOAT, - ops::Placeholder::Shape(PartialTensorShape({-1, -1}))); - Output b = ops::Square(s.WithOpName("b"), a); - Output c = ops::Mul(s.WithOpName("c"), a, b); - Output d = ops::Shape(s.WithOpName("d"), a); - Output e = ops::Shape(s.WithOpName("e"), b); - - auto f = ops::internal::BroadcastGradientArgs(s.WithOpName("f"), d, e); - Output o1 = ops::Identity(s.WithOpName("o1"), f.r0); - Output o2 = ops::Identity(s.WithOpName("o2"), f.r1); - - Output g = ops::Placeholder(s.WithOpName("g"), DT_FLOAT, - ops::Placeholder::Shape(PartialTensorShape({1}))); - Output h = ops::Shape(s.WithOpName("h"), g); - auto i = ops::internal::BroadcastGradientArgs(s.WithOpName("i"), d, h); - Output p1 = ops::Identity(s.WithOpName("p1"), i.r0); - Output p2 = ops::Identity(s.WithOpName("p2"), i.r1); - - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - - ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); - GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - - // Run a second time to make sure the optimization is idempotent. - item.graph.Swap(&output); - status = fold.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - - int found = 0; - for (const auto& node : output.node()) { - if (node.name() == "o1") { - ++found; - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("ConstantFolding/f-0", node.input(0)); - } else if (node.name() == "o2") { - ++found; - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("ConstantFolding/f-1", node.input(0)); - } else if (node.name() == "ConstantFolding/f-0") { - ++found; - EXPECT_EQ("Const", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("^f", node.input(0)); - EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) - .num_elements()); - } else if (node.name() == "ConstantFolding/f-1") { - ++found; - EXPECT_EQ("Const", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("^f", node.input(0)); - EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) - .num_elements()); - } else if (node.name() == "p1") { - ++found; - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("ConstantFolding/i-0", node.input(0)); - } else if (node.name() == "p2") { - ++found; - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("i:1", node.input(0)); - } else if (node.name() == "ConstantFolding/i-0") { - ++found; - EXPECT_EQ("Const", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("^i", node.input(0)); - EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) - .num_elements()); - } - } - EXPECT_EQ(7, found); -} - } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 6204a81f80..a9875c06d8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -64,8 +64,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr(new ModelPruner())); } if (cfg_.constant_folding() != RewriterConfig::OFF) { - optimizers.push_back(std::unique_ptr( - new ConstantFolding(cfg_.constant_folding(), cpu_device_))); + optimizers.push_back( + std::unique_ptr(new ConstantFolding(cpu_device_))); } if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 11bd8fa5cb..54be02b5f8 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -45,6 +45,7 @@ NodeDef* NodeMap::GetNode(const string& name) const { string node_name = NodeName(name); auto it = nodes_.find(node_name); if (it == nodes_.end()) { + LOG(WARNING) << "Node " << node_name << " is not in the graph."; return nullptr; } return it->second; @@ -61,7 +62,7 @@ const std::set& NodeMap::GetOutputs(const string& node_name) const { void NodeMap::AddNode(const string& name, NodeDef* node) { auto ret = nodes_.insert(std::make_pair(name, node)); CHECK(ret.second) << "Pair (" << name << "," << node - << ") is not inserted because a same key already exists."; + << ") is not inserted because the same key already exists."; } void NodeMap::AddOutput(const string& node_name, const string& output_name) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 4169e842da..a5c62fef17 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -916,6 +916,25 @@ tf_cc_test( ], ) +tf_cuda_cc_test( + name = "bincount_op_test", + size = "small", + srcs = ["bincount_op_test.cc"], + deps = [ + ":bincount_op", + ":ops_testutil", + ":ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cuda_cc_test( name = "constant_op_test", size = "small", @@ -1601,7 +1620,10 @@ DYNAMIC_DEPS = [ tf_kernel_library( name = "dynamic_partition_op", prefix = "dynamic_partition_op", - deps = DYNAMIC_DEPS, + deps = DYNAMIC_DEPS + [ + ":fill_functor", + ":gather_functor", + ] + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( @@ -1671,7 +1693,7 @@ tf_kernel_library( ], ) -tf_cc_tests( +tf_cuda_cc_tests( name = "dynamic_op_test", size = "small", srcs = [ @@ -2554,8 +2576,9 @@ tf_kernel_library( tf_kernel_library( name = "bucketize_op", + gpu_srcs = ["cuda_device_array.h"], prefix = "bucketize_op", - deps = MATH_DEPS, + deps = ARRAY_DEPS, ) tf_kernel_library( @@ -3156,7 +3179,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/eigen3", - ], + ] + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( @@ -4420,15 +4443,6 @@ filegroup( "fill_functor.h", "function_ops.cc", "gather_functor.h", - "gather_nd_op.cc", - "gather_nd_op.h", - "gather_nd_op_cpu_impl.h", - "gather_nd_op_cpu_impl_0.cc", - "gather_nd_op_cpu_impl_1.cc", - "gather_nd_op_cpu_impl_2.cc", - "gather_nd_op_cpu_impl_3.cc", - "gather_nd_op_cpu_impl_4.cc", - "gather_nd_op_cpu_impl_5.cc", "gather_op.cc", "identity_n_op.cc", "identity_n_op.h", @@ -4522,10 +4536,6 @@ filegroup( "fused_batch_norm_op.h", "gemm_functors.h", "image_resizer_state.h", - "initializable_lookup_table.h", - "lookup_table_init_op.h", - "lookup_table_op.h", - "lookup_util.h", "maxpooling_op.h", "mfcc.h", "mfcc_dct.h", @@ -4542,7 +4552,6 @@ filegroup( "resize_nearest_neighbor_op.h", "reverse_op.h", "save_restore_tensor.h", - "segment_reduction_ops.h", "softplus_op.h", "softsign_op.h", "spacetobatch_functor.h", @@ -4592,8 +4601,6 @@ filegroup( "cwise_op_div.cc", "cwise_op_equal_to_1.cc", "cwise_op_equal_to_2.cc", - "cwise_op_not_equal_to_1.cc", - "cwise_op_not_equal_to_2.cc", "cwise_op_exp.cc", "cwise_op_floor.cc", "cwise_op_floor_div.cc", @@ -4635,7 +4642,6 @@ filegroup( "encode_wav_op.cc", "fake_quant_ops.cc", "fifo_queue.cc", - "fifo_queue_op.cc", "fused_batch_norm_op.cc", "population_count_op.cc", "population_count_op.h", @@ -4659,11 +4665,7 @@ filegroup( "depthtospace_op.cc", "dynamic_stitch_op.cc", "in_topk_op.cc", - "initializable_lookup_table.cc", "logging_ops.cc", - "lookup_table_init_op.cc", - "lookup_table_op.cc", - "lookup_util.cc", "lrn_op.cc", "maxpooling_op.cc", "mfcc.cc", @@ -4698,15 +4700,12 @@ filegroup( "save_op.cc", "save_restore_tensor.cc", "save_restore_v2_ops.cc", - "segment_reduction_ops.cc", "session_ops.cc", "softplus_op.cc", "softsign_op.cc", "spacetobatch_functor.cc", "spacetobatch_op.cc", "spacetodepth_op.cc", - "sparse_fill_empty_rows_op.cc", - "sparse_reshape_op.cc", "sparse_to_dense_op.cc", "spectrogram.cc", "spectrogram_op.cc", @@ -4729,7 +4728,6 @@ filegroup( "training_ops.cc", "transpose_functor_cpu.cc", "transpose_op.cc", - "unique_op.cc", "warn_about_ints.cc", "where_op.cc", "xent_op.cc", @@ -6243,11 +6241,8 @@ tf_kernel_library( srcs = ["summary_kernels.cc"], deps = [ ":summary_interface", - "//tensorflow/contrib/tensorboard/db:summary_db_writer", "//tensorflow/core:framework", - "//tensorflow/core:lib", "//tensorflow/core:summary_ops_op_lib", - "//tensorflow/core/lib/db:sqlite", ], ) diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index af629d0de8..f918023693 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -153,7 +153,8 @@ class AvgPoolingOp : public UnaryOp { if (data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, tensor_in, output_shape); + stride_, padding_, data_format_, tensor_in, output_shape, + /*propagate_nans=*/false); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, @@ -408,7 +409,7 @@ class AvgPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape); + output_shape, /*propagate_nans=*/false); } private: @@ -532,7 +533,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape); + output_shape, /*propagate_nans=*/false); } } diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/batch_dataset_op.cc index 6a5fd17a9e..2e52ad39f8 100644 --- a/tensorflow/core/kernels/batch_dataset_op.cc +++ b/tensorflow/core/kernels/batch_dataset_op.cc @@ -143,13 +143,9 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { // Each row of `batch_elements` is a tuple of tensors from the // input iterator. std::vector> batch_elements; + batch_elements.reserve(dataset()->batch_size_); { mutex_lock l(mu_); - if (!input_impl_) { - *end_of_sequence = true; - return Status::OK(); - } - batch_elements.reserve(dataset()->batch_size_); *end_of_sequence = false; for (int i = 0; i < dataset()->batch_size_ && !*end_of_sequence; ++i) { @@ -158,8 +154,6 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { end_of_sequence)); if (!*end_of_sequence) { batch_elements.emplace_back(std::move(batch_element_tuple)); - } else { - input_impl_.reset(); } } } @@ -200,23 +194,14 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { protected: Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - if (!input_impl_) { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("input_impl_empty"), "")); - } else { - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); - } + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); return Status::OK(); } Status RestoreInternal(OpKernelContext* ctx, IteratorStateReader* reader) override { mutex_lock l(mu_); - if (!reader->Contains(full_name("input_impl_empty"))) { - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } else { - input_impl_.reset(); - } + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); return Status::OK(); } diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 1cd5943ef3..766d63e3be 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -17,6 +17,7 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" @@ -27,46 +28,37 @@ namespace tensorflow { using thread::ThreadPool; -template -class BincountOp : public OpKernel { - public: - explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; - void Compute(OpKernelContext* ctx) override { - const Tensor& arr_t = ctx->input(0); - const Tensor& size_tensor = ctx->input(1); - const Tensor& weights_t = ctx->input(2); - int32 size = size_tensor.scalar()(); - OP_REQUIRES( - ctx, size >= 0, - errors::InvalidArgument("size (", size, ") must be non-negative")); - const bool has_weights = weights_t.NumElements() > 0; - OP_REQUIRES(ctx, !(has_weights && arr_t.shape() != weights_t.shape()), - errors::InvalidArgument( - "If weights are passed, they must have the same shape (" + - weights_t.shape().DebugString() + ") as arr (" + - arr_t.shape().DebugString() + ")")); - const auto arr = arr_t.flat(); - const auto weights = weights_t.flat(); +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output) { + int size = output.size(); Tensor all_nonneg_t; - OP_REQUIRES_OK(ctx, - ctx->allocate_temp(DT_BOOL, TensorShape({}), &all_nonneg_t, - AllocatorAttributes())); - all_nonneg_t.scalar().device(ctx->eigen_cpu_device()) = + TF_RETURN_IF_ERROR(context->allocate_temp( + DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes())); + all_nonneg_t.scalar().device(context->eigen_cpu_device()) = (arr >= 0).all(); - OP_REQUIRES(ctx, all_nonneg_t.scalar()(), - errors::InvalidArgument("Input arr must be non-negative!")); + if (!all_nonneg_t.scalar()()) { + return errors::InvalidArgument("Input arr must be non-negative!"); + } // Allocate partial output bin sums for each worker thread. Worker ids in // ParallelForWithWorkerId range from 0 to NumThreads() inclusive. ThreadPool* thread_pool = - ctx->device()->tensorflow_cpu_worker_threads()->workers; + context->device()->tensorflow_cpu_worker_threads()->workers; const int64 num_threads = thread_pool->NumThreads() + 1; Tensor partial_bins_t; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(weights_t.dtype(), - TensorShape({num_threads, size}), - &partial_bins_t)); + TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum::value, + TensorShape({num_threads, size}), + &partial_bins_t)); auto partial_bins = partial_bins_t.matrix(); partial_bins.setZero(); thread_pool->ParallelForWithWorkerId( @@ -75,7 +67,7 @@ class BincountOp : public OpKernel { for (int64 i = start_ind; i < limit_ind; i++) { int32 value = arr(i); if (value < size) { - if (has_weights) { + if (weights.size()) { partial_bins(worker_id, value) += weights(i); } else { // Complex numbers don't support "++". @@ -84,25 +76,62 @@ class BincountOp : public OpKernel { } } }); - TensorShape output_shape({size}); - Tensor* output_t; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t)); + // Sum the partial bins along the 0th axis. Eigen::array reduce_dims({0}); - output_t->flat().device(ctx->eigen_cpu_device()) = - partial_bins.sum(reduce_dims); + output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims); + return Status::OK(); + } +}; + +} // namespace functor + +template +class BincountOp : public OpKernel { + public: + explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& arr_t = ctx->input(0); + const Tensor& size_tensor = ctx->input(1); + const Tensor& weights_t = ctx->input(2); + + int32 size = size_tensor.scalar()(); + OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument( + "size (", size, ") must be non-negative")); + + const auto arr = arr_t.flat(); + const auto weights = weights_t.flat(); + Tensor* output_t; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, TensorShape({size}), &output_t)); + auto output = output_t->flat(); + OP_REQUIRES_OK(ctx, functor::BincountFunctor::Compute( + ctx, arr, weights, output)); } }; -#define REGISTER(TYPE) \ +#define REGISTER_KERNELS(type) \ REGISTER_KERNEL_BUILDER( \ - Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ - BincountOp) + Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ + BincountOp) + +TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); +#undef REGISTER_KERNELS + +#if GOOGLE_CUDA + +#define REGISTER_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("Bincount") \ + .Device(DEVICE_GPU) \ + .HostMemory("size") \ + .TypeConstraint("T"), \ + BincountOp) -TF_CALL_NUMBER_TYPES(REGISTER); +TF_CALL_int32(REGISTER_KERNELS); +TF_CALL_float(REGISTER_KERNELS); +#undef REGISTER_KERNELS -// TODO(ringwalt): Add a GPU implementation. We probably want to take a -// different approach, e.g. threads in a warp each taking a pass over the same -// data, and each thread summing a single bin. +#endif // GOOGLE_CUDA } // end namespace tensorflow diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h new file mode 100644 index 0000000000..0f8dd2b82a --- /dev/null +++ b/tensorflow/core/kernels/bincount_op.h @@ -0,0 +1,41 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_BINCOUNT_OP_H_ +#define TENSORFLOW_BINCOUNT_OP_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output); +}; + +} // end namespace functor + +} // end namespace tensorflow + +#endif // TENSORFLOW_BINCOUNT_OP_H_ diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc new file mode 100644 index 0000000000..ae9e26ffdf --- /dev/null +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -0,0 +1,114 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "tensorflow/core/kernels/bincount_op.h" +#include "external/cub_archive/cub/device/device_histogram.cuh" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output) { + if (weights.size() != 0) { + return errors::InvalidArgument( + "Weights should not be passed as it should be " + "handled by unsorted_segment_sum"); + } + if (output.size() == 0) { + return Status::OK(); + } + // In case weight.size() == 0, use CUB + size_t temp_storage_bytes = 0; + const int32* d_samples = arr.data(); + T* d_histogram = output.data(); + int num_levels = output.size() + 1; + int32 lower_level = 0; + int32 upper_level = output.size(); + int num_samples = arr.size(); + const cudaStream_t& stream = GetCudaStream(context); + + // The first HistogramEven is to obtain the temp storage size required + // with d_temp_storage = NULL passed to the call. + auto err = cub::DeviceHistogram::HistogramEven( + /* d_temp_storage */ NULL, + /* temp_storage_bytes */ temp_storage_bytes, + /* d_samples */ d_samples, + /* d_histogram */ d_histogram, + /* num_levels */ num_levels, + /* lower_level */ lower_level, + /* upper_level */ upper_level, + /* num_samples */ num_samples, + /* stream */ stream); + if (err != cudaSuccess) { + return errors::Internal( + "Could not launch HistogramEven to get temp storage: ", + cudaGetErrorString(err), "."); + } + Tensor temp_storage; + TF_RETURN_IF_ERROR(context->allocate_temp( + DataTypeToEnum::value, + TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); + + void* d_temp_storage = temp_storage.flat().data(); + // The second HistogramEven is to actual run with d_temp_storage + // allocated with temp_storage_bytes. + err = cub::DeviceHistogram::HistogramEven( + /* d_temp_storage */ d_temp_storage, + /* temp_storage_bytes */ temp_storage_bytes, + /* d_samples */ d_samples, + /* d_histogram */ d_histogram, + /* num_levels */ num_levels, + /* lower_level */ lower_level, + /* upper_level */ upper_level, + /* num_samples */ num_samples, + /* stream */ stream); + if (err != cudaSuccess) { + return errors::Internal("Could not launch HistogramEven: ", + cudaGetErrorString(err), "."); + } + return Status::OK(); + } +}; + +} // end namespace functor + +#define REGISTER_GPU_SPEC(type) \ + template struct functor::BincountFunctor; + +TF_CALL_int32(REGISTER_GPU_SPEC); +TF_CALL_float(REGISTER_GPU_SPEC); +#undef REGISTER_GPU_SPEC + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc new file mode 100644 index 0000000000..14becc87a7 --- /dev/null +++ b/tensorflow/core/kernels/bincount_op_test.cc @@ -0,0 +1,75 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace tensorflow { + +static Graph* Bincount(int arr_size, int nbins) { + Graph* g = new Graph(OpRegistry::Global()); + + Tensor arr(DT_INT32, TensorShape({arr_size})); + arr.flat() = arr.flat().setRandom().abs(); + + Tensor size(DT_INT32, TensorShape({(int32)1})); + size.flat()(0) = (int32)nbins; + + Tensor weights(DT_INT32, TensorShape({0})); + + Node* node; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Bincount") + .Input(test::graph::Constant(g, arr)) + .Input(test::graph::Constant(g, size)) + .Input(test::graph::Constant(g, weights)) + .Attr("T", DT_INT32) + .Finalize(g, &node)); + return g; +} + +#define BM_BincountDev(K, NBINS, type) \ + static void BM_Bincount##_##type##_##K##_##NBINS(int iters) { \ + testing::ItemsProcessed(static_cast(iters) * K * 1024); \ + test::Benchmark(#type, Bincount(K * 1024, NBINS)).Run(iters); \ + } \ + BENCHMARK(BM_Bincount##_##type##_##K##_##NBINS); + +BM_BincountDev(32, 1000, cpu); +BM_BincountDev(32, 2000, cpu); +BM_BincountDev(32, 5000, cpu); +BM_BincountDev(64, 1000, cpu); +BM_BincountDev(64, 2000, cpu); +BM_BincountDev(64, 5000, cpu); +BM_BincountDev(128, 1000, cpu); +BM_BincountDev(128, 2000, cpu); +BM_BincountDev(128, 5000, cpu); + +BM_BincountDev(32, 1000, gpu); +BM_BincountDev(32, 2000, gpu); +BM_BincountDev(32, 5000, gpu); +BM_BincountDev(64, 1000, gpu); +BM_BincountDev(64, 2000, gpu); +BM_BincountDev(64, 5000, gpu); +BM_BincountDev(128, 1000, gpu); +BM_BincountDev(128, 2000, gpu); +BM_BincountDev(128, 5000, gpu); + +} // end namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc index 93c2d01221..c1693de538 100644 --- a/tensorflow/core/kernels/bucketize_op.cc +++ b/tensorflow/core/kernels/bucketize_op.cc @@ -15,15 +15,43 @@ limitations under the License. // See docs in ../ops/math_ops.cc. -#include -#include - +#include "tensorflow/core/kernels/bucketize_op.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { +using thread::ThreadPool; + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + template +struct BucketizeFunctor { + // PRECONDITION: boundaries_vector must be sorted. + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output) { + const int N = input.size(); + for (int i = 0; i < N; i++) { + auto first_bigger_it = std::upper_bound( + boundaries_vector.begin(), boundaries_vector.end(), input(i)); + output(i) = first_bigger_it - boundaries_vector.begin(); + } + + return Status::OK(); + } +}; +} // namespace functor + +template class BucketizeOp : public OpKernel { public: explicit BucketizeOp(OpKernelConstruction* context) : OpKernel(context) { @@ -34,36 +62,42 @@ class BucketizeOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input_tensor = context->input(0); - auto input = input_tensor.flat(); + const auto input = input_tensor.flat(); + Tensor* output_tensor = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); auto output = output_tensor->template flat(); - - const int N = input.size(); - for (int i = 0; i < N; i++) { - output(i) = CalculateBucketIndex(input(i)); - } + OP_REQUIRES_OK(context, functor::BucketizeFunctor::Compute( + context, input, boundaries_, output)); } private: - int32 CalculateBucketIndex(const T value) { - auto first_bigger_it = - std::upper_bound(boundaries_.begin(), boundaries_.end(), value); - return first_bigger_it - boundaries_.begin(); - } std::vector boundaries_; }; #define REGISTER_KERNEL(T) \ REGISTER_KERNEL_BUILDER( \ Name("Bucketize").Device(DEVICE_CPU).TypeConstraint("T"), \ - BucketizeOp); + BucketizeOp); + +REGISTER_KERNEL(int32); +REGISTER_KERNEL(int64); +REGISTER_KERNEL(float); +REGISTER_KERNEL(double); +#undef REGISTER_KERNEL + +#if GOOGLE_CUDA +#define REGISTER_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Bucketize").Device(DEVICE_GPU).TypeConstraint("T"), \ + BucketizeOp); REGISTER_KERNEL(int32); REGISTER_KERNEL(int64); REGISTER_KERNEL(float); REGISTER_KERNEL(double); #undef REGISTER_KERNEL +#endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.h b/tensorflow/core/kernels/bucketize_op.h new file mode 100644 index 0000000000..c8e461beb9 --- /dev/null +++ b/tensorflow/core/kernels/bucketize_op.h @@ -0,0 +1,41 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_BUCKETIZE_OP_H_ +#define TENSORFLOW_BUCKETIZE_OP_H_ + +#include +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { +namespace functor { + +template +struct BucketizeFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output); +}; + +} // namespace functor +} // namespace tensorflow + +#endif // TENSORFLOW_BUCKETIZE_OP_H_ diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc new file mode 100644 index 0000000000..aafbbe41b4 --- /dev/null +++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc @@ -0,0 +1,101 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/bucketize_op.h" +#include "tensorflow/core/kernels/cuda_device_array.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +template +__global__ void BucketizeCustomKernel( + const int32 size_in, const T* in, const int32 size_boundaries, + CudaDeviceArrayStruct boundaries_array, int32* out) { + const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array); + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in[i]; + int32 bucket = 0; + int32 count = size_boundaries; + while (count > 0) { + int32 l = bucket; + int32 step = count / 2; + l += step; + if (!(value < static_cast(boundaries[l]))) { + bucket = ++l; + count -= step + 1; + } else { + count = step; + } + } + out[i] = bucket; + } +} + +namespace functor { + +template +struct BucketizeFunctor { + // PRECONDITION: boundaries_vector must be sorted. + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output) { + const GPUDevice& d = context->eigen_device(); + + CudaDeviceArrayOnHost boundaries_array(context, + boundaries_vector.size()); + TF_RETURN_IF_ERROR(boundaries_array.Init()); + for (int i = 0; i < boundaries_vector.size(); ++i) { + boundaries_array.Set(i, boundaries_vector[i]); + } + TF_RETURN_IF_ERROR(boundaries_array.Finalize()); + + CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); + BucketizeCustomKernel< + T><<>>( + input.size(), input.data(), boundaries_vector.size(), + boundaries_array.data(), output.data()); + + return Status::OK(); + } +}; +} // namespace functor + +#define REGISTER_GPU_SPEC(type) \ + template struct functor::BucketizeFunctor; + +REGISTER_GPU_SPEC(int32); +REGISTER_GPU_SPEC(int64); +REGISTER_GPU_SPEC(float); +REGISTER_GPU_SPEC(double); +#undef REGISTER_GPU_SPEC + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/concat_lib_cpu.cc b/tensorflow/core/kernels/concat_lib_cpu.cc index b0bec0c5dc..258ce15456 100644 --- a/tensorflow/core/kernels/concat_lib_cpu.cc +++ b/tensorflow/core/kernels/concat_lib_cpu.cc @@ -74,14 +74,11 @@ REGISTER(qint16) REGISTER(qint32) REGISTER(bfloat16) -#if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) && \ - !defined(__ANDROID_TYPES_FULL__) -// Primarily used for SavedModel support on mobile. Registering it here only if -// __ANDROID_TYPES_FULL__ is not defined, as that already register strings +#if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) +// Primarily used for SavedModel support on mobile. REGISTER(string); #endif // defined(IS_MOBILE_PLATFORM) && - // !defined(SUPPORT_SELECTIVE_REGISTRATION) && - // !defined(__ANDROID_TYPES_FULL__) + // !defined(SUPPORT_SELECTIVE_REGISTRATION) #ifdef TENSORFLOW_USE_SYCL template diff --git a/tensorflow/core/kernels/concatenate_dataset_op.cc b/tensorflow/core/kernels/concatenate_dataset_op.cc index c3bd89c479..711c234129 100644 --- a/tensorflow/core/kernels/concatenate_dataset_op.cc +++ b/tensorflow/core/kernels/concatenate_dataset_op.cc @@ -104,10 +104,6 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); - if (!input_impl_) { - *end_of_sequence = true; - return Status::OK(); - } while (i_ < 2) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -144,9 +140,7 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel { } else if (i_ == 2) { input_impl_.reset(); } - if (input_impl_) { - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); return Status::OK(); } diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 21f5cb1716..f819fccbfb 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -236,6 +236,7 @@ class Conv3DBackpropInputOp : public OpKernel { REGISTER_KERNEL_BUILDER( \ Name("Conv3DBackpropInputV2").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DBackpropInputOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -383,6 +384,7 @@ class Conv3DBackpropFilterOp : public OpKernel { .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ Conv3DBackpropFilterOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -409,6 +411,7 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); +DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor @@ -1098,22 +1101,29 @@ class Conv3DBackpropFilterOp : public OpKernel { bool cudnn_use_autotune_; }; -REGISTER_KERNEL_BUILDER( - Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), - Conv3DBackpropInputOp); -REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .HostMemory("input_sizes"), - Conv3DBackpropInputOp); -REGISTER_KERNEL_BUILDER( - Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), - Conv3DBackpropFilterOp); -REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .HostMemory("filter_sizes"), - Conv3DBackpropFilterOp); + + +#define REGISTER_GPU_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropInputOp); \ + REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("input_sizes"), \ + Conv3DBackpropInputOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropFilterOp); \ + REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("filter_sizes"), \ + Conv3DBackpropFilterOp); +TF_CALL_half(REGISTER_GPU_KERNEL); +TF_CALL_float(REGISTER_GPU_KERNEL); +#undef REGISTER_GPU_KERNEL + #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 8a89d564de..37cb67bc51 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -145,6 +145,7 @@ class Conv3DOp : public BinaryOp { REGISTER_KERNEL_BUILDER( \ Name("Conv3D").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -482,12 +483,16 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); +DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor // Registration of the GPU implementations. +REGISTER_KERNEL_BUILDER( + Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), + Conv3DOp); REGISTER_KERNEL_BUILDER( Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), Conv3DOp); diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc index 7bdd8d22a3..39c8814073 100644 --- a/tensorflow/core/kernels/cwise_op_acosh.cc +++ b/tensorflow/core/kernels/cwise_op_acosh.cc @@ -20,16 +20,8 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Acosh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double); #endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index e0644323c0..8d44208aa7 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -20,17 +20,9 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Asinh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYC +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index 058f5140c5..bbc69e45aa 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -20,17 +20,9 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Atanh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYC +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 6c22b124de..d32185b6bf 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -49,7 +49,11 @@ template struct scalar_asinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::asinh(a); +#else return std::asinh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -61,7 +65,11 @@ template struct scalar_acosh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::acosh(a); +#else return std::acosh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -73,7 +81,11 @@ template struct scalar_atanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::atanh(a); +#else return std::atanh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/dataset.cc index fcfa2956f7..0414875a5d 100644 --- a/tensorflow/core/kernels/dataset.cc +++ b/tensorflow/core/kernels/dataset.cc @@ -126,6 +126,7 @@ void BinaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx, MakeDataset(ctx, input, another_input, output); } +const char IteratorBase::kIteratorExhausted[] = "ITERATOR_EXHAUSTED"; const char GraphDatasetBase::kDatasetGraphKey[] = "_DATASET_GRAPH"; const char GraphDatasetBase::kDatasetGraphOutputNodeKey[] = "_DATASET_GRAPH_OUTPUT_NODE"; diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index aa4f436b39..4a42ac80c3 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -306,14 +306,27 @@ class IteratorBase { // Saves the state of this iterator. virtual Status Save(IteratorStateWriter* writer) { - return SaveInternal(writer); + if (is_exhausted_) { + LOG(INFO) << "Iterator exhausted."; + return writer->WriteScalar(kIteratorExhausted, kIteratorExhausted); + } else { + return SaveInternal(writer); + } } // Restores the state of this iterator. virtual Status Restore(OpKernelContext* ctx, IteratorStateReader* reader) { - return RestoreInternal(ctx, reader); + if (reader->Contains(kIteratorExhausted)) { + LOG(INFO) << "Iterator exhausted. Nothing to restore."; + is_exhausted_ = true; + return Status::OK(); + } else { + return RestoreInternal(ctx, reader); + } } + static const char kIteratorExhausted[]; + protected: // This is needed so that sub-classes of IteratorBase can call // `SaveInternal` on their parent iterators, e.g., in @@ -341,6 +354,8 @@ class IteratorBase { IteratorStateReader* reader) { return errors::Unimplemented("RestoreInternal"); } + + bool is_exhausted_ = false; // Whether the iterator has been exhausted. }; // Represents a (potentially infinite) range of outputs, where each @@ -476,6 +491,10 @@ class DatasetIterator : public IteratorBase { Status GetNext(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) final { port::Tracing::TraceMe activity(params_.prefix); + if (is_exhausted_) { + *end_of_sequence = true; + return Status::OK(); + } return GetNextInternal(ctx, out_tensors, end_of_sequence); } diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 9804d7d38e..53d65a22d1 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -231,7 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args, } // Pad to vector-register width (if needed). for (int64 d = 0; d < pad_size; ++d) { - buffer[buf_base + vectorized_size + scalar_size + d] = 0; + buffer[buf_base + vectorized_size + scalar_size + d] = static_cast(0); } } } @@ -297,7 +297,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -318,7 +318,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -346,7 +346,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, if (depth_multiplier > 1) { for (int64 d = 0; d < in_depth; ++d) { const int64 index = d * args.depth_multiplier; - T accum = 0; + T accum = static_cast(0); for (int64 dm = 0; dm < dm_vectorized_size; dm += kPacketSize) { const auto v = Eigen::internal::ploadu(out_buffer + index + dm); accum += Eigen::internal::predux(v); @@ -510,6 +510,7 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA +extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; @@ -884,6 +885,7 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA +extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index bbeeaf7895..2759ecb2f1 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -94,7 +94,7 @@ struct DepthwiseConv2DKernel { for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -115,7 +115,7 @@ struct DepthwiseConv2DKernel { } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -246,6 +246,7 @@ extern template class LaunchConv2DOp; #if GOOGLE_CUDA // Extern template instantiated in depthwise_conv_op_gpu.cc. +extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; @@ -419,12 +420,17 @@ class DepthwiseConv2dNativeOp : public BinaryOp { Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint("T"), \ DepthwiseConv2dNativeOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); #if !defined(PLATFORM_WINDOWS) || !defined(_DEBUG) TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER( + Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), + DepthwiseConv2dNativeOp); + REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), DepthwiseConv2dNativeOp); diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index aa5b5c76f6..11aed5b415 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -158,7 +158,7 @@ struct DepthwiseFilterPadOp { } // Pad the remainder of output to vector-register boundary. for (int64 j = 0; j < pad_size; ++j) { - padded_filter[output_base + vectorized_size + scalar_size + j] = 0; + padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast(0); } } } @@ -266,7 +266,7 @@ struct DepthwiseInputCopyOp { // Pad the remainder of the output to vector register boundary. for (int64 d = 0; d < output_pad_size; ++d) { - in_buf[d] = 0; + in_buf[d] = static_cast(0); } in_buf += output_pad_size; diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index ecfe51d599..903aac5d68 100644 --- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -105,7 +105,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = 0; + T sum = static_cast(0); const int input_offset_temp = in_rows * OB; if (input_row_start >= 0 && input_col_start >= 0 && @@ -258,8 +258,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( __syncthreads(); if (depth_in_range) { - T sum1 = 0; - T sum2 = 0; + T sum1 = static_cast(0); + T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -369,7 +369,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = 0; + T sum = static_cast(0); if (input_row_start >= 0 && input_col_start >= 0 && input_row_end < in_rows && input_col_end < in_cols) { // Loop that doesn't need to check for boundary conditions. @@ -529,8 +529,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( __syncthreads(); if (slice_in_range) { - T sum1 = 0; - T sum2 = 0; + T sum1 = static_cast(0); + T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -710,6 +710,7 @@ void LaunchDepthwiseConvOp::operator()(OpKernelContext* ctx, "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed")); } +template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; @@ -744,7 +745,7 @@ __global__ void __launch_bounds__(640, 2) const int in_r = (thread_id / in_depth / in_cols) % in_rows; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = 0; + T sum = static_cast(0); const int out_r_start = tf_max(0, (in_r - filter_rows + pad_rows + stride) / stride); @@ -810,7 +811,7 @@ __global__ void __launch_bounds__(640, 2) const int in_d = (thread_id / in_cols / in_rows) % in_depth; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = 0; + T sum = static_cast(0); const int out_d_start = in_d * depth_multiplier; const int out_d_end = out_d_start + depth_multiplier; @@ -919,6 +920,7 @@ void LaunchDepthwiseConvBackpropInputOp::operator()( "utGPULaunch failed")); } +template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; @@ -1631,6 +1633,7 @@ void LaunchDepthwiseConvBackpropFilterOp::operator()( "terGPULaunch failed")); } +template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; } // namespace tensorflow diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc new file mode 100644 index 0000000000..7249c8c66c --- /dev/null +++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc @@ -0,0 +1,376 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// The algorithm for dynamic partition has the following steps: +// 1. Let N be the size of partitions. We initialize a new vector indices_in +// with the values 0, 1, 2, ..., N-1. +// 2. We apply cub::DeviceRadixSort::SortPairs to the key - value pairs given +// by partitions and indices_in. This will result in two new vectors +// partitions_out and indices_out, with partitions_out sorted. +// 3. The first dimension of outputs[i] is equal to the length of the interval +// of i-values in partitions_out. We determine it in two steps: +// - compute the starting and ending point of each interval, +// - subtract the starting and ending points to find the length. +// The result is placed in partition_count. +// 4. Because partition_count is on the GPU, we bring it asynchronously to +// the CPU. Then we can allocate the output tensors. +// 5. Finally, we use indices_out and the gather functor to collect the output. +// This works, because for each interval of i-values, indices_out points +// to the slices which should form output[i]. + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "external/cub_archive/cub/device/device_radix_sort.cuh" +#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/gather_functor_gpu.cu.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +namespace { + +template +__global__ void RangeInitKernel(const T start, const T delta, const int32 size, + T* out) { + CUDA_1D_KERNEL_LOOP(i, size) { out[i] = start + i * delta; } +} + +__global__ void FindEndpointsKernel(const int32* partitions, int32 size, + int32 nump, int32* start, int32* end) { + CUDA_1D_KERNEL_LOOP(i, size) { + int32 current = ldg(partitions + i); + if (FastBoundsCheck(current, nump)) { + if (i == 0) + start[current] = i; + else { + int32 before = ldg(partitions + i - 1); + if (before != current) start[current] = i; + } + if (i == size - 1) + end[current] = i + 1; + else { + int32 after = ldg(partitions + i + 1); + if (after != current) end[current] = i + 1; + } + } + } +} + +// We create a local version of subtract, because the tf.subtract kernel +// is not defined for int32. We use it to compute the length of an interval +// by subtracting the endpoints. +__global__ void IntervalLengthKernel(int32* start, int32 size, int32* end) { + CUDA_1D_KERNEL_LOOP(i, size) { + int32 start_point = ldg(start + i); + end[i] = end[i] - start_point; + } +} + +// Initialize out with range start, start + delta, start + 2 * delta, ... +// This is needed because tf.range has no GPU implementation. +template +void RangeInit(const GPUDevice& d, const T start, const T delta, + const int32 size, typename TTypes::Flat out) { + CudaLaunchConfig config = GetCudaLaunchConfig(size, d); + RangeInitKernel< + T><<>>( + start, delta, size, out.data()); +} + +// Partitions is a sorted vector of N non-negative integer numbers. +// This function computes the starting and ending points of each interval +// of values. +void ComputeIntervals(const GPUDevice& d, Tensor* partitions, int32 N, + int32 nump, int32* start_ptr, int32* end_ptr) { + CudaLaunchConfig config = GetCudaLaunchConfig(N, d); + FindEndpointsKernel<<>>(partitions->flat().data(), N, nump, + start_ptr, end_ptr); +} + +// Subtract the ending points of each interval to obtain the interval length. +void ComputeItvLength(const GPUDevice& d, int32 num, int32* start_ptr, + int32* end_ptr) { + CudaLaunchConfig config = GetCudaLaunchConfig(num, d); + IntervalLengthKernel<<>>(start_ptr, num, end_ptr); +} + +template +void CallGatherKernel(const GPUDevice& d, const T* params, const int32* indices, + T* out, int64 gather_dim_size, int64 indices_size, + int64 slice_size, int64 out_size) { + CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d); + GatherOpKernel< + T, int32, + true><<>>( + params, indices, out, gather_dim_size, indices_size, slice_size, + out_size); +} + +} // namespace + +// The current implementation has memory cost on GPU +// I + P + max(3N + R, O + N), where: +// I - the size of the input +// N - the size of the partitions tensor +// R - the temporary storage used by cub::RadixSort, about 2N +// P - the number of partitions +// O - the size of the output +// So roughly the cost is I + P + max(5N, O + N). +template +class DynamicPartitionOpGPU : public AsyncOpKernel { + public: + explicit DynamicPartitionOpGPU(OpKernelConstruction* c) : AsyncOpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("num_partitions", &num_partitions_)); + OP_REQUIRES(c, num_partitions_ >= 1, + errors::InvalidArgument("num_partitions must be at least 1")); + } + + void AllocateTempSpace(OpKernelContext* c, int32 N, Tensor* indices_in, + Tensor* partitions_out, Tensor* indices_out, + DoneCallback done) { + int32 M = std::max(N, num_partitions_); + // indices_in will be made slightly larger to accomodate + // later computations. + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({M}), indices_in), done); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({N}), partitions_out), done); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({N}), indices_out), done); + } + + void AllocateOutputs(OpKernelContext* c, const Tensor* data, + const Tensor* partitions, const Tensor* partition_count, + OpOutputList* Tout, DoneCallback done) { + auto e_part_count = partition_count->flat(); + // Allocate output tensors of the right size + OP_REQUIRES_OK_ASYNC(c, c->output_list("outputs", Tout), done); + for (int p = 0; p < num_partitions_; p++) { + TensorShape shape; + shape.AddDim(e_part_count(p)); + for (int i = partitions->dims(); i < data->dims(); i++) { + shape.AddDim(data->dim_size(i)); + } + Tensor* out; + OP_REQUIRES_OK_ASYNC(c, Tout->allocate(p, shape, &out), done); + } + } + + void ComputeAsync(OpKernelContext* c, DoneCallback done) { + const Tensor& data = c->input(0); + const Tensor& partitions = c->input(1); + + OP_REQUIRES_ASYNC( + c, TensorShapeUtils::StartsWith(data.shape(), partitions.shape()), + errors::InvalidArgument("data.shape must start with partitions.shape, ", + "got data.shape = ", data.shape().DebugString(), + ", partitions.shape = ", + partitions.shape().DebugString()), + done); + + Tensor partition_count; + + // We must handle the case of empty partitions separately, + // because kernels don't work with 0-sized tensors. + if (partitions.NumElements() == 0) { + AllocatorAttributes alloc_attr; + alloc_attr.set_on_host(true); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}), + &partition_count, alloc_attr), + done); + auto e_part_count = partition_count.flat(); + for (int i = 0; i < num_partitions_; i++) e_part_count(i) = 0; + OpOutputList outputs; + this->AllocateOutputs(c, &data, &partitions, &partition_count, &outputs, + done); + if (c->status().ok()) done(); + return; + } + + // Prepare for counting. + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}), + &partition_count), + done); + Tensor indices_out; + // Count how many times each partition index occurs. + // Also sort the info in partitions and output it in indices_out, + // in preparation for the next step. + this->CountAndSortParts(c, &partitions, &partition_count, &indices_out, + done); + if (!c->status().ok()) return; + + // In order to allocate the output tensor we have to move partition_count + // to CPU. + auto* stream = c->op_device_context()->stream(); + OP_REQUIRES_ASYNC(c, stream, errors::Internal("No GPU stream available."), + done); + Tensor cpu_tensor; + AllocatorAttributes alloc_attr; + alloc_attr.set_on_host(true); + alloc_attr.set_gpu_compatible(true); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(partition_count.dtype(), partition_count.shape(), + &cpu_tensor, alloc_attr), + done); + perftools::gputools::DeviceMemoryBase wrapped( + partition_count.flat().data(), num_partitions_ * sizeof(int32)); + const bool status = + stream + ->ThenMemcpy(cpu_tensor.flat().data(), wrapped, + num_partitions_ * sizeof(int32)) + .ok(); + OP_REQUIRES_ASYNC( + c, status, + errors::Internal("Failed to launch copy from device to host."), done); + + // Keep a reference to partition_count so that the buffer + // is not deallocated at the end of the function, before + // memcpy is completed. + TensorReference partition_ref(partition_count); + auto wrapped_callback = [this, c, &data, &partitions, indices_out, + partition_ref, cpu_tensor, done]() { + OpOutputList outputs; + this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done); + if (!c->status().ok()) { + partition_ref.Unref(); + return; + } + int32 N = partitions.NumElements(); + int64 slice_size = data.NumElements() / N; + this->GatherSlices(c, &data, &indices_out, N, slice_size, outputs); + partition_ref.Unref(); + done(); + }; + + c->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute( + stream, wrapped_callback); + } + + protected: + void RadixSort(OpKernelContext* c, const Tensor* partitions, + Tensor* indices_in, Tensor* partitions_out, + Tensor* indices_out, DoneCallback done) { + int32 N = partitions->NumElements(); + const GPUDevice& device = c->eigen_device(); + const cudaStream_t& cu_stream = GetCudaStream(c); + + // Initialize the indices_in tensor using the Range GPU kernel. + RangeInit(device, 0, 1, N, indices_in->flat()); + // Obtain the pointers to inner buffers. + const int32* partitions_ptr = partitions->flat().data(); + int32* partitions_out_ptr = partitions_out->flat().data(); + int32* indices_in_ptr = indices_in->flat().data(); + int32* indices_out_ptr = indices_out->flat().data(); + // Determine temporary device storage requirements. + Tensor cub_temp_storage; + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs( + NULL, temp_storage_bytes, partitions_ptr, partitions_out_ptr, + indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); + // Allocate temporary storage. + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &cub_temp_storage), + done); + // Radix-sort the partition information. + cub::DeviceRadixSort::SortPairs( + cub_temp_storage.flat().data(), temp_storage_bytes, + partitions_ptr, partitions_out_ptr, indices_in_ptr, indices_out_ptr, N, + 0, sizeof(int32) * 8, cu_stream); + } // At this point cub_temp_storage will be marked for deallocation. + + void CountAndSortParts(OpKernelContext* c, const Tensor* partitions, + Tensor* partition_count, Tensor* indices_out, + DoneCallback done) { + const GPUDevice& device = c->eigen_device(); + int32 N = partitions->NumElements(); + Tensor indices_in; + Tensor partitions_out; + + // Allocate memory for Radix-Sort. + this->AllocateTempSpace(c, N, &indices_in, &partitions_out, indices_out, + done); + if (!c->status().ok()) return; + this->RadixSort(c, partitions, &indices_in, &partitions_out, indices_out, + done); + if (!c->status().ok()) return; + // We still need a little bit of additional memory. However, + // we can reuse the indices_in tensor. We could also use atomic + // operations and no additional memory, but this approach seems faster. + + // Zero-out the allocated memory. + functor::SetZeroFunctor zero_functor; + zero_functor(device, partition_count->flat()); + zero_functor(device, indices_in.flat()); + // Obtain the pointers to inner buffers. + int32* start_ptr = indices_in.flat().data(); + int32* end_ptr = partition_count->flat().data(); + // Obtain the starting and ending points of each interval. + ComputeIntervals(device, &partitions_out, N, num_partitions_, start_ptr, + end_ptr); + // Subtract to compute the number of appearances of each id. + ComputeItvLength(device, num_partitions_, start_ptr, end_ptr); + } // At this point indices_in and partitions_out will be marked + // for deallocation. + + void GatherSlices(OpKernelContext* c, const Tensor* data, + const Tensor* indices, int32 N, int64 slice_size, + OpOutputList& outs) { + const GPUDevice& device = c->eigen_device(); + const int32* ind_base = indices->flat().data(); + const T* data_base = data->flat().data(); + + for (int p = 0; p < num_partitions_; p++) { + int32 indices_size = outs[p]->dim_size(0); + int64 out_size = outs[p]->NumElements(); + T* out_base = outs[p]->flat().data(); + if (out_size > 0) + CallGatherKernel(device, data_base, ind_base, out_base, N, + indices_size, slice_size, out_size); + ind_base += indices_size; + } + } + + int num_partitions_; +}; + +#define REGISTER_DYNAMIC_PARTITION_GPU(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("DynamicPartition").Device(DEVICE_GPU).TypeConstraint("T"), \ + DynamicPartitionOpGPU) + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_PARTITION_GPU); +TF_CALL_complex64(REGISTER_DYNAMIC_PARTITION_GPU); +TF_CALL_complex128(REGISTER_DYNAMIC_PARTITION_GPU); +#undef REGISTER_DYNAMIC_PARTITION_GPU + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/dynamic_partition_op_test.cc b/tensorflow/core/kernels/dynamic_partition_op_test.cc index 0e8fbc0a67..9a7ed0af21 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_test.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -23,10 +24,14 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/testlib.h" #include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/random/simple_philox.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" namespace tensorflow { namespace { @@ -153,5 +158,58 @@ TEST_F(DynamicPartitionOpTest, Error_IndexOutOfRange) { << s; } +Node* DynamicPartitionNode(Graph* g, Node* in0, Node* in1, int num_partitions) { + Node* ret; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "DynamicPartition") + .Input(in0) + .Input(in1) + .Attr("num_partitions", num_partitions) + .Finalize(g, &ret)); + return ret; +} + +template +static Graph* DynamicPartition(int num_partitions, int dim) { + Graph* g = new Graph(OpRegistry::Global()); + // Always use a 128MB buffer. + const int kRows = ((128 << 20) / sizeof(T)) / dim; + Tensor data(DataTypeToEnum::value, TensorShape({kRows, dim})); + data.flat().setRandom(); + + random::PhiloxRandom philox(301, 17); + random::SimplePhilox rnd(&philox); + Tensor partitions(DT_INT32, TensorShape({kRows})); + for (int i = 0; i < kRows; i++) { + partitions.flat()(i) = rnd.Uniform(num_partitions); + } + DynamicPartitionNode(g, test::graph::Constant(g, data), + test::graph::Constant(g, partitions), num_partitions); + return g; +} + +#define BM_DYNAMIC_PARTITION(DEVICE, T, num) \ + static void BM_##DEVICE##_dynpart_##T##_##num(int iters, int dim) { \ + const int64 items = ((128 << 20) / sizeof(T)); \ + const int64 tot = static_cast(iters) * items; \ + testing::ItemsProcessed(tot); \ + testing::UseRealTime(); \ + test::Benchmark(#DEVICE, DynamicPartition(num, dim)).Run(iters); \ + } \ + BENCHMARK(BM_##DEVICE##_dynpart_##T##_##num)->Arg(1)->Arg(256) + +BM_DYNAMIC_PARTITION(cpu, float, 2); +BM_DYNAMIC_PARTITION(cpu, float, 100); +BM_DYNAMIC_PARTITION(cpu, double, 2); +BM_DYNAMIC_PARTITION(cpu, double, 100); +BM_DYNAMIC_PARTITION(cpu, complex64, 2); +BM_DYNAMIC_PARTITION(cpu, complex64, 100); + +BM_DYNAMIC_PARTITION(gpu, float, 2); +BM_DYNAMIC_PARTITION(gpu, float, 100); +BM_DYNAMIC_PARTITION(gpu, double, 2); +BM_DYNAMIC_PARTITION(gpu, double, 100); +BM_DYNAMIC_PARTITION(gpu, complex64, 2); +BM_DYNAMIC_PARTITION(gpu, complex64, 100); + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h index 7aaad6e6c7..b41b22d634 100644 --- a/tensorflow/core/kernels/fake_quant_ops_functor.h +++ b/tensorflow/core/kernels/fake_quant_ops_functor.h @@ -132,7 +132,7 @@ struct FakeQuantWithMinMaxVarsFunctor { const float max_val = max(); // If min and max are both zero, we should just return zero. if (min_val == 0.0f && max_val == 0.0f) { - outputs.device(d) = outputs.constant(0.0f); + outputs.setZero(); return; } float nudged_min, nudged_max, nudged_scale; @@ -163,8 +163,8 @@ struct FakeQuantWithMinMaxVarsGradientFunctor { // If min and max are both zero, we propagate everything to inputs. if (min_val == 0.0f && max_val == 0.0f) { backprops_wrt_input.device(d) = gradients; - backprop_wrt_min.device(d) = backprop_wrt_min.constant(0.0f); - backprop_wrt_max.device(d) = backprop_wrt_max.constant(0.0f); + backprop_wrt_min.setZero(); + backprop_wrt_max.setZero(); return; } float nudged_min, nudged_max, nudged_scale; @@ -205,8 +205,7 @@ struct FakeQuantWithMinMaxVarsPerChannelFunctor { const float max_val = max(i); // If min and max are both zero, we should just return zero. if (min_val == 0.0f && max_val == 0.0f) { - auto chip = outputs.chip<1>(i); - chip.device(d) = chip.constant(0.0f); + outputs.chip<1>(i).setZero(); continue; } float nudged_min, nudged_max, nudged_scale; @@ -243,10 +242,8 @@ struct FakeQuantWithMinMaxVarsPerChannelGradientFunctor { // If min and max are both zero, we propagate everything to inputs. if (min_val == 0.0f && max_val == 0.0f) { backprops_wrt_input.chip<1>(i).device(d) = gradients_chip; - auto min_chip = backprop_wrt_min.chip<0>(i); - auto max_chip = backprop_wrt_max.chip<0>(i); - min_chip.device(d) = min_chip.constant(0.0f); - max_chip.device(d) = max_chip.constant(0.0f); + backprop_wrt_min.chip<0>(i).setZero(); + backprop_wrt_max.chip<0>(i).setZero(); continue; } float nudged_min, nudged_max, nudged_scale; diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index 0ecb829f34..1688674eb7 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -54,25 +54,20 @@ struct FusedBatchNorm { Tensor* batch_var_output, Tensor* saved_mean_output, Tensor* saved_var_output, TensorFormat tensor_format, bool is_training) { - // Currently U is ignored, since we only support the case where T and U are - // both float32. - // TODO(reedwm): Add float16 support, use U, and remove these asserts. - static_assert(std::is_same::value, "T currently must be float."); - static_assert(std::is_same::value, "U currently must be float."); OP_REQUIRES(context, tensor_format == FORMAT_NHWC, errors::Internal("The CPU implementation of FusedBatchNorm " "only supports NHWC tensor format for now.")); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec offset(offset_input.vec()); - typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); - typename TTypes::ConstVec estimated_variance( - estimated_variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec offset(offset_input.vec()); + typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); + typename TTypes::ConstVec estimated_variance( + estimated_variance_input.vec()); typename TTypes::Tensor y(y_output->tensor()); - typename TTypes::Vec batch_mean(batch_mean_output->vec()); - typename TTypes::Vec batch_var(batch_var_output->vec()); - typename TTypes::Vec saved_mean(saved_mean_output->vec()); - typename TTypes::Vec saved_var(saved_var_output->vec()); + typename TTypes::Vec batch_mean(batch_mean_output->vec()); + typename TTypes::Vec batch_var(batch_var_output->vec()); + typename TTypes::Vec saved_mean(saved_mean_output->vec()); + typename TTypes::Vec saved_var(saved_var_output->vec()); const CPUDevice& d = context->eigen_device(); @@ -93,15 +88,15 @@ struct FusedBatchNorm { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth); + auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1; - T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); // This adjustment is for Bessel's correction - T rest_size_adjust = - static_cast(rest_size) / static_cast(rest_size_minus_one); + U rest_size_adjust = + static_cast(rest_size) / static_cast(rest_size_minus_one); - Eigen::Tensor mean(depth); - Eigen::Tensor variance(depth); + Eigen::Tensor mean(depth); + Eigen::Tensor variance(depth); if (is_training) { mean.device(d) = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv); batch_mean.device(d) = mean; @@ -129,7 +124,7 @@ struct FusedBatchNorm { auto x_shifted = x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec); - y.reshape(rest_by_depth).device(d) = x_shifted; + y.reshape(rest_by_depth).device(d) = x_shifted.template cast(); } }; @@ -138,7 +133,7 @@ struct FusedBatchNormGrad { void operator()(OpKernelContext* context, const Tensor& y_backprop_input, const Tensor& x_input, const Tensor& scale_input, const Tensor& mean_input, const Tensor& variance_input, - T epsilon, Tensor* x_backprop_output, + U epsilon, Tensor* x_backprop_output, Tensor* scale_backprop_output, Tensor* offset_backprop_output, TensorFormat tensor_format) { OP_REQUIRES(context, tensor_format == FORMAT_NHWC, @@ -147,12 +142,12 @@ struct FusedBatchNormGrad { typename TTypes::ConstTensor y_backprop( y_backprop_input.tensor()); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec mean(mean_input.vec()); - typename TTypes::ConstVec variance(variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec mean(mean_input.vec()); + typename TTypes::ConstVec variance(variance_input.vec()); typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); - typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); - typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); + typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); + typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); // Note: the following formulas are used to compute the gradients for // back propagation. @@ -181,8 +176,8 @@ struct FusedBatchNormGrad { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth); - T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); + U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); auto x_mean_rest_by_depth = mean.reshape(one_by_depth).broadcast(bcast_spec); @@ -192,7 +187,8 @@ struct FusedBatchNormGrad { coef0.eval().reshape(one_by_depth).broadcast(bcast_spec); auto x_scaled = x_centered * coef0_rest_by_depth; - auto y_backprop_rest_by_depth = y_backprop.eval().reshape(rest_by_depth); + auto y_backprop_rest_by_depth = + y_backprop.eval().reshape(rest_by_depth).template cast(); scale_backprop.device(d) = (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims); auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims); @@ -214,7 +210,7 @@ struct FusedBatchNormGrad { .reshape(one_by_depth) .broadcast(bcast_spec); x_backprop.reshape(rest_by_depth).device(d) = - coef1 * (y_backprop_centered - x_centered * coef2); + (coef1 * (y_backprop_centered - x_centered * coef2)).template cast(); } }; @@ -689,6 +685,18 @@ REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") .TypeConstraint("U"), FusedBatchNormGradOp); +REGISTER_KERNEL_BUILDER(Name("FusedBatchNormV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("U"), + FusedBatchNormOp); + +REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("U"), + FusedBatchNormGradOp); + #if GOOGLE_CUDA REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h index 38b24d7011..3af104bf95 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.h +++ b/tensorflow/core/kernels/fused_batch_norm_op.h @@ -92,26 +92,28 @@ struct FusedBatchNormFreezeGrad { // offset_backprop = sum(y_backprop) // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon)) // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) - offset_backprop.device(d) = y_backprop.reshape(rest_by_depth) - .template cast() - .sum(reduction_axis); + + auto y_backprop_rest_by_depth = + y_backprop.reshape(rest_by_depth).template cast(); + auto input_rest_by_depth = input.reshape(rest_by_depth).template cast(); + + offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis); // scratch1 = rsqrt(pop_var + epsilon) scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt(); // scratch2 = sum(y_backprop * (x - mean)) scratch2.device(d) = - (y_backprop.reshape(rest_by_depth).template cast() * - (input.reshape(rest_by_depth).template cast() - + (y_backprop_rest_by_depth * + (input_rest_by_depth - pop_mean.reshape(one_by_depth).broadcast(rest_by_one))) .sum(reduction_axis); x_backprop.reshape(rest_by_depth).device(d) = - (y_backprop.reshape(rest_by_depth).template cast() * - ((scratch1 * scale) - .eval() - .reshape(one_by_depth) - .broadcast(rest_by_one))) + (y_backprop_rest_by_depth * ((scratch1 * scale) + .eval() + .reshape(one_by_depth) + .broadcast(rest_by_one))) .template cast(); scale_backprop.device(d) = scratch2 * scratch1; } diff --git a/tensorflow/core/kernels/lmdb_reader_op.cc b/tensorflow/core/kernels/lmdb_reader_op.cc index 3bb07301b5..31a427f2c9 100755 --- a/tensorflow/core/kernels/lmdb_reader_op.cc +++ b/tensorflow/core/kernels/lmdb_reader_op.cc @@ -36,7 +36,7 @@ class LMDBReader : public ReaderBase { Status OnWorkStartedLocked() override { MDB_CHECK(mdb_env_create(&mdb_env_)); - int flags = MDB_RDONLY | MDB_NOTLS; + int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK; // Check if the LMDB filename is actually a file instead of a directory. // If so, set appropriate flags so we can open it. @@ -57,10 +57,13 @@ class LMDBReader : public ReaderBase { if (mdb_env_ != nullptr) { if (mdb_cursor_) { mdb_cursor_close(mdb_cursor_); + mdb_cursor_ = nullptr; } - mdb_txn_abort(mdb_txn_); mdb_dbi_close(mdb_env_, mdb_dbi_); + mdb_txn_abort(mdb_txn_); mdb_env_close(mdb_env_); + mdb_txn_ = nullptr; + mdb_dbi_ = 0; mdb_env_ = nullptr; } return Status::OK(); diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index e2cf605811..157ce106ce 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -34,9 +33,11 @@ limitations under the License. #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/maxpooling_op_gpu.h" @@ -358,6 +359,7 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -405,7 +407,7 @@ class MaxPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop, - output_shape); + output_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPoolGrad only supports NHWC format"; @@ -420,6 +422,7 @@ class MaxPoolingGradOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; #endif // GOOGLE_CUDA @@ -884,6 +887,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); + + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -902,14 +907,15 @@ class MaxPoolingWithArgmaxOp : public OpKernel { Tensor* argmax = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax)); - LaunchMaxPoolingWithArgmax::launch(context, params, tensor_in, - output, argmax); + LaunchMaxPoolingWithArgmax::launch( + context, params, tensor_in, output, argmax, propagate_nans_); } private: std::vector ksize_; std::vector stride_; Padding padding_; + bool propagate_nans_; }; template @@ -1045,6 +1051,8 @@ class MaxPoolingNoMaskOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); + + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1068,9 +1076,10 @@ class MaxPoolingNoMaskOp : public OpKernel { // These is_int8x4 checks avoid linker errors for missing qint8 kernels. if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_, - stride_, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize_, stride_, padding_, data_format_, + tensor_in, out_shape, propagate_nans_); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); @@ -1079,7 +1088,7 @@ class MaxPoolingNoMaskOp : public OpKernel { tensor_in, output); } else if (data_format_ == FORMAT_NHWC) { LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output); + output, propagate_nans_); } else { LOG(FATAL) << "MaxPool currently only supports the following (layout, " "type) combinations: (NHWC, non-qint8), " @@ -1098,6 +1107,7 @@ class MaxPoolingNoMaskOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template @@ -1127,6 +1137,7 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1168,16 +1179,17 @@ class MaxPoolingNoMaskV2Op : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); if (use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, - stride, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize, stride, padding_, data_format_, tensor_in, + out_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPool only supports NHWC format"; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output); + output, propagate_nans_); } } @@ -1187,18 +1199,20 @@ class MaxPoolingNoMaskV2Op : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template struct LaunchMaxPoolingNoMask { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output) { + const Tensor& input, Tensor* output, bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, - output->flat().data(), nullptr, context->eigen_gpu_device()); + output->flat().data(), nullptr, context->eigen_gpu_device(), + propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardNoMask")); @@ -1209,7 +1223,8 @@ struct LaunchMaxPoolingNoMask { template struct LaunchMaxPoolingWithArgmax { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output, Tensor* argmax) { + const Tensor& input, Tensor* output, Tensor* argmax, + bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, @@ -1217,7 +1232,7 @@ struct LaunchMaxPoolingWithArgmax { params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, output->flat().data(), reinterpret_cast(argmax->flat().data()), - context->eigen_gpu_device()); + context->eigen_gpu_device(), propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardWithArgmax")); diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index 26f5274804..d96b844383 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -29,6 +29,15 @@ limitations under the License. namespace tensorflow { namespace { +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) { + if (propagate_nans) { + return !(a <= b); + } else { + return a > b; + } +} + // This is Yangqing's custom kernel for the maxpooling operation. There are // three functions: MaxPoolForwardNCHW and MaxPoolForwardNHWC are the two // forward functions, dealing with the forward case. MaxPoolBackward is the @@ -51,7 +60,7 @@ namespace { // const int output_size = batch * channels * pooled_height * pooled_width; // MaxPoolForwardNCHW<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, // kThreadsPerBlock, 0, cuda_stream>>>(...); -template +template __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, @@ -77,7 +86,7 @@ __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = c * height * width + h * width + w; - if (bottom_data_n[idx] > maxval) { + if (IsGreaterThan(bottom_data_n[idx], maxval)) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -126,7 +135,7 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C( } } -template +template __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, const int height, const int width, const int channels, const int pooled_height, @@ -153,7 +162,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = (h * width + w) * channels + c; - if (bottom_data_n[idx] > maxval) { + if (IsGreaterThan(bottom_data_n[idx], maxval)) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -390,15 +399,24 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( const int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, - int64* mask, const Eigen::GpuDevice& d) { + int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) { const int kThreadsPerBlock = 1024; const int output_size = batch * channels * pooled_height * pooled_width; - - MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>>( - output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + if (propagate_nans) { + MaxPoolForwardNHWC + <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, + kThreadsPerBlock, 0, d.stream()>>> + (output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); + } else { + MaxPoolForwardNHWC + <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, + kThreadsPerBlock, 0, d.stream()>>> + (output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); + } return d.ok(); } diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h index 34203797cf..38ebb34248 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.h +++ b/tensorflow/core/kernels/maxpooling_op_gpu.h @@ -39,7 +39,7 @@ struct MaxPoolForwardWithOptionalArgmax { const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, int64* mask, - const Eigen::GpuDevice& d); + const Eigen::GpuDevice& d, bool propagate_nans); }; struct MaxPoolForwardNoMask_NCHW_VECT_C { diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index 9080bf7be8..f291281108 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -45,12 +45,12 @@ limitations under the License. #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::prop_kind; using mkldnn::stream; +using mkldnn::prop_kind; +using mkldnn::convolution_forward; using mkldnn::convolution_backward_weights; using mkldnn::convolution_direct; -using mkldnn::convolution_forward; #endif @@ -463,13 +463,12 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Generate input shapes. TensorShape filter_shape; - OP_REQUIRES( - context, TensorShapeUtils::IsVector(filter_tensor.shape()), - errors::InvalidArgument( + OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()), + errors::InvalidArgument( "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ", filter_tensor.dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - filter_tensor.vec(), &filter_shape)); + filter_tensor.vec(), &filter_shape)); TensorShape input_shape = input_tensor.shape(); TensorShape obp_shape = obp_tensor.shape(); @@ -481,26 +480,27 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Get forward convolution parameters. MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder( - input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, - &padding_r); + conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape, + &fwd_input_dims, &fwd_filter_dims, + &strides, + &fwd_output_dims_tf_order, + &fwd_output_dims, + &padding_l, &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward // API needs it. For that, we first need to create input, filter // and output memory descriptors. auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); - auto fwd_src_md = - memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); - auto fwd_filter_md = - memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); - auto fwd_out_md = - memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); - auto fwd_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, - fwd_out_md, strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType(), + mkl_data_format); + auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType(), + memory::format::hwio); + auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), + mkl_data_format); + auto fwd_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md, + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Allocate output tensor and shape @@ -537,22 +537,23 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { output.SetOpMemDesc(bwd_output_dims, memory::format::any); // Create convolution backward weights primitive. - auto bwd_desc = convolution_backward_weights::desc( - convolution_direct, input.GetOpMemDesc(), output.GetOpMemDesc(), - outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto bwd_desc = convolution_backward_weights::desc(convolution_direct, + input.GetOpMemDesc(), output.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); - auto bwd_pd = convolution_backward_weights::primitive_desc( - bwd_desc, cpu_engine, fwd_pd); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + fwd_pd); PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:", + error_msg)); } } @@ -563,8 +564,9 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecutePrimitive( - const convolution_backward_weights::primitive_desc& conv_pd, - MklDnnData* input, MklDnnData* obp, MklDnnData* output) { + const convolution_backward_weights::primitive_desc& conv_pd, + MklDnnData* input, MklDnnData* obp, + MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -575,10 +577,10 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // output side, we will prepare reorder primitive in case output // reorder to user memory is required. bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_pd.diff_weights_primitive_desc()); + conv_pd.diff_weights_primitive_desc()); - net.push_back(convolution_backward_weights( - conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem())); + net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(), + obp->GetOpMem(), output->GetOpMem())); // Insert reorder primitive in the net for output reorder if reorder is // required. diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index 4b6bf92e42..4a47d0463e 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -23,8 +23,6 @@ limitations under the License. #define EIGEN_USE_THREADS #include #include -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -43,16 +41,18 @@ limitations under the License. #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" #include "tensorflow/core/util/work_sharder.h" +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::prop_kind; using mkldnn::stream; +using mkldnn::prop_kind; -using mkldnn::convolution_backward_data; -using mkldnn::convolution_direct; using mkldnn::convolution_forward; +using mkldnn::convolution_direct; +using mkldnn::convolution_backward_data; #endif namespace tensorflow { @@ -397,13 +397,12 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Generate input shape. TensorShape input_shape; - OP_REQUIRES( - context, TensorShapeUtils::IsVector(input_tensor.shape()), - errors::InvalidArgument( + OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()), + errors::InvalidArgument( "Conv2DBackpropInput: input_sizes input must be 1-dim, not ", input_tensor.dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - input_tensor.vec(), &input_shape)); + input_tensor.vec(), &input_shape)); TensorShape filter_shape = filter_tensor.shape(); TensorShape obp_shape = obp_tensor.shape(); @@ -415,26 +414,27 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Get forward convolution parameters. MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder( - input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, - &padding_r); + conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape, + &fwd_input_dims, &fwd_filter_dims, + &strides, + &fwd_output_dims_tf_order, + &fwd_output_dims, + &padding_l, &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward // API needs it. For that, we first need to create input, filter // and output memory descriptors. auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); - auto fwd_src_md = - memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); - auto fwd_filter_md = - memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); - auto fwd_out_md = - memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); - auto fwd_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, - fwd_out_md, strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType(), + mkl_data_format); + auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType(), + memory::format::hwio); + auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), + mkl_data_format); + auto fwd_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md, + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Allocate output tensor and shape @@ -475,22 +475,23 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { output.SetOpMemDesc(bwd_output_dims, memory::format::any); // Create convolution backward data primitive. - auto bwd_desc = convolution_backward_data::desc( - convolution_direct, output.GetOpMemDesc(), filter.GetOpMemDesc(), - outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto bwd_desc = convolution_backward_data::desc(convolution_direct, + output.GetOpMemDesc(), filter.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); - auto bwd_pd = convolution_backward_data::primitive_desc( - bwd_desc, cpu_engine, fwd_pd); + auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc, + cpu_engine, + fwd_pd); PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:", + error_msg)); } } @@ -501,8 +502,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecutePrimitive( - const convolution_backward_data::primitive_desc& conv_pd, - MklDnnData* filter, MklDnnData* obp, MklDnnData* output) { + const convolution_backward_data::primitive_desc& conv_pd, + MklDnnData* filter, MklDnnData* obp, + MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -512,11 +514,11 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Memory for output of convolution. Since we may need reorder on the // output side, we will prepare reorder primitive in case output // reorder to user memory is required. - bool output_reorder_required = - output->PrepareReorderToUserMemIfReq(conv_pd.diff_src_primitive_desc()); + bool output_reorder_required = output->PrepareReorderToUserMemIfReq( + conv_pd.diff_src_primitive_desc()); - net.push_back(convolution_backward_data( - conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem())); + net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(), + filter->GetOpMem(), output->GetOpMem())); // Insert reorder primitive in the net for output reorder if reorder is // required. diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 369f632fb4..a9872b8d6d 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -18,8 +18,8 @@ limitations under the License. #include #include -#include #include +#include #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -46,11 +46,11 @@ limitations under the License. #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::prop_kind; using mkldnn::stream; +using mkldnn::prop_kind; -using mkldnn::convolution_direct; using mkldnn::convolution_forward; +using mkldnn::convolution_direct; #endif namespace tensorflow { @@ -523,16 +523,19 @@ class MklConv2DOp : public OpKernel { // Get shapes of input tensors in MKL-DNN order MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder( - src_tensor.shape(), filter_tensor.shape(), &src_dims, &filter_dims, - &strides, &output_dims_tf_order, &output_dims_mkl_order, &padding_l, - &padding_r); + conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(), + filter_tensor.shape(), + &src_dims, &filter_dims, &strides, + &output_dims_tf_order, + &output_dims_mkl_order, &padding_l, + &padding_r); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. - TensorShape tf_output_shape( - {output_dims_tf_order[0], output_dims_tf_order[1], - output_dims_tf_order[2], output_dims_tf_order[3]}); + TensorShape tf_output_shape({output_dims_tf_order[0], + output_dims_tf_order[1], + output_dims_tf_order[2], + output_dims_tf_order[3]}); Tensor* output_tensor = nullptr; MklShape mkl_output_mkl_shape; mkl_output_mkl_shape.SetMklTensor(false); @@ -569,13 +572,13 @@ class MklConv2DOp : public OpKernel { // the layout is Tensorflow's layout (NHWC or NCHW depending on data // format). src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_), - const_cast( - static_cast(src_tensor.flat().data()))); + const_cast(static_cast( + src_tensor.flat().data()))); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). filter.SetUsrMem(filter_dims, memory::format::hwio, const_cast(static_cast( - filter_tensor.flat().data()))); + filter_tensor.flat().data()))); // Although output shape (output_dims) required is in MKL-DNN order, // layout is Tensorflow's layout (NHWC or NCHW depending on data format). output.SetUsrMem(output_dims_mkl_order, @@ -595,36 +598,36 @@ class MklConv2DOp : public OpKernel { const Tensor& bias_tensor = MklGetInput(context, 2); bias.SetUsrMem(bias_size, memory::format::x, const_cast(static_cast( - bias_tensor.flat().data()))); + bias_tensor.flat().data()))); bias.SetOpMemDesc(bias_size, memory::format::any); // Create convolution primitive with Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), - strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + auto conv_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(), + bias.GetOpMemDesc(), output.GetOpMemDesc(), strides, + padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output); } else { // Create convolution primitive without Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_)); + auto conv_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(), + output.GetOpMemDesc(), strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output); } - } catch (mkldnn::error& e) { + } catch (mkldnn::error &e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + std::string(e.message) + ", in file " + - std::string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + std::string(e.message) + + ", in file " + std::string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", error_msg)); } } @@ -635,9 +638,9 @@ class MklConv2DOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecuteNet( - const convolution_forward::primitive_desc& conv_prim_desc, - MklDnnData* src, MklDnnData* filter, MklDnnData* bias, - MklDnnData* output) { + const convolution_forward::primitive_desc& conv_prim_desc, + MklDnnData* src, MklDnnData* filter, + MklDnnData* bias, MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -648,19 +651,18 @@ class MklConv2DOp : public OpKernel { // output side, we will prepare reorder primitive in case output // reorder to user memory is required. bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_prim_desc.dst_primitive_desc()); + conv_prim_desc.dst_primitive_desc()); // Create convolution primitive and add it to net. if (bias) { CHECK_EQ(biasEnabled, true); net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), - filter->GetOpMem(), bias->GetOpMem(), - output->GetOpMem())); + filter->GetOpMem(), bias->GetOpMem(), + output->GetOpMem())); } else { CHECK_EQ(biasEnabled, false); net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), - filter->GetOpMem(), - output->GetOpMem())); + filter->GetOpMem(), output->GetOpMem())); } // Insert reorder primitive in the net for output reorder if reorder is diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index e29af19ca9..f0cb37f8a4 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ #define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ -#include #include +#include #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -26,8 +26,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/bounds_check.h" -#include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/strings/numbers.h" @@ -49,15 +49,15 @@ namespace tensorflow { class MklDnnConvUtil { protected: - OpKernelContext *context_; // We don't own this. + OpKernelContext* context_; // We don't own this. std::vector strides_; Padding padding_; TensorFormat data_format_; public: - MklDnnConvUtil(OpKernelContext *context, const std::vector &strides, - Padding pad, TensorFormat fm) - : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} + MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, + Padding pad, TensorFormat fm) : context_(context), + strides_(strides), padding_(pad), data_format_(fm) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -75,14 +75,14 @@ class MklDnnConvUtil { // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's // status. - virtual inline void GetInputSizeInMklOrder(const TensorShape &input_shape, - memory::dims *input_dims) { -#define CHECK_BOUNDS(val, err_msg) \ - do { \ - OP_REQUIRES(context_, \ - FastBoundsCheck(val, std::numeric_limits::max()), \ - errors::InvalidArgument(err_msg)); \ - } while (0) + virtual inline void + GetInputSizeInMklOrder(const TensorShape& input_shape, + memory::dims *input_dims) { + #define CHECK_BOUNDS(val, err_msg) do { \ + OP_REQUIRES(context_, FastBoundsCheck(val, \ + std::numeric_limits::max()), \ + errors::InvalidArgument(err_msg)); \ + }while(0) CHECK_NOTNULL(input_dims); @@ -105,7 +105,7 @@ class MklDnnConvUtil { CHECK_BOUNDS(input_batch_raw, "Input batch too large"); int input_batch = static_cast(input_batch_raw); -#undef CHECK_BOUNDS + #undef CHECK_BOUNDS // MKL-DNN always requires input in NCHW format. *input_dims = {input_batch, input_depth, input_rows, input_cols}; @@ -125,9 +125,10 @@ class MklDnnConvUtil { // forward gets actual tensor as input). // // TODO(nhasabni): Add similar function for input and filter in MklShape. - virtual inline void GetFilterSizeInMklOrder(const TensorShape &input_shape, - const TensorShape &filter_shape, - memory::dims *filter_dims) { + virtual inline void + GetFilterSizeInMklOrder(const TensorShape& input_shape, + const TensorShape& filter_shape, + memory::dims *filter_dims) { CHECK_NOTNULL(filter_dims); OP_REQUIRES(context_, filter_shape.dims() == 4, @@ -135,18 +136,17 @@ class MklDnnConvUtil { filter_shape.DebugString())); for (int i = 0; i < 3; i++) { - OP_REQUIRES(context_, - FastBoundsCheck(filter_shape.dim_size(i), - std::numeric_limits::max()), - errors::InvalidArgument("filter too large")); + OP_REQUIRES(context_, FastBoundsCheck(filter_shape.dim_size(i), + std::numeric_limits::max()), + errors::InvalidArgument("filter too large")); } int input_depth = GetTensorDim(input_shape, data_format_, 'C'); - OP_REQUIRES(context_, input_depth == filter_shape.dim_size(2), - errors::InvalidArgument( - "input and filter must have the same depth: ", input_depth, - " vs ", filter_shape.dim_size(2))); + OP_REQUIRES( + context_, input_depth == filter_shape.dim_size(2), + errors::InvalidArgument("input and filter must have the same depth: ", + input_depth, " vs ", filter_shape.dim_size(2))); // TF filter is always in (rows, cols, in_depth, out_depth) order. int filter_rows = static_cast(filter_shape.dim_size(0)); @@ -163,25 +163,25 @@ class MklDnnConvUtil { // requires filter in OIHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's // status. - virtual inline void GetFilterSizeInMklOrder(size_t src_index, - size_t filter_index, - memory::dims *filter_dims) { + virtual inline void + GetFilterSizeInMklOrder(size_t src_index, size_t filter_index, + memory::dims *filter_dims) { CHECK_NOTNULL(filter_dims); - const Tensor &input = MklGetInput(context_, src_index); - const Tensor &filter = MklGetInput(context_, filter_index); + const Tensor& input = MklGetInput(context_, src_index); + const Tensor& filter = MklGetInput(context_, filter_index); GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims); } // Calculate Bias size for 2D Convolution. Function does not return // anything, but sets error in context status. - virtual inline void GetBiasSizeInMklOrder(size_t bias_index, - memory::dims *bias_dims) { - const Tensor &bias = MklGetInput(context_, bias_index); + virtual inline void + GetBiasSizeInMklOrder(size_t bias_index, memory::dims *bias_dims) { + const Tensor& bias = MklGetInput(context_, bias_index); OP_REQUIRES(context_, bias.dims() == 1, errors::InvalidArgument("bias must be 1-dimensional: ", bias.shape().DebugString())); - *bias_dims = {static_cast(bias.dim_size(0))}; + *bias_dims = { static_cast(bias.dim_size(0)) }; } // Function to calculate output and padding size for 2D convolution. @@ -193,11 +193,13 @@ class MklDnnConvUtil { // status is returned via context status. // // TODO(nhasabni): Add similar function for input and filter in MklShape. - virtual inline void GetOutputAndPadSizeInMklOrder( - const TensorShape &input_shape, const TensorShape &filter_shape, - const memory::dims &strides, memory::dims *output_dims_tf_order, - memory::dims *output_dims_mkl_order, memory::dims *pad_l, - memory::dims *pad_r) { + virtual inline void + GetOutputAndPadSizeInMklOrder(const TensorShape& input_shape, + const TensorShape& filter_shape, + const memory::dims& strides, + memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, + memory::dims *pad_l, memory::dims *pad_r) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -223,21 +225,21 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_rows, filter_rows, stride_rows, padding_, - &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_cols, filter_cols, stride_cols, padding_, - &out_cols, &pad_left, &pad_right)); + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerbose(input_rows, filter_rows, stride_rows, + padding_, &out_rows, &pad_top, &pad_bottom)); + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerbose(input_cols, filter_cols, stride_cols, + padding_, &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) - TensorShape out_shape = - ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth); + TensorShape out_shape = ShapeFromFormat(data_format_, out_batch, + out_rows, out_cols, out_depth); *output_dims_tf_order = TFShapeToMklDnnDims(out_shape); // MKL-DNN always needs output in NCHW format. *output_dims_mkl_order = {out_batch, out_depth, static_cast(out_rows), - static_cast(out_cols)}; + static_cast(out_cols)}; // Now handle padding. MKL-DNN uses asymetric padding. *pad_l = {static_cast(pad_top), static_cast(pad_left)}; @@ -248,25 +250,27 @@ class MklDnnConvUtil { // See comment on GetConvOutputAndPadSizeInMklOrder for parameters. // // Function does not return anything, but sets error in context status. - inline void GetOutputAndPadSizeInMklOrder( - size_t src_index, size_t filter_index, const memory::dims &strides, - memory::dims *output_dims_tf_order, memory::dims *output_dims_mkl_order, - memory::dims *pad_l, memory::dims *pad_r) { + inline void + GetOutputAndPadSizeInMklOrder(size_t src_index, size_t filter_index, + const memory::dims& strides, + memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, + memory::dims *pad_l, memory::dims *pad_r) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); CHECK_NOTNULL(pad_r); - const Tensor &input = MklGetInput(context_, src_index); - const Tensor &filter = MklGetInput(context_, filter_index); + const Tensor& input = MklGetInput(context_, src_index); + const Tensor& filter = MklGetInput(context_, filter_index); OP_REQUIRES(context_, input.dims() == 4, errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); + input.shape().DebugString())); - GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), strides, - output_dims_tf_order, output_dims_mkl_order, - pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), + strides, output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -275,12 +279,15 @@ class MklDnnConvUtil { // also calculates strides and paddings for 2D Convolution. // // Function does not return anything, but sets error in context status. - inline void GetConvFwdSizesInMklOrder( - const TensorShape &input_shape, const TensorShape &filter_shape, - memory::dims *input_dims, memory::dims *filter_dims, - memory::dims *strides, memory::dims *output_dims_tf_order, - memory::dims *output_dims_mkl_order, memory::dims *pad_l, - memory::dims *pad_r) { + inline void GetConvFwdSizesInMklOrder(const TensorShape& input_shape, + const TensorShape& filter_shape, + memory::dims *input_dims, + memory::dims *filter_dims, + memory::dims *strides, + memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, + memory::dims *pad_l, + memory::dims *pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); @@ -295,7 +302,8 @@ class MklDnnConvUtil { if (!context_->status().ok()) return; GetStridesInMklOrder(strides); GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, - output_dims_tf_order, output_dims_mkl_order, + output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; } diff --git a/tensorflow/core/kernels/mkl_tfconv_op.cc b/tensorflow/core/kernels/mkl_tfconv_op.cc deleted file mode 100644 index b48c735d12..0000000000 --- a/tensorflow/core/kernels/mkl_tfconv_op.cc +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifdef INTEL_MKL - -#include -#include -#include "tensorflow/core/framework/numeric_op.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/platform/cpu_info.h" -#include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/util/tensor_format.h" - -#include "tensorflow/core/util/mkl_util.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" - -namespace tensorflow { -typedef Eigen::ThreadPoolDevice CPUDevice; - -/////////////////////////////////////////////////////////// -// Op kernel -/////////////////////////////////////////////////////////// - -template -class MklToTfOp : public OpKernel { - public: - explicit MklToTfOp(OpKernelConstruction* context) : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str)); - OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type)); - has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F); - } - - void Compute(OpKernelContext* context) override { - // Check that input tensor is in MKL format. - const Tensor& input_tensor = MklGetInput(context, 0); - MklShape input_shape; - GetMklShape(context, 0, &input_shape); - - // if input is already in Tf format, then just copy input tensor to output. - if (!input_shape.IsMklTensor()) { - context->set_output(0, input_tensor); - VLOG(1) << "MKLToTFConversion: No conversion needed, " - << "copying input to output"; - return; - } - - // Check that input data type is same as operator data type and that it is - // same as output data type. - DataType input_data_type = input_type(0); - DataType output_data_type = output_type(0); - CHECK_EQ(op_data_type, input_data_type); - CHECK_EQ(op_data_type, output_data_type); - - TensorShape output_shape; - size_t ndims = input_shape.GetDimension(); - size_t* in_sizes = new size_t[ndims]; - for (size_t i = 0; i < ndims; i++) { - // Outermost to innermost dimension - output_shape.AddDim(input_shape.GetSizes()[input_shape.tf_dim_idx(i)]); - in_sizes[i] = input_shape.GetSizes()[i]; - } - - // Allocate output tensor. - Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, - context->allocate_output(0, output_shape, &output_tensor)); - - dnnLayout_t output_layout = - static_cast(input_shape.GetTfLayout()); - // Execute DNNConversion. - void* input_buffer = - static_cast(const_cast(input_tensor.flat().data())); - delete[] in_sizes; - void* output_buffer = - static_cast(const_cast(output_tensor->flat().data())); - input_shape.GetConvertedFlatData(output_layout, input_buffer, - output_buffer); - VLOG(1) << "MKLToTFConversion complete successfully."; - } - - private: - /// Data format of the operation - string data_format_str; - - /// Data type of the operation - DataType op_data_type; - - /// CPUIDInfo - bool has_avx512f_ = false; -}; - -/////////////////////////////////////////////////////////// -// Register kernel -/////////////////////////////////////////////////////////// - -#define REGISTER_CPU(T) \ - REGISTER_KERNEL_BUILDER(Name("_MklToTf") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .Label(mkl_op_registry::kMklOpLabel), \ - MklToTfOp); - -TF_CALL_float(REGISTER_CPU); -#undef REGISTER_CPU -} // namespace tensorflow -#endif /* INTEL_MKL */ diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index a240ee44fb..0a5be4fec9 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifdef INTEL_MKL - #ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ #define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ +#ifdef INTEL_MKL + #include #include #include "tensorflow/core/framework/numeric_op.h" @@ -35,6 +35,10 @@ limitations under the License. #include "mkl_dnn_types.h" #include "tensorflow/core/util/mkl_util.h" +#ifdef INTEL_MKL_DNN +using mkldnn::stream; +#endif + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -57,6 +61,71 @@ class MklToTfOp : public OpKernel { VLOG(1) << "MKLToTFConversion complete successfully."; } +#ifdef INTEL_MKL_DNN + static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, + string data_format_str, DataType op_data_type, + bool has_avx512f, uint input_number) { + try { + // Check that input tensor is in MKL format. + const Tensor& input_tensor = MklGetInput(context, input_number); + MklDnnShape input_shape; + GetMklShape(context, input_number, &input_shape); + + // if input is already in Tf format, then copy input tensor to output. + if (!input_shape.IsMklTensor()) { + context->set_output(input_number, input_tensor); + VLOG(1) << "MKLToTFConversion: No conversion needed, " + << "copying input to output"; + return; + } + + // Check that input data type is same as operator data type and that it + // is same as output data type. + DataType input_data_type = op_kernel->input_type(input_number); + DataType output_data_type = op_kernel->output_type(input_number); + CHECK_EQ(op_data_type, input_data_type); + CHECK_EQ(op_data_type, output_data_type); + + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData input(&cpu_engine); + + // Get Mkl layout of input tensor. + auto input_mkl_md = input_shape.GetMklLayout(); + // Get TensorFlow layout of input tensor. Expected output of conversion + // has same layout as Tensorflow layout of input tensor. + auto output_tf_md = input_shape.GetTfLayout(); + auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); + // Set input Mkl layout as the user layout. + input.SetUsrMem(input_mkl_md, &input_tensor); + + // Allocate output tensor. + TensorShape output_shape = input_shape.GetTfShape(); + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(input_number, + output_shape, &output_tensor)); + CHECK_NOTNULL(output_tensor); + + // Do we need to reorder Mkl layout into TensorFlow layout? + if (input.IsReorderNeeded(output_tf_pd)) { + // Insert reorder between Mkl layout and TensorFlow layout. + std::vector net; + CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, output_tensor, &net), + true); + stream(stream::kind::eager).submit(net).wait(); + } else { + // If not, just forward input tensor to output tensor. + CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); + } + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + std::string(e.message) + + ", in file " + std::string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } +#else static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, string data_format_str, DataType op_data_type, bool has_avx512f, uint input_number) { @@ -91,8 +160,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, - context->allocate_output(input_number, output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(input_number, + output_shape, &output_tensor)); dnnLayout_t output_layout = static_cast(input_shape.GetTfLayout()); @@ -106,6 +175,7 @@ class MklToTfOp : public OpKernel { output_buffer); VLOG(1) << "MKLToTFConversion complete successfully."; } +#endif private: /// Data format of the operation @@ -132,5 +202,5 @@ class MklToTfOp : public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_CPU); #undef REGISTER_CPU } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ #endif // INTEL_MKL +#endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index 7dee751c4f..ac90f67ce0 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -143,7 +143,7 @@ void DnnPoolingOp::Compute( perftools::gputools::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape) { + const TensorShape& tensor_out_shape, bool propagate_nans) { Tensor* tensor_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, tensor_out_shape, &tensor_out)); @@ -188,7 +188,8 @@ void DnnPoolingOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols); + .set_horizontal_padding(params.pad_cols) + .set_propagate_nans(propagate_nans); perftools::gputools::dnn::BatchDescriptor input_desc; input_desc.set_count(params.tensor_in_batch) @@ -237,7 +238,7 @@ void DnnPoolingGradOp::Compute( const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape) { + const TensorShape& tensor_in_shape, bool propagate_nans) { CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) || (tensor_in && tensor_out)) << "For MaxPoolGrad, both tensor_in and tensor_out needs to be " @@ -327,7 +328,8 @@ void DnnPoolingGradOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols); + .set_horizontal_padding(params.pad_cols) + .set_propagate_nans(propagate_nans); perftools::gputools::dnn::BatchDescriptor orig_output_desc; orig_output_desc.set_count(params.tensor_in_batch) diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index b594f39fad..1458456585 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -44,7 +44,7 @@ class DnnPoolingOp { const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape); + const TensorShape& tensor_out_shape, bool propagate_nans); }; // A helper class that launch the cudnn pooling backward operations. @@ -60,7 +60,7 @@ class DnnPoolingGradOp { const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape); + const TensorShape& tensor_in_shape, bool propagate_nans); }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 8be0c56798..337c8e5c17 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -489,7 +489,7 @@ class QuantizedAddOp : public OpKernel { // adding zero leaves the result unchanged, and to contain the largest of // the two input values with some room to spare. const float smallest_min = std::min(min_x, min_y); - const float largest_max = std::min(max_x, max_y); + const float largest_max = std::max(max_x, max_y); const float biggest_range = std::max(std::abs(smallest_min), std::abs(largest_max)); const float output_range = (biggest_range * (1 << 14)); diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index a37c757865..55a8b9c9b6 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -577,7 +577,7 @@ struct FillPhiloxRandomKernel { const size_t kGroupSize = Distribution::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(0); + const size_t total_item_count = item.get_global_range(); size_t offset = item_id * kGroupSize; gen_.Skip(item_id); @@ -633,7 +633,7 @@ struct FillPhiloxRandomKernel { PhiloxRandom::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(0); + const size_t total_item_count = item.get_global_range(); size_t group_index = item_id; size_t offset = group_index * kGroupSize; diff --git a/tensorflow/core/kernels/range_dataset_op.cc b/tensorflow/core/kernels/range_dataset_op.cc index e7ae840fc7..7adfcc4f8d 100644 --- a/tensorflow/core/kernels/range_dataset_op.cc +++ b/tensorflow/core/kernels/range_dataset_op.cc @@ -99,6 +99,7 @@ class RangeDatasetOp : public DatasetOpKernel { if ((dataset()->step_ > 0 && next_ >= dataset()->stop_) || (dataset()->step_ < 0 && next_ <= dataset()->stop_)) { *end_of_sequence = true; + is_exhausted_ = true; return Status::OK(); } Tensor value_tensor(cpu_allocator(), DT_INT64, {}); diff --git a/tensorflow/core/kernels/reader_dataset_ops.cc b/tensorflow/core/kernels/reader_dataset_ops.cc index c08e42be1d..39ef92a5de 100644 --- a/tensorflow/core/kernels/reader_dataset_ops.cc +++ b/tensorflow/core/kernels/reader_dataset_ops.cc @@ -402,6 +402,7 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { // Iteration ends when there are no more files to process. if (current_file_index_ == dataset()->filenames_.size()) { *end_of_sequence = true; + is_exhausted_ = true; return Status::OK(); } diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/repeat_dataset_op.cc index 0167b9ea64..9813e99a70 100644 --- a/tensorflow/core/kernels/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/repeat_dataset_op.cc @@ -95,15 +95,6 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } - - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - return Status::OK(); - } - Status RestoreInternal(OpKernelContext* ctx, - IteratorStateReader* reader) override { - return Status::OK(); - } }; class FiniteIterator : public DatasetIterator { @@ -117,10 +108,6 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. - if (!input_impl_) { - *end_of_sequence = true; - return Status::OK(); - } while (i_ < dataset()->count_) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -131,6 +118,7 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { input_impl_ = dataset()->input_->MakeIterator(prefix()); } *end_of_sequence = true; + is_exhausted_ = true; input_impl_.reset(); return Status::OK(); } @@ -139,12 +127,7 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - if (!input_impl_) { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("input_impl_empty"), "")); - } else { - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); - } + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); return Status::OK(); } @@ -152,11 +135,7 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { IteratorStateReader* reader) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - if (!reader->Contains(full_name("input_impl_empty"))) { - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } else { - input_impl_.reset(); - } + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); return Status::OK(); } @@ -204,29 +183,6 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { } while (true); } - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - if (input_impl_) - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); - else - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("uninitialized"), "")); - return Status::OK(); - } - - Status RestoreInternal(OpKernelContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - if (reader->Contains(full_name("uninitialized"))) { - input_impl_.reset(); - } else { - input_impl_ = dataset()->input_->MakeIterator(prefix()); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } - return Status::OK(); - } - private: mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 4302a68a18..2334e50f1d 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -376,6 +376,9 @@ struct UnsortedSegmentSumFunctor auto data_flat = typename TTypes::ConstTensor(data, N, data_size / N); for (int64 i = 0; i < N; ++i) { Index j = internal::SubtleMustCopy(segment_ids(i)); + if (j < 0) { + continue; + } OP_REQUIRES(ctx, FastBoundsCheck(j, output_rows), errors::InvalidArgument( "segment_ids", SliceDebugString(segment_ids_shape, i), diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 412c1d601d..b10bea72ba 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -30,14 +30,14 @@ namespace functor { #ifdef GOOGLE_CUDA typedef Eigen::GpuDevice GPUDevice; // Functor for SegmentSumGPUOp. -// 'output_rows': the number of output segments (unique segment ids in +// output_rows: the number of output segments (unique segment ids in // 'segment_ids'). -// 'segment_ids_shape': shape of 'segment_ids' tensor. -// 'segment_ids': unsorted map from input to output segment ids at which to +// segment_ids_shape: shape of 'segment_ids' tensor. +// segment_ids: unsorted map from input to output segment ids at which to // perform segment sum operation. -// 'data_size': size of input data tensor. -// 'data': input data tensor. -// 'output': output reshaped to {output_rows, output.size/output_rows} +// data_size: size of input data tensor. +// data: input data tensor. +// output: output reshaped to {output_rows, output.size/output_rows} template struct SegmentSumFunctor { void operator()(OpKernelContext* ctx, const GPUDevice& d, @@ -61,14 +61,14 @@ struct UnsortedSegmentBaseFunctor{ }; // Functor for UnsortedSegmentSumOp. -// 'output_rows': the number of output segments (unique segment ids in +// output_rows: the number of output segments (unique segment ids in // 'segment_ids'). -// 'segment_ids_shape': shape of 'segment_ids' tensor. -// 'segment_ids': unsorted map from input to output segment ids at which to +// segment_ids_shape: shape of 'segment_ids' tensor. +// segment_ids: unsorted map from input to output segment ids at which to // perform segment sum operation. -// 'data_size': size of input data tensor. -// 'data': input data tensor. -// 'output': output reshaped to {output_rows, output.size/output_rows} +// data_size: size of input data tensor. +// data: input data tensor. +// output: output reshaped to {output_rows, output.size/output_rows} template struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, @@ -79,14 +79,14 @@ struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor struct UnsortedSegmentMaxFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc index 721f9b949b..28a39bae3f 100644 --- a/tensorflow/core/kernels/shape_ops.cc +++ b/tensorflow/core/kernels/shape_ops.cc @@ -341,7 +341,12 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .Device(DEVICE_CPU) .HostMemory("dim") .TypeConstraint("Tdim"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_CPU) + .HostMemory("dim") + .TypeConstraint("Tdim"), + ExpandDimsOp); #if GOOGLE_CUDA #define REGISTER_GPU_KERNEL(type) \ @@ -350,7 +355,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); \ + REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tdim") \ + .HostMemory("dim"), \ + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); TF_CALL_bool(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL @@ -362,7 +373,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .TypeConstraint("Tdim") + .HostMemory("input") + .HostMemory("dim") + .HostMemory("output"), + ExpandDimsOp); #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL @@ -372,7 +391,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); \ + REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T") \ + .TypeConstraint("Tdim") \ + .HostMemory("dim"), \ + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); TF_CALL_bool(REGISTER_SYCL_KERNEL); #undef REGISTER_SYCL_KERNEL @@ -384,7 +409,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_SYCL) + .TypeConstraint("T") + .TypeConstraint("Tdim") + .HostMemory("input") + .HostMemory("dim") + .HostMemory("output"), + ExpandDimsOp); #endif // TENSORFLOW_USE_SYCL // Squeeze --------------------------------------- diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h index ac607f4e8b..8d9d0ea846 100644 --- a/tensorflow/core/kernels/shape_ops.h +++ b/tensorflow/core/kernels/shape_ops.h @@ -145,6 +145,7 @@ class SizeOp : public OpKernel { bool IsExpensive() override { return false; } }; +template class ExpandDimsOp : public OpKernel { public: explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -153,7 +154,7 @@ class ExpandDimsOp : public OpKernel { OP_REQUIRES(ctx, ctx->input(0).dtype() != DT_VARIANT, errors::InvalidArgument("ExpandDims on Variant not supported")); - int32 dim = ctx->input(1).flat()(0); + Tdim dim = ctx->input(1).flat()(0); OP_REQUIRES( ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()), errors::InvalidArgument("Tried to expand dim index ", dim, @@ -175,7 +176,7 @@ class ExpandDimsOp : public OpKernel { } // Clamp to the end if needed. - dim = std::min(dim, existing_dims_size); + dim = std::min(dim, existing_dims_size); new_shape.emplace(new_shape.begin() + dim, 1); const TensorShape output_shape(new_shape); @@ -234,10 +235,10 @@ class SqueezeOp : public OpKernel { if (!wrapped_squeeze_dims.empty()) { if (wrapped_squeeze_dims.count(i) > 0) { OP_REQUIRES(ctx, existing_dim == 1, - errors::InvalidArgument( - "Tried to explicitly squeeze " - "dimension ", - i, " but dimension was not 1: ", existing_dim)); + errors::InvalidArgument("Tried to explicitly squeeze " + "dimension ", + i, " but dimension was not 1: ", + existing_dim)); } else { // This dimension is not being squeezed. new_shape.push_back(existing_dim); diff --git a/tensorflow/core/kernels/shuffle_dataset_op.cc b/tensorflow/core/kernels/shuffle_dataset_op.cc index dd0ab57e9d..2146ba2aa1 100644 --- a/tensorflow/core/kernels/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/shuffle_dataset_op.cc @@ -105,7 +105,8 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { mutex_lock l(mu_); int64 start_micros = ctx->env()->NowMicros(); int64 num_log_entries = 0; - while (input_impl_ && buffer_.size() < dataset()->buffer_size_) { + while (!end_of_input_sequence_ && + buffer_.size() < dataset()->buffer_size_) { if (ctx->env()->NowMicros() > ((num_log_entries + 1) * kLogIntervalMicros) + start_micros) { num_log_entries++; @@ -113,10 +114,9 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { << buffer_.size() << " of " << dataset()->buffer_size_; } std::vector input_element; - bool end_of_input_sequence; TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element, - &end_of_input_sequence)); - if (!end_of_input_sequence) { + &end_of_input_sequence_)); + if (!end_of_input_sequence_) { buffer_.emplace_back(std::move(input_element)); } else { input_impl_.reset(); @@ -135,7 +135,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { std::swap(buffer_[index], buffer_.back()); buffer_.pop_back(); } else { - DCHECK(input_impl_ == nullptr); + DCHECK(end_of_input_sequence_); *end_of_sequence = true; } return Status::OK(); @@ -148,11 +148,11 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { // Save the tensors in the buffer. TF_RETURN_IF_ERROR( writer->WriteScalar(full_name("buffer_size"), buffer_.size())); - for (size_t i = 0; i < buffer_.size(); i++) { + for (int i = 0; i < buffer_.size(); i++) { TF_RETURN_IF_ERROR(writer->WriteScalar( full_name(strings::StrCat("buffer_", i, "_size")), buffer_[i].size())); - for (size_t j = 0; j < buffer_[i].size(); j++) { + for (int j = 0; j < buffer_[i].size(); j++) { TF_RETURN_IF_ERROR(writer->WriteTensor( full_name(strings::StrCat("buffer_", i, "_", j)), buffer_[i][j])); @@ -165,7 +165,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { // Save input iterator if it hasn't been exhausted else write // "end_of_input_sequence". - if (!input_impl_) { + if (end_of_input_sequence_) { TF_RETURN_IF_ERROR( writer->WriteScalar(full_name("end_of_input_sequence"), "")); } else { @@ -180,15 +180,10 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { buffer_.clear(); // Restore the buffer. - size_t buffer_size; - { - int64 temp; - TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("buffer_size"), &temp)); - buffer_size = static_cast(temp); - } - buffer_.reserve(buffer_size); - for (size_t i = 0; i < buffer_size; i++) { + int64 buffer_size; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("buffer_size"), &buffer_size)); + for (int i = 0; i < buffer_size; i++) { int64 list_size; TF_RETURN_IF_ERROR(reader->ReadScalar( full_name(strings::StrCat("buffer_", i, "_size")), &list_size)); @@ -210,6 +205,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { input_impl_ = dataset()->input_->MakeIterator(prefix()); TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); } else { + end_of_input_sequence_ = true; input_impl_.reset(); } return Status::OK(); @@ -234,6 +230,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { mutex mu_; std::vector> buffer_ GUARDED_BY(mu_); std::unique_ptr input_impl_ GUARDED_BY(mu_); + bool end_of_input_sequence_ GUARDED_BY(mu_) = false; const int64 seed_ GUARDED_BY(mu_); const int64 seed2_ GUARDED_BY(mu_); random::PhiloxRandom parent_generator_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/skip_dataset_op.cc index 7ee945dd4c..52a6116a7c 100644 --- a/tensorflow/core/kernels/skip_dataset_op.cc +++ b/tensorflow/core/kernels/skip_dataset_op.cc @@ -35,14 +35,14 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { int64 count; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "count", &count)); - *output = new Dataset(ctx, count, input); + *output = new Dataset(count, input); } private: - class Dataset : public GraphDatasetBase { + class Dataset : public DatasetBase { public: - Dataset(OpKernelContext* ctx, int64 count, const DatasetBase* input) - : GraphDatasetBase(ctx), count_(count), input_(input) { + Dataset(int64 count, const DatasetBase* input) + : count_(count), input_(input) { input_->Ref(); } @@ -71,18 +71,6 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "SkipDatasetOp::Dataset"; } - protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); - Node* count = nullptr; - TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); - TF_RETURN_IF_ERROR( - b->AddDataset(this, {input_graph_node, count}, output)); - return Status::OK(); - } - private: class EmptyIterator : public DatasetIterator { public: @@ -94,16 +82,6 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } - - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - return Status::OK(); - } - - Status RestoreInternal(OpKernelContext* ctx, - IteratorStateReader* reader) override { - return Status::OK(); - } }; class FiniteIterator : public DatasetIterator { @@ -118,11 +96,6 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. - if (!input_impl_) { - *end_of_sequence = true; - return Status::OK(); - } - // Keep calling GetNext(). TODO(vrv): Figure out a way to // skip records without reading, perhaps by adding an // interface to iterator. @@ -143,34 +116,6 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { // Return GetNext() on the underlying iterator. TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); - if (*end_of_sequence) { - input_impl_.reset(); - } - return Status::OK(); - } - - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - if (input_impl_) { - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); - } else { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("input_impl_empty"), "")); - } - return Status::OK(); - } - - Status RestoreInternal(OpKernelContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - if (!reader->Contains(full_name("input_impl_empty"))) { - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } else { - input_impl_.reset(); - } return Status::OK(); } diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index d46701749b..28a379774b 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -190,41 +190,25 @@ class SliceOp : public OpKernel { } return; } -#define HANDLE_DIM(NDIM) \ - if (input_dims == NDIM) { \ - HandleCase(context, begin, size, result); \ - return; \ +#define HANDLE_DIM(NDIM) \ + if (input_dims == NDIM) { \ + functor::Slice()( \ + context->eigen_device(), result, input, begin, size); \ + return; \ } - HANDLE_DIM(1); HANDLE_DIM(2); HANDLE_DIM(3); HANDLE_DIM(4); HANDLE_DIM(5); HANDLE_DIM(6); - HANDLE_DIM(7); #undef HANDLE_DIM - OP_REQUIRES(context, false, errors::Unimplemented( - "SliceOp : Unhandled input dimensions")); - } - } - - private: - template - void HandleCase(OpKernelContext* context, const gtl::ArraySlice& begin, - const gtl::ArraySlice& size, Tensor* result) { - Eigen::DSizes indices; - Eigen::DSizes sizes; - for (int i = 0; i < NDIM; ++i) { - indices[i] = begin[i]; - sizes[i] = size[i]; + // handle cases which dim >= 7 + functor::Slice()( + context->eigen_device(), result, input, begin, size); } - - functor::Slice()( - context->eigen_device(), result->tensor(), - context->input(0).tensor(), indices, sizes); } }; @@ -264,11 +248,16 @@ class MklSliceOp : public OpKernel { } return; } -#define HANDLE_DIM(NDIM) \ - if (input_dims == NDIM) { \ - HandleCase(context, begin, size, result); \ - return; \ - } + // Special case for handling 4-D tensor slice. + if (input_dims == 4) { + HandleCase4D(context, begin, size, result); + } else { +#define HANDLE_DIM(NDIM) \ + if (input_dims == NDIM) { \ + functor::Slice()( \ + context->eigen_device(), result, input, begin, size); \ + return; \ + } HANDLE_DIM(1); HANDLE_DIM(2); @@ -276,12 +265,13 @@ class MklSliceOp : public OpKernel { HANDLE_DIM(4); HANDLE_DIM(5); HANDLE_DIM(6); - HANDLE_DIM(7); #undef HANDLE_DIM - OP_REQUIRES(context, false, errors::Unimplemented( - "SliceOp : Unhandled input dimensions")); + // handle cases which dim >= 7 + functor::Slice()( + context->eigen_device(), result, input, begin, size); + } } } @@ -328,8 +318,7 @@ class MklSliceOp : public OpKernel { return false; } - template - void HandleCase(OpKernelContext* context, + void HandleCase4D(OpKernelContext* context, const gtl::ArraySlice& begin, const gtl::ArraySlice& size, Tensor* result) { int slice_dim = -1; @@ -338,8 +327,7 @@ class MklSliceOp : public OpKernel { // differs from the input tensor in only 1 out of 4 dimensions. // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW // format over channel dimension. - if (NDIM == 4 && - DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) { + if (DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) { size_t in_strides[4] = { (size_t) in_shape.dim_size(1) * in_shape.dim_size(2) * in_shape.dim_size(3), @@ -403,16 +391,8 @@ class MklSliceOp : public OpKernel { // slice_dim is not 1 or 3, then we fallback to Eigen implementation. } - Eigen::DSizes indices; - Eigen::DSizes sizes; - for (int i = 0; i < NDIM; ++i) { - indices[i] = begin[i]; - sizes[i] = size[i]; - } - - functor::Slice()( - context->eigen_device(), result->tensor(), - context->input(0).tensor(), indices, sizes); + functor::Slice()( + context->eigen_device(), result, context->input(0), begin, size); } }; #endif @@ -420,13 +400,13 @@ class MklSliceOp : public OpKernel { // Forward declarations of the functor specializations for declared in the // sharded source files. namespace functor { -#define DECLARE_CPU_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const CPUDevice& d, typename TTypes::Tensor output, \ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ +#define DECLARE_CPU_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const CPUDevice& d, Tensor* output, \ + const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ @@ -476,13 +456,14 @@ REGISTER_SLICE(bfloat16); #if GOOGLE_CUDA // Forward declarations of the functor specializations for GPU. namespace functor { -#define DECLARE_GPU_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const GPUDevice& d, typename TTypes::Tensor output, \ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ +#define DECLARE_GPU_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const GPUDevice& d, \ + Tensor* output, \ + const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ @@ -536,13 +517,14 @@ REGISTER_KERNEL_BUILDER(Name("Slice") #ifdef TENSORFLOW_USE_SYCL // Forward declarations of the functor specializations for SYCL. namespace functor { -#define DECLARE_SYCL_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const SYCLDevice& d, typename TTypes::Tensor output,\ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ +#define DECLARE_SYCL_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const SYCLDevice& d, \ + Tensor* output, \ + const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h index db7eded745..55a4be985b 100644 --- a/tensorflow/core/kernels/slice_op.h +++ b/tensorflow/core/kernels/slice_op.h @@ -19,31 +19,104 @@ limitations under the License. // Functor definition for SliceOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/ops_util.h" namespace tensorflow { -namespace functor { + +namespace internal { + +template +void SliceSimple(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices); +template +void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices); + +template +void SliceSimple(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices) { + const int ndims = in.dims(); + const int64 nelem = out->NumElements(); + const gtl::InlinedVector in_strides = ComputeStride(in.shape()); + const gtl::InlinedVector out_strides = ComputeStride(out->shape()); + const T* p = in.flat().data(); + T* q = out->flat().data(); + + std::vector i_idx(nelem, 0); + std::vector t(nelem, 0); + + for (int64 o_idx = 0; o_idx < nelem; ++o_idx) { + t[o_idx] = o_idx; + } + for (int i = 0; i < ndims; ++i) { + int64 n = (nelem + 7) / 8; + int64 o_idx = 0; + switch (nelem % 8) { +#define CALC_INPUT_IDX \ + i_idx[o_idx] += (t[o_idx] / out_strides[i] + slice_indices[i]) * in_strides[i]; \ + t[o_idx] %= out_strides[i]; \ + ++o_idx; + case 0: do { CALC_INPUT_IDX; + case 7: CALC_INPUT_IDX; + case 6: CALC_INPUT_IDX; + case 5: CALC_INPUT_IDX; + case 4: CALC_INPUT_IDX; + case 3: CALC_INPUT_IDX; + case 2: CALC_INPUT_IDX; + case 1: CALC_INPUT_IDX; +#undef CALC_INPUT_IDX + } while (--n > 0); + } + } + for (int64 o_idx = 0; o_idx < nelem; ++o_idx) { + q[o_idx] = p[i_idx[o_idx]]; + } +} template +void SliceUsingEigen(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices, + const gtl::ArraySlice& slice_sizes) { + auto input = in.tensor(); + auto output = out->tensor(); + Eigen::DSizes indices; + for (int i = 0; i < NDIMS; ++i) { + indices[i] = slice_indices[i]; + } + Eigen::DSizes sizes; + for (int i = 0; i < NDIMS; ++i) { + sizes[i] = slice_sizes[i]; + } + const bool use_64bit = input.size() > Eigen::NumTraits::highest(); + if (!use_64bit && + Eigen::internal::is_same::value) { + To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes); + } else { + output.device(d) = input.slice(indices, sizes); + } +} + +} // namespace internal + +namespace functor { + +// Template parameter NDIM is not neccesary here. The aim of keeping it +// is to compile struct slice seperately which minimizes the compiling time. +template struct Slice { - void operator()(const Device& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { - bool use_64bit = (input.size() > Eigen::NumTraits::highest()); - if (!use_64bit && - Eigen::internal::is_same::value) { - Eigen::DSizes indices; - for (int i = 0; i < NDIMS; ++i) { - indices[i] = slice_indices[i]; - } - Eigen::DSizes sizes; - for (int i = 0; i < NDIMS; ++i) { - sizes[i] = slice_sizes[i]; - } - To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes); + void operator()(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices, + const gtl::ArraySlice& slice_sizes) { + if (in.dims() == NDIM) { + internal::SliceUsingEigen(d, out, in, slice_indices, slice_sizes); } else { - output.device(d) = input.slice(slice_indices, slice_sizes); + if (Eigen::internal::is_same::value) { + internal::SliceSimpleGpu(d, out, in, slice_indices); + } else { + internal::SliceSimple(d, out, in, slice_indices); + } } } }; diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc index a301986f2f..3039b3d777 100644 --- a/tensorflow/core/kernels/slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc @@ -21,9 +21,65 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" namespace tensorflow { +namespace internal { + +template +__global__ void SliceKernel(int nthreads, const T* src, const int32* buf, + const int32 ndims, T* dst) { + const int32* in_strides = buf; + const int32* out_strides = buf + ndims; + const int32* slice_indices = buf + ndims * 2; + CUDA_1D_KERNEL_LOOP(o_idx, nthreads) { + int32 i_idx = 0; + int32 t = o_idx; + for (int i = 0; i < ndims; ++i) { + i_idx += (t / out_strides[i] + slice_indices[i]) * in_strides[i]; + t %= out_strides[i]; + } + dst[o_idx] = ldg(src + i_idx); + } +} + +template +void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices) { + // Ensures we can use 32-bit index. + const int64 in_nelem = in.NumElements(); + CHECK_LT(in_nelem, kint32max) << "Tensor too large to transpose on GPU"; + const int64 out_nelem = out->NumElements(); + CHECK_LT(out_nelem, kint32max) << "Tensor too large to transpose on GPU"; + // Pack strides and slice indices sizes into one buffer. + const int32 ndims = in.dims(); + gtl::InlinedVector host_buf(ndims * 3); + gtl::InlinedVector in_strides = ComputeStride(in.shape()); + gtl::InlinedVector out_strides = ComputeStride(out->shape()); + for (int i = 0; i < ndims; ++i) { + host_buf[i] = in_strides[i]; + host_buf[ndims + i] = out_strides[i]; + host_buf[ndims * 2 + i] = slice_indices[i]; + } + auto num_bytes = sizeof(int64) * host_buf.size(); + auto dev_buf = d.allocate(num_bytes); + // NOTE: host_buf is not allocated by CudaHostAllocator, and + // therefore we are doing a sync copy effectively. + d.memcpyHostToDevice(dev_buf, host_buf.data(), num_bytes); + // Launch kernel to q[...] = p[...]. + const T* p = in.flat().data(); + T* q = out->flat().data(); + CudaLaunchConfig cfg = GetCudaLaunchConfig(out_nelem, d); + SliceKernel<<>>( + cfg.virtual_thread_count, p, reinterpret_cast(dev_buf), + ndims, q); + // Safe to deallocate immediately after the kernel launch. + d.deallocate(dev_buf); +} + +} // namespace internal typedef Eigen::GpuDevice GPUDevice; diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 73b6d4cf6a..8fc40db3cc 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index afe3a051e6..7d42887426 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -84,16 +84,16 @@ void HandleStridedSliceCase(OpKernelContext* context, gtl::InlinedVector processing_dims = processing_shape.dim_sizes(); if (is_simple_slice) { - Eigen::DSizes begin_di; - Eigen::DSizes sizes_di; + gtl::InlinedVector sizes(begin.size()); for (int i = 0; i < NDIM; ++i) { - begin_di[i] = begin[i]; - sizes_di[i] = end[i] - begin[i]; + sizes[i] = end[i] - begin[i]; } - functor::Slice()( - context->eigen_device(), - result->bit_casted_shaped(processing_dims), - context->input(0).bit_casted_tensor(), begin_di, sizes_di); + const TensorShape final_shape = result->shape(); + CHECK(result->CopyFrom(*result, processing_shape)); + const Tensor input = context->input(0); + functor::Slice()( + context->eigen_device(), result, input, begin, sizes); + CHECK(result->CopyFrom(*result, final_shape)); } else { Eigen::DSizes begin_di; Eigen::DSizes end_di; @@ -196,10 +196,9 @@ class HandleStridedSliceAssignCase { extern template struct StridedSlice; \ template <> \ void Slice::operator()( \ - const GPUDevice& d, typename TTypes::Tensor output, \ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ + const GPUDevice& d, Tensor* output, const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; \ template <> \ void StridedSliceGrad::operator()( \ @@ -284,7 +283,6 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU); TF_CALL_complex64(DECLARE_FOR_N_GPU); TF_CALL_complex128(DECLARE_FOR_N_GPU); DECLARE_FOR_N_GPU(int32); -DECLARE_FOR_N_GPU(int64); #endif // END GOOGLE_CUDA TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); @@ -300,7 +298,6 @@ DECLARE_FOR_N_CPU(bfloat16); TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL); DECLARE_FOR_N_SYCL(int32); -DECLARE_FOR_N_SYCL(int64); #undef DECLARE_FOR_N_SYCL #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/strided_slice_op_test.cc b/tensorflow/core/kernels/strided_slice_op_test.cc index 281ca0f58f..78bb15463c 100644 --- a/tensorflow/core/kernels/strided_slice_op_test.cc +++ b/tensorflow/core/kernels/strided_slice_op_test.cc @@ -76,20 +76,69 @@ static void SliceHelper(int iters, int size) { testing::UseRealTime(); } +template +static void Dim8SliceHelper(int iters, int size) { + testing::StopTiming(); + Graph* g = new Graph(OpRegistry::Global()); + DataType dt = DataTypeToEnum::v(); + int kDim = 100; + int kMaxSize = 15000; + CHECK_LT(size, kMaxSize); + + Tensor begin(DT_INT32, TensorShape({8})); + begin.flat()(10) = 10; + for (int i = 1; i < 7; ++i) { + begin.flat()(i) = 0; + } + begin.flat()(7) = 10; + + Tensor end(DT_INT32, TensorShape({8})); + end.flat()(0) = 10 + kDim; + for (int i = 1; i < 7; ++i) { + end.flat()(i) = 1; + } + end.flat()(7) = 10 + size; + + Tensor strides(DT_INT32, TensorShape({8})); + for (int i = 0; i < 8; ++i) { + strides.flat()(i) = 1; + } + + Tensor input(dt, TensorShape({2*kDim, 1, 1, 1, 1, 1, 1, kMaxSize})); + input.flat().setRandom(); + + Node* node; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "StridedSlice") + .Input(test::graph::Constant(g, input)) + .Input(test::graph::Constant(g, begin)) + .Input(test::graph::Constant(g, end)) + .Input(test::graph::Constant(g, strides)) + .Attr("T", dt) + .Finalize(g, &node)); + + testing::BytesProcessed(static_cast(iters) * kDim * size * sizeof(T)); + testing::StartTiming(); + test::Benchmark("cpu", g).Run(iters); + testing::UseRealTime(); +} + static void BM_SliceFloat(int iters, int dim2) { SliceHelper(iters, dim2); + Dim8SliceHelper(iters, dim2); } BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000); static void BM_SliceComplex64(int iters, int dim2) { SliceHelper>(iters, dim2); + Dim8SliceHelper>(iters, dim2); } BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000); static void BM_SliceBFloat16(int iters, int dim2) { SliceHelper(iters, dim2); + Dim8SliceHelper(iters, dim2); } BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000); diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc index cd366f8c13..313137ae49 100644 --- a/tensorflow/core/kernels/summary_interface.cc +++ b/tensorflow/core/kernels/summary_interface.cc @@ -257,9 +257,7 @@ class SummaryWriterImpl : public SummaryWriterInterface { Summary::Value* v = e->mutable_summary()->add_value(); t.AsProtoTensorContent(v->mutable_tensor()); v->set_tag(tag); - if (!serialized_metadata.empty()) { - v->mutable_metadata()->ParseFromString(serialized_metadata); - } + v->mutable_metadata()->ParseFromString(serialized_metadata); return WriteEvent(std::move(e)); } diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index 1fe2fc5b66..cfa707de71 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -13,12 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/kernels/summary_interface.h" -#include "tensorflow/core/lib/db/sqlite.h" -#include "tensorflow/core/platform/protobuf.h" namespace tensorflow { @@ -49,32 +46,6 @@ class CreateSummaryFileWriterOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("CreateSummaryFileWriter").Device(DEVICE_CPU), CreateSummaryFileWriterOp); -class CreateSummaryDbWriterOp : public OpKernel { - public: - explicit CreateSummaryDbWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("db_uri", &tmp)); - const string db_uri = tmp->scalar()(); - OP_REQUIRES_OK(ctx, ctx->input("experiment_name", &tmp)); - const string experiment_name = tmp->scalar()(); - OP_REQUIRES_OK(ctx, ctx->input("run_name", &tmp)); - const string run_name = tmp->scalar()(); - OP_REQUIRES_OK(ctx, ctx->input("user_name", &tmp)); - const string user_name = tmp->scalar()(); - SummaryWriterInterface* s; - auto db = Sqlite::Open(db_uri); - OP_REQUIRES_OK(ctx, db.status()); - OP_REQUIRES_OK( - ctx, CreateSummaryDbWriter(std::move(db.ValueOrDie()), experiment_name, - run_name, user_name, ctx->env(), &s)); - OP_REQUIRES_OK(ctx, CreateResource(ctx, HandleFromInput(ctx, 0), s)); - } -}; -REGISTER_KERNEL_BUILDER(Name("CreateSummaryDbWriter").Device(DEVICE_CPU), - CreateSummaryDbWriterOp); - class FlushSummaryWriterOp : public OpKernel { public: explicit FlushSummaryWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -127,27 +98,6 @@ class WriteSummaryOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU), WriteSummaryOp); -class ImportEventOp : public OpKernel { - public: - explicit ImportEventOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - SummaryWriterInterface* s; - OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); - core::ScopedUnref unref(s); - const Tensor* t; - OP_REQUIRES_OK(ctx, ctx->input("event", &t)); - std::unique_ptr event{new Event}; - if (!ParseProtoUnlimited(event.get(), t->scalar()())) { - ctx->CtxFailureWithWarning( - errors::DataLoss("Bad tf.Event binary proto tensor string")); - return; - } - OP_REQUIRES_OK(ctx, s->WriteEvent(std::move(event))); - } -}; -REGISTER_KERNEL_BUILDER(Name("ImportEvent").Device(DEVICE_CPU), ImportEventOp); - class WriteScalarSummaryOp : public OpKernel { public: explicit WriteScalarSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/take_dataset_op.cc index fb294a96b1..c3f33d663c 100644 --- a/tensorflow/core/kernels/take_dataset_op.cc +++ b/tensorflow/core/kernels/take_dataset_op.cc @@ -35,14 +35,14 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { // Create a new TakeDatasetOp::Dataset, and return it as the output. int64 count; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "count", &count)); - *output = new Dataset(ctx, count, input); + *output = new Dataset(count, input); } private: - class Dataset : public GraphDatasetBase { + class Dataset : public DatasetBase { public: - Dataset(OpKernelContext* ctx, int64 count, const DatasetBase* input) - : GraphDatasetBase(ctx), count_(count), input_(input) { + Dataset(int64 count, const DatasetBase* input) + : count_(count), input_(input) { input_->Ref(); } @@ -72,18 +72,6 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "TakeDatasetOp::Dataset"; } - protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); - Node* count = nullptr; - TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); - TF_RETURN_IF_ERROR( - b->AddDataset(this, {input_graph_node, count}, output)); - return Status::OK(); - } - private: class EmptyIterator : public DatasetIterator { public: @@ -95,16 +83,6 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } - - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - return Status::OK(); - } - - Status RestoreInternal(OpKernelContext* ctx, - IteratorStateReader* reader) override { - return Status::OK(); - } }; class FiniteIterator : public DatasetIterator { @@ -118,10 +96,6 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. - if (!input_impl_) { - *end_of_sequence = true; - return Status::OK(); - } while (i_ < dataset()->count_) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -136,31 +110,6 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - if (input_impl_) { - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); - } else { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("input_impl_empty"), "")); - } - return Status::OK(); - } - - Status RestoreInternal(OpKernelContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - if (!reader->Contains(full_name("input_impl_empty"))) { - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } else { - input_impl_.reset(); - } - return Status::OK(); - } - private: mutex mu_; int64 i_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc index 20f0edf309..96c051c636 100644 --- a/tensorflow/core/kernels/transpose_op.cc +++ b/tensorflow/core/kernels/transpose_op.cc @@ -31,13 +31,14 @@ limitations under the License. namespace tensorflow { -// inv = InvertPermutationOp(T p) takes a permutation of +// inv = InvertPermutationOp(T p) takes a permutation of // integers 0, 1, ..., n - 1 and returns the inverted // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n). // -// REQUIRES: input is a vector of int32. +// REQUIRES: input is a vector of int32 or int64. // REQUIRES: input is a permutation of 0, 1, ..., n-1. +template class InvertPermutationOp : public OpKernel { public: explicit InvertPermutationOp(OpKernelConstruction* context) @@ -48,20 +49,19 @@ class InvertPermutationOp : public OpKernel { OP_REQUIRES( context, TensorShapeUtils::IsVector(input.shape()), errors::InvalidArgument("invert_permutation expects a 1D vector.")); - auto Tin = input.vec(); + auto Tin = input.vec(); OP_REQUIRES(context, FastBoundsCheck(Tin.size(), std::numeric_limits::max()), errors::InvalidArgument("permutation of nonnegative int32s " "must have <= int32 max elements")); - const int32 N = - static_cast(Tin.size()); // Safe: bounds-checked above. + const T N = static_cast(Tin.size()); // Safe: bounds-checked above. Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); - auto Tout = output->vec(); + auto Tout = output->vec(); std::fill_n(Tout.data(), N, -1); for (int i = 0; i < N; ++i) { - const int32 d = internal::SubtleMustCopy(Tin(i)); + const T d = internal::SubtleMustCopy(Tin(i)); OP_REQUIRES(context, FastBoundsCheck(d, N), errors::InvalidArgument(d, " is not between 0 and ", N)); OP_REQUIRES(context, Tout(d) == -1, @@ -73,14 +73,23 @@ class InvertPermutationOp : public OpKernel { REGISTER_KERNEL_BUILDER( Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER( + Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), + InvertPermutationOp); REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER(Name("InvertPermutation") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + InvertPermutationOp); #ifdef TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("InvertPermutation") @@ -88,7 +97,13 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER(Name("InvertPermutation") + .Device(DEVICE_SYCL) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + InvertPermutationOp); #endif // TENSORFLOW_USE_SYCL namespace { diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index 701c5f6d2b..d087784c8a 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include @@ -21,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { @@ -33,8 +35,6 @@ class UniqueOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), - errors::InvalidArgument("unique expects a 1D vector.")); // TODO(dga): Make unique polymorphic for returning int32 and int64 // vectors to support large tensors. OP_REQUIRES(context, @@ -42,31 +42,102 @@ class UniqueOp : public OpKernel { errors::InvalidArgument( "unique does not support input tensors larger than ", std::numeric_limits::max(), " elements")); - auto Tin = input.vec(); - const int64 N = static_cast(Tin.size()); + + int64 axis = 0; + std::vector new_sizes{1, input.NumElements(), 1}; + if (context->num_inputs() == 1) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + // In case of UniqueV2, the axis is a 1D vector. The purpose is + // to allow specifying either "no axis" or "axis". The `[]` means + // "no axis", while `[x]` means `axis = x`. + const Tensor& axis_tensor = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()), + errors::InvalidArgument("axis expects a 1D vector.")); + OP_REQUIRES( + context, axis_tensor.NumElements() <= 1, + errors::InvalidArgument( + "axis does not support input tensors larger than 1 elements")); + if (axis_tensor.NumElements() == 0) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + auto axis_vec = axis_tensor.vec(); + axis = axis_vec(0); + axis = axis < 0 ? axis + input.dims() : axis; + OP_REQUIRES(context, 0 <= axis && axis < input.dims(), + errors::InvalidArgument("axis has to be between [0, ", + input.dims(), ")")); + if (axis > 0) { + for (int64 i = 0; i < axis; i++) { + new_sizes[0] *= input.dim_size(i); + } + } + new_sizes[1] = input.dim_size(axis); + if (axis + 1 < input.dims()) { + for (int64 i = axis + 1; i < input.dims(); i++) { + new_sizes[2] *= input.dim_size(i); + } + } + } + } + + auto Tin = input.shaped(new_sizes); Tensor* idx = nullptr; - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 1, input.shape(), &idx)); + OP_REQUIRES_OK(context, context->allocate_output( + 1, TensorShape({Tin.dimension(1)}), &idx)); auto idx_vec = idx->template vec(); - std::unordered_map uniq; - uniq.reserve(2 * N); - for (int64 i = 0, j = 0; i < N; ++i) { - auto it = uniq.insert(std::make_pair(Tin(i), j)); + auto hash_fn = [&Tin](const int64& key) -> unsigned long { + size_t h = 0; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + h = Hash64Combine(h, hash{}(Tin(i, key, j))); + } + } + return h; + }; + + auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) { + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + if (Tin(i, lhs, j) != Tin(i, rhs, j)) { + return false; + } + } + } + return true; + }; + + std::unordered_map + uniq(0, hash_fn, equal_to_fn); + + uniq.reserve(2 * Tin.dimension(1)); + + for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) { + auto it = uniq.insert(std::make_pair(i, j)); idx_vec(i) = it.first->second; if (it.second) { ++j; } } + int64 uniq_size = static_cast(uniq.size()); + new_sizes[1] = uniq_size; + TensorShape output_shape(input.shape()); + output_shape.set_dim(axis, uniq_size); Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({uniq_size}), &output)); - auto output_vec = output->template vec(); + OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); + auto Tout = output->shaped(new_sizes); for (auto it : uniq) { - output_vec(it.second) = it.first; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + Tout(i, it.second, j) = Tin(i, it.first, j); + } + } } if (num_outputs() > 2) { @@ -74,7 +145,7 @@ class UniqueOp : public OpKernel { 2, TensorShape({uniq_size}), &output)); auto count_output_vec = output->template vec(); count_output_vec.setZero(); - for (int64 i = 0; i < N; ++i) { + for (int64 i = 0; i < Tin.dimension(1); ++i) { count_output_vec(idx_vec(i))++; } } @@ -92,6 +163,16 @@ class UniqueOp : public OpKernel { .TypeConstraint("T") \ .TypeConstraint("out_idx"), \ UniqueOp); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("out_idx"), \ + UniqueOp); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("out_idx"), \ + UniqueOp); \ REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ @@ -176,5 +257,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique") .HostMemory("y") .HostMemory("idx"), UniqueOp); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index f466c8b268..a80b9edbe4 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -35,15 +35,14 @@ class ZipDatasetOp : public DatasetOpKernel { OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input)); inputs.push_back(input); } - *output = new Dataset(ctx, inputs); + *output = new Dataset(inputs); } private: - class Dataset : public GraphDatasetBase { + class Dataset : public DatasetBase { public: - explicit Dataset(OpKernelContext* ctx, - const std::vector& inputs) - : GraphDatasetBase(ctx), inputs_(inputs) { + explicit Dataset(const std::vector& inputs) + : inputs_(inputs) { for (const auto& input : inputs_) { input->Ref(); for (DataType dt : input->output_dtypes()) { @@ -77,21 +76,6 @@ class ZipDatasetOp : public DatasetOpKernel { string DebugString() override { return "ZipDatasetOp::Dataset"; } - protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, - Node** output) const override { - std::vector input_graph_nodes; - input_graph_nodes.reserve(inputs_.size()); - for (const auto& input : inputs_) { - Node* input_node; - TF_RETURN_IF_ERROR(b->AddParentDataset(input, &input_node)); - input_graph_nodes.emplace_back(input_node); - } - TF_RETURN_IF_ERROR( - b->AddDatasetWithInputAsList(this, input_graph_nodes, output)); - return Status::OK(); - } - private: class Iterator : public DatasetIterator { public: @@ -109,10 +93,6 @@ class ZipDatasetOp : public DatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); - if (input_impls_.empty()) { - *end_of_sequence = true; - return Status::OK(); - } out_tensors->clear(); out_tensors->reserve(dataset()->output_dtypes().size()); for (const auto& input_impl : input_impls_) { @@ -120,43 +100,12 @@ class ZipDatasetOp : public DatasetOpKernel { TF_RETURN_IF_ERROR( input_impl->GetNext(ctx, &input_tensors, end_of_sequence)); if (*end_of_sequence) { - break; + return Status::OK(); } out_tensors->insert(out_tensors->end(), input_tensors.begin(), input_tensors.end()); } - if (*end_of_sequence) { - out_tensors->clear(); - input_impls_.clear(); - } else { - *end_of_sequence = false; - } - return Status::OK(); - } - - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - if (input_impls_.empty()) { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("input_impls_empty"), "")); - } else { - for (auto& input_impl : input_impls_) - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl)); - } - return Status::OK(); - } - - Status RestoreInternal(OpKernelContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - if (reader->Contains(full_name("input_impls_empty"))) { - input_impls_.clear(); - } else { - DCHECK_EQ(input_impls_.size(), dataset()->inputs_.size()); - for (auto& input_impl : input_impls_) - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl)); - } + *end_of_sequence = false; return Status::OK(); } diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index cdf370399c..c8cc147360 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -723,7 +723,9 @@ y: a tensor of the same shape and type as x but filled with zeros. REGISTER_OP("OnesLike") .Input("x: T") .Output("y: T") - .Attr("T: {float, double, int32, int64, complex64, complex128}") + .Attr( + "T: {float, double, int8, uint8, int16, uint16, int32, int64, " + "complex64, complex128, bool}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns a tensor of ones with the same shape and type as x. @@ -2031,6 +2033,46 @@ y: 1-D. idx: 1-D. )doc"); +REGISTER_OP("UniqueV2") + .Input("x: T") + .Input("axis: int64") + .Output("y: T") + .Output("idx: out_idx") + .Attr("T: type") + .Attr("out_idx: {int32, int64} = DT_INT32") + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); + c->set_output(1, c->input(0)); + return Status::OK(); + }) + .Doc(R"doc( +Finds unique elements in a 1-D tensor. + +This operation returns a tensor `y` containing all of the unique elements of `x` +sorted in the same order that they occur in `x`. This operation also returns a +tensor `idx` the same size as `x` that contains the index of each value of `x` +in the unique output `y`. In other words: + +`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` + +For example: + +``` +# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +y, idx = unique(x) +y ==> [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` + + +x: A `Tensor`. +axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to + find the unique elements. +y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. +idx: A 1-D Tensor. Has the same type as x that contains the index of each + value of x in the output y. +)doc"); + // -------------------------------------------------------------------------- REGISTER_OP("UniqueWithCounts") .Input("x: T") diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 60f67543f1..8b8251f84b 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -8270,29 +8270,6 @@ op { } } } -op { - name: "DatasetToSingleElement" - input_arg { - name: "dataset" - type: DT_VARIANT - } - output_arg { - name: "components" - type_list_attr: "output_types" - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "DebugGradientIdentity" input_arg { @@ -9271,69 +9248,6 @@ op { } } } -op { - name: "DenseToSparseBatchDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "batch_size" - type: DT_INT64 - } - input_arg { - name: "row_shape" - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - is_stateful: true -} -op { - name: "DenseToSparseBatchDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "batch_size" - type: DT_INT64 - } - input_arg { - name: "row_shape" - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "DenseToSparseSetOperation" input_arg { @@ -9827,18 +9741,6 @@ op { } } } -op { - name: "DeserializeIterator" - input_arg { - name: "resource_handle" - type: DT_RESOURCE - } - input_arg { - name: "serialized" - type: DT_VARIANT - } - is_stateful: true -} op { name: "DeserializeManySparse" input_arg { @@ -13592,131 +13494,6 @@ op { } } } -op { - name: "GroupByWindowDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "key_func_other_arguments" - type_list_attr: "Tkey_func_other_arguments" - } - input_arg { - name: "reduce_func_other_arguments" - type_list_attr: "Treduce_func_other_arguments" - } - input_arg { - name: "window_size_func_other_arguments" - type_list_attr: "Twindow_size_func_other_arguments" - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "key_func" - type: "func" - } - attr { - name: "reduce_func" - type: "func" - } - attr { - name: "window_size_func" - type: "func" - } - attr { - name: "Tkey_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "Treduce_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "Twindow_size_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - is_stateful: true -} -op { - name: "GroupByWindowDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "key_func_other_arguments" - type_list_attr: "Tkey_func_other_arguments" - } - input_arg { - name: "reduce_func_other_arguments" - type_list_attr: "Treduce_func_other_arguments" - } - input_arg { - name: "window_size_func_other_arguments" - type_list_attr: "Twindow_size_func_other_arguments" - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "key_func" - type: "func" - } - attr { - name: "reduce_func" - type: "func" - } - attr { - name: "window_size_func" - type: "func" - } - attr { - name: "Tkey_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "Treduce_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "Twindow_size_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "HSVToRGB" input_arg { @@ -14137,53 +13914,6 @@ op { } } } -op { - name: "IgnoreErrorsDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - is_stateful: true -} -op { - name: "IgnoreErrorsDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "Imag" input_arg { @@ -16088,50 +15818,6 @@ op { } is_stateful: true } -op { - name: "MapAndBatchDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "other_arguments" - type_list_attr: "Targuments" - } - input_arg { - name: "batch_size" - type: DT_INT64 - } - input_arg { - name: "num_parallel_batches" - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "MapClear" attr { @@ -20870,54 +20556,6 @@ op { type: "type" } } -op { - name: "ParallelInterleaveDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "other_arguments" - type_list_attr: "Targuments" - } - input_arg { - name: "cycle_length" - type: DT_INT64 - } - input_arg { - name: "block_length" - type: DT_INT64 - } - input_arg { - name: "sloppy" - type: DT_BOOL - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "ParallelMapDataset" input_arg { @@ -21670,52 +21308,6 @@ op { } is_stateful: true } -op { - name: "Print" - input_arg { - name: "input" - type_attr: "T" - } - input_arg { - name: "data" - type_list_attr: "U" - } - output_arg { - name: "output" - type_attr: "T" - } - attr { - name: "T" - type: "type" - } - attr { - name: "U" - type: "list(type)" - has_minimum: true - } - attr { - name: "message" - type: "string" - default_value { - s: "" - } - } - attr { - name: "first_n" - type: "int" - default_value { - i: -1 - } - } - attr { - name: "summarize" - type: "int" - default_value { - i: 3 - } - } - is_stateful: true -} op { name: "PriorityQueue" output_arg { @@ -30554,52 +30146,6 @@ op { } } } -op { - name: "ScanDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "initial_state" - type_list_attr: "Tstate" - } - input_arg { - name: "other_arguments" - type_list_attr: "Targuments" - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - } - attr { - name: "Tstate" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "ScatterAdd" input_arg { @@ -32315,18 +31861,6 @@ op { } } } -op { - name: "SerializeIterator" - input_arg { - name: "resource_handle" - type: DT_RESOURCE - } - output_arg { - name: "serialized" - type: DT_VARIANT - } - is_stateful: true -} op { name: "SerializeManySparse" input_arg { @@ -37731,38 +37265,6 @@ op { } } } -op { - name: "SqlDataset" - input_arg { - name: "driver_name" - type: DT_STRING - } - input_arg { - name: "data_source_name" - type: DT_STRING - } - input_arg { - name: "query" - type: DT_STRING - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - is_stateful: true -} op { name: "Sqrt" input_arg { diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index f512213964..8f5d8308a3 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -141,16 +141,6 @@ count: A scalar representing the number of elements from the `input_dataset` that should be skipped. If count is -1, skips everything. )doc"); -REGISTER_OP("IgnoreErrorsDataset") - .Input("input_dataset: variant") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that contains the elements of `input_dataset` ignoring errors. -)doc"); - REGISTER_OP("MapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -184,32 +174,6 @@ num_parallel_calls: The number of concurrent invocations of `f` that process elements from `input_dataset` in parallel. )doc"); -REGISTER_OP("MapAndBatchDataset") - .Input("input_dataset: variant") - .Input("other_arguments: Targuments") - .Input("batch_size: int64") - .Input("num_parallel_batches: int64") - .Output("handle: variant") - .Attr("f: func") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that applies `f` to the outputs of `input_dataset` and then -batches `batch_size` of them. - -Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up -to `batch_size * num_parallel_batches` copies of `f` in parallel. - -batch_size: A scalar representing the number of elements to accumulate in a - batch. It determines the number of concurrent invocations of `f` that process - elements from `input_dataset` in parallel. -num_parallel_batches: A scalar representing the number of batches to create in - parallel. Processing multiple batches in parallel benefits workloads prone to - stragglers. -)doc"); - REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") .Input("buffer_size: int64") @@ -224,21 +188,6 @@ buffer_size: The maximum number of elements to buffer in an iterator over this dataset. )doc"); -REGISTER_OP("ScanDataset") - .Input("input_dataset: variant") - .Input("initial_state: Tstate") - .Input("other_arguments: Targuments") - .Output("handle: variant") - .Attr("f: func") - .Attr("Tstate: list(type) >= 1") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset successively reduces `f` over the elements of `input_dataset`. -)doc"); - REGISTER_OP("FlatMapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -285,59 +234,6 @@ f: A function mapping elements of `input_dataset`, concatenated with `output_types` and `output_shapes`. )doc"); -REGISTER_OP("ParallelInterleaveDataset") - .Input("input_dataset: variant") - .Input("other_arguments: Targuments") - .Input("cycle_length: int64") - .Input("block_length: int64") - .Input("sloppy: bool") - .Output("handle: variant") - .Attr("f: func") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that applies `f` to the outputs of `input_dataset`. - -The resulting dataset is similar to the `InterleaveDataset`, with the exception -that if retrieving the next value from a dataset would cause the requester to -block, it will skip that input dataset. This dataset is especially useful -when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it -allows the training step to proceed so long as some data is available. - -!! WARNING !! This dataset is not deterministic! - -f: A function mapping elements of `input_dataset`, concatenated with - `other_arguments`, to a Dataset variant that contains elements matching - `output_types` and `output_shapes`. -)doc"); - -REGISTER_OP("GroupByWindowDataset") - .Input("input_dataset: variant") - .Input("key_func_other_arguments: Tkey_func_other_arguments") - .Input("reduce_func_other_arguments: Treduce_func_other_arguments") - .Input( - "window_size_func_other_arguments: Twindow_size_func_other_arguments") - .Output("handle: variant") - .Attr("key_func: func") - .Attr("reduce_func: func") - .Attr("window_size_func: func") - .Attr("Tkey_func_other_arguments: list(type) >= 0") - .Attr("Treduce_func_other_arguments: list(type) >= 0") - .Attr("Twindow_size_func_other_arguments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that computes a windowed group-by on `input_dataset`. - -// TODO(mrry): Support non-int64 keys. - -key_func: A function mapping an element of `input_dataset`, concatenated - with `key_func_other_arguments` to a scalar value of type DT_INT64. -)doc"); - REGISTER_OP("FilterDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -408,27 +304,6 @@ padding_values: A list of scalars containing the padding value to use for each of the outputs. )doc"); -REGISTER_OP("DenseToSparseBatchDataset") - .Input("input_dataset: variant") - .Input("batch_size: int64") - .Input("row_shape: int64") - .Output("handle: variant") - // NOTE(mrry): the 0th and 2nd elements will be DT_INT64. - .Attr("output_types: list(type) >= 1") - // NOTE(mrry): the 1st and 2nd elements will be vectors. - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that yields a SparseTensor for each element of the input. - -input_dataset: A handle to an input dataset. Must have a single component. -batch_size: A scalar representing the number of elements to accumulate in a - batch. -row_shape: A vector representing the dense shape of each row in the produced - SparseTensor. The shape may be partially specified, using `-1` to indicate - that a particular dimension should use the maximum size of all batch elements. -)doc"); - REGISTER_OP("RangeDataset") .Input("start: int64") .Input("stop: int64") @@ -514,24 +389,6 @@ compression_type: A scalar containing either (i) the empty string (no buffer_size: A scalar containing the number of bytes to buffer. )doc"); -REGISTER_OP("SqlDataset") - .Input("driver_name: string") - .Input("data_source_name: string") - .Input("query: string") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked - // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that executes a SQL query and emits rows of the result set. - -driver_name: The database type. Currently, the only supported type is 'sqlite'. -data_source_name: A connection string to connect to the database. -query: A SQL query to execute. -)doc"); - REGISTER_OP("FixedLengthRecordDataset") .Input("filenames: string") .Input("header_bytes: int64") @@ -662,36 +519,6 @@ REGISTER_OP("IteratorGetNext") Gets the next output from the given iterator. )doc"); -REGISTER_OP("DatasetToSingleElement") - .Input("dataset: variant") - .Output("components: output_types") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn([](shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - std::vector output_shapes; - TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); - if (output_shapes.size() != c->num_outputs()) { - return errors::InvalidArgument( - "`output_shapes` must be the same length as `output_types` (", - output_shapes.size(), " vs. ", c->num_outputs()); - } - for (size_t i = 0; i < output_shapes.size(); ++i) { - shape_inference::ShapeHandle output_shape_handle; - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( - output_shapes[i], &output_shape_handle)); - c->set_output(static_cast(i), output_shape_handle); - } - return Status::OK(); - }) - .Doc(R"doc( -Outputs the single element from the given dataset. - -dataset: A handle to a dataset that contains a single element. -components: The components of the single element of `input`. -)doc"); - REGISTER_OP("IteratorToStringHandle") .Input("resource_handle: resource") .Output("string_handle: string") @@ -720,28 +547,4 @@ output_shapes: If specified, defines the shape of each tuple component in an element produced by the resulting iterator. )doc"); -REGISTER_OP("SerializeIterator") - .Input("resource_handle: resource") - .Output("serialized: variant") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Converts the given `resource_handle` representing an iterator to a variant tensor. - -resource_handle: A handle to an iterator resource. -serialized: A variant tensor storing the state of the iterator contained in the - resource. -)doc"); - -REGISTER_OP("DeserializeIterator") - .Input("resource_handle: resource") - .Input("serialized: variant") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Converts the given variant tensor to an iterator and stores it in the given resource. - -resource_handle: A handle to an iterator resource. -serialized: A variant tensor storing the state of the iterator contained in the - resource. -)doc"); - } // namespace tensorflow diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc index e6995821df..11cb9861a3 100644 --- a/tensorflow/core/ops/logging_ops.cc +++ b/tensorflow/core/ops/logging_ops.cc @@ -43,7 +43,7 @@ REGISTER_OP("Print") .Output("output: T") .SetIsStateful() .Attr("T: type") - .Attr("U: list(type) >= 0") + .Attr("U: list(type)") .Attr("message: string = ''") .Attr("first_n: int = -1") .Attr("summarize: int = 3") diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 7b10af9f44..d30b847696 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1829,6 +1829,8 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index de059a3e7e..a3609372a9 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -819,7 +819,7 @@ REGISTER_OP("DepthwiseConv2dNative") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -945,7 +945,7 @@ REGISTER_OP("Conv3D") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -977,7 +977,7 @@ REGISTER_OP("Conv3DBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropInputV2") @@ -1003,7 +1003,7 @@ REGISTER_OP("Conv3DBackpropFilter") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropFilterV2") @@ -1032,7 +1032,7 @@ REGISTER_OP("Conv3DBackpropInputV2") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -1069,7 +1069,7 @@ REGISTER_OP("Conv3DBackpropFilterV2") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 2a74c20707..2c73441e7d 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5261,6 +5261,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5327,6 +5328,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5382,6 +5384,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5447,6 +5450,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5502,6 +5506,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -6058,32 +6063,6 @@ op { summary: "Compute the cumulative sum of the tensor `x` along `axis`." description: "By default, this op performs an inclusive cumsum, which means that the first\nelement of the input is identical to the first element of the output:\n\n```python\ntf.cumsum([a, b, c]) # => [a, a + b, a + b + c]\n```\n\nBy setting the `exclusive` kwarg to `True`, an exclusive cumsum is\nperformed instead:\n\n```python\ntf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b]\n```\n\nBy setting the `reverse` kwarg to `True`, the cumsum is performed in the\nopposite direction:\n\n```python\ntf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c]\n```\n\nThis is more efficient than using separate `tf.reverse` ops.\n\nThe `reverse` and `exclusive` kwargs can also be combined:\n\n```python\ntf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0]\n```" } -op { - name: "DatasetToSingleElement" - input_arg { - name: "dataset" - description: "A handle to a dataset that contains a single element." - type: DT_VARIANT - } - output_arg { - name: "components" - description: "The components of the single element of `input`." - type_list_attr: "output_types" - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Outputs the single element from the given dataset." -} op { name: "DebugGradientIdentity" input_arg { @@ -6715,41 +6694,6 @@ op { summary: "Applies set operation along last dimension of 2 `Tensor` inputs." description: "See SetOperationOp::SetOperationFromContext for values of `set_operation`.\n\nOutput `result` is a `SparseTensor` represented by `result_indices`,\n`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this\nhas rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`\ndimension contains the result of `set_operation` applied to the corresponding\n`[0...n-1]` dimension of `set`." } -op { - name: "DenseToSparseBatchDataset" - input_arg { - name: "input_dataset" - description: "A handle to an input dataset. Must have a single component." - type: DT_VARIANT - } - input_arg { - name: "batch_size" - description: "A scalar representing the number of elements to accumulate in a\nbatch." - type: DT_INT64 - } - input_arg { - name: "row_shape" - description: "A vector representing the dense shape of each row in the produced\nSparseTensor. The shape may be partially specified, using `-1` to indicate\nthat a particular dimension should use the maximum size of all batch elements." - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Creates a dataset that yields a SparseTensor for each element of the input." -} op { name: "DenseToSparseSetOperation" input_arg { @@ -7089,21 +7033,6 @@ op { summary: "Dequantize the \'input\' tensor into a float Tensor." description: "[min_range, max_range] are scalar floats that specify the range for\nthe \'input\' data. The \'mode\' attribute controls exactly which calculations are\nused to convert the float values to their quantized equivalents.\n\nIn \'MIN_COMBINED\' mode, each value of the tensor will undergo the following:\n\n```\nif T == qint8, in[i] += (range(T) + 1)/ 2.0\nout[i] = min_range + (in[i]* (max_range - min_range) / range(T))\n```\nhere `range(T) = numeric_limits::max() - numeric_limits::min()`\n\n*MIN_COMBINED Mode Example*\n\nIf the input comes from a QuantizedRelu6, the output type is\nquint8 (range of 0-255) but the possible range of QuantizedRelu6 is\n0-6. The min_range and max_range values are therefore 0.0 and 6.0.\nDequantize on quint8 will take each value, cast to float, and multiply\nby 6 / 255.\nNote that if quantizedtype is qint8, the operation will additionally add\neach value by 128 prior to casting.\n\nIf the mode is \'MIN_FIRST\', then this approach is used:\n\n```c++\nnum_discrete_values = 1 << (# of bits in T)\nrange_adjust = num_discrete_values / (num_discrete_values - 1)\nrange = (range_max - range_min) * range_adjust\nrange_scale = range / num_discrete_values\nconst double offset_input = static_cast(input) - lowest_quantized;\nresult = range_min + ((input - numeric_limits::min()) * range_scale)\n```\n\n*SCALED mode Example*\n\n`SCALED` mode matches the quantization approach used in\n`QuantizeAndDequantize{V2|V3}`.\n\nIf the mode is `SCALED`, we do not use the full range of the output type,\nchoosing to elide the lowest possible value for symmetry (e.g., output range is\n-127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to\n0.\n\nWe first find the range of values in our tensor. The\nrange we use is always centered on 0, so we find m such that\n```c++\n m = max(abs(input_min), abs(input_max))\n```\n\nOur input tensor range is then `[-m, m]`.\n\nNext, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.\nIf T is signed, this is\n```\n num_bits = sizeof(T) * 8\n [min_fixed, max_fixed] =\n [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]\n```\n\nOtherwise, if T is unsigned, the fixed-point range is\n```\n [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]\n```\n\nFrom this we compute our scaling factor, s:\n```c++\n s = (2 * m) / (max_fixed - min_fixed)\n```\n\nNow we can dequantize the elements of our tensor:\n```c++\nresult = input * s\n```" } -op { - name: "DeserializeIterator" - input_arg { - name: "resource_handle" - description: "A handle to an iterator resource." - type: DT_RESOURCE - } - input_arg { - name: "serialized" - description: "A variant tensor storing the state of the iterator contained in the\nresource." - type: DT_VARIANT - } - summary: "Converts the given variant tensor to an iterator and stores it in the given resource." - is_stateful: true -} op { name: "DeserializeManySparse" input_arg { @@ -10218,71 +10147,6 @@ op { summary: "Returns the truth value of (x >= y) element-wise." description: "*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } -op { - name: "GroupByWindowDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "key_func_other_arguments" - type_list_attr: "Tkey_func_other_arguments" - } - input_arg { - name: "reduce_func_other_arguments" - type_list_attr: "Treduce_func_other_arguments" - } - input_arg { - name: "window_size_func_other_arguments" - type_list_attr: "Twindow_size_func_other_arguments" - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "key_func" - type: "func" - description: "A function mapping an element of `input_dataset`, concatenated\nwith `key_func_other_arguments` to a scalar value of type DT_INT64." - } - attr { - name: "reduce_func" - type: "func" - } - attr { - name: "window_size_func" - type: "func" - } - attr { - name: "Tkey_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "Treduce_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "Twindow_size_func_other_arguments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Creates a dataset that computes a windowed group-by on `input_dataset`." - description: "// TODO(mrry): Support non-int64 keys." -} op { name: "HSVToRGB" input_arg { @@ -10743,30 +10607,6 @@ op { summary: "Compute the upper regularized incomplete Gamma function `Q(a, x)`." description: "The upper regularized incomplete Gamma function is defined as:\n\n\\\\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\\\)\n\nwhere\n\n\\\\(Gamma(a, x) = int_{x}^{\\infty} t^{a-1} exp(-t) dt\\\\)\n\nis the upper incomplete Gama function.\n\nNote, above `P(a, x)` (`Igamma`) is the lower regularized complete\nGamma function." } -op { - name: "IgnoreErrorsDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Creates a dataset that contains the elements of `input_dataset` ignoring errors." -} op { name: "Imag" input_arg { @@ -12538,54 +12378,6 @@ op { description: "This operation may be executed multiple times. Each execution will reset the\niterator in `iterator` to the first element of `dataset`." is_stateful: true } -op { - name: "MapAndBatchDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "other_arguments" - type_list_attr: "Targuments" - } - input_arg { - name: "batch_size" - description: "A scalar representing the number of elements to accumulate in a\nbatch. It determines the number of concurrent invocations of `f` that process\nelements from `input_dataset` in parallel." - type: DT_INT64 - } - input_arg { - name: "num_parallel_batches" - description: "A scalar representing the number of batches to create in\nparallel. Processing multiple batches in parallel benefits workloads prone to\nstragglers." - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Creates a dataset that applies `f` to the outputs of `input_dataset` and then" - description: "batches `batch_size` of them.\n\nUnlike a \"MapDataset\", which applies `f` sequentially, this dataset invokes up\nto `batch_size * num_parallel_batches` copies of `f` in parallel." -} op { name: "MapClear" attr { @@ -16256,57 +16048,6 @@ op { summary: "Interleave the values from the `data` tensors into a single tensor." description: "Builds a merged tensor such that\n\n```python\n merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]\n```\n\nFor example, if each `indices[m]` is scalar or vector, we have\n\n```python\n # Scalar indices:\n merged[indices[m], ...] = data[m][...]\n\n # Vector indices:\n merged[indices[m][i], ...] = data[m][i, ...]\n```\n\nEach `data[i].shape` must start with the corresponding `indices[i].shape`,\nand the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we\nmust have `data[i].shape = indices[i].shape + constant`. In terms of this\n`constant`, the output shape is\n\n merged.shape = [max(indices)] + constant\n\nValues may be merged in parallel, so if an index appears in both `indices[m][i]`\nand `indices[n][j]`, the result may be invalid. This differs from the normal\nDynamicStitch operator that defines the behavior in that case.\n\nFor example:\n\n```python\n indices[0] = 6\n indices[1] = [4, 1]\n indices[2] = [[5, 2], [0, 3]]\n data[0] = [61, 62]\n data[1] = [[41, 42], [11, 12]]\n data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]\n merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],\n [51, 52], [61, 62]]\n```\n\nThis method can be used to merge partitions created by `dynamic_partition`\nas illustrated on the following example:\n\n```python\n # Apply function (increments x_i) on elements for which a certain condition\n # apply (x_i != -1 in this example).\n x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])\n condition_mask=tf.not_equal(x,tf.constant(-1.))\n partitioned_data = tf.dynamic_partition(\n x, tf.cast(condition_mask, tf.int32) , 2)\n partitioned_data[1] = partitioned_data[1] + 1.0\n condition_indices = tf.dynamic_partition(\n tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)\n x = tf.dynamic_stitch(condition_indices, partitioned_data)\n # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain\n # unchanged.\n```\n\n
\n\n
" } -op { - name: "ParallelInterleaveDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "other_arguments" - type_list_attr: "Targuments" - } - input_arg { - name: "cycle_length" - type: DT_INT64 - } - input_arg { - name: "block_length" - type: DT_INT64 - } - input_arg { - name: "sloppy" - type: DT_BOOL - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - description: "A function mapping elements of `input_dataset`, concatenated with\n`other_arguments`, to a Dataset variant that contains elements matching\n`output_types` and `output_shapes`." - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`." - description: "The resulting dataset is similar to the `InterleaveDataset`, with the exception\nthat if retrieving the next value from a dataset would cause the requester to\nblock, it will skip that input dataset. This dataset is especially useful\nwhen loading data from a variable-latency datastores (e.g. HDFS, GCS), as it\nallows the training step to proceed so long as some data is available.\n\n!! WARNING !! This dataset is not deterministic!" -} op { name: "ParallelMapDataset" input_arg { @@ -16977,6 +16718,7 @@ op { name: "U" type: "list(type)" has_minimum: true + minimum: 1 } attr { name: "message" @@ -24113,53 +23855,6 @@ op { summary: "Outputs a `Summary` protocol buffer with scalar values." description: "The input `tags` and `values` must have the same shape. The generated summary\nhas a summary value for each tag-value pair in `tags` and `values`." } -op { - name: "ScanDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "initial_state" - type_list_attr: "Tstate" - } - input_arg { - name: "other_arguments" - type_list_attr: "Targuments" - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - } - attr { - name: "Tstate" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Creates a dataset successively reduces `f` over the elements of `input_dataset`." -} op { name: "ScatterAdd" input_arg { @@ -25354,21 +25049,6 @@ op { } summary: "Computes gradients for the scaled exponential linear (Selu) operation." } -op { - name: "SerializeIterator" - input_arg { - name: "resource_handle" - description: "A handle to an iterator resource." - type: DT_RESOURCE - } - output_arg { - name: "serialized" - description: "A variant tensor storing the state of the iterator contained in the\nresource." - type: DT_VARIANT - } - summary: "Converts the given `resource_handle` representing an iterator to a variant tensor." - is_stateful: true -} op { name: "SerializeManySparse" input_arg { @@ -29279,42 +28959,6 @@ op { } summary: "Splits a tensor into `num_split` tensors along one dimension." } -op { - name: "SqlDataset" - input_arg { - name: "driver_name" - description: "The database type. Currently, the only supported type is \'sqlite\'." - type: DT_STRING - } - input_arg { - name: "data_source_name" - description: "A connection string to connect to the database." - type: DT_STRING - } - input_arg { - name: "query" - description: "A SQL query to execute." - type: DT_STRING - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - summary: "Creates a dataset that executes a SQL query and emits rows of the result set." - is_stateful: true -} op { name: "Sqrt" input_arg { diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc index 5efbac7ad7..f778b48797 100644 --- a/tensorflow/core/ops/summary_ops.cc +++ b/tensorflow/core/ops/summary_ops.cc @@ -49,33 +49,6 @@ flush_millis: How often, in milliseconds, to flush the pending events and filename_suffix: Every event file's name is suffixed with this suffix. )doc"); -REGISTER_OP("CreateSummaryDbWriter") - .Input("writer: resource") - .Input("db_uri: string") - .Input("experiment_name: string") - .Input("run_name: string") - .Input("user_name: string") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Creates summary database writer accessible by given resource handle. - -This can be used to write tensors from the execution graph directly -to a database. Only SQLite is supported right now. This function -will create the schema if it doesn't exist. Entries in the Users, -Experiments, and Runs tables will be created automatically if they -don't already exist. - -writer: Handle to SummaryWriter resource to overwrite. -db_uri: For example "file:/tmp/foo.sqlite". -experiment_name: Can't contain ASCII control characters or <>. Case - sensitive. If empty, then the Run will not be associated with any - Experiment. -run_name: Can't contain ASCII control characters or <>. Case sensitive. - If empty, then each Tag will not be associated with any Run. -user_name: Must be valid as both a DNS label and Linux username. If - empty, then the Experiment will not be associated with any User. -)doc"); - REGISTER_OP("FlushSummaryWriter") .Input("writer: resource") .SetShapeFn(shape_inference::NoOutputs) @@ -116,20 +89,6 @@ summary_metadata: Serialized SummaryMetadata protocol buffer containing plugin-related metadata for this summary. )doc"); -REGISTER_OP("ImportEvent") - .Input("writer: resource") - .Input("event: string") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Outputs a `tf.Event` protocol buffer. - -When CreateSummaryDbWriter is being used, this op can be useful for -importing data from event logs. - -writer: A handle to a summary writer. -event: A string containing a binary-encoded tf.Event proto. -)doc"); - REGISTER_OP("WriteScalarSummary") .Input("writer: resource") .Input("global_step: int64") diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 5eeb861bdd..6225c2c705 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -458,25 +458,16 @@ def tf_additional_lib_deps(): def tf_additional_core_deps(): return select({ - "//tensorflow:with_gcp_support_windows_override": [], - "//tensorflow:with_gcp_support_android_override": [], - "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/core/platform/cloud:gcs_file_system", ], "//conditions:default": [], }) + select({ - "//tensorflow:with_hdfs_support_windows_override": [], - "//tensorflow:with_hdfs_support_android_override": [], - "//tensorflow:with_hdfs_support_ios_override": [], "//tensorflow:with_hdfs_support": [ "//tensorflow/core/platform/hadoop:hadoop_file_system", ], "//conditions:default": [], }) + select({ - "//tensorflow:with_s3_support_windows_override": [], - "//tensorflow:with_s3_support_android_override": [], - "//tensorflow:with_s3_support_ios_override": [], "//tensorflow:with_s3_support": [ "//tensorflow/core/platform/s3:s3_file_system", ], @@ -486,9 +477,9 @@ def tf_additional_core_deps(): # TODO(jart, jhseu): Delete when GCP is default on. def tf_additional_cloud_op_deps(): return select({ - "//tensorflow:with_gcp_support_windows_override": [], - "//tensorflow:with_gcp_support_android_override": [], - "//tensorflow:with_gcp_support_ios_override": [], + "//tensorflow:windows": [], + "//tensorflow:android": [], + "//tensorflow:ios": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", ], @@ -498,9 +489,9 @@ def tf_additional_cloud_op_deps(): # TODO(jart, jhseu): Delete when GCP is default on. def tf_additional_cloud_kernel_deps(): return select({ - "//tensorflow:with_gcp_support_windows_override": [], - "//tensorflow:with_gcp_support_android_override": [], - "//tensorflow:with_gcp_support_ios_override": [], + "//tensorflow:windows": [], + "//tensorflow:android": [], + "//tensorflow:ios": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops", ], diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index f746b15fee..f2fadb4558 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -12,6 +12,7 @@ load("//tensorflow:tensorflow.bzl", "tf_copts") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path") +load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp") cc_library( name = "gtest", @@ -194,17 +195,16 @@ cc_library( cc_library( name = "sycl", - data = [ + data = if_ccpp([ "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")), - ], - linkopts = select({ - "//conditions:default": [ - "-Wl,-rpath,../local_config_sycl/sycl/lib", - ], - }), - deps = [ - "@local_config_sycl//sycl:syclrt", - ], + ]), + linkopts = if_ccpp([ + "-Wl,-rpath,../local_config_sycl/sycl/lib", + ]), + deps = if_ccpp( + ["@local_config_sycl//sycl:syclrt"], + ["@local_config_sycl//sycl:sycl_headers"], + ), ) filegroup( diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h index 6a214dbd0a..5c401b7477 100644 --- a/tensorflow/core/platform/default/notification.h +++ b/tensorflow/core/platform/default/notification.h @@ -73,7 +73,7 @@ class Notification { } mutex mu_; // protects mutations of notified_ - condition_variable cv_; // signalled when notified_ becomes non-zero + condition_variable cv_; // signaled when notified_ becomes non-zero std::atomic notified_; // mutations under mu_ }; diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc index e9baad5422..f8b0285c50 100644 --- a/tensorflow/core/platform/posix/error.cc +++ b/tensorflow/core/platform/posix/error.cc @@ -72,7 +72,7 @@ error::Code ErrnoToCode(int err_number) { case EBUSY: // Device or resource busy case ECHILD: // No child processes case EISCONN: // Socket is connected -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case ENOTBLK: // Block device required #endif case ENOTCONN: // The socket is not connected @@ -94,7 +94,7 @@ error::Code ErrnoToCode(int err_number) { case ENODATA: // No message is available on the STREAM read queue case ENOMEM: // Not enough space case ENOSR: // No STREAM resources -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case EUSERS: // Too many users #endif code = error::RESOURCE_EXHAUSTED; @@ -111,7 +111,7 @@ error::Code ErrnoToCode(int err_number) { case EPFNOSUPPORT: // Protocol family not supported #endif case EPROTONOSUPPORT: // Protocol not supported -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case ESOCKTNOSUPPORT: // Socket type not supported #endif case EXDEV: // Improper link @@ -131,7 +131,8 @@ error::Code ErrnoToCode(int err_number) { case ENETUNREACH: // Network unreachable case ENOLCK: // No locks available case ENOLINK: // Link has been severed -#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32)) +#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \ + || defined(__HAIKU__)) case ENONET: // Machine is not on the network #endif code = error::UNAVAILABLE; @@ -156,7 +157,7 @@ error::Code ErrnoToCode(int err_number) { case ENOEXEC: // Exec format error case ENOMSG: // No message of the desired type case EPROTO: // Protocol error -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case EREMOTE: // Object is remote #endif code = error::UNKNOWN; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 6cba40ccfc..09f69a95c1 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,7 +37,8 @@ limitations under the License. #ifdef TF_USE_SNAPPY #include "snappy.h" #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ + || defined(__HAIKU__) #include #endif @@ -61,7 +62,8 @@ int NumSchedulableCPUs() { } perror("sched_getaffinity"); #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ + || defined(__HAIKU__) unsigned int count = std::thread::hardware_concurrency(); if (count > 0) return static_cast(count); #endif diff --git a/tensorflow/core/platform/vmodule_benchmark_test.cc b/tensorflow/core/platform/vmodule_benchmark_test.cc deleted file mode 100644 index 0f9e75bf9c..0000000000 --- a/tensorflow/core/platform/vmodule_benchmark_test.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/test_benchmark.h" - -namespace tensorflow { - -static void BM_DisabledVlog(int iters) { - for (int i = 0; i < iters; ++i) { - VLOG(1) << "Testing VLOG(1)!"; - } -} -BENCHMARK(BM_DisabledVlog); - -} // namespace tensorflow diff --git a/tensorflow/core/platform/vmodule_test.cc b/tensorflow/core/platform/vmodule_test.cc deleted file mode 100644 index 47b4b2e0e7..0000000000 --- a/tensorflow/core/platform/vmodule_test.cc +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Test that popens a child process with the VLOG-ing environment variable set -// for the logging framework, and observes VLOG_IS_ON and VLOG macro output. - -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/platform.h" -#include "tensorflow/core/platform/test.h" - -#include - -namespace tensorflow { -namespace { - -int RealMain(const char* argv0, bool do_vlog) { - if (do_vlog) { -#if !defined(PLATFORM_GOOGLE) - // Note, we only test this when !defined(PLATFORM_GOOGLE) because - // VmoduleActivated doesn't exist in that implementation. - // - // Also, we call this internal API to simulate what would happen if - // differently-named translation units attempted to VLOG, so we don't need - // to create dummy translation unit files. - bool ok = internal::LogMessage::VmoduleActivated("vmodule_test.cc", 7) && - internal::LogMessage::VmoduleActivated("shoobadooba.h", 3); - if (!ok) { - fprintf(stderr, "vmodule activated levels not as expected.\n"); - return EXIT_FAILURE; - } -#endif - - // Print info on which VLOG levels are activated. - fprintf(stderr, "VLOG_IS_ON(8)? %d\n", VLOG_IS_ON(8)); - fprintf(stderr, "VLOG_IS_ON(7)? %d\n", VLOG_IS_ON(7)); - fprintf(stderr, "VLOG_IS_ON(6)? %d\n", VLOG_IS_ON(6)); - // Do some VLOG-ing. - VLOG(8) << "VLOG(8)"; - VLOG(7) << "VLOG(7)"; - VLOG(6) << "VLOG(6)"; - LOG(INFO) << "INFO"; - return EXIT_SUCCESS; - } - - // Popen the child process. - std::string command = std::string(argv0); -#if defined(PLATFORM_GOOGLE) - command = command + " do_vlog --vmodule=vmodule_test=7 --alsologtostderr"; -#else - command = - "TF_CPP_VMODULE=vmodule_test=7,shoobadooba=3 " + command + " do_vlog"; -#endif - command += " 2>&1"; - fprintf(stderr, "Running: \"%s\"\n", command.c_str()); - FILE* f = popen(command.c_str(), "r"); - if (f == nullptr) { - fprintf(stderr, "Failed to popen child: %s\n", strerror(errno)); - return EXIT_FAILURE; - } - - // Read data from the child's stdout. - constexpr int kBufferSizeBytes = 4096; - char buffer[kBufferSizeBytes]; - size_t result = fread(buffer, sizeof(buffer[0]), kBufferSizeBytes - 1, f); - if (result == 0) { - fprintf(stderr, "Failed to read from child stdout: %zu %s\n", result, - strerror(errno)); - return EXIT_FAILURE; - } - buffer[result] = '\0'; - int status = pclose(f); - if (status == -1) { - fprintf(stderr, "Failed to close popen child: %s\n", strerror(errno)); - return EXIT_FAILURE; - } - - // Check output is as expected. - const char kExpected[] = - "VLOG_IS_ON(8)? 0\nVLOG_IS_ON(7)? 1\nVLOG_IS_ON(6)? 1\n"; - if (strstr(buffer, kExpected) == nullptr) { - fprintf(stderr, "error: unexpected output from child: \"%.*s\"\n", - kBufferSizeBytes, buffer); - return EXIT_FAILURE; - } - bool ok = strstr(buffer, "VLOG(7)\n") != nullptr && - strstr(buffer, "VLOG(6)\n") != nullptr && - strstr(buffer, "VLOG(8)\n") == nullptr; - if (!ok) { - fprintf(stderr, "error: VLOG output not as expected: \"%.*s\"\n", - kBufferSizeBytes, buffer); - return EXIT_FAILURE; - } - - // Success! - return EXIT_SUCCESS; -} - -} // namespace -} // namespace tensorflow - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - bool do_vlog = argc >= 2 && strcmp(argv[1], "do_vlog") == 0; - return tensorflow::RealMain(argv[0], do_vlog); -} diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1bf9c93101..ec077c4283 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc index 1eab7e3d02..47e6ddb3d8 100644 --- a/tensorflow/core/util/bcast.cc +++ b/tensorflow/core/util/bcast.cc @@ -68,7 +68,9 @@ BCast::BCast(const Vec& sx, const Vec& sy, const bool fewer_dims_optimization) { // Output shape. State curr = UNKNOWN; const int64 x_i = x[i]; // i-th dimension of x. + CHECK_GE(x_i, 0); const int64 y_i = y[i]; // i-th dimension of y. + CHECK_GE(y_i, 0); int64 o_i; // i-th dimension of the output. int64 bx_i; // i-th broadcast for x. int64 by_i; // i-th broadcast for y. diff --git a/tensorflow/core/util/device_name_utils.cc b/tensorflow/core/util/device_name_utils.cc index 90c3fed2e8..2d797c855a 100644 --- a/tensorflow/core/util/device_name_utils.cc +++ b/tensorflow/core/util/device_name_utils.cc @@ -116,6 +116,7 @@ bool DeviceNameUtils::ParseFullName(StringPiece fullname, ParsedName* p) { if (fullname == "/") { return true; } + StringPiece tmp; while (!fullname.empty()) { bool progress = false; if (str_util::ConsumePrefix(&fullname, "/job:")) { diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 1bfa4f83a3..118ff0d0d6 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -26,18 +26,23 @@ limitations under the License. #include "mkl_trans.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" - #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" + +using mkldnn::memory; +using mkldnn::reorder; +using mkldnn::primitive; +using mkldnn::padding_kind; +using mkldnn::engine; #endif // The file contains a number of utility classes and functions used by MKL @@ -51,6 +56,8 @@ namespace tensorflow { // Tensorflow tensor. typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims; +typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3, + Dim_O = 0, Dim_I = 1 } MklDnnDims; class MklShape { public: @@ -143,7 +150,9 @@ class MklShape { size_t GetDimension() const { return dimension_; } const size_t* GetSizes() const { return sizes_; } int64 dim_size(int index) const { return sizes_[index]; } - int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; } + int64 tf_dim_size(int index) const { + return sizes_[tf_to_mkl_dim_map_[index]]; + } const size_t* GetStrides() const { return strides_; } const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; } size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; } @@ -227,7 +236,8 @@ class MklShape { (IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_ // Location of sizes. Note dim is not used here, left here // to make macros consistent. -#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t)) +#define SIZES_OFFSET(dims) \ + (DIMS_OFFSET + sizeof(size_t)) #define STRIDES_OFFSET(dims) \ (SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides #define MKL_LAYOUT_OFFSET(dims) \ @@ -309,6 +319,266 @@ class MklShape { nullptr; // TF dimension corresponding to this MKL dimension }; +#ifdef INTEL_MKL_DNN + +// Forward decl +TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format); + +class MklDnnShape { + private: + typedef struct { + /// Flag to indicate if the tensor is an MKL tensor or not + bool is_mkl_tensor_ = false; + /// Number of dimensions in Tensorflow format + size_t dimension_ = 0; + /// Required by MKLDNN for conversions + mkldnn_dims_t sizes_; // Required by MKL for conversions + memory::format tf_data_format_ = memory::format::format_undef; + memory::data_type T_ = memory::data_type::data_undef; + // MKL layout + mkldnn_memory_desc_t mkl_md_; + /// TF dimension corresponding to this MKL dimension + mkldnn_dims_t map_; + } MklShapeData; + MklShapeData data_; + + typedef std::remove_extent::type mkldnn_dim_t; +#define INVALID_DIM_SIZE -1 + + + public: + MklDnnShape() { + for (size_t i = 0; i < sizeof(data_.sizes_) / + sizeof(data_.sizes_[0]); ++i) { + data_.sizes_[i] = -1; + } + for (size_t i = 0; i < sizeof(data_.map_) / + sizeof(data_.map_[0]); ++i) { + data_.map_[i] = -1; + } + } + + ~MklDnnShape() {} + TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape); // Cannot copy + + inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; } + inline void SetMklTensor(bool is_mkl_tensor) { + data_.is_mkl_tensor_ = is_mkl_tensor; + } + + inline void SetDimensions(const size_t dimension) { + data_.dimension_ = dimension; + } + inline size_t GetDimension(char dimension)const { + int index = GetMklDnnTensorDimIndex(dimension); + CHECK(index >= 0 && index < this->GetDimension()) + << "Invalid index from the dimension: " << index << ", " << dimension; + return this->DimSize(index); + } + + inline int32 GetMklDnnTensorDimIndex(char dimension)const { + switch (dimension) { + case 'N': + return MklDnnDims::Dim_N; + case 'C': + return MklDnnDims::Dim_C; + case 'H': + return MklDnnDims::Dim_H; + case 'W': + return MklDnnDims::Dim_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value + } + } + + inline size_t GetDimension() const { return data_.dimension_; } + inline const int* GetSizes() const { + return reinterpret_cast(&data_.sizes_[0]); + } + + // Returns an mkldnn::memory::dims object that contains the sizes of this + // MklDnnShape object. + inline memory::dims GetSizesAsMklDnnDims() const { + memory::dims retVal; + if (data_.is_mkl_tensor_) { + int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); + for (size_t i = 0 ; i < dimensions; i++) { + if (data_.sizes_[i] != INVALID_DIM_SIZE) + retVal.push_back(data_.sizes_[i]); + } + } else { + CHECK_EQ(data_.is_mkl_tensor_, true); + } + return retVal; + } + + inline int64 DimSize(int index) const { + CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0])); + return data_.sizes_[index]; + } + + /// Return TensorShape that describes the Tensorflow shape of the tensor + /// represented by this MklShape. + inline TensorShape GetTfShape() { + CHECK_EQ(data_.is_mkl_tensor_, true); + + std::vector shape(data_.dimension_, -1); + for (size_t idx = 0; idx < data_.dimension_; ++idx) { + shape[idx] = data_.sizes_[TfDimIdx(idx)]; + } + + TensorShape ts; + bool ret = TensorShapeUtils::MakeShape(shape, &ts).ok(); + CHECK_EQ(ret, true); + return ts; + } + + inline void SetElemType(memory::data_type dt) { data_.T_ = dt; } + inline const memory::data_type GetElemType() { return data_.T_; } + + inline void SetMklLayout(memory::primitive_desc* pd) { + CHECK_NOTNULL(pd); + data_.mkl_md_ = pd->desc().data; + } + inline const memory::desc GetMklLayout() const { + return memory::desc(data_.mkl_md_); + } + + inline memory::format GetTfDataFormat() const { + return data_.tf_data_format_; + } + /// We don't create primitive_descriptor for TensorFlow layout now. + /// We use lazy evaluation and create it only when needed. + inline void SetTfLayout(size_t dims, const memory::dims& sizes, + memory::format format) { + CHECK_EQ(dims, sizes.size()); + data_.dimension_ = dims; + for (size_t ii = 0; ii < dims; ii++) { + data_.sizes_[ii] = sizes[ii]; + } + data_.tf_data_format_ = format; + SetTfDimOrder(dims, format); + } + inline const memory::desc GetTfLayout() const { + memory::dims dims; + for (size_t ii = 0; ii < data_.dimension_; ii++) { + dims.push_back(data_.sizes_[ii]); + } + return memory::desc(dims, data_.T_, data_.tf_data_format_); + } + inline const memory::desc GetCurLayout() const { + return IsMklTensor() ? GetMklLayout() : GetTfLayout(); + } + + // nhasabni - I've removed SetTfDimOrder that was setting default order in + // case of MKL-ML. We don't need a case of default dimension order because + // when an operator that does not get data_format attribute gets all inputs + // in Tensorflow format, it will produce output in Tensorflow format. + inline void SetTfDimOrder(const size_t dimension, const mkldnn_dims_t map) { + CHECK(dimension == data_.dimension_); + for (size_t ii = 0; ii < dimension; ii++) { + data_.map_[ii] = map[ii]; + } + } + + inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) { + // TODO(nhasabni): Why do we restrict this to 4D? + CHECK_EQ(dimension, 4); + CHECK(dimension == data_.dimension_); + data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; + data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; + data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C; + data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; + } + + inline void SetTfDimOrder(const size_t dimension, memory::format format) { + TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format); + SetTfDimOrder(dimension, data_format); + } + + inline const mkldnn_dim_t* GetTfToMklDimMap() const { + return &data_.map_[0]; + } + inline size_t TfDimIdx(int index) const { return data_.map_[index]; } + inline int64 TfDimSize(int index) const { + return data_.sizes_[TfDimIdx(index)]; + } + + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Channel dimension. + inline bool IsMklChannelDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_C; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Batch dimension. + inline bool IsMklBatchDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_N; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Width dimension. + inline bool IsMklWidthDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_W; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Height dimension. + inline bool IsMklHeightDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_H; + } + + /// Check if the TF-Mkl dimension ordering map specifies if the input + /// tensor is in NCHW format. + inline bool IsTensorInNCHWFormat() const { + TensorFormat data_format = FORMAT_NCHW; + return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && + IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && + IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && + IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); + } + + /// Check if the TF-Mkl dimension ordering map specifies if the input + /// tensor is in NHWC format. + inline bool IsTensorInNHWCFormat() const { + TensorFormat data_format = FORMAT_NHWC; + return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && + IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && + IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && + IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); + } + + /// The following methods are used for serializing and de-serializing the + /// contents of the mklshape object. + /// The data is serialized in this order + /// is_mkl_tensor_ : dimension_ : sizes_ : map_: format_ : T_ : mkl_pd_; + + /// Size of buffer to hold the serialized object, the size is computed by + /// following above mentioned order + inline size_t GetSerializeBufferSize() const { + return sizeof(MklShapeData); + } + + void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const { + CHECK(buf_size >= GetSerializeBufferSize()) + << "Buffer size is too small to SerializeMklDnnShape"; + *reinterpret_cast(buf) = data_; + } + + void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) { + // Make sure buffer holds at least is_mkl_tensor_. + CHECK(buf_size >= sizeof(data_.is_mkl_tensor_)) + << "Buffer size is too small in DeSerializeMklDnnShape"; + + const bool is_mkl_tensor = *reinterpret_cast(buf); + if (is_mkl_tensor) { // If it is an MKL Tensor then read the rest + CHECK(buf_size >= GetSerializeBufferSize()) + << "Buffer size is too small in DeSerializeMklDnnShape"; + data_ = *reinterpret_cast(buf); + } + } +}; + +#endif + // List of MklShape objects. Used in Concat/Split layers. typedef std::vector MklShapeList; @@ -347,6 +617,36 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } +#ifdef INTEL_MKL_DNN +template +inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, + const MklDnnShape& mkl_shape) { + Tensor output_tensor; + TensorShape output_shape; + +#if 0 + // TODO(nhasabni): need to implement + for (size_t j = 0; j < mkl_shape.GetDimension(); j++) { + // Outermost to innermost dimension + output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]); + } + + // Allocate output tensor. + context->allocate_temp(DataTypeToEnum::v(), output_shape, &output_tensor); + + dnnLayout_t output_layout = static_cast(mkl_shape.GetTfLayout()); + void* input_buffer = const_cast(mkl_tensor.flat().data()); + void* output_buffer = const_cast(output_tensor.flat().data()); + + if (mkl_tensor.NumElements() != 0) { + mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer); + } +#endif + + return output_tensor; +} +#endif + // Get the MKL shape from the second string tensor inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { mklshape->DeSerializeMklShape( @@ -359,6 +659,20 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +inline void GetMklShape(OpKernelContext* ctext, int n, + MklDnnShape* mklshape) { + mklshape->DeSerializeMklDnnShape( + ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) + .flat() + .data(), + ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) + .flat() + .size() * + sizeof(uint8)); +} +#endif + // Gets the actual input inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) { return ctext->input(GetTensorDataIndex(n, ctext->num_inputs())); @@ -382,6 +696,27 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, } } +#ifdef INTEL_MKL_DNN +/// Get shape of input tensor pointed by 'input_idx' in TensorShape format. +/// If the input tensor is in MKL layout, then obtains TensorShape from +/// MklShape. +inline TensorShape GetTfShape(OpKernelContext* context, + size_t input_idx) { + // Sanity check. + CHECK_NOTNULL(context); + CHECK_LT(input_idx, context->num_inputs()); + + MklDnnShape input_mkl_shape; + GetMklShape(context, input_idx, &input_mkl_shape); + if (input_mkl_shape.IsMklTensor()) { + return input_mkl_shape.GetTfShape(); + } else { + const Tensor& t = MklGetInput(context, input_idx); + return t.shape(); + } +} +#endif + // Allocate the second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -397,6 +732,23 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +// Allocate the second output tensor that will contain +// the MKL shape serialized +inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, + const MklDnnShape& mkl_shape) { + Tensor* second_tensor = nullptr; + TensorShape second_shape; + second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); + OP_REQUIRES_OK(ctext, ctext->allocate_output( + GetTensorMetaDataIndex(n, ctext->num_outputs()), + second_shape, &second_tensor)); + mkl_shape.SerializeMklDnnShape( + second_tensor->flat().data(), + second_tensor->flat().size() * sizeof(uint8)); +} +#endif + // Allocate the output tensor, create a second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -417,9 +769,43 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +// Allocate the output tensor, create a second output tensor that will contain +// the MKL shape serialized +inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, + Tensor** output, + const TensorShape& tf_shape, + const MklDnnShape& mkl_shape) { + Tensor* second_tensor = nullptr; + TensorShape second_shape; + second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); + OP_REQUIRES_OK( + ctext, ctext->allocate_output(GetTensorDataIndex(n, ctext->num_outputs()), + tf_shape, output)); + OP_REQUIRES_OK(ctext, ctext->allocate_output( + GetTensorMetaDataIndex(n, ctext->num_outputs()), + second_shape, &second_tensor)); + mkl_shape.SerializeMklDnnShape( + second_tensor->flat().data(), + second_tensor->flat().size() * sizeof(uint8)); +} +#endif + // Allocates a temp tensor and returns the data buffer for temporary storage. // Currently -// we only support F32, will need to templatize if other types are added +#ifdef INTEL_MKL_DNN +template +inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, + const memory::primitive_desc& pd, void** buf_out) { + TensorShape tf_shape; + + tf_shape.AddDim(pd.get_size() / sizeof(T) + 1); + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), + tf_shape, tensor_out)); + *buf_out = static_cast(tensor_out->flat().data()); +} +#endif + inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, dnnLayout_t lt_buff, void** buf_out) { TensorShape tf_shape; @@ -435,7 +821,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, template inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, - TensorShape tf_shape) { + TensorShape tf_shape) { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), tf_shape, tensor_out)); } @@ -669,6 +1055,8 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0, return true; } +// These functions do not compile with MKL-DNN since mkl.h is missing. +// We may need to remove them later. // TODO(intel_tf): Remove this routine when faster MKL layout conversion is // out. inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) { @@ -707,18 +1095,11 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { #ifdef INTEL_MKL_DNN -using mkldnn::engine; -using mkldnn::memory; -using mkldnn::padding_kind; -using mkldnn::primitive; -using mkldnn::reorder; - /// Return MKL-DNN data type (memory::data_type) for input type T /// /// @input None /// @return memory::data_type corresponding to type T -template -static memory::data_type MklDnnType(); +template static memory::data_type MklDnnType(); /// Instantiation for float type. Add similar instantiations for other /// type if needed. @@ -733,15 +1114,26 @@ memory::data_type MklDnnType() { /// @return: memory::format corresponding to TensorFlow data format; /// Fails with an error if invalid data format. inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { - if (format == FORMAT_NHWC) - return memory::format::nhwc; - else if (format == FORMAT_NCHW) - return memory::format::nchw; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); + if (format == FORMAT_NHWC) return memory::format::nhwc; + else if (format == FORMAT_NCHW) return memory::format::nchw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, + "Unsupported data format")); // Return to get rid of compiler warning return memory::format::format_undef; } +/// Map MKL-DNN data format to TensorFlow's data format +/// +/// @input: memory::format +/// @return: Tensorflow data format corresponding to memory::format +/// Fails with an error if invalid data format. +inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { + if (format == memory::format::nhwc) return FORMAT_NHWC; + else if (format == memory::format::nchw) return FORMAT_NCHW; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, + "Unsupported data format")); +} + /// Map TensorShape object into memory::dims required by MKL-DNN /// /// This function will simply map input TensorShape into MKL-DNN dims @@ -753,7 +1145,7 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return memory::dims corresponding to TensorShape inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { memory::dims dims(shape.dims()); - for (unsigned int d = 0; d < shape.dims(); ++d) { + for (int d = 0; d < shape.dims(); ++d) { dims[d] = shape.dim_size(d); } return dims; @@ -769,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { /// @input TensorShape object in shape /// @return memory::dims in MKL-DNN required NCHW format inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, - TensorFormat format) { + TensorFormat format) { // Check validity of format. CHECK_NE(TFDataFormatToMklDnnDataFormat(format), memory::format::format_undef); @@ -783,6 +1175,43 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, return memory::dims({n, c, h, w}); } +/// Map MklDnn memory::dims object into TensorShape object. +/// +/// This function will simply map input shape in MKL-DNN memory::dims format +/// in Tensorflow's TensorShape object by perserving dimension order. +/// +/// @input MKL-DNN memory::dims object +/// @output TensorShape corresponding to memory::dims +inline TensorShape MklDnnDimsToTFShape(const memory::dims& dims) { + std::vector shape(dims.size(), -1); + for (int d = 0; d < dims.size(); d++) { + shape[d] = dims[d]; + } + + TensorShape ret; + CHECK_EQ(TensorShapeUtils::MakeShape(shape, &ret).ok(), true); + return ret; +} + +/// Function to calculate strides given tensor shape in Tensorflow order +/// E.g., if dims_tf_order is {1, 2, 3, 4}, then as per Tensorflow convention, +/// dimesion with size 1 is outermost dimension; while dimension with size 4 is +/// innermost dimension. So strides for this tensor would be {4 * 3 * 2, +/// 4 * 3, 4, 1}, i.e., {24, 12, 4, 1}. +/// +/// @input Tensorflow shape in memory::dims type +/// @return memory::dims containing strides for the tensor. +inline memory::dims CalculateTFStrides(const memory::dims& dims_tf_order) { + CHECK_GT(dims_tf_order.size(), 0); + memory::dims strides(dims_tf_order.size()); + int last_dim_idx = dims_tf_order.size() - 1; + strides[last_dim_idx] = 1; + for (int d = last_dim_idx - 1; d >= 0; d--) { + strides[d] = strides[d + 1] * dims_tf_order[d + 1]; + } + return strides; +} + inline padding_kind TFPaddingToMklDnnPadding(Padding pad) { // MKL-DNN only supports zero padding. return padding_kind::zero; @@ -808,23 +1237,21 @@ class MklDnnData { const engine* cpu_engine_; public: - explicit MklDnnData(const engine* e) - : user_memory_(nullptr), - reorder_memory_(nullptr), - op_md_(nullptr), - cpu_engine_(e) {} + explicit MklDnnData(const engine* e) : user_memory_(nullptr), + reorder_memory_(nullptr), + op_md_(nullptr), cpu_engine_(e) {} ~MklDnnData() { cpu_engine_ = nullptr; // We don't own this. - delete (user_memory_); - delete (reorder_memory_); - delete (op_md_); + delete(user_memory_); + delete(reorder_memory_); + delete(op_md_); } - void* GetTensorBuffer(const Tensor* tensor) { + inline void* GetTensorBuffer(const Tensor* tensor) const { CHECK_NOTNULL(tensor); - return const_cast( - static_cast(tensor->flat().data())); + return const_cast(static_cast( + tensor->flat().data())); } /// Set user memory primitive using specified dimensions, memory format and @@ -835,35 +1262,83 @@ class MklDnnData { /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and /// memory format HWIO, and the buffer that contains actual values is /// pointed by data_buffer. - void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) { - CHECK_NOTNULL(data_buffer); - CHECK_NOTNULL(cpu_engine_); - // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = - new memory(memory::primitive_desc( - memory::desc(dim, MklDnnType(), fm), *cpu_engine_), - data_buffer); + inline void SetUsrMem(const memory::dims& dim, memory::format fm, + void* data_buffer = nullptr) { + auto md = memory::desc(dim, MklDnnType(), fm); + SetUsrMem(md, data_buffer); } - void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) { + inline void SetUsrMem(const memory::dims& dim, memory::format fm, + const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(dim, fm, GetTensorBuffer(tensor)); } + /// Helper function to create memory descriptor in Blocked format + /// + /// @input: Tensor dimensions + /// @input: strides corresponding to dimensions. One can use utility + /// function such as CalculateTFStrides to compute strides + /// for given dimensions. + /// @return: memory::desc object corresponding to blocked memory format + /// for given dimensions and strides. + static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim, + const memory::dims& strides) { + CHECK_EQ(dim.size(), strides.size()); + + // We have to construct memory descriptor in a C style. This is not at all + // ideal but MKLDNN does not offer any API to construct descriptor in + // blocked format except a copy constructor that accepts + // mkldnn_memory_desc_t. + mkldnn_memory_desc_t md; + md.primitive_kind = mkldnn_memory; + md.ndims = dim.size(); + md.format = mkldnn_blocked; + md.data_type = memory::convert_to_c(MklDnnType()); + + for (size_t i = 0; i < dim.size(); i++) { + md.layout_desc.blocking.block_dims[i] = 1; + md.layout_desc.blocking.strides[1][i] = 1; + md.layout_desc.blocking.strides[0][i] = strides[i]; + md.layout_desc.blocking.padding_dims[i] = dim[i]; + md.layout_desc.blocking.offset_padding_to_data[i] = 0; + md.dims[i] = dim[i]; + } + md.layout_desc.blocking.offset_padding = 0; + + return memory::desc(md); + } + + /// A version of SetUsrMem call that allows user to create memory in blocked + /// format. So in addition to accepting dimensions, it also accepts strides. + /// This allows user to create memory for tensor in a format that is not + /// supported by MKLDNN. E.g., MKLDNN does not support tensor format for 6 + /// dimensional tensor as a native format. But by using blocked format, a user + /// can create memory for 6D tensor. + inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, + void* data_buffer = nullptr) { + CHECK_EQ(dim.size(), strides.size()); + auto blocked_md = MklDnnData::CreateBlockedMemDesc(dim, strides); + SetUsrMem(blocked_md, data_buffer); + } + + inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, + const Tensor* tensor) { + CHECK_NOTNULL(tensor); + SetUsrMem(dim, strides, GetTensorBuffer(tensor)); + } + /// A version of function to set user memory primitive that accepts memory /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - void SetUsrMem(memory::desc md, void* data_buffer) { - CHECK_NOTNULL(data_buffer); - CHECK_NOTNULL(cpu_engine_); - // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = - new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer); + inline void SetUsrMem(const memory::desc& md, void* data_buffer = nullptr) { + auto pd = memory::primitive_desc(md, *cpu_engine_); + SetUsrMem(pd, data_buffer); } /// A version of SetUsrMem with memory descriptor and tensor - void SetUsrMem(memory::desc md, const Tensor* tensor) { + inline void SetUsrMem(const memory::desc& md, const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(md, GetTensorBuffer(tensor)); } @@ -872,41 +1347,60 @@ class MklDnnData { /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - void SetUsrMem(memory::primitive_desc pd, void* data_buffer) { - CHECK_NOTNULL(data_buffer); + inline void SetUsrMem(const memory::primitive_desc& pd, + void* data_buffer = nullptr) { CHECK_NOTNULL(cpu_engine_); // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = new memory(pd, data_buffer); + if (data_buffer) { + user_memory_ = new memory(pd, data_buffer); + } else { + user_memory_ = new memory(pd); + } } /// A version of SetUsrMem with primitive descriptor and tensor - void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) { + inline void SetUsrMem(const memory::primitive_desc& pd, + const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(pd, GetTensorBuffer(tensor)); } /// Get function for user memory primitive. - const memory* GetUsrMem() const { return user_memory_; } + inline const memory* GetUsrMem() const { return user_memory_; } /// Get function for primitive descriptor of user memory primitive. - const memory::primitive_desc GetUsrMemPrimDesc() const { + inline const memory::primitive_desc GetUsrMemPrimDesc() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_primitive_desc(); } /// Get function for descriptor of user memory. - memory::desc GetUsrMemDesc() { + inline memory::desc GetUsrMemDesc() { // This is ugly. Why MKL-DNN does not provide desc() method of const type?? const memory::primitive_desc pd = GetUsrMemPrimDesc(); return const_cast(&pd)->desc(); } /// Get function for data buffer of user memory primitive. - void* GetUsrMemDataHandle() const { + inline void* GetUsrMemDataHandle() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_data_handle(); } + /// Set function for data buffer of user memory primitive. + inline void* SetUsrMemDataHandle(void* data_buffer) { + CHECK_NOTNULL(user_memory_); + CHECK_NOTNULL(data_buffer); + return user_memory_->set_data_handle(data_buffer); + } + + /// Set function for data buffer of user memory primitive. + inline void SetUsrMemDataHandle(const Tensor* tensor) { + CHECK_NOTNULL(user_memory_); + CHECK_NOTNULL(tensor); + user_memory_->set_data_handle(GetTensorBuffer(tensor)); + } + /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -915,7 +1409,7 @@ class MklDnnData { /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is /// required for I and F (say I_r is reorder primitive for I; F_r is reorder /// primitive for F), then we need I_r and F_r to perform Conv2D. - const memory& GetOpMem() const { + inline const memory& GetOpMem() const { return reorder_memory_ ? *reorder_memory_ : *user_memory_; } @@ -923,13 +1417,32 @@ class MklDnnData { /// format. E.g., For Conv2D, the dimensions would be same as user dimensions /// but memory::format would be mkldnn::any because we want MKL-DNN to choose /// best layout/format for given input dimensions. - void SetOpMemDesc(const memory::dims& dim, memory::format fm) { + inline void SetOpMemDesc(const memory::dims& dim, memory::format fm) { // TODO(nhasabni): can we remove dynamic memory allocation? op_md_ = new memory::desc(dim, MklDnnType(), fm); } /// Get function for memory descriptor for an operation - const memory::desc& GetOpMemDesc() const { return *op_md_; } + inline const memory::desc& GetOpMemDesc() const { return *op_md_; } + + /// Predicate that checks if we need to reorder user's memory into memory + /// pointed by op_pd. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool IsReorderNeeded(const memory::primitive_desc& op_pd) const { + CHECK_NOTNULL(user_memory_); + return op_pd != user_memory_->get_primitive_desc(); + } + + /// Function to create a reorder from memory pointed by from to memory pointed + /// by to. Returns created primitive. + inline primitive CreateReorder(const memory* from, const memory* to) const { + CHECK_NOTNULL(from); + CHECK_NOTNULL(to); + return reorder(*from, *to); + } /// Function to handle input reordering /// @@ -945,19 +1458,62 @@ class MklDnnData { /// operation /// @input: net - net to which to add reorder primitive in case it is needed. /// @return: true in case reorder of input is needed; false, otherwise. - bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, - std::vector* net) { + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); - if (op_pd != user_memory_->get_primitive_desc()) { + if (IsReorderNeeded(op_pd)) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); - net->push_back(reorder(*user_memory_, *reorder_memory_)); + net->push_back(CreateReorder(user_memory_, reorder_memory_)); + return true; + } + return false; + } + + /// Overloaded version of above function that accepts memory buffer + /// where output of reorder needs to be stored. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @reorder_data_handle - memory buffer where output of reorder needs to be + /// stored. Primitive does not check if buffer is + /// enough size to write. + /// @input: net - net to which to add reorder primitive in case it is needed. + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + void* reorder_data_handle, + std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(reorder_data_handle); + CHECK_NOTNULL(user_memory_); + if (IsReorderNeeded(op_pd)) { + // TODO(nhasabni): can we remove dynamic memory allocation? + reorder_memory_ = new memory(op_pd, reorder_data_handle); + net->push_back(CreateReorder(user_memory_, reorder_memory_)); return true; } return false; } + /// Another overloaded version of CheckReorderToOpMem that accepts Tensor + /// where output of reorder needs to be stored. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @reorder_tensor - Tensor whose buffer is to be used to store output of + /// reorder. Primitive does not check if buffer is + /// enough size to write. + /// @input: net - net to which to add reorder primitive in case it is needed. + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + Tensor* reorder_tensor, + std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(reorder_tensor); + return CheckReorderToOpMem(op_pd, GetTensorBuffer(reorder_tensor), net); + } + /// Function to handle output reorder /// /// This function performs very similar functionality as input reordering @@ -970,9 +1526,10 @@ class MklDnnData { /// /// @input memory primitive descriptor for the given output of an operation /// @return: true in case reorder of output is needed; false, otherwise. - bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) { + inline bool PrepareReorderToUserMemIfReq( + const memory::primitive_desc& op_pd) { CHECK_NOTNULL(user_memory_); - if (op_pd != user_memory_->get_primitive_desc()) { + if (IsReorderNeeded(op_pd)) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); return true; @@ -987,11 +1544,11 @@ class MklDnnData { /// to the user-specified output buffer. /// /// @input: net - net to which to add reorder primitive - void InsertReorderToUserMem(std::vector* net) { + inline void InsertReorderToUserMem(std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); CHECK_NOTNULL(reorder_memory_); - net->push_back(reorder(*reorder_memory_, *user_memory_)); + net->push_back(CreateReorder(reorder_memory_, user_memory_)); } }; diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc new file mode 100644 index 0000000000..6aef3d86e9 --- /dev/null +++ b/tensorflow/core/util/mkl_util_test.cc @@ -0,0 +1,92 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/util/mkl_util.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +#ifdef INTEL_MKL_DNN + +TEST(MklUtilTest, MklDnnTfShape) { + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData a(&cpu_engine); + + const int N = 1, C = 2, H = 3, W = 4; + memory::dims a_dims = {N, C, H, W}; + MklDnnShape a_mkldnn_shape; + a_mkldnn_shape.SetMklTensor(true); + // Create TF layout in NCHW. + a_mkldnn_shape.SetTfLayout(a_dims.size(), a_dims, memory::format::nchw); + TensorShape a_tf_shape_nchw({N, C, H, W}); + TensorShape a_tf_shape_nhwc({N, H, W, C}); + TensorShape a_mkldnn_tf_shape = a_mkldnn_shape.GetTfShape(); + // Check that returned shape is in NCHW format. + EXPECT_EQ(a_tf_shape_nchw, a_mkldnn_tf_shape); + EXPECT_NE(a_tf_shape_nhwc, a_mkldnn_tf_shape); + + memory::dims b_dims = {N, C, H, W}; + MklDnnShape b_mkldnn_shape; + b_mkldnn_shape.SetMklTensor(true); + // Create TF layout in NHWC. + b_mkldnn_shape.SetTfLayout(b_dims.size(), b_dims, memory::format::nhwc); + TensorShape b_tf_shape_nhwc({N, H, W, C}); + TensorShape b_tf_shape_nchw({N, C, H, W}); + TensorShape b_mkldnn_tf_shape = b_mkldnn_shape.GetTfShape(); + // Check that returned shape is in NHWC format. + EXPECT_EQ(b_tf_shape_nhwc, b_mkldnn_tf_shape); + EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape); +} + + +TEST(MklUtilTest, MklDnnBlockedFormatTest) { + // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension + // first (case 1) and then it being outermost dimension (case 2). + auto cpu_engine = engine(engine::cpu, 0); + + // Setting for case 1 + MklDnnData a(&cpu_engine); + memory::dims dim1 = {3, 4}; + memory::dims strides1 = {1, 3}; + a.SetUsrMem(dim1, strides1); + + memory::desc a_md1 = a.GetUsrMemDesc(); + EXPECT_EQ(a_md1.data.ndims, 2); + EXPECT_EQ(a_md1.data.dims[0], 3); + EXPECT_EQ(a_md1.data.dims[1], 4); + EXPECT_EQ(a_md1.data.format, mkldnn_blocked); + + // Setting for case 2 + MklDnnData b(&cpu_engine); + memory::dims dim2 = {3, 4}; + memory::dims strides2 = {4, 1}; + b.SetUsrMem(dim2, strides2); + + memory::desc b_md2 = b.GetUsrMemDesc(); + EXPECT_EQ(b_md2.data.ndims, 2); + EXPECT_EQ(b_md2.data.dims[0], 3); + EXPECT_EQ(b_md2.data.dims[1], 4); + EXPECT_EQ(b_md2.data.format, mkldnn_blocked); +} + +#endif // INTEL_MKL_DNN +} // namespace +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md index ab95ce0af9..8ad4c4c075 100644 --- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md +++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md @@ -3,7 +3,7 @@ Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded, queue-based input pipelines for performance. Beginning with TensorFlow 1.4, however, we recommend using the `tf.data` module instead. (See -[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was +@{$datasets$Datasets} for details. In TensorFlow 1.2 and 1.3, the module was called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use interface for constructing efficient input pipelines. Furthermore, we've stopped developing the old multi-threaded, queue-based input pipelines. We've retained diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md index 8409962744..be14ab4026 100644 --- a/tensorflow/docs_src/get_started/get_started.md +++ b/tensorflow/docs_src/get_started/get_started.md @@ -272,7 +272,7 @@ train = optimizer.minimize(loss) ``` ```python -sess.run(init) # reset values to incorrect defaults. +sess.run(init) # reset variables to incorrect defaults. for i in range(1000): sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}) @@ -317,7 +317,7 @@ y_train = [0, -1, -2, -3] # training loop init = tf.global_variables_initializer() sess = tf.Session() -sess.run(init) # reset values to wrong +sess.run(init) # initialize variables with incorrect defaults. for i in range(1000): sess.run(train, {x: x_train, y: y_train}) @@ -383,7 +383,7 @@ train_input_fn = tf.estimator.inputs.numpy_input_fn( eval_input_fn = tf.estimator.inputs.numpy_input_fn( {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False) -# We can invoke 1000 training steps by invoking the method and passing the +# We can invoke 1000 training steps by invoking the method and passing the # training data set. estimator.train(input_fn=input_fn, steps=1000) diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 9d3af5d96a..0db5c6143a 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -191,7 +191,7 @@ import pandas as pd def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True): return tf.estimator.inputs.pandas_input_fn( - x=pdDataFrame(...), + x=pd.DataFrame(...), y=pd.Series(...), num_epochs=num_epochs, shuffle=shuffle) @@ -267,8 +267,8 @@ tf.logging.set_verbosity(tf.logging.INFO) Define the column names for the data set in `COLUMNS`. To distinguish features from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs -(@{tf.train}, -@{tf.test}, and +([train](http://download.tensorflow.org/data/boston_train.csv), +[test](http://download.tensorflow.org/data/boston_test.csv), and [predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_ `DataFrame`s: diff --git a/tensorflow/docs_src/get_started/monitors.md b/tensorflow/docs_src/get_started/monitors.md deleted file mode 100644 index 5606e95365..0000000000 --- a/tensorflow/docs_src/get_started/monitors.md +++ /dev/null @@ -1,406 +0,0 @@ -# Logging and Monitoring Basics with tf.contrib.learn - -When training a model, it’s often valuable to track and evaluate progress in -real time. In this tutorial, you’ll learn how to use TensorFlow’s logging -capabilities and the `Monitor` API to audit the in-progress training of a neural -network classifier for categorizing irises. This tutorial builds on the code -developed in @{$estimator$tf.estimator Quickstart} so if you -haven't yet completed that tutorial, you may want to explore it first, -especially if you're looking for an intro/refresher on tf.contrib.learn basics. - -## Setup {#setup} - -For this tutorial, you'll be building upon the following code from -@{$estimator$tf.estimator Quickstart}: - -```python -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -import tensorflow as tf - -# Data sets -IRIS_TRAINING = os.path.join(os.path.dirname(__file__), "iris_training.csv") -IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv") - -def main(unused_argv): - # Load datasets. - training_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32) - test_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32) - - # Specify that all features have real-value data - feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)] - - # Build 3 layer DNN with 10, 20, 10 units respectively. - classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns, - hidden_units=[10, 20, 10], - n_classes=3, - model_dir="/tmp/iris_model") - - # Fit model. - classifier.fit(x=training_set.data, - y=training_set.target, - steps=2000) - - # Evaluate accuracy. - accuracy_score = classifier.evaluate(x=test_set.data, - y=test_set.target)["accuracy"] - print('Accuracy: {0:f}'.format(accuracy_score)) - - # Classify two new flower samples. - new_samples = np.array( - [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) - y = list(classifier.predict(new_samples, as_iterable=True)) - print('Predictions: {}'.format(str(y))) - -if __name__ == "__main__": - tf.app.run() -``` - -Copy the above code into a file, and download the corresponding -[training](http://download.tensorflow.org/data/iris_training.csv) and -[test](http://download.tensorflow.org/data/iris_test.csv) data sets to the same -directory. - -In the following sections, you'll progressively make updates to the above code -to add logging and monitoring capabilities. Final code incorporating all updates -is [available for download -here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/monitors/iris_monitors.py). - -## Overview - -The @{$estimator$tf.estimator Quickstart tutorial} walked through -how to implement a neural net classifier to categorize iris examples into one of -three species. - -But when [the code](#setup) from this tutorial is run, the output contains no -logging tracking how model training is progressing—only the results of the -`print` statements that were included: - -```none -Accuracy: 0.933333 -Predictions: [1 2] -``` - -Without any logging, model training feels like a bit of a black box; you can't -see what's happening as TensorFlow steps through gradient descent, get a sense -of whether the model is converging appropriately, or audit to determine whether -[early stopping](https://en.wikipedia.org/wiki/Early_stopping) might be -appropriate. - -One way to address this problem would be to split model training into multiple -`fit` calls with smaller numbers of steps in order to evaluate accuracy more -progressively. However, this is not recommended practice, as it greatly slows -down model training. Fortunately, tf.contrib.learn offers another solution: a -@{tf.contrib.learn.monitors$Monitor API} designed to help -you log metrics and evaluate your model while training is in progress. In the -following sections, you'll learn how to enable logging in TensorFlow, set up a -ValidationMonitor to do streaming evaluations, and visualize your metrics using -TensorBoard. - -## Enabling Logging with TensorFlow - -TensorFlow uses five different levels for log messages. In order of ascending -severity, they are `DEBUG`, `INFO`, `WARN`, `ERROR`, and `FATAL`. When you -configure logging at any of these levels, TensorFlow will output all log -messages corresponding to that level and all levels of higher severity. For -example, if you set a logging level of `ERROR`, you'll get log output containing -`ERROR` and `FATAL` messages, and if you set a level of `DEBUG`, you'll get log -messages from all five levels. - -By default, TensorFlow is configured at a logging level of `WARN`, but when -tracking model training, you'll want to adjust the level to `INFO`, which will -provide additional feedback as `fit` operations are in progress. - -Add the following line to the beginning of your code (right after your -`import`s): - -```python -tf.logging.set_verbosity(tf.logging.INFO) -``` - -Now when you run the code, you'll see additional log output like the following: - -```none -INFO:tensorflow:loss = 1.18812, step = 1 -INFO:tensorflow:loss = 0.210323, step = 101 -INFO:tensorflow:loss = 0.109025, step = 201 -``` - -With `INFO`-level logging, tf.contrib.learn automatically outputs [training-loss -metrics](https://en.wikipedia.org/wiki/Loss_function) to stderr after every 100 -steps. - -## Configuring a ValidationMonitor for Streaming Evaluation - -Logging training loss is helpful to get a sense whether your model is -converging, but what if you want further insight into what's happening during -training? tf.contrib.learn provides several high-level `Monitor`s you can attach -to your `fit` operations to further track metrics and/or debug lower-level -TensorFlow operations during model training, including: - -Monitor | Description -------------------- | ----------- -`CaptureVariable` | Saves a specified variable's values into a collection at every _n_ steps of training -`PrintTensor` | Logs a specified tensor's values at every _n_ steps of training -`SummarySaver` | Saves @{tf.Summary} [protocol buffers](https://developers.google.com/protocol-buffers/) for a given tensor using a @{tf.summary.FileWriter} at every _n_ steps of training -`ValidationMonitor` | Logs a specified set of evaluation metrics at every _n_ steps of training, and, if desired, implements early stopping under certain conditions - -### Evaluating Every *N* Steps - -For the iris neural network classifier, while logging training loss, you might -also want to simultaneously evaluate against test data to see how well the model -is generalizing. You can accomplish this by configuring a `ValidationMonitor` -with the test data (`test_set.data` and `test_set.target`), and setting how -often to evaluate with `every_n_steps`. The default value of `every_n_steps` is -`100`; here, set `every_n_steps` to `50` to evaluate after every 50 steps of -model training: - -```python -validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( - test_set.data, - test_set.target, - every_n_steps=50) -``` - -Place this code right before the line instantiating the `classifier`. - -`ValidationMonitor`s rely on saved checkpoints to perform evaluation operations, -so you'll want to modify instantiation of the `classifier` to add a -@{tf.contrib.learn.RunConfig} that includes -`save_checkpoints_secs`, which specifies how many seconds should elapse between -checkpoint saves during training. Because the iris data set is quite small, and -thus trains quickly, it makes sense to set `save_checkpoints_secs` to 1 (saving -a checkpoint every second) to ensure a sufficient number of checkpoints: - -```python -classifier = tf.contrib.learn.DNNClassifier( - feature_columns=feature_columns, - hidden_units=[10, 20, 10], - n_classes=3, - model_dir="/tmp/iris_model", - config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1)) -``` - -NOTE: The `model_dir` parameter specifies an explicit directory -(`/tmp/iris_model`) for model data to be stored; this directory path will be -easier to reference later on than an autogenerated one. Each time you run the -code, any existing data in `/tmp/iris_model` will be loaded, and model training -will continue where it left off in the last run (e.g., running the script twice -in succession will execute 4000 steps during training—2000 during each -`fit` operation). To start over model training from scratch, delete -`/tmp/iris_model` before running the code. - -Finally, to attach your `validation_monitor`, update the `fit` call to include a -`monitors` param, which takes a list of all monitors to run during model -training: - -```python -classifier.fit(x=training_set.data, - y=training_set.target, - steps=2000, - monitors=[validation_monitor]) -``` - -Now, when you rerun the code, you should see validation metrics in your log -output, e.g.: - -```none -INFO:tensorflow:Validation (step 50): loss = 1.71139, global_step = 0, accuracy = 0.266667 -... -INFO:tensorflow:Validation (step 300): loss = 0.0714158, global_step = 268, accuracy = 0.966667 -... -INFO:tensorflow:Validation (step 1750): loss = 0.0574449, global_step = 1729, accuracy = 0.966667 -``` - -### Customizing the Evaluation Metrics with MetricSpec - -By default, if no evaluation metrics are specified, `ValidationMonitor` will log -both [loss](https://en.wikipedia.org/wiki/Loss_function) and accuracy, but you -can customize the list of metrics that will be run every 50 steps. To specify -the exact metrics you'd like to run in each evaluation pass, you can add a -`metrics` param to the `ValidationMonitor` constructor. `metrics` takes a dict -of key/value pairs, where each key is the name you'd like logged for the metric, -and the corresponding value is a -[`MetricSpec`](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn/metric_spec.py) -object. - -The `MetricSpec` constructor accepts four parameters: - -* `metric_fn`. The function that calculates and returns the value of a metric. - This can be a predefined function available in the - @{tf.contrib.metrics} module, such as - @{tf.contrib.metrics.streaming_precision} or - @{tf.contrib.metrics.streaming_recall}. - - Alternatively, you can define your own custom metric function, which must - take `predictions` and `labels` tensors as arguments (a `weights` argument - can also optionally be supplied). The function must return the value of the - metric in one of two formats: - - * A single tensor - * A pair of ops `(value_op, update_op)`, where `value_op` returns the - metric value and `update_op` performs a corresponding operation to - update internal model state. - -* `prediction_key`. The key of the tensor containing the predictions returned - by the model. This argument may be omitted if the model returns either a - single tensor or a dict with a single entry. For a `DNNClassifier` model, - class predictions will be returned in a tensor with the key - @{tf.contrib.learn.PredictionKey.CLASSES}. - -* `label_key`. The key of the tensor containing the labels returned by the - model, as specified by the model's @{$input_fn$`input_fn`}. As - with `prediction_key`, this argument may be omitted if the `input_fn` - returns either a single tensor or a dict with a single entry. In the iris - example in this tutorial, the `DNNClassifier` does not have an `input_fn` - (`x`,`y` data is passed directly to `fit`), so it's not necessary to provide - a `label_key`. - -* `weights_key`. *Optional*. The key of the tensor (returned by the - @{$input_fn$`input_fn`}) containing weights inputs for the - `metric_fn`. - -The following code creates a `validation_metrics` dict that defines three -metrics to log during model evaluation: - -* `"accuracy"`, using @{tf.contrib.metrics.streaming_accuracy} - as the `metric_fn` -* `"precision"`, using @{tf.contrib.metrics.streaming_precision} - as the `metric_fn` -* `"recall"`, using @{tf.contrib.metrics.streaming_recall} - as the `metric_fn` - -```python -validation_metrics = { - "accuracy": - tf.contrib.learn.MetricSpec( - metric_fn=tf.contrib.metrics.streaming_accuracy, - prediction_key=tf.contrib.learn.PredictionKey.CLASSES), - "precision": - tf.contrib.learn.MetricSpec( - metric_fn=tf.contrib.metrics.streaming_precision, - prediction_key=tf.contrib.learn.PredictionKey.CLASSES), - "recall": - tf.contrib.learn.MetricSpec( - metric_fn=tf.contrib.metrics.streaming_recall, - prediction_key=tf.contrib.learn.PredictionKey.CLASSES) -} -``` - -Add the above code before the `ValidationMonitor` constructor. Then revise the -`ValidationMonitor` constructor as follows to add a `metrics` parameter to log -the accuracy, precision, and recall metrics specified in `validation_metrics` -(loss is always logged, and doesn't need to be explicitly specified): - -```python -validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( - test_set.data, - test_set.target, - every_n_steps=50, - metrics=validation_metrics) -``` - -Rerun the code, and you should see precision and recall included in your log -output, e.g.: - -```none -INFO:tensorflow:Validation (step 50): recall = 0.0, loss = 1.20626, global_step = 1, precision = 0.0, accuracy = 0.266667 -... -INFO:tensorflow:Validation (step 600): recall = 1.0, loss = 0.0530696, global_step = 571, precision = 1.0, accuracy = 0.966667 -... -INFO:tensorflow:Validation (step 1500): recall = 1.0, loss = 0.0617403, global_step = 1452, precision = 1.0, accuracy = 0.966667 -``` - -### Early Stopping with ValidationMonitor - -Note that in the above log output, by step 600, the model has already achieved -precision and recall rates of 1.0. This raises the question as to whether model -training could benefit from -[early stopping](https://en.wikipedia.org/wiki/Early_stopping). - -In addition to logging eval metrics, `ValidationMonitor`s make it easy to -implement early stopping when specified conditions are met, via three params: - -| Param | Description | -| -------------------------------- | ----------------------------------------- | -| `early_stopping_metric` | Metric that triggers early stopping | -: : (e.g., loss or accuracy) under conditions : -: : specified in `early_stopping_rounds` and : -: : `early_stopping_metric_minimize`. Default : -: : is `"loss"`. : -| `early_stopping_metric_minimize` | `True` if desired model behavior is to | -: : minimize the value of : -: : `early_stopping_metric`; `False` if : -: : desired model behavior is to maximize the : -: : value of `early_stopping_metric`. Default : -: : is `True`. : -| `early_stopping_rounds` | Sets a number of steps during which if | -: : the `early_stopping_metric` does not : -: : decrease (if : -: : `early_stopping_metric_minimize` is : -: : `True`) or increase (if : -: : `early_stopping_metric_minimize` is : -: : `False`), training will be stopped. : -: : Default is `None`, which means early : -: : stopping will never occur. : - -Make the following revision to the `ValidationMonitor` constructor, which -specifies that if loss (`early_stopping_metric="loss"`) does not decrease -(`early_stopping_metric_minimize=True`) over a period of 200 steps -(`early_stopping_rounds=200`), model training will stop immediately at that -point, and not complete the full 2000 steps specified in `fit`: - -```python -validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( - test_set.data, - test_set.target, - every_n_steps=50, - metrics=validation_metrics, - early_stopping_metric="loss", - early_stopping_metric_minimize=True, - early_stopping_rounds=200) -``` - -Rerun the code to see if model training stops early: - -```none -... -INFO:tensorflow:Validation (step 1150): recall = 1.0, loss = 0.056436, global_step = 1119, precision = 1.0, accuracy = 0.966667 -INFO:tensorflow:Stopping. Best step: 800 with loss = 0.048313818872. -``` - -Indeed, here training stops at step 1150, indicating that for the past 200 -steps, loss did not decrease, and that overall, step 800 produced the smallest -loss value against the test data set. This suggests that additional calibration -of hyperparameters by decreasing the step count might further improve the model. - -## Visualizing Log Data with TensorBoard - -Reading through the log produced by `ValidationMonitor` provides plenty of raw -data on model performance during training, but it may also be helpful to see -visualizations of this data to get further insight into trends—for -example, how accuracy is changing over step count. You can use TensorBoard (a -separate program packaged with TensorFlow) to plot graphs like this by setting -the `logdir` command-line argument to the directory where you saved your model -training data (here, `/tmp/iris_model`). Run the following on your command line: - -
$ tensorboard --logdir=/tmp/iris_model/
-Starting TensorBoard 39 on port 6006
- -Then navigate to `http://0.0.0.0:`*``* in your browser, where -*``* is the port specified in the command-line output (here, -`6006`). - -If you click on the accuracy field, you'll see an image like the following, -which shows accuracy plotted against step count: - -![Accuracy over step count in TensorBoard](https://www.tensorflow.org/images/validation_monitor_tensorboard_accuracy.png "Accuracy over step count in TensorBoard") - -For more on using TensorBoard, see @{$summaries_and_tensorboard$TensorBoard: Visualizing Learning} and @{$graph_viz$TensorBoard: Graph Visualization}. diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 3a153e8114..df622c6ac5 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index df43255896..8b3da49a0d 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index f7f2c3cdc7..6eb8158249 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.4.0-rc1 + 1.4.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.4.0-rc1 + 1.4.0 @@ -124,7 +124,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -143,7 +143,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -151,10 +151,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip). 3. Extract this .zip file. @@ -202,7 +202,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.4.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.4.0.jar HelloTF.java
### Running @@ -216,11 +216,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.4.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.4.0-rc1.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 414ab7b1f7..f7380bac8a 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 9a95710bfa..79b383817b 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -114,7 +114,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -235,7 +235,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -344,7 +344,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl @@ -517,7 +517,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
 
@@ -525,7 +525,7 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 6d0dcdcd4a..aa4ae6c876 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -355,10 +355,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.4.0rc1 on Linux: +for TensorFlow 1.4.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl
 
## Validate your installation @@ -447,8 +447,10 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - - + + + + @@ -460,7 +462,8 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.4.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
tensorflow_gpu-1.4.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
tensorflow_gpu-1.4.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.468
tensorflow-1.3.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
tensorflow_gpu-1.3.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
tensorflow-1.2.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
tensorflow_gpu-1.2.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.55.18
tensorflow-1.1.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.2N/AN/A
- + + @@ -471,8 +474,10 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.4.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
tensorflow-1.2.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
tensorflow-1.1.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.2N/AN/A
tensorflow_gpu-1.1.0GPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.25.18
- - + + + + diff --git a/tensorflow/docs_src/mobile/index.md b/tensorflow/docs_src/mobile/index.md index 06ad47bc62..a6f1422f6f 100644 --- a/tensorflow/docs_src/mobile/index.md +++ b/tensorflow/docs_src/mobile/index.md @@ -35,8 +35,8 @@ speech-driven interface, and many of these require on-device processing. Most of the time a user isn’t giving commands, and so streaming audio continuously to a remote server would be a waste of bandwidth, since it would mostly be silence or background noises. To solve this problem it’s common to have a small neural -network running on-device @{$tutorials/audio_recognition$listening out for a particular keyword}. -Once that keyword has been spotted, the rest of the +network running on-device @{$tutorials/audio_recognition$listening out for a +particular keyword}. Once that keyword has been spotted, the rest of the conversation can be transmitted over to the server for further processing if more computing power is needed. diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index c5a560e074..8fc65be35a 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -296,6 +296,6 @@ complains about missing header files, add the .h’s that are needed into the [`android_extended_ops`](https://www.tensorflow.org/code/tensorflow/core/kernels/BUILD#L3525) target. -If you’re using a makefile targetting iOS, Raspberry Pi, etc, go to +If you’re using a makefile targeting iOS, Raspberry Pi, etc, go to [`tensorflow/contrib/makefile/tf_op_files.txt`](https://www.tensorflow.org/code/tensorflow/contrib/makefile/tf_op_files.txt) and add the right implementation files there. diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index ccced8792e..3ca3b51a5e 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -901,95 +901,6 @@ are all 0. Figure below shows examples of different `edge_padding` and -## Recv - -See also -[`ComputationBuilder::Recv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). - - `Recv(shape, channel_handle)` - -| Arguments | Type | Semantics | -| ---------------- | --------------- | ------------------------------------ | -| `shape` | `Shape` | shape of the data to receive | -| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair | - -Receives data of the given shape from a `Send` instruction in another -computation that shares the same channel handle. Returns a -ComputationDataHandle for the received data. - -The client API of `Recv` operation represents synchronous communication. -However, the instruction is internally decomposed into 2 HLO instructions -(`Recv` and `RecvDone`) to enable asynchronous data transfers. See also -[`HloInstruction::CreateRecv` and `HloInstruction::CreateRecvDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h). - -`Recv(const Shape& shape, int64 channel_id)` - -Allocates resources required to receive data from a `Send` instruction with the -same channel_id. Returns a context for the allocated resources, which is used -by a following `RecvDone` instruction to wait for the completion of the data -transfer. The context is a tuple of {receive buffer (shape), request identifier -(U32)} and it can only be used by a `RecvDone` instruction. - - `RecvDone(HloInstruction context)` - -Given a context created by a `Recv` instruction, waits for the data transfer to -complete and returns the received data. - -## Send - -See also -[`ComputationBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). - - `Send(operand, channel_handle)` - -| Arguments | Type | Semantics | -| ---------------- | ----------------------- | -------------------------------- | -| `operand` | `ComputationDataHandle` | data to send (array of type T) | -| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair | - -Sends the given operand data to a `Recv` instruction in another computation -that shares the same channel handle. Does not return any data. - -Similar to the `Recv` operation, the client API of `Send` operation represents -synchronous communication, and is internally decomposed into 2 HLO instructions -(`Send` and `SendDone`) to enable asynchronous data transfers. See also -[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h). - -`Send(HloInstruction operand, int64 channel_id)` - -Initiates an asynchronous transfer of the operand to the resources allocated by -the `Recv` instruction with the same channel id. Returns a context, which is -used by a following `SendDone` instruction to wait for the completion of the -data transfer. The context is a tuple of {operand (shape), request identifier -(U32)} and it can only be used by a `SendDone` instruction. - - `SendDone(HloInstruction context)` - -Given a context created by a `Send` instruction, waits for the data transfer to -complete. The instruction does not return any data. - - Scheduling of channel instructions - -The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`, -`Send`, `SendDone`) is as below. - -
- -
- -* `Recv` happens before `Send` -* `Send` happens before `RecvDone` -* `Recv` happens before `RecvDone` -* `Send` happens before `SendDone` - -When the backend compilers generate a linear schedule for each computation that -communicates via channel instructions, there must not be cycles across the -computations. For example, below schedules lead to deadlocks. - -
- -
- ## Reduce See also diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 87d900eae3..dd5496b08e 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -157,6 +157,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `pt [slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` | | | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` | | | `-r ` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` | +| | `-n ` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` | | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` | | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` | | **`/regex`** | | [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` | @@ -174,10 +175,12 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` | | | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` | +| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` | | **`lo`** | | **List output recipients of node** | | | | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` | | | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` | +| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` | | **`ls`** | | **List Python source files involved in node creation.** | | | | `-p ` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` | | | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` | @@ -517,12 +520,8 @@ model.fit(...) # This will break into the TFDBG CLI. ## Debugging tf-slim with TFDBG -TFDBG supports debugging of training and evaluation with +TFDBG currently supports only training with [tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). -As detailed below, training and evaluation require slightly different debugging -workflows. - -### Debugging training in tf-slim To debug the training process, provide `LocalCLIDebugWrapperSession` to the `session_wrapper` argument of `slim.learning.train()`. For example: @@ -531,31 +530,13 @@ import tensorflow as tf from tensorflow.python import debug as tf_debug # ... Code that creates the graph and the train_op ... -tf.contrib.slim.learning.train( +tf.contrib.slim.learning_train( train_op, logdir, number_of_steps=10, session_wrapper=tf_debug.LocalCLIDebugWrapperSession) ``` -### Debugging evaluation in tf-slim -To debug the evaluation process, provide `LocalCLIDebugHook` to the -`hooks` argument of `slim.evaluation.evaluate_once()`. For example: - -``` python -import tensorflow as tf -from tensorflow.python import debug as tf_debug - -# ... Code that creates the graph and the eval and final ops ... -tf.contrib.slim.evaluation.evaluate_once( - '', - checkpoint_path, - logdir, - eval_op=my_eval_op, - final_op=my_value_op, - hooks=[tf_debug.LocalCLIDebugHook()]) -``` - ## Offline Debugging of Remotely-Running Sessions Often, your model is running on a remote machine or a process that you don't diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md index d6f80430cd..88eb277e35 100644 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ b/tensorflow/docs_src/programmers_guide/tensors.md @@ -29,8 +29,8 @@ Some types of tensors are special, and these will be covered in other units of the Programmer's guide. The main ones are: * `tf.Variable` - * `tf.Constant` - * `tf.Placeholder` + * `tf.constant` + * `tf.placeholder` * `tf.SparseTensor` With the exception of `tf.Variable`, the value of a tensor is immutable, which @@ -64,7 +64,7 @@ The following snippet demonstrates creating a few rank 0 variables: mammal = tf.Variable("Elephant", tf.string) ignition = tf.Variable(451, tf.int16) floating = tf.Variable(3.14159265359, tf.float64) -its_complicated = tf.Variable((12.3, -4.85), tf.complex64) +its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64) ``` Note: A string is treated as a single item in TensorFlow, not as a sequence of @@ -79,7 +79,7 @@ initial value. For example: mystr = tf.Variable(["Hello"], tf.string) cool_numbers = tf.Variable([3.14159, 2.71828], tf.float32) first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32) -its_very_complicated = tf.Variable([(12.3, -4.85), (7.5, -6.23)], tf.complex64) +its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64) ``` @@ -275,8 +275,8 @@ Graphs and Sessions for more information). Sometimes it is not possible to evaluate a `tf.Tensor` with no context because its value might depend on dynamic information that is not available. For -example, tensors that depend on `Placeholder`s can't be evaluated without -providing a value for the `Placeholder`. +example, tensors that depend on `placeholder`s can't be evaluated without +providing a value for the `placeholder`. ``` python p = tf.placeholder(tf.float32) diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md index b57ef24f58..6f802fd106 100644 --- a/tensorflow/docs_src/tutorials/deep_cnn.md +++ b/tensorflow/docs_src/tutorials/deep_cnn.md @@ -83,21 +83,21 @@ for details. It consists of 1,068,298 learnable parameters and requires about ## Code Organization The code for this tutorial resides in -[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/). +[`models/tutorials/image/cifar10/`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/). File | Purpose --- | --- -[`cifar10_input.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_input.py) | Reads the native CIFAR-10 binary file format. -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model. -[`cifar10_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU. -[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs. -[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model. +[`cifar10_input.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_input.py) | Reads the native CIFAR-10 binary file format. +[`cifar10.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model. +[`cifar10_train.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU. +[`cifar10_multi_gpu_train.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs. +[`cifar10_eval.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model. ## CIFAR-10 Model The CIFAR-10 network is largely contained in -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py). +[`cifar10.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10.py). The complete training graph contains roughly 765 operations. We find that we can make the code most reusable by constructing the graph with the following modules: diff --git a/tensorflow/docs_src/tutorials/word2vec.md b/tensorflow/docs_src/tutorials/word2vec.md index 0a1c41c84a..3fe7352bd2 100644 --- a/tensorflow/docs_src/tutorials/word2vec.md +++ b/tensorflow/docs_src/tutorials/word2vec.md @@ -23,7 +23,7 @@ straight in, feel free to look at the minimalistic implementation in This basic example contains the code needed to download some data, train on it a bit and visualize the result. Once you get comfortable with reading and running the basic version, you can graduate to -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py) +[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py) which is a more serious implementation that showcases some more advanced TensorFlow principles about how to efficiently use threads to move data into a text model, how to checkpoint during training, etc. @@ -341,7 +341,7 @@ t-SNE. Et voila! As expected, words that are similar end up clustering nearby each other. For a more heavyweight implementation of word2vec that showcases more of the advanced features of TensorFlow, see the implementation in -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py). +[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py). ## Evaluating Embeddings: Analogical Reasoning @@ -357,7 +357,7 @@ Download the dataset for this task from To see how we do this evaluation, have a look at the `build_eval_graph()` and `eval()` functions in -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py). +[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py). The choice of hyperparameters can strongly influence the accuracy on this task. To achieve state-of-the-art performance on this task requires training over a @@ -385,13 +385,13 @@ your model is seriously bottlenecked on input data, you may want to implement a custom data reader for your problem, as described in @{$new_data_formats$New Data Formats}. For the case of Skip-Gram modeling, we've actually already done this for you as an example in -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py). +[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py). If your model is no longer I/O bound but you want still more performance, you can take things further by writing your own TensorFlow Ops, as described in @{$adding_an_op$Adding a New Op}. Again we've provided an example of this for the Skip-Gram case -[models/tutorials/embedding/word2vec_optimized.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec_optimized.py). +[models/tutorials/embedding/word2vec_optimized.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec_optimized.py). Feel free to benchmark these against each other to measure performance improvements at each stage. diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index ebddfb20f4..3549891461 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -69,18 +69,11 @@ to validate that you have gathered good training data, but if you want to deploy on resource-limited platforms, you can try the `--architecture` flag with a Mobilenet model. For example: -Run floating-point version of mobilenet: ```bash python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` -Run quantized version of mobilenet: -```bash -python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized -``` - There are 32 different Mobilenet models to choose from, with a variety of file size and latency options. The first number can be '1.0', '0.75', '0.50', or '0.25' to control the size, and the second controls the input image size, either @@ -114,7 +107,6 @@ import numpy as np from six.moves import urllib import tensorflow as tf -from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import gfile @@ -279,7 +271,6 @@ def create_model_graph(model_info): """ with tf.Graph().as_default() as graph: model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name']) - print('Model path: ', model_path) with gfile.FastGFile(model_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) @@ -346,10 +337,7 @@ def maybe_download_and_extract(data_url): statinfo = os.stat(filepath) tf.logging.info('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - print('Extracting file from ', filepath) - tarfile.open(filepath, 'r:gz').extractall(dest_directory) - else: - print('Not extracting or downloading files, model already present in disk') + tarfile.open(filepath, 'r:gz').extractall(dest_directory) def ensure_dir_exists(dir_name): @@ -745,7 +733,7 @@ def variable_summaries(var): def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size, quantize_layer): + bottleneck_tensor_size): """Adds a new softmax and fully-connected layer for training. We need to retrain the top layer to identify our new classes, so this function @@ -757,12 +745,10 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, Args: class_count: Integer of how many categories of things we're trying to - recognize. + recognize. final_tensor_name: Name string for the new final node that produces results. bottleneck_tensor: The output of the main CNN graph. bottleneck_tensor_size: How many entries in the bottleneck vector. - quantize_layer: Boolean, specifying whether the newly added layer should be - quantized. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -785,41 +771,18 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, with tf.name_scope('weights'): initial_value = tf.truncated_normal( [bottleneck_tensor_size, class_count], stddev=0.001) + layer_weights = tf.Variable(initial_value, name='final_weights') - if quantize_layer: - quantized_layer_weights = quant_ops.MovingAvgQuantize( - layer_weights, is_training=True) - variable_summaries(quantized_layer_weights) variable_summaries(layer_weights) with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') - if quantize_layer: - quantized_layer_biases = quant_ops.MovingAvgQuantize( - layer_biases, is_training=True) - variable_summaries(quantized_layer_biases) - variable_summaries(layer_biases) - with tf.name_scope('Wx_plus_b'): - if quantize_layer: - logits = tf.matmul(bottleneck_input, - quantized_layer_weights) + quantized_layer_biases - logits = quant_ops.MovingAvgQuantize( - logits, - init_min=-32.0, - init_max=32.0, - is_training=True, - num_bits=8, - narrow_range=False, - ema_decay=0.5) - tf.summary.histogram('pre_activations', logits) - else: - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) + logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases + tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) - tf.summary.histogram('activations', final_tensor) with tf.name_scope('cross_entropy'): @@ -827,7 +790,6 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, labels=ground_truth_input, logits=logits) with tf.name_scope('total'): cross_entropy_mean = tf.reduce_mean(cross_entropy) - tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'): @@ -863,7 +825,6 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): def save_graph_to_file(sess, graph, graph_file_name): output_graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) - with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) return @@ -897,7 +858,6 @@ def create_model_info(architecture): ValueError: If architecture name is unknown. """ architecture = architecture.lower() - is_quantized = False if architecture == 'inception_v3': # pylint: disable=line-too-long data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' @@ -942,28 +902,19 @@ def create_model_info(architecture): architecture) return None is_quantized = True - - if is_quantized: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_quantized_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'Placeholder:0' - model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + - '_quantized_frozen') - model_base_name = 'quantized_frozen_graph.pb' - - else: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'input:0' - model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string - model_base_name = 'frozen_graph.pb' - + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' + data_url += version_string + '_' + size_string + '_frozen.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' bottleneck_tensor_size = 1001 input_width = int(size_string) input_height = int(size_string) input_depth = 3 + resized_input_tensor_name = 'input:0' + if is_quantized: + model_base_name = 'quantized_graph.pb' + else: + model_base_name = 'frozen_graph.pb' + model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string model_file_name = os.path.join(model_dir_name, model_base_name) input_mean = 127.5 input_std = 127.5 @@ -982,7 +933,6 @@ def create_model_info(architecture): 'model_file_name': model_file_name, 'input_mean': input_mean, 'input_std': input_std, - 'quantize_layer': is_quantized, } @@ -1078,7 +1028,7 @@ def main(_): (train_step, cross_entropy, bottleneck_input, ground_truth_input, final_tensor) = add_final_training_ops( len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer']) + model_info['bottleneck_tensor_size']) # Create the operations we need to evaluate the accuracy of our new layer. evaluation_step, prediction = add_evaluation_step( diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index 2de4c4ec99..c342a17dd8 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -70,18 +70,10 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): def testAddFinalTrainingOps(self, flags_mock): with tf.Graph().as_default(): with tf.Session() as sess: - bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) - self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) - - @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOpsQuantized(self, flags_mock): - with tf.Graph().as_default(): - with tf.Session() as sess: - bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) + bottleneck = tf.placeholder( + tf.float32, [1, 1024], + name='bottleneck') + retrain.add_final_training_ops(5, 'final', bottleneck, 1024) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) def testAddEvaluationStep(self): @@ -107,12 +99,5 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): self.assertIsNotNone(model_info) self.assertEqual(299, model_info['input_width']) - def testCreateModelInfoQuantized(self): - # Test for mobilenet_quantized - model_info = retrain.create_model_info('mobilenet_1.0_224') - self.assertIsNotNone(model_info) - self.assertEqual(224, model_info['input_width']) - - if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 03e60972aa..0a50b3ba87 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Example of DNNClassifier for Iris plant dataset. - -This example uses APIs in Tensorflow 1.4 or above. -""" +"""Example of DNNClassifier for Iris plant dataset.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/examples/learn/wide_n_deep_tutorial.py b/tensorflow/examples/learn/wide_n_deep_tutorial.py index 072353392a..e447b3e24e 100644 --- a/tensorflow/examples/learn/wide_n_deep_tutorial.py +++ b/tensorflow/examples/learn/wide_n_deep_tutorial.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Example code for TensorFlow Wide & Deep Tutorial using TF High Level API. - -This example uses APIs in Tensorflow 1.4 or above. -""" +"""Example code for TensorFlow Wide & Deep Tutorial using TF.Learn API.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py index 82d6a94ea1..ab611f414a 100644 --- a/tensorflow/examples/speech_commands/models.py +++ b/tensorflow/examples/speech_commands/models.py @@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, first_filter_height = input_time_size first_filter_count = 186 first_filter_stride_x = 1 - first_filter_stride_y = 4 + first_filter_stride_y = 1 first_weights = tf.Variable( tf.truncated_normal( [first_filter_height, first_filter_width, 1, first_filter_count], diff --git a/tensorflow/go/android.go b/tensorflow/go/android.go new file mode 100644 index 0000000000..f7d666b7a9 --- /dev/null +++ b/tensorflow/go/android.go @@ -0,0 +1,6 @@ +// +build android + +package tensorflow + +// #cgo LDFLAGS: -landroid -llog -lm -lz -ldl +import "C" diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index eb79da5384..4e5d17f76f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -62,29 +62,6 @@ func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, t return scope.AddOperation(opspec) } -// Outputs a `tf.Event` protocol buffer. -// -// When CreateSummaryDbWriter is being used, this op can be useful for -// importing data from event logs. -// -// Arguments: -// writer: A handle to a summary writer. -// event: A string containing a binary-encoded tf.Event proto. -// -// Returns the created operation. -func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ImportEvent", - Input: []tf.Input{ - writer, event, - }, - } - return scope.AddOperation(opspec) -} - // Outputs a `Summary` protocol buffer with a tensor. // // Arguments: @@ -4006,6 +3983,41 @@ func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value t return op.Output(0) } +// Identity op for gradient debugging. +// +// This op is hidden from public in Python. It is used by TensorFlow Debugger to +// register gradient tensors for gradient debugging. +func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DebugGradientIdentity", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deprecated. Use TensorArrayGradV3 +func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"source": source} + opspec := tf.OpSpec{ + Type: "TensorArrayGradV2", + Input: []tf.Input{ + handle, flow_in, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Get the current size of the TensorArray. // // Arguments: @@ -4539,6 +4551,31 @@ func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) return scope.AddOperation(opspec) } +// Concatenates tensors along one dimension. +// +// Arguments: +// values: List of `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// axis: 0-D. The dimension along which to concatenate. Must be in the +// range [-rank(values), rank(values)). +// +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. +func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ConcatV2", + Input: []tf.Input{ + tf.OutputList(values), axis, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. type QueueDequeueUpToV2Attr func(optionalAttr) @@ -4955,6 +4992,80 @@ func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV return op.Output(0) } +// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. +type FIFOQueueV2Attr func(optionalAttr) + +// FIFOQueueV2Shapes sets the optional shapes attribute to value. +// +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. If the length of +// this attr is 0, the shapes of queue elements are not constrained, and +// only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["shapes"] = value + } +} + +// FIFOQueueV2Capacity sets the optional capacity attribute to value. +// +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// FIFOQueueV2Container sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func FIFOQueueV2Container(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// FIFOQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A queue that produces elements in first-in first-out order. +// +// Arguments: +// component_types: The type of each component in a value. +// +// Returns The handle to the queue. +func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FIFOQueueV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // StridedSliceAttr is an optional argument to StridedSlice. type StridedSliceAttr func(optionalAttr) @@ -5334,101 +5445,6 @@ func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged return op.Output(0) } -// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. -type FIFOQueueV2Attr func(optionalAttr) - -// FIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// FIFOQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// FIFOQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func FIFOQueueV2Container(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// FIFOQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements in first-in first-out order. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FIFOQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts the given `resource_handle` representing an iterator to a variant tensor. -// -// Arguments: -// resource_handle: A handle to an iterator resource. -// -// Returns A variant tensor storing the state of the iterator contained in the -// resource. -func SerializeIterator(scope *Scope, resource_handle tf.Output) (serialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SerializeIterator", - Input: []tf.Input{ - resource_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Return a tensor with the same shape and contents as the input tensor or value. func Identity(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { @@ -5560,39 +5576,6 @@ func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_han return op.Output(0) } -// Outputs the single element from the given dataset. -// -// Arguments: -// dataset: A handle to a dataset that contains a single element. -// -// -// -// Returns The components of the single element of `input`. -func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "DatasetToSingleElement", - Input: []tf.Input{ - dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("DatasetToSingleElement", err) - return - } - return components -} - // Gets the next output from the given iterator. func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { @@ -5713,30 +5696,6 @@ func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf return op.Output(0) } -// Creates a dataset that executes a SQL query and emits rows of the result set. -// -// Arguments: -// driver_name: The database type. Currently, the only supported type is 'sqlite'. -// data_source_name: A connection string to connect to the database. -// query: A SQL query to execute. -// -// -func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SqlDataset", - Input: []tf.Input{ - driver_name, data_source_name, query, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // PlaceholderAttr is an optional argument to Placeholder. type PlaceholderAttr func(optionalAttr) @@ -5807,68 +5766,6 @@ func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, out return op.Output(0) } -// Identity op for gradient debugging. -// -// This op is hidden from public in Python. It is used by TensorFlow Debugger to -// register gradient tensors for gradient debugging. -func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DebugGradientIdentity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArrayGradV3 -func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"source": source} - opspec := tf.OpSpec{ - Type: "TensorArrayGradV2", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that yields a SparseTensor for each element of the input. -// -// Arguments: -// input_dataset: A handle to an input dataset. Must have a single component. -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// row_shape: A vector representing the dense shape of each row in the produced -// SparseTensor. The shape may be partially specified, using `-1` to indicate -// that a particular dimension should use the maximum size of all batch elements. -// -// -func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "DenseToSparseBatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, row_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a dataset that batches and pads `batch_size` elements from the input. // // Arguments: @@ -5929,69 +5826,6 @@ func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtyp return op.Output(0), op.Output(1) } -// Converts the given variant tensor to an iterator and stores it in the given resource. -// -// Arguments: -// resource_handle: A handle to an iterator resource. -// serialized: A variant tensor storing the state of the iterator contained in the -// resource. -// -// Returns the created operation. -func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DeserializeIterator", - Input: []tf.Input{ - resource_handle, serialized, - }, - } - return scope.AddOperation(opspec) -} - -// Concatenates tensors along one dimension. -// -// Arguments: -// values: List of `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// axis: 0-D. The dimension along which to concatenate. Must be in the -// range [-rank(values), rank(values)). -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConcatV2", - Input: []tf.Input{ - tf.OutputList(values), axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that contains the elements of `input_dataset` ignoring errors. -func IgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "IgnoreErrorsDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a dataset that concatenates `input_dataset` with `another_dataset`. func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { @@ -22477,39 +22311,6 @@ func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input t return op.Output(0), op.Output(1), op.Output(2) } -// Creates summary database writer accessible by given resource handle. -// -// This can be used to write tensors from the execution graph directly -// to a database. Only SQLite is supported right now. This function -// will create the schema if it doesn't exist. Entries in the Users, -// Experiments, and Runs tables will be created automatically if they -// don't already exist. -// -// Arguments: -// writer: Handle to SummaryWriter resource to overwrite. -// db_uri: For example "file:/tmp/foo.sqlite". -// experiment_name: Can't contain ASCII control characters or <>. Case -// sensitive. If empty, then the Run will not be associated with any -// Experiment. -// run_name: Can't contain ASCII control characters or <>. Case sensitive. -// If empty, then each Tag will not be associated with any Run. -// user_name: Must be valid as both a DNS label and Linux username. If -// empty, then the Experiment will not be associated with any User. -// -// Returns the created operation. -func CreateSummaryDbWriter(scope *Scope, writer tf.Output, db_uri tf.Output, experiment_name tf.Output, run_name tf.Output, user_name tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CreateSummaryDbWriter", - Input: []tf.Input{ - writer, db_uri, experiment_name, run_name, user_name, - }, - } - return scope.AddOperation(opspec) -} - // HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. type HistogramFixedWidthAttr func(optionalAttr) diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go index 7cba043af2..40c951ab8c 100644 --- a/tensorflow/go/operation_test.go +++ b/tensorflow/go/operation_test.go @@ -123,6 +123,14 @@ func TestOutputDataTypeAndShape(t *testing.T) { []int64{2, 3}, Double, }, + { // Matrix of Uint64 + [][]uint64{ + {1, 2, 3}, + {4, 5, 6}, + }, + []int64{2, 3}, + Uint64, + }, } for idx, test := range testdata { t.Run(fmt.Sprintf("#%d Value %T", idx, test.Value), func(t *testing.T) { diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 36a74c0081..1326a95278 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -101,7 +101,7 @@ func NewTensor(value interface{}) (*Tensor, error) { return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len()) } } else { - e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()} + e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()} if err := e.encode(reflect.ValueOf(value), shape); err != nil { return nil, err } @@ -207,6 +207,9 @@ func (t *Tensor) WriteContentsTo(w io.Writer) (int64, error) { func tensorData(c *C.TF_Tensor) []byte { // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices cbytes := C.TF_TensorData(c) + if cbytes == nil { + return nil + } length := int(C.TF_TensorByteSize(c)) slice := (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length] return slice @@ -310,7 +313,7 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { if err := w.WriteByte(b); err != nil { return err } - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Write(w, nativeEndian, v.Interface()); err != nil { return err } @@ -349,7 +352,7 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect. return err } ptr.Elem().SetBool(b == 1) - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil { return err } diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 35bd2fd9a5..674a8ce86f 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -34,11 +34,15 @@ func TestNewTensor(t *testing.T) { {nil, int64(5)}, {nil, uint8(5)}, {nil, uint16(5)}, + {nil, uint32(5)}, + {nil, uint64(5)}, {nil, float32(5)}, {nil, float64(5)}, {nil, complex(float32(5), float32(6))}, {nil, complex(float64(5), float64(6))}, {nil, "a string"}, + {[]int64{1}, []uint32{1}}, + {[]int64{1}, []uint64{1}}, {[]int64{2}, []bool{true, false}}, {[]int64{1}, []float64{1}}, {[]int64{1}, [1]float64{1}}, @@ -71,11 +75,6 @@ func TestNewTensor(t *testing.T) { // native ints not supported int(5), []int{5}, - // uint32 and uint64 are not supported in TensorFlow - uint32(5), - []uint32{5}, - uint64(5), - []uint64{5}, // Mismatched dimensions [][]float32{{1, 2, 3}, {4}}, // Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG" diff --git a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java index 499757e8cf..2b431eebf5 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java +++ b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java @@ -43,6 +43,7 @@ final class NativeLibrary { private static final boolean DEBUG = System.getProperty("org.tensorflow.NativeLibrary.DEBUG") != null; private static final String JNI_LIBNAME = "tensorflow_jni"; + private static final String FRAMEWORK_LIBNAME = "tensorflow_framework"; public static void load() { if (isLoaded() || tryLoadLibrary()) { @@ -58,15 +59,12 @@ final class NativeLibrary { } // Native code is not present, perhaps it has been packaged into the .jar file containing this. // Extract the JNI library itself - final String jniLibName = System.mapLibraryName(JNI_LIBNAME); - final String jniResourceName = makeResourceName(jniLibName); + final String jniResourceName = makeResourceName(JNI_LIBNAME); log("jniResourceName: " + jniResourceName); final InputStream jniResource = NativeLibrary.class.getClassLoader().getResourceAsStream(jniResourceName); // Extract the JNI's dependency - final String frameworkLibName = - maybeAdjustForMacOS(System.mapLibraryName("tensorflow_framework")); - final String frameworkResourceName = makeResourceName(frameworkLibName); + final String frameworkResourceName = makeResourceName(FRAMEWORK_LIBNAME); log("frameworkResourceName: " + frameworkResourceName); final InputStream frameworkResource = NativeLibrary.class.getClassLoader().getResourceAsStream(frameworkResourceName); @@ -90,15 +88,12 @@ final class NativeLibrary { tempPath.deleteOnExit(); final String tempDirectory = tempPath.toString(); if (frameworkResource != null) { - extractResource(frameworkResource, frameworkLibName, tempDirectory); + extractResource(frameworkResource, FRAMEWORK_LIBNAME, tempDirectory); } else { - log( - frameworkResourceName - + " not found. This is fine assuming " - + jniResourceName - + " is not built to depend on it."); + log(frameworkResourceName + " not found. This is fine assuming " + jniResourceName + + " is not built to depend on it."); } - System.load(extractResource(jniResource, jniLibName, tempDirectory)); + System.load(extractResource(jniResource, JNI_LIBNAME, tempDirectory)); } catch (IOException e) { throw new UnsatisfiedLinkError( String.format( @@ -126,27 +121,9 @@ final class NativeLibrary { } } - private static String maybeAdjustForMacOS(String libFilename) { - if (!System.getProperty("os.name").contains("OS X")) { - return libFilename; - } - // This is macOS, and the TensorFlow release process might have setup dependencies on - // libtensorflow_framework.so instead of libtensorflow_framework.dylib. Adjust for that. - final ClassLoader cl = NativeLibrary.class.getClassLoader(); - if (cl.getResource(makeResourceName(libFilename)) != null) { - return libFilename; - } - // liftensorflow_framework.dylib not found, try libtensorflow_framework.so - final String suffix = ".dylib"; - if (!libFilename.endsWith(suffix)) { - return libFilename; - } - return libFilename.substring(0, libFilename.length() - suffix.length()) + ".so"; - } - private static String extractResource( InputStream resource, String resourceName, String extractToDirectory) throws IOException { - final File dst = new File(extractToDirectory, resourceName); + final File dst = new File(extractToDirectory, System.mapLibraryName(resourceName)); dst.deleteOnExit(); final String dstPath = dst.toString(); log("extracting native library to: " + dstPath); @@ -180,7 +157,9 @@ final class NativeLibrary { } private static String makeResourceName(String baseName) { - return "org/tensorflow/native/" + String.format("%s-%s/", os(), architecture()) + baseName; + return "org/tensorflow/native/" + + String.format("%s-%s/", os(), architecture()) + + System.mapLibraryName(baseName); } private static long copy(InputStream src, File dstFile) throws IOException { diff --git a/tensorflow/java/src/main/java/org/tensorflow/Shape.java b/tensorflow/java/src/main/java/org/tensorflow/Shape.java index 9aa92be111..d533c3d480 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Shape.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Shape.java @@ -77,6 +77,24 @@ public final class Shape { return shape[i]; } + @Override + public int hashCode() { + return Arrays.hashCode(shape); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (obj instanceof Shape && Arrays.equals(this.shape, ((Shape) obj).shape)) { + return !hasUnknownDimension(); + } + + return super.equals(obj); + } + /** Succinct description of the shape meant for debugging. */ @Override public String toString() { @@ -98,4 +116,18 @@ public final class Shape { } private long[] shape; + + private boolean hasUnknownDimension() { + if (shape == null) { + return true; + } + + for (long dimension : shape) { + if (dimension == -1) { + return true; + } + } + + return false; + } } diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFBool.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFBool.java deleted file mode 100644 index ab34f6aa12..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFBool.java +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// GENERATED FILE. To update, edit tftypes.pl instead. - -package org.tensorflow.types; - -import org.tensorflow.DataType; - -/** Represents a boolean. */ -public class TFBool implements TFType { - private TFBool() {} - static { - Types.typeCodes.put(TFBool.class, DataType.BOOL); - } - static { - Types.scalars.put(TFBool.class, false); - } -} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFDouble.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFDouble.java deleted file mode 100644 index 49e5d9f2f3..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFDouble.java +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// GENERATED FILE. To update, edit tftypes.pl instead. - -package org.tensorflow.types; - -import org.tensorflow.DataType; - -/** Represents a 64-bit double precision floating point number. */ -public class TFDouble implements TFType { - private TFDouble() {} - static { - Types.typeCodes.put(TFDouble.class, DataType.DOUBLE); - } - static { - Types.scalars.put(TFDouble.class, 0.0); - } -} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFFloat.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFFloat.java deleted file mode 100644 index 8426ee41f0..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFFloat.java +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// GENERATED FILE. To update, edit tftypes.pl instead. - -package org.tensorflow.types; - -import org.tensorflow.DataType; - -/** Represents a 32-bit single precision floating point number. */ -public class TFFloat implements TFType { - private TFFloat() {} - static { - Types.typeCodes.put(TFFloat.class, DataType.FLOAT); - } - static { - Types.scalars.put(TFFloat.class, 0f); - } -} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt32.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFInt32.java deleted file mode 100644 index 3947b6ad09..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt32.java +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// GENERATED FILE. To update, edit tftypes.pl instead. - -package org.tensorflow.types; - -import org.tensorflow.DataType; - -/** Represents a 32-bit signed integer. */ -public class TFInt32 implements TFType { - private TFInt32() {} - static { - Types.typeCodes.put(TFInt32.class, DataType.INT32); - } - static { - Types.scalars.put(TFInt32.class, 0); - } -} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt64.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFInt64.java deleted file mode 100644 index ccdded8693..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt64.java +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// GENERATED FILE. To update, edit tftypes.pl instead. - -package org.tensorflow.types; - -import org.tensorflow.DataType; - -/** Represents a 64-bit signed integer. */ -public class TFInt64 implements TFType { - private TFInt64() {} - static { - Types.typeCodes.put(TFInt64.class, DataType.INT64); - } - static { - Types.scalars.put(TFInt64.class, 0L); - } -} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFString.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFString.java deleted file mode 100644 index e7327e8c57..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFString.java +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// GENERATED FILE. To update, edit tftypes.pl instead. - -package org.tensorflow.types; - -import org.tensorflow.DataType; - -/** Represents an arbitrary sequence of bytes. */ -public class TFString implements TFType { - private TFString() {} - static { - Types.typeCodes.put(TFString.class, DataType.STRING); - } -} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFType.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFType.java deleted file mode 100644 index 562953ac9d..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFType.java +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -package org.tensorflow.types; - -/** - * A marker interface for classes representing TensorFlow types. - */ -public interface TFType {} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFUInt8.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFUInt8.java deleted file mode 100644 index d7305ca5a8..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/TFUInt8.java +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// GENERATED FILE. To update, edit tftypes.pl instead. - -package org.tensorflow.types; - -import org.tensorflow.DataType; - -/** Represents an 8-bit unsigned integer. */ -public class TFUInt8 implements TFType { - private TFUInt8() {} - static { - Types.typeCodes.put(TFUInt8.class, DataType.UINT8); - } - static { - Types.scalars.put(TFUInt8.class, (byte)0); - } -} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/Types.java b/tensorflow/java/src/main/java/org/tensorflow/types/Types.java deleted file mode 100644 index 976cd9fd34..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/types/Types.java +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -package org.tensorflow.types; - -import java.util.HashMap; -import java.util.Map; -import org.tensorflow.DataType; - -/** - * Utility class for managing the representation of TensorFlow types as Java - * types. For each TensorFlow type (e.g., int32), there is a corresponding Java - * type (e.g., TFInt32) that represents it at compile time and a corresponding - * class object (e.g., TFInt32.class) that represents it at run time. There is - * also an enumeration value in DataType that can be used to represent the - * type, though that should rarely be required. - */ -public class Types { - - private Types() {} // not instantiable - - static final Map, DataType> typeCodes = new HashMap<>(); - - /** Returns the DataType value corresponding to a TensorFlow type class. */ - public static DataType dataType(Class c) { - DataType dtype = typeCodes.get(c); - if (dtype == null) { - throw new IllegalArgumentException("" + c + " is not a TensorFlow type."); - } - return dtype; - } - - static final Map, Object> scalars = new HashMap<>(); - - /** Returns the zero value of type described by {@code c}, or null if - * the type (e.g., string) is not numeric and therefore has no zero value. - */ - public static Object zeroValue(Class c) { - return scalars.get(c); - } -} diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index 3b027700c5..92cc3bd60e 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -16,6 +16,7 @@ limitations under the License. package org.tensorflow; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import org.junit.Test; import org.junit.runner.RunWith; @@ -77,4 +78,29 @@ public class ShapeTest { assertEquals(5, n.shape().size(1)); } } + + @Test + public void equalsWorksCorrectly() { + assertEquals(Shape.scalar(), Shape.scalar()); + assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3)); + + assertNotEquals(Shape.make(1,2), null); + assertNotEquals(Shape.make(1,2), new Object()); + assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4)); + + + assertNotEquals(Shape.unknown(), Shape.unknown()); + assertNotEquals(Shape.make(-1), Shape.make(-1)); + assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3)); + } + + @Test + public void hashCodeIsAsExpected() { + assertEquals(Shape.make(1, 2, 3, 4).hashCode(), Shape.make(1, 2, 3, 4).hashCode()); + assertEquals(Shape.scalar().hashCode(), Shape.scalar().hashCode()); + assertEquals(Shape.unknown().hashCode(), Shape.unknown().hashCode()); + + assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode()); + } } + diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py index 28a4dd27a7..b77912b4f7 100644 --- a/tensorflow/python/client/session_clusterspec_prop_test.py +++ b/tensorflow/python/client/session_clusterspec_prop_test.py @@ -169,7 +169,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase): # BaseRemoteRendezvous::SameWorkerRecvDone that means the test doesn't # actually capture the motivating bug unless run on a GPU machine. # - # Example error message (before bugfix -- line breaks added because lint): + # Example error message (before bugfix -- linebreaks added because lint): # # W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device: # /job:worker/replica:0/task:0/device:CPU:0 all devices: diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 40731aba7d..f45bc13602 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -344,6 +344,16 @@ bool PyTensorListToVector(PyObject* py_tensor_list, %rename("_TF_SetConfig") TF_SetConfig; %rename("_TF_NewSessionOptions") TF_NewSessionOptions; +// Create temporary int64_t to pass to TF_OperationGetAttrInt +%typemap(in, numinputs=0) int64_t* value (int64_t val) { + $1 = &val; +} + +// Convert value to Python int +%typemap(argout) int64_t* value { + $result = PyInt_FromLong(*$1); +} + %include "tensorflow/c/c_api.h" %include "tensorflow/c/python_api.h" diff --git a/tensorflow/python/client/timeline.py b/tensorflow/python/client/timeline.py index 1e96ac5ed4..f3ba4244ce 100644 --- a/tensorflow/python/client/timeline.py +++ b/tensorflow/python/client/timeline.py @@ -275,7 +275,7 @@ class _TensorTracker(object): name: The name of the Tensor as a string. object_id: Chrome Trace object identifier assigned for this Tensor. timestamp: The creation timestamp of this event as a long integer. - pid: Process identifier of the associated device, as an integer. + pid: Process identifier of the assicaiated device, as an integer. allocator: Name of the allocator used to create the Tensor. num_bytes: Number of bytes allocated (long integer). diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py index acea9433e2..d987ba84b5 100644 --- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py @@ -111,20 +111,6 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase): self.assertEqual(repr(self.inc_v), dump.run_fetches_info) self.assertEqual(repr(None), dump.run_feed_keys_info) - def testDumpingOnASingleRunWorksWithRelativePathForDebugDumpDir(self): - sess = dumping_wrapper.DumpingDebugWrapperSession( - self.sess, session_root=self.session_root, log_usage=False) - sess.run(self.inc_v) - dump_dirs = glob.glob(os.path.join(self.session_root, "run_*")) - cwd = os.getcwd() - try: - os.chdir(self.session_root) - dump = debug_data.DebugDumpDir( - os.path.relpath(dump_dirs[0], self.session_root)) - self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity")) - finally: - os.chdir(cwd) - def testDumpingOnASingleRunWithFeedDictWorks(self): sess = dumping_wrapper.DumpingDebugWrapperSession( self.sess, session_root=self.session_root, log_usage=False) @@ -364,14 +350,12 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase): thread_name_filter=r"MainThread$") self.assertAllClose(1.0, sess.run(self.delta)) - child_thread_result = [] def child_thread_job(): - child_thread_result.append(sess.run(self.eta)) + sess.run(sess.run(self.eta)) thread = threading.Thread(name="ChildThread", target=child_thread_job) thread.start() thread.join() - self.assertAllClose([-1.4], child_thread_result) dump_dirs = glob.glob(os.path.join(self.session_root, "run_*")) self.assertEqual(1, len(dump_dirs)) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index c36647b21c..bcd1e1d0dc 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -14,16 +14,11 @@ cc_library( "pywrap_tensor.cc", "pywrap_tfe_src.cc", ], - hdrs = [ - "pywrap_tensor.h", - "pywrap_tfe.h", - ], + hdrs = ["pywrap_tfe.h"], visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/c:c_api", - "//tensorflow/c:c_api_internal", "//tensorflow/c/eager:c_api", - "//tensorflow/c/eager:c_api_internal", "//tensorflow/c/eager:tape", "//tensorflow/core:lib", "//tensorflow/python:ndarray_tensor", diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 0a92ab38a8..86b3776b8c 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -727,24 +727,12 @@ def _num_elements(grad): raise ValueError("`grad` not a Tensor or IndexedSlices.") -_last_shape_dtype = [None, None] -_last_zero = [None] - - -def _zeros(shape, dtype): - """Wraps array_ops.zeros to cache last zero for a given shape and dtype.""" - if [shape, dtype] != _last_shape_dtype: - _last_shape_dtype[:] = [shape, dtype] - _last_zero[0] = array_ops.zeros(shape, dtype) - return _last_zero[0] - - _default_vspace = imperative_grad.VSpace( num_elements_fn=_num_elements, aggregate_fn=_aggregate_grads, tensor_id=ops.tensor_id, - zeros=_zeros, - ones=array_ops.ones) + zeros=array_ops.zeros, + ones_like=lambda x: ops.convert_to_tensor(array_ops.ones_like(x))) class GradientTape(object): diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index ec9a185b73..ed54b8e12e 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -24,11 +24,11 @@ from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient +from tensorflow.python.eager import imperative_grad from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops @@ -41,6 +41,7 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.training import training +from tensorflow.python.util import compat class BackpropTest(test.TestCase): @@ -102,18 +103,6 @@ class BackpropTest(test.TestCase): grad_fn = backprop.gradients_function(f) self.assertAllEqual(2., grad_fn(1., dy=2.)[0]) - def testErrors(self): - - @custom_gradient.custom_gradient - def f(x): - def grad(_): - raise RuntimeError('x') - return x, grad - - # TODO(apassos) raise the right error here - with self.assertRaises(errors_impl.InternalError): - backprop.gradients_function(f)(constant_op.constant(1.0)) - def testImplicitGradOverEmbeddingLookup(self): batch_size = 8 embedding_size = 512 @@ -494,6 +483,48 @@ class BackpropTest(test.TestCase): initial_value=1., name='testSameObjectForMultipleArguments.Variable') self.assertAllEqual([1., 1.], np_g(v, v)) + def testEarlyGradAggregation(self): + # Needs to be a list so mutations by the callback affect this function. + add_n = [] + def callback(op_type, unused_1, unused_2, unused_3, unused_4): + if compat.as_bytes(op_type) == compat.as_bytes('AddN'): + add_n.append(1) + context.context().add_post_execution_callback(callback) + + v = resource_variable_ops.ResourceVariable(constant_op.constant(2.0), + name='v') + def fn(): + outputs = [] + for _ in range(20): + outputs.append(v * constant_op.constant(2.0)) + return math_ops.add_n(outputs) + + # By default the aggregation count is 2. + _ = backprop.implicit_grad(fn)()[0][1] + self.assertEqual(len(add_n), 2) + del add_n[:] + + # Reduce the aggregation limit, cause the backprop to do some + # early aggregation. + # pylint: disable=protected-access + old_cnt = imperative_grad._MIN_AGGREGATE_COUNT + old_bytes = imperative_grad._MIN_AGGREGATE_BYTES + imperative_grad._MIN_AGGREGATE_COUNT = 10 + imperative_grad._MIN_AGGREGATE_BYTES = 1 + _ = backprop.implicit_grad(fn)() + self.assertEqual(len(add_n), 6) + del add_n[:] + + # Aggregation is also limited by the memory. + imperative_grad._MIN_AGGREGATE_BYTES = 10000 + _ = backprop.implicit_grad(fn)() + self.assertEqual(len(add_n), 2) + + imperative_grad._MIN_AGGREGATE_COUNT = old_cnt + imperative_grad._MIN_AGGREGATE_BYTES = old_bytes + # pylint: enable=protected-access + context.context().clear_post_execution_callbacks() + def testImplicitGradientsCustomGradientAndCachedVariableValue(self): @custom_gradient.custom_gradient diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index b555f16f1d..26a70a617d 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -66,8 +66,7 @@ class MicroBenchmarks(test.Benchmark): func() end = time.time() mean_us = (end - start) * 1e6 / num_iters - self.report_benchmark(iters=num_iters, wall_time=mean_us, - extras={"examples_per_sec": num_iters/(end-start)}) + self.report_benchmark(iters=num_iters, wall_time=mean_us) def benchmark_create_np_array(self): func = lambda: np.array([3.0]) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index c6457232e9..983c1ea73e 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -47,7 +47,8 @@ def execute(op_name, num_outputs, inputs, attrs, ctx, name=None): name: Customized name for the operation. Returns: - List of output Tensor objects. The list is empty if there are no outputs + None if there are no outputs, a single Tensor object if there is one output + and a list of Tensor objects if there are multiple outputs. Raises: An exception on error. diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 209715894e..243efccac4 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -62,21 +62,13 @@ class FunctionTest(test.TestCase): @function.defun def step(): def inner(): + tape.watch_variable(v) return v * v return backprop.implicit_grad(inner)()[0][0] self.assertAllEqual(step(), 2.0) - def testDefunDifferentiable(self): - v = resource_variable_ops.ResourceVariable(1.0) - - @function.defun - def f(): - return v * v - - self.assertAllEqual(backprop.implicit_grad(f)()[0][0], 2.0) - def testGraphModeCaptureVariable(self): with context.graph_mode(), self.test_session() as sess: diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index ce51d17cfc..a7f1061d18 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -247,9 +247,7 @@ def _get_graph_callable_inputs(shape_and_dtypes): ret.append(_get_graph_callable_inputs(x)) else: raise errors.InvalidArgumentError( - None, None, "Expected the argument to @graph_callable to be a " - "(possibly nested) list or tuple of ShapeAndDtype objects, " - "but got an object of type: %s" % type(x)) + None, None, "shape_and_dtypes not ShapeAndDtype, type: %s " % type(x)) return tuple(ret) if isinstance(shape_and_dtypes, tuple) else ret @@ -269,7 +267,7 @@ def _graph_callable_internal(func, shape_and_dtypes): Args: func: The tfe Python function to compile. - shape_and_dtypes: A possibly nested list or tuple of ShapeAndDtype objects. + shape_and_dtypes: A list of type ShapeAndDtype. Raises: ValueError: If any one of func's outputs is not a Tensor. @@ -432,10 +430,9 @@ def graph_callable(shape_and_dtypes): ret = foo(tfe.Tensor(2.0)) # `ret` here now is a Tensor with value 9.0. ``` Args: - shape_and_dtypes: A possibly nested list or tuple of ShapeAndDtype objects - that specifies shape and type information for each of the callable's - arguments. The length of this list must be equal to the number of - arguments accepted by the wrapped function. + shape_and_dtypes: A list of type ShapeAndDtype that specifies shape and type + information for each of the callable's arguments. The length of this list + must be equal to the number of arguments accepted by the wrapped function. Returns: A callable graph object. diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index 837cad974a..c87719f84a 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -20,13 +20,114 @@ from __future__ import print_function import collections -from tensorflow.python import pywrap_tensorflow -from tensorflow.python.framework import errors +from tensorflow.python.eager import tape as tape_module + + +# Terminology: +# +# - op: a possibly composite operation, which has an entry in the tape +# - target: dy in dx/dy +# - source: dx in dx/dy +# - tensor: one of the many inputs or outputs of an operation +# +# Below here we do the gradient algorithm. It works as follows: +# +# First we filter the tape to just the subset of operations we want to +# differentiate. In the process of doing so we count how many times each Tensor +# is used as an input to an op (so we know when we're done computing gradients +# for that Tensor). We also count, for each tape entry, how many of its output +# Tensors need gradients to be computed (Tensors which are not used do not need +# any gradients to be computed). +# +# Finally, we start a backprop stack with a set of tape entries for which we +# have all gradients available. This set usually is a subset of the set of +# targets (not all since targets which have outputs in the tape will not have +# gradients available initially). +# +# Then we repeatedly pop an entry from the stack, run its backprop, and update +# the gradients of its inputs. Once we have computed all gradients for a single +# input we can mark this input as done, and this can trigger adding an entry to +# the stack if all outputs of that entry are now done. +# +# When the stack is empty we have gradients for all tensors we're interested in. +def _prepare_backprop(vspace, target, tensor_to_op, op_to_entry, id_sources): + """Filters the tape to only include relevant entries and counts tensor usages. + + Args: + vspace: information about the space we're differentiating in. + target: the target to optimize. + tensor_to_op: Map from tensor id to key in op_to_entry that produced it. + op_to_entry: Map from op id to a tape.TapeEntry object + id_sources: the ids of the sources wrt the gradient is being taken. + + Returns: + usage counts (how many entries downstream from a tensor use it) + op_to_entry_map: entry map (a filtered tape, with only the relevant + entries), + missing: map from tensor id to how many downstream gradients still need + to be computed before this tensor's gradient can be computed. + """ + tensor_stack = [vspace.tensor_id(x) for x in target] + tensor_usage_counts = {} + o_to_e = {} # Copy of just the bits we need from op_to_entry + while tensor_stack: + t = tensor_stack.pop() + op = tensor_to_op.get(t, None) + # op is None or -1 if the tensor is a source (i.e. was watched directly) + if op is None or op == -1 or op in o_to_e: + continue + op_trace = tape_module.TapeEntry(*op_to_entry[op]) + o_to_e[op] = op_trace + for it in op_trace.input_ids: + if it in tensor_usage_counts: + tensor_usage_counts[it] += 1 + else: + tensor_usage_counts[it] = 1 + if it not in id_sources and it in tensor_to_op: + tensor_stack.append(it) + op_missing_tensor_counts = collections.defaultdict(int) + for t in tensor_usage_counts: + if t in tensor_to_op and tensor_to_op[t] is not None: + op_missing_tensor_counts[tensor_to_op[t]] += 1 + return tensor_usage_counts, o_to_e, op_missing_tensor_counts + + +def _initialize_backprop_stack(op_to_entry, op_missing_tensor): + """Returns the set of tape entries which are available for backprop.""" + ready_ops = [] + for op in op_to_entry: + if op not in op_missing_tensor: + ready_ops.append(op) + return ready_ops + + +def _initial_gradients(vspace, target, output_gradients, tensor_usage_counts): + """Computes the initial gradients for each Tensor.""" + # Initialize the backprop stack + gradients = collections.defaultdict(list) + for i, t in enumerate(target): + if vspace.tensor_id(t) in tensor_usage_counts: + # Can't provide a gradient of something we're trying to differentiate + assert output_gradients is None or output_gradients[i] is None + else: + if output_gradients is None or output_gradients[i] is None: + out_grad = vspace.ones_like(t) + else: + out_grad = output_gradients[i] + gradients[vspace.tensor_id(t)].append(out_grad) + return gradients VSpace = collections.namedtuple( "VSpace", - ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones"]) + ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones_like"]) + + +# If over MIN_AGGREGATE_COUNT gradients are accumulated and the total +# memory consumption is over MIN_AGGREGATE_BYTES, do an early aggregation +# so as to release the gradient tensor to save memory. +_MIN_AGGREGATE_COUNT = 4 +_MIN_AGGREGATE_BYTES = 128 * 1024 * 1024 def imperative_grad( @@ -60,6 +161,89 @@ def imperative_grad( or if only non-differentiable functions of the source were used in the computation of target. """ - with errors.raise_exception_on_not_ok_status() as status: - return pywrap_tensorflow.TFE_Py_TapeGradient( - tape._tape, vspace, target, sources, output_gradients, status) # pylint: disable=protected-access + tensor_to_op, op_to_entry = tape.export() + # This overwrites the op_to_entry variable, which will release all memory used + # to keep traces that are irrelevant to the gradient computation we're doing + # here. + id_sources = [vspace.tensor_id(t) for t in sources] + tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop( + vspace, target, tensor_to_op, op_to_entry, id_sources) + ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor) + gradients = _initial_gradients(vspace, target, output_gradients, + tensor_usage_counts) + gradients_size = dict() + # Now exhaust the backprop stack + while ready_ops: + op = ready_ops.pop() + op_trace = op_to_entry.pop(op) + out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids] + + # Cache the last used zero tensor. We reuse it if the next one + # we need is of the same shape and dtype. This is very helpful in + # large splits and should have negligible overhead in other cases. + last_shape_and_dtype = None + last_zeros = None + for i in range(len(out_gradients)): + if out_gradients[i] is None: + # TODO(apassos) this should be in the right device + none_indices = _grad_fn_accepts_none_for_indices.get( + op_trace.op_type, None) + if none_indices is None or i not in none_indices: + shape_and_dtype = op_trace.output_shape_and_dtype[i] + if shape_and_dtype != last_shape_and_dtype: + last_shape_and_dtype = shape_and_dtype + last_zeros = vspace.zeros(*shape_and_dtype) + out_gradients[i] = last_zeros + else: + out_gradients[i] = vspace.aggregate_fn(out_gradients[i]) + + in_gradients = op_trace.backward_function(*(out_gradients)) + for i, t in enumerate(op_trace.input_ids): + if in_gradients[i] is not None: + t_grads = gradients.setdefault(t, []) + t_grads.append(in_gradients[i]) + if len(t_grads) >= _MIN_AGGREGATE_COUNT: + if t not in gradients_size: + gradients_size[t] = vspace.num_elements_fn(t_grads[-1]) + size = gradients_size[t] + + if len(t_grads) * size * 4 > _MIN_AGGREGATE_BYTES: + t_grads[:] = [vspace.aggregate_fn(t_grads)] + if tensor_usage_counts.get(t, 0) > 0: + tensor_usage_counts[t] -= 1 + if (t in tensor_to_op + and tensor_usage_counts[t] == 0 + and t not in id_sources): + in_op = tensor_to_op[t] + if in_op is None or in_op == -1: + continue + if op_missing_tensor.get(in_op, 0) > 0: + op_missing_tensor[in_op] -= 1 + if op_missing_tensor.get(in_op, 0) == 0: + ready_ops.append(in_op) + result = [] + for i, s in enumerate(sources): + g = gradients.get(vspace.tensor_id(s), None) + if g is None: + result.append(None) + else: + result.append(vspace.aggregate_fn(g)) + return result + + +# TODO(agarwal): use an automatic mechanism for handling None arguments to +# gradient functions. +# Some gradient functions can accept None arguments for gradients. The following +# maps the operation name to the indices at which the corresponding gradient +# function can accept None values. +# e.g. FusedBatchNorm outputs 5 values and hence receives 5 gradient values +# during backprop. However the gradient function uses only the first of those +# values and ignores the rest. The entry, "FusedBatchNorm": [1, 2, 3, 4], +# indicates that only the gradient corresponding to index 0 is used, and the +# gradient values at indices 1-4 are ignored (and hence can be None). The +# backprop algorithm can then leverage this by not constructing zeros to +# pass for those indices. +_grad_fn_accepts_none_for_indices = { + "SoftmaxCrossEntropyWithLogits": [1], + "FusedBatchNorm": [1, 2, 3, 4] +} diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 653f3ef84e..ca283862f9 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/python/lib/core/py_seq_tensor.h" #include "tensorflow/python/lib/core/safe_ptr.h" -#include "tensorflow/python/eager/pywrap_tensor.h" #include "tensorflow/python/eager/pywrap_tfe.h" #include "tensorflow/c/c_api.h" @@ -574,7 +573,7 @@ bool EagerTensor_CheckExact(const PyObject* o) { return Py_TYPE(o) == EagerTensorType; } -TFE_TensorHandle* EagerTensor_Handle(const PyObject* o) { +TFE_TensorHandle* EagerTensorHandle(const PyObject* o) { return reinterpret_cast(o)->handle; } @@ -595,11 +594,6 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { return reinterpret_cast(t); } -tensorflow::int64 EagerTensor_id(const PyObject* tensor) { - CHECK(EagerTensor_CheckExact(tensor)); - return reinterpret_cast(tensor)->id; -} - PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) { if (!PyType_Check(base_class)) { PyErr_SetString( diff --git a/tensorflow/python/eager/pywrap_tensor.h b/tensorflow/python/eager/pywrap_tensor.h deleted file mode 100644 index aa1efdd1b8..0000000000 --- a/tensorflow/python/eager/pywrap_tensor.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ -#define TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ - -#include "tensorflow/c/eager/c_api.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/python/lib/core/numpy.h" - -bool EagerTensor_CheckExact(const PyObject* o); -tensorflow::int64 EagerTensor_id(const PyObject* tensor); - -#endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 6705483f3b..1d03df2933 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -81,7 +81,7 @@ bool EagerTensor_CheckExact(const PyObject* o); PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle); // Extracts the handle inside EagerTensor object `o`. Returns nullptr on error. -TFE_TensorHandle* EagerTensor_Handle(const PyObject* o); +TFE_TensorHandle* EagerTensorHandle(const PyObject* o); // Creates the `EagerTensor` class by subclassing `base_class` and returns the // newly created type, or nullptr on error. @@ -103,16 +103,7 @@ void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, PyObject* output_tensors, PyObject* input_tensor_ids, PyObject* backward_function); - -// Computes a gradient based on information recorded on the tape.`tape` must -// have been produced by TFE_Py_NewTape. `vspace` must be a -// imperative_grad.py:VSpace named tuple. `target` and `sources` must be python -// lists of Tensor objects. `output_gradients` is either None or a python list -// of either Tensor or None, and if not None should have the same length as -// target. -PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, - PyObject* target, PyObject* sources, - PyObject* output_gradients, TF_Status* status); +PyObject* TFE_Py_TapeExport(PyObject* tape); // Returns an EagerTensor of dimension [len(`tensor_list`)] containing // the `slice_dim`'th dimension of each tensor in `tensor_list`. In other words, diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 77b49be8f8..7456eb10f8 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -16,13 +16,10 @@ limitations under the License. #include "tensorflow/python/eager/pywrap_tfe.h" #include "tensorflow/c/c_api.h" -#include "tensorflow/c/c_api_internal.h" -#include "tensorflow/c/eager/c_api_internal.h" #include "tensorflow/c/eager/tape.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/python/eager/pywrap_tensor.h" using tensorflow::string; @@ -443,12 +440,10 @@ void TFE_DeleteContextCapsule(PyObject* context) { TF_DeleteStatus(status); } -using GradientTape = tensorflow::eager::GradientTape; - typedef struct { PyObject_HEAD /* Type-specific fields go here. */ - GradientTape* tape; + tensorflow::eager::GradientTape* tape; } TFE_Py_Tape; static void TFE_Py_Tape_Delete(PyObject* tape) { @@ -483,7 +478,7 @@ PyObject* TFE_Py_NewTape() { TFE_Py_Tape_Type.tp_new = PyType_GenericNew; if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return nullptr; TFE_Py_Tape* tape = PyObject_NEW(TFE_Py_Tape, &TFE_Py_Tape_Type); - tape->tape = new GradientTape(); + tape->tape = new tensorflow::eager::GradientTape(); return reinterpret_cast(tape); } @@ -520,50 +515,18 @@ static std::vector MakeIntList(PyObject* list) { } PyObject* TFE_Py_TapeShouldRecord(PyObject* py_tape, PyObject* tensors) { - if (tensors == Py_None) { - Py_RETURN_FALSE; - } - PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); - if (seq == nullptr) { - return nullptr; - } - int len = PySequence_Fast_GET_SIZE(seq); - // TODO(apassos) consider not building a list and changing the API to check - // each tensor individually. - std::vector tensor_ids; - tensor_ids.reserve(len); - for (int i = 0; i < len; ++i) { - PyObject* item = PySequence_Fast_GET_ITEM(seq, i); - if (EagerTensor_CheckExact(item)) { - tensor_ids.push_back(EagerTensor_id(item)); - } else { - PyObject* id_field = PyObject_GetAttrString(item, "_id"); - if (id_field == nullptr) { - return nullptr; - } - tensor_ids.push_back(MakeInt(id_field)); - Py_DECREF(id_field); - } - } - Py_DECREF(seq); TFE_Py_Tape* tape = reinterpret_cast(py_tape); - if (tape->tape->ShouldRecord(tensor_ids)) { - Py_RETURN_TRUE; - } else { - Py_RETURN_FALSE; - } + return PyBool_FromLong(tape->tape->ShouldRecord(MakeIntList(tensors))); } void TFE_Py_TapeWatch(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->Watch(tensor_id); } +// TODO(apassos) have a fast path for eager tensors here which gets information +// from the handle instead of from the python object, and use this only for the +// case of graph tensors. static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { - if (EagerTensor_CheckExact(tensor)) { - TFE_TensorHandle* t = EagerTensor_Handle(tensor); - tensorflow::int64 id = EagerTensor_id(tensor); - return tensorflow::eager::TapeTensor{id, t->t.dtype(), t->t.shape()}; - } PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); tensorflow::int64 id = MakeInt(id_field); Py_DECREF(id_field); @@ -629,239 +592,64 @@ void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); } -class PyVSpace : public tensorflow::eager::VSpace { - public: - explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} - - tensorflow::Status Initialize() { - num_elements_ = PyObject_GetAttrString(py_vspace_, "num_elements_fn"); - if (num_elements_ == nullptr) { - return tensorflow::errors::InvalidArgument("invalid vspace"); +// TODO(apassos) when backprop.py moves to C most of this exporting logic can +// disappear. +PyObject* TFE_Py_TapeExport(PyObject* tape) { + std::pair exported = + reinterpret_cast(tape)->tape->Export(); + PyObject* tensor_tape = PyDict_New(); + for (const auto& pair : exported.first) { + PyObject* tid = PyLong_FromLong(pair.first); + PyObject* opid = PyLong_FromLong(pair.second); + PyDict_SetItem(tensor_tape, tid, opid); + Py_DECREF(tid); + Py_DECREF(opid); + } + + PyObject* op_tape = PyDict_New(); + for (const auto& pair : exported.second) { + PyObject* opid = PyLong_FromLong(pair.first); + const auto& entry = pair.second; + PyObject* op_type = PyBytes_FromString(entry.op_type.c_str()); + PyObject* output_ids = PyList_New(entry.output_tensor_info.size()); + for (int i = 0; i < entry.output_tensor_info.size(); ++i) { + PyObject* tid = PyLong_FromLong(entry.output_tensor_info[i].id); + PyList_SET_ITEM(output_ids, i, tid); } - aggregate_fn_ = PyObject_GetAttrString(py_vspace_, "aggregate_fn"); - if (aggregate_fn_ == nullptr) { - return tensorflow::errors::InvalidArgument("invalid vspace"); + PyObject* input_ids = PyList_New(entry.input_tensor_id.size()); + for (int i = 0; i < entry.input_tensor_id.size(); ++i) { + PyObject* tid = PyLong_FromLong(entry.input_tensor_id[i]); + PyList_SET_ITEM(input_ids, i, tid); } - zeros_ = PyObject_GetAttrString(py_vspace_, "zeros"); - if (zeros_ == nullptr) { - return tensorflow::errors::InvalidArgument("invalid vspace"); - } - ones_ = - PyObject_GetAttrString(reinterpret_cast(py_vspace_), "ones"); - if (ones_ == nullptr) { - return tensorflow::errors::InvalidArgument("invalid vspace"); - } - return tensorflow::Status::OK(); - } - - ~PyVSpace() override { - Py_XDECREF(num_elements_); - Py_XDECREF(aggregate_fn_); - Py_XDECREF(zeros_); - Py_XDECREF(ones_); - } - - tensorflow::int64 NumElements(PyObject* tensor) const final { - PyObject* arglist = - Py_BuildValue("(O)", reinterpret_cast(tensor)); - PyObject* result = PyEval_CallObject(num_elements_, arglist); - tensorflow::int64 r = MakeInt(result); - Py_DECREF(result); - Py_DECREF(arglist); - return r; - } - - PyObject* AggregateGradients( - tensorflow::gtl::ArraySlice gradient_tensors) const final { - PyObject* list = PyList_New(gradient_tensors.size()); - for (int i = 0; i < gradient_tensors.size(); ++i) { - // Note: stealing a reference to the gradient tensors. - CHECK(gradient_tensors[i] != nullptr); - CHECK(gradient_tensors[i] != Py_None); - PyList_SET_ITEM(list, i, - reinterpret_cast(gradient_tensors[i])); - } - PyObject* arglist = Py_BuildValue("(O)", list); - CHECK(arglist != nullptr); - PyObject* result = PyEval_CallObject(aggregate_fn_, arglist); - Py_DECREF(arglist); - Py_DECREF(list); - return result; - } - - PyObject* Zeros(tensorflow::TensorShape shape, - tensorflow::DataType dtype) const final { - PyObject* py_shape = PyTuple_New(shape.dims()); - for (int i = 0; i < shape.dims(); ++i) { - PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); - } - PyObject* py_dtype = PyLong_FromLong(static_cast(dtype)); - PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype); - PyObject* result = PyEval_CallObject(zeros_, arg_list); - Py_DECREF(arg_list); - Py_DECREF(py_dtype); - Py_DECREF(py_shape); - return reinterpret_cast(result); - } - - PyObject* Ones(tensorflow::TensorShape shape, - tensorflow::DataType dtype) const final { - PyObject* py_shape = PyTuple_New(shape.dims()); - for (int i = 0; i < shape.dims(); ++i) { - PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); - } - PyObject* py_dtype = PyLong_FromLong(static_cast(dtype)); - PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype); - PyObject* result = PyEval_CallObject(ones_, arg_list); - Py_DECREF(arg_list); - Py_DECREF(py_dtype); - Py_DECREF(py_shape); - return result; - } - - tensorflow::Status CallBackwardFunction( - PyObject* backward_function, - tensorflow::gtl::ArraySlice output_gradients, - std::vector* result) const final { - PyObject* grads = PyTuple_New(output_gradients.size()); - for (int i = 0; i < output_gradients.size(); ++i) { - if (output_gradients[i] == nullptr) { - Py_INCREF(Py_None); - PyTuple_SET_ITEM(grads, i, Py_None); - } else { - PyTuple_SET_ITEM(grads, i, - reinterpret_cast(output_gradients[i])); + PyObject* backward_function = + reinterpret_cast(entry.backward_function); + PyObject* output_shape_and_dtype = + PyList_New(entry.output_tensor_info.size()); + for (int i = 0; i < entry.output_tensor_info.size(); ++i) { + const tensorflow::TensorShape& shape = entry.output_tensor_info[i].shape; + PyObject* shape_list = PyList_New(shape.dims()); + for (int j = 0; j < shape.dims(); ++j) { + PyList_SET_ITEM(shape_list, j, PyLong_FromLong(shape.dim_size(j))); } + PyObject* type_enum = PyLong_FromLong(entry.output_tensor_info[i].dtype); + PyObject* tuple = PyTuple_Pack(2, shape_list, type_enum); + Py_DECREF(shape_list); + Py_DECREF(type_enum); + PyList_SET_ITEM(output_shape_and_dtype, i, tuple); } - PyObject* py_result = PyEval_CallObject( - reinterpret_cast(backward_function), grads); - Py_DECREF(grads); + PyObject* opinfo = PyTuple_Pack(5, op_type, output_ids, input_ids, + backward_function, output_shape_and_dtype); + Py_DECREF(op_type); + Py_DECREF(output_ids); + Py_DECREF(input_ids); Py_DECREF(backward_function); - if (py_result == nullptr) { - VLOG(1) << "Gradient function threw exceptions"; - if (VLOG_IS_ON(1)) { - PyErr_Print(); - } - return tensorflow::errors::Internal("gradient function threw exceptions"); - } - result->clear(); - PyObject* seq = - PySequence_Fast(py_result, "expected a sequence of gradients"); - if (seq == nullptr) { - return tensorflow::errors::InvalidArgument( - "gradient function did not return a list"); - } - int len = PySequence_Fast_GET_SIZE(seq); - VLOG(1) << "Gradient length is " << len; - result->reserve(len); - for (int i = 0; i < len; ++i) { - PyObject* item = PySequence_Fast_GET_ITEM(seq, i); - if (item == Py_None) { - result->push_back(nullptr); - } else { - Py_INCREF(item); - result->push_back(item); - } - } - Py_DECREF(seq); - Py_DECREF(py_result); - return tensorflow::Status::OK(); - } - - void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); } - - private: - PyObject* py_vspace_; - - PyObject* num_elements_; - PyObject* aggregate_fn_; - PyObject* zeros_; - PyObject* ones_; -}; - -std::vector MakeTensorList(PyObject* tensors) { - PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); - if (seq == nullptr) { - return {}; - } - int len = PySequence_Fast_GET_SIZE(seq); - std::vector list; - list.reserve(len); - for (int i = 0; i < len; ++i) { - list.push_back(PySequence_Fast_GET_ITEM(seq, i)); - } - Py_DECREF(seq); - return list; -} - -std::vector MakeTensorIDList(PyObject* tensors) { - PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); - if (seq == nullptr) { - return {}; - } - int len = PySequence_Fast_GET_SIZE(seq); - std::vector list; - list.reserve(len); - for (int i = 0; i < len; ++i) { - PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i); - if (EagerTensor_CheckExact(tensor)) { - list.push_back(EagerTensor_id(tensor)); - } else { - PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); - list.push_back(MakeInt(id_field)); - Py_DECREF(id_field); - } - } - Py_DECREF(seq); - return list; -} - -PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, - PyObject* target, PyObject* sources, - PyObject* output_gradients, TF_Status* status) { - PyVSpace c_vspace(vspace); - if (!c_vspace.Initialize().ok()) { - return nullptr; - } - - std::vector target_vec = MakeTensorIDList(target); - if (PyErr_Occurred()) { - return nullptr; - } - std::vector sources_vec = MakeTensorIDList(sources); - if (PyErr_Occurred()) { - return nullptr; - } - std::vector outgrad_vec; - if (output_gradients != Py_None) { - outgrad_vec = MakeTensorList(output_gradients); - if (PyErr_Occurred()) { - return nullptr; - } - for (PyObject* tensor : outgrad_vec) { - // Calling the backward function will eat a reference to the tensors in - // outgrad_vec, so we need to increase their reference count. - Py_INCREF(tensor); - } - } - TFE_Py_Tape* tape_obj = reinterpret_cast(tape); - std::vector result; - status->status = tape_obj->tape->ComputeGradient( - c_vspace, target_vec, sources_vec, outgrad_vec, &result); - if (!status->status.ok()) { - return nullptr; - } - if (!result.empty()) { - PyObject* py_result = PyList_New(result.size()); - for (int i = 0; i < result.size(); ++i) { - if (result[i] == nullptr) { - Py_INCREF(Py_None); - result[i] = Py_None; - } - PyList_SET_ITEM(py_result, i, reinterpret_cast(result[i])); - } - return py_result; - } - Py_INCREF(Py_None); - return Py_None; + Py_DECREF(output_shape_and_dtype); + PyDict_SetItem(op_tape, opid, opinfo); + Py_DECREF(opid); + Py_DECREF(opinfo); + } + PyObject* retval = PyTuple_Pack(2, tensor_tape, op_tape); + Py_DECREF(tensor_tape); + Py_DECREF(op_tape); + return retval; } diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index a06f5e1a67..c16aa8c2f7 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -72,7 +72,7 @@ class Tape(object): True if any of the tensors is in the tape. """ return pywrap_tensorflow.TFE_Py_TapeShouldRecord( - self._tape, tensors) + self._tape, [x._id for x in tensors]) # pylint: disable=protected-access def watch(self, tensor): """Adds a tensor to the tape.""" @@ -99,6 +99,16 @@ class Tape(object): """Deletes any trace we have for this tensor.""" self._delete_tensor_id(tensor_id) + def export(self): + """Exports the internal state of this tape. + + Returns: + tensor_tape: a map from tensor_id(tensor) to + responsible for generating that tensor. + op_tape: a map from to TapeEntry for that op. + """ + return pywrap_tensorflow.TFE_Py_TapeExport(self._tape) + class _TapeStack(threading.local): diff --git a/tensorflow/python/eager/tape_test.py b/tensorflow/python/eager/tape_test.py index b490bac66d..c97cb62125 100644 --- a/tensorflow/python/eager/tape_test.py +++ b/tensorflow/python/eager/tape_test.py @@ -22,6 +22,7 @@ from __future__ import print_function from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient +from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -165,6 +166,25 @@ class TapeTest(test.TestCase): g, = backprop.gradients_function(fn, [0])(t) self.assertAllEqual(g, 1.0) + def testTapeGC(self): + # TODO(apassos) figure out how to test this without using tape internal + # APIs. + tape.push_new_tape() + + def f(): + x = constant_op.constant(1.0) + tape.watch(x) + x = gradient_is_constant(x) + x = gradient_is_constant(x) + x = gradient_is_constant(x) + + f() + t = tape.pop_tape() + tensor_tape, op_tape = t.export() + self.assertEqual(len(tensor_tape), 1) # The watched tensor will remain on + # the tape + self.assertEqual(len(op_tape), 0) # No operations should remain on the tape + def testCustomGradientGraphMode(self): with context.graph_mode(), self.test_session(): diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 03f386e9cf..26f1fd888a 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -25,7 +25,6 @@ py_library( srcs = ["estimator_lib.py"], srcs_version = "PY2AND3", deps = [ - ":baseline", ":dnn", ":dnn_linear_combined", ":estimator", @@ -187,68 +186,6 @@ py_test( ], ) -py_library( - name = "baseline", - srcs = ["canned/baseline.py"], - srcs_version = "PY2AND3", - deps = [ - ":estimator", - ":head", - ":model_fn", - ":optimizers", - "//tensorflow/python:init_ops", - "//tensorflow/python:layers", - "//tensorflow/python:nn", - "//tensorflow/python:partitioned_variables", - "//tensorflow/python:summary", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python/feature_column", - "@six_archive//:six", - ], -) - -py_test( - name = "baseline_test", - size = "medium", - srcs = ["canned/baseline_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", # b/67510291 - ], - deps = [ - ":baseline", - ":estimator", - ":export_export", - ":metric_keys", - ":numpy_io", - ":pandas_io", - ":run_config", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:check_ops", - "//tensorflow/python:client", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:data_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:parsing_ops", - "//tensorflow/python:platform", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python:state_ops", - "//tensorflow/python:summary", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//tensorflow/python/feature_column", - "@six_archive//:six", - ], -) - py_library( name = "dnn", srcs = ["canned/dnn.py"], diff --git a/tensorflow/python/estimator/canned/baseline.py b/tensorflow/python/estimator/canned/baseline.py deleted file mode 100644 index 96e4ecd29f..0000000000 --- a/tensorflow/python/estimator/canned/baseline.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Baseline estimators. - -Baseline estimators are bias-only estimators that can be used for debugging -and as simple baselines. - -Example: - -``` -# Build BaselineClassifier -classifier = BaselineClassifier(n_classes=3) - -# Input builders -def input_fn_train: # returns x, y (where y represents label's class index). - pass - -def input_fn_eval: # returns x, y (where y represents label's class index). - pass - -# Fit model. -classifier.train(input_fn=input_fn_train) - -# Evaluate cross entropy between the test and train labels. -loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] - -# predict outputs the probability distribution of the classes as seen in -# training. -predictions = classifier.predict(new_samples) -``` -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six - -from tensorflow.python.estimator import estimator -from tensorflow.python.estimator.canned import head as head_lib -from tensorflow.python.estimator.canned import optimizers -from tensorflow.python.feature_column import feature_column as feature_column_lib -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import training_util - -# The default learning rate of 0.3 is a historical artifact of the initial -# implementation, but seems a reasonable choice. -_LEARNING_RATE = 0.3 - - -def _get_weight_column_key(weight_column): - if weight_column is None: - return None - if isinstance(weight_column, six.string_types): - return weight_column - if not isinstance(weight_column, feature_column_lib._NumericColumn): # pylint: disable=protected-access - raise TypeError('Weight column must be either a string or _NumericColumn.' - ' Given type: {}.'.format(type(weight_column))) - return weight_column.key() - - -def _baseline_logit_fn_builder(num_outputs, weight_column=None): - """Function builder for a baseline logit_fn. - - Args: - num_outputs: Number of outputs for the model. - weight_column: A string or a `_NumericColumn` created by - `tf.feature_column.numeric_column` defining feature column representing - weights. It will be multiplied by the loss of the example. - Returns: - A logit_fn (see below). - """ - - def baseline_logit_fn(features): - """Baseline model logit_fn. - - The baseline model simply learns a bias, so the output logits are a - `Variable` with one weight for each output that learns the bias for the - corresponding output. - - Args: - features: The first item returned from the `input_fn` passed to `train`, - `evaluate`, and `predict`. This should be a single `Tensor` or dict with - `Tensor` values. - Returns: - A `Tensor` representing the logits. - """ - size_checks = [] - batch_size = None - - weight_column_key = _get_weight_column_key(weight_column) - - # The first dimension is assumed to be a batch size and must be consistent - # among all of the features. - for key, feature in features.items(): - # Skip weight_column to ensure we don't add size checks to it. - # These would introduce a dependency on the weight at serving time. - if key == weight_column_key: - continue - first_dim = array_ops.shape(feature)[0] - if batch_size is None: - batch_size = first_dim - else: - size_checks.append(check_ops.assert_equal(batch_size, first_dim)) - - with ops.control_dependencies(size_checks): - with variable_scope.variable_scope('baseline'): - bias = variable_scope.get_variable('bias', shape=[num_outputs], - initializer=init_ops.Zeros) - return math_ops.multiply(bias, array_ops.ones([batch_size, - num_outputs])) - - return baseline_logit_fn - - -def _baseline_model_fn(features, labels, mode, head, optimizer, - weight_column=None, config=None): - """Model_fn for baseline models. - - Args: - features: `Tensor` or dict of `Tensor` (depends on data passed to `train`). - labels: `Tensor` of labels that are compatible with the `Head` instance. - mode: Defines whether this is training, evaluation or prediction. - See `ModeKeys`. - head: A `Head` instance. - optimizer: String, `tf.Optimizer` object, or callable that creates the - optimizer to use for training. If not specified, will use `FtrlOptimizer` - with a default learning rate of 0.3. - weight_column: A string or a `_NumericColumn` created by - `tf.feature_column.numeric_column` defining feature column representing - weights. It will be multiplied by the loss of the example. - config: `RunConfig` object to configure the runtime settings. - - Raises: - KeyError: If weight column is specified but not present. - ValueError: If features is an empty dictionary. - - Returns: - An `EstimatorSpec` instance. - """ - del config # Unused. - - logit_fn = _baseline_logit_fn_builder(head.logits_dimension, weight_column) - logits = logit_fn(features) - - def train_op_fn(loss): - opt = optimizers.get_optimizer_instance( - optimizer, learning_rate=_LEARNING_RATE) - return opt.minimize(loss, global_step=training_util.get_global_step()) - - return head.create_estimator_spec( - features=features, - mode=mode, - logits=logits, - labels=labels, - train_op_fn=train_op_fn) - - -class BaselineClassifier(estimator.Estimator): - """A classifier that can establish a simple baseline. - - This classifier ignores feature values and will learn to predict the average - value of each label. For single-label problems, this will predict the - probability distribution of the classes as seen in the labels. For multi-label - problems, this will predict the fraction of examples that are positive for - each class. - - Example: - - ```python - - # Build BaselineClassifier - classifier = BaselineClassifier(n_classes=3) - - # Input builders - def input_fn_train: # returns x, y (where y represents label's class index). - pass - - def input_fn_eval: # returns x, y (where y represents label's class index). - pass - - # Fit model. - classifier.train(input_fn=input_fn_train) - - # Evaluate cross entropy between the test and train labels. - loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] - - # predict outputs the probability distribution of the classes as seen in - # training. - predictions = classifier.predict(new_samples) - - ``` - - Input of `train` and `evaluate` should have following features, - otherwise there will be a `KeyError`: - - * if `weight_column` is not `None`, a feature with - `key=weight_column` whose value is a `Tensor`. - """ - - def __init__(self, - model_dir=None, - n_classes=2, - weight_column=None, - label_vocabulary=None, - optimizer='Ftrl', - config=None): - """Initializes a BaselineClassifier instance. - - Args: - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator to - continue training a previously saved model. - n_classes: number of label classes. Default is binary classification. - It must be greater than 1. Note: Class labels are integers representing - the class index (i.e. values from 0 to n_classes-1). For arbitrary - label values (e.g. string labels), convert to class indices first. - weight_column: A string or a `_NumericColumn` created by - `tf.feature_column.numeric_column` defining feature column representing - weights. It will be multiplied by the loss of the example. - label_vocabulary: Optional list of strings with size `[n_classes]` - defining the label vocabulary. Only supported for `n_classes` > 2. - optimizer: String, `tf.Optimizer` object, or callable that creates the - optimizer to use for training. If not specified, will use - `FtrlOptimizer` with a default learning rate of 0.3. - config: `RunConfig` object to configure the runtime settings. - Returns: - A `BaselineClassifier` estimator. - - Raises: - ValueError: If `n_classes` < 2. - """ - if n_classes == 2: - head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( # pylint: disable=protected-access - weight_column=weight_column, - label_vocabulary=label_vocabulary) - else: - head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( # pylint: disable=protected-access - n_classes, weight_column=weight_column, - label_vocabulary=label_vocabulary) - def _model_fn(features, labels, mode, config): - return _baseline_model_fn( - features=features, - labels=labels, - mode=mode, - head=head, - optimizer=optimizer, - weight_column=weight_column, - config=config) - super(BaselineClassifier, self).__init__( - model_fn=_model_fn, - model_dir=model_dir, - config=config) - - -class BaselineRegressor(estimator.Estimator): - """A regressor that can establish a simple baseline. - - This regressor ignores feature values and will learn to predict the average - value of each label. - - Example: - - ```python - - # Build BaselineRegressor - regressor = BaselineRegressor() - - # Input builders - def input_fn_train: # returns x, y (where y is the label). - pass - - def input_fn_eval: # returns x, y (where y is the label). - pass - - # Fit model. - regressor.train(input_fn=input_fn_train) - - # Evaluate squared-loss between the test and train targets. - loss = regressor.evaluate(input_fn=input_fn_eval)["loss"] - - # predict outputs the mean value seen during training. - predictions = regressor.predict(new_samples) - ``` - - Input of `train` and `evaluate` should have following features, - otherwise there will be a `KeyError`: - - * if `weight_column` is not `None`, a feature with - `key=weight_column` whose value is a `Tensor`. - """ - - def __init__(self, - model_dir=None, - label_dimension=1, - weight_column=None, - optimizer='Ftrl', - config=None): - """Initializes a BaselineRegressor instance. - - Args: - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator to - continue training a previously saved model. - label_dimension: Number of regression targets per example. This is the - size of the last dimension of the labels and logits `Tensor` objects - (typically, these have shape `[batch_size, label_dimension]`). - weight_column: A string or a `_NumericColumn` created by - `tf.feature_column.numeric_column` defining feature column representing - weights. It will be multiplied by the loss of the example. - optimizer: String, `tf.Optimizer` object, or callable that creates the - optimizer to use for training. If not specified, will use - `FtrlOptimizer` with a default learning rate of 0.3. - config: `RunConfig` object to configure the runtime settings. - Returns: - A `BaselineRegressor` estimator. - """ - - head = head_lib._regression_head_with_mean_squared_error_loss( # pylint: disable=protected-access - label_dimension=label_dimension, - weight_column=weight_column) - def _model_fn(features, labels, mode, config): - return _baseline_model_fn( - features=features, - labels=labels, - mode=mode, - head=head, - optimizer=optimizer, - config=config) - super(BaselineRegressor, self).__init__( - model_fn=_model_fn, - model_dir=model_dir, - config=config) diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py deleted file mode 100644 index 96639e88ea..0000000000 --- a/tensorflow/python/estimator/canned/baseline_test.py +++ /dev/null @@ -1,1545 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for baseline.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import os -import shutil -import tempfile - -import numpy as np -import six - -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 -from tensorflow.python.client import session as tf_session -from tensorflow.python.estimator.canned import baseline -from tensorflow.python.estimator.canned import metric_keys -from tensorflow.python.estimator.export import export -from tensorflow.python.estimator.inputs import numpy_io -from tensorflow.python.estimator.inputs import pandas_io -from tensorflow.python.feature_column import feature_column as feature_column_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import data_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import parsing_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.summary.writer import writer_cache -from tensorflow.python.training import checkpoint_utils -from tensorflow.python.training import input as input_lib -from tensorflow.python.training import optimizer -from tensorflow.python.training import queue_runner -from tensorflow.python.training import saver - - -try: - # pylint: disable=g-import-not-at-top - import pandas as pd - HAS_PANDAS = True -except IOError: - # Pandas writes a temporary file during import. If it fails, don't use pandas. - HAS_PANDAS = False -except ImportError: - HAS_PANDAS = False - -# pylint rules which are disabled by default for test files. -# pylint: disable=invalid-name,protected-access,missing-docstring - -# Names of variables created by model. -BIAS_NAME = 'baseline/bias' - - -def assert_close(expected, actual, rtol=1e-04, name='assert_close'): - with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: - expected = ops.convert_to_tensor(expected, name='expected') - actual = ops.convert_to_tensor(actual, name='actual') - rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected) - rtol = ops.convert_to_tensor(rtol, name='rtol') - return check_ops.assert_less( - rdiff, - rtol, - data=('Condition expected =~ actual did not hold element-wise:' - 'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff, - 'rtol = ', rtol,), - name=scope) - - -def save_variables_to_ckpt(model_dir): - init_all_op = [variables.global_variables_initializer()] - with tf_session.Session() as sess: - sess.run(init_all_op) - saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) - - -def queue_parsed_features(feature_map): - tensors_to_enqueue = [] - keys = [] - for key, tensor in six.iteritems(feature_map): - keys.append(key) - tensors_to_enqueue.append(tensor) - queue_dtypes = [x.dtype for x in tensors_to_enqueue] - input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes) - queue_runner.add_queue_runner( - queue_runner.QueueRunner(input_queue, - [input_queue.enqueue(tensors_to_enqueue)])) - dequeued_tensors = input_queue.dequeue() - return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))} - - -def sorted_key_dict(unsorted_dict): - return {k: unsorted_dict[k] for k in sorted(unsorted_dict)} - - -def sigmoid(x): - return 1 / (1 + np.exp(-1.0 * x)) - - -def _baseline_regressor_fn(*args, **kwargs): - return baseline.BaselineRegressor(*args, **kwargs) - - -def _baseline_classifier_fn(*args, **kwargs): - return baseline.BaselineClassifier(*args, **kwargs) - - -# Tests for Baseline Regressor. - - -# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders. -class BaselineRegressorEvaluationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def test_evaluation_for_simple_data(self): - with ops.Graph().as_default(): - variables.Variable([13.0], name=BIAS_NAME) - variables.Variable( - 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) - eval_metrics = baseline_regressor.evaluate( - input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1) - - # Logit is bias = 13, while label is 10. Loss is 3**2 = 9. - self.assertDictEqual({ - metric_keys.MetricKeys.LOSS: 9., - metric_keys.MetricKeys.LOSS_MEAN: 9., - ops.GraphKeys.GLOBAL_STEP: 100 - }, eval_metrics) - - def test_evaluation_batch(self): - """Tests evaluation for batch_size==2.""" - with ops.Graph().as_default(): - variables.Variable([13.0], name=BIAS_NAME) - variables.Variable( - 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) - eval_metrics = baseline_regressor.evaluate( - input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1) - - # Logit is bias = 13, while label is 10. - # Loss per example is 3**2 = 9. - # Training loss is the sum over batch = 9 + 9 = 18 - # Average loss is the average over batch = 9 - self.assertDictEqual({ - metric_keys.MetricKeys.LOSS: 18., - metric_keys.MetricKeys.LOSS_MEAN: 9., - ops.GraphKeys.GLOBAL_STEP: 100 - }, eval_metrics) - - def test_evaluation_weights(self): - """Tests evaluation with weights.""" - with ops.Graph().as_default(): - variables.Variable([13.0], name=BIAS_NAME) - variables.Variable( - 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - def _input_fn(): - features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))} - labels = ((10.,), (10.,)) - return features, labels - - baseline_regressor = _baseline_regressor_fn( - weight_column='weights', - model_dir=self._model_dir) - eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1) - - # Logit is bias = 13, while label is 10. - # Loss per example is 3**2 = 9. - # Training loss is the weighted sum over batch = 9 + 2*9 = 27 - # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9 - self.assertDictEqual({ - metric_keys.MetricKeys.LOSS: 27., - metric_keys.MetricKeys.LOSS_MEAN: 9., - ops.GraphKeys.GLOBAL_STEP: 100 - }, eval_metrics) - - def test_evaluation_for_multi_dimensions(self): - label_dim = 2 - with ops.Graph().as_default(): - variables.Variable([46.0, 58.0], name=BIAS_NAME) - variables.Variable(100, name='global_step', dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_regressor = _baseline_regressor_fn( - label_dimension=label_dim, - model_dir=self._model_dir) - input_fn = numpy_io.numpy_input_fn( - x={ - 'age': np.array([[2., 4., 5.]]), - }, - y=np.array([[46., 58.]]), - batch_size=1, - num_epochs=None, - shuffle=False) - eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1) - - self.assertItemsEqual( - (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, - ops.GraphKeys.GLOBAL_STEP), eval_metrics.keys()) - - # Logit is bias which is [46, 58] - self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) - - -class BaselineRegressorPredictTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def test_1d(self): - """Tests predict when all variables are one-dimensional.""" - with ops.Graph().as_default(): - variables.Variable([.2], name=BIAS_NAME) - variables.Variable(100, name='global_step', dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) - - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': np.array([[2.]])}, - y=None, - batch_size=1, - num_epochs=1, - shuffle=False) - predictions = baseline_regressor.predict(input_fn=predict_input_fn) - predicted_scores = list([x['predictions'] for x in predictions]) - # x * weight + bias = 2. * 10. + .2 = 20.2 - self.assertAllClose([[.2]], predicted_scores) - - def testMultiDim(self): - """Tests predict when all variables are multi-dimenstional.""" - batch_size = 2 - label_dimension = 3 - with ops.Graph().as_default(): - variables.Variable( # shape=[label_dimension] - [.2, .4, .6], name=BIAS_NAME) - variables.Variable(100, name='global_step', dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - baseline_regressor = _baseline_regressor_fn( - label_dimension=label_dimension, - model_dir=self._model_dir) - - predict_input_fn = numpy_io.numpy_input_fn( - # x shape=[batch_size, x_dim] - x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])}, - y=None, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - predictions = baseline_regressor.predict(input_fn=predict_input_fn) - predicted_scores = list([x['predictions'] for x in predictions]) - # score = bias, shape=[batch_size, label_dimension] - self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]], - predicted_scores) - - -class BaselineRegressorIntegrationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, - input_dimension, label_dimension, prediction_length): - feature_columns = [ - feature_column_lib.numeric_column('x', shape=(input_dimension,)) - ] - est = _baseline_regressor_fn( - label_dimension=label_dimension, - model_dir=self._model_dir) - - # TRAIN - # learn y = x - est.train(train_input_fn, steps=200) - - # EVALUTE - scores = est.evaluate(eval_input_fn) - self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) - self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) - - # PREDICT - predictions = np.array( - [x['predictions'] for x in est.predict(predict_input_fn)]) - self.assertAllEqual((prediction_length, label_dimension), predictions.shape) - - # EXPORT - feature_spec = feature_column_lib.make_parse_example_spec(feature_columns) - serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( - feature_spec) - export_dir = est.export_savedmodel(tempfile.mkdtemp(), - serving_input_receiver_fn) - self.assertTrue(gfile.Exists(export_dir)) - - def test_numpy_input_fn(self): - """Tests complete flow with numpy_input_fn.""" - label_dimension = 2 - input_dimension = label_dimension - batch_size = 10 - prediction_length = batch_size - data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) - data = data.reshape(batch_size, label_dimension) - - train_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - eval_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=data, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=None, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - - self._test_complete_flow( - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - predict_input_fn=predict_input_fn, - input_dimension=input_dimension, - label_dimension=label_dimension, - prediction_length=prediction_length) - - def test_pandas_input_fn(self): - """Tests complete flow with pandas_input_fn.""" - if not HAS_PANDAS: - return - - # Pandas DataFrame natually supports 1 dim data only. - label_dimension = 1 - input_dimension = label_dimension - batch_size = 10 - data = np.array([1., 2., 3., 4.], dtype=np.float32) - x = pd.DataFrame({'x': data}) - y = pd.Series(data) - prediction_length = 4 - - train_input_fn = pandas_io.pandas_input_fn( - x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) - eval_input_fn = pandas_io.pandas_input_fn( - x=x, y=y, batch_size=batch_size, shuffle=False) - predict_input_fn = pandas_io.pandas_input_fn( - x=x, batch_size=batch_size, shuffle=False) - - self._test_complete_flow( - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - predict_input_fn=predict_input_fn, - input_dimension=input_dimension, - label_dimension=label_dimension, - prediction_length=prediction_length) - - def test_input_fn_from_parse_example(self): - """Tests complete flow with input_fn constructed from parse_example.""" - label_dimension = 2 - input_dimension = label_dimension - batch_size = 10 - prediction_length = batch_size - data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) - data = data.reshape(batch_size, label_dimension) - - serialized_examples = [] - for datum in data: - example = example_pb2.Example(features=feature_pb2.Features( - feature={ - 'x': - feature_pb2.Feature(float_list=feature_pb2.FloatList( - value=datum)), - 'y': - feature_pb2.Feature(float_list=feature_pb2.FloatList( - value=datum[:label_dimension])), - })) - serialized_examples.append(example.SerializeToString()) - - feature_spec = { - 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), - 'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32), - } - - def _train_input_fn(): - feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) - features = queue_parsed_features(feature_map) - labels = features.pop('y') - return features, labels - - def _eval_input_fn(): - feature_map = parsing_ops.parse_example( - input_lib.limit_epochs(serialized_examples, num_epochs=1), - feature_spec) - features = queue_parsed_features(feature_map) - labels = features.pop('y') - return features, labels - - def _predict_input_fn(): - feature_map = parsing_ops.parse_example( - input_lib.limit_epochs(serialized_examples, num_epochs=1), - feature_spec) - features = queue_parsed_features(feature_map) - features.pop('y') - return features, None - - self._test_complete_flow( - train_input_fn=_train_input_fn, - eval_input_fn=_eval_input_fn, - predict_input_fn=_predict_input_fn, - input_dimension=input_dimension, - label_dimension=label_dimension, - prediction_length=prediction_length) - - -class BaselineRegressorTrainingTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - writer_cache.FileWriterCache.clear() - shutil.rmtree(self._model_dir) - - def _mock_optimizer(self, expected_loss=None): - expected_var_names = [ - '%s:0' % BIAS_NAME - ] - - def _minimize(loss, global_step=None, var_list=None): - trainable_vars = var_list or ops.get_collection( - ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertItemsEqual(expected_var_names, - [var.name for var in trainable_vars]) - - # Verify loss. We can't check the value directly, so we add an assert op. - self.assertEquals(0, loss.shape.ndims) - if expected_loss is None: - if global_step is not None: - return state_ops.assign_add(global_step, 1).op - return control_flow_ops.no_op() - assert_loss = assert_close( - math_ops.to_float(expected_loss, name='expected'), - loss, - name='assert_loss') - with ops.control_dependencies((assert_loss,)): - if global_step is not None: - return state_ops.assign_add(global_step, 1).op - return control_flow_ops.no_op() - - mock_optimizer = test.mock.NonCallableMock( - spec=optimizer.Optimizer, - wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) - mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) - - # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. - # So, return mock_optimizer itself for deepcopy. - mock_optimizer.__deepcopy__ = lambda _: mock_optimizer - return mock_optimizer - - def _assert_checkpoint(self, - label_dimension, - expected_global_step, - expected_bias=None): - shapes = { - name: shape - for (name, shape) in checkpoint_utils.list_variables(self._model_dir) - } - - self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(self._model_dir, - ops.GraphKeys.GLOBAL_STEP)) - - self.assertEqual([label_dimension], shapes[BIAS_NAME]) - if expected_bias is not None: - self.assertEqual(expected_bias, - checkpoint_utils.load_variable(self._model_dir, - BIAS_NAME)) - - def testFromScratchWithDefaultOptimizer(self): - # Create BaselineRegressor. - label = 5. - age = 17 - baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) - - # Train for a few steps, and validate final checkpoint. - num_steps = 10 - baseline_regressor.train( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) - self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps) - - def testTrainWithOneDimLabel(self): - label_dimension = 1 - batch_size = 20 - est = _baseline_regressor_fn( - label_dimension=label_dimension, - model_dir=self._model_dir) - data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) - self.assertEqual((batch_size,), data_rank_1.shape) - - train_input_fn = numpy_io.numpy_input_fn( - x={'age': data_rank_1}, - y=data_rank_1, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - est.train(train_input_fn, steps=200) - self._assert_checkpoint(label_dimension=1, expected_global_step=200) - - def testTrainWithOneDimWeight(self): - label_dimension = 1 - batch_size = 20 - est = _baseline_regressor_fn( - label_dimension=label_dimension, - weight_column='w', - model_dir=self._model_dir) - - data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) - self.assertEqual((batch_size,), data_rank_1.shape) - - train_input_fn = numpy_io.numpy_input_fn( - x={'age': data_rank_1, - 'w': data_rank_1}, - y=data_rank_1, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - est.train(train_input_fn, steps=200) - self._assert_checkpoint(label_dimension=1, expected_global_step=200) - - def testFromScratch(self): - # Create BaselineRegressor. - label = 5. - age = 17 - # loss = (logits - label)^2 = (0 - 5.)^2 = 25. - mock_optimizer = self._mock_optimizer(expected_loss=25.) - baseline_regressor = _baseline_regressor_fn( - model_dir=self._model_dir, - optimizer=mock_optimizer) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - baseline_regressor.train( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - label_dimension=1, - expected_global_step=num_steps, - expected_bias=[0.]) - - def testFromCheckpoint(self): - # Create initial checkpoint. - bias = 7.0 - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable([bias], name=BIAS_NAME) - variables.Variable( - initial_global_step, - name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - # logits = bias = 6. - # loss = (logits - label)^2 = (7 - 5)^2 = 4 - mock_optimizer = self._mock_optimizer(expected_loss=4.) - baseline_regressor = _baseline_regressor_fn( - model_dir=self._model_dir, - optimizer=mock_optimizer) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - baseline_regressor.train( - input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - label_dimension=1, - expected_global_step=initial_global_step + num_steps, - expected_bias=[bias]) - - def testFromCheckpointMultiBatch(self): - # Create initial checkpoint. - bias = 5.0 - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable([bias], name=BIAS_NAME) - variables.Variable( - initial_global_step, - name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - # logits = bias - # logits[0] = 5. - # logits[1] = 5. - # loss = sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2 = 4 - mock_optimizer = self._mock_optimizer(expected_loss=4.) - baseline_regressor = _baseline_regressor_fn( - model_dir=self._model_dir, - optimizer=mock_optimizer) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - baseline_regressor.train( - input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))), - steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - label_dimension=1, - expected_global_step=initial_global_step + num_steps, - expected_bias=bias) - - -# Tests for Baseline Classifier. - - -class BaselineClassifierTrainingTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - shutil.rmtree(self._model_dir) - - def _mock_optimizer(self, expected_loss=None): - expected_var_names = [ - '%s:0' % BIAS_NAME - ] - - def _minimize(loss, global_step): - trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertItemsEqual( - expected_var_names, - [var.name for var in trainable_vars]) - - # Verify loss. We can't check the value directly, so we add an assert op. - self.assertEquals(0, loss.shape.ndims) - if expected_loss is None: - return state_ops.assign_add(global_step, 1).op - assert_loss = assert_close( - math_ops.to_float(expected_loss, name='expected'), - loss, - name='assert_loss') - with ops.control_dependencies((assert_loss,)): - return state_ops.assign_add(global_step, 1).op - - mock_optimizer = test.mock.NonCallableMock( - spec=optimizer.Optimizer, - wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) - mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) - - # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. - # So, return mock_optimizer itself for deepcopy. - mock_optimizer.__deepcopy__ = lambda _: mock_optimizer - return mock_optimizer - - def _assert_checkpoint( - self, n_classes, expected_global_step, expected_bias=None): - logits_dimension = n_classes if n_classes > 2 else 1 - - shapes = { - name: shape for (name, shape) in - checkpoint_utils.list_variables(self._model_dir) - } - - self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) - self.assertEqual( - expected_global_step, - checkpoint_utils.load_variable( - self._model_dir, ops.GraphKeys.GLOBAL_STEP)) - - self.assertEqual([logits_dimension], shapes[BIAS_NAME]) - if expected_bias is not None: - self.assertAllEqual(expected_bias, - checkpoint_utils.load_variable( - self._model_dir, BIAS_NAME)) - - def _testFromScratchWithDefaultOptimizer(self, n_classes): - label = 0 - age = 17 - est = baseline.BaselineClassifier( - n_classes=n_classes, - model_dir=self._model_dir) - - # Train for a few steps, and validate final checkpoint. - num_steps = 10 - est.train( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) - self._assert_checkpoint(n_classes, num_steps) - - def testBinaryClassesFromScratchWithDefaultOptimizer(self): - self._testFromScratchWithDefaultOptimizer(n_classes=2) - - def testMultiClassesFromScratchWithDefaultOptimizer(self): - self._testFromScratchWithDefaultOptimizer(n_classes=4) - - def _testTrainWithTwoDimsLabel(self, n_classes): - batch_size = 20 - - est = baseline.BaselineClassifier( - n_classes=n_classes, - model_dir=self._model_dir) - data_rank_1 = np.array([0, 1]) - data_rank_2 = np.array([[0], [1]]) - self.assertEqual((2,), data_rank_1.shape) - self.assertEqual((2, 1), data_rank_2.shape) - - train_input_fn = numpy_io.numpy_input_fn( - x={'age': data_rank_1}, - y=data_rank_2, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - est.train(train_input_fn, steps=200) - self._assert_checkpoint(n_classes, 200) - - def testBinaryClassesTrainWithTwoDimsLabel(self): - self._testTrainWithTwoDimsLabel(n_classes=2) - - def testMultiClassesTrainWithTwoDimsLabel(self): - self._testTrainWithTwoDimsLabel(n_classes=4) - - def _testTrainWithOneDimLabel(self, n_classes): - batch_size = 20 - - est = baseline.BaselineClassifier( - n_classes=n_classes, - model_dir=self._model_dir) - data_rank_1 = np.array([0, 1]) - self.assertEqual((2,), data_rank_1.shape) - - train_input_fn = numpy_io.numpy_input_fn( - x={'age': data_rank_1}, - y=data_rank_1, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - est.train(train_input_fn, steps=200) - self._assert_checkpoint(n_classes, 200) - - def testBinaryClassesTrainWithOneDimLabel(self): - self._testTrainWithOneDimLabel(n_classes=2) - - def testMultiClassesTrainWithOneDimLabel(self): - self._testTrainWithOneDimLabel(n_classes=4) - - def _testTrainWithTwoDimsWeight(self, n_classes): - batch_size = 20 - - est = baseline.BaselineClassifier( - weight_column='w', - n_classes=n_classes, - model_dir=self._model_dir) - data_rank_1 = np.array([0, 1]) - data_rank_2 = np.array([[0], [1]]) - self.assertEqual((2,), data_rank_1.shape) - self.assertEqual((2, 1), data_rank_2.shape) - - train_input_fn = numpy_io.numpy_input_fn( - x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1, - batch_size=batch_size, num_epochs=None, - shuffle=True) - est.train(train_input_fn, steps=200) - self._assert_checkpoint(n_classes, 200) - - def testBinaryClassesTrainWithTwoDimsWeight(self): - self._testTrainWithTwoDimsWeight(n_classes=2) - - def testMultiClassesTrainWithTwoDimsWeight(self): - self._testTrainWithTwoDimsWeight(n_classes=4) - - def _testTrainWithOneDimWeight(self, n_classes): - batch_size = 20 - - est = baseline.BaselineClassifier( - weight_column='w', - n_classes=n_classes, - model_dir=self._model_dir) - data_rank_1 = np.array([0, 1]) - self.assertEqual((2,), data_rank_1.shape) - - train_input_fn = numpy_io.numpy_input_fn( - x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1, - batch_size=batch_size, num_epochs=None, - shuffle=True) - est.train(train_input_fn, steps=200) - self._assert_checkpoint(n_classes, 200) - - def testBinaryClassesTrainWithOneDimWeight(self): - self._testTrainWithOneDimWeight(n_classes=2) - - def testMultiClassesTrainWithOneDimWeight(self): - self._testTrainWithOneDimWeight(n_classes=4) - - def _testFromScratch(self, n_classes): - label = 1 - age = 17 - # For binary classifier: - # loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are - # all zero initially) and label = 1 so, - # loss = 1 * -log ( sigmoid(logits) ) = 0.69315 - # For multi class classifier: - # loss = cross_entropy(logits, label) where logits are all 0s (weights are - # all zero initially) and label = 1 so, - # loss = 1 * -log ( 1.0 / n_classes ) - # For this particular test case, as logits are same, the formula - # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases. - mock_optimizer = self._mock_optimizer( - expected_loss=-1 * math.log(1.0/n_classes)) - - est = baseline.BaselineClassifier( - n_classes=n_classes, - optimizer=mock_optimizer, - model_dir=self._model_dir) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - est.train( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - n_classes, - expected_global_step=num_steps, - expected_bias=[0.] if n_classes == 2 else [.0] * n_classes) - - def testBinaryClassesFromScratch(self): - self._testFromScratch(n_classes=2) - - def testMultiClassesFromScratch(self): - self._testFromScratch(n_classes=4) - - def _testFromCheckpoint(self, n_classes): - # Create initial checkpoint. - label = 1 - age = 17 - bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable(bias, name=BIAS_NAME) - variables.Variable( - initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - # For binary classifier: - # logits = bias = -1. - # loss = sigmoid_cross_entropy(logits, label) - # so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133 - # For multi class classifier: - # loss = cross_entropy(logits, label) - # where logits = bias and label = 1 - # so, loss = 1 * -log ( softmax(logits)[1] ) - if n_classes == 2: - expected_loss = 1.3133 - else: - logits = bias - logits_exp = np.exp(logits) - softmax = logits_exp / logits_exp.sum() - expected_loss = -1 * math.log(softmax[label]) - - mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) - - est = baseline.BaselineClassifier( - n_classes=n_classes, - optimizer=mock_optimizer, - model_dir=self._model_dir) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - est.train( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - n_classes, - expected_global_step=initial_global_step + num_steps, - expected_bias=bias) - - def testBinaryClassesFromCheckpoint(self): - self._testFromCheckpoint(n_classes=2) - - def testMultiClassesFromCheckpoint(self): - self._testFromCheckpoint(n_classes=4) - - def _testFromCheckpointFloatLabels(self, n_classes): - """Tests float labels for binary classification.""" - # Create initial checkpoint. - if n_classes > 2: - return - label = 0.8 - age = 17 - bias = [-1.0] - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable(bias, name=BIAS_NAME) - variables.Variable( - initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - # logits = bias = -1. - # loss = sigmoid_cross_entropy(logits, label) - # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617 - mock_optimizer = self._mock_optimizer(expected_loss=1.1132617) - - est = baseline.BaselineClassifier( - n_classes=n_classes, - optimizer=mock_optimizer, - model_dir=self._model_dir) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - est.train( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - - def testBinaryClassesFromCheckpointFloatLabels(self): - self._testFromCheckpointFloatLabels(n_classes=2) - - def testMultiClassesFromCheckpointFloatLabels(self): - self._testFromCheckpointFloatLabels(n_classes=4) - - def _testFromCheckpointMultiBatch(self, n_classes): - # Create initial checkpoint. - label = [1, 0] - age = [17, 18.5] - # For binary case, the expected weight has shape (1,1). For multi class - # case, the shape is (1, n_classes). In order to test the weights, set - # weights as 2.0 * range(n_classes). - bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable(bias, name=BIAS_NAME) - variables.Variable( - initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - # For binary classifier: - # logits = bias - # logits[0] = -1. - # logits[1] = -1. - # loss = sigmoid_cross_entropy(logits, label) - # so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133 - # loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132 - # For multi class classifier: - # loss = cross_entropy(logits, label) - # where logits = bias and label = [1, 0] - # so, loss = 1 * -log ( softmax(logits)[label] ) - if n_classes == 2: - expected_loss = (1.3133 + 0.3132) - else: - # Expand logits since batch_size=2 - logits = bias * np.ones(shape=(2, 1)) - logits_exp = np.exp(logits) - softmax_row_0 = logits_exp[0] / logits_exp[0].sum() - softmax_row_1 = logits_exp[1] / logits_exp[1].sum() - expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) - expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) - expected_loss = expected_loss_0 + expected_loss_1 - - mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) - - est = baseline.BaselineClassifier( - n_classes=n_classes, - optimizer=mock_optimizer, - model_dir=self._model_dir) - self.assertEqual(0, mock_optimizer.minimize.call_count) - - # Train for a few steps, and validate optimizer and final checkpoint. - num_steps = 10 - est.train( - input_fn=lambda: ({'age': (age)}, (label)), - steps=num_steps) - self.assertEqual(1, mock_optimizer.minimize.call_count) - self._assert_checkpoint( - n_classes, - expected_global_step=initial_global_step + num_steps, - expected_bias=bias) - - def testBinaryClassesFromCheckpointMultiBatch(self): - self._testFromCheckpointMultiBatch(n_classes=2) - - def testMultiClassesFromCheckpointMultiBatch(self): - self._testFromCheckpointMultiBatch(n_classes=4) - - -class BaselineClassifierEvaluationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - shutil.rmtree(self._model_dir) - - def _test_evaluation_for_simple_data(self, n_classes): - label = 1 - age = 1. - - bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes - - with ops.Graph().as_default(): - variables.Variable(bias, name=BIAS_NAME) - variables.Variable( - 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - est = _baseline_classifier_fn( - n_classes=n_classes, - model_dir=self._model_dir) - eval_metrics = est.evaluate( - input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1) - - if n_classes == 2: - # Binary classes: loss = -log(sigmoid(-1)) = 1.3133 - # Prediction = sigmoid(-1) = 0.2689 - expected_metrics = { - metric_keys.MetricKeys.LOSS: 1.3133, - ops.GraphKeys.GLOBAL_STEP: 100, - metric_keys.MetricKeys.LOSS_MEAN: 1.3133, - metric_keys.MetricKeys.ACCURACY: 0., - metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, - metric_keys.MetricKeys.LABEL_MEAN: 1., - metric_keys.MetricKeys.ACCURACY_BASELINE: 1, - metric_keys.MetricKeys.AUC: 0., - metric_keys.MetricKeys.AUC_PR: 1., - } - else: - # Multi classes: loss = 1 * -log ( softmax(logits)[label] ) - logits = bias - logits_exp = np.exp(logits) - softmax = logits_exp / logits_exp.sum() - expected_loss = -1 * math.log(softmax[label]) - - expected_metrics = { - metric_keys.MetricKeys.LOSS: expected_loss, - ops.GraphKeys.GLOBAL_STEP: 100, - metric_keys.MetricKeys.LOSS_MEAN: expected_loss, - metric_keys.MetricKeys.ACCURACY: 0., - } - - self.assertAllClose(sorted_key_dict(expected_metrics), - sorted_key_dict(eval_metrics), rtol=1e-3) - - def test_binary_classes_evaluation_for_simple_data(self): - self._test_evaluation_for_simple_data(n_classes=2) - - def test_multi_classes_evaluation_for_simple_data(self): - self._test_evaluation_for_simple_data(n_classes=4) - - def _test_evaluation_batch(self, n_classes): - """Tests evaluation for batch_size==2.""" - label = [1, 0] - age = [17., 18.] - bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable(bias, name=BIAS_NAME) - variables.Variable( - initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - est = _baseline_classifier_fn( - n_classes=n_classes, - model_dir=self._model_dir) - eval_metrics = est.evaluate( - input_fn=lambda: ({'age': (age)}, (label)), steps=1) - - if n_classes == 2: - # Logits are (-1., -1.) labels are (1, 0). - # Loss is - # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 - # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 - # Prediction = sigmoid(-1) = 0.2689 - expected_loss = 1.3133 + 0.3132 - - expected_metrics = { - metric_keys.MetricKeys.LOSS: expected_loss, - ops.GraphKeys.GLOBAL_STEP: 100, - metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, - metric_keys.MetricKeys.ACCURACY: 0.5, - metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, - metric_keys.MetricKeys.LABEL_MEAN: 0.5, - metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, - metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 0.75, - } - else: - # Expand logits since batch_size=2 - logits = bias * np.ones(shape=(2, 1)) - logits_exp = np.exp(logits) - softmax_row_0 = logits_exp[0] / logits_exp[0].sum() - softmax_row_1 = logits_exp[1] / logits_exp[1].sum() - expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) - expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) - expected_loss = expected_loss_0 + expected_loss_1 - - expected_metrics = { - metric_keys.MetricKeys.LOSS: expected_loss, - ops.GraphKeys.GLOBAL_STEP: 100, - metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, - metric_keys.MetricKeys.ACCURACY: 0.5, - } - - self.assertAllClose(sorted_key_dict(expected_metrics), - sorted_key_dict(eval_metrics), rtol=1e-3) - - def test_binary_classes_evaluation_batch(self): - self._test_evaluation_batch(n_classes=2) - - def test_multi_classes_evaluation_batch(self): - self._test_evaluation_batch(n_classes=4) - - def _test_evaluation_weights(self, n_classes): - """Tests evaluation with weights.""" - - label = [1, 0] - age = [17., 18.] - weights = [1., 2.] - # For binary case, the expected weight has shape (1,1). For multi class - # case, the shape is (1, n_classes). In order to test the weights, set - # weights as 2.0 * range(n_classes). - bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes - initial_global_step = 100 - with ops.Graph().as_default(): - variables.Variable(bias, name=BIAS_NAME) - variables.Variable( - initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, - dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - est = _baseline_classifier_fn( - n_classes=n_classes, - weight_column='w', - model_dir=self._model_dir) - eval_metrics = est.evaluate( - input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1) - - if n_classes == 2: - # Logits are (-1., -1.) labels are (1, 0). - # Loss is - # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 - # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 - # weights = [1., 2.] - expected_loss = 1.3133 * 1. + 0.3132 * 2. - loss_mean = expected_loss / (1.0 + 2.0) - label_mean = np.average(label, weights=weights) - logits = [-1, -1] - logistics = sigmoid(np.array(logits)) - predictions_mean = np.average(logistics, weights=weights) - - expected_metrics = { - metric_keys.MetricKeys.LOSS: expected_loss, - ops.GraphKeys.GLOBAL_STEP: 100, - metric_keys.MetricKeys.LOSS_MEAN: loss_mean, - metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), - metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean, - metric_keys.MetricKeys.LABEL_MEAN: label_mean, - metric_keys.MetricKeys.ACCURACY_BASELINE: ( - max(label_mean, 1-label_mean)), - metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 2. / (1. + 2.), - } - else: - # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] ) - # Expand logits since batch_size=2 - logits = bias * np.ones(shape=(2, 1)) - logits_exp = np.exp(logits) - softmax_row_0 = logits_exp[0] / logits_exp[0].sum() - softmax_row_1 = logits_exp[1] / logits_exp[1].sum() - expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) - expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) - loss_mean = np.average([expected_loss_0, expected_loss_1], - weights=weights) - expected_loss = loss_mean * np.sum(weights) - - expected_metrics = { - metric_keys.MetricKeys.LOSS: expected_loss, - ops.GraphKeys.GLOBAL_STEP: 100, - metric_keys.MetricKeys.LOSS_MEAN: loss_mean, - metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), - } - - self.assertAllClose(sorted_key_dict(expected_metrics), - sorted_key_dict(eval_metrics), rtol=1e-3) - - def test_binary_classes_evaluation_weights(self): - self._test_evaluation_weights(n_classes=2) - - def test_multi_classes_evaluation_weights(self): - self._test_evaluation_weights(n_classes=4) - - -class BaselineClassifierPredictTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - shutil.rmtree(self._model_dir) - - def _testPredictions(self, n_classes, label_vocabulary, label_output_fn): - """Tests predict when all variables are one-dimensional.""" - age = 1. - - bias = [10.0] if n_classes == 2 else [10.0] * n_classes - - with ops.Graph().as_default(): - variables.Variable(bias, name=BIAS_NAME) - variables.Variable(100, name='global_step', dtype=dtypes.int64) - save_variables_to_ckpt(self._model_dir) - - est = _baseline_classifier_fn( - label_vocabulary=label_vocabulary, - n_classes=n_classes, - model_dir=self._model_dir) - - predict_input_fn = numpy_io.numpy_input_fn( - x={'age': np.array([[age]])}, - y=None, - batch_size=1, - num_epochs=1, - shuffle=False) - predictions = list(est.predict(input_fn=predict_input_fn)) - - if n_classes == 2: - scalar_logits = bias[0] - two_classes_logits = [0, scalar_logits] - two_classes_logits_exp = np.exp(two_classes_logits) - softmax = two_classes_logits_exp / two_classes_logits_exp.sum() - - expected_predictions = { - 'class_ids': [1], - 'classes': [label_output_fn(1)], - 'logistic': [sigmoid(np.array(scalar_logits))], - 'logits': [scalar_logits], - 'probabilities': softmax, - } - else: - onedim_logits = np.array(bias) - class_ids = onedim_logits.argmax() - logits_exp = np.exp(onedim_logits) - softmax = logits_exp / logits_exp.sum() - expected_predictions = { - 'class_ids': [class_ids], - 'classes': [label_output_fn(class_ids)], - 'logits': onedim_logits, - 'probabilities': softmax, - } - - self.assertEqual(1, len(predictions)) - # assertAllClose cannot handle byte type. - self.assertEqual(expected_predictions['classes'], predictions[0]['classes']) - expected_predictions.pop('classes') - predictions[0].pop('classes') - self.assertAllClose(sorted_key_dict(expected_predictions), - sorted_key_dict(predictions[0])) - - def testBinaryClassesWithoutLabelVocabulary(self): - n_classes = 2 - self._testPredictions(n_classes, - label_vocabulary=None, - label_output_fn=lambda x: ('%s' % x).encode()) - - def testBinaryClassesWithLabelVocabulary(self): - n_classes = 2 - self._testPredictions( - n_classes, - label_vocabulary=['class_vocab_{}'.format(i) - for i in range(n_classes)], - label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) - - def testMultiClassesWithoutLabelVocabulary(self): - n_classes = 4 - self._testPredictions( - n_classes, - label_vocabulary=None, - label_output_fn=lambda x: ('%s' % x).encode()) - - def testMultiClassesWithLabelVocabulary(self): - n_classes = 4 - self._testPredictions( - n_classes, - label_vocabulary=['class_vocab_{}'.format(i) - for i in range(n_classes)], - label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) - - -class BaselineClassifierIntegrationTest(test.TestCase): - - def setUp(self): - self._model_dir = tempfile.mkdtemp() - - def tearDown(self): - if self._model_dir: - shutil.rmtree(self._model_dir) - - def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn, - predict_input_fn, input_dimension, prediction_length): - feature_columns = [ - feature_column_lib.numeric_column('x', shape=(input_dimension,)) - ] - est = _baseline_classifier_fn( - n_classes=n_classes, - model_dir=self._model_dir) - - # TRAIN - # learn y = x - est.train(train_input_fn, steps=200) - - # EVALUTE - scores = est.evaluate(eval_input_fn) - self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) - self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) - - # PREDICT - predictions = np.array( - [x['classes'] for x in est.predict(predict_input_fn)]) - self.assertAllEqual((prediction_length, 1), predictions.shape) - - # EXPORT - feature_spec = feature_column_lib.make_parse_example_spec(feature_columns) - serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( - feature_spec) - export_dir = est.export_savedmodel(tempfile.mkdtemp(), - serving_input_receiver_fn) - self.assertTrue(gfile.Exists(export_dir)) - - def _test_numpy_input_fn(self, n_classes): - """Tests complete flow with numpy_input_fn.""" - input_dimension = 4 - batch_size = 10 - prediction_length = batch_size - data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) - data = data.reshape(batch_size, input_dimension) - target = np.array([1] * batch_size) - - train_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=target, - batch_size=batch_size, - num_epochs=None, - shuffle=True) - eval_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=target, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, - y=None, - batch_size=batch_size, - num_epochs=1, - shuffle=False) - - self._test_complete_flow( - n_classes=n_classes, - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - predict_input_fn=predict_input_fn, - input_dimension=input_dimension, - prediction_length=prediction_length) - - def test_binary_classes_numpy_input_fn(self): - self._test_numpy_input_fn(n_classes=2) - - def test_multi_classes_numpy_input_fn(self): - self._test_numpy_input_fn(n_classes=4) - - def _test_pandas_input_fn(self, n_classes): - """Tests complete flow with pandas_input_fn.""" - if not HAS_PANDAS: - return - - # Pandas DataFrame natually supports 1 dim data only. - input_dimension = 1 - batch_size = 10 - data = np.array([1., 2., 3., 4.], dtype=np.float32) - target = np.array([1, 0, 1, 0], dtype=np.int32) - x = pd.DataFrame({'x': data}) - y = pd.Series(target) - prediction_length = 4 - - train_input_fn = pandas_io.pandas_input_fn( - x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) - eval_input_fn = pandas_io.pandas_input_fn( - x=x, y=y, batch_size=batch_size, shuffle=False) - predict_input_fn = pandas_io.pandas_input_fn( - x=x, batch_size=batch_size, shuffle=False) - - self._test_complete_flow( - n_classes=n_classes, - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - predict_input_fn=predict_input_fn, - input_dimension=input_dimension, - prediction_length=prediction_length) - - def test_binary_classes_pandas_input_fn(self): - self._test_pandas_input_fn(n_classes=2) - - def test_multi_classes_pandas_input_fn(self): - self._test_pandas_input_fn(n_classes=4) - - def _test_input_fn_from_parse_example(self, n_classes): - """Tests complete flow with input_fn constructed from parse_example.""" - input_dimension = 2 - batch_size = 10 - prediction_length = batch_size - data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) - data = data.reshape(batch_size, input_dimension) - target = np.array([1] * batch_size, dtype=np.int64) - - serialized_examples = [] - for x, y in zip(data, target): - example = example_pb2.Example(features=feature_pb2.Features( - feature={ - 'x': - feature_pb2.Feature(float_list=feature_pb2.FloatList( - value=x)), - 'y': - feature_pb2.Feature(int64_list=feature_pb2.Int64List( - value=[y])), - })) - serialized_examples.append(example.SerializeToString()) - - feature_spec = { - 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), - 'y': parsing_ops.FixedLenFeature([1], dtypes.int64), - } - - def _train_input_fn(): - feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) - features = queue_parsed_features(feature_map) - labels = features.pop('y') - return features, labels - - def _eval_input_fn(): - feature_map = parsing_ops.parse_example( - input_lib.limit_epochs(serialized_examples, num_epochs=1), - feature_spec) - features = queue_parsed_features(feature_map) - labels = features.pop('y') - return features, labels - - def _predict_input_fn(): - feature_map = parsing_ops.parse_example( - input_lib.limit_epochs(serialized_examples, num_epochs=1), - feature_spec) - features = queue_parsed_features(feature_map) - features.pop('y') - return features, None - - self._test_complete_flow( - n_classes=n_classes, - train_input_fn=_train_input_fn, - eval_input_fn=_eval_input_fn, - predict_input_fn=_predict_input_fn, - input_dimension=input_dimension, - prediction_length=prediction_length) - - def test_binary_classes_input_fn_from_parse_example(self): - self._test_input_fn_from_parse_example(n_classes=2) - - def test_multi_classes_input_fn_from_parse_example(self): - self._test_input_fn_from_parse_example(n_classes=4) - - -# Tests for Baseline logit_fn. - - -class BaselineLogitFnTest(test.TestCase): - - def test_basic_logit_correctness(self): - """baseline_logit_fn simply returns the bias variable.""" - with ops.Graph().as_default(): - logit_fn = baseline._baseline_logit_fn_builder(num_outputs=2) - logits = logit_fn(features={'age': [[23.], [31.]]}) - with variable_scope.variable_scope('baseline', reuse=True): - bias_var = variable_scope.get_variable('bias') - with tf_session.Session() as sess: - sess.run([variables.global_variables_initializer()]) - self.assertAllClose([[0., 0.], [0., 0.]], logits.eval()) - sess.run(bias_var.assign([10., 5.])) - self.assertAllClose([[10., 5.], [10., 5.]], logits.eval()) - - -if __name__ == '__main__': - test.main() - diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index eaed412c8b..01c00621ce 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -117,7 +117,7 @@ class _Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=estimator_spec.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... upate train_op and hooks in EstimatorSpec and return + ... update train_op and hooks in EstimatorSpec and return ``` """ __metaclass__ = abc.ABCMeta @@ -264,55 +264,26 @@ def _check_dense_labels_match_logits_and_reshape( return array_ops.identity(labels, name=scope) -def _get_weights_and_check_match_logits( - features, weight_column, logits, allow_per_logit_weights=False): - """Fetches weights from features and checks that the shape matches logits. +def _check_weights_match_logits_and_reshape(weights, logits): + """Checks that weights shape matches logits and reshapes if needed. Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape can be either: - * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`. + * [D0, D1, ... DN, logits_dimension] * [D0, D1, ... DN, 1] * [D0, D1, ... DN]: In this case, weights is reshaped into [D0, D1, ... DN, 1] to work with weight broadcasting rules. Args: - features: The features dict that contains weights. - weight_column: The weight column. If not given, this method returns 1. + weights: weights Tensor. logits: logits Tensor. - allow_per_logit_weights: Boolean. Whether we allow weights along the logits - dimension, namely shape `[D0, D1, ... DN, logits_dimension]`. Returns: Validated and reshaped weights Tensor. - Raises: - ValueError: If the weights `Tensor` cannot be cast into float. """ - if allow_per_logit_weights: - err_msg = ( - 'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or ' - '[D0, D1, ... DN, logits_dimension]') - else: - err_msg = ( - 'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]') - with ops.name_scope( - None, 'weights', - values=tuple(six.itervalues(features)) + (logits,)) as scope: - # Fetch the weights. - if weight_column is None: - return 1. - if isinstance(weight_column, six.string_types): - weight_column = feature_column_lib.numeric_column( - key=weight_column, shape=(1,)) - if not isinstance(weight_column, feature_column_lib._NumericColumn): # pylint: disable=protected-access - raise TypeError('Weight column must be either a string or _NumericColumn.' - ' Given type: {}.'.format(type(weight_column))) - weights = weight_column._get_dense_tensor( # pylint: disable=protected-access - feature_column_lib._LazyBuilder(features)) # pylint: disable=protected-access - if not (weights.dtype.is_floating or weights.dtype.is_integer): - raise ValueError('Weight column should be castable to float. ' - 'Given dtype: {}'.format(weights.dtype)) - weights = math_ops.to_float(weights, name='weights') - - # Validate the weights shape. + err_msg = ( + 'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or ' + '[D0, D1, ... DN, logits_dimension]') + with ops.name_scope(None, 'weights', (weights, logits)) as scope: weights_shape = array_ops.shape(weights, name='weights_shape') logits_shape = array_ops.shape(logits, name='logits_shape') if (weights.shape.ndims is not None and logits.shape.ndims is not None and @@ -324,24 +295,42 @@ def _get_weights_and_check_match_logits( with ops.control_dependencies([assert_dimension]): return array_ops.expand_dims(weights, -1, name=scope) supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]], axis=0) - if allow_per_logit_weights: - condition = math_ops.reduce_any( - [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)), - math_ops.reduce_all(math_ops.equal( - supported_weights_shape, weights_shape))]) - assert_dimension = control_flow_ops.Assert( - condition=condition, - data=[err_msg, 'logits_shape: ', logits_shape, - 'weights_shape: ', weights_shape]) - else: - assert_dimension = check_ops.assert_equal( - supported_weights_shape, weights_shape, message=err_msg, - data=['logits_shape: ', logits_shape, - 'weights_shape: ', weights_shape]) + condition = math_ops.reduce_any( + [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)), + math_ops.reduce_all(math_ops.equal( + supported_weights_shape, weights_shape))]) + assert_dimension = control_flow_ops.Assert( + condition=condition, + data=[err_msg, 'logits_shape: ', logits_shape, + 'weights_shape: ', weights_shape]) with ops.control_dependencies([assert_dimension]): return array_ops.identity(weights, name=scope) +# TODO(roumposg): Delete once all heads support multi-dim input. +def _check_logits(logits, expected_logits_dimension): + """Check logits type and shape.""" + with ops.name_scope(None, 'logits', (logits,)) as scope: + logits = math_ops.to_float(logits) + logits_shape = array_ops.shape(logits) + assert_rank = check_ops.assert_rank( + logits, 2, data=[logits_shape], + message='logits shape must be [batch_size, logits_dimension]') + with ops.control_dependencies([assert_rank]): + static_shape = logits.shape + if static_shape is not None: + dim1 = static_shape[1] + if (dim1 is not None) and (dim1 != expected_logits_dimension): + raise ValueError( + 'logits shape must be [batch_size, logits_dimension], got %s.' % + (static_shape,)) + assert_dimension = check_ops.assert_equal( + expected_logits_dimension, logits_shape[1], data=[logits_shape], + message='logits shape must be [batch_size, logits_dimension]') + with ops.control_dependencies([assert_dimension]): + return array_ops.identity(logits, name=scope) + + def _check_logits_final_dim(logits, expected_logits_dimension): """Checks that logits shape is [D0, D1, ... DN, logits_dimension].""" with ops.name_scope(None, 'logits', (logits,)) as scope: @@ -586,8 +575,10 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): labels=label_ids, logits=logits, reduction=losses.Reduction.NONE) # Restore the squeezed dim, so unweighted_loss matches the weights shape. unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=-1) - weights = _get_weights_and_check_match_logits( - features=features, weight_column=self._weight_column, logits=logits) + weights = _weights(features, self._weight_column) + if self._weight_column is not None: + weights = _check_weights_match_logits_and_reshape( + weights=weights, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -689,7 +680,7 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): def _binary_logistic_head_with_sigmoid_cross_entropy_loss( weight_column=None, thresholds=None, label_vocabulary=None, name=None): - """Creates a `_Head` for single label binary classification. + """Creates a `Head` for single label binary classification. This head uses `sigmoid_cross_entropy_with_logits` loss. @@ -727,7 +718,7 @@ def _binary_logistic_head_with_sigmoid_cross_entropy_loss( suffixed by `"/" + name`. Also used as `name_scope` when creating ops. Returns: - An instance of `_Head` for binary classification. + An instance of `Head` for binary classification. Raises: ValueError: if `thresholds` contains a value outside of `(0, 1)`. @@ -861,8 +852,10 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): labels = _assert_range(labels, 2) unweighted_loss = nn.sigmoid_cross_entropy_with_logits( labels=labels, logits=logits) - weights = _get_weights_and_check_match_logits( - features=features, weight_column=self._weight_column, logits=logits) + weights = _weights(features, self._weight_column) + if self._weight_column is not None: + weights = _check_weights_match_logits_and_reshape( + weights=weights, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -925,8 +918,12 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): # Eval. if mode == model_fn.ModeKeys.EVAL: - weights = _get_weights_and_check_match_logits( - features=features, weight_column=self._weight_column, logits=logits) + weights = _weights(features, self._weight_column) + # TODO(roumposg): Merge this logic inside _weights once all heads + # support multi-dimensional inputs. + if self._weight_column is not None: + weights = _check_weights_match_logits_and_reshape( + weights=weights, logits=logits) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, @@ -960,7 +957,7 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): def _regression_head_with_mean_squared_error_loss(weight_column=None, label_dimension=1, name=None): - """Creates a `_Head` for regression using the `mean_squared_error` loss. + """Creates a `_Head` for regression using the mean squared loss. The loss is the weighted sum over all input dimensions. Namely, if the input labels have shape `[batch_size, label_dimension]`, the loss is the weighted @@ -1026,9 +1023,10 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): labels = math_ops.to_float(labels) unweighted_loss = losses.mean_squared_error( labels=labels, predictions=logits, reduction=losses.Reduction.NONE) - weights = _get_weights_and_check_match_logits( - features=features, weight_column=self._weight_column, logits=logits, - allow_per_logit_weights=True) + weights = _weights(features, self._weight_column) + if self._weight_column is not None: + weights = _check_weights_match_logits_and_reshape( + weights=weights, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -1113,19 +1111,18 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): train_op=train_op_fn(weighted_sum_loss)) -def _assert_range(labels, n_classes, message=None): +def _assert_range(labels, n_classes): with ops.name_scope(None, 'assert_range', (labels,)): assert_less = check_ops.assert_less( labels, ops.convert_to_tensor(n_classes, dtype=labels.dtype), - message=message or 'Label IDs must < n_classes') + message='Label IDs must < n_classes') assert_greater = check_ops.assert_non_negative( - labels, message=message or 'Label IDs must >= 0') + labels, message='Label IDs must >= 0') with ops.control_dependencies((assert_less, assert_greater)): return array_ops.identity(labels) -# TODO(b/69000400): Delete this method. def _weights(features, weight_column): """Fetches weights from features.""" with ops.name_scope(None, 'weights', values=features.values()): diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 4497cd26f2..0a4ea7d81c 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -987,14 +987,12 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): spec.loss.eval() def test_multi_dim_train_weights_wrong_outer_dim(self): - """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 3].""" + """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 2].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) - weights = np.array([[[1., 1.1, 1.2], [1.5, 1.6, 1.7]], - [[2., 2.1, 2.2], [2.5, 2.6, 2.7]]]) weights_placeholder = array_ops.placeholder(dtype=dtypes.float32) def _no_op_train_fn(loss): del loss @@ -1010,8 +1008,10 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): _initialize_variables(self, monitored_session.Scaffold()) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 3\]'): - spec.loss.eval({weights_placeholder: weights}) + r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 2\]'): + spec.loss.eval({ + weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]], + [[2., 2.1], [2.5, 2.6]]])}) def test_multi_dim_weighted_eval(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2].""" diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 2d036e2cfb..a730e107ba 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -537,7 +537,7 @@ class Estimator(object): temp_export_dir = get_temp_export_dir(export_dir) # TODO(soergel): Consider whether MonitoredSession makes sense here - with tf_session.Session(config=self._session_config) as session: + with tf_session.Session() as session: saver_for_restore = estimator_spec.scaffold.saver or saver.Saver( sharded=True) diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py index bed2b67419..5b82fd75ff 100644 --- a/tensorflow/python/estimator/estimator_lib.py +++ b/tensorflow/python/estimator/estimator_lib.py @@ -19,8 +19,6 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.python.estimator.canned.baseline import BaselineClassifier -from tensorflow.python.estimator.canned.baseline import BaselineRegressor from tensorflow.python.estimator.canned.dnn import DNNClassifier from tensorflow.python.estimator.canned.dnn import DNNRegressor from tensorflow.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedClassifier @@ -48,8 +46,6 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ # Canned Estimators - 'BaselineClassifier', - 'BaselineRegressor', 'DNNClassifier', 'DNNRegressor', 'DNNLinearCombinedClassifier', diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index c1b773b8c4..2b9b44523b 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -50,7 +50,6 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops -from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses from tensorflow.python.platform import gfile @@ -1911,71 +1910,6 @@ class EstimatorExportTest(test.TestCase): est.train(dummy_input_fn, steps=1) est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) - def test_export_savedmodel_respects_soft_placement(self): - def model_fn_with_a_gpu_op_but_no_kernel(features, labels, mode): - _, _ = features, labels - table = saver_test_utils.CheckpointedOp(name='v2') - - update_global_step = state_ops.assign_add(training.get_global_step(), 1) - with ops.control_dependencies([update_global_step]): - train_op = table.insert('k1', 30.0) - - # In this test, there are no GPUs available. The goal is to verify that - # export_savedmodel executes nevertheless. - with ops.device('/gpu:0'): - string_op = string_ops.as_string(update_global_step) - - with ops.control_dependencies([string_op]): - prediction = table.lookup('k1', 0.0) - - return model_fn_lib.EstimatorSpec( - mode, - predictions=prediction, - loss=constant_op.constant(1.), - train_op=train_op, - export_outputs={ - 'test': export_output.PredictOutput({ - 'prediction': prediction - }) - }) - - tmpdir = tempfile.mkdtemp() - est = estimator.Estimator( - model_fn=model_fn_with_a_gpu_op_but_no_kernel) - est.train(input_fn=dummy_input_fn, steps=1) - feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64), - 'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)} - serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( - feature_spec) - export_dir_base = os.path.join( - compat.as_bytes(tmpdir), compat.as_bytes('export')) - - export_dir = est.export_savedmodel( - export_dir_base, serving_input_receiver_fn) - - # At this point, if export_savedmodel executed with - # allow_soft_placement=True, then the GPU-assigned operation was silently - # placed on the CPU. Otherwise, an exception would have been raised - # related to the fact that the requested GPU device isn't available. - - # Expectations below assume that export_savedmodel has completed normally. - self.assertTrue(gfile.Exists(export_dir_base)) - self.assertTrue(gfile.Exists(export_dir)) - self.assertTrue(gfile.Exists(os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes('saved_model.pb')))) - self.assertTrue(gfile.Exists(os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes('variables')))) - self.assertTrue(gfile.Exists(os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes('variables/variables.index')))) - self.assertTrue(gfile.Exists(os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes('variables/variables.data-00000-of-00001')))) - - gfile.DeleteRecursively(tmpdir) - class EstimatorHookOrderingTest(test.TestCase): diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index c9f37f06e8..3512f66284 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import collections +from six import string_types from tensorflow.python.estimator.inputs.queues import feeding_functions # Key name to pack the target into dict of `features`. See @@ -51,8 +52,9 @@ def numpy_input_fn(x, num_threads=1): """Returns input function that would feed dict of numpy arrays into the model. - This returns a function outputting `features` and `target` based on the dict - of numpy arrays. The dict `features` has the same keys as the `x`. + This returns a function outputting `features` and `targets` based on the dict + of numpy arrays. The dict `features` has the same keys as the `x`. The dict + `targets` has the same keys as the `y` if `y` is a dict. Example: @@ -69,7 +71,7 @@ def numpy_input_fn(x, Args: x: dict of numpy array object. - y: numpy array object. `None` if absent. + y: numpy array object or dict of numpy array object. `None` if absent. batch_size: Integer, size of batches to return. num_epochs: Integer, number of epochs to iterate over data. If `None` will run forever. @@ -81,11 +83,13 @@ def numpy_input_fn(x, such as in prediction and evaluation mode, `num_threads` should be 1. Returns: - Function, that has signature of ()->(dict of `features`, `target`) + Function, that has signature of ()->(dict of `features`, `targets`) Raises: ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e., values in `x` have same shape). + ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict. + ValueError: if x or y is an empty dict. TypeError: `x` is not a dict or `shuffle` is not bool. """ @@ -97,43 +101,76 @@ def numpy_input_fn(x, """Numpy input function.""" if not isinstance(x, dict): raise TypeError('x must be dict; got {}'.format(type(x).__name__)) + if not x: + raise ValueError('x cannot be empty') # Make a shadow copy and also ensure the order of iteration is consistent. - ordered_dict_x = collections.OrderedDict( + ordered_dict_data = collections.OrderedDict( sorted(x.items(), key=lambda t: t[0])) + # Deep copy keys which is a view in python 3 + feature_keys = list(ordered_dict_data.keys()) + + if y is None: + target_keys = None + elif isinstance(y, dict): + if not y: + raise ValueError('y cannot be empty dict, use None instead.') + + ordered_dict_y = collections.OrderedDict( + sorted(y.items(), key=lambda t: t[0])) + target_keys = list(ordered_dict_y.keys()) + + duplicate_keys = set(feature_keys).intersection(set(target_keys)) + if len(duplicate_keys): + raise ValueError('{} duplicate keys are found in both x and y: ' + '{}'.format(len(duplicate_keys), duplicate_keys)) + + ordered_dict_data.update(ordered_dict_y) + else: + target_keys = _get_unique_target_key(ordered_dict_data) + ordered_dict_data[target_keys] = y + + if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1: + shape_dict_of_x = {k: ordered_dict_data[k].shape + for k in feature_keys} + + if target_keys is None: + shape_of_y = None + elif isinstance(target_keys, string_types): + shape_of_y = y.shape + else: + shape_of_y = {k: ordered_dict_data[k].shape + for k in target_keys} - unique_target_key = _get_unique_target_key(ordered_dict_x) - if y is not None: - ordered_dict_x[unique_target_key] = y - - if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1: - shape_dict_of_x = {k: ordered_dict_x[k].shape - for k in ordered_dict_x.keys()} - shape_of_y = None if y is None else y.shape raise ValueError('Length of tensors in x and y is mismatched. All ' 'elements in x and y must have the same length.\n' 'Shapes in x: {}\n' - 'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y)) + 'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y)) queue = feeding_functions._enqueue_data( # pylint: disable=protected-access - ordered_dict_x, + ordered_dict_data, queue_capacity, shuffle=shuffle, num_threads=num_threads, enqueue_size=batch_size, num_epochs=num_epochs) - features = (queue.dequeue_many(batch_size) if num_epochs is None + batch = (queue.dequeue_many(batch_size) if num_epochs is None else queue.dequeue_up_to(batch_size)) - # Remove the first `Tensor` in `features`, which is the row number. - if len(features) > 0: - features.pop(0) + # Remove the first `Tensor` in `batch`, which is the row number. + if len(batch) > 0: + batch.pop(0) - features = dict(zip(ordered_dict_x.keys(), features)) - if y is not None: - target = features.pop(unique_target_key) + features = dict(zip(feature_keys, batch[:len(feature_keys)])) + if target_keys is None: + # TODO(martinwicke), return consistent result + return features + elif isinstance(target_keys, string_types): + target = batch[-1] + return features, target + else: + target = dict(zip(target_keys, batch[-len(target_keys):])) return features, target - return features return input_fn diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 02df22b632..65eae7a7dc 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -239,6 +239,40 @@ class NumpyIoTest(test.TestCase): x, y, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() + def testNumpyInputFnWithXIsEmptyDict(self): + x = {} + y = np.arange(4) + with self.test_session(): + with self.assertRaisesRegexp(ValueError, 'x cannot be empty'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + + def testNumpyInputFnWithYIsNone(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = None + + with self.test_session() as session: + input_fn = numpy_io.numpy_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + features_tensor = input_fn() + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(session, coord=coord) + + feature = session.run(features_tensor) + self.assertEqual(len(feature), 2) + self.assertAllEqual(feature['a'], [0, 1]) + self.assertAllEqual(feature['b'], [32, 33]) + + session.run([features_tensor]) + with self.assertRaises(errors.OutOfRangeError): + session.run([features_tensor]) + + coord.request_stop() + coord.join(threads) + def testNumpyInputFnWithNonBoolShuffle(self): x = np.arange(32, 36) y = np.arange(4) @@ -285,6 +319,59 @@ class NumpyIoTest(test.TestCase): num_epochs=1) failing_input_fn() + def testNumpyInputFnWithYAsDict(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)} + + with self.test_session() as session: + input_fn = numpy_io.numpy_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + features_tensor, targets_tensor = input_fn() + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(session, coord=coord) + + features, targets = session.run([features_tensor, targets_tensor]) + self.assertEqual(len(features), 2) + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + self.assertEqual(len(targets), 2) + self.assertAllEqual(targets['y1'], [-32, -31]) + self.assertAllEqual(targets['y2'], [32, 31]) + + session.run([features_tensor, targets_tensor]) + with self.assertRaises(errors.OutOfRangeError): + session.run([features_tensor, targets_tensor]) + + coord.request_stop() + coord.join(threads) + + def testNumpyInputFnWithYIsEmptyDict(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {} + with self.test_session(): + with self.assertRaisesRegexp(ValueError, 'y cannot be empty'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + + def testNumpyInputFnWithDuplicateKeysInXAndY(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {'y1': np.arange(-32, -28), + 'a': a, + 'y2': np.arange(32, 28, -1), + 'b': b} + with self.test_session(): + with self.assertRaisesRegexp( + ValueError, '2 duplicate keys are found in both x and y'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 29cf223724..cef3f8d4c4 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -100,7 +100,7 @@ class Defun(object): grad_func - (optional). A function implementing the gradient of the function-to-register. This is must be a `_DefinedFunction` object. The gradient - function must satisfy the criterion defined in + function must satisify the criterion defined in function.proto:GradientDef. python_grad_func - (optional). A function implementing the diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index ba43e9199b..36b0737cfc 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -370,7 +370,7 @@ class FunctionTest(test.TestCase): @function.Defun(dtypes.float32) def Foo(x): - y = logging_ops.Print(x, [], "Hello") + y = logging_ops.Print(x, [x], "Hello") with ops.control_dependencies([y]): z = control_flow_ops.no_op() with ops.control_dependencies([z]): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index ad2e2993c1..ab4455534e 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -617,16 +617,15 @@ class _EagerTensorBase(Tensor): return dtypes._INTERN_TABLE[self._datatype_enum()] # pylint: disable=protected-access def numpy(self): - """Returns a numpy array or a scalar with the same contents as the Tensor. + """Returns a numpy array with the same contents as the Tensor. TODO(ashankar,agarwal): Perhaps this should NOT reference the underlying buffer but instead always explicitly copy? Note that currently it may or may not copy based on whether the numpy data is properly aligned or not. Returns: - A numpy array or a scalar. Numpy array may share memory with the - Tensor object. Any changes to one may be reflected in the other. A scalar - value is returned when self has rank 0. + A numpy array that may share memory with the Tensor object. Any changes + to one may be reflected in the other. Raises: ValueError: if the type of this Tensor is not representable in numpy. @@ -864,6 +863,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None): inputs, which allows those ops to accept numpy arrays, Python lists, and scalars in addition to `Tensor` objects. + Note: This function diverges from default Numpy behavior for `float` and + `string` types when `None` is present in a Python list or scalar. Rather + than silently converting `None` values, an error will be thrown. + Args: value: An object whose type has a registered `Tensor` conversion function. dtype: Optional element type for the returned tensor. If missing, the @@ -1641,15 +1644,13 @@ class Operation(object): default_colocation_group = [ compat.as_bytes("loc:@%s" % self._node_def.name) ] - try: - class_attr = self.get_attr("_class") - except ValueError: + if "_class" not in self._node_def.attr: # This op has no explicit colocation group, so it is itself its # own root of a colocation group. return default_colocation_group attr_groups = [ - class_name for class_name in class_attr + class_name for class_name in self.get_attr("_class") if class_name.startswith(b"loc:@") ] @@ -2064,19 +2065,16 @@ class Operation(object): def _set_attr(self, attr_name, attr_value): """Private method used to set an attribute in the node_def.""" - if _USE_C_API: - buf = c_api.TF_NewBufferFromString( - compat.as_bytes(attr_value.SerializeToString())) - try: - with errors.raise_exception_on_not_ok_status() as status: - # pylint: disable=protected-access - c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf, - status) - # pylint: enable=protected-access - finally: - c_api.TF_DeleteBuffer(buf) - else: - self._node_def.attr[attr_name].CopyFrom(attr_value) + if not _USE_C_API: + assert "_set_attr not supported with _USE_C_API == False" + return + buf = c_api.TF_NewBufferFromString( + compat.as_bytes(attr_value.SerializeToString())) + try: + with errors.raise_exception_on_not_ok_status() as status: + c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf, status) # pylint: disable=protected-access + finally: + c_api.TF_DeleteBuffer(buf) def get_attr(self, name): """Returns the value of the attr of this op with the given `name`. @@ -2090,24 +2088,25 @@ class Operation(object): Raises: ValueError: If this op does not have an attr with the given `name`. """ - fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] - if self._c_op: + if _USE_C_API: try: - with c_api_util.tf_buffer() as buf: - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf, status) - data = c_api.TF_GetBuffer(buf) - except errors.InvalidArgumentError as e: - # Convert to ValueError for backwards compatibility. - raise ValueError(str(e)) - x = attr_value_pb2.AttrValue() - x.ParseFromString(data) - else: - if name not in self._node_def.attr: - raise ValueError( - "No attr named '" + name + "' in " + str(self._node_def)) - x = self._node_def.attr[name] + # TODO(b/65162920): remove this try/except block when all attrs are + # implemented to use the _set_attr method instead of node_def.attr. + with errors.raise_exception_on_not_ok_status() as status: + metadata = c_api.TF_OperationGetAttrMetadata(self._c_op, name, status) + with errors.raise_exception_on_not_ok_status() as status: + if metadata.type == c_api.TF_ATTR_INT and metadata.is_list == 0: + return c_api.TF_OperationGetAttrInt(self._c_op, name, status) + except errors.InvalidArgumentError: + # Colocation ops are failing to find attrs begininning with "_*". They + # should fall through to the not-CAPI logic until the attribute is set + # via the C-API always. + pass + fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] + if name not in self._node_def.attr: + raise ValueError("No attr named '" + name + "' in " + str(self._node_def)) + x = self._node_def.attr[name] # Treat an empty oneof value as an empty list. if not x.WhichOneof("value"): return [] @@ -3107,10 +3106,9 @@ class Graph(object): ret._set_device(colocation_op.device) # pylint: disable=protected-access all_colocation_groups = sorted(set(all_colocation_groups)) - # pylint: disable=protected-access - ret._set_attr("_class", attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(s=all_colocation_groups))) - # pylint: enable=protected-access + ret.node_def.attr["_class"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue( + s=all_colocation_groups))) # Sets "container" attribute if # (1) self._container is not None diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 4e931e00c5..3087d6060b 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -31,11 +31,9 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_ops from tensorflow.python.framework import test_util from tensorflow.python.framework import versions @@ -359,55 +357,54 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual("", repr(op)) def testGetAttr(self): - op = test_ops.default_attrs() - self.assertEqual(op.get_attr("string_val"), b"abc") - self.assertEqual(op.get_attr("string_list_val"), [b"abc", b""]) - self.assertEqual(op.get_attr("int_val"), 123) - self.assertEqual(op.get_attr("int_list_val"), [1, 2, 3]) - self.assertEqual(op.get_attr("float_val"), 10.0) - self.assertEqual(op.get_attr("float_list_val"), [10.0]) - self.assertEqual(op.get_attr("bool_val"), True) - self.assertEqual(op.get_attr("bool_list_val"), [True, False]) - self.assertEqual(op.get_attr("shape_val"), - tensor_shape.as_shape([2, 1]).as_proto()) - self.assertEqual(op.get_attr("shape_list_val"), - [tensor_shape.as_shape([]).as_proto(), - tensor_shape.as_shape([1]).as_proto()]) - self.assertEqual(op.get_attr("tensor_val"), - tensor_util.make_tensor_proto(1, dtypes.int32)) - self.assertEqual(op.get_attr("tensor_list_val"), - [tensor_util.make_tensor_proto(1, dtypes.int32)]) - - type_val = op.get_attr("type_val") - # First check that type_val is a DType, because the assertEquals will work - # no matter what since DType overrides __eq__ - self.assertIsInstance(type_val, dtypes.DType) - self.assertEqual(type_val, dtypes.int32) - - type_list_val = op.get_attr("type_list_val") - self.assertTrue(all(isinstance(x, dtypes.DType) for x in type_list_val)) - self.assertEqual(type_list_val, [dtypes.int32, dtypes.float32]) - - @function.Defun(dtypes.float32, func_name="MyFunc") - def func(x): - return x - - op = test_ops.func_attr(func) - self.assertEqual(op.get_attr("f"), - attr_value_pb2.NameAttrList(name="MyFunc")) - - # Try fetching missing attr + # TODO(b/65162920): implement all tests for get_attr with C API if ops._USE_C_API: - error_msg = "Operation 'FuncAttr' has no attr named 'FakeAttr'." - else: - error_msg = "No attr named 'FakeAttr' in name: \"FuncAttr\"" + op = test_ops.int_attr().op + self.assertEqual(op.get_attr("foo"), 1) + + op_str = test_ops.string_list_attr(a=["z"], b="y") + self.assertEqual(op_str.get_attr("a"), [b"z"]) + self.assertEqual(op_str.get_attr("b"), b"y") - with self.assertRaisesRegexp(ValueError, error_msg): - op.get_attr("FakeAttr") + else: + list_value = attr_value_pb2.AttrValue.ListValue() + + list_value.type.append(types_pb2.DT_STRING) + list_value.type.append(types_pb2.DT_DOUBLE) + op = ops.Operation( + ops._NodeDef( + "None", + "op1", + attrs={ + "value": + attr_value_pb2.AttrValue(i=32), + "dtype": + attr_value_pb2.AttrValue(type=types_pb2.DT_INT32), + "list": + attr_value_pb2.AttrValue(list=list_value), + "func": + attr_value_pb2.AttrValue( + func=attr_value_pb2.NameAttrList()) + }), ops.Graph(), [], [dtypes.int32]) + self.assertEqual(32, op.get_attr("value")) + self.assertEqual("", op.get_attr("func").name) + + d = op.get_attr("dtype") + # First check that d is a DType, because the assertEquals will + # work no matter what since DType overrides __eq__ + self.assertIsInstance(d, dtypes.DType) + self.assertEqual(dtypes.int32, d) + + l = op.get_attr("list") + for x in l: + self.assertIsInstance(x, dtypes.DType) + self.assertEqual([dtypes.string, dtypes.double], l) # TODO(b/65162920): remove this test when users who are directly mutating the # node_def have been updated to proper usage. def testSetAttr(self): + if not ops._USE_C_API: + return op = test_ops.int_attr().op op._set_attr("foo", attr_value_pb2.AttrValue(i=2)) # TODO(skyewm): add node_def check diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 7e74c19124..e283542172 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -286,6 +286,7 @@ _TF_TO_IS_OK = { dtypes.bool: [_FilterBool], dtypes.complex128: [_FilterComplex], dtypes.complex64: [_FilterComplex], + dtypes.float16: [_FilterFloat], dtypes.float32: [_FilterFloat], dtypes.float64: [_FilterFloat], dtypes.int16: [_FilterInt], diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc index 35e0167b26..a8b7fc543f 100644 --- a/tensorflow/python/framework/test_ops.cc +++ b/tensorflow/python/framework/test_ops.cc @@ -341,27 +341,4 @@ REGISTER_OP("StringListAttr") .Attr("b: string") .SetShapeFn(shape_inference::UnknownShape); -REGISTER_OP("DefaultAttrs") - .Attr("string_val: string = 'abc'") - .Attr("string_list_val: list(string) = ['abc', '']") - .Attr("int_val: int = 123") - .Attr("int_list_val: list(int) = [1, 2, 3]") - .Attr("float_val: float = 10.0") - .Attr("float_list_val: list(float) = [10.0]") - .Attr("bool_val: bool = true") - .Attr("bool_list_val: list(bool) = [true, false]") - .Attr("type_val: type = DT_INT32") - .Attr("type_list_val: list(type) = [DT_INT32, DT_FLOAT]") - .Attr("shape_val: shape = { dim { size: 2 } dim { size: 1 } }") - .Attr("shape_list_val: list(shape) = [{}, { dim { size: 1} }]") - .Attr("tensor_val: tensor = { dtype: DT_INT32 tensor_shape: {} int_val: 1}") - .Attr( - "tensor_list_val: list(tensor) = " - "[{ dtype: DT_INT32 tensor_shape: {} int_val: 1}]") - .SetShapeFn(shape_inference::UnknownShape); - -REGISTER_OP("FuncAttr") - .Attr("f: func") - .SetShapeFn(shape_inference::UnknownShape); - } // end namespace tensorflow diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 6e3a35af3c..d796b0ebea 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -984,8 +984,9 @@ class TensorFlowTestCase(googletest.TestCase): err: A float value. msg: An optional string message to append to the failure message. """ + # f1 == f2 is needed here as we might have: f1, f2 = inf, inf self.assertTrue( - math.fabs(f1 - f2) <= err, + f1 == f2 or math.fabs(f1 - f2) <= err, "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg if msg is not None else "")) diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc index 7d365c3be9..4ec7620bce 100644 --- a/tensorflow/python/grappler/model_analyzer.cc +++ b/tensorflow/python/grappler/model_analyzer.cc @@ -59,15 +59,10 @@ void ModelAnalyzer::PrintNodeInfo(const NodeDef* node, if (i > 0) { os << ", "; } - if (prop.shape().dim(i).size() >= 0) { - // Print the actual dimension. - os << prop.shape().dim(i).size(); - } else if (prop.shape().dim(i).size() == -1) { - // We don't know anything about the dimension. + if (prop.shape().dim(i).size() < 0) { os << "?"; } else { - // Symbolic dimension. - os << "x" << -prop.shape().dim(i).size(); + os << prop.shape().dim(i).size(); } } os << "]"; diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 6a762ee5d2..4db48b45ed 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -498,18 +498,6 @@ py_test( ], ) -py_test( - name = "recurrent_test", - size = "small", - srcs = ["_impl/keras/layers/recurrent_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - ], -) - py_test( name = "serialization_test", size = "small", diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index 2bcbabf19c..f9be782f85 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -29,9 +29,6 @@ from six.moves import zip # pylint: disable=redefined-builtin from tensorflow.python.eager import context from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras import constraints -from tensorflow.python.keras._impl.keras import initializers -from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary @@ -212,9 +209,9 @@ class Layer(tf_base_layers.Layer): dtype = K.floatx() weight = self.add_variable(name, shape, dtype=dtype, - initializer=initializers.get(initializer), - regularizer=regularizers.get(regularizer), - constraint=constraints.get(constraint), + initializer=initializer, + regularizer=regularizer, + constraint=constraint, trainable=trainable) return weight diff --git a/tensorflow/python/keras/_impl/keras/integration_test.py b/tensorflow/python/keras/_impl/keras/integration_test.py index 871a8c7329..7110036848 100644 --- a/tensorflow/python/keras/_impl/keras/integration_test.py +++ b/tensorflow/python/keras/_impl/keras/integration_test.py @@ -93,7 +93,7 @@ class KerasIntegrationTest(test.TestCase): y_test = keras.utils.to_categorical(y_test) model = keras.models.Sequential() - model.add(keras.layers.LSTM(5, return_sequences=True, + model.add(keras.layers.LSTM(3, return_sequences=True, input_shape=x_train.shape[1:])) model.add(keras.layers.GRU(y_train.shape[-1], activation='softmax')) model.compile(loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/_impl/keras/layers/gru_test.py b/tensorflow/python/keras/_impl/keras/layers/gru_test.py index c57fbac41c..03f0736161 100644 --- a/tensorflow/python/keras/_impl/keras/layers/gru_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/gru_test.py @@ -156,10 +156,8 @@ class GRULayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - self.assertEqual(len(layer.get_losses_for(x)), 1) + layer(keras.backend.variable(np.ones((2, 3, 2)))) + self.assertEqual(len(layer.losses), 4) def test_constraints_GRU(self): embedding_dim = 4 @@ -177,9 +175,9 @@ class GRULayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) def test_with_masking_layer_GRU(self): layer_class = keras.layers.GRU diff --git a/tensorflow/python/keras/_impl/keras/layers/lstm_test.py b/tensorflow/python/keras/_impl/keras/layers/lstm_test.py index 8d359bf17c..f43d90fec8 100644 --- a/tensorflow/python/keras/_impl/keras/layers/lstm_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/lstm_test.py @@ -156,9 +156,8 @@ class LSTMLayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - self.assertEqual(len(layer.get_losses_for(x)), 1) + layer(keras.backend.variable(np.ones((2, 3, 2)))) + self.assertEqual(len(layer.losses), 4) def test_constraints_LSTM(self): embedding_dim = 4 @@ -176,9 +175,9 @@ class LSTMLayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) def test_with_masking_layer_LSTM(self): layer_class = keras.layers.LSTM diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 2bc74d5f80..139523403c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,209 +29,99 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg -from tensorflow.python.platform import tf_logging as logging -class StackedRNNCells(Layer): - """Wrapper allowing a stack of RNN cells to behave as a single cell. +# pylint: disable=access-member-before-definition - Used to implement efficient stacked RNNs. - Arguments: - cells: List of RNN cell instances. +def _time_distributed_dense(x, + w, + b=None, + dropout=None, + input_dim=None, + output_dim=None, + timesteps=None, + training=None): + """Apply `y . w + b` for every temporal slice y of x. - Examples: + Arguments: + x: input tensor. + w: weight matrix. + b: optional bias vector. + dropout: whether to apply dropout (same dropout mask + for every temporal slice of the input). + input_dim: integer; optional dimensionality of the input. + output_dim: integer; optional dimensionality of the output. + timesteps: integer; optional number of timesteps. + training: training phase tensor or boolean. + + Returns: + Output tensor. + """ + if not input_dim: + input_dim = K.shape(x)[2] + if not timesteps: + timesteps = K.shape(x)[1] + if not output_dim: + output_dim = K.shape(w)[1] + + if dropout is not None and 0. < dropout < 1.: + # apply the same dropout pattern at every timestep + ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) + dropout_matrix = K.dropout(ones, dropout) + expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) + x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) + + # collapse time dimension and batch dimension together + x = K.reshape(x, (-1, input_dim)) + x = K.dot(x, w) + if b is not None: + x = K.bias_add(x, b) + # reshape to 3D tensor + if K.backend() == 'tensorflow': + x = K.reshape(x, K.stack([-1, timesteps, output_dim])) + x.set_shape([None, None, output_dim]) + else: + x = K.reshape(x, (-1, timesteps, output_dim)) + return x - ```python - cells = [ - keras.layers.LSTMCell(output_dim), - keras.layers.LSTMCell(output_dim), - keras.layers.LSTMCell(output_dim), - ] - inputs = keras.Input((timesteps, input_dim)) - x = keras.layers.RNN(cells)(inputs) - ``` - """ +class Recurrent(Layer): + """Abstract base class for recurrent layers. - def __init__(self, cells, **kwargs): - for cell in cells: - if not hasattr(cell, 'call'): - raise ValueError('All cells must have a `call` method. ' - 'received cells:', cells) - if not hasattr(cell, 'state_size'): - raise ValueError('All cells must have a ' - '`state_size` attribute. ' - 'received cells:', cells) - self.cells = cells - super(StackedRNNCells, self).__init__(**kwargs) - - @property - def state_size(self): - # States are a flat list - # in reverse order of the cell stack. - # This allows to preserve the requirement - # `stack.state_size[0] == output_dim`. - # e.g. states of a 2-layer LSTM would be - # `[h2, c2, h1, c1]` - # (assuming one LSTM has states [h, c]) - state_size = [] - for cell in self.cells[::-1]: - if hasattr(cell.state_size, '__len__'): - state_size += list(cell.state_size) - else: - state_size.append(cell.state_size) - return tuple(state_size) - - def call(self, inputs, states, **kwargs): - # Recover per-cell states. - nested_states = [] - for cell in self.cells[::-1]: - if hasattr(cell.state_size, '__len__'): - nested_states.append(states[:len(cell.state_size)]) - states = states[len(cell.state_size):] - else: - nested_states.append([states[0]]) - states = states[1:] - nested_states = nested_states[::-1] - - # Call the cells in order and store the returned states. - new_nested_states = [] - for cell, states in zip(self.cells, nested_states): - inputs, states = cell.call(inputs, states, **kwargs) - new_nested_states.append(states) - - # Format the new states as a flat list - # in reverse cell order. - states = [] - for cell_states in new_nested_states[::-1]: - states += cell_states - return inputs, states + Do not use in a model -- it's not a valid layer! + Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead. - def build(self, input_shape): - for cell in self.cells: - if isinstance(cell, Layer): - cell.build(input_shape) - if hasattr(cell.state_size, '__len__'): - output_dim = cell.state_size[0] - else: - output_dim = cell.state_size - input_shape = (input_shape[0], input_shape[1], output_dim) - self.built = True + All recurrent layers (`LSTM`, `GRU`, `SimpleRNN`) also + follow the specifications of this class and accept + the keyword arguments listed below. - def get_config(self): - cells = [] - for cell in self.cells: - cells.append({ - 'class_name': cell.__class__.__name__, - 'config': cell.get_config() - }) - config = {'cells': cells} - base_config = super(StackedRNNCells, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + Example: - @classmethod - def from_config(cls, config, custom_objects=None): - from tensorflow.python.keras._impl.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top - cells = [] - for cell_config in config.pop('cells'): - cells.append( - deserialize_layer(cell_config, custom_objects=custom_objects)) - return cls(cells, **config) - - @property - def trainable_weights(self): - if not self.trainable: - return [] - weights = [] - for cell in self.cells: - if isinstance(cell, Layer): - weights += cell.trainable_weights - return weights - - @property - def non_trainable_weights(self): - weights = [] - for cell in self.cells: - if isinstance(cell, Layer): - weights += cell.non_trainable_weights - if not self.trainable: - trainable_weights = [] - for cell in self.cells: - if isinstance(cell, Layer): - trainable_weights += cell.trainable_weights - return trainable_weights + weights - return weights - - def get_weights(self): - """Retrieves the weights of the model. - - Returns: - A flat list of Numpy arrays. - """ - weights = [] - for cell in self.cells: - if isinstance(cell, Layer): - weights += cell.weights - return K.batch_get_value(weights) - - def set_weights(self, weights): - """Sets the weights of the model. - - Arguments: - weights: A list of Numpy arrays with shapes and types matching - the output of `model.get_weights()`. - """ - tuples = [] - for cell in self.cells: - if isinstance(cell, Layer): - num_param = len(cell.weights) - weights = weights[:num_param] - for sw, w in zip(cell.weights, weights): - tuples.append((sw, w)) - weights = weights[num_param:] - K.batch_set_value(tuples) - - @property - def losses(self): - losses = [] - for cell in self.cells: - if isinstance(cell, Layer): - cell_losses = cell.losses - losses += cell_losses - return losses - - def get_losses_for(self, inputs=None): - losses = [] - for cell in self.cells: - if isinstance(cell, Layer): - cell_losses = cell.get_losses_for(inputs) - losses += cell_losses - return losses - - -class RNN(Layer): - """Base class for recurrent layers. + ```python + # as the first layer in a Sequential model + model = Sequential() + model.add(LSTM(32, input_shape=(10, 64))) + # now model.output_shape == (None, 32) + # note: `None` is the batch dimension. + + # for subsequent layers, no need to specify the input size: + model.add(LSTM(16)) + + # to stack recurrent layers, you must use return_sequences=True + # on any recurrent layer that feeds into another recurrent layer. + # note that you only need to specify the input size on the first layer. + model = Sequential() + model.add(LSTM(64, input_dim=64, input_length=10, return_sequences=True)) + model.add(LSTM(32, return_sequences=True)) + model.add(LSTM(10)) + ``` Arguments: - cell: A RNN cell instance. A RNN cell is a class that has: - - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. The call method of the - cell can also take the optional argument `constants`, see - section "Note on passing external constants" below. - - a `state_size` attribute. This can be a single integer - (single state) in which case it is - the size of the recurrent state - (which should be the same as the size of the cell output). - This can also be a list/tuple of integers - (one size per state). In this case, the first entry - (`state_size[0]`) should be the same as - the size of the cell output. - It is also possible for `cell` to be a list of RNN cell instances, - in which cases the cells get stacked on after the other in the RNN, - implementing an efficient stacked RNN. - return_sequences: Boolean. Whether to return the last output. + weights: list of Numpy arrays to set as initial weights. + The list should have 3 elements, of shapes: + `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. + return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. @@ -247,9 +137,21 @@ class RNN(Layer): Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. + implementation: one of {0, 1, or 2}. + If set to 0, the RNN will use + an implementation that uses fewer, larger matrix products, + thus running faster on CPU but consuming more memory. + If set to 1, the RNN will use more matrix products, + but smaller ones, thus running slower + (may actually be faster on GPU) while consuming less memory. + If set to 2 (LSTM/GRU only), + the RNN will combine the input gate, + the forget gate and the output gate into a single matrix, + enabling more time-efficient parallelization on the GPU. + Note: RNN dropout must be shared for all gates, + resulting in a slightly reduced regularization. input_dim: dimensionality of the input (integer). - This argument (or alternatively, - the keyword argument `input_shape`) + This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model. input_length: Length of input sequences, to be specified when it is constant. @@ -261,7 +163,7 @@ class RNN(Layer): at the level of the first layer (e.g. via the `input_shape` argument) - Input shape: + Input shape:s 3D tensor with shape `(batch_size, timesteps, input_dim)`, (Optional) 2D tensors with shape `(batch_size, output_dim)`. @@ -276,7 +178,7 @@ class RNN(Layer): # Masking This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, - use an [Embedding](embeddings.md) layer with the `mask_zero` parameter + use an `Embedding` layer with the `mask_zero` parameter set to `True`. # Note on using statefulness in RNNs @@ -310,128 +212,42 @@ class RNN(Layer): calling `reset_states` with the keyword argument `states`. The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. - - # Note on passing external constants to RNNs - You can pass "external" constants to the cell using the `constants` - keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This - requires that the `cell.call` method accepts the same keyword argument - `constants`. Such constants can be used to condition the cell - transformation on additional static inputs (not changing over time), - a.k.a. an attention mechanism. - - Examples: - - ```python - # First, let's define a RNN Cell, as a layer subclass. - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = K.dot(inputs, self.kernel) - output = h + K.dot(prev_output, self.recurrent_kernel) - return output, [output] - - # Let's use this cell in a RNN layer: - - cell = MinimalRNNCell(32) - x = keras.Input((None, 5)) - layer = RNN(cell) - y = layer(x) - - # Here's how to use the cell to build a stacked RNN: - - cells = [MinimalRNNCell(32), MinimalRNNCell(64)] - x = keras.Input((None, 5)) - layer = RNN(cells) - y = layer(x) - ``` """ def __init__(self, - cell, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, - activity_regularizer=None, + implementation=0, **kwargs): - if isinstance(cell, (list, tuple)): - cell = StackedRNNCells(cell) - if not hasattr(cell, 'call'): - raise ValueError('`cell` should have a `call` method. ' - 'The RNN was passed:', cell) - if not hasattr(cell, 'state_size'): - raise ValueError('The RNN cell should have ' - 'an attribute `state_size` ' - '(tuple of integers, ' - 'one integer per RNN state).') - super(RNN, self).__init__( - activity_regularizer=regularizers.get(activity_regularizer), **kwargs) - self.cell = cell + super(Recurrent, self).__init__(**kwargs) self.return_sequences = return_sequences self.return_state = return_state self.go_backwards = go_backwards self.stateful = stateful self.unroll = unroll - + self.implementation = implementation self.supports_masking = True self.input_spec = [InputSpec(ndim=3)] self.state_spec = None - self._states = None - self.constants_spec = None - self._num_constants = None - - @property - def states(self): - if self._states is None: - if isinstance(self.cell.state_size, int): - num_states = 1 - else: - num_states = len(self.cell.state_size) - return [None for _ in range(num_states)] - return self._states - - @states.setter - def states(self, states): - self._states = states + self.dropout = 0 + self.recurrent_dropout = 0 def _compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] input_shape = tensor_shape.TensorShape(input_shape).as_list() - - if hasattr(self.cell.state_size, '__len__'): - output_dim = self.cell.state_size[0] - else: - output_dim = self.cell.state_size - if self.return_sequences: - output_shape = (input_shape[0], input_shape[1], output_dim) + output_shape = (input_shape[0], input_shape[1], self.units) else: - output_shape = (input_shape[0], output_dim) + output_shape = (input_shape[0], self.units) if self.return_state: - state_shape = [(input_shape[0], output_dim) for _ in self.states] - output_shape = [output_shape] + state_shape - else: - output_shape = output_shape + state_shape = [tensor_shape.TensorShape( + (input_shape[0], self.units)) for _ in self.states] + return [tensor_shape.TensorShape(output_shape)] + state_shape return tensor_shape.TensorShape(output_shape) def compute_mask(self, inputs, mask): @@ -441,123 +257,82 @@ class RNN(Layer): if self.return_state: state_mask = [None for _ in self.states] return [output_mask] + state_mask - else: - return output_mask - - def build(self, input_shape): - # Note input_shape will be list of shapes of initial states and - # constants if these are passed in __call__. - if self._num_constants is not None: - constants_shape = input_shape[-self._num_constants:] # pylint: disable=invalid-unary-operand-type - else: - constants_shape = None - - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) + return output_mask - batch_size = input_shape[0] if self.stateful else None - input_dim = input_shape[-1] - self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) - - # allow cell (if layer) to build before we set or validate state_spec - if isinstance(self.cell, Layer): - step_input_shape = (input_shape[0],) + input_shape[2:] - if constants_shape is not None: - self.cell.build([step_input_shape] + constants_shape) - else: - self.cell.build(step_input_shape) + def step(self, inputs, states): + raise NotImplementedError - # set or validate state_spec - if hasattr(self.cell.state_size, '__len__'): - state_size = list(self.cell.state_size) - else: - state_size = [self.cell.state_size] - - if self.state_spec is not None: - # initial_state was passed in call, check compatibility - if [spec.shape[-1] for spec in self.state_spec] != state_size: - raise ValueError( - 'An initial_state was passed that is not compatible with ' - '`cell.state_size`. Received `state_spec`={}; ' - 'However `cell.state_size` is ' - '{}'.format(self.state_spec, self.cell.state_size)) - else: - self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_size] - if self.stateful: - self.reset_states() + def get_constants(self, inputs, training=None): + return [] def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) initial_state = K.expand_dims(initial_state) # (samples, 1) - if hasattr(self.cell.state_size, '__len__'): - return [K.tile(initial_state, [1, dim]) for dim in self.cell.state_size] - else: - return [K.tile(initial_state, [1, self.cell.state_size])] + initial_state = K.tile(initial_state, [1, + self.units]) # (samples, output_dim) + initial_state = [initial_state for _ in range(len(self.states))] + return initial_state + + def preprocess_input(self, inputs, training=None): + return inputs - def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - inputs, initial_state, constants = self._standardize_args( - inputs, initial_state, constants) + def __call__(self, inputs, initial_state=None, **kwargs): + if (isinstance(inputs, (list, tuple)) and + len(inputs) > 1 + and initial_state is None): + initial_state = inputs[1:] + inputs = inputs[0] - if initial_state is None and constants is None: - return super(RNN, self).__call__(inputs, **kwargs) + # If `initial_state` is specified, + # and if it a Keras tensor, + # then add it to the inputs and temporarily + # modify the input spec to include the state. + if initial_state is None: + return super(Recurrent, self).__call__(inputs, **kwargs) - # If any of `initial_state` or `constants` are specified and are Keras - # tensors, then add them to the inputs and temporarily modify the - # input_spec to include them. + if not isinstance(initial_state, (list, tuple)): + initial_state = [initial_state] - additional_inputs = [] - additional_specs = [] - if initial_state is not None: - kwargs['initial_state'] = initial_state - additional_inputs += initial_state - self.state_spec = [ - InputSpec(shape=K.int_shape(state)) for state in initial_state - ] - additional_specs += self.state_spec - if constants is not None: - kwargs['constants'] = constants - additional_inputs += constants - self.constants_spec = [ - InputSpec(shape=K.int_shape(constant)) for constant in constants - ] - self._num_constants = len(constants) - additional_specs += self.constants_spec - # at this point additional_inputs cannot be empty - is_keras_tensor = hasattr(additional_inputs[0], '_keras_history') - for tensor in additional_inputs: + is_keras_tensor = hasattr(initial_state[0], '_keras_history') + for tensor in initial_state: if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state or constants of an RNN' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors') + raise ValueError('The initial state of an RNN layer cannot be' + ' specified with a mix of Keras tensors and' + ' non-Keras tensors') if is_keras_tensor: - # Compute the full input spec, including state and constants - full_input = [inputs] + additional_inputs - full_input_spec = self.input_spec + additional_specs - # Perform the call with temporarily replaced input_spec - original_input_spec = self.input_spec - self.input_spec = full_input_spec - output = super(RNN, self).__call__(full_input, **kwargs) - self.input_spec = original_input_spec + # Compute the full input spec, including state + input_spec = self.input_spec + state_spec = self.state_spec + if not isinstance(input_spec, list): + input_spec = [input_spec] + if not isinstance(state_spec, list): + state_spec = [state_spec] + self.input_spec = input_spec + state_spec + + # Compute the full inputs, including state + inputs = [inputs] + list(initial_state) + + # Perform the call + output = super(Recurrent, self).__call__(inputs, **kwargs) + + # Restore original input spec + self.input_spec = input_spec return output else: - return super(RNN, self).__call__(inputs, **kwargs) - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - constants=None): + kwargs['initial_state'] = initial_state + return super(Recurrent, self).__call__(inputs, **kwargs) + + def call(self, inputs, mask=None, training=None, initial_state=None): # input shape: `(samples, time (padded with zeros), input_dim)` # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. if isinstance(inputs, list): + initial_state = inputs[1:] inputs = inputs[0] - if initial_state is not None: + elif initial_state is not None: pass elif self.stateful: initial_state = self.states @@ -568,14 +343,13 @@ class RNN(Layer): mask = mask[0] if len(initial_state) != len(self.states): - raise ValueError( - 'Layer has ' + str(len(self.states)) + ' states but was passed ' + - str(len(initial_state)) + ' initial states.') + raise ValueError('Layer has ' + str(len(self.states)) + + ' states but was passed ' + str(len(initial_state)) + + ' initial states.') input_shape = K.int_shape(inputs) - timesteps = input_shape[1] - if self.unroll and timesteps in [None, 1]: + if self.unroll and input_shape[1] is None: raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined or equal to 1. \n' + 'time dimension is undefined. \n' '- If using a Sequential model, ' 'specify the time dimension by passing ' 'an `input_shape` or `batch_input_shape` ' @@ -585,31 +359,15 @@ class RNN(Layer): '- If using the functional API, specify ' 'the time dimension by passing a `shape` ' 'or `batch_shape` argument to your Input layer.') - - kwargs = {} - if has_arg(self.cell.call, 'training'): - kwargs['training'] = training - - if constants: - if not has_arg(self.cell.call, 'constants'): - raise ValueError('RNN cell does not support constants') - - def step(inputs, states): - constants = states[-self._num_constants:] # pylint: disable=invalid-unary-operand-type - states = states[:-self._num_constants] # pylint: disable=invalid-unary-operand-type - return self.cell.call(inputs, states, constants=constants, **kwargs) - else: - - def step(inputs, states): - return self.cell.call(inputs, states, **kwargs) - + constants = self.get_constants(inputs, training=None) + preprocessed_input = self.preprocess_input(inputs, training=None) last_output, outputs, states = K.rnn( - step, - inputs, + self.step, + preprocessed_input, initial_state, - constants=constants, go_backwards=self.go_backwards, mask=mask, + constants=constants, unroll=self.unroll) if self.stateful: updates = [] @@ -617,63 +375,21 @@ class RNN(Layer): updates.append((self.states[i], states[i])) self.add_update(updates, inputs) - if self.return_sequences: - output = outputs - else: - output = last_output - # Properly set learning phase - if getattr(last_output, '_uses_learning_phase', False): - output._uses_learning_phase = True + if 0 < self.dropout + self.recurrent_dropout: + last_output._uses_learning_phase = True + outputs._uses_learning_phase = True + + if not self.return_sequences: + outputs = last_output if self.return_state: if not isinstance(states, (list, tuple)): states = [states] else: states = list(states) - return [output] + states - else: - return output - - def _standardize_args(self, inputs, initial_state, constants): - """Standardize `__call__` arguments to a single list of tensor inputs. - - When running a model loaded from file, the input tensors - `initial_state` and `constants` can be passed to `RNN.__call__` as part - of `inputs` instead of by the dedicated keyword arguments. This method - makes sure the arguments are separated and that `initial_state` and - `constants` are lists of tensors (or None). - - Arguments: - inputs: tensor or list/tuple of tensors - initial_state: tensor or list of tensors or None - constants: tensor or list of tensors or None - - Returns: - inputs: tensor - initial_state: list of tensors or None - constants: list of tensors or None - """ - if isinstance(inputs, list): - assert initial_state is None and constants is None - if self._num_constants is not None: - constants = inputs[-self._num_constants:] # pylint: disable=invalid-unary-operand-type - inputs = inputs[:-self._num_constants] # pylint: disable=invalid-unary-operand-type - if len(inputs) > 1: - initial_state = inputs[1:] - inputs = inputs[0] - - def to_list_or_none(x): - if x is None or isinstance(x, list): - return x - if isinstance(x, tuple): - return list(x) - return [x] - - initial_state = to_list_or_none(initial_state) - constants = to_list_or_none(constants) - - return inputs, initial_state, constants + return [outputs] + states + return outputs def reset_states(self, states=None): if not self.stateful: @@ -692,19 +408,10 @@ class RNN(Layer): '`batch_shape` argument to your Input layer.') # initialize state if None if self.states[0] is None: - if hasattr(self.cell.state_size, '__len__'): - self.states = [ - K.zeros((batch_size, dim)) for dim in self.cell.state_size - ] - else: - self.states = [K.zeros((batch_size, self.cell.state_size))] + self.states = [K.zeros((batch_size, self.units)) for _ in self.states] elif states is None: - if hasattr(self.cell.state_size, '__len__'): - for state, dim in zip(self.states, self.cell.state_size): - K.set_value(state, np.zeros((batch_size, dim))) - else: - K.set_value(self.states[0], np.zeros((batch_size, - self.cell.state_size))) + for state in self.states: + K.set_value(state, np.zeros((batch_size, self.units))) else: if not isinstance(states, (list, tuple)): states = [states] @@ -714,16 +421,11 @@ class RNN(Layer): 'but it received ' + str(len(states)) + ' state values. Input received: ' + str(states)) for index, (value, state) in enumerate(zip(states, self.states)): - if hasattr(self.cell.state_size, '__len__'): - dim = self.cell.state_size[index] - else: - dim = self.cell.state_size - if value.shape != (batch_size, dim): - raise ValueError( - 'State ' + str(index) + ' is incompatible with layer ' + - self.name + ': expected shape=' + str( - (batch_size, dim)) + ', found shape=' + str(value.shape)) - # TODO(fchollet): consider batch calls to `set_value`. + if value.shape != (batch_size, self.units): + raise ValueError('State ' + str(index) + + ' is incompatible with layer ' + self.name + + ': expected shape=' + str((batch_size, self.units)) + + ', found shape=' + str(value.shape)) K.set_value(state, value) def get_config(self): @@ -732,94 +434,51 @@ class RNN(Layer): 'return_state': self.return_state, 'go_backwards': self.go_backwards, 'stateful': self.stateful, - 'unroll': self.unroll - } - if self._num_constants is not None: - config['num_constants'] = self._num_constants - - cell_config = self.cell.get_config() - config['cell'] = { - 'class_name': self.cell.__class__.__name__, - 'config': cell_config + 'unroll': self.unroll, + 'implementation': self.implementation } - base_config = super(RNN, self).get_config() + base_config = super(Recurrent, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @classmethod - def from_config(cls, config, custom_objects=None): - from tensorflow.python.keras._impl.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top - cell = deserialize_layer(config.pop('cell'), custom_objects=custom_objects) - num_constants = config.pop('num_constants', None) - layer = cls(cell, **config) - layer._num_constants = num_constants - return layer - - @property - def trainable_weights(self): - if isinstance(self.cell, Layer): - return self.cell.trainable_weights - return [] - - @property - def non_trainable_weights(self): - if isinstance(self.cell, Layer): - return self.cell.non_trainable_weights - return [] - - @property - def losses(self): - if isinstance(self.cell, Layer): - return self.cell.losses - return [] - - def get_losses_for(self, inputs=None): - if isinstance(self.cell, Layer): - cell_losses = self.cell.get_losses_for(inputs) - return cell_losses + super(RNN, self).get_losses_for(inputs) - return super(RNN, self).get_losses_for(inputs) - -class SimpleRNNCell(Layer): - """Cell class for SimpleRNN. +class SimpleRNN(Recurrent): + """Fully-connected RNN where the output is to be fed back to input. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). + activation: Activation function to use. + If you don't specify anything, no activation is applied If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). + used for the linear transformation of the inputs.. recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). + used for the linear transformation of the recurrent state.. + bias_initializer: Initializer for the bias vector. kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). + the `kernel` weights matrix. recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation").. kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). + the `kernel` weights matrix. recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. + + References: + - [A Theoretically Grounded Application of Dropout in Recurrent Neural + Networks](http://arxiv.org/abs/1512.05287) """ def __init__(self, @@ -832,13 +491,15 @@ class SimpleRNNCell(Layer): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., **kwargs): - super(SimpleRNNCell, self).__init__(**kwargs) + super(SimpleRNN, self).__init__( + activity_regularizer=regularizers.get(activity_regularizer), **kwargs) self.units = units self.activation = activations.get(activation) self.use_bias = use_bias @@ -857,13 +518,23 @@ class SimpleRNNCell(Layer): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_size = self.units - self._dropout_mask = None - self._recurrent_dropout_mask = None + self.state_spec = InputSpec(shape=(None, self.units)) def build(self, input_shape): + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_shape = tensor_shape.TensorShape(input_shape).as_list() + + batch_size = input_shape[0] if self.stateful else None + self.input_dim = input_shape[2] + self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) + + self.states = [None] + if self.stateful: + self.reset_states() + self.kernel = self.add_weight( - shape=(input_shape[-1], self.units), + shape=(self.input_dim, self.units), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -885,327 +556,146 @@ class SimpleRNNCell(Layer): self.bias = None self.built = True - def _generate_dropout_mask(self, inputs, training=None): - if 0 < self.dropout < 1: - ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) - - def dropped_inputs(): - return K.dropout(ones, self.dropout) + def preprocess_input(self, inputs, training=None): + if self.implementation > 0: + return inputs + else: + input_shape = inputs.get_shape().as_list() + input_dim = input_shape[2] + timesteps = input_shape[1] + return _time_distributed_dense( + inputs, + self.kernel, + self.bias, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) - self._dropout_mask = K.in_train_phase( - dropped_inputs, ones, training=training) + def step(self, inputs, states): + if self.implementation == 0: + h = inputs else: - self._dropout_mask = None + if 0 < self.dropout < 1: + h = K.dot(inputs * states[1], self.kernel) + else: + h = K.dot(inputs, self.kernel) + if self.bias is not None: + h = K.bias_add(h, self.bias) - def _generate_recurrent_dropout_mask(self, inputs, training=None): + prev_output = states[0] if 0 < self.recurrent_dropout < 1: + prev_output *= states[2] + output = h + K.dot(prev_output, self.recurrent_kernel) + if self.activation is not None: + output = self.activation(output) + + # Properly set learning phase on output tensor. + if 0 < self.dropout + self.recurrent_dropout: + output._uses_learning_phase = True + return output, [output] + + def get_constants(self, inputs, training=None): + constants = [] + if self.implementation != 0 and 0 < self.dropout < 1: + input_shape = K.int_shape(inputs) + input_dim = input_shape[-1] ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, self.units)) + ones = K.tile(ones, (1, int(input_dim))) def dropped_inputs(): return K.dropout(ones, self.dropout) - self._recurrent_dropout_mask = K.in_train_phase( - dropped_inputs, ones, training=training) + dp_mask = K.in_train_phase(dropped_inputs, ones, training=training) + constants.append(dp_mask) else: - self._recurrent_dropout_mask = None + constants.append(K.cast_to_floatx(1.)) - def call(self, inputs, states, training=None): - prev_output = states[0] - dp_mask = self._dropout_mask - rec_dp_mask = self._recurrent_dropout_mask + if 0 < self.recurrent_dropout < 1: + ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) + ones = K.tile(ones, (1, self.units)) + + def dropped_inputs(): # pylint: disable=function-redefined + return K.dropout(ones, self.recurrent_dropout) - if dp_mask is not None: - h = K.dot(inputs * dp_mask, self.kernel) + rec_dp_mask = K.in_train_phase(dropped_inputs, ones, training=training) + constants.append(rec_dp_mask) else: - h = K.dot(inputs, self.kernel) - if self.bias is not None: - h = K.bias_add(h, self.bias) + constants.append(K.cast_to_floatx(1.)) + return constants - if rec_dp_mask is not None: - prev_output *= rec_dp_mask - output = h + K.dot(prev_output, self.recurrent_kernel) - if self.activation is not None: - output = self.activation(output) + def get_config(self): + config = { + 'units': self.units, + 'activation': activations.serialize(self.activation), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize(self.kernel_initializer), + 'recurrent_initializer': + initializers.serialize(self.recurrent_initializer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), + 'recurrent_regularizer': + regularizers.serialize(self.recurrent_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), + 'activity_regularizer': + regularizers.serialize(self.activity_regularizer), + 'kernel_constraint': constraints.serialize(self.kernel_constraint), + 'recurrent_constraint': + constraints.serialize(self.recurrent_constraint), + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout + } + base_config = super(SimpleRNN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) - # Properly set learning phase on output tensor. - if 0 < self.dropout + self.recurrent_dropout: - if training is None: - output._uses_learning_phase = True - return output, [output] +class GRU(Recurrent): + """Gated Recurrent Unit - Cho et al. -class SimpleRNN(RNN): - """Fully-connected RNN where the output is to be fed back to input. + 2014. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). + activation: Activation function to use. If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step. use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). + used for the linear transformation of the inputs.. recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). + used for the linear transformation of the recurrent state.. + bias_initializer: Initializer for the bias vector. kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). + the `kernel` weights matrix. recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). + the output of the layer (its "activation").. kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). + the `kernel` weights matrix. recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - return_sequences: Boolean. Whether to return the last output. - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - """ - - def __init__(self, - units, - activation='tanh', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - if 'implementation' in kwargs: - kwargs.pop('implementation') - logging.warning('The `implementation` argument ' - 'in `SimpleRNN` has been deprecated. ' - 'Please remove it from your layer call.') - cell = SimpleRNNCell( - units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout) - super(SimpleRNN, self).__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - activity_regularizer=regularizers.get(activity_regularizer), - **kwargs) - # self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - self.cell._generate_dropout_mask(inputs, training=training) - self.cell._generate_recurrent_dropout_mask(inputs, training=training) - return super(SimpleRNN, self).call( - inputs, mask=mask, training=training, initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - def get_config(self): - config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout - } - base_config = super(SimpleRNN, self).get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - if 'implementation' in config: - config.pop('implementation') - return cls(**config) - -class GRUCell(Layer): - """Cell class for the GRU layer. - - Arguments: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - implementation: Implementation mode, either 1 or 2. - Mode 1 will structure its operations as a larger number of - smaller dot products and additions, whereas mode 2 will - batch them into fewer, larger operations. These modes will - have different performance profiles on different hardware and - for different applications. + References: + - [On the Properties of Neural Machine Translation: Encoder-Decoder + Approaches](https://arxiv.org/abs/1409.1259) + - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence + Modeling](http://arxiv.org/abs/1412.3555v1) + - [A Theoretically Grounded Application of Dropout in Recurrent Neural + Networks](http://arxiv.org/abs/1512.05287) """ def __init__(self, @@ -1219,14 +709,15 @@ class GRUCell(Layer): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., - implementation=1, **kwargs): - super(GRUCell, self).__init__(**kwargs) + super(GRU, self).__init__( + activity_regularizer=regularizers.get(activity_regularizer), **kwargs) self.units = units self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) @@ -1246,15 +737,22 @@ class GRUCell(Layer): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.implementation = implementation - self.state_size = self.units - self._dropout_mask = None - self._recurrent_dropout_mask = None + self.state_spec = InputSpec(shape=(None, self.units)) def build(self, input_shape): - input_dim = input_shape[-1] + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_shape = tensor_shape.TensorShape(input_shape).as_list() + batch_size = input_shape[0] if self.stateful else None + self.input_dim = input_shape[2] + self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) + + self.states = [None] + if self.stateful: + self.reset_states() + self.kernel = self.add_weight( - shape=(input_dim, self.units * 3), + shape=(self.input_dim, self.units * 3), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -1294,83 +792,89 @@ class GRUCell(Layer): self.bias_h = None self.built = True - def _generate_dropout_mask(self, inputs, training=None): - if 0 < self.dropout < 1: - ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) + def preprocess_input(self, inputs, training=None): + if self.implementation == 0: + input_shape = inputs.get_shape().as_list() + input_dim = input_shape[2] + timesteps = input_shape[1] + + x_z = _time_distributed_dense( + inputs, + self.kernel_z, + self.bias_z, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) + x_r = _time_distributed_dense( + inputs, + self.kernel_r, + self.bias_r, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) + x_h = _time_distributed_dense( + inputs, + self.kernel_h, + self.bias_h, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) + return K.concatenate([x_z, x_r, x_h], axis=2) + else: + return inputs + + def get_constants(self, inputs, training=None): + constants = [] + if self.implementation != 0 and 0 < self.dropout < 1: + input_shape = K.int_shape(inputs) + input_dim = input_shape[-1] + ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) + ones = K.tile(ones, (1, int(input_dim))) def dropped_inputs(): return K.dropout(ones, self.dropout) - self._dropout_mask = [ + dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] + constants.append(dp_mask) else: - self._dropout_mask = None + constants.append([K.cast_to_floatx(1.) for _ in range(3)]) - def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) - def dropped_inputs(): - return K.dropout(ones, self.dropout) + def dropped_inputs(): # pylint: disable=function-redefined + return K.dropout(ones, self.recurrent_dropout) - self._recurrent_dropout_mask = [ + rec_dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] + constants.append(rec_dp_mask) else: - self._recurrent_dropout_mask = None + constants.append([K.cast_to_floatx(1.) for _ in range(3)]) + return constants - def call(self, inputs, states, training=None): + def step(self, inputs, states): h_tm1 = states[0] # previous memory + dp_mask = states[1] # dropout matrices for recurrent units + rec_dp_mask = states[2] - # dropout matrices for input units - dp_mask = self._dropout_mask - # dropout matrices for recurrent units - rec_dp_mask = self._recurrent_dropout_mask - - if self.implementation == 1: - if 0. < self.dropout < 1.: - inputs_z = inputs * dp_mask[0] - inputs_r = inputs * dp_mask[1] - inputs_h = inputs * dp_mask[2] - else: - inputs_z = inputs - inputs_r = inputs - inputs_h = inputs - x_z = K.dot(inputs_z, self.kernel_z) - x_r = K.dot(inputs_r, self.kernel_r) - x_h = K.dot(inputs_h, self.kernel_h) - if self.use_bias: - x_z = K.bias_add(x_z, self.bias_z) - x_r = K.bias_add(x_r, self.bias_r) - x_h = K.bias_add(x_h, self.bias_h) - - if 0. < self.recurrent_dropout < 1.: - h_tm1_z = h_tm1 * rec_dp_mask[0] - h_tm1_r = h_tm1 * rec_dp_mask[1] - h_tm1_h = h_tm1 * rec_dp_mask[2] - else: - h_tm1_z = h_tm1 - h_tm1_r = h_tm1 - h_tm1_h = h_tm1 - z = self.recurrent_activation( - x_z + K.dot(h_tm1_z, self.recurrent_kernel_z)) - r = self.recurrent_activation( - x_r + K.dot(h_tm1_r, self.recurrent_kernel_r)) - - hh = self.activation(x_h + K.dot(r * h_tm1_h, self.recurrent_kernel_h)) - else: - if 0. < self.dropout < 1.: - inputs *= dp_mask[0] - matrix_x = K.dot(inputs, self.kernel) + if self.implementation == 2: + matrix_x = K.dot(inputs * dp_mask[0], self.kernel) if self.use_bias: matrix_x = K.bias_add(matrix_x, self.bias) - if 0. < self.recurrent_dropout < 1.: - h_tm1 *= rec_dp_mask[0] - matrix_inner = K.dot(h_tm1, self.recurrent_kernel[:, :2 * self.units]) + matrix_inner = K.dot(h_tm1 * rec_dp_mask[0], + self.recurrent_kernel[:, :2 * self.units]) x_z = matrix_x[:, :self.units] x_r = matrix_x[:, self.units:2 * self.units] @@ -1381,323 +885,116 @@ class GRUCell(Layer): r = self.recurrent_activation(x_r + recurrent_r) x_h = matrix_x[:, 2 * self.units:] - recurrent_h = K.dot(r * h_tm1, self.recurrent_kernel[:, 2 * self.units:]) + recurrent_h = K.dot(r * h_tm1 * rec_dp_mask[0], + self.recurrent_kernel[:, 2 * self.units:]) hh = self.activation(x_h + recurrent_h) + else: + if self.implementation == 0: + x_z = inputs[:, :self.units] + x_r = inputs[:, self.units:2 * self.units] + x_h = inputs[:, 2 * self.units:] + elif self.implementation == 1: + x_z = K.dot(inputs * dp_mask[0], self.kernel_z) + x_r = K.dot(inputs * dp_mask[1], self.kernel_r) + x_h = K.dot(inputs * dp_mask[2], self.kernel_h) + if self.use_bias: + x_z = K.bias_add(x_z, self.bias_z) + x_r = K.bias_add(x_r, self.bias_r) + x_h = K.bias_add(x_h, self.bias_h) + else: + raise ValueError('Unknown `implementation` mode.') + z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0], + self.recurrent_kernel_z)) + r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1], + self.recurrent_kernel_r)) + + hh = self.activation(x_h + K.dot(r * h_tm1 * rec_dp_mask[2], + self.recurrent_kernel_h)) h = z * h_tm1 + (1 - z) * hh if 0 < self.dropout + self.recurrent_dropout: - if training is None: - h._uses_learning_phase = True + h._uses_learning_phase = True return h, [h] - -class GRU(RNN): - # pylint: disable=line-too-long - """Gated Recurrent Unit - Cho et al. - - 2014. - - Arguments: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - implementation: Implementation mode, either 1 or 2. - Mode 1 will structure its operations as a larger number of - smaller dot products and additions, whereas mode 2 will - batch them into fewer, larger operations. These modes will - have different performance profiles on different hardware and - for different applications. - return_sequences: Boolean. Whether to return the last output. - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - - References: - - [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) - - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/abs/1412.3555v1) - - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) - """ - # pylint: enable=line-too-long - - def __init__(self, - units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - implementation=1, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - if implementation == 0: - logging.warning('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=1`.' - 'Please update your layer call.') - cell = GRUCell( - units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation) - super(GRU, self).__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - self.cell._generate_dropout_mask(inputs, training=training) - self.cell._generate_recurrent_dropout_mask(inputs, training=training) - return super(GRU, self).call( - inputs, mask=mask, training=training, initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - def get_config(self): config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), + 'units': self.units, + 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), + 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout } base_config = super(GRU, self).get_config() - del base_config['cell'] return dict(list(base_config.items()) + list(config.items())) - @classmethod - def from_config(cls, config): - if 'implementation' in config and config['implementation'] == 0: - config['implementation'] = 1 - return cls(**config) +class LSTM(Recurrent): + """Long-Short Term Memory unit - Hochreiter 1997. -class LSTMCell(Layer): - """Cell class for the LSTM layer. + For a step-by-step description of the algorithm, see + [this tutorial](http://deeplearning.net/tutorial/lstm.html). Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). + activation: Activation function to use. If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). + for the recurrent step. use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). + used for the linear transformation of the inputs.. recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). + used for the linear transformation of the recurrent state.. + bias_initializer: Initializer for the bias vector. unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Setting it to true will also force `bias_initializer="zeros"`. This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). + the `kernel` weights matrix. recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). + the `recurrent_kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation").. kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). + the `kernel` weights matrix. recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). + the `recurrent_kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - implementation: Implementation mode, either 1 or 2. - Mode 1 will structure its operations as a larger number of - smaller dot products and additions, whereas mode 2 will - batch them into fewer, larger operations. These modes will - have different performance profiles on different hardware and - for different applications. + + References: + - [Long short-term + memory]((http://www.bioinf.jku.at/publications/older/2604.pdf) + (original 1997 paper) + - [Supervised sequence labeling with recurrent neural + networks](http://www.cs.toronto.edu/~graves/preprint.pdf) + - [A Theoretically Grounded Application of Dropout in Recurrent Neural + Networks](http://arxiv.org/abs/1512.05287) """ def __init__(self, @@ -1712,14 +1009,15 @@ class LSTMCell(Layer): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., - implementation=1, **kwargs): - super(LSTMCell, self).__init__(**kwargs) + super(LSTM, self).__init__( + activity_regularizer=regularizers.get(activity_regularizer), **kwargs) self.units = units self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) @@ -1740,15 +1038,25 @@ class LSTMCell(Layer): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.implementation = implementation - self.state_size = (self.units, self.units) - self._dropout_mask = None - self._recurrent_dropout_mask = None + self.state_spec = [ + InputSpec(shape=(None, self.units)), + InputSpec(shape=(None, self.units)) + ] def build(self, input_shape): - input_dim = input_shape[-1] + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_shape = tensor_shape.TensorShape(input_shape).as_list() + batch_size = input_shape[0] if self.stateful else None + self.input_dim = input_shape[2] + self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) + + self.states = [None, None] + if self.stateful: + self.reset_states() + self.kernel = self.add_weight( - shape=(input_dim, self.units * 4), + shape=(self.input_dim, self.units * 4), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -1804,90 +1112,96 @@ class LSTMCell(Layer): self.bias_o = None self.built = True - def _generate_dropout_mask(self, inputs, training=None): - if 0 < self.dropout < 1: - ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) + def preprocess_input(self, inputs, training=None): + if self.implementation == 0: + input_shape = inputs.get_shape().as_list() + input_dim = input_shape[2] + timesteps = input_shape[1] + + x_i = _time_distributed_dense( + inputs, + self.kernel_i, + self.bias_i, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) + x_f = _time_distributed_dense( + inputs, + self.kernel_f, + self.bias_f, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) + x_c = _time_distributed_dense( + inputs, + self.kernel_c, + self.bias_c, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) + x_o = _time_distributed_dense( + inputs, + self.kernel_o, + self.bias_o, + self.dropout, + input_dim, + self.units, + timesteps, + training=training) + return K.concatenate([x_i, x_f, x_c, x_o], axis=2) + else: + return inputs + + def get_constants(self, inputs, training=None): + constants = [] + if self.implementation != 0 and 0 < self.dropout < 1: + input_shape = K.int_shape(inputs) + input_dim = input_shape[-1] + ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) + ones = K.tile(ones, (1, int(input_dim))) def dropped_inputs(): return K.dropout(ones, self.dropout) - self._dropout_mask = [ + dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] + constants.append(dp_mask) else: - self._dropout_mask = None + constants.append([K.cast_to_floatx(1.) for _ in range(4)]) - def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) - def dropped_inputs(): - return K.dropout(ones, self.dropout) + def dropped_inputs(): # pylint: disable=function-redefined + return K.dropout(ones, self.recurrent_dropout) - self._recurrent_dropout_mask = [ + rec_dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] + constants.append(rec_dp_mask) else: - self._recurrent_dropout_mask = None - - def call(self, inputs, states, training=None): - # dropout matrices for input units - dp_mask = self._dropout_mask - # dropout matrices for recurrent units - rec_dp_mask = self._recurrent_dropout_mask - - h_tm1 = states[0] # previous memory state - c_tm1 = states[1] # previous carry state - - if self.implementation == 1: - if 0 < self.dropout < 1.: - inputs_i = inputs * dp_mask[0] - inputs_f = inputs * dp_mask[1] - inputs_c = inputs * dp_mask[2] - inputs_o = inputs * dp_mask[3] - else: - inputs_i = inputs - inputs_f = inputs - inputs_c = inputs - inputs_o = inputs - x_i = K.dot(inputs_i, self.kernel_i) - x_f = K.dot(inputs_f, self.kernel_f) - x_c = K.dot(inputs_c, self.kernel_c) - x_o = K.dot(inputs_o, self.kernel_o) - if self.use_bias: - x_i = K.bias_add(x_i, self.bias_i) - x_f = K.bias_add(x_f, self.bias_f) - x_c = K.bias_add(x_c, self.bias_c) - x_o = K.bias_add(x_o, self.bias_o) - - if 0 < self.recurrent_dropout < 1.: - h_tm1_i = h_tm1 * rec_dp_mask[0] - h_tm1_f = h_tm1 * rec_dp_mask[1] - h_tm1_c = h_tm1 * rec_dp_mask[2] - h_tm1_o = h_tm1 * rec_dp_mask[3] - else: - h_tm1_i = h_tm1 - h_tm1_f = h_tm1 - h_tm1_c = h_tm1 - h_tm1_o = h_tm1 - i = self.recurrent_activation( - x_i + K.dot(h_tm1_i, self.recurrent_kernel_i)) - f = self.recurrent_activation( - x_f + K.dot(h_tm1_f, self.recurrent_kernel_f)) - c = f * c_tm1 + i * self.activation( - x_c + K.dot(h_tm1_c, self.recurrent_kernel_c)) - o = self.recurrent_activation( - x_o + K.dot(h_tm1_o, self.recurrent_kernel_o)) - else: - if 0. < self.dropout < 1.: - inputs *= dp_mask[0] - z = K.dot(inputs, self.kernel) - if 0. < self.recurrent_dropout < 1.: - h_tm1 *= rec_dp_mask[0] - z += K.dot(h_tm1, self.recurrent_kernel) + constants.append([K.cast_to_floatx(1.) for _ in range(4)]) + return constants + + def step(self, inputs, states): + h_tm1 = states[0] + c_tm1 = states[1] + dp_mask = states[2] + rec_dp_mask = states[3] + + if self.implementation == 2: + z = K.dot(inputs * dp_mask[0], self.kernel) + z += K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel) if self.use_bias: z = K.bias_add(z, self.bias) @@ -1900,606 +1214,57 @@ class LSTMCell(Layer): f = self.recurrent_activation(z1) c = f * c_tm1 + i * self.activation(z2) o = self.recurrent_activation(z3) + else: + if self.implementation == 0: + x_i = inputs[:, :self.units] + x_f = inputs[:, self.units:2 * self.units] + x_c = inputs[:, 2 * self.units:3 * self.units] + x_o = inputs[:, 3 * self.units:] + elif self.implementation == 1: + x_i = K.dot(inputs * dp_mask[0], self.kernel_i) + self.bias_i + x_f = K.dot(inputs * dp_mask[1], self.kernel_f) + self.bias_f + x_c = K.dot(inputs * dp_mask[2], self.kernel_c) + self.bias_c + x_o = K.dot(inputs * dp_mask[3], self.kernel_o) + self.bias_o + else: + raise ValueError('Unknown `implementation` mode.') + i = self.recurrent_activation(x_i + K.dot(h_tm1 * rec_dp_mask[0], + self.recurrent_kernel_i)) + f = self.recurrent_activation(x_f + K.dot(h_tm1 * rec_dp_mask[1], + self.recurrent_kernel_f)) + c = f * c_tm1 + i * self.activation( + x_c + K.dot(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c)) + o = self.recurrent_activation(x_o + K.dot(h_tm1 * rec_dp_mask[3], + self.recurrent_kernel_o)) h = o * self.activation(c) if 0 < self.dropout + self.recurrent_dropout: - if training is None: - h._uses_learning_phase = True + h._uses_learning_phase = True return h, [h, c] - -class LSTM(RNN): - # pylint: disable=line-too-long - """Long-Short Term Memory layer - Hochreiter 1997. - - Arguments: - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - unit_forget_bias: Boolean. - If True, add 1 to the bias of the forget gate at initialization. - Setting it to true will also force `bias_initializer="zeros"`. - This is recommended in [Jozefowicz et - al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - implementation: Implementation mode, either 1 or 2. - Mode 1 will structure its operations as a larger number of - smaller dot products and additions, whereas mode 2 will - batch them into fewer, larger operations. These modes will - have different performance profiles on different hardware and - for different applications. - return_sequences: Boolean. Whether to return the last output. - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - - References: - - [Long short-term memory](http://www.bioinf.jku.at/publications/older/2604.pdf) - - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015) - - [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf) - - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) - """ - # pylint: enable=line-too-long - - def __init__(self, - units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - implementation=1, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - if implementation == 0: - logging.warning('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=1`.' - 'Please update your layer call.') - cell = LSTMCell( - units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - unit_forget_bias=unit_forget_bias, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation) - super(LSTM, self).__init__( - cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - self.cell._generate_dropout_mask(inputs, training=training) - self.cell._generate_recurrent_dropout_mask(inputs, training=training) - return super(LSTM, self).call( - inputs, mask=mask, training=training, initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def unit_forget_bias(self): - return self.cell.unit_forget_bias - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - def get_config(self): config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), + 'units': self.units, + 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'unit_forget_bias': - self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'unit_forget_bias': self.unit_forget_bias, + 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), + 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout } base_config = super(LSTM, self).get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - if 'implementation' in config and config['implementation'] == 0: - config['implementation'] = 1 - return cls(**config) - - -class Recurrent(Layer): - """Deprecated abstract base class for recurrent layers. - - It still exists because it is leveraged by the convolutional-recurrent layers. - It will be removed entirely in the future. - It was never part of the public API. - Do not use. - - Arguments: - weights: list of Numpy arrays to set as initial weights. - The list should have 3 elements, of shapes: - `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - implementation: one of {0, 1, or 2}. - If set to 0, the RNN will use - an implementation that uses fewer, larger matrix products, - thus running faster on CPU but consuming more memory. - If set to 1, the RNN will use more matrix products, - but smaller ones, thus running slower - (may actually be faster on GPU) while consuming less memory. - If set to 2 (LSTM/GRU only), - the RNN will combine the input gate, - the forget gate and the output gate into a single matrix, - enabling more time-efficient parallelization on the GPU. - Note: RNN dropout must be shared for all gates, - resulting in a slightly reduced regularization. - input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - input_length: Length of input sequences, to be specified - when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - Note that if the recurrent layer is not the first layer - in your model, you would need to specify the input length - at the level of the first layer - (e.g. via the `input_shape` argument) - - Input shape: - 3D tensor with shape `(batch_size, timesteps, input_dim)`, - (Optional) 2D tensors with shape `(batch_size, output_dim)`. - - Output shape: - - if `return_state`: a list of tensors. The first tensor is - the output. The remaining tensors are the last states, - each with shape `(batch_size, units)`. - - if `return_sequences`: 3D tensor with shape - `(batch_size, timesteps, units)`. - - else, 2D tensor with shape `(batch_size, units)`. - - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an `Embedding` layer with the `mask_zero` parameter - set to `True`. - - # Note on using statefulness in RNNs - You can set RNN layers to be 'stateful', which means that the states - computed for the samples in one batch will be reused as initial states - for the samples in the next batch. This assumes a one-to-one mapping - between samples in different successive batches. - - To enable statefulness: - - specify `stateful=True` in the layer constructor. - - specify a fixed batch size for your model, by passing - if sequential model: - `batch_input_shape=(...)` to the first layer in your model. - else for functional model with 1 or more Input layers: - `batch_shape=(...)` to all the first layers in your model. - This is the expected shape of your inputs - *including the batch size*. - It should be a tuple of integers, e.g. `(32, 10, 100)`. - - specify `shuffle=False` when calling fit(). - - To reset the states of your model, call `.reset_states()` on either - a specific layer, or on your entire model. - - # Note on specifying the initial state of RNNs - You can specify the initial state of RNN layers symbolically by - calling them with the keyword argument `initial_state`. The value of - `initial_state` should be a tensor or list of tensors representing - the initial state of the RNN layer. - - You can specify the initial state of RNN layers numerically by - calling `reset_states` with the keyword argument `states`. The value of - `states` should be a numpy array or list of numpy arrays representing - the initial state of the RNN layer. - """ - - def __init__(self, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - implementation=0, - **kwargs): - super(Recurrent, self).__init__(**kwargs) - self.return_sequences = return_sequences - self.return_state = return_state - self.go_backwards = go_backwards - self.stateful = stateful - self.unroll = unroll - self.implementation = implementation - self.supports_masking = True - self.input_spec = [InputSpec(ndim=3)] - self.state_spec = None - self.dropout = 0 - self.recurrent_dropout = 0 - - def _compute_output_shape(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tensor_shape.TensorShape(input_shape).as_list() - if self.return_sequences: - output_shape = (input_shape[0], input_shape[1], self.units) - else: - output_shape = (input_shape[0], self.units) - - if self.return_state: - state_shape = [tensor_shape.TensorShape( - (input_shape[0], self.units)) for _ in self.states] - return [tensor_shape.TensorShape(output_shape)] + state_shape - return tensor_shape.TensorShape(output_shape) - - def compute_mask(self, inputs, mask): - if isinstance(mask, list): - mask = mask[0] - output_mask = mask if self.return_sequences else None - if self.return_state: - state_mask = [None for _ in self.states] - return [output_mask] + state_mask - return output_mask - - def step(self, inputs, states): - raise NotImplementedError - - def get_constants(self, inputs, training=None): - return [] - - def get_initial_state(self, inputs): - # build an all-zero tensor of shape (samples, output_dim) - initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) - initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) - initial_state = K.expand_dims(initial_state) # (samples, 1) - initial_state = K.tile(initial_state, [1, - self.units]) # (samples, output_dim) - initial_state = [initial_state for _ in range(len(self.states))] - return initial_state - - def preprocess_input(self, inputs, training=None): - return inputs - - def __call__(self, inputs, initial_state=None, **kwargs): - if (isinstance(inputs, (list, tuple)) and - len(inputs) > 1 - and initial_state is None): - initial_state = inputs[1:] - inputs = inputs[0] - - # If `initial_state` is specified, - # and if it a Keras tensor, - # then add it to the inputs and temporarily - # modify the input spec to include the state. - if initial_state is None: - return super(Recurrent, self).__call__(inputs, **kwargs) - - if not isinstance(initial_state, (list, tuple)): - initial_state = [initial_state] - - is_keras_tensor = hasattr(initial_state[0], '_keras_history') - for tensor in initial_state: - if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state of an RNN layer cannot be' - ' specified with a mix of Keras tensors and' - ' non-Keras tensors') - - if is_keras_tensor: - # Compute the full input spec, including state - input_spec = self.input_spec - state_spec = self.state_spec - if not isinstance(input_spec, list): - input_spec = [input_spec] - if not isinstance(state_spec, list): - state_spec = [state_spec] - self.input_spec = input_spec + state_spec - - # Compute the full inputs, including state - inputs = [inputs] + list(initial_state) - - # Perform the call - output = super(Recurrent, self).__call__(inputs, **kwargs) - - # Restore original input spec - self.input_spec = input_spec - return output - else: - kwargs['initial_state'] = initial_state - return super(Recurrent, self).__call__(inputs, **kwargs) - - def call(self, inputs, mask=None, training=None, initial_state=None): - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if isinstance(inputs, list): - initial_state = inputs[1:] - inputs = inputs[0] - elif initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if isinstance(mask, list): - mask = mask[0] - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + str(len(initial_state)) + - ' initial states.') - input_shape = K.int_shape(inputs) - if self.unroll and input_shape[1] is None: - raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined. \n' - '- If using a Sequential model, ' - 'specify the time dimension by passing ' - 'an `input_shape` or `batch_input_shape` ' - 'argument to your first layer. If your ' - 'first layer is an Embedding, you can ' - 'also use the `input_length` argument.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a `shape` ' - 'or `batch_shape` argument to your Input layer.') - constants = self.get_constants(inputs, training=None) - preprocessed_input = self.preprocess_input(inputs, training=None) - last_output, outputs, states = K.rnn( - self.step, - preprocessed_input, - initial_state, - go_backwards=self.go_backwards, - mask=mask, - constants=constants, - unroll=self.unroll) - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - # Properly set learning phase - if 0 < self.dropout + self.recurrent_dropout: - last_output._uses_learning_phase = True - outputs._uses_learning_phase = True - - if not self.return_sequences: - outputs = last_output - - if self.return_state: - if not isinstance(states, (list, tuple)): - states = [states] - else: - states = list(states) - return [outputs] + states - return outputs - - def reset_states(self, states=None): - if not self.stateful: - raise AttributeError('Layer must be stateful.') - batch_size = self.input_spec[0].shape[0] - if not batch_size: - raise ValueError('If a RNN is stateful, it needs to know ' - 'its batch size. Specify the batch size ' - 'of your input tensors: \n' - '- If using a Sequential model, ' - 'specify the batch size by passing ' - 'a `batch_input_shape` ' - 'argument to your first layer.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a ' - '`batch_shape` argument to your Input layer.') - # initialize state if None - if self.states[0] is None: - self.states = [K.zeros((batch_size, self.units)) for _ in self.states] - elif states is None: - for state in self.states: - K.set_value(state, np.zeros((batch_size, self.units))) - else: - if not isinstance(states, (list, tuple)): - states = [states] - if len(states) != len(self.states): - raise ValueError('Layer ' + self.name + ' expects ' + - str(len(self.states)) + ' states, ' - 'but it received ' + str(len(states)) + - ' state values. Input received: ' + str(states)) - for index, (value, state) in enumerate(zip(states, self.states)): - if value.shape != (batch_size, self.units): - raise ValueError('State ' + str(index) + - ' is incompatible with layer ' + self.name + - ': expected shape=' + str((batch_size, self.units)) + - ', found shape=' + str(value.shape)) - K.set_value(state, value) - - def get_config(self): - config = { - 'return_sequences': self.return_sequences, - 'return_state': self.return_state, - 'go_backwards': self.go_backwards, - 'stateful': self.stateful, - 'unroll': self.unroll, - 'implementation': self.implementation - } - base_config = super(Recurrent, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py deleted file mode 100644 index b1f89a30bb..0000000000 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py +++ /dev/null @@ -1,378 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for recurrent layers functionality other than GRU, LSTM, SimpleRNN. - -See also: lstm_test.py, gru_test.py, simplernn_test.py. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.python.keras._impl import keras -from tensorflow.python.platform import test - - -class RNNTest(test.TestCase): - - def test_minimal_rnn_cell_non_layer(self): - - class MinimalRNNCell(object): - - def __init__(self, units, input_dim): - self.units = units - self.state_size = units - self.kernel = keras.backend.variable( - np.random.random((input_dim, units))) - - def call(self, inputs, states): - prev_output = states[0] - output = keras.backend.dot(inputs, self.kernel) + prev_output - return output, [output] - - with self.test_session(): - # Basic test case. - cell = MinimalRNNCell(32, 5) - x = keras.Input((None, 5)) - layer = keras.layers.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. - cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(32, 8), - MinimalRNNCell(32, 32)] - layer = keras.layers.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - def test_minimal_rnn_cell_non_layer_multiple_states(self): - - class MinimalRNNCell(object): - - def __init__(self, units, input_dim): - self.units = units - self.state_size = (units, units) - self.kernel = keras.backend.variable( - np.random.random((input_dim, units))) - - def call(self, inputs, states): - prev_output_1 = states[0] - prev_output_2 = states[1] - output = keras.backend.dot(inputs, self.kernel) - output += prev_output_1 - output -= prev_output_2 - return output, [output * 2, output * 3] - - with self.test_session(): - # Basic test case. - cell = MinimalRNNCell(32, 5) - x = keras.Input((None, 5)) - layer = keras.layers.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. - cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(16, 8), - MinimalRNNCell(32, 16)] - layer = keras.layers.RNN(cells) - assert layer.cell.state_size == (32, 32, 16, 16, 8, 8) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - def test_minimal_rnn_cell_layer(self): - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = keras.backend.dot(inputs, self.kernel) - output = h + keras.backend.dot(prev_output, self.recurrent_kernel) - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(MinimalRNNCell, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - with self.test_session(): - # Test basic case. - x = keras.Input((None, 5)) - cell = MinimalRNNCell(32) - layer = keras.layers.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = keras.layers.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # Test stacking. - cells = [MinimalRNNCell(8), - MinimalRNNCell(12), - MinimalRNNCell(32)] - layer = keras.layers.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacked RNN serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = keras.layers.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - def test_rnn_cell_with_constants_layer(self): - - class RNNCellWithConstants(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) - - def build(self, input_shape): - if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed - - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(constant_shape[-1], self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = keras.backend.dot(inputs, self.input_kernel) - h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - with self.test_session(): - # Test basic case. - x = keras.Input((None, 5)) - c = keras.Input((3,)) - cell = RNNCellWithConstants(32) - layer = keras.layers.RNN(cell) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) - - with self.test_session(): - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with keras.utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - with self.test_session(): - # test flat list inputs - with keras.utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer([x, c]) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, c_np]) - self.assertAllClose(y_np, y_np_3, atol=1e-4) - - def test_rnn_cell_with_constants_layer_passing_initial_state(self): - - class RNNCellWithConstants(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) - - def build(self, input_shape): - if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed - - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(constant_shape[-1], self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = keras.backend.dot(inputs, self.input_kernel) - h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - with self.test_session(): - # Test basic case. - x = keras.Input((None, 5)) - c = keras.Input((3,)) - s = keras.Input((32,)) - cell = RNNCellWithConstants(32) - layer = keras.layers.RNN(cell) - y = layer(x, initial_state=s, constants=c) - model = keras.models.Model([x, s, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) - - with self.test_session(): - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - s_np = np.random.random((6, 32)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, s_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with keras.utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer(x, initial_state=s, constants=c) - model = keras.models.Model([x, s, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, s_np, c_np]) - self.assertAllClose(y_np, y_np_2, atol=1e-4) - - # verify that state is used - y_np_2_different_s = model.predict([x_np, s_np + 10., c_np]) - with self.assertRaises(AssertionError): - self.assertAllClose(y_np, y_np_2_different_s, atol=1e-4) - - with self.test_session(): - # test flat list inputs - with keras.utils.CustomObjectScope(custom_objects): - layer = keras.layers.RNN.from_config(config.copy()) - y = layer([x, s, c]) - model = keras.models.Model([x, s, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, s_np, c_np]) - self.assertAllClose(y_np, y_np_3, atol=1e-4) - - def test_stacked_rnn_attributes(self): - cells = [keras.layers.LSTMCell(3), - keras.layers.LSTMCell(3, kernel_regularizer='l2')] - layer = keras.layers.RNN(cells) - layer.build((None, None, 5)) - - # Test regularization losses - assert len(layer.losses) == 1 - - # Test weights - assert len(layer.trainable_weights) == 6 - cells[0].trainable = False - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 3 - - # Test `get_losses_for` - x = keras.Input((None, 5)) - y = keras.backend.sum(x) - cells[0].add_loss(y, inputs=x) - assert layer.get_losses_for(x) == [y] - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py b/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py index 7edebdacd0..9833485236 100644 --- a/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py @@ -156,10 +156,8 @@ class SimpleRNNLayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - - x = keras.backend.variable(np.ones((2, 3, 2))) - layer(x) - self.assertEqual(len(layer.get_losses_for(x)), 1) + layer(keras.backend.variable(np.ones((2, 3, 2)))) + self.assertEqual(len(layer.losses), 4) def test_constraints_SimpleRNN(self): embedding_dim = 4 @@ -177,9 +175,9 @@ class SimpleRNNLayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.cell.kernel.constraint, k_constraint) - self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.cell.bias.constraint, b_constraint) + self.assertEqual(layer.kernel.constraint, k_constraint) + self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.bias.constraint, b_constraint) def test_with_masking_layer_SimpleRNN(self): layer_class = keras.layers.SimpleRNN diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py index b94bf8f0f6..acf0a5e179 100644 --- a/tensorflow/python/keras/layers/__init__.py +++ b/tensorflow/python/keras/layers/__init__.py @@ -134,11 +134,6 @@ from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool2D from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool3D # Recurrent layers. -from tensorflow.python.keras._impl.keras.layers.recurrent import RNN -from tensorflow.python.keras._impl.keras.layers.recurrent import StackedRNNCells -from tensorflow.python.keras._impl.keras.layers.recurrent import SimpleRNNCell -from tensorflow.python.keras._impl.keras.layers.recurrent import GRUCell -from tensorflow.python.keras._impl.keras.layers.recurrent import LSTMCell from tensorflow.python.keras._impl.keras.layers.recurrent import SimpleRNN from tensorflow.python.keras._impl.keras.layers.recurrent import GRU from tensorflow.python.keras._impl.keras.layers.recurrent import LSTM diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 8d6f863a4c..7fa504e85e 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1186,7 +1186,6 @@ cuda_py_test( srcs = ["check_ops_test.py"], additional_deps = [ "//third_party/py/numpy", - "//tensorflow/python/eager:context", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 6eb9c66d06..76b80e60ea 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -107,22 +107,41 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): def setUp(self): self.rng = np.random.RandomState(42) - def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None): + def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None): """Check equivalence between boolean_mask and numpy masking.""" if make_mask is None: make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool) arr = np.random.rand(*arr_shape) mask = make_mask(arr_shape[:ndims_mask]) - masked_arr = arr[mask] - with self.test_session(): - masked_tensor = array_ops.boolean_mask(arr, mask) + if axis is not None: + mask = make_mask(arr_shape[axis:ndims_mask+axis]) + if axis is None or axis == 0: + masked_arr = arr[mask] + elif axis == 1: + masked_arr = arr[:,mask] + elif axis == 2: + masked_arr = arr[:,:,mask] + with self.test_session() as sess: + masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) # Leading dimension size of masked_tensor is always unknown until runtime # since we don't how many elements will be kept. - self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:]) + leading = 1 if axis is None else axis + 1 + self.assertAllEqual(masked_tensor.get_shape()[leading:], + masked_arr.shape[leading:]) self.assertAllClose(masked_arr, masked_tensor.eval()) + def testMaskDim1ArrDim2Axis1(self): + ndims_mask = 1 + for arr_shape in [(1, 1), (2, 2), (2, 5)]: + self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) + + def testMaskDim2ArrDim2Axis1(self): + ndims_mask = 2 + for arr_shape in [(1, 1), (2, 2), (2, 5)]: + self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) + def testMaskDim1ArrDim1(self): ndims_mask = 1 for arr_shape in [(1,), (2,), (3,), (10,)]: @@ -486,7 +505,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): _ = checker2[...] _ = checker2[tuple()] - def testFloatSlicedArrayAndInt64IndicesGPU(self): + def testInt64GPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") with self.test_session(use_gpu=True, force_gpu=True): @@ -497,17 +516,6 @@ class StridedSliceTest(test_util.TensorFlowTestCase): s = array_ops.strided_slice(x, begin, end, strides) self.assertAllEqual([3.], self.evaluate(s)) - def testInt64SlicedArrayAndIndicesGPU(self): - if not test_util.is_gpu_available(): - self.skipTest("No GPU available") - with self.test_session(use_gpu=True, force_gpu=True): - x = constant_op.constant([1, 2, 3], dtype=dtypes.int64) - begin = constant_op.constant([2], dtype=dtypes.int64) - end = constant_op.constant([3], dtype=dtypes.int64) - strides = constant_op.constant([1], dtype=dtypes.int64) - s = array_ops.strided_slice(x, begin, end, strides) - self.assertAllEqual([3], self.evaluate(s)) - def testDegenerateSlices(self): with self.test_session(use_gpu=True): checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR) @@ -1070,6 +1078,16 @@ class PadTest(test_util.TensorFlowTestCase): [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) +class InvertPermutationTest(test_util.TensorFlowTestCase): + + def testInvertPermutation(self): + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype) + y = array_ops.invert_permutation(x) + self.assertAllEqual(y.get_shape(), [5]) + self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1]) + if __name__ == "__main__": test_lib.main() diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 7a610debd1..79285476b4 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -25,11 +25,10 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest - class BincountTest(test_util.TensorFlowTestCase): def test_empty(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0]) self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0]) @@ -42,7 +41,7 @@ class BincountTest(test_util.TensorFlowTestCase): np.float64) def test_values(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1]) arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5] @@ -57,14 +56,14 @@ class BincountTest(test_util.TensorFlowTestCase): math_ops.bincount(np.arange(10000)).eval(), np.ones(10000)) def test_maxlength(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0]) self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1]) self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), []) def test_random_with_weights(self): num_samples = 10000 - with self.test_session(): + with self.test_session(use_gpu=True): np.random.seed(42) for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]: arr = np.random.randint(0, 1000, num_samples) @@ -72,17 +71,29 @@ class BincountTest(test_util.TensorFlowTestCase): weights = np.random.randint(-100, 100, num_samples) else: weights = np.random.random(num_samples) - self.assertAllEqual( + self.assertAllClose( math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights)) + def test_random_without_weights(self): + num_samples = 10000 + with self.test_session(use_gpu=True): + np.random.seed(42) + for dtype in [np.int32, np.float32]: + arr = np.random.randint(0, 1000, num_samples) + weights = np.ones(num_samples).astype(dtype) + self.assertAllClose( + math_ops.bincount(arr, None).eval(), + np.bincount(arr, weights)) + def test_zero_weights(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(), np.zeros(1000)) def test_negative(self): + # unsorted_segment_sum will only report InvalidArgumentError on CPU with self.test_session(): with self.assertRaises(errors.InvalidArgumentError): math_ops.bincount([1, 2, 3, -1, 6, 8]).eval() diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py index 6db3592055..e612b1c134 100644 --- a/tensorflow/python/kernel_tests/bucketize_op_test.py +++ b/tensorflow/python/kernel_tests/bucketize_op_test.py @@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]), boundaries=[0, 3, 8, 11]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def testFloat(self): @@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]), boundaries=[0., 3., 8., 11.]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def test2DInput(self): @@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]), boundaries=[0, 3, 8, 11]) expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def testInvalidBoundariesOrder(self): op = math_ops._bucketize( constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11]) - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: with self.assertRaisesRegexp( errors_impl.InvalidArgumentError, "Expected sorted boundaries"): sess.run(op) diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index 43785adcee..ed859e3774 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -20,13 +20,10 @@ from __future__ import print_function import numpy as np -from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.platform import test @@ -74,178 +71,110 @@ class AssertProperIterableTest(test.TestCase): class AssertEqualTest(test.TestCase): - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal(self): - small = constant_op.constant([1, 2], name="small") - with ops.control_dependencies([check_ops.assert_equal(small, small)]): - out = array_ops.identity(small) - self.evaluate(out) - - def test_returns_none_with_eager(self): - with context.eager_mode(): + with self.test_session(): small = constant_op.constant([1, 2], name="small") - x = check_ops.assert_equal(small, small) - assert x is None + with ops.control_dependencies([check_ops.assert_equal(small, small)]): + out = array_ops.identity(small) + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater(self): - # Static check - static_small = constant_op.constant([1, 2], name="small") - static_big = constant_op.constant([3, 4], name="big") - with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): - check_ops.assert_equal(static_big, static_small, message="fail") - - # Dynamic check - if context.in_graph_mode(): - with self.test_session(): - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies( - [check_ops.assert_equal( - big, small, message="fail")]): - out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*big.*small"): - out.eval(feed_dict={small: [1, 2], big: [3, 4]}) - - def test_error_message_eager(self): - expected_error_msg_full = r"""big does not equal small -Condition x == y did not hold. -Indices of first 6 different values: -\[\[0 0\] - \[1 1\] - \[2 0\]\] -Corresponding x values: -\[2 3 6\] -Corresponding y values: -\[20 30 60\] -First 6 elements of x: -\[2 2 3 3 6 6\] -First 6 elements of y: -\[20 2 3 30 60 6\] -""" - expected_error_msg_short = r"""big does not equal small -Condition x == y did not hold. -Indices of first 2 different values: -\[\[0 0\] - \[1 1\]\] -Corresponding x values: -\[2 3\] -Corresponding y values: -\[20 30\] -First 2 elements of x: -\[2 2\] -First 2 elements of y: -\[20 2\] -""" - with context.eager_mode(): - big = constant_op.constant([[2, 2], [3, 3], [6, 6]]) - small = constant_op.constant([[20, 2], [3, 30], [60, 6]]) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - expected_error_msg_full): - check_ops.assert_equal(big, small, message="big does not equal small", - summarize=10) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - expected_error_msg_short): - check_ops.assert_equal(big, small, message="big does not equal small", - summarize=2) - - @test_util.run_in_graph_and_eager_modes() + with self.test_session(): + # Static check + static_small = constant_op.constant([1, 2], name="small") + static_big = constant_op.constant([3, 4], name="big") + with self.assertRaisesRegexp(ValueError, "fail"): + check_ops.assert_equal(static_big, static_small, message="fail") + # Dynamic check + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies( + [check_ops.assert_equal( + big, small, message="fail")]): + out = array_ops.identity(small) + with self.assertRaisesOpError("fail.*big.*small"): + out.eval(feed_dict={small: [1, 2], big: [3, 4]}) + def test_raises_when_less(self): - # Static check - static_small = constant_op.constant([3, 1], name="small") - static_big = constant_op.constant([4, 2], name="big") - with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): - check_ops.assert_equal(static_big, static_small, message="fail") - - # Dynamic check - if context.in_graph_mode(): - with self.test_session(): - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies([check_ops.assert_equal(small, big)]): - out = array_ops.identity(small) - with self.assertRaisesOpError("small.*big"): - out.eval(feed_dict={small: [3, 1], big: [4, 2]}) + with self.test_session(): + # Static check + static_small = constant_op.constant([3, 1], name="small") + static_big = constant_op.constant([4, 2], name="big") + with self.assertRaisesRegexp(ValueError, "fail"): + check_ops.assert_equal(static_big, static_small, message="fail") + # Dynamic check + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies([check_ops.assert_equal(small, big)]): + out = array_ops.identity(small) + with self.assertRaisesOpError("small.*big"): + out.eval(feed_dict={small: [3, 1], big: [4, 2]}) - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal_and_broadcastable_shapes(self): - small = constant_op.constant([[1, 2], [1, 2]], name="small") - small_2 = constant_op.constant([1, 2], name="small_2") - with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): - out = array_ops.identity(small) - self.evaluate(out) - - @test_util.run_in_graph_and_eager_modes() - def test_raises_when_equal_but_non_broadcastable_shapes(self): - small = constant_op.constant([1, 1, 1], name="small") - small_2 = constant_op.constant([1, 1], name="small_2") - # The exception in eager and non-eager mode is different because - # eager mode relies on shape check done as part of the C++ op, while - # graph mode does shape checks when creating the `Operation` instance. - with self.assertRaisesRegexp( - (errors.InvalidArgumentError, ValueError), - (r"Incompatible shapes: \[3\] vs. \[2\]|" - r"Dimensions must be equal, but are 3 and 2")): + with self.test_session(): + small = constant_op.constant([1, 2], name="small") + small_2 = constant_op.constant([1, 2], name="small_2") with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): out = array_ops.identity(small) - self.evaluate(out) + out.eval() + + def test_raises_when_equal_but_non_broadcastable_shapes(self): + with self.test_session(): + small = constant_op.constant([1, 1, 1], name="small") + small_2 = constant_op.constant([1, 1], name="small_2") + with self.assertRaisesRegexp(ValueError, "must be"): + with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): + out = array_ops.identity(small) + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies([check_ops.assert_equal(larry, curly)]): - out = array_ops.identity(larry) - self.evaluate(out) + with self.test_session(): + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies([check_ops.assert_equal(larry, curly)]): + out = array_ops.identity(larry) + out.eval() class AssertNoneEqualTest(test.TestCase): - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_not_equal(self): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([10, 20], name="small") - with ops.control_dependencies( - [check_ops.assert_none_equal(big, small)]): - out = array_ops.identity(small) - self.evaluate(out) - - @test_util.run_in_graph_and_eager_modes() + with self.test_session(): + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([10, 20], name="small") + with ops.control_dependencies( + [check_ops.assert_none_equal(big, small)]): + out = array_ops.identity(small) + out.eval() + def test_raises_when_equal(self): - small = constant_op.constant([3, 1], name="small") - with self.assertRaisesOpError("x != y did not hold"): + with self.test_session(): + small = constant_op.constant([3, 1], name="small") with ops.control_dependencies( [check_ops.assert_none_equal(small, small)]): out = array_ops.identity(small) - self.evaluate(out) + with self.assertRaisesOpError("x != y did not hold"): + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_not_equal_and_broadcastable_shapes(self): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3], name="big") - with ops.control_dependencies( - [check_ops.assert_none_equal(small, big)]): - out = array_ops.identity(small) - self.evaluate(out) - - @test_util.run_in_graph_and_eager_modes() + with self.test_session(): + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3], name="big") + with ops.control_dependencies( + [check_ops.assert_none_equal(small, big)]): + out = array_ops.identity(small) + out.eval() + def test_raises_when_not_equal_but_non_broadcastable_shapes(self): with self.test_session(): small = constant_op.constant([1, 1, 1], name="small") big = constant_op.constant([10, 10], name="big") - # The exception in eager and non-eager mode is different because - # eager mode relies on shape check done as part of the C++ op, while - # graph mode does shape checks when creating the `Operation` instance. - with self.assertRaisesRegexp( - (ValueError, errors.InvalidArgumentError), - (r"Incompatible shapes: \[3\] vs. \[2\]|" - r"Dimensions must be equal, but are 3 and 2")): + with self.assertRaisesRegexp(ValueError, "must be"): with ops.control_dependencies( [check_ops.assert_none_equal(small, big)]): out = array_ops.identity(small) - self.evaluate(out) + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): with self.test_session(): larry = constant_op.constant([]) @@ -253,82 +182,62 @@ class AssertNoneEqualTest(test.TestCase): with ops.control_dependencies( [check_ops.assert_none_equal(larry, curly)]): out = array_ops.identity(larry) - self.evaluate(out) - - def test_returns_none_with_eager(self): - with context.eager_mode(): - t1 = constant_op.constant([1, 2]) - t2 = constant_op.constant([3, 4]) - x = check_ops.assert_none_equal(t1, t2) - assert x is None + out.eval() class AssertLessTest(test.TestCase): - @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal(self): - small = constant_op.constant([1, 2], name="small") - with self.assertRaisesOpError("failure message.*\n*.* x < y did not hold"): + with self.test_session(): + small = constant_op.constant([1, 2], name="small") with ops.control_dependencies( [check_ops.assert_less( - small, small, message="failure message")]): + small, small, message="fail")]): out = array_ops.identity(small) - self.evaluate(out) + with self.assertRaisesOpError("fail.*small.*small"): + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater(self): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 4], name="big") - with self.assertRaisesOpError("x < y did not hold"): + with self.test_session(): + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 4], name="big") with ops.control_dependencies([check_ops.assert_less(big, small)]): out = array_ops.identity(small) - self.evaluate(out) + with self.assertRaisesOpError("big.*small"): + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less(self): - small = constant_op.constant([3, 1], name="small") - big = constant_op.constant([4, 2], name="big") - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - self.evaluate(out) + with self.test_session(): + small = constant_op.constant([3, 1], name="small") + big = constant_op.constant([4, 2], name="big") + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less_and_broadcastable_shapes(self): - small = constant_op.constant([1], name="small") - big = constant_op.constant([3, 2], name="big") - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - self.evaluate(out) - - @test_util.run_in_graph_and_eager_modes() - def test_raises_when_less_but_non_broadcastable_shapes(self): - small = constant_op.constant([1, 1, 1], name="small") - big = constant_op.constant([3, 2], name="big") - # The exception in eager and non-eager mode is different because - # eager mode relies on shape check done as part of the C++ op, while - # graph mode does shape checks when creating the `Operation` instance. - with self.assertRaisesRegexp( - (ValueError, errors.InvalidArgumentError), - (r"Incompatible shapes: \[3\] vs. \[2\]|" - "Dimensions must be equal, but are 3 and 2")): + with self.test_session(): + small = constant_op.constant([1], name="small") + big = constant_op.constant([3, 2], name="big") with ops.control_dependencies([check_ops.assert_less(small, big)]): out = array_ops.identity(small) - self.evaluate(out) + out.eval() + + def test_raises_when_less_but_non_broadcastable_shapes(self): + with self.test_session(): + small = constant_op.constant([1, 1, 1], name="small") + big = constant_op.constant([3, 2], name="big") + with self.assertRaisesRegexp(ValueError, "must be"): + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + out.eval() - @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies([check_ops.assert_less(larry, curly)]): - out = array_ops.identity(larry) - self.evaluate(out) - - def test_returns_none_with_eager(self): - with context.eager_mode(): - t1 = constant_op.constant([1, 2]) - t2 = constant_op.constant([3, 4]) - x = check_ops.assert_less(t1, t2) - assert x is None + with self.test_session(): + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies([check_ops.assert_less(larry, curly)]): + out = array_ops.identity(larry) + out.eval() class AssertLessEqualTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 6167cb9999..6cbdd4cbb3 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -439,9 +439,10 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeCPU(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, - dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64, + dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, + dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, + dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.string ]: self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False) @@ -573,9 +574,10 @@ class OnesLikeTest(test.TestCase): def testOnesLike(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, - dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64 + dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, + dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, + dtypes_lib.complex64, dtypes_lib.complex128 ]: numpy_dtype = dtype.as_numpy_dtype with self.test_session(): diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index 662c94eea7..7c8d309bbd 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -17,6 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -50,5 +53,45 @@ class Conv1DTest(test.TestCase): self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) + def testConv1DTranspose(self): + with self.test_session(): + stride = 2 + + # Input, output: [batch, width, depth] + x_shape = [2, 4, 3] + y_shape = [2, 9, 2] + + # Filter: [kernel_width, output_depth, input_depth] + f_shape = [3, 2, 3] + + x = constant_op.constant( + 1.0, shape=x_shape, name="x", dtype=dtypes.float32) + f = constant_op.constant( + 1.0, shape=f_shape, name="filter", dtype=dtypes.float32) + output = nn_ops.conv1d_transpose( + x, f, y_shape, stride=stride, padding="VALID") + value = output.eval() + + cache_values = np.zeros(y_shape, dtype=np.float32) + + # The amount of padding added + pad = 1 + + for n in xrange(x_shape[0]): + for k in xrange(f_shape[1]): + for w in xrange(pad, y_shape[1] - pad): + target = 3.0 + # We add a case for locations divisible by the stride. + w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad + if w_in: + target += 3.0 + cache_values[n, w, k] = target + + # copy values in the border + cache_values[n, 0, k] = cache_values[n, 1, k] + cache_values[n, -1, k] = cache_values[n, -2, k] + + self.assertAllClose(cache_values, value) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index 14622ab467..116681fc4c 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -21,6 +21,8 @@ from __future__ import print_function import collections import math +import numpy as np + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util @@ -45,8 +47,19 @@ def GetTestConfigs(): class Conv3DTest(test.TestCase): + def _DtypesToTest(self, use_gpu): + if use_gpu: + if not test_util.CudaSupportsHalfMatMulAndConv(): + return [dtypes.float32] + else: + # It is important that float32 comes before float16 here, + # as we will be using its gradients as reference for fp16 gradients. + return [dtypes.float32, dtypes.float16] + else: + return [dtypes.float64, dtypes.float32, dtypes.float16] + def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride, - padding, data_format, use_gpu): + padding, data_format, dtype, use_gpu): total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: @@ -54,13 +67,14 @@ class Conv3DTest(test.TestCase): for s in filter_in_sizes: total_size_2 *= s - # Initializes the input tensor with array containing incrementing - # numbers from 1. - x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] - x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] + # Initializes the input tensor with array containing numbers from 0 to 1. + # We keep the input tensor values fairly small to avoid overflowing a float16 + # tensor during the conv3d + x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] + x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu): - t1 = constant_op.constant(x1, shape=tensor_in_sizes) - t2 = constant_op.constant(x2, shape=filter_in_sizes) + t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) + t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) if isinstance(stride, collections.Iterable): strides = [1] + list(stride) + [1] @@ -81,27 +95,35 @@ class Conv3DTest(test.TestCase): expected): results = [] for data_format, use_gpu in GetTestConfigs(): - result = self._SetupValuesForDevice( - tensor_in_sizes, - filter_in_sizes, - stride, - padding, - data_format, - use_gpu=use_gpu) - results.append(result) - tolerance = 1e-2 if use_gpu else 1e-5 + for dtype in self._DtypesToTest(use_gpu): + result = self._SetupValuesForDevice( + tensor_in_sizes, + filter_in_sizes, + stride, + padding, + data_format, + dtype, + use_gpu=use_gpu) + results.append(result) + with self.test_session() as sess: values = sess.run(results) for value in values: print("expected = ", expected) print("actual = ", value) - self.assertAllClose(expected, value.flatten(), atol=tolerance, - rtol=1e-6) + tol = 1e-6 + if value.dtype == np.float16: + tol = 1e-3 + + self.assertAllClose(expected, value.flatten(), atol=tol, + rtol=tol) def testConv3D1x1x1Filter(self): expected_output = [ - 30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0, - 204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0 + 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5 , + 0.59259259, 0.62962963, 0.77777778, 0.92592593, 0.85185185, + 1.05555556, 1.25925926, 1.07407407, 1.33333333, 1.59259259, + 1.2962963 , 1.61111111, 1.92592593 ] # These are equivalent to the Conv2D1x1 case. @@ -127,8 +149,10 @@ class Conv3DTest(test.TestCase): # Expected values computed using scipy's correlate function. def testConv3D2x2x2Filter(self): expected_output = [ - 19554., 19962., 20370., 22110., 22590., 23070., 34890., 35730., 36570., - 37446., 38358., 39270., 50226., 51498., 52770., 52782., 54126., 55470. + 3.77199074, 3.85069444, 3.92939815, 4.2650463 , 4.35763889, + 4.45023148, 6.73032407, 6.89236111, 7.05439815, 7.22337963, + 7.39930556, 7.57523148, 9.68865741, 9.93402778, 10.17939815, + 10.18171296, 10.44097222, 10.70023148 ] # expected_shape = [1, 3, 1, 2, 5] self._VerifyValues( @@ -140,69 +164,19 @@ class Conv3DTest(test.TestCase): def testConv3DStrides(self): expected_output = [ - 102., - 151., - 172., - 193., - 214., - 235., - 142., - 438., - 592., - 613., - 634., - 655., - 676., - 394., - 774., - 1033., - 1054., - 1075., - 1096., - 1117., - 646., - 1894., - 2503., - 2524., - 2545., - 2566., - 2587., - 1486., - 2230., - 2944., - 2965., - 2986., - 3007., - 3028., - 1738., - 2566., - 3385., - 3406., - 3427., - 3448., - 3469., - 1990., - 3686., - 4855., - 4876., - 4897., - 4918., - 4939., - 2830., - 4022., - 5296., - 5317., - 5338., - 5359., - 5380., - 3082., - 4358., - 5737., - 5758., - 5779., - 5800., - 5821., - 3334., + 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, + 0.13988095, 0.08452381, 0.26071429, 0.35238095, 0.36488095, + 0.37738095, 0.38988095, 0.40238095, 0.23452381, 0.46071429, + 0.61488095, 0.62738095, 0.63988095, 0.65238095, 0.66488095, + 0.38452381, 1.12738095, 1.48988095, 1.50238095, 1.51488095, + 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, + 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, + 1.52738095, 2.01488095, 2.02738095, 2.03988095, 2.05238095, + 2.06488095, 1.18452381, 2.19404762, 2.88988095, 2.90238095, + 2.91488095, 2.92738095, 2.93988095, 1.68452381, 2.39404762, + 3.15238095, 3.16488095, 3.17738095, 3.18988095, 3.20238095, + 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, + 3.45238095, 3.46488095, 1.98452381 ] self._VerifyValues( tensor_in_sizes=[1, 5, 8, 7, 1], @@ -212,7 +186,10 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testConv3D2x2x2FilterStride2(self): - expected_output = [19554., 19962., 20370., 50226., 51498., 52770.] + expected_output = [ + 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, + 10.17939815 + ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], filter_in_sizes=[2, 2, 2, 3, 3], @@ -222,11 +199,14 @@ class Conv3DTest(test.TestCase): def testConv3DStride3(self): expected_output = [ - 36564., 38022., 39480., 37824., 39354., 40884., 39084., 40686., 42288., - 46644., 48678., 50712., 47904., 50010., 52116., 49164., 51342., 53520., - 107124., 112614., 118104., 108384., 113946., 119508., 109644., 115278., - 120912., 117204., 123270., 129336., 118464., 124602., 130740., 119724., - 125934., 132144. + 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, + 1.68998016, 1.6155754 , 1.68179563, 1.74801587, 1.9280754 , + 2.01215278, 2.09623016, 1.98015873, 2.0672123 , 2.15426587, + 2.03224206, 2.12227183, 2.21230159, 4.4280754 , 4.65500992, + 4.88194444, 4.48015873, 4.71006944, 4.93998016, 4.53224206, + 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, + 4.8968254 , 5.15054563, 5.40426587, 4.94890873, 5.20560516, + 5.46230159 ] self._VerifyValues( tensor_in_sizes=[1, 6, 7, 8, 2], @@ -237,8 +217,9 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2Same(self): expected_output = [ - 19554., 19962., 20370., 10452., 10710., 10968., 50226., 51498., 52770., - 23844., 24534., 25224. + 3.77199074, 3.85069444, 3.92939815, 2.0162037 , 2.06597222, + 2.11574074, 9.68865741, 9.93402778, 10.17939815, 4.59953704, + 4.73263889, 4.86574074 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -248,7 +229,10 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testKernelSmallerThanStride(self): - expected_output = [1., 3., 7., 9., 19., 21., 25., 27.] + expected_output = [ + 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037 , + 0.77777778, 0.92592593, 1. + ] self._VerifyValues( tensor_in_sizes=[1, 3, 3, 3, 1], filter_in_sizes=[1, 1, 1, 1, 1], @@ -263,9 +247,12 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 1484., 1592., 770., 2240., 2348., 1106., 1149., 1191., 539., 6776., - 6884., 3122., 7532., 7640., 3458., 3207., 3249., 1421., 3005., 3035., - 1225., 3215., 3245., 1309., 1013., 1022., 343. + 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, + 0.40306122, 0.41873178, 0.4340379 , 0.19642857, 2.46938776, + 2.50874636, 1.1377551 , 2.74489796, 2.78425656, 1.26020408, + 1.16873178, 1.1840379 , 0.51785714, 1.09511662, 1.10604956, + 0.44642857, 1.17164723, 1.18258017, 0.47704082, 0.3691691 , + 0.37244898, 0.125 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -274,7 +261,10 @@ class Conv3DTest(test.TestCase): padding="SAME", expected=expected_output) - expected_output = [1484., 1592., 2240., 2348., 6776., 6884., 7532., 7640.] + expected_output = [ + 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, + 2.744898, 2.784257 + ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], filter_in_sizes=[2, 2, 2, 1, 1], @@ -288,7 +278,7 @@ class Conv3DTest(test.TestCase): filter_in_sizes=[2, 1, 2, 1, 2], stride=1, padding="VALID", - expected=[50, 60]) + expected=[1.5625, 1.875]) def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, @@ -328,50 +318,63 @@ class Conv3DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] - if test.is_gpu_available() and use_gpu: - data_type = dtypes.float32 + + for data_type in self._DtypesToTest(use_gpu=use_gpu): # TODO(mjanusz): Modify gradient_checker to also provide max relative # error and synchronize the tolerance levels between the tests for forward # and backward computations. - if test.is_gpu_available(): + if data_type == dtypes.float64: + tolerance = 1e-8 + elif data_type == dtypes.float32: tolerance = 5e-3 - else: - # As of Aug 2016, higher tolerance is needed for some CPU architectures. - # Runs on a single machine can also generate slightly different errors - # because of multithreading. - tolerance = 8e-3 - else: - data_type = dtypes.float64 - tolerance = 1e-8 - with self.test_session(use_gpu=use_gpu): - orig_input_tensor = constant_op.constant( + elif data_type == dtypes.float16: + tolerance = 1e-3 + + + with self.test_session(use_gpu=use_gpu): + orig_input_tensor = constant_op.constant( input_data, shape=input_shape, dtype=data_type, name="input") - filter_tensor = constant_op.constant( + filter_tensor = constant_op.constant( filter_data, shape=filter_shape, dtype=data_type, name="filter") - if data_format == "NCDHW": - input_tensor = test_util.NHWCToNCHW(orig_input_tensor) - strides = test_util.NHWCToNCHW(strides) - else: - input_tensor = orig_input_tensor + if data_format == "NCDHW": + input_tensor = test_util.NHWCToNCHW(orig_input_tensor) + new_strides = test_util.NHWCToNCHW(strides) + else: + input_tensor = orig_input_tensor + new_strides = strides - conv = nn_ops.conv3d( - input_tensor, filter_tensor, strides, padding, + conv = nn_ops.conv3d( + input_tensor, filter_tensor, new_strides, padding, data_format=data_format, name="conv") - if data_format == "NCDHW": - conv = test_util.NCHWToNHWC(conv) + if data_format == "NCDHW": + conv = test_util.NCHWToNHWC(conv) + + + if test_input: + jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor, + input_shape, + conv, + output_shape) + else: + jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor, + filter_shape, + conv, + output_shape) + + + if data_type != dtypes.float16: + reference_jacob_t = jacob_t + err = np.fabs(jacob_t - jacob_n).max() + else: + # Compare fp16 theoretical gradients to fp32 theoretical gradients, + # since fp16 numerical gradients are too imprecise. + err = np.fabs(jacob_t - reference_jacob_t).max() + + print("conv3d gradient error = ", err) + self.assertLess(err, tolerance) - if test_input: - err = gradient_checker.compute_gradient_error(orig_input_tensor, - input_shape, - conv, output_shape) - else: - err = gradient_checker.compute_gradient_error(filter_tensor, - filter_shape, conv, - output_shape) - print("conv3d gradient error = ", err) - self.assertLess(err, tolerance) def ConstructAndTestGradient(self, **kwargs): for data_format, use_gpu in GetTestConfigs(): diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py index 3298092fbe..f7ae1a0f37 100644 --- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py +++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py @@ -122,7 +122,9 @@ class DepthwiseConv2DTest(test.TestCase): x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: - if data_type == dtypes.float32: + if data_type == dtypes.float16: + tolerance = 1e-5 + elif data_type == dtypes.float32: tolerance = 1e-5 else: self.assertEqual(data_type, dtypes.float64) @@ -169,7 +171,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2D,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, stride, padding, data_type, use_gpu=True) @@ -181,7 +183,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2DFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, @@ -318,7 +320,9 @@ class DepthwiseConv2DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): - if data_type == dtypes.float32: + if data_type == dtypes.float16: + tolerance = 0.002 + elif data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) @@ -369,6 +373,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DInputGrad is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -389,6 +395,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DInputGradFormat is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -407,6 +415,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DFilterGrad is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -427,6 +437,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DFilterGradFormat is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD index e21446c2ef..e220d05692 100644 --- a/tensorflow/python/kernel_tests/distributions/BUILD +++ b/tensorflow/python/kernel_tests/distributions/BUILD @@ -193,6 +193,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], + tags = ["manual"], # b/69001419 ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py index ebc89f15c5..d62aca151a 100644 --- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py +++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py @@ -250,13 +250,11 @@ class MultinomialTest(test.TestCase): theta = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32) theta /= np.sum(theta, 1)[..., array_ops.newaxis] - # Ideally we'd be able to test broadcasting but, the multinomial sampler - # doesn't support different total counts. - n = np.float32(5) + n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32) with self.test_session() as sess: - # batch_shape=[2], event_shape=[3] + # batch_shape=[3, 2], event_shape=[3] dist = multinomial.Multinomial(n, theta) - x = dist.sample(int(250e3), seed=1) + x = dist.sample(int(1000e3), seed=1) sample_mean = math_ops.reduce_mean(x, 0) x_centered = x - sample_mean[array_ops.newaxis, ...] sample_cov = math_ops.reduce_mean(math_ops.matmul( @@ -283,17 +281,17 @@ class MultinomialTest(test.TestCase): dist.variance(), dist.stddev(), ]) - self.assertAllClose(sample_mean_, analytic_mean, atol=0.01, rtol=0.01) - self.assertAllClose(sample_cov_, analytic_cov, atol=0.01, rtol=0.01) - self.assertAllClose(sample_var_, analytic_var, atol=0.01, rtol=0.01) - self.assertAllClose(sample_stddev_, analytic_stddev, atol=0.01, rtol=0.01) + self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.01) + self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.01) + self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.01) + self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.01) def testSampleUnbiasedNonScalarBatch(self): with self.test_session() as sess: dist = multinomial.Multinomial( - total_count=5., + total_count=[7., 6., 5.], logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32))) - n = int(3e3) + n = int(3e4) x = dist.sample(n, seed=0) sample_mean = math_ops.reduce_mean(x, 0) # Cyclically rotate event dims left. diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py index 4883095707..2460950aa9 100644 --- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py +++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py @@ -33,8 +33,8 @@ from tensorflow.python.platform import test class DynamicPartitionTest(test.TestCase): def testSimpleOneDimensional(self): - with self.test_session() as sess: - data = constant_op.constant([0, 13, 2, 39, 4, 17]) + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant([0, 13, 2, 39, 4, 17], dtype=dtypes.float32) indices = constant_op.constant([0, 0, 2, 3, 2, 1]) partitions = data_flow_ops.dynamic_partition( data, indices, num_partitions=4) @@ -52,9 +52,10 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual([None], partitions[3].get_shape().as_list()) def testSimpleTwoDimensional(self): - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], - [12, 13, 14], [15, 16, 17]]) + [12, 13, 14], [15, 16, 17]], + dtype=dtypes.float32) indices = constant_op.constant([0, 0, 2, 3, 2, 1]) partitions = data_flow_ops.dynamic_partition( data, indices, num_partitions=4) @@ -71,9 +72,61 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual([None, 3], partitions[2].get_shape().as_list()) self.assertEqual([None, 3], partitions[3].get_shape().as_list()) + def testLargeOneDimensional(self): + num = 100000 + data_list = [x for x in range(num)] + indices_list = [x % 2 for x in range(num)] + part1 = [x for x in range(num) if x % 2 == 0] + part2 = [x for x in range(num) if x % 2 == 1] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=2) + partition_vals = sess.run(partitions) + + self.assertAllEqual(part1, partition_vals[0]) + self.assertAllEqual(part2, partition_vals[1]) + + def testLargeTwoDimensional(self): + rows = 100000 + cols = 100 + data_list = [None] * rows + for i in range(rows): + data_list[i] = [i for _ in range(cols)] + num_partitions = 97 + indices_list = [(i ** 2) % num_partitions for i in range(rows)] + parts = [[] for _ in range(num_partitions)] + for i in range(rows): + parts[(i ** 2) % num_partitions].append(data_list[i]) + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=num_partitions) + partition_vals = sess.run(partitions) + + for i in range(num_partitions): + # reshape because of empty parts + parts_np = np.array(parts[i], dtype=np.float).reshape(-1, cols) + self.assertAllEqual(parts_np, partition_vals[i]) + + def testSimpleComplex(self): + data_list = [1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j] + indices_list = [1, 0, 1, 0] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.complex64) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=2) + partition_vals = sess.run(partitions) + + self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0]) + self.assertAllEqual([1 + 2j, 5 + 6j], partition_vals[1]) + def testHigherRank(self): np.random.seed(7) - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: for n in 2, 3: for shape in (4,), (4, 5), (4, 5, 2): partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape) @@ -95,6 +148,49 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual(grads[1], None) # Partitions has no gradients self.assertAllEqual(7 * data, sess.run(grads[0])) + def testEmptyParts(self): + data_list = [1, 2, 3, 4] + indices_list = [1, 3, 1, 3] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=4) + partition_vals = sess.run(partitions) + + self.assertAllEqual([], partition_vals[0]) + self.assertAllEqual([1, 3], partition_vals[1]) + self.assertAllEqual([], partition_vals[2]) + self.assertAllEqual([2, 4], partition_vals[3]) + + def testEmptyDataTwoDimensional(self): + data_list = [[], []] + indices_list = [0, 1] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=3) + partition_vals = sess.run(partitions) + + self.assertAllEqual([[]], partition_vals[0]) + self.assertAllEqual([[]], partition_vals[1]) + self.assertAllEqual(np.array([], dtype=np.float).reshape(0, 0), + partition_vals[2]) + + def testEmptyPartitions(self): + data_list = [] + indices_list = [] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=2) + partition_vals = sess.run(partitions) + + self.assertAllEqual([], partition_vals[0]) + self.assertAllEqual([], partition_vals[1]) + def testErrorIndexOutOfRange(self): with self.test_session() as sess: data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py index 5109ed98c9..af5e23c926 100644 --- a/tensorflow/python/kernel_tests/gather_nd_op_test.py +++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py @@ -25,7 +25,6 @@ import numpy as np from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import variables @@ -186,9 +185,6 @@ class GatherNdTest(test.TestCase): self.assertAllEqual(expected.reshape([10, 10, 20]), gather_nd_val) self.assertEqual([10, 10, 20], gather_nd_t.get_shape()) - def assertIndexedSlices(self, t): - self.assertIsInstance(t, ops.IndexedSlices) - def testUnknownIndices(self): params = constant_op.constant([[0, 1, 2]]) indices = array_ops.placeholder(dtypes.int32) @@ -237,8 +233,7 @@ class GatherNdTest(test.TestCase): grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0] expected_grads = np.array([[3, 4], [1, 2]], dtype=np.float64) with self.test_session(use_gpu=True): - self.assertIndexedSlices(grads) - self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval()) + self.assertAllEqual(expected_grads, grads.eval()) def testGradientsRank3Elements(self): indices = constant_op.constant( @@ -289,8 +284,7 @@ class GatherNdTest(test.TestCase): [0, 0, 0, 0, 0, 0, 0, 0, 0], [3, 3, 3, 3, 3, 3, 3, 3, 3]], dtype=np.float64) with self.test_session(use_gpu=True): - self.assertIndexedSlices(grads) - self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval()) + self.assertAllEqual(expected_grads, grads.eval()) class GatherNdOpBenchmark(test.Benchmark): diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/kernel_tests/iterator_ops_test.py index 2128ef4ae1..60a44b5b14 100644 --- a/tensorflow/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/python/kernel_tests/iterator_ops_test.py @@ -17,14 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops -from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -33,9 +31,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import script_ops @@ -537,64 +533,6 @@ class IteratorTest(test.TestCase): target_placeholder: "/job:localhost/replica:0/task:0/cpu:0" }) - def testIncorrectIteratorRestore(self): - - def _path(): - return os.path.join(self.get_temp_dir(), "iterator") - - def _save_op(iterator_resource): - iterator_state_variant = gen_dataset_ops.serialize_iterator( - iterator_resource) - save_op = io_ops.write_file( - _path(), parsing_ops.serialize_tensor(iterator_state_variant)) - return save_op - - def _restore_op(iterator_resource): - iterator_state_variant = parsing_ops.parse_tensor( - io_ops.read_file(_path()), dtypes.variant) - restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, - iterator_state_variant) - return restore_op - - def _build_range_dataset_graph(): - start = 1 - stop = 10 - iterator = dataset_ops.Dataset.range(start, - stop).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = _save_op(iterator._iterator_resource) - restore_op = _restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - def _build_reader_dataset_graph(): - filenames = ["test"] # Does not exist but we don't care in this test. - iterator = readers.FixedLengthRecordDataset( - filenames, 1, 0, 0).make_initializable_iterator() - init_op = iterator.initializer - get_next_op = iterator.get_next() - save_op = _save_op(iterator._iterator_resource) - restore_op = _restore_op(iterator._iterator_resource) - return init_op, get_next_op, save_op, restore_op - - # Saving iterator for RangeDataset graph. - with ops.Graph().as_default() as g: - init_op, _, save_op, _ = _build_range_dataset_graph() - with self.test_session(graph=g) as sess: - sess.run(init_op) - sess.run(save_op) - - # Attempt to restore the saved iterator into an IteratorResource of - # incompatible type. An iterator of RangeDataset has output type int64, - # while an iterator of FixedLengthRecordDataset has output type string. - # So an InvalidArgumentError should be raised by - # IteratorResource::set_iterator. - with ops.Graph().as_default() as g: - _, _, _, restore_op = _build_reader_dataset_graph() - with self.test_session(graph=g) as sess: - with self.assertRaises(errors.InvalidArgumentError): - sess.run(restore_op) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index c699d50c02..988a72603f 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import numpy as np +import os from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -1341,11 +1342,33 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") + # Do not propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] + + for v2 in [True, False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop_cudnn, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding="VALID", + use_gpu=True, + v2=v2) + + # Propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" + expected_input_backprop_cudnn = expected_input_backprop_tf_cpu + for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1361,6 +1384,11 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) + if saved_nanprop: + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop + else: + del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] + def _testMaxPoolGradDirectWithNans2_2(self): input_data = [float("nan")] * 16 output_backprop = [ @@ -1391,11 +1419,14 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") + # Do not propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] + for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1411,6 +1442,31 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) + + # Propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" + expected_input_backprop_cudnn = expected_input_backprop_tf_cpu + + for v2 in [True, False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop_cudnn, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding="VALID", + use_gpu=True, + v2=v2) + + if saved_nanprop: + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop + else: + del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] + def testMaxPoolGradDirect(self): self._testMaxPoolGradDirect1_1() self._testMaxPoolGradDirect1_2() diff --git a/tensorflow/python/kernel_tests/range_dataset_op_test.py b/tensorflow/python/kernel_tests/range_dataset_op_test.py index 0c530522b8..3c1685c951 100644 --- a/tensorflow/python/kernel_tests/range_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/range_dataset_op_test.py @@ -17,32 +17,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os - from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.ops import io_ops -from tensorflow.python.ops import parsing_ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import gfile from tensorflow.python.platform import test class RangeDatasetTest(test.TestCase): - def tearDown(self): - # Remove all checkpoint files. - prefix = self._iterator_checkpoint_prefix() - pattern = prefix + "*" - files = gfile.Glob(pattern) - map(gfile.Remove, files) - def testStop(self): stop = array_ops.placeholder(dtypes.int64, shape=[]) iterator = dataset_ops.Dataset.range(stop).make_initializable_iterator() @@ -168,319 +151,6 @@ class RangeDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def _iterator_checkpoint_prefix(self): - return os.path.join(self.get_temp_dir(), "iterator") - - def _save_op(self, iterator_resource): - iterator_state_variant = gen_dataset_ops.serialize_iterator( - iterator_resource) - save_op = io_ops.write_file( - self._iterator_checkpoint_prefix(), - parsing_ops.serialize_tensor(iterator_state_variant)) - return save_op - - def _restore_op(self, iterator_resource): - iterator_state_variant = parsing_ops.parse_tensor( - io_ops.read_file(self._iterator_checkpoint_prefix()), dtypes.variant) - restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, - iterator_state_variant) - return restore_op - - def testSaveRestore(self): - - def _build_graph(start, stop): - iterator = dataset_ops.Dataset.range(start, - stop).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - # Saving and restoring in different sessions. - start = 2 - stop = 10 - break_point = 5 - with ops.Graph().as_default() as g: - init_op, get_next, save_op, _ = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - for i in range(start, break_point): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - - with ops.Graph().as_default() as g: - init_op, get_next, _, restore_op = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(init_op) - sess.run(restore_op) - for i in range(break_point, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Saving and restoring in same session. - with ops.Graph().as_default() as g: - init_op, get_next, save_op, restore_op = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - for i in range(start, break_point): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - sess.run(restore_op) - for i in range(break_point, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testRestoreWithoutBuildingDatasetGraph(self): - - def _build_graph(start, stop, num_epochs): - dataset = dataset_ops.Dataset.range(start, stop).repeat(num_epochs) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - # Saving and restoring in different sessions. - start = 2 - stop = 10 - num_epochs = 5 - break_point = 5 - break_epoch = 3 - with ops.Graph().as_default() as g: - init_op, get_next, save_op, _ = _build_graph(start, stop, num_epochs) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - for _ in range(break_epoch): - for i in range(start, stop): - self.assertEqual(i, sess.run(get_next)) - for i in range(start, break_point): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - - with ops.Graph().as_default() as g: - # Create an empty IteratorResource and restore the Iterator into it. - output_types = dtypes.int64 - output_shapes = tensor_shape.scalar() - iterator = iterator_ops.Iterator.from_structure(output_types, - output_shapes) - restore_op = self._restore_op(iterator._iterator_resource) - get_next = iterator.get_next() - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for i in range(break_point, stop): - self.assertEqual(i, sess.run(get_next)) - for _ in range(break_epoch + 1, num_epochs): - for i in range(start, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testRestoreInModifiedGraph(self): - - def _build_graph(start, stop): - dataset = dataset_ops.Dataset.range(start, stop) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - # Saving and restoring in different sessions. - start = 2 - stop = 10 - stop_1 = 8 - break_point = 5 - with ops.Graph().as_default() as g: - init_op, get_next, save_op, _ = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - for i in range(start, break_point): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - - with ops.Graph().as_default() as g: - # Intentionally build a graph with a different value for stop to make sure - # the original dataset graph is actually getting loaded. - init_op, get_next, _, restore_op = _build_graph(start, stop_1) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for i in range(break_point, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testInitThenRestore(self): - # Note: Calling init_op before restore_op is redundant. This test just makes - # sure we do not fail if restore is called on an already initialized - # iterator resource. - - def _build_graph(start, stop): - dataset = dataset_ops.Dataset.range(start, stop) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - # Saving and restoring in different sessions. - start = 2 - stop = 10 - break_point = 5 - with ops.Graph().as_default() as g: - init_op, get_next, save_op, _ = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - for i in range(start, break_point): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - - with ops.Graph().as_default() as g: - init_op, get_next, _, restore_op = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(init_op) - sess.run(restore_op) - for i in range(break_point, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testMultipleSaves(self): - - def _build_graph(start, stop): - iterator = dataset_ops.Dataset.range(start, - stop).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - start = 2 - stop = 10 - break_point1 = 5 - break_point2 = 7 - - with ops.Graph().as_default() as g: - init_op, get_next, save_op, _ = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - for i in range(start, break_point1): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - - with ops.Graph().as_default() as g: - init_op, get_next, save_op, restore_op = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for i in range(break_point1, break_point2): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - - break_point2 = 7 - with ops.Graph().as_default() as g: - init_op, get_next, save_op, restore_op = _build_graph(start, stop) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for i in range(break_point2, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSaveRestoreWithRepeat(self): - - def _build_graph(start, stop, num_epochs): - iterator = dataset_ops.Dataset.range( - start, stop).repeat(num_epochs).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - start = 2 - stop = 10 - num_epochs = 5 - break_range = 5 - break_epoch = 3 - with ops.Graph().as_default() as g: - init_op, get_next, save_op, restore_op = _build_graph( - start, stop, num_epochs) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for _ in range(break_epoch - 1): - for i in range(start, stop): - self.assertEqual(i, sess.run(get_next)) - for i in range(start, break_range): - self.assertEqual(i, sess.run(get_next)) - sess.run(save_op) - - with ops.Graph().as_default() as g: - init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for i in range(break_range, stop): - self.assertEqual(i, sess.run(get_next)) - for _ in range(break_epoch, num_epochs): - for i in range(start, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSaveRestoreExhaustedIterator(self): - - def _build_graph(start, stop, num_epochs): - iterator = dataset_ops.Dataset.range( - start, stop).repeat(num_epochs).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next, save_op, restore_op - - start = 2 - stop = 10 - num_epochs = 5 - with ops.Graph().as_default() as g: - init_op, get_next, save_op, restore_op = _build_graph( - start, stop, num_epochs) - with self.test_session(graph=g) as sess: - sess.run(variables.global_variables_initializer()) - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for _ in range(num_epochs): - for i in range(start, stop): - self.assertEqual(i, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - sess.run(save_op) - - with ops.Graph().as_default() as g: - init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/kernel_tests/reader_dataset_ops_test.py index c8e7333b4b..70b6ce442e 100644 --- a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_dataset_ops_test.py @@ -26,13 +26,8 @@ from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.ops import io_ops -from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -272,299 +267,6 @@ class FixedLengthRecordReaderTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(iterator.get_next()) - def _iterator_checkpoint_path(self): - return os.path.join(self.get_temp_dir(), "iterator") - - def _save_op(self, iterator_resource): - iterator_state_variant = gen_dataset_ops.serialize_iterator( - iterator_resource) - save_op = io_ops.write_file( - self._iterator_checkpoint_path(), - parsing_ops.serialize_tensor(iterator_state_variant)) - return save_op - - def _restore_op(self, iterator_resource): - iterator_state_variant = parsing_ops.parse_tensor( - io_ops.read_file(self._iterator_checkpoint_path()), dtypes.variant) - restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, - iterator_state_variant) - return restore_op - - def _build_iterator_graph(self, num_epochs): - filenames = self._createFiles() - dataset = (readers.FixedLengthRecordDataset( - filenames, self._record_bytes, self._header_bytes, self._footer_bytes) - .repeat(num_epochs)) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next_op = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next_op, save_op, restore_op - - def _restore_iterator(self): - output_types = dtypes.string - output_shapes = tensor_shape.scalar() - iterator = iterator_ops.Iterator.from_structure(output_types, output_shapes) - get_next = iterator.get_next() - restore_op = self._restore_op(iterator._iterator_resource) - return restore_op, get_next - - def testSaveRestore(self): - num_epochs = 10 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testInitThenRestore(self): - # Note: Calling init_op before restore_op is redundant. This test just makes - # sure we do not fail if restore is called on an already initialized - # iterator resource. - num_epochs = 10 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreInModifiedGraph(self): - num_epochs = 10 - num_epochs_1 = 20 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs_1) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreWithoutBuildingDatasetGraph(self): - num_epochs = 10 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - restore_op, get_next_op = self._restore_iterator() - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreUnusedIterator(self): - num_epochs = 10 - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - # Save unused iterator. - sess.run(save_op) - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for _ in range(num_epochs * self._num_files * self._num_records): - sess.run(get_next_op) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreExhaustedIterator(self): - num_epochs = 10 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for _ in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - sess.run(save_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - class TFRecordDatasetTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 5630259b7b..8e54d10f32 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -35,6 +35,9 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test +from tensorflow.python.training import coordinator +from tensorflow.python.training import input as input_lib +from tensorflow.python.training import queue_runner_impl from tensorflow.python.util import compat prefix_path = "tensorflow/core/lib" @@ -1011,6 +1014,25 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) + def testReadFromSameFile(self): + with self.test_session() as sess: + reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") + reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") + filename_queue = input_lib.string_input_producer([self.db_path], + num_epochs=None) + key1, value1 = reader1.read(filename_queue) + key2, value2 = reader2.read(filename_queue) + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + for i in range(3): + for j in range(10): + k1, v1, k2, v2 = sess.run([key1, value1, key2, value2]) + self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2)) + self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2)) + coord.request_stop() + coord.join(threads) + def testReadFromFolder(self): with self.test_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_folder") @@ -1029,6 +1051,25 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) + def testReadFromFileRepeatedly(self): + with self.test_session() as sess: + reader = io_ops.LMDBReader(name="test_read_from_file_repeated") + filename_queue = input_lib.string_input_producer([self.db_path], + num_epochs=None) + key, value = reader.read(filename_queue) + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + # Iterate over the lmdb 3 times. + for i in range(3): + # Go over all 10 records each time. + for j in range(10): + k, v = sess.run([key, value]) + self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j))) + self.assertAllEqual( + compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j)))) + coord.request_stop() + coord.join(threads) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 516a9d000e..3a02f24902 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -323,8 +323,9 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): def testBadIndices(self): # Note: GPU kernel does not return the out-of-range error needed for this # test, so this test is marked as cpu-only. + # Note: With PR #13055 a negative index will be ignored silently. with self.test_session(use_gpu=False): - for bad in [[-1]], [[7]]: + for bad in [[2]], [[7]]: unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2) with self.assertRaisesOpError( r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]): @@ -360,6 +361,32 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): x_init_value=np_x.astype(np.double), delta=1) self.assertAllClose(jacob_t, jacob_n) + def testDropNegatives(self): + # Note: the test is done by replacing segment_ids with 8 to -1 + # for index and replace values generated by numpy with 0. + dtypes = [ + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64, + dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128 + ] + indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) + num_segments = 12 + for indices in indices_flat, indices_flat.reshape(5, 2): + shape = indices.shape + (2,) + for dtype in dtypes: + with self.test_session(use_gpu=True): + tf_x, np_x = self._input(shape, dtype=dtype) + np_ans = self._segmentReduce( + indices, np_x, np.add, op2=None, num_out_rows=num_segments) + # Replace np_ans[8] with 0 for the value + np_ans[8:] = 0 + # Replace 8 with -1 in indices + np.place(indices, indices==8, [-1]) + s = math_ops.unsorted_segment_sum( + data=tf_x, segment_ids=indices, num_segments=num_segments) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) + self.assertShapeEqual(np_ans, s) + class SparseSegmentReductionHelper(SegmentReductionHelper): diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index a9fc699b21..7368251ab6 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -258,6 +258,16 @@ class ShapeOpsTest(test.TestCase): self.assertAllEqual([True], array_ops.expand_dims(inp, 0).eval()) self.assertAllEqual([True], array_ops.expand_dims(inp, -1).eval()) + def testExpandDimsDimType(self): + for dtype in [dtypes.int32, dtypes.int64]: + x = np.zeros([2]) + np_ans = np.expand_dims(x, axis=0) + with self.test_session(use_gpu=True): + tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype)) + tf_ans = tensor.eval() + self.assertShapeEqual(np_ans, tensor) + self.assertAllEqual(np_ans, tf_ans) + def _compareSqueeze(self, x, squeeze_dims, use_gpu): with self.test_session(use_gpu=use_gpu): if squeeze_dims: diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py index 051a25080b..6cdc7872f9 100644 --- a/tensorflow/python/kernel_tests/slice_op_test.py +++ b/tensorflow/python/kernel_tests/slice_op_test.py @@ -217,6 +217,30 @@ class SliceTest(test.TestCase): self.assertEqual(expected_val.shape, slice_t.get_shape()) self.assertEqual(expected_val.shape, slice2_t.get_shape()) + def testRandomHighRank(self): + # Random dims of rank 8 + input_shape = np.random.randint(0, 20, size=8) + inp = np.random.rand(*input_shape).astype("f") + with self.test_session(use_gpu=True) as sess: + a = constant_op.constant( + [float(x) for x in inp.ravel(order="C")], + shape=input_shape, + dtype=dtypes.float32) + indices = [0 if x == 0 else np.random.randint(x) for x in input_shape] + sizes = [ + np.random.randint(0, input_shape[i] - indices[i] + 1) + for i in range(8) + ] + slice_t = array_ops.slice(a, indices, sizes) + slice_val = sess.run(slice_t) + + expected_val = inp[indices[0]:indices[0] + sizes[0], indices[1]:indices[1] + sizes[ + 1], indices[2]:indices[2] + sizes[2], indices[3]:indices[3] + sizes[3], indices[ + 4]:indices[4] + sizes[4], indices[5]:indices[5] + sizes[5], indices[6]:indices[ + 6] + sizes[6], indices[7]:indices[7] + sizes[7]] + self.assertAllEqual(slice_val, expected_val) + self.assertEqual(expected_val.shape, slice_t.get_shape()) + def testPartialShapeInference(self): z = array_ops.zeros((1, 2, 3)) self.assertAllEqual(z.get_shape().as_list(), [1, 2, 3]) @@ -227,7 +251,6 @@ class SliceTest(test.TestCase): m2 = array_ops.slice(z, [0, 0, 0], [constant_op.constant(1) + 0, 2, -1]) self.assertAllEqual(m2.get_shape().as_list(), [None, 2, None]) - def _testGradientSlice(self, input_shape, slice_begin, slice_size): with self.test_session(use_gpu=True): num_inputs = np.prod(input_shape) diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index a50f53b3cd..04758ce45a 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.platform import test @@ -61,6 +62,31 @@ class UniqueTest(test.TestCase): for i in range(len(x)): self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii')) + def testInt32Axis(self): + x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) + with self.test_session() as sess: + y0, idx0 = gen_array_ops.unique_v2(x, axis=[0]) + tf_y0, tf_idx0 = sess.run([y0, idx0]) + y1, idx1 = gen_array_ops.unique_v2(x, axis=[1]) + tf_y1, tf_idx1 = sess.run([y1, idx1]) + self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) + self.assertAllEqual(tf_idx0, np.array([0, 0, 1])) + self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]])) + self.assertAllEqual(tf_idx1, np.array([0, 1, 1])) + + def testInt32V2(self): + # This test is only temporary, once V2 is used + # by default, the axis will be wrapped to allow `axis=None`. + x = np.random.randint(2, high=10, size=7000) + with self.test_session() as sess: + y, idx = gen_array_ops.unique_v2(x, axis=[]) + tf_y, tf_idx = sess.run([y, idx]) + + self.assertEqual(len(x), len(tf_idx)) + self.assertEqual(len(tf_y), len(np.unique(x))) + for i in range(len(x)): + self.assertEqual(x[i], tf_y[tf_idx[i]]) + class UniqueWithCountsTest(test.TestCase): def testInt32(self): diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 5396214956..bd4b12b7e8 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -117,18 +117,6 @@ class VariableScopeTest(test.TestCase): w = variable_scope.get_variable("w", []) self.assertEqual(w.dtype.base_dtype, dtypes.float16) - def testEagerVaribleStore(self): - with context.eager_mode(): - store = variable_scope.EagerVariableStore() - with store.as_default(): - v = variable_scope.get_variable("v", shape=(), trainable=True) - w = variable_scope.get_variable("w", shape=(), trainable=False) - - self.assertTrue(v in store.variables()) - self.assertTrue(w in store.variables()) - self.assertTrue(v in store.trainable_variables()) - self.assertFalse(w in store.trainable_variables()) - @test_util.run_in_graph_and_eager_modes() def testInitFromNonTensorValue(self): v = variable_scope.get_variable("v4", initializer=4, dtype=dtypes.int32) diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index 43be08f8a1..4b3dadc112 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -181,24 +181,6 @@ class XentTest(test.TestCase): print("cross entropy gradient err = ", err) self.assertLess(err, 5e-8) - def testGradientLabelWithV2(self): - with self.test_session(): - l = constant_op.constant( - [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5], - shape=[3, 4], - dtype=dtypes.float64, - name="l") - f = constant_op.constant( - [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4], - shape=[3, 4], - dtype=dtypes.float64, - name="f") - x = nn_ops.softmax_cross_entropy_with_logits_v2(labels=l, logits=f, - name="xent") - err = gradient_checker.compute_gradient_error(l, [3, 4], x, [3]) - - self.assertLess(err, 5e-8) - def testSecondGradient(self): with self.test_session() as sess: l = constant_op.constant([0.0, 0.0, 1.0/3, 0.0, diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index c71e8382e9..db608aa79a 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -220,7 +220,7 @@ class Layer(object): Weight updates (for instance, the updates of the moving mean and variance in a BatchNormalization layer) may be dependent on the inputs passed - when calling a layer. Hence, when reusing a same layer on + when calling a layer. Hence, when reusing the same layer on different inputs `a` and `b`, some entries in `layer.updates` may be dependent on `a` and some on `b`. This method automatically keeps track of dependencies. @@ -294,9 +294,9 @@ class Layer(object): """Add loss tensor(s), potentially dependent on layer inputs. Some losses (for instance, activity regularization losses) may be dependent - on the inputs passed when calling a layer. Hence, when reusing a same layer - on different inputs `a` and `b`, some entries in `layer.losses` may be - dependent on `a` and some on `b`. This method automatically keeps track + on the inputs passed when calling a layer. Hence, when reusing the same + layer on different inputs `a` and `b`, some entries in `layer.losses` may + be dependent on `a` and some on `b`. This method automatically keeps track of dependencies. The `get_losses_for` method allows to retrieve the losses relevant to a @@ -401,11 +401,10 @@ class Layer(object): """ return input_shape - def _make_unique_name(self, name_uid_map=None, avoid_names=None, - namespace=''): + def _make_unique_name(self, name_uid_map=None, avoid_names=None): base_name = _to_snake_case(self.__class__.__name__) name = _unique_layer_name(base_name, name_uid_map=name_uid_map, - avoid_names=avoid_names, namespace=namespace) + avoid_names=avoid_names) return (name, base_name) def _set_scope(self, scope=None): @@ -642,7 +641,7 @@ class Layer(object): for output in output_list: with ops.name_scope('ActivityRegularizer'): activity_regularization = self._activity_regularizer(output) - self.add_loss(activity_regularization, inputs=inputs) + self.add_loss(activity_regularization) if not in_deferred_mode: # TODO(fchollet): consider how masking will work with deferred mode. @@ -2371,7 +2370,7 @@ def _get_default_graph_uid_map(): return name_uid_map -def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): +def _unique_layer_name(name, name_uid_map=None, avoid_names=None): """Makes a layer name (or arbitrary string) unique within a TensorFlow graph. Arguments: @@ -2380,9 +2379,6 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): names. If None (default), uses a per-Graph dictionary. avoid_names: An optional set or dict with names which should not be used. If None (default) does not avoid any names. - namespace: Gets a name which is unique within the (graph, namespace). Layers - which are not Networks use a blank namespace and so get graph-global - names. Returns: Unique string name. @@ -2400,7 +2396,6 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): avoid_names = set() proposed_name = None while proposed_name is None or proposed_name in avoid_names: - name_key = (namespace, name) - name_uid_map[name_key] += 1 - proposed_name = name + '_' + str(name_uid_map[name_key]) + name_uid_map[name] += 1 + proposed_name = name + '_' + str(name_uid_map[name]) return proposed_name diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 509ad5a7af..71eff2f965 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -47,7 +47,7 @@ class BaseLayerTest(test.TestCase): self.assertEqual(layer.trainable_variables, []) self.assertEqual(layer.non_trainable_variables, []) if context.in_graph_mode(): - # updates, losses only supported in GRAPH mode + # updates, losses only suppported in GRAPH mode self.assertEqual(layer.updates, []) self.assertEqual(layer.losses, []) self.assertEqual(layer.built, False) @@ -574,13 +574,6 @@ class BaseLayerTest(test.TestCase): self.assertEqual(3, result['label'].numpy()) self.assertEqual(4.0, result['logits'].numpy()) - def testActivityRegularizer(self): - regularizer = math_ops.reduce_sum - layer = base_layers.Layer(activity_regularizer=regularizer) - x = array_ops.placeholder('int32') - layer.apply(x) - self.assertEqual(len(layer.get_losses_for(x)), 1) - class NetworkTest(test.TestCase): diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 0c7ce02835..8c327d7e27 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -813,6 +813,7 @@ def conv3d(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) @@ -1746,6 +1747,7 @@ def conv3d_transpose(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index a9d59b25a3..dc39e96f87 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -26,6 +26,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.layers import base @@ -236,6 +237,12 @@ class BatchNormalization(base.Layer): raise ValueError('Unsupported axis, fused batch norm only supports ' 'axis == [1] or axis == [3]') + # Raise parameters of fp16 batch norm to fp32 + if self.dtype == dtypes.float16: + param_dtype = dtypes.float32 + else: + param_dtype = self.dtype or dtypes.float32 + axis_to_dim = {x: input_shape[x].value for x in self.axis} for x in axis_to_dim: if axis_to_dim[x] is None: @@ -259,6 +266,7 @@ class BatchNormalization(base.Layer): if self.scale: self.gamma = self.add_variable(name='gamma', shape=param_shape, + dtype=param_dtype, initializer=self.gamma_initializer, regularizer=self.gamma_regularizer, constraint=self.gamma_constraint, @@ -266,11 +274,14 @@ class BatchNormalization(base.Layer): else: self.gamma = None if self.fused: - self._gamma_const = array_ops.constant(1.0, shape=param_shape) + self._gamma_const = array_ops.constant(1.0, + dtype=param_dtype, + shape=param_shape) if self.center: self.beta = self.add_variable(name='beta', shape=param_shape, + dtype=param_dtype, initializer=self.beta_initializer, regularizer=self.beta_regularizer, constraint=self.beta_constraint, @@ -278,7 +289,9 @@ class BatchNormalization(base.Layer): else: self.beta = None if self.fused: - self._beta_const = array_ops.constant(0.0, shape=param_shape) + self._beta_const = array_ops.constant(0.0, + dtype=param_dtype, + shape=param_shape) # Disable variable partitioning when creating the moving mean and variance try: @@ -290,12 +303,14 @@ class BatchNormalization(base.Layer): self.moving_mean = self.add_variable( name='moving_mean', shape=param_shape, + dtype=param_dtype, initializer=self.moving_mean_initializer, trainable=False) self.moving_variance = self.add_variable( name='moving_variance', shape=param_shape, + dtype=param_dtype, initializer=self.moving_variance_initializer, trainable=False) @@ -311,6 +326,7 @@ class BatchNormalization(base.Layer): def _renorm_variable(name, shape): var = self.add_variable(name=name, shape=shape, + dtype=param_dtype, initializer=init_ops.zeros_initializer(), trainable=False) return var @@ -353,7 +369,6 @@ class BatchNormalization(base.Layer): def _fused_batch_norm(self, inputs, training): """Returns the output of fused batch norm.""" - # TODO(reedwm): Add support for fp16 inputs. beta = self.beta if self.center else self._beta_const gamma = self.gamma if self.scale else self._gamma_const @@ -749,6 +764,7 @@ def batch_normalization(inputs, virtual_batch_size=virtual_batch_size, adjustment=adjustment, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs, training=training) diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index 90ebdc8c86..b2876c58c2 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -68,11 +68,12 @@ class BNTest(test.TestCase): use_gpu, is_fused, restore=False, - freeze_mode=False): + freeze_mode=False, + dtype=dtypes.float32): ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype='float32', shape=shape) + image = array_ops.placeholder(dtype=dtype, shape=shape) loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode) if restore: saver.restore(sess, checkpoint_path) @@ -80,7 +81,7 @@ class BNTest(test.TestCase): sess.run(variables.global_variables_initializer()) np.random.seed(0) for _ in range(2): - image_val = np.random.rand(*shape).astype(np.float32) + image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype) sess.run([loss, train_op], feed_dict={image: image_val}) if restore: all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) @@ -90,15 +91,74 @@ class BNTest(test.TestCase): saver.save(sess, checkpoint_path) def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused): + dtype = image_val.dtype ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype='float32', shape=shape) + image = array_ops.placeholder(dtype=dtype, shape=shape) loss, _, saver = self._simple_model(image, is_fused, True) saver.restore(sess, checkpoint_path) loss_val = sess.run(loss, feed_dict={image: image_val}) return loss_val + def _trainEvalSequence(self, + dtype, + train1_use_gpu, + train2_use_gpu, + infer_use_gpu): + batch, height, width, input_channels = 2, 4, 5, 3 + shape = [batch, height, width, input_channels] + checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' % + (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu)) + + self._train( + checkpoint, + shape, + use_gpu=train1_use_gpu, + is_fused=True, + restore=False, + freeze_mode=False, + dtype=dtype) + + train_vars = self._train( + checkpoint, + shape, + use_gpu=train2_use_gpu, + is_fused=True, + restore=True, + freeze_mode=False, + dtype=dtype) + + np.random.seed(0) + image_val = np.random.rand(batch, + height, + width, + input_channels).astype(dtype.as_numpy_dtype) + loss_val = self._infer(checkpoint, image_val, shape, + use_gpu=infer_use_gpu, is_fused=True) + + return train_vars, loss_val + + def testHalfPrecision(self): + ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32, + train1_use_gpu=True, + train2_use_gpu=True, + infer_use_gpu=True) + + self.assertEqual(len(ref_vars), 5) + + for train1_use_gpu in [True, False]: + for train2_use_gpu in [True, False]: + for infer_use_gpu in [True, False]: + test_vars, test_loss = self._trainEvalSequence(dtypes.float16, + train1_use_gpu, + train2_use_gpu, + infer_use_gpu) + self.assertEqual(len(test_vars), 5) + for test_var, ref_var in zip(test_vars, ref_vars): + self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) + self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3) + def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b, use_gpu_checkpoint_a, use_gpu_checkpoint_b, use_gpu_test_a, use_gpu_test_b, freeze_mode): @@ -218,6 +278,36 @@ class BNTest(test.TestCase): ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), bn.trainable_variables) + def testCreateFusedBNFloat16(self): + # Call layer. + bn = normalization_layers.BatchNormalization(axis=1, fused=True) + inputs = random_ops.random_uniform((5, 4, 3, 3), + seed=1, + dtype=dtypes.float16) + training = array_ops.placeholder(dtype='bool') + outputs = bn.apply(inputs, training=training) + + # Verify shape. + self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3]) + + # Verify layer attributes. + self.assertEqual(len(bn.updates), 2) + self.assertEqual(len(bn.variables), 4) + self.assertEqual(len(bn.trainable_variables), 2) + self.assertEqual(len(bn.non_trainable_variables), 2) + for var in bn.variables: + self.assertEqual(var.dtype, dtypes.float32_ref) + + # Test that updates were created and added to UPDATE_OPS. + self.assertEqual(len(bn.updates), 2) + self.assertListEqual( + ops.get_collection(ops.GraphKeys.UPDATE_OPS), bn.updates) + + # Test that weights were created and added to TRAINABLE_VARIABLES. + self.assertListEqual( + ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), + bn.trainable_variables) + def test3DInputAxis1(self): epsilon = 1e-3 bn = normalization_layers.BatchNormalization( diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 87f8d14860..3c025881cb 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -460,11 +460,7 @@ def _GatherNdGrad(op, grad): ref = op.inputs[0] indices = op.inputs[1] ref_shape = array_ops.shape(ref, out_type=indices.dtype) - if indices.shape.ndims == 2 and indices.shape[-1].value == 1: - ref_grad = ops.IndexedSlices(grad, array_ops.squeeze(indices, axis=-1), - ref_shape) - else: - ref_grad = array_ops.scatter_nd(indices, grad, ref_shape) + ref_grad = array_ops.scatter_nd(indices, grad, ref_shape) return [ref_grad, None] diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 61bd41e7de..f5f1278bfd 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1136,7 +1136,7 @@ def concat(values, axis, name="concat"): return gen_array_ops._concat_v2(values=values, axis=axis, name=name) -def boolean_mask(tensor, mask, name="boolean_mask"): +def boolean_mask(tensor, mask, name="boolean_mask", axis=None): """Apply boolean mask to tensor. Numpy equivalent is `tensor[mask]`. ```python @@ -1150,11 +1150,17 @@ def boolean_mask(tensor, mask, name="boolean_mask"): the first K dimensions of `tensor`'s shape. We then have: `boolean_mask(tensor, mask)[i, j1,...,jd] = tensor[i1,...,iK,j1,...,jd]` where `(i1,...,iK)` is the ith `True` entry of `mask` (row-major order). + The `axis` could be used with `mask` to indicate the axis to mask from. + In that case, `axis + dim(mask) <= dim(tensor)` and `mask`'s shape must match + the first `axis + dim(mask)` dimensions of `tensor`'s shape. Args: tensor: N-D tensor. mask: K-D boolean tensor, K <= N and K must be known statically. name: A name for this operation (optional). + axis: A 0-D int Tensor representing the axis in `tensor` to mask from. + By default, axis is 0 which will mask from the first dimension. Otherwise + K + axis <= N. Returns: (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding @@ -1173,10 +1179,10 @@ def boolean_mask(tensor, mask, name="boolean_mask"): ``` """ - def _apply_mask_1d(reshaped_tensor, mask): + def _apply_mask_1d(reshaped_tensor, mask, axis=None): """Mask tensor along dimension 0 with a 1-D mask.""" indices = squeeze(where(mask), squeeze_dims=[1]) - return gather(reshaped_tensor, indices) + return gather(reshaped_tensor, indices, axis=axis) with ops.name_scope(name, values=[tensor, mask]): tensor = ops.convert_to_tensor(tensor, name="tensor") @@ -1191,19 +1197,22 @@ def boolean_mask(tensor, mask, name="boolean_mask"): raise ValueError( "Number of mask dimensions must be specified, even if some dimensions" " are None. E.g. shape=[None] is ok, but shape=None is not.") - shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask) + axis = 0 if axis is None else axis + shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod(shape(tensor)[:ndims_mask], [0]) + leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0]) tensor = reshape(tensor, - concat([[leading_size], - shape(tensor)[ndims_mask:]], 0)) - first_dim = shape_tensor[:ndims_mask].num_elements() + concat([shape(tensor)[:axis], + [leading_size], + shape(tensor)[axis+ndims_mask:]], 0)) + first_dim = shape_tensor[axis:axis+ndims_mask].num_elements() tensor.set_shape( - tensor_shape.as_shape([first_dim]) - .concatenate(shape_tensor[ndims_mask:])) + tensor_shape.as_shape(shape_tensor[:axis]) + .concatenate([first_dim]) + .concatenate(shape_tensor[axis+ndims_mask:])) mask = reshape(mask, [-1]) - return _apply_mask_1d(tensor, mask) + return _apply_mask_1d(tensor, mask, axis) def sparse_mask(a, mask_indices, name=None): @@ -1525,7 +1534,8 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`. + `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, + `complex64`, `complex128` or `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. @@ -1576,8 +1586,8 @@ def ones_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, `complex128` or - `bool`. + `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, + `complex64`, `complex128` or `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. @@ -1653,8 +1663,6 @@ def placeholder(dtype, shape=None, name=None): print(sess.run(y, feed_dict={x: rand_array})) # Will succeed. ``` - @compatibility{eager} Placeholders are not compatible with eager execution. - Args: dtype: The type of elements in the tensor to be fed. shape: The shape of the tensor to be fed (optional). If the shape is not @@ -1664,14 +1672,7 @@ def placeholder(dtype, shape=None, name=None): Returns: A `Tensor` that may be used as a handle for feeding a value, but not evaluated directly. - - Raises: - RuntimeError: if eager execution is enabled """ - if context.in_eager_mode(): - raise RuntimeError("tf.placeholder() is not compatible with " - "eager execution.") - return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name) @@ -1715,8 +1716,6 @@ def sparse_placeholder(dtype, shape=None, name=None): print(sess.run(y, feed_dict={x: sp_value})) # Will succeed. ``` - @compatibility{eager} Placeholders are not compatible with eager execution. - Args: dtype: The type of `values` elements in the tensor to be fed. shape: The shape of the tensor to be fed (optional). If the shape is not @@ -1726,14 +1725,7 @@ def sparse_placeholder(dtype, shape=None, name=None): Returns: A `SparseTensor` that may be used as a handle for feeding a value, but not evaluated directly. - - Raises: - RuntimeError: if eager execution is enabled """ - if context.in_eager_mode(): - raise RuntimeError("tf.placeholder() is not compatible with " - "eager execution.") - shape_name = (name + "/shape") if name is not None else None shape, rank = _normalize_sparse_shape(shape, shape_name) if shape is None: diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index 7e509f72c1..ceee009104 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -48,7 +48,6 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_util @@ -97,11 +96,10 @@ def _maybe_constant_value_string(t): def _assert_static(condition, data): - """Raises a InvalidArgumentError with as much information as possible.""" + """Raises a static ValueError with as much information as possible.""" if not condition: data_static = [_maybe_constant_value_string(x) for x in data] - raise errors.InvalidArgumentError(node_def=None, op=None, - message='\n'.join(data_static)) + raise ValueError('\n'.join(data_static)) def assert_proper_iterable(values): @@ -305,60 +303,11 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None): Returns: Op that raises `InvalidArgumentError` if `x == y` is False. - @compatibility{eager} returns None - - Raises: - InvalidArgumentError if the check can be performed immediately and - `x == y` is False. The check can be performed immediately during - eager execution or if `x` and `y` are statically known. """ message = message or '' with ops.name_scope(name, 'assert_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - - if context.in_eager_mode(): - eq = math_ops.equal(x, y) - condition = math_ops.reduce_all(eq) - if not condition: - # Prepare a message with first elements of x and y - summary_msg = '' - if summarize: - # reshape((-1,)) is the fastest way to get a flat array view. - x_np = x.numpy().reshape((-1,)) - y_np = y.numpy().reshape((-1,)) - x_sum = min(x_np.size, summarize) - y_sum = min(y_np.size, summarize) - summary_msg = ('First %d elements of x:\n%s\n' - 'First %d elements of y:\n%s\n' % - (x_sum, x_np[:x_sum], - y_sum, y_np[:y_sum])) - - # Get the values that actually differed and their indices - mask = math_ops.logical_not(eq) - indices = array_ops.where(mask) - indices_np = indices.numpy() - x_vals = array_ops.boolean_mask(x, mask) - y_vals = array_ops.boolean_mask(y, mask) - diff_to_print = 0 - if summarize: - diff_to_print = min(summarize, indices_np.size) - - raise errors.InvalidArgumentError( - node_def=None, op=None, - message=('%s\nCondition x == y did not hold.\n' - 'Indices of first %s different values:\n%s\n' - 'Corresponding x values:\n%s\n' - 'Corresponding y values:\n%s\n' - '%s' - % - (message or '', - diff_to_print, indices_np[:diff_to_print], - x_vals.numpy().reshape((-1,))[:diff_to_print], - y_vals.numpy().reshape((-1,))[:diff_to_print], - summary_msg))) - return - if data is None: data = [ message, @@ -407,19 +356,12 @@ def assert_none_equal( with ops.name_scope(name, 'assert_none_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - if context.in_eager_mode(): - x_name = 'x' - y_name = 'y' - else: - x_name = x.name - y_name = y.name - if data is None: data = [ message, - 'Condition x != y did not hold for every single element:', - 'x (%s) = ' % x_name, x, - 'y (%s) = ' % y_name, y + 'Condition x != y did not hold for every single element:' + 'x (%s) = ' % x.name, x, + 'y (%s) = ' % y.name, y ] condition = math_ops.reduce_all(math_ops.not_equal(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) @@ -455,18 +397,11 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_less', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - if context.in_eager_mode(): - x_name = 'x' - y_name = 'y' - else: - x_name = x.name - y_name = y.name - if data is None: data = [ message, - 'Condition x < y did not hold element-wise:', - 'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y + 'Condition x < y did not hold element-wise:' + 'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y ] condition = math_ops.reduce_all(math_ops.less(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 8afb079d20..10d8e01304 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -60,7 +60,6 @@ from tensorflow.core.protobuf import control_flow_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape @@ -87,29 +86,6 @@ from tensorflow.python.util import tf_should_use _basetuple = tuple -def _summarize_eager(tensor, summarize=None): - """Returns a summarized string representation of eager `tensor`. - - Args: - tensor: EagerTensor to summarize - summarize: Include these many first elements of `array` - """ - # reshape((-1,)) is the fastest way to get a flat array view - if tensor._rank(): # pylint: disable=protected-access - flat = tensor.numpy().reshape((-1,)) - lst = [str(x) for x in flat[:summarize]] - if len(lst) < flat.size: - lst.append("...") - else: - # tensor.numpy() returns a scalar for zero dimensional arrays - if summarize != 0: - lst = [str(tensor.numpy())] - else: - lst = [] - - return ", ".join(lst) - - # pylint: disable=protected-access @@ -122,8 +98,7 @@ def Assert(condition, data, summarize=None, name=None): If `condition` evaluates to false, print the list of tensors in `data`. `summarize` determines how many entries of the tensors to print. - NOTE: In graph mode, to ensure that Assert executes, one usually attaches - a dependency: + NOTE: To ensure that Assert executes, one usually attaches a dependency: ```python # Ensure maximum element of x is smaller or equal to 1 @@ -142,21 +117,7 @@ def Assert(condition, data, summarize=None, name=None): assert_op: An `Operation` that, when executed, raises a `tf.errors.InvalidArgumentError` if `condition` is not true. @compatibility{eager} returns None. - - Raises: - @compatibility{eager} `tf.errors.InvalidArgumentError` if `condition` - is not true """ - if context.in_eager_mode(): - if not condition: - xs = ops.convert_n_to_tensor(data) - data_str = [_summarize_eager(x, summarize) for x in xs] - raise errors.InvalidArgumentError( - node_def=None, op=None, - message="Expected '%s' to be true. Summarized data: %s" % ( - condition, "\n".join(data_str))) - return - with ops.name_scope(name, "Assert", [condition, data]) as name: xs = ops.convert_n_to_tensor(data) if all([x.dtype in {dtypes.string, dtypes.int32} for x in xs]): diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index f037767cf4..477c0d1cb4 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -22,8 +22,8 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_ctc_ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops.nn_grad import _BroadcastMul @@ -38,8 +38,7 @@ def ctc_loss(labels, inputs, sequence_length, [A. Graves, S. Fernandez, F. Gomez, J. Schmidhuber. Connectionist Temporal Classification: Labeling Unsegmented Sequence Data - with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, - pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf) + with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf) Input requirements: @@ -109,9 +108,9 @@ def ctc_loss(labels, inputs, sequence_length, See `core/ops/ctc_ops.cc` for more details. inputs: 3-D `float` `Tensor`. If time_major == False, this will be a `Tensor` shaped: - `[batch_size, max_time, num_classes]`. + `[batch_size x max_time x num_classes]`. If time_major == True (default), this will be a `Tensor` shaped: - `[max_time, batch_size, num_classes]`. + `[max_time x batch_size x num_classes]`. The logits. sequence_length: 1-D `int32` vector, size `[batch_size]`. The sequence lengths. @@ -121,18 +120,15 @@ def ctc_loss(labels, inputs, sequence_length, ignore_longer_outputs_than_inputs: Boolean. Default: False. If True, sequences with longer outputs than inputs will be ignored. time_major: The shape format of the `inputs` Tensors. - If True, these `Tensors` must be shaped `[max_time, batch_size, - num_classes]`. - If False, these `Tensors` must be shaped `[batch_size, max_time, - num_classes]`. - Using `time_major = True` (default) is a bit more efficient because it - avoids transposes at the beginning of the ctc_loss calculation. However, - most TensorFlow data is batch-major, so by this function also accepts - inputs in batch-major form. + If True, these `Tensors` must be shaped `[max_time, batch_size, num_classes]`. + If False, these `Tensors` must be shaped `[batch_size, max_time, num_classes]`. + Using `time_major = True` (default) is a bit more efficient because it avoids + transposes at the beginning of the ctc_loss calculation. However, most + TensorFlow data is batch-major, so by this function also accepts inputs + in batch-major form. Returns: - A 1-D `float` `Tensor`, size `[batch]`, containing the negative log - probabilities. + A 1-D `float` `Tensor`, size `[batch]`, containing the negative log probabilities. Raises: TypeError: if labels is not a `SparseTensor`. @@ -202,7 +198,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): Args: inputs: 3-D `float` `Tensor` sized - `[max_time, batch_size, num_classes]`. The logits. + `[max_time x batch_size x num_classes]`. The logits. sequence_length: 1-D `int32` vector containing sequence lengths, having size `[batch_size]`. merge_repeated: Boolean. Default: True. @@ -211,7 +207,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): A tuple `(decoded, neg_sum_logits)` where decoded: A single-element list. `decoded[0]` is an `SparseTensor` containing the decoded outputs s.t.: - `decoded.indices`: Indices matrix `(total_decoded_outputs, 2)`. + `decoded.indices`: Indices matrix `(total_decoded_outputs x 2)`. The rows store: `[batch, time]`. `decoded.values`: Values vector, size `(total_decoded_outputs)`. The vector stores the decoded classes. diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 923696a553..2accedf1b9 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -196,7 +196,7 @@ class Dirichlet(distribution.Distribution): alpha=self.concentration, dtype=self.dtype, seed=seed) - return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keep_dims=True) + return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keepdims=True) @distribution_util.AppendDocstring(_dirichlet_sample_note) def _log_prob(self, x): diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 00b5697c83..d49fac59ca 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -26,6 +26,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import util as distribution_util @@ -140,6 +141,8 @@ class Multinomial(distribution.Distribution): counts = [[2., 1, 1], [3, 1, 1]] dist.prob(counts) # Shape [2] + + dist.sample(5) # Shape [5, 2, 3] ``` """ @@ -231,29 +234,35 @@ class Multinomial(distribution.Distribution): def _sample_n(self, n, seed=None): n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32) - if self.total_count.get_shape().ndims is not None: - if self.total_count.get_shape().ndims != 0: - raise NotImplementedError( - "Sample only supported for scalar number of draws.") - elif self.validate_args: - is_scalar = check_ops.assert_rank( - n_draws, 0, - message="Sample only supported for scalar number of draws.") - n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws) k = self.event_shape_tensor()[0] - # Flatten batch dims so logits has shape [B, k], - # where B = reduce_prod(self.batch_shape_tensor()). - x = random_ops.multinomial( - logits=array_ops.reshape(self.logits, [-1, k]), - num_samples=n * n_draws, - seed=seed) - x = array_ops.reshape(x, shape=[-1, n, n_draws]) - x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), - axis=-2) # shape: [B, n, k] + + # boardcast the total_count and logits to same shape + n_draws = array_ops.ones_like( + self.logits[..., 0], dtype=n_draws.dtype) * n_draws + logits = array_ops.ones_like( + n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits + + # flatten the total_count and logits + flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] + flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] + + # computes each total_count and logits situation by map_fn + def _sample_single(args): + logits, n_draw = args[0], args[1] # [K], [] + x = random_ops.multinomial(logits[array_ops.newaxis, ...], + n_draw, seed) # [1, n*n_draw] + x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] + x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] + return x + x = functional_ops.map_fn(_sample_single, + [flat_logits, flat_ndraws], + dtype=self.dtype) # [B1B2...Bm, n, k] + + # reshape the results to proper shape x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) - x = array_ops.reshape(x, final_shape) - return math_ops.cast(x, self.dtype) + x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] + return x @distribution_util.AppendDocstring(_multinomial_sample_note) def _log_prob(self, counts): diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index f4561d1a83..8c1ccc6840 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -191,9 +191,12 @@ def _embedding_lookup_and_transform(params, (flat_ids - extras) // ids_per_partition) # Emulate a conditional using a boolean indicator tensor - new_ids = array_ops.where(p_assignments < extras, - flat_ids % (ids_per_partition + 1), - (flat_ids - extras) % ids_per_partition) + is_in_first_extras_partitions = math_ops.cast(p_assignments < extras, + flat_ids.dtype) + new_ids = (is_in_first_extras_partitions * (flat_ids % + (ids_per_partition + 1)) + + (1 - is_in_first_extras_partitions) * + ((flat_ids - extras) % ids_per_partition)) else: raise ValueError("Unrecognized partition strategy: " + partition_strategy) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 2946dbe81e..7c23321ca5 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1121,7 +1121,7 @@ def rgb_to_grayscale(images, name=None): rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) gray_float = math_ops.reduce_sum(flt_image * rgb_weights, rank_1, - keep_dims=True) + keepdims=True) gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) return convert_image_dtype(gray_float, orig_dtype, name=name) @@ -1212,26 +1212,7 @@ def adjust_hue(image, delta, name=None): orig_dtype = image.dtype flt_image = convert_image_dtype(image, dtypes.float32) - # TODO(zhengxq): we will switch to the fused version after we add a GPU - # kernel for that. - fused = os.environ.get('TF_ADJUST_HUE_FUSED', '') - fused = fused.lower() in ('true', 't', '1') - - if not fused: - hsv = gen_image_ops.rgb_to_hsv(flt_image) - - hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1]) - saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1]) - value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1]) - - # Note that we add 2*pi to guarantee that the resulting hue is a positive - # floating point number since delta is [-0.5, 0.5]. - hue = math_ops.mod(hue + (delta + 1.), 1.) - - hsv_altered = array_ops.concat([hue, saturation, value], 2) - rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered) - else: - rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) + rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) return convert_image_dtype(rgb_altered, orig_dtype) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 2cb467c891..14a039ffd0 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated_args # Names below are lower_case. # pylint: disable=invalid-name @@ -438,7 +439,10 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): # pylint: disable=redefined-builtin -def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") +def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, + keep_dims=None): r"""Computes the norm of vectors, matrices, and tensors. This function can compute several different vector norms (the 1-norm, the @@ -471,13 +475,13 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): can be either a matrix or a batch of matrices at runtime, pass `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are computed. - keep_dims: If True, the axis indicated in `axis` are kept with size 1. + keepdims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. Returns: output: A `Tensor` of the same type as tensor, containing the vector or - matrix norms. If `keep_dims` is True then the rank of output is equal to + matrix norms. If `keepdims` is True then the rank of output is equal to the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar, if `axis` is an integer, the rank of `output` is one less than the rank of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less @@ -497,6 +501,13 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False + is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and len(axis) == 2) if is_matrix_norm: @@ -528,25 +539,25 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): # matrices. result = math_ops.sqrt( math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keep_dims=True)) + tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: sum_axis = None if axis is None else axis[0] - result = math_ops.reduce_sum(result, sum_axis, keep_dims=True) + result = math_ops.reduce_sum(result, sum_axis, keepdims=True) if is_matrix_norm: - result = math_ops.reduce_max(result, axis[-1], keep_dims=True) + result = math_ops.reduce_max(result, axis[-1], keepdims=True) elif ord == np.inf: if is_matrix_norm: - result = math_ops.reduce_sum(result, axis[1], keep_dims=True) + result = math_ops.reduce_sum(result, axis[1], keepdims=True) max_axis = None if axis is None else axis[0] - result = math_ops.reduce_max(result, max_axis, keep_dims=True) + result = math_ops.reduce_max(result, max_axis, keepdims=True) else: # General p-norms (positive p only) result = math_ops.pow( math_ops.reduce_sum( - math_ops.pow(result, ord), axis, keep_dims=True), 1.0 / ord) - if not keep_dims: + math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord) + if not keepdims: result = array_ops.squeeze(result, axis) return result diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py index 5732c756ce..04eeb00518 100644 --- a/tensorflow/python/ops/math_grad_test.py +++ b/tensorflow/python/ops/math_grad_test.py @@ -113,6 +113,23 @@ class MinOrMaxGradientTest(test.TestCase): self.assertLess(error, 1e-4) +class MaximumOrMinimumGradientTest(test.TestCase): + + def testMaximumGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs = math_ops.maximum(inputs, 3.0) + with self.test_session(): + error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) + self.assertLess(error, 1e-4) + + def testMinimumGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs = math_ops.minimum(inputs, 2.0) + with self.test_session(): + error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) + self.assertLess(error, 1e-4) + + class ProdGradientTest(test.TestCase): def testProdGradient(self): diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 886b2048f9..81b3c21808 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1265,16 +1265,19 @@ def _ReductionDims(x, axis, reduction_indices): return range(0, array_ops.rank(x)) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_sum(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the sum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1287,7 +1290,7 @@ def reduce_sum(input_tensor, tf.reduce_sum(x) # 6 tf.reduce_sum(x, 0) # [2, 2, 2] tf.reduce_sum(x, 1) # [3, 3] - tf.reduce_sum(x, 1, keep_dims=True) # [[3], [3]] + tf.reduce_sum(x, 1, keepdims=True) # [[3], [3]] tf.reduce_sum(x, [0, 1]) # 6 ``` @@ -1296,7 +1299,7 @@ def reduce_sum(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1307,24 +1310,35 @@ def reduce_sum(input_tensor, Equivalent to np.sum @end_compatibility """ + + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False + return gen_math_ops._sum( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def count_nonzero(input_tensor, axis=None, - keep_dims=False, + keepdims=None, dtype=dtypes.int64, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes number of nonzero elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1341,7 +1355,7 @@ def count_nonzero(input_tensor, tf.count_nonzero(x) # 3 tf.count_nonzero(x, 0) # [1, 2, 0] tf.count_nonzero(x, 1) # [1, 2] - tf.count_nonzero(x, 1, keep_dims=True) # [[1], [2]] + tf.count_nonzero(x, 1, keepdims=True) # [[1], [2]] tf.count_nonzero(x, [0, 1]) # 3 ``` @@ -1350,7 +1364,7 @@ def count_nonzero(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. dtype: The output dtype; defaults to `tf.int64`. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1358,6 +1372,13 @@ def count_nonzero(input_tensor, Returns: The reduced tensor (number of nonzero values). """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False + with ops.name_scope(name, "count_nonzero", [input_tensor]): input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor") zero = input_tensor.dtype.as_numpy_dtype() @@ -1366,21 +1387,24 @@ def count_nonzero(input_tensor, # int64 reduction happens on GPU to_int64(gen_math_ops.not_equal(input_tensor, zero)), axis=axis, - keep_dims=keep_dims, + keepdims=keepdims, reduction_indices=reduction_indices), dtype=dtype) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_mean(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the mean of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1400,7 +1424,7 @@ def reduce_mean(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1409,25 +1433,44 @@ def reduce_mean(input_tensor, @compatibility(numpy) Equivalent to np.mean + + Please note that `np.mean` has a `dtype` parameter that could be used to specify the output type. By default this is `dtype=float64`. On the other hand, `tf.reduce_mean` has an aggressive type inference from `input_tensor`, for example: + + ```python + x = tf.constant([1, 0, 1, 0]) + tf.reduce_mean(x) # 0 + y = tf.constant([1., 0., 1., 0.]) + tf.reduce_mean(y) # 0.5 + ``` + @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False return gen_math_ops._mean( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_prod(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the product of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1438,7 +1481,7 @@ def reduce_prod(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1449,23 +1492,32 @@ def reduce_prod(input_tensor, Equivalent to np.prod @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False return gen_math_ops._prod( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_min(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the minimum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1476,7 +1528,7 @@ def reduce_min(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1487,23 +1539,32 @@ def reduce_min(input_tensor, Equivalent to np.min @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False return gen_math_ops._min( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_max(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the maximum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1514,7 +1575,7 @@ def reduce_max(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1525,23 +1586,32 @@ def reduce_max(input_tensor, Equivalent to np.max @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False return gen_math_ops._max( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_all(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the "logical and" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1561,7 +1631,7 @@ def reduce_all(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1572,23 +1642,32 @@ def reduce_all(input_tensor, Equivalent to np.all @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False return gen_math_ops._all( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_any(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the "logical or" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1608,7 +1687,7 @@ def reduce_any(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. @@ -1619,23 +1698,32 @@ def reduce_any(input_tensor, Equivalent to np.any @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False return gen_math_ops._any( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name) +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") def reduce_logsumexp(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes log(sum(exp(elements across dimensions of a tensor))). Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1652,7 +1740,7 @@ def reduce_logsumexp(input_tensor, tf.reduce_logsumexp(x) # log(6) tf.reduce_logsumexp(x, 0) # [log(2), log(2), log(2)] tf.reduce_logsumexp(x, 1) # [log(3), log(3)] - tf.reduce_logsumexp(x, 1, keep_dims=True) # [[log(3)], [log(3)]] + tf.reduce_logsumexp(x, 1, keepdims=True) # [[log(3)], [log(3)]] tf.reduce_logsumexp(x, [0, 1]) # log(6) ``` @@ -1661,19 +1749,25 @@ def reduce_logsumexp(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. Returns: The reduced tensor. """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name: raw_max = reduce_max( input_tensor, axis=axis, reduction_indices=reduction_indices, - keep_dims=True) + keepdims=True) my_max = array_ops.stop_gradient( array_ops.where( gen_math_ops.is_finite(raw_max), @@ -1683,9 +1777,9 @@ def reduce_logsumexp(input_tensor, reduce_sum( gen_math_ops.exp(input_tensor - my_max), axis, - keep_dims=True, + keepdims=True, reduction_indices=reduction_indices)) + my_max - if not keep_dims: + if not keepdims: if isinstance(axis, int): axis = [axis] result = array_ops.squeeze(result, axis) @@ -2191,8 +2285,10 @@ def bincount(arr, maxlength = ops.convert_to_tensor( maxlength, name="maxlength", dtype=dtypes.int32) output_size = gen_math_ops.minimum(maxlength, output_size) - weights = (ops.convert_to_tensor(weights, name="weights") - if weights is not None else constant_op.constant([], dtype)) + if weights is not None: + weights = ops.convert_to_tensor(weights, name="weights") + return gen_math_ops.unsorted_segment_sum(weights, arr, output_size) + weights = constant_op.constant([], dtype) return gen_math_ops.bincount(arr, output_size, weights) @@ -2355,7 +2451,7 @@ def reduced_shape(input_shape, axes): input_shape: 1-D Tensor, the shape of the Tensor being reduced. axes: 1-D Tensor, the reduction axes. Returns: - A 1-D Tensor, the output shape as if keep_dims were set to True. + A 1-D Tensor, the output shape as if keepdims were set to True. """ # Example: # cast needed for SparseTensor reductions diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 67caf72621..870c4f4062 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -794,7 +794,7 @@ def mean_cosine_distance(labels, predictions, dim, weights=None, radial_diffs = math_ops.multiply(predictions, labels) radial_diffs = math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,], - keep_dims=True) + keepdims=True) mean_distance, update_op = mean(radial_diffs, weights, None, None, diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index ee1a00623a..79af3ac117 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -74,7 +74,6 @@ See the @{$python/nn} guide. @@softmax @@log_softmax @@softmax_cross_entropy_with_logits -@@softmax_cross_entropy_with_logits_v2 @@sparse_softmax_cross_entropy_with_logits @@weighted_cross_entropy_with_logits @@embedding_lookup diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py index 1fcd0384da..e72d34d1f7 100644 --- a/tensorflow/python/ops/nn_fused_batchnorm_test.py +++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py @@ -335,22 +335,22 @@ class BatchNormalizationTest(test.TestCase): def testInference(self): x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_inference( - x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] if test.is_gpu_available(cuda_only=True): for dtype in [np.float16, np.float32]: self._test_inference( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -359,33 +359,33 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_inference( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_inference( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') def testTraining(self): x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_training( - x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -394,20 +394,20 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_training( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') def testBatchNormGrad(self): for is_training in [True, False]: x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [1], @@ -422,17 +422,17 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NCHW', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [1], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [1], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 1, 6, 2] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [2], @@ -440,13 +440,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [2], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [2], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -460,8 +460,8 @@ class BatchNormalizationTest(test.TestCase): is_training=is_training) x_shape = [5, 7, 11, 4] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [7], @@ -476,13 +476,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [4], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [4], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) def _testBatchNormGradGrad(self, config): shape = config['shape'] @@ -506,15 +506,14 @@ class BatchNormalizationTest(test.TestCase): data_format='NCHW', is_training=is_training, err_tolerance=err_tolerance) - if dtype != np.float16: - self._test_grad_grad( - shape, - np.float32, [shape[3]], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training, - err_tolerance=err_tolerance) + self._test_grad_grad( + shape, + dtype, [shape[3]], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training, + err_tolerance=err_tolerance) def testBatchNormGradGrad(self): configs = [{ @@ -525,6 +524,10 @@ class BatchNormalizationTest(test.TestCase): 'shape': [2, 3, 2, 2], 'err_tolerance': 1e-3, 'dtype': np.float32, + }, { + 'shape': [2, 3, 4, 5], + 'err_tolerance': 1e-2, + 'dtype': np.float16, }, { 'shape': [2, 3, 2, 2], 'err_tolerance': 2e-3, diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 4b406ba840..557f39fb42 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -420,6 +420,7 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad): # grad_loss is the backprop for cost, and we multiply it with the gradients # (which is output[1]) # grad_grad is the backprop for softmax gradient. + # There is no gradient for the labels # # Second derivative is just softmax derivative w.r.t. logits. softmax_grad = op.outputs[1] @@ -435,15 +436,15 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad): const_fill_value = tensor_util.constant_value(g) return const_fill_value is not None and (const_fill_value == 0).all() - logits = op.inputs[0] if grad_grad is not None and not IsZero(grad_grad): + logits = op.inputs[0] softmax = nn_ops.softmax(logits) grad += ((grad_grad - array_ops.squeeze( math_ops.matmul(grad_grad[:, None, :], softmax[:, :, None]), axis=1)) * softmax) - return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits)) + return grad, None @ops.RegisterGradient("SparseSoftmaxCrossEntropyWithLogits") diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 431ea1186a..7297d2f349 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -32,6 +32,8 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables +from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None): @@ -275,6 +277,9 @@ def _swish_shape(op): return [op.inputs[0].shape] +# Set noinline=True so that sigmoid(features) is re-computed during +# backprop, and we can free the sigmoid(features) expression immediately +# after use during the forward pass. @function.Defun(shape_func=_swish_shape, func_name="swish_grad", noinline=True) def _swish_grad(features, grad): """Gradient of Swish function defined below.""" @@ -284,11 +289,6 @@ def _swish_grad(features, grad): return grad * activation_grad -# Naively, x * tf.nn.sigmoid(x) requires keeping both x and sigmoid(x) around -# for backprop, effectively doubling the tensor's memory consumption. We use a -# @Defun decorator with noinline=True so that sigmoid(features) is re-computed -# during backprop, and we can free the sigmoid(features) expression immediately -# after use during the forward pass. @function.Defun( grad_func=_swish_grad, shape_func=_swish_shape, @@ -298,7 +298,7 @@ def swish(features): # pylint: disable=g-doc-args """Computes the Swish activation function: `x * sigmoid(x)`. - Source: "Searching for Activation Functions" (Ramachandran et al. 2017) + Source: "Swish: a Self-Gated Activation Function" (Ramachandran et al. 2017) https://arxiv.org/abs/1710.05941 Args: @@ -313,19 +313,20 @@ def swish(features): return features * math_ops.sigmoid(features) -def l2_normalize(x, dim, epsilon=1e-12, name=None): - """Normalizes along dimension `dim` using an L2 norm. +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): + """Normalizes along dimension `axis` using an L2 norm. - For a 1-D tensor with `dim = 0`, computes + For a 1-D tensor with `axis = 0`, computes output = x / sqrt(max(sum(x**2), epsilon)) For `x` with more dimensions, independently normalizes each 1-D slice along - dimension `dim`. + dimension `axis`. Args: x: A `Tensor`. - dim: Dimension along which to normalize. A scalar or a vector of + axis: Dimension along which to normalize. A scalar or a vector of integers. epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`. @@ -335,8 +336,9 @@ def l2_normalize(x, dim, epsilon=1e-12, name=None): A `Tensor` with the same shape as `x`. """ with ops.name_scope(name, "l2_normalize", [x]) as name: + axis = deprecated_argument_lookup("axis", axis, "dim", dim) x = ops.convert_to_tensor(x, name="x") - square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True) + square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True) x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon)) return math_ops.multiply(x, x_inv_norm, name=name) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index bdaac65904..c4de2c7f00 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -23,6 +23,7 @@ import numbers import numpy as np from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util from tensorflow.python.framework import ops @@ -32,13 +33,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops - # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import +from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup -from tensorflow.python.util import deprecation # Aliases for some automatically-generated names. local_response_normalization = gen_nn_ops.lrn @@ -1645,17 +1646,18 @@ def _softmax(logits, compute_op, dim=-1, name=None): return output -def softmax(logits, dim=-1, name=None): +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def softmax(logits, axis=None, name=None, dim=None): """Computes softmax activations. This function performs the equivalent of - softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), dim) + softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - dim: The dimension softmax would be performed on. The default is -1 which + axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1663,23 +1665,27 @@ def softmax(logits, dim=-1, name=None): A `Tensor`. Has the same type and shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `dim` is beyond the last + InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - return _softmax(logits, gen_nn_ops._softmax, dim, name) + axis = deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: + axis = -1 + return _softmax(logits, gen_nn_ops._softmax, axis, name) -def log_softmax(logits, dim=-1, name=None): +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def log_softmax(logits, axis=None, name=None, dim=None): """Computes log softmax activations. For each batch `i` and class `j` we have - logsoftmax = logits - log(reduce_sum(exp(logits), dim)) + logsoftmax = logits - log(reduce_sum(exp(logits), axis)) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - dim: The dimension softmax would be performed on. The default is -1 which + axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1687,10 +1693,13 @@ def log_softmax(logits, dim=-1, name=None): A `Tensor`. Has the same type as `logits`. Same shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `dim` is beyond the last + InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - return _softmax(logits, gen_nn_ops._log_softmax, dim, name) + axis = deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: + axis = -1 + return _softmax(logits, gen_nn_ops._log_softmax, axis, name) def _ensure_xent_args(name, sentinel, labels, logits): @@ -1702,9 +1711,9 @@ def _ensure_xent_args(name, sentinel, labels, logits): raise ValueError("Both labels and logits must be provided.") -def softmax_cross_entropy_with_logits_v2(_sentinel=None, # pylint: disable=invalid-name - labels=None, logits=None, - dim=-1, name=None): +def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name + labels=None, logits=None, + dim=-1, name=None): """Computes softmax cross entropy between `logits` and `labels`. Measures the probability error in discrete classification tasks in which the @@ -1728,10 +1737,6 @@ def softmax_cross_entropy_with_logits_v2(_sentinel=None, # pylint: disable=inva `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, or `float64`). - Backpropagation will happen into both `logits` and `labels`. To disallow - backpropagation into `labels`, pass label tensors through a `stop_gradients` - before feeding it to this function. - **Note that to avoid confusion, it is required to pass only named arguments to this function.** @@ -1753,123 +1758,57 @@ def softmax_cross_entropy_with_logits_v2(_sentinel=None, # pylint: disable=inva # could break users who call this with bad labels, but disregard the bad # results. - with ops.name_scope( - name, "softmax_cross_entropy_with_logits", [logits, labels]) as name: - logits = ops.convert_to_tensor(logits, name="logits") - labels = ops.convert_to_tensor(labels, name="labels") - precise_logits = math_ops.cast(logits, dtypes.float32) if ( - logits.dtype == dtypes.float16) else logits - # labels and logits must be of the same type - labels = math_ops.cast(labels, precise_logits.dtype) - input_rank = array_ops.rank(precise_logits) - # For shape inference. - shape = logits.get_shape() - - # Move the dim to the end if dim is not the last dimension. - if dim is not -1: - def _move_dim_to_end(tensor, dim_index, rank): - return array_ops.transpose(tensor, - array_ops.concat([ - math_ops.range(dim_index), - math_ops.range(dim_index + 1, rank), - [dim_index] - ], 0)) - - precise_logits = _move_dim_to_end(precise_logits, dim, input_rank) - labels = _move_dim_to_end(labels, dim, input_rank) - - input_shape = array_ops.shape(precise_logits) - - # Make precise_logits and labels into matrices. - precise_logits = _flatten_outer_dims(precise_logits) - labels = _flatten_outer_dims(labels) - - # Do the actual op computation. - # The second output tensor contains the gradients. We use it in - # _CrossEntropyGrad() in nn_grad but not here. - cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( - precise_logits, labels, name=name) - - # The output cost shape should be the input minus dim. - output_shape = array_ops.slice(input_shape, [0], - [math_ops.subtract(input_rank, 1)]) - cost = array_ops.reshape(cost, output_shape) - - # Make shape inference work since reshape and transpose may erase its static - # shape. - if context.in_graph_mode() and shape is not None and shape.dims is not None: - shape = shape.as_list() - del shape[dim] - cost.set_shape(shape) - - if logits.dtype == dtypes.float16: - return math_ops.cast(cost, dtypes.float16) - else: - return cost - - -_XENT_DEPRECATION = """ -Future major versions of TensorFlow will allow gradients to flow -into the labels input on backprop by default. - -See tf.nn.softmax_cross_entropy_with_logits_v2. -""" - - -@deprecation.deprecated(date=None, instructions=_XENT_DEPRECATION) -def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name - labels=None, logits=None, - dim=-1, name=None): - """Computes softmax cross entropy between `logits` and `labels`. - - Measures the probability error in discrete classification tasks in which the - classes are mutually exclusive (each entry is in exactly one class). For - example, each CIFAR-10 image is labeled with one and only one label: an image - can be a dog or a truck, but not both. - - **NOTE:** While the classes are mutually exclusive, their probabilities - need not be. All that is required is that each row of `labels` is - a valid probability distribution. If they are not, the computation of the - gradient will be incorrect. + logits = ops.convert_to_tensor(logits) + labels = ops.convert_to_tensor(labels) + precise_logits = math_ops.cast(logits, dtypes.float32) if ( + logits.dtype == dtypes.float16) else logits + # labels and logits must be of the same type + labels = math_ops.cast(labels, precise_logits.dtype) + input_rank = array_ops.rank(precise_logits) + # For shape inference. + shape = logits.get_shape() - If using exclusive `labels` (wherein one and only - one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. + # Move the dim to the end if dim is not the last dimension. + if dim is not -1: + def _move_dim_to_end(tensor, dim_index, rank): + return array_ops.transpose(tensor, + array_ops.concat([ + math_ops.range(dim_index), + math_ops.range(dim_index + 1, rank), + [dim_index] + ], 0)) - **WARNING:** This op expects unscaled logits, since it performs a `softmax` - on `logits` internally for efficiency. Do not call this op with the - output of `softmax`, as it will produce incorrect results. + precise_logits = _move_dim_to_end(precise_logits, dim, input_rank) + labels = _move_dim_to_end(labels, dim, input_rank) - `logits` and `labels` must have the same shape, e.g. - `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, - or `float64`). + input_shape = array_ops.shape(precise_logits) - Backpropagation will happen only into `logits`. To calculate a cross entropy - loss that allows backpropagation into both `logits` and `labels`, see - @{tf.nn.softmax_cross_entropy_with_logits_v2}. + # Make precise_logits and labels into matrices. + precise_logits = _flatten_outer_dims(precise_logits) + labels = _flatten_outer_dims(labels) - **Note that to avoid confusion, it is required to pass only named arguments to - this function.** + # Do the actual op computation. + # The second output tensor contains the gradients. We use it in + # _CrossEntropyGrad() in nn_grad but not here. + cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + precise_logits, labels, name=name) - Args: - _sentinel: Used to prevent positional parameters. Internal, do not use. - labels: Each row `labels[i]` must be a valid probability distribution. - logits: Unscaled log probabilities. - dim: The class dimension. Defaulted to -1 which is the last dimension. - name: A name for the operation (optional). + # The output cost shape should be the input minus dim. + output_shape = array_ops.slice(input_shape, [0], + [math_ops.subtract(input_rank, 1)]) + cost = array_ops.reshape(cost, output_shape) - Returns: - A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the - softmax cross entropy loss. - """ - _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, - labels, logits) - - with ops.name_scope( - name, "softmax_cross_entropy_with_logits_sg", [logits, labels]) as name: - labels = array_ops.stop_gradient(labels, name="labels_stop_gradient") + # Make shape inference work since reshape and transpose may erase its static + # shape. + if context.in_graph_mode() and shape is not None and shape.dims is not None: + shape = shape.as_list() + del shape[dim] + cost.set_shape(shape) - return softmax_cross_entropy_with_logits_v2( - labels=labels, logits=logits, dim=dim, name=name) + if logits.dtype == dtypes.float16: + return math_ops.cast(cost, dtypes.float16) + else: + return cost def sparse_softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name @@ -2305,6 +2244,100 @@ def conv1d(value, filters, stride, padding, return array_ops.squeeze(result, [spatial_start_dim]) +def conv1d_transpose(value, + filter, + output_shape, + stride, + padding="SAME", + data_format="NWC", + name=None): + """The transpose of `conv1d`. + + This operation is sometimes called "deconvolution" after [Deconvolutional + Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is + actually the transpose (gradient) of `conv1d` rather than an actual + deconvolution. + + Args: + value: A 3-D `Tensor` of type `float` and shape + `[batch, in_width, in_channels]` for `NWC` data format or + `[batch, in_channels, in_width]` for `NCW` data format. + filter: A 3-D `Tensor` with the same type as `value` and shape + `[filter_width, output_channels, in_channels]`. `filter`'s + `in_channels` dimension must match that of `value`. + output_shape: A 1-D `Tensor` representing the output shape of the + deconvolution op. + stride: An `integer`. The number of entries by which + the filter is moved right at each step. + padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. + See the @{tf.nn.convolution$comment here} + data_format: A string. 'NHWC' and 'NCHW' are supported. + name: Optional name for the returned tensor. + + Returns: + A `Tensor` with the same type as `value`. + + Raises: + ValueError: If input/output depth does not match `filter`'s shape, or if + padding is other than `'VALID'` or `'SAME'`. + """ + with ops.name_scope(name, "conv1d_transpose", + [value, filter, output_shape]) as name: + output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") + if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): + raise ValueError("output_shape must have shape (3,), got {}" + .format(output_shape_.get_shape())) + + # The format could be either NWC or NCW, map to NHWC or NCHW + if data_format is None or data_format == "NWC": + data_format_2d = "NHWC" + axis = 2 + elif data_format == "NCW": + data_format_2d = "NCHW" + axis = 1 + else: + raise ValueError("data_format must be \"NWC\" or \"NCW\".") + + if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]): + raise ValueError("input channels does not match filter's input channels, " + "{} != {}".format(value.get_shape()[axis], + filter.get_shape()[2])) + + if isinstance(output_shape, (list, np.ndarray)): + # output_shape's shape should be == [3] if reached this point. + if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): + raise ValueError( + "output_shape does not match filter's output channels, " + "{} != {}".format(output_shape[axis], filter.get_shape()[1])) + + if padding != "VALID" and padding != "SAME": + raise ValueError("padding must be either VALID or SAME:" + " {}".format(padding)) + + # Reshape the input tensor to [batch, 1, in_width, in_channels] + if data_format_2d == "NHWC": + output_shape_ = array_ops.concat([output_shape_[:1], [1], + output_shape_[1:]], axis=0) + spatial_start_dim = 1 + strides = [1, 1, stride, 1] + else: + output_shape_ = array_ops.concat([output_shape_[:2], [1], + output_shape_[2:]], axis=0) + spatial_start_dim = 2 + strides = [1, 1, 1, stride] + value = array_ops.expand_dims(value, spatial_start_dim) + filter = array_ops.expand_dims(filter, 0) + + result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + data_format=data_format_2d, + name=name) + return array_ops.squeeze(result, [spatial_start_dim]) + + @ops.RegisterStatistics("Dilation2D", "flops") def _calc_dilation2d_flops(graph, node): """Calculates the compute resources needed for Dilation2D.""" diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 9a0ff75594..92fa928eed 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1227,11 +1227,6 @@ class EagerVariableStore(object): def variables(self): return self._store._vars.values() # pylint: disable=protected-access - def trainable_variables(self): - # pylint: disable=protected-access - return [x for x in self._store._vars.values() if x._trainable] - # pylint: enable=protected-access - def get_variable(name, shape=None, diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index f906b7b3c4..eab7c3828f 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -1063,13 +1063,13 @@ class Variable(object): class PartitionedVariable(object): """A container for partitioned `Variable` objects. - @compatiblity(eager) `tf.PartitionedVariable` is not compatible with + @compatibility(eager) `tf.PartitionedVariable` is not compatible with eager execution. Use `tfe.Variable` instead which is compatable with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. - @end_compatiblity + @end_compatibility """ class PartitionedVariableIterator(object): diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index cbacf458a0..637f738fed 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -29,7 +29,7 @@ limitations under the License. %rename("%s") TFE_Py_TapeWatch; %rename("%s") TFE_Py_TapeDeleteTrace; %rename("%s") TFE_Py_TapeRecordOperation; -%rename("%s") TFE_Py_TapeGradient; +%rename("%s") TFE_Py_TapeExport; %rename("%s") TFE_NewContextOptions; %rename("%s") TFE_ContextOptionsSetConfig; %rename("%s") TFE_ContextOptionsSetDevicePlacementPolicy; @@ -125,7 +125,7 @@ limitations under the License. SWIG_fail; } if (EagerTensor_CheckExact(elem)) { - (*$1)[i] = EagerTensor_Handle(elem); + (*$1)[i] = EagerTensorHandle(elem); } else { SWIG_exception_fail(SWIG_TypeError, "provided list of inputs contains objects other " diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py old mode 100644 new mode 100755 diff --git a/tensorflow/python/tools/inspect_checkpoint.py b/tensorflow/python/tools/inspect_checkpoint.py index 8716058e61..47a74e5abf 100644 --- a/tensorflow/python/tools/inspect_checkpoint.py +++ b/tensorflow/python/tools/inspect_checkpoint.py @@ -29,8 +29,7 @@ from tensorflow.python.platform import flags FLAGS = None -def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors, - all_tensor_names): +def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors): """Prints tensors in a checkpoint file. If no `tensor_name` is provided, prints the tensor names and shapes @@ -42,16 +41,14 @@ def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors, file_name: Name of the checkpoint file. tensor_name: Name of the tensor in the checkpoint file to print. all_tensors: Boolean indicating whether to print all tensors. - all_tensor_names: Boolean indicating whether to print all tensor names. """ try: reader = pywrap_tensorflow.NewCheckpointReader(file_name) - if all_tensors or all_tensor_names: + if all_tensors: var_to_shape_map = reader.get_variable_to_shape_map() for key in sorted(var_to_shape_map): print("tensor_name: ", key) - if all_tensors: - print(reader.get_tensor(key)) + print(reader.get_tensor(key)) elif not tensor_name: print(reader.debug_string().decode("utf-8")) else: @@ -107,14 +104,11 @@ def parse_numpy_printoption(kv_str): def main(unused_argv): if not FLAGS.file_name: print("Usage: inspect_checkpoint --file_name=checkpoint_file_name " - "[--tensor_name=tensor_to_print] " - "[--all_tensors] " - "[--all_tensor_names] " - "[--printoptions]") + "[--tensor_name=tensor_to_print]") sys.exit(1) else: print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name, - FLAGS.all_tensors, FLAGS.all_tensor_names) + FLAGS.all_tensors) if __name__ == "__main__": @@ -136,13 +130,6 @@ if __name__ == "__main__": type="bool", default=False, help="If True, print the values of all the tensors.") - parser.add_argument( - "--all_tensor_names", - nargs="?", - const=True, - type="bool", - default=False, - help="If True, print the names of all the tensors.") parser.add_argument( "--printoptions", nargs="*", diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 1f6016a91b..af9f11bb07 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -536,7 +536,6 @@ class _MonitoredSession(object): will return True. Example usage: - ```python with tf.Graph().as_default(): c = tf.placeholder(dtypes.float32) @@ -553,7 +552,6 @@ class _MonitoredSession(object): while not session.should_stop(): a = session.run_step_fn(step_fn) ``` - Hooks interact with the `run_with_hooks()` call inside the `step_fn` as they do with a `MonitoredSession.run` call. diff --git a/tensorflow/python/util/tf_should_use.py b/tensorflow/python/util/tf_should_use.py index 37733152e8..a576547d5f 100644 --- a/tensorflow/python/util/tf_should_use.py +++ b/tensorflow/python/util/tf_should_use.py @@ -44,7 +44,7 @@ def _add_should_use_warning(x, fatal_error=False): and is a very shallow wrapper for `x` which logs access into `x`. """ del fatal_error - if x is None or x == []: # pylint: disable=g-explicit-bool-comparison + if x is None: # special corner case where x is None return x if context.in_eager_mode(): diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index ad8164c7f9..2094061b44 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -232,7 +232,6 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) __macro(cudnnRNNBackwardData) \ __macro(cudnnRNNBackwardWeights) \ __macro(cudnnSetRNNDescriptor) \ - __macro(cudnnSetRNNDescriptor_v6) \ __macro(cudnnGetFilterNdDescriptor) // clang-format on @@ -245,7 +244,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 6000 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro) \ - __macro(cudnnConvolutionBiasActivationForward) + __macro(cudnnConvolutionBiasActivationForward) \ + __macro(cudnnSetRNNDescriptor_v6) // clang-format on CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -390,8 +390,8 @@ port::Status CudnnSupport::Init() { << DriverVersionStatusToString(result); } else { const auto& version = result.ValueOrDie(); - LOG(ERROR) << "possibly insufficient driver version: " - << DriverVersionToString(version); + LOG(INFO) << "possibly insufficient driver version: " + << DriverVersionToString(version); // OS X kernel driver does not report version accurately #if !defined(__APPLE__) if (std::get<0>(version) < 340) { @@ -665,7 +665,6 @@ class ScopedPoolingDescriptor { LOG(FATAL) << "could not create cudnn pooling descriptor: " << ToString(status); } - const std::vector strides64 = pooling_descriptor.strides(); const std::vector padding64 = pooling_descriptor.padding(); const std::vector shape64 = pooling_descriptor.window(); @@ -680,14 +679,14 @@ class ScopedPoolingDescriptor { &CheckedNarrowing); std::transform(shape64.cbegin(), shape64.cend(), shape.begin(), &CheckedNarrowing); + bool propagate_nans = pooling_descriptor.propagate_nans(); status = wrap::cudnnSetPoolingNdDescriptor( parent_, handle_, (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum ? CUDNN_POOLING_MAX : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING), #if CUDNN_VERSION >= 5000 - // Always propagate nans. - CUDNN_PROPAGATE_NAN, + propagate_nans ? CUDNN_PROPAGATE_NAN : CUDNN_NOT_PROPAGATE_NAN, #endif nd, shape.data(), padding.data(), strides.data()); if (status != CUDNN_STATUS_SUCCESS) { diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 07fe8a85f4..29fd6d0e87 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -472,7 +472,8 @@ PoolingDescriptor::PoolingDescriptor(int ndims) ndims_(ndims), window_(ndims, 0), padding_(ndims, 0), - strides_(ndims, 1) {} + strides_(ndims, 1), + propagate_nans_(false) {} PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {} @@ -482,6 +483,7 @@ void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) { window_ = other.window_; padding_ = other.padding_; strides_ = other.strides_; + propagate_nans_ = other.propagate_nans_; } string PoolingDescriptor::ToString() const { @@ -495,9 +497,12 @@ string PoolingDescriptor::ToString() const { port::Appendf(&padding, "%lld", padding_[i]); } - return port::Printf("{mode: %s window: %s strides: %s padding: %s}", - mode_string, window.c_str(), strides.c_str(), - padding.c_str()); + const char* propagate_string = propagate_nans_ ? "Yes" : "No"; + + return port::Printf( + "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}", + mode_string, window.c_str(), strides.c_str(), padding.c_str(), + propagate_string); } string PoolingDescriptor::ToShortString() const { @@ -508,7 +513,8 @@ string PoolingDescriptor::ToShortString() const { port::Appendf(&padding, "_p%d:%lld", i, padding_[i]); } return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg", - window, strides, padding); + window, strides, padding, + propagate_nans_ ? "propagate_nans" : "ignore_nans"); } // -- NormalizeDescriptor diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 49235167ab..0d2cd4a9f2 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -661,6 +661,10 @@ class PoolingDescriptor { SetDim(&strides_, dim, value); return *this; } + PoolingDescriptor& set_propagate_nans(bool value) { + propagate_nans_ = value; + return *this; + } int ndims() const { return ndims_; } void CloneFrom(const PoolingDescriptor& other); @@ -681,10 +685,12 @@ class PoolingDescriptor { std::vector window() const { return window_; } std::vector padding() const { return padding_; } std::vector strides() const { return strides_; } + bool propagate_nans() const { return propagate_nans_; } private: PoolingMode mode_; int ndims_; + bool propagate_nans_; // Stored as: ..., y, x. std::vector window_; diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 43ecb7f937..16c3386e15 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -172,8 +172,8 @@ def tf_copts(): "-DEIGEN_AVOID_STL_ARRAY", "-Iexternal/gemmlowp", "-Wno-sign-compare", - "-fno-exceptions", "-ftemplate-depth=900", + "-fno-exceptions", ]) + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1", "-fopenmp",]) + if_android_arm( ["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) + select({ clean_dep("//tensorflow:android"): [ diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt deleted file mode 100644 index f5ed263f0e..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt +++ /dev/null @@ -1,54 +0,0 @@ -path: "tensorflow.estimator.BaselineClassifier" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "config" - mtype: "" - } - member { - name: "model_dir" - mtype: "" - } - member { - name: "model_fn" - mtype: "" - } - member { - name: "params" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'optimizer\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'2\', \'None\', \'None\', \'Ftrl\', \'None\'], " - } - member_method { - name: "evaluate" - argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " - } - member_method { - name: "export_savedmodel" - argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " - } - member_method { - name: "get_variable_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_variable_value" - argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "latest_checkpoint" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } - member_method { - name: "train" - argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt deleted file mode 100644 index 61a29942c5..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt +++ /dev/null @@ -1,54 +0,0 @@ -path: "tensorflow.estimator.BaselineRegressor" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "config" - mtype: "" - } - member { - name: "model_dir" - mtype: "" - } - member { - name: "model_fn" - mtype: "" - } - member { - name: "params" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'model_dir\', \'label_dimension\', \'weight_column\', \'optimizer\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'Ftrl\', \'None\'], " - } - member_method { - name: "evaluate" - argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " - } - member_method { - name: "export_savedmodel" - argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " - } - member_method { - name: "get_variable_names" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_variable_value" - argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "latest_checkpoint" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } - member_method { - name: "train" - argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt index cdc367b99e..ef93a61bd8 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt @@ -1,13 +1,5 @@ path: "tensorflow.estimator" tf_module { - member { - name: "BaselineClassifier" - mtype: "" - } - member { - name: "BaselineRegressor" - mtype: "" - } member { name: "DNNClassifier" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt deleted file mode 100644 index 763184899c..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ /dev/null @@ -1,179 +0,0 @@ -path: "tensorflow.keras.layers.GRUCell" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "activity_regularizer" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "graph" - mtype: "" - } - member { - name: "inbound_nodes" - mtype: "" - } - member { - name: "input" - mtype: "" - } - member { - name: "input_mask" - mtype: "" - } - member { - name: "input_shape" - mtype: "" - } - member { - name: "losses" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "non_trainable_weights" - mtype: "" - } - member { - name: "outbound_nodes" - mtype: "" - } - member { - name: "output" - mtype: "" - } - member { - name: "output_mask" - mtype: "" - } - member { - name: "output_shape" - mtype: "" - } - member { - name: "scope_name" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } - member { - name: "trainable_weights" - mtype: "" - } - member { - name: "updates" - mtype: "" - } - member { - name: "variables" - mtype: "" - } - member { - name: "weights" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], " - } - member_method { - name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_update" - argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " - } - member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " - } - member_method { - name: "apply" - argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" - } - member_method { - name: "build" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "call" - argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "count_params" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_updates_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_weights" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_weights" - argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt index 889f2cbc23..9237399254 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt @@ -1,34 +1,14 @@ path: "tensorflow.keras.layers.GRU" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - member { - name: "activation" - mtype: "" - } member { name: "activity_regularizer" mtype: "" } - member { - name: "bias_constraint" - mtype: "" - } - member { - name: "bias_initializer" - mtype: "" - } - member { - name: "bias_regularizer" - mtype: "" - } - member { - name: "dropout" - mtype: "" - } member { name: "dtype" mtype: "" @@ -37,10 +17,6 @@ tf_class { name: "graph" mtype: "" } - member { - name: "implementation" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -57,18 +33,6 @@ tf_class { name: "input_shape" mtype: "" } - member { - name: "kernel_constraint" - mtype: "" - } - member { - name: "kernel_initializer" - mtype: "" - } - member { - name: "kernel_regularizer" - mtype: "" - } member { name: "losses" mtype: "" @@ -101,34 +65,10 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "recurrent_activation" - mtype: "" - } - member { - name: "recurrent_constraint" - mtype: "" - } - member { - name: "recurrent_dropout" - mtype: "" - } - member { - name: "recurrent_initializer" - mtype: "" - } - member { - name: "recurrent_regularizer" - mtype: "" - } member { name: "scope_name" mtype: "" } - member { - name: "states" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -137,18 +77,10 @@ tf_class { name: "trainable_weights" mtype: "" } - member { - name: "units" - mtype: "" - } member { name: "updates" mtype: "" } - member { - name: "use_bias" - mtype: "" - } member { name: "variables" mtype: "" @@ -159,7 +91,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " } member_method { name: "add_loss" @@ -205,6 +137,10 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_constants" + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -223,7 +159,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "get_output_at" @@ -245,6 +181,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "preprocess_input" + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -253,4 +193,8 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "step" + argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt deleted file mode 100644 index 4ce7c34f6c..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ /dev/null @@ -1,179 +0,0 @@ -path: "tensorflow.keras.layers.LSTMCell" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "activity_regularizer" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "graph" - mtype: "" - } - member { - name: "inbound_nodes" - mtype: "" - } - member { - name: "input" - mtype: "" - } - member { - name: "input_mask" - mtype: "" - } - member { - name: "input_shape" - mtype: "" - } - member { - name: "losses" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "non_trainable_weights" - mtype: "" - } - member { - name: "outbound_nodes" - mtype: "" - } - member { - name: "output" - mtype: "" - } - member { - name: "output_mask" - mtype: "" - } - member { - name: "output_shape" - mtype: "" - } - member { - name: "scope_name" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } - member { - name: "trainable_weights" - mtype: "" - } - member { - name: "updates" - mtype: "" - } - member { - name: "variables" - mtype: "" - } - member { - name: "weights" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], " - } - member_method { - name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_update" - argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " - } - member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " - } - member_method { - name: "apply" - argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" - } - member_method { - name: "build" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "call" - argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "count_params" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_updates_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_weights" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_weights" - argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt index e1a1d0d58e..20935e2f99 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -1,34 +1,14 @@ path: "tensorflow.keras.layers.LSTM" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - member { - name: "activation" - mtype: "" - } member { name: "activity_regularizer" mtype: "" } - member { - name: "bias_constraint" - mtype: "" - } - member { - name: "bias_initializer" - mtype: "" - } - member { - name: "bias_regularizer" - mtype: "" - } - member { - name: "dropout" - mtype: "" - } member { name: "dtype" mtype: "" @@ -37,10 +17,6 @@ tf_class { name: "graph" mtype: "" } - member { - name: "implementation" - mtype: "" - } member { name: "inbound_nodes" mtype: "" @@ -57,18 +33,6 @@ tf_class { name: "input_shape" mtype: "" } - member { - name: "kernel_constraint" - mtype: "" - } - member { - name: "kernel_initializer" - mtype: "" - } - member { - name: "kernel_regularizer" - mtype: "" - } member { name: "losses" mtype: "" @@ -101,34 +65,10 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "recurrent_activation" - mtype: "" - } - member { - name: "recurrent_constraint" - mtype: "" - } - member { - name: "recurrent_dropout" - mtype: "" - } - member { - name: "recurrent_initializer" - mtype: "" - } - member { - name: "recurrent_regularizer" - mtype: "" - } member { name: "scope_name" mtype: "" } - member { - name: "states" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -137,22 +77,10 @@ tf_class { name: "trainable_weights" mtype: "" } - member { - name: "unit_forget_bias" - mtype: "" - } - member { - name: "units" - mtype: "" - } member { name: "updates" mtype: "" } - member { - name: "use_bias" - mtype: "" - } member { name: "variables" mtype: "" @@ -163,7 +91,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " } member_method { name: "add_loss" @@ -209,6 +137,10 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_constants" + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -227,7 +159,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "get_output_at" @@ -249,6 +181,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "preprocess_input" + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -257,4 +193,8 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "step" + argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt deleted file mode 100644 index c7c9b10f22..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt +++ /dev/null @@ -1,191 +0,0 @@ -path: "tensorflow.keras.layers.RNN" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "activity_regularizer" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "graph" - mtype: "" - } - member { - name: "inbound_nodes" - mtype: "" - } - member { - name: "input" - mtype: "" - } - member { - name: "input_mask" - mtype: "" - } - member { - name: "input_shape" - mtype: "" - } - member { - name: "losses" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "non_trainable_weights" - mtype: "" - } - member { - name: "outbound_nodes" - mtype: "" - } - member { - name: "output" - mtype: "" - } - member { - name: "output_mask" - mtype: "" - } - member { - name: "output_shape" - mtype: "" - } - member { - name: "scope_name" - mtype: "" - } - member { - name: "states" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } - member { - name: "trainable_weights" - mtype: "" - } - member { - name: "updates" - mtype: "" - } - member { - name: "variables" - mtype: "" - } - member { - name: "weights" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'activity_regularizer\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'None\'], " - } - member_method { - name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_update" - argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " - } - member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " - } - member_method { - name: "apply" - argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" - } - member_method { - name: "build" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "call" - argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\', \'constants\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " - } - member_method { - name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "count_params" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_initial_state" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "get_output_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_updates_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_weights" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "reset_states" - argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "set_weights" - argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt deleted file mode 100644 index 10c7f8867c..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ /dev/null @@ -1,179 +0,0 @@ -path: "tensorflow.keras.layers.SimpleRNNCell" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "activity_regularizer" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "graph" - mtype: "" - } - member { - name: "inbound_nodes" - mtype: "" - } - member { - name: "input" - mtype: "" - } - member { - name: "input_mask" - mtype: "" - } - member { - name: "input_shape" - mtype: "" - } - member { - name: "losses" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "non_trainable_weights" - mtype: "" - } - member { - name: "outbound_nodes" - mtype: "" - } - member { - name: "output" - mtype: "" - } - member { - name: "output_mask" - mtype: "" - } - member { - name: "output_shape" - mtype: "" - } - member { - name: "scope_name" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } - member { - name: "trainable_weights" - mtype: "" - } - member { - name: "updates" - mtype: "" - } - member { - name: "variables" - mtype: "" - } - member { - name: "weights" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " - } - member_method { - name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_update" - argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " - } - member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " - } - member_method { - name: "apply" - argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" - } - member_method { - name: "build" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "call" - argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "count_params" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_updates_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_weights" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_weights" - argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt index 588df21088..f4148fcc23 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -1,34 +1,14 @@ path: "tensorflow.keras.layers.SimpleRNN" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" - member { - name: "activation" - mtype: "" - } member { name: "activity_regularizer" mtype: "" } - member { - name: "bias_constraint" - mtype: "" - } - member { - name: "bias_initializer" - mtype: "" - } - member { - name: "bias_regularizer" - mtype: "" - } - member { - name: "dropout" - mtype: "" - } member { name: "dtype" mtype: "" @@ -53,18 +33,6 @@ tf_class { name: "input_shape" mtype: "" } - member { - name: "kernel_constraint" - mtype: "" - } - member { - name: "kernel_initializer" - mtype: "" - } - member { - name: "kernel_regularizer" - mtype: "" - } member { name: "losses" mtype: "" @@ -97,30 +65,10 @@ tf_class { name: "output_shape" mtype: "" } - member { - name: "recurrent_constraint" - mtype: "" - } - member { - name: "recurrent_dropout" - mtype: "" - } - member { - name: "recurrent_initializer" - mtype: "" - } - member { - name: "recurrent_regularizer" - mtype: "" - } member { name: "scope_name" mtype: "" } - member { - name: "states" - mtype: "" - } member { name: "trainable_variables" mtype: "" @@ -129,18 +77,10 @@ tf_class { name: "trainable_weights" mtype: "" } - member { - name: "units" - mtype: "" - } member { name: "updates" mtype: "" } - member { - name: "use_bias" - mtype: "" - } member { name: "variables" mtype: "" @@ -151,7 +91,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'False\', \'False\', \'False\', \'False\', \'False\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " } member_method { name: "add_loss" @@ -197,6 +137,10 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_constants" + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -215,7 +159,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" } member_method { name: "get_output_at" @@ -237,6 +181,10 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "preprocess_input" + argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -245,4 +193,8 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "step" + argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt deleted file mode 100644 index 5779e41342..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ /dev/null @@ -1,183 +0,0 @@ -path: "tensorflow.keras.layers.StackedRNNCells" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "activity_regularizer" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "graph" - mtype: "" - } - member { - name: "inbound_nodes" - mtype: "" - } - member { - name: "input" - mtype: "" - } - member { - name: "input_mask" - mtype: "" - } - member { - name: "input_shape" - mtype: "" - } - member { - name: "losses" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "non_trainable_variables" - mtype: "" - } - member { - name: "non_trainable_weights" - mtype: "" - } - member { - name: "outbound_nodes" - mtype: "" - } - member { - name: "output" - mtype: "" - } - member { - name: "output_mask" - mtype: "" - } - member { - name: "output_shape" - mtype: "" - } - member { - name: "scope_name" - mtype: "" - } - member { - name: "state_size" - mtype: "" - } - member { - name: "trainable_variables" - mtype: "" - } - member { - name: "trainable_weights" - mtype: "" - } - member { - name: "updates" - mtype: "" - } - member { - name: "variables" - mtype: "" - } - member { - name: "weights" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'cells\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_update" - argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " - } - member_method { - name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " - } - member_method { - name: "apply" - argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" - } - member_method { - name: "build" - argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "call" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=kwargs, defaults=None" - } - member_method { - name: "compute_mask" - argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "count_params" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "from_config" - argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "get_config" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_input_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "get_output_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_mask_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_output_shape_at" - argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_updates_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_weights" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_weights" - argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt index fe336c4be5..8466c3e039 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt @@ -140,10 +140,6 @@ tf_module { name: "GRU" mtype: "" } - member { - name: "GRUCell" - mtype: "" - } member { name: "GaussianDropout" mtype: "" @@ -212,10 +208,6 @@ tf_module { name: "LSTM" mtype: "" } - member { - name: "LSTMCell" - mtype: "" - } member { name: "Lambda" mtype: "" @@ -280,10 +272,6 @@ tf_module { name: "Permute" mtype: "" } - member { - name: "RNN" - mtype: "" - } member { name: "RepeatVector" mtype: "" @@ -304,10 +292,6 @@ tf_module { name: "SimpleRNN" mtype: "" } - member { - name: "SimpleRNNCell" - mtype: "" - } member { name: "SpatialDropout1D" mtype: "" @@ -320,10 +304,6 @@ tf_module { name: "SpatialDropout3D" mtype: "" } - member { - name: "StackedRNNCells" - mtype: "" - } member { name: "ThresholdedReLU" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt index 9fd38a29b7..62e634afb8 100644 --- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt @@ -94,7 +94,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "qr" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index 24c0448dea..1e9d28ca74 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -170,7 +170,7 @@ tf_module { } member_method { name: "l2_normalize" - argspec: "args=[\'x\', \'dim\', \'epsilon\', \'name\'], varargs=None, keywords=None, defaults=[\'1e-12\', \'None\'], " + argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], " } member_method { name: "leaky_relu" @@ -190,7 +190,7 @@ tf_module { } member_method { name: "log_softmax" - argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " + argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "log_uniform_candidate_sampler" @@ -282,16 +282,12 @@ tf_module { } member_method { name: "softmax" - argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " + argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "softmax_cross_entropy_with_logits" argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'-1\', \'None\'], " } - member_method { - name: "softmax_cross_entropy_with_logits_v2" - argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'-1\', \'None\'], " - } member_method { name: "softplus" argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index bf7bc6a7c1..0edd4153d7 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -750,7 +750,7 @@ tf_module { } member_method { name: "boolean_mask" - argspec: "args=[\'tensor\', \'mask\', \'name\'], varargs=None, keywords=None, defaults=[\'boolean_mask\'], " + argspec: "args=[\'tensor\', \'mask\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'boolean_mask\', \'None\'], " } member_method { name: "broadcast_dynamic_shape" @@ -858,7 +858,7 @@ tf_module { } member_method { name: "count_nonzero" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'dtype\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \"\", \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\', \'None\', \'None\'], " } member_method { name: "count_up_to" @@ -1414,7 +1414,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "not_equal" @@ -1546,11 +1546,11 @@ tf_module { } member_method { name: "reduce_all" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_any" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_join" @@ -1558,27 +1558,27 @@ tf_module { } member_method { name: "reduce_logsumexp" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_max" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_mean" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_min" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_prod" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_sum" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "register_tensor_conversion_function" diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index 8d4e4c23dc..f1c207f9b6 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -98,8 +98,7 @@ do_pylint() { "^tensorflow/contrib/eager/python/evaluator\.py.*\[E0202.*method-hidden "\ "^tensorflow/contrib/eager/python/metrics_impl\.py.*\[E0202.*method-hidden "\ "^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\ -"^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable "\ -"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition" +"^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable" echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\"" diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh index 55c1674495..e1edd62cc5 100755 --- a/tensorflow/tools/ci_build/install/install_golang.sh +++ b/tensorflow/tools/ci_build/install/install_golang.sh @@ -16,7 +16,7 @@ set -ex -GOLANG_URL="https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz" +GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz" sudo mkdir -p /usr/local wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index dcda8228bc..e5d8303c6e 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -48,6 +48,6 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_GCP=0" \ -e "TF_NEED_HDFS=0" \ -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \ - -e "TF_NEED_OPENCL=0" \ + -e "TF_NEED_OPENCL_SYCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index d90a1b905d..e1b56b9a25 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -27,7 +27,7 @@ export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 -export TF_NEED_OPENCL=0 +export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index 79973647c1..5a901af3e5 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -28,7 +28,7 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 -export TF_NEED_OPENCL=0 +export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 5244898c40..88116d9f24 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -75,17 +75,23 @@ if [[ $1 == "PI_ONE" ]]; then PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/ + --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 + --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' echo "Building for the Pi Two/Three, with NEON acceleration" fi +# We need to pass down the environment variable with a possible alternate Python +# include path for Python 3.x builds to work. +export CROSSTOOL_PYTHON_INCLUDE_PATH + cd ${WORKSPACE_PATH} bazel build -c opt ${PI_COPTS} \ --config=monolithic \ diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 6a8b6417d6..6c964c7227 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -117,7 +117,7 @@ function run_configure_for_cpu_build { export TF_NEED_VERBS=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL=0 + export TF_NEED_OPENCL_SYCL=0 echo "" | ./configure } @@ -141,7 +141,7 @@ function run_configure_for_gpu_build { export TF_NEED_MKL=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL=0 + export TF_NEED_OPENCL_SYCL=0 # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL # for GPU build on Windows diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 1a0145b078..20e1dcd085 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -83,11 +83,6 @@ ENV CI_BUILD_PYTHON python RUN tensorflow/tools/ci_build/builds/configured CPU \ bazel build -c opt --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ - # For optimized builds appropriate for the hardware platform of your choosing, uncomment below... - # For ivy-bridge or sandy-bridge - # --copt=-march="ivybridge" \ - # for haswell, broadwell, or skylake - # --copt=-march="haswell" \ tensorflow/tools/pip_package:build_pip_package && \ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \ pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl && \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 index 64ebc4607a..9bcc3925a8 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 @@ -101,12 +101,11 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib --jobs=${TF_AVAILABLE_CPUS} \ tensorflow/tools/pip_package:build_pip_package && \ mkdir /pip_pkg && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg - -# Clean up pip wheel and Bazel cache when done. -RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \ + pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ rm -rf /pip_pkg && \ rm -rf /root/.cache +# Clean up pip wheel and Bazel cache when done. WORKDIR /root diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index 0571dd7391..e212d10290 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04 +FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04 LABEL maintainer="Craig Citro " diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index 2e5a0038ed..e35c58ff80 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -60,6 +60,20 @@ Building TensorFlow Docker containers should be done through the script. The raw Dockerfiles should not be used directly as they contain strings to be replaced by the script during the build. +Attempting to run [parameterized_docker_build.sh](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/parameterized_docker_build.sh) +from a binary docker image such as for example `tensorflow/tensorflow:latest` will +not work. One needs to execute the script from a developer docker image since by +contrast with a binary docker image it contains not only the compiled solution but +also the tensorflow source code. Please select the appropriate developer docker +image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`. + +The smallest command line to generate a docker image will then be: +```docker run -it tensorflow/tensorflow:"right_tag"``` + +If you would like to start a jupyter notebook on your docker container, make sure +to map the port 8888 of your docker container by adding -p 8888:8888 to the above +command. + To use the script, specify the container type (`CPU` vs. `GPU`), the desired Python version (`PYTHON2` vs. `PYTHON3`) and whether the developer Docker image is to be built (`NO` vs. `YES`). In addition, you need to specify the central diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index 1bf7113c9e..9216008600 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -131,6 +131,8 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", + "//tensorflow/contrib/rnn:gru_ops_op_lib", + "//tensorflow/contrib/rnn:lstm_ops_op_lib", ] + if_not_windows([ "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform", diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc index 2b85e7e83c..97e8f77616 100644 --- a/tensorflow/tools/graph_transforms/quantize_nodes.cc +++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc @@ -759,6 +759,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reshape_dims; reshape_dims.set_op("Const"); reshape_dims.set_name(unique_input_name + "/reshape_dims"); + AddNodeInput("^" + input_name, &reshape_dims); SetNodeAttr("dtype", DT_INT32, &reshape_dims); Tensor reshape_dims_tensor(DT_INT32, {1}); reshape_dims_tensor.flat()(0) = -1; @@ -768,6 +769,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reduction_dims; reduction_dims.set_op("Const"); reduction_dims.set_name(unique_input_name + "/reduction_dims"); + AddNodeInput("^" + input_name, &reduction_dims); SetNodeAttr("dtype", DT_INT32, &reduction_dims); Tensor reduction_dims_tensor(DT_INT32, {1}); reduction_dims_tensor.flat()(0) = 0; diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 456c2e2908..0c54300e06 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.4.0-rc1' +_VERSION = '1.4.0' REQUIRED_PACKAGES = [ 'absl-py', diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index dfe332b091..afcae6eade 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -1,24 +1,21 @@ # TensorFlow external dependencies that can be loaded in WORKSPACE files. load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") - load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") -load( - "@io_bazel_rules_closure//closure/private:java_import_external.bzl", - "java_import_external", -) +load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", + "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") load("//third_party/py:python_configure.bzl", "python_configure") -load( - "//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", - "arm_compiler_configure", -) +load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", + "arm_compiler_configure") + def _is_windows(repository_ctx): """Returns true if the host operating system is windows.""" return repository_ctx.os.name.lower().find("windows") != -1 + def _get_env_var(repository_ctx, name): """Find an environment variable.""" if name in repository_ctx.os.environ: @@ -26,6 +23,7 @@ def _get_env_var(repository_ctx, name): else: return None + # Parse the bazel version string from `native.bazel_version`. def _parse_bazel_version(bazel_version): # Remove commit from version. @@ -41,6 +39,7 @@ def _parse_bazel_version(bazel_version): version_tuple += (str(number),) return version_tuple + # Check that a specific bazel version is being used. def check_version(bazel_version): if "bazel_version" not in dir(native): @@ -57,9 +56,11 @@ def check_version(bazel_version): fail("\nCurrent Bazel version is {}, expected at least {}\n".format( native.bazel_version, bazel_version)) + def _repos_are_siblings(): return Label("@foo//bar").workspace_root.startswith("../") + # Temporary workaround to support including TensorFlow as a submodule until this # use-case is supported in the next Bazel release. def _temp_workaround_http_archive_impl(repo_ctx): @@ -72,7 +73,9 @@ def _temp_workaround_http_archive_impl(repo_ctx): if repo_ctx.attr.patch_file != None: _apply_patch(repo_ctx, repo_ctx.attr.patch_file) + temp_workaround_http_archive = repository_rule( + implementation = _temp_workaround_http_archive_impl, attrs = { "build_file": attr.label(), "repository": attr.string(), @@ -81,7 +84,6 @@ temp_workaround_http_archive = repository_rule( "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), }, - implementation = _temp_workaround_http_archive_impl, ) # Executes specified command with arguments and calls 'fail' if it exited with @@ -93,6 +95,7 @@ def _execute_and_check_ret_code(repo_ctx, cmd_and_args): + "Stderr: {3}").format(" ".join(cmd_and_args), result.return_code, result.stdout, result.stderr)) + # Apply a patch_file to the repository root directory # Runs 'patch -p1' def _apply_patch(repo_ctx, patch_file): @@ -110,6 +113,7 @@ def _apply_patch(repo_ctx, patch_file): cmd = [bazel_sh, "-c", " ".join(cmd)] _execute_and_check_ret_code(repo_ctx, cmd) + # Download the repository and apply a patch to its root def _patched_http_archive_impl(repo_ctx): repo_ctx.download_and_extract( @@ -118,7 +122,9 @@ def _patched_http_archive_impl(repo_ctx): stripPrefix=repo_ctx.attr.strip_prefix) _apply_patch(repo_ctx, repo_ctx.attr.patch_file) + patched_http_archive = repository_rule( + implementation = _patched_http_archive_impl, attrs = { "patch_file": attr.label(), "build_file": attr.label(), @@ -127,9 +133,9 @@ patched_http_archive = repository_rule( "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), }, - implementation = _patched_http_archive_impl, ) + # If TensorFlow is linked as a submodule. # path_prefix is no longer used. # tf_repo_name is thought to be under consideration. @@ -442,11 +448,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "nsync", urls = [ - "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", - # "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", + "https://mirror.bazel.build/github.com/google/nsync/archive/4fc8ff3e7626c5f24bc9674438d8257f0ffc226c.tar.gz", + # "https://github.com/google/nsync/archive/4fc8ff3e7626c5f24bc9674438d8257f0ffc226c.tar.gz", ], - sha256 = "e3bd4555415ace511338fc27e595351738eea4e9006f1612b76c82914770716b", - strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323", + sha256 = "ffbbe828f3d0bef75462e34801de5cea31d10aa63eaa42a4ed74c46521bdfd58", + strip_prefix = "nsync-4fc8ff3e7626c5f24bc9674438d8257f0ffc226c", ) native.http_archive( @@ -815,12 +821,3 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", ], ) - - native.new_http_archive( - name = "tflite_mobilenet", - build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), - sha256 = "eb71679d23a0cbdb173b36ea39f3d3096de0a9b0410d148a8237f20cc1157a61", - urls = [ - "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_1.0_224_quantized_2017_11_01.zip" - ], - ) diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD index bc6a2fd8cc..bc9e37ffb3 100644 --- a/third_party/aws.BUILD +++ b/third_party/aws.BUILD @@ -21,6 +21,9 @@ cc_library( "@%ws%//tensorflow:linux_ppc64le": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), + "@%ws%//tensorflow:raspberry_pi_armeabi": glob([ + "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", + ]), "//conditions:default": [], }) + glob([ "aws-cpp-sdk-core/include/**/*.h", diff --git a/third_party/boringssl/add_boringssl_s390x.patch b/third_party/boringssl/add_boringssl_s390x.patch deleted file mode 100644 index 8b42d10e68..0000000000 --- a/third_party/boringssl/add_boringssl_s390x.patch +++ /dev/null @@ -1,133 +0,0 @@ -diff --git a/src/include/openssl/base.h b/src/include/openssl/base.h -index 7a3adfb..88012ad 100644 ---- a/src/include/openssl/base.h -+++ b/src/include/openssl/base.h -@@ -94,6 +94,8 @@ extern "C" { - #define OPENSSL_PNACL - #elif defined(__myriad2__) - #define OPENSSL_32_BIT -+#elif defined(__s390x__) -+#define OPENSSL_64_BIT - #else - #error "Unknown target CPU" - #endif -diff --git a/BUILD b/BUILD -index 6b645e61..c90b7beb 100644 ---- a/BUILD -+++ b/BUILD -@@ -40,29 +40,46 @@ config_setting( - values = {"cpu": "darwin"}, - ) - --boringssl_copts = [ -- # Assembler option --noexecstack adds .note.GNU-stack to each object to -- # ensure that binaries can be built with non-executable stack. -- "-Wa,--noexecstack", -- -- # This is needed on Linux systems (at least) to get rwlock in pthread. -- "-D_XOPEN_SOURCE=700", -- -- # This list of warnings should match those in the top-level CMakeLists.txt. -- "-Wall", -- "-Werror", -- "-Wformat=2", -- "-Wsign-compare", -- "-Wmissing-field-initializers", -- "-Wwrite-strings", -- "-Wshadow", -- "-fno-common", -- -- # Modern build environments should be able to set this to use atomic -- # operations for reference counting rather than locks. However, it's -- # known not to work on some Android builds. -- # "-DOPENSSL_C11_ATOMIC", --] + select({ -+config_setting( -+ name = "windows", -+ values = {"cpu": "x64_windows"}, -+ visibility = ["//visibility:public"], -+) -+ -+config_setting( -+ name = "windows_msvc", -+ values = {"cpu": "x64_windows_msvc"}, -+ visibility = ["//visibility:public"], -+) -+ -+boringssl_copts = select({ -+ ":windows": [ -+ "-DWIN32_LEAN_AND_MEAN", -+ ], -+ "//conditions:default": [ -+ # Assembler option --noexecstack adds .note.GNU-stack to each object to -+ # ensure that binaries can be built with non-executable stack. -+ "-Wa,--noexecstack", -+ -+ # This is needed on Linux systems (at least) to get rwlock in pthread. -+ "-D_XOPEN_SOURCE=700", -+ -+ # This list of warnings should match those in the top-level CMakeLists.txt. -+ "-Wall", -+ "-Werror", -+ "-Wformat=2", -+ "-Wsign-compare", -+ "-Wmissing-field-initializers", -+ "-Wwrite-strings", -+ "-Wshadow", -+ "-fno-common", -+ -+ # Modern build environments should be able to set this to use atomic -+ # operations for reference counting rather than locks. However, it's -+ # known not to work on some Android builds. -+ # "-DOPENSSL_C11_ATOMIC", -+ ], -+}) + select({ - ":linux_x86_64": [], - ":mac_x86_64": [], - "//conditions:default": ["-DOPENSSL_NO_ASM"], -@@ -75,18 +92,26 @@ crypto_sources_asm = select({ - }) - - # For C targets only (not C++), compile with C11 support. --boringssl_copts_c11 = boringssl_copts + [ -- "-std=c11", -- "-Wmissing-prototypes", -- "-Wold-style-definition", -- "-Wstrict-prototypes", --] -+boringssl_copts_c11 = boringssl_copts + select({ -+ ":windows": [], -+ ":windows_msvc": [], -+ "//conditions:default": [ -+ "-std=c11", -+ "-Wmissing-prototypes", -+ "-Wold-style-definition", -+ "-Wstrict-prototypes", -+ ], -+}) - - # For C targets only (not C++), compile with C11 support. --boringssl_copts_cxx = boringssl_copts + [ -- "-std=c++11", -- "-Wmissing-declarations", --] -+boringssl_copts_cxx = boringssl_copts + select({ -+ ":windows": [], -+ ":windows_msvc": [], -+ "//conditions:default": [ -+ "-std=c++11", -+ "-Wmissing-declarations", -+ ], -+}) - - cc_library( - name = "crypto", -@@ -96,6 +121,8 @@ cc_library( - includes = ["src/include"], - linkopts = select({ - ":mac_x86_64": [], -+ ":windows": [], -+ ":windows_msvc": [], - "//conditions:default": ["-lpthread"], - }), - visibility = ["//visibility:public"], diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 882967df1c..805a30d262 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -477,7 +477,6 @@ genrule( "# define HAVE_RAND_EGD 1", "# define HAVE_RAND_STATUS 1", "# define HAVE_SSL_GET_SHUTDOWN 1", - "# define HAVE_STROPTS_H 1", "# define HAVE_TERMIOS_H 1", "# define OS \"x86_64-pc-linux-gnu\"", "# define RANDOM_FILE \"/dev/urandom\"", diff --git a/third_party/nanopb.BUILD b/third_party/nanopb.BUILD deleted file mode 100644 index d21866911b..0000000000 --- a/third_party/nanopb.BUILD +++ /dev/null @@ -1,23 +0,0 @@ -# Description: -# Nanopb, a tiny ANSI C protobuf implementation for use on embedded devices. - -licenses(["notice"]) # zlib license - -exports_files(["LICENSE.txt"]) - -cc_library( - name = "nanopb", - srcs = [ - "pb_common.c", - "pb_decode.c", - "pb_encode.c", - ], - hdrs = [ - "pb.h", - "pb_common.h", - "pb_decode.h", - "pb_encode.h", - ], - includes = ["."], - visibility = ["//visibility:public"], -) diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl index 32884d71e7..f8e50efcc6 100755 --- a/third_party/sycl/crosstool/CROSSTOOL.tpl +++ b/third_party/sycl/crosstool/CROSSTOOL.tpl @@ -35,10 +35,10 @@ toolchain { tool_path { name: "compat-ld" path: "/usr/bin/ld" } tool_path { name: "cpp" path: "/usr/bin/cpp" } tool_path { name: "dwp" path: "/usr/bin/dwp" } - tool_path { name: "gcc" path: "computecpp" } + tool_path { name: "gcc" path: "%{sycl_impl}" } # Use "-std=c++11" for nvcc. For consistency, force both the host compiler # and the device compiler to use "-std=c++11". - cxx_flag: "-std=c++11" + cxx_flag: "%{c++_std}" linker_flag: "-Wl,-no-as-needed" linker_flag: "-lstdc++" linker_flag: "-B/usr/bin/" @@ -53,7 +53,7 @@ toolchain { cxx_builtin_include_directory: "/usr/local/include" cxx_builtin_include_directory: "/usr/include" - cxx_builtin_include_directory: "%{computecpp_toolkit_path}" + cxx_builtin_include_directory: "%{sycl_include_dir}" cxx_builtin_include_directory: "%{python_lib_path}" tool_path { name: "gcov" path: "/usr/bin/gcov" } @@ -214,4 +214,4 @@ toolchain { compiler_flag: "-O2" compiler_flag: "-DNDEBUG" } -} +} \ No newline at end of file diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl new file mode 100644 index 0000000000..b470772fbf --- /dev/null +++ b/third_party/sycl/crosstool/trisycl.tpl @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +import os +import sys +import tempfile +from subprocess import call + +CPU_CXX_COMPILER = ('%{host_cxx_compiler}') +CPU_C_COMPILER = ('%{host_c_compiler}') + +CURRENT_DIR = os.path.dirname(sys.argv[0]) +TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' + +def main(): + compiler_flags = [] + + remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions') + # remove -fsamotoze-coverage from string with g++ + if 'g++' in CPU_CXX_COMPILER: + remove_flags += ('-fsanitize-coverage',) + compiler_flags += ['-fopenmp'] + else: + compiler_flags += ['-fopenmp=libomp'] + + compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)] + + + output_file_index = compiler_flags.index('-o') + 1 + output_file_name = compiler_flags[output_file_index] + + if(output_file_index == 1): + # we are linking + return call([CPU_CXX_COMPILER] + compiler_flags + + ['-Wl,--no-undefined']) + + # find what we compile + compiling_cpp = 0 + if('-c' in compiler_flags): + compiled_file_index = compiler_flags.index('-c') + 1 + compiled_file_name = compiler_flags[compiled_file_index] + if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', + '.C', '.cxx'))): + compiling_cpp = 1; + + debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic'] + + opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] + + compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', + '-DEIGEN_HAS_C99_MATH', + '-DEIGEN_MAX_ALIGN_BYTES=16', + '-DTENSORFLOW_USE_SYCL'] + opt_flags + + if(compiling_cpp == 1): + # create a blacklist of folders that will be skipped when compiling + # with triSYCL + skip_extensions = [".cu.cc"] + skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"] + skip_folders = [(folder + '/') for folder in skip_folders] + # if compiling external project skip triSYCL + if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders): + return call([CPU_CXX_COMPILER] + compiler_flags) + + host_compiler_flags = ['-xc++', '-Wno-unused-variable', + '-I', TRISYCL_INCLUDE_DIR] + compiler_flags + x = call([CPU_CXX_COMPILER] + host_compiler_flags) + return x + else: + # compile for C + return call([CPU_C_COMPILER] + compiler_flags) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl index 6cad190630..b6ceaadda7 100755 --- a/third_party/sycl/sycl/BUILD.tpl +++ b/third_party/sycl/sycl/BUILD.tpl @@ -10,16 +10,27 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE.text"]) config_setting( - name = "using_sycl", - values = { - "define": "using_sycl=true", + name = "using_sycl_ccpp", + define_values = { + "using_sycl": "true", + "using_trisycl": "false", }, ) +config_setting( + name = "using_sycl_trisycl", + define_values = { + "using_sycl": "true", + "using_trisycl": "false", + }, +) + + cc_library( name = "sycl_headers", hdrs = glob([ "**/*.h", + "**/*.hpp", ]), includes = [".", "include"], ) diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl index 09bef0a661..33386f8957 100755 --- a/third_party/sycl/sycl/build_defs.bzl.tpl +++ b/third_party/sycl/sycl/build_defs.bzl.tpl @@ -5,9 +5,24 @@ def if_sycl(if_true, if_false = []): Returns a select statement which evaluates to if_true if we're building with SYCL enabled. Otherwise, the select statement evaluates to if_false. + If we are building with triSYCL instead of ComputeCPP, a list with + the first element of if_true is returned. + """ + return select({ + "@local_config_sycl//sycl:using_sycl_ccpp": if_true, + "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1], + "//conditions:default": if_false + }) + +def if_ccpp(if_true, if_false = []): + """Shorthand for select()'ing if we are building with ComputeCPP. + Returns a select statement which evaluates to if_true if we're building + with ComputeCPP enabled. Otherwise, the select statement evaluates + to if_false. """ return select({ - "@local_config_sycl//sycl:using_sycl": if_true, + "@local_config_sycl//sycl:using_sycl_ccpp": if_true, + "@local_config_sycl//sycl:using_sycl_trisycl": if_false, "//conditions:default": if_false }) diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl index 7af063178e..a0c9e4e43a 100644 --- a/third_party/sycl/sycl_configure.bzl +++ b/third_party/sycl/sycl_configure.bzl @@ -5,20 +5,26 @@ * HOST_CXX_COMPILER: The host C++ compiler * HOST_C_COMPILER: The host C compiler * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit. + * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL. + (if using triSYCL instead of ComputeCPP) * PYTHON_LIB_PATH: The path to the python lib """ _HOST_CXX_COMPILER = "HOST_CXX_COMPILER" _HOST_C_COMPILER= "HOST_C_COMPILER" _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH" +_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" def _enable_sycl(repository_ctx): - if "TF_NEED_OPENCL" in repository_ctx.os.environ: - enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL"].strip() + if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ: + enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip() return enable_sycl == "1" return False +def _enable_compute_cpp(repository_ctx): + return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ + def auto_configure_fail(msg): """Output failure message when auto configuration fails.""" red = "\033[0;31m" @@ -59,6 +65,15 @@ def find_computecpp_root(repository_ctx): return sycl_name fail("Cannot find SYCL compiler, please correct your path") +def find_trisycl_include_dir(repository_ctx): + """Find triSYCL include directory. """ + sycl_name = "" + if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ: + sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip() + if sycl_name.startswith("/"): + return sycl_name + fail( "Cannot find triSYCL include directory, please correct your path") + def find_python_lib(repository_ctx): """Returns python path.""" if _PYTHON_LIB_PATH in repository_ctx.os.environ: @@ -171,26 +186,53 @@ def _sycl_autoconf_imp(repository_ctx): _tpl(repository_ctx, "sycl:platform.bzl") _tpl(repository_ctx, "crosstool:BUILD") _file(repository_ctx, "sycl:LICENSE.text") - _tpl(repository_ctx, "crosstool:computecpp", - { - "%{host_cxx_compiler}" : find_cc(repository_ctx), - "%{host_c_compiler}" : find_c(repository_ctx), - }) - - computecpp_root = find_computecpp_root(repository_ctx) - _check_dir(repository_ctx, computecpp_root) - - _tpl(repository_ctx, "crosstool:CROSSTOOL", - { - "%{computecpp_toolkit_path}" : computecpp_root, - "%{python_lib_path}" : find_python_lib(repository_ctx), - }) - - # symlink libraries - _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) - _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") - _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") - _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") + + if _enable_compute_cpp(repository_ctx): + _tpl(repository_ctx, "crosstool:computecpp", + { + "%{host_cxx_compiler}" : find_cc(repository_ctx), + "%{host_c_compiler}" : find_c(repository_ctx) + }) + + computecpp_root = find_computecpp_root(repository_ctx); + _check_dir(repository_ctx, computecpp_root) + + _tpl(repository_ctx, "crosstool:CROSSTOOL", + { + "%{sycl_include_dir}" : computecpp_root, + "%{sycl_impl}" : "computecpp", + "%{c++_std}" : "-std=c++11", + "%{python_lib_path}" : find_python_lib(repository_ctx), + }) + + # symlink libraries + _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) + _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") + _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") + _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") + else: + + trisycl_include_dir = find_trisycl_include_dir(repository_ctx); + _check_dir(repository_ctx, trisycl_include_dir) + + _tpl(repository_ctx, "crosstool:trisycl", + { + "%{host_cxx_compiler}" : find_cc(repository_ctx), + "%{host_c_compiler}" : find_c(repository_ctx), + "%{trisycl_include_dir}" : trisycl_include_dir + }) + + + _tpl(repository_ctx, "crosstool:CROSSTOOL", + { + "%{sycl_include_dir}" : trisycl_include_dir, + "%{sycl_impl}" : "trisycl", + "%{c++_std}" : "-std=c++1y", + "%{python_lib_path}" : find_python_lib(repository_ctx), + }) + + _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include") + sycl_configure = repository_rule( implementation = _sycl_autoconf_imp, diff --git a/third_party/tflite_mobilenet.BUILD b/third_party/tflite_mobilenet.BUILD deleted file mode 100644 index 75663eff48..0000000000 --- a/third_party/tflite_mobilenet.BUILD +++ /dev/null @@ -1,13 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # Apache 2.0 - -filegroup( - name = "model_files", - srcs = glob( - ["**/*"], - exclude = [ - "BUILD", - ], - ), -) diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD index 8509668891..d164ee719c 100644 --- a/third_party/zlib.BUILD +++ b/third_party/zlib.BUILD @@ -49,7 +49,7 @@ cc_library( ":windows_msvc": [], "//conditions:default": [ "-Wno-shift-negative-value", - "-Wno-implicit-function-declaration", + "-DZ_HAVE_UNISTD_H", ], }), includes = ["."], diff --git a/tools/bazel.rc b/tools/bazel.rc index 414ddf2e47..f609efe188 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -9,13 +9,16 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true build:mkl --define=using_mkl=true build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl --define=using_sycl=true +build:sycl --define=using_sycl=true --define=using_trisycl=false build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address +build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address + +build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true build --define=use_fast_cpp_protos=true build --define=allow_oversize_protos=true -- GitLab From dbc59101a8612ce7a106531dab6f1a520919ea14 Mon Sep 17 00:00:00 2001 From: Edd Wilder-James Date: Fri, 10 Nov 2017 13:55:47 -0800 Subject: [PATCH 0149/1801] Add named contacts to Code of Conduct (#14466) * Add ewilderj as CoC contact * Add named contacts to code of conduct * Update language to match multiple stewards --- CODE_OF_CONDUCT.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index cfc45049f7..ff11d13140 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -55,14 +55,14 @@ If you are experiencing or witnessing conflict, we ask you to use the following ## Reporting Violations -Violations of the Code of Conduct can be reported to TensorFlow’s Project Steward at conduct@tensorflow.org. The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. +Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report. ## Enforcement -If the Project Steward receives a report alleging a violation of the Code of Conduct, the Project Steward will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Steward will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Steward may issue sanctions without notice. +If the Project Stewards receive a report alleging a violation of the Code of Conduct, the Project Stewards will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Stewards will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Stewards may issue sanctions without notice. ## Attribution -- GitLab From 054b515feec0a3fca4cfb1f29adbf423c9027c3a Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 10 Nov 2017 14:44:59 -0800 Subject: [PATCH 0150/1801] Fix license declaration for Python headers (#14469) This appears to be a legacy package so we might want to consider deleting it at some point too. --- util/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/python/BUILD b/util/python/BUILD index 96daf9947a..f5fa0c6d29 100644 --- a/util/python/BUILD +++ b/util/python/BUILD @@ -1,4 +1,4 @@ -licenses(["restricted"]) +licenses(["notice"]) # New BSD, Python Software Foundation package(default_visibility = ["//visibility:public"]) -- GitLab From a443df1c8287e135a4c67ab90462651d40507b7a Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 16:13:54 -0800 Subject: [PATCH 0151/1801] Trivial change to cause hashes to be recompued --- ISSUE_TEMPLATE.md | 1 - 1 file changed, 1 deletion(-) diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md index 1a401997c6..1f6ae1aba5 100644 --- a/ISSUE_TEMPLATE.md +++ b/ISSUE_TEMPLATE.md @@ -1,5 +1,4 @@ Please go to Stack Overflow for help and support: - https://stackoverflow.com/questions/tagged/tensorflow If you open a GitHub issue, here is our policy: -- GitLab From 31c07fffcaad0688f2ab016ed5a5d6485a2f5488 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 7 Nov 2017 16:18:26 -0800 Subject: [PATCH 0152/1801] Automated g4 rollback of changelist 174708213 PiperOrigin-RevId: 174930262 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 060b3f9129..5a3b831429 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -66,7 +66,7 @@ _CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] # TODO(b/65703635): Flip the value and remove all dead code. -_WRAP_INPUT_FN_INTO_WHILE_LOOP = True +_WRAP_INPUT_FN_INTO_WHILE_LOOP = False def _create_global_step(graph): -- GitLab From 0bd46f52dad251846996bf440177128a16d429c2 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Tue, 7 Nov 2017 16:24:37 -0800 Subject: [PATCH 0153/1801] Added profiler traces for GPU back-end operations. PiperOrigin-RevId: 174931093 --- .../compiler/xla/service/gpu/gpu_compiler.cc | 18 +++++++++++++----- .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 4 ++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index ceb0e530c1..187b4a705c 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -75,6 +75,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/platform/subprocess.h" +#include "tensorflow/core/platform/tracing.h" namespace se = ::perftools::gputools; @@ -87,6 +88,7 @@ namespace gpu { namespace { +using tensorflow::port::Tracing; using tensorflow::strings::StrCat; // Any address of a variable residing in global memory or returned by one of the @@ -231,6 +233,7 @@ tensorflow::Status PrepareHloModuleForIrEmitting( // code (i.e. a cubin) as a byte array. StatusOr> CompilePtx(const string& ptx, int cc_major, int cc_minor) { + Tracing::TraceMe annotation("Compile PTX", /*is_expensive=*/true); const string ptxas_path = tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas"); VLOG(2) << "Using ptxas at " << ptxas_path; @@ -295,11 +298,15 @@ StatusOr> GpuCompiler::Compile( std::unique_ptr module, se::StreamExecutor* stream_exec) { TF_RET_CHECK(stream_exec != nullptr); - TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), - stream_exec->GetDeviceDescription(), - ShapeSizeBytesFunction())); - TF_RETURN_IF_ERROR( - PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); + { + Tracing::TraceMe annotation("HLO Transforms", module->name(), + /*is_expensive=*/true); + TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), + stream_exec->GetDeviceDescription(), + ShapeSizeBytesFunction())); + TF_RETURN_IF_ERROR( + PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); + } llvm::LLVMContext llvm_context; std::string buffer; @@ -444,6 +451,7 @@ StatusOr> GpuCompiler::Compile( std::vector GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx, int cc_major, int cc_minor) { + Tracing::TraceMe annotation("PTX->CUBIN", /*is_expensive=*/true); bool inserted; decltype(compilation_cache_.begin()) iter; // Pointers into compilation_cache_ where the ptx and (optional) cubin are diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 817e95a31c..1cb963be61 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -60,6 +60,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/tracing.h" namespace xla { namespace gpu { @@ -488,6 +489,9 @@ StatusOr CompileToPtx(llvm::Module* module, string ptx; { + tensorflow::port::Tracing::TraceMe annotation( + "Compiling IR", llvm_ir::AsString(module->getName()), + /*is_expensive=*/true); ScopedLoggingTimer compilation_timer( "Compile module " + llvm_ir::AsString(module->getName()), /*vlog_level=*/2); -- GitLab From 23bf184564e7842432efb8a66d6d22db4b79205e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 16:29:35 -0800 Subject: [PATCH 0154/1801] [tf.data] Saveable iterator for dataset.take(.), dataset.skip(.) and dataset.repeat(.). PiperOrigin-RevId: 174931742 --- .../contrib/data/python/kernel_tests/BUILD | 2 + .../dataset_serialization_test_base.py | 73 +++++++++++------ .../kernel_tests/sequence_dataset_op_test.py | 78 +++++++++++++++++++ tensorflow/core/kernels/repeat_dataset_op.cc | 32 ++++++++ tensorflow/core/kernels/skip_dataset_op.cc | 46 ++++++++++- tensorflow/core/kernels/take_dataset_op.cc | 46 ++++++++++- 6 files changed, 245 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 7283f0ff0a..c1f1d90c5d 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -365,6 +365,7 @@ py_test( srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -428,6 +429,7 @@ py_test( srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index df9147af6c..369b789a52 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -32,7 +32,7 @@ from tensorflow.python.util import nest class DatasetSerializationTestBase(test.TestCase): - """Base class for testing finite serializable datasets.""" + """Base class for testing serializable datasets.""" def tearDown(self): self._delete_ckpt() @@ -58,17 +58,19 @@ class DatasetSerializationTestBase(test.TestCase): if ds_fn2: self.verify_restore_in_modified_graph(ds_fn1, ds_fn2, num_outputs) - def verify_unused_iterator(self, ds_fn, num_outputs): + def verify_unused_iterator(self, ds_fn, num_outputs, verify_exhausted=True): """Verifies that saving and restoring an unused iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. """ - self.verify_run_with_breaks(ds_fn, [0], num_outputs) + self.verify_run_with_breaks( + ds_fn, [0], num_outputs, verify_exhausted=verify_exhausted) def verify_fully_used_iterator(self, ds_fn, num_outputs): """Verifies that saving and restoring a fully used iterator works. @@ -104,12 +106,16 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, [], 0, ckpt_saved=True, verify_exhausted=True) self.assertEqual(len(actual), 0) - def verify_init_before_restore(self, ds_fn, num_outputs): + def verify_init_before_restore(self, + ds_fn, + num_outputs, + verify_exhausted=True): """Verifies that retoring into an already initilized iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -118,9 +124,14 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, self.gen_break_points(num_outputs), num_outputs, - init_before_restore=True) + init_before_restore=True, + verify_exhausted=verify_exhausted) - def verify_multiple_breaks(self, ds_fn, num_outputs, num_breaks=10): + def verify_multiple_breaks(self, + ds_fn, + num_outputs, + num_breaks=10, + verify_exhausted=True): """Attempts to save/restore at multiple break points. Args: @@ -128,16 +139,22 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs: See `run_core_tests`. num_breaks: The number of break points. These are uniformly spread in [0, num_outputs] both inclusive. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. """ - self.verify_run_with_breaks(ds_fn, - self.gen_break_points(num_outputs, num_breaks), - num_outputs) + self.verify_run_with_breaks( + ds_fn, + self.gen_break_points(num_outputs), + num_outputs, + verify_exhausted=verify_exhausted) - def verify_reset_restored_iterator(self, ds_fn, num_outputs, - break_point=None): + def verify_reset_restored_iterator(self, + ds_fn, + num_outputs, + break_point=None, + verify_exhausted=True): """Attempts to re-initialize a restored iterator. This is useful when restoring a training checkpoint during validation. @@ -146,6 +163,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -153,7 +171,8 @@ class DatasetSerializationTestBase(test.TestCase): break_point = num_outputs // 2 if not break_point else break_point # Collect ground truth containing all outputs. - expected = self.gen_outputs(ds_fn, [], num_outputs, verify_exhausted=True) + expected = self.gen_outputs( + ds_fn, [], num_outputs, verify_exhausted=verify_exhausted) # Skip some items and save checkpoint. self.gen_outputs(ds_fn, [], break_point, verify_exhausted=False) @@ -168,15 +187,17 @@ class DatasetSerializationTestBase(test.TestCase): sess.run(init_op) for _ in range(num_outputs): actual.append(sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) + if verify_exhausted: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) self.match(expected, actual) def verify_restore_in_modified_graph(self, ds_fn1, ds_fn2, num_outputs, - break_point=None): + break_point=None, + verify_exhausted=True): """Attempts to restore an iterator in a modified graph. Builds an input pipeline using ds_fn1, runs it for `break_point` steps @@ -188,6 +209,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn2: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -196,15 +218,15 @@ class DatasetSerializationTestBase(test.TestCase): # Skip `break_point` items and store the remaining produced from ds_fn1 # in `expected`. - self.gen_outputs(ds_fn1, [], break_point) + self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) expected = self.gen_outputs( ds_fn1, [], num_outputs - break_point, ckpt_saved=True, - verify_exhausted=True) + verify_exhausted=verify_exhausted) # Generate `break_point` items from ds_fn1 and save checkpoint. - self.gen_outputs(ds_fn1, [], break_point) + self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) actual = [] # Build graph for ds_fn2 but load checkpoint for ds_fn1. @@ -214,8 +236,9 @@ class DatasetSerializationTestBase(test.TestCase): self._restore(saver, sess) for _ in range(num_outputs - break_point): actual.append(sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) + if verify_exhausted: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) self.match(expected, actual) @@ -223,6 +246,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, break_points, num_outputs, + verify_exhausted=True, init_before_restore=False): """Verifies that ds_fn() produces the same outputs with and without breaks. @@ -237,6 +261,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `gen_outputs`. break_points: See `gen_outputs`. num_outputs: See `gen_outputs`. + verify_exhausted: See `gen_outputs`. init_before_restore: See `gen_outputs`. Raises: @@ -245,13 +270,13 @@ class DatasetSerializationTestBase(test.TestCase): expected = self.gen_outputs( ds_fn, [], num_outputs, - verify_exhausted=True, + verify_exhausted=verify_exhausted, init_before_restore=init_before_restore) actual = self.gen_outputs( ds_fn, break_points, num_outputs, - verify_exhausted=True, + verify_exhausted=verify_exhausted, init_before_restore=init_before_restore) self.match(expected, actual) @@ -261,7 +286,7 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs, ckpt_saved=False, init_before_restore=False, - verify_exhausted=False): + verify_exhausted=True): """Generates elements from input dataset while stopping at break points. Produces `num_outputs` outputs and saves the state of the iterator in the @@ -285,7 +310,7 @@ class DatasetSerializationTestBase(test.TestCase): after producing `num_outputs` elements. Returns: - A list if `num_outputs` items. + A list of `num_outputs` items. """ outputs = [] diff --git a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py index 91615e9f62..1a26da82e5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/sequence_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -207,5 +208,82 @@ class SequenceDatasetTest(test.TestCase): sess.run(get_next) +class SequenceDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_skip_dataset(self, count): + components = (np.arange(10),) + return dataset_ops.Dataset.from_tensor_slices(components).skip(count) + + def testSkipFewerThanInputs(self): + count = 4 + num_outputs = 10 - count + self.run_core_tests(lambda: self._build_skip_dataset(count), + lambda: self._build_skip_dataset(count + 2), + num_outputs) + + def testSkipVarious(self): + # Skip more than inputs + self.run_core_tests(lambda: self._build_skip_dataset(20), None, 0) + # Skip exactly the input size + self.run_core_tests(lambda: self._build_skip_dataset(10), None, 0) + self.run_core_tests(lambda: self._build_skip_dataset(-1), None, 0) + # Skip nothing + self.run_core_tests(lambda: self._build_skip_dataset(0), None, 10) + + def _build_take_dataset(self, count): + components = (np.arange(10),) + return dataset_ops.Dataset.from_tensor_slices(components).take(count) + + def testTakeFewerThanInputs(self): + count = 4 + self.run_core_tests( + lambda: self._build_take_dataset(count), + lambda: self._build_take_dataset(count + 2), + count, + ) + + def testTakeVarious(self): + # Take more than inputs + self.run_core_tests(lambda: self._build_take_dataset(20), None, 10) + # Take exactly the input size + self.run_core_tests(lambda: self._build_take_dataset(10), None, 10) + # Take all + self.run_core_tests(lambda: self._build_take_dataset(-1), None, 10) + # Take nothing + self.run_core_tests(lambda: self._build_take_dataset(0), None, 0) + + def _build_repeat_dataset(self, count, take_count=3): + components = (np.arange(10),) + return dataset_ops.Dataset.from_tensor_slices(components).take( + take_count).repeat(count) + + def testFiniteRepeat(self): + count = 10 + self.run_core_tests(lambda: self._build_repeat_dataset(count), + lambda: self._build_repeat_dataset(count + 2), + 3 * count) + + def testEmptyRepeat(self): + self.run_core_tests(lambda: self._build_repeat_dataset(0), None, 0) + + def testInfiniteRepeat(self): + self.verify_unused_iterator( + lambda: self._build_repeat_dataset(-1), 10, verify_exhausted=False) + self.verify_init_before_restore( + lambda: self._build_repeat_dataset(-1), 10, verify_exhausted=False) + self.verify_multiple_breaks( + lambda: self._build_repeat_dataset(-1), 20, verify_exhausted=False) + self.verify_reset_restored_iterator( + lambda: self._build_repeat_dataset(-1), 20, verify_exhausted=False) + self.verify_restore_in_modified_graph( + lambda: self._build_repeat_dataset(-1), + lambda: self._build_repeat_dataset(2), + 20, + verify_exhausted=False) + # Test repeat empty dataset + self.run_core_tests(lambda: self._build_repeat_dataset(-1, 0), None, 0) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/repeat_dataset_op.cc index 9813e99a70..6c0f4118e6 100644 --- a/tensorflow/core/kernels/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/repeat_dataset_op.cc @@ -95,6 +95,15 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } }; class FiniteIterator : public DatasetIterator { @@ -183,6 +192,29 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { } while (true); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (input_impl_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + else + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("uninitialized"), "")); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (reader->Contains(full_name("uninitialized"))) { + input_impl_.reset(); + } else { + input_impl_ = dataset()->input_->MakeIterator(prefix()); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } + return Status::OK(); + } + private: mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/skip_dataset_op.cc index 52a6116a7c..05152db1ae 100644 --- a/tensorflow/core/kernels/skip_dataset_op.cc +++ b/tensorflow/core/kernels/skip_dataset_op.cc @@ -35,14 +35,14 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { int64 count; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "count", &count)); - *output = new Dataset(count, input); + *output = new Dataset(ctx, count, input); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(int64 count, const DatasetBase* input) - : count_(count), input_(input) { + Dataset(OpKernelContext* ctx, int64 count, const DatasetBase* input) + : GraphDatasetBase(ctx), count_(count), input_(input) { input_->Ref(); } @@ -71,6 +71,18 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "SkipDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + Node* count = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, count}, output)); + return Status::OK(); + } + private: class EmptyIterator : public DatasetIterator { public: @@ -82,6 +94,16 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } }; class FiniteIterator : public DatasetIterator { @@ -119,6 +141,22 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + return Status::OK(); + } + private: mutex mu_; int64 i_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/take_dataset_op.cc index c3f33d663c..f9f675abda 100644 --- a/tensorflow/core/kernels/take_dataset_op.cc +++ b/tensorflow/core/kernels/take_dataset_op.cc @@ -35,14 +35,14 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { // Create a new TakeDatasetOp::Dataset, and return it as the output. int64 count; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "count", &count)); - *output = new Dataset(count, input); + *output = new Dataset(ctx, count, input); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(int64 count, const DatasetBase* input) - : count_(count), input_(input) { + Dataset(OpKernelContext* ctx, int64 count, const DatasetBase* input) + : GraphDatasetBase(ctx), count_(count), input_(input) { input_->Ref(); } @@ -72,6 +72,18 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "TakeDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + Node* count = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, count}, output)); + return Status::OK(); + } + private: class EmptyIterator : public DatasetIterator { public: @@ -83,6 +95,16 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } }; class FiniteIterator : public DatasetIterator { @@ -110,6 +132,22 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + return Status::OK(); + } + private: mutex mu_; int64 i_ GUARDED_BY(mu_); -- GitLab From 091ec068a932944a0cd0792a01aa127ed36647e6 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Tue, 7 Nov 2017 16:30:33 -0800 Subject: [PATCH 0155/1801] Allow passing other global_steps to summaries. PiperOrigin-RevId: 174931874 --- tensorflow/contrib/summary/summary_ops.py | 43 +++++++++++-------- .../contrib/summary/summary_ops_test.py | 12 ++++++ 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index 56e3198593..9238671c4a 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -57,12 +57,14 @@ def should_record_summaries(): # TODO(apassos) consider how to handle local step here. @tf_contextlib.contextmanager -def record_summaries_every_n_global_steps(n): +def record_summaries_every_n_global_steps(n, global_step=None): """Sets the should_record_summaries Tensor to true if global_step % n == 0.""" + if global_step is None: + global_step = training_util.get_global_step() collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) old = collection_ref[:] with ops.device("cpu:0"): - collection_ref[:] = [math_ops.equal(training_util.get_global_step() % n, 0)] + collection_ref[:] = [math_ops.equal(global_step % n, 0)] yield collection_ref[:] = old @@ -204,68 +206,75 @@ def summary_writer_function(name, tensor, function, family=None): return op -def generic(name, tensor, metadata, family=None): +def generic(name, tensor, metadata, family=None, global_step=None): """Writes a tensor summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_summary( context.context().summary_writer_resource, - training_util.get_global_step(), array_ops.identity(tensor), + global_step, array_ops.identity(tensor), tag, metadata, name=scope) return summary_writer_function(name, tensor, function, family=family) -def scalar(name, tensor, family=None): +def scalar(name, tensor, family=None, global_step=None): """Writes a scalar summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_scalar_summary( context.context().summary_writer_resource, - training_util.get_global_step(), tag, array_ops.identity(tensor), + global_step, tag, array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def histogram(name, tensor, family=None): +def histogram(name, tensor, family=None, global_step=None): """Writes a histogram summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_histogram_summary( context.context().summary_writer_resource, - training_util.get_global_step(), tag, array_ops.identity(tensor), + global_step, tag, array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def image(name, tensor, bad_color=None, max_images=3, family=None): +def image(name, tensor, bad_color=None, max_images=3, family=None, + global_step=None): """Writes an image summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): bad_color_ = (constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8) if bad_color is None else bad_color) # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_image_summary( context.context().summary_writer_resource, - training_util.get_global_step(), tag, array_ops.identity(tensor), + global_step, tag, array_ops.identity(tensor), bad_color_, max_images, name=scope) return summary_writer_function(name, tensor, function, family=family) -def audio(name, tensor, sample_rate, max_outputs, family=None): +def audio(name, tensor, sample_rate, max_outputs, family=None, + global_step=None): """Writes an audio summary if possible.""" - + if global_step is None: + global_step = training_util.get_global_step() def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_audio_summary( context.context().summary_writer_resource, - training_util.get_global_step(), + global_step, tag, array_ops.identity(tensor), sample_rate=sample_rate, diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index de7ae6ec27..466e194096 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -86,6 +86,18 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') + def testSummaryGlobalStep(self): + global_step = training_util.get_or_create_global_step() + logdir = tempfile.mkdtemp() + with summary_ops.create_summary_file_writer( + logdir, max_queue=0, + name='t2').as_default(), summary_ops.always_record_summaries(): + + summary_ops.scalar('scalar', 2.0, global_step=global_step) + + events = summary_test_util.events_from_file(logdir) + self.assertEqual(len(events), 2) + self.assertEqual(events[1].summary.value[0].tag, 'scalar') if __name__ == '__main__': test.main() -- GitLab From 7fdcbc508c6d94a785111bc9468b221335345ce7 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 7 Nov 2017 16:50:18 -0800 Subject: [PATCH 0156/1801] tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes. Fixes #14164. PiperOrigin-RevId: 174934769 --- tensorflow/contrib/eager/python/network.py | 63 ++++++---- .../contrib/eager/python/network_test.py | 108 ++++++++++++++++-- tensorflow/python/layers/base.py | 15 ++- 3 files changed, 148 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index c6e628b074..1a5c6e8aec 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -244,6 +244,12 @@ class Network(base.Layer): self._owned_layers = {} # The scope to use if we end up without a parent. self._default_parent_variable_scope = variable_scope.get_variable_scope() + # Hold on to the variable scope counts from init to check whether a scope + # with the name we want was ever created in our parent scope. Without this + # check we might have name collisions if the parent scope on init gets + # closed before build is called. + self._variable_scope_counts_on_init = ( + variable_scope._get_default_variable_store().variable_scopes_count) self._custom_getter, self._deferred_restorations = ( _make_custom_getter_for_deferred_restorations()) @@ -261,18 +267,29 @@ class Network(base.Layer): def _finalize_name(self, parent_network): if not self._name: - if not parent_network: - name_uid_map = base._get_default_graph_uid_map() - else: - name_uid_map = parent_network._sub_layer_name_uids # Were were not passed a name explicitly (or it was blank), so this is an # anonymous Network. We make up a unique name. if parent_network: avoid_names = parent_network._owned_layers + name_uid_map = parent_network._sub_layer_name_uids else: - avoid_names = None + name_uid_map = base._get_default_graph_uid_map() + # Figure out which names we have to avoid based on which variable scope + # we're nested in. + strip_name = self._default_parent_variable_scope.name + if strip_name: + strip_name += "/" + def _strip_on_init_scope(name): + if name.startswith(strip_name): + return name[len(strip_name):] + else: + return None + avoid_names = set( + _strip_on_init_scope(name) + for name in self._variable_scope_counts_on_init.keys() if name) self._name, self._base_name = self._make_unique_name( - name_uid_map=name_uid_map, avoid_names=avoid_names) + name_uid_map=name_uid_map, avoid_names=avoid_names, + namespace=self._default_parent_variable_scope.name) if self._first_parent is None or (self._first_parent # False = no parent and self._first_parent() is None): # Save a pointer to the parent Network so that we can later check that the @@ -302,7 +319,13 @@ class Network(base.Layer): parent_scope = first_parent._scope else: parent_scope = self._default_parent_variable_scope - with variable_scope.variable_scope(parent_scope): + with variable_scope.variable_scope(parent_scope) as parent_vs: + expected_scope_name = parent_vs.name + "/" + self._name + if expected_scope_name in self._variable_scope_counts_on_init: + raise ValueError( + ("A Network named '%s' already exists (or a variable_scope was " + "created with this name). Names must be unique.") % ( + self._name,)) # Make sure variables with this prefix will be unique. with variable_scope.variable_scope( None, use_resource=True, default_name=self._name) as scope: @@ -319,25 +342,22 @@ class Network(base.Layer): "created with this name). Names must be unique.") % ( self._name,)) if (first_parent - and scope_prefix[:-1] != first_parent._scope.name): + and scope_prefix[:-1] != first_parent.scope_name): raise ValueError( ("Network variable names must match a nesting of sub-Network " "names. Expected prefix '%s' from parent network, but got " "'%s' when attempting to create a variable_scope for Network " "'%s'. Likely an explicit variable_scope was inserted into " "the nesting.") % ( - first_parent._scope.name, + first_parent.scope_name, scope_prefix[:-1], self._name)) elif not first_parent and scope_prefix: # For the case when this Network is not nested inside any other - # Network, but is in a variable_scope. This is an error for now. - raise ValueError( - "Creating Networks inside named variable_scopes is currently " - "not supported (to ensure that variable names match the names " - "of Networks in which they were first created). To set " - "options, try `with tf.variable_scope(''):`. If this " - "limitation bothers you, please file a feature request.") + # Network, but is in a variable_scope. This Network's name takes on + # the full variable scope prefix. + self._name = scope_name + for non_network_sublayer in self._non_network_sublayers: self._set_scope_for_nonnetwork_sublayer(non_network_sublayer) @@ -355,8 +375,7 @@ class Network(base.Layer): raise ValueError( ("The parent of a Layer added to Network %s was garbage collected " "before the Layer was built. If this limitation bothers you " - "please, comment on " - "https://github.com/tensorflow/tensorflow/issues/14164.") % + "please file a feature request.") % (self.name,)) with variable_scope.variable_scope(parent_scope): # Horrid hack to make Layer variable names which are direct @@ -420,7 +439,9 @@ class Network(base.Layer): # name, and we should respect it (subject to error checking). layer._name, layer._base_name = layer._make_unique_name( name_uid_map=self._sub_layer_name_uids, - avoid_names=self._owned_layers) + avoid_names=self._owned_layers + # No namespace required, since we've specified our own UID map. + ) layer._first_parent = weakref.ref(self) self._non_network_sublayers.append(layer) if (not layer.built @@ -556,7 +577,7 @@ class Network(base.Layer): if os.path.isdir(save_path): # If we were passed a directory, default to naming based on the Network # name. - save_path = os.path.join(save_path, self.name) + save_path = os.path.join(save_path, self.name.replace("/", "_")) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) @@ -750,7 +771,7 @@ class Network(base.Layer): self._set_scope() # scope_name should be available to map_funcs if os.path.isdir(save_path): # If we don't have a name yet, set no parent. - save_path = os.path.join(save_path, self.name) + save_path = os.path.join(save_path, self.name.replace("/", "_")) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 14adbafe57..1127055c05 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -410,19 +410,103 @@ class NetworkTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testWrappingInVariableScope(self): + one = constant_op.constant([[1.]]) + # Naming happens in the order of first build rather than the order of + # construction, but for clarity they're the same here and construction is + # annotated. + outside_net_before = MyNetwork() # name=my_network_1 + outside_net_before(one) + captured_scope = variable_scope.get_variable_scope() with variable_scope.variable_scope("outside_scope"): - net = MyNetwork() - one = constant_op.constant([[1.]]) - with self.assertRaisesRegexp( - ValueError, - ("Creating Networks inside named variable_scopes is currently not " - "supported")): - net(one) - # Alternatively, we could re-name the Network to match the variable_scope: - # self.assertEqual("outside_scope/my_network_1", net.name) - # self.assertStartsWith( - # expected_start="outside_scope/my_network_1/dense/", - # actual=net.trainable_weights[0].name) + net1 = MyNetwork() # name=outside_scope/my_network_1 + net1(one) + name_conflict1 = MyNetwork(name="name_conflict") # fine, unique so far + name_conflict2 = MyNetwork(name="name_conflict") # error on build + with variable_scope.variable_scope("inside_scope"): + # No issue here since the name is unique within its scope. + name_conflict3 = MyNetwork(name="name_conflict") + net2 = MyNetwork() # name=outside_scope/my_network_3 to avoid the + # variable_scope my_network_2 below. + vs_name_conflict = MyNetwork(name="vs_name_conflict") # conflict below + with variable_scope.variable_scope("intervening_scope"): + with variable_scope.variable_scope(captured_scope): + with variable_scope.variable_scope("outside_scope"): + name_conflict4 = MyNetwork(name="name_conflict") # error on build + with variable_scope.variable_scope("my_network_2"): + pass + with variable_scope.variable_scope("vs_name_conflict"): + pass + net3 = MyNetwork() # name=outside_scope/my_network_4 + name_conflict1(one) + with self.assertRaisesRegexp( + ValueError, "named 'name_conflict' already exists"): + name_conflict2(one) + name_conflict3(one) + net2(one) + with self.assertRaisesRegexp( + ValueError, "or a variable_scope was created with this name"): + vs_name_conflict(one) + with self.assertRaisesRegexp( + ValueError, "named 'name_conflict' already exists"): + name_conflict4(one) + self.assertEqual("outside_scope/name_conflict", + name_conflict1.name) + self.assertStartsWith( + expected_start="outside_scope/name_conflict/dense_1/", + actual=name_conflict1.variables[0].name) + self.assertEqual("outside_scope/inside_scope/name_conflict", + name_conflict3.name) + self.assertStartsWith( + expected_start="outside_scope/inside_scope/name_conflict/dense_1/", + actual=name_conflict3.variables[0].name) + self.assertEqual("outside_scope/my_network_1", net1.name) + self.assertStartsWith( + expected_start="outside_scope/my_network_1/dense_1/", + actual=net1.trainable_weights[0].name) + self.assertEqual("outside_scope/my_network_3", net2.name) + self.assertStartsWith( + expected_start="outside_scope/my_network_3/dense_1/", + actual=net2.trainable_weights[0].name) + net3(one) + self.assertEqual("outside_scope/my_network_4", net3.name) + self.assertStartsWith( + expected_start="outside_scope/my_network_4/dense_1/", + actual=net3.trainable_weights[0].name) + outside_net_after = MyNetwork() + outside_net_after(one) + self.assertEqual("my_network_1", outside_net_before.name) + self.assertStartsWith( + expected_start="my_network_1/dense_1/", + actual=outside_net_before.trainable_weights[0].name) + self.assertEqual("my_network_2", outside_net_after.name) + self.assertStartsWith( + expected_start="my_network_2/dense_1/", + actual=outside_net_after.trainable_weights[0].name) + + @test_util.run_in_graph_and_eager_modes() + def testVariableScopeStripping(self): + with variable_scope.variable_scope("scope1"): + with variable_scope.variable_scope("scope2"): + net = MyNetwork() + net(constant_op.constant([[2.0]])) + self.evaluate(net.variables[0].assign([[42.]])) + self.assertEqual(net.name, "scope1/scope2/my_network_1") + self.assertStartsWith( + expected_start="scope1/scope2/my_network_1/dense_1/", + actual=net.trainable_weights[0].name) + save_path = net.save(self.get_temp_dir()) + self.assertIn("scope1_scope2_my_network_1", save_path) + restore_net = MyNetwork() + # Delayed restoration + restore_net.restore(save_path) + restore_net(constant_op.constant([[1.0]])) + self.assertAllEqual([[42.]], + self.evaluate(restore_net.variables[0])) + self.evaluate(restore_net.variables[0].assign([[-1.]])) + # Immediate restoration + restore_net.restore(save_path) + self.assertAllEqual([[42.]], + self.evaluate(restore_net.variables[0])) @test_util.run_in_graph_and_eager_modes() def testLayerNamesRespected(self): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 07b9d9b7a6..8c8d774b75 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -401,10 +401,11 @@ class Layer(object): """ return input_shape - def _make_unique_name(self, name_uid_map=None, avoid_names=None): + def _make_unique_name(self, name_uid_map=None, avoid_names=None, + namespace=''): base_name = _to_snake_case(self.__class__.__name__) name = _unique_layer_name(base_name, name_uid_map=name_uid_map, - avoid_names=avoid_names) + avoid_names=avoid_names, namespace=namespace) return (name, base_name) def _set_scope(self, scope=None): @@ -2370,7 +2371,7 @@ def _get_default_graph_uid_map(): return name_uid_map -def _unique_layer_name(name, name_uid_map=None, avoid_names=None): +def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): """Makes a layer name (or arbitrary string) unique within a TensorFlow graph. Arguments: @@ -2379,6 +2380,9 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None): names. If None (default), uses a per-Graph dictionary. avoid_names: An optional set or dict with names which should not be used. If None (default) does not avoid any names. + namespace: Gets a name which is unique within the (graph, namespace). Layers + which are not Networks use a blank namespace and so get graph-global + names. Returns: Unique string name. @@ -2396,6 +2400,7 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None): avoid_names = set() proposed_name = None while proposed_name is None or proposed_name in avoid_names: - name_uid_map[name] += 1 - proposed_name = name + '_' + str(name_uid_map[name]) + name_key = (namespace, name) + name_uid_map[name_key] += 1 + proposed_name = name + '_' + str(name_uid_map[name_key]) return proposed_name -- GitLab From 969eae0da7aed343b382d12d6e65dcf1d3bbcfad Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 16:52:51 -0800 Subject: [PATCH 0157/1801] Fix FakeQuant to correctly set zero on CPU. PiperOrigin-RevId: 174935134 --- tensorflow/core/kernels/fake_quant_ops_functor.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h index b41b22d634..7aaad6e6c7 100644 --- a/tensorflow/core/kernels/fake_quant_ops_functor.h +++ b/tensorflow/core/kernels/fake_quant_ops_functor.h @@ -132,7 +132,7 @@ struct FakeQuantWithMinMaxVarsFunctor { const float max_val = max(); // If min and max are both zero, we should just return zero. if (min_val == 0.0f && max_val == 0.0f) { - outputs.setZero(); + outputs.device(d) = outputs.constant(0.0f); return; } float nudged_min, nudged_max, nudged_scale; @@ -163,8 +163,8 @@ struct FakeQuantWithMinMaxVarsGradientFunctor { // If min and max are both zero, we propagate everything to inputs. if (min_val == 0.0f && max_val == 0.0f) { backprops_wrt_input.device(d) = gradients; - backprop_wrt_min.setZero(); - backprop_wrt_max.setZero(); + backprop_wrt_min.device(d) = backprop_wrt_min.constant(0.0f); + backprop_wrt_max.device(d) = backprop_wrt_max.constant(0.0f); return; } float nudged_min, nudged_max, nudged_scale; @@ -205,7 +205,8 @@ struct FakeQuantWithMinMaxVarsPerChannelFunctor { const float max_val = max(i); // If min and max are both zero, we should just return zero. if (min_val == 0.0f && max_val == 0.0f) { - outputs.chip<1>(i).setZero(); + auto chip = outputs.chip<1>(i); + chip.device(d) = chip.constant(0.0f); continue; } float nudged_min, nudged_max, nudged_scale; @@ -242,8 +243,10 @@ struct FakeQuantWithMinMaxVarsPerChannelGradientFunctor { // If min and max are both zero, we propagate everything to inputs. if (min_val == 0.0f && max_val == 0.0f) { backprops_wrt_input.chip<1>(i).device(d) = gradients_chip; - backprop_wrt_min.chip<0>(i).setZero(); - backprop_wrt_max.chip<0>(i).setZero(); + auto min_chip = backprop_wrt_min.chip<0>(i); + auto max_chip = backprop_wrt_max.chip<0>(i); + min_chip.device(d) = min_chip.constant(0.0f); + max_chip.device(d) = max_chip.constant(0.0f); continue; } float nudged_min, nudged_max, nudged_scale; -- GitLab From fe258b30534f9aa0525ce677d23d968456654a65 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Tue, 7 Nov 2017 17:08:18 -0800 Subject: [PATCH 0158/1801] [XLA] Add binary operation name to shape inference error message. PiperOrigin-RevId: 174937290 --- tensorflow/compiler/xla/service/shape_inference.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 791d17365b..9c7dc2185e 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" @@ -770,8 +771,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(rhs)); - TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of binary operation")); - TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of binary operation")); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( + lhs, tensorflow::strings::StrCat("lhs of binary operation ", + BinaryOperation_Name(operation)))); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( + rhs, tensorflow::strings::StrCat("rhs of binary operation ", + BinaryOperation_Name(operation)))); switch (operation) { case BINOP_DOT: return InferDotOpShape(lhs, rhs); -- GitLab From 6685e19909d6000adb32e4ee574c6f4768c85127 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 17:12:18 -0800 Subject: [PATCH 0159/1801] Make quant_delay work even if user didn't create global step. PiperOrigin-RevId: 174937793 --- tensorflow/contrib/quantize/python/quantize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 548e33663e..6382d3f7b4 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -387,7 +387,7 @@ class _QuantizeContext(object): if delay_requested and self.quant_delay and self.quant_delay > 0: activate_quant = math_ops.greater_equal( - training_util.get_global_step(), + training_util.get_or_create_global_step(), self.quant_delay, name=scope + '/activate_quant') quant = control_flow_ops.cond( -- GitLab From d8ba9e21b720e1f2a6a9cd3648f13d0395d7496c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 17:12:57 -0800 Subject: [PATCH 0160/1801] Used tf.where to simplify conditional expression in div-sharding. PiperOrigin-RevId: 174937860 --- tensorflow/python/ops/embedding_ops.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 8c1ccc6840..f4561d1a83 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -191,12 +191,9 @@ def _embedding_lookup_and_transform(params, (flat_ids - extras) // ids_per_partition) # Emulate a conditional using a boolean indicator tensor - is_in_first_extras_partitions = math_ops.cast(p_assignments < extras, - flat_ids.dtype) - new_ids = (is_in_first_extras_partitions * (flat_ids % - (ids_per_partition + 1)) + - (1 - is_in_first_extras_partitions) * - ((flat_ids - extras) % ids_per_partition)) + new_ids = array_ops.where(p_assignments < extras, + flat_ids % (ids_per_partition + 1), + (flat_ids - extras) % ids_per_partition) else: raise ValueError("Unrecognized partition strategy: " + partition_strategy) -- GitLab From 3972ae8de65d42bdc37a048a23c8120c376b8622 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 17:16:55 -0800 Subject: [PATCH 0161/1801] Removed an unused temporary variable from DeviceNameUtils::ParseFullName. PiperOrigin-RevId: 174938299 --- tensorflow/core/util/device_name_utils.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/util/device_name_utils.cc b/tensorflow/core/util/device_name_utils.cc index 2d797c855a..90c3fed2e8 100644 --- a/tensorflow/core/util/device_name_utils.cc +++ b/tensorflow/core/util/device_name_utils.cc @@ -116,7 +116,6 @@ bool DeviceNameUtils::ParseFullName(StringPiece fullname, ParsedName* p) { if (fullname == "/") { return true; } - StringPiece tmp; while (!fullname.empty()) { bool progress = false; if (str_util::ConsumePrefix(&fullname, "/job:")) { -- GitLab From 02b2bd9e22e2a88629ebd778e51bcfa39dd63438 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Tue, 7 Nov 2017 17:22:20 -0800 Subject: [PATCH 0162/1801] [XLA] Fix comments for arg_literals parameter in HloEvaluator::Evaluate. PiperOrigin-RevId: 174939009 --- tensorflow/compiler/xla/service/hlo_evaluator.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 67b6e215fc..7557aaa248 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -39,16 +39,18 @@ class HloEvaluator : public DfsHloVisitorWithDefault { HloEvaluator(); // Evaluates an HLO module and an array of pointers to literals. // Returns the evaluated result as a literal if successful. - // Precondition: argument literals correspond to each input computation's - // parameters in their post-ordering. See comment below for example. + // Precondition: The indices of arg_literals correspond to the parameter + // numbers of the HLO parameters in the computation. See comment below for an + // example. StatusOr> Evaluate( const HloModule& module, tensorflow::gtl::ArraySlice arg_literals); // Evaluates an HLO computation and an array of pointers to literals. // Returns the evaluated result as a literal if successful. - // Precondition: argument literals correspond to the input computation's - // parameters in their post-ordering. For e.g., consider the following graph: + // Precondition: The indices of arg_literals correspond to the parameter + // numbers of the HLO parameters in the computation. For e.g., consider the + // following graph: // // * // / \ @@ -57,8 +59,9 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // / \ // Parameter0 Constant // - // The input literals array will have its first literal map to Parameter0 and - // the second map to Parameter1. + // where Parameter0 has parameter_number 0 and Parameter1 has parameter_number + // 1 in this computation. The input literals array will then have its first + // literal map to Parameter0 and the second map to Parameter1. StatusOr> Evaluate( const HloComputation& computation, tensorflow::gtl::ArraySlice arg_literals); -- GitLab From 326d79c6d7aa3ac0bcd24d9d558481576c516edf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 17:47:46 -0800 Subject: [PATCH 0163/1801] [tf.data] Saveable iterator for dataset.zip(..). PiperOrigin-RevId: 174941651 --- .../kernel_tests/zip_dataset_op_test.py | 27 +++++++++++++ tensorflow/core/kernels/zip_dataset_op.cc | 40 +++++++++++++++++-- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py index b0e7218301..5d34b0024c 100644 --- a/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/zip_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -110,5 +111,31 @@ class ZipDatasetTest(test.TestCase): sess.run(get_next) +class ZipDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_dataset(self, arr): + components = [ + np.tile(np.array([[1], [2], [3], [4]]), 20), + np.tile(np.array([[12], [13], [14], [15]]), 22), + np.array(arr) + ] + datasets = [ + dataset_ops.Dataset.from_tensor_slices(component) + for component in components + ] + return dataset_ops.Dataset.zip((datasets[0], (datasets[1], datasets[2]))) + + def testCore(self): + # Equal length components + arr = [37.0, 38.0, 39.0, 40.0] + num_outputs = len(arr) + self.run_core_tests(lambda: self._build_dataset(arr), None, num_outputs) + # Variable length components + diff_size_arr = [1.0, 2.0] + self.run_core_tests(lambda: self._build_dataset(diff_size_arr), + lambda: self._build_dataset(arr), 2) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index a80b9edbe4..30d64ea634 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -35,14 +35,15 @@ class ZipDatasetOp : public DatasetOpKernel { OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input)); inputs.push_back(input); } - *output = new Dataset(inputs); + *output = new Dataset(ctx, inputs); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - explicit Dataset(const std::vector& inputs) - : inputs_(inputs) { + explicit Dataset(OpKernelContext* ctx, + const std::vector& inputs) + : GraphDatasetBase(ctx), inputs_(inputs) { for (const auto& input : inputs_) { input->Ref(); for (DataType dt : input->output_dtypes()) { @@ -76,6 +77,21 @@ class ZipDatasetOp : public DatasetOpKernel { string DebugString() override { return "ZipDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + std::vector input_graph_nodes; + input_graph_nodes.reserve(inputs_.size()); + for (const auto& input : inputs_) { + Node* input_node; + TF_RETURN_IF_ERROR(b->AddParentDataset(input, &input_node)); + input_graph_nodes.emplace_back(input_node); + } + TF_RETURN_IF_ERROR( + b->AddDatasetWithInputAsList(this, input_graph_nodes, output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: @@ -109,6 +125,22 @@ class ZipDatasetOp : public DatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl)); + return Status::OK(); + } + private: mutex mu_; std::vector> input_impls_ GUARDED_BY(mu_); -- GitLab From 6bc5375cb07d8d595411ec0516d29314053a8e83 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 7 Nov 2017 18:16:47 -0800 Subject: [PATCH 0164/1801] Register int64 for GPU StridedSlice kernel PiperOrigin-RevId: 174944857 --- tensorflow/core/kernels/strided_slice_op.cc | 1 + tensorflow/core/kernels/strided_slice_op_impl.h | 2 ++ tensorflow/python/kernel_tests/array_ops_test.py | 13 ++++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 8fc40db3cc..73b6d4cf6a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index de65147572..afe3a051e6 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -284,6 +284,7 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU); TF_CALL_complex64(DECLARE_FOR_N_GPU); TF_CALL_complex128(DECLARE_FOR_N_GPU); DECLARE_FOR_N_GPU(int32); +DECLARE_FOR_N_GPU(int64); #endif // END GOOGLE_CUDA TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); @@ -299,6 +300,7 @@ DECLARE_FOR_N_CPU(bfloat16); TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL); DECLARE_FOR_N_SYCL(int32); +DECLARE_FOR_N_SYCL(int64); #undef DECLARE_FOR_N_SYCL #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 8f4c94f318..6eb9c66d06 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -486,7 +486,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): _ = checker2[...] _ = checker2[tuple()] - def testInt64GPU(self): + def testFloatSlicedArrayAndInt64IndicesGPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") with self.test_session(use_gpu=True, force_gpu=True): @@ -497,6 +497,17 @@ class StridedSliceTest(test_util.TensorFlowTestCase): s = array_ops.strided_slice(x, begin, end, strides) self.assertAllEqual([3.], self.evaluate(s)) + def testInt64SlicedArrayAndIndicesGPU(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + with self.test_session(use_gpu=True, force_gpu=True): + x = constant_op.constant([1, 2, 3], dtype=dtypes.int64) + begin = constant_op.constant([2], dtype=dtypes.int64) + end = constant_op.constant([3], dtype=dtypes.int64) + strides = constant_op.constant([1], dtype=dtypes.int64) + s = array_ops.strided_slice(x, begin, end, strides) + self.assertAllEqual([3], self.evaluate(s)) + def testDegenerateSlices(self): with self.test_session(use_gpu=True): checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR) -- GitLab From 54837e40096c35322e75d43a13bbf44c933f59db Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 18:38:12 -0800 Subject: [PATCH 0165/1801] Add functionality to perform training of additional fixed point layer on top of quantized base model. Also modify retrain_test to test creation of model info for fixed point mobilenet. PiperOrigin-RevId: 174946745 --- .../examples/image_retraining/retrain.py | 82 +++++++++++++++---- .../examples/image_retraining/retrain_test.py | 23 +++++- 2 files changed, 85 insertions(+), 20 deletions(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 3549891461..ebddfb20f4 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -69,11 +69,18 @@ to validate that you have gathered good training data, but if you want to deploy on resource-limited platforms, you can try the `--architecture` flag with a Mobilenet model. For example: +Run floating-point version of mobilenet: ```bash python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` +Run quantized version of mobilenet: +```bash +python tensorflow/examples/image_retraining/retrain.py \ + --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized +``` + There are 32 different Mobilenet models to choose from, with a variety of file size and latency options. The first number can be '1.0', '0.75', '0.50', or '0.25' to control the size, and the second controls the input image size, either @@ -107,6 +114,7 @@ import numpy as np from six.moves import urllib import tensorflow as tf +from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import gfile @@ -271,6 +279,7 @@ def create_model_graph(model_info): """ with tf.Graph().as_default() as graph: model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name']) + print('Model path: ', model_path) with gfile.FastGFile(model_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) @@ -337,7 +346,10 @@ def maybe_download_and_extract(data_url): statinfo = os.stat(filepath) tf.logging.info('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall(dest_directory) + print('Extracting file from ', filepath) + tarfile.open(filepath, 'r:gz').extractall(dest_directory) + else: + print('Not extracting or downloading files, model already present in disk') def ensure_dir_exists(dir_name): @@ -733,7 +745,7 @@ def variable_summaries(var): def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size): + bottleneck_tensor_size, quantize_layer): """Adds a new softmax and fully-connected layer for training. We need to retrain the top layer to identify our new classes, so this function @@ -745,10 +757,12 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, Args: class_count: Integer of how many categories of things we're trying to - recognize. + recognize. final_tensor_name: Name string for the new final node that produces results. bottleneck_tensor: The output of the main CNN graph. bottleneck_tensor_size: How many entries in the bottleneck vector. + quantize_layer: Boolean, specifying whether the newly added layer should be + quantized. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -771,18 +785,41 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, with tf.name_scope('weights'): initial_value = tf.truncated_normal( [bottleneck_tensor_size, class_count], stddev=0.001) - layer_weights = tf.Variable(initial_value, name='final_weights') + if quantize_layer: + quantized_layer_weights = quant_ops.MovingAvgQuantize( + layer_weights, is_training=True) + variable_summaries(quantized_layer_weights) variable_summaries(layer_weights) with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') + if quantize_layer: + quantized_layer_biases = quant_ops.MovingAvgQuantize( + layer_biases, is_training=True) + variable_summaries(quantized_layer_biases) + variable_summaries(layer_biases) + with tf.name_scope('Wx_plus_b'): - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) + if quantize_layer: + logits = tf.matmul(bottleneck_input, + quantized_layer_weights) + quantized_layer_biases + logits = quant_ops.MovingAvgQuantize( + logits, + init_min=-32.0, + init_max=32.0, + is_training=True, + num_bits=8, + narrow_range=False, + ema_decay=0.5) + tf.summary.histogram('pre_activations', logits) + else: + logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases + tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) + tf.summary.histogram('activations', final_tensor) with tf.name_scope('cross_entropy'): @@ -790,6 +827,7 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, labels=ground_truth_input, logits=logits) with tf.name_scope('total'): cross_entropy_mean = tf.reduce_mean(cross_entropy) + tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'): @@ -825,6 +863,7 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): def save_graph_to_file(sess, graph, graph_file_name): output_graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) + with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) return @@ -858,6 +897,7 @@ def create_model_info(architecture): ValueError: If architecture name is unknown. """ architecture = architecture.lower() + is_quantized = False if architecture == 'inception_v3': # pylint: disable=line-too-long data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' @@ -902,19 +942,28 @@ def create_model_info(architecture): architecture) return None is_quantized = True - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + + if is_quantized: + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' + data_url += version_string + '_' + size_string + '_quantized_frozen.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'Placeholder:0' + model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + + '_quantized_frozen') + model_base_name = 'quantized_frozen_graph.pb' + + else: + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' + data_url += version_string + '_' + size_string + '_frozen.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'input:0' + model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string + model_base_name = 'frozen_graph.pb' + bottleneck_tensor_size = 1001 input_width = int(size_string) input_height = int(size_string) input_depth = 3 - resized_input_tensor_name = 'input:0' - if is_quantized: - model_base_name = 'quantized_graph.pb' - else: - model_base_name = 'frozen_graph.pb' - model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string model_file_name = os.path.join(model_dir_name, model_base_name) input_mean = 127.5 input_std = 127.5 @@ -933,6 +982,7 @@ def create_model_info(architecture): 'model_file_name': model_file_name, 'input_mean': input_mean, 'input_std': input_std, + 'quantize_layer': is_quantized, } @@ -1028,7 +1078,7 @@ def main(_): (train_step, cross_entropy, bottleneck_input, ground_truth_input, final_tensor) = add_final_training_ops( len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size']) + model_info['bottleneck_tensor_size'], model_info['quantize_layer']) # Create the operations we need to evaluate the accuracy of our new layer. evaluation_step, prediction = add_evaluation_step( diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index c342a17dd8..2de4c4ec99 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -70,10 +70,18 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): def testAddFinalTrainingOps(self, flags_mock): with tf.Graph().as_default(): with tf.Session() as sess: - bottleneck = tf.placeholder( - tf.float32, [1, 1024], - name='bottleneck') - retrain.add_final_training_ops(5, 'final', bottleneck, 1024) + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization + retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) + self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + + @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) + def testAddFinalTrainingOpsQuantized(self, flags_mock): + with tf.Graph().as_default(): + with tf.Session() as sess: + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization + retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) def testAddEvaluationStep(self): @@ -99,5 +107,12 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): self.assertIsNotNone(model_info) self.assertEqual(299, model_info['input_width']) + def testCreateModelInfoQuantized(self): + # Test for mobilenet_quantized + model_info = retrain.create_model_info('mobilenet_1.0_224') + self.assertIsNotNone(model_info) + self.assertEqual(224, model_info['input_width']) + + if __name__ == '__main__': tf.test.main() -- GitLab From 942545c98db7f29dbbd7b9ed765e1289656f93d5 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 7 Nov 2017 18:39:17 -0800 Subject: [PATCH 0166/1801] Fix Bazel builds for the TF Lite demo app Adds a new remote repository for the mobilenet tflite models necessary for running the TF Lite demo app. PiperOrigin-RevId: 174946867 --- tensorflow/workspace.bzl | 37 ++++++++++++++++-------------- third_party/tflite_mobilenet.BUILD | 13 +++++++++++ 2 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 third_party/tflite_mobilenet.BUILD diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index afcae6eade..3081a8d1dc 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -1,21 +1,24 @@ # TensorFlow external dependencies that can be loaded in WORKSPACE files. load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") + load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") -load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", - "java_import_external") +load( + "@io_bazel_rules_closure//closure/private:java_import_external.bzl", + "java_import_external", +) load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") load("//third_party/py:python_configure.bzl", "python_configure") -load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", - "arm_compiler_configure") - +load( + "//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", + "arm_compiler_configure", +) def _is_windows(repository_ctx): """Returns true if the host operating system is windows.""" return repository_ctx.os.name.lower().find("windows") != -1 - def _get_env_var(repository_ctx, name): """Find an environment variable.""" if name in repository_ctx.os.environ: @@ -23,7 +26,6 @@ def _get_env_var(repository_ctx, name): else: return None - # Parse the bazel version string from `native.bazel_version`. def _parse_bazel_version(bazel_version): # Remove commit from version. @@ -39,7 +41,6 @@ def _parse_bazel_version(bazel_version): version_tuple += (str(number),) return version_tuple - # Check that a specific bazel version is being used. def check_version(bazel_version): if "bazel_version" not in dir(native): @@ -56,11 +57,9 @@ def check_version(bazel_version): fail("\nCurrent Bazel version is {}, expected at least {}\n".format( native.bazel_version, bazel_version)) - def _repos_are_siblings(): return Label("@foo//bar").workspace_root.startswith("../") - # Temporary workaround to support including TensorFlow as a submodule until this # use-case is supported in the next Bazel release. def _temp_workaround_http_archive_impl(repo_ctx): @@ -73,9 +72,7 @@ def _temp_workaround_http_archive_impl(repo_ctx): if repo_ctx.attr.patch_file != None: _apply_patch(repo_ctx, repo_ctx.attr.patch_file) - temp_workaround_http_archive = repository_rule( - implementation = _temp_workaround_http_archive_impl, attrs = { "build_file": attr.label(), "repository": attr.string(), @@ -84,6 +81,7 @@ temp_workaround_http_archive = repository_rule( "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), }, + implementation = _temp_workaround_http_archive_impl, ) # Executes specified command with arguments and calls 'fail' if it exited with @@ -95,7 +93,6 @@ def _execute_and_check_ret_code(repo_ctx, cmd_and_args): + "Stderr: {3}").format(" ".join(cmd_and_args), result.return_code, result.stdout, result.stderr)) - # Apply a patch_file to the repository root directory # Runs 'patch -p1' def _apply_patch(repo_ctx, patch_file): @@ -113,7 +110,6 @@ def _apply_patch(repo_ctx, patch_file): cmd = [bazel_sh, "-c", " ".join(cmd)] _execute_and_check_ret_code(repo_ctx, cmd) - # Download the repository and apply a patch to its root def _patched_http_archive_impl(repo_ctx): repo_ctx.download_and_extract( @@ -122,9 +118,7 @@ def _patched_http_archive_impl(repo_ctx): stripPrefix=repo_ctx.attr.strip_prefix) _apply_patch(repo_ctx, repo_ctx.attr.patch_file) - patched_http_archive = repository_rule( - implementation = _patched_http_archive_impl, attrs = { "patch_file": attr.label(), "build_file": attr.label(), @@ -133,9 +127,9 @@ patched_http_archive = repository_rule( "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), }, + implementation = _patched_http_archive_impl, ) - # If TensorFlow is linked as a submodule. # path_prefix is no longer used. # tf_repo_name is thought to be under consideration. @@ -821,3 +815,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", ], ) + + native.new_http_archive( + name = "tflite_mobilenet", + build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), + sha256 = "eb71679d23a0cbdb173b36ea39f3d3096de0a9b0410d148a8237f20cc1157a61", + urls = [ + "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_1.0_224_quantized_2017_11_01.zip" + ], + ) diff --git a/third_party/tflite_mobilenet.BUILD b/third_party/tflite_mobilenet.BUILD new file mode 100644 index 0000000000..75663eff48 --- /dev/null +++ b/third_party/tflite_mobilenet.BUILD @@ -0,0 +1,13 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +filegroup( + name = "model_files", + srcs = glob( + ["**/*"], + exclude = [ + "BUILD", + ], + ), +) -- GitLab From 149257363d06017d48e01ef3137efe23afc160c3 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 7 Nov 2017 18:45:36 -0800 Subject: [PATCH 0167/1801] MovingAvgQuantize and LastValueQuantize should use the updated value from the Assign op, otherwise min max variables never get updated. PiperOrigin-RevId: 174947421 --- tensorflow/contrib/quantize/BUILD | 18 +++- .../contrib/quantize/python/quant_ops.py | 57 +++++------- .../contrib/quantize/python/quant_ops_test.py | 87 +++++++++++++++++++ .../python/quantize_parameterized_test.py | 65 +++++++------- 4 files changed, 160 insertions(+), 67 deletions(-) create mode 100644 tensorflow/contrib/quantize/python/quant_ops_test.py diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD index 935af80e7a..45a98c7f85 100644 --- a/tensorflow/contrib/quantize/BUILD +++ b/tensorflow/contrib/quantize/BUILD @@ -133,7 +133,6 @@ py_library( deps = [ "//tensorflow/contrib/framework:framework_py", "//tensorflow/python:array_ops", - "//tensorflow/python:check_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:math_ops", @@ -143,6 +142,23 @@ py_library( ], ) +py_test( + name = "quant_ops_test", + size = "small", + srcs = ["python/quant_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":quant_ops", + "//tensorflow/python:array_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + "//tensorflow/python:session", + "//tensorflow/python:variables", + ], +) + py_library( name = "quantize", srcs = ["python/quantize.py"], diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index 0a38ef9fcd..f80d427ff0 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -22,15 +22,12 @@ from tensorflow.contrib.framework.python.ops import add_arg_scope from tensorflow.contrib.framework.python.ops import model_variable from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.training import moving_averages -EPSILON = 1e-5 - @add_arg_scope def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None): @@ -133,12 +130,10 @@ def LastValueQuantize(inputs, batch_min = inputs else: batch_min = math_ops.reduce_min(inputs, name='BatchMin') - batch_min -= EPSILON - # B-eng requires that 0.0 if always in the [min; max] range. + # TFLite requires that 0.0 if always in the [min; max] range. batch_min = math_ops.minimum(batch_min, 0.0) - assign_min_op = state_ops.assign( - min_var, batch_min, name='AssignMinLast').op - ops.add_to_collection(updates_collection, assign_min_op) + assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast') + ops.add_to_collection(updates_collection, assign_min.op) if per_channel: if input_dim >= 2: @@ -148,17 +143,15 @@ def LastValueQuantize(inputs, batch_max = inputs else: batch_max = math_ops.reduce_max(inputs, name='BatchMax') - batch_max += EPSILON - # B-eng requires that 0.0 if always in the [min; max] range. + # TFLite requires that 0.0 if always in the [min; max] range. batch_max = math_ops.maximum(batch_max, 0.0) - assign_max_op = state_ops.assign( - max_var, batch_max, name='AssignMaxLast').op - ops.add_to_collection(updates_collection, assign_max_op) + assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast') + ops.add_to_collection(updates_collection, assign_max.op) return _FakeQuantWithMinMaxVars( inputs, - batch_min, - batch_max, + assign_min, + assign_max, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range) @@ -251,9 +244,9 @@ def MovingAvgQuantize(inputs, batch_min = math_ops.reduce_min(inputs, name='BatchMin') # B-eng requires that 0.0 if always in the [min; max] range. batch_min = math_ops.minimum(batch_min, 0.0) - assign_min_op = moving_averages.assign_moving_average( - min_var, batch_min, ema_decay, name='AssignMinEma').op - ops.add_to_collection(updates_collection, assign_min_op) + assign_min = moving_averages.assign_moving_average( + min_var, batch_min, ema_decay, name='AssignMinEma') + ops.add_to_collection(updates_collection, assign_min.op) if per_channel: if input_dim >= 2: @@ -265,14 +258,14 @@ def MovingAvgQuantize(inputs, batch_max = math_ops.reduce_max(inputs, name='BatchMax') # B-eng requires that 0.0 if always in the [min; max] range. batch_max = math_ops.maximum(batch_max, 0.0) - assign_max_op = moving_averages.assign_moving_average( - max_var, batch_max, ema_decay, name='AssignMaxEma').op - ops.add_to_collection(updates_collection, assign_max_op) + assign_max = moving_averages.assign_moving_average( + max_var, batch_max, ema_decay, name='AssignMaxEma') + ops.add_to_collection(updates_collection, assign_max.op) return _FakeQuantWithMinMaxVars( inputs, - min_var, - max_var, + assign_min, + assign_max, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range) @@ -301,20 +294,10 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, if per_channel: assert len(min_var.get_shape()) == 1 assert len(max_var.get_shape()) == 1 - with ops.control_dependencies([check_ops.assert_less(min_var, max_var)]): - return array_ops.fake_quant_with_min_max_vars_per_channel( - inputs, - min_var, - max_var, - num_bits=num_bits, - narrow_range=narrow_range) + return array_ops.fake_quant_with_min_max_vars_per_channel( + inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range) else: assert min_var.get_shape() == [] # pylint: disable=g-explicit-bool-comparison assert max_var.get_shape() == [] # pylint: disable=g-explicit-bool-comparison - with ops.control_dependencies([check_ops.assert_less(min_var, max_var)]): - return array_ops.fake_quant_with_min_max_vars( - inputs, - min_var, - max_var, - num_bits=num_bits, - narrow_range=narrow_range) + return array_ops.fake_quant_with_min_max_vars( + inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range) diff --git a/tensorflow/contrib/quantize/python/quant_ops_test.py b/tensorflow/contrib/quantize/python/quant_ops_test.py new file mode 100644 index 0000000000..3884679602 --- /dev/null +++ b/tensorflow/contrib/quantize/python/quant_ops_test.py @@ -0,0 +1,87 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for third_party.tensorflow.contrib.quantize.python.quant_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.quantize.python import quant_ops +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + +_MIN_MAX_VARS = 'min_max_vars' + + +class QuantOpsTest(googletest.TestCase): + + def testLastValueQuantizeTrainingAssign(self): + g = ops.Graph() + with session.Session(graph=g) as sess: + x = array_ops.placeholder(dtypes.float32, shape=[2]) + y = quant_ops.LastValueQuantize( + x, + init_min=0.0, + init_max=0.0, + is_training=True, + vars_collection=_MIN_MAX_VARS) + + # Run the step. + sess.run(variables.global_variables_initializer()) + sess.run(y, feed_dict={x: [-1.0, 1.0]}) + # Now check that the min_max_vars were, in fact, updated. + min_value, max_value = self._GetMinMaxValues(sess) + self.assertEqual(min_value, -1.0) + self.assertEqual(max_value, 1.0) + + def testMovingAvgQuantizeTrainingAssign(self): + g = ops.Graph() + with session.Session(graph=g) as sess: + x = array_ops.placeholder(dtypes.float32, shape=[2]) + y = quant_ops.MovingAvgQuantize( + x, + init_min=0.0, + init_max=0.0, + is_training=True, + vars_collection=_MIN_MAX_VARS) + + # Run the step. + sess.run(variables.global_variables_initializer()) + # Do two runs to avoid zero debias. + sess.run(y, feed_dict={x: [-1.0, 1.0]}) + sess.run(y, feed_dict={x: [0.0, 0.0]}) + # Now check that the min_max_vars were, in fact, updated. + min_value, max_value = self._GetMinMaxValues(sess) + self.assertGreater(min_value, -1.0) + self.assertLess(min_value, 0.0) + self.assertGreater(max_value, 0.0) + self.assertLess(max_value, 1.0) + + def _GetMinMaxValues(self, sess): + min_max_vars = ops.get_collection(_MIN_MAX_VARS) + self.assertEqual(len(min_max_vars), 2) + min_idx = 0 if 'min' in min_max_vars[0].name else 1 + max_idx = (min_idx + 1) % 2 + min_var, max_var = min_max_vars[min_idx], min_max_vars[max_idx] + min_max_values = sess.run([min_var, max_var]) + return min_max_values[0], min_max_values[1] + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 3e62f95bd6..57dab03f16 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -97,8 +97,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', - scope + '/weights/read' + scope + '/weights_quant/AssignMinLast', + scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) output_op_name = scope + '/Conv2D' @@ -109,8 +109,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/BiasAdd' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -122,7 +122,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -172,8 +172,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', - scope + '/weights/read' + scope + '/weights_quant/AssignMinLast', + scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) output_op_name = scope + '/MatMul' @@ -184,8 +184,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/BiasAdd' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -196,7 +196,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -247,7 +247,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/Minimum', scope + '/weights_quant/Maximum', + scope + '/weights_quant/AssignMinLast', + scope + '/weights_quant/AssignMaxLast', scope + '/depthwise_weights/read' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -259,8 +260,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/BiasAdd' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -271,7 +272,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -401,8 +402,10 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), - scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), + scope + '/weights_quant/' + ('AssignMinEma' + if use_ema else 'AssignMinLast'), + scope + '/weights_quant/' + ('AssignMaxEma' + if use_ema else 'AssignMaxLast'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -415,8 +418,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/add_fold' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -427,7 +430,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -518,8 +521,10 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), - scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), + scope + '/weights_quant/' + ('AssignMinEma' + if use_ema else 'AssignMinLast'), + scope + '/weights_quant/' + ('AssignMaxEma' + if use_ema else 'AssignMaxLast'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -532,8 +537,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/add_fold' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -544,7 +549,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) @@ -639,8 +644,10 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) expected_inputs = [ - scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'), - scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'), + scope + '/weights_quant/' + ('AssignMinEma' + if use_ema else 'AssignMinLast'), + scope + '/weights_quant/' + ('AssignMaxEma' + if use_ema else 'AssignMaxLast'), scope + '/mul_fold' ] self._AssertInputOpsAre(weights_quant, expected_inputs) @@ -653,8 +660,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) expected_inputs = [ - scope + '/conv_quant/min/read', scope + '/conv_quant/max/read', - scope + '/add_fold' + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' @@ -665,7 +672,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(act_quant.type, quantization_node_name) expected_inputs = [ - 'test/act_quant/min/read', 'test/act_quant/max/read', + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', 'test/' + activation_op_name ] self._AssertInputOpsAre(act_quant, expected_inputs) -- GitLab From a326f60ff52876d4a216fb5d810626e16232467e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 7 Nov 2017 18:46:10 -0800 Subject: [PATCH 0168/1801] Silenced an unnecessary warning PiperOrigin-RevId: 174947453 --- tensorflow/core/grappler/utils.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index d9f4cdb5ae..11bd8fa5cb 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -45,7 +45,6 @@ NodeDef* NodeMap::GetNode(const string& name) const { string node_name = NodeName(name); auto it = nodes_.find(node_name); if (it == nodes_.end()) { - LOG(WARNING) << "Node " << node_name << " is not in the graph."; return nullptr; } return it->second; -- GitLab From 9a6c0eb137d87f0578821793af64232fc54c53b6 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 7 Nov 2017 19:06:01 -0800 Subject: [PATCH 0169/1801] Fix link (the link tool expects these to be on one line) PiperOrigin-RevId: 174948909 --- tensorflow/docs_src/mobile/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/mobile/index.md b/tensorflow/docs_src/mobile/index.md index a6f1422f6f..06ad47bc62 100644 --- a/tensorflow/docs_src/mobile/index.md +++ b/tensorflow/docs_src/mobile/index.md @@ -35,8 +35,8 @@ speech-driven interface, and many of these require on-device processing. Most of the time a user isn’t giving commands, and so streaming audio continuously to a remote server would be a waste of bandwidth, since it would mostly be silence or background noises. To solve this problem it’s common to have a small neural -network running on-device @{$tutorials/audio_recognition$listening out for a -particular keyword}. Once that keyword has been spotted, the rest of the +network running on-device @{$tutorials/audio_recognition$listening out for a particular keyword}. +Once that keyword has been spotted, the rest of the conversation can be transmitted over to the server for further processing if more computing power is needed. -- GitLab From 8e729b337fc062369643b592a96cfbacd6e43712 Mon Sep 17 00:00:00 2001 From: Colin Raffel Date: Tue, 7 Nov 2017 21:05:37 -0800 Subject: [PATCH 0170/1801] Fix tf.contrib.seq2seq._monotonic_probability_fn to use a hard sigmoid when mode='hard'. Also adds tests to make sure the attention probabilities are 0 or 1 when mode='hard'. PiperOrigin-RevId: 174956465 --- .../kernel_tests/attention_wrapper_test.py | 37 +++++++++++++++++++ .../seq2seq/python/ops/attention_wrapper.py | 6 ++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index 91493302b1..01a5540121 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell from tensorflow.python.ops import variables from tensorflow.python.ops import variable_scope as vs @@ -589,6 +590,24 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testBahdanauMonotonicNormalized') + def testBahdanauMonotonicHard(self): + # Run attention mechanism with mode='hard', make sure probabilities are hard + b, t, u, d = 10, 20, 30, 40 + with self.test_session(use_gpu=True) as sess: + a = wrapper.BahdanauMonotonicAttention( + d, + random_ops.random_normal((b, t, u)), + mode='hard') + # Just feed previous attention as [1, 0, 0, ...] + attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t)) + sess.run(variables.global_variables_initializer()) + attn_out = attn.eval() + # All values should be 0 or 1 + self.assertTrue(np.all(np.logical_or(attn_out == 0, attn_out == 1))) + # Sum of distributions should be 0 or 1 (0 when all p_choose_i are 0) + self.assertTrue(np.all(np.logical_or(attn_out.sum(axis=1) == 1, + attn_out.sum(axis=1) == 0))) + def testLuongMonotonicNotNormalized(self): create_attention_mechanism = functools.partial( wrapper.LuongMonotonicAttention, sigmoid_noise=1.0, @@ -695,6 +714,24 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testMultiAttention') + def testLuongMonotonicHard(self): + # Run attention mechanism with mode='hard', make sure probabilities are hard + b, t, u, d = 10, 20, 30, 40 + with self.test_session(use_gpu=True) as sess: + a = wrapper.LuongMonotonicAttention( + d, + random_ops.random_normal((b, t, u)), + mode='hard') + # Just feed previous attention as [1, 0, 0, ...] + attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t)) + sess.run(variables.global_variables_initializer()) + attn_out = attn.eval() + # All values should be 0 or 1 + self.assertTrue(np.all(np.logical_or(attn_out == 0, attn_out == 1))) + # Sum of distributions should be 0 or 1 (0 when all p_choose_i are 0) + self.assertTrue(np.all(np.logical_or(attn_out.sum(axis=1) == 1, + attn_out.sum(axis=1) == 0))) + def testMultiAttentionNoAttentionLayer(self): create_attention_mechanisms = ( wrapper.BahdanauAttention, wrapper.LuongAttention) diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 839df079ee..87230e3355 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -679,7 +679,11 @@ def _monotonic_probability_fn(score, previous_alignments, sigmoid_noise, mode, seed=seed) score += sigmoid_noise*noise # Compute "choosing" probabilities from the attention scores - p_choose_i = math_ops.sigmoid(score) + if mode == "hard": + # When mode is hard, use a hard sigmoid + p_choose_i = math_ops.cast(score > 0, score.dtype) + else: + p_choose_i = math_ops.sigmoid(score) # Convert from choosing probabilities to attention distribution return monotonic_attention(p_choose_i, previous_alignments, mode) -- GitLab From 1e53f4caff649fb86ef74a2485547312372d399f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 22:02:42 -0800 Subject: [PATCH 0171/1801] Update nsync version---allow compilation with bazel on x86_32 The new version of nsync has a BUILD file that detects x86_32 (which bazel currently calls piii). PiperOrigin-RevId: 174959924 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 3081a8d1dc..dfe332b091 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -442,11 +442,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "nsync", urls = [ - "https://mirror.bazel.build/github.com/google/nsync/archive/4fc8ff3e7626c5f24bc9674438d8257f0ffc226c.tar.gz", - # "https://github.com/google/nsync/archive/4fc8ff3e7626c5f24bc9674438d8257f0ffc226c.tar.gz", + "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", + # "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", ], - sha256 = "ffbbe828f3d0bef75462e34801de5cea31d10aa63eaa42a4ed74c46521bdfd58", - strip_prefix = "nsync-4fc8ff3e7626c5f24bc9674438d8257f0ffc226c", + sha256 = "e3bd4555415ace511338fc27e595351738eea4e9006f1612b76c82914770716b", + strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323", ) native.http_archive( -- GitLab From 18135df3a56d0fb0a4f8e93d7b8332e4de3283e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 22:33:54 -0800 Subject: [PATCH 0172/1801] Automated g4 rollback of changelist 174912490 PiperOrigin-RevId: 174961746 --- .../contrib/cmake/tf_core_kernels.cmake | 1 - tensorflow/contrib/cmake/tf_core_ops.cmake | 1 - tensorflow/contrib/cmake/tf_python.cmake | 2 - tensorflow/contrib/data/BUILD | 13 +- tensorflow/contrib/data/__init__.py | 2 +- tensorflow/contrib/data/ops/dataset_ops.cc | 232 --------- .../python/kernel_tests/iterator_ops_test.py | 2 +- .../kernel_tests/range_dataset_op_test.py | 2 +- .../kernel_tests/reader_dataset_ops_test.py | 2 +- tensorflow/contrib/data/python/ops/BUILD | 40 +- .../contrib/data/python/ops/batching.py | 2 +- .../contrib/data/python/ops/dataset_ops.py | 8 +- .../contrib/data/python/ops/error_ops.py | 2 +- .../contrib/data/python/ops/grouping.py | 2 +- .../contrib/data/python/ops/interleave_ops.py | 2 +- .../contrib/data/python/ops/iterator_ops.py | 2 +- tensorflow/contrib/data/python/ops/readers.py | 2 +- .../contrib/data/python/ops/scan_ops.py | 2 +- .../core/ops/compat/ops_history.v1.pbtxt | 452 ++++++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 197 ++++++++ .../python/kernel_tests/iterator_ops_test.py | 62 +++ .../kernel_tests/range_dataset_op_test.py | 330 +++++++++++++ .../kernel_tests/reader_dataset_ops_test.py | 298 ++++++++++++ 23 files changed, 1366 insertions(+), 292 deletions(-) delete mode 100644 tensorflow/contrib/data/ops/dataset_ops.cc diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 5b62598aa5..f978c8ccd5 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -70,7 +70,6 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/prefetching_kernels.cc" - "${tensorflow_source_dir}/tensorflow/contrib/data/ops/dataset_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/ops/prefetching_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/factorization/kernels/clustering_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/factorization/kernels/masked_matmul_ops.cc" diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 03c168795c..4a61ed7a35 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -81,7 +81,6 @@ GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_prediction "${tensorflow_source_dir}/t GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_quantiles "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_stats_accumulator "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc") -GENERATE_CONTRIB_OP_LIBRARY(data_dataset "${tensorflow_source_dir}/tensorflow/contrib/data/ops/dataset_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(data_prefetching "${tensorflow_source_dir}/tensorflow/contrib/data/ops/prefetching_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc") diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index a14b733158..7636e9ba6e 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -776,8 +776,6 @@ GENERATE_PYTHON_OP_LIB("contrib_boosted_trees_stats_accumulator_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/boosted_trees/python/ops/gen_stats_accumulator_ops.py) GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py) -GENERATE_PYTHON_OP_LIB("contrib_data_dataset_ops" - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/data/python/ops/gen_dataset_ops.py) GENERATE_PYTHON_OP_LIB("contrib_data_prefetching_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/data/python/ops/gen_prefetching_ops.py) GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops" diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 7bcf5a5f4d..eaede0e00e 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -35,19 +35,8 @@ tf_custom_op_library( ], ) -# TODO(mrry): Move the kernels out of the core library into this library. -tf_custom_op_library( - name = "_dataset_ops.so", - srcs = [ - "ops/dataset_ops.cc", - ], -) - tf_gen_op_libs( - op_lib_names = [ - "dataset_ops", - "prefetching_ops", - ], + op_lib_names = ["prefetching_ops"], ) filegroup( diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 0c7e793689..824ac4298f 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -41,8 +41,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - # pylint: disable=unused-import + from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch from tensorflow.contrib.data.python.ops.batching import unbatch diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc deleted file mode 100644 index 1574384cb2..0000000000 --- a/tensorflow/contrib/data/ops/dataset_ops.cc +++ /dev/null @@ -1,232 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_def_builder.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -// -------------------------------------------------------------------------- - -// The ops in this section can be composed to define an input -// pipeline. Each op produces a DT_VARIANT tensor that represents -// a DAG of "dataset" objects. An "dataset" object can be converted -// to a stateful "iterator" by passing the "dataset" to the -// "MakeIterator" op. -// -// TODO(b/65524810): DT_VARIANT tensors that represent "dataset" objects are -// not presently serializable. To avoid issues with constant folding, ensure -// that any "source dataset" ops (i.e. ops that output a dataset and do not -// take one as input) are marked "stateful". - -REGISTER_OP("IgnoreErrorsDataset") - .Input("input_dataset: variant") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that contains the elements of `input_dataset` ignoring errors. -)doc"); - -REGISTER_OP("MapAndBatchDataset") - .Input("input_dataset: variant") - .Input("other_arguments: Targuments") - .Input("batch_size: int64") - .Input("num_parallel_batches: int64") - .Output("handle: variant") - .Attr("f: func") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that applies `f` to the outputs of `input_dataset` and then -batches `batch_size` of them. - -Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up -to `batch_size * num_parallel_batches` copies of `f` in parallel. - -batch_size: A scalar representing the number of elements to accumulate in a - batch. It determines the number of concurrent invocations of `f` that process - elements from `input_dataset` in parallel. -num_parallel_batches: A scalar representing the number of batches to create in - parallel. Processing multiple batches in parallel benefits workloads prone to - stragglers. -)doc"); - -REGISTER_OP("ScanDataset") - .Input("input_dataset: variant") - .Input("initial_state: Tstate") - .Input("other_arguments: Targuments") - .Output("handle: variant") - .Attr("f: func") - .Attr("Tstate: list(type) >= 1") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset successively reduces `f` over the elements of `input_dataset`. -)doc"); - -REGISTER_OP("ParallelInterleaveDataset") - .Input("input_dataset: variant") - .Input("other_arguments: Targuments") - .Input("cycle_length: int64") - .Input("block_length: int64") - .Input("sloppy: bool") - .Output("handle: variant") - .Attr("f: func") - .Attr("Targuments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that applies `f` to the outputs of `input_dataset`. - -The resulting dataset is similar to the `InterleaveDataset`, with the exception -that if retrieving the next value from a dataset would cause the requester to -block, it will skip that input dataset. This dataset is especially useful -when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it -allows the training step to proceed so long as some data is available. - -!! WARNING !! This dataset is not deterministic! - -f: A function mapping elements of `input_dataset`, concatenated with - `other_arguments`, to a Dataset variant that contains elements matching - `output_types` and `output_shapes`. -)doc"); - -REGISTER_OP("GroupByWindowDataset") - .Input("input_dataset: variant") - .Input("key_func_other_arguments: Tkey_func_other_arguments") - .Input("reduce_func_other_arguments: Treduce_func_other_arguments") - .Input( - "window_size_func_other_arguments: Twindow_size_func_other_arguments") - .Output("handle: variant") - .Attr("key_func: func") - .Attr("reduce_func: func") - .Attr("window_size_func: func") - .Attr("Tkey_func_other_arguments: list(type) >= 0") - .Attr("Treduce_func_other_arguments: list(type) >= 0") - .Attr("Twindow_size_func_other_arguments: list(type) >= 0") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that computes a windowed group-by on `input_dataset`. - -// TODO(mrry): Support non-int64 keys. - -key_func: A function mapping an element of `input_dataset`, concatenated - with `key_func_other_arguments` to a scalar value of type DT_INT64. -)doc"); - -REGISTER_OP("DenseToSparseBatchDataset") - .Input("input_dataset: variant") - .Input("batch_size: int64") - .Input("row_shape: int64") - .Output("handle: variant") - // NOTE(mrry): the 0th and 2nd elements will be DT_INT64. - .Attr("output_types: list(type) >= 1") - // NOTE(mrry): the 1st and 2nd elements will be vectors. - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that yields a SparseTensor for each element of the input. - -input_dataset: A handle to an input dataset. Must have a single component. -batch_size: A scalar representing the number of elements to accumulate in a - batch. -row_shape: A vector representing the dense shape of each row in the produced - SparseTensor. The shape may be partially specified, using `-1` to indicate - that a particular dimension should use the maximum size of all batch elements. -)doc"); - -REGISTER_OP("SqlDataset") - .Input("driver_name: string") - .Input("data_source_name: string") - .Input("query: string") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked - // stateful to inhibit constant folding. - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that executes a SQL query and emits rows of the result set. - -driver_name: The database type. Currently, the only supported type is 'sqlite'. -data_source_name: A connection string to connect to the database. -query: A SQL query to execute. -)doc"); - -REGISTER_OP("DatasetToSingleElement") - .Input("dataset: variant") - .Output("components: output_types") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn([](shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - std::vector output_shapes; - TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); - if (output_shapes.size() != c->num_outputs()) { - return errors::InvalidArgument( - "`output_shapes` must be the same length as `output_types` (", - output_shapes.size(), " vs. ", c->num_outputs()); - } - for (size_t i = 0; i < output_shapes.size(); ++i) { - shape_inference::ShapeHandle output_shape_handle; - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( - output_shapes[i], &output_shape_handle)); - c->set_output(static_cast(i), output_shape_handle); - } - return Status::OK(); - }) - .Doc(R"doc( -Outputs the single element from the given dataset. - -dataset: A handle to a dataset that contains a single element. -components: The components of the single element of `input`. -)doc"); - -REGISTER_OP("SerializeIterator") - .Input("resource_handle: resource") - .Output("serialized: variant") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Converts the given `resource_handle` representing an iterator to a variant tensor. - -resource_handle: A handle to an iterator resource. -serialized: A variant tensor storing the state of the iterator contained in the - resource. -)doc"); - -REGISTER_OP("DeserializeIterator") - .Input("resource_handle: resource") - .Input("serialized: variant") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Converts the given variant tensor to an iterator and stores it in the given resource. - -resource_handle: A handle to an iterator resource. -serialized: A variant tensor storing the state of the iterator contained in the - resource. -)doc"); - -} // namespace tensorflow diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py index 271d80a54b..bda9a2a4a3 100644 --- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py @@ -21,7 +21,6 @@ import os import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import readers from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session @@ -34,6 +33,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py index 329dc80ba5..f59ac760dc 100644 --- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py @@ -21,7 +21,6 @@ import os from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import enumerate_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op @@ -30,6 +29,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import variables diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index 8033f1d388..3ae8f71d77 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -21,7 +21,6 @@ import gzip import os import zlib -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.contrib.data.python.ops import readers from tensorflow.core.example import example_pb2 @@ -34,6 +33,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 727c5d1c38..1b81cf5be9 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -11,6 +11,20 @@ load( ) load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") +py_library( + name = "dataset_ops", + srcs = [ + "dataset_ops.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":transformation_ops", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + ], +) + py_library( name = "iterator_ops", srcs = [ @@ -59,7 +73,6 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - ":gen_dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dataset_ops_gen", @@ -115,31 +128,6 @@ tf_custom_op_py_library( ], ) -tf_gen_op_wrapper_py( - name = "gen_dataset_ops", - out = "gen_dataset_ops.py", - deps = ["//tensorflow/contrib/data:dataset_ops_op_lib"], -) - -tf_custom_op_py_library( - name = "dataset_ops", - srcs = ["dataset_ops.py"], - dso = ["//tensorflow/contrib/data:_dataset_ops.so"], - kernels = [ - "//tensorflow/contrib/data:dataset_ops_op_lib", - ], - srcs_version = "PY2AND3", - deps = [ - ":gen_dataset_ops", - ":transformation_ops", - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:platform", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index e6e5f716b6..abc9212a87 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -17,7 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes @@ -25,6 +24,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import math_ops diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index c4c4426809..45d6dbe743 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -20,21 +20,15 @@ from __future__ import print_function from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import enumerate_ops from tensorflow.contrib.data.python.ops import error_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import grouping -from tensorflow.contrib.util import loader from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gen_io_ops -from tensorflow.python.platform import resource_loader from tensorflow.python.util import deprecation -_dataset_ops = loader.load_op_library( - resource_loader.get_path_to_datafile("../../_dataset_ops.so")) - - class Dataset(dataset_ops.Dataset): """Represents a potentially large set of elements. diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 51a2791072..238bb52b02 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -17,9 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.ops import gen_dataset_ops def ignore_errors(): diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 1c7c94b3c8..6df7b22fb6 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -17,12 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops def group_by_window(key_func, diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index ce23e95697..74a919c1ff 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -17,12 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.util import deprecation diff --git a/tensorflow/contrib/data/python/ops/iterator_ops.py b/tensorflow/contrib/data/python/ops/iterator_ops.py index 32d2f42c93..d736029fb0 100644 --- a/tensorflow/contrib/data/python/ops/iterator_ops.py +++ b/tensorflow/contrib/data/python/ops/iterator_ops.py @@ -17,8 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.training import saver diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index f22298b757..2e1c3153ca 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.data.python.ops import dataset_ops as contrib_dataset_ops -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers from tensorflow.python.data.util import nest @@ -26,6 +25,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.util import deprecation diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 87bbbb7d19..5acaed48a3 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -19,11 +19,11 @@ from __future__ import print_function import collections -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops class _ScanDataset(dataset_ops.Dataset): diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 8b8251f84b..a4b5ca16af 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -8270,6 +8270,29 @@ op { } } } +op { + name: "DatasetToSingleElement" + input_arg { + name: "dataset" + type: DT_VARIANT + } + output_arg { + name: "components" + type_list_attr: "output_types" + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "DebugGradientIdentity" input_arg { @@ -9248,6 +9271,69 @@ op { } } } +op { + name: "DenseToSparseBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "row_shape" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "DenseToSparseBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "row_shape" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "DenseToSparseSetOperation" input_arg { @@ -9741,6 +9827,18 @@ op { } } } +op { + name: "DeserializeIterator" + input_arg { + name: "resource_handle" + type: DT_RESOURCE + } + input_arg { + name: "serialized" + type: DT_VARIANT + } + is_stateful: true +} op { name: "DeserializeManySparse" input_arg { @@ -13494,6 +13592,131 @@ op { } } } +op { + name: "GroupByWindowDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "key_func_other_arguments" + type_list_attr: "Tkey_func_other_arguments" + } + input_arg { + name: "reduce_func_other_arguments" + type_list_attr: "Treduce_func_other_arguments" + } + input_arg { + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "key_func" + type: "func" + } + attr { + name: "reduce_func" + type: "func" + } + attr { + name: "window_size_func" + type: "func" + } + attr { + name: "Tkey_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Treduce_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "GroupByWindowDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "key_func_other_arguments" + type_list_attr: "Tkey_func_other_arguments" + } + input_arg { + name: "reduce_func_other_arguments" + type_list_attr: "Treduce_func_other_arguments" + } + input_arg { + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "key_func" + type: "func" + } + attr { + name: "reduce_func" + type: "func" + } + attr { + name: "window_size_func" + type: "func" + } + attr { + name: "Tkey_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Treduce_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "HSVToRGB" input_arg { @@ -13914,6 +14137,53 @@ op { } } } +op { + name: "IgnoreErrorsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "IgnoreErrorsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Imag" input_arg { @@ -15818,6 +16088,50 @@ op { } is_stateful: true } +op { + name: "MapAndBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + input_arg { + name: "num_parallel_batches" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "MapClear" attr { @@ -20556,6 +20870,54 @@ op { type: "type" } } +op { + name: "ParallelInterleaveDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "cycle_length" + type: DT_INT64 + } + input_arg { + name: "block_length" + type: DT_INT64 + } + input_arg { + name: "sloppy" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "ParallelMapDataset" input_arg { @@ -30146,6 +30508,52 @@ op { } } } +op { + name: "ScanDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "ScatterAdd" input_arg { @@ -31861,6 +32269,18 @@ op { } } } +op { + name: "SerializeIterator" + input_arg { + name: "resource_handle" + type: DT_RESOURCE + } + output_arg { + name: "serialized" + type: DT_VARIANT + } + is_stateful: true +} op { name: "SerializeManySparse" input_arg { @@ -37265,6 +37685,38 @@ op { } } } +op { + name: "SqlDataset" + input_arg { + name: "driver_name" + type: DT_STRING + } + input_arg { + name: "data_source_name" + type: DT_STRING + } + input_arg { + name: "query" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} op { name: "Sqrt" input_arg { diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 8f5d8308a3..f512213964 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -141,6 +141,16 @@ count: A scalar representing the number of elements from the `input_dataset` that should be skipped. If count is -1, skips everything. )doc"); +REGISTER_OP("IgnoreErrorsDataset") + .Input("input_dataset: variant") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that contains the elements of `input_dataset` ignoring errors. +)doc"); + REGISTER_OP("MapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -174,6 +184,32 @@ num_parallel_calls: The number of concurrent invocations of `f` that process elements from `input_dataset` in parallel. )doc"); +REGISTER_OP("MapAndBatchDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Input("batch_size: int64") + .Input("num_parallel_batches: int64") + .Output("handle: variant") + .Attr("f: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset` and then +batches `batch_size` of them. + +Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up +to `batch_size * num_parallel_batches` copies of `f` in parallel. + +batch_size: A scalar representing the number of elements to accumulate in a + batch. It determines the number of concurrent invocations of `f` that process + elements from `input_dataset` in parallel. +num_parallel_batches: A scalar representing the number of batches to create in + parallel. Processing multiple batches in parallel benefits workloads prone to + stragglers. +)doc"); + REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") .Input("buffer_size: int64") @@ -188,6 +224,21 @@ buffer_size: The maximum number of elements to buffer in an iterator over this dataset. )doc"); +REGISTER_OP("ScanDataset") + .Input("input_dataset: variant") + .Input("initial_state: Tstate") + .Input("other_arguments: Targuments") + .Output("handle: variant") + .Attr("f: func") + .Attr("Tstate: list(type) >= 1") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset successively reduces `f` over the elements of `input_dataset`. +)doc"); + REGISTER_OP("FlatMapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -234,6 +285,59 @@ f: A function mapping elements of `input_dataset`, concatenated with `output_types` and `output_shapes`. )doc"); +REGISTER_OP("ParallelInterleaveDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Input("cycle_length: int64") + .Input("block_length: int64") + .Input("sloppy: bool") + .Output("handle: variant") + .Attr("f: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that applies `f` to the outputs of `input_dataset`. + +The resulting dataset is similar to the `InterleaveDataset`, with the exception +that if retrieving the next value from a dataset would cause the requester to +block, it will skip that input dataset. This dataset is especially useful +when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it +allows the training step to proceed so long as some data is available. + +!! WARNING !! This dataset is not deterministic! + +f: A function mapping elements of `input_dataset`, concatenated with + `other_arguments`, to a Dataset variant that contains elements matching + `output_types` and `output_shapes`. +)doc"); + +REGISTER_OP("GroupByWindowDataset") + .Input("input_dataset: variant") + .Input("key_func_other_arguments: Tkey_func_other_arguments") + .Input("reduce_func_other_arguments: Treduce_func_other_arguments") + .Input( + "window_size_func_other_arguments: Twindow_size_func_other_arguments") + .Output("handle: variant") + .Attr("key_func: func") + .Attr("reduce_func: func") + .Attr("window_size_func: func") + .Attr("Tkey_func_other_arguments: list(type) >= 0") + .Attr("Treduce_func_other_arguments: list(type) >= 0") + .Attr("Twindow_size_func_other_arguments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that computes a windowed group-by on `input_dataset`. + +// TODO(mrry): Support non-int64 keys. + +key_func: A function mapping an element of `input_dataset`, concatenated + with `key_func_other_arguments` to a scalar value of type DT_INT64. +)doc"); + REGISTER_OP("FilterDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -304,6 +408,27 @@ padding_values: A list of scalars containing the padding value to use for each of the outputs. )doc"); +REGISTER_OP("DenseToSparseBatchDataset") + .Input("input_dataset: variant") + .Input("batch_size: int64") + .Input("row_shape: int64") + .Output("handle: variant") + // NOTE(mrry): the 0th and 2nd elements will be DT_INT64. + .Attr("output_types: list(type) >= 1") + // NOTE(mrry): the 1st and 2nd elements will be vectors. + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that yields a SparseTensor for each element of the input. + +input_dataset: A handle to an input dataset. Must have a single component. +batch_size: A scalar representing the number of elements to accumulate in a + batch. +row_shape: A vector representing the dense shape of each row in the produced + SparseTensor. The shape may be partially specified, using `-1` to indicate + that a particular dimension should use the maximum size of all batch elements. +)doc"); + REGISTER_OP("RangeDataset") .Input("start: int64") .Input("stop: int64") @@ -389,6 +514,24 @@ compression_type: A scalar containing either (i) the empty string (no buffer_size: A scalar containing the number of bytes to buffer. )doc"); +REGISTER_OP("SqlDataset") + .Input("driver_name: string") + .Input("data_source_name: string") + .Input("query: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked + // stateful to inhibit constant folding. + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that executes a SQL query and emits rows of the result set. + +driver_name: The database type. Currently, the only supported type is 'sqlite'. +data_source_name: A connection string to connect to the database. +query: A SQL query to execute. +)doc"); + REGISTER_OP("FixedLengthRecordDataset") .Input("filenames: string") .Input("header_bytes: int64") @@ -519,6 +662,36 @@ REGISTER_OP("IteratorGetNext") Gets the next output from the given iterator. )doc"); +REGISTER_OP("DatasetToSingleElement") + .Input("dataset: variant") + .Output("components: output_types") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); + std::vector output_shapes; + TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); + if (output_shapes.size() != c->num_outputs()) { + return errors::InvalidArgument( + "`output_shapes` must be the same length as `output_types` (", + output_shapes.size(), " vs. ", c->num_outputs()); + } + for (size_t i = 0; i < output_shapes.size(); ++i) { + shape_inference::ShapeHandle output_shape_handle; + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + output_shapes[i], &output_shape_handle)); + c->set_output(static_cast(i), output_shape_handle); + } + return Status::OK(); + }) + .Doc(R"doc( +Outputs the single element from the given dataset. + +dataset: A handle to a dataset that contains a single element. +components: The components of the single element of `input`. +)doc"); + REGISTER_OP("IteratorToStringHandle") .Input("resource_handle: resource") .Output("string_handle: string") @@ -547,4 +720,28 @@ output_shapes: If specified, defines the shape of each tuple component in an element produced by the resulting iterator. )doc"); +REGISTER_OP("SerializeIterator") + .Input("resource_handle: resource") + .Output("serialized: variant") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Converts the given `resource_handle` representing an iterator to a variant tensor. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. +)doc"); + +REGISTER_OP("DeserializeIterator") + .Input("resource_handle: resource") + .Input("serialized: variant") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Converts the given variant tensor to an iterator and stores it in the given resource. + +resource_handle: A handle to an iterator resource. +serialized: A variant tensor storing the state of the iterator contained in the + resource. +)doc"); + } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/kernel_tests/iterator_ops_test.py index 60a44b5b14..2128ef4ae1 100644 --- a/tensorflow/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/python/kernel_tests/iterator_ops_test.py @@ -17,12 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -31,7 +33,9 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import script_ops @@ -533,6 +537,64 @@ class IteratorTest(test.TestCase): target_placeholder: "/job:localhost/replica:0/task:0/cpu:0" }) + def testIncorrectIteratorRestore(self): + + def _path(): + return os.path.join(self.get_temp_dir(), "iterator") + + def _save_op(iterator_resource): + iterator_state_variant = gen_dataset_ops.serialize_iterator( + iterator_resource) + save_op = io_ops.write_file( + _path(), parsing_ops.serialize_tensor(iterator_state_variant)) + return save_op + + def _restore_op(iterator_resource): + iterator_state_variant = parsing_ops.parse_tensor( + io_ops.read_file(_path()), dtypes.variant) + restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, + iterator_state_variant) + return restore_op + + def _build_range_dataset_graph(): + start = 1 + stop = 10 + iterator = dataset_ops.Dataset.range(start, + stop).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = _save_op(iterator._iterator_resource) + restore_op = _restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + def _build_reader_dataset_graph(): + filenames = ["test"] # Does not exist but we don't care in this test. + iterator = readers.FixedLengthRecordDataset( + filenames, 1, 0, 0).make_initializable_iterator() + init_op = iterator.initializer + get_next_op = iterator.get_next() + save_op = _save_op(iterator._iterator_resource) + restore_op = _restore_op(iterator._iterator_resource) + return init_op, get_next_op, save_op, restore_op + + # Saving iterator for RangeDataset graph. + with ops.Graph().as_default() as g: + init_op, _, save_op, _ = _build_range_dataset_graph() + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(save_op) + + # Attempt to restore the saved iterator into an IteratorResource of + # incompatible type. An iterator of RangeDataset has output type int64, + # while an iterator of FixedLengthRecordDataset has output type string. + # So an InvalidArgumentError should be raised by + # IteratorResource::set_iterator. + with ops.Graph().as_default() as g: + _, _, _, restore_op = _build_reader_dataset_graph() + with self.test_session(graph=g) as sess: + with self.assertRaises(errors.InvalidArgumentError): + sess.run(restore_op) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/range_dataset_op_test.py b/tensorflow/python/kernel_tests/range_dataset_op_test.py index 3c1685c951..0c530522b8 100644 --- a/tensorflow/python/kernel_tests/range_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/range_dataset_op_test.py @@ -17,15 +17,32 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile from tensorflow.python.platform import test class RangeDatasetTest(test.TestCase): + def tearDown(self): + # Remove all checkpoint files. + prefix = self._iterator_checkpoint_prefix() + pattern = prefix + "*" + files = gfile.Glob(pattern) + map(gfile.Remove, files) + def testStop(self): stop = array_ops.placeholder(dtypes.int64, shape=[]) iterator = dataset_ops.Dataset.range(stop).make_initializable_iterator() @@ -151,6 +168,319 @@ class RangeDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def _iterator_checkpoint_prefix(self): + return os.path.join(self.get_temp_dir(), "iterator") + + def _save_op(self, iterator_resource): + iterator_state_variant = gen_dataset_ops.serialize_iterator( + iterator_resource) + save_op = io_ops.write_file( + self._iterator_checkpoint_prefix(), + parsing_ops.serialize_tensor(iterator_state_variant)) + return save_op + + def _restore_op(self, iterator_resource): + iterator_state_variant = parsing_ops.parse_tensor( + io_ops.read_file(self._iterator_checkpoint_prefix()), dtypes.variant) + restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, + iterator_state_variant) + return restore_op + + def testSaveRestore(self): + + def _build_graph(start, stop): + iterator = dataset_ops.Dataset.range(start, + stop).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + break_point = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Saving and restoring in same session. + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testRestoreWithoutBuildingDatasetGraph(self): + + def _build_graph(start, stop, num_epochs): + dataset = dataset_ops.Dataset.range(start, stop).repeat(num_epochs) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + num_epochs = 5 + break_point = 5 + break_epoch = 3 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for _ in range(break_epoch): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + # Create an empty IteratorResource and restore the Iterator into it. + output_types = dtypes.int64 + output_shapes = tensor_shape.scalar() + iterator = iterator_ops.Iterator.from_structure(output_types, + output_shapes) + restore_op = self._restore_op(iterator._iterator_resource) + get_next = iterator.get_next() + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + for _ in range(break_epoch + 1, num_epochs): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testRestoreInModifiedGraph(self): + + def _build_graph(start, stop): + dataset = dataset_ops.Dataset.range(start, stop) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + stop_1 = 8 + break_point = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + # Intentionally build a graph with a different value for stop to make sure + # the original dataset graph is actually getting loaded. + init_op, get_next, _, restore_op = _build_graph(start, stop_1) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testInitThenRestore(self): + # Note: Calling init_op before restore_op is redundant. This test just makes + # sure we do not fail if restore is called on an already initialized + # iterator resource. + + def _build_graph(start, stop): + dataset = dataset_ops.Dataset.range(start, stop) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + # Saving and restoring in different sessions. + start = 2 + stop = 10 + break_point = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(restore_op) + for i in range(break_point, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testMultipleSaves(self): + + def _build_graph(start, stop): + iterator = dataset_ops.Dataset.range(start, + stop).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + start = 2 + stop = 10 + break_point1 = 5 + break_point2 = 7 + + with ops.Graph().as_default() as g: + init_op, get_next, save_op, _ = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for i in range(start, break_point1): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point1, break_point2): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + break_point2 = 7 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph(start, stop) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_point2, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSaveRestoreWithRepeat(self): + + def _build_graph(start, stop, num_epochs): + iterator = dataset_ops.Dataset.range( + start, stop).repeat(num_epochs).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + start = 2 + stop = 10 + num_epochs = 5 + break_range = 5 + break_epoch = 3 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph( + start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for _ in range(break_epoch - 1): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + for i in range(start, break_range): + self.assertEqual(i, sess.run(get_next)) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for i in range(break_range, stop): + self.assertEqual(i, sess.run(get_next)) + for _ in range(break_epoch, num_epochs): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSaveRestoreExhaustedIterator(self): + + def _build_graph(start, stop, num_epochs): + iterator = dataset_ops.Dataset.range( + start, stop).repeat(num_epochs).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next, save_op, restore_op + + start = 2 + stop = 10 + num_epochs = 5 + with ops.Graph().as_default() as g: + init_op, get_next, save_op, restore_op = _build_graph( + start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for _ in range(num_epochs): + for i in range(start, stop): + self.assertEqual(i, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/kernel_tests/reader_dataset_ops_test.py index 70b6ce442e..c8e7333b4b 100644 --- a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_dataset_ops_test.py @@ -26,8 +26,13 @@ from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -267,6 +272,299 @@ class FixedLengthRecordReaderTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(iterator.get_next()) + def _iterator_checkpoint_path(self): + return os.path.join(self.get_temp_dir(), "iterator") + + def _save_op(self, iterator_resource): + iterator_state_variant = gen_dataset_ops.serialize_iterator( + iterator_resource) + save_op = io_ops.write_file( + self._iterator_checkpoint_path(), + parsing_ops.serialize_tensor(iterator_state_variant)) + return save_op + + def _restore_op(self, iterator_resource): + iterator_state_variant = parsing_ops.parse_tensor( + io_ops.read_file(self._iterator_checkpoint_path()), dtypes.variant) + restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, + iterator_state_variant) + return restore_op + + def _build_iterator_graph(self, num_epochs): + filenames = self._createFiles() + dataset = (readers.FixedLengthRecordDataset( + filenames, self._record_bytes, self._header_bytes, self._footer_bytes) + .repeat(num_epochs)) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next_op = iterator.get_next() + save_op = self._save_op(iterator._iterator_resource) + restore_op = self._restore_op(iterator._iterator_resource) + return init_op, get_next_op, save_op, restore_op + + def _restore_iterator(self): + output_types = dtypes.string + output_shapes = tensor_shape.scalar() + iterator = iterator_ops.Iterator.from_structure(output_types, output_shapes) + get_next = iterator.get_next() + restore_op = self._restore_op(iterator._iterator_resource) + return restore_op, get_next + + def testSaveRestore(self): + num_epochs = 10 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testInitThenRestore(self): + # Note: Calling init_op before restore_op is redundant. This test just makes + # sure we do not fail if restore is called on an already initialized + # iterator resource. + num_epochs = 10 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreInModifiedGraph(self): + num_epochs = 10 + num_epochs_1 = 20 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs_1) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreWithoutBuildingDatasetGraph(self): + num_epochs = 10 + epoch_break = 5 + file_break = self._num_files // 2 + record_break = self._num_records // 2 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch == epoch_break and f == file_break and + r == record_break): + sess.run(save_op) + break + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + else: + continue + break + else: + continue + break + else: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + restore_op, get_next_op = self._restore_iterator() + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for epoch in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + if (epoch < epoch_break or + (epoch == epoch_break and f < file_break) or + (epoch == epoch_break and f == file_break and + r < record_break)): + continue + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreUnusedIterator(self): + num_epochs = 10 + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + # Save unused iterator. + sess.run(save_op) + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + for _ in range(num_epochs * self._num_files * self._num_records): + sess.run(get_next_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testRestoreExhaustedIterator(self): + num_epochs = 10 + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(init_op) + # Note: There is no checkpoint saved currently so a NotFoundError is + # raised. + with self.assertRaises(errors.NotFoundError): + sess.run(restore_op) + for _ in range(num_epochs): + for f in range(self._num_files): + for r in range(self._num_records): + self.assertEqual(self._record(f, r), sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + sess.run(save_op) + + with ops.Graph().as_default() as g: + init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( + num_epochs=num_epochs) + with self.test_session(graph=g) as sess: + sess.run(restore_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + class TFRecordDatasetTest(test.TestCase): -- GitLab From ba46a05afa45293e20c305cafc466c5c8a29517c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 22:41:22 -0800 Subject: [PATCH 0173/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 174962378 --- tensorflow/go/op/wrappers.go | 411 +++++++++++++++++++++++------------ 1 file changed, 277 insertions(+), 134 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 4e5d17f76f..bdfad48567 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -3983,41 +3983,6 @@ func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value t return op.Output(0) } -// Identity op for gradient debugging. -// -// This op is hidden from public in Python. It is used by TensorFlow Debugger to -// register gradient tensors for gradient debugging. -func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DebugGradientIdentity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArrayGradV3 -func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"source": source} - opspec := tf.OpSpec{ - Type: "TensorArrayGradV2", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Get the current size of the TensorArray. // // Arguments: @@ -4551,31 +4516,6 @@ func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) return scope.AddOperation(opspec) } -// Concatenates tensors along one dimension. -// -// Arguments: -// values: List of `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// axis: 0-D. The dimension along which to concatenate. Must be in the -// range [-rank(values), rank(values)). -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConcatV2", - Input: []tf.Input{ - tf.OutputList(values), axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. type QueueDequeueUpToV2Attr func(optionalAttr) @@ -4992,80 +4932,6 @@ func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV return op.Output(0) } -// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. -type FIFOQueueV2Attr func(optionalAttr) - -// FIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// FIFOQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// FIFOQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func FIFOQueueV2Container(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// FIFOQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements in first-in first-out order. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FIFOQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // StridedSliceAttr is an optional argument to StridedSlice. type StridedSliceAttr func(optionalAttr) @@ -5445,6 +5311,101 @@ func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged return op.Output(0) } +// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. +type FIFOQueueV2Attr func(optionalAttr) + +// FIFOQueueV2Shapes sets the optional shapes attribute to value. +// +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. If the length of +// this attr is 0, the shapes of queue elements are not constrained, and +// only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["shapes"] = value + } +} + +// FIFOQueueV2Capacity sets the optional capacity attribute to value. +// +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// FIFOQueueV2Container sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func FIFOQueueV2Container(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// FIFOQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A queue that produces elements in first-in first-out order. +// +// Arguments: +// component_types: The type of each component in a value. +// +// Returns The handle to the queue. +func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FIFOQueueV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Converts the given `resource_handle` representing an iterator to a variant tensor. +// +// Arguments: +// resource_handle: A handle to an iterator resource. +// +// Returns A variant tensor storing the state of the iterator contained in the +// resource. +func SerializeIterator(scope *Scope, resource_handle tf.Output) (serialized tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SerializeIterator", + Input: []tf.Input{ + resource_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Return a tensor with the same shape and contents as the input tensor or value. func Identity(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { @@ -5576,6 +5537,39 @@ func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_han return op.Output(0) } +// Outputs the single element from the given dataset. +// +// Arguments: +// dataset: A handle to a dataset that contains a single element. +// +// +// +// Returns The components of the single element of `input`. +func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "DatasetToSingleElement", + Input: []tf.Input{ + dataset, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("DatasetToSingleElement", err) + return + } + return components +} + // Gets the next output from the given iterator. func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { @@ -5696,6 +5690,30 @@ func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf return op.Output(0) } +// Creates a dataset that executes a SQL query and emits rows of the result set. +// +// Arguments: +// driver_name: The database type. Currently, the only supported type is 'sqlite'. +// data_source_name: A connection string to connect to the database. +// query: A SQL query to execute. +// +// +func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "SqlDataset", + Input: []tf.Input{ + driver_name, data_source_name, query, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // PlaceholderAttr is an optional argument to Placeholder. type PlaceholderAttr func(optionalAttr) @@ -5766,6 +5784,68 @@ func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, out return op.Output(0) } +// Identity op for gradient debugging. +// +// This op is hidden from public in Python. It is used by TensorFlow Debugger to +// register gradient tensors for gradient debugging. +func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DebugGradientIdentity", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deprecated. Use TensorArrayGradV3 +func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"source": source} + opspec := tf.OpSpec{ + Type: "TensorArrayGradV2", + Input: []tf.Input{ + handle, flow_in, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that yields a SparseTensor for each element of the input. +// +// Arguments: +// input_dataset: A handle to an input dataset. Must have a single component. +// batch_size: A scalar representing the number of elements to accumulate in a +// batch. +// row_shape: A vector representing the dense shape of each row in the produced +// SparseTensor. The shape may be partially specified, using `-1` to indicate +// that a particular dimension should use the maximum size of all batch elements. +// +// +func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "DenseToSparseBatchDataset", + Input: []tf.Input{ + input_dataset, batch_size, row_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a dataset that batches and pads `batch_size` elements from the input. // // Arguments: @@ -5826,6 +5906,69 @@ func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtyp return op.Output(0), op.Output(1) } +// Converts the given variant tensor to an iterator and stores it in the given resource. +// +// Arguments: +// resource_handle: A handle to an iterator resource. +// serialized: A variant tensor storing the state of the iterator contained in the +// resource. +// +// Returns the created operation. +func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DeserializeIterator", + Input: []tf.Input{ + resource_handle, serialized, + }, + } + return scope.AddOperation(opspec) +} + +// Concatenates tensors along one dimension. +// +// Arguments: +// values: List of `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// axis: 0-D. The dimension along which to concatenate. Must be in the +// range [-rank(values), rank(values)). +// +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. +func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ConcatV2", + Input: []tf.Input{ + tf.OutputList(values), axis, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that contains the elements of `input_dataset` ignoring errors. +func IgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "IgnoreErrorsDataset", + Input: []tf.Input{ + input_dataset, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a dataset that concatenates `input_dataset` with `another_dataset`. func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { -- GitLab From a9e3905de7d44d33efb056d292c9faa1006cb740 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 7 Nov 2017 23:19:03 -0800 Subject: [PATCH 0174/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 174964560 --- tensorflow/core/ops/ops.pbtxt | 362 ++++++++++++++++++++++++++++++++++ 1 file changed, 362 insertions(+) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index d35decc182..8353b45e22 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -6058,6 +6058,32 @@ op { summary: "Compute the cumulative sum of the tensor `x` along `axis`." description: "By default, this op performs an inclusive cumsum, which means that the first\nelement of the input is identical to the first element of the output:\n\n```python\ntf.cumsum([a, b, c]) # => [a, a + b, a + b + c]\n```\n\nBy setting the `exclusive` kwarg to `True`, an exclusive cumsum is\nperformed instead:\n\n```python\ntf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b]\n```\n\nBy setting the `reverse` kwarg to `True`, the cumsum is performed in the\nopposite direction:\n\n```python\ntf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c]\n```\n\nThis is more efficient than using separate `tf.reverse` ops.\n\nThe `reverse` and `exclusive` kwargs can also be combined:\n\n```python\ntf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0]\n```" } +op { + name: "DatasetToSingleElement" + input_arg { + name: "dataset" + description: "A handle to a dataset that contains a single element." + type: DT_VARIANT + } + output_arg { + name: "components" + description: "The components of the single element of `input`." + type_list_attr: "output_types" + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Outputs the single element from the given dataset." +} op { name: "DebugGradientIdentity" input_arg { @@ -6689,6 +6715,41 @@ op { summary: "Applies set operation along last dimension of 2 `Tensor` inputs." description: "See SetOperationOp::SetOperationFromContext for values of `set_operation`.\n\nOutput `result` is a `SparseTensor` represented by `result_indices`,\n`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this\nhas rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`\ndimension contains the result of `set_operation` applied to the corresponding\n`[0...n-1]` dimension of `set`." } +op { + name: "DenseToSparseBatchDataset" + input_arg { + name: "input_dataset" + description: "A handle to an input dataset. Must have a single component." + type: DT_VARIANT + } + input_arg { + name: "batch_size" + description: "A scalar representing the number of elements to accumulate in a\nbatch." + type: DT_INT64 + } + input_arg { + name: "row_shape" + description: "A vector representing the dense shape of each row in the produced\nSparseTensor. The shape may be partially specified, using `-1` to indicate\nthat a particular dimension should use the maximum size of all batch elements." + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that yields a SparseTensor for each element of the input." +} op { name: "DenseToSparseSetOperation" input_arg { @@ -7028,6 +7089,21 @@ op { summary: "Dequantize the \'input\' tensor into a float Tensor." description: "[min_range, max_range] are scalar floats that specify the range for\nthe \'input\' data. The \'mode\' attribute controls exactly which calculations are\nused to convert the float values to their quantized equivalents.\n\nIn \'MIN_COMBINED\' mode, each value of the tensor will undergo the following:\n\n```\nif T == qint8, in[i] += (range(T) + 1)/ 2.0\nout[i] = min_range + (in[i]* (max_range - min_range) / range(T))\n```\nhere `range(T) = numeric_limits::max() - numeric_limits::min()`\n\n*MIN_COMBINED Mode Example*\n\nIf the input comes from a QuantizedRelu6, the output type is\nquint8 (range of 0-255) but the possible range of QuantizedRelu6 is\n0-6. The min_range and max_range values are therefore 0.0 and 6.0.\nDequantize on quint8 will take each value, cast to float, and multiply\nby 6 / 255.\nNote that if quantizedtype is qint8, the operation will additionally add\neach value by 128 prior to casting.\n\nIf the mode is \'MIN_FIRST\', then this approach is used:\n\n```c++\nnum_discrete_values = 1 << (# of bits in T)\nrange_adjust = num_discrete_values / (num_discrete_values - 1)\nrange = (range_max - range_min) * range_adjust\nrange_scale = range / num_discrete_values\nconst double offset_input = static_cast(input) - lowest_quantized;\nresult = range_min + ((input - numeric_limits::min()) * range_scale)\n```\n\n*SCALED mode Example*\n\n`SCALED` mode matches the quantization approach used in\n`QuantizeAndDequantize{V2|V3}`.\n\nIf the mode is `SCALED`, we do not use the full range of the output type,\nchoosing to elide the lowest possible value for symmetry (e.g., output range is\n-127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to\n0.\n\nWe first find the range of values in our tensor. The\nrange we use is always centered on 0, so we find m such that\n```c++\n m = max(abs(input_min), abs(input_max))\n```\n\nOur input tensor range is then `[-m, m]`.\n\nNext, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.\nIf T is signed, this is\n```\n num_bits = sizeof(T) * 8\n [min_fixed, max_fixed] =\n [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]\n```\n\nOtherwise, if T is unsigned, the fixed-point range is\n```\n [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]\n```\n\nFrom this we compute our scaling factor, s:\n```c++\n s = (2 * m) / (max_fixed - min_fixed)\n```\n\nNow we can dequantize the elements of our tensor:\n```c++\nresult = input * s\n```" } +op { + name: "DeserializeIterator" + input_arg { + name: "resource_handle" + description: "A handle to an iterator resource." + type: DT_RESOURCE + } + input_arg { + name: "serialized" + description: "A variant tensor storing the state of the iterator contained in the\nresource." + type: DT_VARIANT + } + summary: "Converts the given variant tensor to an iterator and stores it in the given resource." + is_stateful: true +} op { name: "DeserializeManySparse" input_arg { @@ -10142,6 +10218,71 @@ op { summary: "Returns the truth value of (x >= y) element-wise." description: "*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } +op { + name: "GroupByWindowDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "key_func_other_arguments" + type_list_attr: "Tkey_func_other_arguments" + } + input_arg { + name: "reduce_func_other_arguments" + type_list_attr: "Treduce_func_other_arguments" + } + input_arg { + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "key_func" + type: "func" + description: "A function mapping an element of `input_dataset`, concatenated\nwith `key_func_other_arguments` to a scalar value of type DT_INT64." + } + attr { + name: "reduce_func" + type: "func" + } + attr { + name: "window_size_func" + type: "func" + } + attr { + name: "Tkey_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Treduce_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that computes a windowed group-by on `input_dataset`." + description: "// TODO(mrry): Support non-int64 keys." +} op { name: "HSVToRGB" input_arg { @@ -10602,6 +10743,30 @@ op { summary: "Compute the upper regularized incomplete Gamma function `Q(a, x)`." description: "The upper regularized incomplete Gamma function is defined as:\n\n\\\\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\\\)\n\nwhere\n\n\\\\(Gamma(a, x) = int_{x}^{\\infty} t^{a-1} exp(-t) dt\\\\)\n\nis the upper incomplete Gama function.\n\nNote, above `P(a, x)` (`Igamma`) is the lower regularized complete\nGamma function." } +op { + name: "IgnoreErrorsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that contains the elements of `input_dataset` ignoring errors." +} op { name: "Imag" input_arg { @@ -12373,6 +12538,54 @@ op { description: "This operation may be executed multiple times. Each execution will reset the\niterator in `iterator` to the first element of `dataset`." is_stateful: true } +op { + name: "MapAndBatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "batch_size" + description: "A scalar representing the number of elements to accumulate in a\nbatch. It determines the number of concurrent invocations of `f` that process\nelements from `input_dataset` in parallel." + type: DT_INT64 + } + input_arg { + name: "num_parallel_batches" + description: "A scalar representing the number of batches to create in\nparallel. Processing multiple batches in parallel benefits workloads prone to\nstragglers." + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that applies `f` to the outputs of `input_dataset` and then" + description: "batches `batch_size` of them.\n\nUnlike a \"MapDataset\", which applies `f` sequentially, this dataset invokes up\nto `batch_size * num_parallel_batches` copies of `f` in parallel." +} op { name: "MapClear" attr { @@ -16043,6 +16256,57 @@ op { summary: "Interleave the values from the `data` tensors into a single tensor." description: "Builds a merged tensor such that\n\n```python\n merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]\n```\n\nFor example, if each `indices[m]` is scalar or vector, we have\n\n```python\n # Scalar indices:\n merged[indices[m], ...] = data[m][...]\n\n # Vector indices:\n merged[indices[m][i], ...] = data[m][i, ...]\n```\n\nEach `data[i].shape` must start with the corresponding `indices[i].shape`,\nand the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we\nmust have `data[i].shape = indices[i].shape + constant`. In terms of this\n`constant`, the output shape is\n\n merged.shape = [max(indices)] + constant\n\nValues may be merged in parallel, so if an index appears in both `indices[m][i]`\nand `indices[n][j]`, the result may be invalid. This differs from the normal\nDynamicStitch operator that defines the behavior in that case.\n\nFor example:\n\n```python\n indices[0] = 6\n indices[1] = [4, 1]\n indices[2] = [[5, 2], [0, 3]]\n data[0] = [61, 62]\n data[1] = [[41, 42], [11, 12]]\n data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]\n merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],\n [51, 52], [61, 62]]\n```\n\nThis method can be used to merge partitions created by `dynamic_partition`\nas illustrated on the following example:\n\n```python\n # Apply function (increments x_i) on elements for which a certain condition\n # apply (x_i != -1 in this example).\n x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])\n condition_mask=tf.not_equal(x,tf.constant(-1.))\n partitioned_data = tf.dynamic_partition(\n x, tf.cast(condition_mask, tf.int32) , 2)\n partitioned_data[1] = partitioned_data[1] + 1.0\n condition_indices = tf.dynamic_partition(\n tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)\n x = tf.dynamic_stitch(condition_indices, partitioned_data)\n # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain\n # unchanged.\n```\n\n
\n\n
" } +op { + name: "ParallelInterleaveDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "cycle_length" + type: DT_INT64 + } + input_arg { + name: "block_length" + type: DT_INT64 + } + input_arg { + name: "sloppy" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + description: "A function mapping elements of `input_dataset`, concatenated with\n`other_arguments`, to a Dataset variant that contains elements matching\n`output_types` and `output_shapes`." + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`." + description: "The resulting dataset is similar to the `InterleaveDataset`, with the exception\nthat if retrieving the next value from a dataset would cause the requester to\nblock, it will skip that input dataset. This dataset is especially useful\nwhen loading data from a variable-latency datastores (e.g. HDFS, GCS), as it\nallows the training step to proceed so long as some data is available.\n\n!! WARNING !! This dataset is not deterministic!" +} op { name: "ParallelMapDataset" input_arg { @@ -23850,6 +24114,53 @@ op { summary: "Outputs a `Summary` protocol buffer with scalar values." description: "The input `tags` and `values` must have the same shape. The generated summary\nhas a summary value for each tag-value pair in `tags` and `values`." } +op { + name: "ScanDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset successively reduces `f` over the elements of `input_dataset`." +} op { name: "ScatterAdd" input_arg { @@ -25044,6 +25355,21 @@ op { } summary: "Computes gradients for the scaled exponential linear (Selu) operation." } +op { + name: "SerializeIterator" + input_arg { + name: "resource_handle" + description: "A handle to an iterator resource." + type: DT_RESOURCE + } + output_arg { + name: "serialized" + description: "A variant tensor storing the state of the iterator contained in the\nresource." + type: DT_VARIANT + } + summary: "Converts the given `resource_handle` representing an iterator to a variant tensor." + is_stateful: true +} op { name: "SerializeManySparse" input_arg { @@ -28954,6 +29280,42 @@ op { } summary: "Splits a tensor into `num_split` tensors along one dimension." } +op { + name: "SqlDataset" + input_arg { + name: "driver_name" + description: "The database type. Currently, the only supported type is \'sqlite\'." + type: DT_STRING + } + input_arg { + name: "data_source_name" + description: "A connection string to connect to the database." + type: DT_STRING + } + input_arg { + name: "query" + description: "A SQL query to execute." + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that executes a SQL query and emits rows of the result set." + is_stateful: true +} op { name: "Sqrt" input_arg { -- GitLab From 6b753f33c99ae6010bfc7ec6b751a8e0a1bcdfab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 02:25:01 -0800 Subject: [PATCH 0175/1801] Also register string types if __ANDROID_TYPES_FULL__ is defined PiperOrigin-RevId: 174979678 --- tensorflow/contrib/makefile/tf_op_files.txt | 18 +++++++++++++++ tensorflow/core/framework/register_types.h | 5 +++-- tensorflow/core/kernels/BUILD | 25 +++++++++++++++++++++ tensorflow/core/kernels/concat_lib_cpu.cc | 9 +++++--- 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 8b77c99cb5..5f06106c1d 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -8,6 +8,7 @@ tensorflow/core/kernels/xent_op.cc tensorflow/core/kernels/where_op.cc tensorflow/core/kernels/variable_ops.cc tensorflow/core/kernels/unpack_op.cc +tensorflow/core/kernels/unique_op.cc tensorflow/core/kernels/transpose_op.cc tensorflow/core/kernels/transpose_functor_cpu.cc tensorflow/core/kernels/training_op_helpers.cc @@ -41,6 +42,9 @@ tensorflow/core/kernels/spectrogram_op.cc tensorflow/core/kernels/spectrogram.cc tensorflow/core/kernels/sparse_to_dense_op.cc tensorflow/core/kernels/sparse_matmul_op.cc +tensorflow/core/kernels/sparse_fill_empty_rows_op.cc +tensorflow/core/kernels/sparse_reshape_op.c +tensorflow/core/kernels/segment_reduction_ops.cc tensorflow/core/kernels/softsign_op.cc tensorflow/core/kernels/softplus_op.cc tensorflow/core/kernels/softmax_op.cc @@ -109,6 +113,10 @@ tensorflow/core/kernels/maxpooling_op.cc tensorflow/core/kernels/matmul_op.cc tensorflow/core/kernels/lrn_op.cc tensorflow/core/kernels/logging_ops.cc +tensorflow/core/kernels/initializable_lookup_table.c +tensorflow/core/kernels/lookup_table_init_op.cc +tensorflow/core/kernels/lookup_table_op.cc +tensorflow/core/kernels/lookup_util.cc tensorflow/core/kernels/inplace_ops.cc tensorflow/core/kernels/in_topk_op.cc tensorflow/core/kernels/immutable_constant_op.cc @@ -116,10 +124,18 @@ tensorflow/core/kernels/identity_op.cc tensorflow/core/kernels/identity_n_op.cc tensorflow/core/kernels/gather_op.cc tensorflow/core/kernels/gather_functor.cc +tensorflow/core/kernels/gather_nd_op.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_0.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_1.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_2.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_3.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_4.cc +tensorflow/core/kernels/gather_nd_op_cpu_impl_5.cc tensorflow/core/kernels/fused_batch_norm_op.cc tensorflow/core/kernels/function_ops.cc tensorflow/core/kernels/fill_functor.cc tensorflow/core/kernels/fifo_queue.cc +tensorflow/core/kernels/fifo_queue_op.cc tensorflow/core/kernels/fake_quant_ops.cc tensorflow/core/kernels/example_parsing_ops.cc tensorflow/core/kernels/encode_wav_op.cc @@ -166,6 +182,8 @@ tensorflow/core/kernels/cwise_op_floor.cc tensorflow/core/kernels/cwise_op_exp.cc tensorflow/core/kernels/cwise_op_equal_to_2.cc tensorflow/core/kernels/cwise_op_equal_to_1.cc +tensorflow/core/kernels/cwise_op_not_equal_to_2.cc +tensorflow/core/kernels/cwise_op_not_equal_to_1.cc tensorflow/core/kernels/cwise_op_div.cc tensorflow/core/kernels/cwise_op_bitwise_xor.cc tensorflow/core/kernels/cwise_op_bitwise_or.cc diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h index c31ab18cc1..4bb37e4f6e 100644 --- a/tensorflow/core/framework/register_types.h +++ b/tensorflow/core/framework/register_types.h @@ -87,7 +87,8 @@ limitations under the License. #elif defined(__ANDROID_TYPES_FULL__) -// Only half, float, int32, int64, bool, and quantized types are supported. +// Only string, half, float, int32, int64, bool, and quantized types +// supported. #define TF_CALL_float(m) m(float) #define TF_CALL_double(m) #define TF_CALL_int32(m) m(::tensorflow::int32) @@ -96,7 +97,7 @@ limitations under the License. #define TF_CALL_int16(m) #define TF_CALL_int8(m) -#define TF_CALL_string(m) +#define TF_CALL_string(m) m(string) #define TF_CALL_resource(m) #define TF_CALL_variant(m) #define TF_CALL_complex64(m) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 34cd51ba66..6206963251 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4420,6 +4420,15 @@ filegroup( "fill_functor.h", "function_ops.cc", "gather_functor.h", + "gather_nd_op.cc", + "gather_nd_op.h", + "gather_nd_op_cpu_impl.h", + "gather_nd_op_cpu_impl_0.cc", + "gather_nd_op_cpu_impl_1.cc", + "gather_nd_op_cpu_impl_2.cc", + "gather_nd_op_cpu_impl_3.cc", + "gather_nd_op_cpu_impl_4.cc", + "gather_nd_op_cpu_impl_5.cc", "gather_op.cc", "identity_n_op.cc", "identity_n_op.h", @@ -4513,6 +4522,10 @@ filegroup( "fused_batch_norm_op.h", "gemm_functors.h", "image_resizer_state.h", + "initializable_lookup_table.h", + "lookup_table_init_op.h", + "lookup_table_op.h", + "lookup_util.h", "maxpooling_op.h", "mfcc.h", "mfcc_dct.h", @@ -4529,6 +4542,7 @@ filegroup( "resize_nearest_neighbor_op.h", "reverse_op.h", "save_restore_tensor.h", + "segment_reduction_ops.h", "softplus_op.h", "softsign_op.h", "spacetobatch_functor.h", @@ -4578,6 +4592,8 @@ filegroup( "cwise_op_div.cc", "cwise_op_equal_to_1.cc", "cwise_op_equal_to_2.cc", + "cwise_op_not_equal_to_1.cc", + "cwise_op_not_equal_to_2.cc", "cwise_op_exp.cc", "cwise_op_floor.cc", "cwise_op_floor_div.cc", @@ -4619,6 +4635,7 @@ filegroup( "encode_wav_op.cc", "fake_quant_ops.cc", "fifo_queue.cc", + "fifo_queue_op.cc", "fused_batch_norm_op.cc", "population_count_op.cc", "population_count_op.h", @@ -4642,7 +4659,11 @@ filegroup( "depthtospace_op.cc", "dynamic_stitch_op.cc", "in_topk_op.cc", + "initializable_lookup_table.cc", "logging_ops.cc", + "lookup_table_init_op.cc", + "lookup_table_op.cc", + "lookup_util.cc", "lrn_op.cc", "maxpooling_op.cc", "mfcc.cc", @@ -4677,12 +4698,15 @@ filegroup( "save_op.cc", "save_restore_tensor.cc", "save_restore_v2_ops.cc", + "segment_reduction_ops.cc", "session_ops.cc", "softplus_op.cc", "softsign_op.cc", "spacetobatch_functor.cc", "spacetobatch_op.cc", "spacetodepth_op.cc", + "sparse_fill_empty_rows_op.cc", + "sparse_reshape_op.cc", "sparse_to_dense_op.cc", "spectrogram.cc", "spectrogram_op.cc", @@ -4705,6 +4729,7 @@ filegroup( "training_ops.cc", "transpose_functor_cpu.cc", "transpose_op.cc", + "unique_op.cc", "warn_about_ints.cc", "where_op.cc", "xent_op.cc", diff --git a/tensorflow/core/kernels/concat_lib_cpu.cc b/tensorflow/core/kernels/concat_lib_cpu.cc index 258ce15456..b0bec0c5dc 100644 --- a/tensorflow/core/kernels/concat_lib_cpu.cc +++ b/tensorflow/core/kernels/concat_lib_cpu.cc @@ -74,11 +74,14 @@ REGISTER(qint16) REGISTER(qint32) REGISTER(bfloat16) -#if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) -// Primarily used for SavedModel support on mobile. +#if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) && \ + !defined(__ANDROID_TYPES_FULL__) +// Primarily used for SavedModel support on mobile. Registering it here only if +// __ANDROID_TYPES_FULL__ is not defined, as that already register strings REGISTER(string); #endif // defined(IS_MOBILE_PLATFORM) && - // !defined(SUPPORT_SELECTIVE_REGISTRATION) + // !defined(SUPPORT_SELECTIVE_REGISTRATION) && + // !defined(__ANDROID_TYPES_FULL__) #ifdef TENSORFLOW_USE_SYCL template -- GitLab From 79ba0f0b8864eee4bf3530e492ba02bdc35b2937 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 03:09:46 -0800 Subject: [PATCH 0176/1801] Check GPU availability after creating test session. PiperOrigin-RevId: 174983466 --- tensorflow/contrib/nccl/python/ops/nccl_ops_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index 0b13e3595e..bad0abd44c 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -72,14 +72,15 @@ class NcclTestCase(test.TestCase): two. device_sets: Tuple of virtual devices to run test on. """ - if not test.is_gpu_available(): - return # Test requires access to a GPU - for dtype in [np.float32, np.int32, np.int64, np.float64]: # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: + # Check GPU availability *after* creating test session, see b/68975239. + if not test.is_gpu_available(): + return # Test requires access to a GPU + for devices in device_sets: shape = (3, 4) random = (np.random.random_sample(shape) - .5) * 1024 -- GitLab From c74d384091b46347a2d14ef30746001bb2f31aa3 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 8 Nov 2017 06:48:29 -0800 Subject: [PATCH 0177/1801] tfdbg: Add test for loading DebugDumpDir with a relative path PiperOrigin-RevId: 174999937 --- .../python/debug/wrappers/dumping_wrapper_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py index d987ba84b5..eda5ecf508 100644 --- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py @@ -111,6 +111,20 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase): self.assertEqual(repr(self.inc_v), dump.run_fetches_info) self.assertEqual(repr(None), dump.run_feed_keys_info) + def testDumpingOnASingleRunWorksWithRelativePathForDebugDumpDir(self): + sess = dumping_wrapper.DumpingDebugWrapperSession( + self.sess, session_root=self.session_root, log_usage=False) + sess.run(self.inc_v) + dump_dirs = glob.glob(os.path.join(self.session_root, "run_*")) + cwd = os.getcwd() + try: + os.chdir(self.session_root) + dump = debug_data.DebugDumpDir( + os.path.relpath(dump_dirs[0], self.session_root)) + self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity")) + finally: + os.chdir(cwd) + def testDumpingOnASingleRunWithFeedDictWorks(self): sess = dumping_wrapper.DumpingDebugWrapperSession( self.sess, session_root=self.session_root, log_usage=False) -- GitLab From 15605b68f368753ae65476fbb172d293d997ec7d Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 8 Nov 2017 07:36:56 -0800 Subject: [PATCH 0178/1801] tfdbg: Fix a test bug hidden in a child thread PiperOrigin-RevId: 175004323 --- tensorflow/python/debug/wrappers/dumping_wrapper_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py index eda5ecf508..acea9433e2 100644 --- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py @@ -364,12 +364,14 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase): thread_name_filter=r"MainThread$") self.assertAllClose(1.0, sess.run(self.delta)) + child_thread_result = [] def child_thread_job(): - sess.run(sess.run(self.eta)) + child_thread_result.append(sess.run(self.eta)) thread = threading.Thread(name="ChildThread", target=child_thread_job) thread.start() thread.join() + self.assertAllClose([-1.4], child_thread_result) dump_dirs = glob.glob(os.path.join(self.session_root, "run_*")) self.assertEqual(1, len(dump_dirs)) -- GitLab From d7fe124174adacb8c1a7479eaab5b5b628855277 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 10:09:23 -0800 Subject: [PATCH 0179/1801] Fix incomplete spec of EagerTensor.numpy() PiperOrigin-RevId: 175023039 --- tensorflow/python/framework/ops.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 6469aca3ec..b256af2182 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -617,15 +617,16 @@ class _EagerTensorBase(Tensor): return dtypes._INTERN_TABLE[self._datatype_enum()] # pylint: disable=protected-access def numpy(self): - """Returns a numpy array with the same contents as the Tensor. + """Returns a numpy array or a scalar with the same contents as the Tensor. TODO(ashankar,agarwal): Perhaps this should NOT reference the underlying buffer but instead always explicitly copy? Note that currently it may or may not copy based on whether the numpy data is properly aligned or not. Returns: - A numpy array that may share memory with the Tensor object. Any changes - to one may be reflected in the other. + A numpy array or a scalar. Numpy array may share memory with the + Tensor object. Any changes to one may be reflected in the other. A scalar + value is returned when self has rank 0. Raises: ValueError: if the type of this Tensor is not representable in numpy. -- GitLab From 7d5f7d67db9288e52a3d0d2d12ed3c3ee7623c3e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 10:19:01 -0800 Subject: [PATCH 0180/1801] Do not return a mutable HloComputation* from a entry_computation() on a const HloModule*. PiperOrigin-RevId: 175024608 --- tensorflow/compiler/xla/service/buffer_assignment.cc | 10 +++++----- tensorflow/compiler/xla/service/hlo_module.h | 6 +++++- .../compiler/xla/service/interpreter/executable.cc | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index b422b22df9..c74f050f77 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -497,19 +497,19 @@ Status GatherComputationsByAllocationType( std::vector* global_computations) { // Create a worklist of computations paired with whether the allocation must // be thread-local. - std::deque> worklist; + std::deque> worklist; worklist.push_back(std::make_pair(module->entry_computation(), /*is_thread_local*/ false)); // Sets for quickly checking membership. Computations are returned in vectors // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; + FlatSet thread_local_set; + FlatSet global_set; while (!worklist.empty()) { auto worklist_front = worklist.front(); worklist.pop_front(); - HloComputation* computation = worklist_front.first; + const HloComputation* computation = worklist_front.first; bool is_thread_local = worklist_front.second; bool in_thread_local_set = thread_local_set.count(computation) > 0; bool in_global_set = global_set.count(computation) > 0; @@ -653,7 +653,7 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, } if (allow_input_output_aliasing_ && allocation->maybe_live_out()) { - HloComputation* entry_computation = + const HloComputation* entry_computation = assignment->module_->entry_computation(); for (auto param : entry_computation->parameter_instructions()) { for (auto& param_buffer : diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 6469851791..5141e7bc8d 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -85,7 +85,11 @@ class HloModule { std::unique_ptr Clone(const string& suffix = "clone") const; // Return a pointer to the entry computation of the module.. - HloComputation* entry_computation() const { + const HloComputation* entry_computation() const { + CHECK_NE(nullptr, entry_computation_); + return entry_computation_; + } + HloComputation* entry_computation() { CHECK_NE(nullptr, entry_computation_); return entry_computation_; } diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 86dee8462f..96f937caf9 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -89,7 +89,7 @@ StatusOr InterpreterExecutable::ExecuteOnStream( uint64 start_micros = tensorflow::Env::Default()->NowMicros(); - HloComputation* computation = module().entry_computation(); + const HloComputation* computation = module().entry_computation(); if (computation->num_parameters() != arguments.size()) { return tensorflow::errors::Internal( "Mismatch between argument count and graph parameter count."); -- GitLab From b02a2e0da62ededa960f1dcfb0633039297e8504 Mon Sep 17 00:00:00 2001 From: Thomas Schumm Date: Wed, 8 Nov 2017 10:43:48 -0800 Subject: [PATCH 0181/1801] HParams.set_hparam doesn't fully check types, contrary to its docstring. PiperOrigin-RevId: 175028981 --- .../training/python/training/hparam.py | 58 +++++++++++++++++-- .../training/python/training/hparam_test.py | 31 +++++++++- 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 391899b34f..7db625cdd5 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import json +import numbers import re import six @@ -76,7 +77,7 @@ def _process_scalar_value(name, parse_fn, var_type, m_dict, values, function. Raises: - ValueError: If the name has already been sued. + ValueError: If the name has already been used. """ try: parsed_value = parse_fn(m_dict['val']) @@ -138,6 +139,54 @@ def _process_list_value(name, parse_fn, var_type, m_dict, values, _parse_fail(name, var_type, m_dict['vals'], values) +def _cast_to_type_if_compatible(name, param_type, value): + """Cast hparam to the provided type, if compatible. + + Args: + name: Name of the hparam to be cast. + param_type: The type of the hparam. + value: The value to be cast, if compatible. + + Returns: + The result of casting `value` to `param_type`. + + Raises: + ValueError: If the type of `value` is not compatible with param_type. + * If `param_type` is a string type, but `value` is not. + * If `param_type` is a boolean, but `value` is not, or vice versa. + * If `param_type` is an integer type, but `value` is not. + * If `param_type` is a float type, but `value` is not a numeric type. + """ + fail_msg = ( + "Could not cast hparam '%s' of type '%s' from value %r" % + (name, param_type, value)) + + # Some callers use None, for which we can't do any casting/checking. :( + if issubclass(param_type, type(None)): + return value + + # Avoid converting a non-string type to a string. + if (issubclass(param_type, (six.string_types, six.binary_type)) and + not isinstance(value, (six.string_types, six.binary_type))): + raise ValueError(fail_msg) + + # Avoid converting a number or string type to a boolean or vice versa. + if issubclass(param_type, bool) != isinstance(value, bool): + raise ValueError(fail_msg) + + # Avoid converting float to an integer (the reverse is fine). + if (issubclass(param_type, numbers.Integral) and + not isinstance(value, numbers.Integral)): + raise ValueError(fail_msg) + + # Avoid converting a non-numeric type to a numeric type. + if (issubclass(param_type, numbers.Number) and + not isinstance(value, numbers.Number)): + raise ValueError(fail_msg) + + return param_type(value) + + def parse_values(values, type_map): """Parses hyperparameter values from a string into a python map. @@ -438,17 +487,18 @@ class HParams(object): Raises: ValueError: If there is a type mismatch. """ - _, is_list = self._hparam_types[name] + param_type, is_list = self._hparam_types[name] if isinstance(value, list): if not is_list: raise ValueError( 'Must not pass a list for single-valued parameter: %s' % name) - setattr(self, name, value) + setattr(self, name, [ + _cast_to_type_if_compatible(name, param_type, v) for v in value]) else: if is_list: raise ValueError( 'Must pass a list for multi-valued parameter: %s.' % name) - setattr(self, name, value) + setattr(self, name, _cast_to_type_if_compatible(name, param_type, value)) def parse(self, values): """Override hyperparameter values, parsing new values from a string. diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py index f54514cefd..949c262f5b 100644 --- a/tensorflow/contrib/training/python/training/hparam_test.py +++ b/tensorflow/contrib/training/python/training/hparam_test.py @@ -318,13 +318,42 @@ class HParamsTest(test.TestCase): self.assertEqual(3.0, hparams.b) self.assertEqual('relu4', hparams.c_c) - def testSetHParamTypeMismatch(self): + def testSetHParamListNonListMismatch(self): hparams = hparam.HParams(a=1, b=[2.0, 3.0]) with self.assertRaisesRegexp(ValueError, r'Must not pass a list'): hparams.set_hparam('a', [1.0]) with self.assertRaisesRegexp(ValueError, r'Must pass a list'): hparams.set_hparam('b', 1.0) + def testSetHParamTypeMismatch(self): + hparams = hparam.HParams( + int_=1, str_='str', bool_=True, float_=1.1, list_int=[1, 2], none=None) + + with self.assertRaises(ValueError): + hparams.set_hparam('str_', 2.2) + + with self.assertRaises(ValueError): + hparams.set_hparam('int_', False) + + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', 1) + + with self.assertRaises(ValueError): + hparams.set_hparam('int_', 2.2) + + with self.assertRaises(ValueError): + hparams.set_hparam('list_int', [2, 3.3]) + + with self.assertRaises(ValueError): + hparams.set_hparam('int_', '2') + + # Casting int to float is OK + hparams.set_hparam('float_', 1) + + # Getting stuck with NoneType :( + hparams.set_hparam('none', '1') + self.assertEqual('1', hparams.none) + def testNonProtoFails(self): with self.assertRaisesRegexp(AssertionError, ''): hparam.HParams(hparam_def=1) -- GitLab From 4a618e411af3f808eb0f65ce4f7151450f1f16a5 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 8 Nov 2017 10:52:49 -0800 Subject: [PATCH 0182/1801] [XLA] Print constant literals of size <= 8 elements. Previously we'd only print scalars. But if you have a constant with just a few values, what the heck, show the whole thing. PiperOrigin-RevId: 175030210 --- .../compiler/xla/service/hlo_graph_dumper.cc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index fd162622ce..1c063c973d 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -761,12 +761,22 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { string HloDotDumper::GetInstructionNodeInlinedOperands( const HloInstruction* instr) { auto stringify_constant = [](const HloInstruction* constant) { - if (ShapeUtil::IsEffectiveScalar(constant->shape())) { - auto elem_idx = IndexUtil::LinearIndexToMultidimensionalIndex( - constant->shape(), /*linear_index=*/0); - return Printf("%s (%s)", constant->literal().GetAsString(elem_idx), + const auto& shape = constant->shape(); + + // Print the literal value of constants with <= K elements. + optional elem_count; + if (!ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)) { + elem_count = 1; + for (int64 dim : shape.dimensions()) { + *elem_count *= dim; + } + } + if (elem_count.has_value() && *elem_count <= 8) { + return Printf("%s (%s)", constant->literal().ToString(), ShapeUtil::HumanString(constant->shape())); } + + // Otherwise, print e.g. "%constant.42 (s32[100])". string constant_name; if (tensorflow::StringPiece(constant->name()).starts_with("%constant")) { constant_name = constant->name(); -- GitLab From 35cc51dc2a716c4b92429db60238e4f15fba1ed3 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 8 Nov 2017 10:55:48 -0800 Subject: [PATCH 0183/1801] Add database writer ops to contrib/summary PiperOrigin-RevId: 175030602 --- .../contrib/cmake/tf_core_framework.cmake | 3 + tensorflow/contrib/summary/BUILD | 6 + tensorflow/contrib/summary/summary.py | 2 + tensorflow/contrib/summary/summary_ops.py | 125 ++++++++++++++++-- .../contrib/summary/summary_ops_test.py | 110 +++++++++++++++ tensorflow/contrib/tensorboard/db/BUILD | 2 + .../tensorboard/db/summary_db_writer.cc | 34 ++++- .../tensorboard/db/summary_db_writer_test.cc | 56 +++++++- tensorflow/core/kernels/BUILD | 3 + tensorflow/core/kernels/summary_interface.cc | 4 +- tensorflow/core/kernels/summary_kernels.cc | 50 +++++++ tensorflow/core/ops/summary_ops.cc | 41 ++++++ 12 files changed, 419 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index c3dc8531bb..c607546f4a 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -301,6 +301,8 @@ file(GLOB_RECURSE tf_core_framework_srcs "${tensorflow_source_dir}/tensorflow/core/common_runtime/session.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_factory.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_options.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*.h" "${tensorflow_source_dir}/public/*.h" ) @@ -314,6 +316,7 @@ file(GLOB_RECURSE tf_core_framework_exclude_srcs "${tensorflow_source_dir}/tensorflow/core/util/*test*.h" "${tensorflow_source_dir}/tensorflow/core/util/*test*.cc" "${tensorflow_source_dir}/tensorflow/core/util/*main.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*test*.cc" ) list(REMOVE_ITEM tf_core_framework_srcs ${tf_core_framework_exclude_srcs}) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index da23f1c380..3c60d2bb56 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -26,12 +26,18 @@ py_test( deps = [ ":summary_ops", ":summary_test_util", + "//tensorflow/python:array_ops", "//tensorflow/python:errors", + "//tensorflow/python:framework", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:ops", "//tensorflow/python:platform", + "//tensorflow/python:state_ops", "//tensorflow/python:training", "//tensorflow/python/eager:function", "//tensorflow/python/eager:test", + "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index ca82ea094c..813e8b2b09 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -28,11 +28,13 @@ from __future__ import print_function from tensorflow.contrib.summary.summary_ops import all_summary_ops from tensorflow.contrib.summary.summary_ops import always_record_summaries from tensorflow.contrib.summary.summary_ops import audio +from tensorflow.contrib.summary.summary_ops import create_summary_db_writer from tensorflow.contrib.summary.summary_ops import create_summary_file_writer from tensorflow.contrib.summary.summary_ops import eval_dir from tensorflow.contrib.summary.summary_ops import generic from tensorflow.contrib.summary.summary_ops import histogram from tensorflow.contrib.summary.summary_ops import image +from tensorflow.contrib.summary.summary_ops import import_event from tensorflow.contrib.summary.summary_ops import never_record_summaries from tensorflow.contrib.summary.summary_ops import record_summaries_every_n_global_steps from tensorflow.contrib.summary.summary_ops import scalar diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index 9238671c4a..f6be99f6ae 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -19,7 +19,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import getpass import os +import re +import time + +import six from tensorflow.contrib.summary import gen_summary_ops from tensorflow.python.eager import context @@ -42,6 +47,10 @@ _SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries" _SUMMARY_COLLECTION_NAME = "_SUMMARY_V2" _SUMMARY_WRITER_INIT_COLLECTION_NAME = "_SUMMARY_WRITER_V2" +_EXPERIMENT_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,256}$") +_RUN_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,512}$") +_USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I) + def should_record_summaries(): """Returns boolean Tensor which is true if summaries should be recorded.""" @@ -132,7 +141,8 @@ def create_summary_file_writer(logdir, flush once the queue gets bigger than this. flush_millis: the largest interval between flushes. filename_suffix: optional suffix for the event file name. - name: name for the summary writer. + name: Shared name for this SummaryWriter resource stored to default + Graph. Returns: Either a summary writer or an empty object which can be used as a @@ -147,14 +157,81 @@ def create_summary_file_writer(logdir, flush_millis = constant_op.constant(2 * 60 * 1000) if filename_suffix is None: filename_suffix = constant_op.constant("") - resource = gen_summary_ops.summary_writer(shared_name=name) - # TODO(apassos) ensure the initialization op runs when in graph mode; - # consider calling session.run here. - ops.add_to_collection( - _SUMMARY_WRITER_INIT_COLLECTION_NAME, - gen_summary_ops.create_summary_file_writer( - resource, logdir, max_queue, flush_millis, filename_suffix)) - return SummaryWriter(resource) + return _make_summary_writer( + name, + gen_summary_ops.create_summary_file_writer, + logdir=logdir, + max_queue=max_queue, + flush_millis=flush_millis, + filename_suffix=filename_suffix) + + +def create_summary_db_writer(db_uri, + experiment_name=None, + run_name=None, + user_name=None, + name=None): + """Creates a summary database writer in the current context. + + This can be used to write tensors from the execution graph directly + to a database. Only SQLite is supported right now. This function + will create the schema if it doesn't exist. Entries in the Users, + Experiments, and Runs tables will be created automatically if they + don't already exist. + + Args: + db_uri: For example "file:/tmp/foo.sqlite". + experiment_name: Defaults to YYYY-MM-DD in local time if None. + Empty string means the Run will not be associated with an + Experiment. Can't contain ASCII control characters or <>. Case + sensitive. + run_name: Defaults to HH:MM:SS in local time if None. Empty string + means a Tag will not be associated with any Run. Can't contain + ASCII control characters or <>. Case sensitive. + user_name: Defaults to system username if None. Empty means the + Experiment will not be associated with a User. Must be valid as + both a DNS label and Linux username. + name: Shared name for this SummaryWriter resource stored to default + Graph. + + Returns: + A new SummaryWriter instance. + """ + with ops.device("cpu:0"): + if experiment_name is None: + experiment_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + if run_name is None: + run_name = time.strftime("%H:%M:%S", time.localtime(time.time())) + if user_name is None: + user_name = getpass.getuser() + experiment_name = _cleanse_string( + "experiment_name", _EXPERIMENT_NAME_PATTERNS, experiment_name) + run_name = _cleanse_string("run_name", _RUN_NAME_PATTERNS, run_name) + user_name = _cleanse_string("user_name", _USER_NAME_PATTERNS, user_name) + return _make_summary_writer( + name, + gen_summary_ops.create_summary_db_writer, + db_uri=db_uri, + experiment_name=experiment_name, + run_name=run_name, + user_name=user_name) + + +def _make_summary_writer(name, factory, **kwargs): + resource = gen_summary_ops.summary_writer(shared_name=name) + # TODO(apassos): Consider doing this instead. + # node = factory(resource, **kwargs) + # if not context.in_eager_mode(): + # ops.get_default_session().run(node) + ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, + factory(resource, **kwargs)) + return SummaryWriter(resource) + + +def _cleanse_string(name, pattern, value): + if isinstance(value, six.string_types) and pattern.search(value) is None: + raise ValueError("%s (%s) must match %s" % (name, value, pattern.pattern)) + return ops.convert_to_tensor(value, dtypes.string) def _nothing(): @@ -206,16 +283,22 @@ def summary_writer_function(name, tensor, function, family=None): return op -def generic(name, tensor, metadata, family=None, global_step=None): +def generic(name, tensor, metadata=None, family=None, global_step=None): """Writes a tensor summary if possible.""" if global_step is None: global_step = training_util.get_global_step() def function(tag, scope): + if metadata is None: + serialized_metadata = constant_op.constant("") + elif hasattr(metadata, "SerializeToString"): + serialized_metadata = constant_op.constant(metadata.SerializeToString()) + else: + serialized_metadata = metadata # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_summary( context.context().summary_writer_resource, global_step, array_ops.identity(tensor), - tag, metadata, name=scope) + tag, serialized_metadata, name=scope) return summary_writer_function(name, tensor, function, family=family) @@ -284,6 +367,26 @@ def audio(name, tensor, sample_rate, max_outputs, family=None, return summary_writer_function(name, tensor, function, family=family) +def import_event(tensor, name=None): + """Writes a tf.Event binary proto. + + When using create_summary_db_writer(), this can be used alongside + tf.TFRecordReader to load event logs into the database. Please note + that this is lower level than the other summary functions and will + ignore any conditions set by methods like should_record_summaries(). + + Args: + tensor: A `Tensor` of type `string` containing a serialized `Event` + proto. + name: A name for the operation (optional). + + Returns: + The created Operation. + """ + return gen_summary_ops.import_event( + context.context().summary_writer_resource, tensor, name=name) + + def eval_dir(model_dir, name=None): """Construct a logdir for an eval summary writer.""" return os.path.join(model_dir, "eval" if not name else "eval_" + name) diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index 466e194096..6e1a746815 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -17,14 +17,22 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools +import os import tempfile +import six +import sqlite3 + from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util from tensorflow.python.eager import function from tensorflow.python.eager import test +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import state_ops from tensorflow.python.platform import gfile from tensorflow.python.training import training_util @@ -99,5 +107,107 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') + +class DbTest(test_util.TensorFlowTestCase): + + def setUp(self): + self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite') + if os.path.exists(self.db_path): + os.unlink(self.db_path) + self.db = sqlite3.connect(self.db_path) + self.create_summary_db_writer = functools.partial( + summary_ops.create_summary_db_writer, + db_uri=self.db_path, + experiment_name='experiment', + run_name='run', + user_name='user') + + def tearDown(self): + self.db.close() + + def testIntegerSummaries(self): + step = training_util.create_global_step() + + def adder(x, y): + state_ops.assign_add(step, 1) + summary_ops.generic('x', x) + summary_ops.generic('y', y) + sum_ = x + y + summary_ops.generic('sum', sum_) + return sum_ + + with summary_ops.always_record_summaries(): + with self.create_summary_db_writer().as_default(): + self.assertEqual(5, adder(int64(2), int64(3)).numpy()) + + six.assertCountEqual(self, [1, 1, 1], + get_all(self.db, 'SELECT step FROM Tensors')) + six.assertCountEqual(self, ['x', 'y', 'sum'], + get_all(self.db, 'SELECT tag_name FROM Tags')) + x_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "x"') + y_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "y"') + sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"') + + with summary_ops.always_record_summaries(): + with self.create_summary_db_writer().as_default(): + self.assertEqual(9, adder(int64(4), int64(5)).numpy()) + + six.assertCountEqual(self, [1, 1, 1, 2, 2, 2], + get_all(self.db, 'SELECT step FROM Tensors')) + six.assertCountEqual(self, [x_id, y_id, sum_id], + get_all(self.db, 'SELECT tag_id FROM Tags')) + self.assertEqual(2, get_tensor(self.db, x_id, 1)) + self.assertEqual(3, get_tensor(self.db, y_id, 1)) + self.assertEqual(5, get_tensor(self.db, sum_id, 1)) + self.assertEqual(4, get_tensor(self.db, x_id, 2)) + self.assertEqual(5, get_tensor(self.db, y_id, 2)) + self.assertEqual(9, get_tensor(self.db, sum_id, 2)) + six.assertCountEqual( + self, ['experiment'], + get_all(self.db, 'SELECT experiment_name FROM Experiments')) + six.assertCountEqual(self, ['run'], + get_all(self.db, 'SELECT run_name FROM Runs')) + six.assertCountEqual(self, ['user'], + get_all(self.db, 'SELECT user_name FROM Users')) + + def testBadExperimentName(self): + with self.assertRaises(ValueError): + self.create_summary_db_writer(experiment_name='\0') + + def testBadRunName(self): + with self.assertRaises(ValueError): + self.create_summary_db_writer(run_name='\0') + + def testBadUserName(self): + with self.assertRaises(ValueError): + self.create_summary_db_writer(user_name='-hi') + with self.assertRaises(ValueError): + self.create_summary_db_writer(user_name='hi-') + with self.assertRaises(ValueError): + self.create_summary_db_writer(user_name='@') + + +def get_one(db, q, *p): + return db.execute(q, p).fetchone()[0] + + +def get_all(db, q, *p): + return unroll(db.execute(q, p).fetchall()) + + +def get_tensor(db, tag_id, step): + return get_one( + db, 'SELECT tensor FROM Tensors WHERE tag_id = ? AND step = ?', tag_id, + step) + + +def int64(x): + return array_ops.constant(x, dtypes.int64) + + +def unroll(list_of_tuples): + return sum(list_of_tuples, ()) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD index d8bbf87d2c..068e862650 100644 --- a/tensorflow/contrib/tensorboard/db/BUILD +++ b/tensorflow/contrib/tensorboard/db/BUILD @@ -45,10 +45,12 @@ cc_library( tf_cc_test( name = "summary_db_writer_test", + size = "small", srcs = ["summary_db_writer_test.cc"], deps = [ ":summary_db_writer", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/lib/db:sqlite", diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index df64e36305..a26ad61660 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -15,10 +15,12 @@ limitations under the License. #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" #include "tensorflow/contrib/tensorboard/db/schema.h" +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/db/sqlite.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/snappy.h" +#include "tensorflow/core/util/event.pb.h" namespace tensorflow { namespace { @@ -86,13 +88,19 @@ class SummaryDbWriter : public SummaryWriterInterface { TF_RETURN_IF_ERROR(BindTensor(t)); break; } - TF_RETURN_IF_ERROR(insert_tensor_.StepAndReset()); - return Status::OK(); + return insert_tensor_.StepAndReset(); } Status WriteEvent(std::unique_ptr e) override { - // TODO(@jart): This will be used to load event logs. - return errors::Unimplemented("WriteEvent"); + mutex_lock ml(mu_); + TF_RETURN_IF_ERROR(InitializeParents()); + if (e->what_case() == Event::WhatCase::kSummary) { + const Summary& summary = e->summary(); + for (int i = 0; i < summary.value_size(); ++i) { + TF_RETURN_IF_ERROR(WriteSummary(e.get(), summary.value(i))); + } + } + return Status::OK(); } Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { @@ -247,6 +255,24 @@ class SummaryDbWriter : public SummaryWriterInterface { return Status::OK(); } + Status WriteSummary(const Event* e, const Summary::Value& summary) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + int64 tag_id; + TF_RETURN_IF_ERROR(GetTagId(run_id_, summary.tag(), &tag_id)); + insert_tensor_.BindInt(1, tag_id); + insert_tensor_.BindInt(2, e->step()); + insert_tensor_.BindDouble(3, e->wall_time()); + switch (summary.value_case()) { + case Summary::Value::ValueCase::kSimpleValue: + insert_tensor_.BindDouble(4, summary.simple_value()); + break; + default: + // TODO(@jart): Handle the rest. + return Status::OK(); + } + return insert_tensor_.StepAndReset(); + } + mutex mu_; Env* env_; std::shared_ptr db_ GUARDED_BY(mu_); diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index d32904f97c..c1af51e7b7 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -14,14 +14,19 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/db/sqlite.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/util/event.pb.h" namespace tensorflow { namespace { +const float kTolerance = 1e-5; + Tensor MakeScalarInt64(int64 x) { Tensor t(DT_INT64, TensorShape({})); t.scalar()() = x; @@ -41,7 +46,7 @@ class FakeClockEnv : public EnvWrapper { class SummaryDbWriterTest : public ::testing::Test { protected: - void SetUp() override { db_ = Sqlite::Open("file::memory:").ValueOrDie(); } + void SetUp() override { db_ = Sqlite::Open(":memory:").ValueOrDie(); } void TearDown() override { if (writer_ != nullptr) { @@ -158,5 +163,54 @@ TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) { QueryString("SELECT tensor FROM Tensors WHERE step = 2").empty()); } +TEST_F(SummaryDbWriterTest, EmptyParentNames_NoParentsCreated) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy", + "this-is-metaaa")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Users")); + ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments")); + ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Runs")); + ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tags")); + ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tensors")); +} + +TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + std::unique_ptr e{new Event}; + e->set_step(7); + e->set_wall_time(123.456); + Summary::Value* s = e->mutable_summary()->add_value(); + s->set_tag("π"); + s->set_simple_value(3.14f); + s = e->mutable_summary()->add_value(); + s->set_tag("φ"); + s->set_simple_value(1.61f); + TF_ASSERT_OK(writer_->WriteEvent(std::move(e))); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tags")); + ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tensors")); + int64 tag1_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'π'"); + int64 tag2_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'φ'"); + EXPECT_GT(tag1_id, 0LL); + EXPECT_GT(tag2_id, 0LL); + EXPECT_EQ(123.456, QueryDouble(strings::StrCat( + "SELECT computed_time FROM Tensors WHERE tag_id = ", + tag1_id, " AND step = 7"))); + EXPECT_EQ(123.456, QueryDouble(strings::StrCat( + "SELECT computed_time FROM Tensors WHERE tag_id = ", + tag2_id, " AND step = 7"))); + EXPECT_NEAR(3.14, + QueryDouble(strings::StrCat( + "SELECT tensor FROM Tensors WHERE tag_id = ", tag1_id, + " AND step = 7")), + kTolerance); // Summary::simple_value is float + EXPECT_NEAR(1.61, + QueryDouble(strings::StrCat( + "SELECT tensor FROM Tensors WHERE tag_id = ", tag2_id, + " AND step = 7")), + kTolerance); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 6206963251..4169e842da 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6243,8 +6243,11 @@ tf_kernel_library( srcs = ["summary_kernels.cc"], deps = [ ":summary_interface", + "//tensorflow/contrib/tensorboard/db:summary_db_writer", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:summary_ops_op_lib", + "//tensorflow/core/lib/db:sqlite", ], ) diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc index 313137ae49..cd366f8c13 100644 --- a/tensorflow/core/kernels/summary_interface.cc +++ b/tensorflow/core/kernels/summary_interface.cc @@ -257,7 +257,9 @@ class SummaryWriterImpl : public SummaryWriterInterface { Summary::Value* v = e->mutable_summary()->add_value(); t.AsProtoTensorContent(v->mutable_tensor()); v->set_tag(tag); - v->mutable_metadata()->ParseFromString(serialized_metadata); + if (!serialized_metadata.empty()) { + v->mutable_metadata()->ParseFromString(serialized_metadata); + } return WriteEvent(std::move(e)); } diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index cfa707de71..1fe2fc5b66 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -13,9 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/kernels/summary_interface.h" +#include "tensorflow/core/lib/db/sqlite.h" +#include "tensorflow/core/platform/protobuf.h" namespace tensorflow { @@ -46,6 +49,32 @@ class CreateSummaryFileWriterOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("CreateSummaryFileWriter").Device(DEVICE_CPU), CreateSummaryFileWriterOp); +class CreateSummaryDbWriterOp : public OpKernel { + public: + explicit CreateSummaryDbWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("db_uri", &tmp)); + const string db_uri = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("experiment_name", &tmp)); + const string experiment_name = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("run_name", &tmp)); + const string run_name = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("user_name", &tmp)); + const string user_name = tmp->scalar()(); + SummaryWriterInterface* s; + auto db = Sqlite::Open(db_uri); + OP_REQUIRES_OK(ctx, db.status()); + OP_REQUIRES_OK( + ctx, CreateSummaryDbWriter(std::move(db.ValueOrDie()), experiment_name, + run_name, user_name, ctx->env(), &s)); + OP_REQUIRES_OK(ctx, CreateResource(ctx, HandleFromInput(ctx, 0), s)); + } +}; +REGISTER_KERNEL_BUILDER(Name("CreateSummaryDbWriter").Device(DEVICE_CPU), + CreateSummaryDbWriterOp); + class FlushSummaryWriterOp : public OpKernel { public: explicit FlushSummaryWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -98,6 +127,27 @@ class WriteSummaryOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU), WriteSummaryOp); +class ImportEventOp : public OpKernel { + public: + explicit ImportEventOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("event", &t)); + std::unique_ptr event{new Event}; + if (!ParseProtoUnlimited(event.get(), t->scalar()())) { + ctx->CtxFailureWithWarning( + errors::DataLoss("Bad tf.Event binary proto tensor string")); + return; + } + OP_REQUIRES_OK(ctx, s->WriteEvent(std::move(event))); + } +}; +REGISTER_KERNEL_BUILDER(Name("ImportEvent").Device(DEVICE_CPU), ImportEventOp); + class WriteScalarSummaryOp : public OpKernel { public: explicit WriteScalarSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc index f778b48797..5efbac7ad7 100644 --- a/tensorflow/core/ops/summary_ops.cc +++ b/tensorflow/core/ops/summary_ops.cc @@ -49,6 +49,33 @@ flush_millis: How often, in milliseconds, to flush the pending events and filename_suffix: Every event file's name is suffixed with this suffix. )doc"); +REGISTER_OP("CreateSummaryDbWriter") + .Input("writer: resource") + .Input("db_uri: string") + .Input("experiment_name: string") + .Input("run_name: string") + .Input("user_name: string") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Creates summary database writer accessible by given resource handle. + +This can be used to write tensors from the execution graph directly +to a database. Only SQLite is supported right now. This function +will create the schema if it doesn't exist. Entries in the Users, +Experiments, and Runs tables will be created automatically if they +don't already exist. + +writer: Handle to SummaryWriter resource to overwrite. +db_uri: For example "file:/tmp/foo.sqlite". +experiment_name: Can't contain ASCII control characters or <>. Case + sensitive. If empty, then the Run will not be associated with any + Experiment. +run_name: Can't contain ASCII control characters or <>. Case sensitive. + If empty, then each Tag will not be associated with any Run. +user_name: Must be valid as both a DNS label and Linux username. If + empty, then the Experiment will not be associated with any User. +)doc"); + REGISTER_OP("FlushSummaryWriter") .Input("writer: resource") .SetShapeFn(shape_inference::NoOutputs) @@ -89,6 +116,20 @@ summary_metadata: Serialized SummaryMetadata protocol buffer containing plugin-related metadata for this summary. )doc"); +REGISTER_OP("ImportEvent") + .Input("writer: resource") + .Input("event: string") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Outputs a `tf.Event` protocol buffer. + +When CreateSummaryDbWriter is being used, this op can be useful for +importing data from event logs. + +writer: A handle to a summary writer. +event: A string containing a binary-encoded tf.Event proto. +)doc"); + REGISTER_OP("WriteScalarSummary") .Input("writer: resource") .Input("global_step: int64") -- GitLab From 07be74a743e41f3a19570d5471a92b58c7ab83a8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 11:27:04 -0800 Subject: [PATCH 0184/1801] Add comment describing how to get optimized builds in Dockerfile. PiperOrigin-RevId: 175036186 --- tensorflow/tools/docker/Dockerfile.devel | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 20e1dcd085..1a0145b078 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -83,6 +83,11 @@ ENV CI_BUILD_PYTHON python RUN tensorflow/tools/ci_build/builds/configured CPU \ bazel build -c opt --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + # For optimized builds appropriate for the hardware platform of your choosing, uncomment below... + # For ivy-bridge or sandy-bridge + # --copt=-march="ivybridge" \ + # for haswell, broadwell, or skylake + # --copt=-march="haswell" \ tensorflow/tools/pip_package:build_pip_package && \ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \ pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl && \ -- GitLab From a93fdc9e3e1a43922f40a9263a17a2a66840cf7a Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Wed, 8 Nov 2017 11:28:21 -0800 Subject: [PATCH 0185/1801] [XLA] More diagnostic information in the reshape shape inference error. PiperOrigin-RevId: 175036413 --- tensorflow/compiler/xla/service/shape_inference.cc | 5 ++++- tensorflow/compiler/xla/tests/reshape_test.cc | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 9c7dc2185e..dcd726f22c 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1948,7 +1948,10 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( !std::is_permutation(dimensions.begin(), dimensions.end(), indices.begin())) { return InvalidArgument( - "Reshape dimensions not a permutation of the operand dimensions."); + "Reshape dimensions [%s] are not a permutation of the operand " + "dimensions (operand shape is %s).", + tensorflow::str_util::Join(dimensions, ",").c_str(), + ShapeUtil::HumanString(operand).c_str()); } return inferred_shape; diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index 72c68f24a0..d235b9a158 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -431,8 +431,9 @@ XLA_TEST_F(ReshapeTest, ToScalar) { XLA_TEST_F(ReshapeTest, BadDimensions) { ComputationBuilder b(client_, TestName()); b.Reshape(b.ConstantR1({1}), {}, {}); - EXPECT_THAT(ExecuteToString(&b, {}), - ::testing::HasSubstr("dimensions not a permutation")); + EXPECT_THAT( + ExecuteToString(&b, {}), + ::testing::HasSubstr("not a permutation of the operand dimensions")); } XLA_TEST_F(ReshapeTest, BadNewSizes) { -- GitLab From fda773aab208535612bfc5ecefb5096525669cbb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 11:30:23 -0800 Subject: [PATCH 0186/1801] Minor docstring fixes PiperOrigin-RevId: 175036743 --- tensorflow/python/ops/ctc_ops.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 477c0d1cb4..f037767cf4 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -22,8 +22,8 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import gen_ctc_ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_ctc_ops from tensorflow.python.ops.nn_grad import _BroadcastMul @@ -38,7 +38,8 @@ def ctc_loss(labels, inputs, sequence_length, [A. Graves, S. Fernandez, F. Gomez, J. Schmidhuber. Connectionist Temporal Classification: Labeling Unsegmented Sequence Data - with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf) + with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, + pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf) Input requirements: @@ -108,9 +109,9 @@ def ctc_loss(labels, inputs, sequence_length, See `core/ops/ctc_ops.cc` for more details. inputs: 3-D `float` `Tensor`. If time_major == False, this will be a `Tensor` shaped: - `[batch_size x max_time x num_classes]`. + `[batch_size, max_time, num_classes]`. If time_major == True (default), this will be a `Tensor` shaped: - `[max_time x batch_size x num_classes]`. + `[max_time, batch_size, num_classes]`. The logits. sequence_length: 1-D `int32` vector, size `[batch_size]`. The sequence lengths. @@ -120,15 +121,18 @@ def ctc_loss(labels, inputs, sequence_length, ignore_longer_outputs_than_inputs: Boolean. Default: False. If True, sequences with longer outputs than inputs will be ignored. time_major: The shape format of the `inputs` Tensors. - If True, these `Tensors` must be shaped `[max_time, batch_size, num_classes]`. - If False, these `Tensors` must be shaped `[batch_size, max_time, num_classes]`. - Using `time_major = True` (default) is a bit more efficient because it avoids - transposes at the beginning of the ctc_loss calculation. However, most - TensorFlow data is batch-major, so by this function also accepts inputs - in batch-major form. + If True, these `Tensors` must be shaped `[max_time, batch_size, + num_classes]`. + If False, these `Tensors` must be shaped `[batch_size, max_time, + num_classes]`. + Using `time_major = True` (default) is a bit more efficient because it + avoids transposes at the beginning of the ctc_loss calculation. However, + most TensorFlow data is batch-major, so by this function also accepts + inputs in batch-major form. Returns: - A 1-D `float` `Tensor`, size `[batch]`, containing the negative log probabilities. + A 1-D `float` `Tensor`, size `[batch]`, containing the negative log + probabilities. Raises: TypeError: if labels is not a `SparseTensor`. @@ -198,7 +202,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): Args: inputs: 3-D `float` `Tensor` sized - `[max_time x batch_size x num_classes]`. The logits. + `[max_time, batch_size, num_classes]`. The logits. sequence_length: 1-D `int32` vector containing sequence lengths, having size `[batch_size]`. merge_repeated: Boolean. Default: True. @@ -207,7 +211,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): A tuple `(decoded, neg_sum_logits)` where decoded: A single-element list. `decoded[0]` is an `SparseTensor` containing the decoded outputs s.t.: - `decoded.indices`: Indices matrix `(total_decoded_outputs x 2)`. + `decoded.indices`: Indices matrix `(total_decoded_outputs, 2)`. The rows store: `[batch, time]`. `decoded.values`: Values vector, size `(total_decoded_outputs)`. The vector stores the decoded classes. -- GitLab From 76967158085d50b53d29901c140fea69b3cf15af Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 8 Nov 2017 11:32:03 -0800 Subject: [PATCH 0187/1801] [XLA:CPU] Implement single threaded Matrix-Vector products in LLVM IR Right now we're always doing a 8x8 tiling on the matrix. This can probably be tuned further. There are some other follow-up items that I did not want to put in this already large CL: - Eigen has some smarts to avoid issuing unaligned vector loads and stores which the current CL does not. We need to investigate if being smart about alignment is worth it. - Prevent LLVM from vectorizing the epilogue. In fact we should disable loop vectorization for all the loops we've explicitly vectorized. - Cache the kernels by their shape to reduce code size impact. - Add aliasing information to the loads and stores emitted by the PacketSupportLibrary. This is probably not super critical since we've already vectorized the code, but we should do this for completeness. PiperOrigin-RevId: 175036991 --- tensorflow/compiler/xla/service/cpu/BUILD | 2 + .../xla/service/cpu/dot_op_emitter.cc | 564 +++++++++++++++++- .../compiler/xla/service/cpu/dot_op_emitter.h | 28 + .../xla/service/cpu/ir_emission_utils.cc | 17 +- .../xla/service/cpu/ir_emission_utils.h | 11 +- .../xla/service/cpu/layout_assignment.cc | 4 +- tensorflow/compiler/xla/service/llvm_ir/BUILD | 24 + .../service/llvm_ir/kernel_support_library.cc | 63 ++ .../service/llvm_ir/kernel_support_library.h | 124 ++++ .../compiler/xla/service/llvm_ir/llvm_util.cc | 8 + .../compiler/xla/service/llvm_ir/llvm_util.h | 2 + .../service/llvm_ir/vector_support_library.cc | 150 +++++ .../service/llvm_ir/vector_support_library.h | 174 ++++++ .../compiler/xla/tests/dot_operation_test.cc | 80 +++ 14 files changed, 1233 insertions(+), 18 deletions(-) create mode 100644 tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc create mode 100644 tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h create mode 100644 tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc create mode 100644 tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 6213baee2f..10ec677e2f 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -290,8 +290,10 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_module_config", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", + "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library", "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/compiler/xla/service/llvm_ir:vector_support_library", "//tensorflow/core:lib", "@llvm//:core", ], diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index e57d49172b..1cbd4094a3 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -25,7 +25,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" @@ -38,6 +40,450 @@ using llvm_ir::SetToFirstInsertPoint; namespace cpu { +namespace { +// Loads a tile of values from a 2D tensor. +class TileLoader { + public: + // Constructs a TileLoader that will load a tile consisting of + // `tile_size_along_major_dim` vectors from the matrix `matrix`, starting at + // `major_dim_offset` in the major dimension. The tile size along the minor + // dimension is the vector size, and that is implicitly determined by `vsl`. + TileLoader(VectorSupportLibrary* vsl, llvm::IRBuilder<>* ir_builder, + llvm::Value* matrix, int64 matrix_size_along_minor_dim, + llvm::Value* major_dim_offset, int64 tile_size_along_major_dim) + : vsl_(vsl) { + pointers_.reserve(tile_size_along_major_dim); + for (int64 i = 0; i < tile_size_along_major_dim; i++) { + llvm::Value* total_offset = ir_builder->CreateMul( + ir_builder->getInt64(matrix_size_along_minor_dim), + ir_builder->CreateAdd(ir_builder->getInt64(i), major_dim_offset)); + pointers_.push_back(vsl_->ComputeOffsetPointer(matrix, total_offset)); + } + } + + // Load a tile consisting of `tile_size_along_major_dim_` vectors starting at + // `major_dim_offset_` in the major dimension and `minor_dim_offset` in the + // minor dimension. + std::vector LoadTile(llvm::Value* minor_dim_offset) const { + std::vector result; + result.reserve(pointers_.size()); + for (const auto& pointer : pointers_) { + result.push_back(vsl_->LoadVector(pointer, minor_dim_offset)); + } + return result; + } + + private: + VectorSupportLibrary* vsl_; + std::vector pointers_; +}; + +// Computes a dot product between "[M,K]{0,1} lhs" with a [K,1] vector (the +// layout of the vector does not matter). This implementation uses a tiling +// scheme to improve performance. +// +// We logically separate the LHS matrix into four segments: +// +// +----------------------+---+ +// | | | +// | | | +// | A | B | +// | | | +// | | | +// | | | +// +----------------------+---+ +// | C | D | +// +----------------------+---+ +// +// where A is the largest submatrix of the LHS that can be evenly dividied into +// tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: +// +// +---+---+---+---+ +--+--+--+--+ +// |M00|M10|M20|M30| |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M01|M11|M21|M31| and |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M02|M12|M22|M32| |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M03|M13|M23|M33| |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// +// (Legend: rows are horizontal and columns are vertical; and each column is one +// llvm::Value of a vector type) +// +// where: +// +// a. The left tile is from the column major left matrix. +// b. The right tile is an elementwise broadcast of a [V0, V1, V2, V3] +// vector loaded from the RHS vector. +// +// As we iterate through the column dimension, we compute the change to the +// result vector by an elementwise multiplication between the two tiles above +// followed by a reduction along the major dimension: +// +// +-----------------------------------+ +// | M00*V0 + M10*V1 + M20*V2 + M30*V3 | +// +-----------------------------------+ +// | M01*V0 + M11*V1 + M21*V2 + M31*V3 | +// Result[R:R+4] += +-----------------------------------+ +// | M02*V0 + M12*V1 + M22*V2 + M32*V3 | +// +-----------------------------------+ +// | M03*V0 + M13*V1 + M23*V2 + M33*V3 | +// +-----------------------------------+ +// +// Where R is the starting row for the tile. +// +// We have an inner epilogue loop to deal with the "C" submatrix and an outer +// epilogue loop to deal with the B,D submarix. +// +// TODO(sanjoy): We should investigate if using gather loads and scatter stores +// can be used here have the same inner loop for both column-major and row-major +// matrix-vector products. +class ColumnMajorMatrixVectorProductEmitter { + public: + ColumnMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, + int64 tile_rows, int64 tile_cols, + int64 m, int64 k, llvm::Value* lhs, + llvm::Value* rhs, llvm::Value* result, + llvm::IRBuilder<>* ir_builder) + : scalar_type_(scalar_type), + tile_rows_(tile_rows), + tile_cols_(tile_cols), + m_(m), + k_(k), + lhs_(lhs), + rhs_(rhs), + result_(result), + ir_builder_(ir_builder), + ksl_(ir_builder_), + vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { + CHECK(tile_rows_ > 0 && IsPowerOfTwo(static_cast(tile_rows_))); + } + + void Emit(); + + private: + void EmitOuterLoopBody(llvm::Value* column, int64 column_count, + bool is_first_column); + + TileLoader GetLhsTileLoader(llvm::Value* column_start, int64 column_count) { + return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, + /*matrix_size_along_minor_dim=*/m_, + /*major_dim_offset=*/column_start, + /*tile_size_along_major_dim=*/column_count); + } + + // Load a tile of values from the RHS. For the RHS a "tile" is a contiguous + // sequnce of `count` values, each one broadcasted to the vector width. + std::vector LoadRhsTile(llvm::Value* offset, int64 count) { + llvm::Value* base_pointer = vsl_.ComputeOffsetPointer(rhs_, offset); + std::vector result; + result.reserve(count); + for (int64 i = 0; i < count; i++) { + result.push_back(vsl_.LoadBroadcast(base_pointer, i)); + } + return result; + } + + void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, + const std::vector& rhs_tile, + int64 columns, bool is_first_column); + + void EmitInnerLoopEpilogue(llvm::Value* current_tile_col, int64 columns, + bool is_first_tiled_column); + + PrimitiveType scalar_type_; + int64 tile_rows_; + int64 tile_cols_; + int64 m_; + int64 k_; + llvm::Value* lhs_; + llvm::Value* rhs_; + llvm::Value* result_; + llvm::IRBuilder<>* ir_builder_; + KernelSupportLibrary ksl_; + VectorSupportLibrary vsl_; +}; + +void ColumnMajorMatrixVectorProductEmitter::EmitOuterLoopBody( + llvm::Value* column, int64 column_count, bool is_first_column) { + TileLoader lhs_tile_loader = GetLhsTileLoader(/*column_start=*/column, + /*column_count=*/column_count); + + std::vector rhs_tile = + LoadRhsTile(column, /*count=*/column_count); + EmitInnerLoopTiled(&lhs_tile_loader, rhs_tile, + /*columns=*/column_count, is_first_column); + EmitInnerLoopEpilogue(column, /*columns=*/column_count, is_first_column); +} + +void ColumnMajorMatrixVectorProductEmitter::Emit() { + // See the comment on the class declaration for the algorithm used here. + int64 column_remainder = k_ % tile_cols_; + int64 column_limit = k_ - column_remainder; + + ksl_.For("dot.outer.tiled", + /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols_, + [&](llvm::Value* column, bool is_first_column) { + EmitOuterLoopBody(column, tile_cols_, is_first_column); + }); + + if (column_remainder != 0) { + EmitOuterLoopBody(ir_builder_->getInt64(column_limit), column_remainder, + column_limit == 0); + } +} + +void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( + TileLoader* lhs_tile_loader, const std::vector& rhs_tile, + int64 columns, bool is_first_column) { + int64 row_limit = m_ - (m_ % tile_rows_); + + ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/row_limit, + /*step=*/tile_rows_, [&](llvm::Value* row) { + std::vector lhs_tile = + lhs_tile_loader->LoadTile(/*minor_dim_offset=*/row); + llvm::Value* accumulator = is_first_column + ? vsl_.GetZeroVector() + : vsl_.LoadVector(result_, row); + for (int i = 0; i < columns; i++) { + accumulator = vsl_.MulAdd(lhs_tile[i], rhs_tile[i], accumulator); + } + vsl_.StoreVector(accumulator, result_, row); + }); +} + +void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( + llvm::Value* current_tile_col, int64 columns, bool is_first_tiled_column) { + int64 row_start = m_ - (m_ % tile_rows_); + if (row_start == m_) { + return; + } + + llvm::Value* columns_llvm = ir_builder_->getInt64(columns); + + // for (col = current_tile_col; col < (columns + current_tile_col); col++) + // for (row = row_start, row < m_; row++) { + // result[row] += lhs[row, col] * rhs[col] + // // Also take into account that if col is 0 then result[row] is not + // // initialized. + // } + + ksl_.For( + "dot.inner.epilg.outer", /*start=*/current_tile_col, + /*end=*/ir_builder_->CreateAdd(columns_llvm, current_tile_col), + /*step=*/1, /*peel_first_iteration=*/false, + [&](llvm::Value* col, llvm::Value* is_first_scalar_col) { + llvm::Value* rhs_element = vsl_.LoadScalar(rhs_, col); + llvm::Value* total_offset = + ir_builder_->CreateMul(col, ir_builder_->getInt64(m_)); + llvm::Value* lhs_base_pointer = + vsl_.ComputeOffsetPointer(lhs_, total_offset); + ksl_.For( + "dot.inner.epilg.inner", /*start=*/row_start, /*end=*/m_, + /*step=*/1, [&](llvm::Value* scalar_row) { + llvm::Value* product = vsl_.Mul( + vsl_.LoadScalar(lhs_base_pointer, scalar_row), rhs_element); + llvm::Value* setting_result_first_time = ir_builder_->CreateAnd( + is_first_scalar_col, + ir_builder_->getInt1(is_first_tiled_column)); + ksl_.If( + setting_result_first_time, + [&]() { vsl_.StoreScalar(product, result_, scalar_row); }, + [&]() { + vsl_.StoreScalar( + vsl_.Add(vsl_.LoadScalar(result_, scalar_row), product), + result_, scalar_row); + }); + }); + }); +} + +// Computes a dot product between "[M,K]{1,0} lhs" with a [K,1] vector (the +// layout of the vector does not matter). This implementation uses a tiling +// scheme to improve performance. +// +// We logically separate the LHS matrix into four segments: +// +// +----------------------+---+ +// | | | +// | | | +// | A | B | +// | | | +// | | | +// | | | +// +----------------------+---+ +// | C | D | +// +----------------------+---+ +// +// where A is the largest submatrix of the LHS that can be evenly dividied into +// tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: +// +// +---+---+---+---+ +// |M00|M10|M20|M30| +// +---+---+---+---+ +--+--+--+--+ +// |M01|M11|M21|M31| and |V0|V1|V2|V3| +// +---+---+---+---+ +--+--+--+--+ +// |M02|M12|M22|M32| +// +---+---+---+---+ +// |M03|M13|M23|M33| +// +---+---+---+---+ +// +// (Legend: rows are horizontal and columns are vertical; and each row is one +// llvm::Value of a vector type) +// +// where: +// +// a. The left tile is loaded from the row major left matrix. +// b. The right vector is loaded from the RHS vector. +// +// We keep 4 vector accumulators accumulating the following four vector +// expressions as we iterate over the row dimension: +// +// +------+------+------+------+ +// |M0I*V0|M1I*V1|M2I*V2|M3I*V3| for I in [0,4) +// +------+------+------+------+ +// +// In the end we do a horizontal reduction over these 4 vector accumulators to +// get 4 values in the result vector. +// +// We have an inner epilogue loop to deal with the "B" sub-matrix and an outer +// epilogue loop to deal with the C,D submatrix. +class RowMajorMatrixVectorProductEmitter { + public: + RowMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, int64 tile_rows, + int64 tile_cols, int64 m, int64 k, + llvm::Value* lhs, llvm::Value* rhs, + llvm::Value* result, + llvm::IRBuilder<>* ir_builder) + : scalar_type_(scalar_type), + tile_rows_(tile_rows), + tile_cols_(tile_cols), + m_(m), + k_(k), + lhs_(lhs), + rhs_(rhs), + result_(result), + ir_builder_(ir_builder), + ksl_(ir_builder_), + vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { + CHECK(tile_cols_ > 0 && IsPowerOfTwo(static_cast(tile_cols_))); + } + + void Emit(); + + private: + TileLoader GetLhsTileLoader(llvm::Value* row_start, int64 row_count) { + return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, + /*matrix_size_along_minor_dim=*/k_, + /*major_dim_offset=*/row_start, + /*tile_size_along_major_dim=*/row_count); + } + + void EmitOuterLoopBody(llvm::Value* row, int64 row_count); + + void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, int64 rows, + std::vector* vector_accumulators); + + void EmitInnerLoopEpilogue(llvm::Value* current_tile_row, int64 rows, + std::vector* scalar_accumulators); + + PrimitiveType scalar_type_; + int64 tile_rows_; + int64 tile_cols_; + int64 m_; + int64 k_; + llvm::Value* lhs_; + llvm::Value* rhs_; + llvm::Value* result_; + llvm::IRBuilder<>* ir_builder_; + KernelSupportLibrary ksl_; + VectorSupportLibrary vsl_; +}; + +void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row, + int64 row_count) { + TileLoader lhs_tile_loader = GetLhsTileLoader(/*row_start=*/row, + /*row_count=*/row_count); + std::vector vector_accumulators; + std::vector scalar_accumulators; + for (int i = 0; i < row_count; i++) { + vector_accumulators.emplace_back(&vsl_, vsl_.GetZeroVector()); + scalar_accumulators.emplace_back(&vsl_, vsl_.GetZeroScalar()); + } + EmitInnerLoopTiled(&lhs_tile_loader, /*rows=*/row_count, + &vector_accumulators); + EmitInnerLoopEpilogue(/*current_tile_row=*/row, /*rows=*/row_count, + &scalar_accumulators); + + for (int i = 0; i < row_count; i++) { + llvm::Value* result_value = + vsl_.Add(vsl_.AddReduce(vector_accumulators[i].Get()), + scalar_accumulators[i].Get()); + llvm::Value* offset = ir_builder_->CreateAdd(ir_builder_->getInt64(i), row); + vsl_.StoreScalar(result_value, result_, offset); + } +} + +void RowMajorMatrixVectorProductEmitter::Emit() { + // See the comment on the class declaration for the algorithm used here. + int64 row_remainder = m_ % tile_rows_; + int64 row_limit = m_ - row_remainder; + + ksl_.For("dot.outer.tiled", + /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows_, + [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows_); }); + + if (row_remainder != 0) { + EmitOuterLoopBody(ir_builder_->getInt64(row_limit), row_remainder); + } +} + +void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( + TileLoader* lhs_tile_loader, int64 rows, + std::vector* vector_accumulators) { + int64 column_limit = k_ - (k_ % tile_cols_); + + ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/column_limit, + /*step=*/tile_cols_, [&](llvm::Value* col) { + std::vector lhs_tile = + lhs_tile_loader->LoadTile(/*minor_dim_offset=*/col); + llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col); + for (int i = 0; i < rows; i++) { + llvm::Value* old_sum = (*vector_accumulators)[i].Get(); + (*vector_accumulators)[i].Set( + vsl_.Add(old_sum, vsl_.Mul(rhs_value, lhs_tile[i]))); + } + }); +} + +void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( + llvm::Value* current_tile_row, int64 rows, + std::vector* scalar_accumulators) { + int64 column_start = k_ - (k_ % tile_cols_); + if (column_start == k_) { + return; + } + + for (int r = 0; r < rows; r++) { + llvm::Value* total_offset = ir_builder_->CreateMul( + ir_builder_->CreateAdd(ir_builder_->getInt64(r), current_tile_row), + ir_builder_->getInt64(k_)); + llvm::Value* lhs_base_pointer = + vsl_.ComputeOffsetPointer(lhs_, total_offset); + ksl_.For("dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k_, + /*step=*/1, [&](llvm::Value* scalar_col) { + llvm::Value* product = + vsl_.Mul(vsl_.LoadScalar(lhs_base_pointer, scalar_col), + vsl_.LoadScalar(rhs_, scalar_col)); + llvm::Value* old_value = (*scalar_accumulators)[r].Get(); + (*scalar_accumulators)[r].Set(vsl_.Add(old_value, product)); + }); + } +} + +} // namespace + DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs, const llvm_ir::IrArray& target_array, @@ -72,6 +518,88 @@ DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, bool DotOpEmitter::ShapesAreLegalForRuntimeDot() const { return true; } +bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { + if (dot_.shape().dimensions_size() != 2 || + ProfitableToImplementDotInUntiledLlvmIr(dot_) == + DotInLlvmIrProfitable::kYes) { + return false; + } + + if (!primitive_util::IsFloatingPointType(dot_.shape().element_type()) && + !primitive_util::IsIntegralType(dot_.shape().element_type())) { + return false; + } + + MatMultDims mat_mult_dims = GetMatMultDims(); + bool is_column_major_matrix_vector = false; + bool is_row_major_matrix_vector = false; + + int64 m, k; + bool swap_operands; + + if (mat_mult_dims.m == 1) { + bool rhs_effectively_row_major = + transpose_rhs_ ^ !mat_mult_dims.rhs_column_major; + if (rhs_effectively_row_major) { + k = mat_mult_dims.k; + m = mat_mult_dims.n; + is_column_major_matrix_vector = true; + swap_operands = true; + } else { + k = mat_mult_dims.k; + m = mat_mult_dims.n; + is_row_major_matrix_vector = true; + swap_operands = true; + } + } + + if (mat_mult_dims.n == 1) { + bool lhs_effectively_column_major = + transpose_lhs_ ^ mat_mult_dims.lhs_column_major; + if (lhs_effectively_column_major) { + m = mat_mult_dims.m; + k = mat_mult_dims.k; + is_column_major_matrix_vector = true; + swap_operands = false; + } else { + m = mat_mult_dims.m; + k = mat_mult_dims.k; + is_row_major_matrix_vector = true; + swap_operands = false; + } + } + + if (!is_column_major_matrix_vector && !is_row_major_matrix_vector) { + return false; + } + + if (is_column_major_matrix_vector) { + VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m + << " and k = " << k; + ColumnMajorMatrixVectorProductEmitter emitter( + dot_.shape().element_type(), 8, 8, m, k, + swap_operands ? rhs_array_.GetBasePointer() + : lhs_array_.GetBasePointer(), + swap_operands ? lhs_array_.GetBasePointer() + : rhs_array_.GetBasePointer(), + target_array_.GetBasePointer(), ir_builder_); + emitter.Emit(); + } else { + VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m + << " and k = " << k; + RowMajorMatrixVectorProductEmitter emitter( + dot_.shape().element_type(), 8, 8, m, k, + swap_operands ? rhs_array_.GetBasePointer() + : lhs_array_.GetBasePointer(), + swap_operands ? lhs_array_.GetBasePointer() + : rhs_array_.GetBasePointer(), + target_array_.GetBasePointer(), ir_builder_); + emitter.Emit(); + } + + return true; +} + tensorflow::Status DotOpEmitter::Emit() { // The dot operation performs a sum of products over dimension 0 of the left // hand side operand and dimension 1 of the right hand side operand. @@ -105,6 +633,10 @@ tensorflow::Status DotOpEmitter::Emit() { return EmitScalarDot(); } + if (EmitLlvmIrDotIfProfitable()) { + return Status::OK(); + } + if (PotentiallyImplementedAsEigenDot(dot_)) { return EmitCallToRuntime(); } @@ -340,22 +872,17 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { // // Effectively this involves swapping the 'lhs' with 'rhs' and 'm' with 'n'. - const Shape& lhs_shape = lhs_array_.GetShape(); - const Shape& rhs_shape = rhs_array_.GetShape(); + MatMultDims mat_mult_dims = GetMatMultDims(); - CHECK(LayoutUtil::Equal(lhs_shape.layout(), rhs_shape.layout())); + CHECK_EQ(mat_mult_dims.lhs_column_major, mat_mult_dims.rhs_column_major); - int64 m = lhs_shape.dimensions(transpose_lhs_ ? 1 : 0); - int64 k = lhs_shape.dimensions(transpose_lhs_ ? 0 : 1); - int64 n = rhs_shape.dimensions(transpose_rhs_ ? 0 : 1); const llvm_ir::IrArray* lhs = &lhs_array_; const llvm_ir::IrArray* rhs = &rhs_array_; bool transpose_lhs = transpose_lhs_; bool transpose_rhs = transpose_rhs_; - bool is_column_major = lhs_shape.layout().minor_to_major(0) == 0; - if (!is_column_major) { - std::swap(m, n); + if (!mat_mult_dims.lhs_column_major) { + std::swap(mat_mult_dims.m, mat_mult_dims.n); std::swap(lhs, rhs); std::swap(transpose_lhs, transpose_rhs); } @@ -367,12 +894,27 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { float_ptr_type), ir_builder_->CreateBitCast(lhs->GetBasePointer(), float_ptr_type), ir_builder_->CreateBitCast(rhs->GetBasePointer(), float_ptr_type), - ir_builder_->getInt64(m), ir_builder_->getInt64(n), - ir_builder_->getInt64(k), ir_builder_->getInt32(transpose_lhs), + ir_builder_->getInt64(mat_mult_dims.m), + ir_builder_->getInt64(mat_mult_dims.n), + ir_builder_->getInt64(mat_mult_dims.k), + ir_builder_->getInt32(transpose_lhs), ir_builder_->getInt32(transpose_rhs)}); return tensorflow::Status::OK(); } +DotOpEmitter::MatMultDims DotOpEmitter::GetMatMultDims() const { + CHECK_EQ(dot_.shape().dimensions_size(), 2); + + const Shape& lhs_shape = lhs_array_.GetShape(); + const Shape& rhs_shape = rhs_array_.GetShape(); + + return {lhs_shape.dimensions(transpose_lhs_ ? 1 : 0), + lhs_shape.dimensions(transpose_lhs_ ? 0 : 1), + rhs_shape.dimensions(transpose_rhs_ ? 0 : 1), + lhs_shape.layout().minor_to_major(0) == 0, + rhs_shape.layout().minor_to_major(0) == 0}; +} + llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest( llvm_ir::ForLoopNest* loop_nest, const llvm_ir::IrArray& operand_array, int64 reduction_dimension, tensorflow::StringPiece name_suffix) { diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index cfc1066045..182e1b8c68 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -59,6 +59,10 @@ class DotOpEmitter { // LHS and RHS) and store the results in the target. tensorflow::Status EmitScalarDot(); + // Emit an LLVM IR implementation of the dot operation if we can. Returns + // true if an LLVM IR implementation was emitted. + bool EmitLlvmIrDotIfProfitable(); + // Emits a call to the CPU runtime to perform the matrix multiply. tensorflow::Status EmitCallToRuntime(); @@ -77,6 +81,30 @@ class DotOpEmitter { // no padding, and a rank of two. bool ShapesAreLegalForRuntimeDot() const; + // Represents the dimensions of a matrix-matrix multiply operation. + struct MatMultDims { + // The number of rows in the LHS. + int64 m; + + // The number of columns in the LHS, which is also must be equal to the + // number of rows in the RHS. + int64 k; + + // The number of columns on the RHS. + int64 n; + + // True if the LHS matrix column major. + bool lhs_column_major; + + // True if the RHS matrix column major. + bool rhs_column_major; + }; + + // Get the MatMultDims instance for the dot product this DotOpEmitter + // represents. Precondition: the dot is of rank 2 (and thus its operands are + // of rank 2 as well). + MatMultDims GetMatMultDims() const; + const HloInstruction& dot_; const bool transpose_lhs_; const bool transpose_rhs_; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc index b99b36a55e..7149a19310 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc @@ -105,7 +105,9 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } - if (ProfitableToImplementDotInLlvmIr(hlo) == DotInLlvmIrProfitable::kYes) { + if (ProfitableToImplementDotInUntiledLlvmIr(hlo) == + DotInLlvmIrProfitable::kYes || + ProfitableToImplementDotInTiledLlvmIr(hlo)) { return false; } @@ -136,7 +138,7 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } -DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( +DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( const HloInstruction& dot) { if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) { const Shape& result_shape = dot.shape(); @@ -178,5 +180,16 @@ DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( return DotInLlvmIrProfitable::kNo; } +bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) { + // Any Matrix-Vector product of floating point or integral type, or + // a transpose-dot fusion of the same can be lowered to a tiled LLVM + // IR implementation. + const Shape& shape = dot.shape(); + return shape.dimensions_size() == 2 && + (shape.dimensions(0) == 1 || shape.dimensions(1) == 1) && + (primitive_util::IsFloatingPointType(shape.element_type()) || + primitive_util::IsIntegralType(shape.element_type())); +} + } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h index 66656ed997..cbe07a7c2b 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h @@ -29,16 +29,21 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& dot); enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs }; // Returns a value to indicate if (and under what conditions) will lowering -// |dot| as a pure LLVM IR dot operation be profitable over calling into Eigen. -// Possible return values are: +// |dot| as a untiled LLVM IR dot operation be profitable over calling into +// Eigen or emitting a tiled LLVM IR implementation. Possible return values +// are: // // * DotInLlvmIrProfitable::kYes - always profitable. // * DotInLlvmIrProfitable::kNo - never profitable. // * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make // the Rhs layout column major. -DotInLlvmIrProfitable ProfitableToImplementDotInLlvmIr( +DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( const HloInstruction& dot); +// Returns true to indicate that we can generate a tiled LLVM IR implementation +// for |dot|. +bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot); + } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc index c446b6b792..b75ca34e0a 100644 --- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc @@ -51,7 +51,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( tensorflow::gtl::FlatMap should_make_rhs_col_major_cache; auto should_make_rhs_col_major = [&](const HloInstruction& instruction) { - if (ProfitableToImplementDotInLlvmIr(instruction) != + if (ProfitableToImplementDotInUntiledLlvmIr(instruction) != DotInLlvmIrProfitable::kWithColumnMajorRhs) { return false; } @@ -68,7 +68,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( bool result = std::all_of( rhs->users().begin(), rhs->users().end(), [&](HloInstruction* user) { - return ProfitableToImplementDotInLlvmIr(*user) == + return ProfitableToImplementDotInUntiledLlvmIr(*user) == DotInLlvmIrProfitable::kWithColumnMajorRhs && user->operand(0) != rhs; }); diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 075d4a1ab5..8f24bb1718 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -155,6 +155,30 @@ cc_library( ], ) +cc_library( + name = "vector_support_library", + srcs = ["vector_support_library.cc"], + hdrs = ["vector_support_library.h"], + deps = [ + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "@llvm//:core", + ], +) + +cc_library( + name = "kernel_support_library", + srcs = ["kernel_support_library.cc"], + hdrs = ["kernel_support_library.h"], + deps = [ + ":llvm_loop", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/core:lib", + "@llvm//:core", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc new file mode 100644 index 0000000000..123a327d4d --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" + +namespace xla { +void KernelSupportLibrary::For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& for_body_generator) { + If(ir_builder_->CreateICmpSLT(start, end), [&]() { + for_body_generator(start, /*is_first_iteration=*/true); + For(name, ir_builder_->CreateAdd(start, step), end, step, + [&](llvm::Value* iv) { for_body_generator(iv, false); }); + }); +} + +void KernelSupportLibrary::For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, bool peel_first_iteration, + const std::function& for_body_generator) { + if (peel_first_iteration) { + For(name, start, end, step, true, + [&](llvm::Value* indvar, bool is_first_iteration) { + for_body_generator(indvar, ir_builder_->getInt1(is_first_iteration)); + }); + } else { + std::unique_ptr loop = llvm_ir::ForLoop::EmitForLoop( + name, start, end, step, ir_builder_, prevent_unrolling_); + ir_builder_->SetInsertPoint(&loop->GetBodyBasicBlock()->back()); + for_body_generator(loop->GetIndVarValue(), + /*is_first_iteration=*/ir_builder_->CreateICmpEQ( + loop->GetIndVarValue(), start)); + llvm_ir::SetToLastInsertPoint(loop->GetExitBasicBlock(), ir_builder_); + } +} + +void KernelSupportLibrary::If( + llvm::Value* condition, const std::function& true_block_generator, + const std::function& false_block_generator) { + llvm_ir::LlvmIfData if_data = + llvm_ir::EmitIfThenElse(condition, "", ir_builder_); + ir_builder_->SetInsertPoint(&if_data.true_block->back()); + true_block_generator(); + ir_builder_->SetInsertPoint(&if_data.false_block->back()); + false_block_generator(); + llvm_ir::SetToLastInsertPoint(if_data.after_block, ir_builder_); +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h new file mode 100644 index 0000000000..25aa2291a6 --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h @@ -0,0 +1,124 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ + +#include + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Value.h" +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace xla { +// A thin wrapper around llvm_loop.h to make code generating structured control +// flow more readable. +class KernelSupportLibrary { + public: + // `ir_builder` is the llvm::IRBuilder instance used to generate LLVM IR. + // If `prevent_unrolling` is true then unrolling is explicitly disabled on + // every loop generated by this instance of KernelSupportLibrary. + explicit KernelSupportLibrary(llvm::IRBuilder<>* ir_builder, + bool prevent_unrolling = true) + : ir_builder_(ir_builder), prevent_unrolling_(prevent_unrolling) {} + + // Generates the following control flow structure: + // + // if (`start` < `end`) { + // `for_body_generator(/*ind_var=*/start, /*is_first_iteration=*/true)`; + // for (i64 i = `start` + `step`; i s< `end`; i += `step`) + // `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/false)`; + // } + void For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& + for_body_generator); + + void For( + tensorflow::StringPiece name, int64 start, int64 end, int64 step, + const std::function& + for_body_generator) { + For(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), for_body_generator); + } + + // Generates the following control flow structure if `peel_first_iteration` is + // true: + // + // if (`start` < `end`) { + // `for_body_generator(/*ind_var=*/start, /*is_first_iteration=*/,true)`; + // for (i64 i = `start` + `step`; i s< `end`; i += `step`) + // `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/,false)`; + // } + // + // and the following if `peel_first_iteration` is false: + // + // for (i64 i = `start`; i s< `end`; i += `step`) + // `for_body_generator(/*ind_var=*/,i, + // /*is_first_iteration=*/,(i != `start`))`; + void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, bool peel_first_iteration, + const std::function& + for_body_generator); + + void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + int64 step, bool peel_first_iteration, + const std::function& + for_body_generator) { + For(name, /*start=*/start, /*end=*/end, + /*step=*/ir_builder_->getInt64(step), peel_first_iteration, + for_body_generator); + } + + void For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& for_body_generator) { + For(name, start, end, step, + /*peel_first_iteration=*/false, + [&](llvm::Value* indvar, llvm::Value*) { for_body_generator(indvar); }); + } + + void For( + tensorflow::StringPiece name, int64 start, int64 end, int64 step, + const std::function& for_body_generator) { + For(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), for_body_generator); + } + + // Generates the following control flow structure: + // + // if (`condition`) + // `true_block_generator()`; + // else + // `false_block_generator()`; + void If(llvm::Value* condition, + const std::function& true_block_generator, + const std::function& false_block_generator = []() {}); + + private: + llvm::IRBuilder<>* ir_builder_; + bool prevent_unrolling_; +}; +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_KERNEL_SUPPORT_LIBRARY_H_ diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 956c0d5f05..d95409e399 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -537,6 +537,14 @@ void SetToFirstInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder) { builder->SetInsertPoint(blk, blk->getFirstInsertionPt()); } +void SetToLastInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder) { + if (llvm::Instruction* terminator = blk->getTerminator()) { + builder->SetInsertPoint(terminator); + } else { + builder->SetInsertPoint(blk); + } +} + llvm::Value* CreateRor(llvm::Value* rotand, llvm::Value* rotor, llvm::IRBuilder<>* builder) { auto size = rotand->getType()->getPrimitiveSizeInBits(); diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index 304192b58e..f70d9f88b3 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -243,6 +243,8 @@ llvm::Instruction* AddRangeMetadata(int64 lower, int64 upper, void SetToFirstInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder); +void SetToLastInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder); + // Create a bitwise rotation of `rotand` by `rotor`. llvm::Value* CreateRor(llvm::Value* rotand, llvm::Value* rotor, llvm::IRBuilder<>* builder); diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc new file mode 100644 index 0000000000..e8c6a83618 --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc @@ -0,0 +1,150 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h" + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" + +namespace xla { +VectorSupportLibrary::VectorSupportLibrary(PrimitiveType primitive_type, + int64 vector_size, + llvm::IRBuilder<>* ir_builder, + std::string name) + : vector_size_(vector_size), + primitive_type_(primitive_type), + ir_builder_(ir_builder), + name_(std::move(name)) { + scalar_type_ = llvm_ir::PrimitiveTypeToIrType( + primitive_type, ir_builder_->GetInsertBlock()->getModule()); + scalar_pointer_type_ = llvm::PointerType::getUnqual(scalar_type_); + vector_type_ = llvm::VectorType::get(scalar_type_, vector_size); + vector_pointer_type_ = llvm::PointerType::getUnqual(vector_type_); +} + +llvm::Value* VectorSupportLibrary::Mul(llvm::Value* lhs, llvm::Value* rhs) { + if (scalar_type_->isFloatingPointTy()) { + return ir_builder()->CreateFMul(lhs, rhs, name()); + } else { + return ir_builder()->CreateMul(lhs, rhs, name()); + } +} + +llvm::Value* VectorSupportLibrary::Add(llvm::Value* lhs, llvm::Value* rhs) { + if (scalar_type_->isFloatingPointTy()) { + return ir_builder()->CreateFAdd(lhs, rhs, name()); + } else { + return ir_builder()->CreateAdd(lhs, rhs, name()); + } +} + +llvm::Value* VectorSupportLibrary::ComputeOffsetPointer( + llvm::Value* base_pointer, llvm::Value* offset_elements) { + if (base_pointer->getType() != scalar_pointer_type()) { + base_pointer = ir_builder()->CreateBitCast(base_pointer, + scalar_pointer_type(), name()); + } + return ir_builder()->CreateInBoundsGEP(base_pointer, {offset_elements}, + name()); +} + +llvm::Value* VectorSupportLibrary::LoadVector(llvm::Value* pointer) { + if (pointer->getType() != vector_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, vector_pointer_type(), name()); + } + return ir_builder()->CreateAlignedLoad( + pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_), name()); +} + +llvm::Value* VectorSupportLibrary::LoadScalar(llvm::Value* pointer) { + if (pointer->getType() != scalar_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); + } + return ir_builder()->CreateAlignedLoad( + pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_), name()); +} + +void VectorSupportLibrary::StoreVector(llvm::Value* value, + llvm::Value* pointer) { + if (pointer->getType() != vector_pointer_type()) { + pointer = ir_builder()->CreateBitCast(pointer, vector_pointer_type()); + } + ir_builder()->CreateAlignedStore( + value, pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_)); +} + +void VectorSupportLibrary::StoreScalar(llvm::Value* value, + llvm::Value* pointer) { + if (pointer->getType() != scalar_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); + } + ir_builder()->CreateAlignedStore( + value, pointer, ShapeUtil::ByteSizeOfPrimitiveType(primitive_type_)); +} + +llvm::Value* VectorSupportLibrary::LoadBroadcast(llvm::Value* pointer) { + if (pointer->getType() != scalar_pointer_type()) { + pointer = + ir_builder()->CreateBitCast(pointer, scalar_pointer_type(), name()); + } + return ir_builder()->CreateVectorSplat( + vector_size(), ir_builder()->CreateLoad(pointer), name()); +} + +llvm::Value* VectorSupportLibrary::AddReduce(llvm::Value* vector) { + llvm::SmallVector mask(vector_size(), nullptr); + for (unsigned i = vector_size(); i != 1; i >>= 1) { + // On every iteration, we shuffle half of the remaining lanes to the top + // half of shuffle, and add two old and the new vector. + + for (unsigned j = 0; j < vector_size(); ++j) { + if (j < (i / 2)) { + mask[j] = ir_builder()->getInt32(i / 2 + j); + } else { + mask[j] = llvm::UndefValue::get(ir_builder()->getInt32Ty()); + } + } + + llvm::Value* half_remaining_lanes = ir_builder()->CreateShuffleVector( + vector, llvm::UndefValue::get(vector_type()), + llvm::ConstantVector::get(mask), ""); + vector = Add(vector, half_remaining_lanes); + } + + return ir_builder()->CreateExtractElement(vector, ir_builder()->getInt32(0), + name()); +} + +llvm::Value* VectorSupportLibrary::GetZeroVector() { + return llvm::Constant::getNullValue(vector_type()); +} + +llvm::Value* VectorSupportLibrary::GetZeroScalar() { + return llvm::Constant::getNullValue(scalar_type()); +} + +LlvmVariable::LlvmVariable(llvm::Type* type, llvm::IRBuilder<>* ir_builder) + : ir_builder_(ir_builder) { + alloca_ = llvm_ir::EmitAllocaAtFunctionEntry(type, "", ir_builder_); +} + +llvm::Value* LlvmVariable::Get() { return ir_builder_->CreateLoad(alloca_); } + +void LlvmVariable::Set(llvm::Value* new_value) { + ir_builder_->CreateStore(new_value, alloca_); +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h new file mode 100644 index 0000000000..3072677ab0 --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h @@ -0,0 +1,174 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ + +#include + +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Value.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { +// A thin wrapper around llvm_util.h to make code generating vector math flow +// more readable. +class VectorSupportLibrary { + public: + // This VectorSupportLibrary instance remembers `primitive_type` and + // `vector_size`, and these are implicitly used by the methods on this + // instance (i.e. LoadVector will load a vector of type <`vector_size` x + // `primitive_type`>). + VectorSupportLibrary(PrimitiveType primitive_type, int64 vector_size, + llvm::IRBuilder<>* ir_builder, std::string name); + + llvm::Value* Mul(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* Mul(int64 lhs, llvm::Value* rhs) { + return Mul(ir_builder()->getInt64(lhs), rhs); + } + + llvm::Value* Add(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* Add(int64 lhs, llvm::Value* rhs) { + return Add(ir_builder()->getInt64(lhs), rhs); + } + + llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, llvm::Value* c) { + return Add(c, Mul(a, b)); + } + + llvm::Value* ComputeOffsetPointer(llvm::Value* base_pointer, + llvm::Value* offset_elements); + llvm::Value* ComputeOffsetPointer(llvm::Value* base_pointer, + int64 offset_elements) { + return ComputeOffsetPointer(base_pointer, + ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* LoadVector(llvm::Value* pointer); + + llvm::Value* LoadVector(llvm::Value* base_pointer, + llvm::Value* offset_elements) { + return LoadVector(ComputeOffsetPointer(base_pointer, offset_elements)); + } + + llvm::Value* LoadVector(llvm::Value* base_pointer, int64 offset_elements) { + return LoadVector(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* LoadScalar(llvm::Value* pointer); + + llvm::Value* LoadScalar(llvm::Value* base_pointer, + llvm::Value* offset_elements) { + return LoadScalar(ComputeOffsetPointer(base_pointer, offset_elements)); + } + + llvm::Value* LoadScalar(llvm::Value* base_pointer, int64 offset_elements) { + return LoadScalar(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + void StoreVector(llvm::Value* value, llvm::Value* pointer); + + void StoreVector(llvm::Value* value, llvm::Value* base_pointer, + llvm::Value* offset_elements) { + StoreVector(value, ComputeOffsetPointer(base_pointer, offset_elements)); + } + + void StoreVector(llvm::Value* value, llvm::Value* base_pointer, + int64 offset_elements) { + StoreVector(value, base_pointer, ir_builder()->getInt64(offset_elements)); + } + + void StoreScalar(llvm::Value* value, llvm::Value* pointer); + void StoreScalar(llvm::Value* value, llvm::Value* base_pointer, + llvm::Value* offset_elements) { + StoreScalar(value, ComputeOffsetPointer(base_pointer, offset_elements)); + } + + void StoreScalar(llvm::Value* value, llvm::Value* base_pointer, + int64 offset_elements) { + StoreScalar(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* LoadBroadcast(llvm::Value* pointer); + llvm::Value* LoadBroadcast(llvm::Value* base_pointer, + llvm::Value* offset_elements) { + return LoadBroadcast(ComputeOffsetPointer(base_pointer, offset_elements)); + } + llvm::Value* LoadBroadcast(llvm::Value* base_pointer, int64 offset_elements) { + return LoadBroadcast(base_pointer, ir_builder()->getInt64(offset_elements)); + } + + llvm::Value* AddReduce(llvm::Value* vector); + + llvm::Value* GetZeroVector(); + llvm::Value* GetZeroScalar(); + + llvm::IRBuilder<>* ir_builder() const { return ir_builder_; } + int64 vector_size() const { return vector_size_; } + llvm::Type* vector_type() const { return vector_type_; } + llvm::Type* vector_pointer_type() const { return vector_pointer_type_; } + llvm::Type* scalar_type() const { return scalar_type_; } + llvm::Type* scalar_pointer_type() const { return scalar_pointer_type_; } + + const std::string& name() const { return name_; } + + private: + int64 vector_size_; + PrimitiveType primitive_type_; + llvm::IRBuilder<>* ir_builder_; + llvm::Type* vector_type_; + llvm::Type* vector_pointer_type_; + llvm::Type* scalar_type_; + llvm::Type* scalar_pointer_type_; + std::string name_; +}; + +// This wraps an alloca-backed stack variable which LLVM's SSA construction pass +// can later convert to a SSA value. +class LlvmVariable { + public: + LlvmVariable(llvm::Type*, llvm::IRBuilder<>* ir_builder); + + llvm::Value* Get(); + void Set(llvm::Value* new_value); + + private: + llvm::AllocaInst* alloca_; + llvm::IRBuilder<>* ir_builder_; +}; + +class VectorVariable : public LlvmVariable { + public: + VectorVariable(VectorSupportLibrary* vector_support, + llvm::Value* initial_value) + : LlvmVariable(vector_support->vector_type(), + vector_support->ir_builder()) { + Set(initial_value); + } +}; + +class ScalarVariable : public LlvmVariable { + public: + ScalarVariable(VectorSupportLibrary* vector_support, + llvm::Value* initial_value) + : LlvmVariable(vector_support->scalar_type(), + vector_support->ir_builder()) { + Set(initial_value); + } +}; +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_ diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index cf089d748d..c4e422b506 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -277,6 +277,62 @@ XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorFF) { TestMatrixDot(260, 3, 520, false, false); } +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x8) { + TestMatrixDot(1, 8, 8, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x130x8) { + TestMatrixDot(1, 130, 8, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x130) { + TestMatrixDot(1, 8, 130, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x290x130) { + TestMatrixDot(1, 290, 130, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_2x1x1) { + TestMatrixDot(2, 1, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_8x8x1) { + TestMatrixDot(8, 8, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x1x1) { + TestMatrixDot(16, 1, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x3x1) { + TestMatrixDot(16, 3, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_3x3x1) { + TestMatrixDot(3, 3, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_29x29x1) { + TestMatrixDot(29, 29, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x2) { + TestMatrixDot(1, 8, 2, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x2x8) { + TestMatrixDot(1, 2, 8, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1) { + TestMatrixDot(259, 258, 1, true, true); +} + +XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1_FT) { + TestMatrixDot(259, 258, 1, false, true); +} + XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) { constexpr bool kLhsRowMajor = false; constexpr bool kRhsRowMajor = false; @@ -361,6 +417,30 @@ XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64) { TestNonsquareMatrixDot(); } +XLA_TEST_F(DotOperationTest, MatrixVectorC64) { + auto lhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + {{1.0, 2.0, 3.0, -4.0}}, {1, 0})) + .ConsumeValueOrDie(); + auto rhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + {{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}, {-4.0, 4.0}}, {1, 0})) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); + auto result = builder.Dot( + builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {1, 4}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {4, 2}), "rhs")); + + Array2D expected({{30.0, -2.0}}); + + ComputeAndCompareR2( + &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); +} + XLA_TEST_F(DotOperationTest, ConcurrentMatMul) { ComputationBuilder builder(client_, TestName()); auto matrix1 = builder.ConstantR2({{1.0, 2.0}, {3.0, 4.0}}); -- GitLab From 845ac0cc752167e98172e65b8c12221d2e939061 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 11:35:53 -0800 Subject: [PATCH 0188/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 175037663 --- tensorflow/go/op/wrappers.go | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index bdfad48567..eb79da5384 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -62,6 +62,29 @@ func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, t return scope.AddOperation(opspec) } +// Outputs a `tf.Event` protocol buffer. +// +// When CreateSummaryDbWriter is being used, this op can be useful for +// importing data from event logs. +// +// Arguments: +// writer: A handle to a summary writer. +// event: A string containing a binary-encoded tf.Event proto. +// +// Returns the created operation. +func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ImportEvent", + Input: []tf.Input{ + writer, event, + }, + } + return scope.AddOperation(opspec) +} + // Outputs a `Summary` protocol buffer with a tensor. // // Arguments: @@ -22454,6 +22477,39 @@ func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input t return op.Output(0), op.Output(1), op.Output(2) } +// Creates summary database writer accessible by given resource handle. +// +// This can be used to write tensors from the execution graph directly +// to a database. Only SQLite is supported right now. This function +// will create the schema if it doesn't exist. Entries in the Users, +// Experiments, and Runs tables will be created automatically if they +// don't already exist. +// +// Arguments: +// writer: Handle to SummaryWriter resource to overwrite. +// db_uri: For example "file:/tmp/foo.sqlite". +// experiment_name: Can't contain ASCII control characters or <>. Case +// sensitive. If empty, then the Run will not be associated with any +// Experiment. +// run_name: Can't contain ASCII control characters or <>. Case sensitive. +// If empty, then each Tag will not be associated with any Run. +// user_name: Must be valid as both a DNS label and Linux username. If +// empty, then the Experiment will not be associated with any User. +// +// Returns the created operation. +func CreateSummaryDbWriter(scope *Scope, writer tf.Output, db_uri tf.Output, experiment_name tf.Output, run_name tf.Output, user_name tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "CreateSummaryDbWriter", + Input: []tf.Input{ + writer, db_uri, experiment_name, run_name, user_name, + }, + } + return scope.AddOperation(opspec) +} + // HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. type HistogramFixedWidthAttr func(optionalAttr) -- GitLab From 0efd77cb299e50a863edca7cfbed99585e10a12a Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 8 Nov 2017 11:54:14 -0800 Subject: [PATCH 0189/1801] Having with_gcp_support and windows causes build error. Multiple statements in a select statement should not be able to be true at the same time (unless one rule is more 'specific' than another). PiperOrigin-RevId: 175040618 --- tensorflow/BUILD | 91 ++++++++++++++++--- .../core/platform/default/build_config.bzl | 21 +++-- 2 files changed, 91 insertions(+), 21 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index f2cdf37dbf..5a408db94e 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -110,7 +110,7 @@ config_setting( config_setting( name = "no_tensorflow_py_deps", - values = {"define": "no_tensorflow_py_deps=true"}, + define_values = {"no_tensorflow_py_deps": "true"}, visibility = ["//visibility:public"], ) @@ -166,55 +166,116 @@ config_setting( # TODO(jhseu): Enable on other platforms other than Linux. config_setting( name = "with_jemalloc_linux_x86_64", - values = { - "cpu": "k8", - "define": "with_jemalloc=true", - }, + define_values = {"with_jemalloc": "true"}, + values = {"cpu": "k8"}, visibility = ["//visibility:public"], ) config_setting( name = "with_jemalloc_linux_ppc64le", - values = { - "cpu": "ppc", - "define": "with_jemalloc=true", - }, + define_values = {"with_jemalloc": "true"}, + values = {"cpu": "ppc"}, visibility = ["//visibility:public"], ) config_setting( name = "with_gcp_support", - values = {"define": "with_gcp_support=true"}, + define_values = {"with_gcp_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_hdfs_support", - values = {"define": "with_hdfs_support=true"}, + define_values = {"with_hdfs_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_s3_support", - values = {"define": "with_s3_support=true"}, + define_values = {"with_s3_support": "true"}, + visibility = ["//visibility:public"], +) + +# Crosses between platforms and file system libraries not supported on those +# platforms due to limitations in nested select() statements. +config_setting( + name = "with_gcp_support_windows_override", + define_values = {"with_gcp_support": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_hdfs_support_windows_override", + define_values = {"with_hdfs_support": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_s3_support_windows_override", + define_values = {"with_s3_support": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_gcp_support_android_override", + define_values = {"with_gcp_support": "true"}, + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_hdfs_support_android_override", + define_values = {"with_hdfs_support": "true"}, + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_s3_support_android_override", + define_values = {"with_s3_support": "true"}, + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_gcp_support_ios_override", + define_values = {"with_gcp_support": "true"}, + values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_hdfs_support_ios_override", + define_values = {"with_hdfs_support": "true"}, + values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "with_s3_support_ios_override", + define_values = {"with_s3_support": "true"}, + values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, visibility = ["//visibility:public"], ) config_setting( name = "with_xla_support", - values = {"define": "with_xla_support=true"}, + define_values = {"with_xla_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_gdr_support", - values = {"define": "with_gdr_support=true"}, + define_values = {"with_gdr_support": "true"}, visibility = ["//visibility:public"], ) config_setting( name = "with_verbs_support", - values = {"define": "with_verbs_support=true"}, + define_values = {"with_verbs_support": "true"}, visibility = ["//visibility:public"], ) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 6225c2c705..5eeb861bdd 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -458,16 +458,25 @@ def tf_additional_lib_deps(): def tf_additional_core_deps(): return select({ + "//tensorflow:with_gcp_support_windows_override": [], + "//tensorflow:with_gcp_support_android_override": [], + "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/core/platform/cloud:gcs_file_system", ], "//conditions:default": [], }) + select({ + "//tensorflow:with_hdfs_support_windows_override": [], + "//tensorflow:with_hdfs_support_android_override": [], + "//tensorflow:with_hdfs_support_ios_override": [], "//tensorflow:with_hdfs_support": [ "//tensorflow/core/platform/hadoop:hadoop_file_system", ], "//conditions:default": [], }) + select({ + "//tensorflow:with_s3_support_windows_override": [], + "//tensorflow:with_s3_support_android_override": [], + "//tensorflow:with_s3_support_ios_override": [], "//tensorflow:with_s3_support": [ "//tensorflow/core/platform/s3:s3_file_system", ], @@ -477,9 +486,9 @@ def tf_additional_core_deps(): # TODO(jart, jhseu): Delete when GCP is default on. def tf_additional_cloud_op_deps(): return select({ - "//tensorflow:windows": [], - "//tensorflow:android": [], - "//tensorflow:ios": [], + "//tensorflow:with_gcp_support_windows_override": [], + "//tensorflow:with_gcp_support_android_override": [], + "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", ], @@ -489,9 +498,9 @@ def tf_additional_cloud_op_deps(): # TODO(jart, jhseu): Delete when GCP is default on. def tf_additional_cloud_kernel_deps(): return select({ - "//tensorflow:windows": [], - "//tensorflow:android": [], - "//tensorflow:ios": [], + "//tensorflow:with_gcp_support_windows_override": [], + "//tensorflow:with_gcp_support_android_override": [], + "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops", ], -- GitLab From 87505c44571d149d4e57571be1088b9d1aa2c6fe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 12:03:44 -0800 Subject: [PATCH 0190/1801] Supports logits as a Tensor in MultiHead. PiperOrigin-RevId: 175042091 --- tensorflow/contrib/estimator/BUILD | 5 +- .../estimator/python/estimator/multi_head.py | 67 +++++-- .../python/estimator/multi_head_test.py | 188 +++++++++++++++++- 3 files changed, 244 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 6eb2cfdaca..bc67ef8354 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -204,10 +204,13 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:summary", "//tensorflow/python/estimator:head", + "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", "//tensorflow/python/saved_model:signature_constants", "@six_archive//:six", @@ -229,7 +232,7 @@ py_test( "//tensorflow/python:string_ops", "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", - "//tensorflow/python/ops/losses", + "//tensorflow/python/estimator:prediction_keys", "//tensorflow/python/saved_model:signature_constants", "//third_party/py/numpy", "@six_archive//:six", diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index 69dbfcee62..73bae5acf9 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -22,10 +22,13 @@ import six from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.saved_model import signature_constants +from tensorflow.python.summary import summary _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -72,6 +75,23 @@ def multi_head(heads, head_weights=None): estimator.train(input_fn=input_fn, steps=100) ``` + Also supports `logits` as a `Tensor` of shape + `[D0, D1, ... DN, logits_dimension]`. It will split the `Tensor` along the + last dimension and distribute it appropriately among the heads. E.g.: + + ```python + def model_fn(features, labels, mode): + # Create simple heads and specify head name. + head1 = multi_class_head(n_classes=3, name='head1') + head2 = binary_classification_head(name='head2') + # Create multi-head from two simple heads. + head = multi_head([head1, head2]) + # Create logits for the multihead. + logits = logit_fn(logits_dimension=head.logits_dimension) + # Return the merged EstimatorSpec + return head.create_estimator_spec(..., logits=logits, ...) + ``` + Args: heads: List or tuple of `_Head` instances. All heads must have `name` specified. The first head in the list is the default used at serving time. @@ -161,18 +181,17 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access def create_loss(self, features, mode, logits, labels): """See `Head`.""" - # TODO(roumposg): Add support for logits as single Tensor (with - # _split_logits utility). - if not isinstance(logits, dict): - raise ValueError('logits must be a dict. Single Tensor support coming ' - 'soon.') + if isinstance(logits, dict): + logits_dict = logits + else: + logits_dict = self._split_logits(logits) weighted_sum_losses = [] example_weight_sums = [] labels_by_head = {} for head in self._heads: (weighted_sum_loss, example_weight_sum, processed_labels) = head.create_loss( - features, mode, logits[head.name], labels[head.name]) + features, mode, logits_dict[head.name], labels[head.name]) weighted_sum_losses.append(weighted_sum_loss) example_weight_sums.append(example_weight_sum) labels_by_head[head.name] = processed_labels @@ -205,10 +224,10 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `_Head`.""" - # TODO(roumposg): Add support for logits as single Tensor (with - # _split_logits utility). - if not isinstance(logits, dict): - raise ValueError('logits must be a dict. Given: {}'.format(logits)) + if isinstance(logits, dict): + logits_dict = logits + else: + logits_dict = self._split_logits(logits) if labels and not isinstance(labels, dict): raise ValueError('labels must be a dict. Given: {}'.format(labels)) @@ -219,22 +238,42 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access head.create_estimator_spec( features=features, mode=mode, - logits=logits[head_name], + logits=logits_dict[head_name], labels=labels[head_name] if labels else None, train_op_fn=_no_op_train_fn)) - # TODO(roumposg): Add LOSS and LOSS_MEAN summaries for the total head- - # combined loss. if mode == model_fn.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError('train_op_fn can not be None in TRAIN mode.') - return self._merge_train(all_estimator_spec, train_op_fn) + spec = self._merge_train(all_estimator_spec, train_op_fn) + with ops.name_scope(''): + summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss) + return spec if mode == model_fn.ModeKeys.PREDICT: return self._merge_predict(all_estimator_spec) if mode == model_fn.ModeKeys.EVAL: return self._merge_eval(all_estimator_spec) raise ValueError('mode={} unrecognized'.format(mode)) + def _split_logits(self, logits): + """Splits logits along the last dimension and returns a dict.""" + logits_dict = {} + with ops.name_scope(None, 'split_logits', values=[logits]): + logits = ops.convert_to_tensor(logits) + batch_shape = array_ops.shape(logits)[:-1] + zeros_like_batch_shape = array_ops.zeros_like(batch_shape) + minus_ones_like_batch_shape = -1 * array_ops.ones_like(batch_shape) + begin_idx = 0 + for head in self._heads: + begin_tensor = array_ops.concat( + [zeros_like_batch_shape, [begin_idx]], axis=0) + size_tensor = array_ops.concat( + [minus_ones_like_batch_shape, [head.logits_dimension]], axis=0) + logits_dict[head.name] = array_ops.slice( + logits, begin=begin_tensor, size=size_tensor) + begin_idx += head.logits_dimension + return logits_dict + def _merge_train(self, all_estimator_spec, train_op_fn): """Merges list of `EstimatorSpec` for training. diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 16177aebd5..8d51a298b2 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -106,7 +106,8 @@ class MultiHeadTest(test.TestCase): multi_head = multi_head_lib.multi_head([head1, head2]) self.assertEqual('head1_head2', multi_head.name) - def test_predict_two_heads(self): + def test_predict_two_heads_logits_dict(self): + """Tests predict with logits as dict.""" head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') multi_head = multi_head_lib.multi_head([head1, head2]) @@ -158,6 +159,111 @@ class MultiHeadTest(test.TestCase): expected_probabilities['head2'], sess.run(spec.export_outputs['head2'].scores)) + def test_predict_two_heads_logits_tensor(self): + """Tests predict with logits as Tensor.""" + head1 = head_lib.multi_label_head(n_classes=2, name='head1') + head2 = head_lib.multi_label_head(n_classes=3, name='head2') + multi_head = multi_head_lib.multi_head([head1, head2]) + + logits = np.array( + [[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32) + expected_logits1 = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32) + expected_logits2 = np.array([[2., -2., 2.], [-3., 2., -2.]], + dtype=np.float32) + expected_probabilities = { + 'head1': _sigmoid(expected_logits1), + 'head2': _sigmoid(expected_logits2), + } + + spec = multi_head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) + + self.assertItemsEqual( + (_DEFAULT_SERVING_KEY, 'head1', 'classification/head1', 'predict/head1', + 'head2', 'classification/head2', 'predict/head2'), + spec.export_outputs.keys()) + + # Assert predictions and export_outputs. + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + self.assertIsNone(spec.scaffold.summary_op) + predictions = sess.run(spec.predictions) + self.assertAllClose( + expected_logits1, + predictions[('head1', prediction_keys.PredictionKeys.LOGITS)]) + self.assertAllClose( + expected_logits2, + predictions[('head2', prediction_keys.PredictionKeys.LOGITS)]) + self.assertAllClose( + expected_probabilities['head1'], + predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)]) + self.assertAllClose( + expected_probabilities['head2'], + predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)]) + + self.assertAllClose( + expected_probabilities['head1'], + sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores)) + self.assertAllClose( + expected_probabilities['head1'], + sess.run(spec.export_outputs['head1'].scores)) + self.assertAllClose( + expected_probabilities['head2'], + sess.run(spec.export_outputs['head2'].scores)) + + def test_predict_two_heads_logits_tensor_multi_dim(self): + """Tests predict with multi-dimensional logits of shape [2, 2, 5].""" + head1 = head_lib.regression_head(label_dimension=2, name='head1') + head2 = head_lib.regression_head(label_dimension=3, name='head2') + multi_head = multi_head_lib.multi_head([head1, head2]) + + logits = np.array( + [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], + [[-1.5, 1., -3., 2., -2.], [-1.5, 1., -3., 2., -2.]]], + dtype=np.float32) + expected_logits1 = np.array( + [[[-1., 1.], [-1., 1.]], + [[-1.5, 1.], [-1.5, 1.]]], + dtype=np.float32) + expected_logits2 = np.array( + [[[2., -2., 2.], [2., -2., 2.]], + [[-3., 2., -2.], [-3., 2., -2.]]], + dtype=np.float32) + + spec = multi_head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) + + self.assertItemsEqual( + (_DEFAULT_SERVING_KEY, 'head1', 'regression/head1', 'predict/head1', + 'head2', 'regression/head2', 'predict/head2'), + spec.export_outputs.keys()) + + # Assert predictions and export_outputs. + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + self.assertIsNone(spec.scaffold.summary_op) + predictions = sess.run(spec.predictions) + self.assertAllClose( + expected_logits1, + predictions[('head1', prediction_keys.PredictionKeys.PREDICTIONS)]) + self.assertAllClose( + expected_logits2, + predictions[('head2', prediction_keys.PredictionKeys.PREDICTIONS)]) + + self.assertAllClose( + expected_logits1, + sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].value)) + self.assertAllClose( + expected_logits1, + sess.run(spec.export_outputs['head1'].value)) + self.assertAllClose( + expected_logits2, + sess.run(spec.export_outputs['head2'].value)) + def test_eval_two_heads_with_weights(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') @@ -284,6 +390,84 @@ class MultiHeadTest(test.TestCase): # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13 self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol) + def test_train_create_loss_logits_tensor(self): + """Tests create_loss with logits Tensor.""" + weights1 = np.array([[1.], [2.]], dtype=np.float32) + weights2 = np.array([[2.], [3.]]) + head1 = head_lib.multi_label_head(n_classes=2, name='head1', + weight_column='weights1') + head2 = head_lib.multi_label_head(n_classes=3, name='head2', + weight_column='weights2') + multi_head = multi_head_lib.multi_head( + [head1, head2], head_weights=[1., 2.]) + + logits = np.array([[-10., 10., 20., -20., 20.], + [-15., 10., -30., 20., -20.]], dtype=np.float32) + labels = { + 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), + 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), + } + weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss( + features={ + 'x': np.array(((42,),), dtype=np.int32), + 'weights1': weights1, + 'weights2': weights2 + }, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels) + tol = 1e-3 + with self.test_session(): + # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]] + # = [10, 7.5] + # weighted_sum_loss = 1 * 10 + 2 * 7.5 = 25 + # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]] + # = [20, 10] + # weighted_sum_loss = 2 * 20 + 3 * 10 = 70 + # head-weighted merge = 1 * 25 + 2 * 70 = 165 + self.assertAllClose(165, weighted_sum_loss.eval(), rtol=tol, atol=tol) + # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13 + self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol) + + def test_train_create_loss_logits_tensor_multi_dim(self): + """Tests create_loss with multi-dimensional logits of shape [2, 2, 5].""" + head1 = head_lib.regression_head(label_dimension=2, name='head1') + head2 = head_lib.regression_head(label_dimension=3, name='head2') + multi_head = multi_head_lib.multi_head([head1, head2]) + + logits = np.array( + [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], + [[-1.5, 1.5, -2., 2., -2.], [-1.5, 1.5, -2., 2., -2.]]], + dtype=np.float32) + labels = { + 'head1': np.array([[[1., 0.], [1., 0.]], + [[1.5, 1.5], [1.5, 1.5]]], dtype=np.float32), + 'head2': np.array([[[0., 1., 0.], [0., 1., 0.]], + [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32), + } + # Loss for the first head: + # loss1 = (1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 + + # (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2 + # = 28 + # Loss for the second head: + # loss2 = (0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 + + # (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2 + # = 74 + expected_weighted_sum_loss = 28. + 74. + + weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss( + features={}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels) + tol = 1e-3 + with self.test_session(): + self.assertAllClose( + expected_weighted_sum_loss, weighted_sum_loss.eval(), + rtol=tol, atol=tol) + self.assertAllClose( + 2. * 2. * 5., example_weight_sum.eval(), rtol=tol, atol=tol) + def test_train_one_head(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') multi_head = multi_head_lib.multi_head([head1]) @@ -327,6 +511,7 @@ class MultiHeadTest(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries(self, { + metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss, # Average loss over examples. metric_keys.MetricKeys.LOSS_MEAN + '/head1': expected_loss / 2, @@ -387,6 +572,7 @@ class MultiHeadTest(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries(self, { + metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss_head1, metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2, # Average loss over examples. -- GitLab From fff359e8e7715736bf96ce61d904d87f4c8bf51a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 12:21:40 -0800 Subject: [PATCH 0191/1801] Add padded_batch_and_drop_remainder and factor out shared filter_irregular_batches. PiperOrigin-RevId: 175045241 --- .../kernel_tests/batch_dataset_op_test.py | 225 +++++++++++------- .../contrib/data/python/ops/batching.py | 87 +++++-- 2 files changed, 207 insertions(+), 105 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 670f622c3c..951d4bb5f7 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -52,8 +52,9 @@ class BatchDatasetTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) - .repeat(count).batch(batch_size).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) + .repeat(count).batch(batch_size).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -69,7 +70,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(14): - self.assertAllEqual(component[(i*14 + j) % 7]**2, + self.assertAllEqual(component[(i * 14 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -84,12 +85,12 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(8): - self.assertAllEqual(component[(i*8 + j) % 7]**2, + self.assertAllEqual(component[(i * 8 + j) % 7]**2, result_component[j]) result = sess.run(get_next) for component, result_component in zip(components, result): for j in range((14 * 7) % 8): - self.assertAllEqual(component[((num_batches - 1)*8 + j) % 7]**2, + self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -107,10 +108,10 @@ class BatchDatasetTest(test.TestCase): seq_lens = array_ops.placeholder(dtypes.int32, shape=[None]) padded_shape = array_ops.placeholder(dtypes.int64, shape=[1]) - iterator = (dataset_ops.Dataset.from_tensor_slices(seq_lens) - .map(lambda x: array_ops.fill([x], x)).padded_batch( - 4, - padded_shapes=padded_shape).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(seq_lens) + .map(lambda x: array_ops.fill([x], x)).padded_batch( + 4, padded_shapes=padded_shape).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -118,35 +119,40 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: # Test with random sequence lengths, and max padding. random_seq_lens = np.random.randint(20, size=(32,)).astype(np.int32) - sess.run(init_op, feed_dict={padded_shape: [-1], - seq_lens: random_seq_lens}) + sess.run( + init_op, feed_dict={ + padded_shape: [-1], + seq_lens: random_seq_lens + }) for i in range(8): result = sess.run(get_next) padded_len = np.max(result) self.assertEqual((4, padded_len), result.shape) for j in range(4): - seq_len = random_seq_lens[(i*4)+j] + seq_len = random_seq_lens[(i * 4) + j] self.assertAllEqual(result[j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[j, seq_len:], [0] * (padded_len - seq_len)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) # Test with random sequence lengths, and constant padding. - sess.run(init_op, feed_dict={padded_shape: [25], - seq_lens: random_seq_lens}) + sess.run( + init_op, feed_dict={ + padded_shape: [25], + seq_lens: random_seq_lens + }) for i in range(8): result = sess.run(get_next) self.assertEqual((4, 25), result.shape) for j in range(4): - seq_len = random_seq_lens[(i*4)+j] + seq_len = random_seq_lens[(i * 4) + j] self.assertAllEqual(result[j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[j, seq_len:], [0] * (25 - seq_len)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) # Test correct handling of empty tensors. - sess.run(init_op, feed_dict={padded_shape: [-1], - seq_lens: [0, 0, 0, 0]}) + sess.run(init_op, feed_dict={padded_shape: [-1], seq_lens: [0, 0, 0, 0]}) result = sess.run(get_next) self.assertAllEqual([[], [], [], []], result) with self.assertRaises(errors.OutOfRangeError): @@ -154,8 +160,7 @@ class BatchDatasetTest(test.TestCase): # Test error handling with constant sequence lengths, and # too-short padding. - sess.run(init_op, feed_dict={padded_shape: [5], - seq_lens: [6, 5, 5, 5]}) + sess.run(init_op, feed_dict={padded_shape: [5], seq_lens: [6, 5, 5, 5]}) with self.assertRaises(errors.DataLossError): result = sess.run(get_next) @@ -166,11 +171,13 @@ class BatchDatasetTest(test.TestCase): def fill_tuple(x): filled = array_ops.fill([x], x) return (filled, string_ops.as_string(filled)) - iterator = (dataset_ops.Dataset.from_tensor_slices(seq_lens).map(fill_tuple) - .padded_batch( - 4, - padded_shapes=(padded_shape, padded_shape), - padding_values=(-1, "")).make_initializable_iterator()) + + iterator = ( + dataset_ops.Dataset.from_tensor_slices(seq_lens).map(fill_tuple) + .padded_batch( + 4, + padded_shapes=(padded_shape, padded_shape), + padding_values=(-1, "")).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -178,15 +185,18 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: # Test with random sequence lengths, and max padding. random_seq_lens = np.random.randint(20, size=(32,)).astype(np.int32) - sess.run(init_op, feed_dict={padded_shape: [-1], - seq_lens: random_seq_lens}) + sess.run( + init_op, feed_dict={ + padded_shape: [-1], + seq_lens: random_seq_lens + }) for i in range(8): result = sess.run(get_next) padded_len = np.max(result[0]) self.assertEqual((4, padded_len), result[0].shape) self.assertEqual((4, padded_len), result[1].shape) for j in range(4): - seq_len = random_seq_lens[(i*4)+j] + seq_len = random_seq_lens[(i * 4) + j] self.assertAllEqual(result[0][j, :seq_len], [seq_len] * seq_len) self.assertAllEqual(result[0][j, seq_len:], [-1] * (padded_len - seq_len)) @@ -220,20 +230,21 @@ class BatchDatasetTest(test.TestCase): constant_op.constant([-1, -1], dtype=dtypes.int64), constant_op.constant([37], dtype=dtypes.int64))) - for dataset in [dynamic_padding_from_tensor_shapes, - dynamic_padding_from_lists, - dynamic_padding_from_lists_with_minus_one, - dynamic_padding_from_tensors]: + for dataset in [ + dynamic_padding_from_tensor_shapes, dynamic_padding_from_lists, + dynamic_padding_from_lists_with_minus_one, dynamic_padding_from_tensors + ]: self.assertEqual([None, None], dataset.output_shapes[0].as_list()) self.assertEqual([None, None, None], dataset.output_shapes[1].as_list()) self.assertEqual([None, 37], dataset.output_shapes[2].as_list()) def testDenseToSparseBatchDataset(self): components = np.random.randint(12, size=(100,)).astype(np.int32) - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x], x)).apply( - batching.dense_to_sparse_batch(4, [12])) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .map(lambda x: array_ops.fill([x], x)).apply( + batching.dense_to_sparse_batch(4, + [12])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -242,24 +253,26 @@ class BatchDatasetTest(test.TestCase): for start in range(0, len(components), 4): results = sess.run(get_next) + self.assertAllEqual([[i, j] + for i, c in enumerate(components[start:start + 4]) + for j in range(c)], results.indices) self.assertAllEqual( - [[i, j] for i, c in enumerate(components[start:start+4]) - for j in range(c)], results.indices) - self.assertAllEqual( - [c for c in components[start:start+4] for _ in range(c)], + [c for c in components[start:start + 4] for _ in range(c)], results.values) - self.assertAllEqual( - [min(4, len(components) - start), 12], results.dense_shape) + self.assertAllEqual([min(4, + len(components) - start), 12], + results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testDenseToSparseBatchDatasetWithUnknownShape(self): components = np.random.randint(5, size=(40,)).astype(np.int32) - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x, x], x)).apply( - batching.dense_to_sparse_batch( - 4, [5, -1])).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .map(lambda x: array_ops.fill([x, x], x)).apply( + batching.dense_to_sparse_batch( + 4, [5, -1])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -268,27 +281,30 @@ class BatchDatasetTest(test.TestCase): for start in range(0, len(components), 4): results = sess.run(get_next) - self.assertAllEqual( - [[i, j, z] for i, c in enumerate(components[start:start+4]) - for j in range(c) for z in range(c)], results.indices) - self.assertAllEqual( - [c for c in components[start:start+4] - for _ in range(c) for _ in range(c)], - results.values) - self.assertAllEqual( - [min(4, len(components) - start), - 5, - np.max(components[start:start+4])], - results.dense_shape) + self.assertAllEqual([[i, j, z] + for i, c in enumerate(components[start:start + 4]) + for j in range(c) + for z in range(c)], results.indices) + self.assertAllEqual([ + c + for c in components[start:start + 4] for _ in range(c) + for _ in range(c) + ], results.values) + self.assertAllEqual([ + min(4, + len(components) - start), 5, + np.max(components[start:start + 4]) + ], results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testDenseToSparseBatchDatasetWithInvalidShape(self): input_tensor = array_ops.constant([[1]]) - iterator = (dataset_ops.Dataset.from_tensors(input_tensor) - .apply(batching.dense_to_sparse_batch(4, [-2])) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensors(input_tensor).apply( + batching.dense_to_sparse_batch(4, [-2])) + .make_initializable_iterator()) init_op = iterator.initializer with self.test_session() as sess: @@ -298,8 +314,10 @@ class BatchDatasetTest(test.TestCase): def testDenseToSparseBatchDatasetShapeErrors(self): input_tensor = array_ops.placeholder(dtypes.int32) - iterator = (dataset_ops.Dataset.from_tensors(input_tensor).apply( - batching.dense_to_sparse_batch(4, [12])).make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensors(input_tensor).apply( + batching.dense_to_sparse_batch(4, + [12])).make_initializable_iterator()) init_op = iterator.initializer get_next = sparse_tensor.SparseTensor(*iterator.get_next()) @@ -356,8 +374,7 @@ class BatchDatasetTest(test.TestCase): def testUnbatchMultiElementTupleDataset(self): data = tuple([(math_ops.range(10 * i, 10 * i + 10), - array_ops.fill([10], "hi")) - for i in range(3)]) + array_ops.fill([10], "hi")) for i in range(3)]) data = dataset_ops.Dataset.from_tensor_slices(data) expected_types = ((dtypes.int32, dtypes.string),) * 3 data = data.batch(2) @@ -370,9 +387,7 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: for i in range(10): - self.assertEqual(((i, b"hi"), - (10 + i, b"hi"), - (20 + i, b"hi")), + self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")), sess.run(op)) with self.assertRaises(errors.OutOfRangeError): @@ -385,9 +400,10 @@ class BatchDatasetTest(test.TestCase): batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = (dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size)) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(batch_size)) + .make_initializable_iterator()) next_element = iterator.get_next() @@ -404,14 +420,51 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testPaddedBatchAndDropRemainder(self): + els = [] + for length in [3, 6, 9, 4, 12, 10, 2]: + els.append((np.array(length), np.arange(length) + 1, + np.array(length * 2))) + + dataset = dataset_ops.Dataset.from_tensors(els[0]) + for el in els[1:]: + dataset = dataset.concatenate(dataset_ops.Dataset.from_tensors(el)) + + batch_size = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = ( + dataset.apply( + batching.padded_batch_and_drop_remainder( + batch_size, ([], [None], []))).make_initializable_iterator()) + + next_element = iterator.get_next() + + with self.test_session() as sess: + for test_batch_size in [1, 3, 7, 10]: + sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size}) + num_batches = 7 // test_batch_size + for i in range(num_batches): + result = sess.run(next_element) + for component_idx, result_component in enumerate(result): + for j in range(test_batch_size): + data_idx = i * test_batch_size + j + comp = result_component[j] + unpadded = comp[comp > 0] + if np.isscalar(comp): + # The boolean mask indexing above adds a dim back. Rm it. + unpadded = unpadded[0] + self.assertAllEqual(els[data_idx][component_idx], unpadded) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testBatchAndDropRemainderShapeInference(self): - components = (array_ops.placeholder(dtypes.int32), (array_ops.placeholder( - dtypes.int32, shape=[None]), array_ops.placeholder( - dtypes.int32, shape=[20, 30]))) + components = (array_ops.placeholder(dtypes.int32), + (array_ops.placeholder(dtypes.int32, shape=[None]), + array_ops.placeholder(dtypes.int32, shape=[20, 30]))) # Test with a statically known batch size. - dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(128))) + dataset = ( + dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(128))) self.assertIs(None, dataset.output_shapes[0].ndims) self.assertEqual([128], dataset.output_shapes[1][0].as_list()) @@ -420,8 +473,9 @@ class BatchDatasetTest(test.TestCase): # Test with a dynamic batch size: the static shape will be unknown, because # `batch_size` is a placeholder. batch_size = array_ops.placeholder(dtypes.int64) - dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size))) + dataset = ( + dataset_ops.Dataset.from_tensor_slices(components).apply( + batching.batch_and_drop_remainder(batch_size))) self.assertIs(None, dataset.output_shapes[0].ndims) self.assertEqual([None], dataset.output_shapes[1][0].as_list()) @@ -441,9 +495,10 @@ class BatchDatasetTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = (dataset_ops.Dataset.from_tensor_slices(components).repeat(count) - .apply(batching.map_and_batch(_map_fn, batch_size)) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply( + batching.map_and_batch(_map_fn, batch_size)) + .make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -459,7 +514,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(14): - self.assertAllEqual(component[(i*14 + j) % 7]**2, + self.assertAllEqual(component[(i * 14 + j) % 7]**2, result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -474,7 +529,7 @@ class BatchDatasetTest(test.TestCase): result = sess.run(get_next) for component, result_component in zip(components, result): for j in range(8): - self.assertAllEqual(component[(i*8 + j) % 7]**2, + self.assertAllEqual(component[(i * 8 + j) % 7]**2, result_component[j]) # The last batch should fail with `OutOfRange`. with self.assertRaises(errors.OutOfRangeError): @@ -495,8 +550,9 @@ class BatchDatasetTest(test.TestCase): array_ops.check_numerics( constant_op.constant(1.0) / constant_op.constant(0.0), "oops")) batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = (dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) - .make_initializable_iterator()) + iterator = ( + dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) + .make_initializable_iterator()) init_op = iterator.initializer with self.test_session() as sess: with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"): @@ -504,6 +560,7 @@ class BatchDatasetTest(test.TestCase): def testBatchAndMapDatasetShapeMismatch(self): """Test a dataset that maps a TF function across its input elements.""" + def generator(): yield [1] yield [2] diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index abc9212a87..d4ade7adfd 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -103,6 +103,42 @@ def unbatch(): return _apply_fn +def filter_irregular_batches(batch_size): + """Transformation that filters out batches that are not of size batch_size.""" + + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + tensor_batch_size = ops.convert_to_tensor( + batch_size, dtype=dtypes.int64, name="batch_size") + + flattened = _RestructuredDataset(dataset, + tuple(nest.flatten(dataset.output_types))) + + def _predicate(*xs): + """Return `True` if this element is a full batch.""" + # Extract the dynamic batch size from the first component of the flattened + # batched element. + first_component = xs[0] + first_component_batch_size = array_ops.shape( + first_component, out_type=dtypes.int64)[0] + + return math_ops.equal(first_component_batch_size, tensor_batch_size) + + filtered = flattened.filter(_predicate) + + maybe_constant_batch_size = tensor_util.constant_value(tensor_batch_size) + + def _set_first_dimension(shape): + return shape.merge_with( + tensor_shape.vector(maybe_constant_batch_size).concatenate(shape[1:])) + + known_shapes = nest.map_structure(_set_first_dimension, + dataset.output_shapes) + return _RestructuredDataset(filtered, dataset.output_types, known_shapes) + + return _apply_fn + + def batch_and_drop_remainder(batch_size): """A batching transformation that omits the final small batch (if present). @@ -135,34 +171,43 @@ def batch_and_drop_remainder(batch_size): def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - tensor_batch_size = ops.convert_to_tensor( - batch_size, dtype=dtypes.int64, name="batch_size") + batched = dataset.batch(batch_size) + return filter_irregular_batches(batch_size)(batched) - batched = dataset.batch(tensor_batch_size) - flattened = _RestructuredDataset(batched, - tuple(nest.flatten(batched.output_types))) + return _apply_fn - def _predicate(*xs): - """Return `True` if this element is a full batch.""" - # Extract the dynamic batch size from the first component of the flattened - # batched element. - first_component = xs[0] - first_component_batch_size = array_ops.shape( - first_component, out_type=dtypes.int64)[0] - return math_ops.equal(first_component_batch_size, tensor_batch_size) +def padded_batch_and_drop_remainder(batch_size, + padded_shapes, + padding_values=None): + """A batching and padding transformation that omits the final small batch. - filtered = flattened.filter(_predicate) + Like @{tf.data.Dataset.padded_batch}, this transformation combines + consecutive elements of this dataset into batches. However, if the batch + size does not evenly divide the input dataset size, this transformation will + drop the final smaller element. - maybe_constant_batch_size = tensor_util.constant_value(tensor_batch_size) + See `@{tf.contrib.data.batch_and_drop_remainder}` for more details. - def _set_first_dimension(shape): - return shape.merge_with( - tensor_shape.vector(maybe_constant_batch_size).concatenate(shape[1:])) + Args: + batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of + consecutive elements of this dataset to combine in a single batch. + padded_shapes: A nested structure of `tf.TensorShape` or + `tf.int64` vector tensor-like objects. See + @{tf.data.Dataset.padded_batch} for details. + padding_values: (Optional.) A nested structure of scalar-shaped + `tf.Tensor`. See @{tf.data.Dataset.padded_batch} for details. - known_shapes = nest.map_structure(_set_first_dimension, - batched.output_shapes) - return _RestructuredDataset(filtered, batched.output_types, known_shapes) + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply} + """ + + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + batched = dataset.padded_batch( + batch_size, padded_shapes=padded_shapes, padding_values=padding_values) + return filter_irregular_batches(batch_size)(batched) return _apply_fn -- GitLab From 9ac4d0b7db502dbb7acf8d7d752ea37fbd13182a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 12:56:45 -0800 Subject: [PATCH 0192/1801] Support replacing tpu_config. PiperOrigin-RevId: 175049981 --- tensorflow/contrib/tpu/python/tpu/tpu_config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 3965c087a1..097acd5ee7 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -109,3 +109,12 @@ class RunConfig(run_config_lib.RunConfig): @property def tpu_config(self): return self._tpu_config + + def replace(self, **kwargs): + if 'tpu_config' not in kwargs: + return super(RunConfig, self).replace(**kwargs) + + tpu_config = kwargs.pop('tpu_config') + new_instance = super(RunConfig, self).replace(**kwargs) + new_instance._tpu_config = tpu_config # pylint: disable=protected-access + return new_instance -- GitLab From 6c95675492aa8d25619f5e4ce1674582c051a7fe Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 8 Nov 2017 13:00:12 -0800 Subject: [PATCH 0193/1801] Allow Operation._get_attr() to work with all attr types with C API enabled This is achieved by accessing the AttrValue directly and using the existing Python code instead of dispatching to the specific C API attr getter for every type. I started going down the dispatch path, but it turns out to be a lot of code (spread across Python, C, and SWIG), and this is likely good enough from a performance standpoint. We can optimize in the future if necessary. In addition, changes the colocation group logic to use _set_attr() and get_attr(), and makes _set_attr() work with the C API disabled. This allows the colocation tests to pass with both the C API enabled and disabled. Without these additional changes, the "_class" attribute would be set on the C NodeDef, and then it would try to retrieve it from the Python NodeDef. PiperOrigin-RevId: 175050473 --- tensorflow/c/c_api.cc | 4 +- tensorflow/c/c_api_test.cc | 4 +- tensorflow/python/client/tf_session.i | 10 --- tensorflow/python/framework/ops.py | 69 +++++++++++--------- tensorflow/python/framework/ops_test.py | 87 +++++++++++++------------ tensorflow/python/framework/test_ops.cc | 23 +++++++ 6 files changed, 109 insertions(+), 88 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 6dd1b99910..dd638de3c6 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -890,8 +890,8 @@ const tensorflow::AttrValue* GetAttrValue(TF_Operation* oper, TF_Status* status) { const tensorflow::AttrValue* attr = oper->node.attrs().Find(attr_name); if (attr == nullptr) { - status->status = - InvalidArgument("Operation has no attr named '", attr_name, "'."); + status->status = InvalidArgument("Operation '", oper->node.name(), + "' has no attr named '", attr_name, "'."); } return attr; } diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 05881e619b..e0057eb51c 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -383,7 +383,7 @@ TEST(CAPI, Graph) { EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s)); ASSERT_FALSE(GetAttrValue(feed, "missing", &attr_value, s)); - EXPECT_EQ(string("Operation has no attr named 'missing'."), + EXPECT_EQ(string("Operation 'feed' has no attr named 'missing'."), string(TF_Message(s))); // Make a constant oper with the scalar "3". @@ -1054,7 +1054,7 @@ class CApiColocationTest : public ::testing::Test { TF_OperationGetAttrMetadata(op, tensorflow::kColocationAttrName, s_); if (expected.empty()) { ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_); - EXPECT_EQ(std::string("Operation has no attr named '_class'."), + EXPECT_EQ(std::string("Operation 'add' has no attr named '_class'."), std::string(TF_Message(s_))); return; } diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index f45bc13602..40731aba7d 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -344,16 +344,6 @@ bool PyTensorListToVector(PyObject* py_tensor_list, %rename("_TF_SetConfig") TF_SetConfig; %rename("_TF_NewSessionOptions") TF_NewSessionOptions; -// Create temporary int64_t to pass to TF_OperationGetAttrInt -%typemap(in, numinputs=0) int64_t* value (int64_t val) { - $1 = &val; -} - -// Convert value to Python int -%typemap(argout) int64_t* value { - $result = PyInt_FromLong(*$1); -} - %include "tensorflow/c/c_api.h" %include "tensorflow/c/python_api.h" diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index b256af2182..ad2e2993c1 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1641,13 +1641,15 @@ class Operation(object): default_colocation_group = [ compat.as_bytes("loc:@%s" % self._node_def.name) ] - if "_class" not in self._node_def.attr: + try: + class_attr = self.get_attr("_class") + except ValueError: # This op has no explicit colocation group, so it is itself its # own root of a colocation group. return default_colocation_group attr_groups = [ - class_name for class_name in self.get_attr("_class") + class_name for class_name in class_attr if class_name.startswith(b"loc:@") ] @@ -2062,16 +2064,19 @@ class Operation(object): def _set_attr(self, attr_name, attr_value): """Private method used to set an attribute in the node_def.""" - if not _USE_C_API: - assert "_set_attr not supported with _USE_C_API == False" - return - buf = c_api.TF_NewBufferFromString( - compat.as_bytes(attr_value.SerializeToString())) - try: - with errors.raise_exception_on_not_ok_status() as status: - c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf, status) # pylint: disable=protected-access - finally: - c_api.TF_DeleteBuffer(buf) + if _USE_C_API: + buf = c_api.TF_NewBufferFromString( + compat.as_bytes(attr_value.SerializeToString())) + try: + with errors.raise_exception_on_not_ok_status() as status: + # pylint: disable=protected-access + c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf, + status) + # pylint: enable=protected-access + finally: + c_api.TF_DeleteBuffer(buf) + else: + self._node_def.attr[attr_name].CopyFrom(attr_value) def get_attr(self, name): """Returns the value of the attr of this op with the given `name`. @@ -2085,25 +2090,24 @@ class Operation(object): Raises: ValueError: If this op does not have an attr with the given `name`. """ - if _USE_C_API: + fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] + if self._c_op: try: - # TODO(b/65162920): remove this try/except block when all attrs are - # implemented to use the _set_attr method instead of node_def.attr. - with errors.raise_exception_on_not_ok_status() as status: - metadata = c_api.TF_OperationGetAttrMetadata(self._c_op, name, status) - with errors.raise_exception_on_not_ok_status() as status: - if metadata.type == c_api.TF_ATTR_INT and metadata.is_list == 0: - return c_api.TF_OperationGetAttrInt(self._c_op, name, status) - except errors.InvalidArgumentError: - # Colocation ops are failing to find attrs begininning with "_*". They - # should fall through to the not-CAPI logic until the attribute is set - # via the C-API always. - pass + with c_api_util.tf_buffer() as buf: + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf, status) + data = c_api.TF_GetBuffer(buf) + except errors.InvalidArgumentError as e: + # Convert to ValueError for backwards compatibility. + raise ValueError(str(e)) + x = attr_value_pb2.AttrValue() + x.ParseFromString(data) + else: + if name not in self._node_def.attr: + raise ValueError( + "No attr named '" + name + "' in " + str(self._node_def)) + x = self._node_def.attr[name] - fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] - if name not in self._node_def.attr: - raise ValueError("No attr named '" + name + "' in " + str(self._node_def)) - x = self._node_def.attr[name] # Treat an empty oneof value as an empty list. if not x.WhichOneof("value"): return [] @@ -3103,9 +3107,10 @@ class Graph(object): ret._set_device(colocation_op.device) # pylint: disable=protected-access all_colocation_groups = sorted(set(all_colocation_groups)) - ret.node_def.attr["_class"].CopyFrom( - attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue( - s=all_colocation_groups))) + # pylint: disable=protected-access + ret._set_attr("_class", attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(s=all_colocation_groups))) + # pylint: enable=protected-access # Sets "container" attribute if # (1) self._container is not None diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 3087d6060b..4e931e00c5 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -31,9 +31,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_ops from tensorflow.python.framework import test_util from tensorflow.python.framework import versions @@ -357,54 +359,55 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual("", repr(op)) def testGetAttr(self): - # TODO(b/65162920): implement all tests for get_attr with C API + op = test_ops.default_attrs() + self.assertEqual(op.get_attr("string_val"), b"abc") + self.assertEqual(op.get_attr("string_list_val"), [b"abc", b""]) + self.assertEqual(op.get_attr("int_val"), 123) + self.assertEqual(op.get_attr("int_list_val"), [1, 2, 3]) + self.assertEqual(op.get_attr("float_val"), 10.0) + self.assertEqual(op.get_attr("float_list_val"), [10.0]) + self.assertEqual(op.get_attr("bool_val"), True) + self.assertEqual(op.get_attr("bool_list_val"), [True, False]) + self.assertEqual(op.get_attr("shape_val"), + tensor_shape.as_shape([2, 1]).as_proto()) + self.assertEqual(op.get_attr("shape_list_val"), + [tensor_shape.as_shape([]).as_proto(), + tensor_shape.as_shape([1]).as_proto()]) + self.assertEqual(op.get_attr("tensor_val"), + tensor_util.make_tensor_proto(1, dtypes.int32)) + self.assertEqual(op.get_attr("tensor_list_val"), + [tensor_util.make_tensor_proto(1, dtypes.int32)]) + + type_val = op.get_attr("type_val") + # First check that type_val is a DType, because the assertEquals will work + # no matter what since DType overrides __eq__ + self.assertIsInstance(type_val, dtypes.DType) + self.assertEqual(type_val, dtypes.int32) + + type_list_val = op.get_attr("type_list_val") + self.assertTrue(all(isinstance(x, dtypes.DType) for x in type_list_val)) + self.assertEqual(type_list_val, [dtypes.int32, dtypes.float32]) + + @function.Defun(dtypes.float32, func_name="MyFunc") + def func(x): + return x + + op = test_ops.func_attr(func) + self.assertEqual(op.get_attr("f"), + attr_value_pb2.NameAttrList(name="MyFunc")) + + # Try fetching missing attr if ops._USE_C_API: - op = test_ops.int_attr().op - self.assertEqual(op.get_attr("foo"), 1) - - op_str = test_ops.string_list_attr(a=["z"], b="y") - self.assertEqual(op_str.get_attr("a"), [b"z"]) - self.assertEqual(op_str.get_attr("b"), b"y") - + error_msg = "Operation 'FuncAttr' has no attr named 'FakeAttr'." else: - list_value = attr_value_pb2.AttrValue.ListValue() - - list_value.type.append(types_pb2.DT_STRING) - list_value.type.append(types_pb2.DT_DOUBLE) - op = ops.Operation( - ops._NodeDef( - "None", - "op1", - attrs={ - "value": - attr_value_pb2.AttrValue(i=32), - "dtype": - attr_value_pb2.AttrValue(type=types_pb2.DT_INT32), - "list": - attr_value_pb2.AttrValue(list=list_value), - "func": - attr_value_pb2.AttrValue( - func=attr_value_pb2.NameAttrList()) - }), ops.Graph(), [], [dtypes.int32]) - self.assertEqual(32, op.get_attr("value")) - self.assertEqual("", op.get_attr("func").name) - - d = op.get_attr("dtype") - # First check that d is a DType, because the assertEquals will - # work no matter what since DType overrides __eq__ - self.assertIsInstance(d, dtypes.DType) - self.assertEqual(dtypes.int32, d) - - l = op.get_attr("list") - for x in l: - self.assertIsInstance(x, dtypes.DType) - self.assertEqual([dtypes.string, dtypes.double], l) + error_msg = "No attr named 'FakeAttr' in name: \"FuncAttr\"" + + with self.assertRaisesRegexp(ValueError, error_msg): + op.get_attr("FakeAttr") # TODO(b/65162920): remove this test when users who are directly mutating the # node_def have been updated to proper usage. def testSetAttr(self): - if not ops._USE_C_API: - return op = test_ops.int_attr().op op._set_attr("foo", attr_value_pb2.AttrValue(i=2)) # TODO(skyewm): add node_def check diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc index a8b7fc543f..35e0167b26 100644 --- a/tensorflow/python/framework/test_ops.cc +++ b/tensorflow/python/framework/test_ops.cc @@ -341,4 +341,27 @@ REGISTER_OP("StringListAttr") .Attr("b: string") .SetShapeFn(shape_inference::UnknownShape); +REGISTER_OP("DefaultAttrs") + .Attr("string_val: string = 'abc'") + .Attr("string_list_val: list(string) = ['abc', '']") + .Attr("int_val: int = 123") + .Attr("int_list_val: list(int) = [1, 2, 3]") + .Attr("float_val: float = 10.0") + .Attr("float_list_val: list(float) = [10.0]") + .Attr("bool_val: bool = true") + .Attr("bool_list_val: list(bool) = [true, false]") + .Attr("type_val: type = DT_INT32") + .Attr("type_list_val: list(type) = [DT_INT32, DT_FLOAT]") + .Attr("shape_val: shape = { dim { size: 2 } dim { size: 1 } }") + .Attr("shape_list_val: list(shape) = [{}, { dim { size: 1} }]") + .Attr("tensor_val: tensor = { dtype: DT_INT32 tensor_shape: {} int_val: 1}") + .Attr( + "tensor_list_val: list(tensor) = " + "[{ dtype: DT_INT32 tensor_shape: {} int_val: 1}]") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("FuncAttr") + .Attr("f: func") + .SetShapeFn(shape_inference::UnknownShape); + } // end namespace tensorflow -- GitLab From 2d682f84c186f7aca37d8e6e1b08c731dcc978e9 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 13:19:27 -0800 Subject: [PATCH 0194/1801] Give a better error message when placeholders are used with eager PiperOrigin-RevId: 175053592 --- tensorflow/python/ops/array_ops.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 6b4919b16f..61bd41e7de 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1653,6 +1653,8 @@ def placeholder(dtype, shape=None, name=None): print(sess.run(y, feed_dict={x: rand_array})) # Will succeed. ``` + @compatibility{eager} Placeholders are not compatible with eager execution. + Args: dtype: The type of elements in the tensor to be fed. shape: The shape of the tensor to be fed (optional). If the shape is not @@ -1662,7 +1664,14 @@ def placeholder(dtype, shape=None, name=None): Returns: A `Tensor` that may be used as a handle for feeding a value, but not evaluated directly. + + Raises: + RuntimeError: if eager execution is enabled """ + if context.in_eager_mode(): + raise RuntimeError("tf.placeholder() is not compatible with " + "eager execution.") + return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name) @@ -1706,6 +1715,8 @@ def sparse_placeholder(dtype, shape=None, name=None): print(sess.run(y, feed_dict={x: sp_value})) # Will succeed. ``` + @compatibility{eager} Placeholders are not compatible with eager execution. + Args: dtype: The type of `values` elements in the tensor to be fed. shape: The shape of the tensor to be fed (optional). If the shape is not @@ -1715,7 +1726,14 @@ def sparse_placeholder(dtype, shape=None, name=None): Returns: A `SparseTensor` that may be used as a handle for feeding a value, but not evaluated directly. + + Raises: + RuntimeError: if eager execution is enabled """ + if context.in_eager_mode(): + raise RuntimeError("tf.placeholder() is not compatible with " + "eager execution.") + shape_name = (name + "/shape") if name is not None else None shape, rank = _normalize_sparse_shape(shape, shape_name) if shape is None: -- GitLab From fd52578963fdc3474be30c38fa9027c1c407301b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 8 Nov 2017 13:34:58 -0800 Subject: [PATCH 0195/1801] Optimize gradient subgraphs by taking advantage of symbolic shapes whenever possible. PiperOrigin-RevId: 175055770 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/constant_folding.cc | 100 ++++++++++++++++-- .../grappler/optimizers/constant_folding.h | 6 +- .../optimizers/constant_folding_test.cc | 53 ++++++++++ .../grappler/optimizers/meta_optimizer.cc | 4 +- 5 files changed, 153 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 669d02815c..54004a5e07 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -112,6 +112,7 @@ tf_cc_test( deps = [ ":constant_folding", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", "//tensorflow/core:all_kernels", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index cb02314183..a364ca487e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -95,11 +95,15 @@ class DeviceSimple : public DeviceBase { }; } // namespace -ConstantFolding::ConstantFolding(DeviceBase* cpu_device) - : cpu_device_(cpu_device) { +ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level, + DeviceBase* cpu_device) + : opt_level_(opt_level), cpu_device_(cpu_device) { resource_mgr_.reset(new ResourceMgr()); } +ConstantFolding::ConstantFolding(DeviceBase* cpu_device) + : ConstantFolding(RewriterConfig::ON, cpu_device) {} + // static string ConstantFolding::AddControlDependency(const string& input_name, GraphDef* graph, @@ -281,6 +285,84 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item, return Status::OK(); } +bool ShapesEqual(const TensorShapeProto& shape1, + const TensorShapeProto& shape2) { + if (shape1.unknown_rank() || shape2.unknown_rank()) { + return false; + } + if (shape1.dim_size() != shape2.dim_size()) { + return false; + } + for (int i = 0; i < shape1.dim_size(); ++i) { + if (shape1.dim(i).size() != shape2.dim(i).size()) { + return false; + } + } + return true; +} + +Status ConstantFolding::MaterializeConstants( + const GrapplerItem& item, const GraphProperties& properties) { + const int node_count = graph_.node_size(); + for (int i = 0; i < node_count; ++i) { + NodeDef& node = *graph_.mutable_node(i); + const string& op = node.op(); + if (op != "BroadcastGradientArgs") { + continue; + } + const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); + const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); + if (shape_node1 == nullptr || shape_node1->op() != "Shape" || + shape_node2 == nullptr || shape_node2->op() != "Shape") { + continue; + } + const std::vector& prop1 = + properties.GetInputProperties(shape_node1->name()); + const std::vector& prop2 = + properties.GetInputProperties(shape_node2->name()); + if (prop1.size() != 1 || prop2.size() != 1) { + continue; + } + const TensorShapeProto& shape1 = prop1[0].shape(); + const TensorShapeProto& shape2 = prop2[0].shape(); + if (ShapesEqual(shape1, shape2)) { + DataType type = node.attr().at("T").type(); + Tensor empty(type, TensorShape()); + NodeDef* out[2]; + for (int i = 0; i < 2; ++i) { + string const_name = AddPrefixToNodeName( + strings::StrCat(node.name(), "-", i), kConstantFoldingConst); + out[i] = node_map_->GetNode(const_name); + if (!out[i]) { + out[i] = graph_.add_node(); + *out[i] = CreateNodeDef(const_name, TensorValue(&empty)); + out[i]->set_device(node.device()); + node_map_->AddNode(const_name, out[i]); + string ctrl_dep = + AddControlDependency(node.name(), &graph_, node_map_.get()); + *out[i]->add_input() = ctrl_dep; + node_map_->AddOutput(NodeName(ctrl_dep), const_name); + } + } + + auto outputs = node_map_->GetOutputs(node.name()); + for (const auto& output : outputs) { + for (int k = 0; k < output->input_size(); ++k) { + int port; + string node_name = ParseNodeName(output->input(k), &port); + if (node_name == node.name() && port >= 0 && port < 2) { + *output->mutable_input(k) = out[port]->name(); + node_map_->UpdateInput(output->name(), node_name, + out[port]->name()); + } + } + } + } + } + + return Status::OK(); +} + bool ConstantFolding::IsFoldable(const NodeDef& node) const { // Folding not applicable to ops with no inputs. if (node.input().empty()) { @@ -921,23 +1003,25 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, } GraphProperties properties(item); + Status s = properties.InferStatically(); bool has_feed = !item.feed.empty(); - if (!has_feed) { + // bool has_feed = false; + if (!has_feed && s.ok()) { // Only use static shape information when there is no feed in the // graph. That's because it's possible to feed a placeholder with a tensor // of any shape, which could make the static information inconsistent with // the shapes actually fed. - Status s = properties.InferStatically(); - if (!s.ok()) { - VLOG(1) << "Failed to infer graph shapes: " << s; - } else { + if (s.ok()) { TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); } } + if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) { + TF_RETURN_IF_ERROR(MaterializeConstants(item, properties)); + } TF_RETURN_IF_ERROR(FoldGraph(output)); - if (!has_feed) { + if (!has_feed && s.ok()) { TF_RETURN_IF_ERROR(SimplifyGraph(output, properties)); } return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 30d778789a..dd988f336c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { namespace grappler { @@ -37,6 +38,7 @@ class ConstantFolding : public GraphOptimizer { NodeMap* node_map); ConstantFolding(DeviceBase* cpu_device); + ConstantFolding(RewriterConfig::Toggle opt_level, DeviceBase* cpu_device); ~ConstantFolding() override {} @@ -51,7 +53,8 @@ class ConstantFolding : public GraphOptimizer { private: Status MaterializeShapes(const GrapplerItem& item, const GraphProperties& properties); - + Status MaterializeConstants(const GrapplerItem& item, + const GraphProperties& properties); bool IsFoldable(const NodeDef& node) const; Status EvaluateNode(const NodeDef& node, @@ -74,6 +77,7 @@ class ConstantFolding : public GraphOptimizer { GraphDef* output); // Points to an externally provided device or to owned_device_; + RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; std::unique_ptr owned_device_; diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index a1dee6d2fb..17f9854b59 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/cc/ops/array_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -838,6 +839,58 @@ TEST_F(ConstantFoldingTest, Packing) { // size needed to naively encode 1000 floats folded twice). EXPECT_GT(8000, output.ByteSizeLong()); } + +TEST_F(ConstantFoldingTest, ConstantMaterialization) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = + ops::Placeholder(s.WithOpName("a"), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({-1, -1}))); + Output b = ops::Square(s.WithOpName("b"), a); + Output c = ops::Mul(s.WithOpName("c"), a, b); + Output d = ops::Shape(s.WithOpName("d"), a); + Output e = ops::Shape(s.WithOpName("e"), b); + auto f = ops::internal::BroadcastGradientArgs(s.WithOpName("f"), d, e); + Output o1 = ops::Identity(s.WithOpName("o1"), f.r0); + Output o2 = ops::Identity(s.WithOpName("o2"), f.r1); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + // Run a second time to make sure the optimization is idempotent. + item.graph.Swap(&output); + status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int found = 0; + for (const auto& node : output.node()) { + if (node.name() == "o1") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("ConstantFolding/f-0", node.input(0)); + } else if (node.name() == "o2") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("ConstantFolding/f-1", node.input(0)); + } else if (node.name() == "ConstantFolding/f-0") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^f", node.input(0)); + } else if (node.name() == "ConstantFolding/f-1") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^f", node.input(0)); + } + } + EXPECT_EQ(4, found); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index a9875c06d8..6204a81f80 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -64,8 +64,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr(new ModelPruner())); } if (cfg_.constant_folding() != RewriterConfig::OFF) { - optimizers.push_back( - std::unique_ptr(new ConstantFolding(cpu_device_))); + optimizers.push_back(std::unique_ptr( + new ConstantFolding(cfg_.constant_folding(), cpu_device_))); } if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( -- GitLab From 2545c4e93b7c1ee21ddb3666580ff4922630d974 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 8 Nov 2017 13:44:26 -0800 Subject: [PATCH 0196/1801] Moves imperative_grad to C Neutral-to-positive on all benchmarks. Also reduces overhead of should_record. PiperOrigin-RevId: 175057104 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/tape.cc | 312 +++++++++++++++++++- tensorflow/c/eager/tape.h | 58 +++- tensorflow/python/eager/BUILD | 7 +- tensorflow/python/eager/backprop.py | 14 +- tensorflow/python/eager/backprop_test.py | 57 +--- tensorflow/python/eager/imperative_grad.py | 194 +------------ tensorflow/python/eager/pywrap_tensor.cc | 8 +- tensorflow/python/eager/pywrap_tensor.h | 25 ++ tensorflow/python/eager/pywrap_tfe.h | 13 +- tensorflow/python/eager/pywrap_tfe_src.cc | 313 +++++++++++++++++---- tensorflow/python/eager/tape.py | 12 +- tensorflow/python/eager/tape_test.py | 20 -- tensorflow/python/pywrap_tfe.i | 4 +- 14 files changed, 702 insertions(+), 336 deletions(-) create mode 100644 tensorflow/python/eager/pywrap_tensor.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index c77896b80b..74e94be8d6 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -39,6 +39,7 @@ tf_cuda_library( tf_cuda_library( name = "c_api_internal", hdrs = ["c_api_internal.h"], + visibility = ["//tensorflow:internal"], deps = [ ":c_api", ":runtime", diff --git a/tensorflow/c/eager/tape.cc b/tensorflow/c/eager/tape.cc index 464612a81e..459499bb69 100644 --- a/tensorflow/c/eager/tape.cc +++ b/tensorflow/c/eager/tape.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include "tensorflow/c/eager/tape.h" namespace tensorflow { @@ -94,8 +96,314 @@ void GradientTape::DeleteTrace(int64 tensor_id) { op_tape_.erase(op_it); } -std::pair GradientTape::Export() { - return {std::move(tensor_tape_), std::move(op_tape_)}; +// Terminology: +// +// - op: a possibly composite operation, which has an entry in the tape +// - target: dy in dx/dy +// - source: dx in dx/dy +// - tensor: one of the many inputs or outputs of an operation +// +// Below here we do the gradient algorithm. It works as follows: +// +// First we filter the tape to just the subset of operations we want to +// differentiate. In the process of doing so we count how many times each Tensor +// is used as an input to an op (so we know when we're done computing gradients +// for that Tensor). We also count, for each tape entry, how many of its output +// Tensors need gradients to be computed (Tensors which are not used do not need +// any gradients to be computed). +// +// Finally, we start a backprop stack with a set of tape entries for which we +// have all gradients available. This set usually is a subset of the set of +// targets (not all since targets which have outputs in the tape will not have +// gradients available initially). +// +// Then we repeatedly pop an entry from the stack, run its backprop, and update +// the gradients of its inputs. Once we have computed all gradients for a single +// input we can mark this input as done, and this can trigger adding an entry to +// the stack if all outputs of that entry are now done. +// +// When the stack is empty we have gradients for all tensors we're interested +// in. + +struct BackpropInitialState { + OpTape op_tape; + + // Map from tensor ID to how many references still exist for this tensor in + // the tape. + std::unordered_map tensor_usage_counts; + + // Maps from op ID to how many output tensors of this op still need to have + // their gradients computed. + std::unordered_map op_missing_tensor; +}; + +BackpropInitialState PrepareBackprop( + gtl::ArraySlice target, const TensorTape& tensor_tape, + OpTape op_tape, const std::unordered_set& sources_set) { + std::vector tensor_stack; + tensor_stack.reserve(target.size()); + for (auto t : target) { + tensor_stack.push_back(t); + } + BackpropInitialState result; + while (!tensor_stack.empty()) { + int64 tensor_id = tensor_stack.back(); + tensor_stack.pop_back(); + auto op_id_it = tensor_tape.find(tensor_id); + if (op_id_it == tensor_tape.end()) { + continue; + } + int64 op_id = op_id_it->second; + auto op_it = op_tape.find(op_id); + auto result_op_it = result.op_tape.find(op_id); + if (op_id == -1 || op_it == op_tape.end() || + result_op_it != result.op_tape.end()) { + continue; + } + CHECK(result.op_tape.emplace(op_id, op_it->second).second); + for (auto it : op_it->second.input_tensor_id) { + auto count_it = result.tensor_usage_counts.find(it); + if (count_it != result.tensor_usage_counts.end()) { + count_it->second++; + } else { + result.tensor_usage_counts[it] = 1; + if (sources_set.find(it) == sources_set.end() && + tensor_tape.find(it) != tensor_tape.end()) { + tensor_stack.push_back(it); + } + } + } + op_tape.erase(op_it); + } + for (auto& pair : result.tensor_usage_counts) { + auto it = tensor_tape.find(pair.first); + if (it != tensor_tape.end() && it->second != -1) { + result.op_missing_tensor[it->second] += 1; + } + } + // Call destructors for all unneeded gradient functions. + for (const auto& op_pair : op_tape) { + op_pair.second.backward_function_deleter(); + } + return result; +} + +std::vector InitialStack( + const OpTape& op_tape, + const std::unordered_map& op_missing_tensor) { + std::vector result; + for (auto& op_entry : op_tape) { + if (op_missing_tensor.find(op_entry.first) == op_missing_tensor.end()) { + result.push_back(op_entry.first); + } + } + return result; +} + +Status InitialGradients(const VSpace& vspace, gtl::ArraySlice target, + gtl::ArraySlice output_gradients, + std::unordered_map tensor_usage_counts, + std::unordered_map>* result) { + for (int i = 0; i < target.size(); ++i) { + int64 id = vspace.TensorId(target[i]); + if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { + if (!output_gradients.empty() && output_gradients[i] != nullptr) { + // TODO(apassos) figure out how to print debugging information here. + return errors::InvalidArgument( + "A gradient was provided for a tensor which is used as part of the " + "computation."); + } + } else { + if (output_gradients.empty() || output_gradients[i] == nullptr) { + (*result)[id].push_back(vspace.OnesLike(target[i])); + } else { + (*result)[id].push_back(output_gradients[i]); + } + } + } + return Status::OK(); +} + +// If over kMinAggregateCount gradients are accumulated and the total +// memory consumption is over kMinAggregateBytes, do an early aggregation +// so as to release the gradient tensor to save memory. +static const int kMinAggregateCount = 4; +static const int kMinAggregateBytes = 128 * 1024 * 1024; + +Status GradientTape::Gradient(const VSpace& vspace, + gtl::ArraySlice target, + gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result) { + std::vector id_sources; + id_sources.reserve(sources.size()); + for (void* s : sources) { + id_sources.push_back(vspace.TensorId(s)); + } + std::unordered_set sources_set(id_sources.begin(), id_sources.end()); + std::vector id_targets; + id_sources.reserve(target.size()); + for (void* t : target) { + id_targets.push_back(vspace.TensorId(t)); + } + BackpropInitialState state = PrepareBackprop( + id_targets, tensor_tape_, std::move(op_tape_), sources_set); + std::vector op_stack = + InitialStack(state.op_tape, state.op_missing_tensor); + std::unordered_map> gradients; + Status s = InitialGradients(vspace, target, output_gradients, + state.tensor_usage_counts, &gradients); + auto cleanup = [&state]() { + // Release all backprop functions + for (const auto& pair : state.op_tape) { + pair.second.backward_function_deleter(); + } + }; + if (!s.ok()) { + cleanup(); + return s; + } + std::unordered_map gradients_size; + // TODO(apassos) multiple threads could be dequeuing from op_stack at the same + // time, for better CPU backprop performance. + VLOG(1) << "Initial stack:"; + if (VLOG_IS_ON(1)) { + for (auto t : op_stack) { + VLOG(1) << " " << t; + } + } + std::unordered_map> + functions_accept_none_for_indices({ + {"SoftmaxCrossEntropyWithLogits", {1}}, + {"FusedBatchNorm", {1, 2, 3, 4}}, + }); + while (!op_stack.empty()) { + const int64 op = op_stack.back(); + VLOG(1) << "Popped " << op; + op_stack.pop_back(); + auto op_it = state.op_tape.find(op); + if (op_it == state.op_tape.end()) { + // It is possible for ops to end up on the stack if they are unrelated to + // the target; we should just skip them. + continue; + } + auto trace = std::move(op_it->second); + state.op_tape.erase(op_it); + std::vector out_gradients; + out_gradients.reserve(trace.output_tensor_info.size()); + for (int i = 0; i < trace.output_tensor_info.size(); ++i) { + const int64 id = trace.output_tensor_info[i].id; + auto grad_it = gradients.find(id); + if (grad_it == gradients.end()) { + auto func_name_it = + functions_accept_none_for_indices.find(trace.op_type); + if (func_name_it != functions_accept_none_for_indices.end() && + func_name_it->second.find(i) != func_name_it->second.end()) { + out_gradients.push_back(nullptr); + } else { + out_gradients.push_back( + vspace.Zeros(trace.output_tensor_info[i].shape, + trace.output_tensor_info[i].dtype)); + } + } else { + out_gradients.push_back(vspace.AggregateGradients(grad_it->second)); + if (sources_set.find(grad_it->first) == sources_set.end()) { + gradients.erase(grad_it); + } + } + } + std::vector in_gradients; + Status s = vspace.CallBackwardFunction(trace.backward_function, + out_gradients, &in_gradients); + if (!s.ok()) { + VLOG(1) << "Gradient function failed."; + cleanup(); + return s; + } + VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " + << trace.input_tensor_id.size() << " sources"; + for (int i = 0; i < in_gradients.size(); ++i) { + const int64 id = trace.input_tensor_id[i]; + if (in_gradients[i] != nullptr) { + auto& unaggregated_grads = gradients[id]; + unaggregated_grads.push_back(in_gradients[i]); + if (unaggregated_grads.size() > kMinAggregateCount) { + auto size_it = gradients_size.find(id); + int64 size; + if (size_it == gradients_size.end()) { + size = vspace.NumElements(unaggregated_grads[0]); + gradients_size.emplace(id, size); + } else { + size = size_it->second; + } + if (unaggregated_grads.size() * size * 4 > kMinAggregateBytes) { + void* tensor = vspace.AggregateGradients(unaggregated_grads); + unaggregated_grads.clear(); + unaggregated_grads.push_back(tensor); + } + } + } + auto usage_count_it = state.tensor_usage_counts.find(id); + if (usage_count_it == state.tensor_usage_counts.end()) { + VLOG(1) << "Tensor " << id << " not used"; + continue; + } + usage_count_it->second--; + if (usage_count_it->second > 0) { + VLOG(1) << "Tensor " << id << " usage count " << usage_count_it->second; + continue; + } + auto tape_it = tensor_tape_.find(id); + if (tape_it == tensor_tape_.end()) { + VLOG(1) << "Tensor " << id + << " has no associated op. Deleting gradient"; + auto grad_it = gradients.find(id); + if (grad_it != gradients.end()) { + for (auto g : grad_it->second) { + vspace.DeleteTensor(g); + } + gradients.erase(grad_it); + } + continue; + } + const int64 op_id = tape_it->second; + if (op_id == -1) { + VLOG(1) << "Tensor " << id << " is source"; + continue; + } + auto missing_it = state.op_missing_tensor.find(op_id); + if (missing_it != state.op_missing_tensor.end()) { + missing_it->second--; + VLOG(1) << "Op " << op_id << " missing " << missing_it->second + << " output gradients"; + if (missing_it->second == 0) { + op_stack.push_back(op_id); + } + } + } + } + CHECK(state.op_tape.empty()); + result->reserve(sources.size()); + for (auto is : id_sources) { + auto grad_it = gradients.find(is); + if (grad_it == gradients.end()) { + result->push_back(nullptr); + } else { + if (grad_it->second.size() == 1) { + result->push_back(grad_it->second[0]); + } else { + result->push_back(vspace.AggregateGradients(grad_it->second)); + } + gradients.erase(grad_it); + } + } + VLOG(1) << "Final gradients size: " << gradients.size(); + for (auto grad_pair : gradients) { + for (const auto& g : grad_pair.second) { + vspace.DeleteTensor(g); + } + } + return Status::OK(); } } // namespace eager diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index df51f300eb..2bb62a7ab3 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -57,11 +57,57 @@ using TensorTape = std::unordered_map; // Map from operation-id to tape entry. using OpTape = std::unordered_map; +// Operations the tape needs to perform on tensors to do backpropagation. Named +// "vspace" because a subset of these are related to a vector space, such as +// adding gradients, getting zeroes, etc. Currently cannot be implemented +// without using tensorflow python code, hence left unspecified here. +// +// We currently use void* for tensors, backward functions, and gradients (which +// can be but are not required to be tensors). TODO(apassos) replace this first +// with templates to allow for pyobject specialization in the client followed by +// a TFE_TensorHandle specialization, which is blocked by quite a few things +// still. +class VSpace { + public: + virtual ~VSpace() {} + + // Returns the number of elements in the tensor. + virtual int64 NumElements(void* tensor) const = 0; + + // Consumes references to the tensors in the gradient_tensors list and returns + // a tensor with the result. + virtual void* AggregateGradients( + gtl::ArraySlice gradient_tensors) const = 0; + + // Returns a tensor of the right shape and dtype filled with zeros. + virtual void* Zeros(TensorShape shape, DataType dtype) const = 0; + + // Returns a Tensor which is filled with ones and like the input. + virtual void* OnesLike(void*) const = 0; + + // Returns an integer which is a unique-to-within-this-program handle for this + // tensor. + virtual int64 TensorId(void* tensor) const = 0; + + // Calls the passed-in backward function. + virtual Status CallBackwardFunction(void* backward_function, + gtl::ArraySlice output_gradients, + std::vector* result) const = 0; + + // Deletes the input tensor. + virtual void DeleteTensor(void* tensor) const = 0; +}; + // Traces the execution of operations, doing eager garbage collection, and // exporting a full trace so other code can do backpropagation. Not thread-safe. class GradientTape { public: GradientTape() {} + ~GradientTape() { + for (const auto& pair : op_tape_) { + pair.second.backward_function_deleter(); + } + } bool ShouldRecord(gtl::ArraySlice tensor_ids); @@ -75,10 +121,14 @@ class GradientTape { void DeleteTrace(int64 tensor_id); - // Note: it is only valid to call Export once per tape, and after calling - // export the tape is no longer valid (i.e. calls to ShouldRecord, Watch, - // Record, and Delete have undefined behavior). - std::pair Export(); + // Consumes the internal state of the tape (so cannot be called more than + // once) and produces the gradient of the target tensors with respect to the + // source tensors. The output gradients are used if not empty and not + // null. The result is populated with one tensor per target element. + Status Gradient(const VSpace& vspace, gtl::ArraySlice target, + gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result); private: TensorTape tensor_tape_; diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index bcd1e1d0dc..c36647b21c 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -14,11 +14,16 @@ cc_library( "pywrap_tensor.cc", "pywrap_tfe_src.cc", ], - hdrs = ["pywrap_tfe.h"], + hdrs = [ + "pywrap_tensor.h", + "pywrap_tfe.h", + ], visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/c:c_api", + "//tensorflow/c:c_api_internal", "//tensorflow/c/eager:c_api", + "//tensorflow/c/eager:c_api_internal", "//tensorflow/c/eager:tape", "//tensorflow/core:lib", "//tensorflow/python:ndarray_tensor", diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 86b3776b8c..111d7cef56 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -727,11 +727,23 @@ def _num_elements(grad): raise ValueError("`grad` not a Tensor or IndexedSlices.") +_last_shape_dtype = [None, None] +_last_zero = [None] + + +def _zeros(shape, dtype): + """Wraps array_ops.zeros to cache last zero for a given shape and dtype.""" + if [shape, dtype] != _last_shape_dtype: + _last_shape_dtype[:] = [shape, dtype] + _last_zero[0] = array_ops.zeros(shape, dtype) + return _last_zero[0] + + _default_vspace = imperative_grad.VSpace( num_elements_fn=_num_elements, aggregate_fn=_aggregate_grads, tensor_id=ops.tensor_id, - zeros=array_ops.zeros, + zeros=_zeros, ones_like=lambda x: ops.convert_to_tensor(array_ops.ones_like(x))) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index ed54b8e12e..ec9a185b73 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -24,11 +24,11 @@ from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient -from tensorflow.python.eager import imperative_grad from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops @@ -41,7 +41,6 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.training import training -from tensorflow.python.util import compat class BackpropTest(test.TestCase): @@ -103,6 +102,18 @@ class BackpropTest(test.TestCase): grad_fn = backprop.gradients_function(f) self.assertAllEqual(2., grad_fn(1., dy=2.)[0]) + def testErrors(self): + + @custom_gradient.custom_gradient + def f(x): + def grad(_): + raise RuntimeError('x') + return x, grad + + # TODO(apassos) raise the right error here + with self.assertRaises(errors_impl.InternalError): + backprop.gradients_function(f)(constant_op.constant(1.0)) + def testImplicitGradOverEmbeddingLookup(self): batch_size = 8 embedding_size = 512 @@ -483,48 +494,6 @@ class BackpropTest(test.TestCase): initial_value=1., name='testSameObjectForMultipleArguments.Variable') self.assertAllEqual([1., 1.], np_g(v, v)) - def testEarlyGradAggregation(self): - # Needs to be a list so mutations by the callback affect this function. - add_n = [] - def callback(op_type, unused_1, unused_2, unused_3, unused_4): - if compat.as_bytes(op_type) == compat.as_bytes('AddN'): - add_n.append(1) - context.context().add_post_execution_callback(callback) - - v = resource_variable_ops.ResourceVariable(constant_op.constant(2.0), - name='v') - def fn(): - outputs = [] - for _ in range(20): - outputs.append(v * constant_op.constant(2.0)) - return math_ops.add_n(outputs) - - # By default the aggregation count is 2. - _ = backprop.implicit_grad(fn)()[0][1] - self.assertEqual(len(add_n), 2) - del add_n[:] - - # Reduce the aggregation limit, cause the backprop to do some - # early aggregation. - # pylint: disable=protected-access - old_cnt = imperative_grad._MIN_AGGREGATE_COUNT - old_bytes = imperative_grad._MIN_AGGREGATE_BYTES - imperative_grad._MIN_AGGREGATE_COUNT = 10 - imperative_grad._MIN_AGGREGATE_BYTES = 1 - _ = backprop.implicit_grad(fn)() - self.assertEqual(len(add_n), 6) - del add_n[:] - - # Aggregation is also limited by the memory. - imperative_grad._MIN_AGGREGATE_BYTES = 10000 - _ = backprop.implicit_grad(fn)() - self.assertEqual(len(add_n), 2) - - imperative_grad._MIN_AGGREGATE_COUNT = old_cnt - imperative_grad._MIN_AGGREGATE_BYTES = old_bytes - # pylint: enable=protected-access - context.context().clear_post_execution_callbacks() - def testImplicitGradientsCustomGradientAndCachedVariableValue(self): @custom_gradient.custom_gradient diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index c87719f84a..8932b7157b 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -20,102 +20,8 @@ from __future__ import print_function import collections -from tensorflow.python.eager import tape as tape_module - - -# Terminology: -# -# - op: a possibly composite operation, which has an entry in the tape -# - target: dy in dx/dy -# - source: dx in dx/dy -# - tensor: one of the many inputs or outputs of an operation -# -# Below here we do the gradient algorithm. It works as follows: -# -# First we filter the tape to just the subset of operations we want to -# differentiate. In the process of doing so we count how many times each Tensor -# is used as an input to an op (so we know when we're done computing gradients -# for that Tensor). We also count, for each tape entry, how many of its output -# Tensors need gradients to be computed (Tensors which are not used do not need -# any gradients to be computed). -# -# Finally, we start a backprop stack with a set of tape entries for which we -# have all gradients available. This set usually is a subset of the set of -# targets (not all since targets which have outputs in the tape will not have -# gradients available initially). -# -# Then we repeatedly pop an entry from the stack, run its backprop, and update -# the gradients of its inputs. Once we have computed all gradients for a single -# input we can mark this input as done, and this can trigger adding an entry to -# the stack if all outputs of that entry are now done. -# -# When the stack is empty we have gradients for all tensors we're interested in. -def _prepare_backprop(vspace, target, tensor_to_op, op_to_entry, id_sources): - """Filters the tape to only include relevant entries and counts tensor usages. - - Args: - vspace: information about the space we're differentiating in. - target: the target to optimize. - tensor_to_op: Map from tensor id to key in op_to_entry that produced it. - op_to_entry: Map from op id to a tape.TapeEntry object - id_sources: the ids of the sources wrt the gradient is being taken. - - Returns: - usage counts (how many entries downstream from a tensor use it) - op_to_entry_map: entry map (a filtered tape, with only the relevant - entries), - missing: map from tensor id to how many downstream gradients still need - to be computed before this tensor's gradient can be computed. - """ - tensor_stack = [vspace.tensor_id(x) for x in target] - tensor_usage_counts = {} - o_to_e = {} # Copy of just the bits we need from op_to_entry - while tensor_stack: - t = tensor_stack.pop() - op = tensor_to_op.get(t, None) - # op is None or -1 if the tensor is a source (i.e. was watched directly) - if op is None or op == -1 or op in o_to_e: - continue - op_trace = tape_module.TapeEntry(*op_to_entry[op]) - o_to_e[op] = op_trace - for it in op_trace.input_ids: - if it in tensor_usage_counts: - tensor_usage_counts[it] += 1 - else: - tensor_usage_counts[it] = 1 - if it not in id_sources and it in tensor_to_op: - tensor_stack.append(it) - op_missing_tensor_counts = collections.defaultdict(int) - for t in tensor_usage_counts: - if t in tensor_to_op and tensor_to_op[t] is not None: - op_missing_tensor_counts[tensor_to_op[t]] += 1 - return tensor_usage_counts, o_to_e, op_missing_tensor_counts - - -def _initialize_backprop_stack(op_to_entry, op_missing_tensor): - """Returns the set of tape entries which are available for backprop.""" - ready_ops = [] - for op in op_to_entry: - if op not in op_missing_tensor: - ready_ops.append(op) - return ready_ops - - -def _initial_gradients(vspace, target, output_gradients, tensor_usage_counts): - """Computes the initial gradients for each Tensor.""" - # Initialize the backprop stack - gradients = collections.defaultdict(list) - for i, t in enumerate(target): - if vspace.tensor_id(t) in tensor_usage_counts: - # Can't provide a gradient of something we're trying to differentiate - assert output_gradients is None or output_gradients[i] is None - else: - if output_gradients is None or output_gradients[i] is None: - out_grad = vspace.ones_like(t) - else: - out_grad = output_gradients[i] - gradients[vspace.tensor_id(t)].append(out_grad) - return gradients +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.framework import errors VSpace = collections.namedtuple( @@ -123,13 +29,6 @@ VSpace = collections.namedtuple( ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones_like"]) -# If over MIN_AGGREGATE_COUNT gradients are accumulated and the total -# memory consumption is over MIN_AGGREGATE_BYTES, do an early aggregation -# so as to release the gradient tensor to save memory. -_MIN_AGGREGATE_COUNT = 4 -_MIN_AGGREGATE_BYTES = 128 * 1024 * 1024 - - def imperative_grad( vspace, tape, @@ -161,89 +60,6 @@ def imperative_grad( or if only non-differentiable functions of the source were used in the computation of target. """ - tensor_to_op, op_to_entry = tape.export() - # This overwrites the op_to_entry variable, which will release all memory used - # to keep traces that are irrelevant to the gradient computation we're doing - # here. - id_sources = [vspace.tensor_id(t) for t in sources] - tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop( - vspace, target, tensor_to_op, op_to_entry, id_sources) - ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor) - gradients = _initial_gradients(vspace, target, output_gradients, - tensor_usage_counts) - gradients_size = dict() - # Now exhaust the backprop stack - while ready_ops: - op = ready_ops.pop() - op_trace = op_to_entry.pop(op) - out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids] - - # Cache the last used zero tensor. We reuse it if the next one - # we need is of the same shape and dtype. This is very helpful in - # large splits and should have negligible overhead in other cases. - last_shape_and_dtype = None - last_zeros = None - for i in range(len(out_gradients)): - if out_gradients[i] is None: - # TODO(apassos) this should be in the right device - none_indices = _grad_fn_accepts_none_for_indices.get( - op_trace.op_type, None) - if none_indices is None or i not in none_indices: - shape_and_dtype = op_trace.output_shape_and_dtype[i] - if shape_and_dtype != last_shape_and_dtype: - last_shape_and_dtype = shape_and_dtype - last_zeros = vspace.zeros(*shape_and_dtype) - out_gradients[i] = last_zeros - else: - out_gradients[i] = vspace.aggregate_fn(out_gradients[i]) - - in_gradients = op_trace.backward_function(*(out_gradients)) - for i, t in enumerate(op_trace.input_ids): - if in_gradients[i] is not None: - t_grads = gradients.setdefault(t, []) - t_grads.append(in_gradients[i]) - if len(t_grads) >= _MIN_AGGREGATE_COUNT: - if t not in gradients_size: - gradients_size[t] = vspace.num_elements_fn(t_grads[-1]) - size = gradients_size[t] - - if len(t_grads) * size * 4 > _MIN_AGGREGATE_BYTES: - t_grads[:] = [vspace.aggregate_fn(t_grads)] - if tensor_usage_counts.get(t, 0) > 0: - tensor_usage_counts[t] -= 1 - if (t in tensor_to_op - and tensor_usage_counts[t] == 0 - and t not in id_sources): - in_op = tensor_to_op[t] - if in_op is None or in_op == -1: - continue - if op_missing_tensor.get(in_op, 0) > 0: - op_missing_tensor[in_op] -= 1 - if op_missing_tensor.get(in_op, 0) == 0: - ready_ops.append(in_op) - result = [] - for i, s in enumerate(sources): - g = gradients.get(vspace.tensor_id(s), None) - if g is None: - result.append(None) - else: - result.append(vspace.aggregate_fn(g)) - return result - - -# TODO(agarwal): use an automatic mechanism for handling None arguments to -# gradient functions. -# Some gradient functions can accept None arguments for gradients. The following -# maps the operation name to the indices at which the corresponding gradient -# function can accept None values. -# e.g. FusedBatchNorm outputs 5 values and hence receives 5 gradient values -# during backprop. However the gradient function uses only the first of those -# values and ignores the rest. The entry, "FusedBatchNorm": [1, 2, 3, 4], -# indicates that only the gradient corresponding to index 0 is used, and the -# gradient values at indices 1-4 are ignored (and hence can be None). The -# backprop algorithm can then leverage this by not constructing zeros to -# pass for those indices. -_grad_fn_accepts_none_for_indices = { - "SoftmaxCrossEntropyWithLogits": [1], - "FusedBatchNorm": [1, 2, 3, 4] -} + with errors.raise_exception_on_not_ok_status() as status: + return pywrap_tensorflow.TFE_Py_TapeGradient( + tape._tape, vspace, target, sources, output_gradients, status) # pylint: disable=protected-access diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index ca283862f9..653f3ef84e 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/python/lib/core/py_seq_tensor.h" #include "tensorflow/python/lib/core/safe_ptr.h" +#include "tensorflow/python/eager/pywrap_tensor.h" #include "tensorflow/python/eager/pywrap_tfe.h" #include "tensorflow/c/c_api.h" @@ -573,7 +574,7 @@ bool EagerTensor_CheckExact(const PyObject* o) { return Py_TYPE(o) == EagerTensorType; } -TFE_TensorHandle* EagerTensorHandle(const PyObject* o) { +TFE_TensorHandle* EagerTensor_Handle(const PyObject* o) { return reinterpret_cast(o)->handle; } @@ -594,6 +595,11 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { return reinterpret_cast(t); } +tensorflow::int64 EagerTensor_id(const PyObject* tensor) { + CHECK(EagerTensor_CheckExact(tensor)); + return reinterpret_cast(tensor)->id; +} + PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) { if (!PyType_Check(base_class)) { PyErr_SetString( diff --git a/tensorflow/python/eager/pywrap_tensor.h b/tensorflow/python/eager/pywrap_tensor.h new file mode 100644 index 0000000000..aa1efdd1b8 --- /dev/null +++ b/tensorflow/python/eager/pywrap_tensor.h @@ -0,0 +1,25 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ +#define TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/python/lib/core/numpy.h" + +bool EagerTensor_CheckExact(const PyObject* o); +tensorflow::int64 EagerTensor_id(const PyObject* tensor); + +#endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 1d03df2933..6705483f3b 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -81,7 +81,7 @@ bool EagerTensor_CheckExact(const PyObject* o); PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle); // Extracts the handle inside EagerTensor object `o`. Returns nullptr on error. -TFE_TensorHandle* EagerTensorHandle(const PyObject* o); +TFE_TensorHandle* EagerTensor_Handle(const PyObject* o); // Creates the `EagerTensor` class by subclassing `base_class` and returns the // newly created type, or nullptr on error. @@ -103,7 +103,16 @@ void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, PyObject* output_tensors, PyObject* input_tensor_ids, PyObject* backward_function); -PyObject* TFE_Py_TapeExport(PyObject* tape); + +// Computes a gradient based on information recorded on the tape.`tape` must +// have been produced by TFE_Py_NewTape. `vspace` must be a +// imperative_grad.py:VSpace named tuple. `target` and `sources` must be python +// lists of Tensor objects. `output_gradients` is either None or a python list +// of either Tensor or None, and if not None should have the same length as +// target. +PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, + PyObject* target, PyObject* sources, + PyObject* output_gradients, TF_Status* status); // Returns an EagerTensor of dimension [len(`tensor_list`)] containing // the `slice_dim`'th dimension of each tensor in `tensor_list`. In other words, diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 7456eb10f8..a00a7615d7 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -16,10 +16,13 @@ limitations under the License. #include "tensorflow/python/eager/pywrap_tfe.h" #include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/c_api_internal.h" #include "tensorflow/c/eager/tape.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/python/eager/pywrap_tensor.h" using tensorflow::string; @@ -515,18 +518,50 @@ static std::vector MakeIntList(PyObject* list) { } PyObject* TFE_Py_TapeShouldRecord(PyObject* py_tape, PyObject* tensors) { + if (tensors == Py_None) { + Py_RETURN_FALSE; + } + PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); + if (seq == nullptr) { + return nullptr; + } + int len = PySequence_Fast_GET_SIZE(seq); + // TODO(apassos) consider not building a list and changing the API to check + // each tensor individually. + std::vector tensor_ids; + tensor_ids.reserve(len); + for (int i = 0; i < len; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(seq, i); + if (EagerTensor_CheckExact(item)) { + tensor_ids.push_back(EagerTensor_id(item)); + } else { + PyObject* id_field = PyObject_GetAttrString(item, "_id"); + if (id_field == nullptr) { + return nullptr; + } + tensor_ids.push_back(MakeInt(id_field)); + Py_DECREF(id_field); + } + } + Py_DECREF(seq); TFE_Py_Tape* tape = reinterpret_cast(py_tape); - return PyBool_FromLong(tape->tape->ShouldRecord(MakeIntList(tensors))); + if (tape->tape->ShouldRecord(tensor_ids)) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } } void TFE_Py_TapeWatch(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->Watch(tensor_id); } -// TODO(apassos) have a fast path for eager tensors here which gets information -// from the handle instead of from the python object, and use this only for the -// case of graph tensors. static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { + if (EagerTensor_CheckExact(tensor)) { + TFE_TensorHandle* t = EagerTensor_Handle(tensor); + tensorflow::int64 id = EagerTensor_id(tensor); + return tensorflow::eager::TapeTensor{id, t->t.dtype(), t->t.shape()}; + } PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); tensorflow::int64 id = MakeInt(id_field); Py_DECREF(id_field); @@ -592,64 +627,224 @@ void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); } -// TODO(apassos) when backprop.py moves to C most of this exporting logic can -// disappear. -PyObject* TFE_Py_TapeExport(PyObject* tape) { - std::pair exported = - reinterpret_cast(tape)->tape->Export(); - PyObject* tensor_tape = PyDict_New(); - for (const auto& pair : exported.first) { - PyObject* tid = PyLong_FromLong(pair.first); - PyObject* opid = PyLong_FromLong(pair.second); - PyDict_SetItem(tensor_tape, tid, opid); - Py_DECREF(tid); - Py_DECREF(opid); - } - - PyObject* op_tape = PyDict_New(); - for (const auto& pair : exported.second) { - PyObject* opid = PyLong_FromLong(pair.first); - const auto& entry = pair.second; - PyObject* op_type = PyBytes_FromString(entry.op_type.c_str()); - PyObject* output_ids = PyList_New(entry.output_tensor_info.size()); - for (int i = 0; i < entry.output_tensor_info.size(); ++i) { - PyObject* tid = PyLong_FromLong(entry.output_tensor_info[i].id); - PyList_SET_ITEM(output_ids, i, tid); +// TODO(apassos): cache the attribute lookups as member variables and decref +// them in the destructor. +class PyVSpace : public tensorflow::eager::VSpace { + public: + explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} + + tensorflow::Status Initialize() { + num_elements_ = PyObject_GetAttrString(py_vspace_, "num_elements_fn"); + if (num_elements_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); + } + aggregate_fn_ = PyObject_GetAttrString(py_vspace_, "aggregate_fn"); + if (aggregate_fn_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); + } + zeros_ = PyObject_GetAttrString(py_vspace_, "zeros"); + if (zeros_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); } - PyObject* input_ids = PyList_New(entry.input_tensor_id.size()); - for (int i = 0; i < entry.input_tensor_id.size(); ++i) { - PyObject* tid = PyLong_FromLong(entry.input_tensor_id[i]); - PyList_SET_ITEM(input_ids, i, tid); + ones_like_ = PyObject_GetAttrString(reinterpret_cast(py_vspace_), + "ones_like"); + if (ones_like_ == nullptr) { + return tensorflow::errors::InvalidArgument("invalid vspace"); } - PyObject* backward_function = - reinterpret_cast(entry.backward_function); - PyObject* output_shape_and_dtype = - PyList_New(entry.output_tensor_info.size()); - for (int i = 0; i < entry.output_tensor_info.size(); ++i) { - const tensorflow::TensorShape& shape = entry.output_tensor_info[i].shape; - PyObject* shape_list = PyList_New(shape.dims()); - for (int j = 0; j < shape.dims(); ++j) { - PyList_SET_ITEM(shape_list, j, PyLong_FromLong(shape.dim_size(j))); + return tensorflow::Status::OK(); + } + + ~PyVSpace() override { + Py_XDECREF(num_elements_); + Py_XDECREF(aggregate_fn_); + Py_XDECREF(zeros_); + Py_XDECREF(ones_like_); + } + + tensorflow::int64 NumElements(void* tensor) const final { + PyObject* arglist = + Py_BuildValue("(O)", reinterpret_cast(tensor)); + PyObject* result = PyEval_CallObject(num_elements_, arglist); + tensorflow::int64 r = MakeInt(result); + Py_DECREF(result); + Py_DECREF(arglist); + return r; + } + + void* AggregateGradients( + tensorflow::gtl::ArraySlice gradient_tensors) const final { + PyObject* list = PyList_New(gradient_tensors.size()); + for (int i = 0; i < gradient_tensors.size(); ++i) { + // Note: stealing a reference to the gradient tensors. + CHECK(gradient_tensors[i] != nullptr); + CHECK(gradient_tensors[i] != Py_None); + PyList_SET_ITEM(list, i, + reinterpret_cast(gradient_tensors[i])); + } + PyObject* arglist = Py_BuildValue("(O)", list); + CHECK(arglist != nullptr); + PyObject* result = PyEval_CallObject(aggregate_fn_, arglist); + Py_DECREF(arglist); + Py_DECREF(list); + return result; + } + + void* Zeros(tensorflow::TensorShape shape, + tensorflow::DataType dtype) const final { + PyObject* py_shape = PyTuple_New(shape.dims()); + for (int i = 0; i < shape.dims(); ++i) { + PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); + } + PyObject* py_dtype = PyLong_FromLong(static_cast(dtype)); + PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype); + PyObject* result = PyEval_CallObject(zeros_, arg_list); + Py_DECREF(arg_list); + Py_DECREF(py_dtype); + Py_DECREF(py_shape); + return reinterpret_cast(result); + } + + void* OnesLike(void* tensor) const final { + PyObject* arg_list = Py_BuildValue("(O)", tensor); + PyObject* result = PyEval_CallObject(ones_like_, arg_list); + if (result == nullptr) { + VLOG(1) << "Call to ones_like failed"; + } + Py_DECREF(arg_list); + return reinterpret_cast(result); + } + + tensorflow::int64 TensorId(void* tensor) const final { + PyObject* py_tensor = reinterpret_cast(tensor); + PyObject* id_field = PyObject_GetAttrString(py_tensor, "_id"); + tensorflow::int64 id = MakeInt(id_field); + Py_DECREF(id_field); + return id; + } + + tensorflow::Status CallBackwardFunction( + void* backward_function, + tensorflow::gtl::ArraySlice output_gradients, + std::vector* result) const final { + PyObject* grads = PyTuple_New(output_gradients.size()); + for (int i = 0; i < output_gradients.size(); ++i) { + if (output_gradients[i] == nullptr) { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(grads, i, Py_None); + } else { + PyTuple_SET_ITEM(grads, i, + reinterpret_cast(output_gradients[i])); } - PyObject* type_enum = PyLong_FromLong(entry.output_tensor_info[i].dtype); - PyObject* tuple = PyTuple_Pack(2, shape_list, type_enum); - Py_DECREF(shape_list); - Py_DECREF(type_enum); - PyList_SET_ITEM(output_shape_and_dtype, i, tuple); } - PyObject* opinfo = PyTuple_Pack(5, op_type, output_ids, input_ids, - backward_function, output_shape_and_dtype); - Py_DECREF(op_type); - Py_DECREF(output_ids); - Py_DECREF(input_ids); + PyObject* py_result = PyEval_CallObject( + reinterpret_cast(backward_function), grads); + Py_DECREF(grads); Py_DECREF(backward_function); - Py_DECREF(output_shape_and_dtype); - PyDict_SetItem(op_tape, opid, opinfo); - Py_DECREF(opid); - Py_DECREF(opinfo); - } - PyObject* retval = PyTuple_Pack(2, tensor_tape, op_tape); - Py_DECREF(tensor_tape); - Py_DECREF(op_tape); - return retval; + if (py_result == nullptr) { + VLOG(1) << "Gradient function threw exceptions"; + if (VLOG_IS_ON(1)) { + PyErr_Print(); + } + return tensorflow::errors::Internal("gradient function threw exceptions"); + } + result->clear(); + PyObject* seq = + PySequence_Fast(py_result, "expected a sequence of gradients"); + if (seq == nullptr) { + return tensorflow::errors::InvalidArgument( + "gradient function did not return a list"); + } + int len = PySequence_Fast_GET_SIZE(seq); + VLOG(1) << "Gradient length is " << len; + result->reserve(len); + for (int i = 0; i < len; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(seq, i); + if (item == Py_None) { + result->push_back(nullptr); + } else { + Py_INCREF(item); + result->push_back(item); + } + } + Py_DECREF(seq); + Py_DECREF(py_result); + return tensorflow::Status::OK(); + } + + void DeleteTensor(void* tensor) const final { + Py_XDECREF(reinterpret_cast(tensor)); + } + + private: + PyObject* py_vspace_; + + PyObject* num_elements_; + PyObject* aggregate_fn_; + PyObject* zeros_; + PyObject* ones_like_; +}; + +std::vector MakeTensorList(PyObject* tensors) { + PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); + if (seq == nullptr) { + return {}; + } + int len = PySequence_Fast_GET_SIZE(seq); + std::vector list; + list.reserve(len); + for (int i = 0; i < len; ++i) { + list.push_back(PySequence_Fast_GET_ITEM(seq, i)); + } + Py_DECREF(seq); + return list; +} + +PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, + PyObject* target, PyObject* sources, + PyObject* output_gradients, TF_Status* status) { + PyVSpace c_vspace(vspace); + if (!c_vspace.Initialize().ok()) { + return nullptr; + } + + std::vector target_vec = MakeTensorList(target); + if (PyErr_Occurred()) { + return nullptr; + } + std::vector sources_vec = MakeTensorList(sources); + if (PyErr_Occurred()) { + return nullptr; + } + std::vector outgrad_vec; + if (output_gradients != Py_None) { + outgrad_vec = MakeTensorList(output_gradients); + if (PyErr_Occurred()) { + return nullptr; + } + for (void* tensor : outgrad_vec) { + // Calling the backward function will eat a reference to the tensors in + // outgrad_vec, so we need to increase their reference count. + Py_INCREF(reinterpret_cast(tensor)); + } + } + TFE_Py_Tape* tape_obj = reinterpret_cast(tape); + std::vector result; + status->status = tape_obj->tape->Gradient(c_vspace, target_vec, sources_vec, + outgrad_vec, &result); + if (!status->status.ok()) { + return nullptr; + } + if (!result.empty()) { + PyObject* py_result = PyList_New(result.size()); + for (int i = 0; i < result.size(); ++i) { + if (result[i] == nullptr) { + Py_INCREF(Py_None); + result[i] = Py_None; + } + PyList_SET_ITEM(py_result, i, reinterpret_cast(result[i])); + } + return py_result; + } + Py_INCREF(Py_None); + return Py_None; } diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index c16aa8c2f7..a06f5e1a67 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -72,7 +72,7 @@ class Tape(object): True if any of the tensors is in the tape. """ return pywrap_tensorflow.TFE_Py_TapeShouldRecord( - self._tape, [x._id for x in tensors]) # pylint: disable=protected-access + self._tape, tensors) def watch(self, tensor): """Adds a tensor to the tape.""" @@ -99,16 +99,6 @@ class Tape(object): """Deletes any trace we have for this tensor.""" self._delete_tensor_id(tensor_id) - def export(self): - """Exports the internal state of this tape. - - Returns: - tensor_tape: a map from tensor_id(tensor) to - responsible for generating that tensor. - op_tape: a map from to TapeEntry for that op. - """ - return pywrap_tensorflow.TFE_Py_TapeExport(self._tape) - class _TapeStack(threading.local): diff --git a/tensorflow/python/eager/tape_test.py b/tensorflow/python/eager/tape_test.py index c97cb62125..b490bac66d 100644 --- a/tensorflow/python/eager/tape_test.py +++ b/tensorflow/python/eager/tape_test.py @@ -22,7 +22,6 @@ from __future__ import print_function from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient -from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -166,25 +165,6 @@ class TapeTest(test.TestCase): g, = backprop.gradients_function(fn, [0])(t) self.assertAllEqual(g, 1.0) - def testTapeGC(self): - # TODO(apassos) figure out how to test this without using tape internal - # APIs. - tape.push_new_tape() - - def f(): - x = constant_op.constant(1.0) - tape.watch(x) - x = gradient_is_constant(x) - x = gradient_is_constant(x) - x = gradient_is_constant(x) - - f() - t = tape.pop_tape() - tensor_tape, op_tape = t.export() - self.assertEqual(len(tensor_tape), 1) # The watched tensor will remain on - # the tape - self.assertEqual(len(op_tape), 0) # No operations should remain on the tape - def testCustomGradientGraphMode(self): with context.graph_mode(), self.test_session(): diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 637f738fed..cbacf458a0 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -29,7 +29,7 @@ limitations under the License. %rename("%s") TFE_Py_TapeWatch; %rename("%s") TFE_Py_TapeDeleteTrace; %rename("%s") TFE_Py_TapeRecordOperation; -%rename("%s") TFE_Py_TapeExport; +%rename("%s") TFE_Py_TapeGradient; %rename("%s") TFE_NewContextOptions; %rename("%s") TFE_ContextOptionsSetConfig; %rename("%s") TFE_ContextOptionsSetDevicePlacementPolicy; @@ -125,7 +125,7 @@ limitations under the License. SWIG_fail; } if (EagerTensor_CheckExact(elem)) { - (*$1)[i] = EagerTensorHandle(elem); + (*$1)[i] = EagerTensor_Handle(elem); } else { SWIG_exception_fail(SWIG_TypeError, "provided list of inputs contains objects other " -- GitLab From f7ecd257e0032aa36c12c166c38a3a9ed13b70a8 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 8 Nov 2017 13:49:36 -0800 Subject: [PATCH 0197/1801] [XLA:GPU] Add more logging to convolution autotuning. PiperOrigin-RevId: 175057863 --- .../xla/service/gpu/convolution_thunk.cc | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 536b96dcf6..e79d0a4c79 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" @@ -279,6 +280,13 @@ std::vector ConvolutionThunk::GetAlgorithms( return algorithms; } +static string AlgorithmToString(const se::dnn::AlgorithmDesc& algo) { + if (algo.tensor_ops_enabled()) { + return tensorflow::strings::StrCat(algo.algo_id(), "+TC"); + } + return tensorflow::strings::StrCat(algo.algo_id()); +} + tensorflow::Status ConvolutionThunk::ConvolveWithTune( const BatchDescriptor& input_descriptor, se::DeviceMemory input_data, const FilterDescriptor& filter_descriptor, @@ -303,6 +311,8 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( buffer_allocations.device_ordinal(), buffer_allocations.memory_allocator()); se::dnn::ProfileResult profile_result; + VLOG(3) << "Trying algorithm " << AlgorithmToString(algorithm) + << " for ConvolutionThunk: " << this; bool launch_ok = Convolve(input_descriptor, input_data, filter_descriptor, filter_data, output_descriptor, output_data, convolution_descriptor, @@ -310,6 +320,11 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( &scratch_allocator, &profile_result) .ok(); if (launch_ok && profile_result.is_valid()) { + VLOG(3) << "Run of algorithm " << AlgorithmToString(algorithm) + << " for ConvolutionThunk " << this << " succeeded, taking " + << profile_result.elapsed_time_in_ms() + << "ms. (Best result: " << best_result.elapsed_time_in_ms() + << "ms)"; if (profile_result.elapsed_time_in_ms() < best_result.elapsed_time_in_ms()) { best_result = profile_result; @@ -319,6 +334,9 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( best_result_without_scratch.elapsed_time_in_ms()) { best_result_without_scratch = profile_result; } + } else { + VLOG(3) << "Run of algorithm " << AlgorithmToString(algorithm) + << " for ConvolutionThunk " << this << " failed."; } } @@ -343,8 +361,8 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( { VLOG(2) << "Using convolution algorithm (" - << best_algorithm_.algorithm().algo_id() << ", " - << best_algorithm_.algorithm_no_scratch().algo_id() + << AlgorithmToString(best_algorithm_.algorithm()) << ", " + << AlgorithmToString(best_algorithm_.algorithm_no_scratch()) << ") for ConvolutionThunk: " << this; ConvolveScratchAllocator scratch_allocator( buffer_allocations.device_ordinal(), -- GitLab From 15aba57ec216da005411feacf53c08dff7c2652b Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 8 Nov 2017 14:10:50 -0800 Subject: [PATCH 0198/1801] Fix typo in tensorflow/python/client/session_clusterspec_prop_test.py PiperOrigin-RevId: 175061854 --- tensorflow/python/client/session_clusterspec_prop_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py index b77912b4f7..28a4dd27a7 100644 --- a/tensorflow/python/client/session_clusterspec_prop_test.py +++ b/tensorflow/python/client/session_clusterspec_prop_test.py @@ -169,7 +169,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase): # BaseRemoteRendezvous::SameWorkerRecvDone that means the test doesn't # actually capture the motivating bug unless run on a GPU machine. # - # Example error message (before bugfix -- linebreaks added because lint): + # Example error message (before bugfix -- line breaks added because lint): # # W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device: # /job:worker/replica:0/task:0/device:CPU:0 all devices: -- GitLab From 20be26834218298e8fa8918b2e5d68b70015f809 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 8 Nov 2017 14:20:09 -0800 Subject: [PATCH 0199/1801] More idiomatic tests for defuns using variables. PiperOrigin-RevId: 175063558 --- tensorflow/python/eager/function_test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 243efccac4..209715894e 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -62,13 +62,21 @@ class FunctionTest(test.TestCase): @function.defun def step(): def inner(): - tape.watch_variable(v) return v * v return backprop.implicit_grad(inner)()[0][0] self.assertAllEqual(step(), 2.0) + def testDefunDifferentiable(self): + v = resource_variable_ops.ResourceVariable(1.0) + + @function.defun + def f(): + return v * v + + self.assertAllEqual(backprop.implicit_grad(f)()[0][0], 2.0) + def testGraphModeCaptureVariable(self): with context.graph_mode(), self.test_session() as sess: -- GitLab From bf05a2eef97863fc78778bcde5987f93af8a7598 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 8 Nov 2017 14:21:28 -0800 Subject: [PATCH 0200/1801] Run Estimator.export_savedmodel with the user's TFSession config. Estimator assumes a particular config_pb2.ConfigProto that configures the underlying session. The config is either the default one or a user-supplied one. The default config has allow_soft_placement=True, the option that allows silent placement of operations on devices with kernels when the requested device doesn't have a kernel for that operation. Estimator's train(), eval() and predict() calls run with the underlying session configured in accordance to the ConfigProto. However, export_savedmodel runs without such a configuration. This appears to be a problem when the ModeKeys.PREDICT graph has an op that was placed on GPU but doesn't have a GPU kernel. The graph works for predict(), but when export_savedmodel() is trying to restore the corresponding variable, the code fails with "no kernel for the op" error. I attempted to show that in a test. To fix this issue, I am passing the ConfigProto to the session inside export_savedmodel. An alternative conservative and ugly fix is to pass a new instance ConfigProto with only allow_soft_placement=Estimator._session_config.allow_soft_placement. Passing the whole ConfigProto feels like the right thing to do. Here's what else is in ConfigProto: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/core/protobuf/config.proto#L280. I verified by running an internal pipeline. Here's allow_soft_placement logic: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/core/common_runtime/placer.cc#L322. PiperOrigin-RevId: 175063803 --- .../estimator/replicate_model_fn_test.py | 5 +- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/estimator/estimator_test.py | 66 +++++++++++++++++++ 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index 10b47fba5a..ce286c33b0 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -90,14 +90,11 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): def optimizer_fn(): return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05) - # TODO(isaprykin): Switch Estimator to use allow_soft_placement=True - # during export_savedmodel and then switch this test to replicate over - # GPUs instead of CPUs. estimator = estimator_lib.Estimator( model_fn=replicate_model_fn.replicate_model_fn( estimator.model_fn, optimizer_fn, - devices=['/cpu:0', '/cpu:0', '/cpu:0']), + devices=['/gpu:0', '/gpu:1', '/gpu:2']), model_dir=estimator.model_dir, config=estimator.config, params=estimator.params) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index a730e107ba..2d036e2cfb 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -537,7 +537,7 @@ class Estimator(object): temp_export_dir = get_temp_export_dir(export_dir) # TODO(soergel): Consider whether MonitoredSession makes sense here - with tf_session.Session() as session: + with tf_session.Session(config=self._session_config) as session: saver_for_restore = estimator_spec.scaffold.saver or saver.Saver( sharded=True) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 2b9b44523b..c1b773b8c4 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -50,6 +50,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses from tensorflow.python.platform import gfile @@ -1910,6 +1911,71 @@ class EstimatorExportTest(test.TestCase): est.train(dummy_input_fn, steps=1) est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) + def test_export_savedmodel_respects_soft_placement(self): + def model_fn_with_a_gpu_op_but_no_kernel(features, labels, mode): + _, _ = features, labels + table = saver_test_utils.CheckpointedOp(name='v2') + + update_global_step = state_ops.assign_add(training.get_global_step(), 1) + with ops.control_dependencies([update_global_step]): + train_op = table.insert('k1', 30.0) + + # In this test, there are no GPUs available. The goal is to verify that + # export_savedmodel executes nevertheless. + with ops.device('/gpu:0'): + string_op = string_ops.as_string(update_global_step) + + with ops.control_dependencies([string_op]): + prediction = table.lookup('k1', 0.0) + + return model_fn_lib.EstimatorSpec( + mode, + predictions=prediction, + loss=constant_op.constant(1.), + train_op=train_op, + export_outputs={ + 'test': export_output.PredictOutput({ + 'prediction': prediction + }) + }) + + tmpdir = tempfile.mkdtemp() + est = estimator.Estimator( + model_fn=model_fn_with_a_gpu_op_but_no_kernel) + est.train(input_fn=dummy_input_fn, steps=1) + feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64), + 'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)} + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir_base = os.path.join( + compat.as_bytes(tmpdir), compat.as_bytes('export')) + + export_dir = est.export_savedmodel( + export_dir_base, serving_input_receiver_fn) + + # At this point, if export_savedmodel executed with + # allow_soft_placement=True, then the GPU-assigned operation was silently + # placed on the CPU. Otherwise, an exception would have been raised + # related to the fact that the requested GPU device isn't available. + + # Expectations below assume that export_savedmodel has completed normally. + self.assertTrue(gfile.Exists(export_dir_base)) + self.assertTrue(gfile.Exists(export_dir)) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('saved_model.pb')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables/variables.index')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables/variables.data-00000-of-00001')))) + + gfile.DeleteRecursively(tmpdir) + class EstimatorHookOrderingTest(test.TestCase): -- GitLab From 9e0e6ff16b0cb5764416297cf3797ff7521fd73a Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 8 Nov 2017 14:39:42 -0800 Subject: [PATCH 0201/1801] Fix tensorflow.org rendering of the example code for run_step_fn. Python code isn't indented correctly. PiperOrigin-RevId: 175067065 --- tensorflow/python/training/monitored_session.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index af9f11bb07..1f6016a91b 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -536,6 +536,7 @@ class _MonitoredSession(object): will return True. Example usage: + ```python with tf.Graph().as_default(): c = tf.placeholder(dtypes.float32) @@ -552,6 +553,7 @@ class _MonitoredSession(object): while not session.should_stop(): a = session.run_step_fn(step_fn) ``` + Hooks interact with the `run_with_hooks()` call inside the `step_fn` as they do with a `MonitoredSession.run` call. -- GitLab From 3aaa1d3a0dcc52dd83aa9f1ad308e9da47556583 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 8 Nov 2017 14:57:30 -0800 Subject: [PATCH 0202/1801] Update tf.keras RNNs to the Keras 2.0.9 API. Does not include cuDNN layers. Additionally, fix a bug with handling of activity_regularizer in tf.layers base Layer (and add test). PiperOrigin-RevId: 175070161 --- tensorflow/python/keras/BUILD | 12 + .../keras/_impl/keras/engine/topology.py | 9 +- .../keras/_impl/keras/integration_test.py | 2 +- .../keras/_impl/keras/layers/gru_test.py | 12 +- .../keras/_impl/keras/layers/lstm_test.py | 11 +- .../keras/_impl/keras/layers/recurrent.py | 2449 +++++++++++++---- .../_impl/keras/layers/recurrent_test.py | 378 +++ .../_impl/keras/layers/simplernn_test.py | 12 +- tensorflow/python/keras/layers/__init__.py | 5 + tensorflow/python/layers/base.py | 2 +- tensorflow/python/layers/base_test.py | 7 + .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 179 ++ .../tensorflow.keras.layers.-g-r-u.pbtxt | 86 +- ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 179 ++ .../tensorflow.keras.layers.-l-s-t-m.pbtxt | 90 +- .../tensorflow.keras.layers.-r-n-n.pbtxt | 191 ++ ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 179 ++ ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 78 +- ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 183 ++ .../api/golden/tensorflow.keras.layers.pbtxt | 20 + tensorflow/tools/ci_build/ci_sanity.sh | 3 +- 21 files changed, 3414 insertions(+), 673 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/layers/recurrent_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 4db48b45ed..6a762ee5d2 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -498,6 +498,18 @@ py_test( ], ) +py_test( + name = "recurrent_test", + size = "small", + srcs = ["_impl/keras/layers/recurrent_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":keras", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + py_test( name = "serialization_test", size = "small", diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index f9be782f85..2bcbabf19c 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -29,6 +29,9 @@ from six.moves import zip # pylint: disable=redefined-builtin from tensorflow.python.eager import context from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import constraints +from tensorflow.python.keras._impl.keras import initializers +from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary @@ -209,9 +212,9 @@ class Layer(tf_base_layers.Layer): dtype = K.floatx() weight = self.add_variable(name, shape, dtype=dtype, - initializer=initializer, - regularizer=regularizer, - constraint=constraint, + initializer=initializers.get(initializer), + regularizer=regularizers.get(regularizer), + constraint=constraints.get(constraint), trainable=trainable) return weight diff --git a/tensorflow/python/keras/_impl/keras/integration_test.py b/tensorflow/python/keras/_impl/keras/integration_test.py index 7110036848..871a8c7329 100644 --- a/tensorflow/python/keras/_impl/keras/integration_test.py +++ b/tensorflow/python/keras/_impl/keras/integration_test.py @@ -93,7 +93,7 @@ class KerasIntegrationTest(test.TestCase): y_test = keras.utils.to_categorical(y_test) model = keras.models.Sequential() - model.add(keras.layers.LSTM(3, return_sequences=True, + model.add(keras.layers.LSTM(5, return_sequences=True, input_shape=x_train.shape[1:])) model.add(keras.layers.GRU(y_train.shape[-1], activation='softmax')) model.compile(loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/_impl/keras/layers/gru_test.py b/tensorflow/python/keras/_impl/keras/layers/gru_test.py index 03f0736161..c57fbac41c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/gru_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/gru_test.py @@ -156,8 +156,10 @@ class GRULayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((2, 3, 2)))) - self.assertEqual(len(layer.losses), 4) + + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) def test_constraints_GRU(self): embedding_dim = 4 @@ -175,9 +177,9 @@ class GRULayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) def test_with_masking_layer_GRU(self): layer_class = keras.layers.GRU diff --git a/tensorflow/python/keras/_impl/keras/layers/lstm_test.py b/tensorflow/python/keras/_impl/keras/layers/lstm_test.py index f43d90fec8..8d359bf17c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/lstm_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/lstm_test.py @@ -156,8 +156,9 @@ class LSTMLayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((2, 3, 2)))) - self.assertEqual(len(layer.losses), 4) + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) def test_constraints_LSTM(self): embedding_dim = 4 @@ -175,9 +176,9 @@ class LSTMLayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) def test_with_masking_layer_LSTM(self): layer_class = keras.layers.LSTM diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 139523403c..2bc74d5f80 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,99 +29,209 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer +from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg +from tensorflow.python.platform import tf_logging as logging -# pylint: disable=access-member-before-definition +class StackedRNNCells(Layer): + """Wrapper allowing a stack of RNN cells to behave as a single cell. - -def _time_distributed_dense(x, - w, - b=None, - dropout=None, - input_dim=None, - output_dim=None, - timesteps=None, - training=None): - """Apply `y . w + b` for every temporal slice y of x. + Used to implement efficient stacked RNNs. Arguments: - x: input tensor. - w: weight matrix. - b: optional bias vector. - dropout: whether to apply dropout (same dropout mask - for every temporal slice of the input). - input_dim: integer; optional dimensionality of the input. - output_dim: integer; optional dimensionality of the output. - timesteps: integer; optional number of timesteps. - training: training phase tensor or boolean. - - Returns: - Output tensor. - """ - if not input_dim: - input_dim = K.shape(x)[2] - if not timesteps: - timesteps = K.shape(x)[1] - if not output_dim: - output_dim = K.shape(w)[1] - - if dropout is not None and 0. < dropout < 1.: - # apply the same dropout pattern at every timestep - ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) - dropout_matrix = K.dropout(ones, dropout) - expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) - x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) - - # collapse time dimension and batch dimension together - x = K.reshape(x, (-1, input_dim)) - x = K.dot(x, w) - if b is not None: - x = K.bias_add(x, b) - # reshape to 3D tensor - if K.backend() == 'tensorflow': - x = K.reshape(x, K.stack([-1, timesteps, output_dim])) - x.set_shape([None, None, output_dim]) - else: - x = K.reshape(x, (-1, timesteps, output_dim)) - return x + cells: List of RNN cell instances. + Examples: -class Recurrent(Layer): - """Abstract base class for recurrent layers. + ```python + cells = [ + keras.layers.LSTMCell(output_dim), + keras.layers.LSTMCell(output_dim), + keras.layers.LSTMCell(output_dim), + ] - Do not use in a model -- it's not a valid layer! - Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead. + inputs = keras.Input((timesteps, input_dim)) + x = keras.layers.RNN(cells)(inputs) + ``` + """ - All recurrent layers (`LSTM`, `GRU`, `SimpleRNN`) also - follow the specifications of this class and accept - the keyword arguments listed below. + def __init__(self, cells, **kwargs): + for cell in cells: + if not hasattr(cell, 'call'): + raise ValueError('All cells must have a `call` method. ' + 'received cells:', cells) + if not hasattr(cell, 'state_size'): + raise ValueError('All cells must have a ' + '`state_size` attribute. ' + 'received cells:', cells) + self.cells = cells + super(StackedRNNCells, self).__init__(**kwargs) + + @property + def state_size(self): + # States are a flat list + # in reverse order of the cell stack. + # This allows to preserve the requirement + # `stack.state_size[0] == output_dim`. + # e.g. states of a 2-layer LSTM would be + # `[h2, c2, h1, c1]` + # (assuming one LSTM has states [h, c]) + state_size = [] + for cell in self.cells[::-1]: + if hasattr(cell.state_size, '__len__'): + state_size += list(cell.state_size) + else: + state_size.append(cell.state_size) + return tuple(state_size) + + def call(self, inputs, states, **kwargs): + # Recover per-cell states. + nested_states = [] + for cell in self.cells[::-1]: + if hasattr(cell.state_size, '__len__'): + nested_states.append(states[:len(cell.state_size)]) + states = states[len(cell.state_size):] + else: + nested_states.append([states[0]]) + states = states[1:] + nested_states = nested_states[::-1] + + # Call the cells in order and store the returned states. + new_nested_states = [] + for cell, states in zip(self.cells, nested_states): + inputs, states = cell.call(inputs, states, **kwargs) + new_nested_states.append(states) + + # Format the new states as a flat list + # in reverse cell order. + states = [] + for cell_states in new_nested_states[::-1]: + states += cell_states + return inputs, states - Example: + def build(self, input_shape): + for cell in self.cells: + if isinstance(cell, Layer): + cell.build(input_shape) + if hasattr(cell.state_size, '__len__'): + output_dim = cell.state_size[0] + else: + output_dim = cell.state_size + input_shape = (input_shape[0], input_shape[1], output_dim) + self.built = True - ```python - # as the first layer in a Sequential model - model = Sequential() - model.add(LSTM(32, input_shape=(10, 64))) - # now model.output_shape == (None, 32) - # note: `None` is the batch dimension. - - # for subsequent layers, no need to specify the input size: - model.add(LSTM(16)) - - # to stack recurrent layers, you must use return_sequences=True - # on any recurrent layer that feeds into another recurrent layer. - # note that you only need to specify the input size on the first layer. - model = Sequential() - model.add(LSTM(64, input_dim=64, input_length=10, return_sequences=True)) - model.add(LSTM(32, return_sequences=True)) - model.add(LSTM(10)) - ``` + def get_config(self): + cells = [] + for cell in self.cells: + cells.append({ + 'class_name': cell.__class__.__name__, + 'config': cell.get_config() + }) + config = {'cells': cells} + base_config = super(StackedRNNCells, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + from tensorflow.python.keras._impl.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top + cells = [] + for cell_config in config.pop('cells'): + cells.append( + deserialize_layer(cell_config, custom_objects=custom_objects)) + return cls(cells, **config) + + @property + def trainable_weights(self): + if not self.trainable: + return [] + weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + weights += cell.trainable_weights + return weights + + @property + def non_trainable_weights(self): + weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + weights += cell.non_trainable_weights + if not self.trainable: + trainable_weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + trainable_weights += cell.trainable_weights + return trainable_weights + weights + return weights + + def get_weights(self): + """Retrieves the weights of the model. + + Returns: + A flat list of Numpy arrays. + """ + weights = [] + for cell in self.cells: + if isinstance(cell, Layer): + weights += cell.weights + return K.batch_get_value(weights) + + def set_weights(self, weights): + """Sets the weights of the model. + + Arguments: + weights: A list of Numpy arrays with shapes and types matching + the output of `model.get_weights()`. + """ + tuples = [] + for cell in self.cells: + if isinstance(cell, Layer): + num_param = len(cell.weights) + weights = weights[:num_param] + for sw, w in zip(cell.weights, weights): + tuples.append((sw, w)) + weights = weights[num_param:] + K.batch_set_value(tuples) + + @property + def losses(self): + losses = [] + for cell in self.cells: + if isinstance(cell, Layer): + cell_losses = cell.losses + losses += cell_losses + return losses + + def get_losses_for(self, inputs=None): + losses = [] + for cell in self.cells: + if isinstance(cell, Layer): + cell_losses = cell.get_losses_for(inputs) + losses += cell_losses + return losses + + +class RNN(Layer): + """Base class for recurrent layers. Arguments: - weights: list of Numpy arrays to set as initial weights. - The list should have 3 elements, of shapes: - `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. - return_sequences: Boolean. Whether to return the last output + cell: A RNN cell instance. A RNN cell is a class that has: + - a `call(input_at_t, states_at_t)` method, returning + `(output_at_t, states_at_t_plus_1)`. The call method of the + cell can also take the optional argument `constants`, see + section "Note on passing external constants" below. + - a `state_size` attribute. This can be a single integer + (single state) in which case it is + the size of the recurrent state + (which should be the same as the size of the cell output). + This can also be a list/tuple of integers + (one size per state). In this case, the first entry + (`state_size[0]`) should be the same as + the size of the cell output. + It is also possible for `cell` to be a list of RNN cell instances, + in which cases the cells get stacked on after the other in the RNN, + implementing an efficient stacked RNN. + return_sequences: Boolean. Whether to return the last output. in the output sequence, or the full sequence. return_state: Boolean. Whether to return the last state in addition to the output. @@ -137,21 +247,9 @@ class Recurrent(Layer): Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. - implementation: one of {0, 1, or 2}. - If set to 0, the RNN will use - an implementation that uses fewer, larger matrix products, - thus running faster on CPU but consuming more memory. - If set to 1, the RNN will use more matrix products, - but smaller ones, thus running slower - (may actually be faster on GPU) while consuming less memory. - If set to 2 (LSTM/GRU only), - the RNN will combine the input gate, - the forget gate and the output gate into a single matrix, - enabling more time-efficient parallelization on the GPU. - Note: RNN dropout must be shared for all gates, - resulting in a slightly reduced regularization. input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) + This argument (or alternatively, + the keyword argument `input_shape`) is required when using this layer as the first layer in a model. input_length: Length of input sequences, to be specified when it is constant. @@ -163,7 +261,7 @@ class Recurrent(Layer): at the level of the first layer (e.g. via the `input_shape` argument) - Input shape:s + Input shape: 3D tensor with shape `(batch_size, timesteps, input_dim)`, (Optional) 2D tensors with shape `(batch_size, output_dim)`. @@ -178,7 +276,7 @@ class Recurrent(Layer): # Masking This layer supports masking for input data with a variable number of timesteps. To introduce masks to your data, - use an `Embedding` layer with the `mask_zero` parameter + use an [Embedding](embeddings.md) layer with the `mask_zero` parameter set to `True`. # Note on using statefulness in RNNs @@ -212,42 +310,128 @@ class Recurrent(Layer): calling `reset_states` with the keyword argument `states`. The value of `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. + + # Note on passing external constants to RNNs + You can pass "external" constants to the cell using the `constants` + keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This + requires that the `cell.call` method accepts the same keyword argument + `constants`. Such constants can be used to condition the cell + transformation on additional static inputs (not changing over time), + a.k.a. an attention mechanism. + + Examples: + + ```python + # First, let's define a RNN Cell, as a layer subclass. + + class MinimalRNNCell(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(MinimalRNNCell, self).__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = K.dot(inputs, self.kernel) + output = h + K.dot(prev_output, self.recurrent_kernel) + return output, [output] + + # Let's use this cell in a RNN layer: + + cell = MinimalRNNCell(32) + x = keras.Input((None, 5)) + layer = RNN(cell) + y = layer(x) + + # Here's how to use the cell to build a stacked RNN: + + cells = [MinimalRNNCell(32), MinimalRNNCell(64)] + x = keras.Input((None, 5)) + layer = RNN(cells) + y = layer(x) + ``` """ def __init__(self, + cell, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, - implementation=0, + activity_regularizer=None, **kwargs): - super(Recurrent, self).__init__(**kwargs) + if isinstance(cell, (list, tuple)): + cell = StackedRNNCells(cell) + if not hasattr(cell, 'call'): + raise ValueError('`cell` should have a `call` method. ' + 'The RNN was passed:', cell) + if not hasattr(cell, 'state_size'): + raise ValueError('The RNN cell should have ' + 'an attribute `state_size` ' + '(tuple of integers, ' + 'one integer per RNN state).') + super(RNN, self).__init__( + activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + self.cell = cell self.return_sequences = return_sequences self.return_state = return_state self.go_backwards = go_backwards self.stateful = stateful self.unroll = unroll - self.implementation = implementation + self.supports_masking = True self.input_spec = [InputSpec(ndim=3)] self.state_spec = None - self.dropout = 0 - self.recurrent_dropout = 0 + self._states = None + self.constants_spec = None + self._num_constants = None + + @property + def states(self): + if self._states is None: + if isinstance(self.cell.state_size, int): + num_states = 1 + else: + num_states = len(self.cell.state_size) + return [None for _ in range(num_states)] + return self._states + + @states.setter + def states(self, states): + self._states = states def _compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] input_shape = tensor_shape.TensorShape(input_shape).as_list() + + if hasattr(self.cell.state_size, '__len__'): + output_dim = self.cell.state_size[0] + else: + output_dim = self.cell.state_size + if self.return_sequences: - output_shape = (input_shape[0], input_shape[1], self.units) + output_shape = (input_shape[0], input_shape[1], output_dim) else: - output_shape = (input_shape[0], self.units) + output_shape = (input_shape[0], output_dim) if self.return_state: - state_shape = [tensor_shape.TensorShape( - (input_shape[0], self.units)) for _ in self.states] - return [tensor_shape.TensorShape(output_shape)] + state_shape + state_shape = [(input_shape[0], output_dim) for _ in self.states] + output_shape = [output_shape] + state_shape + else: + output_shape = output_shape return tensor_shape.TensorShape(output_shape) def compute_mask(self, inputs, mask): @@ -257,82 +441,123 @@ class Recurrent(Layer): if self.return_state: state_mask = [None for _ in self.states] return [output_mask] + state_mask - return output_mask + else: + return output_mask - def step(self, inputs, states): - raise NotImplementedError + def build(self, input_shape): + # Note input_shape will be list of shapes of initial states and + # constants if these are passed in __call__. + if self._num_constants is not None: + constants_shape = input_shape[-self._num_constants:] # pylint: disable=invalid-unary-operand-type + else: + constants_shape = None - def get_constants(self, inputs, training=None): - return [] + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) + + batch_size = input_shape[0] if self.stateful else None + input_dim = input_shape[-1] + self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) + + # allow cell (if layer) to build before we set or validate state_spec + if isinstance(self.cell, Layer): + step_input_shape = (input_shape[0],) + input_shape[2:] + if constants_shape is not None: + self.cell.build([step_input_shape] + constants_shape) + else: + self.cell.build(step_input_shape) + + # set or validate state_spec + if hasattr(self.cell.state_size, '__len__'): + state_size = list(self.cell.state_size) + else: + state_size = [self.cell.state_size] + + if self.state_spec is not None: + # initial_state was passed in call, check compatibility + if [spec.shape[-1] for spec in self.state_spec] != state_size: + raise ValueError( + 'An initial_state was passed that is not compatible with ' + '`cell.state_size`. Received `state_spec`={}; ' + 'However `cell.state_size` is ' + '{}'.format(self.state_spec, self.cell.state_size)) + else: + self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_size] + if self.stateful: + self.reset_states() def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) initial_state = K.expand_dims(initial_state) # (samples, 1) - initial_state = K.tile(initial_state, [1, - self.units]) # (samples, output_dim) - initial_state = [initial_state for _ in range(len(self.states))] - return initial_state - - def preprocess_input(self, inputs, training=None): - return inputs + if hasattr(self.cell.state_size, '__len__'): + return [K.tile(initial_state, [1, dim]) for dim in self.cell.state_size] + else: + return [K.tile(initial_state, [1, self.cell.state_size])] - def __call__(self, inputs, initial_state=None, **kwargs): - if (isinstance(inputs, (list, tuple)) and - len(inputs) > 1 - and initial_state is None): - initial_state = inputs[1:] - inputs = inputs[0] + def __call__(self, inputs, initial_state=None, constants=None, **kwargs): + inputs, initial_state, constants = self._standardize_args( + inputs, initial_state, constants) - # If `initial_state` is specified, - # and if it a Keras tensor, - # then add it to the inputs and temporarily - # modify the input spec to include the state. - if initial_state is None: - return super(Recurrent, self).__call__(inputs, **kwargs) + if initial_state is None and constants is None: + return super(RNN, self).__call__(inputs, **kwargs) - if not isinstance(initial_state, (list, tuple)): - initial_state = [initial_state] + # If any of `initial_state` or `constants` are specified and are Keras + # tensors, then add them to the inputs and temporarily modify the + # input_spec to include them. - is_keras_tensor = hasattr(initial_state[0], '_keras_history') - for tensor in initial_state: + additional_inputs = [] + additional_specs = [] + if initial_state is not None: + kwargs['initial_state'] = initial_state + additional_inputs += initial_state + self.state_spec = [ + InputSpec(shape=K.int_shape(state)) for state in initial_state + ] + additional_specs += self.state_spec + if constants is not None: + kwargs['constants'] = constants + additional_inputs += constants + self.constants_spec = [ + InputSpec(shape=K.int_shape(constant)) for constant in constants + ] + self._num_constants = len(constants) + additional_specs += self.constants_spec + # at this point additional_inputs cannot be empty + is_keras_tensor = hasattr(additional_inputs[0], '_keras_history') + for tensor in additional_inputs: if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state of an RNN layer cannot be' - ' specified with a mix of Keras tensors and' - ' non-Keras tensors') + raise ValueError('The initial state or constants of an RNN' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors') if is_keras_tensor: - # Compute the full input spec, including state - input_spec = self.input_spec - state_spec = self.state_spec - if not isinstance(input_spec, list): - input_spec = [input_spec] - if not isinstance(state_spec, list): - state_spec = [state_spec] - self.input_spec = input_spec + state_spec - - # Compute the full inputs, including state - inputs = [inputs] + list(initial_state) - - # Perform the call - output = super(Recurrent, self).__call__(inputs, **kwargs) - - # Restore original input spec - self.input_spec = input_spec + # Compute the full input spec, including state and constants + full_input = [inputs] + additional_inputs + full_input_spec = self.input_spec + additional_specs + # Perform the call with temporarily replaced input_spec + original_input_spec = self.input_spec + self.input_spec = full_input_spec + output = super(RNN, self).__call__(full_input, **kwargs) + self.input_spec = original_input_spec return output else: - kwargs['initial_state'] = initial_state - return super(Recurrent, self).__call__(inputs, **kwargs) - - def call(self, inputs, mask=None, training=None, initial_state=None): + return super(RNN, self).__call__(inputs, **kwargs) + + def call(self, + inputs, + mask=None, + training=None, + initial_state=None, + constants=None): # input shape: `(samples, time (padded with zeros), input_dim)` # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. if isinstance(inputs, list): - initial_state = inputs[1:] inputs = inputs[0] - elif initial_state is not None: + if initial_state is not None: pass elif self.stateful: initial_state = self.states @@ -343,13 +568,14 @@ class Recurrent(Layer): mask = mask[0] if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + str(len(initial_state)) + - ' initial states.') + raise ValueError( + 'Layer has ' + str(len(self.states)) + ' states but was passed ' + + str(len(initial_state)) + ' initial states.') input_shape = K.int_shape(inputs) - if self.unroll and input_shape[1] is None: + timesteps = input_shape[1] + if self.unroll and timesteps in [None, 1]: raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined. \n' + 'time dimension is undefined or equal to 1. \n' '- If using a Sequential model, ' 'specify the time dimension by passing ' 'an `input_shape` or `batch_input_shape` ' @@ -359,15 +585,31 @@ class Recurrent(Layer): '- If using the functional API, specify ' 'the time dimension by passing a `shape` ' 'or `batch_shape` argument to your Input layer.') - constants = self.get_constants(inputs, training=None) - preprocessed_input = self.preprocess_input(inputs, training=None) + + kwargs = {} + if has_arg(self.cell.call, 'training'): + kwargs['training'] = training + + if constants: + if not has_arg(self.cell.call, 'constants'): + raise ValueError('RNN cell does not support constants') + + def step(inputs, states): + constants = states[-self._num_constants:] # pylint: disable=invalid-unary-operand-type + states = states[:-self._num_constants] # pylint: disable=invalid-unary-operand-type + return self.cell.call(inputs, states, constants=constants, **kwargs) + else: + + def step(inputs, states): + return self.cell.call(inputs, states, **kwargs) + last_output, outputs, states = K.rnn( - self.step, - preprocessed_input, + step, + inputs, initial_state, + constants=constants, go_backwards=self.go_backwards, mask=mask, - constants=constants, unroll=self.unroll) if self.stateful: updates = [] @@ -375,21 +617,63 @@ class Recurrent(Layer): updates.append((self.states[i], states[i])) self.add_update(updates, inputs) - # Properly set learning phase - if 0 < self.dropout + self.recurrent_dropout: - last_output._uses_learning_phase = True - outputs._uses_learning_phase = True + if self.return_sequences: + output = outputs + else: + output = last_output - if not self.return_sequences: - outputs = last_output + # Properly set learning phase + if getattr(last_output, '_uses_learning_phase', False): + output._uses_learning_phase = True if self.return_state: if not isinstance(states, (list, tuple)): states = [states] else: states = list(states) - return [outputs] + states - return outputs + return [output] + states + else: + return output + + def _standardize_args(self, inputs, initial_state, constants): + """Standardize `__call__` arguments to a single list of tensor inputs. + + When running a model loaded from file, the input tensors + `initial_state` and `constants` can be passed to `RNN.__call__` as part + of `inputs` instead of by the dedicated keyword arguments. This method + makes sure the arguments are separated and that `initial_state` and + `constants` are lists of tensors (or None). + + Arguments: + inputs: tensor or list/tuple of tensors + initial_state: tensor or list of tensors or None + constants: tensor or list of tensors or None + + Returns: + inputs: tensor + initial_state: list of tensors or None + constants: list of tensors or None + """ + if isinstance(inputs, list): + assert initial_state is None and constants is None + if self._num_constants is not None: + constants = inputs[-self._num_constants:] # pylint: disable=invalid-unary-operand-type + inputs = inputs[:-self._num_constants] # pylint: disable=invalid-unary-operand-type + if len(inputs) > 1: + initial_state = inputs[1:] + inputs = inputs[0] + + def to_list_or_none(x): + if x is None or isinstance(x, list): + return x + if isinstance(x, tuple): + return list(x) + return [x] + + initial_state = to_list_or_none(initial_state) + constants = to_list_or_none(constants) + + return inputs, initial_state, constants def reset_states(self, states=None): if not self.stateful: @@ -408,10 +692,19 @@ class Recurrent(Layer): '`batch_shape` argument to your Input layer.') # initialize state if None if self.states[0] is None: - self.states = [K.zeros((batch_size, self.units)) for _ in self.states] + if hasattr(self.cell.state_size, '__len__'): + self.states = [ + K.zeros((batch_size, dim)) for dim in self.cell.state_size + ] + else: + self.states = [K.zeros((batch_size, self.cell.state_size))] elif states is None: - for state in self.states: - K.set_value(state, np.zeros((batch_size, self.units))) + if hasattr(self.cell.state_size, '__len__'): + for state, dim in zip(self.states, self.cell.state_size): + K.set_value(state, np.zeros((batch_size, dim))) + else: + K.set_value(self.states[0], np.zeros((batch_size, + self.cell.state_size))) else: if not isinstance(states, (list, tuple)): states = [states] @@ -421,11 +714,16 @@ class Recurrent(Layer): 'but it received ' + str(len(states)) + ' state values. Input received: ' + str(states)) for index, (value, state) in enumerate(zip(states, self.states)): - if value.shape != (batch_size, self.units): - raise ValueError('State ' + str(index) + - ' is incompatible with layer ' + self.name + - ': expected shape=' + str((batch_size, self.units)) + - ', found shape=' + str(value.shape)) + if hasattr(self.cell.state_size, '__len__'): + dim = self.cell.state_size[index] + else: + dim = self.cell.state_size + if value.shape != (batch_size, dim): + raise ValueError( + 'State ' + str(index) + ' is incompatible with layer ' + + self.name + ': expected shape=' + str( + (batch_size, dim)) + ', found shape=' + str(value.shape)) + # TODO(fchollet): consider batch calls to `set_value`. K.set_value(state, value) def get_config(self): @@ -434,51 +732,94 @@ class Recurrent(Layer): 'return_state': self.return_state, 'go_backwards': self.go_backwards, 'stateful': self.stateful, - 'unroll': self.unroll, - 'implementation': self.implementation + 'unroll': self.unroll } - base_config = super(Recurrent, self).get_config() + if self._num_constants is not None: + config['num_constants'] = self._num_constants + + cell_config = self.cell.get_config() + config['cell'] = { + 'class_name': self.cell.__class__.__name__, + 'config': cell_config + } + base_config = super(RNN, self).get_config() return dict(list(base_config.items()) + list(config.items())) + @classmethod + def from_config(cls, config, custom_objects=None): + from tensorflow.python.keras._impl.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top + cell = deserialize_layer(config.pop('cell'), custom_objects=custom_objects) + num_constants = config.pop('num_constants', None) + layer = cls(cell, **config) + layer._num_constants = num_constants + return layer + + @property + def trainable_weights(self): + if isinstance(self.cell, Layer): + return self.cell.trainable_weights + return [] + + @property + def non_trainable_weights(self): + if isinstance(self.cell, Layer): + return self.cell.non_trainable_weights + return [] -class SimpleRNN(Recurrent): - """Fully-connected RNN where the output is to be fed back to input. + @property + def losses(self): + if isinstance(self.cell, Layer): + return self.cell.losses + return [] + + def get_losses_for(self, inputs=None): + if isinstance(self.cell, Layer): + cell_losses = self.cell.get_losses_for(inputs) + return cell_losses + super(RNN, self).get_losses_for(inputs) + return super(RNN, self).get_losses_for(inputs) + + +class SimpleRNNCell(Layer): + """Cell class for SimpleRNN. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use. - If you don't specify anything, no activation is applied + activation: Activation function to use + (see [activations](../activations.md)). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state.. - bias_initializer: Initializer for the bias vector. + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - - References: - - [A Theoretically Grounded Application of Dropout in Recurrent Neural - Networks](http://arxiv.org/abs/1512.05287) """ def __init__(self, @@ -491,15 +832,13 @@ class SimpleRNN(Recurrent): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., **kwargs): - super(SimpleRNN, self).__init__( - activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + super(SimpleRNNCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.use_bias = use_bias @@ -518,23 +857,13 @@ class SimpleRNN(Recurrent): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_spec = InputSpec(shape=(None, self.units)) + self.state_size = self.units + self._dropout_mask = None + self._recurrent_dropout_mask = None def build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tensor_shape.TensorShape(input_shape).as_list() - - batch_size = input_shape[0] if self.stateful else None - self.input_dim = input_shape[2] - self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) - - self.states = [None] - if self.stateful: - self.reset_states() - self.kernel = self.add_weight( - shape=(self.input_dim, self.units), + shape=(input_shape[-1], self.units), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -556,152 +885,121 @@ class SimpleRNN(Recurrent): self.bias = None self.built = True - def preprocess_input(self, inputs, training=None): - if self.implementation > 0: - return inputs - else: - input_shape = inputs.get_shape().as_list() - input_dim = input_shape[2] - timesteps = input_shape[1] - return _time_distributed_dense( - inputs, - self.kernel, - self.bias, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) + def _generate_dropout_mask(self, inputs, training=None): + if 0 < self.dropout < 1: + ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) - def step(self, inputs, states): - if self.implementation == 0: - h = inputs + def dropped_inputs(): + return K.dropout(ones, self.dropout) + + self._dropout_mask = K.in_train_phase( + dropped_inputs, ones, training=training) else: - if 0 < self.dropout < 1: - h = K.dot(inputs * states[1], self.kernel) - else: - h = K.dot(inputs, self.kernel) - if self.bias is not None: - h = K.bias_add(h, self.bias) + self._dropout_mask = None - prev_output = states[0] + def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: - prev_output *= states[2] - output = h + K.dot(prev_output, self.recurrent_kernel) - if self.activation is not None: - output = self.activation(output) - - # Properly set learning phase on output tensor. - if 0 < self.dropout + self.recurrent_dropout: - output._uses_learning_phase = True - return output, [output] - - def get_constants(self, inputs, training=None): - constants = [] - if self.implementation != 0 and 0 < self.dropout < 1: - input_shape = K.int_shape(inputs) - input_dim = input_shape[-1] ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, int(input_dim))) + ones = K.tile(ones, (1, self.units)) def dropped_inputs(): return K.dropout(ones, self.dropout) - dp_mask = K.in_train_phase(dropped_inputs, ones, training=training) - constants.append(dp_mask) + self._recurrent_dropout_mask = K.in_train_phase( + dropped_inputs, ones, training=training) else: - constants.append(K.cast_to_floatx(1.)) - - if 0 < self.recurrent_dropout < 1: - ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, self.units)) + self._recurrent_dropout_mask = None - def dropped_inputs(): # pylint: disable=function-redefined - return K.dropout(ones, self.recurrent_dropout) + def call(self, inputs, states, training=None): + prev_output = states[0] + dp_mask = self._dropout_mask + rec_dp_mask = self._recurrent_dropout_mask - rec_dp_mask = K.in_train_phase(dropped_inputs, ones, training=training) - constants.append(rec_dp_mask) + if dp_mask is not None: + h = K.dot(inputs * dp_mask, self.kernel) else: - constants.append(K.cast_to_floatx(1.)) - return constants + h = K.dot(inputs, self.kernel) + if self.bias is not None: + h = K.bias_add(h, self.bias) - def get_config(self): - config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout - } - base_config = super(SimpleRNN, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + if rec_dp_mask is not None: + prev_output *= rec_dp_mask + output = h + K.dot(prev_output, self.recurrent_kernel) + if self.activation is not None: + output = self.activation(output) + # Properly set learning phase on output tensor. + if 0 < self.dropout + self.recurrent_dropout: + if training is None: + output._uses_learning_phase = True + return output, [output] -class GRU(Recurrent): - """Gated Recurrent Unit - Cho et al. - 2014. +class SimpleRNN(RNN): + """Fully-connected RNN where the output is to be fed back to input. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use. + activation: Activation function to use + (see [activations](../activations.md)). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step. use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state.. - bias_initializer: Initializer for the bias vector. + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. + the output of the layer (its "activation"). + (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - - References: - - [On the Properties of Neural Machine Translation: Encoder-Decoder - Approaches](https://arxiv.org/abs/1409.1259) - - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence - Modeling](http://arxiv.org/abs/1412.3555v1) - - [A Theoretically Grounded Application of Dropout in Recurrent Neural - Networks](http://arxiv.org/abs/1512.05287) - """ + return_sequences: Boolean. Whether to return the last output. + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + """ def __init__(self, units, activation='tanh', - recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', @@ -715,9 +1013,220 @@ class GRU(Recurrent): bias_constraint=None, dropout=0., recurrent_dropout=0., + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, **kwargs): - super(GRU, self).__init__( - activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + if 'implementation' in kwargs: + kwargs.pop('implementation') + logging.warning('The `implementation` argument ' + 'in `SimpleRNN` has been deprecated. ' + 'Please remove it from your layer call.') + cell = SimpleRNNCell( + units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout) + super(SimpleRNN, self).__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + activity_regularizer=regularizers.get(activity_regularizer), + **kwargs) + # self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + self.cell._generate_dropout_mask(inputs, training=training) + self.cell._generate_recurrent_dropout_mask(inputs, training=training) + return super(SimpleRNN, self).call( + inputs, mask=mask, training=training, initial_state=initial_state) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + def get_config(self): + config = { + 'units': + self.units, + 'activation': + activations.serialize(self.activation), + 'use_bias': + self.use_bias, + 'kernel_initializer': + initializers.serialize(self.kernel_initializer), + 'recurrent_initializer': + initializers.serialize(self.recurrent_initializer), + 'bias_initializer': + initializers.serialize(self.bias_initializer), + 'kernel_regularizer': + regularizers.serialize(self.kernel_regularizer), + 'recurrent_regularizer': + regularizers.serialize(self.recurrent_regularizer), + 'bias_regularizer': + regularizers.serialize(self.bias_regularizer), + 'activity_regularizer': + regularizers.serialize(self.activity_regularizer), + 'kernel_constraint': + constraints.serialize(self.kernel_constraint), + 'recurrent_constraint': + constraints.serialize(self.recurrent_constraint), + 'bias_constraint': + constraints.serialize(self.bias_constraint), + 'dropout': + self.dropout, + 'recurrent_dropout': + self.recurrent_dropout + } + base_config = super(SimpleRNN, self).get_config() + del base_config['cell'] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if 'implementation' in config: + config.pop('implementation') + return cls(**config) + + +class GRUCell(Layer): + """Cell class for the GRU layer. + + Arguments: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use + (see [activations](../activations.md)). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step + (see [activations](../activations.md)). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix + (see [constraints](../constraints.md)). + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. + """ + + def __init__(self, + units, + activation='tanh', + recurrent_activation='hard_sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0., + recurrent_dropout=0., + implementation=1, + **kwargs): + super(GRUCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) @@ -737,22 +1246,15 @@ class GRU(Recurrent): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_spec = InputSpec(shape=(None, self.units)) + self.implementation = implementation + self.state_size = self.units + self._dropout_mask = None + self._recurrent_dropout_mask = None def build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tensor_shape.TensorShape(input_shape).as_list() - batch_size = input_shape[0] if self.stateful else None - self.input_dim = input_shape[2] - self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) - - self.states = [None] - if self.stateful: - self.reset_states() - + input_dim = input_shape[-1] self.kernel = self.add_weight( - shape=(self.input_dim, self.units * 3), + shape=(input_dim, self.units * 3), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -792,89 +1294,83 @@ class GRU(Recurrent): self.bias_h = None self.built = True - def preprocess_input(self, inputs, training=None): - if self.implementation == 0: - input_shape = inputs.get_shape().as_list() - input_dim = input_shape[2] - timesteps = input_shape[1] - - x_z = _time_distributed_dense( - inputs, - self.kernel_z, - self.bias_z, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_r = _time_distributed_dense( - inputs, - self.kernel_r, - self.bias_r, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_h = _time_distributed_dense( - inputs, - self.kernel_h, - self.bias_h, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - return K.concatenate([x_z, x_r, x_h], axis=2) - else: - return inputs - - def get_constants(self, inputs, training=None): - constants = [] - if self.implementation != 0 and 0 < self.dropout < 1: - input_shape = K.int_shape(inputs) - input_dim = input_shape[-1] - ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, int(input_dim))) + def _generate_dropout_mask(self, inputs, training=None): + if 0 < self.dropout < 1: + ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) def dropped_inputs(): return K.dropout(ones, self.dropout) - dp_mask = [ + self._dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] - constants.append(dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(3)]) + self._dropout_mask = None + def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) - def dropped_inputs(): # pylint: disable=function-redefined - return K.dropout(ones, self.recurrent_dropout) + def dropped_inputs(): + return K.dropout(ones, self.dropout) - rec_dp_mask = [ + self._recurrent_dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] - constants.append(rec_dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(3)]) - return constants + self._recurrent_dropout_mask = None - def step(self, inputs, states): + def call(self, inputs, states, training=None): h_tm1 = states[0] # previous memory - dp_mask = states[1] # dropout matrices for recurrent units - rec_dp_mask = states[2] - if self.implementation == 2: - matrix_x = K.dot(inputs * dp_mask[0], self.kernel) + # dropout matrices for input units + dp_mask = self._dropout_mask + # dropout matrices for recurrent units + rec_dp_mask = self._recurrent_dropout_mask + + if self.implementation == 1: + if 0. < self.dropout < 1.: + inputs_z = inputs * dp_mask[0] + inputs_r = inputs * dp_mask[1] + inputs_h = inputs * dp_mask[2] + else: + inputs_z = inputs + inputs_r = inputs + inputs_h = inputs + x_z = K.dot(inputs_z, self.kernel_z) + x_r = K.dot(inputs_r, self.kernel_r) + x_h = K.dot(inputs_h, self.kernel_h) + if self.use_bias: + x_z = K.bias_add(x_z, self.bias_z) + x_r = K.bias_add(x_r, self.bias_r) + x_h = K.bias_add(x_h, self.bias_h) + + if 0. < self.recurrent_dropout < 1.: + h_tm1_z = h_tm1 * rec_dp_mask[0] + h_tm1_r = h_tm1 * rec_dp_mask[1] + h_tm1_h = h_tm1 * rec_dp_mask[2] + else: + h_tm1_z = h_tm1 + h_tm1_r = h_tm1 + h_tm1_h = h_tm1 + z = self.recurrent_activation( + x_z + K.dot(h_tm1_z, self.recurrent_kernel_z)) + r = self.recurrent_activation( + x_r + K.dot(h_tm1_r, self.recurrent_kernel_r)) + + hh = self.activation(x_h + K.dot(r * h_tm1_h, self.recurrent_kernel_h)) + else: + if 0. < self.dropout < 1.: + inputs *= dp_mask[0] + matrix_x = K.dot(inputs, self.kernel) if self.use_bias: matrix_x = K.bias_add(matrix_x, self.bias) - matrix_inner = K.dot(h_tm1 * rec_dp_mask[0], - self.recurrent_kernel[:, :2 * self.units]) + if 0. < self.recurrent_dropout < 1.: + h_tm1 *= rec_dp_mask[0] + matrix_inner = K.dot(h_tm1, self.recurrent_kernel[:, :2 * self.units]) x_z = matrix_x[:, :self.units] x_r = matrix_x[:, self.units:2 * self.units] @@ -885,116 +1381,323 @@ class GRU(Recurrent): r = self.recurrent_activation(x_r + recurrent_r) x_h = matrix_x[:, 2 * self.units:] - recurrent_h = K.dot(r * h_tm1 * rec_dp_mask[0], - self.recurrent_kernel[:, 2 * self.units:]) + recurrent_h = K.dot(r * h_tm1, self.recurrent_kernel[:, 2 * self.units:]) hh = self.activation(x_h + recurrent_h) - else: - if self.implementation == 0: - x_z = inputs[:, :self.units] - x_r = inputs[:, self.units:2 * self.units] - x_h = inputs[:, 2 * self.units:] - elif self.implementation == 1: - x_z = K.dot(inputs * dp_mask[0], self.kernel_z) - x_r = K.dot(inputs * dp_mask[1], self.kernel_r) - x_h = K.dot(inputs * dp_mask[2], self.kernel_h) - if self.use_bias: - x_z = K.bias_add(x_z, self.bias_z) - x_r = K.bias_add(x_r, self.bias_r) - x_h = K.bias_add(x_h, self.bias_h) - else: - raise ValueError('Unknown `implementation` mode.') - z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0], - self.recurrent_kernel_z)) - r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1], - self.recurrent_kernel_r)) - - hh = self.activation(x_h + K.dot(r * h_tm1 * rec_dp_mask[2], - self.recurrent_kernel_h)) h = z * h_tm1 + (1 - z) * hh if 0 < self.dropout + self.recurrent_dropout: - h._uses_learning_phase = True + if training is None: + h._uses_learning_phase = True return h, [h] + +class GRU(RNN): + # pylint: disable=line-too-long + """Gated Recurrent Unit - Cho et al. + + 2014. + + Arguments: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use + (see [activations](../activations.md)). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step + (see [activations](../activations.md)). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + (see [regularizer](../regularizers.md)). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix + (see [constraints](../constraints.md)). + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. + return_sequences: Boolean. Whether to return the last output. + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + + References: + - [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) + - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/abs/1412.3555v1) + - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) + """ + # pylint: enable=line-too-long + + def __init__(self, + units, + activation='tanh', + recurrent_activation='hard_sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0., + recurrent_dropout=0., + implementation=1, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + **kwargs): + if implementation == 0: + logging.warning('`implementation=0` has been deprecated, ' + 'and now defaults to `implementation=1`.' + 'Please update your layer call.') + cell = GRUCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation) + super(GRU, self).__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + **kwargs) + self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + self.cell._generate_dropout_mask(inputs, training=training) + self.cell._generate_recurrent_dropout_mask(inputs, training=training) + return super(GRU, self).call( + inputs, mask=mask, training=training, initial_state=initial_state) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + def get_config(self): config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), + 'units': + self.units, + 'activation': + activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), + 'use_bias': + self.use_bias, + 'kernel_initializer': + initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), + 'bias_initializer': + initializers.serialize(self.bias_initializer), + 'kernel_regularizer': + regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), + 'bias_regularizer': + regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), + 'kernel_constraint': + constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout + 'bias_constraint': + constraints.serialize(self.bias_constraint), + 'dropout': + self.dropout, + 'recurrent_dropout': + self.recurrent_dropout, + 'implementation': + self.implementation } base_config = super(GRU, self).get_config() + del base_config['cell'] return dict(list(base_config.items()) + list(config.items())) + @classmethod + def from_config(cls, config): + if 'implementation' in config and config['implementation'] == 0: + config['implementation'] = 1 + return cls(**config) -class LSTM(Recurrent): - """Long-Short Term Memory unit - Hochreiter 1997. - For a step-by-step description of the algorithm, see - [this tutorial](http://deeplearning.net/tutorial/lstm.html). +class LSTMCell(Layer): + """Cell class for the LSTM layer. Arguments: units: Positive integer, dimensionality of the output space. - activation: Activation function to use. + activation: Activation function to use + (see [activations](../activations.md)). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use - for the recurrent step. + for the recurrent step + (see [activations](../activations.md)). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs.. + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). recurrent_initializer: Initializer for the `recurrent_kernel` weights matrix, - used for the linear transformation of the recurrent state.. - bias_initializer: Initializer for the bias vector. + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Setting it to true will also force `bias_initializer="zeros"`. This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). kernel_constraint: Constraint function applied to - the `kernel` weights matrix. + the `kernel` weights matrix + (see [constraints](../constraints.md)). recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix. - bias_constraint: Constraint function applied to the bias vector. + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. - - References: - - [Long short-term - memory]((http://www.bioinf.jku.at/publications/older/2604.pdf) - (original 1997 paper) - - [Supervised sequence labeling with recurrent neural - networks](http://www.cs.toronto.edu/~graves/preprint.pdf) - - [A Theoretically Grounded Application of Dropout in Recurrent Neural - Networks](http://arxiv.org/abs/1512.05287) + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. """ def __init__(self, @@ -1009,15 +1712,14 @@ class LSTM(Recurrent): kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., + implementation=1, **kwargs): - super(LSTM, self).__init__( - activity_regularizer=regularizers.get(activity_regularizer), **kwargs) + super(LSTMCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) self.recurrent_activation = activations.get(recurrent_activation) @@ -1038,25 +1740,15 @@ class LSTM(Recurrent): self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_spec = [ - InputSpec(shape=(None, self.units)), - InputSpec(shape=(None, self.units)) - ] + self.implementation = implementation + self.state_size = (self.units, self.units) + self._dropout_mask = None + self._recurrent_dropout_mask = None def build(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_shape = tensor_shape.TensorShape(input_shape).as_list() - batch_size = input_shape[0] if self.stateful else None - self.input_dim = input_shape[2] - self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) - - self.states = [None, None] - if self.stateful: - self.reset_states() - + input_dim = input_shape[-1] self.kernel = self.add_weight( - shape=(self.input_dim, self.units * 4), + shape=(input_dim, self.units * 4), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, @@ -1112,96 +1804,90 @@ class LSTM(Recurrent): self.bias_o = None self.built = True - def preprocess_input(self, inputs, training=None): - if self.implementation == 0: - input_shape = inputs.get_shape().as_list() - input_dim = input_shape[2] - timesteps = input_shape[1] - - x_i = _time_distributed_dense( - inputs, - self.kernel_i, - self.bias_i, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_f = _time_distributed_dense( - inputs, - self.kernel_f, - self.bias_f, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_c = _time_distributed_dense( - inputs, - self.kernel_c, - self.bias_c, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - x_o = _time_distributed_dense( - inputs, - self.kernel_o, - self.bias_o, - self.dropout, - input_dim, - self.units, - timesteps, - training=training) - return K.concatenate([x_i, x_f, x_c, x_o], axis=2) - else: - return inputs - - def get_constants(self, inputs, training=None): - constants = [] - if self.implementation != 0 and 0 < self.dropout < 1: - input_shape = K.int_shape(inputs) - input_dim = input_shape[-1] - ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, int(input_dim))) + def _generate_dropout_mask(self, inputs, training=None): + if 0 < self.dropout < 1: + ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) def dropped_inputs(): return K.dropout(ones, self.dropout) - dp_mask = [ + self._dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] - constants.append(dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(4)]) + self._dropout_mask = None + def _generate_recurrent_dropout_mask(self, inputs, training=None): if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) - def dropped_inputs(): # pylint: disable=function-redefined - return K.dropout(ones, self.recurrent_dropout) + def dropped_inputs(): + return K.dropout(ones, self.dropout) - rec_dp_mask = [ + self._recurrent_dropout_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] - constants.append(rec_dp_mask) else: - constants.append([K.cast_to_floatx(1.) for _ in range(4)]) - return constants - - def step(self, inputs, states): - h_tm1 = states[0] - c_tm1 = states[1] - dp_mask = states[2] - rec_dp_mask = states[3] - - if self.implementation == 2: - z = K.dot(inputs * dp_mask[0], self.kernel) - z += K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel) + self._recurrent_dropout_mask = None + + def call(self, inputs, states, training=None): + # dropout matrices for input units + dp_mask = self._dropout_mask + # dropout matrices for recurrent units + rec_dp_mask = self._recurrent_dropout_mask + + h_tm1 = states[0] # previous memory state + c_tm1 = states[1] # previous carry state + + if self.implementation == 1: + if 0 < self.dropout < 1.: + inputs_i = inputs * dp_mask[0] + inputs_f = inputs * dp_mask[1] + inputs_c = inputs * dp_mask[2] + inputs_o = inputs * dp_mask[3] + else: + inputs_i = inputs + inputs_f = inputs + inputs_c = inputs + inputs_o = inputs + x_i = K.dot(inputs_i, self.kernel_i) + x_f = K.dot(inputs_f, self.kernel_f) + x_c = K.dot(inputs_c, self.kernel_c) + x_o = K.dot(inputs_o, self.kernel_o) + if self.use_bias: + x_i = K.bias_add(x_i, self.bias_i) + x_f = K.bias_add(x_f, self.bias_f) + x_c = K.bias_add(x_c, self.bias_c) + x_o = K.bias_add(x_o, self.bias_o) + + if 0 < self.recurrent_dropout < 1.: + h_tm1_i = h_tm1 * rec_dp_mask[0] + h_tm1_f = h_tm1 * rec_dp_mask[1] + h_tm1_c = h_tm1 * rec_dp_mask[2] + h_tm1_o = h_tm1 * rec_dp_mask[3] + else: + h_tm1_i = h_tm1 + h_tm1_f = h_tm1 + h_tm1_c = h_tm1 + h_tm1_o = h_tm1 + i = self.recurrent_activation( + x_i + K.dot(h_tm1_i, self.recurrent_kernel_i)) + f = self.recurrent_activation( + x_f + K.dot(h_tm1_f, self.recurrent_kernel_f)) + c = f * c_tm1 + i * self.activation( + x_c + K.dot(h_tm1_c, self.recurrent_kernel_c)) + o = self.recurrent_activation( + x_o + K.dot(h_tm1_o, self.recurrent_kernel_o)) + else: + if 0. < self.dropout < 1.: + inputs *= dp_mask[0] + z = K.dot(inputs, self.kernel) + if 0. < self.recurrent_dropout < 1.: + h_tm1 *= rec_dp_mask[0] + z += K.dot(h_tm1, self.recurrent_kernel) if self.use_bias: z = K.bias_add(z, self.bias) @@ -1214,57 +1900,606 @@ class LSTM(Recurrent): f = self.recurrent_activation(z1) c = f * c_tm1 + i * self.activation(z2) o = self.recurrent_activation(z3) - else: - if self.implementation == 0: - x_i = inputs[:, :self.units] - x_f = inputs[:, self.units:2 * self.units] - x_c = inputs[:, 2 * self.units:3 * self.units] - x_o = inputs[:, 3 * self.units:] - elif self.implementation == 1: - x_i = K.dot(inputs * dp_mask[0], self.kernel_i) + self.bias_i - x_f = K.dot(inputs * dp_mask[1], self.kernel_f) + self.bias_f - x_c = K.dot(inputs * dp_mask[2], self.kernel_c) + self.bias_c - x_o = K.dot(inputs * dp_mask[3], self.kernel_o) + self.bias_o - else: - raise ValueError('Unknown `implementation` mode.') - i = self.recurrent_activation(x_i + K.dot(h_tm1 * rec_dp_mask[0], - self.recurrent_kernel_i)) - f = self.recurrent_activation(x_f + K.dot(h_tm1 * rec_dp_mask[1], - self.recurrent_kernel_f)) - c = f * c_tm1 + i * self.activation( - x_c + K.dot(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c)) - o = self.recurrent_activation(x_o + K.dot(h_tm1 * rec_dp_mask[3], - self.recurrent_kernel_o)) h = o * self.activation(c) if 0 < self.dropout + self.recurrent_dropout: - h._uses_learning_phase = True + if training is None: + h._uses_learning_phase = True return h, [h, c] - def get_config(self): - config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout - } - base_config = super(LSTM, self).get_config() + +class LSTM(RNN): + # pylint: disable=line-too-long + """Long-Short Term Memory layer - Hochreiter 1997. + + Arguments: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use + (see [activations](../activations.md)). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + recurrent_activation: Activation function to use + for the recurrent step + (see [activations](../activations.md)). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). + unit_forget_bias: Boolean. + If True, add 1 to the bias of the forget gate at initialization. + Setting it to true will also force `bias_initializer="zeros"`. + This is recommended in [Jozefowicz et + al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + (see [regularizer](../regularizers.md)). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix + (see [constraints](../constraints.md)). + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + implementation: Implementation mode, either 1 or 2. + Mode 1 will structure its operations as a larger number of + smaller dot products and additions, whereas mode 2 will + batch them into fewer, larger operations. These modes will + have different performance profiles on different hardware and + for different applications. + return_sequences: Boolean. Whether to return the last output. + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + + References: + - [Long short-term memory](http://www.bioinf.jku.at/publications/older/2604.pdf) + - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015) + - [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf) + - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) + """ + # pylint: enable=line-too-long + + def __init__(self, + units, + activation='tanh', + recurrent_activation='hard_sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0., + recurrent_dropout=0., + implementation=1, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + **kwargs): + if implementation == 0: + logging.warning('`implementation=0` has been deprecated, ' + 'and now defaults to `implementation=1`.' + 'Please update your layer call.') + cell = LSTMCell( + units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + unit_forget_bias=unit_forget_bias, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout, + implementation=implementation) + super(LSTM, self).__init__( + cell, + return_sequences=return_sequences, + return_state=return_state, + go_backwards=go_backwards, + stateful=stateful, + unroll=unroll, + **kwargs) + self.activity_regularizer = regularizers.get(activity_regularizer) + + def call(self, inputs, mask=None, training=None, initial_state=None): + self.cell._generate_dropout_mask(inputs, training=training) + self.cell._generate_recurrent_dropout_mask(inputs, training=training) + return super(LSTM, self).call( + inputs, mask=mask, training=training, initial_state=initial_state) + + @property + def units(self): + return self.cell.units + + @property + def activation(self): + return self.cell.activation + + @property + def recurrent_activation(self): + return self.cell.recurrent_activation + + @property + def use_bias(self): + return self.cell.use_bias + + @property + def kernel_initializer(self): + return self.cell.kernel_initializer + + @property + def recurrent_initializer(self): + return self.cell.recurrent_initializer + + @property + def bias_initializer(self): + return self.cell.bias_initializer + + @property + def unit_forget_bias(self): + return self.cell.unit_forget_bias + + @property + def kernel_regularizer(self): + return self.cell.kernel_regularizer + + @property + def recurrent_regularizer(self): + return self.cell.recurrent_regularizer + + @property + def bias_regularizer(self): + return self.cell.bias_regularizer + + @property + def kernel_constraint(self): + return self.cell.kernel_constraint + + @property + def recurrent_constraint(self): + return self.cell.recurrent_constraint + + @property + def bias_constraint(self): + return self.cell.bias_constraint + + @property + def dropout(self): + return self.cell.dropout + + @property + def recurrent_dropout(self): + return self.cell.recurrent_dropout + + @property + def implementation(self): + return self.cell.implementation + + def get_config(self): + config = { + 'units': + self.units, + 'activation': + activations.serialize(self.activation), + 'recurrent_activation': + activations.serialize(self.recurrent_activation), + 'use_bias': + self.use_bias, + 'kernel_initializer': + initializers.serialize(self.kernel_initializer), + 'recurrent_initializer': + initializers.serialize(self.recurrent_initializer), + 'bias_initializer': + initializers.serialize(self.bias_initializer), + 'unit_forget_bias': + self.unit_forget_bias, + 'kernel_regularizer': + regularizers.serialize(self.kernel_regularizer), + 'recurrent_regularizer': + regularizers.serialize(self.recurrent_regularizer), + 'bias_regularizer': + regularizers.serialize(self.bias_regularizer), + 'activity_regularizer': + regularizers.serialize(self.activity_regularizer), + 'kernel_constraint': + constraints.serialize(self.kernel_constraint), + 'recurrent_constraint': + constraints.serialize(self.recurrent_constraint), + 'bias_constraint': + constraints.serialize(self.bias_constraint), + 'dropout': + self.dropout, + 'recurrent_dropout': + self.recurrent_dropout, + 'implementation': + self.implementation + } + base_config = super(LSTM, self).get_config() + del base_config['cell'] + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if 'implementation' in config and config['implementation'] == 0: + config['implementation'] = 1 + return cls(**config) + + +class Recurrent(Layer): + """Deprecated abstract base class for recurrent layers. + + It still exists because it is leveraged by the convolutional-recurrent layers. + It will be removed entirely in the future. + It was never part of the public API. + Do not use. + + Arguments: + weights: list of Numpy arrays to set as initial weights. + The list should have 3 elements, of shapes: + `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. + return_sequences: Boolean. Whether to return the last output + in the output sequence, or the full sequence. + return_state: Boolean. Whether to return the last state + in addition to the output. + go_backwards: Boolean (default False). + If True, process the input sequence backwards and return the + reversed sequence. + stateful: Boolean (default False). If True, the last state + for each sample at index i in a batch will be used as initial + state for the sample of index i in the following batch. + unroll: Boolean (default False). + If True, the network will be unrolled, + else a symbolic loop will be used. + Unrolling can speed-up a RNN, + although it tends to be more memory-intensive. + Unrolling is only suitable for short sequences. + implementation: one of {0, 1, or 2}. + If set to 0, the RNN will use + an implementation that uses fewer, larger matrix products, + thus running faster on CPU but consuming more memory. + If set to 1, the RNN will use more matrix products, + but smaller ones, thus running slower + (may actually be faster on GPU) while consuming less memory. + If set to 2 (LSTM/GRU only), + the RNN will combine the input gate, + the forget gate and the output gate into a single matrix, + enabling more time-efficient parallelization on the GPU. + Note: RNN dropout must be shared for all gates, + resulting in a slightly reduced regularization. + input_dim: dimensionality of the input (integer). + This argument (or alternatively, the keyword argument `input_shape`) + is required when using this layer as the first layer in a model. + input_length: Length of input sequences, to be specified + when it is constant. + This argument is required if you are going to connect + `Flatten` then `Dense` layers upstream + (without it, the shape of the dense outputs cannot be computed). + Note that if the recurrent layer is not the first layer + in your model, you would need to specify the input length + at the level of the first layer + (e.g. via the `input_shape` argument) + + Input shape: + 3D tensor with shape `(batch_size, timesteps, input_dim)`, + (Optional) 2D tensors with shape `(batch_size, output_dim)`. + + Output shape: + - if `return_state`: a list of tensors. The first tensor is + the output. The remaining tensors are the last states, + each with shape `(batch_size, units)`. + - if `return_sequences`: 3D tensor with shape + `(batch_size, timesteps, units)`. + - else, 2D tensor with shape `(batch_size, units)`. + + # Masking + This layer supports masking for input data with a variable number + of timesteps. To introduce masks to your data, + use an `Embedding` layer with the `mask_zero` parameter + set to `True`. + + # Note on using statefulness in RNNs + You can set RNN layers to be 'stateful', which means that the states + computed for the samples in one batch will be reused as initial states + for the samples in the next batch. This assumes a one-to-one mapping + between samples in different successive batches. + + To enable statefulness: + - specify `stateful=True` in the layer constructor. + - specify a fixed batch size for your model, by passing + if sequential model: + `batch_input_shape=(...)` to the first layer in your model. + else for functional model with 1 or more Input layers: + `batch_shape=(...)` to all the first layers in your model. + This is the expected shape of your inputs + *including the batch size*. + It should be a tuple of integers, e.g. `(32, 10, 100)`. + - specify `shuffle=False` when calling fit(). + + To reset the states of your model, call `.reset_states()` on either + a specific layer, or on your entire model. + + # Note on specifying the initial state of RNNs + You can specify the initial state of RNN layers symbolically by + calling them with the keyword argument `initial_state`. The value of + `initial_state` should be a tensor or list of tensors representing + the initial state of the RNN layer. + + You can specify the initial state of RNN layers numerically by + calling `reset_states` with the keyword argument `states`. The value of + `states` should be a numpy array or list of numpy arrays representing + the initial state of the RNN layer. + """ + + def __init__(self, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + implementation=0, + **kwargs): + super(Recurrent, self).__init__(**kwargs) + self.return_sequences = return_sequences + self.return_state = return_state + self.go_backwards = go_backwards + self.stateful = stateful + self.unroll = unroll + self.implementation = implementation + self.supports_masking = True + self.input_spec = [InputSpec(ndim=3)] + self.state_spec = None + self.dropout = 0 + self.recurrent_dropout = 0 + + def _compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + input_shape = input_shape[0] + input_shape = tensor_shape.TensorShape(input_shape).as_list() + if self.return_sequences: + output_shape = (input_shape[0], input_shape[1], self.units) + else: + output_shape = (input_shape[0], self.units) + + if self.return_state: + state_shape = [tensor_shape.TensorShape( + (input_shape[0], self.units)) for _ in self.states] + return [tensor_shape.TensorShape(output_shape)] + state_shape + return tensor_shape.TensorShape(output_shape) + + def compute_mask(self, inputs, mask): + if isinstance(mask, list): + mask = mask[0] + output_mask = mask if self.return_sequences else None + if self.return_state: + state_mask = [None for _ in self.states] + return [output_mask] + state_mask + return output_mask + + def step(self, inputs, states): + raise NotImplementedError + + def get_constants(self, inputs, training=None): + return [] + + def get_initial_state(self, inputs): + # build an all-zero tensor of shape (samples, output_dim) + initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) + initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) + initial_state = K.expand_dims(initial_state) # (samples, 1) + initial_state = K.tile(initial_state, [1, + self.units]) # (samples, output_dim) + initial_state = [initial_state for _ in range(len(self.states))] + return initial_state + + def preprocess_input(self, inputs, training=None): + return inputs + + def __call__(self, inputs, initial_state=None, **kwargs): + if (isinstance(inputs, (list, tuple)) and + len(inputs) > 1 + and initial_state is None): + initial_state = inputs[1:] + inputs = inputs[0] + + # If `initial_state` is specified, + # and if it a Keras tensor, + # then add it to the inputs and temporarily + # modify the input spec to include the state. + if initial_state is None: + return super(Recurrent, self).__call__(inputs, **kwargs) + + if not isinstance(initial_state, (list, tuple)): + initial_state = [initial_state] + + is_keras_tensor = hasattr(initial_state[0], '_keras_history') + for tensor in initial_state: + if hasattr(tensor, '_keras_history') != is_keras_tensor: + raise ValueError('The initial state of an RNN layer cannot be' + ' specified with a mix of Keras tensors and' + ' non-Keras tensors') + + if is_keras_tensor: + # Compute the full input spec, including state + input_spec = self.input_spec + state_spec = self.state_spec + if not isinstance(input_spec, list): + input_spec = [input_spec] + if not isinstance(state_spec, list): + state_spec = [state_spec] + self.input_spec = input_spec + state_spec + + # Compute the full inputs, including state + inputs = [inputs] + list(initial_state) + + # Perform the call + output = super(Recurrent, self).__call__(inputs, **kwargs) + + # Restore original input spec + self.input_spec = input_spec + return output + else: + kwargs['initial_state'] = initial_state + return super(Recurrent, self).__call__(inputs, **kwargs) + + def call(self, inputs, mask=None, training=None, initial_state=None): + # input shape: `(samples, time (padded with zeros), input_dim)` + # note that the .build() method of subclasses MUST define + # self.input_spec and self.state_spec with complete input shapes. + if isinstance(inputs, list): + initial_state = inputs[1:] + inputs = inputs[0] + elif initial_state is not None: + pass + elif self.stateful: + initial_state = self.states + else: + initial_state = self.get_initial_state(inputs) + + if isinstance(mask, list): + mask = mask[0] + + if len(initial_state) != len(self.states): + raise ValueError('Layer has ' + str(len(self.states)) + + ' states but was passed ' + str(len(initial_state)) + + ' initial states.') + input_shape = K.int_shape(inputs) + if self.unroll and input_shape[1] is None: + raise ValueError('Cannot unroll a RNN if the ' + 'time dimension is undefined. \n' + '- If using a Sequential model, ' + 'specify the time dimension by passing ' + 'an `input_shape` or `batch_input_shape` ' + 'argument to your first layer. If your ' + 'first layer is an Embedding, you can ' + 'also use the `input_length` argument.\n' + '- If using the functional API, specify ' + 'the time dimension by passing a `shape` ' + 'or `batch_shape` argument to your Input layer.') + constants = self.get_constants(inputs, training=None) + preprocessed_input = self.preprocess_input(inputs, training=None) + last_output, outputs, states = K.rnn( + self.step, + preprocessed_input, + initial_state, + go_backwards=self.go_backwards, + mask=mask, + constants=constants, + unroll=self.unroll) + if self.stateful: + updates = [] + for i in range(len(states)): + updates.append((self.states[i], states[i])) + self.add_update(updates, inputs) + + # Properly set learning phase + if 0 < self.dropout + self.recurrent_dropout: + last_output._uses_learning_phase = True + outputs._uses_learning_phase = True + + if not self.return_sequences: + outputs = last_output + + if self.return_state: + if not isinstance(states, (list, tuple)): + states = [states] + else: + states = list(states) + return [outputs] + states + return outputs + + def reset_states(self, states=None): + if not self.stateful: + raise AttributeError('Layer must be stateful.') + batch_size = self.input_spec[0].shape[0] + if not batch_size: + raise ValueError('If a RNN is stateful, it needs to know ' + 'its batch size. Specify the batch size ' + 'of your input tensors: \n' + '- If using a Sequential model, ' + 'specify the batch size by passing ' + 'a `batch_input_shape` ' + 'argument to your first layer.\n' + '- If using the functional API, specify ' + 'the time dimension by passing a ' + '`batch_shape` argument to your Input layer.') + # initialize state if None + if self.states[0] is None: + self.states = [K.zeros((batch_size, self.units)) for _ in self.states] + elif states is None: + for state in self.states: + K.set_value(state, np.zeros((batch_size, self.units))) + else: + if not isinstance(states, (list, tuple)): + states = [states] + if len(states) != len(self.states): + raise ValueError('Layer ' + self.name + ' expects ' + + str(len(self.states)) + ' states, ' + 'but it received ' + str(len(states)) + + ' state values. Input received: ' + str(states)) + for index, (value, state) in enumerate(zip(states, self.states)): + if value.shape != (batch_size, self.units): + raise ValueError('State ' + str(index) + + ' is incompatible with layer ' + self.name + + ': expected shape=' + str((batch_size, self.units)) + + ', found shape=' + str(value.shape)) + K.set_value(state, value) + + def get_config(self): + config = { + 'return_sequences': self.return_sequences, + 'return_state': self.return_state, + 'go_backwards': self.go_backwards, + 'stateful': self.stateful, + 'unroll': self.unroll, + 'implementation': self.implementation + } + base_config = super(Recurrent, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py new file mode 100644 index 0000000000..b1f89a30bb --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py @@ -0,0 +1,378 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for recurrent layers functionality other than GRU, LSTM, SimpleRNN. + +See also: lstm_test.py, gru_test.py, simplernn_test.py. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras._impl import keras +from tensorflow.python.platform import test + + +class RNNTest(test.TestCase): + + def test_minimal_rnn_cell_non_layer(self): + + class MinimalRNNCell(object): + + def __init__(self, units, input_dim): + self.units = units + self.state_size = units + self.kernel = keras.backend.variable( + np.random.random((input_dim, units))) + + def call(self, inputs, states): + prev_output = states[0] + output = keras.backend.dot(inputs, self.kernel) + prev_output + return output, [output] + + with self.test_session(): + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [MinimalRNNCell(8, 5), + MinimalRNNCell(32, 8), + MinimalRNNCell(32, 32)] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_minimal_rnn_cell_non_layer_multiple_states(self): + + class MinimalRNNCell(object): + + def __init__(self, units, input_dim): + self.units = units + self.state_size = (units, units) + self.kernel = keras.backend.variable( + np.random.random((input_dim, units))) + + def call(self, inputs, states): + prev_output_1 = states[0] + prev_output_2 = states[1] + output = keras.backend.dot(inputs, self.kernel) + output += prev_output_1 + output -= prev_output_2 + return output, [output * 2, output * 3] + + with self.test_session(): + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [MinimalRNNCell(8, 5), + MinimalRNNCell(16, 8), + MinimalRNNCell(32, 16)] + layer = keras.layers.RNN(cells) + assert layer.cell.state_size == (32, 32, 16, 16, 8, 8) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + def test_minimal_rnn_cell_layer(self): + + class MinimalRNNCell(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(MinimalRNNCell, self).__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = keras.backend.dot(inputs, self.kernel) + output = h + keras.backend.dot(prev_output, self.recurrent_kernel) + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(MinimalRNNCell, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + with self.test_session(): + # Test basic case. + x = keras.Input((None, 5)) + cell = MinimalRNNCell(32) + layer = keras.layers.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # Test stacking. + cells = [MinimalRNNCell(8), + MinimalRNNCell(12), + MinimalRNNCell(32)] + layer = keras.layers.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacked RNN serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): + layer = keras.layers.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + def test_rnn_cell_with_constants_layer(self): + + class RNNCellWithConstants(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(RNNCellWithConstants, self).__init__(**kwargs) + + def build(self, input_shape): + if not isinstance(input_shape, list): + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed + + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), + initializer='uniform', + name='constant_kernel') + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(RNNCellWithConstants, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + with self.test_session(): + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + cell = RNNCellWithConstants(32) + layer = keras.layers.RNN(cell) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + with self.test_session(): + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + with self.test_session(): + # test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer([x, c]) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + def test_rnn_cell_with_constants_layer_passing_initial_state(self): + + class RNNCellWithConstants(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(RNNCellWithConstants, self).__init__(**kwargs) + + def build(self, input_shape): + if not isinstance(input_shape, list): + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed + + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), + initializer='uniform', + name='constant_kernel') + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(RNNCellWithConstants, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + with self.test_session(): + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + s = keras.Input((32,)) + cell = RNNCellWithConstants(32) + layer = keras.layers.RNN(cell) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + with self.test_session(): + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + s_np = np.random.random((6, 32)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, s_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, s_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + + # verify that state is used + y_np_2_different_s = model.predict([x_np, s_np + 10., c_np]) + with self.assertRaises(AssertionError): + self.assertAllClose(y_np, y_np_2_different_s, atol=1e-4) + + with self.test_session(): + # test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.RNN.from_config(config.copy()) + y = layer([x, s, c]) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, s_np, c_np]) + self.assertAllClose(y_np, y_np_3, atol=1e-4) + + def test_stacked_rnn_attributes(self): + cells = [keras.layers.LSTMCell(3), + keras.layers.LSTMCell(3, kernel_regularizer='l2')] + layer = keras.layers.RNN(cells) + layer.build((None, None, 5)) + + # Test regularization losses + assert len(layer.losses) == 1 + + # Test weights + assert len(layer.trainable_weights) == 6 + cells[0].trainable = False + assert len(layer.trainable_weights) == 3 + assert len(layer.non_trainable_weights) == 3 + + # Test `get_losses_for` + x = keras.Input((None, 5)) + y = keras.backend.sum(x) + cells[0].add_loss(y, inputs=x) + assert layer.get_losses_for(x) == [y] + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py b/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py index 9833485236..7edebdacd0 100644 --- a/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/simplernn_test.py @@ -156,8 +156,10 @@ class SimpleRNNLayerTest(test.TestCase): activity_regularizer='l1') layer.build((None, None, 2)) self.assertEqual(len(layer.losses), 3) - layer(keras.backend.variable(np.ones((2, 3, 2)))) - self.assertEqual(len(layer.losses), 4) + + x = keras.backend.variable(np.ones((2, 3, 2))) + layer(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) def test_constraints_SimpleRNN(self): embedding_dim = 4 @@ -175,9 +177,9 @@ class SimpleRNNLayerTest(test.TestCase): recurrent_constraint=r_constraint, bias_constraint=b_constraint) layer.build((None, None, embedding_dim)) - self.assertEqual(layer.kernel.constraint, k_constraint) - self.assertEqual(layer.recurrent_kernel.constraint, r_constraint) - self.assertEqual(layer.bias.constraint, b_constraint) + self.assertEqual(layer.cell.kernel.constraint, k_constraint) + self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint) + self.assertEqual(layer.cell.bias.constraint, b_constraint) def test_with_masking_layer_SimpleRNN(self): layer_class = keras.layers.SimpleRNN diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py index acf0a5e179..b94bf8f0f6 100644 --- a/tensorflow/python/keras/layers/__init__.py +++ b/tensorflow/python/keras/layers/__init__.py @@ -134,6 +134,11 @@ from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool2D from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool3D # Recurrent layers. +from tensorflow.python.keras._impl.keras.layers.recurrent import RNN +from tensorflow.python.keras._impl.keras.layers.recurrent import StackedRNNCells +from tensorflow.python.keras._impl.keras.layers.recurrent import SimpleRNNCell +from tensorflow.python.keras._impl.keras.layers.recurrent import GRUCell +from tensorflow.python.keras._impl.keras.layers.recurrent import LSTMCell from tensorflow.python.keras._impl.keras.layers.recurrent import SimpleRNN from tensorflow.python.keras._impl.keras.layers.recurrent import GRU from tensorflow.python.keras._impl.keras.layers.recurrent import LSTM diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 8c8d774b75..c71e8382e9 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -642,7 +642,7 @@ class Layer(object): for output in output_list: with ops.name_scope('ActivityRegularizer'): activity_regularization = self._activity_regularizer(output) - self.add_loss(activity_regularization) + self.add_loss(activity_regularization, inputs=inputs) if not in_deferred_mode: # TODO(fchollet): consider how masking will work with deferred mode. diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 71eff2f965..7ddfe37827 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -574,6 +574,13 @@ class BaseLayerTest(test.TestCase): self.assertEqual(3, result['label'].numpy()) self.assertEqual(4.0, result['logits'].numpy()) + def testActivityRegularizer(self): + regularizer = math_ops.reduce_sum + layer = base_layers.Layer(activity_regularizer=regularizer) + x = array_ops.placeholder('int32') + layer.apply(x) + self.assertEqual(len(layer.get_losses_for(x)), 1) + class NetworkTest(test.TestCase): diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt new file mode 100644 index 0000000000..763184899c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -0,0 +1,179 @@ +path: "tensorflow.keras.layers.GRUCell" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt index 9237399254..889f2cbc23 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt @@ -1,14 +1,34 @@ path: "tensorflow.keras.layers.GRU" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" + member { + name: "activation" + mtype: "" + } member { name: "activity_regularizer" mtype: "" } + member { + name: "bias_constraint" + mtype: "" + } + member { + name: "bias_initializer" + mtype: "" + } + member { + name: "bias_regularizer" + mtype: "" + } + member { + name: "dropout" + mtype: "" + } member { name: "dtype" mtype: "" @@ -17,6 +37,10 @@ tf_class { name: "graph" mtype: "" } + member { + name: "implementation" + mtype: "" + } member { name: "inbound_nodes" mtype: "" @@ -33,6 +57,18 @@ tf_class { name: "input_shape" mtype: "" } + member { + name: "kernel_constraint" + mtype: "" + } + member { + name: "kernel_initializer" + mtype: "" + } + member { + name: "kernel_regularizer" + mtype: "" + } member { name: "losses" mtype: "" @@ -65,10 +101,34 @@ tf_class { name: "output_shape" mtype: "" } + member { + name: "recurrent_activation" + mtype: "" + } + member { + name: "recurrent_constraint" + mtype: "" + } + member { + name: "recurrent_dropout" + mtype: "" + } + member { + name: "recurrent_initializer" + mtype: "" + } + member { + name: "recurrent_regularizer" + mtype: "" + } member { name: "scope_name" mtype: "" } + member { + name: "states" + mtype: "" + } member { name: "trainable_variables" mtype: "" @@ -77,10 +137,18 @@ tf_class { name: "trainable_weights" mtype: "" } + member { + name: "units" + mtype: "" + } member { name: "updates" mtype: "" } + member { + name: "use_bias" + mtype: "" + } member { name: "variables" mtype: "" @@ -91,7 +159,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], " } member_method { name: "add_loss" @@ -137,10 +205,6 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_constants" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -159,7 +223,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_output_at" @@ -181,10 +245,6 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "preprocess_input" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -193,8 +253,4 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "step" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt new file mode 100644 index 0000000000..4ce7c34f6c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -0,0 +1,179 @@ +path: "tensorflow.keras.layers.LSTMCell" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt index 20935e2f99..e1a1d0d58e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -1,14 +1,34 @@ path: "tensorflow.keras.layers.LSTM" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" + member { + name: "activation" + mtype: "" + } member { name: "activity_regularizer" mtype: "" } + member { + name: "bias_constraint" + mtype: "" + } + member { + name: "bias_initializer" + mtype: "" + } + member { + name: "bias_regularizer" + mtype: "" + } + member { + name: "dropout" + mtype: "" + } member { name: "dtype" mtype: "" @@ -17,6 +37,10 @@ tf_class { name: "graph" mtype: "" } + member { + name: "implementation" + mtype: "" + } member { name: "inbound_nodes" mtype: "" @@ -33,6 +57,18 @@ tf_class { name: "input_shape" mtype: "" } + member { + name: "kernel_constraint" + mtype: "" + } + member { + name: "kernel_initializer" + mtype: "" + } + member { + name: "kernel_regularizer" + mtype: "" + } member { name: "losses" mtype: "" @@ -65,10 +101,34 @@ tf_class { name: "output_shape" mtype: "" } + member { + name: "recurrent_activation" + mtype: "" + } + member { + name: "recurrent_constraint" + mtype: "" + } + member { + name: "recurrent_dropout" + mtype: "" + } + member { + name: "recurrent_initializer" + mtype: "" + } + member { + name: "recurrent_regularizer" + mtype: "" + } member { name: "scope_name" mtype: "" } + member { + name: "states" + mtype: "" + } member { name: "trainable_variables" mtype: "" @@ -77,10 +137,22 @@ tf_class { name: "trainable_weights" mtype: "" } + member { + name: "unit_forget_bias" + mtype: "" + } + member { + name: "units" + mtype: "" + } member { name: "updates" mtype: "" } + member { + name: "use_bias" + mtype: "" + } member { name: "variables" mtype: "" @@ -91,7 +163,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], " } member_method { name: "add_loss" @@ -137,10 +209,6 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_constants" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -159,7 +227,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_output_at" @@ -181,10 +249,6 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "preprocess_input" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -193,8 +257,4 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "step" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt new file mode 100644 index 0000000000..c7c9b10f22 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt @@ -0,0 +1,191 @@ +path: "tensorflow.keras.layers.RNN" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "states" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'activity_regularizer\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'None\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\', \'constants\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_initial_state" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "reset_states" + argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt new file mode 100644 index 0000000000..10c7f8867c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -0,0 +1,179 @@ +path: "tensorflow.keras.layers.SimpleRNNCell" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt index f4148fcc23..588df21088 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -1,14 +1,34 @@ path: "tensorflow.keras.layers.SimpleRNN" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" + member { + name: "activation" + mtype: "" + } member { name: "activity_regularizer" mtype: "" } + member { + name: "bias_constraint" + mtype: "" + } + member { + name: "bias_initializer" + mtype: "" + } + member { + name: "bias_regularizer" + mtype: "" + } + member { + name: "dropout" + mtype: "" + } member { name: "dtype" mtype: "" @@ -33,6 +53,18 @@ tf_class { name: "input_shape" mtype: "" } + member { + name: "kernel_constraint" + mtype: "" + } + member { + name: "kernel_initializer" + mtype: "" + } + member { + name: "kernel_regularizer" + mtype: "" + } member { name: "losses" mtype: "" @@ -65,10 +97,30 @@ tf_class { name: "output_shape" mtype: "" } + member { + name: "recurrent_constraint" + mtype: "" + } + member { + name: "recurrent_dropout" + mtype: "" + } + member { + name: "recurrent_initializer" + mtype: "" + } + member { + name: "recurrent_regularizer" + mtype: "" + } member { name: "scope_name" mtype: "" } + member { + name: "states" + mtype: "" + } member { name: "trainable_variables" mtype: "" @@ -77,10 +129,18 @@ tf_class { name: "trainable_weights" mtype: "" } + member { + name: "units" + mtype: "" + } member { name: "updates" mtype: "" } + member { + name: "use_bias" + mtype: "" + } member { name: "variables" mtype: "" @@ -91,7 +151,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\'], " + argspec: "args=[\'self\', \'units\', \'activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'False\', \'False\', \'False\', \'False\', \'False\'], " } member_method { name: "add_loss" @@ -137,10 +197,6 @@ tf_class { name: "get_config" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_constants" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "get_initial_state" argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" @@ -159,7 +215,7 @@ tf_class { } member_method { name: "get_losses_for" - argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get_output_at" @@ -181,10 +237,6 @@ tf_class { name: "get_weights" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "preprocess_input" - argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "reset_states" argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -193,8 +245,4 @@ tf_class { name: "set_weights" argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "step" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=None, defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt new file mode 100644 index 0000000000..5779e41342 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -0,0 +1,183 @@ +path: "tensorflow.keras.layers.StackedRNNCells" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "scope_name" + mtype: "" + } + member { + name: "state_size" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'cells\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt index 8466c3e039..fe336c4be5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt @@ -140,6 +140,10 @@ tf_module { name: "GRU" mtype: "" } + member { + name: "GRUCell" + mtype: "" + } member { name: "GaussianDropout" mtype: "" @@ -208,6 +212,10 @@ tf_module { name: "LSTM" mtype: "" } + member { + name: "LSTMCell" + mtype: "" + } member { name: "Lambda" mtype: "" @@ -272,6 +280,10 @@ tf_module { name: "Permute" mtype: "" } + member { + name: "RNN" + mtype: "" + } member { name: "RepeatVector" mtype: "" @@ -292,6 +304,10 @@ tf_module { name: "SimpleRNN" mtype: "" } + member { + name: "SimpleRNNCell" + mtype: "" + } member { name: "SpatialDropout1D" mtype: "" @@ -304,6 +320,10 @@ tf_module { name: "SpatialDropout3D" mtype: "" } + member { + name: "StackedRNNCells" + mtype: "" + } member { name: "ThresholdedReLU" mtype: "" diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index f1c207f9b6..8d4e4c23dc 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -98,7 +98,8 @@ do_pylint() { "^tensorflow/contrib/eager/python/evaluator\.py.*\[E0202.*method-hidden "\ "^tensorflow/contrib/eager/python/metrics_impl\.py.*\[E0202.*method-hidden "\ "^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\ -"^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable" +"^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable "\ +"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition" echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\"" -- GitLab From a1f30a7846ada3bd8c72f719c96b923bfb427ac5 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 8 Nov 2017 15:19:12 -0800 Subject: [PATCH 0203/1801] Improve usability of TPUEstimator. 1) Log how many batches to enqueue. The old message is very confusing. 2) If input_pipeline has queue runner, generate a logging (legacy mode) or error out (new mode) 3) If input pipeline has summaries, generate a logging (legacy mode) or error out (new mode) PiperOrigin-RevId: 175073856 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 5a3b831429..16d712af9e 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -535,13 +535,15 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): session, self._dequeue_ops) def before_run(self, run_context): - logging.info('Enqueue next batch of data to infeed.') - iterations = run_context.session.run(self._iterations_per_loop_var) + + logging.info('Enqueue next (%d) batch(es) of data to infeed.', iterations) + self._infeed_thd_controller.send_next_batch_signal(iterations) if self._dequeue_ops is not None: # TODO(xiejw): Refactor the outfeed dequeue into tf.while_loop. - logging.info('Dequeue next batch of data from outfeed.') + logging.info( + 'Dequeue next (%d) batch(es) of data from outfeed.', iterations) self._outfeed_thd_controller.send_next_batch_signal(iterations) def end(self, session): @@ -842,6 +844,8 @@ class _InputPipeline(object): # structure is recorded. enqueue_ops = self._invoke_input_fn_and_record_structure() + self._validate_input_pipeline() + def dequeue_fn(): """dequeue_fn is used by TPU to retrieve the tensors.""" values = self._infeed_queue.generate_dequeue_op() @@ -920,6 +924,31 @@ class _InputPipeline(object): else: return enqueue_fn() + def _validate_input_pipeline(self): + # Perform some sanity checks to log user friendly information. We should + # error out to give users better error message. But, if + # _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break + # user code, so, log a warning. + if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS): + err_msg = ('Input pipeline contains one or more QueueRunners. ' + 'These are not supported via TPUEstimator. You must convert ' + 'your input pipeline to use `tf.data` instead (see ' + 'https://www.tensorflow.org/programmers_guide/datasets for ' + 'instructions.') + if _WRAP_INPUT_FN_INTO_WHILE_LOOP: + raise RuntimeError(err_msg) + else: + logging.warn(err_msg) + elif ops.get_default_graph().get_collection(ops.GraphKeys.SUMMARIES): + # Queue Runner has summary Ops by default. So here we use elif to do + # necessary checks for Dataset input pipeline only. + err_msg = ('Input pipeline contains `tf.summary` operations. ' + 'These are not currently supported.') + if _WRAP_INPUT_FN_INTO_WHILE_LOOP: + raise RuntimeError(err_msg) + else: + logging.warn(err_msg) + class _ModelFnWrapper(object): """A `model_fn` wrapper. -- GitLab From 6f7cf68cb0cf0728ed3f030ade20c439ceadccdf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 15:24:01 -0800 Subject: [PATCH 0204/1801] Hlo parser: support window and convolution. Also, to make the text format easier to write and unambiguous: - Print "window={}" around the window attribute; rename the "window" sub attribute to "size"; - Print the dim_lables in logical order, instead of physical order. PiperOrigin-RevId: 175074526 --- .../compiler/xla/service/hlo_instruction.cc | 10 +- .../compiler/xla/tools/parser/README.md | 16 +- .../compiler/xla/tools/parser/hlo_lexer.cc | 65 +- .../compiler/xla/tools/parser/hlo_lexer.h | 6 +- .../compiler/xla/tools/parser/hlo_parser.cc | 589 ++++++++++++++---- .../xla/tools/parser/hlo_parser_test.cc | 120 ++++ .../compiler/xla/tools/parser/hlo_token.h | 3 + tensorflow/compiler/xla/window_util.cc | 28 +- 8 files changed, 691 insertions(+), 146 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 5107ac782d..ee98c3fabc 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1850,7 +1850,7 @@ std::vector HloInstruction::ExtraAttributesToString() const { extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); } if (window_ != nullptr) { - extra.push_back(window_util::ToString(*window_)); + extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); } if (padding_config_ != nullptr) { extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); @@ -2856,13 +2856,7 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const { const auto append_dims = [&](const std::vector& dims, const Shape& shape) { CHECK_EQ(dims.size(), ShapeUtil::Rank(shape)); - for (int64 logical = 0; logical < dims.size(); ++logical) { - int64 physical = logical; - if (!shape.layout().minor_to_major().empty()) { - physical = LayoutUtil::Major(shape.layout(), logical); - } - result += dims[physical]; - } + StrAppend(&result, Join(dims, "")); }; // lhs_dims[i] is the symbol of the logical dimension i for the lhs diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md index 2c864d77a2..986041caf6 100644 --- a/tensorflow/compiler/xla/tools/parser/README.md +++ b/tensorflow/compiler/xla/tools/parser/README.md @@ -43,14 +43,22 @@ operand : shape name ; -extra_attributes +attributes : /*empty*/ - | ',' extra_attribute - | ',' extra_attribute extra_attributes + | ',' attribute + | ',' attribute attributes ; -extra_attribute +attribute : attribute_name attribute_value ; +attribute_value + : kInt + | kName + | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ + | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ + | [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* /*window_pad_pattern*/ + | '{' sub_attributes '}' + ; param_list : '(' param_list1 ')' diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index d104ff3460..f70386411c 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -122,7 +122,7 @@ TokKind HloLexer::LexToken() { current_ptr_++; return TokKind::kArrow; } - return LexDigitOrNegative(); + return LexNumberOrPattern(); case '=': return TokKind::kEqual; case ',': @@ -149,12 +149,15 @@ TokKind HloLexer::LexToken() { } } -// Lex a shape, name, keyword, or opcode. +// Lex a shape, name, keyword, opcode, attribute name, or the dim labels +// pattern. +// // shape ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})? // name ::= [a-zA-Z_][a-zA-Z0-9_.-]*: // keyword ::= HloModule, ENTRY, ... // opcode ::= add, greater-than, ... // attribute_name ::= condition, body, dimensions, ... +// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} TokKind HloLexer::LexIdentifier() { { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); @@ -220,6 +223,16 @@ TokKind HloLexer::LexIdentifier() { return TokKind::kOpcode; } + { + auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); + static LazyRE2 dim_labels_pattern = { + R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; + if (RE2::Consume(&consumable, *dim_labels_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kDimLabels; + } + } current_ptr_ = token_start_ + 1; return TokKind::kError; } @@ -240,15 +253,20 @@ TokKind HloLexer::LexPercent() { return TokKind::kError; } -// Lex integer and floating-point values, and -inf. -// int [-]?[0-9]+ -// fp with exp [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) -// fp without exp [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) -// negative inf -inf -TokKind HloLexer::LexDigitOrNegative() { +// Lex integer and floating-point values, -inf, and patterns for dim labels, +// dxd (e.g. 1x2x3), and window pad. +// +// fp with exp ::= [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) +// fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) +// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} +// dxd_pattern ::= [0-9]+(x[0-9]+)+ +// window_pad_pattern ::= [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* +// int ::= [-]?[0-9]+ +// negative inf ::= '-inf' +TokKind HloLexer::LexNumberOrPattern() { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); static LazyRE2 float_pattern = { - R"([-]?((\d+|\d+[.]\d*|\d*[.]\d+)([eE][+-]?\d+))|(\d+[.]\d*|\d*[.]\d+))"}; + R"([-]?((\d+|\d+[.]\d*|\d*[.]\d+)([eE][+-]?\d+))|[-]?(\d+[.]\d*|\d*[.]\d+))"}; if (RE2::Consume(&consumable, *float_pattern)) { current_ptr_ = consumable.begin(); tensorflow::strings::safe_strtod(string(token_start_, current_ptr_).c_str(), @@ -256,6 +274,29 @@ TokKind HloLexer::LexDigitOrNegative() { return TokKind::kDecimal; } + static LazyRE2 dim_labels_pattern = { + R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; + static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"}; + static LazyRE2 pad_pattern = {R"([0-9]+_[0-9]+(x[0-9]+_[0-9]+)*)"}; + + if (RE2::Consume(&consumable, *dim_labels_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kDimLabels; + } + + if (RE2::Consume(&consumable, *dxd_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kDxD; + } + + if (RE2::Consume(&consumable, *pad_pattern)) { + current_ptr_ = consumable.begin(); + str_val_.assign(token_start_, current_ptr_); + return TokKind::kWindowPad; + } + static LazyRE2 int_pattern = {R"([-]?\d+)"}; if (RE2::Consume(&consumable, *int_pattern)) { current_ptr_ = consumable.begin(); @@ -350,6 +391,12 @@ string TokKindToString(TokKind kind) { return "kName"; case TokKind::kAttributeName: return "kAttributeName"; + case TokKind::kDimLabels: + return "kDimLabels"; + case TokKind::kDxD: + return "kDxD"; + case TokKind::kWindowPad: + return "kWindowPad"; case TokKind::kShape: return "kShape"; case TokKind::kOpcode: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 3b9efcb92d..74e6829180 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -37,11 +37,15 @@ class HloLexer { } TokKind Lex() { return current_kind_ = LexToken(); } + TokKind GetKind() const { return current_kind_; } string GetStrVal() const { switch (GetKind()) { case TokKind::kName: case TokKind::kAttributeName: + case TokKind::kDimLabels: + case TokKind::kDxD: + case TokKind::kWindowPad: return str_val_; default: LOG(FATAL) << "This token does not have string value"; @@ -92,7 +96,7 @@ class HloLexer { TokKind LexPercent(); TokKind LexShape(); TokKind LexConstant(); - TokKind LexDigitOrNegative(); + TokKind LexNumberOrPattern(); TokKind LexComment(); const tensorflow::StringPiece buf_; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 6c2e37e3b5..f1e987cb15 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -28,6 +28,9 @@ namespace tools { namespace { using tensorflow::StringPiece; +using tensorflow::gtl::optional; +using tensorflow::str_util::Split; +using tensorflow::str_util::SplitAndParseAsInts; using tensorflow::strings::Printf; using tensorflow::strings::StrAppend; using tensorflow::strings::StrCat; @@ -57,8 +60,6 @@ class HloParser { bool ParseInstructionList(HloComputation::Builder* builder, string* root_name); bool ParseInstruction(HloComputation::Builder* builder, string* root_name); - bool ParseSharding(HloInstruction* instruction); - bool ParseControlPredecessors(HloInstruction* instruction); bool ParseLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseTupleLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseNonTupleLiteral(std::unique_ptr* literal, @@ -78,10 +79,55 @@ class HloParser { bool ParseOperands(std::vector* operands, const int expected_size); - template - bool ParseExtraAttribute(T* value, const string& expected_attribute); - template - bool ParseAttributeValue(T* value); + // Types of attributes. + enum class AttrTy { + kInt64, + kHloComputation, + kWindow, + kConvolutionDimensionNumbers, + kSharding, + kInstructionList, + }; + + struct AttrConfig { + bool required; // whether it's required or optional + AttrTy attr_type; // what type it is + void* result; // where to store the parsed result. + }; + + // Parses attributes given names and configs of the attributes. Each parsed + // result is passed back through the result pointer in corresponding + // AttrConfig. Note that the result pointer must point to a optional typed + // variable which outlives this function. Returns false on error. You should + // not use the any of the results if this function failed. + // + // Example usage: + // + // std::unordered_map attrs; + // optional foo; + // attrs["foo"] = {/*required=*/false, AttrTy::kInt64, &foo}; + // optional bar; + // attrs["bar"] = {/*required=*/true, AttrTy::kWindow, &bar}; + // if (!ParseAttribute(attrs)) { + // return false; // Do not use 'foo' 'bar' if failed. + // } + // // Do something with 'bar'. + // if (foo) { // If attr foo is seen, do something with 'foo'. } + // + bool ParseAttributes(const std::unordered_map& attrs); + + // Parses a name and finds the corresponding hlo computation. + bool ParseComputationName(HloComputation** value); + // Parses a list of names and finds the corresponding hlo instructions. + bool ParseInstructionNames(std::vector* instructions); + bool ParseWindow(Window* window); + bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); + bool ParseSharding(OpSharding* sharding); + + // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. + bool ParseDxD(const string& name, std::vector* result); + // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. + bool ParseWindowPad(std::vector>* pad); bool ParseParamList(); bool ParseName(string* result); @@ -214,7 +260,7 @@ bool HloParser::ParseInstructionList(HloComputation::Builder* builder, "expects '}' at the end of instruction list."); } -// instruction ::= ('ROOT')? name '=' shape opcode operands (extra_attribute)* +// instruction ::= ('ROOT')? name '=' shape opcode operands (attribute)* bool HloParser::ParseInstruction(HloComputation::Builder* builder, string* root_name) { string name; @@ -230,6 +276,15 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (is_root) { *root_name = name; } + + // Add optional attributes. + std::unordered_map attrs; + optional sharding; + attrs["sharding"] = {/*required=*/false, AttrTy::kSharding, &sharding}; + optional> predecessors; + attrs["control-predecessors"] = {/*required=*/false, AttrTy::kInstructionList, + &predecessors}; + HloInstruction* instruction; switch (opcode) { case HloOpcode::kParameter: { @@ -237,7 +292,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (!ParseToken(TokKind::kLparen, "expects '(' before parameter number") || !ParseInt64(¶meter_number) || - !ParseToken(TokKind::kRparen, "expects ')' after parameter number")) { + !ParseToken(TokKind::kRparen, "expects ')' after parameter number") || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -249,7 +305,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (!ParseToken(TokKind::kLparen, "expects '(' before constant literal") || !ParseLiteral(&literal, shape) || - !ParseToken(TokKind::kRparen, "expects ')' after constant literal")) { + !ParseToken(TokKind::kRparen, "expects ')' after constant literal") || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -275,7 +332,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kSin: case HloOpcode::kSort: case HloOpcode::kTanh: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -305,7 +363,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: { - if (!ParseOperands(&operands, /*expected_size=*/2)) { + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateBinary( @@ -315,7 +374,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, // Ternary ops. case HloOpcode::kClamp: case HloOpcode::kSelect: { - if (!ParseOperands(&operands, /*expected_size=*/3)) { + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateTernary( @@ -324,7 +384,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } // Other supported ops. case HloOpcode::kConvert: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -332,7 +393,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kCrossReplicaSum: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -340,7 +402,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kReshape: { - if (!ParseOperands(&operands, /*expected_size=*/1)) { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( @@ -348,7 +411,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kTuple: { - if (!ParseOperands(&operands)) { + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } instruction = @@ -356,70 +419,99 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kWhile: { - HloComputation* condition; - HloComputation* body; + optional condition; + optional body; + attrs["condition"] = {/*required=*/true, AttrTy::kHloComputation, + &condition}; + attrs["body"] = {/*required=*/true, AttrTy::kHloComputation, &body}; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseExtraAttribute(&condition, - /*expected_attribute=*/"condition") || - !ParseExtraAttribute(&body, /*expected_attribute=*/"body")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateWhile( - shape, condition, body, /*init=*/operands[0])); + shape, *condition, *body, /*init=*/operands[0])); break; } case HloOpcode::kRecv: { - int64 channel_id; + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if (!ParseOperands(&operands, /*expected_size=*/0) || - !ParseExtraAttribute(&channel_id, - /*expected_attribute=*/"channel_id")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateRecv(shape, channel_id)); + HloInstruction::CreateRecv(shape, *channel_id)); break; } case HloOpcode::kSend: { - int64 channel_id; + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseExtraAttribute(&channel_id, - /*expected_attribute=*/"channel_id")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateSend(operands[0], channel_id)); + HloInstruction::CreateSend(operands[0], *channel_id)); break; } case HloOpcode::kGetTupleElement: { - int64 index; + optional index; + attrs["index"] = {/*required=*/true, AttrTy::kInt64, &index}; if (!ParseOperands(&operands, /*expected_size=*/1) || - !ParseExtraAttribute(&index, /*expected_attribute=*/"index")) { + !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateGetTupleElement(shape, operands[0], index)); + HloInstruction::CreateGetTupleElement(shape, operands[0], *index)); break; } case HloOpcode::kCall: { - HloComputation* to_apply; - if (!ParseOperands(&operands) || - !ParseExtraAttribute(&to_apply, - /*expected_attribute=*/"to_apply")) { + optional to_apply; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &to_apply}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction( - HloInstruction::CreateCall(shape, operands, to_apply)); + HloInstruction::CreateCall(shape, operands, *to_apply)); + break; + } + case HloOpcode::kReduceWindow: { + optional reduce_computation; + optional window; + attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &reduce_computation}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateReduceWindow( + shape, /*operand=*/operands[0], /*init_value=*/operands[1], *window, + *reduce_computation)); + break; + } + case HloOpcode::kConvolution: { + optional window; + optional dnums; + attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["dim_labels"] = {/*required=*/true, + AttrTy::kConvolutionDimensionNumbers, &dnums}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateConvolve( + shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums)); break; } case HloOpcode::kBroadcast: case HloOpcode::kCustomCall: case HloOpcode::kConcatenate: case HloOpcode::kReducePrecision: - case HloOpcode::kConvolution: case HloOpcode::kMap: case HloOpcode::kPad: case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: case HloOpcode::kSelectAndScatter: case HloOpcode::kReverse: case HloOpcode::kRng: @@ -438,43 +530,27 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloOpcodeString(opcode))); } - bool has_sharding = false; - bool has_control = false; - while (EatIfPresent(TokKind::kComma)) { - string attribute_name; - if (!ParseAttributeName(&attribute_name)) { - return TokenError("expects ', sharding=' or ', control-predecessors='"); - } - - if (attribute_name == "sharding") { - // Parse "sharding=". - if (has_sharding) { - return TokenError("expects at most 1 'sharding='"); - } - has_sharding = true; - if (!ParseSharding(instruction)) { - return false; - } - } else if (attribute_name == "control-predecessors") { - // Parse "control-predecessors" - if (has_control) { - return TokenError("expects at most 1 'control-predecessors='"); - } - has_control = true; - if (!ParseControlPredecessors(instruction)) { - return false; + // Add common attrs (sharding, control predecessors) to the instruction, if + // they were seen. + if (sharding) { + instruction->set_sharding( + HloSharding::FromProto(sharding.value()).ValueOrDie()); + } + if (predecessors) { + for (auto* pre : *predecessors) { + Status status = pre->AddControlDependencyTo(instruction); + if (!status.ok()) { + return TokenError(StrCat("error adding control dependency for: ", name, + " status: ", status.ToString())); } - } else { - return TokenError(StrCat("unexpected attribute: ", attribute_name)); } } - return AddInstruction(name, instruction); } // ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? ('devices=' ('[' // dims ']')* device_list)? '}' dims ::= int_list device_list ::= int_list -bool HloParser::ParseSharding(HloInstruction* instruction) { +bool HloParser::ParseSharding(OpSharding* sharding) { if (!ParseToken(TokKind::kLbrace, "expected '{' to start sharding attribute")) { return false; @@ -545,7 +621,6 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { } } - OpSharding sharding; if (replicated) { if (!devices.empty()) { return TokenError( @@ -555,7 +630,7 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { return TokenError( "replicated shardings should not have any tile shape set"); } - sharding.set_type(OpSharding::Type::OpSharding_Type_REPLICATED); + sharding->set_type(OpSharding::Type::OpSharding_Type_REPLICATED); } else if (maximal) { if (devices.size() != 1) { return TokenError( @@ -564,8 +639,8 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { if (!ShapeUtil::Equal(tile_shape, Shape())) { return TokenError("maximal shardings should not have any tile shape set"); } - sharding.set_type(OpSharding::Type::OpSharding_Type_MAXIMAL); - sharding.add_tile_assignment_devices(devices[0]); + sharding->set_type(OpSharding::Type::OpSharding_Type_MAXIMAL); + sharding->add_tile_assignment_devices(devices[0]); } else { if (devices.size() <= 1) { return TokenError( @@ -579,47 +654,43 @@ bool HloParser::ParseSharding(HloInstruction* instruction) { "non-maximal shardings must have a tile assignment list including " "dimensions"); } - sharding.set_type(OpSharding::Type::OpSharding_Type_OTHER); - *sharding.mutable_tile_shape() = tile_shape; + sharding->set_type(OpSharding::Type::OpSharding_Type_OTHER); + *sharding->mutable_tile_shape() = tile_shape; for (int64 dim : tile_assignment_dimensions) { - sharding.add_tile_assignment_dimensions(dim); + sharding->add_tile_assignment_dimensions(dim); } for (int64 device : devices) { - sharding.add_tile_assignment_devices(device); + sharding->add_tile_assignment_devices(device); } } - instruction->set_sharding(HloSharding::FromProto(sharding).ValueOrDie()); lexer_.Lex(); return true; } // '{' name+ '}' -bool HloParser::ParseControlPredecessors(HloInstruction* instruction) { +bool HloParser::ParseInstructionNames( + std::vector* instructions) { if (!ParseToken(TokKind::kLbrace, - "expects '{' at the beginning of control predecessors")) { + "expects '{' at the beginning of instruction name list")) { return false; } do { string name; if (!ParseName(&name)) { - return TokenError("expects a control predecessor"); + return TokenError("expects a instruction name"); } - HloInstruction* pre = + HloInstruction* instr = tensorflow::gtl::FindPtrOrNull(instruction_pool_, name); - if (!pre) { + if (!instr) { return TokenError( - StrCat("control predecessor ", name, " is not defined: ")); - } - Status status = pre->AddControlDependencyTo(instruction); - if (!status.ok()) { - return TokenError(StrCat("error adding control dependency for: ", name, - " status: ", status.ToString())); + Printf("instruction '%s' is not defined", name.c_str())); } + instructions->push_back(instr); } while (EatIfPresent(TokKind::kComma)); return ParseToken(TokKind::kRbrace, - "expects '}' at the end of control predecessors"); + "expects '}' at the end of control instructions"); } bool HloParser::SetValueInLiteral(int64 value, int64 linear_index, @@ -957,28 +1028,95 @@ bool HloParser::ParseOperands(std::vector* operands, return true; } -// extra_attribute ::= ',' attribute_name value -template -bool HloParser::ParseExtraAttribute(T* value, - const string& expected_attribute) { - if (!ParseToken(TokKind::kComma, - "expects ',' in front of an extra attribute")) { - return false; - } - string attribute_name; - if (!ParseAttributeName(&attribute_name) && - attribute_name != expected_attribute) { - return TokenError(StrCat("expects attribute name: ", expected_attribute)); +bool HloParser::ParseAttributes( + const std::unordered_map& attrs) { + std::unordered_set seen_attrs; + while (EatIfPresent(TokKind::kComma)) { + string name; + if (!ParseAttributeName(&name)) { + return TokenError("error parsing attributes"); + } + VLOG(1) << "Parsing attribute " << name; + if (!seen_attrs.insert(name).second) { + return TokenError(Printf("attribute %s already exists", name.c_str())); + } + auto attr_it = attrs.find(name); + if (attr_it == attrs.end()) { + return TokenError(Printf("unexpected attribute %s", name.c_str())); + } + AttrTy attr_type = attr_it->second.attr_type; + void* attr_out_ptr = attr_it->second.result; + bool success = [&] { + switch (attr_type) { + case AttrTy::kInt64: { + int64 result; + if (!ParseInt64(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kHloComputation: { + HloComputation* result; + if (!ParseComputationName(&result)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kWindow: { + Window result; + if (!ParseWindow(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kConvolutionDimensionNumbers: { + ConvolutionDimensionNumbers result; + if (!ParseConvolutionDimensionNumbers(&result)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kSharding: { + OpSharding sharding; + if (!ParseSharding(&sharding)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(sharding); + return true; + } + case AttrTy::kInstructionList: { + std::vector result; + if (!ParseInstructionNames(&result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } + } + }(); + if (!success) { + return TokenError(Printf("error parsing attribute %s", name.c_str())); + } } - if (!ParseAttributeValue(value)) { - return TokenError( - StrCat("expects value for attribute: ", expected_attribute)); + // Check that all required attrs were seen. + for (const auto& attr_it : attrs) { + if (attr_it.second.required && + seen_attrs.find(attr_it.first) == seen_attrs.end()) { + return TokenError(Printf("attribute %s is expected but not seen", + attr_it.first.c_str())); + } } return true; } -template <> -bool HloParser::ParseAttributeValue(HloComputation** value) { +bool HloParser::ParseComputationName(HloComputation** value) { string name; if (!ParseName(&name)) { return TokenError("expects computation name"); @@ -990,9 +1128,191 @@ bool HloParser::ParseAttributeValue(HloComputation** value) { return true; } -template <> -bool HloParser::ParseAttributeValue(int64* value) { - return ParseInt64(value); +// ::= '{' size stride? pad? lhs_dilate? rhs_dilate? '}' +// The subattributes can appear in any order. 'size=' is required, others are +// optional. +bool HloParser::ParseWindow(Window* window) { + if (!ParseToken(TokKind::kLbrace, "expected '{' to start window attribute")) { + return false; + } + + std::vector size; + std::vector stride; + std::vector> pad; + std::vector lhs_dilate; + std::vector rhs_dilate; + while (lexer_.GetKind() != TokKind::kRbrace) { + string field_name; + if (!ParseAttributeName(&field_name)) { + return TokenError("expects sub-attributes in window"); + } + bool ok = [&] { + if (field_name == "size") { + return ParseDxD("size", &size); + } + if (field_name == "stride") { + return ParseDxD("stride", &stride); + } + if (field_name == "lhs_dilate") { + return ParseDxD("lhs_dilate", &lhs_dilate); + } + if (field_name == "rhs_dilate") { + return ParseDxD("rls_dilate", &rhs_dilate); + } + if (field_name == "pad") { + return ParseWindowPad(&pad); + } + return TokenError(StrCat("unexpected attribute name: ", field_name)); + }(); + if (!ok) { + return false; + } + } + + if (size.empty()) { + return TokenError( + "sub-attribute 'size=' is required in the window attribute"); + } + if (!stride.empty() && stride.size() != size.size()) { + return TokenError("expects 'stride=' has the same size as 'size='"); + } + if (!lhs_dilate.empty() && lhs_dilate.size() != size.size()) { + return TokenError("expects 'lhs_dilate=' has the same size as 'size='"); + } + if (!rhs_dilate.empty() && rhs_dilate.size() != size.size()) { + return TokenError("expects 'rhs_dilate=' has the same size as 'size='"); + } + if (!pad.empty() && pad.size() != size.size()) { + return TokenError("expects 'pad=' has the same size as 'size='"); + } + + for (int i = 0; i < size.size(); i++) { + window->add_dimensions()->set_size(size[i]); + if (!pad.empty()) { + window->mutable_dimensions(i)->set_padding_low(pad[i][0]); + window->mutable_dimensions(i)->set_padding_high(pad[i][1]); + } + // If some field is not present, it has the default value. + window->mutable_dimensions(i)->set_stride(stride.empty() ? 1 : stride[i]); + window->mutable_dimensions(i)->set_base_dilation( + lhs_dilate.empty() ? 1 : lhs_dilate[i]); + window->mutable_dimensions(i)->set_window_dilation( + rhs_dilate.empty() ? 1 : rhs_dilate[i]); + } + return ParseToken(TokKind::kRbrace, "expected '}' to end window attribute"); +} + +// This is the inverse of HloInstruction::ConvolutionDimensionNumbersToString. +// The string looks like "dim_labels=0bf_0io->0bf". +bool HloParser::ParseConvolutionDimensionNumbers( + ConvolutionDimensionNumbers* dnums) { + if (lexer_.GetKind() != TokKind::kDimLabels) { + return TokenError("expects dim labels pattern, e.g., 'bf0_0io->0bf'"); + } + string str = lexer_.GetStrVal(); + + // The str is expected to have 3 items, lhs, rhs, out, and it must looks like + // lhs_rhs->out, that is, the first separator is "_" and the second is "->". + // So we replace the "->" with "_" and then split on "_". + str = tensorflow::str_util::StringReplace(str, /*oldsub=*/"->", + /*newsub=*/"_", + /*replace_all=*/false); + std::vector lhs_rhs_out = Split(str, "_"); + if (lhs_rhs_out.size() != 3) { + LOG(FATAL) << "expects 3 items: lhs, rhs, and output dims, but sees " + << str; + } + + const int64 rank = lhs_rhs_out[0].length(); + if (rank != lhs_rhs_out[1].length() || rank != lhs_rhs_out[2].length()) { + return TokenError( + "convolution lhs, rhs, and output must have the same rank"); + } + if (rank < 3) { + return TokenError("convolution rank must >=3"); + } + + auto is_unique = [](string str) -> bool { + std::sort(str.begin(), str.end()); + return std::unique(str.begin(), str.end()) == str.end(); + }; + + // lhs + { + const string& lhs = lhs_rhs_out[0]; + if (!is_unique(lhs)) { + return TokenError( + StrCat("expects unique lhs dimension numbers, but sees ", lhs)); + } + for (int i = 0; i < rank - 2; i++) { + dnums->add_spatial_dimensions(-1); + } + for (int i = 0; i < rank; i++) { + char c = lhs[i]; + if (c == 'b') { + dnums->set_input_batch_dimension(i); + } else if (c == 'f') { + dnums->set_input_feature_dimension(i); + } else if (c < '0' + rank && c >= '0') { + dnums->set_spatial_dimensions(c - '0', i); + } else { + return TokenError( + Printf("expects [0-%lldbf] in lhs dimension numbers", rank - 1)); + } + } + } + // rhs + { + const string& rhs = lhs_rhs_out[1]; + if (!is_unique(rhs)) { + return TokenError( + StrCat("expects unique rhs dimension numbers, but sees ", rhs)); + } + for (int i = 0; i < rank - 2; i++) { + dnums->add_kernel_spatial_dimensions(-1); + } + for (int i = 0; i < rank; i++) { + char c = rhs[i]; + if (c == 'i') { + dnums->set_kernel_input_feature_dimension(i); + } else if (c == 'o') { + dnums->set_kernel_output_feature_dimension(i); + } else if (c < '0' + rank && c >= '0') { + dnums->set_kernel_spatial_dimensions(c - '0', i); + } else { + return TokenError( + Printf("expects [0-%lldio] in rhs dimension numbers", rank - 1)); + } + } + } + // output + { + const string& out = lhs_rhs_out[2]; + if (!is_unique(out)) { + return TokenError( + StrCat("expects unique output dimension numbers, but sees ", out)); + } + for (int i = 0; i < rank; i++) { + char c = out[i]; + if (c == 'b') { + dnums->set_output_batch_dimension(i); + } else if (c == 'f') { + dnums->set_output_feature_dimension(i); + } else if (c < '0' + rank && c >= '0') { + if (dnums->spatial_dimensions(c - '0') != i) { + return TokenError( + "output spatial dimensions should be the same as input spatial " + "dimensions"); + } + } else { + return TokenError( + Printf("expects [0-%lldbf] in output dimension numbers", rank - 1)); + } + } + } + + lexer_.Lex(); + return true; } // param_list ::= '(' param_list1 ')' @@ -1070,6 +1390,55 @@ bool HloParser::ParseAttributeName(string* result) { return true; } +bool HloParser::ParseDxD(const string& name, std::vector* result) { + if (!result->empty()) { + return TokenError( + Printf("sub-attribute '%s=' already exists", name.c_str())); + } + // 1D + if (lexer_.GetKind() == TokKind::kInt) { + int64 number; + if (!ParseInt64(&number)) { + return TokenError(Printf("expects sub-attribute '%s=i'", name.c_str())); + } + result->push_back(number); + return true; + } + // 2D or higher. + if (lexer_.GetKind() == TokKind::kDxD) { + string str = lexer_.GetStrVal(); + if (!SplitAndParseAsInts(str, 'x', result)) { + return TokenError( + Printf("expects sub-attribute '%s=ixj...'", name.c_str())); + } + lexer_.Lex(); + return true; + } + return TokenError("expects token type kInt or kDxD"); +} + +bool HloParser::ParseWindowPad(std::vector>* pad) { + if (!pad->empty()) { + return TokenError("sub-attribute 'pad=' already exists"); + } + if (lexer_.GetKind() != TokKind::kWindowPad) { + return TokenError("expects window pad pattern, e.g., '0_0x3_3'"); + } + string str = lexer_.GetStrVal(); + std::vector padding_str = Split(str, 'x'); + for (int i = 0; i < padding_str.size(); i++) { + std::vector low_high; + if (!SplitAndParseAsInts(padding_str[i], '_', &low_high) || + low_high.size() != 2) { + return TokenError( + "expects padding_low and padding_high separated by '_'"); + } + pad->push_back(low_high); + } + lexer_.Lex(); + return true; +} + bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << "ParseOpcode"; if (lexer_.GetKind() != TokKind::kOpcode) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 359256f064..62b4385e76 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -25,6 +25,7 @@ namespace tools { namespace { using tensorflow::StringPiece; +using tensorflow::strings::StrCat; struct TestData { string test_name; @@ -247,6 +248,39 @@ ENTRY %CallR0F32IdentityScalar.v2 () -> f32[] { ROOT %call = f32[] call(f32[] %constant), to_apply=%Identity.v1 } +)" +}, +// reduce window +{ +"ReduceWindow", +R"(HloModule R4UnitWindow_module: + +%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) +} + +ENTRY %R4UnitWindow.v3 (operand: f32[13,12,8,15]) -> f32[13,3,8,15] { + %operand = f32[13,12,8,15]{0,3,2,1} parameter(0) + %constant = f32[] constant(0) + ROOT %reduce-window = f32[13,3,8,15]{0,3,2,1} reduce-window(f32[13,12,8,15]{0,3,2,1} %operand, f32[] %constant), window={size=1x1x7x1 stride=1x4x1x1 pad=0_0x0_0x3_3x0_0}, to_apply=%add_F32.v3 +} + +)" +}, +// convolution +{ +"Convolution", +R"(HloModule Convolve1D1Window_0_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f +} + )" } }); @@ -427,6 +461,92 @@ ENTRY %ConstantWithExp.v4 () -> f32[] { // printed as "300". } +TEST_F(HloParserTest, AttibutesAnyOrder) { + const string original = R"(HloModule any_order_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), sharding={maximal device=1}, dim_labels=b0f_0io->b0f, window={pad=1_1 size=2} +} + +)"; + TF_EXPECT_OK(Parse(original).status()); +} + +TEST_F(HloParserTest, InvalidDimLabels) { + string prefix = R"(HloModule invalid_dim_labels_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1} )"; + string suffix = R"( +} + +)"; + + ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=00_01_10", suffix)) + .status() + .error_message(), + "expects dim labels pattern"); + + ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=010_1100->010", suffix)) + .status() + .error_message(), + "must have the same rank"); + + ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=0bf_io0->b0f", suffix)) + .status() + .error_message(), + "output spatial dimensions should be the same as input " + "spatial dimensions"); +} + +TEST_F(HloParserTest, UnexpectedAttribute) { + const string original = R"(HloModule unexpected_attr_module: + +ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { + %recv = f32[] recv(), channel_id=15 + ROOT %constant = f32[] constant(2.1) + %send = () send(f32[] %constant), channel_id=16, calls=%recv +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "unexpected attribute calls"); +} + +TEST_F(HloParserTest, MissingAttribute) { + const string original = R"(HloModule missing_attr_module: + +ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { + %recv = f32[] recv(), channel_id=15 + ROOT %constant = f32[] constant(-2.1) + %send = () send(f32[] %constant) +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "attribute channel_id is expected but not seen"); +} + +TEST_F(HloParserTest, PredecessorUndefined) { + const string original = R"(HloModule pre_not_found_module: + +ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { + %recv = f32[] recv(), channel_id=15 + ROOT %constant = f32[] constant(2.1) + %send = () send(f32[] %constant), channel_id=16, control-predecessors={%done} +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "'done' is not defined"); +} + } // namespace } // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 9c2069e756..15ab8b1ccc 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -57,6 +57,9 @@ enum class TokKind { // Typed tokens. kName, // %foo kAttributeName, // dimensions= + kDimLabels, // [0-9bf]+_[0-9io]+->[0-9bf]+ + kDxD, // [0-9]+(x[0-9]+)+ + kWindowPad, // [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* kShape, // f32[2,3]{1,0} kOpcode, // add kInt, // 42 diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc index 23161873a0..6f7f1479b9 100644 --- a/tensorflow/compiler/xla/window_util.cc +++ b/tensorflow/compiler/xla/window_util.cc @@ -26,8 +26,8 @@ namespace xla { namespace window_util { /* static */ string ToString(const WindowDimension& dim) { - using tensorflow::strings::StrCat; using tensorflow::strings::StrAppend; + using tensorflow::strings::StrCat; string str = StrCat("(size=", dim.size()); if (dim.stride() != 1) { StrAppend(&str, ",stride=", dim.stride()); @@ -49,22 +49,22 @@ namespace window_util { } string ToString(const Window& window) { - using tensorflow::strings::StrCat; using tensorflow::strings::StrAppend; + using tensorflow::strings::StrCat; string str; - const auto add_field = [&]( - const char* heading, - std::function format) { - StrAppend(&str, heading, "="); - const char* prefix = ""; - for (const auto& window_dimension : window.dimensions()) { - StrAppend(&str, prefix, format(window_dimension)); - prefix = "x"; - } - }; - - add_field("window", + const auto add_field = + [&](const char* heading, + std::function format) { + StrAppend(&str, heading, "="); + const char* prefix = ""; + for (const auto& window_dimension : window.dimensions()) { + StrAppend(&str, prefix, format(window_dimension)); + prefix = "x"; + } + }; + + add_field("size", [](const WindowDimension& dim) { return StrCat(dim.size()); }); if (HasStride(window)) { add_field(" stride", -- GitLab From 2ba34173fad0d5b7d986baeb8171bdc6afdcd7bb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 15:24:05 -0800 Subject: [PATCH 0205/1801] Add a --all_tensor_names option, which is useful if I only want to know all tensor names. It is especially useful in cases whether some of the tensors has huge size. Also update the usage description. PiperOrigin-RevId: 175074541 --- tensorflow/python/tools/inspect_checkpoint.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/tools/inspect_checkpoint.py b/tensorflow/python/tools/inspect_checkpoint.py index 47a74e5abf..8716058e61 100644 --- a/tensorflow/python/tools/inspect_checkpoint.py +++ b/tensorflow/python/tools/inspect_checkpoint.py @@ -29,7 +29,8 @@ from tensorflow.python.platform import flags FLAGS = None -def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors): +def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors, + all_tensor_names): """Prints tensors in a checkpoint file. If no `tensor_name` is provided, prints the tensor names and shapes @@ -41,14 +42,16 @@ def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors): file_name: Name of the checkpoint file. tensor_name: Name of the tensor in the checkpoint file to print. all_tensors: Boolean indicating whether to print all tensors. + all_tensor_names: Boolean indicating whether to print all tensor names. """ try: reader = pywrap_tensorflow.NewCheckpointReader(file_name) - if all_tensors: + if all_tensors or all_tensor_names: var_to_shape_map = reader.get_variable_to_shape_map() for key in sorted(var_to_shape_map): print("tensor_name: ", key) - print(reader.get_tensor(key)) + if all_tensors: + print(reader.get_tensor(key)) elif not tensor_name: print(reader.debug_string().decode("utf-8")) else: @@ -104,11 +107,14 @@ def parse_numpy_printoption(kv_str): def main(unused_argv): if not FLAGS.file_name: print("Usage: inspect_checkpoint --file_name=checkpoint_file_name " - "[--tensor_name=tensor_to_print]") + "[--tensor_name=tensor_to_print] " + "[--all_tensors] " + "[--all_tensor_names] " + "[--printoptions]") sys.exit(1) else: print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name, - FLAGS.all_tensors) + FLAGS.all_tensors, FLAGS.all_tensor_names) if __name__ == "__main__": @@ -130,6 +136,13 @@ if __name__ == "__main__": type="bool", default=False, help="If True, print the values of all the tensors.") + parser.add_argument( + "--all_tensor_names", + nargs="?", + const=True, + type="bool", + default=False, + help="If True, print the names of all the tensors.") parser.add_argument( "--printoptions", nargs="*", -- GitLab From 64d2636e2946772d4b1531ec91b389110a2787b7 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Wed, 8 Nov 2017 15:35:27 -0800 Subject: [PATCH 0206/1801] Move MakeFakeLiteral from client/lib/testing.h to tests/test_utils.h. Also remove superfluous literal creation methods in that file, and replace them with the existing ones in the Literal class. Also, optionally print layout in Literal::ToString. PiperOrigin-RevId: 175076277 --- tensorflow/compiler/xla/client/lib/BUILD | 1 + tensorflow/compiler/xla/client/lib/testing.cc | 57 +-------- tensorflow/compiler/xla/client/lib/testing.h | 4 - tensorflow/compiler/xla/literal_util.cc | 22 +++- tensorflow/compiler/xla/literal_util.h | 2 +- tensorflow/compiler/xla/service/BUILD | 2 - .../compiler/xla/service/hlo_cse_test.cc | 24 ++-- .../xla/service/layout_assignment_test.cc | 32 ++--- tensorflow/compiler/xla/tests/BUILD | 3 +- .../xla/tests/client_library_test_base.h | 6 +- tensorflow/compiler/xla/tests/client_test.cc | 4 +- .../xla/tests/compilation_cache_test.cc | 8 +- .../xla/tests/compute_constant_test.cc | 4 +- .../compiler/xla/tests/dot_operation_test.cc | 25 ++-- .../xla/tests/local_client_execute_test.cc | 10 +- tensorflow/compiler/xla/tests/map_test.cc | 8 +- tensorflow/compiler/xla/tests/test_utils.cc | 120 ++++++++++++++++++ tensorflow/compiler/xla/tests/test_utils.h | 64 ++-------- tensorflow/compiler/xla/tools/BUILD | 1 + .../compiler/xla/tools/replay_computation.cc | 1 + 20 files changed, 209 insertions(+), 189 deletions(-) create mode 100644 tensorflow/compiler/xla/tests/test_utils.cc diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index ee34682087..fca2bf2688 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -44,6 +44,7 @@ cc_library( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index e6645e4941..d936bd870b 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -48,62 +49,6 @@ std::unique_ptr MakeFakeDataViaDeviceOrDie(const Shape& shape, } // namespace -StatusOr> MakeFakeLiteral(const Shape& shape) { - if (ShapeUtil::IsTuple(shape)) { - std::vector> elements; - for (const Shape& element_shape : shape.tuple_shapes()) { - TF_ASSIGN_OR_RETURN(std::unique_ptr element, - MakeFakeLiteral(element_shape)); - elements.push_back(std::move(element)); - } - return Literal::MakeTupleOwned(std::move(elements)); - } - std::unique_ptr literal = Literal::CreateFromShape(shape); - std::minstd_rand0 engine; - switch (shape.element_type()) { - case F32: { - std::uniform_real_distribution generator(0.0f, 1.0f); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - case S32: { - std::uniform_int_distribution generator( - std::numeric_limits::lowest(), - std::numeric_limits::max()); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - case S64: { - std::uniform_int_distribution generator( - std::numeric_limits::lowest(), - std::numeric_limits::max()); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - case PRED: { - std::uniform_int_distribution generator(0, 1); - TF_CHECK_OK(literal->Populate( - [&](tensorflow::gtl::ArraySlice /*indices*/) { - return generator(engine); - })); - break; - } - default: - return Unimplemented("Unsupported type for fake literal generation: %s", - ShapeUtil::HumanString(shape).c_str()); - } - return std::move(literal); -} - std::unique_ptr MakeFakeDataOrDie(const Shape& shape, Client* client) { if (ShapeUtil::ByteSizeOf(shape) < (1LL << 30)) { diff --git a/tensorflow/compiler/xla/client/lib/testing.h b/tensorflow/compiler/xla/client/lib/testing.h index b5c4393dcc..7e640d1307 100644 --- a/tensorflow/compiler/xla/client/lib/testing.h +++ b/tensorflow/compiler/xla/client/lib/testing.h @@ -26,10 +26,6 @@ limitations under the License. namespace xla { -// Generates fake data in a literal of the given shape, or returns an error -// status if the element type is currently unhandled for fake data generation. -StatusOr> MakeFakeLiteral(const Shape& shape); - // Generates fake data of the given shape on the device or dies. The fake data // is created by performing a computation on the device rather than transferring // data from the host to the device. diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index fda791401d..0cb2223ae5 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -569,9 +569,17 @@ int64 Literal::LinearIndex( return IndexUtil::MultidimensionalIndexToLinearIndex(shape(), multi_index); } -string Literal::ToString() const { +string Literal::ToString(bool print_layout) const { std::vector pieces; + auto shape_to_string = [print_layout](const Shape& shape) { + if (print_layout) { + return ShapeUtil::HumanStringWithLayout(shape); + } else { + return ShapeUtil::HumanString(shape); + } + }; + auto element_to_string = [this](tensorflow::gtl::ArraySlice indices) -> string { PrimitiveType element_type = shape().element_type(); @@ -585,7 +593,7 @@ string Literal::ToString() const { // TODO(b/32894291): refactor this code to reduce code duplication. if (ShapeUtil::IsTuple(shape())) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" (\n"); pieces.push_back(tensorflow::str_util::Join( tuple_literals(), ",\n", [](string* out, const Literal& element) { @@ -601,7 +609,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 2) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(" { "); @@ -613,7 +621,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 3) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(i0 > 0 ? ",\n{" : "{"); @@ -628,7 +636,7 @@ string Literal::ToString() const { } pieces.push_back("\n}"); } else if (ShapeUtil::Rank(shape()) == 4) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(tensorflow::strings::Printf(" { /*i0=%lld*/\n", i0)); @@ -649,7 +657,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else if (ShapeUtil::Rank(shape()) == 5) { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {\n"); for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) { pieces.push_back(tensorflow::strings::Printf(" { /*i0=%lld*/\n", i0)); @@ -676,7 +684,7 @@ string Literal::ToString() const { } pieces.push_back("}"); } else { - pieces.push_back(ShapeUtil::HumanString(shape())); + pieces.push_back(shape_to_string(shape())); pieces.push_back(" {...}"); } diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index a1e288829f..667f926c46 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -450,7 +450,7 @@ class Literal { tensorflow::Status ValidateLiteral() const; // Returns a string representation of the literal value. - string ToString() const; + string ToString(bool print_layout = false) const; // Invokes the "per cell" callback for each element in the provided // literal with the element's indices and a string representation of diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index c6f6c6c38b..7cf24641b5 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1780,7 +1780,6 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", ], ) @@ -1851,7 +1850,6 @@ tf_cc_test( "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/hlo_cse_test.cc b/tensorflow/compiler/xla/service/hlo_cse_test.cc index 7c4626e78a..3601a790c4 100644 --- a/tensorflow/compiler/xla/service/hlo_cse_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cse_test.cc @@ -79,12 +79,12 @@ TEST_F(HloCseTest, CombineTwoConstantsDifferentLayoutsAndInsensitive) { // Test that two identical constants with different layouts are commoned if // the pass is not layout sensitive. auto builder = HloComputation::Builder(TestName()); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{0, 1}))); - auto constant2 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{1, 0}))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant2 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto add = builder.AddInstruction(HloInstruction::CreateBinary( constant1->shape(), HloOpcode::kAdd, constant1, constant2)); @@ -111,12 +111,12 @@ TEST_F(HloCseTest, CombineTwoConstantsDifferentLayoutsAndSensitive) { // Test that two identical constants with different layouts are *not* commoned // if the pass is layout sensitive. auto builder = HloComputation::Builder(TestName()); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{0, 1}))); - auto constant2 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - /*minor_to_major=*/{1, 0}))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant2 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto add = builder.AddInstruction(HloInstruction::CreateBinary( constant1->shape(), HloOpcode::kAdd, constant1, constant2)); diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index c39ff52230..d51c0d1dfb 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -131,10 +131,10 @@ TEST_F(LayoutAssignmentTest, FusionInstruction) { std::vector> minor_to_majors = {{0, 1}, {1, 0}}; for (auto& minor_to_major : minor_to_majors) { auto builder = HloComputation::Builder(TestName()); - auto constant_literal1 = test_utils::CreateR2LiteralWithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, minor_to_major); - auto constant_literal2 = test_utils::CreateR2LiteralWithLayout( - {{5.0, 6.0}, {7.0, 8.0}}, minor_to_major); + auto constant_literal1 = Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout(minor_to_major)); + auto constant_literal2 = Literal::CreateR2WithLayout( + {{5.0, 6.0}, {7.0, 8.0}}, LayoutUtil::MakeLayout(minor_to_major)); Shape ashape = constant_literal1->shape(); auto constant1 = builder.AddInstruction( @@ -181,12 +181,12 @@ TEST_F(LayoutAssignmentTest, TupleLayout) { // Verify the layouts of a tuple are assigned properly (the element layouts // match their source). auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {0, 1}))); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {1, 0}))); + auto constant0 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto tuple = builder.AddInstruction( HloInstruction::CreateTuple({constant0, constant1})); @@ -218,12 +218,12 @@ TEST_F(LayoutAssignmentTest, TupleLayout) { TEST_F(LayoutAssignmentTest, TupleSelect) { // Verify layouts of a select with tuple operands is assigned properly. auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {0, 1}))); - auto constant1 = builder.AddInstruction(HloInstruction::CreateConstant( - test_utils::CreateR2LiteralWithLayout({{1.0, 2.0}, {3.0, 4.0}}, - {1, 0}))); + auto constant0 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({1, 0})))); auto tuple0 = builder.AddInstruction( HloInstruction::CreateTuple({constant0, constant1})); auto tuple1 = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 4e1be24b61..2333a30ad5 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -61,13 +61,14 @@ generate_backend_test_macros() cc_library( name = "test_utils", - testonly = True, + srcs = ["test_utils.cc"], hdrs = ["test_utils.h"], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 7cfc276ec1..2c37466ff2 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -469,8 +469,7 @@ template std::vector ClientLibraryTestBase::CreatePseudorandomR1( const int width, NativeT min_value, NativeT max_value, uint32 seed) { std::vector result(width); - test_utils::PseudorandomGenerator generator(min_value, max_value, - seed); + PseudorandomGenerator generator(min_value, max_value, seed); for (int i = 0; i < width; ++i) { result[i] = generator.get(); } @@ -482,8 +481,7 @@ std::unique_ptr> ClientLibraryTestBase::CreatePseudorandomR2( const int rows, const int cols, NativeT min_value, NativeT max_value, uint32 seed) { auto result = MakeUnique>(rows, cols); - test_utils::PseudorandomGenerator generator(min_value, max_value, - seed); + PseudorandomGenerator generator(min_value, max_value, seed); for (int y = 0; y < rows; ++y) { for (int x = 0; x < cols; ++x) { (*result)(y, x) = generator.get(); diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc index 0853feeebd..183bcf1dd3 100644 --- a/tensorflow/compiler/xla/tests/client_test.cc +++ b/tensorflow/compiler/xla/tests/client_test.cc @@ -54,8 +54,8 @@ TEST_F(ClientTest, ExecuteWithLayout) { .ConsumeValueOrDie(); std::unique_ptr expected_literal = - test_utils::CreateR2LiteralWithLayout({{11, 22}, {33, 44}}, - transfer_layout); + Literal::CreateR2WithLayout( + {{11, 22}, {33, 44}}, LayoutUtil::MakeLayout(transfer_layout)); auto computed = client_->Transfer(*data, &expected_literal->shape()); diff --git a/tensorflow/compiler/xla/tests/compilation_cache_test.cc b/tensorflow/compiler/xla/tests/compilation_cache_test.cc index 707e439245..0f780fa87e 100644 --- a/tensorflow/compiler/xla/tests/compilation_cache_test.cc +++ b/tensorflow/compiler/xla/tests/compilation_cache_test.cc @@ -138,13 +138,13 @@ XLA_TEST_F(CompilationCacheTest, DifferentParameterLayouts) { // layouts. Use these arrays as parameters to a simple computation. If the // layout of the array changes then computation should be recompiled (cache // miss). - auto rowmaj_array = test_utils::CreateR2LiteralWithLayout( - {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{1, 0}); + auto rowmaj_array = Literal::CreateR2WithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({1, 0})); auto rowmaj_handle = client_->TransferToServer(*rowmaj_array).ConsumeValueOrDie(); - auto colmaj_array = test_utils::CreateR2LiteralWithLayout( - {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{0, 1}); + auto colmaj_array = Literal::CreateR2WithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1})); auto colmaj_handle = client_->TransferToServer(*colmaj_array).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/compute_constant_test.cc b/tensorflow/compiler/xla/tests/compute_constant_test.cc index d423c78476..5226a78386 100644 --- a/tensorflow/compiler/xla/tests/compute_constant_test.cc +++ b/tensorflow/compiler/xla/tests/compute_constant_test.cc @@ -264,8 +264,8 @@ XLA_TEST_F(ComputeConstantTest, Layout) { ASSERT_TRUE(computed.ok()) << computed.status(); std::unique_ptr expected_literal = - test_utils::CreateR2LiteralWithLayout({{11, 22}, {33, 44}}, - layout); + Literal::CreateR2WithLayout({{11, 22}, {33, 44}}, + LayoutUtil::MakeLayout(layout)); LiteralTestUtil::AssertEqualShapesAndLayouts( expected_literal->shape(), computed.ValueOrDie()->shape()); LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index c4e422b506..b72dd2707c 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -177,15 +177,15 @@ void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major, bool rhs_row_major) { auto lhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 2.0}, {3.0, -4.0}}, - MinorToMajorForIsRowMajor(lhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 6.0}, {7.0, -4.0}}, - MinorToMajorForIsRowMajor(rhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); @@ -362,15 +362,15 @@ void DotOperationTest::TestNonsquareMatrixDot(bool lhs_row_major, bool rhs_row_major) { auto lhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}}, - MinorToMajorForIsRowMajor(lhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( + ->TransferToServer(*Literal::CreateR2WithLayout( {{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}}, - MinorToMajorForIsRowMajor(rhs_row_major))) + LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); @@ -420,13 +420,14 @@ XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64) { XLA_TEST_F(DotOperationTest, MatrixVectorC64) { auto lhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( - {{1.0, 2.0, 3.0, -4.0}}, {1, 0})) + ->TransferToServer(*Literal::CreateR2WithLayout( + {{1.0, 2.0, 3.0, -4.0}}, LayoutUtil::MakeLayout({1, 0}))) .ConsumeValueOrDie(); auto rhs_handle = client_ - ->TransferToServer(*test_utils::CreateR2LiteralWithLayout( - {{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}, {-4.0, 4.0}}, {1, 0})) + ->TransferToServer(*Literal::CreateR2WithLayout( + {{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}, {-4.0, 4.0}}, + LayoutUtil::MakeLayout({1, 0}))) .ConsumeValueOrDie(); ComputationBuilder builder(client_, TestName()); diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc index 329b53012f..a196e250d1 100644 --- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc @@ -136,16 +136,14 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentInputLayouts) { auto computation = builder.Build().ConsumeValueOrDie(); // Create x as a col-major array. - auto x_array = LiteralToShapedBuffer( - *test_utils::CreateR2LiteralWithLayout({{1.0f, 2.0f}, {3.0f, 4.0f}}, - /*minor_to_major=*/{0, 1})); + auto x_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1}))); EXPECT_TRUE(LayoutUtil::Equal(x_array->shape().layout(), LayoutUtil::MakeLayout({0, 1}))); // Create y as a row-major array. - auto y_array = LiteralToShapedBuffer( - *test_utils::CreateR2LiteralWithLayout({{10.0f, 20.0f}, {30.0f, 40.0f}}, - /*minor_to_major=*/{1, 0})); + auto y_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout( + {{10.0f, 20.0f}, {30.0f, 40.0f}}, LayoutUtil::MakeLayout({1, 0}))); EXPECT_TRUE(LayoutUtil::Equal(y_array->shape().layout(), LayoutUtil::MakeLayout({1, 0}))); diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 2ef392508d..2b0f7e6e80 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -405,13 +405,13 @@ TEST_F(MapTest, MapBinaryAdder) { // for Map that used to fail in shape inference (b/28989438). XLA_TEST_F(MapTest, AddWithMixedLayouts) { ComputationBuilder builder(client_, TestName()); - std::unique_ptr param0_literal = - test_utils::CreateR2LiteralWithLayout({{1, 2}, {3, 4}}, {1, 0}); + std::unique_ptr param0_literal = Literal::CreateR2WithLayout( + {{1, 2}, {3, 4}}, LayoutUtil::MakeLayout({1, 0})); std::unique_ptr param0_data = client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); - std::unique_ptr param1_literal = - test_utils::CreateR2LiteralWithLayout({{10, 20}, {30, 40}}, {0, 1}); + std::unique_ptr param1_literal = Literal::CreateR2WithLayout( + {{10, 20}, {30, 40}}, LayoutUtil::MakeLayout({0, 1})); std::unique_ptr param1_data = client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc new file mode 100644 index 0000000000..cdd3d66bbb --- /dev/null +++ b/tensorflow/compiler/xla/tests/test_utils.cc @@ -0,0 +1,120 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/test_utils.h" + +#include "tensorflow/compiler/xla/primitive_util.h" + +namespace xla { + +namespace { + +template +void PopulateWithRandomFloatingPointData(Literal* literal) { + CHECK_EQ(literal->shape().element_type(), + primitive_util::NativeToPrimitiveType()); + std::minstd_rand0 engine; + std::uniform_real_distribution generator(0.0f, 1.0f); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); +} + +template +void PopulateWithRandomIntegralData(Literal* literal) { + CHECK_EQ(literal->shape().element_type(), + primitive_util::NativeToPrimitiveType()); + std::minstd_rand0 engine; + std::uniform_int_distribution generator( + std::numeric_limits::lowest(), std::numeric_limits::max()); + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); +} + +} // namespace + +StatusOr> MakeFakeLiteral(const Shape& shape) { + if (ShapeUtil::IsTuple(shape)) { + std::vector> elements; + for (const Shape& element_shape : shape.tuple_shapes()) { + TF_ASSIGN_OR_RETURN(std::unique_ptr element, + MakeFakeLiteral(element_shape)); + elements.push_back(std::move(element)); + } + return Literal::MakeTupleOwned(std::move(elements)); + } + std::unique_ptr literal = Literal::CreateFromShape(shape); + switch (shape.element_type()) { + case F32: + PopulateWithRandomFloatingPointData(literal.get()); + break; + case F64: + PopulateWithRandomFloatingPointData(literal.get()); + break; + case S8: + PopulateWithRandomIntegralData(literal.get()); + break; + case U8: + PopulateWithRandomIntegralData(literal.get()); + break; + case S16: + PopulateWithRandomIntegralData(literal.get()); + break; + case U16: + PopulateWithRandomIntegralData(literal.get()); + break; + case S32: + PopulateWithRandomIntegralData(literal.get()); + break; + case U32: + PopulateWithRandomIntegralData(literal.get()); + break; + case S64: + PopulateWithRandomIntegralData(literal.get()); + break; + case U64: + PopulateWithRandomIntegralData(literal.get()); + break; + case PRED: { + std::uniform_int_distribution generator(0, 1); + std::minstd_rand0 engine; + TF_CHECK_OK(literal->Populate( + [&](tensorflow::gtl::ArraySlice /*indices*/) { + return generator(engine); + })); + break; + } + default: + return Unimplemented("Unsupported type for fake literal generation: %s", + ShapeUtil::HumanString(shape).c_str()); + } + return std::move(literal); +} + +StatusOr>> MakeFakeArguments( + const HloModule& module) { + std::vector> arguments; + for (const ShapeLayout& shape_layout : + module.config().entry_computation_layout().parameter_layouts()) { + TF_ASSIGN_OR_RETURN(auto literal, MakeFakeLiteral(shape_layout.shape())); + arguments.push_back(std::move(literal)); + } + return std::move(arguments); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h index f3a522b05e..12d5255fce 100644 --- a/tensorflow/compiler/xla/tests/test_utils.h +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -23,12 +23,12 @@ limitations under the License. #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/types.h" namespace xla { -namespace test_utils { // A class which generates pseudorandom numbers of a given type within a given // range. Not cryptographically secure and likely not perfectly evenly @@ -53,63 +53,15 @@ class PseudorandomGenerator { std::mt19937 generator_; }; -// Convenience function for creating a rank-2 array with arbitrary layout. -template -std::unique_ptr CreateR2LiteralWithLayout( - std::initializer_list> values, - tensorflow::gtl::ArraySlice minor_to_major) { - auto literal = MakeUnique(); - const int64 d0 = values.size(); - const int64 d1 = values.begin()->size(); - literal.get()->PopulateWithValue(0, {d0, d1}); - *literal->mutable_shape()->mutable_layout() = - LayoutUtil::MakeLayout(minor_to_major); - TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); - - int64 dim0 = 0; - for (auto inner_list : values) { - int64 dim1 = 0; - for (auto value : inner_list) { - literal.get()->Set({dim0, dim1}, value); - ++dim1; - } - ++dim0; - } - return literal; -} +// Generates fake data in a literal of the given shape, or returns an error +// status if the element type is currently unhandled for fake data generation. +StatusOr> MakeFakeLiteral(const Shape& shape); -// Convenience function for creating a rank-3 array with arbitrary layout. -template -std::unique_ptr CreateR3LiteralWithLayout( - std::initializer_list>> - values, - tensorflow::gtl::ArraySlice minor_to_major) { - auto literal = MakeUnique(); - const int64 d0 = values.size(); - const int64 d1 = values.begin()->size(); - const int64 d2 = values.begin()->begin()->size(); - literal.get()->PopulateWithValue(0, {d0, d1, d2}); - *literal->mutable_shape()->mutable_layout() = - LayoutUtil::MakeLayout(minor_to_major); - TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); - - int64 dim0 = 0; - for (auto inner_list : values) { - int64 dim1 = 0; - for (auto inner_inner_list : inner_list) { - int64 dim2 = 0; - for (auto value : inner_inner_list) { - literal.get()->Set({dim0, dim1, dim2}, value); - ++dim2; - } - ++dim1; - } - ++dim0; - } - return literal; -} +// Generates a vector of arguments containing fake data. The number, shape and +// layout of the arguments is appropriate for given HLO module. +StatusOr>> MakeFakeArguments( + const HloModule& module); -} // namespace test_utils } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_TESTS_TEST_UTILS_H_ diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index 759921dce5..091fa0c3ec 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -88,6 +88,7 @@ cc_library( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:testing", "//tensorflow/compiler/xla/service:session_proto", + "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index 89b26b8916..503e7d456e 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -45,6 +45,7 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/threadpool.h" -- GitLab From f28ae398cc5b875b936ad6e5cd4d280928c38409 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Wed, 8 Nov 2017 15:57:27 -0800 Subject: [PATCH 0207/1801] allows tf.Print to print empty data list and changes a noop test in function_test.py to verify that it doesn't raise a ValueError as an empty list would have previously PiperOrigin-RevId: 175079527 --- tensorflow/core/ops/logging_ops.cc | 2 +- tensorflow/python/framework/function_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc index 11cb9861a3..e6995821df 100644 --- a/tensorflow/core/ops/logging_ops.cc +++ b/tensorflow/core/ops/logging_ops.cc @@ -43,7 +43,7 @@ REGISTER_OP("Print") .Output("output: T") .SetIsStateful() .Attr("T: type") - .Attr("U: list(type)") + .Attr("U: list(type) >= 0") .Attr("message: string = ''") .Attr("first_n: int = -1") .Attr("summarize: int = 3") diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 36b0737cfc..ba43e9199b 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -370,7 +370,7 @@ class FunctionTest(test.TestCase): @function.Defun(dtypes.float32) def Foo(x): - y = logging_ops.Print(x, [x], "Hello") + y = logging_ops.Print(x, [], "Hello") with ops.control_dependencies([y]): z = control_flow_ops.no_op() with ops.control_dependencies([z]): -- GitLab From 0a31b36d68a0f8423279a4e94481fa7b91a15f10 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 15:59:24 -0800 Subject: [PATCH 0208/1801] Add Baseline Estimators to core TensorFlow PiperOrigin-RevId: 175079784 --- tensorflow/python/estimator/BUILD | 65 + .../python/estimator/canned/baseline.py | 349 ++++ .../python/estimator/canned/baseline_test.py | 1545 +++++++++++++++++ tensorflow/python/estimator/estimator_lib.py | 4 + ...rflow.estimator.-baseline-classifier.pbtxt | 54 + ...orflow.estimator.-baseline-regressor.pbtxt | 54 + .../api/golden/tensorflow.estimator.pbtxt | 8 + 7 files changed, 2079 insertions(+) create mode 100644 tensorflow/python/estimator/canned/baseline.py create mode 100644 tensorflow/python/estimator/canned/baseline_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 26f1fd888a..dba7761700 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -25,6 +25,7 @@ py_library( srcs = ["estimator_lib.py"], srcs_version = "PY2AND3", deps = [ + ":baseline", ":dnn", ":dnn_linear_combined", ":estimator", @@ -186,6 +187,70 @@ py_test( ], ) +py_library( + name = "baseline", + srcs = ["canned/baseline.py"], + srcs_version = "PY2AND3", + deps = [ + ":estimator", + ":head", + ":model_fn", + ":optimizers", + "//tensorflow/python:init_ops", + "//tensorflow/python:layers", + "//tensorflow/python:nn", + "//tensorflow/python:partitioned_variables", + "//tensorflow/python:summary", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python/feature_column", + "@six_archive//:six", + ], +) + +py_test( + name = "baseline_test", + size = "medium", + srcs = ["canned/baseline_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + "notsan", # b/67510291 + ], + deps = [ + ":baseline", + ":estimator", + ":export_export", + ":metric_keys", + ":numpy_io", + ":pandas_io", + ":run_config", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:data_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:platform", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:state_ops", + "//tensorflow/python:summary", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + "//third_party/py/pandas", + "@six_archive//:six", + ], +) + py_library( name = "dnn", srcs = ["canned/dnn.py"], diff --git a/tensorflow/python/estimator/canned/baseline.py b/tensorflow/python/estimator/canned/baseline.py new file mode 100644 index 0000000000..96e4ecd29f --- /dev/null +++ b/tensorflow/python/estimator/canned/baseline.py @@ -0,0 +1,349 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Baseline estimators. + +Baseline estimators are bias-only estimators that can be used for debugging +and as simple baselines. + +Example: + +``` +# Build BaselineClassifier +classifier = BaselineClassifier(n_classes=3) + +# Input builders +def input_fn_train: # returns x, y (where y represents label's class index). + pass + +def input_fn_eval: # returns x, y (where y represents label's class index). + pass + +# Fit model. +classifier.train(input_fn=input_fn_train) + +# Evaluate cross entropy between the test and train labels. +loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] + +# predict outputs the probability distribution of the classes as seen in +# training. +predictions = classifier.predict(new_samples) +``` +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six + +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.canned import optimizers +from tensorflow.python.feature_column import feature_column as feature_column_lib +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import training_util + +# The default learning rate of 0.3 is a historical artifact of the initial +# implementation, but seems a reasonable choice. +_LEARNING_RATE = 0.3 + + +def _get_weight_column_key(weight_column): + if weight_column is None: + return None + if isinstance(weight_column, six.string_types): + return weight_column + if not isinstance(weight_column, feature_column_lib._NumericColumn): # pylint: disable=protected-access + raise TypeError('Weight column must be either a string or _NumericColumn.' + ' Given type: {}.'.format(type(weight_column))) + return weight_column.key() + + +def _baseline_logit_fn_builder(num_outputs, weight_column=None): + """Function builder for a baseline logit_fn. + + Args: + num_outputs: Number of outputs for the model. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + Returns: + A logit_fn (see below). + """ + + def baseline_logit_fn(features): + """Baseline model logit_fn. + + The baseline model simply learns a bias, so the output logits are a + `Variable` with one weight for each output that learns the bias for the + corresponding output. + + Args: + features: The first item returned from the `input_fn` passed to `train`, + `evaluate`, and `predict`. This should be a single `Tensor` or dict with + `Tensor` values. + Returns: + A `Tensor` representing the logits. + """ + size_checks = [] + batch_size = None + + weight_column_key = _get_weight_column_key(weight_column) + + # The first dimension is assumed to be a batch size and must be consistent + # among all of the features. + for key, feature in features.items(): + # Skip weight_column to ensure we don't add size checks to it. + # These would introduce a dependency on the weight at serving time. + if key == weight_column_key: + continue + first_dim = array_ops.shape(feature)[0] + if batch_size is None: + batch_size = first_dim + else: + size_checks.append(check_ops.assert_equal(batch_size, first_dim)) + + with ops.control_dependencies(size_checks): + with variable_scope.variable_scope('baseline'): + bias = variable_scope.get_variable('bias', shape=[num_outputs], + initializer=init_ops.Zeros) + return math_ops.multiply(bias, array_ops.ones([batch_size, + num_outputs])) + + return baseline_logit_fn + + +def _baseline_model_fn(features, labels, mode, head, optimizer, + weight_column=None, config=None): + """Model_fn for baseline models. + + Args: + features: `Tensor` or dict of `Tensor` (depends on data passed to `train`). + labels: `Tensor` of labels that are compatible with the `Head` instance. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. + head: A `Head` instance. + optimizer: String, `tf.Optimizer` object, or callable that creates the + optimizer to use for training. If not specified, will use `FtrlOptimizer` + with a default learning rate of 0.3. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + config: `RunConfig` object to configure the runtime settings. + + Raises: + KeyError: If weight column is specified but not present. + ValueError: If features is an empty dictionary. + + Returns: + An `EstimatorSpec` instance. + """ + del config # Unused. + + logit_fn = _baseline_logit_fn_builder(head.logits_dimension, weight_column) + logits = logit_fn(features) + + def train_op_fn(loss): + opt = optimizers.get_optimizer_instance( + optimizer, learning_rate=_LEARNING_RATE) + return opt.minimize(loss, global_step=training_util.get_global_step()) + + return head.create_estimator_spec( + features=features, + mode=mode, + logits=logits, + labels=labels, + train_op_fn=train_op_fn) + + +class BaselineClassifier(estimator.Estimator): + """A classifier that can establish a simple baseline. + + This classifier ignores feature values and will learn to predict the average + value of each label. For single-label problems, this will predict the + probability distribution of the classes as seen in the labels. For multi-label + problems, this will predict the fraction of examples that are positive for + each class. + + Example: + + ```python + + # Build BaselineClassifier + classifier = BaselineClassifier(n_classes=3) + + # Input builders + def input_fn_train: # returns x, y (where y represents label's class index). + pass + + def input_fn_eval: # returns x, y (where y represents label's class index). + pass + + # Fit model. + classifier.train(input_fn=input_fn_train) + + # Evaluate cross entropy between the test and train labels. + loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] + + # predict outputs the probability distribution of the classes as seen in + # training. + predictions = classifier.predict(new_samples) + + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * if `weight_column` is not `None`, a feature with + `key=weight_column` whose value is a `Tensor`. + """ + + def __init__(self, + model_dir=None, + n_classes=2, + weight_column=None, + label_vocabulary=None, + optimizer='Ftrl', + config=None): + """Initializes a BaselineClassifier instance. + + Args: + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator to + continue training a previously saved model. + n_classes: number of label classes. Default is binary classification. + It must be greater than 1. Note: Class labels are integers representing + the class index (i.e. values from 0 to n_classes-1). For arbitrary + label values (e.g. string labels), convert to class indices first. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + label_vocabulary: Optional list of strings with size `[n_classes]` + defining the label vocabulary. Only supported for `n_classes` > 2. + optimizer: String, `tf.Optimizer` object, or callable that creates the + optimizer to use for training. If not specified, will use + `FtrlOptimizer` with a default learning rate of 0.3. + config: `RunConfig` object to configure the runtime settings. + Returns: + A `BaselineClassifier` estimator. + + Raises: + ValueError: If `n_classes` < 2. + """ + if n_classes == 2: + head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( # pylint: disable=protected-access + weight_column=weight_column, + label_vocabulary=label_vocabulary) + else: + head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( # pylint: disable=protected-access + n_classes, weight_column=weight_column, + label_vocabulary=label_vocabulary) + def _model_fn(features, labels, mode, config): + return _baseline_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + optimizer=optimizer, + weight_column=weight_column, + config=config) + super(BaselineClassifier, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config) + + +class BaselineRegressor(estimator.Estimator): + """A regressor that can establish a simple baseline. + + This regressor ignores feature values and will learn to predict the average + value of each label. + + Example: + + ```python + + # Build BaselineRegressor + regressor = BaselineRegressor() + + # Input builders + def input_fn_train: # returns x, y (where y is the label). + pass + + def input_fn_eval: # returns x, y (where y is the label). + pass + + # Fit model. + regressor.train(input_fn=input_fn_train) + + # Evaluate squared-loss between the test and train targets. + loss = regressor.evaluate(input_fn=input_fn_eval)["loss"] + + # predict outputs the mean value seen during training. + predictions = regressor.predict(new_samples) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * if `weight_column` is not `None`, a feature with + `key=weight_column` whose value is a `Tensor`. + """ + + def __init__(self, + model_dir=None, + label_dimension=1, + weight_column=None, + optimizer='Ftrl', + config=None): + """Initializes a BaselineRegressor instance. + + Args: + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator to + continue training a previously saved model. + label_dimension: Number of regression targets per example. This is the + size of the last dimension of the labels and logits `Tensor` objects + (typically, these have shape `[batch_size, label_dimension]`). + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It will be multiplied by the loss of the example. + optimizer: String, `tf.Optimizer` object, or callable that creates the + optimizer to use for training. If not specified, will use + `FtrlOptimizer` with a default learning rate of 0.3. + config: `RunConfig` object to configure the runtime settings. + Returns: + A `BaselineRegressor` estimator. + """ + + head = head_lib._regression_head_with_mean_squared_error_loss( # pylint: disable=protected-access + label_dimension=label_dimension, + weight_column=weight_column) + def _model_fn(features, labels, mode, config): + return _baseline_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + optimizer=optimizer, + config=config) + super(BaselineRegressor, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config) diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py new file mode 100644 index 0000000000..96639e88ea --- /dev/null +++ b/tensorflow/python/estimator/canned/baseline_test.py @@ -0,0 +1,1545 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for baseline.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import shutil +import tempfile + +import numpy as np +import six + +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.client import session as tf_session +from tensorflow.python.estimator.canned import baseline +from tensorflow.python.estimator.canned import metric_keys +from tensorflow.python.estimator.export import export +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.estimator.inputs import pandas_io +from tensorflow.python.feature_column import feature_column as feature_column_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import checkpoint_utils +from tensorflow.python.training import input as input_lib +from tensorflow.python.training import optimizer +from tensorflow.python.training import queue_runner +from tensorflow.python.training import saver + + +try: + # pylint: disable=g-import-not-at-top + import pandas as pd + HAS_PANDAS = True +except IOError: + # Pandas writes a temporary file during import. If it fails, don't use pandas. + HAS_PANDAS = False +except ImportError: + HAS_PANDAS = False + +# pylint rules which are disabled by default for test files. +# pylint: disable=invalid-name,protected-access,missing-docstring + +# Names of variables created by model. +BIAS_NAME = 'baseline/bias' + + +def assert_close(expected, actual, rtol=1e-04, name='assert_close'): + with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: + expected = ops.convert_to_tensor(expected, name='expected') + actual = ops.convert_to_tensor(actual, name='actual') + rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected) + rtol = ops.convert_to_tensor(rtol, name='rtol') + return check_ops.assert_less( + rdiff, + rtol, + data=('Condition expected =~ actual did not hold element-wise:' + 'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff, + 'rtol = ', rtol,), + name=scope) + + +def save_variables_to_ckpt(model_dir): + init_all_op = [variables.global_variables_initializer()] + with tf_session.Session() as sess: + sess.run(init_all_op) + saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) + + +def queue_parsed_features(feature_map): + tensors_to_enqueue = [] + keys = [] + for key, tensor in six.iteritems(feature_map): + keys.append(key) + tensors_to_enqueue.append(tensor) + queue_dtypes = [x.dtype for x in tensors_to_enqueue] + input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes) + queue_runner.add_queue_runner( + queue_runner.QueueRunner(input_queue, + [input_queue.enqueue(tensors_to_enqueue)])) + dequeued_tensors = input_queue.dequeue() + return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))} + + +def sorted_key_dict(unsorted_dict): + return {k: unsorted_dict[k] for k in sorted(unsorted_dict)} + + +def sigmoid(x): + return 1 / (1 + np.exp(-1.0 * x)) + + +def _baseline_regressor_fn(*args, **kwargs): + return baseline.BaselineRegressor(*args, **kwargs) + + +def _baseline_classifier_fn(*args, **kwargs): + return baseline.BaselineClassifier(*args, **kwargs) + + +# Tests for Baseline Regressor. + + +# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders. +class BaselineRegressorEvaluationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def test_evaluation_for_simple_data(self): + with ops.Graph().as_default(): + variables.Variable([13.0], name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + eval_metrics = baseline_regressor.evaluate( + input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1) + + # Logit is bias = 13, while label is 10. Loss is 3**2 = 9. + self.assertDictEqual({ + metric_keys.MetricKeys.LOSS: 9., + metric_keys.MetricKeys.LOSS_MEAN: 9., + ops.GraphKeys.GLOBAL_STEP: 100 + }, eval_metrics) + + def test_evaluation_batch(self): + """Tests evaluation for batch_size==2.""" + with ops.Graph().as_default(): + variables.Variable([13.0], name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + eval_metrics = baseline_regressor.evaluate( + input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1) + + # Logit is bias = 13, while label is 10. + # Loss per example is 3**2 = 9. + # Training loss is the sum over batch = 9 + 9 = 18 + # Average loss is the average over batch = 9 + self.assertDictEqual({ + metric_keys.MetricKeys.LOSS: 18., + metric_keys.MetricKeys.LOSS_MEAN: 9., + ops.GraphKeys.GLOBAL_STEP: 100 + }, eval_metrics) + + def test_evaluation_weights(self): + """Tests evaluation with weights.""" + with ops.Graph().as_default(): + variables.Variable([13.0], name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + def _input_fn(): + features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))} + labels = ((10.,), (10.,)) + return features, labels + + baseline_regressor = _baseline_regressor_fn( + weight_column='weights', + model_dir=self._model_dir) + eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1) + + # Logit is bias = 13, while label is 10. + # Loss per example is 3**2 = 9. + # Training loss is the weighted sum over batch = 9 + 2*9 = 27 + # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9 + self.assertDictEqual({ + metric_keys.MetricKeys.LOSS: 27., + metric_keys.MetricKeys.LOSS_MEAN: 9., + ops.GraphKeys.GLOBAL_STEP: 100 + }, eval_metrics) + + def test_evaluation_for_multi_dimensions(self): + label_dim = 2 + with ops.Graph().as_default(): + variables.Variable([46.0, 58.0], name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn( + label_dimension=label_dim, + model_dir=self._model_dir) + input_fn = numpy_io.numpy_input_fn( + x={ + 'age': np.array([[2., 4., 5.]]), + }, + y=np.array([[46., 58.]]), + batch_size=1, + num_epochs=None, + shuffle=False) + eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1) + + self.assertItemsEqual( + (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, + ops.GraphKeys.GLOBAL_STEP), eval_metrics.keys()) + + # Logit is bias which is [46, 58] + self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) + + +class BaselineRegressorPredictTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def test_1d(self): + """Tests predict when all variables are one-dimensional.""" + with ops.Graph().as_default(): + variables.Variable([.2], name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': np.array([[2.]])}, + y=None, + batch_size=1, + num_epochs=1, + shuffle=False) + predictions = baseline_regressor.predict(input_fn=predict_input_fn) + predicted_scores = list([x['predictions'] for x in predictions]) + # x * weight + bias = 2. * 10. + .2 = 20.2 + self.assertAllClose([[.2]], predicted_scores) + + def testMultiDim(self): + """Tests predict when all variables are multi-dimenstional.""" + batch_size = 2 + label_dimension = 3 + with ops.Graph().as_default(): + variables.Variable( # shape=[label_dimension] + [.2, .4, .6], name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + baseline_regressor = _baseline_regressor_fn( + label_dimension=label_dimension, + model_dir=self._model_dir) + + predict_input_fn = numpy_io.numpy_input_fn( + # x shape=[batch_size, x_dim] + x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])}, + y=None, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + predictions = baseline_regressor.predict(input_fn=predict_input_fn) + predicted_scores = list([x['predictions'] for x in predictions]) + # score = bias, shape=[batch_size, label_dimension] + self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]], + predicted_scores) + + +class BaselineRegressorIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, + input_dimension, label_dimension, prediction_length): + feature_columns = [ + feature_column_lib.numeric_column('x', shape=(input_dimension,)) + ] + est = _baseline_regressor_fn( + label_dimension=label_dimension, + model_dir=self._model_dir) + + # TRAIN + # learn y = x + est.train(train_input_fn, steps=200) + + # EVALUTE + scores = est.evaluate(eval_input_fn) + self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) + + # PREDICT + predictions = np.array( + [x['predictions'] for x in est.predict(predict_input_fn)]) + self.assertAllEqual((prediction_length, label_dimension), predictions.shape) + + # EXPORT + feature_spec = feature_column_lib.make_parse_example_spec(feature_columns) + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def test_numpy_input_fn(self): + """Tests complete flow with numpy_input_fn.""" + label_dimension = 2 + input_dimension = label_dimension + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + + train_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=None, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length) + + def test_pandas_input_fn(self): + """Tests complete flow with pandas_input_fn.""" + if not HAS_PANDAS: + return + + # Pandas DataFrame natually supports 1 dim data only. + label_dimension = 1 + input_dimension = label_dimension + batch_size = 10 + data = np.array([1., 2., 3., 4.], dtype=np.float32) + x = pd.DataFrame({'x': data}) + y = pd.Series(data) + prediction_length = 4 + + train_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) + eval_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, shuffle=False) + predict_input_fn = pandas_io.pandas_input_fn( + x=x, batch_size=batch_size, shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length) + + def test_input_fn_from_parse_example(self): + """Tests complete flow with input_fn constructed from parse_example.""" + label_dimension = 2 + input_dimension = label_dimension + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + + serialized_examples = [] + for datum in data: + example = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'x': + feature_pb2.Feature(float_list=feature_pb2.FloatList( + value=datum)), + 'y': + feature_pb2.Feature(float_list=feature_pb2.FloatList( + value=datum[:label_dimension])), + })) + serialized_examples.append(example.SerializeToString()) + + feature_spec = { + 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), + 'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32), + } + + def _train_input_fn(): + feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _eval_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _predict_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + features.pop('y') + return features, None + + self._test_complete_flow( + train_input_fn=_train_input_fn, + eval_input_fn=_eval_input_fn, + predict_input_fn=_predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length) + + +class BaselineRegressorTrainingTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _mock_optimizer(self, expected_loss=None): + expected_var_names = [ + '%s:0' % BIAS_NAME + ] + + def _minimize(loss, global_step=None, var_list=None): + trainable_vars = var_list or ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual(expected_var_names, + [var.name for var in trainable_vars]) + + # Verify loss. We can't check the value directly, so we add an assert op. + self.assertEquals(0, loss.shape.ndims) + if expected_loss is None: + if global_step is not None: + return state_ops.assign_add(global_step, 1).op + return control_flow_ops.no_op() + assert_loss = assert_close( + math_ops.to_float(expected_loss, name='expected'), + loss, + name='assert_loss') + with ops.control_dependencies((assert_loss,)): + if global_step is not None: + return state_ops.assign_add(global_step, 1).op + return control_flow_ops.no_op() + + mock_optimizer = test.mock.NonCallableMock( + spec=optimizer.Optimizer, + wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) + mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) + + # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. + # So, return mock_optimizer itself for deepcopy. + mock_optimizer.__deepcopy__ = lambda _: mock_optimizer + return mock_optimizer + + def _assert_checkpoint(self, + label_dimension, + expected_global_step, + expected_bias=None): + shapes = { + name: shape + for (name, shape) in checkpoint_utils.list_variables(self._model_dir) + } + + self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) + self.assertEqual(expected_global_step, + checkpoint_utils.load_variable(self._model_dir, + ops.GraphKeys.GLOBAL_STEP)) + + self.assertEqual([label_dimension], shapes[BIAS_NAME]) + if expected_bias is not None: + self.assertEqual(expected_bias, + checkpoint_utils.load_variable(self._model_dir, + BIAS_NAME)) + + def testFromScratchWithDefaultOptimizer(self): + # Create BaselineRegressor. + label = 5. + age = 17 + baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) + + # Train for a few steps, and validate final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps) + + def testTrainWithOneDimLabel(self): + label_dimension = 1 + batch_size = 20 + est = _baseline_regressor_fn( + label_dimension=label_dimension, + model_dir=self._model_dir) + data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) + self.assertEqual((batch_size,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1}, + y=data_rank_1, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(label_dimension=1, expected_global_step=200) + + def testTrainWithOneDimWeight(self): + label_dimension = 1 + batch_size = 20 + est = _baseline_regressor_fn( + label_dimension=label_dimension, + weight_column='w', + model_dir=self._model_dir) + + data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) + self.assertEqual((batch_size,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1, + 'w': data_rank_1}, + y=data_rank_1, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(label_dimension=1, expected_global_step=200) + + def testFromScratch(self): + # Create BaselineRegressor. + label = 5. + age = 17 + # loss = (logits - label)^2 = (0 - 5.)^2 = 25. + mock_optimizer = self._mock_optimizer(expected_loss=25.) + baseline_regressor = _baseline_regressor_fn( + model_dir=self._model_dir, + optimizer=mock_optimizer) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + label_dimension=1, + expected_global_step=num_steps, + expected_bias=[0.]) + + def testFromCheckpoint(self): + # Create initial checkpoint. + bias = 7.0 + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable([bias], name=BIAS_NAME) + variables.Variable( + initial_global_step, + name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # logits = bias = 6. + # loss = (logits - label)^2 = (7 - 5)^2 = 4 + mock_optimizer = self._mock_optimizer(expected_loss=4.) + baseline_regressor = _baseline_regressor_fn( + model_dir=self._model_dir, + optimizer=mock_optimizer) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + label_dimension=1, + expected_global_step=initial_global_step + num_steps, + expected_bias=[bias]) + + def testFromCheckpointMultiBatch(self): + # Create initial checkpoint. + bias = 5.0 + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable([bias], name=BIAS_NAME) + variables.Variable( + initial_global_step, + name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # logits = bias + # logits[0] = 5. + # logits[1] = 5. + # loss = sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2 = 4 + mock_optimizer = self._mock_optimizer(expected_loss=4.) + baseline_regressor = _baseline_regressor_fn( + model_dir=self._model_dir, + optimizer=mock_optimizer) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + baseline_regressor.train( + input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))), + steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + label_dimension=1, + expected_global_step=initial_global_step + num_steps, + expected_bias=bias) + + +# Tests for Baseline Classifier. + + +class BaselineClassifierTrainingTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _mock_optimizer(self, expected_loss=None): + expected_var_names = [ + '%s:0' % BIAS_NAME + ] + + def _minimize(loss, global_step): + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual( + expected_var_names, + [var.name for var in trainable_vars]) + + # Verify loss. We can't check the value directly, so we add an assert op. + self.assertEquals(0, loss.shape.ndims) + if expected_loss is None: + return state_ops.assign_add(global_step, 1).op + assert_loss = assert_close( + math_ops.to_float(expected_loss, name='expected'), + loss, + name='assert_loss') + with ops.control_dependencies((assert_loss,)): + return state_ops.assign_add(global_step, 1).op + + mock_optimizer = test.mock.NonCallableMock( + spec=optimizer.Optimizer, + wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) + mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize) + + # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. + # So, return mock_optimizer itself for deepcopy. + mock_optimizer.__deepcopy__ = lambda _: mock_optimizer + return mock_optimizer + + def _assert_checkpoint( + self, n_classes, expected_global_step, expected_bias=None): + logits_dimension = n_classes if n_classes > 2 else 1 + + shapes = { + name: shape for (name, shape) in + checkpoint_utils.list_variables(self._model_dir) + } + + self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP]) + self.assertEqual( + expected_global_step, + checkpoint_utils.load_variable( + self._model_dir, ops.GraphKeys.GLOBAL_STEP)) + + self.assertEqual([logits_dimension], shapes[BIAS_NAME]) + if expected_bias is not None: + self.assertAllEqual(expected_bias, + checkpoint_utils.load_variable( + self._model_dir, BIAS_NAME)) + + def _testFromScratchWithDefaultOptimizer(self, n_classes): + label = 0 + age = 17 + est = baseline.BaselineClassifier( + n_classes=n_classes, + model_dir=self._model_dir) + + # Train for a few steps, and validate final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self._assert_checkpoint(n_classes, num_steps) + + def testBinaryClassesFromScratchWithDefaultOptimizer(self): + self._testFromScratchWithDefaultOptimizer(n_classes=2) + + def testMultiClassesFromScratchWithDefaultOptimizer(self): + self._testFromScratchWithDefaultOptimizer(n_classes=4) + + def _testTrainWithTwoDimsLabel(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + data_rank_2 = np.array([[0], [1]]) + self.assertEqual((2,), data_rank_1.shape) + self.assertEqual((2, 1), data_rank_2.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1}, + y=data_rank_2, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithTwoDimsLabel(self): + self._testTrainWithTwoDimsLabel(n_classes=2) + + def testMultiClassesTrainWithTwoDimsLabel(self): + self._testTrainWithTwoDimsLabel(n_classes=4) + + def _testTrainWithOneDimLabel(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + self.assertEqual((2,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1}, + y=data_rank_1, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithOneDimLabel(self): + self._testTrainWithOneDimLabel(n_classes=2) + + def testMultiClassesTrainWithOneDimLabel(self): + self._testTrainWithOneDimLabel(n_classes=4) + + def _testTrainWithTwoDimsWeight(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + weight_column='w', + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + data_rank_2 = np.array([[0], [1]]) + self.assertEqual((2,), data_rank_1.shape) + self.assertEqual((2, 1), data_rank_2.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1, + batch_size=batch_size, num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithTwoDimsWeight(self): + self._testTrainWithTwoDimsWeight(n_classes=2) + + def testMultiClassesTrainWithTwoDimsWeight(self): + self._testTrainWithTwoDimsWeight(n_classes=4) + + def _testTrainWithOneDimWeight(self, n_classes): + batch_size = 20 + + est = baseline.BaselineClassifier( + weight_column='w', + n_classes=n_classes, + model_dir=self._model_dir) + data_rank_1 = np.array([0, 1]) + self.assertEqual((2,), data_rank_1.shape) + + train_input_fn = numpy_io.numpy_input_fn( + x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1, + batch_size=batch_size, num_epochs=None, + shuffle=True) + est.train(train_input_fn, steps=200) + self._assert_checkpoint(n_classes, 200) + + def testBinaryClassesTrainWithOneDimWeight(self): + self._testTrainWithOneDimWeight(n_classes=2) + + def testMultiClassesTrainWithOneDimWeight(self): + self._testTrainWithOneDimWeight(n_classes=4) + + def _testFromScratch(self, n_classes): + label = 1 + age = 17 + # For binary classifier: + # loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are + # all zero initially) and label = 1 so, + # loss = 1 * -log ( sigmoid(logits) ) = 0.69315 + # For multi class classifier: + # loss = cross_entropy(logits, label) where logits are all 0s (weights are + # all zero initially) and label = 1 so, + # loss = 1 * -log ( 1.0 / n_classes ) + # For this particular test case, as logits are same, the formula + # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases. + mock_optimizer = self._mock_optimizer( + expected_loss=-1 * math.log(1.0/n_classes)) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + n_classes, + expected_global_step=num_steps, + expected_bias=[0.] if n_classes == 2 else [.0] * n_classes) + + def testBinaryClassesFromScratch(self): + self._testFromScratch(n_classes=2) + + def testMultiClassesFromScratch(self): + self._testFromScratch(n_classes=4) + + def _testFromCheckpoint(self, n_classes): + # Create initial checkpoint. + label = 1 + age = 17 + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # For binary classifier: + # logits = bias = -1. + # loss = sigmoid_cross_entropy(logits, label) + # so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133 + # For multi class classifier: + # loss = cross_entropy(logits, label) + # where logits = bias and label = 1 + # so, loss = 1 * -log ( softmax(logits)[1] ) + if n_classes == 2: + expected_loss = 1.3133 + else: + logits = bias + logits_exp = np.exp(logits) + softmax = logits_exp / logits_exp.sum() + expected_loss = -1 * math.log(softmax[label]) + + mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + n_classes, + expected_global_step=initial_global_step + num_steps, + expected_bias=bias) + + def testBinaryClassesFromCheckpoint(self): + self._testFromCheckpoint(n_classes=2) + + def testMultiClassesFromCheckpoint(self): + self._testFromCheckpoint(n_classes=4) + + def _testFromCheckpointFloatLabels(self, n_classes): + """Tests float labels for binary classification.""" + # Create initial checkpoint. + if n_classes > 2: + return + label = 0.8 + age = 17 + bias = [-1.0] + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # logits = bias = -1. + # loss = sigmoid_cross_entropy(logits, label) + # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617 + mock_optimizer = self._mock_optimizer(expected_loss=1.1132617) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + + def testBinaryClassesFromCheckpointFloatLabels(self): + self._testFromCheckpointFloatLabels(n_classes=2) + + def testMultiClassesFromCheckpointFloatLabels(self): + self._testFromCheckpointFloatLabels(n_classes=4) + + def _testFromCheckpointMultiBatch(self, n_classes): + # Create initial checkpoint. + label = [1, 0] + age = [17, 18.5] + # For binary case, the expected weight has shape (1,1). For multi class + # case, the shape is (1, n_classes). In order to test the weights, set + # weights as 2.0 * range(n_classes). + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + # For binary classifier: + # logits = bias + # logits[0] = -1. + # logits[1] = -1. + # loss = sigmoid_cross_entropy(logits, label) + # so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133 + # loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132 + # For multi class classifier: + # loss = cross_entropy(logits, label) + # where logits = bias and label = [1, 0] + # so, loss = 1 * -log ( softmax(logits)[label] ) + if n_classes == 2: + expected_loss = (1.3133 + 0.3132) + else: + # Expand logits since batch_size=2 + logits = bias * np.ones(shape=(2, 1)) + logits_exp = np.exp(logits) + softmax_row_0 = logits_exp[0] / logits_exp[0].sum() + softmax_row_1 = logits_exp[1] / logits_exp[1].sum() + expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) + expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) + expected_loss = expected_loss_0 + expected_loss_1 + + mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) + + est = baseline.BaselineClassifier( + n_classes=n_classes, + optimizer=mock_optimizer, + model_dir=self._model_dir) + self.assertEqual(0, mock_optimizer.minimize.call_count) + + # Train for a few steps, and validate optimizer and final checkpoint. + num_steps = 10 + est.train( + input_fn=lambda: ({'age': (age)}, (label)), + steps=num_steps) + self.assertEqual(1, mock_optimizer.minimize.call_count) + self._assert_checkpoint( + n_classes, + expected_global_step=initial_global_step + num_steps, + expected_bias=bias) + + def testBinaryClassesFromCheckpointMultiBatch(self): + self._testFromCheckpointMultiBatch(n_classes=2) + + def testMultiClassesFromCheckpointMultiBatch(self): + self._testFromCheckpointMultiBatch(n_classes=4) + + +class BaselineClassifierEvaluationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _test_evaluation_for_simple_data(self, n_classes): + label = 1 + age = 1. + + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + 100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + n_classes=n_classes, + model_dir=self._model_dir) + eval_metrics = est.evaluate( + input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1) + + if n_classes == 2: + # Binary classes: loss = -log(sigmoid(-1)) = 1.3133 + # Prediction = sigmoid(-1) = 0.2689 + expected_metrics = { + metric_keys.MetricKeys.LOSS: 1.3133, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: 1.3133, + metric_keys.MetricKeys.ACCURACY: 0., + metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, + metric_keys.MetricKeys.LABEL_MEAN: 1., + metric_keys.MetricKeys.ACCURACY_BASELINE: 1, + metric_keys.MetricKeys.AUC: 0., + metric_keys.MetricKeys.AUC_PR: 1., + } + else: + # Multi classes: loss = 1 * -log ( softmax(logits)[label] ) + logits = bias + logits_exp = np.exp(logits) + softmax = logits_exp / logits_exp.sum() + expected_loss = -1 * math.log(softmax[label]) + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: expected_loss, + metric_keys.MetricKeys.ACCURACY: 0., + } + + self.assertAllClose(sorted_key_dict(expected_metrics), + sorted_key_dict(eval_metrics), rtol=1e-3) + + def test_binary_classes_evaluation_for_simple_data(self): + self._test_evaluation_for_simple_data(n_classes=2) + + def test_multi_classes_evaluation_for_simple_data(self): + self._test_evaluation_for_simple_data(n_classes=4) + + def _test_evaluation_batch(self, n_classes): + """Tests evaluation for batch_size==2.""" + label = [1, 0] + age = [17., 18.] + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + n_classes=n_classes, + model_dir=self._model_dir) + eval_metrics = est.evaluate( + input_fn=lambda: ({'age': (age)}, (label)), steps=1) + + if n_classes == 2: + # Logits are (-1., -1.) labels are (1, 0). + # Loss is + # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 + # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 + # Prediction = sigmoid(-1) = 0.2689 + expected_loss = 1.3133 + 0.3132 + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, + metric_keys.MetricKeys.ACCURACY: 0.5, + metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, + metric_keys.MetricKeys.LABEL_MEAN: 0.5, + metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, + metric_keys.MetricKeys.AUC: 0.5, + metric_keys.MetricKeys.AUC_PR: 0.75, + } + else: + # Expand logits since batch_size=2 + logits = bias * np.ones(shape=(2, 1)) + logits_exp = np.exp(logits) + softmax_row_0 = logits_exp[0] / logits_exp[0].sum() + softmax_row_1 = logits_exp[1] / logits_exp[1].sum() + expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) + expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) + expected_loss = expected_loss_0 + expected_loss_1 + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, + metric_keys.MetricKeys.ACCURACY: 0.5, + } + + self.assertAllClose(sorted_key_dict(expected_metrics), + sorted_key_dict(eval_metrics), rtol=1e-3) + + def test_binary_classes_evaluation_batch(self): + self._test_evaluation_batch(n_classes=2) + + def test_multi_classes_evaluation_batch(self): + self._test_evaluation_batch(n_classes=4) + + def _test_evaluation_weights(self, n_classes): + """Tests evaluation with weights.""" + + label = [1, 0] + age = [17., 18.] + weights = [1., 2.] + # For binary case, the expected weight has shape (1,1). For multi class + # case, the shape is (1, n_classes). In order to test the weights, set + # weights as 2.0 * range(n_classes). + bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes + initial_global_step = 100 + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable( + initial_global_step, name=ops.GraphKeys.GLOBAL_STEP, + dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + n_classes=n_classes, + weight_column='w', + model_dir=self._model_dir) + eval_metrics = est.evaluate( + input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1) + + if n_classes == 2: + # Logits are (-1., -1.) labels are (1, 0). + # Loss is + # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 + # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 + # weights = [1., 2.] + expected_loss = 1.3133 * 1. + 0.3132 * 2. + loss_mean = expected_loss / (1.0 + 2.0) + label_mean = np.average(label, weights=weights) + logits = [-1, -1] + logistics = sigmoid(np.array(logits)) + predictions_mean = np.average(logistics, weights=weights) + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: loss_mean, + metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), + metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean, + metric_keys.MetricKeys.LABEL_MEAN: label_mean, + metric_keys.MetricKeys.ACCURACY_BASELINE: ( + max(label_mean, 1-label_mean)), + metric_keys.MetricKeys.AUC: 0.5, + metric_keys.MetricKeys.AUC_PR: 2. / (1. + 2.), + } + else: + # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] ) + # Expand logits since batch_size=2 + logits = bias * np.ones(shape=(2, 1)) + logits_exp = np.exp(logits) + softmax_row_0 = logits_exp[0] / logits_exp[0].sum() + softmax_row_1 = logits_exp[1] / logits_exp[1].sum() + expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) + expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) + loss_mean = np.average([expected_loss_0, expected_loss_1], + weights=weights) + expected_loss = loss_mean * np.sum(weights) + + expected_metrics = { + metric_keys.MetricKeys.LOSS: expected_loss, + ops.GraphKeys.GLOBAL_STEP: 100, + metric_keys.MetricKeys.LOSS_MEAN: loss_mean, + metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), + } + + self.assertAllClose(sorted_key_dict(expected_metrics), + sorted_key_dict(eval_metrics), rtol=1e-3) + + def test_binary_classes_evaluation_weights(self): + self._test_evaluation_weights(n_classes=2) + + def test_multi_classes_evaluation_weights(self): + self._test_evaluation_weights(n_classes=4) + + +class BaselineClassifierPredictTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _testPredictions(self, n_classes, label_vocabulary, label_output_fn): + """Tests predict when all variables are one-dimensional.""" + age = 1. + + bias = [10.0] if n_classes == 2 else [10.0] * n_classes + + with ops.Graph().as_default(): + variables.Variable(bias, name=BIAS_NAME) + variables.Variable(100, name='global_step', dtype=dtypes.int64) + save_variables_to_ckpt(self._model_dir) + + est = _baseline_classifier_fn( + label_vocabulary=label_vocabulary, + n_classes=n_classes, + model_dir=self._model_dir) + + predict_input_fn = numpy_io.numpy_input_fn( + x={'age': np.array([[age]])}, + y=None, + batch_size=1, + num_epochs=1, + shuffle=False) + predictions = list(est.predict(input_fn=predict_input_fn)) + + if n_classes == 2: + scalar_logits = bias[0] + two_classes_logits = [0, scalar_logits] + two_classes_logits_exp = np.exp(two_classes_logits) + softmax = two_classes_logits_exp / two_classes_logits_exp.sum() + + expected_predictions = { + 'class_ids': [1], + 'classes': [label_output_fn(1)], + 'logistic': [sigmoid(np.array(scalar_logits))], + 'logits': [scalar_logits], + 'probabilities': softmax, + } + else: + onedim_logits = np.array(bias) + class_ids = onedim_logits.argmax() + logits_exp = np.exp(onedim_logits) + softmax = logits_exp / logits_exp.sum() + expected_predictions = { + 'class_ids': [class_ids], + 'classes': [label_output_fn(class_ids)], + 'logits': onedim_logits, + 'probabilities': softmax, + } + + self.assertEqual(1, len(predictions)) + # assertAllClose cannot handle byte type. + self.assertEqual(expected_predictions['classes'], predictions[0]['classes']) + expected_predictions.pop('classes') + predictions[0].pop('classes') + self.assertAllClose(sorted_key_dict(expected_predictions), + sorted_key_dict(predictions[0])) + + def testBinaryClassesWithoutLabelVocabulary(self): + n_classes = 2 + self._testPredictions(n_classes, + label_vocabulary=None, + label_output_fn=lambda x: ('%s' % x).encode()) + + def testBinaryClassesWithLabelVocabulary(self): + n_classes = 2 + self._testPredictions( + n_classes, + label_vocabulary=['class_vocab_{}'.format(i) + for i in range(n_classes)], + label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) + + def testMultiClassesWithoutLabelVocabulary(self): + n_classes = 4 + self._testPredictions( + n_classes, + label_vocabulary=None, + label_output_fn=lambda x: ('%s' % x).encode()) + + def testMultiClassesWithLabelVocabulary(self): + n_classes = 4 + self._testPredictions( + n_classes, + label_vocabulary=['class_vocab_{}'.format(i) + for i in range(n_classes)], + label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) + + +class BaselineClassifierIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn, + predict_input_fn, input_dimension, prediction_length): + feature_columns = [ + feature_column_lib.numeric_column('x', shape=(input_dimension,)) + ] + est = _baseline_classifier_fn( + n_classes=n_classes, + model_dir=self._model_dir) + + # TRAIN + # learn y = x + est.train(train_input_fn, steps=200) + + # EVALUTE + scores = est.evaluate(eval_input_fn) + self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) + + # PREDICT + predictions = np.array( + [x['classes'] for x in est.predict(predict_input_fn)]) + self.assertAllEqual((prediction_length, 1), predictions.shape) + + # EXPORT + feature_spec = feature_column_lib.make_parse_example_spec(feature_columns) + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def _test_numpy_input_fn(self, n_classes): + """Tests complete flow with numpy_input_fn.""" + input_dimension = 4 + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) + data = data.reshape(batch_size, input_dimension) + target = np.array([1] * batch_size) + + train_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=target, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=target, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=None, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + + self._test_complete_flow( + n_classes=n_classes, + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + prediction_length=prediction_length) + + def test_binary_classes_numpy_input_fn(self): + self._test_numpy_input_fn(n_classes=2) + + def test_multi_classes_numpy_input_fn(self): + self._test_numpy_input_fn(n_classes=4) + + def _test_pandas_input_fn(self, n_classes): + """Tests complete flow with pandas_input_fn.""" + if not HAS_PANDAS: + return + + # Pandas DataFrame natually supports 1 dim data only. + input_dimension = 1 + batch_size = 10 + data = np.array([1., 2., 3., 4.], dtype=np.float32) + target = np.array([1, 0, 1, 0], dtype=np.int32) + x = pd.DataFrame({'x': data}) + y = pd.Series(target) + prediction_length = 4 + + train_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) + eval_input_fn = pandas_io.pandas_input_fn( + x=x, y=y, batch_size=batch_size, shuffle=False) + predict_input_fn = pandas_io.pandas_input_fn( + x=x, batch_size=batch_size, shuffle=False) + + self._test_complete_flow( + n_classes=n_classes, + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + prediction_length=prediction_length) + + def test_binary_classes_pandas_input_fn(self): + self._test_pandas_input_fn(n_classes=2) + + def test_multi_classes_pandas_input_fn(self): + self._test_pandas_input_fn(n_classes=4) + + def _test_input_fn_from_parse_example(self, n_classes): + """Tests complete flow with input_fn constructed from parse_example.""" + input_dimension = 2 + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) + data = data.reshape(batch_size, input_dimension) + target = np.array([1] * batch_size, dtype=np.int64) + + serialized_examples = [] + for x, y in zip(data, target): + example = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'x': + feature_pb2.Feature(float_list=feature_pb2.FloatList( + value=x)), + 'y': + feature_pb2.Feature(int64_list=feature_pb2.Int64List( + value=[y])), + })) + serialized_examples.append(example.SerializeToString()) + + feature_spec = { + 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), + 'y': parsing_ops.FixedLenFeature([1], dtypes.int64), + } + + def _train_input_fn(): + feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _eval_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + + def _predict_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = queue_parsed_features(feature_map) + features.pop('y') + return features, None + + self._test_complete_flow( + n_classes=n_classes, + train_input_fn=_train_input_fn, + eval_input_fn=_eval_input_fn, + predict_input_fn=_predict_input_fn, + input_dimension=input_dimension, + prediction_length=prediction_length) + + def test_binary_classes_input_fn_from_parse_example(self): + self._test_input_fn_from_parse_example(n_classes=2) + + def test_multi_classes_input_fn_from_parse_example(self): + self._test_input_fn_from_parse_example(n_classes=4) + + +# Tests for Baseline logit_fn. + + +class BaselineLogitFnTest(test.TestCase): + + def test_basic_logit_correctness(self): + """baseline_logit_fn simply returns the bias variable.""" + with ops.Graph().as_default(): + logit_fn = baseline._baseline_logit_fn_builder(num_outputs=2) + logits = logit_fn(features={'age': [[23.], [31.]]}) + with variable_scope.variable_scope('baseline', reuse=True): + bias_var = variable_scope.get_variable('bias') + with tf_session.Session() as sess: + sess.run([variables.global_variables_initializer()]) + self.assertAllClose([[0., 0.], [0., 0.]], logits.eval()) + sess.run(bias_var.assign([10., 5.])) + self.assertAllClose([[10., 5.], [10., 5.]], logits.eval()) + + +if __name__ == '__main__': + test.main() + diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py index 5b82fd75ff..bed2b67419 100644 --- a/tensorflow/python/estimator/estimator_lib.py +++ b/tensorflow/python/estimator/estimator_lib.py @@ -19,6 +19,8 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import +from tensorflow.python.estimator.canned.baseline import BaselineClassifier +from tensorflow.python.estimator.canned.baseline import BaselineRegressor from tensorflow.python.estimator.canned.dnn import DNNClassifier from tensorflow.python.estimator.canned.dnn import DNNRegressor from tensorflow.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedClassifier @@ -46,6 +48,8 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ # Canned Estimators + 'BaselineClassifier', + 'BaselineRegressor', 'DNNClassifier', 'DNNRegressor', 'DNNLinearCombinedClassifier', diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt new file mode 100644 index 0000000000..f5ed263f0e --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt @@ -0,0 +1,54 @@ +path: "tensorflow.estimator.BaselineClassifier" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "config" + mtype: "" + } + member { + name: "model_dir" + mtype: "" + } + member { + name: "model_fn" + mtype: "" + } + member { + name: "params" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'optimizer\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'2\', \'None\', \'None\', \'Ftrl\', \'None\'], " + } + member_method { + name: "evaluate" + argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "export_savedmodel" + argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " + } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "latest_checkpoint" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "predict" + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "train" + argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt new file mode 100644 index 0000000000..61a29942c5 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt @@ -0,0 +1,54 @@ +path: "tensorflow.estimator.BaselineRegressor" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "config" + mtype: "" + } + member { + name: "model_dir" + mtype: "" + } + member { + name: "model_fn" + mtype: "" + } + member { + name: "params" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'model_dir\', \'label_dimension\', \'weight_column\', \'optimizer\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'Ftrl\', \'None\'], " + } + member_method { + name: "evaluate" + argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "export_savedmodel" + argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " + } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "latest_checkpoint" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "predict" + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "train" + argspec: "args=[\'self\', \'input_fn\', \'hooks\', \'steps\', \'max_steps\', \'saving_listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt index ef93a61bd8..cdc367b99e 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt @@ -1,5 +1,13 @@ path: "tensorflow.estimator" tf_module { + member { + name: "BaselineClassifier" + mtype: "" + } + member { + name: "BaselineRegressor" + mtype: "" + } member { name: "DNNClassifier" mtype: "" -- GitLab From a2147fdc59bce39441e56be7e4e86faf89f2a306 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 16:36:22 -0800 Subject: [PATCH 0209/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 175085154 --- .../core/ops/compat/ops_history.v1.pbtxt | 46 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 1 - 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index a4b5ca16af..60f67543f1 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -21670,6 +21670,52 @@ op { } is_stateful: true } +op { + name: "Print" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "data" + type_list_attr: "U" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + } + attr { + name: "U" + type: "list(type)" + has_minimum: true + } + attr { + name: "message" + type: "string" + default_value { + s: "" + } + } + attr { + name: "first_n" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "summarize" + type: "int" + default_value { + i: 3 + } + } + is_stateful: true +} op { name: "PriorityQueue" output_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 8353b45e22..2a74c20707 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -16977,7 +16977,6 @@ op { name: "U" type: "list(type)" has_minimum: true - minimum: 1 } attr { name: "message" -- GitLab From 361c55899cb524ca078c65eabdd3d79bfc10c8f9 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 16:44:37 -0800 Subject: [PATCH 0210/1801] Make assert_equal/_none_equal/_less ops work in eager mode Also, fix documentation of eager mode execute() method and make tf_should_use work with empty list returned by execute() RELNOTES: tf.assert_equal no longer raises ValueError. It now raises InvalidArgumentError, as documented. PiperOrigin-RevId: 175086223 --- tensorflow/python/eager/execute.py | 3 +- tensorflow/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/check_ops_test.py | 311 +++++++++++------- tensorflow/python/ops/check_ops.py | 79 ++++- tensorflow/python/ops/control_flow_ops.py | 41 ++- tensorflow/python/util/tf_should_use.py | 2 +- 6 files changed, 316 insertions(+), 121 deletions(-) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 983c1ea73e..c6457232e9 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -47,8 +47,7 @@ def execute(op_name, num_outputs, inputs, attrs, ctx, name=None): name: Customized name for the operation. Returns: - None if there are no outputs, a single Tensor object if there is one output - and a list of Tensor objects if there are multiple outputs. + List of output Tensor objects. The list is empty if there are no outputs Raises: An exception on error. diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 7fa504e85e..8d6f863a4c 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1186,6 +1186,7 @@ cuda_py_test( srcs = ["check_ops_test.py"], additional_deps = [ "//third_party/py/numpy", + "//tensorflow/python/eager:context", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index ed859e3774..43785adcee 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -20,10 +20,13 @@ from __future__ import print_function import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.platform import test @@ -71,110 +74,178 @@ class AssertProperIterableTest(test.TestCase): class AssertEqualTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal(self): - with self.test_session(): + small = constant_op.constant([1, 2], name="small") + with ops.control_dependencies([check_ops.assert_equal(small, small)]): + out = array_ops.identity(small) + self.evaluate(out) + + def test_returns_none_with_eager(self): + with context.eager_mode(): small = constant_op.constant([1, 2], name="small") - with ops.control_dependencies([check_ops.assert_equal(small, small)]): - out = array_ops.identity(small) - out.eval() + x = check_ops.assert_equal(small, small) + assert x is None + @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater(self): - with self.test_session(): - # Static check - static_small = constant_op.constant([1, 2], name="small") - static_big = constant_op.constant([3, 4], name="big") - with self.assertRaisesRegexp(ValueError, "fail"): - check_ops.assert_equal(static_big, static_small, message="fail") - # Dynamic check - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies( - [check_ops.assert_equal( - big, small, message="fail")]): - out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*big.*small"): - out.eval(feed_dict={small: [1, 2], big: [3, 4]}) - + # Static check + static_small = constant_op.constant([1, 2], name="small") + static_big = constant_op.constant([3, 4], name="big") + with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): + check_ops.assert_equal(static_big, static_small, message="fail") + + # Dynamic check + if context.in_graph_mode(): + with self.test_session(): + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies( + [check_ops.assert_equal( + big, small, message="fail")]): + out = array_ops.identity(small) + with self.assertRaisesOpError("fail.*big.*small"): + out.eval(feed_dict={small: [1, 2], big: [3, 4]}) + + def test_error_message_eager(self): + expected_error_msg_full = r"""big does not equal small +Condition x == y did not hold. +Indices of first 6 different values: +\[\[0 0\] + \[1 1\] + \[2 0\]\] +Corresponding x values: +\[2 3 6\] +Corresponding y values: +\[20 30 60\] +First 6 elements of x: +\[2 2 3 3 6 6\] +First 6 elements of y: +\[20 2 3 30 60 6\] +""" + expected_error_msg_short = r"""big does not equal small +Condition x == y did not hold. +Indices of first 2 different values: +\[\[0 0\] + \[1 1\]\] +Corresponding x values: +\[2 3\] +Corresponding y values: +\[20 30\] +First 2 elements of x: +\[2 2\] +First 2 elements of y: +\[20 2\] +""" + with context.eager_mode(): + big = constant_op.constant([[2, 2], [3, 3], [6, 6]]) + small = constant_op.constant([[20, 2], [3, 30], [60, 6]]) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_full): + check_ops.assert_equal(big, small, message="big does not equal small", + summarize=10) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_short): + check_ops.assert_equal(big, small, message="big does not equal small", + summarize=2) + + @test_util.run_in_graph_and_eager_modes() def test_raises_when_less(self): - with self.test_session(): - # Static check - static_small = constant_op.constant([3, 1], name="small") - static_big = constant_op.constant([4, 2], name="big") - with self.assertRaisesRegexp(ValueError, "fail"): - check_ops.assert_equal(static_big, static_small, message="fail") - # Dynamic check - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies([check_ops.assert_equal(small, big)]): - out = array_ops.identity(small) - with self.assertRaisesOpError("small.*big"): - out.eval(feed_dict={small: [3, 1], big: [4, 2]}) + # Static check + static_small = constant_op.constant([3, 1], name="small") + static_big = constant_op.constant([4, 2], name="big") + with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): + check_ops.assert_equal(static_big, static_small, message="fail") + + # Dynamic check + if context.in_graph_mode(): + with self.test_session(): + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies([check_ops.assert_equal(small, big)]): + out = array_ops.identity(small) + with self.assertRaisesOpError("small.*big"): + out.eval(feed_dict={small: [3, 1], big: [4, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - small_2 = constant_op.constant([1, 2], name="small_2") - with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([[1, 2], [1, 2]], name="small") + small_2 = constant_op.constant([1, 2], name="small_2") + with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal_but_non_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 1, 1], name="small") - small_2 = constant_op.constant([1, 1], name="small_2") - with self.assertRaisesRegexp(ValueError, "must be"): - with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 1, 1], name="small") + small_2 = constant_op.constant([1, 1], name="small_2") + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (errors.InvalidArgumentError, ValueError), + (r"Incompatible shapes: \[3\] vs. \[2\]|" + r"Dimensions must be equal, but are 3 and 2")): + with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - with self.test_session(): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies([check_ops.assert_equal(larry, curly)]): - out = array_ops.identity(larry) - out.eval() + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies([check_ops.assert_equal(larry, curly)]): + out = array_ops.identity(larry) + self.evaluate(out) class AssertNoneEqualTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_not_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([10, 20], name="small") - with ops.control_dependencies( - [check_ops.assert_none_equal(big, small)]): - out = array_ops.identity(small) - out.eval() - + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([10, 20], name="small") + with ops.control_dependencies( + [check_ops.assert_none_equal(big, small)]): + out = array_ops.identity(small) + self.evaluate(out) + + @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal(self): - with self.test_session(): - small = constant_op.constant([3, 1], name="small") + small = constant_op.constant([3, 1], name="small") + with self.assertRaisesOpError("x != y did not hold"): with ops.control_dependencies( [check_ops.assert_none_equal(small, small)]): out = array_ops.identity(small) - with self.assertRaisesOpError("x != y did not hold"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_not_equal_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3], name="big") - with ops.control_dependencies( - [check_ops.assert_none_equal(small, big)]): - out = array_ops.identity(small) - out.eval() - + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3], name="big") + with ops.control_dependencies( + [check_ops.assert_none_equal(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + + @test_util.run_in_graph_and_eager_modes() def test_raises_when_not_equal_but_non_broadcastable_shapes(self): with self.test_session(): small = constant_op.constant([1, 1, 1], name="small") big = constant_op.constant([10, 10], name="big") - with self.assertRaisesRegexp(ValueError, "must be"): + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (ValueError, errors.InvalidArgumentError), + (r"Incompatible shapes: \[3\] vs. \[2\]|" + r"Dimensions must be equal, but are 3 and 2")): with ops.control_dependencies( [check_ops.assert_none_equal(small, big)]): out = array_ops.identity(small) - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): with self.test_session(): larry = constant_op.constant([]) @@ -182,62 +253,82 @@ class AssertNoneEqualTest(test.TestCase): with ops.control_dependencies( [check_ops.assert_none_equal(larry, curly)]): out = array_ops.identity(larry) - out.eval() + self.evaluate(out) + + def test_returns_none_with_eager(self): + with context.eager_mode(): + t1 = constant_op.constant([1, 2]) + t2 = constant_op.constant([3, 4]) + x = check_ops.assert_none_equal(t1, t2) + assert x is None class AssertLessTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") + small = constant_op.constant([1, 2], name="small") + with self.assertRaisesOpError("failure message.*\n*.* x < y did not hold"): with ops.control_dependencies( [check_ops.assert_less( - small, small, message="fail")]): + small, small, message="failure message")]): out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*small.*small"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 4], name="big") + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 4], name="big") + with self.assertRaisesOpError("x < y did not hold"): with ops.control_dependencies([check_ops.assert_less(big, small)]): out = array_ops.identity(small) - with self.assertRaisesOpError("big.*small"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less(self): - with self.test_session(): - small = constant_op.constant([3, 1], name="small") - big = constant_op.constant([4, 2], name="big") - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([3, 1], name="small") + big = constant_op.constant([4, 2], name="big") + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1], name="small") - big = constant_op.constant([3, 2], name="big") - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1], name="small") + big = constant_op.constant([3, 2], name="big") + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_less_but_non_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 1, 1], name="small") - big = constant_op.constant([3, 2], name="big") - with self.assertRaisesRegexp(ValueError, "must be"): - with ops.control_dependencies([check_ops.assert_less(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 1, 1], name="small") + big = constant_op.constant([3, 2], name="big") + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (ValueError, errors.InvalidArgumentError), + (r"Incompatible shapes: \[3\] vs. \[2\]|" + "Dimensions must be equal, but are 3 and 2")): + with ops.control_dependencies([check_ops.assert_less(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - with self.test_session(): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies([check_ops.assert_less(larry, curly)]): - out = array_ops.identity(larry) - out.eval() + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies([check_ops.assert_less(larry, curly)]): + out = array_ops.identity(larry) + self.evaluate(out) + + def test_returns_none_with_eager(self): + with context.eager_mode(): + t1 = constant_op.constant([1, 2]) + t2 = constant_op.constant([3, 4]) + x = check_ops.assert_less(t1, t2) + assert x is None class AssertLessEqualTest(test.TestCase): diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index ceee009104..7e509f72c1 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -48,6 +48,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_util @@ -96,10 +97,11 @@ def _maybe_constant_value_string(t): def _assert_static(condition, data): - """Raises a static ValueError with as much information as possible.""" + """Raises a InvalidArgumentError with as much information as possible.""" if not condition: data_static = [_maybe_constant_value_string(x) for x in data] - raise ValueError('\n'.join(data_static)) + raise errors.InvalidArgumentError(node_def=None, op=None, + message='\n'.join(data_static)) def assert_proper_iterable(values): @@ -303,11 +305,60 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None): Returns: Op that raises `InvalidArgumentError` if `x == y` is False. + @compatibility{eager} returns None + + Raises: + InvalidArgumentError if the check can be performed immediately and + `x == y` is False. The check can be performed immediately during + eager execution or if `x` and `y` are statically known. """ message = message or '' with ops.name_scope(name, 'assert_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + + if context.in_eager_mode(): + eq = math_ops.equal(x, y) + condition = math_ops.reduce_all(eq) + if not condition: + # Prepare a message with first elements of x and y + summary_msg = '' + if summarize: + # reshape((-1,)) is the fastest way to get a flat array view. + x_np = x.numpy().reshape((-1,)) + y_np = y.numpy().reshape((-1,)) + x_sum = min(x_np.size, summarize) + y_sum = min(y_np.size, summarize) + summary_msg = ('First %d elements of x:\n%s\n' + 'First %d elements of y:\n%s\n' % + (x_sum, x_np[:x_sum], + y_sum, y_np[:y_sum])) + + # Get the values that actually differed and their indices + mask = math_ops.logical_not(eq) + indices = array_ops.where(mask) + indices_np = indices.numpy() + x_vals = array_ops.boolean_mask(x, mask) + y_vals = array_ops.boolean_mask(y, mask) + diff_to_print = 0 + if summarize: + diff_to_print = min(summarize, indices_np.size) + + raise errors.InvalidArgumentError( + node_def=None, op=None, + message=('%s\nCondition x == y did not hold.\n' + 'Indices of first %s different values:\n%s\n' + 'Corresponding x values:\n%s\n' + 'Corresponding y values:\n%s\n' + '%s' + % + (message or '', + diff_to_print, indices_np[:diff_to_print], + x_vals.numpy().reshape((-1,))[:diff_to_print], + y_vals.numpy().reshape((-1,))[:diff_to_print], + summary_msg))) + return + if data is None: data = [ message, @@ -356,12 +407,19 @@ def assert_none_equal( with ops.name_scope(name, 'assert_none_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + if context.in_eager_mode(): + x_name = 'x' + y_name = 'y' + else: + x_name = x.name + y_name = y.name + if data is None: data = [ message, - 'Condition x != y did not hold for every single element:' - 'x (%s) = ' % x.name, x, - 'y (%s) = ' % y.name, y + 'Condition x != y did not hold for every single element:', + 'x (%s) = ' % x_name, x, + 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.not_equal(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) @@ -397,11 +455,18 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_less', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + if context.in_eager_mode(): + x_name = 'x' + y_name = 'y' + else: + x_name = x.name + y_name = y.name + if data is None: data = [ message, - 'Condition x < y did not hold element-wise:' - 'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y + 'Condition x < y did not hold element-wise:', + 'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.less(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 10d8e01304..8afb079d20 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -60,6 +60,7 @@ from tensorflow.core.protobuf import control_flow_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape @@ -86,6 +87,29 @@ from tensorflow.python.util import tf_should_use _basetuple = tuple +def _summarize_eager(tensor, summarize=None): + """Returns a summarized string representation of eager `tensor`. + + Args: + tensor: EagerTensor to summarize + summarize: Include these many first elements of `array` + """ + # reshape((-1,)) is the fastest way to get a flat array view + if tensor._rank(): # pylint: disable=protected-access + flat = tensor.numpy().reshape((-1,)) + lst = [str(x) for x in flat[:summarize]] + if len(lst) < flat.size: + lst.append("...") + else: + # tensor.numpy() returns a scalar for zero dimensional arrays + if summarize != 0: + lst = [str(tensor.numpy())] + else: + lst = [] + + return ", ".join(lst) + + # pylint: disable=protected-access @@ -98,7 +122,8 @@ def Assert(condition, data, summarize=None, name=None): If `condition` evaluates to false, print the list of tensors in `data`. `summarize` determines how many entries of the tensors to print. - NOTE: To ensure that Assert executes, one usually attaches a dependency: + NOTE: In graph mode, to ensure that Assert executes, one usually attaches + a dependency: ```python # Ensure maximum element of x is smaller or equal to 1 @@ -117,7 +142,21 @@ def Assert(condition, data, summarize=None, name=None): assert_op: An `Operation` that, when executed, raises a `tf.errors.InvalidArgumentError` if `condition` is not true. @compatibility{eager} returns None. + + Raises: + @compatibility{eager} `tf.errors.InvalidArgumentError` if `condition` + is not true """ + if context.in_eager_mode(): + if not condition: + xs = ops.convert_n_to_tensor(data) + data_str = [_summarize_eager(x, summarize) for x in xs] + raise errors.InvalidArgumentError( + node_def=None, op=None, + message="Expected '%s' to be true. Summarized data: %s" % ( + condition, "\n".join(data_str))) + return + with ops.name_scope(name, "Assert", [condition, data]) as name: xs = ops.convert_n_to_tensor(data) if all([x.dtype in {dtypes.string, dtypes.int32} for x in xs]): diff --git a/tensorflow/python/util/tf_should_use.py b/tensorflow/python/util/tf_should_use.py index a576547d5f..37733152e8 100644 --- a/tensorflow/python/util/tf_should_use.py +++ b/tensorflow/python/util/tf_should_use.py @@ -44,7 +44,7 @@ def _add_should_use_warning(x, fatal_error=False): and is a very shallow wrapper for `x` which logs access into `x`. """ del fatal_error - if x is None: # special corner case where x is None + if x is None or x == []: # pylint: disable=g-explicit-bool-comparison return x if context.in_eager_mode(): -- GitLab From b138012ce23ffc9416f6cb7b81a0fab76e381ba7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 16:49:17 -0800 Subject: [PATCH 0211/1801] Changed gradient of GatherNd to use IndexedSlices when possible rather than producing a dense output. PiperOrigin-RevId: 175086874 --- tensorflow/python/kernel_tests/gather_nd_op_test.py | 10 ++++++++-- tensorflow/python/ops/array_grad.py | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py index af5e23c926..5109ed98c9 100644 --- a/tensorflow/python/kernel_tests/gather_nd_op_test.py +++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py @@ -25,6 +25,7 @@ import numpy as np from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import variables @@ -185,6 +186,9 @@ class GatherNdTest(test.TestCase): self.assertAllEqual(expected.reshape([10, 10, 20]), gather_nd_val) self.assertEqual([10, 10, 20], gather_nd_t.get_shape()) + def assertIndexedSlices(self, t): + self.assertIsInstance(t, ops.IndexedSlices) + def testUnknownIndices(self): params = constant_op.constant([[0, 1, 2]]) indices = array_ops.placeholder(dtypes.int32) @@ -233,7 +237,8 @@ class GatherNdTest(test.TestCase): grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0] expected_grads = np.array([[3, 4], [1, 2]], dtype=np.float64) with self.test_session(use_gpu=True): - self.assertAllEqual(expected_grads, grads.eval()) + self.assertIndexedSlices(grads) + self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval()) def testGradientsRank3Elements(self): indices = constant_op.constant( @@ -284,7 +289,8 @@ class GatherNdTest(test.TestCase): [0, 0, 0, 0, 0, 0, 0, 0, 0], [3, 3, 3, 3, 3, 3, 3, 3, 3]], dtype=np.float64) with self.test_session(use_gpu=True): - self.assertAllEqual(expected_grads, grads.eval()) + self.assertIndexedSlices(grads) + self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval()) class GatherNdOpBenchmark(test.Benchmark): diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 3c025881cb..87f8d14860 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -460,7 +460,11 @@ def _GatherNdGrad(op, grad): ref = op.inputs[0] indices = op.inputs[1] ref_shape = array_ops.shape(ref, out_type=indices.dtype) - ref_grad = array_ops.scatter_nd(indices, grad, ref_shape) + if indices.shape.ndims == 2 and indices.shape[-1].value == 1: + ref_grad = ops.IndexedSlices(grad, array_ops.squeeze(indices, axis=-1), + ref_shape) + else: + ref_grad = array_ops.scatter_nd(indices, grad, ref_shape) return [ref_grad, None] -- GitLab From b9850393c684e70d9752440072f0fd3f9714a609 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 8 Nov 2017 16:59:13 -0800 Subject: [PATCH 0212/1801] Improve error message for @graph_callable argument check PiperOrigin-RevId: 175088248 --- tensorflow/python/eager/graph_callable.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index a7f1061d18..ce51d17cfc 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -247,7 +247,9 @@ def _get_graph_callable_inputs(shape_and_dtypes): ret.append(_get_graph_callable_inputs(x)) else: raise errors.InvalidArgumentError( - None, None, "shape_and_dtypes not ShapeAndDtype, type: %s " % type(x)) + None, None, "Expected the argument to @graph_callable to be a " + "(possibly nested) list or tuple of ShapeAndDtype objects, " + "but got an object of type: %s" % type(x)) return tuple(ret) if isinstance(shape_and_dtypes, tuple) else ret @@ -267,7 +269,7 @@ def _graph_callable_internal(func, shape_and_dtypes): Args: func: The tfe Python function to compile. - shape_and_dtypes: A list of type ShapeAndDtype. + shape_and_dtypes: A possibly nested list or tuple of ShapeAndDtype objects. Raises: ValueError: If any one of func's outputs is not a Tensor. @@ -430,9 +432,10 @@ def graph_callable(shape_and_dtypes): ret = foo(tfe.Tensor(2.0)) # `ret` here now is a Tensor with value 9.0. ``` Args: - shape_and_dtypes: A list of type ShapeAndDtype that specifies shape and type - information for each of the callable's arguments. The length of this list - must be equal to the number of arguments accepted by the wrapped function. + shape_and_dtypes: A possibly nested list or tuple of ShapeAndDtype objects + that specifies shape and type information for each of the callable's + arguments. The length of this list must be equal to the number of + arguments accepted by the wrapped function. Returns: A callable graph object. -- GitLab From 9d86dc076f74cc9e2683f8ad789930408b0919f7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 8 Nov 2017 17:01:26 -0800 Subject: [PATCH 0213/1801] Remove extra copy of literal in client TransferToOutfeed PiperOrigin-RevId: 175088538 --- tensorflow/compiler/xla/client/client.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 92cd8e729d..66937d64af 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -142,8 +142,7 @@ StatusOr> Client::TransferFromOutfeed( "TransferToClient request"); } - Literal literal(response.literal()); - return MakeUnique(literal); + return MakeUnique(response.literal()); } Status Client::ResetDevice() { -- GitLab From 17411ee8e7569085c475e8f0bd3f6677a9d44f77 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 8 Nov 2017 19:01:15 -0800 Subject: [PATCH 0214/1801] Add hooks keyword argument to slim evaluate_once to enable TFDBG debugging of slim.evaluation.evaluate_once() Fixes: #13444 PiperOrigin-RevId: 175101022 --- tensorflow/contrib/slim/BUILD | 2 + .../contrib/slim/python/slim/evaluation.py | 15 ++++-- .../slim/python/slim/evaluation_test.py | 46 +++++++++++++++++-- .../docs_src/programmers_guide/debugger.md | 26 ++++++++++- 4 files changed, 77 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/slim/BUILD b/tensorflow/contrib/slim/BUILD index 23c23af2f4..c2f106c2b2 100644 --- a/tensorflow/contrib/slim/BUILD +++ b/tensorflow/contrib/slim/BUILD @@ -39,6 +39,8 @@ py_test( "//tensorflow/python:summary", "//tensorflow/python:training", "//tensorflow/python:variables", + "//tensorflow/python/debug:debug_data", + "//tensorflow/python/debug:hooks", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index 2d4b08df61..cdb720b36b 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -153,7 +153,8 @@ def evaluate_once(master, summary_op=_USE_DEFAULT, summary_op_feed_dict=None, variables_to_restore=None, - session_config=None): + session_config=None, + hooks=None): """Evaluates the model at the given checkpoint path. Args: @@ -177,6 +178,8 @@ def evaluate_once(master, slim.variables.GetVariablesToRestore() is used. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. If left as `None`, the default will be used. + hooks: A list of additional `SessionRunHook` objects to pass during the + evaluation. Returns: The value of `final_op` or `None` if `final_op` is `None`. @@ -184,11 +187,13 @@ def evaluate_once(master, if summary_op == _USE_DEFAULT: summary_op = summary.merge_all() - hooks = [evaluation.StopAfterNEvalsHook(num_evals),] + all_hooks = [evaluation.StopAfterNEvalsHook(num_evals),] if summary_op is not None: - hooks.append(evaluation.SummaryAtEndHook( + all_hooks.append(evaluation.SummaryAtEndHook( log_dir=logdir, summary_op=summary_op, feed_dict=summary_op_feed_dict)) + if hooks is not None: + all_hooks.extend(hooks) saver = None if variables_to_restore is not None: @@ -203,7 +208,7 @@ def evaluate_once(master, feed_dict=eval_op_feed_dict, final_ops=final_op, final_ops_feed_dict=final_op_feed_dict, - hooks=hooks, + hooks=all_hooks, config=session_config) @@ -256,7 +261,7 @@ def evaluation_loop(master, configure the `Session`. If left as `None`, the default will be used. timeout: The maximum amount of time to wait between checkpoints. If left as `None`, then the process will wait indefinitely. - hooks: A list of additional SessionRunHook objects to pass during + hooks: A list of additional `SessionRunHook` objects to pass during repeated evaluations. Returns: diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index d9e0f54b72..870f504d10 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import glob import os +import shutil import time import numpy as np @@ -29,6 +30,8 @@ from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib from tensorflow.core.protobuf import saver_pb2 +from tensorflow.python.debug.lib import debug_data +from tensorflow.python.debug.wrappers import hooks from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -230,11 +233,7 @@ class SingleEvaluationTest(test.TestCase): with self.assertRaises(errors.NotFoundError): evaluation.evaluate_once('', checkpoint_path, log_dir) - def testRestoredModelPerformance(self): - checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') - log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') - - # First, save out the current model to a checkpoint: + def _prepareCheckpoint(self, checkpoint_path): init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1) @@ -242,6 +241,13 @@ class SingleEvaluationTest(test.TestCase): sess.run(init_op) saver.save(sess, checkpoint_path) + def testRestoredModelPerformance(self): + checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') + log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') + + # First, save out the current model to a checkpoint: + self._prepareCheckpoint(checkpoint_path) + # Next, determine the metric to evaluate: value_op, update_op = metric_ops.streaming_accuracy(self._predictions, self._labels) @@ -251,6 +257,36 @@ class SingleEvaluationTest(test.TestCase): '', checkpoint_path, log_dir, eval_op=update_op, final_op=value_op) self.assertAlmostEqual(accuracy_value, self._expected_accuracy) + def testAdditionalHooks(self): + checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt') + log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/') + + # First, save out the current model to a checkpoint: + self._prepareCheckpoint(checkpoint_path) + + # Next, determine the metric to evaluate: + value_op, update_op = metric_ops.streaming_accuracy(self._predictions, + self._labels) + + dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir') + dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False) + try: + # Run the evaluation and verify the results: + accuracy_value = evaluation.evaluate_once( + '', checkpoint_path, log_dir, eval_op=update_op, final_op=value_op, + hooks=[dumping_hook]) + self.assertAlmostEqual(accuracy_value, self._expected_accuracy) + + dump = debug_data.DebugDumpDir( + glob.glob(os.path.join(dumping_root, 'run_*'))[0]) + # Here we simply assert that the dumped data has been loaded and is + # non-empty. We do not care about the detailed model-internal tensors or + # their values. + self.assertTrue(dump.dumped_tensor_data) + finally: + if os.path.isdir(dumping_root): + shutil.rmtree(dumping_root) + if __name__ == '__main__': test.main() diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 36a016e880..1f856bbf3f 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -509,8 +509,12 @@ model.fit(...) # This will break into the TFDBG CLI. ## Debugging tf-slim with TFDBG -TFDBG currently supports only training with +TFDBG supports debugging of training and evaluation with [tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). +As detailed below, training and evaluation require slightly different debugging +workflows. + +### Debugging training in tf-slim To debug the training process, provide `LocalCLIDebugWrapperSession` to the `session_wrapper` argument of `slim.learning.train()`. For example: @@ -519,13 +523,31 @@ import tensorflow as tf from tensorflow.python import debug as tf_debug # ... Code that creates the graph and the train_op ... -tf.contrib.slim.learning_train( +tf.contrib.slim.learning.train( train_op, logdir, number_of_steps=10, session_wrapper=tf_debug.LocalCLIDebugWrapperSession) ``` +### Debugging evaluation in tf-slim +To debug the evaluation process, provide `LocalCLIDebugHook` to the +`hooks` argument of `slim.evaluation.evaluate_once()`. For example: + +``` python +import tensorflow as tf +from tensorflow.python import debug as tf_debug + +# ... Code that creates the graph and the eval and final ops ... +tf.contrib.slim.evaluation.evaluate_once( + '', + checkpoint_path, + logdir, + eval_op=my_eval_op, + final_op=my_value_op, + hooks=[tf_debug.LocalCLIDebugHook()]) +``` + ## Offline Debugging of Remotely-Running Sessions Often, your model is running on a remote machine or a process that you don't -- GitLab From 791c8bf0baf4198c5922dba08a74960ca6dac74f Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Wed, 8 Nov 2017 22:23:01 -0800 Subject: [PATCH 0215/1801] Simplify graph construction with an option to not validate colocation constraints (for graph optimizations, colocation constraints are already validated previously and device placement of nodes has completed previously and there is no need to validate again). PiperOrigin-RevId: 175113956 --- tensorflow/core/graph/graph_constructor.cc | 10 +++++--- tensorflow/core/graph/graph_constructor.h | 3 +++ .../core/graph/graph_constructor_test.cc | 15 +++++++++++ .../core/grappler/costs/graph_properties.cc | 5 ++++ .../grappler/costs/graph_properties_test.cc | 25 +++++++++++++++++++ 5 files changed, 55 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 753cb260e5..2ee409768b 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -68,7 +68,8 @@ class GraphConstructor { Options(const GraphConstructorOptions& in) // NOLINT(runtime/explicit) : allow_internal_ops(in.allow_internal_ops), expect_device_spec(in.expect_device_spec), - importing(false) {} + importing(false), + validate_colocation_constraints(false) {} Options(const ImportGraphDefOptions& in) // NOLINT(runtime/explicit) : allow_internal_ops(false), expect_device_spec(false), @@ -81,7 +82,8 @@ class GraphConstructor { control_dependencies(in.control_dependencies), return_tensors(in.return_tensors), return_nodes(in.return_nodes), - importing(true) {} + importing(true), + validate_colocation_constraints(in.validate_colocation_constraints) {} bool allow_internal_ops; bool expect_device_spec; @@ -103,6 +105,7 @@ class GraphConstructor { // applicable to ConvertGraphDefToGraph as well, so make an attempt to // remove this. bool importing; + bool validate_colocation_constraints; }; typedef gtl::ArraySlice NodeDefSlice; @@ -492,7 +495,8 @@ Status GraphConstructor::InitFromEdges() { Status GraphConstructor::ValidateColocationConstraints( const NodeDef& node_def) { - if (!opts_.importing) return Status::OK(); + if (!opts_.validate_colocation_constraints || !opts_.importing) + return Status::OK(); const auto iter = node_def.attr().find(kColocationAttrName); if (iter == node_def.attr().end()) return Status::OK(); for (const string& c : iter->second.list().s()) { diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h index 416c0ee9ae..4b418b8622 100644 --- a/tensorflow/core/graph/graph_constructor.h +++ b/tensorflow/core/graph/graph_constructor.h @@ -119,6 +119,9 @@ struct ImportGraphDefOptions { // TODO(skyewm): make this work with `skip_mapped_nodes` if there's a need. std::vector return_nodes; + // If true, checks that all colocation constraints are nodes in the GraphDef. + bool validate_colocation_constraints = true; + // TODO(ashankar): Enable handling of GraphDefs produced by newer binaries // with ops that are not defined in the binary calling ImportGraphDef. // Similar to the producer_op_list argument to import_graph_def in the diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc index cd541c7d86..893826da3e 100644 --- a/tensorflow/core/graph/graph_constructor_test.cc +++ b/tensorflow/core/graph/graph_constructor_test.cc @@ -2978,5 +2978,20 @@ versions { EXPECT_EQ(17, refiner.graph_def_version()); } +TEST_F(GraphConstructorTest, ImportGraphDef_ValidateColationConstraints) { + GraphDef def; + ASSERT_TRUE(protobuf::TextFormat::ParseFromString( + "node { name: 'A' op: 'TestInput' attr { key: '_class' value { list { " + "s:'loc:@missing' } } } }", + &def)); + ImportGraphDefOptions options; + // TODO(yaozhang): Extend ExpectError to check error type and use ExpectError + // and ExpectOK to replace the code below. + Status s = ImportGraphDef(options, def, &graph_, nullptr); + EXPECT_TRUE(errors::IsInvalidArgument(s)) << s; + options.validate_colocation_constraints = false; + TF_EXPECT_OK(ImportGraphDef(options, def, &graph_, nullptr)); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index a59879f53c..8654a2a3ed 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -447,6 +447,11 @@ Status GraphProperties::InferStatically() { shape_refiner.set_disable_constant_propagation(true); shape_refiner.set_function_library_for_shape_inference(&function_library); ImportGraphDefOptions options; + // Graph optimization happens at the late stage of graph execution, + // when colocation constraints are already validated previously and + // the device placement of nodes has also completed, so there + // is no need to validate colocation constraints again. + options.validate_colocation_constraints = false; Status s = ImportGraphDef(options, item_.graph, &graph, &shape_refiner); TF_RETURN_IF_ERROR(s); diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index a33cdacc09..acd0b598ae 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" #include "tensorflow/core/grappler/inputs/utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/protobuf.h" @@ -784,6 +785,30 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { EXPECT_EQ(shape_f.dim(1).size(), shape_a.dim(1).size()); } +TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = ops::Const(s.WithOpName("a"), 1.0f, {1}); + Output b = ops::Const(s.WithOpName("b"), 2.0f, {1}); + Output c = ops::Const(s.WithOpName("c").ColocateWith(a), 3.0f, {1}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + // Create a graph with node a removed (say by some graph optimization + // pass), noting that node c is colocated with a. This is fine as it + // is in the late stage of graph execution, the colocation constraints have + // been validated previously and the device placement of nodes has completed. + GraphDef optimized_graph; + for (const auto& node : item.graph.node()) { + if (node.name() != "a") { + *optimized_graph.add_node() = node; + } + } + item.graph.Swap(&optimized_graph); + GraphProperties properties(item); + // This function should return OK, since it doesn't validate the colocation + // constraints internally. + TF_EXPECT_OK(properties.InferStatically()); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From c38781f6ec9710c0102bdc9d95bf6176fd96d1ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 02:32:44 -0800 Subject: [PATCH 0216/1801] When sharding a tuple, we typically want to describe the data sharding of each individual subtensor individually. Tuples are essentially just containers - the tensors they contain should be able to be sharded differently. Tuples are hierarchically structured, but shardings were designed to not contain the sharded type (the sharded type is inferred from the output type of the instruction the sharding is applied to). Therefore, shardings for tuples contain shardings for each subtensor as a non-structured list. This list is ordered as a preorder walk of the tuple shape, and of course only the leaf nodes of the tuple shape are stored. The structure is reapplied when the sharded instruction's shape is known. PiperOrigin-RevId: 175132692 --- .../compiler/xla/service/hlo_sharding.cc | 71 +++++++++++++++- .../compiler/xla/service/hlo_sharding.h | 83 +++++++++++++++++-- .../compiler/xla/service/hlo_sharding_test.cc | 68 +++++++++++++++ tensorflow/compiler/xla/shape_tree.h | 3 + .../compiler/xla/tools/parser/hlo_parser.cc | 41 ++++++++- .../xla/tools/parser/hlo_parser_test.cc | 15 +++- tensorflow/compiler/xla/xla_data.proto | 13 ++- 7 files changed, 278 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 0d019d22f5..bc5663513b 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_sharding.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/str_util.h" namespace xla { @@ -38,6 +39,15 @@ HloSharding HloSharding::Tile1D(const Shape& input_shape, int64 num_tiles) { } string HloSharding::ToString() const { + if (IsTuple()) { + std::vector parts; + parts.reserve(tuple_elements_.size()); + for (const HloSharding& element : tuple_elements_) { + parts.push_back(element.ToString()); + } + return StrCat("{", tensorflow::str_util::Join(parts, ", "), "}"); + } + string result = StrCat("{", (replicated_ ? " replicated" : ""), (maximal_ ? " maximal" : "")); @@ -53,6 +63,11 @@ string HloSharding::ToString() const { } bool HloSharding::UsesDevice(int64 device) const { + if (IsTuple()) { + return std::any_of( + tuple_elements_.begin(), tuple_elements_.end(), + [&](const HloSharding& s) { return s.UsesDevice(device); }); + } const auto& devices = tile_assignment_; return replicated_ || std::find(devices.begin(), devices.end(), device) != devices.end(); @@ -61,6 +76,7 @@ bool HloSharding::UsesDevice(int64 device) const { std::vector HloSharding::TileIndexForDevice(int64 device) const { CHECK(!ShapeUtil::IsTuple(tile_shape_)); CHECK(!maximal_); + CHECK(!IsTuple()); std::vector ret_index; tile_assignment_.Each([&](tensorflow::gtl::ArraySlice index, int64 d) { if (d == device) { @@ -74,6 +90,7 @@ std::vector HloSharding::TileIndexForDevice(int64 device) const { int64 HloSharding::DeviceForTileIndex( tensorflow::gtl::ArraySlice index) const { CHECK(!replicated_); + CHECK(!IsTuple()); if (maximal_) { return *tile_assignment_.begin(); } @@ -82,7 +99,7 @@ int64 HloSharding::DeviceForTileIndex( } std::vector HloSharding::TileOffsetForDevice(int64 device) const { - CHECK(!ShapeUtil::IsTuple(tile_shape_)); + CHECK(!IsTuple()); std::vector index = TileIndexForDevice(device); if (maximal_) { @@ -97,7 +114,7 @@ std::vector HloSharding::TileOffsetForDevice(int64 device) const { } std::vector HloSharding::TileLimitForDevice(int64 device) const { - CHECK(!ShapeUtil::IsTuple(tile_shape_)); + CHECK(!IsTuple()); CHECK(!maximal_); // Maximal shardings do not have a valid tile shape. std::vector index = TileIndexForDevice(device); @@ -108,13 +125,41 @@ std::vector HloSharding::TileLimitForDevice(int64 device) const { } StatusOr HloSharding::UniqueDevice() const { - if (!replicated_ && maximal_) { + if (IsTuple()) { + if (tuple_elements_.empty()) { + return tensorflow::errors::InvalidArgument( + "UniqueDevice() called on empty tuple"); + } + std::vector> results; + std::transform(tuple_elements_.begin(), tuple_elements_.end(), + std::back_inserter(results), + [](const HloSharding& s) { return s.UniqueDevice(); }); + if (std::all_of(results.begin(), results.end(), + [&](const StatusOr& s) { + return s.ok() && results[0].ok() && + s.ValueOrDie() == results[0].ValueOrDie(); + })) { + return results[0]; + } else { + return tensorflow::errors::InvalidArgument( + "Tuple did not contain a unique device"); + } + } + if (!replicated_ && maximal_ && !IsTuple()) { return static_cast(*tile_assignment_.begin()); } return tensorflow::errors::InvalidArgument( "UniqueDevice() called on sharding that executes on multiple devices"); } +bool HloSharding::HasUniqueDevice() const { + if (IsTuple()) { + return UniqueDevice().status().ok(); + } else { + return !IsReplicated() && IsTileMaximal(); + } +} + Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { if (replicated_) { return Status::OK(); @@ -193,7 +238,16 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { /*static*/ StatusOr HloSharding::FromProto( const OpSharding& proto) { - if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { + if (proto.type() == OpSharding::Type::OpSharding_Type_TUPLE) { + std::vector tuple_shardings; + tuple_shardings.reserve(proto.tuple_shardings().size()); + for (const OpSharding& tuple_sharding_proto : proto.tuple_shardings()) { + TF_ASSIGN_OR_RETURN(HloSharding sharding, + HloSharding::FromProto(tuple_sharding_proto)); + tuple_shardings.push_back(sharding); + } + return HloSharding(tuple_shardings); + } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { return Replicate(); } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL) { return HloSharding(proto.tile_assignment_devices(0)); @@ -212,6 +266,15 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { OpSharding HloSharding::ToProto() const { OpSharding result; + + if (IsTuple()) { + for (const HloSharding& element : tuple_elements_) { + *result.add_tuple_shardings() = element.ToProto(); + } + result.set_type(OpSharding::Type::OpSharding_Type_TUPLE); + return result; + } + *result.mutable_tile_shape() = tile_shape_; for (int64 dim : tile_assignment_.dimensions()) { result.add_tile_assignment_dimensions(dim); diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index d7ada30c70..f8ef2a3d05 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/array.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/protobuf_util.h" +#include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/hash/hash.h" @@ -67,6 +68,18 @@ class HloSharding { // `num_tiles` tiles. static HloSharding Tile1D(const Shape& input_shape, int64 num_tiles); + // Creates a new sharding for a tuple type. The given ShapeTree must have + // elements for every leaf shape contained in the tuple. + static HloSharding Tuple(const ShapeTree& sub_shardings) { + std::vector flattened_list; + flattened_list.reserve( + std::distance(sub_shardings.leaf_begin(), sub_shardings.leaf_end())); + for (const auto& index_to_sharding : sub_shardings.leaves()) { + flattened_list.push_back(index_to_sharding.second); + } + return HloSharding(flattened_list); + } + // Create a new sharding from a protobuf OpSharding. static StatusOr FromProto(const OpSharding& proto); @@ -76,47 +89,89 @@ class HloSharding { // Validate that this sharding can be applied to a tensor with shape `shape`. Status Validate(const Shape& shape, int64 num_devices) const; + // Returns true if the sharding has tuple type. + bool IsTuple() const { return tuple_; } + // Returns true if the sharding is trivial: replicate on all devices. - bool IsReplicated() const { return replicated_; } + bool IsReplicated() const { + if (!IsTuple()) { + return replicated_; + } + return std::all_of(tuple_elements_.begin(), tuple_elements_.end(), + [](const HloSharding& s) { return s.IsReplicated(); }); + } // Returns true if the tile size is the same as the input size. - bool IsTileMaximal() const { return maximal_; } + bool IsTileMaximal() const { + if (!IsTuple()) { + return maximal_; + } + return std::all_of(tuple_elements_.begin(), tuple_elements_.end(), + [](const HloSharding& s) { return s.IsTileMaximal(); }); + } // Returns true if the sharding defines an operation on the given device. bool UsesDevice(int64 device) const; // Returns the tile that should be executed on the given device. + // REQUIRES: !IsTuple() std::vector TileIndexForDevice(int64 device) const; // Returns the device that should execute the given tile. // It is an error to call this if is_replicated() is true. + // REQUIRES: !IsTuple() int64 DeviceForTileIndex(tensorflow::gtl::ArraySlice index) const; // Given a device ID, returns the offset within the input space of the // tile that should be executed on the given core. This returns the lower // extent of the tile in the input space. + // REQUIRES: !IsTuple() std::vector TileOffsetForDevice(int64 device) const; // Given a device ID, returns the limit within the input space of the // tile that should be executed on the given core. This returns the upper // extent of the tile in the input space. + // REQUIRES: !IsTuple() std::vector TileLimitForDevice(int64 device) const; // Returns the single device this op operates on. - // Requires !Replicated() && IsTileMaximal(). + // REQUIRES: !IsTuple&& !Replicated() && IsTileMaximal() StatusOr UniqueDevice() const; // Returns true if this op only uses a single device. - bool HasUniqueDevice() const { return !IsReplicated() && IsTileMaximal(); } + bool HasUniqueDevice() const; + + // Returns the ShapeTree containing the shardings for each element of this + // tuple. Only the leaf elements are populated. This creates a new ShapeTree + // object so is not cheap. REQUIRES: IsTuple() + ShapeTree GetTupleShardingsAsShapeTree( + const Shape& tuple_shape) const { + ShapeTree result(tuple_shape, HloSharding::Replicate()); + CHECK_EQ(std::distance(result.leaf_begin(), result.leaf_end()), + tuple_elements_.size()); + auto it = tuple_elements_.begin(); + for (auto& index_to_sharding : result.leaves()) { + index_to_sharding.second = *it++; + } + return result; + } bool operator==(const HloSharding& other) const { return replicated_ == other.replicated_ && maximal_ == other.maximal_ && protobuf_util::ProtobufEquals(tile_shape_, other.tile_shape_) && - tile_assignment_ == other.tile_assignment_; + tile_assignment_ == other.tile_assignment_ && + tuple_elements_ == other.tuple_elements_; } bool operator!=(const HloSharding& other) const { return !(*this == other); } size_t Hash() const { + if (!tuple_) { + size_t h = 0; + for (const auto& element : tuple_elements_) { + h = tensorflow::Hash64Combine(h, element.Hash()); + } + return h; + } if (replicated_) { return 0; } @@ -131,33 +186,47 @@ class HloSharding { } // Gets the tile shape. - // It is an error to call this if IsTileMaximal() is true. + // REQUIRES: !IsTileMaximal() && !IsTuple() const Shape& tile_shape() const { return tile_shape_; } // Gets the tile assignment tensor. - // It is an error to call this if IsReplicated() is true. + // REQUIRES: !IsReplicated() && !IsTuple() const Array& tile_assignment() const { return tile_assignment_; } private: HloSharding() : replicated_(true), maximal_(true), + tuple_(false), tile_shape_(), tile_assignment_({0}) {} explicit HloSharding(int64 device_id) : replicated_(false), maximal_(true), + tuple_(false), tile_shape_(), tile_assignment_({1}, device_id) {} HloSharding(const Shape& tile_shape, const Array& tile_assignment) : replicated_(false), maximal_(false), + tuple_(false), tile_shape_(tile_shape), tile_assignment_(tile_assignment) {} + HloSharding(const std::vector& tuple_shardings) + : replicated_(false), + maximal_(false), + tuple_(true), + tile_assignment_({0}), + tuple_elements_(tuple_shardings) {} bool replicated_; bool maximal_; + bool tuple_; Shape tile_shape_; Array tile_assignment_; + // Only non-empty when tuple_ is true, but because empty tuples are allowed + // may also be empty even then. This is a flattened list of all the leaf + // shardings in a tuple shape, by pre-order walk (ShapeTree iterator order). + std::vector tuple_elements_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index d0a20471a0..00ea38480e 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -132,6 +132,29 @@ TEST_F(HloShardingTest, Tile) { } } +TEST_F(HloShardingTest, NestedTuple) { + // nested_tuple_shape = (f32[], (f32[3]), f32[4, 6]) + Shape nested_tuple_shape = ShapeUtil::MakeTupleShape({ + ShapeUtil::MakeShape(F32, {}), + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {3})}), + ShapeUtil::MakeShape(F32, {4, 6}), + }); + + OpSharding proto; + proto.set_type(OpSharding::Type::OpSharding_Type_TUPLE); + *proto.add_tuple_shardings() = HloSharding::Replicate().ToProto(); + *proto.add_tuple_shardings() = HloSharding::AssignDevice(0).ToProto(); + *proto.add_tuple_shardings() = HloSharding::AssignDevice(1).ToProto(); + HloSharding tuple_sharding = + HloSharding::FromProto(proto).ConsumeValueOrDie(); + + ShapeTree shape_tree = + tuple_sharding.GetTupleShardingsAsShapeTree(nested_tuple_shape); + EXPECT_EQ(shape_tree.element({0}), HloSharding::Replicate()); + EXPECT_EQ(shape_tree.element({1, 0}), HloSharding::AssignDevice(0)); + EXPECT_EQ(shape_tree.element({2}), HloSharding::AssignDevice(1)); +} + TEST_F(HloShardingTest, Hash) { auto hash_compare_equal = [](const HloSharding& a, const HloSharding& b) { if (a.Hash() != b.Hash()) { @@ -184,6 +207,51 @@ TEST_F(HloShardingTest, Hash) { MakeArray({2, 2}, {0, 3, 1, 2})); EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); } + + HloSharding default_sharding = HloSharding::Replicate(); + { + ShapeTree shape_tree(ShapeUtil::MakeTupleShape({}), + default_sharding); + HloSharding sharding1 = HloSharding::Replicate(); + HloSharding sharding2 = HloSharding::Tuple(shape_tree); + EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); + } + + { + ShapeTree shape_tree(ShapeUtil::MakeTupleShape({}), + default_sharding); + HloSharding sharding1 = HloSharding::Tuple(shape_tree); + HloSharding sharding2 = HloSharding::Tuple(shape_tree); + EXPECT_TRUE(hash_compare_equal(sharding1, sharding2)); + } + + { + ShapeTree shape_tree1( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree1.mutable_element({0}) = HloSharding::Replicate(); + ShapeTree shape_tree2( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree2.mutable_element({0}) = HloSharding::AssignDevice(0); + HloSharding sharding1 = HloSharding::Tuple(shape_tree1); + HloSharding sharding2 = HloSharding::Tuple(shape_tree2); + EXPECT_FALSE(hash_compare_equal(sharding1, sharding2)); + } + + { + ShapeTree shape_tree1( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree1.mutable_element({0}) = HloSharding::AssignDevice(0); + ShapeTree shape_tree2( + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {4})}), + default_sharding); + *shape_tree2.mutable_element({0}) = HloSharding::AssignDevice(0); + HloSharding sharding1 = HloSharding::Tuple(shape_tree1); + HloSharding sharding2 = HloSharding::Tuple(shape_tree2); + EXPECT_TRUE(hash_compare_equal(sharding1, sharding2)); + } } } // namespace diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index 64a36471b9..a898a4d375 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -116,6 +116,7 @@ class ShapeTree { ShapeTree(const Shape* shape, const T& init_value); ShapeTree(const ShapeTree& other) { *this = other; } + ShapeTree(ShapeTree&&) = default; ShapeTree& operator=(const ShapeTree& other) { root_ = other.root_; @@ -132,6 +133,8 @@ class ShapeTree { return *this; } + ShapeTree& operator=(ShapeTree&& other) = default; + // Returns the data element associated with the array in the shape at the // given index (see ShapeUtil::GetSubshape for how indexes are defined). const T& element(const ShapeIndex& index) const; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index f1e987cb15..df07e069a0 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -60,6 +60,7 @@ class HloParser { bool ParseInstructionList(HloComputation::Builder* builder, string* root_name); bool ParseInstruction(HloComputation::Builder* builder, string* root_name); + bool ParseControlPredecessors(HloInstruction* instruction); bool ParseLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseTupleLiteral(std::unique_ptr* literal, const Shape& shape); bool ParseNonTupleLiteral(std::unique_ptr* literal, @@ -123,6 +124,7 @@ class HloParser { bool ParseWindow(Window* window); bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); bool ParseSharding(OpSharding* sharding); + bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. bool ParseDxD(const string& name, std::vector* result); @@ -548,14 +550,49 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, return AddInstruction(name, instruction); } -// ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? ('devices=' ('[' -// dims ']')* device_list)? '}' dims ::= int_list device_list ::= int_list +// ::= '{' (single_sharding | tuple_sharding) '}' +// +// tuple_sharding ::= single_sharding* (',' single_sharding)* bool HloParser::ParseSharding(OpSharding* sharding) { + // A single sharding starts with '{' and is not followed by '{'. + // A tuple sharding starts with '{' and is followed by '{', or is '{''}' for + // an empty tuple. if (!ParseToken(TokKind::kLbrace, "expected '{' to start sharding attribute")) { return false; } + if (lexer_.GetKind() != TokKind::kLbrace && + lexer_.GetKind() != TokKind::kRbrace) { + return ParseSingleSharding(sharding, /*lbrace_pre_lexed=*/true); + } + + // Tuple sharding. + // Allow empty tuple shardings. + if (lexer_.GetKind() != TokKind::kRbrace) { + do { + if (!ParseSingleSharding(sharding->add_tuple_shardings(), + /*lbrace_pre_lexed=*/false)) { + return false; + } + } while (EatIfPresent(TokKind::kComma)); + } + sharding->set_type(OpSharding::Type::OpSharding_Type_TUPLE); + + return ParseToken(TokKind::kRbrace, "expected '}' to end sharding attribute"); +} + +// ::= '{' 'replicated'? 'maximal'? ('device=' int)? shape? +// ('devices=' ('[' dims ']')* device_list)? '}' +// dims ::= int_list device_list ::= int_list +bool HloParser::ParseSingleSharding(OpSharding* sharding, + bool lbrace_pre_lexed) { + if (!lbrace_pre_lexed && + !ParseToken(TokKind::kLbrace, + "expected '{' to start sharding attribute")) { + return false; + } + bool maximal = false; bool replicated = false; std::vector devices; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 62b4385e76..a9dc360978 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -152,7 +152,7 @@ ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f3 %v1 = f32[4]{0} parameter(0), sharding={maximal device=1} %v2 = f32[4]{0} parameter(1), sharding={maximal device=1} %greater-than = pred[4]{0} greater-than(f32[4]{0} %v1, f32[4]{0} %v2), sharding={replicated} - ROOT %select = f32[4]{0} select(pred[4]{0} %greater-than, f32[4]{0} %v1, f32[4]{0} %v2) + ROOT %select = f32[4]{0} select(pred[4]{0} %greater-than, f32[4]{0} %v1, f32[4]{0} %v2), sharding={} } )" @@ -180,6 +180,19 @@ ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3) } +)" +}, +{ +"ShardedTupleCreate", +R"(HloModule ShardedTupleCreate_module: + +ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) { + %v1 = f32[] parameter(0) + %v2 = f32[3]{0} parameter(1) + %v3 = f32[2,3]{1,0} parameter(2) + ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3), sharding={{replicated}, {maximal device=0}, {replicated}} +} + )" }, // int32 result = 0; diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 06987e0044..7146604708 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -825,8 +825,10 @@ message OpSharding { REPLICATED = 0; // This sharding is maximal - one device runs the entire operation. MAXIMAL = 1; - // Neither of the above; tile_shape and tile_assignment are both used. - OTHER = 2; + // This sharding is a tuple - only the tuple_shardings field is valid. + TUPLE = 2; + // None of the above; tile_shape and tile_assignment are both used. + OTHER = 3; } Type type = 1; // The shape of the sharded tile. @@ -838,6 +840,13 @@ message OpSharding { // Flattened list of device IDs. The order of flattening is the same as used // by IndexUtil::MultiToLinearIndex(tile_assignment_shape). repeated int64 tile_assignment_devices = 4; + // If type == TUPLE, the sub-shardings, one per leaf node in the tuple shape, + // in pre-order. The tuple shape could be nested; here we store just a + // flattened list of all leaves in the tuple shape. Note that the tuple shape + // is not stored here; shardings do not store the shapes to which they are + // applied, this is inferred from the instruction this sharding gets attached + // to. + repeated OpSharding tuple_shardings = 5; } message OpRequest { -- GitLab From 31d6b687da35fcbf4a1dd767fda06fab9213db31 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 05:36:43 -0800 Subject: [PATCH 0217/1801] Previously we had a large number of ComputeAndCompare* methods to run a computation and then compare the reuslt to a specified value (Array or Literal). The new method takes adventage of the recently added ComputeConstant method to calculate the expected value using the HloEvaluator eliminating the need for doing the calculation manually. As a usage example I converted the convolution tests to the new method what simplified them by quite a bit. If there is interest then we can migrate the other tests as well and then remove the old style ComputeAndCompare* methods. PiperOrigin-RevId: 175145596 --- .../xla/tests/client_library_test_base.cc | 54 ++++++ .../xla/tests/client_library_test_base.h | 17 ++ .../compiler/xla/tests/convolution_test.cc | 160 ++++++------------ 3 files changed, 126 insertions(+), 105 deletions(-) diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index 065bce7e31..ef54714e46 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -346,6 +346,60 @@ void ClientLibraryTestBase::ComputeAndCompareTuple( LiteralTestUtil::ExpectNearTuple(expected, *actual, error); } +void ClientLibraryTestBase::ComputeAndCompare( + ComputationBuilder* builder, const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments) { + auto status_or_data = ComputeValueAndReference(builder, operand, arguments); + EXPECT_IS_OK(status_or_data); + if (!status_or_data.ok()) { + return; + } + std::unique_ptr reference, result; + std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); + LiteralTestUtil::ExpectEqual(*reference, *result); +} + +void ClientLibraryTestBase::ComputeAndCompare( + ComputationBuilder* builder, const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { + auto status_or_data = ComputeValueAndReference(builder, operand, arguments); + EXPECT_IS_OK(status_or_data); + if (!status_or_data.ok()) { + return; + } + std::unique_ptr reference, result; + std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); + LiteralTestUtil::ExpectNear(*reference, *result, error); +} + +StatusOr, std::unique_ptr>> +ClientLibraryTestBase::ComputeValueAndReference( + ComputationBuilder* builder, const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments) { + // Transfer the arguments to the executor service. We put the unique_ptr's + // into a vector to keep the data alive on the service until the end of this + // function. + std::vector> argument_data; + for (const auto& arg : arguments) { + TF_ASSIGN_OR_RETURN(auto data, client_->TransferToServer(arg)); + argument_data.push_back(std::move(data)); + } + + // Create raw pointers to the GlobalData for the rest of the call stack. + std::vector argument_data_ptr; + std::transform( + argument_data.begin(), argument_data.end(), + std::back_inserter(argument_data_ptr), + [](const std::unique_ptr& data) { return data.get(); }); + + TF_ASSIGN_OR_RETURN( + auto reference, + builder->ComputeConstant(operand, /*output_layout=*/nullptr, arguments)); + TF_ASSIGN_OR_RETURN(auto result, + ExecuteAndTransfer(builder, argument_data_ptr)); + return std::make_pair(std::move(reference), std::move(result)); +} + Computation ClientLibraryTestBase::CreateScalarRelu() { ComputationBuilder builder(client_, "relu"); auto z_value = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "z_value"); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 2c37466ff2..b578667735 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -196,6 +196,16 @@ class ClientLibraryTestBase : public ::testing::Test { ComputationBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec abs_error); + // Convenience method for running a built computation and comparing the result + // with the HloEvaluator. + void ComputeAndCompare(ComputationBuilder* builder, + const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments); + void ComputeAndCompare(ComputationBuilder* builder, + const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments, + ErrorSpec error); + // Create scalar operations for use in reductions. Computation CreateScalarRelu(); Computation CreateScalarMax(); @@ -298,6 +308,13 @@ class ClientLibraryTestBase : public ::testing::Test { const std::function& verify_output, const Shape* output_with_layout = nullptr); + + // Executes the computation and calculates the expected reference value using + // the HloEvaluator. Returns two literal in the order of (expected, actual). + StatusOr, std::unique_ptr>> + ComputeValueAndReference(ComputationBuilder* builder, + const ComputationDataHandle& operand, + tensorflow::gtl::ArraySlice arguments); }; template diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 0cc2e5fb7e..7425f778a6 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -82,177 +82,127 @@ XLA_TEST_F(ConvolutionTest, ForwardPassConvolution_3x3x256_256_OutputZ_Iota) { ComputationBuilder builder(client_, TestName()); auto lhs = builder.ConstantR4FromArray4D(*alhs); auto rhs = builder.ConstantR4FromArray4D(*arhs); - builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); + auto conv = builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(*alhs, *arhs, {1, 1}, Padding::kValid); - - ComputeAndCompareR4(&builder, *aexpected, {}, error_spec_); + ComputeAndCompare(&builder, conv, {}, error_spec_); } TEST_F(ConvolutionTest, Convolve_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kValid); - } - - Array4D input(1, 1, 1, 2); - input.FillWithYX(Array2D({ + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D input_data(1, 1, 1, 2); + input_data.FillWithYX(Array2D({ {1, 2}, })); - Array4D filter(1, 1, 1, 2); - filter.FillWithYX(Array2D({ + Array4D filter_data(1, 1, 1, 2); + filter_data.FillWithYX(Array2D({ {5, 6}, })); - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } // Tests valid padding for 2D convolution in raster space. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Valid) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kValid); - } + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); - Array4D input(1, 1, 4, 4); + Array4D input_data(1, 1, 4, 4); // clang-format off - input.FillWithYX(Array2D({ + input_data.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter(1, 1, 2, 2); + Array4D filter_data(1, 1, 2, 2); // clang-format off - filter.FillWithYX(Array2D({ + filter_data.FillWithYX(Array2D({ {5, 6}, {7, 8}, })); // clang-format on - - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } // Tests same padding for 2D convolution in raster space. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Same) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kSame); - } + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); - Array4D input(1, 1, 4, 4); + Array4D input_data(1, 1, 4, 4); // clang-format off - input.FillWithYX(Array2D({ + input_data.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter(1, 1, 2, 2); + Array4D filter_data(1, 1, 2, 2); // clang-format off - filter.FillWithYX(Array2D({ + filter_data.FillWithYX(Array2D({ {5, 6}, {7, 8}, })); // clang-format on - - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } // Tests same padding for 2D convolution in raster space with an odd sized // kernel. TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x3x3_Same) { ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - builder.Conv(input, filter, {1, 1}, Padding::kSame); - } + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); - Array4D input(1, 1, 4, 4); + Array4D input_data(1, 1, 4, 4); // clang-format off - input.FillWithYX(Array2D({ + input_data.FillWithYX(Array2D({ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10, 11, 12}, {13, 14, 15, 16}, })); // clang-format on - Array4D filter(1, 1, 3, 3); + Array4D filter_data(1, 1, 3, 3); // clang-format off - filter.FillWithYX(Array2D({ + filter_data.FillWithYX(Array2D({ { 5, 6, 7}, { 8, 9, 10}, {11, 12, 13}, })); // clang-format on - - std::unique_ptr> aexpected = - ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR4FromArray4D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR4(&builder, *aexpected, - {input_literal.get(), filter_literal.get()}, - error_spec_); + ComputeAndCompare(&builder, conv, + {*Literal::CreateFromArray(input_data), + *Literal::CreateFromArray(filter_data)}, + error_spec_); } XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { -- GitLab From c5a7366bfef9cfb00cd9855c98c12c6005dbb1bb Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 07:37:15 -0800 Subject: [PATCH 0218/1801] Removes void*s from the tape gradient code, replacing with templates. PiperOrigin-RevId: 175155685 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/tape.cc | 410 ------------------- tensorflow/c/eager/tape.h | 473 ++++++++++++++++++++-- tensorflow/python/eager/pywrap_tfe_src.cc | 60 +-- 4 files changed, 479 insertions(+), 465 deletions(-) delete mode 100644 tensorflow/c/eager/tape.cc diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 74e94be8d6..d533758e36 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -106,7 +106,6 @@ tf_cc_test( cc_library( name = "tape", - srcs = ["tape.cc"], hdrs = ["tape.h"], visibility = ["//tensorflow:internal"], deps = [ diff --git a/tensorflow/c/eager/tape.cc b/tensorflow/c/eager/tape.cc deleted file mode 100644 index 459499bb69..0000000000 --- a/tensorflow/c/eager/tape.cc +++ /dev/null @@ -1,410 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include - -#include "tensorflow/c/eager/tape.h" - -namespace tensorflow { -namespace eager { - -bool GradientTape::ShouldRecord(gtl::ArraySlice tensor_ids) { - for (int64 i : tensor_ids) { - if (tensor_tape_.find(i) != tensor_tape_.end()) { - return true; - } - } - return false; -} - -void GradientTape::Watch(int64 tensor_id) { - tensor_tape_.emplace(tensor_id, -1); -} - -void GradientTape::RecordOperation( - const string& op_type, gtl::ArraySlice output_tensors, - gtl::ArraySlice input_tensor_id, void* backward_function, - const std::function& backward_function_deleter) { - if (!ShouldRecord(input_tensor_id)) { - backward_function_deleter(); - return; - } - std::vector ids; - ids.reserve(input_tensor_id.size()); - for (int64 i : input_tensor_id) { - tensor_usage_[i]++; - ids.push_back(i); - } - const int64 op_id = next_op_id_++; - std::vector tensors; - tensors.reserve(output_tensors.size()); - for (const TapeTensor& o : output_tensors) { - // Note: the tensor can have already been watched and hence be in the tape, - // so we cannot check that we're inserting it here. - tensor_tape_[o.id] = op_id; - tensor_usage_[o.id] = 1; - tensors.push_back(o); - } - op_tape_[op_id] = OpTapeEntry{op_type, tensors, ids, backward_function, - backward_function_deleter}; -} - -void GradientTape::DeleteTrace(int64 tensor_id) { - auto it = tensor_usage_.find(tensor_id); - if (it == tensor_usage_.end()) { - return; - } - it->second--; - if (it->second != 0) { - return; - } - tensor_usage_.erase(it); - auto tensor_op_it = tensor_tape_.find(tensor_id); - if (tensor_op_it == tensor_tape_.end()) { - return; - } - const int64 op_id = tensor_op_it->second; - if (op_id == -1) { - // Do not delete watched tensors. - return; - } - tensor_tape_.erase(tensor_op_it); - auto op_it = op_tape_.find(op_id); - CHECK(op_it != op_tape_.end()); - for (const auto& output : op_it->second.output_tensor_info) { - if (tensor_usage_.find(output.id) != tensor_usage_.end()) { - // Found a usage for an output, so cannot delete the op. - return; - } - } - for (int64 id : op_it->second.input_tensor_id) { - DeleteTrace(id); - } - op_it->second.backward_function_deleter(); - op_tape_.erase(op_it); -} - -// Terminology: -// -// - op: a possibly composite operation, which has an entry in the tape -// - target: dy in dx/dy -// - source: dx in dx/dy -// - tensor: one of the many inputs or outputs of an operation -// -// Below here we do the gradient algorithm. It works as follows: -// -// First we filter the tape to just the subset of operations we want to -// differentiate. In the process of doing so we count how many times each Tensor -// is used as an input to an op (so we know when we're done computing gradients -// for that Tensor). We also count, for each tape entry, how many of its output -// Tensors need gradients to be computed (Tensors which are not used do not need -// any gradients to be computed). -// -// Finally, we start a backprop stack with a set of tape entries for which we -// have all gradients available. This set usually is a subset of the set of -// targets (not all since targets which have outputs in the tape will not have -// gradients available initially). -// -// Then we repeatedly pop an entry from the stack, run its backprop, and update -// the gradients of its inputs. Once we have computed all gradients for a single -// input we can mark this input as done, and this can trigger adding an entry to -// the stack if all outputs of that entry are now done. -// -// When the stack is empty we have gradients for all tensors we're interested -// in. - -struct BackpropInitialState { - OpTape op_tape; - - // Map from tensor ID to how many references still exist for this tensor in - // the tape. - std::unordered_map tensor_usage_counts; - - // Maps from op ID to how many output tensors of this op still need to have - // their gradients computed. - std::unordered_map op_missing_tensor; -}; - -BackpropInitialState PrepareBackprop( - gtl::ArraySlice target, const TensorTape& tensor_tape, - OpTape op_tape, const std::unordered_set& sources_set) { - std::vector tensor_stack; - tensor_stack.reserve(target.size()); - for (auto t : target) { - tensor_stack.push_back(t); - } - BackpropInitialState result; - while (!tensor_stack.empty()) { - int64 tensor_id = tensor_stack.back(); - tensor_stack.pop_back(); - auto op_id_it = tensor_tape.find(tensor_id); - if (op_id_it == tensor_tape.end()) { - continue; - } - int64 op_id = op_id_it->second; - auto op_it = op_tape.find(op_id); - auto result_op_it = result.op_tape.find(op_id); - if (op_id == -1 || op_it == op_tape.end() || - result_op_it != result.op_tape.end()) { - continue; - } - CHECK(result.op_tape.emplace(op_id, op_it->second).second); - for (auto it : op_it->second.input_tensor_id) { - auto count_it = result.tensor_usage_counts.find(it); - if (count_it != result.tensor_usage_counts.end()) { - count_it->second++; - } else { - result.tensor_usage_counts[it] = 1; - if (sources_set.find(it) == sources_set.end() && - tensor_tape.find(it) != tensor_tape.end()) { - tensor_stack.push_back(it); - } - } - } - op_tape.erase(op_it); - } - for (auto& pair : result.tensor_usage_counts) { - auto it = tensor_tape.find(pair.first); - if (it != tensor_tape.end() && it->second != -1) { - result.op_missing_tensor[it->second] += 1; - } - } - // Call destructors for all unneeded gradient functions. - for (const auto& op_pair : op_tape) { - op_pair.second.backward_function_deleter(); - } - return result; -} - -std::vector InitialStack( - const OpTape& op_tape, - const std::unordered_map& op_missing_tensor) { - std::vector result; - for (auto& op_entry : op_tape) { - if (op_missing_tensor.find(op_entry.first) == op_missing_tensor.end()) { - result.push_back(op_entry.first); - } - } - return result; -} - -Status InitialGradients(const VSpace& vspace, gtl::ArraySlice target, - gtl::ArraySlice output_gradients, - std::unordered_map tensor_usage_counts, - std::unordered_map>* result) { - for (int i = 0; i < target.size(); ++i) { - int64 id = vspace.TensorId(target[i]); - if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { - if (!output_gradients.empty() && output_gradients[i] != nullptr) { - // TODO(apassos) figure out how to print debugging information here. - return errors::InvalidArgument( - "A gradient was provided for a tensor which is used as part of the " - "computation."); - } - } else { - if (output_gradients.empty() || output_gradients[i] == nullptr) { - (*result)[id].push_back(vspace.OnesLike(target[i])); - } else { - (*result)[id].push_back(output_gradients[i]); - } - } - } - return Status::OK(); -} - -// If over kMinAggregateCount gradients are accumulated and the total -// memory consumption is over kMinAggregateBytes, do an early aggregation -// so as to release the gradient tensor to save memory. -static const int kMinAggregateCount = 4; -static const int kMinAggregateBytes = 128 * 1024 * 1024; - -Status GradientTape::Gradient(const VSpace& vspace, - gtl::ArraySlice target, - gtl::ArraySlice sources, - gtl::ArraySlice output_gradients, - std::vector* result) { - std::vector id_sources; - id_sources.reserve(sources.size()); - for (void* s : sources) { - id_sources.push_back(vspace.TensorId(s)); - } - std::unordered_set sources_set(id_sources.begin(), id_sources.end()); - std::vector id_targets; - id_sources.reserve(target.size()); - for (void* t : target) { - id_targets.push_back(vspace.TensorId(t)); - } - BackpropInitialState state = PrepareBackprop( - id_targets, tensor_tape_, std::move(op_tape_), sources_set); - std::vector op_stack = - InitialStack(state.op_tape, state.op_missing_tensor); - std::unordered_map> gradients; - Status s = InitialGradients(vspace, target, output_gradients, - state.tensor_usage_counts, &gradients); - auto cleanup = [&state]() { - // Release all backprop functions - for (const auto& pair : state.op_tape) { - pair.second.backward_function_deleter(); - } - }; - if (!s.ok()) { - cleanup(); - return s; - } - std::unordered_map gradients_size; - // TODO(apassos) multiple threads could be dequeuing from op_stack at the same - // time, for better CPU backprop performance. - VLOG(1) << "Initial stack:"; - if (VLOG_IS_ON(1)) { - for (auto t : op_stack) { - VLOG(1) << " " << t; - } - } - std::unordered_map> - functions_accept_none_for_indices({ - {"SoftmaxCrossEntropyWithLogits", {1}}, - {"FusedBatchNorm", {1, 2, 3, 4}}, - }); - while (!op_stack.empty()) { - const int64 op = op_stack.back(); - VLOG(1) << "Popped " << op; - op_stack.pop_back(); - auto op_it = state.op_tape.find(op); - if (op_it == state.op_tape.end()) { - // It is possible for ops to end up on the stack if they are unrelated to - // the target; we should just skip them. - continue; - } - auto trace = std::move(op_it->second); - state.op_tape.erase(op_it); - std::vector out_gradients; - out_gradients.reserve(trace.output_tensor_info.size()); - for (int i = 0; i < trace.output_tensor_info.size(); ++i) { - const int64 id = trace.output_tensor_info[i].id; - auto grad_it = gradients.find(id); - if (grad_it == gradients.end()) { - auto func_name_it = - functions_accept_none_for_indices.find(trace.op_type); - if (func_name_it != functions_accept_none_for_indices.end() && - func_name_it->second.find(i) != func_name_it->second.end()) { - out_gradients.push_back(nullptr); - } else { - out_gradients.push_back( - vspace.Zeros(trace.output_tensor_info[i].shape, - trace.output_tensor_info[i].dtype)); - } - } else { - out_gradients.push_back(vspace.AggregateGradients(grad_it->second)); - if (sources_set.find(grad_it->first) == sources_set.end()) { - gradients.erase(grad_it); - } - } - } - std::vector in_gradients; - Status s = vspace.CallBackwardFunction(trace.backward_function, - out_gradients, &in_gradients); - if (!s.ok()) { - VLOG(1) << "Gradient function failed."; - cleanup(); - return s; - } - VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " - << trace.input_tensor_id.size() << " sources"; - for (int i = 0; i < in_gradients.size(); ++i) { - const int64 id = trace.input_tensor_id[i]; - if (in_gradients[i] != nullptr) { - auto& unaggregated_grads = gradients[id]; - unaggregated_grads.push_back(in_gradients[i]); - if (unaggregated_grads.size() > kMinAggregateCount) { - auto size_it = gradients_size.find(id); - int64 size; - if (size_it == gradients_size.end()) { - size = vspace.NumElements(unaggregated_grads[0]); - gradients_size.emplace(id, size); - } else { - size = size_it->second; - } - if (unaggregated_grads.size() * size * 4 > kMinAggregateBytes) { - void* tensor = vspace.AggregateGradients(unaggregated_grads); - unaggregated_grads.clear(); - unaggregated_grads.push_back(tensor); - } - } - } - auto usage_count_it = state.tensor_usage_counts.find(id); - if (usage_count_it == state.tensor_usage_counts.end()) { - VLOG(1) << "Tensor " << id << " not used"; - continue; - } - usage_count_it->second--; - if (usage_count_it->second > 0) { - VLOG(1) << "Tensor " << id << " usage count " << usage_count_it->second; - continue; - } - auto tape_it = tensor_tape_.find(id); - if (tape_it == tensor_tape_.end()) { - VLOG(1) << "Tensor " << id - << " has no associated op. Deleting gradient"; - auto grad_it = gradients.find(id); - if (grad_it != gradients.end()) { - for (auto g : grad_it->second) { - vspace.DeleteTensor(g); - } - gradients.erase(grad_it); - } - continue; - } - const int64 op_id = tape_it->second; - if (op_id == -1) { - VLOG(1) << "Tensor " << id << " is source"; - continue; - } - auto missing_it = state.op_missing_tensor.find(op_id); - if (missing_it != state.op_missing_tensor.end()) { - missing_it->second--; - VLOG(1) << "Op " << op_id << " missing " << missing_it->second - << " output gradients"; - if (missing_it->second == 0) { - op_stack.push_back(op_id); - } - } - } - } - CHECK(state.op_tape.empty()); - result->reserve(sources.size()); - for (auto is : id_sources) { - auto grad_it = gradients.find(is); - if (grad_it == gradients.end()) { - result->push_back(nullptr); - } else { - if (grad_it->second.size() == 1) { - result->push_back(grad_it->second[0]); - } else { - result->push_back(vspace.AggregateGradients(grad_it->second)); - } - gradients.erase(grad_it); - } - } - VLOG(1) << "Final gradients size: " << gradients.size(); - for (auto grad_pair : gradients) { - for (const auto& g : grad_pair.second) { - vspace.DeleteTensor(g); - } - } - return Status::OK(); -} - -} // namespace eager -} // namespace tensorflow diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 2bb62a7ab3..654ceb7bec 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -19,6 +19,7 @@ limitations under the License. // maintains the data structures required to do so. #include +#include #include #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" @@ -36,13 +37,14 @@ struct TapeTensor { }; // Represents an entry in the tape. +template struct OpTapeEntry { string op_type; std::vector output_tensor_info; std::vector input_tensor_id; // TODO(apassos) consider narrowing down this interface. - void* backward_function; + BackwardFunction* backward_function; // Should be called before deleting the backward function. TODO(apassos) use // unique_ptrs to ensure this happens. @@ -55,51 +57,67 @@ struct OpTapeEntry { using TensorTape = std::unordered_map; // Map from operation-id to tape entry. -using OpTape = std::unordered_map; +template +using OpTape = std::unordered_map>; // Operations the tape needs to perform on tensors to do backpropagation. Named // "vspace" because a subset of these are related to a vector space, such as // adding gradients, getting zeroes, etc. Currently cannot be implemented // without using tensorflow python code, hence left unspecified here. // -// We currently use void* for tensors, backward functions, and gradients (which -// can be but are not required to be tensors). TODO(apassos) replace this first -// with templates to allow for pyobject specialization in the client followed by -// a TFE_TensorHandle specialization, which is blocked by quite a few things -// still. +// Tensor is a representation of a tensor. We need to take its ID, and it needs +// to match IDs in the tape. +// +// Gradient is the type returned by gradient functions. In Python TF it's either +// Tensor or IndexedSlices or None, which here we map to nullptr. Gradients need +// to allow their size to be computed and they need to be passable to a backward +// function and deleted (as the backprop code creates lots of gradients the user +// is not interested in). +// +// BackwardFunction needs to be a closure which stores intermediate activations +// from the forward computation and calls a vector-jacobian product function +// (also known as adjoint function) to compute, given downstream gradients, +// upstream gradients. +// +// TODO(apassos) provide concrete template instantiations for TFE_TensorHandle +// specialization, which is blocked by quite a few things needing to loop back +// into python now. +template class VSpace { public: virtual ~VSpace() {} - // Returns the number of elements in the tensor. - virtual int64 NumElements(void* tensor) const = 0; + // Returns the number of elements in the gradient tensor. + virtual int64 NumElements(Gradient* tensor) const = 0; // Consumes references to the tensors in the gradient_tensors list and returns // a tensor with the result. - virtual void* AggregateGradients( - gtl::ArraySlice gradient_tensors) const = 0; + virtual Gradient* AggregateGradients( + gtl::ArraySlice gradient_tensors) const = 0; // Returns a tensor of the right shape and dtype filled with zeros. - virtual void* Zeros(TensorShape shape, DataType dtype) const = 0; + virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0; // Returns a Tensor which is filled with ones and like the input. - virtual void* OnesLike(void*) const = 0; + virtual Gradient* OnesLike(Tensor*) const = 0; // Returns an integer which is a unique-to-within-this-program handle for this // tensor. - virtual int64 TensorId(void* tensor) const = 0; + virtual int64 TensorId(Tensor* tensor) const = 0; // Calls the passed-in backward function. - virtual Status CallBackwardFunction(void* backward_function, - gtl::ArraySlice output_gradients, - std::vector* result) const = 0; + virtual Status CallBackwardFunction( + BackwardFunction* backward_function, + gtl::ArraySlice output_gradients, + std::vector* result) const = 0; // Deletes the input tensor. - virtual void DeleteTensor(void* tensor) const = 0; + virtual void DeleteGradient(Gradient* gradient) const = 0; }; // Traces the execution of operations, doing eager garbage collection, and // exporting a full trace so other code can do backpropagation. Not thread-safe. +template class GradientTape { public: GradientTape() {} @@ -116,7 +134,7 @@ class GradientTape { void RecordOperation(const string& op_type, gtl::ArraySlice output_tensors, gtl::ArraySlice input_tensor_id, - void* backward_function, + BackwardFunction* backward_function, const std::function& backward_function_deleter); void DeleteTrace(int64 tensor_id); @@ -125,14 +143,15 @@ class GradientTape { // once) and produces the gradient of the target tensors with respect to the // source tensors. The output gradients are used if not empty and not // null. The result is populated with one tensor per target element. - Status Gradient(const VSpace& vspace, gtl::ArraySlice target, - gtl::ArraySlice sources, - gtl::ArraySlice output_gradients, - std::vector* result); + Status ComputeGradient( + const VSpace& vspace, + gtl::ArraySlice target, gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result); private: TensorTape tensor_tape_; - OpTape op_tape_; + OpTape op_tape_; int64 next_op_id_{0}; // Map from tensor id to number of remaining usages (i.e. how many entries in @@ -140,6 +159,412 @@ class GradientTape { std::unordered_map tensor_usage_; }; +// Template instantiations here + +template +bool GradientTape::ShouldRecord( + gtl::ArraySlice tensor_ids) { + for (int64 i : tensor_ids) { + if (tensor_tape_.find(i) != tensor_tape_.end()) { + return true; + } + } + return false; +} + +template +void GradientTape::Watch(int64 tensor_id) { + tensor_tape_.emplace(tensor_id, -1); +} + +template +void GradientTape::RecordOperation( + const string& op_type, gtl::ArraySlice output_tensors, + gtl::ArraySlice input_tensor_id, BackwardFunction* backward_function, + const std::function& backward_function_deleter) { + if (!ShouldRecord(input_tensor_id)) { + backward_function_deleter(); + return; + } + std::vector ids; + ids.reserve(input_tensor_id.size()); + for (int64 i : input_tensor_id) { + tensor_usage_[i]++; + ids.push_back(i); + } + const int64 op_id = next_op_id_++; + std::vector tensors; + tensors.reserve(output_tensors.size()); + for (const TapeTensor& o : output_tensors) { + // Note: the tensor can have already been watched and hence be in the tape, + // so we cannot check that we're inserting it here. + tensor_tape_[o.id] = op_id; + tensor_usage_[o.id] = 1; + tensors.push_back(o); + } + op_tape_[op_id] = OpTapeEntry{ + op_type, tensors, ids, backward_function, backward_function_deleter}; +} + +template +void GradientTape::DeleteTrace( + int64 tensor_id) { + auto it = tensor_usage_.find(tensor_id); + if (it == tensor_usage_.end()) { + return; + } + it->second--; + if (it->second != 0) { + return; + } + tensor_usage_.erase(it); + auto tensor_op_it = tensor_tape_.find(tensor_id); + if (tensor_op_it == tensor_tape_.end()) { + return; + } + const int64 op_id = tensor_op_it->second; + if (op_id == -1) { + // Do not delete watched tensors. + return; + } + tensor_tape_.erase(tensor_op_it); + auto op_it = op_tape_.find(op_id); + CHECK(op_it != op_tape_.end()); + for (const auto& output : op_it->second.output_tensor_info) { + if (tensor_usage_.find(output.id) != tensor_usage_.end()) { + // Found a usage for an output, so cannot delete the op. + return; + } + } + for (int64 id : op_it->second.input_tensor_id) { + DeleteTrace(id); + } + op_it->second.backward_function_deleter(); + op_tape_.erase(op_it); +} + +// Terminology: +// +// - op: a possibly composite operation, which has an entry in the tape +// - target: dy in dx/dy +// - source: dx in dx/dy +// - tensor: one of the many inputs or outputs of an operation +// +// Below here we do the gradient algorithm. It works as follows: +// +// First we filter the tape to just the subset of operations we want to +// differentiate. In the process of doing so we count how many times each Tensor +// is used as an input to an op (so we know when we're done computing gradients +// for that Tensor). We also count, for each tape entry, how many of its output +// Tensors need gradients to be computed (Tensors which are not used do not need +// any gradients to be computed). +// +// Finally, we start a backprop stack with a set of tape entries for which we +// have all gradients available. This set usually is a subset of the set of +// targets (not all since targets which have outputs in the tape will not have +// gradients available initially). +// +// Then we repeatedly pop an entry from the stack, run its backprop, and update +// the gradients of its inputs. Once we have computed all gradients for a single +// input we can mark this input as done, and this can trigger adding an entry to +// the stack if all outputs of that entry are now done. +// +// When the stack is empty we have gradients for all tensors we're interested +// in. + +namespace { + +template +struct BackpropInitialState { + OpTape op_tape; + + // Map from tensor ID to how many references still exist for this tensor in + // the tape. + std::unordered_map tensor_usage_counts; + + // Maps from op ID to how many output tensors of this op still need to have + // their gradients computed. + std::unordered_map op_missing_tensor; +}; + +template +BackpropInitialState PrepareBackprop( + gtl::ArraySlice target, const TensorTape& tensor_tape, + OpTape op_tape, + const std::unordered_set& sources_set) { + std::vector tensor_stack; + tensor_stack.reserve(target.size()); + for (auto t : target) { + tensor_stack.push_back(t); + } + BackpropInitialState result; + while (!tensor_stack.empty()) { + int64 tensor_id = tensor_stack.back(); + tensor_stack.pop_back(); + auto op_id_it = tensor_tape.find(tensor_id); + if (op_id_it == tensor_tape.end()) { + continue; + } + int64 op_id = op_id_it->second; + auto op_it = op_tape.find(op_id); + auto result_op_it = result.op_tape.find(op_id); + if (op_id == -1 || op_it == op_tape.end() || + result_op_it != result.op_tape.end()) { + continue; + } + CHECK(result.op_tape.emplace(op_id, op_it->second).second); + for (auto it : op_it->second.input_tensor_id) { + auto count_it = result.tensor_usage_counts.find(it); + if (count_it != result.tensor_usage_counts.end()) { + count_it->second++; + } else { + result.tensor_usage_counts[it] = 1; + if (sources_set.find(it) == sources_set.end() && + tensor_tape.find(it) != tensor_tape.end()) { + tensor_stack.push_back(it); + } + } + } + op_tape.erase(op_it); + } + for (auto& pair : result.tensor_usage_counts) { + auto it = tensor_tape.find(pair.first); + if (it != tensor_tape.end() && it->second != -1) { + result.op_missing_tensor[it->second] += 1; + } + } + // Call destructors for all unneeded gradient functions. + for (const auto& op_pair : op_tape) { + op_pair.second.backward_function_deleter(); + } + return result; +} + +template +std::vector InitialStack( + const OpTape& op_tape, + const std::unordered_map& op_missing_tensor) { + std::vector result; + for (auto& op_entry : op_tape) { + if (op_missing_tensor.find(op_entry.first) == op_missing_tensor.end()) { + result.push_back(op_entry.first); + } + } + return result; +} + +template +Status InitialGradients( + const VSpace& vspace, + gtl::ArraySlice target, + gtl::ArraySlice output_gradients, + std::unordered_map tensor_usage_counts, + std::unordered_map>* result) { + for (int i = 0; i < target.size(); ++i) { + int64 id = vspace.TensorId(target[i]); + if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { + if (!output_gradients.empty() && output_gradients[i] != nullptr) { + // TODO(apassos) figure out how to print debugging information here. + return errors::InvalidArgument( + "A gradient was provided for a tensor which is used as part of the " + "computation."); + } + } else { + if (output_gradients.empty() || output_gradients[i] == nullptr) { + (*result)[id].push_back(vspace.OnesLike(target[i])); + } else { + (*result)[id].push_back(output_gradients[i]); + } + } + } + return Status::OK(); +} + +} // namespace + +// If over kMinAggregateCount gradients are accumulated and the total +// memory consumption is over kMinAggregateBytes, do an early aggregation +// so as to release the gradient tensor to save memory. +constexpr int kMinAggregateCount = 4; +constexpr int kMinAggregateBytes = 128 * 1024 * 1024; + +template +Status GradientTape::ComputeGradient( + const VSpace& vspace, + gtl::ArraySlice target, gtl::ArraySlice sources, + gtl::ArraySlice output_gradients, + std::vector* result) { + std::vector id_sources; + id_sources.reserve(sources.size()); + for (Tensor* s : sources) { + id_sources.push_back(vspace.TensorId(s)); + } + std::unordered_set sources_set(id_sources.begin(), id_sources.end()); + std::vector id_targets; + id_sources.reserve(target.size()); + for (Tensor* t : target) { + id_targets.push_back(vspace.TensorId(t)); + } + BackpropInitialState state = PrepareBackprop( + id_targets, tensor_tape_, std::move(op_tape_), sources_set); + std::vector op_stack = + InitialStack(state.op_tape, state.op_missing_tensor); + std::unordered_map> gradients; + Status s = InitialGradients(vspace, target, output_gradients, + state.tensor_usage_counts, &gradients); + auto cleanup = [&state]() { + // Release all backprop functions + for (const auto& pair : state.op_tape) { + pair.second.backward_function_deleter(); + } + }; + if (!s.ok()) { + cleanup(); + return s; + } + std::unordered_map gradients_size; + // TODO(apassos) multiple threads could be dequeuing from op_stack at the same + // time, for better CPU backprop performance. + VLOG(1) << "Initial stack:"; + if (VLOG_IS_ON(1)) { + for (auto t : op_stack) { + VLOG(1) << " " << t; + } + } + std::unordered_map> + functions_accept_none_for_indices({ + {"SoftmaxCrossEntropyWithLogits", {1}}, + {"FusedBatchNorm", {1, 2, 3, 4}}, + }); + while (!op_stack.empty()) { + const int64 op = op_stack.back(); + VLOG(1) << "Popped " << op; + op_stack.pop_back(); + auto op_it = state.op_tape.find(op); + if (op_it == state.op_tape.end()) { + // It is possible for ops to end up on the stack if they are unrelated to + // the target; we should just skip them. + continue; + } + auto trace = std::move(op_it->second); + state.op_tape.erase(op_it); + std::vector out_gradients; + out_gradients.reserve(trace.output_tensor_info.size()); + for (int i = 0; i < trace.output_tensor_info.size(); ++i) { + const int64 id = trace.output_tensor_info[i].id; + auto grad_it = gradients.find(id); + if (grad_it == gradients.end()) { + auto func_name_it = + functions_accept_none_for_indices.find(trace.op_type); + if (func_name_it != functions_accept_none_for_indices.end() && + func_name_it->second.find(i) != func_name_it->second.end()) { + out_gradients.push_back(nullptr); + } else { + out_gradients.push_back( + vspace.Zeros(trace.output_tensor_info[i].shape, + trace.output_tensor_info[i].dtype)); + } + } else { + out_gradients.push_back(vspace.AggregateGradients(grad_it->second)); + if (sources_set.find(grad_it->first) == sources_set.end()) { + gradients.erase(grad_it); + } + } + } + std::vector in_gradients; + Status s = vspace.CallBackwardFunction(trace.backward_function, + out_gradients, &in_gradients); + if (!s.ok()) { + VLOG(1) << "Gradient function failed."; + cleanup(); + return s; + } + VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " + << trace.input_tensor_id.size() << " sources"; + for (int i = 0; i < in_gradients.size(); ++i) { + const int64 id = trace.input_tensor_id[i]; + if (in_gradients[i] != nullptr) { + auto& unaggregated_grads = gradients[id]; + unaggregated_grads.push_back(in_gradients[i]); + if (unaggregated_grads.size() > kMinAggregateCount) { + auto size_it = gradients_size.find(id); + int64 size; + if (size_it == gradients_size.end()) { + size = vspace.NumElements(unaggregated_grads[0]); + gradients_size.emplace(id, size); + } else { + size = size_it->second; + } + if (unaggregated_grads.size() * size * 4 > kMinAggregateBytes) { + Gradient* grad = vspace.AggregateGradients(unaggregated_grads); + unaggregated_grads.clear(); + unaggregated_grads.push_back(grad); + } + } + } + auto usage_count_it = state.tensor_usage_counts.find(id); + if (usage_count_it == state.tensor_usage_counts.end()) { + VLOG(1) << "Tensor " << id << " not used"; + continue; + } + usage_count_it->second--; + if (usage_count_it->second > 0) { + VLOG(1) << "Tensor " << id << " usage count " << usage_count_it->second; + continue; + } + auto tape_it = tensor_tape_.find(id); + if (tape_it == tensor_tape_.end()) { + VLOG(1) << "Tensor " << id + << " has no associated op. Deleting gradient"; + auto grad_it = gradients.find(id); + if (grad_it != gradients.end()) { + for (auto g : grad_it->second) { + vspace.DeleteGradient(g); + } + gradients.erase(grad_it); + } + continue; + } + const int64 op_id = tape_it->second; + if (op_id == -1) { + VLOG(1) << "Tensor " << id << " is source"; + continue; + } + auto missing_it = state.op_missing_tensor.find(op_id); + if (missing_it != state.op_missing_tensor.end()) { + missing_it->second--; + VLOG(1) << "Op " << op_id << " missing " << missing_it->second + << " output gradients"; + if (missing_it->second == 0) { + op_stack.push_back(op_id); + } + } + } + } + CHECK(state.op_tape.empty()); + result->reserve(sources.size()); + for (auto is : id_sources) { + auto grad_it = gradients.find(is); + if (grad_it == gradients.end()) { + result->push_back(nullptr); + } else { + if (grad_it->second.size() == 1) { + result->push_back(grad_it->second[0]); + } else { + result->push_back(vspace.AggregateGradients(grad_it->second)); + } + gradients.erase(grad_it); + } + } + VLOG(1) << "Final gradients size: " << gradients.size(); + for (auto grad_pair : gradients) { + for (const auto& g : grad_pair.second) { + vspace.DeleteGradient(g); + } + } + return Status::OK(); +} + } // namespace eager } // namespace tensorflow diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index a00a7615d7..d67c3b18f7 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -443,10 +443,13 @@ void TFE_DeleteContextCapsule(PyObject* context) { TF_DeleteStatus(status); } +using GradientTape = + tensorflow::eager::GradientTape; + typedef struct { PyObject_HEAD /* Type-specific fields go here. */ - tensorflow::eager::GradientTape* tape; + GradientTape* tape; } TFE_Py_Tape; static void TFE_Py_Tape_Delete(PyObject* tape) { @@ -481,7 +484,7 @@ PyObject* TFE_Py_NewTape() { TFE_Py_Tape_Type.tp_new = PyType_GenericNew; if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return nullptr; TFE_Py_Tape* tape = PyObject_NEW(TFE_Py_Tape, &TFE_Py_Tape_Type); - tape->tape = new tensorflow::eager::GradientTape(); + tape->tape = new GradientTape(); return reinterpret_cast(tape); } @@ -627,9 +630,8 @@ void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); } -// TODO(apassos): cache the attribute lookups as member variables and decref -// them in the destructor. -class PyVSpace : public tensorflow::eager::VSpace { +class PyVSpace + : public tensorflow::eager::VSpace { public: explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} @@ -661,7 +663,7 @@ class PyVSpace : public tensorflow::eager::VSpace { Py_XDECREF(ones_like_); } - tensorflow::int64 NumElements(void* tensor) const final { + tensorflow::int64 NumElements(PyObject* tensor) const final { PyObject* arglist = Py_BuildValue("(O)", reinterpret_cast(tensor)); PyObject* result = PyEval_CallObject(num_elements_, arglist); @@ -671,8 +673,8 @@ class PyVSpace : public tensorflow::eager::VSpace { return r; } - void* AggregateGradients( - tensorflow::gtl::ArraySlice gradient_tensors) const final { + PyObject* AggregateGradients( + tensorflow::gtl::ArraySlice gradient_tensors) const final { PyObject* list = PyList_New(gradient_tensors.size()); for (int i = 0; i < gradient_tensors.size(); ++i) { // Note: stealing a reference to the gradient tensors. @@ -689,8 +691,8 @@ class PyVSpace : public tensorflow::eager::VSpace { return result; } - void* Zeros(tensorflow::TensorShape shape, - tensorflow::DataType dtype) const final { + PyObject* Zeros(tensorflow::TensorShape shape, + tensorflow::DataType dtype) const final { PyObject* py_shape = PyTuple_New(shape.dims()); for (int i = 0; i < shape.dims(); ++i) { PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); @@ -701,20 +703,20 @@ class PyVSpace : public tensorflow::eager::VSpace { Py_DECREF(arg_list); Py_DECREF(py_dtype); Py_DECREF(py_shape); - return reinterpret_cast(result); + return reinterpret_cast(result); } - void* OnesLike(void* tensor) const final { + PyObject* OnesLike(PyObject* tensor) const final { PyObject* arg_list = Py_BuildValue("(O)", tensor); PyObject* result = PyEval_CallObject(ones_like_, arg_list); if (result == nullptr) { VLOG(1) << "Call to ones_like failed"; } Py_DECREF(arg_list); - return reinterpret_cast(result); + return result; } - tensorflow::int64 TensorId(void* tensor) const final { + tensorflow::int64 TensorId(PyObject* tensor) const final { PyObject* py_tensor = reinterpret_cast(tensor); PyObject* id_field = PyObject_GetAttrString(py_tensor, "_id"); tensorflow::int64 id = MakeInt(id_field); @@ -723,9 +725,9 @@ class PyVSpace : public tensorflow::eager::VSpace { } tensorflow::Status CallBackwardFunction( - void* backward_function, - tensorflow::gtl::ArraySlice output_gradients, - std::vector* result) const final { + PyObject* backward_function, + tensorflow::gtl::ArraySlice output_gradients, + std::vector* result) const final { PyObject* grads = PyTuple_New(output_gradients.size()); for (int i = 0; i < output_gradients.size(); ++i) { if (output_gradients[i] == nullptr) { @@ -771,9 +773,7 @@ class PyVSpace : public tensorflow::eager::VSpace { return tensorflow::Status::OK(); } - void DeleteTensor(void* tensor) const final { - Py_XDECREF(reinterpret_cast(tensor)); - } + void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); } private: PyObject* py_vspace_; @@ -784,13 +784,13 @@ class PyVSpace : public tensorflow::eager::VSpace { PyObject* ones_like_; }; -std::vector MakeTensorList(PyObject* tensors) { +std::vector MakeTensorList(PyObject* tensors) { PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); if (seq == nullptr) { return {}; } int len = PySequence_Fast_GET_SIZE(seq); - std::vector list; + std::vector list; list.reserve(len); for (int i = 0; i < len; ++i) { list.push_back(PySequence_Fast_GET_ITEM(seq, i)); @@ -807,30 +807,30 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, return nullptr; } - std::vector target_vec = MakeTensorList(target); + std::vector target_vec = MakeTensorList(target); if (PyErr_Occurred()) { return nullptr; } - std::vector sources_vec = MakeTensorList(sources); + std::vector sources_vec = MakeTensorList(sources); if (PyErr_Occurred()) { return nullptr; } - std::vector outgrad_vec; + std::vector outgrad_vec; if (output_gradients != Py_None) { outgrad_vec = MakeTensorList(output_gradients); if (PyErr_Occurred()) { return nullptr; } - for (void* tensor : outgrad_vec) { + for (PyObject* tensor : outgrad_vec) { // Calling the backward function will eat a reference to the tensors in // outgrad_vec, so we need to increase their reference count. - Py_INCREF(reinterpret_cast(tensor)); + Py_INCREF(tensor); } } TFE_Py_Tape* tape_obj = reinterpret_cast(tape); - std::vector result; - status->status = tape_obj->tape->Gradient(c_vspace, target_vec, sources_vec, - outgrad_vec, &result); + std::vector result; + status->status = tape_obj->tape->ComputeGradient( + c_vspace, target_vec, sources_vec, outgrad_vec, &result); if (!status->status.ok()) { return nullptr; } -- GitLab From 67289c65bd8ba779d37e92a9aefac80c6cd8c27b Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 08:30:44 -0800 Subject: [PATCH 0219/1801] Fix typo in tensorflow/python/client/timeline.py COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/14386 from yifeif:yifeif-patch-2 8391d3b0369f823fc94ea75aef2df04c611a1671 PiperOrigin-RevId: 175161296 --- tensorflow/python/client/timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/client/timeline.py b/tensorflow/python/client/timeline.py index f3ba4244ce..1e96ac5ed4 100644 --- a/tensorflow/python/client/timeline.py +++ b/tensorflow/python/client/timeline.py @@ -275,7 +275,7 @@ class _TensorTracker(object): name: The name of the Tensor as a string. object_id: Chrome Trace object identifier assigned for this Tensor. timestamp: The creation timestamp of this event as a long integer. - pid: Process identifier of the assicaiated device, as an integer. + pid: Process identifier of the associated device, as an integer. allocator: Name of the allocator used to create the Tensor. num_bytes: Number of bytes allocated (long integer). -- GitLab From bb3355d49fae143159837e125700e48781496c6a Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 9 Nov 2017 08:46:31 -0800 Subject: [PATCH 0220/1801] Internal Change. PiperOrigin-RevId: 175163107 --- configure.py | 9 +++++---- tensorflow/BUILD | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/configure.py b/configure.py index 650541770a..e98367ef9f 100644 --- a/configure.py +++ b/configure.py @@ -487,10 +487,11 @@ def set_cc_opt_flags(environ_cp): cc_opt_flags = get_from_env_or_user_or_default(environ_cp, 'CC_OPT_FLAGS', question, default_cc_opt_flags) for opt in cc_opt_flags.split(): - host_opt = '-march=native' # It should be safe on the same build host. - write_to_bazelrc( - 'build:opt --cxxopt=%s --copt=%s' % (opt, opt) + - ' --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) + write_to_bazelrc('build:opt --cxxopt=%s --copt=%s' % (opt, opt)) + host_opt = '-march=native' # It should be safe on the same build host. + write_to_bazelrc( + 'build:opt --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) + write_to_bazelrc('build:opt --define with_default_optimizations=true') def set_tf_cuda_clang(environ_cp): diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 5a408db94e..8d3d38b5a1 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -178,6 +178,12 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "with_default_optimizations", + define_values = {"with_default_optimizations": "true"}, + visibility = ["//visibility:public"], +) + config_setting( name = "with_gcp_support", define_values = {"with_gcp_support": "true"}, -- GitLab From 3ef37c5071f47ea8306f73f8e3c6410f92e32598 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 09:11:36 -0800 Subject: [PATCH 0221/1801] Implemented Processor, which allows us to merge shapes of unknown rank with shapes of known rank. Made sure Processor::Merge doesn't erase previously inferred dimensions. PiperOrigin-RevId: 175166217 --- .../core/grappler/costs/graph_properties.cc | 103 +++++++----------- .../grappler/costs/graph_properties_test.cc | 7 +- 2 files changed, 45 insertions(+), 65 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 8654a2a3ed..151455778a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -50,13 +50,9 @@ template struct HandleToObject {}; template <> struct HandleToObject { - typedef TensorShapeProto Object; + typedef ShapeHandle Object; - static TensorShapeProto Unknown() { - TensorShapeProto result; - result.set_unknown_rank(true); - return result; - } + static ShapeHandle Unknown() { return ShapeHandle(); } }; template <> @@ -67,13 +63,24 @@ struct HandleToObject { }; template -struct Processor { +struct Processor {}; + +template <> +struct Processor { // Extract the shape or dim denoted by the handle. - void ExtractValue(Handle /*t1*/, - typename HandleToObject::Object* result) {} + void ExtractValue(ShapeHandle h, ShapeHandle* result) { *result = h; } // Merge the shapes or dims. - Status Merge(Handle /*t1*/, Handle /*t2*/, - typename HandleToObject::Object* result) { + Status Merge(ShapeHandle h1, ShapeHandle h2, ShapeHandle* result) { + if (InferenceContext::RankKnown(*result)) { + // The result was initialized in a previous merge to a shape of known + // rank, make sure we preserve that information. + return Status::OK(); + } + if (InferenceContext::RankKnown(h1)) { + *result = h1; + } else { + *result = h2; + } return Status::OK(); } }; @@ -101,24 +108,34 @@ struct Processor { if (dim1 >= 0 && dim2 >= 0) { CHECK_EQ(dim1, dim2); - *result = dim1; + RefineDim(dim1, result); } else if (dim1 >= 0 && dim2 < 0) { - *result = dim1; + RefineDim(dim1, result); } else if (dim1 < 0 && dim2 >= 0) { - *result = dim2; + RefineDim(dim2, result); } else if (dim1 < -1) { - *result = dim1; + RefineDim(dim1, result); } else if (dim2 < -1) { - *result = dim2; + RefineDim(dim2, result); } else { CHECK_EQ(dim1, dim2); CHECK_EQ(-1, dim1); - *result = -1; + RefineDim(-1, result); } return Status::OK(); } private: + void RefineDim(int64 dim, int64* result) { + if (*result >= 0) { + CHECK(*result == dim || dim < 0); + } else if (dim >= 0) { + *result = dim; + } else if (dim < *result) { + *result = dim; + } + } + int64 counter = 2; }; @@ -354,18 +371,17 @@ class SymbolicShapeManager { return dims_.Merge(d1, d2); } - int64 Value(DimensionHandle d) { return dims_.GetMergedValue(d); } - void AsTensorProperties(const ShapeHandle& shape, const DataType& type, - InferenceContext* ctx, OpInfo::TensorProperties* properties) { properties->set_dtype(type); - if (!ctx->RankKnown(shape)) { + ShapeHandle actual_shape = shapes_.GetMergedValue(shape); + if (!InferenceContext::RankKnown(actual_shape)) { properties->mutable_shape()->set_unknown_rank(true); } else { - for (int j = 0; j < ctx->Rank(shape); ++j) { - shape_inference::DimensionHandle dim = ctx->Dim(shape, j); - int64 d = Value(dim); + for (int j = 0; j < InferenceContext::Rank(actual_shape); ++j) { + shape_inference::DimensionHandle dim = + InferenceContext::DimKnownRank(actual_shape, j); + int64 d = dims_.GetMergedValue(dim); properties->mutable_shape()->add_dim()->set_size(d); } } @@ -477,41 +493,6 @@ Status GraphProperties::InferStatically() { } } } - - // Infer output shape for Restore op. - if (node->op_def().name() == "Restore" || - node->op_def().name() == "RestoreV2" || - node->op_def().name() == "RestoreSlice") { - auto ctx = shape_refiner.GetContext(node); - for (const Edge* out_edge : node->out_edges()) { - const Node* output = out_edge->dst(); - int output_idx = out_edge->src_output(); - if (output_idx < 0) { - continue; - } - if (!ctx->FullyDefined(ctx->output(output_idx)) && - output->op_def().name() == "Assign") { - if (!output->attrs().Find("validate_shape") || - !output->attrs().Find("validate_shape")->b()) { - continue; - } - auto output_ctx = shape_refiner.GetContext(output); - if (output_ctx->FullyDefined(output_ctx->output(0))) { - ctx->set_output(output_idx, output_ctx->output(0)); - output_ctx->MergeInput(1, output_ctx->output(0)); - } else { - const Node* var; - TF_CHECK_OK(node->input_node(0, &var)); - if (node->IsVariable()) { - auto var_ctx = shape_refiner.GetContext(var); - CHECK(var_ctx->FullyDefined(var_ctx->output(0))); - ctx->set_output(output_idx, var_ctx->output(0)); - output_ctx->MergeInput(1, var_ctx->output(0)); - } - } - } - } - } } // Propagate the initial shapes of Enter nodes manually (the Enter shape @@ -691,7 +672,7 @@ Status GraphProperties::InferStatically() { input_properties.resize(ctx->num_inputs()); for (int i = 0; i < ctx->num_inputs(); ++i) { shape_manager.AsTensorProperties(ctx->input(i), node->input_type(i), - ctx, &input_properties[i]); + &input_properties[i]); } for (const auto& edge : node->in_edges()) { if (!edge->src()->IsConstant()) { @@ -718,7 +699,7 @@ Status GraphProperties::InferStatically() { output_properties.resize(ctx->num_outputs()); for (int i = 0; i < ctx->num_outputs(); ++i) { shape_manager.AsTensorProperties(ctx->output(i), node->output_type(i), - ctx, &output_properties[i]); + &output_properties[i]); } } } diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index acd0b598ae..f785f627e1 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -296,10 +296,9 @@ TEST_F(GraphPropertiesTest, Queues) { ASSERT_EQ(1, props2.size()); EXPECT_EQ("float: [3,7]", PropToString(props2[0])); - // The dequeue3 op shape is unknown. const auto props3 = properties.GetOutputProperties("Dequeue3"); ASSERT_EQ(1, props3.size()); - EXPECT_EQ("float: ?", PropToString(props3[0])); + EXPECT_EQ("float: [3,7]", PropToString(props3[0])); // The dequeue3 op shape is unknown. The square2 op shape is known. Verify // that we merge the 2 properly to determine the shape of the data coming out @@ -678,8 +677,8 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape) { TEST_F(GraphPropertiesTest, InferRestoreOpShape_WithTwoNodesShareSameOutput) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output var = - ops::Variable(s.WithOpName("var"), TensorShape(), DataType::DT_FLOAT); + Output var = ops::Variable(s.WithOpName("var"), PartialTensorShape(), + DataType::DT_FLOAT); Output var2 = ops::Variable(s.WithOpName("var2"), TensorShape({128, 256}), DataType::DT_FLOAT); Output filename = -- GitLab From 0ebe7563b48fab5ee2e04a9275e623506559fab2 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 9 Nov 2017 09:24:00 -0800 Subject: [PATCH 0222/1801] Explicitly disable vectorization in the LLVM IR generated for Dot. In practice this does not seem to make a difference, but I did it anyway for completeness. PiperOrigin-RevId: 175167706 --- .../service/llvm_ir/kernel_support_library.cc | 4 +- .../service/llvm_ir/kernel_support_library.h | 8 ++- .../compiler/xla/service/llvm_ir/llvm_loop.cc | 68 +++++++++++++------ .../compiler/xla/service/llvm_ir/llvm_loop.h | 29 +++++--- 4 files changed, 75 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc index 123a327d4d..29cc0f81bd 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc @@ -40,7 +40,9 @@ void KernelSupportLibrary::For( }); } else { std::unique_ptr loop = llvm_ir::ForLoop::EmitForLoop( - name, start, end, step, ir_builder_, prevent_unrolling_); + name, start, end, step, ir_builder_, + /*prevent_unrolling=*/prevent_unrolling_, + /*prevent_vectorization=*/prevent_vectorization_); ir_builder_->SetInsertPoint(&loop->GetBodyBasicBlock()->back()); for_body_generator(loop->GetIndVarValue(), /*is_first_iteration=*/ir_builder_->CreateICmpEQ( diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h index 25aa2291a6..9bafb7b577 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h @@ -33,8 +33,11 @@ class KernelSupportLibrary { // If `prevent_unrolling` is true then unrolling is explicitly disabled on // every loop generated by this instance of KernelSupportLibrary. explicit KernelSupportLibrary(llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = true) - : ir_builder_(ir_builder), prevent_unrolling_(prevent_unrolling) {} + bool prevent_unrolling = true, + bool prevent_vectorization = true) + : ir_builder_(ir_builder), + prevent_unrolling_(prevent_unrolling), + prevent_vectorization_(prevent_vectorization) {} // Generates the following control flow structure: // @@ -118,6 +121,7 @@ class KernelSupportLibrary { private: llvm::IRBuilder<>* ir_builder_; bool prevent_unrolling_; + bool prevent_vectorization_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc index 83d35cb9ef..7b227ce294 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc @@ -34,21 +34,24 @@ namespace llvm_ir { ForLoop::ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - llvm::Value* step, bool prevent_unrolling) + llvm::Value* step, bool prevent_unrolling, + bool prevent_vectorization) : prefix_(prefix.ToString()), suffix_(suffix.ToString()), start_index_(start_index), end_index_(end_index), step_(step), insert_before_bb_(nullptr), - prevent_unrolling_(prevent_unrolling) {} + prevent_unrolling_(prevent_unrolling), + prevent_vectorization_(prevent_vectorization) {} /* static */ std::unique_ptr ForLoop::EmitForLoop( tensorflow::StringPiece prefix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling) { - std::unique_ptr loop(new ForLoop( - prefix, /*suffix=*/"", start_index, end_index, step, prevent_unrolling)); + bool prevent_unrolling, bool prevent_vectorization) { + std::unique_ptr loop(new ForLoop(prefix, /*suffix=*/"", start_index, + end_index, step, prevent_unrolling, + prevent_vectorization)); loop->Emit(ir_builder); return loop; } @@ -127,14 +130,12 @@ void ForLoop::Emit(llvm::IRBuilder<>* ir_builder) { ir_builder->CreateStore(indvar_inc, indvar_address); llvm::BranchInst* back_branch = ir_builder->CreateBr(header_bb_); - if (prevent_unrolling_) { - const char* const kLlvmLoopUnrollDisableMDName = "llvm.loop.unroll.disable"; - llvm::LLVMContext* ctx = &back_branch->getContext(); - + std::vector loop_metadata = GetLoopMetadata(ir_builder); + if (!loop_metadata.empty()) { + llvm::LLVMContext* ctx = &start_index_->getContext(); auto temp_node = llvm::MDNode::getTemporary(*ctx, llvm::None); - auto no_unroll_node = llvm::MDNode::get( - *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollDisableMDName)}); - auto loop_id = llvm::MDNode::get(*ctx, {temp_node.get(), no_unroll_node}); + loop_metadata.insert(loop_metadata.begin(), temp_node.get()); + auto loop_id = llvm::MDNode::get(*ctx, loop_metadata); loop_id->replaceOperandWith(0, loop_id); back_branch->setMetadata(llvm::LLVMContext::MD_loop, loop_id); } @@ -143,6 +144,27 @@ void ForLoop::Emit(llvm::IRBuilder<>* ir_builder) { ir_builder->SetInsertPoint(exit_bb_); } +std::vector ForLoop::GetLoopMetadata( + llvm::IRBuilder<>* ir_builder) { + const char* const kLlvmLoopUnrollDisableMDName = "llvm.loop.unroll.disable"; + const char* const kLlvmLoopVectorizeMDName = "llvm.loop.vectorize.enable"; + llvm::LLVMContext* ctx = &start_index_->getContext(); + + std::vector result; + if (prevent_unrolling_) { + result.push_back(llvm::MDNode::get( + *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollDisableMDName)})); + } + + if (prevent_vectorization_) { + result.push_back(llvm::MDNode::get( + *ctx, {llvm::MDString::get(*ctx, kLlvmLoopVectorizeMDName), + llvm::ConstantAsMetadata::get(ir_builder->getFalse())})); + } + + return result; +} + string ForLoop::GetQualifiedName(tensorflow::StringPiece name) { return llvm_ir::IrName(prefix_, llvm_ir::IrName(name, suffix_)); } @@ -156,23 +178,25 @@ llvm::BasicBlock* ForLoop::CreateLoopBB(tensorflow::StringPiece name, std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { return AddLoop(suffix, start_index, end_index, ir_builder_->getInt64(1), - prevent_unrolling); + prevent_unrolling, prevent_vectorization); } std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { if (inner_loop_body_bb_ != nullptr) { // Create this loop inside the previous one. ir_builder_->SetInsertPoint(&*inner_loop_body_bb_->getFirstInsertionPt()); } std::unique_ptr loop(new ForLoop( /*prefix=*/name_, suffix, start_index, end_index, stride, - prevent_unrolling)); + prevent_unrolling, prevent_vectorization)); loop->Emit(ir_builder_); if (outer_loop_preheader_bb_ == nullptr) { @@ -191,20 +215,24 @@ std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, tensorflow::StringPiece suffix, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), - ir_builder_->getInt64(end_index), prevent_unrolling); + ir_builder_->getInt64(end_index), prevent_unrolling, + prevent_vectorization); } std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling) { + bool prevent_unrolling, + bool prevent_vectorization) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), ir_builder_->getInt64(end_index), - ir_builder_->getInt64(stride), prevent_unrolling); + ir_builder_->getInt64(stride), prevent_unrolling, + prevent_vectorization); } IrArray::Index ForLoopNest::AddLoopsForShape(const Shape& shape, diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h index 90f7c7df9e..20069ce5a2 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h @@ -71,12 +71,10 @@ class ForLoop { // // If `prevent_unrolling` is true then emit metadata that directs LLVM to not // unroll the generated loop. - static std::unique_ptr EmitForLoop(tensorflow::StringPiece prefix, - llvm::Value* start_index, - llvm::Value* end_index, - llvm::Value* step, - llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = false); + static std::unique_ptr EmitForLoop( + tensorflow::StringPiece prefix, llvm::Value* start_index, + llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, + bool prevent_unrolling = false, bool prevent_vectorization = false); // The names of the blocks follow LLVM's conventions. Control flow amongst the // blocks for the example C code looks like: @@ -130,7 +128,7 @@ class ForLoop { ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, - bool prevent_unrolling); + bool prevent_unrolling, bool prevent_vectorization); // Emit the loop at the insert point of the builder. void Emit(llvm::IRBuilder<>* ir_builder); @@ -142,6 +140,10 @@ class ForLoop { // they are set. string GetQualifiedName(tensorflow::StringPiece name); + // Return a list of metadata nodes that should be associated with the + // llvm::Loop for this `ForLoop`. + std::vector GetLoopMetadata(llvm::IRBuilder<>* ir_builder); + string prefix_; string suffix_; llvm::Value* start_index_; @@ -160,6 +162,7 @@ class ForLoop { llvm::BasicBlock* exit_bb_; llvm::Value* indvar_; bool prevent_unrolling_; + bool prevent_vectorization_; TF_DISALLOW_COPY_AND_ASSIGN(ForLoop); }; @@ -185,24 +188,28 @@ class ForLoopNest { std::unique_ptr AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // Like the above, except that it defaults to a stride of one. std::unique_ptr AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // A convenient wrapper of the other flavor of AddLoop. The given start and // end index are constant. std::unique_ptr AddLoop(int64 start_index, int64 end_index, int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // Like the above, except that it defaults to a stride of one. std::unique_ptr AddLoop(int64 start_index, int64 end_index, tensorflow::StringPiece suffix, - bool prevent_unrolling = false); + bool prevent_unrolling = false, + bool prevent_vectorization = false); // Add loops to iterate through the indices within the specified // shape. The returned index collects the induction variables of the -- GitLab From fa198eb0ea0a96d59be193e9dfd19535afc232a0 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 9 Nov 2017 09:25:54 -0800 Subject: [PATCH 0223/1801] Internal Change. PiperOrigin-RevId: 175167946 --- tensorflow/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 8d3d38b5a1..8cb7edcc50 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -355,7 +355,7 @@ config_setting( visibility = ["//visibility:public"], ) -# Make a dummy rule that we can chaqnge "default" in select statements to. +# Make a dummy rule that we can change "default" in select statements to. # to disable dependencies in copybara. config_setting( name = "dummy_disabled_internal", -- GitLab From c0ef3c84123446e8d9988bc393348b82f4ebdffb Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 9 Nov 2017 10:12:58 -0800 Subject: [PATCH 0224/1801] Adds explicity docstring about TF version in examples. PiperOrigin-RevId: 175174326 --- tensorflow/examples/learn/iris.py | 5 ++++- tensorflow/examples/learn/wide_n_deep_tutorial.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 0a50b3ba87..03e60972aa 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -11,7 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Example of DNNClassifier for Iris plant dataset.""" +"""Example of DNNClassifier for Iris plant dataset. + +This example uses APIs in Tensorflow 1.4 or above. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/examples/learn/wide_n_deep_tutorial.py b/tensorflow/examples/learn/wide_n_deep_tutorial.py index e447b3e24e..072353392a 100644 --- a/tensorflow/examples/learn/wide_n_deep_tutorial.py +++ b/tensorflow/examples/learn/wide_n_deep_tutorial.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Example code for TensorFlow Wide & Deep Tutorial using TF.Learn API.""" +"""Example code for TensorFlow Wide & Deep Tutorial using TF High Level API. + +This example uses APIs in Tensorflow 1.4 or above. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -- GitLab From d4f1845fc1aa57a2e872edc28d66901798205a85 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 10:27:15 -0800 Subject: [PATCH 0225/1801] Supports multi-dimensional logits and labels in multi_label head and some cleanup. PiperOrigin-RevId: 175176635 --- .../estimator/python/estimator/head.py | 143 ++++++++---- .../estimator/python/estimator/head_test.py | 206 +++++++++++++++++- tensorflow/python/estimator/canned/head.py | 131 +++++------ .../python/estimator/canned/head_test.py | 10 +- 4 files changed, 379 insertions(+), 111 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index e344ee3c3e..a9311a20f1 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops @@ -48,7 +49,20 @@ def multi_class_head(n_classes, Uses `sparse_softmax_cross_entropy` loss. - This head expects to be fed integer labels specifying the class index. + The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. + In many applications, the shape is `[batch_size, n_classes]`. + + `labels` must be a dense `Tensor` with shape matching `logits`, namely + `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string + `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, + `labels` must be an integer `Tensor` with values specifying the class index. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. + + The loss is the weighted sum over the input dimensions. Namely, if the input + labels have shape `[batch_size, 1]`, the loss is the weighted sum over + `batch_size`. Args: n_classes: Number of classes, must be greater than 2 (for 2 classes, use @@ -57,11 +71,11 @@ def multi_class_head(n_classes, `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. - label_vocabulary: A list of strings represents possible label values. If it - is not given, that means labels are already encoded as integer within - [0, n_classes). If given, labels must be string type and have any value in - `label_vocabulary`. Also there will be errors if vocabulary is not - provided and labels are string. + label_vocabulary: A list or tuple of strings representing possible label + values. If it is not given, that means labels are already encoded as an + integer within [0, n_classes). If given, labels must be of string type and + have any value in `label_vocabulary`. Note that errors will be raised if + `label_vocabulary` is not provided but labels are strings. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -84,7 +98,20 @@ def binary_classification_head( This head uses `sigmoid_cross_entropy_with_logits` loss. - This head expects to be fed float labels of shape `(batch_size, 1)`. + The head expects `logits` with shape `[D0, D1, ... DN, 1]`. + In many applications, the shape is `[batch_size, 1]`. + + `labels` must be a dense `Tensor` with shape matching `logits`, namely + `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string + `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, + `labels` must be float `Tensor` with values in the interval `[0, 1]`. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. + + The loss is the weighted sum over the input dimensions. Namely, if the input + labels have shape `[batch_size, 1]`, the loss is the weighted sum over + `batch_size`. Args: weight_column: A string or a `_NumericColumn` created by @@ -96,11 +123,11 @@ def binary_classification_head( generated for each threshold value. This threshold is applied to the logistic values to determine the binary classification (i.e., above the threshold is `true`, below is `false`. - label_vocabulary: A list of strings represents possible label values. If it - is not given, that means labels are already encoded within [0, 1]. If - given, labels must be string type and have any value in - `label_vocabulary`. Also there will be errors if vocabulary is not - provided and labels are string. + label_vocabulary: A list or tuple of strings representing possible label + values. If it is not given, labels must be float with values within + [0, 1]. If given, labels must be string type and have any value in + `label_vocabulary`. Note that errors will be raised if `label_vocabulary` + is not provided but labels are strings. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. @@ -120,9 +147,22 @@ def binary_classification_head( def regression_head(weight_column=None, label_dimension=1, name=None): - """Creates a `_Head` for regression using the mean squared loss. + """Creates a `_Head` for regression using the `mean_squared_error` loss. + + The loss is the weighted sum over all input dimensions. Namely, if the input + labels have shape `[batch_size, label_dimension]`, the loss is the weighted + sum over both `batch_size` and `label_dimension`. + + The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`. + In many applications, the shape is `[batch_size, label_dimension]`. + + The `labels` shape must match `logits`, namely + `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape + `[D0, D1, ... DN]` is also supported. - Uses `mean_squared_error` loss. + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or + `[D0, D1, ... DN, label_dimension]`. Args: weight_column: A string or a `_NumericColumn` created by @@ -156,15 +196,29 @@ def multi_label_head(n_classes, or more associated labels, from a discrete set. This is distinct from `multi_class_head` which has exactly one label per example. - Uses `sigmoid_cross_entropy` loss averaged over classes. Expects labels as a - multi-hot tensor of shape `[batch_size, n_classes]`, or as an integer - `SparseTensor` of class indices. + Uses `sigmoid_cross_entropy` loss average over classes and weighted sum over + the batch. Namely, if the input logits have shape `[batch_size, n_classes]`, + the loss is the average over `n_classes` and the weighted sum over + `batch_size`. + + The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many + applications, the shape is `[batch_size, label_n_classes]`. + + Labels can be: + * A multi-hot tensor of shape `[D0, D1, ... DN, n_classes]` + * An integer `SparseTensor` of class indices. The `dense_shape` must be + `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`. + * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape` + must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary`. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features)` as arguments and returns unreduced loss with - shape `[batch_size, 1]`. `loss_fn` must support indicator `labels` with shape - `[batch_size, n_classes]`. Namely, the head applies `label_vocabulary` to the - input labels before passing them to `loss_fn`. + shape `[D0, D1, ... DN, 1]`. `loss_fn` must support indicator `labels` with + shape `[D0, D1, ... DN, n_classes]`. Namely, the head applies + `label_vocabulary` to the input labels before passing them to `loss_fn`. Args: n_classes: Number of classes, must be greater than 1 (for 1 class, use @@ -191,7 +245,7 @@ def multi_label_head(n_classes, An instance of `_Head` for multi-label classification. Raises: - ValueError: if `n_classes` or `thresholds` is invalid. + ValueError: if `n_classes`, `thresholds`, or `loss_fn` is invalid. """ thresholds = tuple(thresholds) if thresholds else tuple() if n_classes is None or n_classes < 2: @@ -259,26 +313,36 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) + return math_ops.to_int64( + sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) else: - label_ids = labels - return math_ops.to_int64( - sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) - msg = ('labels shape must be [batch_size, {}]. ' - 'Given: ').format(self._n_classes) - labels_shape = array_ops.shape(labels) - check_rank_op = control_flow_ops.Assert( - math_ops.equal(array_ops.rank(labels), 2), - data=[msg, labels_shape]) - check_label_dim = control_flow_ops.Assert( - math_ops.equal(labels_shape[-1], self._n_classes), - data=[msg, labels_shape]) - with ops.control_dependencies([check_rank_op, check_label_dim]): - return array_ops.identity(labels) + err_msg = ( + r'labels must be an integer SparseTensor with values in ' + r'[0, {})'.format(self._n_classes)) + assert_int = check_ops.assert_integer( + labels.values, message=err_msg) + assert_less = check_ops.assert_less( + labels.values, + ops.convert_to_tensor(self._n_classes, dtype=labels.dtype), + message=err_msg) + assert_greater = check_ops.assert_non_negative( + labels.values, message=err_msg) + with ops.control_dependencies( + [assert_int, assert_less, assert_greater]): + return math_ops.to_int64( + sparse_ops.sparse_to_indicator(labels, self._n_classes)) + err_msg = ( + r'labels must be an integer indicator Tensor with values in [0, 1]') + return head_lib._assert_range(labels, 2, message=err_msg) # pylint:disable=protected-access, def create_loss(self, features, mode, logits, labels): """See `Head`.""" del mode # Unused for this head. + logits = ops.convert_to_tensor(logits) processed_labels = self._process_labels(labels) + processed_labels = head_lib._check_dense_labels_match_logits_and_reshape( # pylint:disable=protected-access + labels=processed_labels, logits=logits, + expected_labels_dimension=self.logits_dimension) if self._loss_fn: unweighted_loss = _call_loss_fn( loss_fn=self._loss_fn, labels=processed_labels, logits=logits, @@ -290,7 +354,8 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access # Averages loss over classes. unweighted_loss = math_ops.reduce_mean( unweighted_loss, axis=-1, keep_dims=True) - weights = head_lib._weights(features, self._weight_column) # pylint:disable=protected-access, + weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, + features=features, weight_column=self._weight_column, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -305,7 +370,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" with ops.name_scope(self._name, 'head'): - logits = head_lib._check_logits(logits, self.logits_dimension) # pylint:disable=protected-access + logits = head_lib._check_logits_final_dim(logits, self.logits_dimension) # pylint:disable=protected-access # Predict. pred_keys = prediction_keys.PredictionKeys @@ -335,6 +400,8 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access # Eval. if mode == model_fn.ModeKeys.EVAL: + weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, + features=features, weight_column=self._weight_column, logits=logits) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, @@ -342,7 +409,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access eval_metric_ops=self._eval_metric_ops( labels=processed_labels, probabilities=probabilities, - weights=head_lib._weights(features, self._weight_column), # pylint:disable=protected-access, + weights=weights, weighted_sum_loss=weighted_sum_loss, example_weight_sum=example_weight_sum)) diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index fd8c53f6a9..d1cf909004 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -316,13 +316,14 @@ class MultiLabelHead(test.TestCase): _initialize_variables(self, monitored_session.Scaffold()) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'labels shape must be \[batch_size, 2\]\. Given: \] \[2 1\]'): + r'\[expected_labels_shape: \] \[2 2\] \[labels_shape: \] \[2 1\]'): actual_weighted_sum_loss.eval({ labels_placeholder: np.array([[1], [1]], dtype=np.int64) }) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'labels shape must be \[batch_size, 2\]\. Given: \] \[2\]'): + r'labels shape must be \[D0, D1, ... DN, 2\]\..*' + r'\[Received shape: \] \[2\]'): actual_weighted_sum_loss.eval({ labels_placeholder: np.array([1, 1], dtype=np.int64) }) @@ -387,9 +388,11 @@ class MultiLabelHead(test.TestCase): logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=None) - def _test_eval(self, head, logits, labels, expected_loss, expected_metrics): + def _test_eval( + self, head, logits, labels, expected_loss, expected_metrics, + features=None): spec = head.create_estimator_spec( - features={'x': np.array(((42,),), dtype=np.int32)}, + features=features or {}, mode=model_fn.ModeKeys.EVAL, logits=logits, labels=labels) @@ -655,6 +658,54 @@ class MultiLabelHead(test.TestCase): labels=None, train_op_fn=_no_op_train_fn) + def test_train_invalid_indicator_labels(self): + head = head_lib.multi_label_head(n_classes=2) + logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) + # The value 2 is outside the allowed range. + labels = np.array([[2, 0], [1, 1]], dtype=np.int64) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'labels must be an integer indicator Tensor with values in ' + r'\[0, 1\]'): + sess.run(spec.loss) + + def test_train_invalid_sparse_labels(self): + head = head_lib.multi_label_head(n_classes=2) + logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) + # The value 2 is outside the allowed range. + labels = sparse_tensor.SparseTensor( + values=[2, 0, 1], + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'labels must be an integer SparseTensor with values in \[0, 2\)'): + sess.run(spec.loss) + def _test_train(self, head, logits, labels, expected_loss): expected_train_result = 'my_train_op' def _train_op_fn(loss): @@ -791,6 +842,153 @@ class MultiLabelHead(test.TestCase): metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 3, }, summary_str, tol) + def test_multi_dim_weighted_train_create_loss(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) + # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 + # = [[20/3, 10/3], [4, 8]] + # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 + expected_weighted_sum_loss = 39.6667 + expected_example_weight_sum = np.sum(weights) + actual_weighted_sum_loss, actual_example_weight_sum, _ = head.create_loss( + features={'weights': weights}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels) + atol = 1.e-3 + with self.test_session(): + _initialize_variables(self, monitored_session.Scaffold()) + self.assertAllClose( + expected_weighted_sum_loss, actual_weighted_sum_loss.eval(), + atol=atol) + self.assertAllClose( + expected_example_weight_sum, actual_example_weight_sum.eval(), + atol=atol) + + def test_multi_dim_weighted_train(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) + # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 + # = [[20/3, 10/3], [4, 8]] + # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 + expected_loss = 39.6667 + expected_train_result = 'my_train_op' + def _train_op_fn(loss): + return string_ops.string_join( + [constant_op.constant(expected_train_result), + string_ops.as_string(loss, precision=3)]) + + spec = head.create_estimator_spec( + features={'weights': weights}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + + atol = 1.e-3 + with self.test_session() as sess: + _initialize_variables(self, monitored_session.Scaffold()) + loss, train_result = sess.run((spec.loss, spec.train_op)) + self.assertAllClose(expected_loss, loss, atol=atol) + self.assertEqual( + six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), + train_result) + + def test_multi_dim_weights_wrong_inner_dim(self): + """Logits and labels of shape [2, 2, 3], weights [2, 1].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1.], [2.]], dtype=np.float32) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={'weights': weights}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session(): + _initialize_variables(self, monitored_session.Scaffold()) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'): + spec.loss.eval() + + def test_multi_dim_weights_wrong_outer_dim(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2, 3].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[[1., 1., 1.], [1.5, 1.5, 1.5]], + [[2., 2., 2.], [2.5, 2.5, 2.5]]], dtype=np.float32) + weights_placeholder = array_ops.placeholder(dtype=dtypes.float32) + def _train_op_fn(loss): + del loss + return control_flow_ops.no_op() + + spec = head.create_estimator_spec( + features={'weights': weights_placeholder}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + with self.test_session(): + _initialize_variables(self, monitored_session.Scaffold()) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 2 3\]'): + spec.loss.eval({weights_placeholder: weights}) + + def test_multi_dim_weighted_eval(self): + """Logits and labels of shape [2, 2, 3], weights [2, 2].""" + head = head_lib.multi_label_head(n_classes=3, weight_column='weights') + + logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], + [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) + labels = np.array([[[1, 0, 0], [1, 0, 0]], + [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) + weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) + # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 + # = [[20/3, 10/3], [4, 8]] + # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667 + expected_loss = 39.6667 + keys = metric_keys.MetricKeys + expected_metrics = { + keys.LOSS_MEAN: expected_loss / np.sum(weights), + # auc and auc_pr cannot be reliably calculated for only 4 samples, but + # this assert tests that the algorithm remains consistent. + keys.AUC: 0.4977, + keys.AUC_PR: 0.6645, + } + self._test_eval( + head=head, + features={'weights': weights}, + logits=logits, + labels=labels, + expected_loss=expected_loss, + expected_metrics=expected_metrics) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index 2c3e18cb12..eaed412c8b 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -264,26 +264,55 @@ def _check_dense_labels_match_logits_and_reshape( return array_ops.identity(labels, name=scope) -def _check_weights_match_logits_and_reshape(weights, logits): - """Checks that weights shape matches logits and reshapes if needed. +def _get_weights_and_check_match_logits( + features, weight_column, logits, allow_per_logit_weights=False): + """Fetches weights from features and checks that the shape matches logits. Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape can be either: - * [D0, D1, ... DN, logits_dimension] + * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`. * [D0, D1, ... DN, 1] * [D0, D1, ... DN]: In this case, weights is reshaped into [D0, D1, ... DN, 1] to work with weight broadcasting rules. Args: - weights: weights Tensor. + features: The features dict that contains weights. + weight_column: The weight column. If not given, this method returns 1. logits: logits Tensor. + allow_per_logit_weights: Boolean. Whether we allow weights along the logits + dimension, namely shape `[D0, D1, ... DN, logits_dimension]`. Returns: Validated and reshaped weights Tensor. + Raises: + ValueError: If the weights `Tensor` cannot be cast into float. """ - err_msg = ( - 'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or ' - '[D0, D1, ... DN, logits_dimension]') - with ops.name_scope(None, 'weights', (weights, logits)) as scope: + if allow_per_logit_weights: + err_msg = ( + 'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or ' + '[D0, D1, ... DN, logits_dimension]') + else: + err_msg = ( + 'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]') + with ops.name_scope( + None, 'weights', + values=tuple(six.itervalues(features)) + (logits,)) as scope: + # Fetch the weights. + if weight_column is None: + return 1. + if isinstance(weight_column, six.string_types): + weight_column = feature_column_lib.numeric_column( + key=weight_column, shape=(1,)) + if not isinstance(weight_column, feature_column_lib._NumericColumn): # pylint: disable=protected-access + raise TypeError('Weight column must be either a string or _NumericColumn.' + ' Given type: {}.'.format(type(weight_column))) + weights = weight_column._get_dense_tensor( # pylint: disable=protected-access + feature_column_lib._LazyBuilder(features)) # pylint: disable=protected-access + if not (weights.dtype.is_floating or weights.dtype.is_integer): + raise ValueError('Weight column should be castable to float. ' + 'Given dtype: {}'.format(weights.dtype)) + weights = math_ops.to_float(weights, name='weights') + + # Validate the weights shape. weights_shape = array_ops.shape(weights, name='weights_shape') logits_shape = array_ops.shape(logits, name='logits_shape') if (weights.shape.ndims is not None and logits.shape.ndims is not None and @@ -295,42 +324,24 @@ def _check_weights_match_logits_and_reshape(weights, logits): with ops.control_dependencies([assert_dimension]): return array_ops.expand_dims(weights, -1, name=scope) supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]], axis=0) - condition = math_ops.reduce_any( - [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)), - math_ops.reduce_all(math_ops.equal( - supported_weights_shape, weights_shape))]) - assert_dimension = control_flow_ops.Assert( - condition=condition, - data=[err_msg, 'logits_shape: ', logits_shape, - 'weights_shape: ', weights_shape]) + if allow_per_logit_weights: + condition = math_ops.reduce_any( + [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)), + math_ops.reduce_all(math_ops.equal( + supported_weights_shape, weights_shape))]) + assert_dimension = control_flow_ops.Assert( + condition=condition, + data=[err_msg, 'logits_shape: ', logits_shape, + 'weights_shape: ', weights_shape]) + else: + assert_dimension = check_ops.assert_equal( + supported_weights_shape, weights_shape, message=err_msg, + data=['logits_shape: ', logits_shape, + 'weights_shape: ', weights_shape]) with ops.control_dependencies([assert_dimension]): return array_ops.identity(weights, name=scope) -# TODO(roumposg): Delete once all heads support multi-dim input. -def _check_logits(logits, expected_logits_dimension): - """Check logits type and shape.""" - with ops.name_scope(None, 'logits', (logits,)) as scope: - logits = math_ops.to_float(logits) - logits_shape = array_ops.shape(logits) - assert_rank = check_ops.assert_rank( - logits, 2, data=[logits_shape], - message='logits shape must be [batch_size, logits_dimension]') - with ops.control_dependencies([assert_rank]): - static_shape = logits.shape - if static_shape is not None: - dim1 = static_shape[1] - if (dim1 is not None) and (dim1 != expected_logits_dimension): - raise ValueError( - 'logits shape must be [batch_size, logits_dimension], got %s.' % - (static_shape,)) - assert_dimension = check_ops.assert_equal( - expected_logits_dimension, logits_shape[1], data=[logits_shape], - message='logits shape must be [batch_size, logits_dimension]') - with ops.control_dependencies([assert_dimension]): - return array_ops.identity(logits, name=scope) - - def _check_logits_final_dim(logits, expected_logits_dimension): """Checks that logits shape is [D0, D1, ... DN, logits_dimension].""" with ops.name_scope(None, 'logits', (logits,)) as scope: @@ -575,10 +586,8 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): labels=label_ids, logits=logits, reduction=losses.Reduction.NONE) # Restore the squeezed dim, so unweighted_loss matches the weights shape. unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=-1) - weights = _weights(features, self._weight_column) - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -680,7 +689,7 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): def _binary_logistic_head_with_sigmoid_cross_entropy_loss( weight_column=None, thresholds=None, label_vocabulary=None, name=None): - """Creates a `Head` for single label binary classification. + """Creates a `_Head` for single label binary classification. This head uses `sigmoid_cross_entropy_with_logits` loss. @@ -718,7 +727,7 @@ def _binary_logistic_head_with_sigmoid_cross_entropy_loss( suffixed by `"/" + name`. Also used as `name_scope` when creating ops. Returns: - An instance of `Head` for binary classification. + An instance of `_Head` for binary classification. Raises: ValueError: if `thresholds` contains a value outside of `(0, 1)`. @@ -852,10 +861,8 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): labels = _assert_range(labels, 2) unweighted_loss = nn.sigmoid_cross_entropy_with_logits( labels=labels, logits=logits) - weights = _weights(features, self._weight_column) - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -918,12 +925,8 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): # Eval. if mode == model_fn.ModeKeys.EVAL: - weights = _weights(features, self._weight_column) - # TODO(roumposg): Merge this logic inside _weights once all heads - # support multi-dimensional inputs. - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, @@ -957,7 +960,7 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): def _regression_head_with_mean_squared_error_loss(weight_column=None, label_dimension=1, name=None): - """Creates a `_Head` for regression using the mean squared loss. + """Creates a `_Head` for regression using the `mean_squared_error` loss. The loss is the weighted sum over all input dimensions. Namely, if the input labels have shape `[batch_size, label_dimension]`, the loss is the weighted @@ -1023,10 +1026,9 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): labels = math_ops.to_float(labels) unweighted_loss = losses.mean_squared_error( labels=labels, predictions=logits, reduction=losses.Reduction.NONE) - weights = _weights(features, self._weight_column) - if self._weight_column is not None: - weights = _check_weights_match_logits_and_reshape( - weights=weights, logits=logits) + weights = _get_weights_and_check_match_logits( + features=features, weight_column=self._weight_column, logits=logits, + allow_per_logit_weights=True) weighted_sum_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) # _weights() can return 1. @@ -1111,18 +1113,19 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): train_op=train_op_fn(weighted_sum_loss)) -def _assert_range(labels, n_classes): +def _assert_range(labels, n_classes, message=None): with ops.name_scope(None, 'assert_range', (labels,)): assert_less = check_ops.assert_less( labels, ops.convert_to_tensor(n_classes, dtype=labels.dtype), - message='Label IDs must < n_classes') + message=message or 'Label IDs must < n_classes') assert_greater = check_ops.assert_non_negative( - labels, message='Label IDs must >= 0') + labels, message=message or 'Label IDs must >= 0') with ops.control_dependencies((assert_less, assert_greater)): return array_ops.identity(labels) +# TODO(b/69000400): Delete this method. def _weights(features, weight_column): """Fetches weights from features.""" with ops.name_scope(None, 'weights', values=features.values()): diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 0a4ea7d81c..4497cd26f2 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -987,12 +987,14 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): spec.loss.eval() def test_multi_dim_train_weights_wrong_outer_dim(self): - """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 2].""" + """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 3].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) + weights = np.array([[[1., 1.1, 1.2], [1.5, 1.6, 1.7]], + [[2., 2.1, 2.2], [2.5, 2.6, 2.7]]]) weights_placeholder = array_ops.placeholder(dtype=dtypes.float32) def _no_op_train_fn(loss): del loss @@ -1008,10 +1010,8 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): _initialize_variables(self, monitored_session.Scaffold()) with self.assertRaisesRegexp( errors.InvalidArgumentError, - r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 2\]'): - spec.loss.eval({ - weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]], - [[2., 2.1], [2.5, 2.6]]])}) + r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 3\]'): + spec.loss.eval({weights_placeholder: weights}) def test_multi_dim_weighted_eval(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2].""" -- GitLab From 0dbb1ad1d53976050180fc2e2289d768e78e300f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 10:35:13 -0800 Subject: [PATCH 0226/1801] Allow a key type without a constructor that takes an int in Squawd. PiperOrigin-RevId: 175178089 --- .../lib/quantiles/weighted_quantiles_buffer.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h index 5e316538ce..70037d5bd8 100644 --- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h +++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h @@ -33,9 +33,9 @@ template Date: Thu, 9 Nov 2017 12:01:53 -0800 Subject: [PATCH 0227/1801] Fix typo in tensorflow/python/layers/base_test.py COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/14412 from yifeif:yifeif-patch-3 4b91380c6fc1f995d48a5f184e7307f776541bd0 PiperOrigin-RevId: 175192097 --- tensorflow/python/estimator/BUILD | 2 -- tensorflow/python/layers/base_test.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index dba7761700..03f386e9cf 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -245,8 +245,6 @@ py_test( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/feature_column", - "//third_party/py/numpy", - "//third_party/py/pandas", "@six_archive//:six", ], ) diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 7ddfe37827..509ad5a7af 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -47,7 +47,7 @@ class BaseLayerTest(test.TestCase): self.assertEqual(layer.trainable_variables, []) self.assertEqual(layer.non_trainable_variables, []) if context.in_graph_mode(): - # updates, losses only suppported in GRAPH mode + # updates, losses only supported in GRAPH mode self.assertEqual(layer.updates, []) self.assertEqual(layer.losses, []) self.assertEqual(layer.built, False) -- GitLab From 2ad408da9d78bbfc76374e1cd027b0e6932fd989 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 9 Nov 2017 12:22:01 -0800 Subject: [PATCH 0228/1801] `replicate_model_fn` supports aggregating gradients in IndexedSlices. `gradients.gradients` may return computed gradients in IndexedSlices as opposed to a Tensor: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/gradients_impl.py#L881. `replicate_model_fn` currently uses math_ops.add_n to aggregate gradients from all towers. It doesn't work with IndexedSlices and thus needs to be handled separately. PiperOrigin-RevId: 175194893 --- .../python/estimator/replicate_model_fn.py | 25 +++++- .../estimator/replicate_model_fn_test.py | 87 +++++++++++++++++-- 2 files changed, 104 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index 7005a647db..421bf18c45 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -34,10 +34,12 @@ from tensorflow.python.estimator import util from tensorflow.python.estimator.export import export_output as export_output_lib from tensorflow.python.framework import device as framework_device from tensorflow.python.framework import ops as ops_lib +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients as gradients_lib from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib @@ -183,10 +185,17 @@ def _split_batch(features, labels, number_of_shards, device): """Split input features and labes into batches.""" def split_dictionary(dictionary): + """Split a dictionary into shards.""" shards = [{} for _ in range(number_of_shards)] for name, tensor in six.iteritems(dictionary): - for i, shard in enumerate(array_ops.split(tensor, number_of_shards)): - shards[i][name] = shard + if isinstance(tensor, sparse_tensor.SparseTensor): + for i, shard in enumerate( + sparse_ops.sparse_split( + sp_input=tensor, num_split=number_of_shards, axis=0)): + shards[i][name] = shard + else: + for i, shard in enumerate(array_ops.split(tensor, number_of_shards)): + shards[i][name] = shard return shards with ops_lib.name_scope('split_inputs'): @@ -313,7 +322,17 @@ def _call_optimizer_fn(optimizer_fn, params): def _compute_sum_on_device(values, device, name=None): with ops_lib.device(device): - return math_ops.add_n(values, name=name) + if isinstance(values[0], ops_lib.IndexedSlices): + if name: + raise ValueError('The name {} is not expected to be given to ' + 'IndexedSlices {}'.format(name, values)) + + values_concat = array_ops.concat([v.values for v in values], axis=0) + indices_concat = array_ops.concat([v.indices for v in values], axis=0) + return ops_lib.IndexedSlices(values_concat, indices_concat, + values[0].dense_shape) + else: + return math_ops.add_n(values, name=name) def _train_spec(tower_specs, diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index ce286c33b0..c90169af8c 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -65,20 +65,35 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): data = np.linspace( 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) x_data = data.reshape(batch_size, input_dimension) + categorical_data = np.random.random_integers( + 0, len(x_data), size=len(x_data)) y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) train_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data}, + x={'x': x_data, + 'categories': categorical_data}, y=y_data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False) + x={'x': x_data, + 'categories': categorical_data}, + y=y_data, + batch_size=batch_size, + shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( - x={'x': x_data}, batch_size=batch_size, shuffle=False) + x={'x': x_data, + 'categories': categorical_data}, + batch_size=batch_size, + shuffle=False) feature_columns = [ - feature_column.numeric_column('x', shape=(input_dimension,)) + feature_column.numeric_column('x', shape=(input_dimension,)), + feature_column.indicator_column( + feature_column.categorical_column_with_vocabulary_list( + 'categories', + vocabulary_list=np.linspace( + 0., len(x_data), len(x_data), dtype=np.int64))) ] estimator = dnn.DNNClassifier( @@ -858,7 +873,7 @@ class LocalDeviceSetterTest(test_util.TensorFlowTestCase): class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): - def test_example(self): + def test_vectors(self): with self.test_session() as session: total = replicate_model_fn._compute_sum_on_device( [1.0, 2.0, 3.0, 4.0], device='/device:GPU:0', name='test_sum') @@ -867,6 +882,68 @@ class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): self.assertEqual('test_sum', total.op.name) self.assertEqual(10.0, session.run(total)) + def test_tensors(self): + with self.test_session() as session: + total = replicate_model_fn._compute_sum_on_device( + [[1.0, 2.0], [3.0, 4.0]], device='/device:GPU:0', name='test_sum') + + self.assertEqual('/device:GPU:0', total.device) + self.assertEqual('test_sum', total.op.name) + self.assertAllEqual([4.0, 6.0], session.run(total)) + + def test_indexedslices(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 1], + dense_shape=constant_op.constant([2])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([4.0, 6.0], + session.run(ops_lib.convert_to_tensor(total))) + + def test_indexedslices_higher_dimensions(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([[1.0, 5.0], [2.0, 6.0]]), [0, 1], + dense_shape=constant_op.constant([2, 4])) + b = ops_lib.IndexedSlices( + constant_op.constant([[3.0, 7.0], [4.0, 8.0]]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([[4.0, 12.0], [6.0, 14.0]], + session.run(ops_lib.convert_to_tensor(total))) + + def test_indexedslices_some_dont_overlap(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 3], + dense_shape=constant_op.constant([4])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([4.0, 4.0, 0.0, 2.0], + session.run(ops_lib.convert_to_tensor(total))) + + def test_no_name_for_indexslices(self): + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 1], + dense_shape=constant_op.constant([2])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + with self.assertRaisesRegexp(ValueError, ''): + _ = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0', name='cant_name_indexslices') + class ConcatTensorDictsTest(test_util.TensorFlowTestCase): -- GitLab From fefb78752cceb499b1c65a80a26a132ae5538695 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 12:24:28 -0800 Subject: [PATCH 0229/1801] Fix cmake build. PiperOrigin-RevId: 175195239 --- tensorflow/contrib/cmake/tf_c.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index f3882e8cf7..3ae28b7601 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -21,7 +21,6 @@ set(tf_c_srcs "${tensorflow_source_dir}/tensorflow/c/c_api_function.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.h" - "${tensorflow_source_dir}/tensorflow/c/eager/tape.cc" "${tensorflow_source_dir}/tensorflow/c/eager/tape.h" "${tensorflow_source_dir}/tensorflow/c/eager/runtime.cc" "${tensorflow_source_dir}/tensorflow/c/eager/runtime.h" -- GitLab From d2e9b66f8ae2dd2709127e64274000df64d3c003 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 12:48:14 -0800 Subject: [PATCH 0230/1801] Use error status instead of assertions to ensure shape consistency PiperOrigin-RevId: 175198248 --- .../core/grappler/costs/graph_properties.cc | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 151455778a..35048a4fcf 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -108,32 +108,35 @@ struct Processor { if (dim1 >= 0 && dim2 >= 0) { CHECK_EQ(dim1, dim2); - RefineDim(dim1, result); + return RefineDim(dim1, result); } else if (dim1 >= 0 && dim2 < 0) { - RefineDim(dim1, result); + return RefineDim(dim1, result); } else if (dim1 < 0 && dim2 >= 0) { - RefineDim(dim2, result); + return RefineDim(dim2, result); } else if (dim1 < -1) { - RefineDim(dim1, result); + return RefineDim(dim1, result); } else if (dim2 < -1) { - RefineDim(dim2, result); + return RefineDim(dim2, result); } else { CHECK_EQ(dim1, dim2); CHECK_EQ(-1, dim1); - RefineDim(-1, result); + return RefineDim(-1, result); } return Status::OK(); } private: - void RefineDim(int64 dim, int64* result) { + Status RefineDim(int64 dim, int64* result) { if (*result >= 0) { - CHECK(*result == dim || dim < 0); + if (!(*result == dim || dim < 0)) { + return errors::InvalidArgument("Inconsistent dimensions detected"); + } } else if (dim >= 0) { *result = dim; } else if (dim < *result) { *result = dim; } + return Status::OK(); } int64 counter = 2; -- GitLab From a928b0ca6450a5ebd903f690c80b480b0318c34c Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 12:54:10 -0800 Subject: [PATCH 0231/1801] Disable tensorflow/contrib/data/python/kernel_tests:prefetching_ops_test. Flaky in open source build. PiperOrigin-RevId: 175199083 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index c1f1d90c5d..d811683ecd 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -448,6 +448,7 @@ py_test( size = "small", srcs = ["prefetching_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_oss"], # b/68785503 deps = [ "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:prefetching_py", -- GitLab From f23a1c0007c8c1af3288e4a8b40b8050a82787e8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:02:38 -0800 Subject: [PATCH 0232/1801] Add per-host input for multi-host setup. PiperOrigin-RevId: 175200199 --- .../contrib/tpu/python/tpu/tpu_config.py | 5 +- .../contrib/tpu/python/tpu/tpu_estimator.py | 111 ++++++++++-------- 2 files changed, 61 insertions(+), 55 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 097acd5ee7..916b9b3082 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -45,10 +45,7 @@ class TPUConfig( is invoked once on each host. To be precise, with a global batch size `train_batch_size` in `TPUEstimator` constructor, the batch size for each shard is `train_batch_size` // #hosts. With Per-Core input pipeline - deployment, the shard batch size is `train_batch_size` // #cores. Note - that this only works for single-host TPU training now (tracked in - b/67051042). For multi-host, please use Per-Core, i.e., `False` for - `per_host_input_for_training`. + deployment, the shard batch size is `train_batch_size` // #cores. tpu_job_name: The name of the TPU job. Typically, this name is auto-inferred within TPUEstimator, however when using ClusterSpec propagation in more esoteric cluster configurations, you may need to specify the job name as a diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 16d712af9e..07877fcc76 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -232,8 +232,10 @@ class _TPUContext(object): mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size) # On TPU - return (global_batch_size // self.num_cores - if self.is_input_sharded_per_core() else global_batch_size) + if self.is_input_sharded_per_core(): + return global_batch_size // self.num_cores + else: + return global_batch_size // self.num_hosts @property def batch_size_for_model_fn(self): @@ -682,6 +684,40 @@ def generate_per_core_enqueue_ops_fn_for_host( return enqueue_ops_fn, (lambda: infeed_queue_holder['instance']) +def generate_per_host_enqueue_ops_fn_for_host( + ctx, input_fn, inputs_structure_recorder, batch_axis, device): + """Generates infeed enqueue ops for per-host input_fn on a single host.""" + infeed_queue_holder = {'instance': None} + + def enqueue_ops_fn(): + with ops.device(device): + num_cores_per_host = ctx.num_of_cores_per_host + inputs = input_fn() + if isinstance(inputs, tuple): + features, labels = inputs + else: + features, labels = inputs, None + inputs_structure_recorder.validate_and_record_structure( + features, labels) + unsharded_tensor_list = ( + inputs_structure_recorder.flatten_features_and_labels( + features, labels)) + + infeed_queue = tpu_feed.InfeedQueue( + tuple_types=[t.dtype for t in unsharded_tensor_list], + tuple_shapes=[t.shape for t in unsharded_tensor_list], + shard_dimensions=batch_axis) + infeed_queue_holder['instance'] = infeed_queue + infeed_queue.set_number_of_shards(num_cores_per_host) + + per_host_enqueue_ops = ( + infeed_queue.split_inputs_and_generate_enqueue_ops( + unsharded_tensor_list, + placement_function=lambda x: device)) + return per_host_enqueue_ops + return enqueue_ops_fn, (lambda: infeed_queue_holder['instance']) + + class _InputPipeline(object): """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue. @@ -856,15 +892,15 @@ class _InputPipeline(object): return (enqueue_ops, dequeue_fn) def _invoke_input_fn_and_record_structure(self): + """Deploys the input pipeline and record input structure.""" + enqueue_ops = [] + infeed_queues = [] + num_hosts = self._ctx.num_hosts + tpu_host_placement_fn = self._ctx.tpu_host_placement_function if self._sharded_per_core: # Per-Core input pipeline deployment. - tpu_host_placement_fn = self._ctx.tpu_host_placement_function - enqueue_ops = [] - infeed_queues = [] - # Invoke input pipeline for each core and placed on the corresponding # host. - num_hosts = self._ctx.num_hosts for host_id in range(num_hosts): host_device = tpu_host_placement_fn(host_id=host_id) with ops.device(host_device): @@ -881,48 +917,27 @@ class _InputPipeline(object): # Infeed_queue_getter must be called after enqueue_ops_fn is called. infeed_queues.append(infeed_queue_getter()) - # infeed_queue is used to generate dequeue ops. The only thing it uses for - # dequeue is dtypes and types. So, any one can be used. Here, grab the - # first one. - self._infeed_queue = infeed_queues[0] - return enqueue_ops - else: - # TODO(b/67051042): Extend this to multi-host support. - host_id = 0 - host_device = self._ctx.tpu_host_placement_function(host_id=host_id) - def enqueue_fn(): + for host_id in range(num_hosts): + host_device = tpu_host_placement_fn(host_id=host_id) with ops.device(host_device): with ops.name_scope('input_pipeline_task%d' % (host_id)): - inputs = self._input_fn() - if isinstance(inputs, tuple): - features, labels = inputs - else: - features, labels = inputs, None - self._inputs_structure_recorder.validate_and_record_structure( - features, labels) - unsharded_tensor_list = ( - self._inputs_structure_recorder.flatten_features_and_labels( - features, labels)) - - self._infeed_queue = tpu_feed.InfeedQueue( - tuple_types=[t.dtype for t in unsharded_tensor_list], - tuple_shapes=[t.shape for t in unsharded_tensor_list], - shard_dimensions=self._batch_axis) - self._infeed_queue.set_number_of_shards(self._ctx.num_cores) - - def placement_fn(core_id): - return self._ctx.tpu_host_placement_function(core_id=core_id) - return ( - self._infeed_queue.split_inputs_and_generate_enqueue_ops( - unsharded_tensor_list, - placement_function=placement_fn)) + enqueue_ops_fn, infeed_queue_getter = ( + generate_per_host_enqueue_ops_fn_for_host( + self._ctx, self._input_fn, self._inputs_structure_recorder, + self._batch_axis, host_device)) - if _WRAP_INPUT_FN_INTO_WHILE_LOOP: - return _wrap_computation_in_while_loop(device=host_device, - op_fn=enqueue_fn) - else: - return enqueue_fn() + if _WRAP_INPUT_FN_INTO_WHILE_LOOP: + enqueue_ops.append(_wrap_computation_in_while_loop( + device=host_device, op_fn=enqueue_ops_fn)) + else: + enqueue_ops.append(enqueue_ops_fn()) + infeed_queues.append(infeed_queue_getter()) + # infeed_queue is used to generate dequeue ops. The only thing it uses for + # dequeue is dtypes and types. So, any one can be used. Here, grab the + # first one. + self._infeed_queue = infeed_queues[0] + return enqueue_ops def _validate_input_pipeline(self): # Perform some sanity checks to log user friendly information. We should @@ -1425,12 +1440,6 @@ class TPUEstimator(estimator_lib.Estimator): 'eval batch size {} must be divisible by number of shards {}' .format(eval_batch_size, config.tpu_config.num_shards)) - if (config.tpu_config.num_shards > 8 and - config.tpu_config.per_host_input_for_training): - # TODO(b/67051042): Support per_host input pipelines when num_shards > 8 - raise NotImplementedError( - 'Per-host input pipelines only available for num_shards <= 8') - # Verifies the model_fn signature according to Estimator framework. estimator_lib._verify_model_fn_args(model_fn, params) # pylint: disable=protected-access # We cannot store config and params in this constructor as parent -- GitLab From aeb1d81c6a1e71d5ae5862f281e8f6f9770066a4 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 9 Nov 2017 13:10:02 -0800 Subject: [PATCH 0233/1801] Disable flaky tests in replicate_model_fn_test.py. I suspect that reducing local variables for eval metrics over more than one tower is flaky, but I haven't figured out why yet. PiperOrigin-RevId: 175201241 --- .../estimator/replicate_model_fn_test.py | 108 +++++++++--------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index c90169af8c..bb06700160 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -223,33 +223,34 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): features, labels, self.params) del estimator_spec - def test_eval(self): - features = np.array([[0.01], [0.002]]) - labels = np.array([[0.01], [0.02]]) - - with self.test_session() as session: - replicated_model_fn = replicate_model_fn.replicate_model_fn( - self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, - labels, self.params) - session.run(variables.local_variables_initializer()) - session.run(variables.global_variables_initializer()) - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - # Accuracy is 0.0 (no match) in the first tower. - # Accuracy is 1.0 (match) in the second tower, since the feature - # times weight "c" happened to be equal to the label. - total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) - - self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) +# TODO(isaprykin): Resolve the source of flakinness. +# def test_eval(self): +# features = np.array([[0.01], [0.002]]) +# labels = np.array([[0.01], [0.02]]) +# +# with self.test_session() as session: +# replicated_model_fn = replicate_model_fn.replicate_model_fn( +# self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) +# estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, +# labels, self.params) +# session.run(variables.local_variables_initializer()) +# session.run(variables.global_variables_initializer()) +# +# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] +# auc, b = estimator_spec.eval_metric_ops['auc'] +# +# session.run([a, b]) +# accuracy = session.run(accuracy) +# auc = session.run(auc) +# +# # Accuracy is 0.0 (no match) in the first tower. +# # Accuracy is 1.0 (match) in the second tower, since the feature +# # times weight "c" happened to be equal to the label. +# total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) +# +# self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) +# self.assertEqual(0, auc) +# self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) def test_predict(self): features = np.array([[0.01], [0.002]]) @@ -523,32 +524,33 @@ class EvalSpecTest(test_util.TensorFlowTestCase): } return metrics - def test_example(self): - with self.test_session() as session: - tower_losses = map(self.create_constant_loss, [2, 4, 6]) - tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) - tower_specs = [ - self.create_estimator_spec(l, m) - for l, m in zip(tower_losses, tower_metrics) - ] - session.run(variables.local_variables_initializer()) - - estimator_spec = replicate_model_fn._eval_spec( - tower_specs, aggregation_device='/device:GPU:0') - - accuracy, a = estimator_spec.eval_metric_ops['accuracy'] - auc, b = estimator_spec.eval_metric_ops['auc'] - - self.assertEqual('/device:CPU:0', accuracy.device) - self.assertEqual('/device:CPU:0', auc.device) - - session.run([a, b]) - accuracy = session.run(accuracy) - auc = session.run(auc) - - self.assertNear((12 - 2) / 12, accuracy, 0.01) - self.assertEqual(0, auc) - self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) +# TODO(isaprykin): Resolve the source of flakinness. +# def test_example(self): +# with self.test_session() as session: +# tower_losses = map(self.create_constant_loss, [2, 4, 6]) +# tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) +# tower_specs = [ +# self.create_estimator_spec(l, m) +# for l, m in zip(tower_losses, tower_metrics) +# ] +# session.run(variables.local_variables_initializer()) +# +# estimator_spec = replicate_model_fn._eval_spec( +# tower_specs, aggregation_device='/device:GPU:0') +# +# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] +# auc, b = estimator_spec.eval_metric_ops['auc'] +# +# self.assertEqual('/device:CPU:0', accuracy.device) +# self.assertEqual('/device:CPU:0', auc.device) +# +# session.run([a, b]) +# accuracy = session.run(accuracy) +# auc = session.run(auc) +# +# self.assertNear((12 - 2) / 12, accuracy, 0.01) +# self.assertEqual(0, auc) +# self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) def test_handles_single_tower(self): with self.test_session() as session: -- GitLab From f2fd274114fd9a6f01439d5f2a2d3ddd2bd3c3f9 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 13:26:45 -0800 Subject: [PATCH 0234/1801] EagerVariableStore.trainable_variables() PiperOrigin-RevId: 175203593 --- .../python/kernel_tests/variable_scope_test.py | 12 ++++++++++++ tensorflow/python/ops/variable_scope.py | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index bd4b12b7e8..5396214956 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -117,6 +117,18 @@ class VariableScopeTest(test.TestCase): w = variable_scope.get_variable("w", []) self.assertEqual(w.dtype.base_dtype, dtypes.float16) + def testEagerVaribleStore(self): + with context.eager_mode(): + store = variable_scope.EagerVariableStore() + with store.as_default(): + v = variable_scope.get_variable("v", shape=(), trainable=True) + w = variable_scope.get_variable("w", shape=(), trainable=False) + + self.assertTrue(v in store.variables()) + self.assertTrue(w in store.variables()) + self.assertTrue(v in store.trainable_variables()) + self.assertFalse(w in store.trainable_variables()) + @test_util.run_in_graph_and_eager_modes() def testInitFromNonTensorValue(self): v = variable_scope.get_variable("v4", initializer=4, dtype=dtypes.int32) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 92fa928eed..9a0ff75594 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1227,6 +1227,11 @@ class EagerVariableStore(object): def variables(self): return self._store._vars.values() # pylint: disable=protected-access + def trainable_variables(self): + # pylint: disable=protected-access + return [x for x in self._store._vars.values() if x._trainable] + # pylint: enable=protected-access + def get_variable(name, shape=None, -- GitLab From 09698c9c268d341c4fd688d4f7f81dcd37f65261 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:29:56 -0800 Subject: [PATCH 0235/1801] Modify quantization to support add ops that occur after all quantizable types, not just Conv2D. PiperOrigin-RevId: 175204002 --- .../contrib/quantize/python/quantize.py | 4 +-- .../contrib/quantize/python/quantize_test.py | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 6382d3f7b4..7db2d863aa 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -89,8 +89,8 @@ def Quantize(graph, op.name[:-len('/depthwise')]) if separable_conv and separable_conv.type == 'Conv2D': continue - if op.type == 'Conv2D': - # Quantize add ops that come after Conv2D + # Quantize add ops that come after Conv2D or DepthwiseConv2dNative. + if op.type in ['Conv2D', 'DepthwiseConv2dNative']: add_context_re = re.search(r'^(.*)/[^/]+/', op.name) if add_context_re is not None: context.add_contexts.add(add_context_re.group(1)) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index eb141a21bd..1e4dd7cf67 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.platform import googletest conv2d = layers.conv2d +separable_conv2d = layers.separable_conv2d class QuantizeTest(test_util.TensorFlowTestCase): @@ -77,6 +78,30 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantization_node_name) self.assertEqual(add_quant.type, quantization_node_name) + def testInsertQuantOpForAddAfterSeparableConv2d(self): + graph = ops.Graph() + with graph.as_default(): + batch_size, height, width, depth = 5, 128, 128, 3 + input1 = array_ops.zeros((batch_size, height, width, depth)) + input2 = array_ops.zeros((batch_size, height / 2, width / 2, depth)) + conv = separable_conv2d(input1, None, [5, 5], stride=2, + depth_multiplier=1.0, padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, scope='test/test') + node = math_ops.add(conv, input2, name='test/add') + node = array_ops.identity(node, name='test/identity') + update_barrier = control_flow_ops.no_op(name='update_barrier') + with ops.control_dependencies([update_barrier]): + array_ops.identity(node, name='control_dependency') + + quantize.Quantize(graph=graph, weight_bits=8, weight_narrow_range=True, + activation_bits=8) + + quantization_node_name = 'FakeQuantWithMinMaxVars' + add_quant = graph.get_operation_by_name('test/add_quant/' + + quantization_node_name) + self.assertEqual(add_quant.type, quantization_node_name) + def _WeightInit(self, stddev): """Returns truncated normal variable initializer. -- GitLab From 2414d38956bb33ba3b70a4258522976d0abe20c0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:30:25 -0800 Subject: [PATCH 0236/1801] Fix GANEstimator docstring. PiperOrigin-RevId: 175204075 --- .../contrib/gan/python/estimator/python/gan_estimator_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index e89993991a..0824ecf616 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -76,7 +76,7 @@ class GANEstimator(estimator.Estimator): return logits # Create GAN estimator. - gan_estimator = estimator.GANEstimator( + gan_estimator = tfgan.estimator.GANEstimator( model_dir, generator_fn=generator_fn, discriminator_fn=discriminator_fn, -- GitLab From 0a39ea335d8a4d911eda6903aba6e498808c675d Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 9 Nov 2017 13:42:15 -0800 Subject: [PATCH 0237/1801] eager: README title tweak. PiperOrigin-RevId: 175205782 --- tensorflow/contrib/eager/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md index ae4b07799f..dcc370cd00 100644 --- a/tensorflow/contrib/eager/README.md +++ b/tensorflow/contrib/eager/README.md @@ -1,4 +1,4 @@ -# TensorFlow Eager Execution +# Eager Execution > *WARNING*: This is a preview/pre-alpha version. The API and performance > characteristics are subject to change. -- GitLab From de5d8eb503234a2bcce5141b564337feb26928ef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 13:49:12 -0800 Subject: [PATCH 0238/1801] Fix bug reported in b/69059093, by skipping rewrites that we can determine have already been applied. Make sure rewrites are idempotent by running the optimizer twice in unit tests. PiperOrigin-RevId: 175206742 --- .../optimizers/arithmetic_optimizer.cc | 12 +-- .../optimizers/arithmetic_optimizer_test.cc | 94 ++++++++++++++++++- 2 files changed, 96 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 14df3caebb..44d16e5a42 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -833,8 +833,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } - if (node->input_size() > 0 && IsAggregate(*node) && - !node_map->GetOutputs(node->name()).empty()) { + if (node->input_size() > 0 && IsAggregate(*node)) { // Discard aggregate nodes with a single input. if (node->input_size() == 1) { return node->input(0); @@ -855,7 +854,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( break; } } - if (all_equal) { + if (all_equal && node_map->GetNode(node->name() + "_const") == nullptr) { // 1. Create constant node with value N. const int N = node->input_size(); const auto type = GetDataTypeFromAttr(*node, "T"); @@ -898,7 +897,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // where all the inputs are Mul nodes. This pattern occurs frequently in // regularization terms for the gradients during training. if (node->input_size() > 1 && IsAggregate(*node) && - !node_map->GetOutputs(node->name()).empty()) { + node_map->GetNode(node->name() + "_hoist") == nullptr) { // Determine the set of common factors if the input nodes are all Mul nodes. std::set common_factors; int i = 0; @@ -1011,8 +1010,9 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } // Fold Conj into Transpose or ConjugateTranspose. - if (node->op() == "Conj" || node->op() == "Transpose" || - node->op() == "ConjugateTranspose") { + if ((node->op() == "Conj" || node->op() == "Transpose" || + node->op() == "ConjugateTranspose") && + node_map->GetNode(node->name() + "_fused") == nullptr) { const NodeDef* input = node_map->GetNode(node->input(0)); const NodeDef* transpose_op = node->op() == "Conj" ? input : node; const NodeDef* conj_op = node->op() == "Conj" ? node : input; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 9f471302c7..60fb47f51a 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -38,8 +38,8 @@ TEST_F(ArithmeticOptimizerTest, NoOp) { ArithmeticOptimizer optimizer; GraphDef output; - Status s = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(s); + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(item.graph.node_size(), output.node_size()); for (int i = 0; i < item.graph.node_size(); ++i) { @@ -66,6 +66,10 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(2, output.node_size()); const NodeDef& new_c1 = output.node(0); @@ -91,6 +95,10 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(4, output.node_size()); const NodeDef& new_c1 = output.node(0); @@ -146,13 +154,17 @@ TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsWithChain) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(6, output.node_size()); EXPECT_EQ("squeeze", output.node(5).input(0)); EXPECT_EQ("c", output.node(2).input(0)); } -TEST_F(ArithmeticOptimizerTest, SimplifyReplaceTrivialSums) { +TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); Output add = ops::Add(s.WithOpName("add"), x, x); @@ -165,6 +177,10 @@ TEST_F(ArithmeticOptimizerTest, SimplifyReplaceTrivialSums) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(5, output.node_size()); const NodeDef& new_const = output.node(3); @@ -178,7 +194,61 @@ TEST_F(ArithmeticOptimizerTest, SimplifyReplaceTrivialSums) { EXPECT_EQ("add_mul", new_id.input(0)); } -TEST_F(ArithmeticOptimizerTest, SimplifyHoistFactor) { +TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { + // Test case from b/69059093. + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output p = ops::Placeholder(s, DT_FLOAT, ops::Placeholder::Shape({10, 10})); + Output add = ops::Add(s.WithOpName("Add"), p, p); + Output add1 = ops::Add(s.WithOpName("Add_1"), p, p); + Output add4 = ops::Add(s.WithOpName("Add_4"), add, add1); + Output add5 = ops::Add(s.WithOpName("Add_5"), add, add1); + Output add6 = ops::Add(s.WithOpName("Add_6"), add4, add5); + Output id = ops::Identity(s.WithOpName("id"), add6); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(11, output.node_size()); + const NodeDef& new_id = output.node(4); + EXPECT_EQ("id", new_id.name()); + EXPECT_EQ("Add_6_mul", new_id.input(0)); + + // Add4 and add5 get deduped, and we rewrite each of the 3 remaining add nodes + // of the form Add(x,x) into Mul(Const(2), x). + const NodeDef& new_add_4_const = output.node(5); + EXPECT_EQ("Add_4_const", new_add_4_const.name()); + EXPECT_EQ("^Add", new_add_4_const.input(0)); + const NodeDef& new_add_4_mul = output.node(6); + EXPECT_EQ("Add_4_mul", new_add_4_mul.name()); + EXPECT_EQ("Add_4_const", new_add_4_mul.input(0)); + EXPECT_EQ("Add_mul", new_add_4_mul.input(1)); + + const NodeDef& new_add_6_const = output.node(7); + EXPECT_EQ("Add_6_const", new_add_6_const.name()); + EXPECT_EQ("^Add_4_mul", new_add_6_const.input(0)); + const NodeDef& new_add_6_mul = output.node(8); + EXPECT_EQ("Add_6_mul", new_add_6_mul.name()); + EXPECT_EQ("Add_6_const", new_add_6_mul.input(0)); + EXPECT_EQ("Add_4_mul", new_add_6_mul.input(1)); + + const NodeDef& new_add_const = output.node(9); + EXPECT_EQ("Add_const", new_add_const.name()); + EXPECT_EQ("^Placeholder", new_add_const.input(0)); + const NodeDef& new_add_mul = output.node(10); + EXPECT_EQ("Add_mul", new_add_mul.name()); + EXPECT_EQ("Add_const", new_add_mul.input(0)); + EXPECT_EQ("Placeholder", new_add_mul.input(1)); +} + +TEST_F(ArithmeticOptimizerTest, HoistFactor) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2}); @@ -195,6 +265,10 @@ TEST_F(ArithmeticOptimizerTest, SimplifyHoistFactor) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(9, output.node_size()); const NodeDef& new_add = output.node(8); @@ -225,6 +299,10 @@ TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("trans_fused", output.node(6).name()); @@ -272,6 +350,10 @@ TEST_F(ArithmeticOptimizerTest, FuseTransposeAndConj) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("conj_fused", output.node(6).name()); @@ -304,6 +386,10 @@ TEST_F(ArithmeticOptimizerTest, FoldTransposeIntoMatMul) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); EXPECT_EQ("matmul_fused", output.node(6).name()); -- GitLab From f6ba27c7355c156a840b5902acfa88cbb304a84e Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 13:56:17 -0800 Subject: [PATCH 0239/1801] Instances per second in the eager microbenchmarks. PiperOrigin-RevId: 175207829 --- tensorflow/python/eager/benchmarks_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 26a70a617d..b555f16f1d 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -66,7 +66,8 @@ class MicroBenchmarks(test.Benchmark): func() end = time.time() mean_us = (end - start) * 1e6 / num_iters - self.report_benchmark(iters=num_iters, wall_time=mean_us) + self.report_benchmark(iters=num_iters, wall_time=mean_us, + extras={"examples_per_sec": num_iters/(end-start)}) def benchmark_create_np_array(self): func = lambda: np.array([3.0]) -- GitLab From 9fe7c29605a5ee1519cceba8e67a6d8413444fac Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 9 Nov 2017 14:10:11 -0800 Subject: [PATCH 0240/1801] Limit internal fragmentation in BFCAllocator to 128mb per allocation. Previously, if you had a very large allocation, it would round up to the next power of 2, and then, if this didn't fit in your GPU's available memory, eat all remaining memory in the device. Now we waste at most 128mb of memory in a large alloc. PiperOrigin-RevId: 175209995 --- tensorflow/core/common_runtime/bfc_allocator.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 38fe247521..6399b8cf55 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -296,12 +296,13 @@ void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes, // it from the free bin structure prior to using. RemoveFreeChunkIterFromBin(&b->free_chunks, citer); - // If we can break the size of the chunk into two reasonably - // large pieces, do so. - // - // TODO(vrv): What should be the criteria when deciding when - // to split? - if (chunk->size >= rounded_bytes * 2) { + // If we can break the size of the chunk into two reasonably large + // pieces, do so. In any case don't waste more than + // kMaxInternalFragmentation bytes on padding this alloc. + const int64 kMaxInternalFragmentation = 128 << 20; // 128mb + if (chunk->size >= rounded_bytes * 2 || + static_cast(chunk->size) - rounded_bytes >= + kMaxInternalFragmentation) { SplitChunk(h, rounded_bytes); chunk = ChunkFromHandle(h); // Update chunk pointer in case it moved } -- GitLab From eadc9f824cef2fbad12675b035721f88458bb1e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 14:14:30 -0800 Subject: [PATCH 0241/1801] Expose padded_batch_and_drop_remainder PiperOrigin-RevId: 175210678 --- tensorflow/contrib/data/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 824ac4298f..6e43ae0e63 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -23,6 +23,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@TextLineDataset @@batch_and_drop_remainder +@@padded_batch_and_drop_remainder @@dense_to_sparse_batch @@enumerate_dataset @@group_by_window @@ -45,6 +46,7 @@ from __future__ import print_function from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch +from tensorflow.contrib.data.python.ops.batching import padded_batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import unbatch from tensorflow.contrib.data.python.ops.dataset_ops import Dataset from tensorflow.contrib.data.python.ops.dataset_ops import get_single_element -- GitLab From d60f6513232bd49b658c188c0597dd119e9a52d8 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Thu, 9 Nov 2017 14:21:25 -0800 Subject: [PATCH 0242/1801] Add tf.nn.softmax_cross_entropy_with_logits_v2 which enables backprop wrt the labels. Clarify current backprop behavior. Original bugfix by Alexandre Passos. PiperOrigin-RevId: 175211803 --- .../python/kernel_tests/xent_op_test.py | 18 ++ tensorflow/python/ops/nn.py | 1 + tensorflow/python/ops/nn_grad.py | 5 +- tensorflow/python/ops/nn_ops.py | 164 +++++++++++++----- .../tools/api/golden/tensorflow.nn.pbtxt | 4 + 5 files changed, 143 insertions(+), 49 deletions(-) diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index 4b3dadc112..43be08f8a1 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -181,6 +181,24 @@ class XentTest(test.TestCase): print("cross entropy gradient err = ", err) self.assertLess(err, 5e-8) + def testGradientLabelWithV2(self): + with self.test_session(): + l = constant_op.constant( + [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5], + shape=[3, 4], + dtype=dtypes.float64, + name="l") + f = constant_op.constant( + [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4], + shape=[3, 4], + dtype=dtypes.float64, + name="f") + x = nn_ops.softmax_cross_entropy_with_logits_v2(labels=l, logits=f, + name="xent") + err = gradient_checker.compute_gradient_error(l, [3, 4], x, [3]) + + self.assertLess(err, 5e-8) + def testSecondGradient(self): with self.test_session() as sess: l = constant_op.constant([0.0, 0.0, 1.0/3, 0.0, diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index 79af3ac117..ee1a00623a 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -74,6 +74,7 @@ See the @{$python/nn} guide. @@softmax @@log_softmax @@softmax_cross_entropy_with_logits +@@softmax_cross_entropy_with_logits_v2 @@sparse_softmax_cross_entropy_with_logits @@weighted_cross_entropy_with_logits @@embedding_lookup diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 557f39fb42..4b406ba840 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -420,7 +420,6 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad): # grad_loss is the backprop for cost, and we multiply it with the gradients # (which is output[1]) # grad_grad is the backprop for softmax gradient. - # There is no gradient for the labels # # Second derivative is just softmax derivative w.r.t. logits. softmax_grad = op.outputs[1] @@ -436,15 +435,15 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad): const_fill_value = tensor_util.constant_value(g) return const_fill_value is not None and (const_fill_value == 0).all() + logits = op.inputs[0] if grad_grad is not None and not IsZero(grad_grad): - logits = op.inputs[0] softmax = nn_ops.softmax(logits) grad += ((grad_grad - array_ops.squeeze( math_ops.matmul(grad_grad[:, None, :], softmax[:, :, None]), axis=1)) * softmax) - return grad, None + return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits)) @ops.RegisterGradient("SparseSoftmaxCrossEntropyWithLogits") diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a37b68c6fa..bdaac65904 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -32,11 +32,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops + # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import +from tensorflow.python.util import deprecation # Aliases for some automatically-generated names. local_response_normalization = gen_nn_ops.lrn @@ -1700,9 +1702,9 @@ def _ensure_xent_args(name, sentinel, labels, logits): raise ValueError("Both labels and logits must be provided.") -def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name - labels=None, logits=None, - dim=-1, name=None): +def softmax_cross_entropy_with_logits_v2(_sentinel=None, # pylint: disable=invalid-name + labels=None, logits=None, + dim=-1, name=None): """Computes softmax cross entropy between `logits` and `labels`. Measures the probability error in discrete classification tasks in which the @@ -1726,6 +1728,10 @@ def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, or `float64`). + Backpropagation will happen into both `logits` and `labels`. To disallow + backpropagation into `labels`, pass label tensors through a `stop_gradients` + before feeding it to this function. + **Note that to avoid confusion, it is required to pass only named arguments to this function.** @@ -1747,57 +1753,123 @@ def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid # could break users who call this with bad labels, but disregard the bad # results. - logits = ops.convert_to_tensor(logits) - labels = ops.convert_to_tensor(labels) - precise_logits = math_ops.cast(logits, dtypes.float32) if ( - logits.dtype == dtypes.float16) else logits - # labels and logits must be of the same type - labels = math_ops.cast(labels, precise_logits.dtype) - input_rank = array_ops.rank(precise_logits) - # For shape inference. - shape = logits.get_shape() + with ops.name_scope( + name, "softmax_cross_entropy_with_logits", [logits, labels]) as name: + logits = ops.convert_to_tensor(logits, name="logits") + labels = ops.convert_to_tensor(labels, name="labels") + precise_logits = math_ops.cast(logits, dtypes.float32) if ( + logits.dtype == dtypes.float16) else logits + # labels and logits must be of the same type + labels = math_ops.cast(labels, precise_logits.dtype) + input_rank = array_ops.rank(precise_logits) + # For shape inference. + shape = logits.get_shape() - # Move the dim to the end if dim is not the last dimension. - if dim is not -1: - def _move_dim_to_end(tensor, dim_index, rank): - return array_ops.transpose(tensor, - array_ops.concat([ - math_ops.range(dim_index), - math_ops.range(dim_index + 1, rank), - [dim_index] - ], 0)) + # Move the dim to the end if dim is not the last dimension. + if dim is not -1: + def _move_dim_to_end(tensor, dim_index, rank): + return array_ops.transpose(tensor, + array_ops.concat([ + math_ops.range(dim_index), + math_ops.range(dim_index + 1, rank), + [dim_index] + ], 0)) - precise_logits = _move_dim_to_end(precise_logits, dim, input_rank) - labels = _move_dim_to_end(labels, dim, input_rank) + precise_logits = _move_dim_to_end(precise_logits, dim, input_rank) + labels = _move_dim_to_end(labels, dim, input_rank) - input_shape = array_ops.shape(precise_logits) + input_shape = array_ops.shape(precise_logits) - # Make precise_logits and labels into matrices. - precise_logits = _flatten_outer_dims(precise_logits) - labels = _flatten_outer_dims(labels) + # Make precise_logits and labels into matrices. + precise_logits = _flatten_outer_dims(precise_logits) + labels = _flatten_outer_dims(labels) - # Do the actual op computation. - # The second output tensor contains the gradients. We use it in - # _CrossEntropyGrad() in nn_grad but not here. - cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( - precise_logits, labels, name=name) + # Do the actual op computation. + # The second output tensor contains the gradients. We use it in + # _CrossEntropyGrad() in nn_grad but not here. + cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + precise_logits, labels, name=name) - # The output cost shape should be the input minus dim. - output_shape = array_ops.slice(input_shape, [0], - [math_ops.subtract(input_rank, 1)]) - cost = array_ops.reshape(cost, output_shape) + # The output cost shape should be the input minus dim. + output_shape = array_ops.slice(input_shape, [0], + [math_ops.subtract(input_rank, 1)]) + cost = array_ops.reshape(cost, output_shape) - # Make shape inference work since reshape and transpose may erase its static - # shape. - if context.in_graph_mode() and shape is not None and shape.dims is not None: - shape = shape.as_list() - del shape[dim] - cost.set_shape(shape) + # Make shape inference work since reshape and transpose may erase its static + # shape. + if context.in_graph_mode() and shape is not None and shape.dims is not None: + shape = shape.as_list() + del shape[dim] + cost.set_shape(shape) - if logits.dtype == dtypes.float16: - return math_ops.cast(cost, dtypes.float16) - else: - return cost + if logits.dtype == dtypes.float16: + return math_ops.cast(cost, dtypes.float16) + else: + return cost + + +_XENT_DEPRECATION = """ +Future major versions of TensorFlow will allow gradients to flow +into the labels input on backprop by default. + +See tf.nn.softmax_cross_entropy_with_logits_v2. +""" + + +@deprecation.deprecated(date=None, instructions=_XENT_DEPRECATION) +def softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name + labels=None, logits=None, + dim=-1, name=None): + """Computes softmax cross entropy between `logits` and `labels`. + + Measures the probability error in discrete classification tasks in which the + classes are mutually exclusive (each entry is in exactly one class). For + example, each CIFAR-10 image is labeled with one and only one label: an image + can be a dog or a truck, but not both. + + **NOTE:** While the classes are mutually exclusive, their probabilities + need not be. All that is required is that each row of `labels` is + a valid probability distribution. If they are not, the computation of the + gradient will be incorrect. + + If using exclusive `labels` (wherein one and only + one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. + + **WARNING:** This op expects unscaled logits, since it performs a `softmax` + on `logits` internally for efficiency. Do not call this op with the + output of `softmax`, as it will produce incorrect results. + + `logits` and `labels` must have the same shape, e.g. + `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, + or `float64`). + + Backpropagation will happen only into `logits`. To calculate a cross entropy + loss that allows backpropagation into both `logits` and `labels`, see + @{tf.nn.softmax_cross_entropy_with_logits_v2}. + + **Note that to avoid confusion, it is required to pass only named arguments to + this function.** + + Args: + _sentinel: Used to prevent positional parameters. Internal, do not use. + labels: Each row `labels[i]` must be a valid probability distribution. + logits: Unscaled log probabilities. + dim: The class dimension. Defaulted to -1 which is the last dimension. + name: A name for the operation (optional). + + Returns: + A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the + softmax cross entropy loss. + """ + _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, + labels, logits) + + with ops.name_scope( + name, "softmax_cross_entropy_with_logits_sg", [logits, labels]) as name: + labels = array_ops.stop_gradient(labels, name="labels_stop_gradient") + + return softmax_cross_entropy_with_logits_v2( + labels=labels, logits=logits, dim=dim, name=name) def sparse_softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=invalid-name diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index 11637814a6..24c0448dea 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -288,6 +288,10 @@ tf_module { name: "softmax_cross_entropy_with_logits" argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'-1\', \'None\'], " } + member_method { + name: "softmax_cross_entropy_with_logits_v2" + argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'-1\', \'None\'], " + } member_method { name: "softplus" argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 2bb46f6376c35ec86279e80a24dc06b068b41556 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 9 Nov 2017 14:31:06 -0800 Subject: [PATCH 0243/1801] Internal change. PiperOrigin-RevId: 175213336 --- tensorflow/contrib/nccl/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index ed9fb64b95..df9dbb457a 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -48,8 +48,8 @@ tf_cuda_cc_test( # Disabled on jenkins until errors finding nvmlShutdown are found. tags = [ "manual", + "multi_gpu", "no_oss", - "noguitar", # note: is run manually there "notap", ], deps = if_cuda( @@ -138,8 +138,8 @@ cuda_py_test( # Disabled on jenkins until errors finding nvmlShutdown are found. tags = [ "manual", + "multi_gpu", "no_oss", - "noguitar", # note: is run manually there "notap", ], ) -- GitLab From 51895becce83ef4dc8bac263377d158fc50e4d53 Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Thu, 9 Nov 2017 14:48:37 -0800 Subject: [PATCH 0244/1801] Change for asynchronous Send and Recv by splitting Send into {Send, SendDone} and Recv into {Recv, RecvDone}. See operation_semantics.md for the updated semantics. PiperOrigin-RevId: 175216012 --- .../compiler/xla/service/buffer_assignment.cc | 11 --- .../compiler/xla/service/cpu/ir_emitter.cc | 10 +++ .../compiler/xla/service/cpu/ir_emitter.h | 2 + .../compiler/xla/service/dfs_hlo_visitor.h | 6 +- .../service/dfs_hlo_visitor_with_default.h | 10 ++- .../compiler/xla/service/gpu/ir_emitter.cc | 8 ++ .../compiler/xla/service/gpu/ir_emitter.h | 2 + .../compiler/xla/service/hlo_cost_analysis.cc | 8 ++ .../compiler/xla/service/hlo_cost_analysis.h | 2 + .../xla/service/hlo_dataflow_analysis.cc | 65 ++++++++++++++ .../xla/service/hlo_dataflow_analysis.h | 2 + .../xla/service/hlo_dataflow_analysis_test.cc | 48 ++++++++++ .../compiler/xla/service/hlo_graph_dumper.cc | 4 + .../compiler/xla/service/hlo_instruction.cc | 57 ++++++++++-- .../compiler/xla/service/hlo_instruction.h | 22 +++-- .../compiler/xla/service/hlo_matchers.h | 2 + tensorflow/compiler/xla/service/hlo_opcode.h | 2 + .../xla/service/hlo_rematerialization.cc | 2 + .../compiler/xla/service/hlo_verifier.cc | 49 +++++++++- .../xla/service/instruction_fusion.cc | 2 + .../xla/service/logical_buffer_analysis.cc | 15 ++++ .../xla/service/logical_buffer_analysis.h | 2 + .../xla/service/tuple_points_to_analysis.cc | 58 ++++++++++++ .../xla/service/tuple_points_to_analysis.h | 2 + .../service/tuple_points_to_analysis_test.cc | 45 ++++++++++ .../compiler/xla/service/user_computation.cc | 6 +- .../xla/service/while_loop_simplifier.cc | 4 +- .../xla/service/while_loop_simplifier_test.cc | 6 +- .../compiler/xla/tools/parser/hlo_parser.cc | 30 ++++++- .../xla/tools/parser/hlo_parser_test.cc | 26 ++++-- .../performance/xla/operation_semantics.md | 89 +++++++++++++++++++ 31 files changed, 550 insertions(+), 47 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index c74f050f77..3c5b360c8e 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -819,17 +819,6 @@ Status BufferAssigner::AssignBuffersForComputation( continue; } - if (instruction->opcode() == HloOpcode::kRecv) { - // Make sure that recv operations get a new unique allocation so that - // don't share their buffer with any other operations. - BufferAllocation* allocation = assignment->NewAllocation( - *buffer, buffer_size, is_thread_local, /*is_reusable=*/false); - allocation_indices.push_back(allocation->index()); - VLOG(3) << "New allocation #" << allocation->index() - << " for recv: " << *buffer; - continue; - } - if (ShapeUtil::IsTuple(buffer->shape())) { // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend // assumes longer buffer liveness than indicated by the analysis. diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index a20ce6826c..e547f291b8 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1983,6 +1983,11 @@ Status IrEmitter::HandleSend(HloInstruction* send) { return Unimplemented("Send is not implemented on CPU. See b/33942983."); } +Status IrEmitter::HandleSendDone(HloInstruction* send_done) { + // TODO(b/33942983): Support Send/Recv on CPU. + return Unimplemented("Send-done is not implemented on CPU. See b/33942983."); +} + Status IrEmitter::HandleSlice(HloInstruction* slice) { VLOG(2) << "HandleSlice: " << slice->ToString(); auto operand = slice->operand(0); @@ -2148,6 +2153,11 @@ Status IrEmitter::HandleRecv(HloInstruction* recv) { return Unimplemented("Recv is not implemented on CPU. See b/33942983."); } +Status IrEmitter::HandleRecvDone(HloInstruction* recv_done) { + // TODO(b/33942983): Support Send/Recv on CPU. + return Unimplemented("Recv-done is not implemented on CPU. See b/33942983."); +} + Status IrEmitter::HandlePad(HloInstruction* pad) { // CPU backend does not properly handle negative padding but this is ok // because negative padding should be removed by the algebraic simplifier. diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 5d061e11e3..83eded5ad8 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -171,11 +171,13 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleReduceWindow(HloInstruction* reduce_window) override; Status HandleSelectAndScatter(HloInstruction* select_and_scatter) override; Status HandleSend(HloInstruction* send) override; + Status HandleSendDone(HloInstruction* send_done) override; Status HandleSlice(HloInstruction* slice) override; Status HandleDynamicSlice(HloInstruction* dynamic_slice) override; Status HandleDynamicUpdateSlice( HloInstruction* dynamic_update_slice) override; Status HandleRecv(HloInstruction* recv) override; + Status HandleRecvDone(HloInstruction* recv_done) override; Status HandlePad(HloInstruction* pad) override; Status HandleTuple(HloInstruction* tuple) override; Status HandleMap(HloInstruction* map) override; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index de3cd15440..bc73839a88 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -211,9 +211,11 @@ class DfsHloVisitorBase { virtual Status HandlePad(HloInstructionPtr hlo) = 0; - virtual Status HandleSend(HloInstructionPtr hlo) = 0; + virtual Status HandleSend(HloInstructionPtr send) = 0; + virtual Status HandleSendDone(HloInstructionPtr send_done) = 0; - virtual Status HandleRecv(HloInstructionPtr hlo) = 0; + virtual Status HandleRecv(HloInstructionPtr recv) = 0; + virtual Status HandleRecvDone(HloInstructionPtr recv_done) = 0; virtual Status HandleBatchNormTraining(HloInstructionPtr hlo) = 0; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index 7ce88be89d..5415bab5b3 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -167,11 +167,17 @@ class DfsHloVisitorWithDefaultBase Status HandleWhile(HloInstructionPtr xla_while) override { return DefaultAction(xla_while); } + Status HandleRecv(HloInstructionPtr recv) override { + return DefaultAction(recv); + } + Status HandleRecvDone(HloInstructionPtr recv_done) override { + return DefaultAction(recv_done); + } Status HandleSend(HloInstructionPtr send) override { return DefaultAction(send); } - Status HandleRecv(HloInstructionPtr recv) override { - return DefaultAction(recv); + Status HandleSendDone(HloInstructionPtr send_done) override { + return DefaultAction(send_done); } // Invoked to inform the visitor that the traversal has completed, and that diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index 57a3f713e3..9d55c7859d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -128,10 +128,18 @@ Status IrEmitter::HandleSend(HloInstruction*) { return Unimplemented("Send is not implemented on GPU"); } +Status IrEmitter::HandleSendDone(HloInstruction*) { + return Unimplemented("Send-Done is not implemented on GPU"); +} + Status IrEmitter::HandleRecv(HloInstruction*) { return Unimplemented("Recv is not implemented on GPU"); } +Status IrEmitter::HandleRecvDone(HloInstruction*) { + return Unimplemented("Recv-done is not implemented on GPU"); +} + Status IrEmitter::HandleTuple(HloInstruction* tuple) { std::vector base_ptrs; for (const HloInstruction* operand : tuple->operands()) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h index 263992d925..61fdeaa0ee 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h @@ -84,7 +84,9 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleOutfeed(HloInstruction* outfeed) override; Status HandleSort(HloInstruction* sort) override; Status HandleSend(HloInstruction* send) override; + Status HandleSendDone(HloInstruction* send_done) override; Status HandleRecv(HloInstruction* recv) override; + Status HandleRecvDone(HloInstruction* recv_done) override; Status HandleParameter(HloInstruction* parameter) override; Status HandleReduce(HloInstruction* reduce) override; Status HandleTuple(HloInstruction* tuple) override; diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 17ba2b673a..1877065f67 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -337,10 +337,18 @@ Status HloCostAnalysis::HandleSend(const HloInstruction*) { return Status::OK(); } +Status HloCostAnalysis::HandleSendDone(const HloInstruction*) { + return Status::OK(); +} + Status HloCostAnalysis::HandleRecv(const HloInstruction*) { return Status::OK(); } +Status HloCostAnalysis::HandleRecvDone(const HloInstruction*) { + return Status::OK(); +} + Status HloCostAnalysis::HandleReshape(const HloInstruction*) { return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 8074868e37..0f44775378 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -60,7 +60,9 @@ class HloCostAnalysis : public ConstDfsHloVisitor { Status HandleReducePrecision(const HloInstruction* hlo) override; Status HandleConcatenate(const HloInstruction* concatenate) override; Status HandleSend(const HloInstruction* send) override; + Status HandleSendDone(const HloInstruction* send_done) override; Status HandleRecv(const HloInstruction* recv) override; + Status HandleRecvDone(const HloInstruction* recv_done) override; Status HandleConvert(const HloInstruction* convert) override; Status HandleCopy(const HloInstruction* copy) override; Status HandleDot(const HloInstruction* dot) override; diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index 92261bce62..ff80f18bb5 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -242,6 +242,51 @@ bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) { return false; } +bool HloDataflowAnalysis::UpdateSendValueSet(HloInstruction* send) { + CHECK_EQ(send->opcode(), HloOpcode::kSend); + bool changed = false; + // Send forwards the operand value to the output tuple at {0}. + for (auto& pair : GetInstructionValueSet(send->operand(0))) { + const ShapeIndex& operand_index = pair.first; + const HloValueSet& operand_value_set = pair.second; + + ShapeIndex index = {0}; + for (int64 i : operand_index) { + index.push_back(i); + } + + HloValueSet& value_set = GetValueSet(send, index); + if (value_set != operand_value_set) { + value_set = operand_value_set; + changed = true; + } + } + return changed; +} + +bool HloDataflowAnalysis::UpdateRecvDoneValueSet(HloInstruction* recv_done) { + CHECK_EQ(recv_done->opcode(), HloOpcode::kRecvDone); + bool changed = false; + // RecvDone forwards the operand value at {0} to the output. + for (auto& pair : GetInstructionValueSet(recv_done)) { + ShapeIndex& index = pair.first; + HloValueSet& value_set = pair.second; + + ShapeIndex operand_index = {0}; + for (int64 i : index) { + operand_index.push_back(i); + } + + const HloValueSet& operand_value_set = + GetValueSet(recv_done->operand(0), operand_index); + if (value_set != operand_value_set) { + value_set = operand_value_set; + changed = true; + } + } + return changed; +} + bool HloDataflowAnalysis::UpdateCallValueSet(HloInstruction* call) { CHECK_EQ(call->opcode(), HloOpcode::kCall); InstructionValueSet& value_set = GetInstructionValueSet(call); @@ -429,6 +474,10 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet( return UpdateCallValueSet(instruction); case HloOpcode::kWhile: return UpdateWhileValueSet(instruction); + case HloOpcode::kSend: + return UpdateSendValueSet(instruction); + case HloOpcode::kRecvDone: + return UpdateRecvDoneValueSet(instruction); default: // Instruction does not forward HloValues (it defines all values in its // output). No update is necessary. @@ -537,6 +586,12 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { GetValueSet(instruction, /*index=*/{}).AddValue(value); }; + // Lambda to set the value set at the given index of the output. + auto define_value_at = [this, &instruction](const ShapeIndex& index) { + HloValue* value = NewHloValue(instruction, index, /*is_phi=*/false); + GetValueSet(instruction, index).AddValue(value); + }; + switch (instruction->opcode()) { case HloOpcode::kBitcast: if (bitcast_defines_value_) { @@ -577,6 +632,16 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { // values flow from their operands. define_top_level_only(); break; + case HloOpcode::kRecvDone: + // RecvDone aliases its input tuple element {0}, therefore does not + // define any values. + break; + case HloOpcode::kSend: + // Send produces a tuple of {aliased operand, U32 context}, therefore + // only defines the top-level tuple and the tuple element at {1}. + define_value_at(/*index=*/{}); + define_value_at(/*index=*/{1}); + break; default: define_all_values(); break; diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index 207e553bf7..63467f3206 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -146,7 +146,9 @@ class HloDataflowAnalysis { bool UpdateCopyValueSet(HloInstruction* copy); bool UpdateGetTupleElementValueSet(HloInstruction* gte); bool UpdateParameterValueSet(HloInstruction* parameter); + bool UpdateRecvDoneValueSet(HloInstruction* recv_done); bool UpdateSelectValueSet(HloInstruction* select); + bool UpdateSendValueSet(HloInstruction* send); bool UpdateTupleValueSet(HloInstruction* tuple); bool UpdateWhileValueSet(HloInstruction* xla_while); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 4b8eb237a6..66a538fc51 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -1139,6 +1139,54 @@ TEST_P(HloDataflowAnalysisTest, TupleCopy) { analysis.GetValueDefinedAt(copy, /*index=*/{}).live_out_of_module()); } +TEST_P(HloDataflowAnalysisTest, SendAndSendDone) { + // Test that a Send forwards its operand to the output tuple at {0}. + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param0")); + auto send = builder.AddInstruction( + HloInstruction::CreateSend(param, /*channel_id=*/0)); + auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send)); + module_->AddEntryComputation(builder.Build()); + + bool ssa_form = GetParam(); + const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + + EXPECT_EQ(analysis.values().size(), 4); + + EXPECT_TRUE(analysis.ValueIsDefinedAt(param)); + EXPECT_TRUE(analysis.ValueIsDefinedAt(send, /*index=*/{})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(send, /*index=*/{0})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(send, /*index=*/{1})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(send_done)); + EXPECT_THAT(HloValuesAt(send, /*index=*/{0}), + UnorderedElementsAre(analysis.GetValueDefinedAt(param))); +} + +TEST_P(HloDataflowAnalysisTest, RecvAndRecvDone) { + // Test that a RecvDone forwards its operand tuple element at {0} to the + // output. + auto builder = HloComputation::Builder(TestName()); + auto recv = builder.AddInstruction( + HloInstruction::CreateRecv(scalar_shape_, /*channel_id=*/0)); + auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv)); + module_->AddEntryComputation(builder.Build()); + + bool ssa_form = GetParam(); + const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + + EXPECT_EQ(analysis.values().size(), 3); + + EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{0})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(recv, /*index=*/{1})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(recv_done)); + EXPECT_THAT(HloValuesAt(recv_done), + UnorderedElementsAre(analysis.GetValueDefinedAt(recv, {0}))); + EXPECT_TRUE( + analysis.GetValueDefinedAt(recv, /*index=*/{0}).live_out_of_module()); +} + TEST_P(HloDataflowAnalysisTest, ElementwiseChainInterference) { // A simple chain of elementwise operations. No values should interfere. // diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 1c063c973d..67e0238c4a 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -943,7 +943,9 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kFusion: return kGray; case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kCrossReplicaSum: @@ -1037,7 +1039,9 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) { ? "" : StrCat("stride=", VectorString(instr->slice_strides())); case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: return StrCat("channel_id=", instr->channel_id()); default: return ""; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index ee98c3fabc..ffb933155f 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -371,20 +371,50 @@ HloInstruction::CreateCrossReplicaSum(const Shape& shape, /* static */ std::unique_ptr HloInstruction::CreateSend( HloInstruction* operand, int64 channel_id) { + // Send instruction produces a tuple of {aliased operand, U32 context}. + Shape output_shape = ShapeUtil::MakeTupleShape( + {operand->shape(), ShapeUtil::MakeShape(U32, {})}); auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kSend, ShapeUtil::MakeNil())); + WrapUnique(new HloInstruction(HloOpcode::kSend, output_shape)); instruction->AppendOperand(operand); instruction->channel_id_ = channel_id; return instruction; } +/* static */ std::unique_ptr HloInstruction::CreateSendDone( + HloInstruction* operand) { + CHECK(operand->opcode() == HloOpcode::kSend) + << "SendDone must take the context operand from Send"; + auto instruction = WrapUnique( + new HloInstruction(HloOpcode::kSendDone, ShapeUtil::MakeNil())); + instruction->AppendOperand(operand); + instruction->channel_id_ = operand->channel_id(); + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateRecv( const Shape& shape, int64 channel_id) { - auto instruction = WrapUnique(new HloInstruction(HloOpcode::kRecv, shape)); + // Recv instruction produces a tuple of {receive buffer, U32 context}. + Shape output_shape = + ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U32, {})}); + auto instruction = + WrapUnique(new HloInstruction(HloOpcode::kRecv, output_shape)); instruction->channel_id_ = channel_id; return instruction; } +/* static */ std::unique_ptr HloInstruction::CreateRecvDone( + HloInstruction* operand) { + CHECK(operand->opcode() == HloOpcode::kRecv) + << "RecvDone must take the context operand from Recv"; + Shape output_shape = ShapeUtil::GetTupleElementShape(operand->shape(), 0); + auto instruction = + WrapUnique(new HloInstruction(HloOpcode::kRecvDone, output_shape)); + instruction->AppendOperand(operand); + instruction->channel_id_ = operand->channel_id(); + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateReverse( const Shape& shape, HloInstruction* operand, tensorflow::gtl::ArraySlice dimensions) { @@ -908,7 +938,9 @@ RandomDistribution HloInstruction::random_distribution() const { bool HloInstruction::HasSideEffect() const { switch (opcode_) { case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kTrace: @@ -1164,7 +1196,9 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( new_operands[4], epsilon(), feature_index()); break; case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kTrace: LOG(FATAL) << "Not yet implemented, clone: " << HloOpcodeString(opcode_); } @@ -1557,8 +1591,10 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kSort: - case HloOpcode::kSend: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + case HloOpcode::kSend: + case HloOpcode::kSendDone: return false; } } @@ -1891,7 +1927,8 @@ std::vector HloInstruction::ExtraAttributesToString() const { }))); } - if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv) { + if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv || + opcode() == HloOpcode::kSendDone || opcode() == HloOpcode::kRecvDone) { extra.push_back(StrCat("channel_id=", channel_id_)); } @@ -2071,8 +2108,10 @@ bool HloInstruction::IsFusable() const { case HloOpcode::kOutfeed: case HloOpcode::kParameter: case HloOpcode::kTrace: - case HloOpcode::kSend: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + case HloOpcode::kSend: + case HloOpcode::kSendDone: return false; // Only fuse Rng if it is used once, otherwise the random numbers generated // will be different in each fusion. If it is the root (user count = 0) @@ -2279,10 +2318,14 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleCall(this); case HloOpcode::kCustomCall: return visitor->HandleCustomCall(this); - case HloOpcode::kSend: - return visitor->HandleSend(this); case HloOpcode::kRecv: return visitor->HandleRecv(this); + case HloOpcode::kRecvDone: + return visitor->HandleRecvDone(this); + case HloOpcode::kSend: + return visitor->HandleSend(this); + case HloOpcode::kSendDone: + return visitor->HandleSendDone(this); // These opcodes are not handled here. case HloOpcode::kTrace: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 4d8fe6bc10..974d43d89e 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -181,18 +181,28 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, tensorflow::StringPiece outfeed_config); - // Creates a send instruction with the given channel id, which sends the - // operand data to a unique receive instruction in another computation that - // has the same channel id. + // Creates an asynchronous send instruction with the given channel id, which + // initiates sending the operand data to a unique receive instruction in + // another computation that has the same channel id. static std::unique_ptr CreateSend(HloInstruction* operand, int64 channel_id); - // Creates a receive instruction with the given channel id, which receives - // data of the given shape from a unique send instruction in another - // computation that has the same channel id. + // Blocks until data transfer for the Send instruction (operand) is complete. + // The operand must be kSend. + static std::unique_ptr CreateSendDone( + HloInstruction* operand); + + // Creates an asynchronous receive instruction with the given channel id, + // which allocates resources to receive data of the given shape from a unique + // send instruction in another computation that has the same channel id. static std::unique_ptr CreateRecv(const Shape& shape, int64 channel_id); + // Blocks until data transfer for the Recv instruction (operand) is complete + // and returns the receive buffer. The operand must be kRecv. + static std::unique_ptr CreateRecvDone( + HloInstruction* operand); + // Creates a slice instruction, where the operand is sliced by the given // start/limit indices. static std::unique_ptr CreateSlice( diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index 4d4010b025..268fa0f632 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -121,6 +121,7 @@ HLO_MATCHER(Outfeed); HLO_MATCHER(Pad); HLO_MATCHER(Power); HLO_MATCHER(Recv); +HLO_MATCHER(RecvDone); HLO_MATCHER(Reduce); HLO_MATCHER(ReducePrecision); HLO_MATCHER(ReduceWindow); @@ -131,6 +132,7 @@ HLO_MATCHER(Rng); HLO_MATCHER(Select); HLO_MATCHER(SelectAndScatter); HLO_MATCHER(Send); +HLO_MATCHER(SendDone); HLO_MATCHER(ShiftLeft); HLO_MATCHER(ShiftRightLogical); HLO_MATCHER(ShiftRightArithmetic); diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index d68fc20321..e0d02e0665 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -97,6 +97,7 @@ namespace xla { V(kPower, "power") \ V(kReal, "real") \ V(kRecv, "recv") \ + V(kRecvDone, "recv-done") \ V(kReduce, "reduce") \ V(kReducePrecision, "reduce-precision") \ V(kReduceWindow, "reduce-window") \ @@ -108,6 +109,7 @@ namespace xla { V(kSelect, "select") \ V(kSelectAndScatter, "select-and-scatter") \ V(kSend, "send") \ + V(kSendDone, "send-done") \ V(kShiftLeft, "shift-left") \ V(kShiftRightArithmetic, "shift-right-arithmetic") \ V(kShiftRightLogical, "shift-right-logical") \ diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index c96df50e79..828be8490c 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -66,7 +66,9 @@ bool IsRematerializable(const HloInstruction* instruction) { case HloOpcode::kInfeed: case HloOpcode::kParameter: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kTrace: case HloOpcode::kWhile: return false; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index c1aa655401..c938450891 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -270,12 +270,40 @@ class ShapeVerifier : public DfsHloVisitor { pad->padding_config())); } - Status HandleSend(HloInstruction*) override { - return tensorflow::Status::OK(); + Status HandleSend(HloInstruction* send) override { + TF_RET_CHECK(send->users().size() == 1); + const HloInstruction* send_done = send->users()[0]; + TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone); + TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done)); + return CheckShape( + send, ShapeUtil::MakeTupleShape( + {send->operand(0)->shape(), ShapeUtil::MakeShape(U32, {})})); } - Status HandleRecv(HloInstruction*) override { - return tensorflow::Status::OK(); + Status HandleSendDone(HloInstruction* send_done) override { + TF_RET_CHECK(send_done->operands().size() == 1); + const HloInstruction* send = send_done->operand(0); + TF_RET_CHECK(send->opcode() == HloOpcode::kSend); + TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done)); + return CheckShape(send_done, ShapeUtil::MakeNil()); + } + + Status HandleRecv(HloInstruction* recv) override { + TF_RET_CHECK(recv->users().size() == 1); + const HloInstruction* recv_done = recv->users()[0]; + TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone); + TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done)); + return CheckShape(recv, + ShapeUtil::MakeTupleShape( + {recv_done->shape(), ShapeUtil::MakeShape(U32, {})})); + } + + Status HandleRecvDone(HloInstruction* recv_done) override { + TF_RET_CHECK(recv_done->operands().size() == 1); + const HloInstruction* recv = recv_done->operand(0); + TF_RET_CHECK(recv->opcode() == HloOpcode::kRecv); + TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done)); + return CheckShape(recv_done, recv->shape().tuple_shapes(0)); } Status HandleBatchNormTraining(HloInstruction* batch_norm_training) override { @@ -365,6 +393,19 @@ class ShapeVerifier : public DfsHloVisitor { instruction->opcode(), instruction->operands())); } + // Checks if the given two instructions shares the same channel id. + Status CheckSameChannel(const HloInstruction* instr1, + const HloInstruction* instr2) { + if (instr1->channel_id() != instr2->channel_id()) { + return FailedPrecondition( + "Expected to have the same channel id, actual channel ids are: %s " + "(%lld), %s (%lld)", + instr1->ToString().c_str(), instr1->channel_id(), + instr2->ToString().c_str(), instr2->channel_id()); + } + return tensorflow::Status::OK(); + } + // Returns the size of a Shape in bytes. const std::function shape_size_fn_; }; diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 0d1b7bc109..dea47b1fd7 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -113,7 +113,9 @@ namespace xla { case HloOpcode::kTrace: case HloOpcode::kWhile: case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kRecv: + case HloOpcode::kRecvDone: return true; } diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc index b92017c6cb..02dc49e78c 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc @@ -104,6 +104,21 @@ Status LogicalBufferAnalysis::HandleBitcast(HloInstruction*) { return Status::OK(); } +Status LogicalBufferAnalysis::HandleRecvDone(HloInstruction*) { + // RecvDone doesn't create a new buffer but rather aliases its input (Recv) + // tuple element at {0} to its output. + return Status::OK(); +} + +Status LogicalBufferAnalysis::HandleSend(HloInstruction* send) { + // Send creates new buffers for the top-level tuple and the context (tuple + // element at {1}). Tuple element at {0} is an alias of the Send operand, so + // we don't need to create a new Logical Buffer for that. + NewLogicalBuffer(send, /*index=*/{}); + NewLogicalBuffer(send, /*index=*/{1}); + return Status::OK(); +} + Status LogicalBufferAnalysis::HandleTuple(HloInstruction* tuple) { // A Tuple instruction only creates the top-level buffer. NewLogicalBuffer(tuple, /*index=*/{}); diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.h b/tensorflow/compiler/xla/service/logical_buffer_analysis.h index a82e83ec5c..598d08b720 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.h +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.h @@ -60,6 +60,8 @@ class LogicalBufferAnalysis : public DfsHloVisitorWithDefault { Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleCopy(HloInstruction* copy) override; + Status HandleRecvDone(HloInstruction* recv_done) override; + Status HandleSend(HloInstruction* send) override; Status HandleSelect(HloInstruction* select) override; // A map from the buffer ID to the logical buffer diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index df537bd7c1..a1f9451dd4 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -253,6 +253,64 @@ Status TuplePointsToAnalysis::HandleBitcast(HloInstruction* bitcast) { return Status::OK(); } +Status TuplePointsToAnalysis::HandleRecvDone(HloInstruction* recv_done) { + // RecvDone aliases its input (Recv) tuple element {0} to its output. + PointsToSet& points_to_set = CreateEmptyPointsToSet(recv_done); + const PointsToSet& operand_points_to_set = + GetPointsToSet(recv_done->operand(0)); + + // Recursively copy the points to set of the operand tuple {0}. + points_to_set.ForEachMutableElement( + [this, &points_to_set, &operand_points_to_set]( + const ShapeIndex& index, PointsToSet::BufferList* buffers) { + ShapeIndex src_index({0}); + for (auto element : index) { + src_index.push_back(element); + } + *buffers = operand_points_to_set.element(src_index); + for (auto& tuple_source : + operand_points_to_set.tuple_sources(src_index)) { + points_to_set.add_tuple_source(index, tuple_source); + } + }); + return Status::OK(); +} + +Status TuplePointsToAnalysis::HandleSend(HloInstruction* send) { + // Send creates a tuple of {aliased operand, U32 context}. + PointsToSet& points_to_set = CreateEmptyPointsToSet(send); + + // Creates the points to set for the tuple and its element at {1}. + auto top_buffer = points_to_set.mutable_element(ShapeIndex({})); + top_buffer->push_back( + &logical_buffer_analysis_->GetBuffer(send, ShapeIndex({}))); + points_to_set.add_tuple_source({}, send); + + auto context_buffer = points_to_set.mutable_element(ShapeIndex({1})); + context_buffer->push_back( + &logical_buffer_analysis_->GetBuffer(send, ShapeIndex({1}))); + + // Recursively copy the points to set of the operand to output tuple {0}. + const PointsToSet& operand_points_to_set = GetPointsToSet(send->operand(0)); + operand_points_to_set.ForEachElement( + [&points_to_set, &operand_points_to_set]( + const ShapeIndex& src_index, + const PointsToSet::BufferList& points_to) { + ShapeIndex target_index({0}); + for (auto element : src_index) { + target_index.push_back(element); + } + *points_to_set.mutable_element(target_index) = points_to; + + for (HloInstruction* tuple : + operand_points_to_set.tuple_sources(src_index)) { + points_to_set.add_tuple_source(target_index, tuple); + } + }); + + return Status::OK(); +} + Status TuplePointsToAnalysis::HandleTuple(HloInstruction* tuple) { tensorflow::gtl::ArraySlice operands(tuple->operands()); PointsToSet& points_to_set = CreateEmptyPointsToSet(tuple); diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index e6157a1ed1..8928de107e 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -251,6 +251,8 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleCopy(HloInstruction* copy) override; + Status HandleRecvDone(HloInstruction* recv_done) override; + Status HandleSend(HloInstruction* send) override; Status HandleSelect(HloInstruction* select) override; string ToString() const; diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index 694ed57fa2..dec446d4da 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -313,6 +313,51 @@ TEST_F(TuplePointsToAnalysisTest, TupleCopy) { {constant1, constant2, copy}); } +TEST_F(TuplePointsToAnalysisTest, SendAndSendDone) { + // Send forwards its operand to the output tuple at {0}. + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto send = builder.AddInstruction( + HloInstruction::CreateSend(constant, /*channel_id=*/0)); + auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send)); + + BuildModuleAndRunAnalysis(builder.Build()); + + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(send).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(send).IsDistinct()); + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(send_done).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(send_done).IsDistinct()); + + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(send).element({}), {send}); + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(send).element({0}), {constant}); + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(send_done).CreateFlattenedSet(), + {send_done}); + ExpectHasBufferAliases(constant, {}, {{constant, {}}, {send, {0}}}); +} + +TEST_F(TuplePointsToAnalysisTest, RecvAndRecvDone) { + // RecvDone forwards its operand tuple element at {0} to the output. + auto builder = HloComputation::Builder(TestName()); + auto recv = builder.AddInstruction(HloInstruction::CreateRecv( + ShapeUtil::MakeShape(F32, {1, 2, 3}), /*channel_id=*/0)); + auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv)); + + BuildModuleAndRunAnalysis(builder.Build()); + + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(recv).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(recv).IsDistinct()); + EXPECT_FALSE(points_to_analysis_->GetPointsToSet(recv_done).IsAmbiguous()); + EXPECT_TRUE(points_to_analysis_->GetPointsToSet(recv_done).IsDistinct()); + + ExpectHasTopLevelBuffers( + points_to_analysis_->GetPointsToSet(recv).element({}), {recv}); + ExpectHasBufferAliases(recv, {0}, {{recv, {0}}, {recv_done, {}}}); +} + TEST_F(TuplePointsToAnalysisTest, TupleSelect) { // Select from two different tuples. This should create an ambiguous points to // set containing the union of both sides. diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index e9d182509b..8d5bb08e51 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -2927,8 +2927,9 @@ void ComputationLowerer::Visit( case OpRequest::kRecvRequest: { const RecvRequest& recv_request = request.request().recv_request(); - hlo_instruction = add_instruction(HloInstruction::CreateRecv( + HloInstruction* recv = add_instruction(HloInstruction::CreateRecv( request.output_shape(), recv_request.channel_handle().handle())); + hlo_instruction = add_instruction(HloInstruction::CreateRecvDone(recv)); break; } @@ -3120,8 +3121,9 @@ void ComputationLowerer::Visit( case OpRequest::kSendRequest: { const SendRequest& send_request = request.request().send_request(); HloInstruction* operand = lookup_instruction(send_request.operand()); - hlo_instruction = add_instruction(HloInstruction::CreateSend( + HloInstruction* send = add_instruction(HloInstruction::CreateSend( operand, send_request.channel_handle().handle())); + hlo_instruction = add_instruction(HloInstruction::CreateSendDone(send)); break; } diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 65734f91bc..2fac914892 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -58,7 +58,9 @@ static bool ContainsSendOrRecv(const HloComputation* comp) { static bool IsOrContainsSendOrRecv(const HloInstruction* instr) { if (instr->opcode() == HloOpcode::kSend || - instr->opcode() == HloOpcode::kRecv) { + instr->opcode() == HloOpcode::kSendDone || + instr->opcode() == HloOpcode::kRecv || + instr->opcode() == HloOpcode::kRecvDone) { return true; } for (const auto& subcomp : instr->called_computations()) { diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index 8e1a2dcde1..d99b31dc00 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ -144,10 +144,11 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsSend) { auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); - while_body->AddInstruction(HloInstruction::CreateSend( + auto* send = while_body->AddInstruction(HloInstruction::CreateSend( while_body->AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(true))), /*channel_id=*/0)); + while_body->AddInstruction(HloInstruction::CreateSendDone(send)); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -156,9 +157,10 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsRecv) { auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); - while_body->AddInstruction( + auto* recv = while_body->AddInstruction( HloInstruction::CreateRecv(ShapeUtil::MakeShape(F32, {1}), /*channel_id=*/0)); + while_body->AddInstruction(HloInstruction::CreateRecvDone(recv)); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index df07e069a0..3741c3daac 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -442,7 +442,21 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, return false; } instruction = builder->AddInstruction( - HloInstruction::CreateRecv(shape, *channel_id)); + HloInstruction::CreateRecv(shape.tuple_shapes(0), *channel_id)); + break; + } + case HloOpcode::kRecvDone: { + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + if (channel_id != operands[0]->channel_id()) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateRecvDone(operands[0])); break; } case HloOpcode::kSend: { @@ -456,6 +470,20 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateSend(operands[0], *channel_id)); break; } + case HloOpcode::kSendDone: { + optional channel_id; + attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + if (channel_id != operands[0]->channel_id()) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateSendDone(operands[0])); + break; + } case HloOpcode::kGetTupleElement: { optional index; attrs["index"] = {/*required=*/true, AttrTy::kInt64, &index}; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index a9dc360978..ca476a4bb7 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -226,9 +226,11 @@ ENTRY %WhileWithScalarS32Result.v2 () -> s32[] { R"(HloModule TwoSendRecvBothWayRecvFist_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15, sharding={maximal device=1} - ROOT %constant = f32[] constant(2.1), sharding={maximal device=0} - %send = () send(f32[] %constant), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv} + %recv = (f32[], u32[]) recv(), channel_id=15, sharding={maximal device=1} + ROOT %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15, sharding={maximal device=1} + %constant = f32[] constant(2.1), sharding={maximal device=0} + %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv} + %send-done = () send-done((f32[], u32[]) %send), channel_id=16, sharding={maximal device=0} } )" @@ -522,9 +524,11 @@ TEST_F(HloParserTest, UnexpectedAttribute) { const string original = R"(HloModule unexpected_attr_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15 + %recv = (f32[], u32[]) recv(), channel_id=15 + %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 ROOT %constant = f32[] constant(2.1) - %send = () send(f32[] %constant), channel_id=16, calls=%recv + %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, calls=%recv + %send-done = () send-done((f32[], u32[]) %send), channel_id=16 } )"; @@ -536,9 +540,11 @@ TEST_F(HloParserTest, MissingAttribute) { const string original = R"(HloModule missing_attr_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15 + %recv = (f32[], u32[]) recv(), channel_id=15 + %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 ROOT %constant = f32[] constant(-2.1) - %send = () send(f32[] %constant) + %send = (f32[], u32[]) send(f32[] %constant) + %send-done = () send-done((f32[], u32[]) %send), channel_id=16 } )"; @@ -550,9 +556,11 @@ TEST_F(HloParserTest, PredecessorUndefined) { const string original = R"(HloModule pre_not_found_module: ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { - %recv = f32[] recv(), channel_id=15 + %recv = (f32[], u32[]) recv(), channel_id=15 + %recv-done = f32[] recv-done((f32[], u32[]) %recv), channel_id=15 ROOT %constant = f32[] constant(2.1) - %send = () send(f32[] %constant), channel_id=16, control-predecessors={%done} + %send = (f32[], u32[]) send(f32[] %constant), channel_id=16, control-predecessors={%done} + %send-done = () send-done((f32[], u32[]) %send), channel_id=16 } )"; diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 3ca3b51a5e..ccced8792e 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -901,6 +901,95 @@ are all 0. Figure below shows examples of different `edge_padding` and +## Recv + +See also +[`ComputationBuilder::Recv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). + + `Recv(shape, channel_handle)` + +| Arguments | Type | Semantics | +| ---------------- | --------------- | ------------------------------------ | +| `shape` | `Shape` | shape of the data to receive | +| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair | + +Receives data of the given shape from a `Send` instruction in another +computation that shares the same channel handle. Returns a +ComputationDataHandle for the received data. + +The client API of `Recv` operation represents synchronous communication. +However, the instruction is internally decomposed into 2 HLO instructions +(`Recv` and `RecvDone`) to enable asynchronous data transfers. See also +[`HloInstruction::CreateRecv` and `HloInstruction::CreateRecvDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h). + +`Recv(const Shape& shape, int64 channel_id)` + +Allocates resources required to receive data from a `Send` instruction with the +same channel_id. Returns a context for the allocated resources, which is used +by a following `RecvDone` instruction to wait for the completion of the data +transfer. The context is a tuple of {receive buffer (shape), request identifier +(U32)} and it can only be used by a `RecvDone` instruction. + + `RecvDone(HloInstruction context)` + +Given a context created by a `Recv` instruction, waits for the data transfer to +complete and returns the received data. + +## Send + +See also +[`ComputationBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). + + `Send(operand, channel_handle)` + +| Arguments | Type | Semantics | +| ---------------- | ----------------------- | -------------------------------- | +| `operand` | `ComputationDataHandle` | data to send (array of type T) | +| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair | + +Sends the given operand data to a `Recv` instruction in another computation +that shares the same channel handle. Does not return any data. + +Similar to the `Recv` operation, the client API of `Send` operation represents +synchronous communication, and is internally decomposed into 2 HLO instructions +(`Send` and `SendDone`) to enable asynchronous data transfers. See also +[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h). + +`Send(HloInstruction operand, int64 channel_id)` + +Initiates an asynchronous transfer of the operand to the resources allocated by +the `Recv` instruction with the same channel id. Returns a context, which is +used by a following `SendDone` instruction to wait for the completion of the +data transfer. The context is a tuple of {operand (shape), request identifier +(U32)} and it can only be used by a `SendDone` instruction. + + `SendDone(HloInstruction context)` + +Given a context created by a `Send` instruction, waits for the data transfer to +complete. The instruction does not return any data. + + Scheduling of channel instructions + +The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`, +`Send`, `SendDone`) is as below. + +
+ +
+ +* `Recv` happens before `Send` +* `Send` happens before `RecvDone` +* `Recv` happens before `RecvDone` +* `Send` happens before `SendDone` + +When the backend compilers generate a linear schedule for each computation that +communicates via channel instructions, there must not be cycles across the +computations. For example, below schedules lead to deadlocks. + +
+ +
+ ## Reduce See also -- GitLab From b9951e775e7a80b96520a1e8e0b5c4a485196796 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 14:55:09 -0800 Subject: [PATCH 0245/1801] Adds streaming_dynamic_auc to Tensorflow contrib metrics. This metric differs from streaming_auc because it uses every prediction as a threshold rather than linearly spaced fixed thresholds. PiperOrigin-RevId: 175217002 --- tensorflow/contrib/metrics/__init__.py | 2 + .../contrib/metrics/python/ops/metric_ops.py | 149 ++++++++++ .../metrics/python/ops/metric_ops_test.py | 262 ++++++++++++++++-- 3 files changed, 385 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index 302042c4dd..8eed45c4b3 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -27,6 +27,7 @@ See the @{$python/contrib.metrics} guide. @@streaming_false_negative_rate @@streaming_false_negative_rate_at_thresholds @@streaming_auc +@@streaming_dynamic_auc @@streaming_curve_points @@streaming_recall_at_k @@streaming_mean_absolute_error @@ -88,6 +89,7 @@ from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_auc from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_concat from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_covariance from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_curve_points +from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_dynamic_auc from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negative_rate from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negative_rate_at_thresholds from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_false_negatives diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 3dd1f1a627..24692ff12f 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -1178,6 +1178,154 @@ def streaming_auc(predictions, name=name) +def _compute_dynamic_auc(labels, predictions, curve='ROC'): + """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds. + + Computes the area under the ROC or PR curve using each prediction as a + threshold. This could be slow for large batches, but has the advantage of not + having its results degrade depending on the distribution of predictions. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as + `predictions` with values of 0 or 1 and type `int64`. + predictions: A 1-D `Tensor` of predictions whose values are `float64`. + curve: The name of the curve to be computed, 'ROC' for the Receiving + Operating Characteristic or 'PR' for the Precision-Recall curve. + + Returns: + A scalar `Tensor` containing the area-under-curve value for the input. + """ + # Count the total number of positive and negative labels in the input. + size = array_ops.size(predictions) + total_positive = math_ops.cast(math_ops.reduce_sum(labels), dtypes.int32) + + def continue_computing_dynamic_auc(): + """Continues dynamic auc computation, entered if labels are not all equal. + + Returns: + A scalar `Tensor` containing the area-under-curve value. + """ + # Sort the predictions descending, and the corresponding labels as well. + ordered_predictions, indices = nn.top_k(predictions, k=size) + ordered_labels = array_ops.gather(labels, indices) + + # Get the counts of the unique ordered predictions. + _, _, counts = array_ops.unique_with_counts(ordered_predictions) + + # Compute the indices of the split points between different predictions. + splits = math_ops.cast( + array_ops.pad(math_ops.cumsum(counts), paddings=[[1, 0]]), dtypes.int32) + + # Count the positives to the left of the split indices. + positives = math_ops.cast( + array_ops.pad(math_ops.cumsum(ordered_labels), paddings=[[1, 0]]), + dtypes.int32) + true_positives = array_ops.gather(positives, splits) + if curve == 'ROC': + # Count the negatives to the left of every split point and the total + # number of negatives for computing the FPR. + false_positives = math_ops.subtract(splits, true_positives) + total_negative = size - total_positive + x_axis_values = math_ops.truediv(false_positives, total_negative) + y_axis_values = math_ops.truediv(true_positives, total_positive) + elif curve == 'PR': + x_axis_values = math_ops.truediv(true_positives, total_positive) + # For conformance, set precision to 1 when the number of positive + # classifications is 0. + y_axis_values = array_ops.where( + math_ops.greater(splits, 0), + math_ops.truediv(true_positives, splits), + array_ops.ones_like(true_positives, dtype=dtypes.float64)) + + # Calculate trapezoid areas. + heights = math_ops.add(y_axis_values[1:], y_axis_values[:-1]) / 2.0 + widths = math_ops.abs( + math_ops.subtract(x_axis_values[1:], x_axis_values[:-1])) + return math_ops.reduce_sum(math_ops.multiply(heights, widths)) + + # If all the labels are the same, AUC isn't well-defined (but raising an + # exception seems excessive) so we return 0, otherwise we finish computing. + return control_flow_ops.cond( + math_ops.logical_or( + math_ops.equal(total_positive, 0), + math_ops.equal(total_positive, size) + ), + true_fn=lambda: array_ops.constant(0, dtypes.float64), + false_fn=continue_computing_dynamic_auc) + + +def streaming_dynamic_auc(labels, + predictions, + curve='ROC', + metrics_collections=(), + updates_collections=(), + name=None): + """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds. + + USAGE NOTE: this approach requires storing all of the predictions and labels + for a single evaluation in memory, so it may not be usable when the evaluation + batch size and/or the number of evaluation steps is very large. + + Computes the area under the ROC or PR curve using each prediction as a + threshold. This has the advantage of being resilient to the distribution of + predictions by aggregating across batches, accumulating labels and predictions + and performing the final calculation using all of the concatenated values. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as `labels` + and with values of 0 or 1 whose values are castable to `int64`. + predictions: A `Tensor` of predictions whose values are castable to + `float64`. Will be flattened into a 1-D `Tensor`. + curve: The name of the curve for which to compute AUC, 'ROC' for the + Receiving Operating Characteristic or 'PR' for the Precision-Recall curve. + metrics_collections: An optional iterable of collections that `auc` should + be added to. + updates_collections: An optional iterable of collections that `update_op` + should be added to. + name: An optional name for the variable_scope that contains the metric + variables. + + Returns: + auc: A scalar `Tensor` containing the current area-under-curve value. + update_op: An operation that concatenates the input labels and predictions + to the accumulated values. + + Raises: + ValueError: If `labels` and `predictions` have mismatched shapes or if + `curve` isn't a recognized curve type. + """ + + if curve not in ['PR', 'ROC']: + raise ValueError('curve must be either ROC or PR, %s unknown' % curve) + + with variable_scope.variable_scope(name, default_name='dynamic_auc'): + labels.get_shape().assert_is_compatible_with(predictions.get_shape()) + predictions = array_ops.reshape( + math_ops.cast(predictions, dtypes.float64), [-1]) + labels = array_ops.reshape(math_ops.cast(labels, dtypes.int64), [-1]) + with ops.control_dependencies([ + check_ops.assert_greater_equal( + labels, + array_ops.zeros_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is <0'), + check_ops.assert_less_equal( + labels, + array_ops.ones_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is >1') + ]): + preds_accum, update_preds = streaming_concat(predictions, + name='concat_preds') + labels_accum, update_labels = streaming_concat(labels, + name='concat_labels') + update_op = control_flow_ops.group(update_labels, update_preds) + auc = _compute_dynamic_auc(labels_accum, preds_accum, curve=curve) + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + if metrics_collections: + ops.add_to_collections(metrics_collections, auc) + return auc, update_op + + def streaming_precision_recall_at_equal_thresholds(predictions, labels, num_thresholds=None, @@ -3285,6 +3433,7 @@ __all__ = [ 'streaming_accuracy', 'streaming_auc', 'streaming_curve_points', + 'streaming_dynamic_auc', 'streaming_false_negative_rate', 'streaming_false_negative_rate_at_thresholds', 'streaming_false_negatives', diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 6a8e58b4da..5d0463e1f7 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -1708,6 +1708,34 @@ class StreamingCurvePointsTest(test.TestCase): [[1.0, 4.0 / 6.0], [0.75, 1.0], [0.0, 1.0]]) +def _np_auc(predictions, labels, weights=None): + """Computes the AUC explicitly using Numpy. + + Args: + predictions: an ndarray with shape [N]. + labels: an ndarray with shape [N]. + weights: an ndarray with shape [N]. + + Returns: + the area under the ROC curve. + """ + if weights is None: + weights = np.ones(np.size(predictions)) + is_positive = labels > 0 + num_positives = np.sum(weights[is_positive]) + num_negatives = np.sum(weights[~is_positive]) + + # Sort descending: + inds = np.argsort(-predictions) + + sorted_labels = labels[inds] + sorted_weights = weights[inds] + is_positive = sorted_labels > 0 + + tp = np.cumsum(sorted_weights * is_positive) / num_positives + return np.sum((sorted_weights * tp)[~is_positive]) / num_negatives + + class StreamingAUCTest(test.TestCase): def setUp(self): @@ -1896,33 +1924,6 @@ class StreamingAUCTest(test.TestCase): self.assertAlmostEqual(1, auc.eval(), 6) - def np_auc(self, predictions, labels, weights): - """Computes the AUC explicitly using Numpy. - - Args: - predictions: an ndarray with shape [N]. - labels: an ndarray with shape [N]. - weights: an ndarray with shape [N]. - - Returns: - the area under the ROC curve. - """ - if weights is None: - weights = np.ones(np.size(predictions)) - is_positive = labels > 0 - num_positives = np.sum(weights[is_positive]) - num_negatives = np.sum(weights[~is_positive]) - - # Sort descending: - inds = np.argsort(-predictions) - - sorted_labels = labels[inds] - sorted_weights = weights[inds] - is_positive = sorted_labels > 0 - - tp = np.cumsum(sorted_weights * is_positive) / num_positives - return np.sum((sorted_weights * tp)[~is_positive]) / num_negatives - def testWithMultipleUpdates(self): num_samples = 1000 batch_size = 10 @@ -1945,7 +1946,7 @@ class StreamingAUCTest(test.TestCase): for weights in (None, np.ones(num_samples), np.random.exponential( scale=1.0, size=num_samples)): - expected_auc = self.np_auc(predictions, labels, weights) + expected_auc = _np_auc(predictions, labels, weights) with self.test_session() as sess: enqueue_ops = [[] for i in range(num_batches)] @@ -1974,6 +1975,211 @@ class StreamingAUCTest(test.TestCase): self.assertAlmostEqual(expected_auc, auc.eval(), 2) +class StreamingDynamicAUCTest(test.TestCase): + + def setUp(self): + super(StreamingDynamicAUCTest, self).setUp() + np.random.seed(1) + ops.reset_default_graph() + + def testUnknownCurve(self): + with self.assertRaisesRegexp( + ValueError, 'curve must be either ROC or PR, TEST_CURVE unknown'): + metrics.streaming_dynamic_auc(labels=array_ops.ones((10, 1)), + predictions=array_ops.ones((10, 1)), + curve='TEST_CURVE') + + def testVars(self): + metrics.streaming_dynamic_auc( + labels=array_ops.ones((10, 1)), predictions=array_ops.ones((10, 1))) + _assert_metric_variables(self, ['dynamic_auc/concat_labels/array:0', + 'dynamic_auc/concat_labels/size:0', + 'dynamic_auc/concat_preds/array:0', + 'dynamic_auc/concat_preds/size:0']) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + auc, _ = metrics.streaming_dynamic_auc( + labels=array_ops.ones((10, 1)), + predictions=array_ops.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertEqual(ops.get_collection(my_collection_name), [auc]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.streaming_dynamic_auc( + labels=array_ops.ones((10, 1)), + predictions=array_ops.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertEqual(ops.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = random_ops.random_uniform( + (10, 3), maxval=1, dtype=dtypes_lib.float32, seed=1) + labels = random_ops.random_uniform( + (10, 3), maxval=2, dtype=dtypes_lib.int64, seed=2) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + # Run several updates. + for _ in xrange(10): + sess.run(update_op) + # Then verify idempotency. + initial_auc = auc.eval() + for _ in xrange(10): + self.assertAlmostEqual(initial_auc, auc.eval(), 5) + + def testAllLabelsOnes(self): + with self.test_session() as sess: + predictions = constant_op.constant([1., 1., 1.]) + labels = constant_op.constant([1, 1, 1]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(0, auc.eval()) + + def testAllLabelsZeros(self): + with self.test_session() as sess: + predictions = constant_op.constant([1., 1., 1.]) + labels = constant_op.constant([0, 0, 0]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(0, auc.eval()) + + def testNonZeroOnePredictions(self): + with self.test_session() as sess: + predictions = constant_op.constant([2.5, -2.5, 2.5, -2.5], + dtype=dtypes_lib.float32) + labels = constant_op.constant([1, 0, 1, 0]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(auc.eval(), 1.0) + + def testAllCorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + with self.test_session() as sess: + predictions = constant_op.constant(inputs) + labels = constant_op.constant(inputs) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(1, auc.eval()) + + def testSomeCorrect(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0, 1, 0]) + labels = constant_op.constant([0, 1, 1, 0]) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.5, auc.eval()) + + def testAllIncorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + with self.test_session() as sess: + predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) + labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32) + auc, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0, auc.eval()) + + def testExceptionOnIncompatibleShapes(self): + with self.test_session() as sess: + predictions = array_ops.ones([5]) + labels = array_ops.zeros([6]) + with self.assertRaisesRegexp(ValueError, 'Shapes .* are incompatible'): + _, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + + def testExceptionOnGreaterThanOneLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0], dtypes_lib.float32) + labels = constant_op.constant([2, 1, 0]) + _, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is >1.*'): + sess.run(update_op) + + def testExceptionOnNegativeLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0], dtypes_lib.float32) + labels = constant_op.constant([1, 0, -1]) + _, update_op = metrics.streaming_dynamic_auc(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is <0.*'): + sess.run(update_op) + + def testWithMultipleUpdates(self): + batch_size = 10 + num_batches = 100 + labels = np.array([]) + predictions = np.array([]) + tf_labels = variables.Variable(array_ops.ones(batch_size, dtypes_lib.int32), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.int32) + tf_predictions = variables.Variable( + array_ops.ones(batch_size), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.float32) + auc, update_op = metrics.streaming_dynamic_auc(tf_labels, tf_predictions) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + for _ in xrange(num_batches): + new_labels = np.random.randint(0, 2, size=batch_size) + noise = np.random.normal(0.0, scale=0.2, size=batch_size) + new_predictions = 0.4 + 0.2 * new_labels + noise + labels = np.concatenate([labels, new_labels]) + predictions = np.concatenate([predictions, new_predictions]) + sess.run(tf_labels.assign(new_labels)) + sess.run(tf_predictions.assign(new_predictions)) + sess.run(update_op) + expected_auc = _np_auc(predictions, labels) + self.assertAlmostEqual(expected_auc, auc.eval()) + + def testAUCPRReverseIncreasingPredictions(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.1, 0.4, 0.35, 0.8], dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 0, 1, 1]) + auc, update_op = metrics.streaming_dynamic_auc( + labels, predictions, curve='PR') + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-5) + + def testAUCPRJumbledPredictions(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], dtypes_lib.float32) + labels = constant_op.constant([0, 0, 1, 0, 1, 0, 1]) + auc, update_op = metrics.streaming_dynamic_auc( + labels, predictions, curve='PR') + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-6) + + def testAUCPRPredictionsLessThanHalf(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], + shape=(1, 7), + dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 0, 0, 0, 1, 1, 1], shape=(1, 7)) + auc, update_op = metrics.streaming_dynamic_auc( + labels, predictions, curve='PR') + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-5) + + class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): def setUp(self): -- GitLab From 69159eb8baed1705a12d6f2f71b6decf9858c836 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 15:00:15 -0800 Subject: [PATCH 0246/1801] [TF:XLA] Clean up unused XLA options and functions. PiperOrigin-RevId: 175217850 --- .../compiler/jit/kernels/xla_launch_op.cc | 1 - .../compiler/jit/xla_compilation_cache.cc | 3 --- tensorflow/compiler/tf2xla/xla_compiler.h | 6 ------ .../compiler/xla/client/local_client.cc | 20 ------------------- tensorflow/compiler/xla/client/local_client.h | 16 --------------- .../compiler/xla/service/hlo_module_config.cc | 4 ++-- .../compiler/xla/service/hlo_module_config.h | 10 ---------- .../compiler/xla/service/local_service.cc | 20 ------------------- tensorflow/compiler/xla/service/service.h | 2 -- 9 files changed, 2 insertions(+), 80 deletions(-) diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index 27c5da08c1..e481796d9e 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -257,7 +257,6 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); options.graph_def_version = ctx->function_library()->graph_def_version(); options.allow_cpu_custom_calls = (platform_id_ == gpu::host::kHostPlatformId); - options.local_executable_has_hybrid_result = true; const XlaCompiler::CompilationResult* kernel; xla::LocalExecutable* executable; diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 23368b6c76..bc2eccd277 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -227,10 +227,7 @@ Status XlaCompilationCache::BuildExecutable( } xla::ExecutableBuildOptions build_options; build_options.set_device_ordinal(client_->default_device_ordinal()); - build_options.set_platform(client_->platform()); build_options.set_result_layout(result.xla_output_shape); - build_options.set_has_hybrid_result( - options.local_executable_has_hybrid_result); auto compile_result = client_->Compile(*result.computation, argument_layouts, build_options); diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 4d40ca5825..ac7d4cfb12 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -236,12 +236,6 @@ class XlaCompiler { // to the computation. bool allow_cpu_custom_calls = false; - // If 'local_executable_has_hybrid_result', the top-level pointers of the - // result tuple of compiled programs are stored in host memory and the - // nested buffers in device memory, otherwise the whole result tuple is - // stored in device memory. - bool local_executable_has_hybrid_result = false; - // If not nullptr, populate_resource_manager is called with the // compilation device's resource manager when the compilation // device is created, and can be used to create metadata objects diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 15c744ecd3..b50425a09c 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -27,16 +27,6 @@ namespace se = ::perftools::gputools; namespace xla { -ExecutableBuildOptions& ExecutableBuildOptions::set_platform( - perftools::gputools::Platform* platform) { - platform_ = platform; - return *this; -} - -perftools::gputools::Platform* ExecutableBuildOptions::platform() const { - return platform_; -} - ExecutableBuildOptions& ExecutableBuildOptions::set_device_ordinal( int device_ordinal) { device_ordinal_ = device_ordinal; @@ -56,16 +46,6 @@ const Shape* ExecutableBuildOptions::result_layout() const { return result_layout_set_ ? &result_layout_ : nullptr; } -ExecutableBuildOptions& ExecutableBuildOptions::set_has_hybrid_result( - bool has_hybrid_result) { - has_hybrid_result_ = has_hybrid_result; - return *this; -} - -bool ExecutableBuildOptions::has_hybrid_result() const { - return has_hybrid_result_; -} - namespace { StatusOr BorrowStreamForDevice(int device_ordinal, Backend* backend) { diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index 9f985ed527..e9eeaa0aa2 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -37,14 +37,6 @@ namespace xla { // LocalClient::Compile. class ExecutableBuildOptions { public: - // If set, this is the platform to build the computation for. This must match - // the underlying platform of the service. A value of nullptr indicates the - // option has not been set. - // - // TODO(b/28616830): Support multiple platforms. - ExecutableBuildOptions& set_platform(perftools::gputools::Platform* platform); - perftools::gputools::Platform* platform() const; - // If set, this is the device to build the computation for. Valid // device_ordinal values are: 0 to # of devices - 1. These values are // identical to the device ordinal values used by StreamExecutor. The built @@ -61,18 +53,10 @@ class ExecutableBuildOptions { ExecutableBuildOptions& set_result_layout(const Shape& shape_with_layout); const Shape* result_layout() const; - // If set, the executable will be built to output a hybrid - // ShapedBuffer with top-level tuple pointers in host memory and - // result buffers in device memory. - ExecutableBuildOptions& set_has_hybrid_result(bool has_hybrid_result); - bool has_hybrid_result() const; - private: - perftools::gputools::Platform* platform_ = nullptr; int device_ordinal_ = -1; Shape result_layout_; bool result_layout_set_ = false; - bool has_hybrid_result_ = true; }; class LocalExecutable { diff --git a/tensorflow/compiler/xla/service/hlo_module_config.cc b/tensorflow/compiler/xla/service/hlo_module_config.cc index 8974deb530..822e2f1f53 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.cc +++ b/tensorflow/compiler/xla/service/hlo_module_config.cc @@ -39,8 +39,8 @@ void HloModuleConfig::SetDefaultComputationLayout( } string HloModuleConfig::compilation_cache_key() const { - string key = tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled_, - "::hybrid=", has_hybrid_result_); + string key = + tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled_); StrAppend(&key, "::("); std::vector params; for (const ShapeLayout& param_layout : diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index 4a7ead9c10..a5ee895e48 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -104,16 +104,6 @@ class HloModuleConfig { // Whether to enable HLO-level profiling. bool hlo_profiling_enabled_ = false; - // If this flag is true, the generated executable will return a ShapedBuffer - // holding the result of the computation. In a ShapedBuffer, tuples have their - // structure held in host memory and the element arrays (leaves of the tuple - // structure) stored in device memory. The ShapedBuffer is considered "hybrid" - // because its leaves are on device but its structure is stored on - // host. Otherwise, if this flag is false, the generated executable will - // return a DeviceMemoryBase where the result is held entirely in device - // memory. - bool has_hybrid_result_ = false; - // Module/graph-level seed handle. uint64 seed_ = 0; diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index d4d35da9d6..06f43bd3cb 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -68,26 +68,6 @@ LocalService::LocalService(const ServiceOptions& options, std::unique_ptr execute_backend) : Service(options, std::move(execute_backend)) {} -namespace { -// Returns the space required to allocate a shape. If -// allocate_space_for_deep_copy the space includes all sub-buffers of -// a tuple. -int64 RequiredSpace(const Shape& shape, bool allocate_space_for_deep_copy, - TransferManager* transfer_manager) { - int64 size = 0; - // TODO(b/33492279) remove once no devices represent result tuples as - // contiguous buffers. - if (allocate_space_for_deep_copy) { - ShapeUtil::ForEachSubshape( - shape, [&size, transfer_manager](const Shape& subshape, - const ShapeIndex& /*index*/) { - size += transfer_manager->GetByteSizeRequirement(subshape); - }); - } - return size; -} -} // namespace - StatusOr> LocalService::CompileExecutable( const ComputationHandle& computation, const tensorflow::gtl::ArraySlice argument_layouts, diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 6646be2e9a..47f4f0ade5 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -272,8 +272,6 @@ class Service : public ServiceInterface { // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. - // has_hybrid_result is used to initialize the same-named field in - // HloModuleConfig -- see that class for documentation. StatusOr> CreateModuleConfig( const ProgramShape& program_shape, tensorflow::gtl::ArraySlice argument_shapes, -- GitLab From 0b73b56b46631be6e5edacba0786340645185b1c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 9 Nov 2017 15:04:22 -0800 Subject: [PATCH 0247/1801] Make LLVMCompilerTest less stateful. Instead of assigning the pre and post optimization to a singleton xla::Compiler object, prefer creating a short-lived CpuCompiler or a GpuCompiler instance on the stack. Without this change, adding a second test case on the (Cpu|Gpu)Compiler in the same process triggers a use-after-free. (Btw, LLVMCompiler should really be spelled LlvmCompiler per Google C++ style, I'll do that rename shortly). PiperOrigin-RevId: 175218617 --- tensorflow/compiler/xla/tests/BUILD | 21 +-- .../compiler/xla/tests/llvm_compiler_test.cc | 143 +++++++++++++----- 2 files changed, 117 insertions(+), 47 deletions(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 2333a30ad5..3e62481629 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1344,22 +1344,23 @@ xla_test( ], ) -xla_test( +tf_cc_test( name = "llvm_compiler_test", srcs = ["llvm_compiler_test.cc"], - backends = [ - "cpu", - "gpu", - "cpu_parallel", - ], + tags = ["requires-gpu-sm35"], deps = [ - "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/service:backend", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/service:gpu_plugin", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:llvm_compiler", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/compiler/xla/service/gpu:gpu_compiler", "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor", "@llvm//:core", ], ) diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 458258e7ee..70d8b764a3 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -14,49 +14,118 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/llvm_compiler.h" +#include "tensorflow/compiler/xla/service/backend.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" +#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/service/platform_util.h" +#include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/stream_executor/stream_executor.h" namespace xla { namespace { -class LLVMCompilerTest : public HloTestBase {}; - -XLA_TEST_F(LLVMCompilerTest, CompilerHooks) { - int pre_opt_hook_call_count = 0; - int post_opt_hook_call_count = 0; - - auto pre_opt_hook = [&pre_opt_hook_call_count](const llvm::Module &) { - ++pre_opt_hook_call_count; - return Status::OK(); - }; - auto post_opt_hook = [&post_opt_hook_call_count](const llvm::Module &) { - ++post_opt_hook_call_count; - return Status::OK(); - }; - - // Create HLO module, and run the compiler. - auto builder = HloComputation::Builder(TestName()); - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(42.0))); - - auto hlo_module = CreateNewModule(); - hlo_module->AddEntryComputation(builder.Build()); - - auto compiler = static_cast(backend().compiler()); - compiler->SetPreOptimizationHook(pre_opt_hook); - compiler->SetPostOptimizationHook(post_opt_hook); - - ASSERT_TRUE( - compiler - ->Compile(std::move(hlo_module), backend().default_stream_executor()) - .ok()); - - // Test that hooks were called. - EXPECT_EQ(1, pre_opt_hook_call_count); - EXPECT_EQ(1, post_opt_hook_call_count); +class LLVMCompilerTest : public ::testing::Test { + public: + void SetUp() override { + Platform *platform = FindPlatform(); + ASSERT_NE(platform, nullptr); + + BackendOptions backend_options; + backend_options.set_platform(platform); + StatusOr> backend_or_status = + Backend::CreateBackend(backend_options); + ASSERT_IS_OK(backend_or_status.status()); + backend_ = backend_or_status.ConsumeValueOrDie(); + } + + ~LLVMCompilerTest() override {} + + protected: + using Platform = ::perftools::gputools::Platform; + + explicit LLVMCompilerTest(string platform_name) + : platform_name_(std::move(platform_name)) {} + + void TestCompilerHooks(LLVMCompiler *compiler) { + int pre_opt_hook_call_count = 0; + int post_opt_hook_call_count = 0; + + auto pre_opt_hook = [&pre_opt_hook_call_count](const llvm::Module &) { + ++pre_opt_hook_call_count; + return Status::OK(); + }; + auto post_opt_hook = [&post_opt_hook_call_count](const llvm::Module &) { + ++post_opt_hook_call_count; + return Status::OK(); + }; + + // Create HLO module, and run the compiler. + auto builder = HloComputation::Builder(TestName()); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42.0))); + + auto hlo_module = CreateNewModule(); + hlo_module->AddEntryComputation(builder.Build()); + + compiler->SetPreOptimizationHook(pre_opt_hook); + compiler->SetPostOptimizationHook(post_opt_hook); + + ASSERT_TRUE(compiler + ->Compile(std::move(hlo_module), + backend_->default_stream_executor()) + .ok()); + + // Test that hooks were called. + EXPECT_EQ(1, pre_opt_hook_call_count); + EXPECT_EQ(1, post_opt_hook_call_count); + } + + private: + Platform *FindPlatform() { + for (Platform *platform : + PlatformUtil::GetSupportedPlatforms().ConsumeValueOrDie()) { + if (platform->Name() == platform_name_) { + return platform; + } + } + return nullptr; + } + + string platform_name_; + std::unique_ptr backend_; + + static string TestName() { + return ::testing::UnitTest::GetInstance()->current_test_info()->name(); + } + + static std::unique_ptr CreateNewModule() { + HloModuleConfig config; + config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); + return MakeUnique(TestName(), VersionedComputationHandle(), + config); + } +}; + +class CpuCompilerTest : public LLVMCompilerTest { + public: + CpuCompilerTest() : LLVMCompilerTest("Host") {} +}; + +class GpuCompilerTest : public LLVMCompilerTest { + public: + GpuCompilerTest() : LLVMCompilerTest("CUDA") {} +}; + +TEST_F(CpuCompilerTest, HooksTest) { + cpu::CpuCompiler compiler; + TestCompilerHooks(&compiler); +} + +TEST_F(GpuCompilerTest, HooksTest) { + gpu::GpuCompiler compiler; + TestCompilerHooks(&compiler); } } // namespace -- GitLab From 4640cc88ffcdaeb8724ffc129bc4d900277a5403 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 9 Nov 2017 15:13:37 -0800 Subject: [PATCH 0248/1801] Fix typo in tensorflow/python/framework/function.py PiperOrigin-RevId: 175219920 --- tensorflow/python/framework/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index cef3f8d4c4..29cf223724 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -100,7 +100,7 @@ class Defun(object): grad_func - (optional). A function implementing the gradient of the function-to-register. This is must be a `_DefinedFunction` object. The gradient - function must satisify the criterion defined in + function must satisfy the criterion defined in function.proto:GradientDef. python_grad_func - (optional). A function implementing the -- GitLab From d93d985a737fc6dfad1d0c068500ecb62ef31c9a Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 9 Nov 2017 15:55:07 -0800 Subject: [PATCH 0249/1801] Tensor template argument to gradienttape was unnecessary. PiperOrigin-RevId: 175225805 --- tensorflow/c/eager/tape.h | 112 +++++++++++---------- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/imperative_grad.py | 2 +- tensorflow/python/eager/pywrap_tfe_src.cc | 65 +++++++----- 4 files changed, 104 insertions(+), 77 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 654ceb7bec..29d73c5ca4 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -65,9 +65,6 @@ using OpTape = std::unordered_map>; // adding gradients, getting zeroes, etc. Currently cannot be implemented // without using tensorflow python code, hence left unspecified here. // -// Tensor is a representation of a tensor. We need to take its ID, and it needs -// to match IDs in the tape. -// // Gradient is the type returned by gradient functions. In Python TF it's either // Tensor or IndexedSlices or None, which here we map to nullptr. Gradients need // to allow their size to be computed and they need to be passable to a backward @@ -82,7 +79,7 @@ using OpTape = std::unordered_map>; // TODO(apassos) provide concrete template instantiations for TFE_TensorHandle // specialization, which is blocked by quite a few things needing to loop back // into python now. -template +template class VSpace { public: virtual ~VSpace() {} @@ -99,11 +96,7 @@ class VSpace { virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0; // Returns a Tensor which is filled with ones and like the input. - virtual Gradient* OnesLike(Tensor*) const = 0; - - // Returns an integer which is a unique-to-within-this-program handle for this - // tensor. - virtual int64 TensorId(Tensor* tensor) const = 0; + virtual Gradient* Ones(TensorShape shape, DataType dtype) const = 0; // Calls the passed-in backward function. virtual Status CallBackwardFunction( @@ -117,7 +110,7 @@ class VSpace { // Traces the execution of operations, doing eager garbage collection, and // exporting a full trace so other code can do backpropagation. Not thread-safe. -template +template class GradientTape { public: GradientTape() {} @@ -143,11 +136,11 @@ class GradientTape { // once) and produces the gradient of the target tensors with respect to the // source tensors. The output gradients are used if not empty and not // null. The result is populated with one tensor per target element. - Status ComputeGradient( - const VSpace& vspace, - gtl::ArraySlice target, gtl::ArraySlice sources, - gtl::ArraySlice output_gradients, - std::vector* result); + Status ComputeGradient(const VSpace& vspace, + gtl::ArraySlice target_tensor_ids, + gtl::ArraySlice source_tensor_id, + gtl::ArraySlice output_gradients, + std::vector* result); private: TensorTape tensor_tape_; @@ -161,8 +154,8 @@ class GradientTape { // Template instantiations here -template -bool GradientTape::ShouldRecord( +template +bool GradientTape::ShouldRecord( gtl::ArraySlice tensor_ids) { for (int64 i : tensor_ids) { if (tensor_tape_.find(i) != tensor_tape_.end()) { @@ -172,13 +165,13 @@ bool GradientTape::ShouldRecord( return false; } -template -void GradientTape::Watch(int64 tensor_id) { +template +void GradientTape::Watch(int64 tensor_id) { tensor_tape_.emplace(tensor_id, -1); } -template -void GradientTape::RecordOperation( +template +void GradientTape::RecordOperation( const string& op_type, gtl::ArraySlice output_tensors, gtl::ArraySlice input_tensor_id, BackwardFunction* backward_function, const std::function& backward_function_deleter) { @@ -206,9 +199,8 @@ void GradientTape::RecordOperation( op_type, tensors, ids, backward_function, backward_function_deleter}; } -template -void GradientTape::DeleteTrace( - int64 tensor_id) { +template +void GradientTape::DeleteTrace(int64 tensor_id) { auto it = tensor_usage_.find(tensor_id); if (it == tensor_usage_.end()) { return; @@ -353,15 +345,16 @@ std::vector InitialStack( return result; } -template +template Status InitialGradients( - const VSpace& vspace, - gtl::ArraySlice target, - gtl::ArraySlice output_gradients, - std::unordered_map tensor_usage_counts, + const VSpace& vspace, + gtl::ArraySlice target_tensor_ids, + gtl::ArraySlice output_gradients, const TensorTape& tensor_tape, + const OpTape& op_tape, + const std::unordered_map& tensor_usage_counts, std::unordered_map>* result) { - for (int i = 0; i < target.size(); ++i) { - int64 id = vspace.TensorId(target[i]); + for (int i = 0; i < target_tensor_ids.size(); ++i) { + const int64 id = target_tensor_ids[i]; if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { if (!output_gradients.empty() && output_gradients[i] != nullptr) { // TODO(apassos) figure out how to print debugging information here. @@ -371,7 +364,31 @@ Status InitialGradients( } } else { if (output_gradients.empty() || output_gradients[i] == nullptr) { - (*result)[id].push_back(vspace.OnesLike(target[i])); + auto tensor_it = tensor_tape.find(id); + if (tensor_it != tensor_tape.end() && tensor_it->second != -1) { + auto op_it = op_tape.find(tensor_it->second); + if (op_it == op_tape.end()) { + return errors::Internal( + "Internal state of the gradient tape is invalid."); + } + bool found = false; + for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) { + if (op_it->second.output_tensor_info[j].id == id) { + found = true; + (*result)[id].push_back( + vspace.Ones(op_it->second.output_tensor_info[j].shape, + op_it->second.output_tensor_info[j].dtype)); + break; + } + } + if (!found) { + return errors::Internal( + "Internal state of the gradient tape is invalid."); + } + } else { + // No record of the target tensor found on the tape, so no gradient + // needs to be computed from it. Do nothing. + } } else { (*result)[id].push_back(output_gradients[i]); } @@ -388,29 +405,22 @@ Status InitialGradients( constexpr int kMinAggregateCount = 4; constexpr int kMinAggregateBytes = 128 * 1024 * 1024; -template -Status GradientTape::ComputeGradient( - const VSpace& vspace, - gtl::ArraySlice target, gtl::ArraySlice sources, +template +Status GradientTape::ComputeGradient( + const VSpace& vspace, + gtl::ArraySlice target_tensor_ids, + gtl::ArraySlice source_tensor_ids, gtl::ArraySlice output_gradients, std::vector* result) { - std::vector id_sources; - id_sources.reserve(sources.size()); - for (Tensor* s : sources) { - id_sources.push_back(vspace.TensorId(s)); - } - std::unordered_set sources_set(id_sources.begin(), id_sources.end()); - std::vector id_targets; - id_sources.reserve(target.size()); - for (Tensor* t : target) { - id_targets.push_back(vspace.TensorId(t)); - } + std::unordered_set sources_set(source_tensor_ids.begin(), + source_tensor_ids.end()); BackpropInitialState state = PrepareBackprop( - id_targets, tensor_tape_, std::move(op_tape_), sources_set); + target_tensor_ids, tensor_tape_, std::move(op_tape_), sources_set); std::vector op_stack = InitialStack(state.op_tape, state.op_missing_tensor); std::unordered_map> gradients; - Status s = InitialGradients(vspace, target, output_gradients, + Status s = InitialGradients(vspace, target_tensor_ids, output_gradients, + tensor_tape_, state.op_tape, state.tensor_usage_counts, &gradients); auto cleanup = [&state]() { // Release all backprop functions @@ -542,8 +552,8 @@ Status GradientTape::ComputeGradient( } } CHECK(state.op_tape.empty()); - result->reserve(sources.size()); - for (auto is : id_sources) { + result->reserve(source_tensor_ids.size()); + for (auto is : source_tensor_ids) { auto grad_it = gradients.find(is); if (grad_it == gradients.end()) { result->push_back(nullptr); diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 111d7cef56..0a92ab38a8 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -744,7 +744,7 @@ _default_vspace = imperative_grad.VSpace( aggregate_fn=_aggregate_grads, tensor_id=ops.tensor_id, zeros=_zeros, - ones_like=lambda x: ops.convert_to_tensor(array_ops.ones_like(x))) + ones=array_ops.ones) class GradientTape(object): diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index 8932b7157b..837cad974a 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -26,7 +26,7 @@ from tensorflow.python.framework import errors VSpace = collections.namedtuple( "VSpace", - ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones_like"]) + ["aggregate_fn", "num_elements_fn", "tensor_id", "zeros", "ones"]) def imperative_grad( diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index d67c3b18f7..77b49be8f8 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -443,8 +443,7 @@ void TFE_DeleteContextCapsule(PyObject* context) { TF_DeleteStatus(status); } -using GradientTape = - tensorflow::eager::GradientTape; +using GradientTape = tensorflow::eager::GradientTape; typedef struct { PyObject_HEAD @@ -630,8 +629,7 @@ void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); } -class PyVSpace - : public tensorflow::eager::VSpace { +class PyVSpace : public tensorflow::eager::VSpace { public: explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} @@ -648,9 +646,9 @@ class PyVSpace if (zeros_ == nullptr) { return tensorflow::errors::InvalidArgument("invalid vspace"); } - ones_like_ = PyObject_GetAttrString(reinterpret_cast(py_vspace_), - "ones_like"); - if (ones_like_ == nullptr) { + ones_ = + PyObject_GetAttrString(reinterpret_cast(py_vspace_), "ones"); + if (ones_ == nullptr) { return tensorflow::errors::InvalidArgument("invalid vspace"); } return tensorflow::Status::OK(); @@ -660,7 +658,7 @@ class PyVSpace Py_XDECREF(num_elements_); Py_XDECREF(aggregate_fn_); Py_XDECREF(zeros_); - Py_XDECREF(ones_like_); + Py_XDECREF(ones_); } tensorflow::int64 NumElements(PyObject* tensor) const final { @@ -706,24 +704,21 @@ class PyVSpace return reinterpret_cast(result); } - PyObject* OnesLike(PyObject* tensor) const final { - PyObject* arg_list = Py_BuildValue("(O)", tensor); - PyObject* result = PyEval_CallObject(ones_like_, arg_list); - if (result == nullptr) { - VLOG(1) << "Call to ones_like failed"; + PyObject* Ones(tensorflow::TensorShape shape, + tensorflow::DataType dtype) const final { + PyObject* py_shape = PyTuple_New(shape.dims()); + for (int i = 0; i < shape.dims(); ++i) { + PyTuple_SET_ITEM(py_shape, i, PyLong_FromLong(shape.dim_size(i))); } + PyObject* py_dtype = PyLong_FromLong(static_cast(dtype)); + PyObject* arg_list = Py_BuildValue("OO", py_shape, py_dtype); + PyObject* result = PyEval_CallObject(ones_, arg_list); Py_DECREF(arg_list); + Py_DECREF(py_dtype); + Py_DECREF(py_shape); return result; } - tensorflow::int64 TensorId(PyObject* tensor) const final { - PyObject* py_tensor = reinterpret_cast(tensor); - PyObject* id_field = PyObject_GetAttrString(py_tensor, "_id"); - tensorflow::int64 id = MakeInt(id_field); - Py_DECREF(id_field); - return id; - } - tensorflow::Status CallBackwardFunction( PyObject* backward_function, tensorflow::gtl::ArraySlice output_gradients, @@ -781,7 +776,7 @@ class PyVSpace PyObject* num_elements_; PyObject* aggregate_fn_; PyObject* zeros_; - PyObject* ones_like_; + PyObject* ones_; }; std::vector MakeTensorList(PyObject* tensors) { @@ -799,6 +794,28 @@ std::vector MakeTensorList(PyObject* tensors) { return list; } +std::vector MakeTensorIDList(PyObject* tensors) { + PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); + if (seq == nullptr) { + return {}; + } + int len = PySequence_Fast_GET_SIZE(seq); + std::vector list; + list.reserve(len); + for (int i = 0; i < len; ++i) { + PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i); + if (EagerTensor_CheckExact(tensor)) { + list.push_back(EagerTensor_id(tensor)); + } else { + PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); + list.push_back(MakeInt(id_field)); + Py_DECREF(id_field); + } + } + Py_DECREF(seq); + return list; +} + PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, PyObject* target, PyObject* sources, PyObject* output_gradients, TF_Status* status) { @@ -807,11 +824,11 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, return nullptr; } - std::vector target_vec = MakeTensorList(target); + std::vector target_vec = MakeTensorIDList(target); if (PyErr_Occurred()) { return nullptr; } - std::vector sources_vec = MakeTensorList(sources); + std::vector sources_vec = MakeTensorIDList(sources); if (PyErr_Occurred()) { return nullptr; } -- GitLab From 33135e87f8beb2e1b20f0a4c14e8b8bdc76c7faa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 16:11:06 -0800 Subject: [PATCH 0250/1801] Materialize constants in more cases. PiperOrigin-RevId: 175228264 --- .../grappler/optimizers/constant_folding.cc | 140 +++++++++++++----- .../optimizers/constant_folding_test.cc | 29 +++- tensorflow/core/util/bcast.cc | 2 - 3 files changed, 131 insertions(+), 40 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index a364ca487e..02a732b092 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/bcast.h" namespace tensorflow { namespace grappler { @@ -301,6 +302,44 @@ bool ShapesEqual(const TensorShapeProto& shape1, return true; } +namespace { +bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties, + BCast::Vec* shape, int64* min_id) { + if (shape_node.op() == "Shape") { + const std::vector& prop1 = + properties.GetInputProperties(shape_node.name()); + if (prop1.size() != 1) { + return false; + } + const TensorShapeProto& shp = prop1[0].shape(); + if (shp.unknown_rank()) { + return false; + } + for (const auto& dim : shp.dim()) { + shape->push_back(dim.size()); + *min_id = std::min(*min_id, dim.size()); + } + } else { + const TensorProto& raw_val = shape_node.attr().at("value").tensor(); + if (raw_val.dtype() != DT_INT64 && raw_val.dtype() != DT_INT32) { + return false; + } + Tensor value(raw_val.dtype(), raw_val.tensor_shape()); + if (!value.FromProto(raw_val)) { + return false; + } + for (int j = 0; j < value.NumElements(); ++j) { + if (raw_val.dtype() == DT_INT64) { + shape->push_back(value.vec()(j)); + } else { + shape->push_back(value.vec()(j)); + } + } + } + return true; +} +} // namespace + Status ConstantFolding::MaterializeConstants( const GrapplerItem& item, const GraphProperties& properties) { const int node_count = graph_.node_size(); @@ -312,49 +351,76 @@ Status ConstantFolding::MaterializeConstants( } const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); - if (shape_node1 == nullptr || shape_node1->op() != "Shape" || - shape_node2 == nullptr || shape_node2->op() != "Shape") { + if (shape_node1 == nullptr || + (shape_node1->op() != "Shape" && shape_node1->op() != "Const") || + shape_node2 == nullptr || + (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) { continue; } - const std::vector& prop1 = - properties.GetInputProperties(shape_node1->name()); - const std::vector& prop2 = - properties.GetInputProperties(shape_node2->name()); - if (prop1.size() != 1 || prop2.size() != 1) { + int64 min_id = 0; + BCast::Vec shape1; + if (!ExtractShape(*shape_node1, properties, &shape1, &min_id)) { + continue; + } + BCast::Vec shape2; + if (!ExtractShape(*shape_node2, properties, &shape2, &min_id)) { + continue; + } + // A value of -1 means we don't known anything about the dimension. Replace + // the -1 values with unique dimension ids since we don't want two '-1' + // dimensions to be considered equal. + for (auto& id : shape1) { + if (id == -1) { + id = --min_id; + } + } + for (auto& id : shape2) { + if (id == -1) { + id = --min_id; + } + } + BCast bcast(shape1, shape2); + if (!bcast.IsValid()) { continue; } - const TensorShapeProto& shape1 = prop1[0].shape(); - const TensorShapeProto& shape2 = prop2[0].shape(); - if (ShapesEqual(shape1, shape2)) { - DataType type = node.attr().at("T").type(); - Tensor empty(type, TensorShape()); - NodeDef* out[2]; - for (int i = 0; i < 2; ++i) { + BCast::Vec reduce_dims[2]; + reduce_dims[0] = bcast.grad_x_reduce_idx(); + reduce_dims[1] = bcast.grad_y_reduce_idx(); + + const DataType type = node.attr().at("T").type(); + NodeDef* out[2]; + for (int j = 0; j < 2; ++j) { + if (!reduce_dims[j].empty()) { + // This is the case when a tensor dimension 1 is matched against an + // unknown dimension. The unknown dimension could also be equal to 1, in + // which case there would be no reduction. + out[j] = nullptr; + } else { + Tensor value(type, TensorShape({0})); string const_name = AddPrefixToNodeName( - strings::StrCat(node.name(), "-", i), kConstantFoldingConst); - out[i] = node_map_->GetNode(const_name); - if (!out[i]) { - out[i] = graph_.add_node(); - *out[i] = CreateNodeDef(const_name, TensorValue(&empty)); - out[i]->set_device(node.device()); - node_map_->AddNode(const_name, out[i]); + strings::StrCat(node.name(), "-", j), kConstantFoldingConst); + out[j] = node_map_->GetNode(const_name); + if (!out[j]) { + out[j] = graph_.add_node(); + *out[j] = CreateNodeDef(const_name, TensorValue(&value)); + out[j]->set_device(node.device()); + node_map_->AddNode(const_name, out[j]); string ctrl_dep = AddControlDependency(node.name(), &graph_, node_map_.get()); - *out[i]->add_input() = ctrl_dep; + *out[j]->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), const_name); } } + } - auto outputs = node_map_->GetOutputs(node.name()); - for (const auto& output : outputs) { - for (int k = 0; k < output->input_size(); ++k) { - int port; - string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port >= 0 && port < 2) { - *output->mutable_input(k) = out[port]->name(); - node_map_->UpdateInput(output->name(), node_name, - out[port]->name()); - } + auto outputs = node_map_->GetOutputs(node.name()); + for (const auto& output : outputs) { + for (int k = 0; k < output->input_size(); ++k) { + int port; + string node_name = ParseNodeName(output->input(k), &port); + if (node_name == node.name() && port >= 0 && port < 2 && out[port]) { + *output->mutable_input(k) = out[port]->name(); + node_map_->UpdateInput(output->name(), node_name, out[port]->name()); } } } @@ -1005,15 +1071,13 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, GraphProperties properties(item); Status s = properties.InferStatically(); bool has_feed = !item.feed.empty(); - // bool has_feed = false; + if (!has_feed && s.ok()) { // Only use static shape information when there is no feed in the // graph. That's because it's possible to feed a placeholder with a tensor // of any shape, which could make the static information inconsistent with // the shapes actually fed. - if (s.ok()) { - TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); - } + TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); } if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) { TF_RETURN_IF_ERROR(MaterializeConstants(item, properties)); @@ -1040,12 +1104,14 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GrapplerItem item_to_optimize = item; *output = item.graph; + int64 node_count; do { graph_.Swap(output); item_to_optimize.graph = graph_; *output = GraphDef(); + node_count = graph_.node_size(); TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output)); - } while (output->node_size() < graph_.node_size()); + } while (output->node_size() != node_count); *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 17f9854b59..43f84b1ddf 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -849,10 +849,18 @@ TEST_F(ConstantFoldingTest, ConstantMaterialization) { Output c = ops::Mul(s.WithOpName("c"), a, b); Output d = ops::Shape(s.WithOpName("d"), a); Output e = ops::Shape(s.WithOpName("e"), b); + auto f = ops::internal::BroadcastGradientArgs(s.WithOpName("f"), d, e); Output o1 = ops::Identity(s.WithOpName("o1"), f.r0); Output o2 = ops::Identity(s.WithOpName("o2"), f.r1); + Output g = ops::Placeholder(s.WithOpName("g"), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({1}))); + Output h = ops::Shape(s.WithOpName("h"), g); + auto i = ops::internal::BroadcastGradientArgs(s.WithOpName("i"), d, h); + Output p1 = ops::Identity(s.WithOpName("p1"), i.r0); + Output p2 = ops::Identity(s.WithOpName("p2"), i.r1); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); @@ -881,14 +889,33 @@ TEST_F(ConstantFoldingTest, ConstantMaterialization) { EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^f", node.input(0)); + EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) + .num_elements()); } else if (node.name() == "ConstantFolding/f-1") { ++found; EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^f", node.input(0)); + EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) + .num_elements()); + } else if (node.name() == "p1") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("ConstantFolding/i-0", node.input(0)); + } else if (node.name() == "p2") { + ++found; + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("i:1", node.input(0)); + } else if (node.name() == "ConstantFolding/i-0") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^i", node.input(0)); + EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape()) + .num_elements()); } } - EXPECT_EQ(4, found); + EXPECT_EQ(7, found); } } // namespace diff --git a/tensorflow/core/util/bcast.cc b/tensorflow/core/util/bcast.cc index 47e6ddb3d8..1eab7e3d02 100644 --- a/tensorflow/core/util/bcast.cc +++ b/tensorflow/core/util/bcast.cc @@ -68,9 +68,7 @@ BCast::BCast(const Vec& sx, const Vec& sy, const bool fewer_dims_optimization) { // Output shape. State curr = UNKNOWN; const int64 x_i = x[i]; // i-th dimension of x. - CHECK_GE(x_i, 0); const int64 y_i = y[i]; // i-th dimension of y. - CHECK_GE(y_i, 0); int64 o_i; // i-th dimension of the output. int64 bx_i; // i-th broadcast for x. int64 by_i; // i-th broadcast for y. -- GitLab From cbcd08fcc56f2a871b4dcb3b15ec111dc3e7ebf3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 16:11:25 -0800 Subject: [PATCH 0251/1801] Added some additional documentation to the swish() function PiperOrigin-RevId: 175228315 --- tensorflow/python/ops/nn_impl.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 2c83e4e29f..431ea1186a 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -275,9 +275,6 @@ def _swish_shape(op): return [op.inputs[0].shape] -# Set noinline=True so that sigmoid(features) is re-computed during -# backprop, and we can free the sigmoid(features) expression immediately -# after use during the forward pass. @function.Defun(shape_func=_swish_shape, func_name="swish_grad", noinline=True) def _swish_grad(features, grad): """Gradient of Swish function defined below.""" @@ -287,6 +284,11 @@ def _swish_grad(features, grad): return grad * activation_grad +# Naively, x * tf.nn.sigmoid(x) requires keeping both x and sigmoid(x) around +# for backprop, effectively doubling the tensor's memory consumption. We use a +# @Defun decorator with noinline=True so that sigmoid(features) is re-computed +# during backprop, and we can free the sigmoid(features) expression immediately +# after use during the forward pass. @function.Defun( grad_func=_swish_grad, shape_func=_swish_shape, @@ -296,7 +298,7 @@ def swish(features): # pylint: disable=g-doc-args """Computes the Swish activation function: `x * sigmoid(x)`. - Source: "Swish: a Self-Gated Activation Function" (Ramachandran et al. 2017) + Source: "Searching for Activation Functions" (Ramachandran et al. 2017) https://arxiv.org/abs/1710.05941 Args: -- GitLab From 8c557f9358f8cc003cba7e8b43f99a98ae7c8a52 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 16:23:43 -0800 Subject: [PATCH 0252/1801] Always push updated nodes to the queue for possible further optimization. PiperOrigin-RevId: 175229944 --- .../optimizers/arithmetic_optimizer.cc | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 44d16e5a42..f2277a9b79 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -703,7 +703,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( node_map->AddOutput(new_transpose->name(), new_cast->name()); new_nodes->push_back(new_transpose); - new_nodes->push_back(new_cast); // Add frame dependencies that the original node might have had. AddFrameControlDeps(node, {new_transpose, new_cast}, new_transpose->input(0), {new_transpose}, @@ -880,7 +879,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( new_mul_node->set_device(node->device()); SetDataTypeToAttr(type, "T", new_mul_node); node_map->AddNode(new_mul_node->name(), new_mul_node); - new_nodes->push_back(new_mul_node); new_mul_node->add_input(new_const_node->name()); node_map->AddOutput(new_const_node->name(), new_mul_node->name()); new_mul_node->add_input(node->input(0)); @@ -945,7 +943,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( new_mul_node->set_name(new_mul_node->name() + "_hoist"); new_mul_node->set_input(0, common_factor); new_mul_node->set_input(1, new_add_node->name()); - new_nodes->push_back(new_mul_node); node_map->AddNode(new_mul_node->name(), new_mul_node); } } @@ -1045,10 +1042,14 @@ namespace { template class SetVector { public: - void PushBack(const T& value) { - CHECK(!Exists(value)) << "Value " << value << " is already in the set."; - set_.insert(value); + // Returns false if value already existed in the set, true otherwise. + bool PushBack(const T& value) { + if (!set_.insert(value).second) { + VLOG(2) << "Value " << value << " is already in the set."; + return false; + } vector_.push_back(value); + return true; } T PopBack() { @@ -1089,6 +1090,11 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( } if (NodeName(simplified_tensor) != node->name()) { + // Always consider simplified_tensor for further optimizations. + const NodeDef* simplified_node = node_map.GetNode(simplified_tensor); + if (simplified_node != nullptr) { + nodes_to_simplify.PushBack(simplified_node); + } // When `node` is simplifed to another node rather than in-place, the // consumers of `node` are already redirected to `simplified_tensor`. // Re-push the consumers into `nodes_to_simplify` for further -- GitLab From 9e6d4d4426a86a675dcdfdb26bd9de6d5a12196c Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 9 Nov 2017 16:26:07 -0800 Subject: [PATCH 0253/1801] Increase tolerance in flaky multinomial test. PiperOrigin-RevId: 175230217 --- .../python/kernel_tests/distributions/multinomial_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py index 614a34f077..ebc89f15c5 100644 --- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py +++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py @@ -283,10 +283,10 @@ class MultinomialTest(test.TestCase): dist.variance(), dist.stddev(), ]) - self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.01) - self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.01) - self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.01) - self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.01) + self.assertAllClose(sample_mean_, analytic_mean, atol=0.01, rtol=0.01) + self.assertAllClose(sample_cov_, analytic_cov, atol=0.01, rtol=0.01) + self.assertAllClose(sample_var_, analytic_var, atol=0.01, rtol=0.01) + self.assertAllClose(sample_stddev_, analytic_stddev, atol=0.01, rtol=0.01) def testSampleUnbiasedNonScalarBatch(self): with self.test_session() as sess: -- GitLab From 50ff8aa814a611de1da266911415146ecae549f7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 16:35:57 -0800 Subject: [PATCH 0254/1801] Support more instructions in Hlo parser: Broadcast, Concatenate, Map, Reduce, SelectAndScatter, Reverse, Slice, DynamicSlice, DynamicUpdateSlice, Transpose, BatchNormTraining, BatchNormInference, BatchNormGrad. PiperOrigin-RevId: 175231463 --- .../compiler/xla/service/hlo_instruction.cc | 12 +- .../compiler/xla/tools/parser/hlo_parser.cc | 314 +++++++++++++++++- .../xla/tools/parser/hlo_parser_test.cc | 231 ++++++++++++- 3 files changed, 540 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index ffb933155f..1b2161fc2e 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1891,7 +1891,7 @@ std::vector HloInstruction::ExtraAttributesToString() const { if (padding_config_ != nullptr) { extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); } - if (!slice_starts_.empty() && !slice_limits_.empty()) { + if (opcode() == HloOpcode::kSlice) { std::vector bounds; bounds.reserve(slice_starts_.size()); const bool omit_stride = @@ -1904,6 +1904,16 @@ std::vector HloInstruction::ExtraAttributesToString() const { } extra.push_back(StrCat("slice={", Join(bounds, ", "), "}")); } + if (opcode() == HloOpcode::kDynamicSlice) { + extra.push_back( + StrCat("dynamic_slice_sizes={", Join(dynamic_slice_sizes(), ","), "}")); + } + if (opcode() == HloOpcode::kBatchNormTraining || + opcode() == HloOpcode::kBatchNormInference || + opcode() == HloOpcode::kBatchNormGrad) { + extra.push_back(StrCat("epsilon=", epsilon())); + extra.push_back(StrCat("feature_index=", feature_index())); + } if (convolution_dimension_numbers_ != nullptr) { extra.push_back(ConvolutionDimensionNumbersToString()); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 3741c3daac..710877b4e0 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -80,14 +80,25 @@ class HloParser { bool ParseOperands(std::vector* operands, const int expected_size); + // Describes the start, limit, and stride on every dimension of the operand + // being sliced. + struct SliceRanges { + std::vector starts; + std::vector limits; + std::vector strides; + }; + // Types of attributes. enum class AttrTy { kInt64, + kFloat, + kBracedInt64List, kHloComputation, kWindow, kConvolutionDimensionNumbers, kSharding, kInstructionList, + kSliceRanges, }; struct AttrConfig { @@ -131,6 +142,10 @@ class HloParser { // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. bool ParseWindowPad(std::vector>* pad); + bool ParseSliceRanges(SliceRanges* result); + bool ParseInt64List(const TokKind start, const TokKind end, + const TokKind delim, std::vector* result); + bool ParseParamList(); bool ParseName(string* result); bool ParseAttributeName(string* result); @@ -535,26 +550,190 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums)); break; } - case HloOpcode::kBroadcast: + case HloOpcode::kBroadcast: { + optional> broadcast_dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &broadcast_dimensions}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateBroadcast( + shape, operands[0], *broadcast_dimensions)); + break; + } + case HloOpcode::kConcatenate: { + optional> dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs) || + dimensions->size() != 1) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateConcatenate( + shape, operands, dimensions->at(0))); + break; + } + case HloOpcode::kMap: { + optional to_apply; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &to_apply}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateMap(shape, operands, *to_apply)); + break; + } + case HloOpcode::kReduce: { + optional reduce_computation; + attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, + &reduce_computation}; + optional> dimensions_to_reduce; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions_to_reduce}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateReduce( + shape, /*operand=*/operands[0], /*init_value=*/operands[1], + *dimensions_to_reduce, *reduce_computation)); + break; + } + case HloOpcode::kReverse: { + optional> dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateReverse(shape, operands[0], *dimensions)); + break; + } + case HloOpcode::kSelectAndScatter: { + optional select; + attrs["select"] = {/*required=*/true, AttrTy::kHloComputation, &select}; + optional scatter; + attrs["scatter"] = {/*required=*/true, AttrTy::kHloComputation, &scatter}; + optional window; + attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateSelectAndScatter( + shape, /*operand=*/operands[0], *select, *window, + /*source=*/operands[1], /*init_value=*/operands[2], *scatter)); + break; + } + case HloOpcode::kSlice: { + optional slice_ranges; + attrs["slice"] = {/*required=*/true, AttrTy::kSliceRanges, &slice_ranges}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateSlice( + shape, operands[0], slice_ranges->starts, slice_ranges->limits, + slice_ranges->strides)); + break; + } + case HloOpcode::kDynamicSlice: { + optional> dynamic_slice_sizes; + attrs["dynamic_slice_sizes"] = { + /*required=*/true, AttrTy::kBracedInt64List, &dynamic_slice_sizes}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateDynamicSlice( + shape, /*operand=*/operands[0], /*start_indices=*/operands[1], + *dynamic_slice_sizes)); + break; + } + case HloOpcode::kDynamicUpdateSlice: { + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + shape, /*operand=*/operands[0], /*update=*/operands[1], + /*start_indices=*/operands[2])); + break; + } + case HloOpcode::kTranspose: { + optional> dimensions; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateTranspose(shape, operands[0], *dimensions)); + break; + } + case HloOpcode::kBatchNormTraining: { + optional epsilon; + attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; + optional feature_index; + attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, + &feature_index}; + if (!ParseOperands(&operands, /*expected_size=*/3) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateBatchNormTraining( + shape, /*operand=*/operands[0], /*scale=*/operands[1], + /*offset=*/operands[2], *epsilon, *feature_index)); + break; + } + case HloOpcode::kBatchNormInference: { + optional epsilon; + attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; + optional feature_index; + attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, + &feature_index}; + if (!ParseOperands(&operands, /*expected_size=*/5) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateBatchNormInference( + shape, /*operand=*/operands[0], /*scale=*/operands[1], + /*offset=*/operands[2], /*mean=*/operands[3], + /*variance=*/operands[4], *epsilon, *feature_index)); + break; + } + case HloOpcode::kBatchNormGrad: { + optional epsilon; + attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; + optional feature_index; + attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, + &feature_index}; + if (!ParseOperands(&operands, /*expected_size=*/5) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateBatchNormGrad( + shape, /*operand=*/operands[0], /*scale=*/operands[1], + /*mean=*/operands[2], /*variance=*/operands[3], + /*grad_output=*/operands[4], *epsilon, *feature_index)); + break; + } case HloOpcode::kCustomCall: - case HloOpcode::kConcatenate: case HloOpcode::kReducePrecision: - case HloOpcode::kMap: case HloOpcode::kPad: - case HloOpcode::kReduce: - case HloOpcode::kSelectAndScatter: - case HloOpcode::kReverse: case HloOpcode::kRng: - case HloOpcode::kSlice: - case HloOpcode::kDynamicSlice: - case HloOpcode::kDynamicUpdateSlice: - case HloOpcode::kTranspose: case HloOpcode::kFusion: - case HloOpcode::kBatchNormTraining: - case HloOpcode::kBatchNormInference: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: - case HloOpcode::kBatchNormGrad: case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); @@ -1121,6 +1300,19 @@ bool HloParser::ParseAttributes( static_cast*>(attr_out_ptr)->emplace(result); return true; } + case AttrTy::kFloat: { + double result; + if (!ParseDouble(&result)) { + return false; + } + if (result > std::numeric_limits::max() || + result < std::numeric_limits::lowest()) { + return TokenError("value out of range for float"); + } + static_cast*>(attr_out_ptr) + ->emplace(static_cast(result)); + return true; + } case AttrTy::kHloComputation: { HloComputation* result; if (!ParseComputationName(&result)) { @@ -1164,6 +1356,24 @@ bool HloParser::ParseAttributes( ->emplace(result); return true; } + case AttrTy::kBracedInt64List: { + std::vector result; + if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, + TokKind::kComma, &result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kSliceRanges: { + SliceRanges result; + if (!ParseSliceRanges(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } } }(); if (!success) { @@ -1380,6 +1590,84 @@ bool HloParser::ParseConvolutionDimensionNumbers( return true; } +// ::= '{' ranges '}' +// ::= /*empty*/ +// ::= range (',' range)* +// range ::= '[' start ':' limit (':' stride)? ']' +// +// The slice ranges are printed as: +// +// {[dim0_start:dim0_limit:dim0stride], [dim1_start:dim1_limit], ...} +// +// This function extracts the starts, limits, and strides as 3 vectors to the +// result. If stride is not present, stride is 1. For example, if the slice +// ranges is printed as: +// +// {[2:3:4], [5:6:7], [8:9]} +// +// The the parsed result will be: +// +// {/*starts=*/{2, 5, 8}, /*limits=*/{3, 6, 9}, /*strides=*/{4, 7, 1}} +// +bool HloParser::ParseSliceRanges(SliceRanges* result) { + if (!ParseToken(TokKind::kLbrace, "expects '{' to start ranges")) { + return false; + } + std::vector> ranges; + if (lexer_.GetKind() == TokKind::kRbrace) { + // empty + return ParseToken(TokKind::kRbrace, "expects '}' to end ranges"); + } + do { + ranges.emplace_back(); + if (!ParseInt64List(TokKind::kLsquare, TokKind::kRsquare, TokKind::kColon, + &ranges.back())) { + return false; + } + } while (EatIfPresent(TokKind::kComma)); + + for (const auto& range : ranges) { + if (range.size() != 2 && range.size() != 3) { + return TokenError(Printf( + "expects [start:limit:step] or [start:limit], but sees %ld elements.", + range.size())); + } + } + + for (const auto& range : ranges) { + result->starts.push_back(range[0]); + result->limits.push_back(range[1]); + result->strides.push_back(range.size() == 3 ? range[2] : 1); + } + return ParseToken(TokKind::kRbrace, "expects '}' to end ranges"); +} + +// int64list ::= start int64_elements end +// int64_elements +// ::= /*empty*/ +// ::= int64_val (delim int64_val)* +bool HloParser::ParseInt64List(const TokKind start, const TokKind end, + const TokKind delim, + std::vector* result) { + if (!ParseToken(start, StrCat("expects an int64 list starting with ", + TokKindToString(start)))) { + return false; + } + if (lexer_.GetKind() == end) { + // empty + } else { + do { + int64 i; + if (!ParseInt64(&i)) { + return false; + } + result->push_back(i); + } while (EatIfPresent(delim)); + } + return ParseToken( + end, StrCat("expects an int64 list to end with ", TokKindToString(end))); +} + // param_list ::= '(' param_list1 ')' // param_list1 // ::= /*empty*/ diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index ca476a4bb7..fbe0409e3d 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -44,10 +44,11 @@ std::vector CreateTestCases() { "AxpyParam", R"(HloModule axpy_module: -ENTRY %axpy.v5 (alpha: f32[2,4], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { - %alpha = f32[2,4]{1,0} parameter(0) +ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { + %alpha = f32[] parameter(0) + %broadcast = f32[2,4]{1,0} broadcast(f32[] %alpha), dimensions={} %x = f32[2,4]{1,0} parameter(1) - %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %alpha, f32[2,4]{1,0} %x) + %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x) %y = f32[2,4]{1,0} parameter(2) ROOT %add = f32[2,4]{1,0} add(f32[2,4]{1,0} %multiply, f32[2,4]{1,0} %y) } @@ -296,6 +297,218 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f } +)" +}, +// reverse(constant) +{ +"Reverse4D", +R"(HloModule Reverse4DFloatArrayOnDim01_module: + +ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] { + %constant = f32[4,3,2,1]{0,1,2,3} constant(f32[4,3,2,1] { { /*i0=0*/ { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} }, { /*i1=2*/ {5}, {6} } }, { /*i0=1*/ { /*i1=0*/ {7}, {8} }, { /*i1=1*/ {9}, {10} }, { /*i1=2*/ {11}, {12} } }, { /*i0=2*/ { /*i1=0*/ {13}, {14} }, { /*i1=1*/ {15}, {16} }, { /*i1=2*/ {17}, {18} } }, { /*i0=3*/ { /*i1=0*/ {19}, {20} }, { /*i1=1*/ {21}, {22} }, { /*i1=2*/ {23}, {24} } } }) + ROOT %reverse = f32[4,3,2,1]{0,1,2,3} reverse(f32[4,3,2,1]{0,1,2,3} %constant), dimensions={0,1} +} + +)" +}, +// concat +{ +"Concat", +R"(HloModule Concat2x3With2x5_module: + +ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] { + %constant = f32[2,3]{1,0} constant(f32[2,3] { { 0, 1, 2 }, { 1000, 1001, 1002 } }) + %constant.1 = f32[2,5]{1,0} constant(f32[2,5] { { 64, 65, 66, 67, 68 }, { 1064, 1065, 1066, 1067, 1068 } }) + ROOT %concatenate = f32[2,8]{1,0} concatenate(f32[2,3]{1,0} %constant, f32[2,5]{1,0} %constant.1), dimensions={1} +} + +)" +}, +// map +{ +"Map", +R"(HloModule MapBinaryAdder_module: + +%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) +} + +ENTRY %MapBinaryAdder.v3 (param0: f32[4], param1: f32[4]) -> f32[4] { + %param0 = f32[4]{0} parameter(0) + %param1 = f32[4]{0} parameter(1) + ROOT %map = f32[4]{0} map(f32[4]{0} %param0, f32[4]{0} %param1), to_apply=%add_F32.v3 +} + +)" +}, +// reduce +{ +"Reduce", +R"(HloModule ReduceR3ToR2_module: + +%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) +} + +ENTRY %ReduceR3ToR2.v3 (input: f32[8,16,256]) -> f32[8,16] { + %input = f32[8,16,256]{2,1,0} parameter(0) + %constant = f32[] constant(0) + ROOT %reduce = f32[8,16]{1,0} reduce(f32[8,16,256]{2,1,0} %input, f32[] %constant), dimensions={2}, to_apply=%add_F32.v3 +} + +)" +}, +// select and scatter +{ +"SelectAndScatter", +R"(HloModule R4F32OverlapSmall_module: + +%ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %greater-than-or-equal-to = pred[] greater-than-or-equal-to(f32[] %lhs, f32[] %rhs) +} + +%add_F32.v3 (lhs.1: f32[], rhs.1: f32[]) -> f32[] { + %lhs.1 = f32[] parameter(0) + %rhs.1 = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs.1, f32[] %rhs.1) +} + +ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] { + %constant = f32[4,5,1,1]{3,2,1,0} constant(f32[4,5,1,1] { { /*i0=0*/ { /*i1=0*/ {7} }, { /*i1=1*/ {2} }, { /*i1=2*/ {5} }, { /*i1=3*/ {3} }, { /*i1=4*/ {8} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {8} }, { /*i1=2*/ {9} }, { /*i1=3*/ {3} }, { /*i1=4*/ {4} } }, { /*i0=2*/ { /*i1=0*/ {1} }, { /*i1=1*/ {5} }, { /*i1=2*/ {7} }, { /*i1=3*/ {5} }, { /*i1=4*/ {6} } }, { /*i0=3*/ { /*i1=0*/ {0} }, { /*i1=1*/ {6} }, { /*i1=2*/ {2} }, { /*i1=3*/ {10} }, { /*i1=4*/ {2} } } }) + %constant.1 = f32[2,2,1,1]{3,2,1,0} constant(f32[2,2,1,1] { { /*i0=0*/ { /*i1=0*/ {2} }, { /*i1=1*/ {6} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {1} } } }) + %constant.2 = f32[] constant(0) + ROOT %select-and-scatter = f32[4,5,1,1]{3,2,1,0} select-and-scatter(f32[4,5,1,1]{3,2,1,0} %constant, f32[2,2,1,1]{3,2,1,0} %constant.1, f32[] %constant.2), window={size=2x3x1x1 stride=2x2x1x1}, select=%ge_F32.v3, scatter=%add_F32.v3 +} + +)" +}, +// slice +{ +"Slice", +R"(HloModule slice_module: + +ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { + %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0) + ROOT %slice = f32[3,3,2,4]{3,2,1,0} slice(f32[3,3,4,4]{3,2,1,0} %p0), slice={[0:3:1], [0:3:1], [0:4:2], [0:4:1]} +} + +)" +}, +// slice, no stride +{ +"SliceNoStride", +R"(HloModule Slice3x3x3_To_1x3x3_F32_module: + +ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] { + %constant = f32[3,3,3]{2,1,0} constant(f32[3,3,3] { { { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 } }, { { 9, 10, 11 }, { 12, 13, 14 }, { 15, 16, 17 } }, { { 18, 19, 20 }, { 21, 22, 23 }, { 24, 25, 26 } } }) + ROOT %slice = f32[1,3,3]{2,1,0} slice(f32[3,3,3]{2,1,0} %constant), slice={[0:1], [0:3], [0:3]} +} + +)" +}, +// slice R0 +{ +"SliceR0", +R"(HloModule SliceR0_module: + +ENTRY %SliceR0.v2 () -> s32[] { + %constant = s32[] constant(1) + ROOT %slice = s32[] slice(s32[] %constant), slice={} +} + +)" +}, +// transpose +{ +"Transpose", +R"(HloModule Transpose_module: + +ENTRY %Transpose.v2 () -> s32[1,2,3] { + %constant = s32[1,2,3]{2,1,0} constant(s32[1,2,3] { { { 1, 2, 3 }, { 4, 5, 6 } } }) + ROOT %transpose = s32[1,2,3]{2,1,0} transpose(s32[1,2,3]{2,1,0} %constant), dimensions={0,1,2} +} + +)" +}, +// Dynamic slice +{ +"DynamicSlice", +R"(HloModule DynamicSlice_module: + +ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) -> s32[2,2,258] { + %original_parameter = s32[2,2,258]{2,1,0} parameter(0) + %constant = s32[1]{0} constant({0}) + %start_index = s32[1]{0} parameter(1) + %concatenate = s32[3]{0} concatenate(s32[1]{0} %constant, s32[1]{0} %constant, s32[1]{0} %start_index), dimensions={0} + ROOT %dynamic-slice = s32[2,2,258]{2,1,0} dynamic-slice(s32[2,2,258]{2,1,0} %original_parameter, s32[3]{0} %concatenate), dynamic_slice_sizes={2,2,258} +} + +)" +}, +// Dynamic update slice +{ +"DynamicUpdateSlice", +R"(HloModule DynamicUpdateSlice_module: + +ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_indices: s32[4]) -> s32[1,1,25,1] { + %input = s32[1,1,25,1]{3,2,1,0} parameter(0) + %update = s32[1,1,2,1]{3,2,1,0} parameter(1) + %start_indices = s32[4]{0} parameter(2) + ROOT %dynamic-update-slice = s32[1,1,25,1]{3,2,1,0} dynamic-update-slice(s32[1,1,25,1]{3,2,1,0} %input, s32[1,1,2,1]{3,2,1,0} %update, s32[4]{0} %start_indices) +} + +)" +}, +// batch norm training +{ +"BatchNormTraining", +R"(HloModule BasicTraining_module: + +ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) { + %constant = f32[2,2,1,2]{3,2,1,0} constant(f32[2,2,1,2] { { /*i0=0*/ { /*i1=0*/ {1, 2} }, { /*i1=1*/ {3, 4} } }, { /*i0=1*/ { /*i1=0*/ {5, 6} }, { /*i1=1*/ {7, 8} } } }) + %constant.1 = f32[2]{0} constant({2, 3}) + %constant.2 = f32[2]{0} constant({1, 2}) + ROOT %batch-norm-training = (f32[2,2,1,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-training(f32[2,2,1,2]{3,2,1,0} %constant, f32[2]{0} %constant.1, f32[2]{0} %constant.2), epsilon=0.001, feature_index=3 +} + +)" +}, +// batch norm inference +{ +"BatchNormInference", +R"(HloModule BatchNormInference_module: + +ENTRY %BatchNormInference.v6 (input: f32[2,2,2,2], offset: f32[2], scale: f32[2], mean: f32[2], variance: f32[2]) -> f32[2,2,2,2] { + %input = f32[2,2,2,2]{3,2,1,0} parameter(0) + %offset = f32[2]{0} parameter(1) + %scale = f32[2]{0} parameter(2) + %mean = f32[2]{0} parameter(3) + %variance = f32[2]{0} parameter(4) + ROOT %batch-norm-inference = f32[2,2,2,2]{3,2,1,0} batch-norm-inference(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %offset, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance), epsilon=0.001, feature_index=0 +} + +)" +}, +// batch norm grad +{ +"BatchNormGrad", +R"(HloModule BatchNormGrad_module: + +ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], variance: f32[2], grad_output: f32[2,2,2,2]) -> (f32[2,2,2,2], f32[2], f32[2]) { + %input = f32[2,2,2,2]{3,2,1,0} parameter(0) + %scale = f32[2]{0} parameter(1) + %mean = f32[2]{0} parameter(2) + %variance = f32[2]{0} parameter(3) + %grad_output = f32[2,2,2,2]{3,2,1,0} parameter(4) + ROOT %batch-norm-grad = (f32[2,2,2,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-grad(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance, f32[2,2,2,2]{3,2,1,0} %grad_output), epsilon=0.001, feature_index=0 +} + )" } }); @@ -568,6 +781,18 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { "'done' is not defined"); } +TEST_F(HloParserTest, SliceAllowOmitStride1) { + const string original = R"(HloModule slice_module: + +ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { + %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0) + ROOT %slice = f32[3,3,2,4]{3,2,1,0} slice(f32[3,3,4,4]{3,2,1,0} %p0), slice={[0:3], [0:3], [0:4:2], [0:4]} +} + +)"; + TF_EXPECT_OK(Parse(original).status()); +} + } // namespace } // namespace tools } // namespace xla -- GitLab From be4b984747402c224d9aa3687e94db3da972ecf4 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 9 Nov 2017 16:40:22 -0800 Subject: [PATCH 0255/1801] De-flakify and re-enable tests in replicate_model_fn_test.py. "Reduce metric variables" operation is a single operation across all metric variables, which means it is across all eval metrics. Previously, an update op for every eval metric was conditioned on a copy of overall "reduce metric variables" op. The latter was meant to be idempotent and thus the end result was supposed to be correct. However, "reduce metric variables" op consists of a number of variable assignments and thus is not atomic. If execution of two "reduce metric variables" ops interleaves, then the end result might come out to be incorrect. This caused flakiness in replicate_model_fn_test.py. To fix the problem, there is now a single copy of the "reduce metric variables" and every eval metric is associated with that single instance. PiperOrigin-RevId: 175232016 --- .../python/estimator/replicate_model_fn.py | 18 +-- .../estimator/replicate_model_fn_test.py | 108 +++++++++--------- 2 files changed, 58 insertions(+), 68 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index 421bf18c45..0848c5f62f 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -357,25 +357,17 @@ def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'): [spec.loss for spec in tower_specs], aggregation_device, aggregated_loss_name) - eval_metric_ops_lists = {} + update_ops = [] for tower_spec in tower_specs: - metrics = tower_spec.eval_metric_ops or {} - for name, (_, update_op) in six.iteritems(metrics): - update_ops = eval_metric_ops_lists.setdefault(name, ([])) + for name, (_, update_op) in six.iteritems(tower_spec.eval_metric_ops): update_ops.append(update_op) + with ops_lib.control_dependencies(update_ops): + reduced_update_op = _reduce_metric_variables(len(tower_specs)) + eval_metric_ops = {} for name, (metric_tensor, _) in six.iteritems(tower_specs[0].eval_metric_ops): - with ops_lib.control_dependencies(eval_metric_ops_lists[name]): - # This operation reduces local variables across all metrics, yet is - # called for every metric. This is redundant and it's done because - # it is hard to know what local variables correspond to what metric. - # Estimator is going to execute all `reduced_update_op`s as part of - # a group inside a single `Session.run()` call, which will avoid duplicate - # computation. - reduced_update_op = _reduce_metric_variables(len(tower_specs)) eval_metric_ops[name] = (metric_tensor, reduced_update_op) - estimator_spec['eval_metric_ops'] = eval_metric_ops return model_fn_lib.EstimatorSpec(**estimator_spec) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index bb06700160..21d5a9c327 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -223,34 +223,34 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): features, labels, self.params) del estimator_spec -# TODO(isaprykin): Resolve the source of flakinness. -# def test_eval(self): -# features = np.array([[0.01], [0.002]]) -# labels = np.array([[0.01], [0.02]]) -# -# with self.test_session() as session: -# replicated_model_fn = replicate_model_fn.replicate_model_fn( -# self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) -# estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, -# labels, self.params) -# session.run(variables.local_variables_initializer()) -# session.run(variables.global_variables_initializer()) -# -# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] -# auc, b = estimator_spec.eval_metric_ops['auc'] -# -# session.run([a, b]) -# accuracy = session.run(accuracy) -# auc = session.run(auc) -# -# # Accuracy is 0.0 (no match) in the first tower. -# # Accuracy is 1.0 (match) in the second tower, since the feature -# # times weight "c" happened to be equal to the label. -# total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) -# -# self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) -# self.assertEqual(0, auc) -# self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) + def test_eval(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn.replicate_model_fn( + self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, + labels, self.params) + session.run(variables.local_variables_initializer()) + session.run(variables.global_variables_initializer()) + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + session.run([a, b]) + accuracy = session.run(accuracy) + auc = session.run(auc) + + # loss[i] = features[i] * 10 - labels[i]. + # Accuracy is 0.0 (no match) in the first tower. + # Accuracy is 1.0 (match) in the second tower, since the feature + # times weight "c" happened to be equal to the label. + total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) + + self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) def test_predict(self): features = np.array([[0.01], [0.002]]) @@ -524,33 +524,31 @@ class EvalSpecTest(test_util.TensorFlowTestCase): } return metrics -# TODO(isaprykin): Resolve the source of flakinness. -# def test_example(self): -# with self.test_session() as session: -# tower_losses = map(self.create_constant_loss, [2, 4, 6]) -# tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) -# tower_specs = [ -# self.create_estimator_spec(l, m) -# for l, m in zip(tower_losses, tower_metrics) -# ] -# session.run(variables.local_variables_initializer()) -# -# estimator_spec = replicate_model_fn._eval_spec( -# tower_specs, aggregation_device='/device:GPU:0') -# -# accuracy, a = estimator_spec.eval_metric_ops['accuracy'] -# auc, b = estimator_spec.eval_metric_ops['auc'] -# -# self.assertEqual('/device:CPU:0', accuracy.device) -# self.assertEqual('/device:CPU:0', auc.device) -# -# session.run([a, b]) -# accuracy = session.run(accuracy) -# auc = session.run(auc) -# -# self.assertNear((12 - 2) / 12, accuracy, 0.01) -# self.assertEqual(0, auc) -# self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) + def test_example(self): + with self.test_session() as session: + tower_losses = map(self.create_constant_loss, [2, 4, 6]) + tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) + tower_specs = [ + self.create_estimator_spec(l, m) + for l, m in zip(tower_losses, tower_metrics) + ] + session.run(variables.local_variables_initializer()) + + estimator_spec = replicate_model_fn._eval_spec( + tower_specs, aggregation_device='/device:GPU:0') + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + self.assertEqual('/device:CPU:0', accuracy.device) + self.assertEqual('/device:CPU:0', auc.device) + + session.run([a, b]) + accuracy, auc = session.run([accuracy, auc]) + + self.assertNear((12 - 2) / 12, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) def test_handles_single_tower(self): with self.test_session() as session: -- GitLab From 4506cb5ce176cb16bfed9a2b460d6392af43c0d6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Nov 2017 16:45:14 -0800 Subject: [PATCH 0256/1801] Improved the reporting of dimensions PiperOrigin-RevId: 175232587 --- tensorflow/python/grappler/model_analyzer.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc index 4ec7620bce..7d365c3be9 100644 --- a/tensorflow/python/grappler/model_analyzer.cc +++ b/tensorflow/python/grappler/model_analyzer.cc @@ -59,10 +59,15 @@ void ModelAnalyzer::PrintNodeInfo(const NodeDef* node, if (i > 0) { os << ", "; } - if (prop.shape().dim(i).size() < 0) { + if (prop.shape().dim(i).size() >= 0) { + // Print the actual dimension. + os << prop.shape().dim(i).size(); + } else if (prop.shape().dim(i).size() == -1) { + // We don't know anything about the dimension. os << "?"; } else { - os << prop.shape().dim(i).size(); + // Symbolic dimension. + os << "x" << -prop.shape().dim(i).size(); } } os << "]"; -- GitLab From f9c7db10e294a011baf4d168e80c8209efb57f72 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 9 Nov 2017 17:10:33 -0800 Subject: [PATCH 0257/1801] [StreamExecutor] LOG(ERROR) the driver version when cudnnCreate fails. Previously we LOG(INFO)'ed the driver version, which meant it wouldn't be printed unless you passed --logtostderr. But this information is pretty important, especially since cudnnCreate failing is likely to be a fatal error. PiperOrigin-RevId: 175235628 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index a20334e40a..ad8164c7f9 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -390,8 +390,8 @@ port::Status CudnnSupport::Init() { << DriverVersionStatusToString(result); } else { const auto& version = result.ValueOrDie(); - LOG(INFO) << "possibly insufficient driver version: " - << DriverVersionToString(version); + LOG(ERROR) << "possibly insufficient driver version: " + << DriverVersionToString(version); // OS X kernel driver does not report version accurately #if !defined(__APPLE__) if (std::get<0>(version) < 340) { -- GitLab From 8392a4b8e9d6d7ccbfde15dcdda0477c2791b6dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Nov 2017 17:55:08 -0800 Subject: [PATCH 0258/1801] Hlo parser: support padding. Also, give PaddingConfig its own ToString format. PiperOrigin-RevId: 175239832 --- .../compiler/xla/service/hlo_instruction.cc | 18 ++++++- .../compiler/xla/service/hlo_instruction.h | 2 + .../compiler/xla/tools/parser/README.md | 6 +-- .../compiler/xla/tools/parser/hlo_lexer.cc | 13 ++--- .../compiler/xla/tools/parser/hlo_lexer.h | 2 +- .../compiler/xla/tools/parser/hlo_parser.cc | 51 +++++++++++++++++- .../xla/tools/parser/hlo_parser_test.cc | 52 ++++++++++++++++++- .../compiler/xla/tools/parser/hlo_token.h | 2 +- 8 files changed, 130 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 1b2161fc2e..674d3e3836 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1889,7 +1889,8 @@ std::vector HloInstruction::ExtraAttributesToString() const { extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); } if (padding_config_ != nullptr) { - extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); + extra.push_back( + StrCat("padding=", xla::PaddingConfigToString(*padding_config_))); } if (opcode() == HloOpcode::kSlice) { std::vector bounds; @@ -2894,6 +2895,21 @@ StatusOr StringToFusionKind( return InvalidArgument("Unknown fusion kind: %s", kind_name.c_str()); } +string PaddingConfigToString(const PaddingConfig& padding) { + bool has_interior_padding = + std::any_of(padding.dimensions().begin(), padding.dimensions().end(), + [](const PaddingConfig::PaddingConfigDimension& dim) { + return dim.interior_padding() != 0; + }); + return Join( + padding.dimensions(), "x", + [&](string* out, const PaddingConfig::PaddingConfigDimension& dim) { + StrAppend( + out, dim.edge_padding_low(), "_", dim.edge_padding_high(), + has_interior_padding ? StrCat("_", dim.interior_padding()) : ""); + }); +} + std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) { return os << ToString(kind); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 974d43d89e..64a88164a7 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1234,6 +1234,8 @@ string ToString(HloInstruction::FusionKind kind); StatusOr StringToFusionKind( const string& kind_name); +string PaddingConfigToString(const PaddingConfig& padding); + std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind); // Map classes that guarantee a deterministic iteration order when the key is diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md index 986041caf6..b768b94e77 100644 --- a/tensorflow/compiler/xla/tools/parser/README.md +++ b/tensorflow/compiler/xla/tools/parser/README.md @@ -54,9 +54,9 @@ attribute attribute_value : kInt | kName - | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ - | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ - | [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* /*window_pad_pattern*/ + | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ + | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ + | [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* /*pad_pattern*/ | '{' sub_attributes '}' ; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index f70386411c..b5befbf58b 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -254,13 +254,13 @@ TokKind HloLexer::LexPercent() { } // Lex integer and floating-point values, -inf, and patterns for dim labels, -// dxd (e.g. 1x2x3), and window pad. +// dxd (e.g. 1x2x3), and pad. // // fp with exp ::= [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) // fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) // dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} // dxd_pattern ::= [0-9]+(x[0-9]+)+ -// window_pad_pattern ::= [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* +// pad_pattern ::= [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* // int ::= [-]?[0-9]+ // negative inf ::= '-inf' TokKind HloLexer::LexNumberOrPattern() { @@ -277,7 +277,8 @@ TokKind HloLexer::LexNumberOrPattern() { static LazyRE2 dim_labels_pattern = { R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"}; - static LazyRE2 pad_pattern = {R"([0-9]+_[0-9]+(x[0-9]+_[0-9]+)*)"}; + static LazyRE2 pad_pattern = { + R"([0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*)"}; if (RE2::Consume(&consumable, *dim_labels_pattern)) { current_ptr_ = consumable.begin(); @@ -294,7 +295,7 @@ TokKind HloLexer::LexNumberOrPattern() { if (RE2::Consume(&consumable, *pad_pattern)) { current_ptr_ = consumable.begin(); str_val_.assign(token_start_, current_ptr_); - return TokKind::kWindowPad; + return TokKind::kPad; } static LazyRE2 int_pattern = {R"([-]?\d+)"}; @@ -395,8 +396,8 @@ string TokKindToString(TokKind kind) { return "kDimLabels"; case TokKind::kDxD: return "kDxD"; - case TokKind::kWindowPad: - return "kWindowPad"; + case TokKind::kPad: + return "kPad"; case TokKind::kShape: return "kShape"; case TokKind::kOpcode: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 74e6829180..79c4f271a1 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -45,7 +45,7 @@ class HloLexer { case TokKind::kAttributeName: case TokKind::kDimLabels: case TokKind::kDxD: - case TokKind::kWindowPad: + case TokKind::kPad: return str_val_; default: LOG(FATAL) << "This token does not have string value"; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 710877b4e0..fed0492a54 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -99,6 +99,7 @@ class HloParser { kSharding, kInstructionList, kSliceRanges, + kPaddingConfig, }; struct AttrConfig { @@ -134,6 +135,7 @@ class HloParser { bool ParseInstructionNames(std::vector* instructions); bool ParseWindow(Window* window); bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); + bool ParsePaddingConfig(PaddingConfig* padding); bool ParseSharding(OpSharding* sharding); bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); @@ -727,9 +729,19 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, /*grad_output=*/operands[4], *epsilon, *feature_index)); break; } + case HloOpcode::kPad: { + optional padding; + attrs["padding"] = {/*required=*/true, AttrTy::kPaddingConfig, &padding}; + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreatePad( + shape, operands[0], /*padding_value=*/operands[1], *padding)); + break; + } case HloOpcode::kCustomCall: case HloOpcode::kReducePrecision: - case HloOpcode::kPad: case HloOpcode::kRng: case HloOpcode::kFusion: case HloOpcode::kInfeed: @@ -1374,6 +1386,14 @@ bool HloParser::ParseAttributes( static_cast*>(attr_out_ptr)->emplace(result); return true; } + case AttrTy::kPaddingConfig: { + PaddingConfig result; + if (!ParsePaddingConfig(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } } }(); if (!success) { @@ -1774,7 +1794,7 @@ bool HloParser::ParseWindowPad(std::vector>* pad) { if (!pad->empty()) { return TokenError("sub-attribute 'pad=' already exists"); } - if (lexer_.GetKind() != TokKind::kWindowPad) { + if (lexer_.GetKind() != TokKind::kPad) { return TokenError("expects window pad pattern, e.g., '0_0x3_3'"); } string str = lexer_.GetStrVal(); @@ -1792,6 +1812,33 @@ bool HloParser::ParseWindowPad(std::vector>* pad) { return true; } +// This is the inverse xla::ToString(PaddingConfig). The padding config string +// looks like "0_0_0x3_3_1". The string is first separated by 'x', each +// substring represents one PaddingConfigDimension. The substring is 3 (or 2) +// numbers joined by '_'. +bool HloParser::ParsePaddingConfig(PaddingConfig* padding) { + if (lexer_.GetKind() != TokKind::kPad) { + return TokenError("expects padding config, e.g., '0_0_0x3_3_1'"); + } + string str = lexer_.GetStrVal(); + std::vector padding_str = Split(str, 'x'); + for (const auto& padding_dim_str : padding_str) { + std::vector padding_dim; + if (!SplitAndParseAsInts(padding_dim_str, '_', &padding_dim) || + (padding_dim.size() != 2 && padding_dim.size() != 3)) { + return TokenError( + "expects padding config pattern like 'low_high_interior' or " + "'low_high'"); + } + auto* dim = padding->add_dimensions(); + dim->set_edge_padding_low(padding_dim[0]); + dim->set_edge_padding_high(padding_dim[1]); + dim->set_interior_padding(padding_dim.size() == 3 ? padding_dim[2] : 0); + } + lexer_.Lex(); + return true; +} + bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << "ParseOpcode"; if (lexer_.GetKind() != TokKind::kOpcode) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index fbe0409e3d..d19c6e1877 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -36,6 +36,10 @@ string TestDataToString(const ::testing::TestParamInfo& data) { return data.param.test_name; } +// For each string below, we check that: +// - we parse it to an HloModule successfully, and +// - the stringification of the resulting HloModule is equal to our original +// string. std::vector CreateTestCases() { // clang-format off return std::vector({ @@ -509,6 +513,32 @@ ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], varia ROOT %batch-norm-grad = (f32[2,2,2,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-grad(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance, f32[2,2,2,2]{3,2,1,0} %grad_output), epsilon=0.001, feature_index=0 } +)" +}, +// pad +{ +"Pad", +R"(HloModule Pad1DS3Array_module: + +ENTRY %Pad1DS3Array.v3 () -> f32[8] { + %constant = f32[3]{0} constant({1, 2, 3}) + %constant.1 = f32[] constant(0.1) + ROOT %pad = f32[8]{0} pad(f32[3]{0} %constant, f32[] %constant.1), padding=3_1 +} + +)" +}, +// pad has interior +{ +"PadHasInterior", +R"(HloModule PadHasInterior_module: + +ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] { + %input = f32[1,25,7,7]{3,2,1,0} parameter(0) + %constant = f32[] constant(-5.123) + ROOT %pad = f32[1,25,17,11]{3,2,1,0} pad(f32[1,25,7,7]{3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_0_0x2_2_1x2_2_0 +} + )" } }); @@ -523,7 +553,10 @@ class HloParserTest : public ::testing::Test, << "'" << s << "' does not contain '" << expected << "'"; } - void ExpectSuccess() { + // Expects "ToString(Parse(string)) == string", that is, parses the string, + // asserts that it succeeded, stringifies the parsed module, and checks that + // the it equals the original string. + void ExpectEqual() { const string& original = GetParam().module_string; auto result = Parse(original); TF_EXPECT_OK(result.status()); @@ -532,7 +565,7 @@ class HloParserTest : public ::testing::Test, } }; -TEST_P(HloParserTest, Run) { ExpectSuccess(); } +TEST_P(HloParserTest, Run) { ExpectEqual(); } INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest, ::testing::ValuesIn(CreateTestCases()), @@ -793,6 +826,21 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { TF_EXPECT_OK(Parse(original).status()); } +TEST_F(HloParserTest, PaddingConfigIsNotWindowPad) { + const string original = R"(HloModule window_pad_module: + +ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] { + %input = f32[1,2,1]{2,1,0} parameter(0) + %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) + %filter = f32[1,1,1]{2,1,0} parameter(1) + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), dim_labels=b0f_0io->b0f, window={pad=1_1_0 size=1} +} + +)"; + ExpectHasSubstr(Parse(original).status().error_message(), + "expects padding_low and padding_high separated by '_'"); +} + } // namespace } // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 15ab8b1ccc..9afd2fac23 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -59,7 +59,7 @@ enum class TokKind { kAttributeName, // dimensions= kDimLabels, // [0-9bf]+_[0-9io]+->[0-9bf]+ kDxD, // [0-9]+(x[0-9]+)+ - kWindowPad, // [0-9]+_[0-9]+(x[0-9]+_[0-9]+)* + kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* kShape, // f32[2,3]{1,0} kOpcode, // add kInt, // 42 -- GitLab From 685f604f63a30a8162d8762e9d8d22f171dca85e Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 9 Nov 2017 19:27:20 -0800 Subject: [PATCH 0259/1801] [XLA] Don't deemphasize nodes inside of subcomputations in dumped XLA graphs. Nodes inside of subcomputations (e.g. fusion computations) are always printed by the HLO graph dumper. Before this change, the dumper was not fully aware of this fact, leading it to mark as "deemphasized" (i.e. draw as gray with a dashed outline) nodes that had no business of being deemphasized. PiperOrigin-RevId: 175247474 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 67e0238c4a..04b3059fb1 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1303,7 +1303,9 @@ NodeFilter MakeNodeFilter(const HloInstruction* root, int64 radius) { auto is_displayed = [&](const HloInstruction* instr) { // Constants are displayed inline with their users; they're never omitted. - return nodes.count(instr) > 0 || instr->opcode() == HloOpcode::kConstant; + // Nodes in subcomputations are always shown. + return nodes.count(instr) > 0 || instr->opcode() == HloOpcode::kConstant || + instr->parent() != root->parent(); }; // Make a second pass over 'nodes' to fix up the NodeFilterResults now that we -- GitLab From 64d9aa1ace99c66f20b65532f633acb34ee3c057 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Thu, 9 Nov 2017 20:45:39 -0800 Subject: [PATCH 0260/1801] Add bfloat support to XLA. This is necessary in providing bfloat support in GPU backend. RELNOTES: bfloat support is now added to XLA infra. PiperOrigin-RevId: 175252067 --- tensorflow/compiler/tf2xla/type_util.cc | 3 + tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/literal_util.cc | 99 ++++++- tensorflow/compiler/xla/literal_util.h | 23 ++ tensorflow/compiler/xla/literal_util_test.cc | 62 +++++ tensorflow/compiler/xla/primitive_util.cc | 8 +- tensorflow/compiler/xla/primitive_util.h | 7 + tensorflow/compiler/xla/service/backend.cc | 4 +- .../xla/service/cpu/cpu_runtime_test.cc | 4 +- .../compiler/xla/service/hlo_evaluator.cc | 4 + tensorflow/compiler/xla/service/hlo_runner.cc | 3 +- tensorflow/compiler/xla/shape_util.cc | 1 + .../compiler/xla/tests/literal_test_util.cc | 13 +- .../xla/tests/local_client_test_base.cc | 3 +- tensorflow/compiler/xla/types.h | 3 + tensorflow/compiler/xla/xla_data.proto | 13 +- tensorflow/core/framework/bfloat16.cc | 30 +-- tensorflow/core/framework/bfloat16_test.cc | 92 +++++++ tensorflow/core/framework/numeric_types.h | 251 +++++++++++++++++- 19 files changed, 580 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc index 1efbe0ffb1..c969212a1b 100644 --- a/tensorflow/compiler/tf2xla/type_util.cc +++ b/tensorflow/compiler/tf2xla/type_util.cc @@ -49,6 +49,9 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) { case tensorflow::DT_UINT64: *type = xla::U64; return Status::OK(); + case tensorflow::DT_BFLOAT16: + *type = xla::BF16; + return Status::OK(); case tensorflow::DT_HALF: *type = xla::F16; return Status::OK(); diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 660f419e46..f6e405744a 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -77,6 +77,7 @@ cc_library( hdrs = ["types.h"], visibility = [":friends"], deps = [ + "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//third_party/eigen3", ], diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 0cb2223ae5..93d3cd425f 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -33,6 +33,20 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +namespace { +using tensorflow::int64; + +constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; + +// Converts between little and big endian, assuming elements in the array are 16 +// bits long. +void ConvertEndianShort(char* bytes, int64 size) { + CHECK_EQ(size / 2, 0); + for (int64 i = 0; i < size; i += 2) { + std::swap(bytes[i], bytes[i + 1]); + } +} +} // namespace namespace xla { @@ -169,6 +183,8 @@ Status Literal::Copy(const Literal& src_literal, return CopyRange(src_literal, src_base, dest_base, copy_size); case F16: return CopyRange(src_literal, src_base, dest_base, copy_size); + case BF16: + return CopyRange(src_literal, src_base, dest_base, copy_size); case F32: return CopyRange(src_literal, src_base, dest_base, copy_size); case F64: @@ -200,6 +216,8 @@ Status Literal::Copy(const Literal& src_literal, return *Literal::CreateR0(0); case F16: return *Literal::CreateR0(static_cast(0.0f)); + case BF16: + return *Literal::CreateR0(static_cast(0.0f)); case F32: return *Literal::CreateR0(0); case F64: @@ -285,6 +303,9 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(-std::numeric_limits::infinity())); + case BF16: + return *Literal::CreateR0( + static_cast(-std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no minimum value"; case OPAQUE: @@ -321,6 +342,9 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(std::numeric_limits::infinity())); + case BF16: + return *Literal::CreateR0( + static_cast(std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no maximum value"; case OPAQUE: @@ -428,6 +452,7 @@ std::unique_ptr Literal::Transpose( // The shape with affine layout resulting from that operation will be // F32[8,11]{0,1}, since it leaves the original most minor (the 8 sized), the // most minor. + // // Essentially, given MinMaj(Di) the position of the Di dimension within the // minor to major vector, and given T(Di) the index that the original Di // dimension has within the transposed array, a layout is affine if @@ -536,6 +561,9 @@ string Literal::GetAsString( } case F16: return tensorflow::strings::StrCat(Get(multi_index)); + case BF16: + return tensorflow::strings::StrCat( + static_cast(Get(multi_index))); default: return tensorflow::strings::StrCat( "[", PrimitiveType_Name(shape().element_type()), "]"); @@ -743,6 +771,8 @@ void* Literal::MutableInternalData() { return reinterpret_cast(c64s_.data()); case F16: return reinterpret_cast(f16s_.data()); + case BF16: + return reinterpret_cast(bf16s_.data()); default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -785,6 +815,9 @@ void Literal::Reserve(int64 num_elements) { case F16: Resize(num_elements, static_cast(0.0f)); break; + case BF16: + Resize(num_elements, static_cast(0.0f)); + break; default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -824,6 +857,9 @@ tensorflow::Status Literal::ValidateLiteral() const { case F16: actual = f16s().size() / sizeof(half); break; + case BF16: + actual = bf16s().size(); + break; default: return tensorflow::errors::Unimplemented( "unhandled element type for literal validation: " + @@ -920,6 +956,7 @@ StatusOr> ConvertIfDestTypeMatches( CONVERT_IF_TYPES_MATCH(F16) CONVERT_IF_TYPES_MATCH(F32) CONVERT_IF_TYPES_MATCH(F64) + CONVERT_IF_TYPES_MATCH(BF16) #undef CONVERT_IF_TYPES_MATCH case C64: return ConvertToC64(src_literal); @@ -949,8 +986,9 @@ StatusOr> Literal::Convert( CONVERT_IF_DEST_TYPE_MATCHES(F16) CONVERT_IF_DEST_TYPE_MATCHES(F32) CONVERT_IF_DEST_TYPE_MATCHES(F64) + CONVERT_IF_DEST_TYPE_MATCHES(BF16) #undef CONVERT_IF_DEST_TYPE_MATCHES - // Other types are not yet supported. + // Other types are not yet supported. default: return InvalidArgument("Unimplemented: Convert from type %s to type %s", PrimitiveType_Name(shape().element_type()).c_str(), @@ -1019,6 +1057,8 @@ bool Literal::operator==(const Literal& other) const { return EqualElements(*this, other, 0, &multi_index); case F16: return EqualElements(*this, other, 0, &multi_index); + case BF16: + return EqualElements(*this, other, 0, &multi_index); case C64: return EqualElements(*this, other, 0, &multi_index); default: @@ -1128,13 +1168,18 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { - // TODO - there is an endianess problem here. fix it, or wait for uint16 - // support in protobuf auto values = mutable_f16s(); return tensorflow::gtl::MutableArraySlice(values->data(), values->size()); } +template <> +tensorflow::gtl::MutableArraySlice +Literal::GetMutableArraySlice() { + auto values = mutable_bf16s(); + return {values->data(), values->size()}; +} + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { CHECK_EQ(shape().element_type(), PRED); @@ -1205,6 +1250,12 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { f16s().size() / sizeof(half)); } +template <> +tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { + CHECK_EQ(shape().element_type(), BF16); + return {bf16s().data(), bf16s().size()}; +} + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { @@ -1253,6 +1304,9 @@ bool Literal::IsAll(int8 value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); + case BF16: + return AllElementsEqualValue(*this, + static_cast(value)); case PRED: if (value == 0) { return AllElementsEqualValue(*this, false); @@ -1274,6 +1328,9 @@ bool Literal::IsAllFloat(float value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); + case BF16: + return AllElementsEqualValue(*this, + static_cast(value)); default: return false; } @@ -1310,6 +1367,8 @@ bool Literal::IsZero(tensorflow::gtl::ArraySlice indices) const { return Get(indices) == complex64(0.0f, 0.0f); case F16: return Get(indices) == static_cast(0.0f); + case BF16: + return Get(indices) == static_cast(0.0f); case PRED: return Get(indices) == false; default: @@ -1377,6 +1436,12 @@ void Literal::Resize(int64 num_elements, half value) { mutable_f16s()->resize(num_elements, value); } +template <> +void Literal::Resize(int64 num_elements, bfloat16 value) { + CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); + mutable_bf16s()->resize(num_elements, value); +} + template <> void Literal::Resize(int64 num_elements, complex64 value) { CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); @@ -1425,6 +1490,19 @@ LiteralProto Literal::ToProto() const { *proto.mutable_f16s() = string(reinterpret_cast(f16s_.data()), f16s_.size() * sizeof(half)); + if (!kLittleEndian) { + ConvertEndianShort(const_cast(proto.mutable_f16s()->data()), + proto.f16s().size()); + } + break; + case BF16: + *proto.mutable_bf16s() = + string(reinterpret_cast(bf16s_.data()), + bf16s_.size() * sizeof(bfloat16)); + if (!kLittleEndian) { + ConvertEndianShort(const_cast(proto.mutable_bf16s()->data()), + proto.bf16s().size()); + } break; case F32: CopyToRepeatedField(proto.mutable_f32s(), f32s()); @@ -1493,6 +1571,21 @@ void Literal::CopyFromProto(const LiteralProto& literal_proto) { CHECK_EQ(0, s.size() % sizeof(half)); f16s_ = std::vector(s.size() / sizeof(half)); memcpy(f16s_.data(), s.data(), s.size()); + + if (!kLittleEndian) { + ConvertEndianShort(reinterpret_cast(f16s_.data()), s.size()); + } + break; + } + case BF16: { + const string& s(literal_proto.bf16s()); + CHECK_EQ(0, s.size() % sizeof(bfloat16)); + bf16s_ = std::vector(s.size() / sizeof(bfloat16)); + memcpy(bf16s_.data(), s.data(), s.size()); + + if (!kLittleEndian) { + ConvertEndianShort(reinterpret_cast(bf16s_.data()), s.size()); + } break; } case F32: diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 667f926c46..f37e529caf 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -163,6 +163,11 @@ class Literal { const std::vector& c64s() const { return c64s_; } std::vector* mutable_c64s() { return &c64s_; } + int bf16s_size() const { return bf16s().size(); } + bfloat16 bf16s(int i) const { return bf16s_[i]; } + const std::vector& bf16s() const { return bf16s_; } + std::vector* mutable_bf16s() { return &bf16s_; } + int tuple_literals_size() const { return tuple_literals().size(); } const Literal& tuple_literals(int i) const { return tuple_literals_[i]; } Literal* add_tuple_literals() { @@ -622,6 +627,7 @@ class Literal { std::vector u16s_; std::vector u32s_; std::vector u64s_; + std::vector bf16s_; std::vector f16s_; std::vector f32s_; std::vector f64s_; @@ -674,6 +680,9 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; +template <> +tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; @@ -714,6 +723,9 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); +template <> +tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); + template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); @@ -747,6 +759,9 @@ void Literal::Resize(int64 num_elements, double value); template <> void Literal::Resize(int64 num_elements, half value); +template <> +void Literal::Resize(int64 num_elements, bfloat16 value); + template <> void Literal::Resize(int64 num_elements, complex64 value); @@ -990,6 +1005,14 @@ inline half Literal::Get( return GetArraySlice()[linear_index]; } +template <> +inline bfloat16 Literal::Get( + tensorflow::gtl::ArraySlice multi_index) const { + CHECK(shape().element_type() == BF16); + int64 linear_index = LinearIndex(multi_index); + return GetArraySlice()[linear_index]; +} + template void Literal::Set(tensorflow::gtl::ArraySlice multi_index, NativeT value) { diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 6d596da4ad..1e08101759 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -110,6 +110,18 @@ TEST_F(LiteralUtilTest, LiteralScalarToString) { auto c64_lit = Literal::CreateR0({3.14f, 2.78f}); ASSERT_EQ("(3.14, 2.78)", c64_lit->ToString()); + + auto bf16_lit = Literal::CreateR0(static_cast(0.5f)); + ASSERT_EQ("0.5", bf16_lit->ToString()); + + // 3.14 will be rounded to 3.125 in bfloat16 format (Round to nearest even). + auto bf16_lit_truncated = + Literal::CreateR0(static_cast(3.14f)); + ASSERT_EQ("3.140625", bf16_lit_truncated->ToString()); + + auto bf16_lit_truncated2 = + Literal::CreateR0(static_cast(9.001f)); + ASSERT_EQ("9", bf16_lit_truncated2->ToString()); } TEST_F(LiteralUtilTest, LiteralVectorToString) { @@ -397,6 +409,18 @@ TEST_F(LiteralUtilTest, IsAll) { EXPECT_FALSE(Literal::CreateR2({{h8}, {h9}})->IsAll(8)); EXPECT_FALSE(Literal::CreateR2({{h9}, {h8}})->IsAll(8)); + bfloat16 b8(8.0f); + bfloat16 b9(9.0f); + + EXPECT_TRUE(Literal::CreateR2({{b8}, {b8}})->IsAll(8)); + EXPECT_FALSE(Literal::CreateR2({{b8}, {b9}})->IsAll(8)); + EXPECT_FALSE(Literal::CreateR2({{b9}, {b8}})->IsAll(8)); + + // 9.001 will be truncated to 9.0 + bfloat16 b91(9.001f); + bfloat16 b90(9.00f); + EXPECT_TRUE(Literal::CreateR2({{b91}, {b90}})->IsAll(9.0)); + complex64 c8_9 = {8, 9}; EXPECT_FALSE(Literal::CreateR2({{c8_9}, {c8_9}})->IsAll(8)); @@ -691,6 +715,30 @@ TEST_F(LiteralUtilTest, PopulateR2C64) { EXPECT_EQ(output, *expected); } +TEST_F(LiteralUtilTest, PopulateWithValueR0BF16) { + Literal output; + bfloat16 h(0.25f); + output.PopulateWithValue(h, {}); + auto expected = Literal::CreateR0(h); + EXPECT_EQ(output, *expected); +} + +TEST_F(LiteralUtilTest, PopulateWithValueR1BF16) { + Literal output; + bfloat16 h(0.5f); + output.PopulateWithValue(h, {3}); + auto expected = Literal::CreateR1({h, h, h}); + EXPECT_EQ(output, *expected); +} + +TEST_F(LiteralUtilTest, PopulateWithValueR2BF16) { + Literal output; + bfloat16 h(2.0f); + output.PopulateWithValue(h, {2, 2}); + auto expected = Literal::CreateR2({{h, h}, {h, h}}); + EXPECT_EQ(output, *expected); +} + TEST_F(LiteralUtilTest, PopulateWithValueR0F32) { Literal output; output.PopulateWithValue(2.5f, {}); @@ -975,6 +1023,14 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { {{half(26.0), half(0.0), half(28.0), half(0.0)}, {half(0.0), half(31.0), half(0.0), half(33.0)}}, }}, layout_r4_dim0major_); + auto bf16 = Literal::CreateR4WithLayout({{ + {{bfloat16(10.0), bfloat16(0.0), bfloat16(12.0), bfloat16(0.0)}, + {bfloat16(0.0), bfloat16(15.0), bfloat16(0.0), bfloat16(17.0)}}, + {{bfloat16(0.0), bfloat16(19.0), bfloat16(0.0), bfloat16(21.0)}, + {bfloat16(22.0), bfloat16(0.0), bfloat16(24.0), bfloat16(0.0)}}, + {{bfloat16(26.0), bfloat16(0.0), bfloat16(28.0), bfloat16(0.0)}, + {bfloat16(0.0), bfloat16(31.0), bfloat16(0.0), bfloat16(33.0)}}, + }}, layout_r4_dim0major_); auto f32 = Literal::CreateR4WithLayout({{ {{10.0f, 0.0f, 12.0f, 0.0f}, {0.0f, 15.0f, 0.0f, 17.0f}}, {{0.0f, 19.0f, 0.0f, 21.0f}, {22.0f, 0.0f, 24.0f, 0.0f}}, @@ -1008,6 +1064,12 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { conv = s8->Convert(PRED).ConsumeValueOrDie(); EXPECT_EQ(*conv, *pred); + conv = bf16->Convert(S32).ConsumeValueOrDie(); + EXPECT_EQ(*conv, *s32); + + conv = bf16->Convert(F32).ConsumeValueOrDie(); + EXPECT_EQ(*conv, *f32); + conv = pred->Convert(S32).ConsumeValueOrDie(); EXPECT_EQ(*conv, *int32_pred); diff --git a/tensorflow/compiler/xla/primitive_util.cc b/tensorflow/compiler/xla/primitive_util.cc index 2113b5e06f..2bce56b7bd 100644 --- a/tensorflow/compiler/xla/primitive_util.cc +++ b/tensorflow/compiler/xla/primitive_util.cc @@ -78,6 +78,11 @@ PrimitiveType NativeToPrimitiveType() { return F64; } +template <> +PrimitiveType NativeToPrimitiveType() { + return BF16; +} + template <> PrimitiveType NativeToPrimitiveType() { return F16; @@ -89,7 +94,7 @@ PrimitiveType NativeToPrimitiveType() { } bool IsFloatingPointType(PrimitiveType type) { - return type == F16 || type == F32 || type == F64; + return type == F16 || type == F32 || type == F64 || type == BF16; } bool IsComplexType(PrimitiveType type) { return type == C64; } @@ -118,6 +123,7 @@ int BitWidth(PrimitiveType type) { case S16: case U16: case F16: + case BF16: return 16; case U32: diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h index a49c8b86fc..19c6a13888 100644 --- a/tensorflow/compiler/xla/primitive_util.h +++ b/tensorflow/compiler/xla/primitive_util.h @@ -77,6 +77,8 @@ template <> PrimitiveType NativeToPrimitiveType(); template <> PrimitiveType NativeToPrimitiveType(); +template <> +PrimitiveType NativeToPrimitiveType(); // Complex template <> @@ -167,6 +169,11 @@ struct PrimitiveTypeToNative { using type = half; }; +template <> +struct PrimitiveTypeToNative { + using type = bfloat16; +}; + // Complex template <> struct PrimitiveTypeToNative { diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index 9abe30e3f3..05f2d06278 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS + #include "tensorflow/compiler/xla/service/backend.h" #include #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/platform_util.h" diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc index f8e260dd90..f385829cdf 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc @@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/client/local_client.h" diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 88b77ccdd0..a722d1b3d9 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1450,6 +1450,10 @@ HloEvaluator::HloEvaluator() { typed_visitors_[F32] = MakeUnique>(this); typed_visitors_[F64] = MakeUnique>(this); typed_visitors_[C64] = MakeUnique>(this); + + typed_visitors_[BF16] = MakeUnique([](HloInstruction*) { + return Unimplemented("HloEvaluator: unhandled primitive type: BF16."); + }); typed_visitors_[TUPLE] = MakeUnique([](HloInstruction*) { return Unimplemented("HloEvaluator: unhandled primitive type: TUPLE."); }); diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index f463e57d99..158fb9a546 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/hlo_runner.h" @@ -19,8 +20,6 @@ limitations under the License. #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index b5eb81dfc6..4d0bafa908 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -263,6 +263,7 @@ StatusOr MakeShapeWithLayoutInternal( case S32: case S64: case F16: + case BF16: case F32: case F64: return true; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 95a52ecd2f..75c9a0d3fb 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -116,16 +116,18 @@ template ::testing::AssertionResult CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs) { auto ulhs = tensorflow::bit_cast(lhs); auto urhs = tensorflow::bit_cast(rhs); + auto lhs_double = static_cast(lhs); + auto rhs_double = static_cast(rhs); if (ulhs != urhs) { return ::testing::AssertionFailure() << tensorflow::strings::Printf( "floating values are not bitwise-equal; and equality testing " "was requested: %s=%g=%a vs %s=%g=%a", tensorflow::strings::StrCat(tensorflow::strings::Hex(ulhs)) .c_str(), - lhs, lhs, + lhs_double, lhs_double, tensorflow::strings::StrCat(tensorflow::strings::Hex(urhs)) .c_str(), - rhs, rhs); + rhs_double, rhs_double); } return ::testing::AssertionSuccess(); } @@ -149,6 +151,10 @@ template // Specializations for floating types that do bitwise comparisons when equality // comparison is requested. template <> +::testing::AssertionResult CompareEqual(bfloat16 lhs, bfloat16 rhs) { + return CompareFloatsBitwiseEqual(lhs, rhs); +} +template <> ::testing::AssertionResult CompareEqual(float lhs, float rhs) { return CompareFloatsBitwiseEqual(lhs, rhs); } @@ -238,6 +244,9 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, case U64: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; + case BF16: + match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); + break; case F32: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index c11e1df0a7..d98875dbc2 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/tests/local_client_test_base.h" #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/map_util.h" diff --git a/tensorflow/compiler/xla/types.h b/tensorflow/compiler/xla/types.h index 3b19ca321c..9fa4297523 100644 --- a/tensorflow/compiler/xla/types.h +++ b/tensorflow/compiler/xla/types.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "third_party/eigen3/Eigen/Core" +#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/platform/types.h" #include @@ -32,6 +33,8 @@ using ::tensorflow::int16; using ::tensorflow::int32; using ::tensorflow::int64; +using ::tensorflow::bfloat16; + using ::tensorflow::uint8; using ::tensorflow::uint16; using ::tensorflow::uint32; diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 7146604708..eac8f2ff07 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -46,6 +46,12 @@ enum PrimitiveType { // converted to f16 from f32 at arbirary points in the computation. F16 = 10; F32 = 11; + + // Truncated 16 bit floating-point format. This is similar to IEEE's 16 bit + // floating-point format, but uses 1 bit for the sign, 8 bits for the exponent + // and 7 bits for the mantissa. + BF16 = 16; + F64 = 12; // Complex values of fixed width. @@ -63,6 +69,8 @@ enum PrimitiveType { // An opaque type used for passing context specific data to a custom // operation. OPAQUE = 14; + + // Next = 17 } // Describes the value held inside padding elements. @@ -310,7 +318,10 @@ message LiteralProto { repeated double f64s = 9; repeated float c64s = 12; // Stored as interleaved real, imag floats. repeated LiteralProto tuple_literals = 10; - bytes f16s = 11; // Note: the F16s are encoded in little endian byte order + // The F16s and BF16s are encoded in little endian byte order + bytes f16s = 11; + bytes bf16s = 13; + // Next = 14 } message WindowDimension { diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc index a5ac0e1a8d..1a6f355c77 100644 --- a/tensorflow/core/framework/bfloat16.cc +++ b/tensorflow/core/framework/bfloat16.cc @@ -18,32 +18,24 @@ limitations under the License. namespace tensorflow { void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) { - const uint16_t* p = reinterpret_cast(src); - uint16_t* q = reinterpret_cast(dst); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p += 2, q++, size--) { - *q = p[0]; - } -#else - for (; size != 0; p += 2, q++, size--) { - *q = p[1]; - } -#endif + for (int64 i = 0; i < size; ++i) { + dst[i] = bfloat16(src[i]); + } } void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) { const uint16_t* p = reinterpret_cast(src); uint16_t* q = reinterpret_cast(dst); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p++, q += 2, size--) { - q[0] = *p; - q[1] = 0; + for (; size != 0; p++, q += 2, size--) { + q[0] = *p; + q[1] = 0; } -#else - for (; size != 0; p++, q += 2, size--) { - q[0] = 0; - q[1] = *p; - } +#else + for (; size != 0; p++, q += 2, size--) { + q[0] = 0; + q[1] = *p; + } #endif } diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index af4e6a4411..a25b764ea2 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/bfloat16.h" +#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -27,6 +28,97 @@ TEST(Bfloat16Test, Simple) { EXPECT_EQ(0x4140, a.value); } +float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa, + uint32_t low_mantissa) { + return bit_cast((sign << 31) + (exponent << 23) + + (high_mantissa << 16) + low_mantissa); +} + +struct Bfloat16TestParam { + float input; + float expected; +}; + +class Bfloat16Test : public ::testing::Test, + public ::testing::WithParamInterface {}; + +TEST_P(Bfloat16Test, RoundOrTruncate) { + bfloat16 a(GetParam().input); + if (std::isnan(GetParam().input)) { + EXPECT_TRUE(std::isnan(float(a))); + return; + } + EXPECT_EQ(GetParam().expected, float(a)); +} + +INSTANTIATE_TEST_CASE_P( + Bfloat16Test_Instantiation, Bfloat16Test, + ::testing::Values( + // More than half. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1111010111000011), + BinaryToFloat(0, 0b10000000, 0b1001001, 0b0000000000000000)}, + + Bfloat16TestParam{ + BinaryToFloat(1, 0b10000000, 0b1001000, 0b1111010111000011), + BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, + + // Exact half. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // NaN stays at NaN. + Bfloat16TestParam{ + BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000001), + BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, + + // NaN stays at NaN -- no exponents overflow. + Bfloat16TestParam{ + BinaryToFloat(0, 0b11111111, 0b1111111, 0b1111111111111111), + BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, + + // More than half, round to an odd number. + Bfloat16TestParam{ + BinaryToFloat(1, 0b10000000, 0b1001000, 0b1100000000000000), + BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, + + // Less than half, truncate. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // Less than half, truncate. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0100000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // Exact at half, but result is already even. + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + + // Denormal values. + Bfloat16TestParam{ + BinaryToFloat(0, 0b00000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b00000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b00000000, 0b1111111, 0b1100000000000000), + BinaryToFloat(0, 0b00000001, 0b0000000, 0b0000000000000000)})); +TEST(Bfloat16Test, RoundWithFractionOverflow) { + // Still works with fraction overflow -- round to 4./ + // + // Input 3.9960938: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1100000000000000 + // + // Should round to 4.0: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 + bfloat16 a(3.9960938f); + EXPECT_EQ(4.0, float(a)); +} + TEST(Bfloat16Test, Conversion) { float a[100]; for (int i = 0; i < 100; ++i) { diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index a630bee38d..d005de2af1 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -44,29 +44,262 @@ typedef Eigen::QUInt16 quint16; // see framework/bfloat16.h for description. struct bfloat16 { EIGEN_DEVICE_FUNC bfloat16() {} - EIGEN_DEVICE_FUNC explicit bfloat16(const float v) { - const uint16_t* p = reinterpret_cast(&v); + + explicit EIGEN_DEVICE_FUNC bfloat16(float v) { + uint32_t input; + memcpy(&input, &v, sizeof(uint32_t)); + + if ((~input & 0x7f800000) == 0 && (input & 0x007fffff) != 0) { + // If the value is a NaN, squash it to a qNaN with msb of fraction set, + // this makes sure after truncation we don't end up with an inf. + // + // qNaN magic: All exponent bits set + most significant bit of fraction + // set. + value = 0x7fc0; + } else { + // Fast rounding algorithm that rounds a half value to nearest even. This + // reduces expected error when we convert a large number of floats. Here + // is how it works: + // + // Definitions: + // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits + // with the following tags: + // + // Sign | Exp (8 bits) | Frac (23 bits) + // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT + // + // S: Sign bit. + // E: Exponent bits. + // F: First 6 bits of fraction. + // L: Least significant bit of resulting bfloat16 if we truncate away the + // rest of the float32. This is also the 7th bit of fraction + // R: Rounding bit, 8th bit of fraction. + // T: Sticky bits, rest of fraction, 15 bits. + // + // To round half to nearest even, there are 3 cases where we want to round + // down (simply truncate the result of the bits away, which consists of + // rounding bit and sticky bits) and two cases where we want to round up + // (truncate then add one to the result). + // + // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of + // 1s) as the rounding bias, adds the rounding bias to the input, then + // truncates the last 16 bits away. + // + // To understand how it works, we can analyze this algorithm case by case: + // + // 1. L = 0, R = 0: + // Expect: round down, this is less than half value. + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input may create any carry, depending on + // whether there is any value set to 1 in T bits. + // - R may be set to 1 if there is a carry. + // - L remains 0. + // - Note that this case also handles Inf and -Inf, where all fraction + // bits, including L, R and Ts are all 0. The output remains Inf after + // this algorithm. + // + // 2. L = 1, R = 0: + // Expect: round down, this is less than half value. + // + // Algorithm: + // - Rounding bias: 0x7fff + 1 = 0x8000 + // - Adding rounding bias to input doesn't change sticky bits but + // adds 1 to rounding bit. + // - L remains 1. + // + // 3. L = 0, R = 1, all of T are 0: + // Expect: round down, this is exactly at half, the result is already + // even (L=0). + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input sets all sticky bits to 1, but + // doesn't create a carry. + // - R remains 1. + // - L remains 0. + // + // 4. L = 1, R = 1: + // Expect: round up, this is exactly at half, the result needs to be + // round to the next even number. + // + // Algorithm: + // - Rounding bias: 0x7fff + 1 = 0x8000 + // - Adding rounding bias to input doesn't change sticky bits, but + // creates a carry from rounding bit. + // - The carry sets L to 0, creates another carry bit and propagate + // forward to F bits. + // - If all the F bits are 1, a carry then propagates to the exponent + // bits, which then creates the minimum value with the next exponent + // value. Note that we won't have the case where exponents are all 1, + // since that's either a NaN (handled in the other if condition) or inf + // (handled in case 1). + // + // 5. L = 0, R = 1, any of T is 1: + // Expect: round up, this is greater than half. + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input creates a carry from sticky bits, + // sets rounding bit to 0, then create another carry. + // - The second carry sets L to 1. + // + // Examples: + // + // Exact half value that is already even: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000 + // + // This falls into case 3. We truncate the rest of 16 bits and no + // carry is created into F and L: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 + // + // Exact half value, round to next even number: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000 + // + // This falls into case 4. We create a carry from R and T, + // which then propagates into L and F: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 + // + // + // Max denormal value round to min normal value: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111 + // + // This falls into case 4. We create a carry from R and T, + // propagate into L and F, which then propagates into exponent + // bits: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 + // + // Max normal value round to Inf: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111 + // + // This falls into case 4. We create a carry from R and T, + // propagate into L and F, which then propagates into exponent + // bits: + // + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 + // + // + // Least significant bit of resulting bfloat. + uint32_t lsb = (input >> 16) & 1; + uint32_t rounding_bias = 0x7fff + lsb; + input += rounding_bias; + value = static_cast(input >> 16); + } + } + + template + explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) + : bfloat16(static_cast(val)) {} + + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { + float result; + + uint16_t* q = reinterpret_cast(&result); + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - value = p[0]; + q[0] = value; + q[1] = 0; #else - value = p[1]; + q[0] = 0; + q[1] = value; #endif + return result; + } + + EIGEN_DEVICE_FUNC explicit operator bool() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator Eigen::half() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator short() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator int() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator signed char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned int() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned long long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator long long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator double() const { + return static_cast(float(*this)); } uint16_t value; }; +inline bool operator==(const bfloat16 a, const bfloat16 b) { + return a.value == b.value; +} + +inline bool operator!=(const bfloat16 a, const bfloat16 b) { + return a.value != b.value; +} + } // end namespace tensorflow namespace Eigen { template <> struct NumTraits : GenericNumTraits {}; -EIGEN_STRONG_INLINE bool operator==(const tensorflow::bfloat16 a, - const tensorflow::bfloat16 b) { - return a.value == b.value; -} - +using ::tensorflow::operator==; +using ::tensorflow::operator!=; } // namespace Eigen #ifdef COMPILER_MSVC -- GitLab From e32165b849df8c31d77e09011c648b23f24def99 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Thu, 9 Nov 2017 21:01:00 -0800 Subject: [PATCH 0261/1801] Get rid of IteratorBase::is_exhausted flag since it is not possible to rely on it unless we lock each call to GetNext which is not preferable. Each iterator now handles saving/restoring exhausted state. As a guideline, we always reset the input_impl(s) when they get exhausted. This can be used as an indicator of exhausted-ness for non-terminal iterators. Also reduces memory overhead. Each iterator should also handle calls to GetNextInternal when it is exhausted. Fixed this for some datasets. Also fix a bug in dataset_serialization_test_base. We were not saving a checkpoint after exhausting the iterator so verify_exhausted_iterator was not really testing restoring an exhausted iterator. PiperOrigin-RevId: 175253023 --- .../dataset_serialization_test_base.py | 4 +-- tensorflow/core/kernels/batch_dataset_op.cc | 21 +++++++++++-- .../core/kernels/concatenate_dataset_op.cc | 8 ++++- tensorflow/core/kernels/dataset.cc | 1 - tensorflow/core/kernels/dataset.h | 23 ++------------ tensorflow/core/kernels/range_dataset_op.cc | 1 - tensorflow/core/kernels/reader_dataset_ops.cc | 1 - tensorflow/core/kernels/repeat_dataset_op.cc | 18 +++++++++-- tensorflow/core/kernels/shuffle_dataset_op.cc | 31 ++++++++++--------- tensorflow/core/kernels/skip_dataset_op.cc | 21 +++++++++++-- tensorflow/core/kernels/take_dataset_op.cc | 17 ++++++++-- tensorflow/core/kernels/zip_dataset_op.cc | 31 +++++++++++++++---- 12 files changed, 120 insertions(+), 57 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index 369b789a52..07fecf04fa 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -337,11 +337,11 @@ class DatasetSerializationTestBase(test.TestCase): num_iters = end - start for _ in range(num_iters): outputs.append(sess.run(get_next_op)) - self._save(sess, saver) - ckpt_saved = True if i == len(break_points) and verify_exhausted: with self.assertRaises(errors.OutOfRangeError): sess.run(get_next_op) + self._save(sess, saver) + ckpt_saved = True return outputs diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/batch_dataset_op.cc index 2e52ad39f8..6a5fd17a9e 100644 --- a/tensorflow/core/kernels/batch_dataset_op.cc +++ b/tensorflow/core/kernels/batch_dataset_op.cc @@ -143,9 +143,13 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { // Each row of `batch_elements` is a tuple of tensors from the // input iterator. std::vector> batch_elements; - batch_elements.reserve(dataset()->batch_size_); { mutex_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + batch_elements.reserve(dataset()->batch_size_); *end_of_sequence = false; for (int i = 0; i < dataset()->batch_size_ && !*end_of_sequence; ++i) { @@ -154,6 +158,8 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { end_of_sequence)); if (!*end_of_sequence) { batch_elements.emplace_back(std::move(batch_element_tuple)); + } else { + input_impl_.reset(); } } } @@ -194,14 +200,23 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { protected: Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } return Status::OK(); } Status RestoreInternal(OpKernelContext* ctx, IteratorStateReader* reader) override { mutex_lock l(mu_); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/concatenate_dataset_op.cc b/tensorflow/core/kernels/concatenate_dataset_op.cc index 711c234129..c3bd89c479 100644 --- a/tensorflow/core/kernels/concatenate_dataset_op.cc +++ b/tensorflow/core/kernels/concatenate_dataset_op.cc @@ -104,6 +104,10 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } while (i_ < 2) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -140,7 +144,9 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel { } else if (i_ == 2) { input_impl_.reset(); } - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (input_impl_) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/dataset.cc index 0414875a5d..fcfa2956f7 100644 --- a/tensorflow/core/kernels/dataset.cc +++ b/tensorflow/core/kernels/dataset.cc @@ -126,7 +126,6 @@ void BinaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx, MakeDataset(ctx, input, another_input, output); } -const char IteratorBase::kIteratorExhausted[] = "ITERATOR_EXHAUSTED"; const char GraphDatasetBase::kDatasetGraphKey[] = "_DATASET_GRAPH"; const char GraphDatasetBase::kDatasetGraphOutputNodeKey[] = "_DATASET_GRAPH_OUTPUT_NODE"; diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index 4a42ac80c3..aa4f436b39 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -306,27 +306,14 @@ class IteratorBase { // Saves the state of this iterator. virtual Status Save(IteratorStateWriter* writer) { - if (is_exhausted_) { - LOG(INFO) << "Iterator exhausted."; - return writer->WriteScalar(kIteratorExhausted, kIteratorExhausted); - } else { - return SaveInternal(writer); - } + return SaveInternal(writer); } // Restores the state of this iterator. virtual Status Restore(OpKernelContext* ctx, IteratorStateReader* reader) { - if (reader->Contains(kIteratorExhausted)) { - LOG(INFO) << "Iterator exhausted. Nothing to restore."; - is_exhausted_ = true; - return Status::OK(); - } else { - return RestoreInternal(ctx, reader); - } + return RestoreInternal(ctx, reader); } - static const char kIteratorExhausted[]; - protected: // This is needed so that sub-classes of IteratorBase can call // `SaveInternal` on their parent iterators, e.g., in @@ -354,8 +341,6 @@ class IteratorBase { IteratorStateReader* reader) { return errors::Unimplemented("RestoreInternal"); } - - bool is_exhausted_ = false; // Whether the iterator has been exhausted. }; // Represents a (potentially infinite) range of outputs, where each @@ -491,10 +476,6 @@ class DatasetIterator : public IteratorBase { Status GetNext(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) final { port::Tracing::TraceMe activity(params_.prefix); - if (is_exhausted_) { - *end_of_sequence = true; - return Status::OK(); - } return GetNextInternal(ctx, out_tensors, end_of_sequence); } diff --git a/tensorflow/core/kernels/range_dataset_op.cc b/tensorflow/core/kernels/range_dataset_op.cc index 7adfcc4f8d..e7ae840fc7 100644 --- a/tensorflow/core/kernels/range_dataset_op.cc +++ b/tensorflow/core/kernels/range_dataset_op.cc @@ -99,7 +99,6 @@ class RangeDatasetOp : public DatasetOpKernel { if ((dataset()->step_ > 0 && next_ >= dataset()->stop_) || (dataset()->step_ < 0 && next_ <= dataset()->stop_)) { *end_of_sequence = true; - is_exhausted_ = true; return Status::OK(); } Tensor value_tensor(cpu_allocator(), DT_INT64, {}); diff --git a/tensorflow/core/kernels/reader_dataset_ops.cc b/tensorflow/core/kernels/reader_dataset_ops.cc index 39ef92a5de..c08e42be1d 100644 --- a/tensorflow/core/kernels/reader_dataset_ops.cc +++ b/tensorflow/core/kernels/reader_dataset_ops.cc @@ -402,7 +402,6 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { // Iteration ends when there are no more files to process. if (current_file_index_ == dataset()->filenames_.size()) { *end_of_sequence = true; - is_exhausted_ = true; return Status::OK(); } diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/repeat_dataset_op.cc index 6c0f4118e6..0167b9ea64 100644 --- a/tensorflow/core/kernels/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/repeat_dataset_op.cc @@ -117,6 +117,10 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } while (i_ < dataset()->count_) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -127,7 +131,6 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { input_impl_ = dataset()->input_->MakeIterator(prefix()); } *end_of_sequence = true; - is_exhausted_ = true; input_impl_.reset(); return Status::OK(); } @@ -136,7 +139,12 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } return Status::OK(); } @@ -144,7 +152,11 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { IteratorStateReader* reader) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/shuffle_dataset_op.cc b/tensorflow/core/kernels/shuffle_dataset_op.cc index 2146ba2aa1..dd0ab57e9d 100644 --- a/tensorflow/core/kernels/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/shuffle_dataset_op.cc @@ -105,8 +105,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { mutex_lock l(mu_); int64 start_micros = ctx->env()->NowMicros(); int64 num_log_entries = 0; - while (!end_of_input_sequence_ && - buffer_.size() < dataset()->buffer_size_) { + while (input_impl_ && buffer_.size() < dataset()->buffer_size_) { if (ctx->env()->NowMicros() > ((num_log_entries + 1) * kLogIntervalMicros) + start_micros) { num_log_entries++; @@ -114,9 +113,10 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { << buffer_.size() << " of " << dataset()->buffer_size_; } std::vector input_element; + bool end_of_input_sequence; TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element, - &end_of_input_sequence_)); - if (!end_of_input_sequence_) { + &end_of_input_sequence)); + if (!end_of_input_sequence) { buffer_.emplace_back(std::move(input_element)); } else { input_impl_.reset(); @@ -135,7 +135,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { std::swap(buffer_[index], buffer_.back()); buffer_.pop_back(); } else { - DCHECK(end_of_input_sequence_); + DCHECK(input_impl_ == nullptr); *end_of_sequence = true; } return Status::OK(); @@ -148,11 +148,11 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { // Save the tensors in the buffer. TF_RETURN_IF_ERROR( writer->WriteScalar(full_name("buffer_size"), buffer_.size())); - for (int i = 0; i < buffer_.size(); i++) { + for (size_t i = 0; i < buffer_.size(); i++) { TF_RETURN_IF_ERROR(writer->WriteScalar( full_name(strings::StrCat("buffer_", i, "_size")), buffer_[i].size())); - for (int j = 0; j < buffer_[i].size(); j++) { + for (size_t j = 0; j < buffer_[i].size(); j++) { TF_RETURN_IF_ERROR(writer->WriteTensor( full_name(strings::StrCat("buffer_", i, "_", j)), buffer_[i][j])); @@ -165,7 +165,7 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { // Save input iterator if it hasn't been exhausted else write // "end_of_input_sequence". - if (end_of_input_sequence_) { + if (!input_impl_) { TF_RETURN_IF_ERROR( writer->WriteScalar(full_name("end_of_input_sequence"), "")); } else { @@ -180,10 +180,15 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { buffer_.clear(); // Restore the buffer. - int64 buffer_size; - TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("buffer_size"), &buffer_size)); - for (int i = 0; i < buffer_size; i++) { + size_t buffer_size; + { + int64 temp; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("buffer_size"), &temp)); + buffer_size = static_cast(temp); + } + buffer_.reserve(buffer_size); + for (size_t i = 0; i < buffer_size; i++) { int64 list_size; TF_RETURN_IF_ERROR(reader->ReadScalar( full_name(strings::StrCat("buffer_", i, "_size")), &list_size)); @@ -205,7 +210,6 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { input_impl_ = dataset()->input_->MakeIterator(prefix()); TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); } else { - end_of_input_sequence_ = true; input_impl_.reset(); } return Status::OK(); @@ -230,7 +234,6 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { mutex mu_; std::vector> buffer_ GUARDED_BY(mu_); std::unique_ptr input_impl_ GUARDED_BY(mu_); - bool end_of_input_sequence_ GUARDED_BY(mu_) = false; const int64 seed_ GUARDED_BY(mu_); const int64 seed2_ GUARDED_BY(mu_); random::PhiloxRandom parent_generator_ GUARDED_BY(mu_); diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/skip_dataset_op.cc index 05152db1ae..7ee945dd4c 100644 --- a/tensorflow/core/kernels/skip_dataset_op.cc +++ b/tensorflow/core/kernels/skip_dataset_op.cc @@ -118,6 +118,11 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + // Keep calling GetNext(). TODO(vrv): Figure out a way to // skip records without reading, perhaps by adding an // interface to iterator. @@ -138,6 +143,9 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { // Return GetNext() on the underlying iterator. TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); + if (*end_of_sequence) { + input_impl_.reset(); + } return Status::OK(); } @@ -145,7 +153,12 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (input_impl_) { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } else { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } return Status::OK(); } @@ -153,7 +166,11 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { IteratorStateReader* reader) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/take_dataset_op.cc index f9f675abda..fb294a96b1 100644 --- a/tensorflow/core/kernels/take_dataset_op.cc +++ b/tensorflow/core/kernels/take_dataset_op.cc @@ -118,6 +118,10 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); // TODO(mrry): Make locking less conservative. + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } while (i_ < dataset()->count_) { TF_RETURN_IF_ERROR( input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); @@ -136,7 +140,12 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_)); - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + if (input_impl_) { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } else { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } return Status::OK(); } @@ -144,7 +153,11 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { IteratorStateReader* reader) override { mutex_lock l(mu_); TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_)); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index 30d64ea634..f466c8b268 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -109,6 +109,10 @@ class ZipDatasetOp : public DatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); + if (input_impls_.empty()) { + *end_of_sequence = true; + return Status::OK(); + } out_tensors->clear(); out_tensors->reserve(dataset()->output_dtypes().size()); for (const auto& input_impl : input_impls_) { @@ -116,28 +120,43 @@ class ZipDatasetOp : public DatasetOpKernel { TF_RETURN_IF_ERROR( input_impl->GetNext(ctx, &input_tensors, end_of_sequence)); if (*end_of_sequence) { - return Status::OK(); + break; } out_tensors->insert(out_tensors->end(), input_tensors.begin(), input_tensors.end()); } - *end_of_sequence = false; + if (*end_of_sequence) { + out_tensors->clear(); + input_impls_.clear(); + } else { + *end_of_sequence = false; + } return Status::OK(); } protected: Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - for (auto& input_impl : input_impls_) - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl)); + if (input_impls_.empty()) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impls_empty"), "")); + } else { + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl)); + } return Status::OK(); } Status RestoreInternal(OpKernelContext* ctx, IteratorStateReader* reader) override { mutex_lock l(mu_); - for (auto& input_impl : input_impls_) - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl)); + if (reader->Contains(full_name("input_impls_empty"))) { + input_impls_.clear(); + } else { + DCHECK_EQ(input_impls_.size(), dataset()->inputs_.size()); + for (auto& input_impl : input_impls_) + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl)); + } return Status::OK(); } -- GitLab From 6634bc414ee9a4bd69d9e870dcdd7e90a83b8e7d Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 9 Nov 2017 22:39:33 -0800 Subject: [PATCH 0262/1801] [XLA:CPU] Make one of the tile dimensions in the LLVM IR GEMV tunable. The tiling dimension corresponding to the number of vector registers in the tile can be changed easily. Expose this value as a backend specific flag so that we can experiment with it to find a good default value. This CL also fixes a bug exposed by a variable tiling factor in the row major GEMV implementation. This wasn't caught before because having tile_rows == tile_cols hides the bug. PiperOrigin-RevId: 175258553 --- tensorflow/compiler/xla/service/cpu/BUILD | 2 ++ .../compiler/xla/service/cpu/cpu_options.cc | 16 ++++++++++++++++ .../compiler/xla/service/cpu/cpu_options.h | 2 ++ .../compiler/xla/service/cpu/dot_op_emitter.cc | 11 ++++++++--- .../compiler/xla/service/cpu/dot_op_emitter.h | 9 +++++++++ 5 files changed, 37 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 10ec677e2f..4f6e69ebd4 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -280,6 +280,7 @@ cc_library( srcs = ["dot_op_emitter.cc"], hdrs = ["dot_op_emitter.h"], deps = [ + ":cpu_options", ":cpu_runtime", ":ir_emission_utils", "//tensorflow/compiler/xla:shape_util", @@ -719,6 +720,7 @@ cc_library( hdrs = ["cpu_options.h"], deps = [ "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.cc b/tensorflow/compiler/xla/service/cpu/cpu_options.cc index dba140d112..09f028463a 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.cc @@ -15,11 +15,14 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_options.h" +#include "tensorflow/core/lib/strings/numbers.h" + namespace { const char* const kXlaParallelCpuOption = "xla_cpu_parallel"; const char* const kXlaOptimizeForSizeCpuOption = "xla_cpu_optimize_for_size"; const char* const kXlaDisableVectorizedReduce = "xla_disable_vectorized_reduce"; +const char* const kLlvmIrDotTilingFactor = "xla_llvm_dot_tiling_factor"; } // namespace @@ -45,6 +48,19 @@ bool VectorizedReduceDisabled(const HloModuleConfig& config) { return extra_options_map.count(kXlaOptimizeForSizeCpuOption) > 0; } +tensorflow::gtl::optional LlvmIrGemvTilingFactor( + const HloModuleConfig& config) { + const auto& extra_options_map = + config.debug_options().xla_backend_extra_options(); + auto it = extra_options_map.find(kLlvmIrDotTilingFactor); + int64 tiling_factor; + if (it != extra_options_map.end() && + tensorflow::strings::safe_strto64(it->second, &tiling_factor)) { + return tiling_factor; + } + return tensorflow::gtl::nullopt; +} + } // namespace options } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.h b/tensorflow/compiler/xla/service/cpu/cpu_options.h index 5dc24ebc7b..6ba0fd2453 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.h @@ -27,6 +27,8 @@ namespace options { bool CpuParallelBackendRequested(const HloModuleConfig& config); bool OptimizeForSizeRequested(const HloModuleConfig& config); bool VectorizedReduceDisabled(const HloModuleConfig& config); +tensorflow::gtl::optional LlvmIrGemvTilingFactor( + const HloModuleConfig& config); } // namespace options } // namespace cpu diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 1cbd4094a3..2a447a54b0 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -366,7 +366,7 @@ class RowMajorMatrixVectorProductEmitter { result_(result), ir_builder_(ir_builder), ksl_(ir_builder_), - vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { + vsl_(scalar_type_, /*vector_size=*/tile_cols_, ir_builder_, "") { CHECK(tile_cols_ > 0 && IsPowerOfTwo(static_cast(tile_cols_))); } @@ -573,11 +573,15 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { return false; } + int64 tiling_factor = GetGemvTilingFactor(); + CHECK_GT(tiling_factor, 0); + if (is_column_major_matrix_vector) { VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m << " and k = " << k; ColumnMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), 8, 8, m, k, + dot_.shape().element_type(), /*tile_rows=*/8, + /*tile_cols=*/tiling_factor, m, k, swap_operands ? rhs_array_.GetBasePointer() : lhs_array_.GetBasePointer(), swap_operands ? lhs_array_.GetBasePointer() @@ -588,7 +592,8 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m << " and k = " << k; RowMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), 8, 8, m, k, + dot_.shape().element_type(), /*tile_rows=*/tiling_factor, + /*tile_cols=*/8, m, k, swap_operands ? rhs_array_.GetBasePointer() : lhs_array_.GetBasePointer(), swap_operands ? lhs_array_.GetBasePointer() diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index 182e1b8c68..470bf6ffb4 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_DOT_OP_EMITTER_H_ #include "llvm/IR/IRBuilder.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_options.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" @@ -105,6 +106,14 @@ class DotOpEmitter { // of rank 2 as well). MatMultDims GetMatMultDims() const; + // When doing a tiled GEMV in LLVM IR, a "tile" consists of this many vector + // registers. + int64 GetGemvTilingFactor() const { + const int64 kDefaultTilingFactor = 8; + return options::LlvmIrGemvTilingFactor(hlo_module_config_) + .value_or(kDefaultTilingFactor); + } + const HloInstruction& dot_; const bool transpose_lhs_; const bool transpose_rhs_; -- GitLab From c9de8dfda76e0dbd6d1e6c0bb97fcb3a7cb192af Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 9 Nov 2017 23:29:56 -0800 Subject: [PATCH 0263/1801] Java/OS X: Workaround for how the framework library is packaged in the .jar by the release process. See #13872 PiperOrigin-RevId: 175261983 --- .../java/org/tensorflow/NativeLibrary.java | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java index 2b431eebf5..499757e8cf 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java +++ b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java @@ -43,7 +43,6 @@ final class NativeLibrary { private static final boolean DEBUG = System.getProperty("org.tensorflow.NativeLibrary.DEBUG") != null; private static final String JNI_LIBNAME = "tensorflow_jni"; - private static final String FRAMEWORK_LIBNAME = "tensorflow_framework"; public static void load() { if (isLoaded() || tryLoadLibrary()) { @@ -59,12 +58,15 @@ final class NativeLibrary { } // Native code is not present, perhaps it has been packaged into the .jar file containing this. // Extract the JNI library itself - final String jniResourceName = makeResourceName(JNI_LIBNAME); + final String jniLibName = System.mapLibraryName(JNI_LIBNAME); + final String jniResourceName = makeResourceName(jniLibName); log("jniResourceName: " + jniResourceName); final InputStream jniResource = NativeLibrary.class.getClassLoader().getResourceAsStream(jniResourceName); // Extract the JNI's dependency - final String frameworkResourceName = makeResourceName(FRAMEWORK_LIBNAME); + final String frameworkLibName = + maybeAdjustForMacOS(System.mapLibraryName("tensorflow_framework")); + final String frameworkResourceName = makeResourceName(frameworkLibName); log("frameworkResourceName: " + frameworkResourceName); final InputStream frameworkResource = NativeLibrary.class.getClassLoader().getResourceAsStream(frameworkResourceName); @@ -88,12 +90,15 @@ final class NativeLibrary { tempPath.deleteOnExit(); final String tempDirectory = tempPath.toString(); if (frameworkResource != null) { - extractResource(frameworkResource, FRAMEWORK_LIBNAME, tempDirectory); + extractResource(frameworkResource, frameworkLibName, tempDirectory); } else { - log(frameworkResourceName + " not found. This is fine assuming " + jniResourceName - + " is not built to depend on it."); + log( + frameworkResourceName + + " not found. This is fine assuming " + + jniResourceName + + " is not built to depend on it."); } - System.load(extractResource(jniResource, JNI_LIBNAME, tempDirectory)); + System.load(extractResource(jniResource, jniLibName, tempDirectory)); } catch (IOException e) { throw new UnsatisfiedLinkError( String.format( @@ -121,9 +126,27 @@ final class NativeLibrary { } } + private static String maybeAdjustForMacOS(String libFilename) { + if (!System.getProperty("os.name").contains("OS X")) { + return libFilename; + } + // This is macOS, and the TensorFlow release process might have setup dependencies on + // libtensorflow_framework.so instead of libtensorflow_framework.dylib. Adjust for that. + final ClassLoader cl = NativeLibrary.class.getClassLoader(); + if (cl.getResource(makeResourceName(libFilename)) != null) { + return libFilename; + } + // liftensorflow_framework.dylib not found, try libtensorflow_framework.so + final String suffix = ".dylib"; + if (!libFilename.endsWith(suffix)) { + return libFilename; + } + return libFilename.substring(0, libFilename.length() - suffix.length()) + ".so"; + } + private static String extractResource( InputStream resource, String resourceName, String extractToDirectory) throws IOException { - final File dst = new File(extractToDirectory, System.mapLibraryName(resourceName)); + final File dst = new File(extractToDirectory, resourceName); dst.deleteOnExit(); final String dstPath = dst.toString(); log("extracting native library to: " + dstPath); @@ -157,9 +180,7 @@ final class NativeLibrary { } private static String makeResourceName(String baseName) { - return "org/tensorflow/native/" - + String.format("%s-%s/", os(), architecture()) - + System.mapLibraryName(baseName); + return "org/tensorflow/native/" + String.format("%s-%s/", os(), architecture()) + baseName; } private static long copy(InputStream src, File dstFile) throws IOException { -- GitLab From 15670bed76d42bc016b8730091ad55c99571683f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 02:48:04 -0800 Subject: [PATCH 0264/1801] Correct comment in K-FAC's layer_collection PiperOrigin-RevId: 175275184 --- tensorflow/contrib/kfac/python/ops/layer_collection.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 4eabb59b3e..7300a7998c 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -448,10 +448,10 @@ class LayerCollection(object): tf.get_variable_scope().reuse. Raises: - ValueError: If reuse=True and name != None. - ValueError: If reuse=True and seed != None. - KeyError: If reuse=True and no existing LossFunction with 'name' found. - KeyError: If reuse=False and existing LossFunction with 'name' found. + ValueError: If reuse == True and name == None. + ValueError: If reuse == True and seed != None. + KeyError: If reuse == True and no existing LossFunction with 'name' found. + KeyError: If reuse == False and existing LossFunction with 'name' found. """ name = name or self._graph.unique_name( "register_categorical_predictive_distribution") -- GitLab From 8614ef614245cfcfdd09bda0d633d5aa4f6e856e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 03:30:53 -0800 Subject: [PATCH 0265/1801] Extend the Array class with more functionality. PiperOrigin-RevId: 175277161 --- tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/array.h | 159 +++++++++++++++++- tensorflow/compiler/xla/array_test.cc | 45 +++++ .../compiler/xla/client/computation_builder.h | 1 + .../compiler/xla/service/hlo_instruction.h | 5 + .../compiler/xla/service/hlo_sharding.cc | 3 +- 6 files changed, 205 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index f6e405744a..515b572b0e 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -340,6 +340,7 @@ cc_library( name = "array", hdrs = ["array.h"], deps = [ + ":status", ":types", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index ba898d1f4e..213e0bac6c 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -23,8 +23,10 @@ limitations under the License. #include #include #include +#include #include +#include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -35,10 +37,63 @@ limitations under the License. namespace xla { +namespace array_impl { + +// conjunction +// +// Performs a compile-time logical AND operation on the passed types (which +// must have `::value` members convertible to `bool`. Short-circuits if it +// encounters any `false` members (and does not compare the `::value` members +// of any remaining arguments). +// +// This metafunction is designed to be a drop-in replacement for the C++17 +// `std::conjunction` metafunction. +template +struct conjunction; + +template +struct conjunction + : std::conditional, T>::type {}; + +template <> +struct conjunction<> : std::true_type {}; + +// A type trait that is valid when all elements in a parameter pack are of +// integral type. +template +using pack_is_integral = conjunction...>; + +// Compares three same-sized vectors elementwise. For each item in `values`, +// returns false if any of values[i] is outside the half-open range [starts[i], +// ends[i]). +template +bool all_inside_range(const C1& values, const C2& range_starts, + const C3& range_ends) { + for (size_t i = 0, e = values.size(); i < e; ++i) { + if (values[i] < range_starts[i] || values[i] >= range_ends[i]) { + return false; + } + } + return true; +} + +} // namespace array_impl + // General N dimensional array class with arbitrary value type. template class Array { public: + // Type inference can have a hard time parsing very deep initializer list + // nests, especially if one or more dimensions is one as the compiler just + // sees a single-element integer initializer. These typedefs allow casting + // explicitly with less typing. + using InitializerList1D = std::initializer_list; + using InitializerList2D = std::initializer_list; + using InitializerList3D = std::initializer_list; + using InitializerList4D = std::initializer_list; + + using value_type = T; + // Creates a new array with the specified dimensions. explicit Array(tensorflow::gtl::ArraySlice sizes) : Array(sizes, T()) {} @@ -53,7 +108,7 @@ class Array { // Creates a 2D array from the given nested initializer list. The outer // initializer list is the first dimension, the inner is the second dimension. // For example, {{1, 2, 3}, {4, 5, 6}} results in an array with n1=2 and n2=3. - Array(std::initializer_list> values) + Array(InitializerList2D values) : Array(ToInt64Vector({values.size(), values.begin()->size()})) { int64 idx = 0; for (const auto& it1 : values) { @@ -67,8 +122,7 @@ class Array { // Creates a 3D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. - Array(std::initializer_list>> - values) + Array(InitializerList3D values) : Array(ToInt64Vector({values.size(), values.begin()->size(), values.begin()->begin()->size()})) { int64 idx = 0; @@ -85,9 +139,7 @@ class Array { // Creates a 4D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. - Array(std::initializer_list< - std::initializer_list>>> - values) + Array(InitializerList4D values) : Array(ToInt64Vector({values.size(), values.begin()->size(), values.begin()->begin()->size(), values.begin()->begin()->begin()->size()})) { @@ -173,10 +225,46 @@ class Array { } } + // Invokes a callback with the (indices, value_ptr) for each cell in the + // array. If a callback returns a non-OK status, returns that else returns + // Status::OK(). + Status EachStatus( + std::function, T*)> f) { + std::vector index(sizes_.size()); + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + Status s = f(index, &values_[i]); + if (!s.ok()) { + return s; + } + } + return Status::OK(); + } + + // Invokes a callback with the (indices, value) for each cell in the array. + // If a callback returns a non-OK status, returns that else returns + // Status::OK(). + Status EachStatus( + std::function, T)> f) const { + std::vector index(sizes_.size()); + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + Status s = f(index, values_[i]); + if (!s.ok()) { + return s; + } + } + return Status::OK(); + } + // Returns the value at the cell specified by the indexes. The number of // arguments have to match with the number of dimensions for the array. + // + // The type trait is required to avoid this overload participating too + // eagerly; a parameter pack can take zero or more elements, so we must + // restrict this to only parameter packs that are all of integral type. template - const T& operator()(Dims... dims) const { + typename std::enable_if::value, + const T&>::type + operator()(Dims... dims) const { // We are using a std::array to avoid having to allocate memory in this // function for performance reasons. std::array indexes{{static_cast(dims)...}}; @@ -186,7 +274,9 @@ class Array { // Returns the value at the cell specified by the indexes. The number of // arguments have to match with the number of dimensions for the array. template - T& operator()(Dims... dims) { + typename std::enable_if::value, + T&>::type + operator()(Dims... dims) { // We are using a std::array to avoid having to allocate memory in this // function for performance reasons. std::array indexes{{static_cast(dims)...}}; @@ -255,6 +345,59 @@ class Array { bool operator!=(const Array& other) const { return !(*this == other); } + // Performs the equivalent of a slice operation on this array. + Array Slice(tensorflow::gtl::ArraySlice starts, + tensorflow::gtl::ArraySlice limits) const { + CHECK_EQ(starts.size(), num_dimensions()); + CHECK_EQ(limits.size(), num_dimensions()); + + std::vector sizes; + std::transform(starts.begin(), starts.end(), limits.begin(), + std::back_inserter(sizes), + [](int64 start, int64 limit) { return limit - start; }); + Array result(sizes); + + std::vector index(sizes_.size()); + int64 slice_i = 0; + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + if (array_impl::all_inside_range(index, starts, limits)) { + // Even though the bounds of result are different to our bounds, we're + // iterating in the same order. So we can simply write successive linear + // indices instead of recalculating a multi-dimensional index. + result.values_[slice_i++] = values_[i]; + } + } + return result; + } + + // Performs the equivalent of a DynamicUpdateSlice in-place on this array. + void UpdateSlice(const Array& from, + tensorflow::gtl::ArraySlice start_indices) { + CHECK_EQ(from.num_dimensions(), num_dimensions()); + std::vector limit_indices; + std::transform(start_indices.begin(), start_indices.end(), + from.dimensions().begin(), std::back_inserter(limit_indices), + std::plus{}); + std::vector index(sizes_.size()); + int64 from_i = 0; + for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { + if (array_impl::all_inside_range(index, start_indices, limit_indices)) { + // Even though the bounds of from are different to our bounds, we're + // iterating in the same order. So we can simply write successive linear + // indices instead of recalculating a multi-dimensional index. + values_[i] = from.values_[from_i++]; + } + } + } + + // Performs an in-place reshape, modifying the dimensions but not the + // underlying data. + void Reshape(tensorflow::gtl::ArraySlice new_dimensions) { + int64 old_num_elements = num_elements(); + sizes_ = std::vector(new_dimensions.begin(), new_dimensions.end()); + CHECK_EQ(num_elements(), old_num_elements); + } + // Returns a string representation of the array suitable for debugging. string ToString() const { std::vector pieces; diff --git a/tensorflow/compiler/xla/array_test.cc b/tensorflow/compiler/xla/array_test.cc index 093784f541..8b94194774 100644 --- a/tensorflow/compiler/xla/array_test.cc +++ b/tensorflow/compiler/xla/array_test.cc @@ -71,6 +71,19 @@ TEST(ArrayTest, IndexingReadWrite) { EXPECT_EQ(arr(1, 2), 61); } +TEST(ArrayTest, DynamicIndexingReadWrite) { + Array arr({2, 3}); + + std::vector index1 = {1, 1}; + std::vector index2 = {1, 2}; + EXPECT_EQ(arr(index1), 0); + EXPECT_EQ(arr(index2), 0); + arr(index1) = 51; + arr(index2) = 61; + EXPECT_EQ(arr(1, 1), 51); + EXPECT_EQ(arr(1, 2), 61); +} + TEST(ArrayTest, IndexingReadWriteBool) { Array arr{{false, true, false}, {false, true, false}}; @@ -141,5 +154,37 @@ TEST(ArrayTest, Each) { EXPECT_EQ(arr.num_elements() * (arr.num_elements() - 1) / 2, each_sum); } +TEST(ArrayTest, Slice) { + Array arr({2, 4}); + arr.FillWithMultiples(1); + + Array identity_slice = arr.Slice({0, 0}, {2, 4}); + EXPECT_EQ(identity_slice.dimensions(), arr.dimensions()); + for (auto it1 = arr.begin(), it2 = identity_slice.begin(), e = arr.end(); + it1 != e; ++it1, ++it2) { + EXPECT_EQ(*it1, *it2); + } + + Array sub_slice = arr.Slice({1, 0}, {2, 2}); + EXPECT_EQ(sub_slice.dimensions(), (std::vector{1, 2})); + const string expected = R"([[4, 5]])"; + EXPECT_EQ(expected, sub_slice.ToString()); +} + +TEST(ArrayTest, UpdateSlice) { + Array arr({3, 4}); + arr.FillWithMultiples(1); + + Array sub_arr({2, 2}); + sub_arr.FillWithMultiples(3); + + arr.UpdateSlice(sub_arr, {1, 1}); + + const string expected = R"([[0, 1, 2, 3], + [4, 0, 3, 7], + [8, 6, 9, 11]])"; + EXPECT_EQ(expected, arr.ToString()); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 8e1b4be1f3..4c6e320557 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -68,6 +68,7 @@ class ShardingBuilder { const TileAssignment& tile_assignment) { OpSharding result; result.set_type(OpSharding::Type::OpSharding_Type_OTHER); + *result.mutable_tile_shape() = tile_shape; for (int64 dim : tile_assignment.dimensions()) { result.add_tile_assignment_dimensions(dim); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 64a88164a7..d174f05aa6 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -863,6 +863,11 @@ class HloInstruction { return *window_; } + // Sets the window data in a windowed operation such as convolution. + void set_window(const Window& window) { + window_ = MakeUnique(window); + } + // Returns the padding configuration for a pad node. // // Precondition: opcode() == HloOpcode::kPad diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index bc5663513b..7356663454 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -249,7 +249,8 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const { return HloSharding(tuple_shardings); } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { return Replicate(); - } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL) { + } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL || + proto.tile_assignment_devices().size() == 1) { return HloSharding(proto.tile_assignment_devices(0)); } // Some versions of gcc cannot infer the TileAssignment constructor from a -- GitLab From 8ca7c8f3d4e0c39ec699eaea68d60c94fb624426 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 08:52:44 -0800 Subject: [PATCH 0266/1801] [XLA] Make TuplePointsToAnalysis and LogicalBufferAnalysis track nested fusion instructions. PiperOrigin-RevId: 175295981 --- .../xla/service/logical_buffer_analysis.cc | 23 ++++++++++++- .../xla/service/tuple_points_to_analysis.cc | 32 +++++++++++++++---- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc index 02dc49e78c..6aca6ba385 100644 --- a/tensorflow/compiler/xla/service/logical_buffer_analysis.cc +++ b/tensorflow/compiler/xla/service/logical_buffer_analysis.cc @@ -23,6 +23,23 @@ limitations under the License. namespace xla { +namespace { + +// Gather fusion instructions from 'instruction' into 'fusion_instructions'. +void GatherFusionInstructions( + HloInstruction* instruction, + std::vector* fusion_instructions) { + CHECK_EQ(HloOpcode::kFusion, instruction->opcode()); + for (auto* fused : instruction->fused_instructions()) { + if (fused->opcode() == HloOpcode::kFusion) { + GatherFusionInstructions(fused, fusion_instructions); + } + } + fusion_instructions->push_back(instruction); +} + +} // namespace + /* static */ StatusOr> LogicalBufferAnalysis::Run(const HloModule* module) { std::unique_ptr analysis( @@ -41,15 +58,19 @@ Status LogicalBufferAnalysis::Analyze() { // We filter out fusion computations, and get to them through fusion // instructions. This is because it's possible to have orphaned (unreachable) // fusion computations, and we don't want to try to assign buffers to those. + std::vector fusion_instructions; for (auto* computation : module_->MakeNonfusionComputations()) { TF_RETURN_IF_ERROR(computation->Accept(this)); for (auto* instruction : computation->instructions()) { if (instruction->opcode() != HloOpcode::kFusion) { continue; } - TF_RETURN_IF_ERROR(instruction->fused_expression_root()->Accept(this)); + GatherFusionInstructions(instruction, &fusion_instructions); } } + for (auto* instruction : fusion_instructions) { + TF_RETURN_IF_ERROR(instruction->fused_expression_root()->Accept(this)); + } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index a1f9451dd4..0c84856647 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -120,6 +120,23 @@ void PointsToSet::add_tuple_source(const ShapeIndex& index, tree_.mutable_element(index)->tuple_sources.insert(tuple); } +namespace { + +// Gather fusion instructions from 'instruction' into 'fusion_instructions'. +void GatherFusionInstructions( + HloInstruction* instruction, + std::vector* fusion_instructions) { + CHECK_EQ(HloOpcode::kFusion, instruction->opcode()); + for (auto* fused : instruction->fused_instructions()) { + if (fused->opcode() == HloOpcode::kFusion) { + GatherFusionInstructions(fused, fusion_instructions); + } + } + fusion_instructions->push_back(instruction); +} + +} // namespace + /* static */ StatusOr> TuplePointsToAnalysis::Run(const HloModule* module) { auto logical_buffer_analysis = LogicalBufferAnalysis::Run(module); @@ -137,20 +154,23 @@ Status TuplePointsToAnalysis::Analyze() { logical_buffer_aliases_.resize( logical_buffer_analysis_->num_logical_buffers()); + std::vector fusion_instructions; for (auto* computation : module_->MakeNonfusionComputations()) { TF_RETURN_IF_ERROR(computation->Accept(this)); TF_RETURN_IF_ERROR( PopulateDefinedBuffersAndAliases(computation->instructions())); - // Run points-to analysis on fusion instructions in 'computation'. for (auto* instruction : computation->instructions()) { - if (instruction->opcode() != HloOpcode::kFusion) { - continue; + if (instruction->opcode() == HloOpcode::kFusion) { + GatherFusionInstructions(instruction, &fusion_instructions); } - TF_RETURN_IF_ERROR(instruction->fused_expression_root()->Accept(this)); - TF_RETURN_IF_ERROR( - PopulateDefinedBuffersAndAliases(instruction->fused_instructions())); } } + // Run points-to analysis on fusion instructions in 'computation'. + for (auto* instruction : fusion_instructions) { + TF_RETURN_IF_ERROR(instruction->fused_expression_root()->Accept(this)); + TF_RETURN_IF_ERROR( + PopulateDefinedBuffersAndAliases(instruction->fused_instructions())); + } XLA_VLOG_LINES(3, ToString()); -- GitLab From f951e3ee9afe399d293aedd1de7aecf4e0c8a29b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 09:07:50 -0800 Subject: [PATCH 0267/1801] n/a (internal change only) PiperOrigin-RevId: 175297329 --- tensorflow/contrib/learn/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 2917a30a17..94920db574 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -22,6 +22,8 @@ py_library( exclude = ["python/learn/**/*_test.py"], ), srcs_version = "PY2AND3", + # This library should not depend on sklearn, even though some of the code + # refers to it. (The code handles the presence of sklearn conditionally.) deps = [ "//tensorflow/contrib/factorization:factorization_py", "//tensorflow/contrib/framework:framework_py", -- GitLab From 187ce87c61ea2d6f162a7e6b0cd81a7974642f17 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Fri, 10 Nov 2017 09:59:22 -0800 Subject: [PATCH 0268/1801] Add proper element_shape values to tf.nn.dynamic_rnn TensorArrays. PiperOrigin-RevId: 175302425 --- tensorflow/python/ops/rnn.py | 42 +++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 21c7ed361d..df66302402 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops @@ -134,6 +135,13 @@ def _infer_state_dtype(explicit_dtype, state): return state.dtype +def _maybe_tensor_shape_from_tensor(shape): + if isinstance(shape, ops.Tensor): + return tensor_shape.as_shape(tensor_util.constant_value(shape)) + else: + return shape + + # pylint: disable=unused-argument def _rnn_step( time, sequence_length, min_sequence_length, max_sequence_length, @@ -715,18 +723,28 @@ def _dynamic_rnn_loop(cell, with ops.name_scope("dynamic_rnn") as scope: base_name = scope - def _create_ta(name, dtype): + def _create_ta(name, element_shape, dtype): return tensor_array_ops.TensorArray(dtype=dtype, size=time_steps, + element_shape=element_shape, tensor_array_name=base_name + name) in_graph_mode = context.in_graph_mode() if in_graph_mode: - output_ta = tuple(_create_ta("output_%d" % i, - _infer_state_dtype(dtype, state)) - for i in range(len(flat_output_size))) - input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) - for i in range(len(flat_input))) + output_ta = tuple( + _create_ta( + "output_%d" % i, + element_shape=(tensor_shape.TensorShape([const_batch_size]) + .concatenate( + _maybe_tensor_shape_from_tensor(out_size))), + dtype=_infer_state_dtype(dtype, state)) + for i, out_size in enumerate(flat_output_size)) + input_ta = tuple( + _create_ta( + "input_%d" % i, + element_shape=flat_input_i.shape[1:], + dtype=flat_input_i.dtype) + for i, flat_input_i in enumerate(flat_input)) input_ta = tuple(ta.unstack(input_) for ta, input_ in zip(input_ta, flat_input)) else: @@ -1007,6 +1025,7 @@ def raw_rnn(cell, loop_fn, static_batch_size.merge_with(input_shape_i[0]) batch_size = static_batch_size.value + const_batch_size = batch_size if batch_size is None: batch_size = array_ops.shape(flat_input[0])[0] @@ -1029,8 +1048,15 @@ def raw_rnn(cell, loop_fn, flat_emit_ta = [ tensor_array_ops.TensorArray( - dtype=dtype_i, dynamic_size=True, size=0, name="rnn_output_%d" % i) - for i, dtype_i in enumerate(flat_emit_dtypes)] + dtype=dtype_i, + dynamic_size=True, + element_shape=(tensor_shape.TensorShape([const_batch_size]) + .concatenate( + _maybe_tensor_shape_from_tensor(size_i))), + size=0, + name="rnn_output_%d" % i) + for i, (dtype_i, size_i) + in enumerate(zip(flat_emit_dtypes, flat_emit_size))] emit_ta = nest.pack_sequence_as(structure=emit_structure, flat_sequence=flat_emit_ta) flat_zero_emit = [ -- GitLab From 813451826892869acec35643bc3ca9800437a00b Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Fri, 10 Nov 2017 10:06:12 -0800 Subject: [PATCH 0269/1801] Correct an IndexedSlices-related test in replicate_model_fn_test.py. I attempted to exercise compute_sum_on_device(IndexedSlices) via the DNNCLassifier test per reviewer's suggestion. This changelist is way to do it correctly. I verified that it indeed triggers the required codepath by adding logging and removing compute_sum_on_device(IndexedSlices) support. PiperOrigin-RevId: 175303333 --- .../estimator/python/estimator/replicate_model_fn_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index 21d5a9c327..7fb1065ac0 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -89,11 +89,11 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)), - feature_column.indicator_column( + feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( 'categories', vocabulary_list=np.linspace( - 0., len(x_data), len(x_data), dtype=np.int64))) + 0., len(x_data), len(x_data), dtype=np.int64)), 1) ] estimator = dnn.DNNClassifier( -- GitLab From d82ab02a926c3564c44a19deaf298afc10326bf6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 10:12:35 -0800 Subject: [PATCH 0270/1801] [TF:XLA] Add helper to append buffers to ShapedBuffer. PiperOrigin-RevId: 175304150 --- tensorflow/compiler/xla/service/shaped_buffer.cc | 8 ++++++++ tensorflow/compiler/xla/service/shaped_buffer.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index a2a442eb1a..a57ebf59e7 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -63,6 +63,14 @@ void ShapedBuffer::clear() { } } +void ShapedBuffer::AddBufferAtIndex( + const perftools::gputools::DeviceMemoryBase& buffer, + const ShapeIndex& shape_index) { + *mutable_shape_index_to_buffer_entry()->mutable_element(shape_index) = + buffers().size(); + mutable_buffers()->push_back(buffer); +} + const se::DeviceMemoryBase& ShapedBuffer::buffer( const ShapeIndex& index) const { return buffers_[shape_index_to_buffer_entry_.element(index)]; diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index e5ea06fb13..b440948700 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -75,6 +75,10 @@ class ShapedBuffer { // Set all device memory pointers in the object to null. void clear(); + // Adds a new buffer at the given shape index. + void AddBufferAtIndex(const perftools::gputools::DeviceMemoryBase& buffer, + const ShapeIndex& shape_index); + protected: // The shape of the device buffer with layout. const Shape shape_; -- GitLab From 10d1827987b0eca4d0e6f8f56506c93c67e03f83 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Fri, 10 Nov 2017 10:16:53 -0800 Subject: [PATCH 0271/1801] [XLA] Include kConvolution in the dot-add liveness optimization PiperOrigin-RevId: 175304705 --- tensorflow/compiler/xla/service/liveness_util.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/liveness_util.cc b/tensorflow/compiler/xla/service/liveness_util.cc index c27a8956a7..53d88eda7a 100644 --- a/tensorflow/compiler/xla/service/liveness_util.cc +++ b/tensorflow/compiler/xla/service/liveness_util.cc @@ -215,7 +215,8 @@ bool CanShareOperandBufferWithUser( auto add_operand_it = std::find_if(add->operands().begin(), add->operands().end(), [&](HloInstruction* operand) { - return operand->opcode() == HloOpcode::kDot || + return operand->opcode() == HloOpcode::kConvolution || + operand->opcode() == HloOpcode::kDot || (operand->opcode() == HloOpcode::kFusion && operand->fusion_kind() == HloInstruction::FusionKind::kTransposeDot); @@ -294,7 +295,8 @@ bool CanShareOperandBufferWithUser(HloInstruction* operand, auto add_operand_it = std::find_if(add->operands().begin(), add->operands().end(), [&](HloInstruction* operand) { - return operand->opcode() == HloOpcode::kDot || + return operand->opcode() == HloOpcode::kConvolution || + operand->opcode() == HloOpcode::kDot || (operand->opcode() == HloOpcode::kFusion && operand->fusion_kind() == HloInstruction::FusionKind::kTransposeDot); -- GitLab From 51889acee1a266478b578afad3fbe7b3a90fc17a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 10:22:07 -0800 Subject: [PATCH 0272/1801] Add suffix to newly created Mul op in the optimizer to avoid the name collision when two Conv2D objects depend on the same Const. PiperOrigin-RevId: 175305425 --- .../optimizers/arithmetic_optimizer.cc | 3 +- .../optimizers/arithmetic_optimizer_test.cc | 46 ++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index f2277a9b79..e8ef0e94b5 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -794,7 +794,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( scale_tensor.tensor_shape().dim_size() == 0) { // Create new node `scaled_weights`. NodeDef* scaled_weights = graph_def->add_node(); - scaled_weights->set_name(weights->name() + "_scaled"); + scaled_weights->set_name(weights->name() + "_scaled_" + + conv->name()); scaled_weights->set_op("Mul"); scaled_weights->set_device(weights->device()); (*scaled_weights->mutable_attr())["T"] = diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 60fb47f51a..4fcbb0120e 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -887,7 +887,7 @@ TEST_F(ArithmeticOptimizerTest, OptimizeCastMulTransposeConv) { CHECK_NOTNULL(node_map.GetNode("Transpose_uint8")); const NodeDef* cast_node = CHECK_NOTNULL(node_map.GetNode("Cast_new")); const NodeDef* weights_node = - CHECK_NOTNULL(node_map.GetNode("weights_scaled")); + CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D")); const NodeDef* conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D")); EXPECT_EQ(output.node_size(), 7); @@ -897,6 +897,50 @@ TEST_F(ArithmeticOptimizerTest, OptimizeCastMulTransposeConv) { EXPECT_EQ(conv_node->input(1), weights_node->name()); } +TEST_F(ArithmeticOptimizerTest, OptimizeMultipleMulTransposeConv) { + // This unit test exercises optimization of folding mul into conv for + // multiple nodes in the graph. + tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice("/gpu:0"); + + GrapplerItem item; + Output conv[2]; + + for (int i = 0; i < 2; ++i) { + Output inputs = + ops::Placeholder(s, DT_FLOAT, ops::Placeholder::Shape({8, 3, 28, 28})); + Output mul = ops::Mul(s, inputs, ops::Const(s, 1.0f / 255.0f)); + Output weights = ops::Const(s.WithOpName("weights"), + Input::Initializer(127.0f, {5, 5, 3, 16})); + conv[i] = ops::Conv2D(s, mul, weights, {1, 1, 1, 1}, "VALID", + ops::Conv2D::DataFormat("NCHW")); + } + Output outputs = ops::Add(s.WithOpName("outputs"), conv[0], conv[1]); + + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + + item.graph = output; + TF_EXPECT_OK( + ConstantFolding(/*cpu_device=*/nullptr).Optimize(nullptr, item, &output)); + + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + NodeMap node_map(&output); + const NodeDef* weights_node = + CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D")); + const NodeDef* conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D")); + + const NodeDef* weights_node_1 = + CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D_1")); + const NodeDef* conv_node_1 = CHECK_NOTNULL(node_map.GetNode("Conv2D_1")); + EXPECT_EQ(conv_node->input(1), weights_node->name()); + EXPECT_EQ(conv_node_1->input(1), weights_node_1->name()); +} + TEST_F(ArithmeticOptimizerTest, CombineBitcasts) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output inputs = -- GitLab From 7ac140a5845553275427162aabd9d54987144b4a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 10:27:25 -0800 Subject: [PATCH 0273/1801] Adds properties to LossFunction to access inputs separated by minibatch. Currently, information about separate minibatches registered by `LossFunction`s is private, and only the concatenation of all minibatch inputs is exposed through the `inputs` property. This change adds `input_minibatches` and `num_registered_minibatches` to `LossFunction` to expose this information. PiperOrigin-RevId: 175306297 --- .../contrib/kfac/python/kernel_tests/BUILD | 1 + .../kernel_tests/loss_functions_test.py | 17 +++++ .../contrib/kfac/python/ops/loss_functions.py | 62 ++++++++++++++----- 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index 5d86373a23..5b7747b0a1 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -139,6 +139,7 @@ py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:framework_ops", + "//tensorflow/python:random_ops", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py b/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py index 87339cb059..39ce3e9337 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py @@ -24,6 +24,7 @@ from tensorflow.contrib.kfac.python.ops import loss_functions from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import random_ops from tensorflow.python.platform import test @@ -96,6 +97,22 @@ class CategoricalLogitsNegativeLogProbLossTest(test.TestCase): # difficult to say if the output is correct or not... neg_log_prob = sess.run(neg_log_prob) + def testMultiMinibatchRegistration(self): + """Ensure this loss function supports registering multiple minibatches.""" + with ops.Graph().as_default(): + tower_logits = [] + loss = None + num_towers = 5 + for _ in range(num_towers): + logits = random_ops.random_uniform(shape=[2, 3]) + tower_logits.append(logits) + if loss is None: + loss = loss_functions.CategoricalLogitsNegativeLogProbLoss(logits) + else: + loss.register_additional_minibatch(logits) + self.assertListEqual(loss.input_minibatches, tower_logits) + self.assertEqual(loss.num_registered_minibatches, num_towers) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions.py b/tensorflow/contrib/kfac/python/ops/loss_functions.py index 3cfde7f9ab..e2e5bc3ffe 100644 --- a/tensorflow/contrib/kfac/python/ops/loss_functions.py +++ b/tensorflow/contrib/kfac/python/ops/loss_functions.py @@ -56,6 +56,30 @@ class LossFunction(object): """The inputs to the loss function (excluding the targets).""" pass + @property + def input_minibatches(self): + """A `list` of inputs to the loss function, separated by minibatch. + + Typically there will be one minibatch per tower in a multi-tower setup. + Returns a list consisting of `self.inputs` by default; `LossFunction`s + supporting registering multiple minibatches should override this method. + + Returns: + A `list` of `Tensor`s representing + """ + return [self.inputs] + + @property + def num_registered_minibatches(self): + """Number of minibatches registered for this LossFunction. + + Typically equal to the number of towers in a multi-tower setup. + + Returns: + An `int` representing the number of registered minibatches. + """ + return len(self.input_minibatches) + def evaluate(self): """Evaluate the loss function on the targets.""" if self.targets is not None: @@ -75,7 +99,6 @@ class LossFunction(object): Returns: log probability of each target, summed across all targets. """ - pass @abc.abstractmethod @@ -415,8 +438,8 @@ class NormalMeanNegativeLogProbLoss(DistributionNegativeLogProbLoss, array_ops.ones(array_ops.shape(self._mean)[:1], dtype=self._mean.dtype), axis=-1) output_slice = self._var**-0.5 * ones_slice - return insert_slice_in_zeros(output_slice, 1, - int(self._mean.shape[1]), index[0]) + return insert_slice_in_zeros(output_slice, 1, int(self._mean.shape[1]), + index[0]) @property def fisher_factor_inner_shape(self): @@ -474,24 +497,23 @@ class NormalMeanVarianceNegativeLogProbLoss(DistributionNegativeLogProbLoss): @property def _fisher_mean(self): - return 1./self._variance + return 1. / self._variance @property def _fisher_mean_factor(self): - return 1./self._scale + return 1. / self._scale @property def _fisher_var(self): - return 1./(2*math_ops.square(self._variance)) + return 1. / (2 * math_ops.square(self._variance)) @property def _fisher_var_factor(self): - return 1./(math_ops.sqrt(2.)*self._variance) + return 1. / (math_ops.sqrt(2.) * self._variance) def multiply_fisher(self, vecs): mean_vec, var_vec = vecs - return (self._fisher_mean * mean_vec, - self._fisher_var * var_vec) + return (self._fisher_mean * mean_vec, self._fisher_var * var_vec) def multiply_fisher_factor(self, vecs): mean_vec, var_vec = self._split(vecs) @@ -511,8 +533,8 @@ class NormalMeanVarianceNegativeLogProbLoss(DistributionNegativeLogProbLoss): # Index corresponds to mean parameter. mean_slice = self._fisher_mean_factor[:, index] mean_slice = array_ops.expand_dims(mean_slice, axis=-1) - mean_output = insert_slice_in_zeros(mean_slice, 1, - int(self._mean.shape[1]), index) + mean_output = insert_slice_in_zeros(mean_slice, 1, int( + self._mean.shape[1]), index) var_output = array_ops.zeros_like(mean_output) else: index -= int(self._mean.shape[-1]) @@ -527,13 +549,17 @@ class NormalMeanVarianceNegativeLogProbLoss(DistributionNegativeLogProbLoss): @property def fisher_factor_inner_shape(self): - return array_ops.concat([array_ops.shape(self._mean)[:-1], - 2*array_ops.shape(self._mean)[-1:]], axis=0) + return array_ops.concat( + [ + array_ops.shape(self._mean)[:-1], + 2 * array_ops.shape(self._mean)[-1:] + ], + axis=0) @property def fisher_factor_inner_static_shape(self): shape = self._mean.shape.as_list() - return tensor_shape.TensorShape(shape[-1:] + [2*shape[-1]]) + return tensor_shape.TensorShape(shape[-1:] + [2 * shape[-1]]) def multiply_hessian(self, vector): raise NotImplementedError() @@ -605,6 +631,10 @@ class CategoricalLogitsNegativeLogProbLoss(DistributionNegativeLogProbLoss, def _logits(self): return array_ops.concat(self._logits_components, axis=0) + @property + def input_minibatches(self): + return self._logits_components + @property def targets(self): if all(target is None for target in self._targets_components): @@ -710,8 +740,8 @@ class MultiBernoulliNegativeLogProbLoss(DistributionNegativeLogProbLoss, assert len(index) == 1, "Length of index was {}".format(len(index)) probs_slice = array_ops.expand_dims(self._probs[:, index[0]], -1) output_slice = math_ops.sqrt(probs_slice * (1 - probs_slice)) - return insert_slice_in_zeros(output_slice, 1, - int(self._logits.shape[1]), index[0]) + return insert_slice_in_zeros(output_slice, 1, int(self._logits.shape[1]), + index[0]) @property def fisher_factor_inner_shape(self): -- GitLab From 0b15439f8f0f2d4755587f4096c3ea04cb199d23 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 10:35:35 -0800 Subject: [PATCH 0274/1801] Internal Change. PiperOrigin-RevId: 175307445 --- configure.py | 2 + tensorflow/BUILD | 19 + tensorflow/contrib/BUILD | 1 + tensorflow/contrib/__init__.py | 1 + tensorflow/contrib/cmake/tf_python.cmake | 13 + tensorflow/contrib/lite/BUILD | 280 ++ tensorflow/contrib/lite/allocation.cc | 122 + tensorflow/contrib/lite/allocation.h | 94 + tensorflow/contrib/lite/build_def.bzl | 233 ++ tensorflow/contrib/lite/builtin_op_data.h | 164 + tensorflow/contrib/lite/context.c | 92 + tensorflow/contrib/lite/context.h | 298 ++ tensorflow/contrib/lite/context_test.cc | 74 + tensorflow/contrib/lite/error_reporter.cc | 50 + tensorflow/contrib/lite/error_reporter.h | 54 + tensorflow/contrib/lite/interpreter.cc | 567 +++ tensorflow/contrib/lite/interpreter.h | 376 ++ tensorflow/contrib/lite/interpreter_test.cc | 526 +++ tensorflow/contrib/lite/java/BUILD | 164 + tensorflow/contrib/lite/java/demo/.gitignore | 9 + .../contrib/lite/java/demo/app/build.gradle | 58 + .../demo/app/src/main/AndroidManifest.xml | 42 + .../contrib/lite/java/demo/app/src/main/BUILD | 43 + .../lite/java/demo/app/src/main/assets/BUILD | 26 + .../java/demo/app/src/main/assets/labels.txt | 1001 +++++ .../tflitecamerademo/AutoFitTextureView.java | 72 + .../Camera2BasicFragment.java | 708 ++++ .../tflitecamerademo/CameraActivity.java | 35 + .../tflitecamerademo/ImageClassifier.java | 184 + .../main/res/drawable-hdpi/ic_action_info.png | Bin 0 -> 490 bytes .../main/res/drawable-hdpi/ic_launcher.png | Bin 0 -> 3136 bytes .../app/src/main/res/drawable-hdpi/tile.9.png | Bin 0 -> 116 bytes .../main/res/drawable-mdpi/ic_action_info.png | Bin 0 -> 320 bytes .../main/res/drawable-mdpi/ic_launcher.png | Bin 0 -> 1915 bytes .../res/drawable-xhdpi/ic_action_info.png | Bin 0 -> 611 bytes .../main/res/drawable-xhdpi/ic_launcher.png | Bin 0 -> 4294 bytes .../res/drawable-xxhdpi/ic_action_info.png | Bin 0 -> 952 bytes .../main/res/drawable-xxhdpi/ic_launcher.png | Bin 0 -> 7279 bytes .../layout-land/fragment_camera2_basic.xml | 50 + .../src/main/res/layout/activity_camera.xml | 22 + .../res/layout/fragment_camera2_basic.xml | 45 + .../res/values-sw600dp/template-dimens.xml | 24 + .../res/values-sw600dp/template-styles.xml | 25 + .../main/res/values-v11/template-styles.xml | 22 + .../src/main/res/values-v21/base-colors.xml | 21 + .../res/values-v21/base-template-styles.xml | 24 + .../app/src/main/res/values/base-strings.xml | 30 + .../demo/app/src/main/res/values/colors.xml | 19 + .../demo/app/src/main/res/values/strings.xml | 24 + .../demo/app/src/main/res/values/styles.xml | 18 + .../src/main/res/values/template-dimens.xml | 32 + .../src/main/res/values/template-styles.xml | 42 + .../contrib/lite/java/demo/build.gradle | 23 + .../contrib/lite/java/demo/gradle.properties | 17 + .../demo/gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 53636 bytes .../gradle/wrapper/gradle-wrapper.properties | 6 + tensorflow/contrib/lite/java/demo/gradlew | 160 + tensorflow/contrib/lite/java/demo/gradlew.bat | 90 + .../contrib/lite/java/demo/settings.gradle | 1 + .../java/org/tensorflow/lite/DataType.java | 76 + .../java/org/tensorflow/lite/Interpreter.java | 172 + .../lite/NativeInterpreterWrapper.java | 276 ++ .../main/java/org/tensorflow/lite/Tensor.java | 71 + .../org/tensorflow/lite/TensorFlowLite.java | 44 + .../org/tensorflow/lite/package-info.java | 17 + .../contrib/lite/java/src/main/native/BUILD | 70 + .../java/src/main/native/builtin_ops_jni.cc | 29 + .../java/src/main/native/exception_jni.cc | 66 + .../lite/java/src/main/native/exception_jni.h | 50 + .../native/nativeinterpreterwrapper_jni.cc | 446 ++ .../native/nativeinterpreterwrapper_jni.h | 151 + .../lite/java/src/main/native/tensor_jni.cc | 242 ++ .../lite/java/src/main/native/tensor_jni.h | 74 + .../src/main/native/tensorflow_lite_jni.cc | 26 + .../src/main/native/tensorflow_lite_jni.h | 36 + .../java/src/main/native/version_script.lds | 11 + .../org/tensorflow/lite/DataTypeTest.java | 34 + .../org/tensorflow/lite/InterpreterTest.java | 221 + .../lite/NativeInterpreterWrapperTest.java | 406 ++ .../tensorflow/lite/TensorFlowLiteTest.java | 32 + .../java/org/tensorflow/lite/TensorTest.java | 105 + .../testhelper/java/org/tensorflow/lite/BUILD | 30 + .../java/org/tensorflow/lite/TestHelper.java | 35 + tensorflow/contrib/lite/kernels/BUILD | 408 ++ .../contrib/lite/kernels/activation_functor.h | 58 + .../contrib/lite/kernels/activations.cc | 389 ++ .../contrib/lite/kernels/activations_test.cc | 323 ++ tensorflow/contrib/lite/kernels/add.cc | 184 + tensorflow/contrib/lite/kernels/add_test.cc | 171 + tensorflow/contrib/lite/kernels/basic_rnn.cc | 161 + .../contrib/lite/kernels/basic_rnn_test.cc | 267 ++ .../contrib/lite/kernels/concatenation.cc | 200 + .../lite/kernels/concatenation_test.cc | 162 + tensorflow/contrib/lite/kernels/conv.cc | 425 ++ tensorflow/contrib/lite/kernels/conv_test.cc | 440 ++ .../contrib/lite/kernels/depthwise_conv.cc | 289 ++ .../lite/kernels/depthwise_conv_test.cc | 186 + .../contrib/lite/kernels/embedding_lookup.cc | 104 + .../lite/kernels/embedding_lookup_sparse.cc | 248 ++ .../kernels/embedding_lookup_sparse_test.cc | 166 + .../lite/kernels/embedding_lookup_test.cc | 94 + .../contrib/lite/kernels/fully_connected.cc | 307 ++ .../lite/kernels/fully_connected_test.cc | 377 ++ .../contrib/lite/kernels/gemm_support.cc | 68 + .../contrib/lite/kernels/gemm_support.h | 54 + .../contrib/lite/kernels/hashtable_lookup.cc | 155 + .../lite/kernels/hashtable_lookup_test.cc | 176 + .../contrib/lite/kernels/internal/BUILD | 359 ++ .../contrib/lite/kernels/internal/common.h | 107 + .../lite/kernels/internal/compatibility.h | 78 + .../kernels/internal/optimized/cpu_check.h | 65 + .../internal/optimized/depthwiseconv_float.h | 987 +++++ .../internal/optimized/depthwiseconv_uint8.h | 1916 +++++++++ .../optimized/eigen_spatial_convolutions.h | 231 + ...gen_tensor_reduced_instantiations_google.h | 143 + .../eigen_tensor_reduced_instantiations_oss.h | 167 + .../internal/optimized/multithreaded_conv.h | 195 + .../internal/optimized/neon_tensor_utils.cc | 337 ++ .../internal/optimized/neon_tensor_utils.h | 113 + .../internal/optimized/optimized_ops.h | 3715 +++++++++++++++++ .../internal/optimized/tensor_utils_impl.h | 138 + .../kernels/internal/quantization_util.cc | 95 + .../lite/kernels/internal/quantization_util.h | 55 + .../internal/quantization_util_test.cc | 108 + .../internal/reference/depthwiseconv_float.h | 115 + .../internal/reference/depthwiseconv_uint8.h | 138 + .../reference/portable_tensor_utils.cc | 165 + .../reference/portable_tensor_utils.h | 189 + .../internal/reference/reference_ops.h | 2455 +++++++++++ .../contrib/lite/kernels/internal/round.h | 39 + .../contrib/lite/kernels/internal/tensor.h | 87 + .../lite/kernels/internal/tensor_test.cc | 55 + .../lite/kernels/internal/tensor_utils.cc | 27 + .../lite/kernels/internal/tensor_utils.h | 116 + .../kernels/internal/tensor_utils_test.cc | 192 + .../contrib/lite/kernels/internal/types.h | 81 + .../contrib/lite/kernels/kernel_util.cc | 87 + tensorflow/contrib/lite/kernels/kernel_util.h | 65 + tensorflow/contrib/lite/kernels/l2norm.cc | 112 + .../contrib/lite/kernels/l2norm_test.cc | 63 + .../lite/kernels/local_response_norm.cc | 109 + .../lite/kernels/local_response_norm_test.cc | 101 + .../contrib/lite/kernels/lsh_projection.cc | 204 + .../lite/kernels/lsh_projection_test.cc | 123 + tensorflow/contrib/lite/kernels/lstm.cc | 515 +++ tensorflow/contrib/lite/kernels/lstm_test.cc | 1088 +++++ tensorflow/contrib/lite/kernels/mul.cc | 167 + tensorflow/contrib/lite/kernels/mul_test.cc | 127 + tensorflow/contrib/lite/kernels/op_macros.h | 32 + .../lite/kernels/optional_tensor_test.cc | 343 ++ tensorflow/contrib/lite/kernels/padding.h | 28 + tensorflow/contrib/lite/kernels/pooling.cc | 355 ++ .../contrib/lite/kernels/pooling_test.cc | 161 + tensorflow/contrib/lite/kernels/register.cc | 109 + tensorflow/contrib/lite/kernels/register.h | 50 + tensorflow/contrib/lite/kernels/reshape.cc | 91 + .../contrib/lite/kernels/reshape_test.cc | 90 + .../contrib/lite/kernels/resize_bilinear.cc | 129 + .../lite/kernels/resize_bilinear_test.cc | 117 + tensorflow/contrib/lite/kernels/skip_gram.cc | 160 + .../contrib/lite/kernels/skip_gram_test.cc | 257 ++ .../contrib/lite/kernels/softmax_test.cc | 143 + .../contrib/lite/kernels/space_to_depth.cc | 146 + .../lite/kernels/space_to_depth_test.cc | 102 + tensorflow/contrib/lite/kernels/svdf.cc | 224 + tensorflow/contrib/lite/kernels/svdf_test.cc | 312 ++ tensorflow/contrib/lite/kernels/test_util.cc | 183 + tensorflow/contrib/lite/kernels/test_util.h | 202 + tensorflow/contrib/lite/model.cc | 673 +++ tensorflow/contrib/lite/model.h | 165 + tensorflow/contrib/lite/model_test.cc | 258 ++ .../contrib/lite/models/smartreply/BUILD | 15 + .../models/smartreply/ops/extract_feature.cc | 119 + .../smartreply/ops/extract_feature_test.cc | 100 + .../lite/models/smartreply/ops/normalize.cc | 105 + .../models/smartreply/ops/normalize_test.cc | 90 + .../lite/models/smartreply/ops/predict.cc | 174 + .../models/smartreply/ops/predict_test.cc | 183 + .../lite/models/smartreply/predictor.cc | 116 + .../lite/models/smartreply/predictor.h | 80 + .../lite/models/smartreply/predictor_test.cc | 150 + .../lite/models/speech_hotword_model_test.cc | 115 + .../models/speech_speakerid_model_test.cc | 114 + .../lite/models/speech_terse_am_model_test.cc | 127 + .../lite/models/speech_tts_model_test.cc | 116 + tensorflow/contrib/lite/models/test_utils.h | 84 + tensorflow/contrib/lite/nnapi/BUILD | 25 + .../contrib/lite/nnapi/NeuralNetworksShim.h | 1916 +++++++++ tensorflow/contrib/lite/nnapi_delegate.cc | 386 ++ tensorflow/contrib/lite/nnapi_delegate.h | 66 + .../contrib/lite/optional_debug_tools.cc | 108 + .../contrib/lite/optional_debug_tools.h | 32 + tensorflow/contrib/lite/python/BUILD | 46 + tensorflow/contrib/lite/python/lite.py | 199 + tensorflow/contrib/lite/python/lite_test.py | 45 + tensorflow/contrib/lite/schema/BUILD | 82 + .../schema/flatbuffer_compatibility_test.cc | 91 + tensorflow/contrib/lite/schema/schema.fbs | 346 ++ tensorflow/contrib/lite/schema/schema_v0.fbs | 247 ++ tensorflow/contrib/lite/schema/schema_v1.fbs | 295 ++ tensorflow/contrib/lite/schema/schema_v2.fbs | 303 ++ tensorflow/contrib/lite/schema/schema_v3.fbs | 326 ++ .../contrib/lite/schema/upgrade_schema.py | 341 ++ .../lite/schema/upgrade_schema_test.py | 317 ++ .../contrib/lite/simple_memory_arena.cc | 136 + tensorflow/contrib/lite/simple_memory_arena.h | 84 + .../contrib/lite/simple_memory_arena_test.cc | 91 + tensorflow/contrib/lite/string.h | 30 + tensorflow/contrib/lite/string_util.cc | 117 + tensorflow/contrib/lite/string_util.h | 91 + tensorflow/contrib/lite/string_util_test.cc | 117 + .../contrib/lite/testdata/0_subgraphs.bin | Bin 0 -> 80 bytes .../contrib/lite/testdata/2_subgraphs.bin | Bin 0 -> 172 bytes .../contrib/lite/testdata/empty_model.bin | Bin 0 -> 132 bytes .../contrib/lite/testdata/multi_add.bin | Bin 0 -> 652 bytes .../contrib/lite/testdata/multi_add.json | 46 + .../contrib/lite/testdata/no_subgraphs.bin | Bin 0 -> 80 bytes .../contrib/lite/testdata/test_model.bin | Bin 0 -> 496 bytes .../lite/testdata/test_model_broken.bin | Bin 0 -> 432 bytes .../lite/testdata/test_model_broken.json | 62 + .../contrib/lite/testdata/two_subgraphs.bin | Bin 0 -> 172 bytes tensorflow/contrib/lite/testing/BUILD | 213 + .../contrib/lite/testing/generate_examples.py | 1189 ++++++ .../lite/testing/generate_examples_report.py | 125 + .../testing/generated_examples_zip_test.cc | 279 ++ tensorflow/contrib/lite/testing/message.cc | 96 + tensorflow/contrib/lite/testing/message.h | 82 + .../contrib/lite/testing/message_test.cc | 121 + .../contrib/lite/testing/nnapi_example.cc | 114 + .../contrib/lite/testing/parse_testdata.cc | 335 ++ .../contrib/lite/testing/parse_testdata.h | 74 + tensorflow/contrib/lite/testing/split.cc | 42 + tensorflow/contrib/lite/testing/split.h | 77 + tensorflow/contrib/lite/testing/split_test.cc | 57 + tensorflow/contrib/lite/testing/test_runner.h | 124 + .../contrib/lite/testing/test_runner_test.cc | 84 + .../contrib/lite/testing/tflite_driver.cc | 208 + .../contrib/lite/testing/tflite_driver.h | 62 + .../lite/testing/tflite_driver_test.cc | 61 + tensorflow/contrib/lite/testing/tokenize.cc | 95 + tensorflow/contrib/lite/testing/tokenize.h | 42 + .../contrib/lite/testing/tokenize_test.cc | 105 + tensorflow/contrib/lite/toco/BUILD | 350 ++ .../lite/toco/allocate_transient_arrays.cc | 318 ++ .../lite/toco/allocate_transient_arrays.h | 44 + tensorflow/contrib/lite/toco/args.h | 225 + tensorflow/contrib/lite/toco/dump_graphviz.cc | 293 ++ tensorflow/contrib/lite/toco/dump_graphviz.h | 28 + .../contrib/lite/toco/export_tensorflow.cc | 1570 +++++++ .../contrib/lite/toco/export_tensorflow.h | 27 + tensorflow/contrib/lite/toco/format_port.h | 77 + .../convert_pure_conv_to_depthwise.cc | 98 + .../create_im2col_arrays.cc | 69 + .../toco/graph_transformations/dequantize.cc | 223 + .../graph_transformations/drop_fake_quant.cc | 56 + .../drop_im2col_arrays.cc | 42 + .../ensure_bias_vectors.cc | 57 + .../fuse_activation_functions.cc | 98 + .../fuse_binary_into_following_affine.cc | 300 ++ .../fuse_binary_into_preceding_affine.cc | 326 ++ .../graph_transformations.cc | 108 + .../graph_transformations.h | 186 + .../graph_transformations/hardcode_min_max.cc | 229 + .../identify_l2_normalization.cc | 170 + .../graph_transformations/identify_l2_pool.cc | 106 + .../graph_transformations/identify_lstm.cc | 396 ++ .../graph_transformations/identify_relu1.cc | 103 + .../make_initial_dequantize_operator.cc | 120 + .../propagate_array_data_types.cc | 142 + .../propagate_fixed_sizes.cc | 1129 +++++ .../toco/graph_transformations/quantize.cc | 467 +++ .../read_fake_quant_min_max.cc | 105 + .../remove_final_dequantize_op.cc | 59 + .../remove_tensorflow_assert.cc | 60 + .../remove_tensorflow_identity.cc | 38 + .../remove_trivial_binary.cc | 113 + .../remove_trivial_concatenation.cc | 40 + .../remove_trivial_concatenation_input.cc | 68 + .../remove_trivial_passthrough.cc | 107 + .../remove_trivial_passthrough.h | 55 + ...emove_trivial_quantized_activation_func.cc | 87 + .../remove_trivial_reshape.cc | 92 + .../graph_transformations/remove_unused_op.cc | 122 + .../resolve_batch_normalization.cc | 135 + .../resolve_constant_binary.cc | 247 ++ .../resolve_constant_concatenation.cc | 196 + .../resolve_constant_fake_quant.cc | 76 + .../resolve_constant_tensorflow_shape.cc | 62 + .../resolve_constant_unary.cc | 175 + .../resolve_mean_attributes.cc | 51 + .../resolve_pad_attributes.cc | 55 + .../resolve_reorder_axes.cc | 93 + .../resolve_reshape_attributes.cc | 49 + .../resolve_slice_attributes.cc | 52 + .../resolve_strided_slice_attributes.cc | 62 + .../resolve_tensorflow_concat.cc | 86 + .../resolve_tensorflow_matmul.cc | 106 + .../resolve_tensorflow_merge.cc | 63 + .../resolve_tensorflow_squeeze.cc | 54 + .../resolve_tensorflow_switch.cc | 123 + .../resolve_tensorflow_tile.cc | 97 + .../toco/graph_transformations/tests/BUILD | 31 + .../resolve_constant_concatenation_test.cc | 221 + .../unfuse_activation_functions.cc | 73 + .../contrib/lite/toco/import_tensorflow.cc | 1508 +++++++ .../contrib/lite/toco/import_tensorflow.h | 34 + tensorflow/contrib/lite/toco/model.h | 1372 ++++++ .../contrib/lite/toco/model_cmdline_flags.cc | 374 ++ .../contrib/lite/toco/model_cmdline_flags.h | 43 + .../contrib/lite/toco/model_flags.proto | 119 + tensorflow/contrib/lite/toco/python/BUILD | 76 + tensorflow/contrib/lite/toco/python/toco.i | 32 + .../lite/toco/python/toco_from_protos.py | 63 + .../lite/toco/python/toco_from_protos_test.py | 96 + .../lite/toco/python/toco_python_api.cc | 85 + .../lite/toco/python/toco_python_api.h | 33 + .../contrib/lite/toco/python/toco_wrapper.py | 35 + tensorflow/contrib/lite/toco/runtime/common.h | 26 + tensorflow/contrib/lite/toco/runtime/types.h | 32 + .../lite/toco/tensorflow_graph_matching/BUILD | 102 + .../toco/tensorflow_graph_matching/cluster.cc | 52 + .../toco/tensorflow_graph_matching/cluster.h | 101 + .../cluster_utils.cc | 34 + .../tensorflow_graph_matching/cluster_utils.h | 33 + .../resolve_cluster.cc | 151 + .../resolve_cluster.h | 63 + .../tensorflow_graph_matching/resolve_svdf.cc | 285 ++ .../tensorflow_graph_matching/resolve_svdf.h | 82 + .../resolve_svdf_test.cc | 212 + .../contrib/lite/toco/tensorflow_util.cc | 197 + .../contrib/lite/toco/tensorflow_util.h | 32 + tensorflow/contrib/lite/toco/tflite/BUILD | 142 + .../lite/toco/tflite/builtin_operator.h | 74 + .../lite/toco/tflite/custom_operator.h | 74 + tensorflow/contrib/lite/toco/tflite/export.cc | 322 ++ tensorflow/contrib/lite/toco/tflite/export.h | 76 + .../contrib/lite/toco/tflite/export_test.cc | 69 + tensorflow/contrib/lite/toco/tflite/import.cc | 183 + tensorflow/contrib/lite/toco/tflite/import.h | 49 + .../contrib/lite/toco/tflite/import_test.cc | 141 + .../contrib/lite/toco/tflite/operator.cc | 627 +++ .../contrib/lite/toco/tflite/operator.h | 89 + .../contrib/lite/toco/tflite/operator_test.cc | 370 ++ .../lite/toco/tflite/simple_operator.h | 50 + tensorflow/contrib/lite/toco/tflite/types.cc | 165 + tensorflow/contrib/lite/toco/tflite/types.h | 58 + .../contrib/lite/toco/tflite/types_test.cc | 191 + tensorflow/contrib/lite/toco/toco.cc | 119 + .../contrib/lite/toco/toco_cmdline_flags.cc | 206 + .../contrib/lite/toco/toco_cmdline_flags.h | 35 + tensorflow/contrib/lite/toco/toco_flags.proto | 126 + .../lite/toco/toco_graphviz_dump_options.cc | 22 + .../lite/toco/toco_graphviz_dump_options.h | 34 + tensorflow/contrib/lite/toco/toco_port.cc | 227 + tensorflow/contrib/lite/toco/toco_port.h | 80 + .../contrib/lite/toco/toco_port_test.cc | 58 + tensorflow/contrib/lite/toco/toco_tooling.cc | 277 ++ tensorflow/contrib/lite/toco/toco_tooling.h | 50 + tensorflow/contrib/lite/toco/toco_types.h | 45 + tensorflow/contrib/lite/toco/tooling_util.cc | 1552 +++++++ tensorflow/contrib/lite/toco/tooling_util.h | 292 ++ .../contrib/lite/toco/tooling_util_test.cc | 96 + tensorflow/contrib/lite/tools/BUILD | 60 + .../contrib/lite/tools/gen_op_registration.cc | 46 + .../contrib/lite/tools/gen_op_registration.h | 38 + .../lite/tools/gen_op_registration_main.cc | 98 + .../lite/tools/gen_op_registration_test.cc | 87 + .../contrib/lite/tools/mutable_op_resolver.cc | 43 + .../contrib/lite/tools/mutable_op_resolver.h | 45 + tensorflow/contrib/lite/version.h | 23 + tensorflow/tools/ci_build/ci_sanity.sh | 7 +- .../ci_build/linux/cpu/run_py3_contrib.sh | 33 +- .../tools/ci_build/osx/cpu/run_contrib.sh | 2 +- tensorflow/tools/pip_package/BUILD | 3 + tensorflow/tools/pip_package/MANIFEST.in | 1 + .../tools/pip_package/build_pip_package.sh | 3 + tensorflow/tools/pip_package/setup.py | 3 +- third_party/flatbuffers/flatbuffers.BUILD | 4 + 378 files changed, 66985 insertions(+), 4 deletions(-) create mode 100644 tensorflow/contrib/lite/BUILD create mode 100644 tensorflow/contrib/lite/allocation.cc create mode 100644 tensorflow/contrib/lite/allocation.h create mode 100644 tensorflow/contrib/lite/build_def.bzl create mode 100644 tensorflow/contrib/lite/builtin_op_data.h create mode 100644 tensorflow/contrib/lite/context.c create mode 100644 tensorflow/contrib/lite/context.h create mode 100644 tensorflow/contrib/lite/context_test.cc create mode 100644 tensorflow/contrib/lite/error_reporter.cc create mode 100644 tensorflow/contrib/lite/error_reporter.h create mode 100644 tensorflow/contrib/lite/interpreter.cc create mode 100644 tensorflow/contrib/lite/interpreter.h create mode 100644 tensorflow/contrib/lite/interpreter_test.cc create mode 100644 tensorflow/contrib/lite/java/BUILD create mode 100644 tensorflow/contrib/lite/java/demo/.gitignore create mode 100644 tensorflow/contrib/lite/java/demo/app/build.gradle create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/AndroidManifest.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/BUILD create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/AutoFitTextureView.java create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/CameraActivity.java create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/ic_action_info.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/ic_launcher.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/tile.9.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-mdpi/ic_action_info.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-mdpi/ic_launcher.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xhdpi/ic_action_info.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xhdpi/ic_launcher.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xxhdpi/ic_action_info.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xxhdpi/ic_launcher.png create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/layout/activity_camera.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-dimens.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-styles.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values-v11/template-styles.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values-v21/base-colors.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values-v21/base-template-styles.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values/colors.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values/styles.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values/template-dimens.xml create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/res/values/template-styles.xml create mode 100644 tensorflow/contrib/lite/java/demo/build.gradle create mode 100644 tensorflow/contrib/lite/java/demo/gradle.properties create mode 100644 tensorflow/contrib/lite/java/demo/gradle/wrapper/gradle-wrapper.jar create mode 100644 tensorflow/contrib/lite/java/demo/gradle/wrapper/gradle-wrapper.properties create mode 100755 tensorflow/contrib/lite/java/demo/gradlew create mode 100644 tensorflow/contrib/lite/java/demo/gradlew.bat create mode 100644 tensorflow/contrib/lite/java/demo/settings.gradle create mode 100644 tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java create mode 100644 tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java create mode 100644 tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java create mode 100644 tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java create mode 100644 tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java create mode 100644 tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/package-info.java create mode 100644 tensorflow/contrib/lite/java/src/main/native/BUILD create mode 100644 tensorflow/contrib/lite/java/src/main/native/builtin_ops_jni.cc create mode 100644 tensorflow/contrib/lite/java/src/main/native/exception_jni.cc create mode 100644 tensorflow/contrib/lite/java/src/main/native/exception_jni.h create mode 100644 tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc create mode 100644 tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h create mode 100644 tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc create mode 100644 tensorflow/contrib/lite/java/src/main/native/tensor_jni.h create mode 100644 tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.cc create mode 100644 tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.h create mode 100644 tensorflow/contrib/lite/java/src/main/native/version_script.lds create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/DataTypeTest.java create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java create mode 100644 tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD create mode 100644 tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java create mode 100644 tensorflow/contrib/lite/kernels/BUILD create mode 100644 tensorflow/contrib/lite/kernels/activation_functor.h create mode 100644 tensorflow/contrib/lite/kernels/activations.cc create mode 100644 tensorflow/contrib/lite/kernels/activations_test.cc create mode 100644 tensorflow/contrib/lite/kernels/add.cc create mode 100644 tensorflow/contrib/lite/kernels/add_test.cc create mode 100644 tensorflow/contrib/lite/kernels/basic_rnn.cc create mode 100644 tensorflow/contrib/lite/kernels/basic_rnn_test.cc create mode 100644 tensorflow/contrib/lite/kernels/concatenation.cc create mode 100644 tensorflow/contrib/lite/kernels/concatenation_test.cc create mode 100644 tensorflow/contrib/lite/kernels/conv.cc create mode 100644 tensorflow/contrib/lite/kernels/conv_test.cc create mode 100644 tensorflow/contrib/lite/kernels/depthwise_conv.cc create mode 100644 tensorflow/contrib/lite/kernels/depthwise_conv_test.cc create mode 100644 tensorflow/contrib/lite/kernels/embedding_lookup.cc create mode 100644 tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc create mode 100644 tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc create mode 100644 tensorflow/contrib/lite/kernels/embedding_lookup_test.cc create mode 100644 tensorflow/contrib/lite/kernels/fully_connected.cc create mode 100644 tensorflow/contrib/lite/kernels/fully_connected_test.cc create mode 100644 tensorflow/contrib/lite/kernels/gemm_support.cc create mode 100644 tensorflow/contrib/lite/kernels/gemm_support.h create mode 100644 tensorflow/contrib/lite/kernels/hashtable_lookup.cc create mode 100644 tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/BUILD create mode 100644 tensorflow/contrib/lite/kernels/internal/common.h create mode 100644 tensorflow/contrib/lite/kernels/internal/compatibility.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/eigen_spatial_convolutions.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h create mode 100644 tensorflow/contrib/lite/kernels/internal/quantization_util.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/quantization_util.h create mode 100644 tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h create mode 100644 tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h create mode 100644 tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h create mode 100644 tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h create mode 100644 tensorflow/contrib/lite/kernels/internal/round.h create mode 100644 tensorflow/contrib/lite/kernels/internal/tensor.h create mode 100644 tensorflow/contrib/lite/kernels/internal/tensor_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/tensor_utils.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/tensor_utils.h create mode 100644 tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/types.h create mode 100644 tensorflow/contrib/lite/kernels/kernel_util.cc create mode 100644 tensorflow/contrib/lite/kernels/kernel_util.h create mode 100644 tensorflow/contrib/lite/kernels/l2norm.cc create mode 100644 tensorflow/contrib/lite/kernels/l2norm_test.cc create mode 100644 tensorflow/contrib/lite/kernels/local_response_norm.cc create mode 100644 tensorflow/contrib/lite/kernels/local_response_norm_test.cc create mode 100644 tensorflow/contrib/lite/kernels/lsh_projection.cc create mode 100644 tensorflow/contrib/lite/kernels/lsh_projection_test.cc create mode 100644 tensorflow/contrib/lite/kernels/lstm.cc create mode 100644 tensorflow/contrib/lite/kernels/lstm_test.cc create mode 100644 tensorflow/contrib/lite/kernels/mul.cc create mode 100644 tensorflow/contrib/lite/kernels/mul_test.cc create mode 100644 tensorflow/contrib/lite/kernels/op_macros.h create mode 100644 tensorflow/contrib/lite/kernels/optional_tensor_test.cc create mode 100644 tensorflow/contrib/lite/kernels/padding.h create mode 100644 tensorflow/contrib/lite/kernels/pooling.cc create mode 100644 tensorflow/contrib/lite/kernels/pooling_test.cc create mode 100644 tensorflow/contrib/lite/kernels/register.cc create mode 100644 tensorflow/contrib/lite/kernels/register.h create mode 100644 tensorflow/contrib/lite/kernels/reshape.cc create mode 100644 tensorflow/contrib/lite/kernels/reshape_test.cc create mode 100644 tensorflow/contrib/lite/kernels/resize_bilinear.cc create mode 100644 tensorflow/contrib/lite/kernels/resize_bilinear_test.cc create mode 100644 tensorflow/contrib/lite/kernels/skip_gram.cc create mode 100644 tensorflow/contrib/lite/kernels/skip_gram_test.cc create mode 100644 tensorflow/contrib/lite/kernels/softmax_test.cc create mode 100644 tensorflow/contrib/lite/kernels/space_to_depth.cc create mode 100644 tensorflow/contrib/lite/kernels/space_to_depth_test.cc create mode 100644 tensorflow/contrib/lite/kernels/svdf.cc create mode 100644 tensorflow/contrib/lite/kernels/svdf_test.cc create mode 100644 tensorflow/contrib/lite/kernels/test_util.cc create mode 100644 tensorflow/contrib/lite/kernels/test_util.h create mode 100644 tensorflow/contrib/lite/model.cc create mode 100644 tensorflow/contrib/lite/model.h create mode 100644 tensorflow/contrib/lite/model_test.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/BUILD create mode 100644 tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/ops/extract_feature_test.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/ops/normalize.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/ops/normalize_test.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/ops/predict.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/ops/predict_test.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/predictor.cc create mode 100644 tensorflow/contrib/lite/models/smartreply/predictor.h create mode 100644 tensorflow/contrib/lite/models/smartreply/predictor_test.cc create mode 100644 tensorflow/contrib/lite/models/speech_hotword_model_test.cc create mode 100644 tensorflow/contrib/lite/models/speech_speakerid_model_test.cc create mode 100644 tensorflow/contrib/lite/models/speech_terse_am_model_test.cc create mode 100644 tensorflow/contrib/lite/models/speech_tts_model_test.cc create mode 100644 tensorflow/contrib/lite/models/test_utils.h create mode 100644 tensorflow/contrib/lite/nnapi/BUILD create mode 100644 tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h create mode 100644 tensorflow/contrib/lite/nnapi_delegate.cc create mode 100644 tensorflow/contrib/lite/nnapi_delegate.h create mode 100644 tensorflow/contrib/lite/optional_debug_tools.cc create mode 100644 tensorflow/contrib/lite/optional_debug_tools.h create mode 100644 tensorflow/contrib/lite/python/BUILD create mode 100644 tensorflow/contrib/lite/python/lite.py create mode 100644 tensorflow/contrib/lite/python/lite_test.py create mode 100644 tensorflow/contrib/lite/schema/BUILD create mode 100644 tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc create mode 100644 tensorflow/contrib/lite/schema/schema.fbs create mode 100644 tensorflow/contrib/lite/schema/schema_v0.fbs create mode 100644 tensorflow/contrib/lite/schema/schema_v1.fbs create mode 100644 tensorflow/contrib/lite/schema/schema_v2.fbs create mode 100644 tensorflow/contrib/lite/schema/schema_v3.fbs create mode 100644 tensorflow/contrib/lite/schema/upgrade_schema.py create mode 100644 tensorflow/contrib/lite/schema/upgrade_schema_test.py create mode 100644 tensorflow/contrib/lite/simple_memory_arena.cc create mode 100644 tensorflow/contrib/lite/simple_memory_arena.h create mode 100644 tensorflow/contrib/lite/simple_memory_arena_test.cc create mode 100644 tensorflow/contrib/lite/string.h create mode 100644 tensorflow/contrib/lite/string_util.cc create mode 100644 tensorflow/contrib/lite/string_util.h create mode 100644 tensorflow/contrib/lite/string_util_test.cc create mode 100644 tensorflow/contrib/lite/testdata/0_subgraphs.bin create mode 100644 tensorflow/contrib/lite/testdata/2_subgraphs.bin create mode 100644 tensorflow/contrib/lite/testdata/empty_model.bin create mode 100644 tensorflow/contrib/lite/testdata/multi_add.bin create mode 100644 tensorflow/contrib/lite/testdata/multi_add.json create mode 100644 tensorflow/contrib/lite/testdata/no_subgraphs.bin create mode 100644 tensorflow/contrib/lite/testdata/test_model.bin create mode 100644 tensorflow/contrib/lite/testdata/test_model_broken.bin create mode 100644 tensorflow/contrib/lite/testdata/test_model_broken.json create mode 100644 tensorflow/contrib/lite/testdata/two_subgraphs.bin create mode 100644 tensorflow/contrib/lite/testing/BUILD create mode 100644 tensorflow/contrib/lite/testing/generate_examples.py create mode 100644 tensorflow/contrib/lite/testing/generate_examples_report.py create mode 100644 tensorflow/contrib/lite/testing/generated_examples_zip_test.cc create mode 100644 tensorflow/contrib/lite/testing/message.cc create mode 100644 tensorflow/contrib/lite/testing/message.h create mode 100644 tensorflow/contrib/lite/testing/message_test.cc create mode 100644 tensorflow/contrib/lite/testing/nnapi_example.cc create mode 100644 tensorflow/contrib/lite/testing/parse_testdata.cc create mode 100644 tensorflow/contrib/lite/testing/parse_testdata.h create mode 100644 tensorflow/contrib/lite/testing/split.cc create mode 100644 tensorflow/contrib/lite/testing/split.h create mode 100644 tensorflow/contrib/lite/testing/split_test.cc create mode 100644 tensorflow/contrib/lite/testing/test_runner.h create mode 100644 tensorflow/contrib/lite/testing/test_runner_test.cc create mode 100644 tensorflow/contrib/lite/testing/tflite_driver.cc create mode 100644 tensorflow/contrib/lite/testing/tflite_driver.h create mode 100644 tensorflow/contrib/lite/testing/tflite_driver_test.cc create mode 100644 tensorflow/contrib/lite/testing/tokenize.cc create mode 100644 tensorflow/contrib/lite/testing/tokenize.h create mode 100644 tensorflow/contrib/lite/testing/tokenize_test.cc create mode 100644 tensorflow/contrib/lite/toco/BUILD create mode 100644 tensorflow/contrib/lite/toco/allocate_transient_arrays.cc create mode 100644 tensorflow/contrib/lite/toco/allocate_transient_arrays.h create mode 100644 tensorflow/contrib/lite/toco/args.h create mode 100644 tensorflow/contrib/lite/toco/dump_graphviz.cc create mode 100644 tensorflow/contrib/lite/toco/dump_graphviz.h create mode 100644 tensorflow/contrib/lite/toco/export_tensorflow.cc create mode 100644 tensorflow/contrib/lite/toco/export_tensorflow.h create mode 100644 tensorflow/contrib/lite/toco/format_port.h create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/quantize.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tensorflow_shape.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_squeeze.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_tile.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc create mode 100644 tensorflow/contrib/lite/toco/import_tensorflow.cc create mode 100644 tensorflow/contrib/lite/toco/import_tensorflow.h create mode 100644 tensorflow/contrib/lite/toco/model.h create mode 100644 tensorflow/contrib/lite/toco/model_cmdline_flags.cc create mode 100644 tensorflow/contrib/lite/toco/model_cmdline_flags.h create mode 100644 tensorflow/contrib/lite/toco/model_flags.proto create mode 100644 tensorflow/contrib/lite/toco/python/BUILD create mode 100644 tensorflow/contrib/lite/toco/python/toco.i create mode 100644 tensorflow/contrib/lite/toco/python/toco_from_protos.py create mode 100644 tensorflow/contrib/lite/toco/python/toco_from_protos_test.py create mode 100644 tensorflow/contrib/lite/toco/python/toco_python_api.cc create mode 100644 tensorflow/contrib/lite/toco/python/toco_python_api.h create mode 100644 tensorflow/contrib/lite/toco/python/toco_wrapper.py create mode 100644 tensorflow/contrib/lite/toco/runtime/common.h create mode 100644 tensorflow/contrib/lite/toco/runtime/types.h create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.cc create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.cc create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.cc create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.h create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.cc create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h create mode 100644 tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf_test.cc create mode 100644 tensorflow/contrib/lite/toco/tensorflow_util.cc create mode 100644 tensorflow/contrib/lite/toco/tensorflow_util.h create mode 100644 tensorflow/contrib/lite/toco/tflite/BUILD create mode 100644 tensorflow/contrib/lite/toco/tflite/builtin_operator.h create mode 100644 tensorflow/contrib/lite/toco/tflite/custom_operator.h create mode 100644 tensorflow/contrib/lite/toco/tflite/export.cc create mode 100644 tensorflow/contrib/lite/toco/tflite/export.h create mode 100644 tensorflow/contrib/lite/toco/tflite/export_test.cc create mode 100644 tensorflow/contrib/lite/toco/tflite/import.cc create mode 100644 tensorflow/contrib/lite/toco/tflite/import.h create mode 100644 tensorflow/contrib/lite/toco/tflite/import_test.cc create mode 100644 tensorflow/contrib/lite/toco/tflite/operator.cc create mode 100644 tensorflow/contrib/lite/toco/tflite/operator.h create mode 100644 tensorflow/contrib/lite/toco/tflite/operator_test.cc create mode 100644 tensorflow/contrib/lite/toco/tflite/simple_operator.h create mode 100644 tensorflow/contrib/lite/toco/tflite/types.cc create mode 100644 tensorflow/contrib/lite/toco/tflite/types.h create mode 100644 tensorflow/contrib/lite/toco/tflite/types_test.cc create mode 100644 tensorflow/contrib/lite/toco/toco.cc create mode 100644 tensorflow/contrib/lite/toco/toco_cmdline_flags.cc create mode 100644 tensorflow/contrib/lite/toco/toco_cmdline_flags.h create mode 100644 tensorflow/contrib/lite/toco/toco_flags.proto create mode 100644 tensorflow/contrib/lite/toco/toco_graphviz_dump_options.cc create mode 100644 tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h create mode 100644 tensorflow/contrib/lite/toco/toco_port.cc create mode 100644 tensorflow/contrib/lite/toco/toco_port.h create mode 100644 tensorflow/contrib/lite/toco/toco_port_test.cc create mode 100644 tensorflow/contrib/lite/toco/toco_tooling.cc create mode 100644 tensorflow/contrib/lite/toco/toco_tooling.h create mode 100644 tensorflow/contrib/lite/toco/toco_types.h create mode 100644 tensorflow/contrib/lite/toco/tooling_util.cc create mode 100644 tensorflow/contrib/lite/toco/tooling_util.h create mode 100644 tensorflow/contrib/lite/toco/tooling_util_test.cc create mode 100644 tensorflow/contrib/lite/tools/BUILD create mode 100644 tensorflow/contrib/lite/tools/gen_op_registration.cc create mode 100644 tensorflow/contrib/lite/tools/gen_op_registration.h create mode 100644 tensorflow/contrib/lite/tools/gen_op_registration_main.cc create mode 100644 tensorflow/contrib/lite/tools/gen_op_registration_test.cc create mode 100644 tensorflow/contrib/lite/tools/mutable_op_resolver.cc create mode 100644 tensorflow/contrib/lite/tools/mutable_op_resolver.h create mode 100644 tensorflow/contrib/lite/version.h diff --git a/configure.py b/configure.py index e98367ef9f..3c0df9475d 100644 --- a/configure.py +++ b/configure.py @@ -492,6 +492,8 @@ def set_cc_opt_flags(environ_cp): write_to_bazelrc( 'build:opt --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) write_to_bazelrc('build:opt --define with_default_optimizations=true') + write_to_bazelrc('build --copt=-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK') + write_to_bazelrc('build --host_copt=-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK') def set_tf_cuda_clang(environ_cp): diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 8cb7edcc50..82a57ac185 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -475,6 +475,25 @@ filegroup( "//tensorflow/contrib/learn/python/learn/datasets:all_files", "//tensorflow/contrib/linalg:all_files", "//tensorflow/contrib/linear_optimizer:all_files", + "//tensorflow/contrib/lite:all_files", + "//tensorflow/contrib/lite/java:all_files", + "//tensorflow/contrib/lite/java/demo/app/src/main:all_files", + "//tensorflow/contrib/lite/java/demo/app/src/main/assets:all_files", + "//tensorflow/contrib/lite/java/src/main/native:all_files", + "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:all_files", + "//tensorflow/contrib/lite/kernels:all_files", + "//tensorflow/contrib/lite/kernels/internal:all_files", + "//tensorflow/contrib/lite/models/smartreply:all_files", + "//tensorflow/contrib/lite/nnapi:all_files", + "//tensorflow/contrib/lite/python:all_files", + "//tensorflow/contrib/lite/schema:all_files", + "//tensorflow/contrib/lite/testing:all_files", + "//tensorflow/contrib/lite/toco:all_files", + "//tensorflow/contrib/lite/toco/graph_transformations/tests:all_files", + "//tensorflow/contrib/lite/toco/python:all_files", + "//tensorflow/contrib/lite/toco/tensorflow_graph_matching:all_files", + "//tensorflow/contrib/lite/toco/tflite:all_files", + "//tensorflow/contrib/lite/tools:all_files", "//tensorflow/contrib/lookup:all_files", "//tensorflow/contrib/losses:all_files", "//tensorflow/contrib/makefile:all_files", diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 3d53cbba56..b7ade95115 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -51,6 +51,7 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", + "//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 3068e9ed8f..1eda1abfcf 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -79,6 +79,7 @@ from tensorflow.contrib import tpu from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager +from tensorflow.contrib.lite.python import lite from tensorflow.contrib.ndlstm import python as ndlstm from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 7636e9ba6e..9517aa4963 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -499,6 +499,19 @@ add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc") add_python_module("tensorflow/contrib/linear_optimizer/python") add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests") add_python_module("tensorflow/contrib/linear_optimizer/python/ops") +add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory + "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite") +add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory + "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite/python") +add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E touch + "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite/python/__init__.py") +add_custom_command( + TARGET tf_python_copy_scripts_to_destination PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E touch + ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite/python/lite.py) add_python_module("tensorflow/contrib/lookup") add_python_module("tensorflow/contrib/losses") add_python_module("tensorflow/contrib/losses/python") diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD new file mode 100644 index 0000000000..c58f77cb11 --- /dev/null +++ b/tensorflow/contrib/lite/BUILD @@ -0,0 +1,280 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops") + +exports_files(glob([ + "testdata/*.bin", + "models/testdata/*", +])) + +config_setting( + name = "mips", + values = { + "cpu": "mips", + }, +) + +config_setting( + name = "mips64", + values = { + "cpu": "mips64", + }, +) + +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) + +cc_library( + name = "schema_fbs_version", + hdrs = ["version.h"], +) + +# Main library. No ops are included here. +# TODO(aselle): Resolve problems preventing C99 usage. +cc_library( + name = "context", + srcs = ["context.c"], + hdrs = ["context.h"], +) + +cc_library( + name = "builtin_op_data", + hdrs = [ + "builtin_op_data.h", + ], +) + +cc_library( + name = "string", + hdrs = [ + "string.h", + ], + deps = [ + "//tensorflow/core:lib_platform", + ], +) + +# TODO(ahentz): investigate dependency on gemm_support requiring usage of tf_copts. +cc_library( + name = "framework", + srcs = [ + "allocation.cc", + "error_reporter.cc", + "interpreter.cc", + "model.cc", + "nnapi_delegate.cc", + "optional_debug_tools.cc", + "simple_memory_arena.cc", + ], + hdrs = [ + "allocation.h", + "context.h", + "error_reporter.h", + "interpreter.h", + "model.h", + "nnapi_delegate.h", + "optional_debug_tools.h", + "simple_memory_arena.h", + ], + copts = tflite_copts(), + deps = [ + ":builtin_op_data", + ":context", + ":schema_fbs_version", + "//tensorflow/contrib/lite/kernels:gemm_support", + "//tensorflow/contrib/lite/nnapi:nnapi_lib", + "//tensorflow/contrib/lite/schema:schema_fbs", + "//tensorflow/core:lib_platform", + ], +) + +cc_library( + name = "string_util", + srcs = ["string_util.cc"], + hdrs = ["string_util.h"], + deps = [ + ":framework", + ":string", + ], +) + +cc_test( + name = "string_util_test", + size = "small", + srcs = ["string_util_test.cc"], + deps = [ + ":framework", + ":string_util", + "@com_google_googletest//:gtest", + ], +) + +# Test main interpreter +cc_test( + name = "interpreter_test", + size = "small", + srcs = ["interpreter_test.cc"], + deps = [ + ":framework", + ":string_util", + "@com_google_googletest//:gtest", + ], +) + +# Test arena allocator +cc_test( + name = "simple_memory_arena_test", + size = "small", + srcs = ["simple_memory_arena_test.cc"], + deps = [ + ":framework", + "@com_google_googletest//:gtest", + ], +) + +# Test model framework. +cc_test( + name = "model_test", + size = "small", + srcs = ["model_test.cc"], + data = [ + "testdata/0_subgraphs.bin", + "testdata/2_subgraphs.bin", + "testdata/empty_model.bin", + "testdata/test_model.bin", + "testdata/test_model_broken.bin", + ], + deps = [ + ":framework", + "@com_google_googletest//:gtest", + ], +) + +# Test the C extension API code. +cc_test( + name = "context_test", + size = "small", + srcs = ["context_test.cc"], + deps = [ + ":framework", + "@com_google_googletest//:gtest", + ], +) + +# Test the serialization of a model with optional tensors. + +# Model tests + +cc_library( + name = "models_test_utils", + testonly = 1, + hdrs = ["models/test_utils.h"], + deps = select({ + "//tensorflow:android": [], + "//conditions:default": [ + #"//file/base:path", + "//tensorflow/core:test", + ], + }), +) + +cc_test( + name = "speech_hotword_model_test", + size = "small", + srcs = ["models/speech_hotword_model_test.cc"], + data = [ + "models/testdata/speech_hotword_model_in.csv", + "models/testdata/speech_hotword_model_out_rank1.csv", + "models/testdata/speech_hotword_model_out_rank2.csv", + "models/testdata/speech_hotword_model_rank1.tflite", + "models/testdata/speech_hotword_model_rank2.tflite", + ], + deps = [ + ":framework", + ":models_test_utils", + #"//file/base:path", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "@com_google_googletest//:gtest_main", + ], +) + +gen_selected_ops( + name = "speech_speakerid_ops", + model = "models/testdata/speech_speakerid_model.tflite", +) + +cc_test( + name = "speech_speakerid_model_test", + size = "small", + srcs = [ + "models/speech_speakerid_model_test.cc", + ":speech_speakerid_ops", + ], + data = [ + "models/testdata/speech_speakerid_model.tflite", + "models/testdata/speech_speakerid_model_in.csv", + "models/testdata/speech_speakerid_model_out.csv", + ], + deps = [ + ":framework", + ":models_test_utils", + #"//file/base:path", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/tools:mutable_op_resolver", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "speech_terse_am_model_test", + size = "small", + srcs = ["models/speech_terse_am_model_test.cc"], + data = [ + "models/testdata/speech_terse_am_model.tflite", + "models/testdata/speech_terse_am_model_in.csv", + "models/testdata/speech_terse_am_model_out.csv", + ], + deps = [ + ":framework", + ":models_test_utils", + #"//file/base:path", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "speech_tts_model_test", + size = "small", + srcs = ["models/speech_tts_model_test.cc"], + data = [ + "models/testdata/speech_tts_model.tflite", + "models/testdata/speech_tts_model_in.csv", + "models/testdata/speech_tts_model_out.csv", + ], + deps = [ + ":framework", + ":models_test_utils", + #"//file/base:path", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "@com_google_googletest//:gtest_main", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/allocation.cc b/tensorflow/contrib/lite/allocation.cc new file mode 100644 index 0000000000..4b322e027d --- /dev/null +++ b/tensorflow/contrib/lite/allocation.cc @@ -0,0 +1,122 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/allocation.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/nnapi_delegate.h" + +namespace tflite { + +MMAPAllocation::MMAPAllocation(const char* filename, + ErrorReporter* error_reporter) + : Allocation(error_reporter), mmapped_buffer_(MAP_FAILED) { + mmap_fd_ = open(filename, O_RDONLY); + if (mmap_fd_ == -1) { + error_reporter_->Report("Could not open '%s'.", filename); + return; + } + struct stat sb; + fstat(mmap_fd_, &sb); + buffer_size_bytes_ = sb.st_size; + mmapped_buffer_ = + mmap(nullptr, buffer_size_bytes_, PROT_READ, MAP_SHARED, mmap_fd_, 0); + if (mmapped_buffer_ == MAP_FAILED) { + error_reporter_->Report("Mmap of '%s' failed.", filename); + return; + } +} + +MMAPAllocation::~MMAPAllocation() { + if (valid()) { + munmap(const_cast(mmapped_buffer_), buffer_size_bytes_); + } + if (mmap_fd_ != -1) close(mmap_fd_); +} + +const void* MMAPAllocation::base() const { return mmapped_buffer_; } + +size_t MMAPAllocation::bytes() const { return buffer_size_bytes_; } + +bool MMAPAllocation::valid() const { return mmapped_buffer_ != MAP_FAILED; } + +FileCopyAllocation::FileCopyAllocation(const char* filename, + ErrorReporter* error_reporter) + : Allocation(error_reporter) { + // Obtain the file size, using an alternative method that is does not + // require fstat for more compatibility. + std::unique_ptr file(fopen(filename, "rb"), fclose); + if (!file) { + error_reporter_->Report("Could not open '%s'.", filename); + return; + } + // TODO(ahentz): Why did you think using fseek here was better for finding + // the size? + struct stat sb; + if (fstat(fileno(file.get()), &sb) != 0) { + error_reporter_->Report("Failed to get file size of '%s'.", filename); + return; + } + buffer_size_bytes_ = sb.st_size; + std::unique_ptr buffer(new char[buffer_size_bytes_]); + if (!buffer) { + error_reporter_->Report("Malloc of buffer to hold copy of '%s' failed.", + filename); + return; + } + size_t bytes_read = + fread(buffer.get(), sizeof(char), buffer_size_bytes_, file.get()); + if (bytes_read != buffer_size_bytes_) { + error_reporter_->Report("Read of '%s' failed (too few bytes read).", + filename); + return; + } + copied_buffer_ = std::move(buffer); +} + +FileCopyAllocation::~FileCopyAllocation() {} + +const void* FileCopyAllocation::base() const { return copied_buffer_.get(); } + +size_t FileCopyAllocation::bytes() const { return buffer_size_bytes_; } + +bool FileCopyAllocation::valid() const { return copied_buffer_ != nullptr; } + +MemoryAllocation::MemoryAllocation(const void* ptr, size_t num_bytes, + ErrorReporter* error_reporter) + : Allocation(error_reporter) { + buffer_ = ptr; + buffer_size_bytes_ = num_bytes; +} + +MemoryAllocation::~MemoryAllocation() {} + +const void* MemoryAllocation::base() const { return buffer_; } + +size_t MemoryAllocation::bytes() const { return buffer_size_bytes_; } + +bool MemoryAllocation::valid() const { return buffer_ != nullptr; } + +} // namespace tflite diff --git a/tensorflow/contrib/lite/allocation.h b/tensorflow/contrib/lite/allocation.h new file mode 100644 index 0000000000..ee8a7ccd0b --- /dev/null +++ b/tensorflow/contrib/lite/allocation.h @@ -0,0 +1,94 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Main abstraction controlling the tflite interpreter. +// See context.h for the API for defining operations (TfLiteRegistration). +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ALLOCATION_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ALLOCATION_H_ + +#include +#include +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/simple_memory_arena.h" + +namespace tflite { + +// A memory allocation handle. This could be a mmap or shared memory. +class Allocation { + public: + Allocation(ErrorReporter* error_reporter) : error_reporter_(error_reporter) {} + virtual ~Allocation() {} + + // Base pointer of this allocation + virtual const void* base() const = 0; + // Size in bytes of the allocation + virtual size_t bytes() const = 0; + // Whether the allocation is valid + virtual bool valid() const = 0; + + protected: + ErrorReporter* error_reporter_; +}; + +class MMAPAllocation : public Allocation { + public: + MMAPAllocation(const char* filename, ErrorReporter* error_reporter); + virtual ~MMAPAllocation(); + const void* base() const override; + size_t bytes() const override; + bool valid() const override; + + protected: + // Data required for mmap. + int mmap_fd_ = -1; // mmap file descriptor + const void* mmapped_buffer_; + size_t buffer_size_bytes_ = 0; +}; + +class FileCopyAllocation : public Allocation { + public: + FileCopyAllocation(const char* filename, ErrorReporter* error_reporter); + virtual ~FileCopyAllocation(); + const void* base() const override; + size_t bytes() const override; + bool valid() const override; + + private: + // Data required for mmap. + std::unique_ptr copied_buffer_; + size_t buffer_size_bytes_ = 0; +}; + +class MemoryAllocation : public Allocation { + public: + // Allocates memory with the pointer and the number of bytes of the memory. + // The pointer has to remain alive and unchanged until the destructor is + // called. + MemoryAllocation(const void* ptr, size_t num_bytes, + ErrorReporter* error_reporter); + virtual ~MemoryAllocation(); + const void* base() const override; + size_t bytes() const override; + bool valid() const override; + + private: + const void* buffer_; + size_t buffer_size_bytes_ = 0; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ALLOCATION_H_ diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl new file mode 100644 index 0000000000..e3c9cdd99b --- /dev/null +++ b/tensorflow/contrib/lite/build_def.bzl @@ -0,0 +1,233 @@ +"""Generate Flatbuffer binary from json.""" + +def tflite_copts(): + """Defines compile time flags.""" + copts = [ + "-DFARMHASH_NO_CXX_STRING", + ] + select({ + "//tensorflow:android_arm64": [ + "-std=c++11", + "-O3", + ], + "//tensorflow:android_arm": [ + "-mfpu=neon", + "-mfloat-abi=softfp", + "-std=c++11", + "-O3", + ], + "//tensorflow:android_x86": [ + "-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK", + ], + "//tensorflow:ios_x86_64": [ + "-msse4.1", + ], + "//conditions:default": [], + }) + select({ + "//tensorflow:with_default_optimizations": [], + "//conditions:default": ["-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK"], + }) + + return copts + +LINKER_SCRIPT = "//tensorflow/contrib/lite/java/src/main/native:version_script.lds" + +def tflite_linkopts_unstripped(): + """Defines linker flags to reduce size of TFLite binary. + + These are useful when trying to investigate the relative size of the + symbols in TFLite. + + Returns: + a select object with proper linkopts + """ + return select({ + "//tensorflow:android": [ + "-Wl,--no-export-dynamic", # Only inc syms referenced by dynamic obj. + "-Wl,--exclude-libs,ALL", # Exclude syms in all libs from auto export. + "-Wl,--gc-sections", # Eliminate unused code and data. + "-Wl,--as-needed", # Don't link unused libs. + ], + "//tensorflow/contrib/lite:mips": [], + "//tensorflow/contrib/lite:mips64": [], + "//conditions:default": [ + "-Wl,--icf=all", # Identical code folding. + ], + }) + +def tflite_jni_linkopts_unstripped(): + """Defines linker flags to reduce size of TFLite binary with JNI. + + These are useful when trying to investigate the relative size of the + symbols in TFLite. + + Returns: + a select object with proper linkopts + """ + return select({ + "//tensorflow:android": [ + "-Wl,--gc-sections", # Eliminate unused code and data. + "-Wl,--as-needed", # Don't link unused libs. + ], + "//tensorflow/contrib/lite:mips": [], + "//tensorflow/contrib/lite:mips64": [], + "//conditions:default": [ + "-Wl,--icf=all", # Identical code folding. + ], + }) + +def tflite_linkopts(): + """Defines linker flags to reduce size of TFLite binary.""" + return tflite_linkopts_unstripped() + select({ + "//tensorflow:android": [ + "-s", # Omit symbol table. + ], + "//conditions:default": [], + }) + +def tflite_jni_linkopts(): + """Defines linker flags to reduce size of TFLite binary with JNI.""" + return tflite_jni_linkopts_unstripped() + select({ + "//tensorflow:android": [ + "-s", # Omit symbol table. + ], + "//conditions:default": [], + }) + + +def tflite_jni_binary(name, + copts=tflite_copts(), + linkopts=tflite_jni_linkopts(), + linkscript=LINKER_SCRIPT, + linkshared=1, + linkstatic=1, + deps=[]): + """Builds a jni binary for TFLite.""" + linkopts = linkopts + [ + "-Wl,--version-script", # Export only jni functions & classes. + linkscript, + ] + native.cc_binary( + name=name, + copts=copts, + linkshared=linkshared, + linkstatic=linkstatic, + deps= deps + [linkscript], + linkopts=linkopts) + +def tf_to_tflite(name, src, options, out): + """Convert a frozen tensorflow graphdef to TF Lite's flatbuffer. + + Args: + name: Name of rule. + src: name of the input graphdef file. + options: options passed to TOCO. + out: name of the output flatbuffer file. + """ + + toco = "//tensorflow/contrib/lite/toco:toco" + native.genrule( + name = name, + srcs=[src, options], + outs=[out], + cmd = ("$(location %s) " + + " --input_file=$(location %s) " + + " --output_file=$(location %s) " + + " --input_format=TENSORFLOW_GRAPHDEF" + + " --output_format=TFLITE" + + " `cat $(location %s)`") + % (toco, src, out, options), + tools= [toco], + ) + +def tflite_to_json(name, src, out): + """Convert a TF Lite flatbuffer to JSON. + + Args: + name: Name of rule. + src: name of the input flatbuffer file. + out: name of the output JSON file. + """ + + flatc = "@flatbuffers//:flatc" + schema = "//tensorflow/contrib/lite/schema:schema.fbs" + native.genrule( + name = name, + srcs = [schema, src], + outs = [out], + cmd = ("TMP=`mktemp`; cp $(location %s) $${TMP}.bin &&" + + "$(location %s) --raw-binary --strict-json -t" + + " -o /tmp $(location %s) -- $${TMP}.bin &&" + + "cp $${TMP}.json $(location %s)") + % (src, flatc, schema, out), + tools = [flatc], + ) + +def json_to_tflite(name, src, out): + """Convert a JSON file to TF Lite's flatbuffer. + + Args: + name: Name of rule. + src: name of the input JSON file. + out: name of the output flatbuffer file. + """ + + flatc = "@flatbuffers//:flatc" + schema = "//tensorflow/contrib/lite/schema:schema_fbs" + native.genrule( + name = name, + srcs = [schema, src], + outs = [out], + cmd = ("TMP=`mktemp`; cp $(location %s) $${TMP}.json &&" + + "$(location %s) --raw-binary --unknown-json --allow-non-utf8 -b" + + " -o /tmp $(location %s) $${TMP}.json &&" + + "cp $${TMP}.bin $(location %s)") + % (src, flatc, schema, out), + tools = [flatc], + ) + +def gen_zipped_test_files(name, files): + """Generate a zip file of tests by using :generate_examples. + + Args: + name: Name of output. We will produce "`name`_files" as a target. + files: A list of zip file basenames. + """ + toco = "//tensorflow/contrib/lite/toco:toco" + out_files = [] + for f in files: + out_file = name + "/" + f + out_files.append(out_file) + native.genrule( + name = name + "_" + f + ".files", + cmd = ("$(locations :generate_examples) --toco $(locations %s) " % toco + + " --zip_to_output " + f + + " $(@D) zipped"), + outs = [out_file], + tools = [ + ":generate_examples", + toco, + ], + ) + + native.filegroup( + name = name, + srcs = out_files, + ) + +def gen_selected_ops(name, model): + """Generate the library that includes only used ops. + + Args: + name: Name of the generated library. + model: TFLite model to interpret. + """ + out = name + "_registration.cc" + tool = "//tensorflow/contrib/lite/tools:generate_op_registrations" + native.genrule( + name = name, + srcs = [model], + outs = [out], + cmd = ("$(location %s) --input_model=$(location %s) --output_registration=$(location %s)") + % (tool, model, out), + tools = [tool], + ) diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h new file mode 100644 index 0000000000..93072bf90b --- /dev/null +++ b/tensorflow/contrib/lite/builtin_op_data.h @@ -0,0 +1,164 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_BUILTIN_OP_DATA_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_BUILTIN_OP_DATA_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// TODO(aselle): Consider using "if this then that" for testing. + +// Possible padding types (for convolutions) +typedef enum { + kTfLitePaddingUnknown = 0, + kTfLitePaddingSame, + kTfLitePaddingValid, +} TfLitePadding; + +typedef struct { + int width; + int height; +} TfLitePaddingValues; + +// Possible fused activation functions. +// TODO(aselle): rename to TfLiteActivation +typedef enum { + kTfLiteActNone = 0, + kTfLiteActRelu, + kTfLiteActRelu1, + kTfLiteActRelu6, + kTfLiteActTanh, + kTfLiteActSignBit, + kTfLiteActSigmoid, +} TfLiteFusedActivation; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; + TfLiteFusedActivation activation; +} TfLiteConvParams; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; + int filter_width; + int filter_height; + TfLiteFusedActivation activation; + struct { + TfLitePaddingValues padding; + } computed; +} TfLitePoolParams; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; + int depth_multiplier; + TfLiteFusedActivation activation; +} TfLiteDepthwiseConvParams; + +typedef struct { + int rank; + TfLiteFusedActivation activation; +} TfLiteSVDFParams; + +typedef struct { + TfLiteFusedActivation activation; +} TfLiteRNNParams; + +typedef struct { TfLiteFusedActivation activation; } TfLiteFullyConnectedParams; + +typedef enum { + kTfLiteLshProjectionUnknown = 0, + kTfLiteLshProjectionSparse = 1, + kTfLiteLshProjectionDense = 2, +} TfLiteLSHProjectionType; + +typedef struct { TfLiteLSHProjectionType type; } TfLiteLSHProjectionParams; + +typedef struct { float beta; } TfLiteSoftmaxParams; + +typedef struct { + int axis; + TfLiteFusedActivation activation; +} TfLiteConcatenationParams; + +typedef struct { + TfLiteFusedActivation activation; +} TfLiteAddParams; + +typedef struct { + TfLiteFusedActivation activation; +} TfLiteMulParams; + +typedef struct { + TfLiteFusedActivation activation; +} TfLiteL2NormParams; + +typedef struct { + int radius; + float bias; + float alpha; + float beta; +} TfLiteLocalResponseNormParams; + +typedef struct { + TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; +} TfLiteLSTMParams; + +typedef struct { + int new_height; + int new_width; +} TfLiteResizeBilinearParams; + +typedef struct { + // TODO(ahentz): We can't have dynamic data in this struct, at least not yet. + // For now we will fix the maximum possible number of dimensions. + int shape[8]; + int num_dimensions; +} TfLiteReshapeParams; + +typedef struct { + int ngram_size; + int max_skip_size; + bool include_all_ngrams; +} TfLiteSkipGramParams; + +typedef struct { + int block_size; +} TfLiteSpaceToDepthParams; + +typedef enum { + kTfLiteCombinerTypeSum = 0, + kTfLiteCombinerTypeMean = 1, + kTfLiteCombinerTypeSqrtn = 2, +} TfLiteCombinerType; + +typedef struct { + TfLiteCombinerType combiner; +} TfLiteEmbeddingLookupSparseParams; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_BUILTIN_OP_DATA_H_ diff --git a/tensorflow/contrib/lite/context.c b/tensorflow/contrib/lite/context.c new file mode 100644 index 0000000000..c09e838c5c --- /dev/null +++ b/tensorflow/contrib/lite/context.c @@ -0,0 +1,92 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/context.h" +#include +#include + +TfLiteIntArray* TfLiteIntArrayCreate(int size) { + TfLiteIntArray* ret = + (TfLiteIntArray*)malloc(sizeof(*ret) + sizeof(ret->data[0]) * size); + ret->size = size; + return ret; +} + +void TfLiteIntArrayPrint(const char* s, TfLiteIntArray* a) { + printf("%s: length=%d [", s, a->size); + if (a->size) printf("%d", a->data[0]); + int i = 1; + for (; i < a->size; i++) { + printf(" %d", a->data[i]); + } + printf("]\n"); +} + +int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b) { + if (a == b) return 1; + if (a == NULL || b == NULL) return 0; + if (a->size != b->size) return 0; + int i = 0; + for (; i < a->size; i++) + if (a->data[i] != b->data[i]) return 0; + return 1; +} + +TfLiteIntArray* TfLiteIntArrayCopy(TfLiteIntArray* src) { + if (!src) return NULL; + TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size); + if (ret) { + memcpy(ret->data, src->data, src->size * sizeof(int)); + } + return ret; +} + +void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); } + +void TfLiteTensorFree(TfLiteTensor* t) { + if (t->allocation_type == kTfLiteDynamic && t->data.raw) { + free(t->data.raw); + } + if (t->dims) TfLiteIntArrayFree(t->dims); + t->data.raw = NULL; + t->dims = NULL; +} + +void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, + TfLiteQuantizationParams quantization, char* buffer, + size_t size, TfLiteAllocationType allocation_type, + const void* allocation, TfLiteTensor* tensor) { + TfLiteTensorFree(tensor); + tensor->type = type; + tensor->name = name; + tensor->dims = dims; + tensor->params = quantization; + tensor->data.raw = buffer; + tensor->bytes = size; + tensor->allocation_type = allocation_type; + tensor->allocation = allocation; +} + +void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { + if (tensor->allocation_type != kTfLiteDynamic) { + return; + } + if (!tensor->data.raw) { + tensor->data.raw = malloc(num_bytes); + } else if (num_bytes > tensor->bytes) { + tensor->data.raw = realloc(tensor->data.raw, num_bytes); + } + tensor->bytes = num_bytes; +} diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h new file mode 100644 index 0000000000..41257a53b1 --- /dev/null +++ b/tensorflow/contrib/lite/context.h @@ -0,0 +1,298 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// This file defines a C API for implementing operations in tflite. +// These operations can be defined using c++ but the interface between +// the interpreter and the operations are C. +// +// Summary of abstractions +// TF_LITE_ENSURE - Self-sufficient error checking +// TfLiteStatus - Status reporting +// TfLiteIntArray - stores tensor shapes (dims), +// TfLiteContext - allows an op to access the tensors +// TfLiteTensor - tensor (a multidimensional array) +// TfLiteNode - a single node or operation +// TfLiteRegistration - the implementation of a conceptual operation. +// +// Some abstractions in this file are created and managed by Interpreter. +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef enum { kTfLiteOk = 0, kTfLiteError = 1 } TfLiteStatus; + +#define kOptionalTensor (-1) + +// Fixed size list of integers. Used for dimensions and inputs/outputs tensor +// indices +typedef struct { + int size; +// gcc 6.1+ have a bug where flexible members aren't properly handled +// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \ + __GNUC_MINOR__ >= 1 + int data[0]; +#else + int data[]; +#endif +} TfLiteIntArray; + +// Create a array of a given `size` (uninitialized entries). +// This returns a pointer, that you must free using TfLiteIntArrayFree(). +TfLiteIntArray* TfLiteIntArrayCreate(int size); + +// Check if two tensors are equal. Returns 1 if they are equal, 0 otherwise. +int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b); + +// Create a copy of an array passed as `src`. +// You are expected to free memory with TfLiteIntArrayFree +TfLiteIntArray* TfLiteIntArrayCopy(TfLiteIntArray* src); + +// Free memory of array `v`. +void TfLiteIntArrayFree(TfLiteIntArray* v); + +// Since we must not depend on any libraries, define a minimal subset of +// error macros while avoiding names that have pre-conceived meanings like +// assert and check. + +// Check whether value is true, and if not return kTfLiteError from +// the current function (and report the error string msg). +#define TF_LITE_ENSURE_MSG(context, value, msg) \ + do { \ + if (!(value)) { \ + (context)->ReportError((context), __FILE__ " " msg); \ + return kTfLiteError; \ + } \ + } while (0) + +// Check whether the value `a` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +#define TF_LITE_ENSURE(context, a) \ + do { \ + if (!(a)) { \ + (context)->ReportError((context), "%s:%d %s was not true.", __FILE__, \ + __LINE__, #a); \ + return kTfLiteError; \ + } \ + } while (0) + +#define TF_LITE_ENSURE_STATUS(a) \ + do { \ + if ((a) != kTfLiteOk) { \ + return kTfLiteError; \ + } \ + } while (0) + +// Check whether the value `a == b` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +// `a` and `b` may be evaluated more than once, so no side effects or +// extremely expensive computations should be done. +#define TF_LITE_ENSURE_EQ(context, a, b) \ + do { \ + if ((a) != (b)) { \ + (context)->ReportError((context), "%s:%d %s != %s (%d != %d)", __FILE__, \ + __LINE__, #a, #b, (a), (b)); \ + return kTfLiteError; \ + } \ + } while (0) + +#define TF_LITE_ENSURE_OK(context, status) \ + do { \ + if ((status) != kTfLiteOk) { \ + return status; \ + } \ + } while (0) + +// Types supported by tensor +typedef enum { + kTfLiteNoType = 0, + kTfLiteFloat32 = 1, + kTfLiteInt32 = 2, + kTfLiteUInt8 = 3, + kTfLiteInt64 = 4, + kTfLiteString = 5, +} TfLiteType; + +// Parameters for asymmetric quantization. Quantized values can be converted +// back to float using: +// real_value = scale * (quantized_value - zero_point); +typedef struct { + float scale; + int32_t zero_point; +} TfLiteQuantizationParams; + +// A union of points that points to memory for a given tensor. +typedef union { + int* i32; + float* f; + char* raw; + const char* raw_const; + uint8_t* uint8; +} TfLitePtrUnion; + +// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped +// data (or data externally allocated). kTfLiteArenaRw is arena allocated +// data. kTfLiteDynamic is for tensors that are allocated during evaluation. +typedef enum { + kTfLiteMemNone = 0, + kTfLiteMmapRo, + kTfLiteArenaRw, + kTfLiteArenaRwPersistent, + kTfLiteDynamic, +} TfLiteAllocationType; + +// An tensor in the interpreter system which is a wrapper around a buffer of +// data including a dimensionality (or NULL if not currently defined). +typedef struct { + // The data type specification for data stored in `data`. This affects + // what member of `data` union should be used. + TfLiteType type; + // A union of data pointers. The appropriate type should be used for a typed + // tensor based on `type`. + TfLitePtrUnion data; + // A pointer to a structure representing the dimensionality interpretation + // that the buffer should have. NOTE: the product of elements of `dims` + // and the element datatype size should be equal to `bytes` below. + TfLiteIntArray* dims; + // Quantization information. + TfLiteQuantizationParams params; + // How memory is mapped + // kTfLiteMmapRo: Memory mapped read only. + // i.e. weights + // kTfLiteArenaRw: Arena allocated read write memory + // (i.e. temporaries, outputs). + TfLiteAllocationType allocation_type; + // The number of bytes required to store the data of this Tensor. I.e. + // (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if + // type is kTfLiteFloat32 and dims = {3, 2} then + // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24. + size_t bytes; + + // An opaque pointer to a tflite::MMapAllocation + const void* allocation; + + // Null-terminated name of this tensor. + const char* name; +} TfLiteTensor; + +// Free memory of tensor `t`; +void TfLiteTensorFree(TfLiteTensor* t); + +// Set all of a tensor's fields (and free any previously allocated data). +void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, + TfLiteQuantizationParams quantization, char* buffer, + size_t size, TfLiteAllocationType allocation_type, + const void* allocation, TfLiteTensor* tensor); + +// Resize the allocated data of a (dynamic) tensor. +void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); + +typedef struct TfLiteContext { + // Number of tensors in the context. + int tensors_size; + // An tensor of tensors in the interpreter context (of length `tensors_size`) + TfLiteTensor* tensors; + + // opaque full context ptr (an opaque c++ data structure) + void* impl_; + + // Request memory pointer be resized. Updates dimensions on the tensor. + // NOTE: ResizeTensor takes ownership of newSize. + TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor, + TfLiteIntArray* new_size); + // Request that a error be reported with format string msg. + void (*ReportError)(struct TfLiteContext*, const char* msg, ...); + + // Add `tensors_to_add` tensors, preserving pre-existing Tensor entries. If + // non-null, the value pointed to by `first_new_tensor_index` will be set to + // the index of the first new tensor. + TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add, + int* first_new_tensor_index); + + // TODO(ahentz): we should create a more general mechanism for this sort of + // library-global objects. + void* gemm_context; +} TfLiteContext; + +// A structure representing an instance of a node. +// This structure only exhibits the inputs, outputs and user defined data, not +// other features like the type. +typedef struct { + // Inputs to this node expressed as indices into the simulator's tensors. + TfLiteIntArray* inputs; + + // Outputs to this node expressed as indices into the simulator's tensors. + TfLiteIntArray* outputs; + + // Temporary tensors uses during the computations. This usually contains no + // tensors, but ops are allowed to change that if they need scratch space of + // any sort. + TfLiteIntArray* temporaries; + + // Opaque data provided by the node implementer through `Registration.init`. + void* user_data; + + // Opaque data provided to the node if the node is a builtin. + void* builtin_data; +} TfLiteNode; + +typedef struct { + // Initializes the op from serialized data. + // If a built-in op: + // `buffer` is the op's params data (TfLiteLSTMParams*). + // `length` is zero. + // If custom op: + // `buffer` is the op's `custom_options`. + // `length` is the size of the buffer. + // + // Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer + // or an instance of a struct). + // + // The returned pointer will be stored with the node in the `user_data` field, + // accessible within prepare and invoke functions below. + // NOTE: if the data is already in the desired format, simply implement this + // function to return `nullptr` and implement the free function to be a no-op. + void* (*init)(TfLiteContext* context, const char* buffer, size_t length); + + // The pointer `buffer` is the data previously returned by an init invocation. + void (*free)(TfLiteContext* context, void* buffer); + + // prepare is called when the inputs this node depends on have been resized. + // context->ResizeTensor() can be called to request output tensors to be + // resized. + // + // Returns kTfLiteOk on success. + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node); + + // Execute the node (should read node->inputs and output to node->outputs). + // Returns kTfLiteOk on success. + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node); + + // Builtin codes. If this kernel refers to a builtin this is the code + // of the builtin. This is so we can do marshaling to other frameworks like + // NN API. Note, it is the responsibility of the registration binder to + // set this properly. + int32_t builtin_code; +} TfLiteRegistration; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ diff --git a/tensorflow/contrib/lite/context_test.cc b/tensorflow/contrib/lite/context_test.cc new file mode 100644 index 0000000000..d0a104f43d --- /dev/null +++ b/tensorflow/contrib/lite/context_test.cc @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/context.h" +#include + +namespace tflite { + +// NOTE: this tests only the TfLiteIntArray part of context. +// most of context.h is provided in the context of using it with interpreter.h +// and interpreter.cc, so interpreter_test.cc tests context structures more +// thoroughly. + +TEST(IntArray, TestIntArrayCreate) { + TfLiteIntArray* a = TfLiteIntArrayCreate(0); + TfLiteIntArray* b = TfLiteIntArrayCreate(3); + TfLiteIntArrayFree(a); + TfLiteIntArrayFree(b); +} + +TEST(IntArray, TestIntArrayCopy) { + TfLiteIntArray* a = TfLiteIntArrayCreate(2); + a->data[0] = 22; + a->data[1] = 24; + TfLiteIntArray* b = TfLiteIntArrayCopy(a); + ASSERT_NE(a, b); + ASSERT_EQ(a->size, b->size); + ASSERT_EQ(a->data[0], b->data[0]); + ASSERT_EQ(a->data[1], b->data[1]); + TfLiteIntArrayFree(a); + TfLiteIntArrayFree(b); +} + +TEST(IntArray, TestIntArrayEqual) { + TfLiteIntArray* a = TfLiteIntArrayCreate(1); + a->data[0] = 1; + TfLiteIntArray* b = TfLiteIntArrayCreate(2); + b->data[0] = 5; + b->data[1] = 6; + TfLiteIntArray* c = TfLiteIntArrayCreate(2); + c->data[0] = 5; + c->data[1] = 6; + TfLiteIntArray* d = TfLiteIntArrayCreate(2); + d->data[0] = 6; + d->data[1] = 6; + ASSERT_FALSE(TfLiteIntArrayEqual(a, b)); + ASSERT_TRUE(TfLiteIntArrayEqual(b, c)); + ASSERT_TRUE(TfLiteIntArrayEqual(b, b)); + ASSERT_FALSE(TfLiteIntArrayEqual(c, d)); + TfLiteIntArrayFree(a); + TfLiteIntArrayFree(b); + TfLiteIntArrayFree(c); + TfLiteIntArrayFree(d); +} + +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/error_reporter.cc b/tensorflow/contrib/lite/error_reporter.cc new file mode 100644 index 0000000000..6ba5384a94 --- /dev/null +++ b/tensorflow/contrib/lite/error_reporter.cc @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/error_reporter.h" +#include +#include + +namespace tflite { + +ErrorReporter::~ErrorReporter() {} + +int ErrorReporter::Report(const char* format, ...) { + va_list args; + va_start(args, format); + int code = Report(format, args); + va_end(args); + return code; +} + +// TODO(aselle): Make the name of ReportError on context the same, so +// we can use the ensure functions w/o a context and w/ a reporter. +int ErrorReporter::ReportError(void*, const char* format, ...) { + va_list args; + va_start(args, format); + int code = Report(format, args); + va_end(args); + return code; +} + +int StderrReporter::Report(const char* format, va_list args) { + return vfprintf(stderr, format, args); +} + +ErrorReporter* DefaultErrorReporter() { + static StderrReporter* error_reporter = new StderrReporter; + return error_reporter; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h new file mode 100644 index 0000000000..637d456ce7 --- /dev/null +++ b/tensorflow/contrib/lite/error_reporter.h @@ -0,0 +1,54 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ERROR_REPORTER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ERROR_REPORTER_H_ + +#include +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// A functor that reports error to supporting system. Invoked similar to +// printf. +// +// Usage: +// ErrorReporter foo; +// foo.Report("test %d\n", 5); +// or +// va_list args; +// foo.Report("test %d\n", args); // where args is va_list +// +// Sublclass ErrorReporter to provide another reporting destination. +// For example, if you have a GUI program, you might redirect to a buffer +// that drives a GUI error log box. +class ErrorReporter { + public: + virtual ~ErrorReporter(); + virtual int Report(const char* format, va_list args) = 0; + int Report(const char* format, ...); + int ReportError(void*, const char* format, ...); +}; + +// An error reporter that simplify writes the message to stderr. +struct StderrReporter : public ErrorReporter { + int Report(const char* format, va_list args) override; +}; + +// Return the default error reporter (output to stderr). +ErrorReporter* DefaultErrorReporter(); + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ERROR_REPORTER_H_ diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc new file mode 100644 index 0000000000..954e236ac8 --- /dev/null +++ b/tensorflow/contrib/lite/interpreter.cc @@ -0,0 +1,567 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/interpreter.h" +#include +#include +#include +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/kernels/gemm_support.h" +#include "tensorflow/contrib/lite/nnapi_delegate.h" + +namespace { + +// Memory allocation tuning +constexpr const int kDefaultArenaAlignment = 64; +constexpr const int kDefaultTensorAlignment = 4; +// std::vector preallocation tuning. +constexpr const int kSlotsToReserve = 128; + +} // namespace + +namespace tflite { + +Interpreter::Interpreter(ErrorReporter* error_reporter) + : arena_(kDefaultArenaAlignment), + persistent_arena_(kDefaultArenaAlignment), + error_reporter_(error_reporter ? error_reporter + : DefaultErrorReporter()) { + context_.impl_ = static_cast(this); + context_.ResizeTensor = ResizeTensor; + context_.ReportError = ReportError; + context_.AddTensors = AddTensors; + context_.tensors = nullptr; + context_.tensors_size = 0; + context_.gemm_context = nullptr; + // Reserve some space for the tensors to avoid excessive resizing. + tensors_.reserve(kSlotsToReserve); + nodes_and_registration_.reserve(kSlotsToReserve); + next_allocate_node_id_ = 0; + UseNNAPI(false); +} + +Interpreter::~Interpreter() { + for (auto& nodeAndReg : nodes_and_registration_) { + TfLiteNode& node = nodeAndReg.first; + TfLiteIntArrayFree(node.inputs); + TfLiteIntArrayFree(node.outputs); + TfLiteIntArrayFree(node.temporaries); + if (node.builtin_data) free(node.builtin_data); + OpFree(nodeAndReg.second, node.user_data); + node.builtin_data = nullptr; + } + + for (int i = 0; i < context_.tensors_size; i++) { + TfLiteTensorFree(&context_.tensors[i]); + } +} + +TfLiteStatus Interpreter::SetInputs(std::vector inputs) { + TF_LITE_ENSURE_OK(&context_, + CheckTensorIndices("inputs", inputs.data(), inputs.size())); + inputs_ = std::move(inputs); + return kTfLiteOk; +} + +TfLiteStatus Interpreter::SetOutputs(std::vector outputs) { + TF_LITE_ENSURE_OK( + &context_, CheckTensorIndices("outputs", outputs.data(), outputs.size())); + outputs_ = std::move(outputs); + return kTfLiteOk; +} + +TfLiteStatus Interpreter::CheckTensorIndices(const char* label, + const int* indices, int length) { + // Making sure kOptionalTensor is not re-defined to something other than -1. + static_assert(kOptionalTensor == -1, "kOptionalTensor should be defined -1"); + + for (int i = 0; i < length; i++) { + int index = indices[i]; + if (index < kOptionalTensor || index >= context_.tensors_size) { + ReportError(&context_, "Invalid tensor index %d in %s\n", index, label); + consistent_ = false; + return kTfLiteError; + } + } + return kTfLiteOk; +} + +TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims, + int dims_size, size_t* bytes) { + // TODO(aselle): Check for overflow here using overflow.h in TensorFlow + // MultiplyWithoutOverflow. + TF_LITE_ENSURE(&context_, bytes != nullptr); + size_t count = 1; + for (int k = 0; k < dims_size; k++) count *= dims[k]; + switch (type) { + case kTfLiteFloat32: + *bytes = sizeof(float) * count; + break; + case kTfLiteInt32: + *bytes = sizeof(int32_t) * count; + break; + case kTfLiteUInt8: + *bytes = sizeof(uint8_t) * count; + break; + case kTfLiteInt64: + *bytes = sizeof(int64_t) * count; + break; + default: + ReportError(&context_, + "Only float32, int32, int64, uint8 supported currently."); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus Interpreter::AllocateTensorsWhoseSizesAreKnown() { + if (!consistent_) { + ReportError(&context_, "AllocateTensors() called on inconsistent model."); + return kTfLiteError; + } + if (next_allocate_node_id_ == nodes_and_registration_.size() && invokable_) { + return kTfLiteOk; + } + allocs_and_refcounts_.resize(context_.tensors_size); + + int new_next_allocate_node_id = next_allocate_node_id_; + invokable_ = false; + + // Allocate graph input nodes. + if (next_allocate_node_id_ == 0) { + for (int i = 0; i < inputs_.size(); ++i) { + int tensor_index = inputs_[i]; + if (tensor_index == kOptionalTensor) { + continue; + } + TfLiteTensor& tensor = context_.tensors[tensor_index]; + if (tensor.allocation_type == kTfLiteArenaRw) { + TF_LITE_ENSURE_OK( + &context_, + arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes, + &allocs_and_refcounts_[tensor_index].alloc)); + } + } + // Add 1 to output tensors, so they will not get overwritten. + for (int i = 0; i < outputs_.size(); ++i) { + allocs_and_refcounts_[outputs_[i]].count++; + } + } + + // Count references to node input tensors, and resize node-referenced tensors + // until we encounter a node that has a dynamic output tensor. + for (int k = next_allocate_node_id_; k < nodes_and_registration_.size(); + k++) { + new_next_allocate_node_id++; + TfLiteNode& node = nodes_and_registration_[k].first; + const TfLiteRegistration& registration = nodes_and_registration_[k].second; + if (OpPrepare(registration, &node) == kTfLiteError) { + return kTfLiteError; + } + + TfLiteIntArray* node_inputs = node.inputs; + for (int i = 0; i < node_inputs->size; ++i) { + int tensor_index = node_inputs->data[i]; + if (tensor_index != kOptionalTensor) { + allocs_and_refcounts_[node_inputs->data[i]].count++; + } + } + + // Discontinue if the node has dynamic outputs. + bool has_unallocated_dynamic_tensor = false; + TfLiteIntArray* node_outputs = node.outputs; + for (int i = 0; i < node_outputs->size; ++i) { + TfLiteTensor& tensor = context_.tensors[node_outputs->data[i]]; + if (tensor.allocation_type == kTfLiteDynamic) { + has_unallocated_dynamic_tensor = true; + break; + } + } + if (has_unallocated_dynamic_tensor) { + break; + } + } + + // Allocate graph persistent outputs, e.g. RNN cell states, etc. + for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) { + TfLiteNode& node = nodes_and_registration_[k].first; + + // Go through output tensors and allocate the persistent ones first. + TfLiteIntArray* node_outputs = node.outputs; + for (int i = 0; i < node_outputs->size; ++i) { + int tensor_index = node_outputs->data[i]; + TfLiteTensor& tensor = context_.tensors[tensor_index]; + if (tensor.allocation_type == kTfLiteArenaRwPersistent) { + TF_LITE_ENSURE_OK(&context_, + persistent_arena_.Allocate( + &context_, kDefaultTensorAlignment, tensor.bytes, + &allocs_and_refcounts_[tensor_index].alloc)); + } + } + } + + // Go through the graph in execution order. + for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) { + TfLiteNode& node = nodes_and_registration_[k].first; + + // First allocate output tensors. + TfLiteIntArray* node_outputs = node.outputs; + for (int i = 0; i < node_outputs->size; ++i) { + int tensor_index = node_outputs->data[i]; + TfLiteTensor& tensor = context_.tensors[tensor_index]; + if (tensor.allocation_type == kTfLiteArenaRw) { + TF_LITE_ENSURE_OK( + &context_, + arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes, + &allocs_and_refcounts_[tensor_index].alloc)); + } + } + // Then the temporaries, in two passes. First allocate them all, them + // deallocate them. + TfLiteIntArray* node_temporaries = node.temporaries; + for (int i = 0; i < node_temporaries->size; ++i) { + int tensor_index = node_temporaries->data[i]; + TfLiteTensor& tensor = context_.tensors[tensor_index]; + if (tensor.allocation_type == kTfLiteArenaRw) { + TF_LITE_ENSURE_OK( + &context_, + arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes, + &allocs_and_refcounts_[tensor_index].alloc)); + } + } + for (int i = 0; i < node_temporaries->size; ++i) { + int tensor_index = node_temporaries->data[i]; + TfLiteTensor& tensor = context_.tensors[tensor_index]; + allocs_and_refcounts_[tensor_index].count--; + if (tensor.allocation_type == kTfLiteArenaRw && + allocs_and_refcounts_[tensor_index].count == 0) { + TF_LITE_ENSURE_OK( + &context_, + arena_.Deallocate(&context_, + allocs_and_refcounts_[tensor_index].alloc)); + } + } + + // Then process the node's inputs. + TfLiteIntArray* node_inputs = node.inputs; + for (int i = 0; i < node_inputs->size; ++i) { + int tensor_index = node_inputs->data[i]; + if (tensor_index == kOptionalTensor) { + continue; + } + TfLiteTensor& tensor = context_.tensors[tensor_index]; + + // Decrease reference count and deallocate if not needed anymore. + allocs_and_refcounts_[tensor_index].count--; + if (tensor.allocation_type == kTfLiteArenaRw && + allocs_and_refcounts_[tensor_index].count == 0) { + TF_LITE_ENSURE_OK( + &context_, + arena_.Deallocate(&context_, + allocs_and_refcounts_[tensor_index].alloc)); + } + } + } + + // Resize the buffer and commit the arena. + TF_LITE_ENSURE_OK(&context_, arena_.Commit(&context_)); + TF_LITE_ENSURE_OK(&context_, persistent_arena_.Commit(&context_)); + + // Rewire the tensors to use the underlying arena buffer. + for (int i = 0; i < context_.tensors_size; ++i) { + TfLiteTensor& tensor = context_.tensors[i]; + if (tensor.allocation_type == kTfLiteArenaRw) { + TF_LITE_ENSURE_OK( + &context_, + arena_.ResolveAlloc(&context_, allocs_and_refcounts_[i].alloc, + &tensor.data.raw)); + } + if (tensor.allocation_type == kTfLiteArenaRwPersistent) { + TF_LITE_ENSURE_OK( + &context_, + persistent_arena_.ResolveAlloc( + &context_, allocs_and_refcounts_[i].alloc, &tensor.data.raw)); + } + } + + invokable_ = true; + next_allocate_node_id_ = new_next_allocate_node_id; + return kTfLiteOk; +} + +namespace { +TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector& x) { + TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size()); + for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i]; + return lite; +} +} // namespace + +TfLiteStatus Interpreter::AllocateTensors() { + next_allocate_node_id_ = 0; + TF_LITE_ENSURE_OK(&context_, arena_.Clear()); + TF_LITE_ENSURE_OK(&context_, persistent_arena_.Clear()); + allocs_and_refcounts_.clear(); + return AllocateTensorsWhoseSizesAreKnown(); +} + +TfLiteStatus Interpreter::AddNodeWithParameters( + const std::vector& inputs, const std::vector& outputs, + const char* init_data, size_t init_data_size, void* builtin_data, + const TfLiteRegistration* registration, int* node_index) { + invokable_ = false; + + std::unique_ptr builtin_data_deleter(builtin_data, + free); + + TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("node inputs", inputs.data(), + inputs.size())); + TF_LITE_ENSURE_OK( + &context_, + CheckTensorIndices("node outputs", outputs.data(), outputs.size())); + + if (node_index) *node_index = nodes_and_registration_.size(); + nodes_and_registration_.resize(nodes_and_registration_.size() + 1); + auto& node_and_reg = nodes_and_registration_.back(); + TfLiteNode& node = node_and_reg.first; + if (node.inputs) TfLiteIntArrayFree(node.inputs); + if (node.outputs) TfLiteIntArrayFree(node.outputs); + if (node.temporaries) TfLiteIntArrayFree(node.temporaries); + + // NOTE, here we are not using move semantics yet, since our internal + // representation isn't std::vector, but in the future we would like to avoid + // copies, so we want the interface to take r-value references now. + node.inputs = convertVectorToTfLiteIntArray(inputs); + node.outputs = convertVectorToTfLiteIntArray(outputs); + node.temporaries = TfLiteIntArrayCreate(0); + if (init_data) { + node.user_data = OpInit(*registration, init_data, init_data_size); + } else { + node.user_data = + OpInit(*registration, + reinterpret_cast(builtin_data_deleter.get()), 0); + } + node.builtin_data = builtin_data_deleter.release(); + node_and_reg.second = *registration; + return kTfLiteOk; +} + +TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index, + const std::vector& dims) { + // TODO(aselle): All bounds checks can be implemented as one-sided bounds + // checks by casting to unsigned for efficiency. Profile before doing this. + + TF_LITE_ENSURE(&context_, + tensor_index < context_.tensors_size && tensor_index >= 0); + invokable_ = false; + TfLiteIntArray* dims_lite = convertVectorToTfLiteIntArray(dims); + return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite); +} + +TfLiteStatus Interpreter::Invoke() { + if (!consistent_) { + ReportError(&context_, "Invoke called on model that is not consistent."); + return kTfLiteError; + } + if (!invokable_) { + ReportError(&context_, "Invoke called on model that is not ready."); + return kTfLiteError; + } + + TfLiteStatus status = kTfLiteOk; + if (nnapi_delegate_) { + if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) { + return kTfLiteError; + } + if (next_allocate_node_id_ == nodes_and_registration_.size()) { + TF_LITE_ENSURE_OK(&context_, nnapi_delegate_->Invoke(this)); + return kTfLiteOk; + } else { + // TODO(aselle): In the future, we would like this to be an + // automatic tflite CPU fallback. + ReportError(&context_, + "NNAPI was requested, but dependent sized tensors " + "being used.\n"); + return kTfLiteError; + } + } + + for (int i = 0; i < nodes_and_registration_.size(); i++) { + // Ensure we have allocated up to this node. The point of this is to + // allocate as much as possible before running any evaluation, but + // dynamic shapes can prevent this from being possible. + if (i >= next_allocate_node_id_) { + if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) { + return kTfLiteError; + } + } + TfLiteNode& node = nodes_and_registration_[i].first; + const TfLiteRegistration& registration = nodes_and_registration_[i].second; + if (OpInvoke(registration, &node) == kTfLiteError) { + status = kTfLiteError; + } + } + return status; +} + +TfLiteStatus Interpreter::ResizeTensor(TfLiteContext* context, + TfLiteTensor* tensor, + TfLiteIntArray* new_size) { + // Note here that context->impl_ is recovering the this pointer for an + // instance of Interpreter to call into the member function ResizeTensorImpl + // (this function is static). + return static_cast(context->impl_) + ->ResizeTensorImpl(tensor, new_size); +} + +void Interpreter::ReportErrorImpl(const char* format, va_list args) { + error_reporter_->Report(format, args); +} + +void Interpreter::ReportError(TfLiteContext* context, const char* format, ...) { + va_list args; + va_start(args, format); + auto* f = static_cast(context->impl_); + // Note here that context->impl_ is recovering the this pointer for an + // instance of Interpreter to call into the member function ReportErrorImpl + // (this function is static). + f->ReportErrorImpl(format, args); + va_end(args); +} + +TfLiteStatus Interpreter::AddTensors(int tensors_to_add, + int* first_new_tensor_index) { + int base_index = tensors_.size(); + if (first_new_tensor_index) *first_new_tensor_index = base_index; + tensors_.resize(tensors_.size() + tensors_to_add); + for (int i = base_index; i < tensors_.size(); i++) { + memset(&tensors_[i], 0, sizeof(tensors_[i])); + } + context_.tensors = tensors_.data(); + context_.tensors_size = tensors_.size(); + return kTfLiteOk; +} + +TfLiteStatus Interpreter::AddTensors(TfLiteContext* context, int tensors_to_add, + int* first_new_tensor_index) { + // Note here that context->impl_ is recovering the this pointer for an + // instance of Interpreter to call into the member function AddTensors + // (this function is static). + return static_cast(context->impl_) + ->AddTensors(tensors_to_add, first_new_tensor_index); +} + +TfLiteStatus Interpreter::SetTensorParametersReadOnly( + int tensor_index, TfLiteType type, const char* name, + const std::vector& dims, TfLiteQuantizationParams quantization, + const char* buffer, size_t bytes, const Allocation* allocation) { + TF_LITE_ENSURE(&context_, + tensor_index < context_.tensors_size && tensor_index >= 0); + // For most tensors we know exactly how much memory is necessary so we can + // ensure the buffer is large enough. However, we need to skip string tensors + // because their sizes change with the contents of the individual strings. + if (type != kTfLiteString) { + size_t required_bytes; + TF_LITE_ENSURE_OK(&context_, BytesRequired(type, dims.data(), dims.size(), + &required_bytes)); + TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes); + } + invokable_ = false; + TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), + quantization, const_cast(buffer), bytes, + kTfLiteMmapRo, allocation, &context_.tensors[tensor_index]); + return kTfLiteOk; +} + +// Set description of inputs/outputs/data/fptrs for node `node_index`. +// This variant assumes an external buffer has been allocated of size +// bytes. The lifetime of buffer must be ensured to be greater or equal +// to Interpreter. +TfLiteStatus Interpreter::SetTensorParametersReadWrite( + int tensor_index, TfLiteType type, const char* name, + const std::vector& dims, TfLiteQuantizationParams quantization) { + invokable_ = false; + TF_LITE_ENSURE(&context_, + tensor_index < context_.tensors_size && tensor_index >= 0); + size_t required_bytes = 0; + if (type != kTfLiteString) { + // These types will be allocated in our arena so we need to record how + // many bytes we will need based on the dimensions. String tensors are + // allocated dynamically and we can't know ahead of time how much space + // they will require. + TF_LITE_ENSURE_OK(&context_, BytesRequired(type, dims.data(), dims.size(), + &required_bytes)); + } + TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), + quantization, + /*buffer=*/nullptr, required_bytes, + type == kTfLiteString ? kTfLiteDynamic : kTfLiteArenaRw, + nullptr, &context_.tensors[tensor_index]); + return kTfLiteOk; +} + +TfLiteStatus Interpreter::ResizeTensorImpl(TfLiteTensor* tensor, + TfLiteIntArray* new_size) { + // Note that in theory we could resize kTfLiteArenaRwPersistent tensors too. + if (tensor->allocation_type == kTfLiteArenaRw || + tensor->allocation_type == kTfLiteDynamic) { + if (tensor->type != kTfLiteString) { + size_t bytesRequired; + TfLiteStatus status = BytesRequired(tensor->type, new_size->data, + new_size->size, &bytesRequired); + if (status != kTfLiteOk) { + TfLiteIntArrayFree(new_size); + return kTfLiteError; + } + tensor->bytes = bytesRequired; + } + if (tensor->dims) TfLiteIntArrayFree(tensor->dims); + tensor->dims = new_size; + + if (tensor->allocation_type != kTfLiteDynamic) { + tensor->data.raw = nullptr; + } + } else { + // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore + // of fixed size. + TfLiteIntArrayFree(new_size); + ReportError(&context_, "Attempting to resize a fixed-size tensor."); + return kTfLiteError; + } + return kTfLiteOk; +} + +void Interpreter::UseNNAPI(bool enable) { + // TODO(aselle): This is a workaround for finding if NNAPI exists. + // We also need to make sure getLibraryHandle() is renamed to be NNAPI + // prefixed. + if (!NNAPIExists()) enable = false; + if (!enable) { + nnapi_delegate_.reset(); + } else if (!nnapi_delegate_) { + nnapi_delegate_.reset(new NNAPIDelegate); + } +} + +void Interpreter::SetNumThreads(int num_threads) { + // TODO(ahentz): this forces us to link against gemmlowp even when the ops + // don't use it. We should implement some dynamic mechanism for this sort of + // library-specific initialization. + tflite::gemm_support::SetMaxNumThreads(&context_, num_threads); +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h new file mode 100644 index 0000000000..8bf60e91f7 --- /dev/null +++ b/tensorflow/contrib/lite/interpreter.h @@ -0,0 +1,376 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Main abstraction controlling the tflite interpreter. +// See context.h for the API for defining operations (TfLiteRegistration). +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_ + +#include +#include +#include +#include "tensorflow/contrib/lite/allocation.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/simple_memory_arena.h" +#include "tensorflow/core/platform/platform.h" + +namespace tflite { + +// Map statically from a c++ type to a TfLiteType (used below for safe casts). +template +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteNoType; +} +template <> +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteInt32; +} +template <> +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteInt64; +} +template <> +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteFloat32; +} +template <> +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteUInt8; +} + +struct ArenaAllocRefCount { + ArenaAllocRefCount() : alloc(), count(0) {} + + ArenaAlloc alloc; + int count; +}; + +// Forward declare since NNAPIDelegate uses Interpreter. +class NNAPIDelegate; + +// An interpreter for a graph of nodes that input and output from tensors. +// Each node of the graph processes a set of input tensors and produces a +// set of output Tensors. All inputs/output tensors are referenced by index. +// +// Usage: +// +// -- Create basic model +// Interpreter foo(2, 1); +// foo.SetTensorParametersReadWrite(0, ...); +// foo.SetTensorParametersReadOnly(1, ...); +// foo.SetNodeParameters(0, ...) +// +// -- Resize input array to 1 length. +// foo.ResizeInputTensor(0, 1); +// foo.AllocateTensors(); +// -- Install array data +// foo.typed_tensor(0)[0] = 3; +// foo.Invoke(); +// foo.typed_tensor(0)[0] = 4; +// foo.Invoke(); +// -- Resize input array and set data. +// foo.ResizeInputTensor(0, 2); +// foo.AllocateTensors(); +// foo.typed_tensor(0)[0] = 4; +// foo.typed_tensor(0)[1] = 8; +// foo.Invoke(); +// + +class Interpreter { + public: + // Instantiate an interpreter. All errors associated with reading and + // processing this model will be forwarded to the error_reporter object. + // + // Note, if error_reporter is nullptr, then a default StderrReporter is + // used. + explicit Interpreter(ErrorReporter* error_reporter = DefaultErrorReporter()); + + ~Interpreter(); + + Interpreter(const Interpreter&) = delete; + Interpreter& operator=(const Interpreter&) = delete; + + // Functions to build interpreter + + // Provide a list of tensor indexes that are inputs to the model. + // Each index is bound check and this modifies the consistent_ flag of the + // interpreter. + TfLiteStatus SetInputs(std::vector inputs); + + // Provide a list of tensor indexes that are outputs to the model + // Each index is bound check and this modifies the consistent_ flag of the + // interpreter. + TfLiteStatus SetOutputs(std::vector outputs); + + // Adds a node with the given parameters and returns the index of the new + // node in `node_index` (optionally). Interpreter will take ownership of + // `builtin_data` and destroy it with `delete`. Ownership of 'init_data' + // remains with the caller. + TfLiteStatus AddNodeWithParameters(const std::vector& inputs, + const std::vector& outputs, + const char* init_data, + size_t init_data_size, void* builtin_data, + const TfLiteRegistration* registration, + int* node_index = nullptr); + + // Adds `tensors_to_add` tensors, preserving pre-existing Tensor entries. + // The value pointed to by `first_new_tensor_index` will be set to the + // index of the first new tensor if `first_new_tensor_index` is non-null. + TfLiteStatus AddTensors(int tensors_to_add, + int* first_new_tensor_index = nullptr); + + // Set description of inputs/outputs/data/fptrs for node `node_index`. + // This variant assumes an external buffer has been allocated of size + // bytes. The lifetime of buffer must be ensured to be greater or equal + // to Interpreter. + TfLiteStatus SetTensorParametersReadOnly( + int tensor_index, TfLiteType type, const char* name, + const std::vector& dims, TfLiteQuantizationParams quantization, + const char* buffer, size_t bytes, const Allocation* allocation = nullptr); + + // Set description of inputs/outputs/data/fptrs for node `node_index`. + // This variant assumes an external buffer has been allocated of size + // bytes. The lifetime of buffer must be ensured to be greater or equal + // to Interpreter. + TfLiteStatus SetTensorParametersReadWrite( + int tensor_index, TfLiteType type, const char* name, + const std::vector& dims, TfLiteQuantizationParams quantization); + + // Functions to access tensor data + + // Read only access to list of inputs. + const std::vector& inputs() const { return inputs_; } + + // Return the name of a given input. The given index must be between 0 and + // inputs().size(). + const char* GetInputName(int index) const { + return context_.tensors[inputs_[index]].name; + } + + // Read only access to list of outputs. + const std::vector& outputs() const { return outputs_; } + + // Return the name of a given output. The given index must be between 0 and + // outputs().size(). + const char* GetOutputName(int index) const { + return context_.tensors[outputs_[index]].name; + } + + // Return the number of tensors in the model. + int tensors_size() const { return context_.tensors_size; } + + // Return the number of ops in the model. + int nodes_size() const { return nodes_and_registration_.size(); } + + // Get a tensor data structure. + // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this + // read/write access to structure + TfLiteTensor* tensor(int tensor_index) { + if (tensor_index >= context_.tensors_size || tensor_index < 0) + return nullptr; + return &context_.tensors[tensor_index]; + } + + // Get a pointer to an operation and registration data structure if in bounds. + // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this + // read/write access to structure + const std::pair* node_and_registration( + int node_index) { + if (node_index >= nodes_and_registration_.size() || node_index < 0) + return nullptr; + return &nodes_and_registration_[node_index]; + } + + // Perform a checked cast to the appropriate tensor type. + template + T* typed_tensor(int tensor_index) { + if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) { + if (tensor_ptr->type == typeToTfLiteType()) { + return reinterpret_cast(tensor_ptr->data.raw); + } + } + return nullptr; + } + + // Return a pointer into the data of a given input tensor. The given index + // must be between 0 and inputs().size(). + template + T* typed_input_tensor(int index) { + return typed_tensor(inputs_[index]); + } + + // Return a pointer into the data of a given output tensor. The given index + // must be between 0 and outputs().size(). + template + T* typed_output_tensor(int index) { + return typed_tensor(outputs_[index]); + } + + // Change the dimensionality of a given tensor. Note, this is only acceptable + // for tensor indices that are inputs. + // Returns status of failure or success. + // TODO(aselle): Consider implementing ArraySlice equivalent to make this + // more adept at accepting data without an extra copy. Use absl::ArraySlice + // if our partners determine that dependency is acceptable. + TfLiteStatus ResizeInputTensor(int tensor_index, + const std::vector& dims); + + // Update allocations for all tensors. This will redim dependent tensors using + // the input tensor dimensionality as given. This is relatively expensive. + // If you know that your sizes are not changing, you need not call this. + + // Returns status of success or failure. + // TODO(aselle): Madde + TfLiteStatus AllocateTensors(); + + // Invoke the interpreter (run the whole graph in dependency order). + // + // NOTE: It is possible that the interpreter is not in a ready state + // to evaluate (i.e. if a ResizeTensor() has been performed without an + // AllocateTensors(). + // Returns status of success or failure. + TfLiteStatus Invoke(); + + // Enable or disable the NN API (true to enable) + void UseNNAPI(bool enable); + + // Set the number of threads available to the interpreter. + void SetNumThreads(int num_threads); + + private: + // Give 'op_reg' a chance to initialize itself using the contents of + // 'buffer'. + void* OpInit(const TfLiteRegistration& op_reg, const char* buffer, + size_t length) { + if (op_reg.init == nullptr) return nullptr; + return op_reg.init(&context_, buffer, length); + } + + // Let 'op_reg' release any memory it might have allocated via 'OpInit'. + void OpFree(const TfLiteRegistration& op_reg, void* buffer) { + if (op_reg.free == nullptr) return; + if (buffer) { + op_reg.free(&context_, buffer); + } + } + + // Prepare the given 'node' for execution. + TfLiteStatus OpPrepare(const TfLiteRegistration& op_reg, TfLiteNode* node) { + if (op_reg.prepare == nullptr) return kTfLiteOk; + return op_reg.prepare(&context_, node); + } + + // Invoke the operator represented by 'node'. + TfLiteStatus OpInvoke(const TfLiteRegistration& op_reg, TfLiteNode* node) { + if (op_reg.invoke == nullptr) return kTfLiteError; + return op_reg.invoke(&context_, node); + } + + // Allocate tensors whose sizes are known in order of nodes. Discontinue when + // we encounter a node that has a dynamic output tensor. + TfLiteStatus AllocateTensorsWhoseSizesAreKnown(); + + // Tensors needed by the interpreter. Use `AddTensors` to add more blank + // tensor entries. Note, `tensors_.data()` needs to be synchronized to the + // `context_` whenever this std::vector is reallocated. Currently this + // only happens in `AddTensors()`. + std::vector tensors_; + + // Check if an array of tensor indices are valid with respect to the Tensor + // array. + // NOTE: this changes consistent_ to be false if indices are out of bounds. + TfLiteStatus CheckTensorIndices(const char* label, const int* indices, + int length); + + // Compute the number of bytes required to represent a tensor with dimensions + // specified by the array dims (of length dims_size). Returns the status code + // and bytes. + TfLiteStatus BytesRequired(TfLiteType type, const int* dims, int dims_size, + size_t* bytes); + + // Request an tensor be resized implementation. + TfLiteStatus ResizeTensorImpl(TfLiteTensor* tensor, TfLiteIntArray* new_size); + + // Report a detailed error string (will be printed to stderr). + // TODO(aselle): allow user of class to provide alternative destinations. + void ReportErrorImpl(const char* format, va_list args); + + // Entry point for C node plugin API to request an tensor be resized. + static TfLiteStatus ResizeTensor(TfLiteContext* context, TfLiteTensor* tensor, + TfLiteIntArray* new_size); + // Entry point for C node plugin API to report an error. + static void ReportError(TfLiteContext* context, const char* format, ...); + + // Entry point for C node plugin API to add new tensors. + static TfLiteStatus AddTensors(TfLiteContext* context, int tensors_to_add, + int* first_new_tensor_index); + + // A pure C data structure used to communicate with the pure C plugin + // interface. To avoid copying tensor metadata, this is also the definitive + // structure to store tensors. + TfLiteContext context_; + + // Node inputs/outputs are stored in TfLiteNode and TfLiteRegistration stores + // function pointers to actual implementation. + std::vector> + nodes_and_registration_; + + // Raw memory buffer that is allocated for all temporary and graph outputs. + // that are declared kTfLiteArenaRw. + SimpleMemoryArena arena_; + + // Raw memory buffer that is allocated for persistent tensors that are + // declared as kTfLiteArenaRwPersistent. + SimpleMemoryArena persistent_arena_; + + // Stores allocation and reference counts of all tensors. + std::vector allocs_and_refcounts_; + + // Whether the model is consistent. That is to say if the inputs and outputs + // of every node and the global inputs and outputs are valid indexes into + // the tensor array. + bool consistent_ = true; + + // Whether the model is safe to invoke (if any errors occurred this + // will be false). + bool invokable_ = false; + + // Array of indices representing the tensors that are inputs to the + // interpreter. + std::vector inputs_; + + // Array of indices representing the tensors that are outputs to the + // interpreter. + std::vector outputs_; + + // The error reporter delegate that tflite will forward queries errors to. + ErrorReporter* error_reporter_; + + // Next node to allocate output tensors. + // During Invoke(), Interpreter will allocate input tensors first, which are + // known to be fixed size. Then it will allocate outputs from nodes as many + // as possible. When there is a node that produces dynamic sized tensor. + // Intepreter will stop allocating tensors, set the value of next allocate + // node id, and execute the node to generate the output tensor before continue + // to allocate successors. This process repeats until all nodes are executed. + // NOTE: this relies on the order of nodes that is in topological order. + int next_allocate_node_id_; + + // Whether to delegate to NN API + std::unique_ptr nnapi_delegate_; +}; + +} // namespace tflite +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_ diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc new file mode 100644 index 0000000000..edff210943 --- /dev/null +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -0,0 +1,526 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/interpreter.h" +#include +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace { + +// Make an interpreter that has no tensors and no nodes +TEST(BasicInterpreter, ZeroInterpreter) { + Interpreter interpreter; + interpreter.SetInputs({}); + interpreter.SetOutputs({}); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); +} + +// Test various error conditions. +TEST(BasicInterpreter, InvokeInvalidModel) { + Interpreter interpreter; + ASSERT_NE(interpreter.Invoke(), kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); +} + +// Test size accesser functions. +TEST(BasicInterpreter, TestSizeFunctions) { + Interpreter interpreter; + int base_index; + ASSERT_EQ(interpreter.nodes_size(), 0); + ASSERT_EQ(interpreter.tensors_size(), 0); + ASSERT_EQ(interpreter.AddTensors(2, &base_index), kTfLiteOk); + ASSERT_EQ(interpreter.tensors_size(), 2); + ASSERT_EQ(base_index, 0); + ASSERT_EQ(interpreter.AddTensors(3, &base_index), kTfLiteOk); + ASSERT_EQ(interpreter.tensors_size(), 5); + ASSERT_EQ(interpreter.AddTensors(1), kTfLiteOk); + ASSERT_EQ(interpreter.tensors_size(), 6); + ASSERT_EQ(base_index, 2); +} + +// Test if invalid indices make a model inconsistent (and conversely if +// valid indices keep a model consistent). +TEST(BasicInterpreter, InconsistentModel) { + // Invalid inputs + { + Interpreter interpreter; + ASSERT_NE(interpreter.SetInputs({5}), kTfLiteOk); + ASSERT_NE(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_NE(interpreter.Invoke(), kTfLiteOk); + ASSERT_EQ(interpreter.inputs(), std::vector()); + } + // Invalid outputs + { + Interpreter interpreter; + ASSERT_NE(interpreter.SetOutputs({5}), kTfLiteOk); + ASSERT_NE(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_NE(interpreter.Invoke(), kTfLiteOk); + ASSERT_EQ(interpreter.outputs(), std::vector()); + } + // Invalid node inputs + { + Interpreter interpreter; + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + ASSERT_NE(interpreter.AddNodeWithParameters({3}, {0}, nullptr, 0, nullptr, + ®istration), + kTfLiteOk); + ASSERT_NE(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_NE(interpreter.Invoke(), kTfLiteOk); + } + // Valid inputs and outputs and a node with valid inputs and outputs + { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(2), kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + ASSERT_EQ(interpreter.SetInputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.SetOutputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + ®istration), + kTfLiteOk); + } +} + +// Make an interpreter that has one tensor but no ops +TEST(BasicInterpreter, CheckAllocate) { + struct { + TfLiteType type; + size_t size; + } cases[] = { + {kTfLiteFloat32, sizeof(float)}, + {kTfLiteInt32, sizeof(int32_t)}, + {kTfLiteUInt8, sizeof(uint8_t)}, + {kTfLiteInt64, sizeof(int64_t)}, + }; + + for (auto test : cases) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(2), kTfLiteOk); + interpreter.SetInputs({0, 1}); + interpreter.SetOutputs({}); + TfLiteQuantizationParams quant; + + interpreter.SetTensorParametersReadWrite(0, test.type, "", {3}, quant); + interpreter.SetTensorParametersReadWrite(1, test.type, "", {4}, quant); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.tensor(0)->bytes, 3 * test.size); + ASSERT_NE(interpreter.tensor(0)->data.raw, nullptr); + ASSERT_EQ(interpreter.tensor(1)->bytes, 4 * test.size); + ASSERT_NE(interpreter.tensor(1)->data.raw, nullptr); + } +} + +TEST(BasicInterpreter, CheckResize) { + const float floats[] = {-3., -4.}; + const int32_t int32s[] = {-3, -4}; + const uint8_t uint8s[] = {3, 4}; + const int64_t int64s[] = {6, -7}; + + struct { + TfLiteType type; + size_t size; + const char* array; + } cases[] = { + {kTfLiteFloat32, sizeof(float), reinterpret_cast(floats)}, + {kTfLiteInt32, sizeof(int32_t), reinterpret_cast(int32s)}, + {kTfLiteUInt8, sizeof(uint8_t), reinterpret_cast(uint8s)}, + {kTfLiteInt64, sizeof(int64_t), reinterpret_cast(int64s)}, + }; + + for (auto test : cases) { + Interpreter interpreter; + + ASSERT_EQ(interpreter.AddTensors(2), kTfLiteOk); + interpreter.SetInputs({0, 1}); + interpreter.SetOutputs({}); + TfLiteQuantizationParams quant; + + ASSERT_EQ( + interpreter.SetTensorParametersReadWrite(0, test.type, "", {3}, quant), + kTfLiteOk); + ASSERT_EQ(interpreter.SetTensorParametersReadOnly( + 1, test.type, "", {2}, quant, test.array, 2 * test.size), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.ResizeInputTensor(0, {1, 2}), kTfLiteOk); + // Resizing a mmapped tensor is not allowed and should produce error. + ASSERT_NE(interpreter.ResizeInputTensor(1, {3}), kTfLiteOk); + // Set the tensor to be mmapped but with a buffer size that is insufficient + // to match the dimensionality. + ASSERT_NE(interpreter.SetTensorParametersReadOnly( + 1, test.type, "", {2}, quant, test.array, 1 * test.size), + kTfLiteOk); + // Allocating should work since we should have our last correct array + // values in place. + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + } +} + +TEST(BasicInterpreter, CheckAlignment) { + struct { + TfLiteType type; + } cases[] = { + {kTfLiteFloat32}, + {kTfLiteInt32}, + {kTfLiteUInt8}, + {kTfLiteInt64}, + }; + + for (auto test : cases) { + Interpreter interpreter; + + ASSERT_EQ(interpreter.AddTensors(4), kTfLiteOk); + + for (int i = 0; i < 4; i++) { + TfLiteQuantizationParams quant; + interpreter.SetTensorParametersReadWrite(i, test.type, "", {2 * i + 1}, + quant); + } + interpreter.AllocateTensors(); + for (int i = 0; i < 4; i++) { + const TfLiteTensor& tensor = *interpreter.tensor(i); + ASSERT_EQ(reinterpret_cast(tensor.data.raw) % 4, 0); + } + } +} + +TEST(BasicInterpreter, CheckArenaAllocation) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(10), kTfLiteOk); + + TfLiteQuantizationParams quant; + TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; + + std::vector sizes{2048, 4096, 1023, 2047, 1021, + 2047, 1023, 2046, 1021, 2048}; + for (int i = 0; i < sizes.size(); ++i) { + interpreter.SetTensorParametersReadWrite(i, kTfLiteUInt8, "", {sizes[i]}, + quant); + } + interpreter.SetInputs({0, 1}); + interpreter.SetOutputs({9, 4}); + interpreter.AddNodeWithParameters({0, 1}, {2, 3}, nullptr, 0, nullptr, ®); + interpreter.AddNodeWithParameters({2, 1}, {4, 5}, nullptr, 0, nullptr, ®); + interpreter.AddNodeWithParameters({4, 3}, {6, 7}, nullptr, 0, nullptr, ®); + interpreter.AddNodeWithParameters({6, 5}, {8}, nullptr, 0, nullptr, ®); + interpreter.AddNodeWithParameters({8, 7}, {9}, nullptr, 0, nullptr, ®); + + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + + ASSERT_EQ(interpreter.tensor(0)->data.raw, interpreter.tensor(4)->data.raw); + ASSERT_EQ(interpreter.tensor(1)->data.raw, interpreter.tensor(7)->data.raw); + + ASSERT_LT(interpreter.tensor(4)->data.raw, interpreter.tensor(1)->data.raw); + ASSERT_LT(interpreter.tensor(6)->data.raw, interpreter.tensor(1)->data.raw); + ASSERT_LT(interpreter.tensor(0)->data.raw, interpreter.tensor(1)->data.raw); + + ASSERT_LT(interpreter.tensor(0)->data.raw, interpreter.tensor(3)->data.raw); + ASSERT_LT(interpreter.tensor(1)->data.raw, interpreter.tensor(3)->data.raw); + ASSERT_LT(interpreter.tensor(2)->data.raw, interpreter.tensor(3)->data.raw); + ASSERT_LT(interpreter.tensor(4)->data.raw, interpreter.tensor(3)->data.raw); + ASSERT_LT(interpreter.tensor(6)->data.raw, interpreter.tensor(3)->data.raw); + ASSERT_LT(interpreter.tensor(7)->data.raw, interpreter.tensor(3)->data.raw); + ASSERT_LT(interpreter.tensor(8)->data.raw, interpreter.tensor(3)->data.raw); + ASSERT_LT(interpreter.tensor(9)->data.raw, interpreter.tensor(3)->data.raw); + + ASSERT_LT(interpreter.tensor(0)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(1)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(2)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(3)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(4)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(6)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(7)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(8)->data.raw, interpreter.tensor(5)->data.raw); + ASSERT_LT(interpreter.tensor(9)->data.raw, interpreter.tensor(5)->data.raw); +} + +TEST(BasicInterpreter, BufferAccess) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(1), kTfLiteOk); + ASSERT_EQ(interpreter.SetInputs({0}), kTfLiteOk); + + ASSERT_EQ(interpreter.SetTensorParametersReadWrite( + 0, kTfLiteFloat32, "", {3}, TfLiteQuantizationParams()), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + // Verify we get a valid pointer.r + ASSERT_NE(interpreter.typed_tensor(0), nullptr); + // Verify incorrect pointer will not returned. + ASSERT_EQ(interpreter.typed_tensor(0), nullptr); + // Verify that raw c interface ptr matches safe interface. + ASSERT_EQ(interpreter.typed_tensor(0), interpreter.tensor(0)->data.f); +} + +TEST(BasicInterpreter, NoOpInterpreter) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(1), kTfLiteOk); + ASSERT_EQ(interpreter.SetInputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.SetOutputs({0}), kTfLiteOk); + + ASSERT_EQ(interpreter.SetTensorParametersReadWrite( + 0, kTfLiteFloat32, "", {3}, TfLiteQuantizationParams()), + kTfLiteOk); + + ASSERT_EQ(interpreter.ResizeInputTensor(interpreter.inputs()[0], {1, 2, 3}), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); +} + +TEST(BasicInterpreter, OneOpInterpreter) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(2), kTfLiteOk); + ASSERT_EQ(interpreter.SetInputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.SetOutputs({1}), kTfLiteOk); + + TfLiteQuantizationParams quantized; + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(0, kTfLiteFloat32, "in1", + {3}, quantized), + kTfLiteOk); + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(1, kTfLiteFloat32, "out0", + {3}, quantized), + kTfLiteOk); + + ASSERT_EQ(interpreter.GetInputName(0), "in1"); + ASSERT_EQ(interpreter.GetOutputName(0), "out0"); + + TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; + reg.init = [](TfLiteContext* context, const char*, size_t) -> void* { + auto* first_new_tensor = new int; + context->AddTensors(context, 2, first_new_tensor); + return first_new_tensor; + }; + reg.free = [](TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); + }; + reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { + auto* first_new_tensor = reinterpret_cast(node->user_data); + + TfLiteTensor* tensor0 = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* tensor1 = &context->tensors[node->outputs->data[0]]; + + TfLiteIntArray* newSize = TfLiteIntArrayCopy(tensor0->dims); + TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, tensor1, newSize)); + + TfLiteIntArrayFree(node->temporaries); + node->temporaries = TfLiteIntArrayCreate(2); + for (int i = 0; i < 2; ++i) { + node->temporaries->data[i] = *(first_new_tensor) + i; + } + + auto setup_temporary = [&](int id) { + TfLiteTensor* tmp = &context->tensors[id]; + tmp->type = kTfLiteFloat32; + tmp->allocation_type = kTfLiteArenaRw; + return context->ResizeTensor(context, tmp, + TfLiteIntArrayCopy(tensor0->dims)); + }; + TF_LITE_ENSURE_STATUS(setup_temporary(node->temporaries->data[0])); + TF_LITE_ENSURE_STATUS(setup_temporary(node->temporaries->data[1])); + + return kTfLiteOk; + }; + reg.invoke = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* a0 = &context->tensors[node->inputs->data[0]]; + + auto populate = [&](int id) { + TfLiteTensor* t = &context->tensors[id]; + int num = a0->dims->data[0]; + for (int i = 0; i < num; i++) { + t->data.f[i] = a0->data.f[i]; + } + }; + + populate(node->outputs->data[0]); + populate(node->temporaries->data[0]); + populate(node->temporaries->data[1]); + return kTfLiteOk; + }; + ASSERT_EQ( + interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, ®), + kTfLiteOk); + ASSERT_EQ(interpreter.ResizeInputTensor(0, {3}), kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + + ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); +} + +// Forcefully divides tensor allocation in three steps: one before invocation +// and two more at invocation time. This happens because we use string tensors +// and their sizes can't be determined until invocation time. +TEST(BasicInterpreter, ThreeStepAllocate) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(5), kTfLiteOk); + ASSERT_EQ(interpreter.SetInputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.SetOutputs({4}), kTfLiteOk); + + TfLiteQuantizationParams quantized; + char data[] = {1, 0, 0, 0, 12, 0, 0, 0, 15, 0, 0, 0, 'A', 'B', 'C'}; + // Read only string tensor. + ASSERT_EQ(interpreter.SetTensorParametersReadOnly(0, kTfLiteString, "", {1}, + quantized, data, 15), + kTfLiteOk); + // Read-write string tensor. + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(1, kTfLiteString, "", {1}, + quantized), + kTfLiteOk); + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(2, kTfLiteInt32, "", {1}, + quantized), + kTfLiteOk); + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(3, kTfLiteString, "", {1}, + quantized), + kTfLiteOk); + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(4, kTfLiteInt32, "", {1}, + quantized), + kTfLiteOk); + + // String-in String-out node. + TfLiteRegistration reg_copy = {nullptr, nullptr, nullptr, nullptr}; + reg_copy.invoke = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* a0 = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* a1 = &context->tensors[node->outputs->data[0]]; + DynamicBuffer buf; + StringRef str_ref = GetString(a0, 0); + buf.AddString(str_ref); + buf.WriteToTensor(a1); + return kTfLiteOk; + }; + + // String-in Int-out node. + TfLiteRegistration reg_len = {nullptr, nullptr, nullptr, nullptr}; + reg_len.prepare = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + TfLiteIntArray* outputSize = TfLiteIntArrayCreate(1); + outputSize->data[0] = 1; + return context->ResizeTensor(context, output, outputSize); + }; + reg_len.invoke = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* a0 = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* a1 = &context->tensors[node->outputs->data[0]]; + a1->data.i32[0] = a0->bytes; + return kTfLiteOk; + }; + + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + ®_copy), + kTfLiteOk); + ASSERT_EQ(interpreter.AddNodeWithParameters({1}, {2}, nullptr, 0, nullptr, + ®_len), + kTfLiteOk); + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {3}, nullptr, 0, nullptr, + ®_copy), + kTfLiteOk); + ASSERT_EQ(interpreter.AddNodeWithParameters({3}, {4}, nullptr, 0, nullptr, + ®_len), + kTfLiteOk); + + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); + + ASSERT_EQ(interpreter.tensor(0)->bytes, 15); + ASSERT_NE(interpreter.tensor(0)->data.raw, nullptr); + ASSERT_EQ(interpreter.tensor(1)->bytes, 15); + ASSERT_NE(interpreter.tensor(1)->data.raw, nullptr); + ASSERT_EQ(interpreter.tensor(3)->bytes, 15); + ASSERT_NE(interpreter.tensor(4)->data.raw, nullptr); + ASSERT_EQ(interpreter.tensor(2)->bytes, 4); + ASSERT_EQ(interpreter.tensor(2)->data.i32[0], 15); + ASSERT_EQ(interpreter.tensor(4)->bytes, 4); + ASSERT_EQ(interpreter.tensor(4)->data.i32[0], 15); +} + +TEST(BasicInterpreter, AllocateTwice) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(2), kTfLiteOk); + ASSERT_EQ(interpreter.SetInputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.SetOutputs({1}), kTfLiteOk); + + TfLiteQuantizationParams quantized; + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, + quantized), + kTfLiteOk); + ASSERT_EQ(interpreter.SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, + quantized), + kTfLiteOk); + + TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; + reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* tensor0 = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* tensor1 = &context->tensors[node->outputs->data[0]]; + TfLiteIntArray* newSize = TfLiteIntArrayCopy(tensor0->dims); + return context->ResizeTensor(context, tensor1, newSize); + }; + reg.invoke = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* a0 = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* a1 = &context->tensors[node->outputs->data[0]]; + int num = a0->dims->data[0]; + for (int i = 0; i < num; i++) { + a1->data.f[i] = a0->data.f[i]; + } + return kTfLiteOk; + }; + ASSERT_EQ( + interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, ®), + kTfLiteOk); + ASSERT_EQ(interpreter.ResizeInputTensor(0, {3}), kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); + char* old_tensor0_ptr = interpreter.tensor(0)->data.raw; + char* old_tensor1_ptr = interpreter.tensor(1)->data.raw; + + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); + ASSERT_EQ(old_tensor0_ptr, interpreter.tensor(0)->data.raw); + ASSERT_EQ(old_tensor1_ptr, interpreter.tensor(1)->data.raw); +} + +struct TestErrorReporter : public ErrorReporter { + int Report(const char* format, va_list args) override { + char buffer[1024]; + int size = vsnprintf(buffer, sizeof(buffer), format, args); + all_reports += buffer; + calls++; + return size; + } + int calls = 0; + std::string all_reports; +}; + +TEST(BasicInterpreter, TestNullErrorReporter) { + TestErrorReporter reporter; + Interpreter interpreter; +} + +TEST(BasicInterpreter, TestCustomErrorReporter) { + TestErrorReporter reporter; + Interpreter interpreter(&reporter); + ASSERT_NE(interpreter.Invoke(), kTfLiteOk); + ASSERT_EQ(reporter.all_reports, "Invoke called on model that is not ready."); + ASSERT_EQ(reporter.calls, 1); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { +#ifdef OS_LINUX + FLAGS_logtostderr = true; +#endif + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD new file mode 100644 index 0000000000..80e8bd435e --- /dev/null +++ b/tensorflow/contrib/lite/java/BUILD @@ -0,0 +1,164 @@ +# Description: +# TensorFlow Lite Java API. + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow/java:build_defs.bzl", "JAVACOPTS") +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_jni_binary") + +android_library( + name = "tensorflowlite", + srcs = glob( + [ + "src/main/java/org/tensorflow/lite/*.java", + ], + ), + visibility = ["//visibility:public"], + deps = [ + ":tflite_runtime", + "@javax_validation", + ], +) + +android_library( + name = "tensorflowlite_java", + srcs = glob( + [ + "src/main/java/org/tensorflow/lite/*.java", + ], + ), + visibility = ["//visibility:public"], + deps = [ + "@javax_validation", + ], +) + +java_library( + name = "tensorflowlitelib", + srcs = glob( + [ + "src/main/java/org/tensorflow/lite/*.java", + ], + ), + javacopts = JAVACOPTS, + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/contrib/lite/java/src/main/native", + "@javax_validation", + ], +) + +java_test( + name = "TensorFlowLiteTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java"], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.TensorFlowLiteTest", + deps = [ + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + +java_test( + name = "DataTypeTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/DataTypeTest.java"], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.DataTypeTest", + deps = [ + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + +java_test( + name = "NativeInterpreterWrapperTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java"], + data = [ + "src/testdata/add.bin", + "src/testdata/int32.bin", + "src/testdata/int64.bin", + "src/testdata/invalid.model.tflite", + "src/testdata/uint8.bin", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", + deps = [ + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + +# TODO: generate large models at runtime, instead of storing them. +java_test( + name = "InterpreterTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/InterpreterTest.java"], + data = [ + "src/testdata/add.bin", + "src/testdata/mobilenet.tflite.bin", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.InterpreterTest", + deps = [ + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + +java_test( + name = "TensorTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/TensorTest.java"], + data = [ + "src/testdata/add.bin", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.TensorTest", + deps = [ + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + +filegroup( + name = "libtensorflowlite_jni", + srcs = select({ + "//conditions:default": [":libtensorflowlite_jni.so"], + }), + visibility = ["//visibility:public"], +) + +cc_library( + name = "tflite_runtime", + srcs = ["libtensorflowlite_jni.so"], + visibility = ["//visibility:public"], +) + +tflite_jni_binary( + name = "libtensorflowlite_jni.so", + deps = [ + "//tensorflow/contrib/lite/java/src/main/native", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/java/demo/.gitignore b/tensorflow/contrib/lite/java/demo/.gitignore new file mode 100644 index 0000000000..39fb081a42 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/.gitignore @@ -0,0 +1,9 @@ +*.iml +.gradle +/local.properties +/.idea/workspace.xml +/.idea/libraries +.DS_Store +/build +/captures +.externalNativeBuild diff --git a/tensorflow/contrib/lite/java/demo/app/build.gradle b/tensorflow/contrib/lite/java/demo/app/build.gradle new file mode 100644 index 0000000000..e1470fe717 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/build.gradle @@ -0,0 +1,58 @@ +apply plugin: 'com.android.application' + +android { + compileSdkVersion 26 + buildToolsVersion "26.0.1" + defaultConfig { + applicationId "android.example.com.tflitecamerademo" + minSdkVersion 15 + targetSdkVersion 26 + versionCode 1 + versionName "1.0" + testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" + + // Remove this block. + jackOptions { + enabled true + } + } + lintOptions { + abortOnError false + } + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' + } + } + aaptOptions { + noCompress "tflite" + } + + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } +} + +repositories { + flatDir { + dirs 'libs' + } +} + +dependencies { + compile fileTree(dir: 'libs', include: ['*.jar']) + androidTestCompile('com.android.support.test.espresso:espresso-core:2.2.2', { + exclude group: 'com.android.support', module: 'support-annotations' + }) + compile 'com.android.support:appcompat-v7:25.2.0' + compile 'com.android.support.constraint:constraint-layout:1.0.2' + compile 'com.android.support:design:25.2.0' + compile 'com.android.support:support-annotations:25.3.1' + compile 'com.android.support:support-v13:25.2.0' + + compile 'org.tensorflow:tensorflow-lite:+' + + testCompile 'junit:junit:4.12' +} diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/AndroidManifest.xml b/tensorflow/contrib/lite/java/demo/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000000..ba63dce5d9 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/AndroidManifest.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD new file mode 100644 index 0000000000..512a86affe --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -0,0 +1,43 @@ +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) # Apache 2.0 + +android_binary( + name = "TfLiteCameraDemo", + srcs = glob(["java/**/*.java"]), + assets = [ + ":assets", + ], + assets_dir = "", + custom_package = "com.example.android.tflitecamerademo", + manifest = "AndroidManifest.xml", + nocompress_extensions = [ + ".tflite", + ], + resource_files = glob(["res/**"]), + deps = [ + "//tensorflow/contrib/lite/java:tensorflowlite", + "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", + "@androidsdk//com.android.support:support-v13-25.2.0", + "@androidsdk//com.android.support:support-v4-25.2.0", + ], +) + +filegroup( + name = "assets", + srcs = [ + "@tflite_mobilenet//:model_files", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD new file mode 100644 index 0000000000..1a759f5652 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD @@ -0,0 +1,26 @@ +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) # Apache 2.0 + +filegroup( + name = "assets_files", + srcs = glob( + ["**/*"], + exclude = [ + "BUILD", + ], + ), + visibility = ["//visibility:public"], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt b/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt new file mode 100644 index 0000000000..fe811239d8 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt @@ -0,0 +1,1001 @@ +background +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/AutoFitTextureView.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/AutoFitTextureView.java new file mode 100644 index 0000000000..f204590659 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/AutoFitTextureView.java @@ -0,0 +1,72 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.tflitecamerademo; + +import android.content.Context; +import android.util.AttributeSet; +import android.view.TextureView; + +/** A {@link TextureView} that can be adjusted to a specified aspect ratio. */ +public class AutoFitTextureView extends TextureView { + + private int mRatioWidth = 0; + private int mRatioHeight = 0; + + public AutoFitTextureView(Context context) { + this(context, null); + } + + public AutoFitTextureView(Context context, AttributeSet attrs) { + this(context, attrs, 0); + } + + public AutoFitTextureView(Context context, AttributeSet attrs, int defStyle) { + super(context, attrs, defStyle); + } + + /** + * Sets the aspect ratio for this view. The size of the view will be measured based on the ratio + * calculated from the parameters. Note that the actual sizes of parameters don't matter, that is, + * calling setAspectRatio(2, 3) and setAspectRatio(4, 6) make the same result. + * + * @param width Relative horizontal size + * @param height Relative vertical size + */ + public void setAspectRatio(int width, int height) { + if (width < 0 || height < 0) { + throw new IllegalArgumentException("Size cannot be negative."); + } + mRatioWidth = width; + mRatioHeight = height; + requestLayout(); + } + + @Override + protected void onMeasure(int widthMeasureSpec, int heightMeasureSpec) { + super.onMeasure(widthMeasureSpec, heightMeasureSpec); + int width = MeasureSpec.getSize(widthMeasureSpec); + int height = MeasureSpec.getSize(heightMeasureSpec); + if (0 == mRatioWidth || 0 == mRatioHeight) { + setMeasuredDimension(width, height); + } else { + if (width < height * mRatioWidth / mRatioHeight) { + setMeasuredDimension(width, width * mRatioHeight / mRatioWidth); + } else { + setMeasuredDimension(height * mRatioWidth / mRatioHeight, height); + } + } + } +} diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java new file mode 100644 index 0000000000..74737a8b88 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java @@ -0,0 +1,708 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.tflitecamerademo; + +import android.app.Activity; +import android.app.AlertDialog; +import android.app.Dialog; +import android.app.DialogFragment; +import android.app.Fragment; +import android.content.Context; +import android.content.DialogInterface; +import android.content.pm.PackageInfo; +import android.content.pm.PackageManager; +import android.content.res.Configuration; +import android.graphics.Bitmap; +import android.graphics.ImageFormat; +import android.graphics.Matrix; +import android.graphics.Point; +import android.graphics.RectF; +import android.graphics.SurfaceTexture; +import android.hardware.camera2.CameraAccessException; +import android.hardware.camera2.CameraCaptureSession; +import android.hardware.camera2.CameraCharacteristics; +import android.hardware.camera2.CameraDevice; +import android.hardware.camera2.CameraManager; +import android.hardware.camera2.CaptureRequest; +import android.hardware.camera2.CaptureResult; +import android.hardware.camera2.TotalCaptureResult; +import android.hardware.camera2.params.StreamConfigurationMap; +import android.media.ImageReader; +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.support.annotation.NonNull; +import android.support.v13.app.FragmentCompat; +import android.support.v4.content.ContextCompat; +import android.util.Log; +import android.util.Size; +import android.view.LayoutInflater; +import android.view.Surface; +import android.view.TextureView; +import android.view.View; +import android.view.ViewGroup; +import android.widget.TextView; +import android.widget.Toast; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; + +/** Basic fragments for the Camera. */ +public class Camera2BasicFragment extends Fragment + implements FragmentCompat.OnRequestPermissionsResultCallback { + + /** Tag for the {@link Log}. */ + private static final String TAG = "TfLiteCameraDemo"; + + private static final String FRAGMENT_DIALOG = "dialog"; + + private static final String HANDLE_THREAD_NAME = "CameraBackground"; + + private static final int PERMISSIONS_REQUEST_CODE = 1; + + private final Object lock = new Object(); + private boolean runClassifier = false; + private boolean checkedPermissions = false; + private TextView textView; + private ImageClassifier classifier; + + /** Max preview width that is guaranteed by Camera2 API */ + private static final int MAX_PREVIEW_WIDTH = 1920; + + /** Max preview height that is guaranteed by Camera2 API */ + private static final int MAX_PREVIEW_HEIGHT = 1080; + + /** + * {@link TextureView.SurfaceTextureListener} handles several lifecycle events on a {@link + * TextureView}. + */ + private final TextureView.SurfaceTextureListener surfaceTextureListener = + new TextureView.SurfaceTextureListener() { + + @Override + public void onSurfaceTextureAvailable(SurfaceTexture texture, int width, int height) { + openCamera(width, height); + } + + @Override + public void onSurfaceTextureSizeChanged(SurfaceTexture texture, int width, int height) { + configureTransform(width, height); + } + + @Override + public boolean onSurfaceTextureDestroyed(SurfaceTexture texture) { + return true; + } + + @Override + public void onSurfaceTextureUpdated(SurfaceTexture texture) {} + }; + + /** ID of the current {@link CameraDevice}. */ + private String cameraId; + + /** An {@link AutoFitTextureView} for camera preview. */ + private AutoFitTextureView textureView; + + /** A {@link CameraCaptureSession } for camera preview. */ + private CameraCaptureSession captureSession; + + /** A reference to the opened {@link CameraDevice}. */ + private CameraDevice cameraDevice; + + /** The {@link android.util.Size} of camera preview. */ + private Size previewSize; + + /** {@link CameraDevice.StateCallback} is called when {@link CameraDevice} changes its state. */ + private final CameraDevice.StateCallback stateCallback = + new CameraDevice.StateCallback() { + + @Override + public void onOpened(@NonNull CameraDevice currentCameraDevice) { + // This method is called when the camera is opened. We start camera preview here. + cameraOpenCloseLock.release(); + cameraDevice = currentCameraDevice; + createCameraPreviewSession(); + } + + @Override + public void onDisconnected(@NonNull CameraDevice currentCameraDevice) { + cameraOpenCloseLock.release(); + currentCameraDevice.close(); + cameraDevice = null; + } + + @Override + public void onError(@NonNull CameraDevice currentCameraDevice, int error) { + cameraOpenCloseLock.release(); + currentCameraDevice.close(); + cameraDevice = null; + Activity activity = getActivity(); + if (null != activity) { + activity.finish(); + } + } + }; + + /** An additional thread for running tasks that shouldn't block the UI. */ + private HandlerThread backgroundThread; + + /** A {@link Handler} for running tasks in the background. */ + private Handler backgroundHandler; + + /** An {@link ImageReader} that handles image capture. */ + private ImageReader imageReader; + + /** {@link CaptureRequest.Builder} for the camera preview */ + private CaptureRequest.Builder previewRequestBuilder; + + /** {@link CaptureRequest} generated by {@link #previewRequestBuilder} */ + private CaptureRequest previewRequest; + + /** A {@link Semaphore} to prevent the app from exiting before closing the camera. */ + private Semaphore cameraOpenCloseLock = new Semaphore(1); + + /** A {@link CameraCaptureSession.CaptureCallback} that handles events related to capture. */ + private CameraCaptureSession.CaptureCallback captureCallback = + new CameraCaptureSession.CaptureCallback() { + + @Override + public void onCaptureProgressed( + @NonNull CameraCaptureSession session, + @NonNull CaptureRequest request, + @NonNull CaptureResult partialResult) {} + + @Override + public void onCaptureCompleted( + @NonNull CameraCaptureSession session, + @NonNull CaptureRequest request, + @NonNull TotalCaptureResult result) {} + }; + + /** + * Shows a {@link Toast} on the UI thread for the classification results. + * + * @param text The message to show + */ + private void showToast(final String text) { + final Activity activity = getActivity(); + if (activity != null) { + activity.runOnUiThread( + new Runnable() { + @Override + public void run() { + textView.setText(text); + } + }); + } + } + + /** + * Resizes image. + * + * Attempting to use too large a preview size could exceed the camera bus' bandwidth limitation, + * resulting in gorgeous previews but the storage of garbage capture data. + * + * Given {@code choices} of {@code Size}s supported by a camera, choose the smallest one that is + * at least as large as the respective texture view size, and that is at most as large as the + * respective max size, and whose aspect ratio matches with the specified value. If such size + * doesn't exist, choose the largest one that is at most as large as the respective max size, and + * whose aspect ratio matches with the specified value. + * + * @param choices The list of sizes that the camera supports for the intended output class + * @param textureViewWidth The width of the texture view relative to sensor coordinate + * @param textureViewHeight The height of the texture view relative to sensor coordinate + * @param maxWidth The maximum width that can be chosen + * @param maxHeight The maximum height that can be chosen + * @param aspectRatio The aspect ratio + * @return The optimal {@code Size}, or an arbitrary one if none were big enough + */ + private static Size chooseOptimalSize( + Size[] choices, + int textureViewWidth, + int textureViewHeight, + int maxWidth, + int maxHeight, + Size aspectRatio) { + + // Collect the supported resolutions that are at least as big as the preview Surface + List bigEnough = new ArrayList<>(); + // Collect the supported resolutions that are smaller than the preview Surface + List notBigEnough = new ArrayList<>(); + int w = aspectRatio.getWidth(); + int h = aspectRatio.getHeight(); + for (Size option : choices) { + if (option.getWidth() <= maxWidth + && option.getHeight() <= maxHeight + && option.getHeight() == option.getWidth() * h / w) { + if (option.getWidth() >= textureViewWidth && option.getHeight() >= textureViewHeight) { + bigEnough.add(option); + } else { + notBigEnough.add(option); + } + } + } + + // Pick the smallest of those big enough. If there is no one big enough, pick the + // largest of those not big enough. + if (bigEnough.size() > 0) { + return Collections.min(bigEnough, new CompareSizesByArea()); + } else if (notBigEnough.size() > 0) { + return Collections.max(notBigEnough, new CompareSizesByArea()); + } else { + Log.e(TAG, "Couldn't find any suitable preview size"); + return choices[0]; + } + } + + public static Camera2BasicFragment newInstance() { + return new Camera2BasicFragment(); + } + + /** Layout the preview and buttons. */ + @Override + public View onCreateView( + LayoutInflater inflater, ViewGroup container, Bundle savedInstanceState) { + return inflater.inflate(R.layout.fragment_camera2_basic, container, false); + } + + /** Connect the buttons to their event handler. */ + @Override + public void onViewCreated(final View view, Bundle savedInstanceState) { + textureView = (AutoFitTextureView) view.findViewById(R.id.texture); + textView = (TextView) view.findViewById(R.id.text); + } + + /** Load the model and labels. */ + @Override + public void onActivityCreated(Bundle savedInstanceState) { + super.onActivityCreated(savedInstanceState); + try { + classifier = new ImageClassifier(getActivity()); + } catch (IOException e) { + Log.e(TAG, "Failed to initialize an image classifier."); + } + startBackgroundThread(); + } + + @Override + public void onResume() { + super.onResume(); + startBackgroundThread(); + + // When the screen is turned off and turned back on, the SurfaceTexture is already + // available, and "onSurfaceTextureAvailable" will not be called. In that case, we can open + // a camera and start preview from here (otherwise, we wait until the surface is ready in + // the SurfaceTextureListener). + if (textureView.isAvailable()) { + openCamera(textureView.getWidth(), textureView.getHeight()); + } else { + textureView.setSurfaceTextureListener(surfaceTextureListener); + } + } + + @Override + public void onPause() { + closeCamera(); + stopBackgroundThread(); + super.onPause(); + } + + @Override + public void onDestroy() { + classifier.close(); + super.onDestroy(); + } + + /** + * Sets up member variables related to camera. + * + * @param width The width of available size for camera preview + * @param height The height of available size for camera preview + */ + private void setUpCameraOutputs(int width, int height) { + Activity activity = getActivity(); + CameraManager manager = (CameraManager) activity.getSystemService(Context.CAMERA_SERVICE); + try { + for (String cameraId : manager.getCameraIdList()) { + CameraCharacteristics characteristics = manager.getCameraCharacteristics(cameraId); + + // We don't use a front facing camera in this sample. + Integer facing = characteristics.get(CameraCharacteristics.LENS_FACING); + if (facing != null && facing == CameraCharacteristics.LENS_FACING_FRONT) { + continue; + } + + StreamConfigurationMap map = + characteristics.get(CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP); + if (map == null) { + continue; + } + + // // For still image captures, we use the largest available size. + Size largest = + Collections.max( + Arrays.asList(map.getOutputSizes(ImageFormat.JPEG)), new CompareSizesByArea()); + imageReader = + ImageReader.newInstance( + largest.getWidth(), largest.getHeight(), ImageFormat.JPEG, /*maxImages*/ 2); + + // Find out if we need to swap dimension to get the preview size relative to sensor + // coordinate. + int displayRotation = activity.getWindowManager().getDefaultDisplay().getRotation(); + // noinspection ConstantConditions + /* Orientation of the camera sensor */ + int sensorOrientation = characteristics.get(CameraCharacteristics.SENSOR_ORIENTATION); + boolean swappedDimensions = false; + switch (displayRotation) { + case Surface.ROTATION_0: + case Surface.ROTATION_180: + if (sensorOrientation == 90 || sensorOrientation == 270) { + swappedDimensions = true; + } + break; + case Surface.ROTATION_90: + case Surface.ROTATION_270: + if (sensorOrientation == 0 || sensorOrientation == 180) { + swappedDimensions = true; + } + break; + default: + Log.e(TAG, "Display rotation is invalid: " + displayRotation); + } + + Point displaySize = new Point(); + activity.getWindowManager().getDefaultDisplay().getSize(displaySize); + int rotatedPreviewWidth = width; + int rotatedPreviewHeight = height; + int maxPreviewWidth = displaySize.x; + int maxPreviewHeight = displaySize.y; + + if (swappedDimensions) { + rotatedPreviewWidth = height; + rotatedPreviewHeight = width; + maxPreviewWidth = displaySize.y; + maxPreviewHeight = displaySize.x; + } + + if (maxPreviewWidth > MAX_PREVIEW_WIDTH) { + maxPreviewWidth = MAX_PREVIEW_WIDTH; + } + + if (maxPreviewHeight > MAX_PREVIEW_HEIGHT) { + maxPreviewHeight = MAX_PREVIEW_HEIGHT; + } + + previewSize = + chooseOptimalSize( + map.getOutputSizes(SurfaceTexture.class), + rotatedPreviewWidth, + rotatedPreviewHeight, + maxPreviewWidth, + maxPreviewHeight, + largest); + + // We fit the aspect ratio of TextureView to the size of preview we picked. + int orientation = getResources().getConfiguration().orientation; + if (orientation == Configuration.ORIENTATION_LANDSCAPE) { + textureView.setAspectRatio(previewSize.getWidth(), previewSize.getHeight()); + } else { + textureView.setAspectRatio(previewSize.getHeight(), previewSize.getWidth()); + } + + this.cameraId = cameraId; + return; + } + } catch (CameraAccessException e) { + e.printStackTrace(); + } catch (NullPointerException e) { + // Currently an NPE is thrown when the Camera2API is used but not supported on the + // device this code runs. + ErrorDialog.newInstance(getString(R.string.camera_error)) + .show(getChildFragmentManager(), FRAGMENT_DIALOG); + } + } + + private String[] getRequiredPermissions() { + Activity activity = getActivity(); + try { + PackageInfo info = + activity + .getPackageManager() + .getPackageInfo(activity.getPackageName(), PackageManager.GET_PERMISSIONS); + String[] ps = info.requestedPermissions; + if (ps != null && ps.length > 0) { + return ps; + } else { + return new String[0]; + } + } catch (Exception e) { + return new String[0]; + } + } + + /** Opens the camera specified by {@link Camera2BasicFragment#cameraId}. */ + private void openCamera(int width, int height) { + if (!checkedPermissions && !allPermissionsGranted()) { + FragmentCompat.requestPermissions(this, getRequiredPermissions(), PERMISSIONS_REQUEST_CODE); + return; + } else { + checkedPermissions = true; + } + setUpCameraOutputs(width, height); + configureTransform(width, height); + Activity activity = getActivity(); + CameraManager manager = (CameraManager) activity.getSystemService(Context.CAMERA_SERVICE); + try { + if (!cameraOpenCloseLock.tryAcquire(2500, TimeUnit.MILLISECONDS)) { + throw new RuntimeException("Time out waiting to lock camera opening."); + } + manager.openCamera(cameraId, stateCallback, backgroundHandler); + } catch (CameraAccessException e) { + e.printStackTrace(); + } catch (InterruptedException e) { + throw new RuntimeException("Interrupted while trying to lock camera opening.", e); + } + } + + private boolean allPermissionsGranted() { + for (String permission : getRequiredPermissions()) { + if (ContextCompat.checkSelfPermission(getActivity(), permission) + != PackageManager.PERMISSION_GRANTED) { + return false; + } + } + return true; + } + + @Override + public void onRequestPermissionsResult( + int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults); + } + + /** Closes the current {@link CameraDevice}. */ + private void closeCamera() { + try { + cameraOpenCloseLock.acquire(); + if (null != captureSession) { + captureSession.close(); + captureSession = null; + } + if (null != cameraDevice) { + cameraDevice.close(); + cameraDevice = null; + } + if (null != imageReader) { + imageReader.close(); + imageReader = null; + } + } catch (InterruptedException e) { + throw new RuntimeException("Interrupted while trying to lock camera closing.", e); + } finally { + cameraOpenCloseLock.release(); + } + } + + /** Starts a background thread and its {@link Handler}. */ + private void startBackgroundThread() { + backgroundThread = new HandlerThread(HANDLE_THREAD_NAME); + backgroundThread.start(); + backgroundHandler = new Handler(backgroundThread.getLooper()); + synchronized (lock) { + runClassifier = true; + } + backgroundHandler.post(periodicClassify); + } + + /** Stops the background thread and its {@link Handler}. */ + private void stopBackgroundThread() { + backgroundThread.quitSafely(); + try { + backgroundThread.join(); + backgroundThread = null; + backgroundHandler = null; + synchronized (lock) { + runClassifier = false; + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + /** Takes photos and classify them periodically. */ + private Runnable periodicClassify = + new Runnable() { + @Override + public void run() { + synchronized (lock) { + if (runClassifier) { + classifyFrame(); + } + } + backgroundHandler.post(periodicClassify); + } + }; + + /** Creates a new {@link CameraCaptureSession} for camera preview. */ + private void createCameraPreviewSession() { + try { + SurfaceTexture texture = textureView.getSurfaceTexture(); + assert texture != null; + + // We configure the size of default buffer to be the size of camera preview we want. + texture.setDefaultBufferSize(previewSize.getWidth(), previewSize.getHeight()); + + // This is the output Surface we need to start preview. + Surface surface = new Surface(texture); + + // We set up a CaptureRequest.Builder with the output Surface. + previewRequestBuilder = cameraDevice.createCaptureRequest(CameraDevice.TEMPLATE_PREVIEW); + previewRequestBuilder.addTarget(surface); + + // Here, we create a CameraCaptureSession for camera preview. + cameraDevice.createCaptureSession( + Arrays.asList(surface), + new CameraCaptureSession.StateCallback() { + + @Override + public void onConfigured(@NonNull CameraCaptureSession cameraCaptureSession) { + // The camera is already closed + if (null == cameraDevice) { + return; + } + + // When the session is ready, we start displaying the preview. + captureSession = cameraCaptureSession; + try { + // Auto focus should be continuous for camera preview. + previewRequestBuilder.set( + CaptureRequest.CONTROL_AF_MODE, + CaptureRequest.CONTROL_AF_MODE_CONTINUOUS_PICTURE); + + // Finally, we start displaying the camera preview. + previewRequest = previewRequestBuilder.build(); + captureSession.setRepeatingRequest( + previewRequest, captureCallback, backgroundHandler); + } catch (CameraAccessException e) { + e.printStackTrace(); + } + } + + @Override + public void onConfigureFailed(@NonNull CameraCaptureSession cameraCaptureSession) { + showToast("Failed"); + } + }, + null); + } catch (CameraAccessException e) { + e.printStackTrace(); + } + } + + /** + * Configures the necessary {@link android.graphics.Matrix} transformation to `textureView`. This + * method should be called after the camera preview size is determined in setUpCameraOutputs and + * also the size of `textureView` is fixed. + * + * @param viewWidth The width of `textureView` + * @param viewHeight The height of `textureView` + */ + private void configureTransform(int viewWidth, int viewHeight) { + Activity activity = getActivity(); + if (null == textureView || null == previewSize || null == activity) { + return; + } + int rotation = activity.getWindowManager().getDefaultDisplay().getRotation(); + Matrix matrix = new Matrix(); + RectF viewRect = new RectF(0, 0, viewWidth, viewHeight); + RectF bufferRect = new RectF(0, 0, previewSize.getHeight(), previewSize.getWidth()); + float centerX = viewRect.centerX(); + float centerY = viewRect.centerY(); + if (Surface.ROTATION_90 == rotation || Surface.ROTATION_270 == rotation) { + bufferRect.offset(centerX - bufferRect.centerX(), centerY - bufferRect.centerY()); + matrix.setRectToRect(viewRect, bufferRect, Matrix.ScaleToFit.FILL); + float scale = + Math.max( + (float) viewHeight / previewSize.getHeight(), + (float) viewWidth / previewSize.getWidth()); + matrix.postScale(scale, scale, centerX, centerY); + matrix.postRotate(90 * (rotation - 2), centerX, centerY); + } else if (Surface.ROTATION_180 == rotation) { + matrix.postRotate(180, centerX, centerY); + } + textureView.setTransform(matrix); + } + + /** Classifies a frame from the preview stream. */ + private void classifyFrame() { + if (classifier == null || getActivity() == null || cameraDevice == null) { + showToast("Uninitialized Classifier or invalid context."); + return; + } + Bitmap bitmap = + textureView.getBitmap(ImageClassifier.DIM_IMG_SIZE_X, ImageClassifier.DIM_IMG_SIZE_Y); + String textToShow = classifier.classifyFrame(bitmap); + bitmap.recycle(); + showToast(textToShow); + } + + /** Compares two {@code Size}s based on their areas. */ + private static class CompareSizesByArea implements Comparator { + + @Override + public int compare(Size lhs, Size rhs) { + // We cast here to ensure the multiplications won't overflow + return Long.signum( + (long) lhs.getWidth() * lhs.getHeight() - (long) rhs.getWidth() * rhs.getHeight()); + } + } + + /** Shows an error message dialog. */ + public static class ErrorDialog extends DialogFragment { + + private static final String ARG_MESSAGE = "message"; + + public static ErrorDialog newInstance(String message) { + ErrorDialog dialog = new ErrorDialog(); + Bundle args = new Bundle(); + args.putString(ARG_MESSAGE, message); + dialog.setArguments(args); + return dialog; + } + + @Override + public Dialog onCreateDialog(Bundle savedInstanceState) { + final Activity activity = getActivity(); + return new AlertDialog.Builder(activity) + .setMessage(getArguments().getString(ARG_MESSAGE)) + .setPositiveButton( + android.R.string.ok, + new DialogInterface.OnClickListener() { + @Override + public void onClick(DialogInterface dialogInterface, int i) { + activity.finish(); + } + }) + .create(); + } + } +} diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/CameraActivity.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/CameraActivity.java new file mode 100644 index 0000000000..e7161ddb26 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/CameraActivity.java @@ -0,0 +1,35 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.tflitecamerademo; + +import android.app.Activity; +import android.os.Bundle; + +/** Main {@code Activity} class for the Camera app. */ +public class CameraActivity extends Activity { + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_camera); + if (null == savedInstanceState) { + getFragmentManager() + .beginTransaction() + .replace(R.id.container, Camera2BasicFragment.newInstance()) + .commit(); + } + } +} diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java new file mode 100644 index 0000000000..e7bad46370 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -0,0 +1,184 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.tflitecamerademo; + +import android.app.Activity; +import android.content.res.AssetFileDescriptor; +import android.graphics.Bitmap; +import android.os.SystemClock; +import android.util.Log; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import org.tensorflow.lite.Interpreter; + +/** Classifies images with Tensorflow Lite. */ +public class ImageClassifier { + + /** Tag for the {@link Log}. */ + private static final String TAG = "TfLiteCameraDemo"; + + /** Name of the model file stored in Assets. */ + private static final String MODEL_PATH = "mobilenet_quant_v1_224.tflite"; + + /** Name of the label file stored in Assets. */ + private static final String LABEL_PATH = "labels.txt"; + + /** Number of results to show in the UI. */ + private static final int RESULTS_TO_SHOW = 3; + + /** Dimensions of inputs. */ + private static final int DIM_BATCH_SIZE = 1; + + private static final int DIM_PIXEL_SIZE = 3; + + static final int DIM_IMG_SIZE_X = 224; + static final int DIM_IMG_SIZE_Y = 224; + + /* Preallocated buffers for storing image data in. */ + private int[] intValues = new int[DIM_IMG_SIZE_X * DIM_IMG_SIZE_Y]; + + /** An instance of the driver class to run model inference with Tensorflow Lite. */ + private Interpreter tflite; + + /** Labels corresponding to the output of the vision model. */ + private List labelList; + + /** A ByteBuffer to hold image data, to be feed into Tensorflow Lite as inputs. */ + private ByteBuffer imgData = null; + + /** An array to hold inference results, to be feed into Tensorflow Lite as outputs. */ + private byte[][] labelProbArray = null; + + private PriorityQueue> sortedLabels = + new PriorityQueue<>( + RESULTS_TO_SHOW, + new Comparator>() { + @Override + public int compare(Map.Entry o1, Map.Entry o2) { + return (o1.getValue()).compareTo(o2.getValue()); + } + }); + + /** Initializes an {@code ImageClassifier}. */ + ImageClassifier(Activity activity) throws IOException { + tflite = new Interpreter(loadModelFile(activity)); + labelList = loadLabelList(activity); + imgData = + ByteBuffer.allocateDirect( + DIM_BATCH_SIZE * DIM_IMG_SIZE_X * DIM_IMG_SIZE_Y * DIM_PIXEL_SIZE); + imgData.order(ByteOrder.nativeOrder()); + labelProbArray = new byte[1][labelList.size()]; + Log.d(TAG, "Created a Tensorflow Lite Image Classifier."); + } + + /** Classifies a frame from the preview stream. */ + String classifyFrame(Bitmap bitmap) { + if (tflite == null) { + Log.e(TAG, "Image classifier has not been initialized; Skipped."); + return "Uninitialized Classifier."; + } + convertBitmapToByteBuffer(bitmap); + // Here's where the magic happens!!! + long startTime = SystemClock.uptimeMillis(); + tflite.run(imgData, labelProbArray); + long endTime = SystemClock.uptimeMillis(); + Log.d(TAG, "Timecost to run model inference: " + Long.toString(endTime - startTime)); + String textToShow = printTopKLabels(); + textToShow = Long.toString(endTime - startTime) + "ms" + textToShow; + return textToShow; + } + + /** Closes tflite to release resources. */ + public void close() { + tflite.close(); + tflite = null; + } + + /** Reads label list from Assets. */ + private List loadLabelList(Activity activity) throws IOException { + List labelList = new ArrayList(); + BufferedReader reader = + new BufferedReader(new InputStreamReader(activity.getAssets().open(LABEL_PATH))); + String line; + while ((line = reader.readLine()) != null) { + labelList.add(line); + } + reader.close(); + return labelList; + } + + /** Memory-map the model file in Assets. */ + private MappedByteBuffer loadModelFile(Activity activity) throws IOException { + AssetFileDescriptor fileDescriptor = activity.getAssets().openFd(MODEL_PATH); + FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor()); + FileChannel fileChannel = inputStream.getChannel(); + long startOffset = fileDescriptor.getStartOffset(); + long declaredLength = fileDescriptor.getDeclaredLength(); + return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength); + } + + /** Writes Image data into a {@code ByteBuffer}. */ + private void convertBitmapToByteBuffer(Bitmap bitmap) { + if (imgData == null) { + return; + } + imgData.rewind(); + bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight()); + // Convert the image to floating point. + int pixel = 0; + long startTime = SystemClock.uptimeMillis(); + for (int i = 0; i < DIM_IMG_SIZE_X; ++i) { + for (int j = 0; j < DIM_IMG_SIZE_Y; ++j) { + final int val = intValues[pixel++]; + imgData.put((byte) ((val >> 16) & 0xFF)); + imgData.put((byte) ((val >> 8) & 0xFF)); + imgData.put((byte) (val & 0xFF)); + } + } + long endTime = SystemClock.uptimeMillis(); + Log.d(TAG, "Timecost to put values into ByteBuffer: " + Long.toString(endTime - startTime)); + } + + /** Prints top-K labels, to be shown in UI as the results. */ + private String printTopKLabels() { + for (int i = 0; i < labelList.size(); ++i) { + sortedLabels.add( + new AbstractMap.SimpleEntry<>(labelList.get(i), (labelProbArray[0][i] & 0xff) / 255.0f)); + if (sortedLabels.size() > RESULTS_TO_SHOW) { + sortedLabels.poll(); + } + } + String textToShow = ""; + final int size = sortedLabels.size(); + for (int i = 0; i < size; ++i) { + Map.Entry label = sortedLabels.poll(); + textToShow = "\n" + label.getKey() + ":" + Float.toString(label.getValue()) + textToShow; + } + return textToShow; + } +} diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/ic_action_info.png b/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/ic_action_info.png new file mode 100644 index 0000000000000000000000000000000000000000..e0a70008b10b98162b4710385e21ac65333f1231 GIT binary patch literal 490 zcmeAS@N?(olHy`uVBq!ia0y~yU@!n-4i*LmhQHi~JPZts8$DedLn;{GhB3A?1q$3# zy0ehMf;ENz%DdlgOsX&VTfA;9R;re5T#&r5&S}!K>v6wJ=G+cHdwVzQx9S7Z&mPat zPdj!}(X~$|Bx~QVTGO!4r=@?_o?m)tzv}lGokE8HRqQ&p-_M_nHjQ#tUoyE%Bczc_ zV@ZeK_4}o(pDk&>e>iaY%|(~>{`YJ-{o+E{WSPHlyWY5pzIbAtb!UxFbJ?tvS0Zaa zCv&l_uKap(XY=%Q)BQma-G>uqb;y`8HvgR$nwk9es*UFC=T&)5eN9nEwr)s}_4ua4bNm+-zAa3S{g z#@M@52Tqb_=t`Wmh}a#vU2{cHVz(?N;`@x=+S(AA`!~J838R56)p=U|{fc^>bP0l+XkKT=(Tz literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/ic_launcher.png b/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/ic_launcher.png new file mode 100644 index 0000000000000000000000000000000000000000..c22509d8dfccae14d9470e3042a9ed5b469ca2c9 GIT binary patch literal 3136 zcmeAS@N?(olHy`uVBq!ia0y~yVDJE84mJh`hS0a0-5D5o*gahwLn;{G%wnF9dQzxu zzfJA#<)+)NZ_90)mwS5~2>tHOmCn6)>bjxiZOQeE7JX0H``bMwGNh) zw^;{%e`$Aj`EQjSKfcTVx81$8@=(6j6KX6ZQZ9#50`R`DRYr z*3;MPcWzz#_2HfFtG6%z{wnNe{qCZJZP|sEzP`R%rAtTlMy#?jDSUJ!(?Q*VH$2+k z-(UFqO2G_o##LYYR&3n3accF(wzjsax!fG9cU0Zxgy4+4og6$i_>BhYqo2_!1zB+vVQRpyD_U4XBk~1bcyqV>^X}4|Karj@qM$$~I*|{xN+=caOgF=Qu#{|Yd%rCYtU{+u= zNw~t0k)6Np;ky$D91dzMk=ij^W}d{98@`hwSHGITuzOyafYhNEH(~-0E3lOF@-D6o zZB|&kPVfQ`@8JbEWSAIDR;(32vX9-mK~CwaNB1)~M#fESx3}y(D5LyLL%Mp^!<8@e zK8p#gncw?uZpZ1)#vA7@OnCU$N@%Nw!~}+CVgb9?MLFy2eY0z((Em9PPVt^F&sCJ@ zFp*HnyWDtPX?-ByskJRp^1fH(Jh#kb4sdh)A;GbI&D2EQgwtZ|BI1EALf32FIDYx5 z;qqY7bm!YXT$((G<>NV>YaLj^J@2(M_P;kTT+6b6rNcx8aIUXKdF@+8<_0{i~zK+S5<)CA=?ZNAckV#9fds&`RP>G$y z6fQGC+j0JvLnV35bL*u$>kmBaZEy&d_KC<4*v!VT!ZbLB$@pB$!+$USBqV^V{;x4{4bgZics;AH0xG{8nl3)`UICb<)9|=a)|Twzgru z?L?zhfuZ|rrrrJ1!dh_XZ{j&ugRdzkvhE0ntbQ`@pv3p;vTQFZG%&?^_KH(C)In7Y~=g5j$6ZO59bA&sh7A8Z}_Xf;pg-V zdyPMad}3C5{QRJy`Jp@B4pJJrp&x1|gx_526zKauG15)*>zl^?dqt=E-Z%;IgK!Al8) z3bDP_58of1Yw}EPOILHq-sw8>^D49y$yR#xJFqW zHR0v@biJYWv_tdpL#s8_H%6ttX2{CqKB{n7%K*sY6cHeu9W|I1n?BUyTj&$2C1%aV>-Mevk7G4FTwe4{(1B;b+B5dmawpGd{OiA( zq3D!k*RH0+vX)s=VM98nOf9o&%5(YuUj@|kblxPD_q%^w9bGs~+oPXfQnOj+a1%E3A3N5X{%8($=Ec)b3}wQKeVCiyq6XH>UkS@0+M-r_aQ zPDSfAj&6P6HKUgC%`f%_(OdR=`Oeiz$<)`M$WjTFZ!*1aX7QDA|N6@5{YRFzKB$<`}axmANQjG{y(p4H@s^vICIwEF;B)^MuxrBYZdbU%-n0oRQ}-z zYr)0S3=3yG`m)P+e$qi>Ay&?#np@8{WZ&c6U&~pN`&qoE{o29$YfpdgOg+T3g%oLV|#xtPNN)e!N2vv&PWTeb3#XOwd&d*T9_ zNzJ==Yn)E9uyfPCqHs`Aab6w+`}O5_TU2<|?tHs+B;fLV%PopC!>)yS8XC2yY&~xN zyL-9&|D)!ws$buqxPxEeNp0oKf4-J^&g=GVy&9~;$f@AcC&XRc#{a1}V|IF%x7m z5*?UK7<}8SU$Zf8WZM`iXfOZsnczuK^tWJ z__jAX2f2Ow(^!1o_WRWXj0?msNL=`TYbRTTWwU}!dVKEfZM_Tfo%w{ z@b=rwn)>~o+n(*+{_^qknv;cTWo28YzsWps<>jTN+Vf1auLYdl5I$wbgU>&mjMC2R zcz1Jix_HM6&KG|#I%d`78O`k3o_Dv)NP?%Vi?w2JTo><-7`=A4#fBXYf&W7!k9M2& zDP4$J?8aNoqrYtB2IZa7W;qtz=dM}K^|Pw`^P{F<>eVdX<@pZ^EPS?RUw;>|DdnU> z*@dsKuW!%0y)Adq&78hP`2vE1iQ3`oVgkOtx_UdwP()7d-OY{3?&@!T$%>Y@NF@CK z_cu{#>%E4Y`Txy>5EAKtMpTQqk!XCKrC&_i?V0 zG)^{ZloAmrc5uq2lsV?w{r!^0X=h>z`LC#M*52$hEq0MhXVi;nD=&TwTJV{V zpO5d|k(C-}9V4b}ImFZKtxy%!xwGj))ARk+-{0wN_dPk+F>$6&_3v+QKTn8iHt-5) z8{rc>B^Zw&MKR+M2 ze|LMnJZpfsawL0+Z{^=#Tji6P9u|t;xD>KR_uPSy{dIqL>qcz}FnFpJx+>)1v^XA< z%li+;?Wy>fd3LsWc)(#18?oXG_6N`GS{1r_oA~F6Ox9DnM78gw&VCfc61nHV>hSeD z)>;ej2D?U=vwi4iw|iG*G3mo&hhvQDubsa5G}hVqe|+;ab+!hd;?}U$Yo{x__Z|IV znxG-i?%=GzckJgHE2~SdYJPrNd+_nBM=?Lv8*K2q7Q3V1;n5uK+b)G1ZU)j3Txa-i zG0XfdO;FpQ6VWHn9+43_J@$p3YL4NttM$HyuF9pMhZM_}Rjlc9+iAclT_U1Bp+%*J ZNhP$mPH2zjV+IBW22WQ%mvv4FO#o4p>6rik literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/tile.9.png b/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-hdpi/tile.9.png new file mode 100644 index 0000000000000000000000000000000000000000..a84e3ef52c6dce90ccfa98f64db25fad7a8f0289 GIT binary patch literal 116 zcmeAS@N?(olHy`uVBq!ia0y~yVBlk5U=ZS9V_;y&j(Km$z`$Vb>Eakt!I=Ex|9|^1 z9+v-(64PyFB&4L6sIdn7KdxtDSjP8;_g6NHV98aB7N&MF&GZmuw`WJ&CthV@aH(F} VQuI6E4+8@OgQu&X%Q~loCIH3VBU}Ig literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-mdpi/ic_action_info.png b/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-mdpi/ic_action_info.png new file mode 100644 index 0000000000000000000000000000000000000000..520c2dd100b092fad5987dc1b41575e1681b459c GIT binary patch literal 320 zcmeAS@N?(olHy`uVBq!ia0y~yU{C;I4i*LmhMUEk>KPaq**#qxLn;{GPBG+fG7xCh zKX`PZaNnt}i!0vlesPFF$oG%8lyFK)`mXrZ(Fac~=$b8@>UhsYfX6lIF+OE!=izX_l3>^B@a*!nbsUEl_}}1}G{dg(h(nIqo&yT; zA3U$H)CgTQ(qb|fSv9d@G3N`nPhsB-B}6pB{wDHcFnfj+j}a%9o>C7)Ozyd-BZski7&tQYtrO(>)g(*+&}Bj z(LenM^;a{5HuBWxZ*tHKV6$jmaqsQzIQ}hpH=jKV(F&cp-|*TkUGC|6RS%yO_tlDM zn02U~o~|FC)#Le@|A3{qN#>;`tQ%~a4UQf?>S)FOpP@;nfzhC2mV@zvlYfp!@u^K^ zoUEol`P_rL%+1Y2L)uy^J8ckTmG0~|}9a@=~b#do^}htzvH zsc!|AGHYhCey(nIYCYe5ZQ=pZDo$p}Ns7hQ_XW(%U)Dz0U2I=@n=wk(&16Ls_pgS@ znR9q;`<6Ue!XYfjxMxL(Y}VF>EZ&5_H8Z}ku9?MlRodzB+=Um{1*}!v7_xG8t@L}g z-m>7Mv))X+JTK_--r`&0SzC4HF#h?)b)fC2!5_~EJ3q%+@(j1T-IUqD~fx?J$cPH&$#{=hRt;ijpc{Vyia_Za$^gRnOuCHi<;;+fS(Gj&-T*sPsyCohck`&!P>ki+ToU+UOiwsS)M3#Ts$*0AZ7iB5l3 zzM*8rx~aczAAUFerQV}P-tL4))*GtSbfh>;XC3T&F4)TRze`iZFGp2oVvgRWDH|ED z)ycEoPT#|%vcKaa z*CYK$)7rCCVi_LaZ%S3}OnGTw%bg>ZwY^o0{dkB1$8XPX3X>yq7E~PZSy+;4@j>oI zv2D&xmcRMUx&H0n)L-swXx>_!7+2oA@9e9i9~M+A#&&Y-$&}`sXv6SJiD`l-**1LAdl`B}U%yzxCP!cNXil<`EtWZQ*N zwUZwED*s)@P-ik}@%eMxj!&2}gZ<`ujs$bxd_t|=7EaVw3Ijl|YvrKEBI{Ozh+f26qhfgN@Naf#a z@JSYNdA;#O4|nK+hrgQxQs-=4D4;&C;?cRgyUXWu^_?Gku~}5ovUWvKGUaEcCPVLoBYykvEvJeFsr$I z?6YV0{%fz=>$gIfCJZO!#}~-BOEdlbiF5b;M4`F25Z4?RV{@ zlPU8IS+_fwHXm&GQf1rpEo?)`1xdd4C0Da_8(GgBULU&Ta;1%&zorP+)f&6`Pl|Si z-?8?vEOubqbRkklOn7O~%C4hHnhA2J8!u;=WSL0tBsvOsY`&>;;klEJg~w9;7Z)_c z6sCNss+E{=z^;v1T&`dJv;mLxT)*8*gI2Z%^-DyqbXmZB_hZGKrxtysyYEIOoPNvj z`twh@m+9pf7cFoQ&yUmjUh{wBz2Bb30d;%hxNXkw-V^s;I459QZ-vy~iTB^DFAP}G z^k$R_oWy>USGC=U%^8mL~VTT=LDn z@_*m&7N4{HKI!=5j}=^dvpYz9ne#{rdh}C$>C_jS{=s z_3{?KnQDKD+wv&IQm?|x`+R%%C?1^Kxz67wq3MhHA=ydy{~!0&nkcue=U+*$vq_)% zOa4Cdiw6p<*5{vIa`^1NTE_KnBo+TMA1V<$U$^t-brbtz&ZV_#@saNQ$*P>&&OG~= zIq|jSgUPBo+XPj=q?_q`IMg$i_sstDG4*r0bh`8=mU|q<*5!AK6BeqsJrmydLDb$e z+oiv`=-i$LzUqBXsy_Zyd9sN6q^zaVg_+tfi>^&(*55UQgG2x6f+ue*+?&jF{j60s zx3N4fQQ6|AG-sAi_oqXu?7XLryt^TLiiKA%cqg~sZhrN|$+N)yA=wid(KovOmDCv<1l1xw}4beDTLtt?n;`%|M|xjXM2T(ZL5_?X<9 zF9oNB!m4c2>#y%$pm9xr!yc9^lt3Xnvd5bv_-G OcsyPGT-G@yGywo*_X!IC literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xhdpi/ic_launcher.png b/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xhdpi/ic_launcher.png new file mode 100644 index 0000000000000000000000000000000000000000..15e419b7ccd88651bd21dac36853a827fc4075b8 GIT binary patch literal 4294 zcmeAS@N?(olHy`uVBq!ia0y~yU`POA4mJh`hDS5XEf^RCdOcknLn;{GOl>`-8ZOYX z-|F-3Be55D{APA7;1sKF*zr)Pe&&wdyVvi&cY8N`SKG5q`?}wXn5@0b9Qyo>LDG>9 zL1nidiNY|aGfn#Ue|`(hw8)urz9ZO;C0a@CJi}eCk{NG;W>}Y3B;;0p4fV5jdvj~+ zYsCcx*5>=|5(@t6UO$(AW5dIr*QAvF?8UA>o4@q zE_v%SY)d#3xY1~)kHun%jYTGmZMnC%{oTK7R~2VE(_{Mw^$83@iym=3uzQg8{mV(= z@I5_Jj3xS}4z}AktL%4W_u1ZNJHF_+!YjE`eByiV%W)W_sHX`uHqR+}H;-3g+1_h= z<>!90xG#3uQ(x&}MHkE6<0i>P?&gc$71lft{%^-G35Z>4*lubNjOb3NL) zVN%BVYQE|dvx@3}i9C8?t`Kdv@~-(3o2T=#E3-INA8Pur2nwILyk^dJ=a#SBDwbi_ zK1==(vEX3YAb+FSh0$h5Gt1}h&-)lJ^>AcXo^rGdKBB3D(nleo~qWd5fY8(lLw9J`j!IaztVnlFcmuYl!DVaW!QbjGMj zoI!?}-_%%M?c)e}Z8USH^SPgCDZOVJl{^$}M2#d{K3{YFpyv3)*zrg3qs#uA#Z`VP zq%3p@5%kR7`%ZQule%A6L+j2@n{5l7H&@@(D3|CJ-bnEH6>#^=`H{NjE*v-1_FB_Ba z`v`|0wc9L?oU=S|W13CkpYnGf64U0L-I{wl?Z@`q+Yz@r%A2?5+TGq}xo5jr{G@X) z=X}{?c{%;B>eQVlUhn(4Z`Hb0@9fV1-|Hs+uy?wPzyWES)D~T7yPHg%>X%MDPWaya z(rns(-uH2DPfQia^V@9jP$ltmrNLtD70WneKIp#KzT3a#*5l{)jN11;vV2^m#3m%O z)9&E|;rR~_y4AUKG;H`aQ|(i`oR@(1{jZ+i@42^~_bNVvr1Dctg-j=mODE2+-B#FhdN%pU?DW?m{WgUz^ZG{ z7ie@f-aDLd{qx^f9_w|JS06f5Kl8JI$@%;^141Op` zRB@Q_=wG`m<#s@Bx*)H2+)ojX57+)LsNDOSF{}FU1LnjR50&=1*Y9(f$dnV${Pl0b zjfVzrJS2()u9$sb?GklR;kOrZli@zx*Dz~dO=G`T!!pByW4T8wtK5xqW^m6IKfEpU z!Zuc|w*jjfcd@;&jM%^M{JB%c^Nkr@Tm+xw$8mMf4;Q_<&on@8)|zdBwkKXp4q>eQ z#kwKq^J#M-B_}z%=Z+Cym?9*vD^50?zq5D8wcZUMBqRPX^;oi&S@LI935uqjyebDJ1$)=@o*34(BWAmDK_8(eZ-}ZI$PYZ$l!e-nnbQ09ENi8fbuC}V`Rgs)?m3H7Y(LlC*rl|$_rf;OV|N*j|MTrl%sWzeOvXDv zL$F+&E%@D~qKWsa1C7hi#3!6kRI+bb;`R6V#rj4?_TSZ(8!82T_L_eSdJ>((=o6MA z7h%XZZO@yv_m?euw?MvRZ`aD~mgahuiyKPbMf{LiGt;TAWY@pt`}&m|EV(P4^UhSO z_zEqPz4UkgQrSy3D^|5K?`P&ceED&d8L-2v`jJK>xhWyvgez=|~Q77_o!hw#(fBr10Oje0!_vcvjl;M5pF2kZ} z!3}4heYu9EixW;y^0*g&QjDd08_UTPb3Y!=h@1cI->iEViUk5MW^+tg z6&yI>f5_=WQx)HpJ~6s-D6nB7_tR-#Lf@~lnkdSjb#2wF%b%y*KGA$`?|r3d3mPDd}a zSifXZ;k12C0w1+F+dL*+tpC2k(m}(B^Y`Rg(|bO1E|3T1kH-mPVVpKfS+Ra}>*dTxfqe z|6Fg6vVV%JOGCoUBgW4cx^kSo353!`^y_oyvP< zrfP5aVk((&deJVwB+W?cLx2A*-6hud%a+H=;L*?W2#&pda*Un|Qau_se(Gq1zRvP` zt=6zb;N7Z4XWI20YU3?Fl^eWbS)}mhtE;=i15-DX9$Vk&OOMxG-|AxQ-@tH(xkBps z0{Iu2r!zjB;NPoO#?s;b?Jgh${zc~ zdBv=moRkudxHBE@n~)p1EV}r0+N&zh&sCg3tFAH2`z~Ja|DMv`58gWyKPkI7@CmMQ zY7Cfqr9e^Mdd()~C&#TPl^pps`N!P{&soJ3;-6J?mnkgnJfsxzBSt~9_2l+NN(wrM zo@;PT`W|7~EYi}bz+v=5um9hX9UGD#mquq`RC$H=Jce=mS?)R%Y`?lBS z9fuzC2s|)USB`&hA;I-jV%V7oRqiHLhesAINh}?@@6>bj8LDNpkGRZH4VLKr&H9gl zF|RmyyV#orkGbQ73+C5Nk$8EBKO+0W;{5&Zj(#}j{$T!@#Y=Pq{)d;WXV{&ZH*4AB zzkAdL4)`CqK09k)BYUC^=bS|y5*Ced-?=OJ=WZzNTdVe2OV4G()Nfhur5cvFH)p)h zxn@>>+e|F|_P^ZQ+aSnx-}TVieWw+_KHakZ)!NDLGM5L>Zsc7(MK88$ip|8JOYZe+ zKCm4=)+71(5W9Sh!NCvq49gGN7C$?quN|^tL(n|Q+{>pK_zfh^Jbe51Z`a*C^SnPM z6$hrR7rMy$fulmY_-dfV07TxVF1HO*Th5a;mv+S=U@X8!ufqVV{|sj1rOyGvePn#yi4_xBvT zzrVlBKYst-zlLXy>(^ITS39_b&7ImE&#mCx#v{3Za_R4HZ?)%Im1ZTfd+5%2VERF$ zB3PgK&V$+y1{F~v;jH~-Z*PU_1uSr|`E$TcW%bop^NiE^zRz`Dv|)0ctl`Vc%aS#N}ohr@v!(+`duq z2BX1~&54KGdes+oPiob>^j_BIQTd+-=YJ_opYvJi^oi;E@s1W3uAe{OFYhIHY}cNj z&t_j$vGQ$YOP+9ZhE-{n;V_6iTba9!(n%?tgjjpYW-7Uga zfBg9I_K5Zi7cOvJ$dnL#YkHSaz$fj|WPiJj2~97%t=1HNG0MNYt5n#xR7xzs#W< zu_1x&-tK2>uGCHy?wp(IaBb($&(F`V2wLjptLGfUcF})Qgt^kJ#8u1+Q_dV}w z@F#E1FPWb<-SaJrpZ!=7xOm#B->YmCza~gdk4!5n`t-T-^Ru%DQ#(o>{a=-RcyO@Q zh-o$xubblLhP3N5^mlPM+_Dpu|(zAHIGq-J$(iZpI`#<7Azy2{J*vIbq%pS?kUSe!F|4^F?&U zw>LMFT}qg@O#YcL(>$Vms&=^Ek!RnNU;55rm3MsZxW_l_9Rr8b)B{r2Hl>`Lw68_=Uyp{9QdgDmgj-S225} z+)s;L=0D%==i2S}vPvEtV6>|FVPL&o;DAZClApAbC1I*7a92;lp5-vwb;eC z;O9CstFj`=$2pHCHLmaNTN-ZAx8AP&-JR~pO(`e)?Hu;RH{Fvp%e`eX@u+4oLyrsB z!}H>nH#&sC=cr+R?B_U~QZmOVQ$F`4)OnG;zt zC0=Q=hSP2p-21zu@vy2!+rOh743iIfCHQ`bHQ;{XdrW|RAMZ8ceY*C>O!*hz#$Mby z>4lu*|D+!coe7=>qG!0-CKj?ZvmU(O!umAbP4I|S*E&_#a`|7=orNE_G&3$}?KvFW xBdW|ZX})BuYYC6|T+?6gm3T8foTc`$zR>Mp>G4eWWnf@n@O1TaS?83{1ONzMGUxyR literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xxhdpi/ic_action_info.png b/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xxhdpi/ic_action_info.png new file mode 100644 index 0000000000000000000000000000000000000000..fd933333b71590608d91201aad29553f9b365b6a GIT binary patch literal 952 zcmeAS@N?(olHy`uVBq!ia0y~yU`POA4i*Lm29JsRH#0CW*L%7+hEy=Vo#p5q?I>_u zU1g<&;|2+qsTBqqiz@Yvg$1;)I;_2?8c@7SsVU)cprPi9ijxN$+Bi?d<-PqdNdVxmQA}~_oPB-*-<#OVg=pAyzCjDVO!{?{|Q?25P+N-|E zNZP)8pn9#4`Ln^S2TX52>)QUW4#~@y&u!qeB%{^;XXm`T?Dh{e*E(>u`CGL2{wOMx zv(0&+!!!A6e~HWfxXAB}(h-MqxmKKD)eD%&bmy3q*^h@`r!QE1>B@$j{YT#ZDETmN zUtwmQ%)bMFET(^8c(19qNLH`0Jm~5AE4{u;T<2S|^A+%IUm;q(t=l^8Z{k$)}8trLLnE8H@ z(4_xdD_=Z1nEZG{bFz$d|6*><#P|cUztf)zdo;6TT6IdKs$V?d8Eaj8?$pvcLC(8J znzAyxZkK&@kW%ry-FoD0%QiEsP3vB7`u6;|_UwqmQK^^TUKcF4^HF(j`101`;3uA+ zrwZ1vN>1-^=~uft`TWEUiElPPSCL45$<)ZaC4Jrx#!VNxPFl>)I~TpqjrD=JdAgaV zTYs2?(C*5eYP>#%jH1t34(DVku#2DhmEWj*l_yr3@%lSO@%nFn9X?!f`+ZC=`oWoB zDLYQze`#zZ@6O64Vg;_WeQ8)#(hIyT7w=P27_adt*Ao>tlO<)G?|W_bxJL zbX8u!(r1w}b;pCV>|(#pw`yoKa4Tyhe3?GMY9sIB9dC0vb8j20lD=6G%<20~=2B)M z1m-hBHNa*o)Fz8 zCfA@ z^$uGaWISHI{#x=r(M;QG&+^;9RqNgS_B)GB#5E9tTz>M-X$n*PaqMk80|Nttr>mdK II;Vst05V>`#sB~S literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xxhdpi/ic_launcher.png b/tensorflow/contrib/lite/java/demo/app/src/main/res/drawable-xxhdpi/ic_launcher.png new file mode 100644 index 0000000000000000000000000000000000000000..342ce34e1663960d8d7050a9be57face3571d336 GIT binary patch literal 7279 zcmeAS@N?(olHy`uVBq!ia0y~yV3+{H9Bd2>4A0#j?OUAk){`T6 zjurD2e6Q2Fx2E#QMpjBv-+^+ zp>mzoKmKcdkbUAi!EZIGH%~q*doA;W*GYvf!8)@*j~W)olGD_gw(ZDzQ`WHyC1yi<`iLPLDR*-+n z=JnZz-9huYj00BNr+uePS;ZuJfWd>&$RS9Cm2s)i0S`v5ojRxEUVW`< zUb(z#?>+4Y9R>~=Y%|!WHHan%Mrd5;op48feN5#kg?|^*EC#*(RsAN<`f*tJp2%edOvVvo96% zuAGwD(;m}lVU_&tvE*liEiBD;67ekwR|GGVl`FG7F-V*D@x__SZM z%KY^Qt9GlOt2&vyU%Bq@+y46k`O$ygKDYLa_xcraN&L4hN9`L2^E;dBCw<|)b^c8B zq<40oQ#{QsTzMyL@}e_#leU1U0PDOzQySMD6o^N5Vt*h##FhY9Deg zxWB+h`oaMOEPebN1)@;y5j2Yz{&H9>xV z>AdW{8Zo9TBKURhy>a#V7+G&UFL!q8f(_Zv)jzD66#adnUsOg?Y^}>(Mvwi1{}22S zTF|>>e&hr9XA%?S?04)A+96}Ucd^BVH*6NR=S|Bu^6_SW&vQB_lPtG8=2-fI4J9Yj zL>w<#OEBtel4NF^7%H+~HuT7o1#eqz52r*kde|h#iz<9NyOnRoJDtKkCmlFU7$+-F z;A7GGz~J#C`v25KyQ9Y$J^bzJUvK5lZ4@?0QR!2T$YISteyWD$VrJX!D_vz(eu>gH z$_AV3zJ|5whROHqK6;gJHT7el3iElT&6gs>WMAtjoWA=s;`s?lp z?@0Z2zi9d$+XZs3-)5Kmj%V0ao?gmur~BEhzh-y3C2sfF-ii0>wB7r5ZvW|fx*fk| z7VmVtT=FP;_Wdm@-f%8k8{2mG*}W&BySyyJie`$*rq|EgzD`NBLu})TyJilb=DD|@ zu03v+VCk+9+g|nd)_?vBt3;HKPcfReCtI#RiZ`2OLgoBOZjsGpH!CkZ-~ayykI*@$ zri`A)j7!C4{g&yIioDpOzmffO%yLVqW2g3rIIsWcrPUAZhx$B4c zU!S{hhpWH3=UVBS*ENg2M>Lz+Wh)%7`ThRl@$05DukSkbd-C35hO*R~hd!9!wAD6U;;?z4;k z?zM%gOeg;L%u{_WWT#LTziWc1L+h>fbqkD6uAFS~{m<(nH@+XY`?4lD{m^R3ZoIlc zY5ly`z5AQz+JAKpzPC=K)c8v1QjP8qr9I3`m(M8?um5TB<{z(4t@)pCmK$C@F395E zu4K7RxKv(*LrPYPb8S{smaO=L6GawVMEce@p3~kIEa>o*Q8?j2$P076TWt=(>o?iW zWz#5ibT)Q0{i8`CDOw)Jx4>MP9Y)?%`RrUv+LYCM`Hp zeS+5@$BJjKCDyr?!WD4CmQHlyes=Jod#8mJu{fk)_x2P zmtA1DIN?c-gk7}XWh(~%(yNR?7E-m=62;Mumv4BLu3F7^VaxJ^U;D#~cQ0RYgHfFC z;8Nu*-m;j6q}`MLI4PTK(b%(%ugy;R=`6`|-YblkVpF5THe1(Mu$=r@T9BkX=jOsZ z=JlU-`%Vkje6{7>E!pN5eBH@ENM?4CKidhLUXR(Sw`UZsY>v5FC-?229N(YFRBI=} z_ianQ6+c}v&AHojQNo|t#6PhMR`UNm{kmn#@uJ;R1(z?k$x>j~RC)VPRHugd%@4yZ z^S$M!b^TrR*3Q`W=c~*czifZpmQH*xzu<~zfB2gQQMLtF#OL|1|Jd5R;fw8tm(m4K zw=f=ddZOof!E?I*fj0evpZE_fP2Slk7}a{D73~j6@_)`scX5W`jxO)7`rmkdez9W1dX`U?VmET%rGLEvB zcB_>it$V@s)PZf*mW^gQHm~=yxXzwzalvHEp@vR_ta(C@Ilo`1dJ^-(-qE-Gdh4kx zSNUGs@wbaTeQI-e&2o!cZ5hCx!T@-_@q5j zTbBO@Kg(6&e4+DawmeoZ$lASAE%=?a+dIQ&-%KUu++%qBUXA^=?U|5in)31d)pc{% zbEgVi;xc_zT%e`vArTyZ{@i?q!lh!>2j@b+5l)ZI;#Q z^lQ7h`uLq$AE!P0keG8mf8l4NNv#~MYa`FMZd(}1>wQt>qQ^GPNGmE>D0J0ps##X!td}>v*KT=etaWfrrdRv)FAF}1+O!B{fAEKp6twyxzus| z;B|qJ!^>7yE9j}Je^I}tEq^M=&cgD3uRr4?u>~PZmRargp4-NB%xXg0*;9Onc3pUJ zTg_rZ!=leZb8fnteNsGW-?^>uYMtjfyU-$W)2pq_Q)iu%%C;@B+L>{qq;blhy)SlU zue?!n{-o^ey1%Uci-LCqoGpD>@l=2H`wU)tUj46@U*+O{&VTi(Q|^)7!^s)yoS)}v zGHXuXP}zU#)s55cMf2mFeSc`}KDRpkgu}keJHCpUbROf6Rh<0l-07(@8t)Ij`}N0* z?@DFstkdOE%n|o2C-@gzh--;zTL)ZmSf^zYR;J)})AZq1zueUacJ@DSmE4Wo>A%(BsO579zx_?Z)3ddFO}8b_4!tJO zUtE2{b@}!~sm3$*^Qgp@r!73)?s>fUWaFO;Pvc|E_^y=K+?<|TXT3V)%9eRG%Y?Y7r%jy>R*r!v8$eD7x=#wBvka_xOs+*7|Ov5D)ASPGJ+w!h-sEU8gDwl{;g&9o;p&UwH4SLqBHkJlpNb7!&y+z&LXY20S2p8eF_ zWVH$R+uzD-{;3=}YV_SOG3SAZFY_rE&HppsTKyMzc00{2$jJ6@q=4wD1BTZx70QY2 z6)pR>Crq}7DeZOnSC_tZ+HA9oCUE)AIy2+y<(o;DTJ(1F8JOSmGYjAN>DHWinl8fc zxY)(BnB)U*KA#@z^V~J!o1MkZKx5NeY?FhkxQ@^Af9c3~tu{Bw`su$@SB*lrD&Bl$ z<4v5`YE{o6sns;|e2{u9tN5jTi(+1wpKm;S|H^_b?_NAgyzxQw?~l{>?%$q}de>c; zf4S|{oTmqv%VHPWZ7x?hUi#*|)h*#Ir(*WHACSGzSzl>7=l1kBPA`7#sNe{zcQWXi zv-sM@1DaL+(-@`t0^hWJ+uZc*jGx6mvA%U}>rP+&YjNs9_QSsS4c8n!7-deJug~*T zJab5-MalN@^KEmv%9X$(Ku52AM*uPhrF<>3VNbfZr3a? z&%Bm(&8v7!sCC^!P^@8+xyX@nIZyxhJKjng zw;ivm;mRoDPZU#=S7P!yU$pPId*VmyFBddlyNIn?CHmVrTJ4_jv;AIr&ouwv%c$0g z;rJYTtWk4I6uTz-&2P0gzLcI_Y<@w)yy!{a!izdRv*+;y%g;DI`OWHL8TmC0iaKkR zTbFx$KAcyxXP(oxcb|XVy_%%%VkZ;o`T2apr`iY4mcR0=*7JIAv9sM^^Lcx*$5+*9rK;z##iqf|I>HW*wk{oHV!g(Q@`u)`?cojf#ic+Vuy?xR;XvZI2)B_E6%5T z?S1dO2=?dvPAgB(PraDGXWl-;qJ8szT)fjhZ(-B{;Zuzrmzo4Mm)b2_(I~g_zxccY%K#^)S58Zq1Z~5LrWZ!#b#|@f z6R8g?JfXOD+xJV;=028OvF_Ew!dC_l7W^s^$UNy$y0qQ>>4eKwmz+YLtM7I!ZG5cm0ZM3zh%=+*rJ4 z)%DCf+wThXYlj*1>e<7X9(cx$a$`~&GY?q&jNjqGRwIcug?8>u)cO7tJ=D&n=4kapFH$1 z|N4SAhbPx_d{%swCMmS=lcQJDNrtB_J+s;j>$jwN6`YWp_s4mL?(PPQ4Hd`dy!A4O zbXz05bh`bCGwsL1O1KJgj<&a|i#o)rGBA6jbX-`bIopdpxGXOJzH*}V&D3OcL!Iih-cB8xv!ZgGp*YZ zvg!A!o-HAVH>mw|m|4WZ{z~JPT3C##z_upVM!)_Xj-ojl>;4qaUL~_$HLc(Jz)puB z0>7Bw&b=P8dF^|vzQ-;5m4p^tWl~SITcC5vi7_MNeTeGAkgAXs7xw5KdsZWQV9)Z# z4U2ZW#QiY(UE6AD|IKl;-qYRf;XfyC(_~&J68?K0uY+T^jFPSctAI&V2IH+;yC+OK zJk81HqTJ!MV`^tynT{BDEGiV=d#_kJwwpcrm*3H>PqM<(YQjI=*T0zh=X$h5M~SYU zjiH0|g_XOOuV|e#?N#j_g&i3}7F%q-?>s;E$Dil+{~!Ct&ojGh(X#T-iLhyfqQM8v zo===qq{zC>%$PV!#Dk-Dx}CETE5T`=2<`IjS3 z=|3N{7A0M6e|e+pcAcQ4?VI=$RZ}A~uC2K#IoGQ6Roc$NLq`_(+x@yQLD9KlVV-?K zfQJa%qq5x}tvZ7md!n{xUES)pqJX(JuDw=Yh9e_pqv%CVsXZ>Yx8<706`wVo>}UV? zi+1zDgk4|v^O}2kdvjl3A8$XscAex35ni6#nmQ`CmKz1Io)A*|rDN=*tNr8k)z#&1 z-)_JE&qqPPV_ol&q>Y=N-`bkJyyV4&g`1b$ThXf;dQ9=d%jNU8dHeeM?o;R6=-F~O z@%7v7_v?bH!~e{9I=S5T-;c-rbBfPdns2PB;Xl1}-efi3U&&`?7#h1SnN0LevVEQ<>S1lT{Y~5ndeOvh{e!X-lVhJv}{Z)O=@cId|^dHk*Q=iy0<2E#Le& zbm-8cjmgKq9l8H@@~8G(tGn9F>U*ocz6z_V+NGp)|K!9>#UdB(DIU_1WkL<9J3gJ# zUcVw>q0_AEuXVMR@88~0`1qP}+8K%9fPe|hJyK5Wys_g;z2JoPoEg70ec}WryKP#M z#CnVA@<%??UBU6yZ?{(aTfg0sk#JY*{S;e)WkO4+Y<~qQWo$`3|$@e)p=UYj7Z<}PNLdjXWV3^_I^HR zZO?sjaa+nl3xUa2)3TV=KKgSkF%F!l-fE!mn`M?+Zq(OTtJl{lXlr*zE6zzewQP=U z^|v>F9{1a`d9}a4%rRwyVgSpB&vR$aoLLhedF+9R%1!lfx6~VLPaI7-{{Q)W-qHL{ z!Qr`~t3p=3{Qmo=-z1Gr?R7gIH9k8#yIjtu;)C8))!rQoHB>GMY*(G|Pw4IZMN*kT zJSXksGP=VJOIvSjJYz9O?#4qEp%T~Qj~+c@o}^o}^s%xT3)iXL-ZM2XUAk1lr0S`q zq7(L7$?R!}vMzS_N~bwC3r^(yx^zn^?Q^*yZ{Y4s zhmI%gPH%5-pI`QR?RGt@?{|vzA5K}#abDdde6s%jKc8lW$JdrNnSDCT&u-HD_$GU> z_bXTN*e{Z7d0b9A^_r|yukk4!ImzDDzu2Ppbh@~Acx+cC*;cK;T)R|TE zuvNV7)Gp46!7)shlWYInumAtoSzx~SCgv#;sRgSV3*>mS_qcJCocOf6I8m9|YpG6U z@G_rB28{Ou9!2#ZeiFF z=6;(`Css$yU%4t>UbW-Wewy>yw9hj;J50zFPf$ z?|0E_p)YNxJ3L)d8&~ym>E`U~>;5j;x9D_&`kQ-ut528TuQm5MZKrxG>*Ghhrzzec zVojV9yNaKm)2$Ly>a;LO;G5Sn!>+xwj5DNen{CDI-0ihJZ-f()Umu#eZEybleYu8< zinqKGHamUt#G*MVic{>j`_H$_O?)q`v?5e&(OUk5voDl$BNdOTUZ}sb>Bqx%`B#Z& zm&=tl<=sx*pY7K#XZ!3`@N`G*U0Omx%Yvr1Mm)Q^I{f@o28GHJp~Qn5?`^+d7u{cK zzPI0IS^9h_)2tOUS8_dmF8e(8$b=hus$pOac{lEmd*mseo_1o72zB zt-D=y(&+uk0WrW?JF@nOtIk8QGHJ8s|GTm4lTv;g_c3D_O&{>jIp6RxX{LERc zHfm||a~@>d6+Sv5?cO7i`1aP;(@9n7hd%{ezqHhQIj@vS#zeEysZ+LpdzdcEH0e60 zZpb1Q)7M*4PEJ}`ked3u_1oLq>0e)6JuPKb@}f6KSa|b<<0cP#iY+1wV&5O!R@{5M z>FQRuKAFs?PoG9wT3R06|Nmb#`||SJgo*Mk+uJ`0>%9>A=yRK2nIu%}Yu z*Rl_vo}S)rvG=~}tnLf@%WCHLl)k>Z`+EY9&^(=hrdgLI=PysNeZ#tKqW6Nu_d7Ta zR_-~_YVt~MlDk#^%Nl;E?dKH_Sw3ocJ?ZmRt4@yPF_A9!)stltg}!Zgmo+^u_qK!Z zBnQRGns4yRXd>Sw;CzL4RO z9K)1~XOmOxR{Zy^%vQX6ZS%SdPNy&O{i!LFH{{*jHF27>``z!S7_u0}nYS~qZK!S#J8y+2sCKF`@uZxB%MaO?PN_{e0*r-+Jl~3Q{yxniYPfDGgxSm5* zW?$5r_jjJZo_*|C?rn)@N1Pm%`G|%-nYmE+skQDCX9uwfydmN#r_Ve2aYy`(`l6US zA#)aU@)8LTr9;XAg6cB+U#zI#=J2~lmect50aby+SGIXZ{GVQMnAiLci1^2(Q*cvD U@o8}!0|Nttr>mdKI;Vst053@Zg8%>k literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml new file mode 100644 index 0000000000..a84f1bbfa0 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/activity_camera.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/activity_camera.xml new file mode 100644 index 0000000000..286e549c65 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/activity_camera.xml @@ -0,0 +1,22 @@ + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml new file mode 100644 index 0000000000..15305c436e --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-dimens.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-dimens.xml new file mode 100644 index 0000000000..22074a2bdb --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-dimens.xml @@ -0,0 +1,24 @@ + + + + + + + @dimen/margin_huge + @dimen/margin_medium + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-styles.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-styles.xml new file mode 100644 index 0000000000..03d1974183 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values-sw600dp/template-styles.xml @@ -0,0 +1,25 @@ + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values-v11/template-styles.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values-v11/template-styles.xml new file mode 100644 index 0000000000..8c1ea66f28 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values-v11/template-styles.xml @@ -0,0 +1,22 @@ + + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml new file mode 100644 index 0000000000..ab7d3fd496 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml @@ -0,0 +1,30 @@ + + + + + TfLiteCameraDemo + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/colors.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/colors.xml new file mode 100644 index 0000000000..4b75d2b2bd --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/colors.xml @@ -0,0 +1,19 @@ + + + + #cc4285f4 + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml new file mode 100644 index 0000000000..a08ec3eb62 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml @@ -0,0 +1,24 @@ + + + Picture + Info + This sample needs camera permission. + This device doesn\'t support Camera2 API. + NN:On + NN:Off + Use NNAPI + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/styles.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/styles.xml new file mode 100644 index 0000000000..3f3bdfb494 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/styles.xml @@ -0,0 +1,18 @@ + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/build.gradle b/tensorflow/contrib/lite/java/demo/build.gradle new file mode 100644 index 0000000000..b78a0b86c9 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/build.gradle @@ -0,0 +1,23 @@ +// Top-level build file where you can add configuration options common to all sub-projects/modules. + +buildscript { + repositories { + jcenter() + } + dependencies { + classpath 'com.android.tools.build:gradle:2.3.1' + + // NOTE: Do not place your application dependencies here; they belong + // in the individual module build.gradle files + } +} + +allprojects { + repositories { + jcenter() + } +} + +task clean(type: Delete) { + delete rootProject.buildDir +} diff --git a/tensorflow/contrib/lite/java/demo/gradle.properties b/tensorflow/contrib/lite/java/demo/gradle.properties new file mode 100644 index 0000000000..aac7c9b461 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/gradle.properties @@ -0,0 +1,17 @@ +# Project-wide Gradle settings. + +# IDE (e.g. Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. + +# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html + +# Specifies the JVM arguments used for the daemon process. +# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx1536m + +# When configured, Gradle will run in incubating parallel mode. +# This option should only be used with decoupled projects. More details, visit +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects +# org.gradle.parallel=true diff --git a/tensorflow/contrib/lite/java/demo/gradle/wrapper/gradle-wrapper.jar b/tensorflow/contrib/lite/java/demo/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000000000000000000000000000000..13372aef5e24af05341d49695ee84e5f9b594659 GIT binary patch literal 53636 zcmWIWW@h1HVBp|j(AqTBoq>UYfeAz~Ffed3FfjPKhB)ea`nl;dGoUKGK6OTrJp%(n zC<6n72(m(7M?X(D*WeI6U$@V`XHNTg>*`(P_14uocjo-&AcHH$51xKHqkF>htnXQE zPaQ_CS8XZNo-B#d+##;I?8%a(6Nk1+y_977`l*N!$wDzSm$5J~Fyt4dqc{p(4L4Lx zdQoCZPAXod!l+8iixLY8Qj0LOWHhCuEoNX~xXQr5pp0FMOMZD?PJUvFilJU|PGWI! zZI3V4Ap?Qd`x&ND+GYAp+}GRY9h5In)U$ESan9lN^jx)fHaGu+g-1jRU)wyhl{-_j z{`+H21?NKtB$AwJwwX^qUAs~>ao5(h7sEted);A8+-AIU+dh+58najHN~pE8mUd~Y zbLm#Tc8T>qUSGeuhry}Hz-?Er*gbE54{NFGhcxPTg&5^?e72uZA}L^7vs}LAf)bCD zn*^JDd+%Z1QeD|vv`{{Jul1S;69Yp53j>1-0Y3&;7MG;v1{CENq!yKArWOYj6s=9Oe7Czj+FK>}fEaBjYkqd=XQM*H!Lk5(qEoqFq9Rmjt>{EG=voV}f#hQ6zO zee81+nbX&mr{qukwEjzXuE|ICAB%f7J4{tz6n=mB+#8$EXKbu@e}DY^`g?{`6J7o5N+qTDTjJlY+Qu>E z`U9;gJl9?;2yA-xSwrYZWh8&<>0HlyUs`WZ-s2onuK$L$?!@fbpSi;%rbRow4c-~2 zF*|vNI2(_&-^PE9^-uB?rdZpqOnP9)?6J={*n8(9c46zgi5h1-Qw;bIt;^{MpC{-4 zj7cj~Q_Lu0iBxflq0Y9cuclc2I`Ljfk=JHJK$Ca*2lgEgvQmya+&tuLb4gZCy5i7= z-}Q%6Py7wDWL$G@4uf- zBR8dxi0tCXsgovmv;^C%GJLXpi^Zy!uj6JG`mDJapXOL!c;w64OUhA;=6X+y6A{~X zUf{QnA5Ub|EoW& zT+EIEU*fhKnR!{iDzTf*E`Ok1$a<#|8 zo`;{CqH!}(yfyxmNpyNp%VuT8h`ULFQ)Y&x^V^=Tt~K1a;(LqZmOqhaUppU)m_21_ zn|jcOQy&%yw3$jB`jD)@)9dE9Dcw6%dj2hb7ToSo5Wv2(MJ(oYu6OC%8((tXAE;+W z^(o8Bs=F~v3=DxR3=F#1ed-P_utJMdi#+mkQ+@LDvr7vgp6(5f_Lp)L`R6t<_S)B= zH5yyDWks_@-;8=2diludRh=Om3Y`8at(@!hcBe$<{0ZNyl9hGoxZM|bQ`^Hq+9#TK zpWJ!2d|&aiGiTo0+t)Eg99q>Bc;;lqq6eDuJUcWw65`xqesl`BZBTf)vuOguUV*AW zx1bE)L*AU;Jd>k;ef#C86@TmGy8NYIE;;(0pH_Bxeo0%ws~Z-kCZ{*7jyhNDyL;Cu zgS)-jsheK*%sVUYU0pRXG2`+^>3NCOtcSCT9_??v{+-*-WJlVqW#1<*w4bM8z+yI+ zOSNZ-&+PZ++3k1EO1F!N*IJ|=){Tkc(-vyMYE{rcY(ASFXsJrySzo~PAW(2 z<<%A44*Iuzdp>EzPB&n-I|9Vr4A=dt(Ls5DD}QaQEY#$ zs?grNlNI_3a|6a`L27t$Yb+A3W_r=oF`2dx#(ehiJ4nGAwGJ7#>oV2i#~RhQ27Q;8x`3@CURRR z`$wgGF*#g2{e}L!zfD!G4ii_f?Q3bCKEcZQ&=0oA8P{8tTzHhiI?8zZJaKOQrlH+$DSlbowk!6lAgwq~`Z$xjQA^Hr1nrBWfBHm%E4^K8KG?#0ckriQVVhW;=% z?z*=;V%Co79dXqWi>F4M+ETP}&eD`pQ?oqHpL?>7-qKI`lH@2IY;6*odphNc(?V~< zDRZ&}SNIol2;EwgCo@Y@xJdWi+KlWfon)z*i&9s%$$D$-(zDrSa;E#&j|Y{e$(Lu$ zSBx&3eO97haB^Ono641)lh}CE+e8lsd~JH%nAp?dv-0ypLu2m5jtf(m&R+S?ufWOp z@Vx1!V?jM@EMKv`o@93Z=G(|4!Uw!hJiDXgIJ-c9%G%f&|2zNW@XE!=CT3gREt>Jm z@tX75+fvdkwHH@E)4Q@A2trUq94dJz9F{Ks=~IWh$NiWNa}gJG5mbb2?_BQ4ZRaH4s_S;QF)xY zM=9~Lm+SNg8;_;N99dl#GwGvL-1LbDg6sAuKKAw56#wq$J58yin&+Do7aG0oD?TFA z#UYbl_(f^js+6{?IVmw0bCP;_W`?D#+o}9+j%$vwk<0hOReRq$)gA9IPuDwr3<`^{&@$UJ+g;{i2_L`o0}g zir0UCHuZ4AfoZE&do{XU)0 z*X9MePjVZKQtxC-Emt(n+sWmhd^dDTY~hr0>#VqSw|C^Zv+{;t{va2mv`%W)2Jr&F zL%mPtv?%kRH+-{L;yjRcKY07c1?{jC@-@5OZ{==Cec7J)!8n2Md;95KL*sUGrZ{|8_#K|pt zW##_s-T6eV_59U41D57|k}`|gXRZ9k!qJv-IEaNzM4D<+8%=nY&u=&#jfOJ0$!3 z-}cSZEobiUs9Lvs#&?b48wY&MgEn!p&-mDEv-F4gTKV5U^Ir+_Otfk`KEd7i{*mS# zEnz;71Z=ln3}=d8EPHAmE(ecAfPJL~U(SF4~v15Dq_X}NCobuRU z>J!VK>Kgx5G~9RGj&`L$-driMj}l-?@NS@6J^lRwlj zltoV;ZkZmQcZK%H&)S%O-&uL!tdmrCrcKI)Z@Km#-h`QNh;2{YP?%9?#Cx2p zXYJXq%XwPw{pxxd&9ijH@?@*r#h#|ib-(X5Zf0&-wb zzIocxz>Al)mOs{8xAM*ES?#O-e_Q3|zI#%-%T2+R+tVK^*WC~53z7G;cR%;u)@5qO zjujowm)}-ZU*n$rzAj|zgUEkUyrMi8uzoqA{Y=C7x93uB>-|_WI_ADFSo4C#OFuj1hjM`l|0^UhOZInZ;9L zX3SMMd$!sIX)IuD3n~{CbgHJH#dwyQR?-AJGqv;rAbfQ?*Bb0a2 zB*$r7uXQ-Zlwx(|&R=fSv>jM0oB5Q5fkBds zfx!TKiwC{G>yen3k^^h*^oG4I77i8ppF2C{aM|g_0bX9M988(NIvPZCxU?>KE#+4U z&}rJ`n?JcV#XQGsi>T=QA8vmS@$?-Ix3g?MEOFr6iSqv%U(3xGp6$MsR`qW0^V0J_ z=N6xTbN~OJpW+RFe?%VE>se9Z>@4hNGUv#OE_2f#s-LH@i6(zC&?^_#QVX3E_HkNU zSPx5~+d59qb6!P$&3!8s&Sh6fU%vFliTmEcqhc|Kc2C-)u5Ycr^nJF0fwE`NVV#zc zdD)I!nWD?OPIECiFH7C!E^6YwYq`jezT2Cf4z?Ly77}iK(^GIx%2=}K*0DEBCeAdH z37&V_XZ~!n&sEsy269eF0JJYCpNXXA`=*Kb>rKR=kIRDWrMM%g1Sv*j(( z+MyeajmmEBNIT+VZI+bgvS)XR&WR$x7YrtwKhz5K0GsYSa~y1YNHYSMc)PpovouUiYNHoUTQe_0TF$S1Nby5L1} z#n+^DH2lmkUFG-Mh13xk$-6mbzsLJJZ5KIIOcaTZI`IZ{BpWjNiKNZOik^hq%&nlBaF@ z;8l9KxaVmHAA6-|j4W%+WX7}FH)h1QJxr=!Vt;FGlH+kDy9h8Pz(Y7w2(EE7*4^PLExBN33uYB9jv{%TR zPki&8moj?d5p!;>o51X@wm#u@*NRW3-n{ij$OZK_m-RDuex#arBQy#~&zns=D zHT}CFdwN99Opm=vri*x$Ry>&FYyE9cjZfSk+4B|6H7h)=XMg?>sk*eS-~52(G=X(3 zTY`FZxT04}6y;4z7m%-=T6HuhYxQ%ZeOo5Z7vL^=xOvY~slKNR%QqPPdYpOKvtu=D zqN;1NOqg4ytlndP_6_;xrj&kLF0=c9+=$lRtu~J)35K2Wt>!iNnldK5c=&{=%uZUf(to**H3i;<-7j>KekUl1Xe@fo_pOC#pVB<(5a%pG>vxOxsxSHG zwRUbuO;3UGx%i6ix$T%0ZNWDoUC8+E!!b1EpSZjReQ%B zJ+JSAeR52f&VLoC$`o~-Gw1V@7^PL$Uw%}x--0k1w=fA#y_mTNMx%7!n zKSZ4^4n2x_^!-}k9F^F=>*U}0mjXnN@1K5LYrWyI0-qVebGcl@+wu+7+EO?L(yAJpr_+u_ zZ*A=spSNAZPvx3ga$8b_jI;5Sn=e~}9t($;9{FN5<)C)%@>_gg)1+*3+fvTYvy!qm zI&vfEUk;y(kCdd$vWZSnPR6~}JMh*5%X~w92jp7b;AyBQn3P3!VE_PyrsGw+=-H@-i6`~QFE|C%$-Ikx|iMb{y@ zIY)zJ*z%5O?p1IOY$VRKEvxPqkk_o&s*dYk-_56+gA>Rr3J_T_aq@H# z6}P+$^Q}37#@shGr3|>Ai@uTF;&c3}PkEU3v=Y|eYHG*l%gD{SXSH>GLiW!^c8_0t zOU;fnsWEPwT6;+2m5E^G9RuF)i<*vwd|%UaVZqepxBdwpzH;2_H1{sswYnmuAN3!k zo8F#KxN!c#3{NLQWb{oro zURkwm-p6#e>bn|Ka(qk9m+xPDT{z!!$CI4G-J-0eXRnm5P}sJ>^4AQt&Yq{4*TiS% zoKBM1w!iC?d<^R)(PYJem`e6G#8=i4lORwY82#|QZFgL6{T>R@)1$)8c#Rm`F z`n9PvSU#?_^zz3`GDdsfXq;)Alzgt~)NjkzJQHT`nmu>snYTilmJ}b;%bQpcJo{L9 zL>X(Llx#xlo;T(1=Jjg*b`A=Vo3qL5+akT$DZlvmzMkY16)y-pT4^J9v_FLB@o}wr zhfPI`E254DOFVYHmlt`||3g<7tIj8{>x(2bB#-XNTm56@i$6T}>28~}>J(RV|5(}; z-}xnb&!PuscBk*S@xxbSf2d*O^2lA2{l2MqUypG$+*jZnU$Xee+^+kN`S<)-?A-76 z(OtCu(EQLfk3&W455|jbsc@h6QqMX;q-fb{ah9*yiwYRmb3fYD)Lc4a!Gg8BtRszL zlOykcOaHwfRO{K~Wlzej zFZ|C;t9yL1vj3SirCoV;>CS>zuNqvWPO{%qS~+Kb zvBuQZ+-9-wQ&+t6(Z6|3w=#P#i`hrx#s{B&z4}pV7|Cw^dDn4G-JEX^53h`=-Tx_z z<;|sdolMu|$L^&yt$(xT@~p?NPS**k=X8q{TrXoZ>VEv^p4F-R-ELbR{jK0xqZQ73 zqM$bK_O9vnWnrfb-)+2D^hY7QzaY$_{j#XW`l?m(k0hS+lqei|zq!t9)%q4jo+2@3 z=f+0$6`H#W4JVx2)7Q3YYHbUvO6F6GZwoGOF1c}?Pp4C51IrPE+@EIuz5N5vZNEIz zNJ^WdcUiR-NB`1lV^NoLt!4sqC+|-ZeRI>HW?hrox+_NC)V>vF@D^$$*R!5|erWbC z_OcghWiP6N{J#dIFtb0uwD#dO4_4jR(GRyM9x^-{uxq1yrcB~A_KVG-Y2i-0-!$Lj zdTXXv*dKGy`@}{uUYk;3|DdcVk(2#zCHtIJ`075R$>8mqqZ&&&On=^CJ-Opxz0;ij zrJWl#_Be~m$a%|f?r&UN+&gpS&VctciA!Lso(tb7g{}246yP1U>acD zSjZpRyeGZ!{%#v}_1{9d-#=W?n0(Fd+U01s<7|CrTRWcGimJ@^pHf&;Tf!V$+$p@~ z;z{;-Pao@)J?~rdwOGbM*wAFM)m)u2@zc6L&Kd3FxMkR>IWHqAZJD97PVAKQk3HhP z_r-qM)!cBtuMp45cq#je*rKf|Lj2PUUpf7bZ83g5^Vm0|psRaS_GHXU>UGPgoU&8y z`ychco1^8T3XXmjSb8kf=Jmzd^A#KQXWvR%-uv@~+*`qCb9UWuY`1-FE4%RdX^(JQ zxwMpokVLm7M$6KUO;-4q6?$vio^6S8=P$cUv@fWP39PI=vOlEtzD48z>6Ly)MpHZm z9M8)hxwL*^qV41tU&GEieRsKUo1$Iue%BP$2Q`mOUd|OhbXx6I2n6su_ z`X4?!wThqYIm$Tey!Vp&mGh^cEZ*5sJ*{W1?1H3qUq5qIuG4u?ToKxE{^@6?iq*jt zr47@X#gDyOnmJ)vpvD?OuIIli*M2e4tZ6(n|5Nei`PV(&y&KLw{(32g<3sD7z=ziu z);|4McTeT~*-Ig;72NruKa^T;r(D{`u=e>|#;k|kqW1(Bggv}{X!?(zK|go+%+hN8 z%CNO?_o0OhxzEe&+yCbL*y;S_t~7OpJ4d~Dg(+2*Ee^LLgw-(9xVr}gag*So*W>X3WACUdJ$+L~h4%Rhcq=w!Y$ z%l*nUkxtj zZ142teOdVM(%R_Q(vwRBbNe}-?#$yhdvfmJqRTz^0#v4&luKrx=?E0(l05mu%68f9 zO;>jWX4pJrn(%q8N%FF-T`wnH>c9B!JV_I&-g;h@RJn>~`cD>HUays;!X zz(#=oj+Sg&%@#dZUFKF|*)pMB zsmGGFPsuEw{ZVRT=MJSQ*5^XXPAypwDDg%(t=em!xw?{%u*@gJ!>exY*~l5(RkVCd z*~{)}dfC3NGu7N(zP8PAY+dhM{`o|n+Ez_5=Jy?Ziw_xi`(=HpT-@Ox;L4I|ARH;# z?z7<5rX1tlPoA{6UD#c_qi1xyt|VTzVLC>Q2+i%c)?((#=+LL$!1p4T}|#1UfZs~w$PflEBp8y z>+XHK%CYa8U4!HHl4VQ=r78)t?7UW8{ibzc#?8!#(v2U)loQLoJiV2bY{TcFE3xo? z##cARl@`~p^12yKG1x83Uv)T*>6K4WSb|N^<(ykpe$sM`wRdM~TyQ%RYdPn(R^;x} z6K!RCzQ(#`qzBb(-cqt3MNo^-@uLi!@j3{&gmT~oaz7RMS)TGs8kOr3Z2 zM)3ukj&w=)xLom3V?EKaX5y7?9H&#nlwXu@NNh2es=DDh(NkN-KQdC`)|%@_gahwx zdc%2-BS`8+$#!Mi=MtsPN_r>DN>BAzOxdvT;*^WRDMuvQK7YOWAbKKeo;rr3QhX%ig?Y{`Bay{HvxUPUZ{qHr+M-aM(%1UUc`RmPf@g$(q*%f4ZIV z`jT3kcw5ptd%|R?RF_F2G84}-3Ry@hy)ORMy7lZOX|t5%fR=Tdk(({ri;{IjO*eVQ z>@=GEqszV6-?=17$`ooGzMNR-U&d#Y0zq^R`VpuKGRadvx{7 zo~)V6E`9#eK0y7uw8tq^o!)wbqljYt6JnjU%Ks+>p31$aB9){ zRa<7~1-z{KwdT<7(0z^hAwSjbE&lNQ$>X%C`46K*|JmH{di(Q;*IDK@o-b-0pXz^_ zzQA?eB#sx?ww8vj2p0Lb=~VSkGnTS&-cow(Ctzw7Ty0 zunW(ot31zsxMSj#q(X0J$E=rWK9`egXG}X#Cp}#zY^~`s^Z2t}*|Hu_W?k&AwYs!i z_sqH1tKDWA9;vVW{q#lH&)GE&Jl`gqy8q~}kF}qM%dB!Cokvk$CVMXUbofv1)OU(A zUMSSNKXp$KeYZ(l*EnT)tnR&qJ5OF%c-ZmbW$z@_jb;a~l=_9dzr5>9Y}>Uw3B$9O z7T?{Jo^8AAYG|y`X74X9A)9xVaQ7}eTdsTHdG_5-3$dtLDV@9Odt^3*E+7)yr8^Jyegz`SFJ8c4h87x~y%} zsxWQOX-AA^22Sn@Xq)qH%~9u^IlNPJ=E^ldCfnuQ}?SU=*Y=tXLd!cPGn4+S>*RxZq@TG zeHDwsCcmhTdng|A&(3~I=dHk2ncxeltILjs)J=>RS{XO(+0;a_Q({$G`%hH|1b5i0 zOp811BOSM4L+IqW>nIvVS&z#h>6h=44wx)fJy+^UU9W z)ZSh4Rn)%~Uljj4J=`ApZ|Q=^;eY&&U0Qs~WNJOH{O5cYYh!n-A%$eqC40Bu&4Jwe%0E4zFcYhyozXtpR-?1oZxt#2&t$ep3 z&+}(9AN#U>1yL;dnKNT1`N++i`KJ1CVOY)E3$>nsnUl5e)mUsSiHa?g;n3f>S;ioJ z%C-J^^Y!7)|cIn8)qz65l>|+S}qY8n14-(nU&;LB zlVwJkah$?c4>koJJF)9_dQ79p92rN|!+w5O&n4dK{I}~{ip``=Je|uQ&Q6inV*YWl zSy$;hR|4}ANvm^yg)iGIcRkfs3iozfpILq5qs6{ASHcy-jhQljKU zZHxPzQtb8N^RApwr zeZ636veUB_-l`|3-?WxKDJOktsn+G0%Tp$aO3txlR9P9Vx6*8T+W+ShjW--R$9VRm zoQj`_l(S9I^Btc80vsjwzP9!g@bmrlHR4j?{@1#J8%(aV9L`VeieVQBimfV&wMjH_ z>(MYWW-!)E`x5o?{6y(Avoz5e7k9q=I)L)$tqcc3d7TKUFZ$j`gM2uC5vDj)lwB&fsFZD7uL_(+N;=B zteC&d*8OOI?}W`fR@0h`R8!V2k__QJDSo49UFnY0r!lVaOY|KV)mY8m=Tq&d>frst z?8f!xtxrp=?{c0^b2wjHBA#}3>C6)Ido^E7*L+KQdVM3?^H6vBJg0aDWpfcfd#8h? z%_ULMM;Xp$eAM~uH_zq#kL=Igdn$itriB{ppP#1pS*-HC<-h5t680_U@H%Hz@Td7y z-K#kK_G|zcUV|Sjl}6{V5(_+~T)hmqYB!<(T(-P4rgyL|yPt z&*9a&WSajv&tBf*Usb@%<&zXQuUU99W4qIv?hK`^j8eBB9C~ZguD9&x7Us5HE$5VE zpK)CLDYC3L~rNmyk_-*ueU2cKit{Hcd7irvKVH2 zr~YM(pLyI^vUHzr$gcCg8eF1w*!(hkW?gjR%I}d{*`GtU$tf<_{Yb)1d!pC350__WG3dKSTd>QR_ub5X=+;DHw)h$Qa*ly?dA5^N0*g1{w;^PJ1 zSx%-3FRz~G;=j(XeWm@{%bPbe6y&a{n)vFpHv5WY*P>Nd#21L`vMGq=>BcO%u_%1Y zw#thecWl+&ziR7Q@h@FkTYvUn7CLx2=)GW+naJki+~qO6ZPzbYSR`pF8Fob2a4(wQ zE|J6VDSun?fp)v8YzlRDY4dtl>^T*-*>H85vTf>fnM&b5&u`}Y-!7cA`ig?g6OFIJ zHmcWNv1M9rsF}v{{pRlsL%FTGq3+WwZBKvd7U2CpF)Vb-{uS56zkI#iRl6jNt=9kI zDVtlntmQrGwilf+-<;dD-Er;hhqJ9$Oxf48;hz@EzUFhS5i=R|S#Q1H-dv&;dSI9P z#yYElx-)Kfd47Dc-1D@%Ad>ahx$bQ64ykq8YuX<_*xz=IF{|X$ZK)^b zJKp`-{enluB6Y&GLlM5M;$JQ=KXS3+OK`FMT5E@Fwv|hBCtTZp@((X_$wP+RDLxmw zSZ8O68s2}lE$V-Fh)f9Ac8dW1U(vraUxz&r_2dxWDzhMy?{;p5?DIWwcbINWeQI>y zaMDg5vACw`&KLG_eEcI&)L}EytyZo>3b9diu_kCVheJshPR`*ez>9sA& zhpGjhUsv3h@96X1{fJG6Mg_;WI*BcEhkpxi_>%aCm#ObK!;x}^J@+4b*WBhU_@*GW z-(UaGtLursD?8)Zj@z1a$1#fheK2i7_)F7@ozu!L@xN&QuqkEg^_T81taJANz0vlq zJ@R}#$BM4!%P&ZO36{R)`l9l1TiLvrw58_fa(@Wjm>*X0QgGLs!b`tb?%lb_;EMml zJ?t9yt1lEANLRI*^)Ehf-f$M%7GL%(m*z~qR+IdxY8Rr*BA%XXt(JoqG=T zwP)G%l+Fv?thJGIPF&vM)kW8o9%MJ{+0>X2{YmD|+ldheue3TlS$zDj#PdZcVsB4E z1*=-MsOB3}l|R!n1+R!KoYoM_nYPz=f6EPF0l7&RY$J-*{&MjNOq`y6cI%wF+}Yax zdA~33zUTTt#c@OV1gn(&uS+IeRegDL%9or+Pm9_V%d86e-z-kLxFpDK(S@b+lN?@3 zH;b?z*?qgMm-m7w^Arb0fiGU+MlYllydTW7a(FEJV0qK2jm*dM#h2|~ayfrL>)+#i zj*DeWEE61mM1C`Fh^Y{sXMWJjy!VFE{X;Kqn{ZaKFZ^lqB7#%*{KD@O4DYiqz2AII zUc>J{gIB#n-e2X6+7_$V@{TW+Lj>y2sV%I$k#&FZ{X^TVH=O;)w_5e+`?I$S^=l{G zv-sAu>Oh{*tHu0|SD7WhH5`^#tXA8;E%A^4g9lHpMPE=C(46_k)%RjhU~AN(+?n6= zW(7p1h6~xU_x+blc;PCt+)b>o-tl*Sz_;}pssDtZ-np;vsy?-mVNs-@_kwGT(MNt;xP-SB*Z zw(9bXkLj8}9Z%jo`**|U&9i^Me1EvS_=DxSC$G-gl$*)FKexN+4(A$!;(4KE)Ky}*RCb%VsDglSew zx;N;%xlCK3Bm82TUgmG_3Gt+NdU*miKxBjmy^%u+5r8Jv=y2@WZbEb9ixifo{ z|NcCmug|!rQ9kuYVYr)qn#vyi#4tDQXDN3M@9AOH`IP9{#%?HiI^yBszg0F5CbfT! zm?|*cKxMIDw$QcIoP^**>|Z?!%=>?AINE;v>yLnA?mr?9secXlSrqLiK1nEusU~>V z!kd?wY!-fKV<UR3UT`P{87kzuCFd8?#ph>v5_iVbML0CCjt$Xuns358TlSXmu32Y~ z`IsK{kSOq%IMcUo{V}hQKLu?8)=G?LRX55C@}K>B`pi93&GxyfY+|=BMqcXMWwj|s z-~Z0VI>XAfQ}dR$E!+A0=3$lF+f6T@@OrTH#>JixpReCSEcB^%kV1= zIsHnVJvy{%uH+H{F_tTtTP8eCJLk2@@_ERDu!l-_nwu2XX;!drXDNx9V8!UQ((NGM zC809Wj2n|bxHR_lFg5c#&Jg-0TEpe-C|06g^+L+^&MMb0Opj04rk~%aapGaah0j-n z*LJ-i6+ABzt6iyiKF zYm*bM^gd+HweM(o$eYg$t1S;-GkUWofhjHh%$#{#Qp;qotbAj}eQUSQiy#iMS~l_b zYuHQ|YxreO$<4JsZp*^XzNuM|_54TM_HWG%2mk-v@}hSRv$o@tt!KZ6YLx_Q#8llC zS39k}HDE(eXymaIo7O5_KN_`t*-TmcQ@3=2|83}!)AF}iKj|iaNVjCj`+|+^BFeD^ zUFJ=bcCC9G<+C)sTyy5;9g83B+p4oh`{{$tXXdT^_{Z9LJ;(J0mv?>m$L@6X?DDRJ z#;A>|ch(iAt$yY6BeeJNDOankTeAui>=s(rtTySXzJL0Vh|9yQi4FU@%z~PiOy<}% zFXp^-qDIVBZxyAzi$1Z%%=@=l?QV$XVq?jx%i0~}Zf7n1CMbBvP}gXcLWFX^DaSn5 zbuVj}ufOk>tKK;Uq zn;y0QSF`SXc6Q;!iR9?+_IzFe2>}G@X7?4P|5$kn@;ALU2e&nv`te} zM>Y4-S=ZBdrzv{|OP#oyb7|J1YbW2CMBaGQa`UDf2bYG1)*=}t5f|6B+Z|uBbu=sz zJ1YJDx827>%=sU_pY<`@d^?x7{>|*~d%y2}|D*PJ_3tOe{PtW4dxbtd)~&lEcp$o2 z}UmnLghCEJSwY;x;xH`o` z<#6@WFTYbN_HU?BXy8Bf-Fefe#F@1!4U#|a=^u7y{xPFQe!|lt|e)RiC3~k%F5iZwYsytI-{as zMS4YwkK>B>M!D;+O}r8$EE{;*XNj7bX2hl3C6}g6+fb!<^=Q8@;o&G%w zcz(j{{Ijk-XJ2ePcUStHO?G+7miU98V)r`lUaZ`F*tS*3)|&NE)8%D`YiEZ!`D;f! zn=x;r(8<1qcdX8?k9u%xmdB-kTJ6S(B^Ej2uQ>9Msb+q$L(-^^)CXIn1MQQ3A*KiJW=#xWxF z(uGIP^IUSHL}oQbD@;rhe03|tPx$5o&; zwe`0r2=Dfut#7+aRb=uNZC$0AI=il?N{GF^yzIB>M-O9G)xrzQX9~M4S=PZPHL<`& zE5l-*6YJK;u219_yLHbx)KhR{n`H2pos&N7oS3lZx|w|Lon1-aR^`pNG+59%rMPQO z^qM6*_PL){dH(zD&hANDR<-GZGB_Z4r)Y^q)8yn4nF{gt~Sd{5k4pW?CNhiTnm ztJs1Hi^-dWf~(eT5Z$uNtW~_F{riO(1(#=)1Y6`y@R1CQxPSgqX8Kv36H+qM-@Q1V zA8ONkzw^-j%~|4+wpwM6Q?-t5@IJbtR!DsAdoWis)NnbFA|z09 z!9Vdf%O6#q6FHEuz3chbcS|Jx{Q6l_7`>&dXM>*pndj?#lq9EL`j&p?iGg-xsL0Zm znRf#jW&}5SN!xzXdw=2VmbVv;-e;8uC5yiC>_~{WUKltv$i|r8tu8jjRv{p-9!&|h3Pa<*m&I>ZxMm0O@4g}7gcP%+7 zZw{Nt**TSOBAV5!CM#4m1ibscB+>UBo6MIr^QzvIU#aPS>Kc4YCwXV=o6OmFriOpw z)3%&*dT+(~ulIud`ZezDd);+cA|!sU-kufH&7Rx)wya+Ez-s!VROcMMRjbrudv*r2 zO!;|n&$(A!O!s1}ABO#!5$!nd*3=ngS6DB{9lBZAvfG+Z(DI+DnNg=UyH6dc>U>(#eVyaRx~8)Z031%YDd<$?hwb@i}H%MUp(RLDSy5679`tGsF zzu#+F1YB&lCC><%62f_HvfRfD+8+H?FW4?w+k82>q`c}yu;>00&g)%0^2*LR7wxDTS{hpuR7E1cIcQOd#6pHchyf}XPrQ*?y1=C}h=-k7n4}}}#kT3x#!DVw99L|P_%!21 zuSjTkYf#@>kxOn8RyJ9;W3Hz>ThY2_r_9zz-R;Ql;v7*_pKhr4!kA*mkEz>TF-WQ1sKuMRj7@n=LLzW*1xwyP2(SnXPV} zotRYSCANC*(crm9-DVf|Zr*k+GJDhYS^NtV7oTraestiXQo)tqX%|1;_*;_9`|gl! z#B-*NqTZHy4gIgLTF7p-Xb!X&R)46oZca1DIn}L#dvCZ{m>oV+xr6a}u$%hQn1<>? z#Z51^G1@d=xYPY~2ha3iIY%d zQ^)gX{SNc{URKR3aKm}w*T@`;rlrLeb0c@xiKU-;bVf3&G%S&iH}GlMvKJj|gGya? zXzlGXvUELfbo1#GUa$Ky=hB$w3Uki&=8eqZ$$ccinOnB9T&?FinozGYfk zk>SzYA1_tZK2m#SMUEZ1#{EkCnsmi@lB zcm8c^jOR=}uyXCcLw-{~wpc&dwd%sm>^q-j-~BYQjIa2ib^hU|I}fu9=XQHea`T?f zxz*|xyVQLkoxYyK{ta^LjGr>JQkN_R3%DVm|bTOiK$i)eM3t#VM(`MBompK>Oo z-S-Hnv1CyB_s9H^)rOn8M;(^9Ccd{=dZGQbt*Y3GOr8h4Wouk7`L0{Cc1p-Ak1Soa zmu~(G`lhLDHZKZGI=$!S)JpD^M!N$yMBEfL%lUHkci)0N_slO>v81jwUj5F+wdLpD zT(`&z+h=WZ*xfWG!Z%WJ&rG>Hhq^zSshxRyXaC{m&WI z!|nAonJVXZU#}#XKU!$CU9kV>n^z_4mov${`=)Sf!sWXUUWH{W{&4@FJ!-?aMQX|F zOg1tiIxadE2zgw5HLM=F#9G=3=ePzPAhTe?_ z1R{kLCI~D@Ys}puiC-?KIlUp>-&$7>o}{{cRj6`-}HTTL@WP_h_=mKash$LJMXmf zYyFrY&%bKL(e}_KhKl>uo_?ATBlj)&RDb`9503?R)`uLl6qP@ktR;8+cu0-R{D6<1 zHVYpFbJ?v;GM*RmQL^S*|9Q_@=YM}>WsRRQKYmiX|H;Gp-fsRsKZ>4wvAV9t_u%!V zy5%RIO4r@dtKa|f`1X(*>HC2nV(s^-ckW;Ri2cXHn7Sp8j{oTFn*S)Bt8U#Q{`~?g zZdESWx_9;Cg8Ka9jfZzv?Xu?k{^s@TH#fBfURW)gf8ns9t)0-jtwI-noDgfAWM({X z%anp{FD{qwlnnATUUZ!8i%ZD6*ahdMdevC3mCfv&_`$pyw)MJmsL*zz$&Zaz(dRvMr1Ff9AN%rSgGt%^US~H~ zLF2ln9+iWsJ6ZNFpFGWQo4&GEx8rRS$x4l9J493e*tpy(YTNvxBW~WAL~n7+Z%fx? z=-4n%FSFY7Yvmig;+`_GV`nA`u6*Mf7x-cmzi0R}Co4b6iBnRA_8;S}FEE={Qhj`7 zhheT?tDVi9*UXJmOE)hztX5}UZIvX`8~$X$_25+d8aSJJ_ygNgULb$~)#O)%Q>>Y%zDN3h(|SmrFv|SxcJ!n`*tpS2M}1 z*GSXk57+*%57NB*kG}6Hj#==aLM!V+y7d2k?G+Ed|7dQrf8@{gZ_PvfAKH_%o;kg} zG6+iwv-KyuXmi<%cnoQ4bY03F< z-rIXcopcgbr7mYbTdFZT{nVnA-2sAjz9Jn?z8XC%J_MTvw%m?zTbH;)*p%t$O%*56 zqm%rSch2zd4R`5O^KzZ3bjoz22a8DH!cMP^5rW<(fvT%sZn1o){nvJ~K6_B1wpCgR z-%=?@SDmXzT!Z582hLjAnVPioz&0~mIrFs_HaM2|7z#Q~oH5U*@ABq_i_?^ztddT8 z&LQhNBY7dm>^p+CnJrn@RkCNNFU>qZG1=|Nr6XJ$gS>b$lctJ@^V~MysrfMGX{240 zu3lQk_gRY4s!tZREL+VI>T$GPbLpkkOR^Wat+{w4Do}evr_G%x;jY_9)uzapD_vY1 zm~0`^JHNetv(hV*!2ayWdA6sxm9M8Q^Dw{deA%w>si}9ew8+I3ou!L9uLn-ZjS!yx zz|3{c^zVJpSc;jN;WvH!Q@`5m97zWA8;&f4|sr^!91%U8b_o&T7?CuV!O zw4`s-mB&W?VNaJhTwL+B~fkb=WzD9xd(B^tud`kHNP)$^Ap#bmC5_ARrIk& zrj%Vw)|N6{^JZOy?cI%^*jbOxO=ej)%YE^QM~6=BSZbH9-@anXRjt)=3-rXNA6vKY zgN9>jr8s-B_3q%cWz~KH+g`3c($9ao^78K4PyKZ^zdif1>gUD}a<1Y}`7~Q%&DE3F zhZ)}Gv+++4t&46*x)F9kv3<{`W5F$#du`6|+10B1{L$y#)~8u{<#dX7zt>X>@Zob6 zJ#&WJzwKvbYWkLW4{}S47HVvsu6t#}w^v0eSA>pL_)K5yrF63SZ0TpN#n%nOt?h$m z#hWqbPJVl2eR5B%Qr29)=Z^y3bnP^hu|KhWy78osH>Vvh zeKzIKs`KT?xZM3>XRb6iHn-N@o_+euk_Yc1SLjS$Yr!AvcjD^JbFZX*XQ1+Rdu*s7@n$eNnwZcKo5g3)j?YYPQr|E7;pN!Qjab zUJ(-?2b+Y&GS8win8P1bt((=%AnX1kx`m^y(>1BE?NJwJb&U}BE-Cem8)kRszZc?zXnB*2ZLH(-kzXkg=&s!BI^?b5@q0$bU1 z+?Dl?TD@^H{&qPxJlS^dWyyJ>%&P6JLKS%%ZpSPd1kc`=QJjqsFg0-|X6aBQ*8K$FJ`;?Vi4Fa@G5F2ii=` zmKA17O<%e(^L(ww%UOq>Ej-2aUqr%|44(SJv~V?Fm0Q;cnOS3H#=FevYx} zFW6tByL*@2Spl&R!jB1%dX?dAx(B#yyv3{SyA8 zwdk3u4r8d#lBqsNrIr;d*Gey&zCPi~c7xn<Ba}u_0 za5Pl(dvwe<`q(9fe&cz8?#k*5`4{Xtq5OuDfedfys5KT#c>BxNuDe?I#=VZU zenF~yZ_4RSkJP?K*?qrf{CwW#_m;oyzMTJGQzUo5?N6=buEK~T-io z$et{ilH8#r-YDtJlWbHGQ1w&e*wU>o#)^+RIHHfF@GLG^RFE1l;o$ehACByjXzrhs zvFuiX$nhf&cJRn&9p?XV*z6C7z1YVBk^YWf7bWVNjwe;bJnDJWsWGqTk><;J0w1@W zsaJE`a^CGj42!yQMcl&LQyZ*v&9iST4twZ#+DtSoMXg6uId^BK=tZkmN#p}$bb)OeZ)vs*3p;?j|qd)&#wc9zf%kH*| z0}S?l@wo8q`s|{*MTTcS?6|l)^K(IFkfI#RW?uFAa?2VgKW_=EzoS)_7w{rOEcJrs zueRLoyJn|1{*mSCSD$VaGovMK@vSKRs=)5Ra@mjzzwXLx-e;6-`79u_=**XI(#b*X z6>g`BE_~3D44*n{O%Nm3uLX|{nsP%=4 z*L5eUR4YbK!svHd)dlmNT-Sv^dmgq3-X!V?yuB zHf@VeN&G(J-;UG850+ciwTYMJ91N3{-Vo@%z9YI|b)WBzHIL2itXs^le=)pPLsma| z`=P15EjL3qy_xg!@FDqp8UsMDiiC$DK{a&M`nyc;k z>mN&3)VIbPN3BgN+2v>RSFNeMbWeM*)xG2Ff`7t}zyDAzEB}b!>R-oY!I~wH{eKj* z$iJQ;XrKD#d7?qWt!4Ut9@1)8zV=35&Q>jZIGg#& zyhGELvAbT@-ItNyI%(7K>Cb)Z*DG#ddgNjuXQX!4?bbQ@v`4oe*1hsGvosZ3xoXdY zBP-ssX|=wdeAaL6vB%5LYVlq13U55kdnZejd5(5x@$-m_TfO(~y#C?%ObO-LrrkE_ z<{y@vjrqiRD@F42&aAVXGtc}~kXH5h{N{R-$=)ENJiGXl$C}=RZvPW>(>vyZWN`9s zpI4%1UlkU}6|Hx;SN3uOd$#nZ_e|Ca|2le~J=>K%nK%4mQxDfV4Nvz)Z_6u{%|AG5 z#WvTT{@-@jpz`CweMt-3Hm-fO^x$N>n^*rBb?RR$s_v0Ks`yzVHp6e}%7c4C6UySb zeAazj)_rFW<6ZVw^JGf5KmBvqZN2xt(j9sg@@ z9J_h@=Iw4VmmBwHMA-Brn^yYii6OAu!+Ta=dIJV%SE8FL-8U-2l(U);oMtr2Z0 z-&4cBH16g7j{HUPpDZ5vI%!wse-vD%dL{a~fbEoD61u56*6lYI8fS33RY{(HtgUe> zqOz@DG|M@4&8%ayzNQ{ozHt_p>Zu2ZQ#bGUQ(|EJPfF>QX2!y|vX^H>T>39s{I7l2 z=?GgTyY#};s|IzV_Q6Z1PFxTw8P0xxzv8ZUn_qA&v@Y0|wxlgh>^jfK%TcXsqnxa{ z<5pe1p|>|%)Xupidxzq&(w>PN#f*$ovL?7xh+XfW^XvWc(ETPabRM&H9{YbSGAwoB z?Ac5!bQ>2}7)i8A{SA{9NDGhsx8s2HjQwk)RJWLQixlsvQ&~87-3F&sR$W)8?Dd%B z9DQOD&(7i*d)ZIS5l%Vl`RSQ+*f*QC{~PE2FfV(k{bjWUTL}Ao{_Mi(yJVZQn7*%g zd}77q8;j!(PN7y07=qZ6Ago#S^WirtV*lwr4p@A#1`GMh1p3W(Ece z1_lPu!KTndOUofso%$$ymQ{T6lM{1XGK))!GLuS6;0NqX4T|=bb`<${F758slGTM< zx7~`~QrVIH`qrapZ$}{(#VxI3@--ShlN9}S-Q=C@+x_3`V8Dlk_6OucZH(J4tq`cW zF|+u$Uh$37`^D=S_8eVpc_v{k2fx4XmoqZwYxjH!RQr)@dZIsMzQO$$UGtT~BKNEp z`dN2pmA3DPPsye$-UygU&3t!6R`*tCPkrXa_ujJgfymY>Qa)tWr~a0` zufBZrym&I_^MnW2Hf>YUF$vMK=dJc#xu26w=#bkrp_2rIjh=tG1rf6{?HAG4bbr)!p-gNY!J(^O%u(9NelnV7BlJpbWG z`(TF*n*tFjXNQ$3I=A@ph4_EYdH%*)W=D;_d$e-E7iqsQRV_|m(pbLtyyg?rX`dN* z=1S@nejP=}PVLmRWF4`e&AmO-l9dqXKf>(VywHn!%m6cdWjn6QjD=baMQq%BueCY0SaIjF-K8&J5(`lim^ zGdbyLoFTb;SAEUfDi^vWPBglI+O`~x3y(rWY@*hNMgI~1BYx%i?7MFS!_Mgc?EC)v z-s9@|&#RxGo?1V@p7lW4pWxKw)(y;-7w_mxMW;U2C{|G{ zpD8DuZ`P7K$QH&>N@+Qis9aQm)%YID=L`ohkVqX6S5)LXP<7*9HB|UT=POc z$gx^Sd2%;HSMvzC1c9vuDhSNmTwN8a_}KN4%5ylkg#%+}mv8)VB$3bT)$*-@?1 zm{xoh5=?vt#v-yr)2`ve|goT*1$iF1$aNKI6FW zSDE))#m=r-x?JKrujlDGrT(^0Mq3%jTup zR9f#o^ufYZ`g30*|MFg;7ia$D)aCg#ytcMEG~vvjLsJwF<)6A47Ug1|^|UoG=$T&B z)=%qNwiVe-t~8%sKC{X>enF{y@sWz_*DF*{`zUKRJ>`pUd%ZUG?~KT`a%vVQYr@KV zUq01PUDI23b@4>6hErbxkG4r3_TgN&UdzwRTX*VGuG>1xJuXam8p7TF_K8jJ(Y1T# znz*d0be|@9LH|w0jqXC;?T>Ax+6)&ZhM1Hm&*_|7emADxFTj~a|M7CEy0&!HH#_;( z=KcF$9=@Z-@0cX-`Nyo1`dJp;O>L@kE*tc6O6q4?^m9LXUgYO)}#)6MJHe3LzG*HzbO9=45CTi0@;NbHt)>GHi^8{=J^O;^Srn{WDu zHUGqq)ouG9`tsHve=YS-QRv;lJN0W=c7@HFdLT#3Ph3mrX3;NS{*tMSr8d4hxNqZS z$rmNJ7SBC5;rs1S(c#^CxwM9R=$jKSKxKt~7Z}yeI?f1fzK5&Ib|Fl~FW6g@bhwn_EeW{ryUYZr#TcWTaY?X~UGc&B(RX=%vbxGh98EGx$Q z%dIHs&TH!-wdbVIqDajsA+t3Ffx#DUy;{FvhM{@zvA>mKpSbjHzj_e#U;5e! zme5dl&eyAqLZZ#CUlH6ab~{d%--W-wEA~dlP3TEFw@JK2F0p*BvW#4Oq{OSc z#&=G9e_Xcj?!8V1vjlTCOSwn8FSJKn?q8d=k3I2Y?L!M4JHt5pS6><5-R!Q-Ic`zB z^ZNO^+V4V3-p$+mq4fJaSLgEkkv}xK=6qap_h1=wQ;vDz_q3J`ti2h>rQEjg$?lk} zeb?2xG^zG!% z@Vk(GTIi7Q#017{HdnXne5VBKcbF=PUl98!z9G-x!lt!97a6T+DO)+w!9_EnHYxqY zcNMksia`h3)Gz$9F>A}$NVv?J!5MGs1owUL zWxZ2#L0D?{m3{Cy%X-_9QjH-B&h|I|pd@JzFD&9iYfnXk&AAMn^Fb$w7->!#}()y zbG9+)y7#aCAdyxu;ZM(`^WQJ^`{=)4KHr(8V$N=U<1T@x?H}w;g))XoK3knSWBs&* zZx40tockv5$yRX6r>v=)_ty7}(#d_ZSZW?jiO}&_u;KblkN*`rW{2!kzjL@u>CWl0 z3-a%TV-@b`?@Fj}C@K0eE$wM)j=xF3YQ=dUz8?{8^iY_`;nwuAMP}k-&ZXw_GWfo2 zRLp(!{6pXKQ!C$pkU4kZ<6a*}CS99*RdS`={5z)}fAd(;rE6JN<5WJG2d`v;5Bl4d zUcPrUCz0vd-fqox`EAPfYkMAO{Ab!?>axOZ#yvZ+d26P}$&2l=@Nv@h>h8bro?({g zuUn1>uKq`BR~(SEI~Ksez~BcxxE|8F)P=S#b-~9o79^Hr=oP@vat}Ut)?f3iXQ+p# z@98tAbxxi+|2XK|+4I2#p1xrQMj-}9n~Y308yN-`l<@`{7#Wy6)zUb1-pf}{bDOWe zm(NMhtG?$>KGD)Tt9|l}@7AYGNT&&KZJO$?a&ubhIR*v>8Ab*M2?hp+q|(fs6y2iK zub+CoV0@cJ$t|_`oC{PqSY?w$5(3UHjzY ziNw;)d)L)HSmMmXbC_}8oaZqo^rJj;0yIN9de_{45|^;(`?@{RYj>4xIJW7nmZh0! zV7@^6F6;Tfr)4ZzmL6idwV-<^%cnKE`5Mby%F+Zcv*^{cMSap0v%NG&^4*q;=F8tC zUC^*ntY6YJV}1M2!>wP-#n;!VZV>u(W!2>)jgyTxEU8Ej{-Vq{sj`kO``MRz2g5D= z;w^7CK5?44RY)WL!Apw^NQY^H!T@y10_gB85Y3O;*h|jIM7ewbqVzWyIuL5DIk=bINdOa%`1G0L8w`rWD0Qnpf-Ro1nw;^^0y$kdK!pO*7P z-F;uscD3#G$H|H7&$^z@D(CbxuetqdUeuw`-<&H>o`2V{@XCkQWUE83N_J+J$8NqR z=aG1G)4T6g)2;`ml-{>~?o!7*(^oa>_i%cOIKrnm04{ zbn%((B8wtU8{gRdO>%pxrF@%5x2nvgP1`z})_Gpa*1Kw}bE5aVuHl2G#O$~OXAa(H zHx$|XV#-v>HJYujEgQFKWTqIz)o!ee-?h*D!p7qTt6y5pQ*Ccr_a-6JY|$z95_zq) zk2iE>N$hg+S;6ggS)Ob5{<2W{&Ea2jbt zyRt{E?X9Xy>*+HtS6htV6#DzkcH&(q_~(h0qjOpD_45tCE+rj2W3VU2V$09UNeY{d zon6cSsfB3CY`yX&JMx#dii6XO`+|zzXBV1!?v{#lGYi|n@Y~T(q&%aSJwZx)i`sF< z2-mJRcfCDk=`23CHgMje-)>>byMJ;|>=*BtC*q}?(Hd0Pa^oJy$M=`Hv{M`_IoDjS zQ(h4w@!U^KGk@;A4F`7n`?o9#u4rhVvijUaP75>j=Mu;5XC1R&_Cx(ic8kr?%??#< z^EOUD8kVpf`H(G8QZ$NB?O(>g!0>_rV~s8-d1CJA0WcwdQkXC*$UlECeN=0dNvL1LcuY+3hX}H%ctFY{kMYYgg`rovrEg1 zoV;xha@Wq-aKiVtSM0m&v+9|$w@a1ZPcpdw!bvjx^{WGSuTD`+YgxK+>WVcwqTNnc zA3d*}D(KKr9<}-G)RI0~#g`MEJYzc_FMYU3dC7kd&sUbrHR&%{?=$t(ut`+U|JD14 z_p9#A8nlus$ca;S1rr0qcQytF8_aBn=~v&(;^NG_bpHZS$K0_fJ-8&XBo)$In>#hu z|8j!J@&D(|cCCG#vOr;MD^-wEuk6H z(p@#rx)@3yHDx<`tCZK<#qhYnmm8Dk7u>%1mUsGYtLASG9Czc4cRqf>cR1$erqxGR zoz-3zcgL{z(X7zbg@KyVPT&8k^*Kz_Jk_Ph^Ihz)(>8gByvd52(n2-nJ7(_K7k>NN z+vPR4mrQ=RY3t#$zIoC?{CvvqxSKD@R;F0=DXy~nv@Mo*Rn(h#uI~O?s-chbx0hbZ zxnA!cxasNiKcf1xVnVv5*ZJn1-l^MWuzj`Ber0{hNz*bMb4ndb8opis+oF0+w@^as zqS5`Ueml$gFH0$;cB)O@y~t^6dhzVQm~@82pZB%hySw~}uG@>VJ$t4~sP#QQv>-2{ z`deS!`3c=dRY%gIzBwOO2>N8U-tj0WFSo?y4IR=_=N|59Q|)ngT=Vnaw~oYB)5VO> ze7on+=JUPGz(+3GqHb|=Tk#I(RpuXJgQq8^ZL-zxd9^iZ{W9+9ig(XSX8vk8su-Rb z;n%Ffs%WWbC8_k*E>p2e?b92Eg*6Y=5B*-!c0wjaPVIcTM8f-poA@m}(--fv^qa`y zKPkfPdS(yng{2Iuo^Y4C>@%(4dJdoZe@TUz{~HTrnOj#{ zO?jWY!(UGCt+ZLeNd|S*sY?V-iW|r^~6{|>5^fgnj!d8SUZvBlX%!6*GtAkpiq&0hVHsZ;IP=K3ig&z7xq%FePVxi|a!zRLHv^Y48* zeqR4RgUH7-2Lzt+N@QK!xFGprL!0W>8CKImXXOMQ@9GO}Yt(&tB4M`TwE6Z47f$8f zO_EsYalGoP&AO<2(t%qwjiL&(57`OK3;C_Fg0=lpy2tyuESE~ZG5cHfZmOTa(COAS-QT5}{~09PzNH=(bPVp?^i_Y=S%+KC6 z=-9s7U$U-ougo*Odo|Bf)=Qx=g~@;8y5#1EmGOP452JVY#f`?jBRmc;EMse_3>+A^fle$72-Wb4%I_DIY& zk~fCadw1fknJJl30`WUo{`>G9=UB_Ebt2ef;`|Ke0_n#A#jc-R+U*tH3KNzWG|u?O zxa*r>R8IBB_jh+qvu!q9I9K^x^v&X@^Ay)@-RD$&T2D}4plZR@%$oWCoDaAgo;g0@ z>Ejtsg-?9;6!|F@TIidImRM{rqa2o3K1`l7?UV*-2AohZ&ff20rxNWHV{<*J;Lg*-E&M ztxV|qWN5?paq|rBT)E^FV-a~L#>G!;6}qn|+)ZHgpYW3vF#rcj>o1gfKe;n9FnBXz zZDb*p_KrpArManjC9W08soQp8W4c-D*AP^8uWm+|7Q+oE!{Umbe4C6xX8)TWLm zgB=@|{w|SPHgTqlk@2hSomx9SFxF@odU}iMHbvffGVjjsyTYITef-7tfak0b*A1h@ z+pc$An-5q%Z(45K^lM?gzx+z^K*6fNA%%iv{cS9*-j-=Bq4T-RW*=2v##SPdx%AoT z1~=zbJ5D}NQk?st<-w8ZT3?)8j|+Zo>ZqP>)VESX`a~<&<{6z|URfsaYffL}{ot66 zXGX@#{V@VtrdRx4#-rwCH}mJU0>cH7i#)$B6=hkv?@Y*nbvr-a{&o7sO2#RHEy+%; z3qJRU3%;p4yZ^MyspM@tT(~R$a;H6YIKv_vJNIkJyvw^Uzw>d7d-J!ePj$6f=~KS2 zuPY5#U-~s`sf9*ztTE4Oqx8EkBrEssetYht$ja6LjmWMLc`91BKV@>H8gpAK zLu1(TSrbx<`5C(}Y*Adaq|HIvr)SHp#Bv`tbK~1HZ?e2O>J|O=Zrla4u$OGx|G!oKvZ?)bfBstA^t7_gv+r)`wEz8l_UF0p@9Zo7eeT`S|MTh@ z9jxz7-N;(LZh@-eV&P;VA4$cC9?wsoD&unEWa46$`Fl$6Cp#Syl)V061rKYv;g?o} zeG!fN4^R7)Z1;Jz|IqDfm736FRW-If)u}m$YmN3b=6^c5J*7tae&V6`Z69rq-HC5} z|CoKxpMXRCKYm!q+bcYlkel)$S+aD)p&y*}>XnrmJ@Y?(Pn7I`knZ#QGuI!c@7_OD zS67A{TCXxkAYjds?1*O#+0xHCS|+#LoZh-HcYWroGJmIHnY)4+Z#Tc0cug?6Tm8$m zh(j4~Hy%4?Vtg-s#;>}wWhJ}SX4khn>|F2o;MM63zieD$1!ud9C43AyvRR^Y>6t>) z@aY9zJ8Fg5Bor?M-MOYyWmX#Fz2wKVskQA}3)h-HmFm1^cewCn&d~jXU;<|zt+r{CUGUF`8%W0k$XZB<&!ggXY32Lym%+>PQ#%! zPoi%c?S4D)%8a~SOuJuwK74hGPu4i2tUZXW8y+}mTPC)H{IU0b@83_A1|(Rl`V8X7?u-WAX}z!qqAT0 zh12#XM;=ZSy}V$n$z}4LFCBkuXE}bB(<=OL*xGAXtxB`*T#~)ICFbhYcUMBshFq4s z@AS9O?(uh7t2M_@Y`Xuz)9k$_tN&xk#<_Qwt?F03yI9h!)T#K&lH6YDLfdus5^rYZ z2wv3{^3%3xzOS`|)m*zm%$e(4lE(@5!dbHOl!{o}XOx@)<5|>w8&e#9oCi^BjqV!9SXhqd3 zuD$s>HVbDfzgw|<_4K&ucO=^`MA__XuwL61HBP$Z7aOPXq??QXOcg%7 zfBAufsus%sKgO+M*4`4~Q$Ab48GqGV=#$yq$ zMM-Q+FIu*=r3aX`e#*EkvLxfP$f1j`E{KG=tmf37|54?h%j>BVGLG0VM%r{~eUvck zn!Lknj?>x-nchd7Uj%*CPes^$OOk)!m{Z=#zVYc&xg*=!Z93NKE5H8sI9O6zwSS+z zRc}}OhD^Rw1u3a7D%VKq_0DTGDc}n~v*6RwW0`CGKIXX*-XX1jb%Z<0+ zI3XwC?w5GhdblyKj-0o)!Zc&4Kkj7VySOVcwR}3d{Mo2*MjR|AO6Xh{faXG`ZGs<(Zy0J z`EL#9Tm6>g*QHu9g^Ttb(77vIJv;5AU|6*4fvZhv>$87J&t4u_{OWK0&!jB<1Q_Ihlr7v0`{-9Rz!c;QDjwWG5uBKEXSev}&{H2uT83ikhk?}K#y zy|diYeW!j-QQ3@5i$uOB2*qsJ$HZaz?QQQ}AB9kxKuRqnbR z8+|va@Fi{8xN@geNURxiVVdI%`Hx%PeSB#Yx$=R))$Jb^L^|t*V@y6`Ma>Qc@10By&Go+=AX1li{bHb%M1JrGRu>-W{L}^o z9>Fi~PuQczrS@8z1htAdKZJ%47-}!yd zmil#df7t>K^!1oLOZoCCod21s&Iy5y8p)ieh2p#-CQf|xTO`gi;_-)4PFGH8p*Z7` zT#0E9bB$Dre%7*WOp$-ZQ!9G-Qu*v&=DR=F$j(?Ix#EWD**n)|n%_GweNk%~({|?G z#`GIuC-Y>o`<~t8YYy6`b60cbtk~Lp>Xw&EbFfcyIU1L=p0l2l=^5(?al)Rg6}Uh>0gSKX8SI+LMBsvR#sM6?Aofn z#H-zny$ZoQ%^u%7I5F;X*IcPOu7|e|m9DyQOqZ22gzrbm#z<|E%ByPcnNnV?lPdN8 zJj-VB)c>+uUrH|fwVp@%W8^KBtesirb7$>K{;=&tSAgwY@2DmuO62wD&nj8g zpy$}^n0Y>Q(VL6B;b+`?k8Zul^?Y6MWc}pEeffJ$Y-+!hiYR`Vw)if?+mNWtbra^; zwSQ=4VQl{L?vYnntKy`o3=N4kEb|?-0$==@dt%a-b!X-%7BYV1ws!xdzDrN>?YXiQ z*N$>8`LVA$&&8&>=2q9WZWeJvjSF*G4)*VoF=!9m`GuqZ@8mTPI6d!tww#yLQ?vX< z(qom*nYmVXH-9LRKk2l`!~cBDqV#F=j5U@|442_=SeuaPBVhS?spq5?o*xpeN3yqY zC7z2YIk$;%f$I#FLozpoPyGIodFID!9k~luj+H*anwzHF6yKsWS*h2{epBEt$CVLB zU&N#KY@Q@tRe#9Dz>vqzz+g{I4hhZ6#?y-48+tlO*ippR_M}RpkpqXo$JP#wlL8zM z7bNy6J4?Q`0a7t6=8J9xSu56y7 zn~&$o9hGtZBUpZV*KRc?o>$DbuiZ`Uo_@M&W@6IO!rnO#KP_7NoNL<5=rS+WJn?X! z==G1>UOo8CqxepW=j7D%Kdk2?@0b;3&&$5WCVP~-J)-U5;xl&s8~f%fOk}-mw%3)- zwsy`&OULS?>DMF=o%Wm9^;%4G_0&tt-q}5yJxwp}iO7zv=VF83G$-7s&YI-G?EmKZ zw$GR9c08Tg_jmdn-6;h^Gv?h-eQ~cgF0QE8%X7PJkonx33pOMNx4mBb>h`@!6RM5o zW^VWIN|wKLFLOcO?Hs*CDp_H^Z`gnUt0>Et-QE0M=-z?xKp_zDBqRHJXH{PT*@{HY!2)3GSMP;ZBo zcU+3+!|PkCm6Q(c`?)9UtI2~@i~fTQGr1>h+SY$!w(^7br3pLP_BosiIavH;>Er|U zH3h3?1yoP$>HgO}L*jiy+QgzGqMW(YuiSFYkPj^Sq_|iq;)ef`o9lN?5(*7_<)N>~ z9wWU(;br#>vuU#S47V+VuCmM92cKOTDtdUT!*dBY=DEopzpc7MkG=0{F`Td4?f2=h zNN2KGR>Ys=bB|0yD?4q3l5)}tZP)xk>wzj&os62q%D^y_7vow9v=MJeuM;vDFgGl^ z_;Q%QzbO)@H@XV(9C;|G(2y*AM~rF8k^qkF4jv6lJe?P~ZVYt~nHUrK?9%VOf4TR3 zS^IP;|CZoGOU{)1I^vxEOZ}JgtvdIxl$)Epmrd?GH}C4ayZPn&Z9mD^|NkZ3F#Th$ zjJ1JkvQ?j%Picbny%z^~(ptYPt2m=t!2Fr>&Jo>@72d~gbKYswN_KR!>pS;qWzz%` z#l6Z)7FFmRTX4Lb?$ptKhx^Vd$Pn->&VKU75#wRp;3etrg?Zoo@c+&Wf#Dmdt7`yn99M?uk61i-LEr%$+P0^pc^+(0%h& zR%dCs850F0w{p$Wko5NpnI>jZ9koaJz}h`oXI5;}`!>%gt1@%z%fEGuF#+p>k8hGR zeWdg1&7pbX$!pdx?Dux^uPxYifcb(>znJWqbvMJVD8^lxb25}E*Mq-BW6S!$>q)oSyJ-@l+5_}H+{^8(Mp6s7ev*Lkr z%^ALHG5xPow^?6Zberp2;{GQ84=bCPoqqAm?|v)(#V|hG>3)K|{?=%&OA-kWpJ$iL zmc}QA91Q;vb?KXcyhsykD*OC- zck}&U8k3q*6Bh5tUcAp&;OzbHpXbjhc*=b5)Vg9{F1=r6k#)yZg(@d}Qp~Dzw^oYL zPo3v<%EtNjo!~9@jw+vxuh}|oSvGOAc8BMVq5@V~A-N>!)*VMacXpo9@8DVIka_sc z;`)ETI`7?@__&xqspj31|026Cbjv;4G&`cL^Qp~J!>Bzj-8{N&6Cb>v`PXLVv)SPV zGk$b%xf`(Txa23J`YCye=(^Ceh4$;iHRmpvo)u>KuH~#1G(W!GKW154HV2+RIX~{uE#s0+ zFJm9CiThrx5xG$>`LNFQC&f!PpT6-!_R-CE`%-geomDgMEw-_pa(@d#uidy0WARl5&aj)(bw1{1m>KE$A+L9>X-}III5F;Dd2x?W>Fr#`$e~v2=#k!#7Glqe{yTX_uK7_Z0coS|`%I@3izb}1OcVoMJ)Q_3Dubvluells{lV>yUoE1AM`(cvzZy%{o4ku@x z5MJVRF7NV8sRPQtD~{F5u`buYX;B+B`6cr@6WQ3EA2-}9Dp9{vopPyoQKhISr}~~9 z+dgFpC8e?6*0{EAh2OdZLi+NPuWn=t_4;`u=-$z2>GXvse?6|5ddW0=*FU67jzPVK zIXN-smoqXjEW=!Qi&m;bMiEsYg*$|UG^98+C>FHv)^>B`<}zR3Ewdh#_h`Cl=843s zC}lW^G;vL|`C;bS_Vz?tSem-(|Hb>4__x~qVE(nZtK5uHkUO(X?%dqn&*z=CE&uoD z?_Zt=0`WB|8(tnhx~zRtdv4pCl_8hr7^bYM&Sl#!_?^XhpXSsZl3%Qy?tk`knRztr z&eOoz*_rPjFUj6_vL*51x05F#?iSAFxNCIh!umT~gPhEj=BYGk96uY;?zo~-U&zsX zlB4pw?>k@Jv|GIEl=Vcb!!>I(t~_(&IlGh3c8cV)dtX+DepmC}FZwmvHL7`EK-$F(03VPCZlOAszTMSf%H~hQkbPcVDHvJ-q8~i}4!6 zkbmvIy-^<>%8xGH;;p&wvCIw5;u((DoLe7e{0cP?iHqI!)}~93_h+fpbcs`w-4iu@ zm3FFD9nyOqK=OfjTvp z#BXRr{m~V9Gp4XJFdP@eTBU;{IR$CJA5x`H4Zm409WMIM?e>XFE*nFS_uW+sy=*Zp z)v`_HLdDAN$7;SZw>1kr3!=D$vz7cOq))kcqBGjdd*OxFrAt|7O{+ETZd_u*sb)XT z{Kv<0()oKngbVDmHkR_fw!={0`1w7{@7B+2(sy36|M&a2J@XHPd;SZA+jxu{1$xAu z1Wqf|Y+SK>!;!^Ghy4~Dy}MFvW6NpJn1h=tY?L2IO3wFQAnL|+`(upI{Nt&T=hZ(l z?GrfOZM1L3gN^#C7y72B)L1s=Nfg$&?uqYvEc&r7G-HpNU!7^^`iGk$>S7scoR7UO z+%wUEf8Wd_vn#w`)Og$M)0N9V`J=DpZQ9LizKe@eDz9xTS$tyd%I;T_tRl94z4Ty9 zl-$G%Mythnuda)l)iSv^-u-T>r)gKS-hmTSGDX?Dr$uRGHh)&F{ZM#yRYogkabch- zSMaWdbBlEPdIT*)w{7W4yYVh;>BS#w(t@8JJT{l1nQy{U-C5pI2id(c!*jf4w|8B2 zeN?=7D_^?TdX5)+rMHA#rDa96G7)v2`JZ{HN!R7y-PZHQSHSZLu;|K;7g&MiGGsmb2y zuE!p2i`4${Ehj!eXo`2}&Y*F58*q{UXb%V9BcK zT(dmZt_kmZ8P*zg$#UY&1s|BSOG-}fYW%lNXT{Vg+qWmovcFuMeU0_%>YbPKmQPfW zlQy50t-gCxDXVqjG_PBG4sW;EyEjE_`4-i+Prj$SZ`jll^Pt_ANuFbyq|C3Kye?N4 z-L7KaDr)-sMcd9v>q7nirrf-~Wv3^@?JG_P9c>m*TXjqA%jTHH-tWGPy}Hg(Z{_g) z-P8Z|Evt7pZ%tdjJo6gMg3H&Btcl$EY-{qaj1PTkKmH!dWNrUw#u=;raz|9;gVsd} z48LdF6@1X%lh3bV(ZVeBF7m%sUgWDy3K0k0KLkpKi|%PIPpt_*`29mItNj6f)s%P7 z%#F91hKc;)u%Gr(*zAbgyVO4+2hV>f=49VnXffyI$|uIX;uCjYuZZs4%XT36x&EdJ zqGujVX%hX@6nMZ+#qhA9%t2+x3{{?>~q)38qRTjY*5~*;?gwt z_4#?**qq;;@5$wr=FJXM?X|f4yI`B-o=Wu#%{N#tM}>46id@*{UuSUQ*V#qWPR|OA zkgw@&&iTAr)h=IZ+p6vt zEzkcUJWIXa#eIU9EI(&+OzQ{5bvIx9`=xNb-E0krp{VlHu1E3I;RcfigOqoBChca` zESFxmGn(VRLrc<>q&I9&N)7!swkADdoAft3K*i_z^$osZhzRjJ>_%HwHO|H&ZtpS zX8c6<&)WNrD<|8ZEjM~nZP@wSpgVTMMrr*=b$eQP;?z7B+Z4`Wyy@{Uu}86O`NeMc z4~GAa&rp42(I=2U>4$K|lrp(P$LFqBbY$#%B=NwbU(ods=P9oH@6Ijpf8^;kN2X?y zQ%zG?i(maN9b7LY{Xd7 zH?sE!cWt`lCS#<0D0u#v%S$3ANIpM#*~h8l#qp0UncUX}b+(-Q!7_j85yv-zHBHt^ zj=4Mk%|0Wls-MT|M2QME-Q~kE0*U4ZERP0vvmF5 zFA+1&32vFq$Jx9n)wu5Nx1aC6JPcm*{nsv0>-pa@|MIBDmBsxLUDVRrm*H%++uY@O z)bh{uhtJ)9G1L2fx5d>v`dr_{s{T)kUw>f$o$#(D{Jo%QTBx2?(8 zEAaI5jPEm~N@g%GTI6ZA&1K~})kSg(q+J{Rd)@A8$bH?kfNGA94_BJ7EuZna`1Z%vyJ4Rit#5hE*Vg^9K(o1_HE&CHaGP>+QK?VF z*B|K_HJ7hDwYBtSt^Iny?aa5x7dK_sy1bbA;#~g=)$2Y#?x;=?zQ)!k^s%qCPe!Xf zzH`OXz4l+Pby#UAwc9k_YDud1y4w4VQ(Si4+8bRZpVD5(X?cI&aaES<{?=u@5ux`! zNQLc{?OA*$(?HI!{zNwKp@a2&y#*G{bIaMIHr}@gF-U)BFC`i9C2`rg*)wye^qhNZ z+dS`#GZ$;{X5nI4?qy=j85eg8*KnRTjOl;)CF)fnn{KW1<@gs~Ota>>p6+~+#WZWC zt2XC~pXKT+zoIQcGRg_x=EcOoz{1MF;D#BeNKpuGwiT&3Cl;rA<`t*r6=#-YmZf4} zi?lQtvLM#DM(07&#BDz!k9JMd-@Wu#l0vUnl9Ni7)8+4sY;)3X=9W!Aw*OK6gQL@D z{Ya?!<}UeON7{#*P1t!x_49K(pP#cW|Mq-+{(VN14>ruq{ze`zRU2*hPT>tH*^#v% z@4?E(+O`yLOD&6}j;Y$}JRi4NPvMb{eR`t&vS)nN@kRN;D)&p-57`OMyYyQ_g5OSS z{zJ#;ADbTkxb!h}!MRlZ4^xcuc272b*m>Zq#F@Uw9_w?t?@yodVb|-bJ$Q(|7V@Fd8)YnxyEjx$U8^(Zg3H^e)3Vqqch3n zPm@mnl!Vn&HO??{A7T--Tk=_=%=wS<6sO-3=7;PoPR2R+y{ntAQ?&9vY6;Z3bc=Yp z5Cg+8Ev!8YjMNJ0S(t_ZohHBS4p#pKD%Diax+ZIt*=CQo8;YWQH}5;BFwfTOLw z!ozP;<4K%Oy7sPJ&il7t??vCP&ab{* zl^y+Z|F@Yl&-n1EeS7@)jcxV%z1MEN{`h`t`Mp0sb_%^qv?$F^+nyG?(v#gna+iOq>48H`ZTPk$eBx`000+pFSd8o>sAk zC)dX4D5sxH@9OdS`q3ogPn;*ZxPFFkHETVz z4z}Cu;=eVoOm6<+;wQliKE})yE8cqSb4k>x&!%w;3VZ!dS1k0~y2qnKEPId3^3O-P zyi0clq=|jrDRuo-$wq^iW5qQw>S31tY14jZEcdXCw-bL+qqu&;Jy)stIqt73miE6k z&71H}OK|6{v(hh|<8f(>!rrfsx*EPPNyR^VtakG{+uW$i#b3;R zR&0)VUNm?62T`lpd#)ZoyNL78#YLNsuC>dXe6p;!+2=^g8jkk08L=mNj;@ioJR@}O z#{`u-68&t4b4w@o`59*&Qx5FiZs@ztIQd#v!5OaOO&=flY0vJ^ydO7rU!L{l9o~m; zAD$SSSY5PLvN!VLu^p1zP52xi9bG>2b%Iub1fQ|!RrX}Vj&14Li*vKMs#oUrMh8}# zEGulu+#+RZFVTBcdiOS`^?b&6nAlFQx#n@@l+^Vd#;FarV$-gin$T@?;eAOU?vzdx^ETt& zDz(dMThzs^P8@;S%4gFu=1TIOdVger^*y;3sbIUr{B7wjHJ)DUTfCI??W-r8ysYUx zRy=OTToXD2@!FiA`hhgsNa`Whu4M*gjOPv9imc}4fAVrW5u=N1FLos#w4ABj-PRsowLIb6Qp3%>eqWYXFWa_RSTHYLROo(3io%x{ z9$SufsZBm1C%Tm(f9)yJlN0}#Yz<>eUbHfM_f_5-H!|CHDO^czeH{36@14AEqluNX zq^5;VT(^T|!Q!?P4s)DkUs;?;DXc4!Fi|$1(pEH2U`^w-tBLQ@X3ffVJpN(sw5BPU;aY|DCW zW*~Scc=E<6+M7}jh#G2#o~YPfwsl*V|1n;nEk0*Ec7$ol8=e2o*w%e2CE0s!xyt0j zcdjfy_aa2=Ra)_8(Qe<_*2QX+Hb8oPGV0M-~N1NR)`*&As5b>+%Bq3%P7}9m|b+ z{XfNB6@CAv>+rH^Hxs)rSma&c-*I}u;XT6gRRMP283fqPDi%s+A6F}UWc@;eyF7Ex z?FV`LE-h$(X}Ijs!oFt5HxrLZ#ixFdyQj@#uRMSC%vJ3+-(+4#Mb`En`^~d&+WN~C z`wz~4`kMEj%zcJAr~7o9^PezxG@kNg-4-PN-a^qm6G|yHKC*yCW{PiavRR6SVelPB9bD&(}kAi*5 zhr=}|9_U}TWwSrjpH*|@!FrDEIqZC=%$nsNuUBJM65g*`w{Pyl2U`PAH#N^p~FXFHB6DVKF+~Ja&qi7RMxeP3r<*>~B<3UF*htYuijkk=?Pf7HfiAEwc2h zUmrDIc05glJ^5%F2gij=v3Hu36mqtPbcyMA&fK5*VY`ZS)~hoG7DD=F(NSH>nc2Q; zc-7r!<<5$@({rr5ob~bBqsCfkAyOsWcUOHp&9?v0`m8^*QjUD}k#0Y^(L#u?RNUPC zP@%Cz+oHK|(Of}p@K(uf6i^p{I-0* zJ1={e|7SPD@`?hh7blZ;{G9W*;GoD4lj%zzKB#@R*!}0rm$E_nfA_rXJG!>dk)QKh z;hus8YtCD(2#vI_x}aO%yz5!|La|e}>m`NXOybIT{9xCHl7gA`O3b?sIEemvV6tw> zmy)H6|C_X~eqnO$=du2oZx3sZdt<307Vl9>x5#2hch zDnuBb$ojv>*JRGcvu_{Wxf5~yT7=S~Zm%$lX^)guUR>U`;b0>7|1)oIf1I0^;xt1d za_{vsGRKM?u5E4?&|_3oESZWkN49P#X|Gs z7iaukGQ>8iocA()-n!vdzzg9A>@OE3JFjeSZt9WQE}!6*rnDgU!sj&+#U?Xq-oBoq z-t&{sOmf-OJ-659rOunkJo)Rgh~vtPocn)V$?jV8(eaPf@7|-`>tyY|X`ej6?O4UZ z!}$88zNYXTqiTg4HftBIlh`8lHpN$F$>E;6M}8k@@?V;r7GS+dIcDAJS4L^=VNoCF ze=KhmS?7B7ZEZuKtkyBN2dhQ%wi)H5=&#={u3PdTX8sd%t=`#x&T`DSlytNyaJ6bF zzc9;-LoL^O3PQJk_V=i2-`{m1a!d29juYOGmsYsxyXU<>^2PP)e%b$p6T|;DuhN}T zZ5Q~@VyE__?-~~#-RgTk{X@3l&WndLBJC?@^-bD7eM{UY+qDvRy4Uwj%h2pyQvA~H z3(xF1@0PQ7h^%H8USl@d{i2`F>!mRpF3+ygd@r%{g6XVNclVk+lK9B**vUgJs_i|~ zhSiHXOXdj~97~%dps;uM3&oBb?);ah8>VN^UA&^`^r`;KC*-8QhIFkBf4N<&H#Dy+ zRL|C_Kj|OGuT5{)Klp$6i^R^Bm9G`PoM)CUw|{E>HP|uu=H@4_7CSAAvsyge?fUoN zz~IP+>P#Q!O8MTSSC79f|9;V8nR880Ro$}u3qPd%Tkl_XPM3S6{%77HPFs_Y0#CnR zUZNIhweumz{qBd4JwR(@{u9NO7-x!EC?vlCEuDO0b z(}$3w-4RTSq}J!$e*bpyDH)eD5)YsHdW77%xSR8y>8wRpgr@w~`n8g)Cr#|!tA#h6 zY<9Pwn^>0Bd;b0Fg_i9rF1+{76JK?KQF7kx2P@C%o)VoLYVY!SqxPRkwP#mcG>&}5 zIjySe>7BN}J8Pa?IqjaIS#r`d+}yCfWf}j{=>jiLrGA#`tFhUFQ=j4}#RZM%`C*&9{b2L6YuT{@%E?Z{si`QRNUdlM1vu_pMc_C}{tgnuFd5glm z)?dE7;PQgzz1Q03{$;#YCA2^Lr)3qJE&tTd)*Fr(dVZBDH{!oo&GPHvwFCQI^JLwp z`8|yfl(%J{{axMAEzoxU#I< z)2>%ApS-i`l(R6Q+SOg9UU+rPflore#4_%LI9z@> zUwy-2(*+eX;||_Pa#;7{%u)-Ljm5%yS8cf5exjcD-g(c;Q*xEgk{^A4ZOawBuJwU= z)~D#dTehs+GyNQE%{8X5>86aVud80}Y3mZ%_w8cQj79U$wZGI`Q5dd~eTCt$&^HUs zy)BM=%w}`9p5@vQ=+V)$U*qJHB!kG#GuLwTcK*A=GQISYx5E}ag`l@}4avzFJV*rDscb$4G%5WHi@j*5OrgpS7q1t#7kV)6ArvR|MhikNvHRVbR~r+ z3o@qIc9+F*vMs%<@=|rnaB zJNf~^lbJHE?CQ1MA}Adt;PEb9cv}$5h1)R|JNIh(vsF&EK6o*%;+0yp6|Ef;@tOuw(#V&`QXrlw!r-hE?2qOg;9`-RGRIY;%8kCc|!Z!{62|-@aezx_aTO#-Y33rYX)- z*f;UA)=BvXWft;WxWTsLzun&MjiwJ)98*~_IkWK||BH5xpD}ea>s;*Jgns;t{KaWf za4}~e`>lI!d2imZ{#xl}>^aLaBiFWm+NICW8A7A4=^Lbdu24Su*mlWMJ^j<2zn)Le zSjZlG#XtPO%(T7LoA^U--1echtKmhSpRcRy`7(7S-W7*MwVMF0eOG5 z|4M&*=5EjU%R;NZw{%I>`@`Fq#81DlKcLKhr}g~YmkZXvsBga3_KTlkll#lm{?ub>#n$3J+S-7@0D9uZqS{`Fz?voEBCw|Lo6me zxjpIg?aj~ogFJLDUvz$Wm~)ng+1WaChTq5PqVA|HP1i5I#xC+>$>Z-!xUBj1~Sm&+@Nt%(Yu|`bEBm(m%gygL2)TB5q^ba~6g-T+csV z;py}{V~T|PRm0iMJOUZ#9qZROm5INpS=v-)&etNSweGG3@A9P!{f~#SL{c4 zk&^oTcAuZ`kGza%4YN@eE3=;$njXGD*&jo-KQIooTu2N97-}HHw{Hi#p{G@Tb~rU%hzK#3n=Es`HlL z@7%Y1ey{lU{Qdj>G6Wp!ViUbAwQ0snbDPi2 zcFLz2GWk7&GtRus;qS_;am<;0WYD`FDyGJ za_n+h{3R$z-g*ts6V*FihEw>Q6;H36VA!@<@Uh_91urye+<)y{SnuYQcH{d?h7FQT z-nrNLHV7~-E$LHfee!vS#KQM(rrDFSYFyp!FV8A!<4lRvp0?_mVs>s(pqQQ9$9EI^ zd*)y4wcT)+S#n=T>Wr5KMmt3EW=~NwkW4GFjInf8XMIuPr^UlF@p`7z`6W;JFC1W% z@-|sCV`=?@M$3%-Xlps$FQ&bj#Kgcbhn2`Q4@vHjIRr?WpBntu548IEGXJ(ITVqd6 z*O=85dP_G|^O1VTF)3kXMqwVd|GaM_rsb5+zI*h=;vX#krn)FxJi`8AVNdxoZKvBT zKZ>8dMIpJ&GZ!c^4KkdDRQ`x6icWyq~r~6oACP(?(2FLpWyw<52HBWaPT5oB? z!SXgLd*1ymv+VLM4z04@7Wz3#_nlcvxA`6ZmUxR^b;g;?IO1E|uwRwc^E}j4Wwx9D4t9c2b z$9a7WH+1#Ao^k2Jl!Zl_uXv9A|0Qi*y-3dJ%p7~;s?)BPd5dSuzT|Uy*M892`cA6E z>*AeDzkfMZxm))R+r_u#3)Dq3m#utu{&Abl-yJ-EuSxh$Ip!JX=Qs1(l2g4GW=k*4 zn7zniZRBT(!p$`uEtcgRWeXN0b4^?D-Li${vf+sp8mANHeEP}9BfNu6!+%@ismZ-b zPcG-WGBK=G`$M*k1W|WN#gE1+Zb0V^UT(X zzqnCnAS%=zg(R>rFi3GTFj!+2>#(z8h?xXk8x~zI6)N(NkH^Po=Ch`SrfO?Fc|@6b z*KW(yHSxN9CE=v&%FayfvIfqXB54I_9WMWy->-XG9sim;H^w`B{$JUDe5Lan6;d`V zjXd-y=G)Em_xbg|=hXgqygt63y@1u;!=nAR(lpgMM`iD=@z~MruewI*?DF{^FWPcS zuJMdGeoVmZ<4=Ko4%5uPvwv91!oQ7egKbM;%~5shJ(UWN6z~7{IC5R7i09vuW!c|^ zn$2ZyCJ9XIU3AJ|(#4X}y^QeE z-&&Rw)zU5eaJ^M>+s*5*CvUr0^i}Jvn$aaOZeK2myxkWp)VgJN3;7ylZ42vLuX)ww z$&oom;iej#I=-G&i{$b&pU&D9w0_pARLcoWdvAQnC_YlszS2b1Wh1X#&kdequLOIg zWt&5C(`U-hnf&5hnf#o?uRcE9d2Q{ilEWq5({}8iTIHClB=V6r)qM5TlA~ptem|D} zd~^Bj6PgFted9fGbLYhS;oN0PkHi8`y_#NGywrN}+ozA}V-LlJ>sPKSkomUR?X1p< za?^Q-GU~-^8*fkgXr?CN=6`GIN4;gKXA+{0Bul<`7MwUe&>`@cPJG}-?L|kzzqw=> zef#L*66waei^JJX^!_P>gX<6IN}6TyOek!4pK_(}#O(Ky=Sq%hD>qeIH7?hR7WpP^ z@b=of3tlsqM!8;n`>iu=MqTXIg+)=y?)9MwZ;o%vj=z2Q=s%N{8!oJV6cYSMQQ+Mg2FbIJ!t-`SA2L|5b(O}{ zz3X^oubMuX7Iaee-?gi|-r5}If01}~uIKmQ8~a`@S|)z=*D*ck4dSAi|5UDI{O;XQ zT|WP4+J#jgoqfMmXuK>ruWl*vPE|bZ#s8z0ANo0t<=bw3e?iC3_O9=bXY-r?Ix+LB zt^8_zNMYH5_iGn7eBQyp!KBaYXMg!sdZoYB`^0srTfAyiM5QHe{JWmbz_fKTOG($h=YmT8G6zd8M+r9?b6xD8c#k7=fx+9f zJrxtgzW-CN{2n~NyJ$ZzkC}pOVn^cNOM)qF^EWs(8}sNt+^Olq(fE+fO{~`O{&|V+ zizQz;lH+phkE)#A;~3A}$f@{3SkC-ISoX{J;v3VS9DKcCpTm@<8yted+(!x857wVqs65kWBn#E8t^T-mm=6q-C$Tm==tSea0bzW`Xj|H7x6I^P) zxl5jZqqkJVvu5U8%inX~*W9->-v7VmKVyQRwq{B|K%kFelN0|hgZYb|wsp<+)LAm2 zVcs!a9ks&fy7wz~Xz&02WYdyI`?}bK)?E5G6Oxd^0(!GBb^gY!CG=z6)cl%qz-vj zuDfVtd(ot_p@cK)Q1ULDyMCrqKDrzZ*39_!|5#;PYI61T>nqx)pX+OT*P(rnb#L_Y zscqWs8}C{EQkYueo|zEa%=Q1|zSW0fvrYEa`dpfQ@x-Qs%I%f@5h#&xvb_i{b@;zgfKgg4TUS4P`e=+)Y}O@Rc`ko=>x9*kMY^K< z73&)AC97=P^h$A(eRsTowG+Q&t$bUs`okB3mjwSbeOnn@IK!kVYh}pd`9F@YbdnR6 zy*mBY)#I9SA6LIFne_kBq_0zS9x+d)hDssGWc z?`;P*6whE{U|7Y*z(B%@JC5avMXBH{QiQZPeQoGlKjAjI%{x(W{`|fA&uJyZr+*56KBm=@i7JfONgDOX z{hIbVXU{XG0PWv*mhaj3)?(R>EjypjY>DQ3Yjk^l=fCV-C+~;(erl=jNR*hV_4Z@< zo`mA%ff~OL6-lk$xtu3ZPioV{c`Cg}O3PxNcPLb@x%vC=i$m6S_m+i~MK4Ori~Lk! zWcwm=sIr1w$^~Bz3KZjx9L3m`83Sb!#Ha7B7Gw!|h;=8r=Xz;lSivRV~S3Z(smQXt!Zf$)(?(UAKvb<$FIoBoG zD);_-(5qk3dRzCi{^^jHOJ4;ppZPiJ?3X=n4#~vc=@gD}-MK{n-|4BgFK_02Uom%H zwvEj4o{Q2!%gfVQ4i_D5EQ!*ybzKyY(chM_>G=t5K_xGdEsHV1o+JaW6W!7na(w^)*Pk?h1>!ZD{VlRK> zqqbPpSv+_o85tPTL4y&P1wYb21ll6K-audP!wv$qm#-bWvVN8Nc_kArxAnf79)TiE zT%7l|zucAMX};<1lz(RY{}}40?C@R4GTBLd+T6;`@AoD?d;a~k^a0y*vL&)}gyyIO zbFbrFc%OTD&cQ!ZG#~DlSa;LnsUv5#tbb(^(|RsJtNzn1^-?oe?rb)0lJ;CUXv(?t*3QkZWtW)k zTQ~PuyNK)UaM7gBpo|v>s@1=+74FZnSu;0i%2TU{{zhAWiL36sSj8E0e_ce&>9^-* zuWxx*&wc1@*`sE$pFP@}Z-&fv$Ve!irRyZDJNxa;HK`xXWTZ_u#E0CO<#jxauOQ+` z_7jfdN2@=vqlT~ZMX_hEm>C#qxEY8$#?C1Q`@Mw03<8WNap|;^*;j;L5r+1#WeZKSjyy|cNzP``bXIN4ge=PFM+(l_| zZW6^Zl9EDVsX7m$7`%6!``9@{r+5v=4PHs3Fw1Lf8_pF5A7itdYS`i1H>XEno5d%Y z?UKLVzW-WP(w3-`9yK-OXi)j;s8umngCjSG1^3>alb5kQboIJONhkeRZF%~JYy7JY z$GclRyK5GcmNnb))U4f7hb0qlWL@!^RqC!AdQ#}r%+#&B)<*4`Vct7w!D)fGi|=Mn z+qNvaboT`f(^?}9>9&QDH&;ci&t{qZSw-^gs}4UY4S9#%9hWP`+8i%y+*pj>+ z%z~P822c9#Mr_HLyngPalEwpb8)+| ztWD<=zB{b5p7Hwz?`6xgl*^i|b-$-D#%$*d9`{B@9{&4YT9upqtY1E>Hd3CoB_mC8 z(cU-76S&h3YX#5hkdb+Jz=wOM=$ykHO^JI>7q%-_DoGxFwwr6)N7m!hxw>v7c2sz- zcp$egX~y@SlIs;2Gx~e=PFB2Ip~@<5pcYYkTEO1wz^2g5D<+5IYJ;xLn*Yz~a_W+q zVkfm8D&?JXRXg@t_%hGI=09hq%Vng_er(OE<)xG~^Wm!J6V3W9E);xceK`Hq;i=}U z+4l+*IqTVn>Ap2sxL7RBzwY46xXR_PH+DX5k}#{*a@rfG6{$OYYn82)x%2ebLHyq< zPxW5i@HOkzv|k<~=bhQKQbff65TJkMC0KgLPCDVQ*?(fQTC8@^Mw z2UOk4E?GZ|Ek4~zbWYIbWs`J1$5!04j$0YFm9PA!_M^X_*-~#jzW(4Z|BwAgeCHg| zR{FAl>#}tZYxAN#mrwA|U-9kB_p1Ih^UQb4<2sMlx9W@ezd88d_-t*({9BikdSjjC zqn#RJkF%#pFEc%qJf-HaWZ+g+>BH_jZhZM=&UV3l?>7FG68;-(zvLdRYkV*HOZfc3 z+8@l!+?P7Os9#$5^NK&mTW7svnNl5sah1FJ4<^0X^2Pd`$lsHPj9I=Ld?}J&xah`2 znTc_V_bYELs+ODcBCYckZ_lRCo{gp|H>`|un(uX6w*O0=9?`w8jnhXc*KpRNiTsRj z6xvv1Yo|Q%Op*!`P>I}Cxx;3y$CYAt-LeH?`wI?>%{4F-zr=Oe(EF8fddbO)-aUGc zlb=_|6xPHXZNIX9LRtDt%bIyQfv=9MevFB8m=kwY^Oc}XLxo@$>9p5j2a&e(H&*9v-F7YO zs_Pw7Ct;T(t>P2J#Dnw$Ue?W$yF5`vSnaaYJ+JU4ZutZ4Lh6|UD;Uvcr=3w#pHQ}s5m zc!%lm{s@VvIJv0n>;5TeF03I77+jf_xG#`75^y|Rr13GMSMIuux=m}3$(vX@zRjvw zyY=D*-nEyD4`+Nk!<&9(=At|Si$BLrSKf|Ypy$>oa%Ix~;tlMUUmpaXX6ih*=+P#r znOeeAi+}4o%nMED^wN5kypbfH{9cTI^dzODPq{^z4^HM<|| zKinnH=iW?dX34xXPyYLy_chPw+`eC5_m8Q7;kcXBZ8M`A=N4r>mMqh`qHDr2Gko5c zMb}$G_7*xBSRcHgx~9zbrCeOk?{&X4e%V!f-Az#VBfQWi*MCl0$+Om$I~xNQuCAKc zabxCxg{7;l_Hj5*=a0VY&^u?Mkn{agwtde_nm$IHSh#M&lhEatZnqqYsCl_NXXT3* z!B1ExZHRk6uiN|K52wC4aZ7A0v&{WCBlY)id2*Uw(n){Bq1`ygY4Zke4Ygmq_m&rT zozdLAW3FV*vkBZ!5*5~;*f;n3`AfxIPeM*uTW-0a(h#s(zs)M+b*}XM`NFII3;S-U zKG-U?cqx*{!9_;nAD6N4cW9d`Z(O0lbZ1&P4lblv+L0YvklL<2~9e13?$_VU+IaSTZkl!Q8I4Arj1y-)A9+tZwq)(hC!a*-e=(hJGH-#xj_!8a4 z0{{G?IhHVbu^zEbN?oA4g;j7vg=VN;kVb^-#rTCsq%=4uvwak;VYttnUJ@{~@Xzy} z{zuLh?>xCgyk{RWcsze|b=mit^zzNSE%*Pc`OT-$<|=r$!{m~X=*6ZEk4=vHZ3W&3 zHZ6mLpp?^!qZzeeo4}&YS^RGB5oVqagQg4h(`ia$*s*}#Hp15S+ zM#b#anxVmBDT^0K6mF6WH@&*&xWU!eIsewTs>bPV?EbE6STX6>szVnJv3`!0*6#Wk z5_fX)k@=T47u9*1h3wPkvb_`Aw(UrEu%TM$;)JwqVOOwB^>GT2W2kLsd6tM@_zeYT}+XPn9Gu z2W*Ym6LabLn@UNupAV+>%J|1f%GFxl-K4(U%gI|j_3FO8xx(TfCNT26PXF|9?&0pp zqYtaZpL+M`H}f<`oipr^tUY}~YqHk*8GS~x8RyAy*3Fsxz z-Q@6xmA6(+oAE|k`1YoSGQBG&C4G2O>-6+pjB9B30kcI`lQ%~$4dCc;Tj|xFmC<9j z+jzT=sr<~z=0C4kC@tVSr>W-b|8j!I`Zb!7#s70wdK_TtU8THmuYKO0IgZxT&drRP zUA}EuV7T_J_rW~#UWvw?-DC0fU-+v->W;4Bf1gIbusu*?XLq5a`0&x^(q~_r-SWMB z&o_jt&F-)$Piy(Ze;(E1w=#2|S#9gE`ok>Fc5%mM_QILB-yYm8!!_CC)%|d@ zw=z2(J#Tnp8FiC;%jQ&;8``JmE~vfOXw4bpZoc#2PU#~%HC|{i?cm(rzxaz-tWR*p zpCt)L&+sL$d)|K2iE~E*gWiJv2Q!Ycu3cd%?sVf+*8Wz*25uL14d%079=%dI8M^4Y zjZ#&|q3aInNfM0pSw{ub_fCHi`@ubmM_5igt#g^pqj=O@nR%}kynDulfq_Aufq@~t zC^01`RW~^&Q#Uy$vADQ^oErd)0x)j?c#3lFKO>U}GZzB`2M2@Jrm5}>3=9mgQ|~!J z955DOcU#RQ=?8eDYP&vlMiA)OqEH3~1`&`-C|=U2$-uw>*XHZ!=jrAe z9HQszhHL`pZY7Wgn8hq04j2nC{B@iKt@!Zi;0CJ(5g;A=pgJHSh~ijutAs&%z!;?Q zC8|c~1q>JlWHhCuEoNX~xC*_m0j!^afniG{=)wdn_k+Q1g;FtubkWega`ay7H4!ET zh5{A_22k`P3HgPqZBeEue)cohQ!!)*r0rJV>H z%UDp>8NkB86Xpl(K44i{bvK5IfgzBEfk78$7&u50Sh1St4o=;n#i>Oe`MIe+`T5zU z1=x+2c@%s5C@TZQFD}T0FwFEVjqDs)jdra_&M7U`jPEIW@_D#%7OiwMs zZowJeI5*G<+CSOQ-7d?A)q-%ywQtV(d1;yHrA3J)@H^hngE_EPHuEV91A`jd@MQX|FO!y&@F;!v#Sp+vn3j{6SL^8)(qc?er-!?D_ezr;bmGzVea)p&U22`#;_ zR-nl415IwBUwVTuFfD-r=M^~Urhs&TvLuKGT^j?U85ls7OI9L^FF^~q&^1p$yF><} z8C0>%#MBI1CV2%ArPY>9HhxTbeE$aYJ{-mWfO)iVAGIi)3Lj}8|^>^gqa7sFwG?X zcm;H~8=-CTN7ys92g4rB!o!)EE&u3twxVs-M%d}xPuNcE;f21Y7GdVQg@nz-vceYK z(dY{p5f(gJLWBj7qy#CYu=@ag`53~2qsuWYfYlPj4E&<|1bw*&!lJr07#3k9SBOWj zdjWk49bt0IdJK~>6BEQ}>=WwfoQZO489Rir*Ei!c7931PDtPAa z&^>@Yu8*)udIyG0==}$zup{6h^s#1yHScy}Sc7CN)I$&oPuYh)=!h`ibq^8d;~s)U m53fwLaTJ6_xA(&vhPXym0=!w-KpG4g3>gFj85rUYfOr7Y2S`i+ literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/demo/gradle/wrapper/gradle-wrapper.properties b/tensorflow/contrib/lite/java/demo/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000..fa7a38a0e4 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Thu Sep 28 09:01:41 PDT 2017 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-3.3-all.zip diff --git a/tensorflow/contrib/lite/java/demo/gradlew b/tensorflow/contrib/lite/java/demo/gradlew new file mode 100755 index 0000000000..9d82f78915 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/gradlew @@ -0,0 +1,160 @@ +#!/usr/bin/env bash + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn ( ) { + echo "$*" +} + +die ( ) { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; +esac + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules +function splitJvmOpts() { + JVM_OPTS=("$@") +} +eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS +JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" + +exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" diff --git a/tensorflow/contrib/lite/java/demo/gradlew.bat b/tensorflow/contrib/lite/java/demo/gradlew.bat new file mode 100644 index 0000000000..8a0b282aa6 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/gradlew.bat @@ -0,0 +1,90 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windowz variants + +if not "%OS%" == "Windows_NT" goto win9xME_args +if "%@eval[2+2]" == "4" goto 4NT_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* +goto execute + +:4NT_args +@rem Get arguments from the 4NT Shell from JP Software +set CMD_LINE_ARGS=%$ + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/tensorflow/contrib/lite/java/demo/settings.gradle b/tensorflow/contrib/lite/java/demo/settings.gradle new file mode 100644 index 0000000000..e7b4def49c --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/settings.gradle @@ -0,0 +1 @@ +include ':app' diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java new file mode 100644 index 0000000000..d63c299589 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/DataType.java @@ -0,0 +1,76 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +/** Type of elements in a {@link TfLiteTensor}. */ +enum DataType { + /** 32-bit single precision floating point. */ + FLOAT32(1), + + /** 32-bit signed integer. */ + INT32(2), + + /** 8-bit unsigned integer. */ + UINT8(3), + + /** 64-bit signed integer. */ + INT64(4), + + /** A {@link ByteBuffer}. */ + BYTEBUFFER(999); + + private final int value; + + DataType(int value) { + this.value = value; + } + + /** Corresponding value of the kTfLite* enum in the TensorFlow Lite CC API. */ + int getNumber() { + return value; + } + + /** Converts an integer to the corresponding type. */ + static DataType fromNumber(int c) { + for (DataType t : values) { + if (t.value == c) { + return t; + } + } + throw new IllegalArgumentException( + "DataType " + c + " is not recognized in Java (version " + TensorFlowLite.version() + ")"); + } + + /** Returns byte size of the type. */ + int elemByteSize() { + switch (this) { + case FLOAT32: + return 4; + case INT32: + return 4; + case UINT8: + return 1; + case INT64: + return 8; + case BYTEBUFFER: + return 1; + } + throw new IllegalArgumentException("DataType " + this + " is not supported yet"); + } + + // Cached to avoid copying it + private static final DataType[] values = values(); +} diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java new file mode 100644 index 0000000000..dd883d69d2 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -0,0 +1,172 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import java.io.File; +import java.nio.MappedByteBuffer; +import java.util.HashMap; +import java.util.Map; +import javax.validation.constraints.NotNull; + +/** + * Driver class to drive model inference with TensorFlow Lite. + * + *

A {@code Interpreter} encapsulates a pre-trained TensorFlow Lite model, in which operations + * are executed for model inference. + * + *

For example, if a model takes only one input and returns only one output: + * + *

{@code
+ * try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) {
+ *   interpreter.run(input, output);
+ * }
+ * }
+ * + *

If a model takes multiple inputs or outputs: + * + *

{@code
+ * Object[] inputs = {input0, input1, ...};
+ * Map map_of_indices_to_outputs = new HashMap<>();
+ * float[][][] ith_output = new float[3][2][4];
+ * map_of_indices_to_outputs.put(i, ith_output);
+ * try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) {
+ *   interpreter.runForMultipleInputsOutputs(inputs, map_of_indices_to_outputs);
+ * }
+ * }
+ * + *

Orders of inputs and outputs are determined when converting TensorFlow model to TensorFlowLite + * model with Toco. + * + *

WARNING:Instances of a {@code Interpreter} is not thread-safe. A {@code + * Interpreter} owns resources that must be explicitly freed by invoking {@link #close()} + */ +public final class Interpreter implements AutoCloseable { + + /** + * Initializes a {@code Interpreter} + * + * @param modelFile: a File of a pre-trained TF Lite model. + */ + public Interpreter(@NotNull File modelFile) { + if (modelFile == null) { + return; + } + wrapper = new NativeInterpreterWrapper(modelFile.getAbsolutePath()); + } + + /** + * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file. + * + *

The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code + * Interpreter}. + */ + public Interpreter(@NotNull MappedByteBuffer mappedByteBuffer) { + wrapper = new NativeInterpreterWrapper(mappedByteBuffer); + } + + /** + * Runs model inference if the model takes only one input, and provides only one output. + * + * @param input an array or multidimensional array, or a {@link ByteBuffer} of primitive types + * including int, float, long, and byte. {@link ByteBuffer} is the preferred way to pass large + * input data. When {@link ByteBuffer} is used, its content should remain unchanged until + * model inference is done. + * @param output a multidimensional array of output data. + */ + public void run(@NotNull Object input, @NotNull Object output) { + Object[] inputs = {input}; + Map outputs = new HashMap<>(); + outputs.put(0, output); + runForMultipleInputsOutputs(inputs, outputs); + } + + /** + * Runs model inference if the model takes multiple inputs, or returns multiple outputs. + * + * @param inputs an array of input data. The inputs should be in the same order as inputs of the + * model. Each input can be an array or multidimensional array, or a {@link ByteBuffer} of + * primitive types including int, float, long, and byte. {@link ByteBuffer} is the preferred + * way to pass large input data. When {@link ByteBuffer} is used, its content should remain + * unchanged until model inference is done. + * @param outputs a map mapping output indices to multidimensional arrays of output data. It only + * needs to keep entries for the outputs to be used. + */ + public void runForMultipleInputsOutputs( + @NotNull Object[] inputs, @NotNull Map outputs) { + if (wrapper == null) { + throw new IllegalStateException("The Interpreter has already been closed."); + } + Tensor[] tensors = wrapper.run(inputs); + if (outputs == null || tensors == null || outputs.size() > tensors.length) { + throw new IllegalArgumentException("Outputs do not match with model outputs."); + } + final int size = tensors.length; + for (Integer idx : outputs.keySet()) { + if (idx == null || idx < 0 || idx >= size) { + throw new IllegalArgumentException( + String.format("Invalid index of output %d (should be in range [0, %d))", idx, size)); + } + tensors[idx].copyTo(outputs.get(idx)); + } + } + + /** + * Resizes idx-th input of the native model to the given dims. + * + *

IllegalArgumentException will be thrown if it fails to resize. + */ + public void resizeInput(int idx, @NotNull int[] dims) { + if (wrapper == null) { + throw new IllegalStateException("The Interpreter has already been closed."); + } + wrapper.resizeInput(idx, dims); + } + + /** + * Gets index of an input given the op name of the input. + * + *

IllegalArgumentException will be thrown if the op name does not exist in the model file used + * to initialize the {@link Interpreter}. + */ + public int getInputIndex(String opName) { + if (wrapper == null) { + throw new IllegalStateException("The Interpreter has already been closed."); + } + return wrapper.getInputIndex(opName); + } + + /** + * Gets index of an output given the op name of the output. + * + *

IllegalArgumentException will be thrown if the op name does not exist in the model file used + * to initialize the {@link Interpreter}. + */ + public int getOutputIndex(String opName) { + if (wrapper == null) { + throw new IllegalStateException("The Interpreter has already been closed."); + } + return wrapper.getOutputIndex(opName); + } + + /** Release resources associated with the {@code Interpreter}. */ + @Override + public void close() { + wrapper.close(); + wrapper = null; + } + + NativeInterpreterWrapper wrapper; +} diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java new file mode 100644 index 0000000000..1939a078ad --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -0,0 +1,276 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import java.lang.reflect.Array; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.util.HashMap; +import java.util.Map; + +/** + * A wrapper wraps native interpreter and controls model execution. + * + *

WARNING: Resources consumed by the {@code NativeInterpreterWrapper} object must be + * explicitly freed by invoking the {@link #close()} method when the {@code + * NativeInterpreterWrapper} object is no longer needed. + */ +final class NativeInterpreterWrapper implements AutoCloseable { + + NativeInterpreterWrapper(String modelPath) { + errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); + modelHandle = createModel(modelPath, errorHandle); + interpreterHandle = createInterpreter(modelHandle); + } + + /** + * Initializes a {@code NativeInterpreterWrapper} with a {@code MappedByteBuffer}. The + * MappedByteBuffer should not be modified after the construction of a {@code + * NativeInterpreterWrapper}. + */ + NativeInterpreterWrapper(MappedByteBuffer mappedByteBuffer) { + modelByteBuffer = mappedByteBuffer; + errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); + modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle); + interpreterHandle = createInterpreter(modelHandle); + } + + /** Releases resources associated with this {@code NativeInterpreterWrapper}. */ + @Override + public void close() { + delete(errorHandle, modelHandle, interpreterHandle); + errorHandle = 0; + modelHandle = 0; + interpreterHandle = 0; + modelByteBuffer = null; + inputsIndexes = null; + outputsIndexes = null; + } + + /** Sets inputs, runs model inference and returns outputs. */ + Tensor[] run(Object[] inputs) { + if (inputs == null || inputs.length == 0) { + throw new IllegalArgumentException("Invalid inputs. Inputs should not be null or empty."); + } + int[] dataTypes = new int[inputs.length]; + Object[] sizes = new Object[inputs.length]; + int[] numsOfBytes = new int[inputs.length]; + for (int i = 0; i < inputs.length; ++i) { + DataType dataType = dataTypeOf(inputs[i]); + dataTypes[i] = dataType.getNumber(); + if (dataType == DataType.BYTEBUFFER) { + ByteBuffer buffer = (ByteBuffer) inputs[i]; + if (buffer.order() != ByteOrder.nativeOrder()) { + throw new IllegalArgumentException( + "Invalid ByteBuffer. It shoud use ByteOrder.nativeOrder()."); + } + numsOfBytes[i] = buffer.limit(); + sizes[i] = getInputDims(interpreterHandle, i, numsOfBytes[i]); + } else if (isNonEmptyArray(inputs[i])) { + int[] dims = shapeOf(inputs[i]); + sizes[i] = dims; + numsOfBytes[i] = dataType.elemByteSize() * numElements(dims); + } else { + throw new IllegalArgumentException( + String.format( + "%d-th element of the %d inputs is not an array or a ByteBuffer.", + i, inputs.length)); + } + } + long[] outputsHandles = + run(interpreterHandle, errorHandle, sizes, dataTypes, numsOfBytes, inputs); + if (outputsHandles == null || outputsHandles.length == 0) { + throw new IllegalStateException("Interpreter has no outputs."); + } + Tensor[] outputs = new Tensor[outputsHandles.length]; + for (int i = 0; i < outputsHandles.length; ++i) { + outputs[i] = Tensor.fromHandle(outputsHandles[i]); + } + return outputs; + } + + /** Resizes dimensions of a specific input. */ + void resizeInput(int idx, int[] dims) { + resizeInput(interpreterHandle, errorHandle, idx, dims); + } + + void setUseNNAPI(boolean useNNAPI) { + useNNAPI(interpreterHandle, useNNAPI); + } + + /** Gets index of an input given its name. */ + int getInputIndex(String name) { + if (inputsIndexes == null) { + String[] names = getInputNames(interpreterHandle); + inputsIndexes = new HashMap<>(); + if (names != null) { + for (int i = 0; i < names.length; ++i) { + inputsIndexes.put(names[i], i); + } + } + } + if (inputsIndexes.containsKey(name)) { + return inputsIndexes.get(name); + } else { + throw new IllegalArgumentException( + String.format( + "%s is not a valid name for any input. The indexes of the inputs are %s", + name, inputsIndexes.toString())); + } + } + + /** Gets index of an output given its name. */ + int getOutputIndex(String name) { + if (outputsIndexes == null) { + String[] names = getOutputNames(interpreterHandle); + outputsIndexes = new HashMap<>(); + if (names != null) { + for (int i = 0; i < names.length; ++i) { + outputsIndexes.put(names[i], i); + } + } + } + if (outputsIndexes.containsKey(name)) { + return outputsIndexes.get(name); + } else { + throw new IllegalArgumentException( + String.format( + "%s is not a valid name for any output. The indexes of the outputs are %s", + name, outputsIndexes.toString())); + } + } + + static int numElements(int[] shape) { + if (shape == null) { + return 0; + } + int n = 1; + for (int i = 0; i < shape.length; i++) { + n *= shape[i]; + } + return n; + } + + static boolean isNonEmptyArray(Object o) { + return (o != null && o.getClass().isArray() && Array.getLength(o) != 0); + } + + /** Returns the type of the data. */ + static DataType dataTypeOf(Object o) { + if (o != null) { + Class c = o.getClass(); + while (c.isArray()) { + c = c.getComponentType(); + } + if (float.class.equals(c)) { + return DataType.FLOAT32; + } else if (int.class.equals(c)) { + return DataType.INT32; + } else if (byte.class.equals(c)) { + return DataType.UINT8; + } else if (long.class.equals(c)) { + return DataType.INT64; + } else if (ByteBuffer.class.isInstance(o)) { + return DataType.BYTEBUFFER; + } + } + throw new IllegalArgumentException("cannot resolve DataType of " + o.getClass().getName()); + } + + /** Returns the shape of an object as an int array. */ + static int[] shapeOf(Object o) { + int size = numDimensions(o); + int[] dimensions = new int[size]; + fillShape(o, 0, dimensions); + return dimensions; + } + + static int numDimensions(Object o) { + if (o == null || !o.getClass().isArray()) { + return 0; + } + if (Array.getLength(o) == 0) { + throw new IllegalArgumentException("array lengths cannot be 0."); + } + return 1 + numDimensions(Array.get(o, 0)); + } + + static void fillShape(Object o, int dim, int[] shape) { + if (shape == null || dim == shape.length) { + return; + } + final int len = Array.getLength(o); + if (shape[dim] == 0) { + shape[dim] = len; + } else if (shape[dim] != len) { + throw new IllegalArgumentException( + String.format("mismatched lengths (%d and %d) in dimension %d", shape[dim], len, dim)); + } + for (int i = 0; i < len; ++i) { + fillShape(Array.get(o, i), dim + 1, shape); + } + } + + private static final int ERROR_BUFFER_SIZE = 512; + + private long errorHandle; + + private long interpreterHandle; + + private long modelHandle; + + private int inputSize; + + private MappedByteBuffer modelByteBuffer; + + private Map inputsIndexes; + + private Map outputsIndexes; + + private static native String[] getInputNames(long interpreterHandle); + + private static native String[] getOutputNames(long interpreterHandle); + + private static native void resizeInput( + long interpreterHandle, long errorHandle, int inputIdx, int[] dims); + + private static native void useNNAPI(long interpreterHandle, boolean state); + + private static native long createErrorReporter(int size); + + private static native long createModel(String modelPathOrBuffer, long errorHandle); + + private static native long createModelWithBuffer(MappedByteBuffer modelBuffer, long errorHandle); + + private static native long createInterpreter(long modelHandle); + + private static native long[] run( + long interpreterHandle, + long errorHandle, + Object[] sizes, + int[] dtypes, + int[] numsOfBytes, + Object[] values); + + private static native void delete(long errorHandle, long modelHandle, long interpreterHandle); + + private static native int[] getInputDims(long interpreterHandle, int inputIdx, int numBytes); + + static { + TensorFlowLite.init(); + } +} diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java new file mode 100644 index 0000000000..54ace6c63c --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java @@ -0,0 +1,71 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import java.util.Arrays; + +/** + * A typed multi-dimensional array used in Tensorflow Lite. + * + *

The native handle of a {@code Tensor} belongs to {@code NativeInterpreterWrapper}, thus not + * needed to be closed here. + */ +final class Tensor { + + static Tensor fromHandle(long nativeHandle) { + return new Tensor(nativeHandle); + } + + /** Reads Tensor content into an array. */ + T copyTo(T dst) { + if (NativeInterpreterWrapper.dataTypeOf(dst) != dtype) { + throw new IllegalArgumentException( + String.format( + "Cannot convert an TensorFlowLite tensor with type %s to a Java object of " + + "type %s (which is compatible with the TensorFlowLite type %s)", + dtype, dst.getClass().getName(), NativeInterpreterWrapper.dataTypeOf(dst))); + } + int[] dstShape = NativeInterpreterWrapper.shapeOf(dst); + if (!Arrays.equals(dstShape, shapeCopy)) { + throw new IllegalArgumentException( + String.format( + "Shape of output target %s does not match with the shape of the Tensor %s.", + Arrays.toString(dstShape), Arrays.toString(shapeCopy))); + } + readMultiDimensionalArray(nativeHandle, dst); + return dst; + } + + final long nativeHandle; + final DataType dtype; + final int[] shapeCopy; + + private Tensor(long nativeHandle) { + this.nativeHandle = nativeHandle; + this.dtype = DataType.fromNumber(dtype(nativeHandle)); + this.shapeCopy = shape(nativeHandle); + } + + private static native int dtype(long handle); + + private static native int[] shape(long handle); + + private static native void readMultiDimensionalArray(long handle, Object value); + + static { + TensorFlowLite.init(); + } +} diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java new file mode 100644 index 0000000000..711638a9f9 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +/** Static utility methods loading the TensorFlowLite runtime. */ +public final class TensorFlowLite { + + private static final String LIBNAME = "tensorflowlite_jni"; + + private TensorFlowLite() {} + + /** Returns the version of the underlying TensorFlowLite runtime. */ + public static native String version(); + + /** + * Load the TensorFlowLite runtime C library. + */ + static boolean init() { + try { + System.loadLibrary(LIBNAME); + return true; + } catch (UnsatisfiedLinkError e) { + System.err.println("TensorFlowLite: failed to load native library: " + e.getMessage()); + return false; + } + } + + static { + init(); + } +} diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/package-info.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/package-info.java new file mode 100644 index 0000000000..68e6a0f578 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/package-info.java @@ -0,0 +1,17 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +/** Defines classes to load and execute TensorFlowLite models. */ +package org.tensorflow.lite; diff --git a/tensorflow/contrib/lite/java/src/main/native/BUILD b/tensorflow/contrib/lite/java/src/main/native/BUILD new file mode 100644 index 0000000000..9c172a1f68 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/BUILD @@ -0,0 +1,70 @@ +# Description: +# Java Native Interface (JNI) library intended for implementing the +# TensorFlow Lite Java API using the TensorFlow Lite CC library. + +package(default_visibility = ["//visibility:public"]) + +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts") + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "native_framework_only", + srcs = [ + "exception_jni.cc", + "nativeinterpreterwrapper_jni.cc", + "tensor_jni.cc", + "tensorflow_lite_jni.cc", + ], + hdrs = [ + "exception_jni.h", + "nativeinterpreterwrapper_jni.h", + "tensor_jni.h", + "tensorflow_lite_jni.h", + ], + copts = tflite_copts(), + linkopts = [ + "-lm", + "-ldl", + ], + deps = [ + "//tensorflow/contrib/lite:context", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:schema_fbs_version", + ], + alwayslink = 1, +) + +# This includes all ops. If you want a smaller binary, you should copy and +# modify builtin_ops_jni.cc. You should then link your binary against both +# ":native_framework_only" and your own version of ":native_builtin_ops". +cc_library( + name = "native", + srcs = [ + "builtin_ops_jni.cc", + ], + copts = tflite_copts(), + deps = [ + ":native_framework_only", + "//tensorflow/contrib/lite/kernels:builtin_ops", + ], + alwayslink = 1, +) + +exports_files( + [ + "version_script.lds", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/java/src/main/native/builtin_ops_jni.cc b/tensorflow/contrib/lite/java/src/main/native/builtin_ops_jni.cc new file mode 100644 index 0000000000..cce356370f --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/builtin_ops_jni.cc @@ -0,0 +1,29 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/register.h" + +namespace tflite { + +// The JNI code in interpreter_jni.cc expects a CreateOpResolver() function in +// the tflite namespace. This one instantiates a BuiltinOpResolver, with all the +// builtin ops. For smaller binary sizes users should avoid linking this in, and +// should provide a custom make CreateOpResolver() instead. +std::unique_ptr CreateOpResolver() { // NOLINT + return std::unique_ptr( + new tflite::ops::builtin::BuiltinOpResolver()); +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/java/src/main/native/exception_jni.cc b/tensorflow/contrib/lite/java/src/main/native/exception_jni.cc new file mode 100644 index 0000000000..1578c9e3dd --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/exception_jni.cc @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/contrib/lite/java/src/main/native/exception_jni.h" + +const char kIllegalArgumentException[] = "java/lang/IllegalArgumentException"; +const char kIllegalStateException[] = "java/lang/IllegalStateException"; +const char kNullPointerException[] = "java/lang/NullPointerException"; +const char kIndexOutOfBoundsException[] = "java/lang/IndexOutOfBoundsException"; +const char kUnsupportedOperationException[] = + "java/lang/UnsupportedOperationException"; + +void throwException(JNIEnv* env, const char* clazz, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + const size_t max_msg_len = 512; + auto* message = static_cast(malloc(max_msg_len)); + if (vsnprintf(message, max_msg_len, fmt, args) >= 0) { + env->ThrowNew(env->FindClass(clazz), message); + } else { + env->ThrowNew(env->FindClass(clazz), ""); + } + free(message); + va_end(args); +} + +BufferErrorReporter::BufferErrorReporter(JNIEnv* env, int limit) { + buffer_ = new char[limit]; + if (!buffer_) { + throwException(env, kNullPointerException, + "Malloc of BufferErrorReporter to hold %d char failed.", + limit); + return; + } + start_idx_ = 0; + end_idx_ = limit - 1; +} + +BufferErrorReporter::~BufferErrorReporter() { delete[] buffer_; } + +int BufferErrorReporter::Report(const char* format, va_list args) { + int size = 0; + if (start_idx_ < end_idx_) { + size = vsnprintf(buffer_ + start_idx_, end_idx_ - start_idx_, format, args); + } + start_idx_ += size; + return size; +} + +const char* BufferErrorReporter::CachedErrorMessage() { return buffer_; } diff --git a/tensorflow/contrib/lite/java/src/main/native/exception_jni.h b/tensorflow/contrib/lite/java/src/main/native/exception_jni.h new file mode 100644 index 0000000000..3ffff052df --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/exception_jni.h @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_JAVA_EXCEPTION_JNI_H_ +#define TENSORFLOW_CONTRIB_LITE_JAVA_EXCEPTION_JNI_H_ + +#include +#include "tensorflow/contrib/lite/error_reporter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const char kIllegalArgumentException[]; +extern const char kIllegalStateException[]; +extern const char kNullPointerException[]; +extern const char kIndexOutOfBoundsException[]; +extern const char kUnsupportedOperationException[]; + +void throwException(JNIEnv* env, const char* clazz, const char* fmt, ...); + +class BufferErrorReporter : public tflite::ErrorReporter { + public: + BufferErrorReporter(JNIEnv* env, int limit); + virtual ~BufferErrorReporter(); + int Report(const char* format, va_list args) override; + const char* CachedErrorMessage(); + + private: + char* buffer_; + int start_idx_ = 0; + int end_idx_ = 0; +}; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_CONTRIB_LITE_JAVA_EXCEPTION_JNI_H_ diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc new file mode 100644 index 0000000000..bc6462eb54 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -0,0 +1,446 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h" + +namespace { + +const int kByteBufferValue = 999; +const int kBufferSize = 256; + +tflite::Interpreter* convertLongToInterpreter(JNIEnv* env, jlong handle) { + if (handle == 0) { + throwException(env, kIllegalArgumentException, + "Invalid handle to Interpreter."); + return nullptr; + } + return reinterpret_cast(handle); +} + +tflite::FlatBufferModel* convertLongToModel(JNIEnv* env, jlong handle) { + if (handle == 0) { + throwException(env, kIllegalArgumentException, "Invalid handle to model."); + return nullptr; + } + return reinterpret_cast(handle); +} + +BufferErrorReporter* convertLongToErrorReporter(JNIEnv* env, jlong handle) { + if (handle == 0) { + throwException(env, kIllegalArgumentException, + "Invalid handle to ErrorReporter."); + return nullptr; + } + return reinterpret_cast(handle); +} + +std::vector convertJIntArrayToVector(JNIEnv* env, jintArray inputs) { + int size = static_cast(env->GetArrayLength(inputs)); + std::vector outputs(size, 0); + jint* ptr = env->GetIntArrayElements(inputs, nullptr); + if (ptr == nullptr) { + throwException(env, kIllegalArgumentException, + "Empty dimensions of input array."); + return {}; + } + for (int i = 0; i < size; ++i) { + outputs[i] = ptr[i]; + } + env->ReleaseIntArrayElements(inputs, ptr, JNI_ABORT); + return outputs; +} + +bool isByteBuffer(jint data_type) { return data_type == kByteBufferValue; } + +TfLiteType resolveDataType(jint data_type) { + switch (data_type) { + case 1: + return kTfLiteFloat32; + case 2: + return kTfLiteInt32; + case 3: + return kTfLiteUInt8; + case 4: + return kTfLiteInt64; + default: + return kTfLiteNoType; + } +} + +void printDims(char* buffer, int max_size, int* dims, int num_dims) { + if (max_size <= 0) return; + buffer[0] = '?'; + int size = 1; + for (int i = 1; i < num_dims; ++i) { + if (max_size > size) { + int written_size = + snprintf(buffer + size, max_size - size, ",%d", dims[i]); + if (written_size < 0) return; + size += written_size; + } + } +} + +TfLiteStatus checkInputs(JNIEnv* env, tflite::Interpreter* interpreter, + const int input_size, jintArray data_types, + jintArray nums_of_bytes, jobjectArray values, + jobjectArray sizes) { + if (input_size != interpreter->inputs().size()) { + throwException(env, kIllegalArgumentException, + "Expected num of inputs is %d but got %d", + interpreter->inputs().size(), input_size); + return kTfLiteError; + } + if (input_size != env->GetArrayLength(data_types) || + input_size != env->GetArrayLength(nums_of_bytes) || + input_size != env->GetArrayLength(values)) { + throwException(env, kIllegalArgumentException, + "Arrays in arguments should be of the same length, but got " + "%d sizes, %d data_types, %d nums_of_bytes, and %d values", + input_size, env->GetArrayLength(data_types), + env->GetArrayLength(nums_of_bytes), + env->GetArrayLength(values)); + return kTfLiteError; + } + for (int i = 0; i < input_size; ++i) { + int input_idx = interpreter->inputs()[i]; + TfLiteTensor* target = interpreter->tensor(input_idx); + jintArray dims = + static_cast(env->GetObjectArrayElement(sizes, i)); + int num_dims = static_cast(env->GetArrayLength(dims)); + if (target->dims->size != num_dims) { + throwException(env, kIllegalArgumentException, + "%d-th input should have %d dimensions, but found %d " + "dimensions", + i, target->dims->size, num_dims); + return kTfLiteError; + } + jint* ptr = env->GetIntArrayElements(dims, nullptr); + for (int j = 1; j < num_dims; ++j) { + if (target->dims->data[j] != ptr[j]) { + std::unique_ptr expected_dims(new char[kBufferSize]); + std::unique_ptr obtained_dims(new char[kBufferSize]); + printDims(expected_dims.get(), kBufferSize, target->dims->data, + num_dims); + printDims(obtained_dims.get(), kBufferSize, ptr, num_dims); + throwException(env, kIllegalArgumentException, + "%d-th input dimension should be [%s], but found [%s]", + i, expected_dims.get(), obtained_dims.get()); + env->ReleaseIntArrayElements(dims, ptr, JNI_ABORT); + return kTfLiteError; + } + } + env->ReleaseIntArrayElements(dims, ptr, JNI_ABORT); + env->DeleteLocalRef(dims); + if (env->ExceptionCheck()) return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus resizeInputs(JNIEnv* env, tflite::Interpreter* interpreter, + int input_size, jobjectArray sizes) { + for (int i = 0; i < input_size; ++i) { + int input_idx = interpreter->inputs()[i]; + jintArray dims = + static_cast(env->GetObjectArrayElement(sizes, i)); + TfLiteStatus status = interpreter->ResizeInputTensor( + input_idx, convertJIntArrayToVector(env, dims)); + if (status != kTfLiteOk) { + return status; + } + env->DeleteLocalRef(dims); + if (env->ExceptionCheck()) return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus setInputs(JNIEnv* env, tflite::Interpreter* interpreter, + int input_size, jintArray data_types, + jintArray nums_of_bytes, jobjectArray values) { + jint* data_type = env->GetIntArrayElements(data_types, nullptr); + jint* num_bytes = env->GetIntArrayElements(nums_of_bytes, nullptr); + for (int i = 0; i < input_size; ++i) { + int input_idx = interpreter->inputs()[i]; + TfLiteTensor* target = interpreter->tensor(input_idx); + jobject value = env->GetObjectArrayElement(values, i); + bool is_byte_buffer = isByteBuffer(data_type[i]); + if (is_byte_buffer) { + writeByteBuffer(env, value, &(target->data.raw), + static_cast(num_bytes[i])); + } else { + TfLiteType type = resolveDataType(data_type[i]); + if (type != target->type) { + throwException(env, kIllegalArgumentException, + "DataType (%d) of input data does not match with the " + "DataType (%d) of model inputs.", + type, target->type); + return kTfLiteError; + } + writeMultiDimensionalArray(env, value, target->type, target->dims->size, + &(target->data.raw), + static_cast(num_bytes[i])); + } + env->DeleteLocalRef(value); + if (env->ExceptionCheck()) return kTfLiteError; + } + env->ReleaseIntArrayElements(data_types, data_type, JNI_ABORT); + env->ReleaseIntArrayElements(nums_of_bytes, num_bytes, JNI_ABORT); + return kTfLiteOk; +} + +} // namespace + +JNIEXPORT jobjectArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputNames(JNIEnv* env, + jclass clazz, + jlong handle) { + tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); + if (interpreter == nullptr) return nullptr; + jclass string_class = env->FindClass("java/lang/String"); + if (string_class == nullptr) { + throwException(env, kUnsupportedOperationException, + "Can not find java/lang/String class to get input names."); + return nullptr; + } + size_t size = interpreter->inputs().size(); + jobjectArray names = static_cast( + env->NewObjectArray(size, string_class, env->NewStringUTF(""))); + for (int i = 0; i < size; ++i) { + env->SetObjectArrayElement(names, i, + env->NewStringUTF(interpreter->GetInputName(i))); + } + return names; +} + +JNIEXPORT jobjectArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputNames(JNIEnv* env, + jclass clazz, + jlong handle) { + tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); + if (interpreter == nullptr) return nullptr; + jclass string_class = env->FindClass("java/lang/String"); + if (string_class == nullptr) { + throwException(env, kUnsupportedOperationException, + "Can not find java/lang/String class to get output names."); + return nullptr; + } + size_t size = interpreter->outputs().size(); + jobjectArray names = static_cast( + env->NewObjectArray(size, string_class, env->NewStringUTF(""))); + for (int i = 0; i < size; ++i) { + env->SetObjectArrayElement( + names, i, env->NewStringUTF(interpreter->GetOutputName(i))); + } + return names; +} + +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, + jclass clazz, + jlong handle, + jboolean state) { + tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); + if (interpreter == nullptr) return; + interpreter->UseNNAPI(static_cast(state)); +} + +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createErrorReporter( + JNIEnv* env, jclass clazz, jint size) { + BufferErrorReporter* error_reporter = + new BufferErrorReporter(env, static_cast(size)); + return reinterpret_cast(error_reporter); +} + +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createModel( + JNIEnv* env, jclass clazz, jstring model_file, jlong error_handle) { + BufferErrorReporter* error_reporter = + convertLongToErrorReporter(env, error_handle); + if (error_reporter == nullptr) return 0; + const char* path = env->GetStringUTFChars(model_file, nullptr); + auto model = tflite::FlatBufferModel::BuildFromFile(path, error_reporter); + if (!model) { + throwException(env, kIllegalArgumentException, + "Contents of %s does not encode a valid TensorFlowLite " + "model: %s", + path, error_reporter->CachedErrorMessage()); + env->ReleaseStringUTFChars(model_file, path); + return 0; + } + env->ReleaseStringUTFChars(model_file, path); + return reinterpret_cast(model.release()); +} + +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createModelWithBuffer( + JNIEnv* env, jclass /*clazz*/, jobject model_buffer, jlong error_handle) { + BufferErrorReporter* error_reporter = + convertLongToErrorReporter(env, error_handle); + if (error_reporter == nullptr) return 0; + const char* buf = + static_cast(env->GetDirectBufferAddress(model_buffer)); + jlong capacity = env->GetDirectBufferCapacity(model_buffer); + auto model = tflite::FlatBufferModel::BuildFromBuffer( + buf, static_cast(capacity), error_reporter); + if (!model) { + throwException(env, kIllegalArgumentException, + "MappedByteBuffer does not encode a valid TensorFlowLite " + "model: %s", + error_reporter->CachedErrorMessage()); + return 0; + } + return reinterpret_cast(model.release()); +} + +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( + JNIEnv* env, jclass clazz, jlong model_handle) { + tflite::FlatBufferModel* model = convertLongToModel(env, model_handle); + if (model == nullptr) return 0; + auto resolver = ::tflite::CreateOpResolver(); + std::unique_ptr interpreter; + tflite::InterpreterBuilder(*model, *(resolver.get()))(&interpreter); + return reinterpret_cast(interpreter.release()); +} + +// Sets inputs, runs inference, and returns outputs as long handles. +JNIEXPORT jlongArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_run( + JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, + jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, + jobjectArray values) { + tflite::Interpreter* interpreter = + convertLongToInterpreter(env, interpreter_handle); + if (interpreter == nullptr) return nullptr; + BufferErrorReporter* error_reporter = + convertLongToErrorReporter(env, error_handle); + if (error_reporter == nullptr) return nullptr; + const int input_size = env->GetArrayLength(sizes); + // validates inputs + TfLiteStatus status = checkInputs(env, interpreter, input_size, data_types, + nums_of_bytes, values, sizes); + if (status != kTfLiteOk) return nullptr; + // resizes inputs + status = resizeInputs(env, interpreter, input_size, sizes); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, "Can not resize the input: %s", + error_reporter->CachedErrorMessage()); + return nullptr; + } + // allocates memory + status = interpreter->AllocateTensors(); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, + "Can not allocate memory for the given inputs: %s", + error_reporter->CachedErrorMessage()); + return nullptr; + } + // sets inputs + status = setInputs(env, interpreter, input_size, data_types, nums_of_bytes, + values); + if (status != kTfLiteOk) return nullptr; + // runs inference + if (interpreter->Invoke() != kTfLiteOk) { + throwException(env, kIllegalArgumentException, + "Failed to run on the given Interpreter: %s", + error_reporter->CachedErrorMessage()); + return nullptr; + } + // returns outputs + const std::vector& results = interpreter->outputs(); + if (results.empty()) { + throwException(env, kIllegalArgumentException, + "The Interpreter does not have any outputs."); + return nullptr; + } + jlongArray outputs = env->NewLongArray(results.size()); + size_t size = results.size(); + for (int i = 0; i < size; ++i) { + TfLiteTensor* source = interpreter->tensor(results[i]); + jlong output = reinterpret_cast(source); + env->SetLongArrayRegion(outputs, i, 1, &output); + } + return outputs; +} + +JNIEXPORT jintArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( + JNIEnv* env, jclass clazz, jlong handle, jint input_idx, jint num_bytes) { + tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); + if (interpreter == nullptr) return nullptr; + const int idx = static_cast(input_idx); + if (input_idx >= interpreter->inputs().size()) { + throwException(env, kIllegalArgumentException, + "Out of range: Failed to get %d-th input out of %d inputs", + input_idx, interpreter->inputs().size()); + return nullptr; + } + TfLiteTensor* target = interpreter->tensor(interpreter->inputs()[idx]); + int size = target->dims->size; + int expected_num_bytes = elementByteSize(target->type); + for (int i = 0; i < size; ++i) { + expected_num_bytes *= target->dims->data[i]; + } + if (num_bytes != expected_num_bytes) { + throwException(env, kIllegalArgumentException, + "Failed to get input dimensions. %d-th input should have" + " %d bytes, but found %d bytes.", + idx, expected_num_bytes, num_bytes); + return nullptr; + } + jintArray outputs = env->NewIntArray(size); + env->SetIntArrayRegion(outputs, 0, size, &(target->dims->data[0])); + return outputs; +} + +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( + JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, + jint input_idx, jintArray dims) { + BufferErrorReporter* error_reporter = + convertLongToErrorReporter(env, error_handle); + if (error_reporter == nullptr) return; + tflite::Interpreter* interpreter = + convertLongToInterpreter(env, interpreter_handle); + if (interpreter == nullptr) return; + const int idx = static_cast(input_idx); + if (idx < 0 || idx >= interpreter->inputs().size()) { + throwException(env, kIllegalArgumentException, + "Can not resize %d-th input for a model having %d inputs.", + idx, interpreter->inputs().size()); + } + TfLiteStatus status = interpreter->ResizeInputTensor( + interpreter->inputs()[idx], convertJIntArrayToVector(env, dims)); + if (status != kTfLiteOk) { + throwException(env, kIllegalArgumentException, + "Failed to resize %d-th input: %s", idx, + error_reporter->CachedErrorMessage()); + } +} + +JNIEXPORT void JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_delete( + JNIEnv* env, jclass clazz, jlong error_handle, jlong model_handle, + jlong interpreter_handle) { + if (interpreter_handle != 0) { + delete convertLongToInterpreter(env, interpreter_handle); + } + if (model_handle != 0) { + delete convertLongToModel(env, model_handle); + } + if (error_handle != 0) { + delete convertLongToErrorReporter(env, error_handle); + } +} diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h new file mode 100644 index 0000000000..430886b7cc --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -0,0 +1,151 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_JAVA_NATIVEINTERPRETERWRAPPER_JNI_H_ +#define TENSORFLOW_CONTRIB_LITE_JAVA_NATIVEINTERPRETERWRAPPER_JNI_H_ + +#include +#include +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/java/src/main/native/exception_jni.h" +#include "tensorflow/contrib/lite/java/src/main/native/tensor_jni.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +// This is to be provided at link-time by a library. +extern std::unique_ptr CreateOpResolver(); +} // namespace tflite + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (J)[Ljava/lang/Object; + */ +JNIEXPORT jobjectArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputNames(JNIEnv* env, + jclass clazz, + jlong handle); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (J)[Ljava/lang/Object; + */ +JNIEXPORT jobjectArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputNames(JNIEnv* env, + jclass clazz, + jlong handle); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JZ) + */ +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, + jclass clazz, + jlong handle, + jboolean state); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (I)J + */ +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createErrorReporter( + JNIEnv* env, jclass clazz, jint size); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (Ljava/lang/String;J)J + */ +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createModel( + JNIEnv* env, jclass clazz, jstring model_file, jlong error_handle); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (Ljava/lang/Object;J)J + */ +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createModelWithBuffer( + JNIEnv* env, jclass clazz, jobject model_buffer, jlong error_handle); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( + JNIEnv* env, jclass clazz, jlong model_handle); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;)[J + */ +JNIEXPORT jlongArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_run( + JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, + jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, + jobjectArray values); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JII)[I + * + * It gets input dimensions if num_bytes matches number of bytes required by + * the input, else returns null and throws IllegalArgumentException. + */ +JNIEXPORT jintArray JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( + JNIEnv* env, jclass clazz, jlong handle, jint input_idx, jint num_bytes); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JJI[I) + * + * It resizes dimensions of a input. + */ +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( + JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, + jint input_idx, jintArray dims); + +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JJJ) + */ +JNIEXPORT void JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_delete( + JNIEnv* env, jclass clazz, jlong error_handle, jlong model_handle, + jlong interpreter_handle); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_CONTRIB_LITE_JAVA_NATIVEINTERPRETERWRAPPER_JNI_H_ diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc new file mode 100644 index 0000000000..65126e78a3 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc @@ -0,0 +1,242 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/java/src/main/native/tensor_jni.h" +#include +#include +#include "tensorflow/contrib/lite/java/src/main/native/exception_jni.h" + +namespace { + +TfLiteTensor* convertLongToTensor(JNIEnv* env, jlong handle) { + if (handle == 0) { + throwException(env, kIllegalArgumentException, + "Invalid handle to TfLiteTensor."); + return nullptr; + } + return reinterpret_cast(handle); +} + +size_t writeOneDimensionalArray(JNIEnv* env, jobject object, TfLiteType type, + void* dst, size_t dst_size) { + jarray array = static_cast(object); + const int num_elements = env->GetArrayLength(array); + size_t to_copy = num_elements * elementByteSize(type); + if (to_copy > dst_size) { + throwException(env, kIllegalStateException, + "cannot write Java array of %d bytes to Tensor of %d bytes", + to_copy, dst_size); + return 0; + } + switch (type) { + case kTfLiteFloat32: { + jfloatArray a = static_cast(array); + jfloat* values = env->GetFloatArrayElements(a, nullptr); + memcpy(dst, values, to_copy); + env->ReleaseFloatArrayElements(a, values, JNI_ABORT); + return to_copy; + } + case kTfLiteInt32: { + jintArray a = static_cast(array); + jint* values = env->GetIntArrayElements(a, nullptr); + memcpy(dst, values, to_copy); + env->ReleaseIntArrayElements(a, values, JNI_ABORT); + return to_copy; + } + case kTfLiteInt64: { + jlongArray a = static_cast(array); + jlong* values = env->GetLongArrayElements(a, nullptr); + memcpy(dst, values, to_copy); + env->ReleaseLongArrayElements(a, values, JNI_ABORT); + return to_copy; + } + case kTfLiteUInt8: { + jbyteArray a = static_cast(array); + jbyte* values = env->GetByteArrayElements(a, nullptr); + memcpy(dst, values, to_copy); + env->ReleaseByteArrayElements(a, values, JNI_ABORT); + return to_copy; + } + default: { + throwException(env, kUnsupportedOperationException, + "TensorFlowLite currently supports float (32 bits), " + "int (32 bits), byte (8 bits), and long (64 bits), " + "support for other types (DataType %d in this case) will " + "be added in the future", + kTfLiteFloat32, type); + return 0; + } + } +} + +size_t readOneDimensionalArray(JNIEnv* env, TfLiteType data_type, + const void* src, size_t src_size, jarray dst) { + const int len = env->GetArrayLength(dst); + const size_t size = len * elementByteSize(data_type); + if (size > src_size) { + throwException( + env, kIllegalStateException, + "cannot fill a Java array of %d bytes with a Tensor of %d bytes", size, + src_size); + return 0; + } + switch (data_type) { + case kTfLiteFloat32: { + jfloatArray float_array = static_cast(dst); + env->SetFloatArrayRegion(float_array, 0, len, + static_cast(src)); + return size; + } + case kTfLiteInt32: { + jintArray int_array = static_cast(dst); + env->SetIntArrayRegion(int_array, 0, len, static_cast(src)); + return size; + } + case kTfLiteInt64: { + jlongArray long_array = static_cast(dst); + env->SetLongArrayRegion(long_array, 0, len, + static_cast(src)); + return size; + } + case kTfLiteUInt8: { + jbyteArray byte_array = static_cast(dst); + env->SetByteArrayRegion(byte_array, 0, len, + static_cast(src)); + return size; + } + default: { + throwException(env, kIllegalStateException, "invalid DataType(%d)", + data_type); + } + } + return 0; +} + +size_t readMultiDimensionalArray(JNIEnv* env, TfLiteType data_type, char* src, + size_t src_size, int dims_left, jarray dst) { + if (dims_left == 1) { + return readOneDimensionalArray(env, data_type, src, src_size, dst); + } else { + jobjectArray ndarray = static_cast(dst); + int len = env->GetArrayLength(ndarray); + size_t size = 0; + for (int i = 0; i < len; ++i) { + jarray row = static_cast(env->GetObjectArrayElement(ndarray, i)); + size += readMultiDimensionalArray(env, data_type, src + size, + src_size - size, dims_left - 1, row); + env->DeleteLocalRef(row); + if (env->ExceptionCheck()) return size; + } + return size; + } +} + +} // namespace + +size_t elementByteSize(TfLiteType data_type) { + // The code in this file makes the assumption that the + // TensorFlow TF_DataTypes and the Java primitive types + // have the same byte sizes. Validate that: + switch (data_type) { + case kTfLiteFloat32: + static_assert(sizeof(jfloat) == 4, + "Java float not compatible with kTfLiteFloat"); + return 4; + case kTfLiteInt32: + static_assert(sizeof(jint) == 4, + "Java int not compatible with kTfLiteInt"); + return 4; + case kTfLiteUInt8: + static_assert(sizeof(jbyte) == 1, + "Java byte not compatible with kTfLiteUInt8"); + return 1; + case kTfLiteInt64: + static_assert(sizeof(jlong) == 8, + "Java long not compatible with kTfLiteInt64"); + return 8; + default: + return 0; + } +} + +size_t writeByteBuffer(JNIEnv* env, jobject object, char** dst, int dst_size) { + char* buf = static_cast(env->GetDirectBufferAddress(object)); + if (!buf) { + throwException(env, kIllegalArgumentException, + "Input ByteBuffer is not a direct buffer"); + return 0; + } + *dst = buf; + return dst_size; +} + +size_t writeMultiDimensionalArray(JNIEnv* env, jobject src, TfLiteType type, + int dims_left, char** dst, int dst_size) { + if (dims_left <= 1) { + return writeOneDimensionalArray(env, src, type, *dst, dst_size); + } else { + jobjectArray ndarray = static_cast(src); + int len = env->GetArrayLength(ndarray); + size_t sz = 0; + for (int i = 0; i < len; ++i) { + jobject row = env->GetObjectArrayElement(ndarray, i); + char* next_dst = *dst + sz; + sz += writeMultiDimensionalArray(env, row, type, dims_left - 1, &next_dst, + dst_size - sz); + env->DeleteLocalRef(row); + if (env->ExceptionCheck()) return sz; + } + return sz; + } +} + +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_Tensor_readMultiDimensionalArray(JNIEnv* env, + jclass clazz, + jlong handle, + jobject value) { + TfLiteTensor* tensor = convertLongToTensor(env, handle); + if (tensor == nullptr) return; + int num_dims = tensor->dims->size; + if (num_dims == 0) { + throwException(env, kIllegalArgumentException, + "copyTo() is not meant for scalar Tensors."); + return; + } + readMultiDimensionalArray(env, tensor->type, tensor->data.raw, tensor->bytes, + num_dims, static_cast(value)); +} + +JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_dtype(JNIEnv* env, + jclass clazz, + jlong handle) { + TfLiteTensor* tensor = convertLongToTensor(env, handle); + if (tensor == nullptr) return 0; + return static_cast(tensor->type); +} + +JNIEXPORT jintArray JNICALL +Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, jclass clazz, jlong handle) { + TfLiteTensor* tensor = convertLongToTensor(env, handle); + if (tensor == nullptr) return nullptr; + int num_dims = tensor->dims->size; + jintArray result = env->NewIntArray(num_dims); + jint* dims = env->GetIntArrayElements(result, nullptr); + for (int i = 0; i < num_dims; ++i) { + dims[i] = static_cast(tensor->dims->data[i]); + } + env->ReleaseIntArrayElements(result, dims, 0); + return result; +} diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h new file mode 100644 index 0000000000..3a4910dcc3 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_JAVA_TENSOR_JNI_H_ +#define TENSORFLOW_CONTRIB_LITE_JAVA_TENSOR_JNI_H_ + +#include +#include "tensorflow/contrib/lite/context.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* + * Class: org_tensorflow_lite_TfLiteTensor + * Method: + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_dtype(JNIEnv* env, + jclass clazz, + jlong handle); + +/* + * Class: org_tensorflow_lite_TfLiteTensor + * Method: + * Signature: (J)[I + */ +JNIEXPORT jintArray JNICALL Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, + jclass clazz, + jlong handle); + +/* + * Class: org_tensorflow_lite_TfLiteTensor + * Method: + * Signature: (JLjava/lang/Object;) + */ +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_Tensor_readMultiDimensionalArray(JNIEnv* env, + jclass clazz, + jlong handle, + jobject value); + +/* + * Finds the size of each data type. + */ +size_t elementByteSize(TfLiteType data_type); + +/* + * Writes data of a ByteBuffer into dest. + */ +size_t writeByteBuffer(JNIEnv* env, jobject object, char** dst, int dst_size); + +/* + * Writes a multi-dimensional array into dest. + */ +size_t writeMultiDimensionalArray(JNIEnv* env, jobject src, TfLiteType type, + int dims_left, char** dst, int dst_size); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_CONTRIB_LITE_JAVA_TENSOR_JNI_H_ diff --git a/tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.cc b/tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.cc new file mode 100644 index 0000000000..2e7f2f5692 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.cc @@ -0,0 +1,26 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.h" +#include "tensorflow/contrib/lite/version.h" + +JNIEXPORT jstring JNICALL +Java_org_tensorflow_lite_TensorFlowLite_version(JNIEnv* env, jclass /*clazz*/) { + char buf[64]; + snprintf(buf, sizeof(buf), "%d", TFLITE_SCHEMA_VERSION); + return env->NewStringUTF(buf); +} diff --git a/tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.h b/tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.h new file mode 100644 index 0000000000..65f8341149 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/tensorflow_lite_jni.h @@ -0,0 +1,36 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_JAVA_TENSORFLOW_LITE_JNI_H_ +#define TENSORFLOW_CONTRIB_LITE_JAVA_TENSORFLOW_LITE_JNI_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* + * Class: org_tensorflow_lite_TensorFlowLite + * Method: version + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL +Java_org_tensorflow_lite_TensorFlowLite_version(JNIEnv*, jclass); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_CONTRIB_LITE_JAVA_TENSORFLOW_LITE_JNI_H_ diff --git a/tensorflow/contrib/lite/java/src/main/native/version_script.lds b/tensorflow/contrib/lite/java/src/main/native/version_script.lds new file mode 100644 index 0000000000..38c93dda73 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/version_script.lds @@ -0,0 +1,11 @@ +VERS_1.0 { + # Export JNI symbols. + global: + Java_*; + JNI_OnLoad; + JNI_OnUnload; + + # Hide everything else. + local: + *; +}; diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/DataTypeTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/DataTypeTest.java new file mode 100644 index 0000000000..cebc944200 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/DataTypeTest.java @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link org.tensorflow.lite.DataType}. */ +@RunWith(JUnit4.class) +public final class DataTypeTest { + + @Test + public void testElemByteSize() { + assertThat(DataType.FLOAT32.elemByteSize()).isEqualTo(4); + assertThat(DataType.INT32.elemByteSize()).isEqualTo(4); + assertThat(DataType.UINT8.elemByteSize()).isEqualTo(1); + assertThat(DataType.INT64.elemByteSize()).isEqualTo(8); + } +} diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java new file mode 100644 index 0000000000..a60c63d4b8 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -0,0 +1,221 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.fail; + +import java.io.File; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link org.tensorflow.lite.Interpreter}. */ +@RunWith(JUnit4.class) +public final class InterpreterTest { + + private static final File MODEL_FILE = + new File("third_party/tensorflow/contrib/lite/java/src/testdata/add.bin"); + + private static final File MOBILENET_MODEL_FILE = + new File("third_party/tensorflow/contrib/lite/java/src/testdata/mobilenet.tflite.bin"); + + @Test + public void testInterpreter() throws Exception { + Interpreter interpreter = new Interpreter(MODEL_FILE); + assertThat(interpreter).isNotNull(); + interpreter.close(); + } + + @Test + public void testRunWithMappedByteBufferModel() throws Exception { + Path path = MODEL_FILE.toPath(); + FileChannel fileChannel = + (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)); + MappedByteBuffer mappedByteBuffer = + fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + Interpreter interpreter = new Interpreter(mappedByteBuffer); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + interpreter.run(fourD, parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.close(); + fileChannel.close(); + } + + @Test + public void testRun() { + Interpreter interpreter = new Interpreter(MODEL_FILE); + Float[] oneD = {1.23f, 6.54f, 7.81f}; + Float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + Float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + Float[][][][] fourD = {threeD, threeD}; + Float[][][][] parsedOutputs = new Float[2][8][8][3]; + try { + interpreter.run(fourD, parsedOutputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("cannot resolve DataType of [[[[Ljava.lang.Float;"); + } + interpreter.close(); + } + + @Test + public void testRunWithBoxedInputs() { + Interpreter interpreter = new Interpreter(MODEL_FILE); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + interpreter.run(fourD, parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.close(); + } + + @Test + public void testRunForMultipleInputsOutputs() { + Interpreter interpreter = new Interpreter(MODEL_FILE); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + Map outputs = new HashMap<>(); + outputs.put(0, parsedOutputs); + interpreter.runForMultipleInputsOutputs(inputs, outputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.close(); + } + + @Test + public void testMobilenetRun() { + // Create a gray image. + float[][][][] img = new float[1][224][224][3]; + for (int i = 0; i < 224; ++i) { + for (int j = 0; j < 224; ++j) { + img[0][i][j][0] = 0.5f; + img[0][i][j][1] = 0.5f; + img[0][i][j][2] = 0.5f; + } + } + + // Allocate memory to receive the output values. + float[][] labels = new float[1][1001]; + + Interpreter interpreter = new Interpreter(MOBILENET_MODEL_FILE); + interpreter.run(img, labels); + interpreter.close(); + + assertThat(labels[0]) + .usingExactEquality() + .containsNoneOf(new float[] {Float.NaN, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY}); + } + + @Test + public void testRunWithWrongInputType() { + Interpreter interpreter = new Interpreter(MODEL_FILE); + int[] oneD = {4, 3, 9}; + int[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + int[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + int[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + try { + interpreter.run(fourD, parsedOutputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "DataType (2) of input data does not match with the DataType (1) of model inputs."); + } + interpreter.close(); + } + + @Test + public void testRunWithWrongOutputType() { + Interpreter interpreter = new Interpreter(MODEL_FILE); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + int[][][][] parsedOutputs = new int[2][8][8][3]; + try { + interpreter.run(fourD, parsedOutputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "Cannot convert an TensorFlowLite tensor with type " + + "FLOAT32 to a Java object of type [[[[I (which is compatible with the" + + " TensorFlowLite type INT32)"); + } + interpreter.close(); + } + + @Test + public void testGetInputIndex() { + Interpreter interpreter = new Interpreter(MOBILENET_MODEL_FILE); + try { + interpreter.getInputIndex("WrongInputName"); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "WrongInputName is not a valid name for any input. The indexes of the inputs" + + " are {input=0}"); + } + int index = interpreter.getInputIndex("input"); + assertThat(index).isEqualTo(0); + } + + @Test + public void testGetOutputIndex() { + Interpreter interpreter = new Interpreter(MOBILENET_MODEL_FILE); + try { + interpreter.getOutputIndex("WrongOutputName"); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "WrongOutputName is not a valid name for any output. The indexes of the outputs" + + " are {MobilenetV1/Predictions/Softmax=0}"); + } + int index = interpreter.getOutputIndex("MobilenetV1/Predictions/Softmax"); + assertThat(index).isEqualTo(0); + } +} diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java new file mode 100644 index 0000000000..9e4724b8e9 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -0,0 +1,406 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.fail; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link org.tensorflow.lite.NativeInterpreterWrapper}. */ +@RunWith(JUnit4.class) +public final class NativeInterpreterWrapperTest { + + private static final String FLOAT_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/src/testdata/add.bin"; + + private static final String INT_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/src/testdata/int32.bin"; + + private static final String LONG_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/src/testdata/int64.bin"; + + private static final String BYTE_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/src/testdata/uint8.bin"; + + private static final String INVALID_MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/src/testdata/invalid.model.tflite"; + + @Test + public void testConstructor() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + assertThat(wrapper).isNotNull(); + wrapper.close(); + } + + @Test + public void testConstructorWithInvalidModel() { + try { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(INVALID_MODEL_PATH); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains("Model provided has model identifier ' is ', should be 'TFL3'"); + } + } + + @Test + public void testRunWithFloat() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, -6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + float[][][][] parsedOutputs = new float[2][8][8][3]; + outputs[0].copyTo(parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, -19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + wrapper.close(); + } + + @Test + public void testRunWithInt() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(INT_MODEL_PATH); + int[] oneD = {3, 7, -4}; + int[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + int[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + int[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + int[][][][] parsedOutputs = new int[2][4][4][12]; + outputs[0].copyTo(parsedOutputs); + int[] outputOneD = parsedOutputs[0][0][0]; + int[] expected = {3, 7, -4, 3, 7, -4, 3, 7, -4, 3, 7, -4}; + assertThat(outputOneD).isEqualTo(expected); + wrapper.close(); + } + + @Test + public void testRunWithLong() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(LONG_MODEL_PATH); + long[] oneD = {-892834092L, 923423L, 2123918239018L}; + long[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + long[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + long[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + long[][][][] parsedOutputs = new long[2][4][4][12]; + outputs[0].copyTo(parsedOutputs); + long[] outputOneD = parsedOutputs[0][0][0]; + long[] expected = {-892834092L, 923423L, 2123918239018L, -892834092L, 923423L, 2123918239018L, + -892834092L, 923423L, 2123918239018L, -892834092L, 923423L, 2123918239018L}; + assertThat(outputOneD).isEqualTo(expected); + wrapper.close(); + } + + @Test + public void testRunWithByte() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(BYTE_MODEL_PATH); + byte[] oneD = {(byte) 0xe0, 0x4f, (byte) 0xd0}; + byte[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + byte[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + byte[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + int[] inputDims = {2, 8, 8, 3}; + wrapper.resizeInput(0, inputDims); + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + byte[][][][] parsedOutputs = new byte[2][4][4][12]; + outputs[0].copyTo(parsedOutputs); + byte[] outputOneD = parsedOutputs[0][0][0]; + byte[] expected = {(byte) 0xe0, 0x4f, (byte) 0xd0, (byte) 0xe0, 0x4f, (byte) 0xd0, + (byte) 0xe0, 0x4f, (byte) 0xd0, (byte) 0xe0, 0x4f, (byte) 0xd0}; + assertThat(outputOneD).isEqualTo(expected); + wrapper.close(); + } + + @Test + public void testRunWithByteBufferHavingBytes() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(BYTE_MODEL_PATH); + ByteBuffer bbuf = ByteBuffer.allocateDirect(2 * 8 * 8 * 3); + bbuf.order(ByteOrder.nativeOrder()); + bbuf.rewind(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 8; ++j) { + for (int k = 0; k < 8; ++k) { + bbuf.put((byte) 0xe0); + bbuf.put((byte) 0x4f); + bbuf.put((byte) 0xd0); + } + } + } + Object[] inputs = {bbuf}; + int[] inputDims = {2, 8, 8, 3}; + wrapper.resizeInput(0, inputDims); + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + byte[][][][] parsedOutputs = new byte[2][4][4][12]; + outputs[0].copyTo(parsedOutputs); + byte[] outputOneD = parsedOutputs[0][0][0]; + byte[] expected = { + (byte) 0xe0, 0x4f, (byte) 0xd0, (byte) 0xe0, 0x4f, (byte) 0xd0, + (byte) 0xe0, 0x4f, (byte) 0xd0, (byte) 0xe0, 0x4f, (byte) 0xd0 + }; + assertThat(outputOneD).isEqualTo(expected); + wrapper.close(); + } + + @Test + public void testRunWithByteBufferHavingFloats() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + ByteBuffer bbuf = ByteBuffer.allocateDirect(4 * 8 * 8 * 3 * 4); + bbuf.order(ByteOrder.nativeOrder()); + bbuf.rewind(); + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 8; ++j) { + for (int k = 0; k < 8; ++k) { + bbuf.putFloat(1.23f); + bbuf.putFloat(-6.54f); + bbuf.putFloat(7.81f); + } + } + } + Object[] inputs = {bbuf}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "Failed to get input dimensions. 0-th input should have 768 bytes, but found 3072 bytes"); + } + int[] inputDims = {4, 8, 8, 3}; + wrapper.resizeInput(0, inputDims); + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + float[][][][] parsedOutputs = new float[4][8][8][3]; + outputs[0].copyTo(parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, -19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + wrapper.close(); + } + + @Test + public void testRunWithByteBufferHavingWrongSize() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(BYTE_MODEL_PATH); + ByteBuffer bbuf = ByteBuffer.allocateDirect(2 * 7 * 8 * 3); + bbuf.order(ByteOrder.nativeOrder()); + Object[] inputs = {bbuf}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "Failed to get input dimensions. 0-th input should have 192 bytes, but found 336 bytes."); + } + wrapper.close(); + } + + @Test + public void testRunWithWrongInputType() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + int[] oneD = {4, 3, 9}; + int[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + int[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + int[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "DataType (2) of input data does not match with the DataType (1) of model inputs."); + } + wrapper.close(); + } + + @Test + public void testRunAfterClose() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + wrapper.close(); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("Invalid handle to Interpreter."); + } + } + + @Test + public void testRunWithEmptyInputs() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + try { + Object[] inputs = {}; + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains("Invalid inputs. Inputs should not be null or empty."); + } + wrapper.close(); + } + + @Test + public void testRunWithWrongInputSize() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD, fourD}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("Expected num of inputs is 1 but got 2"); + } + wrapper.close(); + } + + @Test + public void testRunWithWrongInputNumOfDims() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + Object[] inputs = {threeD}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains("0-th input should have 4 dimensions, but found 3 dimensions"); + } + wrapper.close(); + } + + @Test + public void testRunWithWrongInputDims() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains("0-th input dimension should be [?,8,8,3], but found [?,8,7,3]"); + } + wrapper.close(); + } + + @Test + public void testNumElements() { + int[] shape = {2, 3, 4}; + int num = NativeInterpreterWrapper.numElements(shape); + assertThat(num).isEqualTo(24); + shape = null; + num = NativeInterpreterWrapper.numElements(shape); + assertThat(num).isEqualTo(0); + } + + @Test + public void testIsNonEmtpyArray() { + assertThat(NativeInterpreterWrapper.isNonEmptyArray(null)).isFalse(); + assertThat(NativeInterpreterWrapper.isNonEmptyArray(3.2)).isFalse(); + int[] emptyArray = {}; + assertThat(NativeInterpreterWrapper.isNonEmptyArray(emptyArray)).isFalse(); + int[] validArray = {9, 5, 2, 1}; + assertThat(NativeInterpreterWrapper.isNonEmptyArray(validArray)).isTrue(); + } + + @Test + public void testDataTypeOf() { + float[] testEmtpyArray = {}; + DataType dataType = NativeInterpreterWrapper.dataTypeOf(testEmtpyArray); + assertThat(dataType).isEqualTo(DataType.FLOAT32); + float[] testFloatArray = {0.783f, 0.251f}; + dataType = NativeInterpreterWrapper.dataTypeOf(testFloatArray); + assertThat(dataType).isEqualTo(DataType.FLOAT32); + float[][] testMultiDimArray = {testFloatArray, testFloatArray, testFloatArray}; + dataType = NativeInterpreterWrapper.dataTypeOf(testFloatArray); + assertThat(dataType).isEqualTo(DataType.FLOAT32); + try { + double[] testDoubleArray = {0.783, 0.251}; + NativeInterpreterWrapper.dataTypeOf(testDoubleArray); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("cannot resolve DataType of"); + } + try { + Float[] testBoxedArray = {0.783f, 0.251f}; + NativeInterpreterWrapper.dataTypeOf(testBoxedArray); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("cannot resolve DataType of [Ljava.lang.Float;"); + } + } + + @Test + public void testNumDimensions() { + int scalar = 1; + assertThat(NativeInterpreterWrapper.numDimensions(scalar)).isEqualTo(0); + int[][] array = {{2, 4}, {1, 9}}; + assertThat(NativeInterpreterWrapper.numDimensions(array)).isEqualTo(2); + try { + int[] emptyArray = {}; + NativeInterpreterWrapper.numDimensions(emptyArray); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("array lengths cannot be 0."); + } + } + + @Test + public void testFillShape() { + int[][][] array = {{{23}, {14}, {87}}, {{12}, {42}, {31}}}; + int num = NativeInterpreterWrapper.numDimensions(array); + int[] shape = new int[num]; + NativeInterpreterWrapper.fillShape(array, 0, shape); + assertThat(num).isEqualTo(3); + assertThat(shape[0]).isEqualTo(2); + assertThat(shape[1]).isEqualTo(3); + assertThat(shape[2]).isEqualTo(1); + } +} diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java new file mode 100644 index 0000000000..665c937cb6 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java @@ -0,0 +1,32 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link org.tensorflow.lite.TensorFlowLite}. */ +@RunWith(JUnit4.class) +public final class TensorFlowLiteTest { + + @Test + public void testVersion() { + assertThat(TensorFlowLite.version()).isEqualTo("3"); + } +} diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java new file mode 100644 index 0000000000..e41e971159 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java @@ -0,0 +1,105 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.fail; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link org.tensorflow.lite.Tensor}. */ +@RunWith(JUnit4.class) +public final class TensorTest { + + private static final String MODEL_PATH = + "third_party/tensorflow/contrib/lite/java/src/testdata/add.bin"; + + private NativeInterpreterWrapper wrapper; + private long nativeHandle; + + @Before + public void setUp() { + wrapper = new NativeInterpreterWrapper(MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + nativeHandle = outputs[0].nativeHandle; + } + + @After + public void tearDown() { + wrapper.close(); + } + + @Test + public void testFromHandle() throws Exception { + Tensor tensor = Tensor.fromHandle(nativeHandle); + assertThat(tensor).isNotNull(); + int[] expectedShape = {2, 8, 8, 3}; + assertThat(tensor.shapeCopy).isEqualTo(expectedShape); + assertThat(tensor.dtype).isEqualTo(DataType.FLOAT32); + } + + @Test + public void testCopyTo() { + Tensor tensor = Tensor.fromHandle(nativeHandle); + float[][][][] parsedOutputs = new float[2][8][8][3]; + tensor.copyTo(parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + } + + @Test + public void testCopyToWrongType() { + Tensor tensor = Tensor.fromHandle(nativeHandle); + int[][][][] parsedOutputs = new int[2][8][8][3]; + try { + tensor.copyTo(parsedOutputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "Cannot convert an TensorFlowLite tensor with type " + + "FLOAT32 to a Java object of type [[[[I (which is compatible with the TensorFlowLite " + + "type INT32)"); + } + } + + @Test + public void testCopyToWrongShape() { + Tensor tensor = Tensor.fromHandle(nativeHandle); + float[][][][] parsedOutputs = new float[1][8][8][3]; + try { + tensor.copyTo(parsedOutputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains( + "Shape of output target [1, 8, 8, 3] does not match " + + "with the shape of the Tensor [2, 8, 8, 3]."); + } + } +} diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD new file mode 100644 index 0000000000..2b4f37bc6c --- /dev/null +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD @@ -0,0 +1,30 @@ +# Description: +# Internal helper function to test TF Lite API. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +android_library( + name = "testhelper", + srcs = glob( + [ + "*.java", + ], + ), + deps = [ + "//tensorflow/contrib/lite/java:tensorflowlite_java", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java new file mode 100644 index 0000000000..8660cabf70 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java @@ -0,0 +1,35 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +/** A helper class for internal tests. */ +public class TestHelper { + + /** + * Turns on/off NNAPI of an {@code Interpreter}. + * + * @param interpreter an instance of {@code Interpreter}. If it is not initialized, an {@code + * IllegalArgumentException} will be thrown. + * @param useNNAPI a boolean value indicating to turn on or off NNAPI. + */ + public static void setUseNNAPI(Interpreter interpreter, boolean useNNAPI) { + if (interpreter != null && interpreter.wrapper != null) { + interpreter.wrapper.setUseNNAPI(useNNAPI); + } else { + throw new IllegalArgumentException("Interpreter has not initialized; Failed to setUseNNAPI."); + } + } +} diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD new file mode 100644 index 0000000000..bbbfa3e741 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -0,0 +1,408 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts") +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) + +tf_cc_test( + name = "optional_tensor_test", + size = "small", + srcs = ["optional_tensor_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "test_util", + testonly = 1, + srcs = ["test_util.cc"], + hdrs = ["test_util.h"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/core:lib", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "gemm_support", + srcs = [ + "gemm_support.cc", + ], + hdrs = [ + "gemm_support.h", + ], + copts = tflite_copts(), + deps = [ + ":op_macros", + "//tensorflow/contrib/lite:context", + "@gemmlowp//:gemmlowp", + ], +) + +cc_library( + name = "activation_functor", + hdrs = [ + "activation_functor.h", + ], + deps = [ + "//tensorflow/contrib/lite:builtin_op_data", + ], +) + +cc_library( + name = "op_macros", + hdrs = [ + "op_macros.h", + ], +) + +cc_library( + name = "builtin_ops", + srcs = [ + "activations.cc", + "add.cc", + "basic_rnn.cc", + "concatenation.cc", + "conv.cc", + "depthwise_conv.cc", + "embedding_lookup.cc", + "embedding_lookup_sparse.cc", + "fully_connected.cc", + "hashtable_lookup.cc", + "kernel_util.cc", + "l2norm.cc", + "local_response_norm.cc", + "lsh_projection.cc", + "lstm.cc", + "mul.cc", + "pooling.cc", + "register.cc", + "reshape.cc", + "resize_bilinear.cc", + "skip_gram.cc", + "space_to_depth.cc", + "svdf.cc", + ], + hdrs = [ + "kernel_util.h", + "padding.h", + "register.h", + ], + # Suppress warnings that are introduced by Eigen Tensor. + copts = tflite_copts() + [ + "-Wno-error=reorder", + ] + select({ + "//tensorflow:ios": ["-Wno-error=invalid-partial-specialization"], + "//conditions:default": [ + ], + }), + deps = [ + ":activation_functor", + ":op_macros", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:gemm_support", + "//tensorflow/contrib/lite/kernels/internal:optimized", + "//tensorflow/contrib/lite/kernels/internal:optimized_base", + "//tensorflow/contrib/lite/kernels/internal:quantization_util", + "//tensorflow/contrib/lite/kernels/internal:reference", + "//tensorflow/contrib/lite/kernels/internal:reference_base", + "//tensorflow/contrib/lite/kernels/internal:round", + "//tensorflow/contrib/lite/kernels/internal:tensor_utils", + "@farmhash_archive//:farmhash", + ], +) + +tf_cc_test( + name = "activations_test", + size = "small", + srcs = ["activations_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "add_test", + size = "small", + srcs = ["add_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "concatenation_test", + size = "small", + srcs = ["concatenation_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "conv_test", + size = "small", + srcs = ["conv_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "depthwise_conv_test", + size = "small", + srcs = ["depthwise_conv_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "basic_rnn_test", + size = "small", + srcs = ["basic_rnn_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "l2norm_test", + size = "small", + srcs = ["l2norm_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "mul_test", + size = "small", + srcs = ["mul_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "reshape_test", + size = "small", + srcs = ["reshape_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "resize_bilinear_test", + size = "small", + srcs = ["resize_bilinear_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "svdf_test", + size = "small", + srcs = ["svdf_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "embedding_lookup_test", + size = "small", + srcs = ["embedding_lookup_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "embedding_lookup_sparse_test", + size = "small", + srcs = ["embedding_lookup_sparse_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "fully_connected_test", + size = "small", + srcs = ["fully_connected_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "local_response_norm_test", + size = "small", + srcs = ["local_response_norm_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "pooling_test", + size = "small", + srcs = ["pooling_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "softmax_test", + size = "small", + srcs = ["softmax_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "//tensorflow/contrib/lite/kernels/internal:reference_base", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "lsh_projection_test", + size = "small", + srcs = ["lsh_projection_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "hashtable_lookup_test", + size = "small", + srcs = ["hashtable_lookup_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "lstm_test", + size = "small", + srcs = ["lstm_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "skip_gram_test", + size = "small", + srcs = ["skip_gram_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "space_to_depth_test", + size = "small", + srcs = ["space_to_depth_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/kernels/activation_functor.h b/tensorflow/contrib/lite/kernels/activation_functor.h new file mode 100644 index 0000000000..cfb3369e99 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/activation_functor.h @@ -0,0 +1,58 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_ACTIVATION_FUNCTOR_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_ACTIVATION_FUNCTOR_H_ + +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" + +namespace tflite { + +// Dynamic (non-fused) activation functor. perhaps it is worth having +// template instantiation? +// TODO(aselle): Make this more efficient by pulling the switch to conv_eval +// using template inlining. +class ActivationFunctor { + public: + explicit ActivationFunctor(TfLiteFusedActivation act) : act_(act) {} + + float operator()(float a) const { + switch (act_) { + case kTfLiteActNone: + return a; + case kTfLiteActRelu: + return a < 0.f ? 0.f : a; + case kTfLiteActRelu6: + return std::max(0.f, std::min(a, 6.f)); + case kTfLiteActTanh: + return std::tanh(a); + case kTfLiteActSigmoid: + return 1.0f / (1.0f + std::exp(-a)); + default: + // TODO(aselle): More informative fatal error! + exit(1); + } + } + + private: + TfLiteFusedActivation act_; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_ACTIVATION_FUNCTOR_H_ diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc new file mode 100644 index 0000000000..7ab60a33e5 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/activations.cc @@ -0,0 +1,389 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace activations { + +struct OpData { + int32_t input_multiplier = 0; + int input_left_shift = 0; + int32_t input_range_radius = 0; + int diff_min = 0; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + // This is a builtin op, so we don't use the contents in 'buffer', if any. + // Instead, we allocate a new object to carry information from Prepare() to + // Eval(). + return new OpData; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + +TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + if (input->type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + TF_LITE_ENSURE(context, output->params.scale == 1. / 256); + + static constexpr int kInputIntegerBits = 4; + + const double input_real_multiplier = + input->params.scale * + static_cast(1 << (31 - kInputIntegerBits)); + + QuantizeMultiplierGreaterThanOne(input_real_multiplier, + &data->input_multiplier, + &data->input_left_shift); + data->input_range_radius = + CalculateInputRadius(kInputIntegerBits, data->input_left_shift); + } + + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + +TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + TF_LITE_ENSURE(context, + NumDimensions(input) == 2 || NumDimensions(input) == 4); + + if (input->type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + TF_LITE_ENSURE(context, output->params.scale == 1. / 256); + + static const int kScaledDiffIntegerBits = 5; + + tflite::PreprocessSoftmaxScaling( + params->beta, input->params.scale, kScaledDiffIntegerBits, + &data->input_multiplier, &data->input_left_shift); + data->diff_min = -1.0 * tflite::CalculateInputRadius( + kScaledDiffIntegerBits, data->input_left_shift); + } + + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + +TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + switch (input->type) { + case kTfLiteFloat32: { + size_t elements = input->bytes / sizeof(float); + float* in = input->data.f; + float* in_end = in + elements; + float* out = output->data.f; + for (; in < in_end; in++, out++) *out = std::max(0.f, *in); + return kTfLiteOk; + } + break; + default: + context->ReportError(context, "Only float32 supported currently."); + return kTfLiteError; + } +} + +TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + switch (input->type) { + case kTfLiteFloat32: { + size_t elements = input->bytes / sizeof(float); + float* in = input->data.f; + float* in_end = in + elements; + float* out = output->data.f; + for (; in < in_end; in++, out++) { + *out = std::min(std::max(-1.f, *in), 1.f); + } + return kTfLiteOk; + } break; + default: + context->ReportError(context, "Only float32 supported currently."); + return kTfLiteError; + } +} + +TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + switch (input->type) { + case kTfLiteFloat32: { + size_t elements = input->bytes / sizeof(float); + float* in = input->data.f; + float* in_end = in + elements; + float* out = output->data.f; + for (; in < in_end; in++, out++) *out = std::min(std::max(0.f, *in), 6.f); + return kTfLiteOk; + } + break; + default: + context->ReportError(context, "Only float32 supported currently."); + return kTfLiteError; + } +} + +TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + switch (input->type) { + case kTfLiteFloat32: { + size_t elements = input->bytes / sizeof(float); + float* in = input->data.f; + float* in_end = in + elements; + float* out = output->data.f; + for (; in < in_end; in++, out++) *out = std::tanh(*in); + return kTfLiteOk; + } + break; + default: + context->ReportError(context, "Only float32 supported currently."); + return kTfLiteError; + } +} + +// Sigmoid is also know as "Logistic". +TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + switch (input->type) { + case kTfLiteFloat32: { + size_t elements = input->bytes / sizeof(float); + float* in = input->data.f; + float* in_end = in + elements; + float* out = output->data.f; + for (; in < in_end; in++, out++) *out = 1.f / (1.f + std::exp(-*in)); + break; + } + case kTfLiteUInt8: { + optimized_ops::Logistic( + GetTensorData(input), GetTensorDims(input), + input->params.zero_point, data->input_range_radius, + data->input_multiplier, data->input_left_shift, + GetTensorData(output), GetTensorDims(output)); + break; + } + default: + context->ReportError(context, "Only float32 supported currently."); + return kTfLiteError; + } + return kTfLiteOk; +} + +// Takes a 2D tensor and perform softmax along the second dimension. +void Softmax2DFloat(TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params) { + const int batch_size = input->dims->data[0]; + const int input_size = input->dims->data[1]; + float* in = input->data.f; + float* out = output->data.f; + TF_LITE_ASSERT(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) { + if (in[i] > max_coeff) max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) { + out[i] = std::exp((in[i] - max_coeff) * params->beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. + in += input_size; + out += input_size; + } +} + +void Softmax2DQuantized(TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params, OpData* data) { + // TODO(ahentz): this is arguably a dirty trick. Since the implementation + // always traverses the last dimension of a 4D tensor, we will pretend our 2D + // tensor is 4D in a special way. We will convert a (X, Y) shape into a (X, + // 1, 1, Y) shape. + const int batch_size = input->dims->data[0]; + const int input_size = input->dims->data[1]; + optimized_ops::Softmax(GetTensorData(input), + GetTensorDims({batch_size, 1, 1, input_size}), + data->input_multiplier, data->input_left_shift, + data->diff_min, GetTensorData(output), + GetTensorDims({batch_size, 1, 1, input_size})); +} + +// Takes a 4D tensor and perform softmax along the forth dimension. +void Softmax4DFloat(TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params) { + optimized_ops::Softmax(GetTensorData(input), GetTensorDims(input), + params->beta, GetTensorData(output), + GetTensorDims(output)); +} + +void Softmax4DQuantized(TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params, OpData* data) { + optimized_ops::Softmax(GetTensorData(input), GetTensorDims(input), + data->input_multiplier, data->input_left_shift, + data->diff_min, GetTensorData(output), + GetTensorDims(output)); +} + +TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + + // TODO(ahentz): consider an implementation that works for many (all?) + // dimensions. + switch (input->type) { + case kTfLiteFloat32: { + if (NumDimensions(input) == 2) { + Softmax2DFloat(input, output, params); + return kTfLiteOk; + } + if (NumDimensions(input) == 4) { + Softmax4DFloat(input, output, params); + return kTfLiteOk; + } + context->ReportError(context, + "Only 2D and 4D tensors supported currently."); + return kTfLiteError; + } + case kTfLiteUInt8: { + if (NumDimensions(input) == 2) { + Softmax2DQuantized(input, output, params, data); + return kTfLiteOk; + } + if (NumDimensions(input) == 4) { + Softmax4DQuantized(input, output, params, data); + return kTfLiteOk; + } + context->ReportError(context, + "Only 2D and 4D tensors supported currently."); + return kTfLiteError; + } + default: + context->ReportError(context, + "Only float32 and uint8_t supported currently."); + return kTfLiteError; + } +} + +} // namespace activations + +TfLiteRegistration* Register_RELU() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + activations::GenericPrepare, + activations::ReluEval}; + return &r; +} + +TfLiteRegistration* Register_RELU1() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + activations::GenericPrepare, + activations::Relu1Eval}; + return &r; +} + +TfLiteRegistration* Register_RELU6() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + activations::GenericPrepare, + activations::Relu6Eval}; + return &r; +} + +TfLiteRegistration* Register_TANH() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + activations::GenericPrepare, + activations::TanhEval}; + return &r; +} + +TfLiteRegistration* Register_LOGISTIC() { + static TfLiteRegistration r = {activations::Init, activations::Free, + activations::SigmoidPrepare, + activations::SigmoidEval}; + return &r; +} + +TfLiteRegistration* Register_SOFTMAX() { + static TfLiteRegistration r = {activations::Init, activations::Free, + activations::SoftmaxPrepare, + activations::SoftmaxEval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/activations_test.cc b/tensorflow/contrib/lite/kernels/activations_test.cc new file mode 100644 index 0000000000..f10aee7017 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/activations_test.cc @@ -0,0 +1,323 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseActivationsOpModel : public SingleOpModel { + public: + // Most activations don't take any options, so this constructor works for + // them. + BaseActivationsOpModel(BuiltinOperator type, TensorData input) { + input_ = AddInput(input); + if (input.type == TensorType_UINT8) { + output_ = AddOutput({input.type, {}, 0, 0, 1. / 256}); + } else { + output_ = AddOutput({input.type, {}}); + } + SetBuiltinOp(type, BuiltinOptions_NONE, 0); + BuildInterpreter({GetShape(input_)}); + } + + // A dedicated constructor for SOFTMAX, which does some options. + BaseActivationsOpModel(float softmax_beta, TensorData input) { + input_ = AddInput(input); + if (input.type == TensorType_UINT8) { + output_ = AddOutput({input.type, {}, 0, 0, 1. / 256}); + } else { + output_ = AddOutput({input.type, {}}); + } + SetBuiltinOp(BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions, + CreateSoftmaxOptions(builder_, softmax_beta).Union()); + BuildInterpreter({GetShape(input_)}); + } + + protected: + int input_; + int output_; +}; + +class FloatActivationsOpModel : public BaseActivationsOpModel { + public: + using BaseActivationsOpModel::BaseActivationsOpModel; + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + std::vector GetOutput() { return ExtractVector(output_); } +}; + +// TODO(ahentz): I don't quite understand the tradeoffs in the quantized +// implementation of sigmoid and software, but a tolerance of twice the output +// scale seems reasonable. We might want to change this if we have a better +// theoretical bound. +const float kQuantizedTolerance = 2 * (1. / 256); + +class QuantizedActivationsOpModel : public BaseActivationsOpModel { + public: + using BaseActivationsOpModel::BaseActivationsOpModel; + + void SetInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +TEST(FloatActivationsOpTest, Relu) { + FloatActivationsOpModel m(BuiltinOperator_RELU, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 0, 0, 2, 4, // + 3, 0, 10, 1, // + })); +} + +TEST(FloatActivationsOpTest, Relu1) { + FloatActivationsOpModel m(BuiltinOperator_RELU1, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); + m.SetInput({ + 0.0, -0.6, 0.2, -0.4, // + 0.3, -2.0, 1.1, -0.1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 0.0, -0.6, 0.2, -0.4, // + 0.3, -1.0, 1.0, -0.1, // + })); +} + +TEST(FloatActivationsOpTest, Relu6) { + FloatActivationsOpModel m(BuiltinOperator_RELU6, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 0, 0, 2, 4, // + 3, 0, 6, 1, // + })); +} + +TEST(FloatActivationsOpTest, Tanh) { + FloatActivationsOpModel m(BuiltinOperator_TANH, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 0, -0.9999877, 0.9640275, 0.999329, // + 0.99505475, -0.9640275, 1, 0.7615941, // + }))); +} + +TEST(FloatActivationsOpTest, Sigmoid) { + FloatActivationsOpModel m(BuiltinOperator_LOGISTIC, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 0.5, 0.002473, 0.880797, 0.982014, // + 0.952574, 0.119203, 0.999955, 0.731059, // + }))); +} + +TEST(QuantizedActivationsOpTest, Sigmoid) { + QuantizedActivationsOpModel m( + BuiltinOperator_LOGISTIC, + /*input=*/{TensorType_UINT8, {1, 2, 4, 1}, -10, 10}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 0.5, 0.002473, 0.880797, 0.982014, // + 0.952574, 0.119203, 0.999955, 0.731059, // + }, + kQuantizedTolerance))); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({128, 1, 227, 251, 244, 32, 255, 188})); +} + +TEST(FloatActivationsOpTest, Softmax4D) { + FloatActivationsOpModel m(0.1, + /*input=*/{TensorType_FLOAT32, {1, 2, 1, 4}}); + m.SetInput({ + 0, -6, 2, 4, // depth = 0 + 3, -2, 10, 1, // depth = 1 + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + .23463, .12877, .28658, .35003, // + .22528, .13664, .45365, .18443, // + }))); + + // Same input, but a different shape. + FloatActivationsOpModel m2(0.1, + /*input=*/{TensorType_FLOAT32, {4, 1, 1, 2}}); + m2.SetInput({ + 0, -6, // + 2, 4, // + 3, -2, // + 10, 1, // + }); + m2.Invoke(); + EXPECT_THAT(m2.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 0.645656, 0.354344, // + 0.450166, 0.549834, // + 0.622459, 0.377541, // + 0.710949, 0.28905, // + }))); +} + +TEST(QuantizedActivationsOpTest, Softmax4D) { + QuantizedActivationsOpModel m( + 0.1, + /*input=*/{TensorType_UINT8, {1, 2, 1, 4}, -10, 10}); + m.SetInput({ + 0, -6, 2, 4, // depth = 0 + 3, -2, 10, 1, // depth = 1 + }); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + .23463, .12877, .28658, .35003, // + .22528, .13664, .45365, .18443, // + }, + kQuantizedTolerance))); + + // Same input, but a different shape. + QuantizedActivationsOpModel m2( + 0.1, + /*input=*/{TensorType_UINT8, {4, 1, 1, 2}, -10, 10}); + m2.SetInput({ + 0, -6, // + 2, 4, // + 3, -2, // + 10, 1, // + }); + m2.Invoke(); + EXPECT_THAT(m2.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + { + 0.645656, 0.354344, // + 0.450166, 0.549834, // + 0.622459, 0.377541, // + 0.710949, 0.28905, // + }, + kQuantizedTolerance))); +} + +TEST(FloatActivationsOpTest, Softmax2D) { + FloatActivationsOpModel m(0.1, + /*input=*/{TensorType_FLOAT32, {2, 4}}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + .23463, .12877, .28658, .35003, // + .22528, .13664, .45365, .18443, // + }))); + + // Same input, but a different shape. + FloatActivationsOpModel m2(0.1, + /*input=*/{TensorType_FLOAT32, {4, 2}}); + m2.SetInput({ + 0, -6, // + 2, 4, // + 3, -2, // + 10, 1, // + }); + m2.Invoke(); + EXPECT_THAT(m2.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 0.645656, 0.354344, // + 0.450166, 0.549834, // + 0.622459, 0.377541, // + 0.710949, 0.28905, // + }))); +} + +TEST(QuantizedActivationsOpTest, Softmax2D) { + QuantizedActivationsOpModel m(0.1, + /*input=*/{TensorType_UINT8, {2, 4}, -10, 10}); + m.SetInput({ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + .23463, .12877, .28658, .35003, // + .22528, .13664, .45365, .18443, // + }, + kQuantizedTolerance))); + + // Same input, but a different shape. + QuantizedActivationsOpModel m2(0.1, + /*input=*/{TensorType_UINT8, {4, 2}, -10, 10}); + m2.SetInput({ + 0, -6, // + 2, 4, // + 3, -2, // + 10, 1, // + }); + m2.Invoke(); + EXPECT_THAT(m2.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + { + 0.645656, 0.354344, // + 0.450166, 0.549834, // + 0.622459, 0.377541, // + 0.710949, 0.28905, // + }, + kQuantizedTolerance))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/add.cc b/tensorflow/contrib/lite/kernels/add.cc new file mode 100644 index 0000000000..0e10a249ab --- /dev/null +++ b/tensorflow/contrib/lite/kernels/add.cc @@ -0,0 +1,184 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace add { + +// This file has three implementation of Add. +enum KernelType { + kReference, + kGenericOptimized, // Neon-free + kNeonOptimized, +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); + for (int i = 0; i < NumDimensions(input1); ++i) { + TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), + SizeOfDimension(input2, i)); + } + + TF_LITE_ENSURE_EQ(context, input1->type, output->type); + TF_LITE_ENSURE_EQ(context, input2->type, output->type); + + TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); + return context->ResizeTensor(context, output, output_size); +} + +template +void EvalAddFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, TfLiteTensor* input1, + TfLiteTensor* input2, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(params->activation, &output_activation_min, + &output_activation_max); +#define TF_LITE_ADD(type) \ + type::Add(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_ADD(reference_ops); + } else { + TF_LITE_ADD(optimized_ops); + } +#undef TF_LITE_ADD +} + +template +void EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, TfLiteTensor* input1, + TfLiteTensor* input2, TfLiteTensor* output) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + const int left_shift = 20; + const double twice_max_input_scale = + 2 * std::max(input1->params.scale, input2->params.scale); + const double real_input1_multiplier = + input1->params.scale / twice_max_input_scale; + const double real_input2_multiplier = + input2->params.scale / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / ((1 << left_shift) * output->params.scale); + + int32 input1_multiplier; + int input1_shift; + QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, + &input1_shift); + int32 input2_multiplier; + int input2_shift; + QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, + &input2_shift); + int32 output_multiplier; + int output_shift; + QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_ADD(type) \ + type::BroadcastAdd( \ + left_shift, GetTensorData(input1), GetTensorDims(input1), \ + input1_offset, input1_multiplier, input1_shift, \ + GetTensorData(input2), GetTensorDims(input2), input2_offset, \ + input2_multiplier, input2_shift, output_offset, output_multiplier, \ + output_shift, output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)); + + if (kernel_type == kReference) { + TF_LITE_ADD(reference_ops); + } else { + TF_LITE_ADD(optimized_ops); + } +#undef TF_LITE_ADD +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { + EvalAddFloat(context, node, params, input1, input2, output); + } else if (output->type == kTfLiteUInt8) { + EvalAddQuantized(context, node, params, input1, input2, + output); + } else { + context->ReportError(context, + "Inputs and outputs not all float|unit8 types."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace add + +TfLiteRegistration* Register_ADD_REF() { + static TfLiteRegistration r = {nullptr, nullptr, add::Prepare, + add::Eval}; + return &r; +} + +TfLiteRegistration* Register_ADD_GENERIC_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, add::Prepare, + add::Eval}; + return &r; +} + +TfLiteRegistration* Register_ADD_NEON_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, add::Prepare, + add::Eval}; + return &r; +} + +TfLiteRegistration* Register_ADD() { +#ifdef USE_NEON + return Register_ADD_NEON_OPT(); +#else + return Register_ADD_GENERIC_OPT(); +#endif +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc new file mode 100644 index 0000000000..8e12a837c4 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/add_test.cc @@ -0,0 +1,171 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseAddOpModel : public SingleOpModel { + public: + BaseAddOpModel(const TensorData& input, const TensorData& output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input); + input2_ = AddInput(input); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions, + CreateAddOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatAddOpModel : public BaseAddOpModel { + public: + using BaseAddOpModel::BaseAddOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +class QuantizedAddOpModel : public BaseAddOpModel { + public: + using BaseAddOpModel::BaseAddOpModel; + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +// for quantized Add, the error shouldn't exceed 2*step +float GetTolerance(int min, int max) { + float kQuantizedStep = (max - min) / 255.0; + float kQuantizedTolerance = 2.0 * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST(FloatAddOpModel, NoActivation) { + FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3})); +} + +TEST(FloatAddOpModel, ActivationRELU1) { + FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU1); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.0, 0.4, 1.0, 1.0})); +} + +TEST(FloatAddOpModel, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5, 1.1, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({-1.9, 0.4, 1.0, 1.3, 2.2, 2.1})) + << "With shape number " << i; + } +} + +TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = { + {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = { + {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; + std::vector> results = { + {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU1) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, + {-0.8, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, + {0.6, 0.4, -0.8, 0.5}}; + std::vector> results = {{-0.2, 0.6, 1.0, -0.1}, + {-0.2, 0.6, -0.1, 0.8}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_RELU1); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({-1.9, 0.5, 1.0, 1.3, 2.2, 2.1}, + kQuantizedTolerance))) + << "With shape number " << i; + } +} + +} // namespace +} // namespace tflite +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/basic_rnn.cc b/tensorflow/contrib/lite/kernels/basic_rnn.cc new file mode 100644 index 0000000000..3cee43c68b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/basic_rnn.cc @@ -0,0 +1,161 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace rnn { + +constexpr int kInputTensor = 0; +constexpr int kWeightsTensor = 1; +constexpr int kRecurrentWeightsTensor = 2; +constexpr int kBiasTensor = 3; +constexpr int KHiddenStateTensor = 0; +constexpr int kOutputTensor = 1; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + // Check we have all the inputs and outputs we need. + TF_LITE_ENSURE_EQ(context, node->inputs->size, 4); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 2); + + TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]]; + TfLiteTensor* input_weights = + &context->tensors[node->inputs->data[kWeightsTensor]]; + TfLiteTensor* recurrent_weights = + &context->tensors[node->inputs->data[kRecurrentWeightsTensor]]; + TfLiteTensor* bias = &context->tensors[node->inputs->data[kBiasTensor]]; + + // Check all the parameters of tensor match within themselves and match the + // input configuration. + const int batch_size = input->dims->data[0]; + const int num_units = input_weights->dims->data[0]; + TF_LITE_ASSERT_EQ(input->dims->data[1], input_weights->dims->data[1]); + TF_LITE_ASSERT_EQ(input_weights->dims->data[0], bias->dims->data[0]); + TF_LITE_ASSERT_EQ(recurrent_weights->dims->data[0], bias->dims->data[0]); + TF_LITE_ASSERT_EQ(recurrent_weights->dims->data[1], bias->dims->data[0]); + + TfLiteTensor* hidden_state = + &context->tensors[node->outputs->data[KHiddenStateTensor]]; + TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputTensor]]; + + // Resize state. + TfLiteIntArray* hidden_state_size_array = TfLiteIntArrayCreate(2); + hidden_state_size_array->data[0] = batch_size; + hidden_state_size_array->data[1] = num_units; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, hidden_state, + hidden_state_size_array)); + + // Mark hidden state as a persistent tensor. + hidden_state->allocation_type = kTfLiteArenaRwPersistent; + + // Resize output. + TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(2); + output_size_array->data[0] = batch_size; + output_size_array->data[1] = num_units; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, output, + output_size_array)); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]]; + TfLiteTensor* input_weights = + &context->tensors[node->inputs->data[kWeightsTensor]]; + TfLiteTensor* recurrent_weights = + &context->tensors[node->inputs->data[kRecurrentWeightsTensor]]; + TfLiteTensor* bias = &context->tensors[node->inputs->data[kBiasTensor]]; + TfLiteTensor* hidden_state = + &context->tensors[node->outputs->data[KHiddenStateTensor]]; + TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputTensor]]; + + // Initialize the pointer bias. + const float* bias_ptr = bias->data.f; + + const int batch_size = input->dims->data[0]; + const int num_units = input_weights->dims->data[0]; + const int input_size = input->dims->data[1]; + const int input_weights_stride = input_weights->dims->data[1]; + const int recurrent_weights_stride = recurrent_weights->dims->data[1]; + + // For each batch + for (int b = 0; b < batch_size; b++) { + // Initialize the pointer to input, output and bias. + const float* input_ptr_batch = input->data.f + b * input_size; + float* output_ptr_batch = output->data.f + b * num_units; + float* hidden_state_ptr_batch = hidden_state->data.f + b * num_units; + + // Initialize input_weights and recurrent_weights. + const float* input_weights_ptr = input_weights->data.f; + const float* recurrent_weights_ptr = recurrent_weights->data.f; + + // Output = bias + for (int o = 0; o < num_units; o++) { + output_ptr_batch[o] = bias_ptr[o]; + } + + // Output += input * input_weights + for (int o = 0; o < num_units; o++) { + for (int i = 0; i < input_size; i++) { + output_ptr_batch[o] += input_ptr_batch[i] * input_weights_ptr[i]; + } + input_weights_ptr += input_weights_stride; + } + + // Output += recurrent_weights * hidden_state + for (int o = 0; o < num_units; o++) { + for (int h = 0; h < num_units; h++) { + output_ptr_batch[o] += + hidden_state_ptr_batch[h] * recurrent_weights_ptr[h]; + } + recurrent_weights_ptr += recurrent_weights_stride; + } + + // Output = activation(Output) and update hidden_state + for (int o = 0; o < num_units; o++) { + output_ptr_batch[o] = + (ActivationFunctor(params->activation))(output_ptr_batch[o]); + hidden_state_ptr_batch[o] = output_ptr_batch[o]; + } + } + + return kTfLiteOk; +} + +} // namespace rnn + +TfLiteRegistration* Register_RNN() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + rnn::Prepare, rnn::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/basic_rnn_test.cc b/tensorflow/contrib/lite/kernels/basic_rnn_test.cc new file mode 100644 index 0000000000..dfa75655bc --- /dev/null +++ b/tensorflow/contrib/lite/kernels/basic_rnn_test.cc @@ -0,0 +1,267 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite RNN op. + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +static float rnn_input[] = { + 0.23689353, 0.285385, 0.037029743, -0.19858193, -0.27569133, + 0.43773448, 0.60379338, 0.35562468, -0.69424844, -0.93421471, + -0.87287879, 0.37144363, -0.62476718, 0.23791671, 0.40060222, + 0.1356622, -0.99774903, -0.98858172, -0.38952237, -0.47685933, + 0.31073618, 0.71511042, -0.63767755, -0.31729108, 0.33468103, + 0.75801885, 0.30660987, -0.37354088, 0.77002847, -0.62747043, + -0.68572164, 0.0069220066, 0.65791464, 0.35130811, 0.80834007, + -0.61777675, -0.21095741, 0.41213346, 0.73784804, 0.094794154, + 0.47791874, 0.86496925, -0.53376222, 0.85315156, 0.10288584, + 0.86684, -0.011186242, 0.10513687, 0.87825835, 0.59929144, + 0.62827742, 0.18899453, 0.31440187, 0.99059987, 0.87170351, + -0.35091716, 0.74861872, 0.17831337, 0.2755419, 0.51864719, + 0.55084288, 0.58982027, -0.47443086, 0.20875752, -0.058871567, + -0.66609079, 0.59098077, 0.73017097, 0.74604273, 0.32882881, + -0.17503482, 0.22396147, 0.19379807, 0.29120302, 0.077113032, + -0.70331609, 0.15804303, -0.93407321, 0.40182066, 0.036301374, + 0.66521823, 0.0300982, -0.7747041, -0.02038002, 0.020698071, + -0.90300065, 0.62870288, -0.23068321, 0.27531278, -0.095755219, + -0.712036, -0.17384434, -0.50593495, -0.18646687, -0.96508682, + 0.43519354, 0.14744234, 0.62589407, 0.1653645, -0.10651493, + -0.045277178, 0.99032974, -0.88255352, -0.85147917, 0.28153265, + 0.19455957, -0.55479527, -0.56042433, 0.26048636, 0.84702539, + 0.47587705, -0.074295521, -0.12287641, 0.70117295, 0.90532446, + 0.89782166, 0.79817224, 0.53402734, -0.33286154, 0.073485017, + -0.56172788, -0.044897556, 0.89964068, -0.067662835, 0.76863563, + 0.93455386, -0.6324693, -0.083922029}; + +static float rnn_golden_output[] = { + 0.496726, 0, 0.965996, 0, 0.0584254, 0, + 0, 0.12315, 0, 0, 0.612266, 0.456601, + 0, 0.52286, 1.16099, 0.0291232, + + 0, 0, 0.524901, 0, 0, 0, + 0, 1.02116, 0, 1.35762, 0, 0.356909, + 0.436415, 0.0355727, 0, 0, + + 0, 0, 0, 0.262335, 0, 0, + 0, 1.33992, 0, 2.9739, 0, 0, + 1.31914, 2.66147, 0, 0, + + 0.942568, 0, 0, 0, 0.025507, 0, + 0, 0, 0.321429, 0.569141, 1.25274, 1.57719, + 0.8158, 1.21805, 0.586239, 0.25427, + + 1.04436, 0, 0.630725, 0, 0.133801, 0.210693, + 0.363026, 0, 0.533426, 0, 1.25926, 0.722707, + 0, 1.22031, 1.30117, 0.495867, + + 0.222187, 0, 0.72725, 0, 0.767003, 0, + 0, 0.147835, 0, 0, 0, 0.608758, + 0.469394, 0.00720298, 0.927537, 0, + + 0.856974, 0.424257, 0, 0, 0.937329, 0, + 0, 0, 0.476425, 0, 0.566017, 0.418462, + 0.141911, 0.996214, 1.13063, 0, + + 0.967899, 0, 0, 0, 0.0831304, 0, + 0, 1.00378, 0, 0, 0, 1.44818, + 1.01768, 0.943891, 0.502745, 0, + + 0.940135, 0, 0, 0, 0, 0, + 0, 2.13243, 0, 0.71208, 0.123918, 1.53907, + 1.30225, 1.59644, 0.70222, 0, + + 0.804329, 0, 0.430576, 0, 0.505872, 0.509603, + 0.343448, 0, 0.107756, 0.614544, 1.44549, 1.52311, + 0.0454298, 0.300267, 0.562784, 0.395095, + + 0.228154, 0, 0.675323, 0, 1.70536, 0.766217, + 0, 0, 0, 0.735363, 0.0759267, 1.91017, + 0.941888, 0, 0, 0, + + 0, 0, 1.5909, 0, 0, 0, + 0, 0.5755, 0, 0.184687, 0, 1.56296, + 0.625285, 0, 0, 0, + + 0, 0, 0.0857888, 0, 0, 0, + 0, 0.488383, 0.252786, 0, 0, 0, + 1.02817, 1.85665, 0, 0, + + 0.00981836, 0, 1.06371, 0, 0, 0, + 0, 0, 0, 0.290445, 0.316406, 0, + 0.304161, 1.25079, 0.0707152, 0, + + 0.986264, 0.309201, 0, 0, 0, 0, + 0, 1.64896, 0.346248, 0, 0.918175, 0.78884, + 0.524981, 1.92076, 2.07013, 0.333244, + + 0.415153, 0.210318, 0, 0, 0, 0, + 0, 2.02616, 0, 0.728256, 0.84183, 0.0907453, + 0.628881, 3.58099, 1.49974, 0 +}; + +class RNNOpModel : public SingleOpModel { + public: + RNNOpModel(int batches, int units, int size) + : batches_(batches), units_(units), input_size_(size) { + input_ = AddInput(TensorType_FLOAT32); + weights_ = AddInput(TensorType_FLOAT32); + recurrent_weights_ = AddInput(TensorType_FLOAT32); + bias_ = AddInput(TensorType_FLOAT32); + hidden_state_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp( + BuiltinOperator_RNN, BuiltinOptions_RNNOptions, + CreateRNNOptions(builder_, ActivationFunctionType_RELU).Union()); + BuildInterpreter({{batches_, input_size_}, + {units_, input_size_}, + {units_, units_}, + {units_}}); + } + + void SetBias(std::initializer_list f) { PopulateTensor(bias_, f); } + + void SetWeights(std::initializer_list f) { + PopulateTensor(weights_, f); + } + + void SetRecurrentWeights(std::initializer_list f) { + PopulateTensor(recurrent_weights_, f); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInput(int offset, float* begin, float* end) { + PopulateTensor(input_, offset, begin, end); + } + + void ResetHiddenState() { + const int zero_buffer_size = units_ * batches_; + std::unique_ptr zero_buffer(new float[zero_buffer_size]); + memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float)); + PopulateTensor(hidden_state_, 0, zero_buffer.get(), + zero_buffer.get() + zero_buffer_size); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + int input_size() { return input_size_; } + int num_units() { return units_; } + int num_batches() { return batches_; } + + private: + int input_; + int weights_; + int recurrent_weights_; + int bias_; + int hidden_state_; + int output_; + + int batches_; + int units_; + int input_size_; +}; + +TEST(FullyConnectedOpTest, BlackBoxTest) { + RNNOpModel rnn(2, 16, 8); + rnn.SetWeights( + {0.461459, 0.153381, 0.529743, -0.00371218, 0.676267, -0.211346, + 0.317493, 0.969689, -0.343251, 0.186423, 0.398151, 0.152399, + 0.448504, 0.317662, 0.523556, -0.323514, 0.480877, 0.333113, + -0.757714, -0.674487, -0.643585, 0.217766, -0.0251462, 0.79512, + -0.595574, -0.422444, 0.371572, -0.452178, -0.556069, -0.482188, + -0.685456, -0.727851, 0.841829, 0.551535, -0.232336, 0.729158, + -0.00294906, -0.69754, 0.766073, -0.178424, 0.369513, -0.423241, + 0.548547, -0.0152023, -0.757482, -0.85491, 0.251331, -0.989183, + 0.306261, -0.340716, 0.886103, -0.0726757, -0.723523, -0.784303, + 0.0354295, 0.566564, -0.485469, -0.620498, 0.832546, 0.697884, + -0.279115, 0.294415, -0.584313, 0.548772, 0.0648819, 0.968726, + 0.723834, -0.0080452, -0.350386, -0.272803, 0.115121, -0.412644, + -0.824713, -0.992843, -0.592904, -0.417893, 0.863791, -0.423461, + -0.147601, -0.770664, -0.479006, 0.654782, 0.587314, -0.639158, + 0.816969, -0.337228, 0.659878, 0.73107, 0.754768, -0.337042, + 0.0960841, 0.368357, 0.244191, -0.817703, -0.211223, 0.442012, + 0.37225, -0.623598, -0.405423, 0.455101, 0.673656, -0.145345, + -0.511346, -0.901675, -0.81252, -0.127006, 0.809865, -0.721884, + 0.636255, 0.868989, -0.347973, -0.10179, -0.777449, 0.917274, + 0.819286, 0.206218, -0.00785118, 0.167141, 0.45872, 0.972934, + -0.276798, 0.837861, 0.747958, -0.0151566, -0.330057, -0.469077, + 0.277308, 0.415818}); + + rnn.SetBias({0.065691948, -0.69055247, 0.1107955, -0.97084129, -0.23957068, + -0.23566568, -0.389184, 0.47481549, -0.4791103, 0.29931796, + 0.10463274, 0.83918178, 0.37197268, 0.61957061, 0.3956964, + -0.37609905}); + + rnn.SetRecurrentWeights({0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.1}); + + rnn.ResetHiddenState(); + const int input_sequence_size = sizeof(rnn_input) / sizeof(float) / + (rnn.input_size() * rnn.num_batches()); + + for (int i = 0; i < input_sequence_size; i++) { + float* batch_start = rnn_input + i * rnn.input_size(); + float* batch_end = batch_start + rnn.input_size(); + rnn.SetInput(0, batch_start, batch_end); + rnn.SetInput(rnn.input_size(), batch_start, batch_end); + + rnn.Invoke(); + + float* golden_start = rnn_golden_output + i * rnn.num_units(); + float* golden_end = golden_start + rnn.num_units(); + std::vector expected; + expected.insert(expected.end(), golden_start, golden_end); + expected.insert(expected.end(), golden_start, golden_end); + + EXPECT_THAT(rnn.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + } +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/concatenation.cc b/tensorflow/contrib/lite/kernels/concatenation.cc new file mode 100644 index 0000000000..9e7a1233da --- /dev/null +++ b/tensorflow/contrib/lite/kernels/concatenation.cc @@ -0,0 +1,200 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace concatenation { + +// This file has two implementation of Concatenation. +enum KernelType { + kReference, + kGenericOptimized, +}; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + int axis = params->axis; + int num_inputs = node->inputs->size; + + // The number of dimensions of the input tensors must match, and all + // dimensions except 'axis' must be equal. + TfLiteTensor* t0 = &context->tensors[node->inputs->data[0]]; + TfLiteType input_type = t0->type; + TF_LITE_ENSURE(context, axis >= 0); + TF_LITE_ENSURE(context, axis < t0->dims->size); + + // TODO(ahentz): These are limitations of our implementation that could be + // removed with a bit of effort. + TF_LITE_ENSURE(context, t0->dims->size <= 4); + TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); + TF_LITE_ENSURE(context, + input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8); + + // Output dimensions will match input dimensions, except 'axis', which + // will be the sum of inputs + int sum_axis = t0->dims->data[axis]; + for (int i = 1; i < num_inputs; ++i) { + TfLiteTensor* t = &context->tensors[node->inputs->data[i]]; + TF_LITE_ENSURE_EQ(context, t->dims->size, t0->dims->size); + TF_LITE_ENSURE_EQ(context, t->type, input_type); + if (input_type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, t->params.zero_point, t0->params.zero_point); + TF_LITE_ENSURE_EQ(context, t->params.scale, t0->params.scale); + } + for (int d = 0; d < t0->dims->size; ++d) { + if (d == axis) { + sum_axis += t->dims->data[axis]; + } else { + TF_LITE_ENSURE_EQ(context, t->dims->data[d], t0->dims->data[d]); + } + } + } + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(t0->dims->size); + for (int d = 0; d < t0->dims->size; ++d) { + output_size->data[d] = (d == axis) ? sum_axis : t0->dims->data[d]; + } + + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + TF_LITE_ENSURE_EQ(context, output->type, input_type); + if (input_type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + t0->params.zero_point); + TF_LITE_ENSURE_EQ(context, output->params.scale, t0->params.scale); + } + + return context->ResizeTensor(context, output, output_size); +} + +template +class VectorOfInputs { + public: + VectorOfInputs(const TfLiteContext& context, const TfLiteIntArray& inputs) { + int num_inputs = inputs.size; + + all_data_.reserve(num_inputs); + all_dims_.reserve(num_inputs); + all_dims_ptr_.reserve(num_inputs); + + for (int i = 0; i < num_inputs; ++i) { + TfLiteTensor* input = &context.tensors[inputs.data[i]]; + all_data_.push_back(GetTensorData(input)); + all_dims_.push_back(GetTensorDims(input)); + } + + // Taking the pointer from inside a std::vector is only OK if the vector is + // never modified, so we populate all_dims in the previous loop and then we + // are free to grab iterators here. + for (int i = 0; i < num_inputs; ++i) { + all_dims_ptr_.push_back(&all_dims_[i]); + } + } + const T* const* data() const { return all_data_.data(); } + const Dims<4>* const* dims() const { return all_dims_ptr_.data(); } + + private: + std::vector all_data_; + std::vector> all_dims_; + std::vector*> all_dims_ptr_; +}; + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + +// TODO(ahentz): Creating 'all_inputs' below is not very efficient. We should +// allocate and populate these during Prepare(). +// TODO(ycling): Activation function parameter is ignored. For now we dont have +// a model with a Concatenation with fused activation function. +#define TF_LITE_CONCATENATION(type, scalar) \ + VectorOfInputs all_inputs(*context, *node->inputs); \ + type::Concatenation( \ + RemapDim(NumDimensions(output), params->axis), all_inputs.data(), \ + all_inputs.dims(), node->inputs->size, GetTensorData(output), \ + GetTensorDims(output)) + + switch (output->type) { // Already know in/outtypes are same. + case kTfLiteFloat32: + if (kernel_type == kReference) { + TF_LITE_CONCATENATION(reference_ops, float); + } else { + TF_LITE_CONCATENATION(optimized_ops, float); + } + break; + case kTfLiteUInt8: + if (kernel_type == kReference) { + TF_LITE_CONCATENATION(reference_ops, uint8_t); + } else { + TF_LITE_CONCATENATION(optimized_ops, uint8_t); + } + break; + default: + context->ReportError(context, + "Only float32 and uint8 are currently supported."); + return kTfLiteError; + } + +#undef TF_LITE_CONCATENATION + + return kTfLiteOk; +} + +#undef TF_LITE_MACRO_DISPATCH + +} // namespace concatenation + +TfLiteRegistration* Register_CONCATENATION_REF() { + static TfLiteRegistration r = { + nullptr, nullptr, concatenation::Prepare, + concatenation::Eval}; + return &r; +} + +TfLiteRegistration* Register_CONCATENATION_GENERIC_OPT() { + static TfLiteRegistration r = { + nullptr, nullptr, concatenation::Prepare, + concatenation::Eval}; + return &r; +} + +TfLiteRegistration* Register_CONCATENATION() { + // TODO(ahentz): It turns out the two versions of Concatenation are almost + // identical, so we should consider removing one. + return Register_CONCATENATION_GENERIC_OPT(); +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/concatenation_test.cc b/tensorflow/contrib/lite/kernels/concatenation_test.cc new file mode 100644 index 0000000000..94e5b2acdc --- /dev/null +++ b/tensorflow/contrib/lite/kernels/concatenation_test.cc @@ -0,0 +1,162 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseConcatenationOpModel : public SingleOpModel { + public: + // TODO(ahentz): Also test different activation types, axis, input + // dimensions. + BaseConcatenationOpModel(const TensorData& input_template, int axis, + int num_inputs) { + std::vector> all_input_shapes; + for (int i = 0; i < num_inputs; ++i) { + all_input_shapes.push_back(input_template.shape); + AddInput(input_template); + } + output_ = AddOutput({input_template.type, /*shape=*/{}, input_template.min, + input_template.max}); + SetBuiltinOp( + BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions, + CreateConcatenationOptions(builder_, axis, ActivationFunctionType_NONE) + .Union()); + BuildInterpreter(all_input_shapes); + } + + protected: + int output_; +}; + +class ConcatenationOpModel : public BaseConcatenationOpModel { + public: + using BaseConcatenationOpModel::BaseConcatenationOpModel; + void SetInput(int index, std::initializer_list data) { + PopulateTensor(index, data); + } + std::vector GetOutput() { return ExtractVector(output_); } +}; + +class QuantizedConcatenationOpModel : public BaseConcatenationOpModel { + public: + using BaseConcatenationOpModel::BaseConcatenationOpModel; + void SetInput(int index, std::initializer_list data) { + QuantizeAndPopulate(index, data); + } + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +TEST(ConcatenationOpTest, ThreeDimensionalOneInput) { + ConcatenationOpModel m0({TensorType_FLOAT32, {2, 1, 2}}, /*axis=*/1, + /*num_inputs=*/1); + m0.SetInput(0, {1.0f, 3.0f, 4.0f, 7.0f}); + m0.Invoke(); + EXPECT_THAT(m0.GetOutput(), ElementsAreArray({1, 3, 4, 7})); +} + +TEST(ConcatenationOpTest, OneTrivialInput) { + ConcatenationOpModel m0({TensorType_FLOAT32, {1}}, /*axis=*/0, + /*num_inputs=*/1); + m0.SetInput(0, {5.0f}); + m0.Invoke(); + EXPECT_THAT(m0.GetOutput(), ::testing::ElementsAre(5)); +} + +TEST(ConcatenationOpTest, TwoDimensionalOneInput) { + ConcatenationOpModel m0({TensorType_FLOAT32, {2, 3}}, /*axis=*/0, + /*num_inputs=*/1); + m0.SetInput(0, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}); + m0.Invoke(); + EXPECT_THAT(m0.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6})); +} + +TEST(ConcatenationOpTest, TwoInputsTwoAxis) { + // We will concatenate two tensors along different dimensions. + auto tensor0 = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + auto tensor1 = {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + + ConcatenationOpModel m0({TensorType_FLOAT32, {2, 3}}, /*axis=*/0, + /*num_inputs=*/2); + m0.SetInput(0, tensor0); + m0.SetInput(1, tensor1); + m0.Invoke(); + EXPECT_THAT(m0.GetOutput(), + ElementsAreArray({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); + + ConcatenationOpModel m1({TensorType_FLOAT32, {2, 3}}, /*axis=*/1, + /*num_inputs=*/2); + m1.SetInput(0, tensor0); + m1.SetInput(1, tensor1); + m1.Invoke(); + EXPECT_THAT(m1.GetOutput(), + ElementsAreArray({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); +} + +TEST(ConcatenationOpTest, FourInputs) { + ConcatenationOpModel m0({TensorType_FLOAT32, {2, 1, 2}}, /*axis=*/2, + /*num_inputs=*/4); + m0.SetInput(0, {1.0f, 3.0f, 4.0f, 7.0f}); + m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f}); + m0.SetInput(2, {1.2f, 3.2f, 4.2f, 7.2f}); + m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f}); + m0.Invoke(); + EXPECT_THAT(m0.GetOutput(), + ElementsAreArray({ + 1.0f, 3.0f, 1.1f, 3.1f, 1.2f, 3.2f, 1.3f, 3.3f, // + 4.0f, 7.0f, 4.1f, 7.1f, 4.2f, 7.2f, 4.3f, 7.3f, // + })); +} + +TEST(ConcatenationOpTest, FourInputsQuantized) { + QuantizedConcatenationOpModel m0({TensorType_UINT8, {2, 1, 2}, -12.7, 12.8}, + /*axis=*/2, + /*num_inputs=*/4); + + m0.SetInput(0, {1.0f, 3.0f, 4.0f, 7.0f}); + m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f}); + m0.SetInput(2, {1.2f, 3.2f, 4.2f, 7.2f}); + m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f}); + m0.Invoke(); + EXPECT_THAT(m0.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.0f, 3.0f, 1.1f, 3.1f, 1.2f, 3.2f, 1.3f, 3.3f, // + 4.0f, 7.0f, 4.1f, 7.1f, 4.2f, 7.2f, 4.3f, 7.3f, // + }))); + EXPECT_THAT(m0.GetOutput(), ElementsAreArray({ + 137, 157, 138, 158, 139, 159, 140, 160, // + 167, 197, 168, 198, 169, 199, 170, 200, // + })); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc new file mode 100644 index 0000000000..c75c04baea --- /dev/null +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -0,0 +1,425 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/gemm_support.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include "tensorflow/contrib/lite/kernels/padding.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace conv { + +// This file has three implementation of Conv. +enum KernelType { + kReference, + kGenericOptimized, // Neon-free + kNeonOptimized, +}; + +struct OpData { + // IDs are the arbitrary identifiers used by TF Lite to identify and access + // memory buffers. + int im2col_id; + int hwcn_weights_id; + + TfLitePaddingValues padding; + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multipler plus a left shift. + int32_t output_multiplier; + int output_shift; + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; + // Indexes are the offset to the memory buffer in the array used to keep track + // of the allocated temporaries. + int32_t im2col_index; + int32_t hwcn_weights_index; + bool need_hwcn_weights; + bool have_weights_been_transposed; + bool need_im2col; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + // This is a builtin op, so we don't use the contents in 'buffer', if any. + // Instead, we allocate a new object to use as scratch space for im2col, and + // to carry information from Prepare() to Eval(). + auto* data = new OpData; + context->AddTensors(context, 1, &data->im2col_id); + context->AddTensors(context, 1, &data->hwcn_weights_id); + gemm_support::IncrementUsageCounter(context); + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + gemm_support::DecrementUsageCounter(context); + delete reinterpret_cast(buffer); +} + +// Naive implementation of transpose for floats. Could be optimized to be more +// cache friendly, but for now it's a one-time cost on first run, and we would +// prefer to remove the need to do this at all eventually. +void TransposeFloatTensor(TfLiteTensor* input, TfLiteTensor* output) { + const int rows = output->dims->data[1]; + const int cols = output->dims->data[0]; + const float* input_data = GetTensorData(input); + float* output_data = GetTensorData(output); + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { + const float in_value = input_data[i * cols + j]; + output_data[j * rows + i] = in_value; + } + } +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + bool hasBias = node->inputs->size == 3; + // Check number of inputs/outputs + TF_LITE_ENSURE(context, hasBias || node->inputs->size == 2); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + TfLiteTensor* input = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* filter = &context->tensors[node->inputs->data[1]]; + // Check dimensionality of input, filter + TF_LITE_ENSURE_EQ(context, input->dims->size, 4); + TF_LITE_ENSURE_EQ(context, filter->dims->size, 4); + // Check input channels matching filter + TF_LITE_ENSURE_EQ(context, input->dims->data[3], filter->dims->data[3]); + + // Check types. (We assume that UINT8 refers to quantized tensors) + TfLiteType data_type = input->type; + TF_LITE_ENSURE(context, + data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8); + TF_LITE_ENSURE_EQ(context, output->type, data_type); + TF_LITE_ENSURE_EQ(context, filter->type, data_type); + + TfLiteTensor* bias = nullptr; + + // TODO(ahentz): At this point the optimized versions require 'bias'. We can + // either change that or document that convolution requires it. + TF_LITE_ENSURE(context, hasBias); + + if (hasBias) { + bias = &context->tensors[node->inputs->data[2]]; + if (data_type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0); + } else { + TF_LITE_ENSURE_EQ(context, bias->type, data_type); + } + TF_LITE_ENSURE_EQ(context, bias->dims->size, 1); + TF_LITE_ENSURE_EQ(context, bias->dims->data[0], filter->dims->data[0]); + } + + int channels_out = filter->dims->data[0]; + int width = input->dims->data[2]; + int height = input->dims->data[1]; + int filter_width = filter->dims->data[2]; + int filter_height = filter->dims->data[1]; + int batches = input->dims->data[0]; + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params->padding; + auto computeOutSize = [padding](int imageSize, int filterSize, + int stride) -> int { + return padding == kTfLitePaddingSame + ? (imageSize + stride - 1) / stride + : padding == kTfLitePaddingValid + ? (imageSize - filterSize + stride) / stride + : 0; + }; + + int outWidth = computeOutSize(width, filter_width, params->stride_width); + int outHeight = computeOutSize(height, filter_height, params->stride_height); + + data->padding.height = + ComputePadding(params->stride_height, height, filter_height, outHeight); + data->padding.width = + ComputePadding(params->stride_width, width, filter_width, outWidth); + + TF_LITE_ENSURE(context, hasBias); + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. + if (data_type != kTfLiteFloat32) { + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, + &data->output_shift); + CalculateActivationRangeUint8(params->activation, output, + &data->output_activation_min, + &data->output_activation_max); + } + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(4); + output_size->data[0] = batches; + output_size->data[1] = outHeight; + output_size->data[2] = outWidth; + output_size->data[3] = channels_out; + auto output_status = context->ResizeTensor(context, output, output_size); + + if (output_status != kTfLiteOk) return output_status; + + // We don't always need to allocate im2col. It is only used in some versions + // of the optimized Conv. This test just mimics something that happens inside + // optimized_ops.h, in order to avoid a DCHECK(!im2col_data). + data->need_im2col = + (params->stride_width != 1 || params->stride_height != 1 || + filter_width != 1 || filter_height != 1); + // If we're using the optimized multithreaded EigenTensor implementation of + // convolution, it expects the filter weights to be transposed compared to + // the normal TF Lite buffer format. Typical TF Lite weights are + // [filter_count, filter_height, filter_width, input_depth], but for the float + // implementation we need them as [filter_height, filter_width, input_depth, + // filter_count]. We get to that format by transposing, and create a temporary + // buffer to store the results. + // This path is only used for float processing, so only create the buffer if + // we're running with that data type. + data->need_hwcn_weights = (data_type == kTfLiteFloat32); + + int temporaries_count = 0; + if (data->need_im2col) { + data->im2col_index = temporaries_count; + ++temporaries_count; + } + if (data->need_hwcn_weights) { + data->hwcn_weights_index = temporaries_count; + ++temporaries_count; + } + + TfLiteIntArrayFree(node->temporaries); + node->temporaries = TfLiteIntArrayCreate(temporaries_count); + + if (data->need_im2col) { + node->temporaries->data[data->im2col_index] = data->im2col_id; + + TfLiteIntArray* im2col_size = TfLiteIntArrayCreate(4); + + int input_depth = input->dims->data[3]; + im2col_size->data[0] = output_size->data[0]; + im2col_size->data[1] = output_size->data[1]; + im2col_size->data[2] = output_size->data[2]; + im2col_size->data[3] = input_depth * filter_height * filter_width; + + TfLiteTensor* im2col = + &context->tensors[node->temporaries->data[data->im2col_index]]; + im2col->type = data_type; + im2col->allocation_type = kTfLiteArenaRw; + auto im2col_status = context->ResizeTensor(context, im2col, im2col_size); + if (im2col_status != kTfLiteOk) return im2col_status; + } + + if (data->need_hwcn_weights) { + node->temporaries->data[data->hwcn_weights_index] = data->hwcn_weights_id; + TfLiteIntArray* hwcn_weights_size = TfLiteIntArrayCreate(2); + + // Because we're treating the filter weights as a matrix when we do the + // transpose, we allocate the buffer with a two-dimensional shape, where one + // dimension is the number of elements in each filter, and the second is the + // total number of filters. + int input_depth = input->dims->data[3]; + hwcn_weights_size->data[0] = (filter_height * filter_width * input_depth); + hwcn_weights_size->data[1] = channels_out; + + TfLiteTensor* hwcn_weights = + &context->tensors[node->temporaries->data[data->hwcn_weights_index]]; + hwcn_weights->type = data_type; + hwcn_weights->allocation_type = kTfLiteDynamic; + // Make sure we release any previous allocations before we reallocate. + // TODO(petewarden): Persistent arenas would be a better fit for this, but + // they aren't fully implemented yet. + if (hwcn_weights->data.raw) { + free(hwcn_weights->data.raw); + hwcn_weights->data.raw = nullptr; + } + auto hwcn_weights_status = + context->ResizeTensor(context, hwcn_weights, hwcn_weights_size); + if (hwcn_weights_status != kTfLiteOk) return hwcn_weights_status; + hwcn_weights->data.raw = static_cast(malloc(hwcn_weights->bytes)); + + // TODO(petewarden): If Resize() is called when the size hasn't actually + // changed, this will do extra redundant work. + data->have_weights_been_transposed = false; + } + + return kTfLiteOk; +} + +template +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteConvParams* params, OpData* data, TfLiteTensor* input, + TfLiteTensor* filter, TfLiteTensor* bias, + TfLiteTensor* im2col, TfLiteTensor* hwcn_weights, + TfLiteTensor* output) { + gemmlowp::GemmContext* gemm_context = gemm_support::GetFromContext(context); + + auto input_offset = -input->params.zero_point; + auto filter_offset = -filter->params.zero_point; + auto output_offset = output->params.zero_point; + + if (kernel_type == kReference) { + reference_ops::Conv( + GetTensorData(input), GetTensorDims(input), input_offset, + GetTensorData(filter), GetTensorDims(filter), filter_offset, + GetTensorData(bias), GetTensorDims(bias), params->stride_width, + params->stride_height, data->padding.width, data->padding.height, + output_offset, data->output_multiplier, data->output_shift, + data->output_activation_min, data->output_activation_max, + GetTensorData(output), GetTensorDims(output), + GetTensorData(im2col), GetTensorDims(im2col), gemm_context); + } else { + optimized_ops::Conv( + GetTensorData(input), GetTensorDims(input), input_offset, + GetTensorData(filter), GetTensorDims(filter), filter_offset, + GetTensorData(bias), GetTensorDims(bias), params->stride_width, + params->stride_height, data->padding.width, data->padding.height, + output_offset, data->output_multiplier, data->output_shift, + data->output_activation_min, data->output_activation_max, + GetTensorData(output), GetTensorDims(output), + GetTensorData(im2col), GetTensorDims(im2col), gemm_context); + } +} + +template +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteConvParams* params, OpData* data, TfLiteTensor* input, + TfLiteTensor* filter, TfLiteTensor* bias, TfLiteTensor* im2col, + TfLiteTensor* hwcn_weights, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(params->activation, &output_activation_min, + &output_activation_max); + + const float* filter_data; + if (data->need_hwcn_weights) { + filter_data = GetTensorData(hwcn_weights); + } else { + filter_data = GetTensorData(filter); + } + + if (kernel_type == kReference) { + reference_ops::Conv( + GetTensorData(input), GetTensorDims(input), filter_data, + GetTensorDims(filter), GetTensorData(bias), GetTensorDims(bias), + params->stride_width, params->stride_height, data->padding.width, + data->padding.height, output_activation_min, output_activation_max, + GetTensorData(output), GetTensorDims(output), + GetTensorData(im2col), GetTensorDims(im2col)); + } else { + multithreaded_ops::Conv( + GetTensorData(input), GetTensorDims(input), filter_data, + GetTensorDims(filter), GetTensorData(bias), GetTensorDims(bias), + params->stride_width, params->stride_height, data->padding.width, + data->padding.height, params->padding, output_activation_min, + output_activation_max, GetTensorData(output), + GetTensorDims(output), GetTensorData(im2col), + GetTensorDims(im2col)); + } +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + TfLiteTensor* input = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* filter = &context->tensors[node->inputs->data[1]]; + bool hasBias = node->inputs->size == 3; + TfLiteTensor* bias = + hasBias ? &context->tensors[node->inputs->data[2]] : nullptr; + TfLiteTensor* im2col = + data->need_im2col + ? &context->tensors[node->temporaries->data[data->im2col_index]] + : nullptr; + TfLiteTensor* hwcn_weights = + data->need_hwcn_weights + ? &context->tensors[node->temporaries->data[data->hwcn_weights_index]] + : nullptr; + + if (data->need_hwcn_weights && !data->have_weights_been_transposed) { + TransposeFloatTensor(filter, hwcn_weights); + data->have_weights_been_transposed = true; + } + + // TODO(aselle): Consider whether float conv and quantized conv should be + // separate ops to avoid dispatch overhead here. + switch (input->type) { // Already know in/outtypes are same. + case kTfLiteFloat32: + EvalFloat(context, node, params, data, input, filter, bias, + im2col, hwcn_weights, output); + break; + case kTfLiteUInt8: + EvalQuantized(context, node, params, data, input, filter, + bias, im2col, hwcn_weights, output); + break; + default: + context->ReportError(context, "Type not currently supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace conv + +TfLiteRegistration* Register_CONVOLUTION_REF() { + static TfLiteRegistration r = {conv::Init, conv::Free, conv::Prepare, + conv::Eval}; + return &r; +} + +TfLiteRegistration* Register_CONVOLUTION_GENERIC_OPT() { + static TfLiteRegistration r = {conv::Init, conv::Free, conv::Prepare, + conv::Eval}; + return &r; +} + +TfLiteRegistration* Register_CONVOLUTION_NEON_OPT() { + static TfLiteRegistration r = {conv::Init, conv::Free, conv::Prepare, + conv::Eval}; + return &r; +} + +TfLiteRegistration* Register_CONV_2D() { +#ifdef USE_NEON + return Register_CONVOLUTION_NEON_OPT(); +#else + return Register_CONVOLUTION_GENERIC_OPT(); +#endif +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/conv_test.cc b/tensorflow/contrib/lite/kernels/conv_test.cc new file mode 100644 index 0000000000..18d7a31d59 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/conv_test.cc @@ -0,0 +1,440 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseConvolutionOpModel : public SingleOpModel { + public: + // TODO(ahentz): Also test different activation types, bias, padding types, + // stride values. + BaseConvolutionOpModel( + const TensorData& input, const TensorData& filter, + const TensorData& output, int stride_width = 2, int stride_height = 2, + enum Padding padding = Padding_VALID, + enum ActivationFunctionType activation = ActivationFunctionType_NONE) { + input_ = AddInput(input); + filter_ = AddInput(filter); + + int bias_size = GetShape(filter_)[0]; + if (input.type == TensorType_FLOAT32) { + bias_ = AddInput({TensorType_FLOAT32, {bias_size}}); + } else { + // This is a quantized version. The scale of 'bias' depends on the scales + // of input and filter. Supposedly this is correctly set during quantized + // training. + auto bias_scale = GetScale(input_) * GetScale(filter_); + TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale}; + bias_ = AddInput(bias); + } + + output_ = AddOutput(output); + if (input.type != TensorType_FLOAT32) { + // The following is required by quantized inference. It is the unittest's + // responsibility to make sure the output scale falls into the correct + // range. + CHECK_LT(GetScale(input_) * GetScale(filter_), GetScale(output_)); + } + + SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions, + CreateConv2DOptions(builder_, padding, stride_width, + stride_height, activation) + .Union()); + + BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)}); + } + + protected: + int input_; + int filter_; + int bias_; + int output_; +}; + +class ConvolutionOpModel : public BaseConvolutionOpModel { + public: + using BaseConvolutionOpModel::BaseConvolutionOpModel; + + void SetFilter(std::initializer_list f) { PopulateTensor(filter_, f); } + + void SetBias(std::initializer_list f) { PopulateTensor(bias_, f); } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +TEST(ConvolutionOpTest, SimpleTestFloat32) { + ConvolutionOpModel m({TensorType_FLOAT32, {2, 2, 4, 1}}, + {TensorType_FLOAT32, {3, 2, 2, 1}}, + {TensorType_FLOAT32, {}}); + + m.SetInput({ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }); + m.SetFilter({ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }); + m.SetBias({1, 2, 3}); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + })); +} + +TEST(ConvolutionOpTest, SimpleTestFloat32WithAnisotropicStrides) { + ConvolutionOpModel m({TensorType_FLOAT32, {1, 3, 6, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, + /*stride_width=*/3, /*stride_height=*/1); + m.SetInput({ + 3, 2, 1, -1, -2, -3, // + 4, 3, 2, -2, -3, -4, // + 5, 4, 3, -3, -4, -5, // + }); + m.SetFilter({ + 1, 2, // + 3, 4, // + }); + m.SetBias({-1}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 30, -24, // + 40, -34, // + })); +} + +TEST(ConvolutionOpTest, HandCalculatedFloat32) { + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + const int filter_size = 3; + const int filter_count = 1; + const int stride_width = 1; + const int stride_height = 1; + const Padding padding = Padding_SAME; + ConvolutionOpModel m( + {TensorType_FLOAT32, + {image_batch_count, image_height, image_width, depth}}, + {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}}, + {TensorType_FLOAT32, {}}, stride_width, stride_height, padding); + + // The image matrix is: + // | 1 | 2 | 3 | 4 | + // | 5 | 6 | 7 | 8 | + // | 9 | 10 | 11 | 12 | + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + // The filter matrix is: + // | 1 | 4 | 7 | + // | 2 | 5 | 8 | + // | 3 | 6 | 9 | + m.SetFilter({1, 4, 7, 2, 5, 8, 3, 6, 9}); + // No bias for this test. + m.SetBias({0}); + + m.Invoke(); + // We're sliding the 3x3 filter across the 3x4 image, with accesses outside + // the input set to zero because we're using the 'SAME' padding mode. + // The calculations behind the expected output are: + // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105 + // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150 + // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183 + // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95 + // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235 + // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312 + // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357 + // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178 + // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187 + // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234 + // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261 + // (1*7)+(4*11)+(7*0)+(2*8)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121 + // This means we should end up with this matrix: + // | 105 | 150 | 183 | 95 | + // | 235 | 312 | 357 | 178 | + // | 187 | 234 | 261 | 121 | + EXPECT_THAT(m.GetOutput(), ElementsAreArray({105, 150, 183, 95, 235, 312, 357, + 178, 187, 234, 261, 121})); +} + +TEST(ConvolutionOpTest, HandCalculatedWithBiasFloat32) { + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + const int filter_size = 3; + const int filter_count = 1; + const int stride_width = 1; + const int stride_height = 1; + const Padding padding = Padding_SAME; + ConvolutionOpModel m( + {TensorType_FLOAT32, + {image_batch_count, image_height, image_width, depth}}, + {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}}, + {TensorType_FLOAT32, {}}, stride_width, stride_height, padding); + + // The image matrix is: + // | 1 | 2 | 3 | 4 | + // | 5 | 6 | 7 | 8 | + // | 9 | 10 | 11 | 12 | + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + // The filter matrix is: + // | 1 | 4 | 7 | + // | 2 | 5 | 8 | + // | 3 | 6 | 9 | + m.SetFilter({1, 4, 7, 2, 5, 8, 3, 6, 9}); + // Bias is | 10 |. + m.SetBias({10}); + + m.Invoke(); + // We're sliding the 3x3 filter across the 3x4 image, with accesses outside + // the input set to zero because we're using the 'SAME' padding mode. + // The calculations behind the expected output are: + // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)+10=115 + // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)+10=160 + // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)+10=193 + // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)+10=105 + // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)+10=245 + // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)+10=322 + // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)+10=367 + // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)+10=188 + // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)+10=197 + // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)+10=244 + // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)+10=271 + // (1*7)+(4*11)+(7*0)+(2*8)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)+10=131 + // This means we should end up with this matrix: + // | 115 | 160 | 193 | 105 | + // | 245 | 322 | 367 | 188 | + // | 197 | 244 | 271 | 131 | + EXPECT_THAT(m.GetOutput(), ElementsAreArray({115, 160, 193, 105, 245, 322, + 367, 188, 197, 244, 271, 131})); +} + +TEST(ConvolutionOpTest, HandCalculatedWithReluFloat32) { + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + const int filter_size = 3; + const int filter_count = 1; + const int stride_width = 1; + const int stride_height = 1; + const Padding padding = Padding_SAME; + ConvolutionOpModel m( + {TensorType_FLOAT32, + {image_batch_count, image_height, image_width, depth}}, + {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}}, + {TensorType_FLOAT32, {}}, stride_width, stride_height, padding, + ActivationFunctionType_RELU); + + // The image matrix is: + // | 1 | 2 | 3 | 4 | + // | 5 | 6 | 7 | 8 | + // | 9 | 10 | 11 | 12 | + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + // The filter matrix is: + // | 1 | 4 | 7 | + // | 2 | 5 | 8 | + // | 3 | 6 | 9 | + m.SetFilter({1, 4, 7, 2, 5, 8, 3, 6, 9}); + // Bias is | -200 |. + m.SetBias({-200}); + + m.Invoke(); + // We're sliding the 3x3 filter across the 3x4 image, with accesses outside + // the input set to zero because we're using the 'SAME' padding mode. + // The calculations behind the expected output are: + // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)-200=-95 + // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)-200=-50 + // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)-200=-17 + // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)-200=-105 + // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)-200=35 + // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)-200=112 + // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)-200=157 + // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)-200=-22 + // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)-200=-13 + // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)-200=34 + // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)-200=61 + // (1*7)+(4*11)+(7*0)+(2*8)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)-200=-79 + // All negative values are gated to zero by the Relu activation function. + // This means we should end up with this matrix: + // | 0 | 0 | 0 | 0 | + // | 35 | 112 | 157 | 0 | + // | 0 | 34 | 61 | 0 | + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({0, 0, 0, 0, 35, 112, 157, 0, 0, 34, 61, 0})); +} + +TEST(ConvolutionOpTest, HandCalculatedValidFloat32) { + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + const int filter_size = 3; + const int filter_count = 1; + const int stride_width = 1; + const int stride_height = 1; + const Padding padding = Padding_VALID; + ConvolutionOpModel m( + {TensorType_FLOAT32, + {image_batch_count, image_height, image_width, depth}}, + {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}}, + {TensorType_FLOAT32, {}}, stride_width, stride_height, padding); + + // The image matrix is: + // | 1 | 2 | 3 | 4 | + // | 5 | 6 | 7 | 8 | + // | 9 | 10 | 11 | 12 | + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + // The filter matrix is: + // | 1 | 4 | 7 | + // | 2 | 5 | 8 | + // | 3 | 6 | 9 | + m.SetFilter({1, 4, 7, 2, 5, 8, 3, 6, 9}); + // No bias for this test. + m.SetBias({0}); + + m.Invoke(); + // We're sliding the 3x3 filter across the 3x4 image, with no accesses outside + // the input because we're using the 'VALID' padding mode, giving a 2x1 + // output. + // The calculations behind the expected output are: + // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312 + // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357 + // This means we should end up with this matrix: + // | 312 | 357 | + EXPECT_THAT(m.GetOutput(), ElementsAreArray({312, 357})); +} + +class QuantizedConvolutionOpModel : public BaseConvolutionOpModel { + public: + using BaseConvolutionOpModel::BaseConvolutionOpModel; + + void SetInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + + void SetFilter(std::initializer_list data) { + QuantizeAndPopulate(filter_, data); + } + + void SetBias(std::initializer_list data) { + QuantizeAndPopulate(bias_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +// In this tests we set the input and output scales so that the results +// match exactly the 'non-quantized' version. +TEST(ConvolutionOpTest, SimpleTestQuantized) { + QuantizedConvolutionOpModel m({TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64}, + {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64}, + {TensorType_UINT8, {}, -127, 128}); + m.SetInput({ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }); + m.SetFilter({ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }); + m.SetBias({1, 2, 3}); + + m.Invoke(); + + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }, + 1e-5))); + // For good measure, let's also verify the quantized values: + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 145, 129, 132, // + 145, 129, 132, // + 144, 131, 130, // + 164, 131, 130, // + })); +} + +TEST(ConvolutionOpTest, SimpleTestQuantizedWithAnisotropicStrides) { + QuantizedConvolutionOpModel m({TensorType_UINT8, {1, 3, 6, 1}, -63.5, 64}, + {TensorType_UINT8, {1, 2, 2, 1}, -63.5, 64}, + {TensorType_UINT8, {}, -127, 128}, + /*stride_width=*/3, /*stride_height=*/1); + m.SetInput({ + 3, 2, 1, -1, -2, -3, // + 4, 3, 2, -2, -3, -4, // + 5, 4, 3, -3, -4, -5, // + }); + m.SetFilter({ + 1, 2, // + 3, 4, // + }); + m.SetBias({-1}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear({ + 30, -24, // + 40, -34, // + }))); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 157, 103, // + 167, 93, // + })); +} +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc new file mode 100644 index 0000000000..15dbfe08c8 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -0,0 +1,289 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include "tensorflow/contrib/lite/kernels/padding.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace depthwise_conv { + +constexpr int kInputTensor = 0; +constexpr int kFilterTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +// This file has three implementation of DepthwiseConv. +enum KernelType { + kReference, + kGenericOptimized, // Neon-free + kNeonOptimized, +}; + +struct OpData { + TfLitePaddingValues padding; + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multipler plus a left shift. + int32_t output_multiplier; + int output_shift; + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + // This is a builtin op, so we don't use the contents in 'buffer', if any. + // Instead, we allocate a new object to carry information from Prepare() to + // Eval(). + return new OpData; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + // TODO(ahentz): use could use GetOptionalInputTensor() here, but we need to + // decide whether we are OK with optional tensors being completely absent, as + // opposed to having -1 as their index. + bool hasBias = NumInputs(node) == 3; + + TF_LITE_ENSURE(context, hasBias || NumInputs(node) == 2); + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TfLiteTensor* bias = nullptr; + + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4); + + // The parameter 'depth_multiplier' is redundant, so we check here to make + // sure it is consistent with the given dimensions. + TF_LITE_ENSURE_EQ(context, + params->depth_multiplier * SizeOfDimension(input, 3), + SizeOfDimension(filter, 3)); + + const TfLiteType data_type = input->type; + TF_LITE_ENSURE(context, + data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8); + TF_LITE_ENSURE_EQ(context, output->type, data_type); + TF_LITE_ENSURE_EQ(context, filter->type, data_type); + + if (hasBias) { + bias = GetInput(context, node, kBiasTensor); + if (data_type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0); + } else { + TF_LITE_ENSURE_EQ(context, bias->type, data_type); + } + TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1); + TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3), + SizeOfDimension(bias, 0)); + } + + int channels_out = SizeOfDimension(filter, 3); + int width = SizeOfDimension(input, 2); + int height = SizeOfDimension(input, 1); + int filter_width = SizeOfDimension(filter, 2); + int filter_height = SizeOfDimension(filter, 1); + int batches = SizeOfDimension(input, 0); + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params->padding; + auto compute_out_size = [padding](int imageSize, int filterSize, + int stride) -> int { + return padding == kTfLitePaddingSame + ? (imageSize + stride - 1) / stride + : padding == kTfLitePaddingValid + ? (imageSize - filterSize + stride) / stride + : 0; + }; + + int out_width = compute_out_size(width, filter_width, params->stride_width); + int out_height = + compute_out_size(height, filter_height, params->stride_height); + + data->padding.height = + ComputePadding(params->stride_height, height, filter_height, out_height); + data->padding.width = + ComputePadding(params->stride_width, width, filter_width, out_width); + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. + if (data_type != kTfLiteFloat32) { + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, + &data->output_shift); + CalculateActivationRangeUint8(params->activation, output, + &data->output_activation_min, + &data->output_activation_max); + } + + TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4); + outputSize->data[0] = batches; + outputSize->data[1] = out_height; + outputSize->data[2] = out_width; + outputSize->data[3] = channels_out; + return context->ResizeTensor(context, output, outputSize); +} + +template +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* filter, TfLiteTensor* bias, + TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(params->activation, &output_activation_min, + &output_activation_max); + + void (*depthwise_conv)(const float*, const Dims<4>&, const float*, + const Dims<4>&, const float*, const Dims<4>&, int, int, + int, int, int, float, float, float*, const Dims<4>&); + if (kernel_type == kReference) { + depthwise_conv = &reference_ops::DepthwiseConv; + } else { + depthwise_conv = &optimized_ops::DepthwiseConv; + } + + depthwise_conv( + GetTensorData(input), GetTensorDims(input), + GetTensorData(filter), GetTensorDims(filter), + GetTensorData(bias), GetTensorDims(bias), params->stride_width, + params->stride_height, data->padding.width, data->padding.height, + params->depth_multiplier, output_activation_min, output_activation_max, + GetTensorData(output), GetTensorDims(output)); +} + +template +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* filter, + TfLiteTensor* bias, TfLiteTensor* output) { + auto input_offset = -input->params.zero_point; + auto filter_offset = -filter->params.zero_point; + auto output_offset = output->params.zero_point; + + void (*depthwise_conv)(const uint8*, const Dims<4>&, int32, const uint8*, + const Dims<4>&, int32, const int32*, const Dims<4>&, + int, int, int, int, int, int32, int32, int, int32, + int32, uint8*, const Dims<4>&); + if (kernel_type == kReference) { + depthwise_conv = &reference_ops::DepthwiseConv; + } else { + depthwise_conv = &optimized_ops::DepthwiseConv; + } + + depthwise_conv( + GetTensorData(input), GetTensorDims(input), input_offset, + GetTensorData(filter), GetTensorDims(filter), filter_offset, + GetTensorData(bias), GetTensorDims(bias), params->stride_width, + params->stride_height, data->padding.width, data->padding.height, + params->depth_multiplier, output_offset, data->output_multiplier, + data->output_shift, data->output_activation_min, + data->output_activation_max, GetTensorData(output), + GetTensorDims(output)); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TfLiteTensor* bias = + (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr; + + // TODO(aselle): Consider whether float conv and quantized conv should be + // separate ops to avoid dispatch overhead here. + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + EvalFloat(context, node, params, data, input, filter, bias, + output); + break; + case kTfLiteUInt8: + EvalQuantized(context, node, params, data, input, filter, + bias, output); + break; + default: + context->ReportError(context, "Type not currently supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace depthwise_conv + +TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() { + static TfLiteRegistration r = { + depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare, + depthwise_conv::Eval}; + return &r; +} + +TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() { + static TfLiteRegistration r = { + depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare, + depthwise_conv::Eval}; + return &r; +} + +TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() { + static TfLiteRegistration r = { + depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare, + depthwise_conv::Eval}; + return &r; +} + +TfLiteRegistration* Register_DEPTHWISE_CONV_2D() { +#ifdef USE_NEON + return Register_DEPTHWISE_CONVOLUTION_NEON_OPT(); +#else + return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT(); +#endif +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc new file mode 100644 index 0000000000..39227b2811 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc @@ -0,0 +1,186 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseDepthwiseConvolutionOpModel : public SingleOpModel { + public: + // TODO(ahentz): Also test different activation types, bias, padding types, + // stride values. + BaseDepthwiseConvolutionOpModel(const TensorData& input, + const TensorData& filter, + const TensorData& output) { + input_ = AddInput(input); + filter_ = AddInput(filter); + + int bias_size = GetShape(filter_)[3]; + if (input.type == TensorType_FLOAT32) { + bias_ = AddInput({TensorType_FLOAT32, {bias_size}}); + } else { + // This is a quantized version. The scale of 'bias' depends on the scales + // of input and filter. Supposedly this is correctly set during quantized + // training. + auto bias_scale = GetScale(input_) * GetScale(filter_); + TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale}; + bias_ = AddInput(bias); + } + + output_ = AddOutput(output); + if (input.type != TensorType_FLOAT32) { + // The following is required by quantized inference. It is the unittest's + // responsibility to make sure the output scale falls into the correct + // range. + CHECK_LT(GetScale(input_) * GetScale(filter_), GetScale(output_)); + } + + int input_depth = GetShape(input_)[3]; + int output_depth = GetShape(filter_)[3]; + int depth_mul = output_depth / input_depth; + + SetBuiltinOp( + BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOptions_DepthwiseConv2DOptions, + CreateDepthwiseConv2DOptions(builder_, Padding_VALID, 1, 1, depth_mul, + ActivationFunctionType_NONE) + .Union()); + + BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)}); + } + + protected: + int input_; + int filter_; + int bias_; + int output_; +}; + +class DepthwiseConvolutionOpModel : public BaseDepthwiseConvolutionOpModel { + public: + using BaseDepthwiseConvolutionOpModel::BaseDepthwiseConvolutionOpModel; + + void SetFilter(std::initializer_list f) { PopulateTensor(filter_, f); } + + void SetBias(std::initializer_list f) { PopulateTensor(bias_, f); } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +TEST(DepthwiseConvolutionOpTest, SimpleTest) { + DepthwiseConvolutionOpModel m({TensorType_FLOAT32, {1, 3, 2, 2}}, + {TensorType_FLOAT32, {1, 2, 2, 4}}, + {TensorType_FLOAT32, {}}); + + m.SetInput({ + 1, 2, 7, 8, // column 1 + 3, 4, 9, 10, // column 2 + 5, 6, 11, 12, // column 3 + }); + m.SetFilter({ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }); + m.SetBias({1, 2, 3, 4}); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + })); +} + +class QuantizedDepthwiseConvolutionOpModel + : public BaseDepthwiseConvolutionOpModel { + public: + using BaseDepthwiseConvolutionOpModel::BaseDepthwiseConvolutionOpModel; + + void SetInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + + void SetFilter(std::initializer_list data) { + QuantizeAndPopulate(filter_, data); + } + + void SetBias(std::initializer_list data) { + QuantizeAndPopulate(bias_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +// In this test we set the input and output scales so that the results match +// exactly the 'non-quantized' version. +TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) { + QuantizedDepthwiseConvolutionOpModel m( + {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64}, + {TensorType_UINT8, {1, 2, 2, 4}, -63.5, 64}, + {TensorType_UINT8, {}, -127, 128}); + + m.SetInput({ + 1, 2, 7, 8, // column 1 + 3, 4, 9, 10, // column 2 + 5, 6, 11, 12, // column 3 + }); + m.SetFilter({ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }); + m.SetBias({1, 2, 3, 4}); + + m.Invoke(); + + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + { + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }, + 1e-5))); + // For good measure, let's also verify the quantized values: + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 198, 93, 226, 107, // + 218, 101, 254, 123, // + })); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup.cc b/tensorflow/contrib/lite/kernels/embedding_lookup.cc new file mode 100644 index 0000000000..4e8cb396d4 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/embedding_lookup.cc @@ -0,0 +1,104 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Ops that looks up items from matrix. +// +// Input: +// Tensor[0]: Row number to lookup, dim.size == 1, int32 +// Tensor[1]: 2-dimensional matrix of multi-dimensional items +// dim.size >= 2, any data type. +// first dimension is row, second dimension is column. +// +// Output: +// Output.dim[0] == Tensor[0].dim[0], num of lookups +// Output.dim[1] == Tensor[1].dim[1], num of items per row +// Each item in output is a raw bytes copy of corresponding item in input. +// When indices are out of bound, the ops will not succeed. +// + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace embedding_lookup { + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* lookup = GetInput(context, node, 0); + TF_LITE_ENSURE_EQ(context, NumDimensions(lookup), 1); + TF_LITE_ENSURE_EQ(context, lookup->type, kTfLiteInt32); + + TfLiteTensor* value = GetInput(context, node, 1); + TF_LITE_ENSURE(context, NumDimensions(value) >= 2); + + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteIntArray* outputSize = TfLiteIntArrayCreate(NumDimensions(value)); + + outputSize->data[0] = SizeOfDimension(lookup, 0); + outputSize->data[1] = SizeOfDimension(value, 1); + for (int i = 2; i < NumDimensions(value); i++) { + outputSize->data[i] = SizeOfDimension(value, i); + } + return context->ResizeTensor(context, output, outputSize); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* lookup = GetInput(context, node, 0); + TfLiteTensor* value = GetInput(context, node, 1); + + const int row_size = SizeOfDimension(value, 0); + const int row_bytes = value->bytes / row_size; + + for (int i = 0; i < SizeOfDimension(lookup, 0); i++) { + int idx = lookup->data.i32[i]; + if (idx >= row_size || idx < 0) { + context->ReportError(context, "Embedding Lookup: index out of bounds."); + return kTfLiteError; + } else { + memcpy(output->data.raw + i * row_bytes, + value->data.raw + idx * row_bytes, row_bytes); + } + } + + return kTfLiteOk; +} + +} // namespace embedding_lookup + +TfLiteRegistration* Register_EMBEDDING_LOOKUP() { + static TfLiteRegistration r = {nullptr, nullptr, embedding_lookup::Prepare, + embedding_lookup::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc new file mode 100644 index 0000000000..6c770e7f71 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc @@ -0,0 +1,248 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Op that looks up items from a sparse tensor in an embedding matrix. +// The sparse lookup tensor is represented by three individual tensors: lookup, +// indices, and dense_shape. The representation assume that the corresponding +// dense tensor would satisfy: +// * dense.shape = dense_shape +// * dense[tuple(indices[i])] = lookup[i] +// +// By convention, indices should be sorted. +// +// Options: +// combiner: The reduction op (SUM, MEAN, SQRTN). +// * SUM computes the weighted sum of the embedding results. +// * MEAN is the weighted sum divided by the total weight. +// * SQRTN is the weighted sum divided by the square root of the sum of the +// squares of the weights. +// +// Input: +// Tensor[0]: Ids to lookup, dim.size == 1, int32. +// Tensor[1]: Indices, int32. +// Tensor[2]: Dense shape, int32. +// Tensor[3]: Weights to use for aggregation, float. +// Tensor[4]: Params, a matrix of multi-dimensional items, +// dim.size >= 2, float. +// +// Output: +// A (dense) tensor representing the combined embeddings for the sparse ids. +// For each row in the sparse tensor represented by (lookup, indices, shape) +// the op looks up the embeddings for all ids in that row, multiplies them by +// the corresponding weight, and combines these embeddings as specified in the +// last dimension. +// +// Output.dim = [l0, ... , ln-1, e1, ..., em] +// Where dense_shape == [l0, ..., ln] and Tensor[4].dim == [e0, e1, ..., em] +// +// For instance, if params is a 10x20 matrix and ids, weights are: +// +// [0, 0]: id 1, weight 2.0 +// [0, 1]: id 3, weight 0.5 +// [1, 0]: id 0, weight 1.0 +// [2, 3]: id 1, weight 3.0 +// +// with combiner=MEAN, then the output will be a (3, 20) tensor where: +// +// output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5) +// output[1, :] = (params[0, :] * 1.0) / 1.0 +// output[2, :] = (params[1, :] * 3.0) / 3.0 +// +// When indices are out of bound, the op will not succeed. + +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { + +namespace { + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 5); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* ids = GetInput(context, node, 0); + TF_LITE_ENSURE_EQ(context, NumDimensions(ids), 1); + TF_LITE_ENSURE_EQ(context, ids->type, kTfLiteInt32); + + TfLiteTensor* indices = GetInput(context, node, 1); + TF_LITE_ENSURE_EQ(context, NumDimensions(indices), 2); + TF_LITE_ENSURE_EQ(context, indices->type, kTfLiteInt32); + + TfLiteTensor* shape = GetInput(context, node, 2); + TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1); + TF_LITE_ENSURE_EQ(context, shape->type, kTfLiteInt32); + + TfLiteTensor* weights = GetInput(context, node, 3); + TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 1); + TF_LITE_ENSURE_EQ(context, weights->type, kTfLiteFloat32); + + TF_LITE_ENSURE_EQ(context, SizeOfDimension(indices, 0), + SizeOfDimension(ids, 0)); + TF_LITE_ENSURE_EQ(context, SizeOfDimension(indices, 0), + SizeOfDimension(weights, 0)); + + TfLiteTensor* value = GetInput(context, node, 4); + TF_LITE_ENSURE(context, NumDimensions(value) >= 2); + + // Mark the output as a dynamic tensor. + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + output->allocation_type = kTfLiteDynamic; + + return kTfLiteOk; +} + +void FinalizeAggregation(TfLiteCombinerType combiner, int num_elements, + float current_total_weight, + float current_squares_weight, int embedding_size, + float* output) { + if (combiner != kTfLiteCombinerTypeSum && num_elements > 0) { + float multiplier = 1.0; + switch (combiner) { + case kTfLiteCombinerTypeMean: + multiplier = current_total_weight; + break; + case kTfLiteCombinerTypeSqrtn: + multiplier = std::sqrt(current_squares_weight); + break; + default: + break; + } + for (int k = 0; k < embedding_size; k++) { + output[k] /= multiplier; + } + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* ids = GetInput(context, node, 0); + TfLiteTensor* indices = GetInput(context, node, 1); + TfLiteTensor* dense_shape = GetInput(context, node, 2); + TfLiteTensor* weights = GetInput(context, node, 3); + TfLiteTensor* value = GetInput(context, node, 4); + + const int lookup_rank = SizeOfDimension(indices, 1); + const int embedding_rank = NumDimensions(value); + const int num_lookups = SizeOfDimension(ids, 0); + const int num_rows = SizeOfDimension(value, 0); + + // The last dimension gets replaced by the embedding. + const int output_rank = (lookup_rank - 1) + (embedding_rank - 1); + + // Make sure that the actual dense shape of the sparse tensor represented by + // (loopkup, indices, dense_shape) is consistent. + TF_LITE_ENSURE_EQ(context, SizeOfDimension(dense_shape, 0), lookup_rank); + + // Resize output tensor. + TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_rank); + int k = 0; + int embedding_size = 1; + int lookup_size = 1; + for (int i = 0; i < lookup_rank - 1; i++, k++) { + const int dim = dense_shape->data.i32[i]; + lookup_size *= dim; + output_shape->data[k] = dim; + } + for (int i = 1; i < embedding_rank; i++, k++) { + const int dim = SizeOfDimension(value, i); + embedding_size *= dim; + output_shape->data[k] = dim; + } + TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_shape)); + const int output_size = lookup_size * embedding_size; + TfLiteTensorRealloc(output_size * sizeof(float), output); + + tensor_utils::ZeroVector(output->data.f, output_size); + + // Keep track of the current bucket for aggregation/combination. + int current_output_offset = 0; + float current_total_weight = 0.0; + float current_squares_weight = 0.0; + int num_elements = 0; + + for (int i = 0; i < num_lookups; i++) { + int idx = ids->data.i32[i]; + if (idx >= num_rows || idx < 0) { + context->ReportError(context, + "Embedding Lookup Sparse: index out of bounds."); + return kTfLiteError; + } + + // Check where we need to aggregate. + const int example_indices_offset = i * lookup_rank; + int output_bucket = 0; + int stride = 1; + for (int k = (lookup_rank - 1) - 1; k >= 0; k--) { + output_bucket += indices->data.i32[example_indices_offset + k] * stride; + stride *= dense_shape->data.i32[k]; + } + const int output_offset = output_bucket * embedding_size; + + // If we are in a new aggregation bucket and the combiner is not the sum, + // go back and finalize the result of the previous bucket. + if (output_offset != current_output_offset) { + FinalizeAggregation(params->combiner, num_elements, current_total_weight, + current_squares_weight, embedding_size, + &output->data.f[current_output_offset]); + + // Track next bucket. + num_elements = 0; + current_total_weight = 0.0; + current_squares_weight = 0.0; + current_output_offset = output_offset; + } + + // Add element to aggregation. + ++num_elements; + const int example_embedding_offset = idx * embedding_size; + const float w = weights->data.f[i]; + current_squares_weight += w * w; + current_total_weight += w; + for (int k = 0; k < embedding_size; k++) { + output->data.f[current_output_offset + k] += + (value->data.f[example_embedding_offset + k] * w); + } + } + + // Finalize last bucket. + FinalizeAggregation(params->combiner, num_elements, current_total_weight, + current_squares_weight, embedding_size, + &output->data.f[current_output_offset]); + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration* Register_EMBEDDING_LOOKUP_SPARSE() { + static TfLiteRegistration r = {nullptr, nullptr, Prepare, Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc new file mode 100644 index 0000000000..69d9c5cc7d --- /dev/null +++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc @@ -0,0 +1,166 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite sparse lookup op. + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class EmbeddingLookupSparseOpModel : public SingleOpModel { + public: + EmbeddingLookupSparseOpModel(CombinerType type, + std::initializer_list lookup_shape, + std::initializer_list indices_shape, + std::initializer_list dense_shape_shape, + std::initializer_list value_shape) { + lookup_ = AddInput(TensorType_INT32); + indices_ = AddInput(TensorType_INT32); + dense_shape_ = AddInput(TensorType_INT32); + weights_ = AddInput(TensorType_FLOAT32); + value_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, + BuiltinOptions_EmbeddingLookupSparseOptions, + CreateEmbeddingLookupSparseOptions(builder_, type).Union()); + BuildInterpreter({lookup_shape, indices_shape, dense_shape_shape, + lookup_shape, value_shape}); + } + + void SetInput(std::initializer_list lookup_data, + std::initializer_list indices_data, + std::initializer_list dense_shape_data, + std::initializer_list weights_data) { + PopulateTensor(lookup_, lookup_data); + PopulateTensor(indices_, indices_data); + PopulateTensor(dense_shape_, dense_shape_data); + PopulateTensor(weights_, weights_data); + } + + void Set3DWeightMatrix(const std::function& function) { + TfLiteTensor* tensor = interpreter_->tensor(value_); + int rows = tensor->dims->data[0]; + int columns = tensor->dims->data[1]; + int features = tensor->dims->data[2]; + for (int i = 0; i < rows; i++) { + for (int j = 0; j < columns; j++) { + for (int k = 0; k < features; k++) { + tensor->data.f[(i * columns + j) * features + k] = function(i, j, k); + } + } + } + } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int lookup_; + int weights_; + int indices_; + int dense_shape_; + int value_; + int output_; +}; + +TEST(EmbeddingLookupOpTest, SimpleTest) { + EmbeddingLookupSparseOpModel m(CombinerType_SUM, {3}, {3, 2}, {2}, {4, 3, 2}); + m.SetInput({1, 3, 0}, {0, 0, 2, 0, 2, 1}, {3, 2}, {1.0, 2.0, 4.0}); + m.Set3DWeightMatrix( + [](int i, int j, int k) { return i + j / 10.0f + k / 100.0f; }); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // Row 1 + 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, // - + 6.00, 6.06, 6.60, 6.66, 7.20, 7.26, // 2 * Row 3 + 4 * Row 0 + }))); +} + +TEST(EmbeddingLookupOpTest, SimpleTestMean) { + EmbeddingLookupSparseOpModel m(CombinerType_MEAN, {3}, {3, 2}, {2}, + {4, 3, 2}); + m.SetInput({1, 3, 0}, {0, 0, 2, 0, 2, 1}, {3, 2}, {1.0, 2.0, 4.0}); + m.Set3DWeightMatrix( + [](int i, int j, int k) { return i + j / 10.0f + k / 100.0f; }); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // Row 1 + 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, // - + 1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // 2 * Row 3 + 4 * Row 0 + }))); +} + +TEST(EmbeddingLookupOpTest, SimpleTestSqrtn) { + EmbeddingLookupSparseOpModel m(CombinerType_SQRTN, {3}, {3, 2}, {2}, + {4, 3, 2}); + m.SetInput({1, 3, 0}, {0, 0, 2, 0, 2, 1}, {3, 2}, {1.0, 2.0, 4.0}); + m.Set3DWeightMatrix( + [](int i, int j, int k) { return i + j / 10.0f + k / 100.0f; }); + m.Invoke(); + + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // Row 1 + 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, // - + 6.00f / std::sqrt(20.0f), 6.06f / std::sqrt(20.0f), + 6.60f / std::sqrt(20.0f), 6.66f / std::sqrt(20.0f), + 7.20f / std::sqrt(20.0f), + 7.26f / + std::sqrt( + 20.0f), // 2 * Row 3 + 4 * Row 0, // 2 * Row 3 + 4 * Row 0 + }))); +} + +TEST(EmbeddingLookupOpTest, Indices3DTest) { + EmbeddingLookupSparseOpModel m(CombinerType_SUM, {3}, {3, 3}, {3}, {4, 3, 2}); + m.SetInput({1, 3, 0}, {0, 0, 0, 2, 0, 0, 2, 0, 1}, {3, 2, 2}, + {1.0, 2.0, 4.0}); + m.Set3DWeightMatrix( + [](int i, int j, int k) { return i + j / 10.0f + k / 100.0f; }); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.00, 1.01, 1.10, 1.11, 1.20, 1.21, 0.00, 0.00, 0.00, + 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, + 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 6.00, 6.06, 6.60, + 6.66, 7.20, 7.26, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, + }))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { +#ifdef OS_LINUX + tflite::LogToStderr(); +#endif + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc new file mode 100644 index 0000000000..8c030b0677 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc @@ -0,0 +1,94 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite Lookup op. + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class EmbeddingLookupOpModel : public SingleOpModel { + public: + EmbeddingLookupOpModel(std::initializer_list index_shape, + std::initializer_list weight_shape) { + input_ = AddInput(TensorType_INT32); + weight_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_EMBEDDING_LOOKUP, BuiltinOptions_NONE, 0); + BuildInterpreter({index_shape, weight_shape}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void Set3DWeightMatrix(const std::function& function) { + TfLiteTensor* tensor = interpreter_->tensor(weight_); + int rows = tensor->dims->data[0]; + int columns = tensor->dims->data[1]; + int features = tensor->dims->data[2]; + for (int i = 0; i < rows; i++) { + for (int j = 0; j < columns; j++) { + for (int k = 0; k < features; k++) { + tensor->data.f[(i * columns + j) * features + k] = function(i, j, k); + } + } + } + } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int input_; + int weight_; + int output_; +}; + +// TODO(ahentz): write more tests that exercise the details of the op, such as +// lookup errors and variable input shapes. +TEST(EmbeddingLookupOpTest, SimpleTest) { + EmbeddingLookupOpModel m({3}, {3, 2, 4}); + m.PopulateTensor(0, {1, 0, 2}); + m.Set3DWeightMatrix( + [](int i, int j, int k) { return i + j / 10.0f + k / 100.0f; }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.00, 1.01, 1.02, 1.03, 1.10, 1.11, 1.12, 1.13, // Row 1 + 0.00, 0.01, 0.02, 0.03, 0.10, 0.11, 0.12, 0.13, // Row 0 + 2.00, 2.01, 2.02, 2.03, 2.10, 2.11, 2.12, 2.13, // Row 2 + }))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc new file mode 100644 index 0000000000..a77fe94e49 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -0,0 +1,307 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/gemm_support.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace fully_connected { + +// This file has four implementations of FullyConnected +enum KernelType { + kReference, + kGenericOptimized, // Neon-free + kNeonOptimized, + kPie, // Used by the PIE team +}; + +struct OpData { + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multipler plus a left shift. + int32_t output_multiplier; + int output_shift; + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; +}; + +constexpr int kInputTensor = 0; +constexpr int kWeightsTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + // This is a builtin op, so we don't use the contents in 'buffer', if any. + // Instead, we allocate a new object to carry information from Prepare() to + // Eval(). + gemm_support::IncrementUsageCounter(context); + return new OpData; +} + +void Free(TfLiteContext* context, void* buffer) { + gemm_support::DecrementUsageCounter(context); + delete reinterpret_cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + // Check we have all the inputs and outputs we need. + TF_LITE_ENSURE_EQ(context, node->inputs->size, 3); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); + TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // Check all the parameters of tensor match within themselves and match the + // input configuration. + int input_size = 1; + for (int i = 0; i < input->dims->size; i++) { + input_size *= input->dims->data[i]; + } + + const int batch_size = input_size / filter->dims->data[1]; + const int num_units = filter->dims->data[0]; + + TF_LITE_ASSERT_EQ(input_size, batch_size * filter->dims->data[1]); + if (bias) { + TF_LITE_ASSERT_EQ(bias->dims->data[0], num_units); + } + + TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 2); + TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1); + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. + TfLiteType data_type = input->type; + if (data_type != kTfLiteFloat32) { + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, + &data->output_shift); + CalculateActivationRangeUint8(params->activation, output, + &data->output_activation_min, + &data->output_activation_max); + } + + // Resize output. + TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(2); + output_size_array->data[0] = batch_size; + output_size_array->data[1] = num_units; + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output, output_size_array)); + return kTfLiteOk; +} + +TfLiteStatus EvalPie(TfLiteContext* context, TfLiteNode* node, + TfLiteFullyConnectedParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* filter, + TfLiteTensor* bias, TfLiteTensor* output) { + int total_input_size = 1; + for (int i = 0; i < input->dims->size; i++) { + total_input_size *= input->dims->data[i]; + } + + int input_size = filter->dims->data[1]; + const int batch_size = total_input_size / filter->dims->data[1]; + const int num_units = filter->dims->data[0]; + + // Output = bias if bias tensor exists. + if (bias) { + tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size, + output->data.f); + } else { + tensor_utils::ZeroVector(output->data.f, batch_size * num_units); + } + + // Compute output += weight * input + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + filter->data.f, num_units, input_size, input->data.f, batch_size, + output->data.f, /*result_stride=*/1); + + // Apply activation function + tensor_utils::ApplyActivationToVector(output->data.f, batch_size * num_units, + params->activation, output->data.f); + + return kTfLiteOk; +} + +#define TF_LITE_MACRO_DISPATCH(macro_name, params, target_namespace) \ + if (params->activation == kTfLiteActNone) { \ + macro_name(target_namespace, kNone); \ + } \ + if (params->activation == kTfLiteActRelu) { \ + macro_name(target_namespace, kRelu); \ + } \ + if (params->activation == kTfLiteActRelu6) { \ + macro_name(target_namespace, kRelu6); \ + } + +template +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteFullyConnectedParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* filter, + TfLiteTensor* bias, TfLiteTensor* output) { + gemmlowp::GemmContext* gemm_context = gemm_support::GetFromContext(context); + + int32_t input_offset = -input->params.zero_point; + int32_t filter_offset = -filter->params.zero_point; + int32_t output_offset = output->params.zero_point; +#define TF_LITE_FULLY_CONNECTED(type) \ + type::FullyConnected( \ + GetTensorData(input), GetTensorDims(input), input_offset, \ + GetTensorData(filter), GetTensorDims(filter), filter_offset, \ + GetTensorData(bias), GetTensorDims(bias), output_offset, \ + data->output_multiplier, data->output_shift, \ + data->output_activation_min, data->output_activation_max, \ + GetTensorData(output), GetTensorDims(output), gemm_context) + if (kernel_type == kReference) { + TF_LITE_FULLY_CONNECTED(reference_ops); + } else if (kernel_type == kPie) { + // TODO(ahentz): we don't have a quantized version of the PIE kernels, so + // we just defer to the MINI ones. + TF_LITE_FULLY_CONNECTED(optimized_ops); + } else { + TF_LITE_FULLY_CONNECTED(optimized_ops); + } +#undef TF_LITE_FULLY_CONNECTED + + return kTfLiteOk; +} + +template +TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteFullyConnectedParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* filter, + TfLiteTensor* bias, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(params->activation, &output_activation_min, + &output_activation_max); +#define TF_LITE_FULLY_CONNECTED(type) \ + type::FullyConnected(GetTensorData(input), GetTensorDims(input), \ + GetTensorData(filter), GetTensorDims(filter), \ + GetTensorData(bias), GetTensorDims(bias), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_FULLY_CONNECTED(reference_ops); + } else if (kernel_type == kPie) { + return EvalPie(context, node, params, data, input, filter, bias, output); + } else { + TF_LITE_FULLY_CONNECTED(optimized_ops); + } +#undef TF_LITE_FULLY_CONNECTED + + return kTfLiteOk; +} + +#undef TF_LITE_MACRO_DISPATCH + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); + TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + return EvalFloat(context, node, params, data, input, filter, + bias, output); + case kTfLiteUInt8: + return EvalQuantized(context, node, params, data, input, + filter, bias, output); + default: + context->ReportError(context, "Type not currently supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace fully_connected + +TfLiteRegistration* Register_FULLY_CONNECTED_REF() { + static TfLiteRegistration r = { + fully_connected::Init, fully_connected::Free, fully_connected::Prepare, + fully_connected::Eval}; + return &r; +} + +TfLiteRegistration* Register_FULLY_CONNECTED_NEON_OPT() { + static TfLiteRegistration r = { + fully_connected::Init, fully_connected::Free, fully_connected::Prepare, + fully_connected::Eval}; + return &r; +} + +TfLiteRegistration* Register_FULLY_CONNECTED_GENERIC_OPT() { + static TfLiteRegistration r = { + fully_connected::Init, fully_connected::Free, fully_connected::Prepare, + fully_connected::Eval}; + return &r; +} + +TfLiteRegistration* Register_FULLY_CONNECTED_PIE() { + static TfLiteRegistration r = {fully_connected::Init, fully_connected::Free, + fully_connected::Prepare, + fully_connected::Eval}; + return &r; +} + +TfLiteRegistration* Register_FULLY_CONNECTED() { + // TODO(ahentz): We don't have a dedicated quantized version of the PIE + // kernel. For now, the quantized version just defer to the corresponding + // optimized MINI kernel. At some point we will allow different libraries to + // be built with different kernels, but for now we have to pick one here. + return Register_FULLY_CONNECTED_PIE(); +#ifdef USE_NEON + return Register_FULLY_CONNECTED_NEON_OPT(); +#else + return Register_FULLY_CONNECTED_GENERIC_OPT(); +#endif +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/fully_connected_test.cc b/tensorflow/contrib/lite/kernels/fully_connected_test.cc new file mode 100644 index 0000000000..112e3f1ba0 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/fully_connected_test.cc @@ -0,0 +1,377 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite FULLY_CONNECTED op. + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +static float fully_connected_input[] = { + 0.503691, 0.196961, 0.521017, 0.554248, 0.288678, 0.792476, 0.561653, + 0.462230, 0.650736, 0.163132, 0.029658, 0.411544, 0.470539, 0.572390, + 0.538755, 0.212030, 0.264309, 0.193908, 0.777480, 0.745661, 0.423314, + 0.470804, 0.175501, 0.492225, 0.192743, 0.540183, 0.372514, 0.446550, + 0.498173, 0.126472, 0.132706, 0.001864, 0.323433, 0.653723, 0.556112, + 0.612111, 0.446199, 0.117765, 0.074341, 0.096935, 0.280897, 0.103999, + 0.508479, 0.751437, 0.676389, 0.047234, 0.963467, 0.940698, 0.241142, + 0.740947, 0.686359, 0.664456, 0.211751, 0.861860, 0.156681, 0.404494, + 0.402043, 0.529195, 0.851044, 0.900216, 0.655667, 0.983750, 0.902081, + 0.979100, 0.637473, 0.458193, 0.591211, 0.083671, 0.575958, 0.665552, + 0.180606, 0.856856, 0.769551, 0.689086, 0.608293, 0.445940, 0.736320, + 0.571760, 0.386637, 0.977461, 0.312707, 0.072996, 0.641918, 0.524458, + 0.934856, 0.798598, 0.928951, 0.336899, 0.327793, 0.779995, 0.237115, + 0.983460, 0.763746, 0.139196, 0.962560, 0.401218, 0.597389, 0.553771, + 0.484890, 0.173347, 0.219322, 0.665496, 0.030203, 0.988873, 0.354582, + 0.638496, 0.434813, 0.090902, 0.210256, 0.821450, 0.068363, 0.522962, + 0.894446, 0.710280, 0.047420, 0.829302, 0.508879, 0.976371, 0.166202, + 0.836672, 0.756367, 0.403317, 0.820132, 0.520112, 0.542513, 0.782691, + 0.921330, 0.139902}; + +static float fully_connected_golden_output[] = { + 0, 0.0732134, 0, 0, 0, 0.280859, + 0, 0.128927, 0, 0.0777251, 0, 0.270268, + 0.271435, 0.0173503, 0.335465, 0.235562, + + 0, 0.0745866, 0, 0.051611, 0, 0.253876, + 0, 0.0814873, 0, 0.104104, 0, 0.248529, + 0.264194, 0, 0.302973, 0.166252, + + 0, 0.0170409, 0, 0.0509851, 0, 0.212834, + 0, 0.0208326, 0, 0.129932, 0.203978, 0.103428, + 0.298051, 0, 0.332233, 0.00445903, + + 0, 0.125246, 0, 0.0735336, 0, 0.0910256, + 0, 0, 0, 0.18933, 0.378111, 0.0712443, + 0.277298, 0.0123414, 0.267454, 0, + + 0, 0.14687, 0, 0.155495, 0.0300215, 0.147256, + 0, 0, 0, 0.156412, 0.434914, 0.0461529, + 0.246508, 0, 0.363138, 0, + + 0, 0, 0, 0.0212949, 0, 0.301708, + 0, 0.35497, 0, 0.406223, 0.0260211, 0.049195, + 0.197161, 0, 0.37316, 0, + + 0, 0.221783, 0, 0, 0.0116515, 0.281945, + 0, 0, 0, 0, 0.285626, 0.181773, + 0.296401, 0.170452, 0.367135, 0.142597, + + 0, 0, 0, 0, 0, 0.418886, + 0, 0.291063, 0, 0.227541, 0.0424759, 0.27589, + 0.398286, 0.177146, 0.40359, 0.121452, + + 0, 0.0834884, 0, 0, 0, 0.287441, + 0, 0.0046838, 0, 0.0122087, 0, 0.217376, + 0.140183, 0.0948412, 0.436677, 0.0589876, + + 0, 0.0289969, 0, 0.0921397, 0, 0.396802, + 0, 0.0126157, 0, 0.0968433, 0, 0.172271, + 0.173295, 0.0664741, 0.53645, 0.00915603, + + 0, 0, 0, 0, 0, 0.147942, + 0, 0.263795, 0, 0.39782, 0, 0.382435, + 0.561072, 0.0579847, 0.145712, 0.13508, + + 0, 0, 0, 0.16382, 0, 0.322294, + 0, 0.163798, 0, 0.405211, 0.367953, 0.076852, + 0.342473, 0.0834118, 0.377537, 0, + + 0, 0.206, 0, 0, 0, 0.375769, + 0, 0, 0, 0, 0, 0.125165, + 0, 0.105591, 0.52055, 0.0536445, + + 0, 0.259261, 0, 0, 0, 0.247707, + 0, 0, 0, 0, 0, 0.215862, + 0.149153, 0.224678, 0.359519, 0.129419, + + 0, 0.17611, 0, 0.280895, 0, 0.576484, + 0, 0.000418848, 0, 0, 0, 0.151112, + 0.211902, 0, 0.566341, 0.106305, + + 0, 0.0246284, 0, 0, 0, 0.196267, + 0, 0.0248624, 0, 0.265635, 0, 0.436199, + 0.408079, 0.134514, 0.328489, 0.411368}; + +class BaseFullyConnectedOpModel : public SingleOpModel { + public: + // TODO(ahentz): test different activation types too. + BaseFullyConnectedOpModel(int units, int batches, const TensorData& input, + const TensorData& output = {TensorType_FLOAT32}) + : batches_(batches), units_(units) { + int total_input_size = 1; + for (int i = 0; i < input.shape.size(); ++i) { + total_input_size *= input.shape[i]; + } + input_size_ = total_input_size / batches_; + + input_ = AddInput(input); + weights_ = + AddInput({input.type, {units_, input_size_}, input.min, input.max}); + + if (input.type == TensorType_FLOAT32) { + bias_ = AddInput({TensorType_FLOAT32, {units_}}); + } else { + // This is a quantized version. The scale of 'bias' depends on the scales + // of input and filter. Supposedly this is correctly set during quantized + // training. + auto bias_scale = GetScale(input_) * GetScale(weights_); + TensorData bias{TensorType_INT32, {units_}, 0, 0, bias_scale}; + bias_ = AddInput(bias); + } + + output_ = AddOutput(output); + + SetBuiltinOp( + BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions, + CreateFullyConnectedOptions(builder_, ActivationFunctionType_RELU) + .Union()); + BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)}); + } + + int input_size() { return input_size_; } + int num_units() { return units_; } + int num_batches() { return batches_; } + + protected: + int input_; + int weights_; + int bias_; + int output_; + + int batches_; + int units_; + int input_size_; +}; + +class FloatFullyConnectedOpModel : public BaseFullyConnectedOpModel { + public: + using BaseFullyConnectedOpModel::BaseFullyConnectedOpModel; + + void SetBias(std::initializer_list f) { PopulateTensor(bias_, f); } + + void SetWeights(std::initializer_list f) { + PopulateTensor(weights_, f); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + void SetInput(int offset, float* begin, float* end) { + PopulateTensor(input_, offset, begin, end); + } + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +class QuantizedFullyConnectedOpModel : public BaseFullyConnectedOpModel { + public: + using BaseFullyConnectedOpModel::BaseFullyConnectedOpModel; + + void SetBias(std::initializer_list data) { + QuantizeAndPopulate(bias_, data); + } + void SetWeights(std::initializer_list data) { + QuantizeAndPopulate(weights_, data); + } + void SetInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +// TODO(ahentz): add more small tests like this one, focused on making sure the +// calculations are correct. +TEST(FullyConnectedOpTest, SimpleTest) { + FloatFullyConnectedOpModel m(3, 2, {TensorType_FLOAT32, {2, 10}}); + m.SetWeights({ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + }); + m.SetBias({1, 2, 3}); + + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAre(24, 25, 26, 58, 59, 60)); +} + +TEST(FullyConnectedOpTest, SimpleTestQuantized) { + QuantizedFullyConnectedOpModel m( + 3, 2, + /*input=*/{TensorType_UINT8, {2, 10}, -63.5, 64}, + /*output=*/{TensorType_UINT8, {}, -127, 128}); + + // input_product_scale < output_scale was not true. + m.SetWeights({ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + }); + m.SetBias({1, 2, 3}); + + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }); + + m.Invoke(); + + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear({ + 24, 25, 26, // + 58, 59, 60, // + }))); + EXPECT_THAT(m.GetOutput(), ElementsAre(151, 152, 153, 185, 186, 187)); +} + +TEST(FullyConnectedOpTest, SimpleTest4DInput) { + // Note that it is not required that the first dimension be the number of + // batches. All we care is that the input can be evenly distributed in + // batches. In this case, we need the input to have multiples of '2'. + FloatFullyConnectedOpModel m(/*units=*/3, + /*batches=*/2, + /*input=*/{TensorType_FLOAT32, {4, 1, 5, 1}}); + m.SetWeights({ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + }); + m.SetBias({1, 2, 3}); + + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // first batch + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // second batch + }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 24, 25, 26, // first batch + 58, 59, 60, // second batch + })); +} + +TEST(FullyConnectedOpTest, SimpleTest4dInputQuantized) { + QuantizedFullyConnectedOpModel m( + 3, 2, + /*input=*/{TensorType_UINT8, {4, 1, 5, 1}, -63.5, 64}, + /*output=*/{TensorType_UINT8, {}, -127, 128}); + + // input_product_scale < output_scale was not true. + m.SetWeights({ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + }); + m.SetBias({1, 2, 3}); + + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }); + + m.Invoke(); + + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear({ + 24, 25, 26, // + 58, 59, 60, // + }))); + EXPECT_THAT(m.GetOutput(), ElementsAre(151, 152, 153, 185, 186, 187)); +} + +// TODO(ahentz): Reconsider this test. Having arbitrary weights makes it hard +// to debug errors and doesn't necessarily test all the important details. +TEST(FullyConnectedOpTest, BlackBoxTest) { + FloatFullyConnectedOpModel m(16, 2, {TensorType_FLOAT32, {2, 8}}); + m.SetWeights( + {0.091327, 0.103366, -0.316505, -0.083120, 0.149366, -0.196636, + -0.123672, 0.062800, 0.063031, 0.191670, -0.062001, -0.061504, + -0.275581, 0.059388, -0.118497, -0.079224, 0.109758, 0.008307, + -0.062657, -0.060962, -0.049782, -0.106719, -0.319482, -0.103650, + 0.266455, 0.051517, -0.123448, 0.322464, 0.043282, -0.173782, + -0.190381, 0.002013, 0.096086, 0.131157, 0.031164, 0.100638, + -0.312191, -0.080923, -0.101318, -0.116614, 0.142238, 0.086540, + -0.139154, 0.174268, -0.073161, 0.080072, 0.006874, 0.229382, + -0.104321, -0.176035, -0.208587, -0.001019, -0.162032, 0.080824, + -0.025021, 0.074460, -0.252595, -0.161750, -0.136403, 0.008308, + 0.005710, 0.096600, 0.289839, 0.218816, -0.304651, -0.070958, + 0.054598, 0.147113, -0.139112, -0.072798, -0.163335, -0.167863, + -0.128762, -0.035780, 0.117262, 0.017177, 0.263335, -0.176612, + 0.262961, -0.093654, -0.339283, 0.333071, 0.180827, 0.287583, + 0.066350, -0.197947, -0.114449, -0.236035, 0.103532, -0.034284, + 0.093299, -0.145361, 0.054001, 0.250570, 0.157010, -0.143480, + -0.139061, -0.048873, 0.067557, 0.139038, 0.324106, 0.227041, + 0.037793, -0.225747, -0.241619, 0.357835, 0.135762, -0.306764, + -0.125982, 0.091916, 0.266587, 0.030135, 0.265148, 0.141627, + 0.020120, 0.083815, -0.124556, -0.100124, -0.048159, 0.181172, + 0.302309, -0.041084, 0.146334, -0.061511, -0.232605, 0.281324, + 0.145408, -0.221897}); + m.SetBias({-0.160594, 0.205770, -0.078307, -0.077984, 0.001937, 0.015860, + 0.036810, 0.012346, 0.001028, 0.038551, 0.075415, 0.020804, + 0.048478, -0.032270, 0.175688, -0.085662}); + + const int input_sequence_size = sizeof(fully_connected_input) / + sizeof(float) / + (m.input_size() * m.num_batches()); + for (int i = 0; i < input_sequence_size; i++) { + // TODO(ahentz): This is what the original test was doing: two equal + // batches per invocation. We could instead use two different batches. + float* batch_start = fully_connected_input + i * m.input_size(); + float* batch_end = batch_start + m.input_size(); + m.SetInput(0, batch_start, batch_end); + m.SetInput(m.input_size(), batch_start, batch_end); + + m.Invoke(); + + float* golden_start = fully_connected_golden_output + i * m.num_units(); + float* golden_end = golden_start + m.num_units(); + std::vector expected; + expected.insert(expected.end(), golden_start, golden_end); + expected.insert(expected.end(), golden_start, golden_end); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + } +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/gemm_support.cc b/tensorflow/contrib/lite/kernels/gemm_support.cc new file mode 100644 index 0000000000..eb2b0aacf7 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/gemm_support.cc @@ -0,0 +1,68 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/gemm_support.h" + +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace gemm_support { + +struct RefCountedGemmContext { + gemmlowp::GemmContext* gemm_context_ = nullptr; + int num_references_ = 0; +}; + +void IncrementUsageCounter(TfLiteContext* context) { + auto* ptr = reinterpret_cast(context->gemm_context); + if (ptr == nullptr) { + ptr = new RefCountedGemmContext; + ptr->gemm_context_ = new gemmlowp::GemmContext(); + ptr->num_references_ = 0; + context->gemm_context = ptr; + } + ptr->num_references_++; +} + +void DecrementUsageCounter(TfLiteContext* context) { + auto* ptr = reinterpret_cast(context->gemm_context); + if (ptr == nullptr) { + TF_LITE_FATAL( + "Call to DecrementUsageCounter() not preceded by " + "IncrementUsageCounter()"); + } + if (--ptr->num_references_ == 0) { + delete ptr->gemm_context_; + delete ptr; + context->gemm_context = nullptr; + } +} + +gemmlowp::GemmContext* GetFromContext(TfLiteContext* context) { + auto* ptr = reinterpret_cast(context->gemm_context); + if (ptr == nullptr) { + TF_LITE_FATAL( + "Call to GetFromContext() not preceded by IncrementUsageCounter()"); + } + return ptr->gemm_context_; +} + +void SetMaxNumThreads(TfLiteContext* context, int num_threads) { + IncrementUsageCounter(context); + GetFromContext(context)->set_max_num_threads(num_threads); + DecrementUsageCounter(context); +} + +} // namespace gemm_support +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/gemm_support.h b/tensorflow/contrib/lite/kernels/gemm_support.h new file mode 100644 index 0000000000..b531959ffb --- /dev/null +++ b/tensorflow/contrib/lite/kernels/gemm_support.h @@ -0,0 +1,54 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_GEMM_SUPPORT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_GEMM_SUPPORT_H_ + +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { +namespace gemm_support { + +// Returns the GemmContext stored in 'context', allowing multiple ops to +// share a single object, as long as they share a TfLiteContext. The caller +// must ensure that this is called between IncrementUsageCounter() and +// DecrementUsageCounter(). For example, in the implementation of an op: +// void* Init(TfLiteContext* context, const char*, size_t) { +// gemm_support::IncrementUsageCounter(context); +// return nullptr; +// } +// void Free(TfLiteContext* context, void*) { +// gemm_support::DecrementUsageCounter(context); +// } +// TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { +// auto* gemm_context = gemm_support::GetFromContext(context); +// } +gemmlowp::GemmContext* GetFromContext(TfLiteContext* context); + +// Let the framework know that the GemmContext stored in 'context' will be used +// by an op. If necessary a new GemmContext is created and placed in 'context'. +void IncrementUsageCounter(TfLiteContext* context); + +// Let the framework know that the op stopped using the GemmContext stored in +// 'context'. If there are no more usages the GemmContext will be deleted. +void DecrementUsageCounter(TfLiteContext* context); + +// Set the maximum number threads available for gemmlowp operations. +void SetMaxNumThreads(TfLiteContext* context, int num_threads); + +} // namespace gemm_support +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_GEMM_SUPPORT_H_ diff --git a/tensorflow/contrib/lite/kernels/hashtable_lookup.cc b/tensorflow/contrib/lite/kernels/hashtable_lookup.cc new file mode 100644 index 0000000000..3b82601d11 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/hashtable_lookup.cc @@ -0,0 +1,155 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Op that looks up items from hashtable. +// +// Input: +// Tensor[0]: Hash key to lookup, dim.size == 1, int32 +// Tensor[1]: Key of hashtable, dim.size == 1, int32 +// *MUST* be sorted in ascending order. +// Tensor[2]: Value of hashtable, dim.size >= 1 +// Tensor[1].Dim[0] == Tensor[2].Dim[0] +// +// Output: +// Output[0].dim[0] == Tensor[0].dim[0], num of lookups +// Each item in output is a raw bytes copy of corresponding item in input. +// When key does not exist in hashtable, the returned bytes are all 0s. +// +// Output[1].dim = { Tensor[0].dim[0] }, num of lookups +// Each item indicates whether the corresponding lookup has a returned value. +// 0 for missing key, 1 for found key. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace ops { +namespace builtin { + +namespace { + +int greater(const void* a, const void* b) { + return *static_cast(a) - *static_cast(b); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 2); + + TfLiteTensor* lookup = GetInput(context, node, 0); + TF_LITE_ENSURE_EQ(context, NumDimensions(lookup), 1); + TF_LITE_ENSURE_EQ(context, lookup->type, kTfLiteInt32); + + TfLiteTensor* key = GetInput(context, node, 1); + TF_LITE_ENSURE_EQ(context, NumDimensions(key), 1); + TF_LITE_ENSURE_EQ(context, key->type, kTfLiteInt32); + + TfLiteTensor* value = GetInput(context, node, 2); + TF_LITE_ENSURE(context, NumDimensions(value) >= 1); + TF_LITE_ENSURE_EQ(context, SizeOfDimension(key, 0), + SizeOfDimension(value, 0)); + if (value->type == kTfLiteString) { + TF_LITE_ENSURE_EQ(context, NumDimensions(value), 1); + } + + TfLiteTensor* hits = GetOutput(context, node, 1); + TF_LITE_ENSURE_EQ(context, hits->type, kTfLiteUInt8); + TfLiteIntArray* hitSize = TfLiteIntArrayCreate(1); + hitSize->data[0] = SizeOfDimension(lookup, 0); + + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE_EQ(context, value->type, output->type); + + TfLiteStatus status = kTfLiteOk; + if (output->type != kTfLiteString) { + TfLiteIntArray* outputSize = TfLiteIntArrayCreate(NumDimensions(value)); + outputSize->data[0] = SizeOfDimension(lookup, 0); + for (int i = 1; i < NumDimensions(value); i++) { + outputSize->data[i] = SizeOfDimension(value, i); + } + status = context->ResizeTensor(context, output, outputSize); + } + if (context->ResizeTensor(context, hits, hitSize) == kTfLiteError) { + status = kTfLiteError; + } + return status; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* hits = GetOutput(context, node, 1); + TfLiteTensor* lookup = GetInput(context, node, 0); + TfLiteTensor* key = GetInput(context, node, 1); + TfLiteTensor* value = GetInput(context, node, 2); + + const int num_rows = SizeOfDimension(value, 0); + const int row_bytes = value->bytes / num_rows; + void* pointer = nullptr; + DynamicBuffer buf; + + for (int i = 0; i < SizeOfDimension(lookup, 0); i++) { + int idx = -1; + pointer = bsearch(&(lookup->data.i32[i]), key->data.i32, num_rows, + sizeof(int32_t), greater); + if (pointer != nullptr) { + idx = (reinterpret_cast(pointer) - (key->data.raw)) / + sizeof(int32_t); + } + + if (idx >= num_rows || idx < 0) { + if (output->type == kTfLiteString) { + buf.AddString(nullptr, 0); + } else { + memset(output->data.raw + i * row_bytes, 0, row_bytes); + } + hits->data.uint8[i] = 0; + } else { + if (output->type == kTfLiteString) { + buf.AddString(GetString(value, idx)); + } else { + memcpy(output->data.raw + i * row_bytes, + value->data.raw + idx * row_bytes, row_bytes); + } + hits->data.uint8[i] = 1; + } + } + if (output->type == kTfLiteString) { + buf.WriteToTensor(output); + } + + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration* Register_HASHTABLE_LOOKUP() { + static TfLiteRegistration r = {nullptr, nullptr, Prepare, Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc b/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc new file mode 100644 index 0000000000..916a23225e --- /dev/null +++ b/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc @@ -0,0 +1,176 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite Lookup op. + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class HashtableLookupOpModel : public SingleOpModel { + public: + HashtableLookupOpModel(std::initializer_list lookup_shape, + std::initializer_list key_shape, + std::initializer_list value_shape, + TensorType type) { + lookup_ = AddInput(TensorType_INT32); + key_ = AddInput(TensorType_INT32); + value_ = AddInput(type); + output_ = AddOutput(type); + hit_ = AddOutput(TensorType_UINT8); + SetBuiltinOp(BuiltinOperator_HASHTABLE_LOOKUP, BuiltinOptions_NONE, 0); + BuildInterpreter({lookup_shape, key_shape, value_shape}); + } + + void SetLookup(std::initializer_list data) { + PopulateTensor(lookup_, data); + } + + void SetHashtableKey(std::initializer_list data) { + PopulateTensor(key_, data); + } + + void SetHashtableValue(const std::vector& content) { + PopulateStringTensor(value_, content); + } + + void SetHashtableValue(const std::function& function) { + TfLiteTensor* tensor = interpreter_->tensor(value_); + int rows = tensor->dims->data[0]; + for (int i = 0; i < rows; i++) { + tensor->data.f[i] = function(i); + } + } + + void SetHashtableValue(const std::function& function) { + TfLiteTensor* tensor = interpreter_->tensor(value_); + int rows = tensor->dims->data[0]; + int features = tensor->dims->data[1]; + for (int i = 0; i < rows; i++) { + for (int j = 0; j < features; j++) { + tensor->data.f[i * features + j] = function(i, j); + } + } + } + + std::vector GetStringOutput() { + TfLiteTensor* output = interpreter_->tensor(output_); + int num = GetStringCount(output); + std::vector result(num); + for (int i = 0; i < num; i++) { + auto ref = GetString(output, i); + result[i] = string(ref.str, ref.len); + } + return result; + } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetHit() { return ExtractVector(hit_); } + + private: + int lookup_; + int key_; + int value_; + int output_; + int hit_; +}; + +// TODO(yichengfan): write more tests that exercise the details of the op, +// such as lookup errors and variable input shapes. +TEST(HashtableLookupOpTest, Test2DInput) { + HashtableLookupOpModel m({4}, {3}, {3, 2}, TensorType_FLOAT32); + + m.SetLookup({1234, -292, -11, 0}); + m.SetHashtableKey({-11, 0, 1234}); + m.SetHashtableValue([](int i, int j) { return i + j / 10.0f; }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 2.0, 2.1, // 2-nd item + 0, 0, // Not found + 0.0, 0.1, // 0-th item + 1.0, 1.1, // 1-st item + }))); + EXPECT_THAT(m.GetHit(), ElementsAreArray({ + 1, 0, 1, 1, + })); +} + +TEST(HashtableLookupOpTest, Test1DInput) { + HashtableLookupOpModel m({4}, {3}, {3}, TensorType_FLOAT32); + + m.SetLookup({1234, -292, -11, 0}); + m.SetHashtableKey({-11, 0, 1234}); + m.SetHashtableValue([](int i) { return i * i / 10.0f; }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 0.4, // 2-nd item + 0, // Not found + 0.0, // 0-th item + 0.1, // 1-st item + }))); + EXPECT_THAT(m.GetHit(), ElementsAreArray({ + 1, + 0, + 1, + 1, + })); +} + +TEST(HashtableLookupOpTest, TestString) { + HashtableLookupOpModel m({4}, {3}, {3}, TensorType_STRING); + + m.SetLookup({1234, -292, -11, 0}); + m.SetHashtableKey({-11, 0, 1234}); + m.SetHashtableValue({"Hello", "", "Hi"}); + + m.Invoke(); + + EXPECT_THAT(m.GetStringOutput(), ElementsAreArray({ + "Hi", // 2-nd item + "", // Not found + "Hello", // 0-th item + "", // 1-st item + })); + EXPECT_THAT(m.GetHit(), ElementsAreArray({ + 1, + 0, + 1, + 1, + })); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD new file mode 100644 index 0000000000..288534099b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -0,0 +1,359 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts") + +tflite_deps_intel = [ + "@arm_neon_2_x86_sse", +] + +NEON_FLAGS_IF_APPLICABLE = select({ + ":arm": [ + "-O3", + "-mfpu=neon", + "-mfloat-abi=softfp", + ], + ":armeabi-v7a": [ + "-O3", + "-mfpu=neon", + "-mfloat-abi=softfp", + ], + ":armv7a": [ + "-O3", + "-mfpu=neon", + "-mfloat-abi=softfp", + ], + "//conditions:default": [ + "-O3", + ], +}) + +cc_library( + name = "types", + srcs = [], + hdrs = [ + "compatibility.h", + "types.h", + ], +) + +config_setting( + name = "arm", + values = { + "cpu": "arm", + }, +) + +config_setting( + name = "arm64-v8a", + values = { + "cpu": "arm64-v8a", + }, +) + +config_setting( + name = "armv7a", + values = { + "cpu": "armv7a", + }, +) + +config_setting( + name = "armeabi-v7a", + values = { + "cpu": "armeabi-v7a", + }, +) + +config_setting( + name = "haswell", + values = { + "cpu": "haswell", + }, +) + +config_setting( + name = "ios_x86_64", + values = { + "cpu": "ios_x86_64", + }, +) + +config_setting( + name = "ios_armv7", + values = { + "cpu": "ios_armv7", + }, +) + +config_setting( + name = "ios_arm64", + values = { + "cpu": "ios_arm64", + }, +) + +config_setting( + name = "k8", + values = { + "cpu": "k8", + }, +) + +config_setting( + name = "x86", + values = { + "cpu": "x86", + }, +) + +config_setting( + name = "x86_64", + values = { + "cpu": "x86_64", + }, +) + +config_setting( + name = "darwin", + values = { + "cpu": "darwin", + }, +) + +cc_library( + name = "optimized_base", + srcs = [], + hdrs = [ + "common.h", + "optimized/depthwiseconv_float.h", + "optimized/depthwiseconv_uint8.h", + "optimized/optimized_ops.h", + ], + copts = tflite_copts(), + deps = [ + ":types", + ":round", + "//third_party/eigen3", + "@gemmlowp//:gemmlowp", + "//tensorflow/contrib/lite:builtin_op_data", + ] + select({ + ":haswell": tflite_deps_intel, + ":ios_x86_64": tflite_deps_intel, + ":k8": tflite_deps_intel, + ":x86": tflite_deps_intel, + ":x86_64": tflite_deps_intel, + ":darwin": tflite_deps_intel, + "//conditions:default": [], + }), +) + +cc_library( + name = "optimized", + hdrs = [ + "optimized/eigen_spatial_convolutions.h", + "optimized/eigen_tensor_reduced_instantiations_oss.h", + "optimized/multithreaded_conv.h", + "tensor.h", + ], + deps = [ + ":optimized_base", + ":types", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite:context", + "//third_party/eigen3", + ], +) + +cc_test( + name = "tensor_test", + srcs = ["tensor_test.cc"], + deps = [ + ":reference", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "round", + srcs = [], + hdrs = ["round.h"], +) + +cc_library( + name = "quantization_util", + srcs = ["quantization_util.cc"], + hdrs = [ + "compatibility.h", + "quantization_util.h", + ], + deps = [":round"], +) + +cc_test( + name = "quantization_util_test", + srcs = ["quantization_util_test.cc"], + deps = [ + ":quantization_util", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "reference_base", + srcs = [], + hdrs = [ + "common.h", + "reference/depthwiseconv_float.h", + "reference/depthwiseconv_uint8.h", + "reference/reference_ops.h", + ], + deps = [ + ":round", + ":types", + "//third_party/eigen3", + "@gemmlowp//:gemmlowp", + "//tensorflow/contrib/lite:builtin_op_data", + ] + select({ + ":haswell": tflite_deps_intel, + ":ios_x86_64": tflite_deps_intel, + ":k8": tflite_deps_intel, + ":x86": tflite_deps_intel, + ":x86_64": tflite_deps_intel, + ":darwin": tflite_deps_intel, + "//conditions:default": [], + }), +) + +cc_library( + name = "reference", + hdrs = ["tensor.h"], + deps = [ + ":types", + "//tensorflow/contrib/lite:context", + ], +) + +cc_library( + name = "portable_tensor_utils", + srcs = [ + "reference/portable_tensor_utils.cc", + ], + hdrs = [ + "reference/portable_tensor_utils.h", + ], + deps = [ + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite/kernels:activation_functor", + "//tensorflow/contrib/lite/kernels:op_macros", + ], +) + +cc_library( + name = "neon_tensor_utils", + srcs = [ + "optimized/neon_tensor_utils.cc", + ], + hdrs = [ + "optimized/neon_tensor_utils.h", + "optimized/tensor_utils_impl.h", + ], + copts = NEON_FLAGS_IF_APPLICABLE, + deps = [ + ":cpu_check", + ":portable_tensor_utils", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite/kernels:activation_functor", + ], +) + +cc_library( + name = "tensor_utils", + srcs = [ + "tensor_utils.cc", + ], + hdrs = [ + "optimized/tensor_utils_impl.h", + "reference/portable_tensor_utils.h", + "tensor_utils.h", + ], + copts = NEON_FLAGS_IF_APPLICABLE, + deps = [ + "//tensorflow/contrib/lite/kernels:activation_functor", + "//tensorflow/contrib/lite:builtin_op_data", + ] + select({ + ":arm": [ + ":neon_tensor_utils", + ], + ":arm64-v8a": [ + ":neon_tensor_utils", + ], + ":armeabi-v7a": [ + ":neon_tensor_utils", + ], + ":armv7a": [ + ":neon_tensor_utils", + ], + ":ios_armv7": [ + ":neon_tensor_utils", + ], + ":ios_arm64": [ + ":neon_tensor_utils", + ], + "//conditions:default": [ + ":portable_tensor_utils", + ], + }), +) + +cc_test( + name = "tensor_utils_test", + srcs = ["tensor_utils_test.cc"], + copts = NEON_FLAGS_IF_APPLICABLE, + linkopts = select({ + "//tensorflow:android": [ + "-fPIE -pie", + ], + "//conditions:default": [], + }), + linkstatic = 1, + deps = [ + ":tensor_utils", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "cpu_check", + hdrs = [ + "optimized/cpu_check.h", + ], + deps = [ + ] + select( + { + "//tensorflow:android": [ + "@androidndk//:cpufeatures", + ], + "//conditions:default": [], + }, + ), +) + +exports_files(["optimized/eigen_tensor_reduced_instantiations_oss.h"]) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/kernels/internal/common.h b/tensorflow/contrib/lite/kernels/internal/common.h new file mode 100644 index 0000000000..28f19a2506 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/common.h @@ -0,0 +1,107 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMMON_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMMON_H_ + +#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK +#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#endif +#endif + +#ifndef USE_NEON +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#include +#endif + +#if defined __GNUC__ && defined __SSE4_1__ +#define USE_NEON + +#define OPTIMIZED_OPS_H__IGNORE_DEPRECATED_DECLARATIONS +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#pragma GCC diagnostic ignored "-Wattributes" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnarrowing" +#pragma GCC diagnostic ignored "-Wsequence-point" + +#include "NEON_2_SSE.h" + +#pragma GCC diagnostic pop +#endif +#endif + +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { + +inline void GetActivationMinMax(FusedActivationFunctionType ac, + float* output_activation_min, + float* output_activation_max) { + switch (ac) { + case FusedActivationFunctionType::kNone: + *output_activation_min = std::numeric_limits::lowest(); + *output_activation_max = std::numeric_limits::max(); + break; + case FusedActivationFunctionType::kRelu: + *output_activation_min = 0.f; + *output_activation_max = std::numeric_limits::max(); + break; + case FusedActivationFunctionType::kRelu1: + *output_activation_min = -1.f; + *output_activation_max = 1.f; + break; + case FusedActivationFunctionType::kRelu6: + *output_activation_min = 0.f; + *output_activation_max = 6.f; + break; + } +} + +inline float ActivationFunctionWithMinMax(float x, float output_activation_min, + float output_activation_max) { + return std::min(std::max(x, output_activation_min), output_activation_max); +} + +// Legacy function, left for compatibility only. +template +float ActivationFunction(float x) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + return ActivationFunctionWithMinMax(x, output_activation_min, + output_activation_max); +} + +inline int32 MultiplyByQuantizedMultiplierSmallerThanOne( + int32 x, int32 quantized_multiplier, int right_shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + return RoundingDivideByPOT( + SaturatingRoundingDoublingHighMul(x, quantized_multiplier), right_shift); +} + +inline int32 MultiplyByQuantizedMultiplierGreaterThanOne( + int32 x, int32 quantized_multiplier, int left_shift) { + using gemmlowp::SaturatingRoundingDoublingHighMul; + return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), + quantized_multiplier); +} + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMMON_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/compatibility.h b/tensorflow/contrib/lite/kernels/internal/compatibility.h new file mode 100644 index 0000000000..796a03566a --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/compatibility.h @@ -0,0 +1,78 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ + +#include +#include +#include + +#ifndef TFLITE_DCHECK +#define TFLITE_DCHECK(condition) (condition) ? (void)0 : assert(false) +#endif + +#ifndef TFLITE_DCHECK_EQ +#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false) +#endif + +#ifndef TFLITE_DCHECK_GE +#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : assert(false) +#endif + +#ifndef TFLITE_DCHECK_GT +#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : assert(false) +#endif + +#ifndef TFLITE_DCHECK_LE +#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : assert(false) +#endif + +#ifndef TFLITE_DCHECK_LT +#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : assert(false) +#endif + +// TODO(ahentz): Clean up: We should stick to the DCHECK versions. +#ifndef TFLITE_CHECK +#define TFLITE_CHECK(condition) (condition) ? (void)0 : abort() +#endif + +#ifndef TFLITE_CHECK_EQ +#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : abort() +#endif + +#ifndef TFLITE_CHECK_GE +#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : abort() +#endif + +#ifndef TFLITE_CHECK_GT +#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : abort() +#endif + +#ifndef TFLITE_CHECK_LE +#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : abort() +#endif + +#ifndef TFLITE_CHECK_LT +#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : abort() +#endif + +// TODO(ahentz): Clean up. +using uint8 = std::uint8_t; +using int16 = std::int16_t; +using uint16 = std::uint16_t; +using int32 = std::int32_t; +using uint32 = std::uint32_t; + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h b/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h new file mode 100644 index 0000000000..dea46cc120 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h @@ -0,0 +1,65 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_ + +namespace tflite { + +#ifdef __ANDROID__ +#include "ndk/sources/android/cpufeatures/cpu-features.h" + +// Runtime check for Neon support on Android. +inline bool TestCPUFeatureNeon() { +#ifdef __aarch64__ + // ARM-64 always has NEON support. + return true; +#else + static bool kUseAndroidNeon = + (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM && + android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_ARMv7 && + android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON); + return kUseAndroidNeon; +#endif // __aarch64__ +} + +#elif __ARM_NEON + +inline bool TestCPUFeatureNeon() { + return true; +} + +#else + +inline bool TestCPUFeatureNeon() { + return false; +} + +#endif + +} // namespace tflite + +// NEON_OR_PORTABLE(SomeFunc, arcs) calls NeonSomeFunc(args) if Neon is both +// enabled at build time and detected at runtime, or PortableSomeFunc(args) +// otherwise. +#ifdef __ARM_ARCH_5TE__ +// Neon isn't available at all on ARMv5. +#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__) +#else +#define NEON_OR_PORTABLE(funcname, ...) \ + TestCPUFeatureNeon() ? Neon##funcname(__VA_ARGS__) \ + : Portable##funcname(__VA_ARGS__) +#endif + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h new file mode 100644 index 0000000000..974611f52a --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -0,0 +1,987 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_FLOAT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_FLOAT_H_ + +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace optimized_ops { + +// Implementation of float DepthwiseConv + +template +struct FloatDepthwiseConvKernel {}; + +#ifdef USE_NEON + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) { + filter[i] = vld1q_f32(filter_ptr + 4 * i); + } + int outp = 0; + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the inputs + float32x4_t input[4]; + for (int i = 0; i < 4; i++) { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 16; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlaq_f32(acc[0], input[0], filter[0]); + acc[1] = vmlaq_f32(acc[1], input[1], filter[1]); + acc[2] = vmlaq_f32(acc[2], input[2], filter[0]); + acc[3] = vmlaq_f32(acc[3], input[3], filter[1]); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the inputs + float32x4_t input[2]; + for (int i = 0; i < 2; i++) { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 8; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[i] = vmlaq_f32(acc[i], input[i], filter[i]); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + const float32x2_t filters = vld1_f32(filter_ptr); + const float32x4_t filters_dup2 = vcombine_f32(filters, filters); + int outp = 0; + // Handle 8 output pixels at a time. + for (; outp <= num_output_pixels - 8; outp += 8) { + // Load the inputs + float32x4_t input[4]; + for (int i = 0; i < 4; i++) { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 16; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 4; i++) { + acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle 4 output pixels at a time. + for (; outp <= num_output_pixels - 4; outp += 4) { + // Load the inputs + float32x4_t input[2]; + for (int i = 0; i < 2; i++) { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 8; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the inputs + const float32x4_t input = vld1q_f32(input_ptr); + input_ptr += 4; + // Load the accumulators from acc_buffer + float32x4_t acc = vld1q_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filters_dup2); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + // Handle 1 output pixel at a time + for (; outp < num_output_pixels; outp++) { + // Load the inputs + const float32x2_t input = vld1_f32(input_ptr); + input_ptr += 2; + // Load the accumulators from acc_buffer + float32x2_t acc = vld1_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmla_f32(acc, input, filters); + // Store the accumulators back to acc_buffer + vst1_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 2; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + const float* local_filter_ptr = filter_ptr; + const float* local_input_ptr = input_ptr; + int ic = 0; + // Handle 16 input channels at a time. + for (; ic <= input_depth - 16; ic += 16) { + // Load the filters + float32x4_t filter_0 = vld1q_f32(local_filter_ptr + 4 * 0); + float32x4_t filter_1 = vld1q_f32(local_filter_ptr + 4 * 1); + float32x4_t filter_2 = vld1q_f32(local_filter_ptr + 4 * 2); + float32x4_t filter_3 = vld1q_f32(local_filter_ptr + 4 * 3); + local_filter_ptr += 16; + // Load the inputs + float32x4_t input_0 = vld1q_f32(local_input_ptr + 4 * 0); + float32x4_t input_1 = vld1q_f32(local_input_ptr + 4 * 1); + float32x4_t input_2 = vld1q_f32(local_input_ptr + 4 * 2); + float32x4_t input_3 = vld1q_f32(local_input_ptr + 4 * 3); + local_input_ptr += 16; + // Load the accumulators from acc_buffer + float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0); + float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1); + float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2); + float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3); + // Multiply-accumulate + acc_0 = vmlaq_f32(acc_0, input_0, filter_0); + acc_1 = vmlaq_f32(acc_1, input_1, filter_1); + acc_2 = vmlaq_f32(acc_2, input_2, filter_2); + acc_3 = vmlaq_f32(acc_3, input_3, filter_3); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3); + acc_buffer_ptr += 16; + } + // Handle 4 input channels at a time. + for (; ic <= input_depth - 4; ic += 4) { + // Load the filters + float32x4_t filter; + filter = vld1q_f32(local_filter_ptr); + local_filter_ptr += 4; + // Load the inputs + float32x4_t input; + input = vld1q_f32(local_input_ptr); + local_input_ptr += 4; + // Load the accumulators from acc_buffer + float32x4_t acc; + acc = vld1q_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filter); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) { + const float input_val = *local_input_ptr++; + const float filter_val = *local_filter_ptr++; + *acc_buffer_ptr++ += filter_val * input_val; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + const float* local_filter_ptr = filter_ptr; + const float* local_input_ptr = input_ptr; + int ic = 0; + // Handle 2 input channels at a time. + for (; ic <= input_depth - 2; ic += 2) { + // Load the filters + float32x4_t filter[4]; + for (int i = 0; i < 4; i++) { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 16; + // Load the inputs + const float32x2_t input = vld1_f32(local_input_ptr); + local_input_ptr += 2; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlaq_lane_f32(acc[0], filter[0], input, 0); + acc[1] = vmlaq_lane_f32(acc[1], filter[1], input, 0); + acc[2] = vmlaq_lane_f32(acc[2], filter[2], input, 1); + acc[3] = vmlaq_lane_f32(acc[3], filter[3], input, 1); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) { + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 8; + // Load the inputs + const float input_val = *local_input_ptr++; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + const float* local_filter_ptr = filter_ptr; + const float* local_input_ptr = input_ptr; + int ic = 0; + // Handle 8 input channels at a time. + for (; ic <= input_depth - 8; ic += 8) { + // Load the filters + float32x4_t filter[4]; + for (int i = 0; i < 4; i++) { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 16; + // Load the inputs + float32x4x2_t input_dup2[2]; + for (int i = 0; i < 2; i++) { + const float32x4_t input = vld1q_f32(local_input_ptr + 4 * i); + input_dup2[i] = vzipq_f32(input, input); + } + local_input_ptr += 8; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlaq_f32(acc[0], filter[0], input_dup2[0].val[0]); + acc[1] = vmlaq_f32(acc[1], filter[1], input_dup2[0].val[1]); + acc[2] = vmlaq_f32(acc[2], filter[2], input_dup2[1].val[0]); + acc[3] = vmlaq_f32(acc[3], filter[3], input_dup2[1].val[1]); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle 4 input channels at a time. + for (; ic <= input_depth - 4; ic += 4) { + // Load the filters + float32x2_t filter[4]; + for (int i = 0; i < 4; i++) { + filter[i] = vld1_f32(local_filter_ptr + 2 * i); + } + local_filter_ptr += 8; + // Load the inputs + const float32x4_t input = vld1q_f32(local_input_ptr); + local_input_ptr += 4; + // Load the accumulators from acc_buffer + float32x2_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1_f32(acc_buffer_ptr + 2 * i); + } + // Multiply-accumulate + acc[0] = vmla_lane_f32(acc[0], filter[0], vget_low_f32(input), 0); + acc[1] = vmla_lane_f32(acc[1], filter[1], vget_low_f32(input), 1); + acc[2] = vmla_lane_f32(acc[2], filter[2], vget_high_f32(input), 0); + acc[3] = vmla_lane_f32(acc[3], filter[3], vget_high_f32(input), 1); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1_f32(acc_buffer_ptr + 2 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + // Handle 2 input channels at a time. + for (; ic <= input_depth - 2; ic += 2) { + // Load the filters + const float32x4_t filter = vld1q_f32(local_filter_ptr); + local_filter_ptr += 4; + // Load the inputs + const float32x2_t input = vld1_f32(local_input_ptr); + local_input_ptr += 2; + // Load the accumulators from acc_buffer + float32x2_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1_f32(acc_buffer_ptr + 2 * i); + } + // Multiply-accumulate + acc[0] = vmla_lane_f32(acc[0], vget_low_f32(filter), input, 0); + acc[1] = vmla_lane_f32(acc[1], vget_high_f32(filter), input, 1); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1_f32(acc_buffer_ptr + 2 * i, acc[i]); + } + acc_buffer_ptr += 4; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) { + // Load the inputs + const float input_val = *local_input_ptr++; + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc_buffer_ptr[i] += local_filter_ptr[i] * input_val; + } + local_filter_ptr += 2; + acc_buffer_ptr += 2; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) { + filter[i] = vld1q_f32(filter_ptr + 4 * i); + } + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs + const float input_val = *input_ptr; + input_ptr += input_ptr_increment; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Load the filters + float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0); + float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1); + float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2); + float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3); + float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4); + float32x4_t filter_5 = vld1q_f32(filter_ptr + 4 * 5); + float32x4_t filter_6 = vld1q_f32(filter_ptr + 4 * 6); + float32x4_t filter_7 = vld1q_f32(filter_ptr + 4 * 7); + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs + const float input_val = *input_ptr; + input_ptr += input_ptr_increment; + // Load the accumulators from acc_buffer + float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0); + float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1); + float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2); + float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3); + float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4); + float32x4_t acc_5 = vld1q_f32(acc_buffer_ptr + 4 * 5); + float32x4_t acc_6 = vld1q_f32(acc_buffer_ptr + 4 * 6); + float32x4_t acc_7 = vld1q_f32(acc_buffer_ptr + 4 * 7); + // Multiply-accumulate + acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val); + acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val); + acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val); + acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val); + acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val); + acc_5 = vmlaq_n_f32(acc_5, filter_5, input_val); + acc_6 = vmlaq_n_f32(acc_6, filter_6, input_val); + acc_7 = vmlaq_n_f32(acc_7, filter_7, input_val); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3); + vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4); + vst1q_f32(acc_buffer_ptr + 4 * 5, acc_5); + vst1q_f32(acc_buffer_ptr + 4 * 6, acc_6); + vst1q_f32(acc_buffer_ptr + 4 * 7, acc_7); + acc_buffer_ptr += 32; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + const float* local_filter_ptr = filter_ptr; + const float* local_input_ptr = input_ptr; + for (int ic = 0; ic < input_depth; ic++) { + // Load the filters + float32x4_t filter[4]; + for (int i = 0; i < 4; i++) { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 16; + // Load the inputs + const float input_val = *local_input_ptr++; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 4; i++) { + acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) { + filter[i] = vld1q_f32(filter_ptr + 4 * i); + } + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs + float32x4_t input[2]; + for (int i = 0; i < 2; i++) { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[i] = vmlaq_f32(acc[i], input[i], filter[i]); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + float32x2_t filter = vld1_f32(filter_ptr); + float32x4_t filter_x4 = vcombine_f32(filter, filter); + int outp = 0; + + // Handle two output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the inputs + float32x2_t input_1 = vld1_f32(input_ptr); + input_ptr += input_ptr_increment; + float32x2_t input_2 = vld1_f32(input_ptr); + input_ptr += input_ptr_increment; + float32x4_t input = vcombine_f32(input_1, input_2); + + // Load the accumulators from acc_buffer + float32x4_t acc = vld1q_f32(acc_buffer_ptr); + + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filter_x4); + + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the inputs + float32x2_t input = vld1_f32(input_ptr); + input_ptr += input_ptr_increment; + + // Load the accumulators from acc_buffer + float32x2_t acc = vld1_f32(acc_buffer_ptr); + + // Multiply-accumulate + acc = vmla_f32(acc, input, filter); + + // Store the accumulators back to acc_buffer + vst1_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 2; + } + } +}; + +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + float32x4_t filter = vld1q_f32(filter_ptr); + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs + float32x4_t input = vld1q_f32(input_ptr); + // Load the accumulators from acc_buffer + float32x4_t acc = vld1q_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filter); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + input_ptr += input_ptr_increment; + } + } +}; +#endif + +// Accumulates the effect of one row of the filter, on a segment of one row +// of the output, accessing the corresponding one row of the input. +template +void FloatDepthwiseConvAccumRow(int stride, int input_depth, int input_width, + const float* input_data, int pad_width, + int depth_multiplier, int filter_width, + const float* filter_data, + int out_x_buffer_start, int out_x_buffer_end, + int output_depth, float* acc_buffer) { +#ifdef GEMMLOWP_PROFILING + gemmlowp::ScopedProfilingLabel label(__PRETTY_FUNCTION__); +#endif + // Sanity check parameters. This is important in particular to ensure + // that we keep the number of template instantiations minimal, so we don't + // increase binary size unnecessarily. + static_assert(kFixedDepthMultiplier || !kFixedInputDepth, ""); + static_assert(kFixedInputDepth || kAllowStrided, ""); + TFLITE_DCHECK(stride == 1 || kAllowStrided); + if (kFixedInputDepth) { + TFLITE_DCHECK_EQ(input_depth, kFixedInputDepth); + } + if (kFixedDepthMultiplier) { + TFLITE_DCHECK_EQ(depth_multiplier, kFixedDepthMultiplier); + } + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + const int input_ptr_increment = stride * input_depth; + const float* filter_base_ptr = filter_data; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + // For the current (filter_x, filter_y) point in the filter, + // compute the boundaries of the corresponding output row segment. + int out_x_loop_start_unclampled = 0; + int out_x_loop_end_unclampled = 0; + if (kAllowStrided) { + if (stride == 2) { + out_x_loop_start_unclampled = (pad_width - filter_x + 1) / 2; + out_x_loop_end_unclampled = + (pad_width + input_width - filter_x + 1) / 2; + } else if (stride == 4) { + out_x_loop_start_unclampled = (pad_width - filter_x + 3) / 4; + out_x_loop_end_unclampled = + (pad_width + input_width - filter_x + 3) / 4; + } else { + out_x_loop_start_unclampled = + (pad_width - filter_x + stride - 1) / stride; + out_x_loop_end_unclampled = + (pad_width + input_width - filter_x + stride - 1) / stride; + } + } else { + out_x_loop_start_unclampled = pad_width - filter_x; + out_x_loop_end_unclampled = pad_width + input_width - filter_x; + } + // The kernel will have to iterate on the segment of the + // output row that starts at out_x_loop_start and out_x_loop_end. + const int out_x_loop_start = + std::max(out_x_buffer_start, out_x_loop_start_unclampled); + const int out_x_loop_end = + std::min(out_x_buffer_end, out_x_loop_end_unclampled); + + float* acc_buffer_ptr = + acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; + const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x; + const float* input_ptr = input_data + in_x_origin * input_depth; + const int num_output_pixels = out_x_loop_end - out_x_loop_start; + FloatDepthwiseConvKernel::Run(num_output_pixels, + input_depth, + depth_multiplier, + input_ptr, + input_ptr_increment, + filter_base_ptr, + acc_buffer_ptr); + filter_base_ptr += output_depth; + } +} + +// generic fallback of FloatDepthwiseConvAccumRow, portable, non-templatized. +inline void FloatDepthwiseConvAccumRowGeneric( + int stride, int input_depth, int input_width, const float* input_data, + int pad_width, int depth_multiplier, int filter_width, + const float* filter_data, int out_x_buffer_start, int out_x_buffer_end, + int output_depth, float* acc_buffer) { + gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); +#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK + LOG(FATAL) + << "\n\n" + << "*****************************************************************\n" + << "* This tfmini inference code was about to use the slow generic\n" + << "* fallback implementation for a DepthwiseConv op, and we want you\n" + << "* to be aware of that so that you will know why you get terrible\n" + << "* performance.\n" + << "*\n" + << "* If you would like to carry on with the slow code, compile\n" + << "* with this preprocessor token defined:\n" + << "* ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n" + << "*\n" + << "* The right thing to do, if you care about performance, is to add\n" + << "* a new DepthwiseConv kernel to tfmini to cover your case.\n" + << "* The relevant parameters defining your case are:\n" + << "* stride = " << stride << "\n" + << "* input_depth = " << input_depth << "\n" + << "* depth_multiplier = " << depth_multiplier << "\n" + << "*\n" + << "* Please do not hesitate to contact benoitjacob@ with this\n" + << "* information.\n" + << "*****************************************************************\n"; +#endif // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#endif // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK + const float* filter_base_ptr = filter_data; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int out_x_loop_start = std::max( + out_x_buffer_start, (pad_width - filter_x + stride - 1) / stride); + const int out_x_loop_end = + std::min(out_x_buffer_end, + (pad_width + input_width - filter_x + stride - 1) / stride); + + float* acc_buffer_ptr = + acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; + const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x; + const float* input_ptr = input_data + in_x_origin * input_depth; + const int input_ptr_increment = (stride - 1) * input_depth; + for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) { + const float* filter_ptr = filter_base_ptr; + for (int ic = 0; ic < input_depth; ++ic) { + const float input_val = *input_ptr++; + for (int m = 0; m < depth_multiplier; m++) { + const float filter_val = *filter_ptr++; + *acc_buffer_ptr++ += filter_val * input_val; + } + } + input_ptr += input_ptr_increment; + } + filter_base_ptr += output_depth; + } +} + +// Initializes the accumulator buffer with bias values. +inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth, + const float* bias_data, + float* acc_buffer) { + // TODO(benoitjacob): This might need optimized specializations + // for small output_depth values, if that ever becomes an important + // case (like it was for some quantized DepthwiseConv cases). + for (int i = 0; i < num_output_pixels; i++) { + memcpy(acc_buffer + i * output_depth, bias_data, + sizeof(acc_buffer[0]) * output_depth); + } +} + +inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("DepthwiseConv"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int input_depth = ArraySize(input_dims, 0); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); + + static const int kAccBufferMaxSize = 2048; + float acc_buffer[kAccBufferMaxSize]; + TFLITE_DCHECK_GE(kAccBufferMaxSize, output_depth); + const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth; + const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth; + TFLITE_DCHECK_LE(kOutputPixelsInAccBuffer * output_depth, + kAccBufferActualSize); + TFLITE_DCHECK_LE(kAccBufferActualSize, kAccBufferMaxSize); + TFLITE_DCHECK_GE(kOutputPixelsInAccBuffer, 1); + + // row_accum_func will point to the core accumulation function to be used + // for this DepthwiseConv op. + using row_accum_func_t = decltype(&FloatDepthwiseConvAccumRowGeneric); + row_accum_func_t row_accum_func = nullptr; + +#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, \ + FIXED_DEPTH_MULTIPLIER) \ + if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \ + (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \ + depth_multiplier == FIXED_DEPTH_MULTIPLIER) { \ + row_accum_func = \ + FloatDepthwiseConvAccumRow; \ + } + +#ifdef USE_NEON + // We go over our list of kernels by decreasing order of preference + // for the cases where multiple kernels could apply. + + // Start with the fastest kernels: AllowStrided=false, fixed input depth. + + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1) + + // Next come the strided kernels: AllowStrided=true, fixed input depth. + // They are a bit less efficient, but allow stride!=1. + + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1) + + // Finally, the kernels allowing a variable input depth, + // these are the least efficient but most general kernels. + + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 8) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 16) + +#endif // USE_NEON + +#undef TFMINI_USE_DEPTHWISECONV_KERNEL + + // No matching fast kernel found, use slow fallback. + if (!row_accum_func) { + row_accum_func = FloatDepthwiseConvAccumRowGeneric; + } + + // Now that we have determined row_accum_func, we can start work. + float* output_ptr = output_data; + for (int b = 0; b < batches; ++b) { + for (int out_y = 0; out_y < output_height; ++out_y) { + const int in_y_origin = (out_y * stride_height) - pad_height; + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + for (int out_x_buffer_start = 0; out_x_buffer_start < output_width; + out_x_buffer_start += kOutputPixelsInAccBuffer) { + const int out_x_buffer_end = std::min( + output_width, out_x_buffer_start + kOutputPixelsInAccBuffer); + // We call a 'pixel' a group of activation that share all but the + // 'depth'/'channel' coordinate. num_output_pixels is the number of + // output pixels that we will accumulate in this loop iteration. + const int num_output_pixels = out_x_buffer_end - out_x_buffer_start; + // Initialize our local accumulator with the bias values, so we don't + // have to add them later. + DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, + acc_buffer); + // Accumulation loop. Most of the time should be spent in here. + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + const int in_y = in_y_origin + filter_y; + row_accum_func(stride_width, input_depth, input_width, + input_data + in_y * input_dims.strides[2] + + b * input_dims.strides[3], + pad_width, depth_multiplier, filter_width, + filter_data + filter_y * filter_dims.strides[2], + out_x_buffer_start, out_x_buffer_end, output_depth, + acc_buffer); + } + // Finished accumulating. Now store to destination. + const int num_output_values = output_depth * num_output_pixels; + int i = 0; +// TODO(benoitjacob) optimized code goes here +#ifdef USE_NEON + // Handle 16 values at a time + for (; i <= num_output_values - 16; i += 16) { + float32x4_t acc[4]; + for (int k = 0; k < 4; k++) { + acc[k] = vld1q_f32(acc_buffer + i + 4 * k); + } + for (int k = 0; k < 4; k++) { + acc[k] = vmaxq_f32( + vdupq_n_f32(output_activation_min), + vminq_f32(vdupq_n_f32(output_activation_max), acc[k])); + } + for (int k = 0; k < 4; k++) { + vst1q_f32(output_ptr + 4 * k, acc[k]); + } + output_ptr += 16; + } + // Handle 4 values at a time + for (; i <= num_output_values - 4; i += 4) { + float32x4_t acc = vld1q_f32(acc_buffer + i); + + acc = vmaxq_f32(vdupq_n_f32(output_activation_min), + vminq_f32(vdupq_n_f32(output_activation_max), acc)); + + vst1q_f32(output_ptr, acc); + output_ptr += 4; + } +#endif + // Handle leftover values, one by one. This is very slow. + for (; i < num_output_values; i++) { + float acc = acc_buffer[i]; + acc = std::max(output_activation_min, + std::min(output_activation_max, acc)); + + *output_ptr++ = acc; + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride_width, stride_height, pad_width, pad_height, + depth_multiplier, output_activation_min, output_activation_max, + output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + float* output_data, const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, pad_width, pad_height, + depth_multiplier, output_data, output_dims); +} + +} // namespace optimized_ops +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_FLOAT_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h new file mode 100644 index 0000000000..051ed2a2c4 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -0,0 +1,1916 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_H_ + +#include "fixedpoint/fixedpoint.h" +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace optimized_ops { + +// Implementation of quantized DepthwiseConv + +template +struct QuantizedDepthwiseConvKernel {}; + +#ifdef USE_NEON +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8x2_t filter_u8; + filter_u8.val[0] = vld1_u8(filter_ptr); + filter_u8.val[1] = vld1_u8(filter_ptr + 8); + int16x8_t filter[2]; + for (int i = 0; i < 2; i++) { + filter[i] = vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), + vdupq_n_s16(filter_offset)); + } + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer + int32x4x2_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i); + acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8); + } + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += input_ptr_increment; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + // Duplicate the input values, 2-fold + const int16x8x2_t input_dup2 = vzipq_s16(input, input); + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), + vget_low_s16(input_dup2.val[i])); + acc[1].val[i] = vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), + vget_high_s16(input_dup2.val[i])); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]); + vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]); + } + acc_buffer_ptr += 16; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + const uint8x8_t filter_u8 = vld1_u8(filter_ptr); + const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); + const int16x8_t filter = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + + int outp = 0; + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the accumulators from acc_buffer. + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + uint8x8_t input_u8[2]; + for (int i = 0; i < 2; i++) { + input_u8[i] = vld1_u8(input_ptr + 8 * i); + } + input_ptr += 16; + int16x8_t input[2]; + for (int i = 0; i < 2; i++) { + input[i] = vreinterpretq_s16_u16(vmovl_u8(input_u8[i])); + } + for (int i = 0; i < 2; i++) { + input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset)); + } + // Multiply-accumulate. + acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0])); + acc[1] = + vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0])); + acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1])); + acc[3] = + vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1])); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle 1 output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer. + int32x4_t acc[2]; + acc[0] = vld1q_s32(acc_buffer_ptr); + acc[1] = vld1q_s32(acc_buffer_ptr + 4); + + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + // Multiply-accumulate. + acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input)); + acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input)); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr, acc[0]); + vst1q_s32(acc_buffer_ptr + 4, acc[1]); + acc_buffer_ptr += 8; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + const uint8x8_t filter_u8 = vld1_u8(filter_ptr); + const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); + const int16x8_t filter = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + + int outp = 0; + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + // Duplicate the input values, 2-fold + const int16x8x2_t input_dup2 = vzipq_s16(input, input); + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), + vget_low_s16(input_dup2.val[i])); + acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), + vget_high_s16(input_dup2.val[i])); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer + int32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2); + input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3); + input_ptr += 4; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + // Duplicate the input values, 2-fold + const int16x4x2_t input_dup2 = vzip_s16(input, input); + // Multiply-accumulate + acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]); + acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), input_dup2.val[1]); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + int16x8_t filter[2]; + for (int i = 0; i < 2; i++) { + const uint8x8_t filter_u8 = vld1_u8(filter_ptr + 8 * i); + const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); + filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + } + int outp = 0; + // Handle two output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the accumulators from acc_buffer. + int32x4_t acc[8]; + for (int i = 0; i < 8; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2); + input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3); + input_ptr += 4; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + // Multiply-accumulate. + acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0); + acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0); + acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1); + acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1); + acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), input, 2); + acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), input, 2); + acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), input, 3); + acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), input, 3); + // Store the accumulators back to acc_buffer. + for (int i = 0; i < 8; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 32; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer. + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_ptr += 2; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + + // Multiply-accumulate. + acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0); + acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0); + acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1); + acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1); + + // Store the accumulators back to acc_buffer. + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8 = vdup_n_u8(0); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1); + filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2); + filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3); + const int16x4_t filter_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8))); + const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset)); + + int outp = 0; + // Handle 4 output pixels at a time. + for (; outp <= num_output_pixels - 4; outp += 4) { + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + // Duplicate the input values, 2-fold + const int16x8x2_t input_dup2 = vzipq_s16(input, input); + // Multiply-accumulate + acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0])); + acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0])); + acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1])); + acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1])); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer + int32x4_t acc = vld1q_s32(acc_buffer_ptr); + + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_ptr += 2; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + // Duplicate the input values, 2-fold + const int16x4_t input_dup2 = vzip_s16(input, input).val[0]; + // Multiply-accumulate + acc = vmlal_s16(acc, filter, input_dup2); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8 = vdup_n_u8(0); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 2); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 3); + const int16x4_t filter_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8))); + const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset)); + + int outp = 0; + // Handle 8 output pixels at a time. + for (; outp <= num_output_pixels - 8; outp += 8) { + // Load the accumulators from acc_buffer. + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + uint8x8_t input_u8[2]; + for (int i = 0; i < 2; i++) { + input_u8[i] = vld1_u8(input_ptr + 8 * i); + } + input_ptr += 16; + int16x8_t input[2]; + for (int i = 0; i < 2; i++) { + input[i] = vreinterpretq_s16_u16(vmovl_u8(input_u8[i])); + } + for (int i = 0; i < 2; i++) { + input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset)); + } + + // Multiply-accumulate. + acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input[0])); + acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input[0])); + acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input[1])); + acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input[1])); + // Store the accumulators back to acc_buffer. + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle 4 output pixels at a time. + for (; outp <= num_output_pixels - 4; outp += 4) { + // Load the accumulators from acc_buffer. + int32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + + // Multiply-accumulate. + acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input)); + acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input)); + // Store the accumulators back to acc_buffer. + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the accumulators from acc_buffer. + int32x4_t acc = vld1q_s32(acc_buffer_ptr); + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2); + input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3); + input_ptr += 4; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + + // Multiply-accumulate. + acc = vmlal_s16(acc, filter, input); + // Store the accumulators back to acc_buffer. + vst1q_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + // Handle 1 output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer. + int32x2_t acc = vld1_s32(acc_buffer_ptr); + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_ptr += 2; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + + // Multiply-accumulate. + acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input)); + // Store the accumulators back to acc_buffer. + vst1_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 2; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8 = vdup_n_u8(0); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 2); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 3); + const int16x4_t filter_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8))); + const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset)); + + int outp = 0; + // Handle 8 output pixels at a time. + for (; outp <= num_output_pixels - 8; outp += 8) { + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + // Duplicate the input values, 2-fold + const int16x8x2_t input_dup2 = vzipq_s16(input, input); + // Multiply-accumulate + acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0])); + acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0])); + acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1])); + acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1])); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer + int32x2_t acc = vld1_s32(acc_buffer_ptr); + + // Load the inputs, add input_offset. + const uint32 input = *input_ptr++ + input_offset; + + // Multiply-accumulate + acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input)); + // Store the accumulators back to acc_buffer + vst1_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 2; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8 = vdup_n_u8(0); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1); + filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2); + filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3); + const int16x4_t filter_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8))); + const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset)); + + int outp = 0; + // Handle 8 output pixels at a time. + for (; outp <= num_output_pixels - 8; outp += 8) { + // Load the accumulators from acc_buffer + int32x4_t acc[8]; + for (int i = 0; i < 8; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + + // Multiply-accumulate + acc[0] = vmlal_lane_s16(acc[0], filter, vget_low_s16(input), 0); + acc[1] = vmlal_lane_s16(acc[1], filter, vget_low_s16(input), 1); + acc[2] = vmlal_lane_s16(acc[2], filter, vget_low_s16(input), 2); + acc[3] = vmlal_lane_s16(acc[3], filter, vget_low_s16(input), 3); + acc[4] = vmlal_lane_s16(acc[4], filter, vget_high_s16(input), 0); + acc[5] = vmlal_lane_s16(acc[5], filter, vget_high_s16(input), 1); + acc[6] = vmlal_lane_s16(acc[6], filter, vget_high_s16(input), 2); + acc[7] = vmlal_lane_s16(acc[7], filter, vget_high_s16(input), 3); + + // Store the accumulators back to acc_buffer + for (int i = 0; i < 8; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 32; + } + // Handle 4 output pixels at a time. + for (; outp <= num_output_pixels - 4; outp += 4) { + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2); + input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3); + input_ptr += 4; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + + // Multiply-accumulate + acc[0] = vmlal_lane_s16(acc[0], filter, input, 0); + acc[1] = vmlal_lane_s16(acc[1], filter, input, 1); + acc[2] = vmlal_lane_s16(acc[2], filter, input, 2); + acc[3] = vmlal_lane_s16(acc[3], filter, input, 3); + + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer + int32x4_t acc = vld1q_s32(acc_buffer_ptr); + + // Load the inputs, add input_offset. + const uint32 input = *input_ptr++ + input_offset; + + // Multiply-accumulate + acc = vmlal_n_s16(acc, filter, input); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8 = vdup_n_u8(0); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1); + filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2); + filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3); + const int16x4_t filter_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8))); + const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset)); + + int outp = 0; + // Handle 4 output pixels at a time. + for (; outp <= num_output_pixels - 4; outp += 4) { + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Load the inputs, add input_offset. + int16x8_t input[2]; + for (int i = 0; i < 2; i++) { + const uint8x8_t input_u8 = vld1_u8(input_ptr + 8 * i); + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + input[i] = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + } + input_ptr += 16; + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[2 * i + 0] = + vmlal_s16(acc[2 * i + 0], filter, vget_low_s16(input[i])); + acc[2 * i + 1] = + vmlal_s16(acc[2 * i + 1], filter, vget_high_s16(input[i])); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer + int32x4_t acc; + acc = vld1q_s32(acc_buffer_ptr); + + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2); + input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3); + input_ptr += 4; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + // Multiply-accumulate + acc = vmlal_s16(acc, filter, input); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + int16x8_t filter[2]; + for (int i = 0; i < 2; i++) { + const uint8x8_t filter_u8 = vld1_u8(filter_ptr + 8 * i); + const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); + filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + } + + int outp = 0; + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the accumulators from acc_buffer + int32x4_t acc[8]; + for (int i = 0; i < 8; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + + // Multiply-accumulate + acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), + vget_low_s16(input), 0); + acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), + vget_low_s16(input), 1); + acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), + vget_low_s16(input), 2); + acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), + vget_low_s16(input), 3); + acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), + vget_high_s16(input), 0); + acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), + vget_high_s16(input), 1); + acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), + vget_high_s16(input), 2); + acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), + vget_high_s16(input), 3); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 8; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 32; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2); + input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3); + input_ptr += 4; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + + // Multiply-accumulate + acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0); + acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 1); + acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 2); + acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 3); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // We will have to duplicate bytes in a NEON register, 3-fold. + // We will do that by register-level table-look-up using VTBL instructions. + // Here we prepare the registers containing the table-lookup indices. + static const uint8 dup3_indices_array[3][8] = {{0, 0, 0, 1, 1, 1, 2, 2}, + {2, 3, 3, 3, 4, 4, 4, 5}, + {5, 5, 6, 6, 6, 7, 7, 7}}; + uint8x8_t dup3_indices[3]; + for (int i = 0; i < 3; i++) { + dup3_indices[i] = vld1_u8(dup3_indices_array[i]); + } + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + const uint8* local_filter_ptr = filter_ptr; + const uint8* local_input_ptr = input_ptr; + int ic = 0; + // Handle 8 input channels at a time. + for (; ic <= input_depth - 8; ic += 8) { + // Load the filters, add filter_offset. + int16x8_t filter[3]; + uint8x8x3_t filter_u8; + filter_u8.val[0] = vld1_u8(local_filter_ptr); + filter_u8.val[1] = vld1_u8(local_filter_ptr + 8); + filter_u8.val[2] = vld1_u8(local_filter_ptr + 16); + local_filter_ptr += 24; + for (int i = 0; i < 3; i++) { + const int16x8_t filter_s16 = + vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])); + filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + } + // Load the inputs, duplicate 3-fold, add input_offset. + const uint8x8_t input_u8 = vld1_u8(local_input_ptr); + local_input_ptr += 8; + + uint8x8_t input_u8_dup3[3]; + for (int i = 0; i < 3; i++) { + input_u8_dup3[i] = vtbl1_u8(input_u8, dup3_indices[i]); + } + int16x8_t input_dup3[3]; + for (int i = 0; i < 3; i++) { + const int16x8_t input_s16_dup3 = + vreinterpretq_s16_u16(vmovl_u8(input_u8_dup3[i])); + input_dup3[i] = vaddq_s16(input_s16_dup3, vdupq_n_s16(input_offset)); + } + // Load the accumulators from acc_buffer + int32x4x3_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i); + acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8); + acc[i].val[2] = vld1q_s32(acc_buffer_ptr + 4 * i + 16); + } + // Multiply-accumulate + for (int j = 0; j < 3; j++) { + acc[0].val[j] = vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), + vget_low_s16(filter[j])); + acc[1].val[j] = vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), + vget_high_s16(filter[j])); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]); + vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]); + vst1q_s32(acc_buffer_ptr + 4 * i + 16, acc[i].val[2]); + } + acc_buffer_ptr += 24; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) { + const int16 input_val = *local_input_ptr++ + input_offset; + for (int i = 0; i < 3; i++) { + const int16 filter_val = local_filter_ptr[i] + filter_offset; + *acc_buffer_ptr++ += static_cast(filter_val) * input_val; + } + local_filter_ptr += 3; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + const uint8* local_filter_ptr = filter_ptr; + const uint8* local_input_ptr = input_ptr; + int ic = 0; + // Handle 8 input channels at a time. + for (; ic <= input_depth - 8; ic += 8) { + // Load the filters, add filter_offset. + int16x8_t filter[2]; + uint8x8x2_t filter_u8; + filter_u8.val[0] = vld1_u8(local_filter_ptr); + filter_u8.val[1] = vld1_u8(local_filter_ptr + 8); + local_filter_ptr += 16; + for (int i = 0; i < 2; i++) { + const int16x8_t filter_s16 = + vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])); + filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + } + // Load the inputs, add input_offset, duplicate 2-fold. + const uint8x8_t input_u8 = vld1_u8(local_input_ptr); + local_input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + const int16x8x2_t input_dup2 = vzipq_s16(input, input); + // Load the accumulators from acc_buffer. + int32x4x2_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i); + acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8); + } + // Multiply-accumulate. + for (int j = 0; j < 2; j++) { + acc[0].val[j] = vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), + vget_low_s16(input_dup2.val[j])); + acc[1].val[j] = vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), + vget_high_s16(input_dup2.val[j])); + } + // Store the accumulators back to acc_buffer. + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]); + vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]); + } + acc_buffer_ptr += 16; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) { + // Load the inputs. + const int16 input_val = *local_input_ptr++ + input_offset; + for (int i = 0; i < 2; i++) { + const int16 filter_val = local_filter_ptr[i] + filter_offset; + *acc_buffer_ptr++ += static_cast(filter_val) * input_val; + } + local_filter_ptr += 2; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + const uint8* local_filter_ptr = filter_ptr; + const uint8* local_input_ptr = input_ptr; + int ic = 0; + // Handle 16 input channels at a time. + for (; ic <= input_depth - 16; ic += 16) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8_0 = vld1_u8(local_filter_ptr + 8 * 0); + uint8x8_t filter_u8_1 = vld1_u8(local_filter_ptr + 8 * 1); + local_filter_ptr += 16; + int16x8_t filter_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0)); + int16x8_t filter_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1)); + filter_0 = vaddq_s16(filter_0, vdupq_n_s16(filter_offset)); + filter_1 = vaddq_s16(filter_1, vdupq_n_s16(filter_offset)); + // Load the inputs, add input_offset. + uint8x8_t input_u8_0 = vld1_u8(local_input_ptr + 8 * 0); + uint8x8_t input_u8_1 = vld1_u8(local_input_ptr + 8 * 1); + local_input_ptr += 16; + int16x8_t input_0 = vreinterpretq_s16_u16(vmovl_u8(input_u8_0)); + int16x8_t input_1 = vreinterpretq_s16_u16(vmovl_u8(input_u8_1)); + input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset)); + input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset)); + // Load the accumulators from acc_buffer + int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0); + int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1); + int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2); + int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3); + acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), vget_low_s16(filter_0)); + acc_1 = + vmlal_s16(acc_1, vget_high_s16(input_0), vget_high_s16(filter_0)); + acc_2 = vmlal_s16(acc_2, vget_low_s16(input_1), vget_low_s16(filter_1)); + acc_3 = + vmlal_s16(acc_3, vget_high_s16(input_1), vget_high_s16(filter_1)); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3); + acc_buffer_ptr += 16; + } + // Handle 8 input channels at a time. + for (; ic <= input_depth - 8; ic += 8) { + // Load the filters, add filter_offset. + const uint8x8_t filter_u8 = vld1_u8(local_filter_ptr); + local_filter_ptr += 8; + const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); + const int16x8_t filter = + vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(local_input_ptr); + local_input_ptr += 8; + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + // Load the accumulators from acc_buffer + int32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter)); + acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter)); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) { + const int16 input_val = *local_input_ptr++ + input_offset; + const int16 filter_val = *local_filter_ptr++ + filter_offset; + *acc_buffer_ptr++ += static_cast(filter_val) * input_val; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8[2]; + for (int i = 0; i < 2; i++) { + filter_u8[i] = vld1_u8(filter_ptr + 8 * i); + } + int16x8_t filter[2]; + for (int i = 0; i < 2; i++) { + filter[i] = vreinterpretq_s16_u16(vmovl_u8(filter_u8[i])); + } + for (int i = 0; i < 2; i++) { + filter[i] = vaddq_s16(filter[i], vdupq_n_s16(filter_offset)); + } + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs, add input_offset. + uint8x8_t input_u8[2]; + for (int i = 0; i < 2; i++) { + input_u8[i] = vld1_u8(input_ptr + 8 * i); + } + input_ptr += input_ptr_increment; + int16x8_t input[2]; + for (int i = 0; i < 2; i++) { + input[i] = vreinterpretq_s16_u16(vmovl_u8(input_u8[i])); + } + for (int i = 0; i < 2; i++) { + input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset)); + } + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), + vget_low_s16(filter[i])); + acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), + vget_high_s16(filter[i])); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + const uint8x8_t filter_u8 = vld1_u8(filter_ptr); + const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); + const int16x8_t filter = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset)); + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs, add input_offset. + const uint8x8_t input_u8 = vld1_u8(input_ptr); + const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); + const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset)); + // Load the accumulators from acc_buffer + int32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter)); + acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter)); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + input_ptr += input_ptr_increment; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8[2]; + for (int i = 0; i < 2; i++) { + filter_u8[i] = vld1_u8(filter_ptr + 8 * i); + } + int16x8_t filter[2]; + for (int i = 0; i < 2; i++) { + filter[i] = vreinterpretq_s16_u16(vmovl_u8(filter_u8[i])); + } + for (int i = 0; i < 2; i++) { + filter[i] = vaddq_s16(filter[i], vdupq_n_s16(filter_offset)); + } + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + uint8 input_u8 = *input_ptr; + input_ptr += input_ptr_increment; + int16 input = static_cast(input_u8 + input_offset); + // Load the accumulators from acc_buffer + int32x4_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) { + acc[2 * i + 0] = + vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input); + acc[2 * i + 1] = + vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8_0 = vld1_u8(filter_ptr + 8 * 0); + uint8x8_t filter_u8_1 = vld1_u8(filter_ptr + 8 * 1); + uint8x8_t filter_u8_2 = vld1_u8(filter_ptr + 8 * 2); + uint8x8_t filter_u8_3 = vld1_u8(filter_ptr + 8 * 3); + int16x8_t filter_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0)); + int16x8_t filter_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1)); + int16x8_t filter_2 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_2)); + int16x8_t filter_3 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_3)); + filter_0 = vaddq_s16(filter_0, vdupq_n_s16(filter_offset)); + filter_1 = vaddq_s16(filter_1, vdupq_n_s16(filter_offset)); + filter_2 = vaddq_s16(filter_2, vdupq_n_s16(filter_offset)); + filter_3 = vaddq_s16(filter_3, vdupq_n_s16(filter_offset)); + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + uint8 input_u8 = *input_ptr; + input_ptr += input_ptr_increment; + int16 input = static_cast(input_u8 + input_offset); + // Load the accumulators from acc_buffer + int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0); + int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1); + int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2); + int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3); + int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4); + int32x4_t acc_5 = vld1q_s32(acc_buffer_ptr + 4 * 5); + int32x4_t acc_6 = vld1q_s32(acc_buffer_ptr + 4 * 6); + int32x4_t acc_7 = vld1q_s32(acc_buffer_ptr + 4 * 7); + // Multiply-accumulate + acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input); + acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input); + acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input); + acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input); + acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input); + acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input); + acc_6 = vmlal_n_s16(acc_6, vget_low_s16(filter_3), input); + acc_7 = vmlal_n_s16(acc_7, vget_high_s16(filter_3), input); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3); + vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4); + vst1q_s32(acc_buffer_ptr + 4 * 5, acc_5); + vst1q_s32(acc_buffer_ptr + 4 * 6, acc_6); + vst1q_s32(acc_buffer_ptr + 4 * 7, acc_7); + acc_buffer_ptr += 32; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + const uint8x8_t filter_u8 = vld1_u8(filter_ptr); + const int16x8_t filter = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset)); + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + uint8 input_u8 = *input_ptr; + input_ptr += input_ptr_increment; + int16 input = static_cast(input_u8 + input_offset); + // Load the accumulators from acc_buffer + int32x4_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlal_n_s16(acc[0], vget_low_s16(filter), input); + acc[1] = vmlal_n_s16(acc[1], vget_high_s16(filter), input); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) { + vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8 = vdup_n_u8(0); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 2); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 3); + const int16x4_t filter_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8))); + const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset)); + + int outp = 0; + + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) { + // Load the accumulators from acc_buffer. + int32x4_t acc = vld1q_s32(acc_buffer_ptr); + // Load the inputs, add input_offset. + uint16x4_t input_u16 = vdup_n_u16(0); + input_u16 = vset_lane_u16((reinterpret_cast(input_ptr))[0], + input_u16, 0); + input_ptr += input_ptr_increment; + input_u16 = vset_lane_u16((reinterpret_cast(input_ptr))[0], + input_u16, 1); + input_ptr += input_ptr_increment; + const int16x4_t input_s16 = vreinterpret_s16_u16( + vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16)))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + + // Multiply-accumulate. + acc = vmlal_s16(acc, filter, input); + // Store the accumulators back to acc_buffer. + vst1q_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + + // Handle 1 output pixel at a time. + for (; outp < num_output_pixels; outp++) { + // Load the accumulators from acc_buffer. + int32x2_t acc = vld1_s32(acc_buffer_ptr); + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_ptr += input_ptr_increment; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + + // Multiply-accumulate. + acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input)); + // Store the accumulators back to acc_buffer. + vst1_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 2; + } + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + if (num_output_pixels <= 0) { + return; + } + + // Load the filters, add filter_offset. + uint8x8_t filter_u8 = vdup_n_u8(0); + filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0); + filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1); + filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2); + filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3); + const int16x4_t filter_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8))); + const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset)); + + int outp = 0; + + // Handle one output pixel at a time until second to the last pixel. Second + // to the last because we read eight input pixels while only processing + // four. + for (; outp < num_output_pixels - 1; outp++) { + // Load the accumulators from acc_buffer + int32x4_t acc; + acc = vld1q_s32(acc_buffer_ptr); + + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vld1_u8(input_ptr); + input_ptr += input_ptr_increment; + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + // Multiply-accumulate + acc = vmlal_s16(acc, filter, input); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + + // Handle the last output pixel. + // Load the accumulators from acc_buffer + int32x4_t acc; + acc = vld1q_s32(acc_buffer_ptr); + + // Load the inputs, add input_offset. + uint8x8_t input_u8 = vdup_n_u8(0); + input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0); + input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1); + input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2); + input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3); + const int16x4_t input_s16 = + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); + const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); + // Multiply-accumulate + acc = vmlal_s16(acc, filter, input); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr, acc); + } +}; + +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + uint8x8_t filter_u8_0 = vld1_u8(filter_ptr); + uint8x8_t filter_u8_1 = vld1_u8(filter_ptr + 4); + int16x8_t filter_s16_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0)); + int16x8_t filter_s16_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1)); + filter_s16_0 = vaddq_s16(filter_s16_0, vdupq_n_s16(filter_offset)); + filter_s16_1 = vaddq_s16(filter_s16_1, vdupq_n_s16(filter_offset)); + int16x4_t filter_0 = vget_low_s16(filter_s16_0); + int16x4_t filter_1 = vget_high_s16(filter_s16_0); + int16x4_t filter_2 = vget_high_s16(filter_s16_1); + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs, add input_offset. + uint8x8_t input_u8_0 = vld1_u8(input_ptr); + uint8x8_t input_u8_1 = vld1_u8(input_ptr + 4); + input_ptr += input_ptr_increment; + int16x8_t input_0 = vreinterpretq_s16_u16(vmovl_u8(input_u8_0)); + int16x8_t input_1 = vreinterpretq_s16_u16(vmovl_u8(input_u8_1)); + input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset)); + input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset)); + + // Load the accumulators from acc_buffer + int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0); + int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1); + int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2); + + // Multiply-accumulate + acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), filter_0); + acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), filter_1); + acc_2 = vmlal_s16(acc_2, vget_high_s16(input_1), filter_2); + + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2); + + acc_buffer_ptr += 12; + } + } +}; +#endif + +// Accumulates the effect of one row of the filter, on a segment of one row +// of the output, accessing the corresponding one row of the input. +template +void QuantizedDepthwiseConvAccumRow( + int stride, int input_depth, int input_width, const uint8* input_data, + int16 input_offset, int pad_width, int depth_multiplier, int filter_width, + const uint8* filter_data, int16 filter_offset, int out_x_buffer_start, + int out_x_buffer_end, int output_depth, int32* acc_buffer) { +#ifdef GEMMLOWP_PROFILING + gemmlowp::ScopedProfilingLabel label(__PRETTY_FUNCTION__); +#endif + // Sanity check parameters. This is important in particular to ensure + // that we keep the number of template instantiations minimal, so we don't + // increase binary size unnecessarily. + static_assert(kFixedDepthMultiplier || !kFixedInputDepth, ""); + static_assert(kFixedInputDepth || kAllowStrided, ""); + TFLITE_DCHECK(stride == 1 || kAllowStrided); + if (kFixedInputDepth) { + TFLITE_DCHECK_EQ(input_depth, kFixedInputDepth); + } + if (kFixedDepthMultiplier) { + TFLITE_DCHECK_EQ(depth_multiplier, kFixedDepthMultiplier); + } + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + const int input_ptr_increment = stride * input_depth; + const uint8* filter_base_ptr = filter_data; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + // For the current (filter_x, filter_y) point in the filter, + // compute the boundaries of the corresponding output row segment. + int out_x_loop_start_unclampled = 0; + int out_x_loop_end_unclampled = 0; + if (kAllowStrided) { + if (stride == 2) { + out_x_loop_start_unclampled = (pad_width - filter_x + 1) / 2; + out_x_loop_end_unclampled = + (pad_width + input_width - filter_x + 1) / 2; + } else if (stride == 4) { + out_x_loop_start_unclampled = (pad_width - filter_x + 3) / 4; + out_x_loop_end_unclampled = + (pad_width + input_width - filter_x + 3) / 4; + } else { + out_x_loop_start_unclampled = + (pad_width - filter_x + stride - 1) / stride; + out_x_loop_end_unclampled = + (pad_width + input_width - filter_x + stride - 1) / stride; + } + } else { + out_x_loop_start_unclampled = pad_width - filter_x; + out_x_loop_end_unclampled = pad_width + input_width - filter_x; + } + // The kernel will have to iterate on the segment of the + // output row that starts at out_x_loop_start and out_x_loop_end. + const int out_x_loop_start = + std::max(out_x_buffer_start, out_x_loop_start_unclampled); + const int out_x_loop_end = + std::min(out_x_buffer_end, out_x_loop_end_unclampled); + + int32* acc_buffer_ptr = + acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; + const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x; + const uint8* input_ptr = input_data + in_x_origin * input_depth; + const int num_output_pixels = out_x_loop_end - out_x_loop_start; + QuantizedDepthwiseConvKernel< + kAllowStrided, kFixedInputDepth, + kFixedDepthMultiplier>::Run(num_output_pixels, input_depth, + depth_multiplier, input_ptr, input_offset, + input_ptr_increment, filter_base_ptr, + filter_offset, acc_buffer_ptr); + filter_base_ptr += output_depth; + } +} + +// generic fallback of DepthwiseConvAccumRow, portable, non-templatized. +inline void QuantizedDepthwiseConvAccumRowGeneric( + int stride, int input_depth, int input_width, const uint8* input_data, + int16 input_offset, int pad_width, int depth_multiplier, int filter_width, + const uint8* filter_data, int16 filter_offset, int out_x_buffer_start, + int out_x_buffer_end, int output_depth, int32* acc_buffer) { + gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); +#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK + LOG(FATAL) + << "\n\n" + << "*****************************************************************\n" + << "* This tfmini inference code was about to use the slow generic\n" + << "* fallback implementation for a DepthwiseConv op, and we want you\n" + << "* to be aware of that so that you will know why you get terrible\n" + << "* performance.\n" + << "*\n" + << "* If you would like to carry on with the slow code, compile\n" + << "* with this preprocessor token defined:\n" + << "* TFLITE_ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n" + << "*\n" + << "* The right thing to do, if you care about performance, is to add\n" + << "* a new DepthwiseConv kernel to tfmini to cover your case.\n" + << "* The relevant parameters defining your case are:\n" + << "* stride = " << stride << "\n" + << "* input_depth = " << input_depth << "\n" + << "* depth_multiplier = " << depth_multiplier << "\n" + << "*\n" + << "* Please do not hesitate to contact benoitjacob@ with this\n" + << "* information.\n" + << "*****************************************************************\n"; +#endif // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#endif // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK + const uint8* filter_base_ptr = filter_data; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int out_x_loop_start = std::max( + out_x_buffer_start, (pad_width - filter_x + stride - 1) / stride); + const int out_x_loop_end = + std::min(out_x_buffer_end, + (pad_width + input_width - filter_x + stride - 1) / stride); + + int32* acc_buffer_ptr = + acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; + const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x; + const uint8* input_ptr = input_data + in_x_origin * input_depth; + const int input_ptr_increment = (stride - 1) * input_depth; + for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) { + const uint8* filter_ptr = filter_base_ptr; + for (int ic = 0; ic < input_depth; ++ic) { + const int16 input_val = *input_ptr++ + input_offset; + for (int m = 0; m < depth_multiplier; m++) { + const int16 filter_val = *filter_ptr++ + filter_offset; + *acc_buffer_ptr++ += static_cast(filter_val) * input_val; + } + } + input_ptr += input_ptr_increment; + } + filter_base_ptr += output_depth; + } +} + +// Initializes the accumulator buffer with bias values. +inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth, + const int32* bias_data, + int32* acc_buffer) { + int i = 0; +#ifdef USE_NEON + if (output_depth == 1) { + const int32x4_t b = vdupq_n_s32(bias_data[0]); + for (; i <= num_output_pixels - 16; i += 16) { + vst1q_s32(acc_buffer + i + 0, b); + vst1q_s32(acc_buffer + i + 4, b); + vst1q_s32(acc_buffer + i + 8, b); + vst1q_s32(acc_buffer + i + 12, b); + } + for (; i <= num_output_pixels - 4; i += 4) { + vst1q_s32(acc_buffer + i, b); + } + } else if (output_depth == 2) { + int32x4_t b = vdupq_n_s32(bias_data[0]); + b = vsetq_lane_s32(bias_data[1], b, 1); + b = vsetq_lane_s32(bias_data[1], b, 3); + for (; i <= num_output_pixels - 8; i += 8) { + vst1q_s32(acc_buffer + 2 * i + 0, b); + vst1q_s32(acc_buffer + 2 * i + 4, b); + vst1q_s32(acc_buffer + 2 * i + 8, b); + vst1q_s32(acc_buffer + 2 * i + 12, b); + } + for (; i <= num_output_pixels - 2; i += 2) { + vst1q_s32(acc_buffer + 2 * i, b); + } + } else if (output_depth == 4) { + const int32x4_t b = vld1q_s32(bias_data); + for (; i <= num_output_pixels - 4; i += 4) { + vst1q_s32(acc_buffer + 4 * i + 0, b); + vst1q_s32(acc_buffer + 4 * i + 4, b); + vst1q_s32(acc_buffer + 4 * i + 8, b); + vst1q_s32(acc_buffer + 4 * i + 12, b); + } + for (; i < num_output_pixels; i++) { + vst1q_s32(acc_buffer + 4 * i, b); + } + } else if (output_depth == 8) { + const int32x4_t b0 = vld1q_s32(bias_data); + const int32x4_t b1 = vld1q_s32(bias_data + 4); + for (; i <= num_output_pixels - 2; i += 2) { + vst1q_s32(acc_buffer + 8 * i + 0, b0); + vst1q_s32(acc_buffer + 8 * i + 4, b1); + vst1q_s32(acc_buffer + 8 * i + 8, b0); + vst1q_s32(acc_buffer + 8 * i + 12, b1); + } + for (; i < num_output_pixels; i++) { + vst1q_s32(acc_buffer + 8 * i + 0, b0); + vst1q_s32(acc_buffer + 8 * i + 4, b1); + } + } else if (output_depth == 16) { + const int32x4_t b0 = vld1q_s32(bias_data); + const int32x4_t b1 = vld1q_s32(bias_data + 4); + const int32x4_t b2 = vld1q_s32(bias_data + 8); + const int32x4_t b3 = vld1q_s32(bias_data + 12); + for (; i < num_output_pixels; i++) { + vst1q_s32(acc_buffer + 16 * i + 0, b0); + vst1q_s32(acc_buffer + 16 * i + 4, b1); + vst1q_s32(acc_buffer + 16 * i + 8, b2); + vst1q_s32(acc_buffer + 16 * i + 12, b3); + } + } +#endif + for (; i < num_output_pixels; i++) { + memcpy(acc_buffer + i * output_depth, bias_data, + sizeof(acc_buffer[0]) * output_depth); + } +} + +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("DepthwiseConv/8bit"); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int input_depth = ArraySize(input_dims, 0); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); + + static const int kAccBufferMaxSize = 2048; + int32 acc_buffer[kAccBufferMaxSize]; + TFLITE_DCHECK_GE(kAccBufferMaxSize, output_depth); + const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth; + const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth; + TFLITE_DCHECK_LE(kOutputPixelsInAccBuffer * output_depth, + kAccBufferActualSize); + TFLITE_DCHECK_LE(kAccBufferActualSize, kAccBufferMaxSize); + TFLITE_DCHECK_GE(kOutputPixelsInAccBuffer, 1); + + // row_accum_func will point to the core accumulation function to be used + // for this DepthwiseConv op. + using row_accum_func_t = decltype(&QuantizedDepthwiseConvAccumRowGeneric); + row_accum_func_t row_accum_func = nullptr; + +#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, \ + FIXED_DEPTH_MULTIPLIER) \ + if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \ + (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \ + depth_multiplier == FIXED_DEPTH_MULTIPLIER) { \ + row_accum_func = \ + QuantizedDepthwiseConvAccumRow; \ + } + +#ifdef USE_NEON + // We go over our list of kernels by decreasing order of preference + // for the cases where multiple kernels could apply. + + // Start with the fastest kernels: AllowStrided=false, fixed input depth. + + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 4) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 4) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 8) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 12, 1) + + // Next come the strided kernels: AllowStrided=true, fixed input depth. + // They are a bit less efficient, but allow stride!=1. + + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 16, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 16) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1) + + // Finally, the kernels allowing a variable input depth, + // these are the least efficient but most general kernels. + + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 3) +#endif // USE_NEON + + // No matching fast kernel found, use slow fallback. + if (!row_accum_func) { + row_accum_func = QuantizedDepthwiseConvAccumRowGeneric; + } + +#undef TFMINI_USE_DEPTHWISECONV_KERNEL + + // Now that we have determined row_accum_func, we can start work. + uint8* output_ptr = output_data; + for (int b = 0; b < batches; ++b) { + for (int out_y = 0; out_y < output_height; ++out_y) { + const int in_y_origin = (out_y * stride_height) - pad_height; + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + for (int out_x_buffer_start = 0; out_x_buffer_start < output_width; + out_x_buffer_start += kOutputPixelsInAccBuffer) { + const int out_x_buffer_end = std::min( + output_width, out_x_buffer_start + kOutputPixelsInAccBuffer); + // We call a 'pixel' a group of activation that share all but the + // 'depth'/'channel' coordinate. num_output_pixels is the number of + // output pixels that we will accumulate in this loop iteration. + const int num_output_pixels = out_x_buffer_end - out_x_buffer_start; + // Initialize our local accumulator with the bias values, so we don't + // have to add them later. + DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, + acc_buffer); + // Accumulation loop. Most of the time should be spent in here. + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + const int in_y = in_y_origin + filter_y; + row_accum_func( + stride_width, input_depth, input_width, + input_data + in_y * input_dims.strides[2] + + b * input_dims.strides[3], + input_offset, pad_width, depth_multiplier, filter_width, + filter_data + filter_y * filter_dims.strides[2], filter_offset, + out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer); + } + // Finished accumulating int32 values. Now need to convert them to + // the final 8bit form and store them. + gemmlowp::ScopedProfilingLabel label("downquantize+store"); + const int num_output_values = output_depth * num_output_pixels; + int i = 0; +#ifdef USE_NEON + using gemmlowp::RoundingDivideByPOT; + const int32x4_t output_offset_vec = vdupq_n_s32(output_offset); + const int32x4_t output_activation_min_vec = + vdupq_n_s32(output_activation_min); + const int32x4_t output_activation_max_vec = + vdupq_n_s32(output_activation_max); + // Handle 16 values at once. + // This allows us to issue 4 mutually independent int32 + // multiplications (vqrdmulh), which should alleviate most of their + // high latency. + for (; i <= num_output_values - 16; i += 16) { + int32x4_t acc[4]; + for (int j = 0; j < 4; j++) { + acc[j] = vld1q_s32(acc_buffer + i + 4 * j); + } + + // Fixed-point multiplication. + for (int j = 0; j < 4; j++) { + acc[j] = vqrdmulhq_n_s32(acc[j], output_multiplier); + } + for (int j = 0; j < 4; j++) { + acc[j] = RoundingDivideByPOT(acc[j], output_shift); + } + // Add the output offset. + for (int j = 0; j < 4; j++) { + acc[j] = vaddq_s32(acc[j], output_offset_vec); + } + // Apply the activation function. + for (int j = 0; j < 4; j++) { + acc[j] = vmaxq_s32(acc[j], output_activation_min_vec); + } + for (int j = 0; j < 4; j++) { + acc[j] = vminq_s32(acc[j], output_activation_max_vec); + } + // Saturating cast to uint8 and store to destination. + int16x4_t acc_s16[4]; + for (int j = 0; j < 4; j++) { + acc_s16[j] = vqmovn_s32(acc[j]); + } + const int16x8_t res_s16_0 = vcombine_s16(acc_s16[0], acc_s16[1]); + const int16x8_t res_s16_1 = vcombine_s16(acc_s16[2], acc_s16[3]); + const uint8x8_t res_u8_0 = vqmovun_s16(res_s16_0); + const uint8x8_t res_u8_1 = vqmovun_s16(res_s16_1); + vst1q_u8(output_ptr, vcombine_u8(res_u8_0, res_u8_1)); + output_ptr += 16; + } + // Handle 8 values at once. + // Not as good as 16 (now we're only issuing 2 mutually independent + // vqrdmulh instructions, so we're probably paying for their high + // latency). + for (; i <= num_output_values - 8; i += 8) { + int32x4_t acc0 = vld1q_s32(acc_buffer + i); + int32x4_t acc1 = vld1q_s32(acc_buffer + i + 4); + // Fixed-point multiplication. + acc0 = vqrdmulhq_n_s32(acc0, output_multiplier); + acc1 = vqrdmulhq_n_s32(acc1, output_multiplier); + // Rounding right shift. + acc0 = RoundingDivideByPOT(acc0, output_shift); + acc1 = RoundingDivideByPOT(acc1, output_shift); + // Add the output offset. + acc0 = vaddq_s32(acc0, output_offset_vec); + acc1 = vaddq_s32(acc1, output_offset_vec); + // Apply the activation function. + acc0 = vmaxq_s32(acc0, output_activation_min_vec); + acc1 = vmaxq_s32(acc1, output_activation_min_vec); + acc0 = vminq_s32(acc0, output_activation_max_vec); + acc1 = vminq_s32(acc1, output_activation_max_vec); + // Saturating cast to uint8 and store to destination. + const int16x4_t acc0_s16 = vqmovn_s32(acc0); + const int16x4_t acc1_s16 = vqmovn_s32(acc1); + const int16x8_t res_s16 = vcombine_s16(acc0_s16, acc1_s16); + const uint8x8_t res_u8 = vqmovun_s16(res_s16); + vst1_u8(output_ptr, res_u8); + output_ptr += 8; + } + // Handle 4 values at once. Now we're paying the full price of the + // high latency of vqrdmulh. Also, storing only 4 bytes at the end + // (without any alignment) can only be done 1 byte at a time. + // Yet, that is still worth doing to minimize the amount of leftover + // that will have to go through the very slow scalar code. + for (; i <= num_output_values - 4; i += 4) { + int32x4_t acc = vld1q_s32(acc_buffer + i); + // Fixed-point multiplication. + acc = vqrdmulhq_n_s32(acc, output_multiplier); + // Rounding right shift. + acc = RoundingDivideByPOT(acc, output_shift); + // Add the output offset. + acc = vaddq_s32(acc, output_offset_vec); + // Apply the activation function. + acc = vmaxq_s32(acc, output_activation_min_vec); + acc = vminq_s32(acc, output_activation_max_vec); + // Saturating cast to uint8 and store to destination. + const int16x4_t acc_s16 = vqmovn_s32(acc); + const int16x8_t res_s16 = vcombine_s16(acc_s16, acc_s16); + const uint8x8_t res_u8 = vqmovun_s16(res_s16); + vst1_lane_u8(output_ptr + 0, res_u8, 0); + vst1_lane_u8(output_ptr + 1, res_u8, 1); + vst1_lane_u8(output_ptr + 2, res_u8, 2); + vst1_lane_u8(output_ptr + 3, res_u8, 3); + output_ptr += 4; + } +#endif // USE_NEON + + // Handle leftover values, one by one. This is very slow. + for (; i < num_output_values; i++) { + int32 acc = acc_buffer[i]; + acc = MultiplyByQuantizedMultiplierSmallerThanOne( + acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + *output_ptr++ = static_cast(acc); + } + } + } + } +} + +// Legacy, for compatibility with old checked-in code. +template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, + stride_height, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +// Legacy, for compatibility with old checked-in code. +template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, input_offset, filter_data, + filter_dims, filter_offset, bias_data, bias_dims, stride, + stride, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +} // namespace optimized_ops +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/eigen_spatial_convolutions.h b/tensorflow/contrib/lite/kernels/internal/optimized/eigen_spatial_convolutions.h new file mode 100644 index 0000000000..8004c24a99 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/eigen_spatial_convolutions.h @@ -0,0 +1,231 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Copied from tensorflow/core/kernels/eigen_spatial_convolutions.h. +// TODO(petewarden) - move this to a common location in Eigen itself. + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_SPATIAL_CONVOLUTIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_SPATIAL_CONVOLUTIONS_H_ + +#define EIGEN_USE_CUSTOM_THREAD_POOL +#define EIGEN_USE_THREADS + +// NOTE: Eigen is slightly different internally and externally. We need to +// hack the unsupported/Eigen/CXX11/Tensor header instantiation macros at +// specific places, so we need two copies of the hacked file, one for +// internal and one for external. +// If you have trouble simply undef out the reducer macro e.g. +// TFLITE_REDUCE_INSTANTIATIONS_GOOGLE, but be aware this will make +// the binary much bigger! +#define TFLITE_REDUCE_INSTANTIATIONS_OPEN_SOURCE +#define Eigen EigenForTFLite +#if defined(TFLITE_REDUCE_INSTANTIATIONS_GOOGLE) +#include "tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h" +#elif defined(TFLITE_REDUCE_INSTANTIATIONS_OPEN_SOURCE) +#include "tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h" +#else +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#endif + + +namespace Eigen { + +/** SpatialConvolution + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies a 2D convolution over a multichannel input image. + * + * The input parameter is expected to be a tensor with a rank of 3 or more + * (channels, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, + * kernel_height, kernel_width) + * The input and the kernel must both be in col-major layout. The result will + * also be in col-major layout. + * + * If col_in_stride, row_in_stride > 1, then applies convolution with holes + * (aka atrous convolution), sampling every col_in_stride, row_in_stride input + * pixels. + * + * The result can be assigned to a tensor of rank equal to the rank of the + * input. The dimensions of the result will be filters, height, width (and + * others if applicable). + * + * It is possible to swap the order of the width and height dimensions provided + * that the same order is used in the input, the kernel, and the output. + * + */ +template +EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits::Layout == ColMajor, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp > > >, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel> > > >::type + SpatialConvolution(const Input& input, const Kernel& kernel, + const DenseIndex row_stride = 1, + const DenseIndex col_stride = 1, + const PaddingType padding_type = PADDING_SAME, + const DenseIndex row_in_stride = 1, + const DenseIndex col_in_stride = 1) { + typedef typename internal::traits::Index TensorIndex; + TensorRef::Scalar, + internal::traits::NumDimensions, + internal::traits::Layout, TensorIndex> > + in(input); + TensorRef::Scalar, + internal::traits::NumDimensions, + internal::traits::Layout, TensorIndex> > + kern(kernel); + + EIGEN_STATIC_ASSERT( + internal::traits::Layout == internal::traits::Layout, + YOU_MADE_A_PROGRAMMING_MISTAKE); + const bool isColMajor = (internal::traits::Layout == ColMajor); + + const int NumDims = internal::traits::NumDimensions; + + // Number of filters to apply. This is the same as the output depth of the + // result + const TensorIndex kernelFilters = + isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = + isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; + const TensorIndex kernelRows = + isColMajor ? kern.dimensions()[2] : kern.dimensions()[1]; + const TensorIndex kernelCols = + isColMajor ? kern.dimensions()[3] : kern.dimensions()[0]; + + const DenseIndex kernelRowsEff = + kernelRows + (kernelRows - 1) * (row_in_stride - 1); + const DenseIndex kernelColsEff = + kernelCols + (kernelCols - 1) * (col_in_stride - 1); + + array, 1> contract_dims; + contract_dims[0] = IndexPair(1, 0); + + const TensorIndex InputRows = + isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex InputCols = + isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + + TensorIndex out_height; + TensorIndex out_width; + switch (padding_type) { + case PADDING_VALID: + out_height = numext::ceil((InputRows - kernelRowsEff + 1.f) / + static_cast(row_stride)); + out_width = numext::ceil((InputCols - kernelColsEff + 1.f) / + static_cast(col_stride)); + break; + case PADDING_SAME: + out_height = numext::ceil(InputRows / static_cast(row_stride)); + out_width = numext::ceil(InputCols / static_cast(col_stride)); + break; + default: + // Initialize unused variables to avoid a compiler warning + out_height = 0; + out_width = 0; + eigen_assert(false && "unexpected padding"); + } + + // Molds the output of the patch extraction code into a 2d tensor: + // - the first dimension (dims[0]): the patch values to be multiplied with the + // kernels + // - the second dimension (dims[1]): everything else + DSizes pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[1] = out_height * out_width; + for (int i = 3; i < NumDims; ++i) { + pre_contract_dims[1] *= in.dimension(i); + } + } else { + pre_contract_dims[1] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[0] = out_height * out_width; + for (int i = 0; i < NumDims - 3; ++i) { + pre_contract_dims[0] *= in.dimension(i); + } + } + + // Molds the output of the contraction into the shape expected by the used + // (assuming this is ColMajor): + // - 1st dim: kernel filters + // - 2nd dim: output height + // - 3rd dim: output width + // - 4th dim and beyond: everything else including batch size + DSizes post_contract_dims; + if (isColMajor) { + post_contract_dims[0] = kernelFilters; + post_contract_dims[1] = out_height; + post_contract_dims[2] = out_width; + for (int i = 3; i < NumDims; ++i) { + post_contract_dims[i] = in.dimension(i); + } + } else { + post_contract_dims[NumDims - 1] = kernelFilters; + post_contract_dims[NumDims - 2] = out_height; + post_contract_dims[NumDims - 3] = out_width; + for (int i = 0; i < NumDims - 3; ++i) { + post_contract_dims[i] = in.dimension(i); + } + } + + DSizes kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels * kernelRows * kernelCols; + } else { + kernel_dims[0] = kernelChannels * kernelRows * kernelCols; + kernel_dims[1] = kernelFilters; + } + // TODO(yangke): choose() is defined in TensorContraction.h -- consider + // moving it to somewhere more "common". + return + input + .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, + row_in_stride, col_in_stride, padding_type) + .reshape(pre_contract_dims) + .contract(kernel.reshape(kernel_dims), contract_dims) + .reshape(post_contract_dims); +} + +} // end namespace Eigen + +// clang-format on + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_SPATIAL_CONVOLUTIONS_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h b/tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h new file mode 100644 index 0000000000..7f78f69360 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h @@ -0,0 +1,143 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_GOOGLE_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_GOOGLE_H_ + +#define EIGEN_USE_CUSTOM_THREAD_POOL +#define EIGEN_USE_THREADS + +// clang-format off + +#include + +#include +#include +#include +#include +#include +#include // NOLINT(build/c++11) +#include // NOLINT(build/c++11) +#include // NOLINT(build/c++11) +#include + +#ifdef _WIN32 +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + + +// Because some programs may link Eigen in through other frameworks with +// different flags, we can run into multiple definition issues if we don't have +// a private namespace for our versions. This is a nasty hack, but a similar +// approach is used elsewhere to handle the problem, so it should be stable. +#define Eigen EigenForTFLite + +#include "Eigen/src/Core/util/StaticAssert.h" +#include "unsupported/Eigen/CXX11/Core" +#include "unsupported/Eigen/SpecialFunctions" + +#include "Eigen/src/Core/util/DisableStupidWarnings.h" + +#include "Eigen/Core" + +// Beware: the order of the include matters to some compilers. For example +// TensorIndexList.h should be included before TensorDimensions.h in order to +// use index lists to encode tensor dimensions when compiling with llvm. +// We're defining this ourselves rather than using the Eigen Tensor header file +// so that we can alter the macro definition of TENSOR_CONTRACTION_DISPATCH to +// reduce binary size. +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/ThreadPoolInterface.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorNonBlockingThreadPool.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStats.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h" +#undef TENSOR_CONTRACTION_DISPATCH +#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \ + if (this->m_lhs_inner_dim_contiguous && \ + this->m_rhs_inner_dim_contiguous && \ + !this->m_rhs_inner_dim_reordered) { \ + METHOD ARGS; \ + } else { \ + eigen_assert(false && "Unsupported contraction formats"); \ + } + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h" + +#include "Eigen/src/Core/util/ReenableStupidWarnings.h" +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_H diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h b/tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h new file mode 100644 index 0000000000..1d5c316194 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h @@ -0,0 +1,167 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This is essentially unsupported/CXX11/Eigen/Tensor.h +// TODO(petewarden) - move this to a common location in Eigen itself. + +// clang-format off + + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_OSS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_OSS_H_ + + +#include "Eigen/Core" + +#if defined(EIGEN_USE_SYCL) +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite +#include +#include +#include +#include +#include +#endif +#include +#include +#include + + + + + +#ifdef _WIN32 +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#include +#else +#include +#include +#endif + +#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 +#include +#endif + +#ifdef _WIN32 +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +// #if defined(EIGEN_USE_LIBXSMM) +// #include "libxsmm.h" +// #endif + +#ifdef EIGEN_USE_THREADS +#include "unsupported/Eigen/CXX11/ThreadPool" +#endif + + +#include "Eigen/src/Core/util/DisableStupidWarnings.h" + +#include "unsupported/Eigen/SpecialFunctions" +#include "unsupported/Eigen/CXX11/src/util/CXX11Meta.h" +#include "unsupported/Eigen/CXX11/src/util/MaxSizeVector.h" + + +#include "unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h" + +#include "unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorBase.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h" + +#undef TENSOR_CONTRACTION_DISPATCH +#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \ + if (this->m_lhs_inner_dim_contiguous && \ + this->m_rhs_inner_dim_contiguous && \ + !this->m_rhs_inner_dim_reordered) { \ + METHOD ARGS; \ + } else { \ + eigen_assert(false && "Unsupported contraction formats"); \ + } + + +#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorScan.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h" +#include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorMap.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorRef.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorIO.h" + +#include "Eigen/src/Core/util/ReenableStupidWarnings.h" + + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_OSS_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h new file mode 100644 index 0000000000..b3615f4658 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h @@ -0,0 +1,195 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MULTITHREAD_CONV +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MULTITHREAD_CONV + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/eigen_spatial_convolutions.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace multithreaded_ops { + +class EigenThreadPoolWrapper : public Eigen::ThreadPoolInterface { + public: + explicit EigenThreadPoolWrapper(Eigen::ThreadPool* pool) : pool_(pool) {} + ~EigenThreadPoolWrapper() override {} + + void Schedule(std::function fn) override { + pool_->Schedule(std::move(fn)); + } + int NumThreads() const override { return pool_->NumThreads(); } + int CurrentThreadId() const override { return pool_->CurrentThreadId(); } + + private: + Eigen::ThreadPool* pool_ = nullptr; +}; + +// We have a single global threadpool for all convolution operations. This means +// that inferences started from different threads may block each other, but +// since the underlying resource of CPU cores should be consumed by the +// operations anyway, it shouldn't affect overall performance. +const Eigen::ThreadPoolDevice& GetThreadPoolDevice() { + const int thread_count = 4; + static Eigen::ThreadPool* tp = new Eigen::ThreadPool(thread_count); + static EigenThreadPoolWrapper* thread_pool_wrapper = + new EigenThreadPoolWrapper(tp); + static Eigen::ThreadPoolDevice* device = + new Eigen::ThreadPoolDevice(thread_pool_wrapper, thread_count); + return *device; +} + +// Shorthands for the types we need when interfacing with the EigenTensor +// library. +typedef Eigen::TensorMap< + Eigen::Tensor, Eigen::Aligned> + EigenMatrix; +typedef Eigen::TensorMap< + Eigen::Tensor, + Eigen::Aligned> + ConstEigenMatrix; + +typedef Eigen::TensorMap< + Eigen::Tensor, Eigen::Aligned> + EigenTensor; +typedef Eigen::TensorMap< + Eigen::Tensor, + Eigen::Aligned> + ConstEigenTensor; + +// Utility functions we need for the EigenTensor API. +template +struct MatMulConvFunctor { + // Computes on device "d": out = in0 * in1, where * is matrix + // multiplication. + void operator()( + const Device& d, EigenMatrix out, ConstEigenMatrix in0, + ConstEigenMatrix in1, + const Eigen::array, 1>& dim_pair) { + out.device(d) = in0.contract(in1, dim_pair); + } +}; + +template +class EigenTensorConvFunctor { + private: + Eigen::PaddingType TfLitePadding2EigenPadding(TfLitePadding padding) { + switch (padding) { + case kTfLitePaddingValid: + return Eigen::PADDING_VALID; + case kTfLitePaddingSame: + return Eigen::PADDING_SAME; + case kTfLitePaddingUnknown: + assert(false); // should never get here. + return Eigen::PADDING_VALID; + } + return Eigen::PADDING_SAME; // Prevent compiler warning about missing + // return + } + + public: + void operator()(const T* input_data, T* im2col_buffer, int input_batches, + int input_height, int input_width, int input_depth, + const T* filter_data, int filter_height, int filter_width, + int filter_count, int stride_rows, int stride_cols, + int pad_width, int pad_height, TfLitePadding padding, + T* output_data, int output_height, int output_width) { + const Eigen::ThreadPoolDevice& device = GetThreadPoolDevice(); + + const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 && + stride_rows == 1 && stride_cols == 1); + if (is_1x1_kernel) { + // For 1x1 kernel, the 2D convolution is reduced to matrix + // multiplication. + const int conv_width = output_height * output_width; + Eigen::array, 1> dim_pair; + dim_pair[0] = Eigen::IndexPair(1, 0); + EigenMatrix output(output_data, conv_width, filter_count); + ConstEigenMatrix input(input_data, conv_width, input_depth); + ConstEigenMatrix filter(filter_data, input_depth, filter_count); + MatMulConvFunctor()(device, output, input, + filter, dim_pair); + } else if (filter_height == input_height && filter_width == input_width && + pad_width == 0 && pad_height == 0) { + // If the input data and filter have the same height/width, + // the 2D convolution is reduced to matrix multiplication. + const int k = // Length of reduction dimension. + filter_width * filter_height * input_depth; + Eigen::array, 1> dim_pair; + dim_pair[0] = Eigen::IndexPair(1, 0); + EigenMatrix output(output_data, 1, filter_count); + ConstEigenMatrix input(input_data, 1, k); + ConstEigenMatrix filter(filter_data, k, filter_count); + MatMulConvFunctor()(device, output, input, + filter, dim_pair); + } else { + EigenTensor output(output_data, input_batches, output_height, + output_width, filter_count); + ConstEigenTensor input(input_data, input_batches, input_height, + input_width, input_depth); + ConstEigenTensor filter(filter_data, filter_height, filter_width, + input_depth, filter_count); + output.device(device) = + Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows, + TfLitePadding2EigenPadding(padding)); + } + } +}; + +inline void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, TfLitePadding padding, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims, + float* im2col_data, const Dims<4>& im2col_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); + const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + EigenTensorConvFunctor conv_functor; + conv_functor(input_data, im2col_data, batches, input_height, input_width, + input_depth, filter_data, filter_height, filter_width, + output_depth, stride_height, stride_width, pad_height, pad_width, + padding, output_data, output_height, output_width); + + optimized_ops::AddBiasAndEvalActivationFunction( + bias_data, bias_dims, output_data, output_dims, output_activation_min, + output_activation_max); +} + +} // namespace multithreaded_ops +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MULTITHREAD_CONV diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc new file mode 100644 index 0000000000..bf0bdfb1fb --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -0,0 +1,337 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h" + +#ifdef USE_NEON + +#include +#define kFloatWeightsPerNeonLane 4 + +namespace tflite { +namespace tensor_utils { + +void NeonMatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + m_cols - (m_cols & (kFloatWeightsPerNeonLane - 1)); + + // The arrays used to cache the vector. + float32x4_t* vector_cache_float32x4 = + new float32x4_t[(m_cols / kFloatWeightsPerNeonLane) * + sizeof(float32x4_t)]; + const int kUnrollSize = 2; + for (int b = 0; b < n_batch; b++) { + float* result_in_batch = result + b * m_rows * result_stride; + const float* vector_in_batch = vector + b * m_cols; + + const float* matrix_ptr0 = matrix; + // If there is only 1 row, we don't want to assign an illegal pointer. + const float* matrix_ptr1 = nullptr; + if (m_rows > 1) { + matrix_ptr1 = matrix + m_cols; + } + + // Cahce the vector. + for (int c = 0; c < postamble_start; c += kFloatWeightsPerNeonLane) { + vector_cache_float32x4[c >> 2] = vld1q_f32(vector_in_batch + c); + } + + // Main matrix by vector multiplication loop, which handles two rows of + // matrix by vector multiplication. + for (int r = 0; r < (m_rows & ~(kUnrollSize - 1)); r += kUnrollSize) { + float32x4_t acc0_32x4 = vmovq_n_f32(0.0); + float32x4_t acc1_32x4 = vmovq_n_f32(0.0); + for (int c = 0; c < postamble_start; c += kFloatWeightsPerNeonLane) { + float32x4_t temp = vector_cache_float32x4[c >> 2]; + // Load 4 float values from vector1 and vector2 and accumulator. + float32x4_t v0_f32x4 = vld1q_f32(matrix_ptr0 + c); + float32x4_t v1_f32x4 = vld1q_f32(matrix_ptr1 + c); + // Vector multiply-accumulate 4 float + acc0_32x4 = vmlaq_f32(acc0_32x4, v0_f32x4, temp); + acc1_32x4 = vmlaq_f32(acc1_32x4, v1_f32x4, temp); + } + // Add the 4 intermediate sum values to get the final dot-prod value for + // this column. + *result_in_batch += + (vgetq_lane_f32(acc0_32x4, 0) + vgetq_lane_f32(acc0_32x4, 1) + + vgetq_lane_f32(acc0_32x4, 2) + vgetq_lane_f32(acc0_32x4, 3)); + *(result_in_batch + result_stride) += + (vgetq_lane_f32(acc1_32x4, 0) + vgetq_lane_f32(acc1_32x4, 1) + + vgetq_lane_f32(acc1_32x4, 2) + vgetq_lane_f32(acc1_32x4, 3)); + for (int c = postamble_start; c < m_cols; c++) { + *result_in_batch += matrix_ptr0[c] * vector_in_batch[c]; + *(result_in_batch + result_stride) += + matrix_ptr1[c] * vector_in_batch[c]; + } + matrix_ptr0 += kUnrollSize * m_cols; + matrix_ptr1 += kUnrollSize * m_cols; + result_in_batch += kUnrollSize * result_stride; + } + for (int r = (m_rows & ~(kUnrollSize - 1)); r < m_rows; r++) { + float32x4_t acc0_32x4 = vmovq_n_f32(0.0); + for (int c = 0; c < postamble_start; c += kFloatWeightsPerNeonLane) { + float32x4_t temp = vector_cache_float32x4[c >> 2]; + // Load 4 float values from vector1 and vector2 and accumulator. + float32x4_t v0_f32x4 = vld1q_f32(matrix_ptr0 + c); + // Vector multiply-accumulate 4 float + acc0_32x4 = vmlaq_f32(acc0_32x4, v0_f32x4, temp); + } + // Add the 4 intermediate sum values to get the final dot-prod value for + // this column. + *result_in_batch += + (vgetq_lane_f32(acc0_32x4, 0) + vgetq_lane_f32(acc0_32x4, 1) + + vgetq_lane_f32(acc0_32x4, 2) + vgetq_lane_f32(acc0_32x4, 3)); + for (int c = postamble_start; c < m_cols; c++) { + *result_in_batch += matrix_ptr0[c] * vector_in_batch[c]; + } + matrix_ptr0 += m_cols; + result_in_batch += result_stride; + } + } + delete[] vector_cache_float32x4; +} + +void NeonVectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load 4 float values from vector1 and vector2. + float32x4_t v1_f32x4 = vld1q_f32(vector1 + v); + float32x4_t v2_f32x4 = vld1q_f32(vector2 + v); + // Vector multiply 4 float + float32x4_t mul_32x4 = vmulq_f32(v1_f32x4, v2_f32x4); + // Save to result array. + vst1q_f32(&result[v], mul_32x4); + } + for (int v = postamble_start; v < v_size; v++) { + result[v] = vector1[v] * vector2[v]; + } +} + +void NeonVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load 4 float values from vector1 and vector2 and accumulator. + float32x4_t v1_f32x4 = vld1q_f32(vector1 + v); + float32x4_t v2_f32x4 = vld1q_f32(vector2 + v); + float32x4_t acc_32x4 = vld1q_f32(result + v); + // Vector multiply-accumulate 4 float + acc_32x4 = vmlaq_f32(acc_32x4, v1_f32x4, v2_f32x4); + // Save to result array. + vst1q_f32(&result[v], acc_32x4); + } + for (int v = postamble_start; v < v_size; v++) { + result[v] += vector1[v] * vector2[v]; + } +} + +void NeonVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + // The arrays used to cache the vector. + float32x4_t* vector_cache_float32x4 = + new float32x4_t[(v_size / kFloatWeightsPerNeonLane) * + sizeof(float32x4_t)]; + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + vector_cache_float32x4[v >> 2] = vld1q_f32(vector + v); + } + + float* result_ptr = result; + const float* batch_vector_ptr = batch_vector; + for (int b = 0; b < n_batch; b++) { + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load from memory to vectors. + float32x4_t result_f32x4 = vld1q_f32(result_ptr + v); + float32x4_t batch_vector_f32x4 = vld1q_f32(batch_vector_ptr + v); + // Multiply-accumulate. + result_f32x4 = vmlaq_f32(result_f32x4, batch_vector_f32x4, + vector_cache_float32x4[v >> 2]); + // Store. + vst1q_f32(result_ptr + v, result_f32x4); + } + // Postamble loop + for (int v = postamble_start; v < v_size; v++) { + result_ptr[v] += vector[v] * batch_vector_ptr[v]; + } + // Update the pointers. + result_ptr += v_size; + batch_vector_ptr += v_size; + } + delete[] vector_cache_float32x4; +} + +void NeonSub1Vector(const float* vector, int v_size, float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + float32x4_t one_f32x4 = vmovq_n_f32(1.0); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load 4 float values from the current pointers of the input column and + // subtract from 1. + float32x4_t v_f32x4 = vld1q_f32(vector + v); + float32x4_t result_f32x4 = vsubq_f32(one_f32x4, v_f32x4); + // Save to output. + vst1q_f32(result + v, result_f32x4); + } + for (int v = postamble_start; v < v_size; v++) { + result[v] = 1.0f - vector[v]; + } +} + +void NeonClipVector(const float* vector, int v_size, float abs_limit, + float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + // Replicate abs_limit and -abs_limit in two vectors. + const float32x4_t abs_limit_f32x4 = vmovq_n_f32(abs_limit); + const float32x4_t neg_abs_limit_f32x4 = vmovq_n_f32(-abs_limit); + + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load from memory to vector. + float32x4_t v_f32x4 = vld1q_f32(vector + v); + // Clip between abs_limit and -abs_limit. + float32x4_t result_f32x4 = vminq_f32(abs_limit_f32x4, v_f32x4); + result_f32x4 = vmaxq_f32(neg_abs_limit_f32x4, result_f32x4); + // Save to output. + vst1q_f32(result + v, result_f32x4); + } + // Postamble loop. + for (int v = postamble_start; v < v_size; v++) { + result[v] = (abs_limit < vector[v]) ? abs_limit : vector[v]; + result[v] = (-abs_limit > result[v]) ? -abs_limit : result[v]; + } +} + +float NeonVectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + float32x4_t acc_32x4 = vmovq_n_f32(0.0); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load 4 float values from vector1 and vector2 and accumulator. + float32x4_t v1_f32x4 = vld1q_f32(vector1 + v); + float32x4_t v2_f32x4 = vld1q_f32(vector2 + v); + // Vector multiply-accumulate 4 float + acc_32x4 = vmlaq_f32(acc_32x4, v1_f32x4, v2_f32x4); + } + + float result = (vgetq_lane_f32(acc_32x4, 0) + vgetq_lane_f32(acc_32x4, 1) + + vgetq_lane_f32(acc_32x4, 2) + vgetq_lane_f32(acc_32x4, 3)); + // Postamble loop. + for (int v = postamble_start; v < v_size; v++) { + result += vector1[v] * vector2[v]; + } + return result; +} + +void NeonBatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride) { + float* result_ptr = result; + const float* vector1_ptr = vector1; + const float* vector2_ptr = vector2; + for (int b = 0; b < n_batch; b++) { + *result_ptr = NeonVectorVectorDotProduct(vector1_ptr, vector2_ptr, v_size); + vector1_ptr += v_size; + vector2_ptr += v_size; + result_ptr += result_stride; + } +} + +void NeonReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size) { + const float* input_vector_ptr = input_vector; + for (int o = 0; o < output_size; o++) { + // If reduction_size is not divisible by kWeightsPerNeonLane, we cannot use + // the main vectorized loop, and we need to process sequentially. + // postamble_start shows the start index where this should happen. + const int postamble_start = + reduction_size - (reduction_size & (kFloatWeightsPerNeonLane - 1)); + float32x4_t sum_f32x4 = vmovq_n_f32(0.0); + for (int r = 0; r < postamble_start; r += kFloatWeightsPerNeonLane) { + float32x4_t v1_f32x4 = vld1q_f32(input_vector_ptr + r); + sum_f32x4 = vaddq_f32(sum_f32x4, v1_f32x4); + } + output_vector[o] += + (vgetq_lane_f32(sum_f32x4, 0) + vgetq_lane_f32(sum_f32x4, 1) + + vgetq_lane_f32(sum_f32x4, 2) + vgetq_lane_f32(sum_f32x4, 3)); + input_vector_ptr += postamble_start; + + // Postamble loop. + for (int r = postamble_start; r < reduction_size; r++) { + output_vector[o] += *input_vector_ptr++; + } + } +} + +void NeonVectorShiftLeft(float* vector, int v_size, float shift_value) { + // This variable keeps track of the next to the last index which is being + // copied to make sure we are not out of the vector boundary. + int last_index_copy = kFloatWeightsPerNeonLane; + int current_index_copy = 0; + while (last_index_copy < v_size) { + float32x4_t v_f32x4 = vld1q_f32(vector + current_index_copy + 1); + vst1q_f32(vector + current_index_copy, v_f32x4); + current_index_copy += kFloatWeightsPerNeonLane; + last_index_copy += kFloatWeightsPerNeonLane; + } + // Postamble loop. + for (int i = current_index_copy; i < v_size - 1; i++) { + vector[i] = vector[i + 1]; + } + vector[v_size - 1] = shift_value; +} + +} // namespace tensor_utils +} // namespace tflite + +#endif // USE_NEON diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h new file mode 100644 index 0000000000..3a4af87304 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h @@ -0,0 +1,113 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_TENSOR_UTILS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_TENSOR_UTILS_H_ + +// TODO(ghodrat): Remove this header file and the dependency to internal data +// structure. +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h" + +namespace tflite { +namespace tensor_utils { + +void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride) { + NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, + vector, n_batch, result, result_stride); +} + +void VectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result) { + NEON_OR_PORTABLE(VectorVectorCwiseProduct, vector1, vector2, v_size, result); +} + +void VectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result) { + NEON_OR_PORTABLE(VectorVectorCwiseProductAccumulate, vector1, vector2, v_size, + result); +} + +void VectorBatchVectorCwiseProductAccumulate(const float* vector, int v_size, + const float* batch_vector, + int n_batch, float* result) { + NEON_OR_PORTABLE(VectorBatchVectorCwiseProductAccumulate, vector, v_size, + batch_vector, n_batch, result); +} + +float VectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size) { + return NEON_OR_PORTABLE(VectorVectorDotProduct, vector1, vector2, v_size); +} + +void BatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride) { + NEON_OR_PORTABLE(BatchVectorBatchVectorDotProduct, vector1, vector2, v_size, + n_batch, result, result_stride); +} + +void VectorBatchVectorAssign(const float* vector, int v_size, int n_batch, + float* batch_vector) { + PortableVectorBatchVectorAssign(vector, v_size, n_batch, batch_vector); +} + +void ApplySigmoidToVector(const float* vector, int v_size, float* result) { + PortableApplySigmoidToVector(vector, v_size, result); +} + +void ApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, float* result) { + PortableApplyActivationToVector(vector, v_size, activation, result); +} + +void CopyVector(const float* vector, int v_size, float* result) { + PortableCopyVector(vector, v_size, result); +} + +void Sub1Vector(const float* vector, int v_size, float* result) { + NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result); +} + +void ZeroVector(float* vector, int v_size) { + PortableZeroVector(vector, v_size); +} + +float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); } + +void ClipVector(const float* vector, int v_size, float abs_limit, + float* result) { + NEON_OR_PORTABLE(ClipVector, vector, v_size, abs_limit, result); +} + +void VectorShiftLeft(float* vector, int v_size, float shift_value) { + NEON_OR_PORTABLE(VectorShiftLeft, vector, v_size, shift_value); +} + +void ReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size) { + NEON_OR_PORTABLE(ReductionSumVector, input_vector, output_vector, output_size, + reduction_size); +} + +} // namespace tensor_utils +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_TENSOR_UTILS_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h new file mode 100644 index 0000000000..cd565c16a1 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -0,0 +1,3715 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_OPS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_OPS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "third_party/eigen3/Eigen/Core" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "fixedpoint/fixedpoint.h" +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/round.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace optimized_ops { + +// Make a local VectorMap typedef allowing to map a float array +// as a Eigen vector expression. The std::conditional here is to +// construct the suitable Eigen type for the constness of the +// data. Indeed, for const data, we need to produce +// Eigen::Map> +// and not the more straightforward +// Eigen::Map> +template +using VectorMap = typename std::conditional< + std::is_const::value, + Eigen::Map::type, + Eigen::Dynamic, 1>>, + Eigen::Map>>::type; + +template +VectorMap MapAsVector(Scalar* data, const Dims& dims) { + const int size = RequiredBufferSizeForDims(dims); + return VectorMap(data, size, 1); +} + +// Make a local VectorMap typedef allowing to map a float array +// as a Eigen matrix expression. The same explanation as for VectorMap +// above also applies here. +template +using MatrixMap = typename std::conditional< + std::is_const::value, + Eigen::Map::type, + Eigen::Dynamic, Eigen::Dynamic>>, + Eigen::Map>>::type; + +template +MatrixMap MapAsMatrixWithFirstDimAsRows(Scalar* data, + const Dims& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +template +MatrixMap MapAsMatrixWithLastDimAsCols(Scalar* data, + const Dims& dims) { + const int cols = dims.sizes[N - 1]; + int rows = 1; + for (int d = 0; d < N - 1; d++) { + rows *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +template +using ArrayMap = typename std::conditional< + std::is_const::value, + Eigen::Map::type, + Eigen::Dynamic, Eigen::Dynamic>>, + Eigen::Map>>::type; + +template +ArrayMap MapAsArrayWithFirstDimAsRows(Scalar* data, + const Dims& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return ArrayMap(data, rows, cols); +} + +// TODO(b/62193649): this function is only needed as long +// as we have the --variable_batch hack. +template +MatrixMap MapAsMatrixWithGivenNumberOfRows(Scalar* data, + const Dims& dims, + int rows) { + int cols = 1; + bool matched_rows = false; + for (int d = 0; d < N; d++) { + cols *= dims.sizes[d]; + if (cols == rows) { + matched_rows = true; + cols = 1; + } + } + TFLITE_DCHECK(matched_rows); + return MatrixMap(data, rows, cols); +} + +// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING ELEMENT-WISE +// BROADCASTING. +// +// NdArrayDesc describes the shape and memory layout of an N-dimensional +// rectangular array of numbers. +// +// NdArrayDesc is basically identical to Dims defined in types.h. +// However, as Dims is to be deprecated, this class exists as an adaptor +// to enable simple unoptimized implementations of element-wise broadcasting +// operations. +template +struct NdArrayDesc { + // The "extent" of each dimension. Indices along dimension d must be in the + // half-open interval [0, extents[d]). + int extents[N]; + + // The number of *elements* (not bytes) between consecutive indices of each + // dimension. + int strides[N]; +}; + +// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING +// ELEMENT-WISE BROADCASTING. +// +// Same as Offset(), except takes as NdArrayDesc instead of Dims. +inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2, + int i3) { + TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]); + TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]); + TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]); + TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]); + return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + + i3 * desc.strides[3]; +} + +// Given the dimensions of the operands for an element-wise binary broadcast, +// adjusts them so that they can be directly iterated over with simple loops. +// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and +// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr. +// +// This function assumes that the two input shapes are compatible up to +// broadcasting and the shorter one has already been prepended with 1s to be the +// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64), +// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that +// Dims refer to shapes in reverse order. In this case, input0_dims will be +// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1). +// +// When two shapes are compatible up to broadcasting, for each dimension d, +// the input extents are either equal, or one of them is 1. +// +// This function performs the following for each dimension d: +// - If the extents are equal, then do nothing since the loop that walks over +// both of the input arrays is correct. +// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1 +// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows +// array0 to be referenced *at any index* in dimension d and still access the +// same slice. +template +inline void NdArrayDescsForElementwiseBroadcast(const Dims& input0_dims, + const Dims& input1_dims, + NdArrayDesc* desc0_out, + NdArrayDesc* desc1_out) { + TFLITE_DCHECK(desc0_out != nullptr); + TFLITE_DCHECK(desc1_out != nullptr); + + // Copy dims to desc. + for (int i = 0; i < N; ++i) { + desc0_out->extents[i] = input0_dims.sizes[i]; + desc0_out->strides[i] = input0_dims.strides[i]; + desc1_out->extents[i] = input1_dims.sizes[i]; + desc1_out->strides[i] = input1_dims.strides[i]; + } + + // Walk over each dimension. If the extents are equal do nothing. + // Otherwise, set the desc with extent 1 to have extent equal to the other and + // stride 0. + for (int i = 0; i < N; ++i) { + const int extent0 = ArraySize(input0_dims, i); + const int extent1 = ArraySize(input1_dims, i); + if (extent0 != extent1) { + if (extent0 == 1) { + desc0_out->strides[i] = 0; + desc0_out->extents[i] = extent1; + } else { + TFLITE_DCHECK_EQ(extent1, 1); + desc1_out->strides[i] = 0; + desc1_out->extents[i] = extent0; + } + } + } +} + +inline bool AreSameDims(const Dims<4>& dims1, const Dims<4>& dims2) { + for (int i = 0; i < 4; i++) { + if (dims1.sizes[i] != dims2.sizes[i]) { + return false; + } + } + return true; +} + +inline void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims, + float output_activation_min, + float output_activation_max) { +#ifdef USE_NEON + gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction"); + const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3]; + const int array_size = array_dims.sizes[3] * array_dims.strides[3]; + TFLITE_DCHECK_EQ((array_size % bias_size), 0); + float* array_ptr = array_data; + float* array_end_ptr = array_ptr + array_size; + const auto activation_min = vdupq_n_f32(output_activation_min); + const auto activation_max = vdupq_n_f32(output_activation_max); + for (; array_ptr != array_end_ptr; array_ptr += bias_size) { + int i = 0; + for (; i <= bias_size - 16; i += 16) { + auto b0 = vld1q_f32(bias_data + i); + auto b1 = vld1q_f32(bias_data + i + 4); + auto b2 = vld1q_f32(bias_data + i + 8); + auto b3 = vld1q_f32(bias_data + i + 12); + auto a0 = vld1q_f32(array_ptr + i); + auto a1 = vld1q_f32(array_ptr + i + 4); + auto a2 = vld1q_f32(array_ptr + i + 8); + auto a3 = vld1q_f32(array_ptr + i + 12); + auto x0 = vaddq_f32(a0, b0); + auto x1 = vaddq_f32(a1, b1); + auto x2 = vaddq_f32(a2, b2); + auto x3 = vaddq_f32(a3, b3); + x0 = vmaxq_f32(activation_min, x0); + x1 = vmaxq_f32(activation_min, x1); + x2 = vmaxq_f32(activation_min, x2); + x3 = vmaxq_f32(activation_min, x3); + x0 = vminq_f32(activation_max, x0); + x1 = vminq_f32(activation_max, x1); + x2 = vminq_f32(activation_max, x2); + x3 = vminq_f32(activation_max, x3); + vst1q_f32(array_ptr + i, x0); + vst1q_f32(array_ptr + i + 4, x1); + vst1q_f32(array_ptr + i + 8, x2); + vst1q_f32(array_ptr + i + 12, x3); + } + for (; i <= bias_size - 4; i += 4) { + auto b = vld1q_f32(bias_data + i); + auto a = vld1q_f32(array_ptr + i); + auto x = vaddq_f32(a, b); + x = vmaxq_f32(activation_min, x); + x = vminq_f32(activation_max, x); + vst1q_f32(array_ptr + i, x); + } + for (; i < bias_size; i++) { + array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], + output_activation_min, + output_activation_max); + } + } +#else // not NEON + gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction"); + const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3]; + const int array_size = array_dims.sizes[3] * array_dims.strides[3]; + TFLITE_DCHECK_EQ((array_size % bias_size), 0); + for (int array_offset = 0; array_offset < array_size; + array_offset += bias_size) { + for (int i = 0; i < bias_size; i++) { + array_data[array_offset + i] = ActivationFunctionWithMinMax( + array_data[array_offset + i] + bias_data[i], output_activation_min, + output_activation_max); + } + } +#endif +} + +// legacy, for compatibility with old checked-in code +template +void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + AddBiasAndEvalActivationFunction(bias_data, bias_dims, array_data, array_dims, + output_activation_min, + output_activation_max); +} + +template +void Gemm(const Eigen::MatrixBase& lhs, const Eigen::MatrixBase& rhs, + Eigen::MatrixBase* result) { + if (rhs.cols() == 1) { + gemmlowp::ScopedProfilingLabel label("GEMV"); + result->col(0).noalias() = lhs * rhs.col(0); + } else { + gemmlowp::ScopedProfilingLabel label("GEMM"); + result->noalias() = lhs * rhs; + } +} + +inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("FullyConnected"); + // TODO(b/62193649): this convoluted shape computation (determining + // input_rows from the weights_dims, then MapAsMatrixWithGivenNumberOfRows) + // is because the current --variable_batch hack consists in overwriting the + // 3rd dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + // When that is fixed, this should become: + // const auto input_matrix_map = + // MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + const int input_rows = ArraySize(weights_dims, 0); + const auto input_matrix_map = + MapAsMatrixWithGivenNumberOfRows(input_data, input_dims, input_rows); + const auto filter_matrix_map = + MapAsMatrixWithFirstDimAsRows(weights_data, weights_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map); + AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, + output_dims, output_activation_min, + output_activation_max); +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, const Dims<4>& weights_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data, + bias_dims, output_activation_min, output_activation_max, + output_data, output_dims); +} + +inline void preload_l1_stream(const uint8* ptr) { +#ifdef GEMMLOWP_ARM_64 + asm volatile("prfm pldl1strm, [%[ptr]]\n" ::[ptr] "r"(ptr) :); +#else + gemmlowp::Prefetch(ptr); +#endif +} + +#ifdef USE_NEON +inline void FullyConnectedAsGEMV( + const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, + const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int32 output_offset, + int32 output_multiplier, int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("FullyConnectedAsGEMV/8bit"); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + TFLITE_DCHECK_EQ(ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3), + 1); + const int input_size = input_dims.strides[3]; + const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0); + static constexpr int kPeel = 4; + for (int k = 0; k < input_size; k += 64) { + preload_l1_stream(input_data + k); + } + for (int k = 0; k < kPeel * input_size; k += 64) { + preload_l1_stream(filter_data + k); + } + TFLITE_DCHECK(!(output_size % kPeel)); + const int32* bias_ptr = bias_data; + uint8* output_ptr = output_data; + for (int out = 0; out < output_size; out += kPeel) { + int32x4_t acc[kPeel]; + for (int k = 0; k < kPeel; k++) { + acc[k] = vdupq_n_s32(0); + } + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + const int16x8_t filter_offset_vec = vdupq_n_s16(filter_offset); + int in = 0; + for (; in <= input_size - 16; in += 16) { + const uint8x16_t input_val_u8 = vld1q_u8(input_data + in); + uint8x16_t filter_val_u8[kPeel]; + for (int k = 0; k < kPeel; k++) { + const uint8* filter_ptr = filter_data + in + (out + k) * input_size; + filter_val_u8[k] = vld1q_u8(filter_ptr); + preload_l1_stream(filter_ptr + 64); + } + int16x8_t input_val[2]; + const uint8x8_t low = vget_low_u8(input_val_u8); + const uint8x8_t high = vget_high_u8(input_val_u8); + input_val[0] = vreinterpretq_s16_u16(vmovl_u8(low)); + input_val[1] = vreinterpretq_s16_u16(vmovl_u8(high)); + input_val[0] = vaddq_s16(input_val[0], input_offset_vec); + input_val[1] = vaddq_s16(input_val[1], input_offset_vec); + int16x8_t filter_val[kPeel][2]; + for (int k = 0; k < kPeel; k++) { + const uint8x8_t low = vget_low_u8(filter_val_u8[k]); + const uint8x8_t high = vget_high_u8(filter_val_u8[k]); + filter_val[k][0] = vreinterpretq_s16_u16(vmovl_u8(low)); + filter_val[k][1] = vreinterpretq_s16_u16(vmovl_u8(high)); + filter_val[k][0] = vaddq_s16(filter_val[k][0], filter_offset_vec); + filter_val[k][1] = vaddq_s16(filter_val[k][1], filter_offset_vec); + } + for (int p = 0; p < 2; p++) { + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_low_s16(filter_val[k][p]), + vget_low_s16(input_val[p])); + } + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_high_s16(filter_val[k][p]), + vget_high_s16(input_val[p])); + } + } + } + for (; in <= input_size - 8; in += 8) { + const uint8x8_t input_val_u8 = vld1_u8(input_data + in); + uint8x8_t filter_val_u8[kPeel]; + for (int k = 0; k < kPeel; k++) { + const uint8* filter_ptr = filter_data + in + (out + k) * input_size; + filter_val_u8[k] = vld1_u8(filter_ptr); + } + int16x8_t input_val; + input_val = vreinterpretq_s16_u16(vmovl_u8(input_val_u8)); + input_val = vaddq_s16(input_val, input_offset_vec); + int16x8_t filter_val[kPeel]; + for (int k = 0; k < kPeel; k++) { + filter_val[k] = vreinterpretq_s16_u16(vmovl_u8(filter_val_u8[k])); + filter_val[k] = vaddq_s16(filter_val[k], filter_offset_vec); + } + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_low_s16(filter_val[k]), + vget_low_s16(input_val)); + } + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_high_s16(filter_val[k]), + vget_high_s16(input_val)); + } + } + if (in < input_size) { + int32 buf[4 * kPeel]; + for (int k = 0; k < 4; k++) { + vst1q_s32(buf + 4 * k, acc[k]); + } + for (; in < input_size; in++) { + int lane = (in + 8 - input_size) % 4; + const int32 input_val = input_data[in] + input_offset; + for (int k = 0; k < kPeel; k++) { + int32 filter_val = + filter_data[in + (out + k) * input_size] + filter_offset; + buf[lane + 4 * k] += filter_val * input_val; + } + } + for (int k = 0; k < 4; k++) { + acc[k] = vld1q_s32(buf + 4 * k); + } + } + + // Horizontally reduce accumulators + int32x2_t pairwise_reduced_acc[kPeel]; + for (int k = 0; k < kPeel; k++) { + pairwise_reduced_acc[k] = + vpadd_s32(vget_low_s32(acc[k]), vget_high_s32(acc[k])); + } + static_assert(kPeel == 4, "the code below currently assumes kPeel = 4"); + const int32x2_t reduced_lo = + vpadd_s32(pairwise_reduced_acc[0], pairwise_reduced_acc[1]); + const int32x2_t reduced_hi = + vpadd_s32(pairwise_reduced_acc[2], pairwise_reduced_acc[3]); + int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); + // Add bias values. + int32x4_t bias_vec = vld1q_s32(bias_ptr); + bias_ptr += 4; + reduced = vaddq_s32(reduced, bias_vec); + // Multiply by the fixed-point multiplier. + reduced = vqrdmulhq_n_s32(reduced, output_multiplier); + // Rounding-shift-right. + using gemmlowp::RoundingDivideByPOT; + reduced = RoundingDivideByPOT(reduced, output_shift); + // Add the output offset. + const int32x4_t output_offset_vec = vdupq_n_s32(output_offset); + reduced = vaddq_s32(reduced, output_offset_vec); + // Narrow values down to 16 bit signed. + const int16x4_t res16 = vqmovn_s32(reduced); + // Narrow values down to 8 bit unsigned, saturating. + uint8x8_t res8 = vqmovun_s16(vcombine_s16(res16, res16)); + // Apply the clamping from the activation function + res8 = vmax_u8(res8, vdup_n_u8(output_activation_min)); + res8 = vmin_u8(res8, vdup_n_u8(output_activation_max)); + // Store results to destination. Assumes 32bit alignment. + vst1_lane_u32(reinterpret_cast(output_ptr), + vreinterpret_u32_u8(res8), 0); + output_ptr += kPeel; + } +} +#endif // USE_NEON + +struct GemmlowpOutputPipeline { + typedef gemmlowp::VectorMap + ColVectorMap; + typedef std::tuple< + gemmlowp::OutputStageBiasAddition, + gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint, + gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8> + Pipeline; + static Pipeline Make(const int32* bias_data, int output_rows, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max) { + ColVectorMap bias_vector(bias_data, output_rows); + gemmlowp::OutputStageBiasAddition bias_addition_stage; + bias_addition_stage.bias_vector = bias_vector; + gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint + quantize_down_stage; + quantize_down_stage.result_offset_after_shift = output_offset; + quantize_down_stage.result_fixedpoint_multiplier = output_multiplier; + quantize_down_stage.result_shift = output_shift; + gemmlowp::OutputStageClamp clamp_stage; + clamp_stage.min = output_activation_min; + clamp_stage.max = output_activation_max; + gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; + return std::make_tuple(bias_addition_stage, quantize_down_stage, + clamp_stage, saturating_cast_stage); + } +}; + +inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("FullyConnected/8bit"); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); +#ifdef USE_NEON + const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0); + if (batches == 1 && !(output_size % 4)) { + return FullyConnectedAsGEMV( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_data, + output_dims); + } +#endif // USE_NEON + const int filter_rows = filter_dims.sizes[1]; + const int filter_cols = filter_dims.sizes[0]; + TFLITE_DCHECK_EQ(filter_dims.sizes[2], 1); + TFLITE_DCHECK_EQ(filter_dims.sizes[3], 1); + const int output_rows = output_dims.sizes[0]; + TFLITE_DCHECK_EQ(output_rows, filter_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1); + + gemmlowp::MatrixMap filter_matrix( + filter_data, output_rows, filter_cols, filter_cols); + gemmlowp::MatrixMap input_matrix( + input_data, filter_cols, batches, filter_cols); + gemmlowp::MatrixMap output_matrix( + output_data, output_rows, batches, output_rows); + const auto& output_pipeline = GemmlowpOutputPipeline::Make( + bias_data, output_rows, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max); + gemmlowp::GemmWithOutputPipeline( + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims, gemm_context); +} + +template +inline void ExtractPatchIntoBufferColumn( + const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth, + int stride_width, int stride_height, int pad_width, int pad_height, + int in_width, int in_height, int in_depth, int single_buffer_length, + int buffer_id, const T* in_data, T* conv_buffer_data, uint8 byte_zero) { + gemmlowp::ScopedProfilingLabel label("ExtractPatchIntoBufferColumn"); + // This chunk of code reshapes all the inputs corresponding to + // output (b, h, w) to a column vector in conv_buffer(:, buffer_id). + const int kwidth_times_indepth = kwidth * in_depth; + const int inwidth_times_indepth = in_width * in_depth; + const int ih_ungated_start = h * stride_height - pad_height; + const int ih_ungated_end = (ih_ungated_start + kheight); + const int ih_end = std::min(ih_ungated_end, in_height); + const int iw_ungated_start = w * stride_width - pad_width; + const int iw_ungated_end = (iw_ungated_start + kwidth); + const int iw_end = std::min(iw_ungated_end, in_width); + // If the patch is off the edge of the input image, skip writing those rows + // and columns from the patch into the output array. + const int h_offset = std::max(0, -ih_ungated_start); + const int w_offset = std::max(0, -iw_ungated_start); + const int ih_start = std::max(0, ih_ungated_start); + const int iw_start = std::max(0, iw_ungated_start); + const int single_row_num = + std::min(kwidth - w_offset, in_width - iw_start) * in_depth; + const int output_row_offset = (buffer_id * single_buffer_length); + int out_offset = + output_row_offset + (h_offset * kwidth + w_offset) * in_depth; + int in_offset = Offset(input_dims, 0, iw_start, ih_start, b); + + // Express all of the calculations as padding around the input patch. + const int top_padding = h_offset; + const int bottom_padding = (ih_ungated_end - ih_end); + const int left_padding = w_offset; + const int right_padding = (iw_ungated_end - iw_end); + assert(single_row_num == + ((kwidth - (left_padding + right_padding)) * in_depth)); + + // Write out zeroes to the elements representing the top rows of the input + // patch that are off the edge of the input image. + if (top_padding > 0) { + const int top_row_elements = (top_padding * kwidth * in_depth); + memset(conv_buffer_data + output_row_offset, byte_zero, + (top_row_elements * sizeof(T))); + } + + // If the patch is on the interior of the input image horizontally, just copy + // over the rows sequentially, otherwise add zero padding at the start or end. + if ((left_padding == 0) && (right_padding == 0)) { + for (int ih = ih_start; ih < ih_end; ++ih) { + memcpy(conv_buffer_data + out_offset, in_data + in_offset, + single_row_num * sizeof(T)); + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } else { + for (int ih = ih_start; ih < ih_end; ++ih) { + if (left_padding > 0) { + const int left_start = (out_offset - (left_padding * in_depth)); + memset(conv_buffer_data + left_start, byte_zero, + (left_padding * in_depth * sizeof(T))); + } + memcpy(conv_buffer_data + out_offset, in_data + in_offset, + single_row_num * sizeof(T)); + if (right_padding > 0) { + const int right_start = (out_offset + single_row_num); + memset(conv_buffer_data + right_start, byte_zero, + (right_padding * in_depth * sizeof(T))); + } + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } + + // If the bottom of the patch falls off the input image, pad the values + // representing those input rows with zeroes. + if (bottom_padding > 0) { + const int bottom_row_elements = (bottom_padding * kwidth * in_depth); + const int bottom_start = + output_row_offset + + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth); + memset(conv_buffer_data + bottom_start, byte_zero, + (bottom_row_elements * sizeof(T))); + } +} + +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width, + int stride_height, int pad_width, int pad_height, int kheight, + int kwidth, uint8 byte_zero, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Im2col"); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = ArraySize(input_dims, 0); + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + const int output_depth = ArraySize(output_dims, 0); + const int output_width = ArraySize(output_dims, 1); + const int output_height = ArraySize(output_dims, 2); + + int buffer_id = 0; + // Loop over the output nodes. + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < output_height; ++h) { + for (int w = 0; w < output_width; ++w) { + ExtractPatchIntoBufferColumn( + input_dims, w, h, b, kheight, kwidth, stride_width, stride_height, + pad_width, pad_height, input_width, input_height, input_depth, + output_depth, buffer_id, input_data, output_data, byte_zero); + ++buffer_id; + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int kheight, int kwidth, + uint8 byte_zero, T* output_data, const Dims<4>& output_dims) { + Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight, + kwidth, byte_zero, output_data, output_dims); +} + +inline void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + (void)im2col_data; + (void)im2col_dims; + gemmlowp::ScopedProfilingLabel label("Conv"); + + const float* gemm_input_data = nullptr; + const Dims<4>* gemm_input_dims = nullptr; + const int filter_width = ArraySize(filter_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const bool need_im2col = stride_width != 1 || stride_height != 1 || + filter_width != 1 || filter_height != 1; + if (need_im2col) { + TFLITE_DCHECK(im2col_data); + Im2col(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_height, filter_width, 0, im2col_data, + im2col_dims); + gemm_input_data = im2col_data; + gemm_input_dims = &im2col_dims; + } else { + // TODO(aselle): We need to make sure to not send im2col if it is not + // needed. + TFLITE_DCHECK(!im2col_data); + gemm_input_data = input_data; + gemm_input_dims = &input_dims; + } + + const auto im2col_matrix_map = + MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims); + const auto filter_matrix_map = + MapAsMatrixWithLastDimAsCols(filter_data, filter_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map); + + AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, + output_dims, output_activation_min, + output_activation_max); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, + stride_width, stride_height, pad_width, pad_height, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, pad_width, pad_height, output_data, + output_dims, im2col_data, im2col_dims); +} + +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("Conv/8bit"); + + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + + const uint8* gemm_input_data = nullptr; + const Dims<4>* gemm_input_dims = nullptr; + const int filter_width = ArraySize(filter_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const bool need_im2col = stride_width != 1 || stride_height != 1 || + filter_width != 1 || filter_height != 1; + if (need_im2col) { + TFLITE_DCHECK(im2col_data); + const int input_zero_point = -input_offset; + TFLITE_DCHECK_GE(input_zero_point, 0); + TFLITE_DCHECK_LE(input_zero_point, 255); + Im2col(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_height, filter_width, input_zero_point, + im2col_data, im2col_dims); + gemm_input_data = im2col_data; + gemm_input_dims = &im2col_dims; + } else { + TFLITE_DCHECK(!im2col_data); + gemm_input_data = input_data; + gemm_input_dims = &input_dims; + } + + const int gemm_input_rows = gemm_input_dims->sizes[0]; + const int gemm_input_cols = gemm_input_dims->sizes[1] * + gemm_input_dims->sizes[2] * + gemm_input_dims->sizes[3]; + const int filter_rows = filter_dims.sizes[3]; + const int filter_cols = + filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2]; + const int output_rows = output_dims.sizes[0]; + const int output_cols = + output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3]; + TFLITE_DCHECK_EQ(output_rows, filter_rows); + TFLITE_DCHECK_EQ(output_cols, gemm_input_cols); + TFLITE_DCHECK_EQ(filter_cols, gemm_input_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1); + gemmlowp::MatrixMap filter_matrix( + filter_data, filter_rows, filter_cols); + gemmlowp::MatrixMap input_matrix( + gemm_input_data, gemm_input_rows, gemm_input_cols); + gemmlowp::MatrixMap output_matrix( + output_data, output_rows, output_cols); + const auto& output_pipeline = GemmlowpOutputPipeline::Make( + bias_data, output_rows, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max); + gemmlowp::GemmWithOutputPipeline( + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); +} + +// legacy, for compatibility with old checked-in code +template +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, stride_height, + pad_width, pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, stride, pad_width, + pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +template +inline void DepthToSpace(const T* input_data, const Dims<4>& input_dims, + int block_size, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("DepthToSpace"); + + const int input_depth = ArraySize(input_dims, 0); + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + + const int output_depth = ArraySize(output_dims, 0); + const int batch_size = ArraySize(output_dims, 3); + + // Number of continuous values that we can copy in one interation. + const int stride = block_size * output_depth; + + for (int batch = 0; batch < batch_size; ++batch) { + for (int in_h = 0; in_h < input_height; ++in_h) { + const T* input_ptr = input_data + Offset(input_dims, 0, 0, in_h, batch); + for (int offset_h = 0; offset_h < block_size; ++offset_h) { + const T* src = input_ptr; + for (int in_w = 0; in_w < input_width; ++in_w) { + memcpy(output_data, src, stride * sizeof(T)); + output_data += stride; + src += input_depth; + } + input_ptr += stride; + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int kheight, int kwidth, + uint8 byte_zero, T* output_data, const Dims<4>& output_dims) { + Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight, + kwidth, byte_zero, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void ConvAsGemm(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("ConvAsGemm"); + + const auto input_matrix_map = + MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + const auto filter_matrix_map = + MapAsMatrixWithLastDimAsCols(filter_data, filter_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map); + + AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, + output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("ConvAsGemm/8bit"); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + const int input_rows = input_dims.sizes[0]; + const int input_cols = + input_dims.sizes[1] * input_dims.sizes[2] * input_dims.sizes[3]; + const int filter_rows = filter_dims.sizes[3]; + const int filter_cols = + filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2]; + const int output_rows = output_dims.sizes[0]; + const int output_cols = + output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3]; + TFLITE_DCHECK_EQ(output_rows, filter_rows); + TFLITE_DCHECK_EQ(output_cols, input_cols); + TFLITE_DCHECK_EQ(filter_cols, input_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1); + gemmlowp::MatrixMap filter_matrix( + filter_data, output_rows, filter_cols, filter_cols); + gemmlowp::MatrixMap input_matrix( + input_data, filter_cols, output_cols, filter_cols); + gemmlowp::MatrixMap output_matrix( + output_data, output_rows, output_cols, output_rows); + const auto& output_pipeline = GemmlowpOutputPipeline::Make( + bias_data, output_rows, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max); + gemmlowp::GemmWithOutputPipeline( + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); +} + +template +inline void SpaceToDepth(const T* input_data, const Dims<4>& input_dims, + int block_size, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("SpaceToDepth"); + + const int output_depth = ArraySize(output_dims, 0); + const int output_width = ArraySize(output_dims, 1); + const int output_height = ArraySize(output_dims, 2); + + const int input_depth = ArraySize(input_dims, 0); + const int batch_size = ArraySize(input_dims, 3); + + // Number of continuous values that we can copy in one interation. + const int stride = block_size * input_depth; + + for (int batch = 0; batch < batch_size; ++batch) { + for (int out_h = 0; out_h < output_height; ++out_h) { + T* output_ptr = output_data + Offset(output_dims, 0, 0, out_h, batch); + for (int offset_h = 0; offset_h < block_size; ++offset_h) { + T* dst = output_ptr; + for (int out_w = 0; out_w < output_width; ++out_w) { + memcpy(dst, input_data, stride * sizeof(T)); + input_data += stride; + dst += output_depth; + } + output_ptr += stride; + } + } + } +} + +template +void NonGlobalBatchNormalization( + const float* input_data, const Dims<4>& input_dims, const float* mean_data, + const Dims<4>& mean_dims, const float* multiplier_data, + const Dims<4>& multiplier_dims, const float* offset_data, + const Dims<4>& offset_dims, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("NonGlobalBatchNormalization"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input_dims, 2, mean_dims, 2, multiplier_dims, 2, + offset_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input_dims, 1, mean_dims, 1, multiplier_dims, 1, + offset_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, + offset_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + (input_data[Offset(input_dims, c, x, y, b)] - + mean_data[Offset(mean_dims, c, x, y, 0)]) * + multiplier_data[Offset(multiplier_dims, c, x, y, 0)] + + offset_data[Offset(offset_dims, c, x, y, 0)]); + } + } + } + } +} + +template +void GlobalBatchNormalization(const float* input_data, + const Dims<4>& input_dims, const float* mean_data, + const Dims<4>& mean_dims, + const float* multiplier_data, + const Dims<4>& multiplier_dims, + const float* offset_data, + const Dims<4>& offset_dims, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("GlobalBatchNormalization"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, + offset_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + (input_data[Offset(input_dims, c, x, y, b)] - + mean_data[Offset(mean_dims, c, 0, 0, 0)]) * + multiplier_data[Offset(multiplier_dims, c, 0, 0, 0)] + + offset_data[Offset(offset_dims, c, 0, 0, 0)]); + } + } + } + } +} + +inline void Relu(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Relu (not fused)"); + + const auto input = MapAsVector(input_data, input_dims); + auto output = MapAsVector(output_data, output_dims); + output = input.cwiseMax(0.0f); +} + +inline void Relu1(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Relu1 (not fused)"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float upper = 1; + const float lower = -1; + float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +inline void Relu6(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Relu6 (not fused)"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float upper = 6; + const float lower = 0; + float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +template +void L2Normalization(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("L2Normalization"); + static_assert(Ac == FusedActivationFunctionType::kNone, ""); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float squared_l2_norm = 0; + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + squared_l2_norm += val * val; + } + float inverse_l2_norm = 1.0f / std::sqrt(squared_l2_norm); + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + input_data[Offset(input_dims, c, x, y, b)] * inverse_l2_norm; + } + } + } + } +} + +inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt, + int* output_shift) { + *output_shift = 11; + while (input >= (1 << 29)) { + input /= 4; + ++*output_shift; + } + TFLITE_DCHECK_GT(input, 0); + const unsigned max_left_shift_bits = __builtin_clz(input) - 1; + const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2; + const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1; + *output_shift -= left_shift_bit_pairs; + input <<= 2 * left_shift_bit_pairs; + TFLITE_DCHECK_GE(input, (1 << 27)); + TFLITE_DCHECK_LT(input, (1 << 29)); + using gemmlowp::FixedPoint; + using gemmlowp::Rescale; + using gemmlowp::SaturatingRoundingMultiplyByPOT; + // Using 3 integer bits gives us enough room for the internal arithmetic in + // this Newton-Raphson iteration. + using F3 = FixedPoint; + using F0 = FixedPoint; + const F3 fixedpoint_input = F3::FromRaw(input >> 1); + const F3 fixedpoint_half_input = + SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input); + const F3 fixedpoint_half_three = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5); + // Newton-Raphson iteration + // Naive unoptimized starting guess: x = 1 + F3 x = F3::One(); + // Naive unoptimized number of iterations: 5 + for (int i = 0; i < 5; i++) { + const F3 x3 = Rescale<3>(x * x * x); + x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3); + } + const F0 fixedpoint_half_sqrt_2 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.); + x = x * fixedpoint_half_sqrt_2; + *output_inv_sqrt = x.raw(); + if (*output_shift < 0) { + *output_inv_sqrt <<= -*output_shift; + *output_shift = 0; + } +} + +inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, + int32 input_zero_point, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("L2Normalization/8bit"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + TFLITE_DCHECK_EQ(batches, 1); + TFLITE_DCHECK_EQ(height, 1); + TFLITE_DCHECK_EQ(width, 1); + int32 square_l2_norm = 0; + for (int i = 0; i < depth; i++) { + int32 diff = input_data[i] - input_zero_point; + square_l2_norm += diff * diff; + } + int32 inv_l2norm_multiplier; + int inv_l2norm_shift; + GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, + &inv_l2norm_shift); + + for (int i = 0; i < depth; i++) { + int32 diff = input_data[i] - input_zero_point; + int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne( + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + int32 unclamped_output_val = 128 + rescaled_diff; + int32 output_val = std::min(255, std::max(0, unclamped_output_val)); + output_data[i] = static_cast(output_val); + } +} + +inline void Add(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Add"); + /* const int batches = */ MatchingArraySize(input1_dims, 3, input2_dims, 3, + output_dims, 3); + /* const int height = */ MatchingArraySize(input1_dims, 2, input2_dims, 2, + output_dims, 2); + /* const int width = */ MatchingArraySize(input1_dims, 1, input2_dims, 1, + output_dims, 1); + /* const int depth = */ MatchingArraySize(input1_dims, 0, input2_dims, 0, + output_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input1_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(input2_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + + int i = 0; + const int size = input1_dims.sizes[3] * input1_dims.strides[3]; +#ifdef USE_NEON + const auto activation_min = vdupq_n_f32(output_activation_min); + const auto activation_max = vdupq_n_f32(output_activation_max); + for (; i <= size - 16; i += 16) { + auto a10 = vld1q_f32(input1_data + i); + auto a11 = vld1q_f32(input1_data + i + 4); + auto a12 = vld1q_f32(input1_data + i + 8); + auto a13 = vld1q_f32(input1_data + i + 12); + auto a20 = vld1q_f32(input2_data + i); + auto a21 = vld1q_f32(input2_data + i + 4); + auto a22 = vld1q_f32(input2_data + i + 8); + auto a23 = vld1q_f32(input2_data + i + 12); + auto x0 = vaddq_f32(a10, a20); + auto x1 = vaddq_f32(a11, a21); + auto x2 = vaddq_f32(a12, a22); + auto x3 = vaddq_f32(a13, a23); + x0 = vmaxq_f32(activation_min, x0); + x1 = vmaxq_f32(activation_min, x1); + x2 = vmaxq_f32(activation_min, x2); + x3 = vmaxq_f32(activation_min, x3); + x0 = vminq_f32(activation_max, x0); + x1 = vminq_f32(activation_max, x1); + x2 = vminq_f32(activation_max, x2); + x3 = vminq_f32(activation_max, x3); + vst1q_f32(output_data + i, x0); + vst1q_f32(output_data + i + 4, x1); + vst1q_f32(output_data + i + 8, x2); + vst1q_f32(output_data + i + 12, x3); + } + for (; i <= size - 4; i += 4) { + auto a1 = vld1q_f32(input1_data + i); + auto a2 = vld1q_f32(input2_data + i); + auto x = vaddq_f32(a1, a2); + x = vmaxq_f32(activation_min, x); + x = vminq_f32(activation_max, x); + vst1q_f32(output_data + i, x); + } +#endif // NEON + + for (; i < size; i++) { + auto x = input1_data[i] + input2_data[i]; + output_data[i] = ActivationFunctionWithMinMax(x, output_activation_min, + output_activation_max); + } +} + +// legacy, for compatibility with old checked-in code +template +void Add(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + Add(input1_data, input1_dims, input2_data, input2_dims, output_activation_min, + output_activation_max, output_data, output_dims); +} + +template +inline void Add(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, int input2_shift, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + gemmlowp::ScopedProfilingLabel label("Add/8bit"); + /* const int batches = */ MatchingArraySize(input1_dims, 3, input2_dims, 3, + output_dims, 3); + /* const int height = */ MatchingArraySize(input1_dims, 2, input2_dims, 2, + output_dims, 2); + /* const int width = */ MatchingArraySize(input1_dims, 1, input2_dims, 1, + output_dims, 1); + /* const int depth = */ MatchingArraySize(input1_dims, 0, input2_dims, 0, + output_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input1_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(input2_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + + int i = 0; + const int size = input1_dims.sizes[3] * input1_dims.strides[3]; + TFLITE_DCHECK_GT(input1_offset, -256); + TFLITE_DCHECK_GT(input2_offset, -256); + TFLITE_DCHECK_LT(input1_offset, 256); + TFLITE_DCHECK_LT(input2_offset, 256); +#ifdef USE_NEON + for (; i <= size - 8; i += 8) { + const auto input1_val_original = vld1_u8(input1_data + i); + const auto input2_val_original = vld1_u8(input2_data + i); + const auto input1_val_s16 = + vreinterpretq_s16_u16(vmovl_u8(input1_val_original)); + const auto input2_val_s16 = + vreinterpretq_s16_u16(vmovl_u8(input2_val_original)); + const auto input1_val = + vaddq_s16(input1_val_s16, vdupq_n_s16(input1_offset)); + const auto input2_val = + vaddq_s16(input2_val_s16, vdupq_n_s16(input2_offset)); + const auto input1_val_high = vget_high_s16(input1_val); + const auto input1_val_low = vget_low_s16(input1_val); + const auto input2_val_high = vget_high_s16(input2_val); + const auto input2_val_low = vget_low_s16(input2_val); + auto x11 = vmovl_s16(input1_val_low); + auto x12 = vmovl_s16(input1_val_high); + auto x21 = vmovl_s16(input2_val_low); + auto x22 = vmovl_s16(input2_val_high); + const auto left_shift_dup = vdupq_n_s32(left_shift); + x11 = vshlq_s32(x11, left_shift_dup); + x12 = vshlq_s32(x12, left_shift_dup); + x21 = vshlq_s32(x21, left_shift_dup); + x22 = vshlq_s32(x22, left_shift_dup); + x11 = vqrdmulhq_n_s32(x11, input1_multiplier); + x12 = vqrdmulhq_n_s32(x12, input1_multiplier); + x21 = vqrdmulhq_n_s32(x21, input2_multiplier); + x22 = vqrdmulhq_n_s32(x22, input2_multiplier); + const auto input1_shift_dup = vdupq_n_s32(-input1_shift); + const auto input2_shift_dup = vdupq_n_s32(-input2_shift); + x11 = vshlq_s32(x11, input1_shift_dup); + x12 = vshlq_s32(x12, input1_shift_dup); + x21 = vshlq_s32(x21, input2_shift_dup); + x22 = vshlq_s32(x22, input2_shift_dup); + auto s1 = vaddq_s32(x11, x21); + auto s2 = vaddq_s32(x12, x22); + s1 = vqrdmulhq_n_s32(s1, output_multiplier); + s2 = vqrdmulhq_n_s32(s2, output_multiplier); + using gemmlowp::RoundingDivideByPOT; + s1 = RoundingDivideByPOT(s1, output_shift); + s2 = RoundingDivideByPOT(s2, output_shift); + const auto s1_narrowed = vmovn_s32(s1); + const auto s2_narrowed = vmovn_s32(s2); + const auto s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), + vdupq_n_s16(output_offset)); + vst1_u8(output_data + i, vqmovun_s16(s)); + } +#endif // NEON + + for (; i < size; i++) { + const int32 input1_val = input1_offset + input1_data[i]; + const int32 input2_val = input2_offset + input2_data[i]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val + scaled_input2_val; + const int32 raw_output = MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = std::min( + output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = static_cast(clamped_output); + } +} + +template +void Add(const int32* input1_data, const Dims<4>& input1_dims, + const int32* input2_data, const Dims<4>& input2_dims, + int32* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Add/int32"); + TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); + + auto input1_map = MapAsVector(input1_data, input1_dims); + auto input2_map = MapAsVector(input2_data, input2_dims); + auto output_map = MapAsVector(output_data, output_dims); + if (AreSameDims(input1_dims, input2_dims)) { + output_map.array() = input1_map.array() + input2_map.array(); + } else if (RequiredBufferSizeForDims(input2_dims) == 1) { + auto scalar = input2_data[0]; + output_map.array() = input1_map.array() + scalar; + } else if (RequiredBufferSizeForDims(input1_dims) == 1) { + auto scalar = input1_data[0]; + output_map.array() = scalar + input2_map.array(); + } else { + // Should not come here. + TFLITE_DCHECK(false); + } +} + +// TODO(jiawen): We can implement BroadcastAdd on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastAdd is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastAdd(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastAdd"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] + + input2_data[SubscriptToIndex(desc2, c, x, y, b)]); + } + } + } + } +} + +inline void BroadcastAdd(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastAdd/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val + scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +inline void BroadcastAdd(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + BroadcastAdd(left_shift, input1_data, input1_dims, input1_offset, + input1_multiplier, input1_shift, input2_data, input2_dims, + input2_offset, input2_multiplier, input2_shift, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void Mul(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mul"); + /* const int batches = */ MatchingArraySize(input1_dims, 3, input2_dims, 3, + output_dims, 3); + /* const int height = */ MatchingArraySize(input1_dims, 2, input2_dims, 2, + output_dims, 2); + /* const int width = */ MatchingArraySize(input1_dims, 1, input2_dims, 1, + output_dims, 1); + /* const int depth = */ MatchingArraySize(input1_dims, 0, input2_dims, 0, + output_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input1_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(input2_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + + int i = 0; + const int size = input1_dims.sizes[3] * input1_dims.strides[3]; +#ifdef USE_NEON + const auto activation_min = vdupq_n_f32(output_activation_min); + const auto activation_max = vdupq_n_f32(output_activation_max); + for (; i <= size - 16; i += 16) { + auto a10 = vld1q_f32(input1_data + i); + auto a11 = vld1q_f32(input1_data + i + 4); + auto a12 = vld1q_f32(input1_data + i + 8); + auto a13 = vld1q_f32(input1_data + i + 12); + auto a20 = vld1q_f32(input2_data + i); + auto a21 = vld1q_f32(input2_data + i + 4); + auto a22 = vld1q_f32(input2_data + i + 8); + auto a23 = vld1q_f32(input2_data + i + 12); + auto x0 = vmulq_f32(a10, a20); + auto x1 = vmulq_f32(a11, a21); + auto x2 = vmulq_f32(a12, a22); + auto x3 = vmulq_f32(a13, a23); + + x0 = vmaxq_f32(activation_min, x0); + x1 = vmaxq_f32(activation_min, x1); + x2 = vmaxq_f32(activation_min, x2); + x3 = vmaxq_f32(activation_min, x3); + x0 = vminq_f32(activation_max, x0); + x1 = vminq_f32(activation_max, x1); + x2 = vminq_f32(activation_max, x2); + x3 = vminq_f32(activation_max, x3); + + vst1q_f32(output_data + i, x0); + vst1q_f32(output_data + i + 4, x1); + vst1q_f32(output_data + i + 8, x2); + vst1q_f32(output_data + i + 12, x3); + } + for (; i <= size - 4; i += 4) { + auto a1 = vld1q_f32(input1_data + i); + auto a2 = vld1q_f32(input2_data + i); + auto x = vmulq_f32(a1, a2); + + x = vmaxq_f32(activation_min, x); + x = vminq_f32(activation_max, x); + + vst1q_f32(output_data + i, x); + } +#endif // NEON + + for (; i < size; i++) { + auto x = input1_data[i] * input2_data[i]; + output_data[i] = ActivationFunctionWithMinMax(x, output_activation_min, + output_activation_max); + } +} + +// legacy, for compatibility with old checked-in code +template +void Mul(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + Mul(input1_data, input1_dims, input2_data, input2_dims, output_activation_min, + output_activation_max, output_data, output_dims); +} + +template +void Mul(const int32* input1_data, const Dims<4>& input1_dims, + const int32* input2_data, const Dims<4>& input2_dims, + int32* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mul/int32"); + TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); + + auto input1_map = MapAsVector(input1_data, input1_dims); + auto input2_map = MapAsVector(input2_data, input2_dims); + auto output_map = MapAsVector(output_data, output_dims); + if (AreSameDims(input1_dims, input2_dims)) { + output_map.array() = input1_map.array() * input2_map.array(); + } else if (RequiredBufferSizeForDims(input2_dims) == 1) { + auto scalar = input2_data[0]; + output_map.array() = input1_map.array() * scalar; + } else if (RequiredBufferSizeForDims(input1_dims) == 1) { + auto scalar = input1_data[0]; + output_map.array() = scalar * input2_map.array(); + } else { + // Should not come here. + TFLITE_DCHECK(false); + } +} + +// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastMul is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastMul(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastMul"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] * + input2_data[SubscriptToIndex(desc2, c, x, y, b)]); + } + } + } + } +} + +inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastMul/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 unclamped_result = + output_offset + + MultiplyByQuantizedMultiplierSmallerThanOne( + input1_val * input2_val, output_multiplier, output_shift); + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, unclamped_result)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + BroadcastMul(input1_data, input1_dims, input1_offset, input2_data, + input2_dims, input2_offset, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data, output_dims); +} + +template +void Concatenation(int concat_dim, const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Concatenation"); + int concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + for (int j = 0; j < 4; j++) { + if (j != concat_dim) { + MatchingArraySize(*input_dims[i], j, output_dims, j); + } + } + concat_size += ArraySize(*input_dims[i], concat_dim); + } + TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + // for now we dont have a model with a Concatenation + // with fused activation function. + TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); + int outer_size = 1; + for (int i = concat_dim + 1; i < 4; i++) { + outer_size *= output_dims.sizes[i]; + } + Scalar* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = + input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim]; + memcpy(output_ptr, input_data[i] + k * copy_size, + copy_size * sizeof(Scalar)); + output_ptr += copy_size; + } + } +} + +template +void DepthConcatenation(const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + Concatenation(0, input_data, input_dims, inputs_count, + output_data, output_dims); +} + +inline void LstmCell(const float* input_data, const Dims<4>& input_dims, + const float* prev_activ_data, + const Dims<4>& prev_activ_dims, const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, const float* prev_state_data, + const Dims<4>& prev_state_dims, float* output_state_data, + const Dims<4>& output_state_dims, float* output_activ_data, + const Dims<4>& output_activ_dims, float* concat_temp_data, + const Dims<4>& concat_temp_dims, float* activ_temp_data, + const Dims<4>& activ_temp_dims) { + gemmlowp::ScopedProfilingLabel label("LstmCell"); + MatchingArraySize( // batches + input_dims, 3, prev_activ_dims, 3, prev_state_dims, 3, output_state_dims, + 3, output_activ_dims, 3); + MatchingArraySize( // height + input_dims, 2, prev_activ_dims, 2, prev_state_dims, 2, output_state_dims, + 2, output_activ_dims, 2); + MatchingArraySize( // width + input_dims, 1, prev_activ_dims, 1, prev_state_dims, 1, output_state_dims, + 1, output_activ_dims, 1); + TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1); + TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1); + const int input_depth = ArraySize(input_dims, 0); + const int prev_activ_depth = ArraySize(prev_activ_dims, 0); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth); + TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3), + 1); + const int intern_activ_depth = + MatchingArraySize(weights_dims, 1, bias_dims, 0); + TFLITE_CHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = + MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0, + output_state_dims, 0, output_activ_dims, 0); + TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + std::vector concat_input_arrays_data; + std::vector const*> concat_input_arrays_dims; + concat_input_arrays_data.push_back(input_data); + concat_input_arrays_data.push_back(prev_activ_data); + concat_input_arrays_dims.push_back(&input_dims); + concat_input_arrays_dims.push_back(&prev_activ_dims); + Concatenation( + 0, &(concat_input_arrays_data[0]), &(concat_input_arrays_dims[0]), + concat_input_arrays_data.size(), concat_temp_data, concat_temp_dims); + + // Fully connected + FullyConnected( + concat_temp_data, concat_temp_dims, weights_data, weights_dims, bias_data, + bias_dims, activ_temp_data, activ_temp_dims); + + // Map raw arrays to Eigen arrays so we can use Eigen's optimized array + // operations. + ArrayMap activ_temp_map = + MapAsArrayWithFirstDimAsRows(activ_temp_data, activ_temp_dims); + auto input_gate_sm = activ_temp_map.block(0 * output_depth, 0, output_depth, + activ_temp_map.cols()); + auto new_input_sm = activ_temp_map.block(1 * output_depth, 0, output_depth, + activ_temp_map.cols()); + auto forget_gate_sm = activ_temp_map.block(2 * output_depth, 0, output_depth, + activ_temp_map.cols()); + auto output_gate_sm = activ_temp_map.block(3 * output_depth, 0, output_depth, + activ_temp_map.cols()); + ArrayMap prev_state_map = + MapAsArrayWithFirstDimAsRows(prev_state_data, prev_state_dims); + ArrayMap output_state_map = + MapAsArrayWithFirstDimAsRows(output_state_data, output_state_dims); + ArrayMap output_activ_map = + MapAsArrayWithFirstDimAsRows(output_activ_data, output_activ_dims); + + // Combined memory state and final output calculation + gemmlowp::ScopedProfilingLabel label2("MemoryStateAndFinalOutput"); + output_state_map = + input_gate_sm.unaryExpr(Eigen::internal::scalar_sigmoid_op()) * + new_input_sm.tanh() + + forget_gate_sm.unaryExpr(Eigen::internal::scalar_sigmoid_op()) * + prev_state_map; + output_activ_map = + output_gate_sm.unaryExpr(Eigen::internal::scalar_sigmoid_op()) * + output_state_map.tanh(); +} + +template +void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, + int outputs_count, Scalar* const* output_data, + const Dims<4>* const* output_dims) { + gemmlowp::ScopedProfilingLabel label("TensorFlowSplit"); + TFLITE_DCHECK_GE(outputs_count, 1); + for (int i = 0; i < outputs_count; i++) { + /* batches = */ MatchingArraySize(*output_dims[i], 3, input_dims, 3); + /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2); + /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1); + } + const int batches = MatchingArraySize(*output_dims[0], 3, input_dims, 3); + const int height = MatchingArraySize(*output_dims[0], 2, input_dims, 2); + const int width = MatchingArraySize(*output_dims[0], 1, input_dims, 1); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + // for now we dont have a model with a TensorFlowSplit + // with fused activation function. + TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); + const int whb = width * height * batches; + const Scalar* input_ptr = input_data; + for (int k = 0; k < whb; k++) { + for (int i = 0; i < outputs_count; ++i) { + memcpy(output_data[i] + k * output_dims[i]->sizes[0], input_ptr, + output_dims[i]->sizes[0] * sizeof(Scalar)); + input_ptr += output_dims[i]->sizes[0]; + } + } +} + +inline int NodeOffset(int b, int h, int w, int height, int width) { + return (b * height + h) * width + w; +} + +inline void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int kwidth, int kheight, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("AveragePool"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + // TODO(benoitjacob) make this a proper reference impl without Eigen! + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + // TODO(benoitjacob) get rid of the dynamic memory allocation here! + Eigen::VectorXf out_count(out_mat.cols()); + out_count.setZero(); + // Prefill the output to 0. + out_mat.setZero(); + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < input_height; ++h) { + for (int w = 0; w < input_width; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + int hpad = h + pad_height; + int wpad = w + pad_width; + int h_start = + (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1; + int h_end = std::min(hpad / stride_height + 1, output_height); + int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1; + int w_end = std::min(wpad / stride_width + 1, output_width); + // compute elementwise sum + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + int out_offset = NodeOffset(b, ph, pw, output_height, output_width); + out_mat.col(out_offset) += + in_mat.col(NodeOffset(b, h, w, input_height, input_width)); + out_count(out_offset)++; + } + } + } + } + } + // Divide the output by the actual number of elements being averaged over + TFLITE_DCHECK_GT(out_count.minCoeff(), 0); + out_mat.array().rowwise() /= out_count.transpose().array(); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + for (int x = 0; x < output_width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + output_data[Offset(output_dims, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int kwidth, int kheight, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, kwidth, kheight, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, float* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("AveragePool/8bit"); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + const int filter_count = + (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + // 1280 required by Inception v3 + static constexpr int kAccBufferMaxSize = 2048; + TFLITE_DCHECK_LE(depth, kAccBufferMaxSize); + uint16 acc[kAccBufferMaxSize]; + memset(acc, 0, depth * sizeof(acc[0])); + const uint8* input_ptr = + input_data + input_dims.strides[1] * in_x_origin + + input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch; + for (int fy = filter_y_start; fy < filter_y_end; fy++) { + const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] + + filter_x_start * input_dims.strides[1]; + for (int fx = filter_x_start; fx < filter_x_end; fx++) { + int channel = 0; +#ifdef USE_NEON + for (; channel <= depth - 16; channel += 16) { + uint16x8_t acc_reg[2]; + for (int i = 0; i < 2; i++) { + acc_reg[i] = vld1q_u16(acc + channel + 8 * i); + } + uint8x16_t input_reg = vld1q_u8(input_row_ptr); + input_row_ptr += 16; + acc_reg[0] = vaddw_u8(acc_reg[0], vget_low_u8(input_reg)); + acc_reg[1] = vaddw_u8(acc_reg[1], vget_high_u8(input_reg)); + for (int i = 0; i < 2; i++) { + vst1q_u16(acc + channel + 8 * i, acc_reg[i]); + } + } + for (; channel <= depth - 8; channel += 8) { + uint16x8_t acc_reg = vld1q_u16(acc + channel); + uint8x8_t input_reg = vld1_u8(input_row_ptr); + input_row_ptr += 8; + acc_reg = vaddw_u8(acc_reg, input_reg); + vst1q_u16(acc + channel, acc_reg); + } +#endif + for (; channel < depth; ++channel) { + acc[channel] += *input_row_ptr++; + } + } + } + uint8* output_ptr = + output_data + Offset(output_dims, 0, out_x, out_y, batch); + int channel = 0; +#ifdef USE_NEON +#define AVGPOOL_DIVIDING_BY(FILTER_COUNT) \ + if (filter_count == FILTER_COUNT) { \ + for (; channel <= depth - 8; channel += 8) { \ + uint16 buf[8]; \ + for (int i = 0; i < 8; i++) { \ + buf[i] = (acc[channel + i] + FILTER_COUNT / 2) / FILTER_COUNT; \ + } \ + uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf)); \ + buf8 = vmin_u8(buf8, vdup_n_u8(output_activation_max)); \ + buf8 = vmax_u8(buf8, vdup_n_u8(output_activation_min)); \ + vst1_u8(output_ptr + channel, buf8); \ + } \ + } + AVGPOOL_DIVIDING_BY(9) + AVGPOOL_DIVIDING_BY(15) +#undef AVGPOOL_DIVIDING_BY + for (; channel <= depth - 8; channel += 8) { + uint16 buf[8]; + for (int i = 0; i < 8; i++) { + buf[i] = (acc[channel + i] + filter_count / 2) / filter_count; + } + uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf)); + buf8 = vmin_u8(buf8, vdup_n_u8(output_activation_max)); + buf8 = vmax_u8(buf8, vdup_n_u8(output_activation_min)); + vst1_u8(output_ptr + channel, buf8); + } +#endif + for (; channel < depth; ++channel) { + uint16 a = (acc[channel] + filter_count / 2) / filter_count; + a = std::max(a, output_activation_min); + a = std::min(a, output_activation_max); + output_ptr[channel] = static_cast(a); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void MaxPool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int kwidth, int kheight, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("MaxPool"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + // Prefill the output to minimum representable float value + out_mat.setConstant(std::numeric_limits::lowest()); + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < input_height; ++h) { + for (int w = 0; w < input_width; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + int hpad = h + pad_height; + int wpad = w + pad_width; + int h_start = + (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1; + int h_end = std::min(hpad / stride_height + 1, output_height); + int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1; + int w_end = std::min(wpad / stride_width + 1, output_width); + // compute elementwise sum + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + int out_offset = NodeOffset(b, ph, pw, output_height, output_width); + out_mat.col(out_offset) = + out_mat.col(out_offset) + .cwiseMax(in_mat.col( + NodeOffset(b, h, w, input_height, input_width))); + } + } + } + } + } + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + for (int x = 0; x < output_width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + output_data[Offset(output_dims, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int kwidth, int kheight, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, kwidth, kheight, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("MaxPool/8bit"); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + // 2048 required by Inception v3 + static constexpr int kAccBufferMaxSize = 2048; + TFLITE_DCHECK_LE(depth, kAccBufferMaxSize); + uint8 acc[kAccBufferMaxSize]; + memset(acc, 0, depth * sizeof(acc[0])); + const uint8* input_ptr = + input_data + input_dims.strides[1] * in_x_origin + + input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch; + for (int fy = filter_y_start; fy < filter_y_end; fy++) { + const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] + + filter_x_start * input_dims.strides[1]; + for (int fx = filter_x_start; fx < filter_x_end; fx++) { + int channel = 0; +#ifdef USE_NEON + for (; channel <= depth - 16; channel += 16) { + uint8x16_t acc_reg = vld1q_u8(acc + channel); + uint8x16_t input_reg = vld1q_u8(input_row_ptr); + input_row_ptr += 16; + acc_reg = vmaxq_u8(acc_reg, input_reg); + vst1q_u8(acc + channel, acc_reg); + } + + for (; channel <= depth - 8; channel += 8) { + uint8x8_t acc_reg = vld1_u8(acc + channel); + uint8x8_t input_reg = vld1_u8(input_row_ptr); + input_row_ptr += 8; + acc_reg = vmax_u8(acc_reg, input_reg); + vst1_u8(acc + channel, acc_reg); + } +#endif + for (; channel < depth; ++channel) { + acc[channel] = std::max(acc[channel], *input_row_ptr++); + } + } + } + uint8* output_ptr = + output_data + Offset(output_dims, 0, out_x, out_y, batch); + int channel = 0; +#ifdef USE_NEON + for (; channel <= depth - 16; channel += 16) { + uint8x16_t a = vld1q_u8(acc + channel); + a = vminq_u8(a, vdupq_n_u8(output_activation_max)); + a = vmaxq_u8(a, vdupq_n_u8(output_activation_min)); + vst1q_u8(output_ptr + channel, a); + } + for (; channel <= depth - 8; channel += 8) { + uint8x8_t a = vld1_u8(acc + channel); + a = vmin_u8(a, vdup_n_u8(output_activation_max)); + a = vmax_u8(a, vdup_n_u8(output_activation_min)); + vst1_u8(output_ptr + channel, a); + } +#endif + for (; channel < depth; ++channel) { + uint8 a = acc[channel]; + a = std::max(a, output_activation_min); + a = std::min(a, output_activation_max); + output_ptr[channel] = static_cast(a); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void L2Pool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("L2Pool"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + // Actually carry out L2 Pool. Code is written in forward mode: we go through + // the input values once, and write to all the pooled regions that it maps to. + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + Eigen::VectorXf in_square(in_mat.rows()); + Eigen::VectorXf out_count(out_mat.cols()); + out_count.setZero(); + // Prefill the output to 0. + out_mat.setZero(); + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < input_height; ++h) { + for (int w = 0; w < input_width; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + const int hpad = h + pad_height; + const int wpad = w + pad_width; + const int h_start = (hpad < filter_height) + ? 0 + : (hpad - filter_height) / stride_height + 1; + const int h_end = std::min(hpad / stride_height + 1, output_height); + const int w_start = (wpad < filter_width) + ? 0 + : (wpad - filter_width) / stride_width + 1; + const int w_end = std::min(wpad / stride_width + 1, output_width); + // pre-compute square + const int in_offset = w + input_width * (h + input_height * b); + in_square = + in_mat.col(in_offset).array() * in_mat.col(in_offset).array(); + // compute elementwise sum of squares + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + const int out_offset = pw + output_width * (ph + output_height * b); + out_mat.col(out_offset) += in_square; + out_count(out_offset)++; + } + } + } + } + } + + out_count = out_count.array().inverse(); + out_mat = + (out_mat.array().rowwise() * out_count.transpose().array()).cwiseSqrt(); +} + +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + L2Pool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + L2Pool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void LocalResponseNormalization(const float* input_data, + const Dims<4>& input_dims, int range, + float bias, float alpha, float beta, + float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("LocalResponseNormalization"); + /* const int batches = */ MatchingArraySize(input_dims, 3, output_dims, 3); + /* const int height = */ MatchingArraySize(input_dims, 2, output_dims, 2); + /* const int width = */ MatchingArraySize(input_dims, 1, output_dims, 1); + /* const int depth = */ MatchingArraySize(input_dims, 0, output_dims, 0); + + const auto data_in = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto data_out = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + // Carry out local response normalization, vector by vector. + // Since the data are stored column major, making row-wise operation + // probably not memory efficient anyway, we do an explicit for loop over + // the columns. + const int double_range = range * 2; + Eigen::VectorXf padded_square(data_in.rows() + double_range); + padded_square.setZero(); + for (int r = 0; r < data_in.cols(); ++r) { + // Do local response normalization for data_in(:, r) + // first, compute the square and store them in buffer for repeated use + padded_square.block(range, 0, data_in.rows(), 1) = + data_in.col(r).cwiseProduct(data_in.col(r)) * alpha; + // Then, compute the scale and writes them to data_out + float accumulated_scale = 0; + for (int i = 0; i < double_range; ++i) { + accumulated_scale += padded_square(i); + } + for (int i = 0; i < data_in.rows(); ++i) { + accumulated_scale += padded_square(i + double_range); + data_out(i, r) = bias + accumulated_scale; + accumulated_scale -= padded_square(i); + } + } + + // In a few cases, the pow computation could benefit from speedups. + if (beta == 1) { + data_out.array() = data_in.array() * data_out.array().inverse(); + } else if (beta == 0.5) { + data_out.array() = data_in.array() * data_out.array().sqrt().inverse(); + } else { + data_out.array() = data_in.array() * data_out.array().pow(-beta); + } +} + +inline void Softmax(const float* input_data, const Dims<4>& input_dims, + float beta, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Softmax"); + /* const int batches = */ MatchingArraySize(input_dims, 3, output_dims, 3); + /* const int height = */ MatchingArraySize(input_dims, 2, output_dims, 2); + /* const int width = */ MatchingArraySize(input_dims, 1, output_dims, 1); + /* const int depth = */ MatchingArraySize(input_dims, 0, output_dims, 0); + + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + // Compute the exponential first, removing the max coefficient for numerical + // stability. + out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta; + // We are separating out the exp function so that exp can be vectorized. + out_mat = out_mat.array().exp(); + // Normalize to get the activations. + Eigen::Array scale = + out_mat.array().colwise().sum().inverse(); + out_mat.array().rowwise() *= scale; +} + +inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, + int32 input_beta_multiplier, int32 input_beta_left_shift, + int diff_min, uint8* output_data, + const Dims<4>& output_dims) { + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input_beta_multiplier, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. + static const int kScaledDiffIntegerBits = 5; + static const int kAccumulationIntegerBits = 12; + using FixedPointScaledDiff = + gemmlowp::FixedPoint; + using FixedPointAccum = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + + gemmlowp::ScopedProfilingLabel label("Softmax"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int x = 0; x < width; ++x) { + for (int y = 0; y < height; ++y) { + uint8 max_in_row = 0; + for (int c = 0; c < depth; ++c) { + max_in_row = + std::max(max_in_row, input_data[Offset(input_dims, c, x, y, b)]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[Offset(input_dims, c, x, y, b)]) - + max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = + sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + int32 fixed_sum_of_exps = sum_of_exps.raw(); + // TODO(starka): Use a NEON intrinsic like vclzq_u32 instead. + int headroom_plus_one = + __builtin_clz(static_cast(fixed_sum_of_exps)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. + int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; + int32 shifted_sum_minus_one = static_cast( + (static_cast(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast(1) << 31)); + + FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( + FixedPoint0::FromRaw(shifted_sum_minus_one)); + + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[Offset(input_dims, c, x, y, b)]) - + max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32 unsat_output = gemmlowp::RoundingDivideByPOT( + (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); + + output_data[Offset(output_dims, c, x, y, b)] = + std::max(std::min(unsat_output, 255), 0); + + } else { + output_data[Offset(output_dims, c, x, y, b)] = 0; + } + } + } + } + } +} + +inline void Logistic(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Logistic"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = + input_map.array().unaryExpr(Eigen::internal::scalar_sigmoid_op()); +} + +inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Logistic"); + /* batches */ MatchingArraySize(input_dims, 3, output_dims, 3); + /* height */ MatchingArraySize(input_dims, 2, output_dims, 2); + /* width */ MatchingArraySize(input_dims, 1, output_dims, 1); + /* depth */ MatchingArraySize(input_dims, 0, output_dims, 0); + const int size = RequiredBufferSizeForDims(input_dims); + + int c = 0; +#ifdef USE_NEON + // Handle 16 values at a time + for (; c <= size - 16; c += 16) { + // Read input uint8 values, cast to int16 and subtract input_zero_point + uint8x16_t input_val_u8 = vld1q_u8(input_data + c); + int16x8_t input_val_centered_0 = + vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input_val_u8))), + vdupq_n_s16(input_zero_point)); + int16x8_t input_val_centered_1 = + vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(input_val_u8))), + vdupq_n_s16(input_zero_point)); + + // Prepare the bit masks that we will use at the end to implement the logic + // that was expressed in the scalar code with branching: + // if (input_val_centered < -input_range_radius) { + // output_val = 0; + // } else if (input_val_centered > input_range_radius) { + // output_val = 255; + // } else { + // ... + uint16x8_t mask_rightclamp_0 = + vcgtq_s16(input_val_centered_0, vdupq_n_s16(input_range_radius)); + uint16x8_t mask_rightclamp_1 = + vcgtq_s16(input_val_centered_1, vdupq_n_s16(input_range_radius)); + uint16x8_t mask_leftclamp_0 = + vcgeq_s16(input_val_centered_0, vdupq_n_s16(-input_range_radius)); + uint16x8_t mask_leftclamp_1 = + vcgeq_s16(input_val_centered_1, vdupq_n_s16(-input_range_radius)); + uint8x16_t mask_rightclamp = vcombine_u8(vshrn_n_u16(mask_rightclamp_0, 8), + vshrn_n_u16(mask_rightclamp_1, 8)); + uint8x16_t mask_leftclamp = vcombine_u8(vshrn_n_u16(mask_leftclamp_0, 8), + vshrn_n_u16(mask_leftclamp_1, 8)); + + // This performs what is expressed in the scalar code as + // const int32 input_val_rescaled = + // MultiplyByQuantizedMultiplierGreaterThanOne( + // input_val_centered, input_multiplier, input_left_shift); + int32x4_t input_val_rescaled_0 = + vshlq_s32(vmovl_s16(vget_low_s16(input_val_centered_0)), + vdupq_n_s32(input_left_shift)); + int32x4_t input_val_rescaled_1 = + vshlq_s32(vmovl_s16(vget_high_s16(input_val_centered_0)), + vdupq_n_s32(input_left_shift)); + int32x4_t input_val_rescaled_2 = + vshlq_s32(vmovl_s16(vget_low_s16(input_val_centered_1)), + vdupq_n_s32(input_left_shift)); + int32x4_t input_val_rescaled_3 = + vshlq_s32(vmovl_s16(vget_high_s16(input_val_centered_1)), + vdupq_n_s32(input_left_shift)); + input_val_rescaled_0 = + vqrdmulhq_n_s32(input_val_rescaled_0, input_multiplier); + input_val_rescaled_1 = + vqrdmulhq_n_s32(input_val_rescaled_1, input_multiplier); + input_val_rescaled_2 = + vqrdmulhq_n_s32(input_val_rescaled_2, input_multiplier); + input_val_rescaled_3 = + vqrdmulhq_n_s32(input_val_rescaled_3, input_multiplier); + + // Invoke gemmlowp::logistic on FixedPoint wrapping int32x4_t + using FixedPoint4 = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + const FixedPoint4 input_val_f4_0 = + FixedPoint4::FromRaw(input_val_rescaled_0); + const FixedPoint4 input_val_f4_1 = + FixedPoint4::FromRaw(input_val_rescaled_1); + const FixedPoint4 input_val_f4_2 = + FixedPoint4::FromRaw(input_val_rescaled_2); + const FixedPoint4 input_val_f4_3 = + FixedPoint4::FromRaw(input_val_rescaled_3); + const FixedPoint0 output_val_f0_0 = gemmlowp::logistic(input_val_f4_0); + const FixedPoint0 output_val_f0_1 = gemmlowp::logistic(input_val_f4_1); + const FixedPoint0 output_val_f0_2 = gemmlowp::logistic(input_val_f4_2); + const FixedPoint0 output_val_f0_3 = gemmlowp::logistic(input_val_f4_3); + + // Divide by 2^23 as in the scalar code + using gemmlowp::RoundingDivideByPOT; + int32x4_t output_val_s32_0 = RoundingDivideByPOT(output_val_f0_0.raw(), 23); + int32x4_t output_val_s32_1 = RoundingDivideByPOT(output_val_f0_1.raw(), 23); + int32x4_t output_val_s32_2 = RoundingDivideByPOT(output_val_f0_2.raw(), 23); + int32x4_t output_val_s32_3 = RoundingDivideByPOT(output_val_f0_3.raw(), 23); + + // Cast output values to uint8, saturating + int16x8_t output_val_s16_0 = vcombine_s16(vqmovn_s32(output_val_s32_0), + vqmovn_s32(output_val_s32_1)); + int16x8_t output_val_s16_1 = vcombine_s16(vqmovn_s32(output_val_s32_2), + vqmovn_s32(output_val_s32_3)); + uint8x16_t output_val_u8 = vcombine_u8(vqmovun_s16(output_val_s16_0), + vqmovun_s16(output_val_s16_1)); + + // Perform the bit-masking with the bit masks computed at the beginning, + // see the comment there. + output_val_u8 = vorrq_u8(output_val_u8, mask_rightclamp); + output_val_u8 = vandq_u8(output_val_u8, mask_leftclamp); + + // Store back to memory + vst1q_u8(output_data + c, output_val_u8); + } +#endif + // Leftover loop: handle one value at a time with scalar code. + for (; c < size; ++c) { + const uint8 input_val_u8 = input_data[c]; + const int32 input_val_centered = + static_cast(input_val_u8) - input_zero_point; + uint8 output_val; + if (input_val_centered < -input_range_radius) { + output_val = 0; + } else if (input_val_centered > input_range_radius) { + output_val = 255; + } else { + const int32 input_val_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_val_centered, input_multiplier, input_left_shift); + using FixedPoint4 = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled); + const FixedPoint0 output_val_f0 = gemmlowp::logistic(input_val_f4); + using gemmlowp::RoundingDivideByPOT; + int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 23); + if (output_val_s32 == 256) { + output_val_s32 = 255; + } + TFLITE_DCHECK_GE(output_val_s32, 0); + TFLITE_DCHECK_LE(output_val_s32, 255); + output_val = static_cast(output_val_s32); + } + output_data[c] = output_val; + } +} + +inline void Tanh(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Tanh"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = input_map.array().tanh(); +} + +inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, + int32 zero_point, double scale, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Dequantize"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + int32 val = input_data[Offset(input_dims, c, x, y, b)]; + float result = static_cast(scale * (val - zero_point)); + output_data[Offset(output_dims, c, x, y, b)] = result; + } + } + } + } +} + +inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, + float rmin, float rmax, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("FakeQuant"); + + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_DCHECK_LE(rmin, 0.); + TFLITE_DCHECK_GE(rmax, 0.); + + // Determine quantization parameters: zero_point, scale. + using Integer = uint8; + const Integer qmin = std::numeric_limits::min(); + const Integer qmax = std::numeric_limits::max(); + const float qmin_float = qmin; + const float qmax_float = qmax; + int32 zero_point = 0; + float scale = 0.f; + // If rmin==rmax, both must be zero per the above assertion, + // so we are done. + if (rmin != rmax) { + // First determine the scale. + scale = (rmax - rmin) / (qmax_float - qmin_float); + + // Zero-point computation. + // First the initial floating-point computation. The zero-point can be + // determined from solving an affine equation for any known pair + // (real value, corresponding quantized value). + // We know two such pairs: (rmin, qmin) and (rmax, qmax). + // The arithmetic error on the zero point computed from either pair + // will be roughly machine_epsilon * (sum of absolute values of terms) + // so we want to use the variant that adds the smaller terms. + const float zero_point_from_min = qmin_float - rmin / scale; + const float zero_point_from_max = qmax_float - rmax / scale; + const float zero_point_from_min_error = + std::abs(qmin_float) + std::abs(rmin / scale); + const float zero_point_from_max_error = + std::abs(qmax_float) + std::abs(rmax / scale); + + const float zero_point_float = + zero_point_from_min_error < zero_point_from_max_error + ? zero_point_from_min + : zero_point_from_max; + + // Now we need to nudge the zero point to be an integer + // (our zero points are integer, and this is motivated by the requirement + // to be able to represent the real value "0" exactly as a quantized value, + // which is required in multiple places, for example in Im2col with SAME + // padding). + if (zero_point_float < qmin_float) { + zero_point = qmin; + } else if (zero_point_float > qmax_float) { + zero_point = qmax; + } else { + zero_point = static_cast(TfLiteRound(zero_point_float)); + } + // The zero point should always be in the range of quantized value, + // [qmin, qmax]. + TFLITE_DCHECK_GE(zero_point, qmin); + TFLITE_DCHECK_LE(zero_point, qmax); + } + + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + const float src_val = input_data[Offset(input_dims, c, x, y, b)]; + const float unclamped_quantized_val = + TfLiteRound(zero_point + src_val / scale); + const float quantized_val = std::min( + qmax_float, std::max(qmin_float, unclamped_quantized_val)); + const float dst_val = scale * (quantized_val - zero_point); + output_data[Offset(output_dims, c, x, y, b)] = dst_val; + } + } + } + } +} + +template +inline void Cast(const SrcT* input_data, const Dims<4>& input_dims, + DstT* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Cast"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = input_map.array().template cast(); +} + +inline void Floor(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Floor"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = Eigen::floor(input_map.array()); +} + +template +inline void Gather(const T* input_data, const Dims<4>& input_dims, + int input_rank, const int32* coords_data, + const Dims<4>& coords_dims, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Gather"); + + TFLITE_DCHECK(coords_dims.sizes[0] == output_dims.sizes[input_rank - 1]); + int stride = input_dims.strides[input_rank - 1]; + T* out = output_data; + + for (int i = 0; i < coords_dims.sizes[0]; i++) { + TFLITE_DCHECK_GE(coords_data[i], 0); + TFLITE_DCHECK_LT(coords_data[i], input_dims.sizes[input_rank - 1]); + const T* in = input_data + coords_data[i] * stride; + memcpy(out, in, sizeof(T) * stride); + out += stride; + } +} + +#ifdef USE_NEON +inline void ResizeBilinearKernel(const float* input_ptr, int32 depth, + float scale, float* output_ptr) { + int ic = 0; + // Handle 32 input channels at a time. + for (; ic <= depth - 32; ic += 32) { + float32x4x2_t input[4]; + for (int i = 0; i < 4; i++) { + input[i].val[0] = vld1q_f32(input_ptr + 8 * i); + input[i].val[1] = vld1q_f32(input_ptr + 8 * i + 4); + } + float32x4x2_t acc[4]; + for (int i = 0; i < 4; i++) { + acc[i].val[0] = vld1q_f32(output_ptr + 8 * i); + acc[i].val[1] = vld1q_f32(output_ptr + 8 * i + 4); + } + for (int i = 0; i < 4; i++) { + acc[i].val[0] = vmlaq_n_f32(acc[i].val[0], input[i].val[0], scale); + acc[i].val[1] = vmlaq_n_f32(acc[i].val[1], input[i].val[1], scale); + } + for (int i = 0; i < 4; i++) { + vst1q_f32(output_ptr, acc[i].val[0]); + vst1q_f32(output_ptr + 4, acc[i].val[1]); + output_ptr += 8; + } + input_ptr += 32; + } + // Handle 16 input channels at a time. + for (; ic <= depth - 16; ic += 16) { + float32x4x2_t input[2]; + for (int i = 0; i < 2; i++) { + input[i].val[0] = vld1q_f32(input_ptr + 8 * i); + input[i].val[1] = vld1q_f32(input_ptr + 8 * i + 4); + } + float32x4x2_t acc[2]; + for (int i = 0; i < 2; i++) { + acc[i].val[0] = vld1q_f32(output_ptr + 8 * i); + acc[i].val[1] = vld1q_f32(output_ptr + 8 * i + 4); + } + for (int i = 0; i < 2; i++) { + acc[i].val[0] = vmlaq_n_f32(acc[i].val[0], input[i].val[0], scale); + acc[i].val[1] = vmlaq_n_f32(acc[i].val[1], input[i].val[1], scale); + } + for (int i = 0; i < 2; i++) { + vst1q_f32(output_ptr, acc[i].val[0]); + vst1q_f32(output_ptr + 4, acc[i].val[1]); + output_ptr += 8; + } + input_ptr += 16; + } + // Handle 8 input channels at a time. + for (; ic <= depth - 8; ic += 8) { + float32x4x2_t input; + input.val[0] = vld1q_f32(input_ptr); + input.val[1] = vld1q_f32(input_ptr + 4); + + float32x4x2_t acc; + acc.val[0] = vld1q_f32(output_ptr); + acc.val[1] = vld1q_f32(output_ptr + 4); + acc.val[0] = vmlaq_n_f32(acc.val[0], input.val[0], scale); + acc.val[1] = vmlaq_n_f32(acc.val[1], input.val[1], scale); + + vst1q_f32(output_ptr, acc.val[0]); + vst1q_f32(output_ptr + 4, acc.val[1]); + + input_ptr += 8; + output_ptr += 8; + } + // Handle 4 input channels at a time. + for (; ic <= depth - 4; ic += 4) { + float32x4_t input = vld1q_f32(input_ptr); + float32x4_t acc = vld1q_f32(output_ptr); + + acc = vmlaq_n_f32(acc, input, scale); + vst1q_f32(output_ptr, acc); + + input_ptr += 4; + output_ptr += 4; + } + // Handle 1 input channel at a time. + for (; ic < depth; ic++) { + *output_ptr += *input_ptr * scale; + output_ptr++; + input_ptr++; + } +} +#else +inline void ResizeBilinearKernel(const float* input_ptr, int32 depth, + float scale, float* output_ptr) { + for (int32 i = 0; i < depth; i++) { + *output_ptr += *input_ptr * scale; + output_ptr++; + input_ptr++; + } +} +#endif + +inline void ResizeBilinearKernel2x2(int32 x0, int32 x1, int32 y0, int32 y1, + int32 x, int32 y, int32 depth, int32 batch, + const float* input_data, + const Dims<4>& input_dims, + float* output_data, + const Dims<4>& output_dims) { + const int32 input_width = ArraySize(input_dims, 1); + const int32 output_width = ArraySize(output_dims, 1); + + const int32 input_x_offset = (x1 - x0) * depth; + const int32 input_y_offset = (y1 - y0) * depth * input_width; + const int32 output_x_offset = depth; + const int32 output_y_offset = depth * output_width; + +#ifdef USE_NEON + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(x1 >= x0); + TFLITE_DCHECK(y1 >= y0); + + int ic = 0; + // Handle 8 input channels at a time. + for (; ic <= depth - 8; ic += 8) { + const float* input_ptr = nullptr; + + float32x4x2_t x0y0; + input_ptr = &input_data[Offset(input_dims, ic, x0, y0, batch)]; + x0y0.val[0] = vld1q_f32(input_ptr); + x0y0.val[1] = vld1q_f32(input_ptr + 4); + + float32x4x2_t x1y0; + input_ptr += input_x_offset; + x1y0.val[0] = vld1q_f32(input_ptr); + x1y0.val[1] = vld1q_f32(input_ptr + 4); + + float32x4x2_t x0y1; + input_ptr += -input_x_offset + input_y_offset; + x0y1.val[0] = vld1q_f32(input_ptr); + x0y1.val[1] = vld1q_f32(input_ptr + 4); + + float32x4x2_t x1y1; + input_ptr += input_x_offset; + x1y1.val[0] = vld1q_f32(input_ptr); + x1y1.val[1] = vld1q_f32(input_ptr + 4); + + // Top left corner. + float* output_ptr = &output_data[Offset(output_dims, ic, x, y, batch)]; + vst1q_f32(output_ptr, x0y0.val[0]); + vst1q_f32(output_ptr + 4, x0y0.val[1]); + + // Top right corner. + output_ptr += output_x_offset; + float32x4x2_t tr; + tr.val[0] = vaddq_f32(x0y0.val[0], x1y0.val[0]); + tr.val[1] = vaddq_f32(x0y0.val[1], x1y0.val[1]); + tr.val[0] = vmulq_n_f32(tr.val[0], 0.5f); + tr.val[1] = vmulq_n_f32(tr.val[1], 0.5f); + + vst1q_f32(output_ptr, tr.val[0]); + vst1q_f32(output_ptr + 4, tr.val[1]); + + // Bottom left corner. + output_ptr += -output_x_offset + output_y_offset; + float32x4x2_t bl; + bl.val[0] = vaddq_f32(x0y0.val[0], x0y1.val[0]); + bl.val[1] = vaddq_f32(x0y0.val[1], x0y1.val[1]); + bl.val[0] = vmulq_n_f32(bl.val[0], 0.5f); + bl.val[1] = vmulq_n_f32(bl.val[1], 0.5f); + vst1q_f32(output_ptr, bl.val[0]); + vst1q_f32(output_ptr + 4, bl.val[1]); + + // Bottom right corner. + output_ptr += output_x_offset; + float32x4x2_t br; + br.val[0] = vaddq_f32(x1y0.val[0], x1y1.val[0]); + br.val[1] = vaddq_f32(x1y0.val[1], x1y1.val[1]); + br.val[0] = vmlaq_n_f32(bl.val[0], br.val[0], 0.5f); + br.val[1] = vmlaq_n_f32(bl.val[1], br.val[1], 0.5f); + br.val[0] = vmulq_n_f32(br.val[0], 0.5f); + br.val[1] = vmulq_n_f32(br.val[1], 0.5f); + vst1q_f32(output_ptr, br.val[0]); + vst1q_f32(output_ptr + 4, br.val[1]); + } + // Handle 4 input channels at a time. + for (; ic <= depth - 4; ic += 4) { + const float* input_ptr = &input_data[Offset(input_dims, ic, x0, y0, batch)]; + float32x4_t x0y0 = vld1q_f32(input_ptr); + float32x4_t x1y0 = vld1q_f32(input_ptr + input_x_offset); + float32x4_t x0y1 = vld1q_f32(input_ptr + input_y_offset); + float32x4_t x1y1 = vld1q_f32(input_ptr + input_x_offset + input_y_offset); + + // Top left corner. + float* output_ptr = &output_data[Offset(output_dims, ic, x, y, batch)]; + vst1q_f32(output_ptr, x0y0); + + // Top right corner. + output_ptr += output_x_offset; + float32x4_t tr = vaddq_f32(x0y0, x1y0); + tr = vmulq_n_f32(tr, 0.5f); + vst1q_f32(output_ptr, tr); + + // Bottom left corner. + output_ptr += -output_x_offset + output_y_offset; + float32x4_t bl = vaddq_f32(x0y0, x0y1); + bl = vmulq_n_f32(bl, 0.5f); + vst1q_f32(output_ptr, bl); + + // Bottom right corner. + output_ptr += output_x_offset; + float32x4_t br = vaddq_f32(x1y0, x1y1); + br = vmlaq_n_f32(bl, br, 0.5f); + br = vmulq_n_f32(br, 0.5f); + vst1q_f32(output_ptr, br); + } + // Handle one input channel at a time. + for (; ic < depth; ic++) { + const int32 input_offset = Offset(input_dims, ic, x0, y0, batch); + + float x0y0 = input_data[input_offset]; + float x1y0 = input_data[input_offset + input_x_offset]; + float x0y1 = input_data[input_offset + input_y_offset]; + float x1y1 = input_data[input_offset + input_x_offset + input_y_offset]; + + // Top left corner. + const int32 output_offset = Offset(output_dims, ic, x, y, batch); + output_data[output_offset] = x0y0; + + // Top right corner. + output_data[output_offset + output_x_offset] = (x0y0 + x1y0) / 2; + + // Bottom left corner. + float output = (x0y0 + x0y1) / 2; + output_data[output_offset + output_y_offset] = output; + + // Bottom right corner. + output_data[output_offset + output_x_offset + output_y_offset] = + (output + ((x1y0 + x1y1) / 2)) / 2; + } +#else + for (int ch = 0; ch < depth; ch++) { + const int32 input_offset = Offset(input_dims, ch, x0, y0, batch); + + float x0y0 = input_data[input_offset]; + float x1y0 = input_data[input_offset + input_x_offset]; + float x0y1 = input_data[input_offset + input_y_offset]; + float x1y1 = input_data[input_offset + input_x_offset + input_y_offset]; + + // Top left corner. + const int32 output_offset = Offset(output_dims, ch, x, y, batch); + output_data[output_offset] = x0y0; + + // Top right corner. + output_data[output_offset + output_x_offset] = (x0y0 + x1y0) / 2; + + // Bottom left corner. + float output = (x0y0 + x0y1) / 2; + output_data[output_offset + output_y_offset] = output; + + // Bottom right corner. + output_data[output_offset + output_x_offset + output_y_offset] = + (output + ((x1y0 + x1y1) / 2)) / 2; + } +#endif +} + +inline void ResizeBilinear2x2(const float* input_data, + const Dims<4>& input_dims, float* output_data, + const Dims<4>& output_dims, int32 batches, + int32 input_height, int32 input_width, + int32 depth, int32 output_height, + int32 output_width) { + for (int b = 0; b < batches; b++) { + for (int y0 = 0, y = 0; y <= output_height - 2; y += 2, y0++) { + for (int x0 = 0, x = 0; x <= output_width - 2; x += 2, x0++) { + int32 x1 = std::min(x0 + 1, input_width - 1); + int32 y1 = std::min(y0 + 1, input_height - 1); + ResizeBilinearKernel2x2(x0, x1, y0, y1, x, y, depth, b, input_data, + input_dims, output_data, output_dims); + } + } + } +} + +inline void ResizeBilinearGeneric(const float* input_data, + const Dims<4>& input_dims, float* output_data, + const Dims<4>& output_dims, int32 batches, + int32 input_height, int32 input_width, + int32 depth, int32 output_height, + int32 output_width, float height_scale, + float width_scale) { + memset(output_data, 0, + batches * output_height * output_width * depth * sizeof(float)); + + int32 output_offset = 0; + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + float input_y = y * height_scale; + int32 y0 = static_cast(std::floor(input_y)); + int32 y1 = std::min(y0 + 1, input_height - 1); + for (int x = 0; x < output_width; ++x) { + float input_x = x * width_scale; + int32 x0 = static_cast(input_x); + int32 x1 = std::min(x0 + 1, input_width - 1); + float* output_ptr = &output_data[output_offset]; + + // Run kernel on the 4 corners of the bilinear resize algorithm. + int32 input_offset = Offset(input_dims, 0, x0, y0, b); + float scale = (1 - (input_y - y0)) * (1 - (input_x - x0)); + const float* input_ptr = &input_data[input_offset]; + ResizeBilinearKernel(input_ptr, depth, scale, output_ptr); + + input_offset = Offset(input_dims, 0, x1, y0, b); + scale = (1 - (input_y - y0)) * (input_x - x0); + input_ptr = &input_data[input_offset]; + ResizeBilinearKernel(input_ptr, depth, scale, output_ptr); + + input_offset = Offset(input_dims, 0, x0, y1, b); + scale = (input_y - y0) * (1 - (input_x - x0)); + input_ptr = &input_data[input_offset]; + ResizeBilinearKernel(input_ptr, depth, scale, output_ptr); + + input_offset = Offset(input_dims, 0, x1, y1, b); + scale = (input_y - y0) * (input_x - x0); + input_ptr = &input_data[input_offset]; + ResizeBilinearKernel(input_ptr, depth, scale, output_ptr); + + output_offset += depth; + } + } + } +} + +inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, + const int32* output_size_data, + const Dims<4>& output_size_dims, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("ResizeBilinear"); + int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3); + int32 input_height = ArraySize(input_dims, 2); + int32 input_width = ArraySize(input_dims, 1); + int32 depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 3), 1); + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 2), 1); + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 1), 1); + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 0), 2); + int32 output_height = output_size_data[Offset(output_size_dims, 0, 0, 0, 0)]; + int32 output_width = output_size_data[Offset(output_size_dims, 1, 0, 0, 0)]; + + // Specialize for 2x2 upsample. + if (output_height == 2 * input_height && output_width == 2 * input_width) { + ResizeBilinear2x2(input_data, input_dims, output_data, output_dims, batches, + input_height, input_width, depth, output_height, + output_width); + } else { + float height_scale = static_cast(input_height) / output_height; + float width_scale = static_cast(input_width) / output_width; + + ResizeBilinearGeneric(input_data, input_dims, output_data, output_dims, + batches, input_height, input_width, depth, + output_height, output_width, height_scale, + width_scale); + } +} + +template +inline void SpaceToBatchND(const T* input_data, const Dims<4>& input_dims, + const int32* block_shape_data, + const Dims<4>& block_shape_dims, + const int32* paddings_data, + const Dims<4>& paddings_dims, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("SpaceToBatchND"); + + const int output_batch_size = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int input_batch_size = ArraySize(input_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int depth = ArraySize(input_dims, 0); + const int block_shape_height = block_shape_data[0]; + const int block_shape_width = block_shape_data[1]; + const int padding_top = paddings_data[0]; + const int padding_left = paddings_data[2]; + + for (int out_b = 0; out_b < output_batch_size; ++out_b) { + int input_batch = out_b % input_batch_size; + int shift_w = (out_b / input_batch_size) % block_shape_width; + int shift_h = (out_b / input_batch_size) / block_shape_width; + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_b); + if (out_h * block_shape_height < padding_top || + out_h * block_shape_height >= padding_top + input_height || + out_w * block_shape_width < padding_left || + out_w * block_shape_width >= padding_left + input_width) { + memset(out, 0, depth * sizeof(T)); + } else { + const T* in = + input_data + + Offset(input_dims, 0, + (out_w * block_shape_width + shift_w) - padding_left, + (out_h * block_shape_height + shift_h) - padding_top, + input_batch); + memcpy(out, in, depth * sizeof(T)); + } + } + } + } +} + +template +inline void BatchToSpaceND(const T* input_data, const Dims<4>& input_dims, + const int32* block_shape_data, + const Dims<4>& block_shape_dims, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BatchToSpaceND"); + + const int output_batch_size = ArraySize(output_dims, 3); + const int input_batch_size = ArraySize(input_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int depth = ArraySize(input_dims, 0); + const int block_shape_width = block_shape_data[1]; + const int block_shape_height = block_shape_data[0]; + + for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) { + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + int out_batch = in_batch % output_batch_size; + int out_w = in_w * block_shape_width + + (in_batch / output_batch_size) % block_shape_width; + int out_h = in_h * block_shape_height + + (in_batch / output_batch_size) / block_shape_width; + T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_batch); + const T* in = input_data + Offset(input_dims, 0, in_w, in_h, in_batch); + memcpy(out, in, depth * sizeof(T)); + } + } + } +} + +template +inline void Pad(const T* input_data, const Dims<4>& input_dims, + const std::vector& left_paddings, + const std::vector& right_paddings, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Pad"); + const int output_batch = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int output_depth = ArraySize(output_dims, 0); + + const int left_b_padding = left_paddings[3]; + const int left_h_padding = left_paddings[2]; + const int left_w_padding = left_paddings[1]; + const int left_d_padding = left_paddings[0]; + + const int right_b_padding = right_paddings[3]; + const int right_h_padding = right_paddings[2]; + const int right_w_padding = right_paddings[1]; + const int right_d_padding = right_paddings[0]; + + const int input_depth = ArraySize(input_dims, 0); + + if (left_b_padding != 0) { + memset(output_data, 0, + left_b_padding * output_height * output_width * output_depth * + sizeof(T)); + } + for (int out_b = left_b_padding; out_b < output_batch - right_b_padding; + ++out_b) { + if (left_h_padding != 0) { + memset(output_data + Offset(output_dims, 0, 0, 0, out_b), 0, + left_h_padding * output_width * output_depth * sizeof(T)); + } + for (int out_h = left_h_padding; out_h < output_height - right_h_padding; + ++out_h) { + if (left_w_padding != 0) { + memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), 0, + left_w_padding * output_depth * sizeof(T)); + } + for (int out_w = left_w_padding; out_w < output_width - right_w_padding; + ++out_w) { + if (left_d_padding != 0) { + memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), 0, + left_d_padding * sizeof(T)); + } + + T* out = output_data + + Offset(output_dims, left_d_padding, out_w, out_h, out_b); + const T* in = + input_data + Offset(input_dims, 0, out_w - left_w_padding, + out_h - left_h_padding, out_b - left_b_padding); + memcpy(out, in, input_depth * sizeof(T)); + + if (right_d_padding != 0) { + memset( + output_data + Offset(output_dims, output_depth - right_d_padding, + out_w, out_h, out_b), + 0, right_d_padding * sizeof(T)); + } + } + if (right_w_padding != 0) { + memset( + output_data + Offset(output_dims, 0, output_width - right_w_padding, + out_h, out_b), + 0, right_w_padding * output_depth * sizeof(T)); + } + } + if (right_h_padding != 0) { + memset(output_data + Offset(output_dims, 0, 0, + output_height - right_h_padding, out_b), + 0, right_h_padding * output_width * output_depth * sizeof(T)); + } + } + if (right_b_padding != 0) { + memset(output_data + + Offset(output_dims, 0, 0, 0, output_batch - right_b_padding), + 0, + right_b_padding * output_height * output_width * output_depth * + sizeof(T)); + } +} + +template +inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, + int begin_mask, int end_mask, + const std::vector& starts, + const std::vector& stops, + const std::vector& strides, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("StridedSlice"); + const int start_b = (begin_mask & 8) ? 0 : starts[3]; + const int stop_b = (end_mask & 8) ? input_dims.sizes[3] : stops[3]; + const int start_h = (begin_mask & 4) ? 0 : starts[2]; + const int stop_h = (end_mask & 4) ? input_dims.sizes[2] : stops[2]; + const int start_w = (begin_mask & 2) ? 0 : starts[1]; + const int stop_w = (end_mask & 2) ? input_dims.sizes[1] : stops[1]; + const int start_d = (begin_mask & 1) ? 0 : starts[0]; + const int stop_d = (end_mask & 1) ? input_dims.sizes[0] : stops[0]; + + T* out_ptr = output_data; + if (strides[0] == 0) { + for (int in_b = start_b; in_b < stop_b; in_b += strides[3]) { + for (int in_h = start_h; in_h < stop_h; in_h += strides[2]) { + for (int in_w = start_w; in_w < stop_w; in_w += strides[1]) { + const int len = stop_d - start_d; + memcpy(out_ptr, + input_data + Offset(input_dims, start_d, in_w, in_h, in_b), + len * sizeof(T)); + out_ptr += len; + } + } + } + } else { + for (int in_b = start_b; in_b < stop_b; in_b += strides[3]) { + for (int in_h = start_h; in_h < stop_h; in_h += strides[2]) { + for (int in_w = start_w; in_w < stop_w; in_w += strides[1]) { + for (int in_d = start_d; in_d < stop_d; in_d += strides[0]) { + *out_ptr++ = input_data[Offset(input_dims, in_d, in_w, in_h, in_b)]; + } + } + } + } + } +} + +template +inline void Slice(const T* input_data, const Dims<4>& input_dims, + const std::vector& begin, const std::vector& size, + T* output_data, const Dims<4>& output_dims) { + // TODO(dkalenichenko): This op only supports 4D tensors. + TFLITE_DCHECK_EQ(begin.size(), 4); + TFLITE_DCHECK_EQ(size.size(), 4); + const int start_b = begin[3]; + const int stop_b = + size[3] == -1 ? input_dims.sizes[3] - start_b : start_b + size[3]; + const int start_h = begin[2]; + const int stop_h = + size[2] == -1 ? input_dims.sizes[2] - start_b : start_b + size[2]; + const int start_w = begin[1]; + const int stop_w = + size[1] == -1 ? input_dims.sizes[1] - start_b : start_b + size[1]; + const int start_d = begin[0]; + const int stop_d = + size[0] == -1 ? input_dims.sizes[0] - start_d : start_d + size[0]; + + T* out_ptr = output_data; + for (int in_b = start_b; in_b < stop_b; ++in_b) { + for (int in_h = start_h; in_h < stop_h; ++in_h) { + for (int in_w = start_w; in_w < stop_w; ++in_w) { + const int len = stop_d - start_d; + memcpy(out_ptr, + input_data + Offset(input_dims, start_d, in_w, in_h, in_b), + len * sizeof(T)); + out_ptr += len; + } + } + } +} + +template +inline void Mean(const T* input_data, const Dims<4>& input_dims, + const std::vector& reduction_indices, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mean"); + const int output_batch = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int output_depth = ArraySize(output_dims, 0); + + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + + // The current implementation only supports simultaneous reduction over + // width and height. + TFLITE_DCHECK_EQ(reduction_indices.size(), 2); + TFLITE_DCHECK((reduction_indices[0] == 1 && reduction_indices[1] == 2) || + (reduction_indices[0] == 2 && reduction_indices[1] == 1)); + TFLITE_DCHECK_EQ(output_height, 1); + TFLITE_DCHECK_EQ(output_width, 1); + + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + float value = 0; + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + value += input_data[Offset(input_dims, out_d, in_w, in_h, out_b)]; + } + } + output_data[Offset(output_dims, out_d, 0, 0, out_b)] = + value / (input_width * input_height); + } + } +} + +template +void GenericBroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("GenericBroadcastSub"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + } + } + } + } +} + +template +void Sub(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, + const Dims<4>& input2_dims, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Sub"); + + auto input1_map = MapAsVector(input1_data, input1_dims); + auto input2_map = MapAsVector(input2_data, input2_dims); + auto output_map = MapAsVector(output_data, output_dims); + if (AreSameDims(input1_dims, input2_dims)) { + output_map.array() = input1_map.array() - input2_map.array(); + } else if (RequiredBufferSizeForDims(input1_dims) == 1) { + auto scalar = input1_data[0]; + output_map.array() = scalar - input2_map.array(); + } else if (RequiredBufferSizeForDims(input2_dims) == 1) { + auto scalar = input2_data[0]; + output_map.array() = input1_map.array() - scalar; + } else { + GenericBroadcastSub(input1_data, input1_dims, input2_data, input2_dims, + output_data, output_dims); + } +} + +template +void TensorFlowMinimum(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("TensorFlowMinimum"); + auto input1_map = MapAsVector(input1_data, input1_dims); + auto output_map = MapAsVector(output_data, output_dims); + auto min_value = input2_data[0]; + output_map.array() = input1_map.array().min(min_value); +} + +template +void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("TensorFlowMaximum"); + auto input1_map = MapAsVector(input1_data, input1_dims); + auto output_map = MapAsVector(output_data, output_dims); + auto max_value = input2_data[0]; + output_map.array() = input1_map.array().max(max_value); +} +} // namespace optimized_ops +} // namespace tflite + +#if defined OPTIMIZED_OPS_H__IGNORE_DEPRECATED_DECLARATIONS +#undef OPTIMIZED_OPS_H__IGNORE_DEPRECATED_DECLARATIONS +#pragma GCC diagnostic pop +#endif + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_OPS_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h b/tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h new file mode 100644 index 0000000000..f8be99e82f --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h @@ -0,0 +1,138 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TF_LITE_KERNELS_INTERNAL_OPTIMIZED_TENSOR_UTILS_IMPL_H_ +#define TF_LITE_KERNELS_INTERNAL_OPTIMIZED_TENSOR_UTILS_IMPL_H_ + +// TDOD(ghodrat): Remove this header file and the dependency to internal data +// structure. +#include "tensorflow/contrib/lite/builtin_op_data.h" + +#ifndef USE_NEON +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#endif // defined(__ARM_NEON__) || defined(__ARM_NEON) +#endif // USE_NEON + +namespace tflite { +namespace tensor_utils { + +// Multiply a matrix by a batch vector, and store results in a batch-size +// vector. +void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, + int m_rows, int m_cols, + const float* vector, + int n_batch, float* result, + int result_stride); +void NeonMatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride); + +// Cwise product of two vectors. +void PortableVectorVectorCwiseProduct(const float* vector1, + const float* vector2, int v_size, + float* result); +void NeonVectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result); + +// Cwise product and accumulate of two vectors. Since it's a MAC operation, the +// assumption here is that result array is initialized to valid values. +void PortableVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, + int v_size, float* result); +void NeonVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result); + +// Dot product of two vectors. +float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size); +float NeonVectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size); + +// Dot product of two batch vectors. +void PortableBatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride); +void NeonBatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride); + +// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC +// operation, the assumption here is that result array is initialized to valid +// values. +void PortableVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, + float* result); +void NeonVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, float* result); + +// Compute "1.0f - elements of vector" (used in CIFG). +void PortableSub1Vector(const float* vector, int v_size, float* result); +void NeonSub1Vector(const float* vector, int v_size, float* result); + +// Clip elements of a vector using a abs_limit value. +void PortableClipVector(const float* vector, int v_size, float abs_limit, + float* result); +void NeonClipVector(const float* vector, int v_size, float abs_limit, + float* result); + +// Batch vector initialization with another vector. +void PortableVectorBatchVectorAssign(const float* vector, int v_size, + int n_batch, float* batch_vector); + +// Apply sigmoid to elements of a vector. +void PortableApplySigmoidToVector(const float* vector, int v_size, + float* result); + +// Apply activation function to elements of a vector. +void PortableApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, + float* result); + +// Copy vector to another vector. +void PortableCopyVector(const float* vector, int v_size, float* result); + +// Fill vector with 0.f. +void PortableZeroVector(float* vector, int v_size); + +// Limit a float input f between +abs_limit and -abs_limit. +float PortableClip(float f, float abs_limit); + +// Shift left a vector in place with v_size size. +void PortableVectorShiftLeft(float* vector, int v_size, float shift_value); +void NeonVectorShiftLeft(float* vector, int v_size, float shift_value); + +// Reduce-sum on a float input vector: +// input_vector: float pointer to input vector. +// output_vector: float pointer to vector. +// output_size: output vector size. +// reduction_size: number of consecutive elements from input vector which are +// added to get one element of output. +void PortableReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size); +void NeonReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size); + +} // namespace tensor_utils +} // namespace tflite + +#endif // TF_LITE_KERNELS_INTERNAL_OPTIMIZED_TENSOR_UTILS_IMPL_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util.cc new file mode 100644 index 0000000000..98f2e365c5 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.cc @@ -0,0 +1,95 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/internal/compatibility.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/round.h" + +namespace tflite { + +void QuantizeMultiplierSmallerThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* right_shift) { + TFLITE_CHECK(double_multiplier >= 0.); + TFLITE_CHECK(double_multiplier < 1.); + if (double_multiplier == 0.) { + *quantized_multiplier = 0; + *right_shift = 0; + return; + } + TFLITE_CHECK(double_multiplier > 0.); + const double q = std::frexp(double_multiplier, right_shift); + *right_shift *= -1; + + auto q_fixed = static_cast(TfLiteRound(q * (1ll << 31))); + TFLITE_CHECK(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + --*right_shift; + } + TFLITE_CHECK_GE(*right_shift, 0); + TFLITE_CHECK_LE(q_fixed, std::numeric_limits::max()); + *quantized_multiplier = static_cast(q_fixed); +} + +void QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift) { + TFLITE_CHECK(double_multiplier > 1.); + const double q = std::frexp(double_multiplier, left_shift); + auto q_fixed = static_cast(TfLiteRound(q * (1ll << 31))); + TFLITE_CHECK(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + ++*left_shift; + } + TFLITE_CHECK_GE(*left_shift, 0); + TFLITE_CHECK_LE(q_fixed, std::numeric_limits::max()); + *quantized_multiplier = static_cast(q_fixed); +} + +void PreprocessSoftmaxScaling(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, int* left_shift) { + // If the overall multiplier (input and beta) is large, then exp() of an + // input difference of 1 scaled by this will be large. In other words, we + // can cap the multiplier and know that, when it is used, the output will be + // (round to) zero wherever the input is not at the maximum value. + + // If the overall scale is less than one, and input_integer_bits=0, then the + // result is double equivalent of Q0.31 (actually with more precision). Thus + // this generates a Q(input_integer_bits).(31-input_integer_bits) + // representation. + const double input_beta_real_multiplier = std::min( + beta * input_scale * (1 << (31 - input_integer_bits)), (1ll << 31) - 1.0); + + QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, + quantized_multiplier, left_shift); +} + +int CalculateInputRadius(int input_integer_bits, int input_left_shift) { + const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) * + (1ll << (31 - input_integer_bits)) / + (1ll << input_left_shift); + // Tighten bound using floor. Suppose that we could use the exact value. + // After scaling the difference, the result would be at the maximum. Thus we + // must ensure that our value has lower magnitude. + return static_cast(std::floor(max_input_rescaled)); +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.h b/tensorflow/contrib/lite/kernels/internal/quantization_util.h new file mode 100644 index 0000000000..efb7191c8d --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.h @@ -0,0 +1,55 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ +#define PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ + +#include + +namespace tflite { + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of its exponent. +// +// Restricted to the case where the multiplier < 1 (and non-negative). +void QuantizeMultiplierSmallerThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* right_shift); + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of its exponent. +// +// Restricted to the case where the multiplier > 1. +void QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift); + +// This first creates a multiplier in a double equivalent of +// Q(input_integer_bits).(31-input_integer_bits) representation, with extra +// precision in the double's fractional bits. It then splits the result into +// significand and exponent. +void PreprocessSoftmaxScaling(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, int* left_shift); + +// Calculate the largest input that will result in a within-bounds intermediate +// result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words, +// it must not overflow before we reduce the value by multiplication by the +// input multiplier. The negative radius is used as the minimum difference +// in Softmax. +int CalculateInputRadius(int input_integer_bits, int input_left_shift); + +} // namespace tflite + +#endif // PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc new file mode 100644 index 0000000000..d6f306e2cb --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc @@ -0,0 +1,108 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" + +#include +#include + +namespace tflite { +namespace { + +using ::testing::Pair; + +TEST(QuantizationUtilTest, QuantizeMultiplierSmallerThanOne) { + auto quantize = [](double d) { + int32_t q; + int s; + QuantizeMultiplierSmallerThanOne(d, &q, &s); + return std::pair{q, s}; + }; + + EXPECT_DEATH(quantize(-0.1), ""); + EXPECT_THAT(quantize(0.0), Pair(0, 0)); + EXPECT_THAT(quantize(0.25), Pair(1073741824, 1)); + + // Around 0.5 we can see the change in exponent and how we try hard to + // void hitting max int32. + EXPECT_THAT(quantize(0.50 - 5e-9), Pair(2147483627, 1)); + EXPECT_THAT(quantize(0.50 - 1e-10), Pair(1073741824, 0)); + EXPECT_THAT(quantize(0.50), Pair(1073741824, 0)); + + EXPECT_THAT(quantize(0.75), Pair(1610612736, 0)); + EXPECT_THAT(quantize(1 - 1e-9), Pair(2147483646, 0)); + + // If we get close enough to 1.0 it crashes and dies in one of two ways: + // Either the shift becomes negative or we trigger the 'less-than-one' CHECK. + EXPECT_DEATH(quantize(1 - 1e-15), ""); + EXPECT_DEATH(quantize(1 - 1e-17), ""); + EXPECT_DEATH(quantize(1.0), ""); +} + +TEST(QuantizationUtilTest, QuantizeMultiplierGreaterThanOne) { + auto quantize = [](double d) { + int32_t q; + int s; + QuantizeMultiplierGreaterThanOne(d, &q, &s); + return std::pair{q, s}; + }; + + // If we are close enough to 1.0 it crashes. + EXPECT_DEATH(quantize(1 + 1e-16), ""); + + EXPECT_THAT(quantize(1 + 1e-11), Pair(1073741824, 1)); + EXPECT_THAT(quantize(1.25), Pair(1342177280, 1)); + EXPECT_THAT(quantize(1.50), Pair(1610612736, 1)); + EXPECT_THAT(quantize(1.75), Pair(1879048192, 1)); + + // Around the powers of two we see the change in exponent. Also, + // we try hard to avoid hitting max int32. + EXPECT_THAT(quantize(2 - 1e-9), Pair(2147483647, 1)); + EXPECT_THAT(quantize(2 - 1e-11), Pair(1073741824, 2)); + EXPECT_THAT(quantize(2), Pair(1073741824, 2)); +} + +TEST(QuantizationUtilTest, PreprocessSoftmaxScaling) { + auto quantize = [](double beta, double scale, int integer_bits) { + int32_t q; + int s; + PreprocessSoftmaxScaling(beta, scale, integer_bits, &q, &s); + return std::pair{q, s}; + }; + + // If beta * scale is greater than fits in the number of integer bits, the + // result is move near the maximum. Otherwise they quantize as expected. + // With 4 integer bits we can represent up to 16.0. + EXPECT_THAT(quantize(1.0, 16.0, 4), Pair(2147483647, 31)); + EXPECT_THAT(quantize(1.0, 8.0, 4), Pair(1073741824, 31)); + // But with 5 bits we can go further. + EXPECT_THAT(quantize(2.0, 16.0, 5), Pair(2147483647, 31)); + EXPECT_THAT(quantize(2.0, 8.0, 5), Pair(1073741824, 31)); +} + +TEST(QuantizationUtilTest, CalculateInputRadius) { + EXPECT_EQ(CalculateInputRadius(4, 27), 15); + EXPECT_EQ(CalculateInputRadius(3, 27), 14); + EXPECT_EQ(CalculateInputRadius(3, 28), 7); + EXPECT_EQ(CalculateInputRadius(4, 2), 503316480); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h new file mode 100644 index 0000000000..8e0f234545 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h @@ -0,0 +1,115 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ + +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/compatibility.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int input_depth = ArraySize(input_dims, 0); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); + + for (int b = 0; b < batches; ++b) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int ic = 0; ic < input_depth; ++ic) { + for (int m = 0; m < depth_multiplier; m++) { + const int oc = m + ic * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + float total = 0.f; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + float input_value = + input_data[Offset(input_dims, ic, in_x, in_y, b)]; + float filter_value = filter_data[Offset( + filter_dims, oc, filter_x, filter_y, 0)]; + total += (input_value * filter_value); + } + } + } + float bias_value = 0.0f; + if (bias_data) { + bias_value = bias_data[Offset(bias_dims, oc, 0, 0, 0)]; + } + output_data[Offset(output_dims, oc, out_x, out_y, b)] = + ActivationFunctionWithMinMax(total + bias_value, + output_activation_min, + output_activation_max); + } + } + } + } + } +} + +// Legacy, for compatibility with old checked-in code. +template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride_width, stride_height, pad_width, pad_height, + depth_multiplier, output_activation_min, output_activation_max, + output_data, output_dims); +} + +// Legacy, for compatibility with old checked-in code. +template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + float* output_data, const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, pad_width, pad_height, + depth_multiplier, output_data, output_dims); +} + +} // end namespace reference_ops +} // end namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h new file mode 100644 index 0000000000..8a80558b32 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h @@ -0,0 +1,138 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ + +#include + +#include "fixedpoint/fixedpoint.h" +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/compatibility.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int input_depth = ArraySize(input_dims, 0); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); + + for (int b = 0; b < batches; ++b) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int ic = 0; ic < input_depth; ++ic) { + for (int m = 0; m < depth_multiplier; m++) { + const int oc = m + ic * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32 acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + int32 input_val = + input_data[Offset(input_dims, ic, in_x, in_y, b)]; + int32 filter_val = filter_data[Offset(filter_dims, oc, + filter_x, filter_y, 0)]; + acc += + (filter_val + filter_offset) * (input_val + input_offset); + } + } + } + if (bias_data) { + acc += bias_data[Offset(bias_dims, oc, 0, 0, 0)]; + } + acc = MultiplyByQuantizedMultiplierSmallerThanOne( + acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_dims, oc, out_x, out_y, b)] = + static_cast(acc); + } + } + } + } + } +} + +// Legacy, for compatibility with old checked-in code. +template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, + stride_height, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +// Legacy, for compatibility with old checked-in code. +template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, input_offset, filter_data, + filter_dims, filter_offset, bias_data, bias_dims, stride, + stride, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +} // end namespace reference_ops +} // end namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc new file mode 100644 index 0000000000..c5b0bccc9d --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -0,0 +1,165 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace tensor_utils { + +float PortableClip(float f, float abs_limit) { + float result = (abs_limit < f) ? abs_limit : f; + result = (-abs_limit > result) ? -abs_limit : result; + return result; +} + +void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, + int m_rows, int m_cols, + const float* vector, + int n_batch, float* result, + int result_stride) { + float* result_in_batch = result; + for (int b = 0; b < n_batch; b++) { + const float* matrix_ptr = matrix; + for (int r = 0; r < m_rows; r++) { + const float* vector_in_batch = vector + b * m_cols; + for (int c = 0; c < m_cols; c++) { + *result_in_batch += *matrix_ptr++ * *vector_in_batch++; + } + result_in_batch += result_stride; + } + } +} + +void PortableVectorVectorCwiseProduct(const float* vector1, + const float* vector2, int v_size, + float* result) { + for (int v = 0; v < v_size; v++) { + *result++ = *vector1++ * *vector2++; + } +} + +float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size) { + float result = 0.0; + for (int v = 0; v < v_size; v++) { + result += *vector1++ * *vector2++; + } + return result; +} + +void PortableBatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride) { + float* result_ptr = result; + const float* vector1_ptr = vector1; + const float* vector2_ptr = vector2; + for (int b = 0; b < n_batch; b++) { + *result_ptr = + PortableVectorVectorDotProduct(vector1_ptr, vector2_ptr, v_size); + vector1_ptr += v_size; + vector2_ptr += v_size; + result_ptr += result_stride; + } +} + +void PortableVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, + int v_size, float* result) { + for (int v = 0; v < v_size; v++) { + *result++ += *vector1++ * *vector2++; + } +} + +void PortableVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, + float* result) { + for (int b = 0; b < n_batch; b++) { + for (int v = 0; v < v_size; v++) { + *result++ += vector[v] * *batch_vector++; + } + } +} + +void PortableVectorBatchVectorAssign(const float* vector, int v_size, + int n_batch, float* batch_vector) { + for (int b = 0; b < n_batch; b++) { + memcpy(batch_vector + b * v_size, vector, v_size * sizeof(float)); + } +} + +void PortableApplySigmoidToVector(const float* vector, int v_size, + float* result) { + auto sigmoid_func = ActivationFunctor(kTfLiteActSigmoid); + for (int v = 0; v < v_size; v++) { + *result++ = (sigmoid_func)(*vector++); + } +} + +void PortableApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, + float* result) { + auto activation_func = ActivationFunctor(activation); + for (int v = 0; v < v_size; v++) { + *result++ = (activation_func)(*vector++); + } +} + +void PortableCopyVector(const float* vector, int v_size, float* result) { + memcpy(result, vector, v_size * sizeof(float)); +} + +void PortableSub1Vector(const float* vector, int v_size, float* result) { + for (int v = 0; v < v_size; v++) { + *result++ = 1.0f - *vector++; + } +} + +void PortableZeroVector(float* vector, int v_size) { + memset(vector, 0, v_size * sizeof(float)); +} + +void PortableClipVector(const float* vector, int v_size, float abs_limit, + float* result) { + for (int v = 0; v < v_size; v++) { + *result++ = PortableClip(*vector++, abs_limit); + } +} + +void PortableVectorShiftLeft(float* vector, int v_size, float shift_value) { + TF_LITE_ASSERT(v_size > 0); + for (int i = 0; i < v_size - 1; i++) { + vector[i] = vector[i + 1]; + } + vector[v_size - 1] = shift_value; +} + +void PortableReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size) { + const float* input_vector_ptr = input_vector; + for (int o = 0; o < output_size; o++) { + for (int r = 0; r < reduction_size; r++) { + output_vector[o] += *input_vector_ptr++; + } + } +} + +} // namespace tensor_utils +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h new file mode 100644 index 0000000000..c2ab78000b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h @@ -0,0 +1,189 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ + +// TDOD(ghodrat): Remove this header file and the dependency to internal data +// structure. +#include "tensorflow/contrib/lite/builtin_op_data.h" + +namespace tflite { +namespace tensor_utils { + +// Limit a float input f betweeen +abs_limit and -abs_limit. +float PortableClip(float f, float abs_limit); + +// Multiply a matrix by a batch vector, and store results in a batch-size +// vector. +void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, + int m_rows, int m_cols, + const float* vector, + int n_batch, float* result, + int result_stride); + +// Cwise product of two vectors. +void PortableVectorVectorCwiseProduct(const float* vector1, + const float* vector2, int v_size, + float* result); + +// Cwise product and accumulate of two vectors. Since it's a MAC opertation, the +// assumption here is that result array is initialized to valid values. +void PortableVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, + int v_size, float* result); + +// Dot product of two vectors. +float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size); + +// Dot product of two batch vectors. +void PortableBatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride); + +// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC +// operation, the assumption here is that result array is initialized to valid +// values. +void PortableVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, + float* result); + +// Batch vector initialization with another vector. +void PortableVectorBatchVectorAssign(const float* vector, int v_size, + int n_batch, float* batch_vector); + +// Apply sigmoid to elements of a vector. +void PortableApplySigmoidToVector(const float* vector, int v_size, + float* result); + +// Apply activation function to elements of a vector. +void PortableApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, + float* result); + +// Copy vector to another vector. +void PortableCopyVector(const float* vector, int v_size, float* result); + +// Compute "1.0f - elements of vector" (used in CIFG). +void PortableSub1Vector(const float* vector, int v_size, float* result); + +// Fill vector with 0.f. +void PortableZeroVector(float* vector, int v_size); + +// Clip elements of a vector using a abs_limit value. +void PortableClipVector(const float* vector, int v_size, float abs_limit, + float* result); + +// Shift left a vector in place with v_size size. +void PortableVectorShiftLeft(float* vector, int v_size, float shift_value); + +// Reduce-sum on a float input vector: +// input_vector: float pointer to input vector. +// output_vector: float pointer to vector. +// output_size: output vector size. +// reduction_size: number of consecutive elements from input vector which are +// added to get one element of output. +void PortableReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size); + +float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); } + +void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride) { + PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, + n_batch, result, result_stride); +} + +void VectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result) { + PortableVectorVectorCwiseProduct(vector1, vector2, v_size, result); +} + +void VectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result) { + PortableVectorVectorCwiseProductAccumulate(vector1, vector2, v_size, result); +} + +void VectorBatchVectorCwiseProductAccumulate(const float* vector, int v_size, + const float* batch_vector, + int n_batch, float* result) { + PortableVectorBatchVectorCwiseProductAccumulate(vector, v_size, batch_vector, + n_batch, result); +} + +float VectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size) { + return PortableVectorVectorDotProduct(vector1, vector2, v_size); +} + +void BatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride) { + PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch, + result, result_stride); +} + +void VectorBatchVectorAssign(const float* vector, int v_size, int n_batch, + float* batch_vector) { + PortableVectorBatchVectorAssign(vector, v_size, n_batch, batch_vector); +} + +void ApplySigmoidToVector(const float* vector, int v_size, float* result) { + PortableApplySigmoidToVector(vector, v_size, result); +} + +void ApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, float* result) { + PortableApplyActivationToVector(vector, v_size, activation, result); +} + +void CopyVector(const float* vector, int v_size, float* result) { + PortableCopyVector(vector, v_size, result); +} + +void Sub1Vector(const float* vector, int v_size, float* result) { + PortableSub1Vector(vector, v_size, result); +} + +void ZeroVector(float* vector, int v_size) { + PortableZeroVector(vector, v_size); +} + +void ClipVector(const float* vector, int v_size, float abs_limit, + float* result) { + PortableClipVector(vector, v_size, abs_limit, result); +} + +void VectorShiftLeft(float* vector, int v_size, float shift_value) { + PortableVectorShiftLeft(vector, v_size, shift_value); +} + +void ReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size) { + PortableReductionSumVector(input_vector, output_vector, output_size, + reduction_size); +} + +} // namespace tensor_utils +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h new file mode 100644 index 0000000000..b9ca3d5c62 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -0,0 +1,2455 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_REFERENCE_OPS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_REFERENCE_OPS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "third_party/eigen3/Eigen/Core" +#include "fixedpoint/fixedpoint.h" +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/round.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline int32 MultiplyByQuantizedMultiplierSmallerThanOne( + int32 x, int32 quantized_multiplier, int right_shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + return RoundingDivideByPOT( + SaturatingRoundingDoublingHighMul(x, quantized_multiplier), right_shift); +} + +inline int32 MultiplyByQuantizedMultiplierGreaterThanOne( + int32 x, int32 quantized_multiplier, int left_shift) { + using gemmlowp::SaturatingRoundingDoublingHighMul; + return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), + quantized_multiplier); +} + +template +int CountLeadingZeros(T integer_input) { + static_assert(std::is_unsigned::value, + "Only unsigned integer types handled."); + const T one_in_leading_positive = static_cast(1) + << (std::numeric_limits::digits - 1); + int leading_zeros = 0; + while (integer_input < one_in_leading_positive) { + integer_input <<= 1; + ++leading_zeros; + } + return leading_zeros; +} + +// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING ELEMENT-WISE +// BROADCASTING. +// +// NdArrayDesc describes the shape and memory layout of an N-dimensional +// rectangular array of numbers. +// +// NdArrayDesc is basically identical to Dims defined in types.h. +// However, as Dims is to be deprecated, this class exists as an adaptor +// to enable simple unoptimized implementations of element-wise broadcasting +// operations. +template +struct NdArrayDesc { + // The "extent" of each dimension. Indices along dimension d must be in the + // half-open interval [0, extents[d]). + int extents[N]; + + // The number of *elements* (not bytes) between consecutive indices of each + // dimension. + int strides[N]; +}; + +// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING +// ELEMENT-WISE BROADCASTING. +// +// Same as Offset(), except takes as NdArrayDesc instead of Dims. +inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2, + int i3) { + TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]); + TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]); + TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]); + TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]); + return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + + i3 * desc.strides[3]; +} + +// Given the dimensions of the operands for an element-wise binary broadcast, +// adjusts them so that they can be directly iterated over with simple loops. +// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and +// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr. +// +// This function assumes that the two input shapes are compatible up to +// broadcasting and the shorter one has already been prepended with 1s to be the +// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64), +// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that +// Dims refer to shapes in reverse order. In this case, input0_dims will be +// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1). +// +// When two shapes are compatible up to broadcasting, for each dimension d, +// the input extents are either equal, or one of them is 1. +// +// This function performs the following for each dimension d: +// - If the extents are equal, then do nothing since the loop that walks over +// both of the input arrays is correct. +// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1 +// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows +// array0 to be referenced *at any index* in dimension d and still access the +// same slice. +template +inline void NdArrayDescsForElementwiseBroadcast(const Dims& input0_dims, + const Dims& input1_dims, + NdArrayDesc* desc0_out, + NdArrayDesc* desc1_out) { + TFLITE_DCHECK(desc0_out != nullptr); + TFLITE_DCHECK(desc1_out != nullptr); + + // Copy dims to desc. + for (int i = 0; i < N; ++i) { + desc0_out->extents[i] = input0_dims.sizes[i]; + desc0_out->strides[i] = input0_dims.strides[i]; + desc1_out->extents[i] = input1_dims.sizes[i]; + desc1_out->strides[i] = input1_dims.strides[i]; + } + + // Walk over each dimension. If the extents are equal do nothing. + // Otherwise, set the desc with extent 1 to have extent equal to the other and + // stride 0. + for (int i = 0; i < N; ++i) { + const int extent0 = ArraySize(input0_dims, i); + const int extent1 = ArraySize(input1_dims, i); + if (extent0 != extent1) { + if (extent0 == 1) { + desc0_out->strides[i] = 0; + desc0_out->extents[i] = extent1; + } else { + TFLITE_DCHECK_EQ(extent1, 1); + desc1_out->strides[i] = 0; + desc1_out->extents[i] = extent0; + } + } + } +} + +inline void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + (void)im2col_data; // only used in optimized code. + (void)im2col_dims; // only used in optimized code. + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); + const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); + if (bias_data) { + TFLITE_DCHECK_EQ(ArraySize(filter_dims, 3), ArraySize(bias_dims, 0)); + } + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + float total = 0.f; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + float input_value = input_data[Offset(input_dims, in_channel, + in_x, in_y, batch)]; + float filter_value = + filter_data[Offset(filter_dims, in_channel, filter_x, + filter_y, out_channel)]; + total += (input_value * filter_value); + } + } + } + } + float bias_value = 0.0f; + if (bias_data) { + bias_value = bias_data[Offset(bias_dims, out_channel, 0, 0, 0)]; + } + output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] = + ActivationFunctionWithMinMax(total + bias_value, + output_activation_min, + output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, + stride_width, stride_height, pad_width, pad_height, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, pad_width, pad_height, output_data, + output_dims, im2col_data, im2col_dims); +} + +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + (void)im2col_data; // only used in optimized code. + (void)im2col_dims; // only used in optimized code. + (void)gemm_context; // only used in optimized code. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); + const int output_depth = + MatchingArraySize(filter_dims, 3, bias_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32 acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + int32 input_val = input_data[Offset(input_dims, in_channel, + in_x, in_y, batch)]; + int32 filter_val = + filter_data[Offset(filter_dims, in_channel, filter_x, + filter_y, out_channel)]; + acc += + (filter_val + filter_offset) * (input_val + input_offset); + } + } + } + } + if (bias_data) { + acc += bias_data[Offset(bias_dims, out_channel, 0, 0, 0)]; + } + acc = MultiplyByQuantizedMultiplierSmallerThanOne( + acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] = + static_cast(acc); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, stride_height, + pad_width, pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) { + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, stride, pad_width, + pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims, im2col_data, im2col_dims, gemm_context); +} + +template +inline void DepthToSpace(const T* input_data, const Dims<4>& input_dims, + int block_size, T* output_data, + const Dims<4>& output_dims) { + const int input_depth = ArraySize(input_dims, 0); + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + const int input_batch = ArraySize(input_dims, 3); + + const int output_depth = ArraySize(output_dims, 0); + const int output_width = ArraySize(output_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_batch = ArraySize(output_dims, 3); + + TFLITE_DCHECK_EQ(input_width * block_size, output_width); + TFLITE_DCHECK_EQ(input_height * block_size, output_height); + TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size); + TFLITE_DCHECK_EQ(input_batch, output_batch); + + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + const int in_d = + out_d + ((out_h % block_size) * block_size + out_w % block_size) * + output_depth; + const int in_w = out_w / block_size; + const int in_h = out_h / block_size; + const int in_b = out_b; + + const int output_index = + Offset(output_dims, out_d, out_w, out_h, out_b); + const int input_index = Offset(input_dims, in_d, in_w, in_h, in_b); + + output_data[output_index] = input_data[input_index]; + } + } + } + } +} + +template +inline void SpaceToDepth(const T* input_data, const Dims<4>& input_dims, + int block_size, T* output_data, + const Dims<4>& output_dims) { + const int input_depth = ArraySize(input_dims, 0); + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + const int input_batch = ArraySize(input_dims, 3); + + const int output_depth = ArraySize(output_dims, 0); + const int output_width = ArraySize(output_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_batch = ArraySize(output_dims, 3); + + TFLITE_DCHECK_EQ(input_width, output_width * block_size); + TFLITE_DCHECK_EQ(input_height, output_height * block_size); + TFLITE_DCHECK_EQ(input_depth * block_size * block_size, output_depth); + TFLITE_DCHECK_EQ(input_batch, output_batch); + + for (int in_b = 0; in_b < input_batch; ++in_b) { + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + for (int in_d = 0; in_d < input_depth; ++in_d) { + const int out_d = + in_d + ((in_h % block_size) * block_size + in_w % block_size) * + input_depth; + const int out_w = in_w / block_size; + const int out_h = in_h / block_size; + const int out_b = in_b; + + const int output_index = + Offset(output_dims, out_d, out_w, out_h, out_b); + const int input_index = Offset(input_dims, in_d, in_w, in_h, in_b); + + output_data[output_index] = input_data[input_index]; + } + } + } + } +} + +inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); + const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0); + const int accum_depth = ArraySize(weights_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + float total = 0.f; + for (int d = 0; d < accum_depth; ++d) { + total += input_data[b * accum_depth + d] * + weights_data[out_c * accum_depth + d]; + } + float bias_value = 0.0f; + if (bias_data) { + bias_value = bias_data[Offset(bias_dims, out_c, 0, 0, 0)]; + } + output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax( + total + bias_value, output_activation_min, output_activation_max); + } + } +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, const Dims<4>& weights_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data, + bias_dims, output_activation_min, output_activation_max, + output_data, output_dims); +} + +inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + (void)gemm_context; // only used in optimized code. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0); + const int accum_depth = ArraySize(filter_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + int32 acc = 0; + for (int d = 0; d < accum_depth; ++d) { + int32 input_val = input_data[b * accum_depth + d]; + int32 filter_val = filter_data[out_c * accum_depth + d]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + if (bias_data) { + acc += bias_data[Offset(bias_dims, out_c, 0, 0, 0)]; + } + acc = MultiplyByQuantizedMultiplierSmallerThanOne(acc, output_multiplier, + output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[out_c + output_depth * b] = static_cast(acc); + } + } +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims, gemm_context); +} + +template +void NonGlobalBatchNormalization( + const float* input_data, const Dims<4>& input_dims, const float* mean_data, + const Dims<4>& mean_dims, const float* multiplier_data, + const Dims<4>& multiplier_dims, const float* offset_data, + const Dims<4>& offset_dims, float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input_dims, 2, mean_dims, 2, multiplier_dims, 2, + offset_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input_dims, 1, mean_dims, 1, multiplier_dims, 1, + offset_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, + offset_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + (input_data[Offset(input_dims, c, x, y, b)] - + mean_data[Offset(mean_dims, c, x, y, 0)]) * + multiplier_data[Offset(multiplier_dims, c, x, y, 0)] + + offset_data[Offset(offset_dims, c, x, y, 0)]); + } + } + } + } +} + +template +void GlobalBatchNormalization(const float* input_data, + const Dims<4>& input_dims, const float* mean_data, + const Dims<4>& mean_dims, + const float* multiplier_data, + const Dims<4>& multiplier_dims, + const float* offset_data, + const Dims<4>& offset_dims, float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, + offset_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + (input_data[Offset(input_dims, c, x, y, b)] - + mean_data[Offset(mean_dims, c, 0, 0, 0)]) * + multiplier_data[Offset(multiplier_dims, c, 0, 0, 0)] + + offset_data[Offset(offset_dims, c, 0, 0, 0)]); + } + } + } + } +} + +inline void Relu(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float lower = 0; + float clamped = val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +inline void Relu1(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float upper = 1; + const float lower = -1; + float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +inline void Relu6(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float upper = 6; + const float lower = 0; + float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +template +void L2Normalization(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone, ""); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float squared_l2_norm = 0; + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + squared_l2_norm += val * val; + } + float l2_norm = std::sqrt(squared_l2_norm); + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + input_data[Offset(input_dims, c, x, y, b)] / l2_norm; + } + } + } + } +} + +inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt, + int* output_shift) { + *output_shift = 11; + while (input >= (1 << 29)) { + input /= 4; + ++*output_shift; + } + TFLITE_DCHECK_GT(input, 0); + const unsigned max_left_shift_bits = __builtin_clz(input) - 1; + const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2; + const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1; + *output_shift -= left_shift_bit_pairs; + input <<= 2 * left_shift_bit_pairs; + TFLITE_DCHECK_GE(input, (1 << 27)); + TFLITE_DCHECK_LT(input, (1 << 29)); + using gemmlowp::FixedPoint; + using gemmlowp::Rescale; + using gemmlowp::SaturatingRoundingMultiplyByPOT; + // Using 3 integer bits gives us enough room for the internal arithmetic in + // this Newton-Raphson iteration. + using F3 = FixedPoint; + using F0 = FixedPoint; + const F3 fixedpoint_input = F3::FromRaw(input >> 1); + const F3 fixedpoint_half_input = + SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input); + const F3 fixedpoint_half_three = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5); + // Newton-Raphson iteration + // Naive unoptimized starting guess: x = 1 + F3 x = F3::One(); + // Naive unoptimized number of iterations: 5 + for (int i = 0; i < 5; i++) { + const F3 x3 = Rescale<3>(x * x * x); + x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3); + } + const F0 fixedpoint_half_sqrt_2 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.); + x = x * fixedpoint_half_sqrt_2; + *output_inv_sqrt = x.raw(); + if (*output_shift < 0) { + *output_inv_sqrt <<= -*output_shift; + *output_shift = 0; + } +} + +inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, + int32 input_zero_point, uint8* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + TFLITE_DCHECK_EQ(batches, 1); + TFLITE_DCHECK_EQ(height, 1); + TFLITE_DCHECK_EQ(width, 1); + int32 square_l2_norm = 0; + for (int i = 0; i < depth; i++) { + int32 diff = input_data[Offset(input_dims, i, 0, 0, 0)] - input_zero_point; + square_l2_norm += diff * diff; + } + int32 inv_l2norm_multiplier; + int inv_l2norm_shift; + GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, + &inv_l2norm_shift); + + for (int i = 0; i < depth; i++) { + int32 diff = input_data[Offset(input_dims, i, 0, 0, 0)] - input_zero_point; + int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne( + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + int32 unclamped_output_val = 128 + rescaled_diff; + int32 output_val = std::min(255, std::max(0, unclamped_output_val)); + output_data[Offset(output_dims, i, 0, 0, 0)] = + static_cast(output_val); + } +} + +inline void Add(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + const int batches = + MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[Offset(input1_dims, c, x, y, b)] + + input2_data[Offset(input2_dims, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void Add(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + Add(input1_data, input1_dims, input2_data, input2_dims, output_activation_min, + output_activation_max, output_data, output_dims); +} + +template +inline void Add(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, int input2_shift, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + const int batches = + MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + const int32 input1_val = + input1_offset + input1_data[Offset(input1_dims, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[Offset(input2_dims, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val + scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +// TODO(jiawen): We can implement BroadcastAdd on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastAdd(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastAdd"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] + + input2_data[SubscriptToIndex(desc2, c, x, y, b)]); + } + } + } + } +} + +inline void BroadcastAdd(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastAdd/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val + scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +inline void BroadcastAdd(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + BroadcastAdd(left_shift, input1_data, input1_dims, input1_offset, + input1_multiplier, input1_shift, input2_data, input2_dims, + input2_offset, input2_multiplier, input2_shift, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void Mul(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + const int batches = + MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[Offset(input1_dims, c, x, y, b)] * + input2_data[Offset(input2_dims, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void Mul(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + Mul(input1_data, input1_dims, input2_data, input2_dims, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastMul(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastMul"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] * + input2_data[SubscriptToIndex(desc2, c, x, y, b)]); + } + } + } + } +} + +inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastMul/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 unclamped_result = + output_offset + + MultiplyByQuantizedMultiplierSmallerThanOne( + input1_val * input2_val, output_multiplier, output_shift); + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, unclamped_result)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + BroadcastMul(input1_data, input1_dims, input1_offset, input2_data, + input2_dims, input2_offset, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data, output_dims); +} + +template +void Concatenation(int concat_dim, const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + TFLITE_DCHECK_GT(inputs_count, 1); + int concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + for (int j = 0; j < 4; j++) { + if (j != concat_dim) { + MatchingArraySize(*input_dims[i], j, output_dims, j); + } + } + concat_size += ArraySize(*input_dims[i], concat_dim); + } + TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim)); + TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); + int outer_size = 1; + for (int i = concat_dim + 1; i < 4; i++) { + outer_size *= output_dims.sizes[i]; + } + Scalar* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = + input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim]; + memcpy(output_ptr, input_data[i] + k * copy_size, + copy_size * sizeof(Scalar)); + output_ptr += copy_size; + } + } +} + +template +void DepthConcatenation(const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + Concatenation(0, input_data, input_dims, inputs_count, + output_data, output_dims); +} + +inline void LstmCell(const float* input_data, const Dims<4>& input_dims, + const float* prev_activ_data, + const Dims<4>& prev_activ_dims, const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, const float* prev_state_data, + const Dims<4>& prev_state_dims, float* output_state_data, + const Dims<4>& output_state_dims, float* output_activ_data, + const Dims<4>& output_activ_dims, float* concat_temp_data, + const Dims<4>& concat_temp_dims, float* activ_temp_data, + const Dims<4>& activ_temp_dims) { + const int batches = + MatchingArraySize(input_dims, 3, prev_activ_dims, 3, prev_state_dims, 3, + output_state_dims, 3, output_activ_dims, 3); + const int height = + MatchingArraySize(input_dims, 2, prev_activ_dims, 2, prev_state_dims, 2, + output_state_dims, 2, output_activ_dims, 2); + const int width = + MatchingArraySize(input_dims, 1, prev_activ_dims, 1, prev_state_dims, 1, + output_state_dims, 1, output_activ_dims, 1); + TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1); + TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1); + const int input_depth = ArraySize(input_dims, 0); + const int prev_activ_depth = ArraySize(prev_activ_dims, 0); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth); + TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3), + 1); + const int intern_activ_depth = + MatchingArraySize(weights_dims, 1, bias_dims, 0); + TFLITE_CHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = + MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0, + output_state_dims, 0, output_activ_dims, 0); + TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + std::vector concat_input_arrays_data; + std::vector const*> concat_input_arrays_dims; + concat_input_arrays_data.push_back(input_data); + concat_input_arrays_data.push_back(prev_activ_data); + concat_input_arrays_dims.push_back(&input_dims); + concat_input_arrays_dims.push_back(&prev_activ_dims); + Concatenation( + 0, &(concat_input_arrays_data[0]), &(concat_input_arrays_dims[0]), + concat_input_arrays_data.size(), concat_temp_data, concat_temp_dims); + + // Fully connected + FullyConnected( + concat_temp_data, concat_temp_dims, weights_data, weights_dims, bias_data, + bias_dims, activ_temp_data, activ_temp_dims); + + // Memory state update (the LSTM "guts") + for (int b = 0; b < batches; ++b) { + for (int w = 0; w < width; ++w) { + for (int h = 0; h < height; ++h) { + for (int c = 0; c < output_depth; ++c) { + const float input_gate = + 1.f / + (1.f + std::exp(-activ_temp_data[Offset( + activ_temp_dims, 0 * output_depth + c, w, h, b)])); + const float new_input = std::tanh(activ_temp_data[Offset( + activ_temp_dims, 1 * output_depth + c, w, h, b)]); + const float forget_gate = + 1.f / + (1.f + std::exp(-activ_temp_data[Offset( + activ_temp_dims, 2 * output_depth + c, w, h, b)])); + const float output_gate = + 1.f / + (1.f + std::exp(-activ_temp_data[Offset( + activ_temp_dims, 3 * output_depth + c, w, h, b)])); + const float new_state = + input_gate * new_input + + forget_gate * + prev_state_data[Offset(prev_state_dims, c, w, h, b)]; + output_state_data[Offset(output_state_dims, c, w, h, b)] = new_state; + output_activ_data[Offset(output_activ_dims, c, w, h, b)] = + output_gate * std::tanh(new_state); + } + } + } + } +} + +template +void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, + int outputs_count, Scalar* const* output_data, + const Dims<4>* const* output_dims) { + TFLITE_DCHECK_GE(outputs_count, 1); + for (int i = 0; i < outputs_count; i++) { + /* batches = */ MatchingArraySize(*output_dims[i], 3, input_dims, 3); + /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2); + /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1); + } + const int batches = MatchingArraySize(*output_dims[0], 3, input_dims, 3); + const int height = MatchingArraySize(*output_dims[0], 2, input_dims, 2); + const int width = MatchingArraySize(*output_dims[0], 1, input_dims, 1); + // for now we dont have a model with a TensorFlowSplit + // with fused activation function. + TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + int in_c = 0; + for (int i = 0; i < outputs_count; ++i) { + const int depth = ArraySize(*output_dims[i], 0); + for (int c = 0; c < depth; ++c) { + output_data[i][Offset(*output_dims[i], c, x, y, b)] = + input_data[Offset(input_dims, in_c, x, y, b)]; + in_c++; + } + } + TFLITE_DCHECK(in_c == ArraySize(input_dims, 0)); + } + } + } +} + +// TODO(benoitjacob) make this a proper reference impl without Eigen! +template +using MatrixMap = typename std::conditional< + std::is_const::value, + Eigen::Map::type, + Eigen::Dynamic, Eigen::Dynamic>>, + Eigen::Map>>::type; + +template +MatrixMap MapAsMatrixWithFirstDimAsRows(Scalar* data, + const Dims& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +template +MatrixMap MapAsMatrixWithLastDimAsCols(Scalar* data, + const Dims& dims) { + const int cols = dims.sizes[N - 1]; + int rows = 1; + for (int d = 0; d < N - 1; d++) { + rows *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +inline int NodeOffset(int b, int h, int w, int height, int width) { + return (b * height + h) * width + w; +} + +inline void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + float total = 0.f; + float filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + total += + input_data[Offset(input_dims, channel, in_x, in_y, batch)]; + filter_count++; + } + } + const float average = total / filter_count; + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = + ActivationFunctionWithMinMax(average, output_activation_min, + output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, float* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + int32 acc = 0; + int filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + acc += input_data[Offset(input_dims, channel, in_x, in_y, batch)]; + filter_count++; + } + } + acc = (acc + filter_count / 2) / filter_count; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = + static_cast(acc); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void L2Pool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + float sum_squares = 0.f; + int filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + const float val = + input_data[Offset(input_dims, channel, in_x, in_y, batch)]; + sum_squares += val * val; + filter_count++; + } + } + const float l2pool_result = std::sqrt(sum_squares / filter_count); + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = + ActivationFunctionWithMinMax(l2pool_result, output_activation_min, + output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + L2Pool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + L2Pool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void MaxPool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + float max = std::numeric_limits::lowest(); + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max( + max, + input_data[Offset(input_dims, channel, in_x, in_y, batch)]); + } + } + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = + ActivationFunctionWithMinMax(max, output_activation_min, + output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_GE(output_activation_min, 0); + TFLITE_DCHECK_LE(output_activation_max, 255); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + uint8 max = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max( + max, + input_data[Offset(input_dims, channel, in_x, in_y, batch)]); + } + } + max = std::max(max, output_activation_min); + max = std::min(max, output_activation_max); + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = + static_cast(max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void LocalResponseNormalization(const float* input_data, + const Dims<4>& input_dims, int range, + float bias, float alpha, float beta, + float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + const int begin_input_c = std::max(0, c - range); + const int end_input_c = std::min(depth, c + range); + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) { + const float input_val = + input_data[Offset(input_dims, input_c, x, y, b)]; + accum += input_val * input_val; + } + const float multiplier = std::pow(bias + alpha * accum, -beta); + output_data[Offset(output_dims, c, x, y, b)] = + input_data[Offset(input_dims, c, x, y, b)] * multiplier; + } + } + } + } +} + +inline void Softmax(const float* input_data, const Dims<4>& input_dims, + float beta, float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + // Find max element value which we'll use to ensure numerical stability + // taking advantage of the following equality: + // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) + float max = std::numeric_limits::lowest(); + for (int c = 0; c < depth; ++c) { + max = std::max(max, input_data[Offset(input_dims, c, x, y, b)]); + } + + // Compute sum. + float sum = 0.f; + for (int c = 0; c < depth; ++c) { + sum += std::exp((input_data[Offset(input_dims, c, x, y, b)] - max) * + beta); + } + + // Compute result. + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + std::exp((input_data[Offset(input_dims, c, x, y, b)] - max) * + beta) / + sum; + } + } + } + } +} + +inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, + int32 input_beta_multiplier, int32 input_beta_left_shift, + int diff_min, uint8* output_data, + const Dims<4>& output_dims) { + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input_beta_multiplier, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. + static const int kScaledDiffIntegerBits = 5; + static const int kAccumulationIntegerBits = 12; + using FixedPointScaledDiff = + gemmlowp::FixedPoint; + using FixedPointAccum = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int x = 0; x < width; ++x) { + for (int y = 0; y < height; ++y) { + uint8 max_in_row = 0; + for (int c = 0; c < depth; ++c) { + max_in_row = + std::max(max_in_row, input_data[Offset(input_dims, c, x, y, b)]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[Offset(input_dims, c, x, y, b)]) - + max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = + sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + int32 fixed_sum_of_exps = sum_of_exps.raw(); + int headroom_plus_one = + CountLeadingZeros(static_cast(fixed_sum_of_exps)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. + int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; + int32 shifted_sum_minus_one = static_cast( + (static_cast(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast(1) << 31)); + + FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( + FixedPoint0::FromRaw(shifted_sum_minus_one)); + + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[Offset(input_dims, c, x, y, b)]) - + max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32 unsat_output = gemmlowp::RoundingDivideByPOT( + (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); + + output_data[Offset(output_dims, c, x, y, b)] = static_cast( + std::max(std::min(unsat_output, static_cast(255)), 0)); + + } else { + output_data[Offset(output_dims, c, x, y, b)] = 0; + } + } + } + } + } +} + +inline void Logistic(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + float result = 1.f / (1.f + std::exp(-val)); + output_data[Offset(output_dims, c, x, y, b)] = result; + } + } + } + } +} + +inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + const uint8 input_val_u8 = input_data[Offset(input_dims, c, x, y, b)]; + const int32 input_val_centered = + static_cast(input_val_u8) - input_zero_point; + uint8 output_val; + if (input_val_centered <= -input_range_radius) { + output_val = 0; + } else if (input_val_centered >= input_range_radius) { + output_val = 255; + } else { + const int32 input_val_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_val_centered, input_multiplier, input_left_shift); + using FixedPoint4 = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + const FixedPoint4 input_val_f4 = + FixedPoint4::FromRaw(input_val_rescaled); + const FixedPoint0 output_val_f0 = gemmlowp::logistic(input_val_f4); + using gemmlowp::RoundingDivideByPOT; + int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 23); + if (output_val_s32 == 256) { + output_val_s32 = 255; + } + TFLITE_DCHECK_GE(output_val_s32, 0); + TFLITE_DCHECK_LE(output_val_s32, 255); + output_val = static_cast(output_val_s32); + } + output_data[Offset(output_dims, c, x, y, b)] = output_val; + } + } + } + } +} + +inline void Tanh(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + float result = std::tanh(val); + output_data[Offset(output_dims, c, x, y, b)] = result; + } + } + } + } +} + +inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, + int32 zero_point, double scale, float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + int32 val = input_data[Offset(input_dims, c, x, y, b)]; + float result = static_cast(scale * (val - zero_point)); + output_data[Offset(output_dims, c, x, y, b)] = result; + } + } + } + } +} + +inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, + float rmin, float rmax, float* output_data, + const Dims<4>& output_dims) { + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_DCHECK_LE(rmin, 0.); + TFLITE_DCHECK_GE(rmax, 0.); + + // Determine quantization parameters: zero_point, scale. + using Integer = uint8; + const Integer qmin = std::numeric_limits::min(); + const Integer qmax = std::numeric_limits::max(); + const float qmin_float = qmin; + const float qmax_float = qmax; + int32 zero_point = 0; + float scale = 0.f; + // If rmin==rmax, both must be zero per the above assertion, + // so we are done. + if (rmin != rmax) { + // First determine the scale. + scale = (rmax - rmin) / (qmax_float - qmin_float); + + // Zero-point computation. + // First the initial floating-point computation. The zero-point can be + // determined from solving an affine equation for any known pair + // (real value, corresponding quantized value). + // We know two such pairs: (rmin, qmin) and (rmax, qmax). + // The arithmetic error on the zero point computed from either pair + // will be roughly machine_epsilon * (sum of absolute values of terms) + // so we want to use the variant that adds the smaller terms. + const float zero_point_from_min = qmin_float - rmin / scale; + const float zero_point_from_max = qmax_float - rmax / scale; + const float zero_point_from_min_error = + std::abs(qmin_float) + std::abs(rmin / scale); + const float zero_point_from_max_error = + std::abs(qmax_float) + std::abs(rmax / scale); + + const float zero_point_float = + zero_point_from_min_error < zero_point_from_max_error + ? zero_point_from_min + : zero_point_from_max; + + // Now we need to nudge the zero point to be an integer + // (our zero points are integer, and this is motivated by the requirement + // to be able to represent the real value "0" exactly as a quantized value, + // which is required in multiple places, for example in Im2col with SAME + // padding). + if (zero_point_float < qmin_float) { + zero_point = qmin; + } else if (zero_point_float > qmax_float) { + zero_point = qmax; + } else { + zero_point = static_cast(TfLiteRound(zero_point_float)); + } + // The zero point should always be in the range of quantized value, + // [qmin, qmax]. + TFLITE_DCHECK_GE(zero_point, qmin); + TFLITE_DCHECK_LE(zero_point, qmax); + } + + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + const float src_val = input_data[Offset(input_dims, c, x, y, b)]; + const float unclamped_quantized_val = + TfLiteRound(zero_point + src_val / scale); + const float quantized_val = std::min( + qmax_float, std::max(qmin_float, unclamped_quantized_val)); + const float dst_val = scale * (quantized_val - zero_point); + output_data[Offset(output_dims, c, x, y, b)] = dst_val; + } + } + } + } +} + +template +inline void Cast(const SrcT* input_data, const Dims<4>& input_dims, + DstT* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + int offset = Offset(input_dims, c, x, y, b); + output_data[offset] = static_cast(input_data[offset]); + } + } + } + } +} + +inline void Floor(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + int offset = Offset(input_dims, c, x, y, b); + output_data[offset] = std::floor(input_data[offset]); + } + } + } + } +} + +template +inline void Gather(const T* input_data, const Dims<4>& input_dims, + int input_rank, const int32* coords_data, + const Dims<4>& coords_dims, T* output_data, + const Dims<4>& output_dims) { + TFLITE_DCHECK(coords_dims.sizes[0] == output_dims.sizes[input_rank - 1]); + int stride = input_dims.strides[input_rank - 1]; + T* out = output_data; + + for (int i = 0; i < coords_dims.sizes[0]; i++) { + TFLITE_DCHECK_GE(coords_data[i], 0); + TFLITE_DCHECK_LT(coords_data[i], input_dims.sizes[input_rank - 1]); + const T* in = input_data + coords_data[i] * stride; + memcpy(out, in, sizeof(T) * stride); + out += stride; + } +} + +inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, + const int32* output_size_data, + const Dims<4>& output_size_dims, float* output_data, + const Dims<4>& output_dims) { + int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3); + int32 input_height = ArraySize(input_dims, 2); + int32 input_width = ArraySize(input_dims, 1); + int32 depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 3), 1); + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 2), 1); + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 1), 1); + TFLITE_DCHECK_EQ(ArraySize(output_size_dims, 0), 2); + int32 output_height = output_size_data[Offset(output_size_dims, 0, 0, 0, 0)]; + int32 output_width = output_size_data[Offset(output_size_dims, 1, 0, 0, 0)]; + float height_scale = static_cast(input_height) / output_height; + float width_scale = static_cast(input_width) / output_width; + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + float input_y = y * height_scale; + int32 y0 = static_cast(std::floor(input_y)); + int32 y1 = std::min(y0 + 1, input_height - 1); + for (int x = 0; x < output_width; ++x) { + float input_x = x * width_scale; + int32 x0 = static_cast(std::floor(input_x)); + int32 x1 = std::min(x0 + 1, input_width - 1); + for (int c = 0; c < depth; ++c) { + float interpolation = input_data[Offset(input_dims, c, x0, y0, b)] * + (1 - (input_y - y0)) * + (1 - (input_x - x0)) + + input_data[Offset(input_dims, c, x0, y1, b)] * + (input_y - y0) * (1 - (input_x - x0)) + + input_data[Offset(input_dims, c, x1, y0, b)] * + (1 - (input_y - y0)) * (input_x - x0) + + input_data[Offset(input_dims, c, x1, y1, b)] * + (input_y - y0) * (input_x - x0); + output_data[Offset(output_dims, c, x, y, b)] = interpolation; + } + } + } + } +} + +template +inline void SpaceToBatchND(const T* input_data, const Dims<4>& input_dims, + const int32* block_shape_data, + const Dims<4>& block_shape_dims, + const int32* paddings_data, + const Dims<4>& paddings_dims, T* output_data, + const Dims<4>& output_dims) { + const int output_batch_size = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int input_batch_size = ArraySize(input_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int depth = ArraySize(input_dims, 0); + const int block_shape_height = block_shape_data[0]; + const int block_shape_width = block_shape_data[1]; + const int padding_top = paddings_data[0]; + const int padding_left = paddings_data[2]; + + for (int out_b = 0; out_b < output_batch_size; ++out_b) { + int input_batch = out_b % input_batch_size; + int shift_w = (out_b / input_batch_size) % block_shape_width; + int shift_h = (out_b / input_batch_size) / block_shape_width; + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_b); + if (out_h * block_shape_height < padding_top || + out_h * block_shape_height >= padding_top + input_height || + out_w * block_shape_width < padding_left || + out_w * block_shape_width >= padding_left + input_width) { + memset(out, 0, depth * sizeof(T)); + } else { + const T* in = + input_data + + Offset(input_dims, 0, + (out_w * block_shape_width + shift_w) - padding_left, + (out_h * block_shape_height + shift_h) - padding_top, + input_batch); + memcpy(out, in, depth * sizeof(T)); + } + } + } + } +} + +template +inline void BatchToSpaceND(const T* input_data, const Dims<4>& input_dims, + const int32* block_shape_data, + const Dims<4>& block_shape_dims, T* output_data, + const Dims<4>& output_dims) { + const int output_batch_size = ArraySize(output_dims, 3); + const int input_batch_size = ArraySize(input_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int depth = ArraySize(input_dims, 0); + const int block_shape_width = block_shape_data[1]; + const int block_shape_height = block_shape_data[0]; + + for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) { + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + int out_batch = in_batch % output_batch_size; + int out_w = in_w * block_shape_width + + (in_batch / output_batch_size) % block_shape_width; + int out_h = in_h * block_shape_height + + (in_batch / output_batch_size) / block_shape_width; + T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_batch); + const T* in = input_data + Offset(input_dims, 0, in_w, in_h, in_batch); + memcpy(out, in, depth * sizeof(T)); + } + } + } +} + +template +inline void Pad(const T* input_data, const Dims<4>& input_dims, + const std::vector& left_paddings, + const std::vector& right_paddings, T* output_data, + const Dims<4>& output_dims) { + const int output_batch = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int output_depth = ArraySize(output_dims, 0); + + const int left_b_padding = left_paddings[3]; + const int left_h_padding = left_paddings[2]; + const int left_w_padding = left_paddings[1]; + const int left_d_padding = left_paddings[0]; + + const int right_b_padding = right_paddings[3]; + const int right_h_padding = right_paddings[2]; + const int right_w_padding = right_paddings[1]; + const int right_d_padding = right_paddings[0]; + + const T* in_ptr = input_data; + T* out_ptr = output_data; + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + if (out_b < left_b_padding || + out_b >= output_batch - right_b_padding || + out_h < left_h_padding || + out_h >= output_height - right_h_padding || + out_w < left_w_padding || + out_w >= output_width - right_w_padding || + out_d < left_d_padding || + out_d >= output_depth - right_d_padding) { + *out_ptr++ = 0; + } else { + *out_ptr++ = *in_ptr++; + } + } + } + } + } +} + +template +inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, + int begin_mask, int end_mask, + const std::vector& starts, + const std::vector& stops, + const std::vector& strides, T* output_data, + const Dims<4>& output_dims) { + const int start_b = (begin_mask & 8) ? 0 : starts[3]; + const int stop_b = (end_mask & 8) ? input_dims.sizes[3] : stops[3]; + const int start_h = (begin_mask & 4) ? 0 : starts[2]; + const int stop_h = (end_mask & 4) ? input_dims.sizes[2] : stops[2]; + const int start_w = (begin_mask & 2) ? 0 : starts[1]; + const int stop_w = (end_mask & 2) ? input_dims.sizes[1] : stops[1]; + const int start_d = (begin_mask & 1) ? 0 : starts[0]; + const int stop_d = (end_mask & 1) ? input_dims.sizes[0] : stops[0]; + + T* out_ptr = output_data; + for (int in_b = start_b; in_b < stop_b; in_b += strides[3]) { + for (int in_h = start_h; in_h < stop_h; in_h += strides[2]) { + for (int in_w = start_w; in_w < stop_w; in_w += strides[1]) { + for (int in_d = start_d; in_d < stop_d; in_d += strides[0]) { + *out_ptr++ = input_data[Offset(input_dims, in_d, in_w, in_h, in_b)]; + } + } + } + } +} + +template +inline void Slice(const T* input_data, const Dims<4>& input_dims, + const std::vector& begin, const std::vector& size, + T* output_data, const Dims<4>& output_dims) { + // TODO(dkalenichenko): This op only supports 4D tensors. + TFLITE_DCHECK_EQ(begin.size(), 4); + TFLITE_DCHECK_EQ(size.size(), 4); + const int start_b = begin[3]; + const int stop_b = + size[3] == -1 ? input_dims.sizes[3] - start_b : start_b + size[3]; + const int start_h = begin[2]; + const int stop_h = + size[2] == -1 ? input_dims.sizes[2] - start_b : start_b + size[2]; + const int start_w = begin[1]; + const int stop_w = + size[1] == -1 ? input_dims.sizes[1] - start_b : start_b + size[1]; + const int start_d = begin[0]; + const int stop_d = + size[0] == -1 ? input_dims.sizes[0] - start_d : start_d + size[0]; + + T* out_ptr = output_data; + for (int in_b = start_b; in_b < stop_b; ++in_b) { + for (int in_h = start_h; in_h < stop_h; ++in_h) { + for (int in_w = start_w; in_w < stop_w; ++in_w) { + for (int in_d = start_d; in_d < stop_d; ++in_d) { + *out_ptr++ = input_data[Offset(input_dims, in_d, in_w, in_h, in_b)]; + } + } + } + } +} + +template +inline void Mean(const T* input_data, const Dims<4>& input_dims, + const std::vector& reduction_indices, T* output_data, + const Dims<4>& output_dims) { + const int output_batch = ArraySize(output_dims, 3); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int output_depth = ArraySize(output_dims, 0); + + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + + // The current implementation only supports simultaneous reduction over + // width and height. + TFLITE_DCHECK_EQ(reduction_indices.size(), 2); + TFLITE_DCHECK((reduction_indices[0] == 1 && reduction_indices[1] == 2) || + (reduction_indices[0] == 2 && reduction_indices[1] == 1)); + TFLITE_DCHECK_EQ(output_height, 1); + TFLITE_DCHECK_EQ(output_width, 1); + + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + float value = 0; + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + value += input_data[Offset(input_dims, out_d, in_w, in_h, out_b)]; + } + } + output_data[Offset(output_dims, out_d, 0, 0, out_b)] = + value / (input_width * input_height); + } + } +} + +template +void Sub(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, + const Dims<4>& input2_dims, T* output_data, + const Dims<4>& output_dims) { + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + } + } + } + } +} + +template +void TensorFlowMinimum(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, T* output_data, + const Dims<4>& output_dims) { + int batches = MatchingArraySize(input1_dims, 3, output_dims, 3); + int input_height = MatchingArraySize(input1_dims, 2, output_dims, 2); + int input_width = MatchingArraySize(input1_dims, 1, output_dims, 1); + int depth = MatchingArraySize(input1_dims, 0, output_dims, 0); + + auto min_value = input2_data[0]; + + for (int b = 0; b < batches; b++) { + for (int y = 0; y < input_height; y++) { + for (int x = 0; x < input_width; x++) { + for (int c = 0; c < depth; c++) { + int offset = Offset(input1_dims, c, x, y, b); + output_data[offset] = + input1_data[offset] > min_value ? min_value : input1_data[offset]; + } + } + } + } +} + +template +void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, T* output_data, + const Dims<4>& output_dims) { + int batches = MatchingArraySize(input1_dims, 3, output_dims, 3); + int input_height = MatchingArraySize(input1_dims, 2, output_dims, 2); + int input_width = MatchingArraySize(input1_dims, 1, output_dims, 1); + int depth = MatchingArraySize(input1_dims, 0, output_dims, 0); + + auto max_value = input2_data[0]; + + for (int b = 0; b < batches; b++) { + for (int y = 0; y < input_height; y++) { + for (int x = 0; x < input_width; x++) { + for (int c = 0; c < depth; c++) { + int offset = Offset(input1_dims, c, x, y, b); + output_data[offset] = + input1_data[offset] < max_value ? max_value : input1_data[offset]; + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_REFERENCE_OPS_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/round.h b/tensorflow/contrib/lite/kernels/internal/round.h new file mode 100644 index 0000000000..38525b0e20 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/round.h @@ -0,0 +1,39 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_ROUND_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_ROUND_H_ + +#include + +namespace tflite { + +// TODO(aselle): See if we can do this only on jdk. Also mikecase, check +// if you need this for java host build. +#if defined(__ANDROID__) && !defined(__NDK_MAJOR__) +template +inline float TfLiteRound(const float x) { + return ::round(x); +} +inline double TfLiteRound(const double x) { return ::round(x); } +#else +template +inline T TfLiteRound(const T x) { + return std::round(x); +} +#endif + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_ROUND_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h new file mode 100644 index 0000000000..ee4111e041 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/tensor.h @@ -0,0 +1,87 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_H_ + +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { + +template +inline T* GetTensorData(TfLiteTensor* tensor); + +template <> +inline float* GetTensorData(TfLiteTensor* tensor) { + return tensor != nullptr ? tensor->data.f : nullptr; +} + +template <> +inline uint8_t* GetTensorData(TfLiteTensor* tensor) { + return tensor != nullptr ? tensor->data.uint8 : nullptr; +} + +template <> +inline int32_t* GetTensorData(TfLiteTensor* tensor) { + return tensor != nullptr ? tensor->data.i32 : nullptr; +} + +template <> +inline int64_t* GetTensorData(TfLiteTensor* tensor) { + return tensor != nullptr ? reinterpret_cast(tensor->data.raw) + : nullptr; +} + +inline int RemapDim(int max_dimensions, int d) { + return max_dimensions - d - 1; +} + +// TODO(ahentz): the implementations in kernels/internal/ take a Dims<4> object +// even if the original tensors were not 4D. We should consider rewriting them +// to take a more generic 'shape' object. +inline Dims<4> GetTensorDims(const int data[], const int size) { + Dims<4> d; + for (int i = 0; i < 4; ++i) { + int src = size - i - 1; + if (src >= 0) { + d.sizes[i] = data[src]; + } else { + d.sizes[i] = 1; + } + } + d.strides[0] = 1; + for (int i = 1; i < 4; i++) { + d.strides[i] = d.strides[i - 1] * d.sizes[i - 1]; + } + return d; +} + +inline Dims<4> GetTensorDims(std::vector data) { + return GetTensorDims(data.data(), data.size()); +} + +inline Dims<4> GetTensorDims(const TfLiteTensor* tensor) { + if (tensor == nullptr) { + return Dims<4>(); + } + + auto* dims = tensor->dims; + return GetTensorDims(dims->data, dims->size); +} + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_test.cc b/tensorflow/contrib/lite/kernels/internal/tensor_test.cc new file mode 100644 index 0000000000..bf2068d320 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/tensor_test.cc @@ -0,0 +1,55 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include +#include + +namespace tflite { +namespace { + +using ::testing::ElementsAre; + +TEST(TensorTest, GetTensorDims4D) { + Dims<4> d = GetTensorDims({2, 3, 4, 5}); + EXPECT_THAT(d.sizes, ElementsAre(5, 4, 3, 2)); + EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 60)); +} + +TEST(TensorTest, GetTensorDims3D) { + Dims<4> d = GetTensorDims({3, 4, 5}); + EXPECT_THAT(d.sizes, ElementsAre(5, 4, 3, 1)); + EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 60)); +} + +TEST(TensorTest, GetTensorDims2D) { + Dims<4> d = GetTensorDims({4, 5}); + EXPECT_THAT(d.sizes, ElementsAre(5, 4, 1, 1)); + EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 20)); +} + +TEST(TensorTest, GetTensorDims1D) { + Dims<4> d = GetTensorDims({5}); + EXPECT_THAT(d.sizes, ElementsAre(5, 1, 1, 1)); + EXPECT_THAT(d.strides, ElementsAre(1, 5, 5, 5)); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc new file mode 100644 index 0000000000..904a97803a --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc @@ -0,0 +1,27 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" + +#ifndef USE_NEON +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#endif // defined(__ARM_NEON__) || defined(__ARM_NEON) +#endif // USE_NEON + +#ifdef USE_NEON +#include "tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h" +#else +#include "tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h" +#endif // USE_NEON diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h new file mode 100644 index 0000000000..0e69ef5982 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h @@ -0,0 +1,116 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_ + +#include "tensorflow/contrib/lite/builtin_op_data.h" + +namespace tflite { +namespace tensor_utils { + +// Limit a float input f betweeen +abs_limit and -abs_limit. +float Clip(float f, float abs_limit); + +// Multiply a matrix by a batch vector, and store results in a batch-size +// vector using a stride value provided in result_stride. 'result_stride' shows +// how the number of elements between consecutive result values. For example +// result_stride = 1, will cause the output to look like this: +// [O_1, 0_2, ... O_rows] in memory, but result_stride = 3, will cause it to be +// arranged like this in memory: [O_1, x, x, 0_2, x, x, ..., O_rows] +void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride); + +// Cwise product of two vectors. +void VectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result); + +// Cwise product and accumulate of two vectors. Since it's a MAC opertation, the +// assumption here is that result array is initialized to valid values. +void VectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result); + +// Dot product of two vectors. +float VectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size); + +// Dot product of two batch vectors of size n_batch * v_size: +// vector1 = [x_1_1, x_1_2, ..., x_1_vsize, +// x_2_1, x_2_2, ..., x_2_vsize, +// ... +// x_nbatch_1,..., x_nbatch_vsize] +// vector2 = [y_1_1, y_1_2, ..., y_1_vsize, +// y_2_1, y_2_2, ..., y_2_vsize, +// ... +// y_nbatch_1,..., y_nbatch_vsize] +// Then result will be a vector of n_batch size which will be saved with a +// stride of result_stride in memory starting from 'result': +// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize, +// x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize, +// ... +// x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize] +void BatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride); + +// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC +// operation, the assumption here is that result array is initialized to valid +// values. +void VectorBatchVectorCwiseProductAccumulate(const float* vector, int v_size, + const float* batch_vector, + int n_batch, float* result); + +// Batch vector initialization with another vector. +void VectorBatchVectorAssign(const float* vector, int v_size, int n_batch, + float* batch_vector); + +// Apply sigmoid to elements of a vector. +void ApplySigmoidToVector(const float* vector, int v_size, float* result); + +// Apply activation function to elements of a vector. +void ApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, float* result); + +// Copy vector to another vector. +void CopyVector(const float* vector, int v_size, float* result); + +// Compute "1.0f - elements of vector" (used in CIFG). +void Sub1Vector(const float* vector, int v_size, float* result); + +// Fill vector with 0.f. +void ZeroVector(float* vector, int v_size); + +// Clip elements of a vector using a abs_limit value. +void ClipVector(const float* vector, int v_size, float abs_limit, + float* result); + +// Shift left a vector in place with v_size size. +void VectorShiftLeft(float* vector, int v_size, float shift_value); + +// Reduce-sum on a float input vector: +// input_vector: float pointer to input vector. +// output_vector: float pointer to vector. +// output_size: output vector size. +// reduction_size: number of consecutive elements from input vector which are +// added to get one element of output. +void ReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size); +} // namespace tensor_utils +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc new file mode 100644 index 0000000000..588f1a428b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc @@ -0,0 +1,192 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" + +namespace tflite { +namespace tensor_utils { + +TEST(uKernels, ClipTest) { + constexpr int kVectorSize = 10; + constexpr float kAbsLimit = 2.0; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + std::vector output(kVectorSize); + ClipVector(input, kVectorSize, kAbsLimit, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear( + {0.0, -0.5, 1.0, -1.5, 2.0, -2.0, 2.0, -2.0, 2.0, -2.0}))); +} + +TEST(uKernels, MatrixBatchVectorMultiplyAccumulateTest) { + constexpr int kRow = 3; + constexpr int kCol = 4; + constexpr int kBatch = 2; + static float matrix[kRow * kCol] = {1.0, 2.0, 3.0, 4.0, // + -1.0, -2.0, -3.0, -4.0, // + 1.0, -2.0, 3.0, -4.0}; + static float vector[kCol * kBatch] = {1.0, -1.0, 1.0, -1.0, // + 2.0, -2.0, 2.0, -2.0}; + std::vector output(kRow * kBatch); + std::fill(output.begin(), output.end(), 3.0); + MatrixBatchVectorMultiplyAccumulate(matrix, kRow, kCol, vector, kBatch, + output.data(), /*result_stride=*/1); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear({1., 5., 13., // + -1., 7., 23.}))); + + std::vector output_with_stride2(kRow * kBatch * 2); + std::fill(output_with_stride2.begin(), output_with_stride2.end(), 3.0); + MatrixBatchVectorMultiplyAccumulate(matrix, kRow, kCol, vector, kBatch, + output_with_stride2.data(), + /*result_stride=*/2); + EXPECT_THAT(output_with_stride2, + ElementsAreArray(ArrayFloatNear({1., 3., 5., 3., 13., 3., // + -1., 3., 7., 3., 23., 3.}))); +} + +TEST(uKernels, VectorVectorCwiseProductTest) { + constexpr int kVectorSize = 10; + static float input1[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + static float input2[kVectorSize] = {0.1, -0.1, 0.1, -0.1, 0.1, + -0.1, 0.1, -0.1, 0.1, -0.1}; + std::vector output(kVectorSize); + VectorVectorCwiseProduct(input1, input2, kVectorSize, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear( + {0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45}))); +} + +TEST(uKernels, VectorVectorCwiseProductAccumulateTest) { + constexpr int kVectorSize = 10; + static float input1[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + static float input2[kVectorSize] = {0.1, -0.1, 0.1, -0.1, 0.1, + -0.1, 0.1, -0.1, 0.1, -0.1}; + std::vector output(kVectorSize); + std::fill(output.begin(), output.end(), 1.0); + VectorVectorCwiseProductAccumulate(input1, input2, kVectorSize, + output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear( + {1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45}))); +} + +TEST(uKernels, VectorBatchVectorAssignTest) { + constexpr int kVectorSize = 5; + constexpr int kBatchSize = 3; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize * kBatchSize); + VectorBatchVectorAssign(input, kVectorSize, kBatchSize, output.data()); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear( + {0.0, -0.5, 1.0, -1.5, 2.0, 0.0, -0.5, 1.0, -1.5, 2.0, + 0.0, -0.5, 1.0, -1.5, 2.0}))); +} + +TEST(uKernels, ApplySigmoidToVectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + ApplySigmoidToVector(input, kVectorSize, output.data()); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear( + {0.5, 0.377541, 0.731059, 0.182426, 0.880797}))); +} + +TEST(uKernels, ApplyActivationToVectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + ApplyActivationToVector(input, kVectorSize, kTfLiteActRelu, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({0.0, 0.0, 1.0, 0.0, 2.0}))); + + ApplyActivationToVector(input, kVectorSize, kTfLiteActTanh, output.data()); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear( + {0.0, -0.462117, 0.761594, -0.905148, 0.964028}))); +} + +TEST(uKernels, CopyVectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + CopyVector(input, kVectorSize, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({0.0, -0.5, 1.0, -1.5, 2.0}))); +} + +TEST(uKernels, Sub1VectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + Sub1Vector(input, kVectorSize, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({1.0, 1.5, 0.0, 2.5, -1.0}))); +} + +TEST(uKernels, ZeroVectorTest) { + constexpr int kVectorSize = 5; + std::vector output(kVectorSize); + ZeroVector(output.data(), kVectorSize); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({0.0, 0.0, 0.0, 0.0, 0.0}))); +} + +TEST(uKernels, BatchVectorBatchVectorDotProductTest) { + constexpr int kVectorSize = 5; + constexpr int kBatch = 2; + static float input1[kVectorSize * kBatch] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + static float input2[kVectorSize * kBatch] = {0.1, -0.1, 0.1, -0.1, 0.1, + -0.1, 0.1, -0.1, 0.1, -0.1}; + std::vector output(kBatch); + BatchVectorBatchVectorDotProduct(input1, input2, kVectorSize, kBatch, + output.data(), /*result_stride=*/1); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear({0.5, 1.75}))); +} + +TEST(uKernels, VectorShiftLeftTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector result(kVectorSize); + VectorShiftLeft(input, kVectorSize, 3.0); + result.assign(input, input + kVectorSize); + EXPECT_THAT(result, + ElementsAreArray(ArrayFloatNear({-0.5, 1.0, -1.5, 2.0, 3.0}))); +} + +TEST(uKernels, ReductionSumVectorTest) { + constexpr int kInputVectorSize = 10; + constexpr int kOutputVectorSize1 = 5; + constexpr int kReductionSize1 = 2; + static float input[kInputVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + 0.0, -0.5, 1.0, 1.0, 2.0}; + std::vector result1(kOutputVectorSize1); + ReductionSumVector(input, result1.data(), kOutputVectorSize1, + kReductionSize1); + EXPECT_THAT(result1, + ElementsAreArray(ArrayFloatNear({-0.5, -0.5, 2.0, 0.5, 3.0}))); + + constexpr int kOutputVectorSize2 = 2; + constexpr int kReductionSize2 = 5; + std::vector result2(kOutputVectorSize2); + ReductionSumVector(input, result2.data(), kOutputVectorSize2, + kReductionSize2); + EXPECT_THAT(result2, ElementsAreArray(ArrayFloatNear({1.0, 3.5}))); +} + +} // namespace tensor_utils +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h new file mode 100644 index 0000000000..07f1cb4004 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -0,0 +1,81 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TYPES_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TYPES_H_ + +#include "tensorflow/contrib/lite/kernels/internal/compatibility.h" + +namespace tflite { + +enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu }; + +template +struct Dims { + int sizes[N]; + int strides[N]; +}; + +inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) { + TFLITE_DCHECK(i0 >= 0 && i0 < dims.sizes[0]); + TFLITE_DCHECK(i1 >= 0 && i1 < dims.sizes[1]); + TFLITE_DCHECK(i2 >= 0 && i2 < dims.sizes[2]); + TFLITE_DCHECK(i3 >= 0 && i3 < dims.sizes[3]); + return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] + + i3 * dims.strides[3]; +} + +// Get array size, DCHECKing that the dim index is in range. +template +int ArraySize(const Dims& array, int index) { + TFLITE_DCHECK(index >= 0 && index < N); + return array.sizes[index]; +} + +// Get common array size, DCHECKing that they all agree. +template +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2) { + TFLITE_DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return ArraySize(array1, index1); +} + +template +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2, Args... args) { + TFLITE_DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return MatchingArraySize(array1, index1, args...); +} + +inline int RequiredBufferSizeForDims(const Dims<4>& dims) { + int max_offset = 0; + for (int i = 0; i < 4; i++) { + max_offset += (dims.sizes[i] - 1) * dims.strides[i]; + } + return max_offset + 1; +} + +template +bool IsPackedWithoutStrides(const Dims& dims) { + int expected_stride = 1; + for (int d = 0; d < N; d++) { + if (dims.strides[d] != expected_stride) return false; + expected_stride *= dims.sizes[d]; + } + return true; +} + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TYPES_H_ diff --git a/tensorflow/contrib/lite/kernels/kernel_util.cc b/tensorflow/contrib/lite/kernels/kernel_util.cc new file mode 100644 index 0000000000..b0546c00cf --- /dev/null +++ b/tensorflow/contrib/lite/kernels/kernel_util.cc @@ -0,0 +1,87 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include +#include +#include "tensorflow/contrib/lite/kernels/internal/round.h" + +namespace tflite { + +TfLiteStatus GetQuantizedConvolutionMultipler( + TfLiteContext* context, TfLiteTensor* input, TfLiteTensor* filter, + TfLiteTensor* bias, TfLiteTensor* output, double* multiplier) { + const double input_product_scale = input->params.scale * filter->params.scale; + const double bias_scale = bias->params.scale; + const double output_scale = output->params.scale; + + // TODO(ahentz): The following conditions must be guaranteed by the training + // pipeline. + TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= + 1e-6 * std::min(input_product_scale, bias_scale)); + TF_LITE_ENSURE(context, input_product_scale >= 0); + TF_LITE_ENSURE(context, input_product_scale < output_scale); + + *multiplier = input_product_scale / output_scale; + + return kTfLiteOk; +} + +void CalculateActivationRangeUint8(TfLiteFusedActivation activation, + TfLiteTensor* output, int32_t* act_min, + int32_t* act_max) { + const int32_t qmin = std::numeric_limits::min(); + const int32_t qmax = std::numeric_limits::max(); + + const auto scale = output->params.scale; + const auto zero_point = output->params.zero_point; + + auto quantize = [scale, zero_point](float f) { + return zero_point + static_cast(TfLiteRound(f / scale)); + }; + + if (activation == kTfLiteActRelu) { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = qmax; + } else if (activation == kTfLiteActRelu6) { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(6.0)); + } else if (activation == kTfLiteActRelu1) { + *act_min = std::max(qmin, quantize(-1.0)); + *act_max = std::min(qmax, quantize(1.0)); + } else { + *act_min = qmin; + *act_max = qmax; + } +} + +void CalculateActivationRangeFloat(TfLiteFusedActivation activation, + float* activation_min, + float* activation_max) { + if (activation == kTfLiteActRelu) { + *activation_min = 0.f; + *activation_max = std::numeric_limits::max(); + } else if (activation == kTfLiteActRelu6) { + *activation_min = 0.f; + *activation_max = 6.f; + } else if (activation == kTfLiteActRelu1) { + *activation_min = -1.f; + *activation_max = 1.f; + } else { + *activation_min = std::numeric_limits::lowest(); + *activation_max = std::numeric_limits::max(); + } +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h new file mode 100644 index 0000000000..25556ae456 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/kernel_util.h @@ -0,0 +1,65 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_KERNEL_UTIL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_KERNEL_UTIL_H_ + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; } +inline int SizeOfDimension(const TfLiteTensor* t, int dim) { + return t->dims->data[dim]; +} +inline TfLiteTensor* GetInput(TfLiteContext* context, TfLiteNode* node, + int index) { + return &context->tensors[node->inputs->data[index]]; +} +inline TfLiteTensor* GetOutput(TfLiteContext* context, TfLiteNode* node, + int index) { + return &context->tensors[node->outputs->data[index]]; +} +inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; } +inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; } + +inline TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context, + const TfLiteNode* node, int index) { + const bool use_tensor = node->inputs->data[index] != kOptionalTensor; + if (use_tensor) { + return &context->tensors[node->inputs->data[index]]; + } + return nullptr; +} + +// Calculates the multiplication factor for a quantized convolution (or +// quantized depthwise convolution) involving the given tensors. Returns an +// error if the scales of the tensors are not compatible. +TfLiteStatus GetQuantizedConvolutionMultipler( + TfLiteContext* context, TfLiteTensor* input, TfLiteTensor* filter, + TfLiteTensor* bias, TfLiteTensor* output, double* multiplier); + +// Calculates the useful range of an activation layer given its activation +// tensor. +void CalculateActivationRangeUint8(TfLiteFusedActivation activation, + TfLiteTensor* output, int32_t* act_min, + int32_t* act_max); +void CalculateActivationRangeFloat(TfLiteFusedActivation activation, + float* activation_min, + float* activation_max); + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_KERNEL_UTIL_H_ diff --git a/tensorflow/contrib/lite/kernels/l2norm.cc b/tensorflow/contrib/lite/kernels/l2norm.cc new file mode 100644 index 0000000000..f43aa372b6 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/l2norm.cc @@ -0,0 +1,112 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace l2norm { + +// This file has two implementation of L2Norm. +enum KernelType { + kReference, + kGenericOptimized, +}; + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // TODO(ahentz): Our current implementations rely on the inputs being 4D. + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + + // TODO(ahentz): Our current implementations only support float32. + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + // TODO(ahentz): For some reason our implementations don't support + // activations. + TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(4); + output_size->data[0] = input->dims->data[0]; + output_size->data[1] = input->dims->data[1]; + output_size->data[2] = input->dims->data[2]; + output_size->data[3] = input->dims->data[3]; + + return context->ResizeTensor(context, output, output_size); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { +#define TF_LITE_L2NORM(type) \ + type::L2Normalization( \ + GetTensorData(input), GetTensorDims(input), \ + GetTensorData(output), GetTensorDims(output)) + + if (kernel_type == kReference) { + TF_LITE_L2NORM(reference_ops); + } + if (kernel_type == kGenericOptimized) { + TF_LITE_L2NORM(optimized_ops); + } +#undef TF_LITE_L2NORM + } else { + context->ReportError(context, "Inputs and outputs not all float types."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace l2norm + +TfLiteRegistration* Register_L2NORM_REF() { + static TfLiteRegistration r = {nullptr, nullptr, l2norm::Prepare, + l2norm::Eval}; + return &r; +} + +TfLiteRegistration* Register_L2NORM_GENERIC_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, l2norm::Prepare, + l2norm::Eval}; + return &r; +} + +TfLiteRegistration* Register_L2_NORMALIZATION() { + return Register_L2NORM_GENERIC_OPT(); +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/l2norm_test.cc b/tensorflow/contrib/lite/kernels/l2norm_test.cc new file mode 100644 index 0000000000..b1db89b8bd --- /dev/null +++ b/tensorflow/contrib/lite/kernels/l2norm_test.cc @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class L2NormOpModel : public SingleOpModel { + public: + L2NormOpModel(std::initializer_list input_shape, + ActivationFunctionType activation_type) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_L2_NORMALIZATION, BuiltinOptions_L2NormOptions, + CreateL2NormOptions(builder_, activation_type).Union()); + BuildInterpreter({input_shape}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int input_; + int output_; +}; + +TEST(L2NormOpTest, SimpleTest) { + L2NormOpModel m({1, 1, 1, 6}, ActivationFunctionType_NONE); + m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/local_response_norm.cc b/tensorflow/contrib/lite/kernels/local_response_norm.cc new file mode 100644 index 0000000000..c1c70d0dfa --- /dev/null +++ b/tensorflow/contrib/lite/kernels/local_response_norm.cc @@ -0,0 +1,109 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace local_response_norm { + +// This file has two implementation of LocalResponseNorm. +enum KernelType { + kReference, + kGenericOptimized, +}; + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(4); + output_size->data[0] = input->dims->data[0]; + output_size->data[1] = input->dims->data[1]; + output_size->data[2] = input->dims->data[2]; + output_size->data[3] = input->dims->data[3]; + + return context->ResizeTensor(context, output, output_size); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { +#define TF_LITE_LOCAL_RESPONSE_NORM(type) \ + type::LocalResponseNormalization( \ + GetTensorData(input), GetTensorDims(input), params->radius, \ + params->bias, params->alpha, params->beta, GetTensorData(output), \ + GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_LOCAL_RESPONSE_NORM(reference_ops); + } + if (kernel_type == kGenericOptimized) { + TF_LITE_LOCAL_RESPONSE_NORM(optimized_ops); + } +#undef TF_LITE_LOCAL_RESPONSE_NORM + } else { + context->ReportError(context, "Inputs and outputs not all float types."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace local_response_norm + +TfLiteRegistration* Register_LOCAL_RESPONSE_NORM_REF() { + static TfLiteRegistration r = { + nullptr, nullptr, local_response_norm::Prepare, + local_response_norm::Eval}; + return &r; +} + +TfLiteRegistration* Register_LOCAL_RESPONSE_NORM_GENERIC_OPT() { + static TfLiteRegistration r = { + nullptr, nullptr, local_response_norm::Prepare, + local_response_norm::Eval}; + return &r; +} + +TfLiteRegistration* Register_LOCAL_RESPONSE_NORMALIZATION() { + return Register_LOCAL_RESPONSE_NORM_GENERIC_OPT(); +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/local_response_norm_test.cc b/tensorflow/contrib/lite/kernels/local_response_norm_test.cc new file mode 100644 index 0000000000..63a8b0a3d0 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/local_response_norm_test.cc @@ -0,0 +1,101 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class LocalResponseNormOpModel : public SingleOpModel { + public: + LocalResponseNormOpModel(std::initializer_list input_shape, int radius, + float bias, float alpha, float beta) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + BuiltinOptions_LocalResponseNormalizationOptions, + CreateLocalResponseNormalizationOptions(builder_, radius, bias, + alpha, beta) + .Union()); + BuildInterpreter({input_shape}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int input_; + int output_; +}; + +TEST(LocalResponseNormOpTest, SameAsL2Norm) { + LocalResponseNormOpModel m({1, 1, 1, 6}, /*radius=*/20, /*bias=*/0.0, + /*alpha=*/1.0, /*beta=*/0.5); + m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + m.Invoke(); + // The result is every input divided by 2. + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}))); +} + +TEST(LocalResponseNormOpTest, WithAlpha) { + LocalResponseNormOpModel m({1, 1, 1, 6}, /*radius=*/20, /*bias=*/0.0, + /*alpha=*/4.0, /*beta=*/0.5); + m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + m.Invoke(); + // The result is every input divided by 3. + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + {-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}))); +} + +TEST(LocalResponseNormOpTest, WithBias) { + LocalResponseNormOpModel m({1, 1, 1, 6}, /*radius=*/20, /*bias=*/9.0, + /*alpha=*/4.0, /*beta=*/0.5); + m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + m.Invoke(); + // The result is every input divided by 5. + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}))); +} + +TEST(LocalResponseNormOpTest, SmallRadius) { + LocalResponseNormOpModel m({1, 1, 1, 6}, /*radius=*/2, /*bias=*/9.0, + /*alpha=*/4.0, /*beta=*/0.5); + m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/lsh_projection.cc b/tensorflow/contrib/lite/kernels/lsh_projection.cc new file mode 100644 index 0000000000..5f73b56ed9 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/lsh_projection.cc @@ -0,0 +1,204 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// LSH Projection projects an input to a bit vector via locality senstive +// hashing. +// +// Options: +// Sparse: +// Computed bit vector is considered to be sparse. +// Each output element is an int32 made up by multiple bits computed from +// hash functions. +// +// Dense: +// Computed bit vector is considered to be dense. Each output element is +// either 0 or 1 that represents a bit. +// +// Input: +// Tensor[0]: Hash functions. Dim.size == 2, DataType: Float. +// Tensor[0].Dim[0]: Num of hash functions. +// Tensor[0].Dim[1]: Num of projected output bits generated by +// each hash function. +// In sparse case, Tensor[0].Dim[1] + ceil( log2(Tensor[0].Dim[0] )) <= 32. +// +// Tensor[1]: Input. Dim.size >= 1, No restriction on DataType. +// Tensor[2]: Optional, Weight. Dim.size == 1, DataType: Float. +// If not set, each element of input is considered to have same +// weight of 1.0 Tensor[1].Dim[0] == Tensor[2].Dim[0] +// +// Output: +// Sparse: +// Output.Dim == { Tensor[0].Dim[0] } +// A tensor of int32 that represents hash signatures, +// +// NOTE: To avoid collisions across hash functions, an offset value of +// k * (1 << Tensor[0].Dim[1]) will be added to each signature, +// k is the index of the hash function. +// Dense: +// Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] } +// A flattened tensor represents projected bit vectors. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include + +namespace tflite { +namespace ops { +namespace builtin { +namespace lsh_projection { + +TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* hash = GetInput(context, node, 0); + TF_LITE_ENSURE_EQ(context, NumDimensions(hash), 2); + // Support up to 32 bits. + TF_LITE_ENSURE(context, SizeOfDimension(hash, 1) <= 32); + + TfLiteTensor* input = GetInput(context, node, 1); + TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + + if (NumInputs(node) == 3) { + TfLiteTensor* weight = GetInput(context, node, 2); + TF_LITE_ENSURE_EQ(context, NumDimensions(weight), 1); + TF_LITE_ENSURE_EQ(context, SizeOfDimension(weight, 0), + SizeOfDimension(input, 0)); + } + + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteIntArray* outputSize = TfLiteIntArrayCreate(1); + switch (params->type) { + case kTfLiteLshProjectionSparse: + outputSize->data[0] = SizeOfDimension(hash, 0); + break; + case kTfLiteLshProjectionDense: + outputSize->data[0] = SizeOfDimension(hash, 0) * SizeOfDimension(hash, 1); + break; + default: + return kTfLiteError; + } + return context->ResizeTensor(context, output, outputSize); +} + +// Compute sign bit of dot product of hash(seed, input) and weight. +// NOTE: use float as seed, and convert it to double as a temporary solution +// to match the trained model. This is going to be changed once the new +// model is trained in an optimized method. +// +int RunningSignBit(const TfLiteTensor* input, const TfLiteTensor* weight, + float seed) { + double score = 0.0; + int input_item_bytes = input->bytes / SizeOfDimension(input, 0); + char* input_ptr = input->data.raw; + + const size_t seed_size = sizeof(float); + const size_t key_bytes = sizeof(float) + input_item_bytes; + std::unique_ptr key(new char[key_bytes]); + + for (int i = 0; i < SizeOfDimension(input, 0); ++i) { + // Create running hash id and value for current dimension. + memcpy(key.get(), &seed, seed_size); + memcpy(key.get() + seed_size, input_ptr, input_item_bytes); + + int64_t hash_signature = ::util::Fingerprint64(key.get(), key_bytes); + double running_value = static_cast(hash_signature); + input_ptr += input_item_bytes; + if (weight == nullptr) { + score += running_value; + } else { + score += weight->data.f[i] * running_value; + } + } + + return (score > 0) ? 1 : 0; +} + +void SparseLshProjection(const TfLiteTensor* hash, const TfLiteTensor* input, + const TfLiteTensor* weight, int32_t* out_buf) { + int num_hash = SizeOfDimension(hash, 0); + int num_bits = SizeOfDimension(hash, 1); + for (int i = 0; i < num_hash; i++) { + int32_t hash_signature = 0; + for (int j = 0; j < num_bits; j++) { + float seed = hash->data.f[i * num_bits + j]; + int bit = RunningSignBit(input, weight, seed); + hash_signature = (hash_signature << 1) | bit; + } + *out_buf++ = hash_signature + i * (1 << num_bits); + } +} + +void DenseLshProjection(const TfLiteTensor* hash, const TfLiteTensor* input, + const TfLiteTensor* weight, int32_t* out_buf) { + int num_hash = SizeOfDimension(hash, 0); + int num_bits = SizeOfDimension(hash, 1); + for (int i = 0; i < num_hash; i++) { + for (int j = 0; j < num_bits; j++) { + float seed = hash->data.f[i * num_bits + j]; + int bit = RunningSignBit(input, weight, seed); + *out_buf++ = bit; + } + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + int32_t* out_buf = GetOutput(context, node, 0)->data.i32; + TfLiteTensor* hash = GetInput(context, node, 0); + TfLiteTensor* input = GetInput(context, node, 1); + TfLiteTensor* weight = + NumInputs(node) == 2 ? nullptr : GetInput(context, node, 2); + + switch (params->type) { + case kTfLiteLshProjectionDense: + DenseLshProjection(hash, input, weight, out_buf); + break; + case kTfLiteLshProjectionSparse: + SparseLshProjection(hash, input, weight, out_buf); + break; + default: + return kTfLiteError; + } + + return kTfLiteOk; +} +} // namespace lsh_projection + +TfLiteRegistration* Register_LSH_PROJECTION() { + static TfLiteRegistration r = {nullptr, nullptr, lsh_projection::Resize, + lsh_projection::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/lsh_projection_test.cc b/tensorflow/contrib/lite/kernels/lsh_projection_test.cc new file mode 100644 index 0000000000..1011927848 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/lsh_projection_test.cc @@ -0,0 +1,123 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAre; + +class LSHProjectionOpModel : public SingleOpModel { + public: + LSHProjectionOpModel(LSHProjectionType type, + std::initializer_list hash_shape, + std::initializer_list input_shape, + std::initializer_list weight_shape) { + hash_ = AddInput(TensorType_FLOAT32); + input_ = AddInput(TensorType_INT32); + if (weight_shape.size() > 0) { + weight_ = AddInput(TensorType_FLOAT32); + } + output_ = AddOutput(TensorType_INT32); + + SetBuiltinOp(BuiltinOperator_LSH_PROJECTION, + BuiltinOptions_LSHProjectionOptions, + CreateLSHProjectionOptions(builder_, type).Union()); + if (weight_shape.size() > 0) { + BuildInterpreter({hash_shape, input_shape, weight_shape}); + } else { + BuildInterpreter({hash_shape, input_shape}); + } + + output_size_ = 1; + for (int i : hash_shape) { + output_size_ *= i; + if (type == LSHProjectionType_SPARSE) { + break; + } + } + } + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetHash(std::initializer_list data) { + PopulateTensor(hash_, data); + } + + void SetWeight(std::initializer_list f) { PopulateTensor(weight_, f); } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int input_; + int hash_; + int weight_; + int output_; + + int output_size_; +}; + +TEST(LSHProjectionOpTest2, Dense1DInputs) { + LSHProjectionOpModel m(LSHProjectionType_DENSE, {3, 2}, {5}, {5}); + + m.SetInput({12345, 54321, 67890, 9876, -12345678}); + m.SetHash({0.123, 0.456, -0.321, 1.234, 5.678, -4.321}); + m.SetWeight({1.0, 1.0, 1.0, 1.0, 1.0}); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAre(0, 0, 0, 1, 0, 0)); +} + +TEST(LSHProjectionOpTest2, Sparse1DInputs) { + LSHProjectionOpModel m(LSHProjectionType_SPARSE, {3, 2}, {5}, {}); + + m.SetInput({12345, 54321, 67890, 9876, -12345678}); + m.SetHash({0.123, 0.456, -0.321, 1.234, 5.678, -4.321}); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAre(0 + 0, 4 + 1, 8 + 0)); +} + +TEST(LSHProjectionOpTest2, Sparse3DInputs) { + LSHProjectionOpModel m(LSHProjectionType_SPARSE, {3, 2}, {5, 2, 2}, {5}); + + m.SetInput({1234, 2345, 3456, 1234, 4567, 5678, 6789, 4567, 7891, 8912, + 9123, 7890, -987, -876, -765, -987, -543, -432, -321, -543}); + m.SetHash({0.123, 0.456, -0.321, 1.234, 5.678, -4.321}); + m.SetWeight({0.12, 0.34, 0.56, 0.67, 0.78}); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAre(0 + 2, 4 + 1, 8 + 1)); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc new file mode 100644 index 0000000000..6c06264d84 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -0,0 +1,515 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace lstm { + +// Input Tensors of size {n_batch, n_input} +constexpr int kInputTensor = 0; + +// Input weight tensors of size: {n_cell, n_input} +constexpr int kInputToInputWeightsTensor = 1; // Optional +constexpr int kInputToForgetWeightsTensor = 2; +constexpr int kInputToCellWeightsTensor = 3; +constexpr int kInputToOutputWeightsTensor = 4; + +// Recurrent weight tensors of size {n_cell, n_output} +constexpr int kRecurrentToInputWeightsTensor = 5; // Optional +constexpr int kRecurrentToForgetWeightsTensor = 6; +constexpr int kRecurrentToCellWeightsTensor = 7; +constexpr int kRecurrentToOutputWeightsTensor = 8; + +// Peephole weights tensors of size {n_cell}, representing a diagonal matrix. +constexpr int kCellToInputWeightsTensor = 9; // Optional +constexpr int kCellToForgetWeightsTensor = 10; // Optional +constexpr int kCellToOutputWeightsTensor = 11; // Optional + +// Gates bias tensors of size {n_cell} +constexpr int kInputGateBiasTensor = 12; // Optional +constexpr int kForgetGateBiasTensor = 13; +constexpr int kCellGateBiasTensor = 14; +constexpr int kOutputGateBiasTensor = 15; + +// Projection weight tensor of size {n_output, n_cell} +constexpr int kProjectionWeightsTensor = 16; // Optional +// Projection bias tensor of size {n_output} +constexpr int kProjectionBiasTensor = 17; // Optional + +// Output tensors. +constexpr int kScratchBufferTensor = 0; +constexpr int kOutputStateTensor = 1; +constexpr int kCellStateTensor = 2; +constexpr int kOutputTensor = 3; + +// Check that input tensor dimensions matches with each other. +TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, + TfLiteNode* node, int n_input, + int n_output, int n_cell) { + auto* params = reinterpret_cast(node->builtin_data); + + // Making sure clipping parameters have valid values. + // == 0 means no clipping + // > 0 means clipping + TF_LITE_ENSURE(context, params->cell_clip >= 0); + TF_LITE_ENSURE(context, params->proj_clip >= 0); + + TfLiteTensor* input_to_input_weights = + GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); + if (input_to_input_weights) { + TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); + } + + TfLiteTensor* input_to_forget_weights = + GetInput(context, node, kInputToForgetWeightsTensor); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); + + TfLiteTensor* input_to_cell_weights = + GetInput(context, node, kInputToCellWeightsTensor); + TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); + + TfLiteTensor* recurrent_to_input_weights = + GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); + if (recurrent_to_input_weights) { + TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0], + n_cell); + TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1], + n_output); + } + + TfLiteTensor* recurrent_to_forget_weights = + GetInput(context, node, kRecurrentToForgetWeightsTensor); + TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[0], + n_cell); + TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[1], + n_output); + + TfLiteTensor* recurrent_to_cell_weights = + GetInput(context, node, kRecurrentToCellWeightsTensor); + TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1], + n_output); + + // We make sure the input-gate's parameters are either both present (regular + // LSTM) or not at all (CIFG-LSTM). + const bool cifg_weights_all_or_none = + ((input_to_input_weights != nullptr) && + (recurrent_to_input_weights != nullptr)) || + ((input_to_input_weights == nullptr) && + (recurrent_to_input_weights == nullptr)); + TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); + + TfLiteTensor* cell_to_input_weights = + GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); + if (cell_to_input_weights) { + TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); + TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); + } + + TfLiteTensor* cell_to_forget_weights = + GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); + if (cell_to_forget_weights) { + TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); + TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); + } + + TfLiteTensor* cell_to_output_weights = + GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); + if (cell_to_output_weights) { + TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); + TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell); + } + + // Making sure the peephole weights are there all or none. + const bool use_cifg = (input_to_input_weights == nullptr); + const bool peephole_weights_all_or_none = + ((cell_to_input_weights != nullptr || use_cifg) && + (cell_to_forget_weights != nullptr) && + (cell_to_output_weights != nullptr)) || + ((cell_to_input_weights == nullptr) && + (cell_to_forget_weights == nullptr) && + (cell_to_output_weights == nullptr)); + TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); + + // Make sure the input gate bias is present only when not a CIFG-LSTM. + TfLiteTensor* input_gate_bias = + GetOptionalInputTensor(context, node, kInputGateBiasTensor); + if (use_cifg) { + TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); + } else { + TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1); + TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell); + } + + TfLiteTensor* forget_gate_bias = + GetInput(context, node, kForgetGateBiasTensor); + TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1); + TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell); + + TfLiteTensor* cell_bias = GetInput(context, node, kCellGateBiasTensor); + TF_LITE_ENSURE_EQ(context, cell_bias->dims->size, 1); + TF_LITE_ENSURE_EQ(context, cell_bias->dims->data[0], n_cell); + + TfLiteTensor* output_gate_bias = + GetInput(context, node, kOutputGateBiasTensor); + TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); + TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); + + TfLiteTensor* projection_weights = + GetOptionalInputTensor(context, node, kProjectionWeightsTensor); + if (projection_weights) { + TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output); + TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); + } + + TfLiteTensor* projection_bias = + GetOptionalInputTensor(context, node, kProjectionBiasTensor); + if (projection_bias) { + TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); + TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); + } + + // Making sure the projection tensors are consistent: + // 1) If projection weight is not present, then projection bias should not be + // present. + // 2) If projection weight is present, then projection bias is optional. + // TODO(ghodrat): make sure this is correct. + const bool projecton_tensors_consistent = + ((projection_weights != nullptr) || (projection_bias == nullptr)); + TF_LITE_ENSURE(context, projecton_tensors_consistent == true); + + return kTfLiteOk; +} + +// Resize the output, state and scratch tensors based on the sizes of the input +// tensors. Also check that the size of the input tensors match each other. +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + // Check we have all the inputs and outputs we need. + TF_LITE_ENSURE_EQ(context, node->inputs->size, 18); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 4); + + // Inferring batch size, number of outputs and number of cells from the + // input tensors. + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input->dims->size > 1); + const int n_batch = input->dims->data[0]; + const int n_input = input->dims->data[1]; + + TfLiteTensor* input_to_output_weights = + GetInput(context, node, kInputToOutputWeightsTensor); + const int n_cell = input_to_output_weights->dims->data[0]; + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input); + + TfLiteTensor* recurrent_to_output_weights = + GetInput(context, node, kRecurrentToOutputWeightsTensor); + TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->data[0], + n_cell); + const int n_output = recurrent_to_output_weights->dims->data[1]; + + // Check that input tensor dimensions matches with each other. + CheckInputTensorDimensions(context, node, n_input, n_output, n_cell); + + // Get the pointer to output, state and scratch buffer tensors. + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor); + TfLiteTensor* cell_state = GetOutput(context, node, kCellStateTensor); + // TODO(ghodrat): Modify this as soon as we have a finalized method for + // scratch buffers. + TfLiteTensor* scratch_buffer = GetOutput(context, node, kScratchBufferTensor); + + // Resize the output and output_state tensors. + TfLiteIntArray* output_size = TfLiteIntArrayCreate(2); + output_size->data[0] = n_batch; + output_size->data[1] = n_output; + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output, output_size)); + + TfLiteIntArray* output_state_size = TfLiteIntArrayCreate(2); + output_state_size->data[0] = n_batch; + output_state_size->data[1] = n_output; + TF_LITE_ENSURE_OK( + context, context->ResizeTensor(context, output_state, output_state_size)); + + // Resize the output, state and scratch buffer tensors. + TfLiteIntArray* cell_size = TfLiteIntArrayCreate(2); + cell_size->data[0] = n_batch; + cell_size->data[1] = n_cell; + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, cell_state, cell_size)); + + // Mark state tensors as persistent tensors. + output_state->allocation_type = kTfLiteArenaRwPersistent; + cell_state->allocation_type = kTfLiteArenaRwPersistent; + + TfLiteTensor* input_to_input_weights = + GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); + const bool use_cifg = (input_to_input_weights == nullptr); + if (use_cifg) { + TfLiteIntArray* scratch_buffer_size = TfLiteIntArrayCreate(2); + scratch_buffer_size->data[0] = n_batch; + // Reserving space for Cell, Forget, Output gates + scratch_buffer_size->data[1] = n_cell * 3; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_buffer, + scratch_buffer_size)); + } else { + TfLiteIntArray* scratch_buffer_size = TfLiteIntArrayCreate(2); + scratch_buffer_size->data[0] = n_batch; + // Reserving space for Input, Cell, Forget, Output gates + scratch_buffer_size->data[1] = n_cell * 4; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_buffer, + scratch_buffer_size)); + } + return kTfLiteOk; +} + +// The LSTM Op engine. +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + TfLiteTensor* input = GetInput(context, node, kInputTensor); + + TfLiteTensor* input_to_input_weights = + GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); + TfLiteTensor* input_to_forget_weights = + GetInput(context, node, kInputToForgetWeightsTensor); + TfLiteTensor* input_to_cell_weights = + GetInput(context, node, kInputToCellWeightsTensor); + TfLiteTensor* input_to_output_weights = + GetInput(context, node, kInputToOutputWeightsTensor); + + TfLiteTensor* recurrent_to_input_weights = + GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); + TfLiteTensor* recurrent_to_forget_weights = + GetInput(context, node, kRecurrentToForgetWeightsTensor); + TfLiteTensor* recurrent_to_cell_weights = + GetInput(context, node, kRecurrentToCellWeightsTensor); + TfLiteTensor* recurrent_to_output_weights = + GetInput(context, node, kRecurrentToOutputWeightsTensor); + + TfLiteTensor* cell_to_input_weights = + GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); + TfLiteTensor* cell_to_forget_weights = + GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); + TfLiteTensor* cell_to_output_weights = + GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); + + TfLiteTensor* input_gate_bias = + GetOptionalInputTensor(context, node, kInputGateBiasTensor); + TfLiteTensor* forget_gate_bias = + GetInput(context, node, kForgetGateBiasTensor); + TfLiteTensor* cell_bias = GetInput(context, node, kCellGateBiasTensor); + TfLiteTensor* output_gate_bias = + GetInput(context, node, kOutputGateBiasTensor); + + TfLiteTensor* projection_weights = + GetOptionalInputTensor(context, node, kProjectionWeightsTensor); + TfLiteTensor* projection_bias = + GetOptionalInputTensor(context, node, kProjectionBiasTensor); + + TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor); + TfLiteTensor* cell_state = GetOutput(context, node, kCellStateTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + const int n_batch = input->dims->data[0]; + const int n_input = input->dims->data[1]; + // n_cell and n_output will be the same size when there is no projection. + const int n_cell = input_to_output_weights->dims->data[0]; + const int n_output = recurrent_to_output_weights->dims->data[1]; + + // Since we have already checked that weights are all there or none, we can + // check the existense of only one to the get the condition. + const bool use_cifg = (input_to_input_weights == nullptr); + const bool use_peephole = (cell_to_output_weights != nullptr); + + // Index the scratch buffers pointers to the global scratch buffer. + TfLiteTensor* scratch_buffer = GetOutput(context, node, kScratchBufferTensor); + float* input_gate_scratch = nullptr; + float* cell_scratch = nullptr; + float* forget_gate_scratch = nullptr; + float* output_gate_scratch = nullptr; + if (use_cifg) { + cell_scratch = scratch_buffer->data.f; + forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch; + output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch; + } else { + input_gate_scratch = scratch_buffer->data.f; + cell_scratch = scratch_buffer->data.f + n_cell * n_batch; + forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch; + output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch; + } + + // Initialize scratch buffers with bias. + if (!use_cifg) { + tensor_utils::VectorBatchVectorAssign(input_gate_bias->data.f, n_cell, + n_batch, input_gate_scratch); + } + tensor_utils::VectorBatchVectorAssign(forget_gate_bias->data.f, n_cell, + n_batch, forget_gate_scratch); + tensor_utils::VectorBatchVectorAssign(cell_bias->data.f, n_cell, n_batch, + cell_scratch); + tensor_utils::VectorBatchVectorAssign(output_gate_bias->data.f, n_cell, + n_batch, output_gate_scratch); + + // For each batch and cell: compute input_weight * input. + if (!use_cifg) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_input_weights->data.f, n_cell, n_input, input->data.f, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_forget_weights->data.f, n_cell, n_input, input->data.f, n_batch, + forget_gate_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_cell_weights->data.f, n_cell, n_input, input->data.f, n_batch, + cell_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_output_weights->data.f, n_cell, n_input, input->data.f, n_batch, + output_gate_scratch, /*result_stride=*/1); + + // For each batch and cell: compute recurrent_weight * output_state. + if (!use_cifg) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_input_weights->data.f, n_cell, n_output, + output_state->data.f, n_batch, input_gate_scratch, /*result_stride=*/1); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_forget_weights->data.f, n_cell, n_output, + output_state->data.f, n_batch, forget_gate_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_cell_weights->data.f, n_cell, n_output, output_state->data.f, + n_batch, cell_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_output_weights->data.f, n_cell, n_output, + output_state->data.f, n_batch, output_gate_scratch, /*result_stride=*/1); + + // For each batch and cell: update input gate. + if (!use_cifg) { + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_input_weights->data.f, n_cell, cell_state->data.f, n_batch, + input_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, + input_gate_scratch); + } + + // For each batch and cell: update forget gate. + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_forget_weights->data.f, n_cell, cell_state->data.f, n_batch, + forget_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, + forget_gate_scratch); + + // For each batch and cell: update the cell. + tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, + cell_state->data.f, n_batch * n_cell, + cell_state->data.f); + tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, + params->activation, cell_scratch); + if (use_cifg) { + tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, + forget_gate_scratch); + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, forget_gate_scratch, n_batch * n_cell, + cell_state->data.f); + } else { + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state->data.f); + } + if (params->cell_clip > 0.0) { + tensor_utils::ClipVector(cell_state->data.f, n_batch * n_cell, + params->cell_clip, cell_state->data.f); + } + + // For each batch and cell: update the output gate. + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_output_weights->data.f, n_cell, cell_state->data.f, n_batch, + output_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, + output_gate_scratch); + tensor_utils::ApplyActivationToVector(cell_state->data.f, n_batch * n_cell, + params->activation, cell_scratch); + tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, + n_batch * n_cell, output_gate_scratch); + + // For each batch: update the projection and output_state. + const bool use_projection_weight = (projection_weights != nullptr); + const bool use_projection_bias = (projection_bias != nullptr); + if (use_projection_weight) { + if (use_projection_bias) { + tensor_utils::VectorBatchVectorAssign(projection_bias->data.f, n_output, + n_batch, output->data.f); + } else { + tensor_utils::ZeroVector(output->data.f, n_batch * n_output); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights->data.f, n_output, n_cell, output_gate_scratch, + n_batch, output->data.f, /*result_stride=*/1); + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector(output->data.f, n_batch * n_output, + params->proj_clip, output->data.f); + } + } else { + tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, + output->data.f); + } + tensor_utils::CopyVector(output->data.f, n_batch * n_output, + output_state->data.f); + + return kTfLiteOk; +} + +} // namespace lstm + +TfLiteRegistration* Register_LSTM() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + lstm::Prepare, lstm::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc new file mode 100644 index 0000000000..be4c7ddbf8 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/lstm_test.cc @@ -0,0 +1,1088 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite LSTM op. + +#include +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class LSTMOpModel : public SingleOpModel { + public: + LSTMOpModel(int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, + bool use_peephole, bool use_projection_weights, + bool use_projection_bias, float cell_clip, float proj_clip, + const std::vector>& input_shapes) + : n_batch_(n_batch), + n_input_(n_input), + n_cell_(n_cell), + n_output_(n_output) { + input_ = AddInput(TensorType_FLOAT32); + + if (use_cifg) { + input_to_input_weights_ = AddNullInput(); + } else { + input_to_input_weights_ = AddInput(TensorType_FLOAT32); + } + + input_to_forget_weights_ = AddInput(TensorType_FLOAT32); + input_to_cell_weights_ = AddInput(TensorType_FLOAT32); + input_to_output_weights_ = AddInput(TensorType_FLOAT32); + + if (use_cifg) { + recurrent_to_input_weights_ = AddNullInput(); + } else { + recurrent_to_input_weights_ = AddInput(TensorType_FLOAT32); + } + + recurrent_to_forget_weights_ = AddInput(TensorType_FLOAT32); + recurrent_to_cell_weights_ = AddInput(TensorType_FLOAT32); + recurrent_to_output_weights_ = AddInput(TensorType_FLOAT32); + + if (use_peephole) { + if (use_cifg) { + cell_to_input_weights_ = AddNullInput(); + } else { + cell_to_input_weights_ = AddInput(TensorType_FLOAT32); + } + cell_to_forget_weights_ = AddInput(TensorType_FLOAT32); + cell_to_output_weights_ = AddInput(TensorType_FLOAT32); + } else { + cell_to_input_weights_ = AddNullInput(); + cell_to_forget_weights_ = AddNullInput(); + cell_to_output_weights_ = AddNullInput(); + } + + if (use_cifg) { + input_gate_bias_ = AddNullInput(); + } else { + input_gate_bias_ = AddInput(TensorType_FLOAT32); + } + forget_gate_bias_ = AddInput(TensorType_FLOAT32); + cell_bias_ = AddInput(TensorType_FLOAT32); + output_gate_bias_ = AddInput(TensorType_FLOAT32); + + if (use_projection_weights) { + projection_weights_ = AddInput(TensorType_FLOAT32); + if (use_projection_bias) { + projection_bias_ = AddInput(TensorType_FLOAT32); + } else { + projection_bias_ = AddNullInput(); + } + } else { + projection_weights_ = AddNullInput(); + projection_bias_ = AddNullInput(); + } + + scratch_buffer_ = AddOutput(TensorType_FLOAT32); + // TODO(ghodrat): Modify these states when we have a permanent solution for + // persistent buffer. + output_state_ = AddOutput(TensorType_FLOAT32); + cell_state_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + + SetBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, + CreateLSTMOptions(builder_, ActivationFunctionType_TANH, + cell_clip, proj_clip) + .Union()); + BuildInterpreter(input_shapes); + } + + void SetInputToInputWeights(std::initializer_list f) { + PopulateTensor(input_to_input_weights_, f); + } + + void SetInputToForgetWeights(std::initializer_list f) { + PopulateTensor(input_to_forget_weights_, f); + } + + void SetInputToCellWeights(std::initializer_list f) { + PopulateTensor(input_to_cell_weights_, f); + } + + void SetInputToOutputWeights(std::initializer_list f) { + PopulateTensor(input_to_output_weights_, f); + } + + void SetRecurrentToInputWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_input_weights_, f); + } + + void SetRecurrentToForgetWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_forget_weights_, f); + } + + void SetRecurrentToCellWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_cell_weights_, f); + } + + void SetRecurrentToOutputWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_output_weights_, f); + } + + void SetCellToInputWeights(std::initializer_list f) { + PopulateTensor(cell_to_input_weights_, f); + } + + void SetCellToForgetWeights(std::initializer_list f) { + PopulateTensor(cell_to_forget_weights_, f); + } + + void SetCellToOutputWeights(std::initializer_list f) { + PopulateTensor(cell_to_output_weights_, f); + } + + void SetInputGateBias(std::initializer_list f) { + PopulateTensor(input_gate_bias_, f); + } + + void SetForgetGateBias(std::initializer_list f) { + PopulateTensor(forget_gate_bias_, f); + } + + void SetCellBias(std::initializer_list f) { + PopulateTensor(cell_bias_, f); + } + + void SetOutputGateBias(std::initializer_list f) { + PopulateTensor(output_gate_bias_, f); + } + + void SetProjectionWeights(std::initializer_list f) { + PopulateTensor(projection_weights_, f); + } + + void SetProjectionBias(std::initializer_list f) { + PopulateTensor(projection_bias_, f); + } + + void ResetOutputState() { + const int zero_buffer_size = n_cell_ * n_batch_; + std::unique_ptr zero_buffer(new float[zero_buffer_size]); + memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float)); + PopulateTensor(output_state_, 0, zero_buffer.get(), + zero_buffer.get() + zero_buffer_size); + } + + void ResetCellState() { + const int zero_buffer_size = n_cell_ * n_batch_; + std::unique_ptr zero_buffer(new float[zero_buffer_size]); + memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float)); + PopulateTensor(cell_state_, 0, zero_buffer.get(), + zero_buffer.get() + zero_buffer_size); + } + + void SetInput(int offset, float* begin, float* end) { + PopulateTensor(input_, offset, begin, end); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + int num_inputs() { return n_input_; } + int num_outputs() { return n_output_; } + int num_cells() { return n_cell_; } + int num_batches() { return n_batch_; } + + private: + int input_; + int input_to_input_weights_; + int input_to_forget_weights_; + int input_to_cell_weights_; + int input_to_output_weights_; + + int recurrent_to_input_weights_; + int recurrent_to_forget_weights_; + int recurrent_to_cell_weights_; + int recurrent_to_output_weights_; + + int cell_to_input_weights_; + int cell_to_forget_weights_; + int cell_to_output_weights_; + + int input_gate_bias_; + int forget_gate_bias_; + int cell_bias_; + int output_gate_bias_; + + int projection_weights_; + int projection_bias_; + + int output_; + int output_state_; + int cell_state_; + int scratch_buffer_; + + int n_batch_; + int n_input_; + int n_cell_; + int n_output_; +}; + +TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { + const int n_batch = 1; + const int n_input = 2; + // n_cell and n_output have the same size when there is no projection. + const int n_cell = 4; + const int n_output = 4; + + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/false, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {n_cell, n_input}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {n_cell, n_output}, // recurrent_to_input_weight tensor + {n_cell, n_output}, // recurrent_to_forget_weight tensor + {n_cell, n_output}, // recurrent_to_cell_weight tensor + {n_cell, n_output}, // recurrent_to_output_weight tensor + + {0}, // cell_to_input_weight tensor + {0}, // cell_to_forget_weight tensor + {0}, // cell_to_output_weight tensor + + {n_cell}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_bias tensor + {n_cell}, // output_gate_bias tensor + + {0, 0}, // projection_weight tensor + {0}, // projection_bias tensor + }); + + lstm.SetInputToInputWeights({-0.45018822, -0.02338299, -0.0870589, + -0.34550029, 0.04266912, -0.15680569, + -0.34856534, 0.43890524}); + + lstm.SetInputToCellWeights({-0.50013041, 0.1370284, 0.11810488, 0.2013163, + -0.20583314, 0.44344562, 0.22077113, + -0.29909778}); + + lstm.SetInputToForgetWeights({0.09701663, 0.20334584, -0.50592935, + -0.31343272, -0.40032279, 0.44781327, + 0.01387155, -0.35593212}); + + lstm.SetInputToOutputWeights({-0.25065863, -0.28290087, 0.04613829, + 0.40525138, 0.44272184, 0.03897077, -0.1556896, + 0.19487578}); + + lstm.SetInputGateBias({0., 0., 0., 0.}); + + lstm.SetCellBias({0., 0., 0., 0.}); + + lstm.SetForgetGateBias({1., 1., 1., 1.}); + + lstm.SetOutputGateBias({0., 0., 0., 0.}); + + lstm.SetRecurrentToInputWeights( + {-0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324, + -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322, + -0.12528998, 0.24077177, -0.51332325, -0.33502164, 0.10629296}); + + lstm.SetRecurrentToCellWeights( + {-0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841, + -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659, + -0.46367589, 0.26016325, -0.03894562, -0.16368064}); + + lstm.SetRecurrentToForgetWeights( + {-0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892, + -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436, + 0.28053468, 0.01560611, -0.20127171, -0.01140004}); + + lstm.SetRecurrentToOutputWeights( + {0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793, + 0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421, + -0.51818722, -0.15390486, 0.0468148, 0.39922136}); + + static float lstm_input[] = {2., 3., 3., 4., 1., 1.}; + static float lstm_golden_output[] = {-0.02973187, 0.1229473, 0.20885126, + -0.15358765, -0.03716109, 0.12507336, + 0.41193449, -0.20860538, -0.15053082, + 0.09120187, 0.24278517, -0.12222792}; + + // Resetting cell_state and output_state + lstm.ResetCellState(); + lstm.ResetOutputState(); + + const int input_sequence_size = + sizeof(lstm_input) / sizeof(float) / (lstm.num_inputs()); + for (int i = 0; i < input_sequence_size; i++) { + float* batch0_start = lstm_input + i * lstm.num_inputs(); + float* batch0_end = batch0_start + lstm.num_inputs(); + + lstm.SetInput(0, batch0_start, batch0_end); + + lstm.Invoke(); + + float* golden_start = lstm_golden_output + i * lstm.num_outputs(); + float* golden_end = golden_start + lstm.num_outputs(); + std::vector expected; + expected.insert(expected.end(), golden_start, golden_end); + EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + } +} + +TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) { + const int n_batch = 1; + const int n_input = 2; + // n_cell and n_output have the same size when there is no projection. + const int n_cell = 4; + const int n_output = 4; + + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/true, /*use_peephole=*/true, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {0, 0}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {0, 0}, // recurrent_to_input_weight tensor + {n_cell, n_output}, // recurrent_to_forget_weight tensor + {n_cell, n_output}, // recurrent_to_cell_weight tensor + {n_cell, n_output}, // recurrent_to_output_weight tensor + + {0}, // cell_to_input_weight tensor + {n_cell}, // cell_to_forget_weight tensor + {n_cell}, // cell_to_output_weight tensor + + {0}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_bias tensor + {n_cell}, // output_gate_bias tensor + + {0, 0}, // projection_weight tensor + {0}, // projection_bias tensor + }); + + lstm.SetInputToCellWeights({-0.49770179, -0.27711356, -0.09624726, 0.05100781, + 0.04717243, 0.48944736, -0.38535351, + -0.17212132}); + + lstm.SetInputToForgetWeights({-0.55291498, -0.42866567, 0.13056988, + -0.3633365, -0.22755712, 0.28253698, 0.24407166, + 0.33826375}); + + lstm.SetInputToOutputWeights({0.10725588, -0.02335852, -0.55932593, + -0.09426838, -0.44257352, 0.54939759, + 0.01533556, 0.42751634}); + + lstm.SetCellBias({0., 0., 0., 0.}); + + lstm.SetForgetGateBias({1., 1., 1., 1.}); + + lstm.SetOutputGateBias({0., 0., 0., 0.}); + + lstm.SetRecurrentToCellWeights( + {0.54066205, -0.32668582, -0.43562764, -0.56094903, 0.42957711, + 0.01841056, -0.32764608, -0.33027974, -0.10826075, 0.20675004, + 0.19069612, -0.03026325, -0.54532051, 0.33003211, 0.44901288, + 0.21193194}); + + lstm.SetRecurrentToForgetWeights( + {-0.13832897, -0.0515101, -0.2359007, -0.16661474, -0.14340827, + 0.36986142, 0.23414481, 0.55899, 0.10798943, -0.41174671, 0.17751795, + -0.34484994, -0.35874045, -0.11352962, 0.27268326, 0.54058349}); + + lstm.SetRecurrentToOutputWeights( + {0.41613156, 0.42610586, -0.16495961, -0.5663873, 0.30579174, -0.05115908, + -0.33941799, 0.23364776, 0.11178309, 0.09481031, -0.26424935, 0.46261835, + 0.50248802, 0.26114327, -0.43736315, 0.33149987}); + + lstm.SetCellToForgetWeights( + {0.47485286, -0.51955009, -0.24458408, 0.31544167}); + lstm.SetCellToOutputWeights( + {-0.17135078, 0.82760304, 0.85573703, -0.77109635}); + + static float lstm_input[] = {2., 3., 3., 4., 1., 1.}; + static float lstm_golden_output[] = {-0.36444446, -0.00352185, 0.12886585, + -0.05163646, -0.42312205, -0.01218222, + 0.24201041, -0.08124574, -0.358325, + -0.04621704, 0.21641694, -0.06471302}; + + // Resetting cell_state and output_state + lstm.ResetCellState(); + lstm.ResetOutputState(); + + const int input_sequence_size = + sizeof(lstm_input) / sizeof(float) / (lstm.num_inputs()); + for (int i = 0; i < input_sequence_size; i++) { + float* batch0_start = lstm_input + i * lstm.num_inputs(); + float* batch0_end = batch0_start + lstm.num_inputs(); + + lstm.SetInput(0, batch0_start, batch0_end); + + lstm.Invoke(); + + float* golden_start = lstm_golden_output + i * lstm.num_outputs(); + float* golden_end = golden_start + lstm.num_outputs(); + std::vector expected; + expected.insert(expected.end(), golden_start, golden_end); + EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + } +} + +TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) { + const int n_batch = 2; + const int n_input = 5; + const int n_cell = 20; + const int n_output = 16; + + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/true, + /*use_projection_weights=*/true, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {n_cell, n_input}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {n_cell, n_output}, // recurrent_to_input_weight tensor + {n_cell, n_output}, // recurrent_to_forget_weight tensor + {n_cell, n_output}, // recurrent_to_cell_weight tensor + {n_cell, n_output}, // recurrent_to_output_weight tensor + + {n_cell}, // cell_to_input_weight tensor + {n_cell}, // cell_to_forget_weight tensor + {n_cell}, // cell_to_output_weight tensor + + {n_cell}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_bias tensor + {n_cell}, // output_gate_bias tensor + + {n_output, n_cell}, // projection_weight tensor + {0}, // projection_bias tensor + }); + + lstm.SetInputToInputWeights( + {0.021393683, 0.06124551, 0.046905167, -0.014657677, -0.03149463, + 0.09171803, 0.14647801, 0.10797193, -0.0057968358, 0.0019193048, + -0.2726754, 0.10154029, -0.018539885, 0.080349885, -0.10262385, + -0.022599787, -0.09121155, -0.008675967, -0.045206103, -0.0821282, + -0.008045952, 0.015478081, 0.055217247, 0.038719587, 0.044153627, + -0.06453243, 0.05031825, -0.046935108, -0.008164439, 0.014574226, + -0.1671009, -0.15519552, -0.16819797, -0.13971269, -0.11953059, + 0.25005487, -0.22790983, 0.009855087, -0.028140958, -0.11200698, + 0.11295408, -0.0035217577, 0.054485075, 0.05184695, 0.064711206, + 0.10989193, 0.11674786, 0.03490607, 0.07727357, 0.11390585, + -0.1863375, -0.1034451, -0.13945189, -0.049401227, -0.18767063, + 0.042483903, 0.14233552, 0.13832581, 0.18350165, 0.14545603, + -0.028545704, 0.024939531, 0.050929718, 0.0076203286, -0.0029723682, + -0.042484224, -0.11827596, -0.09171104, -0.10808628, -0.16327988, + -0.2273378, -0.0993647, -0.017155107, 0.0023917493, 0.049272764, + 0.0038534778, 0.054764505, 0.089753784, 0.06947234, 0.08014476, + -0.04544234, -0.0497073, -0.07135631, -0.048929106, -0.004042012, + -0.009284026, 0.018042054, 0.0036860977, -0.07427302, -0.11434604, + -0.018995456, 0.031487543, 0.012834908, 0.019977754, 0.044256654, + -0.39292613, -0.18519334, -0.11651281, -0.06809892, 0.011373677}); + + lstm.SetInputToForgetWeights( + {-0.0018401089, -0.004852237, 0.03698424, 0.014181704, 0.028273236, + -0.016726194, -0.05249759, -0.10204261, 0.00861066, -0.040979505, + -0.009899187, 0.01923892, -0.028177269, -0.08535103, -0.14585495, + 0.10662567, -0.01909731, -0.017883534, -0.0047269356, -0.045103323, + 0.0030784295, 0.076784775, 0.07463696, 0.094531395, 0.0814421, + -0.12257899, -0.033945758, -0.031303465, 0.045630626, 0.06843887, + -0.13492945, -0.012480007, -0.0811829, -0.07224499, -0.09628791, + 0.045100946, 0.0012300825, 0.013964662, 0.099372394, 0.02543059, + 0.06958324, 0.034257296, 0.0482646, 0.06267997, 0.052625068, + 0.12784666, 0.07077897, 0.025725935, 0.04165009, 0.07241905, + 0.018668644, -0.037377294, -0.06277783, -0.08833636, -0.040120605, + -0.011405586, -0.007808335, -0.010301386, -0.005102167, 0.027717464, + 0.05483423, 0.11449111, 0.11289652, 0.10939839, 0.13396506, + -0.08402166, -0.01901462, -0.044678304, -0.07720565, 0.014350063, + -0.11757958, -0.0652038, -0.08185733, -0.076754324, -0.092614375, + 0.10405491, 0.052960336, 0.035755895, 0.035839386, -0.012540553, + 0.036881298, 0.02913376, 0.03420159, 0.05448447, -0.054523353, + 0.02582715, 0.02327355, -0.011857179, -0.0011980024, -0.034641717, + -0.026125094, -0.17582615, -0.15923657, -0.27486774, -0.0006143371, + 0.0001771948, -8.470171e-05, 0.02651807, 0.045790765, 0.06956496}); + + lstm.SetInputToCellWeights( + {-0.04580283, -0.09549462, -0.032418985, -0.06454633, + -0.043528453, 0.043018587, -0.049152344, -0.12418144, + -0.078985475, -0.07596889, 0.019484362, -0.11434962, + -0.0074034138, -0.06314844, -0.092981495, 0.0062155537, + -0.025034338, -0.0028890965, 0.048929527, 0.06235075, + 0.10665918, -0.032036792, -0.08505916, -0.10843358, + -0.13002433, -0.036816437, -0.02130134, -0.016518239, + 0.0047691227, -0.0025825808, 0.066017866, 0.029991534, + -0.10652836, -0.1037554, -0.13056071, -0.03266643, + -0.033702414, -0.006473424, -0.04611692, 0.014419339, + -0.025174323, 0.0396852, 0.081777506, 0.06157468, + 0.10210095, -0.009658194, 0.046511717, 0.03603906, + 0.0069369148, 0.015960095, -0.06507666, 0.09551598, + 0.053568836, 0.06408714, 0.12835667, -0.008714329, + -0.20211966, -0.12093674, 0.029450472, 0.2849013, + -0.029227901, 0.1164364, -0.08560263, 0.09941786, + -0.036999565, -0.028842626, -0.0033637602, -0.017012902, + -0.09720865, -0.11193351, -0.029155117, -0.017936034, + -0.009768936, -0.04223324, -0.036159635, 0.06505112, + -0.021742892, -0.023377212, -0.07221364, -0.06430552, + 0.05453865, 0.091149814, 0.06387331, 0.007518393, + 0.055960953, 0.069779344, 0.046411168, 0.10509911, + 0.07463894, 0.0075130584, 0.012850982, 0.04555431, + 0.056955688, 0.06555285, 0.050801456, -0.009862683, + 0.00826772, -0.026555609, -0.0073611983, -0.0014897042}); + + lstm.SetInputToOutputWeights( + {-0.0998932, -0.07201956, -0.052803773, -0.15629593, -0.15001918, + -0.07650751, 0.02359855, -0.075155355, -0.08037709, -0.15093534, + 0.029517552, -0.04751393, 0.010350531, -0.02664851, -0.016839722, + -0.023121163, 0.0077019283, 0.012851257, -0.05040649, -0.0129761, + -0.021737747, -0.038305793, -0.06870586, -0.01481247, -0.001285394, + 0.10124236, 0.083122835, 0.053313006, -0.062235646, -0.075637154, + -0.027833903, 0.029774971, 0.1130802, 0.09218906, 0.09506135, + -0.086665764, -0.037162706, -0.038880914, -0.035832845, -0.014481564, + -0.09825003, -0.12048569, -0.097665586, -0.05287633, -0.0964047, + -0.11366429, 0.035777505, 0.13568819, 0.052451383, 0.050649304, + 0.05798951, -0.021852335, -0.099848844, 0.014740475, -0.078897946, + 0.04974699, 0.014160473, 0.06973932, 0.04964942, 0.033364646, + 0.08190124, 0.025535367, 0.050893165, 0.048514254, 0.06945813, + -0.078907564, -0.06707616, -0.11844508, -0.09986688, -0.07509403, + 0.06263226, 0.14925587, 0.20188436, 0.12098451, 0.14639415, + 0.0015017595, -0.014267382, -0.03417257, 0.012711468, 0.0028300495, + -0.024758482, -0.05098548, -0.0821182, 0.014225672, 0.021544158, + 0.08949725, 0.07505268, -0.0020780868, 0.04908258, 0.06476295, + -0.022907063, 0.027562456, 0.040185735, 0.019567577, -0.015598739, + -0.049097303, -0.017121866, -0.083368234, -0.02332002, -0.0840956}); + + lstm.SetInputGateBias( + {0.02234832, 0.14757581, 0.18176508, 0.10380666, 0.053110216, + -0.06928846, -0.13942584, -0.11816189, 0.19483899, 0.03652339, + -0.10250295, 0.036714908, -0.18426876, 0.036065217, 0.21810818, + 0.02383196, -0.043370757, 0.08690144, -0.04444982, 0.00030581196}); + + lstm.SetForgetGateBias({0.035185695, -0.042891346, -0.03032477, 0.23027696, + 0.11098921, 0.15378423, 0.09263801, 0.09790885, + 0.09508917, 0.061199076, 0.07665568, -0.015443159, + -0.03499149, 0.046190713, 0.08895977, 0.10899629, + 0.40694186, 0.06030037, 0.012413437, -0.06108739}); + + lstm.SetCellBias({-0.024379363, 0.0055531194, 0.23377132, 0.033463873, + -0.1483596, -0.10639995, -0.091433935, 0.058573797, + -0.06809782, -0.07889636, -0.043246906, -0.09829136, + -0.4279842, 0.034901652, 0.18797937, 0.0075234566, + 0.016178843, 0.1749513, 0.13975595, 0.92058027}); + + lstm.SetOutputGateBias( + {0.046159424, -0.0012809046, 0.03563469, 0.12648113, 0.027195795, + 0.35373217, -0.018957434, 0.008907322, -0.0762701, 0.12018895, + 0.04216877, 0.0022856654, 0.040952638, 0.3147856, 0.08225149, + -0.057416286, -0.14995944, -0.008040261, 0.13208859, 0.029760877}); + + lstm.SetRecurrentToInputWeights( + {-0.001374326, -0.078856036, 0.10672688, 0.029162422, + -0.11585556, 0.02557986, -0.13446963, -0.035785314, + -0.01244275, 0.025961924, -0.02337298, -0.044228926, + -0.055839065, -0.046598054, -0.010546039, -0.06900766, + 0.027239809, 0.022582639, -0.013296484, -0.05459212, + 0.08981, -0.045407712, 0.08682226, -0.06867011, + -0.14390695, -0.02916037, 0.000996957, 0.091420636, + 0.14283475, -0.07390571, -0.06402044, 0.062524505, + -0.093129106, 0.04860203, -0.08364217, -0.08119002, + 0.009352075, 0.22920375, 0.0016303885, 0.11583097, + -0.13732095, 0.012405723, -0.07551853, 0.06343048, + 0.12162708, -0.031923793, -0.014335606, 0.01790974, + -0.10650317, -0.0724401, 0.08554849, -0.05727212, + 0.06556731, -0.042729504, -0.043227166, 0.011683251, + -0.013082158, -0.029302018, -0.010899579, -0.062036745, + -0.022509435, -0.00964907, -0.01567329, 0.04260106, + -0.07787477, -0.11576462, 0.017356863, 0.048673786, + -0.017577527, -0.05527947, -0.082487635, -0.040137455, + -0.10820036, -0.04666372, 0.022746278, -0.07851417, + 0.01068115, 0.032956902, 0.022433773, 0.0026891115, + 0.08944216, -0.0685835, 0.010513544, 0.07228705, + 0.02032331, -0.059686817, -0.0005566496, -0.086984694, + 0.040414046, -0.1380399, 0.094208956, -0.05722982, + 0.012092817, -0.04989123, -0.086576, -0.003399834, + -0.04696032, -0.045747425, 0.10091314, 0.048676282, + -0.029037097, 0.031399418, -0.0040285117, 0.047237843, + 0.09504992, 0.041799378, -0.049185462, -0.031518843, + -0.10516937, 0.026374253, 0.10058866, -0.0033195973, + -0.041975245, 0.0073591834, 0.0033782164, -0.004325073, + -0.10167381, 0.042500053, -0.01447153, 0.06464186, + -0.017142897, 0.03312627, 0.009205989, 0.024138335, + -0.011337001, 0.035530265, -0.010912711, 0.0706555, + -0.005894094, 0.051841937, -0.1401738, -0.02351249, + 0.0365468, 0.07590991, 0.08838724, 0.021681072, + -0.10086113, 0.019608743, -0.06195883, 0.077335775, + 0.023646897, -0.095322326, 0.02233014, 0.09756986, + -0.048691444, -0.009579111, 0.07595467, 0.11480546, + -0.09801813, 0.019894179, 0.08502348, 0.004032281, + 0.037211012, 0.068537936, -0.048005626, -0.091520436, + -0.028379958, -0.01556313, 0.06554592, -0.045599163, + -0.01672207, -0.020169014, -0.011877351, -0.20212261, + 0.010889619, 0.0047078193, 0.038385306, 0.08540671, + -0.017140968, -0.0035865551, 0.016678626, 0.005633034, + 0.015963363, 0.00871737, 0.060130805, 0.028611384, + 0.10109069, -0.015060172, -0.07894427, 0.06401885, + 0.011584063, -0.024466386, 0.0047652307, -0.09041358, + 0.030737216, -0.0046374933, 0.14215417, -0.11823516, + 0.019899689, 0.006106124, -0.027092824, 0.0786356, + 0.05052217, -0.058925, -0.011402121, -0.024987547, + -0.0013661642, -0.06832946, -0.015667673, -0.1083353, + -0.00096863037, -0.06988685, -0.053350925, -0.027275559, + -0.033664223, -0.07978348, -0.025200296, -0.017207067, + -0.058403496, -0.055697463, 0.005798788, 0.12965427, + -0.062582195, 0.0013350133, -0.10482091, 0.0379771, + 0.072521195, -0.0029455067, -0.13797039, -0.03628521, + 0.013806405, -0.017858358, -0.01008298, -0.07700066, + -0.017081132, 0.019358726, 0.0027079724, 0.004635139, + 0.062634714, -0.02338735, -0.039547626, -0.02050681, + 0.03385117, -0.083611414, 0.002862572, -0.09421313, + 0.058618143, -0.08598433, 0.00972939, 0.023867095, + -0.053934585, -0.023203006, 0.07452513, -0.048767887, + -0.07314807, -0.056307215, -0.10433547, -0.06440842, + 0.04328182, 0.04389765, -0.020006588, -0.09076438, + -0.11652589, -0.021705797, 0.03345259, -0.010329105, + -0.025767034, 0.013057034, -0.07316461, -0.10145612, + 0.06358255, 0.18531723, 0.07759293, 0.12006465, + 0.1305557, 0.058638252, -0.03393652, 0.09622831, + -0.16253184, -2.4580743e-06, 0.079869635, -0.070196845, + -0.005644518, 0.06857898, -0.12598175, -0.035084512, + 0.03156317, -0.12794146, -0.031963028, 0.04692781, + 0.030070418, 0.0071660685, -0.095516115, -0.004643372, + 0.040170413, -0.062104587, -0.0037324072, 0.0554317, + 0.08184801, -0.019164372, 0.06791302, 0.034257166, + -0.10307039, 0.021943003, 0.046745934, 0.0790918, + -0.0265588, -0.007824208, 0.042546265, -0.00977924, + -0.0002440307, -0.017384544, -0.017990116, 0.12252321, + -0.014512694, -0.08251313, 0.08861942, 0.13589665, + 0.026351685, 0.012641483, 0.07466548, 0.044301085, + -0.045414884, -0.051112458, 0.03444247, -0.08502782, + -0.04106223, -0.028126027, 0.028473156, 0.10467447}); + + lstm.SetRecurrentToForgetWeights( + {-0.057784554, -0.026057621, -0.068447545, -0.022581743, + 0.14811787, 0.10826372, 0.09471067, 0.03987225, + -0.0039523416, 0.00030638507, 0.053185795, 0.10572994, + 0.08414449, -0.022036452, -0.00066928595, -0.09203576, + 0.032950465, -0.10985798, -0.023809856, 0.0021431844, + -0.02196096, -0.00326074, 0.00058621005, -0.074678116, + -0.06193199, 0.055729095, 0.03736828, 0.020123724, + 0.061878487, -0.04729229, 0.034919553, -0.07585433, + -0.04421272, -0.044019096, 0.085488975, 0.04058006, + -0.06890133, -0.030951202, -0.024628663, -0.07672815, + 0.034293607, 0.08556707, -0.05293577, -0.033561368, + -0.04899627, 0.0241671, 0.015736353, -0.095442444, + -0.029564252, 0.016493602, -0.035026584, 0.022337519, + -0.026871363, 0.004780428, 0.0077918363, -0.03601621, + 0.016435321, -0.03263031, -0.09543275, -0.047392778, + 0.013454138, 0.028934088, 0.01685226, -0.086110644, + -0.046250615, -0.01847454, 0.047608484, 0.07339695, + 0.034546845, -0.04881143, 0.009128804, -0.08802852, + 0.03761666, 0.008096139, -0.014454086, 0.014361001, + -0.023502491, -0.0011840804, -0.07607001, 0.001856849, + -0.06509276, -0.006021153, -0.08570962, -0.1451793, + 0.060212336, 0.055259194, 0.06974018, 0.049454916, + -0.027794661, -0.08077226, -0.016179763, 0.1169753, + 0.17213494, -0.0056326236, -0.053934924, -0.0124349, + -0.11520337, 0.05409887, 0.088759385, 0.0019655675, + 0.0042065294, 0.03881498, 0.019844765, 0.041858196, + -0.05695512, 0.047233116, 0.038937137, -0.06542224, + 0.014429736, -0.09719407, 0.13908425, -0.05379757, + 0.012321099, 0.082840554, -0.029899208, 0.044217527, + 0.059855383, 0.07711018, -0.045319796, 0.0948846, + -0.011724666, -0.0033288454, -0.033542685, -0.04764985, + -0.13873616, 0.040668588, 0.034832682, -0.015319203, + -0.018715994, 0.046002675, 0.0599172, -0.043107376, + 0.0294216, -0.002314414, -0.022424703, 0.0030315618, + 0.0014641669, 0.0029166266, -0.11878115, 0.013738511, + 0.12375372, -0.0006038222, 0.029104086, 0.087442465, + 0.052958444, 0.07558703, 0.04817258, 0.044462286, + -0.015213451, -0.08783778, -0.0561384, -0.003008196, + 0.047060397, -0.002058388, 0.03429439, -0.018839769, + 0.024734668, 0.024614193, -0.042046934, 0.09597743, + -0.0043254104, 0.04320769, 0.0064070094, -0.0019131786, + -0.02558259, -0.022822596, -0.023273505, -0.02464396, + -0.10991725, -0.006240552, 0.0074488563, 0.024044557, + 0.04383914, -0.046476185, 0.028658995, 0.060410924, + 0.050786525, 0.009452605, -0.0073054377, -0.024810238, + 0.0052906186, 0.0066939713, -0.0020913032, 0.014515517, + 0.015898481, 0.021362653, -0.030262267, 0.016587038, + -0.011442813, 0.041154444, -0.007631438, -0.03423484, + -0.010977775, 0.036152758, 0.0066366293, 0.11915515, + 0.02318443, -0.041350313, 0.021485701, -0.10906167, + -0.028218046, -0.00954771, 0.020531068, -0.11995105, + -0.03672871, 0.024019798, 0.014255957, -0.05221243, + -0.00661567, -0.04630967, 0.033188973, 0.10107534, + -0.014027541, 0.030796422, -0.10270911, -0.035999842, + 0.15443139, 0.07684145, 0.036571592, -0.035900835, + -0.0034699554, 0.06209149, 0.015920248, -0.031122351, + -0.03858649, 0.01849943, 0.13872518, 0.01503974, + 0.069941424, -0.06948533, -0.0088794185, 0.061282158, + -0.047401894, 0.03100163, -0.041533746, -0.10430945, + 0.044574402, -0.01425562, -0.024290353, 0.034563623, + 0.05866852, 0.023947537, -0.09445152, 0.035450947, + 0.02247216, -0.0042998926, 0.061146557, -0.10250651, + 0.020881841, -0.06747029, 0.10062043, -0.0023941975, + 0.03532124, -0.016341697, 0.09685456, -0.016764693, + 0.051808182, 0.05875331, -0.04536488, 0.001626336, + -0.028892258, -0.01048663, -0.009793449, -0.017093895, + 0.010987891, 0.02357273, -0.00010856845, 0.0099760275, + -0.001845119, -0.03551521, 0.0018358806, 0.05763657, + -0.01769146, 0.040995963, 0.02235177, -0.060430344, + 0.11475477, -0.023854522, 0.10071741, 0.0686208, + -0.014250481, 0.034261297, 0.047418304, 0.08562733, + -0.030519066, 0.0060542435, 0.014653856, -0.038836084, + 0.04096551, 0.032249358, -0.08355519, -0.026823482, + 0.056386515, -0.010401743, -0.028396193, 0.08507674, + 0.014410365, 0.020995233, 0.17040324, 0.11511526, + 0.02459721, 0.0066619175, 0.025853224, -0.023133837, + -0.081302024, 0.017264642, -0.009585969, 0.09491168, + -0.051313367, 0.054532815, -0.014298593, 0.10657464, + 0.007076659, 0.10964551, 0.0409152, 0.008275321, + -0.07283536, 0.07937492, 0.04192024, -0.1075027}); + + lstm.SetRecurrentToCellWeights( + {-0.037322544, 0.018592842, 0.0056175636, -0.06253426, + 0.055647098, -0.05713207, -0.05626563, 0.005559383, + 0.03375411, -0.025757805, -0.088049285, 0.06017052, + -0.06570978, 0.007384076, 0.035123326, -0.07920549, + 0.053676967, 0.044480428, -0.07663568, 0.0071805613, + 0.08089997, 0.05143358, 0.038261272, 0.03339287, + -0.027673481, 0.044746667, 0.028349208, 0.020090483, + -0.019443132, -0.030755889, -0.0040000007, 0.04465846, + -0.021585021, 0.0031670958, 0.0053199246, -0.056117613, + -0.10893326, 0.076739706, -0.08509834, -0.027997585, + 0.037871376, 0.01449768, -0.09002357, -0.06111149, + -0.046195522, 0.0422062, -0.005683705, -0.1253618, + -0.012925729, -0.04890792, 0.06985068, 0.037654128, + 0.03398274, -0.004781977, 0.007032333, -0.031787455, + 0.010868644, -0.031489216, 0.09525667, 0.013939797, + 0.0058680447, 0.0167067, 0.02668468, -0.04797466, + -0.048885044, -0.12722108, 0.035304096, 0.06554885, + 0.00972396, -0.039238118, -0.05159735, -0.11329045, + 0.1613692, -0.03750952, 0.06529313, -0.071974665, + -0.11769596, 0.015524369, -0.0013754242, -0.12446318, + 0.02786344, -0.014179351, 0.005264273, 0.14376344, + 0.015983658, 0.03406988, -0.06939408, 0.040699873, + 0.02111075, 0.09669095, 0.041345075, -0.08316494, + -0.07684199, -0.045768797, 0.032298047, -0.041805092, + 0.0119405, 0.0061010392, 0.12652606, 0.0064572375, + -0.024950314, 0.11574242, 0.04508852, -0.04335324, + 0.06760663, -0.027437469, 0.07216407, 0.06977076, + -0.05438599, 0.034033038, -0.028602652, 0.05346137, + 0.043184172, -0.037189785, 0.10420091, 0.00882477, + -0.054019816, -0.074273005, -0.030617684, -0.0028467078, + 0.024302477, -0.0038869337, 0.005332455, 0.0013399826, + 0.04361412, -0.007001822, 0.09631092, -0.06702025, + -0.042049985, -0.035070654, -0.04103342, -0.10273396, + 0.0544271, 0.037184782, -0.13150354, -0.0058036847, + -0.008264958, 0.042035464, 0.05891794, 0.029673764, + 0.0063542654, 0.044788733, 0.054816857, 0.062257513, + -0.00093483756, 0.048938446, -0.004952862, -0.007730018, + -0.04043371, -0.017094059, 0.07229206, -0.023670016, + -0.052195564, -0.025616996, -0.01520939, 0.045104615, + -0.007376126, 0.003533447, 0.006570588, 0.056037236, + 0.12436656, 0.051817212, 0.028532185, -0.08686856, + 0.11868599, 0.07663395, -0.07323171, 0.03463402, + -0.050708205, -0.04458982, -0.11590894, 0.021273347, + 0.1251325, -0.15313013, -0.12224372, 0.17228661, + 0.023029093, 0.086124025, 0.006445803, -0.03496501, + 0.028332196, 0.04449512, -0.042436164, -0.026587414, + -0.006041347, -0.09292539, -0.05678812, 0.03897832, + 0.09465633, 0.008115513, -0.02171956, 0.08304309, + 0.071401566, 0.019622514, 0.032163795, -0.004167056, + 0.02295182, 0.030739572, 0.056506045, 0.004612461, + 0.06524936, 0.059999723, 0.046395954, -0.0045512207, + -0.1335546, -0.030136576, 0.11584653, -0.014678886, + 0.0020118146, -0.09688814, -0.0790206, 0.039770417, + -0.0329582, 0.07922767, 0.029322514, 0.026405897, + 0.04207835, -0.07073373, 0.063781224, 0.0859677, + -0.10925287, -0.07011058, 0.048005477, 0.03438226, + -0.09606514, -0.006669445, -0.043381985, 0.04240257, + -0.06955775, -0.06769346, 0.043903265, -0.026784198, + -0.017840602, 0.024307009, -0.040079936, -0.019946516, + 0.045318738, -0.12233574, 0.026170589, 0.0074471775, + 0.15978073, 0.10185836, 0.10298046, -0.015476589, + -0.039390966, -0.072174534, 0.0739445, -0.1211869, + -0.0347889, -0.07943156, 0.014809798, -0.12412325, + -0.0030663363, 0.039695457, 0.0647603, -0.08291318, + -0.018529687, -0.004423833, 0.0037507233, 0.084633216, + -0.01514876, -0.056505352, -0.012800942, -0.06994386, + 0.012962922, -0.031234352, 0.07029052, 0.016418684, + 0.03618972, 0.055686004, -0.08663945, -0.017404709, + -0.054761406, 0.029065743, 0.052404847, 0.020238016, + 0.0048197987, -0.0214882, 0.07078733, 0.013016777, + 0.06262858, 0.009184685, 0.020785125, -0.043904778, + -0.0270329, -0.03299152, -0.060088247, -0.015162964, + -0.001828936, 0.12642565, -0.056757294, 0.013586685, + 0.09232601, -0.035886683, 0.06000002, 0.05229691, + -0.052580316, -0.082029596, -0.010794592, 0.012947712, + -0.036429964, -0.085508935, -0.13127148, -0.017744139, + 0.031502828, 0.036232427, -0.031581745, 0.023051167, + -0.05325106, -0.03421577, 0.028793324, -0.034633752, + -0.009881397, -0.043551125, -0.018609839, 0.0019097115, + -0.008799762, 0.056595087, 0.0022273948, 0.055752404}); + + lstm.SetRecurrentToOutputWeights({ + 0.025825322, -0.05813119, 0.09495884, -0.045984812, -0.01255415, + -0.0026479573, -0.08196161, -0.054914974, -0.0046604523, -0.029587349, + -0.044576716, -0.07480124, -0.082868785, 0.023254942, 0.027502948, + -0.0039728214, -0.08683098, -0.08116779, -0.014675607, -0.037924774, + -0.023314456, -0.007401714, -0.09255757, 0.029460307, -0.08829125, + -0.005139627, -0.08989442, -0.0555066, 0.13596267, -0.025062224, + -0.048351806, -0.03850004, 0.07266485, -0.022414139, 0.05940088, + 0.075114764, 0.09597592, -0.010211725, -0.0049794707, -0.011523867, + -0.025980417, 0.072999895, 0.11091378, -0.081685916, 0.014416728, + 0.043229222, 0.034178585, -0.07530371, 0.035837382, -0.085607, + -0.007721233, -0.03287832, -0.043848954, -0.06404588, -0.06632928, + -0.073643476, 0.008214239, -0.045984086, 0.039764922, 0.03474462, + 0.060612556, -0.080590084, 0.049127717, 0.04151091, -0.030063879, + 0.008801774, -0.023021035, -0.019558564, 0.05158114, -0.010947698, + -0.011825728, 0.0075720972, 0.0699727, -0.0039981045, 0.069350146, + 0.08799282, 0.016156472, 0.035502106, 0.11695009, 0.006217345, + 0.13392477, -0.037875112, 0.025745004, 0.08940699, -0.00924166, + 0.0046702605, -0.036598757, -0.08811812, 0.10522024, -0.032441203, + 0.008176899, -0.04454919, 0.07058152, 0.0067963637, 0.039206743, + 0.03259838, 0.03725492, -0.09515802, 0.013326398, -0.052055415, + -0.025676316, 0.03198509, -0.015951829, -0.058556724, 0.036879618, + 0.043357447, 0.028362012, -0.05908629, 0.0059240665, -0.04995891, + -0.019187413, 0.0276265, -0.01628143, 0.0025863599, 0.08800015, + 0.035250366, -0.022165963, -0.07328642, -0.009415526, -0.07455109, + 0.11690406, 0.0363299, 0.07411125, 0.042103454, -0.009660886, + 0.019076364, 0.018299393, -0.046004917, 0.08891175, 0.0431396, + -0.026327137, -0.051502608, 0.08979574, -0.051670972, 0.04940282, + -0.07491107, -0.021240504, 0.022596184, -0.034280192, 0.060163025, + -0.058211457, -0.051837247, -0.01349775, -0.04639988, -0.035936575, + -0.011681591, 0.064818054, 0.0073146066, -0.021745546, -0.043124277, + -0.06471268, -0.07053354, -0.029321948, -0.05330136, 0.016933719, + -0.053782392, 0.13747959, -0.1361751, -0.11569455, 0.0033329215, + 0.05693899, -0.053219706, 0.063698, 0.07977434, -0.07924483, + 0.06936997, 0.0034815092, -0.007305279, -0.037325785, -0.07251102, + -0.033633437, -0.08677009, 0.091591336, -0.14165086, 0.021752775, + 0.019683983, 0.0011612234, -0.058154266, 0.049996935, 0.0288841, + -0.0024567875, -0.14345716, 0.010955264, -0.10234828, 0.1183656, + -0.0010731248, -0.023590032, -0.072285876, -0.0724771, -0.026382286, + -0.0014920527, 0.042667855, 0.0018776858, 0.02986552, 0.009814309, + 0.0733756, 0.12289186, 0.018043943, -0.0458958, 0.049412545, + 0.033632483, 0.05495232, 0.036686596, -0.013781798, -0.010036754, + 0.02576849, -0.08307328, 0.010112348, 0.042521734, -0.05869831, + -0.071689695, 0.03876447, -0.13275425, -0.0352966, -0.023077697, + 0.10285965, 0.084736146, 0.15568255, -0.00040734606, 0.027835453, + -0.10292561, -0.032401145, 0.10053256, -0.026142767, -0.08271222, + -0.0030240538, -0.016368777, 0.1070414, 0.042672627, 0.013456989, + -0.0437609, -0.022309763, 0.11576483, 0.04108048, 0.061026827, + -0.0190714, -0.0869359, 0.037901703, 0.0610107, 0.07202949, + 0.01675338, 0.086139716, -0.08795751, -0.014898893, -0.023771819, + -0.01965048, 0.007955471, -0.043740474, 0.03346837, -0.10549954, + 0.090567775, 0.042013682, -0.03176985, 0.12569028, -0.02421228, + -0.029526481, 0.023851605, 0.031539805, 0.05292009, -0.02344001, + -0.07811758, -0.08834428, 0.10094801, 0.16594367, -0.06861939, + -0.021256343, -0.041093912, -0.06669611, 0.035498552, 0.021757556, + -0.09302526, -0.015403468, -0.06614931, -0.051798206, -0.013874718, + 0.03630673, 0.010412845, -0.08077351, 0.046185967, 0.0035662893, + 0.03541868, -0.094149634, -0.034814864, 0.003128424, -0.020674974, + -0.03944324, -0.008110165, -0.11113267, 0.08484226, 0.043586485, + 0.040582247, 0.0968012, -0.065249965, -0.028036479, 0.0050708856, + 0.0017462453, 0.0326779, 0.041296225, 0.09164146, -0.047743853, + -0.015952192, -0.034451712, 0.084197424, -0.05347844, -0.11768019, + 0.085926116, -0.08251791, -0.045081906, 0.0948852, 0.068401024, + 0.024856757, 0.06978981, -0.057309967, -0.012775832, -0.0032452994, + 0.01977615, -0.041040014, -0.024264973, 0.063464895, 0.05431621, + }); + + lstm.SetCellToInputWeights( + {0.040369894, 0.030746894, 0.24704495, 0.018586371, -0.037586458, + -0.15312155, -0.11812848, -0.11465643, 0.20259799, 0.11418174, + -0.10116027, -0.011334949, 0.12411352, -0.076769054, -0.052169047, + 0.21198851, -0.38871562, -0.09061183, -0.09683246, -0.21929175}); + + lstm.SetCellToForgetWeights( + {-0.01998659, -0.15568835, -0.24248174, -0.012770197, 0.041331276, + -0.072311886, -0.052123554, -0.0066330447, -0.043891653, 0.036225766, + -0.047248036, 0.021479502, 0.033189066, 0.11952997, -0.020432774, + 0.64658105, -0.06650122, -0.03467612, 0.095340036, 0.23647355}); + + lstm.SetCellToOutputWeights( + {0.08286371, -0.08261836, -0.51210177, 0.002913762, 0.17764764, + -0.5495371, -0.08460716, -0.24552552, 0.030037103, 0.04123544, + -0.11940523, 0.007358328, 0.1890978, 0.4833202, -0.34441817, + 0.36312827, -0.26375428, 0.1457655, -0.19724406, 0.15548733}); + + lstm.SetProjectionWeights( + {-0.009802181, 0.09401916, 0.0717386, -0.13895074, 0.09641832, + 0.060420845, 0.08539281, 0.054285463, 0.061395317, 0.034448683, + -0.042991187, 0.019801661, -0.16840284, -0.015726732, -0.23041931, + -0.024478018, -0.10959692, -0.013875541, 0.18600968, -0.061274476, + 0.0138165, -0.08160894, -0.07661644, 0.032372914, 0.16169067, + 0.22465782, -0.03993472, -0.004017731, 0.08633481, -0.28869787, + 0.08682067, 0.17240396, 0.014975425, 0.056431185, 0.031037588, + 0.16702051, 0.0077946745, 0.15140012, 0.29405436, 0.120285, + -0.188994, -0.027265169, 0.043389652, -0.022061434, 0.014777949, + -0.20203483, 0.094781205, 0.19100232, 0.13987629, -0.036132768, + -0.06426278, -0.05108664, 0.13221376, 0.009441198, -0.16715929, + 0.15859416, -0.040437475, 0.050779544, -0.022187516, 0.012166504, + 0.027685808, -0.07675938, -0.0055694645, -0.09444123, 0.0046453946, + 0.050794356, 0.10770313, -0.20790008, -0.07149004, -0.11425117, + 0.008225835, -0.035802525, 0.14374903, 0.15262283, 0.048710253, + 0.1847461, -0.007487823, 0.11000021, -0.09542012, 0.22619456, + -0.029149994, 0.08527916, 0.009043713, 0.0042746216, 0.016261552, + 0.022461696, 0.12689082, -0.043589946, -0.12035478, -0.08361797, + -0.050666027, -0.1248618, -0.1275799, -0.071875185, 0.07377272, + 0.09944291, -0.18897448, -0.1593054, -0.06526116, -0.040107165, + -0.004618631, -0.067624845, -0.007576253, 0.10727444, 0.041546922, + -0.20424393, 0.06907816, 0.050412357, 0.00724631, 0.039827548, + 0.12449835, 0.10747581, 0.13708383, 0.09134148, -0.12617786, + -0.06428341, 0.09956831, 0.1208086, -0.14676677, -0.0727722, + 0.1126304, 0.010139365, 0.015571211, -0.038128063, 0.022913318, + -0.042050496, 0.16842307, -0.060597885, 0.10531834, -0.06411776, + -0.07451711, -0.03410368, -0.13393489, 0.06534304, 0.003620307, + 0.04490757, 0.05970546, 0.05197996, 0.02839995, 0.10434969, + -0.013699693, -0.028353551, -0.07260381, 0.047201227, -0.024575593, + -0.036445823, 0.07155557, 0.009672501, -0.02328883, 0.009533515, + -0.03606021, -0.07421458, -0.028082801, -0.2678904, -0.13221288, + 0.18419984, -0.13012612, -0.014588381, -0.035059117, -0.04824723, + 0.07830115, -0.056184657, 0.03277091, 0.025466874, 0.14494097, + -0.12522776, -0.098633975, -0.10766018, -0.08317623, 0.08594209, + 0.07749552, 0.039474737, 0.1776665, -0.07409566, -0.0477268, + 0.29323658, 0.10801441, 0.1154011, 0.013952499, 0.10739139, + 0.10708251, -0.051456142, 0.0074137426, -0.10430189, 0.10034707, + 0.045594677, 0.0635285, -0.0715442, -0.089667566, -0.10811871, + 0.00026344223, 0.08298446, -0.009525053, 0.006585689, -0.24567553, + -0.09450807, 0.09648481, 0.026996298, -0.06419476, -0.04752702, + -0.11063944, -0.23441927, -0.17608605, -0.052156363, 0.067035615, + 0.19271925, -0.0032889997, -0.043264326, 0.09663576, -0.057112187, + -0.10100678, 0.0628376, 0.04447668, 0.017961001, -0.10094388, + -0.10190601, 0.18335468, 0.10494553, -0.052095775, -0.0026118709, + 0.10539724, -0.04383912, -0.042349473, 0.08438151, -0.1947263, + 0.02251204, 0.11216432, -0.10307853, 0.17351969, -0.039091777, + 0.08066188, -0.00561982, 0.12633002, 0.11335965, -0.0088127935, + -0.019777594, 0.06864014, -0.059751723, 0.016233567, -0.06894641, + -0.28651384, -0.004228674, 0.019708522, -0.16305895, -0.07468996, + -0.0855457, 0.099339016, -0.07580735, -0.13775392, 0.08434318, + 0.08330512, -0.12131499, 0.031935584, 0.09180414, -0.08876437, + -0.08049874, 0.008753825, 0.03498998, 0.030215185, 0.03907079, + 0.089751154, 0.029194152, -0.03337423, -0.019092513, 0.04331237, + 0.04299654, -0.036394123, -0.12915532, 0.09793732, 0.07512415, + -0.11319543, -0.032502122, 0.15661901, 0.07671967, -0.005491124, + -0.19379048, -0.218606, 0.21448623, 0.017840758, 0.1416943, + -0.07051762, 0.19488361, 0.02664691, -0.18104725, -0.09334311, + 0.15026465, -0.15493552, -0.057762887, -0.11604192, -0.262013, + -0.01391798, 0.012185008, 0.11156489, -0.07483202, 0.06693364, + -0.26151478, 0.046425626, 0.036540434, -0.16435726, 0.17338543, + -0.21401681, -0.11385144, -0.08283257, -0.069031075, 0.030635102, + 0.010969227, 0.11109743, 0.010919218, 0.027526086, 0.13519906, + 0.01891392, -0.046839405, -0.040167913, 0.017953383, -0.09700955, + 0.0061885654, -0.07000971, 0.026893595, -0.038844477, 0.14543656}); + + static float lstm_input[][20] = { + {// Batch0: 4 (input_sequence_size) * 5 (n_input) + 0.787926, 0.151646, 0.071352, 0.118426, 0.458058, 0.596268, 0.998386, + 0.568695, 0.864524, 0.571277, 0.073204, 0.296072, 0.743333, 0.069199, + 0.045348, 0.867394, 0.291279, 0.013714, 0.482521, 0.626339}, + + {// Batch1: 4 (input_sequence_size) * 5 (n_input) + 0.295743, 0.544053, 0.690064, 0.858138, 0.497181, 0.642421, 0.524260, + 0.134799, 0.003639, 0.162482, 0.640394, 0.930399, 0.050782, 0.432485, + 0.988078, 0.082922, 0.563329, 0.865614, 0.333232, 0.259916}}; + + static float lstm_golden_output[][64] = { + {// Batch0: 4 (input_sequence_size) * 16 (n_output) + -0.00396806, 0.029352, -0.00279226, 0.0159977, -0.00835576, + -0.0211779, 0.0283512, -0.0114597, 0.00907307, -0.0244004, + -0.0152191, -0.0259063, 0.00914318, 0.00415118, 0.017147, + 0.0134203, -0.0166936, 0.0381209, 0.000889694, 0.0143363, + -0.0328911, -0.0234288, 0.0333051, -0.012229, 0.0110322, + -0.0457725, -0.000832209, -0.0202817, 0.0327257, 0.0121308, + 0.0155969, 0.0312091, -0.0213783, 0.0350169, 0.000324794, + 0.0276012, -0.0263374, -0.0371449, 0.0446149, -0.0205474, + 0.0103729, -0.0576349, -0.0150052, -0.0292043, 0.0376827, + 0.0136115, 0.0243435, 0.0354492, -0.0189322, 0.0464512, + -0.00251373, 0.0225745, -0.0308346, -0.0317124, 0.0460407, + -0.0189395, 0.0149363, -0.0530162, -0.0150767, -0.0340193, + 0.0286833, 0.00824207, 0.0264887, 0.0305169}, + {// Batch1: 4 (input_sequence_size) * 16 (n_output) + -0.013869, 0.0287268, -0.00334693, 0.00733398, -0.0287926, + -0.0186926, 0.0193662, -0.0115437, 0.00422612, -0.0345232, + 0.00223253, -0.00957321, 0.0210624, 0.013331, 0.0150954, + 0.02168, -0.0141913, 0.0322082, 0.00227024, 0.0260507, + -0.0188721, -0.0296489, 0.0399134, -0.0160509, 0.0116039, + -0.0447318, -0.0150515, -0.0277406, 0.0316596, 0.0118233, + 0.0214762, 0.0293641, -0.0204549, 0.0450315, -0.00117378, + 0.0167673, -0.0375007, -0.0238314, 0.038784, -0.0174034, + 0.0131743, -0.0506589, -0.0048447, -0.0240239, 0.0325789, + 0.00790065, 0.0220157, 0.0333314, -0.0264787, 0.0387855, + -0.000764675, 0.0217599, -0.037537, -0.0335206, 0.0431679, + -0.0211424, 0.010203, -0.062785, -0.00832363, -0.025181, + 0.0412031, 0.0118723, 0.0239643, 0.0394009}}; + + // Resetting cell_state and output_state + lstm.ResetCellState(); + lstm.ResetOutputState(); + + const int input_sequence_size = + sizeof(lstm_input[0]) / sizeof(float) / (lstm.num_inputs()); + for (int i = 0; i < input_sequence_size; i++) { + float* batch0_start = lstm_input[0] + i * lstm.num_inputs(); + float* batch0_end = batch0_start + lstm.num_inputs(); + + lstm.SetInput(0, batch0_start, batch0_end); + + float* batch1_start = lstm_input[1] + i * lstm.num_inputs(); + float* batch1_end = batch1_start + lstm.num_inputs(); + lstm.SetInput(lstm.num_inputs(), batch1_start, batch1_end); + + lstm.Invoke(); + + float* golden_start_batch0 = lstm_golden_output[0] + i * lstm.num_outputs(); + float* golden_end_batch0 = golden_start_batch0 + lstm.num_outputs(); + float* golden_start_batch1 = lstm_golden_output[1] + i * lstm.num_outputs(); + float* golden_end_batch1 = golden_start_batch1 + lstm.num_outputs(); + std::vector expected; + expected.insert(expected.end(), golden_start_batch0, golden_end_batch0); + expected.insert(expected.end(), golden_start_batch1, golden_end_batch1); + EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + } +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/mul.cc b/tensorflow/contrib/lite/kernels/mul.cc new file mode 100644 index 0000000000..81c73f2523 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/mul.cc @@ -0,0 +1,167 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace mul { + +// This file has three implementation of Mul. +enum KernelType { + kReference, + kGenericOptimized, // Neon-free + kNeonOptimized, +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); + for (int i = 0; i < NumDimensions(input1); ++i) { + TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), + SizeOfDimension(input2, i)); + } + + TF_LITE_ENSURE_EQ(context, input1->type, output->type); + TF_LITE_ENSURE_EQ(context, input2->type, output->type); + + TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); + return context->ResizeTensor(context, output, output_size); +} + +template +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, TfLiteTensor* input1, + TfLiteTensor* input2, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(params->activation, &output_activation_min, + &output_activation_max); +#define TF_LITE_MUL(type) \ + type::Mul(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_MUL(reference_ops); + } else { + TF_LITE_MUL(optimized_ops); + } +#undef TF_LITE_MUL +} + +template +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, TfLiteTensor* input1, + TfLiteTensor* input2, TfLiteTensor* output) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + + int32_t output_multiplier; + int output_shift; + + double real_multiplier = + input1->params.scale * input2->params.scale / output->params.scale; + QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_MUL(type) \ + type::BroadcastMul(GetTensorData(input1), GetTensorDims(input1), \ + input1_offset, GetTensorData(input2), \ + GetTensorDims(input2), input2_offset, output_offset, \ + output_multiplier, output_shift, output_activation_min, \ + output_activation_max, GetTensorData(output), \ + GetTensorDims(output)); + if (kernel_type == kReference) { + TF_LITE_MUL(reference_ops); + } else { + TF_LITE_MUL(optimized_ops); + } +#undef TF_LITE_MUL +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { + EvalFloat(context, node, params, input1, input2, output); + } else if (output->type == kTfLiteUInt8) { + EvalQuantized(context, node, params, input1, input2, output); + } else { + context->ReportError(context, + "Mul only supports FLOAT32 and quantized UINT8 now."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace mul + +TfLiteRegistration* Register_MUL_REF() { + static TfLiteRegistration r = {nullptr, nullptr, mul::Prepare, + mul::Eval}; + return &r; +} + +TfLiteRegistration* Register_MUL_GENERIC_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, mul::Prepare, + mul::Eval}; + return &r; +} + +TfLiteRegistration* Register_MUL_NEON_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, mul::Prepare, + mul::Eval}; + return &r; +} + +TfLiteRegistration* Register_MUL() { +#ifdef USE_NEON + return Register_MUL_NEON_OPT(); +#else + return Register_MUL_GENERIC_OPT(); +#endif +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc new file mode 100644 index 0000000000..4b858e1f39 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/mul_test.cc @@ -0,0 +1,127 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseMulOpModel : public SingleOpModel { + public: + BaseMulOpModel(TensorData input, TensorData output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input); + input2_ = AddInput(input); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_MUL, BuiltinOptions_MulOptions, + CreateMulOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatMulOpModel : public BaseMulOpModel { + public: + using BaseMulOpModel::BaseMulOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +// For quantized Mul, the error shouldn't exceed (2*step + step^2). +// The param min=-1.0 & max=1.0 is used in the following tests. +// The tolerance value is ~0.0157. +const float kQuantizedStep = 2.0 / 255.0; +const float kQuantizedTolerance = + 2.0 * kQuantizedStep + kQuantizedStep * kQuantizedStep; + +class QuantizedMulOpModel : public BaseMulOpModel { + public: + using BaseMulOpModel::BaseMulOpModel; + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +TEST(FloatMulOpTest, NoActivation) { + FloatMulOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.2, 0.04, 0.21, 0.4}))); +} + +TEST(FloatMulOpTest, ActivationRELU1) { + FloatMulOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU1); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.2, 0.04, 0.21, 1.0}))); +} + +TEST(FloatMulOpTest, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5, 1.1, 0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.2, 0.04, 0.21, 0.4, 1.21, 0.2}))) + << "With shape number " << i; + } +} + +TEST(QuantizedMulOpTest, NoActivation) { + QuantizedMulOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-0.8, 0.2, 0.9, 0.7}); + m.QuantizeAndPopulate(m.input2(), {0.6, 0.4, 0.9, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({-0.48, 0.08, 0.81, 0.56}, + kQuantizedTolerance))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/op_macros.h b/tensorflow/contrib/lite/kernels/op_macros.h new file mode 100644 index 0000000000..7535afaf8e --- /dev/null +++ b/tensorflow/contrib/lite/kernels/op_macros.h @@ -0,0 +1,32 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_OP_UTIL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_OP_UTIL_H_ + +#define TF_LITE_FATAL(msg) \ + do { \ + fprintf(stderr, "%s\n", (msg)); \ + exit(1); \ + } while (0) +#define TF_LITE_ASSERT(x) \ + do { \ + if (!(x)) TF_LITE_FATAL(#x); \ + } while (0) +#define TF_LITE_ASSERT_EQ(x, y) \ + do { \ + if ((x) != (y)) TF_LITE_FATAL(#x " didn't equal " #y); \ + } while (0) + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_OP_UTIL_H_ diff --git a/tensorflow/contrib/lite/kernels/optional_tensor_test.cc b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc new file mode 100644 index 0000000000..8977d27f73 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc @@ -0,0 +1,343 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite LSTM op. + +#include +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class LSTMOpModel : public SingleOpModel { + public: + LSTMOpModel(int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, + bool use_peephole, bool use_projection_weights, + bool use_projection_bias, float cell_clip, float proj_clip, + const std::vector>& input_shapes) + : n_batch_(n_batch), + n_input_(n_input), + n_cell_(n_cell), + n_output_(n_output) { + input_ = AddInput(TensorType_FLOAT32); + + if (use_cifg) { + input_to_input_weights_ = AddNullInput(); + } else { + input_to_input_weights_ = AddInput(TensorType_FLOAT32); + } + + input_to_forget_weights_ = AddInput(TensorType_FLOAT32); + input_to_cell_weights_ = AddInput(TensorType_FLOAT32); + input_to_output_weights_ = AddInput(TensorType_FLOAT32); + + if (use_cifg) { + recurrent_to_input_weights_ = AddNullInput(); + } else { + recurrent_to_input_weights_ = AddInput(TensorType_FLOAT32); + } + + recurrent_to_forget_weights_ = AddInput(TensorType_FLOAT32); + recurrent_to_cell_weights_ = AddInput(TensorType_FLOAT32); + recurrent_to_output_weights_ = AddInput(TensorType_FLOAT32); + + if (use_peephole) { + if (use_cifg) { + cell_to_input_weights_ = AddNullInput(); + } else { + cell_to_input_weights_ = AddInput(TensorType_FLOAT32); + } + cell_to_forget_weights_ = AddInput(TensorType_FLOAT32); + cell_to_output_weights_ = AddInput(TensorType_FLOAT32); + } else { + cell_to_input_weights_ = AddNullInput(); + cell_to_forget_weights_ = AddNullInput(); + cell_to_output_weights_ = AddNullInput(); + } + + if (use_cifg) { + input_gate_bias_ = AddNullInput(); + } else { + input_gate_bias_ = AddInput(TensorType_FLOAT32); + } + forget_gate_bias_ = AddInput(TensorType_FLOAT32); + cell_bias_ = AddInput(TensorType_FLOAT32); + output_gate_bias_ = AddInput(TensorType_FLOAT32); + + if (use_projection_weights) { + projection_weights_ = AddInput(TensorType_FLOAT32); + if (use_projection_bias) { + projection_bias_ = AddInput(TensorType_FLOAT32); + } else { + projection_bias_ = AddNullInput(); + } + } else { + projection_weights_ = AddNullInput(); + projection_bias_ = AddNullInput(); + } + + scratch_buffer_ = AddOutput(TensorType_FLOAT32); + // TODO(ghodrat): Modify these states when we have a permanent solution for + // persistent buffer. + output_state_ = AddOutput(TensorType_FLOAT32); + cell_state_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + + SetBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, + CreateLSTMOptions(builder_, ActivationFunctionType_TANH, + cell_clip, proj_clip) + .Union()); + BuildInterpreter(input_shapes); + } + + void SetInputToInputWeights(std::initializer_list f) { + PopulateTensor(input_to_input_weights_, f); + } + + void SetInputToForgetWeights(std::initializer_list f) { + PopulateTensor(input_to_forget_weights_, f); + } + + void SetInputToCellWeights(std::initializer_list f) { + PopulateTensor(input_to_cell_weights_, f); + } + + void SetInputToOutputWeights(std::initializer_list f) { + PopulateTensor(input_to_output_weights_, f); + } + + void SetRecurrentToInputWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_input_weights_, f); + } + + void SetRecurrentToForgetWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_forget_weights_, f); + } + + void SetRecurrentToCellWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_cell_weights_, f); + } + + void SetRecurrentToOutputWeights(std::initializer_list f) { + PopulateTensor(recurrent_to_output_weights_, f); + } + + void SetCellToInputWeights(std::initializer_list f) { + PopulateTensor(cell_to_input_weights_, f); + } + + void SetCellToForgetWeights(std::initializer_list f) { + PopulateTensor(cell_to_forget_weights_, f); + } + + void SetCellToOutputWeights(std::initializer_list f) { + PopulateTensor(cell_to_output_weights_, f); + } + + void SetInputGateBias(std::initializer_list f) { + PopulateTensor(input_gate_bias_, f); + } + + void SetForgetGateBias(std::initializer_list f) { + PopulateTensor(forget_gate_bias_, f); + } + + void SetCellBias(std::initializer_list f) { + PopulateTensor(cell_bias_, f); + } + + void SetOutputGateBias(std::initializer_list f) { + PopulateTensor(output_gate_bias_, f); + } + + void SetProjectionWeights(std::initializer_list f) { + PopulateTensor(projection_weights_, f); + } + + void SetProjectionBias(std::initializer_list f) { + PopulateTensor(projection_bias_, f); + } + + void ResetOutputState() { + const int zero_buffer_size = n_cell_ * n_batch_; + std::unique_ptr zero_buffer(new float[zero_buffer_size]); + memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float)); + PopulateTensor(output_state_, 0, zero_buffer.get(), + zero_buffer.get() + zero_buffer_size); + } + + void ResetCellState() { + const int zero_buffer_size = n_cell_ * n_batch_; + std::unique_ptr zero_buffer(new float[zero_buffer_size]); + memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float)); + PopulateTensor(cell_state_, 0, zero_buffer.get(), + zero_buffer.get() + zero_buffer_size); + } + + void SetInput(int offset, float* begin, float* end) { + PopulateTensor(input_, offset, begin, end); + } + + std::vector GetOutput() { return ExtractVector(output_); } + void Verify() { + auto model = tflite::UnPackModel(builder_.GetBufferPointer()); + EXPECT_NE(model, nullptr); + } + + int num_inputs() { return n_input_; } + int num_outputs() { return n_output_; } + int num_cells() { return n_cell_; } + int num_batches() { return n_batch_; } + + private: + int input_; + int input_to_input_weights_; + int input_to_forget_weights_; + int input_to_cell_weights_; + int input_to_output_weights_; + + int recurrent_to_input_weights_; + int recurrent_to_forget_weights_; + int recurrent_to_cell_weights_; + int recurrent_to_output_weights_; + + int cell_to_input_weights_; + int cell_to_forget_weights_; + int cell_to_output_weights_; + + int input_gate_bias_; + int forget_gate_bias_; + int cell_bias_; + int output_gate_bias_; + + int projection_weights_; + int projection_bias_; + + int output_; + int output_state_; + int cell_state_; + int scratch_buffer_; + + int n_batch_; + int n_input_; + int n_cell_; + int n_output_; +}; + + +TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) { + const int n_batch = 1; + const int n_input = 2; + // n_cell and n_output have the same size when there is no projection. + const int n_cell = 4; + const int n_output = 4; + + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/true, /*use_peephole=*/true, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {0, 0}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {0, 0}, // recurrent_to_input_weight tensor + {n_cell, n_output}, // recurrent_to_forget_weight tensor + {n_cell, n_output}, // recurrent_to_cell_weight tensor + {n_cell, n_output}, // recurrent_to_output_weight tensor + + {0}, // cell_to_input_weight tensor + {n_cell}, // cell_to_forget_weight tensor + {n_cell}, // cell_to_output_weight tensor + + {0}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_bias tensor + {n_cell}, // output_gate_bias tensor + + {0, 0}, // projection_weight tensor + {0}, // projection_bias tensor + }); + + + lstm.SetInputToCellWeights({-0.49770179, -0.27711356, -0.09624726, 0.05100781, + 0.04717243, 0.48944736, -0.38535351, + -0.17212132}); + + lstm.SetInputToForgetWeights({-0.55291498, -0.42866567, 0.13056988, + -0.3633365, -0.22755712, 0.28253698, 0.24407166, + 0.33826375}); + + lstm.SetInputToOutputWeights({0.10725588, -0.02335852, -0.55932593, + -0.09426838, -0.44257352, 0.54939759, + 0.01533556, 0.42751634}); + + lstm.SetCellBias({0., 0., 0., 0.}); + + lstm.SetForgetGateBias({1., 1., 1., 1.}); + + lstm.SetOutputGateBias({0., 0., 0., 0.}); + + lstm.SetRecurrentToCellWeights( + {0.54066205, -0.32668582, -0.43562764, -0.56094903, 0.42957711, + 0.01841056, -0.32764608, -0.33027974, -0.10826075, 0.20675004, + 0.19069612, -0.03026325, -0.54532051, 0.33003211, 0.44901288, + 0.21193194}); + + lstm.SetRecurrentToForgetWeights( + {-0.13832897, -0.0515101, -0.2359007, -0.16661474, -0.14340827, + 0.36986142, 0.23414481, 0.55899, 0.10798943, -0.41174671, 0.17751795, + -0.34484994, -0.35874045, -0.11352962, 0.27268326, 0.54058349}); + + lstm.SetRecurrentToOutputWeights( + {0.41613156, 0.42610586, -0.16495961, -0.5663873, 0.30579174, -0.05115908, + -0.33941799, 0.23364776, 0.11178309, 0.09481031, -0.26424935, 0.46261835, + 0.50248802, 0.26114327, -0.43736315, 0.33149987}); + + lstm.SetCellToForgetWeights( + {0.47485286, -0.51955009, -0.24458408, 0.31544167}); + lstm.SetCellToOutputWeights( + {-0.17135078, 0.82760304, 0.85573703, -0.77109635}); + + // Resetting cell_state and output_state + lstm.ResetCellState(); + lstm.ResetOutputState(); + + // Verify the model by unpacking it. + lstm.Verify(); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/padding.h b/tensorflow/contrib/lite/kernels/padding.h new file mode 100644 index 0000000000..3a60274524 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/padding.h @@ -0,0 +1,28 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_PADDING_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_PADDING_H_ + +namespace tflite { + +inline int ComputePadding(int stride, int in_size, int filter_size, + int out_size) { + int padding = ((out_size - 1) * stride + filter_size - in_size) / 2; + return padding > 0 ? padding : 0; +} + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_PADDING_H_ diff --git a/tensorflow/contrib/lite/kernels/pooling.cc b/tensorflow/contrib/lite/kernels/pooling.cc new file mode 100644 index 0000000000..b798801108 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/pooling.cc @@ -0,0 +1,355 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include "tensorflow/contrib/lite/kernels/padding.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace pooling { + +// This file has two implementation of each pooling op. +enum KernelType { + kReference, + kGenericOptimized, +}; + +enum PoolType { + kAverage, + kMax, + kL2, +}; + +struct OpData { + TfLitePaddingValues padding; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + // This is a builtin op, so we don't use the contents in 'buffer', if any. + // Instead, we allocate a new object to carry information from Prepare() to + // Eval(). + return new OpData; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +template +TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* input = GetInput(context, node, 0); + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + int batches = input->dims->data[0]; + int height = input->dims->data[1]; + int width = input->dims->data[2]; + int channels_out = input->dims->data[3]; + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params->padding; + auto computeOutSize = [padding](int imageSize, int filterSize, + int stride) -> int { + return padding == kTfLitePaddingSame + ? (imageSize + stride - 1) / stride + : padding == kTfLitePaddingValid + ? (imageSize - filterSize + stride) / stride + : 0; + }; + + int outWidth = + computeOutSize(width, params->filter_width, params->stride_width); + int outHeight = + computeOutSize(height, params->filter_height, params->stride_height); + + data->padding.height = ComputePadding(params->stride_height, height, + params->filter_height, outHeight); + data->padding.width = ComputePadding(params->stride_width, width, + params->filter_width, outWidth); + + if (input->type == kTfLiteUInt8) { + if (pool_type == kAverage || pool_type == kMax) { + TF_LITE_ENSURE_EQ(context, input->params.scale, output->params.scale); + TF_LITE_ENSURE_EQ(context, input->params.zero_point, + output->params.zero_point); + } + if (pool_type == kL2) { + // We currently don't have a quantized implementation of L2Pool + TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); + } + } + + TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4); + outputSize->data[0] = batches; + outputSize->data[1] = outHeight; + outputSize->data[2] = outWidth; + outputSize->data[3] = channels_out; + return context->ResizeTensor(context, output, outputSize); +} + +template +void AverageEvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* output) { + float activation_min, activation_max; + CalculateActivationRangeFloat(params->activation, &activation_min, + &activation_max); +#define TF_LITE_AVERAGE_POOL(type) \ + type::AveragePool( \ + GetTensorData(input), GetTensorDims(input), params->stride_width, \ + params->stride_height, data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, activation_min, \ + activation_max, GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_AVERAGE_POOL(reference_ops); + } else { + TF_LITE_AVERAGE_POOL(optimized_ops); + } +#undef TF_LITE_AVERAGE_POOL +} + +template +void AverageEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* output) { + int32_t activation_min; + int32_t activation_max; + CalculateActivationRangeUint8(params->activation, output, &activation_min, + &activation_max); +#define TF_LITE_AVERAGE_POOL(type) \ + type::AveragePool(GetTensorData(input), GetTensorDims(input), \ + params->stride_width, params->stride_height, \ + data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, \ + activation_min, activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_AVERAGE_POOL(reference_ops); + } else { + TF_LITE_AVERAGE_POOL(optimized_ops); + } +#undef TF_LITE_AVERAGE_POOL +} + +template +void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, TfLiteTensor* input, + TfLiteTensor* output) { + float activation_min, activation_max; + CalculateActivationRangeFloat(params->activation, &activation_min, + &activation_max); +#define TF_LITE_MAX_POOL(type) \ + type::MaxPool( \ + GetTensorData(input), GetTensorDims(input), params->stride_width, \ + params->stride_height, data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, activation_min, \ + activation_max, GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_MAX_POOL(reference_ops); + } else { + TF_LITE_MAX_POOL(optimized_ops); + } +#undef TF_LITE_MAX_POOL +} + +template +void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + TfLiteTensor* input, TfLiteTensor* output) { + int32_t activation_min; + int32_t activation_max; + CalculateActivationRangeUint8(params->activation, output, &activation_min, + &activation_max); +#define TF_LITE_MAX_POOL(type) \ + type::MaxPool(GetTensorData(input), GetTensorDims(input), \ + params->stride_width, params->stride_height, \ + data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, activation_min, \ + activation_max, GetTensorData(output), \ + GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_MAX_POOL(reference_ops); + } else { + TF_LITE_MAX_POOL(optimized_ops); + } +#undef TF_LITE_MAX_POOL +} + +template +void L2EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, TfLiteTensor* input, + TfLiteTensor* output) { + float activation_min, activation_max; + CalculateActivationRangeFloat(params->activation, &activation_min, + &activation_max); +#define TF_LITE_L2_POOL(type) \ + type::L2Pool( \ + GetTensorData(input), GetTensorDims(input), params->stride_width, \ + params->stride_height, data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, activation_min, \ + activation_max, GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_L2_POOL(reference_ops); + } else { + TF_LITE_L2_POOL(optimized_ops); + } +#undef TF_LITE_L2_POOL +} + +#undef TF_LITE_KERNEL_TYPE_DISPATCH + +template +TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* input = GetInput(context, node, 0); + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + AverageEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteUInt8: + AverageEvalQuantized(context, node, params, data, input, + output); + break; + default: + context->ReportError(context, "Type not currently supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +template +TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* input = GetInput(context, node, 0); + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + MaxEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteUInt8: + MaxEvalQuantized(context, node, params, data, input, output); + break; + default: + context->ReportError(context, "Type not currently supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +template +TfLiteStatus L2Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); + + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* input = GetInput(context, node, 0); + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + L2EvalFloat(context, node, params, data, input, output); + break; + case kTfLiteUInt8: + // We don't have a quantized implementation, so just fall through to the + // 'default' case. + default: + context->ReportError(context, "Type not currently supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace pooling + +TfLiteRegistration* Register_AVERAGE_POOL_REF() { + static TfLiteRegistration r = {pooling::Init, pooling::Free, + pooling::GenericPrepare, + pooling::AverageEval}; + return &r; +} + +TfLiteRegistration* Register_MAX_POOL_REF() { + static TfLiteRegistration r = {pooling::Init, pooling::Free, + pooling::GenericPrepare, + pooling::MaxEval}; + return &r; +} + +TfLiteRegistration* Register_L2_POOL_REF() { + static TfLiteRegistration r = {pooling::Init, pooling::Free, + pooling::GenericPrepare, + pooling::L2Eval}; + return &r; +} + +TfLiteRegistration* Register_AVERAGE_POOL_GENERIC_OPT() { + static TfLiteRegistration r = { + pooling::Init, pooling::Free, pooling::GenericPrepare, + pooling::AverageEval}; + return &r; +} + +TfLiteRegistration* Register_MAX_POOL_GENERIC_OPT() { + static TfLiteRegistration r = {pooling::Init, pooling::Free, + pooling::GenericPrepare, + pooling::MaxEval}; + return &r; +} + +TfLiteRegistration* Register_L2_POOL_GENERIC_OPT() { + static TfLiteRegistration r = {pooling::Init, pooling::Free, + pooling::GenericPrepare, + pooling::L2Eval}; + return &r; +} + +TfLiteRegistration* Register_AVERAGE_POOL_2D() { + return Register_AVERAGE_POOL_GENERIC_OPT(); +} + +TfLiteRegistration* Register_MAX_POOL_2D() { + return Register_MAX_POOL_GENERIC_OPT(); +} + +TfLiteRegistration* Register_L2_POOL_2D() { + return Register_L2_POOL_GENERIC_OPT(); +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/pooling_test.cc b/tensorflow/contrib/lite/kernels/pooling_test.cc new file mode 100644 index 0000000000..e1b51ec7d5 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/pooling_test.cc @@ -0,0 +1,161 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BasePoolingOpModel : public SingleOpModel { + public: + // TODO(ahentz): Also test different activation types, bias, padding types, + // stride values. + BasePoolingOpModel(BuiltinOperator type, const TensorData& input, + int filter_width, int filter_height, + const TensorData& output) { + input_ = AddInput(input); + output_ = AddOutput(output); + + SetBuiltinOp( + type, BuiltinOptions_Pool2DOptions, + CreatePool2DOptions(builder_, Padding_VALID, 2, 2, filter_width, + filter_height, ActivationFunctionType_NONE) + .Union()); + + BuildInterpreter({GetShape(input_)}); + } + + protected: + int input_; + int output_; +}; + +class FloatPoolingOpModel : public BasePoolingOpModel { + public: + using BasePoolingOpModel::BasePoolingOpModel; + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +class QuantizedPoolingOpModel : public BasePoolingOpModel { + public: + using BasePoolingOpModel::BasePoolingOpModel; + + void SetInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +TEST(FloatPoolingOpTest, AveragePool) { + FloatPoolingOpModel m(BuiltinOperator_AVERAGE_POOL_2D, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}, + /*filter_width=*/2, /*filter_height=*/2, + /*output=*/{TensorType_FLOAT32, {}}); + m.SetInput({ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({2.75, 5.75})); +} + +TEST(QuantizedPoolingOpTest, AveragePool) { + // Choose the input ranges carefully so that the dequantized output matches + // the results of the float model above. + QuantizedPoolingOpModel m( + BuiltinOperator_AVERAGE_POOL_2D, + /*input=*/{TensorType_UINT8, {1, 2, 4, 1}, 0, 15.9375}, + /*filter_width=*/2, /*filter_height=*/2, + /*output=*/{TensorType_UINT8, {}, 0, 15.9375}); + m.SetInput({ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }); + m.Invoke(); + + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({2.75, 5.75}))); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({44, 92})); +} + +TEST(FloatPoolingOpTest, MaxPool) { + FloatPoolingOpModel m(BuiltinOperator_MAX_POOL_2D, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}, + /*filter_width=*/2, /*filter_height=*/2, + /*output=*/{TensorType_FLOAT32, {}}); + m.SetInput({ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({6, 10})); +} + +TEST(QuantizedPoolingOpTest, MaxPool) { + // Choose the input ranges carefully so that the dequantized output matches + // the results of the float model above. + QuantizedPoolingOpModel m( + BuiltinOperator_MAX_POOL_2D, + /*input=*/{TensorType_UINT8, {1, 2, 4, 1}, 0, 15.9375}, + /*filter_width=*/2, /*filter_height=*/2, + /*output=*/{TensorType_UINT8, {}, 0, 15.9375}); + m.SetInput({ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }); + m.Invoke(); + + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({6, 10}))); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({96, 160})); +} + +TEST(FloatPoolingOpTest, L2Pool) { + FloatPoolingOpModel m(BuiltinOperator_L2_POOL_2D, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}, + /*filter_width=*/2, /*filter_height=*/2, + /*output=*/{TensorType_FLOAT32, {}}); + m.SetInput({ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3.5, 6.5})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc new file mode 100644 index 0000000000..ca7a0dd194 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -0,0 +1,109 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/register.h" + +namespace tflite { +namespace ops { +namespace builtin { + +TfLiteRegistration* Register_RELU(); +TfLiteRegistration* Register_RELU1(); +TfLiteRegistration* Register_RELU6(); +TfLiteRegistration* Register_TANH(); +TfLiteRegistration* Register_LOGISTIC(); +TfLiteRegistration* Register_AVERAGE_POOL_2D(); +TfLiteRegistration* Register_MAX_POOL_2D(); +TfLiteRegistration* Register_L2_POOL_2D(); +TfLiteRegistration* Register_CONV_2D(); +TfLiteRegistration* Register_DEPTHWISE_CONV_2D(); +TfLiteRegistration* Register_SVDF(); +TfLiteRegistration* Register_RNN(); +TfLiteRegistration* Register_EMBEDDING_LOOKUP(); +TfLiteRegistration* Register_EMBEDDING_LOOKUP_SPARSE(); +TfLiteRegistration* Register_FULLY_CONNECTED(); +TfLiteRegistration* Register_LSH_PROJECTION(); +TfLiteRegistration* Register_HASHTABLE_LOOKUP(); +TfLiteRegistration* Register_SOFTMAX(); +TfLiteRegistration* Register_CONCATENATION(); +TfLiteRegistration* Register_ADD(); +TfLiteRegistration* Register_MUL(); +TfLiteRegistration* Register_L2_NORMALIZATION(); +TfLiteRegistration* Register_LOCAL_RESPONSE_NORMALIZATION(); +TfLiteRegistration* Register_LSTM(); +TfLiteRegistration* Register_RESHAPE(); +TfLiteRegistration* Register_RESIZE_BILINEAR(); +TfLiteRegistration* Register_SKIP_GRAM(); +TfLiteRegistration* Register_SPACE_TO_DEPTH(); + +BuiltinOpResolver::BuiltinOpResolver() { + AddBuiltin(BuiltinOperator_RELU, Register_RELU()); + AddBuiltin(BuiltinOperator_RELU1, Register_RELU1()); + AddBuiltin(BuiltinOperator_RELU6, Register_RELU6()); + AddBuiltin(BuiltinOperator_TANH, Register_TANH()); + AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC()); + AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D()); + AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D()); + AddBuiltin(BuiltinOperator_L2_POOL_2D, Register_L2_POOL_2D()); + AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D()); + AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D()); + AddBuiltin(BuiltinOperator_SVDF, Register_SVDF()); + AddBuiltin(BuiltinOperator_RNN, Register_RNN()); + AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP, Register_EMBEDDING_LOOKUP()); + AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, + Register_EMBEDDING_LOOKUP_SPARSE()); + AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED()); + AddBuiltin(BuiltinOperator_LSH_PROJECTION, Register_LSH_PROJECTION()); + AddBuiltin(BuiltinOperator_HASHTABLE_LOOKUP, Register_HASHTABLE_LOOKUP()); + AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX()); + AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION()); + AddBuiltin(BuiltinOperator_ADD, Register_ADD()); + AddBuiltin(BuiltinOperator_MUL, Register_MUL()); + AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION()); + AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + Register_LOCAL_RESPONSE_NORMALIZATION()); + AddBuiltin(BuiltinOperator_LSTM, Register_LSTM()); + AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE()); + AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR()); + AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM()); + AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH()); +} + +TfLiteRegistration* BuiltinOpResolver::FindOp( + tflite::BuiltinOperator op) const { + auto it = builtins_.find(op); + return it != builtins_.end() ? it->second : nullptr; +} + +TfLiteRegistration* BuiltinOpResolver::FindOp(const char* op) const { + auto it = custom_ops_.find(op); + return it != custom_ops_.end() ? it->second : nullptr; +} + +void BuiltinOpResolver::AddBuiltin(tflite::BuiltinOperator op, + TfLiteRegistration* registration) { + registration->builtin_code = op; + builtins_.insert(std::make_pair(op, registration)); +} + +void BuiltinOpResolver::AddCustom(const char* name, + TfLiteRegistration* registration) { + registration->builtin_code = BuiltinOperator_CUSTOM; + custom_ops_.insert(std::make_pair(std::string(name), registration)); +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/register.h b/tensorflow/contrib/lite/kernels/register.h new file mode 100644 index 0000000000..28f5e0fcc8 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/register.h @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_REGISTER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_REGISTER_H_ + +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace ops { +namespace builtin { + +class BuiltinOpResolver : public OpResolver { + public: + BuiltinOpResolver(); + TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override; + TfLiteRegistration* FindOp(const char* op) const override; + void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration); + void AddCustom(const char* name, TfLiteRegistration* registration); + + private: + struct BuiltinOperatorHasher { + size_t operator()(const tflite::BuiltinOperator& x) const { + return std::hash()(static_cast(x)); + } + }; + std::unordered_map + builtins_; + std::unordered_map custom_ops_; +}; + +} // namespace builtin +} // namespace ops +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_BUILTIN_KERNELS_H diff --git a/tensorflow/contrib/lite/kernels/reshape.cc b/tensorflow/contrib/lite/kernels/reshape.cc new file mode 100644 index 0000000000..f3e6ddc9f4 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/reshape.cc @@ -0,0 +1,91 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace reshape { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + // TODO(ahentz): we are often given a tensor with the shape but we only pay + // attention to what the shape specified in 'params'. + TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // Tensorflow's Reshape allows one of the shape components to have the + // special -1 value, meaning it will be calculated automatically based on the + // input. Here we calculate what that dimension should be so that the number + // of output elements in the same as the number of input elements. + int num_input_elements = 1; + for (int i = 0; i < NumDimensions(input); ++i) { + num_input_elements *= SizeOfDimension(input, i); + } + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(params->num_dimensions); + int num_output_elements = 1; + int strech_dim = -1; + for (int i = 0; i < params->num_dimensions; ++i) { + int value = params->shape[i]; + if (value == -1) { + TF_LITE_ENSURE_EQ(context, strech_dim, -1); + strech_dim = i; + } else { + num_output_elements *= value; + output_size->data[i] = value; + } + } + if (strech_dim != -1) { + output_size->data[strech_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_size->data[strech_dim]; + } + + TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + memcpy(output->data.raw, input->data.raw, input->bytes); + + return kTfLiteOk; +} + +} // namespace reshape + +TfLiteRegistration* Register_RESHAPE() { + static TfLiteRegistration r = {nullptr, nullptr, reshape::Prepare, + reshape::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc new file mode 100644 index 0000000000..59ce7d5648 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/reshape_test.cc @@ -0,0 +1,90 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class ReshapeOpModel : public SingleOpModel { + public: + ReshapeOpModel(std::initializer_list input_shape, + std::initializer_list new_shape) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp( + BuiltinOperator_RESHAPE, BuiltinOptions_ReshapeOptions, + CreateReshapeOptions(builder_, builder_.CreateVector(new_shape)) + .Union()); + BuildInterpreter({input_shape}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int output_; +}; + +TEST(ReshapeOpTest, MismatchedDimensions) { + EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {2, 1}), + "num_input_elements != num_output_elements"); +} + +TEST(ReshapeOpTest, TooManyDimensions) { + EXPECT_DEATH( + ReshapeOpModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9}), + "Found too many dimensions"); +} + +TEST(ReshapeOpTest, TooManySpecialDimensions) { + EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {-1, -1, 2, 4}), + "strech_dim != -1"); +} + +TEST(ReshapeOpTest, SimpleTest) { + ReshapeOpModel m({1, 2, 4, 1}, {2, 2, 2}); + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6, 7, 8})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 2})); +} + +TEST(ReshapeOpTest, WithStretchDimension) { + ReshapeOpModel m({1, 2, 4, 1}, {2, 1, -1}); + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6, 7, 8})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 1, 4})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/resize_bilinear.cc b/tensorflow/contrib/lite/kernels/resize_bilinear.cc new file mode 100644 index 0000000000..1613c9a89f --- /dev/null +++ b/tensorflow/contrib/lite/kernels/resize_bilinear.cc @@ -0,0 +1,129 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace resize_bilinear { + +// This file has three implementation of RESIZE_BILINEAR. +enum KernelType { + kReference, + kGenericOptimized, // Neon-free + kNeonOptimized, +}; + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // TODO(ahentz): Our current implementations rely on the inputs being 4D. + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + + // TODO(ahentz): Our current implementations only support float32. + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(4); + output_size->data[0] = input->dims->data[0]; + output_size->data[1] = params->new_height; + output_size->data[2] = params->new_width; + output_size->data[3] = input->dims->data[3]; + + return context->ResizeTensor(context, output, output_size); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // We have to fake a tensor here, to satisfy ResizeBilinear(). + int32 output_size_data[2] = {params->new_height, params->new_width}; + + if (output->type == kTfLiteFloat32) { +#define TF_LITE_RESIZE_BILINEAR(type) \ + type::ResizeBilinear(GetTensorData(input), GetTensorDims(input), \ + output_size_data, GetTensorDims({1, 1, 1, 2}), \ + GetTensorData(output), GetTensorDims(output)) + + if (kernel_type == kReference) { + TF_LITE_RESIZE_BILINEAR(reference_ops); + } + if (kernel_type == kGenericOptimized || kernel_type == kNeonOptimized) { + TF_LITE_RESIZE_BILINEAR(optimized_ops); + } +#undef TF_LITE_RESIZE_BILINEAR + } else { + context->ReportError(context, "Inputs and outputs not all float types."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace resize_bilinear + +TfLiteRegistration* Register_RESIZE_BILINEAR_REF() { + static TfLiteRegistration r = { + nullptr, nullptr, resize_bilinear::Prepare, + resize_bilinear::Eval}; + return &r; +} + +TfLiteRegistration* Register_RESIZE_BILINEAR_GENERIC_OPT() { + static TfLiteRegistration r = { + nullptr, nullptr, resize_bilinear::Prepare, + resize_bilinear::Eval}; + return &r; +} + +TfLiteRegistration* Register_RESIZE_BILINEAR_NEON_OPT() { + static TfLiteRegistration r = { + nullptr, nullptr, resize_bilinear::Prepare, + resize_bilinear::Eval}; + return &r; +} + +TfLiteRegistration* Register_RESIZE_BILINEAR() { +#ifdef USE_NEON + return Register_RESIZE_BILINEAR_NEON_OPT(); +#else + return Register_RESIZE_BILINEAR_GENERIC_OPT(); +#endif +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc b/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc new file mode 100644 index 0000000000..0257c0b557 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc @@ -0,0 +1,117 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class ResizeBilinearOpModel : public SingleOpModel { + public: + ResizeBilinearOpModel(std::initializer_list input_shape, int new_height, + int new_width) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp( + BuiltinOperator_RESIZE_BILINEAR, BuiltinOptions_ResizeBilinearOptions, + CreateResizeBilinearOptions(builder_, new_height, new_width).Union()); + BuildInterpreter({input_shape}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int input_; + int output_; +}; + +TEST(ResizeBilinearOpTest, HorizontalResize) { + ResizeBilinearOpModel m({1, 1, 2, 1}, 1, 3); + m.SetInput({3, 6}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({3, 5, 6}))); +} + +TEST(ResizeBilinearOpTest, VerticalResize) { + ResizeBilinearOpModel m({1, 2, 1, 1}, 3, 1); + m.SetInput({3, 9}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({3, 7, 9}))); +} + +TEST(ResizeBilinearOpTest, TwoDimensionalResize) { + ResizeBilinearOpModel m({1, 2, 2, 1}, 3, 3); + m.SetInput({ + 3, 6, // + 9, 12 // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 3, 5, 6, // + 7, 9, 10, // + 9, 11, 12, // + }))); +} + +TEST(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches) { + ResizeBilinearOpModel m({2, 2, 2, 1}, 3, 3); + m.SetInput({ + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 3, 5, 6, // + 7, 9, 10, // + 9, 11, 12, // + 4, 8, 10, // + 8, 12, 14, // + 10, 14, 16, // + }))); +} + +TEST(ResizeBilinearOpTest, ThreeDimensionalResize) { + ResizeBilinearOpModel m({1, 2, 2, 2}, 3, 3); + m.SetInput({ + 3, 4, 6, 10, // + 9, 10, 12, 16, // + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 3, 4, 5, 8, 6, 10, // + 7, 8, 9, 12, 10, 14, // + 9, 10, 11, 14, 12, 16, // + }))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/skip_gram.cc b/tensorflow/contrib/lite/kernels/skip_gram.cc new file mode 100644 index 0000000000..c90a15b3a2 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/skip_gram.cc @@ -0,0 +1,160 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Generate a list of skip grams from an input. +// +// Options: +// ngram_size: num of words for each output item. +// max_skip_size: max num of words to skip. +// The op generates ngrams when it is 0. +// include_all_ngrams: include all ngrams with size up to ngram_size. +// +// Input: +// A string tensor to generate n-grams. +// Dim = {1} +// +// Output: +// A list of strings, each of which contains ngram_size words. +// Dim = {num_ngram} + +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace ops { +namespace builtin { + +namespace { + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TF_LITE_ENSURE_EQ(context, GetInput(context, node, 0)->type, kTfLiteString); + TF_LITE_ENSURE_EQ(context, GetOutput(context, node, 0)->type, kTfLiteString); + return kTfLiteOk; +} + +bool ShouldIncludeCurrentNgram(const TfLiteSkipGramParams* params, int size) { + if (size <= 0) { + return false; + } + if (params->include_all_ngrams) { + return size <= params->ngram_size; + } else { + return size == params->ngram_size; + } +} + +bool ShouldStepInRecursion(const TfLiteSkipGramParams* params, + const std::vector& stack, int stack_idx, + int num_words) { + // If current stack size and next word enumeration are within valid range. + if (stack_idx < params->ngram_size && stack[stack_idx] + 1 < num_words) { + // If this stack is empty, step in for first word enumeration. + if (stack_idx == 0) { + return true; + } + // If next word enumeration are within the range of max_skip_size. + // NOTE: equivalent to + // next_word_idx = stack[stack_idx] + 1 + // next_word_idx - stack[stack_idx-1] <= max_skip_size + 1 + if (stack[stack_idx] - stack[stack_idx - 1] <= params->max_skip_size) { + return true; + } + } + return false; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + // Split sentence to words. + std::vector words; + tflite::StringRef strref = tflite::GetString(GetInput(context, node, 0), 0); + int prev_idx = 0; + for (int i = 1; i < strref.len; i++) { + if (isspace(*(strref.str + i))) { + if (i > prev_idx && !isspace(*(strref.str + prev_idx))) { + words.push_back({strref.str + prev_idx, i - prev_idx}); + } + prev_idx = i + 1; + } + } + if (strref.len > prev_idx) { + words.push_back({strref.str + prev_idx, strref.len - prev_idx}); + } + + // Generate n-grams recursively. + tflite::DynamicBuffer buf; + if (words.size() < params->ngram_size) { + buf.WriteToTensor(GetOutput(context, node, 0)); + return kTfLiteOk; + } + + // Stack stores the index of word used to generate ngram. + // The size of stack is the size of ngram. + std::vector stack(params->ngram_size, 0); + // Stack index that indicates which depth the recursion is operating at. + int stack_idx = 1; + int num_words = words.size(); + + while (stack_idx >= 0) { + if (ShouldStepInRecursion(params, stack, stack_idx, num_words)) { + // When current depth can fill with a new word + // and the new word is within the max range to skip, + // fill this word to stack, recurse into next depth. + stack[stack_idx]++; + stack_idx++; + if (stack_idx < params->ngram_size) { + stack[stack_idx] = stack[stack_idx - 1]; + } + } else { + if (ShouldIncludeCurrentNgram(params, stack_idx)) { + // Add n-gram to tensor buffer when the stack has filled with enough + // words to generate the ngram. + std::vector gram(stack_idx); + for (int i = 0; i < stack_idx; i++) { + gram[i] = words[stack[i]]; + } + buf.AddJoinedString(gram, ' '); + } + // When current depth cannot fill with a valid new word, + // and not in last depth to generate ngram, + // step back to previous depth to iterate to next possible word. + stack_idx--; + } + } + + buf.WriteToTensor(GetOutput(context, node, 0)); + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration* Register_SKIP_GRAM() { + static TfLiteRegistration r = {nullptr, nullptr, Prepare, Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/skip_gram_test.cc b/tensorflow/contrib/lite/kernels/skip_gram_test.cc new file mode 100644 index 0000000000..e7f6bc904b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/skip_gram_test.cc @@ -0,0 +1,257 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAre; + +static char kSentence[] = "The quick\t brown fox\n jumps over\n the lazy dog!"; + +class SkipGramOp : public SingleOpModel { + public: + SkipGramOp(int ngram_size, int max_skip_size, bool include_all_ngrams) { + input_ = AddInput(TensorType_STRING); + output_ = AddOutput(TensorType_STRING); + + SetBuiltinOp(BuiltinOperator_SKIP_GRAM, BuiltinOptions_SkipGramOptions, + CreateSkipGramOptions(builder_, ngram_size, max_skip_size, + include_all_ngrams) + .Union()); + BuildInterpreter({{1}}); + } + void SetInput(const string& content) { + PopulateStringTensor(input_, {content}); + } + + std::vector GetOutput() { + std::vector ans; + TfLiteTensor* tensor = interpreter_->tensor(output_); + + int num = GetStringCount(tensor); + for (int i = 0; i < num; i++) { + StringRef strref = GetString(tensor, i); + ans.push_back(string(strref.str, strref.len)); + } + return ans; + } + + private: + int input_; + int output_; +}; + +TEST(SkipGramTest, TestUnigram) { + SkipGramOp m(1, 0, false); + + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), testing::UnorderedElementsAreArray( + {"The", "quick", "brown", "fox", "jumps", + "over", "the", "lazy", "dog!"})); +} + +TEST(SkipGramTest, TestBigram) { + SkipGramOp m(2, 0, false); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + testing::UnorderedElementsAreArray( + {"The quick", "quick brown", "brown fox", "fox jumps", + "jumps over", "over the", "the lazy", "lazy dog!"})); +} + +TEST(SkipGramTest, TestAllBigram) { + SkipGramOp m(2, 0, true); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + testing::UnorderedElementsAreArray( + {// Unigram + "The", "quick", "brown", "fox", "jumps", "over", "the", + "lazy", "dog!", + // Bigram + "The quick", "quick brown", "brown fox", "fox jumps", + "jumps over", "over the", "the lazy", "lazy dog!"})); +} + +TEST(SkipGramTest, TestAllTrigram) { + SkipGramOp m(3, 0, true); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + testing::UnorderedElementsAreArray( + {// Unigram + "The", "quick", "brown", "fox", "jumps", "over", "the", + "lazy", "dog!", + // Bigram + "The quick", "quick brown", "brown fox", "fox jumps", + "jumps over", "over the", "the lazy", "lazy dog!", + // Trigram + "The quick brown", "quick brown fox", "brown fox jumps", + "fox jumps over", "jumps over the", "over the lazy", + "the lazy dog!"})); +} + +TEST(SkipGramTest, TestSkip1Bigram) { + SkipGramOp m(2, 1, false); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + testing::UnorderedElementsAreArray( + {"The quick", "The brown", "quick brown", "quick fox", "brown fox", + "brown jumps", "fox jumps", "fox over", "jumps over", "jumps the", + "over the", "over lazy", "the lazy", "the dog!", "lazy dog!"})); +} + +TEST(SkipGramTest, TestSkip2Bigram) { + SkipGramOp m(2, 2, false); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + testing::UnorderedElementsAreArray( + {"The quick", "The brown", "The fox", "quick brown", + "quick fox", "quick jumps", "brown fox", "brown jumps", + "brown over", "fox jumps", "fox over", "fox the", + "jumps over", "jumps the", "jumps lazy", "over the", + "over lazy", "over dog!", "the lazy", "the dog!", + "lazy dog!"})); +} + +TEST(SkipGramTest, TestSkip1Trigram) { + SkipGramOp m(3, 1, false); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + testing::UnorderedElementsAreArray( + {"The quick brown", "The quick fox", "The brown fox", + "The brown jumps", "quick brown fox", "quick brown jumps", + "quick fox jumps", "quick fox over", "brown fox jumps", + "brown fox over", "brown jumps over", "brown jumps the", + "fox jumps over", "fox jumps the", "fox over the", + "fox over lazy", "jumps over the", "jumps over lazy", + "jumps the lazy", "jumps the dog!", "over the lazy", + "over the dog!", "over lazy dog!", "the lazy dog!"})); +} + +TEST(SkipGramTest, TestSkip2Trigram) { + SkipGramOp m(3, 2, false); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + testing::UnorderedElementsAreArray( + {"The quick brown", "The quick fox", "The quick jumps", + "The brown fox", "The brown jumps", "The brown over", + "The fox jumps", "The fox over", "The fox the", + "quick brown fox", "quick brown jumps", "quick brown over", + "quick fox jumps", "quick fox over", "quick fox the", + "quick jumps over", "quick jumps the", "quick jumps lazy", + "brown fox jumps", "brown fox over", "brown fox the", + "brown jumps over", "brown jumps the", "brown jumps lazy", + "brown over the", "brown over lazy", "brown over dog!", + "fox jumps over", "fox jumps the", "fox jumps lazy", + "fox over the", "fox over lazy", "fox over dog!", + "fox the lazy", "fox the dog!", "jumps over the", + "jumps over lazy", "jumps over dog!", "jumps the lazy", + "jumps the dog!", "jumps lazy dog!", "over the lazy", + "over the dog!", "over lazy dog!", "the lazy dog!"})); +} + +TEST(SkipGramTest, TestAllSkip2Trigram) { + SkipGramOp m(3, 2, true); + m.SetInput(kSentence); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + testing::UnorderedElementsAreArray( + {// Unigram + "The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", + "dog!", + // Bigram + "The quick", "The brown", "The fox", "quick brown", "quick fox", + "quick jumps", "brown fox", "brown jumps", "brown over", "fox jumps", + "fox over", "fox the", "jumps over", "jumps the", "jumps lazy", + "over the", "over lazy", "over dog!", "the lazy", "the dog!", + "lazy dog!", + // Trigram + "The quick brown", "The quick fox", "The quick jumps", + "The brown fox", "The brown jumps", "The brown over", + "The fox jumps", "The fox over", "The fox the", "quick brown fox", + "quick brown jumps", "quick brown over", "quick fox jumps", + "quick fox over", "quick fox the", "quick jumps over", + "quick jumps the", "quick jumps lazy", "brown fox jumps", + "brown fox over", "brown fox the", "brown jumps over", + "brown jumps the", "brown jumps lazy", "brown over the", + "brown over lazy", "brown over dog!", "fox jumps over", + "fox jumps the", "fox jumps lazy", "fox over the", "fox over lazy", + "fox over dog!", "fox the lazy", "fox the dog!", "jumps over the", + "jumps over lazy", "jumps over dog!", "jumps the lazy", + "jumps the dog!", "jumps lazy dog!", "over the lazy", + "over the dog!", "over lazy dog!", "the lazy dog!"})); +} + +TEST(SkipGramTest, TestSingleWord) { + SkipGramOp m(1, 1, false); + m.SetInput("Hi"); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAre("Hi")); +} + +TEST(SkipGramTest, TestWordsLessThanGram) { + SkipGramOp m(3, 1, false); + m.SetInput("Hi hi"); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), std::vector()); +} + +TEST(SkipGramTest, TestEmptyInput) { + SkipGramOp m(1, 1, false); + m.SetInput(""); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAre()); +} + +TEST(SkipGramTest, TestWhitespaceInput) { + SkipGramOp m(1, 1, false); + m.SetInput(" "); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAre()); +} + +TEST(SkipGramTest, TestInputWithExtraSpace) { + SkipGramOp m(1, 1, false); + m.SetInput(" Hello world ! "); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAre("Hello", "world", "!")); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/softmax_test.cc b/tensorflow/contrib/lite/kernels/softmax_test.cc new file mode 100644 index 0000000000..ec8ec03b0d --- /dev/null +++ b/tensorflow/contrib/lite/kernels/softmax_test.cc @@ -0,0 +1,143 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite SOFTMAX op. + +#include +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +class SoftmaxOpModel : public SingleOpModel { + public: + SoftmaxOpModel(int batches, int size, float beta) + : batches_(batches), input_size_(size), beta_(beta) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions, + CreateSoftmaxOptions(builder_, beta_).Union()); + BuildInterpreter({{batches_, input_size_}}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInput(int offset, float* begin, float* end) { + PopulateTensor(input_, offset, begin, end); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int input_; + int output_; + + int batches_; + int input_size_; + float beta_; +}; + +TEST(SoftmaxOpTest, SimpleTest) { + SoftmaxOpModel m(/*batches=*/2, /*size=*/5, /*beta=*/1.0); + m.SetInput({ + 1.0, 2.0, 3.0, 4.0, 5.0, // b = 0 + -1.0, -2.0, -3.0, -4.0, -5.0, // b = 0 + }); + + m.Invoke(); + + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {0.011656231, 0.031684921, 0.086128544, 0.234121657, 0.636408647, + 0.636408647, 0.234121657, 0.086128544, 0.031684921, 0.011656231}, + 1e-6))); +} + +TEST(SoftmaxOpTest, CompareWithTFminiBetaEq1) { + const int batch_size = 2; + const int input_size = 5; + const float beta = 1.0; + static float input_buffer[] = { + 1.0, 2.0, 3.0, 4.0, 5.0, // b = 0 + -1.0, -2.0, -3.0, -4.0, -5.0, // b = 1 + }; + + SoftmaxOpModel m(batch_size, input_size, beta); + + m.SetInput(0, input_buffer, input_buffer + input_size * batch_size); + + m.Invoke(); + + std::unique_ptr output_buffer(new float[input_size * batch_size]); + static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size}, + {1, 0, 0, input_size}}; + tflite::reference_ops::Softmax(input_buffer, input_dims, beta, + output_buffer.get(), input_dims); + + std::vector expected; + expected.insert(expected.end(), output_buffer.get(), + output_buffer.get() + input_size * batch_size); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(expected, 1e-6))); +} + +TEST(SoftmaxOpTest, CompareWithTFminiBetaNotEq1) { + const int batch_size = 2; + const int input_size = 5; + const float beta = 0.5; + static float input_buffer[] = { + 1.0, 2.0, 3.0, 4.0, 5.0, // b = 0 + -1.0, -2.0, -3.0, -4.0, -5.0, // b = 1 + }; + + SoftmaxOpModel m(batch_size, input_size, beta); + + m.SetInput(0, input_buffer, input_buffer + input_size * batch_size); + + m.Invoke(); + + std::unique_ptr output_buffer(new float[input_size * batch_size]); + static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size}, + {1, 0, 0, input_size}}; + tflite::reference_ops::Softmax(input_buffer, input_dims, beta, + output_buffer.get(), input_dims); + + std::vector expected; + expected.insert(expected.end(), output_buffer.get(), + output_buffer.get() + input_size * batch_size); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(expected, 1e-6))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/space_to_depth.cc b/tensorflow/contrib/lite/kernels/space_to_depth.cc new file mode 100644 index 0000000000..cb2e509c98 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/space_to_depth.cc @@ -0,0 +1,146 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace space_to_depth { + +// This file has two implementation of SpaceToDepth. Note that SpaceToDepth +// only works on 4D tensors. +enum KernelType { + kReference, + kGenericOptimized, +}; + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + + auto data_type = output->type; + TF_LITE_ENSURE(context, + data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8 || + data_type == kTfLiteInt32 || data_type == kTfLiteInt64); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + const int block_size = params->block_size; + const int input_height = input->dims->data[1]; + const int input_width = input->dims->data[2]; + int output_height = input_height / block_size; + int output_width = input_width / block_size; + + TF_LITE_ENSURE_EQ(context, input_height, output_height * block_size); + TF_LITE_ENSURE_EQ(context, input_width, output_width * block_size); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(4); + output_size->data[0] = input->dims->data[0]; + output_size->data[1] = output_height; + output_size->data[2] = output_width; + output_size->data[3] = input->dims->data[3] * block_size * block_size; + + return context->ResizeTensor(context, output, output_size); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + +#define TF_LITE_SPACE_TO_DEPTH(type, scalar) \ + type::SpaceToDepth( \ + GetTensorData(input), GetTensorDims(input), params->block_size, \ + GetTensorData(output), GetTensorDims(output)) + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + if (kernel_type == kReference) { + TF_LITE_SPACE_TO_DEPTH(reference_ops, float); + } else { + TF_LITE_SPACE_TO_DEPTH(optimized_ops, float); + } + break; + case kTfLiteUInt8: + if (kernel_type == kReference) { + TF_LITE_SPACE_TO_DEPTH(reference_ops, uint8_t); + } else { + TF_LITE_SPACE_TO_DEPTH(optimized_ops, uint8_t); + } + break; + case kTfLiteInt32: + if (kernel_type == kReference) { + TF_LITE_SPACE_TO_DEPTH(reference_ops, int32_t); + } else { + TF_LITE_SPACE_TO_DEPTH(optimized_ops, int32_t); + } + break; + case kTfLiteInt64: + if (kernel_type == kReference) { + TF_LITE_SPACE_TO_DEPTH(reference_ops, int64_t); + } else { + TF_LITE_SPACE_TO_DEPTH(optimized_ops, int64_t); + } + break; + default: + context->ReportError(context, "Type not currently supported."); + return kTfLiteError; + } +#undef TF_LITE_SPACE_TO_DEPTH + + return kTfLiteOk; +} + +} // namespace space_to_depth + +TfLiteRegistration* Register_SPACE_TO_DEPTH_REF() { + static TfLiteRegistration r = { + nullptr, nullptr, space_to_depth::Prepare, + space_to_depth::Eval}; + return &r; +} + +TfLiteRegistration* Register_SPACE_TO_DEPTH_GENERIC_OPT() { + static TfLiteRegistration r = { + nullptr, nullptr, space_to_depth::Prepare, + space_to_depth::Eval}; + return &r; +} + +TfLiteRegistration* Register_SPACE_TO_DEPTH() { + return Register_SPACE_TO_DEPTH_GENERIC_OPT(); +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/space_to_depth_test.cc b/tensorflow/contrib/lite/kernels/space_to_depth_test.cc new file mode 100644 index 0000000000..911f08a92c --- /dev/null +++ b/tensorflow/contrib/lite/kernels/space_to_depth_test.cc @@ -0,0 +1,102 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +class SpaceToDepthOpModel : public SingleOpModel { + public: + SpaceToDepthOpModel(const TensorData& tensor_data, int block_size) { + input_ = AddInput(tensor_data); + output_ = AddOutput(tensor_data); + SetBuiltinOp(BuiltinOperator_SPACE_TO_DEPTH, + BuiltinOptions_SpaceToDepthOptions, + CreateSpaceToDepthOptions(builder_, block_size).Union()); + BuildInterpreter({GetShape(input_)}); + } + + template + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + template + std::vector GetOutput() { + return ExtractVector(output_); + } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int output_; +}; + +TEST(SpaceToDepthOpModel, BadBlockSize) { + EXPECT_DEATH(SpaceToDepthOpModel({TensorType_FLOAT32, {1, 2, 2, 1}}, 3), + "Cannot allocate tensors"); +} + +TEST(SpaceToDepthOpModel, Float32) { + SpaceToDepthOpModel m({TensorType_FLOAT32, {1, 2, 2, 2}}, 2); + m.SetInput({1.4, 2.3, 3.2, 4.1, 5.4, 6.3, 7.2, 8.1}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({1.4, 2.3, 3.2, 4.1, 5.4, 6.3, 7.2, 8.1})); + EXPECT_THAT(m.GetOutputShape(), ElementsAre(1, 1, 1, 8)); +} + +TEST(SpaceToDepthOpModel, Uint8) { + SpaceToDepthOpModel m({TensorType_UINT8, {1, 2, 2, 1}}, 2); + m.SetInput({1, 2, 3, 4}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4})); + EXPECT_THAT(m.GetOutputShape(), ElementsAre(1, 1, 1, 4)); +} + +TEST(SpaceToDepthOpModel, Int32) { + SpaceToDepthOpModel m({TensorType_INT32, {1, 2, 2, 3}}, 2); + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); + EXPECT_THAT(m.GetOutputShape(), ElementsAre(1, 1, 1, 12)); +} + +TEST(SpaceToDepthOpModel, Int64) { + SpaceToDepthOpModel m({TensorType_INT64, {1, 4, 4, 1}}, 2); + m.SetInput({1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray( + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})); + EXPECT_THAT(m.GetOutputShape(), ElementsAre(1, 2, 2, 4)); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/svdf.cc b/tensorflow/contrib/lite/kernels/svdf.cc new file mode 100644 index 0000000000..dd414d53bd --- /dev/null +++ b/tensorflow/contrib/lite/kernels/svdf.cc @@ -0,0 +1,224 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace svdf { + +constexpr int kInputTensor = 0; +constexpr int kWeightsFeatureTensor = 1; +constexpr int kWeightsTimeTensor = 2; +constexpr int kBiasTensor = 3; +constexpr int kStateTensor = 0; +constexpr int KOutputTensor = 1; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* scratch_tensor_index = new int; + context->AddTensors(context, 1, scratch_tensor_index); + return scratch_tensor_index; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + int* scratch_tensor_index = reinterpret_cast(node->user_data); + + // Check we have all the inputs and outputs we need. + TF_LITE_ENSURE_EQ(context, node->inputs->size, 4); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 2); + + TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]]; + TfLiteTensor* weights_feature = + &context->tensors[node->inputs->data[kWeightsFeatureTensor]]; + TfLiteTensor* weights_time = + &context->tensors[node->inputs->data[kWeightsTimeTensor]]; + + // Check all the parameters of tensor match within themselves and match the + // input configuration. + const int rank = params->rank; + const int batch_size = input->dims->data[0]; + const int num_filters = weights_feature->dims->data[0]; + TF_LITE_ASSERT_EQ(num_filters % rank, 0); + const int num_units = num_filters / rank; + const int memory_size = weights_time->dims->data[1]; + TF_LITE_ASSERT_EQ(input->dims->data[1], weights_feature->dims->data[1]); + TF_LITE_ASSERT_EQ(weights_time->dims->data[0], num_filters); + + TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + if (bias) { + TF_LITE_ASSERT_EQ(bias->dims->data[0], num_units); + } + + TfLiteTensor* state = &context->tensors[node->outputs->data[kStateTensor]]; + TfLiteTensor* output = &context->tensors[node->outputs->data[KOutputTensor]]; + + // Resize state. + // For each batch, the state is a 2-D tensor: memory_size * num_filters + // The left most column is used to save current cycle activation. + // The right most column is used to save temporary output which will be + // reduced to num_units outputs. + TfLiteIntArray* state_size_array = TfLiteIntArrayCreate(2); + state_size_array->data[0] = batch_size; + state_size_array->data[1] = memory_size * num_filters; + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, state, state_size_array)); + + // Mark state as a persistent tensor. + state->allocation_type = kTfLiteArenaRwPersistent; + + // Resize output. + TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(2); + output_size_array->data[0] = batch_size; + output_size_array->data[1] = num_units; + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output, output_size_array)); + + // Resize scratch. + TfLiteIntArrayFree(node->temporaries); + node->temporaries = TfLiteIntArrayCreate(1); + node->temporaries->data[0] = *scratch_tensor_index; + + TfLiteIntArray* scratch_size_array = TfLiteIntArrayCreate(2); + scratch_size_array->data[0] = batch_size; + scratch_size_array->data[1] = num_filters; + + TfLiteTensor* scratch_tensor = &context->tensors[node->temporaries->data[0]]; + scratch_tensor->type = input->type; + scratch_tensor->allocation_type = kTfLiteArenaRw; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_tensor, + scratch_size_array)); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]]; + TfLiteTensor* weights_feature = + &context->tensors[node->inputs->data[kWeightsFeatureTensor]]; + TfLiteTensor* weights_time = + &context->tensors[node->inputs->data[kWeightsTimeTensor]]; + + TfLiteTensor* state = &context->tensors[node->outputs->data[kStateTensor]]; + TfLiteTensor* output = &context->tensors[node->outputs->data[KOutputTensor]]; + TfLiteTensor* scratch = &context->tensors[node->temporaries->data[0]]; + + TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + + const int rank = params->rank; + const int batch_size = input->dims->data[0]; + const int input_size = input->dims->data[1]; + const int num_filters = weights_feature->dims->data[0]; + const int num_units = num_filters / rank; + const int memory_size = weights_time->dims->data[1]; + + // Clear the activation (state left most column). + // TODO(ghodrat): Add a test which initialize state with invalid values in + // left most column and make sure it passes. + for (int b = 0; b < batch_size; b++) { + float* state_ptr_batch = state->data.f + b * memory_size * num_filters; + for (int c = 0; c < num_filters; c++) { + float* state_ptr = state_ptr_batch + c * memory_size; + state_ptr[memory_size - 1] = 0.0; + } + } + + // Compute conv1d(inputs, weights_feature). + // The state left most column is used to save current cycle activation. This + // is achieved by starting at state->data.f[memory_size - 1] and having the + // stride equal to memory_size. + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + weights_feature->data.f, num_filters, input_size, input->data.f, + batch_size, &state->data.f[memory_size - 1], memory_size); + + // Compute matmul(state, weights_time). + // The right most column is used to save temporary output (with the size of + // num_filters). This is achieved by starting at state->data.f and having the + // stride equal to memory_size. + for (int b = 0; b < batch_size; b++) { + float* state_ptr_batch = state->data.f + b * memory_size * num_filters; + float* scratch_ptr_batch = scratch->data.f + b * num_filters; + tensor_utils::BatchVectorBatchVectorDotProduct( + weights_time->data.f, state_ptr_batch, memory_size, num_filters, + scratch_ptr_batch, /*result_stride=*/1); + } + + // Initialize output with bias if provided. + if (bias) { + tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size, + output->data.f); + } else { + tensor_utils::ZeroVector(output->data.f, batch_size * num_units); + } + + // Reduction sum + // TODO(ghodrat): Consider not reusing state for the temporary output, this + // way ReductionSum operates on row-vector instead of column vector. + for (int b = 0; b < batch_size; b++) { + float* output_ptr_batch = output->data.f + b * num_units; + float* scratch_ptr_batch = scratch->data.f + b * num_filters; + tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, + num_units, rank); + } + + // Apply activation. + for (int b = 0; b < batch_size; b++) { + float* output_ptr_batch = output->data.f + b * num_units; + tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units, + params->activation, output_ptr_batch); + } + + // Right shift the state. + for (int b = 0; b < batch_size; b++) { + float* state_ptr_batch = state->data.f + b * memory_size * num_filters; + for (int f = 0; f < num_filters; f++) { + tensor_utils::VectorShiftLeft(state_ptr_batch, memory_size, + /*shift_value=*/0.0); + state_ptr_batch += memory_size; + } + } + return kTfLiteOk; +} + +} // namespace svdf + +TfLiteRegistration* Register_SVDF() { + static TfLiteRegistration r = {svdf::Init, svdf::Free, svdf::Prepare, + svdf::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/svdf_test.cc b/tensorflow/contrib/lite/kernels/svdf_test.cc new file mode 100644 index 0000000000..d956025e9d --- /dev/null +++ b/tensorflow/contrib/lite/kernels/svdf_test.cc @@ -0,0 +1,312 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for TFLite SVDF op. + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +static float svdf_input[] = { + 0.12609188, -0.46347019, -0.89598465, + 0.35867718, 0.36897406, 0.73463392, + + 0.14278367, -1.64410412, -0.75222826, + -0.57290924, 0.12729003, 0.7567004, + + 0.49837467, 0.19278903, 0.26584083, + 0.17660543, 0.52949083, -0.77931279, + + -0.11186574, 0.13164264, -0.05349274, + -0.72674477, -0.5683046, 0.55900657, + + -0.68892461, 0.37783599, 0.18263303, + -0.63690937, 0.44483393, -0.71817774, + + -0.81299269, -0.86831826, 1.43940818, + -0.95760226, 1.82078898, 0.71135032, + + -1.45006323, -0.82251364, -1.69082689, + -1.65087092, -1.89238167, 1.54172635, + + 0.03966608, -0.24936394, -0.77526885, + 2.06740379, -1.51439476, 1.43768692, + + 0.11771342, -0.23761693, -0.65898693, + 0.31088525, -1.55601168, -0.87661445, + + -0.89477462, 1.67204106, -0.53235275, + -0.6230064, 0.29819036, 1.06939757, +}; + +static float svdf_golden_output_rank_1[] = { + 0.014899, -0.0517661, -0.143725, -0.00271883, + -0.03004015, 0.09565311, 0.1587342, 0.00784263, + + 0.068281, -0.162217, -0.152268, 0.00323521, + 0.01582633, 0.03858774, -0.03001583, -0.02671271, + + -0.0317821, -0.0333089, 0.0609602, 0.0333759, + -0.01432795, 0.05524484, 0.1101355, -0.02382665, + + -0.00623099, -0.077701, -0.391193, -0.0136691, + -0.02333033, 0.02293761, 0.12338032, 0.04326871, + + 0.201551, -0.164607, -0.179462, -0.0592739, + 0.01064911, -0.17503069, 0.07821996, -0.00224009, + + 0.0886511, -0.0875401, -0.269283, 0.0281379, + -0.02282338, 0.09741908, 0.32973239, 0.12281385, + + -0.201174, -0.586145, -0.628624, -0.0330412, + 0.24780814, -0.39304617, -0.22473189, 0.02589256, + + -0.0839096, -0.299329, 0.108746, 0.109808, + 0.10084175, -0.06416984, 0.28936723, 0.0026358, + + 0.419114, -0.237824, -0.422627, 0.175115, + -0.2314795, -0.18584411, -0.4228974, -0.12928449, + + 0.36726, -0.522303, -0.456502, -0.175475, + 0.17012937, -0.34447709, 0.38505614, -0.28158101, +}; + +static float svdf_golden_output_rank_2[] = { + -0.09623547, -0.10193135, 0.11083051, -0.0347917, + 0.1141196, 0.12965347, -0.12652366, 0.01007236, + + -0.16396809, -0.21247184, 0.11259045, -0.04156673, + 0.10132131, -0.06143532, -0.00924693, 0.10084561, + + 0.01257364, 0.0506071, -0.19287863, -0.07162561, + -0.02033747, 0.22673416, 0.15487903, 0.02525555, + + -0.1411963, -0.37054959, 0.01774767, 0.05867489, + 0.09607603, -0.0141301, -0.08995658, 0.12867066, + + -0.27142537, -0.16955489, 0.18521598, -0.12528358, + 0.00331409, 0.11167502, 0.02218599, -0.07309391, + + 0.09593632, -0.28361851, -0.0773851, 0.17199151, + -0.00075242, 0.33691186, -0.1536046, 0.16572715, + + -0.27916506, -0.27626723, 0.42615682, 0.3225764, + -0.37472126, -0.55655634, -0.05013514, 0.289112, + + -0.24418658, 0.07540751, -0.1940318, -0.08911639, + 0.00732617, 0.46737891, 0.26449674, 0.24888524, + + -0.17225097, -0.54660404, -0.38795233, 0.08389944, + 0.07736043, -0.28260678, 0.15666828, 1.14949894, + + -0.57454878, -0.64704704, 0.73235172, -0.34616736, + 0.21120001, -0.22927976, 0.02455296, -0.35906726, +}; + +// Derived class of SingleOpModel, which is used to test SVDF TFLite op. +class SVDFOpModel : public SingleOpModel { + public: + SVDFOpModel(int batches, int units, int input_size, int memory_size, int rank) + : batches_(batches), + units_(units), + input_size_(input_size), + memory_size_(memory_size), + rank_(rank) { + input_ = AddInput(TensorType_FLOAT32); + weights_feature_ = AddInput(TensorType_FLOAT32); + weights_time_ = AddInput(TensorType_FLOAT32); + bias_ = AddNullInput(); + state_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp( + BuiltinOperator_SVDF, BuiltinOptions_SVDFOptions, + CreateSVDFOptions(builder_, rank, ActivationFunctionType_NONE).Union()); + BuildInterpreter({ + {batches_, input_size_}, // Input tensor + {units_ * rank, input_size_}, // weights_feature tensor + {units_ * rank, memory_size_}, // weights_time tensor + {units_} // bias tensor + }); + } + + // Populates the weights_feature tensor. + void SetWeightsFeature(std::initializer_list f) { + PopulateTensor(weights_feature_, f); + } + + // Populates the weights_time tensor. + void SetWeightsTime(std::initializer_list f) { + PopulateTensor(weights_time_, f); + } + + // Populates the input tensor. + void SetInput(int offset, float* begin, float* end) { + PopulateTensor(input_, offset, begin, end); + } + + // Resets the state of SVDF op by filling it with 0's. + void ResetState() { + const int zero_buffer_size = rank_ * units_ * batches_ * memory_size_; + std::unique_ptr zero_buffer(new float[zero_buffer_size]); + memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float)); + PopulateTensor(state_, 0, zero_buffer.get(), + zero_buffer.get() + zero_buffer_size); + } + + // Extracts the output tensor from the SVDF op. + std::vector GetOutput() { return ExtractVector(output_); } + + int input_size() { return input_size_; } + int num_units() { return units_; } + int num_batches() { return batches_; } + + private: + int input_; + int weights_feature_; + int weights_time_; + int bias_; + int state_; + int output_; + + int batches_; + int units_; + int input_size_; + int memory_size_; + int rank_; +}; + +TEST(SVDFOpTest, BlackBoxTestRank1) { + SVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3, + /*memory_size=*/10, /*rank=*/1); + svdf.SetWeightsFeature({-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}); + + svdf.SetWeightsTime( + {-0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}); + + svdf.ResetState(); + const int svdf_num_batches = svdf.num_batches(); + const int svdf_input_size = svdf.input_size(); + const int svdf_num_units = svdf.num_units(); + const int input_sequence_size = + sizeof(svdf_input) / sizeof(float) / (svdf_input_size * svdf_num_batches); + // Going over each input batch, setting the input tensor, invoking the SVDF op + // and checking the output with the expected golden values. + for (int i = 0; i < input_sequence_size; i++) { + float* batch_start = svdf_input + i * svdf_input_size * svdf_num_batches; + float* batch_end = batch_start + svdf_input_size * svdf_num_batches; + svdf.SetInput(0, batch_start, batch_end); + + svdf.Invoke(); + + float* golden_start = + svdf_golden_output_rank_1 + i * svdf_num_units * svdf_num_batches; + float* golden_end = golden_start + svdf_num_units * svdf_num_batches; + std::vector expected; + expected.insert(expected.end(), golden_start, golden_end); + + EXPECT_THAT(svdf.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + } +} + +TEST(SVDFOpTest, BlackBoxTestRank2) { + SVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3, + /*memory_size=*/10, /*rank=*/2); + svdf.SetWeightsFeature({-0.31930989, 0.0079667, 0.39296314, 0.37613347, + 0.12416199, 0.15785322, 0.27901134, 0.3905206, + 0.21931258, -0.36137494, -0.10640851, 0.31053296, + -0.36118156, -0.0976817, -0.36916667, 0.22197971, + 0.15294972, 0.38031587, 0.27557442, 0.39635518, + -0.21580373, -0.06634006, -0.02702999, 0.27072677}); + + svdf.SetWeightsTime( + {-0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657, + + -0.14884081, 0.19931212, -0.36002168, 0.34663299, -0.11405486, + 0.12672701, 0.39463779, -0.07886535, -0.06384811, 0.08249187, + + -0.26816407, -0.19905911, 0.29211238, 0.31264046, -0.28664589, + 0.05698794, 0.11613581, 0.14078894, 0.02187902, -0.21781836, + + -0.15567942, 0.08693647, -0.38256618, 0.36580828, -0.22922277, + -0.0226903, 0.12878349, -0.28122205, -0.10850525, -0.11955214, + + 0.27179423, -0.04710215, 0.31069002, 0.22672787, 0.09580326, + 0.08682203, 0.1258215, 0.1851041, 0.29228821, 0.12366763}); + + svdf.ResetState(); + const int svdf_num_batches = svdf.num_batches(); + const int svdf_input_size = svdf.input_size(); + const int svdf_num_units = svdf.num_units(); + const int input_sequence_size = + sizeof(svdf_input) / sizeof(float) / (svdf_input_size * svdf_num_batches); + // Going over each input batch, setting the input tensor, invoking the SVDF op + // and checking the output with the expected golden values. + for (int i = 0; i < input_sequence_size; i++) { + float* batch_start = svdf_input + i * svdf_input_size * svdf_num_batches; + float* batch_end = batch_start + svdf_input_size * svdf_num_batches; + svdf.SetInput(0, batch_start, batch_end); + + svdf.Invoke(); + + float* golden_start = + svdf_golden_output_rank_2 + i * svdf_num_units * svdf_num_batches; + float* golden_end = golden_start + svdf_num_units * svdf_num_batches; + std::vector expected; + expected.insert(expected.end(), golden_start, golden_end); + + EXPECT_THAT(svdf.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + } +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc new file mode 100644 index 0000000000..f716ba8741 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -0,0 +1,183 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/test_util.h" + +#include "tensorflow/contrib/lite/version.h" +#include "tensorflow/core/platform/logging.h" + +namespace tflite { + +using ::testing::FloatNear; +using ::testing::Matcher; + +namespace { +template +std::pair QuantizationParams(float f_min, float f_max) { + // These are required by many quantized operations. + CHECK_LE(f_min, 0); + CHECK_GE(f_max, 0); + T q_min = std::numeric_limits::min(); + T q_max = std::numeric_limits::max(); + float range = q_max - q_min; + float scale = (f_max - f_min) / range; + int32_t zero_point = std::min( + q_max, + std::max(q_min, static_cast(std::round(q_min - f_min / scale)))); + return {scale, zero_point}; +} +} // namespace + +std::vector> ArrayFloatNear(const std::vector& values, + float max_abs_error) { + std::vector> matchers; + matchers.reserve(values.size()); + for (const float& v : values) { + matchers.emplace_back(FloatNear(v, max_abs_error)); + } + return matchers; +} + +int SingleOpModel::AddTensor(TensorData t) { + int id = tensors_.size(); + + // This is slightly different depending on whether we are adding a + // quantized or a regular tensor. + bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0); + + flatbuffers::Offset q_params = 0; + + if (is_quantized) { + if (t.min != 0 || t.max != 0) { + if (t.type == TensorType_UINT8) { + std::tie(t.scale, t.zero_point) = + QuantizationParams(t.min, t.max); + } else if (t.type == TensorType_INT32) { + std::tie(t.scale, t.zero_point) = + QuantizationParams(t.min, t.max); + } else { + LOG(FATAL) << "No support for the requested quantized type"; + } + t.min = 0; + t.max = 0; + } + + q_params = CreateQuantizationParameters( + builder_, /*min=*/0, /*max=*/0, builder_.CreateVector({t.scale}), + builder_.CreateVector({t.zero_point})); + } + + tensors_.push_back(CreateTensor(builder_, builder_.CreateVector({}), + t.type, /*buffer=*/0, + /*name=*/0, q_params)); + + tensor_data_[id] = t; + + return id; +} + +int SingleOpModel::AddInput(const TensorData& t) { + int id = AddTensor(t); + inputs_.push_back(id); + return id; +} + +int SingleOpModel::AddNullInput() { + int id = kOptionalTensor; + inputs_.push_back(id); + return id; +} + +int SingleOpModel::AddOutput(const TensorData& t) { + int id = AddTensor(t); + outputs_.push_back(id); + return id; +} + +void SingleOpModel::SetBuiltinOp(BuiltinOperator type, + BuiltinOptions builtin_options_type, + flatbuffers::Offset builtin_options) { + opcodes_.push_back(CreateOperatorCode(builder_, type, 0)); + operators_.push_back(CreateOperator( + builder_, /*opcode_index=*/0, builder_.CreateVector(inputs_), + builder_.CreateVector(outputs_), builtin_options_type, + builtin_options, + /*custom_options=*/0, CustomOptionsFormat_FLEXBUFFERS)); +} + +void SingleOpModel::SetCustomOp( + const string& name, const std::vector& custom_option, + const std::function& registeration) { + custom_registrations_[name] = registeration; + opcodes_.push_back( + CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data())); + operators_.push_back(CreateOperator( + builder_, /*opcode_index=*/0, builder_.CreateVector(inputs_), + builder_.CreateVector(outputs_), BuiltinOptions_NONE, 0, + builder_.CreateVector(custom_option), + CustomOptionsFormat_FLEXBUFFERS)); +} + +void SingleOpModel::BuildInterpreter( + std::vector> input_shapes) { + auto opcodes = builder_.CreateVector(opcodes_); + auto operators = builder_.CreateVector(operators_); + auto tensors = builder_.CreateVector(tensors_); + auto inputs = builder_.CreateVector(inputs_); + auto outputs = builder_.CreateVector(outputs_); + // Create a single subgraph + std::vector> subgraphs; + auto subgraph = CreateSubGraph(builder_, tensors, inputs, outputs, operators); + subgraphs.push_back(subgraph); + auto subgraphs_flatbuffer = builder_.CreateVector(subgraphs); + + std::vector> buffers_vec; + auto buffers = builder_.CreateVector(buffers_vec); + auto description = builder_.CreateString("programmatic model"); + builder_.Finish(CreateModel(builder_, TFLITE_SCHEMA_VERSION, opcodes, + subgraphs_flatbuffer, description, buffers)); + + auto* model = GetModel(builder_.GetBufferPointer()); + + ops::builtin::BuiltinOpResolver builtins; + for (const auto& reg : custom_registrations_) { + builtins.AddCustom(reg.first.data(), reg.second()); + } + InterpreterBuilder(model, builtins)(&interpreter_); + + CHECK(interpreter_ != nullptr); + + int i = 0; + for (const auto& shape : input_shapes) { + int input_idx = interpreter_->inputs()[i++]; + if (input_idx == kOptionalTensor) continue; + CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk); + } + CHECK(interpreter_->AllocateTensors() == kTfLiteOk) + << "Cannot allocate tensors"; +} + +void SingleOpModel::Invoke() { CHECK(interpreter_->Invoke() == kTfLiteOk); } + +int32_t SingleOpModel::GetTensorSize(int index) const { + TfLiteTensor* t = interpreter_->tensor(index); + CHECK(t); + int total_size = 1; + for (int i = 0; i < t->dims->size; ++i) { + total_size *= t->dims->data[i]; + } + return total_size; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h new file mode 100644 index 0000000000..e68e494661 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/test_util.h @@ -0,0 +1,202 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_TEST_UTIL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_TEST_UTIL_H_ + +#include + +#include +#include + +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace tflite { + +inline void LogToStderr() { +#ifdef PLATFORM_GOOGLE + FLAGS_logtostderr = true; +#endif +} + +// A gmock matcher that check that elements of a float vector match to a given +// tolerance. +std::vector<::testing::Matcher> ArrayFloatNear( + const std::vector& values, float max_abs_error = 1e-5); + +template +inline std::vector Quantize(const std::vector& data, float scale, + int32_t zero_point) { + std::vector q; + for (float f : data) { + q.push_back(std::max( + std::numeric_limits::min(), + std::min(std::numeric_limits::max(), + static_cast(std::round(zero_point + (f / scale)))))); + } + return q; +} + +template +inline std::vector Dequantize(const std::vector& data, float scale, + int32_t zero_point) { + std::vector f; + for (T q : data) { + f.push_back(scale * (q - zero_point)); + } + return f; +} + +// A test model that contains a single operator. All operator inputs and +// output are external to the model, so the tests can directly access them. +// Typical usage: +// SingleOpModel m; +// int a = m.AddInput({TensorType_FLOAT32, a_shape}); +// int b = m.AddInput({TensorType_FLOAT32, b_shape}); +// int c = m.AddOutput({TensorType_FLOAT32, {}}); +// m.SetBuiltinOp(...); +// m.BuildInterpreter({GetShape(a), GetShape(b)}); +// m.PopulateTensor(a, {...}); +// m.PopulateTensor(b, {...}); +// m.Invoke(); +// EXPECT_THAT(m.ExtractVector(c), ArrayFloatNear({...})); +// + +// A helper struct to construct test tensors. This is particularly useful for +// quantized tensor which must have their scale and zero_point defined before +// the actual data is known. This mimics what happens in practice: quantization +// parameters are calculate during training. +struct TensorData { + TensorType type; + std::vector shape; + float min; + float max; + float scale; + int32_t zero_point; +}; + +class SingleOpModel { + public: + SingleOpModel() {} + ~SingleOpModel() {} + + // Copying or assignment is disallowed to simplify ownership semantics. + SingleOpModel(const SingleOpModel&) = delete; + SingleOpModel& operator=(const SingleOpModel&) = delete; + + // Add a TensorType input tensor and return its index. + int AddInput(TensorType type) { return AddInput(TensorData{type}); } + int AddInput(const TensorData& t); + + // Add a null input tensor (optional input) and return kOptionalTensor. + int AddNullInput(); + + // Add a TensorType output tensor and return its index. + int AddOutput(TensorType type) { return AddOutput(TensorData{type}); } + int AddOutput(const TensorData& t); + + template + void QuantizeAndPopulate(int index, std::initializer_list data) { + TfLiteTensor* t = interpreter_->tensor(index); + auto q = Quantize(data, t->params.scale, t->params.zero_point); + PopulateTensor(index, 0, q.data(), q.data() + q.size()); + } + + const std::vector& GetShape(int id) { return tensor_data_.at(id).shape; } + + float GetScale(int id) { return tensor_data_.at(id).scale; } + int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; } + + // Define the operator in this model. + void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type, + flatbuffers::Offset builtin_options); + void SetCustomOp(const string& name, + const std::vector& custom_option, + const std::function& registeration); + + // Build the interpreter for this model. Also, resize and allocate all + // tensors given the shapes of the inputs. + void BuildInterpreter(std::vector> input_shapes); + + void Invoke(); + + void PopulateStringTensor(int index, const std::vector& content) { + auto tensor = interpreter_->tensor(index); + DynamicBuffer buf; + for (const string& s : content) { + buf.AddString(s.data(), s.length()); + } + buf.WriteToTensor(tensor); + } + + // Populate the tensor given its index. + template + void PopulateTensor(int index, std::initializer_list data) { + T* v = interpreter_->typed_tensor(index); + CHECK(v) << "No tensor with index '" << index << "'."; + for (T f : data) { + *v = f; + ++v; + } + } + + // Partially populate the tensor, starting at the given offset. + template + void PopulateTensor(int index, int offset, T* begin, T* end) { + T* v = interpreter_->typed_tensor(index); + memcpy(v + offset, begin, (end - begin) * sizeof(T)); + } + + // Return a vector with the flattened contents of a tensor. + template + std::vector ExtractVector(int index) { + T* v = interpreter_->typed_tensor(index); + CHECK(v); + return std::vector(v, v + GetTensorSize(index)); + } + + std::vector GetTensorShape(int index) { + std::vector result; + TfLiteTensor* t = interpreter_->tensor(index); + for (int i = 0; i < t->dims->size; ++i) { + result.push_back(t->dims->data[i]); + } + return result; + } + + protected: + int32_t GetTensorSize(int index) const; + + flatbuffers::FlatBufferBuilder builder_; + std::unique_ptr interpreter_; + + private: + int AddTensor(TensorData t); + + std::map tensor_data_; + std::vector inputs_; + std::vector outputs_; + std::vector> tensors_; + std::vector> opcodes_; + std::vector> operators_; + std::map> custom_registrations_; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_TEST_UTIL_H_ diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc new file mode 100644 index 0000000000..f8208f6f98 --- /dev/null +++ b/tensorflow/contrib/lite/model.cc @@ -0,0 +1,673 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/allocation.h" +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/nnapi_delegate.h" +#include "tensorflow/contrib/lite/version.h" + +namespace tflite { + +const char* kEmptyTensorName = ""; + +std::unique_ptr FlatBufferModel::BuildFromFile( + const char* filename, ErrorReporter* error_reporter) { + std::unique_ptr model; + model.reset(new FlatBufferModel(filename, /*mmap_file=*/true, error_reporter, + /*use_nnapi=*/true)); + if (!model->initialized()) model.reset(); + return model; +} + +std::unique_ptr FlatBufferModel::BuildFromBuffer( + const char* buffer, size_t buffer_size, ErrorReporter* error_reporter) { + std::unique_ptr model; + model.reset(new FlatBufferModel(buffer, buffer_size, error_reporter)); + if (!model->initialized()) model.reset(); + return model; +} + +FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file, + ErrorReporter* error_reporter, bool use_nnapi) + : error_reporter_(error_reporter ? error_reporter + : DefaultErrorReporter()) { + if (mmap_file) { + if (use_nnapi && NNAPIExists()) + allocation_ = new NNAPIAllocation(filename, error_reporter); + else + allocation_ = new MMAPAllocation(filename, error_reporter); + } else { + allocation_ = new FileCopyAllocation(filename, error_reporter); + } + if (!allocation_->valid()) return; + if (!CheckModelIdentifier()) return; + + model_ = ::tflite::GetModel(allocation_->base()); +} + +bool FlatBufferModel::CheckModelIdentifier() const { + if (!tflite::ModelBufferHasIdentifier(allocation_->base())) { + const char* ident = flatbuffers::GetBufferIdentifier(allocation_->base()); + error_reporter_->Report( + "Model provided has model identifier '%c%c%c%c', should be '%s'\n", + ident[0], ident[1], ident[2], ident[3], tflite::ModelIdentifier()); + return false; + } + return true; +} + +FlatBufferModel::FlatBufferModel(const char* ptr, size_t num_bytes, + ErrorReporter* error_reporter) + : error_reporter_(error_reporter ? error_reporter + : DefaultErrorReporter()) { + allocation_ = new MemoryAllocation(ptr, num_bytes, error_reporter); + if (!allocation_->valid()) return; + model_ = ::tflite::GetModel(allocation_->base()); +} + +FlatBufferModel::~FlatBufferModel() { delete allocation_; } + +InterpreterBuilder::InterpreterBuilder(const FlatBufferModel& model, + const OpResolver& op_resolver) + : model_(model.GetModel()), + op_resolver_(op_resolver), + error_reporter_(model.error_reporter()), + allocation_(model.allocation()) {} + +InterpreterBuilder::InterpreterBuilder(const ::tflite::Model* model, + const OpResolver& op_resolver, + ErrorReporter* error_reporter) + : model_(model), + op_resolver_(op_resolver), + error_reporter_(error_reporter ? error_reporter + : DefaultErrorReporter()) {} + +TfLiteStatus InterpreterBuilder::BuildLocalIndexToRegistrationMapping() { + TfLiteStatus status = kTfLiteOk; + auto opcodes = model_->operator_codes(); + for (const OperatorCode* opcode : *opcodes) { + TfLiteRegistration* registration = nullptr; + + if (opcode->builtin_code() != BuiltinOperator_CUSTOM) { + auto x = opcode->builtin_code(); + flatbuffer_op_index_to_registration_types_.push_back(x); + registration = op_resolver_.FindOp(x); + if (registration == nullptr) { + error_reporter_->Report("Didn't find op for builtin opcode '%s'\n", + EnumNameBuiltinOperator(x)); + status = kTfLiteError; + } + } else if (!opcode->custom_code()) { + error_reporter_->Report( + "Operator with builtin_code==0 has no custom_code.\n"); + status = kTfLiteError; + } else { + const char* name = opcode->custom_code()->c_str(); + registration = op_resolver_.FindOp(name); + flatbuffer_op_index_to_registration_types_.push_back( + BuiltinOperator_CUSTOM); + if (registration == nullptr) { + error_reporter_->Report("Didn't find custom op for name '%s'\n", name); + status = kTfLiteError; + } + } + flatbuffer_op_index_to_registration_.push_back(registration); + } + return status; +} + +namespace { +template +std::vector FlatBufferIntArrayToVector(T* flat_array) { + std::vector ret(flat_array->Length()); + for (int i = 0; i < flat_array->Length(); i++) { + ret[i] = flat_array->Get(i); + } + return ret; +} + +// Allocate a structure using C malloc, but make sure the structure is a +// POD structure that doesn't require constructors to run. The reason we do +// this, is that Interpreter's C extension part will take ownership and wants +// to use malloc() and free(). +template +T* MallocPOD() { + static_assert(std::is_pod::value, "Builtin data structure must be POD."); + return static_cast(malloc(sizeof(T))); +} + +// Parse the appropriate data out of the op. +// +// This handles builtin data explicitly as there are flatbuffer schemas. +// +// Returns memory that must be feed. +void* ParseOpData(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter) { + auto parse_padding = [](Padding padding) { + switch (padding) { + case Padding_SAME: + return kTfLitePaddingSame; + case Padding_VALID: + return kTfLitePaddingValid; + } + return kTfLitePaddingUnknown; + }; + auto parse_activation = [](ActivationFunctionType activation) { + switch (activation) { + case ActivationFunctionType_NONE: + return kTfLiteActNone; + case ActivationFunctionType_RELU: + return kTfLiteActRelu; + case ActivationFunctionType_RELU1: + return kTfLiteActRelu1; + case ActivationFunctionType_RELU6: + return kTfLiteActRelu6; + case ActivationFunctionType_TANH: + return kTfLiteActTanh; + case ActivationFunctionType_SIGN_BIT: + return kTfLiteActSignBit; + } + return kTfLiteActNone; + }; + auto parseLSHProjectionType = [](LSHProjectionType type) { + switch (type) { + case LSHProjectionType_SPARSE: + return kTfLiteLshProjectionSparse; + case LSHProjectionType_DENSE: + return kTfLiteLshProjectionDense; + default: + return kTfLiteLshProjectionUnknown; + } + }; + auto parseCombinerType = [](CombinerType type) { + switch (type) { + case CombinerType_MEAN: + return kTfLiteCombinerTypeMean; + case CombinerType_SQRTN: + return kTfLiteCombinerTypeSqrtn; + case CombinerType_SUM: + default: + return kTfLiteCombinerTypeSum; + } + }; + + void* builtin_data = nullptr; + switch (op_type) { + case BuiltinOperator_CALL: + // TODO(aselle): Implement call in BuiltinOptions, but nullptrs are + // ok for now, since there is no call implementation either. + break; + case BuiltinOperator_CUSTOM: + break; + case BuiltinOperator_CONV_2D: { + TfLiteConvParams* params = MallocPOD(); + if (auto* conv_params = op->builtin_options_as_Conv2DOptions()) { + params->padding = parse_padding(conv_params->padding()); + params->stride_width = conv_params->stride_w(); + params->stride_height = conv_params->stride_h(); + params->activation = + parse_activation(conv_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_TANH: + case BuiltinOperator_LOGISTIC: + case BuiltinOperator_RELU: + case BuiltinOperator_RELU1: + case BuiltinOperator_RELU6: + case BuiltinOperator_CONCAT_EMBEDDINGS: + break; + case BuiltinOperator_LSH_PROJECTION: { + TfLiteLSHProjectionParams* params = + MallocPOD(); + if (auto* lshParams = op->builtin_options_as_LSHProjectionOptions()) { + params->type = parseLSHProjectionType(lshParams->type()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_AVERAGE_POOL_2D: + case BuiltinOperator_MAX_POOL_2D: + case BuiltinOperator_L2_POOL_2D: { + TfLitePoolParams* params = MallocPOD(); + if (auto* pool_params = op->builtin_options_as_Pool2DOptions()) { + params->padding = parse_padding(pool_params->padding()); + params->stride_width = pool_params->stride_w(); + params->stride_height = pool_params->stride_h(); + params->filter_width = pool_params->filter_width(); + params->filter_height = pool_params->filter_height(); + params->activation = + parse_activation(pool_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_DEPTHWISE_CONV_2D: { + TfLiteDepthwiseConvParams* params = + MallocPOD(); + if (auto* conv_params = op->builtin_options_as_DepthwiseConv2DOptions()) { + params->padding = parse_padding(conv_params->padding()); + params->stride_width = conv_params->stride_w(); + params->stride_height = conv_params->stride_h(); + params->depth_multiplier = conv_params->depth_multiplier(); + params->activation = + parse_activation(conv_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_SVDF: { + TfLiteSVDFParams* params = MallocPOD(); + if (auto* svdf_params = op->builtin_options_as_SVDFOptions()) { + params->rank = svdf_params->rank(); + params->activation = + parse_activation(svdf_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_RNN: { + TfLiteRNNParams* params = MallocPOD(); + if (auto* rnn_params = op->builtin_options_as_RNNOptions()) { + params->activation = + parse_activation(rnn_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_EMBEDDING_LOOKUP: + // no-op. + break; + case BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: { + TfLiteEmbeddingLookupSparseParams* params = + MallocPOD(); + if (auto* embedding_params = + op->builtin_options_as_EmbeddingLookupSparseOptions()) { + params->combiner = parseCombinerType(embedding_params->combiner()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_FULLY_CONNECTED: { + TfLiteFullyConnectedParams* params = + MallocPOD(); + if (auto* fully_connected_params = + op->builtin_options_as_FullyConnectedOptions()) { + params->activation = parse_activation( + fully_connected_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_HASHTABLE_LOOKUP: + // no-op. + break; + case BuiltinOperator_SOFTMAX: { + TfLiteSoftmaxParams* params = MallocPOD(); + if (auto* softmax_params = op->builtin_options_as_SoftmaxOptions()) { + params->beta = softmax_params->beta(); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_CONCATENATION: { + TfLiteConcatenationParams* params = + MallocPOD(); + if (auto* concatenation_params = + op->builtin_options_as_ConcatenationOptions()) { + params->activation = + parse_activation(concatenation_params->fused_activation_function()); + params->axis = concatenation_params->axis(); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_MUL: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_MulOptions()) { + params->activation = + parse_activation(schema_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_ADD: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_AddOptions()) { + params->activation = + parse_activation(schema_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_L2_NORMALIZATION: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_L2NormOptions()) { + params->activation = + parse_activation(schema_params->fused_activation_function()); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: { + auto* params = MallocPOD(); + if (auto* schema_params = + op->builtin_options_as_LocalResponseNormalizationOptions()) { + params->radius = schema_params->radius(); + params->bias = schema_params->bias(); + params->alpha = schema_params->alpha(); + params->beta = schema_params->beta(); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_LSTM: { + TfLiteLSTMParams* params = MallocPOD(); + if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) { + params->activation = + parse_activation(lstm_params->fused_activation_function()); + params->cell_clip = lstm_params->cell_clip(); + params->proj_clip = lstm_params->proj_clip(); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_RESIZE_BILINEAR: { + auto* params = MallocPOD(); + if (auto* schema_params = + op->builtin_options_as_ResizeBilinearOptions()) { + params->new_height = schema_params->new_height(); + params->new_width = schema_params->new_width(); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_RESHAPE: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_ReshapeOptions()) { + auto* new_shape = schema_params->new_shape(); + if (!new_shape) { + error_reporter->Report("No new_shape provided for Reshape\n"); + } else { + params->num_dimensions = new_shape->Length(); + if (params->num_dimensions > sizeof(params->shape) / sizeof(int)) { + error_reporter->Report( + "Found too many dimensions in Reshape's new_shape\n"); + } else { + for (int i = 0; i < params->num_dimensions; ++i) { + params->shape[i] = new_shape->Get(i); + } + } + } + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_SKIP_GRAM: { + TfLiteSkipGramParams* params = MallocPOD(); + if (auto* skip_gram_params = op->builtin_options_as_SkipGramOptions()) { + params->ngram_size = skip_gram_params->ngram_size(); + params->max_skip_size = skip_gram_params->max_skip_size(); + params->include_all_ngrams = skip_gram_params->include_all_ngrams(); + } + builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_SPACE_TO_DEPTH: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_SpaceToDepthOptions()) { + params->block_size = schema_params->block_size(); + } + builtin_data = reinterpret_cast(params); + break; + } + } + return builtin_data; +} + +} // namespace + +TfLiteStatus InterpreterBuilder::ParseNodes( + const flatbuffers::Vector>* operators, + Interpreter* interpreter) { + TfLiteStatus status = kTfLiteOk; + for (int i = 0; i < operators->Length(); ++i) { + const auto* op = operators->Get(i); + int index = op->opcode_index(); + if (index < 0 || index >= flatbuffer_op_index_to_registration_.size()) { + error_reporter_->Report("Missing registration for opcode_index %d\n", + index); + status = kTfLiteError; + continue; + } + const TfLiteRegistration* reg = + flatbuffer_op_index_to_registration_[op->opcode_index()]; + if (reg == nullptr) { + error_reporter_->Report("Skipping op for opcode_index %d\n", index); + status = kTfLiteError; + continue; + } + + auto op_type = + flatbuffer_op_index_to_registration_types_[op->opcode_index()]; + if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) { + error_reporter_->Report( + "Found builtin operator %s with custom options.\n", + EnumNameBuiltinOperator(op_type)); + } + if (op->custom_options()) { + interpreter->AddNodeWithParameters( + FlatBufferIntArrayToVector(op->inputs()), + FlatBufferIntArrayToVector(op->outputs()), + reinterpret_cast(op->custom_options()->data()), + op->custom_options()->size(), nullptr, reg); + } else { + interpreter->AddNodeWithParameters( + FlatBufferIntArrayToVector(op->inputs()), + FlatBufferIntArrayToVector(op->outputs()), nullptr, 0, + ParseOpData(op, op_type, error_reporter_), reg); + } + } + + return status; +} + +TfLiteStatus InterpreterBuilder::ParseTensors( + const flatbuffers::Vector>* buffers, + const flatbuffers::Vector>* tensors, + Interpreter* interpreter) { + TfLiteStatus status = kTfLiteOk; + + // A little helper to get the names of inputs and outputs. Note that they + // must outlive the interpreter. + auto get_name = [](const tflite::Tensor* t) -> const char* { + auto name = t->name(); + if (name) return name->c_str(); + return kEmptyTensorName; + }; + + for (int i = 0; i < tensors->Length(); ++i) { + const auto* tensor = tensors->Get(i); + std::vector dims = FlatBufferIntArrayToVector(tensor->shape()); + + TfLiteQuantizationParams quantization; + quantization.scale = 0; + quantization.zero_point = 0; + auto* q_params = tensor->quantization(); + if (q_params) { + // Note that the schema could hold per-channel quantization parameters + // but we really only support one value for the whole tensor. + // TODO(aselle): This breaks as well if these are nullptr's. + // TODO(aselle): This assumes non per-channel quantization. + if (q_params->scale()) quantization.scale = q_params->scale()->Get(0); + if (q_params->zero_point()) + quantization.zero_point = q_params->zero_point()->Get(0); + } + + TfLiteType type; + switch (tensor->type()) { + case TensorType_FLOAT32: + type = kTfLiteFloat32; + break; + case TensorType_INT32: + type = kTfLiteInt32; + break; + case TensorType_UINT8: + type = kTfLiteUInt8; + break; + case TensorType_INT64: + type = kTfLiteInt64; + break; + case TensorType_STRING: + type = kTfLiteString; + break; + default: + // tensorType = ArrayType::NONE; + error_reporter_->Report("Unimplemented data type %s (%d) in tensor\n", + EnumNameTensorType(tensor->type()), + tensor->type()); + status = kTfLiteError; + continue; + } + auto get_readonly_data = [&](const char** buffer_data, + size_t* buffer_size) { + // TODO(aselle): Check what happens if we have an unspecified size + // constant. + *buffer_data = nullptr; + if (tensor->buffer() == 0) return kTfLiteOk; + if (tensor->buffer() >= buffers->size()) { + error_reporter_->Report( + "Tensor %d specifies out of range buffer %d (only %d buffers).\n", + i, tensor->buffer(), buffers->size()); + return kTfLiteError; + } + if (auto* buffer = (*buffers)[tensor->buffer()]) { + if (auto* array = buffer->data()) { + if (size_t size = array->size()) { + *buffer_size = size; + *buffer_data = reinterpret_cast(array->data()); + return kTfLiteOk; + } + } + } + return kTfLiteOk; + }; + size_t buffer_size = 0; + const char* buffer_ptr; + TF_LITE_ENSURE_STATUS(get_readonly_data(&buffer_ptr, &buffer_size)); + + if (buffer_ptr) { + if (interpreter->SetTensorParametersReadOnly( + i, type, get_name(tensor), dims, quantization, buffer_ptr, + buffer_size, allocation_) != kTfLiteOk) { + error_reporter_->Report("Tensor %d is invalidly specified in schema.\n", + i); + status = kTfLiteError; + } + } else { + if (interpreter->SetTensorParametersReadWrite( + i, type, get_name(tensor), dims, quantization) != kTfLiteOk) { + error_reporter_->Report("Tensor %d is invalidly specified in schema.\n", + i); + status = kTfLiteError; + } + } + } + + return status; +} + +TfLiteStatus InterpreterBuilder::operator()( + std::unique_ptr* interpreter) { + if (!interpreter) { + error_reporter_->Report( + "Null output pointer passed to InterpreterBuilder."); + return kTfLiteError; + } + + // Safe exit by deleting partially created interpreter, to reduce verbosity + // on error conditions. Use by return cleanup_on_error(); + auto cleanup_and_error = [&interpreter]() { + interpreter->reset(); + return kTfLiteError; + }; + + if (!model_) { + error_reporter_->Report("Null pointer passed in as model."); + return cleanup_and_error(); + } + + if (model_->version() != TFLITE_SCHEMA_VERSION) { + error_reporter_->Report( + "Model provided is schema version %d not equal " + "to supported version %d.\n", + model_->version(), TFLITE_SCHEMA_VERSION); + return cleanup_and_error(); + } + + if (BuildLocalIndexToRegistrationMapping() != kTfLiteOk) { + error_reporter_->Report("Registration failed.\n"); + return cleanup_and_error(); + } + + // Flatbuffer model schemas define a list of opcodes independent of the graph. + // We first map those to registrations. This reduces string lookups for custom + // ops since we only do it once per custom op rather than once per custom op + // invocation in the model graph. + // Construct interpreter with correct number of tensors and operators. + auto* subgraphs = model_->subgraphs(); + auto* buffers = model_->buffers(); + if (subgraphs->size() != 1) { + error_reporter_->Report("Only 1 subgraph is currently supported.\n"); + return cleanup_and_error(); + } + const tflite::SubGraph* subgraph = (*subgraphs)[0]; + auto operators = subgraph->operators(); + auto tensors = subgraph->tensors(); + if (!operators || !tensors || !buffers) { + error_reporter_->Report( + "Did not get operators, tensors, or buffers in input flat buffer.\n"); + return cleanup_and_error(); + } + interpreter->reset(new Interpreter(error_reporter_)); + if ((**interpreter).AddTensors(tensors->Length()) != kTfLiteOk) { + return cleanup_and_error(); + } + + // Parse inputs/outputs + (**interpreter).SetInputs(FlatBufferIntArrayToVector(subgraph->inputs())); + (**interpreter).SetOutputs(FlatBufferIntArrayToVector(subgraph->outputs())); + + // Finally setup nodes and tensors + if (ParseNodes(operators, interpreter->get()) != kTfLiteOk) + return cleanup_and_error(); + if (ParseTensors(buffers, tensors, interpreter->get()) != kTfLiteOk) + return cleanup_and_error(); + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h new file mode 100644 index 0000000000..15659d33f3 --- /dev/null +++ b/tensorflow/contrib/lite/model.h @@ -0,0 +1,165 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Deserialization infrastructure for tflite. Provides functionality +// to go from a serialized tflite model in flatbuffer format to an +// interpreter. +// +// using namespace tflite; +// StderrReporter error_reporter; +// auto model = FlatBufferModel::BuildFromFile("interesting_model.tflite", +// &error_reporter); +// MyOpResolver resolver; // You need to subclass OpResolver to provide +// // implementations. +// InterpreterBuilder builder(*model, resolver); +// std::unique_ptr interpreter; +// if(builder(&interpreter) == kTfLiteOk) { +// .. run model inference with interpreter +// } +// +// OpResolver must be defined to provide your kernel implementations to the +// interpreter. This is environment specific and may consist of just the builtin +// ops, or some custom operators you defined to extend tflite. +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODEL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODEL_H_ + +#include +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { + +// An RAII object that represents a read-only tflite model, copied from disk, +// or mmapped. This uses flatbuffers as the serialization format. +class FlatBufferModel { + public: + // Build a model based on a file. Return a nullptr in case of failure. + static std::unique_ptr BuildFromFile( + const char* filename, + ErrorReporter* error_reporter = DefaultErrorReporter()); + + // Build a model based on a pre-loaded flatbuffer. The caller retains + // ownership of the buffer and should keep it alive until the returned object + // is destroyed. Return a nullptr in case of failure. + static std::unique_ptr BuildFromBuffer( + const char* buffer, size_t buffer_size, + ErrorReporter* error_reporter = DefaultErrorReporter()); + + // Releases memory or unmaps mmaped meory. + ~FlatBufferModel(); + + // Copying or assignment is disallowed to simplify ownership semantics. + FlatBufferModel(const FlatBufferModel&) = delete; + FlatBufferModel& operator=(const FlatBufferModel&) = delete; + + bool initialized() const { return model_ != nullptr; } + const tflite::Model* operator->() const { return model_; } + const tflite::Model* GetModel() const { return model_; } + ErrorReporter* error_reporter() const { return error_reporter_; } + const Allocation* allocation() const { return allocation_; } + + // Returns true if the model identifier is correct (otherwise false and + // reports an error). + bool CheckModelIdentifier() const; + + private: + // Load a model from `filename`. If `mmap_file` is true then use mmap, + // otherwise make a copy of the model in a buffer. + // + // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be + // used. + explicit FlatBufferModel( + const char* filename, bool mmap_file = true, + ErrorReporter* error_reporter = DefaultErrorReporter(), + bool use_nnapi = false); + + // Load a model from `ptr` and `num_bytes` of the model file. The `ptr` has to + // remain alive and unchanged until the end of this flatbuffermodel's + // lifetime. + // + // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be + // used. + FlatBufferModel(const char* ptr, size_t num_bytes, + ErrorReporter* error_reporter = DefaultErrorReporter()); + + // Flatbuffer traverser pointer. (Model* is a pointer that is within the + // allocated memory of the data allocated by allocation's internals. + const tflite::Model* model_ = nullptr; + ErrorReporter* error_reporter_; + Allocation* allocation_ = nullptr; +}; + +// Abstract interface that returns TfLiteRegistrations given op codes or custom +// op names. This is the mechanism that ops being referenced in the flatbuffer +// model are mapped to executable function pointers (TfLiteRegistrations). +class OpResolver { + public: + // Find the op registration for a builtin operator by enum code. + virtual TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const = 0; + // Find the op registration of a custom operator by op name. + virtual TfLiteRegistration* FindOp(const char* op) const = 0; + virtual ~OpResolver() {} +}; + +// Build an interpreter capable of interpreting `model`. +// +// model: a scoped model whose lifetime must be at least as long as +// the interpreter. In principle multiple interpreters can be made from +// a single model. +// op_resolver: An instance that implements the Resolver interface which maps +// custom op names and builtin op codes to op registrations. +// reportError: a functor that is called to report errors that handles +// printf var arg semantics. The lifetime of the reportError object must +// be greater than or equal to the Interpreter created by operator(). +// +// Returns a kTfLiteOk when successful and sets interpreter to a valid +// Interpreter. Note: the user must ensure the model lifetime is at least as +// long as interpreter's lifetime. +class InterpreterBuilder { + public: + InterpreterBuilder(const FlatBufferModel& model, + const OpResolver& op_resolver); + // Build an interpreter given only the raw flatbuffer Model object (instead + // of a FlatBufferModel). Mostly used for testing. + // If `error_reporter` is null, then DefaultErrorReporter() is used. + InterpreterBuilder(const ::tflite::Model* model, + const OpResolver& op_resolver, + ErrorReporter* error_reporter = DefaultErrorReporter()); + InterpreterBuilder(const InterpreterBuilder&) = delete; + InterpreterBuilder& operator=(const InterpreterBuilder&) = delete; + TfLiteStatus operator()(std::unique_ptr* interpreter); + + private: + TfLiteStatus BuildLocalIndexToRegistrationMapping(); + TfLiteStatus ParseNodes( + const flatbuffers::Vector>* operators, + Interpreter* interpreter); + TfLiteStatus ParseTensors( + const flatbuffers::Vector>* buffers, + const flatbuffers::Vector>* tensors, + Interpreter* interpreter); + + const ::tflite::Model* model_; + const OpResolver& op_resolver_; + ErrorReporter* error_reporter_; + + std::vector flatbuffer_op_index_to_registration_; + std::vector flatbuffer_op_index_to_registration_types_; + const Allocation* allocation_ = nullptr; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODEL_H_ diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc new file mode 100644 index 0000000000..ae823650d6 --- /dev/null +++ b/tensorflow/contrib/lite/model_test.cc @@ -0,0 +1,258 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/model.h" + +#include +#include "tensorflow/contrib/lite/error_reporter.h" + +// Comparison for TfLiteRegistration. Since TfLiteRegistration is a C object, +// we must declare this in global namespace, so argument-dependent operator +// lookup works. +inline bool operator==(const TfLiteRegistration& a, + const TfLiteRegistration& b) { + return a.invoke == b.invoke && a.init == b.init && a.prepare == b.prepare && + a.free == b.free; +} + +namespace tflite { + +// Provide a dummy operation that does nothing. +namespace { +void* dummy_init(TfLiteContext*, const char*, size_t) { return nullptr; } +void dummy_free(TfLiteContext*, void*) {} +TfLiteStatus dummy_resize(TfLiteContext*, TfLiteNode*) { return kTfLiteOk; } +TfLiteStatus dummy_invoke(TfLiteContext*, TfLiteNode*) { return kTfLiteOk; } +TfLiteRegistration dummy_reg = {dummy_init, dummy_free, dummy_resize, + dummy_invoke}; +} // namespace + +// Provide a trivial resolver that returns a constant value no matter what +// op is asked for. +class TrivialResolver : public OpResolver { + public: + explicit TrivialResolver(TfLiteRegistration* constant_return = nullptr) + : constant_return_(constant_return) {} + // Find the op registration of a custom operator by op name. + TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override { + return constant_return_; + } + // Find the op registration of a custom operator by op name. + TfLiteRegistration* FindOp(const char* op) const override { + return constant_return_; + } + + private: + TfLiteRegistration* constant_return_; +}; + +TEST(BasicFlatBufferModel, TestNonExistantFiles) { + ASSERT_TRUE(!FlatBufferModel::BuildFromFile("/tmp/tflite_model_1234")); +} + +// Make sure a model with nothing in it loads properly. +TEST(BasicFlatBufferModel, TestEmptyModelsAndNullDestination) { + auto model = FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/empty_model.bin"); + ASSERT_TRUE(model); + // Now try to build it into a model. + std::unique_ptr interpreter; + ASSERT_EQ(InterpreterBuilder(*model, TrivialResolver())(&interpreter), + kTfLiteOk); + ASSERT_NE(interpreter, nullptr); + ASSERT_NE(InterpreterBuilder(*model, TrivialResolver())(nullptr), kTfLiteOk); +} + +// Make sure currently unsupported # of subgraphs are checked +// TODO(aselle): Replace this test when multiple subgraphs are supported. +TEST(BasicFlatBufferModel, TestZeroAndMultipleSubgraphs) { + auto m1 = FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/0_subgraphs.bin"); + ASSERT_TRUE(m1); + std::unique_ptr interpreter1; + ASSERT_NE(InterpreterBuilder(*m1, TrivialResolver())(&interpreter1), + kTfLiteOk); + + auto m2 = FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/2_subgraphs.bin"); + ASSERT_TRUE(m2); + std::unique_ptr interpreter2; + ASSERT_NE(InterpreterBuilder(*m2, TrivialResolver())(&interpreter2), + kTfLiteOk); +} + +// Test what happens if we cannot bind any of the ops. +TEST(BasicFlatBufferModel, TestModelWithoutNullRegistrations) { + auto model = FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/test_model.bin"); + ASSERT_TRUE(model); + // Check that we get an error code and interpreter pointer is reset. + std::unique_ptr interpreter(new Interpreter); + ASSERT_NE(InterpreterBuilder(*model, TrivialResolver(nullptr))(&interpreter), + kTfLiteOk); + ASSERT_EQ(interpreter, nullptr); +} + +// Make sure model is read to interpreter propelrly +TEST(BasicFlatBufferModel, TestModelInInterpreter) { + auto model = FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/test_model.bin"); + ASSERT_TRUE(model); + // Check that we get an error code and interpreter pointer is reset. + std::unique_ptr interpreter(new Interpreter); + ASSERT_EQ( + InterpreterBuilder(*model, TrivialResolver(&dummy_reg))(&interpreter), + kTfLiteOk); + ASSERT_NE(interpreter, nullptr); + ASSERT_EQ(interpreter->tensors_size(), 4); + ASSERT_EQ(interpreter->nodes_size(), 2); + std::vector inputs = {0, 1}; + std::vector outputs = {2, 3}; + ASSERT_EQ(interpreter->inputs(), inputs); + ASSERT_EQ(interpreter->outputs(), outputs); + + EXPECT_EQ(std::string(interpreter->GetInputName(0)), "input0"); + EXPECT_EQ(std::string(interpreter->GetInputName(1)), "input1"); + EXPECT_EQ(std::string(interpreter->GetOutputName(0)), "out1"); + EXPECT_EQ(std::string(interpreter->GetOutputName(1)), "out2"); + + // Make sure all input tensors are correct + TfLiteTensor* i0 = interpreter->tensor(0); + ASSERT_EQ(i0->type, kTfLiteFloat32); + ASSERT_NE(i0->data.raw, nullptr); // mmapped + ASSERT_EQ(i0->allocation_type, kTfLiteMmapRo); + TfLiteTensor* i1 = interpreter->tensor(1); + ASSERT_EQ(i1->type, kTfLiteFloat32); + ASSERT_EQ(i1->data.raw, nullptr); + ASSERT_EQ(i1->allocation_type, kTfLiteArenaRw); + TfLiteTensor* o0 = interpreter->tensor(2); + ASSERT_EQ(o0->type, kTfLiteFloat32); + ASSERT_EQ(o0->data.raw, nullptr); + ASSERT_EQ(o0->allocation_type, kTfLiteArenaRw); + TfLiteTensor* o1 = interpreter->tensor(3); + ASSERT_EQ(o1->type, kTfLiteFloat32); + ASSERT_EQ(o1->data.raw, nullptr); + ASSERT_EQ(o1->allocation_type, kTfLiteArenaRw); + + // Check op 0 which has inputs {0, 1} outputs {2}. + { + const std::pair* node_and_reg0 = + interpreter->node_and_registration(0); + ASSERT_NE(node_and_reg0, nullptr); + const TfLiteNode& node0 = node_and_reg0->first; + const TfLiteRegistration& reg0 = node_and_reg0->second; + TfLiteIntArray* desired_inputs = TfLiteIntArrayCreate(2); + desired_inputs->data[0] = 0; + desired_inputs->data[1] = 1; + TfLiteIntArray* desired_outputs = TfLiteIntArrayCreate(1); + desired_outputs->data[0] = 2; + ASSERT_TRUE(TfLiteIntArrayEqual(node0.inputs, desired_inputs)); + ASSERT_TRUE(TfLiteIntArrayEqual(node0.outputs, desired_outputs)); + TfLiteIntArrayFree(desired_inputs); + TfLiteIntArrayFree(desired_outputs); + ASSERT_EQ(reg0, dummy_reg); + } + + // Check op 1 which has inputs {2} outputs {3}. + { + const std::pair* node_and_reg1 = + interpreter->node_and_registration(1); + ASSERT_NE(node_and_reg1, nullptr); + const TfLiteNode& node1 = node_and_reg1->first; + const TfLiteRegistration& reg1 = node_and_reg1->second; + TfLiteIntArray* desired_inputs = TfLiteIntArrayCreate(1); + TfLiteIntArray* desired_outputs = TfLiteIntArrayCreate(1); + desired_inputs->data[0] = 2; + desired_outputs->data[0] = 3; + ASSERT_TRUE(TfLiteIntArrayEqual(node1.inputs, desired_inputs)); + ASSERT_TRUE(TfLiteIntArrayEqual(node1.outputs, desired_outputs)); + TfLiteIntArrayFree(desired_inputs); + TfLiteIntArrayFree(desired_outputs); + ASSERT_EQ(reg1, dummy_reg); + } +} + +// This tests on a flatbuffer that defines a shape of 2 to be a memory mapped +// buffer. But the buffer is provided to be only 1 element. +TEST(BasicFlatBufferModel, TestBrokenMmap) { + ASSERT_FALSE(FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/test_model_broken.bin")); +} + +TEST(BasicFlatBufferModel, TestNullModel) { + // Check that we get an error code and interpreter pointer is reset. + std::unique_ptr interpreter(new Interpreter); + ASSERT_NE( + InterpreterBuilder(nullptr, TrivialResolver(&dummy_reg))(&interpreter), + kTfLiteOk); + ASSERT_EQ(interpreter.get(), nullptr); +} + +struct TestErrorReporter : public ErrorReporter { + int Report(const char* format, va_list args) override { + calls++; + return 0; + } + int calls = 0; +}; + +// This makes sure the ErrorReporter is marshalled from FlatBufferModel to +// the Interpreter. +TEST(BasicFlatBufferModel, TestCustomErrorReporter) { + TestErrorReporter reporter; + auto model = FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/empty_model.bin", + &reporter); + ASSERT_TRUE(model); + + std::unique_ptr interpreter; + TrivialResolver resolver; + InterpreterBuilder(*model, resolver)(&interpreter); + ASSERT_NE(interpreter->Invoke(), kTfLiteOk); + ASSERT_EQ(reporter.calls, 1); +} + +// This makes sure the ErrorReporter is marshalled from FlatBufferModel to +// the Interpreter. +TEST(BasicFlatBufferModel, TestNullErrorReporter) { + auto model = FlatBufferModel::BuildFromFile( + "tensorflow/contrib/lite/testdata/empty_model.bin", nullptr); + ASSERT_TRUE(model); + + std::unique_ptr interpreter; + TrivialResolver resolver; + InterpreterBuilder(*model, resolver)(&interpreter); + ASSERT_NE(interpreter->Invoke(), kTfLiteOk); +} + +// TODO(aselle): Add tests for serialization of builtin op data types. +// These tests will occur with the evaluation tests of individual operators, +// not here. + +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/models/smartreply/BUILD b/tensorflow/contrib/lite/models/smartreply/BUILD new file mode 100644 index 0000000000..fbdf19f205 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/BUILD @@ -0,0 +1,15 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc new file mode 100644 index 0000000000..1c422b659a --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc @@ -0,0 +1,119 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Convert a list of strings to integers via hashing. +// Input: +// Input[0]: A list of ngrams. string[num of input] +// +// Output: +// Output[0]: Hashed features. int32[num of input] +// Output[1]: Weights. float[num of input] + +#include +#include +#include "re2/re2.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/string_util.h" +#include + +namespace tflite { +namespace ops { +namespace custom { + +namespace extract { + +static const int kMaxDimension = 1000000; +static const std::vector kBlacklistNgram = {"", "", " "}; + +bool Equals(const string& x, const tflite::StringRef& strref) { + if (strref.len != x.length()) { + return false; + } + if (strref.len > 0) { + int r = memcmp(strref.str, x.data(), strref.len); + return r == 0; + } + return true; +} + +bool IsValidNgram(const tflite::StringRef& strref) { + for (const auto& s : kBlacklistNgram) { + if (Equals(s, strref)) { + return false; + } + } + return true; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TfLiteIntArray* outputSize1 = TfLiteIntArrayCreate(1); + TfLiteIntArray* outputSize2 = TfLiteIntArrayCreate(1); + TfLiteTensor* input = GetInput(context, node, 0); + int dim = input->dims->data[0]; + if (dim == 0) { + // TFLite non-string output should have size greater than 0. + dim = 1; + } + TF_LITE_ENSURE_EQ(context, input->type, kTfLiteString); + outputSize1->data[0] = dim; + outputSize2->data[0] = dim; + context->ResizeTensor(context, GetOutput(context, node, 0), outputSize1); + context->ResizeTensor(context, GetOutput(context, node, 1), outputSize2); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, 0); + int num_strings = tflite::GetStringCount(input); + TfLiteTensor* label = GetOutput(context, node, 0); + TfLiteTensor* weight = GetOutput(context, node, 1); + + std::map feature_id_counts; + for (int i = 0; i < num_strings; i++) { + // Use fingerprint of feature name as id. + auto strref = tflite::GetString(input, i); + if (!IsValidNgram(strref)) { + label->data.i32[i] = 0; + weight->data.i32[i] = 0; + continue; + } + + int64 feature_id = + ::util::Fingerprint64(strref.str, strref.len) % kMaxDimension; + + label->data.i32[i] = static_cast(feature_id); + weight->data.f[i] = + std::count(strref.str, strref.str + strref.len, ' ') + 1; + } + // Explicitly set an empty result to make preceding ops run. + if (num_strings == 0) { + label->data.i32[0] = 0; + weight->data.i32[0] = 0; + } + return kTfLiteOk; +} + +} // namespace extract + +TfLiteRegistration* Register_EXTRACT_FEATURES() { + static TfLiteRegistration r = {nullptr, nullptr, extract::Prepare, + extract::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/smartreply/ops/extract_feature_test.cc b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature_test.cc new file mode 100644 index 0000000000..9b8676bab6 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" +#include + +namespace tflite { + +namespace ops { +namespace custom { +TfLiteRegistration* Register_EXTRACT_FEATURES(); + +namespace { + +using ::testing::ElementsAre; + +class ExtractFeatureOpModel : public SingleOpModel { + public: + explicit ExtractFeatureOpModel(const std::vector& input) { + input_ = AddInput(TensorType_STRING); + signature_ = AddOutput(TensorType_INT32); + weight_ = AddOutput(TensorType_FLOAT32); + + SetCustomOp("ExtractFeatures", {}, Register_EXTRACT_FEATURES); + BuildInterpreter({{static_cast(input.size())}}); + PopulateStringTensor(input_, input); + } + + std::vector GetSignature() { return ExtractVector(signature_); } + std::vector GetWeight() { return ExtractVector(weight_); } + + private: + int input_; + int signature_; + int weight_; +}; + +int CalcFeature(const string& str) { + return ::util::Fingerprint64(str) % 1000000; +} + +TEST(ExtractFeatureOpTest, RegularInput) { + ExtractFeatureOpModel m({"", " Hi", "Hi", "Hi !", "!", "! ", ""}); + m.Invoke(); + EXPECT_THAT(m.GetSignature(), + ElementsAre(0, CalcFeature(" Hi"), CalcFeature("Hi"), + CalcFeature("Hi !"), CalcFeature("!"), + CalcFeature("! "), 0)); + EXPECT_THAT(m.GetWeight(), ElementsAre(0, 2, 1, 2, 1, 2, 0)); +} + +TEST(ExtractFeatureOpTest, OneInput) { + ExtractFeatureOpModel m({"Hi"}); + m.Invoke(); + EXPECT_THAT(m.GetSignature(), ElementsAre(CalcFeature("Hi"))); + EXPECT_THAT(m.GetWeight(), ElementsAre(1)); +} + +TEST(ExtractFeatureOpTest, ZeroInput) { + ExtractFeatureOpModel m({}); + m.Invoke(); + EXPECT_THAT(m.GetSignature(), ElementsAre(0)); + EXPECT_THAT(m.GetWeight(), ElementsAre(0)); +} + +TEST(ExtractFeatureOpTest, AllBlacklistInput) { + ExtractFeatureOpModel m({"", ""}); + m.Invoke(); + EXPECT_THAT(m.GetSignature(), ElementsAre(0, 0)); + EXPECT_THAT(m.GetWeight(), ElementsAre(0, 0)); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc b/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc new file mode 100644 index 0000000000..d0dc2a35a7 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc @@ -0,0 +1,105 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Normalize the string input. +// +// Input: +// Input[0]: One sentence. string[1] +// +// Output: +// Output[0]: Normalized sentence. string[1] +// +#include "absl/strings/ascii.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/strip.h" +#include "re2/re2.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace ops { +namespace custom { + +namespace normalize { + +// Predictor transforms. +const char kPunctuationsRegex[] = "[.*()\"]"; + +const std::map* kRegexTransforms = + new std::map({ + {"([^\\s]+)n't", "\\1 not"}, + {"([^\\s]+)'nt", "\\1 not"}, + {"([^\\s]+)'ll", "\\1 will"}, + {"([^\\s]+)'re", "\\1 are"}, + {"([^\\s]+)'ve", "\\1 have"}, + {"i'm", "i am"}, + }); + +static const char kStartToken[] = ""; +static const char kEndToken[] = ""; +static const int32 kMaxInputChars = 300; + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + tflite::StringRef input = tflite::GetString(GetInput(context, node, 0), 0); + + string result(absl::AsciiStrToLower(absl::string_view(input.str, input.len))); + absl::StripAsciiWhitespace(&result); + // Do not remove commas, semi-colons or colons from the sentences as they can + // indicate the beginning of a new clause. + RE2::GlobalReplace(&result, kPunctuationsRegex, ""); + RE2::GlobalReplace(&result, "\\s('t|'nt|n't|'d|'ll|'s|'m|'ve|'re)([\\s,;:/])", + "\\1\\2"); + RE2::GlobalReplace(&result, "\\s('t|'nt|n't|'d|'ll|'s|'m|'ve|'re)$", "\\1"); + for (auto iter = kRegexTransforms->begin(); iter != kRegexTransforms->end(); + iter++) { + RE2::GlobalReplace(&result, iter->first, iter->second); + } + + // Treat questions & interjections as special cases. + RE2::GlobalReplace(&result, "([?])+", "\\1"); + RE2::GlobalReplace(&result, "([!])+", "\\1"); + RE2::GlobalReplace(&result, "([^?!]+)([?!])", "\\1 \\2 "); + RE2::GlobalReplace(&result, "([?!])([?!])", "\\1 \\2"); + + RE2::GlobalReplace(&result, "[\\s,:;\\-&'\"]+$", ""); + RE2::GlobalReplace(&result, "^[\\s,:;\\-&'\"]+", ""); + absl::StripAsciiWhitespace(&result); + + // Add start and end token. + // Truncate input to maximum allowed size. + if (result.length() <= kMaxInputChars) { + absl::StrAppend(&result, " ", kEndToken); + } else { + result = result.substr(0, kMaxInputChars); + } + result = absl::StrCat(kStartToken, " ", result); + + tflite::DynamicBuffer buf; + buf.AddString(result.data(), result.length()); + buf.WriteToTensor(GetOutput(context, node, 0)); + return kTfLiteOk; +} + +} // namespace normalize + +TfLiteRegistration* Register_NORMALIZE() { + static TfLiteRegistration r = {nullptr, nullptr, nullptr, normalize::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/smartreply/ops/normalize_test.cc b/tensorflow/contrib/lite/models/smartreply/ops/normalize_test.cc new file mode 100644 index 0000000000..4d35dba9a6 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/ops/normalize_test.cc @@ -0,0 +1,90 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { + +namespace ops { +namespace custom { +TfLiteRegistration* Register_NORMALIZE(); + +namespace { + +using ::testing::ElementsAreArray; + +class NormalizeOpModel : public SingleOpModel { + public: + explicit NormalizeOpModel(const string& input) { + input_ = AddInput(TensorType_STRING); + output_ = AddOutput(TensorType_STRING); + + SetCustomOp("Normalize", {}, Register_NORMALIZE); + BuildInterpreter({{static_cast(input.size())}}); + PopulateStringTensor(input_, {input}); + } + + std::vector GetStringOutput() { + TfLiteTensor* output = interpreter_->tensor(output_); + int num = GetStringCount(output); + std::vector result(num); + for (int i = 0; i < num; i++) { + auto ref = GetString(output, i); + result[i] = string(ref.str, ref.len); + } + return result; + } + + private: + int input_; + int output_; +}; + +TEST(NormalizeOpTest, RegularInput) { + NormalizeOpModel m("I'm good; you're welcome"); + m.Invoke(); + EXPECT_THAT(m.GetStringOutput(), + ElementsAreArray({" i am good; you are welcome "})); +} + +TEST(NormalizeOpTest, OneInput) { + NormalizeOpModel m("Hi!!!!"); + m.Invoke(); + EXPECT_THAT(m.GetStringOutput(), ElementsAreArray({" hi ! "})); +} + +TEST(NormalizeOpTest, EmptyInput) { + NormalizeOpModel m(""); + m.Invoke(); + EXPECT_THAT(m.GetStringOutput(), ElementsAreArray({" "})); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/models/smartreply/ops/predict.cc b/tensorflow/contrib/lite/models/smartreply/ops/predict.cc new file mode 100644 index 0000000000..7b23adb990 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/ops/predict.cc @@ -0,0 +1,174 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Lookup projected hash signatures in Predictor model, +// output predicted labels and weights in decreasing order. +// +// Input: +// Input[0]: A list of hash signatures. int32[num of input] +// Input[1]: Hash signature keys in the model. int32[keys of model] +// Input[2]: Labels in the model. int32[keys of model, item per entry] +// Input[3]: Weights in the model. float[keys of model, item per entry] +// +// Output: +// Output[0]: Predicted labels. int32[num of output] +// Output[1]: Predicted weights. float[num of output] +// + +#include +#include +#include + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { +namespace ops { +namespace custom { + +namespace predict { + +struct PredictOption { + int32_t num_output; + float weight_threshold; + + static PredictOption* Cast(void* ptr) { + return reinterpret_cast(ptr); + } +}; + +bool WeightGreater(const std::pair& a, + const std::pair& b) { + return a.second > b.second; +} + +void* Init(TfLiteContext* context, const char* custom_option, size_t length) { + if (custom_option == nullptr || length != sizeof(PredictOption)) { + fprintf(stderr, "No Custom option set\n"); + exit(1); + } + PredictOption* option = new PredictOption; + int offset = 0; + option->num_output = + *reinterpret_cast(custom_option + offset); + offset += sizeof(int32_t); + option->weight_threshold = + *reinterpret_cast(custom_option + offset); + return reinterpret_cast(option); +} + +void Free(TfLiteContext* context, void* buffer) { + delete PredictOption::Cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, node->inputs->size, 4); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 2); + + TfLiteTensor* lookup = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* model_key = &context->tensors[node->inputs->data[1]]; + TfLiteTensor* model_label = &context->tensors[node->inputs->data[2]]; + TfLiteTensor* model_weight = &context->tensors[node->inputs->data[3]]; + TF_LITE_ENSURE_EQ(context, lookup->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, model_key->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, model_label->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, model_weight->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, lookup->dims->size, 1); + TF_LITE_ENSURE_EQ(context, model_key->dims->size, 1); + TF_LITE_ENSURE_EQ(context, model_label->dims->size, 2); + TF_LITE_ENSURE_EQ(context, model_weight->dims->size, 2); + TF_LITE_ENSURE_EQ(context, model_key->dims->data[0], + model_label->dims->data[0]); + TF_LITE_ENSURE_EQ(context, model_key->dims->data[0], + model_weight->dims->data[0]); + TF_LITE_ENSURE_EQ(context, model_label->dims->data[1], + model_weight->dims->data[1]); + + PredictOption* option = PredictOption::Cast(node->user_data); + TfLiteTensor* output_label = &context->tensors[node->outputs->data[0]]; + TfLiteTensor* output_weight = &context->tensors[node->outputs->data[1]]; + TF_LITE_ENSURE_EQ(context, output_label->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, output_weight->type, kTfLiteFloat32); + + TfLiteIntArray* label_size = TfLiteIntArrayCreate(1); + label_size->data[0] = option->num_output; + TfLiteIntArray* weight_size = TfLiteIntArrayCreate(1); + weight_size->data[0] = option->num_output; + TfLiteStatus status = + context->ResizeTensor(context, output_label, label_size); + if (status != kTfLiteOk) { + return status; + } + return context->ResizeTensor(context, output_weight, weight_size); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* lookup = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* model_key = &context->tensors[node->inputs->data[1]]; + TfLiteTensor* model_label = &context->tensors[node->inputs->data[2]]; + TfLiteTensor* model_weight = &context->tensors[node->inputs->data[3]]; + + // Aggregate by key + std::unordered_map aggregation; + const int num_input = lookup->dims->data[0]; + const int num_rows = model_key->dims->data[0]; + const int items = model_label->dims->data[1]; + int* model_key_end = model_key->data.i32 + num_rows; + + for (int i = 0; i < num_input; i++) { + int* ptr = std::lower_bound(model_key->data.i32, model_key_end, + lookup->data.i32[i]); + if (ptr != nullptr && ptr != model_key_end && *ptr == lookup->data.i32[i]) { + int idx = ptr - model_key->data.i32; + for (int j = 0; j < items; j++) { + aggregation[model_label->data.i32[idx * items + j]] += + model_weight->data.f[idx * items + j] / num_input; + } + } + } + + // Sort by value + std::vector> sorted_labels(aggregation.begin(), + aggregation.end()); + std::sort(sorted_labels.begin(), sorted_labels.end(), WeightGreater); + + PredictOption* option = PredictOption::Cast(node->user_data); + TfLiteTensor* output_label = &context->tensors[node->outputs->data[0]]; + TfLiteTensor* output_weight = &context->tensors[node->outputs->data[1]]; + for (int i = 0; i < output_label->dims->data[0]; i++) { + if (i >= sorted_labels.size() || + sorted_labels[i].second < option->weight_threshold) { + // Set -1 to avoid lookup message with id 0, which is set for backoff. + output_label->data.i32[i] = -1; + output_weight->data.f[i] = 0.0f; + } else { + output_label->data.i32[i] = sorted_labels[i].first; + output_weight->data.f[i] = sorted_labels[i].second; + } + } + + return kTfLiteOk; +} + +} // namespace predict + +TfLiteRegistration* Register_PREDICT() { + static TfLiteRegistration r = {predict::Init, predict::Free, predict::Prepare, + predict::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/smartreply/ops/predict_test.cc b/tensorflow/contrib/lite/models/smartreply/ops/predict_test.cc new file mode 100644 index 0000000000..e97c58cbd1 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/ops/predict_test.cc @@ -0,0 +1,183 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { + +namespace ops { +namespace custom { +TfLiteRegistration* Register_PREDICT(); + +namespace { + +using ::testing::ElementsAreArray; + +class PredictOpModel : public SingleOpModel { + public: + PredictOpModel(std::initializer_list input_signature_shape, + std::initializer_list key_shape, + std::initializer_list labelweight_shape, int num_output, + float threshold) { + input_signature_ = AddInput(TensorType_INT32); + model_key_ = AddInput(TensorType_INT32); + model_label_ = AddInput(TensorType_INT32); + model_weight_ = AddInput(TensorType_FLOAT32); + output_label_ = AddOutput(TensorType_INT32); + output_weight_ = AddOutput(TensorType_FLOAT32); + + std::vector predict_option; + writeInt32(num_output, &predict_option); + writeFloat32(threshold, &predict_option); + SetCustomOp("Predict", predict_option, Register_PREDICT); + BuildInterpreter({{input_signature_shape, key_shape, labelweight_shape, + labelweight_shape}}); + } + + void SetInputSignature(std::initializer_list data) { + PopulateTensor(input_signature_, data); + } + + void SetModelKey(std::initializer_list data) { + PopulateTensor(model_key_, data); + } + + void SetModelLabel(std::initializer_list data) { + PopulateTensor(model_label_, data); + } + + void SetModelWeight(std::initializer_list data) { + PopulateTensor(model_weight_, data); + } + + std::vector GetLabel() { return ExtractVector(output_label_); } + std::vector GetWeight() { + return ExtractVector(output_weight_); + } + + void writeFloat32(float value, std::vector* data) { + union { + float v; + uint8_t r[4]; + } float_to_raw; + float_to_raw.v = value; + for (unsigned char i : float_to_raw.r) { + data->push_back(i); + } + } + + void writeInt32(int32_t value, std::vector* data) { + union { + int32_t v; + uint8_t r[4]; + } int32_to_raw; + int32_to_raw.v = value; + for (unsigned char i : int32_to_raw.r) { + data->push_back(i); + } + } + + private: + int input_signature_; + int model_key_; + int model_label_; + int model_weight_; + int output_label_; + int output_weight_; +}; + +TEST(PredictOpTest, AllLabelsAreValid) { + PredictOpModel m({4}, {5}, {5, 2}, 2, 0.0001); + m.SetInputSignature({1, 3, 7, 9}); + m.SetModelKey({1, 2, 4, 6, 7}); + m.SetModelLabel({11, 12, 11, 12, 11, 12, 11, 12, 11, 12}); + m.SetModelWeight({0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2}); + m.Invoke(); + EXPECT_THAT(m.GetLabel(), ElementsAreArray({12, 11})); + EXPECT_THAT(m.GetWeight(), ElementsAreArray(ArrayFloatNear({0.1, 0.05}))); +} + +TEST(PredictOpTest, MoreLabelsThanRequired) { + PredictOpModel m({4}, {5}, {5, 2}, 1, 0.0001); + m.SetInputSignature({1, 3, 7, 9}); + m.SetModelKey({1, 2, 4, 6, 7}); + m.SetModelLabel({11, 12, 11, 12, 11, 12, 11, 12, 11, 12}); + m.SetModelWeight({0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2}); + m.Invoke(); + EXPECT_THAT(m.GetLabel(), ElementsAreArray({12})); + EXPECT_THAT(m.GetWeight(), ElementsAreArray(ArrayFloatNear({0.1}))); +} + +TEST(PredictOpTest, OneLabelDoesNotPassThreshold) { + PredictOpModel m({4}, {5}, {5, 2}, 2, 0.07); + m.SetInputSignature({1, 3, 7, 9}); + m.SetModelKey({1, 2, 4, 6, 7}); + m.SetModelLabel({11, 12, 11, 12, 11, 12, 11, 12, 11, 12}); + m.SetModelWeight({0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2}); + m.Invoke(); + EXPECT_THAT(m.GetLabel(), ElementsAreArray({12, -1})); + EXPECT_THAT(m.GetWeight(), ElementsAreArray(ArrayFloatNear({0.1, 0}))); +} + +TEST(PredictOpTest, NoneLabelPassThreshold) { + PredictOpModel m({4}, {5}, {5, 2}, 2, 0.6); + m.SetInputSignature({1, 3, 7, 9}); + m.SetModelKey({1, 2, 4, 6, 7}); + m.SetModelLabel({11, 12, 11, 12, 11, 12, 11, 12, 11, 12}); + m.SetModelWeight({0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2}); + m.Invoke(); + EXPECT_THAT(m.GetLabel(), ElementsAreArray({-1, -1})); + EXPECT_THAT(m.GetWeight(), ElementsAreArray(ArrayFloatNear({0, 0}))); +} + +TEST(PredictOpTest, OnlyOneLabelGenerated) { + PredictOpModel m({4}, {5}, {5, 2}, 2, 0.0001); + m.SetInputSignature({1, 3, 7, 9}); + m.SetModelKey({1, 2, 4, 6, 7}); + m.SetModelLabel({11, 0, 11, 0, 11, 0, 11, 0, 11, 0}); + m.SetModelWeight({0.1, 0, 0.1, 0, 0.1, 0, 0.1, 0, 0.1, 0}); + m.Invoke(); + EXPECT_THAT(m.GetLabel(), ElementsAreArray({11, -1})); + EXPECT_THAT(m.GetWeight(), ElementsAreArray(ArrayFloatNear({0.05, 0}))); +} + +TEST(PredictOpTest, NoLabelGenerated) { + PredictOpModel m({4}, {5}, {5, 2}, 2, 0.0001); + m.SetInputSignature({5, 3, 7, 9}); + m.SetModelKey({1, 2, 4, 6, 7}); + m.SetModelLabel({11, 0, 11, 0, 11, 0, 11, 0, 0, 0}); + m.SetModelWeight({0.1, 0, 0.1, 0, 0.1, 0, 0.1, 0, 0, 0}); + m.Invoke(); + EXPECT_THAT(m.GetLabel(), ElementsAreArray({-1, -1})); + EXPECT_THAT(m.GetWeight(), ElementsAreArray(ArrayFloatNear({0, 0}))); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/models/smartreply/predictor.cc b/tensorflow/contrib/lite/models/smartreply/predictor.cc new file mode 100644 index 0000000000..a28222213e --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/predictor.cc @@ -0,0 +1,116 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/models/smartreply/predictor.h" + +#include "absl/strings/str_split.h" +#include "re2/re2.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); + +namespace tflite { +namespace custom { +namespace smartreply { + +// Split sentence into segments (using punctuation). +std::vector SplitSentence(const string& input) { + string result(input); + + RE2::GlobalReplace(&result, "([?.!,])+", " \\1"); + RE2::GlobalReplace(&result, "([?.!,])+\\s+", "\\1\t"); + RE2::GlobalReplace(&result, "[ ]+", " "); + RE2::GlobalReplace(&result, "\t+$", ""); + + return strings::Split(result, '\t'); +} + +// Predict with TfLite model. +void ExecuteTfLite(const string& sentence, ::tflite::Interpreter* interpreter, + std::map* response_map) { + { + TfLiteTensor* input = interpreter->tensor(interpreter->inputs()[0]); + tflite::DynamicBuffer buf; + buf.AddString(sentence.data(), sentence.length()); + buf.WriteToTensor(input); + interpreter->AllocateTensors(); + + interpreter->Invoke(); + + TfLiteTensor* messages = interpreter->tensor(interpreter->outputs()[0]); + TfLiteTensor* confidence = interpreter->tensor(interpreter->outputs()[1]); + + for (int i = 0; i < confidence->dims->data[0]; i++) { + float weight = confidence->data.f[i]; + auto response_text = tflite::GetString(messages, i); + if (response_text.len > 0) { + (*response_map)[string(response_text.str, response_text.len)] += weight; + } + } + } +} + +void GetSegmentPredictions( + const std::vector& input, const ::tflite::FlatBufferModel& model, + const SmartReplyConfig& config, + std::vector* predictor_responses) { + // Initialize interpreter + std::unique_ptr<::tflite::Interpreter> interpreter; + ::tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); + ::tflite::InterpreterBuilder(model, resolver)(&interpreter); + + if (!model.initialized()) { + fprintf(stderr, "Failed to mmap model \n"); + return; + } + + // Execute Tflite Model + std::map response_map; + std::vector sentences; + for (const string& str : input) { + std::vector splitted_str = SplitSentence(str); + sentences.insert(sentences.end(), splitted_str.begin(), splitted_str.end()); + } + for (const auto& sentence : sentences) { + ExecuteTfLite(sentence, interpreter.get(), &response_map); + } + + // Generate the result. + for (const auto& iter : response_map) { + PredictorResponse prediction(iter.first, iter.second); + predictor_responses->emplace_back(prediction); + } + std::sort(predictor_responses->begin(), predictor_responses->end(), + [](const PredictorResponse& a, const PredictorResponse& b) { + return a.GetScore() > b.GetScore(); + }); + + // Add backoff response. + for (const string& backoff : config.backoff_responses) { + if (predictor_responses->size() >= config.num_response) { + break; + } + predictor_responses->push_back({backoff, config.backoff_confidence}); + } +} + +} // namespace smartreply +} // namespace custom +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/smartreply/predictor.h b/tensorflow/contrib/lite/models/smartreply/predictor.h new file mode 100644 index 0000000000..3b9a2b32e1 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/predictor.h @@ -0,0 +1,80 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODELS_SMARTREPLY_PREDICTOR_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODELS_SMARTREPLY_PREDICTOR_H_ + +#include +#include + +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace custom { +namespace smartreply { + +const int kDefaultNumResponse = 10; +const float kDefaultBackoffConfidence = 1e-4; + +class PredictorResponse; +struct SmartReplyConfig; + +// With a given string as input, predict the response with a Tflite model. +// When config.backoff_response is not empty, predictor_responses will be filled +// with messagees from backoff response. +void GetSegmentPredictions(const std::vector& input, + const ::tflite::FlatBufferModel& model, + const SmartReplyConfig& config, + std::vector* predictor_responses); + +// Data object used to hold a single predictor response. +// It includes messages, and confidence. +class PredictorResponse { + public: + PredictorResponse(const string& response_text, float score) { + response_text_ = response_text; + prediction_score_ = score; + } + + // Accessor methods. + const string& GetText() const { return response_text_; } + float GetScore() const { return prediction_score_; } + + private: + string response_text_ = ""; + float prediction_score_ = 0.0; +}; + +// Configurations for SmartReply. +struct SmartReplyConfig { + // Maximum responses to return. + int num_response; + // Default confidence for backoff responses. + float backoff_confidence; + // Backoff responses are used when predicted responses cannot fulfill the + // list. + const std::vector& backoff_responses; + + SmartReplyConfig(std::vector backoff_responses) + : num_response(kDefaultNumResponse), + backoff_confidence(kDefaultBackoffConfidence), + backoff_responses(backoff_responses) {} +}; + +} // namespace smartreply +} // namespace custom +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODELS_SMARTREPLY_PREDICTOR_H_ diff --git a/tensorflow/contrib/lite/models/smartreply/predictor_test.cc b/tensorflow/contrib/lite/models/smartreply/predictor_test.cc new file mode 100644 index 0000000000..2fa9923bc9 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/predictor_test.cc @@ -0,0 +1,150 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/models/smartreply/predictor.h" + +#include +#include + +#include "base/logging.h" +#include +#include +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "tensorflow/contrib/lite/models/test_utils.h" + +namespace tflite { +namespace custom { +namespace smartreply { +namespace { + +const char kModelName[] = "smartreply_ondevice_model.bin"; +const char kSamples[] = "smartreply_samples.tsv"; + +MATCHER_P(IncludeAnyResponesIn, expected_response, "contains the response") { + bool has_expected_response = false; + for (const auto &item : *arg) { + const string &response = item.GetText(); + if (expected_response.find(response) != expected_response.end()) { + has_expected_response = true; + break; + } + } + return has_expected_response; +} + +class PredictorTest : public ::testing::Test { + protected: + PredictorTest() { + model_ = tflite::FlatBufferModel::BuildFromFile( + StrCat(TestDataPath(), "/", kModelName).c_str()); + CHECK(model_); + } + ~PredictorTest() override {} + + std::unique_ptr<::tflite::FlatBufferModel> model_; +}; + +TEST_F(PredictorTest, GetSegmentPredictions) { + std::vector predictions; + + GetSegmentPredictions({"Welcome"}, *model_, /*config=*/{{}}, &predictions); + EXPECT_GT(predictions.size(), 0); + + float max = 0; + for (const auto &item : predictions) { + LOG(INFO) << "Response: " << item.GetText(); + if (item.GetScore() > max) { + max = item.GetScore(); + } + } + + EXPECT_GT(max, 0.3); + EXPECT_THAT( + &predictions, + IncludeAnyResponesIn(std::unordered_set({"Thanks very much"}))); +} + +TEST_F(PredictorTest, TestTwoSentences) { + std::vector predictions; + + GetSegmentPredictions({"Hello", "How are you?"}, *model_, /*config=*/{{}}, + &predictions); + EXPECT_GT(predictions.size(), 0); + + float max = 0; + for (const auto &item : predictions) { + LOG(INFO) << "Response: " << item.GetText(); + if (item.GetScore() > max) { + max = item.GetScore(); + } + } + + EXPECT_GT(max, 0.3); + EXPECT_THAT(&predictions, IncludeAnyResponesIn(std::unordered_set( + {"Hi, how are you doing?"}))); +} + +TEST_F(PredictorTest, TestBackoff) { + std::vector predictions; + + GetSegmentPredictions({"你好"}, *model_, /*config=*/{{}}, &predictions); + EXPECT_EQ(predictions.size(), 0); + + // Backoff responses are returned in order. + GetSegmentPredictions({"你好"}, *model_, /*config=*/{{"Yes", "Ok"}}, + &predictions); + EXPECT_EQ(predictions.size(), 2); + EXPECT_EQ(predictions[0].GetText(), "Yes"); + EXPECT_EQ(predictions[1].GetText(), "Ok"); +} + +TEST_F(PredictorTest, BatchTest) { + int total_items = 0; + int total_responses = 0; + int total_triggers = 0; + + string line; + std::ifstream fin(StrCat(TestDataPath(), "/", kSamples)); + while (std::getline(fin, line)) { + const std::vector &fields = strings::Split(line, '\t'); + if (fields.empty()) { + continue; + } + + // Parse sample file and predict + const string &msg = fields[0]; + std::vector predictions; + GetSegmentPredictions({msg}, *model_, /*config=*/{{}}, &predictions); + + // Validate response and generate stats. + total_items++; + total_responses += predictions.size(); + if (!predictions.empty()) { + total_triggers++; + } + EXPECT_THAT(&predictions, IncludeAnyResponesIn(std::unordered_set( + fields.begin() + 1, fields.end()))); + } + + LOG(INFO) << "Responses: " << total_responses << " / " << total_items; + LOG(INFO) << "Triggers: " << total_triggers << " / " << total_items; + EXPECT_EQ(total_triggers, total_items); +} + +} // namespace +} // namespace smartreply +} // namespace custom +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc new file mode 100644 index 0000000000..f5d1f436bc --- /dev/null +++ b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc @@ -0,0 +1,115 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for speech Hotword model using TFLite Ops. + +#include + +#include +#include + +#include "base/logging.h" +#include "file/base/path.h" +#include "testing/base/public/googletest.h" +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/models/test_utils.h" + +namespace tflite { +namespace models { + +void RunTest(int model_input_tensor, int svdf_layer_state_tensor, + int model_output_tensor, const string& model_name, + const string& golden_in_name, const string& golden_out_name) { + // Read the model. + string tflite_file_path = file::JoinPath(TestDataPath(), model_name); + auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); + CHECK(model) << "Failed to read model from file " << tflite_file_path; + + // Initialize the interpreter. + ops::builtin::BuiltinOpResolver builtins; + std::unique_ptr interpreter; + InterpreterBuilder(*model, builtins)(&interpreter); + CHECK(interpreter != nullptr); + interpreter->AllocateTensors(); + + // Reset the SVDF layer state. + memset(interpreter->tensor(svdf_layer_state_tensor)->data.raw, 0, + interpreter->tensor(svdf_layer_state_tensor)->bytes); + + // Load the input frames. + Frames input_frames; + const string input_file_path = file::JoinPath(TestDataPath(), golden_in_name); + ReadFrames(input_file_path, &input_frames); + + // Load the golden output results. + Frames output_frames; + const string output_file_path = + file::JoinPath(TestDataPath(), golden_out_name); + ReadFrames(output_file_path, &output_frames); + + const int speech_batch_size = + interpreter->tensor(model_input_tensor)->dims->data[0]; + const int speech_input_size = + interpreter->tensor(model_input_tensor)->dims->data[1]; + const int speech_output_size = + interpreter->tensor(model_output_tensor)->dims->data[1]; + const int input_sequence_size = + input_frames[0].size() / (speech_input_size * speech_batch_size); + float* input_ptr = interpreter->tensor(model_input_tensor)->data.f; + float* output_ptr = interpreter->tensor(model_output_tensor)->data.f; + + // The first layer (SVDF) input size is 40 (speech_input_size). Each speech + // input frames for this model is 1280 floats, which can be fed to input in a + // sequence of size 32 (input_sequence_size). + for (int i = 0; i < TestInputSize(input_frames); i++) { + int frame_ptr = 0; + for (int s = 0; s < input_sequence_size; s++) { + for (int k = 0; k < speech_input_size * speech_batch_size; k++) { + input_ptr[k] = input_frames[i][frame_ptr++]; + } + interpreter->Invoke(); + } + // After the whole frame (1280 floats) is fed, we can check the output frame + // matches with the golden output frame. + for (int k = 0; k < speech_output_size; k++) { + ASSERT_NEAR(output_ptr[k], output_frames[i][k], 1e-5); + } + } +} + +TEST(SpeechHotword, OkGoogleTestRank1) { + constexpr int kModelInputTensor = 0; + constexpr int kSvdfLayerStateTensor = 4; + constexpr int kModelOutputTensor = 18; + + RunTest(kModelInputTensor, kSvdfLayerStateTensor, kModelOutputTensor, + "speech_hotword_model_rank1.tflite", "speech_hotword_model_in.csv", + "speech_hotword_model_out_rank1.csv"); +} + +TEST(SpeechHotword, OkGoogleTestRank2) { + constexpr int kModelInputTensor = 17; + constexpr int kSvdfLayerStateTensor = 1; + constexpr int kModelOutputTensor = 18; + RunTest(kModelInputTensor, kSvdfLayerStateTensor, kModelOutputTensor, + "speech_hotword_model_rank2.tflite", "speech_hotword_model_in.csv", + "speech_hotword_model_out_rank2.csv"); +} + +} // namespace models +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc new file mode 100644 index 0000000000..687cfab0b2 --- /dev/null +++ b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc @@ -0,0 +1,114 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for speech SpeakerId model using TFLite Ops. + +#include + +#include +#include + +#include "base/logging.h" +#include "file/base/path.h" +#include "testing/base/public/googletest.h" +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/models/test_utils.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); + +namespace tflite { +namespace models { + +constexpr int kModelInputTensor = 0; +constexpr int kLstmLayer1OutputStateTensor = 19; +constexpr int kLstmLayer1CellStateTensor = 20; +constexpr int kLstmLayer2OutputStateTensor = 40; +constexpr int kLstmLayer2CellStateTensor = 41; +constexpr int kLstmLayer3OutputStateTensor = 61; +constexpr int kLstmLayer3CellStateTensor = 62; +constexpr int kModelOutputTensor = 66; + +TEST(SpeechSpeakerId, OkGoogleTest) { + // Read the model. + string tflite_file_path = + file::JoinPath(TestDataPath(), "speech_speakerid_model.tflite"); + auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); + CHECK(model) << "Failed to read model from file " << tflite_file_path; + + // Initialize the interpreter. + ::tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); + std::unique_ptr interpreter; + InterpreterBuilder(*model, resolver)(&interpreter); + CHECK(interpreter != nullptr); + interpreter->AllocateTensors(); + + // Load the input frames. + Frames input_frames; + const string input_file_path = + file::JoinPath(TestDataPath(), "speech_speakerid_model_in.csv"); + ReadFrames(input_file_path, &input_frames); + + // Load the golden output results. + Frames output_frames; + const string output_file_path = + file::JoinPath(TestDataPath(), "speech_speakerid_model_out.csv"); + ReadFrames(output_file_path, &output_frames); + + const int speech_batch_size = + interpreter->tensor(kModelInputTensor)->dims->data[0]; + const int speech_input_size = + interpreter->tensor(kModelInputTensor)->dims->data[1]; + const int speech_output_size = + interpreter->tensor(kModelOutputTensor)->dims->data[1]; + + float* input_ptr = interpreter->tensor(kModelInputTensor)->data.f; + float* output_ptr = interpreter->tensor(kModelOutputTensor)->data.f; + + // Clear the LSTM state for layers. + memset(interpreter->tensor(kLstmLayer1OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer1CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer2OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer2CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer3OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer3CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3CellStateTensor)->bytes); + for (int i = 0; i < input_frames.size(); i++) { + // Feed the input to model. + int frame_ptr = 0; + for (int k = 0; k < speech_input_size * speech_batch_size; k++) { + input_ptr[k] = input_frames[i][frame_ptr++]; + } + // Run the model. + interpreter->Invoke(); + // Validate the output. + for (int k = 0; k < speech_output_size; k++) { + ASSERT_NEAR(output_ptr[k], output_frames[i][k], 1e-5); + } + } +} + +} // namespace models +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc b/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc new file mode 100644 index 0000000000..30d89a1354 --- /dev/null +++ b/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc @@ -0,0 +1,127 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for speech TERSE AM model using TFLite Ops. + +#include + +#include +#include + +#include "base/logging.h" +#include "file/base/path.h" +#include "testing/base/public/googletest.h" +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/models/test_utils.h" + +namespace tflite { +namespace models { + +constexpr int kModelInputTensor = 0; +constexpr int kLstmLayer1OutputStateTensor = 19; +constexpr int kLstmLayer1CellStateTensor = 20; +constexpr int kLstmLayer2OutputStateTensor = 40; +constexpr int kLstmLayer2CellStateTensor = 41; +constexpr int kLstmLayer3OutputStateTensor = 61; +constexpr int kLstmLayer3CellStateTensor = 62; +constexpr int kLstmLayer4OutputStateTensor = 82; +constexpr int kLstmLayer4CellStateTensor = 83; +constexpr int kLstmLayer5OutputStateTensor = 103; +constexpr int kLstmLayer5CellStateTensor = 104; +constexpr int kModelOutputTensor = 109; + +TEST(SpeechTerseAm, RandomIOTest) { + // Read the model. + string tflite_file_path = + file::JoinPath(TestDataPath(), "speech_terse_am_model.tflite"); + auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); + CHECK(model) << "Failed to mmap model " << tflite_file_path; + + // Initialize the interpreter. + ops::builtin::BuiltinOpResolver builtins; + std::unique_ptr interpreter; + InterpreterBuilder(*model, builtins)(&interpreter); + CHECK(interpreter != nullptr); + interpreter->AllocateTensors(); + + // Load the input frames. + Frames input_frames; + const string input_file_path = + file::JoinPath(TestDataPath(), "speech_terse_am_model_in.csv"); + ReadFrames(input_file_path, &input_frames); + + // Load the golden output results. + Frames output_frames; + const string output_file_path = + file::JoinPath(TestDataPath(), "speech_terse_am_model_out.csv"); + ReadFrames(output_file_path, &output_frames); + + const int speech_batch_size = + interpreter->tensor(kModelInputTensor)->dims->data[0]; + const int speech_input_size = + interpreter->tensor(kModelInputTensor)->dims->data[1]; + const int speech_output_size = + interpreter->tensor(kModelOutputTensor)->dims->data[1]; + + float* input_ptr = interpreter->tensor(kModelInputTensor)->data.f; + float* output_ptr = interpreter->tensor(kModelOutputTensor)->data.f; + + // Clear the LSTM state for layers. + memset(interpreter->tensor(kLstmLayer1OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer1CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer2OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer2CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer3OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer3CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer4OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer4OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer4CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer4CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer5OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer5OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer5CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer5CellStateTensor)->bytes); + + + for (int i = 0; i < input_frames.size(); i++) { + // Feed the input to model. + int frame_ptr = 0; + for (int k = 0; k < speech_input_size * speech_batch_size; k++) { + input_ptr[k] = input_frames[i][frame_ptr++]; + } + // Run the model. + interpreter->Invoke(); + // Validate the output. + for (int k = 0; k < speech_output_size; k++) { + ASSERT_NEAR(output_ptr[k], output_frames[i][k], 5.2e-4); + } + } +} + +} // namespace models +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/speech_tts_model_test.cc b/tensorflow/contrib/lite/models/speech_tts_model_test.cc new file mode 100644 index 0000000000..e6f2673a42 --- /dev/null +++ b/tensorflow/contrib/lite/models/speech_tts_model_test.cc @@ -0,0 +1,116 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for speech TTS model using TFLite Ops. + +#include + +#include +#include + +#include "base/logging.h" +#include "file/base/path.h" +#include "testing/base/public/googletest.h" +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/models/test_utils.h" + +namespace tflite { +namespace models { + +constexpr int kModelInputTensor = 0; +constexpr int kLstmLayer1OutputStateTensor = 25; +constexpr int kLstmLayer1CellStateTensor = 26; +constexpr int kLstmLayer2OutputStateTensor = 46; +constexpr int kLstmLayer2CellStateTensor = 47; +constexpr int kLstmLayer3OutputStateTensor = 67; +constexpr int kLstmLayer3CellStateTensor = 68; +constexpr int kRnnLayerHiddenStateTensor = 73; +constexpr int kModelOutputTensor = 74; + +TEST(SpeechTTS, RandomIOTest) { + // Read the model. + string tflite_file_path = + file::JoinPath(TestDataPath(), "speech_tts_model.tflite"); + auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); + CHECK(model) << "Failed to mmap model " << tflite_file_path; + + // Initialize the interpreter. + ops::builtin::BuiltinOpResolver builtins; + std::unique_ptr interpreter; + InterpreterBuilder(*model, builtins)(&interpreter); + CHECK(interpreter != nullptr); + interpreter->AllocateTensors(); + + // Load the input frames. + Frames input_frames; + const string input_file_path = + file::JoinPath(TestDataPath(), "speech_tts_model_in.csv"); + ReadFrames(input_file_path, &input_frames); + + // Load the golden output results. + Frames output_frames; + const string output_file_path = + file::JoinPath(TestDataPath(), "speech_tts_model_out.csv"); + ReadFrames(output_file_path, &output_frames); + + const int speech_batch_size = + interpreter->tensor(kModelInputTensor)->dims->data[0]; + const int speech_input_size = + interpreter->tensor(kModelInputTensor)->dims->data[1]; + const int speech_output_size = + interpreter->tensor(kModelOutputTensor)->dims->data[1]; + + float* input_ptr = interpreter->tensor(kModelInputTensor)->data.f; + float* output_ptr = interpreter->tensor(kModelOutputTensor)->data.f; + + // Clear the LSTM state for layers. + memset(interpreter->tensor(kLstmLayer1OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer1CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer2OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer2CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer3OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer3CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3CellStateTensor)->bytes); + + memset(interpreter->tensor(kRnnLayerHiddenStateTensor)->data.raw, 0, + interpreter->tensor(kRnnLayerHiddenStateTensor)->bytes); + + for (int i = 0; i < input_frames.size(); i++) { + // Feed the input to model. + int frame_ptr = 0; + for (int k = 0; k < speech_input_size * speech_batch_size; k++) { + input_ptr[k] = input_frames[i][frame_ptr++]; + } + // Run the model. + interpreter->Invoke(); + // Validate the output. + for (int k = 0; k < speech_output_size; k++) { + ASSERT_NEAR(output_ptr[k], output_frames[i][k], 1e-5); + } + } +} + +} // namespace models +} // namespace tflite diff --git a/tensorflow/contrib/lite/models/test_utils.h b/tensorflow/contrib/lite/models/test_utils.h new file mode 100644 index 0000000000..b2596babd0 --- /dev/null +++ b/tensorflow/contrib/lite/models/test_utils.h @@ -0,0 +1,84 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODELS_TEST_UTILS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODELS_TEST_UTILS_H_ + +#include +#include + +#include +#include +#include +#include + +namespace tflite { +namespace models { +using Frames = std::vector>; +} // namespace models +} // namespace tflite + +#ifndef __ANDROID__ +#include "file/base/path.h" +#include "tensorflow/core/platform/test.h" + +inline string TestDataPath() { + return string(file::JoinPath(tensorflow::testing::TensorFlowSrcRoot(), + "contrib/lite/models/testdata/")); +} +inline int TestInputSize(const tflite::models::Frames& input_frames) { + return input_frames.size(); +} +#else +inline string TestDataPath() { + return string("third_party/tensorflow/contrib/lite/models/testdata/"); +} + +inline int TestInputSize(const tflite::models::Frames& input_frames) { + // Android TAP is very slow, we only test the first 20 frames. + return 20; +} +#endif + +namespace tflite { +namespace models { + +// Read float data from a comma-separated file: +// Each line will be read into a float vector. +// The return result will be a vector of float vectors. +void ReadFrames(const string& csv_file_path, Frames* frames) { + std::ifstream csv_file(csv_file_path); + string line; + while (std::getline(csv_file, line, '\n')) { + std::vector fields; + // Used by strtok_r internaly for successive calls on the same string. + char* save_ptr = nullptr; + + // Tokenize the line. + char* next_token = + strtok_r(const_cast(line.c_str()), ",", &save_ptr); + while (next_token != nullptr) { + float f = strtod(next_token, nullptr); + fields.push_back(f); + next_token = strtok_r(nullptr, ",", &save_ptr); + } + frames->push_back(fields); + } + csv_file.close(); +} + +} // namespace models +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MODELS_TEST_UTILS_H_ diff --git a/tensorflow/contrib/lite/nnapi/BUILD b/tensorflow/contrib/lite/nnapi/BUILD new file mode 100644 index 0000000000..402f1e949b --- /dev/null +++ b/tensorflow/contrib/lite/nnapi/BUILD @@ -0,0 +1,25 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = [ + "//visibility:public", +]) + +cc_library( + name = "nnapi_lib", + hdrs = [ + "NeuralNetworksShim.h", + ], + linkopts = ["-ldl"], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h new file mode 100644 index 0000000000..5d06165772 --- /dev/null +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -0,0 +1,1916 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef NN_API_SHIM_H0 +#define NN_API_SHIM_H0 + +#include +#include +#include +#include + +// helpers + +#define NNAPI_LOG(format, ...) printf(format "\n", __VA_ARGS__); +#define LOAD_FUNCTION(name) \ + static name##_fn fn = reinterpret_cast(loadFunction(#name)); +#define EXECUTE_FUNCTION(...) \ + if (fn != nullptr) { \ + fn(__VA_ARGS__); \ + } +#define EXECUTE_FUNCTION_RETURN(...) return fn != nullptr ? fn(__VA_ARGS__) : 0; + +inline void* loadLibrary(const char* name) { + // TODO: change RTLD_LOCAL? Assumes there can be multiple instances of nn + // api RT + void* handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) { + NNAPI_LOG("nnapi error: unable to open library %s", name); + } + return handle; +} + +inline void* getLibraryHandle() { + static void* handle = loadLibrary("libneuralnetworks.so"); + return handle; +} + +inline void* loadFunction(const char* name) { + void* fn = nullptr; + if (getLibraryHandle() != nullptr) { + fn = dlsym(getLibraryHandle(), name); + } + if (fn == nullptr) { + NNAPI_LOG("nnapi error: unable to open function %s", name); + } + return fn; +} + +inline bool NNAPIExists() { + static bool nnapi_is_available = getLibraryHandle(); + return nnapi_is_available; +} + +// nn api types + +/** + * Operand types. + * + * The type of operands that can be added to a model. + * + * Although we define many types, most operators accept just a few + * types. Most used are ANEURALNETWORKS_TENSOR_FLOAT32, + * ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, and ANEURALNETWORKS_INT32. + */ +enum { + /** The following entries are used to declare scalars. */ + + /** A 32 bit floating point scalar value. */ + ANEURALNETWORKS_FLOAT32 = 0, + /** A signed 32 bit integer scalar value. */ + ANEURALNETWORKS_INT32 = 1, + /** An unsigned 32 bit integer scalar value. */ + ANEURALNETWORKS_UINT32 = 2, + + /** The following entries are used to declare tensors. */ + + /** A tensor of 32 bit floating point values. */ + ANEURALNETWORKS_TENSOR_FLOAT32 = 3, + /** A tensor of 32 bit integer values. */ + ANEURALNETWORKS_TENSOR_INT32 = 4, + /** A tensor of 8 bit integers that represent real numbers. + * + * Attached to this tensor are two numbers that can be used to convert + * the 8 bit integer to the real value and vice versa. These two numbers are: + * - scale: a 32 bit floating point value + * - zero_value: an 32 bit integer + * + * The formula is: + * real_value = (integer_value - zero_value) * scale. + */ + ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 5, +}; + +/** + * Operation types. + * + * The type of operations that can be added to a model. + */ +enum { + /** Adds two tensors, elment-wise. + * + * Takes two input tensors of identical type and compatible dimensions. The + * output is the sum of both input tensors, optionally modified by an + * activation function. + * + * Two dimensions are compatible when: + * 1. they are equal, or + * 2. one of them is 1 + * + * The size of the output is the maximum size along each dimension of the + * input operands. It starts with the trailing dimensions, and works its way + * forward. + * + * Example: + * + * input1.dimension = {4, 1, 2} + * input2.dimension = {5, 4, 3, 1} + * output.dimension = {5, 4, 3, 2} + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: A tensor. + * * 1: A tensor of the same type, and compatible dimensions as input0. + * * 2: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The sum, a tensor of the same type as input0. + */ + ANEURALNETWORKS_ADD = 0, + /** Performs a 2-D average pooling operation. + * + * The output dimensions are functions of the filter dimensions, stride, and + * padding. + * + * The values in the output tensor are computed as: + * + * output[batch, row, col, channel] = + * sum_{i, j}(input[batch, row + i, col + j, channel]) / sum(1) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the + * input. + * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ + * dimension. + * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ + * dimension. + * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ + * dimension. + * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ + * dimension. + * * 5: An INT32 value, specifying the output stride in the ‘width’ dimension. + * * 6: An INT32 value, specifying the output stride in the ‘height’ + * dimension. + * * 7: An INT32 value, specifying the filter width. + * * 8: An INT32 value, specifying the filter height. + * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, + * depth]. + */ + ANEURALNETWORKS_AVERAGE_POOL_2D = 1, + /** Concatenates the input tensors along the given dimension. + * + * The input tensors must have identical type and the same dimensions except + * the dimension along the concatenation axis. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * 0 ~ n: The list on n input tensors, of shape [D0, D1, ..., Daxis(i), ..., + * Dm] n+1: An INT32 value, specifying the concatenation axis. n+2: An INT32 + * value, and has to be one of the {@link FuseCode} values. Specifies the + * activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output, a tensor of the same type as the input tensors. + * The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm]. + */ + ANEURALNETWORKS_CONCATENATION = 2, + /** Performs an 2-D convolution operation. + * + * The CONV_2D op sweeps a 2-D filter that can mix channels together over a + * batch of images, applying the filter to each window of each image of the + * appropriate size. + * + * The output dimensions are functions of the filter dimensions, stride, and + * padding. + * + * The values in the output tensor are computed as: + * + * output[batch, row, col, channel] = + * sum_{i, j} ( + * input[batch, row + i, col + j, k] * + * filter[channel, row + i, col + j, k] + + * bias[channel] + * ) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying + * the input. + * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, + * depth_in], specifying the filter. + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. + * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the + * bias should also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias should + * be of {@link ANEURALNETWORKS_TENSOR_INT32}. + * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ + * dimension. + * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ + * dimension. + * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ + * dimension. + * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ + * dimension. + * * 7: An INT32 value, specifying the output stride in the ‘width’ dimension. + * * 8: An INT32 value, specifying the output stride in the ‘height’ + * dimension. + * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, + * depth_out]. + */ + ANEURALNETWORKS_CONV_2D = 3, + /** Performs a depthwise 2-D convolution operation. + * + * Given an input tensor of shape [batches, height, width, depth_in] and a + * filter tensor of shape [depth_out, filter_height, filter_width, depth_in] + * containing in_channels convolutional filters of depth 1, DEPTHWISE_CONV + * applies a different filter to each input channel (expanding from 1 channel + * to channel_multiplier channels for each), then concatenates the results + * together. + * + * The output has depth_out = depth_in * depth_multiplier channels. + * The output dimensions are functions of the filter dimensions, stride, and + * padding. + * + * The values in the output tensor are computed as: + * + * output[b, i, j, k * channel_multiplier + q] = + * sum_{di, dj} ( + * input[b, strides[1] * i + di, strides[2] * j + dj, k] * + * filter[di, dj, k, q] + * ) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying + * the input. + * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, + * depth_in], specifying the filter. + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. + * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the + * bias should also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias should + * be of {@link ANEURALNETWORKS_TENSOR_INT32}. + * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ + * dimension. + * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ + * dimension. + * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ + * dimension. + * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ + * dimension. + * * 7: An INT32 value, specifying the output stride in the ‘width’ dimension. + * * 8: An INT32 value, specifying the output stride in the ‘height’ + * dimension. + * * 9: An INT32 value, specifying the depthwise multiplier. + * * 10: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, + * depth_out]. + */ + ANEURALNETWORKS_DEPTHWISE_CONV_2D = 4, + /** Rearranges data from depth into blocks of spatial data. + * + * More specifically, this op outputs a copy of the input tensor where values + * from the depth dimension are moved in spatial blocks to the height and + * width dimensions. The value block_size indicates the input block size and + * how the data is moved. + * + * Chunks of data of size block_size * block_size from depth are rearranged + * into non-overlapping blocks of size block_size x block_size. + * + * The width of the output tensor is input_depth * block_size, whereas the + * height is input_height * block_size. The depth of the input tensor must be + * divisible by block_size * block_size + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying + * the input. + * * 1: An INT32 value, specifying the block_size. block_size must be >=1 and + * block_size * block_size must be a divisor of the input depth. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batch, height*block_size, + * width*block_size, depth/(block_size*block_size)]. + */ + ANEURALNETWORKS_DEPTH_TO_SPACE = 5, + /** Dequantizes the input tensor. + * + * The formula is: + * + * output = (input - zero_value) * scale. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: A tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}. + * + * Outputs: + * * 0: The output tensor of same shape as input0, but with type + * {@link ANEURALNETWORKS_TENSOR_FLOAT32}. + */ + ANEURALNETWORKS_DEQUANTIZE = 6, + + /** + * Looks up items from a given tensor. + * + * Each item in the output is a raw copy of the corresponding item in + * the input “values”. If the the given “lookup” indices are out of bounds, + * the op will fail and an error will be reported. + * + * Inputs: + * * 0: Values. An n-D tensor of any type X (where n >= 2). E.g., if n is 2, + * then the shape would be [lookup_dimension, values_dimension], where + * “lookup_dimension” corresponds to the indexing dimension in the lookup + * table, and “values_dimension” to the contents. + * * 1: Lookups. An 1-D tensor of type T, of shape [lookup_size], where + * “lookup_size” is the number of elements to look for, and each entry + * corresponds to the first dimension of the “values” tensor. + * + * Output: + * * 0: A n-D tensor of type X and the same rank and shape as the “values” + * tensor, except for the first dimension which has size “lookup_size”. + */ + ANEURALNETWORKS_EMBEDDING_LOOKUP = 7, + + /** Computes element-wise floor() on the input tensor. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: A tensor. + * + * Outputs: + * * 0: The output, a tensor of the same type and dimensions as input0. + */ + ANEURALNETWORKS_FLOOR = 8, + /** Denotes a fully (densely) connected layer, which connects all elements in + * the input tensor with each element in the output tensor. + * + * This layer implements the operation: + * + * outputs = activation(inputs * weights’ + bias) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4. + * + * Inputs: + * * 0: A tensor, specifying the input. If rank is greater than 2, then it + * gets flattened to a 2-D Tensor. The 2-D Tensor is handled as if dimensions + * corresponded to shape [batch_size, input_size], where “batch_size” + * corresponds to the batching dimension, and “input_size” is the size of the + * input. + * * 1: A 2-D tensor, specifying the weights, of shape [num_units, + * input_size], where "num_units" corresponds to the number of output nodes. + * * 2: A 1-D tensor, of shape [num_units], specifying the bias. + * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the + * bias should also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias should + * be of {@link ANEURALNETWORKS_TENSOR_INT32}. + * * 3: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output tensor, of shape [batch_size, num_units]. + */ + ANEURALNETWORKS_FULLY_CONNECTED = 9, + + /** + * Looks up values of a hash table with given keys. + * + * Inputs: + * * 0: Lookups. A 1-D int32 tensor with shape [ k ]. + * * 1: Keys. A 1-D int32 tensor with shape [ n ], *MUST* be sorted in + * ascending order. + * * 2: Values. A tensor with shape [ n … ]. + * + * Outputs: + * * 0: Output. A tensor with shape [ k …]. + * * 1: Hits. A uint8 tensor with shape [ k ] indicates whether the lookup + * hits or not. + */ + ANEURALNETWORKS_HASHTABLE_LOOKUP = 10, + + /** Applies L2 normalization along the depth dimension. + * + * The values in the output tensor are computed as: + * + * output[batch, row, col, channel] = + * input[batch, row, col, channel] / + * sqrt(sum_{c} pow(input[batch, row, col, c], 2)) + * + * For x with more dimensions, independently normalizes each 1-D slice along + * dimension dim. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the + * input. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, + * depth]. + */ + ANEURALNETWORKS_L2_NORMALIZATION = 11, + + /** Performs an 2-D L2 pooling operation. + * + * The output dimensions are functions of the filter dimensions, stride, and + * padding. + * + * The values in the output tensor are computed as: + * + * output[batch, row, col, channel] = + * sqrt(sum_{i, j} pow(input[batch, row + i, col + j, channel], 2) / + * sum(1)) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the + * input. + * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ + * dimension. + * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ + * dimension. + * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ + * dimension. + * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ + * dimension. + * * 5: An INT32 value, specifying the output stride in the ‘width’ dimension. + * * 6: An INT32 value, specifying the output stride in the ‘height’ + * dimension. + * * 7: An INT32 value, specifying the filter width. + * * 8: An INT32 value, specifying the filter height. + * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, + * depth]. + */ + ANEURALNETWORKS_L2_POOL_2D = 12, + /** Applies Local Response Normalization along the depth dimension. + * + * The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the + * last dimension), and each vector is normalized independently. Within a + * given vector, each component is divided by the weighted, squared sum of + * inputs within depth_radius. + * + * The output is calculated using this formula: + * + * sqr_sum[a, b, c, d] = + * sum(pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2) + * output = input / pow((bias + alpha * sqr_sum), beta) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the + * input. + * * 1: An INT32 value, specifying the radius of the normalization window. + * * 2: A FLOAT32 value, specifying the bias, must not be zero. + * * 3: A FLOAT32 value, specifying the scale factor, alpha. + * * 4: A FLOAT32 value, specifying the exponent, beta. + * + * Outputs: + * * 0: The output tensor of same shape as input0. + */ + ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION = 13, + /** Computes sigmoid activation on the input tensor element-wise. + * + * The output is calculated using this formula: + * + * output = 1 / (1 + exp(-input)) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4. + * + * Inputs: + * * 0: A tensor, specifying the input. + * + * Outputs: + * * 0: The output tensor of same shape as input0. + */ + ANEURALNETWORKS_LOGISTIC = 14, + + /** + * Projects an input to a bit vector via locality senstive hashing. + * + * Inputs: + * * 0: Hash functions. Dim.size == 2, DataType: Float. + * Tensor[0].Dim[0]: Number of hash functions. + * Tensor[0].Dim[1]: Number of seeds per hash functions. + * Tensor[0].Dim[1] <= 32 in sparse case. + * + * * 1: Input. Dim.size >= 1, no restriction on DataType. + * * 2: Weight. Optional. Dim.size == 1, DataType: Float. + * If not set, each input element is considered to have the same weight of + * 1.0. + * Tensor[1].Dim[0] == Tensor[2].Dim[0] + * * 3: Type: + * Sparse: Value LSHProjectionType_SPARSE(=1). + * Computed bit vector is considered to be sparse. + * Each output element is an int32 made up of multiple bits computed + * from hash functions. + * + * Dense: Value LSHProjectionType_DENSE(=2). + * Computed bit vector is considered to be dense. Each output element + * represents a bit and can take the value of either 0 or 1. + * + * Outputs: + * * 0: If the projection type is sparse: + * Output.Dim == { Tensor[0].Dim[0] } + * A tensor of int32 that represents hash signatures. + * If the projection type is Dense: + * Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] } + * A flattened tensor that represents projected bit vectors. + */ + ANEURALNETWORKS_LSH_PROJECTION = 15, + + /** + * Long short-term memory unit (LSTM) recurrent network layer. + * + * The default non-peephole implementation is based on: + * http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf + * S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural + * Computation, 9(8):1735-1780, 1997. + * + * The peephole implementation is based on: + * https://research.google.com/pubs/archive/43905.pdf + * Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory + * recurrent neural network architectures for large scale acoustic modeling." + * INTERSPEECH, 2014. + * + * The coupling of input and forget gate (CIFG) is based on: + * http://arxiv.org/pdf/1503.04069.pdf + * Greff et al. "LSTM: A Search Space Odyssey" + * + * The class has the following independently optional inputs: + * * If input gate (if CIFG): “input_to_forget_weights”, + * “recurrent_to_input_weights”, “cell_to_input_weights”, “input_gate_bias”. + * * If no peephole connections: “cell_to_input_weights”, + * “cell_to_forget_weights”, “cell_to_output_weights”. + * * If no projection layer: “projection_weights” and “projection_bias”. + * * If no projection bias: “projection_bias”. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Inputs: + * * 0: Input. + * A 2-D tensor of type T, of shape [batch_size, input_size], where + * “batch_size” corresponds to the batching dimension, and “input_size” + * is the size of the input. + * * 1: input_to_input_weights. + * A 2-D tensor of type T, of shape [num_units, input_size], where + * “num_units” corresponds to the number of cell units. + * * 2: input_to_forget_weights. + * A 2-D tensor of type T, of shape [num_units, input_size]. + * * 3: input_to_cell_weights. + * A 2-D tensor of type T, of shape [num_units, input_size]. + * * 4: input_to_output_weights. + * A 2-D tensor of type T, of shape [num_units, input_size]. + * * 5: recurrent_to_input_weights. + * A 2-D tensor of type T, of shape [num_units, output_size], where + * “output_size” corresponds to either the number of cell units (i.e., + * “num_units”), or the second dimension of the “projection_weights”, if + * defined. + * * 6: recurrent_to_forget_weights. + * A 2-D tensor of type T, of shape [num_units, output_size]. + * * 7: recurrent_to_cell_weights. + * A 2-D tensor of type T, of shape [num_units, output_size]. + * * 8: recurrent_to_output_weights. + * A 2-D tensor of type T, of shape [num_units, output_size]. + * * 9: cell_to_input_weights. + * A 1-D tensor of type T, of shape [num_units]. + * * 10:cell_to_forget_weights. + * A 1-D tensor of type T, of shape [num_units]. + * * 11:cell_to_output_weights. + * A 1-D tensor of type T, of shape [num_units]. + * * 12:input_gate_bias. + * A 1-D tensor of type T, of shape [num_units]. + * * 13:forget_gate_bias. + * A 1-D tensor of type T, of shape [num_units]. + * * 14:cell_bias. + * A 1-D tensor of type T, of shape [num_units]. + * * 15:output_gate_bias. + * A 1-D tensor of type T, of shape [num_units]. + * * 16:projection_weights. + * A 2-D tensor of type T, of shape [output_size, num_units]. + * * 17:projection_bias. + * A 1-D tensor of type T, of shape [output_size]. + * + * Parameters: + * * 18:fused_activation_function. + * An (optional) ActivationFunctionType indicating the activation + * function. + * If “NONE” is specified then it results in a linear activation. + * * 19:cell_clip. + * A clipping threshold for the cell state, such that values are bound + * within [-cell_clip, cell_clip]. If set to 0.0 then clipping is + * disabled. + * * 20:proj_clip. + * A clipping threshold for the output from the projection layer, such + * that values are bound within [-proj_clip, proj_clip]. If set to 0.0 + * then clipping is disabled. + * + * Outputs: + * * 0: scratch_buffer. + * A 3-D tensor of type T, of shape [batch_size, num_cell, 4]. + * * 1: output_state. + * A 2-D tensor of type T, of shape [batch_size, output_size]. + * * 2: cell_state. + * A 2-D tensor of type T, of shape [batch_size, num_units]. + * * 3: output. + * A 2-D tensor of type T, of shape [batch_size, output_size]. This is + * effectively the same as the current “output_state” value. + */ + ANEURALNETWORKS_LSTM = 16, + + /** Performs an 2-D max pooling operation. + * + * The output dimensions are functions of the filter dimensions, stride, and + * padding. + * + * The values in the output tensor are computed as: + * + * output[batch, row, col, channel] = + * max_{i, j} (input[batch, row + i, col + j, channel]) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the + * input. + * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ + * dimension. + * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ + * dimension. + * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ + * dimension. + * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ + * dimension. + * * 5: An INT32 value, specifying the output stride in the ‘width’ dimension. + * * 6: An INT32 value, specifying the output stride in the ‘height’ + * dimension. + * * 7: An INT32 value, specifying the filter width. + * * 8: An INT32 value, specifying the filter height. + * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, + * depth]. + */ + ANEURALNETWORKS_MAX_POOL_2D = 17, + + /** Multiplies two tensors, elment-wise. + * + * Takes two input tensors of identical type and compatible dimensions. The + * output is the product of both input tensors, optionally modified by an + * activation function. + * + * Two dimensions are compatible when: + * 1. they are equal, or + * 2. one of them is 1 + * + * The size of the resulting output is the maximum size along each dimension + * of the input operands. It starts with the trailing dimensions, and works + * its way forward. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: A tensor. + * * 1: A tensor of the same type, and compatible dimensions as input0. + * * 2: An INT32 value, and has to be one of the {@link FuseCode} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The product, a tensor of the same type as input0. + */ + ANEURALNETWORKS_MUL = 18, + /** Computes rectified linear activation on the input tensor element-wise. + * + * The output is calculated using this formula: + * + * output = max(0, input) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4. + * + * Inputs: + * * 0: A tensor, specifying the input. + * + * Outputs: + * * 0: The output tensor of same shape as input0. + */ + ANEURALNETWORKS_RELU = 19, + /** Computes rectified linear 1 activation on the input tensor element-wise. + * + * The output is calculated using this formula: + * + * output = min(1.f, max(-1.f, input)) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4. + * + * Inputs: + * * 0: A tensor, specifying the input. + * + * Outputs: + * * 0: The output tensor of same shape as input0. + */ + ANEURALNETWORKS_RELU1 = 20, + /** Computes rectified linear 6 activation on the input tensor element-wise. + * + * The output is calculated using this formula: + * + * output = min(6, max(0, input)) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4. + * + * Inputs: + * * 0: A tensor, specifying the input. + * + * Outputs: + * * 0: The output tensor of same shape as input0. + */ + ANEURALNETWORKS_RELU6 = 21, + /** Reshapes a tensor. + * + * Given tensor, this operation returns a tensor that has the same values as + * tensor, but with a newly specified shape. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4. + * + * Inputs: + * * 0: A tensor, specifying the tensor to be reshaped. + * * 1: A 1-D tensor of type {@link ANEURALNETWORKS_TENSOR_INT32}, defining + * the shape of the output tensor. The number of elements implied by shape + * must be the same as the number of elements in the input tensor. + * + * Outputs: + * * 0: The output tensor, of shape specified by the input shape. + */ + ANEURALNETWORKS_RESHAPE = 22, + /** Resizes images to given size using the bilinear interpretation. + * + * Resized images will be distorted if their original aspect ratio is not the + * same as input. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the + * input. + * * 1: An INT32 value, specifying the output width of the output tensor. + * * 2: An INT32 value, specifying the output height of the output tensor. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, new_height, new_width, + * depth]. + */ + ANEURALNETWORKS_RESIZE_BILINEAR = 23, + + /** + * A basic recurrent neural network layer. + * + * This layer implements the operation: + * outputs = state = activation(inputs * input_weights + state * + * recurrent_weights + bias) + * + * Where: + * * “input_weights” is a weight matrix that multiplies the inputs; + * * “recurrent_weights” is a weight matrix that multiplies the current + * “state” which itself is the output from the previous time step + * computation; + * * “bias” is a bias vector (added to each output vector in the batch); + * * “activation” is the function passed as the “fused_activation_function” + * argument (if not “NONE”). + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Inputs: + * * 0: input. + * A 2-D tensor of type T, of shape [batch_size, input_size], where + * “batch_size” corresponds to the batching dimension, and “input_size” + * is the size of the input. + * * 1: weights. + * A 2-D tensor of type T, of shape [num_units, input_size], where + * “num_units” corresponds to the number of units. + * * 2: recurrent_weights. + * A 2-D tensor of type T, of shape [num_units, num_units], with columns + * corresponding to the weights from each unit. + * * 3: bias. + * A 1-D tensor of type T, of shape [num_units]. + * + * For FLOAT32 input tensor, bias must also be FLOAT32. + * For UINT8 input tensor, bias must be INT32. + * + * Parameters + * * 4: fused_activation_function. + * An (optional) ActivationFunctionType indicating the activation + * function. If “NONE” is specified then it results in a linear + * activation. + * + * * 5: Hidden state. + * A 2-D tensor of type T, of shape [batch_size, num_units]. + * + * Outputs: + * * 0: output. + * A 2-D tensor of type T, of shape [batch_size, num_units]. This is + * effectively the same as the current state value. + */ + ANEURALNETWORKS_RNN = 24, + + /** Computes the softmax activation on the input tensor element-wise, per + * batch, by normalizing the input vector so the maximum coefficient is zero. + * + * The output is calculated using this formula: + * + * output[batch, i] = + * exp((input[batch, i] - max(input[batch, :])) * beta) / + * sum_{k}{exp((input[batch, k] - max(input[batch, :])) * beta)} + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 2 or 4. + * + * Inputs: + * * 0: A 2-D or 4-D tensor, specifying the tensor to be reshaped. + * * 1: A FLOAT32 value, specifying the scaling factor for the exponent, beta. + * + * Outputs: + * * 0: The output tensor of same shape as input0. + */ + ANEURALNETWORKS_SOFTMAX = 25, + + /** Rearranges blocks of spatial data, into depth. + * + * More specifically, this op outputs a copy of the input tensor where values + * from the height and width dimensions are moved to the depth dimension. The + * value block_size indicates the input block size and how the data is moved. + * + * Chunks of data of size block_size * block_size from depth are rearranged + * into non-overlapping blocks of size block_size x block_size. + * + * The depth of the output tensor is input_depth * block_size * block_size. + * The input tensor's height and width must be divisible by block_size. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4, with "NHWC" data layout. + * + * Inputs: + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying + * the input. + * * 1: An INT32 value, specifying the block_size. block_size must be >=1 and + * block_size must be a divisor of both the input height and width. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batch, height/block_size, + * width/block_size, depth*block_size*block_size]. + */ + ANEURALNETWORKS_SPACE_TO_DEPTH = 26, + + /** + * SVDF op is a kind of stateful layer derived from the notion that a + * densely connected layer that's processing a sequence of input frames can + * be approximated by using a singular value decomposition of each of its + * nodes. The implementation is based on: + * + * https://research.google.com/pubs/archive/43813.pdf + * + * P. Nakkiran, R. Alvarez, R. Prabhavalkar, C. Parada. + * “Compressing Deep Neural Networks using a Rank-Constrained Topology”. + * INTERSPEECH, 2015. + * + * It processes the incoming input using a 2-stage filtering mechanism: + * * stage 1 performs filtering on the "features" dimension, whose outputs get + * pushed into a memory of fixed-size memory_size. + * * stage 2 performs filtering on the "time" dimension of the memory_size + * memoized outputs of stage 1. + * + * Specifically, for rank 1, this layer implements the operation: + * + * memory = push(conv1d(inputs, weights_feature, feature_dim, "VALID")); + * outputs = activation(memory * weights_time + bias); + * + * Where: + * * “weights_feature” is a weights matrix that processes the inputs (by + * convolving the input with every “feature filter”), and whose outputs get + * pushed, stacked in order, into the fixed-size “memory” (the oldest entry + * gets dropped); + * * “weights_time” is a weights matrix that processes the “memory” (by a + * batched matrix multiplication on the num_units); + * * “bias” is an optional bias vector (added to each output vector in the + * batch); and + * * “activation” is the function passed as the “fused_activation_function” + * argument (if not “NONE”). + * + * Each rank adds a dimension to the weights matrices by means of stacking + * the filters. + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Inputs: + * * 0: input. + * A 2-D tensor of type T, of shape [batch_size, input_size], where + * “batch_size” corresponds to the batching dimension, and “input_size” + * is the size of the input. + * * 1: weights_feature. + * A 2-D tensor of type T, of shape [num_units, input_size], where + * “num_units” corresponds to the number of units. + * * 2: weights_time. + * A 2-D tensor of type T, of shape [num_units, memory_size], where + * “memory_size” corresponds to the fixed-size of the memory. + * * 3: bias. + * A optional 1-D tensor of type T, of shape [num_units]. + * + * For FLOAT32 input tensor, bias must also be FLOAT32. + * For UINT8 input tensor, bias must be INT32. + * + * Parameters: + * * 4: rank. + * The rank of the SVD approximation. + * * 5: fused_activation_function. + * An (optional) ActivationFunctionType indicating the activation + * function. If “NONE” is specified then it results in a linear activation. + * + * Outputs: + * * 0: state. + * A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * + * num_units * rank]. + * * 1: output. + * A 2-D tensor of type T, of shape [batch_size, num_units]. + */ + ANEURALNETWORKS_SVDF = 27, + + /** Computes hyperbolic tangent of input tensor element-wise. + * + * The output is calculated using this formula: + * + * output = tanh(input) + * + * Supported tensor types: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: up to 4. + * + * Inputs: + * * 0: A tensor, specifying the input. + * + * Outputs: + * * 0: The output tensor of same shape as input0. + */ + ANEURALNETWORKS_TANH = 28, +}; + +/** + * Fused activation function types. + * + */ +enum { + /** NO fused activation function. */ + ANEURALNETWORKS_FUSED_NONE = 0, + /** Fused ReLU activation function. */ + ANEURALNETWORKS_FUSED_RELU = 1, + /** Fused ReLU1 activation function. */ + ANEURALNETWORKS_FUSED_RELU1 = 2, + /** Fused ReLU6 activation function. */ + ANEURALNETWORKS_FUSED_RELU6 = 3, +}; + +/** + * Execution preferences. + */ +enum { + /** + * Prefer executing in a way that minimizes battery drain. + * This is desirable for compilations that will be executed often. + */ + ANEURALNETWORKS_PREFER_LOW_POWER = 0, + /** + * Prefer returning a single answer as fast as possible, even if this causes + * more power consumption. + */ + ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER = 1, + /** + * Prefer maximizing the throughput of successive frames, for example when + * processing successive frames coming from the camera. + */ + ANEURALNETWORKS_PREFER_SUSTAINED_SPEED = 2, +}; + +/** + * Result codes. + */ +enum { + ANEURALNETWORKS_NO_ERROR = 0, + ANEURALNETWORKS_OUT_OF_MEMORY = 1, + ANEURALNETWORKS_INCOMPLETE = 2, + ANEURALNETWORKS_UNEXPECTED_NULL = 3, + ANEURALNETWORKS_BAD_DATA = 4, + ANEURALNETWORKS_OP_FAILED = 5, + ANEURALNETWORKS_UNMAPPABLE = 5, + ANEURALNETWORKS_BAD_STATE = 6, +}; + +/** + * ANeuralNetworksMemory is an opaque type that represents memory. + * + * This type is used to represent shared memory, memory mapped files, + * and similar memories. + * + * By using shared memory, a program can efficiently communicate to the + * runtime and drivers the tensors that define a model. See + * {@link ANeuralNetworksModel_setOperandValueFromMemory}. An application + * should typically create one shared memory object that contains every tensor + * needed to define a model. {@link ANeuralNetworksMemory_createFromFd} can be + * used to create shared memory from a file handle. {@link + * ANeuralNetworksMemory_createShared} can be used to directly created shared + * memory. + * + * Memory objects can also be used to specify the input and output arguments of + * an execution. See {@link ANeuralNetworksExecution_setInputFromMemory} + * and {@link ANeuralNetworksExecution_setOutputFromMemory}. + */ +typedef struct ANeuralNetworksMemory ANeuralNetworksMemory; + +/** + * ANeuralNetworksModel is an opaque type that contains a description of the + * mathematical operations that constitute the model. + * + *

The model will be built by calling

    + *
  • {@link ANeuralNetworksModel_create},
  • + *
  • {@link ANeuralNetworksModel_addOperation},
  • + *
  • {@link ANeuralNetworksModel_addOperand},
  • + *
+ * + * A model is completed by calling {@link ANeuralNetworksModel_finish}. + * A model is destroyed by calling {@link ANeuralNetworksModel_free}. + * + *

It is the application's responsibility to make sure that only one thread + * modifies a model at a given time. It is however safe for more than one + * thread to use the model once {@link ANeuralNetworksModel_finish} has + * returned.

+ * + *

It is also the application's responsibility to ensure that there are no + * other uses of the model after calling {@link ANeuralNetworksModel_free}. This + * includes any compilation or execution object created using the model.

+ */ +typedef struct ANeuralNetworksModel ANeuralNetworksModel; + +/** + * ANeuralNetworksCompilation is an opaque type that can be used to compile + * a machine learning model. + * + *

To use:

    + *
  • Create a new compilation instance by calling the + * {@link ANeuralNetworksCompilation_create} function.
  • + *
  • Perform the compilation with {@link + * ANeuralNetworksCompilation_start}.
  • Wait for the compilation to + * complete with {@link ANeuralNetworksCompilation_wait}.
  • Use the + * compilation as many times as needed with {@link + * ANeuralNetworksExecution_create}.
  • Destroy the compilation with + * {@link ANeuralNetworksCompilation_free} once all executions using the + * compilation have completed.

+ * + *

A compilation cannot be modified once {@link + * ANeuralNetworksCompilation_start} has been called on it.

+ * + *

It is the application's responsibility to make sure that only one thread + * modifies a compilation at a given time. It is however safe for more than one + * thread to use {@link ANeuralNetworksCompilation_wait} at the same time. + * It is also safe for multiple threads to use a compilation object once + * {@link ANeuralNetworksCompilation_wait} has completed.

+ * + *

It is also the application's responsibility to ensure that there are no + * other uses of the compilation after calling {@link + * ANeuralNetworksCompilation_free}. This includes any execution object created + * using the compilation.

+ */ +typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation; + +/** + * ANeuralNetworksExecution is an opaque type that can be used to apply a + * machine learning model to a set of inputs. + * + *

To use:

    + *
  • Create a new execution instance by calling the + * {@link ANeuralNetworksExecution_create} function.
  • + *
  • Associate data to the model inputs with + * {@link ANeuralNetworksExecution_setInput} or + * {@link ANeuralNetworksExecution_setInputFromMemory}.
  • + *
  • Associate output buffers to the model outputs with + * {@link ANeuralNetworksExecution_setOutput} or + * {@link ANeuralNetworksExecution_setOutputFromMemory}.
  • + *
  • Apply the model with {@link + * ANeuralNetworksExecution_startCompute}.
  • Wait for the execution to + * complete with {@link ANeuralNetworksExecution_wait}.
  • Destroy the + * execution with + * {@link ANeuralNetworksExecution_free}.

+ * + *

An execution cannot be modified once {@link + * ANeuralNetworksExecution_start} has been called on it.

+ * + *

An execution can be applied to a model with + * {@link ANeuralNetworksExecution_startCompute} only once. Create new + * executions to do new evaluations of the model.

+ * + *

It is the application's responsibility to make sure that only one thread + * modifies an execution at a given time. It is however safe for more than one + * thread to use {@link ANeuralNetworksExecution_wait} at the same time.

+ * + *

It is also the application's responsibility to ensure that there are no + * other uses of the request after calling {@link + * ANeuralNetworksRequest_free}.

+ */ +typedef struct ANeuralNetworksExecution ANeuralNetworksExecution; + +/** + * ANeuralNetworksOperandType describes the type of an operand. + * This structure is used to describe both scalars and tensors. + */ +typedef struct ANeuralNetworksOperandType { + /** The data type, e.g ANEURALNETWORKS_INT8. */ + int32_t type; + /** The number of dimensions. It should be 0 for scalars. */ + uint32_t dimensionCount; + /** The dimensions of the tensor. It should be nullptr for scalars. */ + const uint32_t* dimensions; + /** These two fields are only used for quantized tensors. + * They should be zero for scalars and non-fixed point tensors. + * The dequantized value of each entry is (value - offset) * scale. + */ + float scale; + int32_t zeroPoint; +} ANeuralNetworksOperandType; + +/** + * ANeuralNetworksEvent is an opaque type that represents an event + * that will be signaled once an execution completes. + */ +typedef struct ANeuralNetworksEvent ANeuralNetworksEvent; + +typedef int32_t ANeuralNetworksOperationType; + +// nn api function types + +typedef int (*ANeuralNetworksMemory_createFromFd_fn)( + size_t size, int protect, int fd, size_t offset, + ANeuralNetworksMemory** memory); + +typedef void (*ANeuralNetworksMemory_free_fn)(ANeuralNetworksMemory* memory); + +typedef int (*ANeuralNetworksModel_create_fn)(ANeuralNetworksModel** model); + +typedef int (*ANeuralNetworksModel_finish_fn)(ANeuralNetworksModel* model); + +typedef void (*ANeuralNetworksModel_free_fn)(ANeuralNetworksModel* model); + +typedef int (*ANeuralNetworksCompilation_create_fn)( + ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation); + +typedef void (*ANeuralNetworksCompilation_free_fn)( + ANeuralNetworksCompilation* compilation); + +typedef int (*ANeuralNetworksCompilation_setPreference_fn)( + ANeuralNetworksCompilation* compilation, int32_t preference); + +typedef int (*ANeuralNetworksCompilation_finish_fn)( + ANeuralNetworksCompilation* compilation); + +typedef int (*ANeuralNetworksModel_addOperand_fn)( + ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type); + +typedef int (*ANeuralNetworksModel_setOperandValue_fn)( + ANeuralNetworksModel* model, int32_t index, const void* buffer, + size_t length); + +typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)( + ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksMemory* memory, size_t offset, size_t length); + +typedef int (*ANeuralNetworksModel_addOperation_fn)( + ANeuralNetworksModel* model, ANeuralNetworksOperationType type, + uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, + const uint32_t* outputs); + +typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)( + ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, + uint32_t outputCount, const uint32_t* outputs); + +typedef int (*ANeuralNetworksExecution_create_fn)( + ANeuralNetworksCompilation* compilation, + ANeuralNetworksExecution** execution); + +typedef void (*ANeuralNetworksExecution_free_fn)( + ANeuralNetworksExecution* execution); + +typedef int (*ANeuralNetworksExecution_setInput_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const void* buffer, size_t length); + +typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length); + +typedef int (*ANeuralNetworksExecution_setOutput_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, void* buffer, size_t length); + +typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length); + +typedef int (*ANeuralNetworksExecution_startCompute_fn)( + ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event); + +typedef int (*ANeuralNetworksEvent_wait_fn)(ANeuralNetworksEvent* event); + +typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent* event); + +/** + * Creates a shared memory object from a file descriptor. + * + * The shared memory is backed by a file descriptor via mmap. + * See {@link ANeuralNetworksMemory} for a description on how to use + * this shared memory. + * + * @param size The requested size in bytes. + * Must not be larger than the file size. + * @param prot The desired memory protection for the mapping. + * It is either PROT_NONE or the bitwise OR of one or + * more of the following flags: PROT_READ, PROT_WRITE. + * @param fd The requested file descriptor. + * The file descriptor has to be mmap-able. The file + * descriptor will be duplicated. + * @param offset The offset to the beginning of the file of the area to map. + * The offset has to be aligned to a page size. + * @param memory The memory object to be created. + * Set to NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if the request completed normally. + */ +inline int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, + size_t offset, + ANeuralNetworksMemory** memory) { + LOAD_FUNCTION(ANeuralNetworksMemory_createFromFd); + EXECUTE_FUNCTION_RETURN(size, protect, fd, offset, memory); +} + +/** + * Delete a memory object. + * + * Destroys the object used by the run time to keep track of the memory. + * This will free the underlying actual memory if no other code has open + * handles to this memory. + * + * @param memory The memory object to be freed. + */ +inline void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) { + LOAD_FUNCTION(ANeuralNetworksMemory_free); + EXECUTE_FUNCTION(memory); +} + +/** + * Create an empty {@link ANeuralNetworksModel}. + * + *

This only creates the object. Computation is performed once + * {@link ANeuralNetworksExecution_startCompute} is invoked. + * + * The model should be constructed with calls to + * {@link ANeuralNetworksModel_addOperation} and + * {@link ANeuralNetworksModel_addOperand} + * + *

{@link ANeuralNetworksModel_finish} should be called once the model + * has been fully constructed.

+ * + *

{@link ANeuralNetworksModel_free} should be called once the model + * is no longer needed.

+ * + * @param model The {@link ANeuralNetworksModel} to be created. + * Set to NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_create(ANeuralNetworksModel** model) { + LOAD_FUNCTION(ANeuralNetworksModel_create); + EXECUTE_FUNCTION_RETURN(model); +} + +/** + * Destroy a model. + * + * The model need not have been finished by a call to + * {@link ANeuralNetworksModel_finish}. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be destroyed. Passing NULL is acceptable and + * results in no operation. + */ +inline void ANeuralNetworksModel_free(ANeuralNetworksModel* model) { + LOAD_FUNCTION(ANeuralNetworksModel_free); + EXECUTE_FUNCTION(model); +} + +/** + * Indicate that we have finished modifying a model. Required before + * calling {@link ANeuralNetworksCompilation_compile}. + * + * An application is responsible to make sure that no other thread uses + * the model at the same time. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be finished. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) { + LOAD_FUNCTION(ANeuralNetworksModel_finish); + EXECUTE_FUNCTION_RETURN(model); +} + +/** + * Add an operand to a model. + * + * The order in which the operands are added is important. The first one added + * to a model will have the index value 0, the second 1, etc. These indexes are + * used as operand identifiers in {@link ANeuralNetworksModel_addOperation}, + * {@link ANeuralNetworksExecution_setInput}, + * {@link ANeuralNetworksExecution_setInputFromMemory}, + * {@link ANeuralNetworksExecution_setOutput}, + * {@link ANeuralNetworksExecution_setOutputFromMemory} and + * {@link ANeuralNetworksExecution_setOperandValue}. + * + * To build a model that can accomodate inputs of various sizes, as you may want + * to do for a CNN, set the size of the dimensions that will vary at run time to + * 0. If you do so, provide the full dimensions when calling + * {@link ANeuralNetworksExecution_setInput} or {@link + * ANeuralNetworksExecution_setInputFromMemory}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be modified. + * @param type The {@link ANeuralNetworksOperandType} that describes the shape + * of the operand. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_addOperand( + ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) { + LOAD_FUNCTION(ANeuralNetworksModel_addOperand); + EXECUTE_FUNCTION_RETURN(model, type); +} + +/** + * Sets an operand to a constant value. + * + * For scalar values, the content of buffer is copied into the model. + * + * For tensor values, a pointer to the buffer is stored within the model. + * The application is responsible for not changing the content of this region + * until all executions using this model have completed. As the data may + * be copied during processing, modifying the data after this call yields + * undefined results. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be modified. + * @param index The index of the model operand we're setting. + * @param buffer A pointer to the data to use. + * @param length The size in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, + int32_t index, + const void* buffer, + size_t length) { + LOAD_FUNCTION(ANeuralNetworksModel_setOperandValue); + EXECUTE_FUNCTION_RETURN(model, index, buffer, length); +} + +/** + * Sets an operand to a value stored in a memory object. + * + * The content of the memory is not copied. A reference to that memory is stored + * inside the model. The application is responsible for not changing the content + * of the memory region until all executions using this model have completed. + * As the data may be copied during processing, modifying the data after this + * call yields undefined results. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be modified. + * @param index The index of the model operand we're setting. + * @param buffer A pointer to the data to use. + * @param memory The memory containing the data. + * @param offset This specifies the location of the data within the memory. + * The offset is in bytes from the start of memory. + * @param length The size in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_setOperandValueFromMemory( + ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksMemory* memory, size_t offset, size_t length) { + LOAD_FUNCTION(ANeuralNetworksModel_setOperandValueFromMemory); + EXECUTE_FUNCTION_RETURN(model, index, memory, offset, length); +} + +/** + * Add an operation to a model. + * + * @param model The model to be modified. + * @param type The type of the operation. + * @param inputCount The number of entries in the inputs array. + * @param inputs An array of indexes identifying each operand. + * @param outputCount The number of entries in the outputs array. + * @param outputs An array of indexes identifying each operand. + * + * The operands specified by inputs and outputs must have been + * previously added by calls to {@link ANeuralNetworksModel_addOperand}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model, + ANeuralNetworksOperationType type, + uint32_t inputCount, + const uint32_t* inputs, + uint32_t outputCount, + const uint32_t* outputs) { + LOAD_FUNCTION(ANeuralNetworksModel_addOperation); + EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount, + outputs); +} + +/** + * Specfifies which operands will be the model's inputs and outputs. + * + * An operand cannot be used for both input and output. Doing so will + * return an error. + * + * @param model The model to be modified. + * @param inputCount The number of entries in the inputs array. + * @param inputs An array of indexes identifying the input operands. + * @param outputCount The number of entries in the outputs array. + * @param outputs An array of indexes identifying the output operands. + * + * The operands specified by inputs and outputs must have been + * previously added by calls to {@link ANeuralNetworksModel_addOperand}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + */ +inline int ANeuralNetworksModel_identifyInputsAndOutputs( + ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, + uint32_t outputCount, const uint32_t* outputs) { + LOAD_FUNCTION(ANeuralNetworksModel_identifyInputsAndOutputs); + EXECUTE_FUNCTION_RETURN(model, inputCount, inputs, outputCount, outputs); +} + +/** + * Create a {@link ANeuralNetworksCompilation} to compile the given model. + * This only creates the object. Compilation is only performed once + * {@link ANeuralNetworksCompilation_start} is invoked. + * + *

The provided model must outlive the compilation.

+ * + * The model must already have been finished by a call to + * {@link ANeuralNetworksModel_finish}. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @param model The {@link ANeuralNetworksModel} to be compiled. + * @param compilation The newly created object or NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA + * if the model is invalid. + */ +inline int ANeuralNetworksCompilation_create( + ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) { + LOAD_FUNCTION(ANeuralNetworksCompilation_create); + EXECUTE_FUNCTION_RETURN(model, compilation); +} + +/** + * Destroy a compilation. + * + *

If called on a compilation for which + * {@link ANeuralNetworksCompilation_start} has been called, the + * function will return immediately but will mark the compilation to be deleted + * once the compilation completes. The {@link ANeuralNetworksCompilation_wait} + * will return ERROR_DELETED. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @param compilation The compilation to be destroyed. Passing NULL is + * acceptable and results in no operation. + */ +inline void ANeuralNetworksCompilation_free( + ANeuralNetworksCompilation* compilation) { + LOAD_FUNCTION(ANeuralNetworksCompilation_free); + EXECUTE_FUNCTION(compilation); +} + +/** + * Sets the execution preference. + * + *

Provides guidance to the runtime when trade-offs are possible.

+ * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @param compilation The compilation to be modified. + * @param preference Either {@link PREFER_LOW_POWER}, + * {@link PREFER_SINGLE_FAST_ANSWER}, or + * {@link PREFER_SUSTAINED_SPEED}. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksCompilation_setPreference( + ANeuralNetworksCompilation* compilation, int32_t preference) { + LOAD_FUNCTION(ANeuralNetworksCompilation_setPreference); + EXECUTE_FUNCTION_RETURN(compilation, preference); +} + +/** + * Waits until the compilation completes. + * + * More than one thread can wait on a compilation. When the compilation + * completes, all threads will be released. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @return ANEURALNETWORKS_NO_ERROR if the compilation completed normally. + */ +inline int ANeuralNetworksCompilation_finish( + ANeuralNetworksCompilation* compilation) { + LOAD_FUNCTION(ANeuralNetworksCompilation_finish); + EXECUTE_FUNCTION_RETURN(compilation); +} +/** + * Create a {@link ANeuralNetworksExecution} to apply the given compilation. + * This only creates the object. Computation is only performed once + * {@link ANeuralNetworksExecution_startCompute} is invoked. + * + *

The provided compilation must outlive the execution.

+ * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated. + * @param execution The newly created object or NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA + * if the compilation is invalid. + */ +inline int ANeuralNetworksExecution_create( + ANeuralNetworksCompilation* compilation, + ANeuralNetworksExecution** execution) { + LOAD_FUNCTION(ANeuralNetworksExecution_create); + EXECUTE_FUNCTION_RETURN(compilation, execution); +} + +/** + * Destroy an execution. + * + *

If called on an execution for which + * {@link ANeuralNetworksExecution_startCompute} has been called, the + * function will return immediately but will mark the execution to be deleted + * once the computation completes. The {link ANeuralNetworksExecution_wait} + * will return ANEURALNETWORKS_ERROR_DELETED. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be destroyed. Passing NULL is acceptable + * and results in no operation. + */ +inline void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) { + LOAD_FUNCTION(ANeuralNetworksExecution_free); + EXECUTE_FUNCTION(execution); +} + +/** + * Associate a user buffer with an input of the model of the + * {@link ANeuralNetworksExecution}. + * + *

The provided buffer must outlive the execution.

+ * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the input argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This should be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other properties of the type must be the same as + * specified in the model. If the type is the same as specified + * when the model was built, NULL can be passed. + * @param buffer The buffer containing the data. + * @param length The length in bytes of the buffer. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the input. + */ +inline int ANeuralNetworksExecution_setInput( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const void* buffer, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setInput); + EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length); +} + +/** + * Associate part of a memory object with an input of the model of the + * {@link ANeuralNetworksExecution}. + * + *

The provided memory must outlive the execution.

+ * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the input argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This can be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other values must be the same as specified in the + * model. If the type is the same as specified when the model + * was built, NULL can be passed. + * @param memory The memory containing the data. + * @param offset This specifies the location of the data whithin the memory. + * The offset is in bytes from the start of memory. + * @param length The size in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the input. + */ +inline int ANeuralNetworksExecution_setInputFromMemory( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setInputFromMemory); + EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length); +} + +/** + * Associate a user buffer with an output of the model of the + * {@link ANeuralNetworksExecution}. + * + *

The provided buffer must outlive the execution.

+ * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the output argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This can be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other values must be the same as specified in the + * model. If the type is the same as specified when the model + * was built, NULL can be passed. + * @param buffer The buffer where the data is to be written. + * @param length The length in bytes of the buffer. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the output. + */ +inline int ANeuralNetworksExecution_setOutput( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, void* buffer, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setOutput); + EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length); +} + +/** + * Associate part of a memory object with an output of the model of the + * {@link ANeuralNetworksExecution}. + * + *

The provided memory must outlive the execution.

+ * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the output argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This can be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other values must be the same as specified in the + * model. If the type is the same as specified when the model + * was built, NULL can be passed. + * @param memory The memory where the data is to be stored. + * @param offset This specifies the location of the data whithin the memory. + * The offset is in bytes from the start of memory. + * @param length The length in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the output. + */ +inline int ANeuralNetworksExecution_setOutputFromMemory( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setOutputFromMemory); + EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length); +} + +/** + * Schedule evaluation of the execution. + * + *

Schedules evaluation of the execution. Once the model has been + * applied and the outputs are ready to be consumed, the execution will be + * signaled. Use {@link ANeuralNetworksExecution_wait} to wait for that signal. + *

+ * + * Multiple executions can be scheduled and evaluated concurrently, and + * compilations can be performed concurrently with executions. The runtime makes + * no guarantee on the ordering of the completion of compilations and + * executions. If it's important to the application, the application should + * enforce the ordering by using {@link ANeuralNetworksCompilation_wait} and + * {@link ANeuralNetworksExecution_wait}. + * + * ANeuralNetworksExecution_wait must be called to recuperate the resources used + * by the execution. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be scheduled and executed. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksExecution_startCompute( + ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) { + LOAD_FUNCTION(ANeuralNetworksExecution_startCompute); + EXECUTE_FUNCTION_RETURN(execution, event); +} + +/** + * Waits until the execution completes. + * + * More than one thread can wait on an event. When the execution completes, + * all threads will be released. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally. + */ +inline int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) { + LOAD_FUNCTION(ANeuralNetworksEvent_wait); + EXECUTE_FUNCTION_RETURN(event); +} + +/** + * Destroys the event. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + */ +inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) { + LOAD_FUNCTION(ANeuralNetworksEvent_free); + EXECUTE_FUNCTION(event); +} + +/**/ + +#endif // NN_API_SHIM_H0 diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc new file mode 100644 index 0000000000..6a199cc840 --- /dev/null +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -0,0 +1,386 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/nnapi_delegate.h" +#include +#include +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h" + +namespace tflite { + +// TODO(aselle): FATAL leaves resources hanging. +void FATAL(const char* format, ...) { + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fflush(stderr); + exit(1); +} + +// TODO(aselle): Change the error model to use status codes. +#define CHECK_TFLITE_SUCCESS(x) \ + if (x != kTfLiteOk) { \ + FATAL("Aborting since tflite returned failure."); \ + } + +#define CHECK_NN(x) \ + if (x != ANEURALNETWORKS_NO_ERROR) { \ + FATAL("Aborting since tflite returned failure."); \ + } + +NNAPIAllocation::NNAPIAllocation(const char* filename, + ErrorReporter* error_reporter) + : MMAPAllocation(filename, error_reporter) { + if (mmapped_buffer_ != MAP_FAILED) + CHECK_NN(ANeuralNetworksMemory_createFromFd(buffer_size_bytes_, PROT_READ, + mmap_fd_, 0, &handle_)); +} + +NNAPIAllocation::~NNAPIAllocation() { + if (handle_) { + ANeuralNetworksMemory_free(handle_); + } +} + +NNAPIDelegate::~NNAPIDelegate() { + if (nn_model_) { + ANeuralNetworksModel_free(nn_model_); + nn_model_ = nullptr; + // TODO(aselle): Is this thread-safe and callable multiple times? + } + // ANeuralNetworksShutdown(); +} + +// Adds the tensors of the interpreter to the NN API model. +// Returns the number of operands added. +uint32_t addTensorOperands(tflite::Interpreter* interpreter, + ANeuralNetworksModel* nn_model) { + uint32_t next_id = 0; + for (size_t i = 0; i < interpreter->tensors_size(); i++) { + int32_t nn_type = 0; + float scale = 1.0f; + int32_t zeroPoint = 0; + TfLiteTensor* tensor = interpreter->tensor(i); + switch (tensor->type) { + case kTfLiteNoType: + // Tensors added during initialization of Ops don't have a type yet and + // should not be registered with the NNAPI. + continue; + case kTfLiteFloat32: + nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; + break; + case kTfLiteUInt8: + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; + scale = tensor->params.scale; + zeroPoint = tensor->params.zero_point; + break; + case kTfLiteInt32: + nn_type = ANEURALNETWORKS_TENSOR_INT32; + scale = tensor->params.scale; + zeroPoint = tensor->params.zero_point; + break; + default: + FATAL("Unsupported type."); + } + // TODO(aselle): Note, many of these are intermediate results. Do I need + // to ever specify these sizes. I am currently below doing setValue + // on all of them, but I shouldn't in the future. + // Answer(jeanluc): If all the operators can set the dimension correctly, + // you won't need to. + ANeuralNetworksOperandType operand_type{ + nn_type, static_cast(tensor->dims->size), + reinterpret_cast(tensor->dims->data), scale, zeroPoint}; + CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); + + // TODO(aselle): Based on Michael's suggestion, limiting this to read + // only memory + if (tensor->allocation_type == kTfLiteMmapRo) { + if (const NNAPIAllocation* alloc = dynamic_cast( + static_cast(tensor->allocation))) { + CHECK_NN(ANeuralNetworksModel_setOperandValueFromMemory( + nn_model, i, alloc->memory(), alloc->offset(tensor->data.raw), + tensor->bytes)); + } else { + CHECK_NN(ANeuralNetworksModel_setOperandValue( + nn_model, i, tensor->data.raw, tensor->bytes)); + } + } + ++next_id; + } + return next_id; +} + +// Adds the operations and their parameters to the NN API model. +// 'next-id' is the operand ID of the next operand of the model. +void AddOpsAndParams(tflite::Interpreter* interpreter, + ANeuralNetworksModel* nn_model, uint32_t next_id) { + for (size_t i = 0; i < interpreter->nodes_size(); i++) { + const auto* node_and_registration = interpreter->node_and_registration(i); + const TfLiteNode& node = node_and_registration->first; + const TfLiteRegistration& registration = node_and_registration->second; + tflite::BuiltinOperator builtin = + static_cast(registration.builtin_code); + + // Add the parameters. + std::vector augmented_inputs( + node.inputs->data, node.inputs->data + node.inputs->size); + + auto add_scalar_int32 = [&nn_model, &augmented_inputs, + &next_id](int value) { + ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32}; + CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) + CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value, + sizeof(int32_t))) + augmented_inputs.push_back(next_id++); + }; + + auto add_scalar_float32 = [&nn_model, &augmented_inputs, + &next_id](float value) { + ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_FLOAT32}; + CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) + CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value, + sizeof(float))) + augmented_inputs.push_back(next_id++); + }; + + auto add_add_params = [&add_scalar_int32]() { add_scalar_int32(0); }; + + auto add_pooling_params = [&add_scalar_int32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_int32(builtin->padding); + add_scalar_int32(builtin->stride_width); + add_scalar_int32(builtin->stride_height); + add_scalar_int32(builtin->filter_width); + add_scalar_int32(builtin->filter_height); + add_scalar_int32(builtin->activation); + }; + + auto add_convolution_params = [&add_scalar_int32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_int32(builtin->padding); + add_scalar_int32(builtin->stride_width); + add_scalar_int32(builtin->stride_height); + add_scalar_int32(builtin->activation); + }; + + auto add_depthwise_conv_params = [&add_scalar_int32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_int32(builtin->padding); + add_scalar_int32(builtin->stride_width); + add_scalar_int32(builtin->stride_height); + add_scalar_int32(builtin->depth_multiplier); + add_scalar_int32(builtin->activation); + }; + + auto add_fully_connected_params = [&add_scalar_int32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_int32(builtin->activation); + }; + + auto add_concatenation_params = [&add_scalar_int32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_int32(builtin->axis); + if (builtin->activation != kTfLiteActNone) { + FATAL("Concatenation does not support fused activation in NNAPI"); + } + }; + + auto add_softmax_params = [&add_scalar_float32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_float32(builtin->beta); + }; + +#if 0 + auto add_reshape_params = [&](void* data) { + auto builtin = reinterpret_cast(data); + uint32_t tensor_size_shape = builtin->num_dimensions; + ANeuralNetworksOperandType operand_type{ + ANEURALNETWORKS_TENSOR_INT32, + {static_cast(1), + reinterpret_cast(&tensor_size_shape)}, + 0, + 0}; + CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) + CHECK_NN(ANeuralNetworksModel_setOperandValue( + nn_model, next_id, builtin->shape, + sizeof(int) * builtin->num_dimensions)); + augmented_inputs.push_back(next_id++); + }; +#endif + + ANeuralNetworksOperationType nn_op_type; + switch (builtin) { + case tflite::BuiltinOperator_ADD: + nn_op_type = ANEURALNETWORKS_ADD; + add_add_params(); + break; + case tflite::BuiltinOperator_AVERAGE_POOL_2D: + add_pooling_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D; + break; + case tflite::BuiltinOperator_MAX_POOL_2D: + add_pooling_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_MAX_POOL_2D; + break; + case tflite::BuiltinOperator_L2_POOL_2D: + add_pooling_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_L2_POOL_2D; + break; + case tflite::BuiltinOperator_CONV_2D: + add_convolution_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_CONV_2D; + break; + case tflite::BuiltinOperator_RELU: + nn_op_type = ANEURALNETWORKS_RELU; + break; + case tflite::BuiltinOperator_RELU6: + nn_op_type = ANEURALNETWORKS_RELU6; + break; + case tflite::BuiltinOperator_TANH: + nn_op_type = ANEURALNETWORKS_TANH; + break; + case tflite::BuiltinOperator_LOGISTIC: + nn_op_type = ANEURALNETWORKS_LOGISTIC; + break; + case tflite::BuiltinOperator_DEPTHWISE_CONV_2D: + add_depthwise_conv_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D; + break; + case tflite::BuiltinOperator_CONCATENATION: + add_concatenation_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_CONCATENATION; + break; + case tflite::BuiltinOperator_SOFTMAX: + add_softmax_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_SOFTMAX; + break; + case tflite::BuiltinOperator_FULLY_CONNECTED: + add_fully_connected_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED; + break; + case tflite::BuiltinOperator_RESHAPE: + nn_op_type = ANEURALNETWORKS_RESHAPE; + // add_reshape_params(node.builtin_data); + break; + case tflite::BuiltinOperator_CONCAT_EMBEDDINGS: + case tflite::BuiltinOperator_LSH_PROJECTION: + case tflite::BuiltinOperator_SVDF: + case tflite::BuiltinOperator_HASHTABLE_LOOKUP: + case tflite::BuiltinOperator_RNN: + case tflite::BuiltinOperator_EMBEDDING_LOOKUP: + case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: + case tflite::BuiltinOperator_LSTM: + case tflite::BuiltinOperator_L2_NORMALIZATION: + case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: + case tflite::BuiltinOperator_MUL: + case tflite::BuiltinOperator_RESIZE_BILINEAR: + case tflite::BuiltinOperator_CALL: + case tflite::BuiltinOperator_SKIP_GRAM: + case tflite::BuiltinOperator_RELU1: + case tflite::BuiltinOperator_SPACE_TO_DEPTH: + FATAL("Op code %d is currently not delegated to NNAPI", builtin); + nn_op_type = -1; // set to invalid + break; + case tflite::BuiltinOperator_CUSTOM: + FATAL("Custom operations are not supported when using NNAPI."); + nn_op_type = -1; // set to invalid + break; + } + + // Add the operation. + CHECK_NN(ANeuralNetworksModel_addOperation( + nn_model, nn_op_type, static_cast(augmented_inputs.size()), + augmented_inputs.data(), static_cast(node.outputs->size), + reinterpret_cast(node.outputs->data))); + } +} + +TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) { + // TODO(aselle): This is not correct. need to handle resize invalidation. + if (nn_model_ && nn_compiled_model_) return kTfLiteOk; + + if (!nn_model_) { + CHECK_NN(ANeuralNetworksModel_create(&nn_model_)); + + uint32_t next_id = addTensorOperands(interpreter, nn_model_); + AddOpsAndParams(interpreter, nn_model_, next_id); + CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs( + nn_model_, static_cast(interpreter->inputs().size()), + reinterpret_cast(interpreter->inputs().data()), + static_cast(interpreter->outputs().size()), + reinterpret_cast(interpreter->outputs().data()))); + CHECK_NN(ANeuralNetworksModel_finish(nn_model_)); + } + if (!nn_compiled_model_) { + CHECK_NN(ANeuralNetworksCompilation_create(nn_model_, &nn_compiled_model_)); + CHECK_NN(ANeuralNetworksCompilation_finish(nn_compiled_model_)); + } + return kTfLiteOk; +} + +TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) { + if (!nn_model_) { + TF_LITE_ENSURE_STATUS(BuildGraph(interpreter)); + } + + ANeuralNetworksExecution* execution = nullptr; + CHECK_NN(ANeuralNetworksExecution_create(nn_compiled_model_, &execution)); + + // Currently perform deep copy of input buffer + for (size_t i = 0; i < interpreter->inputs().size(); i++) { + int input = interpreter->inputs()[i]; + // TODO(aselle): Is this what we want or do we want input instead? + // TODO(aselle): This should be called setInputValue maybe to be cons. + TfLiteTensor* tensor = interpreter->tensor(input); + CHECK_NN(ANeuralNetworksExecution_setInput( + execution, i, nullptr, tensor->data.raw, tensor->bytes)); + } + // Tell nn api where to place final data. + for (size_t i = 0; i < interpreter->outputs().size(); i++) { + int output = interpreter->outputs()[i]; + TfLiteTensor* tensor = interpreter->tensor(output); + CHECK_NN(ANeuralNetworksExecution_setOutput( + execution, i, nullptr, tensor->data.raw, tensor->bytes)); + } + // Currently use blocking compute. + ANeuralNetworksEvent* event = nullptr; + CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event)); + CHECK_NN(ANeuralNetworksEvent_wait(event)); + ANeuralNetworksEvent_free(event); + ANeuralNetworksExecution_free(execution); + +#if 0 + printf("From the NN API:\n"); + TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]); + if (float* data = + interpreter->typed_tensor(interpreter->outputs()[0])) { + size_t num = tensor->bytes / sizeof(float); + for (float* p = data; p < data + num; p++) { + printf(" %f", *p); + } + printf("\n"); + } +#endif + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/nnapi_delegate.h b/tensorflow/contrib/lite/nnapi_delegate.h new file mode 100644 index 0000000000..f29aa9e18e --- /dev/null +++ b/tensorflow/contrib/lite/nnapi_delegate.h @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_NNAPI_DELEGATE_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_NNAPI_DELEGATE_H_ + +#include "tensorflow/contrib/lite/allocation.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h" + +class ANeuralNetworsModel; + +namespace tflite { + +class NNAPIAllocation : public MMAPAllocation { + public: + NNAPIAllocation(const char* filename, ErrorReporter* error_reporter); + ~NNAPIAllocation(); + + size_t offset(const void* ptr) const { + auto signed_offset = reinterpret_cast(ptr) - + reinterpret_cast(mmapped_buffer_); + + return static_cast(signed_offset); + } + + ANeuralNetworksMemory* memory() const { return handle_; } + bool valid() const override { return handle_ != nullptr; } + + private: + mutable ANeuralNetworksMemory* handle_ = nullptr; +}; + +class NNAPIDelegate { + public: + ~NNAPIDelegate(); + + // Convert a tflite graph to NNAPI + TfLiteStatus BuildGraph(Interpreter* interpreter); + + // Run + TfLiteStatus Invoke(Interpreter* interpreter); + + private: + // The NN API model handle + ANeuralNetworksModel* nn_model_ = nullptr; + // The NN API compilation handle + ANeuralNetworksCompilation* nn_compiled_model_ = nullptr; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_NNAPI_DELEGATE_H_ diff --git a/tensorflow/contrib/lite/optional_debug_tools.cc b/tensorflow/contrib/lite/optional_debug_tools.cc new file mode 100644 index 0000000000..1f762e6688 --- /dev/null +++ b/tensorflow/contrib/lite/optional_debug_tools.cc @@ -0,0 +1,108 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/optional_debug_tools.h" + +namespace tflite { + +void PrintIntVector(const std::vector& v) { + for (const auto& it : v) { + printf(" %d", it); + } + printf("\n"); +} + +void PrintTfLiteIntVector(const TfLiteIntArray* v) { + if (!v) { + printf(" (null)"); + return; + } + for (int k = 0; k < v->size; k++) { + printf(" %d", v->data[k]); + } + printf("\n"); +} + +const char* TensorTypeName(TfLiteType type) { + switch (type) { + case kTfLiteNoType: + return "kTfLiteNoType"; + case kTfLiteFloat32: + return "kTfLiteFloat32"; + case kTfLiteInt32: + return "kTfLiteInt32"; + case kTfLiteUInt8: + return "kTfLiteUInt8"; + case kTfLiteInt64: + return "kTfLiteInt64"; + case kTfLiteString: + return "kTfLiteString"; + } + return "(invalid)"; +} + +const char* AllocTypeName(TfLiteAllocationType type) { + switch (type) { + case kTfLiteMemNone: + return "kTfLiteMemNone"; + case kTfLiteMmapRo: + return "kTfLiteMmapRo"; + case kTfLiteDynamic: + return "kTfLiteDynamic"; + case kTfLiteArenaRw: + return "kTfLiteArenaRw"; + case kTfLiteArenaRwPersistent: + return "kTfLiteArenaRwPersistent"; + } + return "(invalid)"; +} + +// Prints a dump of what tensors and what nodes are in the interpreter. +void PrintInterpreterState(Interpreter* interpreter) { + printf("Interpreter has %d tensors and %d nodes\n", + interpreter->tensors_size(), interpreter->nodes_size()); + printf("Inputs:"); + PrintIntVector(interpreter->inputs()); + printf("Outputs:"); + PrintIntVector(interpreter->outputs()); + printf("\n"); + for (int tensor_index = 0; tensor_index < interpreter->tensors_size(); + tensor_index++) { + TfLiteTensor* tensor = interpreter->tensor(tensor_index); + printf("Tensor %3d %10s %15s %10zu bytes (%4.1f MB) ", tensor_index, + TensorTypeName(tensor->type), AllocTypeName(tensor->allocation_type), + tensor->bytes, float(tensor->bytes) / float(1 << 20)); + PrintTfLiteIntVector(tensor->dims); + printf("\n"); + } + + for (int node_index = 0; node_index < interpreter->nodes_size(); + node_index++) { + const std::pair* node_and_reg = + interpreter->node_and_registration(node_index); + const TfLiteNode& node = node_and_reg->first; + const TfLiteRegistration& reg = node_and_reg->second; + printf("Node %3d Operator Builtin Code %3d\n", node_index, + reg.builtin_code); + printf(" Inputs:"); + PrintTfLiteIntVector(node.inputs); + printf(" Outputs:"); + PrintTfLiteIntVector(node.outputs); + } +} + +// Prints a dump of what tensors and what nodes are in the interpreter. +TfLiteStatus ValidateInterpreterState(const Interpreter* interpreter); + +} // namespace tflite diff --git a/tensorflow/contrib/lite/optional_debug_tools.h b/tensorflow/contrib/lite/optional_debug_tools.h new file mode 100644 index 0000000000..54d4876095 --- /dev/null +++ b/tensorflow/contrib/lite/optional_debug_tools.h @@ -0,0 +1,32 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Optional debugging functionality. For small sized binaries, these are not +// needed. +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_DEBUG_TOOLS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_DEBUG_TOOLS_H_ + +#include "tensorflow/contrib/lite/interpreter.h" + +namespace tflite { + +// Prints a dump of what tensors and what nodes are in the interpreter. +void PrintInterpreterState(Interpreter* interpreter); + +// Prints a dump of what tensors and what nodes are in the interpreter. +TfLiteStatus ValidateInterpreterState(const Interpreter* interpreter); + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_DEBUG_TOOLS_H_ diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD new file mode 100644 index 0000000000..b4aa032ff8 --- /dev/null +++ b/tensorflow/contrib/lite/python/BUILD @@ -0,0 +1,46 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//tensorflow:internal"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +py_library( + name = "lite", + srcs = ["lite.py"], + # data = [ + # "//tensorflow/contrib/lite/toco/python:toco_from_protos", + # ], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/contrib/lite/toco:model_flags_proto_py", + "//tensorflow/contrib/lite/toco:toco_flags_proto_py", + "//tensorflow/contrib/lite/toco/python:tensorflow_wrap_toco", + "//tensorflow/python:platform", + ], +) + +py_test( + name = "lite_test", + srcs = ["lite_test.py"], + deps = [ + ":lite", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:platform_test", + "//tensorflow/python:session", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py new file mode 100644 index 0000000000..5e8edbb937 --- /dev/null +++ b/tensorflow/contrib/lite/python/lite.py @@ -0,0 +1,199 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow Lite tooling helper functionality. + +EXPERIMENTAL: APIs here are unstable and likely to change without notice. + +@@toco_convert +@@toco_convert_protos + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import subprocess +import tempfile + +from tensorflow.contrib.lite.toco import model_flags_pb2 as _model_flags_pb2 +from tensorflow.contrib.lite.toco import toco_flags_pb2 as _toco_flags_pb2 +from tensorflow.contrib.lite.toco.python.tensorflow_wrap_toco import TocoConvert as _toco_convert_protos +from tensorflow.python.framework import dtypes as _dtypes +# from tensorflow.python.platform import +# resource_loader as _resource_loader + +# Enum types from the protobuf promoted to the API +FLOAT = _toco_flags_pb2.FLOAT +INT32 = _toco_flags_pb2.INT32 +INT64 = _toco_flags_pb2.INT64 +STRING = _toco_flags_pb2.STRING +QUANTIZED_UINT8 = _toco_flags_pb2.QUANTIZED_UINT8 +TENSORFLOW_GRAPHDEF = _toco_flags_pb2.TENSORFLOW_GRAPHDEF +TFLITE = _toco_flags_pb2.TFLITE +GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT + +# Currently the default mode of operation is to shell to another python process +# to protect against crashes. +EXPERIMENTAL_USE_TOCO_API_DIRECTLY = True + +# Find the toco_from_protos binary using the resource loader if using from +# bazel, otherwise we are in a pip where console_scripts already has +# the toco_from_protos tool. +# toco_from_proto_bin = _resource_loader.get_path_to_datafile( +# "../toco/python/toco_from_protos") +# if not os.path.exists(toco_from_proto_bin): +# toco_from_proto_bin = "toco_from_protos" + + +def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): + """Convert `input_data_str` according to model and toco parameters. + + Unless you know what you are doing consider using + the more friendly @{tf.contrib.lite.toco_convert}}. + + Args: + model_flags_str: Serialized proto describing model properties, see + `toco/model_flags.proto`. + toco_flags_str: Serialized proto describing conversion properties, see + `toco/toco_flags.proto`. + input_data_str: Input data in serialized form (e.g. a graphdef is common) + Returns: + Converted model in serialized form (e.g. a TFLITE model is common). + Raises: + RuntimeError: When conversion fails, an exception is raised with the error + message embedded. + """ + # TODO(aselle): When toco does not use fatal errors for failure, we can + # switch this on. + if EXPERIMENTAL_USE_TOCO_API_DIRECTLY: + return _toco_convert_protos(model_flags_str, toco_flags_str, input_data_str) + + # with tempfile.NamedTemporaryFile() as fp_toco, \ + # tempfile.NamedTemporaryFile() as fp_model, \ + # tempfile.NamedTemporaryFile() as fp_input, \ + # tempfile.NamedTemporaryFile() as fp_output: + # fp_model.write(model_flags_str) + # fp_toco.write(toco_flags_str) + # fp_input.write(input_data_str) + # fp_model.flush() + # fp_toco.flush() + # fp_input.flush() + + # cmd = [ + # toco_from_proto_bin, fp_model.name, fp_toco.name, fp_input.name, + # fp_output.name + # ] + # cmdline = " ".join(cmd) + # proc = subprocess.Popen( + # cmdline, + # shell=True, + # stdout=subprocess.PIPE, + # stderr=subprocess.STDOUT, + # close_fds=True) + # stdout, stderr = proc.communicate() + # exitcode = proc.returncode + # if exitcode == 0: + # stuff = fp_output.read() + # return stuff + # else: + # raise RuntimeError("TOCO failed see console for info.\n%s\n%s\n" % + # (stdout, stderr)) + + +def _tensor_name(x): + return x.name.split(":")[0] + + +def toco_convert(input_data, + input_tensors, + output_tensors, + inference_type=FLOAT, + input_format=TENSORFLOW_GRAPHDEF, + output_format=TFLITE, + quantized_input_stats=None, + drop_control_dependency=True): + """Convert a model using TOCO from `input_format` to `output_format`. + + Typically this is to convert from TensorFlow GraphDef to TFLite, in which + case the default `input_format` and `output_format` are sufficient. + + Args: + input_data: Input data (i.e. often `sess.graph_def`). + input_tensors: List of input tensors. Type and shape are computed using + `foo.get_shape()` and `foo.dtype`. + output_tensors: List of output tensors (only .name is used from this). + inference_type: Currently must be `{FLOAT, QUANTIZED_UINT8}`. + input_format: Type of data to read (currently must be TENSORFLOW_GRAPHDEF). + output_format: Type of data to write (currently must be TFLITE or + GRAPHVIZ_DOT) + quantized_input_stats: For each member of input_tensors the mean and + std deviation of training data. Only needed if `inference_type` is + `QUANTIZED_UINT8`. + drop_control_dependency: Drops control dependencies silently. This is due + to tf lite not supporting control dependencies. + + Returns: + The converted data. For example if tflite was the destination, then + this will be a tflite flatbuffer in a bytes array. + + Raises: + ValueError: If the input tensor type is unknown + RuntimeError: If TOCO fails to convert (in which case the runtime error's + error text will contain the TOCO error log) + """ + toco = _toco_flags_pb2.TocoFlags() + toco.input_format = input_format + toco.output_format = output_format + model = _model_flags_pb2.ModelFlags() + model.drop_control_dependency = drop_control_dependency + toco.inference_type = inference_type + for idx, input_tensor in enumerate(input_tensors): + if input_tensor.dtype == _dtypes.float32: + tflite_input_type = FLOAT + elif input_tensor.dtype == _dtypes.int32: + tflite_input_type = INT32 + elif input_tensor.dtype == _dtypes.int64: + tflite_input_type = INT64 + # TODO(aselle): Insert strings when they are available + else: + raise ValueError("Tensors %s not known type %r" % (input_tensor.name, + input_tensor.dtype)) + + input_array = model.input_arrays.add() + + if inference_type == QUANTIZED_UINT8: + if tflite_input_type == FLOAT: + tflite_input_type = QUANTIZED_UINT8 + input_array.mean, input_array.std = quantized_input_stats[idx] + + input_array.name = _tensor_name(input_tensor) + input_array.shape.extend(map(int, input_tensor.get_shape())) + toco.input_types.append(tflite_input_type) + + for output_tensor in output_tensors: + model.output_arrays.append(_tensor_name(output_tensor)) + + data = toco_convert_protos(model.SerializeToString(), + toco.SerializeToString(), + input_data.SerializeToString()) + return data + + +# remove_undocumented(__name__) + +del os +del subprocess +del tempfile diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py new file mode 100644 index 0000000000..da360aeb34 --- /dev/null +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -0,0 +1,45 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow Lite Python Interface: Sanity check.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.lite.python import lite +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class LiteTest(test_util.TensorFlowTestCase): + + def testBasic(self): + in_tensor = array_ops.placeholder(shape=[1, 16, 16, 3], + dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + # Try running on valid graph + result = lite.toco_convert(sess.graph_def, [in_tensor], [out_tensor]) + self.assertTrue(result) + # TODO(aselle): remove tests that fail. + # Try running on identity graph (known fail) + # with self.assertRaisesRegexp(RuntimeError, "!model->operators.empty()"): + # result = lite.toco_convert(sess.graph_def, [in_tensor], [in_tensor]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD new file mode 100644 index 0000000000..3e04d6f34f --- /dev/null +++ b/tensorflow/contrib/lite/schema/BUILD @@ -0,0 +1,82 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow:tensorflow.bzl", "py_test") + +py_binary( + name = "upgrade_schema", + srcs = [ + "upgrade_schema.py", + ], + data = [ + "schema_v0.fbs", + "schema_v1.fbs", + "schema_v2.fbs", + "schema_v3.fbs", + ], + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/python:platform", + "@flatbuffers//:flatc", + ], +) + +py_test( + name = "upgrade_schema_test", + size = "small", + srcs = ["upgrade_schema_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":upgrade_schema", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + ], +) + +exports_files([ + "schema_v0.fbs", + "schema_v1.fbs", + "schema_v2.fbs", + "schema_v3.fbs", +]) + +load("//third_party/flatbuffers:build_defs.bzl", "flatbuffer_cc_library") + +# Generic schema for inference on device. +flatbuffer_cc_library( + name = "schema_fbs", + srcs = ["schema.fbs"], +) + +# Schema test to make sure we don't introduce backward incompatible changes +# to schemas. +cc_test( + name = "flatbuffer_compatibility_test", + size = "small", + srcs = ["flatbuffer_compatibility_test.cc"], + data = [ + "schema.fbs", + "schema_v3.fbs", + ], + deps = [ + "//tensorflow/core:lib_platform", + "@com_google_googletest//:gtest", + "@flatbuffers//:flatc_library", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc new file mode 100644 index 0000000000..17ee0af8dd --- /dev/null +++ b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc @@ -0,0 +1,91 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include "third_party/flatbuffers/include/flatbuffers/flatc.h" +#include "tensorflow/core/platform/platform.h" + +#ifdef PLATFORM_GOOGLE +#define TFLITE_TF_PREFIX "third_party/tensorflow/" +#else +#define TFLITE_TF_PREFIX "tensorflow/" +#endif +/// Load filename `name` +bool LoadFileRaw(const char *name, std::string *buf) { + std::ifstream fp(name, std::ios::binary); + if (!fp) { + fprintf(stderr, "Failed to read '%s'\n", name); + return false; + } + std::string s((std::istreambuf_iterator(fp)), + std::istreambuf_iterator()); + if (s.empty()) { + fprintf(stderr, "Read '%s' resulted in empty\n", name); + return false; + } + *buf = s; + return true; +} + +bool ParseFile(flatbuffers::Parser *parser, const std::string &filename, + const std::string &contents) { + std::vector include_directories; + auto local_include_directory = flatbuffers::StripFileName(filename); + include_directories.push_back(local_include_directory.c_str()); + include_directories.push_back(nullptr); + if (!parser->Parse(contents.c_str(), include_directories.data(), + filename.c_str())) { + fprintf(stderr, "Failed to parse flatbuffer schema '%s'\n", + contents.c_str()); + return false; + } + return true; +} + +// Checks to make sure current schema in current code does not cause an +// incompatibility. +TEST(SchemaTest, TestCompatibility) { + // Read file contents of schemas into strings + // TODO(aselle): Need a reliable way to load files. + std::string base_contents, current_contents; + const char *base_filename = + TFLITE_TF_PREFIX "contrib/lite/schema/schema_v3.fbs"; + const char *current_filename = + TFLITE_TF_PREFIX "contrib/lite/schema/schema.fbs"; + + ASSERT_TRUE(LoadFileRaw(base_filename, &base_contents)); + ASSERT_TRUE(LoadFileRaw(current_filename, ¤t_contents)); + // Parse the schemas + flatbuffers::Parser base_parser, current_parser; + std::vector include_directories; + ASSERT_TRUE(ParseFile(&base_parser, base_filename, base_contents)); + ASSERT_TRUE(ParseFile(¤t_parser, current_filename, current_contents)); + // Check that the schemas conform and fail if they don't + auto err = current_parser.ConformTo(base_parser); + if (!err.empty()) { + fprintf(stderr, + "Schemas don't conform:\n%s\n" + "In other words some change you made means that new parsers can't" + "parse old files.\n", + err.c_str()); + FAIL(); + } +} + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs new file mode 100644 index 0000000000..ddb2ab792c --- /dev/null +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -0,0 +1,346 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. +// Version 2: Rename operators to conform to NN API. +// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers. + +namespace tflite; + +// This corresponds to the version. +file_identifier "TFL3"; +// File extension of any written files. +file_extension "tflite"; + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, +} + +// Parameters for converting a quantized tensor back to float. Given a +// quantized value q, the corresponding float value f should be: +// f = scale * (q - zero_point) +table QuantizationParameters { + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; + zero_point:[long]; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, number of channels, height, width] (That's + // Tensorflow's NCHW). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existant empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*3 + k]. + buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. +} + +// A list of builtin operators. Builtin operators a slighlty faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +enum BuiltinOperator : byte { + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + // DEPTH_TO_SPACE = 5, + // DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + // FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + MUL = 18, + RELU = 19, + RELU1 = 20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + EMBEDDING_LOOKUP_SPARSE = 33, +} + +// Options for the builtin operators. +union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, + EmbeddingLookupSparseOptions, + MulOptions, +} + +enum Padding : byte { SAME, VALID } + +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + fused_activation_function:ActivationFunctionType; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; +} + +table MulOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping +} + +table ResizeBilinearOptions { + new_height:int; + new_width:int; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:uint; +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +enum CombinerType : byte { + SUM = 0, + MEAN = 1, + SQRTN = 2, +} + +table EmbeddingLookupSparseOptions { + combiner:CombinerType; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + builtin_code:BuiltinOperator; + custom_code:string; +} + +enum CustomOptionsFormat : byte { + FLEXBUFFERS = 0, +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operations is configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicate map lookups. + opcode_index:uint; + + // Optional input and output tensors are indicated by -1. + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; + custom_options_format:CustomOptionsFormat; +} + +// The root type, defining a model. +table SubGraph { + // A list of all tensors used in this model. + tensors:[Tensor]; + + // Indices of the input tensors. + inputs:[int]; + + // Indices of the output tensors. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of subgraph (used for debugging). + name:string; +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. +table Buffer { + data:[ubyte]; +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; + + // Buffers of the model + buffers:[Buffer]; + +} + +root_type Model; + diff --git a/tensorflow/contrib/lite/schema/schema_v0.fbs b/tensorflow/contrib/lite/schema/schema_v0.fbs new file mode 100644 index 0000000000..852ea988f3 --- /dev/null +++ b/tensorflow/contrib/lite/schema/schema_v0.fbs @@ -0,0 +1,247 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace tflite; + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, +} + +// Parameters for converting a quantized tensor back to float. Given a +// quantized value q, the corresponding float value f should be: +// f = scale * (q - zero_point) +table QuantizationParameters { + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; + zero_point:[long]; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, number of channels, height, width] (That's + // Tensorflow's NCHW). + shape:[int]; + type:TensorType; + // The data_buffer is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*4*3 + j*3 + k]. + data_buffer:[ubyte]; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. +} + +// A list of builtin operators. Builtin operators a slighlty faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +enum BuiltinOperator : byte { + CUSTOM = 0, + CONVOLUTION = 1, + DEPTHWISE_CONVOLUTION = 2, + CONCAT_EMBEDDINGS = 3, + LSH_PROJECTION = 4, + TANH = 5, + RELU = 6, + AVERAGE_POOL = 7, + MAX_POOL = 8, + L2_POOL = 9, + SIGMOID = 10, + SVDF = 11, + BasicRNN = 12, + RELU6 = 13, + EMBEDDING_LOOKUP = 14, + FULLY_CONNECTED = 15, + HASHTABLE_LOOKUP = 16, + SOFTMAX = 17, + CONCATENATION = 18, + LSTM = 19, + ADD = 20, + L2NORM = 21, + LOCAL_RESPONSE_NORM = 22, + RESIZE_BILINEAR = 23, +} + +// Options for the builtin operators. +union BuiltinOptions { + ConvolutionOptions, + DepthwiseConvolutionOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + PoolOptions, + SVDFOptions, + BasicRNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormOptions, + LSTMOptions, + ResizeBilinearOptions, +} + +enum Padding : byte { SAME, VALID } + +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} + +table ConvolutionOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; +} + +table PoolOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConvolutionOptions { + padding:Padding; + stride_w:int; + stride_h:int; + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow BasicRNNCell. +table BasicRNNOptions { + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + fused_activation_function:ActivationFunctionType; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping +} + +table ResizeBilinearOptions { + new_height:int; + new_width:int; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + builtin_code:BuiltinOperator; + custom_code:string; +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operations is configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicate map lookups. + opcode_index:int; + + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; +} + +// The root type, defining a model. +table Model { + // A list of all tensors used in this model. + tensors:[Tensor]; + + // Indices of the input tensors. + inputs:[int]; + + // Indices of the output tensors. + outputs:[int]; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All operators, in execution order. + operators:[Operator]; +} + +root_type Model; diff --git a/tensorflow/contrib/lite/schema/schema_v1.fbs b/tensorflow/contrib/lite/schema/schema_v1.fbs new file mode 100644 index 0000000000..06cd9408ed --- /dev/null +++ b/tensorflow/contrib/lite/schema/schema_v1.fbs @@ -0,0 +1,295 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. + +namespace tflite; + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, +} + +// Parameters for converting a quantized tensor back to float. Given a +// quantized value q, the corresponding float value f should be: +// f = scale * (q - zero_point) +table QuantizationParameters { + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; + zero_point:[long]; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, number of channels, height, width] (That's + // Tensorflow's NCHW). + shape:[int]; + type:TensorType; + // The data_buffer is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*3 + k]. + data_buffer:[ubyte]; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. +} + +// A list of builtin operators. Builtin operators a slighlty faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +enum BuiltinOperator : byte { + CUSTOM = 0, + CONVOLUTION = 1, + DEPTHWISE_CONVOLUTION = 2, + CONCAT_EMBEDDINGS = 3, + LSH_PROJECTION = 4, + TANH = 5, + RELU = 6, + AVERAGE_POOL = 7, + MAX_POOL = 8, + L2_POOL = 9, + SIGMOID = 10, + SVDF = 11, + BasicRNN = 12, + RELU6 = 13, + EMBEDDING_LOOKUP = 14, + FULLY_CONNECTED = 15, + HASHTABLE_LOOKUP = 16, + SOFTMAX = 17, + CONCATENATION = 18, + LSTM = 19, + ADD = 20, + L2NORM = 21, + LOCAL_RESPONSE_NORM = 22, + RESIZE_BILINEAR = 23, + CALL = 24, + RESHAPE = 25, + SKIP_GRAM = 26, + SPACE_TO_DEPTH = 27, +} + +// Options for the builtin operators. +union BuiltinOptions { + ConvolutionOptions, + DepthwiseConvolutionOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + PoolOptions, + SVDFOptions, + BasicRNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, +} + +enum Padding : byte { SAME, VALID } + +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} + +table ConvolutionOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; +} + +table PoolOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConvolutionOptions { + padding:Padding; + stride_w:int; + stride_h:int; + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow BasicRNNCell. +table BasicRNNOptions { + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + fused_activation_function:ActivationFunctionType; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping +} + +table ResizeBilinearOptions { + new_height:int; + new_width:int; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:int; +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + builtin_code:BuiltinOperator; + custom_code:string; +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operations is configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicate map lookups. + opcode_index:int; + + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; +} + +// The root type, defining a model. +table SubGraph { + // A list of all tensors used in this model. + tensors:[Tensor]; + + // Indices of the input tensors. + inputs:[int]; + + // Indices of the output tensors. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of subgraph (used for debugging). + name:string; +} + +table Model { + // Version of the schema. + version:int; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; +} + +root_type Model; diff --git a/tensorflow/contrib/lite/schema/schema_v2.fbs b/tensorflow/contrib/lite/schema/schema_v2.fbs new file mode 100644 index 0000000000..96731c8aae --- /dev/null +++ b/tensorflow/contrib/lite/schema/schema_v2.fbs @@ -0,0 +1,303 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. +// Version 2: Rename operators to conform to NN API. + +namespace tflite; + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, +} + +// Parameters for converting a quantized tensor back to float. Given a +// quantized value q, the corresponding float value f should be: +// f = scale * (q - zero_point) +table QuantizationParameters { + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; + zero_point:[long]; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, number of channels, height, width] (That's + // Tensorflow's NCHW). + shape:[int]; + type:TensorType; + // The data_buffer is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*3 + k]. + data_buffer:[ubyte]; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. +} + +// A list of builtin operators. Builtin operators a slighlty faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +enum BuiltinOperator : byte { + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + // DEPTH_TO_SPACE = 5, + // DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + // FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + // MUL = 18, + RELU = 19, + // RELU1=20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + +} + +// Options for the builtin operators. +union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, +} + +enum Padding : byte { SAME, VALID } + +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + fused_activation_function:ActivationFunctionType; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping +} + +table ResizeBilinearOptions { + new_height:int; + new_width:int; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:int; +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + builtin_code:BuiltinOperator; + custom_code:string; +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operations is configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicate map lookups. + opcode_index:int; + + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; +} + +// The root type, defining a model. +table SubGraph { + // A list of all tensors used in this model. + tensors:[Tensor]; + + // Indices of the input tensors. + inputs:[int]; + + // Indices of the output tensors. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of subgraph (used for debugging). + name:string; +} + +table Model { + // Version of the schema. + version:int; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; +} + +root_type Model; diff --git a/tensorflow/contrib/lite/schema/schema_v3.fbs b/tensorflow/contrib/lite/schema/schema_v3.fbs new file mode 100644 index 0000000000..cedefe08f3 --- /dev/null +++ b/tensorflow/contrib/lite/schema/schema_v3.fbs @@ -0,0 +1,326 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. +// Version 2: Rename operators to conform to NN API. +// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers. + +namespace tflite; + +// This corresponds to the version (4). +file_identifier "TFL3"; +// File extension of any written files. +file_extension "tflite"; + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, +} + +// Parameters for converting a quantized tensor back to float. Given a +// quantized value q, the corresponding float value f should be: +// f = scale * (q - zero_point) +table QuantizationParameters { + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; + zero_point:[long]; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, number of channels, height, width] (That's + // Tensorflow's NCHW). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existant empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*3 + k]. + buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. +} + +// A list of builtin operators. Builtin operators a slighlty faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +enum BuiltinOperator : byte { + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + // DEPTH_TO_SPACE = 5, + // DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + // FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + // MUL = 18, + RELU = 19, + // RELU1=20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + +} + +// Options for the builtin operators. +union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, +} + +enum Padding : byte { SAME, VALID } + +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; +} + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + fused_activation_function:ActivationFunctionType; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping +} + +table ResizeBilinearOptions { + new_height:int; + new_width:int; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:uint; +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + builtin_code:BuiltinOperator; + custom_code:string; +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operations is configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicate map lookups. + opcode_index:uint; + + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; +} + +// The root type, defining a model. +table SubGraph { + // A list of all tensors used in this model. + tensors:[Tensor]; + + // Indices of the input tensors. + inputs:[int]; + + // Indices of the output tensors. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of subgraph (used for debugging). + name:string; +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. +table Buffer { + data:[ubyte]; +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; + + // Buffers of the model. + // NOTE: It is required that the first entry in here is always an empty + // buffer. This is so that the default buffer index of zero in Tensor + // will always refer to a valid empty buffer. + buffers:[Buffer]; + +} + +root_type Model; diff --git a/tensorflow/contrib/lite/schema/upgrade_schema.py b/tensorflow/contrib/lite/schema/upgrade_schema.py new file mode 100644 index 0000000000..320c7138d2 --- /dev/null +++ b/tensorflow/contrib/lite/schema/upgrade_schema.py @@ -0,0 +1,341 @@ +# ============================================================================== +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Upgrade script to move from pre-release schema to new schema. + +Usage examples: + +bazel run tensorflow/contrib/lite/schema/upgrade_schema -- in.json out.json +bazel run tensorflow/contrib/lite/schema/upgrade_schema -- in.bin out.bin +bazel run tensorflow/contrib/lite/schema/upgrade_schema -- in.bin out.json +bazel run tensorflow/contrib/lite/schema/upgrade_schema -- in.json out.bin +bazel run tensorflow/contrib/lite/schema/upgrade_schema -- in.tflite out.tflite +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import contextlib +import json +import os +import shutil +import subprocess +import sys +import tempfile + +import tensorflow as tf +from tensorflow.python.platform import resource_loader + +parser = argparse.ArgumentParser( + description="Script to move TFLite models from pre-release schema to" + " new schema.") +parser.add_argument( + "input", + type=str, + help="Input TensorFlow lite file in `.json`, `.bin` or `.tflite` format.") +parser.add_argument( + "output", + type=str, + help="Output json or bin TensorFlow lite model compliant with" + "the new schema. Extension must be `.json`, `.bin` or `.tflite`.") + + +# RAII Temporary Directory, because flatc doesn't allow direct use of tempfiles. +@contextlib.contextmanager +def TemporaryDirectoryResource(): + temporary = tempfile.mkdtemp() + try: + yield temporary + finally: + shutil.rmtree(temporary) + + +class Converter(object): + """Converts TensorFlow flatbuffer models from old to new version of schema. + + This can convert between any version to the latest version. It uses + an incremental upgrade strategy to go from version to version. + + Usage: + converter = Converter() + converter.Convert("a.tflite", "a.json") + converter.Convert("b.json", "b.tflite") + """ + + def __init__(self): + # TODO(aselle): make this work in the open source version with better + # path. + self._flatc_path = resource_loader.get_path_to_datafile( + "../../../../flatbuffers/flatc") + + def FindSchema(base_name): + return resource_loader.get_path_to_datafile("%s" % base_name) + + # Supported schemas for upgrade. + self._schemas = [ + (0, FindSchema("schema_v0.fbs"), True, self._Upgrade0To1), + (1, FindSchema("schema_v1.fbs"), True, self._Upgrade1To2), + (2, FindSchema("schema_v2.fbs"), True, self._Upgrade2To3), + (3, FindSchema("schema_v3.fbs"), False, None) # Non-callable by design. + ] + # Ensure schemas are sorted, and extract latest version and upgrade + # dispatch function table. + self._schemas.sort() + self._new_version, self._new_schema = self._schemas[-1][:2] + self._upgrade_dispatch = dict( + (version, dispatch) + for version, unused1, unused2, dispatch in self._schemas) + + def _Read(self, input_file, schema, raw_binary=False): + """Read a tflite model assuming the given flatbuffer schema. + + If `input_file` is in bin, then we must use flatc to convert the schema + from binary to json. + + Args: + input_file: a binary (flatbuffer) or json file to read from. Extension + must be `.tflite`, `.bin`, or `.json` for FlatBuffer Binary or + FlatBuffer JSON. + schema: which schema to use for reading + raw_binary: whether to assume raw_binary (versions previous to v3) + that lacked file_identifier require this. + + Raises: + RuntimeError: When flatc cannot be invoked. + ValueError: When the extension is not json or bin. + + Returns: + A dictionary representing the read tflite model. + """ + raw_binary = ["--raw-binary"] if raw_binary else [] + with TemporaryDirectoryResource() as tempdir: + basename = os.path.basename(input_file) + basename_no_extension, extension = os.path.splitext(basename) + if extension in [".bin", ".tflite"]: + # Convert to json using flatc + returncode = subprocess.call([ + self._flatc_path, + "-t", + "--strict-json", + "--defaults-json", + ] + raw_binary + ["-o", tempdir, schema, "--", input_file]) + if returncode != 0: + raise RuntimeError("flatc failed to convert from binary to json.") + json_file = os.path.join(tempdir, basename_no_extension + ".json") + if not os.path.exists(json_file): + raise RuntimeError("Could not find %r" % json_file) + elif extension == ".json": + json_file = input_file + else: + raise ValueError("Invalid extension on input file %r" % input_file) + return json.load(open(json_file)) + + def _Write(self, data, output_file): + """Output a json or bin version of the flatbuffer model. + + Args: + data: Dict representing the TensorFlow Lite model to write. + output_file: filename to write the converted flatbuffer to. (json, + tflite, or bin extension is required). + Raises: + ValueError: When the extension is not json or bin + RuntimeError: When flatc fails to convert json data to binary. + """ + _, extension = os.path.splitext(output_file) + with TemporaryDirectoryResource() as tempdir: + if extension == ".json": + json.dump(data, open(output_file, "w"), sort_keys=True, indent=2) + elif extension in [".tflite", ".bin"]: + input_json = os.path.join(tempdir, "temp.json") + with open(input_json, "w") as fp: + json.dump(data, fp, sort_keys=True, indent=2) + returncode = subprocess.call([ + self._flatc_path, "-b", "--defaults-json", "--strict-json", "-o", + tempdir, self._new_schema, input_json + ]) + if returncode != 0: + raise RuntimeError("flatc failed to convert upgraded json to binary.") + + shutil.copy(os.path.join(tempdir, "temp.tflite"), output_file) + else: + raise ValueError("Invalid extension on output file %r" % output_file) + + def _Upgrade0To1(self, data): + """Upgrade data from Version 0 to Version 1. + + Changes: Added subgraphs (which contains a subset of formally global + entries). + + Args: + data: Dictionary representing the TensorFlow lite data to be upgraded. + This will be modified in-place to be an upgraded version. + """ + subgraph = {} + for key_to_promote in ["tensors", "operators", "inputs", "outputs"]: + subgraph[key_to_promote] = data[key_to_promote] + del data[key_to_promote] + data["subgraphs"] = [subgraph] + + def _Upgrade1To2(self, data): + """Upgrade data from Version 1 to Version 2. + + Changes: Rename operators to Conform to NN API. + + Args: + data: Dictionary representing the TensorFlow lite data to be upgraded. + This will be modified in-place to be an upgraded version. + Raises: + ValueError: Throws when model builtins are numeric rather than symbols. + """ + + def RemapOperator(opcode_name): + """Go from old schema op name to new schema op name. + + Args: + opcode_name: String representing the ops (see :schema.fbs). + Returns: + Converted opcode_name from V1 to V2. + """ + old_name_to_new_name = { + "CONVOLUTION": "CONV_2D", + "DEPTHWISE_CONVOLUTION": "DEPTHWISE_CONV_2D", + "AVERAGE_POOL": "AVERAGE_POOL_2D", + "MAX_POOL": "MAX_POOL_2D", + "L2_POOL": "L2_POOL_2D", + "SIGMOID": "LOGISTIC", + "L2NORM": "L2_NORMALIZATION", + "LOCAL_RESPONSE_NORM": "LOCAL_RESPONSE_NORMALIZATION", + "Basic_RNN": "RNN", + } + + return (old_name_to_new_name[opcode_name] + if opcode_name in old_name_to_new_name else opcode_name) + + def RemapOperatorType(operator_type): + """Remap operator structs from old names to new names. + + Args: + operator_type: String representing the builtin operator data type + string. + (see :schema.fbs). + Returns: + Upgraded builtin operator data type as a string. + """ + old_to_new = { + "PoolOptions": "Pool2DOptions", + "DepthwiseConvolutionOptions": "DepthwiseConv2DOptions", + "ConvolutionOptions": "Conv2DOptions", + "LocalResponseNormOptions": "LocalResponseNormalizationOptions", + "BasicRNNOptions": "RNNOptions", + } + return (old_to_new[operator_type] + if operator_type in old_to_new else operator_type) + + for subgraph in data["subgraphs"]: + for ops in subgraph["operators"]: + ops["builtin_options_type"] = RemapOperatorType( + ops["builtin_options_type"]) + + # Upgrade the operator codes + for operator_code in data["operator_codes"]: + if not isinstance(operator_code["builtin_code"], unicode): + raise ValueError("builtin_code %r is non-string. this usually means" + "your model has consistency problems." % + (operator_code["builtin_code"])) + operator_code["builtin_code"] = (RemapOperator( + operator_code["builtin_code"])) + + def _Upgrade2To3(self, data): + """Upgrade data from Version 2 to Version 3. + + Changed actual read-only tensor data to be in a buffers table instead + of inline with the tensor. + + Args: + data: Dictionary representing the TensorFlow lite data to be upgraded. + This will be modified in-place to be an upgraded version. + """ + buffers = [{"data": []}] # Start with 1 empty buffer + for subgraph in data["subgraphs"]: + if "tensors" not in subgraph: + continue + for tensor in subgraph["tensors"]: + if "data_buffer" not in tensor: + tensor["buffer"] = 0 + else: + if tensor["data_buffer"]: + tensor[u"buffer"] = len(buffers) + buffers.append({"data": tensor["data_buffer"]}) + else: + tensor["buffer"] = 0 + del tensor["data_buffer"] + data["buffers"] = buffers + + def _PerformUpgrade(self, data): + """Manipulate the `data` (parsed JSON) based on changes in format. + + This incrementally will upgrade from version to version within data. + + Args: + data: Dictionary representing the TensorFlow data. This will be upgraded + in place. + """ + while data["version"] < self._new_version: + self._upgrade_dispatch[data["version"]](data) + data["version"] += 1 + + def Convert(self, input_file, output_file): + """Perform schema conversion from input_file to output_file. + + Args: + input_file: Filename of TensorFlow Lite data to convert from. Must + be `.json` or `.bin` extension files for JSON or Binary forms of + the TensorFlow FlatBuffer schema. + output_file: Filename to write to. Extension also must be `.json` + or `.bin`. + + Raises: + RuntimeError: Generated when none of the upgrader supported schemas + matche the `input_file` data. + """ + # Read data in each schema (since they are incompatible). Version is + # always present. Use the read data that matches the version of the + # schema. + for version, schema, raw_binary, _ in self._schemas: + try: + data_candidate = self._Read(input_file, schema, raw_binary) + except RuntimeError: + continue # Skip and hope another schema works + if "version" not in data_candidate: # Assume version 1 if not present. + data_candidate["version"] = 1 + elif data_candidate["version"] == 0: # Version 0 doesn't exist in wild. + data_candidate["version"] = 1 + + if data_candidate["version"] == version: + self._PerformUpgrade(data_candidate) + self._Write(data_candidate, output_file) + return + raise RuntimeError("No schema that the converter understands worked with " + "the data file you provided.") + + +def main(argv): + del argv + Converter().Convert(FLAGS.input, FLAGS.output) + + +if __name__ == "__main__": + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/lite/schema/upgrade_schema_test.py b/tensorflow/contrib/lite/schema/upgrade_schema_test.py new file mode 100644 index 0000000000..475cdb9d8b --- /dev/null +++ b/tensorflow/contrib/lite/schema/upgrade_schema_test.py @@ -0,0 +1,317 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Testing for updating TensorFlow lite schema.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import tempfile +from tensorflow.contrib.lite.schema import upgrade_schema as upgrade_schema_lib +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test as test_lib + +EMPTY_TEST_SCHEMA_V1 = { + "version": 1, + "operator_codes": [], + "subgraphs": [], +} + +EMPTY_TEST_SCHEMA_V3 = { + "version": 3, + "operator_codes": [], + "subgraphs": [], + "buffers": [{ + "data": [] + }] +} + +TEST_SCHEMA_V0 = { + "operator_codes": [], + "tensors": [], + "inputs": [], + "outputs": [], + "operators": [], + "version": 0 +} + +TEST_SCHEMA_V3 = { + "operator_codes": [], + "buffers": [{ + "data": [] + }], + "subgraphs": [{ + "tensors": [], + "inputs": [], + "outputs": [], + "operators": [], + }], + "version": + 3 +} + +FULL_TEST_SCHEMA_V1 = { + "version": + 1, + "operator_codes": [ + { + "builtin_code": "CONVOLUTION" + }, + { + "builtin_code": "DEPTHWISE_CONVOLUTION" + }, + { + "builtin_code": "AVERAGE_POOL" + }, + { + "builtin_code": "MAX_POOL" + }, + { + "builtin_code": "L2_POOL" + }, + { + "builtin_code": "SIGMOID" + }, + { + "builtin_code": "L2NORM" + }, + { + "builtin_code": "LOCAL_RESPONSE_NORM" + }, + { + "builtin_code": "ADD" + }, + { + "builtin_code": "Basic_RNN" + }, + ], + "subgraphs": [{ + "operators": [ + { + "builtin_options_type": "PoolOptions" + }, + { + "builtin_options_type": "DepthwiseConvolutionOptions" + }, + { + "builtin_options_type": "ConvolutionOptions" + }, + { + "builtin_options_type": "LocalResponseNormOptions" + }, + { + "builtin_options_type": "BasicRNNOptions" + }, + ], + }], + "description": + "", +} + +FULL_TEST_SCHEMA_V3 = { + "version": + 3, + "operator_codes": [ + { + "builtin_code": "CONV_2D" + }, + { + "builtin_code": "DEPTHWISE_CONV_2D" + }, + { + "builtin_code": "AVERAGE_POOL_2D" + }, + { + "builtin_code": "MAX_POOL_2D" + }, + { + "builtin_code": "L2_POOL_2D" + }, + { + "builtin_code": "LOGISTIC" + }, + { + "builtin_code": "L2_NORMALIZATION" + }, + { + "builtin_code": "LOCAL_RESPONSE_NORMALIZATION" + }, + { + "builtin_code": "ADD" + }, + { + "builtin_code": "RNN" + }, + ], + "subgraphs": [{ + "operators": [ + { + "builtin_options_type": "Pool2DOptions" + }, + { + "builtin_options_type": "DepthwiseConv2DOptions" + }, + { + "builtin_options_type": "Conv2DOptions" + }, + { + "builtin_options_type": "LocalResponseNormalizationOptions" + }, + { + "builtin_options_type": "RNNOptions" + }, + ], + }], + "description": + "", + "buffers": [{ + "data": [] + }] +} + +BUFFER_TEST_V2 = { + "operator_codes": [], + "buffers": [], + "subgraphs": [{ + "tensors": [ + { + "data_buffer": [1, 2, 3, 4] + }, + { + "data_buffer": [1, 2, 3, 4, 5, 6, 7, 8] + }, + { + "data_buffer": [] + }, + ], + "inputs": [], + "outputs": [], + "operators": [], + }], + "version": + 2 +} + +BUFFER_TEST_V3 = { + "operator_codes": [], + "subgraphs": [{ + "tensors": [ + { + "buffer": 1 + }, + { + "buffer": 2 + }, + { + "buffer": 0 + }, + ], + "inputs": [], + "outputs": [], + "operators": [], + }], + "buffers": [ + { + "data": [] + }, + { + "data": [1, 2, 3, 4] + }, + { + "data": [1, 2, 3, 4, 5, 6, 7, 8] + }, + ], + "version": + 3 +} + + +def JsonDumpAndFlush(data, fp): + """Write the dictionary `data` to a JSON file `fp` (and flush). + + Args: + data: in a dictionary that is JSON serializable. + fp: File-like object + """ + json.dump(data, fp) + fp.flush() + + +class TestSchemaUpgrade(test_util.TensorFlowTestCase): + + def testNonExistantFile(self): + converter = upgrade_schema_lib.Converter() + non_existent = tempfile.mktemp(suffix=".json") + with self.assertRaisesRegexp(IOError, "No such file or directory"): + converter.Convert(non_existent, non_existent) + + def testInvalidExtension(self): + converter = upgrade_schema_lib.Converter() + invalid_extension = tempfile.mktemp(suffix=".foo") + with self.assertRaisesRegexp(ValueError, "Invalid extension on input"): + converter.Convert(invalid_extension, invalid_extension) + with tempfile.NamedTemporaryFile(suffix=".json") as in_json: + JsonDumpAndFlush(EMPTY_TEST_SCHEMA_V1, in_json) + with self.assertRaisesRegexp(ValueError, "Invalid extension on output"): + converter.Convert(in_json.name, invalid_extension) + + def CheckConversion(self, data_old, data_expected): + """Given a data dictionary, test upgrading to current version. + + Args: + data_old: TFLite model as a dictionary (arbitrary version). + data_expected: TFLite model as a dictionary (upgraded). + """ + converter = upgrade_schema_lib.Converter() + with tempfile.NamedTemporaryFile(suffix=".json") as in_json, \ + tempfile.NamedTemporaryFile(suffix=".json") as out_json, \ + tempfile.NamedTemporaryFile(suffix=".bin") as out_bin, \ + tempfile.NamedTemporaryFile(suffix=".tflite") as out_tflite: + JsonDumpAndFlush(data_old, in_json) + # Test JSON output + converter.Convert(in_json.name, out_json.name) + # Test binary output + # Convert to .tflite and then to .bin and check if binary is equal + converter.Convert(in_json.name, out_tflite.name) + converter.Convert(out_tflite.name, out_bin.name) + self.assertEqual(open(out_bin.name).read(), open(out_tflite.name).read()) + # Test that conversion actually produced successful new json. + converted_schema = json.load(out_json) + self.assertEqual(converted_schema, data_expected) + + def testAlreadyUpgraded(self): + """A file already at version 3 should stay at version 3.""" + self.CheckConversion(EMPTY_TEST_SCHEMA_V3, EMPTY_TEST_SCHEMA_V3) + self.CheckConversion(TEST_SCHEMA_V3, TEST_SCHEMA_V3) + self.CheckConversion(BUFFER_TEST_V3, BUFFER_TEST_V3) + + # Disable this while we have incorrectly versioned structures around. + # def testV0Upgrade_IntroducesSubgraphs(self): + # """V0 did not have subgraphs; check to make sure they get introduced.""" + # self.CheckConversion(TEST_SCHEMA_V0, TEST_SCHEMA_V3) + + def testV1Upgrade_RenameOps(self): + """V1 had many different names for ops; check to make sure they rename.""" + self.CheckConversion(EMPTY_TEST_SCHEMA_V1, EMPTY_TEST_SCHEMA_V3) + self.CheckConversion(FULL_TEST_SCHEMA_V1, FULL_TEST_SCHEMA_V3) + + def testV2Upgrade_CreateBuffers(self): + """V2 did not have buffers; check to make sure they are created.""" + self.CheckConversion(BUFFER_TEST_V2, BUFFER_TEST_V3) + + +if __name__ == "__main__": + test_lib.main() diff --git a/tensorflow/contrib/lite/simple_memory_arena.cc b/tensorflow/contrib/lite/simple_memory_arena.cc new file mode 100644 index 0000000000..4aab244989 --- /dev/null +++ b/tensorflow/contrib/lite/simple_memory_arena.cc @@ -0,0 +1,136 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/simple_memory_arena.h" + +#include +#include +#include + +namespace { + +template +T AlignTo(size_t alignment, T offset) { + return offset % alignment == 0 ? offset + : offset + (alignment - offset % alignment); +} + +} // namespace + +namespace tflite { + +TfLiteStatus SimpleMemoryArena::Allocate(TfLiteContext* context, + size_t alignment, size_t size, + ArenaAlloc* new_alloc) { + TF_LITE_ENSURE(context, alignment < arena_alignment_); + + size_t current_top = 0; + + if (!allocs_.empty()) { + auto last = allocs_.rbegin(); + current_top = last->offset + last->size; + } + + // If we don't find a better gap just allocate at the end of the buffer. + size_t best_offset = AlignTo(alignment, current_top); + size_t best_offset_fit = std::numeric_limits::max(); + auto best_insertion_it = allocs_.end(); + + // Go through the sorted allocs and look at the gaps between them. + size_t current_offset = 0; + for (auto it = allocs_.begin(); it != allocs_.end(); ++it) { + size_t aligned_current_offset = AlignTo(alignment, current_offset); + // If we found a gap larger than required size, and smaller than previous + // best fit, take it. + if (aligned_current_offset + size <= it->offset && + it->offset - current_offset < best_offset_fit) { + best_offset = aligned_current_offset; + best_offset_fit = it->offset - current_offset; + best_insertion_it = it; + } + current_offset = it->offset + it->size; + } + + // Update the required buffer size. + high_water_mark_ = std::max(high_water_mark_, best_offset + size); + + new_alloc->offset = best_offset; + new_alloc->size = size; + allocs_.insert(best_insertion_it, *new_alloc); + + return kTfLiteOk; +} + +TfLiteStatus SimpleMemoryArena::Deallocate(TfLiteContext* context, + const ArenaAlloc& alloc) { + int erased_allocs_count = 0; + auto it = allocs_.begin(); + while (it != allocs_.end()) { + if (it->offset == alloc.offset) { + TF_LITE_ENSURE_EQ(context, it->size, alloc.size); + erased_allocs_count++; + it = allocs_.erase(it); + } else { + ++it; + } + } + TF_LITE_ENSURE_EQ(context, erased_allocs_count, 1); + return kTfLiteOk; +} + +TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) { + size_t required_size = RequiredBufferSize(); + if (required_size > underlying_buffer_size_) { + char* new_alloc = new char[required_size]; + char* new_underlying_buffer_aligned_ptr = reinterpret_cast( + AlignTo(arena_alignment_, reinterpret_cast(new_alloc))); + + // If the arena had been previously allocated, copy over the old memory. + // Since Alloc pointers are offset based, they will remain valid in the new + // memory block. + if (high_water_mark_ > 0 && underlying_buffer_size_ > 0) { + size_t copy_amount = std::min( + underlying_buffer_.get() + underlying_buffer_size_ - + underlying_buffer_aligned_ptr_, + new_alloc + required_size - new_underlying_buffer_aligned_ptr); + memcpy(new_underlying_buffer_aligned_ptr, underlying_buffer_aligned_ptr_, + copy_amount); + } + + underlying_buffer_.reset(new_alloc); + underlying_buffer_size_ = required_size; + underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr; + } + commited_ = true; + return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError; +} + +TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context, + const ArenaAlloc& alloc, + char** output_ptr) { + TF_LITE_ENSURE(context, commited_); + TF_LITE_ENSURE(context, output_ptr != nullptr); + *output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset; + return kTfLiteOk; +} + +TfLiteStatus SimpleMemoryArena::Clear() { + commited_ = false; + high_water_mark_ = 0; + allocs_.clear(); + return kTfLiteOk; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h new file mode 100644 index 0000000000..0d0b7f9ff7 --- /dev/null +++ b/tensorflow/contrib/lite/simple_memory_arena.h @@ -0,0 +1,84 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_SIMPLE_MEMORY_ARENA_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_SIMPLE_MEMORY_ARENA_H_ + +#include +#include +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// This little structure holds the offset and the size for a dynamic memory +// allocation in the memory arena. When the arena is commited and the +// underlying buffer is set, the alloc can be resolved into an actual memory +// pointer. +struct ArenaAlloc { + ArenaAlloc() : offset(0), size(0) {} + + size_t offset; + size_t size; + + inline bool operator<(const ArenaAlloc& other) const { + return offset < other.offset; + } +}; + +// This small class is responsible for allocating, dealocating and reusing +// dynamic memory from a common underlying buffer. The arena can be used in +// scenarios when the pattern of memory allocations and dealocations is +// repetitive, e.g. running NN inference in multiple iterations. +class SimpleMemoryArena { + public: + explicit SimpleMemoryArena(size_t arena_alignment) + : commited_(false), + arena_alignment_(arena_alignment), + high_water_mark_(0), + underlying_buffer_size_(0), + allocs_() {} + + TfLiteStatus Allocate(TfLiteContext* context, size_t alignment, size_t size, + ArenaAlloc* new_alloc); + + TfLiteStatus Deallocate(TfLiteContext* context, const ArenaAlloc& alloc); + + inline size_t RequiredBufferSize() { + // Add in a small amount of padding to reduce the chance of resize events + // for small allocations. + size_t padding = arena_alignment_; + return arena_alignment_ + high_water_mark_ + padding; + } + + TfLiteStatus Commit(TfLiteContext* context); + + TfLiteStatus ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc, + char** output_ptr); + + TfLiteStatus Clear(); + + private: + bool commited_; + size_t arena_alignment_; + size_t high_water_mark_; + std::unique_ptr underlying_buffer_; + size_t underlying_buffer_size_; + char* underlying_buffer_aligned_ptr_; + // TODO(maciekc): add list iterator to the ArenaAlloc to lookup quickly. + std::list allocs_; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_SIMPLE_MEMORY_ARENA_H_ diff --git a/tensorflow/contrib/lite/simple_memory_arena_test.cc b/tensorflow/contrib/lite/simple_memory_arena_test.cc new file mode 100644 index 0000000000..ac676092c6 --- /dev/null +++ b/tensorflow/contrib/lite/simple_memory_arena_test.cc @@ -0,0 +1,91 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/simple_memory_arena.h" + +#include +#include + +namespace tflite { +namespace { + +TEST(SimpleMemoryArenaTest, BasicArenaOperations) { + TfLiteContext context; + SimpleMemoryArena arena(64); + ArenaAlloc allocs[6]; + + arena.Allocate(&context, 32, 2047, &allocs[0]); + arena.Allocate(&context, 32, 2047, &allocs[1]); + arena.Allocate(&context, 32, 2047, &allocs[2]); + arena.Deallocate(&context, allocs[0]); + arena.Allocate(&context, 32, 1023, &allocs[3]); + arena.Allocate(&context, 32, 2047, &allocs[4]); + arena.Deallocate(&context, allocs[1]); + arena.Allocate(&context, 32, 1023, &allocs[5]); + + EXPECT_EQ(allocs[0].offset, 0); + EXPECT_EQ(allocs[1].offset, 2048); + EXPECT_EQ(allocs[2].offset, 4096); + EXPECT_EQ(allocs[3].offset, 0); + EXPECT_EQ(allocs[4].offset, 6144); + EXPECT_EQ(allocs[5].offset, 1024); +} + +TEST(SimpleMemoryArenaTest, TestAfterClear) { + TfLiteContext context; + SimpleMemoryArena arena(64); + ArenaAlloc allocs[9]; + + arena.Allocate(&context, 32, 2047, &allocs[0]); + arena.Allocate(&context, 32, 2047, &allocs[1]); + arena.Allocate(&context, 32, 2047, &allocs[2]); + arena.Commit(&context); + + EXPECT_EQ(allocs[0].offset, 0); + EXPECT_EQ(allocs[1].offset, 2048); + EXPECT_EQ(allocs[2].offset, 4096); + + arena.Clear(); + + // Test with smaller allocs. + arena.Allocate(&context, 32, 1023, &allocs[3]); + arena.Allocate(&context, 32, 1023, &allocs[4]); + arena.Allocate(&context, 32, 1023, &allocs[5]); + arena.Commit(&context); + + EXPECT_EQ(allocs[3].offset, 0); + EXPECT_EQ(allocs[4].offset, 1024); + EXPECT_EQ(allocs[5].offset, 2048); + + arena.Clear(); + + // Test larger allocs which should require a reallocation. + arena.Allocate(&context, 32, 4095, &allocs[6]); + arena.Allocate(&context, 32, 4095, &allocs[7]); + arena.Allocate(&context, 32, 4095, &allocs[8]); + arena.Commit(&context); + + EXPECT_EQ(allocs[6].offset, 0); + EXPECT_EQ(allocs[7].offset, 4096); + EXPECT_EQ(allocs[8].offset, 8192); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/string.h b/tensorflow/contrib/lite/string.h new file mode 100644 index 0000000000..ecd6f04ec2 --- /dev/null +++ b/tensorflow/contrib/lite/string.h @@ -0,0 +1,30 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Abstract string. We don't want even absl at this level. +#ifndef _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_STRING_H_ +#define _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_STRING_H_ + +#include +#include "tensorflow/core/platform/platform.h" + +namespace tflite { + +#ifndef PLATFORM_GOOGLE +using std::string; +#endif + +} // namespace tflite + +#endif // _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_STRING_H_ diff --git a/tensorflow/contrib/lite/string_util.cc b/tensorflow/contrib/lite/string_util.cc new file mode 100644 index 0000000000..cd41299d38 --- /dev/null +++ b/tensorflow/contrib/lite/string_util.cc @@ -0,0 +1,117 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/string_util.h" + +#include +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" + +namespace tflite { +namespace { + +// Convenient method to get pointer to int32_t. +int32_t* GetIntPtr(char* ptr) { return reinterpret_cast(ptr); } +} // namespace + +void DynamicBuffer::AddString(const char* str, size_t len) { + data_.resize(data_.size() + len); + memcpy(data_.data() + offset_.back(), str, len); + offset_.push_back(offset_.back() + len); +} + +void DynamicBuffer::AddString(const StringRef& string) { + AddString(string.str, string.len); +} + +void DynamicBuffer::AddJoinedString(const std::vector& strings, + char separator) { + // Resize the data buffer. + int total_len = strings.size() - 1; + for (StringRef ref : strings) { + total_len += ref.len; + } + data_.resize(data_.size() + total_len); + + int current_idx = 0; + for (StringRef ref : strings) { + char* dst = data_.data() + offset_.back() + current_idx; + + // Fill separator if not first string. + if (current_idx != 0) { + *dst = separator; + ++dst; + ++current_idx; + } + + // Fill content of the string. + memcpy(dst, ref.str, ref.len); + current_idx += ref.len; + } + offset_.push_back(offset_.back() + total_len); +} + +void DynamicBuffer::WriteToTensor(TfLiteTensor* tensor) { + // Allocate sufficient memory to tensor buffer. + int32_t num_strings = offset_.size() - 1; + // Total bytes include: + // * size of content (data_.size) + // * offset of each tensor (sizeof(int32_t) * num_strings) + // * length of whole buffer (int32_t) + // * num of strings (int32_t). + int32_t bytes = data_.size() // size of content + + sizeof(int32_t) * (num_strings + 2); // size of header + + // Output tensor will take over the ownership of tensor_buffer, and free it + // during Interpreter destruction. + char* tensor_buffer = static_cast(malloc(bytes)); + + // Set num of string + memcpy(tensor_buffer, &num_strings, sizeof(int32_t)); + + // Set offset of strings. + int32_t start = sizeof(int32_t) * (num_strings + 2); + for (int i = 0; i < offset_.size(); i++) { + int32_t offset = start + offset_[i]; + memcpy(tensor_buffer + sizeof(int32_t) * (i + 1), &offset, sizeof(int32_t)); + } + + // Copy data of strings. + memcpy(tensor_buffer + start, data_.data(), data_.size()); + + // Set tensor content pointer to tensor_buffer, and release original data. + auto dims = TfLiteIntArrayCreate(1); + dims->data[0] = num_strings; + TfLiteTensorReset(tensor->type, tensor->name, dims, tensor->params, + tensor_buffer, bytes, kTfLiteDynamic, tensor->allocation, + tensor); +} + +int GetStringCount(const TfLiteTensor* tensor) { + // The first integers in the raw buffer is the number of strings. + return *GetIntPtr(tensor->data.raw); +} + +StringRef GetString(const TfLiteTensor* tensor, int string_index) { + int32_t* offset = + GetIntPtr(tensor->data.raw + sizeof(int32_t) * (string_index + 1)); + return { + tensor->data.raw + (*offset), + (*(offset + 1)) - (*offset), + }; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/string_util.h b/tensorflow/contrib/lite/string_util.h new file mode 100644 index 0000000000..12872d1123 --- /dev/null +++ b/tensorflow/contrib/lite/string_util.h @@ -0,0 +1,91 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Util methods to read and write String tensors. +// String tensors are considered to be char tensor with protocol. +// [0, 3] 4 bytes: N, num of strings in the tensor in little endian. +// [(i+1)*4, (i+1)*4+3] 4 bytes: offset of i-th string in little endian. +// [(N+2)*4, (N+2)*4+3] 4 bytes: length of the whole char buffer. +// [offset(i), offset(i+1) - 1] : content of i-th string. +// Example of a string tensor: +// [ +// 2, 0, 0, 0, # 2 strings. +// 16, 0, 0, 0, # 0-th string starts from index 12. +// 18, 0, 0, 0, # 1-st string starts from index 18. +// 18, 0, 0, 0, # total length of array. +// 'A', 'B', # 0-th string [16..17]: "AB" +// ] # 1-th string, empty +// +// A typical usage: +// In op.Eval(context, node): +// DynamicBuffer buf; +// # Add string "AB" to tensor, string is stored in dynamic buffer. +// buf.AddString("AB", 2); +// # Write content of DynamicBuffer to tensor in format of string tensor +// # described above. +// buf.WriteToTensor(tensor) + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_STRING_UTIL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_STRING_UTIL_H_ + +#include + +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/string.h" + +namespace tflite { + +// Convenient structure to store string pointer and length. +typedef struct { + char* str; + int len; +} StringRef; + +// DynamicBuffer holds temporary buffer that will be used to create a dynamic +// tensor. A typical usage is to initialize a DynamicBuffer object, fill in +// content and call CreateStringTensor in op.Eval(). +class DynamicBuffer { + public: + DynamicBuffer() : offset_({0}) {} + + // Add string to dynamic buffer by resizing the buffer and copying the data. + void AddString(const StringRef& string); + + // Add string to dynamic buffer by resizing the buffer and copying the data. + void AddString(const char* str, size_t len); + + // Join a list of string with separator, and add as a single string to the + // buffer. + void AddJoinedString(const std::vector& strings, char separator); + + // Fill content into a string tensor. + void WriteToTensor(TfLiteTensor* tensor); + + private: + // Data buffer to store contents of strings, not including headers. + std::vector data_; + // Offset of the starting index of each string in data buffer. + std::vector offset_; +}; + +// Return num of strings in a String tensor. +int GetStringCount(const TfLiteTensor* tensor); + +// Get String pointer and length of index-th string in tensor. +// NOTE: This will not create a copy of string data. +StringRef GetString(const TfLiteTensor* tensor, int string_index); +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_STRING_UTIL_H_ diff --git a/tensorflow/contrib/lite/string_util_test.cc b/tensorflow/contrib/lite/string_util_test.cc new file mode 100644 index 0000000000..5c351638dc --- /dev/null +++ b/tensorflow/contrib/lite/string_util_test.cc @@ -0,0 +1,117 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/string_util.h" + +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" + +namespace tflite { + +TEST(StringUtil, TestStringUtil) { + Interpreter interpreter; + interpreter.AddTensors(3); + + TfLiteTensor* t0 = interpreter.tensor(0); + t0->type = kTfLiteString; + t0->allocation_type = kTfLiteDynamic; + + TfLiteTensor* t1 = interpreter.tensor(1); + t1->type = kTfLiteString; + t1->allocation_type = kTfLiteDynamic; + + char data[] = {1, 0, 0, 0, 12, 0, 0, 0, 15, 0, 0, 0, 'X', 'Y', 'Z'}; + + interpreter.SetTensorParametersReadOnly(2, kTfLiteString, "", {1}, {}, data, + 15); + TfLiteTensor* t2 = interpreter.tensor(2); + interpreter.AllocateTensors(); + + char s0[] = "ABC"; + string s1 = "DEFG"; + char s2[] = ""; + + // Write strings to tensors + DynamicBuffer buf0; + buf0.AddString(s0, 3); + DynamicBuffer buf1; + buf1.AddString(s1.data(), s1.length()); + buf0.AddString(s2, 0); + buf0.WriteToTensor(t0); + buf1.WriteToTensor(t1); + + // Read strings from tensors. + ASSERT_EQ(GetStringCount(t0), 2); + StringRef str_ref; + str_ref = GetString(t0, 0); + ASSERT_EQ(string(str_ref.str, str_ref.len), "ABC"); + str_ref = GetString(t0, 1); + ASSERT_EQ(string(str_ref.str, str_ref.len), ""); + ASSERT_EQ(t0->bytes, 19); + + ASSERT_EQ(GetStringCount(t1), 1); + str_ref = GetString(t1, 0); + ASSERT_EQ(string(str_ref.str, str_ref.len), "DEFG"); + ASSERT_EQ(t1->bytes, 16); + + ASSERT_EQ(GetStringCount(t2), 1); + str_ref = GetString(t2, 0); + ASSERT_EQ(string(str_ref.str, str_ref.len), "XYZ"); + ASSERT_EQ(t2->bytes, 15); +} + +TEST(StringUtil, TestAddJoinedString) { + Interpreter interpreter; + interpreter.AddTensors(1); + TfLiteTensor* t0 = interpreter.tensor(0); + t0->type = kTfLiteString; + t0->allocation_type = kTfLiteDynamic; + + char s0[] = "ABC"; + char s1[] = "DEFG"; + char s2[] = ""; + char s3[] = "XYZ"; + + DynamicBuffer buf; + buf.AddJoinedString({{s0, 3}, {s1, 4}, {s2, 0}, {s3, 3}}, ' '); + buf.WriteToTensor(t0); + + ASSERT_EQ(GetStringCount(t0), 1); + StringRef str_ref; + str_ref = GetString(t0, 0); + ASSERT_EQ(string(str_ref.str, str_ref.len), "ABC DEFG XYZ"); + ASSERT_EQ(t0->bytes, 25); +} + +TEST(StringUtil, TestEmptyList) { + Interpreter interpreter; + interpreter.AddTensors(1); + TfLiteTensor* t0 = interpreter.tensor(0); + t0->type = kTfLiteString; + t0->allocation_type = kTfLiteDynamic; + DynamicBuffer buf; + buf.WriteToTensor(t0); + + ASSERT_EQ(GetStringCount(t0), 0); + ASSERT_EQ(t0->bytes, 8); +} + +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/testdata/0_subgraphs.bin b/tensorflow/contrib/lite/testdata/0_subgraphs.bin new file mode 100644 index 0000000000000000000000000000000000000000..5606898d7fd50aa25f7c4be692d2308bcea7c87d GIT binary patch literal 80 zcmb1OU|-Fjx?wu7ZJqA%O^WnILx%p)Q4ifnfs?>XI247;X@u zE{OpYUOZrbgF+G%x*&hz^Lru#0|OHS0|P4q0|Og09zgyyfrj6r|Ns9pFfee7Fn~;C zU;vo~3Nw&ekUmftg2J`r|NsBz(A3H>K;jW(Hb^Z4)JzrzHmDmx=>TLd4+A?i9bmHy vWIjkg$X*cr;Q#;sATf}6FfoujKx~kISr|YNVmBxZKtAJO0NDyj6Ciy6)UYsP literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/testdata/multi_add.json b/tensorflow/contrib/lite/testdata/multi_add.json new file mode 100644 index 0000000000..97b931dba8 --- /dev/null +++ b/tensorflow/contrib/lite/testdata/multi_add.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "operator_codes": [ + { + "builtin_code": "ADD" + } + ], + "subgraphs": [ + { + "tensors": [ + { "shape": [ 1, 8, 8, 3 ], "name": "a" }, + { "shape": [ 1, 8, 8, 3 ], "name": "b" }, + { "shape": [ 1, 8, 8, 3 ], "name": "c" }, + { "shape": [ 1, 8, 8, 3 ], "name": "d" }, + { "shape": [ 1, 8, 8, 3 ], "name": "i" }, + { "shape": [ 1, 8, 8, 3 ], "name": "x" }, + { "shape": [ 1, 8, 8, 3 ], "name": "y" } + ], + "inputs": [ 0, 1, 2, 3 ], + "outputs": [ 5, 6 ], + "operators": [ + { + "inputs": [ 1, 2 ], + "outputs": [ 4 ], + "builtin_options_type": "AddOptions", + "builtin_options": { + } + }, + { + "inputs": [ 0, 4 ], + "outputs": [ 5 ], + "builtin_options_type": "AddOptions", + "builtin_options": { + } + }, + { + "inputs": [ 3, 4 ], + "outputs": [ 6 ], + "builtin_options_type": "AddOptions", + "builtin_options": { + } + } + ] + } + ] +} diff --git a/tensorflow/contrib/lite/testdata/no_subgraphs.bin b/tensorflow/contrib/lite/testdata/no_subgraphs.bin new file mode 100644 index 0000000000000000000000000000000000000000..5606898d7fd50aa25f7c4be692d2308bcea7c87d GIT binary patch literal 80 zcmb1OU|jKWMJT6U|^WR$iTn|mSJGv zfvXgNs%Bu&VPs&~!N9<9fPsO5g@J*ggMon|f`NfS1Ij<}|NnoGIUv(P=77uwsmm`d zF=Aj~V0eIA-Vh|u!@vVJ7vvg{8R%xQF)%P><`tBd7=qj<09Fg~3rH;pgY1OSAU0gR z0RsaQ0|bNI1Y$ct{mH<<#Q-u1WDXYt!v7$5gUkS_2hkusAhjU#K_LiID**L3Odg^J zVg@?fgaPDch|ky=IKbh}z`&pYvIk^VNosLPW?p)HegP;X*cdn%*cm|X2SpLcECr}t Z@BaV)57Gm|AbS}YKx$dQYC&Qk-2epXATs~} literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/testdata/test_model_broken.bin b/tensorflow/contrib/lite/testdata/test_model_broken.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fd050cd4a82a89c00aa3e1c6fac0e05223a285c GIT binary patch literal 432 zcmWe(00BM*2?iDh4h9|u0R|BUJ_ZH`9|i`74n_us3PuJ74h9AW7DfgJCI$uu0|o{L z9tMyYSQH}1z`(%8z`#(FT3nKummZ&A0Fq|`84Q=7!N9-}1Jw_bY5V{GzYN$MkXo=( z29S7uX^9bt$H>6Iz|6qFunxC6Lu7T=u&85WU|`71D<~~7WMF`q$H2hHAON-rotJr1^FF>Js21mKw%~THU(k^$Q=R<5Op9LW(LUpAhSUF1i+yN e3S*FJ1_qEi5QeD*sR3b-EKCoG4>AKp!^8n}j~v?o literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/testdata/test_model_broken.json b/tensorflow/contrib/lite/testdata/test_model_broken.json new file mode 100644 index 0000000000..b701eb9a25 --- /dev/null +++ b/tensorflow/contrib/lite/testdata/test_model_broken.json @@ -0,0 +1,62 @@ +{ + "subgraphs": [ + { + "inputs": [0, 1], + "outputs": [2, 3], + "operators": [ + { + "opcode_index": 0, + "inputs": [0,1], + "outputs": [2] + }, + { + "opcode_index": 1, + "inputs": [2], + "outputs": [3] + } + ], + "tensors": [ + { + "shape" : [ + 2 + ], + "type" : "FLOAT32", + "name" : "input0", + "data_buffer" : [1,0,0,0] + }, + { + "shape" : [ + 3 + ], + "type" : "FLOAT32", + "name" : "input1", + "data_buffer" : [] + }, + { + "shape" : [ + 3 + ], + "type" : "FLOAT32", + "name" : "out1", + "data_buffer" : [] + }, + { + "shape" : [ + 3 + ], + "type" : "FLOAT32", + "name" : "out2", + "data_buffer" : [] + } + ], + } + ], + "operator_codes": [ + { + "builtin_code": 0 + }, + { + "custom_code": "testing_op" + } + ] +} diff --git a/tensorflow/contrib/lite/testdata/two_subgraphs.bin b/tensorflow/contrib/lite/testdata/two_subgraphs.bin new file mode 100644 index 0000000000000000000000000000000000000000..07308ba62b2db533bb541c47872ba9f239e8b045 GIT binary patch literal 172 zcmb1OU| zipped + +bazel run //tensorflow/contrib/lite/testing:generate_examples + third_party/tensorflow/contrib/lite/testing/generated_examples zipped +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import itertools +import os +import re +import sys +import tempfile +import traceback +import zipfile +import numpy as np +from six import StringIO +import tensorflow as tf +from google.protobuf import text_format +# TODO(aselle): switch to TensorFlow's resource_loader +from tensorflow.contrib.lite.testing import generate_examples_report as report_lib +from tensorflow.python.framework import graph_util as tf_graph_util + +parser = argparse.ArgumentParser(description="Script to generate TFLite tests.") +parser.add_argument("output_path", + help="Directory where the outputs will be go.") +# TODO(ahentz): remove this flag +parser.add_argument("type", help="zipped") +parser.add_argument("--zip_to_output", + type=str, + help="Particular zip to output.", + required=False) +parser.add_argument("--toco", + type=str, + help="Path to toco tool.", + required=True) +parser.add_argument( + "--known_bugs_are_errors", + action="store_true", + help=("If a particular model is affected by a known bug," + " count it as a toco error.")) +parser.add_argument( + "--ignore_toco_errors", + action="store_true", + help="Raise an exception if any toco error is encountered.") +parser.add_argument( + "--save_graphdefs", + action="store_true", + help="Include intermediate graphdefs in the output zip files.") + + +RANDOM_SEED = 342 +TEST_INPUT_DEPTH = 3 + + +# A map from regular expression to bug number. Any test failure with label +# matching the expression will be considered due to the corresponding bug. +KNOWN_BUGS = { + # TOCO doesn't support scalars as input. + r"relu.*input_shape=\[\]": "67587484", + r"sigmoid.*input_shape=\[\]": "67645668", + # Concat doesn't work with a single input tensor + r"concat.*num_tensors=1": "67378344", + # Transposition in MatMul is not supported. + r"fully_connected.*transpose_.=True": "67586970", + # Softmax graphs are too complex. + r"softmax.*dim=0": "67749831", + r"softmax.*input_shape=\[1,3,4,3\]": "67749831", + # SpaceToDepth only supports float32. + r"space_to_depth.*(float16|int32|uint8|int64)": "68018134", +} + + +def toco_options(data_types, + input_arrays, + output_arrays, + shapes, + drop_control_dependency): + """Create TOCO options to process a model. + + Args: + data_types: input and inference types used by TOCO. + input_arrays: names of the input tensors + output_arrays: name of the output tensors + shapes: shapes of the input tensors + drop_control_dependency: whether to ignore control dependency nodes. + + Returns: + the options in a string. + """ + shape_str = ":".join([",".join(str(y) for y in x) for x in shapes]) + inference_type = "FLOAT" + # TODO(ahentz): if we get multi-input quantization to work we need this + # to change + if data_types[0] == "QUANTIZED_UINT8": + inference_type = "QUANTIZED_UINT8" + s = (" --input_types=%s" % ",".join(data_types) + + " --inference_type=%s" % inference_type + + " --input_format=TENSORFLOW_GRAPHDEF" + " --output_format=TFLITE" + + " --input_arrays=%s" % ",".join(input_arrays) + + " --input_shapes=%s" % shape_str + + " --output_arrays=%s" % ",".join(output_arrays)) + if drop_control_dependency: + s += " --drop_control_dependency" + return s + + +def write_toco_options(filename, + data_types, + input_arrays, + output_arrays, + shapes, + drop_control_dependency=False): + """Create TOCO options to process a model. + + Args: + filename: Filename to write the options to. + data_types: input and inference types used by TOCO. + input_arrays: names of the input tensors + output_arrays: names of the output tensors + shapes: shapes of the input tensors + drop_control_dependency: whether to ignore control dependency nodes. + """ + with open(filename, "w") as fp: + fp.write( + toco_options( + data_types=data_types, + input_arrays=input_arrays, + output_arrays=output_arrays, + shapes=shapes, + drop_control_dependency=drop_control_dependency)) + + +def write_examples(fp, examples): + """Given a list `examples`, write a text format representation. + + The file format is csv like with a simple repeated pattern. We would ike + to use proto here, but we can't yet due to interfacing with the Android + team using this format. + + Args: + fp: File-like object to write to. + examples: Example dictionary consiting of keys "inputs" and "outputs" + """ + + def write_tensor(fp, x): + """Write tensor in file format supported by TFLITE example.""" + fp.write("dtype,%s\n" % x.dtype) + fp.write("shape," + ",".join(map(str, x.shape)) + "\n") + # Output 9 digits after the point to ensure the precision is good enough. + values = ["{:.9f}".format(value) for value in list(x.flatten())] + fp.write("values," + ",".join(values) + "\n") + + fp.write("test_cases,%d\n" % len(examples)) + for example in examples: + fp.write("inputs,%d\n" % len(example["inputs"])) + for i in example["inputs"]: + write_tensor(fp, i) + fp.write("outputs,%d\n" % len(example["outputs"])) + for i in example["outputs"]: + write_tensor(fp, i) + + +def write_test_cases(fp, model_name, examples): + """Given a dictionary of `examples`, write a text format representation. + + The file format is protocol-buffer-like, even though we don't use proto due + to the needs of the Android team. + + Args: + fp: File-like object to write to. + model_name: Filename where the model was written to, relative to filename. + examples: Example dictionary consiting of keys "inputs" and "outputs" + """ + + fp.write("load_model: %s\n" % os.path.basename(model_name)) + for example in examples: + fp.write("reshape {\n") + for t in example["inputs"]: + fp.write(" input: \"" + ",".join(map(str, t.shape)) + "\"\n") + fp.write("}\n") + fp.write("invoke {\n") + + for t in example["inputs"]: + values = ["{:.9f}".format(value) for value in list(t.flatten())] + fp.write(" input: \"" + ",".join(values) + "\"\n") + for t in example["outputs"]: + values = ["{:.9f}".format(value) for value in list(t.flatten())] + fp.write(" output: \"" + ",".join(values) + "\"\n") + fp.write("}\n") + + +_TF_TYPE_INFO = { + tf.float32: (np.float32, "FLOAT"), + tf.float16: (np.float16, "FLOAT"), + tf.int32: (np.int32, "INT32"), + tf.uint8: (np.uint8, "QUANTIZED_UINT8"), + tf.int64: (np.int64, "INT64"), +} + + +def create_tensor_data(dtype, shape, min_value=-100, max_value=100): + """Build tensor data spreading the range [min_value, max_value).""" + + if dtype in _TF_TYPE_INFO: + dtype = _TF_TYPE_INFO[dtype][0] + + if dtype in (tf.float32, tf.float16): + value = (max_value-min_value)*np.random.random_sample(shape)+min_value + elif dtype in (tf.int32, tf.uint8, tf.int64): + value = np.random.random_integers(min_value, max_value, shape) + return value.astype(dtype) + + +def freeze_graph(session, outputs): + """Freeze the current graph. + + Args: + session: Tensorflow sessions containing the graph + outputs: List of output tensors + + Returns: + The frozen graph_def. + """ + return tf_graph_util.convert_variables_to_constants( + session, session.graph.as_graph_def(), [x.op.name for x in outputs]) + + +def make_control_dep_tests(zip_path): + """Make a set of tests that use control dependencies.""" + + test_parameters = [{ + "input_shape": [[], [1, 1, 1, 1], [1, 15, 14, 1], [3, 15, 14, 3]], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + filter_value = tf.zeros((3, 3, TEST_INPUT_DEPTH, 8), tf.float32) + assert_op = tf.assert_greater_equal(input_tensor, input_tensor - 1) + with tf.control_dependencies([assert_op]): + out = tf.nn.conv2d(input_tensor, filter_value, + strides=(1, 1, 1, 1), padding="SAME") + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(tf.float32, parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs, + drop_control_dependency=True) + + +def toco_convert(graph_def_str, input_tensors, output_tensors, + drop_control_dependency=False): + """Convert a model's graph def into a tflite model. + + NOTE: this currently shells out to the toco binary, but we would like + convert to Python API tooling in the future. + + Args: + graph_def_str: Graph def proto in serialized string format. + input_tensors: List of input tensor tuples `(name, shape, type)` + output_tensors: List of output tensors (names) + drop_control_dependency: whether to ignore control dependency nodes. + + Returns: + output tflite model, log_txt from conversion + or None, log_txt if it did not convert properly. + """ + data_types = [_TF_TYPE_INFO[x[2]][1] for x in input_tensors] + opts = toco_options( + data_types=data_types, + input_arrays=[x[0] for x in input_tensors], + shapes=[x[1] for x in input_tensors], + output_arrays=output_tensors, + drop_control_dependency=drop_control_dependency) + + with tempfile.NamedTemporaryFile() as graphdef_file, \ + tempfile.NamedTemporaryFile() as output_file, \ + tempfile.NamedTemporaryFile("w+") as stdout_file: + graphdef_file.write(graph_def_str) + graphdef_file.flush() + + # TODO(aselle): Switch this to subprocess at some point. + cmd = ("%s --input_file=%s --output_file=%s %s > %s 2>&1" % + (bin_path, graphdef_file.name, output_file.name, opts, + stdout_file.name)) + exit_code = os.system(cmd) + log = ( + cmd + "exited with code %d" % exit_code + "\n------------------\n" + + stdout_file.read()) + return (None if exit_code != 0 else output_file.read()), log + + +def make_zip_of_tests(zip_path, + test_parameters, + make_graph, + make_test_inputs, + drop_control_dependency=False): + """Helper to make a zip file of a bunch of TensorFlow models. + + This does a cartestian product of the dictionary of test_parameters and + calls make_graph() for each item in the cartestian product set. + If the graph is built successfully, then make_test_inputs() is called to + build expected input/output value pairs. The model is then converted to tflite + with toco, and the examples are serialized with the tflite model into a zip + file (2 files per item in the cartesian product set). + + Args: + zip_path: Path of zip file to write + test_parameters: Dictionary mapping to lists for each parameter. + e.g. `{"strides": [[1,3,3,1], [1,2,2,1]], "foo": [1.2, 1.3]}` + make_graph: function that takes current parameters and returns tuple + `[input1, input2, ...], [output1, output2, ...]` + make_test_inputs: function taking `curr_params`, `session`, `input_tensors`, + `output_tensors` and returns tuple `(input_values, output_values)`. + drop_control_dependency: whether to ignore control dependency nodes. + Raises: + RuntimeError: if there are toco errors that can't be ignored. + """ + + # TODO(aselle): Make this allow multiple inputs outputs. + archive = zipfile.PyZipFile(zip_path, "w") + zip_manifest = [] + convert_report = [] + toco_errors = 0 + for parameters in test_parameters: + keys = parameters.keys() + for curr in itertools.product(*parameters.values()): + label = zip_path.replace(".zip", "") + (",".join( + "%s=%r" % z for z in sorted(zip(keys, curr))).replace(" ", "")) + if label[0] == "/": + label = label[1:] + param_dict = dict(zip(keys, curr)) + + def build_example(label, param_dict_real): + """Build the model with parameter values set in param_dict_real. + + Args: + label: Label of the model (i.e. the filename in the zip). + param_dict_real: Parameter dictionary (arguments to the factories + make_graph and make_test_inputs) + Returns: + (tflite_model_binary, report) where tflite_model_binary is the + serialized flatbuffer as a string and report is a dictionary with + keys `toco_log` (log of toco conversion), `tf_log` (log of tf + conversion), `toco` (a string of success status of the conversion), + `tf` (a string success status of the conversion). + """ + + np.random.seed(RANDOM_SEED) + report = {"toco": report_lib.NOTRUN, "tf": report_lib.FAILED} + + # Build graph + report["tf_log"] = "" + report["toco_log"] = "" + tf.reset_default_graph() + + try: + inputs, outputs = make_graph(param_dict_real) + except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, + ValueError): + report["tf_log"] += traceback.format_exc() + return None, report + + sess = tf.Session() + try: + baseline_inputs, baseline_outputs = (make_test_inputs( + param_dict_real, sess, inputs, outputs)) + except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, + ValueError): + report["tf_log"] += traceback.format_exc() + return None, report + report["toco"] = report_lib.FAILED + report["tf"] = report_lib.SUCCESS + + # Convert graph to toco + tflite_model_binary, toco_log = toco_convert( + sess.graph_def.SerializeToString(), + [(input_tensor.name.split(":")[0], input_tensor.get_shape(), + input_tensor.dtype) for input_tensor in inputs], + [out.name.split(":")[0] + for out in outputs], drop_control_dependency) + report["toco"] = (report_lib.SUCCESS if tflite_model_binary is not None + else report_lib.FAILED) + report["toco_log"] = toco_log + + if FLAGS.save_graphdefs: + archive.writestr(label + ".pb", + text_format.MessageToString(sess.graph_def), + zipfile.ZIP_DEFLATED) + + if tflite_model_binary: + archive.writestr(label + ".bin", tflite_model_binary, + zipfile.ZIP_DEFLATED) + example = {"inputs": baseline_inputs, "outputs": baseline_outputs} + + example_fp = StringIO() + write_examples(example_fp, [example]) + archive.writestr(label + ".inputs", + example_fp.getvalue(), zipfile.ZIP_DEFLATED) + + example_fp2 = StringIO() + write_test_cases(example_fp2, label + ".bin", [example]) + archive.writestr(label + "_tests.txt", + example_fp2.getvalue(), zipfile.ZIP_DEFLATED) + + zip_manifest.append(label + "\n") + + return tflite_model_binary, report + + _, report = build_example(label, param_dict) + + if report["toco"] == report_lib.FAILED: + ignore_error = False + if not FLAGS.known_bugs_are_errors: + for pattern, bug_number in KNOWN_BUGS.items(): + if re.search(pattern, label): + print("Ignored TOCO error due to bug %s" % bug_number) + ignore_error = True + if not ignore_error: + toco_errors += 1 + print("-----------------\ntoco error!\n%s\n-----------------\n" % + report["toco_log"]) + + convert_report.append((param_dict, report)) + report_io = StringIO() + report_lib.make_report_table(report_io, zip_path, convert_report) + archive.writestr("report.html", report_io.getvalue()) + + archive.writestr("manifest.txt", "".join(zip_manifest), zipfile.ZIP_DEFLATED) + + # Log statistics of what succeeded + total_conversions = len(convert_report) + tf_success = sum(1 for x in convert_report + if x[1]["tf"] == report_lib.SUCCESS) + toco_success = sum(1 for x in convert_report + if x[1]["toco"] == report_lib.SUCCESS) + percent = 0 + if tf_success > 0: + percent = float(toco_success) / float(tf_success) * 100. + tf.logging.info(("Archive %s Considered %d graphs, %d TF evaluated graphs " + " and %d TOCO converted graphs (%.1f%%"), zip_path, + total_conversions, tf_success, toco_success, percent) + + if not FLAGS.ignore_toco_errors and toco_errors > 0: + raise RuntimeError( + "Found %d errors while generating toco models" % toco_errors) + + +def make_pool_tests(pool_op_in): + """Make a set of tests to do average pooling. + + Args: + pool_op_in: TensorFlow pooling operation to test i.e. `tf.nn.avg_pool`. + + Returns: + A function representing the true generator (after curried pool_op_in). + """ + + pool_op = pool_op_in + + def f(zip_path): + """Actual function that generates examples. + + Args: + zip_path: path to write zip to. + """ + + # Chose a set of parameters + test_parameters = [{ + "ksize": [[2, 1, 1, 2], [1, 1, 1, 1], [1, 1, 2, 1], [1, 10, 11, 1]], + "strides": [[2, 1, 1, 2], [1, 1, 1, 1], [1, 1, 2, 1], [1, 10, 11, 1]], + # TODO(aselle): should add in a degenerate shape (e.g. [1, 0, 1, 1]). + "input_shape": [[], [1, 1, 1, 1], [1, 15, 14, 1], [3, 15, 14, 3]], + "padding": ["SAME", "VALID"], + "data_format": ["NHWC"], # TODO(aselle): NCHW would be good + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + out = pool_op( + input_tensor, + ksize=parameters["ksize"], + strides=parameters["strides"], + data_format=parameters["data_format"], + padding=parameters["padding"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(tf.float32, parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + return f + + +def make_relu_tests(zip_path): + """Make a set of tests to do relu.""" + + # Chose a set of parameters + test_parameters = [{ + "input_shape": [[], [1], [2, 3], [1, 1, 1, 1], [1, 3, 4, 3], + [3, 15, 14, 3], [3, 1, 2, 4, 6], [2, 2, 3, 4, 5, 6]], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + out = tf.nn.relu(input_tensor) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data( + np.float32, parameters["input_shape"], min_value=-4, max_value=10) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_relu1_tests(zip_path): + """Make a set of tests to do relu1.""" + + # Chose a set of parameters + test_parameters = [{ + "input_shape": [[], [1, 1, 1, 1], [1, 3, 4, 3], [3, 15, 14, 3], + [3, 1, 2, 4, 6], [2, 2, 3, 4, 5, 6]], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + # Note that the following is not supported: + # out = tf.maximum(-1.0, tf.minimum(input_tensor, 1.0)) + out = tf.minimum(1.0, tf.maximum(input_tensor, -1.0)) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data( + np.float32, parameters["input_shape"], min_value=-3, max_value=10) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_relu6_tests(zip_path): + """Make a set of tests to do relu6.""" + + # Chose a set of parameters + test_parameters = [{ + "input_shape": [[], [1, 1, 1, 1], [1, 3, 4, 3], [3, 15, 14, 3], + [3, 1, 2, 4, 6], [2, 2, 3, 4, 5, 6]], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + out = tf.nn.relu(input_tensor) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data( + np.float32, parameters["input_shape"], min_value=-3, max_value=10) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +# This function tests various TensorFLow functions that generates Const op, +# including `tf.ones`, `tf.zeros` and random functions. +def make_constant_tests(zip_path): + """Make a set of tests to do constant ops.""" + + test_parameters = [{ + "dtype": [tf.float32, tf.int32], + "input_shape": [[1], [2], [1, 1, 1, 1], [2, 2, 2, 2]], + }] + + def build_graph(parameters): + # Since Toco & Tflite can't have a single constant op in the entire graph, + # this test adds a zero tesnor with a constant op tensor. + input1 = tf.placeholder(dtype=parameters["dtype"], name="input1", + shape=parameters["input_shape"]) + out = tf.ones(parameters["input_shape"], dtype=parameters["dtype"]) + input1 + return [input1], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input1 = np.zeros(parameters["input_shape"], + dtype=_TF_TYPE_INFO[parameters["dtype"]][0]) + return [input1], sess.run(outputs, feed_dict={inputs[0]: input1}) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_add_tests(zip_path): + """Make a set of tests to do add with and without broadcast.""" + + # These parameters are split because we don't support broadcasting. + test_parameters = [{ + "dtype": [tf.float32, tf.int32], + "input_shape_1": [[1, 3, 4, 3]], + "input_shape_2": [[1, 3, 4, 3]], + }, { + "dtype": [tf.float32], + "input_shape_1": [[5]], + "input_shape_2": [[5]], + }, { + "dtype": [tf.float32], + "input_shape_1": [[1, 3, 4, 3]], + "input_shape_2": [[3]], + }] + + def build_graph(parameters): + input1 = tf.placeholder(dtype=parameters["dtype"], name="input1", + shape=parameters["input_shape_1"]) + input2 = tf.placeholder(dtype=parameters["dtype"], name="input2", + shape=parameters["input_shape_2"]) + out = tf.add(input1, input2) + return [input1, input2], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input1 = create_tensor_data(parameters["dtype"], + parameters["input_shape_1"]) + input2 = create_tensor_data(parameters["dtype"], + parameters["input_shape_2"]) + return [input1, input2], sess.run( + outputs, feed_dict={ + inputs[0]: input1, + inputs[1]: input2 + }) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_mul_tests(zip_path): + """Make a set of tests to do mul with and without broadcast.""" + + # These parameters are split because we don't support broadcasting. + test_parameters = [{ + "dtype": [tf.float32, tf.int32], + "input_shape_1": [[1, 3, 4, 3]], + "input_shape_2": [[1, 3, 4, 3]], + }, { + "dtype": [tf.float32], + "input_shape_1": [[5]], + "input_shape_2": [[5]], + }, { + "dtype": [tf.float32], + "input_shape_1": [[1, 3, 4, 3]], + "input_shape_2": [[3]], + }] + + def build_graph(parameters): + input1 = tf.placeholder(dtype=parameters["dtype"], name="input1", + shape=parameters["input_shape_1"]) + input2 = tf.placeholder(dtype=parameters["dtype"], name="input2", + shape=parameters["input_shape_2"]) + out = tf.multiply(input1, input2) + return [input1, input2], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input1 = create_tensor_data(parameters["dtype"], + parameters["input_shape_1"]) + input2 = create_tensor_data(parameters["dtype"], + parameters["input_shape_2"]) + return [input1, input2], sess.run( + outputs, feed_dict={inputs[0]: input1, + inputs[1]: input2}) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_global_batch_norm_tests(zip_path): + """Make a set of tests to do batch_norm_with_global_normalization.""" + + test_parameters = [{ + "dtype": [tf.float32], + "input_shape": [[1, 1, 6, 2], [3, 4, 5, 4]], + "epsilon": [0.1, 0.0001], + "scale_after": [True, False], + }] + + def build_graph(parameters): + """Build the global batch norm testing graph.""" + input_shape = parameters["input_shape"] + scale_shape = input_shape[3] + + scale = create_tensor_data(parameters["dtype"], scale_shape) + offset = create_tensor_data(parameters["dtype"], scale_shape) + mean = create_tensor_data(parameters["dtype"], scale_shape) + variance = create_tensor_data(parameters["dtype"], scale_shape) + + x = create_tensor_data(parameters["dtype"], parameters["input_shape"]) + x_norm = tf.nn.batch_norm_with_global_normalization( + x, mean, variance, scale, offset, + parameters["epsilon"], parameters["scale_after"]) + + input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input", + shape=parameters["input_shape"]) + out = tf.add(input_tensor, x_norm) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_fused_batch_norm_tests(zip_path): + """Make a set of tests to do fused_batch_norm.""" + + test_parameters = [{ + "dtype": [tf.float32], + "input_shape": [[1, 1, 6, 2]], + "epsilon": [0.001, 0.1], + }] + + def build_graph(parameters): + """Build the testing graph for fused batch normalization.""" + input_shape = parameters["input_shape"] + scale_shape = input_shape[3] + + scale = create_tensor_data(parameters["dtype"], scale_shape) + offset = create_tensor_data(parameters["dtype"], scale_shape) + mean = create_tensor_data(parameters["dtype"], scale_shape) + variance = create_tensor_data(parameters["dtype"], scale_shape) + + x = create_tensor_data(parameters["dtype"], parameters["input_shape"]) + [x_norm, _, _] = tf.nn.fused_batch_norm( + x, scale, offset, mean, variance, + parameters["epsilon"], data_format="NHWC", is_training=False) + + input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input", + shape=parameters["input_shape"]) + out = tf.add(input_tensor, x_norm) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_conv_tests(zip_path): + """Make a set of tests to do convolution.""" + + test_parameters = [{ + "input_shape": [[1, 3, 4, 3]], + "filter_shape": [[1, 1, 3, 2]], + "strides": [[1, 1, 1, 1], [1, 2, 3, 1]], + "padding": ["SAME", "VALID"], + "data_format": ["NHWC"], # TODO(aselle): NCHW would be good + }, { + "input_shape": [[2, 14, 14, 2]], + "filter_shape": [[6, 6, 2, 2]], + "strides": [[1, 1, 1, 1], [1, 2, 3, 1]], + "padding": ["SAME", "VALID"], + "data_format": ["NHWC"], # TODO(aselle): NCHW would be good + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + filter_values = create_tensor_data(np.float32, parameters["filter_shape"]) + out = tf.nn.conv2d(input_tensor, filter_values, + strides=parameters["strides"], + padding=parameters["padding"], + data_format=parameters["data_format"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(np.float32, parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_depthwiseconv_tests(zip_path): + """Make a set of tests to do convolution.""" + + # Tensorflow only supports equal strides + test_parameters = [{ + "input_shape": [[1, 3, 4, 3], [1, 10, 10, 3]], + "filter_size": [[1, 1], [1, 2], [3, 3]], + "strides": [[1, 1, 1, 1], [1, 3, 3, 1]], + "channel_multiplier": [1, 2], + "rate": [[1, 1]], + "padding": ["SAME", "VALID"], + "data_format": ["NHWC"], + }, { + "input_shape": [[1, 3, 4, 3]], + "filter_size": [[1, 1]], + "strides": [[1, 1, 2, 1]], # TF needs [1, x, x, 1] + "channel_multiplier": [2], + "rate": [[2, 2]], # Only [1, 1] is supported + "padding": ["SAME"], + "data_format": ["NHWC"], + }] + + def build_graph(parameters): + """Build a depthwise conv graph given `parameters`.""" + input_shape = parameters["input_shape"] + filter_size = parameters["filter_size"] + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=input_shape) + filter_shape = filter_size + [ + input_shape[3], parameters["channel_multiplier"]] + filter_values = create_tensor_data(np.float32, filter_shape) + out = tf.nn.depthwise_conv2d( + input_tensor, filter_values, + strides=parameters["strides"], + rate=parameters["rate"], + padding=parameters["padding"], + data_format=parameters["data_format"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(np.float32, parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_concatenation_tests(zip_path): + """Make a set of tests to do concatenatinon.""" + + test_parameters = [{ + "base_shape": [[1, 3, 4, 3], [3, 4]], + "num_tensors": [1, 2, 3, 4, 5, 6], + "axis": [0, 1, 2, 3], + }] + + def get_shape(parameters, delta): + """Return a tweaked version of 'base_shape'.""" + axis = parameters["axis"] + shape = parameters["base_shape"][:] + if axis < len(shape): + shape[axis] += delta + return shape + + def build_graph(parameters): + all_tensors = [] + for n in range(0, parameters["num_tensors"]): + input_tensor = tf.placeholder(dtype=tf.float32, name=("input%d" % n), + shape=get_shape(parameters, n)) + all_tensors.append(input_tensor) + out = tf.concat(all_tensors, parameters["axis"]) + return all_tensors, [out] + + def build_inputs(parameters, sess, inputs, outputs): + all_values = [] + for n in range(0, parameters["num_tensors"]): + input_values = create_tensor_data(np.float32, + get_shape(parameters, n)) + all_values.append(input_values) + return all_values, sess.run( + outputs, feed_dict=dict(zip(inputs, all_values))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_fully_connected_tests(zip_path): + """Make a set of tests to do fully_connected.""" + + test_parameters = [{ + "shape1": [[3, 3]], + "shape2": [[3, 3]], + "transpose_a": [True, False], + "transpose_b": [True, False], + }, { + "shape1": [[4, 4], [1, 4], [4]], + "shape2": [[4, 4], [4, 1], [4]], + "transpose_a": [False], + "transpose_b": [False], + }, { + "shape1": [[40, 37]], + "shape2": [[37, 40]], + "transpose_a": [False], + "transpose_b": [False], + + }] + + def build_graph(parameters): + input_tensor1 = tf.placeholder(dtype=tf.float32, name="input1", + shape=parameters["shape1"]) + input_tensor2 = create_tensor_data(np.float32, parameters["shape2"]) + out = tf.matmul(input_tensor1, input_tensor2, + transpose_a=parameters["transpose_a"], + transpose_b=parameters["transpose_b"]) + return [input_tensor1], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values1 = create_tensor_data(np.float32, shape=parameters["shape1"]) + return [input_values1], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values1]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_l2norm_tests(zip_path): + """Make a set of tests to do l2norm.""" + + # Chose a set of parameters + test_parameters = [{ + "input_shape": [[5, 7], [1, 1, 1, 1], [1, 3, 4, 3], [3, 15, 14, 3], + [3, 1, 2, 4, 6], [2, 2, 3, 4, 5, 6]], + "dim": [0, 1, 2, 3, [2, 3], -2], + "epsilon": [None, 1e-12, 1e-3], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + if parameters["epsilon"]: + out = tf.nn.l2_normalize( + input_tensor, parameters["dim"], epsilon=parameters["epsilon"]) + else: + out = tf.nn.l2_normalize(input_tensor, parameters["dim"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data( + np.float32, parameters["input_shape"], min_value=-4, max_value=10) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_local_response_norm_tests(zip_path): + """Make a set of tests to do local_response_norm.""" + + # Chose a set of parameters + test_parameters = [{ + "input_shape": [[1, 1, 1, 1], [1, 3, 4, 3], [3, 15, 14, 3]], + "depth_radius": [None, 0, 1, 3, 4, 5], + "bias": [None, 0.1, 0.3, -0.1], + "alpha": [None, 1, 2, -3], + "beta": [None, 0.5, 0.25, 2], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + out = tf.nn.local_response_normalization( + input_tensor, depth_radius=parameters["depth_radius"], + bias=parameters["bias"], alpha=parameters["alpha"], + beta=parameters["beta"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data( + np.float32, parameters["input_shape"], min_value=-4, max_value=10) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_reshape_tests(zip_path): + """Make a set of tests to do reshape.""" + + # Alll shapes below are suitable for tensors with 420 elements. + test_parameters = [{ + "dtype": [tf.float32, tf.int32], + "input_shape": [[3, 4, 5, 7], [4, 105], [21, 5, 2, 2], [420]], + "output_shape": [[15, 28], [420], [1, -1, 5, 7], [-1]], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input", + shape=parameters["input_shape"]) + out = tf.reshape(input_tensor, shape=parameters["output_shape"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(parameters["dtype"], + parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_resize_bilinear_tests(zip_path): + """Make a set of tests to do resize_bilinear.""" + + test_parameters = [{ + "dtype": [tf.float32, tf.int32], + "input_shape": [[1, 3, 4, 3], [1, 10, 2, 1]], + "size": [[1, 1], [4, 3], [2, 2], [5, 6]], + "align_corners": [None, True, False], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input", + shape=parameters["input_shape"]) + out = tf.image.resize_bilinear(input_tensor, size=parameters["size"], + align_corners=parameters["align_corners"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(parameters["dtype"], + parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_sigmoid_tests(zip_path): + """Make a set of tests to do sigmoid.""" + + test_parameters = [{ + "dtype": [tf.float32], + "input_shape": [[1, 3, 4, 3], [4], [], [1, 2, 3, 4, 5, 6]], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input", + shape=parameters["input_shape"]) + out = tf.sigmoid(input_tensor) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(parameters["dtype"], + parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_softmax_tests(zip_path): + """Make a set of tests to do softmax.""" + + test_parameters = [{ + "dtype": [tf.float32], + "input_shape": [[1, 3, 4, 3], [2, 3]], + "dim": [-1, 0], + }, { + "dtype": [tf.float32], + "input_shape": [[4, 7]], + "dim": [-1, 1], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input", + shape=parameters["input_shape"]) + out = tf.nn.softmax(input_tensor, dim=parameters["dim"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(parameters["dtype"], + parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_space_to_depth_tests(zip_path): + """Make a set of tests to do space_to_depth.""" + + test_parameters = [{ + "dtype": [tf.float32, tf.float16, tf.int32, tf.uint8, tf.int64], + "input_shape": [[2, 12, 24, 1]], + "block_size": [2, 3, 4], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input", + shape=parameters["input_shape"]) + out = tf.space_to_depth(input_tensor, block_size=parameters["block_size"]) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data(parameters["dtype"], + parameters["input_shape"]) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_l2_pool(input_tensor, ksize, strides, padding, data_format): + """Given an input perform a sequence of TensorFlow ops to produce l2pool.""" + return tf.sqrt(tf.nn.avg_pool( + tf.square(input_tensor), ksize=ksize, strides=strides, + padding=padding, data_format=data_format)) + + +# Toco binary path provided by the generate rule. +bin_path = None + + +def main(unused_args): + global bin_path + def mkdir_if_not_exist(x): + if not os.path.isdir(x): + os.mkdir(x) + if not os.path.isdir(x): + raise RuntimeError("Failed to create dir %r" % x) + + if FLAGS.type == "zipped": + opstest_path = os.path.join(FLAGS.output_path) + mkdir_if_not_exist(opstest_path) + def _path(filename): + return os.path.join(opstest_path, filename) + + dispatch = { + "control_dep.zip": make_control_dep_tests, + "add.zip": make_add_tests, + "conv.zip": make_conv_tests, + "constant.zip": make_constant_tests, + "depthwiseconv.zip": make_depthwiseconv_tests, + "concat.zip": make_concatenation_tests, + "fully_connected.zip": make_fully_connected_tests, + "global_batch_norm.zip": make_global_batch_norm_tests, + "fused_batch_norm.zip": make_fused_batch_norm_tests, + "l2norm.zip": make_l2norm_tests, + "local_response_norm.zip": make_local_response_norm_tests, + "mul.zip": make_mul_tests, + "relu.zip": make_relu_tests, + "relu1.zip": make_relu1_tests, + "relu6.zip": make_relu6_tests, + "l2_pool.zip": make_pool_tests(make_l2_pool), + "avg_pool.zip": make_pool_tests(tf.nn.avg_pool), + "max_pool.zip": make_pool_tests(tf.nn.max_pool), + "reshape.zip": make_reshape_tests, + "resize_bilinear.zip": make_resize_bilinear_tests, + "sigmoid.zip": make_sigmoid_tests, + "softmax.zip": make_softmax_tests, + "space_to_depth.zip": make_space_to_depth_tests, + } + out = FLAGS.zip_to_output + bin_path = FLAGS.toco + if out in dispatch: + dispatch[out](_path(out)) + else: + raise RuntimeError("Invalid zip to output %r" % out) + + else: + raise RuntimeError("Invalid argument for type of generation.") + + +if __name__ == "__main__": + FLAGS, unparsed = parser.parse_known_args() + + if unparsed: + print("Usage: %s zipped ") + else: + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/lite/testing/generate_examples_report.py b/tensorflow/contrib/lite/testing/generate_examples_report.py new file mode 100644 index 0000000000..7bcf8cd86a --- /dev/null +++ b/tensorflow/contrib/lite/testing/generate_examples_report.py @@ -0,0 +1,125 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Make HTML tables that report where TF and TOCO failed to convert models. + +This is primarily used by generate_examples.py. See it or +`make_report_table` for more details on usage. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import cgi +import json + +FAILED = "FAILED" +SUCCESS = "SUCCESS" +NOTRUN = "NOTRUN" + + +def make_report_table(fp, title, reports): + """Make an HTML report of the success/failure reports. + + Args: + fp: File-like object in which to put the html. + title: "Title of the zip file this pertains to." + reports: a list of conversion attempts. (report_args, report_vals) i.e. + ({"shape": [1,2,3], "type": "tf.float32"}, + {"tf": "SUCCESS", "toco": "FAILURE", "toco_log": "Unsupported type.", + "tf_log": ""}) + """ + # sort reports by if TOCO failure and then TF failure (reversed) + reports.sort(key=lambda x: x[1]["toco"], reverse=False) + reports.sort(key=lambda x: x[1]["tf"], reverse=True) + def result_cell(x, row, col): + """Produce a cell with the condition string `x`.""" + s = cgi.escape(repr(x), quote=True) + color = "#44ff44" if x == SUCCESS else ( + "#ff4444" if x == FAILED else "#eeeeee") + handler = "ShowLog(%d, %d)" % (row, col) + fp.write("
\n" % ( + color, handler, s)) + + fp.write(""" + +tflite report + + +""") + # Write the log data to a javascript variable and also make a function + # in javascript to show the log when an item is clicked. + fp.write("\n") + + # Write the main table and use onclick on the items that have log items. + fp.write(""" + +

TOCO Conversion

+

%s

+""" % title) + + # Get a list of keys that are in any of the records. + param_keys = {} + for params, _ in reports: + for k in params.keys(): + param_keys[k] = True + + fp.write("
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.4.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.4.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.4.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
tensorflow-1.3.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.3.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
tensorflow-1.2.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.2.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.35.18
tensorflow-1.1.0CPU3.5MSVC 2015 update 3Cmake v3.6.3N/AN/A
%s
\n") + fp.write("\n") + fp.write("\n") + fp.write("
\n") + fp.write("
\n") + fp.write("\n") + fp.write("\n") + for p in param_keys: + fp.write("\n" % cgi.escape(p, quote=True)) + fp.write("\n") + fp.write("\n") + fp.write("\n") + for idx, (params, vals) in enumerate(reports): + fp.write("\n") + for p in param_keys: + fp.write(" \n" % cgi.escape(repr(params[p]), quote=True)) + + result_cell(vals["tf"], idx, 0) + result_cell(vals["toco"], idx, 1) + fp.write("\n") + fp.write("
%sTensorFlowTOCO
%s
\n") + fp.write("
\n") + fp.write("
\n") + fp.write("\n") + fp.write(""" + + + """) diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc new file mode 100644 index 0000000000..e7df97ee54 --- /dev/null +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -0,0 +1,279 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include "re2/re2.h" +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/testing/parse_testdata.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/subprocess.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace { +bool FLAGS_ignore_known_bugs = true; +} // namespace + +namespace tflite { +namespace testing { + +// TensorFlow system environment for file system called. +tensorflow::Env* env = tensorflow::Env::Default(); + +// List of tests that are expected to fail when +// --test_arg=--ignore_known_bugs=false +// Key is a substring of the test name and value is a bug number. +// TODO(ahentz): make sure we clean this list up frequently. +std::map kBrokenTests = { + // Add doesn't support broadcasting. + {R"(addd.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, + {R"(muld.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, + + // Add only supports float32. (and "constant" tests use Add) + {R"(addd.*int32)", "68808744"}, + {R"(constant.*int32)", "68808744"}, + {R"(mul.*int32)", "68808744"}, + + // Toco or TFLite has a bug to deal with some constant functions with + // more than 1 element. + {R"(constant.*input_shape=\[(2|2,2,2,2)\])", "68721522"}, + + // L2Norm only supports 4D tensors. + {R"(l2normdim=.*,epsilon=.*,input_shape=\[.,.\])", "67963684"}, + {R"(l2normdim=.*,epsilon=.*,input_shape=\[.,.,.,.,.*\])", "67963684"}, + + // L2Norm only works for dim=-1. + {R"(l2normdim=-2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(l2normdim=-2,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(l2normdim=2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(l2normdim=2,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(l2normdim=0,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(l2normdim=0,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(l2normdim=1,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(l2normdim=1,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(l2normdim=\[2,3\],epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(l2normdim=\[2,3\],epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + + // ResizeBilinear looks completely incompatible with Tensorflow + {R"(resize_bilinear)", "67964336"}, +}; + +// Allows test data to be unzipped into a temporary directory and makes +// sure those temporary directories are removed later. +class ZipEnvironment : public ::testing::Environment { + public: + ~ZipEnvironment() override {} + + // Delete all temporary directories on teardown. + void TearDown() override { + for (const auto& dir : temporary_directories_) { + tensorflow::int64 undeleted_dirs, undeleted_files; + TF_CHECK_OK( + env->DeleteRecursively(dir, &undeleted_dirs, &undeleted_files)); + } + temporary_directories_.clear(); + } + + // Unzip `zip` file into a new temporary directory `out_dir`. + tensorflow::Status UnZip(const std::string& zip, std::string* out_dir) { + string dir; + TF_CHECK_OK(MakeTemporaryDirectory(&dir)); + tensorflow::SubProcess proc; + std::string unzip_binary = + "/usr/bin/unzip"; + proc.SetProgram(unzip_binary, {"unzip", "-d", dir, zip.c_str()}); + proc.SetChannelAction(tensorflow::CHAN_STDOUT, tensorflow::ACTION_PIPE); + proc.SetChannelAction(tensorflow::CHAN_STDERR, tensorflow::ACTION_PIPE); + if (!proc.Start()) + return tensorflow::Status(tensorflow::error::UNKNOWN, + "unzip couldn't start"); + string out, err; + int status = proc.Communicate(nullptr, &out, &err); + if (WEXITSTATUS(status) == 0) { + *out_dir = dir; + return tensorflow::Status::OK(); + } else { + return tensorflow::Status(tensorflow::error::UNKNOWN, "unzip failed"); + } + } + + private: + // Make a temporary directory and return its name in `temporary`. + tensorflow::Status MakeTemporaryDirectory(string* temporary) { + if (env->LocalTempFilename(temporary)) { + TF_CHECK_OK(env->CreateDir(*temporary)); + temporary_directories_.push_back(*temporary); + return tensorflow::Status::OK(); + } + return tensorflow::Status(tensorflow::error::UNKNOWN, + "make temporary directory failed"); + } + + std::vector temporary_directories_; +}; + +// Return the singleton zip_environment. +ZipEnvironment* zip_environment() { + static ZipEnvironment* env = new ZipEnvironment; + return env; +} + +// Read the manifest.txt out of the unarchived zip file. Specifically +// `original_file` is the original zip file for error messages. `dir` is +// the temporary directory where the zip file has been unarchived and +// `test_paths` is the list of test prefixes that were in the manifest. +// Note, it is an error for a manifest to contain no tests. +tensorflow::Status ReadManifest(const std::string& original_file, + const std::string& dir, + std::vector* test_paths) { + // Read the newline delimited list of entries in the manifest. + std::ifstream manifest_fp(dir + "/manifest.txt"); + std::string manifest((std::istreambuf_iterator(manifest_fp)), + std::istreambuf_iterator()); + size_t pos = 0; + int added = 0; + while (true) { + size_t end_pos = manifest.find("\n", pos); + if (end_pos == std::string::npos) break; + std::string filename = manifest.substr(pos, end_pos - pos); + test_paths->push_back(dir + "/" + filename); + pos = end_pos + 1; + added += 1; + } + if (!added) { + std::string message = "Test had no examples: " + original_file; + return tensorflow::Status(tensorflow::error::UNKNOWN, message.c_str()); + } + return tensorflow::Status::OK(); +} + +// Get a list of tests from a zip file `zip_file_name`. +std::vector UnarchiveZipAndFindTestNames( + const std::string& zip_file_name) { + std::string zip_file = ::tensorflow::testing::TensorFlowSrcRoot() + + "/contrib/lite/testing/optest/" + zip_file_name; + std::string decompress_tmp_dir; + TF_CHECK_OK(zip_environment()->UnZip(zip_file, &decompress_tmp_dir)); + std::vector stuff; + TF_CHECK_OK(ReadManifest(zip_file, decompress_tmp_dir, &stuff)); + return stuff; +} + +class OpsTest : public ::testing::TestWithParam {}; + +TEST_P(OpsTest, RunStuff) { + std::string test_path = GetParam(); + std::string tflite_file = test_path + ".bin"; + std::string tflite_examples = test_path + ".inputs"; + auto model = tflite::FlatBufferModel::BuildFromFile(tflite_file.c_str()); + std::unique_ptr interpreter; + + tflite::ops::builtin::BuiltinOpResolver builtins; + ASSERT_EQ(tflite::InterpreterBuilder(*model, builtins)(&interpreter), + kTfLiteOk); + + std::vector examples; + ASSERT_EQ(tflite::testing::ParseExamples(tflite_examples.c_str(), &examples), + kTfLiteOk); + + string bug_number; + for (const auto& p : kBrokenTests) { + if (RE2::PartialMatch(test_path, p.first)) { + bug_number = p.second; + } + } + + for (const auto& example : examples) { + ASSERT_EQ(interpreter->inputs().size(), example.inputs.size()); + auto result = [&]() { + TF_LITE_ENSURE_STATUS(FeedExample(interpreter.get(), example)); + TF_LITE_ENSURE_STATUS(interpreter->Invoke()); + TF_LITE_ENSURE_STATUS(CheckOutputs(interpreter.get(), example)); + return kTfLiteOk; + }(); + + if (bug_number.empty()) { + ASSERT_EQ(result, kTfLiteOk); + } else { + if (FLAGS_ignore_known_bugs) { + ASSERT_EQ(result, kTfLiteError) + << "Not failing as expected dut to http://b/" << bug_number; + } else { + ASSERT_EQ(result, kTfLiteOk) + << "Possibly due to http://b/" << bug_number; + } + } + } +} + +// Instantiate a test. This assumes `zip_base`.zip is a declared data file +// of this test. +#define INSTANTIATE_TESTS(zip_base) \ + INSTANTIATE_TEST_CASE_P( \ + zip_base, OpsTest, \ + ::testing::ValuesIn(UnarchiveZipAndFindTestNames(#zip_base ".zip"))); + +INSTANTIATE_TESTS(add) +INSTANTIATE_TESTS(avg_pool) +INSTANTIATE_TESTS(concat) +INSTANTIATE_TESTS(constant) +INSTANTIATE_TESTS(control_dep) +INSTANTIATE_TESTS(conv) +INSTANTIATE_TESTS(depthwiseconv) +INSTANTIATE_TESTS(fully_connected) +INSTANTIATE_TESTS(fused_batch_norm) +INSTANTIATE_TESTS(global_batch_norm) +INSTANTIATE_TESTS(l2norm) +INSTANTIATE_TESTS(l2_pool) +INSTANTIATE_TESTS(local_response_norm) +INSTANTIATE_TESTS(max_pool) +INSTANTIATE_TESTS(mul) +INSTANTIATE_TESTS(relu) +INSTANTIATE_TESTS(relu1) +INSTANTIATE_TESTS(relu6) +INSTANTIATE_TESTS(reshape) +INSTANTIATE_TESTS(resize_bilinear) +INSTANTIATE_TESTS(sigmoid) +INSTANTIATE_TESTS(softmax) +INSTANTIATE_TESTS(space_to_depth) + +} // namespace testing +} // namespace tflite + +int main(int argc, char** argv) { + ::testing::AddGlobalTestEnvironment(tflite::testing::zip_environment()); + + std::vector flags = {tensorflow::Flag( + "ignore_known_bugs", &FLAGS_ignore_known_bugs, + "If a particular model is affected by a known bug, the " + "corresponding test should expect the outputs to not match.")}; + bool success = tensorflow::Flags::Parse(&argc, argv, flags); + if (!success || (argc == 2 && !strcmp(argv[1], "--helpfull"))) { + fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str()); + return 1; + } + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/testing/message.cc b/tensorflow/contrib/lite/testing/message.cc new file mode 100644 index 0000000000..03fae4bb86 --- /dev/null +++ b/tensorflow/contrib/lite/testing/message.cc @@ -0,0 +1,96 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/message.h" + +#include + +#include "tensorflow/contrib/lite/testing/tokenize.h" + +namespace tflite { +namespace testing { + +// A token processor that builds messages and forward calls to the current +// message object. Place a new message at the top of the stack when it start +// and remove it when it is finished. +class MessageStack : public TokenProcessor { + public: + // Start a new MessageStack with the given first_node, which will be used to + // process freestanding fields and submessages. + explicit MessageStack(Message* first_node) { + nodes_.push(first_node); + valid_ = true; + } + + void ConsumeToken(std::string* token) override { + if (!valid_) return; + Message* current_node = nodes_.top(); + if (*token == "{") { + // This is the beginning of a new message, names after the previous token. + if (previous_token_.empty()) { + valid_ = false; + return; + } + nodes_.push(current_node ? current_node->AddChild(previous_token_) + : nullptr); + previous_token_.clear(); + } else if (*token == "}") { + // A message is being completed. There should be no previous token. Note + // that the top-level message never closes, so we should always have at + // least one entry in the stack. + if (nodes_.size() == 1 || !previous_token_.empty()) { + valid_ = false; + return; + } + if (current_node) { + current_node->Finish(); + } + nodes_.pop(); + } else if (*token == ":") { + // We reached the end of the 'key' portion of a field. Store the token + // until we have the 'value' portion. + if (previous_token_.empty()) { + valid_ = false; + return; + } + } else { + if (previous_token_.empty()) { + previous_token_.swap(*token); + } else { + // This is the 'value' portion of a field. The previous token is the + // 'key'. + if (current_node) { + current_node->SetField(previous_token_, *token); + } + previous_token_.clear(); + } + } + } + + bool valid() const { return valid_; } + + private: + std::stack nodes_; + std::string previous_token_; + bool valid_; +}; + +bool Message::Read(std::istream* input, Message* message) { + MessageStack stack(message); + Tokenize(input, &stack); + return stack.valid(); +} + +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/message.h b/tensorflow/contrib/lite/testing/message.h new file mode 100644 index 0000000000..78ef7e2cbe --- /dev/null +++ b/tensorflow/contrib/lite/testing/message.h @@ -0,0 +1,82 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_MESSAGE_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_MESSAGE_H_ + +#include +#include +#include + +namespace tflite { +namespace testing { + +// A Message is a textual protobuf-like structure that looks like: +// tag { +// f : "values" +// child { +// a : 1 +// } +// } +// This class provides the framework for processing message but does not +// associate any particular behavior to fields and submessage. In order +// to properly parse a stream this class must be derived. +class Message { + public: + // Reads a stream, tokenizes it and create a new message under the given + // top-level message. Returns true if the parsing succeeded. + static bool Read(std::istream* input, Message* message); + + Message() {} + virtual ~Message() {} + + // Called when a new field is found. For example, when: + // f : "values" + // is found, it triggers: + // SetField("f", "values"); + virtual void SetField(const std::string& name, const std::string& value) {} + + // Called when a submessage is started. For example, when: + // child { + // is found, it triggers + // AddChild("child"); + // If nullptr is returned, the contents of the submessage will be ignored. + // Otherwise, the returned Message will be used to handle new fields and new + // submessages. The caller should not take ownership of the returned pointer. + virtual Message* AddChild(const std::string& name) { return nullptr; } + + // Called when a submessage is completed, that is, whenever a '}' is found. + virtual void Finish() {} + + protected: + // Takes ownership of the given pointer. Subclasses can use this method if + // they don't want to implement their own ownership semantics. + Message* Store(Message* n) { + children_.emplace_back(n); + return n; + } + + // Returns a list of all owned submessages. + const std::vector>& Children() const { + return children_; + } + + private: + std::vector> children_; +}; + +} // namespace testing +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_MESSAGE_H_ diff --git a/tensorflow/contrib/lite/testing/message_test.cc b/tensorflow/contrib/lite/testing/message_test.cc new file mode 100644 index 0000000000..fb6a49bd6f --- /dev/null +++ b/tensorflow/contrib/lite/testing/message_test.cc @@ -0,0 +1,121 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/message.h" + +#include + +#include +#include + +namespace tflite { +namespace testing { +namespace { + +// A hierarchical, key-value store. +class TestMessage : public Message { + public: + TestMessage() {} + explicit TestMessage(const std::string& text_to_parse) { + std::stringstream ss(text_to_parse); + finished_ = Message::Read(&ss, this); + } + void SetField(const std::string& name, const std::string& value) override { + fields_[name] = value; + } + Message* AddChild(const std::string& name) override { + TestMessage* m = new TestMessage; + m->name_ = name; + return Store(m); + } + void Finish() override { finished_ = true; } + + int NumChildren() const { return Children().size(); } + + const TestMessage* GetChild(int i) const { + return dynamic_cast(Children()[i].get()); + } + + int NumFields() const { return fields_.size(); } + const std::string& GetField(const std::string& key) const { + return fields_.at(key); + } + + const std::string& name() const { return name_; } + bool finished() const { return finished_; } + + protected: + std::string name_; + std::map fields_; + bool finished_ = false; +}; + +TEST(MessageTest, Simple) { + TestMessage message("x{a:1 b:2} y{} z{c:3} d:4"); + ASSERT_TRUE(message.finished()); + + ASSERT_EQ(message.NumFields(), 1); + EXPECT_EQ(message.GetField("d"), "4"); + + ASSERT_EQ(message.NumChildren(), 3); + + auto* x = message.GetChild(0); + EXPECT_EQ(x->name(), "x"); + ASSERT_EQ(x->NumFields(), 2); + EXPECT_EQ(x->GetField("a"), "1"); + EXPECT_EQ(x->GetField("b"), "2"); + + auto* y = message.GetChild(1); + EXPECT_EQ(y->name(), "y"); + ASSERT_EQ(y->NumFields(), 0); + + auto* z = message.GetChild(2); + EXPECT_EQ(z->name(), "z"); + ASSERT_EQ(z->NumFields(), 1); + EXPECT_EQ(z->GetField("c"), "3"); +} + +TEST(MessageTest, Unnamed) { + TestMessage message("x{c:3} {} y{d:4}"); + ASSERT_FALSE(message.finished()); + EXPECT_EQ(message.NumChildren(), 1); +} + +TEST(MessageTest, TooManyBraces) { + TestMessage message("x{c:3} } y{d:4}"); + ASSERT_FALSE(message.finished()); + EXPECT_EQ(message.NumChildren(), 1); +} + +TEST(MessageTest, LeftoverToken) { + TestMessage message("x{c:3} z{test} y{d:4}"); + ASSERT_FALSE(message.finished()); + EXPECT_EQ(message.NumChildren(), 2); +} + +TEST(MessageTest, MissingKey) { + TestMessage message("x{c:3} z{:test} y{d:4}"); + ASSERT_FALSE(message.finished()); + EXPECT_EQ(message.NumChildren(), 2); +} + +TEST(MessageTest, MissingValue) { + TestMessage message("x{c:3} z{test:} y{d:4}"); + ASSERT_FALSE(message.finished()); + EXPECT_EQ(message.NumChildren(), 2); +} + +} // namespace +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/nnapi_example.cc b/tensorflow/contrib/lite/testing/nnapi_example.cc new file mode 100644 index 0000000000..74f6cfc3de --- /dev/null +++ b/tensorflow/contrib/lite/testing/nnapi_example.cc @@ -0,0 +1,114 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// NOTE: this is an example driver that converts a tflite model to TensorFlow. +// This is an example that will be integrated more tightly into tflite in +// the future. +// +// Usage: bazel run -c opt \ +// tensorflow/contrib/lite/nnapi:nnapi_example -- +// +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h" +#include "tensorflow/contrib/lite/testing/parse_testdata.h" + +// TODO(aselle): FATAL leaves resources hanging. +void FATAL(const char* format, ...) { + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fflush(stderr); + exit(1); +} + +#define CHECK_TFLITE_SUCCESS(x) \ + if (x != kTfLiteOk) { \ + FATAL("Aborting since tflite returned failure."); \ + } + +void Interpret(const char* filename, const char* examples_filename, + bool use_nnapi) { + // TODO(aselle): Resize of input image should go here + // ... + // For now I am allocating all tensors. This means I am fixed size. + // So I am not using the variable size ability yet. + fprintf(stderr, "example file %s\n", examples_filename); + std::vector examples; + CHECK_TFLITE_SUCCESS( + tflite::testing::ParseExamples(examples_filename, &examples)); + + for (const tflite::testing::Example& example : examples) { + auto model = tflite::FlatBufferModel::BuildFromFile(filename); + if (!model) FATAL("Cannot read file %s\n", filename); + std::unique_ptr interpreter; + tflite::ops::builtin::BuiltinOpResolver builtins; + + CHECK_TFLITE_SUCCESS( + tflite::InterpreterBuilder(*model, builtins)(&interpreter)); + + printf("Use nnapi is set to: %d\n", use_nnapi); + interpreter->UseNNAPI(use_nnapi); + CHECK_TFLITE_SUCCESS( + tflite::testing::FeedExample(interpreter.get(), example)); + + { + TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]); + if (float* data = + interpreter->typed_tensor(interpreter->outputs()[0])) { + size_t num = tensor->bytes / sizeof(float); + for (float* p = data; p < data + num; p++) { + *p = 0; + } + } + } + interpreter->Invoke(); + + CHECK_TFLITE_SUCCESS( + tflite::testing::CheckOutputs(interpreter.get(), example)); + + printf("Result:\n"); + TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]); + if (float* data = + interpreter->typed_tensor(interpreter->outputs()[0])) { + size_t num = tensor->bytes / sizeof(float); + for (float* p = data; p < data + num; p++) { + printf(" %f", *p); + } + } + } +} + +int main(int argc, char* argv[]) { + bool use_nnapi = true; + if (argc == 4) { + use_nnapi = strcmp(argv[3], "1") == 0 ? true : false; + } + if (argc < 3) { + fprintf(stderr, + "Compiled " __DATE__ __TIME__ + "\n" + "Usage!!!: %s " + "{ use nn api i.e. 0,1}\n", + argv[0]); + return 1; + } + Interpret(argv[1], argv[2], use_nnapi); + return 0; +} diff --git a/tensorflow/contrib/lite/testing/parse_testdata.cc b/tensorflow/contrib/lite/testing/parse_testdata.cc new file mode 100644 index 0000000000..2b67052cad --- /dev/null +++ b/tensorflow/contrib/lite/testing/parse_testdata.cc @@ -0,0 +1,335 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Parses tflite example input data. +// Format is ASCII +// TODO(aselle): Switch to protobuf, but the android team requested a simple +// ASCII file. +#include "tensorflow/contrib/lite/testing/parse_testdata.h" + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/testing/message.h" +#include "tensorflow/contrib/lite/testing/split.h" + +namespace tflite { +namespace testing { +namespace { + +// Fatal error if parse error occurs +#define PARSE_CHECK_EQ(filename, current_line, x, y) \ + if ((x) != (y)) { \ + fprintf(stderr, "Parse Error @ %s:%d\n File %s\n Line %d, %s != %s\n", \ + __FILE__, __LINE__, filename, current_line + 1, #x, #y); \ + return kTfLiteError; \ + } + +// Breakup a "," delimited line into a std::vector. +// This is extremely inefficient, and just used for testing code. +// TODO(aselle): replace with absl when we use it. +std::vector ParseLine(const std::string& line) { + size_t pos = 0; + std::vector elements; + while (true) { + size_t end = line.find(',', pos); + if (end == std::string::npos) { + elements.push_back(line.substr(pos)); + break; + } else { + elements.push_back(line.substr(pos, end - pos)); + } + pos = end + 1; + } + return elements; +} + +} // namespace + +// Given a `filename`, produce a vector of Examples corresopnding +// to test cases that can be applied to a tflite model. +TfLiteStatus ParseExamples(const char* filename, + std::vector* examples) { + std::ifstream fp(filename); + if (!fp.good()) { + fprintf(stderr, "Could not read '%s'\n", filename); + return kTfLiteError; + } + std::string str((std::istreambuf_iterator(fp)), + std::istreambuf_iterator()); + size_t pos = 0; + + // \n and , delimit parse a file. + std::vector> csv; + while (true) { + size_t end = str.find('\n', pos); + + if (end == std::string::npos) { + csv.emplace_back(ParseLine(str.substr(pos))); + break; + } + csv.emplace_back(ParseLine(str.substr(pos, end - pos))); + pos = end + 1; + } + + int current_line = 0; + PARSE_CHECK_EQ(filename, current_line, csv[0][0], "test_cases"); + int example_count = std::stoi(csv[0][1]); + current_line++; + + auto parse_tensor = [&filename, ¤t_line, + &csv](FloatTensor* tensor_ptr) { + PARSE_CHECK_EQ(filename, current_line, csv[current_line][0], "dtype"); + current_line++; + // parse shape + PARSE_CHECK_EQ(filename, current_line, csv[current_line][0], "shape"); + size_t elements = 1; + FloatTensor& tensor = *tensor_ptr; + + for (size_t i = 1; i < csv[current_line].size(); i++) { + const auto& shape_part_to_parse = csv[current_line][i]; + if (shape_part_to_parse.empty()) { + // Case of a 0-dimensional shape + break; + } + int shape_part = std::stoi(shape_part_to_parse); + elements *= shape_part; + tensor.shape.push_back(shape_part); + } + current_line++; + // parse data + PARSE_CHECK_EQ(filename, current_line, csv[current_line].size() - 1, + elements); + for (size_t i = 1; i < csv[current_line].size(); i++) { + tensor.flat_data.push_back(std::stof(csv[current_line][i])); + } + current_line++; + + return kTfLiteOk; + }; + + for (int example_idx = 0; example_idx < example_count; example_idx++) { + Example example; + PARSE_CHECK_EQ(filename, current_line, csv[current_line][0], "inputs"); + int inputs = std::stoi(csv[current_line][1]); + current_line++; + // parse dtype + for (int input_index = 0; input_index < inputs; input_index++) { + example.inputs.push_back(FloatTensor()); + TF_LITE_ENSURE_STATUS(parse_tensor(&example.inputs.back())); + } + + PARSE_CHECK_EQ(filename, current_line, csv[current_line][0], "outputs"); + int outputs = std::stoi(csv[current_line][1]); + current_line++; + for (int input_index = 0; input_index < outputs; input_index++) { + example.outputs.push_back(FloatTensor()); + TF_LITE_ENSURE_STATUS(parse_tensor(&example.outputs.back())); + } + examples->emplace_back(example); + } + return kTfLiteOk; +} + +TfLiteStatus FeedExample(tflite::Interpreter* interpreter, + const Example& example) { + // Resize inputs to match example & allocate. + for (size_t i = 0; i < interpreter->inputs().size(); i++) { + int input_index = interpreter->inputs()[i]; + + TF_LITE_ENSURE_STATUS( + interpreter->ResizeInputTensor(input_index, example.inputs[i].shape)); + } + TF_LITE_ENSURE_STATUS(interpreter->AllocateTensors()); + // Copy data into tensors. + for (size_t i = 0; i < interpreter->inputs().size(); i++) { + int input_index = interpreter->inputs()[i]; + if (float* data = interpreter->typed_tensor(input_index)) { + for (size_t idx = 0; idx < example.inputs[i].flat_data.size(); idx++) { + data[idx] = example.inputs[i].flat_data[idx]; + } + } else if (int32_t* data = + interpreter->typed_tensor(input_index)) { + for (size_t idx = 0; idx < example.inputs[i].flat_data.size(); idx++) { + data[idx] = example.inputs[i].flat_data[idx]; + } + } else { + fprintf(stderr, "input[%zu] was not float or int data\n", i); + return kTfLiteError; + } + } + return kTfLiteOk; +} + +TfLiteStatus CheckOutputs(tflite::Interpreter* interpreter, + const Example& example) { + constexpr double kRelativeThreshold = 1e-2f; + constexpr double kAbsoluteThreshold = 1e-4f; + + ErrorReporter* context = DefaultErrorReporter(); + int model_outputs = interpreter->outputs().size(); + TF_LITE_ENSURE_EQ(context, model_outputs, example.outputs.size()); + for (size_t i = 0; i < interpreter->outputs().size(); i++) { + int output_index = interpreter->outputs()[i]; + if (const float* data = interpreter->typed_tensor(output_index)) { + for (size_t idx = 0; idx < example.outputs[i].flat_data.size(); idx++) { + float computed = data[idx]; + float reference = example.outputs[0].flat_data[idx]; + float diff = std::abs(computed - reference); + bool error_is_large = false; + // For very small numbers, try absolute error, otherwise go with + // relative. + if (std::abs(reference) < kRelativeThreshold) { + error_is_large = (diff > kAbsoluteThreshold); + } else { + error_is_large = (diff > kRelativeThreshold * std::abs(reference)); + } + if (error_is_large) { + fprintf(stdout, "output[%zu][%zu] did not match %f vs reference %f\n", + i, idx, data[idx], reference); + return kTfLiteError; + } + } + fprintf(stderr, "\n"); + } else if (const int32_t* data = + interpreter->typed_tensor(output_index)) { + for (size_t idx = 0; idx < example.outputs[i].flat_data.size(); idx++) { + int32_t computed = data[idx]; + int32_t reference = example.outputs[0].flat_data[idx]; + if (std::abs(computed - reference) > 0) { + fprintf(stderr, "output[%zu][%zu] did not match %d vs reference %f\n", + i, idx, data[idx], example.outputs[0].flat_data[idx]); + return kTfLiteError; + } + } + fprintf(stderr, "\n"); + } else { + fprintf(stderr, "output[%zu] was not float or int data\n", i); + return kTfLiteError; + } + } + return kTfLiteOk; +} + +// Process an 'invoke' message, triggering execution of the test runner, as +// well as verification of outputs. An 'invoke' message looks like: +// invoke { +// id: xyz +// input: 1,2,1,1,1,2,3,4 +// ouput: 4,5,6 +// } +class Invoke : public Message { + public: + explicit Invoke(TestRunner* test_runner) : test_runner_(test_runner) { + expected_inputs_ = test_runner->GetInputs(); + expected_outputs_ = test_runner->GetOutputs(); + } + + void SetField(const std::string& name, const std::string& value) override { + if (name == "id") { + test_runner_->SetInvocationId(value); + } else if (name == "input") { + if (expected_inputs_.empty()) { + return test_runner_->Invalidate("Too many inputs"); + } + test_runner_->SetInput(*expected_inputs_.begin(), value); + expected_inputs_.erase(expected_inputs_.begin()); + } else if (name == "output") { + if (expected_outputs_.empty()) { + return test_runner_->Invalidate("Too many outputs"); + } + test_runner_->SetExpectation(*expected_outputs_.begin(), value); + expected_outputs_.erase(expected_outputs_.begin()); + } + } + void Finish() override { + test_runner_->Invoke(); + test_runner_->CheckResults(); + } + + private: + std::vector expected_inputs_; + std::vector expected_outputs_; + + TestRunner* test_runner_; +}; + +// Process an 'reshape' message, triggering resizing of the input tensors via +// the test runner. A 'reshape' message looks like: +// reshape { +// input: 1,2,1,1,1,2,3,4 +// } +class Reshape : public Message { + public: + explicit Reshape(TestRunner* test_runner) : test_runner_(test_runner) { + expected_inputs_ = test_runner->GetInputs(); + } + + void SetField(const std::string& name, const std::string& value) override { + if (name == "input") { + if (expected_inputs_.empty()) { + return test_runner_->Invalidate("Too many inputs to reshape"); + } + test_runner_->ReshapeTensor(*expected_inputs_.begin(), value); + expected_inputs_.erase(expected_inputs_.begin()); + } + } + + private: + std::vector expected_inputs_; + TestRunner* test_runner_; +}; + +// This is the top-level message in a test file. +class TestData : public Message { + public: + explicit TestData(TestRunner* test_runner) : test_runner_(test_runner) {} + + void SetField(const std::string& name, const std::string& value) override { + if (name == "load_model") { + test_runner_->LoadModel(value); + } else if (name == "init_state") { + test_runner_->AllocateTensors(); + for (int id : Split(value, ",")) { + test_runner_->ResetTensor(id); + } + } + } + Message* AddChild(const std::string& s) override { + if (s == "invoke") { + test_runner_->AllocateTensors(); + return Store(new Invoke(test_runner_)); + } else if (s == "reshape") { + return Store(new Reshape(test_runner_)); + } + return nullptr; + } + + private: + TestRunner* test_runner_; +}; + +bool ParseAndRunTests(std::istream* input, TestRunner* test_runner) { + TestData test_data(test_runner); + Message::Read(input, &test_data); + return test_runner->IsValid() && test_runner->GetOverallSuccess(); +} + +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/parse_testdata.h b/tensorflow/contrib/lite/testing/parse_testdata.h new file mode 100644 index 0000000000..90839fe245 --- /dev/null +++ b/tensorflow/contrib/lite/testing/parse_testdata.h @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_NNAPI_PARSE_TESTDATA_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_NNAPI_PARSE_TESTDATA_H_ + +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/testing/test_runner.h" + +namespace tflite { +namespace testing { + +// Shape and data for a float tensor +struct FloatTensor { + std::vector shape; + std::vector flat_data; +}; + +// A prescribed input, output example +struct Example { + std::vector inputs; + std::vector outputs; +}; + +// Parses an example input and output file (used for unit tests) +TfLiteStatus ParseExamples(const char* filename, + std::vector* examples); + +// Inputs Tensors into a TensorFlow lite interpreter. Note, this will run +// interpreter.AllocateTensors(); +TfLiteStatus FeedExample(tflite::Interpreter* interpreter, const Example&); + +// Check outputs against (already) evaluated result. +TfLiteStatus CheckOutputs(tflite::Interpreter* interpreter, const Example&); + +// Parses a test description and feeds the given test runner with data. +// The input format is similar to an ASCII proto: +// // Loads model 'add.bin' from the TestRunner's model directory. +// load_model: "add.bin" +// // Changes the shape of inputs, provided in the same order they appear +// // in the model. +// reshape { +// input: "1,224,224,3" +// input: "1,3,4,1" +// } +// // Fills the given persistent tensors with zeros. +// init_state: 0,1,2,3 +// // Invokes the interpreter with the given input and checks that it +// // produces the expected output. Inputs and outputs should be specified in +// // the order they appear in the model. +// invoke { +// input: "1,2,3,4,56" +// input: "0.1,0.2,0.3,4.3,56.4" +// output: "12,3,4,545,3" +// output: "0.01,0.02" +// } +bool ParseAndRunTests(std::istream* input, TestRunner* test_runner); + +} // namespace testing +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_NNAPI_PARSE_TESTDATA_H_ diff --git a/tensorflow/contrib/lite/testing/split.cc b/tensorflow/contrib/lite/testing/split.cc new file mode 100644 index 0000000000..5836f4ff04 --- /dev/null +++ b/tensorflow/contrib/lite/testing/split.cc @@ -0,0 +1,42 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/split.h" + +namespace tflite { +namespace testing { + +std::vector> SplitToPos(const string& s, + const string& delimiter) { + std::vector> fields; + if (delimiter.length() == 0) { + fields.emplace_back(0, s.length()); + return fields; + } + size_t pos = 0; + size_t start = 0; + while ((pos = s.find(delimiter, start)) != string::npos) { + if (pos != start) { + fields.emplace_back(start, pos); + } + start = pos + delimiter.length(); + } + if (start != s.length()) { + fields.emplace_back(start, s.length()); + } + return fields; +} + +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/split.h b/tensorflow/contrib/lite/testing/split.h new file mode 100644 index 0000000000..24071442e8 --- /dev/null +++ b/tensorflow/contrib/lite/testing/split.h @@ -0,0 +1,77 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_SPLIT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_SPLIT_H_ + +#include +#include +#include +#include +#include "tensorflow/contrib/lite/string.h" + +namespace tflite { +namespace testing { + +// Splits a string based on the given delimiter string. Each pair in the +// returned vector has the start and past-the-end positions for each of the +// parts of the original string. Empty fields are not represented in the +// output. +std::vector> SplitToPos(const string& s, + const string& delimiter); + +// Splits the given string and converts each part to the given T. +template +std::vector Split(const string& s, const string& delimiter); + +template <> +inline std::vector Split(const string& s, const string& delimiter) { + std::vector fields; + for (const auto& p : SplitToPos(s, delimiter)) { + fields.push_back(s.substr(p.first, p.second - p.first)); + } + return fields; +} + +template <> +inline std::vector Split(const string& s, const string& delimiter) { + std::vector fields; + for (const auto& p : SplitToPos(s, delimiter)) { + fields.push_back(strtol(s.data() + p.first, nullptr, 10)); + } + return fields; +} + +template <> +inline std::vector Split(const string& s, const string& delimiter) { + std::vector fields; + for (const auto& p : SplitToPos(s, delimiter)) { + fields.push_back(strtod(s.data() + p.first, nullptr)); + } + return fields; +} + +template <> +inline std::vector Split(const string& s, const string& delimiter) { + std::vector fields; + for (const auto& p : SplitToPos(s, delimiter)) { + fields.push_back(strtol(s.data() + p.first, nullptr, 10)); + } + return fields; +} + +} // namespace testing +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_SPLIT_H_ diff --git a/tensorflow/contrib/lite/testing/split_test.cc b/tensorflow/contrib/lite/testing/split_test.cc new file mode 100644 index 0000000000..3d1e25d9c7 --- /dev/null +++ b/tensorflow/contrib/lite/testing/split_test.cc @@ -0,0 +1,57 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/split.h" + +#include +#include + +namespace tflite { +namespace testing { +namespace { + +using ::testing::ElementsAre; +using ::testing::Pair; + +TEST(SplitTest, SplitToPos) { + EXPECT_THAT(SplitToPos("test;:1-2-3 ;: test", ";:"), + ElementsAre(Pair(0, 4), Pair(6, 12), Pair(14, 19))); + EXPECT_THAT(SplitToPos("test;:1-2-3 ;: test", ":"), + ElementsAre(Pair(0, 5), Pair(6, 13), Pair(14, 19))); + EXPECT_THAT(SplitToPos("test", ":"), ElementsAre(Pair(0, 4))); + EXPECT_THAT(SplitToPos("test ", ":"), ElementsAre(Pair(0, 5))); + EXPECT_THAT(SplitToPos("", ":"), ElementsAre()); + EXPECT_THAT(SplitToPos("test ", ""), ElementsAre(Pair(0, 5))); + EXPECT_THAT(SplitToPos("::::", ":"), ElementsAre()); +} + +TEST(SplitTest, SplitString) { + EXPECT_THAT(Split("A;B;C", ";"), ElementsAre("A", "B", "C")); +} + +TEST(SplitTest, SplitFloat) { + EXPECT_THAT(Split("1.0 B 1e-5", " "), ElementsAre(1.0, 0.0, 1e-5)); +} + +TEST(SplitTest, SplitInt) { + EXPECT_THAT(Split("1,-1,258", ","), ElementsAre(1, -1, 258)); +} + +TEST(SplitTest, SplitUint8) { + EXPECT_THAT(Split("1,-1,258", ","), ElementsAre(1, 255, 2)); +} + +} // namespace +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/test_runner.h b/tensorflow/contrib/lite/testing/test_runner.h new file mode 100644 index 0000000000..04ee4d9f7d --- /dev/null +++ b/tensorflow/contrib/lite/testing/test_runner.h @@ -0,0 +1,124 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TEST_RUNNER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TEST_RUNNER_H_ + +#include +#include +#include +#include "tensorflow/contrib/lite/string.h" + +namespace tflite { +namespace testing { + +// This is the base class for processing test data. Each one of the virtual +// methods must be implemented to forward the data to the appropriate executor +// (e.g. TF Lite's interpreter, or the NNAPI). +class TestRunner { + public: + TestRunner() {} + virtual ~TestRunner() {} + + // Load the given model, as a path relative to SetModelBaseDir(). + virtual void LoadModel(const string& bin_file_path) = 0; + + // Return the list of input tensors in the loaded model. + virtual const std::vector& GetInputs() = 0; + + // Return the list of output tensors in the loaded model. + virtual const std::vector& GetOutputs() = 0; + + // Prepare for a run by resize the given tensor. The given 'id' is + // guaranteed to be one of the ids returned by GetInputs(). + virtual void ReshapeTensor(int id, const string& csv_values) = 0; + + // Reserve memory for all tensors. + virtual void AllocateTensors() = 0; + + // Set the given tensor to some initial state, usually zero. This is + // used to reset persistent buffers in a model. + virtual void ResetTensor(int id) = 0; + + // Define the contents of the given input tensor. The given 'id' is + // guaranteed to be one of the ids returned by GetInputs(). + virtual void SetInput(int id, const string& csv_values) = 0; + + // Define what should be expected for an output tensor after Invoke() runs. + // The given 'id' is guaranteed to be one of the ids returned by + // GetOutputs(). + virtual void SetExpectation(int id, const string& csv_values) = 0; + + // Run the model. + virtual void Invoke() = 0; + + // Verify that the contents of all ouputs conform to the existing + // expectations. Return true if there are no expectations or they are all + // satisfied. + virtual bool CheckResults() = 0; + + // Set the base path for loading models. + void SetModelBaseDir(const string& path) { + model_base_dir_ = path; + if (path[path.length() - 1] != '/') { + model_base_dir_ += "/"; + } + } + + // Return the full path of a model. + string GetFullPath(const string& path) { return model_base_dir_ + path; } + + // Give an id to the next invocation to make error reporting more meaningful. + void SetInvocationId(const string& id) { invocation_id_ = id; } + const string& GetInvocationId() const { return invocation_id_; } + + // Invalidate the test runner, preventing it from executing any further. + void Invalidate(const string& error_message) { + error_message_ = error_message; + } + bool IsValid() const { return error_message_.empty(); } + const string& GetErrorMessage() const { return error_message_; } + + // Handle the overall success of this test runner. This will be true if all + // invocations were successful. + void SetOverallSuccess(bool value) { overall_success_ = value; } + bool GetOverallSuccess() const { return overall_success_; } + + protected: + // A helper to check of the given number of values is consistent with the + // number of bytes in a tensor of type T. When incompatibles sizes are found, + // the test runner is invalidated and false is returned. + template + bool CheckSizes(size_t tensor_bytes, size_t num_values) { + size_t num_tensor_elements = tensor_bytes / sizeof(T); + if (num_tensor_elements != num_values) { + Invalidate("Expected '" + std::to_string(num_tensor_elements) + + "' elements for a tensor, but only got '" + + std::to_string(num_values) + "'"); + return false; + } + return true; + } + + private: + string model_base_dir_; + string invocation_id_; + bool overall_success_ = true; + + string error_message_; +}; + +} // namespace testing +} // namespace tflite +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TEST_RUNNER_H_ diff --git a/tensorflow/contrib/lite/testing/test_runner_test.cc b/tensorflow/contrib/lite/testing/test_runner_test.cc new file mode 100644 index 0000000000..f712a5347a --- /dev/null +++ b/tensorflow/contrib/lite/testing/test_runner_test.cc @@ -0,0 +1,84 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/test_runner.h" + +#include +#include + +namespace tflite { +namespace testing { +namespace { + +class ConcreteTestRunner : public TestRunner { + public: + void LoadModel(const string& bin_file_path) override {} + const std::vector& GetInputs() override { return ids_; } + const std::vector& GetOutputs() override { return ids_; } + void ReshapeTensor(int id, const string& csv_values) override {} + void AllocateTensors() override {} + void ResetTensor(int id) override {} + void SetInput(int id, const string& csv_values) override {} + void SetExpectation(int id, const string& csv_values) override {} + void Invoke() override {} + bool CheckResults() override { return true; } + bool CheckFloatSizes(size_t bytes, size_t values) { + return CheckSizes(bytes, values); + } + + private: + std::vector ids_; +}; + +TEST(TestRunner, ModelPath) { + ConcreteTestRunner runner; + EXPECT_EQ(runner.GetFullPath("test.bin"), "test.bin"); + runner.SetModelBaseDir("/tmp"); + EXPECT_EQ(runner.GetFullPath("test.bin"), "/tmp/test.bin"); +} + +TEST(TestRunner, InvocationId) { + ConcreteTestRunner runner; + EXPECT_EQ(runner.GetInvocationId(), ""); + runner.SetInvocationId("X"); + EXPECT_EQ(runner.GetInvocationId(), "X"); +} + +TEST(TestRunner, Invalidation) { + ConcreteTestRunner runner; + EXPECT_TRUE(runner.IsValid()); + EXPECT_EQ(runner.GetErrorMessage(), ""); + runner.Invalidate("Some Error"); + EXPECT_FALSE(runner.IsValid()); + EXPECT_EQ(runner.GetErrorMessage(), "Some Error"); +} + +TEST(TestRunner, OverallSuccess) { + ConcreteTestRunner runner; + EXPECT_TRUE(runner.GetOverallSuccess()); + runner.SetOverallSuccess(false); + EXPECT_FALSE(runner.GetOverallSuccess()); +} + +TEST(TestRunner, CheckSizes) { + ConcreteTestRunner runner; + EXPECT_TRUE(runner.CheckFloatSizes(16, 4)); + EXPECT_FALSE(runner.CheckFloatSizes(16, 2)); + EXPECT_EQ(runner.GetErrorMessage(), + "Expected '4' elements for a tensor, but only got '2'"); +} + +} // namespace +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc new file mode 100644 index 0000000000..cf9df2ec26 --- /dev/null +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -0,0 +1,208 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/tflite_driver.h" + +#include + +#include "tensorflow/contrib/lite/testing/split.h" + +namespace tflite { +namespace testing { + +namespace { + +// Returns the value in the given position in a tensor. +template +T Value(const TfLitePtrUnion& data, int index); +template <> +float Value(const TfLitePtrUnion& data, int index) { + return data.f[index]; +} +template <> +uint8_t Value(const TfLitePtrUnion& data, int index) { + return data.uint8[index]; +} + +template +void SetTensorData(const std::vector& values, TfLitePtrUnion* data) { + T* input_ptr = reinterpret_cast(data->raw); + for (const T& v : values) { + *input_ptr = v; + ++input_ptr; + } +} + +} // namespace + +class TfLiteDriver::Expectation { + public: + Expectation() { data_.raw = nullptr; } + ~Expectation() { delete[] data_.raw; } + template + void SetData(const string& csv_values) { + const auto& values = testing::Split(csv_values, ","); + data_.raw = new char[values.size() * sizeof(T)]; + SetTensorData(values, &data_); + } + + bool Check(bool verbose, const TfLiteTensor& tensor) { + switch (tensor.type) { + case kTfLiteFloat32: + return TypedCheck(verbose, tensor); + case kTfLiteUInt8: + return TypedCheck(verbose, tensor); + default: + return false; + } + } + + private: + template + bool TypedCheck(bool verbose, const TfLiteTensor& tensor) { + int tensor_size = tensor.bytes / sizeof(T); + + bool good_output = true; + for (int i = 0; i < tensor_size; ++i) { + if (std::abs(Value(data_, i) - Value(tensor.data, i)) > 1e-5) { + good_output = false; + if (verbose) { + std::cerr << " index " << i << ": " << Value(data_, i) + << " != " << Value(tensor.data, i) << std::endl; + } + } + } + return good_output; + } + + TfLitePtrUnion data_; +}; + +TfLiteDriver::TfLiteDriver(bool use_nnapi) : use_nnapi_(use_nnapi) {} +TfLiteDriver::~TfLiteDriver() {} + +void TfLiteDriver::AllocateTensors() { + if (must_allocate_tensors_) { + if (interpreter_->AllocateTensors() != kTfLiteOk) { + std::cerr << "Failed to allocate tensors" << std::endl; + abort(); + } + must_allocate_tensors_ = false; + } +} + +void TfLiteDriver::LoadModel(const string& bin_file_path) { + if (!IsValid()) return; + std::cout << std::endl << "Loading model: " << bin_file_path << std::endl; + + model_ = FlatBufferModel::BuildFromFile(GetFullPath(bin_file_path).c_str()); + if (!model_) { + Invalidate("Failed to mmap model " + bin_file_path); + return; + } + ops::builtin::BuiltinOpResolver builtins; + InterpreterBuilder(*model_, builtins)(&interpreter_); + if (!interpreter_) { + Invalidate("Failed build interpreter"); + return; + } + + must_allocate_tensors_ = true; +} + +void TfLiteDriver::ResetTensor(int id) { + if (!IsValid()) return; + auto* tensor = interpreter_->tensor(id); + memset(tensor->data.raw, 0, tensor->bytes); +} + +void TfLiteDriver::ReshapeTensor(int id, const string& csv_values) { + if (!IsValid()) return; + if (interpreter_->ResizeInputTensor( + id, testing::Split(csv_values, ",")) != kTfLiteOk) { + Invalidate("Failed to resize input tensor " + std::to_string(id)); + return; + } + must_allocate_tensors_ = true; +} + +void TfLiteDriver::SetInput(int id, const string& csv_values) { + if (!IsValid()) return; + auto* tensor = interpreter_->tensor(id); + switch (tensor->type) { + case kTfLiteFloat32: { + const auto& values = testing::Split(csv_values, ","); + if (!CheckSizes(tensor->bytes, values.size())) return; + SetTensorData(values, &tensor->data); + break; + } + case kTfLiteUInt8: { + const auto& values = testing::Split(csv_values, ","); + if (!CheckSizes(tensor->bytes, values.size())) return; + SetTensorData(values, &tensor->data); + break; + } + default: + Invalidate("Unsupported tensor data type"); + return; + } +} + +void TfLiteDriver::SetExpectation(int id, const string& csv_values) { + if (!IsValid()) return; + auto* tensor = interpreter_->tensor(id); + expected_output_[id].reset(new Expectation); + switch (tensor->type) { + case kTfLiteFloat32: + expected_output_[id]->SetData(csv_values); + break; + case kTfLiteUInt8: + expected_output_[id]->SetData(csv_values); + break; + default: + Invalidate("Unsupported tensor data type"); + return; + } +} + +void TfLiteDriver::Invoke() { + if (!IsValid()) return; + if (interpreter_->Invoke() != kTfLiteOk) { + Invalidate("Failed to invoke interpreter"); + } +} + +bool TfLiteDriver::CheckResults() { + if (!IsValid()) return false; + bool success = true; + for (const auto& p : expected_output_) { + int id = p.first; + auto* tensor = interpreter_->tensor(id); + if (!p.second->Check(/*verbose=*/false, *tensor)) { + // Do not invalidate anything here. Instead, simply output the + // differences and return false. Invalidating would prevent all + // subsequent invocations from running.. + std::cerr << "There were errors in invocation '" << GetInvocationId() + << "', output tensor '" << id << "':" << std::endl; + p.second->Check(/*verbose=*/true, *tensor); + success = false; + SetOverallSuccess(false); + } + } + expected_output_.clear(); + return success; +} + +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/tflite_driver.h b/tensorflow/contrib/lite/testing/tflite_driver.h new file mode 100644 index 0000000000..4440d4285e --- /dev/null +++ b/tensorflow/contrib/lite/testing/tflite_driver.h @@ -0,0 +1,62 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TFLITE_DRIVER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TFLITE_DRIVER_H_ + +#include + +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/testing/test_runner.h" + +namespace tflite { +namespace testing { + +// A test runner that feeds inputs into TF Lite and verifies its outputs. +class TfLiteDriver : public TestRunner { + public: + explicit TfLiteDriver(bool use_nnapi); + ~TfLiteDriver() override; + + void LoadModel(const string& bin_file_path) override; + const std::vector& GetInputs() override { + return interpreter_->inputs(); + } + const std::vector& GetOutputs() override { + return interpreter_->outputs(); + } + void ReshapeTensor(int id, const string& csv_values) override; + void AllocateTensors() override; + void ResetTensor(int id) override; + void SetInput(int id, const string& csv_values) override; + void SetExpectation(int id, const string& csv_values) override; + void Invoke() override; + bool CheckResults() override; + + private: + class Expectation; + + bool use_nnapi_ = false; + std::unique_ptr model_; + std::unique_ptr interpreter_; + std::map> expected_output_; + bool must_allocate_tensors_ = true; +}; + +} // namespace testing +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TFLITE_DRIVER_H_ diff --git a/tensorflow/contrib/lite/testing/tflite_driver_test.cc b/tensorflow/contrib/lite/testing/tflite_driver_test.cc new file mode 100644 index 0000000000..79e8a86972 --- /dev/null +++ b/tensorflow/contrib/lite/testing/tflite_driver_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/tflite_driver.h" + +#include +#include + +namespace tflite { +namespace testing { +namespace { + +using ::testing::ElementsAre; + +TEST(TfliteDriverTest, SimpleTest) { + std::unique_ptr runner(new TfLiteDriver(/*use_nnapi=*/false)); + + runner->SetModelBaseDir("third_party/tensorflow/contrib/lite"); + runner->LoadModel("testdata/multi_add.bin"); + ASSERT_TRUE(runner->IsValid()); + + ASSERT_THAT(runner->GetInputs(), ElementsAre(0, 1, 2, 3)); + ASSERT_THAT(runner->GetOutputs(), ElementsAre(5, 6)); + + for (int i : {0, 1, 2, 3}) { + runner->ReshapeTensor(i, "1,2,2,1"); + } + ASSERT_TRUE(runner->IsValid()); + + runner->AllocateTensors(); + + runner->SetInput(0, "0.1,0.2,0.3,0.4"); + runner->SetInput(1, "0.001,0.002,0.003,0.004"); + runner->SetInput(2, "0.001,0.002,0.003,0.004"); + runner->SetInput(3, "0.01,0.02,0.03,0.04"); + + runner->ResetTensor(2); + + runner->SetExpectation(5, "0.101,0.202,0.303,0.404"); + runner->SetExpectation(6, "0.011,0.022,0.033,0.044"); + + runner->Invoke(); + ASSERT_TRUE(runner->IsValid()); + + ASSERT_TRUE(runner->CheckResults()); +} + +} // namespace +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/tokenize.cc b/tensorflow/contrib/lite/testing/tokenize.cc new file mode 100644 index 0000000000..2e84ea475c --- /dev/null +++ b/tensorflow/contrib/lite/testing/tokenize.cc @@ -0,0 +1,95 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/tokenize.h" +#include +#include +#include "tensorflow/contrib/lite/string.h" + +namespace tflite { +namespace testing { + +void Tokenize(std::istream* input, TokenProcessor* processor) { + enum State { kBuildQuotedToken, kBuildToken, kIdle }; + + std::string current_token; + State state = kIdle; + auto start_token = [&](char c) { + state = kBuildToken; + current_token.clear(); + current_token = c; + }; + auto issue_token = [&]() { + state = kIdle; + processor->ConsumeToken(¤t_token); + current_token.clear(); + }; + auto start_quoted_token = [&]() { + state = kBuildQuotedToken; + current_token.clear(); + }; + auto issue_quoted_token = [&]() { + state = kIdle; + processor->ConsumeToken(¤t_token); + current_token.clear(); + }; + auto issue_delim = [&](char d) { + current_token = string(1, d); + processor->ConsumeToken(¤t_token); + current_token.clear(); + }; + auto is_delim = [](char c) { return c == '{' || c == '}' || c == ':'; }; + auto is_quote = [](char c) { return c == '"'; }; + + for (auto it = std::istreambuf_iterator(*input); + it != std::istreambuf_iterator(); ++it) { + switch (state) { + case kIdle: + if (is_delim(*it)) { + issue_delim(*it); + } else if (is_quote(*it)) { + start_quoted_token(); + } else if (!isspace(*it)) { + start_token(*it); + } + break; + case kBuildToken: + if (is_delim(*it)) { + issue_token(); + issue_delim(*it); + } else if (is_quote(*it)) { + issue_token(); + start_quoted_token(); + } else if (isspace(*it)) { + issue_token(); + } else { + current_token += *it; + } + break; + case kBuildQuotedToken: + if (is_quote(*it)) { + issue_quoted_token(); + } else { + current_token += *it; + } + break; + } + } + if (state != kIdle) { + issue_token(); + } +} + +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/tokenize.h b/tensorflow/contrib/lite/testing/tokenize.h new file mode 100644 index 0000000000..daccf0e84a --- /dev/null +++ b/tensorflow/contrib/lite/testing/tokenize.h @@ -0,0 +1,42 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TOKENIZER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TOKENIZER_H_ + +#include +#include + +namespace tflite { +namespace testing { + +// Process tokens coming from Tokenize(). +class TokenProcessor { + public: + virtual ~TokenProcessor() {} + // Process a single token. The token won't be reused, so it is OK to call + // token.swap(). + virtual void ConsumeToken(std::string* token) = 0; +}; + +// Tokenize a stream on whitespaces, colons and curly braces. Whitespaces are +// removed from the tokens and double-quotes can be used to avoid that. Note +// that there is no way to escape double-quotes, so there's no way to have a +// double-quote inside a token. +void Tokenize(std::istream* input, TokenProcessor* processor); + +} // namespace testing +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_TOKENIZER_H_ diff --git a/tensorflow/contrib/lite/testing/tokenize_test.cc b/tensorflow/contrib/lite/testing/tokenize_test.cc new file mode 100644 index 0000000000..80f44aacca --- /dev/null +++ b/tensorflow/contrib/lite/testing/tokenize_test.cc @@ -0,0 +1,105 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/tokenize.h" + +#include +#include + +namespace tflite { +namespace testing { +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +class TokenCollector : public TokenProcessor { + public: + void ConsumeToken(std::string* token) override { tokens_.push_back(*token); } + const std::vector& Tokens() { return tokens_; } + + private: + std::vector tokens_; +}; + +std::vector TokenizeString(const std::string& s) { + std::stringstream ss(s); + TokenCollector collector; + Tokenize(&ss, &collector); + return collector.Tokens(); +} + +TEST(TokenizeTest, TokenDetection) { + EXPECT_THAT(TokenizeString("x :1"), ElementsAre("x", ":", "1")); + EXPECT_THAT(TokenizeString("x:1"), ElementsAre("x", ":", "1")); + EXPECT_THAT(TokenizeString("x {1"), ElementsAre("x", "{", "1")); + EXPECT_THAT(TokenizeString("x{1"), ElementsAre("x", "{", "1")); + EXPECT_THAT(TokenizeString("x }1"), ElementsAre("x", "}", "1")); + EXPECT_THAT(TokenizeString("x}1"), ElementsAre("x", "}", "1")); + EXPECT_THAT(TokenizeString("x \"1"), ElementsAre("x", "1")); + EXPECT_THAT(TokenizeString("x\"1"), ElementsAre("x", "1")); +} + +TEST(TokenizeTest, QuotedTokenDetection) { + EXPECT_THAT(TokenizeString("\"w:x{y}z\"1"), ElementsAre("w:x{y}z", "1")); + EXPECT_THAT(TokenizeString("\"w:x{y}z\"\"1\""), ElementsAre("w:x{y}z", "1")); +} + +TEST(TokenizeTest, Delimiters) { + EXPECT_THAT(TokenizeString("}"), ElementsAre("}")); + EXPECT_THAT(TokenizeString("}}"), ElementsAre("}", "}")); + EXPECT_THAT(TokenizeString("{"), ElementsAre("{")); + EXPECT_THAT(TokenizeString("{{"), ElementsAre("{", "{")); + EXPECT_THAT(TokenizeString(":"), ElementsAre(":")); + EXPECT_THAT(TokenizeString("::"), ElementsAre(":", ":")); +} + +TEST(TokenizeTest, CornerCases) { + EXPECT_THAT(TokenizeString(" i { b:a } "), + ElementsAre("i", "{", "b", ":", "a", "}")); + EXPECT_THAT(TokenizeString(" }"), ElementsAre("}")); + EXPECT_THAT(TokenizeString(" } "), ElementsAre("}")); + EXPECT_THAT(TokenizeString(" {} "), ElementsAre("{", "}")); + EXPECT_THAT(TokenizeString(" x{} y{} "), + ElementsAre("x", "{", "}", "y", "{", "}")); + EXPECT_THAT(TokenizeString("x:1 y:2 "), + ElementsAre("x", ":", "1", "y", ":", "2")); + EXPECT_THAT(TokenizeString("x:\"1\" y:2 "), + ElementsAre("x", ":", "1", "y", ":", "2")); + EXPECT_THAT(TokenizeString("x:\"1, 2\" y:\"\" "), + ElementsAre("x", ":", "1, 2", "y", ":", "")); +} + +TEST(TokenizeTest, NewLines) { + EXPECT_THAT(TokenizeString("x:\n1,\n 2 \n y :\n3 \n"), + ElementsAre("x", ":", "1,", "2", "y", ":", "3")); +} + +TEST(TokenizeTest, LongString) { + EXPECT_THAT( + TokenizeString(" i { b:a } input {" + "a: \"1e-1, 2,3\" b:\"1,2,3\"\n c{ " + "id:1 x{d{a:" + "1}}} f:2 " + "\n}\n t:1"), + ElementsAreArray({"i", "{", "b", ":", "a", "}", "input", "{", + "a", ":", "1e-1, 2,3", "b", ":", "1,2,3", "c", "{", + "id", ":", "1", "x", "{", "d", "{", "a", + ":", "1", "}", "}", "}", "f", ":", "2", + "}", "t", ":", "1"})); +} + +} // namespace +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD new file mode 100644 index 0000000000..1c73ab8f4a --- /dev/null +++ b/tensorflow/contrib/lite/toco/BUILD @@ -0,0 +1,350 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow/core:platform/default/build_config.bzl", + "tf_proto_library_cc", + "tf_proto_library_py", +) +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_binary", + "tf_cc_test", +) + +tf_proto_library_cc( + name = "toco_flags_proto", + srcs = ["toco_flags.proto"], + visibility = ["//visibility:public"], +) + +tf_proto_library_cc( + name = "model_flags_proto", + srcs = ["model_flags.proto"], + visibility = ["//visibility:public"], +) + +tf_proto_library_py( + name = "toco_flags_proto", + srcs = [ + "toco_flags.proto", + ], + visibility = ["//visibility:public"], +) + +tf_proto_library_py( + name = "model_flags_proto", + srcs = [ + "model_flags.proto", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "tensorflow_core_cc_protos_all", + deps = ["//tensorflow/core:protos_all_cc"], +) + +cc_library( + name = "runtime", + hdrs = [ + "runtime/common.h", + "runtime/types.h", + ], + linkstatic = 1, + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/contrib/lite/kernels/internal:reference_base", + "//tensorflow/contrib/lite/kernels/internal:types", + ], +) + +# :model offers the core data structures representing a model (a.k.a. "graph") +# for tooling purposes (not needed at inference runtime). +# That includes the top-level Model structure, and the lower-level Operator, +# Array, Buffer structures, etc. +cc_library( + name = "model", + hdrs = [ + "model.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":model_flags_proto_cc", + ":runtime", + ":toco_port", + "//tensorflow/core:lib", + "@com_google_absl//absl/base:core_headers", + "@protobuf_archive//:protobuf_headers", + ], +) + +cc_library( + name = "toco_graphviz_dump_options", + srcs = [ + "toco_graphviz_dump_options.cc", + ], + hdrs = [ + "toco_graphviz_dump_options.h", + ], + visibility = ["//visibility:public"], + deps = [ + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "toco_cmdline_flags", + srcs = [ + "toco_cmdline_flags.cc", + ], + hdrs = [ + "toco_cmdline_flags.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":model_cmdline_flags", + ":toco_flags_proto_cc", + ":toco_port", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "model_cmdline_flags", + srcs = [ + "model_cmdline_flags.cc", + ], + hdrs = [ + "args.h", + "model_cmdline_flags.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":model_flags_proto_cc", + ":toco_graphviz_dump_options", + ":toco_port", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "toco_port", + srcs = [ + "toco_port.cc", + ], + hdrs = [ + "format_port.h", + "toco_port.h", + "toco_types.h", + ], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ] + select({ + "//tensorflow:android": [], + "//tensorflow:darwin": [], + "//tensorflow:ios": [], + "//conditions:default": [], + "//tensorflow:dummy_disabled_internal": [], + }), +) + +cc_library( + name = "graph_transformations", + srcs = [ + "graph_transformations/convert_pure_conv_to_depthwise.cc", + "graph_transformations/create_im2col_arrays.cc", + "graph_transformations/dequantize.cc", + "graph_transformations/drop_fake_quant.cc", + "graph_transformations/drop_im2col_arrays.cc", + "graph_transformations/ensure_bias_vectors.cc", + "graph_transformations/fuse_activation_functions.cc", + "graph_transformations/fuse_binary_into_following_affine.cc", + "graph_transformations/fuse_binary_into_preceding_affine.cc", + "graph_transformations/graph_transformations.cc", + "graph_transformations/hardcode_min_max.cc", + "graph_transformations/identify_l2_normalization.cc", + "graph_transformations/identify_l2_pool.cc", + "graph_transformations/identify_lstm.cc", + "graph_transformations/identify_relu1.cc", + "graph_transformations/make_initial_dequantize_operator.cc", + "graph_transformations/propagate_array_data_types.cc", + "graph_transformations/propagate_fixed_sizes.cc", + "graph_transformations/quantize.cc", + "graph_transformations/read_fake_quant_min_max.cc", + "graph_transformations/remove_final_dequantize_op.cc", + "graph_transformations/remove_tensorflow_assert.cc", + "graph_transformations/remove_tensorflow_identity.cc", + "graph_transformations/remove_trivial_binary.cc", + "graph_transformations/remove_trivial_concatenation.cc", + "graph_transformations/remove_trivial_concatenation_input.cc", + "graph_transformations/remove_trivial_passthrough.cc", + "graph_transformations/remove_trivial_passthrough.h", + "graph_transformations/remove_trivial_quantized_activation_func.cc", + "graph_transformations/remove_trivial_reshape.cc", + "graph_transformations/remove_unused_op.cc", + "graph_transformations/resolve_batch_normalization.cc", + "graph_transformations/resolve_constant_binary.cc", + "graph_transformations/resolve_constant_concatenation.cc", + "graph_transformations/resolve_constant_fake_quant.cc", + "graph_transformations/resolve_constant_tensorflow_shape.cc", + "graph_transformations/resolve_constant_unary.cc", + "graph_transformations/resolve_mean_attributes.cc", + "graph_transformations/resolve_pad_attributes.cc", + "graph_transformations/resolve_reorder_axes.cc", + "graph_transformations/resolve_reshape_attributes.cc", + "graph_transformations/resolve_slice_attributes.cc", + "graph_transformations/resolve_strided_slice_attributes.cc", + "graph_transformations/resolve_tensorflow_concat.cc", + "graph_transformations/resolve_tensorflow_matmul.cc", + "graph_transformations/resolve_tensorflow_merge.cc", + "graph_transformations/resolve_tensorflow_squeeze.cc", + "graph_transformations/resolve_tensorflow_switch.cc", + "graph_transformations/resolve_tensorflow_tile.cc", + "graph_transformations/unfuse_activation_functions.cc", + ], + hdrs = [ + "graph_transformations/graph_transformations.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":model", + ":model_flags_proto_cc", + ":runtime", + ":toco_port", + ":tooling_util", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], +) + +# :toco_tooling is the library providing the offline tooling functionality +# exposed by the :toco command-line tool. +cc_library( + name = "toco_tooling", + srcs = [ + "allocate_transient_arrays.cc", + "export_tensorflow.cc", + "import_tensorflow.cc", + "tensorflow_util.cc", + "toco_tooling.cc", + ], + hdrs = [ + "allocate_transient_arrays.h", + "export_tensorflow.h", + "import_tensorflow.h", + "tensorflow_util.h", + "toco_tooling.h", + ], + copts = select({ + "//tensorflow:darwin": ["-DTOCO_SUPPORT_PORTABLE_PROTOS=0"], + "//conditions:default": [], + }), + visibility = ["//visibility:public"], + deps = [ + ":graph_transformations", + ":model_flags_proto_cc", + ":toco_flags_proto_cc", + ":model", + ":runtime", + "//tensorflow/core:protos_all_cc", + ":toco_port", + ":tooling_util", + ":toco_graphviz_dump_options", + "@protobuf_archive//:protobuf_headers", + "//tensorflow/contrib/lite/toco/tflite:export", + "//tensorflow/contrib/lite/toco/tflite:import", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/memory", + "//tensorflow/core:lib", + "//tensorflow/contrib/lite/toco/tensorflow_graph_matching:resolve_cluster", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + ] + select({ + # Placeholder for internal darwin rule. + "//conditions:default": [], + }), +) + +cc_library( + name = "tooling_util", + srcs = [ + "dump_graphviz.cc", + "tooling_util.cc", + ], + hdrs = [ + "dump_graphviz.h", + "tooling_util.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":model", + ":model_flags_proto_cc", + ":runtime", + ":toco_flags_proto_cc", + ":toco_graphviz_dump_options", + ":toco_port", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + "@protobuf_archive//:protobuf_headers", + ], +) + +tf_cc_test( + name = "tooling_util_test", + srcs = ["tooling_util_test.cc"], + deps = [ + ":model", + ":tooling_util", + "@com_google_googletest//:gtest_main", + ], +) + +# :toco is the main public command-line tool exposing the functionality +# of the :toco_tooling library. +tf_cc_binary( + name = "toco", + srcs = ["toco.cc"], + visibility = ["//visibility:public"], + deps = [ + ":model", + ":model_cmdline_flags", + ":model_flags_proto_cc", + ":toco_cmdline_flags", + ":toco_flags_proto_cc", + ":toco_port", + ":toco_tooling", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_test( + name = "toco_port_test", + srcs = ["toco_port_test.cc"], + data = [ + "toco_port_test.cc", + ], + deps = [ + ":toco_port", + "@com_google_googletest//:gtest_main", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc new file mode 100644 index 0000000000..2f4454d7c8 --- /dev/null +++ b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc @@ -0,0 +1,318 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/allocate_transient_arrays.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { +namespace { + +// The life span of an array. +struct ArrayLifespan { + // If true, the array is persistent state (as in a RNN). In that case, + // its allocation is permanent and the first_op, last_op members are + // unused. (The term 'transient' is a misnomer and we should think in + // terms of 'workspace' instead). + bool persistent = false; + // Index of the first op addressing that array. The array must be allocated + // just before executing this op. + std::size_t first_op = 0; + // Index of the last op addressing that array. We want to deallocate the array + // immediately after executing this op. + std::size_t last_op = 0; +}; + +bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) { + return !lifespan.persistent && lifespan.first_op == op_index; +} + +bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) { + return !lifespan.persistent && lifespan.last_op == op_index; +} + +// Helper function for ComputeArrayLifespans: updates one ArrayLifespan for +// one array for one op. +void UpdateArrayLifespan( + const string& array_name, std::size_t op_index, + std::unordered_map* array_lifespans) { + if (array_lifespans->count(array_name)) { + auto& lifespan = array_lifespans->at(array_name); + if (!lifespan.persistent) { + lifespan.first_op = std::min(lifespan.first_op, op_index); + lifespan.last_op = std::max(lifespan.last_op, op_index); + } + } else { + ArrayLifespan lifespan; + lifespan.first_op = op_index; + lifespan.last_op = op_index; + (*array_lifespans)[array_name] = lifespan; + } +} + +// Computes the ArrayLifespan for each array. +void ComputeArrayLifespans( + const Model& model, + std::unordered_map* array_lifespans) { + CHECK(array_lifespans->empty()); + for (const auto& rnn_state : model.flags.rnn_states()) { + ArrayLifespan lifespan; + lifespan.persistent = true; + (*array_lifespans)[rnn_state.state_array()] = lifespan; + } + for (std::size_t op_index = 0; op_index < model.operators.size(); + op_index++) { + const auto& op = model.operators[op_index]; + for (const auto& input : op->inputs) { + UpdateArrayLifespan(input, op_index, array_lifespans); + } + for (const auto& output : op->outputs) { + UpdateArrayLifespan(output, op_index, array_lifespans); + } + } +} + +inline bool operator==(const Alloc& a, const Alloc& b) { + CHECK(a.start != b.start || a.end == b.end); + return a.start == b.start; +} + +// Helper to keep track of total allocation size and of currently live +// allocations, and containing the core allocation routine. +class Allocator { + public: + Allocator() : total_size_(0) {} + + // Core allocation routine. + void Allocate(std::size_t size, Alloc* result) { + // Naive algorithm: pick the first gap between live allocations, + // that is wide enough for the new array. + std::size_t pos = 0; + for (const auto& a : live_allocs_) { + if (a.start >= pos + size) { + result->start = pos; + result->end = pos + size; + live_allocs_.insert(*result); + return; + } + pos = a.end; + } + // No sufficiently wide gap was found before an existing live allocation, + // so we allocate the new array at the end of the allocation space. + // We may then have to grow total_size_. + total_size_ = std::max(total_size_, pos + size); + result->start = pos; + result->end = pos + size; + live_allocs_.insert(*result); + } + + void Deallocate(const Alloc& a) { + auto iter = std::lower_bound(live_allocs_.begin(), live_allocs_.end(), a); + CHECK(iter != live_allocs_.end()); + CHECK(*iter == a); + live_allocs_.erase(iter); + } + + std::size_t total_size() const { return total_size_; } + + private: + std::size_t total_size_; + std::set live_allocs_; +}; + +// Returns the required transient allocation size (in bytes) for a given array, +// or 0 if it's not a transient array. +std::size_t TransientArraySize(const Model& model, const string& array_name, + std::size_t transient_data_alignment) { + if (!IsAllocatableTransientArray(model, array_name)) { + return 0; + } + const auto& array = model.arrays.at(array_name); + CHECK(array->has_shape()) + << "Array '" << array_name << "' doesn't have a shape"; + if (array->data_type == ArrayDataType::kNone) { + // Catch a typical issue at the moment with RNN states + for (const auto& rnn_state : model.flags.rnn_states()) { + if (rnn_state.state_array() == array_name) { + LOG(FATAL) + << "A RNN state array, " << array_name << ", still does not " + << "have a known data type after all graph transformations have " + << "run. That's mostly a toco bug --- sorry. For now, you can " + << "work around this issue by adding manually_create:true in the " + << "--rnn_state description of this RNN state."; + } + } + LOG(FATAL) << "An array, " << array_name << ", still does not " + << "have a known data type after all graph transformations have " + << "run."; + } + const std::size_t elem_size = ElementSize(array->data_type); + const std::size_t raw_size = + elem_size * RequiredBufferSizeForShape(array->shape()); + const std::size_t rounded_size = + RoundUpToNextMultipleOf(raw_size, transient_data_alignment); + return rounded_size; +} + +// Allocates an array: call this for every array just before the first +// op where it is used. +void AllocateTransientArray(const Model& model, const string& array_name, + Allocator* allocator, + std::size_t transient_data_alignment) { + if (!IsAllocatableTransientArray(model, array_name)) { + return; + } + const std::size_t size = + TransientArraySize(model, array_name, transient_data_alignment); + const auto& array = model.arrays.at(array_name); + CHECK(!array->alloc); + allocator->Allocate(size, &array->GetOrCreateAlloc()); +} + +// Deallocates an array: call this for every array just after the last +// op where it is used. +void DeallocateTransientArray(const Model& model, const string& array_name, + Allocator* allocator) { + if (!IsAllocatableTransientArray(model, array_name)) { + return; + } + const auto& array = model.arrays.at(array_name); + CHECK(!!array->alloc); + allocator->Deallocate(*array->alloc); +} + +} // namespace + +void AllocateTransientArrays(Model* model, + std::size_t transient_data_alignment) { + // Precompute the lifespans for all arrays. + std::unordered_map array_lifespans; + ComputeArrayLifespans(*model, &array_lifespans); + + // In case of variable batch, our convention will be to compute the + // allocations for batch==1, then let the inference code multiply all + // the offsets by the actual runtime batch size. Conveniently, + // the variable_batch and batch flags are mutually exclusive, and the default + // value of batch is 1, so we have nothing special to do here. Let us + // just guard this assumption with a CHECK: + bool batchless_input_shapes = true; + for (const auto& input_array : model->flags.input_arrays()) { + if (input_array.shape().empty() || input_array.shape(0) != 1) { + batchless_input_shapes = false; + break; + } + } + CHECK(!model->flags.variable_batch() || batchless_input_shapes); + + Allocator allocator; + + // Construct a sorted map of array names, so that other layout engines can + // match exactly. + std::map ordered_arrays_map; + for (const auto& pair : model->arrays) { + ordered_arrays_map[pair.first] = pair.second.get(); + } + + // Allocate persistent arrays (like RNN states). For them, 'transient' + // is a misnormer, should read 'workspace'. + for (const auto& array_pair : ordered_arrays_map) { + const string& array_name = array_pair.first; + const auto& array_lifespan = array_lifespans.find(array_name)->second; + if (array_lifespan.persistent) { + AllocateTransientArray(*model, array_name, &allocator, + transient_data_alignment); + } + } + + for (std::size_t op_index = 0; op_index < model->operators.size(); + op_index++) { + const auto& op = model->operators[op_index]; + // Allocate those arrays whose lifespan starts exactly here. + for (const auto& input : op->inputs) { + if (StartsAt(array_lifespans[input], op_index)) { + AllocateTransientArray(*model, input, &allocator, + transient_data_alignment); + } + } + for (const auto& output : op->outputs) { + if (StartsAt(array_lifespans[output], op_index)) { + AllocateTransientArray(*model, output, &allocator, + transient_data_alignment); + } + } + // Deallocate those arrays whose lifespan ends exactly here. + for (const auto& input : op->inputs) { + if (EndsAt(array_lifespans[input], op_index)) { + DeallocateTransientArray(*model, input, &allocator); + } + } + for (const auto& output : op->outputs) { + if (EndsAt(array_lifespans[output], op_index)) { + DeallocateTransientArray(*model, output, &allocator); + } + } + } + + // Just out of curiosity (not used in the actual allocation process) + // evaluate the optimal total allocated size. + // First, compute the size of persistent arrays. + std::size_t optimal_transient_alloc_size = 0; + std::size_t persistent_alloc_size = 0; + for (const auto& array_pair : ordered_arrays_map) { + const string& array_name = array_pair.first; + const auto& array_lifespan = array_lifespans.find(array_name)->second; + if (array_lifespan.persistent) { + persistent_alloc_size += + TransientArraySize(*model, array_name, transient_data_alignment); + } + } + for (const auto& op : model->operators) { + // for each operator, compute the sum of the sizes of the array that must + // be live during the execution of this operator, plus the size of + // persistent arrays that must be live at all times. + std::size_t size = persistent_alloc_size; + for (const auto& input : op->inputs) { + if (!array_lifespans[input].persistent) { + size += TransientArraySize(*model, input, transient_data_alignment); + } + } + for (const auto& output : op->outputs) { + if (!array_lifespans[output].persistent) { + size += TransientArraySize(*model, output, transient_data_alignment); + } + } + // The optimal total size is the maximum of all operator-specific sizes. + optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size); + } + + model->transient_data_size = allocator.total_size(); + model->transient_data_alignment = transient_data_alignment; + CHECK_GE(model->transient_data_size, optimal_transient_alloc_size); + LOG(INFO) << "Total transient array allocated size: " + << model->transient_data_size << " bytes, " + << "theoretical optimal value: " << optimal_transient_alloc_size + << " bytes."; + CheckInvariants(*model); +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/allocate_transient_arrays.h b/tensorflow/contrib/lite/toco/allocate_transient_arrays.h new file mode 100644 index 0000000000..12d0d0498f --- /dev/null +++ b/tensorflow/contrib/lite/toco/allocate_transient_arrays.h @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_ALLOCATE_TRANSIENT_ARRAYS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_ALLOCATE_TRANSIENT_ARRAYS_H_ + +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +// We align the allocated sizes to the next multiple of a cache line, +// to get simple performance characteristics without side effects of +// accesses to one buffer on accesses to another buffer. +// That also takes care of data type alignment for any reasonable type +// (no reasonable data type should have alignment greater than a cache line). +// Here we make CPU-centric assumptions, in particular, we assume 64-byte cache +// lines. Getting this wrong by a factor of 2x (if this ever changes) wouldn't +// be terrible. +// Embedded architectures may use a different value for alignment. +constexpr std::size_t kDefaultTransientDataAlignment = 64; + +// Rounds up dividend to a value divisible by divisor. +inline std::size_t RoundUpToNextMultipleOf(std::size_t dividend, + std::size_t divisor) { + return ((dividend + divisor - 1) / divisor) * divisor; +} + +void AllocateTransientArrays(Model* model, + std::size_t transient_data_alignment); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_ALLOCATE_TRANSIENT_ARRAYS_H_ diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h new file mode 100644 index 0000000000..28661d4ff0 --- /dev/null +++ b/tensorflow/contrib/lite/toco/args.h @@ -0,0 +1,225 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// This abstracts command line arguments in toco. +// Arg is a parseable type that can register a default value, be able to +// parse itself, and keep track of whether it was specified. +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_ARGS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_ARGS_H_ + +#include +#include +#include +#include "absl/strings/numbers.h" +#include "absl/strings/str_split.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" + +namespace toco { + +// Since std::vector is in the std namespace, and we are not allowed +// to add ParseFlag/UnparseFlag to std, we introduce a simple wrapper type +// to use as the flag type: +struct IntList { + std::vector elements; +}; +struct StringMapList { + std::vector> elements; +}; + +// command_line_flags.h don't track whether or not a flag is specified. Arg +// contains the value (which will be default if not specified) and also +// whether the flag is specified. +// TODO(aselle): consider putting doc string and ability to construct the +// tensorflow argument into this, so declaration of parameters can be less +// distributed. +// Every template specialization of Arg is required to implement +// default_value(), specified(), value(), parse(), bind(). +template +class Arg final { + public: + explicit Arg(T default_ = T()) : value_(default_) {} + virtual ~Arg() {} + + // Provide default_value() to arg list + T default_value() const { return value_; } + // Return true if the command line argument was specified on the command line. + bool specified() const { return specified_; } + // Const reference to parsed value. + const T& value() const { return value_; } + + // Parsing callback for the tensorflow::Flags code + bool parse(T value_in) { + value_ = value_in; + specified_ = true; + return true; + } + + // Bind the parse member function so tensorflow::Flags can call it. + std::function bind() { + return std::bind(&Arg::parse, this, std::placeholders::_1); + } + + private: + // Becomes true after parsing if the value was specified + bool specified_ = false; + // Value of the argument (initialized to the default in the constructor). + T value_; +}; + +template <> +class Arg final { + public: + // Provide default_value() to arg list + string default_value() const { return ""; } + // Return true if the command line argument was specified on the command line. + bool specified() const { return specified_; } + // Bind the parse member function so tensorflow::Flags can call it. + bool parse(string text) { + parsed_value_.elements.clear(); + specified_ = true; + // strings::Split("") produces {""}, but we need {} on empty input. + // TODO(aselle): Moved this from elsewhere, but ahentz recommends we could + // use absl::SplitLeadingDec32Values(text.c_str(), &parsed_values_.elements) + if (!text.empty()) { + int32 element; + for (absl::string_view part : absl::StrSplit(text, ',')) { + if (!SimpleAtoi(part, &element)) return false; + parsed_value_.elements.push_back(element); + } + } + return true; + } + + std::function bind() { + return std::bind(&Arg::parse, this, std::placeholders::_1); + } + + const toco::IntList& value() const { return parsed_value_; } + + private: + toco::IntList parsed_value_; + bool specified_ = false; +}; + +template <> +class Arg final { + public: + // Provide default_value() to StringMapList + string default_value() const { return ""; } + // Return true if the command line argument was specified on the command line. + bool specified() const { return specified_; } + // Bind the parse member function so tensorflow::Flags can call it. + + bool parse(string text) { + parsed_value_.elements.clear(); + specified_ = true; + + if (text.empty()) { + return true; + } + +#if defined(PLATFORM_GOOGLE) + std::vector outer_vector; + absl::string_view text_disposable_copy = text; + SplitStructuredLine(text_disposable_copy, ',', "{}", &outer_vector); + for (const absl::string_view& outer_member_stringpiece : outer_vector) { + string outer_member(outer_member_stringpiece); + if (outer_member.empty()) { + continue; + } + string outer_member_copy = outer_member; + absl::StripAsciiWhitespace(&outer_member); + if (!TryStripPrefixString(outer_member, "{", &outer_member)) return false; + if (!TryStripSuffixString(outer_member, "}", &outer_member)) return false; + const std::vector inner_fields_vector = + strings::Split(outer_member, ','); + + std::unordered_map element; + for (const string& member_field : inner_fields_vector) { + std::vector outer_member_key_value = + strings::Split(member_field, ':'); + if (outer_member_key_value.size() != 2) return false; + string& key = outer_member_key_value[0]; + string& value = outer_member_key_value[1]; + absl::StripAsciiWhitespace(&key); + absl::StripAsciiWhitespace(&value); + if (element.count(key) != 0) return false; + element[key] = value; + } + parsed_value_.elements.push_back(element); + } + return true; +#else + // TODO(aselle): Fix argument parsing when absl supports structuredline + fprintf(stderr, "%s:%d StringMapList arguments not supported\n", __FILE__, + __LINE__); + abort(); +#endif + } + + std::function bind() { + return std::bind(&Arg::parse, this, std::placeholders::_1); + } + + const toco::StringMapList& value() const { return parsed_value_; } + + private: + toco::StringMapList parsed_value_; + bool specified_ = false; +}; + +// Flags that describe a model. See model_cmdline_flags.cc for details. +struct ParsedModelFlags { + Arg input_array; + Arg input_arrays; + Arg output_array; + Arg output_arrays; + Arg input_shapes; + Arg mean_value = Arg(0.f); + Arg mean_values; + Arg std_value = Arg(1.f); + Arg std_values; + Arg variable_batch = Arg(false); + Arg drop_control_dependency = Arg(false); + Arg input_shape; + Arg rnn_states; + Arg model_checks; + // Debugging output options + Arg graphviz_first_array; + Arg graphviz_last_array; + Arg dump_graphviz; + Arg dump_graphviz_video = Arg(false); +}; + +// Flags that describe the operation you would like to do (what conversion +// you want). See toco_cmdline_flags.cc for details. +struct ParsedTocoFlags { + Arg input_file; + Arg output_file; + Arg input_format; + Arg output_format; + // TODO(aselle): command_line_flags doesn't support doubles + Arg default_ranges_min = Arg(0.); + Arg default_ranges_max = Arg(0.); + Arg input_type; + Arg input_types; + Arg inference_type; + Arg drop_fake_quant = Arg(false); + Arg reorder_across_fake_quant = Arg(false); + Arg allow_custom_ops = Arg(false); +}; + +} // namespace toco +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_ARGS_H_ diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.cc b/tensorflow/contrib/lite/toco/dump_graphviz.cc new file mode 100644 index 0000000000..f5e2868dc0 --- /dev/null +++ b/tensorflow/contrib/lite/toco/dump_graphviz.cc @@ -0,0 +1,293 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/dump_graphviz.h" + +#include +#include +#include +#include + +#include "absl/strings/str_replace.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +using toco::port::AppendF; +using toco::port::StringF; + +namespace toco { +namespace { + +class Color { + public: + Color() {} + Color(uint8 r, uint8 g, uint8 b) : r_(r), g_(g), b_(b) {} + // Returns the string serialization of this color in graphviz format, + // for use as 'fillcolor' in boxes. + string FillColorString() const { return StringF("%.2X%.2X%.2X", r_, g_, b_); } + // Returns the serialization in graphviz format of a suitable color to use + // 'fontcolor' in the same boxes. It should black or white, whichever offers + // the better contrast from FillColorString(). + string TextColorString() const { + // https://en.wikipedia.org/wiki/Relative_luminance + const float luminance = 0.2126f * r_ + 0.7152f * g_ + 0.0722f * b_; + const uint8 l = luminance > 128.f ? 0 : 255; + return StringF("%.2X%.2X%.2X", l, l, l); + } + + private: + uint8 r_ = 0, g_ = 0, b_ = 0; +}; + +struct NodeProperties { + // The text to display inside the box for this node. + string label; + // The color to use for this node; will be used as 'fillcolor' + // for its box. See Color::FillColorString. A suitable, different + // color will be chosen for the 'fontcolor' for the inside text + // label, see Color::TextColorString. + Color color; +}; + +// All colors in this file are from: +// https://material.io/guidelines/style/color.html + +Color GetColorForArray(const Model& model, const string& array_name) { + // Arrays involved in RNN back-edges have a different color + for (const auto& rnn_state : model.flags.rnn_states()) { + // RNN state, fed by a back-edge. Bold color. + if (array_name == rnn_state.state_array()) { + return Color(0x0F, 0x9D, 0x58); + } + // RNN back-edge source, feeding a RNN state. + // Light tone of the same color as RNN states. + if (array_name == rnn_state.back_edge_source_array()) { + return Color(0xB7, 0xE1, 0xCD); + } + } + // Constant parameter arrays have their own bold color + if (model.GetArray(array_name).buffer) { + return Color(0x42, 0x85, 0xF4); + } + // Remaining arrays are activations. + // We use gray colors for them because they are the majority + // of arrays so we want to highlight other arrays instead of them. + // First, we use a bolder gray for input/output arrays: + const auto& dump_options = *GraphVizDumpOptions::singleton(); + if (IsInputArray(model, array_name) || + array_name == dump_options.graphviz_first_array || + array_name == dump_options.graphviz_last_array) { + return Color(0x9E, 0x9E, 0x9E); + } + for (const string& output_array : model.flags.output_arrays()) { + if (array_name == output_array) { + return Color(0x9E, 0x9E, 0x9E); + } + } + // Remaining arrays are intermediate activation arrays. + // Lighter tone of the same grey as for input/output arrays: + // We want these to be very discrete. + return Color(0xF5, 0xF5, 0xF5); +} + +NodeProperties GetPropertiesForArray(const Model& model, + const string& array_name) { + NodeProperties node_properties; + node_properties.color = GetColorForArray(model, array_name); + node_properties.label = absl::StrReplaceAll(array_name, {{"/", "/\\n"}}); + + // Append array shape to the label. + auto& array = model.GetArray(array_name); + + if (array.data_type == ArrayDataType::kFloat) { + AppendF(&node_properties.label, "\\nType: float"); + } else if (array.data_type == ArrayDataType::kInt32) { + AppendF(&node_properties.label, "\\nType: int32"); + } else if (array.data_type == ArrayDataType::kUint8) { + AppendF(&node_properties.label, "\\nType: uint8"); + } + + if (array.has_shape()) { + auto& array_shape = array.shape(); + node_properties.label += "\\n["; + for (int id = 0; id < array_shape.dimensions_count(); id++) { + if (id == 0) { + AppendF(&node_properties.label, "%d", array_shape.dims(id)); + } else { + AppendF(&node_properties.label, "x%d", array_shape.dims(id)); + } + } + node_properties.label += "]"; + } + + if (array.minmax) { + AppendF(&node_properties.label, "\\nMinMax: [%.3g, %.3g]", + array.minmax->min, array.minmax->max); + } + + if (array.quantization_params) { + AppendF(&node_properties.label, "\\nQuantization: %.3g * (x - %d)", + array.quantization_params->scale, + array.quantization_params->zero_point); + } + + if (array.alloc) { + AppendF(&node_properties.label, "\\nTransient Alloc: [%d, %d)", + array.alloc->start, array.alloc->end); + } + + return node_properties; +} + +NodeProperties GetPropertiesForOperator(const Operator& op) { + NodeProperties node_properties; + if (op.type == OperatorType::kTensorFlowUnsupported) { + node_properties.label = + static_cast(op).tensorflow_op; + } else { + node_properties.label = OperatorTypeName(op.type); + } + // Additional information for some of the operators. + switch (op.type) { + case OperatorType::kConv: { + const auto& conv_op = static_cast(op); + node_properties.color = Color(0xC5, 0x39, 0x29); // Bolder color + AppendF(&node_properties.label, "\\n%dx%d/%s", conv_op.stride_width, + conv_op.stride_height, + conv_op.padding.type == PaddingType::kSame ? "S" : "V"); + break; + } + case OperatorType::kDepthwiseConv: { + const auto& conv_op = static_cast(op); + node_properties.color = Color(0xC5, 0x39, 0x29); // Bolder color + AppendF(&node_properties.label, "\\n%dx%d/%s", conv_op.stride_width, + conv_op.stride_height, + conv_op.padding.type == PaddingType::kSame ? "S" : "V"); + break; + } + case OperatorType::kFullyConnected: { + node_properties.color = Color(0xC5, 0x39, 0x29); // Bolder color + break; + } + default: + node_properties.color = Color(0xDB, 0x44, 0x37); + break; + } + + return node_properties; +} + +std::vector OperatorsToDump(const Model& model) { + const auto& dump_options = *GraphVizDumpOptions::singleton(); + bool first_specified = !dump_options.graphviz_first_array.empty(); + bool last_specified = !dump_options.graphviz_last_array.empty(); + CHECK_EQ(first_specified, last_specified); + std::vector ops_to_dump; + if (last_specified) { + // Return only the part of the graph between graphviz_first_array + // and graphviz_last_array. + CHECK(model.arrays.count(dump_options.graphviz_first_array)); + CHECK(model.arrays.count(dump_options.graphviz_last_array)); + std::unordered_set arrays_already_produced; + std::vector arrays_to_produce; + arrays_to_produce.push_back(dump_options.graphviz_last_array); + while (!arrays_to_produce.empty()) { + const string array = arrays_to_produce.back(); + arrays_to_produce.pop_back(); + CHECK(!arrays_already_produced.count(array)); + arrays_already_produced.insert(array); + const Operator* op = GetOpWithOutput(model, array); + if (!op) { + continue; + } + ops_to_dump.push_back(op); + for (const string& input : op->inputs) { + if (arrays_already_produced.count(input) || + input == dump_options.graphviz_first_array) { + continue; + } + arrays_to_produce.push_back(input); + } + } + } else { + // Return the whole graph. + for (const auto& op : model.operators) { + ops_to_dump.push_back(op.get()); + } + } + return ops_to_dump; +} + +} // namespace + +void DumpGraphviz(const Model& model, string* output_file_contents) { + AppendF(output_file_contents, "digraph Computegraph {\n"); + + constexpr char kNodeFormat[] = + "\t \"%s\" [label=\"%s\", shape=%s, style=filled, fillcolor=\"#%s\", " + "fontcolor = \"#%sDD\"];\n"; + + constexpr char kEdgeFormat[] = "\t \"%s\" -> \"%s\";\n"; + + constexpr char kRNNBackEdgeFormat[] = + "\t \"%s\" -> \"%s\" [color=\"#0F9D58\"];\n"; + + std::vector ops_to_dump = OperatorsToDump(model); + std::set already_added_arrays; + for (int op_index = 0; op_index < ops_to_dump.size(); op_index++) { + const Operator& op = *ops_to_dump[op_index]; + // Add node for operator. + auto op_properties = GetPropertiesForOperator(op); + string operator_id = StringF("op%05d", op_index); + AppendF(output_file_contents, kNodeFormat, operator_id, op_properties.label, + "box", op_properties.color.FillColorString().c_str(), + op_properties.color.TextColorString().c_str()); + // Add nodes and edges for all inputs of the operator. + for (const auto& input : op.inputs) { + auto array_properties = GetPropertiesForArray(model, input); + if (!already_added_arrays.count(input)) { + AppendF(output_file_contents, kNodeFormat, input, + array_properties.label, "octagon", + array_properties.color.FillColorString().c_str(), + array_properties.color.TextColorString().c_str()); + } + AppendF(output_file_contents, kEdgeFormat, input, operator_id); + already_added_arrays.insert(input); + } + // Add nodes and edges for all outputs of the operator. + for (const auto& output : op.outputs) { + auto array_properties = GetPropertiesForArray(model, output); + if (!already_added_arrays.count(output)) { + AppendF(output_file_contents, kNodeFormat, output, + array_properties.label, "octagon", + array_properties.color.FillColorString().c_str(), + array_properties.color.TextColorString().c_str()); + } + AppendF(output_file_contents, kEdgeFormat, operator_id, output); + already_added_arrays.insert(output); + } + } + + for (const auto& rnn_state : model.flags.rnn_states()) { + AppendF(output_file_contents, kRNNBackEdgeFormat, + rnn_state.back_edge_source_array(), rnn_state.state_array()); + } + + AppendF(output_file_contents, "}\n"); +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.h b/tensorflow/contrib/lite/toco/dump_graphviz.h new file mode 100644 index 0000000000..0fb28e3de8 --- /dev/null +++ b/tensorflow/contrib/lite/toco/dump_graphviz.h @@ -0,0 +1,28 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_DUMP_GRAPHVIZ_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_DUMP_GRAPHVIZ_H_ + +#include + +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +void DumpGraphviz(const Model& model, string* output_file_contents); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_DUMP_GRAPHVIZ_H_ diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc new file mode 100644 index 0000000000..16b9fa2260 --- /dev/null +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -0,0 +1,1570 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "google/protobuf/map.h" +#include "google/protobuf/text_format.h" +#include "absl/memory/memory.h" +#include "absl/strings/string_view.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tensorflow_util.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/logging.h" + +using tensorflow::DT_FLOAT; +using tensorflow::DT_INT32; +using tensorflow::GraphDef; +using tensorflow::TensorProto; + +namespace toco { +namespace { + +// TensorFlow sometimes forbids what it calls "legacy scalars", +// which are 1-D shapes where the unique shape size is 1. +// See OpKernel::IsLegacyScalar and OpKernel::allow_legacy_scalars. +// For that reason, we generally avoid creating legacy scalars, +// by detecting the case where a 1-D shape would be of size 1 and +// replacing that by a 0-D shape. +// However, there is a special circumstance where we must not do that +// and must unconditionally create a 1-D shape even if it is going to +// be of size 1: that is the case of bias vectors, with BiasAdd nodes. +// Indeed, TensorFlow requires bias vectors to be 1-D; in the case of +// a depth of 1, that would be a legacy scalar, so in that case we +// must go ahead and keep the shape 1-D, letting it be a legacy scalar. +enum class LegacyScalarPolicy { kAvoidLegacyScalars, kDoCreateLegacyScalars }; + +void ExportFloatArray(const Shape& input_shape, const float* input_data, + TensorProto* output_tensor, + LegacyScalarPolicy legacy_scalar_policy) { + output_tensor->set_dtype(DT_FLOAT); + const int input_flat_size = RequiredBufferSizeForShape(input_shape); + auto* shape = output_tensor->mutable_tensor_shape(); + + const int kDims = input_shape.dimensions_count(); + if (legacy_scalar_policy == LegacyScalarPolicy::kDoCreateLegacyScalars || + kDims > 1 || (kDims == 1 && input_shape.dims(0) > 1)) { + for (int i = 0; i < kDims; ++i) { + shape->add_dim()->set_size(input_shape.dims(i)); + } + } + output_tensor->set_tensor_content( + string(reinterpret_cast(input_data), + sizeof(*input_data) * input_flat_size)); +} + +void ExportFloatArray(AxesOrder input_axes_order, const Shape& input_shape, + const float* input_data, AxesOrder output_axes_order, + TensorProto* output_tensor, + LegacyScalarPolicy legacy_scalar_policy) { + CHECK_EQ(AxesCount(output_axes_order), AxesCount(input_axes_order)); + output_tensor->set_dtype(DT_FLOAT); + CHECK_EQ(input_shape.dimensions_count(), AxesCount(input_axes_order)); + const int input_flat_size = RequiredBufferSizeForShape(input_shape); + + Shape shuffled_shape; + ShuffleDims(input_shape, input_axes_order, output_axes_order, + &shuffled_shape); + std::vector shuffled_data(input_flat_size); + ShuffleArray(input_shape, input_axes_order, output_axes_order, shuffled_shape, + input_data, shuffled_data.data()); + + ExportFloatArray(shuffled_shape, shuffled_data.data(), output_tensor, + legacy_scalar_policy); +} + +bool HasAlreadyExportedConst(const string& name, + const GraphDef& tensorflow_graph) { + for (const auto& node : tensorflow_graph.node()) { + if (node.op() == "Const" && node.name() == name) { + return true; + } + } + return false; +} + +void ConvertFloatTensorConst(const string& name, const Shape& input_shape, + const float* input_data, + AxesOrder input_axes_order, + AxesOrder output_axes_order, + GraphDef* tensorflow_graph, + LegacyScalarPolicy legacy_scalar_policy) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_FLOAT); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + ExportFloatArray(input_axes_order, input_shape, input_data, output_axes_order, + tensor, legacy_scalar_policy); +} + +void ConvertFloatTensorConst(const string& name, const Shape& input_shape, + const float* input_data, + AxesOrder input_axes_order, + AxesOrder output_axes_order, + GraphDef* tensorflow_graph) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_FLOAT); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + ExportFloatArray(input_axes_order, input_shape, input_data, output_axes_order, + tensor, LegacyScalarPolicy::kAvoidLegacyScalars); +} + +void ConvertFloatTensorConst(const Model& model, const string& name, + AxesOrder input_axes_order, + AxesOrder output_axes_order, + GraphDef* tensorflow_graph) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_FLOAT); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + CHECK(model.arrays.count(name)); + const auto& input_array = *model.arrays.at(name); + const auto& input_shape = input_array.shape(); + CHECK(input_array.buffer); + CHECK(input_array.buffer->type == ArrayDataType::kFloat); + const float* input_data = + input_array.GetBuffer().data.data(); + ExportFloatArray(input_axes_order, input_shape, input_data, output_axes_order, + tensor, LegacyScalarPolicy::kAvoidLegacyScalars); +} + +void ConvertFloatTensorConst(const Model& model, const string& name, + GraphDef* tensorflow_graph) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_FLOAT); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + CHECK(model.arrays.count(name)); + const auto& input_array = *model.arrays.at(name); + const auto& input_shape = input_array.shape(); + CHECK(input_array.buffer); + CHECK(input_array.buffer->type == ArrayDataType::kFloat); + const float* input_data = + input_array.GetBuffer().data.data(); + ExportFloatArray(input_shape, input_data, tensor, + LegacyScalarPolicy::kAvoidLegacyScalars); +} + +void ConvertIntTensorConst(const Model& model, const string& name, + GraphDef* tensorflow_graph) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + CHECK(model.arrays.count(name)); + const auto& array = *model.arrays.at(name); + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + tensor->set_dtype(DT_INT32); + const auto& data = array.GetBuffer().data; + for (auto index : data) { + tensor->add_int_val(index); + } + const auto& array_shape = array.shape(); + auto* shape = tensor->mutable_tensor_shape(); + for (int i = 0; i < array_shape.dimensions_count(); i++) { + shape->add_dim()->set_size(array_shape.dims(i)); + } +} + +void CreateMatrixShapeTensorConst(const string& name, int rows, int cols, + GraphDef* tensorflow_graph) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + tensor->set_dtype(DT_INT32); + const int32 data[2] = {cols, rows}; + tensor->set_tensor_content( + string(reinterpret_cast(data), sizeof(data))); + auto* shape = tensor->mutable_tensor_shape(); + shape->add_dim()->set_size(2); +} + +void CreateDummyConcatDimTensorConst(const string& name, int dim, + GraphDef* tensorflow_graph) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + tensor->set_dtype(DT_INT32); + tensor->add_int_val(dim); +} + +void CreateReshapeShapeTensorConst(const string& name, + const std::vector& shape, + GraphDef* tensorflow_graph) { + if (HasAlreadyExportedConst(name, *tensorflow_graph)) { + return; + } + auto* const_op = tensorflow_graph->add_node(); + const_op->set_op("Const"); + const_op->set_name(name); + (*const_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* tensor = (*const_op->mutable_attr())["value"].mutable_tensor(); + tensor->set_dtype(DT_INT32); + for (auto s : shape) { + tensor->add_int_val(s); + } + // TensorFlow sometimes forbids what it calls "legacy scalars", + // which are shapes of size 1 where the unique shape size is 1. + // See OpKernel::IsLegacyScalar and OpKernel::allow_legacy_scalars. + if (shape.size() > 1) { + auto* tensor_shape = tensor->mutable_tensor_shape(); + tensor_shape->add_dim()->set_size(shape.size()); + } +} + +string WalkUpToConstantArray(const Model& model, const string& name) { + const Array& original_array = model.GetArray(name); + if (original_array.buffer) { + return name; + } + const auto* op = GetOpWithOutput(model, name); + CHECK(op); + CHECK(op->type == OperatorType::kFakeQuant); + const string& input_of_fakequant_name = op->inputs[0]; + const Array& input_of_fakequant = model.GetArray(input_of_fakequant_name); + CHECK(input_of_fakequant.buffer); + return input_of_fakequant_name; +} + +void ConvertConvOperator(const Model& model, const ConvOperator& src_op, + GraphDef* tensorflow_graph) { + const bool has_bias = src_op.inputs.size() >= 3; + string conv_output = src_op.outputs[0]; + if (has_bias) { + conv_output += "/conv"; + } + + auto* conv2d_op = tensorflow_graph->add_node(); + conv2d_op->set_op("Conv2D"); + conv2d_op->set_name(conv_output); + *conv2d_op->add_input() = src_op.inputs[0]; + *conv2d_op->add_input() = src_op.inputs[1]; + (*conv2d_op->mutable_attr())["T"].set_type(DT_FLOAT); + const string& weights_array_name = + WalkUpToConstantArray(model, src_op.inputs[1]); + const auto& weights_array = model.GetArray(weights_array_name); + CHECK(weights_array.buffer->type == ArrayDataType::kFloat); + ConvertFloatTensorConst(model, weights_array_name, AxesOrder::kOHWI, + AxesOrder::kHWIO, tensorflow_graph); + auto& strides = (*conv2d_op->mutable_attr())["strides"]; + strides.mutable_list()->add_i(1); + strides.mutable_list()->add_i(src_op.stride_height); + strides.mutable_list()->add_i(src_op.stride_width); + strides.mutable_list()->add_i(1); + string padding; + if (src_op.padding.type == PaddingType::kSame) { + padding = "SAME"; + } else if (src_op.padding.type == PaddingType::kValid) { + padding = "VALID"; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + (*conv2d_op->mutable_attr())["padding"].set_s(padding); + + if (has_bias) { + auto* biasadd_op = tensorflow_graph->add_node(); + biasadd_op->set_op("BiasAdd"); + biasadd_op->set_name(src_op.outputs[0]); + biasadd_op->add_input(conv_output); + biasadd_op->add_input(src_op.inputs[2]); + (*biasadd_op->mutable_attr())["T"].set_type(DT_FLOAT); + CHECK(model.arrays.count(src_op.inputs[2])); + const string& bias_array_name = + WalkUpToConstantArray(model, src_op.inputs[2]); + const auto& bias_array = model.GetArray(bias_array_name); + // TODO(b/62904716) Bias arrays should be 1-D, and used directly. + Shape bias_shape_1d = bias_array.shape(); + UnextendShape(&bias_shape_1d, 1); + CHECK(bias_array.buffer->type == ArrayDataType::kFloat); + const float* bias_data = + bias_array.GetBuffer().data.data(); + ConvertFloatTensorConst(bias_array_name, bias_shape_1d, bias_data, + AxesOrder::kOneAxis, AxesOrder::kOneAxis, + tensorflow_graph, + LegacyScalarPolicy::kDoCreateLegacyScalars); + } +} + +void ConvertDepthwiseConvOperator(const Model& model, + const DepthwiseConvOperator& src_op, + GraphDef* tensorflow_graph) { + const bool has_bias = src_op.inputs.size() >= 3; + string conv_output = src_op.outputs[0]; + if (has_bias) { + conv_output += "/conv"; + } + + auto* dc2d_op = tensorflow_graph->add_node(); + dc2d_op->set_op("DepthwiseConv2dNative"); + dc2d_op->set_name(conv_output); + *dc2d_op->add_input() = src_op.inputs[0]; + *dc2d_op->add_input() = src_op.inputs[1]; + (*dc2d_op->mutable_attr())["T"].set_type(DT_FLOAT); + + // Our internal DepthwiseConv weights are 1 x H x W x OutputDepth. + // We need to convert that to H x W x InputDepth x Multiplier. + // That's only a matter of constructing a Dims object; the actual + // array layout is the same. + CHECK(model.arrays.count(src_op.inputs[1])); + const string& src_weights_name = + WalkUpToConstantArray(model, src_op.inputs[1]); + const auto& src_weights_array = model.GetArray(src_weights_name); + const auto& src_weights_shape = src_weights_array.shape(); + CHECK_EQ(src_weights_shape.dimensions_count(), 4); + const Shape dst_weights_shape = + Shape({src_weights_shape.dims(1), src_weights_shape.dims(2), + src_weights_shape.dims(3) / src_op.depth_multiplier, + src_op.depth_multiplier}); + CHECK_EQ(src_weights_shape.dims(3) % src_op.depth_multiplier, 0); + CHECK(dst_weights_shape.dims(2) * dst_weights_shape.dims(3) == + src_weights_shape.dims(3)); + CHECK_EQ(src_weights_shape.dims(0), 1); + + CHECK(src_weights_array.buffer->type == ArrayDataType::kFloat); + const float* src_weights_data = + src_weights_array.GetBuffer().data.data(); + ConvertFloatTensorConst(src_weights_name, dst_weights_shape, src_weights_data, + AxesOrder::kHWIM, AxesOrder::kHWIM, tensorflow_graph); + + auto& strides = (*dc2d_op->mutable_attr())["strides"]; + strides.mutable_list()->add_i(1); + strides.mutable_list()->add_i(src_op.stride_height); + strides.mutable_list()->add_i(src_op.stride_width); + strides.mutable_list()->add_i(1); + string padding; + if (src_op.padding.type == PaddingType::kSame) { + padding = "SAME"; + } else if (src_op.padding.type == PaddingType::kValid) { + padding = "VALID"; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + (*dc2d_op->mutable_attr())["padding"].set_s(padding); + + if (has_bias) { + auto* biasadd_op = tensorflow_graph->add_node(); + biasadd_op->set_op("BiasAdd"); + biasadd_op->set_name(src_op.outputs[0]); + biasadd_op->add_input(conv_output); + biasadd_op->add_input(src_op.inputs[2]); + (*biasadd_op->mutable_attr())["T"].set_type(DT_FLOAT); + CHECK(model.arrays.count(src_op.inputs[2])); + const string& bias_name = WalkUpToConstantArray(model, src_op.inputs[2]); + const auto& bias_array = model.GetArray(bias_name); + // TODO(b/62904716) Bias arrays should be 1-D, and used directly. + Shape bias_shape_1d = bias_array.shape(); + UnextendShape(&bias_shape_1d, 1); + CHECK(bias_array.buffer->type == ArrayDataType::kFloat); + const float* bias_data = + bias_array.GetBuffer().data.data(); + ConvertFloatTensorConst(bias_name, bias_shape_1d, bias_data, + AxesOrder::kOneAxis, AxesOrder::kOneAxis, + tensorflow_graph, + LegacyScalarPolicy::kDoCreateLegacyScalars); + } +} + +void ConvertDepthToSpaceOperator(const Model& model, + const DepthToSpaceOperator& src_op, + GraphDef* tensorflow_graph) { + auto* op = tensorflow_graph->add_node(); + op->set_op("DepthToSpace"); + op->set_name(src_op.outputs[0]); + *op->add_input() = src_op.inputs[0]; + (*op->mutable_attr())["T"].set_type(DT_FLOAT); + (*op->mutable_attr())["block_size"].set_i(src_op.block_size); +} + +void ConvertSpaceToDepthOperator(const Model& model, + const SpaceToDepthOperator& src_op, + GraphDef* tensorflow_graph) { + auto* op = tensorflow_graph->add_node(); + op->set_op("SpaceToDepth"); + op->set_name(src_op.outputs[0]); + *op->add_input() = src_op.inputs[0]; + (*op->mutable_attr())["T"].set_type(DT_FLOAT); + (*op->mutable_attr())["block_size"].set_i(src_op.block_size); +} + +void ConvertFullyConnectedOperator(const Model& model, + const FullyConnectedOperator& src_op, + GraphDef* tensorflow_graph) { + const string reshape_output = src_op.outputs[0] + "/reshape"; + const string reshape_shape = src_op.outputs[0] + "/reshape/shape"; + auto* reshape_op = tensorflow_graph->add_node(); + reshape_op->set_op("Reshape"); + reshape_op->set_name(reshape_output); + reshape_op->add_input(src_op.inputs[0]); + reshape_op->add_input(reshape_shape); + (*reshape_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const bool has_bias = src_op.inputs.size() >= 3; + string matmul_output = src_op.outputs[0]; + if (has_bias) { + matmul_output += "/matmul"; + } + + auto* matmul_op = tensorflow_graph->add_node(); + matmul_op->set_op("MatMul"); + + matmul_op->set_name(matmul_output); + *matmul_op->add_input() = reshape_output; + *matmul_op->add_input() = src_op.inputs[1]; + (*matmul_op->mutable_attr())["T"].set_type(DT_FLOAT); + (*matmul_op->mutable_attr())["transpose_a"].set_b(false); + (*matmul_op->mutable_attr())["transpose_b"].set_b(false); + CHECK(model.arrays.count(src_op.inputs[1])); + const string& fc_weights_name = + WalkUpToConstantArray(model, src_op.inputs[1]); + const auto& fc_weights_array = *model.arrays.at(fc_weights_name); + const auto& fc_weights_shape = fc_weights_array.shape(); + CHECK_EQ(fc_weights_shape.dimensions_count(), 2); + CreateMatrixShapeTensorConst(reshape_shape, fc_weights_shape.dims(1), -1, + tensorflow_graph); + + CHECK(fc_weights_array.buffer); + CHECK(fc_weights_array.buffer->type == ArrayDataType::kFloat); + const float* fc_weights_data = + fc_weights_array.GetBuffer().data.data(); + ConvertFloatTensorConst(fc_weights_name, fc_weights_shape, fc_weights_data, + AxesOrder::kCR, AxesOrder::kRC, tensorflow_graph); + + if (has_bias) { + auto* biasadd_op = tensorflow_graph->add_node(); + biasadd_op->set_op("BiasAdd"); + biasadd_op->set_name(src_op.outputs[0]); + biasadd_op->add_input(matmul_output); + biasadd_op->add_input(src_op.inputs[2]); + (*biasadd_op->mutable_attr())["T"].set_type(DT_FLOAT); + CHECK(model.arrays.count(src_op.inputs[2])); + const auto& bias_array = *model.arrays.at(src_op.inputs[2]); + // TODO(b/62904716) Bias arrays should be 1-D, and used directly. + Shape bias_shape_1d = bias_array.shape(); + UnextendShape(&bias_shape_1d, 1); + CHECK(bias_array.buffer); + CHECK(bias_array.buffer->type == ArrayDataType::kFloat); + const float* bias_data = + bias_array.GetBuffer().data.data(); + ConvertFloatTensorConst(WalkUpToConstantArray(model, src_op.inputs[2]), + bias_shape_1d, bias_data, AxesOrder::kOneAxis, + AxesOrder::kOneAxis, tensorflow_graph, + LegacyScalarPolicy::kDoCreateLegacyScalars); + } +} + +void ConvertAddOperator(const Model& model, const AddOperator& src_op, + GraphDef* tensorflow_graph) { + auto* add_op = tensorflow_graph->add_node(); + add_op->set_op("Add"); + add_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *add_op->add_input() = src_op.inputs[0]; + *add_op->add_input() = src_op.inputs[1]; + (*add_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertMulOperator(const Model& model, const MulOperator& src_op, + GraphDef* tensorflow_graph) { + auto* add_op = tensorflow_graph->add_node(); + add_op->set_op("Mul"); + add_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *add_op->add_input() = src_op.inputs[0]; + *add_op->add_input() = src_op.inputs[1]; + (*add_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertReluOperator(const ReluOperator& src_op, + GraphDef* tensorflow_graph) { + auto* relu_op = tensorflow_graph->add_node(); + relu_op->set_op("Relu"); + relu_op->set_name(src_op.outputs[0]); + *relu_op->add_input() = src_op.inputs[0]; + (*relu_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertRelu1Operator(const Relu1Operator& src_op, + GraphDef* tensorflow_graph) { + const string max_bounds = src_op.outputs[0] + "/max_bounds"; + const string min_bounds = src_op.outputs[0] + "/min_bounds"; + const string max_output = src_op.outputs[0] + "/max_output"; + + auto* max_bounds_const_op = tensorflow_graph->add_node(); + max_bounds_const_op->set_op("Const"); + max_bounds_const_op->set_name(max_bounds); + (*max_bounds_const_op->mutable_attr())["dtype"].set_type(DT_FLOAT); + auto* max_bounds_const_op_tensor = + (*max_bounds_const_op->mutable_attr())["value"].mutable_tensor(); + max_bounds_const_op_tensor->set_dtype(DT_FLOAT); + max_bounds_const_op_tensor->add_float_val(-1.0f); + + auto* min_bounds_const_op = tensorflow_graph->add_node(); + min_bounds_const_op->set_op("Const"); + min_bounds_const_op->set_name(min_bounds); + (*min_bounds_const_op->mutable_attr())["dtype"].set_type(DT_FLOAT); + auto* min_bounds_const_op_tensor = + (*min_bounds_const_op->mutable_attr())["value"].mutable_tensor(); + min_bounds_const_op_tensor->set_dtype(DT_FLOAT); + min_bounds_const_op_tensor->add_float_val(1.0f); + + auto* max_op = tensorflow_graph->add_node(); + max_op->set_op("Maximum"); + max_op->set_name(max_output); + *max_op->add_input() = src_op.inputs[0]; + *max_op->add_input() = max_bounds; + (*max_op->mutable_attr())["T"].set_type(DT_FLOAT); + + auto* min_op = tensorflow_graph->add_node(); + min_op->set_op("Minimum"); + min_op->set_name(src_op.outputs[0]); + *min_op->add_input() = max_output; + *min_op->add_input() = min_bounds; + (*min_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertRelu6Operator(const Relu6Operator& src_op, + GraphDef* tensorflow_graph) { + auto* relu_op = tensorflow_graph->add_node(); + relu_op->set_op("Relu6"); + relu_op->set_name(src_op.outputs[0]); + *relu_op->add_input() = src_op.inputs[0]; + (*relu_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertLogisticOperator(const LogisticOperator& src_op, + GraphDef* tensorflow_graph) { + auto* relu_op = tensorflow_graph->add_node(); + relu_op->set_op("Sigmoid"); + relu_op->set_name(src_op.outputs[0]); + *relu_op->add_input() = src_op.inputs[0]; + (*relu_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertTanhOperator(const TanhOperator& src_op, + GraphDef* tensorflow_graph) { + auto* tanh_op = tensorflow_graph->add_node(); + tanh_op->set_op("Tanh"); + tanh_op->set_name(src_op.outputs[0]); + *tanh_op->add_input() = src_op.inputs[0]; + (*tanh_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertSoftmaxOperator(const Model& model, const SoftmaxOperator& src_op, + GraphDef* tensorflow_graph) { + string softmax_input; + Operator* providing_op = GetOpWithOutput(model, src_op.inputs[0]); + if (providing_op->type == OperatorType::kTensorFlowReshape) { + softmax_input = src_op.inputs[0]; + } else { + // Insert a reshape operator that reduces the dimensions down to the 2 that + // are required for TensorFlow Logits. + const string reshape_output = src_op.outputs[0] + "/softmax_insert_reshape"; + const string softmax_size = src_op.outputs[0] + "/softmax_insert_size"; + softmax_input = reshape_output; + + auto* reshape_op = tensorflow_graph->add_node(); + reshape_op->set_op("Reshape"); + reshape_op->set_name(reshape_output); + *reshape_op->add_input() = src_op.inputs[0]; + *reshape_op->add_input() = softmax_size; + (*reshape_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const auto& input_shape = model.arrays.at(src_op.inputs[0])->shape(); + int32 flattened_size = 1; + for (int i = 0; i < input_shape.dimensions_count() - 1; ++i) { + flattened_size *= input_shape.dims(i); + } + const std::vector shape_data = { + flattened_size, input_shape.dims(input_shape.dimensions_count() - 1)}; + CreateReshapeShapeTensorConst(softmax_size, shape_data, tensorflow_graph); + } + + auto* softmax_op = tensorflow_graph->add_node(); + softmax_op->set_op("Softmax"); + softmax_op->set_name(src_op.outputs[0]); + *softmax_op->add_input() = softmax_input; + // TensorFlow's Softmax doesn't seem to admit a 'beta' parameter + CHECK_EQ(src_op.beta, 1.f); + (*softmax_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertL2NormalizationOperator(const L2NormalizationOperator& src_op, + GraphDef* tensorflow_graph) { + const string square_output = src_op.outputs[0] + "/square"; + const string sum_reduction_indices = src_op.outputs[0] + "/reduction_indices"; + const string sum_output = src_op.outputs[0] + "/sum"; + const string rsqrt_output = src_op.outputs[0] + "/rsqrt"; + const string rsqrt_tiled_output = src_op.outputs[0] + "/rsqrt_tiled"; + + auto* sum_reduction_indices_op = tensorflow_graph->add_node(); + sum_reduction_indices_op->set_op("Const"); + sum_reduction_indices_op->set_name(sum_reduction_indices); + (*sum_reduction_indices_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* sum_reduction_indices_tensor = + (*sum_reduction_indices_op->mutable_attr())["value"].mutable_tensor(); + sum_reduction_indices_tensor->set_dtype(DT_INT32); + auto* sum_reduction_indices_shape = + sum_reduction_indices_tensor->mutable_tensor_shape(); + auto* sum_reduction_indices_dim = sum_reduction_indices_shape->add_dim(); + sum_reduction_indices_dim->set_size(2); + sum_reduction_indices_tensor->add_int_val(0); + sum_reduction_indices_tensor->add_int_val(1); + + auto* square_op = tensorflow_graph->add_node(); + square_op->set_op("Square"); + square_op->set_name(square_output); + *square_op->add_input() = src_op.inputs[0]; + (*square_op->mutable_attr())["T"].set_type(DT_FLOAT); + + auto* sum_op = tensorflow_graph->add_node(); + sum_op->set_op("Sum"); + sum_op->set_name(sum_output); + *sum_op->add_input() = square_output; + *sum_op->add_input() = sum_reduction_indices; + (*sum_op->mutable_attr())["T"].set_type(DT_FLOAT); + + auto* rsqrt_op = tensorflow_graph->add_node(); + rsqrt_op->set_op("Rsqrt"); + rsqrt_op->set_name(rsqrt_output); + *rsqrt_op->add_input() = sum_output; + (*rsqrt_op->mutable_attr())["T"].set_type(DT_FLOAT); + + auto* mul_op = tensorflow_graph->add_node(); + mul_op->set_op("Mul"); + mul_op->set_name(src_op.outputs[0]); + *mul_op->add_input() = src_op.inputs[0]; + *mul_op->add_input() = rsqrt_output; + (*mul_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertLocalResponseNormalizationOperator( + const LocalResponseNormalizationOperator& src_op, + GraphDef* tensorflow_graph) { + auto* lrn_op = tensorflow_graph->add_node(); + lrn_op->set_op("LRN"); + lrn_op->set_name(src_op.outputs[0]); + *lrn_op->add_input() = src_op.inputs[0]; + (*lrn_op->mutable_attr())["depth_radius"].set_i(src_op.range); + (*lrn_op->mutable_attr())["bias"].set_f(src_op.bias); + (*lrn_op->mutable_attr())["alpha"].set_f(src_op.alpha); + (*lrn_op->mutable_attr())["beta"].set_f(src_op.beta); +} + +void ConvertFakeQuantOperator(const FakeQuantOperator& src_op, + GraphDef* tensorflow_graph) { + auto* fakequant_op = tensorflow_graph->add_node(); + fakequant_op->set_op("FakeQuantWithMinMaxArgs"); + fakequant_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *fakequant_op->add_input() = src_op.inputs[0]; + CHECK(src_op.minmax); + (*fakequant_op->mutable_attr())["min"].set_f(src_op.minmax->min); + (*fakequant_op->mutable_attr())["max"].set_f(src_op.minmax->max); +} + +void ConvertMaxPoolOperator(const MaxPoolOperator& src_op, + GraphDef* tensorflow_graph) { + auto* maxpool_op = tensorflow_graph->add_node(); + maxpool_op->set_op("MaxPool"); + maxpool_op->set_name(src_op.outputs[0]); + *maxpool_op->add_input() = src_op.inputs[0]; + auto& strides = (*maxpool_op->mutable_attr())["strides"]; + strides.mutable_list()->add_i(1); + strides.mutable_list()->add_i(src_op.stride_height); + strides.mutable_list()->add_i(src_op.stride_width); + strides.mutable_list()->add_i(1); + string padding; + if (src_op.padding.type == PaddingType::kSame) { + padding = "SAME"; + } else if (src_op.padding.type == PaddingType::kValid) { + padding = "VALID"; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + (*maxpool_op->mutable_attr())["padding"].set_s(padding); + (*maxpool_op->mutable_attr())["T"].set_type(DT_FLOAT); + auto& ksize = (*maxpool_op->mutable_attr())["ksize"]; + ksize.mutable_list()->add_i(1); + ksize.mutable_list()->add_i(src_op.kheight); + ksize.mutable_list()->add_i(src_op.kwidth); + ksize.mutable_list()->add_i(1); +} + +void ConvertAveragePoolOperator(const AveragePoolOperator& src_op, + GraphDef* tensorflow_graph) { + auto* avgpool_op = tensorflow_graph->add_node(); + avgpool_op->set_op("AvgPool"); + avgpool_op->set_name(src_op.outputs[0]); + *avgpool_op->add_input() = src_op.inputs[0]; + auto& strides = (*avgpool_op->mutable_attr())["strides"]; + strides.mutable_list()->add_i(1); + strides.mutable_list()->add_i(src_op.stride_height); + strides.mutable_list()->add_i(src_op.stride_width); + strides.mutable_list()->add_i(1); + string padding; + if (src_op.padding.type == PaddingType::kSame) { + padding = "SAME"; + } else if (src_op.padding.type == PaddingType::kValid) { + padding = "VALID"; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + (*avgpool_op->mutable_attr())["padding"].set_s(padding); + (*avgpool_op->mutable_attr())["T"].set_type(DT_FLOAT); + auto& ksize = (*avgpool_op->mutable_attr())["ksize"]; + ksize.mutable_list()->add_i(1); + ksize.mutable_list()->add_i(src_op.kheight); + ksize.mutable_list()->add_i(src_op.kwidth); + ksize.mutable_list()->add_i(1); +} + +void ConvertConcatenationOperator(const Model& model, + const ConcatenationOperator& src_op, + GraphDef* tensorflow_graph) { + auto* dc_op = tensorflow_graph->add_node(); + dc_op->set_op("ConcatV2"); + dc_op->set_name(src_op.outputs[0]); + const string dummy_concat_dim = src_op.outputs[0] + "/concat_dim"; + CreateDummyConcatDimTensorConst(dummy_concat_dim, src_op.concat_dim, + tensorflow_graph); + for (const auto& input : src_op.inputs) { + *dc_op->add_input() = input; + } + *dc_op->add_input() = dummy_concat_dim; + (*dc_op->mutable_attr())["T"].set_type(DT_FLOAT); + (*dc_op->mutable_attr())["Tidx"].set_type(DT_INT32); + (*dc_op->mutable_attr())["N"].set_i(src_op.inputs.size()); +} + +void ConvertTensorFlowReshapeOperator(const Model& model, + const TensorFlowReshapeOperator& src_op, + GraphDef* tensorflow_graph) { + auto* reshape_op = tensorflow_graph->add_node(); + reshape_op->set_op("Reshape"); + reshape_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *reshape_op->add_input() = src_op.inputs[0]; + *reshape_op->add_input() = src_op.inputs[1]; + (*reshape_op->mutable_attr())["T"].set_type(DT_FLOAT); + const auto& shape_array = model.GetArray(src_op.inputs[1]); + CHECK(shape_array.data_type == ArrayDataType::kInt32); + CHECK(shape_array.buffer != nullptr); + const auto& shape_data = shape_array.GetBuffer().data; + CreateReshapeShapeTensorConst(src_op.inputs[1], shape_data, tensorflow_graph); +} + +void ConvertL2PoolOperator(const L2PoolOperator& src_op, + GraphDef* tensorflow_graph) { + const string square_output = src_op.outputs[0] + "/square"; + const string avgpool_output = src_op.outputs[0] + "/avgpool"; + + auto* square_op = tensorflow_graph->add_node(); + square_op->set_op("Square"); + square_op->set_name(square_output); + *square_op->add_input() = src_op.inputs[0]; + (*square_op->mutable_attr())["T"].set_type(DT_FLOAT); + + string padding; + if (src_op.padding.type == PaddingType::kSame) { + padding = "SAME"; + } else if (src_op.padding.type == PaddingType::kValid) { + padding = "VALID"; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + + auto* avgpool_op = tensorflow_graph->add_node(); + avgpool_op->set_op("AvgPool"); + avgpool_op->set_name(avgpool_output); + *avgpool_op->add_input() = square_output; + auto& strides = (*avgpool_op->mutable_attr())["strides"]; + strides.mutable_list()->add_i(1); + strides.mutable_list()->add_i(src_op.stride_height); + strides.mutable_list()->add_i(src_op.stride_width); + strides.mutable_list()->add_i(1); + + (*avgpool_op->mutable_attr())["padding"].set_s(padding); + (*avgpool_op->mutable_attr())["T"].set_type(DT_FLOAT); + auto& ksize = (*avgpool_op->mutable_attr())["ksize"]; + ksize.mutable_list()->add_i(1); + ksize.mutable_list()->add_i(src_op.kheight); + ksize.mutable_list()->add_i(src_op.kwidth); + ksize.mutable_list()->add_i(1); + + auto* sqrt_op = tensorflow_graph->add_node(); + sqrt_op->set_op("Sqrt"); + sqrt_op->set_name(src_op.outputs[0]); + *sqrt_op->add_input() = avgpool_output; + (*sqrt_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertSquareOperator(const TensorFlowSquareOperator& src_op, + GraphDef* tensorflow_graph) { + auto* square_op = tensorflow_graph->add_node(); + square_op->set_op("Square"); + square_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *square_op->add_input() = src_op.inputs[0]; + (*square_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertSqrtOperator(const TensorFlowSqrtOperator& src_op, + GraphDef* tensorflow_graph) { + auto* sqrt_op = tensorflow_graph->add_node(); + sqrt_op->set_op("Sqrt"); + sqrt_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *sqrt_op->add_input() = src_op.inputs[0]; + (*sqrt_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertSplitOperator(const Model& model, + const TensorFlowSplitOperator& src_op, + GraphDef* tensorflow_graph) { + auto* split_op = tensorflow_graph->add_node(); + split_op->set_op("Split"); + split_op->set_name(src_op.outputs[0]); + for (const auto& input : src_op.inputs) { + *split_op->add_input() = input; + } + (*split_op->mutable_attr())["T"].set_type(DT_FLOAT); + (*split_op->mutable_attr())["num_split"].set_i(src_op.num_split); + const auto& split_dim_array = model.GetArray(src_op.inputs[0]); + CHECK(split_dim_array.buffer); + CHECK(split_dim_array.data_type == ArrayDataType::kInt32); + const auto& split_dim_data = + split_dim_array.GetBuffer().data; + CHECK_EQ(split_dim_data.size(), 1); + const int split_dim = split_dim_data[0]; + CreateDummyConcatDimTensorConst(src_op.inputs[0], split_dim, + tensorflow_graph); +} + +tensorflow::DataType GetTensorFlowDataType(const Model& model, + const string& array_name) { + auto& dtype = model.GetArray(array_name).data_type; + CHECK(dtype == ArrayDataType::kFloat || dtype == ArrayDataType::kInt32 || + dtype == ArrayDataType::kUint8); + if (dtype == ArrayDataType::kFloat) { + return tensorflow::DT_FLOAT; + } else if (dtype == ArrayDataType::kInt32) { + return tensorflow::DT_INT32; + } else if (dtype == ArrayDataType::kUint8) { + return tensorflow::DT_UINT8; + } else { + LOG(FATAL) << "Wrong data type"; + } +} + +void ConvertCastOperator(const Model& model, const CastOperator& src_op, + GraphDef* tensorflow_graph) { + auto* cast_op = tensorflow_graph->add_node(); + cast_op->set_op("Cast"); + cast_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *cast_op->add_input() = src_op.inputs[0]; + + (*cast_op->mutable_attr())["DstT"].set_type( + GetTensorFlowDataType(model, src_op.outputs[0])); + (*cast_op->mutable_attr())["SrcT"].set_type( + GetTensorFlowDataType(model, src_op.inputs[0])); +} + +void ConvertFloorOperator(const Model& model, const FloorOperator& src_op, + GraphDef* tensorflow_graph) { + auto* floor_op = tensorflow_graph->add_node(); + floor_op->set_op("Floor"); + floor_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *floor_op->add_input() = src_op.inputs[0]; + (*floor_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertGatherOperator(const Model& model, const GatherOperator& src_op, + GraphDef* tensorflow_graph) { + auto* gather_op = tensorflow_graph->add_node(); + gather_op->set_op("Gather"); + gather_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *gather_op->add_input() = src_op.inputs[0]; + *gather_op->add_input() = src_op.inputs[1]; + + (*gather_op->mutable_attr())["Tindices"].set_type(DT_INT32); + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*gather_op->mutable_attr())["Tparams"].set_type(params_type); +} + +void ConvertResizeBilinearOperator(const Model& model, + const ResizeBilinearOperator& src_op, + GraphDef* tensorflow_graph) { + auto* resize_op = tensorflow_graph->add_node(); + resize_op->set_op("ResizeBilinear"); + resize_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *resize_op->add_input() = src_op.inputs[0]; + *resize_op->add_input() = src_op.inputs[1]; + (*resize_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +namespace { +// TODO(aselle): Remove when available in absl +absl::string_view FindLongestCommonPrefix(absl::string_view a, + absl::string_view b) { + if (a.empty() || b.empty()) return absl::string_view(); + + const char* pa = a.data(); + const char* pb = b.data(); + string::difference_type count = 0; + const string::difference_type limit = std::min(a.size(), b.size()); + while (count < limit && *pa == *pb) { + ++pa; + ++pb; + ++count; + } + + return absl::string_view(a.data(), count); +} +} // namespace + +void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op, + GraphDef* tensorflow_graph) { + // Find the base name + const string base( + FindLongestCommonPrefix(src_op.outputs[LstmCellOperator::STATE_OUTPUT], + src_op.outputs[LstmCellOperator::ACTIV_OUTPUT])); + + // Concatenate inputs + const string concat_output = base + "basic_lstm_cell/concat"; + // Op names have been chosen to match the tf.slim LSTM naming + // as closely as possible. + const int concat_dim = + model.arrays.at(src_op.inputs[LstmCellOperator::PREV_ACTIV_INPUT]) + ->shape() + .dimensions_count() - + 1; + // Note that DATA_INPUT may have extra size 1 dimensions, but TF concat + // works the same since the tensor has the same underlying data layout. + const string concat_dim_output = concat_output + "/concat_dim"; + CreateDummyConcatDimTensorConst(concat_dim_output, concat_dim, + tensorflow_graph); + auto* concat_op = tensorflow_graph->add_node(); + concat_op->set_op("ConcatV2"); + concat_op->set_name(concat_output); + *concat_op->add_input() = src_op.inputs[LstmCellOperator::DATA_INPUT]; + *concat_op->add_input() = src_op.inputs[LstmCellOperator::PREV_ACTIV_INPUT]; + *concat_op->add_input() = concat_dim_output; + (*concat_op->mutable_attr())["T"].set_type(DT_FLOAT); + (*concat_op->mutable_attr())["Tidx"].set_type(DT_INT32); + (*concat_op->mutable_attr())["N"].set_i(2); // Number of inputs + + // Write weights + const string weights_output = base + "weights"; + CHECK(model.arrays.count(src_op.inputs[LstmCellOperator::WEIGHTS_INPUT])); + const auto& weights_array = + *model.arrays.at(src_op.inputs[LstmCellOperator::WEIGHTS_INPUT]); + // Convert 4D FullyConnected weights into 2D matrix + const auto& weights_shape = weights_array.shape(); + CHECK_EQ(weights_shape.dimensions_count(), 2); + CHECK(weights_array.buffer); + CHECK(weights_array.buffer->type == ArrayDataType::kFloat); + const float* weights_data = + weights_array.GetBuffer().data.data(); + ConvertFloatTensorConst(weights_output, weights_shape, weights_data, + AxesOrder::kCR, AxesOrder::kRC, tensorflow_graph); + + // Fully connected matrix multiply + const string matmul_output = base + "MatMul"; + auto* matmul_op = tensorflow_graph->add_node(); + matmul_op->set_op("MatMul"); + matmul_op->set_name(matmul_output); + *matmul_op->add_input() = concat_output; + *matmul_op->add_input() = weights_output; + (*matmul_op->mutable_attr())["transpose_a"].set_b(false); + (*matmul_op->mutable_attr())["transpose_b"].set_b(false); + (*matmul_op->mutable_attr())["T"].set_type(DT_FLOAT); + + // Write biases + const string biases_output = base + "biases"; + CHECK(model.arrays.count(src_op.inputs[LstmCellOperator::BIASES_INPUT])); + const auto& bias_array = + *model.arrays.at(src_op.inputs[LstmCellOperator::BIASES_INPUT]); + // TODO(b/62904716) Bias arrays should be 1-D, and used directly. + Shape bias_shape_1d = bias_array.shape(); + UnextendShape(&bias_shape_1d, 1); + CHECK(bias_array.buffer); + CHECK(bias_array.buffer->type == ArrayDataType::kFloat); + const float* bias_data = + bias_array.GetBuffer().data.data(); + ConvertFloatTensorConst(biases_output, bias_shape_1d, bias_data, + AxesOrder::kOneAxis, AxesOrder::kOneAxis, + tensorflow_graph, + LegacyScalarPolicy::kDoCreateLegacyScalars); + + // Add biases + string biasadd_output = base + "BiasAdd"; + auto* biasadd_op = tensorflow_graph->add_node(); + biasadd_op->set_op("BiasAdd"); + biasadd_op->set_name(biasadd_output); + biasadd_op->add_input(matmul_output); + biasadd_op->add_input(biases_output); + (*biasadd_op->mutable_attr())["data_format"].set_s("NHWC"); + (*biasadd_op->mutable_attr())["T"].set_type(DT_FLOAT); + + // Split + string split_dim_output = base + "split/split_dim"; + // The dimension is the same as the concatenation dimension + CreateDummyConcatDimTensorConst(split_dim_output, concat_dim, + tensorflow_graph); + string split_output = base + "split"; + auto* split_op = tensorflow_graph->add_node(); + split_op->set_op("Split"); + split_op->set_name(split_output); + *split_op->add_input() = split_dim_output; + *split_op->add_input() = biasadd_output; + (*split_op->mutable_attr())["T"].set_type(DT_FLOAT); + (*split_op->mutable_attr())["num_split"].set_i(4); // Split into four outputs + + // Activation functions and memory computations + const string tanh_0_output = base + "Tanh"; + auto* tanh_0_op = tensorflow_graph->add_node(); + tanh_0_op->set_op("Tanh"); + tanh_0_op->set_name(tanh_0_output); + *tanh_0_op->add_input() = split_output + ":1"; + (*tanh_0_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string sigmoid_1_output = base + "Sigmoid_1"; + auto* logistic_1_op = tensorflow_graph->add_node(); + logistic_1_op->set_op("Sigmoid"); + logistic_1_op->set_name(sigmoid_1_output); + *logistic_1_op->add_input() = split_output; + (*logistic_1_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string mul_1_output = base + "mul_1"; + auto* mul_1_op = tensorflow_graph->add_node(); + mul_1_op->set_op("Mul"); + mul_1_op->set_name(mul_1_output); + *mul_1_op->add_input() = sigmoid_1_output; + *mul_1_op->add_input() = tanh_0_output; + (*mul_1_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string sigmoid_0_output = base + "Sigmoid"; + auto* logistic_2_op = tensorflow_graph->add_node(); + logistic_2_op->set_op("Sigmoid"); + logistic_2_op->set_name(sigmoid_0_output); + *logistic_2_op->add_input() = split_output + ":2"; + (*logistic_2_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string sigmoid_2_output = base + "Sigmoid_2"; + auto* logistic_3_op = tensorflow_graph->add_node(); + logistic_3_op->set_op("Sigmoid"); + logistic_3_op->set_name(sigmoid_2_output); + *logistic_3_op->add_input() = split_output + ":3"; + (*logistic_3_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string mul_0_output = base + "mul"; + auto* mul_0_op = tensorflow_graph->add_node(); + mul_0_op->set_op("Mul"); + mul_0_op->set_name(mul_0_output); + *mul_0_op->add_input() = src_op.inputs[LstmCellOperator::PREV_STATE_INPUT]; + *mul_0_op->add_input() = sigmoid_0_output; + (*mul_0_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string add_1_output = src_op.outputs[LstmCellOperator::STATE_OUTPUT]; + auto* add_1_op = tensorflow_graph->add_node(); + add_1_op->set_op("Add"); + add_1_op->set_name(add_1_output); + *add_1_op->add_input() = mul_0_output; + *add_1_op->add_input() = mul_1_output; + (*add_1_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string tanh_1_output = base + "Tanh_1"; + auto* tanh_1_op = tensorflow_graph->add_node(); + tanh_1_op->set_op("Tanh"); + tanh_1_op->set_name(tanh_1_output); + *tanh_1_op->add_input() = add_1_output; + (*tanh_1_op->mutable_attr())["T"].set_type(DT_FLOAT); + + const string mul_2_output = src_op.outputs[LstmCellOperator::ACTIV_OUTPUT]; + auto* mul_2_op = tensorflow_graph->add_node(); + mul_2_op->set_op("Mul"); + mul_2_op->set_name(mul_2_output); + *mul_2_op->add_input() = tanh_1_output; + *mul_2_op->add_input() = sigmoid_2_output; + (*mul_2_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertSpaceToBatchNDOperator(const Model& model, + const SpaceToBatchNDOperator& src_op, + GraphDef* tensorflow_graph) { + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("SpaceToBatchND"); + new_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 3); + *new_op->add_input() = src_op.inputs[0]; + *new_op->add_input() = src_op.inputs[1]; + *new_op->add_input() = src_op.inputs[2]; + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(params_type); + (*new_op->mutable_attr())["Tblock_shape"].set_type(DT_INT32); + (*new_op->mutable_attr())["Tpaddings"].set_type(DT_INT32); +} + +void ConvertBatchToSpaceNDOperator(const Model& model, + const BatchToSpaceNDOperator& src_op, + GraphDef* tensorflow_graph) { + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("BatchToSpaceND"); + new_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 3); + *new_op->add_input() = src_op.inputs[0]; + *new_op->add_input() = src_op.inputs[1]; + *new_op->add_input() = src_op.inputs[2]; + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(params_type); + (*new_op->mutable_attr())["Tblock_shape"].set_type(DT_INT32); + (*new_op->mutable_attr())["Tcrops"].set_type(DT_INT32); +} + +void ConvertPadOperator(const Model& model, const PadOperator& src_op, + GraphDef* tensorflow_graph) { + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("Pad"); + new_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *new_op->add_input() = src_op.inputs[0]; + *new_op->add_input() = src_op.inputs[1]; + + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(params_type); + + // Create the params tensor. + auto* params_op = tensorflow_graph->add_node(); + params_op->set_op("Const"); + params_op->set_name(src_op.inputs[1]); + (*params_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* tensor = (*params_op->mutable_attr())["value"].mutable_tensor(); + tensor->set_dtype(DT_INT32); + + CHECK_EQ(src_op.left_padding.size(), src_op.right_padding.size()); + for (int i = 0; i < src_op.left_padding.size(); ++i) { + tensor->add_int_val(src_op.left_padding[i]); + tensor->add_int_val(src_op.right_padding[i]); + } + auto* shape = tensor->mutable_tensor_shape(); + shape->add_dim()->set_size(src_op.left_padding.size()); + shape->add_dim()->set_size(2); +} + +void CreateSliceInput(const string& input_name, const std::vector& values, + GraphDef* tensorflow_graph) { + auto* params_op = tensorflow_graph->add_node(); + params_op->set_op("Const"); + params_op->set_name(input_name); + (*params_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* tensor = (*params_op->mutable_attr())["value"].mutable_tensor(); + tensor->set_dtype(DT_INT32); + + for (int i = 0; i < values.size(); ++i) { + tensor->add_int_val(values[i]); + } + auto* shape = tensor->mutable_tensor_shape(); + shape->add_dim()->set_size(values.size()); +} + +void ConvertStridedSliceOperator(const Model& model, + const StridedSliceOperator& src_op, + GraphDef* tensorflow_graph) { + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("StridedSlice"); + new_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 4); + *new_op->add_input() = src_op.inputs[0]; + *new_op->add_input() = src_op.inputs[1]; + *new_op->add_input() = src_op.inputs[2]; + *new_op->add_input() = src_op.inputs[3]; + + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(params_type); + + (*new_op->mutable_attr())["Index"].set_type(DT_INT32); + (*new_op->mutable_attr())["begin_mask"].set_i(src_op.begin_mask); + (*new_op->mutable_attr())["ellipsis_mask"].set_i(src_op.ellipsis_mask); + (*new_op->mutable_attr())["end_mask"].set_i(src_op.end_mask); + (*new_op->mutable_attr())["new_axis_mask"].set_i(src_op.new_axis_mask); + (*new_op->mutable_attr())["shrink_axis_mask"].set_i(src_op.shrink_axis_mask); + + // Create tensors for start/stop indices and strides. + CreateSliceInput(src_op.inputs[1], src_op.start_indices, tensorflow_graph); + CreateSliceInput(src_op.inputs[2], src_op.stop_indices, tensorflow_graph); + CreateSliceInput(src_op.inputs[3], src_op.strides, tensorflow_graph); +} + +void ConvertSliceOperator(const Model& model, const SliceOperator& src_op, + GraphDef* tensorflow_graph) { + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("Slice"); + new_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 3); + *new_op->add_input() = src_op.inputs[0]; + *new_op->add_input() = src_op.inputs[1]; + *new_op->add_input() = src_op.inputs[2]; + + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(params_type); + (*new_op->mutable_attr())["Index"].set_type(DT_INT32); + + // Create tensors for begin and size inputs. + CreateSliceInput(src_op.inputs[1], src_op.begin, tensorflow_graph); + CreateSliceInput(src_op.inputs[2], src_op.size, tensorflow_graph); +} + +void ConvertMeanOperator(const Model& model, const MeanOperator& src_op, + GraphDef* tensorflow_graph) { + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("Mean"); + new_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *new_op->add_input() = src_op.inputs[0]; + *new_op->add_input() = src_op.inputs[1]; + + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(params_type); + + // Create the params tensor. + auto* params_op = tensorflow_graph->add_node(); + params_op->set_op("Const"); + params_op->set_name(src_op.inputs[1]); + (*params_op->mutable_attr())["dtype"].set_type(DT_INT32); + auto* tensor = (*params_op->mutable_attr())["value"].mutable_tensor(); + tensor->set_dtype(DT_INT32); + + for (int i = 0; i < src_op.reduction_indices.size(); ++i) { + tensor->add_int_val(src_op.reduction_indices[i]); + } + auto* shape = tensor->mutable_tensor_shape(); + shape->add_dim()->set_size(src_op.reduction_indices.size()); +} + +void ConvertSqueezeOperator(const Model& model, const SqueezeOperator& src_op, + GraphDef* tensorflow_graph) { + auto* new_op = tensorflow_graph->add_node(); + new_op->set_op("Squeeze"); + new_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *new_op->add_input() = src_op.inputs[0]; + + const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*new_op->mutable_attr())["T"].set_type(params_type); + + auto& squeeze_dims = (*new_op->mutable_attr())["squeeze_dims"]; + for (int i : src_op.squeeze_dims) { + squeeze_dims.mutable_list()->add_i(i); + } +} + +void ConvertSubOperator(const Model& model, const SubOperator& src_op, + GraphDef* tensorflow_graph) { + auto* sub_op = tensorflow_graph->add_node(); + sub_op->set_op("Sub"); + sub_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *sub_op->add_input() = src_op.inputs[0]; + *sub_op->add_input() = src_op.inputs[1]; + const auto data_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*sub_op->mutable_attr())["T"].set_type(data_type); +} + +void ConvertTensorFlowMinimumOperator(const Model& model, + const TensorFlowMinimumOperator& src_op, + GraphDef* tensorflow_graph) { + auto* sub_op = tensorflow_graph->add_node(); + sub_op->set_op("Minimum"); + sub_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *sub_op->add_input() = src_op.inputs[0]; + *sub_op->add_input() = src_op.inputs[1]; + const auto data_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*sub_op->mutable_attr())["T"].set_type(data_type); +} + +void ConvertTensorFlowMaximumOperator(const Model& model, + const TensorFlowMaximumOperator& src_op, + GraphDef* tensorflow_graph) { + auto* sub_op = tensorflow_graph->add_node(); + sub_op->set_op("Maximum"); + sub_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *sub_op->add_input() = src_op.inputs[0]; + *sub_op->add_input() = src_op.inputs[1]; + const auto data_type = GetTensorFlowDataType(model, src_op.inputs[0]); + (*sub_op->mutable_attr())["T"].set_type(data_type); +} + +void ConvertOperator(const Model& model, const Operator& src_op, + GraphDef* tensorflow_graph) { + if (src_op.fused_activation_function != FusedActivationFunctionType::kNone) { + LOG(FATAL) + << "Unsupported: the input model has a fused activation function"; + } + + if (src_op.type == OperatorType::kConv) { + ConvertConvOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kDepthwiseConv) { + ConvertDepthwiseConvOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kDepthToSpace) { + ConvertDepthToSpaceOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kSpaceToDepth) { + ConvertSpaceToDepthOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kFullyConnected) { + ConvertFullyConnectedOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kAdd) { + ConvertAddOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kMul) { + ConvertMulOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kRelu) { + ConvertReluOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kRelu1) { + ConvertRelu1Operator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kRelu6) { + ConvertRelu6Operator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kLogistic) { + ConvertLogisticOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kTanh) { + ConvertTanhOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kL2Normalization) { + ConvertL2NormalizationOperator( + static_cast(src_op), tensorflow_graph); + } else if (src_op.type == OperatorType::kSoftmax) { + ConvertSoftmaxOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kLocalResponseNormalization) { + ConvertLocalResponseNormalizationOperator( + static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kLstmCell) { + ConvertLstmCellOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kMaxPool) { + ConvertMaxPoolOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kAveragePool) { + ConvertAveragePoolOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kConcatenation) { + ConvertConcatenationOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kTensorFlowReshape) { + ConvertTensorFlowReshapeOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kL2Pool) { + ConvertL2PoolOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kTensorFlowSquare) { + ConvertSquareOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kTensorFlowSqrt) { + ConvertSqrtOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kTensorFlowSplit) { + ConvertSplitOperator(model, + static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kFakeQuant) { + ConvertFakeQuantOperator(static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kCast) { + ConvertCastOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kFloor) { + ConvertFloorOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kGather) { + ConvertGatherOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kResizeBilinear) { + ConvertResizeBilinearOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kSpaceToBatchND) { + ConvertSpaceToBatchNDOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kBatchToSpaceND) { + ConvertBatchToSpaceNDOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kPad) { + ConvertPadOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kStridedSlice) { + ConvertStridedSliceOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kMean) { + ConvertMeanOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kSub) { + ConvertSubOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kTensorFlowMinimum) { + ConvertTensorFlowMinimumOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kTensorFlowMaximum) { + ConvertTensorFlowMaximumOperator( + model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kSqueeze) { + ConvertSqueezeOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kSlice) { + ConvertSliceOperator(model, static_cast(src_op), + tensorflow_graph); + } else { + LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(src_op.type); + } +} + +void AddPlaceholder(const string& name, GraphDef* tensorflow_graph) { + auto* placeholder = tensorflow_graph->add_node(); + placeholder->set_op("Placeholder"); + (*placeholder->mutable_attr())["dtype"].set_type(DT_FLOAT); + placeholder->set_name(name); +} + +void AddPlaceholderForRNNState(const Model& model, const string& name, int size, + GraphDef* tensorflow_graph) { + auto* placeholder = tensorflow_graph->add_node(); + placeholder->set_op("Placeholder"); + placeholder->set_name(name); + (*placeholder->mutable_attr())["dtype"].set_type(DT_FLOAT); + + auto* shape = (*placeholder->mutable_attr())["shape"].mutable_shape(); + const auto& state_array = *model.arrays.at(name); + if (state_array.has_shape()) { + const auto& state_shape = state_array.shape(); + const int kDims = state_shape.dimensions_count(); + for (int i = 0; i < kDims; ++i) { + shape->add_dim()->set_size(state_shape.dims(i)); + } + } else { + shape->add_dim()->set_size(1); + shape->add_dim()->set_size(size); + } +} + +void ExportTensorFlowGraphDefImplementation(const Model& model, + GraphDef* tensorflow_graph) { + for (const auto& input_array : model.flags.input_arrays()) { + AddPlaceholder(input_array.name(), tensorflow_graph); + } + for (const auto& rnn_state : model.flags.rnn_states()) { + AddPlaceholderForRNNState(model, rnn_state.state_array(), rnn_state.size(), + tensorflow_graph); + } + for (const auto& op : model.operators) { + ConvertOperator(model, *op, tensorflow_graph); + } + // Generically export arrays that haven't been exported already + // by the above operators export. It's important that this comes + // after, as some operators need to export arrays that they reference + // in a specific way, rather than in the generic way done below. + for (const auto& array_pair : model.arrays) { + const string& array_name = array_pair.first; + const auto& array = *array_pair.second; + if (array.buffer) { + switch (array.data_type) { + case ArrayDataType::kFloat: + ConvertFloatTensorConst(model, array_name, tensorflow_graph); + break; + case ArrayDataType::kInt32: + ConvertIntTensorConst(model, array_name, tensorflow_graph); + break; + default: + break; + } + } + } +} +} // namespace + +void ExportTensorFlowGraphDef(const Model& model, + string* output_file_contents) { + CHECK(output_file_contents->empty()); + GraphDef tensorflow_graph; + ExportTensorFlowGraphDefImplementation(model, &tensorflow_graph); + LogDumpGraphDef(kLogLevelModelChanged, "AT EXPORT", tensorflow_graph); + CHECK(tensorflow_graph.SerializeToString(output_file_contents)); +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.h b/tensorflow/contrib/lite/toco/export_tensorflow.h new file mode 100644 index 0000000000..eca9774576 --- /dev/null +++ b/tensorflow/contrib/lite/toco/export_tensorflow.h @@ -0,0 +1,27 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_EXPORT_TENSORFLOW_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_EXPORT_TENSORFLOW_H_ + +#include +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +void ExportTensorFlowGraphDef(const Model& model, string* output_file_contents); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_EXPORT_TENSORFLOW_H_ diff --git a/tensorflow/contrib/lite/toco/format_port.h b/tensorflow/contrib/lite/toco/format_port.h new file mode 100644 index 0000000000..3bc3295d04 --- /dev/null +++ b/tensorflow/contrib/lite/toco/format_port.h @@ -0,0 +1,77 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// This file is used to provide equivalents of internal util::format::FormatF +// and util::format::AppendF. Unfortunately, type safety is not as good as a +// a full C++ example. +// TODO(aselle): When absl adds support for StrFormat, use that instead. +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_FORMAT_PORT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_FORMAT_PORT_H_ + +#include "tensorflow/contrib/lite/toco/toco_types.h" +#include "tensorflow/core/lib/strings/stringprintf.h" + +namespace toco { +namespace port { + +/// Identity (default case) +template +T IdentityOrConvertStringToRaw(T foo) { + return foo; +} + +// Overloaded case where we return std::string. +inline const char* IdentityOrConvertStringToRaw(const std::string& foo) { + return foo.c_str(); +} + +#if defined(PLATFORM_GOOGLE) +// Overloaded case where we return string. +inline const char* IdentityOrConvertStringToRaw(const string& foo) { + return foo.c_str(); +} +#endif // PLATFORM_GOOGLE +// Delegate to TensorFlow Appendf function until absl has an equivalent. +template +inline void AppendFHelper(string* destination, const char* fmt, + Args&&... args) { + tensorflow::strings::Appendf(destination, fmt, args...); +} + +// Specialization for no argument format string (avoid security bug). +inline void AppendFHelper(string* destination, const char* fmt) { + tensorflow::strings::Appendf(destination, "%s", fmt); +} + +// Append formatted string (with format fmt and args args) to the string +// pointed to by destination. fmt follows C printf semantics. +// One departure is that %s can be driven by a std::string or string. +template +inline void AppendF(string* destination, const char* fmt, Args&&... args) { + AppendFHelper(destination, fmt, IdentityOrConvertStringToRaw(args)...); +} + +// Return formatted string (with format fmt and args args). fmt follows C printf +// semantics. One departure is that %s can be driven by a std::string or string. +template +inline string StringF(const char* fmt, Args&&... args) { + string result; + AppendFHelper(&result, fmt, IdentityOrConvertStringToRaw(args)...); + return result; +} + +} // namespace port +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_FORMAT_PORT_H_ diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc new file mode 100644 index 0000000000..bf454c40c7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc @@ -0,0 +1,98 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { + auto conv_it = model->operators.begin() + op_index; + if (conv_it->get()->type != OperatorType::kConv) { + return false; + } + const auto* conv_op = static_cast(conv_it->get()); + if (conv_op->stride_width != conv_op->stride_height) { + return false; + } + auto& weights_array = model->GetArray(conv_op->inputs[1]); + if (!weights_array.buffer) { + // Yield until the weights are resolved as a constant array. + return false; + } + if (weights_array.data_type != ArrayDataType::kFloat) { + return false; + } + if (weights_array.shape().dims(3) != 1) { + // Not a pure convolution: Conv does accumulation across the depth + // dimension. + return false; + } + // At this point we know we have a pure conv. Rewrite it as DepthwiseConv. + AddMessageF( + "%s is purely convolutional (input/weights depth is 1), replacing it by " + "a DepthwiseConv.", + LogName(*conv_op)); + auto* depthwiseconv_op = new DepthwiseConvOperator; + // Conv and DepthwiseConv take the same inputs + depthwiseconv_op->inputs = conv_op->inputs; + // Conv may have a 2nd output for im2col + depthwiseconv_op->outputs = {conv_op->outputs[0]}; + if (conv_op->outputs.size() > 1) { + // delete the im2col array. + model->arrays.erase(conv_op->outputs[1]); + } + depthwiseconv_op->fused_activation_function = + conv_op->fused_activation_function; + // Let PropagateFixedSizes recompute fixed padding, just in case some day it + // may be different for Conv vs DepthwiseConv. + depthwiseconv_op->padding.type = conv_op->padding.type; + depthwiseconv_op->stride_height = conv_op->stride_height; + depthwiseconv_op->stride_width = conv_op->stride_width; + depthwiseconv_op->depth_multiplier = weights_array.shape().dims(0); + // Replace the operator in the graph. + const auto depthwiseconv_it = + model->operators.emplace(conv_it, depthwiseconv_op); + conv_it = depthwiseconv_it + 1; + CHECK_EQ(conv_it->get(), conv_op); + model->operators.erase(conv_it); + // Shuffle the weights. + const auto& weights_shape = weights_array.shape(); + auto& weights_buffer = + weights_array.GetMutableBuffer(); + const std::vector& conv_weights_data = weights_buffer.data; + std::vector depthwise_conv_weights_data(conv_weights_data.size()); + const int depth = weights_shape.dims(0); + const int width = weights_shape.dims(1); + const int height = weights_shape.dims(2); + const int width_height = width * height; + for (int c = 0; c < depth; c++) { + for (int xy = 0; xy < width_height; xy++) { + depthwise_conv_weights_data[c + depth * xy] = + conv_weights_data[xy + width_height * c]; + } + } + *weights_array.mutable_shape()->mutable_dims() = {1, width, height, depth}; + weights_buffer.data = depthwise_conv_weights_data; + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc new file mode 100644 index 0000000000..1735b51e5b --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc @@ -0,0 +1,69 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool CreateIm2colArrays::Run(Model* model, std::size_t op_index) { + auto conv_it = model->operators.begin() + op_index; + if (conv_it->get()->type != OperatorType::kConv) { + return false; + } + auto* conv_op = static_cast(conv_it->get()); + if (conv_op->outputs.size() == 2) { + // We already have an im2col array + return false; + } + const auto& weights_array = *model->arrays[conv_op->inputs[1]]; + if (!weights_array.has_shape()) { + // We need to yield until weights dims have been resolved, because + // from the weights dims we determine whether an im2col array is + // needed. + return false; + } + const auto& weights_shape = weights_array.shape(); + const int kheight = weights_shape.dims(1); + const int kwidth = weights_shape.dims(2); + if (kwidth == 1 && kheight == 1 && conv_op->stride_width == 1 && + conv_op->stride_height == 1) { + // 1x1 unstrided conv does not need an im2col array. + return false; + } + + // Create the im2col array. + CHECK_EQ(conv_op->outputs.size(), 1); + const string& im2col_array_name = + AvailableArrayName(*model, conv_op->inputs[0] + "_im2col"); + model->GetOrCreateArray(im2col_array_name); + conv_op->outputs.push_back(im2col_array_name); + AddMessageF( + "Created an im2col array for %s, with %dx%d kernel and stride_width=%d, " + "stride_height=%d", + LogName(*conv_op), kwidth, kheight, conv_op->stride_width, + conv_op->stride_height); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc new file mode 100644 index 0000000000..b89e3f5310 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc @@ -0,0 +1,223 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +template +void DequantizeBuffer(Array* array) { + const auto old_data = array->GetBuffer().data; + array->buffer = nullptr; + array->data_type = ArrayDataType::kFloat; + auto& new_data = array->GetMutableBuffer().data; + new_data.resize(old_data.size()); + const auto& qparams = array->GetQuantizationParams(); + for (int i = 0; i < old_data.size(); i++) { + new_data[i] = qparams.scale * (old_data[i] - qparams.zero_point); + } +} + +std::vector>::iterator FindFirstOpWithInput( + Model* model, const string& array_name) { + for (auto it = model->operators.begin(); it != model->operators.end(); ++it) { + for (const auto& input : it->get()->inputs) { + if (input == array_name) { + return it; + } + } + } + return model->operators.end(); +} + +void ClearArrayQuantizationParams(const string& array_name, Model* model) { + auto* array = model->arrays.at(array_name).get(); + CHECK(array->quantization_params); + for (auto& input_array : *model->flags.mutable_input_arrays()) { + if (input_array.name() == array_name) { + auto& qparams = *array->quantization_params; + const double new_std_value = 1. / qparams.scale; + const double new_mean_value = qparams.zero_point; + if (input_array.has_std_value()) { + CHECK_LE(std::abs(new_std_value - input_array.std_value()), 0.001); + } else { + input_array.set_std_value(new_std_value); + } + if (input_array.has_mean_value()) { + CHECK_LE(std::abs(new_mean_value - input_array.mean_value()), 0.001); + } else { + input_array.set_mean_value(new_mean_value); + } + } + } + array->quantization_params = nullptr; +} + +bool DequantizeArray(const string& array_name, + GraphTransformation* transformation, Model* model) { + auto* array = model->arrays.at(array_name).get(); + if (!array->quantization_params) { + return false; + } + transformation->AddMessageF("Dequantizing array: %s", array_name); + + // Dequantize any buffer + if (array->buffer) { + if (array->data_type == ArrayDataType::kUint8) { + DequantizeBuffer(array); + } else if (array->data_type == ArrayDataType::kInt32) { + DequantizeBuffer(array); + } else { + LOG(FATAL) << "Unhandled data type"; + } + CHECK(array->data_type == ArrayDataType::kFloat); + CHECK(array->buffer->type == ArrayDataType::kFloat); + + // Clear quantization params, officially makes this a non-quantized array. + ClearArrayQuantizationParams(array_name, model); + return true; + } else { + array->data_type = ArrayDataType::kFloat; + } + + // Clear quantization params, officially makes this a non-quantized array. + ClearArrayQuantizationParams(array_name, model); + + if (array->buffer) { + return true; + } + + auto* op_outputting_array = GetOpWithOutput(*model, array_name); + if (op_outputting_array) { + if (op_outputting_array->type == OperatorType::kTensorFlowReshape) { + return true; + } + } + + // If there was no minmax info, we can return now. Indeed, + // the below only serves to create a FakeQuant node, but some arrays are + // quantized without MinMax (see the CHECK above) and that corresponds to + // places where a FakeQuant node is actually not wanted, because the + // quantization params are meant to be inferred in another way (e.g. bias + // vector for a Conv op, see their special-casing in quantize.cc). + if (!array->minmax) { + return true; + } + + // Determine whether to insert a FakeQuant before or after + // this array. + bool must_insert_fakequant_before = false; + bool must_insert_fakequant_after = false; + if (IsInputArray(*model, array_name)) { + must_insert_fakequant_after = true; + } + for (const string& output_array : model->flags.output_arrays()) { + if (array_name == output_array) { + must_insert_fakequant_before = true; + } + } + for (const auto& rnn_state : model->flags.rnn_states()) { + if (array_name == rnn_state.state_array()) { + must_insert_fakequant_after = true; + } + if (array_name == rnn_state.back_edge_source_array()) { + must_insert_fakequant_before = true; + } + } + CHECK(!(must_insert_fakequant_before && must_insert_fakequant_after)); + + // Create and insert the FakeQuant node + auto* fakequant_op = new FakeQuantOperator; + model->operators.emplace(FindFirstOpWithInput(model, array_name), + fakequant_op); + const string& new_array_name = AvailableArrayName(*model, array_name); + auto& new_array = model->GetOrCreateArray(new_array_name); + new_array.data_type = ArrayDataType::kFloat; + new_array.copy_shape(array->shape()); + new_array.GetOrCreateMinMax() = array->GetMinMax(); + fakequant_op->minmax.reset(new MinMax); + *fakequant_op->minmax = array->GetMinMax(); + if (must_insert_fakequant_before) { + for (const auto& op : model->operators) { + for (string& output : op->outputs) { + if (output == array_name) { + output = new_array_name; + } + } + } + fakequant_op->inputs = {new_array_name}; + fakequant_op->outputs = {array_name}; + } else { + for (const auto& op : model->operators) { + for (string& input : op->inputs) { + if (input == array_name) { + input = new_array_name; + } + } + } + fakequant_op->inputs = {array_name}; + fakequant_op->outputs = {new_array_name}; + } + return true; +} + +} // namespace + +bool Dequantize::Run(Model* model, std::size_t op_index) { + const auto op_it = model->operators.begin() + op_index; + auto* op = op_it->get(); + + if (op->type == OperatorType::kDequantize) { + auto& input_array = model->GetArray(op->inputs[0]); + if (input_array.data_type == ArrayDataType::kFloat) { + return false; + } + if (input_array.final_data_type != ArrayDataType::kFloat) { + return false; + } + input_array.data_type = ArrayDataType::kFloat; + input_array.quantization_params = nullptr; + auto& output_array = model->GetArray(op->outputs[0]); + output_array.data_type = ArrayDataType::kFloat; + output_array.quantization_params = nullptr; + return RemoveTrivialPassthroughOp(this, model, op_index); + } + + std::vector arrays; + for (const string& input : op->inputs) { + arrays.push_back(input); + } + for (const string& output : op->outputs) { + arrays.push_back(output); + } + bool changed = false; + for (const string& array : arrays) { + changed |= DequantizeArray(array, this, model); + } + + return changed; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc new file mode 100644 index 0000000000..fea360740f --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc @@ -0,0 +1,56 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool DropFakeQuant::Run(Model* model, std::size_t op_index) { + const auto fakequant_it = model->operators.begin() + op_index; + auto* fakequant_base_op = fakequant_it->get(); + if (fakequant_base_op->type != OperatorType::kFakeQuant) { + return false; + } + auto* fakequant_op = static_cast(fakequant_base_op); + + if (!fakequant_op->minmax) { + return false; + } + + const auto& output_array = model->GetArray(fakequant_op->outputs[0]); + if (!output_array.minmax) { + return false; + } + + // Drop min/max inputs + for (int i = 1; i < fakequant_op->inputs.size(); i++) { + if (CountOpsWithInput(*model, fakequant_op->inputs[i]) == 1) { + model->arrays.erase(fakequant_op->inputs[i]); + } + } + fakequant_op->inputs.resize(1); + + return RemoveTrivialPassthroughOp(this, model, op_index); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc new file mode 100644 index 0000000000..a3ed6663bc --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc @@ -0,0 +1,42 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool DropIm2colArrays::Run(Model* model, std::size_t op_index) { + auto conv_it = model->operators.begin() + op_index; + if (conv_it->get()->type != OperatorType::kConv) { + return false; + } + auto* conv_op = static_cast(conv_it->get()); + if (conv_op->outputs.size() < 2) { + // Conv op does not have im2col. + return false; + } + + // Drop the im2col array. + CHECK_EQ(conv_op->outputs.size(), 2); + model->arrays.erase(conv_op->outputs[1]); + conv_op->outputs.resize(1); + AddMessageF("Dropped an im2col array for %s", LogName(*conv_op)); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc new file mode 100644 index 0000000000..badefeca88 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc @@ -0,0 +1,57 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool ProcessLinearOperator(Model* model, Operator* op) { + if (op->inputs.size() >= 3) { + return false; + } + const string& output_name = op->outputs[0]; + const string& bias_name = AvailableArrayName(*model, output_name + "_bias"); + op->inputs.push_back(bias_name); + DCHECK_EQ(op->inputs.size(), 3); + auto& bias_array = model->GetOrCreateArray(bias_name); + bias_array.data_type = ArrayDataType::kFloat; + + return true; +} +} // namespace + +bool EnsureBiasVectors::Run(Model* model, std::size_t op_index) { + auto* op = model->operators[op_index].get(); + if (op->type == OperatorType::kConv || + op->type == OperatorType::kDepthwiseConv || + op->type == OperatorType::kFullyConnected) { + if (ProcessLinearOperator(model, op)) { + AddMessageF("Added bias vector to %s", LogName(*op)); + return true; + } + } + return false; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc new file mode 100644 index 0000000000..7a86510025 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc @@ -0,0 +1,98 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { + const auto ac_it = model->operators.begin() + op_index; + const auto* ac_op = ac_it->get(); + + if (ac_op->type != OperatorType::kRelu6 && + ac_op->type != OperatorType::kRelu1 && + ac_op->type != OperatorType::kRelu) { + return false; + } + + // Find the op producing the array passed to this activation function + Operator* op = GetOpWithOutput(*model, ac_op->inputs[0]); + + if (!op) return false; + + if (CountTrueOutputs(*model, *op) > 1) { + AddMessageF( + "Not fusing activation function into %s because it has more than one " + " consumed output", + LogName(*op)); + return false; + } + + CHECK_EQ(op->outputs[0], ac_op->inputs[0]); + + int count_ops_consuming_output = CountOpsWithInput(*model, ac_op->inputs[0]); + DCHECK_GE(count_ops_consuming_output, 1); + if (count_ops_consuming_output > 1) { + AddMessageF( + "Not fusing activation function into %s because it is consumed by more " + "than 1 other operator", + LogName(*op)); + return false; + } + + if (op->fused_activation_function != FusedActivationFunctionType::kNone) { + AddMessageF( + "Not fusing activation function into %s because it already has a fused " + "activation function", + LogName(*op)); + return false; + } + + // TODO(dkalenichenko): Great many ops don't support activation function + // fusing. Switch to the whilelist approach instead. + if (op->type == OperatorType::kConcatenation || + op->type == OperatorType::kSlice) { + AddMessageF( + "Not fusing activation function because the %s op doesn't support it", + LogName(*op)); + return false; + } + + AddMessageF("Fusing activation function %s into the preceding %s", + LogName(*ac_op), LogName(*op)); + if (ac_op->type == OperatorType::kRelu6) { + op->fused_activation_function = FusedActivationFunctionType::kRelu6; + } else if (ac_op->type == OperatorType::kRelu1) { + op->fused_activation_function = FusedActivationFunctionType::kRelu1; + } else if (ac_op->type == OperatorType::kRelu) { + op->fused_activation_function = FusedActivationFunctionType::kRelu; + } else { + LOG(FATAL) << "Unhandled activation function type"; + } + model->arrays.erase(ac_op->inputs[0]); + op->outputs[0] = ac_op->outputs[0]; + model->operators.erase(ac_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc new file mode 100644 index 0000000000..4619d8bbee --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc @@ -0,0 +1,300 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +void FuseAddOrSubParamsIntoFollowingAffine(Model* model, Operator* following_op, + const Operator* add_or_sub_op, + int index_of_constant_input) { + CHECK(add_or_sub_op->type == OperatorType::kAdd || + add_or_sub_op->type == OperatorType::kSub); + CHECK(index_of_constant_input == 0 || index_of_constant_input == 1); + // If the op is a subtraction, the constant input should be the right hand + // side. + // This should have been checked before this point. + CHECK(add_or_sub_op->type != OperatorType::kSub || + index_of_constant_input == 1); + if (following_op->inputs.size() < 3) { + LOG(FATAL) << "Missing bias parameter"; + } + const auto& weights = model->GetArray(following_op->inputs[1]); + auto& bias = model->GetArray(following_op->inputs[2]); + bias.minmax = nullptr; + const auto& operand = + model->GetArray(add_or_sub_op->inputs[index_of_constant_input]); + // We're only supporting the case of a scalar operand. Should have + // been checked earlier. + CHECK_EQ(RequiredBufferSizeForShape(operand.shape()), 1); + + const float scalar_operand = + operand.GetBuffer().data[0]; + // At this point we reduce the case of subtraction to that of addition + // by negating the operand. + float add_scalar_operand = 0.f; + if (add_or_sub_op->type == OperatorType::kAdd) { + add_scalar_operand = scalar_operand; + } else if (add_or_sub_op->type == OperatorType::kSub && + index_of_constant_input == 1) { + add_scalar_operand = -scalar_operand; + } else { + LOG(FATAL) << "Should not get here"; + } + // From here on we are fusing an addition. add_or_sub_op->type does not + // matter anymore. + + const Shape& weights_shape = weights.shape(); + const Shape& bias_shape = bias.shape(); + const auto& weights_buffer = weights.GetBuffer(); + const float* const weights_data = weights_buffer.data.data(); + auto& bias_buffer = bias.GetMutableBuffer(); + float* const bias_data = bias_buffer.data.data(); + + if (following_op->type == OperatorType::kConv || + following_op->type == OperatorType::kFullyConnected) { + const int output_depth = weights_shape.dims(0); + // TODO(b/62904716): Bias array should become 1-D when padding removed. + CHECK_EQ(output_depth, bias_shape.dims(bias_shape.dimensions_count() - 1)); + const int weights_size = RequiredBufferSizeForShape(weights_shape); + const int weights_per_depth = weights_size / output_depth; + CHECK_EQ(weights_size, weights_per_depth * output_depth); + + for (int d = 0; d < output_depth; d++) { + float accumulation = 0; + for (int i = 0; i < weights_per_depth; i++) { + accumulation += + add_scalar_operand * weights_data[d * weights_per_depth + i]; + } + bias_data[d] += accumulation; + } + } else if (following_op->type == OperatorType::kDepthwiseConv) { + const int output_depth = + weights_shape.dims(weights_shape.dimensions_count() - 1); + const int weights_size = RequiredBufferSizeForShape(weights_shape); + const int weights_per_depth = weights_size / output_depth; + CHECK_EQ(weights_size, weights_per_depth * output_depth); + + for (int c = 0; c < output_depth; c++) { + float accumulation = 0; + for (int k = 0; k < weights_per_depth; k++) { + accumulation += add_scalar_operand * weights_data[k * output_depth + c]; + } + bias_data[c] += accumulation; + } + } else { + LOG(FATAL) << "Should not get here."; + } +} + +void FuseMulOrDivParamsIntoFollowingAffine(Model* model, Operator* following_op, + const Operator* mul_or_div_op, + int index_of_constant_input) { + CHECK(mul_or_div_op->type == OperatorType::kMul || + mul_or_div_op->type == OperatorType::kDiv); + CHECK(index_of_constant_input == 0 || index_of_constant_input == 1); + // If the op is a division, the constant input should be the right hand side. + // This should have been checked before this point. + CHECK(mul_or_div_op->type != OperatorType::kDiv || + index_of_constant_input == 1); + const auto& weights_name = following_op->inputs[1]; + const auto& bias_name = following_op->inputs[2]; + auto& weights = model->GetArray(weights_name); + DropMinMax(model, weights_name); + DropMinMax(model, bias_name); + const auto& operand = + model->GetArray(mul_or_div_op->inputs[index_of_constant_input]); + // We're only supporting the case of a scalar operand. Should have + // been checked earlier. + CHECK_EQ(RequiredBufferSizeForShape(operand.shape()), 1); + + const float scalar_operand = + operand.GetBuffer().data[0]; + + float* weights_data = + weights.GetMutableBuffer().data.data(); + const int weights_size = RequiredBufferSizeForShape(weights.shape()); + for (int i = 0; i < weights_size; i++) { + if (mul_or_div_op->type == OperatorType::kMul) { + weights_data[i] *= scalar_operand; + } else if (mul_or_div_op->type == OperatorType::kDiv) { + weights_data[i] /= scalar_operand; + } else { + LOG(FATAL) << "Should not get here"; + } + } +} + +} // namespace + +bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { + const auto binary_it = model->operators.begin() + op_index; + auto* binary_op = binary_it->get(); + if (binary_op->type != OperatorType::kAdd && + binary_op->type != OperatorType::kMul && + binary_op->type != OperatorType::kSub && + binary_op->type != OperatorType::kDiv) { + return false; + } + + CHECK_EQ(binary_op->inputs.size(), 2); + + // We only can fuse an binary when the two operands break down as follows: + // 1. One operand is the (variable) output of a typical affine (linear plus + // bias) + // op of a finite list of possible types: at the moment Conv, + // DepthwiseConv and + // FullyConnected are supported. + // 2. The other operand is a constant param array. + const bool is_input_constant[2] = { + IsConstantParameterArray(*model, binary_op->inputs[0]), + IsConstantParameterArray(*model, binary_op->inputs[1]), + }; + if (!is_input_constant[0] && !is_input_constant[1]) { + // Neither input is constant, so nothing we can fuse into a constant. + return false; + } + if (is_input_constant[0] && is_input_constant[1]) { + // Both inputs are constants. That's a job for constants + // propagation, not for us to handle here. + return false; + } + const int index_of_constant_input = is_input_constant[0] ? 0 : 1; + const int index_of_variable_input = is_input_constant[0] ? 1 : 0; + CHECK(is_input_constant[index_of_constant_input]); + CHECK(!is_input_constant[index_of_variable_input]); + + // For division, we can only fuse if the denominator is constant. + if (binary_op->type == OperatorType::kDiv) { + if (index_of_constant_input != 1) { + AddMessageF("Not fusing %s because the denominator is not constant", + LogName(*binary_op)); + return false; + } + } + + const auto& operand_shape = + model->GetArray(binary_op->inputs[index_of_constant_input]).shape(); + for (const auto& dim : operand_shape.dims()) { + if (dim > 1) { + AddMessageF( + "Not fusing %s into the following affine op, because we only know " + "how to do so when the constant operand is a scalar", + LogName(*binary_op)); + return false; + } + } + + if (binary_op->fused_activation_function != + FusedActivationFunctionType::kNone) { + AddMessageF("Not fusing %s because it has a fused activation function", + LogName(*binary_op)); + return false; + } + + Operator* following_op = GetOpWithInput(*model, binary_op->outputs[0]); + + if (!following_op) { + AddMessageF( + "Not fusing %s because it is not consumed by exactly one other op", + LogName(*binary_op)); + return false; + } + + if (following_op->type != OperatorType::kConv && + following_op->type != OperatorType::kFullyConnected && + following_op->type != OperatorType::kDepthwiseConv) { + AddMessageF( + "Not fusing %s because the following %s is not of one of the supported " + "types", + LogName(*binary_op), LogName(*following_op)); + return false; + } + + if (following_op->inputs.size() < 3) { + AddMessageF( + "Not fusing %s because the following %s does not have a bias vector", + LogName(*following_op), LogName(*binary_op)); + return false; + } + + const auto& weights = model->GetArray(following_op->inputs[1]); + const auto& bias = model->GetArray(following_op->inputs[2]); + if (!weights.buffer || !bias.buffer) { + AddMessageF( + "Not fusing %s because the following %s has non-constant weights or " + "bias arrays", + LogName(*binary_op), LogName(*following_op)); + return false; + } + + // Try to fuse the binary params into the following op's params + if (binary_op->type == OperatorType::kAdd || + binary_op->type == OperatorType::kSub) { + if (following_op->type == OperatorType::kConv) { + if (static_cast(following_op)->padding.type != + PaddingType::kValid) { + AddMessageF( + "Not fusing %s because the following %s does not use VALID padding", + LogName(*binary_op), LogName(*following_op)); + return false; + } + } + if (following_op->type == OperatorType::kDepthwiseConv) { + if (static_cast(following_op)->padding.type != + PaddingType::kValid) { + AddMessageF( + "Not fusing %s because the following %s does not use VALID padding", + LogName(*binary_op), LogName(*following_op)); + return false; + } + } + FuseAddOrSubParamsIntoFollowingAffine(model, following_op, binary_op, + index_of_constant_input); + } else if (binary_op->type == OperatorType::kMul || + binary_op->type == OperatorType::kDiv) { + FuseMulOrDivParamsIntoFollowingAffine(model, following_op, binary_op, + index_of_constant_input); + } else { + LOG(FATAL) << "should not get here"; + } + + AddMessageF("Fusing %s into the following %s", LogName(*binary_op), + LogName(*following_op)); + + model->arrays.erase(binary_op->outputs[0]); + following_op->inputs[0] = binary_op->inputs[index_of_variable_input]; + const auto& old_constant_param_name = + binary_op->inputs[index_of_constant_input]; + CHECK(IsConstantParameterArray(*model, old_constant_param_name)); + if (CountOpsWithInput(*model, old_constant_param_name) == 1) { + model->arrays.erase(old_constant_param_name); + } + model->operators.erase(binary_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc new file mode 100644 index 0000000000..8948653ec3 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc @@ -0,0 +1,326 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +void FuseAddOrSubParamsIntoPrecedingAffine(Model* model, Operator* preceding_op, + const Operator* add_or_sub_op, + int index_of_constant_input) { + CHECK(add_or_sub_op->type == OperatorType::kAdd || + add_or_sub_op->type == OperatorType::kSub); + CHECK(index_of_constant_input == 0 || index_of_constant_input == 1); + if (preceding_op->inputs.size() < 3) { + LOG(FATAL) << "Missing bias parameter"; + } + auto& bias = model->GetArray(preceding_op->inputs[2]); + bias.minmax = nullptr; + const auto& operand = + model->GetArray(add_or_sub_op->inputs[index_of_constant_input]); + + const Shape& bias_shape = bias.shape(); + const Shape& operand_shape = operand.shape(); + auto& bias_buffer = bias.GetMutableBuffer(); + float* const bias_data = bias_buffer.data.data(); + const auto& operand_buffer = operand.GetBuffer(); + const float* const operand_data = operand_buffer.data.data(); + + // TODO(b/62904716): Bias array should become 1-D when padding removed. + const int depth = bias_shape.dims(bias_shape.dimensions_count() - 1); + CHECK_EQ(depth, operand_shape.dims(operand_shape.dimensions_count() - 1)); + + enum class OpType { BiasPlusOperand, BiasMinusOperand, OperandMinusBias }; + + const OpType optype = (add_or_sub_op->type == OperatorType::kAdd) + ? OpType::BiasPlusOperand + : (index_of_constant_input == 1) + ? OpType::BiasMinusOperand + : OpType::OperandMinusBias; + + for (int i = 0; i < depth; i++) { + float& bias_val = bias_data[i]; + const float operand_val = operand_data[i]; + if (optype == OpType::BiasPlusOperand) { + bias_val += operand_val; + } else if (optype == OpType::BiasMinusOperand) { + bias_val -= operand_val; + } else if (optype == OpType::OperandMinusBias) { + bias_val = operand_val - bias_val; + } else { + LOG(FATAL) << "Should not get here."; + } + } +} + +void FuseMulOrDivParamsIntoPrecedingAffine(Model* model, Operator* preceding_op, + const Operator* mul_or_div_op, + int index_of_constant_input) { + CHECK(mul_or_div_op->type == OperatorType::kMul || + mul_or_div_op->type == OperatorType::kDiv); + CHECK(index_of_constant_input == 0 || index_of_constant_input == 1); + // If the op is a division, the constant input should be the right hand side. + // This should have been checked before this point. + CHECK(mul_or_div_op->type != OperatorType::kDiv || + index_of_constant_input == 1); + if (preceding_op->inputs.size() < 3) { + LOG(FATAL) << "Missing bias parameter"; + } + const auto& weights_name = preceding_op->inputs[1]; + const auto& bias_name = preceding_op->inputs[2]; + auto& weights = model->GetArray(weights_name); + DropMinMax(model, weights_name); + auto& bias = model->GetArray(bias_name); + DropMinMax(model, bias_name); + const auto& operand = + model->GetArray(mul_or_div_op->inputs[index_of_constant_input]); + + const Shape& weights_shape = weights.shape(); + const Shape& bias_shape = bias.shape(); + const Shape& operand_shape = operand.shape(); + auto& weights_buffer = weights.GetMutableBuffer(); + float* const weights_data = weights_buffer.data.data(); + auto& bias_buffer = bias.GetMutableBuffer(); + float* const bias_data = bias_buffer.data.data(); + const auto& operand_buffer = operand.GetBuffer(); + const float* const operand_data = operand_buffer.data.data(); + + // We support broadcasting the operand along the depth dimension, + // when the operand's depth is 1. + int operand_channel_increment = 0; + if (operand_shape.dimensions_count() >= 1 && + operand_shape.dims(operand_shape.dimensions_count() - 1) == + bias_shape.dims(bias_shape.dimensions_count() - 1)) { + operand_channel_increment = 1; + } else if (operand_shape.dimensions_count() == 0 || + operand_shape.dims(operand_shape.dimensions_count() - 1) == 1) { + operand_channel_increment = 0; + } else { + LOG(FATAL) << "Operand shape mismatch."; + } + + int output_depth; + + if (preceding_op->type == OperatorType::kConv || + preceding_op->type == OperatorType::kFullyConnected) { + output_depth = weights_shape.dims(0); + } else if (preceding_op->type == OperatorType::kDepthwiseConv) { + output_depth = weights_shape.dims(weights_shape.dimensions_count() - 1); + } else { + LOG(FATAL) << "Should not get here"; + } + + const int weights_size = RequiredBufferSizeForShape(weights_shape); + const int weights_per_depth = weights_size / output_depth; + CHECK_EQ(weights_size, weights_per_depth * output_depth); + + int operand_channel = 0; + for (int c = 0; c < output_depth; c++) { + if (mul_or_div_op->type == OperatorType::kMul) { + bias_data[c] *= operand_data[operand_channel]; + } else if (mul_or_div_op->type == OperatorType::kDiv) { + bias_data[c] /= operand_data[operand_channel]; + } else { + LOG(FATAL) << "Should not get here"; + } + if (preceding_op->type == OperatorType::kConv || + preceding_op->type == OperatorType::kFullyConnected) { + for (int i = 0; i < weights_per_depth; i++) { + if (mul_or_div_op->type == OperatorType::kMul) { + weights_data[c * weights_per_depth + i] *= + operand_data[operand_channel]; + } else if (mul_or_div_op->type == OperatorType::kDiv) { + weights_data[c * weights_per_depth + i] /= + operand_data[operand_channel]; + } else { + LOG(FATAL) << "Should not get here"; + } + } + } else if (preceding_op->type == OperatorType::kDepthwiseConv) { + for (int k = 0; k < weights_per_depth; k++) { + if (mul_or_div_op->type == OperatorType::kMul) { + weights_data[k * output_depth + c] *= operand_data[operand_channel]; + } else if (mul_or_div_op->type == OperatorType::kDiv) { + weights_data[k * output_depth + c] /= operand_data[operand_channel]; + } else { + LOG(FATAL) << "Should not get here"; + } + } + } else { + LOG(FATAL) << "Should not get here"; + } + operand_channel += operand_channel_increment; + } +} +} // namespace + +bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { + const auto binary_it = model->operators.begin() + op_index; + const auto* binary_op = binary_it->get(); + if (binary_op->type != OperatorType::kAdd && + binary_op->type != OperatorType::kMul && + binary_op->type != OperatorType::kSub && + binary_op->type != OperatorType::kDiv) { + return false; + } + + CHECK_EQ(binary_op->inputs.size(), 2); + + // We only can fuse an binary when the two operands break down as follows: + // 1. One operand is the (variable) output of a typical affine (linear plus + // bias) + // op of a finite list of possible types: at the moment Conv, + // DepthwiseConv and + // FullyConnected are supported. + // 2. The other operand is a constant param array. + const bool is_input_constant[2] = { + IsConstantParameterArray(*model, binary_op->inputs[0]), + IsConstantParameterArray(*model, binary_op->inputs[1]), + }; + if (!is_input_constant[0] && !is_input_constant[1]) { + // Neither input is constant, so nothing we can fuse into a constant. + return false; + } + if (is_input_constant[0] && is_input_constant[1]) { + // Both inputs are constants. That's a job for constants + // propagation, not for us to handle here. + return false; + } + const int index_of_constant_input = is_input_constant[0] ? 0 : 1; + const int index_of_variable_input = is_input_constant[0] ? 1 : 0; + CHECK(is_input_constant[index_of_constant_input]); + CHECK(!is_input_constant[index_of_variable_input]); + + // For division, we can only fuse if the denominator is constant. + if (binary_op->type == OperatorType::kDiv) { + if (index_of_constant_input != 1) { + AddMessageF("Not fusing %s because the denominator is not constant", + LogName(*binary_op)); + return false; + } + } + + Operator* preceding_op = + GetOpWithOutput(*model, binary_op->inputs[index_of_variable_input]); + if (!preceding_op) { + AddMessageF("Not fusing %s because it is not the output of another op", + LogName(*binary_op)); + return false; + } + + for (const string& output_array : model->flags.output_arrays()) { + if (preceding_op->outputs[0] == output_array) { + return false; + } + } + + if (preceding_op->type != OperatorType::kConv && + preceding_op->type != OperatorType::kFullyConnected && + preceding_op->type != OperatorType::kDepthwiseConv) { + AddMessageF( + "Not fusing %s because the preceding %s is not of one of the supported " + "types", + LogName(*binary_op), LogName(*preceding_op)); + return false; + } + + if (preceding_op->fused_activation_function != + FusedActivationFunctionType::kNone) { + AddMessageF( + "Not fusing %s because the preceding %s has a fused activation " + "function", + LogName(*binary_op), LogName(*preceding_op)); + return false; + } + + if (preceding_op->inputs.size() < 3) { + AddMessageF( + "Not fusing %s because the preceding %s does not have a bias vector", + LogName(*binary_op), LogName(*preceding_op)); + return false; + } + + const auto& weights = model->GetArray(preceding_op->inputs[1]); + const auto& bias = model->GetArray(preceding_op->inputs[2]); + if (binary_op->type == OperatorType::kAdd || + binary_op->type == OperatorType::kSub) { + if (!bias.buffer) { + AddMessageF( + "Not fusing %s because the preceding %s has a non-constant bias " + "array", + LogName(*binary_op), LogName(*preceding_op)); + return false; + } + } else { + if (!weights.buffer || !bias.buffer) { + AddMessageF( + "Not fusing %s because the preceding %s has non-constant weights or " + "bias arrays", + LogName(*binary_op), LogName(*preceding_op)); + return false; + } + } + + int count_ops_consuming_output = + CountOpsWithInput(*model, preceding_op->outputs[0]); + DCHECK_GE(count_ops_consuming_output, 1); + if (count_ops_consuming_output > 1) { + AddMessageF( + "Not fusing %s because the output of the preceding %s is consumed by " + "another op", + LogName(*binary_op), LogName(*preceding_op)); + return false; + } + + AddMessageF("Fusing %s into the preceding %s", LogName(*binary_op), + LogName(*preceding_op)); + + if (binary_op->type == OperatorType::kAdd || + binary_op->type == OperatorType::kSub) { + FuseAddOrSubParamsIntoPrecedingAffine(model, preceding_op, binary_op, + index_of_constant_input); + } else if (binary_op->type == OperatorType::kMul || + binary_op->type == OperatorType::kDiv) { + FuseMulOrDivParamsIntoPrecedingAffine(model, preceding_op, binary_op, + index_of_constant_input); + } else { + LOG(FATAL) << "should not get here"; + } + + model->arrays.erase(preceding_op->outputs[0]); + preceding_op->outputs[0] = binary_op->outputs[0]; + preceding_op->fused_activation_function = + binary_op->fused_activation_function; + const auto& old_constant_param_name = + binary_op->inputs[index_of_constant_input]; + CHECK(IsConstantParameterArray(*model, old_constant_param_name)); + if (CountOpsWithInput(*model, old_constant_param_name) == 1) { + model->arrays.erase(old_constant_param_name); + } + model->operators.erase(binary_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc new file mode 100644 index 0000000000..323fec6cf8 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc @@ -0,0 +1,108 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +void PrintModelStats(const string& label, const Model& model) { + int quantized_arrays = 0; + for (const auto& array : model.arrays) { + if (array.second->quantization_params) { + quantized_arrays++; + } + } + LOG(INFO) << label << ": " << model.operators.size() << " operators, " + << model.arrays.size() << " arrays (" << quantized_arrays + << " quantized)"; +} + +bool GraphTransformationsPass(int increment, Model* model, + const GraphTransformationsSet& transformations) { + CHECK(increment == 1 || increment == -1); + bool changed = false; + CHECK(!model->operators.empty()); + int op_index = increment == 1 ? 0 : model->operators.size() - 1; + while (true) { + bool changed_now = false; + // Loop over all transformations at the current position in the graph. + for (const auto& transformation : transformations) { + CHECK(!changed_now); + CHECK(transformation->Messages().empty()); + changed_now = transformation->Run(model, op_index); + if (changed_now) { + DumpGraphvizVideoFrame(*model); + CHECK(!model->operators.empty()); + op_index = std::min(op_index, model->operators.size() - 1); + // Uncomment for debugging + // CheckInvariants(*model); + } + const char* made_a_change_msg = + changed_now ? "made a change" : "did NOT make a change"; + const int log_level = + changed_now ? kLogLevelModelChanged : kLogLevelModelUnchanged; + for (const string& message : transformation->Messages()) { + VLOG(log_level) << transformation->Name() << " " << made_a_change_msg + << " at op_index=" << op_index << "/" + << model->operators.size() - 1 << ": " << message; + } + transformation->ClearMessages(); + if (changed_now) { + break; + } + } + if (changed_now) { + changed = true; + } else { + const int op_index_last = + increment == 1 ? model->operators.size() - 1 : 0; + if (op_index == op_index_last) { + break; + } + op_index += increment; + } + } + return changed; +} + +} // namespace + +void RunGraphTransformations(Model* model, const string& msg, + const GraphTransformationsSet& transformations) { + PrintModelStats(toco::port::StringF("Before %s", msg), *model); + int pass_index = 0; + while (GraphTransformationsPass((pass_index % 2) ? -1 : 1, model, + transformations)) { + pass_index++; + const auto& label = + toco::port::StringF("After %s pass %d", msg, pass_index); + PrintModelStats(label, *model); + CheckInvariants(*model); + } +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h new file mode 100644 index 0000000000..2cc24ff361 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -0,0 +1,186 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_GRAPH_TRANSFORMATIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_GRAPH_TRANSFORMATIONS_H_ + +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" + +namespace toco { + +class GraphTransformation { + public: + virtual bool Run(Model* model, std::size_t op_index) = 0; + virtual const char* Name() const = 0; + virtual ~GraphTransformation() {} + // Returns the list of messages that this graph transformation + // generated since ClearMessages() was called. + const std::vector& Messages() const { return messages_; } + // Clears the list of messages; should be called after every + // run of this graph transformation. + void ClearMessages() { return messages_.clear(); } + // Adds a message; normally only called by the graph transformation + // itself during its run (this function could be protected). + template + void AddMessageF(const char* format, const Args&... args) { + return messages_.push_back(toco::port::StringF(format, args...)); + } + + protected: + GraphTransformation() {} + + // List of messages generated by this graph transformation. + std::vector messages_; + + private: + GraphTransformation(const GraphTransformation& other) = delete; + GraphTransformation(const GraphTransformation&& other) = delete; +}; + +class GraphTransformationsSet { + public: + // The choice of a container with fully-specified iteration order + // ensures that graph transformations are always run in the same order, + // which avoids having toco randomly fail or produce different results + // depending on the toolchain. Ideally success/results should be independent + // of the order in which graph transformations are run, but that's + // unfortunately not currently guaranteed to be the case. + using TransformationsContainer = + std::vector>; + + GraphTransformationsSet() {} + GraphTransformationsSet( + const std::initializer_list transformations) { + for (GraphTransformation* t : transformations) { + Add(t); + } + } + void Add(GraphTransformation* transformation) { + const string& name = transformation->Name(); + CHECK(!names_.count(name)); + names_.insert(name); + transformations_.emplace_back(transformation); + } + TransformationsContainer::const_iterator begin() const { + return transformations_.begin(); + } + TransformationsContainer::const_iterator end() const { + return transformations_.end(); + } + bool empty() const { return transformations_.empty(); } + + private: + GraphTransformationsSet(const GraphTransformationsSet& other) = delete; + GraphTransformationsSet(const GraphTransformationsSet&& other) = delete; + std::vector> transformations_; + // Names of transformations in the set. Only used to guard against dupes. + std::unordered_set names_; +}; + +// Run the given list of graph transformations on the model. +// The message is only for logging purposes. +// The transformations is a rvalue reference, indicating that +// nothing else will use these pointers. The user is supposed to +// construct GraphTransformation objects by using 'new', pass us +// the resulting raw pointers, and this RunGraphTransformations +// takes care of delete'ing these pointers. +void RunGraphTransformations(Model* model, const string& message, + const GraphTransformationsSet& transformations); + +#define DECLARE_GRAPH_TRANSFORMATION(GTName) \ + class GTName : public GraphTransformation { \ + public: \ + bool Run(Model* model, std::size_t op_index) override; \ + const char* Name() const { return #GTName; } \ + }; + +// List of all graph transformations +DECLARE_GRAPH_TRANSFORMATION(ConvertPureConvToDepthwise) +DECLARE_GRAPH_TRANSFORMATION(EnsureBiasVectors) +DECLARE_GRAPH_TRANSFORMATION(FuseActivationFunctions) +DECLARE_GRAPH_TRANSFORMATION(FuseBinaryIntoFollowingAffine) +DECLARE_GRAPH_TRANSFORMATION(FuseBinaryIntoPrecedingAffine) +DECLARE_GRAPH_TRANSFORMATION(IdentifyL2Normalization) +DECLARE_GRAPH_TRANSFORMATION(IdentifyL2Pool) +DECLARE_GRAPH_TRANSFORMATION(IdentifyLstmCell) +DECLARE_GRAPH_TRANSFORMATION(IdentifyRelu1) +DECLARE_GRAPH_TRANSFORMATION(MakeInitialDequantizeOperator) +DECLARE_GRAPH_TRANSFORMATION(PropagateArrayDataTypes) +DECLARE_GRAPH_TRANSFORMATION(PropagateFixedSizes) +DECLARE_GRAPH_TRANSFORMATION(HardcodeMinMax) +DECLARE_GRAPH_TRANSFORMATION(Quantize) +DECLARE_GRAPH_TRANSFORMATION(RemoveFinalDequantizeOp) +DECLARE_GRAPH_TRANSFORMATION(RemoveTensorFlowAssert) +DECLARE_GRAPH_TRANSFORMATION(RemoveTensorFlowIdentity) +DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialBinaryOperator) +DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialConcatenation) +DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialConcatenationInput) +DECLARE_GRAPH_TRANSFORMATION(RemoveTrivialQuantizedActivationFunc) +DECLARE_GRAPH_TRANSFORMATION(RemoveUnusedOp) +DECLARE_GRAPH_TRANSFORMATION(ResolveBatchNormalization) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantBinaryOperator) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantUnaryOperator) +DECLARE_GRAPH_TRANSFORMATION(CreateIm2colArrays) +DECLARE_GRAPH_TRANSFORMATION(DropIm2colArrays) +DECLARE_GRAPH_TRANSFORMATION(ReadFakeQuantMinMax) +DECLARE_GRAPH_TRANSFORMATION(ResolveReorderAxes) +DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowConcat) +DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowMatMul) +DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowMerge) +DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowSqueeze) +DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowSwitch) +DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowTile) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFakeQuant) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantConcatenation) +DECLARE_GRAPH_TRANSFORMATION(DropFakeQuant) +DECLARE_GRAPH_TRANSFORMATION(UnfuseActivationFunctions) +DECLARE_GRAPH_TRANSFORMATION(ResolvePadAttributes) +DECLARE_GRAPH_TRANSFORMATION(ResolveStridedSliceAttributes) +DECLARE_GRAPH_TRANSFORMATION(ResolveSliceAttributes) +DECLARE_GRAPH_TRANSFORMATION(ResolveMeanAttributes) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantTensorFlowShape) +DECLARE_GRAPH_TRANSFORMATION(Dequantize) + +class ResolveReshapeAttributes : public GraphTransformation { + public: + bool Run(Model* model, std::size_t op_index) override; + const char* Name() const override { return "ResolveReshapeAttributes"; } +}; + +class RemoveTrivialReshape : public GraphTransformation { + public: + bool Run(Model* model, std::size_t op_index) override; + const char* Name() const override { return "RemoveTrivialReshape"; } + bool treat_expand_dims_as_trivial() const { + return treat_expand_dims_as_trivial_; + } + void set_treat_expand_dims_as_trivial(bool val) { + treat_expand_dims_as_trivial_ = val; + } + + private: + bool treat_expand_dims_as_trivial_ = false; +}; + +#undef DECLARE_GRAPH_TRANSFORMATION + +} // end namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_GRAPH_TRANSFORMATIONS_H_ diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc new file mode 100644 index 0000000000..d44b5dc7b0 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -0,0 +1,229 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool HardcodeMinMaxForIm2colArray(Model* model, Operator* op) { + if (op->outputs.size() != 2) { + return false; + } + auto& im2col_array = model->GetArray(op->outputs[1]); + if (im2col_array.minmax) { + return false; + } + const auto& input_array = model->GetArray(op->inputs[0]); + if (!input_array.minmax) { + return false; + } + const auto& input_minmax = input_array.GetMinMax(); + CHECK(!im2col_array.minmax); + auto& im2col_minmax = im2col_array.GetOrCreateMinMax(); + im2col_minmax.min = input_minmax.min; + im2col_minmax.max = input_minmax.max; + return true; +} + +bool HardcodeMinMaxForL2Normalization(Model* model, Operator* op) { + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.minmax) { + return false; + } + const auto& input_array = model->GetArray(op->inputs[0]); + if (!input_array.minmax) { + return false; + } + const auto& input_minmax = input_array.GetMinMax(); + CHECK(!output_array.minmax); + auto& output_minmax = output_array.GetOrCreateMinMax(); + output_minmax.min = input_minmax.min >= 0. ? 0. : -1.; + output_minmax.max = input_minmax.max <= 0. ? 0. : 1.; + return true; +} + +bool HardcodeMinMaxForConcatenation(Model* model, Operator* op) { + // Do not early return if the output already has min/max: + // we may still need to adjust the inputs min/max. + bool has_minmax = false; + double overall_min = std::numeric_limits::infinity(); + double overall_max = -std::numeric_limits::infinity(); + for (const auto& input : op->inputs) { + if (model->GetArray(input).minmax) { + has_minmax = true; + const auto* minmax = model->GetArray(input).minmax.get(); + if (minmax) { + overall_min = std::min(overall_min, minmax->min); + overall_max = std::max(overall_max, minmax->max); + } + } + } + auto& output = model->GetArray(op->outputs[0]); + if (output.minmax) { + has_minmax = true; + const auto* minmax = model->GetArray(op->outputs[0]).minmax.get(); + if (minmax) { + overall_min = std::min(overall_min, minmax->min); + overall_max = std::max(overall_max, minmax->max); + } + } + if (!has_minmax) { + return false; + } + MinMax overall_minmax; + overall_minmax.min = overall_min; + overall_minmax.max = overall_max; + bool changed = false; + for (const auto& input : op->inputs) { + auto& array = model->GetArray(input); + if (!array.minmax) { + changed = true; + } else if (!(overall_minmax == array.GetMinMax())) { + changed = true; + LOG(WARNING) + << "Tweaking the MinMax of array " << input << ", which is " + << "an input to " << LogName(*op) << ", because we want all inputs " + << "and outputs of a Concatenation operator to have the same MinMax " + << "so that it can be implemented as a pure byte-copy, no " + "arithmetic."; + } + array.GetOrCreateMinMax() = overall_minmax; + } + if (!output.minmax) { + changed = true; + } else if (!(overall_minmax == output.GetMinMax())) { + changed = true; + LOG(WARNING) + << "Tweaking the MinMax of the output array of " << LogName(*op) + << ", because we want all inputs " + << "and outputs of a Concatenation operator to have the same MinMax " + << "so that it can be implemented as a pure byte-copy, no arithmetic."; + } + output.GetOrCreateMinMax() = overall_minmax; + + return changed; +} + +// The output of average or max pooling is within the same range as its input. +bool HardcodeMinMaxForAverageOrMaxPool(Model* model, Operator* op) { + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.minmax) { + return false; + } + const auto& input_array = model->GetArray(op->inputs[0]); + if (!input_array.minmax) { + return false; + } + const auto& input_minmax = input_array.GetMinMax(); + CHECK(!output_array.minmax); + auto& output_minmax = output_array.GetOrCreateMinMax(); + output_minmax.min = std::min(input_minmax.min, 0.); + output_minmax.max = std::max(input_minmax.max, 0.); + return true; +} + +bool HardcodeMinMaxForReshape(Model* model, Operator* op) { + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.minmax) { + return false; + } + const auto& input_array = model->GetArray(op->inputs[0]); + if (!input_array.minmax) { + return false; + } + const auto& input_minmax = input_array.GetMinMax(); + CHECK(!output_array.minmax); + auto& output_minmax = output_array.GetOrCreateMinMax(); + output_minmax.min = input_minmax.min; + output_minmax.max = input_minmax.max; + return true; +} + +bool HardcodeMinMaxForOutput(Model* model, Operator* op, double min, + double max) { + CHECK_EQ(op->outputs.size(), 1); + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.minmax) { + return false; + } + const auto& input_array = model->GetArray(op->inputs[0]); + if (!input_array.minmax) { + return false; + } + CHECK(!output_array.minmax); + auto& output_minmax = output_array.GetOrCreateMinMax(); + output_minmax.min = min; + output_minmax.max = max; + return true; +} +} // namespace + +bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { + auto it = model->operators.begin() + op_index; + auto* op = it->get(); + bool changed = false; + switch (op->type) { + case OperatorType::kConv: + changed = HardcodeMinMaxForIm2colArray(model, op); + break; + + case OperatorType::kL2Normalization: + changed = HardcodeMinMaxForL2Normalization(model, op); + break; + + case OperatorType::kConcatenation: + changed = HardcodeMinMaxForConcatenation(model, op); + break; + + case OperatorType::kAveragePool: + case OperatorType::kMaxPool: + changed = HardcodeMinMaxForAverageOrMaxPool(model, op); + break; + + case OperatorType::kTensorFlowReshape: + changed = HardcodeMinMaxForReshape(model, op); + break; + + case OperatorType::kLogistic: + // We hardcode quantization_params to: zero_point=0, scale=1/256. + // This choice of minmax is the one that is equivalent to that. + changed = HardcodeMinMaxForOutput(model, op, 0, 255. / 256.); + break; + + case OperatorType::kSoftmax: + // We hardcode quantization_params to: zero_point=0, scale=1/256. + // This choice of minmax is the one that is equivalent to that. + changed = HardcodeMinMaxForOutput(model, op, 0, 255. / 256.); + break; + + default: + break; + } + if (changed) { + AddMessageF("Hardcoded min-max through %s", LogName(*op)); + } + return changed; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc new file mode 100644 index 0000000000..01b75e37c6 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc @@ -0,0 +1,170 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +std::vector>::iterator FindOperator( + Model* model, const Operator* op) { + auto it = model->operators.begin(); + for (; it != model->operators.end(); ++it) { + if (it->get() == op) { + break; + } + } + return it; +} +} // namespace + +bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { + const auto div_it = model->operators.begin() + op_index; + const auto* div_or_mul_op = div_it->get(); + OperatorType expected_op_type_producing_div_or_mul_input; + if (div_or_mul_op->type == OperatorType::kDiv) { + expected_op_type_producing_div_or_mul_input = OperatorType::kTensorFlowSqrt; + } else if (div_or_mul_op->type == OperatorType::kMul) { + expected_op_type_producing_div_or_mul_input = + OperatorType::kTensorFlowRsqrt; + } else { + return false; + } + CHECK_EQ(div_or_mul_op->inputs.size(), 2); + Operator* op_producing_div_or_mul_input[2] = { + GetOpWithOutput(*model, div_or_mul_op->inputs[0]), + GetOpWithOutput(*model, div_or_mul_op->inputs[1]), + }; + if (!op_producing_div_or_mul_input[1] || + op_producing_div_or_mul_input[1]->type != + expected_op_type_producing_div_or_mul_input) { + return false; + } + Operator* sqrt_or_rsqrt_op = op_producing_div_or_mul_input[1]; + CHECK_EQ(sqrt_or_rsqrt_op->inputs.size(), 1); + Operator* op_producing_sqrt_or_rsqrt_input = + GetOpWithOutput(*model, sqrt_or_rsqrt_op->inputs[0]); + if (!op_producing_sqrt_or_rsqrt_input) { + return false; + } + + // There may be an Add or a Maximum here, adding or clamping to a "small" + // constant scalar. + // Reported bug: b/29395854 + Operator* add_op = nullptr; + Operator* op_producing_add_input = nullptr; + if (op_producing_sqrt_or_rsqrt_input->type == OperatorType::kAdd || + op_producing_sqrt_or_rsqrt_input->type == + OperatorType::kTensorFlowMaximum) { + add_op = op_producing_sqrt_or_rsqrt_input; + bool add_can_be_removed = false; + CHECK_EQ(op_producing_sqrt_or_rsqrt_input->inputs.size(), 2); + for (int i = 0; i < 2; i++) { + const auto& input_array = + model->GetArray(op_producing_sqrt_or_rsqrt_input->inputs[i]); + if (!input_array.buffer) { + continue; + } + if (input_array.buffer->type != ArrayDataType::kFloat) { + continue; + } + if (RequiredBufferSizeForShape(input_array.shape()) != 1) { + continue; + } + const auto& input_float_data = + input_array.GetBuffer().data; + if (std::abs(input_float_data[0]) > 1e-3f) { + continue; + } + add_can_be_removed = true; + op_producing_add_input = GetOpWithOutput(*model, add_op->inputs[1 - i]); + break; + } + if (!add_can_be_removed) { + AddMessageF( + "Giving up trying to identify L2Normalization subgraph " + " because the operator producing the input to the square root, %s," + ", does not match the expected pattern", + LogName(*op_producing_sqrt_or_rsqrt_input)); + return false; + } + } + + Operator* sum_op = + add_op ? op_producing_add_input : op_producing_sqrt_or_rsqrt_input; + if (sum_op->type != OperatorType::kTensorFlowSum) { + AddMessageF( + "Giving up trying to identify L2Normalization subgraph: " + "expected Sum op, got %s", + LogName(*sum_op)); + return false; + } + + Operator* square_op = GetOpWithOutput(*model, sum_op->inputs[0]); + if (square_op->type != OperatorType::kTensorFlowSquare) { + AddMessageF( + "Giving up trying to identify L2Normalization subgraph: " + "expected Square op, got %s", + LogName(*square_op)); + return false; + } + + CHECK_EQ(square_op->inputs.size(), 1); + + if (square_op->inputs[0] != div_or_mul_op->inputs[0]) { + AddMessageF( + "Giving up trying to identify L2Normalization subgraph: %s does not " + "take the same input as the Mul/Div node", + LogName(*square_op)); + return false; + } + + // Create and emplace the new L2Normalization + auto* l2norm_op = new L2NormalizationOperator; + l2norm_op->inputs = {div_or_mul_op->inputs[0]}; + l2norm_op->outputs = div_or_mul_op->outputs; + model->operators.emplace(div_it, l2norm_op); + + AddMessageF("Creating %s replacing equivalent subgraph", LogName(*l2norm_op)); + + // Erase the subgraph that is now replaced by L2Normalization + model->operators.erase(FindOperator(model, square_op)); + model->arrays.erase(sum_op->inputs[0]); + if (sum_op->inputs.size() > 1) { + model->arrays.erase(sum_op->inputs[1]); + } + model->operators.erase(FindOperator(model, sum_op)); + if (add_op) { + model->arrays.erase(add_op->inputs[0]); + model->arrays.erase(add_op->inputs[1]); + model->operators.erase(FindOperator(model, add_op)); + } + model->arrays.erase(sqrt_or_rsqrt_op->inputs[0]); + model->operators.erase(FindOperator(model, sqrt_or_rsqrt_op)); + model->arrays.erase(div_or_mul_op->inputs[1]); + model->operators.erase(FindOperator(model, div_or_mul_op)); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc new file mode 100644 index 0000000000..1865416fc2 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc @@ -0,0 +1,106 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +std::vector>::iterator FindOperator( + Model* model, const Operator* op) { + auto it = model->operators.begin(); + for (; it != model->operators.end(); ++it) { + if (it->get() == op) { + break; + } + } + return it; +} +} // namespace + +bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { + const auto sqrt_it = model->operators.begin() + op_index; + const auto* sqrt_op = sqrt_it->get(); + if (sqrt_op->type != OperatorType::kTensorFlowSqrt) { + return false; + } + + CHECK_EQ(sqrt_op->inputs.size(), 1); + CHECK_EQ(sqrt_op->outputs.size(), 1); + + const AveragePoolOperator* avpool_op; + const Operator* square_op; + + Operator* prev_to_sqrt_op = GetOpWithOutput(*model, sqrt_op->inputs[0]); + if (prev_to_sqrt_op->type != OperatorType::kAveragePool) { + AddMessageF( + "Giving up trying to identify L2Pool subgraph: " + "expected AveragePool op, got %s", + LogName(*prev_to_sqrt_op)); + return false; + } + + avpool_op = static_cast(prev_to_sqrt_op); + CHECK_EQ(avpool_op->inputs.size(), 1); + + square_op = GetOpWithOutput(*model, avpool_op->inputs[0]); + CHECK_EQ(square_op->inputs.size(), 1); + if (square_op->type != OperatorType::kTensorFlowSquare) { + AddMessageF( + "Giving up trying to identify L2Pool subgraph: " + "expected Square op, got %s", + LogName(*square_op)); + return false; + } + + // Create and emplace L2Pool node. + auto* l2pool_op = new L2PoolOperator; + + l2pool_op->inputs = {square_op->inputs[0]}; + l2pool_op->outputs = sqrt_op->outputs; + + l2pool_op->padding.type = avpool_op->padding.type; + // Note that we do not setup avpool_op->padding.fixed here. This is done by + // the PropagateFixedSizes graph transformation. + + l2pool_op->stride_height = avpool_op->stride_height; + l2pool_op->stride_width = avpool_op->stride_width; + l2pool_op->kheight = avpool_op->kheight; + l2pool_op->kwidth = avpool_op->kwidth; + model->operators.emplace(sqrt_it, l2pool_op); + + AddMessageF("Creating %s replacing equivalent subgraph", LogName(*l2pool_op)); + + // Erase intermediate arrays, keeping input to square op. + model->arrays.erase(avpool_op->inputs[0]); + model->arrays.erase(sqrt_op->inputs[0]); + + // Erase three operators being replaced. + model->operators.erase(FindOperator(model, square_op)); + model->operators.erase(FindOperator(model, avpool_op)); + model->operators.erase(FindOperator(model, sqrt_op)); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc new file mode 100644 index 0000000000..082820fddc --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc @@ -0,0 +1,396 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +namespace { + +std::vector>::iterator FindOperator( + Model* model, const Operator& op) { + auto it = model->operators.begin(); + for (; it != model->operators.end(); ++it) { + if (it->get() == &op) { + break; + } + } + return it; +} + +bool GetStateArrayForBackEdge(const Model& model, + const string& back_edge_source_array, + string* state_array = nullptr) { + for (const auto& rnn_state : model.flags.rnn_states()) { + if (back_edge_source_array == rnn_state.back_edge_source_array()) { + // Found LSTM cell output + if (state_array) { + *state_array = rnn_state.state_array(); + } + return true; + } + } + return false; +} + +// Returns true if the given operator has exactly 1 input, and is connected to +// the given op_type. +// We use kNone to indicate an input unattached to an operator output. Usually +// these are the static input arrays. +bool MatchOperatorInputs(const Operator& op, const Model& model, + OperatorType op_type, Operator** connected_op) { + // Check for required number of inputs + if (op.inputs.size() != 1) { + return false; + } + + // Check if first input is disconnected/connected to an operator + Operator* x = GetOpWithOutput(model, op.inputs[0]); + if ((op_type == OperatorType::kNone) && (x != nullptr)) { + return false; + } + if ((op_type != OperatorType::kNone) && (x == nullptr)) { + return false; + } + + // Check that first operator, if connected, is of correct type + if ((x != nullptr) && (x->type != op_type)) { + return false; + } + + // Successfully matched. Optionally return matching input operators. + if (connected_op) { + *connected_op = x; + } + + return true; +} + +// Returns true if the given operator has exactly 2 inputs, which are connected +// to the given op_types. +// We use kNone to indicate an input unattached to an operator output. Usually +// these are the static input arrays. +bool MatchOperatorInputs(const Operator& op, const Model& model, + OperatorType a_op_type, Operator** a_op, + OperatorType b_op_type, Operator** b_op) { + // Check for required number of inputs + if (op.inputs.size() != 2) { + return false; + } + + // Check if first input is disconnected/connected to an operator + Operator* x = GetOpWithOutput(model, op.inputs[0]); + if ((a_op_type == OperatorType::kNone) && (x != nullptr)) { + return false; + } + if ((a_op_type != OperatorType::kNone) && (x == nullptr)) { + return false; + } + + // Check that first operator, if connected, is of correct type + if ((x != nullptr) && (x->type != a_op_type)) { + return false; + } + + // Check if second input is disconnected/connected to an operator + Operator* y = GetOpWithOutput(model, op.inputs[1]); + if ((b_op_type == OperatorType::kNone) && (y != nullptr)) { + return false; + } + if ((b_op_type != OperatorType::kNone) && (y == nullptr)) { + return false; + } + + // Check that second operator, if connected, is of correct type + if ((y != nullptr) && (y->type != b_op_type)) { + return false; + } + + // Successfully matched. Optionally return matching input operators. + if (a_op != nullptr) { + *a_op = x; + } + if (b_op != nullptr) { + *b_op = y; + } + return true; +} + +// Returns true if the given operator has exactly 3 inputs, which are connected +// to the given op_types. +// We use kNone to indicate an input unattached to an operator output. Usually +// these are the static input arrays. +bool MatchOperatorInputs(const Operator& op, const Model& model, + OperatorType a_op_type, Operator** a_op, + OperatorType b_op_type, Operator** b_op, + OperatorType c_op_type, Operator** c_op) { + // Check for required number of inputs + if (op.inputs.size() != 3) { + return false; + } + + // Check if first input is disconnected/connected to an operator + Operator* x = GetOpWithOutput(model, op.inputs[0]); + if ((a_op_type == OperatorType::kNone) && (x != nullptr)) { + return false; + } + if ((a_op_type != OperatorType::kNone) && (x == nullptr)) { + return false; + } + + // Check that first operator, if connected, is of correct type + if ((x != nullptr) && (x->type != a_op_type)) { + return false; + } + + // Check if second input is disconnected/connected to an operator + Operator* y = GetOpWithOutput(model, op.inputs[1]); + if ((b_op_type == OperatorType::kNone) && (y != nullptr)) { + return false; + } + if ((b_op_type != OperatorType::kNone) && (y == nullptr)) { + return false; + } + + // Check that second operator, if connected, is of correct type + if ((y != nullptr) && (y->type != b_op_type)) { + return false; + } + + // Check if third input is disconnected/connected to an operator + Operator* z = GetOpWithOutput(model, op.inputs[2]); + if ((c_op_type == OperatorType::kNone) && (z != nullptr)) { + return false; + } + if ((c_op_type != OperatorType::kNone) && (z == nullptr)) { + return false; + } + + // Check that third operator, if connected, is of correct type + if ((z != nullptr) && (z->type != c_op_type)) { + return false; + } + + // Successfully matched. Optionally return matching input operators. + if (a_op != nullptr) { + *a_op = x; + } + if (b_op != nullptr) { + *b_op = y; + } + if (c_op != nullptr) { + *c_op = z; + } + return true; +} + +absl::string_view FindLongestCommonPrefix(absl::string_view a, + absl::string_view b) { + if (a.empty() || b.empty()) return absl::string_view(); + + const char* pa = a.data(); + const char* pb = b.data(); + size_t count = 0; + const ssize_t limit = std::min(a.size(), b.size()); + while (count < limit && *pa == *pb) { + ++pa; + ++pb; + ++count; + } + + return absl::string_view(a.data(), count); +} + +} // namespace + +bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { + // This LSTM cell identification method is not invariant to commutation of + // commutative operator inputs. For example, if input[0] and input[1] of the + // final output multiplication were swapped, this method would not identify it + // as an LSTM cell. This is OK in most cases, because + // tf.rnn.contrib.BasicLSTMCell always generates LSTM cells the same way. + + // Final output multiply + auto op_it = model->operators.begin() + op_index; + Operator* final_output_mul = op_it->get(); + if (final_output_mul->type != OperatorType::kMul) { + return false; + } + Operator *state_output_tanh, *fc_output_sig; + if (!MatchOperatorInputs(*final_output_mul, *model, OperatorType::kTanh, + &state_output_tanh, OperatorType::kLogistic, + &fc_output_sig)) { + return false; + } + + // State output TanH + // (We don't count an operator as ID'd until we verify it has the correct + // operator types feeding into it.) + Operator* state_combine_add; + if (!MatchOperatorInputs(*state_output_tanh, *model, OperatorType::kAdd, + &state_combine_add)) { + return false; + } + string prev_state; + if (!GetStateArrayForBackEdge(*model, state_output_tanh->inputs[0], + &prev_state)) { + return false; + } + + // State forget & remember addition + Operator *state_forget_mul, *state_remember_mul; + if (!MatchOperatorInputs(*state_combine_add, *model, OperatorType::kMul, + &state_forget_mul, OperatorType::kMul, + &state_remember_mul)) { + return false; + } + if (state_forget_mul->inputs[0] != prev_state) { + return false; + } + + // State forget gate + Operator* state_forget_sig; + if (!MatchOperatorInputs(*state_forget_mul, *model, OperatorType::kNone, + nullptr, OperatorType::kLogistic, + &state_forget_sig)) { + return false; + } + + // State remember gate + Operator *state_remember_sig, *state_info_tanh; + if (!MatchOperatorInputs(*state_remember_mul, *model, OperatorType::kLogistic, + &state_remember_sig, OperatorType::kTanh, + &state_info_tanh)) { + return false; + } + + // State remember "information" activation function + Operator* fc_output_split; + if (!MatchOperatorInputs(*state_info_tanh, *model, + OperatorType::kTensorFlowSplit, &fc_output_split)) { + return false; + } + // State remember gate activation function + Operator* tmp; + if (!MatchOperatorInputs(*state_remember_sig, *model, + OperatorType::kTensorFlowSplit, &tmp) || + (tmp != fc_output_split)) { + return false; + } + // State forget gate activation function + if (!MatchOperatorInputs(*state_forget_sig, *model, + OperatorType::kTensorFlowSplit, &tmp) || + (tmp != fc_output_split)) { + return false; + } + // Fully connected output activation function + if (!MatchOperatorInputs(*fc_output_sig, *model, + OperatorType::kTensorFlowSplit, &tmp) || + (tmp != fc_output_split)) { + return false; + } + // Fully connected output split + Operator* fully_connected; + if (!MatchOperatorInputs(*fc_output_split, *model, OperatorType::kNone, + nullptr, OperatorType::kFullyConnected, + &fully_connected)) { + return false; + } + + // Fully connected op + Operator* concat_inputs; + if (!MatchOperatorInputs(*fully_connected, *model, + OperatorType::kConcatenation, &concat_inputs, + OperatorType::kNone, nullptr, OperatorType::kNone, + nullptr)) { + return false; + } + + // Emplace a new LSTM cell operator + auto* lstm_cell_op = new LstmCellOperator; + lstm_cell_op->inputs.resize(LstmCellOperator::NUM_INPUTS); + lstm_cell_op->inputs[LstmCellOperator::DATA_INPUT] = concat_inputs->inputs[0]; + lstm_cell_op->inputs[LstmCellOperator::PREV_ACTIV_INPUT] = + concat_inputs->inputs[1]; + lstm_cell_op->inputs[LstmCellOperator::WEIGHTS_INPUT] = + fully_connected->inputs[1]; + lstm_cell_op->inputs[LstmCellOperator::BIASES_INPUT] = + fully_connected->inputs[2]; + lstm_cell_op->inputs[LstmCellOperator::PREV_STATE_INPUT] = prev_state; + lstm_cell_op->outputs.resize(LstmCellOperator::NUM_OUTPUTS); + lstm_cell_op->outputs[LstmCellOperator::STATE_OUTPUT] = + state_output_tanh->inputs[0]; + lstm_cell_op->outputs[LstmCellOperator::ACTIV_OUTPUT] = + final_output_mul->outputs[0]; + model->operators.emplace(op_it, lstm_cell_op); + AddMessageF("Creating %s replacing equivalent subgraph", + LogName(*lstm_cell_op)); + + // Create temp arrays used internally during runtime. + const string base_name(FindLongestCommonPrefix( + lstm_cell_op->outputs[LstmCellOperator::STATE_OUTPUT], + lstm_cell_op->outputs[LstmCellOperator::ACTIV_OUTPUT])); + const string& concat_temp_array_name = + AvailableArrayName(*model, base_name + "concat_temp"); + model->GetOrCreateArray(concat_temp_array_name); + lstm_cell_op->outputs[LstmCellOperator::CONCAT_TEMP] = concat_temp_array_name; + const string& activ_temp_array_name = + AvailableArrayName(*model, base_name + "activ_temp"); + model->GetOrCreateArray(activ_temp_array_name); + lstm_cell_op->outputs[LstmCellOperator::ACTIV_TEMP] = activ_temp_array_name; + AddMessageF("Created temp outputs %s and %s on operator %s", + concat_temp_array_name, activ_temp_array_name, + LogName(*lstm_cell_op)); + + // Delete arrays and operators replaced by the LSTM cell operator. Order is + // important - DeleteArrayIfUnused() only succeeds if dependent operators + // have been removed first. Start at the output and work towards the input. + model->operators.erase(FindOperator(model, *final_output_mul)); + DeleteArrayIfUnused(state_output_tanh->outputs[0], model); + DeleteArrayIfUnused(fc_output_sig->outputs[0], model); + model->operators.erase(FindOperator(model, *state_output_tanh)); + model->operators.erase(FindOperator(model, *fc_output_sig)); + model->operators.erase(FindOperator(model, *state_combine_add)); + DeleteArrayIfUnused(state_forget_mul->outputs[0], model); + DeleteArrayIfUnused(state_remember_mul->outputs[0], model); + model->operators.erase(FindOperator(model, *state_forget_mul)); + model->operators.erase(FindOperator(model, *state_remember_mul)); + DeleteArrayIfUnused(state_forget_sig->outputs[0], model); + DeleteArrayIfUnused(state_info_tanh->outputs[0], model); + DeleteArrayIfUnused(state_remember_sig->outputs[0], model); + model->operators.erase(FindOperator(model, *state_forget_sig)); + model->operators.erase(FindOperator(model, *state_info_tanh)); + model->operators.erase(FindOperator(model, *state_remember_sig)); + DeleteArrayIfUnused(fc_output_split->outputs[0], model); + DeleteArrayIfUnused(fc_output_split->outputs[1], model); + DeleteArrayIfUnused(fc_output_split->outputs[2], model); + DeleteArrayIfUnused(fc_output_split->outputs[3], model); + string dims_array = fc_output_split->inputs[0]; + model->operators.erase(FindOperator(model, *fc_output_split)); + DeleteArrayIfUnused(dims_array, model); + DeleteArrayIfUnused(fully_connected->outputs[0], model); + model->operators.erase(FindOperator(model, *fully_connected)); + DeleteArrayIfUnused(concat_inputs->outputs[0], model); + model->operators.erase(FindOperator(model, *concat_inputs)); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc new file mode 100644 index 0000000000..cfc77024e7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc @@ -0,0 +1,103 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +std::vector>::iterator FindOperator( + Model* model, const Operator* op) { + auto it = model->operators.begin(); + for (; it != model->operators.end(); ++it) { + if (it->get() == op) { + break; + } + } + return it; +} + +bool CheckArrayIsScalarFloat(Model* model, const std::string& name, float val) { + const auto& op_array = model->GetArray(name); + if (!op_array.buffer || op_array.buffer->type != ArrayDataType::kFloat || + RequiredBufferSizeForShape(op_array.shape()) != 1) { + return false; + } + const auto& op_data = op_array.GetBuffer().data; + return op_data[0] == val; +} + +// Returns index of scalar input when there is exactly one scalar, -1 otherwise +int GetSingleScalarInputIndexOfBinaryOp(Model* model, const Operator* op, + float val) { + bool input0_is_scalar = CheckArrayIsScalarFloat(model, op->inputs[0], val); + bool input1_is_scalar = CheckArrayIsScalarFloat(model, op->inputs[1], val); + return input0_is_scalar == input1_is_scalar ? -1 : input0_is_scalar ? 0 : 1; +} +} // namespace + +bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { + const auto maximum_it = model->operators.begin() + op_index; + const auto* maximum_op = maximum_it->get(); + if (maximum_op->type != OperatorType::kTensorFlowMaximum) { + return false; + } + CHECK_EQ(maximum_op->inputs.size(), 2); + if (maximum_op->outputs.size() != 1) { + return false; + } + int scalar_input_index = + GetSingleScalarInputIndexOfBinaryOp(model, maximum_op, -1.0f); + if (scalar_input_index == -1) { + return false; + } + const auto* minimum_op = GetOpWithInput(*model, maximum_op->outputs[0]); + if (!minimum_op || minimum_op->type != OperatorType::kTensorFlowMinimum) { + return false; + } + if (GetSingleScalarInputIndexOfBinaryOp(model, minimum_op, 1.0f) == -1) { + return false; + } + CHECK_EQ(minimum_op->inputs.size(), 2); + + // Create and emplace Relu1 node + auto* relu1_op = new Relu1Operator; + relu1_op->inputs = {maximum_op->inputs[!scalar_input_index]}; + relu1_op->outputs = minimum_op->outputs; + model->operators.emplace(maximum_it, relu1_op); + + AddMessageF("Creating %s replacing equivalent subgraph", LogName(*relu1_op)); + + // Erase Maximum scalar input & operator + model->arrays.erase(maximum_op->inputs[scalar_input_index]); + model->operators.erase(FindOperator(model, maximum_op)); + + // Erase Minimum inputs & operator + model->arrays.erase(minimum_op->inputs[0]); + model->arrays.erase(minimum_op->inputs[1]); + model->operators.erase(FindOperator(model, minimum_op)); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc new file mode 100644 index 0000000000..d83603e9a2 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc @@ -0,0 +1,120 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +// This inserts an operator whose output is a float array (name: +// flags.input_array()). It has to wait for any existing operators that +// generate this output to be removed by graph transformations. Note that there +// may be more than one operator that takes the input_array as their input, and +// that some of these may be removed by graph transformations. +bool AddDequantizeOperatorToInput(const string& input_name, const Operator* op, + GraphTransformation* transformation, + Model* model) { + // An operator with the required output may be a dequantize operator already + // created. Alternatively it may be an operator that needs to be removed + // because it is unused, in which case we wait for RemoveUnusedOp to do its + // work. + if (GetOpWithOutput(*model, input_name)) { + return false; + } + + // We only apply for the first operator if there is more than one. This is + // not strictly necessary for ordering correctness, since we insert the + // dequant operator at the beginning of the op sequence, but it makes the + // insertion more predictable (eg forward vs backwards operator sweep). + if (CountOpsWithInput(*model, input_name) > 1) { + if (op != GetFirstOpWithInput(*model, input_name)) { + return false; + } + } + + auto& input_array = model->GetArray(input_name); + if (input_array.data_type != ArrayDataType::kFloat) { + return false; + } + + if (input_array.final_data_type == input_array.data_type || + input_array.final_data_type == ArrayDataType::kNone) { + return false; + } + + const auto& dequantized_input_name = + AvailableArrayName(*model, input_name + "_dequantized"); + for (auto& other_op : model->operators) { + for (string& other_op_input : other_op->inputs) { + if (other_op_input == input_name) { + other_op_input = dequantized_input_name; + } + } + } + + auto& dequantized_input_array = + model->GetOrCreateArray(dequantized_input_name); + auto* image_input_op = new DequantizeOperator; + image_input_op->inputs = {input_name}; + image_input_op->outputs = {dequantized_input_name}; + model->operators.emplace(model->operators.begin(), image_input_op); + + CHECK(input_array.final_data_type == ArrayDataType::kUint8); + input_array.data_type = ArrayDataType::kUint8; + dequantized_input_array.data_type = ArrayDataType::kFloat; + const auto& input_minmax = input_array.GetMinMax(); + auto& dequantized_input_minmax = dequantized_input_array.GetOrCreateMinMax(); + dequantized_input_minmax = input_minmax; + auto& input_qparams = input_array.GetOrCreateQuantizationParams(); + GetQuantizationParamsFromMinMax( + model->flags, input_minmax, &input_qparams); + + transformation->AddMessageF( + "Created %s" + " to handle quantized input image data, taking over existing" + " mean_value and std_value flags. Cleared those flags.", + LogName(*image_input_op)); + + return true; +} + +bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) { + // This is effectively a transformation applied to edges. We iterate over the + // specified node (op) and proceed for input edges. + const auto it = model->operators.begin() + op_index; + const auto* op = it->get(); + bool change_made = false; + for (auto& input : op->inputs) { + for (auto& input_array : *model->flags.mutable_input_arrays()) { + if (input_array.name() == input) { + if (AddDequantizeOperatorToInput(input_array.name(), op, this, model)) { + change_made = true; + input_array.clear_mean_value(); + input_array.clear_std_value(); + } + } + } + } + return change_made; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc new file mode 100644 index 0000000000..1ff4e827aa --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -0,0 +1,142 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +ArrayDataType CommonDataTypeOfAllInputs(const Model& model, + const Operator& op) { + CHECK_GT(op.inputs.size(), 0); + const ArrayDataType data_type = model.GetArray(op.inputs[0]).data_type; + for (const auto& input : op.inputs) { + const auto& array = model.GetArray(input); + CHECK(array.data_type == data_type) + << " Unexpected: this operator has inputs with different data types."; + } + return data_type; +} + +void SetDataTypeForAllOutputs(Model* model, Operator* op, + ArrayDataType data_type) { + for (const auto& output : op->outputs) { + model->arrays[output]->data_type = data_type; + } +} +} // namespace + +bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { + auto it = model->operators.begin() + op_index; + auto* op = it->get(); + + // If the data type of some input is unknown, we need to yield. + for (const auto& input : op->inputs) { + if (model->arrays[input]->data_type == ArrayDataType::kNone) { + return false; + } + } + // Record data types of output before processing, so we can see at the + // end if we changed anything, and return the correct boolean value. + std::unordered_map old_output_data_types; + for (const auto& output : op->outputs) { + old_output_data_types[output] = model->arrays[output]->data_type; + } + // Do the actual output data types propagation. + if (op->type == OperatorType::kDequantize || + op->type == OperatorType::kResizeBilinear) { + // These operators unconditionally produce float outputs + SetDataTypeForAllOutputs(model, op, ArrayDataType::kFloat); + } else if (op->type == OperatorType::kTensorFlowLess || + op->type == OperatorType::kTensorFlowLessEqual || + op->type == OperatorType::kTensorFlowGreater || + op->type == OperatorType::kTensorFlowGreaterEqual) { + // These operators unconditionally produce bool outputs + SetDataTypeForAllOutputs(model, op, ArrayDataType::kBool); + } else if (op->type == OperatorType::kTensorFlowShape) { + // These operators are assumed to produce int32 outputs. + SetDataTypeForAllOutputs(model, op, ArrayDataType::kInt32); + } else if (op->type == OperatorType::kAveragePool || + op->type == OperatorType::kMaxPool || + op->type == OperatorType::kL2Pool || + op->type == OperatorType::kConv || + op->type == OperatorType::kDepthwiseConv || + op->type == OperatorType::kFullyConnected || + op->type == OperatorType::kTensorFlowMax || + op->type == OperatorType::kTensorFlowMin || + op->type == OperatorType::kPad || + op->type == OperatorType::kStridedSlice || + op->type == OperatorType::kTensorFlowReshape || + op->type == OperatorType::kSlice || + op->type == OperatorType::kSqueeze || + op->type == OperatorType::kTensorFlowSum || + op->type == OperatorType::kTensorFlowSwitch || + op->type == OperatorType::kTensorFlowTile || + op->type == OperatorType::kTensorFlowAll || + op->type == OperatorType::kReorderAxes || + op->type == OperatorType::kTensorFlowConcatV2 || + op->type == OperatorType::kFloor || + op->type == OperatorType::kGather || + op->type == OperatorType::kSpaceToBatchND || + op->type == OperatorType::kBatchToSpaceND || + op->type == OperatorType::kMean) { + // These operators produce outputs with the same type as their 1st input + CHECK_GT(op->inputs.size(), 0); + const ArrayDataType data_type = model->arrays[op->inputs[0]]->data_type; + SetDataTypeForAllOutputs(model, op, data_type); + } else if (op->type == OperatorType::kTensorFlowSplit || + op->type == OperatorType::kTensorFlowConcat) { + // These operators produce an output with the same type as their 2nd input + CHECK_GT(op->inputs.size(), 1); + const ArrayDataType data_type = model->arrays[op->inputs[1]]->data_type; + SetDataTypeForAllOutputs(model, op, data_type); + } else if (op->type == OperatorType::kCast) { + // Data type of the Cast op is specified. + CHECK_EQ(op->outputs.size(), 1); + auto* cast_op = static_cast(op); + model->arrays[op->outputs[0]]->data_type = cast_op->dst_data_type; + } else if (op->type == OperatorType::kTensorFlowUnsupported) { + auto* unsupported_op = static_cast(op); + if (unsupported_op->output_data_types.size() != op->outputs.size()) { + return false; + } + for (int i = 0; i < unsupported_op->output_data_types.size(); ++i) { + auto output = op->outputs[i]; + auto data_type = unsupported_op->output_data_types[i]; + model->arrays[output]->data_type = data_type; + } + } else { + // These operators produce an output with the same type as any of their + // inputs, which must always have the same type. + const ArrayDataType data_type = CommonDataTypeOfAllInputs(*model, *op); + SetDataTypeForAllOutputs(model, op, data_type); + } + // Return true if any output data type changed, false if none changed. + for (const auto& output : op->outputs) { + if (old_output_data_types[output] != model->arrays[output]->data_type) { + return true; + } + } + return false; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc new file mode 100644 index 0000000000..82a43bc2ce --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -0,0 +1,1129 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +void ComputeConvSizes(const Shape& input_shape, int output_depth, int kwidth, + int kheight, int stride_width, int stride_height, + PaddingType padding_type, Shape* output_shape, + FixedPadding* fixed_padding) { + const int input_width = input_shape.dims(2); + const int input_height = input_shape.dims(1); + const int batch = input_shape.dims(0); + + int output_height = 0; + int output_width = 0; + if (padding_type == PaddingType::kValid) { + output_height = (input_height + stride_height - kheight) / stride_height; + output_width = (input_width + stride_width - kwidth) / stride_width; + } else if (padding_type == PaddingType::kSame) { + output_height = (input_height + stride_height - 1) / stride_height; + output_width = (input_width + stride_width - 1) / stride_width; + } else { + LOG(FATAL) << "Only supporting SAME or VALID padding"; + } + + fixed_padding->height = + ((output_height - 1) * stride_height + kheight - input_height) / 2; + fixed_padding->width = + ((output_width - 1) * stride_width + kwidth - input_width) / 2; + + // Actually had to debug a situation where those were negative due to bad + // propagation of placeholder -1 sizes in TensorFlowReshape. + CHECK_GT(output_width, 0); + CHECK_GT(output_height, 0); + output_shape->ReplaceDims({batch, output_height, output_width, output_depth}); +} + +void ComputeBinaryOperatorOutputSize(const Shape& input_shape1, + const Shape& input_shape2, + Array* output_array) { + const int size1 = RequiredBufferSizeForShape(input_shape1); + const int size2 = RequiredBufferSizeForShape(input_shape2); + if (size1 > size2) { + output_array->copy_shape(input_shape1); + } else if (size2 > size1) { + output_array->copy_shape(input_shape2); + } else { + CHECK_EQ(size1, size2); + const int dims1 = input_shape1.dimensions_count(); + const int dims2 = input_shape2.dimensions_count(); + if (dims1 >= dims2) { + output_array->copy_shape(input_shape1); + } else { + output_array->copy_shape(input_shape2); + } + } + CHECK(output_array->has_shape()); +} + +int GetOutputDepthFromWeights(const Model& model, const Operator& op) { + const string& weights_name = op.inputs[1]; + const auto& weights_shape = model.arrays.at(weights_name)->shape(); + if (op.type == OperatorType::kConv || + op.type == OperatorType::kFullyConnected) { + return weights_shape.dims(0); + } else if (op.type == OperatorType::kDepthwiseConv) { + return weights_shape.dims(3); + } else { + LOG(FATAL) << "Unhandled operator type"; + } +} + +bool EnsureBiasVectorShape(Model* model, Operator* op) { + const string& weights_name = op->inputs[1]; + const auto& weights_array = *model->arrays[weights_name]; + // Yield until weights shape has been resolved. + if (!weights_array.has_shape()) { + return false; + } + + if (op->inputs.size() < 3) { + return false; + } + auto& bias_array = *model->arrays[op->inputs[2]]; + if (bias_array.has_shape()) { + return true; + } + + const int output_depth = GetOutputDepthFromWeights(*model, *op); + bias_array.copy_shape(Shape({output_depth})); + + auto& float_buffer = bias_array.GetMutableBuffer(); + float_buffer.data.resize(output_depth, 0); + + return true; +} + +void ProcessConvOperator(Model* model, ConvOperator* op) { + if (!EnsureBiasVectorShape(model, op)) { + return; + } + + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + + const auto& weights_array = *model->arrays[op->inputs[1]]; + // Yield until weights dims have been resolved. + if (!weights_array.has_shape()) { + return; + } + const auto& weights_shape = weights_array.shape(); + CHECK_EQ(weights_shape.dimensions_count(), 4); + + auto& output_array = model->GetArray(op->outputs[0]); + const int output_depth = weights_shape.dims(0); + const int kheight = weights_shape.dims(1); + const int kwidth = weights_shape.dims(2); + ComputeConvSizes(input_shape, output_depth, kwidth, kheight, op->stride_width, + op->stride_height, op->padding.type, + output_array.mutable_shape(), + &op->padding.GetOrCreateFixedPadding()); + CHECK_EQ(output_array.shape().dimensions_count(), 4); + + // Set im2col array dimensions if there is one. + if (op->outputs.size() == 2) { + const auto& output_shape = output_array.shape(); + const int input_depth = weights_shape.dims(3); + auto& im2col_array = *model->arrays[op->outputs[1]]; + im2col_array.copy_shape(Shape{output_shape.dims(0), output_shape.dims(1), + output_shape.dims(2), + input_depth * kheight * kwidth}); + } +} + +void ProcessDepthwiseConvOperator(Model* model, DepthwiseConvOperator* op) { + if (!EnsureBiasVectorShape(model, op)) { + return; + } + + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + + const auto& weights_array = *model->arrays[op->inputs[1]]; + // Yield until weights dims have been resolved. + if (!weights_array.has_shape()) { + return; + } + const auto& weights_shape = weights_array.shape(); + CHECK_EQ(weights_shape.dimensions_count(), 4); + + const string& output_name = op->outputs[0]; + const int input_depth = input_shape.dims(3); + const int output_depth = weights_shape.dims(3); + // TensorFlow doesn't define the depth_multiplier value on DepthwiseConv ops, + // instead it has to be inferred from the weights dims. However, once we are + // here, weights dims have already been converted to our own internal format, + // where the multiplier is no longer readily apparent. So instead we get it + // as the quotient of output and input depths. We only want to do that when + // depth_multiplier had the zero value: any other value should be checked + // as done by the next if() below. + if (!op->depth_multiplier) { + op->depth_multiplier = output_depth / input_depth; + } + QCHECK_EQ(output_depth, input_depth * op->depth_multiplier) + << "input/output depths and depth_multiplier don't match"; + + const int kheight = weights_shape.dims(1); + const int kwidth = weights_shape.dims(2); + ComputeConvSizes(input_shape, output_depth, kwidth, kheight, op->stride_width, + op->stride_height, op->padding.type, + model->GetArray(output_name).mutable_shape(), + &op->padding.GetOrCreateFixedPadding()); +} + +void ProcessDepthToSpaceOperator(Model* model, DepthToSpaceOperator* op) { + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + + const string& output_name = op->outputs[0]; + const int block_size = op->block_size; + CHECK_NE(block_size, 0) << "Invalid block_size in " << output_name; + const int batch = input_shape.dims(0); + const int height = input_shape.dims(1); + const int width = input_shape.dims(2); + const int depth = input_shape.dims(3); + QCHECK_EQ(depth % (block_size * block_size), 0); + + model->GetArray(output_name) + .copy_shape(Shape({batch, height * block_size, width * block_size, + depth / block_size / block_size})); +} + +void ProcessSpaceToDepthOperator(Model* model, SpaceToDepthOperator* op) { + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + + const string& output_name = op->outputs[0]; + const int block_size = op->block_size; + CHECK_NE(block_size, 0) << "Invalid block_size in " << output_name; + const int batch = input_shape.dims(0); + const int height = input_shape.dims(1); + const int width = input_shape.dims(2); + const int depth = input_shape.dims(3); + QCHECK_EQ(width % block_size, 0); + QCHECK_EQ(height % block_size, 0); + + model->GetArray(output_name) + .copy_shape(Shape({batch, height / block_size, width / block_size, + depth * block_size * block_size})); +} + +void ProcessFullyConnectedOperator(Model* model, FullyConnectedOperator* op) { + if (!EnsureBiasVectorShape(model, op)) { + return; + } + + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_GE(input_shape.dimensions_count(), 1); + + const auto& weights_array = *model->arrays[op->inputs[1]]; + // Yield until weights dims have been resolved. + if (!weights_array.has_shape()) { + return; + } + const auto& weights_shape = weights_array.shape(); + + const int weights_output_depth = weights_shape.dims(0); + CHECK_EQ(weights_shape.dimensions_count(), 2); + + const int input_overall_size = RequiredBufferSizeForShape(input_shape); + const int matmul_repeats = input_overall_size / weights_shape.dims(1); + CHECK_EQ(matmul_repeats * weights_shape.dims(1), input_overall_size); + + auto& output_array = model->GetArray(op->outputs[0]); + output_array.copy_shape(Shape({matmul_repeats, weights_output_depth})); +} + +void ProcessTensorFlowReshapeOperator(Model* model, + TensorFlowReshapeOperator* op) { + auto& output_array = *model->arrays[op->outputs[0]]; + // Bail if we already have output dims + if (output_array.has_shape()) { + return; + } + + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + + const string& shape_name = op->inputs[1]; + auto& shape_array = model->GetArray(shape_name); + // Yield until the shape is resolved as a constant array + if (!shape_array.buffer) { + return; + } + CHECK(shape_array.data_type == ArrayDataType::kInt32); + // shape_data is the raw array of ints describing the shape + // in the TensorFlow node. We intentionally make a copy here, rather than + // modify wildcards in-place below, because in some graphs, the same shape + // array with a wildcard may be referenced from multiple Reshape nodes, where + // the wildcard needs to resolved to distinct values. + std::vector shape_data = + shape_array.GetBuffer().data; + // The Reshape shape may have a wildcard dim, encoded as -1. + bool has_wildcard = false; + int wildcard_index = 0; + int product_non_wildcard_dims = 1; + for (int i = 0; i < shape_data.size(); i++) { + if (shape_data[i] == -1) { + CHECK(!has_wildcard); + has_wildcard = true; + wildcard_index = i; + } else { + product_non_wildcard_dims *= shape_data[i]; + } + } + const int input_flat_size = RequiredBufferSizeForShape(input_shape); + if (has_wildcard) { + shape_data[wildcard_index] = input_flat_size / product_non_wildcard_dims; + } + auto& output_shape = *output_array.mutable_shape(); + *output_shape.mutable_dims() = shape_data; + const int output_flat_size = RequiredBufferSizeForShape(output_shape); + CHECK_EQ(output_flat_size, input_flat_size); +} + +void ProcessSimpleOperator(Model* model, Operator* op) { + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + + const string& output_name = op->outputs[0]; + auto& output_array = *model->arrays[output_name]; + if (output_array.has_shape()) { + return; + } + + output_array.copy_shape(input_array.shape()); +} + +void ProcessSimpleBinaryOperator(Model* model, Operator* op) { + CHECK_EQ(op->inputs.size(), 2); + const auto& input0_array = *model->arrays[op->inputs[0]]; + const auto& input1_array = *model->arrays[op->inputs[1]]; + // Yield until input dims have been resolved. + if (!input0_array.has_shape() || !input1_array.has_shape()) { + return; + } + const string& output_name = op->outputs[0]; + auto& output_array = *model->arrays[output_name]; + ComputeBinaryOperatorOutputSize(input0_array.shape(), input1_array.shape(), + &output_array); +} + +void ProcessTensorFlowReductionOperator(Model* model, Operator* op) { + CHECK_LE(op->inputs.size(), 2); + auto& output_array = *model->arrays[op->outputs[0]]; + if (output_array.has_shape()) { + return; + } + if (op->inputs.size() == 2) { + // There is a reduction_indices input. + const auto& input_array = *model->arrays[op->inputs[0]]; + const auto& reduction_array = *model->arrays[op->inputs[1]]; + if (!reduction_array.buffer) { + return; + } + if (!input_array.has_shape()) { + return; + } + auto& input_shape = input_array.shape(); + CHECK(reduction_array.buffer->type == ArrayDataType::kInt32); + const auto& reduction_array_vals = + reduction_array.GetBuffer().data; + auto& output_dims = *output_array.mutable_shape()->mutable_dims(); + output_dims.clear(); + for (int i = 0; i < input_shape.dimensions_count(); i++) { + bool is_reduction_dim = false; + for (int r : reduction_array_vals) { + if (i == r) { + is_reduction_dim = true; + } + } + if (!is_reduction_dim) { + output_dims.push_back(input_shape.dims(i)); + } + } + } else { + // No reduction_indices means complete reduction to a single scalar. + output_array.copy_shape(Shape({})); + } +} + +void ProcessSliceOperator(Model* model, SliceOperator* op) { + CHECK_EQ(op->inputs.size(), 3); + CHECK_EQ(op->outputs.size(), 1); + + // Yield until the Slice params have been resolved. + if (op->begin.empty()) return; + + // Yield until input dims have been resolved. + const auto& input_array = *model->arrays[op->inputs[0]]; + if (!input_array.has_shape()) return; + const Shape& input_shape = input_array.shape(); + + auto& output_array = *model->arrays[op->outputs[0]]; + if (output_array.has_shape()) return; + + CHECK_EQ(input_shape.dims().size(), op->size.size()); + CHECK_EQ(op->begin.size(), op->size.size()); + + std::vector output_dims; + for (int i = 0; i < op->begin.size(); ++i) { + int size = op->size[i]; + if (size == -1) { + size = input_array.shape().dims(i) - op->begin[i]; + } + output_dims.push_back(size); + } + + *output_array.mutable_shape()->mutable_dims() = output_dims; +} + +void ProcessReorderAxesOperator(Model* model, ReorderAxesOperator* op) { + const string& input_name = op->inputs[0]; + const auto& input_array = *model->arrays[input_name]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + const string& output_name = op->outputs[0]; + Shape* output_shape = model->GetArray(output_name).mutable_shape(); + ShuffleDims(input_shape, op->input_axes_order, op->output_axes_order, + output_shape); +} + +void ProcessConcatenationOperator(Model* model, ConcatenationOperator* op) { + // Yield until input dims have been resolved. + for (const auto& input_name : op->inputs) { + auto& input_array = *model->arrays[input_name]; + if (!input_array.has_shape()) { + return; + } + } + auto& output_array = model->GetArray(op->outputs[0]); + // Use 0 input as basis for output dimensions. + const auto& first_input_array = *model->arrays[op->inputs[0]]; + output_array.copy_shape(first_input_array.shape()); + // Determine the concat size, and enfore that all inputs have + // the same dimensions count. + int concat_size = 0; + for (const auto& input_name : op->inputs) { + auto& input_array = *model->arrays[input_name]; + CHECK(input_array.has_shape()); + if (input_array.shape().dimensions_count() == 0) { + continue; + } + CHECK_EQ(input_array.shape().dimensions_count(), + output_array.shape().dimensions_count()); + const std::vector& input_dims = input_array.shape().dims(); + CHECK_LT(op->concat_dim, input_dims.size()); + concat_size += input_dims[op->concat_dim]; + } + // Write out the concat_size on the output array shape. + auto& output_shape = *output_array.mutable_shape(); + auto& output_dims = *output_shape.mutable_dims(); + CHECK_LT(op->concat_dim, output_shape.dimensions_count()); + output_dims[op->concat_dim] = concat_size; +} + +void ProcessTensorFlowSplitOperator(Model* model, TensorFlowSplitOperator* op) { + CHECK_EQ(op->inputs.size(), 2); + const string& input_name = op->inputs[1]; + const auto& input_array = *model->arrays[input_name]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const Shape& input_shape = input_array.shape(); + + // This code is slightly suspect. The TensorFlow docs say that the axis + // selection defaults to 0, but we are splitting across the final axis. + const int input_dims_count = input_shape.dimensions_count(); + const int input_depth = input_shape.dims(input_dims_count - 1); + CHECK_EQ(input_depth % op->num_split, 0); + const int split_depth = input_depth / op->num_split; + + Shape output_shape = input_shape; + (*output_shape.mutable_dims())[input_dims_count - 1] = split_depth; + + CHECK_EQ(op->outputs.size(), op->num_split); + for (const auto& output : op->outputs) { + model->arrays[output]->copy_shape(output_shape); + } +} + +void ProcessAveragePoolOperator(Model* model, AveragePoolOperator* op) { + const string& input_name = op->inputs[0]; + const auto& input_array = *model->arrays[input_name]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + const string& output_name = op->outputs[0]; + const int output_depth = input_shape.dims(3); + ComputeConvSizes(input_shape, output_depth, op->kwidth, op->kheight, + op->stride_width, op->stride_height, op->padding.type, + model->GetArray(output_name).mutable_shape(), + &op->padding.GetOrCreateFixedPadding()); +} + +void ProcessMaxPoolOperator(Model* model, MaxPoolOperator* op) { + const string& input_name = op->inputs[0]; + const auto& input_array = *model->arrays[input_name]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + const string& output_name = op->outputs[0]; + const int output_depth = input_shape.dims(3); + ComputeConvSizes(input_shape, output_depth, op->kwidth, op->kheight, + op->stride_width, op->stride_height, op->padding.type, + model->GetArray(output_name).mutable_shape(), + &op->padding.GetOrCreateFixedPadding()); +} + +void ProcessL2PoolOperator(Model* model, L2PoolOperator* op) { + const string& input_name = op->inputs[0]; + const auto& input_array = *model->arrays[input_name]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + if (input_shape.dimensions_count() < 4) { + LOG(FATAL) << "missing dimensions for " << input_name; + } + const string& output_name = op->outputs[0]; + const int output_depth = input_shape.dims(3); + ComputeConvSizes(input_shape, output_depth, op->kwidth, op->kheight, + op->stride_width, op->stride_height, op->padding.type, + model->GetArray(output_name).mutable_shape(), + &op->padding.GetOrCreateFixedPadding()); +} + +void ProcessResizeBilinearOperator(Model* model, ResizeBilinearOperator* op) { + CHECK_EQ(op->inputs.size(), 2); + CHECK_EQ(op->outputs.size(), 1); + + if (!model->arrays[op->inputs[0]]->has_shape() || + !model->arrays[op->inputs[1]]->has_shape()) { + return; + } + const auto& input_data_shape = model->arrays[op->inputs[0]]->shape(); + + const string& output_size_name = op->inputs[1]; + const auto& output_size_array = *model->arrays[output_size_name]; + CHECK(output_size_array.data_type == ArrayDataType::kInt32); + CHECK(output_size_array.has_shape()); + const auto& output_size_shape = output_size_array.shape(); + CHECK_EQ(output_size_shape.dimensions_count(), 1); + CHECK_EQ(output_size_shape.dims(0), 2); + std::vector output_shape = + output_size_array.GetBuffer().data; + model->arrays[op->outputs[0]]->copy_shape( + Shape({input_data_shape.dims(0), output_shape[0], output_shape[1], + input_data_shape.dims(3)})); +} + +void ProcessLstmCellOperator(Model* model, LstmCellOperator* op) { + // I/O arrays should be allocated on creation of op. + QCHECK_EQ(op->inputs.size(), LstmCellOperator::NUM_INPUTS); + QCHECK_EQ(op->outputs.size(), LstmCellOperator::NUM_OUTPUTS); + + const auto& input_array = + *model->arrays[op->inputs[LstmCellOperator::DATA_INPUT]]; + // Yield until all input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_GE(input_shape.dimensions_count(), 2); + + const auto& prev_activ_array = + *model->arrays[op->inputs[LstmCellOperator::PREV_ACTIV_INPUT]]; + // Yield until all input dims have been resolved. + if (!prev_activ_array.has_shape()) { + return; + } + const auto& prev_activ_shape = prev_activ_array.shape(); + CHECK_GE(prev_activ_shape.dimensions_count(), 2); + + const auto& weights_array = + *model->arrays[op->inputs[LstmCellOperator::WEIGHTS_INPUT]]; + // Yield until weights dims have been resolved. + if (!weights_array.has_shape()) { + return; + } + const auto& weights_shape = weights_array.shape(); + CHECK_EQ(weights_shape.dimensions_count(), 2); + + const auto& bias_array = + *model->arrays[op->inputs[LstmCellOperator::BIASES_INPUT]]; + // Yield until bias dims have been resolved. + if (!bias_array.has_shape()) { + return; + } + const auto& bias_shape = bias_array.shape(); + CHECK_GE(bias_shape.dimensions_count(), 1); + + const auto& prev_state_array = + *model->arrays[op->inputs[LstmCellOperator::PREV_STATE_INPUT]]; + // Yield until all input dims have been resolved. + if (!prev_state_array.has_shape()) { + return; + } + const auto& prev_state_shape = prev_state_array.shape(); + CHECK_GE(prev_state_shape.dimensions_count(), 2); + + const int fc_output_depth = weights_shape.dims(0); + CHECK_EQ(fc_output_depth, bias_shape.dims(0)); + CHECK_EQ(fc_output_depth % 4, 0); + const int depth = fc_output_depth / 4; + + const int input_depth = input_shape.dims(input_shape.dimensions_count() - 1); + const int fc_input_depth = weights_shape.dims(1); + CHECK_EQ(input_depth + depth, fc_input_depth); + Shape output_shape(input_shape); + (*output_shape.mutable_dims())[output_shape.dimensions_count() - 1] = depth; + + // Set output dimensions + model->GetArray(op->outputs[LstmCellOperator::STATE_OUTPUT]) + .copy_shape(output_shape); + model->GetArray(op->outputs[LstmCellOperator::ACTIV_OUTPUT]) + .copy_shape(output_shape); + + Shape concat_temp_shape(input_shape); + (*concat_temp_shape + .mutable_dims())[concat_temp_shape.dimensions_count() - 1] = + fc_input_depth; + model->GetArray(op->outputs[LstmCellOperator::CONCAT_TEMP]) + .copy_shape(concat_temp_shape); + + Shape activ_temp_shape(input_shape); + (*activ_temp_shape.mutable_dims())[activ_temp_shape.dimensions_count() - 1] = + fc_output_depth; + model->GetArray(op->outputs[LstmCellOperator::ACTIV_TEMP]) + .copy_shape(activ_temp_shape); +} + +void ProcessSpaceToBatchNDOperator(Model* model, SpaceToBatchNDOperator* op) { + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + const auto input_height = input_shape.dims(1); + const auto input_width = input_shape.dims(2); + + const auto& block_shape_array = *model->arrays[op->inputs[1]]; + const auto& paddings_array = *model->arrays[op->inputs[2]]; + const auto& block_shape_array_shape = block_shape_array.shape(); + const auto& paddings_array_shape = paddings_array.shape(); + QCHECK_EQ(block_shape_array_shape.dimensions_count(), 1); + QCHECK_EQ(paddings_array_shape.dimensions_count(), 2); + + // We only support two dimensions. + QCHECK_EQ(block_shape_array_shape.dims(0), 2); + if (!block_shape_array.buffer) { + return; + } + QCHECK(block_shape_array.data_type == ArrayDataType::kInt32); + const auto& block_shape_data = + block_shape_array.GetBuffer().data; + auto block_height = block_shape_data[0]; + auto block_width = block_shape_data[1]; + + QCHECK_EQ(paddings_array_shape.dims(0), 2); // Number of block dimensions + QCHECK_EQ(paddings_array_shape.dims(1), 2); // Two parameters per dimension. + if (!paddings_array.buffer) { + return; + } + QCHECK(paddings_array.data_type == ArrayDataType::kInt32); + const auto& paddings_data = + paddings_array.GetBuffer().data; + int height_with_paddings = input_height + paddings_data[0] + paddings_data[1]; + int width_with_paddings = input_width + paddings_data[2] + paddings_data[3]; + QCHECK_EQ(height_with_paddings % block_height, 0); + QCHECK_EQ(width_with_paddings % block_width, 0); + int output_height = height_with_paddings / block_height; + int output_width = width_with_paddings / block_width; + + model->arrays[op->outputs[0]]->copy_shape( + Shape({input_shape.dims(0) * block_height * block_width, output_height, + output_width, input_shape.dims(3)})); +} + +void ProcessBatchToSpaceNDOperator(Model* model, BatchToSpaceNDOperator* op) { + const auto& input_array = *model->arrays[op->inputs[0]]; + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4); + const auto input_height = input_shape.dims(1); + const auto input_width = input_shape.dims(2); + + const auto& block_shape_array = *model->arrays[op->inputs[1]]; + const auto& crops_array = *model->arrays[op->inputs[2]]; + const auto& block_shape_array_shape = block_shape_array.shape(); + const auto& crops_array_shape = crops_array.shape(); + QCHECK_EQ(block_shape_array_shape.dimensions_count(), 1); + QCHECK_EQ(crops_array_shape.dimensions_count(), 2); + + // We only support two dimensions. + QCHECK_EQ(block_shape_array_shape.dims(0), 2); + if (!block_shape_array.buffer) { + return; + } + QCHECK(block_shape_array.data_type == ArrayDataType::kInt32); + const auto& block_shape_data = + block_shape_array.GetBuffer().data; + auto block_height = block_shape_data[0]; + auto block_width = block_shape_data[1]; + + QCHECK_EQ(crops_array_shape.dims(0), 2); // Number of block dimensions + QCHECK_EQ(crops_array_shape.dims(1), 2); // Two parameters per dimension. + if (!crops_array.buffer) { + return; + } + QCHECK(crops_array.data_type == ArrayDataType::kInt32); + const auto& crops_data = crops_array.GetBuffer().data; + // We don't support crops now. + QCHECK_EQ(crops_data[0], 0); + QCHECK_EQ(crops_data[1], 0); + QCHECK_EQ(crops_data[2], 0); + QCHECK_EQ(crops_data[3], 0); + + QCHECK_EQ(input_shape.dims(0) % (block_height * block_width), 0); + + int output_height = input_height * block_height; + int output_width = input_width * block_width; + + model->arrays[op->outputs[0]]->copy_shape( + Shape({input_shape.dims(0) / (block_height * block_width), output_height, + output_width, input_shape.dims(3)})); +} + +void ProcessGatherOperator(Model* model, GatherOperator* op) { + const auto& input_array = *model->arrays[op->inputs[0]]; + const auto& indices_array = *model->arrays[op->inputs[1]]; + auto& output_array = *model->arrays[op->outputs[0]]; + + // Bail if we already know the output shape. + if (output_array.has_shape()) { + return; + } + + // Yield until input dims have been resolved. + if (!input_array.has_shape() || !indices_array.has_shape()) { + return; + } + + const auto& input_shape = input_array.shape(); + const auto& indices_shape = indices_array.shape(); + QCHECK_GE(input_shape.dimensions_count(), 1); + op->input_rank = input_shape.dimensions_count(); + + // We only support 1-D indices. + QCHECK_EQ(indices_shape.dimensions_count(), 1); + + // Copy the input dimensions to the output except for dimension 0, + // where the dimension of indices_shape is used. + auto output_dims = output_array.mutable_shape()->mutable_dims(); + output_dims->push_back(indices_shape.dims(0)); + for (int dim = 1; dim < input_shape.dimensions_count(); dim++) { + output_dims->push_back(input_shape.dims(dim)); + } +} + +void ProcessPadOperator(Model* model, PadOperator* op) { + CHECK_EQ(op->inputs.size(), 2); + CHECK_EQ(op->outputs.size(), 1); + + const auto& input_array = *model->arrays[op->inputs[0]]; + + // Yield until input dims have been resolved. + if (!input_array.has_shape()) return; + + if (op->left_padding.empty()) return; + CHECK_EQ(op->left_padding.size(), op->right_padding.size()); + + auto& output_array = *model->arrays[op->outputs[0]]; + if (output_array.has_shape()) return; + + Shape output_shape = input_array.shape(); + std::vector& dims = *output_shape.mutable_dims(); + CHECK_EQ(op->left_padding.size(), dims.size()); + + for (int i = 0; i < op->left_padding.size(); ++i) { + dims[i] += op->left_padding[i] + op->right_padding[i]; + } + + output_array.copy_shape(output_shape); +} + +void ProcessMeanOperator(Model* model, MeanOperator* op) { + CHECK_EQ(op->inputs.size(), 2); + CHECK_EQ(op->outputs.size(), 1); + + const auto& input_array = *model->arrays[op->inputs[0]]; + + // Yield until input dims have been resolved. + if (!input_array.has_shape()) return; + const std::vector& indices = op->reduction_indices; + if (indices.empty()) return; + + auto& output_array = *model->arrays[op->outputs[0]]; + if (output_array.has_shape()) return; + + const std::vector& input_dims = input_array.shape().dims(); + std::vector output_dims; + for (int i = 0; i < input_dims.size(); ++i) { + if (std::find(indices.begin(), indices.end(), i) == indices.end()) { + output_dims.push_back(input_dims[i]); + } + } + CHECK(!output_dims.empty()); + CHECK_EQ(output_dims.size(), 2); + + *output_array.mutable_shape()->mutable_dims() = output_dims; +} + +void ProcessStridedSliceOperator(Model* model, StridedSliceOperator* op) { + CHECK_EQ(op->inputs.size(), 4); + CHECK_EQ(op->outputs.size(), 1); + + const auto& input_array = *model->arrays[op->inputs[0]]; + + // Yield until input dims have been resolved. + if (!input_array.has_shape()) return; + + if (op->start_indices.empty()) return; + CHECK_EQ(op->start_indices.size(), op->stop_indices.size()); + CHECK_EQ(op->start_indices.size(), op->strides.size()); + + auto& output_array = *model->arrays[op->outputs[0]]; + if (output_array.has_shape()) return; + + Shape output_shape = input_array.shape(); + std::vector& dims = *output_shape.mutable_dims(); + CHECK_EQ(op->start_indices.size(), dims.size()); + + for (int i = 0; i < op->start_indices.size(); ++i) { + const int mask = 1 << i; + const int start = (op->begin_mask & mask) ? 0 : op->start_indices[i]; + const int stop = (op->end_mask & mask) ? input_array.shape().dims()[i] + : op->stop_indices[i]; + dims[i] = (stop - start) / op->strides[i]; + } + + output_array.copy_shape(output_shape); +} + +void ProcessSqueezeOperator(Model* model, SqueezeOperator* op) { + CHECK_EQ(op->inputs.size(), 1); + CHECK_EQ(op->outputs.size(), 1); + + const auto& input_array = *model->arrays[op->inputs[0]]; + + // Yield until input dims have been resolved. + if (!input_array.has_shape()) return; + + auto& output_array = *model->arrays[op->outputs[0]]; + if (output_array.has_shape()) return; + + const std::vector& input_dims = input_array.shape().dims(); + std::vector output_dims; + + for (int i = 0; i < input_dims.size(); ++i) { + if (input_dims[i] != 1 || + (!op->squeeze_dims.empty() && + std::find(op->squeeze_dims.begin(), op->squeeze_dims.end(), i) == + op->squeeze_dims.end())) { + output_dims.push_back(input_dims[i]); + } + } + *output_array.mutable_shape()->mutable_dims() = output_dims; +} + +void ProcessSvdfOperator(Model* model, SvdfOperator* op) { + CHECK(op->inputs.size() == 3 || op->inputs.size() == 4); + const auto& input_array = *model->arrays[op->inputs[0]]; + if (!input_array.has_shape()) return; + + auto& weights_feature_array = *model->arrays[op->inputs[1]]; + if (!weights_feature_array.has_shape()) return; + + const auto& weights_time_array = *model->arrays[op->inputs[2]]; + if (!weights_time_array.has_shape()) return; + + const bool has_bias = (op->inputs.size() == 4); + if (has_bias) { + const auto& bias_array = *model->arrays[op->inputs[3]]; + if (!bias_array.has_shape()) return; + } + + const int batch_size = input_array.shape().dims()[0]; + const int num_units = weights_feature_array.shape().dims()[0]; + const int memory_size = weights_time_array.shape().dims()[1]; + + auto& state_array = model->GetArray(op->outputs[0]); + state_array.mutable_shape()->ReplaceDims( + {batch_size, memory_size * num_units}); + + auto& output_array = model->GetArray(op->outputs[1]); + output_array.mutable_shape()->ReplaceDims({batch_size, num_units}); +} +} // namespace + +bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { + auto it = model->operators.begin() + op_index; + auto* op = it->get(); + std::unordered_map> old_output_dims; + for (const auto& output : op->outputs) { + if (model->arrays[output]->has_shape()) { + old_output_dims[output] = model->arrays[output]->shape().dims(); + } + } + + switch (op->type) { + case OperatorType::kBatchNormalization: + case OperatorType::kL2Normalization: + case OperatorType::kDequantize: + case OperatorType::kRelu: + case OperatorType::kRelu1: + case OperatorType::kRelu6: + case OperatorType::kSoftmax: + case OperatorType::kLogistic: + case OperatorType::kTanh: + case OperatorType::kLocalResponseNormalization: + case OperatorType::kTensorFlowIdentity: + case OperatorType::kFakeQuant: + case OperatorType::kTensorFlowRsqrt: + case OperatorType::kTensorFlowSqrt: + case OperatorType::kTensorFlowSquare: + case OperatorType::kTensorFlowAll: + case OperatorType::kTensorFlowAssert: + case OperatorType::kCast: + case OperatorType::kFloor: + ProcessSimpleOperator(model, op); + break; + case OperatorType::kGather: + ProcessGatherOperator(model, static_cast(op)); + break; + + case OperatorType::kAdd: + case OperatorType::kSub: + case OperatorType::kMul: + case OperatorType::kDiv: + case OperatorType::kTensorFlowLess: + case OperatorType::kTensorFlowLessEqual: + case OperatorType::kTensorFlowGreater: + case OperatorType::kTensorFlowMaximum: + case OperatorType::kTensorFlowMinimum: + case OperatorType::kTensorFlowGreaterEqual: + ProcessSimpleBinaryOperator(model, op); + break; + case OperatorType::kConv: + ProcessConvOperator(model, static_cast(op)); + break; + case OperatorType::kDepthwiseConv: + ProcessDepthwiseConvOperator(model, + static_cast(op)); + break; + case OperatorType::kDepthToSpace: + ProcessDepthToSpaceOperator(model, + static_cast(op)); + break; + case OperatorType::kSpaceToDepth: + ProcessSpaceToDepthOperator(model, + static_cast(op)); + break; + case OperatorType::kFullyConnected: + ProcessFullyConnectedOperator(model, + static_cast(op)); + break; + case OperatorType::kTensorFlowReshape: + ProcessTensorFlowReshapeOperator( + model, static_cast(op)); + break; + case OperatorType::kAveragePool: + ProcessAveragePoolOperator(model, static_cast(op)); + break; + case OperatorType::kMaxPool: + ProcessMaxPoolOperator(model, static_cast(op)); + break; + case OperatorType::kL2Pool: + ProcessL2PoolOperator(model, static_cast(op)); + break; + case OperatorType::kTensorFlowMin: + case OperatorType::kTensorFlowMax: + case OperatorType::kTensorFlowSum: + ProcessTensorFlowReductionOperator(model, op); + break; + + case OperatorType::kSlice: + ProcessSliceOperator(model, static_cast(op)); + break; + + case OperatorType::kTensorFlowTile: + // We don't currently implement the propagation of fixed sizes through + // a TensorFlow Tile. + // + // Fortunately, we don't need to: so far, we have only dealt with Tile + // or Slice ops in subgraphs that are identified as L2Normalization. + // See IdentifyL2Normalization. + break; + case OperatorType::kTensorFlowSwitch: + // We can't know the sizes of the outputs until we have resolved the + // predicate, and once we have resolved the predicate, the whole + // Switch node will get resolved away. + // See ResolveTensorFlowSwitch. + break; + case OperatorType::kTensorFlowMerge: + // No need to bother resolving TensorFlow Merge ops: other graph + // transformations will remove them anyway. + // See ResolveTensorFlowMerge. + break; + case OperatorType::kTensorFlowSplit: + ProcessTensorFlowSplitOperator(model, + static_cast(op)); + break; + case OperatorType::kSqueeze: + ProcessSqueezeOperator(model, static_cast(op)); + break; + case OperatorType::kTensorFlowConcat: + case OperatorType::kTensorFlowConcatV2: + // Unimplemented, hopefully another graph transformation will + // drop it or rewrite it. Concretely, either ResolveTensorFlowConcat + // will resolve this node to a DepthConcatenation, or else we have + // a more general non-depth concatenation that will hopefully be dropped, + // or else at the moment we will abort. + break; + case OperatorType::kTensorFlowShape: + // Unimplemented, hopefully another graph transformation will drop it or + // rewrite it. + break; + case OperatorType::kReorderAxes: + ProcessReorderAxesOperator(model, static_cast(op)); + break; + case OperatorType::kConcatenation: + ProcessConcatenationOperator(model, + static_cast(op)); + break; + case OperatorType::kResizeBilinear: + ProcessResizeBilinearOperator(model, + static_cast(op)); + break; + case OperatorType::kLstmCell: + ProcessLstmCellOperator(model, static_cast(op)); + break; + case OperatorType::kTensorFlowMatMul: + // MatMul operators are converted to FullyConnected, after which their + // shapes are propagated. + break; + case OperatorType::kSpaceToBatchND: + ProcessSpaceToBatchNDOperator(model, + static_cast(op)); + break; + case OperatorType::kBatchToSpaceND: + ProcessBatchToSpaceNDOperator(model, + static_cast(op)); + break; + case OperatorType::kPad: + ProcessPadOperator(model, static_cast(op)); + break; + case OperatorType::kMean: + ProcessMeanOperator(model, static_cast(op)); + break; + case OperatorType::kStridedSlice: + ProcessStridedSliceOperator(model, + static_cast(op)); + break; + case OperatorType::kTensorFlowUnsupported: + break; + case OperatorType::kSvdf: + ProcessSvdfOperator(model, static_cast(op)); + break; + default: + // Unimplemented, another graph transformation should drop it. + LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(op->type); + } + + // Return true if any output dim changed, false if none changed. + // Assumption: no transformation clears an output shape, they only add shapes. + for (const auto& output : op->outputs) { + if (model->arrays[output]->has_shape() && + (old_output_dims[output] != model->arrays[output]->shape().dims())) { + return true; + } + } + return false; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc new file mode 100644 index 0000000000..5551755ea7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -0,0 +1,467 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool SupportsQuantization(const Operator& op) { + auto type = op.type; + if (type == OperatorType::kTensorFlowUnsupported) { + auto* unsupported = static_cast(&op); + return unsupported->quantized; + } + return type == OperatorType::kConv || type == OperatorType::kDepthwiseConv || + type == OperatorType::kFullyConnected || + type == OperatorType::kConcatenation || + type == OperatorType::kL2Normalization || type == OperatorType::kAdd || + type == OperatorType::kAveragePool || type == OperatorType::kMaxPool || + type == OperatorType::kLogistic || type == OperatorType::kSoftmax || + type == OperatorType::kTensorFlowReshape || + type == OperatorType::kMul || type == OperatorType::kSpaceToDepth || + type == OperatorType::kDepthToSpace; +} + +template +std::unique_ptr QuantizeBuffer( + const GenericBuffer& buffer, + const QuantizationParams& quantization_params) { + const auto inverse_scale = 1. / quantization_params.scale; + CHECK(buffer.type == ArrayDataType::kFloat); + const auto& float_buffer = + static_cast&>(buffer); + auto* quantized_buffer = new Buffer; + quantized_buffer->data.resize(float_buffer.data.size()); + const auto qmin = static_cast(std::numeric_limits>::min()); + const auto qmax = static_cast(std::numeric_limits>::max()); + for (std::size_t i = 0; i < float_buffer.data.size(); i++) { + const float src_val = float_buffer.data[i]; + double scaled_val; // Astonishingly, using 'float' degrades accuracy just + // enough to make a few tests fail! + if (quantization_params.scale == 0) { + CHECK_EQ(src_val, 0) << "The quantization scale for this array is 0, " + << "so all its values should be 0."; + scaled_val = quantization_params.zero_point; + } else { + scaled_val = quantization_params.zero_point + inverse_scale * src_val; + } + const auto rounded_val = static_cast(std::round(scaled_val)); + const auto clamped_val = std::min(qmax, std::max(qmin, rounded_val)); + quantized_buffer->data[i] = static_cast>(clamped_val); + } + return std::unique_ptr(quantized_buffer); +} + +template +void QuantizeArray(GraphTransformation* transformation, Model* model, + const string& name, + const QuantizationParams& quantization_params) { + auto& array = model->GetArray(name); + CHECK(array.data_type == ArrayDataType::kFloat); + CHECK(!array.quantization_params); + array.GetOrCreateQuantizationParams() = quantization_params; + if (array.buffer) { + array.buffer = QuantizeBuffer(*array.buffer, quantization_params); + } + array.data_type = A; + transformation->AddMessageF("Quantized array %s", name); +} + +void QuantizeArray(GraphTransformation* transformation, Model* model, + const string& name, ArrayDataType quantized_data_type, + const QuantizationParams& quantization_params) { + switch (quantized_data_type) { + case ArrayDataType::kUint8: + return QuantizeArray(transformation, model, name, + quantization_params); + case ArrayDataType::kInt32: + return QuantizeArray(transformation, model, name, + quantization_params); + default: + LOG(FATAL) << "Unhandled case."; + } +} + +const MinMax& GetOrComputeMinMax(Model* model, const string& array_name) { + auto& array = model->GetArray(array_name); + // Normally we should have a MinMax recorded on this Array, + // so we just use it. + if (array.minmax != nullptr) { + return *array.minmax; + } + + // We don't have a MinMax. That's bad news: we need + // the graph to provide MinMax info for all arrays in order + // for inference to reproduce faithfully the same quantization + // error as the training process had. + // + // But we still want to support a fallback for constant arrays, + // just using the plain min and max computed from array elements. + // We should hopefully never rely on that in production, as that + // will not give very good accuracy as that typically won't be + // exactly what the training process used. But it will be useful + // to allow easily trying out quantization even if the graph + // lacks some minmax information. + if (array.buffer != nullptr) { + LOG(WARNING) + << "Constant array " << array_name + << " lacks MinMax information. To make up for that, we will now compute" + << " the MinMax from actual array elements. That will result in" + << " quantization parameters that probably do not match whichever " + "arithmetic" + << " was used during training, and thus will probably be a cause of " + "poor" + << " inference accuracy."; + CHECK(array.buffer->type == ArrayDataType::kFloat); + const auto& data = array.GetBuffer().data; + // We always want [min, max] to contain 0. + float min = 0.f; + float max = 0.f; + for (auto val : data) { + min = std::min(min, val); + max = std::max(max, val); + } + auto& minmax = array.GetOrCreateMinMax(); + minmax.min = min; + minmax.max = max; + return minmax; + } + + LOG(FATAL) << "Array " << array_name + << " does not have MinMax information, " + "and is not a constant array. Cannot " + "proceed with quantization."; +} + +bool ChooseQuantizationForOperatorInput( + GraphTransformation* transformation, Model* model, const Operator& op, + std::size_t input_index, ArrayDataType* quantized_data_type, + QuantizationParams* quantization_params) { + const auto& input = op.inputs[input_index]; + auto& array = model->GetArray(input); + if (array.data_type != ArrayDataType::kFloat) { + return false; + } + if (op.type == OperatorType::kConv || + op.type == OperatorType::kDepthwiseConv || + op.type == OperatorType::kFullyConnected) { + if (input_index == 2) { + // Quantization of bias vector. + // We need both of the mandatory inputs (input activations and weights) to + // have + // been already quantized. + const auto& input_activations = model->GetArray(op.inputs[0]); + const auto& input_weights = model->GetArray(op.inputs[1]); + if (!input_activations.quantization_params || + !input_weights.quantization_params) { + return false; + } + const auto input_activations_scale = + input_activations.quantization_params->scale; + const auto input_weights_scale = input_weights.quantization_params->scale; + quantization_params->scale = + input_activations_scale * input_weights_scale; + quantization_params->zero_point = 0; + *quantized_data_type = ArrayDataType::kInt32; + transformation->AddMessageF( + "Input array %s is a bias vector. Choosing quantization params " + "accordingly.", + input); + return true; + } + } + + const MinMax& minmax = GetOrComputeMinMax(model, input); + GetQuantizationParamsFromMinMax(model->flags, minmax, + quantization_params); + transformation->AddMessageF( + "For input array %s with min=%g" + ", max=%g" + ", chose to quantize as uint8 with zero_point=%d" + ", scale=%g", + input, minmax.min, minmax.max, quantization_params->zero_point, + quantization_params->scale); + *quantized_data_type = ArrayDataType::kUint8; + return true; +} + +bool IsExactlyRepresentable(double real_value, ArrayDataType data_type, + const QuantizationParams& quantization_params) { + const double scaled_value = + quantization_params.zero_point + real_value / quantization_params.scale; + const double fractional_scaled_value = + scaled_value - std::round(scaled_value); + if (std::abs(fractional_scaled_value) > 1e-12) { + return false; + } + const double rounded_scaled_value = std::round(scaled_value); + if (data_type == ArrayDataType::kUint8) { + if (rounded_scaled_value < 0 || rounded_scaled_value > 255) { + return false; + } + } + return true; +} + +bool ChooseHardcodedQuantizationForOperatorOutput( + const Operator& op, ArrayDataType* quantized_data_type, + QuantizationParams* quantization_params) { + if (op.type == OperatorType::kL2Normalization) { + // L2Normalization has range: [-1, 1]. + // 0 should be exactly representable, as values will typically be centered + // around 0, with many values near 0. + *quantized_data_type = ArrayDataType::kUint8; + quantization_params->zero_point = 128; + quantization_params->scale = 1. / 128.; + CHECK( + IsExactlyRepresentable(0., *quantized_data_type, *quantization_params)); + return true; + } + if ((op.type == OperatorType::kLogistic) || + (op.type == OperatorType::kSoftmax)) { + // Logistic and Softmax have range: [0, 1]. + // + // For Logistic, 0.5 should be exactly representable, as implementations + // will typically exploit the symmetry logistic(-x) = 1 - logistic(x), and + // the glueing of the two halves of the graph will only be seamless if we + // are accurately representing logistic(0) == 0.5. + *quantized_data_type = ArrayDataType::kUint8; + quantization_params->zero_point = 0; + quantization_params->scale = 1. / 256.; + CHECK(IsExactlyRepresentable(0.5, *quantized_data_type, + *quantization_params)); + return true; + } + return false; +} + +bool ChooseQuantizationForOperatorOutput( + GraphTransformation* transformation, Model* model, const Operator& op, + std::size_t output_index, ArrayDataType* quantized_data_type, + QuantizationParams* quantization_params) { + const auto& output = op.outputs[output_index]; + auto& array = model->GetArray(output); + if (array.data_type != ArrayDataType::kFloat) { + return false; + } + if (ChooseHardcodedQuantizationForOperatorOutput(op, quantized_data_type, + quantization_params)) { + transformation->AddMessageF( + "Output array %s is produced by a %s operator. Choosing fixed " + "quantization params accordingly.", + output, OperatorTypeName(op.type)); + return true; + } + if ((op.type == OperatorType::kDepthToSpace) || + (op.type == OperatorType::kSpaceToDepth)) { + // DepthToSpace and SpaceToDepth should preserve the quantization parameters + // of the input array, as these are simple reshape operations. + const auto& input_quantization_params = + model->GetArray(op.inputs[0]).GetQuantizationParams(); + *quantized_data_type = ArrayDataType::kUint8; + quantization_params->zero_point = input_quantization_params.zero_point; + quantization_params->scale = input_quantization_params.scale; + + transformation->AddMessageF( + "Output array %s is produced by a %s operator. Copying quantization " + "params from input array.", + output, OperatorTypeName(op.type)); + return true; + } + const MinMax& minmax = GetOrComputeMinMax(model, output); + GetQuantizationParamsFromMinMax(model->flags, minmax, + quantization_params); + *quantized_data_type = ArrayDataType::kUint8; + transformation->AddMessageF( + "For output array %s with min=%g, max=%g" + ", chose to quantize as uint8 with zero_point=%d" + ", scale=%g", + output, minmax.min, minmax.max, quantization_params->zero_point, + quantization_params->scale); + + return true; +} +} // namespace + +bool Quantize::Run(Model* model, std::size_t op_index) { + // Our general "quantization" graph transformation consists in replacing + // QuantizedInputArrays[] -> + // DequantizeOperators[] -> + // FloatInputArrays[] -> + // Operator -> + // FloatOutputArray + // by + // QuantizedInputArrays[] -> + // Operator -> + // QuantizedOutputArray -> + // DequantizeOperator -> + // FloatOutputArray + // + // In other words, this is pushing Dequantize operators to the right of + // other operators. + // + + auto& op = *model->operators[op_index]; + if (op.type == OperatorType::kDequantize || + op.type == OperatorType::kFakeQuant) { + return false; + } + + // Our assumption here is that the input arrays are already quantized - + // that is typically the case in models operating on an input bitmap + // image, and MakeInitialDequantizeOp should have already resolved + // the handling of the input image as an initial Dequantize op. + // + // Thus we are building around the assumption that the graph always starts + // with a quantized input array, and only after some Dequantize op do we have + // float arrays. The problem of quantizing the graph thus becomes a problem of + // pushing Dequantize ops to the right of other ops. + // + // Let us just guard this assumption by the following assertion: + for (const auto& input : op.inputs) { + if (IsInputArray(*model, input)) { + const auto& input_array = model->GetArray(input); + CHECK(input_array.quantization_params); + } + } + if (!SupportsQuantization(op)) { + LOG(FATAL) << "Unimplemented: this graph contains an operator of type " + << HelpfulOperatorTypeName(op) + << " for which the quantized form is not yet implemented. " + "Sorry, and patches welcome (that's a relatively fun patch " + "to write, mostly providing the actual quantized arithmetic " + "code for this op)."; + } + + for (const auto& input : op.inputs) { + const auto& array = model->GetArray(input); + if (array.data_type == ArrayDataType::kFloat) { + if (!array.minmax && !array.buffer) { + LOG(ERROR) << "Can't quantize input array " << input + << " because it lacks min/max info"; + return false; + } + const auto* other_op = GetOpWithOutput(*model, input); + if (other_op && other_op->type != OperatorType::kDequantize) { + AddMessageF( + "Not quantizing %s for now, because its input array %s is not " + "produced by a Dequantize op, " + "which means that we should yield and let other ops " + "get quantized first", + LogName(op), input); + return false; + } + } + } + + bool changed = false; + + // Quantize inputs, remove any Dequantize op on the inputs side + for (std::size_t input_index = 0; input_index < op.inputs.size(); + input_index++) { + ArrayDataType quantized_data_type; + QuantizationParams quantization_params; + if (ChooseQuantizationForOperatorInput(this, model, op, input_index, + &quantized_data_type, + &quantization_params)) { + changed = true; + const auto& input = op.inputs[input_index]; + if (IsConstantParameterArray(*model, input)) { + QuantizeArray(this, model, input, quantized_data_type, + quantization_params); + } else { + auto dequantize_it = FindOpWithOutput(*model, input); + CHECK(dequantize_it != model->operators.end()); + auto* dequantize_op = dequantize_it->get(); + CHECK(dequantize_op->type == OperatorType::kDequantize); + op.inputs[input_index] = dequantize_op->inputs[0]; + // Check if the output of that Dequantize op was not used by any + // other operator. We will then erase that Dequantize op. + if (!CountOpsWithInput(*model, dequantize_op->outputs[0])) { + // If any of the model's output_arrays was pointing to the + // Dequantize op's output, let it point to the Dequantize op's + // input instead. + for (int i = 0; i < model->flags.output_arrays_size(); i++) { + if (model->flags.output_arrays(i) == dequantize_op->outputs[0]) { + model->flags.set_output_arrays(i, dequantize_op->inputs[0]); + } + } + model->arrays.erase(dequantize_op->outputs[0]); + model->operators.erase(dequantize_it); + } + } + } + } + + // Quantize outputs, add Dequantize ops as needed on the outputs side + for (std::size_t output_index = 0; output_index < op.outputs.size(); + output_index++) { + ArrayDataType quantized_data_type; + QuantizationParams quantization_params; + if (ChooseQuantizationForOperatorOutput(this, model, op, output_index, + &quantized_data_type, + &quantization_params)) { + changed = true; + const auto& output = op.outputs[output_index]; + QuantizeArray(this, model, output, quantized_data_type, + quantization_params); + const auto& dequantized_output = + AvailableArrayName(*model, output + "_dequantized"); + const auto& output_array = model->GetArray(output); + const auto& output_minmax = output_array.GetMinMax(); + auto& dequantized_output_array = + model->GetOrCreateArray(dequantized_output); + dequantized_output_array.data_type = ArrayDataType::kFloat; + auto& dequantized_output_minmax = + dequantized_output_array.GetOrCreateMinMax(); + dequantized_output_minmax.min = output_minmax.min; + dequantized_output_minmax.max = output_minmax.max; + for (const auto& other_op : model->operators) { + for (auto& other_op_input : other_op->inputs) { + if (other_op_input == output) { + other_op_input = dequantized_output; + } + } + } + auto* dequantize_op = new DequantizeOperator; + dequantize_op->inputs = {output}; + dequantize_op->outputs = {dequantized_output}; + for (int i = 0; i < model->flags.output_arrays_size(); i++) { + if (model->flags.output_arrays(i) == output) { + model->flags.set_output_arrays(i, dequantized_output); + } + } + const auto op_it = FindOp(*model, &op); + model->operators.emplace(op_it + 1, dequantize_op); + } + } + + return changed; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc new file mode 100644 index 0000000000..371ced388a --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/read_fake_quant_min_max.cc @@ -0,0 +1,105 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool ApplyMinMaxToArray(GraphTransformation* transformation, Model* model, + const MinMax& minmax, const string& array_name) { + auto& annotated_array = model->GetArray(array_name); + if (annotated_array.minmax) { + return false; + } + annotated_array.GetOrCreateMinMax() = minmax; + transformation->AddMessageF( + "Read min/max annotation for array %s: min=%g, max=%g", array_name, + minmax.min, minmax.max); + return true; +} + +} // end namespace + +bool ReadFakeQuantMinMax::Run(Model* model, std::size_t op_index) { + const auto fakequant_it = model->operators.begin() + op_index; + auto* fakequant_base_op = fakequant_it->get(); + if (fakequant_base_op->type != OperatorType::kFakeQuant) { + return false; + } + auto* fakequant_op = static_cast(fakequant_base_op); + + bool changed = false; + + if (!fakequant_op->minmax) { + CHECK_EQ(fakequant_op->inputs.size(), 3); + // We need to yield until the min and max parameters have been + // resolved to constant arrays. + for (int i = 1; i <= 2; i++) { + if (!IsConstantParameterArray(*model, fakequant_op->inputs[1])) { + return false; + } + } + + // Obtain the final min/max values + const auto& min_array = model->GetArray(fakequant_op->inputs[1]); + const auto& max_array = model->GetArray(fakequant_op->inputs[2]); + CHECK_EQ(RequiredBufferSizeForShape(min_array.shape()), 1); + CHECK_EQ(RequiredBufferSizeForShape(max_array.shape()), 1); + fakequant_op->minmax.reset(new MinMax); + MinMax& minmax = *fakequant_op->minmax; + minmax.min = min_array.GetBuffer().data[0]; + minmax.max = max_array.GetBuffer().data[0]; + // We always want [min, max] to contain 0. + minmax.min = std::min(minmax.min, 0.); + minmax.max = std::max(minmax.max, 0.); + + // We won't use the input arrays that provided these min and max + // values, anymore. Delete them unless they are used by something + // else. + for (int i = 1; i <= 2; i++) { + if (CountOpsWithInput(*model, fakequant_op->inputs[i]) == 1) { + model->arrays.erase(fakequant_op->inputs[i]); + } + } + fakequant_op->inputs.resize(1); + changed = true; + } + + // At this point, this FakeQuantOperator should have a MinMax + // attached to it, and should only have 1 input (it should not have + // 2nd and 3rd input arrays giving min and max anymore). + CHECK(fakequant_op->minmax); + CHECK_EQ(1, fakequant_op->inputs.size()); + + const MinMax& minmax = *fakequant_op->minmax; + + // Record the MinMax info on the input and output arrays + changed |= ApplyMinMaxToArray(this, model, minmax, fakequant_op->inputs[0]); + changed |= ApplyMinMaxToArray(this, model, minmax, fakequant_op->outputs[0]); + + return changed; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc new file mode 100644 index 0000000000..3992e7d1ef --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc @@ -0,0 +1,59 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) { + const auto dequantize_it = model->operators.begin() + op_index; + const auto* dequantize_op = dequantize_it->get(); + if (dequantize_op->type != OperatorType::kDequantize) { + return false; + } + const auto& output = dequantize_op->outputs[0]; + // We can remove any dequantize op whose output is not consumed by + // any op. This is not necessarily equivalent to the output being + // one of the model's output arrays, as some intermediate array + // in the middle of the graph might be designated as an output + // array. + if (CountOpsWithInput(*model, output)) { + return false; + } + + // If one of the model's output arrays was actually the Dequantize op's + // output, then we need to update it to point to the Dequantize op's input. + for (int i = 0; i < model->flags.output_arrays_size(); i++) { + if (output == model->flags.output_arrays(i)) { + model->flags.set_output_arrays(i, dequantize_op->inputs[0]); + } + } + + // Remove the node and its output array. + AddMessageF("Removed final %s", LogName(*dequantize_op)); + model->arrays.erase(output); + model->operators.erase(dequantize_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc new file mode 100644 index 0000000000..35a0c46532 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc @@ -0,0 +1,60 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) { + const auto assert_it = model->operators.begin() + op_index; + const auto* assert_op = assert_it->get(); + if (assert_op->type != OperatorType::kTensorFlowAssert) { + return false; + } + + bool changed = false; + // Remove any other node's dependency on this assert node + for (const auto& op : model->operators) { + auto it = op->inputs.begin(); + while (it != op->inputs.end()) { + if (*it == assert_op->outputs[0]) { + op->inputs.erase(it); + changed = true; + } else { + ++it; + } + } + } + CHECK(!CountOpsWithInput(*model, assert_op->outputs[0])); + + if (changed) { + AddMessageF( + "Prepared for the removal of %s by removing any other op's dependency " + "on it", + LogName(*assert_op)); + } + + // That's it. We can stop here, no need to duplicate the work that + // RemoveUnusedOp will do removing this now-unused node. + return changed; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc new file mode 100644 index 0000000000..404269bbfd --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool RemoveTensorFlowIdentity::Run(Model* model, std::size_t op_index) { + const auto passthru_it = model->operators.begin() + op_index; + const auto* passthru_op = passthru_it->get(); + if (passthru_op->type != OperatorType::kTensorFlowIdentity) { + return false; + } + + return RemoveTrivialPassthroughOp(this, model, op_index); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc new file mode 100644 index 0000000000..6add443f2d --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc @@ -0,0 +1,113 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +template +bool AreAllBufferElementsEqualTo(const std::vector& buffer_data, + Scalar value) { + for (auto x : buffer_data) { + if (x != value) { + return false; + } + } + return true; +} +} // namespace + +// A binary operator is called trivial when exactly one of its operands is +// a constant and is such that the binary operation is equivalent to +// the identity operation on its other input. +// For example, an Add operator is trivial if +// one of its operands is constant 0, a Mul operator is trivial +// if one of its operands is constant 1, etc. +bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { + const auto binary_it = model->operators.begin() + op_index; + auto* binary_op = binary_it->get(); + if (binary_op->type != OperatorType::kAdd && + binary_op->type != OperatorType::kMul && + binary_op->type != OperatorType::kSub && + binary_op->type != OperatorType::kDiv) { + return false; + } + + CHECK_EQ(binary_op->inputs.size(), 2); + + // This graph transformation is only concerned with the case + // when one input is constant and the other is not constant. + const bool is_input_constant[2] = { + IsConstantParameterArray(*model, binary_op->inputs[0]), + IsConstantParameterArray(*model, binary_op->inputs[1]), + }; + if (!is_input_constant[0] && !is_input_constant[1]) { + // Neither input is constant, so nothing we can resolve here. + return false; + } + if (is_input_constant[0] && is_input_constant[1]) { + // Both inputs are constants. That's a job for constants + // propagation, not for us to handle here. + return false; + } + const int index_of_constant_input = is_input_constant[0] ? 0 : 1; + const int index_of_variable_input = is_input_constant[0] ? 1 : 0; + CHECK(is_input_constant[index_of_constant_input]); + CHECK(!is_input_constant[index_of_variable_input]); + + // Now check if the constant operand makes this binary + // operator trivial. + const auto& constant_input_array = + *model->arrays[binary_op->inputs[index_of_constant_input]]; + // For now, we only handle floats here. + if (constant_input_array.data_type != ArrayDataType::kFloat) { + return false; + } + const auto& constant_input_float_data = + constant_input_array.GetBuffer().data; + bool is_trivial = false; + if (binary_op->type != OperatorType::kAdd) { + is_trivial = AreAllBufferElementsEqualTo(constant_input_float_data, 0.f); + } else if (binary_op->type != OperatorType::kSub) { + is_trivial = index_of_constant_input == 1 && + AreAllBufferElementsEqualTo(constant_input_float_data, 0.f); + } else if (binary_op->type != OperatorType::kMul) { + is_trivial = AreAllBufferElementsEqualTo(constant_input_float_data, 1.f); + } else if (binary_op->type != OperatorType::kDiv) { + is_trivial = index_of_constant_input == 1 && + AreAllBufferElementsEqualTo(constant_input_float_data, 1.f); + } + + if (!is_trivial) { + return false; + } + + // Now we know that this node is trivial, so we can remove it. + AddMessageF("Removing trivial %s", LogName(*binary_op)); + return RemoveTrivialPassthroughOp(this, model, op_index); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc new file mode 100644 index 0000000000..3ceb93d8ee --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc @@ -0,0 +1,40 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool RemoveTrivialConcatenation::Run(Model* model, std::size_t op_index) { + const auto concat_it = model->operators.begin() + op_index; + auto* concat_op = concat_it->get(); + if (concat_op->type != OperatorType::kConcatenation) { + return false; + } + if (concat_op->inputs.size() != 1) { + return false; + } + return RemoveTrivialPassthroughOp(this, model, op_index); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc new file mode 100644 index 0000000000..b603735704 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc @@ -0,0 +1,68 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { + // TensorFlow allows Concatenation nodes to have 0-D inputs, + // and they are then treated as empty i.e. omitted from concatenation, + // in violation of the notion that 0-D is equivalent to 1x1x1x1. + // Thus we have to drop these 0-D inputs from Concatenation nodes. + // Sometimes, there will remain only one non-trivial input, and + // the other graph transformation RemoveTrivialConcatenation will then drop + // it. + const auto concat_it = model->operators.begin() + op_index; + auto* concat_op = concat_it->get(); + if (concat_op->type != OperatorType::kConcatenation) { + return false; + } + std::vector trivial_inputs; + std::vector nontrivial_inputs; + for (const string& input : concat_op->inputs) { + const auto& input_array = model->GetArray(input); + const bool is_trivial = + input_array.has_shape() && input_array.shape().dimensions_count() == 0; + if (is_trivial) { + trivial_inputs.push_back(input); + } else { + nontrivial_inputs.push_back(input); + } + } + + if (trivial_inputs.empty()) { + return false; + } + + // Drop trivial inputs. + for (const string& input : trivial_inputs) { + if (CountOpsWithInput(*model, input) == 1) { + model->arrays.erase(input); + } + } + concat_op->inputs = nontrivial_inputs; + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc new file mode 100644 index 0000000000..a0d1338298 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc @@ -0,0 +1,107 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { +// Reroute all edges involving a given discardable array to another +// array instead. from_array is assumed to be discardable, and consequently +// this only updates operator edges (since discardable arrays only +// appear there, and not e.g. in model flags). +void RerouteEdges(const string& from_array, const string& to_array, + Model* model) { + for (const auto& op : model->operators) { + for (auto& output : op->outputs) { + if (output == from_array) { + output = to_array; + } + } + for (auto& input : op->inputs) { + if (input == from_array) { + input = to_array; + } + } + } +} + +} // end anonymous namespace + +bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, + Model* model, std::size_t op_index) { + const auto passthru_it = model->operators.begin() + op_index; + auto* passthru_op = passthru_it->get(); + CHECK_EQ(passthru_op->outputs.size(), 1); + CHECK_GE(passthru_op->inputs.size(), 1); + int count_nonconstant_input_arrays = 0; + // We call 'main input' the unique nonconstant input array if there is one, + // or else the 0-th input. + int main_input_array_index = 0; + for (int i = 0; i < passthru_op->inputs.size(); i++) { + if (!model->GetArray(passthru_op->inputs[i]).buffer) { + count_nonconstant_input_arrays++; + main_input_array_index = i; + } + } + CHECK_LE(count_nonconstant_input_arrays, 1); + + const string main_input_name = passthru_op->inputs[main_input_array_index]; + const string output_name = passthru_op->outputs[0]; + if (IsDiscardableArray(*model, output_name)) { + transformation->AddMessageF( + "Removing %s, keeping its non-constant input array", + LogName(*passthru_op)); + model->arrays.erase(output_name); + for (const string& input : passthru_op->inputs) { + if (IsDiscardableArray(*model, input) && input != main_input_name && + CountOpsWithInput(*model, input) == 1) { + model->arrays.erase(input); + } + } + RerouteEdges(output_name, main_input_name, model); + } else if (IsDiscardableArray(*model, main_input_name)) { + transformation->AddMessageF("Removing %s, keeping its output array", + LogName(*passthru_op)); + for (const string& input : passthru_op->inputs) { + if (IsDiscardableArray(*model, input) && + (input == main_input_name || CountOpsWithInput(*model, input) == 1)) { + model->arrays.erase(input); + } + } + RerouteEdges(main_input_name, output_name, model); + } else { + transformation->AddMessageF( + "Cannot remove %s, neither its nonconstant input nor its output may be " + "discarded", + LogName(*passthru_op)); + return false; + } + + // Remove the pass-through node. + model->operators.erase(passthru_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h new file mode 100644 index 0000000000..b72c85c0e5 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h @@ -0,0 +1,55 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_REMOVE_TRIVIAL_PASSTHROUGH_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_REMOVE_TRIVIAL_PASSTHROUGH_H_ + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +// A "passthrough op" is an op that satisfies the following conditions: +// 1. It has at most one non-constant input (it may have other constant +// inputs). +// 2. It has exactly one output. +// 3. It forwards exactly its single non-constant input to its single output. +// +// Examples include: +// 1. TensorFlow Identity ops. (Have one input). +// 2. TensorFlow Reshape ops when the input and output shapes agree. +// 3. Any binary operator, one of whose two inputs is a constant and is the +// neutral value for that operation. For example, a binary Add operator +// where one of its inputs is a constant array filled with zeros. +// +// A passthrough op is "trivial" and can be removed when it is possible to +// discard either its single non-constant input or output array, rerouting any +// edge involving it to the other of these two arrays. +// +// It is only possible to discard such an array if it is not explicitly +// designated as a global input/output array of the graph, e.g. the model's +// input arrays, output arrays, and any array involved in a RNN back-edge +// specified by the model. +// +// This function does not check that the given operator is a passthrough op: +// that's the responsibility of the caller. +// Given that it is a passthrough op, this function checks whether it is trivial +// and then discards it and returns true, or, if it's not trivial (if neither +// the input nor the output may be discarded), returns false. +bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, + Model* model, std::size_t op_index); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_REMOVE_TRIVIAL_PASSTHROUGH_H_ diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc new file mode 100644 index 0000000000..28f76c9d36 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc @@ -0,0 +1,87 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, + std::size_t op_index) { + const auto it = model->operators.begin() + op_index; + auto* op = it->get(); + if (op->fused_activation_function != FusedActivationFunctionType::kRelu && + op->fused_activation_function != FusedActivationFunctionType::kRelu6) { + return false; + } + const auto& output_array = model->GetArray(op->outputs[0]); + if (!output_array.quantization_params) { + return false; + } + if (output_array.data_type != ArrayDataType::kUint8) { + return false; + } + const auto& quantization_params = output_array.GetQuantizationParams(); + + bool has_nontrivial_min_bound = false; + bool has_nontrivial_max_bound = false; + + if (op->fused_activation_function == FusedActivationFunctionType::kRelu || + op->fused_activation_function == FusedActivationFunctionType::kRelu6) { + double lowest_representable_output = + (0. - quantization_params.zero_point) * quantization_params.scale; + if (lowest_representable_output < 0.) { + has_nontrivial_min_bound = true; + AddMessageF( + "Quantized activation function is not trivial: " + "the lowest representable output value %g" + " less than the clamp min bound.", + lowest_representable_output); + } + } + if (op->fused_activation_function == FusedActivationFunctionType::kRelu6) { + double highest_representable_output = + (255. - quantization_params.zero_point) * quantization_params.scale; + if (highest_representable_output > 6.) { + has_nontrivial_max_bound = true; + AddMessageF( + "Quantized activation function is not trivial: " + "the highest representable output value %g" + " is greater than the clamp max bound.", + highest_representable_output); + } + } + + if (has_nontrivial_min_bound || has_nontrivial_max_bound) { + return false; + } + + op->fused_activation_function = FusedActivationFunctionType::kNone; + AddMessageF( + "Removing trivial quantized activation function on %s" + " because the output quantization parameters imply at least as tight" + " a clamp anyway.", + LogName(*op)); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc new file mode 100644 index 0000000000..90f9381ec1 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc @@ -0,0 +1,92 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool IsReshapeTrivial(const Model& model, const Operator& op, + RemoveTrivialReshape* transformation) { + CHECK(op.type == OperatorType::kTensorFlowReshape); + + // One way in which a reshape can be trivial is if its + // output shape is == its input shape + const auto& input_array = model.GetArray(op.inputs[0]); + const auto& output_array = model.GetArray(op.outputs[0]); + if (input_array.has_shape() && output_array.has_shape()) { + if (transformation->treat_expand_dims_as_trivial() && + ShapesAgreeUpToExtending(input_array.shape(), output_array.shape())) { + transformation->AddMessageF( + "%s is trivial because its input and output shapes are equal up to " + "extending " + "by 1's, and we are told to aggressively discard such Reshape ops.", + LogName(op)); + return true; + } + if (input_array.shape().dims() == output_array.shape().dims()) { + transformation->AddMessageF( + "%s is trivial because its input and output shapes are equal", + LogName(op)); + return true; + } + } + + // Another way in which a reshape can be trivial is if its output + // is only consumed by another reshape. + if (CountOpsWithInput(model, op.outputs[0]) == 1) { + const auto* next_op = GetOpWithInput(model, op.outputs[0]); + if (next_op->type == OperatorType::kTensorFlowReshape) { + transformation->AddMessageF( + "%s is trivial because its output is only consumed by another " + "Reshape op", + LogName(op)); + return true; + } + } + + return false; +} + +} // namespace + +bool RemoveTrivialReshape::Run(Model* model, std::size_t op_index) { + const auto reshape_it = model->operators.begin() + op_index; + auto* reshape_op = reshape_it->get(); + if (reshape_op->type != OperatorType::kTensorFlowReshape) { + return false; + } + + if (!IsReshapeTrivial(*model, *reshape_op, this)) { + return false; + } + + AddMessageF("Removing trivial %s", LogName(*reshape_op)); + + CHECK_EQ(reshape_op->inputs.size(), 2); + return RemoveTrivialPassthroughOp(this, model, op_index); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc new file mode 100644 index 0000000000..1f1f1f6948 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc @@ -0,0 +1,122 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { + const auto it = model->operators.begin() + op_index; + const auto* op = it->get(); + + // Bail if any output is used, and is not an input_array of + // the model. We allow specifying an arbitrary input_array, + // treating the part of the graph leading up to it as unused. + for (const auto& output : op->outputs) { + CHECK(model->arrays.count(output)); + // If this output is provided as the model's input array, + // then we don't need this operator to produce its contents. + if (IsInputArray(*model, output)) { + continue; + } + // If this output is provided as a RNN's state array, + // then we don't need this operator to produce its contents. + // So far this case has only been encountered with TensorFlow + // Fill ops used to zero-initialize RNN states, which is + // redundant for us as we zero-initialize RNN states anyway. + bool found_output_as_rnn_state_array = false; + for (const auto& rnn_state : model->flags.rnn_states()) { + if (output == rnn_state.state_array()) { + CHECK(op->type == OperatorType::kTensorFlowUnsupported); + CHECK_EQ(static_cast(op) + ->tensorflow_op, + "Fill"); + found_output_as_rnn_state_array = true; + break; + } + } + if (found_output_as_rnn_state_array) { + continue; + } + for (const string& output_array : model->flags.output_arrays()) { + if (output == output_array) { + return false; + } + } + for (const auto& rnn_state : model->flags.rnn_states()) { + if (output == rnn_state.back_edge_source_array()) { + return false; + } + } + if (CountOpsWithInput(*model, output)) { + return false; + } + } + + if (op->unresolved_outputs) { + AddMessageF("Not discarding %s because it has unresolved outputs.", + LogName(*op)); + return false; + } + + AddMessageF("Discarding %s because none of its outputs is used.", + LogName(*op)); + + // At that point we know that none of the outputs is used, so we will + // definitely remove the node and all its outputs. + + // Remove any input array that is not used by anything else, + // and that is not the output of some other operator. + for (const auto& input : op->inputs) { + if (CountOpsWithInput(*model, input) == 1 && + !GetOpWithOutput(*model, input)) { + model->arrays.erase(input); + } + } + + // Remove the node and its now-unused output arrays. + for (const auto& output : op->outputs) { + // If the output array is the model's input array, don't remove that. + // That's the case when cropping a model at a given --input_array. + if (IsInputArray(*model, output)) { + continue; + } + // Likewise, if the output array is a RNN state array, don't remove that. + bool found_output_as_rnn_state_array = false; + for (const auto& rnn_state : model->flags.rnn_states()) { + if (output == rnn_state.state_array()) { + found_output_as_rnn_state_array = true; + break; + } + } + if (found_output_as_rnn_state_array) { + continue; + } + // Generic case: do delete this output array. + model->arrays.erase(output); + } + model->operators.erase(it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc new file mode 100644 index 0000000000..3eb7fa3896 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc @@ -0,0 +1,135 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { + auto bn_it = model->operators.begin() + op_index; + if (bn_it->get()->type != OperatorType::kBatchNormalization) { + return false; + } + const auto* bn_op = + static_cast(bn_it->get()); + + const auto& mean_array = model->GetArray(bn_op->inputs[1]); + const auto& multiplier_array = model->GetArray(bn_op->inputs[2]); + const auto& offset_array = model->GetArray(bn_op->inputs[3]); + + CHECK(IsConstantParameterArray(*model, bn_op->inputs[1]) && + IsConstantParameterArray(*model, bn_op->inputs[2]) && + IsConstantParameterArray(*model, bn_op->inputs[3])) + << "Batch normalization resolution requires that mean, multiplier and " + "offset arrays be constant."; + + // We should only have *float* BatchNormalizations... let's guard this + // assumption by CHECK's. + CHECK(mean_array.data_type == ArrayDataType::kFloat); + CHECK(multiplier_array.data_type == ArrayDataType::kFloat); + CHECK(offset_array.data_type == ArrayDataType::kFloat); + + // Create the new Mul, Add operators + auto* mul_op = new MulOperator; + auto* add_op = new AddOperator; + const string mul_name = + AvailableArrayName(*model, bn_op->outputs[0] + "_mul"); + const string add_name = + AvailableArrayName(*model, bn_op->outputs[0] + "_add"); + const string mul_param_name = AvailableArrayName(*model, mul_name + "_param"); + const string add_param_name = AvailableArrayName(*model, add_name + "_param"); + mul_op->inputs = {bn_op->inputs[0], mul_param_name}; + mul_op->outputs = {mul_name}; + add_op->inputs = {mul_name, add_param_name}; + add_op->outputs = {bn_op->outputs[0]}; + AddMessageF("Splitting %s into %s and %s", LogName(*bn_op), LogName(*mul_op), + LogName(*add_op)); + + // Create the intermediate activation array (output of mul, input of add) + auto& intermediate_array = model->GetOrCreateArray(mul_op->outputs[0]); + intermediate_array.data_type = model->GetArray(bn_op->inputs[0]).data_type; + + // Insert the new operators in the graph + auto add_it = model->operators.emplace(bn_it, add_op); + auto mul_it = model->operators.emplace(add_it, mul_op); + // update invalidated iterators. + DCHECK_EQ(mul_it->get(), mul_op); + add_it = mul_it + 1; + DCHECK_EQ(add_it->get(), add_op); + bn_it = add_it + 1; + DCHECK_EQ(bn_it->get(), bn_op); + + // Create the new param arrays + const auto& mean_shape = mean_array.shape(); + const auto& multiplier_shape = multiplier_array.shape(); + const auto& offset_shape = offset_array.shape(); + CHECK(mean_shape.dims() == multiplier_shape.dims()); + CHECK(mean_shape.dims() == offset_shape.dims()); + const auto& param_shape = mean_shape; + const int buffer_size = RequiredBufferSizeForShape(param_shape); + auto& mul_param_array = model->GetOrCreateArray(mul_param_name); + auto& add_param_array = model->GetOrCreateArray(add_param_name); + DropMinMax(model, mul_param_name); + DropMinMax(model, add_param_name); + mul_param_array.copy_shape(param_shape); + add_param_array.copy_shape(param_shape); + mul_param_array.data_type = ArrayDataType::kFloat; + add_param_array.data_type = ArrayDataType::kFloat; + auto& mul_float_data = + mul_param_array.GetMutableBuffer().data; + auto& add_float_data = + add_param_array.GetMutableBuffer().data; + mul_float_data.resize(buffer_size); + add_float_data.resize(buffer_size); + const auto& mean_float_data = + mean_array.GetBuffer().data; + const auto& multiplier_float_data = + multiplier_array.GetBuffer().data; + const auto& offset_float_data = + offset_array.GetBuffer().data; + + CHECK(mul_float_data.size() == buffer_size); + CHECK(add_float_data.size() == buffer_size); + CHECK(mean_float_data.size() == buffer_size); + CHECK(multiplier_float_data.size() == buffer_size); + CHECK(offset_float_data.size() == buffer_size); + + for (int i = 0; i < buffer_size; i++) { + mul_float_data[i] = multiplier_float_data[i]; + add_float_data[i] = + offset_float_data[i] - mean_float_data[i] * multiplier_float_data[i]; + } + + // Remove the old param arrays + model->arrays.erase(bn_op->inputs[1]); + model->arrays.erase(bn_op->inputs[2]); + model->arrays.erase(bn_op->inputs[3]); + + // Remove the old operator + DCHECK_EQ(bn_it->get(), bn_op); + model->operators.erase(bn_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc new file mode 100644 index 0000000000..53e1be7a05 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc @@ -0,0 +1,247 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +std::vector VectorGreaterThan(const std::vector& a, + const std::vector& b) { + DCHECK_EQ(a.size(), b.size()); + const int size = a.size(); + std::vector result(size); + for (int i = 0; i < size; i++) { + result[i] = a[i] > b[i]; + } + return result; +} + +void PairwiseVectorSelect(const std::vector& selector, + const std::vector& input_a, + const std::vector& input_b, + std::vector* output_a, + std::vector* output_b) { + DCHECK_EQ(input_a.size(), input_b.size()); + DCHECK_EQ(output_a->size(), output_b->size()); + DCHECK_EQ(input_a.size(), output_a->size()); + DCHECK_EQ(selector.size(), input_a.size()); + const int size = input_a.size(); + for (int i = 0; i < size; i++) { + if (selector[i]) { + (*output_a)[i] = input_a[i]; + (*output_b)[i] = input_b[i]; + } else { + (*output_a)[i] = input_b[i]; + (*output_b)[i] = input_a[i]; + } + } +} + +template +void EvaluateBinaryOperatorOnConstantInputs(Model* model, + const Operator* binary_op) { + CHECK(IsConstantParameterArray(*model, binary_op->inputs[0])); + CHECK(IsConstantParameterArray(*model, binary_op->inputs[1])); + CHECK(binary_op->fused_activation_function == + FusedActivationFunctionType::kNone); + const auto& input0_array = model->GetArray(binary_op->inputs[0]); + const auto& input1_array = model->GetArray(binary_op->inputs[1]); + const auto& output_name = binary_op->outputs[0]; + auto& output_array = model->GetArray(output_name); + CHECK(input0_array.data_type == InputsDataType); + CHECK(input1_array.data_type == InputsDataType); + CHECK(output_array.data_type == OutputDataType); + + // We have already tested above for existence of input buffers + // (synonymous to being a constant param). + CHECK(input0_array.buffer); + CHECK(input1_array.buffer); + // On the other hand, the output should not already have a buffer. + CHECK(!output_array.buffer); + + const auto& input0_data = input0_array.GetBuffer().data; + const auto& input1_data = input1_array.GetBuffer().data; + // Create the buffer on the output array, effectively turning it into + // a constant parameter + + const Shape& output_shape = output_array.shape(); + auto& output_data = output_array.GetMutableBuffer().data; + const int output_buffer_size = RequiredBufferSizeForShape(output_shape); + output_data.resize(output_buffer_size); + const int dims_count = output_shape.dimensions_count(); + + // It will be convenient here to have copies of the operands shapes + // extended to match the number of dimensions of the output shape. + Shape input0_shape = input0_array.shape(); + Shape input1_shape = input1_array.shape(); + ExtendShape(&input0_shape, dims_count); + ExtendShape(&input1_shape, dims_count); + // Now we may still have operands of different sizes, which would indicate + // that we have to "broadcast" the smaller dimension. We do this using a + // a vector of Booleans indicating which input is the larger in each + // dimension. + CHECK_EQ(input0_shape.dimensions_count(), input1_shape.dimensions_count()); + CHECK_EQ(input0_shape.dimensions_count(), dims_count); + const std::vector input0_larger = + VectorGreaterThan(input0_shape.dims(), input1_shape.dims()); + + std::vector big_sizes(dims_count); + std::vector small_sizes(dims_count); + PairwiseVectorSelect(input0_larger, input0_shape.dims(), input1_shape.dims(), + &big_sizes, &small_sizes); + + // The output should already be correctly sized to match the big dimensions. + for (int i = 0; i < dims_count; i++) { + CHECK_EQ(output_shape.dims(i), big_sizes[i]); + } + + std::vector input0_indices(dims_count); + std::vector input1_indices(dims_count); + std::vector modulo_indices(dims_count); + + for (int k = 0; k < output_buffer_size; k++) { + const std::vector output_indices = ReverseOffset(output_shape, k); + for (int i = 0; i < dims_count; i++) { + modulo_indices[i] = output_indices[i] % small_sizes[i]; + } + PairwiseVectorSelect(input0_larger, output_indices, modulo_indices, + &input0_indices, &input1_indices); + const auto val0 = input0_data[Offset(input0_shape, input0_indices)]; + const auto val1 = input1_data[Offset(input1_shape, input1_indices)]; + + DataType outval; + if (binary_op->type == OperatorType::kAdd) { + outval = val0 + val1; + } else if (binary_op->type == OperatorType::kMul) { + outval = val0 * val1; + } else if (binary_op->type == OperatorType::kSub) { + outval = val0 - val1; + } else if (binary_op->type == OperatorType::kDiv) { + outval = val0 / val1; + } else if (binary_op->type == OperatorType::kTensorFlowMinimum) { + outval = std::min(val0, val1); + } else if (binary_op->type == OperatorType::kTensorFlowMaximum) { + outval = std::max(val0, val1); + } else if (binary_op->type == OperatorType::kTensorFlowLess) { + outval = val0 < val1; + } else if (binary_op->type == OperatorType::kTensorFlowLessEqual) { + outval = val0 <= val1; + } else if (binary_op->type == OperatorType::kTensorFlowGreater) { + outval = val0 > val1; + } else if (binary_op->type == OperatorType::kTensorFlowGreaterEqual) { + outval = val0 >= val1; + } else { + LOG(FATAL) << "should not get here"; + } + output_data[Offset(output_shape, output_indices)] = outval; + } +} + +void EvaluateBinaryOperatorOnConstantInputs(Model* model, + const Operator* binary_op) { + const auto inputs_data_type = model->arrays[binary_op->inputs[0]]->data_type; + const auto output_data_type = model->arrays[binary_op->outputs[0]]->data_type; +#define TOCO_HANDLE_CASE(InputsDataType, OutputDataType) \ + if (inputs_data_type == InputsDataType && \ + output_data_type == OutputDataType) { \ + EvaluateBinaryOperatorOnConstantInputs( \ + model, binary_op); \ + return; \ + } + TOCO_HANDLE_CASE(ArrayDataType::kFloat, ArrayDataType::kFloat) + TOCO_HANDLE_CASE(ArrayDataType::kFloat, ArrayDataType::kBool) + TOCO_HANDLE_CASE(ArrayDataType::kInt32, ArrayDataType::kInt32) + TOCO_HANDLE_CASE(ArrayDataType::kInt32, ArrayDataType::kBool) + TOCO_HANDLE_CASE(ArrayDataType::kInt64, ArrayDataType::kInt64) + TOCO_HANDLE_CASE(ArrayDataType::kInt64, ArrayDataType::kBool) + LOG(FATAL) << "Unimplemented: don't know how to resolve a constant " + << "binary operator for these data types."; +#undef TOCO_HANDLE_CASE +} +} // namespace + +bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { + const auto binary_it = model->operators.begin() + op_index; + const auto* binary_op = binary_it->get(); + // Test for binary ops of types that we know how to resolve + if (binary_op->type != OperatorType::kAdd && + binary_op->type != OperatorType::kMul && + binary_op->type != OperatorType::kSub && + binary_op->type != OperatorType::kDiv && + binary_op->type != OperatorType::kTensorFlowMinimum && + binary_op->type != OperatorType::kTensorFlowMaximum && + binary_op->type != OperatorType::kTensorFlowLess && + binary_op->type != OperatorType::kTensorFlowLessEqual && + binary_op->type != OperatorType::kTensorFlowGreater && + binary_op->type != OperatorType::kTensorFlowGreaterEqual) { + return false; + } + CHECK_EQ(binary_op->inputs.size(), 2); + + const auto& input0_array = model->GetArray(binary_op->inputs[0]); + const auto& input1_array = model->GetArray(binary_op->inputs[1]); + // Check if both inputs are constant parameters. + if (!input0_array.buffer || !input1_array.buffer) { + return false; + } + + auto& output_array = *model->arrays[binary_op->outputs[0]]; + // Yield until the output array dims have been resolved. + if (!output_array.has_shape()) { + return false; + } + + // At the moment we don't want to care about fused activation functions. + // The idea is that we should do the present constants-propagation before + // activation functions get fused. + if (binary_op->fused_activation_function != + FusedActivationFunctionType::kNone) { + AddMessageF( + "Not resolving constant %s because it has a fused activation function", + LogName(*binary_op)); + return false; + } + + // Check that input data types agree. + CHECK(input0_array.data_type == input1_array.data_type); + + // Do the actual constants propagation + EvaluateBinaryOperatorOnConstantInputs(model, binary_op); + + // Remove the binary operator and its inputs + if (CountOpsWithInput(*model, binary_op->inputs[0]) == 1) { + model->arrays.erase(binary_op->inputs[0]); + } + if (CountOpsWithInput(*model, binary_op->inputs[1]) == 1) { + model->arrays.erase(binary_op->inputs[1]); + } + AddMessageF("Resolved constant %s to the equivalent constant array", + LogName(*binary_op)); + model->operators.erase(binary_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc new file mode 100644 index 0000000000..0983c43849 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc @@ -0,0 +1,196 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +// Copies data from multiple source arrays to a destination array based on a +// concatenation dimension. From each array in input_arrays, it copies chunk +// sizes provided in array_copy_size vector (per array). It uses the buffer +// in concatenated_array as destination buffer. +template +void CopyTensorSegments(const std::vector& input_arrays, + const std::vector& array_copy_size, + const int num_elements_concatenated_array, + Array* concatenated_array) { + for (Array* input_array : input_arrays) { + if (!input_array->buffer) { + return; + } + } + + auto& concatenated_array_buffer = + concatenated_array->GetMutableBuffer().data; + concatenated_array_buffer.resize(num_elements_concatenated_array); + + // It does not matter which array to use to find the value for the total + // number of copy steps. + CHECK(!input_arrays.empty()); + CHECK_NE(array_copy_size[0], 0); + const int total_copy_steps = + input_arrays[0]->GetBuffer().data.size() / array_copy_size[0]; + + // Initialize the source pointers to point to beginning of the array buffers. + std::vector src_ptr; + src_ptr.reserve(input_arrays.size()); + for (Array* input_array : input_arrays) { + src_ptr.push_back(input_array->GetBuffer().data.data()); + } + + // Copy the data from input_arrays to concatenated_array_buffer. + T* dest_ptr = concatenated_array_buffer.data(); + for (int s = 0; s < total_copy_steps; s++) { + for (int i = 0; i < input_arrays.size(); i++) { + std::copy(src_ptr[i], src_ptr[i] + array_copy_size[i], dest_ptr); + src_ptr[i] += array_copy_size[i]; + dest_ptr += array_copy_size[i]; + } + } +} + +// Receives a series of input arrays of type Array and an integer showing the +// axis on which those arrays will be concatenated. It returns the concatenated +// arrray. +template +void ConcatenateTensorBuffers(const std::vector& input_arrays, + int concatenation_axis, + Array* concatenated_array) { + int num_elements_concatenated_array = 1; + for (int i = 0; i < concatenated_array->shape().dimensions_count(); i++) { + num_elements_concatenated_array *= concatenated_array->shape().dims()[i]; + } + // Prepare the data needed for segmented copy from multiple source arrays to + // a destination array based on a oncatenation dimension. + std::vector array_copy_size(input_arrays.size()); + int count = 0; + for (Array* input_array : input_arrays) { + const Shape array_shape = input_array->shape(); + array_copy_size[count] = 1; + for (int i = concatenation_axis; i < array_shape.dimensions_count(); i++) { + array_copy_size[count] *= array_shape.dims()[i]; + } + count++; + } + + // Do the actual data copy. + CopyTensorSegments>(input_arrays, array_copy_size, + num_elements_concatenated_array, + concatenated_array); +} + +// Sets the minimum and maximum values for the concatenated array. If it's +// already set (e.g. because of previous pass in TOCO), it doesn't change it and +// returns. Otherwise it uses the input arrays min and max values to compute the +// concatenated array min and max. +void SetMinMaxForConcatenedArray(const std::vector& input_arrays, + Array* concatenated_array) { + CHECK(concatenated_array->data_type == ArrayDataType::kFloat); + // If the minmax is already set, use it + if (concatenated_array->minmax) return; + + double concat_min = std::numeric_limits::infinity(); + double concat_max = -std::numeric_limits::infinity(); + + for (Array* input_array : input_arrays) { + // If any of the input arrays minmax is not set, return. + // TODO(ghodrat): shall we add the logic to compute the minmax? + if (!input_array->minmax) return; + const MinMax& input_minmax = input_array->GetMinMax(); + concat_min = std::min(concat_min, input_minmax.min); + concat_max = std::max(concat_max, input_minmax.max); + } + MinMax& minmax = concatenated_array->GetOrCreateMinMax(); + minmax.min = concat_min; + minmax.max = concat_max; +} + +} // namespace + +// Resolves the concatenation operator if all its inputs are constant arrays. +bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { + const auto concat_it = model->operators.begin() + op_index; + const auto* concat_base_op = concat_it->get(); + if (concat_base_op->type != OperatorType::kConcatenation) { + return false; + } + const auto* concat_op = + static_cast(concat_base_op); + + for (const string& input_name : concat_op->inputs) { + // We only expect constant unquantized arrays as input, otherwise we return. + // We also make sure the shapes of the input arrays are known and they are + // all discardable. + const Operator* input_op = GetOpWithOutput(*model, input_name); + if (input_op) return false; + if (!IsConstantParameterArray(*model, input_name)) return false; + if (!model->GetArray(input_name).has_shape()) return false; + if (model->GetArray(input_name).quantization_params) return false; + if (!IsDiscardableArray(*model, input_name)) return false; + } + + const int concatenation_axis = concat_op->concat_dim; + + CHECK_EQ(concat_op->outputs.size(), 1); + string concatenated_array_name = concat_op->outputs[0]; + Array& concatenated_array = model->GetOrCreateArray(concatenated_array_name); + std::vector input_arrays; + for (const string& input_name : concat_op->inputs) { + input_arrays.push_back(&model->GetArray(input_name)); + } + + switch (concatenated_array.data_type) { + case ArrayDataType::kFloat: + ConcatenateTensorBuffers( + input_arrays, concatenation_axis, &concatenated_array); + SetMinMaxForConcatenedArray(input_arrays, &concatenated_array); + break; + case ArrayDataType::kUint8: + ConcatenateTensorBuffers( + input_arrays, concatenation_axis, &concatenated_array); + break; + case ArrayDataType::kInt32: + ConcatenateTensorBuffers( + input_arrays, concatenation_axis, &concatenated_array); + break; + case ArrayDataType::kInt64: + ConcatenateTensorBuffers( + input_arrays, concatenation_axis, &concatenated_array); + break; + default: + LOG(FATAL) << "ArrayDataType not supported"; + } + + // Remove all the resolved arrays. + for (const string& input_name : concat_op->inputs) { + model->arrays.erase(input_name); + } + + // Remove concatenate operator + model->operators.erase(concat_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc new file mode 100644 index 0000000000..244adcc4c4 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc @@ -0,0 +1,76 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { + const auto fakequant_it = model->operators.begin() + op_index; + const auto* fakequant_base_op = fakequant_it->get(); + if (fakequant_base_op->type != OperatorType::kFakeQuant) { + return false; + } + + const auto* fakequant_op = + static_cast(fakequant_base_op); + + // Yield until the fakequant MinMax has been resolved. + if (!fakequant_op->minmax) { + return false; + } + + // This transformation only applies when the input array is constant. + if (!IsConstantParameterArray(*model, fakequant_op->inputs[0])) { + return false; + } + + const auto& input_array = model->GetArray(fakequant_op->inputs[0]); + auto& output_array = model->GetArray(fakequant_op->outputs[0]); + CHECK(input_array.data_type == ArrayDataType::kFloat); + output_array.data_type = ArrayDataType::kFloat; + CHECK(!output_array.buffer); + const auto& input_buffer = input_array.GetBuffer(); + auto& output_buffer = output_array.GetMutableBuffer(); + const int size = input_buffer.data.size(); + output_buffer.data.resize(size); + QuantizationParams qparams; + GetQuantizationParamsFromMinMax( + model->flags, *fakequant_op->minmax, &qparams); + for (int i = 0; i < size; i++) { + const double src_val = input_buffer.data[i]; + const double unclamped_quantized_val = + std::round(qparams.zero_point + src_val / qparams.scale); + const double quantized_val = + std::min(255., std::max(0., unclamped_quantized_val)); + const double dst_val = qparams.scale * (quantized_val - qparams.zero_point); + output_buffer.data[i] = dst_val; + } + if (CountOpsWithInput(*model, fakequant_op->inputs[0]) == 1) { + model->arrays.erase(fakequant_op->inputs[0]); + } + model->operators.erase(fakequant_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tensorflow_shape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tensorflow_shape.cc new file mode 100644 index 0000000000..8cc6db1619 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tensorflow_shape.cc @@ -0,0 +1,62 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveConstantTensorFlowShape::Run(Model* model, std::size_t op_index) { + const auto tfshape_it = model->operators.begin() + op_index; + const auto* tfshape_base_op = tfshape_it->get(); + if (tfshape_base_op->type != OperatorType::kTensorFlowShape) { + return false; + } + + const auto* tfshape_op = + static_cast(tfshape_base_op); + + const auto& input_array = model->GetArray(tfshape_op->inputs[0]); + auto& output_array = model->GetArray(tfshape_op->outputs[0]); + + // Yield until the input array's shape has been resolved. + if (!input_array.has_shape()) { + return false; + } + + // Create a buffer for the output array, making it a constant array, and + // copy the input shape into the output buffer. + CHECK(!output_array.buffer); + auto& output_buffer = output_array.GetMutableBuffer(); + output_buffer.data = input_array.shape().dims(); + + // Erase the input array if no longer used + if (IsDiscardableArray(*model, tfshape_op->inputs[0]) && + CountOpsWithInput(*model, tfshape_op->inputs[0]) == 1) { + model->arrays.erase(tfshape_op->inputs[0]); + } + model->operators.erase(tfshape_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc new file mode 100644 index 0000000000..bb9bda3c82 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -0,0 +1,175 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { + const auto unary_it = model->operators.begin() + op_index; + const auto* unary_op = unary_it->get(); + // Test for unary ops of types that we know how to resolve + if (unary_op->type != OperatorType::kTensorFlowRsqrt && + unary_op->type != OperatorType::kTensorFlowSqrt && + unary_op->type != OperatorType::kTensorFlowSquare && + unary_op->type != OperatorType::kTensorFlowSum && + unary_op->type != OperatorType::kTensorFlowMin && + unary_op->type != OperatorType::kTensorFlowMax && + unary_op->type != OperatorType::kTensorFlowReshape) { + return false; + } + // Check if the input is a constant parameter. + if (!IsConstantParameterArray(*model, unary_op->inputs[0])) { + return false; + } + + // if the unary op involves a tensor required by a rnn state, ignore it + for (const auto& rnn_state : model->flags.rnn_states()) { + if (unary_op->inputs[0] == rnn_state.back_edge_source_array()) { + return false; + } + if (unary_op->inputs[0] == rnn_state.state_array()) { + return false; + } + } + + // At the moment we don't want to care about fused activation functions. + // The idea is that we should do the present constants-propagation before + // activation functions get fused. + if (unary_op->fused_activation_function != + FusedActivationFunctionType::kNone) { + AddMessageF( + "Not resolving constant %s " + " because it has a fused activation function", + LogName(*unary_op)); + return false; + } + const auto& input_array = model->GetArray(unary_op->inputs[0]); + // We have already tested above for existence of buffers (synonymous to being + // a constant param). + CHECK(input_array.buffer); + // At the moment we only support float buffers. + if (input_array.buffer->type != ArrayDataType::kFloat) { + return false; + } + const auto& input_float_data = + input_array.GetBuffer().data; + // Create the float buffer on the output array, effectively turning it into + // a constant parameter + const auto& output_name = unary_op->outputs[0]; + auto& output_array = model->GetArray(output_name); + // Yield until the output array dims have been resolved. + if (!output_array.has_shape()) { + return false; + } + + int input_buffer_size = RequiredBufferSizeForShape(input_array.shape()); + int output_buffer_size = RequiredBufferSizeForShape(output_array.shape()); + const Shape& input_shape = input_array.shape(); + const Shape& output_shape = output_array.shape(); + + auto& output_float_data = + output_array.GetMutableBuffer().data; + output_float_data.resize(output_buffer_size); + + const int output_dims_count = output_shape.dimensions_count(); + if (unary_op->type == OperatorType::kTensorFlowReshape) { + CHECK(input_buffer_size == output_buffer_size); + memcpy(output_float_data.data(), input_float_data.data(), + input_buffer_size * sizeof(input_float_data[0])); + } else if (unary_op->type == OperatorType::kTensorFlowSum) { + // At the moment only full reduction across all dimensions is supported. + for (int i = 0; i < output_dims_count; i++) { + CHECK_EQ(output_shape.dims(i), 1); + } + float sum = 0.f; + const int input_size = RequiredBufferSizeForShape(input_shape); + for (int i = 0; i < input_size; i++) { + sum += input_float_data[i]; + } + output_float_data[0] = sum; + } else if (unary_op->type == OperatorType::kTensorFlowMin) { + // At the moment only full reduction across all dimensions is supported. + // TODO(starka): Output should not be padded. + for (int i = 0; i < output_dims_count; i++) { + CHECK_EQ(output_shape.dims(i), 1); + } + float min = input_float_data[0]; + const int input_size = RequiredBufferSizeForShape(input_shape); + for (int i = 0; i < input_size; i++) { + min = std::min(min, input_float_data[i]); + } + output_float_data[0] = min; + } else if (unary_op->type == OperatorType::kTensorFlowMax) { + // At the moment only full reduction across all dimensions is supported. + // TODO(starka): Output should not be padded. + for (int i = 0; i < output_dims_count; i++) { + CHECK_EQ(output_shape.dims(i), 1); + } + float max = input_float_data[0]; + const int input_size = RequiredBufferSizeForShape(input_shape); + for (int i = 0; i < input_size; i++) { + max = std::max(max, input_float_data[i]); + } + output_float_data[0] = max; + } else if (unary_op->type == OperatorType::kTensorFlowRsqrt || + unary_op->type == OperatorType::kTensorFlowSqrt || + unary_op->type == OperatorType::kTensorFlowSquare) { + // Element-wise ops. Should have perfectly matching sizes here. + const int input_size = RequiredBufferSizeForShape(input_shape); + for (int i = 0; i < output_dims_count; i++) { + CHECK_EQ(output_shape.dims(i), input_shape.dims(i)); + } + + for (int i = 0; i < input_size; i++) { + const float val = input_float_data[i]; + float outval = 0.f; + if (unary_op->type == OperatorType::kTensorFlowRsqrt) { + outval = 1.0f / std::sqrt(val); + } else if (unary_op->type == OperatorType::kTensorFlowSqrt) { + outval = std::sqrt(val); + } else if (unary_op->type == OperatorType::kTensorFlowSquare) { + outval = val * val; + } else { + LOG(FATAL) << "should not get here."; + } + output_float_data[i] = outval; + } + } else { + LOG(FATAL) << "should not get here."; + } + for (const auto& input : unary_op->inputs) { + if (CountOpsWithInput(*model, input) == 1) { + model->arrays.erase(input); + } + } + AddMessageF("Resolved constant %s to the equivalent constant array", + LogName(*unary_op)); + model->operators.erase(unary_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc new file mode 100644 index 0000000000..d25c773f19 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc @@ -0,0 +1,51 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveMeanAttributes::Run(Model* model, std::size_t op_index) { + auto* mean_op = model->operators[op_index].get(); + if (mean_op->type != OperatorType::kMean) return false; + auto* op = static_cast(mean_op); + + if (!op->reduction_indices.empty()) return false; + if (op->inputs.size() != 2) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + + const auto& indices_array = *model->arrays[op->inputs[1]]; + if (!indices_array.has_shape()) return false; + + op->reduction_indices = indices_array.GetBuffer().data; + + // At the moment, we only support simultaneous reduction over width and + // height. This is mainly limited by the fact that currently, the runtime + // arrays are always 4-dimensional. + CHECK_EQ(op->reduction_indices.size(), 2); + CHECK((op->reduction_indices[0] == 1 && op->reduction_indices[1] == 2) || + (op->reduction_indices[0] == 2 && op->reduction_indices[1] == 1)); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc new file mode 100644 index 0000000000..d5f5869c62 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc @@ -0,0 +1,55 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) { + const auto pad_it = model->operators.begin() + op_index; + auto* pad_op = pad_it->get(); + if (pad_op->type != OperatorType::kPad) return false; + + auto* op = static_cast(pad_op); + if (!op->left_padding.empty()) return false; + + CHECK_EQ(op->inputs.size(), 2); + if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + + const auto& array = *model->arrays[op->inputs[1]]; + if (!array.has_shape()) return false; + + const std::vector& dims = array.shape().dims(); + CHECK_EQ(dims.size(), 2); + + std::vector buffer = array.GetBuffer().data; + + for (int i = 0; i < dims[0]; ++i) { + op->left_padding.push_back(buffer[i * 2]); + op->right_padding.push_back(buffer[i * 2 + 1]); + } + + // TODO(dkalenichenko): Delete the extra input? + + return true; +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc new file mode 100644 index 0000000000..8fa7b83bed --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc @@ -0,0 +1,93 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) { + auto reorder_it = model->operators.begin() + op_index; + auto* reorder_op = static_cast(reorder_it->get()); + if (reorder_op->type != OperatorType::kReorderAxes) { + return false; + } + const auto& input_array_name = reorder_op->inputs[0]; + const auto& output_array_name = reorder_op->outputs[0]; + auto& input_array = model->GetArray(input_array_name); + auto& output_array = model->GetArray(output_array_name); + string constant_input_array_name = input_array_name; + if (!input_array.buffer) { + const auto* op_producing_input = GetOpWithOutput(*model, input_array_name); + if (op_producing_input && + op_producing_input->type == OperatorType::kFakeQuant) { + constant_input_array_name = op_producing_input->inputs[0]; + } + } + auto& constant_input_array = model->GetArray(constant_input_array_name); + if (!constant_input_array.buffer) { + return false; + } + // Yield until output dims have been resolved. + if (!output_array.has_shape()) { + return false; + } + // Reorder the input array dims and buffer data + CHECK(constant_input_array.buffer->type == ArrayDataType::kFloat); + CHECK(!output_array.buffer); + auto& input_data = + constant_input_array.GetMutableBuffer().data; + std::vector reordered_data; + reordered_data.resize(RequiredBufferSizeForShape(output_array.shape())); + const auto input_axes_order = reorder_op->input_axes_order; + const auto output_axes_order = reorder_op->output_axes_order; + // TODO(b/62904716) Shapes should be used directly. + Shape input_shape = constant_input_array.shape(); + Shape output_shape = output_array.shape(); + if (AxesCount(input_axes_order) == 2) { + UnextendShape(&input_shape, 2); + UnextendShape(&output_shape, 2); + } + ShuffleArray(input_shape, input_axes_order, output_axes_order, output_shape, + input_data.data(), reordered_data.data()); + input_data = reordered_data; + input_array.copy_shape(output_array.shape()); + constant_input_array.copy_shape(output_array.shape()); + + // Update the edges of the graph to point to the input array + for (const auto& other_op : model->operators) { + for (auto& input : other_op->inputs) { + if (input == output_array_name) { + input = input_array_name; + } + } + } + + AddMessageF("Reordered axes for array %s", input_array_name); + + // Remove the op and output array. + model->arrays.erase(output_array_name); + model->operators.erase(reorder_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc new file mode 100644 index 0000000000..bed2a85bd2 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc @@ -0,0 +1,49 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveReshapeAttributes::Run(Model* model, std::size_t op_index) { + const auto reshape_it = model->operators.begin() + op_index; + auto* reshape_op = reshape_it->get(); + if (reshape_op->type != OperatorType::kTensorFlowReshape) { + return false; + } + + auto* op = static_cast(reshape_op); + + if (!op->shape.empty()) return false; + + if (IsConstantParameterArray(*model, reshape_op->inputs[1])) { + const auto& constant_input_array = *model->arrays[reshape_op->inputs[1]]; + op->shape = constant_input_array.GetBuffer().data; + } + + if (op->shape.empty()) return false; + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc new file mode 100644 index 0000000000..1d0a2ec8f6 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc @@ -0,0 +1,52 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveSliceAttributes::Run(Model* model, std::size_t op_index) { + const auto slice_it = model->operators.begin() + op_index; + auto* slice_op = slice_it->get(); + if (slice_op->type != OperatorType::kSlice) return false; + + auto* op = static_cast(slice_op); + if (!op->begin.empty()) return false; + + CHECK_EQ(op->inputs.size(), 3); + if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[2])) return false; + + const auto& begin_array = *model->arrays[op->inputs[1]]; + if (!begin_array.has_shape()) return false; + + const auto& size_array = *model->arrays[op->inputs[2]]; + if (!size_array.has_shape()) return false; + + op->begin = begin_array.GetBuffer().data; + op->size = size_array.GetBuffer().data; + + // TODO(dkalenichenko): Delete the extra inputs? + + return true; +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc new file mode 100644 index 0000000000..5fc3b25bc1 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc @@ -0,0 +1,62 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) { + const auto slice_it = model->operators.begin() + op_index; + auto* slice_op = slice_it->get(); + if (slice_op->type != OperatorType::kStridedSlice) return false; + + auto* op = static_cast(slice_op); + if (!op->start_indices.empty()) return false; + + CHECK_EQ(op->inputs.size(), 4); + if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[2])) return false; + if (!IsConstantParameterArray(*model, op->inputs[3])) return false; + + const auto& start_array = *model->arrays[op->inputs[1]]; + if (!start_array.has_shape()) return false; + + const auto& stop_array = *model->arrays[op->inputs[2]]; + if (!stop_array.has_shape()) return false; + + const auto& stride_array = *model->arrays[op->inputs[3]]; + if (!stride_array.has_shape()) return false; + + op->start_indices = start_array.GetBuffer().data; + op->stop_indices = stop_array.GetBuffer().data; + op->strides = stride_array.GetBuffer().data; + + // Only 4D arrays are supported for now. + CHECK_EQ(op->start_indices.size(), 4); + CHECK_EQ(op->stop_indices.size(), 4); + CHECK_EQ(op->strides.size(), 4); + + // TODO(dkalenichenko): Delete the extra inputs? + + return true; +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc new file mode 100644 index 0000000000..b482f5cf51 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc @@ -0,0 +1,86 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) { + auto concat_it = model->operators.begin() + op_index; + const auto* tf_concat_op = concat_it->get(); + if (tf_concat_op->type != OperatorType::kTensorFlowConcat && + tf_concat_op->type != OperatorType::kTensorFlowConcatV2) { + return false; + } + + CHECK_GE(tf_concat_op->inputs.size(), 2); + // TensorFlow Concat and ConcatV2 nodes only differ by the ordering + // of inputs: in Concat, the concat_dim is the first input, while in + // ConcatV2, it is the last input. + std::size_t concat_dim_pos = 0; + if (tf_concat_op->type == OperatorType::kTensorFlowConcatV2) { + concat_dim_pos = tf_concat_op->inputs.size() - 1; + } + const string concat_dim_name = tf_concat_op->inputs[concat_dim_pos]; + std::vector concat_input_names; + for (std::size_t i = 0; i < tf_concat_op->inputs.size(); i++) { + if (i != concat_dim_pos) { + concat_input_names.push_back(tf_concat_op->inputs[i]); + } + } + // If the concat_dim array hasn't been resolved to a constant yet, + // we need to yield. + const auto& concat_dim_array = model->GetArray(concat_dim_name); + if (!concat_dim_array.buffer) { + AddMessageF("Waiting for the concat_dim of %s to be resolved to a constant", + LogName(*tf_concat_op)); + return false; + } + + CHECK(concat_dim_array.data_type == ArrayDataType::kInt32); + const auto& concat_dim_data = + concat_dim_array.GetBuffer().data; + CHECK_EQ(concat_dim_data.size(), 1); + const int concat_dim = concat_dim_data[0]; + + // Create the Concatenation op replacing the TensorFlowConcat op. + auto* concatenation_op = new ConcatenationOperator; + concatenation_op->concat_dim = concat_dim; + concatenation_op->inputs = concat_input_names; + concatenation_op->outputs = {tf_concat_op->outputs[0]}; + auto depth_concat_it = model->operators.emplace(concat_it, concatenation_op); + CHECK_EQ(depth_concat_it->get(), concatenation_op); + // Update invalidated iterator + concat_it = depth_concat_it + 1; + CHECK_EQ(concat_it->get(), tf_concat_op); + + // Remove the concat_dim array if it is not used by anything else. + if (CountOpsWithInput(*model, concat_dim_name) == 1) { + model->arrays.erase(concat_dim_name); + } + // Remove the TensorFlowConcat op + model->operators.erase(concat_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc new file mode 100644 index 0000000000..bea7487051 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc @@ -0,0 +1,106 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { + auto matmul_it = model->operators.begin() + op_index; + if (matmul_it->get()->type != OperatorType::kTensorFlowMatMul) { + return false; + } + const auto* matmul_op = matmul_it->get(); + + // Find the op producing the array passed to this MatMul + auto previous_op_it = model->operators.begin(); + bool found = false; + for (; previous_op_it != model->operators.end(); ++previous_op_it) { + for (const auto& output : (*previous_op_it)->outputs) { + if (output == matmul_op->inputs[0]) { + found = true; + break; + } + } + if (found) { + break; + } + } + Operator* previous_op = (found) ? previous_op_it->get() : nullptr; + + // construct the new FullyConnectedOperator + auto* fc_op = new FullyConnectedOperator; + fc_op->outputs = matmul_op->outputs; + + // insert the newly constructed FullyConnectedOperator + auto fc_it = model->operators.emplace(matmul_it, fc_op); + + // refresh invalidated iterator + matmul_it = fc_it + 1; + DCHECK_EQ(matmul_it->get(), matmul_op); + + // The way that TensorFlow encodes FullyConnected ops is as a pair + // (Reshape, MatMul), so we want to remove the Reshape op and rewrite the + // MatMul + // op as a FullyConnected. However, TensorFlow skips the Reshape ops if the + // input doesn't need reshaping, so we can't just match (Reshape, MatMul) + // pairs. + if (previous_op && previous_op->type == OperatorType::kTensorFlowReshape) { + AddMessageF("Combining %s and %s into %s", LogName(*previous_op), + LogName(*matmul_op), LogName(*fc_op)); + const auto& previous_op_output = previous_op->outputs[0]; + if (CountOpsWithInput(*model, previous_op_output) == 1) { + model->arrays.erase(previous_op_output); + } + CHECK_EQ(previous_op->inputs.size(), 2); + fc_op->inputs = {previous_op->inputs[0], matmul_op->inputs[1]}; + // Only remove Reshape node if no other node uses its output. + if (CountOpsWithInput(*model, previous_op_output) == 1) { + const auto& previous_op_shape = previous_op->inputs[1]; + if (CountOpsWithInput(*model, previous_op_shape) == 1 && + !GetOpWithOutput(*model, previous_op_shape)) { + model->arrays.erase(previous_op_shape); + } + model->operators.erase(previous_op_it); + } + + // We may have just invalidated matmul_it, so let's refresh it now. + matmul_it = model->operators.begin(); + for (; matmul_it != model->operators.end(); ++matmul_it) { + if (matmul_it->get() == matmul_op) { + break; + } + } + CHECK(matmul_it != model->operators.end()); + CHECK(matmul_it->get() == matmul_op); + } else { + AddMessageF("Replacing %s by a FullyConnected operator", + LogName(*matmul_op)); + fc_op->inputs = {matmul_op->inputs[0], matmul_op->inputs[1]}; + } + + // erase the MatMul operator + model->operators.erase(matmul_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc new file mode 100644 index 0000000000..cfa5ce0716 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { + const auto merge_it = model->operators.begin() + op_index; + const auto* merge_op = merge_it->get(); + if (merge_op->type != OperatorType::kTensorFlowMerge) { + return false; + } + + // We need to yield until this Merge node has only 1 input, which will mean + // that that is the selected input. Other graph transformations on other nodes + // such as ResolveTensorFlowSwitch, will take care of trimming the + // non-selected inputs, so that at some point there will be only 1 input left. + if (merge_op->inputs.size() > 1) { + AddMessageF("Waiting for %s to be resolved", LogName(*merge_op)); + return false; + } + + // Now that the merge node has 1 input exactly, it is the same as an Identity + // node and can be resolved trivially. + CHECK_EQ(merge_op->inputs.size(), 1); + + // Update the edges of the graph ahead of removing the node. + for (const auto& other_op : model->operators) { + for (auto& input : other_op->inputs) { + if (input == merge_op->outputs[0]) { + input = merge_op->inputs[0]; + } + } + } + + // Remove the node and its output array. + AddMessageF("Removing already-resolved %s", LogName(*merge_op)); + model->arrays.erase(merge_op->outputs[0]); + model->operators.erase(merge_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_squeeze.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_squeeze.cc new file mode 100644 index 0000000000..1d3f42b5ec --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_squeeze.cc @@ -0,0 +1,54 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveTensorFlowSqueeze::Run(Model* model, std::size_t op_index) { + const auto squeeze_it = model->operators.begin() + op_index; + const auto* squeeze_op = squeeze_it->get(); + if (squeeze_op->type != OperatorType::kSqueeze) { + return false; + } + + CHECK_EQ(squeeze_op->inputs.size(), 1); + CHECK_EQ(squeeze_op->outputs.size(), 1); + + // If the output is consumed by a reshape op, it's a trivial squeeze. + if (CountOpsWithInput(*model, squeeze_op->outputs[0]) == 1) { + const auto* next_op = GetOpWithInput(*model, squeeze_op->outputs[0]); + if (next_op->type == OperatorType::kTensorFlowReshape) { + AddMessageF( + "%s is trivial because its output is only consumed by a " + "Reshape op", + LogName(*squeeze_op)); + + return RemoveTrivialPassthroughOp(this, model, op_index); + } + } + + return false; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc new file mode 100644 index 0000000000..55adfca037 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc @@ -0,0 +1,123 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) { + const auto switch_it = model->operators.begin() + op_index; + const auto* switch_op = switch_it->get(); + if (switch_op->type != OperatorType::kTensorFlowSwitch) { + return false; + } + + CHECK_EQ(switch_op->inputs.size(), 2); + CHECK_EQ(switch_op->outputs.size(), 2); + const string& predicate_name = switch_op->inputs[1]; + // If the predicate array hasn't been resolved to a constant yet, + // we need to yield. + if (!IsConstantParameterArray(*model, predicate_name)) { + AddMessageF( + "Waiting for the boolean predicate of %s to be resolved to a constant", + LogName(*switch_op)); + return false; + } + + // The predicate should be boolean, and should consist of a single value. + const auto& predicate_array = model->GetArray(predicate_name); + CHECK(predicate_array.data_type == ArrayDataType::kBool); + for (const auto& dim : predicate_array.shape().dims()) { + CHECK_EQ(dim, 1); + } + + // Obtain the predicate boolean value. + const auto& predicate_data = + predicate_array.GetBuffer().data; + CHECK_EQ(predicate_data.size(), 1); + const bool predicate_value = predicate_data[0]; + + // From the TensorFlow docs on .switch() in + // third_party/tensorflow/python/ops/control_flow_ops.py + // + // If `pred` is false, the `data` input is forwared to the first output. + // Otherwise, the data goes to the second output. + // + // Note that this comment used to say the opposite and was recently fixed: + // https://github.com/tensorflow/tensorflow/commit/bc456e361d49d1d89a74b80060c70efb51fd7d87#diff-76ab9dafbe12c20ddc3769c6b108986c + const int selected_output_index = predicate_value ? 1 : 0; + const int nonselected_output_index = predicate_value ? 0 : 1; + + // Update the edges of the graph ahead of removing the node: + // edges that were pointing to the selected output, should instead + // point to the input of the Switch node. + for (const auto& other_op : model->operators) { + for (auto& input : other_op->inputs) { + if (input == switch_op->outputs[selected_output_index]) { + input = switch_op->inputs[0]; + } + } + } + + // There remains to handle the edges that were pointing to the nonselected + // output. We will just discard those edges. Concretely, at the moment, + // our only examples of graphs with Switch nodes have them feeding into Merge + // nodes, so what we're saying here is that we'll make the convention, + // in our toco internal representation, that Merge nodes with only 1 input + // are Merge nodes that have been resolved already and should be have as + // Identity nodes, simply forwarding their input. + // + for (const auto& other_op : model->operators) { + auto input_it = other_op->inputs.begin(); + while (input_it != other_op->inputs.end()) { + if (*input_it == switch_op->outputs[nonselected_output_index]) { + // Let us guard our assumption that only Merge nodes consume the outputs + // of Switch nodes: + CHECK(other_op->type == OperatorType::kTensorFlowMerge); + input_it = other_op->inputs.erase(input_it); + } else { + ++input_it; + } + } + } + + // Remove the output arrays if they are now unused. + for (int i = 0; i < 2; i++) { + if (!GetOpWithInput(*model, switch_op->outputs[i])) { + model->arrays.erase(switch_op->outputs[i]); + } + } + // Remove input arrays if they are only used by the switch itself and aren't + // the output of another op (will get handled by RemoveUnusedOp in that case). + for (const auto& input : switch_op->inputs) { + if (CountOpsWithInput(*model, input) == 1 && + !GetOpWithOutput(*model, input)) { + model->arrays.erase(input); + } + } + // Remove the switch node itself. + AddMessageF("Removing already-resolved %s", LogName(*switch_op)); + model->operators.erase(switch_it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_tile.cc new file mode 100644 index 0000000000..9f7e7c42a2 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_tile.cc @@ -0,0 +1,97 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +void RemoveTileOperator(Model* model, Operator* tile_op, Operator* binary_op, + int operand_index) { + CHECK(tile_op->type == OperatorType::kTensorFlowTile); + CHECK_EQ(binary_op->inputs.size(), 2); + CHECK_EQ(tile_op->inputs.size(), 2); + const string tile_multiplier_array = tile_op->inputs[1]; + const string tile_output_array = tile_op->outputs[0]; + binary_op->inputs[operand_index] = tile_op->inputs[0]; + auto tile_it = model->operators.begin(); + for (; tile_it != model->operators.end(); ++tile_it) { + if (tile_it->get() == tile_op) { + break; + } + } + CHECK(tile_it != model->operators.end()); + CHECK(tile_it->get() == tile_op); + model->operators.erase(tile_it); + if (!CountOpsWithInput(*model, tile_multiplier_array) && + !GetOpWithOutput(*model, tile_multiplier_array)) { + model->arrays.erase(tile_multiplier_array); + } + if (!CountOpsWithInput(*model, tile_output_array)) { + model->arrays.erase(tile_output_array); + } +} +} // namespace + +bool ResolveTensorFlowTile::Run(Model* model, std::size_t op_index) { + const auto binary_it = model->operators.begin() + op_index; + auto* binary_op = binary_it->get(); + // Test for binary ops of types that we know how to resolve + if (binary_op->inputs.size() != 2) { + return false; + } + if (binary_op->type != OperatorType::kAdd && + binary_op->type != OperatorType::kMul && + binary_op->type != OperatorType::kSub && + binary_op->type != OperatorType::kDiv) { + return false; + } + + Operator* const op[2] = { + GetOpWithOutput(*model, binary_op->inputs[0]), + GetOpWithOutput(*model, binary_op->inputs[1]), + }; + + // In the unlikely case where both operands are Tile, we can't infer the + // output + // size without the Tile nodes, so we have to bail out. + if (op[0] && op[0]->type == OperatorType::kTensorFlowTile && op[1] && + op[1]->type == OperatorType::kTensorFlowTile) { + return false; + } + + for (int i = 0; i < 2; i++) { + if (op[i] && op[i]->type == OperatorType::kTensorFlowTile) { + // We can only remove a Tile operator is no other op than the present + // binary op was consuming its tiled output. + if (CountOpsWithInput(*model, binary_op->inputs[i]) == 1) { + AddMessageF("Removing %s", LogName(*op[i])); + RemoveTileOperator(model, op[i], binary_op, i); + return true; + } + } + } + return false; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD new file mode 100644 index 0000000000..8931498782 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD @@ -0,0 +1,31 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) + +tf_cc_test( + name = "resolve_constant_concatenation_test", + srcs = ["resolve_constant_concatenation_test.cc"], + deps = [ + "//tensorflow/contrib/lite/toco:graph_transformations", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:tooling_util", + "@com_google_googletest//:gtest_main", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc new file mode 100644 index 0000000000..c6705ad305 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc @@ -0,0 +1,221 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include +#include +//#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +namespace { +// A gmock matcher that check that elements of a float vector match to a given +// tolerance. +std::vector> ArrayFloatNear( + const std::vector& values, float max_abs_error = 1e-5) { + std::vector> matchers; + matchers.reserve(values.size()); + for (const float& v : values) { + matchers.emplace_back(testing::FloatNear(v, max_abs_error)); + } + return matchers; +} +} // namespace + +// The following 3 tests make sure the concatenation operation on different axis +// values match TensorFlow results listed below: +// +// x0 = [[[0, 1], [2, 3]], [[4, 5], [6, 7]]] +// x1 = [[[10, 11], [12, 13]], [[14, 15], [16, 17]]] +// x2 = [[[20, 21], [22, 23]], [[24, 25], [26, 27]]] +// x3 = [[[30, 31], [32, 33]], [[34, 35], [36, 37]]] +// +// ConcatAtAxis0 test: +// t0 = tf.concat([x0, x1, x2, x3], 0) +// [[[ 0 1] +// [ 2 3]] +// +// [[ 4 5] +// [ 6 7]] +// +// [[10 11] +// [12 13]] +// +// [[14 15] +// [16 17]] +// +// [[20 21] +// [22 23]] +// +// [[24 25] +// [26 27]] +// +// [[30 31] +// [32 33]] +// +// [[34 35] +// [36 37]]] +// +// ConcatAtAxis1 test: +// t1 = tf.concat([x0, x1, x2, x3], 1) +// [[[ 0 1] +// [ 2 3] +// [10 11] +// [12 13] +// [20 21] +// [22 23] +// [30 31] +// [32 33]] +// +// [[ 4 5] +// [ 6 7] +// [14 15] +// [16 17] +// [24 25] +// [26 27] +// [34 35] +// [36 37]]] +// +// ConcatAtAxis2 test: +// t2 = tf.concat([x0, x1, x2, x3], 2) +// [[[ 0 1 10 11 20 21 30 31] +// [ 2 3 12 13 22 23 32 33]] +// +// [[ 4 5 14 15 24 25 34 35] +// [ 6 7 16 17 26 27 36 37]]] + +class ResolveConstantConcatenationTest : public ::testing::Test { + protected: + ResolveConstantConcatenationTest() {} + + // Prepare a hypothetical TOCO model with one Concatenation operator in it + // together with 4 arrays as its inputs. + // It receives the dimension of concatenation as input. + void PrepareModel(Model* model, int concat_dim) { + std::vector concat_input_names = {"array0", "array1", "array2", + "array3"}; + + const int kDim = 3; + const int kElementPerDim = 2; + const int kBufSize = 8; + const int kNumArrays = 4; + static float in_buf[kNumArrays][kBufSize] = { + {0., 1., 2., 3., 4., 5., 6., 7.}, + {10., 11., 12., 13., 14., 15., 16., 17.}, + {20., 21., 22., 23., 24., 25., 26., 27.}, + {30., 31., 32., 33., 34., 35., 36., 37.}}; + int cnt = 0; + for (const string& concat_input_name : concat_input_names) { + Array& in_array = model->GetOrCreateArray(concat_input_name); + in_array.data_type = ArrayDataType::kFloat; + + // Initialize shape for the input array. + Shape* in_array_shape = in_array.mutable_shape(); + std::vector* in_array_shape_dim = in_array_shape->mutable_dims(); + for (int i = 0; i < kDim; i++) { + in_array_shape_dim->push_back(kElementPerDim); + } + auto& in_array_buffer = + in_array.GetMutableBuffer(); + in_array_buffer.data.resize(kBufSize); + float* buf_ptr = + in_array.GetMutableBuffer().data.data(); + std::copy(in_buf[cnt], in_buf[cnt] + kBufSize, buf_ptr); + cnt++; + } + auto* concatenation_op = new ConcatenationOperator; + concatenation_op->concat_dim = concat_dim; + concatenation_op->inputs = concat_input_names; + concatenation_op->outputs = {"concat_op_outputs"}; + Array& out_array = model->GetOrCreateArray(concatenation_op->outputs[0]); + out_array.data_type = ArrayDataType::kFloat; + Shape* out_array_shape = out_array.mutable_shape(); + std::vector* out_array_shape_dim = out_array_shape->mutable_dims(); + out_array_shape_dim->resize(kDim); + for (int i = 0; i < kDim; i++) { + if (i == concat_dim) { + (*out_array_shape_dim)[i] = kNumArrays * kElementPerDim; + } else { + (*out_array_shape_dim)[i] = kElementPerDim; + } + } + model->operators.push_back(std::unique_ptr(concatenation_op)); + } +}; + +TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) { + Model model; + const int concat_dim = 0; + PrepareModel(&model, concat_dim); + + GraphTransformationsSet graph_transformation_set; + graph_transformation_set.Add(new toco::ResolveConstantConcatenation); + EXPECT_THAT(model.arrays.size(), 5); + (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + EXPECT_THAT(model.arrays.size(), 1); + + auto& concatenated_array = (*model.arrays.begin()).second; + EXPECT_THAT(concatenated_array->GetBuffer().data, + ElementsAreArray(ArrayFloatNear( + {0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., + 13., 14., 15., 16., 17., 20., 21., 22., 23., 24., 25., + 26., 27., 30., 31., 32., 33., 34., 35., 36., 37.}))); +} + +TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) { + Model model; + const int concat_dim = 1; + PrepareModel(&model, concat_dim); + + GraphTransformationsSet graph_transformation_set; + graph_transformation_set.Add(new toco::ResolveConstantConcatenation); + EXPECT_THAT(model.arrays.size(), 5); + (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + EXPECT_THAT(model.arrays.size(), 1); + + auto& concatenated_array = (*model.arrays.begin()).second; + EXPECT_THAT(concatenated_array->GetBuffer().data, + ElementsAreArray(ArrayFloatNear( + {0., 1., 2., 3., 10., 11., 12., 13., 20., 21., 22., + 23., 30., 31., 32., 33., 4., 5., 6., 7., 14., 15., + 16., 17., 24., 25., 26., 27., 34., 35., 36., 37.}))); +} + +TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis2) { + Model model; + const int concat_dim = 2; + PrepareModel(&model, concat_dim); + + GraphTransformationsSet graph_transformation_set; + graph_transformation_set.Add(new toco::ResolveConstantConcatenation); + EXPECT_THAT(model.arrays.size(), 5); + (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + EXPECT_THAT(model.arrays.size(), 1); + + auto& concatenated_array = (*model.arrays.begin()).second; + EXPECT_THAT(concatenated_array->GetBuffer().data, + ElementsAreArray(ArrayFloatNear( + {0., 1., 10., 11., 20., 21., 30., 31., 2., 3., 12., + 13., 22., 23., 32., 33., 4., 5., 14., 15., 24., 25., + 34., 35., 6., 7., 16., 17., 26., 27., 36., 37.}))); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc new file mode 100644 index 0000000000..4e273343df --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc @@ -0,0 +1,73 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) { + const auto it = model->operators.begin() + op_index; + auto* op = it->get(); + + // If a conv operation has an im2col array, yield: it should be dropped first. + if ((op->type == OperatorType::kConv) && (op->outputs.size() == 2)) { + return false; + } + + Operator* ac_op = nullptr; + switch (op->fused_activation_function) { + case FusedActivationFunctionType::kRelu: + ac_op = new ReluOperator; + break; + case FusedActivationFunctionType::kRelu6: + ac_op = new Relu6Operator; + break; + case FusedActivationFunctionType::kRelu1: + ac_op = new Relu1Operator; + break; + default: + return false; + } + + // At this point we know that the op has a fused activation function. At the + // moment that only happens with ops having a single output, may be + // relaxed in the future. + CHECK_EQ(op->outputs.size(), 1); + + // Emplace unfused activation function, drop the fused one. + model->operators.emplace(it + 1, ac_op); + op->fused_activation_function = FusedActivationFunctionType::kNone; + + // Wire up arrays, constructing a new intermediate array to connect the + // op to its new unfused activation function. + ac_op->outputs = op->outputs; + const string& tmp_array_name = + AvailableArrayName(*model, op->outputs[0] + "_unfused"); + CHECK(!model->arrays.count(tmp_array_name)); + model->GetOrCreateArray(tmp_array_name); + ac_op->inputs = {tmp_array_name}; + op->outputs = {tmp_array_name}; + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc new file mode 100644 index 0000000000..c889149ada --- /dev/null +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -0,0 +1,1508 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "google/protobuf/map.h" +#include "google/protobuf/text_format.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/strip.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.h" +#include "tensorflow/contrib/lite/toco/tensorflow_util.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/process_function_library_runtime.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +using tensorflow::AttrValue; +using tensorflow::DT_BOOL; +using tensorflow::DT_FLOAT; +using tensorflow::DT_INT32; +using tensorflow::DT_INT64; +using tensorflow::DT_UINT8; +using tensorflow::GraphDef; +using tensorflow::NodeDef; +using tensorflow::TensorProto; +using tensorflow::TensorShapeProto; + +namespace toco { +namespace { +bool HasAttr(const NodeDef& node, const string& attr_name) { + return node.attr().count(attr_name) > 0; +} + +const string& GetStringAttr(const NodeDef& node, const string& attr_name) { + CHECK(HasAttr(node, attr_name)); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kS); + return attr.s(); +} + +int GetIntAttr(const NodeDef& node, const string& attr_name) { + CHECK(HasAttr(node, attr_name)) << attr_name << " not found in:\n" + << node.DebugString(); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kI); + return attr.i(); +} + +float GetFloatAttr(const NodeDef& node, const string& attr_name) { + CHECK(HasAttr(node, attr_name)); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kF); + return attr.f(); +} + +bool GetBoolAttr(const NodeDef& node, const string& attr_name) { + CHECK(HasAttr(node, attr_name)); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kB); + return attr.b(); +} + +tensorflow::DataType GetDataTypeAttr(const NodeDef& node, + const string& attr_name) { + CHECK(HasAttr(node, attr_name)); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kType); + return attr.type(); +} + +const TensorShapeProto& GetShapeAttr(const NodeDef& node, + const string& attr_name) { + CHECK(HasAttr(node, attr_name)); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kShape); + return attr.shape(); +} + +const TensorProto& GetTensorAttr(const NodeDef& node, const string& attr_name) { + CHECK(HasAttr(node, attr_name)); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kTensor); + return attr.tensor(); +} + +const AttrValue::ListValue& GetListAttr(const NodeDef& node, + const string& attr_name) { + CHECK(HasAttr(node, attr_name)); + const auto& attr = node.attr().at(attr_name); + CHECK_EQ(attr.value_case(), AttrValue::kList); + return attr.list(); +} + +ArrayDataType ConvertDataType(tensorflow::DataType dtype) { + if (dtype == DT_UINT8) + return ArrayDataType::kUint8; + else if (dtype == DT_FLOAT) + return ArrayDataType::kFloat; + else if (dtype == DT_BOOL) + return ArrayDataType::kBool; + else if (dtype == DT_INT32) + return ArrayDataType::kInt32; + else if (dtype == DT_INT64) + return ArrayDataType::kInt64; + else + LOG(INFO) << "Unsupported data type in placehoder op: " << dtype; + return ArrayDataType::kNone; +} + +void ImportShape(const TFLITE_PROTO_NS::RepeatedPtrField< + tensorflow::TensorShapeProto_Dim>& input_dims, + Shape* shape) { + std::vector input_dims_only_sizes; + for (auto& d : input_dims) { + if (d.size() == 0) { + // Some TensorFlow shapes contain a 0 dim, effectively making + // them of flat size 0 even though they have other nonzero dims. + // This breaks our invariant, that array dims can't be 0. + // For now, tweaking this to record a 0-D shape instead. + input_dims_only_sizes.clear(); + break; + } + input_dims_only_sizes.push_back(d.size()); + } + *shape->mutable_dims() = input_dims_only_sizes; +} + +void ImportFloatArray(const TensorProto& input_tensor, Array* output_array) { + CHECK_EQ(input_tensor.dtype(), DT_FLOAT); + const auto& input_shape = input_tensor.tensor_shape(); + CHECK_LE(input_shape.dim_size(), 4); + ImportShape(input_shape.dim(), output_array->mutable_shape()); + int input_flat_size = 1; + for (int k = 0; k < input_shape.dim_size(); k++) { + input_flat_size *= input_shape.dim(k).size(); + } + auto& output_float_data = + output_array->GetMutableBuffer().data; + output_float_data.resize(input_flat_size); + if (input_tensor.float_val_size()) { + for (int i = 0; i < input_tensor.float_val_size(); i++) { + output_float_data[i] = input_tensor.float_val(i); + } + } else if (input_tensor.tensor_content().size() == + input_flat_size * sizeof(float)) { + toco::port::CopyToBuffer(input_tensor.tensor_content(), + reinterpret_cast(output_float_data.data())); + } else { + LOG(FATAL) << "Neither input_content nor float_val have the right " + "dimensions for this float tensor."; + } +} + +void ImportInt32Array(const TensorProto& input_tensor, Array* output_array) { + CHECK_EQ(input_tensor.dtype(), DT_INT32); + const auto& input_shape = input_tensor.tensor_shape(); + CHECK_LE(input_shape.dim_size(), 4); + ImportShape(input_shape.dim(), output_array->mutable_shape()); + int input_flat_size = 1; + for (int k = 0; k < input_shape.dim_size(); k++) { + input_flat_size *= input_shape.dim(k).size(); + } + auto& output_int_data = + output_array->GetMutableBuffer().data; + output_int_data.resize(input_flat_size); + if (input_tensor.int_val_size()) { + for (int i = 0; i < input_tensor.int_val_size(); i++) { + output_int_data[i] = input_tensor.int_val(i); + } + } else if (input_tensor.tensor_content().size() == + input_flat_size * sizeof(int32)) { + toco::port::CopyToBuffer(input_tensor.tensor_content(), + reinterpret_cast(output_int_data.data())); + } else { + LOG(FATAL) << "Neither input_content nor int_val have the right " + "dimensions for this int32 tensor."; + } +} + +void ImportInt64Array(const TensorProto& input_tensor, Array* output_array) { + CHECK_EQ(input_tensor.dtype(), DT_INT64); + const auto& input_shape = input_tensor.tensor_shape(); + CHECK_LE(input_shape.dim_size(), 4); + ImportShape(input_shape.dim(), output_array->mutable_shape()); + int input_flat_size = 1; + for (int k = 0; k < input_shape.dim_size(); k++) { + input_flat_size *= input_shape.dim(k).size(); + } + auto& output_int_data = + output_array->GetMutableBuffer().data; + output_int_data.resize(input_flat_size); + if (input_tensor.int64_val_size()) { + for (int i = 0; i < input_tensor.int64_val_size(); i++) { + output_int_data[i] = input_tensor.int64_val(i); + } + } else if (input_tensor.tensor_content().size() == + input_flat_size * sizeof(int64)) { + toco::port::CopyToBuffer(input_tensor.tensor_content(), + reinterpret_cast(output_int_data.data())); + } else { + LOG(FATAL) << "Neither input_content nor int64_val have the right " + "dimensions for this int64 tensor."; + } +} + +// Count the number of inputs of a given node. If `drop_control_dependency` is +// true, count the number of non-control-dependency inputs. +size_t GetInputsCount(const NodeDef& node, bool drop_control_dependency) { + if (drop_control_dependency) { + for (size_t i = 0; i < node.input_size(); ++i) { + if (node.input(i)[0] == '^') { + LOG(INFO) << "Reached first control dependency input: " + << node.input(i); + return i; + } + } + return node.input_size(); + } else { + return node.input_size(); + } +} + +void ConvertConstOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Const"); + const auto& tensor = GetTensorAttr(node, "value"); + const auto dtype = GetDataTypeAttr(node, "dtype"); + + auto& array = model->GetOrCreateArray(node.name()); + array.data_type = dtype == DT_FLOAT + ? ArrayDataType::kFloat + : dtype == DT_INT32 + ? ArrayDataType::kInt32 + : dtype == DT_INT64 ? ArrayDataType::kInt64 + : ArrayDataType::kNone; + if (dtype == DT_FLOAT) { + ImportFloatArray(tensor, &array); + } else if (dtype == DT_INT32) { + ImportInt32Array(tensor, &array); + } else if (dtype == DT_INT64) { + ImportInt64Array(tensor, &array); + } else { + // do nothing, silently ignore the Const data. For example, there are consts + // of string type. We just make a dummy buffer to indicate that this array + // does not rely on external input. + array.GetMutableBuffer(); + } +} + +void ConvertConvOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Conv2D"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + + // We only support NHWC, which is the default data_format. + // So if data_format is not defined, we're all good. + if (node.attr().count("data_format")) { + CHECK_EQ(GetStringAttr(node, "data_format"), "NHWC"); + } + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + + const auto& input_name = node.input(0); + const auto& weights_name = node.input(1); + const auto& reordered_weights_name = weights_name + "_reordered"; + // Check if a ReorderAxesOperator was already created for these weights + // (that happens when multiple layers share the same weights). + const Operator* existing_reorder = + GetOpWithOutput(*model, reordered_weights_name); + if (existing_reorder) { + // Check that it is safe to rely on the _reordered naming of the output + // array! + CHECK(existing_reorder->type == OperatorType::kReorderAxes); + } else { + // Create a new ReorderAxesOperator + auto* reorder = new ReorderAxesOperator; + reorder->inputs = {weights_name}; + reorder->outputs = {reordered_weights_name}; + reorder->input_axes_order = AxesOrder::kHWIO; + reorder->output_axes_order = AxesOrder::kOHWI; + model->operators.emplace_back(reorder); + } + auto* conv = new ConvOperator; + conv->inputs = {input_name, reordered_weights_name}; + conv->outputs = {node.name()}; + const auto& strides = GetListAttr(node, "strides"); + CHECK_EQ(strides.i_size(), 4); + CHECK_EQ(strides.i(0), 1); + CHECK_EQ(strides.i(3), 1); + conv->stride_height = strides.i(1); + conv->stride_width = strides.i(2); + const auto& padding = GetStringAttr(node, "padding"); + if (padding == "SAME") { + conv->padding.type = PaddingType::kSame; + } else if (padding == "VALID") { + conv->padding.type = PaddingType::kValid; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + model->operators.emplace_back(conv); +} + +void ConvertDepthwiseConvOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "DepthwiseConv2dNative"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + + // We only support NHWC, which is the default data_format. + // So if data_format is not defined, we're all good. + if (node.attr().count("data_format")) { + CHECK_EQ(GetStringAttr(node, "data_format"), "NHWC"); + } + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + + const auto& input_name = node.input(0); + const auto& weights_name = node.input(1); + const auto& reordered_weights_name = weights_name + "_reordered"; + // Check if a ReorderAxesOperator was already created for these weights + // (that happens when multiple layers share the same weights). + const Operator* existing_reorder = + GetOpWithOutput(*model, reordered_weights_name); + if (existing_reorder) { + // Check that it is safe to rely on the _reordered naming of the output + // array! + CHECK(existing_reorder->type == OperatorType::kReorderAxes); + } else { + // Create a new ReorderAxesOperator + auto* reorder = new ReorderAxesOperator; + reorder->inputs = {weights_name}; + reorder->outputs = {reordered_weights_name}; + reorder->input_axes_order = AxesOrder::kHWIM; + reorder->output_axes_order = AxesOrder::k1HWO; + model->operators.emplace_back(reorder); + } + auto* conv = new DepthwiseConvOperator; + conv->inputs = {input_name, reordered_weights_name}; + conv->outputs = {node.name()}; + const auto& strides = GetListAttr(node, "strides"); + CHECK_EQ(strides.i_size(), 4); + CHECK_EQ(strides.i(0), 1); + CHECK_EQ(strides.i(3), 1); + conv->stride_height = strides.i(1); + conv->stride_width = strides.i(2); + const auto& padding = GetStringAttr(node, "padding"); + if (padding == "SAME") { + conv->padding.type = PaddingType::kSame; + } else if (padding == "VALID") { + conv->padding.type = PaddingType::kValid; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + model->operators.emplace_back(conv); +} + +void ConvertDepthToSpaceOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "DepthToSpace"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + auto* op = new DepthToSpaceOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + op->block_size = GetIntAttr(node, "block_size"); + QCHECK_GE(op->block_size, 2); + model->operators.emplace_back(op); +} + +void ConvertSpaceToDepthOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "SpaceToDepth"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + auto* op = new SpaceToDepthOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + op->block_size = GetIntAttr(node, "block_size"); + QCHECK_GE(op->block_size, 2); + model->operators.emplace_back(op); +} + +void ConvertBiasAddOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "BiasAdd"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + const auto& input_name = node.input(0); + const auto& bias_name = node.input(1); + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + auto* biasadd = new AddOperator; + biasadd->inputs.push_back(input_name); + biasadd->inputs.push_back(bias_name); + biasadd->outputs.push_back(node.name()); + model->operators.emplace_back(biasadd); +} + +void ConvertReluOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Relu"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + auto* relu = new ReluOperator; + relu->inputs.push_back(input_name); + relu->outputs.push_back(node.name()); + model->operators.emplace_back(relu); +} + +void ConvertRelu6Operator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Relu6"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + auto* op = new Relu6Operator; + op->inputs.push_back(input_name); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertLogisticOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Sigmoid"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + auto* op = new LogisticOperator; + op->inputs.push_back(input_name); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertTanhOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Tanh"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + auto* op = new TanhOperator; + op->inputs.push_back(input_name); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertDivOperator(const NodeDef& node, Model* model) { + CHECK(node.op() == "Div" || node.op() == "RealDiv"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new DivOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertIdentityOperator(const NodeDef& node, Model* model) { + CHECK(node.op() == "Identity" || node.op() == "CheckNumerics" || + node.op() == "PlaceholderWithDefault"); + auto* op = new TensorFlowIdentityOperator; + // Amazingly, some TensorFlow graphs (at least rajeev_lstm.pb) have + // identity nodes with multiple inputs, but the other inputs seem + // to be gratuitous (in the case of rajeev_lstm.pb, these are + // enumerating the LSTM state arrays). We will just ignore extra + // inputs beyond the first input. + CHECK_GE(node.input_size(), 1); + const auto& input_name = node.input(0); + op->inputs.push_back(input_name); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertFakeQuantWithMinMaxArgs(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "FakeQuantWithMinMaxArgs"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + auto* op = new FakeQuantOperator; + op->inputs.push_back(node.input(0)); + op->minmax.reset(new MinMax); + auto& minmax = *op->minmax; + minmax.min = GetFloatAttr(node, "min"); + minmax.max = GetFloatAttr(node, "max"); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertFakeQuantWithMinMaxVars(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "FakeQuantWithMinMaxVars"); + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + CHECK(num_inputs == 3 || num_inputs == 4); + auto* op = new FakeQuantOperator; + for (int i = 0; i < 3; i++) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertRsqrtOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Rsqrt"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + auto* op = new TensorFlowRsqrtOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSqrtOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Sqrt"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + auto* op = new TensorFlowSqrtOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSqueezeOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Squeeze"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + auto* op = new SqueezeOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + + const auto& squeeze_dims = GetListAttr(node, "squeeze_dims"); + for (int i = 0; i < squeeze_dims.i_size(); ++i) { + op->squeeze_dims.push_back(squeeze_dims.i(i)); + } + + model->operators.emplace_back(op); +} + +void ConvertSquareOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Square"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + auto* op = new TensorFlowSquareOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertAddOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Add"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new AddOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertMulOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Mul"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new MulOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSubOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Sub"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new SubOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSumOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Sum"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowSumOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertTileOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Tile"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowTileOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSliceOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Slice"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3); + auto* op = new SliceOperator; + for (int i = 0; i < 3; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertPadOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Pad"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new PadOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertShapeOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Shape"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + auto* op = new TensorFlowShapeOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSplitOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Split"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowSplitOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + const int num_split = GetIntAttr(node, "num_split"); + op->outputs.push_back(node.name()); + for (int i = 1; i < num_split; i++) { + op->outputs.push_back(absl::StrCat(node.name(), ":", i)); + } + op->num_split = num_split; + model->operators.emplace_back(op); +} + +void ConvertMergeOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Merge"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowMergeOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSwitchOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Switch"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowSwitchOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + // Switch operators have two outputs: "name" and "name:1". + op->outputs.push_back(node.name() + ":1"); + model->operators.emplace_back(op); +} +void ConvertSoftmaxOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Softmax"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + auto* softmax = new SoftmaxOperator; + softmax->inputs.push_back(input_name); + softmax->outputs.push_back(node.name()); + // TensorFlow's Softmax doesn't seem to admit a 'beta' parameter. + CHECK(!node.attr().count("beta")); // Stab in the dark, just in case. + softmax->beta = 1.f; + model->operators.emplace_back(softmax); +} + +void ConvertLRNOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "LRN"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + auto* lrn = new LocalResponseNormalizationOperator; + lrn->inputs.push_back(input_name); + lrn->outputs.push_back(node.name()); + lrn->range = GetIntAttr(node, "depth_radius"); + lrn->bias = GetFloatAttr(node, "bias"); + lrn->alpha = GetFloatAttr(node, "alpha"); + lrn->beta = GetFloatAttr(node, "beta"); + model->operators.emplace_back(lrn); +} + +void ConvertMaxPoolOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "MaxPool"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + if (HasAttr(node, "T")) { + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + } else { + LOG(WARNING) << "Found MaxPool operator missing 'T' attribute"; + } + auto* maxpool = new MaxPoolOperator; + maxpool->inputs.push_back(input_name); + maxpool->outputs.push_back(node.name()); + const auto& strides = GetListAttr(node, "strides"); + CHECK_EQ(strides.i_size(), 4); + CHECK_EQ(strides.i(0), 1); + CHECK_EQ(strides.i(3), 1); + maxpool->stride_height = strides.i(1); + maxpool->stride_width = strides.i(2); + const auto& ksize = GetListAttr(node, "ksize"); + CHECK_EQ(ksize.i_size(), 4); + CHECK_EQ(ksize.i(0), 1); + CHECK_EQ(ksize.i(3), 1); + maxpool->kheight = ksize.i(1); + maxpool->kwidth = ksize.i(2); + const auto& padding = GetStringAttr(node, "padding"); + if (padding == "SAME") { + maxpool->padding.type = PaddingType::kSame; + } else if (padding == "VALID") { + maxpool->padding.type = PaddingType::kValid; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + model->operators.emplace_back(maxpool); +} + +void ConvertAvgPoolOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "AvgPool"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto& input_name = node.input(0); + CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + auto* avgpool = new AveragePoolOperator; + avgpool->inputs.push_back(input_name); + avgpool->outputs.push_back(node.name()); + const auto& strides = GetListAttr(node, "strides"); + CHECK_EQ(strides.i_size(), 4); + CHECK_EQ(strides.i(0), 1); + CHECK_EQ(strides.i(3), 1); + avgpool->stride_height = strides.i(1); + avgpool->stride_width = strides.i(2); + const auto& ksize = GetListAttr(node, "ksize"); + CHECK_EQ(ksize.i_size(), 4); + CHECK_EQ(ksize.i(0), 1); + CHECK_EQ(ksize.i(3), 1); + avgpool->kheight = ksize.i(1); + avgpool->kwidth = ksize.i(2); + const auto& padding = GetStringAttr(node, "padding"); + if (padding == "SAME") { + avgpool->padding.type = PaddingType::kSame; + } else if (padding == "VALID") { + avgpool->padding.type = PaddingType::kValid; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + model->operators.emplace_back(avgpool); +} + +void ConvertReshapeOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Reshape"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowReshapeOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertMatMulOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "MatMul"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + // Transpose flags should be easy to support, but we don't have a + // GraphDef with them to test on at the moment. + CHECK_EQ(GetBoolAttr(node, "transpose_a"), false); + CHECK_EQ(GetBoolAttr(node, "transpose_b"), false); + const auto& input_name = node.input(0); + const auto& weights_name = node.input(1); + const auto& reordered_weights_name = weights_name + "_reordered"; + // Check if a ReorderAxesOperator was already created for these weights + // (that happens when multiple layers share the same weights). + const Operator* existing_reorder = + GetOpWithOutput(*model, reordered_weights_name); + if (existing_reorder) { + // Check that it is safe to rely on the _reordered naming of the output + // array! + CHECK(existing_reorder->type == OperatorType::kReorderAxes); + } else { + // Create a new ReorderAxesOperator + auto* reorder = new ReorderAxesOperator; + reorder->inputs = {weights_name}; + reorder->outputs = {reordered_weights_name}; + reorder->input_axes_order = AxesOrder::kRC; + reorder->output_axes_order = AxesOrder::kCR; + model->operators.emplace_back(reorder); + } + auto* matmul = new TensorFlowMatMulOperator; + matmul->inputs = {input_name, reordered_weights_name}; + matmul->outputs = {node.name()}; + model->operators.emplace_back(matmul); +} + +void ConvertConcatOperator(const NodeDef& node, Model* model) { + Operator* op = nullptr; + if (node.op() == "Concat") { + op = new TensorFlowConcatOperator; + } else if (node.op() == "ConcatV2") { + op = new TensorFlowConcatV2Operator; + } else { + LOG(FATAL) << "Expected Concat or ConcatV2"; + } + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + CHECK_GE(num_inputs, 2); + CHECK_EQ(num_inputs, 1 + GetIntAttr(node, "N")); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertAllOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "All"); + auto* op = new TensorFlowAllOperator; + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertAssertOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Assert"); + auto* op = new TensorFlowAssertOperator; + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertLessOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Less"); + auto* op = new TensorFlowLessOperator; + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertLessEqualOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "LessEqual"); + auto* op = new TensorFlowLessEqualOperator; + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertGreaterOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Greater"); + auto* op = new TensorFlowGreaterOperator; + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertGreaterEqualOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "GreaterEqual"); + auto* op = new TensorFlowGreaterEqualOperator; + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertMaxOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Max"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowMaxOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertMinOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Min"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowMinOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertMaximumOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Maximum"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowMaximumOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertMinimumOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Minimum"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new TensorFlowMinimumOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertUnsupportedOperator(const NodeDef& node, Model* model) { + LOG(INFO) << "Converting unsupported operation: " << node.op(); + auto* op = new TensorFlowUnsupportedOperator; + const int num_inputs = + GetInputsCount(node, model->flags.drop_control_dependency()); + for (int i = 0; i < num_inputs; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + op->tensorflow_op = node.op(); + node.SerializeToString(&op->tensorflow_node_def); + model->operators.emplace_back(op); + if (HasAttr(node, "_output_quantized")) { + op->quantized = GetBoolAttr(node, "_output_quantized"); + } + if (HasAttr(node, "_output_types")) { + const auto& output_types = GetListAttr(node, "_output_types"); + for (int i = 0; i < output_types.type_size(); ++i) { + op->output_data_types.push_back(ConvertDataType(output_types.type(i))); + } + } +} + +void ConvertStridedSliceOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "StridedSlice"); + CHECK_EQ(node.input_size(), 4); + + // Only a subset of the full TF op functionality is supported now. + if ( // No 64-bit indices. + GetDataTypeAttr(node, "Index") != DT_INT32 || + // No dimensionality changes. + GetIntAttr(node, "new_axis_mask") != 0 || + GetIntAttr(node, "shrink_axis_mask") != 0 || + // No sparse indices. + GetIntAttr(node, "ellipsis_mask") != 0 || + // Only 4D tensors are supported. + GetIntAttr(node, "begin_mask") > 15 || + GetIntAttr(node, "end_mask") > 15) { + ConvertUnsupportedOperator(node, model); + return; + } + + auto* op = new StridedSliceOperator; + for (const auto& input : node.input()) { + op->inputs.push_back(input); + } + op->outputs.push_back(node.name()); + + op->begin_mask = GetIntAttr(node, "begin_mask"); + op->ellipsis_mask = GetIntAttr(node, "ellipsis_mask"); + op->end_mask = GetIntAttr(node, "end_mask"); + op->new_axis_mask = GetIntAttr(node, "new_axis_mask"); + op->shrink_axis_mask = GetIntAttr(node, "shrink_axis_mask"); + model->operators.emplace_back(op); +} + +void ConvertPlaceholderOperator(const NodeDef& node, Model* model) { + CHECK(node.op() == "Placeholder" || node.op() == "LegacyFedInput"); + if (node.op() == "Placeholder") { + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 0); + } + auto& array = model->GetOrCreateArray(node.name()); + if (node.attr().count("dtype")) { + array.data_type = ConvertDataType(GetDataTypeAttr(node, "dtype")); + } + if (node.attr().count("shape")) { + const auto& shape = GetShapeAttr(node, "shape"); + auto num_dims = shape.dim_size(); + bool has_wildcard = false; + for (std::size_t i = 0; i < num_dims; i++) { + if (shape.dim(i).size() == -1) { + has_wildcard = true; + } + } + // TODO(b/62716978): This logic needs to be revisted. During dims + // refactoring it is an interim fix. + if (num_dims > 0 && !has_wildcard) { + auto& dst_array_dims = *array.mutable_shape()->mutable_dims(); + dst_array_dims.resize(num_dims); + for (std::size_t i = 0; i < num_dims; i++) { + dst_array_dims[i] = shape.dim(i).size(); + } + } + } +} + +void ConvertNoOpOperator(const NodeDef& node, Model* model) {} + +ArrayDataType GetArrayDataType(tensorflow::DataType tf_data_type) { + if (tf_data_type == DT_UINT8) { + return ArrayDataType::kUint8; + } else if (tf_data_type == DT_INT32) { + return ArrayDataType::kInt32; + } else if (tf_data_type == DT_FLOAT) { + return ArrayDataType::kFloat; + } else { + return ArrayDataType::kNone; + } +} + +void ConvertCastOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Cast"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto tf_src_dtype = GetDataTypeAttr(node, "SrcT"); + const auto tf_dst_dtype = GetDataTypeAttr(node, "DstT"); + CHECK(tf_src_dtype == DT_UINT8 || tf_src_dtype == DT_INT32 || + tf_src_dtype == DT_FLOAT); + CHECK(tf_dst_dtype == DT_UINT8 || tf_dst_dtype == DT_INT32 || + tf_dst_dtype == DT_FLOAT); + CHECK_NE(tf_src_dtype, tf_dst_dtype) + << "Same input and output data type. No need to cast."; + auto* op = new CastOperator; + op->src_data_type = GetArrayDataType(tf_src_dtype); + op->dst_data_type = GetArrayDataType(tf_dst_dtype); + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertFloorOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Floor"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + const auto data_type = GetDataTypeAttr(node, "T"); + CHECK(data_type == DT_FLOAT); + auto* op = new FloorOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertGatherOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Gather"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + const auto indices_data_type = GetDataTypeAttr(node, "Tindices"); + CHECK(indices_data_type == DT_INT32); + auto* op = new GatherOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertResizeBilinearOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "ResizeBilinear"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + auto* op = new ResizeBilinearOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertBatchNormWithGlobalNormalizationOperator(const NodeDef& node, + Model* model) { + CHECK_EQ(node.op(), "BatchNormWithGlobalNormalization"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 5); + + // TODO(ahentz): to really match tensorflow we need to add variance_epsilon + // to the input, before feeding it into TensorFlowRsqrtOperator. + // CHECK_EQ(GetFloatAttr(node, "variance_epsilon"), 0.001f); + + string multiplier = node.name() + "_mul"; + if (GetBoolAttr(node, "scale_after_normalization")) { + // Create graph: + // v -> RSQRT -> + // MUL -> multiplier + // gamma -----> + string rsqrt = node.name() + "_rsqrt"; + + auto* rsqrt_op = new TensorFlowRsqrtOperator; + rsqrt_op->inputs.push_back(node.input(2)); + rsqrt_op->outputs.push_back(rsqrt); + model->operators.emplace_back(rsqrt_op); + + auto* mul_op = new MulOperator; + mul_op->inputs.push_back(rsqrt); + mul_op->inputs.push_back(node.input(4)); + mul_op->outputs.push_back(multiplier); + model->operators.emplace_back(mul_op); + } else { + // Create graph: + // v -> RSQRT -> multiplier + auto* rsqrt_op = new TensorFlowRsqrtOperator; + rsqrt_op->inputs.push_back(node.input(2)); + rsqrt_op->outputs.push_back(multiplier); + model->operators.emplace_back(rsqrt_op); + } + + auto* op = new BatchNormalizationOperator; + op->global_normalization = true; + + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->inputs.push_back(multiplier); + op->inputs.push_back(node.input(3)); + op->outputs.push_back(node.name()); + + model->operators.emplace_back(op); +} + +void ConvertFusedBatchNormOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "FusedBatchNorm"); + CHECK_EQ(node.input_size(), 5); + + // Declare shortcuts for the inputs. + const string& gamma_input = node.input(1); + const string& beta_input = node.input(2); + const string& moving_mean_input = node.input(3); + const string& moving_variance_input = node.input(4); + + // Create an array holding the epsilon value (typically, 0.001). + const string epsilon_array_name = node.name() + "_epsilon_array"; + auto& epsilon_array = model->GetOrCreateArray(epsilon_array_name); + epsilon_array.data_type = ArrayDataType::kFloat; + *epsilon_array.mutable_shape()->mutable_dims() = {1}; + epsilon_array.GetMutableBuffer().data.push_back( + GetFloatAttr(node, "epsilon")); + + // Add epsilon to the moving variance. + const string epsilon_add_op_name = node.name() + "_epsilon"; + auto* epsilon_add_op = new AddOperator; + epsilon_add_op->inputs.push_back(moving_variance_input); + epsilon_add_op->inputs.push_back(epsilon_array_name); + epsilon_add_op->outputs.push_back(epsilon_add_op_name); + model->operators.emplace_back(epsilon_add_op); + + // Take the inverse square root of the (variance + epsilon). + const string rsqrt_op_name = node.name() + "_rsqrt"; + auto* rsqrt_op = new TensorFlowRsqrtOperator; + rsqrt_op->inputs.push_back(epsilon_add_op_name); + rsqrt_op->outputs.push_back(rsqrt_op_name); + model->operators.emplace_back(rsqrt_op); + + // Multiply the result by gamma. + const string multiplier = node.name() + "_mul"; + auto* mul_op = new MulOperator; + mul_op->inputs.push_back(rsqrt_op_name); + mul_op->inputs.push_back(gamma_input); + mul_op->outputs.push_back(multiplier); + model->operators.emplace_back(mul_op); + + // Now we have all required inputs for the BatchNormalizationOperator. + auto* op = new BatchNormalizationOperator; + op->global_normalization = true; + + op->inputs.push_back(node.input(0)); + op->inputs.push_back(moving_mean_input); + op->inputs.push_back(multiplier); + op->inputs.push_back(beta_input); + op->outputs.push_back(node.name()); + + model->operators.emplace_back(op); +} + +void ConvertSpaceToBatchNDOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "SpaceToBatchND"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3); + CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32); + CHECK_EQ(GetDataTypeAttr(node, "Tpaddings"), DT_INT32); + auto* op = new SpaceToBatchNDOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->inputs.push_back(node.input(2)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertBatchToSpaceNDOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "BatchToSpaceND"); + CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3); + CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32); + CHECK_EQ(GetDataTypeAttr(node, "Tcrops"), DT_INT32); + auto* op = new BatchToSpaceNDOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->inputs.push_back(node.input(2)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertMeanOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Mean"); + CHECK_EQ(node.input_size(), 2); + auto* op = new MeanOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + +void ConvertSvdfOperator(const NodeDef& node, Model* model) { + CHECK_EQ(node.op(), "Svdf"); + bool has_bias = (node.input_size() == 4); + auto* op = new SvdfOperator; + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->inputs.push_back(node.input(2)); + if (has_bias) { + op->inputs.push_back(node.input(3)); + } + op->outputs.push_back(node.name() + "_state"); + op->outputs.push_back(node.name()); + if (node.attr().at("ActivationFunction").s() == "Relu") { + op->fused_activation_function = FusedActivationFunctionType::kRelu; + } else { + op->fused_activation_function = FusedActivationFunctionType::kNone; + } + op->rank = node.attr().at("Rank").i(); + model->operators.emplace_back(op); +} + +void StripCaretFromArrayNames(Model* model) { + for (auto& op : model->operators) { + for (auto& input : op->inputs) { + input = string(absl::StripPrefix(input, "^")); + } + for (auto& output : op->outputs) { + output = string(absl::StripPrefix(output, "^")); + } + } + for (auto& array : model->arrays) { + if (absl::StartsWith(array.first, "^")) { + LOG(FATAL) << "What?"; + } + } +} + +void AddExtraOutputsFedIntoOtherOps(Model* model) { + for (const auto& consumer_op : model->operators) { + for (const string& input : consumer_op->inputs) { + const std::vector& split = absl::StrSplit(input, ':'); + if (split.size() != 2) { + continue; + } + int output_index = 0; + if (!absl::SimpleAtoi(split[1], &output_index)) { + continue; + } + auto* producer_op = GetOpWithOutput(*model, split[0]); + if (!producer_op) { + continue; + } + while (producer_op->outputs.size() <= output_index) { + using toco::port::StringF; + producer_op->outputs.push_back( + StringF("%s:%d", split[0], producer_op->outputs.size())); + } + } + } +} + +bool InlineAllFunctions(GraphDef* graphdef) { + if (graphdef->library().function().empty()) { + VLOG(kLogLevelModelUnchanged) << "No functions to inline."; + return false; + } + + // Override "_noinline" attribute on all functions + GraphDef graphdef_copy(*graphdef); + for (auto& function : + (*graphdef_copy.mutable_library()->mutable_function())) { + auto* attributes = function.mutable_attr(); + if (attributes->count(tensorflow::kNoInlineAttr) != 0) { + (*attributes)[tensorflow::kNoInlineAttr].set_b(false); + } + } + + // Construct minimum resources needed to use ExpandInlineFunctions(). + tensorflow::SessionOptions options; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", 1}); + std::vector devices; + TF_CHECK_OK(tensorflow::DeviceFactory::AddDevices( + options, "/job:localhost/replica:0/task:0", &devices)); + + tensorflow::FunctionLibraryDefinition fld(tensorflow::OpRegistry::Global(), + graphdef_copy.library()); + tensorflow::DeviceMgr device_mgr(devices); + tensorflow::OptimizerOptions o_opts; + tensorflow::ProcessFunctionLibraryRuntime pflr( + &device_mgr, tensorflow::Env::Default(), TF_GRAPH_DEF_VERSION, &fld, + o_opts, nullptr); + tensorflow::FunctionLibraryRuntime* flr; + flr = pflr.GetFLR("/job:localhost/replica:0/task:0/cpu:0"); + + tensorflow::Graph graph(fld); + tensorflow::GraphConstructorOptions gc_opts; + TF_CHECK_OK( + tensorflow::ConvertGraphDefToGraph(gc_opts, graphdef_copy, &graph)); + + // Iterate over the graph until there are no more nodes to be inlined. + bool graph_modified = false; + while (tensorflow::ExpandInlineFunctions(flr, &graph)) { + graph_modified = true; + LOG(INFO) << "Found functions that were inlined."; + } + + // Output inlined graph + if (graph_modified) { + graph.ToGraphDef(graphdef); + } + return graph_modified; +} +} // namespace + +std::unique_ptr ImportTensorFlowGraphDef(const ModelFlags& model_flags, + const GraphDef& tf_graph) { + LogDumpGraphDef(kLogLevelModelChanged, "AT IMPORT", tf_graph); + + GraphDef inlined_graph(tf_graph); + if (InlineAllFunctions(&inlined_graph)) { + LogDumpGraphDef(kLogLevelModelChanged, "AFTER INLINING", inlined_graph); + } + + Model* model = new Model; + ResolveModelFlags(model_flags, model); + + for (const auto& node : inlined_graph.node()) { + if (node.op() == "Const") { + ConvertConstOperator(node, model); + } else if (node.op() == "Conv2D") { + ConvertConvOperator(node, model); + } else if (node.op() == "DepthwiseConv2dNative") { + ConvertDepthwiseConvOperator(node, model); + } else if (node.op() == "DepthToSpace") { + ConvertDepthToSpaceOperator(node, model); + } else if (node.op() == "SpaceToDepth") { + ConvertSpaceToDepthOperator(node, model); + } else if (node.op() == "BiasAdd") { + ConvertBiasAddOperator(node, model); + } else if (node.op() == "Relu") { + ConvertReluOperator(node, model); + } else if (node.op() == "Relu6") { + ConvertRelu6Operator(node, model); + } else if (node.op() == "Sigmoid") { + ConvertLogisticOperator(node, model); + } else if (node.op() == "Tanh") { + ConvertTanhOperator(node, model); + } else if (node.op() == "MaxPool") { + ConvertMaxPoolOperator(node, model); + } else if (node.op() == "AvgPool") { + ConvertAvgPoolOperator(node, model); + } else if (node.op() == "Reshape") { + ConvertReshapeOperator(node, model); + } else if (node.op() == "MatMul") { + ConvertMatMulOperator(node, model); + } else if (node.op() == "Div" || node.op() == "RealDiv") { + ConvertDivOperator(node, model); + } else if (node.op() == "Identity" || node.op() == "CheckNumerics") { + ConvertIdentityOperator(node, model); + } else if (node.op() == "FakeQuantWithMinMaxVars") { + ConvertFakeQuantWithMinMaxVars(node, model); + } else if (node.op() == "FakeQuantWithMinMaxArgs") { + ConvertFakeQuantWithMinMaxArgs(node, model); + } else if (node.op() == "Rsqrt") { + ConvertRsqrtOperator(node, model); + } else if (node.op() == "Squeeze") { + ConvertSqueezeOperator(node, model); + } else if (node.op() == "Sqrt") { + ConvertSqrtOperator(node, model); + } else if (node.op() == "Square") { + ConvertSquareOperator(node, model); + } else if (node.op() == "Add") { + ConvertAddOperator(node, model); + } else if (node.op() == "Mul") { + ConvertMulOperator(node, model); + } else if (node.op() == "Sub") { + ConvertSubOperator(node, model); + } else if (node.op() == "Sum") { + ConvertSumOperator(node, model); + } else if (node.op() == "Tile") { + ConvertTileOperator(node, model); + } else if (node.op() == "Concat" || node.op() == "ConcatV2") { + ConvertConcatOperator(node, model); + } else if (node.op() == "LRN") { + ConvertLRNOperator(node, model); + } else if (node.op() == "Softmax") { + ConvertSoftmaxOperator(node, model); + } else if (node.op() == "All") { + ConvertAllOperator(node, model); + } else if (node.op() == "Assert") { + ConvertAssertOperator(node, model); + } else if (node.op() == "Less") { + ConvertLessOperator(node, model); + } else if (node.op() == "LessEqual") { + ConvertLessEqualOperator(node, model); + } else if (node.op() == "Greater") { + ConvertGreaterOperator(node, model); + } else if (node.op() == "GreaterEqual") { + ConvertGreaterEqualOperator(node, model); + } else if (node.op() == "Max") { + ConvertMaxOperator(node, model); + } else if (node.op() == "Min") { + ConvertMinOperator(node, model); + } else if (node.op() == "Maximum") { + ConvertMaximumOperator(node, model); + } else if (node.op() == "Minimum") { + ConvertMinimumOperator(node, model); + } else if (node.op() == "Merge") { + ConvertMergeOperator(node, model); + } else if (node.op() == "Pad") { + ConvertPadOperator(node, model); + } else if (node.op() == "StridedSlice") { + ConvertStridedSliceOperator(node, model); + } else if (node.op() == "Shape") { + ConvertShapeOperator(node, model); + } else if (node.op() == "Slice") { + ConvertSliceOperator(node, model); + } else if (node.op() == "Split") { + ConvertSplitOperator(node, model); + } else if (node.op() == "Switch") { + ConvertSwitchOperator(node, model); + } else if (node.op() == "Placeholder") { + ConvertPlaceholderOperator(node, model); + } else if (node.op() == "PlaceholderWithDefault") { + ConvertIdentityOperator(node, model); + } else if (node.op() == "LegacyFedInput") { + ConvertPlaceholderOperator(node, model); + } else if (node.op() == "NoOp") { + ConvertNoOpOperator(node, model); + } else if (node.op() == "Cast") { + ConvertCastOperator(node, model); + } else if (node.op() == "Floor") { + ConvertFloorOperator(node, model); + } else if (node.op() == "Gather") { + ConvertGatherOperator(node, model); + } else if (node.op() == "ResizeBilinear") { + ConvertResizeBilinearOperator(node, model); + } else if (node.op() == "BatchNormWithGlobalNormalization") { + ConvertBatchNormWithGlobalNormalizationOperator(node, model); + } else if (node.op() == "FusedBatchNorm") { + ConvertFusedBatchNormOperator(node, model); + } else if (node.op() == "SpaceToBatchND") { + ConvertSpaceToBatchNDOperator(node, model); + } else if (node.op() == "BatchToSpaceND") { + ConvertBatchToSpaceNDOperator(node, model); + } else if (node.op() == "Mean") { + ConvertMeanOperator(node, model); + } else if (node.op() == "Svdf") { + ConvertSvdfOperator(node, model); + } else { + ConvertUnsupportedOperator(node, model); + } + } + + StripCaretFromArrayNames(model); + AddExtraOutputsFedIntoOtherOps(model); + FixNoMissingArray(model); + FixNoOrphanedArray(model); + FixOperatorOrdering(model); + CheckInvariants(*model); + + // if rnn state arrays are constant, make them transient + for (const auto& rnn_state : model->flags.rnn_states()) { + model->GetArray(rnn_state.state_array()).buffer = nullptr; + } + + return std::unique_ptr(model); +} + +std::unique_ptr ImportTensorFlowGraphDef( + const ModelFlags& model_flags, const string& input_file_contents) { + std::unique_ptr tf_graph(new GraphDef); + CHECK(ParseFromStringEitherTextOrBinary(input_file_contents, tf_graph.get())); + + std::unique_ptr pruned_graph = + MaybeReplaceCompositeSubgraph(*tf_graph); + if (pruned_graph) { + tf_graph = std::move(pruned_graph); + } + return ImportTensorFlowGraphDef(model_flags, *tf_graph); +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.h b/tensorflow/contrib/lite/toco/import_tensorflow.h new file mode 100644 index 0000000000..d2eb423ca4 --- /dev/null +++ b/tensorflow/contrib/lite/toco/import_tensorflow.h @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_IMPORT_TENSORFLOW_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_IMPORT_TENSORFLOW_H_ + +#include +#include +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/core/framework/graph.pb.h" + +namespace toco { + +std::unique_ptr ImportTensorFlowGraphDef( + const ModelFlags& model_flags, const tensorflow::GraphDef& graph_def); + +std::unique_ptr ImportTensorFlowGraphDef( + const ModelFlags& model_flags, const string& input_file_contents); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_IMPORT_TENSORFLOW_H_ diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h new file mode 100644 index 0000000000..d992f8458f --- /dev/null +++ b/tensorflow/contrib/lite/toco/model.h @@ -0,0 +1,1372 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_H_ + +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +enum class OperatorType { + kNone, + // General-purpose neural network operators. + kAdd, + kAveragePool, + kBatchNormalization, + kConv, + kConcatenation, + kDepthwiseConv, + kDepthToSpace, + kSpaceToDepth, + kDequantize, + kDiv, + kFullyConnected, + kL2Normalization, + kL2Pool, + kLstmCell, + kLocalResponseNormalization, + kLogistic, + kMaxPool, + kFakeQuant, + kMul, + kRelu, + kRelu1, + kRelu6, + kSoftmax, + kSub, + kTanh, + kCast, + kFloor, + kGather, + kResizeBilinear, + kSpaceToBatchND, + kBatchToSpaceND, + kPad, + kStridedSlice, + kSlice, + kSqueeze, + kMean, + // The SVDF Op is a decomposition of a densely connected Op into + // low rank filters. For details: + // https://research.google.com/pubs/pub43813.html + kSvdf, + // Special operators used for importing TensorFlow nodes. + // The general intent is to have some graph transformation either + // drop them or rewrite them as general-purpose operators. + kTensorFlowAll, + kTensorFlowAssert, + kTensorFlowConcat, + kTensorFlowConcatV2, + kTensorFlowGreater, + kTensorFlowGreaterEqual, + kTensorFlowIdentity, + kTensorFlowLess, + kTensorFlowLessEqual, + kTensorFlowMax, + kTensorFlowMaximum, + kTensorFlowMin, + kTensorFlowMinimum, + kTensorFlowMatMul, + kTensorFlowMerge, + kTensorFlowReshape, + kTensorFlowRsqrt, + kTensorFlowShape, + kTensorFlowSplit, + kTensorFlowSqrt, + kTensorFlowSquare, + kTensorFlowSum, + kTensorFlowSwitch, + kTensorFlowTile, + // An unsupported TF operation. It's only needed to be able to represent TF + // graph internally and is expected to be dropped by graph transformations. + kTensorFlowUnsupported, + // Finally, TensorFlow uses different conventions for axes ordering, + // see AxesOrder, and this cannot always be resolved at the time of importing + // nodes, as TensorFlow parameters may be constant-expression subgraphs + // instead of being given as plain constant arrays. So we need to insert + // special nodes in the graph to shuffle axes. + kReorderAxes, +}; + +// Helper to deal with TensorFlow arrays using a different ordering of +// dimensions +// ("axes") than our own. +// TODO(benoitjacob): Ultimately, we shouldn't have any "ordering" of axes, +// we should have associative arrays mapping symbolic axes identifiers (like +// "output_depth") to dimensions. We would then not need this anymore. +enum class AxesOrder { + kOneAxis, // one-dimensional array, one unique axis. + kCR, // column-major matrix storage order. Our standard. + kRC, // row-major matrix storage order. TensorFlow default. + kOHWI, // Our standard for conv weights + kHWIO, // TensorFlow conv weights + k1HWO, // Our standard for DepthwiseConv weights + kHWIM, // TensorFlow DepthwiseConv weights + kNHWC, // TensorFlow activations +}; + +// The type of the scalars in an array. +// Note that that does not by itself tell whether the values in the array are +// real (are literally interpreted as real numbers) or quantized (only acquire +// a meaning as real numbers in conjuction with QuantizationParams). +// +// In practice though: +// float values are always real +// uint8 values are always quantized +// int32 values are either real or quantized (depending on whether +// QuantizationParams are present). +// other types are unused at the moment. +// +// kNone means that we don't know the data type yet, or that we don't care +// because we'll be dropping the array anyway (e.g. some exotic array types +// may be involved only in debug-only subgraphs that we may not be interested +// in actually supporting). +enum class ArrayDataType { kNone, kBool, kFloat, kUint8, kInt32, kInt64 }; + +// Compile-time logic to map ArrayDataType to the corresponding C++ scalar type +template +struct DataTypeImpl {}; +template <> +struct DataTypeImpl { + typedef int Type; +}; +template <> +struct DataTypeImpl { + typedef bool Type; +}; +template <> +struct DataTypeImpl { + typedef float Type; +}; +template <> +struct DataTypeImpl { + typedef uint8 Type; +}; +template <> +struct DataTypeImpl { + typedef int32 Type; +}; +template <> +struct DataTypeImpl { + typedef int64 Type; +}; + +template +using DataType = typename DataTypeImpl::Type; + +// Base class for type-specific buffer types. +struct GenericBuffer { + // Non-default-constructible: only ArrayDataType-specific subclass + // objects may be constructed. + GenericBuffer() = delete; + // Non-copyable-or-movable: we should only store pointers-to-Buffer + // in containers, not Operators themselves, so there should be no + // copy or move. + GenericBuffer(const GenericBuffer&) = delete; + GenericBuffer(const GenericBuffer&&) = delete; + + // We need a virtual destructor so we can store pointers-to-Buffer + // in containers and have the containers call the right subclass destructor. + virtual ~GenericBuffer() {} + + const ArrayDataType type; + + protected: + // Constructor used by subclasses for specific ArrayDataType's. + explicit GenericBuffer(ArrayDataType t) : type(t) {} +}; + +// Type-specific buffer, containing type-specific storage. +template +struct Buffer : GenericBuffer { + Buffer() : GenericBuffer(A) {} + + std::vector> data; +}; + +// Base class for all operator classes. +struct Operator { + // Non-default-constructible: only OperatorType-specific subclass + // objects may be constructed. + Operator() = delete; + // Non-copyable-or-movable: we should only store pointers-to-Operator + // in containers, not Operators themselves, so there should be no + // copy or move. + Operator(const Operator&) = delete; + Operator(const Operator&&) = delete; + + // We need a virtual destructor so we can store pointers-to-Operator + // in containers and have the containers call the right subclass destructor. + virtual ~Operator() {} + + // The specific type of operator. Corresponds 1:1 to subclasses. + const OperatorType type; + + // The activation function that may be fused into this operator, + // or None if no activation function is fused. + FusedActivationFunctionType fused_activation_function; + + // Input arrays: either activation arrays or constant array parameters. + // We refer to them by their name, not by their address; the mapping of + // names to addresses is given by the Model, which owns both Operator's and + // Array's. Thus, an Operator on its own doesn't contain much information, + // it is meant to be used in conjunction with the Model that owns it. + std::vector inputs; + + // Output activation arrays. Same comments as for inputs apply here too. + std::vector outputs; + + // If true, the array has more outputs than are listed in the 'outputs' + // member. These need to be resolved by some graph transformation. + // This flag is only here to indicate that an operator should not be + // discarded as unused, even if from its 'outputs' member alone it + // looks unused. + bool unresolved_outputs = false; + + protected: + // Constructor used by subclasses for specific OperatorType's. + explicit Operator(OperatorType t) + : type(t), + fused_activation_function(FusedActivationFunctionType::kNone) {} +}; + +// Padding types for Conv-like operators. This is how padding is typically +// specified in model files. But for inference, we will need to resolve this +// to a FixedPadding, see below. +enum class PaddingType { kNone, kSame, kValid }; + +// Padding as resolved for a specific layer shape, as needed for inference. +// For a given layer shape, a given padding type will resolve to a choice of +// a number of padding rows and columns, which we call the padding height and +// width respectively. +struct FixedPadding { + int width = 0; + int height = 0; +}; + +// "Universal" padding struct containing both a generic PaddingType (as +// represented in a model file), and a FixedPadding (as needed for inference). +// The latter is resolved during the PropagateFixedSizes pass. +struct Padding { + FixedPadding& GetOrCreateFixedPadding() { + if (!fixed) { + FixedPadding* ptr = new FixedPadding; + fixed = std::unique_ptr(ptr); + } + return *fixed; + } + + Padding() : type(PaddingType::kNone) {} + PaddingType type; + std::unique_ptr fixed; +}; + +// "Convolutional" layer, as represented in model files. +// +// Inputs: +// inputs[0]: required: the input activations array +// inputs[1]: required: the Conv weights +// inputs[2]: optional: the bias vector, specifying the biases for each output +// channel. +// +// Outputs: +// outputs[0]: required: the output activations array +// outputs[1]: optional: the intermediate array of im2col-replicated input +// activations. Present when targeting implementations +// of Conv layers as Im2col+GEMM. +// +// TensorFlow equivalent: Conv2D +struct ConvOperator : Operator { + ConvOperator() : Operator(OperatorType::kConv) {} + Padding padding; + int stride_width = 0; + int stride_height = 0; +}; + +// Depthwise-separable convolution operator. +// +// Inputs: +// inputs[0]: required: the input activations array +// inputs[1]: required: the DepthwiseConv weights +// inputs[2]: optional: the bias vector, specifying the biases for each output +// channel. +// +// TensorFlow equivalent: DepthwiseConv2dNative +struct DepthwiseConvOperator : Operator { + DepthwiseConvOperator() : Operator(OperatorType::kDepthwiseConv) {} + Padding padding; + int stride_height = 0; + int stride_width = 0; + int depth_multiplier = 0; +}; + +// Depth-to-space transform operator. +// +// Inputs: +// inputs[0]: required: the input activations array +// +// TensorFlow equivalent: DepthToSpace +struct DepthToSpaceOperator : Operator { + DepthToSpaceOperator() : Operator(OperatorType::kDepthToSpace) {} + int block_size = 0; +}; + +// Space-to-depth transform operator. +// +// Inputs: +// inputs[0]: required: the input activations array +// +// TensorFlow equivalent: SpaceToDepth +struct SpaceToDepthOperator : Operator { + SpaceToDepthOperator() : Operator(OperatorType::kSpaceToDepth) {} + int block_size = 0; +}; + +// Fully-connected operator. +// +// Inputs: +// inputs[0]: required: the input activations array +// inputs[1]: required: the FullyConnected weights +// inputs[2]: optional: the bias vector, specifying the biases for each output +// channel. +// +// TensorFlow equivalent: a pair consisting of a Reshape node reshaping the +// input activations as a matrix, followed by a MatMul node. +struct FullyConnectedOperator : Operator { + FullyConnectedOperator() : Operator(OperatorType::kFullyConnected) {} +}; + +// Dequantization operator, converting a quantized array of integers with +// quantization parameters specifying how these integers correspond to real +// numbers +// (see QuantizationParams) to an output activations array of floating-point +// values. +// +// In floating-point image models, there is typically a Dequantization operator +// at the very beginning, converting the input image RGB data, consisting of +// uint8 integer values, to floating-point input activations. That is where +// image model parameters such as "mean_value" and "std_value" are typically +// handled. +// +// This is the only operator type that converts from quantized to +// floating-point, +// and there is at the moment no operator type at all to convert from +// floating-point +// to quantized. Every other operator does either float->float or +// quantized->quantized. +// +// Inputs: +// inputs[0]: required: the input quantized activations array +// +// TensorFlow equivalent: Dequantize +struct DequantizeOperator : Operator { + DequantizeOperator() : Operator(OperatorType::kDequantize) {} +}; + +// Batch-normalization operator. +// +// We only support batch-normalization using pre-learned moments, so this is +// just +// computing (input - mean) * multiplier + offset. As such, this can be +// expressed as a combination of Add and Mul nodes, and indeed this is how +// we break it down during tooling for the purpose of fusing it into +// other operators. +// +// Inputs: +// inputs[0]: required: the input activations array +// inputs[1]: required: the learned mean array +// inputs[2]: required: the learned multiplier array +// inputs[3]: required: the learned offset array +// +// TensorFlow equivalent: a combination of Add and Mul nodes +struct BatchNormalizationOperator : Operator { + BatchNormalizationOperator() + : Operator(OperatorType::kBatchNormalization), + global_normalization(false) {} + bool global_normalization; +}; + +// L2-normalization operator. +// +// Inputs: +// inputs[0]: required: the input activations array +// +// TensorFlow equivalent: none. In TensorFlow, L2 normalization is implemented +// by a sub-graph of operators implementing L2-normalization +// from lower-level arithmetic nodes; during tooling, we identify such +// sub-graphs +// and replace them by L2NormalizationOperator's. See IdentifyL2Normalization. +struct L2NormalizationOperator : Operator { + L2NormalizationOperator() : Operator(OperatorType::kL2Normalization) {} +}; + +// LSTM Cell operator. +// +// Inputs: +// inputs[0]: required: the input data array +// inputs[1]: required: the previous output activations array +// inputs[2]: required: the learned weights array +// inputs[3]: required: the learned biases array +// inputs[4]: required: the previous output state +// outputs[0]: required: the output activations array +// outputs[1]: required: the new state array +// +// TensorFlow equivalent: none. In TensorFlow, an LSTM is implemented +// with a sub-graph of lower-level arithmetic nodes; during tooling, we identify +// such sub-graphs and replace them with LstmCells. See IdentifyLstmCell(). +struct LstmCellOperator : Operator { + enum Inputs { + DATA_INPUT = 0, + PREV_ACTIV_INPUT = 1, + WEIGHTS_INPUT = 2, + BIASES_INPUT = 3, + PREV_STATE_INPUT = 4, + NUM_INPUTS = 5 + }; + enum Outputs { + ACTIV_OUTPUT = 0, + STATE_OUTPUT = 1, + CONCAT_TEMP = 2, + ACTIV_TEMP = 3, + NUM_OUTPUTS = 4 + }; + LstmCellOperator() : Operator(OperatorType::kLstmCell) {} +}; + +// Element-wise multiplication operator. +// +// Inputs: +// inputs[0]: required: the left-hand side array +// inputs[1]: required: the right-hand side array +// +// TensorFlow equivalent: Mul +struct MulOperator : Operator { + MulOperator() : Operator(OperatorType::kMul) {} +}; + +// Element-wise Relu operator: +// x -> max(0, x) +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Relu +struct ReluOperator : Operator { + ReluOperator() : Operator(OperatorType::kRelu) {} +}; + +// Element-wise Relu1 operator: +// x -> min(max(x, -1), 1) +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: none. We can construct the operator with Minimum +// and Maximum operations +struct Relu1Operator : Operator { + Relu1Operator() : Operator(OperatorType::kRelu1) {} +}; + +// Element-wise Relu6 operator: +// x -> max(0, min(6, x)) +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Relu6 +struct Relu6Operator : Operator { + Relu6Operator() : Operator(OperatorType::kRelu6) {} +}; + +// Element-wise Logistic operator: +// x -> Logistic(x) = 1 / (1 + exp(-x)) +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Sigmoid +struct LogisticOperator : Operator { + LogisticOperator() : Operator(OperatorType::kLogistic) {} +}; + +// Element-wise Tanh operator: +// x -> Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Tanh +struct TanhOperator : Operator { + TanhOperator() : Operator(OperatorType::kTanh) {} +}; + +// Element-wise addition operator. +// +// Inputs: +// inputs[0]: required: the left-hand side array +// inputs[1]: required: the right-hand side array +// +// TensorFlow equivalent: Add +struct AddOperator : Operator { + AddOperator() : Operator(OperatorType::kAdd) {} +}; + +// Concatenation operator: concatenates its inputs +// along the concat_dim dimension. +// +// Inputs: this operator accepts any number >= 1 of inputs. +// inputs[i]: the i-th array to concatenate. +// +// TensorFlow equivalent: Concat. +struct ConcatenationOperator : Operator { + ConcatenationOperator() : Operator(OperatorType::kConcatenation) {} + int concat_dim = 0; +}; + +// Reordering dimensions. Used only during tooling to transform graphs from +// the TensorFlow format. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: none. This is only useful to convert between formats. +struct ReorderAxesOperator : Operator { + ReorderAxesOperator() : Operator(OperatorType::kReorderAxes) {} + AxesOrder input_axes_order; + AxesOrder output_axes_order; +}; + +// Average-pooling operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: AveragePool +struct AveragePoolOperator : Operator { + AveragePoolOperator() : Operator(OperatorType::kAveragePool) {} + Padding padding; + int stride_height = 0; + int stride_width = 0; + int kheight = 0; + int kwidth = 0; +}; + +// Local response normalization operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: LRN +struct LocalResponseNormalizationOperator : Operator { + LocalResponseNormalizationOperator() + : Operator(OperatorType::kLocalResponseNormalization) {} + + int range = 0; + float bias = 0.f; + float alpha = 0.f; + float beta = 0.f; +}; + +// Max-pooling operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: MaxPool +struct MaxPoolOperator : Operator { + MaxPoolOperator() : Operator(OperatorType::kMaxPool) {} + Padding padding; + int stride_height = 0; + int stride_width = 0; + int kheight = 0; + int kwidth = 0; +}; + +// L2-pooling operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: none. Can be shimmed by squaring+avgpool+sqrt. +struct L2PoolOperator : Operator { + L2PoolOperator() : Operator(OperatorType::kL2Pool) {} + Padding padding; + int stride_height = 0; + int stride_width = 0; + int kheight = 0; + int kwidth = 0; +}; + +// The expected [min, max] range of values in a given array. +// Used for quantization only. +// This information typically comes from special nodes found in quantized +// models, +// see FakeQuantOperator, and is used during quantization to resolve +// actual quantization parameters (see QuantizationParams). +struct MinMax { + double min = 0.; + double max = 0.; +}; + +inline bool operator==(const MinMax& m1, const MinMax& m2) { + return m1.min == m2.min && m1.max == m2.max; +} + +// Fake-quantization operator. This does two things: +// - Annotate its input and output arrays with MinMax information, +// - Arithmetic-wise, this operator rounds incoming activation values +// to the nearest representable value on the scale of 256 +// values from the min to the max value dictated by its MinMax info. +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: optional: the 'min' value, if it has not yet been resolved +// to a constant. +// inputs[2]: optional: the 'max' value, if it has not yet been resolved +// to a constant. +// +// TensorFlow equivalent: FakeQuantWithMinMaxVars, FakeQuantWithMinMaxArgs. +struct FakeQuantOperator : Operator { + FakeQuantOperator() : Operator(OperatorType::kFakeQuant) {} + std::unique_ptr minmax; +}; + +// Element-wise division operator. +// +// Inputs: +// inputs[0]: required: the left-hand side array +// inputs[1]: required: the right-hand side array +// +// TensorFlow equivalent: Div +struct DivOperator : Operator { + DivOperator() : Operator(OperatorType::kDiv) {} +}; + +// Element-wise identity (x->x) operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Identity +struct TensorFlowIdentityOperator : Operator { + TensorFlowIdentityOperator() : Operator(OperatorType::kTensorFlowIdentity) {} +}; + +// General matrix multiplication operator. We don't want to support general +// matrix multiplication at inference time, so we resolve it during tooling +// to more specific operator types, namely, FullyConnected. +// +// Inputs: +// inputs[0]: required: the left-hand side matrix +// inputs[1]: required: the right-hand side matrix +// +// TensorFlow equivalent: MatMul +struct TensorFlowMatMulOperator : Operator { + TensorFlowMatMulOperator() : Operator(OperatorType::kTensorFlowMatMul) {} +}; + +// Padding operator. Pads a tensor with zeros. +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: required: the padding array +// +// This operation pads a `input` with zeros according to the `paddings` you +// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the +// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many zeros to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many zeros to add after the contents of +// `input` in that dimension. +// +// TensorFlow equivalent: Pad +struct PadOperator : Operator { + PadOperator() : Operator(OperatorType::kPad) {} + + std::vector left_padding; + std::vector right_padding; +}; + +// Strided slice operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: StridedSlice +struct StridedSliceOperator : Operator { + StridedSliceOperator() : Operator(OperatorType::kStridedSlice) {} + + std::vector start_indices; + std::vector stop_indices; + std::vector strides; + + int begin_mask; + int ellipsis_mask; + int end_mask; + int new_axis_mask; + int shrink_axis_mask; +}; + +// Reshaping operator, reshaping its input array to a two-dimensional shape +// (a "matrix"). This is used in the TensorFlow format, in conjunction with +// MatMul nodes, to implement fully-connected layers. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Reshape --- except that we only support a special case +// here, where the output shape is a matrix (2D) shape. +struct TensorFlowReshapeOperator : Operator { + TensorFlowReshapeOperator() : Operator(OperatorType::kTensorFlowReshape) {} + std::vector shape; +}; + +// Removes dimensions of size 1 from the shape of a tensor. +// https://www.tensorflow.org/api_docs/python/tf/squeeze +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Squeeze +struct SqueezeOperator : Operator { + SqueezeOperator() : Operator(OperatorType::kSqueeze) {} + + std::vector squeeze_dims; +}; + +// Element-wise reciprocal-square-root (x^-0.5) operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Rsqrt +struct TensorFlowRsqrtOperator : Operator { + TensorFlowRsqrtOperator() : Operator(OperatorType::kTensorFlowRsqrt) {} +}; + +// Shape operator. Extracts the shape of the tensor. +// +// Inputs: +// inputs[0]: required: the input array +// +// This operation outputs a 1-D integer tensor representing the shape of +// the input. +// +// TensorFlow equivalent: Shape. We currently assume that the output is int32 +// and not int64. The output type could be stored herein. +struct TensorFlowShapeOperator : Operator { + TensorFlowShapeOperator() : Operator(OperatorType::kTensorFlowShape) {} +}; + +// Element-wise square-root (x^0.5) operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Sqrt +struct TensorFlowSqrtOperator : Operator { + TensorFlowSqrtOperator() : Operator(OperatorType::kTensorFlowSqrt) {} +}; + +// Element-wise square (x*x) operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Square +struct TensorFlowSquareOperator : Operator { + TensorFlowSquareOperator() : Operator(OperatorType::kTensorFlowSquare) {} +}; + +// Element-wise subtraction operator. +// +// Inputs: +// inputs[0]: required: the left-hand side array +// inputs[1]: required: the right-hand side array +// +// TensorFlow equivalent: Sub +struct SubOperator : Operator { + SubOperator() : Operator(OperatorType::kSub) {} +}; + +// Global sum reduction: computes the sum of all of entries in the input array. +// Thus the output is "0-dimensional": it consists of a single scalar value. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Sum --- except that we only support the special case +// of global reduction across all dimensions. +struct TensorFlowSumOperator : Operator { + TensorFlowSumOperator() : Operator(OperatorType::kTensorFlowSum) {} +}; + +// TensorFlow Tile equivalent. Refer to TensorFlow documentation for details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +struct TensorFlowTileOperator : Operator { + TensorFlowTileOperator() : Operator(OperatorType::kTensorFlowTile) {} +}; + +// TensorFlow Slice equivalent. Refer to TensorFlow documentation for details. +struct SliceOperator : Operator { + SliceOperator() : Operator(OperatorType::kSlice) {} + + std::vector begin; + std::vector size; +}; + +// TensorFlow Split equivalent. Refer to TensorFlow documentation for details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +struct TensorFlowSplitOperator : Operator { + TensorFlowSplitOperator() : Operator(OperatorType::kTensorFlowSplit) {} + int num_split = 0; +}; + +// TensorFlow Concat equivalent. Refer to TensorFlow documentation for details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Concretely, once the concat dim becomes known, if it is the depth +// dimension then we can change this op into a DepthConcatenation op. +// Otherwise, we hope for some other graph transformation to drop this node. +struct TensorFlowConcatOperator : Operator { + TensorFlowConcatOperator() : Operator(OperatorType::kTensorFlowConcat) {} +}; + +// TensorFlow ConcatV2 equivalent. Refer to TensorFlow documentation for +// details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Concretely, once the concat dim becomes known, if it is the depth +// dimension then we can change this op into a DepthConcatenation op. +// Otherwise, we hope for some other graph transformation to drop this node. +struct TensorFlowConcatV2Operator : Operator { + TensorFlowConcatV2Operator() : Operator(OperatorType::kTensorFlowConcatV2) {} +}; + +// TensorFlow Merge equivalent. Refer to TensorFlow documentation for details. +// +// Inputs: this operator accepts any number >= 1 of inputs. +// inputs[i]: the i-th array to merge. +// +// It is expected that graph transformations will drop all but exactly one +// of the inputs, at which point the Merge node will be equivalent to an +// Identity node forwarding the remaining input. +// +// Note: We do not currently support runtime control flow: we only support +// control flow that can be resolved at tooling time (independently of input +// activations). +struct TensorFlowMergeOperator : Operator { + TensorFlowMergeOperator() : Operator(OperatorType::kTensorFlowMerge) {} +}; + +// TensorFlow Switch equivalent. Refer to TensorFlow documentation for details. +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: required: the boolean predicate, given as an array of size 1 +// and of type kBool, will determine which output gets selected. +// +// Outputs: a TensorFlow Switch node always has exactly two outputs. Depending +// on the boolean value that the input predicate resolves to (see note below), +// one or the other of the outputs will be 'selected': the input array will be +// forwarded to the 'selected output' as if by a Identity node, while the other +// output will be discarded, and any graph edge connecting that discarded output +// will be dropped. The rule for selecting outputs is as follows: +// outputs[0] will be selected if the input predicate resolves to 'true'. +// outputs[1] will be selected if the input predicate resolves to 'false'. +// +// Note: We do not currently support runtime control flow: we only support +// control flow that can be resolved at tooling time (independently of input +// activations). +struct TensorFlowSwitchOperator : Operator { + TensorFlowSwitchOperator() : Operator(OperatorType::kTensorFlowSwitch) {} +}; + +// TensorFlow All equivalent. Refer to TensorFlow documentation for details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Typically, this is only used as an input to an Assert node, so can be +// removed as an unused node as we drop Assert nodes. +struct TensorFlowAllOperator : Operator { + TensorFlowAllOperator() : Operator(OperatorType::kTensorFlowAll) {} +}; + +// TensorFlow Assert equivalent. Refer to TensorFlow documentation for details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Typically, we just drop Assert nodes. +struct TensorFlowAssertOperator : Operator { + TensorFlowAssertOperator() : Operator(OperatorType::kTensorFlowAssert) {} +}; + +// TensorFlow Less equivalent. Refer to TensorFlow documentation for details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Typically, this is only used as an input to an Assert node, so can be +// removed as an unused node as we drop Assert nodes. +struct TensorFlowLessOperator : Operator { + TensorFlowLessOperator() : Operator(OperatorType::kTensorFlowLess) {} +}; + +// TensorFlow LessEqual equivalent. Refer to TensorFlow documentation for +// details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Typically, this is only used as an input to an Assert node, so can be +// removed as an unused node as we drop Assert nodes. +struct TensorFlowLessEqualOperator : Operator { + TensorFlowLessEqualOperator() + : Operator(OperatorType::kTensorFlowLessEqual) {} +}; + +// TensorFlow Less equivalent. Refer to TensorFlow documentation for details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Typically, this is only used as an input to an Assert node, so can be +// removed as an unused node as we drop Assert nodes. +struct TensorFlowGreaterOperator : Operator { + TensorFlowGreaterOperator() : Operator(OperatorType::kTensorFlowGreater) {} +}; + +// TensorFlow GreaterEqual equivalent. Refer to TensorFlow documentation for +// details. +// Not fully supported, just a placeholder to handle TensorFlow graphs and +// support graph transformations to other operator types by matching sub-graphs. +// Typically, this is only used as an input to an Assert node, so can be +// removed as an unused node as we drop Assert nodes. +struct TensorFlowGreaterEqualOperator : Operator { + TensorFlowGreaterEqualOperator() + : Operator(OperatorType::kTensorFlowGreaterEqual) {} +}; + +// Global max reduction: computes the max of all of entries in the input array. +// Thus the output is "0-dimensional": it consists of a single scalar value. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Max --- except that we only support the special case +// of global reduction across all dimensions. +struct TensorFlowMaxOperator : Operator { + TensorFlowMaxOperator() : Operator(OperatorType::kTensorFlowMax) {} +}; + +// Global min reduction: computes the min of all of entries in the input array. +// Thus the output is "0-dimensional": it consists of a single scalar value. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Min --- except that we only support the special case +// of global reduction across all dimensions. +struct TensorFlowMinOperator : Operator { + TensorFlowMinOperator() : Operator(OperatorType::kTensorFlowMin) {} +}; + +// Element-wise maximum operator. Currently it only supports scalar as +// the second operand. +// +// Inputs: +// inputs[0]: required: the left-hand side array +// inputs[1]: required: the right-hand side array +// +// TensorFlow equivalent: Maximum +struct TensorFlowMaximumOperator : Operator { + TensorFlowMaximumOperator() : Operator(OperatorType::kTensorFlowMaximum) {} +}; + +// Element-wise minimum operator. Currently it only supports scalar as +// the second operand. +// +// Inputs: +// inputs[0]: required: the left-hand side array +// inputs[1]: required: the right-hand side array +// +// TensorFlow equivalent: Minimum +struct TensorFlowMinimumOperator : Operator { + TensorFlowMinimumOperator() : Operator(OperatorType::kTensorFlowMinimum) {} +}; + +// General TF operation, unsupported by tf.mini. Expected to be dropped by +// graph transformations. +struct TensorFlowUnsupportedOperator : Operator { + TensorFlowUnsupportedOperator() + : Operator(OperatorType::kTensorFlowUnsupported) {} + + // The original TF operation type. Used for diagnostic purposes. + string tensorflow_op; + // A serialized tensorflow::NodeDef string. + string tensorflow_node_def; + // A boolean indicating if the unsupported op should be treated as quantized. + bool quantized = false; + // Output data types + std::vector output_data_types; +}; + +// Softmax activation function. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Softmax +struct SoftmaxOperator : Operator { + SoftmaxOperator() : Operator(OperatorType::kSoftmax) {} + float beta = 0.f; +}; + +// Cast operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Cast +struct CastOperator : Operator { + CastOperator() : Operator(OperatorType::kCast) {} + ArrayDataType src_data_type = ArrayDataType::kNone; + ArrayDataType dst_data_type = ArrayDataType::kNone; +}; + +// Floor operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Floor +struct FloorOperator : Operator { + FloorOperator() : Operator(OperatorType::kFloor) {} +}; + +// Gather operator. It gathers slices from params according to indices. +// Only 1-D indices are supported at the moment. +// +// Inputs: +// inputs[0]: required: the params array +// inputs[1]: required: the indices to gather +// +// TensorFlow equivalent: Gather +struct GatherOperator : Operator { + GatherOperator() : Operator(OperatorType::kGather) {} + int input_rank; +}; + +// ResizeBilinear operator. It resizes input images with bilinear interpolation. +// It does not support align_corners at the moment. +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: required: the new image size +// +// TensorFlow equivalent: ResizeBilinear +struct ResizeBilinearOperator : Operator { + ResizeBilinearOperator() : Operator(OperatorType::kResizeBilinear) {} +}; + +// SpaceToBatchND operator. It divides spatial dimensions into a grid of +// blocks and interleaves these blocks with the batch dimension. Currently, +// only 2-d blocks are supported. +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: required: the block shape +// inputs[2]: required: the paddings +// +// TensorFlow equivalent: SpaceToBatchND +struct SpaceToBatchNDOperator : Operator { + SpaceToBatchNDOperator() : Operator(OperatorType::kSpaceToBatchND) {} +}; + +// BatchToSpaceND operator. Rearranges data from batch into blocks of +// spatial data. Currently, only 2-d blocks are supported. Cropping is not +// supported, either, and the crops array should be all zero. +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: required: the block shape +// inputs[2]: required: the crops +// +// TensorFlow equivalent: BatchToSpaceND +struct BatchToSpaceNDOperator : Operator { + BatchToSpaceNDOperator() : Operator(OperatorType::kBatchToSpaceND) {} +}; + +// Mean operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Mean +struct MeanOperator : Operator { + MeanOperator() : Operator(OperatorType::kMean) {} + + std::vector reduction_indices; +}; + +// Svdf operator: +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: required: weights_feature +// inputs[2]: required: weights_time +// inputs[3]: optional: bias +struct SvdfOperator : Operator { + SvdfOperator() : Operator(OperatorType::kSvdf) {} + int rank; +}; + +// Alloc's are used for transient arrays only. An Alloc specifies which interval +// of the "transient_data" workspace buffer passed to inference functions, is to +// be used for the transient array at hand. The 'start' and 'end' values are +// offsets from the start of the workspace buffer, expressed in bytes. +struct Alloc { + int start = 0; + int end = 0; +}; + +inline bool operator<(const Alloc& a, const Alloc& b) { + return a.start < b.start; +} + +// Quantization parameters, determining the mapping of quantized values +// to real values (i.e. determining how quantized values are mathematically +// interpreted). +// +// The correspondence is as follows: +// +// real_value = scale * (quantized_value - zero_point); +// +// In other words, zero_point designates which quantized value corresponds to +// the real 0 value, and scale designates the difference between the real values +// corresponding to consecutive quantized values differing by 1. +struct QuantizationParams { + int32 zero_point = 0; + double scale = 0.; +}; + +class Shape { + public: + // For Shape, we stick to half-way encapsulation for now: + // we hide the raw dims_ member, but expose it raw by accessors + // because from some brainstorming, it's not at all easy to + // anticipate which flavor of more hermetic encapsulation would + // actually buy us future-proof-ness without being needlessly + // cumbersome. + Shape() {} + Shape(std::initializer_list dim_list) : dims_(dim_list) {} + + void ReplaceDims(std::initializer_list dim_list) { + dims_ = std::vector(dim_list); + } + + const std::vector& dims() const { return dims_; } + std::vector* mutable_dims() { return &dims_; } + const int dimensions_count() const { return dims_.size(); } + + // We still have that one convenience accessor to avoid + // the awkward double bracket issue: shape.dims()[i]. + int dims(int i) const { return dims_[i]; } + + bool operator==(const Shape& comp) const { + return (this->dims_ == comp.dims()); + } + + bool operator!=(const Shape& comp) const { return !((*this) == comp); } + + private: + std::vector dims_; +}; + +// Array represents an array (either a constant parameter array or an +// activations array) in a Model. +struct Array { + template + const Buffer& GetBuffer() const { + DCHECK(buffer); + DCHECK(buffer->type == A); + return *static_cast*>(buffer.get()); + } + template + Buffer& GetMutableBuffer() { + if (!buffer) { + Buffer* ptr = new Buffer; + buffer = std::unique_ptr(ptr); + } + DCHECK(buffer); + DCHECK(buffer->type == A); + return *static_cast*>(buffer.get()); + } + Alloc& GetOrCreateAlloc() { + if (!alloc) { + alloc = std::unique_ptr(new Alloc); + } + return *alloc; + } + MinMax& GetOrCreateMinMax() { + if (!minmax) { + minmax = std::unique_ptr(new MinMax); + } + return *minmax; + } + MinMax& GetMinMax() const { + DCHECK(minmax); + return *minmax; + } + QuantizationParams& GetOrCreateQuantizationParams() { + if (!quantization_params) { + quantization_params = + std::unique_ptr(new QuantizationParams); + } + return *quantization_params; + } + QuantizationParams& GetQuantizationParams() const { + DCHECK(quantization_params); + return *quantization_params; + } + + // The data type of the actual elements of this array, that is: + // - If there is a buffer (see 'buffer' member), it must be of the same + // type. + // - If there is no buffer, meaning that this is a runtime (i.e. activations) + // array, then this specifies the type of elements that there will be + // at runtime. + // + // Note that this only specifies the storage type of elements; this does + // not specify whether these are to be treated as 'real' or 'quantized' + // values. + // That is decided by whether the 'quantization_params' member is null. + ArrayDataType data_type = ArrayDataType::kNone; + // The final value that data_type should have at the end of graph + // transformations + ArrayDataType final_data_type = ArrayDataType::kNone; + // The dimensions of this array --- this specifies both sizes and strides + // (the storage layout). + // + // Issues with shape handling that remain include: + // - No way to distinguish between 0-dimensional dims and missing dims. + // - No way to describe dims that may be runtime-variable. + // - Addressing of dims by integer index differs in different graph formats + // (TensorFlow vs. other frameworks vs. what we have informally grown + // within toco). + // This is currently quite messy; see ReorderAxesOperator which is how we + // bridge some of these discrepancies at the moment. This is overdue for + // a redesign; I'm thinking that it would be nice to have more flexible + // dims that allow mapping 1:1, cleanly, dims as they are in various + // formats, + // then explicitly convert between different conventions. + + // Proto-style accessors + bool has_shape() const { return array_shape != nullptr; } + const Shape& shape() const { + CHECK(has_shape()); + return *array_shape; + } + Shape* mutable_shape() { + if (!array_shape) { + array_shape.reset(new Shape); + } + return array_shape.get(); + } + void copy_shape(const Shape& src_shape) { *mutable_shape() = src_shape; } + void clear_shape() { array_shape = nullptr; } + + // The constant buffer backing this array. This is non-null if and only if + // this is a constant parameter array. Conversely, this is null for + // activations arrays. + // + // Note that this buffer is pure storage. In the case of quantized values, + // it only stores the quantized values, it does not know by itself about the + // quantization parameters necessary to interprete these values, that is + // in the separate 'quantization_params' field. In fact, this 'buffer' field + // does no even know whether values are quantized. It only has a data_type, + // which must equal the 'data_type' member here, and which only describes + // the storage type of element, does not tell whether they are quantized i.e. + // whether they are to be interpreted with quantization_params. + std::unique_ptr buffer; + // Only for activation arrays (i.e. when 'buffer' is null). + // Only for code generation. + // + // Describes the allocation of this array within the workspace buffer + // allocated + // for all transient arrays. + std::unique_ptr alloc; + // Describes the [min, max] range of values + // to be assumed when determining quantization_params. + // + // Only used for quantization. In fact, only used for determining + // quantization_params. + // + // Used for both constant arrays (those having a 'buffer') and non-constant + // arrays (activations). Indeed, it is important to use the same min-max range + // as was used during training, even if that min-max range is slightly wrong + // w.r.t. actual buffer elements. Doing otherwise would defeat the point of + // re-training for quantization. + std::unique_ptr minmax; + // Quantization parameters. The non-null-ness of this pointer is what + // defines whether this array is quantized or not. + // + // If this is non-null, then these quantization parameters are to be used + // to assign a meaning as real numbers to the elements of this array. + std::unique_ptr quantization_params; + + private: + std::unique_ptr array_shape; +}; + +// Our Model struct, represents an entire model (our "top-level" struct). +// Owns everything. +struct Model { + Array& GetArray(const string& name) const { + DCHECK(arrays.count(name)); + return *arrays.at(name); + } + Array& GetOrCreateArray(const string& name) { + if (!arrays.count(name)) { + Array* ptr = new Array; + arrays[name] = std::unique_ptr(ptr); + } + Array& result = GetArray(name); + return result; + } + + // The list of operators. Notice how it's a list of unique_ptr's, implying + // that the Model is what owns Operator's and keeps them alive. + std::vector> operators; + // The associative array mapping names to Array's. + // Notice how it's a container of unique_ptr's, implying + // that the Model is what owns Array's and keeps them alive. + // The Operator's refer to these Array's by their name strings, not by their + // addresses. See Operator::inputs, Operator::outputs. + std::unordered_map> arrays; + // Generic flags, a place where we combine information passed to us via + // command-line parameters (e.g. --input_width=N) with information that + // we may or may not find in the input model file. + ModelFlags flags; + // For code-generation only: required size of the transient_data buffer + std::size_t transient_data_size = 0; + // For code-generation only: required alignment of the transient_data buffer + std::size_t transient_data_alignment = 0; +}; +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_H_ diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc new file mode 100644 index 0000000000..699c95753f --- /dev/null +++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc @@ -0,0 +1,374 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/model_cmdline_flags.h" + +#include +#include + +#include "absl/strings/ascii.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" +#include "tensorflow/contrib/lite/toco/args.h" +#include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/command_line_flags.h" +// "batch" flag only exists internally +#ifdef PLATFORM_GOOGLE +#include "base/commandlineflags.h" +#endif + +namespace toco { + +bool ParseModelFlagsFromCommandLineFlags( + int* argc, char* argv[], string* msg, + ParsedModelFlags* parsed_model_flags_ptr) { + ParsedModelFlags& parsed_flags = *parsed_model_flags_ptr; + using tensorflow::Flag; + std::vector flags = { + Flag("input_array", parsed_flags.input_array.bind(), + parsed_flags.input_array.default_value(), + "Name of the input array. If not specified, will try to read " + "that information from the input file."), + Flag("input_arrays", parsed_flags.input_arrays.bind(), + parsed_flags.input_arrays.default_value(), + "Names of the output arrays, comma-separated. If not specified, " + "will try to read that information from the input file."), + Flag("output_array", parsed_flags.output_array.bind(), + parsed_flags.output_array.default_value(), + "Name of the output array, when specifying a unique output array. " + "If not specified, will try to read that information from the " + "input file."), + Flag("output_arrays", parsed_flags.output_arrays.bind(), + parsed_flags.output_arrays.default_value(), + "Names of the output arrays, comma-separated. " + "If not specified, will try to read " + "that information from the input file."), + Flag("input_shape", parsed_flags.input_shape.bind(), + parsed_flags.output_arrays.default_value(), + "Input array shape. For many models the shape takes the form " + "batch size, input array height, input array width, input array " + "depth."), + Flag("input_shapes", parsed_flags.input_shapes.bind(), + parsed_flags.input_shapes.default_value(), + "Shapes corresponding to --input_arrays, colon-separated. For " + "many models each shape takes the form batch size, input array " + "height, input array width, input array depth."), + Flag("mean_value", parsed_flags.mean_value.bind(), + parsed_flags.mean_value.default_value(), + "mean_value parameter for image models, used to compute input " + "activations from input pixel data."), + Flag("mean_values", parsed_flags.mean_values.bind(), + parsed_flags.mean_values.default_value(), + "mean_values parameter for image models, comma-separated list of " + "doubles, used to compute input activations from input pixel " + "data. Each entry in the list should match an entry in " + "--input_arrays."), + Flag("std_value", parsed_flags.std_value.bind(), + parsed_flags.std_value.default_value(), + "std_value parameter for image models, used to compute input " + "activations from input pixel data."), + Flag("std_values", parsed_flags.std_values.bind(), + parsed_flags.std_values.default_value(), + "std_value parameter for image models, comma-separated list of " + "doubles, used to compute input activations from input pixel " + "data. Each entry in the list should match an entry in " + "--input_arrays."), + Flag("variable_batch", parsed_flags.variable_batch.bind(), + parsed_flags.variable_batch.default_value(), + "If true, the model accepts an arbitrary batch size. Mutually " + "exclusive " + "with the 'batch' field: at most one of these two fields can be " + "set."), + Flag( + "drop_control_dependency", + parsed_flags.drop_control_dependency.bind(), + parsed_flags.drop_control_dependency.default_value(), + "If true, ignore control dependency requirements in input TensorFlow " + "GraphDef. Otherwise an error will be raised upon control dependency " + "inputs."), + Flag("rnn_states", parsed_flags.rnn_states.bind(), + parsed_flags.rnn_states.default_value(), ""), + Flag("model_checks", parsed_flags.model_checks.bind(), + parsed_flags.model_checks.default_value(), + "A list of model checks to be applied to verify the form of the " + "model. Applied after the graph transformations after import."), + Flag("graphviz_first_array", parsed_flags.graphviz_first_array.bind(), + parsed_flags.graphviz_first_array.default_value(), + "If set, defines the start of the sub-graph to be dumped to " + "GraphViz."), + Flag( + "graphviz_last_array", parsed_flags.graphviz_last_array.bind(), + parsed_flags.graphviz_last_array.default_value(), + "If set, defines the end of the sub-graph to be dumped to GraphViz."), + Flag("dump_graphviz", parsed_flags.dump_graphviz.bind(), + parsed_flags.dump_graphviz.default_value(), + "Dump graphviz during LogDump call. If string is non-empty then " + "it defines path to dump, otherwise will skip dumping."), + Flag("dump_graphviz_video", parsed_flags.dump_graphviz_video.bind(), + parsed_flags.dump_graphviz_video.default_value(), + "If true, will dump graphviz at each " + "graph transformation, which may be used to generate a video."), + }; + bool asked_for_help = + *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help")); + if (asked_for_help) { + *msg += tensorflow::Flags::Usage(argv[0], flags); + return false; + } else { + if (!tensorflow::Flags::Parse(argc, argv, flags)) return false; + } + auto& dump_options = *GraphVizDumpOptions::singleton(); + dump_options.graphviz_first_array = parsed_flags.graphviz_first_array.value(); + dump_options.graphviz_last_array = parsed_flags.graphviz_last_array.value(); + dump_options.dump_graphviz_video = parsed_flags.dump_graphviz_video.value(); + dump_options.dump_graphviz = parsed_flags.dump_graphviz.value(); + + return true; +} + +void ReadModelFlagsFromCommandLineFlags( + const ParsedModelFlags& parsed_model_flags, ModelFlags* model_flags) { + toco::port::CheckInitGoogleIsDone("InitGoogle is not done yet"); + +// "batch" flag only exists internally +#ifdef PLATFORM_GOOGLE + CHECK(!((base::SpecifiedOnCommandLine("batch") && + parsed_model_flags.variable_batch.specified()))) + << "The --batch and --variable_batch flags are mutually exclusive."; +#endif + CHECK(!(parsed_model_flags.output_array.specified() && + parsed_model_flags.output_arrays.specified())) + << "The --output_array and --vs flags are mutually exclusive."; + + if (parsed_model_flags.output_array.specified()) { + model_flags->add_output_arrays(parsed_model_flags.output_array.value()); + } + + if (parsed_model_flags.output_arrays.specified()) { + std::vector output_arrays = + absl::StrSplit(parsed_model_flags.output_arrays.value(), ','); + for (const string& output_array : output_arrays) { + model_flags->add_output_arrays(output_array); + } + } + + const bool uses_single_input_flags = + parsed_model_flags.input_array.specified() || + parsed_model_flags.mean_value.specified() || + parsed_model_flags.std_value.specified() || + parsed_model_flags.input_shape.specified(); + + const bool uses_multi_input_flags = + parsed_model_flags.input_arrays.specified() || + parsed_model_flags.mean_values.specified() || + parsed_model_flags.std_values.specified() || + parsed_model_flags.input_shapes.specified(); + + QCHECK(!(uses_single_input_flags && uses_multi_input_flags)) + << "Use either the singular-form input flags (--input_array, " + "--input_shape, --mean_value, --std_value) or the plural form input " + "flags (--input_arrays, --input_shapes, --mean_values, --std_values), " + "but not both forms within the same command line."; + + if (parsed_model_flags.input_array.specified()) { + QCHECK(uses_single_input_flags); + model_flags->add_input_arrays()->set_name( + parsed_model_flags.input_array.value()); + } + if (parsed_model_flags.input_arrays.specified()) { + QCHECK(uses_multi_input_flags); + for (const auto& input_array : + absl::StrSplit(parsed_model_flags.input_arrays.value(), ',')) { + model_flags->add_input_arrays()->set_name(string(input_array)); + } + } + if (parsed_model_flags.mean_value.specified()) { + QCHECK(uses_single_input_flags); + model_flags->mutable_input_arrays(0)->set_mean_value( + parsed_model_flags.mean_value.value()); + } + if (parsed_model_flags.mean_values.specified()) { + QCHECK(uses_multi_input_flags); + std::vector mean_values = + absl::StrSplit(parsed_model_flags.mean_values.value(), ','); + QCHECK(mean_values.size() == model_flags->input_arrays_size()); + for (int i = 0; i < mean_values.size(); ++i) { + char* last = nullptr; + model_flags->mutable_input_arrays(i)->set_mean_value( + strtod(mean_values[i].data(), &last)); + CHECK(last != mean_values[i].data()); + } + } + if (parsed_model_flags.std_value.specified()) { + QCHECK(uses_single_input_flags); + model_flags->mutable_input_arrays(0)->set_std_value( + parsed_model_flags.std_value.value()); + } + if (parsed_model_flags.std_values.specified()) { + QCHECK(uses_multi_input_flags); + std::vector std_values = + absl::StrSplit(parsed_model_flags.std_values.value(), ','); + QCHECK(std_values.size() == model_flags->input_arrays_size()); + for (int i = 0; i < std_values.size(); ++i) { + char* last = nullptr; + model_flags->mutable_input_arrays(i)->set_std_value( + strtod(std_values[i].data(), &last)); + CHECK(last != std_values[i].data()); + } + } + if (parsed_model_flags.input_shape.specified()) { + QCHECK(uses_single_input_flags); + if (model_flags->input_arrays().empty()) { + model_flags->add_input_arrays(); + } + auto* shape = model_flags->mutable_input_arrays(0)->mutable_shape(); + shape->Clear(); + const IntList& list = parsed_model_flags.input_shape.value(); + for (auto& dim : list.elements) { + shape->Add(dim); + } + } + if (parsed_model_flags.input_shapes.specified()) { + QCHECK(uses_multi_input_flags); + std::vector input_shapes = + absl::StrSplit(parsed_model_flags.input_shapes.value(), ':'); + QCHECK(input_shapes.size() == model_flags->input_arrays_size()); + for (int i = 0; i < input_shapes.size(); ++i) { + auto* shape = model_flags->mutable_input_arrays(i)->mutable_shape(); + shape->Clear(); + if (input_shapes[i].empty()) { + // empty i.e. 0-dimensional input shape. + // Unfortunately, the current toco::InputArray + // proto does not allow to distinguish between a known 0-D shape, + // and an unknown shape. Indeed, shape is currently a plain array, + // and it being empty means unknown shape. So here, we import a + // 0-D shape as a 1-D shape of size. + // TODO(benoitjacob): fix toco::InputArray to allow 0-D shape, + // probably by making shape an optional message, + // encapsulating the array. + shape->Add(1); + } else { + for (const auto& dim_str : absl::StrSplit(input_shapes[i], ',')) { + int size; + CHECK(absl::SimpleAtoi(dim_str, &size)) + << "Failed to parse input_shape: " << input_shapes[i]; + shape->Add(size); + } + } + } + } + +#define READ_MODEL_FLAG(name) \ + do { \ + if (parsed_model_flags.name.specified()) { \ + model_flags->set_##name(parsed_model_flags.name.value()); \ + } \ + } while (false) + + READ_MODEL_FLAG(variable_batch); + READ_MODEL_FLAG(drop_control_dependency); + +#undef READ_MODEL_FLAG + + for (const auto& element : parsed_model_flags.rnn_states.value().elements) { + auto* rnn_state_proto = model_flags->add_rnn_states(); + for (const auto& kv_pair : element) { + const string& key = kv_pair.first; + const string& value = kv_pair.second; + if (key == "state_array") { + rnn_state_proto->set_state_array(value); + } else if (key == "back_edge_source_array") { + rnn_state_proto->set_back_edge_source_array(value); + } else if (key == "size") { + int32 size = 0; + CHECK(absl::SimpleAtoi(value, &size)); + CHECK_GT(size, 0); + rnn_state_proto->set_size(size); + } else if (key == "manually_create") { + CHECK_EQ(absl::AsciiStrToLower(value), "true"); + rnn_state_proto->set_manually_create(true); + } else { + LOG(FATAL) << "Unknown key '" << key << "' in --rnn_states"; + } + } + CHECK(rnn_state_proto->has_state_array() && + rnn_state_proto->has_back_edge_source_array() && + rnn_state_proto->has_size()) + << "--rnn_states must include state_array, back_edge_source_array and " + "size."; + } + + for (const auto& element : parsed_model_flags.model_checks.value().elements) { + auto* model_check_proto = model_flags->add_model_checks(); + for (const auto& kv_pair : element) { + const string& key = kv_pair.first; + const string& value = kv_pair.second; + if (key == "count_type") { + model_check_proto->set_count_type(value); + } else if (key == "count_min") { + int32 count = 0; + CHECK(absl::SimpleAtoi(value, &count)); + CHECK_GE(count, -1); + model_check_proto->set_count_min(count); + } else if (key == "count_max") { + int32 count = 0; + CHECK(absl::SimpleAtoi(value, &count)); + CHECK_GE(count, -1); + model_check_proto->set_count_max(count); + } else { + LOG(FATAL) << "Unknown key '" << key << "' in --model_checks"; + } + } + } +} + +ParsedModelFlags* UncheckedGlobalParsedModelFlags(bool must_already_exist) { + static auto* flags = [must_already_exist]() { + if (must_already_exist) { + fprintf(stderr, __FILE__ + ":" + "GlobalParsedModelFlags() used without initialization\n"); + fflush(stderr); + abort(); + } + return new toco::ParsedModelFlags; + }(); + return flags; +} + +ParsedModelFlags* GlobalParsedModelFlags() { + return UncheckedGlobalParsedModelFlags(true); +} + +void ParseModelFlagsOrDie(int* argc, char* argv[]) { + // TODO(aselle): in the future allow Google version to use + // flags, and only use this mechanism for open source + auto* flags = UncheckedGlobalParsedModelFlags(false); + string msg; + bool model_success = + toco::ParseModelFlagsFromCommandLineFlags(argc, argv, &msg, flags); + if (!model_success || !msg.empty()) { + // Log in non-standard way since this happens pre InitGoogle. + fprintf(stderr, "%s", msg.c_str()); + fflush(stderr); + abort(); + } +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.h b/tensorflow/contrib/lite/toco/model_cmdline_flags.h new file mode 100644 index 0000000000..dfa3d3c1ef --- /dev/null +++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.h @@ -0,0 +1,43 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_CMDLINE_FLAGS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_CMDLINE_FLAGS_H_ + +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/args.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" + +namespace toco { +// Parse and remove arguments for models (in toco). Returns true if parsing +// is successful. msg has the usage string if there was an error or +// "--help" was specified +bool ParseModelFlagsFromCommandLineFlags( + int* argc, char* argv[], string* msg, + ParsedModelFlags* parsed_model_flags_ptr); +// Populate the ModelFlags proto with model data. +void ReadModelFlagsFromCommandLineFlags( + const ParsedModelFlags& parsed_model_flags, ModelFlags* model_flags); +// Parse the global model flags to a static +void ParseModelFlagsOrDie(int* argc, char* argv[]); +// Get the global parsed model flags +ParsedModelFlags* GlobalParsedModelFlags(); + +} // namespace toco + + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_CMDLINE_FLAGS_H_ diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto new file mode 100644 index 0000000000..743e08b16f --- /dev/null +++ b/tensorflow/contrib/lite/toco/model_flags.proto @@ -0,0 +1,119 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +syntax = "proto2"; + +package toco; + +// Next ID to USE: 5. +message InputArray { + // Name of the input arrays, i.e. the arrays from which input activations + // will be read. + optional string name = 1; + + // Shape of the input. For many applications the dimensions are {batch, + // height, width, depth}. Often the batch is left "unspecified" by providing + // a value of -1. + // + // The last dimension is typically called 'depth' or 'channels'. For example, + // for an image model taking RGB images as input, this would have the value 3. + repeated int32 shape = 2; + + // mean_value and std_value parameters control the interpretation of raw input + // activation values (elements of the input array) as real numbers. The + // mapping is given by: + // + // real_value = (raw_input_value - mean_value) / std_value + // + // In particular, the defaults (mean_value=0, std_value=1) yield + // real_value = raw_input_value. Often, non-default values are used in image + // models. For example, an image model taking uint8 image channel values as + // its raw inputs, in [0, 255] range, may use mean_value=128, std_value=128 to + // map them into the interval [-1, 1). + // + // Note: this matches exactly the meaning of mean_value and std_value in + // (TensorFlow via LegacyFedInput). + optional float mean_value = 3; + optional float std_value = 4 [default = 1.]; +} + +// ModelFlags encodes properties of a model that, depending on the file +// format, may or may not be recorded in the model file. The purpose of +// representing these properties in ModelFlags is to allow passing them +// separately from the input model file, for instance as command-line +// parameters, so that we can offer a single uniform interface that can +// handle files from different input formats. +// +// For each of these properties, and each supported file format, we +// detail in comments below whether the property exists in the given file +// format. +// +// Obsolete flags that have been removed: +// optional int32 input_depth = 3; +// optional int32 input_width = 4; +// optional int32 input_height = 5; +// optional int32 batch = 6 [ default = 1]; +// optional float mean_value = 7; +// optional float std_value = 8 [default = 1.]; +// optional int32 input_dims = 11 [ default = 4]; +// repeated int32 input_shape = 13; +// +// Next ID to USE: 16. +message ModelFlags { + // Information about the input arrays, i.e. the arrays from which input + // activations will be read. + repeated InputArray input_arrays = 1; + + // Name of the output arrays, i.e. the arrays into which output activations + // will be written. + repeated string output_arrays = 2; + + // If true, the model accepts an arbitrary batch size. Mutually exclusive with + // the 'batch' field: at most one of these two fields can be set. + optional bool variable_batch = 10; + + message RnnState { + optional string state_array = 1; + optional string back_edge_source_array = 2; + optional int32 size = 3; + // TODO(benoitjacob): manually_create is a temporary hack: + // due to discrepancies between the current toco dims tracking and + // TensorFlow shapes, for some models we need to manually create RNN state + // arrays with a specified shape. + // Maybe we should actually implement back-edges as operators of their own, + // which would remove the need for much special-casing, including here, + // we could probably consistently let PropagateFixedSizes handle state + // arrays. + optional bool manually_create = 4; + } + repeated RnnState rnn_states = 12; + + // Checks applied to the model, typically after toco's comprehensive + // graph transformations. + // Next ID to USE: 4. + message ModelCheck { + // Use the name of a type of operator to check its counts. + // Use "Total" for overall operator counts. + // Use "Arrays" for overall array counts. + optional string count_type = 1 [default = "None"]; + // A count of zero is a meaningful check, so negative used to mean disable. + optional int32 count_min = 2 [default = -1]; + // If count_max < count_min, then count_min is only allowed value. + optional int32 count_max = 3 [default = -1]; + } + repeated ModelCheck model_checks = 14; + + // If true, ignore control dependency requirements in input TensorFlow + // GraphDef. Otherwise an error will be raised upon control dependency inputs. + optional bool drop_control_dependency = 15; +} diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD new file mode 100644 index 0000000000..92246a8aed --- /dev/null +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -0,0 +1,76 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") +load("//tensorflow:tensorflow.bzl", "tf_py_test") + +cc_library( + name = "toco_python_api", + srcs = ["toco_python_api.cc"], + hdrs = ["toco_python_api.h"], + deps = [ + "//tensorflow/contrib/lite/toco:model_flags_proto_cc", + "//tensorflow/contrib/lite/toco:toco_flags_proto_cc", + "//tensorflow/contrib/lite/toco:toco_port", + "//tensorflow/contrib/lite/toco:toco_tooling", + "//tensorflow/core:lib", + "//util/python:python_headers", + ], +) + +tf_py_wrap_cc( + name = "tensorflow_wrap_toco", + srcs = ["toco.i"], + deps = [ + ":toco_python_api", + "//tensorflow/contrib/lite/toco:model_flags_proto_cc", + "//tensorflow/contrib/lite/toco:toco_flags_proto_cc", + "//util/python:python_headers", + "@com_google_absl//absl/strings", + ], +) + +py_binary( + name = "toco_from_protos", + srcs = ["toco_from_protos.py"], + srcs_version = "PY2AND3", + deps = [ + ":tensorflow_wrap_toco", + "//tensorflow/python:platform", + ], +) + +py_binary( + name = "toco_wrapper", + srcs = ["toco_wrapper.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow:tensorflow_py", + ], +) + +tf_py_test( + name = "toco_from_protos_test", + srcs = ["toco_from_protos_test.py"], + additional_deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/lite/toco:model_flags_proto_py", + "//tensorflow/contrib/lite/toco:toco_flags_proto_py", + ], + data = [ + ":toco_from_protos", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/toco/python/toco.i b/tensorflow/contrib/lite/toco/python/toco.i new file mode 100644 index 0000000000..3787cba4a3 --- /dev/null +++ b/tensorflow/contrib/lite/toco/python/toco.i @@ -0,0 +1,32 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +%include "std_string.i" + +%{ +#include "tensorflow/contrib/lite/toco/python/toco_python_api.h" +%} + +namespace toco { + +// Convert a model represented in `input_contents`. `model_flags_proto` +// describes model parameters. `toco_flags_proto` describes conversion +// parameters (see relevant .protos for more information). Returns a string +// representing the contents of the converted model. +PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, + PyObject* toco_flags_proto_txt_raw, + PyObject* input_contents_txt_raw); + +} // namespace toco \ No newline at end of file diff --git a/tensorflow/contrib/lite/toco/python/toco_from_protos.py b/tensorflow/contrib/lite/toco/python/toco_from_protos.py new file mode 100644 index 0000000000..c0b032083b --- /dev/null +++ b/tensorflow/contrib/lite/toco/python/toco_from_protos.py @@ -0,0 +1,63 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python console command to invoke TOCO from serialized protos.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +from tensorflow.contrib.lite.toco.python import tensorflow_wrap_toco +from tensorflow.python.platform import app + +FLAGS = None + + +def execute(unused_args): + model_str = open(FLAGS.model_proto_file, "rb").read() + toco_str = open(FLAGS.toco_proto_file, "rb").read() + input_str = open(FLAGS.model_input_file, "rb").read() + + output_str = tensorflow_wrap_toco.TocoConvert(model_str, toco_str, input_str) + open(FLAGS.model_output_file, "wb").write(output_str) + sys.exit(0) + + +def main(): + global FLAGS + parser = argparse.ArgumentParser( + description="Invoke toco using protos as input.") + parser.add_argument( + "model_proto_file", + type=str, + help="File containing serialized proto that describes the model.") + parser.add_argument( + "toco_proto_file", + type=str, + help="File containing serialized proto describing how TOCO should run.") + parser.add_argument( + "model_input_file", type=str, help="Input model is read from this file.") + parser.add_argument( + "model_output_file", + type=str, + help="Result of applying TOCO conversion is written here.") + + FLAGS, unparsed = parser.parse_known_args() + + app.run(main=execute, argv=[sys.argv[0]] + unparsed) + + +if __name__ == "__main__": + main() diff --git a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py new file mode 100644 index 0000000000..2a593beeca --- /dev/null +++ b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py @@ -0,0 +1,96 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tempfile + +import tensorflow as tf +from tensorflow.contrib.lite.toco import model_flags_pb2 +from tensorflow.contrib.lite.toco import toco_flags_pb2 +from tensorflow.python.platform import googletest +from tensorflow.python.platform import resource_loader + + +def TensorName(x): + """Get the canonical (non foo:0 name).""" + return x.name.split(":")[0] + + +class TocoFromProtosTest(googletest.TestCase): + + def _run(self, sess, in_tensor, out_tensor, should_succeed): + """Use toco binary to check conversion from graphdef to tflite. + + Args: + sess: Active TensorFlow session containing graph. + in_tensor: TensorFlow tensor to use as input. + out_tensor: TensorFlow tensor to use as output. + should_succeed: Whether this is a valid conversion. + """ + # Build all protos and extract graphdef + graph_def = sess.graph_def + toco_flags = toco_flags_pb2.TocoFlags() + toco_flags.input_format = toco_flags_pb2.TENSORFLOW_GRAPHDEF + toco_flags.output_format = toco_flags_pb2.TFLITE + toco_flags.input_types.append(toco_flags_pb2.FLOAT) + toco_flags.inference_type = toco_flags_pb2.FLOAT + model_flags = model_flags_pb2.ModelFlags() + input_array = model_flags.input_arrays.add() + input_array.name = TensorName(in_tensor) + input_array.shape.extend(map(int, in_tensor.get_shape())) + model_flags.output_arrays.append(TensorName(out_tensor)) + # Shell out to run toco (in case it crashes) + with tempfile.NamedTemporaryFile() as fp_toco, \ + tempfile.NamedTemporaryFile() as fp_model, \ + tempfile.NamedTemporaryFile() as fp_input, \ + tempfile.NamedTemporaryFile() as fp_output: + fp_model.write(model_flags.SerializeToString()) + fp_toco.write(toco_flags.SerializeToString()) + fp_input.write(graph_def.SerializeToString()) + fp_model.flush() + fp_toco.flush() + fp_input.flush() + tflite_bin = resource_loader.get_path_to_datafile("toco_from_protos") + cmdline = " ".join([ + tflite_bin, fp_model.name, fp_toco.name, fp_input.name, fp_output.name + ]) + exitcode = os.system(cmdline) + if exitcode == 0: + stuff = fp_output.read() + self.assertEqual(stuff is not None, should_succeed) + else: + self.assertFalse(should_succeed) + + def test_toco(self): + """Run a couple of TensorFlow graphs against TOCO through the python bin.""" + with tf.Session() as sess: + img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) + val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) + out = tf.identity(val, name="out") + out2 = tf.sin(val, name="out2") + # This is a valid mdoel + self._run(sess, img, out, True) + # This uses an invalid function. + # TODO(aselle): Check to make sure a warning is included. + self._run(sess, img, out2, True) + # This is an identity graph, which doesn't work + self._run(sess, img, img, False) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/contrib/lite/toco/python/toco_python_api.cc b/tensorflow/contrib/lite/toco/python/toco_python_api.cc new file mode 100644 index 0000000000..8a5e483f3f --- /dev/null +++ b/tensorflow/contrib/lite/toco/python/toco_python_api.cc @@ -0,0 +1,85 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/core/platform/logging.h" + +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/python/toco_python_api.h" +#include "tensorflow/contrib/lite/toco/toco_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/toco_tooling.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" + +namespace toco { + +#if PY_MAJOR_VERSION >= 3 +#define TOCO_PY_TO_CPPSTRING PyBytes_AsStringAndSize +#define TOCO_FROM_CPPSTRING_TO_PY PyBytes_FromStringAndSize +#else +#define TOCO_PY_TO_CPPSTRING PyString_AsStringAndSize +#define TOCO_FROM_CPPSTRING_TO_PY PyString_FromStringAndSize +#endif + +// NOTE(aselle): We are using raw PyObject's here because we want to make +// sure we input and output bytes rather than unicode strings for Python3. +PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, + PyObject* toco_flags_proto_txt_raw, + PyObject* input_contents_txt_raw) { + // Use Python C API to validate and convert arguments. In py3 (bytes), + // in py2 (str). + auto ConvertArg = [&](PyObject* obj, bool* error) { + char* buf; + Py_ssize_t len; + if (TOCO_PY_TO_CPPSTRING(obj, &buf, &len) == -1) { + *error = true; + return std::string(); + } else { + *error = false; + return std::string(buf, len); + } + }; + + bool error; + std::string model_flags_proto_txt = + ConvertArg(model_flags_proto_txt_raw, &error); + if (error) return nullptr; + std::string toco_flags_proto_txt = + ConvertArg(toco_flags_proto_txt_raw, &error); + if (error) return nullptr; + std::string input_contents_txt = ConvertArg(input_contents_txt_raw, &error); + if (error) return nullptr; + + // Use toco to produce new outputs + toco::ModelFlags model_flags; + if (!model_flags.ParseFromString(model_flags_proto_txt)) { + LOG(FATAL) << "Model proto failed to parse." << std::endl; + } + toco::TocoFlags toco_flags; + if (!toco_flags.ParseFromString(toco_flags_proto_txt)) { + LOG(FATAL) << "Toco proto failed to parse." << std::endl; + } + std::unique_ptr model = + toco::Import(toco_flags, model_flags, input_contents_txt); + toco::Transform(toco_flags, model.get()); + string output_file_contents_txt; + Export(toco_flags, *model, &output_file_contents_txt); + + // Convert arguments back to byte (py3) or str (py2) + return TOCO_FROM_CPPSTRING_TO_PY(output_file_contents_txt.data(), + output_file_contents_txt.size()); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/python/toco_python_api.h b/tensorflow/contrib/lite/toco/python/toco_python_api.h new file mode 100644 index 0000000000..dc378353f7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/python/toco_python_api.h @@ -0,0 +1,33 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_PYTHON_TOCO_PYTHON_API_H_ +#define _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_PYTHON_TOCO_PYTHON_API_H_ + +#include +#include + +namespace toco { + +// Convert a model represented in `input_contents`. `model_flags_proto` +// describes model parameters. `toco_flags_proto` describes conversion +// parameters (see relevant .protos for more information). Returns a string +// representing the contents of the converted model. +PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, + PyObject* toco_flags_proto_txt_raw, + PyObject* input_contents_txt_raw); + +} // namespace toco + +#endif // _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_PYTHON_TOCO_PYTHON_API_H_ diff --git a/tensorflow/contrib/lite/toco/python/toco_wrapper.py b/tensorflow/contrib/lite/toco/python/toco_wrapper.py new file mode 100644 index 0000000000..e39b5f22c7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/python/toco_wrapper.py @@ -0,0 +1,35 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Wrapper for runninmg toco binary embedded in pip site-package. + +NOTE: this mainly exists since PIP setup.py cannot install binaries to bin/. +It can only install Python "console-scripts." This will work as a console +script. See tools/pip_package/setup.py (search for CONSOLE_SCRIPTS). +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import tensorflow as tf + + +def main(): + # Pip installs the binary in aux-bin off of main site-package install. + # Just find it and exec, passing all arguments in the process. + # TODO(aselle): it is unfortunate to use all of tensorflow to lookup binary. + binary = os.path.join(tf.__path__[0], 'aux-bin/toco') + os.execvp(binary, sys.argv) diff --git a/tensorflow/contrib/lite/toco/runtime/common.h b/tensorflow/contrib/lite/toco/runtime/common.h new file mode 100644 index 0000000000..bd55544f57 --- /dev/null +++ b/tensorflow/contrib/lite/toco/runtime/common.h @@ -0,0 +1,26 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_RUNTIME_COMMON_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_RUNTIME_COMMON_H_ + +#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK +#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#endif +#endif + +#include "tensorflow/contrib/lite/kernels/internal/common.h" + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_RUNTIME_COMMON_H_ diff --git a/tensorflow/contrib/lite/toco/runtime/types.h b/tensorflow/contrib/lite/toco/runtime/types.h new file mode 100644 index 0000000000..df63b2d59e --- /dev/null +++ b/tensorflow/contrib/lite/toco/runtime/types.h @@ -0,0 +1,32 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_RUNTIME_TYPES_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_RUNTIME_TYPES_H_ + +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/compatibility.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace toco { + +// TODO(ahentz): These are just stopgaps for now, untils we move all +// the code over to tflite. +using tflite::Dims; +using tflite::FusedActivationFunctionType; +using tflite::RequiredBufferSizeForDims; + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_RUNTIME_TYPES_H_ diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD new file mode 100644 index 0000000000..0c1a1141fc --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD @@ -0,0 +1,102 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) + +cc_library( + name = "cluster_utils", + srcs = [ + "cluster_utils.cc", + ], + hdrs = [ + "cluster_utils.h", + ], + deps = [ + "//tensorflow/contrib/lite/toco:toco_port", + ], +) + +cc_library( + name = "cluster", + srcs = [ + "cluster.cc", + ], + hdrs = [ + "cluster.h", + ], + deps = [ + ":cluster_utils", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:tooling_util", + "//tensorflow/core:protos_all_cc", + ], +) + +cc_library( + name = "resolve_svdf", + srcs = [ + "resolve_svdf.cc", + ], + hdrs = [ + "resolve_svdf.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":cluster", + ":cluster_utils", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:toco_port", + "//tensorflow/contrib/lite/toco:tooling_util", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@protobuf_archive//:protobuf_headers", + ], +) + +tf_cc_test( + name = "resolve_svdf_test", + srcs = ["resolve_svdf_test.cc"], + deps = [ + ":cluster", + ":cluster_utils", + ":resolve_cluster", + ":resolve_svdf", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "resolve_cluster", + srcs = [ + "resolve_cluster.cc", + ], + hdrs = [ + "resolve_cluster.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":cluster", + ":cluster_utils", + ":resolve_svdf", + "//tensorflow/contrib/lite/toco:tooling_util", + "//tensorflow/core:protos_all_cc", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.cc b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.cc new file mode 100644 index 0000000000..98a130ea39 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.cc @@ -0,0 +1,52 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h" + +namespace toco { + +void Cluster::SetGraphDefInfo(const tensorflow::GraphDef* graph_def) { + graph_def_ = graph_def; + for (const tensorflow::NodeDef& node : graph_def_->node()) { + if (StrContains(node.name(), name_)) { + nodes_.push_back(&node); + } + } +} + +bool Cluster::FindClusterInputsAndOutputs() { + // For every node N in the graph: + // If N belongs to this cluster C, then each of N's inputs that are not part + // of C are then inputs of C. + // If N does not belong to cluster C, then each of N's inputs that belong to C + // are then outputs of C. + for (const tensorflow::NodeDef& node : graph_def_->node()) { + if (StrContains(node.name(), name_)) { + for (int i = 0; i < node.input_size(); i++) { + if (!StrContains(node.input(i), name_)) { + inputs_.push_back(node.input(i)); + } + } + } else { + for (int i = 0; i < node.input_size(); i++) { + if (StrContains(node.input(i), name_)) { + outputs_.push_back(node.input(i)); + } + } + } + } + return (!inputs_.empty()) && (!outputs_.empty()); +} + +} // end namespace toco diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h new file mode 100644 index 0000000000..18ff73ac39 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h @@ -0,0 +1,101 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_CLUSTER_H +#define TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_CLUSTER_H + +#include +#include + +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" + +namespace toco { + +// The base class for Cluster. A cluster is group of nodes all related to each +// other because their name match a given "pattern", which shows they all belong +// to a composite op supported in TFLite. The nodes in a cluster will be +// collapsed into a single composite op node plus a series of constant nodes +// holding the input parameters to that node. The nodes in a cluster are assumed +// to be using the same device. By changing the "pattern" we can have different +// subclasses of the base Cluster class. +class Cluster { + public: + virtual ~Cluster() {} + + virtual void CreateNodes() = 0; + + // Save the following info from the original GraphDef this cluster is from: + // 1- a pointer to the GraphDef + // 2- All the nodes in GraphDef which belong to this cluster. + void SetGraphDefInfo(const tensorflow::GraphDef* graph_def); + + const string& GetName() const { return name_; } + + const std::vector>& GetNewNodes() const { + return new_nodes_; + } + + const std::vector& GetNodes() { return nodes_; } + + void SetName(const string& name) { name_ = name; } + + void SetDevice(const string& device) { device_ = device; } + + // Find the input(s) and output(s) of this Cluster. + bool FindClusterInputsAndOutputs(); + + protected: + string name_; + string device_; + std::vector inputs_; + std::vector outputs_; + + // Used to hold the pointers to nodes which are in this cluster. These nodes + // are pointing to the nodes in graph_def_. + std::vector nodes_; + + // Used to cache the newly generated nodes: like the nodes created by + // collapsing Const nodes, or the nodes which is used to show the composite + // op. + std::vector> new_nodes_; + + const tensorflow::GraphDef* graph_def_; /*Not owned*/ +}; + +// A factory interface for cluster class. +// It defines a virtual function interface which is responsible for creating +// a cluster. Each cluster factory is responsible to pack a cluster of nodes +// into a cluster using a name-based pattern matching approach. +class ClusterFactoryInterface { + public: + virtual ~ClusterFactoryInterface() {} + + // Creates a cluster of nodes using a name-based pattern matching approach. It + // uses a node as a seed and if its name matches a certain pattern, then it + // builds the cluster around that node. + virtual std::unique_ptr CreateCluster( + const tensorflow::NodeDef& node, + const tensorflow::GraphDef& graph_def) const = 0; +}; + +} // end namespace toco + +#endif // TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_CLUSTER_H diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.cc b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.cc new file mode 100644 index 0000000000..14c3cd6487 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.cc @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/toco/toco_types.h" +namespace toco { + +bool StrContains(const string& x, const string& search_pattern) { + return x.find(search_pattern) != string::npos; +} + +void Transpose2DTensor(const float* tensor, int row, int col, + float* transposed_tensor) { + float* result = transposed_tensor; + for (int r = 0; r < row; ++r) { + for (int c = 0; c < col; ++c) { + *(result + c * row) = *tensor++; + } + ++result; + } +} + +} // end namespace toco diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h new file mode 100644 index 0000000000..a15e480e70 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h @@ -0,0 +1,33 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_CLUSTERUTILS_H +#define TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_CLUSTERUTILS_H + +#include + +namespace toco { + +// Check if string x includes string search_pattern. +bool StrContains(const string& x, const string& search_pattern); + +// Transpose a 2D tensor of size row * col pointed by "tensor" and return the +// results in "transposed_tensor". "transposed_tensor" must be pre-allocated +// by the same size as "tensor". +void Transpose2DTensor(const float* tensor, int row, int col, + float* transposed_tensor); + +} // end namespace toco + +#endif // TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_CLUSTERUTILS_H diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.cc b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.cc new file mode 100644 index 0000000000..fddf6cc836 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.cc @@ -0,0 +1,151 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.h" + +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" + +namespace toco { + +using tensorflow::GraphDef; +using tensorflow::NodeDef; + +void AddNodeToGraph(const NodeDef& node, + const std::vector& cluster_names, GraphDef* graph) { + NodeDef* new_node = graph->add_node(); + new_node->set_op(node.op()); + new_node->set_name(node.name()); + new_node->set_device(node.device()); + // If the inputs are coming from a node which belongs to another cluster, then + // those inputs are renamed to the source cluster name. Otherwise the original + // input name is used. + for (const string& node_input : node.input()) { + bool input_from_cluster = false; + for (const string& cluster_name : cluster_names) { + if (StrContains(node_input, cluster_name) && + !StrContains(node.name(), cluster_name)) { + new_node->add_input(cluster_name); + input_from_cluster = true; + break; + } + } + if (!input_from_cluster) { + new_node->add_input(node_input); + } + } + for (const auto& attr : node.attr()) { + (*new_node->mutable_attr())[attr.first] = attr.second; + } +} + +bool FindCluster(const ClusterFactoryInterface& cluster_factory, + const GraphDef& graph_def, + std::unordered_map* is_node_in_cluster, + std::vector>* clusters) { + for (const NodeDef& node : graph_def.node()) { + // If the node is not assigned to any cluster, then we check if it belong to + // the cluster_factory. + bool node_in_cluster = (*is_node_in_cluster)[node.name()]; + if (!node_in_cluster) { + std::unique_ptr cluster = + cluster_factory.CreateCluster(node, graph_def); + if (cluster) { + // Label all the nodes in is_node_in_cluster which are in this cluster + // as belonged to this cluster. + for (const NodeDef* cluster_node : cluster->GetNodes()) { + (*is_node_in_cluster)[cluster_node->name()] = true; + } + clusters->push_back(std::move(cluster)); + } + } + } + return (!clusters->empty()); +} + +std::unique_ptr MaybeResolveClusters( + const GraphDef& graph_def, + const std::vector& cluster_factories) { + std::unique_ptr pruned_graph(new GraphDef); + // The structure to keep track of which cluster each node is assigned to, and + // to initialize them to all un-assigned, + std::unordered_map is_node_in_cluster; + for (const NodeDef& node : graph_def.node()) { + is_node_in_cluster[node.name()] = false; + } + + std::vector cluster_names; + std::vector> all_clusters; + // Find the clusters for all available cluster factories. + for (const ClusterFactoryInterface* cluster_factory : cluster_factories) { + std::vector> clusters; + if (FindCluster(*cluster_factory, graph_def, &is_node_in_cluster, + &clusters)) { + for (auto itr = clusters.begin(); itr != clusters.end(); ++itr) { + cluster_names.push_back((*itr)->GetName()); + (*itr)->CreateNodes(); + all_clusters.push_back(std::move(*itr)); + } + } + } + + for (const std::unique_ptr& cluster : all_clusters) { + for (const std::unique_ptr& src_node : + cluster->GetNewNodes()) { + // Add it to the output GraphDef. + AddNodeToGraph(*src_node, cluster_names, pruned_graph.get()); + } + } + + // Add any node which is not part of a cluster. + for (const NodeDef& node : graph_def.node()) { + bool node_in_cluster = is_node_in_cluster[node.name()]; + if (!node_in_cluster) { + AddNodeToGraph(node, cluster_names, pruned_graph.get()); + } + } + + if (pruned_graph->node_size() == 0) { + return nullptr; + } else { + return pruned_graph; + } +} + +std::unique_ptr MaybeReplaceCompositeSubgraph( + const GraphDef& tf_graph) { + SvdfClusterFactory svdf_cluster_factory; + + std::vector cluster_factories; + cluster_factories.push_back(&svdf_cluster_factory); + + std::unique_ptr pruned_graph = + MaybeResolveClusters(tf_graph, cluster_factories); + + // Copy function definitions + *(pruned_graph->mutable_library()) = tf_graph.library(); + return pruned_graph; +} + +} // end namespace toco diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.h b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.h new file mode 100644 index 0000000000..7d33dd1885 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.h @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_RESOLVE_CLUSTER_H +#define TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_RESOLVE_CLUSTER_H + +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" + +namespace toco { + +// Given a graph info and a list of cluster classes (cluster_factories), it +// partitions the graph to clusters, and then collapses each cluster into their +// corresponding composite ops. It generates a new graph using the newly +// generated composite ops. Each cluster factory is responsible to recognize a +// cluster of nodes into a cluster using a name-based pattern matching approach. +std::unique_ptr MaybeResolveClusters( + const tensorflow::GraphDef& graph_def, + const std::vector& cluster_factories); + +// Adds a node to a given graph. The added node will be a copy of a given source +// node, except for the inputs. If the inputs are coming from a node which +// belongs to another cluster, then those inputs are renamed to the source +// cluster name. +void AddNodeToGraph(const tensorflow::NodeDef& node, + const std::vector& cluster_names, + tensorflow::GraphDef* graph); + +// Given a graph and a cluster class, it finds all the nodes which belong to a +// given class factory, encapsulate them inside a cluster of the given type and +// returns a vector of those clusters. It also labels the nodes in that graph if +// they belong to the generated clusters. +bool FindCluster(const ClusterFactoryInterface& cluster_factory, + const tensorflow::GraphDef& graph_def, + std::unordered_map* is_node_in_cluster, + std::vector>* clusters); + +// Receives a graph and generates another graph by replacing the cluster of +// nodes which matches a given composite op. Each composite op is represented +// using a class factory. +std::unique_ptr MaybeReplaceCompositeSubgraph( + const tensorflow::GraphDef& tf_graph); + +} // end namespace toco + +#endif // CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_RESOLVE_CLUSTER_H diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.cc b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.cc new file mode 100644 index 0000000000..d6a099817c --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.cc @@ -0,0 +1,285 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h" + +#include +#include +#include +#include +#include +#include + +#include "google/protobuf/map.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/platform/logging.h" + +using tensorflow::GraphDef; +using tensorflow::NodeDef; + +namespace toco { + +namespace { + +// Receives a vector of cluster nodes and returns only those which are array +// partitions (of type 'Const' and have the pattern 'part_<.*>' in their name. +// Since these nodes are connected to a Concatenate node, it makes sure the +// axis value input of the Concatenate operator is 0. +void FilterPartitionedConstNodes( + const string& const_pattern, + const std::vector& cluster_nodes, + std::vector* const_node_parts) { + for (const NodeDef* node : cluster_nodes) { + string node_name_to_upper = node->name(); + std::transform(node_name_to_upper.begin(), node_name_to_upper.end(), + node_name_to_upper.begin(), ::toupper); + if (StrContains(node->name(), const_pattern) && node->op() == "Const") { + if (StrContains(node_name_to_upper, "/PART_")) { + const_node_parts->push_back(node); + } else if (StrContains(node->name(), "AXIS") && + StrContains(node->name(), "CONCAT")) { + // For now only supporting Concatenate on Axix 0 + const auto& value_attr = node->attr().at("value"); + const tensorflow::TensorProto& tensor = value_attr.tensor(); + CHECK_EQ(tensor.int_val(0), 0); + } + } + } + sort(const_node_parts->begin(), const_node_parts->end(), + [](const NodeDef* a, const NodeDef* b) { + return (a->name().compare(b->name()) < 0 && + (a->name().size() < b->name().size())); + }); +} + +} // namespace + +// SvdfCluster methods + +int SvdfCluster::InferFilterRank() { + for (const NodeDef* node : nodes_) { + if (StrContains(node->name(), "Reshape/shape")) { + const auto& value_attr = node->attr().at("value"); + const tensorflow::TensorProto& tensor = value_attr.tensor(); + std::vector shape_values( + tensor.tensor_content().size() / sizeof(int), 0); + port::CopyToBuffer(tensor.tensor_content(), + reinterpret_cast(shape_values.data())); + CHECK_EQ(shape_values.size(), 3); + // shape_value array is arranged as: + // [num_units, rank, -1] + CHECK_EQ(shape_values[2], -1); + return shape_values[1]; + } + } + return -1; +} + +void SvdfCluster::CreateNodes() { + for (const string& const_pattern : const_node_patterns_) { + CreateConstNode(const_pattern); + } + std::unique_ptr svdf_node(new NodeDef); + svdf_node->set_op("Svdf"); + svdf_node->set_name(name_); + svdf_node->set_device(device_); + + // Add the main input. + svdf_node->add_input(inputs_[0]); + + // Add the rest of the inputs to Svdf cell: weights and bias. + CHECK(new_nodes_.size() == 3 || new_nodes_.size() == 2); + string* weights_feature_input = svdf_node->add_input(); + string* weights_time_input = svdf_node->add_input(); + string* bias_input; + if (new_nodes_.size() == 3) { + bias_input = svdf_node->add_input(); + } + for (const std::unique_ptr& node : new_nodes_) { + const string node_name = node->name(); + if (StrContains(node_name, "SVDF_weights_feature")) { + *weights_feature_input = node_name; + } else if (StrContains(node_name, "SVDF_weights_time")) { + *weights_time_input = node_name; + } else if (StrContains(node_name, "SVDF_bias")) { + CHECK(bias_input) << "Bias input cannot be provided when there are only " + "two Const input nodes!"; + *bias_input = node_name; + } else { + // Unexpected input for Svdf op. + LOG(FATAL) << "Unexpected input node for SVDF op! Accepted inputs are: " + "weights_feature, weights_time and bias."; + } + } + const int rank = InferFilterRank(); + CHECK_GT(rank, 0); + + // Add Svdf activation and rank. + string activation_function = + StrContains(outputs_[0], "Relu") ? "Relu" : "None"; + (*svdf_node->mutable_attr())["ActivationFunction"].set_s(activation_function); + (*svdf_node->mutable_attr())["Rank"].set_i(rank); + + // Finally add it to the list of the newly created nodes. + new_nodes_.push_back(std::move(svdf_node)); +} + +void SvdfCluster::CreateConstNode(const string& const_pattern) { + // Find the nodes with pattern like: "const_pattern"/part_xxx of type Const. + std::vector const_node_parts; + FilterPartitionedConstNodes(const_pattern, nodes_, &const_node_parts); + + if (const_node_parts.empty()) return; + + bool transpose_tensor_value = + StrContains(const_pattern, "SVDF_weights_feature"); + + // Merge them if necessary. + std::unique_ptr merged_node(new NodeDef); + MaybeMergeConstNodes(const_node_parts, transpose_tensor_value, merged_node); + new_nodes_.push_back(std::move(merged_node)); +} + +void SvdfCluster::MaybeMergeConstNodes( + const std::vector& const_node_parts, + bool transpose_tensor_value, + const std::unique_ptr& merged_node) { + merged_node->set_name(const_node_parts[0]->name()); + merged_node->set_op("Const"); + merged_node->set_device(const_node_parts[0]->device()); + (*merged_node->mutable_attr())["dtype"].set_type( + const_node_parts[0]->attr().at("dtype").type()); + + // Figuring out Value attribute for the merged node. + // Assuming the partitioning is done on Axis 0. + // The attributes which are inferred: + // * Shape and dimensions + // * Float content values + + // Inferring shape and dimension + int dim0_size = 0; + int dim1_size = 1; + tensorflow::TensorProto* allocated_tensor = + (*merged_node->mutable_attr())["value"].mutable_tensor(); + tensorflow::TensorShapeProto* allocated_tensor_shape = + allocated_tensor->mutable_tensor_shape(); + auto tensor_shape_dim0 = allocated_tensor_shape->add_dim(); + int allocated_content_flat_size = 0; + for (int i = 0; i < const_node_parts.size(); i++) { + const auto& value_attr = const_node_parts[i]->attr().at("value"); + const tensorflow::TensorProto& tensor = value_attr.tensor(); + if (i == 0) { + allocated_tensor->set_dtype(tensor.dtype()); + } else { + CHECK_EQ(allocated_tensor->dtype(), tensor.dtype()); + } + allocated_content_flat_size += tensor.tensor_content().size(); + CHECK(tensor.has_tensor_shape()); + const tensorflow::TensorShapeProto shape = tensor.tensor_shape(); + dim0_size += shape.dim(0).size(); + for (int d = 1; d < shape.dim_size(); d++) { + if (i == 0) { + allocated_tensor_shape->add_dim()->set_size(shape.dim(d).size()); + allocated_tensor_shape->set_unknown_rank(shape.unknown_rank()); + dim1_size *= shape.dim(d).size(); + } else { + CHECK_EQ(shape.dim(d).size(), allocated_tensor_shape->dim(d).size()); + CHECK_EQ(allocated_tensor_shape->unknown_rank(), shape.unknown_rank()); + } + } + } + + // Copying the float content from each array partition. + std::unique_ptr allocated_content( + new char[allocated_content_flat_size]); + char* content_ptr = allocated_content.get(); + for (int i = 0; i < const_node_parts.size(); i++) { + const auto& value_attr = const_node_parts[i]->attr().at("value"); + const tensorflow::TensorProto& tensor = value_attr.tensor(); + port::CopyToBuffer(tensor.tensor_content(), content_ptr); + content_ptr += tensor.tensor_content().size(); + } + + // Transpose the tensor if needed. + if (transpose_tensor_value) { + // We use dimension 0 to show the row size for the tensor. + // We use multiplication of the rest of dimension size to for the col size + // of the tensor. + std::unique_ptr transposed_tensor( + new float[dim0_size * dim1_size]); + Transpose2DTensor(reinterpret_cast(allocated_content.get()), + dim0_size, dim1_size, transposed_tensor.get()); + allocated_tensor_shape->clear_dim(); + allocated_tensor_shape->add_dim()->set_size(dim1_size); + allocated_tensor_shape->add_dim()->set_size(dim0_size); + + // Set the tensor attributes. + allocated_tensor->set_tensor_content( + string(reinterpret_cast(transposed_tensor.get()), + allocated_content_flat_size)); + } else { + tensor_shape_dim0->set_size(dim0_size); + + // Set the tensor attributes. + allocated_tensor->set_tensor_content( + string(reinterpret_cast(allocated_content.get()), + allocated_content_flat_size)); + } +} + +// SvdfClusterFactory methods + +std::unique_ptr SvdfClusterFactory::CreateCluster( + const NodeDef& node, const GraphDef& graph_def) const { + std::vector node_patterns = {"SVDF_weights_feature", + "SVDF_weights_time", "SVDF_bias"}; + + string node_name_to_upper = node.name(); + std::transform(node_name_to_upper.begin(), node_name_to_upper.end(), + node_name_to_upper.begin(), ::toupper); + std::unique_ptr cluster = nullptr; + if (node_name_to_upper.find("SVDF", 0) != string::npos) { + size_t weights_pos = node.name().find(node_patterns[0]); + if (weights_pos != string::npos) { + // Assuming the node name has a pattern like: + // "SOMESTRING1/CELLNAME/SEARCH_PATTERN/SOMESTRING2", we use + // CELLNAME as the cluster name. + size_t cell_pos = node.name().rfind("/", weights_pos - 2) + 1; + string cell_name = + node.name().substr(cell_pos, weights_pos - cell_pos - 1); + cluster = std::unique_ptr(new SvdfCluster); + cluster->SetName(cell_name); + cluster->SetDevice(node.device()); + cluster->SetGraphDefInfo(&graph_def); + CHECK(cluster->FindClusterInputsAndOutputs()); + + for (const string& const_pattern : node_patterns) { + cluster->AddConstNodePattern(const_pattern); + } + } + } + return std::move(cluster); +} + +} // end namespace toco diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h new file mode 100644 index 0000000000..c4c6c34117 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h @@ -0,0 +1,82 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_RESOLVE_SVDF_H +#define TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_RESOLVE_SVDF_H + +#include +#include + +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" + +namespace toco { + +class SvdfCluster : public Cluster { + public: + // For this cluster, it collapses all the nodes in nodes_ into a composite op + // and it returns all the newly generated ops in new_nodes_. + void CreateNodes() override; + + // A helper function to set the pattern of Const nodes which CreateNodes() + // should handle specially. + void AddConstNodePattern(const string& const_pattern) { + const_node_patterns_.push_back(const_pattern); + } + + virtual ~SvdfCluster() {} + + private: + // The main function which is used to create Const nodes for this cluster. + // These Const nodes are the inputs to the composite op generated for this + // cluster. + void CreateConstNode(const string& const_pattern); + + // Receives a vector of Const nodes, merge them (if necessary) and returns + // only one Const node holding all the arrays contents. It transposes it if + // needed. + void MaybeMergeConstNodes( + const std::vector& const_node_parts, + bool transpose_tensor_value, + const std::unique_ptr& merged_node); + + // Infer the value of Svdf filter rank, by looking up a reshape operator which + // is used for 'output' which reshapes output from [num_filters, batch, 1] + // shape to [num_units, rank, batch] shape. The 2nd shape element is rank. + int InferFilterRank(); + + std::vector const_node_patterns_; +}; + +class SvdfClusterFactory : public ClusterFactoryInterface { + public: + // Creates a cluster of nodes using a name-based pattern matching approach. It + // uses a node as a seed and if its name matches a certain pattern, then it + // builds the cluster around that node. + // This factory expects nodes which have "SVDF_weights_feature" and + // "SVDF_weights_time" pattern in their names (and optionally "SVDF_bias") + // and it creates an SVDF Op from them. + std::unique_ptr CreateCluster( + const tensorflow::NodeDef& node, + const tensorflow::GraphDef& graph_def) const; +}; + +} // end namespace toco + +#endif // TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_GRAPH_MATCHING_RESOLVE_SVDF_H diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf_test.cc b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf_test.cc new file mode 100644 index 0000000000..664e828c19 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf_test.cc @@ -0,0 +1,212 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_svdf.h" + +#include +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/cluster_utils.h" +#include "tensorflow/contrib/lite/toco/tensorflow_graph_matching/resolve_cluster.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/logging.h" + +using tensorflow::GraphDef; +using tensorflow::NodeDef; + +namespace toco { + +class ResolveSvdfTest : public ::testing::Test { + public: + ResolveSvdfTest() { + AddNewNode("Input1", "Const", {}); + AddNewNode("Svdf1/SVDF_weights_feature/part_0", "Const", {}, + {0.1, 0.2, 0.3}); + AddNewNode("Svdf1/SVDF_weights_feature/part_0/read", "Identity", + {"Svdf1/SVDF_weights_feature/part_0"}); + AddNewNode("Svdf1/SVDF_weights_time/part_0", "Const", {}, {0.1, 0.2, 0.3}); + AddNewNode("Svdf1/SVDF_weights_time/part_0/read", "Identity", + {"Svdf1/SVDF_weights_time/part_0"}); + + AddNewNode("Svdf1/f1", "SVDF_F1", + {"Input1", "Svdf1/SVDF_weights_feature/part_0/read"}); + AddNewNode("Svdf1/f2", "SVDF_F2", + {"Svdf1/SVDF_weights_time/part_0/read", "Svdf1/f1"}); + AddNewNode("Svdf1/Relu", "Relu", {"Svdf1/f2"}); + AddShapeNode("Svdf1/Reshape/shape", {10, 1, -1}); + AddNewNode("Output1", "Const", {"Svdf1/Relu"}); + + AddNewNode("Input2", "Const", {}); + AddNewNode("Svdf2/SVDF_weights_feature/part_0", "Const", {}, + {0.1, 0.2, 0.3}); + AddNewNode("Svdf2/SVDF_weights_feature/part_0/read", "Identity", + {"Svdf2/SVDF_weights_feature/part_0"}); + AddNewNode("Svdf2/SVDF_weights_time/part_0", "Const", {}, {0.1, 0.2, 0.3}); + AddNewNode("Svdf2/SVDF_weights_time/part_0/read", "Identity", + {"Svdf2/SVDF_weights_time/part_0"}); + + AddNewNode("Svdf2/f1", "SVDF_F1", + {"Input1", "Svdf2/SVDF_weights_feature/part_0/read"}); + AddNewNode("Svdf2/f2", "SVDF_F2", + {"Svdf2/SVDF_weights_time/part_0/read", "Svdf2/f1"}); + AddNewNode("Svdf2/Relu", "Relu", {"Svdf2/f2"}); + AddShapeNode("Svdf2/Reshape/shape", {10, 2, -1}); + AddNewNode("Output2", "Const", {"Svdf2/Relu"}); + } + + ~ResolveSvdfTest() override {} + + protected: + void AddNewNode(const string& name, const string& op, + const std::vector& inputs) { + NodeDef* node = graph_.add_node(); + node->set_name(name); + node->set_op(op); + node->set_device(""); + for (int i = 0; i < inputs.size(); i++) { + node->add_input(); + node->set_input(i, inputs[i]); + } + } + + void AddNewNode(const string& name, const string& op, + const std::vector& inputs, + const std::vector& values) { + NodeDef* node = graph_.add_node(); + node->set_name(name); + node->set_op(op); + node->set_device(""); + for (int i = 0; i < inputs.size(); i++) { + node->add_input(); + node->set_input(i, inputs[i]); + } + // Add the float vector as an attribute to the node. + (*node->mutable_attr())["dtype"].set_type(tensorflow::DT_FLOAT); + tensorflow::TensorProto* allocated_tensor = new tensorflow::TensorProto; + tensorflow::TensorShapeProto* allocated_tesnor_shape = + new tensorflow::TensorShapeProto; + auto tensor_shape_dim0 = allocated_tesnor_shape->add_dim(); + tensor_shape_dim0->set_size(values.size()); + allocated_tensor->set_allocated_tensor_shape(allocated_tesnor_shape); + allocated_tensor->set_tensor_content( + string(reinterpret_cast(values.data()), + values.size() * sizeof(float))); + (*node->mutable_attr())["value"].set_allocated_tensor(allocated_tensor); + } + + void AddShapeNode(const string& name, const std::vector& values) { + NodeDef* node = graph_.add_node(); + node->set_name(name); + node->set_op("Const"); + node->set_device(""); + // Add the float vector as an attribute to the node. + (*node->mutable_attr())["dtype"].set_type(tensorflow::DT_INT32); + tensorflow::TensorProto* allocated_tensor = new tensorflow::TensorProto; + tensorflow::TensorShapeProto* allocated_tesnor_shape = + new tensorflow::TensorShapeProto; + auto tensor_shape_dim0 = allocated_tesnor_shape->add_dim(); + tensor_shape_dim0->set_size(values.size()); + allocated_tensor->set_allocated_tensor_shape(allocated_tesnor_shape); + allocated_tensor->set_tensor_content( + string(reinterpret_cast(values.data()), + values.size() * sizeof(int))); + (*node->mutable_attr())["value"].set_allocated_tensor(allocated_tensor); + } + + GraphDef graph_; + SvdfClusterFactory svdf_cluster_factory_; + std::vector> clusters_; +}; + +TEST_F(ResolveSvdfTest, TestTranspose2DTensor) { + static float matrix[] = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.}; + static float expected_transposed_matrix[] = {1., 5., 9., 2., 6., 10., + 3., 7., 11., 4., 8., 12.}; + float* transposed_matrix = new float[12]; + Transpose2DTensor(matrix, 3, 4, transposed_matrix); + + std::vector actual; + actual.insert( + actual.end(), transposed_matrix, + transposed_matrix + sizeof(expected_transposed_matrix) / sizeof(float)); + std::vector expected; + expected.insert(expected.end(), expected_transposed_matrix, + expected_transposed_matrix + + sizeof(expected_transposed_matrix) / sizeof(float)); + delete[] transposed_matrix; +} + +TEST_F(ResolveSvdfTest, TestResolveSvdfFlow) { + std::unordered_map is_node_in_cluster; + for (const NodeDef& node : graph_.node()) { + is_node_in_cluster[node.name()] = false; + } + + std::vector cluster_names; + CHECK(FindCluster(svdf_cluster_factory_, graph_, &is_node_in_cluster, + &clusters_)); + + for (const std::unique_ptr& cluster : clusters_) { + cluster_names.push_back(cluster->GetName()); + cluster->CreateNodes(); + } + + EXPECT_THAT(cluster_names, + testing::UnorderedElementsAreArray({"Svdf1", "Svdf2"})); + + std::vector new_node_names; + std::vector content_array(3); + for (const std::unique_ptr& cluster : clusters_) { + // After CreateNodes in each cluster we have three nodes: Svdf, + // weights_feature and weights_time. + CHECK_EQ(cluster->GetNewNodes().size(), 3); + for (const std::unique_ptr& node : + cluster->GetNewNodes()) { + new_node_names.push_back(node->name()); + if (node->op() == "Const") { + CHECK_EQ(node->attr().at("dtype").type(), tensorflow::DT_FLOAT); + toco::port::CopyToBuffer( + node->attr().at("value").tensor().tensor_content(), + reinterpret_cast(content_array.data())); + EXPECT_THAT(content_array, + testing::UnorderedElementsAreArray({0.1, 0.2, 0.3})); + } else { + // Checking the Svdf node attributes (rank and activation type) are + // correct. + if (node->name() == "Svdf1") { + CHECK_EQ(node->attr().at("Rank").i(), 1); + } else if (node->name() == "Svdf2") { + CHECK_EQ(node->attr().at("Rank").i(), 2); + } + CHECK_EQ(node->attr().at("ActivationFunction").s(), "Relu"); + } + } + } + EXPECT_THAT(new_node_names, testing::UnorderedElementsAreArray( + {"Svdf2/SVDF_weights_feature/part_0", + "Svdf2/SVDF_weights_time/part_0", "Svdf2", + "Svdf1/SVDF_weights_feature/part_0", + "Svdf1/SVDF_weights_time/part_0", "Svdf1"})); +} + +} // end namespace toco diff --git a/tensorflow/contrib/lite/toco/tensorflow_util.cc b/tensorflow/contrib/lite/toco/tensorflow_util.cc new file mode 100644 index 0000000000..82e2800ca2 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_util.cc @@ -0,0 +1,197 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tensorflow_util.h" + +#include +#include +#include + +#ifdef GOOGLE_PLATFORM +#include "file/logging/log_lines.h" +#endif +#include "google/protobuf/map.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +using tensorflow::AttrValue; +using tensorflow::GraphDef; + +void LogDumpGraphDef(int log_level, const string& message, + const GraphDef& tf_graph) { + if (!VLOG_IS_ON(log_level)) { + return; + } + std::set ops; + for (const auto& node : tf_graph.node()) { + ops.insert(node.op()); + } + string dump; + toco::port::AppendF(&dump, R"MSG( +BEGIN DUMP OF TENSORFLOW GRAPHDEF (%s) +There are %d nodes. +There are %zu different op types: +)MSG", message, tf_graph.node_size(), ops.size()); + for (const auto& op : ops) { + toco::port::AppendF(&dump, " %s\n", op); + } + dump.append(R"MSG( +PROTO DUMP +)MSG"); + for (const auto& node : tf_graph.node()) { + toco::port::AppendF(&dump, R"MSG( +BEGIN NODE: name = %s + op = %s + inputs = [ +)MSG", node.name(), node.op()); + for (const auto& input : node.input()) { + toco::port::AppendF(&dump, " %s\n", input); + } + dump.append(" ]\n"); + for (const auto& attr : node.attr()) { + toco::port::AppendF(&dump, " ATTR: name = %s\n", attr.first); + if (attr.second.value_case() == AttrValue::kFunc) { + dump.append(" func\n"); + } else if (attr.second.value_case() == AttrValue::kPlaceholder) { + toco::port::AppendF(&dump, " placeholder: %s\n", + attr.second.placeholder()); + } else if (attr.second.value_case() == AttrValue::kS) { + dump.append(" string:\n"); + dump.append(R"MSG( + BEGIN EMBEDDED STRING +)MSG"); + const auto& lines = absl::StrSplit(attr.second.s(), '\n'); + for (const auto& line : lines) { + toco::port::AppendF(&dump, " %s\n", line); + } + dump.append(R"MSG( + END EMBEDDED STRING +)MSG"); + } else if (attr.second.value_case() == AttrValue::kI) { + toco::port::AppendF(&dump, " int: %lld\n", attr.second.i()); + } else if (attr.second.value_case() == AttrValue::kF) { + toco::port::AppendF(&dump, " float: %g\n", attr.second.f()); + } else if (attr.second.value_case() == AttrValue::kB) { + toco::port::AppendF(&dump, " bool: %s\n", + attr.second.b() ? "true" : "false"); + } else if (attr.second.value_case() == AttrValue::kType) { + toco::port::AppendF(&dump, " type: %s\n", + tensorflow::DataType_Name(attr.second.type())); + } else if (attr.second.value_case() == AttrValue::kShape) { + dump.append(" shape: [ "); + const auto& shape = attr.second.shape(); + for (int i = 0; i < shape.dim_size(); i++) { + toco::port::AppendF(&dump, "%lld ", shape.dim(i).size()); + } + dump.append("]\n"); + } else if (attr.second.value_case() == AttrValue::kTensor) { + const auto& tensor = attr.second.tensor(); + dump.append(" TENSOR:\n"); + toco::port::AppendF(&dump, " type: %s\n", + tensorflow::DataType_Name(tensor.dtype())); + const auto& shape = tensor.tensor_shape(); + dump.append(" shape: [ "); + for (int i = 0; i < shape.dim_size(); i++) { + toco::port::AppendF(&dump, "%lld ", shape.dim(i).size()); + } + dump.append("]\n"); + if (!tensor.tensor_content().empty()) { + toco::port::AppendF(&dump, " tensor_content: %zu bytes\n", + tensor.tensor_content().size()); + } + if (tensor.dtype() == tensorflow::DT_INT32) { + CHECK_EQ(0, tensor.tensor_content().size() % sizeof(int32)); + const int size = tensor.tensor_content().size() / sizeof(int32); + std::vector data(size); + toco::port::CopyToBuffer(tensor.tensor_content(), + reinterpret_cast(data.data())); + const int kMaxValsToPrint = 4; + dump.append(" tensor_content as ints: [ "); + for (int i = 0; i < kMaxValsToPrint && i < size; i++) { + toco::port::AppendF(&dump, "%d ", data[i]); + } + if (size > kMaxValsToPrint) { + dump.append("... "); + } + dump.append("]\n"); + } + if (tensor.dtype() == tensorflow::DT_FLOAT) { + CHECK_EQ(0, tensor.tensor_content().size() % sizeof(float)); + const int size = tensor.tensor_content().size() / sizeof(float); + std::vector data(size); + toco::port::CopyToBuffer(tensor.tensor_content(), + reinterpret_cast(data.data())); + const int kMaxValsToPrint = 4; + dump.append(" tensor_content as floats: [ "); + for (int i = 0; i < kMaxValsToPrint && i < size; i++) { + toco::port::AppendF(&dump, "%g ", data[i]); + } + if (size > kMaxValsToPrint) { + dump.append("... "); + } + dump.append("]\n"); + } + if (tensor.int_val_size()) { + toco::port::AppendF(&dump, " int_val: %d ints: [ ", + tensor.int_val_size()); + const int kMaxValsToPrint = 4; + for (int i = 0; i < kMaxValsToPrint && i < tensor.int_val_size(); + i++) { + toco::port::AppendF(&dump, "%d ", tensor.int_val(i)); + } + if (tensor.int_val_size() > kMaxValsToPrint) { + dump.append("... "); + } + dump.append("]\n"); + } + if (tensor.float_val_size()) { + toco::port::AppendF(&dump, " float_val: %d floats: [ ", + tensor.float_val_size()); + const int kMaxValsToPrint = 4; + for (int i = 0; i < kMaxValsToPrint && i < tensor.float_val_size(); + i++) { + toco::port::AppendF(&dump, "%g ", tensor.float_val(i)); + } + if (tensor.float_val_size() > kMaxValsToPrint) { + dump.append("... "); + } + dump.append("]\n"); + } + if (tensor.string_val_size()) { + toco::port::AppendF(&dump, " string_val: %d strings\n", + tensor.string_val_size()); + } + } else if (attr.second.value_case() == AttrValue::kList) { + dump.append(" LIST\n"); + } + } + dump.append("END NODE\n"); + } + toco::port::AppendF(&dump, "END DUMP OF TENSORFLOW GRAPHDEF (%s)\n", message); +#if defined(GOOGLE_PLATFORM) + VLOG_LINES(log_level, dump); +#else + VLOG(log_level) << dump; +#endif +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tensorflow_util.h b/tensorflow/contrib/lite/toco/tensorflow_util.h new file mode 100644 index 0000000000..152b4f7a72 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tensorflow_util.h @@ -0,0 +1,32 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_UTIL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_UTIL_H_ + +#include +#include + +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" + +namespace toco { + +void LogDumpGraphDef(int log_level, const string& message, + const tensorflow::GraphDef& tf_graph); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TENSORFLOW_UTIL_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD new file mode 100644 index 0000000000..e910e3957f --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/BUILD @@ -0,0 +1,142 @@ +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) + +cc_library( + name = "operator", + srcs = [ + "operator.cc", + ], + hdrs = [ + "builtin_operator.h", + "custom_operator.h", + "operator.h", + "simple_operator.h", + ], + deps = [ + ":types", + "//tensorflow/contrib/lite/schema:schema_fbs", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/memory", + "@flatbuffers//:flatbuffers", + ], +) + +tf_cc_test( + name = "operator_test", + srcs = [ + "operator_test.cc", + ], + deps = [ + ":operator", + "//tensorflow/contrib/lite/toco:tooling_util", + "//tensorflow/core:protos_all_cc", + "@com_google_googletest//:gtest_main", + "@flatbuffers//:flatbuffers", + ], +) + +cc_library( + name = "types", + srcs = [ + "types.cc", + ], + hdrs = [ + "types.h", + ], + deps = [ + "//tensorflow/contrib/lite/schema:schema_fbs", + "//tensorflow/contrib/lite/toco:model", + ], +) + +tf_cc_test( + name = "types_test", + srcs = [ + "types_test.cc", + ], + deps = [ + ":types", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "export", + srcs = [ + "export.cc", + ], + hdrs = [ + "export.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":operator", + ":types", + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite/schema:schema_fbs", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:tooling_util", + "@com_google_absl//absl/strings", + "@flatbuffers//:flatbuffers", + ], +) + +tf_cc_test( + name = "export_test", + srcs = [ + "export_test.cc", + ], + deps = [ + ":export", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "import", + srcs = [ + "import.cc", + ], + hdrs = [ + "import.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":operator", + ":types", + "//tensorflow/contrib/lite/schema:schema_fbs", + "//tensorflow/contrib/lite/toco:model", + "@flatbuffers//:flatbuffers", + ], +) + +tf_cc_test( + name = "import_test", + srcs = [ + "import_test.cc", + ], + deps = [ + ":import", + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite/schema:schema_fbs", + "@com_google_googletest//:gtest_main", + "@flatbuffers//:flatbuffers", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/toco/tflite/builtin_operator.h b/tensorflow/contrib/lite/toco/tflite/builtin_operator.h new file mode 100644 index 0000000000..93cc79ddb6 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/builtin_operator.h @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_BUILTIN_OPERATOR_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_BUILTIN_OPERATOR_H_ + +#include "absl/memory/memory.h" +#include "tensorflow/contrib/lite/toco/tflite/operator.h" + +namespace toco { + +namespace tflite { + +// Builtin operators have special TF Lite objects describing their options. +// This class has the boilerplate code for creating those. +// +// Template arguments: +// - T1 must derive from ::toco::Operator. +// - T2 must be one of TF Lite's objects defining Builtin Options, such as +// ::tflite::Conv2DOptions. +template +class BuiltinOperator : public BaseOperator { + public: + using TocoOperator = T1; + using TfLiteOptions = T2; + + BuiltinOperator(::tflite::BuiltinOperator op, OperatorType type) + : BaseOperator(::tflite::EnumNameBuiltinOperator(op), type) {} + + // Build the configuration object in the given flatbuffer builder. Return + // its offset. + virtual flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const = 0; + + // Read options from the TF Lite object and set the corresponding values in + // the tf.mini operator. + virtual void ReadOptions(const TfLiteOptions& opt, + TocoOperator* op) const = 0; + + Options Serialize(const Operator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto options = WriteOptions(static_cast(op), builder); + return Options::Builtin(TfLiteEnum, options.Union()); + } + + std::unique_ptr Deserialize( + const BuiltinOptions* builtin_options, + const CustomOptions* custom_options) const override { + auto op = absl::make_unique(); + auto* options = static_cast(builtin_options); + if (options) { + ReadOptions(*options, op.get()); + } + return std::unique_ptr(op.release()); + } +}; + +} // namespace tflite + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_BUILTIN_OPERATOR_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/custom_operator.h b/tensorflow/contrib/lite/toco/tflite/custom_operator.h new file mode 100644 index 0000000000..1a4bfac7d4 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/custom_operator.h @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_CUSTOM_OPERATOR_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_CUSTOM_OPERATOR_H_ + +#include "flatbuffers/flexbuffers.h" +#include "absl/memory/memory.h" +#include "tensorflow/contrib/lite/toco/tflite/operator.h" + +namespace toco { + +namespace tflite { + +// Custom operators have a generic byte buffer describing their options. This +// class provides the boilerplate code for populating those options using +// flexbuffers. Note that most of toco's operators will likely be supported +// as builtin operators in TF Lite. +// +// Template argument T must derive from ::toco::Operator. +template +class CustomOperator : public BaseOperator { + public: + using TocoOperator = T; + using BaseOperator::BaseOperator; + + // Populate the given flexbuffer with options obtained from the tf.mini + // operator. + virtual void WriteOptions(const TocoOperator& op, + flexbuffers::Builder* fbb) const {} + + // Set options in the given tf.mini operator using values from the flexbuffer + // map. + virtual void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const {} + + Options Serialize(const Operator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + flexbuffers::Builder fbb; + fbb.Map( + [&]() { WriteOptions(static_cast(op), &fbb); }); + fbb.Finish(); + return Options::Custom(builder->CreateVector(fbb.GetBuffer())); + } + + std::unique_ptr Deserialize( + const BuiltinOptions* builtin_options, + const CustomOptions* custom_options) const override { + auto op = absl::make_unique(); + if (custom_options) { + auto flexbuffer_map = + flexbuffers::GetRoot(custom_options->data(), custom_options->size()) + .AsMap(); + ReadOptions(flexbuffer_map, op.get()); + } + return std::unique_ptr(op.release()); + } +}; + +} // namespace tflite + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_CUSTOM_OPERATOR_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc new file mode 100644 index 0000000000..beda710614 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/export.cc @@ -0,0 +1,322 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/export.h" + +#include "flatbuffers/flexbuffers.h" +#include "absl/strings/str_join.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/toco/tflite/operator.h" +#include "tensorflow/contrib/lite/toco/tflite/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/contrib/lite/version.h" + +namespace toco { + +namespace tflite { + +using ::tflite::Buffer; +using ::tflite::BuiltinOperator; +using ::tflite::BuiltinOperator_CUSTOM; +using ::tflite::BuiltinOperator_MAX; +using ::tflite::BuiltinOperator_MIN; +using ::tflite::CreateBuffer; +using ::tflite::CreateModel; +using ::tflite::CreateOperator; +using ::tflite::CreateTensor; +using ::tflite::Operator; +using ::tflite::OperatorCode; +using ::tflite::SubGraph; +using ::tflite::Tensor; +using flatbuffers::FlatBufferBuilder; +using flatbuffers::Offset; +using flatbuffers::Vector; + +namespace { + +details::OperatorKey GetOperatorKey(const ::toco::Operator& op) { + string custom_code; + if (op.type == OperatorType::kTensorFlowUnsupported) { + const TensorFlowUnsupportedOperator& unsupported_op = + static_cast(op); + custom_code = unsupported_op.tensorflow_op; + } + return details::OperatorKey(op.type, custom_code); +} + +} // Anonymous namespace. + +namespace details { + +void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) { + // First find a list of unique array names. + std::set names; + for (const auto& array_pair : model.arrays) { + names.insert(array_pair.first); + } + + // Now assign indices to them and fill in the map. + int index = 0; + for (const auto& name : names) { + (*tensors_map)[name] = index; + ++index; + } +} + +void LoadOperatorsMap(const Model& model, OperatorsMap* operators_map) { + // First find a list of unique operator types. + std::set keys; + for (const auto& op : model.operators) { + keys.insert(GetOperatorKey(*op)); + } + // Now assign indices to them and fill in the map. + int index = 0; + for (const auto& key : keys) { + (*operators_map)[key] = index; + ++index; + } +} +} // namespace details + +Offset>> ExportTensors( + const Model& model, const details::TensorsMap& tensors_map, + FlatBufferBuilder* builder, std::vector* buffers_to_write) { + // In the end we will need to produce a vector sorted by the indices of the + // tensors in the tensors_map. + std::map> ordered_tensors; + + for (const auto& array_pair : model.arrays) { + const string& tensor_name = array_pair.first; + const toco::Array& array = *array_pair.second; + + int buffer_index = buffers_to_write->size(); + auto type = DataType::Serialize(array.data_type); + buffers_to_write->push_back(&array); + + std::vector shape; + if (array.has_shape()) { + for (int d : array.shape().dims()) { + shape.push_back(d); + } + } + + Offset> min; + Offset> max; + Offset> scale; + Offset> zero_point; + if (array.minmax) { + min = builder->CreateVector( + std::vector{static_cast(array.minmax->min)}); + max = builder->CreateVector( + std::vector{static_cast(array.minmax->max)}); + } + if (array.quantization_params) { + scale = builder->CreateVector(std::vector{ + static_cast(array.quantization_params->scale)}); + zero_point = builder->CreateVector( + std::vector{array.quantization_params->zero_point}); + } + auto q_param = ::tflite::CreateQuantizationParameters(*builder, min, max, + scale, zero_point); + + int index = tensors_map.at(tensor_name); + ordered_tensors[index] = + CreateTensor(*builder, builder->CreateVector(shape), type, buffer_index, + builder->CreateString(tensor_name), q_param); + } + + std::vector> tensor_vector; + tensor_vector.reserve(ordered_tensors.size()); + for (const auto& tensor : ordered_tensors) { + tensor_vector.push_back(tensor.second); + } + + return builder->CreateVector(tensor_vector); +} + +Offset> ExportInputTensors( + const Model& model, const details::TensorsMap& tensors_map, + FlatBufferBuilder* builder) { + std::vector inputs; + for (const auto& input : model.flags.input_arrays()) { + inputs.push_back(tensors_map.at(input.name())); + } + return builder->CreateVector(inputs); +} + +Offset> ExportOutputTensors( + const Model& model, const details::TensorsMap& tensors_map, + FlatBufferBuilder* builder) { + std::vector outputs; + for (const string& output : model.flags.output_arrays()) { + outputs.push_back(tensors_map.at(output)); + } + return builder->CreateVector(outputs); +} + +Offset>> ExportOperatorCodes( + const Model& model, + const std::map>& ops_by_type, + const details::OperatorsMap& operators_map, FlatBufferBuilder* builder, + std::set* error_summary) { + // Map from operator name to TF Lite enum value, for all builtins. + std::map builtin_ops; + for (int i = BuiltinOperator_MIN; i <= BuiltinOperator_MAX; ++i) { + BuiltinOperator op = static_cast(i); + string name = EnumNameBuiltinOperator(op); + if (op != BuiltinOperator_CUSTOM && !name.empty()) { + builtin_ops[name] = op; + } + } + + // We will need to produce a vector of codes in the same order as they + // appear in the operators_map. + std::map> ordered_opcodes; + + for (const auto& op : model.operators) { + const details::OperatorKey operator_key = GetOperatorKey(*op); + int op_index = operators_map.at(operator_key); + + if (ops_by_type.count(op->type) == 0) { + LOG(FATAL) << "Unsupported operator: " << HelpfulOperatorTypeName(*op); + } + + string name = ops_by_type.at(op->type)->name(); + if (builtin_ops.count(name) > 0) { + ordered_opcodes[op_index] = + CreateOperatorCode(*builder, builtin_ops[name], 0); + } else { + // If use the custom operation code if it's available in the OperatorKey. + if (!operator_key.custom_code.empty()) { + name = operator_key.custom_code; + } + if (error_summary) { + error_summary->insert(name); + } + ordered_opcodes[op_index] = CreateOperatorCode( + *builder, BuiltinOperator_CUSTOM, builder->CreateString(name)); + } + } + + std::vector> opcode_vector; + opcode_vector.reserve(ordered_opcodes.size()); + for (const auto& opcode : ordered_opcodes) { + opcode_vector.push_back(opcode.second); + } + + return builder->CreateVector(opcode_vector); +} + +Offset>> ExportOperators( + const Model& model, + const std::map>& ops_by_type, + const details::OperatorsMap& operators_map, + const details::TensorsMap& tensors_map, FlatBufferBuilder* builder) { + // The operators are in execution order, so we just follow tf.mini order. + std::vector> op_vector; + for (const auto& op : model.operators) { + if (ops_by_type.count(op->type) == 0) { + LOG(FATAL) << "Op type '" << OperatorTypeName(op->type) + << "' not supported"; + } + + std::vector inputs; + for (const string& input : op->inputs) { + inputs.push_back(tensors_map.at(input)); + } + + std::vector outputs; + for (const string& output : op->outputs) { + outputs.push_back(tensors_map.at(output)); + } + + auto options = ops_by_type.at(op->type)->Serialize(*op, builder); + int op_index = operators_map.at(GetOperatorKey(*op)); + // The only supported CustomOptionFormat is FLEXBUFFERS now. + op_vector.push_back(CreateOperator( + *builder, op_index, builder->CreateVector(inputs), + builder->CreateVector(outputs), options.type, options.builtin, + options.custom, ::tflite::CustomOptionsFormat_FLEXBUFFERS)); + } + + return builder->CreateVector(op_vector); +} + +Offset>> ExportBuffers( + const Model& model, const std::vector& buffers_to_write, + FlatBufferBuilder* builder) { + std::vector> buffer_vector; + size_t index = 0; + for (const Array* array_ptr : buffers_to_write) { + const Array& array = *array_ptr; + Offset> data_buffer = DataBuffer::Serialize(array, builder); + buffer_vector.push_back(CreateBuffer(*builder, data_buffer)); + index++; + } + return builder->CreateVector(buffer_vector); +} + +void Export(const Model& model, bool allow_custom_ops, + string* output_file_contents) { + flatbuffers::FlatBufferBuilder builder(/*initial_size=*/10240); + + const auto ops_by_type = BuildOperatorByTypeMap(); + + details::TensorsMap tensors_map; + details::LoadTensorsMap(model, &tensors_map); + + details::OperatorsMap operators_map; + details::LoadOperatorsMap(model, &operators_map); + + std::vector buffers_to_write; + Array empty_array; + buffers_to_write.push_back(&empty_array); + + auto tensors = ExportTensors(model, tensors_map, &builder, &buffers_to_write); + auto inputs = ExportInputTensors(model, tensors_map, &builder); + auto outputs = ExportOutputTensors(model, tensors_map, &builder); + + std::set error_summary; + auto op_codes = ExportOperatorCodes(model, ops_by_type, operators_map, + &builder, &error_summary); + if (!allow_custom_ops && !error_summary.empty()) { + LOG(QFATAL) << "Some of the operators in the model are not supported by " + "the standard TensorFlow Lite runtime. If you have a custom " + "implementation for them you can disable this error with " + "--allow_custom_ops. Here is a list of operators for which " + "you will need custom implementations: " + << absl::StrJoin(error_summary, ", ") << "."; + } + + auto ops = + ExportOperators(model, ops_by_type, operators_map, tensors_map, &builder); + + // TODO(aselle): add support to toco for multiple subgraphs. + auto subgraph = CreateSubGraph(builder, tensors, inputs, outputs, ops); + std::vector> subgraphs = {subgraph}; + + auto buffers = ExportBuffers(model, buffers_to_write, &builder); + auto description = builder.CreateString("TOCO Converted."); + auto new_model_location = + CreateModel(builder, TFLITE_SCHEMA_VERSION, op_codes, + builder.CreateVector(subgraphs), description, buffers); + ::tflite::FinishModelBuffer(builder, new_model_location); + const uint8_t* buffer = builder.GetBufferPointer(); + int size = builder.GetSize(); + *output_file_contents = string(reinterpret_cast(buffer), size); +} + +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h new file mode 100644 index 0000000000..44012b7126 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/export.h @@ -0,0 +1,76 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_EXPORT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_EXPORT_H_ + +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +namespace tflite { + +// Transform the given tf.mini model into a TF Lite flatbuffer and deposit the +// result in the given string. +void Export(const Model& model, bool allow_custom_ops, + string* output_file_contents); +// This if backward-compatibility. +inline void Export(const Model& model, string* output_file_contents) { + Export(model, true, output_file_contents); +} + +namespace details { + +// A maps from tensor name to its final position in the TF Lite buffer. +using TensorsMap = std::unordered_map; + +// A key to identify an operator. +// Only when `type` is `kTensorFlowUnsupported`, `custom_code` is filled to +// identify which operation is used. +struct OperatorKey { + OperatorKey(OperatorType type, const std::string& custom_code) + : type(type), custom_code(custom_code) {} + const OperatorType type; + const std::string custom_code; + + bool operator<(const OperatorKey& other) const { + if (type < other.type) return true; + if (type > other.type) return false; + return custom_code < other.custom_code; + } + + bool operator==(const OperatorKey& other) const { + return type == other.type && custom_code == other.custom_code; + } + + struct Hash { + std::size_t operator()(const OperatorKey& key) const { + return std::hash()(static_cast(key.type)) ^ + std::hash()(key.custom_code); + } + }; +}; + +// A maps from operator type to its final position in the TF Lite buffer. +using OperatorsMap = std::unordered_map; + +void LoadTensorsMap(const Model& model, TensorsMap* tensors_map); +void LoadOperatorsMap(const Model& model, OperatorsMap* operators_map); + +} // namespace details +} // namespace tflite + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_EXPORT_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc new file mode 100644 index 0000000000..e395645383 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc @@ -0,0 +1,69 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/export.h" + +#include +#include + +namespace toco { + +namespace tflite { +namespace { + +class ExportTest : public ::testing::Test { + protected: + // This is a very simplistic model. We are not interested in testing all the + // details here, since tf.mini's testing framework will be exercising all the + // conversions multiple times, and the conversion of operators is tested by + // separate unittests. + void BuildTestModel() { + input_model_.GetOrCreateArray("tensor_one"); + input_model_.GetOrCreateArray("tensor_two"); + input_model_.operators.emplace_back(new ConvOperator); + input_model_.operators.emplace_back(new AddOperator); + auto unsupported_operator = new TensorFlowUnsupportedOperator; + unsupported_operator->tensorflow_op = "MyCrazyOp"; + input_model_.operators.emplace_back(unsupported_operator); + } + + Model input_model_; +}; + +TEST_F(ExportTest, LoadTensorsMap) { + BuildTestModel(); + + details::TensorsMap tensors; + details::LoadTensorsMap(input_model_, &tensors); + EXPECT_EQ(0, tensors["tensor_one"]); + EXPECT_EQ(1, tensors["tensor_two"]); +} + +TEST_F(ExportTest, LoadOperatorsMap) { + BuildTestModel(); + + details::OperatorsMap operators; + details::LoadOperatorsMap(input_model_, &operators); + EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "")]); + EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "")]); + EXPECT_EQ(2, operators[details::OperatorKey( + OperatorType::kTensorFlowUnsupported, "MyCrazyOp")]); +} + +// TODO(ahentz): tests for tensors, inputs, outpus, opcodes and operators. + +} // namespace +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tflite/import.cc b/tensorflow/contrib/lite/toco/tflite/import.cc new file mode 100644 index 0000000000..bbf201fd28 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/import.cc @@ -0,0 +1,183 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/import.h" + +#include "flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/toco/tflite/operator.h" +#include "tensorflow/contrib/lite/toco/tflite/types.h" + +namespace toco { + +namespace tflite { + +namespace details { +void LoadTensorsTable(const ::tflite::Model& input_model, + TensorsTable* tensors_table) { + // TODO(aselle): add support to toco for multiple subgraphs. + auto tensors = (*input_model.subgraphs())[0]->tensors(); + if (!tensors) return; + for (const auto* tensor : *tensors) { + tensors_table->push_back(tensor->name()->c_str()); + } +} + +void LoadOperatorsTable(const ::tflite::Model& input_model, + OperatorsTable* operators_table) { + auto opcodes = input_model.operator_codes(); + if (!opcodes) return; + for (const auto* opcode : *opcodes) { + if (opcode->builtin_code() != ::tflite::BuiltinOperator_CUSTOM) { + operators_table->push_back( + EnumNameBuiltinOperator(opcode->builtin_code())); + } else { + operators_table->push_back(opcode->custom_code()->c_str()); + } + } +} +} // namespace details + +void ImportTensors(const ::tflite::Model& input_model, Model* model) { + auto tensors = (*input_model.subgraphs())[0]->tensors(); + auto* buffers = input_model.buffers(); + // auto tensors = input_model.tensors(); + if (!tensors) return; + for (const auto* input_tensor : *tensors) { + Array& array = model->GetOrCreateArray(input_tensor->name()->c_str()); + array.data_type = DataType::Deserialize(input_tensor->type()); + int buffer_index = input_tensor->buffer(); + auto* buffer = buffers->Get(buffer_index); + DataBuffer::Deserialize(*input_tensor, *buffer, &array); + + auto shape = input_tensor->shape(); + if (shape) { + for (int i = 0; i < shape->Length(); ++i) { + auto d = shape->Get(i); + array.mutable_shape()->mutable_dims()->push_back(d); + } + } + + auto quantization = input_tensor->quantization(); + if (quantization) { + // Note that tf.mini only supports a single quantization parameters for + // the whole array. + if (quantization->min() && quantization->max()) { + CHECK_EQ(1, quantization->min()->Length()); + CHECK_EQ(1, quantization->max()->Length()); + MinMax& minmax = array.GetOrCreateMinMax(); + minmax.min = quantization->min()->Get(0); + minmax.max = quantization->max()->Get(0); + } + if (quantization->scale() && quantization->zero_point()) { + CHECK_EQ(1, quantization->scale()->Length()); + CHECK_EQ(1, quantization->zero_point()->Length()); + QuantizationParams& q = array.GetOrCreateQuantizationParams(); + q.scale = quantization->scale()->Get(0); + q.zero_point = quantization->zero_point()->Get(0); + } + } + } +} + +void ImportOperators( + const ::tflite::Model& input_model, + const std::map>& ops_by_name, + const details::TensorsTable& tensors_table, + const details::OperatorsTable& operators_table, Model* model) { + // TODO(aselle): add support for multiple subgraphs. + auto ops = (*input_model.subgraphs())[0]->operators(); + + if (!ops) return; + for (const auto* input_op : *ops) { + int index = input_op->opcode_index(); + if (index < 0 || index > operators_table.size()) { + LOG(FATAL) << "Index " << index << " must be between zero and " + << operators_table.size(); + } + string opname = operators_table.at(index); + if (ops_by_name.count(opname) == 0) { + LOG(FATAL) << "Op '" << opname << "' not supported"; + } + + auto new_op = ops_by_name.at(opname)->Deserialize( + input_op->builtin_options(), input_op->custom_options()); + model->operators.emplace_back(new_op.release()); + auto* op = model->operators.back().get(); + + auto inputs = input_op->inputs(); + for (int i = 0; i < inputs->Length(); i++) { + auto input_index = inputs->Get(i); + const string& input_name = tensors_table.at(input_index); + op->inputs.push_back(input_name); + } + auto outputs = input_op->outputs(); + for (int i = 0; i < outputs->Length(); i++) { + auto output_index = outputs->Get(i); + const string& output_name = tensors_table.at(output_index); + op->outputs.push_back(output_name); + } + } +} + +void ImportIOTensors(const ::tflite::Model& input_model, + const details::TensorsTable& tensors_table, Model* model) { + auto inputs = (*input_model.subgraphs())[0]->inputs(); + if (inputs) { + for (int input : *inputs) { + const string& input_name = tensors_table.at(input); + model->flags.add_input_arrays()->set_name(input_name); + } + } + + auto outputs = (*input_model.subgraphs())[0]->outputs(); + if (outputs) { + for (int output : *outputs) { + const string& output_name = tensors_table.at(output); + model->flags.add_output_arrays(output_name); + } + } +} + +std::unique_ptr Import(const ModelFlags& model_flags, + const string& input_file_contents) { + const ::tflite::Model* input_model = + ::tflite::GetModel(input_file_contents.data()); + + // Full list of all known operators. + const auto ops_by_name = BuildOperatorByNameMap(); + + if (input_model->subgraphs()->size() != 1) { + LOG(FATAL) << "# of subgraphs in tflite should be exactly 1 for now."; + } + std::unique_ptr model; + model.reset(new Model); + + details::TensorsTable tensors_table; + details::LoadTensorsTable(*input_model, &tensors_table); + + details::OperatorsTable operators_table; + details::LoadOperatorsTable(*input_model, &operators_table); + + ImportTensors(*input_model, model.get()); + ImportOperators(*input_model, ops_by_name, tensors_table, operators_table, + model.get()); + ImportIOTensors(*input_model, tensors_table, model.get()); + + return model; +} + +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tflite/import.h b/tensorflow/contrib/lite/toco/tflite/import.h new file mode 100644 index 0000000000..3c27a2843c --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/import.h @@ -0,0 +1,49 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_IMPORT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_IMPORT_H_ + +#include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +namespace tflite { + +// Parse the given string as TF Lite flatbuffer and return a new tf.mini model. +std::unique_ptr Import(const ModelFlags &model_flags, + const string &input_file_contents); + +namespace details { + +// The names of all tensors found in a TF Lite model. +using TensorsTable = std::vector; + +// The names of all operators found in TF Lite model. If the operator is +// builtin, the string representation of the corresponding enum value is used +// as name. +using OperatorsTable = std::vector; + +void LoadTensorsTable(const ::tflite::Model &input_model, + TensorsTable *tensors_table); +void LoadOperatorsTable(const ::tflite::Model &input_model, + OperatorsTable *operators_table); + +} // namespace details +} // namespace tflite + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_IMPORT_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/import_test.cc b/tensorflow/contrib/lite/toco/tflite/import_test.cc new file mode 100644 index 0000000000..309fa6d7f6 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/import_test.cc @@ -0,0 +1,141 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/import.h" + +#include "flatbuffers/flexbuffers.h" +#include +#include +#include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/version.h" + +namespace toco { + +namespace tflite { +namespace { + +using ::testing::ElementsAre; + +class ImportTest : public ::testing::Test { + protected: + template + flatbuffers::Offset> CreateDataVector( + const std::vector& data) { + return builder_.CreateVector(reinterpret_cast(data.data()), + sizeof(T) * data.size()); + } + // This is a very simplistic model. We are not interested in testing all the + // details here, since tf.mini's testing framework will be exercising all the + // conversions multiple times, and the conversion of operators is tested by + // separate unittests. + void BuildTestModel() { + // The tensors + auto q = ::tflite::CreateQuantizationParameters( + builder_, + /*min=*/builder_.CreateVector({0.1f}), + /*max=*/builder_.CreateVector({0.2f}), + /*scale=*/builder_.CreateVector({0.3f}), + /*zero_point=*/builder_.CreateVector({100ll})); + auto buf0 = ::tflite::CreateBuffer(builder_, CreateDataVector({})); + auto buf1 = + ::tflite::CreateBuffer(builder_, CreateDataVector({1.0f, 2.0f})); + auto buf2 = + ::tflite::CreateBuffer(builder_, CreateDataVector({3.0f})); + auto buffers = builder_.CreateVector( + std::vector>({buf0, buf1, buf2})); + auto t1 = ::tflite::CreateTensor(builder_, + builder_.CreateVector({1, 2, 3, 4}), + ::tflite::TensorType_FLOAT32, 1, + builder_.CreateString("tensor_one"), q); + auto t2 = + ::tflite::CreateTensor(builder_, builder_.CreateVector({2, 1}), + ::tflite::TensorType_FLOAT32, 2, + builder_.CreateString("tensor_two"), q); + auto tensors = builder_.CreateVector( + std::vector>({t1, t2})); + + // The operator codes. + auto c1 = + ::tflite::CreateOperatorCode(builder_, ::tflite::BuiltinOperator_CUSTOM, + builder_.CreateString("custom_op_one")); + auto c2 = ::tflite::CreateOperatorCode( + builder_, ::tflite::BuiltinOperator_CONV_2D, 0); + auto opcodes = builder_.CreateVector( + std::vector>({c1, c2})); + + auto subgraph = ::tflite::CreateSubGraph(builder_, tensors, 0, 0, 0); + std::vector> subgraph_vector( + {subgraph}); + auto subgraphs = builder_.CreateVector(subgraph_vector); + auto s = builder_.CreateString(""); + builder_.Finish(::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, + opcodes, subgraphs, s, buffers)); + + input_model_ = ::tflite::GetModel(builder_.GetBufferPointer()); + } + string InputModelAsString() { + return string(reinterpret_cast(builder_.GetBufferPointer()), + builder_.GetSize()); + } + flatbuffers::FlatBufferBuilder builder_; + // const uint8_t* buffer_ = nullptr; + const ::tflite::Model* input_model_ = nullptr; +}; + +TEST_F(ImportTest, LoadTensorsTable) { + BuildTestModel(); + + details::TensorsTable tensors; + details::LoadTensorsTable(*input_model_, &tensors); + EXPECT_THAT(tensors, ElementsAre("tensor_one", "tensor_two")); +} + +TEST_F(ImportTest, LoadOperatorsTable) { + BuildTestModel(); + + details::OperatorsTable operators; + details::LoadOperatorsTable(*input_model_, &operators); + EXPECT_THAT(operators, ElementsAre("custom_op_one", "CONV_2D")); +} + +TEST_F(ImportTest, Tensors) { + BuildTestModel(); + + auto model = Import(ModelFlags(), InputModelAsString()); + + ASSERT_GT(model->arrays.count("tensor_one"), 0); + Array& a1 = model->GetArray("tensor_one"); + EXPECT_EQ(ArrayDataType::kFloat, a1.data_type); + EXPECT_THAT(a1.GetBuffer().data, + ElementsAre(1.0f, 2.0f)); + ASSERT_TRUE(a1.has_shape()); + EXPECT_THAT(a1.shape().dims(), ElementsAre(1, 2, 3, 4)); + + const auto& mm = a1.minmax; + ASSERT_TRUE(mm.get()); + EXPECT_FLOAT_EQ(0.1, mm->min); + EXPECT_FLOAT_EQ(0.2, mm->max); + + const auto& q = a1.quantization_params; + ASSERT_TRUE(q.get()); + EXPECT_FLOAT_EQ(0.3, q->scale); + EXPECT_EQ(100, q->zero_point); +} + +// TODO(ahentz): still need tests for Operators and IOTensors. + +} // namespace +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc new file mode 100644 index 0000000000..8a33500ddc --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -0,0 +1,627 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/operator.h" + +#include "tensorflow/contrib/lite/toco/tflite/builtin_operator.h" +#include "tensorflow/contrib/lite/toco/tflite/custom_operator.h" +#include "tensorflow/contrib/lite/toco/tflite/simple_operator.h" +#include "tensorflow/contrib/lite/toco/tflite/types.h" + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" + +namespace toco { + +namespace tflite { + +class AveragePool + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto padding = Padding::Serialize(op.padding.type); + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreatePool2DOptions(*builder, padding, op.stride_width, + op.stride_height, op.kwidth, + op.kheight, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->padding.type = Padding::Deserialize(options.padding()); + op->stride_width = options.stride_w(); + op->stride_height = options.stride_h(); + op->kwidth = options.filter_width(); + op->kheight = options.filter_height(); + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class Convolution + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto padding = Padding::Serialize(op.padding.type); + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreateConv2DOptions(*builder, padding, op.stride_width, + op.stride_height, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->padding.type = Padding::Deserialize(options.padding()); + op->stride_width = options.stride_w(); + op->stride_height = options.stride_h(); + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class DepthwiseConvolution + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto padding = Padding::Serialize(op.padding.type); + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreateDepthwiseConv2DOptions( + *builder, padding, op.stride_width, op.stride_height, + op.depth_multiplier, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->padding.type = Padding::Deserialize(options.padding()); + op->stride_width = options.stride_w(); + op->stride_height = options.stride_h(); + op->depth_multiplier = options.depth_multiplier(); + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class Add : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreateAddOptions(*builder, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class Cast : public CustomOperator { + public: + using CustomOperator::CustomOperator; + void WriteOptions(const TocoOperator& op, + flexbuffers::Builder* fbb) const override { + fbb->Int("src_data_type", DataType::Serialize(op.src_data_type)); + fbb->Int("dst_data_type", DataType::Serialize(op.dst_data_type)); + } + void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { + op->src_data_type = DataType::Deserialize(m["src_data_type"].AsInt64()); + op->dst_data_type = DataType::Deserialize(m["dst_data_type"].AsInt64()); + } +}; + +class Concatenation + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateConcatenationOptions(*builder, op.concat_dim); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->concat_dim = options.axis(); + } +}; + +class DepthToSpace : public CustomOperator { + public: + using CustomOperator::CustomOperator; + void WriteOptions(const TocoOperator& op, + flexbuffers::Builder* fbb) const override { + fbb->Int("block_size", op.block_size); + } + void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { + op->block_size = m["block_size"].AsInt64(); + } +}; + +class FakeQuant : public CustomOperator { + public: + using CustomOperator::CustomOperator; + void WriteOptions(const TocoOperator& op, + flexbuffers::Builder* fbb) const override { + fbb->Float("min", op.minmax->min); + fbb->Float("max", op.minmax->max); + } + void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { + auto* minmax = new MinMax; + minmax->min = m["min"].AsFloat(); + minmax->max = m["max"].AsFloat(); + op->minmax.reset(minmax); + } +}; + +class FullyConnected + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreateFullyConnectedOptions(*builder, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class Svdf : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreateSVDFOptions(*builder, op.rank, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + op->rank = options.rank(); + } +}; + +class L2Normalization + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreateL2NormOptions(*builder, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class L2Pool : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto padding = Padding::Serialize(op.padding.type); + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreatePool2DOptions(*builder, padding, op.stride_width, + op.stride_height, op.kwidth, + op.kheight, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->padding.type = Padding::Deserialize(options.padding()); + op->stride_width = options.stride_w(); + op->stride_height = options.stride_h(); + op->kwidth = options.filter_width(); + op->kheight = options.filter_height(); + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class LocalResponseNormalization + : public BuiltinOperator< + LocalResponseNormalizationOperator, + ::tflite::LocalResponseNormalizationOptions, + ::tflite::BuiltinOptions_LocalResponseNormalizationOptions> { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateLocalResponseNormalizationOptions( + *builder, op.range, op.bias, op.alpha, op.beta); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->range = options.radius(); + op->bias = options.bias(); + op->alpha = options.alpha(); + op->beta = options.beta(); + } +}; + +class MaxPool : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto padding = Padding::Serialize(op.padding.type); + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreatePool2DOptions(*builder, padding, op.stride_width, + op.stride_height, op.kwidth, + op.kheight, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->padding.type = Padding::Deserialize(options.padding()); + op->stride_width = options.stride_w(); + op->stride_height = options.stride_h(); + op->kwidth = options.filter_width(); + op->kheight = options.filter_height(); + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class Mul : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto activation_function = + ActivationFunction::Serialize(op.fused_activation_function); + return ::tflite::CreateMulOptions(*builder, activation_function); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->fused_activation_function = + ActivationFunction::Deserialize(options.fused_activation_function()); + } +}; + +class Reshape + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateReshapeOptions(*builder, + builder->CreateVector(op.shape)); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->shape.insert(op->shape.end(), options.new_shape()->begin(), + options.new_shape()->end()); + } +}; + +class Softmax + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateSoftmaxOptions(*builder, op.beta); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->beta = options.beta(); + } +}; + +class SpaceToDepth + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateSpaceToDepthOptions(*builder, op.block_size); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->block_size = options.block_size(); + } +}; + +class Split : public CustomOperator { + public: + using CustomOperator::CustomOperator; + void WriteOptions(const TocoOperator& op, + flexbuffers::Builder* fbb) const override { + fbb->Int("num_split", op.num_split); + } + void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { + op->num_split = m["num_split"].AsInt64(); + } +}; + +class TensorFlowUnsupported : public BaseOperator { + public: + using BaseOperator::BaseOperator; + + Options Serialize(const Operator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + auto fbb = + WriteOptions(static_cast(op)); + if (fbb) { + return Options::Custom(builder->CreateVector(fbb->GetBuffer())); + } else { + return Options::Custom(0); + } + } + + std::unique_ptr Deserialize( + const BuiltinOptions* builtin_options, + const CustomOptions* custom_options) const override { + auto op = absl::make_unique(); + if (custom_options) { + auto flexbuffer_map = + flexbuffers::GetRoot(custom_options->data(), custom_options->size()) + .AsMap(); + ReadOptions(flexbuffer_map, op.get()); + } + return std::unique_ptr(op.release()); + } + + std::unique_ptr WriteOptions( + const TensorFlowUnsupportedOperator& op) const { + auto fbb = absl::make_unique(); + + ::tensorflow::NodeDef node_def; + if (!node_def.ParseFromString(op.tensorflow_node_def)) { + LOG(ERROR) << "Failed to parse TensorFlow NodeDef"; + return std::unique_ptr(); + } + + bool has_valid_attr = false; + size_t map_start = fbb->StartMap(); + for (const auto& pair : node_def.attr()) { + const char* key = pair.first.c_str(); + const auto& attr = pair.second; + switch (attr.value_case()) { + case ::tensorflow::AttrValue::kS: + fbb->String(key, attr.s()); + has_valid_attr = true; + break; + case ::tensorflow::AttrValue::kI: + fbb->Int(key, attr.i()); + has_valid_attr = true; + break; + case ::tensorflow::AttrValue::kF: + fbb->Float(key, attr.f()); + has_valid_attr = true; + break; + case ::tensorflow::AttrValue::kB: + fbb->Bool(key, attr.b()); + has_valid_attr = true; + break; + default: + LOG(WARNING) << "Ignoring unsupported attribute type with key '" + << key << "'"; + break; + } + } + if (!has_valid_attr) { + return std::unique_ptr(); + } + fbb->EndMap(map_start); + fbb->Finish(); + return std::unique_ptr(fbb.release()); + } + + void ReadOptions(const flexbuffers::Map& m, + TensorFlowUnsupportedOperator* op) const { + ::tensorflow::NodeDef node_def; + auto attr = node_def.mutable_attr(); + + const auto& keys = m.Keys(); + for (size_t i = 0; i < keys.size(); ++i) { + const auto key = keys[i].AsKey(); + const auto& value = m[key]; + switch (value.GetType()) { + case flexbuffers::TYPE_STRING: + (*attr)[key].set_s(value.AsString().c_str()); + break; + case flexbuffers::TYPE_INT: + (*attr)[key].set_i(value.AsInt64()); + break; + case flexbuffers::TYPE_FLOAT: + (*attr)[key].set_f(value.AsFloat()); + break; + case flexbuffers::TYPE_BOOL: + (*attr)[key].set_b(value.AsBool()); + break; + default: + LOG(WARNING) << "Ignoring unsupported attribute type with key '" + << key << "'"; + break; + } + } + node_def.SerializeToString(&op->tensorflow_node_def); + } +}; + +namespace { +// Build a vector containing all the known operators. +std::vector> BuildOperatorList() { + std::vector> ops; + + // Builtin Operators. + ops.emplace_back(new Add(::tflite::BuiltinOperator_ADD, OperatorType::kAdd)); + ops.emplace_back(new AveragePool(::tflite::BuiltinOperator_AVERAGE_POOL_2D, + OperatorType::kAveragePool)); + ops.emplace_back(new Concatenation(::tflite::BuiltinOperator_CONCATENATION, + OperatorType::kConcatenation)); + ops.emplace_back( + new Convolution(::tflite::BuiltinOperator_CONV_2D, OperatorType::kConv)); + ops.emplace_back( + new DepthwiseConvolution(::tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + OperatorType::kDepthwiseConv)); + ops.emplace_back(new FullyConnected(::tflite::BuiltinOperator_FULLY_CONNECTED, + OperatorType::kFullyConnected)); + ops.emplace_back( + new L2Normalization(::tflite::BuiltinOperator_L2_NORMALIZATION, + OperatorType::kL2Normalization)); + ops.emplace_back( + new L2Pool(::tflite::BuiltinOperator_L2_POOL_2D, OperatorType::kL2Pool)); + ops.emplace_back(new LocalResponseNormalization( + ::tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + OperatorType::kLocalResponseNormalization)); + ops.emplace_back(new MaxPool(::tflite::BuiltinOperator_MAX_POOL_2D, + OperatorType::kMaxPool)); + ops.emplace_back(new Mul(::tflite::BuiltinOperator_MUL, OperatorType::kMul)); + ops.emplace_back(new Reshape(::tflite::BuiltinOperator_RESHAPE, + OperatorType::kTensorFlowReshape)); + ops.emplace_back( + new Softmax(::tflite::BuiltinOperator_SOFTMAX, OperatorType::kSoftmax)); + ops.emplace_back(new SpaceToDepth(::tflite::BuiltinOperator_SPACE_TO_DEPTH, + OperatorType::kSpaceToDepth)); + ops.emplace_back( + new Svdf(::tflite::BuiltinOperator_SVDF, OperatorType::kSvdf)); + + // Custom Operators. + ops.emplace_back(new Cast("CAST", OperatorType::kCast)); + ops.emplace_back( + new DepthToSpace("DEPTH_TO_SPACE", OperatorType::kDepthToSpace)); + ops.emplace_back(new FakeQuant("FAKE_QUANT", OperatorType::kFakeQuant)); + ops.emplace_back(new Split("SPLIT", OperatorType::kTensorFlowSplit)); + ops.emplace_back(new TensorFlowUnsupported( + "TENSORFLOW_UNSUPPORTED", OperatorType::kTensorFlowUnsupported)); + + // There operators are supported by Toco, but not by TF Lite, and has no + // attributes. + ops.emplace_back(new SimpleOperator( + "RSQRT", OperatorType::kTensorFlowRsqrt)); + ops.emplace_back( + new SimpleOperator("DIV", OperatorType::kDiv)); + + // Simple Operators. + ops.emplace_back(new SimpleOperator( + "DEQUANTIZE", OperatorType::kDequantize)); + ops.emplace_back( + new SimpleOperator("FLOOR", OperatorType::kFloor)); + ops.emplace_back( + new SimpleOperator("GATHER", OperatorType::kGather)); + ops.emplace_back( + new SimpleOperator("RELU", OperatorType::kRelu)); + ops.emplace_back( + new SimpleOperator("RELU1", OperatorType::kRelu1)); + ops.emplace_back( + new SimpleOperator("RELU6", OperatorType::kRelu6)); + ops.emplace_back(new SimpleOperator( + "RESIZE_BILINEAR", OperatorType::kResizeBilinear)); + ops.emplace_back(new SimpleOperator( + "LOGISTIC", OperatorType::kLogistic)); + ops.emplace_back( + new SimpleOperator("TANH", OperatorType::kTanh)); + + return ops; +} +} // namespace + +std::map> BuildOperatorByTypeMap() { + std::map> result; + + std::vector> ops = BuildOperatorList(); + for (auto& op : ops) { + result[op->type()] = std::move(op); + } + + return result; +} + +std::map> BuildOperatorByNameMap() { + std::map> result; + + std::vector> ops = BuildOperatorList(); + for (auto& op : ops) { + result[op->name()] = std::move(op); + } + + return result; +} + +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h new file mode 100644 index 0000000000..37df302d46 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/operator.h @@ -0,0 +1,89 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_OPERATOR_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_OPERATOR_H_ + +#include "flatbuffers/flatbuffers.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +namespace tflite { + +class BaseOperator; + +// Return a map contained all knwo TF Lite Operators, keyed by their names. +std::map> BuildOperatorByNameMap(); + +// Return a map contained all knwo TF Lite Operators, keyed by the type of +// their tf.mini counterparts. +std::map> BuildOperatorByTypeMap(); + +// These are the flatbuffer types for custom and builtin options. +using CustomOptions = flatbuffers::Vector; +using BuiltinOptions = void; + +// A simple wrapper around the flatbuffer objects used to describe options that +// configure operators. +struct Options { + // Build custom options. + static Options Custom(flatbuffers::Offset offset) { + return {::tflite::BuiltinOptions_NONE, 0, offset}; + } + + // Build builtin options of the given type. + static Options Builtin(::tflite::BuiltinOptions type, + flatbuffers::Offset offset) { + return {type, offset, 0}; + } + + ::tflite::BuiltinOptions type; + flatbuffers::Offset builtin; + flatbuffers::Offset custom; +}; + +// A BaseOperator encapsulates the relationship between operators in tf.mini +// and TF lite, and provides methods for converting between those two formats. +class BaseOperator { + public: + // Build an operator with the given TF Lite name and tf.mini type. + BaseOperator(const string& name, OperatorType type) + : name_(name), type_(type) {} + virtual ~BaseOperator() = default; + + string name() const { return name_; } + OperatorType type() const { return type_; } + + // Given a tf.mini operator, create the corresponding flatbuffer options and + // return their offsets. + virtual Options Serialize(const Operator& op, + flatbuffers::FlatBufferBuilder* builder) const = 0; + + // Read TF Lite options and create the appropriate tf.mini operator. + virtual std::unique_ptr Deserialize( + const BuiltinOptions* builtin_options, + const CustomOptions* custom_options) const = 0; + + private: + string name_; + OperatorType type_; +}; + +} // namespace tflite + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_OPERATOR_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc new file mode 100644 index 0000000000..543a9bd06c --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -0,0 +1,370 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/operator.h" + +#include "flatbuffers/flexbuffers.h" +#include +#include +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" + +namespace toco { + +namespace tflite { +namespace { + +class OperatorTest : public ::testing::Test { + protected: + // Return the operator for the given name and type. + const BaseOperator& GetOperator(const string& name, OperatorType type) { + using OpsByName = std::map>; + using OpsByType = std::map>; + + static auto* by_name = new OpsByName(BuildOperatorByNameMap()); + static auto* by_type = new OpsByType(BuildOperatorByTypeMap()); + + // Make sure the two maps were consitently built. + CHECK(by_name->count(name)) << "No operator for '" << name << "'."; + BaseOperator* op1 = by_name->at(name).get(); + CHECK(op1->type() == type) << "while verifying '" << name << "'."; + + CHECK(by_type->count(type)) + << "No operator for '" << OperatorTypeName(type) << "'."; + BaseOperator* op2 = by_type->at(type).get(); + CHECK(op2->name() == name) + << "while verifying '" << OperatorTypeName(type) << "'."; + + return *op1; + } + + // Use the given BaseOperator to serialize the tf.mini operator into a set of + // TF Lite options. Proceed to deserialize the options back into a new + // tf.mini operator, which is then returned. If `options` is given, it will + // be populated with the serialized options. + template + std::unique_ptr SerializeAndDeserialize(const BaseOperator& op, + const T& toco_op, + Options* options = nullptr) { + flatbuffers::FlatBufferBuilder builder; + Options input_options = op.Serialize(toco_op, &builder); + + if (options) { + *options = input_options; + } + + builder.Finish(CreateOperator(builder, 0, 0, 0, input_options.type, + input_options.builtin, input_options.custom, + ::tflite::CustomOptionsFormat_FLEXBUFFERS)); + auto* output_options = + flatbuffers::GetRoot<::tflite::Operator>(builder.GetBufferPointer()); + auto new_toco_op = op.Deserialize(output_options->builtin_options(), + output_options->custom_options()); + + CHECK(dynamic_cast(new_toco_op.get())) + << "Cannot cast " << HelpfulOperatorTypeName(*new_toco_op) << " to " + << HelpfulOperatorTypeName(toco_op); + + return std::unique_ptr(dynamic_cast(new_toco_op.release())); + } + + // Verify serialization and deserialization of simple operators (those + // that don't have any configuration parameters). + template + void CheckSimpleOperator(const string& name, OperatorType type) { + Options options; + auto output_toco_op = + SerializeAndDeserialize(GetOperator(name, type), T(), &options); + + ASSERT_EQ(0, options.builtin.o); + ASSERT_EQ(0, options.custom.o); + ASSERT_EQ(::tflite::BuiltinOptions_NONE, options.type); + + ASSERT_NE(nullptr, output_toco_op.get()); + } +}; + +TEST_F(OperatorTest, SimpleOperators) { + CheckSimpleOperator("DEQUANTIZE", + OperatorType::kDequantize); + CheckSimpleOperator("FLOOR", OperatorType::kFloor); + CheckSimpleOperator("GATHER", OperatorType::kGather); + CheckSimpleOperator("RELU", OperatorType::kRelu); + CheckSimpleOperator("RELU1", OperatorType::kRelu1); + CheckSimpleOperator("RELU6", OperatorType::kRelu6); + CheckSimpleOperator("RESIZE_BILINEAR", + OperatorType::kResizeBilinear); + CheckSimpleOperator("LOGISTIC", OperatorType::kLogistic); + CheckSimpleOperator("TANH", OperatorType::kTanh); +} + +TEST_F(OperatorTest, BuiltinAdd) { + AddOperator op; + op.fused_activation_function = FusedActivationFunctionType::kRelu6; + auto output_toco_op = + SerializeAndDeserialize(GetOperator("ADD", OperatorType::kAdd), op); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); +} + +TEST_F(OperatorTest, CustomCast) { + CastOperator op; + op.src_data_type = ArrayDataType::kFloat; + op.dst_data_type = ArrayDataType::kUint8; + auto output_toco_op = + SerializeAndDeserialize(GetOperator("CAST", OperatorType::kCast), op); + EXPECT_EQ(op.src_data_type, output_toco_op->src_data_type); + EXPECT_EQ(op.dst_data_type, output_toco_op->dst_data_type); +} + +TEST_F(OperatorTest, CustomConcatenation) { + ConcatenationOperator op; + op.concat_dim = 123; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("CONCATENATION", OperatorType::kConcatenation), op); + EXPECT_EQ(op.concat_dim, output_toco_op->concat_dim); +} + +TEST_F(OperatorTest, CustomDepthToSpace) { + DepthToSpaceOperator op; + op.block_size = 123; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("DEPTH_TO_SPACE", OperatorType::kDepthToSpace), op); + EXPECT_EQ(op.block_size, output_toco_op->block_size); +} + +TEST_F(OperatorTest, CustomFakeQuant) { + FakeQuantOperator op; + auto* minmax = new MinMax; + minmax->min = -10; + minmax->max = 200; + op.minmax.reset(minmax); + auto output_toco_op = SerializeAndDeserialize( + GetOperator("FAKE_QUANT", OperatorType::kFakeQuant), op); + EXPECT_EQ(op.minmax->min, output_toco_op->minmax->min); + EXPECT_EQ(op.minmax->max, output_toco_op->minmax->max); +} + +TEST_F(OperatorTest, CustomFullyConnected) { + FullyConnectedOperator op; + op.fused_activation_function = FusedActivationFunctionType::kRelu6; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("FULLY_CONNECTED", OperatorType::kFullyConnected), op); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); +} + +TEST_F(OperatorTest, BuiltinL2Pool) { + L2PoolOperator op; + op.stride_width = 123; + op.stride_height = 124; + op.padding.type = PaddingType::kValid; + op.kwidth = 480; + op.kheight = 1080; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("L2_POOL_2D", OperatorType::kL2Pool), op); + EXPECT_EQ(op.stride_width, output_toco_op->stride_width); + EXPECT_EQ(op.stride_height, output_toco_op->stride_height); + EXPECT_EQ(op.padding.type, output_toco_op->padding.type); + EXPECT_EQ(op.kwidth, output_toco_op->kwidth); + EXPECT_EQ(op.kheight, output_toco_op->kheight); +} + +TEST_F(OperatorTest, BuiltinLocalResponseNormalization) { + LocalResponseNormalizationOperator op; + op.range = 123; + op.bias = 1.23; + op.alpha = 12.3; + op.beta = .123; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("LOCAL_RESPONSE_NORMALIZATION", + OperatorType::kLocalResponseNormalization), + op); + EXPECT_EQ(op.range, output_toco_op->range); + EXPECT_EQ(op.bias, output_toco_op->bias); + EXPECT_EQ(op.alpha, output_toco_op->alpha); + EXPECT_EQ(op.beta, output_toco_op->beta); +} + +TEST_F(OperatorTest, BuiltinMaxPool) { + MaxPoolOperator op; + op.stride_width = 123; + op.stride_height = 124; + op.padding.type = PaddingType::kValid; + op.kwidth = 480; + op.kheight = 1080; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("MAX_POOL_2D", OperatorType::kMaxPool), op); + EXPECT_EQ(op.stride_width, output_toco_op->stride_width); + EXPECT_EQ(op.stride_height, output_toco_op->stride_height); + EXPECT_EQ(op.padding.type, output_toco_op->padding.type); + EXPECT_EQ(op.kwidth, output_toco_op->kwidth); + EXPECT_EQ(op.kheight, output_toco_op->kheight); +} + +TEST_F(OperatorTest, BuiltinReshape) { + TensorFlowReshapeOperator op; + op.shape = {1, 2, 4, 5, 8}; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("RESHAPE", OperatorType::kTensorFlowReshape), op); + EXPECT_EQ(op.shape, output_toco_op->shape); +} + +TEST_F(OperatorTest, CustomSoftmax) { + SoftmaxOperator op; + op.beta = 123.1; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("SOFTMAX", OperatorType::kSoftmax), op); + EXPECT_EQ(op.beta, output_toco_op->beta); +} + +TEST_F(OperatorTest, BuiltinSpaceToDepth) { + SpaceToDepthOperator op; + op.block_size = 123; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("SPACE_TO_DEPTH", OperatorType::kSpaceToDepth), op); + EXPECT_EQ(op.block_size, output_toco_op->block_size); +} + +TEST_F(OperatorTest, CustomSplit) { + TensorFlowSplitOperator op; + op.num_split = 123; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("SPLIT", OperatorType::kTensorFlowSplit), op); + EXPECT_EQ(op.num_split, output_toco_op->num_split); +} + +TEST_F(OperatorTest, BuiltinAveragePool) { + AveragePoolOperator op; + op.fused_activation_function = FusedActivationFunctionType::kRelu6; + op.stride_width = 123; + op.stride_height = 124; + op.padding.type = PaddingType::kValid; + op.kwidth = 480; + op.kheight = 1080; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("AVERAGE_POOL_2D", OperatorType::kAveragePool), op); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); + EXPECT_EQ(op.stride_width, output_toco_op->stride_width); + EXPECT_EQ(op.stride_height, output_toco_op->stride_height); + EXPECT_EQ(op.padding.type, output_toco_op->padding.type); + EXPECT_EQ(op.kwidth, output_toco_op->kwidth); + EXPECT_EQ(op.kheight, output_toco_op->kheight); +} + +TEST_F(OperatorTest, BuiltinConvolution) { + ConvOperator op; + op.stride_width = 123; + op.stride_height = 124; + op.padding.type = PaddingType::kValid; + op.fused_activation_function = FusedActivationFunctionType::kRelu6; + auto output_toco_op = + SerializeAndDeserialize(GetOperator("CONV_2D", OperatorType::kConv), op); + EXPECT_EQ(op.stride_width, output_toco_op->stride_width); + EXPECT_EQ(op.stride_height, output_toco_op->stride_height); + EXPECT_EQ(op.padding.type, output_toco_op->padding.type); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); +} + +TEST_F(OperatorTest, BuiltinDepthwiseConvolution) { + DepthwiseConvOperator op; + op.stride_width = 123; + op.stride_height = 124; + op.padding.type = PaddingType::kValid; + op.depth_multiplier = 6; + op.fused_activation_function = FusedActivationFunctionType::kRelu6; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("DEPTHWISE_CONV_2D", OperatorType::kDepthwiseConv), op); + EXPECT_EQ(op.stride_width, output_toco_op->stride_width); + EXPECT_EQ(op.stride_height, output_toco_op->stride_height); + EXPECT_EQ(op.padding.type, output_toco_op->padding.type); + EXPECT_EQ(op.depth_multiplier, output_toco_op->depth_multiplier); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); +} + +TEST_F(OperatorTest, BuiltinL2Norm) { + L2NormalizationOperator op; + op.fused_activation_function = FusedActivationFunctionType::kRelu6; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("L2_NORMALIZATION", OperatorType::kL2Normalization), op); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); +} + +TEST_F(OperatorTest, BuiltinMul) { + MulOperator op; + op.fused_activation_function = FusedActivationFunctionType::kRelu6; + auto output_toco_op = + SerializeAndDeserialize(GetOperator("MUL", OperatorType::kMul), op); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); +} + +TEST_F(OperatorTest, Svdf) { + SvdfOperator op; + op.fused_activation_function = FusedActivationFunctionType::kRelu; + auto output_toco_op = + SerializeAndDeserialize(GetOperator("SVDF", OperatorType::kSvdf), op); + EXPECT_EQ(op.fused_activation_function, + output_toco_op->fused_activation_function); +} + +TEST_F(OperatorTest, TensorFlowUnsupported) { + TensorFlowUnsupportedOperator op; + op.tensorflow_op = "MyCustomUnsupportedOp"; + + ::tensorflow::NodeDef node_def; + auto attr = node_def.mutable_attr(); + (*attr)["float_attr"].set_f(2.0); + (*attr)["str_attr"].set_s("Hello World"); + (*attr)["int_attr"].set_i(17); + (*attr)["bool_attr"].set_b(true); + node_def.SerializeToString(&op.tensorflow_node_def); + + auto output_toco_op = + SerializeAndDeserialize(GetOperator("TENSORFLOW_UNSUPPORTED", + OperatorType::kTensorFlowUnsupported), + op); + + ::tensorflow::NodeDef output_node_def; + output_node_def.ParseFromString(output_toco_op->tensorflow_node_def); + const auto& output_attr = output_node_def.attr(); + EXPECT_EQ(2.0, output_attr.at("float_attr").f()); + EXPECT_EQ("Hello World", output_attr.at("str_attr").s()); + EXPECT_EQ(17, output_attr.at("int_attr").i()); + EXPECT_EQ(true, output_attr.at("bool_attr").b()); +} + +TEST_F(OperatorTest, TensorFlowUnsupportedWithoutAttr) { + TensorFlowUnsupportedOperator op; + op.tensorflow_op = "MyCustomUnsupportedOp"; + auto output_toco_op = + SerializeAndDeserialize(GetOperator("TENSORFLOW_UNSUPPORTED", + OperatorType::kTensorFlowUnsupported), + op); + + ::tensorflow::NodeDef output_node_def; + output_node_def.ParseFromString(output_toco_op->tensorflow_node_def); + EXPECT_TRUE(output_node_def.attr().empty()); +} + +} // namespace +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tflite/simple_operator.h b/tensorflow/contrib/lite/toco/tflite/simple_operator.h new file mode 100644 index 0000000000..992b98baca --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/simple_operator.h @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_SIMPLE_OPERATOR_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_SIMPLE_OPERATOR_H_ + +#include "tensorflow/contrib/lite/toco/tflite/operator.h" + +namespace toco { + +namespace tflite { + +// Simple operators don't have any configuration options and can be trivially +// serialized and deserialized. Note that most of toco's operators will +// likely be supported as builtin operators in TF Lite. Simple (and custom) +// operators are mostly a convenience for the times when tf.mini supports more +// operators than TF Lite. +// +// Template argument T must derive from ::toco::Operator. +template +class SimpleOperator : public BaseOperator { + public: + using BaseOperator::BaseOperator; + Options Serialize(const Operator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return Options(); + } + std::unique_ptr Deserialize( + const BuiltinOptions* builtin_options, + const CustomOptions* custom_options) const override { + return std::unique_ptr(new T); + } +}; + +} // namespace tflite + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_SIMPLE_OPERATOR_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/types.cc b/tensorflow/contrib/lite/toco/tflite/types.cc new file mode 100644 index 0000000000..5b4dbfae24 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/types.cc @@ -0,0 +1,165 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/types.h" + +namespace toco { + +namespace tflite { + +namespace { +template +DataBuffer::FlatBufferOffset CopyBuffer( + const Array& array, flatbuffers::FlatBufferBuilder* builder) { + using NativeT = ::toco::DataType; + const auto& src_data = array.GetBuffer().data; + const uint8_t* dst_data = reinterpret_cast(src_data.data()); + auto size = src_data.size() * sizeof(NativeT); + return builder->CreateVector(dst_data, size); +} + +template +void CopyBuffer(const ::tflite::Buffer& buffer, Array* array) { + using NativeT = ::toco::DataType; + auto* src_buffer = buffer.data(); + const NativeT* src_data = + reinterpret_cast(src_buffer->data()); + int num_items = src_buffer->size() / sizeof(NativeT); + + std::vector* dst_data = &array->GetMutableBuffer().data; + for (int i = 0; i < num_items; ++i) { + dst_data->push_back(*src_data); + ++src_data; + } +} +} // namespace + +::tflite::TensorType DataType::Serialize(ArrayDataType array_data_type) { + switch (array_data_type) { + case ArrayDataType::kFloat: + return ::tflite::TensorType_FLOAT32; + case ArrayDataType::kInt32: + return ::tflite::TensorType_INT32; + case ArrayDataType::kUint8: + return ::tflite::TensorType_UINT8; + default: + // FLOAT32 is filled for unknown data types. + // TODO(ycling): Implement type inference in TF Lite interpreter. + return ::tflite::TensorType_FLOAT32; + } +} + +ArrayDataType DataType::Deserialize(int tensor_type) { + switch (::tflite::TensorType(tensor_type)) { + case ::tflite::TensorType_FLOAT32: + return ArrayDataType::kFloat; + case ::tflite::TensorType_INT32: + return ArrayDataType::kInt32; + case ::tflite::TensorType_UINT8: + return ArrayDataType::kUint8; + default: + LOG(FATAL) << "Unhandled tensor type '" << tensor_type << "'."; + } +} + +flatbuffers::Offset> DataBuffer::Serialize( + const Array& array, flatbuffers::FlatBufferBuilder* builder) { + if (!array.buffer) return 0; // an empty buffer, usually an output. + + switch (array.data_type) { + case ArrayDataType::kFloat: + return CopyBuffer(array, builder); + case ArrayDataType::kInt32: + return CopyBuffer(array, builder); + case ArrayDataType::kUint8: + return CopyBuffer(array, builder); + default: + LOG(FATAL) << "Unhandled array data type."; + } +} + +void DataBuffer::Deserialize(const ::tflite::Tensor& tensor, + const ::tflite::Buffer& buffer, Array* array) { + if (tensor.buffer() == 0) return; // an empty buffer, usually an output. + if (buffer.data() == nullptr) return; // a non-defined buffer. + + switch (tensor.type()) { + case ::tflite::TensorType_FLOAT32: + return CopyBuffer(buffer, array); + case ::tflite::TensorType_INT32: + return CopyBuffer(buffer, array); + case ::tflite::TensorType_UINT8: + return CopyBuffer(buffer, array); + default: + LOG(FATAL) << "Unhandled tensor type."; + } +} + +::tflite::Padding Padding::Serialize(PaddingType padding_type) { + switch (padding_type) { + case PaddingType::kSame: + return ::tflite::Padding_SAME; + case PaddingType::kValid: + return ::tflite::Padding_VALID; + default: + LOG(FATAL) << "Unhandled padding type."; + } +} + +PaddingType Padding::Deserialize(int padding) { + switch (::tflite::Padding(padding)) { + case ::tflite::Padding_SAME: + return PaddingType::kSame; + case ::tflite::Padding_VALID: + return PaddingType::kValid; + default: + LOG(FATAL) << "Unhandled padding."; + } +} + +::tflite::ActivationFunctionType ActivationFunction::Serialize( + FusedActivationFunctionType faf_type) { + switch (faf_type) { + case FusedActivationFunctionType::kNone: + return ::tflite::ActivationFunctionType_NONE; + case FusedActivationFunctionType::kRelu: + return ::tflite::ActivationFunctionType_RELU; + case FusedActivationFunctionType::kRelu6: + return ::tflite::ActivationFunctionType_RELU6; + case FusedActivationFunctionType::kRelu1: + return ::tflite::ActivationFunctionType_RELU1; + default: + LOG(FATAL) << "Unhandled fused activation function type."; + } +} + +FusedActivationFunctionType ActivationFunction::Deserialize( + int activation_function) { + switch (::tflite::ActivationFunctionType(activation_function)) { + case ::tflite::ActivationFunctionType_NONE: + return FusedActivationFunctionType::kNone; + case ::tflite::ActivationFunctionType_RELU: + return FusedActivationFunctionType::kRelu; + case ::tflite::ActivationFunctionType_RELU6: + return FusedActivationFunctionType::kRelu6; + case ::tflite::ActivationFunctionType_RELU1: + return FusedActivationFunctionType::kRelu1; + default: + LOG(FATAL) << "Unhandled fused activation function type."; + } +} + +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tflite/types.h b/tensorflow/contrib/lite/toco/tflite/types.h new file mode 100644 index 0000000000..f7c5140510 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/types.h @@ -0,0 +1,58 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_TYPES_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_TYPES_H_ + +#include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/toco/model.h" + +namespace toco { + +namespace tflite { + +struct DataType { + static ::tflite::TensorType Serialize(ArrayDataType array_data_type); + static ArrayDataType Deserialize(int tensor_type); +}; + +struct DataBuffer { + using FlatBufferOffset = flatbuffers::Offset>; + + // Build the flatbuffer representation of a toco's Array and return the + // corresponding offset into the flatbuffer. Note that data from the array + // will be copied into the flatbuffer. + static FlatBufferOffset Serialize(const Array& array, + flatbuffers::FlatBufferBuilder* builder); + // Copy data from the given tensor into toco's Array. + static void Deserialize(const ::tflite::Tensor& tensor, + const ::tflite::Buffer& buffer, Array* array); +}; + +struct Padding { + static ::tflite::Padding Serialize(PaddingType padding_type); + static PaddingType Deserialize(int padding); +}; + +struct ActivationFunction { + static ::tflite::ActivationFunctionType Serialize( + FusedActivationFunctionType faf_type); + static FusedActivationFunctionType Deserialize(int activation_function); +}; + +} // namespace tflite + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_TYPES_H_ diff --git a/tensorflow/contrib/lite/toco/tflite/types_test.cc b/tensorflow/contrib/lite/toco/tflite/types_test.cc new file mode 100644 index 0000000000..174b78f3e6 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tflite/types_test.cc @@ -0,0 +1,191 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tflite/types.h" + +#include +#include + +namespace toco { + +namespace tflite { +namespace { + +using flatbuffers::FlatBufferBuilder; +using flatbuffers::Offset; +using flatbuffers::Vector; + +// These are types that exist in TF Mini but don't have a correspondence +// in TF Lite. +static const ArrayDataType kUnsupportedTocoTypes[] = { + ArrayDataType::kNone, ArrayDataType::kBool, ArrayDataType::kInt64}; + +// These are TF Lite types for which there is no correspondence in TF Mini. +static const ::tflite::TensorType kUnsupportedTfLiteTypes[] = { + ::tflite::TensorType_FLOAT16}; + +// A little helper to match flatbuffer offsets. +MATCHER_P(HasOffset, value, "") { return arg.o == value; } + +// Helper function that creates an array, writes it into a flatbuffer, and then +// reads it back in. +template +Array ToFlatBufferAndBack(std::initializer_list<::toco::DataType> items) { + // NOTE: This test does not construct the full buffers list. Since + // Deserialize normally takes a buffer, we need to synthesize one and provide + // an index that is non-zero so the buffer is not assumed to be emtpy. + Array src; + src.data_type = T; + src.GetMutableBuffer().data = items; + + Array result; + flatbuffers::FlatBufferBuilder builder; + builder.Finish(CreateTensor(builder, 0, DataType::Serialize(T), + /*buffer*/ 1)); // Can't use 0 which means empty. + flatbuffers::FlatBufferBuilder buffer_builder; + Offset> data_buffer = + DataBuffer::Serialize(src, &buffer_builder); + buffer_builder.Finish(::tflite::CreateBuffer(buffer_builder, data_buffer)); + + auto* tensor = + flatbuffers::GetRoot<::tflite::Tensor>(builder.GetBufferPointer()); + auto* buffer = + flatbuffers::GetRoot<::tflite::Buffer>(buffer_builder.GetBufferPointer()); + DataBuffer::Deserialize(*tensor, *buffer, &result); + return result; +} + +TEST(DataType, SupportedTypes) { + std::vector> testdata = { + {ArrayDataType::kUint8, ::tflite::TensorType_UINT8}, + {ArrayDataType::kInt32, ::tflite::TensorType_INT32}, + {ArrayDataType::kFloat, ::tflite::TensorType_FLOAT32}}; + for (auto x : testdata) { + EXPECT_EQ(x.second, DataType::Serialize(x.first)); + EXPECT_EQ(x.first, DataType::Deserialize(x.second)); + } +} + +TEST(DataType, UnsupportedTypes) { + for (::tflite::TensorType t : kUnsupportedTfLiteTypes) { + EXPECT_DEATH(DataType::Deserialize(t), "Unhandled tensor type."); + } + + // Unsupported types are all serialized as FLOAT32 currently. + for (ArrayDataType t : kUnsupportedTocoTypes) { + EXPECT_EQ(::tflite::TensorType_FLOAT32, DataType::Serialize(t)); + } +} + +TEST(DataBuffer, EmptyBuffers) { + flatbuffers::FlatBufferBuilder builder; + Array array; + EXPECT_THAT(DataBuffer::Serialize(array, &builder), HasOffset(0)); + + builder.Finish(::tflite::CreateTensor(builder)); + auto* tensor = + flatbuffers::GetRoot<::tflite::Tensor>(builder.GetBufferPointer()); + flatbuffers::FlatBufferBuilder buffer_builder; + Offset> v = buffer_builder.CreateVector({}); + buffer_builder.Finish(::tflite::CreateBuffer(buffer_builder, v)); + auto* buffer = + flatbuffers::GetRoot<::tflite::Buffer>(buffer_builder.GetBufferPointer()); + + DataBuffer::Deserialize(*tensor, *buffer, &array); + EXPECT_EQ(nullptr, array.buffer); +} + +TEST(DataBuffer, UnsupportedTypes) { + for (ArrayDataType t : kUnsupportedTocoTypes) { + flatbuffers::FlatBufferBuilder builder; + Array array; + array.data_type = t; + array.GetMutableBuffer(); // This is OK. + EXPECT_DEATH(DataBuffer::Serialize(array, &builder), + "Unhandled array data type."); + } + + for (::tflite::TensorType t : kUnsupportedTfLiteTypes) { + flatbuffers::FlatBufferBuilder builder; + builder.Finish(::tflite::CreateTensor(builder, 0, t, /*buffer*/ 1)); + flatbuffers::FlatBufferBuilder buffer_builder; + Offset> v = buffer_builder.CreateVector({1}); + buffer_builder.Finish(::tflite::CreateBuffer(buffer_builder, v)); + auto* buffer = flatbuffers::GetRoot<::tflite::Buffer>( + buffer_builder.GetBufferPointer()); + auto* tensor = + flatbuffers::GetRoot<::tflite::Tensor>(builder.GetBufferPointer()); + Array array; + EXPECT_DEATH(DataBuffer::Deserialize(*tensor, *buffer, &array), + "Unhandled tensor type."); + } +} + +TEST(DataBuffer, Float) { + Array recovered = ToFlatBufferAndBack({1.0f, 2.0f}); + EXPECT_THAT(recovered.GetBuffer().data, + ::testing::ElementsAre(1.0f, 2.0f)); +} + +TEST(DataBuffer, Uint8) { + Array recovered = ToFlatBufferAndBack({127, 244}); + EXPECT_THAT(recovered.GetBuffer().data, + ::testing::ElementsAre(127, 244)); +} + +TEST(DataBuffer, Int32) { + Array recovered = ToFlatBufferAndBack({1, 1 << 30}); + EXPECT_THAT(recovered.GetBuffer().data, + ::testing::ElementsAre(1, 1 << 30)); +} + +TEST(Padding, All) { + EXPECT_EQ(::tflite::Padding_SAME, Padding::Serialize(PaddingType::kSame)); + EXPECT_EQ(PaddingType::kSame, Padding::Deserialize(::tflite::Padding_SAME)); + + EXPECT_EQ(::tflite::Padding_VALID, Padding::Serialize(PaddingType::kValid)); + EXPECT_EQ(PaddingType::kValid, Padding::Deserialize(::tflite::Padding_VALID)); + + EXPECT_DEATH(Padding::Serialize(static_cast(10000)), + "Unhandled padding type."); + EXPECT_DEATH(Padding::Deserialize(10000), "Unhandled padding."); +} + +TEST(ActivationFunction, All) { + std::vector< + std::pair> + testdata = {{FusedActivationFunctionType::kNone, + ::tflite::ActivationFunctionType_NONE}, + {FusedActivationFunctionType::kRelu, + ::tflite::ActivationFunctionType_RELU}, + {FusedActivationFunctionType::kRelu6, + ::tflite::ActivationFunctionType_RELU6}, + {FusedActivationFunctionType::kRelu1, + ::tflite::ActivationFunctionType_RELU1}}; + for (auto x : testdata) { + EXPECT_EQ(x.second, ActivationFunction::Serialize(x.first)); + EXPECT_EQ(x.first, ActivationFunction::Deserialize(x.second)); + } + + EXPECT_DEATH(ActivationFunction::Serialize( + static_cast(10000)), + "Unhandled fused activation function type."); + EXPECT_DEATH(ActivationFunction::Deserialize(10000), + "Unhandled fused activation function type."); +} + +} // namespace +} // namespace tflite + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco.cc b/tensorflow/contrib/lite/toco/toco.cc new file mode 100644 index 0000000000..f01ec0ec61 --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco.cc @@ -0,0 +1,119 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_cmdline_flags.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_cmdline_flags.h" +#include "tensorflow/contrib/lite/toco/toco_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/toco_tooling.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" +#include "tensorflow/core/platform/logging.h" + +#ifndef CHECK_OK +#define CHECK_OK(val) CHECK_EQ((val).ok(), true) +#define QCHECK_OK(val) QCHECK_EQ((val).ok(), true) +#endif + +namespace toco { +namespace { + +#define QCHECK_REQUIRE_TOCO_FLAG(arg) \ + QCHECK(parsed_toco_flags.arg.specified()) << "Missing required flag: " #arg; + +void CheckFilePermissions(const ParsedTocoFlags& parsed_toco_flags, + const ParsedModelFlags& parsed_model_flags, + const TocoFlags& toco_flags) { + port::CheckInitGoogleIsDone("InitGoogle is not done yet"); + + QCHECK_REQUIRE_TOCO_FLAG(input_file) + QCHECK_OK(port::file::Exists(parsed_toco_flags.input_file.value(), + port::file::Defaults())) + << "Specified input_file does not exist: " + << parsed_toco_flags.input_file.value(); + QCHECK_OK(port::file::Readable(parsed_toco_flags.input_file.value(), + port::file::Defaults())) + << "Specified input_file exists, but is not readable: " + << parsed_toco_flags.input_file.value(); + + QCHECK_REQUIRE_TOCO_FLAG(output_file); + QCHECK_OK(port::file::Writable(parsed_toco_flags.output_file.value())) + << "parsed_toco_flags.input_file.value() output_file is not writable: " + << parsed_toco_flags.output_file.value(); +} + +void ToolMain(const ParsedTocoFlags& parsed_toco_flags, + const ParsedModelFlags& parsed_model_flags) { + ModelFlags model_flags; + ReadModelFlagsFromCommandLineFlags(parsed_model_flags, &model_flags); + + TocoFlags toco_flags; + ReadTocoFlagsFromCommandLineFlags(parsed_toco_flags, &toco_flags); + + CheckFilePermissions(parsed_toco_flags, parsed_model_flags, toco_flags); + + string input_file_contents; + CHECK_OK(port::file::GetContents(parsed_toco_flags.input_file.value(), + &input_file_contents, + port::file::Defaults())); + std::unique_ptr model = + Import(toco_flags, model_flags, input_file_contents); + Transform(toco_flags, model.get()); + string output_file_contents; + Export(toco_flags, *model, toco_flags.allow_custom_ops(), + &output_file_contents); + CHECK_OK(port::file::SetContents(parsed_toco_flags.output_file.value(), + output_file_contents, + port::file::Defaults())); +} + +} // namespace +} // namespace toco + +int main(int argc, char** argv) { + toco::string msg; + toco::ParsedTocoFlags parsed_toco_flags; + toco::ParsedModelFlags parsed_model_flags; + + // If no args were specified, give a help string to be helpful. + int* effective_argc = &argc; + char** effective_argv = argv; + if (argc == 1) { + // No arguments, so manufacture help argv. + static int dummy_argc = 2; + static char* dummy_argv[] = {argv[0], const_cast("--help")}; + effective_argc = &dummy_argc; + effective_argv = dummy_argv; + } + + // Parse toco flags and command flags in sequence, each one strips off args, + // giving InitGoogle a chance to handle all remaining arguments. + bool toco_success = toco::ParseTocoFlagsFromCommandLineFlags( + effective_argc, effective_argv, &msg, &parsed_toco_flags); + bool model_success = toco::ParseModelFlagsFromCommandLineFlags( + effective_argc, effective_argv, &msg, &parsed_model_flags); + if (!toco_success || !model_success || !msg.empty()) { + fprintf(stderr, "%s", msg.c_str()); + fflush(stderr); + return 1; + } + toco::port::InitGoogle(argv[0], effective_argc, &effective_argv, true); + toco::ToolMain(parsed_toco_flags, parsed_model_flags); +} diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc new file mode 100644 index 0000000000..d43c3b4a8e --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc @@ -0,0 +1,206 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "absl/strings/numbers.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "absl/strings/strip.h" +#include "tensorflow/contrib/lite/toco/toco_cmdline_flags.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace toco { + +bool ParseTocoFlagsFromCommandLineFlags( + int* argc, char* argv[], string* msg, + ParsedTocoFlags* parsed_toco_flags_ptr) { + using tensorflow::Flag; + ParsedTocoFlags& parsed_flags = *parsed_toco_flags_ptr; + std::vector flags = { + Flag("input_file", parsed_flags.input_file.bind(), + parsed_flags.input_file.default_value(), + "Input file (model of any supported format). For Protobuf " + "formats, both text and binary are supported regardless of file " + "extension."), + Flag("output_file", parsed_flags.output_file.bind(), + parsed_flags.output_file.default_value(), + "Output file. " + "For Protobuf formats, the binary format will be used."), + Flag("input_format", parsed_flags.input_format.bind(), + parsed_flags.input_format.default_value(), + "Input file format. One of: tensorflow_graphdef, "), + Flag("output_format", parsed_flags.output_format.bind(), + parsed_flags.output_format.default_value(), "Output file format."), + Flag("default_ranges_min", parsed_flags.default_ranges_min.bind(), + parsed_flags.default_ranges_min.default_value(), + "If defined, will be used as the default value for the min bound " + "of min/max ranges used for quantization."), + Flag("default_ranges_max", parsed_flags.default_ranges_max.bind(), + parsed_flags.default_ranges_max.default_value(), + "If defined, will be used as the default value for the max bound " + "of min/max ranges used for quantization."), + Flag("input_type", parsed_flags.input_type.bind(), + parsed_flags.input_type.default_value(), + "Data type of the input array in the " + "output file. "), + Flag("input_types", parsed_flags.input_types.bind(), + parsed_flags.input_types.default_value(), + "Data types of the input arrays in the " + "output file. " + "Comma-separated list matching the enumeration order of " + "input_arrays."), + Flag("inference_type", parsed_flags.inference_type.bind(), + parsed_flags.inference_type.default_value(), + "Data type, in the output file, of internal and output arrays " + "that are FLOAT in the input file. Thus, the value FLOAT means " + "keep doing floating-point inference, while the value " + "QUANTIZED_UINT8 means replace all internal floating-point " + "arithmetic by integer arithmetic producing 8-bit integer " + "activations instead of float activations --- which we call " + "\'quantized inference\'."), + Flag("drop_fake_quant", parsed_flags.drop_fake_quant.bind(), + parsed_flags.drop_fake_quant.default_value(), + "Ignore and discard FakeQuant nodes. For instance, that can be used " + "to " + "generate plain float code without fake-quantization from a " + "quantized " + "graph."), + Flag( + "reorder_across_fake_quant", + parsed_flags.reorder_across_fake_quant.bind(), + parsed_flags.reorder_across_fake_quant.default_value(), + "Normally, FakeQuant nodes must be strict boundaries for graph " + "transformations, in order to ensure that quantized inference has " + "the " + "exact same arithmetic behavior as quantized training --- which is " + "the " + "whole point of quantized training and of FakeQuant nodes in the " + "first " + "place. However, that entails subtle requirements on where exactly " + "FakeQuant nodes must be placed in the graph. Some quantized graphs " + "have FakeQuant nodes at unexpected locations, that prevent graph " + "transformations that are necessary in order to generate inference " + "code for these graphs. Such graphs should be fixed, but as a " + "temporary work-around, setting this reorder_across_fake_quant flag " + "allows toco to perform necessary graph transformaitons on them, " + "at the cost of no longer faithfully matching inference and training " + "arithmetic."), + Flag("allow_custom_ops", parsed_flags.allow_custom_ops.bind(), + parsed_flags.allow_custom_ops.default_value(), + "If true, allow TOCO to create TF Lite Custom operators for all the" + "unsupported Tensorflow ops."), + }; + bool asked_for_help = + *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help")); + if (asked_for_help) { + *msg += tensorflow::Flags::Usage(argv[0], flags); + return false; + } else { + return tensorflow::Flags::Parse(argc, argv, flags); + } +} + +void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags, + TocoFlags* toco_flags) { + namespace port = toco::port; + port::CheckInitGoogleIsDone("InitGoogle is not done yet"); + + enum class FlagRequirement { kNone, kMustBeSpecified, kMustNotBeSpecified }; + +#define ENFORCE_FLAG_REQUIREMENT(name, requirement) \ + do { \ + if (requirement == FlagRequirement::kMustBeSpecified) { \ + QCHECK(parsed_toco_flags.name.specified()) \ + << "Missing required flag: " << #name; \ + } \ + if (requirement == FlagRequirement::kMustNotBeSpecified) { \ + QCHECK(!parsed_toco_flags.name.specified()) \ + << "Given other flags, this flag should not have been specified: " \ + << #name; \ + } \ + } while (false) + +#define READ_TOCO_FLAG(name, requirement) \ + ENFORCE_FLAG_REQUIREMENT(name, requirement); \ + do { \ + if (parsed_toco_flags.name.specified()) { \ + toco_flags->set_##name(parsed_toco_flags.name.value()); \ + } \ + } while (false) + +#define PARSE_TOCO_FLAG(Type, name, requirement) \ + ENFORCE_FLAG_REQUIREMENT(name, requirement); \ + do { \ + if (parsed_toco_flags.name.specified()) { \ + Type x; \ + QCHECK(Type##_Parse(parsed_toco_flags.name.value(), &x)) \ + << "Unrecognized " << #Type << " value " \ + << parsed_toco_flags.name.value(); \ + toco_flags->set_##name(x); \ + } \ + } while (false) + + PARSE_TOCO_FLAG(FileFormat, input_format, FlagRequirement::kMustBeSpecified); + PARSE_TOCO_FLAG(FileFormat, output_format, FlagRequirement::kMustBeSpecified); + FlagRequirement tflite_flags_requirement = + toco_flags->output_format() == TFLITE + ? FlagRequirement::kMustBeSpecified + : FlagRequirement::kMustNotBeSpecified; + PARSE_TOCO_FLAG(IODataType, inference_type, tflite_flags_requirement); + READ_TOCO_FLAG(default_ranges_min, FlagRequirement::kNone); + READ_TOCO_FLAG(default_ranges_max, FlagRequirement::kNone); + READ_TOCO_FLAG(drop_fake_quant, FlagRequirement::kNone); + READ_TOCO_FLAG(reorder_across_fake_quant, FlagRequirement::kNone); + READ_TOCO_FLAG(allow_custom_ops, FlagRequirement::kNone); + +#undef READ_TOCO_FLAG +#undef PARSE_TOCO_FLAG + + const bool input_type_specified = parsed_toco_flags.input_type.specified(); + const bool input_types_specified = parsed_toco_flags.input_types.specified(); + if (toco_flags->output_format() == TFLITE) { + QCHECK(input_type_specified || input_types_specified) + << "When output_format=TFLITE, either input_type or input_types needs " + "to be specified."; + } else { + QCHECK(!input_type_specified && !input_types_specified) + << "With this output_format, neither input_type nor input_types must " + "be specified."; + } + QCHECK(!(input_type_specified && input_types_specified)) + << "input_type and input_types are mutually exclusive"; + if (input_type_specified) { + IODataType type; + QCHECK(IODataType_Parse(parsed_toco_flags.input_type.value(), &type)) + << "Unrecognized input_type: " << parsed_toco_flags.input_type.value(); + toco_flags->add_input_types(type); + } + if (input_types_specified) { + std::vector input_types = + absl::StrSplit(parsed_toco_flags.input_types.value(), ','); + for (const string& t : input_types) { + IODataType type; + QCHECK(IODataType_Parse(t, &type)) + << "Unrecognized input_types value " << t + << " in input_types=" << parsed_toco_flags.input_types.value(); + toco_flags->add_input_types(type); + } + } +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.h b/tensorflow/contrib/lite/toco/toco_cmdline_flags.h new file mode 100644 index 0000000000..155a6fea87 --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.h @@ -0,0 +1,35 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_CMDLINE_FLAGS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_CMDLINE_FLAGS_H_ + +#include +#include +#include "tensorflow/contrib/lite/toco/args.h" +#include "tensorflow/contrib/lite/toco/toco_flags.pb.h" + +namespace toco { +// Parse and remove arguments handled from toco. Returns true if parsing +// is successful. msg has the usage string if there was an error or +// "--help" was specified +bool ParseTocoFlagsFromCommandLineFlags(int* argc, char* argv[], string* msg, + ParsedTocoFlags* parsed_toco_flags_ptr); +// Populate the TocoFlags proto with parsed_toco_flags data. +void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags, + TocoFlags* toco_flags); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_CMDLINE_FLAGS_H_ diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto new file mode 100644 index 0000000000..fd7c29fdc7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -0,0 +1,126 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +syntax = "proto2"; +package toco; + +// Supported I/O file formats. Some formats may be input-only or output-only. +enum FileFormat { + FILE_FORMAT_UNKNOWN = 0; + + // GraphDef, third_party/tensorflow/core/framework/graph.proto + TENSORFLOW_GRAPHDEF = 1; + + // Tensorflow's mobile inference model. + // third_party/tensorflow/contrib/tflite/schema.fbs + TFLITE = 2; + + // GraphViz + // Export-only. + GRAPHVIZ_DOT = 3; +} + +// IODataType describes the numeric data types to be used by the output format. +// See input_type and inference_type below. +enum IODataType { + IO_DATA_TYPE_UNKNOWN = 0; + + // Float32, not quantized + FLOAT = 1; + + // Uint8, quantized + QUANTIZED_UINT8 = 2; + + // Int32, not quantized + INT32 = 3; + + // Int64, not quantized + INT64 = 4; + + // String, not quantized + STRING = 5; +} + +// TocoFlags encodes extra parameters that drive tooling operations, that +// are not normally encoded in model files and in general may not be thought +// of as properties of models, instead describing how models are to be +// processed in the context of the present tooling job. +// Next Id: 11 +message TocoFlags { + // Input file format + optional FileFormat input_format = 1; + + // Output file format + optional FileFormat output_format = 2; + + // Numeric data types of the input arrays in the output format. + // This controls what input types the output file will be expecting. + // This is not a description of the input types of the input file. + // For example, the input file may have a float input placeholder, + // but we may want to generate a quantized TFLite file from it, + // or a float TFLite file taking a quantized input. + // + // The length of this list should match the length of the input_arrays + // list in ModelFlags. + repeated IODataType input_types = 9; + + // Numeric data type of the internal activations array and output array. + // + // As a matter of implementation detail, most model + // parameter arrays (weights, etc) will tend to also use this data type. + // Not all will, though: for instance, bias vectors will typically + // get quantized as int32 when weights and activations get quantized as uint8. + optional IODataType inference_type = 4; + + // default_ranges_min and default_ranges_max are helpers to experiment + // with quantization of models. Normally, quantization requires the input + // model to have (min, max) range information for every activations array. + // This is needed in order to know how to quantize arrays and still achieve + // satisfactory accuracy. However, in some circumstances one would just like + // to estimate the performance of quantized inference, without caring about + // accuracy. That is what default_ranges_min and default_ranges_max are for: + // when specified, they will be used as default (min, max) range boundaries + // for all activation arrays that lack (min, max) range information, thus + // allowing for quantization to proceed. + // + // It should be clear from the above explanation that these parameters are + // for experimentation purposes only and should not be used in production: + // they make it easy to quantize models, but the resulting quantized model + // will be inaccurate. + optional float default_ranges_min = 5; + optional float default_ranges_max = 6; + + // Ignore and discard FakeQuant nodes. For instance, that can be used to + // generate plain float code without fake-quantization from a quantized + // graph. + optional bool drop_fake_quant = 7; + + // Normally, FakeQuant nodes must be strict boundaries for graph + // transformations, in order to ensure that quantized inference has the + // exact same arithmetic behavior as quantized training --- which is the + // whole point of quantized training and of FakeQuant nodes in the first + // place. However, that entails subtle requirements on where exactly + // FakeQuant nodes must be placed in the graph. Some quantized graphs + // have FakeQuant nodes at unexpected locations, that prevent graph + // transformations that are necessary in order to generate inference + // code for these graphs. Such graphs should be fixed, but as a + // temporary work-around, setting this reorder_across_fake_quant flag + // allows toco to perform necessary graph transformaitons on them, + // at the cost of no longer faithfully matching inference and training + // arithmetic. + optional bool reorder_across_fake_quant = 8; + + // If true, allow TOCO to create TF Lite Custom operators for all the + // unsupported Tensorflow ops. + optional bool allow_custom_ops = 10; +} diff --git a/tensorflow/contrib/lite/toco/toco_graphviz_dump_options.cc b/tensorflow/contrib/lite/toco/toco_graphviz_dump_options.cc new file mode 100644 index 0000000000..4e98e7081d --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_graphviz_dump_options.cc @@ -0,0 +1,22 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h" + +namespace toco { +GraphVizDumpOptions* GraphVizDumpOptions::singleton() { + static auto* ptr = new GraphVizDumpOptions; + return ptr; +} +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h b/tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h new file mode 100644 index 0000000000..ae0541f62b --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_GRAPHVIZ_DUMP_OPTIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_GRAPHVIZ_DUMP_OPTIONS_H_ + +#include + +namespace toco { + +// Global data for determining whether to output graph viz format from toco. +struct GraphVizDumpOptions { + std::string graphviz_first_array; + std::string graphviz_last_array; + std::string dump_graphviz; + bool dump_graphviz_video = false; + + static GraphVizDumpOptions* singleton(); +}; + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_GRAPHVIZ_DUMP_OPTIONS_H_ diff --git a/tensorflow/contrib/lite/toco/toco_port.cc b/tensorflow/contrib/lite/toco/toco_port.cc new file mode 100644 index 0000000000..a1c8696cd0 --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_port.cc @@ -0,0 +1,227 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { +namespace port { +void CopyToBuffer(const string& src, char* dest) { + memcpy(dest, src.data(), src.size()); +} + +#ifdef PLATFORM_GOOGLE +void CopyToBuffer(const Cord& src, char* dest) { src.CopyToArray(dest); } +#endif +} // namespace port +} // namespace toco + +#if defined(PLATFORM_GOOGLE) && !defined(__APPLE__) && !defined(__ANDROID__) + +// Wrap Google file operations. + +#include "base/init_google.h" +#include "file/base/file.h" +#include "file/base/filesystem.h" +#include "file/base/helpers.h" +#include "file/base/options.h" +#include "file/base/path.h" + +namespace toco { +namespace port { + +void InitGoogle(const char* usage, int* argc, char*** argv, bool remove_flags) { + ::InitGoogle(usage, argc, argv, remove_flags); +} + +void CheckInitGoogleIsDone(const char* message) { + ::CheckInitGoogleIsDone(message); +} + +namespace file { + +// Conversion to our wrapper Status. +Status ToStatus(const ::util::Status& uts) { + return Status(uts.ok(), uts.error_message()); +} + +// Conversion to our wrapper Options. +toco::port::file::Options ToOptions(const ::file::Options& options) { + CHECK_EQ(&options, &::file::Defaults()); + return Options(); +} + +Status Writable(const string& filename) { + File* f = nullptr; + const auto status = ::file::Open(filename, "w", &f, ::file::Defaults()); + if (f) { + QCHECK_OK(f->Close(::file::Defaults())); + } + return ToStatus(status); +} + +Status Readable(const string& filename, const file::Options& options) { + return ToStatus(::file::Readable(filename, ::file::Defaults())); +} + +Status Exists(const string& filename, const file::Options& options) { + auto status = ::file::Exists(filename, ::file::Defaults()); + return ToStatus(status); +} + +Status GetContents(const string& filename, string* contents, + const file::Options& options) { + return ToStatus(::file::GetContents(filename, contents, ::file::Defaults())); +} + +Status SetContents(const string& filename, const string& contents, + const file::Options& options) { + return ToStatus(::file::SetContents(filename, contents, ::file::Defaults())); +} + +string JoinPath(const string& a, const string& b) { + return ::file::JoinPath(a, b); +} + +} // namespace file +} // namespace port +} // namespace toco + +#else // (__APPLE__ || __ANDROID__) + +#include +#include +#include +#include +#include + +#if defined(PLATFORM_GOOGLE) +#include "base/commandlineflags.h" +#endif + +namespace toco { +namespace port { + +static bool port_initialized = false; + +void InitGoogle(const char* usage, int* argc, char*** argv, bool remove_flags) { + if (!port_initialized) { +#if defined(PLATFORM_GOOGLE) + ParseCommandLineFlags(argc, argv, remove_flags); +#endif + port_initialized = true; + } +} + +void CheckInitGoogleIsDone(const char* message) { + CHECK(port_initialized) << message; +} + +namespace file { + +Status Writable(const string& filename) { + FILE* f = fopen(filename.c_str(), "w"); + if (f) { + fclose(f); + return Status(true, ""); + } + return Status(false, "not writable"); +} + +Status Readable(const string& filename, const file::Options& options) { + FILE* f = fopen(filename.c_str(), "r"); + if (f) { + fclose(f); + return Status(true, ""); + } + return Status(false, "not readable"); +} + +Status Exists(const string& filename, const file::Options& options) { + struct stat statbuf; + int ret = stat(filename.c_str(), &statbuf); + return Status(ret != -1, ""); +} + +Status GetContents(const string& path, string* output, + const file::Options& options) { + output->clear(); + + int fd = open(path.c_str(), O_RDONLY); + if (fd == -1) { + return Status(false, "can't open() for read"); + } + + // Direct read, for speed. + const int kBufSize = 1 << 16; + char buffer[kBufSize]; + while (true) { + int size = read(fd, buffer, kBufSize); + if (size == 0) { + // Done. + close(fd); + return Status(true, ""); + } else if (size == -1) { + // Error. + close(fd); + return Status(false, "error during read()"); + } else { + output->append(buffer, size); + } + } + + CHECK(0); + return Status(false, "internal error"); +} + +Status SetContents(const string& filename, const string& contents, + const file::Options& options) { + int fd = open(filename.c_str(), O_WRONLY | O_CREAT, 0664); + if (fd == -1) { + return Status(false, "can't open() for write"); + } + + size_t i = 0; + while (i < contents.size()) { + size_t to_write = contents.size() - i; + ssize_t written = write(fd, &contents[i], to_write); + if (written == -1) { + close(fd); + return Status(false, "write() error"); + } + i += written; + } + close(fd); + + return Status(true, ""); +} + +string JoinPath(const string& base, const string& filename) { + if (base.empty()) return filename; + string base_fixed = base; + if (!base_fixed.empty() && base_fixed.back() == '/') base_fixed.pop_back(); + string filename_fixed = filename; + if (!filename_fixed.empty() && filename_fixed.front() == '/') + filename_fixed.erase(0, 1); + return base_fixed + "/" + filename_fixed; +} + +} // namespace file +} // namespace port +} // namespace toco + +#endif // (__APPLE || __ANDROID__) diff --git a/tensorflow/contrib/lite/toco/toco_port.h b/tensorflow/contrib/lite/toco/toco_port.h new file mode 100644 index 0000000000..b5cb7a11e7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_port.h @@ -0,0 +1,80 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_PORT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_PORT_H_ + +// Portability layer for toco tool. Mainly, abstract filesystem access so we +// can build and use on google internal environments and on OSX. + +#include +#include "tensorflow/contrib/lite/toco/format_port.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/platform.h" +#if defined(PLATFORM_GOOGLE) +#include "absl/strings/cord.h" +#endif // PLATFORM_GOOGLE + +#ifdef PLATFORM_GOOGLE +#define TFLITE_PROTO_NS proto2 +#else +#define TFLITE_PROTO_NS google::protobuf +#endif + +namespace toco { +namespace port { + +class Status { + public: + Status() {} + + Status(bool ok, const string& message) : ok_(ok), message_(message) {} + + bool ok() const { return ok_; } + + const string error_message() const { return message_; } + + private: + bool ok_ = false; + string message_; +}; + +void InitGoogle(const char* usage, int* argc, char*** argv, bool remove_flags); +void CheckInitGoogleIsDone(const char* message); + +namespace file { +class Options {}; +inline Options Defaults() { + Options o; + return o; +} +Status GetContents(const string& filename, string* contents, + const Options& options); +Status SetContents(const string& filename, const string& contents, + const Options& options); +string JoinPath(const string& base, const string& filename); +Status Writable(const string& filename); +Status Readable(const string& filename, const Options& options); +Status Exists(const string& filename, const Options& options); +} // namespace file + +// Copy `src` string to `dest`. User must ensure `dest` has enough space. +#if defined(PLATFORM_GOOGLE) +void CopyToBuffer(const ::Cord& src, char* dest); +#endif // PLATFORM_GOOGLE +void CopyToBuffer(const string& src, char* dest); +} // namespace port +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_PORT_H_ diff --git a/tensorflow/contrib/lite/toco/toco_port_test.cc b/tensorflow/contrib/lite/toco/toco_port_test.cc new file mode 100644 index 0000000000..650a617aeb --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_port_test.cc @@ -0,0 +1,58 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/toco_types.h" + +#include +#include + +namespace toco { +namespace port { +namespace { + +#ifdef PLATFORM_GOOGLE +#define TFLITE_PREFIX "third_party/tensorflow/contrib/lite/" +#else +#define TFLITE_PREFIX "tensorflow/contrib/lite/" +#endif + +TEST(TocoPortTest, Exists) { + EXPECT_TRUE( + file::Exists(TFLITE_PREFIX "toco/toco_port_test.cc", file::Defaults()) + .ok()); + + EXPECT_FALSE( + file::Exists("non-existent_file_asldjflasdjf", file::Defaults()).ok()); +} + +TEST(TocoPortTest, Readable) { + EXPECT_TRUE( + file::Readable(TFLITE_PREFIX "toco/toco_port_test.cc", file::Defaults()) + .ok()); + + EXPECT_FALSE( + file::Readable("non-existent_file_asldjflasdjf", file::Defaults()).ok()); +} + +TEST(TocoPortTest, JoinPath) { + EXPECT_EQ("part1/part2", file::JoinPath("part1", "part2")); + EXPECT_EQ("part1/part2", file::JoinPath("part1/", "part2")); + EXPECT_EQ("part1/part2", file::JoinPath("part1", "/part2")); + EXPECT_EQ("part1/part2", file::JoinPath("part1/", "/part2")); +} + +} // namespace +} // namespace port +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc new file mode 100644 index 0000000000..232538a841 --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -0,0 +1,277 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/toco_tooling.h" + +#include +#include +#include + +#include "absl/strings/str_join.h" +#include "tensorflow/contrib/lite/toco/allocate_transient_arrays.h" +#include "tensorflow/contrib/lite/toco/dump_graphviz.h" +#include "tensorflow/contrib/lite/toco/export_tensorflow.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/import_tensorflow.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tflite/export.h" +#include "tensorflow/contrib/lite/toco/tflite/import.h" +#include "tensorflow/contrib/lite/toco/toco_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { +namespace { +// CHECK-fails if the model contains a kTensorFlowUnsupported operation. +void CheckUnsupportedOperations(const Model& model) { + std::set unsupported_ops; + for (auto& op : model.operators) { + if (op->type == OperatorType::kTensorFlowUnsupported) { + unsupported_ops.insert( + static_cast(op.get()) + ->tensorflow_op); + } + } + QCHECK(unsupported_ops.empty()) + << "These unsupported ops were not removed by graph transformations: " + << absl::StrJoin(unsupported_ops, ", "); +} + +void MakeGeneralGraphTransformationsSet( + GraphTransformationsSet* transformations) { + CHECK(transformations->empty()); + transformations->Add(new ResolveReshapeAttributes); + transformations->Add(new PropagateArrayDataTypes); + transformations->Add(new PropagateFixedSizes); + transformations->Add(new RemoveTensorFlowAssert); + transformations->Add(new RemoveTensorFlowIdentity); + transformations->Add(new RemoveTrivialConcatenation); + transformations->Add(new RemoveTrivialConcatenationInput); + transformations->Add(new RemoveUnusedOp); + transformations->Add(new EnsureBiasVectors); + transformations->Add(new ResolveReorderAxes); + transformations->Add(new ResolveTensorFlowMatMul); + transformations->Add(new FuseBinaryIntoPrecedingAffine); + transformations->Add(new FuseBinaryIntoFollowingAffine); + transformations->Add(new ResolveBatchNormalization); + transformations->Add(new ResolveConstantBinaryOperator); + transformations->Add(new ResolveConstantUnaryOperator); + transformations->Add(new ResolveTensorFlowMerge); + transformations->Add(new ResolveTensorFlowSqueeze); + transformations->Add(new ResolveTensorFlowSwitch); + transformations->Add(new ResolveTensorFlowTile); + transformations->Add(new ResolveTensorFlowConcat); + transformations->Add(new IdentifyL2Normalization); + transformations->Add(new IdentifyL2Pool); + transformations->Add(new IdentifyRelu1); + transformations->Add(new RemoveTrivialBinaryOperator); + transformations->Add(new ReadFakeQuantMinMax); + transformations->Add(new ResolvePadAttributes); + transformations->Add(new ResolveStridedSliceAttributes); + transformations->Add(new ResolveSliceAttributes); + transformations->Add(new ResolveMeanAttributes); + transformations->Add(new ResolveConstantTensorFlowShape); + transformations->Add(new MakeInitialDequantizeOperator); +} + +void SetArrayFinalDataTypes(const TocoFlags& toco_flags, Model* model) { + const bool output_is_tflite = toco_flags.output_format() == TFLITE; + + if (output_is_tflite) { + if (!toco_flags.input_types().empty()) { + for (int i = 0; i < model->flags.input_arrays_size(); i++) { + int input_types_index = toco_flags.input_types_size() == 1 ? 0 : i; + const auto input_type = toco_flags.input_types(input_types_index); + ArrayDataType final_data_type = ArrayDataType::kNone; + switch (input_type) { + case FLOAT: + final_data_type = ArrayDataType::kFloat; + break; + case QUANTIZED_UINT8: + final_data_type = ArrayDataType::kUint8; + break; + case INT32: + final_data_type = ArrayDataType::kInt32; + break; + case INT64: + final_data_type = ArrayDataType::kInt64; + break; + default: + LOG(FATAL) << "Unknown data type"; + } + model->arrays[model->flags.input_arrays(i).name()]->final_data_type = + final_data_type; + } + } + } else { + for (int i = 0; i < model->flags.input_arrays_size(); i++) { + model->arrays[model->flags.input_arrays(i).name()]->final_data_type = + ArrayDataType::kFloat; + } + } +} + +} // namespace + +std::unique_ptr Import(const TocoFlags& toco_flags, + const ModelFlags& model_flags, + const string& input_file_contents) { + std::unique_ptr model; + switch (toco_flags.input_format()) { + case TENSORFLOW_GRAPHDEF: + model = ImportTensorFlowGraphDef(model_flags, input_file_contents); + break; + case TFLITE: + model = toco::tflite::Import(model_flags, input_file_contents); + ResolveModelFlags(model_flags, model.get()); + CheckInvariants(*model); + break; + default: + LOG(FATAL) << "Unhandled input_format"; + } + + LogDump(kLogLevelModelChanged, "AT IMPORT", *model); + + return model; +} + +void Transform(const TocoFlags& toco_flags, Model* model) { + const FileFormat output_format = toco_flags.output_format(); + const IODataType inference_type = toco_flags.inference_type(); + + const bool output_is_tflite = output_format == TFLITE; + + const bool output_is_tflite_quantized = + output_is_tflite && inference_type == QUANTIZED_UINT8; + + if (output_is_tflite) { + QCHECK(toco_flags.input_types_size() == 1 || + toco_flags.input_types_size() == model->flags.input_arrays_size()) + << "Mismatched numbers of input_arrays and input_types"; + } + + if (output_is_tflite_quantized) { + for (const auto& input_type : toco_flags.input_types()) { + QCHECK_NE(input_type, FLOAT) + << "Quantized inference is not allowed with float inputs."; + } + } + + SetArrayFinalDataTypes(toco_flags, model); + + GraphTransformationsSet transformations; + MakeGeneralGraphTransformationsSet(&transformations); + auto* remove_trivial_reshape = new RemoveTrivialReshape; + transformations.Add(remove_trivial_reshape); + if (output_format == TFLITE) { + transformations.Add(new FuseActivationFunctions); + } else { + transformations.Add(new UnfuseActivationFunctions); + } + if (output_format != TENSORFLOW_GRAPHDEF) { + transformations.Add(new ResolveConstantFakeQuant); + } + if (toco_flags.drop_fake_quant()) { + transformations.Add(new DropFakeQuant); + } else { + // See the doc for --reorder_across_fake_quant: that flag is needed to + // support some existing models, e.g. WordLens, that have FakeQuant + // nodes in the wrong places. + // We currently unconditionally enable that behavior when the output + // format is DarwiNN because the DarwiNN test code does not make it + // easy to pass a new toco flag. Once that is resolved on the DarwiNN + // tests side, the special-casing of DarwiNN here can go away. + // TODO(benoitjacob): so drop it when we can. + if ((output_is_tflite_quantized && + toco_flags.reorder_across_fake_quant())) { + transformations.Add(new DropFakeQuant); + } + } + transformations.Add(new ConvertPureConvToDepthwise); + // TFLite export does not yet support fused LSTM cell. + if (output_format == TENSORFLOW_GRAPHDEF) { + transformations.Add(new IdentifyLstmCell); + } + transformations.Add(new ResolveConstantConcatenation); + RunGraphTransformations(model, "general graph transformations", + transformations); + if (output_is_tflite_quantized) { + RunGraphTransformations(model, "pre-quantization graph transformations", + {new HardcodeMinMax, new DropFakeQuant}); + } + + if (output_is_tflite_quantized) { + if (toco_flags.has_default_ranges_min() && + toco_flags.has_default_ranges_max()) { + UseDefaultMinMaxRangeValues(model, toco_flags.default_ranges_min(), + toco_flags.default_ranges_max()); + } + CheckIsReadyForQuantization(*model); + RunGraphTransformations( + model, "quantization graph transformations", + {new Quantize, new RemoveTrivialQuantizedActivationFunc, + new RemoveFinalDequantizeOp}); + } else { + GraphTransformationsSet dequantization_transformations{new Dequantize}; + // Dequantize creates FakeQuant nodes. We may want to discard + // those immediately. + if (toco_flags.drop_fake_quant()) { + dequantization_transformations.Add(new DropFakeQuant); + } + + RunGraphTransformations(model, "dequantization graph transformations", + dequantization_transformations); + } + + LogDump(kLogLevelModelChanged, "AFTER TRANSFORMATIONS", *model); + + if (output_format != GRAPHVIZ_DOT && output_format != TFLITE) { + // By now there shouldn't be any unsupported ops when exporting to + // TensorFlow GraphDef. + CheckUnsupportedOperations(*model); + } + + if (output_is_tflite) { + AllocateTransientArrays(model, kDefaultTransientDataAlignment); + LogDump(kLogLevelModelChanged, "AFTER ALLOCATION", *model); + } + + CheckModelCounts(*model); + CheckFinalDataTypesSatisfied(*model); + + int64 ops_count; + if (EstimateArithmeticOpsCount(*model, &ops_count)) { + LOG(INFO) << "Estimated count of arithmetic ops: " << 1e-9 * ops_count + << " billion (note that a multiply-add is counted as 2 ops)."; + } +} + +void Export(const TocoFlags& toco_flags, const Model& model, + bool allow_custom_ops, string* output_file_contents) { + switch (toco_flags.output_format()) { + case TENSORFLOW_GRAPHDEF: + ExportTensorFlowGraphDef(model, output_file_contents); + break; + case TFLITE: + toco::tflite::Export(model, allow_custom_ops, output_file_contents); + break; + case GRAPHVIZ_DOT: + DumpGraphviz(model, output_file_contents); + break; + default: + LOG(FATAL) << "Unhandled output_format"; + } +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco_tooling.h b/tensorflow/contrib/lite/toco/toco_tooling.h new file mode 100644 index 0000000000..9c5a93a211 --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_tooling.h @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_TOOLING_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_TOOLING_H_ + +#include +#include + +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_flags.pb.h" + +namespace toco { + +// Imports the input file into a Model object. +std::unique_ptr Import(const TocoFlags& toco_flags, + const ModelFlags& model_flags, + const string& input_file_contents); + +// Transforms a Model. The resulting Model is ready to be passed +// to Export with the exact same toco_flags. +void Transform(const TocoFlags& toco_flags, Model* model); + +// Exports the Model, which must be of the 'lowered' form returned by +// Transform, to a file of the format given by +// toco_flags.output_format(). +void Export(const TocoFlags& toco_flags, const Model& model, + bool allow_custom_ops, string* output_file_contents); + +// This if for backward-compatibility with internal tools. +inline void Export(const TocoFlags& toco_flags, const Model& model, + string* output_file_contents) { + Export(toco_flags, model, true, output_file_contents); +} + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOCO_TOOLING_H_ diff --git a/tensorflow/contrib/lite/toco/toco_types.h b/tensorflow/contrib/lite/toco/toco_types.h new file mode 100644 index 0000000000..ad42497ada --- /dev/null +++ b/tensorflow/contrib/lite/toco/toco_types.h @@ -0,0 +1,45 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TYPES_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TYPES_H_ + +#include +#include "tensorflow/core/platform/platform.h" + +#if defined(PLATFORM_GOOGLE) || defined(GOOGLE_INTEGRAL_TYPES) +#include "tensorflow/core/platform/google/integral_types.h" +#else +#include "tensorflow/core/platform/default/integral_types.h" +#endif + +namespace toco { +#ifdef PLATFORM_GOOGLE +using ::string; +#else +using std::string; +#endif + +using tensorflow::int16; +using tensorflow::int32; +using tensorflow::int64; +using tensorflow::int8; +using tensorflow::uint16; +using tensorflow::uint32; +using tensorflow::uint64; +using tensorflow::uint8; + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TYPES_H_ diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc new file mode 100644 index 0000000000..bcbfed62d3 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -0,0 +1,1552 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" +#include "tensorflow/contrib/lite/toco/dump_graphviz.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/core/platform/logging.h" + + +namespace toco { + +string LogName(const Operator& op) { + const string& opname = HelpfulOperatorTypeName(op); + if (op.outputs.empty()) { + return toco::port::StringF("{%s operator}", opname); + } else { + return toco::port::StringF("{%s operator with output %s}", opname, + op.outputs[0]); + } +} + +bool IsInputArray(const Model& model, const string& name) { + for (const auto& input_array : model.flags.input_arrays()) { + if (input_array.name() == name) { + return true; + } + } + return false; +} + +bool IsArrayConsumed(const Model& model, const string& name) { + if (GetOpWithInput(model, name)) { + return true; + } + for (const string& model_output : model.flags.output_arrays()) { + if (model_output == name) { + return true; + } + } + for (const auto& rnn_state : model.flags.rnn_states()) { + if (rnn_state.back_edge_source_array() == name) { + return true; + } + } + return false; +} + +int CountTrueOutputs(const Model& model, const Operator& op) { + int count = 0; + for (const string& output : op.outputs) { + if (IsArrayConsumed(model, output)) { + ++count; + } + } + return count; +} + +int CountOpsWithInput(const Model& model, const string& array_name) { + int count = 0; + for (const auto& op : model.operators) { + for (auto& input : op->inputs) { + if (input == array_name) { + count++; + } + } + } + return count; +} + +bool DeleteArrayIfUnused(const string& array_name, Model* model) { + if (CountOpsWithInput(*model, array_name) == 0) { + model->arrays.erase(array_name); + return true; + } + return false; +} + +std::vector>::const_iterator FindOpWithOutput( + const Model& model, const string& array_name) { + for (auto it = model.operators.begin(); it != model.operators.end(); ++it) { + for (auto& output : it->get()->outputs) { + if (output == array_name) { + return it; + } + } + } + return model.operators.end(); +} + +std::vector>::iterator FindOpWithOutput( + Model& model, const string& array_name) { + for (auto it = model.operators.begin(); it != model.operators.end(); ++it) { + for (auto& output : it->get()->outputs) { + if (output == array_name) { + return it; + } + } + } + return model.operators.end(); +} + +Operator* GetOpWithOutput(const Model& model, const string& array_name) { + auto it = FindOpWithOutput(model, array_name); + return it == model.operators.end() ? nullptr : it->get(); +} + +// GetFirstOpWithInput assumes that this finds the first op. +std::vector>::const_iterator FindOpWithInput( + const Model& model, const string& array_name) { + for (auto it = model.operators.begin(); it != model.operators.end(); ++it) { + for (auto& input : it->get()->inputs) { + if (input == array_name) { + return it; + } + } + } + return model.operators.end(); +} + +std::vector>::const_iterator FindOp( + const Model& model, const Operator* op) { + for (auto it = model.operators.begin(); it != model.operators.end(); ++it) { + if (it->get() == op) { + return it; + } + } + return model.operators.end(); +} + +std::vector>::iterator FindOp(Model& model, + const Operator* op) { + for (auto it = model.operators.begin(); it != model.operators.end(); ++it) { + if (it->get() == op) { + return it; + } + } + return model.operators.end(); +} + +Operator* GetOpWithInput(const Model& model, const string& array_name) { + auto it = FindOpWithInput(model, array_name); + return it == model.operators.end() ? nullptr : it->get(); +} + +Operator* GetFirstOpWithInput(const Model& model, const string& array_name) { + auto it = FindOpWithInput(model, array_name); + return it == model.operators.end() ? nullptr : it->get(); +} + +string FormatArraysList(const Model& model, const std::vector& list) { + if (list.empty()) { + return "[]"; + } + string result = ""; + if (list.size() > 1) { + result += "[ "; + } + for (std::size_t i = 0; i < list.size(); i++) { + if (i > 0) { + result += ", "; + } + result += list[i]; + } + if (list.size() > 1) { + result += " ]"; + } + return result; +} + +const char* OperatorTypeName(OperatorType type) { + switch (type) { +#define HANDLE_OPERATORTYPENAME_CASE(c) \ + case OperatorType::k##c: \ + return #c; + HANDLE_OPERATORTYPENAME_CASE(Add) + HANDLE_OPERATORTYPENAME_CASE(AveragePool) + HANDLE_OPERATORTYPENAME_CASE(BatchNormalization) + HANDLE_OPERATORTYPENAME_CASE(Conv) + HANDLE_OPERATORTYPENAME_CASE(Concatenation) + HANDLE_OPERATORTYPENAME_CASE(DepthwiseConv) + HANDLE_OPERATORTYPENAME_CASE(DepthToSpace) + HANDLE_OPERATORTYPENAME_CASE(SpaceToDepth) + HANDLE_OPERATORTYPENAME_CASE(FullyConnected) + HANDLE_OPERATORTYPENAME_CASE(Dequantize) + HANDLE_OPERATORTYPENAME_CASE(L2Normalization) + HANDLE_OPERATORTYPENAME_CASE(LocalResponseNormalization) + HANDLE_OPERATORTYPENAME_CASE(Logistic) + HANDLE_OPERATORTYPENAME_CASE(LstmCell) + HANDLE_OPERATORTYPENAME_CASE(MaxPool) + HANDLE_OPERATORTYPENAME_CASE(L2Pool) + HANDLE_OPERATORTYPENAME_CASE(FakeQuant) + HANDLE_OPERATORTYPENAME_CASE(Mul) + HANDLE_OPERATORTYPENAME_CASE(Relu) + HANDLE_OPERATORTYPENAME_CASE(Relu1) + HANDLE_OPERATORTYPENAME_CASE(Relu6) + HANDLE_OPERATORTYPENAME_CASE(ReorderAxes) + HANDLE_OPERATORTYPENAME_CASE(Softmax) + HANDLE_OPERATORTYPENAME_CASE(Div) + HANDLE_OPERATORTYPENAME_CASE(Tanh) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowAll) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowAssert) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowGreater) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowGreaterEqual) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowIdentity) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowLess) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowLessEqual) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowMatMul) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowMax) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowMaximum) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowMerge) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowMin) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowMinimum) + HANDLE_OPERATORTYPENAME_CASE(Pad) + HANDLE_OPERATORTYPENAME_CASE(StridedSlice) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowReshape) + HANDLE_OPERATORTYPENAME_CASE(Squeeze) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowRsqrt) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowShape) + HANDLE_OPERATORTYPENAME_CASE(Slice) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowSplit) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowSqrt) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowSquare) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowSwitch) + HANDLE_OPERATORTYPENAME_CASE(Sub) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowSum) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowTile) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowConcat) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowConcatV2) + HANDLE_OPERATORTYPENAME_CASE(Cast) + HANDLE_OPERATORTYPENAME_CASE(Floor) + HANDLE_OPERATORTYPENAME_CASE(Gather) + HANDLE_OPERATORTYPENAME_CASE(ResizeBilinear) + HANDLE_OPERATORTYPENAME_CASE(SpaceToBatchND) + HANDLE_OPERATORTYPENAME_CASE(BatchToSpaceND) + HANDLE_OPERATORTYPENAME_CASE(Mean) + HANDLE_OPERATORTYPENAME_CASE(Svdf) + HANDLE_OPERATORTYPENAME_CASE(TensorFlowUnsupported) + default: + LOG(FATAL) << "Unhandled op type"; +#undef HANDLE_OPERATORTYPENAME_CASE + } +} + +string HelpfulOperatorTypeName(const Operator& op) { + if (op.type == OperatorType::kTensorFlowUnsupported) { + return toco::port::StringF( + "(Unsupported TensorFlow op: %s)", + static_cast(op).tensorflow_op); + } + return OperatorTypeName(op.type); +} + +void LogSummary(int log_level, const Model& model) { + VLOG(log_level) << "Operators summary (" << model.operators.size() + << " operators): "; + std::unordered_multiset ops_by_type; + for (const auto& op : model.operators) { + ops_by_type.insert(op->type); + } + auto it = ops_by_type.begin(); + while (it != ops_by_type.end()) { + int count = ops_by_type.count(*it); + VLOG(log_level) << " " << OperatorTypeName(*it) << ": " << count; + std::advance(it, count); + } +} + +void LogArray(int log_level, const Model& model, const string& name) { + const auto& array = model.GetArray(name); + VLOG(log_level) << "Array: " << name; + switch (array.data_type) { + case ArrayDataType::kNone: + break; + case ArrayDataType::kFloat: + VLOG(log_level) << " Data type: kFloat"; + break; + case ArrayDataType::kInt32: + VLOG(log_level) << " Data type: kInt32"; + break; + case ArrayDataType::kUint8: + VLOG(log_level) << " Data type: kUint8"; + break; + default: + VLOG(log_level) << " Data type: other (numerical value: " + << static_cast(array.data_type) << ")"; + break; + } + if (array.buffer) { + VLOG(log_level) << " Constant Buffer"; + } + if (array.alloc) { + VLOG(log_level) << " Transient Alloc"; + } + if (array.has_shape()) { + const Shape& array_shape = array.shape(); + if (array_shape.dimensions_count() == 0) { + VLOG(log_level) << " (Zero dimensions)"; + } else { + string message = " Dims: "; + bool first = true; + for (const int dim : array_shape.dims()) { + if (!first) { + message += ", "; + } + first = false; + toco::port::AppendF(&message, "%d", dim); + } + VLOG(log_level) << message; + } + } + if (array.minmax) { + VLOG(log_level) << " MinMax: " << array.minmax->min << " .. " + << array.minmax->max; + } + if (array.quantization_params) { + VLOG(log_level) << " QuantizationParams: zero_point=" + << array.quantization_params->zero_point + << ", scale=" << array.quantization_params->scale; + } +} + +void DumpGraphvizVideoFrame(const Model& model) { + namespace port = toco::port; + + const auto& dump_options = *GraphVizDumpOptions::singleton(); + if (!dump_options.dump_graphviz_video) { + return; + } + CHECK(!dump_options.dump_graphviz.empty()); + // TODO(benoitjacob): the static data here means that this function + // is stateful, not reentrant, and effectively leaks memory till exit + // (since dump_hashes can only grow in size). It also means that it + // really only is intended to be called for a single model during the + // process' lifetime. So it's not great design at all. The overriding + // design aspect here is to make the video-dumping code as unintrusive + // and self-contained as possible. Eventually, we'll want to have that + // cleaned-up, but that will require some form of general statefulness + // in toco (some kind of 'tooling state' data structure) that does + // not exist at present, and would be premature to design here just for + // this new video-dumping feature. + static int dump_id = 0; + static std::unordered_set dump_hashes; + string graphviz_dump; + DumpGraphviz(model, &graphviz_dump); + std::size_t hash = std::hash{}(graphviz_dump); + if (!dump_hashes.count(hash)) { + dump_hashes.insert(hash); + CHECK(port::file::SetContents( + port::file::JoinPath( + dump_options.dump_graphviz, + toco::port::StringF("toco_video_%05d.dot", dump_id)), + graphviz_dump, port::file::Defaults()) + .ok()); + dump_id++; + } +} + +void LogDump(int log_level, const string& message, const Model& model) { + namespace port = toco::port; + const auto& dump_options = *GraphVizDumpOptions::singleton(); + + DumpGraphvizVideoFrame(model); + if (!dump_options.dump_graphviz.empty()) { + string graphviz_dump; + + DumpGraphviz(model, &graphviz_dump); + CHECK(port::file::SetContents( + port::file::JoinPath( + dump_options.dump_graphviz, + absl::StrCat("toco_", + absl::StrReplaceAll(message, {{" ", "_"}}), + ".dot")), + graphviz_dump, port::file::Defaults()) + .ok()); + } + + if (!VLOG_IS_ON(log_level)) { + return; + } + VLOG(log_level) << "BEGIN DUMP OF TOCO MODEL (" << message << ")"; + LogSummary(log_level, model); + std::unordered_set already_printed_arrays; + for (const auto& op : model.operators) { + for (const auto& input : op->inputs) { + if (!already_printed_arrays.count(input)) { + already_printed_arrays.insert(input); + LogArray(log_level, model, input); + } + } + VLOG(log_level) << HelpfulOperatorTypeName(*op) << " : "; + VLOG(log_level) << " " << FormatArraysList(model, op->inputs) << " -> " + << FormatArraysList(model, op->outputs); + if (op->fused_activation_function != FusedActivationFunctionType::kNone) { + VLOG(log_level) << " (with fused activation function)"; + } + for (const auto& output : op->outputs) { + if (!already_printed_arrays.count(output)) { + already_printed_arrays.insert(output); + LogArray(log_level, model, output); + } + } + } + VLOG(log_level) << "END DUMP OF TOCO MODEL (" << message << ")"; +} + +// Note remaining raw-array extension in ProcessTensorFlowReshapeOperator(). +void ExtendShape(Shape* shape, int new_shape_size) { + CHECK_GE(new_shape_size, shape->dimensions_count()); + const int size_increase = new_shape_size - shape->dimensions_count(); + auto* shape_dims = shape->mutable_dims(); + shape_dims->insert(shape_dims->begin(), size_increase, 1); +} + +// TODO(b/62904716) Remove along with remaining uses. +void UnextendShape(Shape* shape, int new_shape_size) { + CHECK_LE(new_shape_size, shape->dimensions_count()); + const int size_reduction = shape->dimensions_count() - new_shape_size; + for (int i = 0; i < size_reduction; i++) { + CHECK_EQ(shape->dims(i), 1); + } + std::vector& shape_dims = *shape->mutable_dims(); + shape_dims.erase(shape_dims.begin(), shape_dims.begin() + size_reduction); +} + +void CheckShapeDimensions(const Shape& shape) { + for (int i = 0; i < shape.dimensions_count(); ++i) { + CHECK_GE(shape.dims()[i], 1) << "shape has dimension 0 at index << " << i + << ". shape = " << ShapeToString(shape); + } +} + +bool ShapesAgreeUpToBroadcasting(const Shape& shape0, const Shape& shape1) { + CheckShapeDimensions(shape0); + CheckShapeDimensions(shape1); + + const Shape* longer = &shape0; + const Shape* shorter = &shape1; + if (shape1.dimensions_count() > shape0.dimensions_count()) { + longer = &shape1; + shorter = &shape0; + } + + // Walk dimensions back to front until we run out of dimensions in the shorter + // shape. + int longer_index = longer->dimensions_count() - 1; + int shorter_index = shorter->dimensions_count() - 1; + while (shorter_index >= 0) { + const int d_long = longer->dims(longer_index); + const int d_short = shorter->dims(shorter_index); + // Broadcasting fails if the dimensions are different *and* neither is 1. + if ((d_long != d_short) && (d_long != 1) && (d_short != 1)) { + return false; + } + longer_index--; + shorter_index--; + } + return true; +} + +bool ShapesAgreeUpToExtending(const Shape& shape0, const Shape& shape1) { + CheckShapeDimensions(shape0); + CheckShapeDimensions(shape1); + + const Shape* longer = &shape0; + const Shape* shorter = &shape1; + if (shape1.dimensions_count() > shape0.dimensions_count()) { + longer = &shape1; + shorter = &shape0; + } + + // Walk dimensions back to front until we run out of dimensions in the shorter + // shape. + int longer_index = longer->dimensions_count() - 1; + int shorter_index = shorter->dimensions_count() - 1; + while (shorter_index >= 0) { + const int d_long = longer->dims(longer_index); + const int d_short = shorter->dims(shorter_index); + // Extending fails if the dimensions are different. + if (d_long != d_short) { + return false; + } + longer_index--; + shorter_index--; + } + + // The remaining dimensions in the longer shape must be 1. + while (longer_index >= 0) { + const int d_long = longer->dims(longer_index); + if (d_long != 1) { + return false; + } + longer_index--; + } + + return true; +} + +int RequiredBufferSizeForShape(const Shape& shape) { + int max_offset = 1; + for (const auto& dim : shape.dims()) { + CHECK_GE(dim, 1); + max_offset *= dim; + } + return max_offset; +} + +bool IsConstantParameterArray(const Model& model, const string& name) { + if (!model.arrays.count(name)) { + return false; + } + + return !!model.arrays.at(name)->buffer; +} + +void CheckNoMissingArray(const Model& model) { + for (const auto& op : model.operators) { + for (const auto& input : op->inputs) { + CHECK(model.arrays.count(input)); + } + for (const auto& output : op->outputs) { + CHECK(model.arrays.count(output)); + } + } + for (const auto& input_array : model.flags.input_arrays()) { + CHECK(model.arrays.count(input_array.name())) + << "Input array not found: " << input_array.name(); + } + for (const string& output_array : model.flags.output_arrays()) { + CHECK(model.arrays.count(output_array)) + << "Output array not found: " << output_array; + } + for (const auto& rnn_state : model.flags.rnn_states()) { + CHECK(model.arrays.count(rnn_state.state_array())); + CHECK(model.arrays.count(rnn_state.back_edge_source_array())); + } +} + +void FixNoMissingArray(Model* model) { + for (const auto& op : model->operators) { + for (const auto& input : op->inputs) { + if (!model->arrays.count(input)) { + model->GetOrCreateArray(input); + } + } + for (const auto& output : op->outputs) { + if (!model->arrays.count(output)) { + model->GetOrCreateArray(output); + } + } + } + for (const string& output_array : model->flags.output_arrays()) { + if (!model->arrays.count(output_array)) { + model->GetOrCreateArray(output_array); + } + } +} + +void CheckNoOrphanedArray(const Model& model) { + std::unordered_set arrays_without_known_use; + for (const auto& array : model.arrays) { + arrays_without_known_use.insert(array.first); + } + for (const auto& op : model.operators) { + for (const auto& input : op->inputs) { + arrays_without_known_use.erase(input); + } + for (const auto& output : op->outputs) { + arrays_without_known_use.erase(output); + } + } + if (!arrays_without_known_use.empty()) { + for (const auto& array : arrays_without_known_use) { + LOG(INFO) << "Error: Orphaned array: " << array; + } + } + CHECK(arrays_without_known_use.empty()); +} + +void FixNoOrphanedArray(Model* model) { + std::unordered_set arrays_without_known_use; + for (const auto& array : model->arrays) { + arrays_without_known_use.insert(array.first); + } + for (const auto& op : model->operators) { + for (const auto& input : op->inputs) { + arrays_without_known_use.erase(input); + } + for (const auto& output : op->outputs) { + arrays_without_known_use.erase(output); + } + } + for (const auto& array : arrays_without_known_use) { + model->arrays.erase(array); + } +} + +void CheckArrayFieldsConsistent(const Model& model) { + for (const auto& array_entry : model.arrays) { + const auto& array = array_entry.second; + if (array->has_shape()) { + for (int d : array->shape().dims()) { + CHECK_GE(d, 1); + } + } + // It's OK to have a buffer or an alloc, but not both. + // (Since allocs are for transient arrays without a buffer). + CHECK(!array->buffer || !array->alloc); + // If there is a buffer, its type should be consistent with data_type. + if (array->buffer) { + CHECK(array->buffer->type == array->data_type); + } + } +} + +void CheckOperatorOrdering(const Model& model) { + std::unordered_set arrays_behind_us; + for (const auto& array_entry : model.arrays) { + if (!GetOpWithOutput(model, array_entry.first)) { + arrays_behind_us.insert(array_entry.first); + } + } + for (const auto& op : model.operators) { + for (const auto& input : op->inputs) { + if (!IsConstantParameterArray(model, input)) { + CHECK(arrays_behind_us.count(input)); + } + } + for (const auto& output : op->outputs) { + CHECK(!arrays_behind_us.count(output)); + arrays_behind_us.insert(output); + } + } + for (const string& output_array : model.flags.output_arrays()) { + CHECK(arrays_behind_us.count(output_array)); + } +} + +void FixOperatorOrdering(Model* model) { + std::unordered_set arrays_behind_us; + for (const auto& array_entry : model->arrays) { + if (!GetOpWithOutput(*model, array_entry.first)) { + arrays_behind_us.insert(array_entry.first); + } + } + std::vector> old_operators; + std::swap(old_operators, model->operators); + std::set remaining; + for (std::size_t i = 0; i < old_operators.size(); i++) { + remaining.insert(i); + } + std::unordered_map reason_why_leftover; + while (true) { + bool inserted_something = false; + for (auto i : remaining) { + bool can_insert = true; + auto& op = old_operators[i]; + CHECK(op.get()); + for (const auto& input : op->inputs) { + if (!IsConstantParameterArray(*model, input) && + !arrays_behind_us.count(input)) { + for (const string& output : op->outputs) { + reason_why_leftover[output] = input; + } + can_insert = false; + break; + } + } + if (can_insert) { + model->operators.emplace_back(nullptr); + for (const auto& output : op->outputs) { + arrays_behind_us.insert(output); + } + std::swap(op, model->operators.back()); + remaining.erase(i); + inserted_something = true; + break; + } + } + if (!inserted_something) { + break; + } + } + if (!remaining.empty()) { + LOG(ERROR) + << "No viable ordering of operators was found. " + << "Here is a 'backtrace' of at least one part of the graph that is " + << "problematic. It starts with the first operator that has as " + << "problematic input array, and then walks back the graph to " + << "the operator that produced that input array, etc., until we find " + << "the root cause:"; + LOG(ERROR) << "BEGIN TRACE OF OPERATOR WITH BAD INPUT"; + LOG(ERROR) << "Here is the first-encountered operator with a bad input: "; + const Operator* bad_op = old_operators[*remaining.begin()].get(); + std::unordered_set bad_inputs_already_traced; + // The following while(true) loop should always end with a LOG(FATAL). + while (true) { + LOG(ERROR) << HelpfulOperatorTypeName(*bad_op) << " : " + << FormatArraysList(*model, bad_op->inputs) << " -> " + << FormatArraysList(*model, bad_op->outputs); + bool found_bad_output = false; + string bad_output; + for (const string& output : bad_op->outputs) { + if (reason_why_leftover.count(output)) { + found_bad_output = true; + bad_output = output; + break; + } + } + CHECK(found_bad_output); + const string& bad_input = reason_why_leftover[bad_output]; + LOG(ERROR) << "The bad input here is: " << bad_input; + if (bad_inputs_already_traced.count(bad_input)) { + LOG(FATAL) + << "Cycle found! We already encountered that " + << "input array, " << bad_input << ", earlier in the " + << "above trace! We expect graphs to be acyclic, even " + << "RNNs. Let us know if some graph actually needs to have " + << "cycles, but first, please check if it really is " + << "an *inference* graph. *Training* graphs are out-of-scope " + << "for toco."; + } + bad_inputs_already_traced.insert(bad_input); + bad_op = nullptr; + for (auto i : remaining) { + const Operator* op = old_operators[i].get(); + for (const string& output : op->outputs) { + if (bad_input == output) { + bad_op = op; + break; + } + } + if (bad_op) { + break; + } + } + if (!bad_op) { + LOG(ERROR) << "And that's the root cause: " + << "that array, " << bad_input << ", isn't produced by any " + << "operator, or provided in any other way."; + LOG(ERROR) << "END TRACE OF OPERATOR WITH BAD INPUT"; + LOG(FATAL) << "(The above was a multi-line fatal error)"; + } + LOG(ERROR) << "And that array is the output of the following operator:"; + } + } + CHECK(remaining.empty()) + << "Should never get here! In case of bad graph, " + << "the above code should have generated a FATAL error already!"; +} + +// Checks that the --input_arrays of the Model are actually used by at least +// one of the --output_arrays i.e. that the graph contains a path from each one +// of the inputs to at least one of the outputs. This catches cases where the +// user passed the wrong --input_arrays or --output_arrays, which otherwise may +// result in cryptic error messages. +void CheckInputUsedByOutputs(const Model& model) { + std::set used_arrays; + for (const string& output : model.flags.output_arrays()) { + used_arrays.insert(output); + } + for (int i = model.operators.size() - 1; i >= 0; i--) { + bool is_op_used = false; + for (const string& op_output : model.operators[i]->outputs) { + if (used_arrays.count(op_output)) { + is_op_used = true; + break; + } + } + if (!is_op_used) { + continue; + } + for (const string& op_input : model.operators[i]->inputs) { + used_arrays.insert(op_input); + } + } + for (const auto& input_array : model.flags.input_arrays()) { + QCHECK(used_arrays.count(input_array.name())) + << "The graph does not connect the input (" << input_array.name() + << ") specified by --input_arrays to any of the specified " + << "--output_arrays (" + << absl::StrJoin(model.flags.output_arrays(), ", ") + << "). Did you pass the wrong flags for this model, " + << "or is that model's graph actually incomplete?"; + } +} + +void CheckInvariants(const Model& model) { + CheckNoMissingArray(model); + CheckNoOrphanedArray(model); + CheckArrayFieldsConsistent(model); + CheckOperatorOrdering(model); + CheckInputUsedByOutputs(model); +} + +void CheckCountInRange(const ::toco::ModelFlags::ModelCheck& model_check, + const int count, const string& count_description) { + if (model_check.count_min() >= 0) { + CHECK_GE(count, model_check.count_min()) + << "Mismatch in " << count_description << ": count was " << count + << ", but the specified " + << (model_check.count_max() > model_check.count_min() ? "minimum" + : "value") + << " was " << model_check.count_min() << "."; + } + if (model_check.count_max() > model_check.count_min()) { + CHECK_LE(count, model_check.count_max()) + << "Mismatch in " << count_description << ": count was " << count + << ", but the specified maximum was " << model_check.count_max() << "."; + } +} + +void CheckModelCounts(const Model& model) { + std::unordered_multiset ops_by_type; + std::unordered_map op_type_by_name; + if (model.flags.model_checks_size() == 0) { + return; + } + + for (const auto& op : model.operators) { + ops_by_type.insert(op->type); + op_type_by_name[OperatorTypeName(op->type)] = op->type; + } + for (const auto& model_check : model.flags.model_checks()) { + string count_type = model_check.count_type(); + if (count_type == "None") { + continue; + } else if (count_type == "Arrays") { + CheckCountInRange(model_check, model.arrays.size(), "count of arrays"); + } else if (count_type == "Total") { + CheckCountInRange(model_check, model.operators.size(), + "count of all operator instances"); + } else { + // The check type is not itself checked against the set of valid + // operators, mainly because the enum set cannot be iterated in C++. + const int found_count = + op_type_by_name.count(count_type) > 0 + ? ops_by_type.count(op_type_by_name[count_type]) + : 0; + CheckCountInRange(model_check, found_count, + "count of instances of " + count_type + " operator"); + } + } +} + +void MakeArrayDims(int num_dims, int batch, int height, int width, int depth, + std::vector* out_dims) { + CHECK(out_dims->empty()); + if (num_dims == 1) { + CHECK_EQ(batch, 1); + *out_dims = {depth}; + } else if (num_dims == 2) { + *out_dims = {batch, depth}; + } else if (num_dims == 3) { + CHECK_EQ(batch, 1); + *out_dims = {height, width, depth}; + } else if (num_dims == 4) { + *out_dims = {batch, height, width, depth}; + } else { + LOG(FATAL) << "Should not get here: " << num_dims; + } +} + +void CreateOrCheckRnnStateArray(const string& name, int size, Model* model) { + int batch = 1; + int num_dims = -1; + for (const auto& input_array : model->flags.input_arrays()) { + // Pick 'num_dims' and 'batch' from the first input_arrays, unless we find + // a better match by name. + if (input_array.name() == name || num_dims == -1) { + num_dims = input_array.shape_size(); + if (num_dims != 0) { + batch = input_array.shape(0); + } + } + } + Array& array = model->GetOrCreateArray(name); + if (array.has_shape()) { + num_dims = array.shape().dimensions_count(); + } + std::vector dims; + MakeArrayDims(num_dims, batch, 1, 1, size, &dims); + CHECK(array.data_type == ArrayDataType::kFloat || + array.data_type == ArrayDataType::kNone); + array.data_type = ArrayDataType::kFloat; + if (!array.has_shape()) { + Shape* shape = array.mutable_shape(); + *shape->mutable_dims() = dims; + } +} + +void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { + // Merge info about input_arrays from model_flags into model->flags + for (const auto& specified_input_array : model_flags.input_arrays()) { + toco::InputArray* dst_input_array = nullptr; + for (int i = 0; i < model->flags.input_arrays_size(); i++) { + toco::InputArray* candidate_dst_input_array = + model->flags.mutable_input_arrays(i); + if (candidate_dst_input_array->name() == specified_input_array.name()) { + // specified_input_array from model_flags maps to dst_input_array + // in model->flags + dst_input_array = candidate_dst_input_array; + break; + } + } + if (!dst_input_array) { + // specified_input_array from model_flags is not found in model->flags. + // Match a name-less specified input array when there can be no ambiguity + // as there is only 1 input array. + if (model->flags.input_arrays_size() == 1 && + model_flags.input_arrays_size() == 1 && + !specified_input_array.has_name()) { + dst_input_array = model->flags.mutable_input_arrays(0); + } + } + if (!dst_input_array) { + // Still no match, so create a new input array to copy + // specified_input_array into. + dst_input_array = model->flags.add_input_arrays(); + dst_input_array->set_name(specified_input_array.name()); + } + +#define RESOLVE_MODEL_FLAG(field_name) \ + if (specified_input_array.has_##field_name()) { \ + if (dst_input_array->has_##field_name()) { \ + QCHECK_EQ(dst_input_array->field_name(), \ + specified_input_array.field_name()) \ + << "For input array '" << dst_input_array->name() << "', " \ + << "specified " #field_name " flag with value: " \ + << specified_input_array.field_name() \ + << " does not agree with already defined " #field_name \ + " of this model, with value: " \ + << specified_input_array.field_name(); \ + } else { \ + dst_input_array->set_##field_name(specified_input_array.field_name()); \ + } \ + } + RESOLVE_MODEL_FLAG(std_value); + RESOLVE_MODEL_FLAG(mean_value); +#undef RESOLVE_MODEL_FLAG + + if (!specified_input_array.shape().empty()) { + if (!dst_input_array->shape().empty()) { + QCHECK_EQ(specified_input_array.shape().size(), + dst_input_array->shape().size()) + << "For input array '" << specified_input_array.name() << "', " + << "size of specified input shape flag with size: " + << specified_input_array.shape().size() + << " does not agree with already defined input shape" + " of this model, with size: " + << dst_input_array->shape().size(); + // We treat the first dimension as a special case, since it is often + // a batch size and the input_shape flag is effectively overriding + // the model. + for (int i = 1; i < specified_input_array.shape().size(); i++) { + QCHECK_EQ(specified_input_array.shape().Get(i), + dst_input_array->shape().Get(i)) + << "At dimension number " << i << " of input array " + << specified_input_array.name() << ", the specified shape's " + << "dimension flag with dimension: " + << specified_input_array.shape().Get(i) + << " does not agree with already defined shape" + << " of this model, with dimension: " + << dst_input_array->shape().Get(i); + } + } else { + dst_input_array->mutable_shape()->CopyFrom( + specified_input_array.shape()); + } + } + } + + if (model_flags.output_arrays_size() > 0) { + model->flags.mutable_output_arrays()->CopyFrom(model_flags.output_arrays()); + } + +#define RESOLVE_MODEL_FLAG(name) \ + if (model_flags.has_##name()) { \ + if (model->flags.has_##name()) { \ + QCHECK_EQ(model_flags.name(), model->flags.name()) \ + << "Specified " #name " flag with value: " << model_flags.name() \ + << " does not agree with already defined " #name \ + " of this model, with value: " \ + << model->flags.name(); \ + } else { \ + model->flags.set_##name(model_flags.name()); \ + } \ + } + + RESOLVE_MODEL_FLAG(variable_batch) + RESOLVE_MODEL_FLAG(drop_control_dependency) + +#undef RESOLVE_MODEL_FLAG + + if (model->flags.rnn_states_size() == 0) { + model->flags.mutable_rnn_states()->CopyFrom(model_flags.rnn_states()); + } else { + CHECK_EQ(model->flags.rnn_states_size(), model_flags.rnn_states_size()); + for (int i = 0; i < model->flags.rnn_states_size(); i++) { + CHECK_EQ(model->flags.rnn_states(i).state_array(), + model_flags.rnn_states(i).state_array()); + CHECK_EQ(model->flags.rnn_states(i).back_edge_source_array(), + model_flags.rnn_states(i).back_edge_source_array()); + } + } + + if (model->flags.model_checks_size() == 0) { + model->flags.mutable_model_checks()->CopyFrom(model_flags.model_checks()); + } + + QCHECK_GT(model->flags.input_arrays_size(), 0) + << "This model does not define input arrays, so a " + "--input_arrays flag must be given on the command-line."; + QCHECK_GT(model->flags.output_arrays_size(), 0) + << "This model does not define output arrays, so a " + "--output_arrays flag must be given on the command-line."; + + for (const auto& input_array_proto : model->flags.input_arrays()) { + QCHECK(!input_array_proto.shape().empty()) + << "This model does not have shape defined for input array " + << input_array_proto.name() + << ", so one must be specified by a non-empty --input_shape " + "command-line flag."; + + auto& input_array = model->GetOrCreateArray(input_array_proto.name()); + if (input_array.data_type == ArrayDataType::kNone) { + // We start out with a float input array; + // that may get replaced by a uint8 array later, by + // MakeInitialDequantizeOp. + input_array.data_type = ArrayDataType::kFloat; + } + + // Compare/merge the model->flags describing the input_shape with + // the actual input array's shape. + auto& input_array_dims = *input_array.mutable_shape()->mutable_dims(); + if (input_array_dims.empty()) { + for (auto dim : input_array_proto.shape()) { + CHECK_GE(dim, 1); + input_array_dims.push_back(dim); + } + } else { + CHECK_EQ(input_array_dims.size(), input_array_proto.shape_size()); + for (int i = 0; i < input_array_dims.size(); i++) { + CHECK_EQ(input_array_dims[i], input_array_proto.shape(i)); + } + } + + const float mean_value = input_array_proto.mean_value(); + const float std_value = input_array_proto.std_value(); + MinMax input_minmax; + input_minmax.min = (0.f - mean_value) / std_value; + input_minmax.max = (255.f - mean_value) / std_value; + if (input_array.minmax) { + if (input_array_proto.has_mean_value() || + input_array_proto.has_std_value()) { + CHECK(input_minmax == *input_array.minmax) + << input_minmax.min << ", " << input_minmax.max + << " != " << input_array.minmax->min << ", " + << input_array.minmax->max; + } + } else { + input_array.GetOrCreateMinMax() = input_minmax; + } + } + // Creation of the RNN state arrays + for (const auto& rnn_state : model->flags.rnn_states()) { + if (!rnn_state.manually_create()) { + continue; + } + CreateOrCheckRnnStateArray(rnn_state.state_array(), rnn_state.size(), + model); + } +} + +void CheckIsReadyForQuantization(const Model& model) { + for (const auto& op : model.operators) { + for (const auto& input : op->inputs) { + const auto& input_array = model.GetArray(input); + if (input_array.data_type != ArrayDataType::kFloat) { + // The array is not floats, no quantization needed. + continue; + } + if (input_array.minmax) { + // The array has minmax, we're good. + continue; + } + if (input_array.buffer) { + // The array has a constant buffer, so we can + // fall back to computing the minmax from actual array entries + // (with a WARNING about possible accuracy implications). + continue; + } + LOG(FATAL) + << "Array " << input << ", which is an input to the " + << HelpfulOperatorTypeName(*op) << " operator producing the output " + << "array " << op->outputs[0] << ", is lacking min/max data, " + << "which is necessary for quantization. Either target a " + << "non-quantized output format, or change the input graph to " + << "contain min/max information, or pass --default_ranges_min= and " + << "--default_ranges_max= if you do not care about the accuracy of " + << "results."; + } + } +} + +void UseDefaultMinMaxRangeValues(Model* model, double default_ranges_min, + double default_ranges_max) { + for (const auto& op : model->operators) { + for (const auto& input : op->inputs) { + auto& input_array = model->GetArray(input); + if (!input_array.minmax && !input_array.buffer) { + auto& minmax = input_array.GetOrCreateMinMax(); + minmax.min = default_ranges_min; + minmax.max = default_ranges_max; + } + } + for (const auto& output : op->outputs) { + auto& output_array = model->GetArray(output); + if (!output_array.minmax && !output_array.buffer) { + auto& minmax = output_array.GetOrCreateMinMax(); + minmax.min = default_ranges_min; + minmax.max = default_ranges_max; + } + } + } +} + +int ElementSize(ArrayDataType data_type) { + switch (data_type) { + case ArrayDataType::kFloat: + return 4; + case ArrayDataType::kInt32: + return 4; + case ArrayDataType::kUint8: + return 1; + default: + LOG(FATAL) << "Should not get here."; + return 0; + } +} + +void DropMinMax(Model* model, const string& array_name) { + auto& array = model->GetArray(array_name); + if (!!array.minmax) { + LOG(WARNING) << "Dropping MinMax information in array " << array_name + << ". Expect inaccuracy in quantized inference."; + array.minmax = nullptr; + } +} + +bool IsAllocatableTransientArray(const Model& model, const string& array_name) { + // The model's input and output arrays are externally allocated. + // They are not transient arrays. + if (IsInputArray(model, array_name)) { + return false; + } + for (const string& output_array : model.flags.output_arrays()) { + if (array_name == output_array) { + return false; + } + } + const auto& array = model.arrays.at(array_name); + // An array with a constant buffer isn't a transient array. + if (!!array->buffer) { + return false; + } + // An array without shape isn't allocatable. + if (!array->has_shape()) { + return false; + } + return true; +} + +string AvailableArrayName(const Model& model, const string& name) { + if (!model.arrays.count(name)) { + return name; + } + const int kNumSuffixesToTry = 1000; + for (int i = 0; i < kNumSuffixesToTry; i++) { + const string& name_with_suffix = toco::port::StringF("%s_%d", name, i); + if (!model.arrays.count(name_with_suffix)) { + return name_with_suffix; + } + } + LOG(FATAL) << "Could not find an available array name starting with " << name + << ". Tried " << kNumSuffixesToTry << " suffixes, all were taken!"; + return ""; +} + +string ShapeToString(const Shape& shape) { + if (shape.dimensions_count() == 0) { + return "[]"; + } + + return absl::StrCat("[ ", absl::StrJoin(shape.dims(), ", "), " ]"); +} + +void PrintArrayShape(Model* model, const string& name) { + if (!model->arrays[name]->has_shape()) { + LOG(INFO) << name << " has no shape"; + return; + } + LOG(INFO) << name + << " has shape: " << ShapeToString(model->arrays[name]->shape()); +} + +bool IsArrayFullyConnectedWeights(const Model& model, const string& name) { + bool is_fc_weights = false; + bool is_something_else = false; + for (const auto& op : model.operators) { + for (int input_index = 0; input_index < op->inputs.size(); input_index++) { + if (op->inputs[input_index] == name) { + if (op->type == OperatorType::kFullyConnected && input_index == 1) { + is_fc_weights = true; + } else { + is_something_else = true; + } + } + } + } + CHECK(!(is_fc_weights && is_something_else)); + return is_fc_weights; +} + +bool EstimateArithmeticOpsCount(const Model& model, int64* result) { + int64 total = 0; + for (const auto& op : model.operators) { + switch (op->type) { + case OperatorType::kFullyConnected: + case OperatorType::kConv: + case OperatorType::kDepthwiseConv: { + const auto& output_array = model.GetArray(op->outputs[0]); + const auto& weights_array = model.GetArray(op->inputs[1]); + if (!output_array.has_shape() || !weights_array.has_shape()) { + return false; + } + int cols = 1; + for (int i = 0; i < output_array.shape().dimensions_count() - 1; i++) { + cols *= output_array.shape().dims(i); + } + const int64 cost_per_col = + 2 * RequiredBufferSizeForShape(weights_array.shape()); + total += cost_per_col * cols; + if (op->inputs.size() > 2) { + // There is a bias vector. One more op per output value. + total += RequiredBufferSizeForShape(output_array.shape()); + } + break; + } + case OperatorType::kAdd: + case OperatorType::kSub: + case OperatorType::kMul: { + const auto& output_array = model.GetArray(op->outputs[0]); + if (!output_array.has_shape()) { + return false; + } + total += RequiredBufferSizeForShape(output_array.shape()); + break; + } + case OperatorType::kLogistic: + case OperatorType::kSoftmax: + case OperatorType::kTanh: { + const auto& output_array = model.GetArray(op->outputs[0]); + if (!output_array.has_shape()) { + return false; + } + // As a very rough ballpark, the cost of evaluating a math function + // such as tanh or logistic is about 32 multiplications, and about as + // many additions/subtractions. (Just a power-of-two order-of-magnitude + // from looking at actual implementations that we use in runtime/ code). + total += 64 * RequiredBufferSizeForShape(output_array.shape()); + break; + } + case OperatorType::kMaxPool: { + const auto& maxpool = *static_cast(op.get()); + const auto& output_array = model.GetArray(op->outputs[0]); + if (!output_array.has_shape()) { + return false; + } + total += RequiredBufferSizeForShape(output_array.shape()) * + maxpool.kheight * maxpool.kwidth; + break; + } + case OperatorType::kAveragePool: { + const auto& avgpool = + *static_cast(op.get()); + const auto& output_array = model.GetArray(op->outputs[0]); + if (!output_array.has_shape()) { + return false; + } + total += RequiredBufferSizeForShape(output_array.shape()) * + avgpool.kheight * avgpool.kwidth; + break; + } + case OperatorType::kL2Pool: { + const auto* maxpool = static_cast(op.get()); + const auto& output_array = model.GetArray(op->outputs[0]); + if (!output_array.has_shape()) { + return false; + } + // The sum of squares requires (kheight*kwidth) multiply-adds, + // and then there is the sqrt which we ballpark at 32 ops. + const int64 cost_per_val = 2 * maxpool->kheight * maxpool->kwidth + 32; + total += + RequiredBufferSizeForShape(output_array.shape()) * cost_per_val; + break; + } + case OperatorType::kL2Normalization: { + const auto& output_array = model.GetArray(op->outputs[0]); + if (!output_array.has_shape()) { + return false; + } + // Computing the squared L2 norm is N multiply-adds so 2N ops, + // then the single inverse-sqrt is negligible, then we multiply each + // value by the resulting multiplier, so an extra N ops. Total 3N ops. + total += 3 * RequiredBufferSizeForShape(output_array.shape()); + break; + } + default: + break; + } + } + *result = total; + return true; +} + +namespace { + +void GetShuffleShape(AxesOrder input_axes_order, AxesOrder output_axes_order, + std::vector* shuffle) { + CHECK_EQ(AxesCount(input_axes_order), AxesCount(output_axes_order)); + shuffle->resize(4); + for (int i = 0; i < 4; i++) { + (*shuffle)[i] = i; + } + if (input_axes_order == output_axes_order) { + // nothing to do + } else if (AxesCount(input_axes_order) == 2) { + shuffle->resize(2); + (*shuffle)[0] = 1; + (*shuffle)[1] = 0; + } else if (input_axes_order == AxesOrder::kOHWI && + output_axes_order == AxesOrder::kHWIO) { + // 3210 <- 3210 + // HWIO <- OHWI + (*shuffle)[0] = 1; + (*shuffle)[1] = 2; + (*shuffle)[2] = 3; + (*shuffle)[3] = 0; + } else if (input_axes_order == AxesOrder::kHWIO && + output_axes_order == AxesOrder::kOHWI) { + // 3210 <- 3210 + // OHWI <- HWIO + (*shuffle)[0] = 3; + (*shuffle)[1] = 0; + (*shuffle)[2] = 1; + (*shuffle)[3] = 2; + } else { + LOG(FATAL) << "Bad shuffle"; + } +} + +// Extend shuffle is designed to match ExtendShape, which pads the shape with +// unit dimensions at the beginning. +void ExtendShuffle(const std::vector& input_shuffle, int newdim, + std::vector* extended_shuffle) { + *extended_shuffle = input_shuffle; + CHECK(newdim >= input_shuffle.size()); + const int pad_size = newdim - input_shuffle.size(); + extended_shuffle->resize(newdim); + for (int i = 0; i < pad_size; i++) { + (*extended_shuffle)[i] = i; + } + for (int i = pad_size; i < newdim; i++) { + (*extended_shuffle)[i] = input_shuffle[i - pad_size] + pad_size; + } +} + +} // end anonymous namespace + +void ShuffleDims(const Shape& input_shape, AxesOrder input_axes_order, + AxesOrder output_axes_order, Shape* output_shape) { + if (input_axes_order == AxesOrder::kHWIM && + output_axes_order == AxesOrder::k1HWO) { + // This special case isn't just a permutation, the IM pair of dims get + // merged into the 3 dim, so we have to special-case it. + *output_shape = Shape({1, input_shape.dims(0), input_shape.dims(1), + input_shape.dims(3) * input_shape.dims(2)}); + } else { + std::vector shuffle; + GetShuffleShape(input_axes_order, output_axes_order, &shuffle); + std::vector* output_dims = output_shape->mutable_dims(); + output_dims->resize(input_shape.dimensions_count()); + for (int i = 0; i < input_shape.dimensions_count(); i++) { + (*output_dims)[i] = input_shape.dims(shuffle[i]); + } + } +} + +void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order, + AxesOrder output_axes_order, const Shape& output_shape, + const float* input_data, float* output_data) { + if (input_axes_order == AxesOrder::kHWIM && + output_axes_order == AxesOrder::k1HWO) { + // This special case isn't just a permutation, the IM pair of dims get + // merged into the O dim, so we have to special-case it. Fortunately, + // as far as array shuffling is concerned, it's just the identity + // transformation. + memcpy(output_data, input_data, + RequiredBufferSizeForShape(input_shape) * sizeof(output_data[0])); + return; + } + CHECK(input_shape.dimensions_count() == output_shape.dimensions_count()); + const int dim = input_shape.dimensions_count(); + CHECK_LE(dim, 4); + std::vector shuffle; + GetShuffleShape(input_axes_order, output_axes_order, &shuffle); + CHECK(shuffle.size() >= dim); + for (int i = 0; i < dim; i++) { + CHECK(shuffle[i] >= 0 && shuffle[i] < dim); + CHECK(input_shape.dims(shuffle[i]) == output_shape.dims(i)); + } + Shape extended_input_shape = input_shape; + ExtendShape(&extended_input_shape, 4); + Shape extended_output_shape = output_shape; + ExtendShape(&extended_output_shape, 4); + std::vector extended_shuffle; + ExtendShuffle(shuffle, 4, &extended_shuffle); + + const std::vector& extended_input_dims = extended_input_shape.dims(); + const std::vector& extended_output_dims = extended_output_shape.dims(); + + // TODO(starka): Rework to handle different numbers of dimensions. + int input_strides[4]; + input_strides[3] = 1; + input_strides[2] = extended_input_dims[3]; + input_strides[1] = input_strides[2] * extended_input_dims[2]; + input_strides[0] = input_strides[1] * extended_input_dims[1]; + const int input_stride_0 = input_strides[extended_shuffle[3]]; + const int input_stride_1 = input_strides[extended_shuffle[2]]; + const int input_stride_2 = input_strides[extended_shuffle[1]]; + const int input_stride_3 = input_strides[extended_shuffle[0]]; + + const int output_size_0 = extended_output_dims[3]; + const int output_size_1 = extended_output_dims[2]; + const int output_size_2 = extended_output_dims[1]; + const int output_size_3 = extended_output_dims[0]; + const int output_stride_0 = 1; + const int output_stride_1 = output_size_0; + const int output_stride_2 = output_stride_1 * output_size_1; + const int output_stride_3 = output_stride_2 * output_size_2; + + for (int i3 = 0; i3 < output_size_3; i3++) { + const float* const input_ptr_3 = input_data + i3 * input_stride_3; + float* const output_ptr_3 = output_data + i3 * output_stride_3; + for (int i2 = 0; i2 < output_size_2; i2++) { + const float* const input_ptr_2 = input_ptr_3 + i2 * input_stride_2; + float* const output_ptr_2 = output_ptr_3 + i2 * output_stride_2; + for (int i1 = 0; i1 < output_size_1; i1++) { + const float* input_ptr = input_ptr_2 + i1 * input_stride_1; + float* output_ptr = output_ptr_2 + i1 * output_stride_1; + float* const output_ptr_end = + output_ptr + output_size_0 * output_stride_0; + while (output_ptr != output_ptr_end) { + *output_ptr = *input_ptr; + input_ptr += input_stride_0; + output_ptr += output_stride_0; + } + } + } + } +} + +int AxesCount(AxesOrder axes_order) { + switch (axes_order) { + case AxesOrder::kOneAxis: + return 1; + case AxesOrder::kRC: + return 2; + case AxesOrder::kCR: + return 2; + case AxesOrder::kHWIO: + return 4; + case AxesOrder::kOHWI: + return 4; + case AxesOrder::kHWIM: + return 4; + case AxesOrder::k1HWO: + return 4; + case AxesOrder::kNHWC: + return 4; + default: + LOG(FATAL) << "Bad AxesOrder"; + return 0; + } +} + +bool IsDiscardableArray(const Model& model, const string& array_name) { + for (const auto& input_array : model.flags.input_arrays()) { + if (array_name == input_array.name()) { + return false; + } + } + for (const string& output_array : model.flags.output_arrays()) { + if (array_name == output_array) { + return false; + } + } + for (const auto& rnn_state : model.flags.rnn_states()) { + if (array_name == rnn_state.state_array()) { + return false; + } + if (array_name == rnn_state.back_edge_source_array()) { + return false; + } + } + return true; +} + +void CheckFinalDataTypesSatisfied(const Model& model) { + for (const auto& array_entry : model.arrays) { + const auto& array = *array_entry.second; + if (array.final_data_type != ArrayDataType::kNone) { + CHECK(array.final_data_type == array.data_type); + } + } +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h new file mode 100644 index 0000000000..093945edb3 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -0,0 +1,292 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOOLING_UTIL_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOOLING_UTIL_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "google/protobuf/text_format.h" +#include "tensorflow/core/platform/logging.h" +#if TOCO_SUPPORT_PORTABLE_PROTOS +#include "third_party/protobuf/src/google/protobuf/text_format.h" +#endif // TOCO_SUPPORT_PORTABLE_PROTOS +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/toco_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_port.h" + +// TODO(aselle): Replace with using a container specific hash override instead. +namespace std { +template <> +struct hash { + size_t operator()(const toco::OperatorType& op) const { + return std::hash()(static_cast(op)); + } +}; +} // namespace std + +namespace toco { + +constexpr int kLogLevelModelChanged = 1; +constexpr int kLogLevelModelUnchanged = 2; + +string LogName(const Operator& op); + +bool IsInputArray(const Model& model, const string& name); +bool IsArrayConsumed(const Model& model, const string& name); +int CountTrueOutputs(const Model& model, const Operator& op); + +int CountOpsWithInput(const Model& model, const string& array_name); +bool DeleteArrayIfUnused(const string& array_name, Model* model); + +std::vector>::const_iterator FindOpWithOutput( + const Model& model, const string& array_name); +Operator* GetOpWithOutput(const Model& model, const string& array_name); + +std::vector>::iterator FindOpWithOutput( + Model& model, const string& array_name); +Operator* GetOpWithOutput(const Model& model, const string& array_name); + +std::vector>::const_iterator FindOpWithInput( + const Model& model, const string& array_name); +Operator* GetOpWithInput(const Model& model, const string& array_name); +Operator* GetFirstOpWithInput(const Model& model, const string& array_name); + +std::vector>::const_iterator FindOp( + const Model& model, const Operator* op); +std::vector>::iterator FindOp(Model& model, + const Operator* op); + +const char* OperatorTypeName(OperatorType type); +string HelpfulOperatorTypeName(const Operator& op); + +void DumpGraphvizVideoFrame(const Model& model); +void LogDump(int log_level, const string& message, const Model& model); +void LogSummary(int log_level, const string& message, const Model& model); + +inline bool ParseFromStringOverload(const std::string& in, + TFLITE_PROTO_NS::Message* proto) { + return TFLITE_PROTO_NS::TextFormat::ParseFromString(in, proto); +} + +template +bool ParseFromStringEitherTextOrBinary(const std::string& input_file_contents, + Proto* proto) { + if (proto->ParseFromString(input_file_contents)) { + return true; + } + + if (ParseFromStringOverload(input_file_contents, proto)) { + return true; + } + + return false; +} + +// TODO(b/36075966): Clean up when dims superseded by array shape. +void ExtendShape(Shape* shape, int new_shape_size); + +// TODO(b/36075966): Clean up when dims superseded by array shape. +void UnextendShape(Shape* shape, int new_shape_size); + +// Checks (using CHECK) that all dimensions of 'shape' are at least 1. +void CheckShapeDimensions(const Shape& shape); + +// Given two shapes with potentially different dimensionality and dimension +// arrays d0 and d1. Without loss of generality, assume that shape0 may have +// higher dimensionality (length(d0) >= length(d1)). Then shape0 and shape1 +// "agree up to broadcasting" if: +// - When walking the d0 and d1 from back to front with indices i0, i1, +// d0[i0] == d1[i1] or d0[i0] == 1 or d1[i1] == 1, for each dimension until +// i1 == 0 (inclusive). +bool ShapesAgreeUpToBroadcasting(const Shape& shape0, const Shape& shape1); + +// A stricter constraint than ShapesAgreeUpToBroadcasting(). +// +// Given two shapes with potentially different dimensionality and dimension +// arrays d0 and d1. Without loss of generality, assume that shape0 may have +// higher dimensionality (length(d0) >= length(d1)). Then shape0 and shape1 +// "agree up to extending" if: +// - When walking the d0 and d1 from back to front with indices i0, i1, +// d0[i0] == d1[i1] for each dimension until i1 == 0 (inclusive). +// - For the remaining indices [0..i0), d0[i0] == 1. +bool ShapesAgreeUpToExtending(const Shape& shape0, const Shape& shape1); + +bool IsArrayFullyConnectedWeights(const Model& model, const string& name); + +// If there is a wildcard dimension (-1), this may return a negative value. +int RequiredBufferSizeForShape(const Shape& shape); + +bool IsConstantParameterArray(const Model& model, const string& name); + +void CheckNoMissingArray(const Model& model); +void CheckInvariants(const Model& model); + +void CheckModelCounts(const Model& model); + +void FixOperatorOrdering(Model* model); +void FixNoMissingArray(Model* model); +void FixNoOrphanedArray(Model* model); + +void ResolveModelFlags(const ModelFlags& model_flags, Model* model); + +template +void GetQuantizationParamsFromMinMax(const ModelFlags& model_flags, + const MinMax& minmax, + QuantizationParams* quantization_params) { + using Integer = DataType; + const Integer qmin = std::numeric_limits::min(); + const Integer qmax = std::numeric_limits::max(); + const double qmin_double = qmin; + const double qmax_double = qmax; + const double rmin = minmax.min; + const double rmax = minmax.max; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + CHECK_LE(rmin, 0.); + CHECK_GE(rmax, 0.); + if (rmin == rmax) { + // Special case where the min,max range is a point. Should be {0}. + CHECK_EQ(rmin, 0.); + CHECK_EQ(rmax, 0.); + quantization_params->zero_point = 0; + quantization_params->scale = 0.; + return; + } + + // General case. + // + // First determine the scale. + const double scale = (rmax - rmin) / (qmax_double - qmin_double); + + // Zero-point computation. + // First the initial floating-point computation. The zero-point can be + // determined from solving an affine equation for any known pair + // (real value, corresponding quantized value). + // We know two such pairs: (rmin, qmin) and (rmax, qmax). + // The arithmetic error on the zero point computed from either pair + // will be roughly machine_epsilon * (sum of absolute values of terms) + // so we want to use the variant that adds the smaller terms. + const double zero_point_from_min = qmin_double - rmin / scale; + const double zero_point_from_max = qmax_double - rmax / scale; + const double zero_point_from_min_error = + std::abs(qmin_double) + std::abs(rmin / scale); + const double zero_point_from_max_error = + std::abs(qmax_double) + std::abs(rmax / scale); + + const double zero_point_double = + zero_point_from_min_error < zero_point_from_max_error + ? zero_point_from_min + : zero_point_from_max; + + // Now we need to nudge the zero point to be an integer + // (our zero points are integer, and this is motivated by the requirement + // to be able to represent the real value "0" exactly as a quantized value, + // which is required in multiple places, for example in Im2col with SAME + // padding). + Integer nudged_zero_point = 0; + if (zero_point_double < qmin_double) { + nudged_zero_point = qmin; + } else if (zero_point_double > qmax_double) { + nudged_zero_point = qmax; + } else { + nudged_zero_point = static_cast(std::round(zero_point_double)); + } + // The zero point should always be in the range of quantized value, + // [qmin, qmax]. + CHECK_GE(nudged_zero_point, qmin); + CHECK_LE(nudged_zero_point, qmax); + + // Finally, store the result nudged quantization params. + quantization_params->zero_point = nudged_zero_point; + quantization_params->scale = scale; +} + +void CheckIsReadyForQuantization(const Model& model); +void UseDefaultMinMaxRangeValues(Model* model, double default_ranges_min, + double default_ranges_max); + +inline int Offset(const Shape& shape, const std::vector& indices) { + DCHECK_EQ(shape.dimensions_count(), indices.size()); + const int dims_count = shape.dimensions_count(); + int offset = 0; + for (int i = 0; i < dims_count; i++) { + const int index = indices[i]; + DCHECK(index >= 0 && index < shape.dims(i)); + offset *= shape.dims(i); + offset += index; + } + return offset; +} + +inline std::vector ReverseOffset(const Shape& shape, int index) { + DCHECK_GE(index, 0); + DCHECK_LT(index, RequiredBufferSizeForShape(shape)); + const int dims_count = shape.dimensions_count(); + std::vector indices(dims_count); + int residual = index; + for (int i = dims_count - 1; i >= 0; i--) { + indices[i] = residual % shape.dims(i); + residual /= shape.dims(i); + } + return indices; +} + +int ElementSize(ArrayDataType data_type); + +void DropMinMax(Model* model, const string& array_name); + +bool IsAllocatableTransientArray(const Model& model, const string& array_name); + +void CreateOrCheckRnnStateArray(const string& name, int size, Model* model); + +string AvailableArrayName(const Model& model, const string& name); + +// Formats a shape as a string: [ dims(0), dims(1), ..., dims(num_dims-1) ]. +string ShapeToString(const Shape& shape); + +void PrintArrayShape(Model* model, const string& name); + +void MakeArrayDims(int num_dims, int batch, int height, int width, int depth, + std::vector* out_dims); + +bool EstimateArithmeticOpsCount(const Model& model, int64* result); + +int AxesCount(AxesOrder axes_order); + +void ShuffleDims(const Shape& input_shape, AxesOrder input_axes_order, + AxesOrder output_axes_order, Shape* output_shape); +void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order, + AxesOrder output_axes_order, const Shape& output_shape, + const float* input_data, float* output_data); + +// Returns true if it may be OK for any graph transformation to ever discard +// that array. The idea is that we can't ever discard arrays that are either +// an input or an output of the whole graph, or that appear in RNN back-edges, +// as that would undercut explicit flags that the user might pass. +bool IsDiscardableArray(const Model& model, const string& array_name); + +void CheckFinalDataTypesSatisfied(const Model& model); + +} // namespace toco + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOOLING_UTIL_H_ diff --git a/tensorflow/contrib/lite/toco/tooling_util_test.cc b/tensorflow/contrib/lite/toco/tooling_util_test.cc new file mode 100644 index 0000000000..22955ce956 --- /dev/null +++ b/tensorflow/contrib/lite/toco/tooling_util_test.cc @@ -0,0 +1,96 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +enum class Agreement { kBroadcast, kExtend, kBroadcastNotExtend, kNeither }; + +// A pair of Shapes and whether they should agree up to broadcasting, extending +// or neither. +struct ShapePair { + Shape left; + Shape right; + Agreement agreement; +}; + +std::vector CreateShapePairs() { + return std::vector( + {// These agree up to broadcast. + {Shape({3}), Shape({3}), Agreement::kBroadcast}, + {Shape({256, 256, 3}), Shape({256, 256, 3}), Agreement::kBroadcast}, + {Shape({256, 256, 3}), Shape({3}), Agreement::kBroadcast}, + {Shape({8, 1, 6, 1}), Shape({7, 1, 5}), Agreement::kBroadcast}, + + // These extend (and therefore broadcast). + {Shape({3}), Shape({3}), Agreement::kExtend}, + {Shape({256, 256, 3}), Shape({256, 256, 3}), Agreement::kExtend}, + {Shape({1, 1, 3}), Shape({1, 1, 3}), Agreement::kExtend}, + {Shape({1, 1, 3}), Shape({3}), Agreement::kExtend}, + {Shape({1, 1, 3}), Shape({1, 3}), Agreement::kExtend}, + + // These strictly broadcast and do not extend. + {Shape({256, 256, 3}), Shape({3}), Agreement::kBroadcastNotExtend}, + {Shape({5, 4}), Shape({1}), Agreement::kBroadcastNotExtend}, + {Shape({5, 4}), Shape({4}), Agreement::kBroadcastNotExtend}, + {Shape({15, 3, 5}), Shape({15, 1, 5}), Agreement::kBroadcastNotExtend}, + {Shape({15, 3, 5}), Shape({3, 5}), Agreement::kBroadcastNotExtend}, + {Shape({15, 3, 5}), Shape({3, 1}), Agreement::kBroadcastNotExtend}, + + // These do not broadcast (and therefore also do not extend). + {Shape({3}), Shape({4}), Agreement::kNeither}, + {Shape({2, 1}), Shape({8, 4, 3}), Agreement::kNeither}}); +} + +// ShapeTest is an empty parameterized test fixture since there is no state. +class ShapeTest : public ::testing::TestWithParam {}; + +TEST_P(ShapeTest, Agrees) { + const ShapePair& param = GetParam(); + + switch (param.agreement) { + case Agreement::kBroadcast: { + EXPECT_TRUE(ShapesAgreeUpToBroadcasting(param.left, param.right)); + break; + } + case Agreement::kExtend: { + EXPECT_TRUE(ShapesAgreeUpToExtending(param.left, param.right)); + // Anything that extends should also broadcast. + EXPECT_TRUE(ShapesAgreeUpToBroadcasting(param.left, param.right)); + break; + } + case Agreement::kBroadcastNotExtend: { + // Verify that it strictly broadcasts but does not extend. + EXPECT_TRUE(ShapesAgreeUpToBroadcasting(param.left, param.right)); + EXPECT_FALSE(ShapesAgreeUpToExtending(param.left, param.right)); + break; + } + case Agreement::kNeither: { + EXPECT_FALSE(ShapesAgreeUpToExtending(param.left, param.right)); + EXPECT_FALSE(ShapesAgreeUpToBroadcasting(param.left, param.right)); + break; + } + } +} + +INSTANTIATE_TEST_CASE_P(AgreeBroadcast, ShapeTest, + ::testing::ValuesIn(CreateShapePairs())); + +} // namespace toco diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD new file mode 100644 index 0000000000..2d918fd4e8 --- /dev/null +++ b/tensorflow/contrib/lite/tools/BUILD @@ -0,0 +1,60 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +cc_binary( + name = "generate_op_registrations", + srcs = ["gen_op_registration_main.cc"], + deps = [ + "//tensorflow/contrib/lite/tools:gen_op_registration", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "gen_op_registration", + srcs = ["gen_op_registration.cc"], + hdrs = ["gen_op_registration.h"], + deps = [ + "//tensorflow/contrib/lite:framework", + "@com_googlesource_code_re2//:re2", + ], +) + +cc_test( + name = "gen_op_registration_test", + srcs = ["gen_op_registration_test.cc"], + data = [ + "//tensorflow/contrib/lite:testdata/0_subgraphs.bin", + "//tensorflow/contrib/lite:testdata/2_subgraphs.bin", + "//tensorflow/contrib/lite:testdata/empty_model.bin", + "//tensorflow/contrib/lite:testdata/test_model.bin", + "//tensorflow/contrib/lite:testdata/test_model_broken.bin", + ], + deps = [ + ":gen_op_registration", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "mutable_op_resolver", + srcs = ["mutable_op_resolver.cc"], + hdrs = ["mutable_op_resolver.h"], + deps = ["//tensorflow/contrib/lite:framework"], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/lite/tools/gen_op_registration.cc b/tensorflow/contrib/lite/tools/gen_op_registration.cc new file mode 100644 index 0000000000..57c2567e3b --- /dev/null +++ b/tensorflow/contrib/lite/tools/gen_op_registration.cc @@ -0,0 +1,46 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "third_party/re2/re2.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { + +string NormalizeCustomOpName(const string& op) { + string method(op); + RE2::GlobalReplace(&method, "([a-z])([A-Z])", "\\1_\\2"); + std::transform(method.begin(), method.end(), method.begin(), ::toupper); + return method; +} + +void ReadOpsFromModel(const ::tflite::Model* model, + std::vector* builtin_ops, + std::vector* custom_ops) { + if (!model) return; + auto opcodes = model->operator_codes(); + if (!opcodes) return; + for (const auto* opcode : *opcodes) { + if (opcode->builtin_code() != ::tflite::BuiltinOperator_CUSTOM) { + builtin_ops->push_back( + tflite::EnumNameBuiltinOperator(opcode->builtin_code())); + } else { + custom_ops->push_back(opcode->custom_code()->c_str()); + } + } +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/tools/gen_op_registration.h b/tensorflow/contrib/lite/tools/gen_op_registration.h new file mode 100644 index 0000000000..363bb2335c --- /dev/null +++ b/tensorflow/contrib/lite/tools/gen_op_registration.h @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_GEN_OP_REGISTRATION_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_GEN_OP_REGISTRATION_H_ + +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { + +// Convert the custom op name to registration name following the convention. +// Example: +// "custom_op" -> "CUSTOM_OP" +// "CustomOp" -> "CUSTOM_OP" +// Note "Register_" suffix will be added later in the tool. +string NormalizeCustomOpName(const string& op); + +// Read ops from the TFLite model. +// Enum name of builtin ops will be stored, such as "CONV_2D". +// Custom op name will be stored as it is. +void ReadOpsFromModel(const ::tflite::Model* model, + std::vector* builtin_ops, + std::vector* custom_ops); + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_GEN_OP_REGISTRATION_H_ diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc new file mode 100644 index 0000000000..7b27066a21 --- /dev/null +++ b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc @@ -0,0 +1,98 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/tools/gen_op_registration.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/util/command_line_flags.h" + +using tensorflow::Flag; +using tensorflow::Flags; + +namespace { + +void GenerateFileContent(const string& filename, + const std::vector& builtin_ops, + const std::vector& custom_ops) { + std::ofstream fout(filename); + + fout << "#include " + "\"third_party/tensorflow/contrib/lite/model.h\"\n"; + fout << "#include " + "\"third_party/tensorflow/contrib/lite/tools/mutable_op_resolver.h\"\n"; + fout << "namespace tflite {\n"; + fout << "namespace ops {\n"; + if (!builtin_ops.empty()) { + fout << "namespace builtin {\n"; + fout << "// Forward-declarations for the builtin ops.\n"; + for (const auto& op : builtin_ops) { + fout << "TfLiteRegistration* Register_" << op << "();\n"; + } + fout << "} // namespace builtin\n"; + } + + if (!custom_ops.empty()) { + fout << "namespace custom {\n"; + fout << "// Forward-declarations for the custom ops.\n"; + for (const auto& op : custom_ops) { + fout << "TfLiteRegistration* Register_" + << ::tflite::NormalizeCustomOpName(op) << "();\n"; + } + fout << "} // namespace custom\n"; + } + fout << "} // namespace ops\n"; + fout << "} // namespace tflite\n"; + + fout << "void RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {\n"; + for (const auto& op : builtin_ops) { + fout << " resolver->AddBuiltin(::tflite::BuiltinOperator_" << op + << ", ::tflite::ops::builtin::Register_" << op << "());\n"; + } + for (const auto& op : custom_ops) { + fout << " resolver->AddCustom(\"" << op + << "\", ::tflite::ops::custom::Register_" + << ::tflite::NormalizeCustomOpName(op) << "());\n"; + } + fout << "}\n"; + fout.close(); +} +} // namespace + +int main(int argc, char** argv) { + string input_model; + string output_registration; + std::vector flag_list = { + Flag("input_model", &input_model, "path to the tflite model"), + Flag("output_registration", &output_registration, + "filename for generated registration code"), + }; + Flags::Parse(&argc, argv, flag_list); + + tensorflow::port::InitMain(argv[0], &argc, &argv); + std::vector builtin_ops; + std::vector custom_ops; + + std::ifstream fin(input_model); + std::stringstream content; + content << fin.rdbuf(); + const ::tflite::Model* model = ::tflite::GetModel(content.str().data()); + ::tflite::ReadOpsFromModel(model, &builtin_ops, &custom_ops); + GenerateFileContent(output_registration, builtin_ops, custom_ops); + return 0; +} diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_test.cc b/tensorflow/contrib/lite/tools/gen_op_registration_test.cc new file mode 100644 index 0000000000..c65cffe340 --- /dev/null +++ b/tensorflow/contrib/lite/tools/gen_op_registration_test.cc @@ -0,0 +1,87 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/tools/gen_op_registration.h" +#include +#include + +using ::testing::ElementsAreArray; + +namespace tflite { + +class GenOpRegistrationTest : public ::testing::Test { + protected: + GenOpRegistrationTest() {} + + void ReadOps(const string& model_path) { + auto model = FlatBufferModel::BuildFromFile(model_path.data()); + if (model) { + ReadOpsFromModel(model->GetModel(), &builtin_ops_, &custom_ops_); + } + } + + std::vector builtin_ops_; + std::vector custom_ops_; +}; + +TEST_F(GenOpRegistrationTest, TestNonExistantFiles) { + ReadOps("/tmp/tflite_model_1234"); + EXPECT_EQ(builtin_ops_.size(), 0); + EXPECT_EQ(custom_ops_.size(), 0); +} + +TEST_F(GenOpRegistrationTest, TestModels) { + ReadOps("third_party/tensorflow/contrib/lite/testdata/test_model.bin"); + EXPECT_THAT(builtin_ops_, ElementsAreArray({"CONV_2D"})); + EXPECT_THAT(custom_ops_, ElementsAreArray({"testing_op"})); +} + +TEST_F(GenOpRegistrationTest, TestEmptyModels) { + ReadOps("third_party/tensorflow/contrib/lite/testdata/empty_model.bin"); + EXPECT_EQ(builtin_ops_.size(), 0); + EXPECT_EQ(custom_ops_.size(), 0); +} + +TEST_F(GenOpRegistrationTest, TestZeroSubgraphs) { + ReadOps("third_party/tensorflow/contrib/lite/testdata/0_subgraphs.bin"); + EXPECT_EQ(builtin_ops_.size(), 0); + EXPECT_EQ(custom_ops_.size(), 0); +} + +TEST_F(GenOpRegistrationTest, TestBrokenMmap) { + ReadOps("third_party/tensorflow/contrib/lite/testdata/test_model_broken.bin"); + EXPECT_EQ(builtin_ops_.size(), 0); + EXPECT_EQ(custom_ops_.size(), 0); +} + +TEST_F(GenOpRegistrationTest, TestNormalizeCustomOpName) { + std::vector> testcase = { + {"CustomOp", "CUSTOM_OP"}, + {"a", "A"}, + {"custom_op", "CUSTOM_OP"}, + {"customop", "CUSTOMOP"}, + }; + + for (const auto& test : testcase) { + EXPECT_EQ(NormalizeCustomOpName(test.first), test.second); + } +} +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: FLAGS_logtostderr = true; + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.cc b/tensorflow/contrib/lite/tools/mutable_op_resolver.cc new file mode 100644 index 0000000000..8a921d7c5a --- /dev/null +++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.cc @@ -0,0 +1,43 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +namespace tflite { + +TfLiteRegistration* MutableOpResolver::FindOp( + tflite::BuiltinOperator op) const { + auto it = builtins_.find(op); + return it != builtins_.end() ? it->second : nullptr; +} + +TfLiteRegistration* MutableOpResolver::FindOp(const char* op) const { + auto it = custom_ops_.find(op); + return it != custom_ops_.end() ? it->second : nullptr; +} + +void MutableOpResolver::AddBuiltin(tflite::BuiltinOperator op, + TfLiteRegistration* registration) { + registration->builtin_code = op; + builtins_.insert(std::make_pair(op, registration)); +} + +void MutableOpResolver::AddCustom(const char* name, + TfLiteRegistration* registration) { + registration->builtin_code = BuiltinOperator_CUSTOM; + custom_ops_.insert(std::make_pair(std::string(name), registration)); +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h new file mode 100644 index 0000000000..9546c32427 --- /dev/null +++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h @@ -0,0 +1,45 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_MUTABLE_OP_RESOLVER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_MUTABLE_OP_RESOLVER_H_ + +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { + +// An OpResolver that is mutable, also used as the op in gen_op_registration. +// A typical usage: +// MutableOpResolver resolver; +// resolver.AddBuiltin(BuiltinOperator_ADD, Register_ADD()); +// resolver.AddCustom("CustomOp", Register_CUSTOM_OP()); +// InterpreterBuilder(model, resolver)(&interpreter); +class MutableOpResolver : public OpResolver { + public: + MutableOpResolver() {} + TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override; + TfLiteRegistration* FindOp(const char* op) const override; + void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration); + void AddCustom(const char* name, TfLiteRegistration* registration); + + private: + std::unordered_map builtins_; + std::unordered_map custom_ops_; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_MUTABLE_OP_RESOLVER_H_ diff --git a/tensorflow/contrib/lite/version.h b/tensorflow/contrib/lite/version.h new file mode 100644 index 0000000000..a751afabe7 --- /dev/null +++ b/tensorflow/contrib/lite/version.h @@ -0,0 +1,23 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_VERSION_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_VERSION_H_ + +// The version number of the Schema. Ideally all changes will be backward +// compatible. If that ever changes, we must ensure that version is the first +// entry in the new tflite root so that we can see that version is not 1. +#define TFLITE_SCHEMA_VERSION (3) + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_VERSION_H_ diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index 8d4e4c23dc..cff672c9df 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -401,9 +401,14 @@ cmd_status(){ } # Run bazel build --nobuild to test the validity of the BUILD files +# TODO(mikecase): Remove TF Lite exclusion from this list. Exclusion is +# necessary since the @androidsdk WORKSPACE dependency is commented +# commented out by default in TF WORKSPACE file. do_bazel_nobuild() { BUILD_TARGET="//tensorflow/..." - BUILD_CMD="bazel build --nobuild ${BAZEL_FLAGS} ${BUILD_TARGET}" + BUILD_TARGET="${BUILD_TARGET} -//tensorflow/contrib/lite/java/demo/app/src/main/..." + BUILD_TARGET="${BUILD_TARGET} -//tensorflow/contrib/lite/schema/..." + BUILD_CMD="bazel build --nobuild ${BAZEL_FLAGS} -- ${BUILD_TARGET}" ${BUILD_CMD} diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh index 5de5a379ac..df6016504c 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh @@ -33,4 +33,35 @@ yes "" | $PYTHON_BIN_PATH configure.py bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ --test_output=errors -- \ - //tensorflow/contrib/... + //tensorflow/contrib/... \ + -//tensorflow/contrib/lite/... \ + //tensorflow/contrib/lite:context_test \ + //tensorflow/contrib/lite:framework \ + //tensorflow/contrib/lite:interpreter_test \ + //tensorflow/contrib/lite:model_test \ + //tensorflow/contrib/lite/toco:toco \ + //tensorflow/contrib/lite:simple_memory_arena_test \ + //tensorflow/contrib/lite:string_util_test \ + //tensorflow/contrib/lite/kernels:activations_test \ + //tensorflow/contrib/lite/kernels:add_test \ + //tensorflow/contrib/lite/kernels:basic_rnn_test \ + //tensorflow/contrib/lite/kernels:concatenation_test \ + //tensorflow/contrib/lite/kernels:conv_test \ + //tensorflow/contrib/lite/kernels:depthwise_conv_test \ + //tensorflow/contrib/lite/kernels:embedding_lookup_test \ + //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test \ + //tensorflow/contrib/lite/kernels:fully_connected_test \ + //tensorflow/contrib/lite/testing:generated_examples_zip_test \ + //tensorflow/contrib/lite/kernels:hashtable_lookup_test \ + //tensorflow/contrib/lite/kernels:local_response_norm_test \ + //tensorflow/contrib/lite/kernels:lsh_projection_test \ + //tensorflow/contrib/lite/kernels:lstm_test \ + //tensorflow/contrib/lite/kernels:l2norm_test \ + //tensorflow/contrib/lite/kernels:mul_test \ + //tensorflow/contrib/lite/kernels:pooling_test \ + //tensorflow/contrib/lite/kernels:reshape_test \ + //tensorflow/contrib/lite/kernels:resize_bilinear_test \ + //tensorflow/contrib/lite/kernels:skip_gram_test \ + //tensorflow/contrib/lite/kernels:softmax_test \ + //tensorflow/contrib/lite/kernels:space_to_depth_test \ + //tensorflow/contrib/lite/kernels:svdf_test diff --git a/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh b/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh index 8042522ef8..ddaaddc917 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh @@ -34,4 +34,4 @@ bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac \ --test_timeout 300,450,1200,3600 \ --test_size_filters=small,medium \ --jobs=${N_JOBS} --build_tests_only --test_output=errors -k -- \ - //tensorflow/contrib/... + //tensorflow/contrib/... -//tensorflow/contrib/lite/... diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index c6e577223f..a3ab40ceef 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -158,6 +158,9 @@ sh_binary( "//tensorflow/contrib/graph_editor:graph_editor_pip", "//tensorflow/contrib/keras:keras", "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", + "//tensorflow/contrib/lite/toco:toco", + "//tensorflow/contrib/lite/toco/python:toco_wrapper", + "//tensorflow/contrib/lite/toco/python:toco_from_protos", "//tensorflow/contrib/ndlstm:ndlstm", "//tensorflow/contrib/nn:nn_py", "//tensorflow/contrib/predictor:predictor_pip", diff --git a/tensorflow/tools/pip_package/MANIFEST.in b/tensorflow/tools/pip_package/MANIFEST.in index ef6cf56421..86c5e4776d 100644 --- a/tensorflow/tools/pip_package/MANIFEST.in +++ b/tensorflow/tools/pip_package/MANIFEST.in @@ -4,6 +4,7 @@ recursive-include * *.so recursive-include * *.dll recursive-include * *.lib recursive-include * *.csv +recursive-include tensorflow/aux-bin * recursive-include tensorflow/include/tensorflow *.h recursive-include tensorflow/include/Eigen * recursive-include tensorflow/include/external * diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index cbf06a97d0..8249703ba7 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -137,6 +137,9 @@ function main() { fi fi fi + # Install toco as a binary in aux-bin. + mkdir "${TMPDIR}/tensorflow/aux-bin" + cp bazel-bin/tensorflow/contrib/lite/toco/toco ${TMPDIR}/tensorflow/aux-bin/ fi # protobuf pip package doesn't ship with header files. Copy the headers diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 456c2e2908..60282f6aa3 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -69,6 +69,8 @@ if sys.version_info < (3, 4): # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ 'freeze_graph = tensorflow.python.tools.freeze_graph:main', + 'toco_from_protos = tensorflow.contrib.lite.toco.python.toco_from_protos:main', + 'toco = tensorflow.contrib.lite.toco.python.toco_wrapper:main', 'saved_model_cli = tensorflow.python.tools.saved_model_cli:main', # We need to keep the TensorBoard command, even though the console script # is now declared by the tensorboard pip package. If we remove the @@ -188,7 +190,6 @@ headers = (list(find_files('*.h', 'tensorflow/core')) + list(find_files('*', 'external/eigen_archive')) + list(find_files('*.h', 'external/nsync/public'))) - setup( name=project_name, version=_VERSION.replace('-', ''), diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD index a426db0c50..e1563103c8 100644 --- a/third_party/flatbuffers/flatbuffers.BUILD +++ b/third_party/flatbuffers/flatbuffers.BUILD @@ -104,6 +104,10 @@ cc_binary( "grpc/", "include/", ], + linkopts = [ + "-lm", + "-ldl", + ], deps = [ ":flatc_library", ], -- GitLab From 005f1183c7a1c0f71f5ec614a3c3139972b1bec3 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 10 Nov 2017 10:38:17 -0800 Subject: [PATCH 0275/1801] Fixes API difference between eager and graph cond. PiperOrigin-RevId: 175307853 --- tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 5 +++-- tensorflow/python/ops/control_flow_ops.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index a21182beba..fc125daf38 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -2856,11 +2856,12 @@ class EagerTest(test.TestCase): def testCond(self): with context.eager_mode(): pred = math_ops.less(1, 2) - fn1 = lambda: constant_op.constant(10) - fn2 = lambda: constant_op.constant(20) + fn1 = lambda: [constant_op.constant(10)] + fn2 = lambda: [constant_op.constant(20)] r = control_flow_ops.cond(pred, fn1, fn2) self.assertAllEqual(r.numpy(), 10) + self.assertFalse(isinstance(r, list)) def testWhileLoop(self): with context.eager_mode(): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 8afb079d20..d33d4cd597 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1877,8 +1877,8 @@ def cond(pred, true_fn=None, false_fn=None, strict=False, name=None, with ops.name_scope(name, "cond", [pred]): if context.in_eager_mode(): if pred: - return true_fn() - return false_fn() + return _UnpackIfSingleton(true_fn()) + return _UnpackIfSingleton(false_fn()) # Add the Switch to the graph. if isinstance(pred, bool): -- GitLab From 8d15389ad4bd528e4dbedd829bc35f99788674af Mon Sep 17 00:00:00 2001 From: James Qin Date: Fri, 10 Nov 2017 11:04:16 -0800 Subject: [PATCH 0276/1801] Include memory size info in error message. PiperOrigin-RevId: 175311543 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index ad8164c7f9..99bed86a17 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -962,7 +962,8 @@ class CudnnDropoutDescriptor : public CudnnDescriptorCommon { if (!allocated.ok() || (state_memory = allocated.ValueOrDie()) == nullptr) { string error_msg = - port::StrCat("Fail to allocate Cudnn dropout state memory"); + port::StrCat("Failed to allocate Cudnn dropout state memory of ", + state_sizes_in_bytes, " bytes."); status_ = port::Status(port::error::UNKNOWN, error_msg); LOG(ERROR) << error_msg; return; @@ -971,7 +972,10 @@ class CudnnDropoutDescriptor : public CudnnDescriptorCommon { status = wrap::cudnnSetDropoutDescriptor(parent_, handle_, cudnn_handle, dropout, state_memory.opaque(), state_memory.size(), seed); - CUDNN_RETURN_IF_FAIL(status, "Failed to set dropout descriptor"); + CUDNN_RETURN_IF_FAIL( + status, port::StrCat( + "Failed to set dropout descriptor with state memory size: ", + state_memory.size(), " bytes.")); } ~CudnnDropoutDescriptor() { @@ -1476,7 +1480,8 @@ bool CreateRnnWorkspace(Stream* stream, CUDAExecutor* parent, auto allocated = workspace_allocator->AllocateBytes(stream, workspace_size_in_bytes); if (!allocated.ok() || (*workspace = allocated.ValueOrDie()) == nullptr) { - LOG(ERROR) << "Failed to allocate RNN workspace"; + LOG(ERROR) << port::StrCat("Failed to allocate RNN workspace of ", + workspace_size_in_bytes, " bytes."); return false; } } else { @@ -1553,7 +1558,8 @@ bool CudnnSupport::DoRnnForwardImpl( stream, reserve_space_size_in_bytes); if (!allocated.ok() || (reserve_space = allocated.ValueOrDie()) == nullptr) { - LOG(ERROR) << "Fail to allocate RNN reserve space"; + LOG(ERROR) << "Failed to allocate RNN reserve space of " + << reserve_space_size_in_bytes << " bytes."; return false; } } -- GitLab From 550f7220aaa166faedff43886d63b5eb5e33649a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 11:23:15 -0800 Subject: [PATCH 0277/1801] Skip generating input / output properties for _Send and _Recv ops if those ops are not created from VirtualScheduler. PiperOrigin-RevId: 175314193 --- .../core/grappler/costs/virtual_scheduler.cc | 29 ++- .../grappler/costs/virtual_scheduler_test.cc | 171 ++++++++++++++++++ tensorflow/core/grappler/grappler_item.cc | 15 ++ 3 files changed, 212 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index d5625ae58f..2ab3a9144c 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -154,6 +154,16 @@ Status VirtualScheduler::Init() { name_to_node[node->name()] = node; } + // TODO(dyoon): Instead of identifying _Send node here manually, add _Send + // to _Recv as control dependency when creating GrapplerItem. + std::unordered_map name_to_send; + for (const auto& node : graph.node()) { + if (node.op() == "_Send") { + const auto& attr = node.attr(); + name_to_send[attr.at("tensor_name").s()] = &node; + } + } + // To reuse _Recv ops. std::unordered_map @@ -164,7 +174,17 @@ Status VirtualScheduler::Init() { for (const auto* curr_node : nodes) { auto& curr_node_state = GetNodeStateOrCreateIt(curr_node); const string curr_node_device = DeviceName(curr_node); - for (const string& input_node_name : curr_node->input()) { + std::vector inputs; + if (IsRecv(*curr_node)) { + const auto& attr = curr_node->attr(); + const NodeDef* send = name_to_send[attr.at("tensor_name").s()]; + inputs = {send->name()}; + } else { + for (const string& input : curr_node->input()) { + inputs.push_back(input); + } + } + for (const string& input_node_name : inputs) { // Note that input_node_name may be in : // format, where (e.g., "^" for control dependency) and // ":" may be omitted. NodeName() extracts only the node_name. @@ -219,7 +239,7 @@ Status VirtualScheduler::Init() { // Default case: node without inputs are ready at time 0. const bool has_no_inputs = curr_node->input().empty(); - if (given_as_feed || has_no_inputs) { + if (!IsRecv(*curr_node) && (given_as_feed || has_no_inputs)) { curr_node_state.time_ready = Costs::Duration(); ready_nodes_->AddNode(curr_node); VLOG(3) << "Added ready node: " << curr_node->name(); @@ -254,7 +274,10 @@ void VirtualScheduler::MaybeUpdateInputOutput(const NodeDef* node) { // This method is called when NodeState is created and adds input and output // properties for a few exceptional cases that GraphProperties cannot provide // input/output properties. - if (IsSend(*node) || IsRecv(*node)) { + if ((IsSend(*node) || IsRecv(*node)) && node->attr().count(kAttrInputSrc)) { + // _Send and _Recv ops created from VirtualScheduler have kAttrInputSrc + // attr; normal _Send and _Recv ops (from the input graph) do not have that + // attr. auto& node_state = node_map_[node]; auto& inputs = node_state.input_properties; auto& outputs = node_state.output_properties; diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc index d291a04308..40548b5a07 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc @@ -265,6 +265,127 @@ class VirtualSchedulerTest : public ::testing::Test { dependency_["z4"] = {"bn"}; } + void CreateGrapplerItemWithSendRecv() { + const string gdef_ascii = R"EOF( +node { + name: "Const" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.1415 + } + } + } +} +node { + name: "Send" + op: "_Send" + input: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "client_terminated" + value { + b: false + } + } + attr { + key: "recv_device" + value { + s: "/job:localhost/replica:0/task:0/device:CPU:0" + } + } + attr { + key: "send_device" + value { + s: "/job:localhost/replica:0/task:0/device:CPU:0" + } + } + attr { + key: "send_device_incarnation" + value { + i: 0 + } + } + attr { + key: "tensor_name" + value { + s: "test" + } + } +} +node { + name: "Recv" + op: "_Recv" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "client_terminated" + value { + b: false + } + } + attr { + key: "recv_device" + value { + s: "/job:localhost/replica:0/task:0/device:CPU:0" + } + } + attr { + key: "send_device" + value { + s: "/job:localhost/replica:0/task:0/device:CPU:0" + } + } + attr { + key: "send_device_incarnation" + value { + i: 0 + } + } + attr { + key: "tensor_name" + value { + s: "test" + } + } + attr { + key: "tensor_type" + value { + type: DT_FLOAT + } + } +} +library { +} +versions { + producer: 24 +} + )EOF"; + + grappler_item_.reset(new GrapplerItem); + CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, + &grappler_item_->graph)); + grappler_item_->id = "test_graph"; + grappler_item_->fetch = {"Recv"}; + } + // A simple while loop void CreateGrapplerItemWithLoop() { // Test graph produced in python using: @@ -743,6 +864,7 @@ versions { do { OpContext op_context = scheduler_->GetCurrNode(); ops_executed[op_context.name] = op_context; + std::cout << op_context.name << std::endl; Costs node_costs = SimplePredictCosts(op_context); @@ -1530,5 +1652,54 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) { EXPECT_EQ(get_output_size(recv_op_names[-1]), 4); EXPECT_EQ(get_output_size(send_op_names[-1]), 4); } + +TEST_F(VirtualSchedulerTest, GraphWithSendRecv) { + // Init. + CreateGrapplerItemWithSendRecv(); + InitScheduler(); + + // Run the scheduler. + auto ops_executed = RunScheduler(""); + + EXPECT_GT(ops_executed.count("Const"), 0); + EXPECT_GT(ops_executed.count("Send"), 0); + EXPECT_GT(ops_executed.count("Recv"), 0); +} + +TEST_F(VirtualSchedulerTest, GraphWithSendRecvDifferentDevice) { + // Init. + CreateGrapplerItemWithSendRecv(); + // Change Recv node's device so that Send and Recv are placed on different + // devices. + auto& graph = grappler_item_->graph; + const string recv_device = kCPU1; + for (int i = 0; i < graph.node_size(); i++) { + auto* node = graph.mutable_node(i); + if (node->name() == "Recv") { + node->set_device(recv_device); + auto* attr = node->mutable_attr(); + (*attr)["recv_device"].set_s(recv_device); + } else if (node->name() == "Send") { + auto* attr = node->mutable_attr(); + (*attr)["recv_device"].set_s(recv_device); + } + } + InitScheduler(); + + // Run the scheduler. + auto ops_executed = RunScheduler(""); + + // Expect Const, Send, Recv, and VirtualScheduler created Send and Recv ops. + EXPECT_GT(ops_executed.count("Const"), 0); + EXPECT_GT(ops_executed.count("Send"), 0); + EXPECT_GT(ops_executed.count("Send_Send_0_from_/job_localhost/replica_0/" + "task_0/cpu_0_to_/job_localhost" + "/replica_0/task_0/cpu_1"), + 0); + EXPECT_GT(ops_executed.count( + "Recv_Send_0_on_/job_localhost/replica_0/task_0/cpu_1"), + 0); + EXPECT_GT(ops_executed.count("Recv"), 0); +} } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/grappler_item.cc b/tensorflow/core/grappler/grappler_item.cc index 94412eb198..844a1fa328 100644 --- a/tensorflow/core/grappler/grappler_item.cc +++ b/tensorflow/core/grappler/grappler_item.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" @@ -117,8 +118,13 @@ std::vector ComputeTransitiveFanin( bool* ill_formed) { *ill_formed = false; std::unordered_map name_to_node; + std::unordered_map name_to_send; for (const auto& node : graph.node()) { name_to_node[node.name()] = &node; + if (node.op() == "_Send") { + const auto& attr = node.attr(); + name_to_send[attr.at("tensor_name").s()] = &node; + } } std::vector queue; @@ -150,6 +156,15 @@ std::vector ComputeTransitiveFanin( } queue.push_back(in); } + if (node->op() == "_Recv") { + const auto& attr = node->attr(); + const NodeDef* send = name_to_send[attr.at("tensor_name").s()]; + if (send) { + queue.push_back(send); + } + // Subgraph after partitioning may have either _Send or _Recv, not both. + // So, we do not set ill_formed for missing _Send. + } } return result; } -- GitLab From 80078ae4a940f829eb81f03d80ec4d816a638df4 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 10 Nov 2017 11:28:04 -0800 Subject: [PATCH 0278/1801] Make Operation.node_def fetch from C API when enabled Also fixes previously-introduced memory management bugs in graph_def_versions and op_def. PiperOrigin-RevId: 175314829 --- tensorflow/python/framework/ops.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index ad2e2993c1..813b886775 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1639,7 +1639,7 @@ class Operation(object): def colocation_groups(self): """Returns the list of colocation groups of the op.""" default_colocation_group = [ - compat.as_bytes("loc:@%s" % self._node_def.name) + compat.as_bytes("loc:@%s" % self.name) ] try: class_attr = self.get_attr("_class") @@ -1894,7 +1894,7 @@ class Operation(object): ["^%s" % op.name for op in self._control_inputs]) def __str__(self): - return str(self._node_def) + return str(self.node_def) def __repr__(self): return "" % (self.name, self.type) @@ -2011,7 +2011,7 @@ class Operation(object): @property def node_def(self): # pylint: disable=line-too-long - """Returns a serialized `NodeDef` representation of this operation. + """Returns the `NodeDef` representation of this operation. Returns: A @@ -2019,7 +2019,16 @@ class Operation(object): protocol buffer. """ # pylint: enable=line-too-long - return self._node_def + if self._c_op: + with c_api_util.tf_buffer() as buf: + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_OperationToNodeDef(self._c_op, buf, status) + data = c_api.TF_GetBuffer(buf) + node_def = node_def_pb2.NodeDef() + node_def.ParseFromString(compat.as_bytes(data)) + return node_def + else: + return self._node_def @property def op_def(self): @@ -2033,13 +2042,13 @@ class Operation(object): """ # pylint: enable=line-too-long if self._c_op: - with errors.raise_exception_on_not_ok_status() as status: - with c_api_util.tf_buffer() as buf: + with c_api_util.tf_buffer() as buf: + with errors.raise_exception_on_not_ok_status() as status: # pylint: disable=protected-access c_api.TF_GraphGetOpDef(self._graph._c_graph, compat.as_bytes(self.type), buf, status) # pylint: enable=protected-access - data = c_api.TF_GetBuffer(buf) + data = c_api.TF_GetBuffer(buf) op_def = op_def_pb2.OpDef() op_def.ParseFromString(compat.as_bytes(data)) return op_def @@ -2750,10 +2759,10 @@ class Graph(object): """ # pylint: enable=line-too-long if self._c_graph: - with errors.raise_exception_on_not_ok_status() as status: - with c_api_util.tf_buffer() as buf: + with c_api_util.tf_buffer() as buf: + with errors.raise_exception_on_not_ok_status() as status: c_api.TF_GraphVersions(self._c_graph, buf, status) - data = c_api.TF_GetBuffer(buf) + data = c_api.TF_GetBuffer(buf) version_def = versions_pb2.VersionDef() version_def.ParseFromString(compat.as_bytes(data)) return version_def -- GitLab From 9372c016161b7299be28cbf6636f32ad4448ebde Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 12:04:29 -0800 Subject: [PATCH 0279/1801] ...comment format only change... PiperOrigin-RevId: 175319401 --- tensorflow/python/pywrap_dlopen_global_flags.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/pywrap_dlopen_global_flags.py b/tensorflow/python/pywrap_dlopen_global_flags.py index 509fc2170c..411334f480 100644 --- a/tensorflow/python/pywrap_dlopen_global_flags.py +++ b/tensorflow/python/pywrap_dlopen_global_flags.py @@ -28,13 +28,12 @@ from __future__ import print_function import ctypes import sys -# On UNIX-based platforms, pywrap_tensorflow is a SWIG-generated -# python library that dynamically loads _pywrap_tensorflow.so. The -# default mode for loading keeps all the symbol private and not -# visible to other libraries that may be loaded. Setting the mode to -# RTLD_GLOBAL to make the symbols visible, so that custom op libraries -# imported using `tf.load_op_library()` can access symbols defined in -# _pywrap_tensorflow.so. +# On UNIX-based platforms, pywrap_tensorflow is a SWIG-generated python library +# that dynamically loads _pywrap_tensorflow.so. The default mode for loading +# keeps all the symbol private and not visible to other libraries that may be +# loaded. Setting the mode to RTLD_GLOBAL to make the symbols visible, so that +# custom op libraries imported using `tf.load_op_library()` can access symbols +# defined in _pywrap_tensorflow.so. _use_rtld_global = (hasattr(sys, 'getdlopenflags') and hasattr(sys, 'setdlopenflags')) if _use_rtld_global: -- GitLab From 4e0627622639b454df73993d52caf04d8c07b013 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 10 Nov 2017 12:13:14 -0800 Subject: [PATCH 0280/1801] [XLA:GPU] Dump PTX when --xla_dump_ir is set. I know, I know, PTX isn't really IR. (Or is it?? It's not machine code...) In any case, if you pass this flag and get PTX, you're unlikely to be disappointed. PiperOrigin-RevId: 175320353 --- .../compiler/xla/service/gpu/gpu_compiler.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 187b4a705c..b77f75ff79 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -428,6 +428,22 @@ StatusOr> GpuCompiler::Compile( VLOG(2) << "PTX:"; XLA_VLOG_LINES(2, ptx); + // Write PTX to IR dump directory, if IR dumping was requested. + if (!ir_dump_directory.empty()) { + const string ptx_outfile = tensorflow::io::JoinPath( + ir_dump_directory, StrCat(module->name(), ".ptx")); + auto status = [&] { + auto* env = tensorflow::Env::Default(); + TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(ir_dump_directory)); + TF_RETURN_IF_ERROR(tensorflow::WriteStringToFile(env, ptx_outfile, ptx)); + return Status::OK(); + }(); + if (!status.ok()) { + LOG(WARNING) << "Couldn't dump PTX for module " << module->name() + << " to " << ptx_outfile << ": " << status; + } + } + const std::vector cubin = CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor); -- GitLab From d76632ae7367cd67e9a54999a8788b27a803bbb0 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 10 Nov 2017 12:30:03 -0800 Subject: [PATCH 0281/1801] [CMake] Add missing framework dependency to `tf_c_python_api`. This was occasionally causing flaky failures due to the module being compiled before the proto_text headers had been generated. PiperOrigin-RevId: 175322168 --- tensorflow/contrib/cmake/tf_c.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index 3ae28b7601..c6a15f2ca0 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -46,4 +46,5 @@ add_dependencies( tf_c_python_api tf_c tf_core_lib + tf_core_framework tf_protos_cc) -- GitLab From f157cc92a895b0cd9f5f15cc459e60ab0c98c875 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 12:57:33 -0800 Subject: [PATCH 0282/1801] Remove the backward incompatible shape checks. PiperOrigin-RevId: 175324895 --- tensorflow/contrib/boosted_trees/ops/prediction_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc b/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc index 82b8e8c1c2..d66f645f62 100644 --- a/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc @@ -36,7 +36,7 @@ static Status ApplyGradientTreesPredictionShapeFn(InferenceContext* c) { c->set_output(0, {c->Matrix(InferenceContext::kUnknownDim, reduce_dim ? learner_config.num_classes() - 1 : learner_config.num_classes())}); - c->set_output(1, {c->Vector(InferenceContext::kUnknownDim)}); + c->set_output(1, {c->UnknownShape()}); return Status::OK(); } -- GitLab From 83c2da808e96dc5c9c3e80353d1db58a17502bf1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 13:12:49 -0800 Subject: [PATCH 0283/1801] Removed StringPiece::set and StringPiece::clear, as they have no absl::string_view equivalents. This will allow for a more convenient transition to absl::string_view. Calls to set StringPiece::set and StringPiece::clear were replaced with the StringPiece constructor as follows: string_piece_foo.set(data, size) => string_piece_foo = StringPiece(data, size) string_piece_foo.clear() => string_piece_foo = StringPiece() PiperOrigin-RevId: 175326576 --- ISSUE_TEMPLATE.md | 1 + .../android/asset_manager_filesystem.cc | 4 ++-- tensorflow/core/framework/op_gen_lib.cc | 4 ++-- tensorflow/core/framework/rendezvous.cc | 18 +++++++++--------- .../core/kernels/immutable_constant_op_test.cc | 4 ++-- tensorflow/core/lib/core/stringpiece.h | 11 ----------- tensorflow/core/lib/io/block.cc | 2 +- tensorflow/core/lib/strings/str_util.cc | 4 ++-- tensorflow/core/lib/strings/strcat.cc | 2 +- .../core/platform/cloud/gcs_file_system.cc | 2 +- tensorflow/core/util/memmapped_file_system.cc | 5 +++-- tensorflow/core/util/semver_test.cc | 2 +- tensorflow/python/lib/core/strings.i | 4 ++-- 13 files changed, 27 insertions(+), 36 deletions(-) diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md index 1f6ae1aba5..1a401997c6 100644 --- a/ISSUE_TEMPLATE.md +++ b/ISSUE_TEMPLATE.md @@ -1,4 +1,5 @@ Please go to Stack Overflow for help and support: + https://stackoverflow.com/questions/tagged/tensorflow If you open a GitHub issue, here is our policy: diff --git a/tensorflow/contrib/android/asset_manager_filesystem.cc b/tensorflow/contrib/android/asset_manager_filesystem.cc index 9e4d3290c3..380a652435 100644 --- a/tensorflow/contrib/android/asset_manager_filesystem.cc +++ b/tensorflow/contrib/android/asset_manager_filesystem.cc @@ -97,7 +97,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile { off64_t new_offset = AAsset_seek64(asset.get(), offset, SEEK_SET); off64_t length = AAsset_getLength64(asset.get()); if (new_offset < 0) { - result->set(scratch, 0); + *result = StringPiece(scratch, 0); return errors::OutOfRange("Read after file end."); } const off64_t region_left = @@ -106,7 +106,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile { if (read < 0) { return errors::Internal("Error reading from asset."); } - result->set(scratch, region_left); + *result = StringPiece(scratch, region_left); return (region_left == to_read) ? Status::OK() : errors::OutOfRange("Read less bytes than requested."); diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc index 1e93e9be09..d84d5431e9 100644 --- a/tensorflow/core/framework/op_gen_lib.cc +++ b/tensorflow/core/framework/op_gen_lib.cc @@ -84,7 +84,7 @@ static bool SplitAt(char split_ch, StringPiece* orig, auto pos = orig->find(split_ch); if (pos == StringPiece::npos) { *before_split = *orig; - orig->clear(); + *orig = StringPiece(); return false; } else { *before_split = orig->substr(0, pos); @@ -236,7 +236,7 @@ string PBTxtFromMultiline(StringPiece multiline_pbtxt) { unescaped.push_back('\n'); } strings::StrAppend(&unescaped, line); - line.clear(); + line = StringPiece(); } // Escape what we extracted and then output it in quotes. diff --git a/tensorflow/core/framework/rendezvous.cc b/tensorflow/core/framework/rendezvous.cc index a9e4c1cfb1..90756a4f2f 100644 --- a/tensorflow/core/framework/rendezvous.cc +++ b/tensorflow/core/framework/rendezvous.cc @@ -36,15 +36,15 @@ namespace tensorflow { Rendezvous::ParsedKey& Rendezvous::ParsedKey::operator=(const ParsedKey& b) { const char* b_base = b.buf_.data(); buf_ = b.buf_; - src_device.set(buf_.data() + (b.src_device.data() - b_base), - b.src_device.size()); + src_device = StringPiece(buf_.data() + (b.src_device.data() - b_base), + b.src_device.size()); src = b.src; src_incarnation = b.src_incarnation; - dst_device.set(buf_.data() + (b.dst_device.data() - b_base), - b.dst_device.size()); + dst_device = StringPiece(buf_.data() + (b.dst_device.data() - b_base), + b.dst_device.size()); dst = b.dst; - edge_name.set(buf_.data() + (b.edge_name.data() - b_base), - b.edge_name.size()); + edge_name = StringPiece(buf_.data() + (b.edge_name.data() - b_base), + b.edge_name.size()); return *this; } @@ -104,9 +104,9 @@ Status Rendezvous::ParseKey(StringPiece key, ParsedKey* out) { strings::HexStringToUint64(parts[1], &out->src_incarnation) && DeviceNameUtils::ParseFullName(parts[2], &out->dst) && !parts[3].empty()) { - out->src_device.set(parts[0].data(), parts[0].size()); - out->dst_device.set(parts[2].data(), parts[2].size()); - out->edge_name.set(parts[3].data(), parts[3].size()); + out->src_device = StringPiece(parts[0].data(), parts[0].size()); + out->dst_device = StringPiece(parts[2].data(), parts[2].size()); + out->edge_name = StringPiece(parts[3].data(), parts[3].size()); return Status::OK(); } return errors::InvalidArgument("Invalid rendezvous key: ", key); diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc index b318c9c79a..b3814331ee 100644 --- a/tensorflow/core/kernels/immutable_constant_op_test.cc +++ b/tensorflow/core/kernels/immutable_constant_op_test.cc @@ -147,8 +147,8 @@ Status CreateTempFile(Env* env, float value, uint64 size, string* filename) { std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file)); for (uint64 i = 0; i < size; ++i) { - StringPiece sp; - sp.set(&value, sizeof(value)); + StringPiece sp(static_cast(static_cast(&value)), + sizeof(value)); TF_RETURN_IF_ERROR(file->Append(sp)); } TF_RETURN_IF_ERROR(file->Close()); diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 7d258b36c5..94f4a377f1 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -51,11 +51,6 @@ class StringPiece { // Create a slice that refers to s[0,strlen(s)-1] StringPiece(const char* s) : data_(s), size_(strlen(s)) {} - void set(const void* data, size_t len) { - data_ = reinterpret_cast(data); - size_ = len; - } - // Return a pointer to the beginning of the referenced data const char* data() const { return data_; } @@ -79,12 +74,6 @@ class StringPiece { return data_[n]; } - // Change this slice to refer to an empty array - void clear() { - data_ = ""; - size_ = 0; - } - // Drop the first "n" bytes from this slice. void remove_prefix(size_t n) { assert(n <= size()); diff --git a/tensorflow/core/lib/io/block.cc b/tensorflow/core/lib/io/block.cc index 1fa26d9147..4c30486cc4 100644 --- a/tensorflow/core/lib/io/block.cc +++ b/tensorflow/core/lib/io/block.cc @@ -199,7 +199,7 @@ class Block::Iter : public Iterator { restart_index_ = num_restarts_; status_ = errors::DataLoss("bad entry in block"); key_.clear(); - value_.clear(); + value_ = StringPiece(); } bool ParseNextKey() { diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index 8509c9a041..240e1454e5 100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -407,11 +407,11 @@ bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val) { } const size_t n = p - s->data(); if (n > 0) { - val->set(s->data(), n); + *val = StringPiece(s->data(), n); s->remove_prefix(n); return true; } else { - val->clear(); + *val = StringPiece(); return false; } } diff --git a/tensorflow/core/lib/strings/strcat.cc b/tensorflow/core/lib/strings/strcat.cc index 46a45a6678..5b1cff486d 100644 --- a/tensorflow/core/lib/strings/strcat.cc +++ b/tensorflow/core/lib/strings/strcat.cc @@ -45,7 +45,7 @@ AlphaNum::AlphaNum(Hex hex) { value >>= 4; mask >>= 4; } while (mask != 0); - piece_.set(writer, end - writer); + piece_ = StringPiece(writer, end - writer); } // ---------------------------------------------------------------------- diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index e82aebad0b..17fe704b79 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -247,7 +247,7 @@ class GcsRandomAccessFile : public RandomAccessFile { /// The implementation of reads with an LRU block cache. Thread safe. Status Read(uint64 offset, size_t n, StringPiece* result, char* scratch) const override { - result->clear(); + *result = StringPiece(); std::vector out; TF_RETURN_IF_ERROR(file_block_cache_->Read(filename_, offset, n, &out)); std::memcpy(scratch, out.data(), std::min(out.size(), n)); diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc index e077e94cf8..a0f43d2d4a 100644 --- a/tensorflow/core/util/memmapped_file_system.cc +++ b/tensorflow/core/util/memmapped_file_system.cc @@ -58,12 +58,13 @@ class RandomAccessFileFromMemmapped : public RandomAccessFile { Status Read(uint64 offset, size_t to_read, StringPiece* result, char* scratch) const override { if (offset >= length_) { - result->set(scratch, 0); + *result = StringPiece(scratch, 0); return Status(error::OUT_OF_RANGE, "Read after file end"); } const uint64 region_left = std::min(length_ - offset, static_cast(to_read)); - result->set(reinterpret_cast(data_) + offset, region_left); + *result = + StringPiece(reinterpret_cast(data_) + offset, region_left); return (region_left == to_read) ? Status::OK() : Status(error::OUT_OF_RANGE, "Read less bytes than requested"); diff --git a/tensorflow/core/util/semver_test.cc b/tensorflow/core/util/semver_test.cc index 0647f670c7..fdc34fa58b 100644 --- a/tensorflow/core/util/semver_test.cc +++ b/tensorflow/core/util/semver_test.cc @@ -39,7 +39,7 @@ bool ConsumeDotSeparatedIdentifiers(StringPiece* s, const string& prefix, for (i = 0; i < s->size() && IsDotOrIdentifierChar((*s)[i]); ++i) { // Intentionally empty } - val->set(s->data(), i); + *val = StringPiece(s->data(), i); s->remove_prefix(i); return i > 0; } diff --git a/tensorflow/python/lib/core/strings.i b/tensorflow/python/lib/core/strings.i index 938c13e30e..9d807e51be 100644 --- a/tensorflow/python/lib/core/strings.i +++ b/tensorflow/python/lib/core/strings.i @@ -40,7 +40,7 @@ limitations under the License. // Returns true on success, false on failure. bool _BytesToStringPiece(PyObject* obj, tensorflow::StringPiece* result) { if (obj == Py_None) { - result->clear(); + *result = tensorflow::StringPiece(); } else { char* ptr; Py_ssize_t len; @@ -48,7 +48,7 @@ bool _BytesToStringPiece(PyObject* obj, tensorflow::StringPiece* result) { // Python has raised an error (likely TypeError or UnicodeEncodeError). return false; } - result->set(ptr, len); + *result = tensorflow::StringPiece(ptr, len); } return true; } -- GitLab From 0017742dd05c19a2bbbf56c87c8da55959df28d8 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 10 Nov 2017 13:14:03 -0800 Subject: [PATCH 0284/1801] Clean up some redundant and unused build settings. --copts are passed to both c++ and c (so is redundent with --cxxopts). Configs passed to "bazel build" are inherited by "bazel run" and "bazel test". Also removed some unused configs. PiperOrigin-RevId: 175326697 --- configure.py | 19 +++-------- tensorflow/tensorflow.bzl | 44 ++++++++++++++------------ tensorflow/tools/ci_build/ci_sanity.sh | 2 +- tools/bazel.rc | 8 ----- 4 files changed, 30 insertions(+), 43 deletions(-) diff --git a/configure.py b/configure.py index 3c0df9475d..51d860812e 100644 --- a/configure.py +++ b/configure.py @@ -228,17 +228,9 @@ def setup_python(environ_cp): # Set-up env variables used by python_configure.bzl write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) - write_to_bazelrc('build --define PYTHON_BIN_PATH="%s"' % python_bin_path) - write_to_bazelrc('build --define PYTHON_LIB_PATH="%s"' % python_lib_path) write_to_bazelrc('build --force_python=py%s' % python_major_version) write_to_bazelrc('build --host_force_python=py%s' % python_major_version) write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) - write_to_bazelrc('test --force_python=py%s' % python_major_version) - write_to_bazelrc('test --host_force_python=py%s' % python_major_version) - write_to_bazelrc('test --define PYTHON_BIN_PATH="%s"' % python_bin_path) - write_to_bazelrc('test --define PYTHON_LIB_PATH="%s"' % python_lib_path) - write_to_bazelrc('run --define PYTHON_BIN_PATH="%s"' % python_bin_path) - write_to_bazelrc('run --define PYTHON_LIB_PATH="%s"' % python_lib_path) environ_cp['PYTHON_BIN_PATH'] = python_bin_path # Write tools/python_bin_path.sh @@ -487,11 +479,12 @@ def set_cc_opt_flags(environ_cp): cc_opt_flags = get_from_env_or_user_or_default(environ_cp, 'CC_OPT_FLAGS', question, default_cc_opt_flags) for opt in cc_opt_flags.split(): - write_to_bazelrc('build:opt --cxxopt=%s --copt=%s' % (opt, opt)) - host_opt = '-march=native' # It should be safe on the same build host. - write_to_bazelrc( - 'build:opt --host_cxxopt=%s --host_copt=%s' % (host_opt, host_opt)) + write_to_bazelrc('build:opt --copt=%s' % opt) + # It should be safe on the same build host. + write_to_bazelrc('build:opt --host_copt=-march=native') write_to_bazelrc('build:opt --define with_default_optimizations=true') + # TODO(mikecase): Remove these default defines once we are able to get + # TF Lite targets building without them. write_to_bazelrc('build --copt=-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK') write_to_bazelrc('build --host_copt=-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK') @@ -949,7 +942,6 @@ def set_other_mpi_vars(environ_cp): def set_mkl(): write_to_bazelrc('build:mkl --define using_mkl=true') write_to_bazelrc('build:mkl -c opt') - write_to_bazelrc('build:mkl --copt="-DEIGEN_USE_VML"') print( 'Add "--config=mkl" to your bazel command to build with MKL ' 'support.\nPlease note that MKL on MacOS or windows is still not ' @@ -1002,7 +994,6 @@ def main(): environ_cp['TF_NEED_HDFS'] = '0' environ_cp['TF_NEED_JEMALLOC'] = '0' environ_cp['TF_NEED_OPENCL'] = '0' - environ_cp['TF_NEED_S3'] = '0' environ_cp['TF_CUDA_CLANG'] = '0' if is_macos(): diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 43ecb7f937..a3ba363469 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -168,26 +168,30 @@ WIN_COPTS = [ # LINT.IfChange def tf_copts(): - return (if_not_windows([ - "-DEIGEN_AVOID_STL_ARRAY", - "-Iexternal/gemmlowp", - "-Wno-sign-compare", - "-fno-exceptions", - "-ftemplate-depth=900", - ]) + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1", "-fopenmp",]) + if_android_arm( - ["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) + select({ - clean_dep("//tensorflow:android"): [ - "-std=c++11", - "-DTF_LEAN_BINARY", - "-O2", - "-Wno-narrowing", - "-fomit-frame-pointer", - ], - clean_dep("//tensorflow:darwin"): [], - clean_dep("//tensorflow:windows"): WIN_COPTS, - clean_dep("//tensorflow:windows_msvc"): WIN_COPTS, - clean_dep("//tensorflow:ios"): ["-std=c++11"], - "//conditions:default": ["-pthread"] + return ( + if_not_windows([ + "-DEIGEN_AVOID_STL_ARRAY", + "-Iexternal/gemmlowp", + "-Wno-sign-compare", + "-fno-exceptions", + "-ftemplate-depth=900"]) + + if_cuda(["-DGOOGLE_CUDA=1"]) + + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML", "-fopenmp",]) + + if_android_arm(["-mfpu=neon"]) + + if_linux_x86_64(["-msse3"]) + + select({ + clean_dep("//tensorflow:android"): [ + "-std=c++11", + "-DTF_LEAN_BINARY", + "-O2", + "-Wno-narrowing", + "-fomit-frame-pointer", + ], + clean_dep("//tensorflow:darwin"): [], + clean_dep("//tensorflow:windows"): WIN_COPTS, + clean_dep("//tensorflow:windows_msvc"): WIN_COPTS, + clean_dep("//tensorflow:ios"): ["-std=c++11"], + "//conditions:default": ["-pthread"] })) diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index cff672c9df..404a9a6b62 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -403,7 +403,7 @@ cmd_status(){ # Run bazel build --nobuild to test the validity of the BUILD files # TODO(mikecase): Remove TF Lite exclusion from this list. Exclusion is # necessary since the @androidsdk WORKSPACE dependency is commented -# commented out by default in TF WORKSPACE file. +# out by default in TF WORKSPACE file. do_bazel_nobuild() { BUILD_TARGET="//tensorflow/..." BUILD_TARGET="${BUILD_TARGET} -//tensorflow/contrib/lite/java/demo/app/src/main/..." diff --git a/tools/bazel.rc b/tools/bazel.rc index 414ddf2e47..ac6766b11b 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -21,13 +21,5 @@ build --define=use_fast_cpp_protos=true build --define=allow_oversize_protos=true build --spawn_strategy=standalone -test --spawn_strategy=standalone -run --spawn_strategy=standalone - build --genrule_strategy=standalone -test --genrule_strategy=standalone -run --genrule_strategy=standalone - build -c opt -test -c opt -run -c opt -- GitLab From 83f20f4586d05f583229b2339799e23d9fc42586 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 14:25:27 -0800 Subject: [PATCH 0285/1801] Internal Change PiperOrigin-RevId: 175335782 --- tensorflow/contrib/lite/java/BUILD | 54 ------------------- .../contrib/lite/java/src/main/native/BUILD | 40 +++++++++++++- 2 files changed, 39 insertions(+), 55 deletions(-) diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 80e8bd435e..e31e3abafe 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -76,60 +76,6 @@ java_test( ], ) -java_test( - name = "NativeInterpreterWrapperTest", - size = "small", - srcs = ["src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java"], - data = [ - "src/testdata/add.bin", - "src/testdata/int32.bin", - "src/testdata/int64.bin", - "src/testdata/invalid.model.tflite", - "src/testdata/uint8.bin", - ], - javacopts = JAVACOPTS, - test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", - deps = [ - ":tensorflowlitelib", - "@com_google_truth", - "@junit", - ], -) - -# TODO: generate large models at runtime, instead of storing them. -java_test( - name = "InterpreterTest", - size = "small", - srcs = ["src/test/java/org/tensorflow/lite/InterpreterTest.java"], - data = [ - "src/testdata/add.bin", - "src/testdata/mobilenet.tflite.bin", - ], - javacopts = JAVACOPTS, - test_class = "org.tensorflow.lite.InterpreterTest", - deps = [ - ":tensorflowlitelib", - "@com_google_truth", - "@junit", - ], -) - -java_test( - name = "TensorTest", - size = "small", - srcs = ["src/test/java/org/tensorflow/lite/TensorTest.java"], - data = [ - "src/testdata/add.bin", - ], - javacopts = JAVACOPTS, - test_class = "org.tensorflow.lite.TensorTest", - deps = [ - ":tensorflowlitelib", - "@com_google_truth", - "@junit", - ], -) - filegroup( name = "libtensorflowlite_jni", srcs = select({ diff --git a/tensorflow/contrib/lite/java/src/main/native/BUILD b/tensorflow/contrib/lite/java/src/main/native/BUILD index 9c172a1f68..15806d57c8 100644 --- a/tensorflow/contrib/lite/java/src/main/native/BUILD +++ b/tensorflow/contrib/lite/java/src/main/native/BUILD @@ -15,7 +15,15 @@ cc_library( "nativeinterpreterwrapper_jni.cc", "tensor_jni.cc", "tensorflow_lite_jni.cc", - ], + ] + select({ + # The Android toolchain makes "jni.h" available in the include path. + # For non-Android toolchains, generate jni.h and jni_md.h. + "//tensorflow:android": [], + "//conditions:default": [ + ":jni.h", + ":jni_md.h", + ], + }), hdrs = [ "exception_jni.h", "nativeinterpreterwrapper_jni.h", @@ -23,6 +31,10 @@ cc_library( "tensorflow_lite_jni.h", ], copts = tflite_copts(), + includes = select({ + "//tensorflow:android": [], + "//conditions:default": ["."], + }), linkopts = [ "-lm", "-ldl", @@ -35,6 +47,32 @@ cc_library( alwayslink = 1, ) +# Silly rules to make +# #include +# in the source headers work +# (in combination with the "includes" attribute of the tf_cuda_library rule +# above. Not needed when using the Android toolchain). +# +# Inspired from: +# https://github.com/bazelbuild/bazel/blob/f99a0543f8d97339d32075c7176b79f35be84606/src/main/native/BUILD +# but hopefully there is a simpler alternative to this. +genrule( + name = "copy_jni_h", + srcs = ["@bazel_tools//tools/jdk:jni_header"], + outs = ["jni.h"], + cmd = "cp -f $< $@", +) + +genrule( + name = "copy_jni_md_h", + srcs = select({ + "//tensorflow:darwin": ["@bazel_tools//tools/jdk:jni_md_header-darwin"], + "//conditions:default": ["@bazel_tools//tools/jdk:jni_md_header-linux"], + }), + outs = ["jni_md.h"], + cmd = "cp -f $< $@", +) + # This includes all ops. If you want a smaller binary, you should copy and # modify builtin_ops_jni.cc. You should then link your binary against both # ":native_framework_only" and your own version of ":native_builtin_ops". -- GitLab From dd58a908333de37b09372acb80c0c412d90a7125 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 10 Nov 2017 14:29:32 -0800 Subject: [PATCH 0286/1801] [tf.data] More actionable error message in `Dataset.make_one_shot_iterator()`. Point users to `Dataset.make_initializable_iterator()` when using stateful resources in their pipeline. PiperOrigin-RevId: 175337182 --- tensorflow/python/data/ops/dataset_ops.py | 16 ++++++++++++++-- .../python/kernel_tests/iterator_ops_test.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 343f316281..09f4349cf3 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -22,6 +22,7 @@ import collections import threading import numpy as np +import six from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest @@ -105,7 +106,7 @@ class Dataset(object): def make_one_shot_iterator(self): """Creates an `Iterator` for enumerating the elements of this dataset. - **N.B.** The returned iterator will be initialized automatically. + Note: The returned iterator will be initialized automatically. A "one-shot" iterator does not currently support re-initialization. Returns: @@ -124,7 +125,18 @@ class Dataset(object): def _make_dataset(): return self._as_variant_tensor() # pylint: disable=protected-access - _make_dataset.add_to_graph(ops.get_default_graph()) + try: + _make_dataset.add_to_graph(ops.get_default_graph()) + except ValueError as err: + if "Cannot capture a stateful node" in str(err): + raise ValueError( + "Failed to create a one-shot iterator for a dataset. " + "`Dataset.make_one_shot_iterator()` does not support datasets that " + "capture stateful objects, such as a `Variable` or `LookupTable`. " + "In these cases, use `Dataset.make_initializable_iterator()`. " + "(Original error: %s)" % err) + else: + six.reraise(ValueError, err) return iterator_ops.Iterator( gen_dataset_ops.one_shot_iterator( diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/kernel_tests/iterator_ops_test.py index 2128ef4ae1..b198fa1754 100644 --- a/tensorflow/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/python/kernel_tests/iterator_ops_test.py @@ -39,6 +39,7 @@ from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import script_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -58,6 +59,15 @@ class IteratorTest(test.TestCase): with self.assertRaisesRegexp(LookupError, "No gradient defined"): gradients_impl.gradients(value, [component, side]) + def testCapturingStateInOneShotRaisesException(self): + var = variables.Variable(37.0, name="myvar") + dataset = (dataset_ops.Dataset.from_tensor_slices([0.0, 1.0, 2.0]) + .map(lambda x: x + var)) + with self.assertRaisesRegexp( + ValueError, r"`Dataset.make_one_shot_iterator\(\)` does not support " + "datasets that capture stateful objects.+myvar"): + dataset.make_one_shot_iterator() + def testOneShotIterator(self): components = (np.arange(7), np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], -- GitLab From 1ddff94002e037fce0fd15c62f4c6090aeb5dce4 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 10 Nov 2017 14:38:20 -0800 Subject: [PATCH 0287/1801] [TF:XLA] Add a Cholesky decomposition implementation. Currently the implementation is fully unrolled, which can cause code size blowups at large matrix sizes. We can explore reducing code size in a subsequent change. Create a new directory tensorflow/compiler/tf2xla/lib of XLA utility functions. Move batch matmul implementation into the utility directory. Add helpers for batch matmul, triangular solve, and Cholesky decomposition. PiperOrigin-RevId: 175338698 --- tensorflow/BUILD | 1 + tensorflow/compiler/tests/BUILD | 15 ++ tensorflow/compiler/tests/cholesky_op_test.py | 126 +++++++++++++ tensorflow/compiler/tf2xla/BUILD | 1 + tensorflow/compiler/tf2xla/kernels/BUILD | 4 + .../tf2xla/kernels/batch_matmul_op.cc | 114 +----------- .../compiler/tf2xla/kernels/cholesky_op.cc | 39 ++++ tensorflow/compiler/tf2xla/lib/BUILD | 120 ++++++++++++ tensorflow/compiler/tf2xla/lib/batch_dot.cc | 154 +++++++++++++++ tensorflow/compiler/tf2xla/lib/batch_dot.h | 51 +++++ tensorflow/compiler/tf2xla/lib/cholesky.cc | 166 +++++++++++++++++ tensorflow/compiler/tf2xla/lib/cholesky.h | 38 ++++ .../compiler/tf2xla/lib/triangular_solve.cc | 175 ++++++++++++++++++ .../compiler/tf2xla/lib/triangular_solve.h | 46 +++++ .../tf2xla/lib/triangular_solve_test.cc | 69 +++++++ tensorflow/compiler/tf2xla/lib/util.cc | 107 +++++++++++ tensorflow/compiler/tf2xla/lib/util.h | 54 ++++++ tensorflow/compiler/tf2xla/xla_helpers.cc | 19 +- tensorflow/compiler/xla/tests/test_macros.h | 6 +- 19 files changed, 1177 insertions(+), 128 deletions(-) create mode 100644 tensorflow/compiler/tests/cholesky_op_test.py create mode 100644 tensorflow/compiler/tf2xla/kernels/cholesky_op.cc create mode 100644 tensorflow/compiler/tf2xla/lib/BUILD create mode 100644 tensorflow/compiler/tf2xla/lib/batch_dot.cc create mode 100644 tensorflow/compiler/tf2xla/lib/batch_dot.h create mode 100644 tensorflow/compiler/tf2xla/lib/cholesky.cc create mode 100644 tensorflow/compiler/tf2xla/lib/cholesky.h create mode 100644 tensorflow/compiler/tf2xla/lib/triangular_solve.cc create mode 100644 tensorflow/compiler/tf2xla/lib/triangular_solve.h create mode 100644 tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc create mode 100644 tensorflow/compiler/tf2xla/lib/util.cc create mode 100644 tensorflow/compiler/tf2xla/lib/util.h diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 82a57ac185..95be0bc8df 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -403,6 +403,7 @@ filegroup( "//tensorflow/compiler/tf2xla:all_files", "//tensorflow/compiler/tf2xla/cc:all_files", "//tensorflow/compiler/tf2xla/kernels:all_files", + "//tensorflow/compiler/tf2xla/lib:all_files", "//tensorflow/compiler/tf2xla/ops:all_files", "//tensorflow/compiler/xla:all_files", "//tensorflow/compiler/xla/client:all_files", diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 21b8823944..c372e05474 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -129,6 +129,21 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "cholesky_op_test", + size = "small", + srcs = ["cholesky_op_test.py"], + tags = ["optonly"], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:training", + ], +) + tf_xla_py_test( name = "clustering_test", size = "small", diff --git a/tensorflow/compiler/tests/cholesky_op_test.py b/tensorflow/compiler/tests/cholesky_op_test.py new file mode 100644 index 0000000000..5010fe5e21 --- /dev/null +++ b/tensorflow/compiler/tests/cholesky_op_test.py @@ -0,0 +1,126 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow.ops.tf.Cholesky.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class CholeskyOpTest(XLATestCase): + + def _verifyCholeskyBase(self, sess, placeholder, x, chol, verification, atol): + chol_np, verification_np = sess.run([chol, verification], {placeholder: x}) + self.assertAllClose(x, verification_np, atol=atol) + self.assertShapeEqual(x, chol) + # Check that the cholesky is lower triangular, and has positive diagonal + # elements. + if chol_np.shape[-1] > 0: + chol_reshaped = np.reshape(chol_np, (-1, chol_np.shape[-2], + chol_np.shape[-1])) + for chol_matrix in chol_reshaped: + self.assertAllClose(chol_matrix, np.tril(chol_matrix), atol=atol) + self.assertTrue((np.diag(chol_matrix) > 0.0).all()) + + def _verifyCholesky(self, x, atol=1e-6): + # Verify that LL^T == x. + with self.test_session() as sess: + placeholder = array_ops.placeholder( + dtypes.as_dtype(x.dtype), shape=x.shape) + with self.test_scope(): + chol = linalg_ops.cholesky(placeholder) + verification = math_ops.matmul(chol, chol, adjoint_b=True) + self._verifyCholeskyBase(sess, placeholder, x, chol, verification, atol) + + def testBasic(self): + data = np.array([[4., -1., 2.], [-1., 6., 0], [2., 0., 5.]]) + for dtype in self.float_types: + self._verifyCholesky(data.astype(dtype)) + + def testBatch(self): + for dtype in self.float_types: + simple_array = np.array( + [[[1., 0.], [0., 5.]]], dtype=dtype) # shape (1, 2, 2) + self._verifyCholesky(simple_array) + self._verifyCholesky(np.vstack((simple_array, simple_array))) + odd_sized_array = np.array( + [[[4., -1., 2.], [-1., 6., 0], [2., 0., 5.]]], dtype=dtype) + self._verifyCholesky(np.vstack((odd_sized_array, odd_sized_array))) + + # Generate random positive-definite matrices. + matrices = np.random.rand(10, 5, 5).astype(dtype) + for i in xrange(10): + matrices[i] = np.dot(matrices[i].T, matrices[i]) + self._verifyCholesky(matrices, atol=1e-4) + + def testNonSquareMatrix(self): + for dtype in self.float_types: + with self.assertRaises(ValueError): + linalg_ops.cholesky(np.array([[1., 2., 3.], [3., 4., 5.]], dtype=dtype)) + with self.assertRaises(ValueError): + linalg_ops.cholesky( + np.array( + [[[1., 2., 3.], [3., 4., 5.]], [[1., 2., 3.], [3., 4., 5.]]], + dtype=dtype)) + + def testWrongDimensions(self): + for dtype in self.float_types: + tensor3 = constant_op.constant([1., 2.], dtype=dtype) + with self.assertRaises(ValueError): + linalg_ops.cholesky(tensor3) + with self.assertRaises(ValueError): + linalg_ops.cholesky(tensor3) + + @unittest.skip("Test is slow") + def testLarge(self): + n = 200 + shape = (n, n) + data = np.ones(shape).astype(np.float32) / (2.0 * n) + np.diag( + np.ones(n).astype(np.float32)) + self._verifyCholesky(data, atol=1e-4) + + def testMatrixConditionNumbers(self): + for dtype in self.float_types: + condition_number = 1000 + size = 20 + + # Generate random positive-definite symmetric matrices, and take their + # Eigendecomposition. + matrix = np.random.rand(size, size) + matrix = np.dot(matrix.T, matrix) + _, w = np.linalg.eigh(matrix) + + # Build new Eigenvalues exponentially distributed between 1 and + # 1/condition_number + v = np.exp(-np.log(condition_number) * np.linspace(0, size, size) / size) + matrix = np.dot(np.dot(w, np.diag(v)), w.T).astype(dtype) + self._verifyCholesky(matrix, atol=1e-4) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 912e819d8d..376c8108ed 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -125,6 +125,7 @@ cc_library( ":functionalize_control_flow", ":sharding_util", ":tf2xla_util", + "//tensorflow/compiler/tf2xla/lib:util", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 13d06177f0..948d7f0b40 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -19,6 +19,7 @@ tf_kernel_library( "binary_ops.cc", "cast_op.cc", "categorical_op.cc", + "cholesky_op.cc", "concat_op.cc", "const_op.cc", "conv_ops.cc", @@ -81,6 +82,8 @@ tf_kernel_library( ":while_op", "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla/lib:batch_dot", + "//tensorflow/compiler/tf2xla/lib:cholesky", "//tensorflow/compiler/tf2xla/ops:sendrecv_ops", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", @@ -91,6 +94,7 @@ tf_kernel_library( "//tensorflow/compiler/xla/client/lib:arithmetic", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:linalg_ops_op_lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:concat_lib", diff --git a/tensorflow/compiler/tf2xla/kernels/batch_matmul_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_matmul_op.cc index 73ccc151c1..a015b8e0e8 100644 --- a/tensorflow/compiler/tf2xla/kernels/batch_matmul_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/batch_matmul_op.cc @@ -13,11 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// XLA-specific BatchMatMul Op. -// The current implementation simply unrolls the computation along the batch -// dimension. -// TODO(dominikg,phawkins): Use a real batched matmul instead of unrolling. - +#include "tensorflow/compiler/tf2xla/lib/batch_dot.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" @@ -32,110 +28,10 @@ class BatchMatMulOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - const TensorShape x_shape = ctx->InputShape(0); - const TensorShape y_shape = ctx->InputShape(1); - - // Check that both tensors have the same number of dimensions. There must be - // at least two (the batch dimensions can be empty). - OP_REQUIRES(ctx, x_shape.dims() == y_shape.dims(), - errors::InvalidArgument("In[0] and In[1] has different ndims: ", - x_shape.DebugString(), " vs. ", - y_shape.DebugString())); - const int ndims = x_shape.dims(); - OP_REQUIRES( - ctx, ndims >= 2, - errors::InvalidArgument("In[0] and In[1] ndims must be >= 2: ", ndims)); - - // The batch dimensions must be equal and the matrix dimensions must be - // valid. - std::vector dimensions; - int batch_count = 1; - for (int i = 0; i < ndims - 2; ++i) { - OP_REQUIRES( - ctx, x_shape.dim_size(i) == y_shape.dim_size(i), - errors::InvalidArgument("In[0].dim(", i, ") and In[1].dim(", i, - ") must be the same: ", x_shape.DebugString(), - " vs ", y_shape.DebugString())); - dimensions.push_back(x_shape.dim_size(i)); - batch_count *= x_shape.dim_size(i); - } - - int x_inner_dim = adj_x_ ? (ndims - 2) : (ndims - 1); - int y_inner_dim = adj_y_ ? (ndims - 1) : (ndims - 2); - OP_REQUIRES( - ctx, x_shape.dim_size(x_inner_dim) == y_shape.dim_size(y_inner_dim), - errors::InvalidArgument( - "In[0] mismatch In[1] shape: ", x_shape.dim_size(x_inner_dim), - " vs. ", y_shape.dim_size(y_inner_dim), ": ", x_shape.DebugString(), - " ", y_shape.DebugString(), " ", adj_x_, " ", adj_y_)); - - int x_outer_dim = adj_x_ ? (ndims - 1) : (ndims - 2); - int y_outer_dim = adj_y_ ? (ndims - 2) : (ndims - 1); - dimensions.push_back(x_shape.dim_size(x_outer_dim)); - dimensions.push_back(y_shape.dim_size(y_outer_dim)); - - xla::ComputationBuilder* builder = ctx->builder(); - - xla::ComputationDataHandle x_handle = ctx->Input(0); - if (BaseType(input_type(0)) == DT_COMPLEX64 && adj_x_) { - x_handle = builder->Conj(x_handle); - } - xla::ComputationDataHandle y_handle = ctx->Input(1); - if (BaseType(input_type(1)) == DT_COMPLEX64 && adj_y_) { - y_handle = builder->Conj(y_handle); - } - - // Reshape input tensors into 3D tensors by flattening the batch - // dimensions. This makes it easier to unroll the batch dimension. - auto x_flat = - builder->Reshape(x_handle, {batch_count, x_shape.dim_size(ndims - 2), - x_shape.dim_size(ndims - 1)}); - auto y_flat = - builder->Reshape(y_handle, {batch_count, y_shape.dim_size(ndims - 2), - y_shape.dim_size(ndims - 1)}); - - // Slice batches into individual matrices and multiply them. - std::vector out_slices; - for (int i = 0; i < batch_count; ++i) { - // Slice off individual matrices and reshape to 2D tensors. - auto x_slice = builder->Slice( - x_flat, {i, 0, 0}, - {i + 1, x_shape.dim_size(ndims - 2), x_shape.dim_size(ndims - 1)}, - {1, 1, 1}); - x_slice = builder->Reshape( - x_slice, {x_shape.dim_size(ndims - 2), x_shape.dim_size(ndims - 1)}); - auto y_slice = builder->Slice( - y_flat, {i, 0, 0}, - {i + 1, y_shape.dim_size(ndims - 2), y_shape.dim_size(ndims - 1)}, - {1, 1, 1}); - y_slice = builder->Reshape( - y_slice, {y_shape.dim_size(ndims - 2), y_shape.dim_size(ndims - 1)}); - - // Transpose if needed. - auto lhs = adj_x_ ? builder->Transpose(x_slice, {1, 0}) : x_slice; - auto rhs = adj_y_ ? builder->Transpose(y_slice, {1, 0}) : y_slice; - - // Multiply matrices and add an outer singleton dimension to the output - // so we can concatenate along the flattened batch dimension later. - auto out = builder->Dot(lhs, rhs); - out = builder->Reshape(out, - {1, dimensions[ndims - 2], dimensions[ndims - 1]}); - out_slices.push_back(out); - } - - // Concatenate output slices and reshape to original number of dimensions. - xla::ComputationDataHandle data; - if (out_slices.empty()) { - // It is illegal to pass an empty list to ConcatInDim. - // The batch count is empty, so both inputs must have zero elements. - // Arbitrarily use the left input as the argument to Reshape(). - data = x_handle; - } else { - data = builder->ConcatInDim(out_slices, 0); - } - data = builder->Reshape(data, dimensions); - - ctx->SetOutput(0, data); + auto result = + BatchDot(ctx->builder(), ctx->Input(0), ctx->Input(1), adj_x_, adj_y_); + OP_REQUIRES_OK(ctx, result.status()); + ctx->SetOutput(0, result.ValueOrDie()); } private: diff --git a/tensorflow/compiler/tf2xla/kernels/cholesky_op.cc b/tensorflow/compiler/tf2xla/kernels/cholesky_op.cc new file mode 100644 index 0000000000..87d858f763 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/cholesky_op.cc @@ -0,0 +1,39 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/cholesky.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" + +namespace tensorflow { +namespace { + +class CholeskyOp : public XlaOpKernel { + public: + explicit CholeskyOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + auto result = Cholesky(ctx->builder(), ctx->Input(0)); + if (!result.ok()) { + ctx->SetStatus(result.status()); + return; + } + ctx->SetOutput(0, result.ValueOrDie()); + } +}; + +REGISTER_XLA_OP(Name("Cholesky"), CholeskyOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD new file mode 100644 index 0000000000..21ad21f737 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -0,0 +1,120 @@ +# Utilities for building XLA computations. + +licenses(["notice"]) # Apache 2.0 + +package( + default_visibility = ["//tensorflow/compiler/tf2xla:friends"], +) + +# Filegroup used to collect source files for dependency checking. +filegroup( + name = "c_srcs", + data = glob([ + "**/*.cc", + "**/*.h", + ]), +) + +load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test") + +cc_library( + name = "batch_dot", + srcs = ["batch_dot.cc"], + hdrs = ["batch_dot.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "cholesky", + srcs = ["cholesky.cc"], + hdrs = ["cholesky.h"], + deps = [ + ":batch_dot", + ":triangular_solve", + ":util", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "triangular_solve", + srcs = ["triangular_solve.cc"], + hdrs = ["triangular_solve.h"], + deps = [ + ":batch_dot", + ":util", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/core:lib", + ], +) + +xla_test( + name = "triangular_solve_test", + srcs = ["triangular_solve_test.cc"], + deps = [ + ":triangular_solve", + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +cc_library( + name = "util", + srcs = ["util.cc"], + hdrs = ["util.h"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/core:lib", + ], +) + +# ----------------------------------------------------------------------------- + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/compiler/tf2xla/lib/batch_dot.cc b/tensorflow/compiler/tf2xla/lib/batch_dot.cc new file mode 100644 index 0000000000..28a5e6a58b --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/batch_dot.cc @@ -0,0 +1,154 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/batch_dot.h" + +#include +#include + +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +// The current implementation simply unrolls the computation along the batch +// dimension. +// TODO(andydavis): add batching support to XLA's Dot operator. +xla::StatusOr BatchDot( + xla::ComputationBuilder* builder, xla::ComputationDataHandle x, + xla::ComputationDataHandle y, bool transpose_x, bool transpose_y) { + TF_ASSIGN_OR_RETURN(std::unique_ptr x_shape, + builder->GetShape(x)); + TF_ASSIGN_OR_RETURN(std::unique_ptr y_shape, + builder->GetShape(y)); + + // Check that both tensors have the same number of dimensions. There must be + // at least two (the batch dimensions can be empty). + if (xla::ShapeUtil::Rank(*x_shape) != xla::ShapeUtil::Rank(*y_shape)) { + return errors::InvalidArgument( + "Arguments to BatchedDot have different ranks: ", + xla::ShapeUtil::HumanString(*x_shape), " vs. ", + xla::ShapeUtil::HumanString(*y_shape)); + } + const int ndims = xla::ShapeUtil::Rank(*x_shape); + if (ndims < 2) { + return errors::InvalidArgument( + "Arguments to BatchedDot must have rank >= 2: ", ndims); + } + + // The batch dimensions must be equal and the matrix dimensions must be + // valid. + std::vector dimensions; + int64 batch_count = 1; + for (int i = 0; i < ndims - 2; ++i) { + int64 x_size = x_shape->dimensions(i); + int64 y_size = y_shape->dimensions(i); + if (x_size != y_size) { + return errors::InvalidArgument( + "Dimension ", i, " of inputs to BatchedDot must be equal: ", + xla::ShapeUtil::HumanString(*x_shape), " vs ", + xla::ShapeUtil::HumanString(*y_shape)); + } + dimensions.push_back(x_size); + batch_count *= x_size; + } + + int x_inner_dim = transpose_x ? (ndims - 2) : (ndims - 1); + int y_inner_dim = transpose_y ? (ndims - 1) : (ndims - 2); + int64 x_inner_dim_size = x_shape->dimensions(x_inner_dim); + int64 y_inner_dim_size = y_shape->dimensions(y_inner_dim); + if (x_inner_dim_size != y_inner_dim_size) { + return errors::InvalidArgument( + "Dimensions ", x_inner_dim, " and ", y_inner_dim, + " of arguments to BatchedDot must be equal: ", + xla::ShapeUtil::HumanString(*x_shape), " transpose: ", transpose_x, + " vs. ", xla::ShapeUtil::HumanString(*y_shape), + " transpose: ", transpose_y); + } + + // If there are no batch dimensions, use a regular Dot. This case exists + // to improve the readability of the emitted graphs. + if (dimensions.empty()) { + auto lhs = transpose_x ? builder->Transpose(x, {1, 0}) : x; + auto rhs = transpose_y ? builder->Transpose(y, {1, 0}) : y; + return builder->Dot(lhs, rhs); + } + + int x_outer_dim = transpose_x ? (ndims - 1) : (ndims - 2); + int y_outer_dim = transpose_y ? (ndims - 2) : (ndims - 1); + dimensions.push_back(x_shape->dimensions(x_outer_dim)); + dimensions.push_back(y_shape->dimensions(y_outer_dim)); + + if (x_shape->element_type() == xla::C64 && transpose_x) { + x = builder->Conj(x); + } + if (y_shape->element_type() == xla::C64 && transpose_y) { + y = builder->Conj(y); + } + + // Reshape input tensors into 3D tensors by flattening the batch + // dimensions. This makes it easier to unroll the batch dimension. + auto x_flat = + builder->Reshape(x, {batch_count, x_shape->dimensions(ndims - 2), + x_shape->dimensions(ndims - 1)}); + auto y_flat = + builder->Reshape(y, {batch_count, y_shape->dimensions(ndims - 2), + y_shape->dimensions(ndims - 1)}); + + // Slice batches into individual matrices and multiply them. + std::vector out_slices; + for (int64 i = 0; i < batch_count; ++i) { + // Slice off individual matrices and reshape to 2D tensors. + auto x_slice = builder->Slice( + x_flat, {i, 0, 0}, + {i + 1, x_shape->dimensions(ndims - 2), x_shape->dimensions(ndims - 1)}, + {1, 1, 1}); + x_slice = builder->Reshape(x_slice, {x_shape->dimensions(ndims - 2), + x_shape->dimensions(ndims - 1)}); + auto y_slice = builder->Slice( + y_flat, {i, 0, 0}, + {i + 1, y_shape->dimensions(ndims - 2), y_shape->dimensions(ndims - 1)}, + {1, 1, 1}); + y_slice = builder->Reshape(y_slice, {y_shape->dimensions(ndims - 2), + y_shape->dimensions(ndims - 1)}); + + // Transpose if needed. + auto lhs = transpose_x ? builder->Transpose(x_slice, {1, 0}) : x_slice; + auto rhs = transpose_y ? builder->Transpose(y_slice, {1, 0}) : y_slice; + + // Multiply matrices and add an outer singleton dimension to the output + // so we can concatenate along the flattened batch dimension later. + auto out = builder->Dot(lhs, rhs); + out = builder->Reshape(out, + {1, dimensions[ndims - 2], dimensions[ndims - 1]}); + out_slices.push_back(out); + } + + // Concatenate output slices and reshape to original number of dimensions. + xla::ComputationDataHandle data; + if (out_slices.empty()) { + // It is illegal to pass an empty list to ConcatInDim. + // The batch count is empty, so both inputs must have zero elements. + // Arbitrarily use the left input as the argument to Reshape(). + data = x; + } else { + data = builder->ConcatInDim(out_slices, 0); + } + return builder->Reshape(data, dimensions); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/batch_dot.h b/tensorflow/compiler/tf2xla/lib/batch_dot.h new file mode 100644 index 0000000000..b46bc7417d --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/batch_dot.h @@ -0,0 +1,51 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_BATCH_DOT_H_ +#define TENSORFLOW_COMPILER_TF2XLA_LIB_BATCH_DOT_H_ + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" + +namespace tensorflow { + +// Multiplies slices of two tensors in batches. + +// Multiplies all slices of `Tensor` `x` and `y` (each slice can be +// viewed as an element of a batch), and arranges the individual results +// in a single output tensor of the same batch size. Each of the +// individual slices can optionally be transposed before multiplication by +// setting the `transpose_x` or `transpose_y` flag to `true`. +// +// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` +// and `[..., r_y, c_y]`. +// +// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: +// +// r_o = c_x if transpose_x else r_x +// c_o = r_y if transpose_y else c_y +// +// It is computed as: +// +// output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) +// TODO(phawkins): add an option to take the complex conjugate of the LHS or +// RHS. +xla::StatusOr BatchDot( + xla::ComputationBuilder* builder, xla::ComputationDataHandle x, + xla::ComputationDataHandle y, bool transpose_x, bool transpose_y); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_LIB_BATCH_DOT_H_ diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.cc b/tensorflow/compiler/tf2xla/lib/cholesky.cc new file mode 100644 index 0000000000..b3cc489adf --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/cholesky.cc @@ -0,0 +1,166 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/cholesky.h" + +#include +#include + +#include "tensorflow/compiler/tf2xla/lib/batch_dot.h" +#include "tensorflow/compiler/tf2xla/lib/triangular_solve.h" +#include "tensorflow/compiler/tf2xla/lib/util.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +namespace { + +// def cholesky_unblocked(a): +// assert len(a.shape) == 2 and a.shape[-2] == a.shape[-1] +// n = a.shape[-2] +// l = np.zeros_like(a) +// for j in xrange(n): +// r = l[..., j, :j] +// l[..., j, j] = np.sqrt(a[..., j, j] - np.dot(r, r)) +// l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], +// np.transpose(r))) / l[..., j, j] +// return l +xla::StatusOr CholeskyUnblocked( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a) { + TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(a)); + xla::ComputationDataHandle l = Zeros(builder, *shape); + const int64 n = xla::ShapeUtil::GetDimension(*shape, -2); + for (int j = 0; j < n; ++j) { + // Picture of block structure: + // ... \ + // \ + // -- r -- d + // |\ + // B c \ + // | \ + // | ... + // + // ^ + // column j + TF_ASSIGN_OR_RETURN(auto d, + SliceInMinorDims(builder, a, {j, j}, {j + 1, j + 1})); + TF_ASSIGN_OR_RETURN(auto c, + SliceInMinorDims(builder, a, {j + 1, j}, {n, j + 1})); + xla::ComputationDataHandle new_d_squared = d; + xla::ComputationDataHandle br; + if (j > 0) { + TF_ASSIGN_OR_RETURN(auto r, + SliceInMinorDims(builder, l, {j, 0}, {j + 1, j})); + TF_ASSIGN_OR_RETURN(auto b, + SliceInMinorDims(builder, l, {j + 1, 0}, {n, j})); + TF_ASSIGN_OR_RETURN(auto r_squared, + BatchDot(builder, r, r, /*transpose_x=*/false, + /*transpose_y=*/true)); + new_d_squared = builder->Sub(new_d_squared, r_squared); + + TF_ASSIGN_OR_RETURN(br, BatchDot(builder, b, r, /*transpose_x=*/false, + /*transpose_y=*/true)); + } + auto new_d_inv = builder->Pow( + new_d_squared, FloatLiteral(builder, shape->element_type(), -0.5)); + auto new_d = builder->Mul(new_d_inv, new_d_squared); + TF_ASSIGN_OR_RETURN(l, UpdateSliceInMinorDims(builder, l, new_d, {j, j})); + + if (j > 0) { + c = builder->Sub(c, br); + } + auto new_c = builder->Mul(c, new_d_inv); + TF_ASSIGN_OR_RETURN(l, + UpdateSliceInMinorDims(builder, l, new_c, {j + 1, j})); + } + return l; +} + +} // namespace + +xla::StatusOr Cholesky( + xla::ComputationBuilder* builder, xla::ComputationDataHandle a, + int64 block_size) { + TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, + builder->GetShape(a)); + const int ndims = xla::ShapeUtil::Rank(*a_shape); + if (ndims < 2) { + return errors::InvalidArgument( + "Arguments to Cholesky must have rank >= 2: ", ndims); + } + + const int64 n = xla::ShapeUtil::GetDimension(*a_shape, -1); + if (n != xla::ShapeUtil::GetDimension(*a_shape, -2)) { + return errors::InvalidArgument( + "Arguments to Cholesky must be square matrices: ", + xla::ShapeUtil::HumanString(*a_shape)); + } + + if (block_size < 1) { + return errors::InvalidArgument( + "block_size argument to Cholesky must be >= 1; got ", block_size); + } + + // Blocked left-looking Cholesky factorization. + // Algorithm 1 from + // Haidar, Azzam, et al. "High-performance Cholesky factorization for GPU-only + // execution." Proceedings of General Purpose GPUs. ACM, 2017. + xla::ComputationDataHandle l = Zeros(builder, *a_shape); + for (int64 i = 0; i < n; i += block_size) { + int64 k = std::min(block_size, n - i); + if (i > 0) { + // TODO(phawkins): consider implementing SYRK for the diagonal part of + // the panel. + // a[i:, i:i+k] -= np.dot(l[i:, :i], np.transpose(l[i:i+k, :i])) + TF_ASSIGN_OR_RETURN(auto lhs, + SliceInMinorDims(builder, l, {i, 0}, {n, i})); + TF_ASSIGN_OR_RETURN(auto rhs, + SliceInMinorDims(builder, l, {i, 0}, {i + k, i})); + TF_ASSIGN_OR_RETURN(auto delta, + BatchDot(builder, lhs, rhs, /*transpose_x=*/false, + /*transpose_y=*/true)); + TF_ASSIGN_OR_RETURN(auto before, + SliceInMinorDims(builder, a, {i, i}, {n, i + k})); + TF_ASSIGN_OR_RETURN( + a, UpdateSliceInMinorDims(builder, a, builder->Sub(before, delta), + {i, i})); + } + + // l[i:i+k, i:i+k] = cholesky_unblocked(a[i:i+k, i:i+k]) + TF_ASSIGN_OR_RETURN(auto x, + SliceInMinorDims(builder, a, {i, i}, {i + k, i + k})); + TF_ASSIGN_OR_RETURN(auto factorized, CholeskyUnblocked(builder, x)); + TF_ASSIGN_OR_RETURN(l, + UpdateSliceInMinorDims(builder, l, factorized, {i, i})); + + if (i + k < n) { + // l[i+k:, i:i+k] = trsm_right_transpose(l[i:i+k, i:i+k], a[i+k:, i:i+k]) + TF_ASSIGN_OR_RETURN(auto panel, + SliceInMinorDims(builder, a, {i + k, i}, {n, i + k})); + TF_ASSIGN_OR_RETURN(auto update, + TriangularSolve(builder, factorized, panel, + /*block_size=*/8)); + TF_ASSIGN_OR_RETURN( + l, UpdateSliceInMinorDims(builder, l, update, {i + k, i})); + } + } + return l; +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.h b/tensorflow/compiler/tf2xla/lib/cholesky.h new file mode 100644 index 0000000000..2bead7359b --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/cholesky.h @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_ +#define TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_ + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" + +namespace tensorflow { + +// Computes the Cholesky decompositions of a batch of symmetric positive +// definite matrices. +// `a` must be a (batched) square matrix; i.e., it must have rank >= 2 with the +// two minor dimensions equal. +// The algorithm implements a blocked Cholesky decomposition; `block_size` is +// the block size to use. +// TODO(phawkins): check for negative values on the diagonal and return an +// error, instead of silently yielding NaNs. +xla::StatusOr Cholesky( + xla::ComputationBuilder* builder, xla::ComputationDataHandle a, + int64 block_size = 256); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_ diff --git a/tensorflow/compiler/tf2xla/lib/triangular_solve.cc b/tensorflow/compiler/tf2xla/lib/triangular_solve.cc new file mode 100644 index 0000000000..579944c3a3 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/triangular_solve.cc @@ -0,0 +1,175 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/triangular_solve.h" + +#include +#include + +#include "tensorflow/compiler/tf2xla/lib/batch_dot.h" +#include "tensorflow/compiler/tf2xla/lib/util.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +xla::StatusOr TriangularSolve( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a, + xla::ComputationDataHandle b, int64 block_size) { + TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, + builder->GetShape(a)); + TF_ASSIGN_OR_RETURN(std::unique_ptr b_shape, + builder->GetShape(b)); + if (xla::ShapeUtil::Rank(*a_shape) != xla::ShapeUtil::Rank(*b_shape)) { + return errors::InvalidArgument( + "Arguments to TriangularSolve have different ranks: ", + xla::ShapeUtil::HumanString(*a_shape), " vs. ", + xla::ShapeUtil::HumanString(*b_shape)); + } + const int ndims = xla::ShapeUtil::Rank(*a_shape); + if (ndims < 2) { + return errors::InvalidArgument( + "Arguments to TriangularSolve must have rank >= 2: ", ndims); + } + // The batch dimensions must be equal. + std::vector batch_dimensions; + for (int i = 0; i < ndims - 2; ++i) { + int64 a_size = a_shape->dimensions(i); + int64 b_size = b_shape->dimensions(i); + if (a_size != b_size) { + return errors::InvalidArgument( + "Batch dimensions of arguments to TriangularSolve must be equal: ", + xla::ShapeUtil::HumanString(*a_shape), " vs ", + xla::ShapeUtil::HumanString(*b_shape)); + } + batch_dimensions.push_back(a_size); + } + + const int64 n = xla::ShapeUtil::GetDimension(*a_shape, -1); + const int64 m = xla::ShapeUtil::GetDimension(*b_shape, -2); + if (n != xla::ShapeUtil::GetDimension(*a_shape, -2)) { + return errors::InvalidArgument( + "The 'a' arguments to TriangularSolve must be square matrices: ", + xla::ShapeUtil::HumanString(*a_shape)); + } + if (n != xla::ShapeUtil::GetDimension(*b_shape, -1)) { + return errors::InvalidArgument( + "Arguments to TriangularSolve have incompatible matrix shapes: ", + xla::ShapeUtil::HumanString(*a_shape), " vs ", + xla::ShapeUtil::HumanString(*b_shape)); + } + + if (block_size < 1) { + return errors::InvalidArgument( + "block_size argument to TriangularSolve must be >= 1; got ", + block_size); + } + + // Returns [b1, b2, ... , bn, indices[0], indices[1]]. + auto prepend_batch_dims = [&](std::array indices) { + std::vector output(ndims); + std::copy(batch_dimensions.begin(), batch_dimensions.end(), output.begin()); + std::copy(indices.begin(), indices.end(), + output.begin() + batch_dimensions.size()); + return output; + }; + + std::map base_computations; + auto get_base_triangular_solve = + [&](int k) -> xla::StatusOr { + xla::Computation& computation = base_computations[k]; + if (computation.IsNull()) { + std::unique_ptr sub = builder->CreateSubBuilder( + tensorflow::strings::StrCat("trsm_base_", k)); + + auto a_param = + sub->Parameter(0, + xla::ShapeUtil::MakeShape(b_shape->element_type(), + prepend_batch_dims({k, k})), + "a"); + + auto b_param = + sub->Parameter(1, + xla::ShapeUtil::MakeShape(b_shape->element_type(), + prepend_batch_dims({m, k})), + "b"); + + // TODO(phawkins): it might make sense to use a while loop here, rather + // than unrolling. + // TODO(phawkins): the left-looking variant of the algorithm might be more + // efficient at block size 1. + TF_RETURN_IF_ERROR(TriangularSolve(sub.get(), a_param, b_param, + /*block_size=*/1) + .status()); + + TF_ASSIGN_OR_RETURN(computation, sub->Build()); + } + return &computation; + }; + + xla::ComputationDataHandle output = Zeros(builder, *b_shape); + + // Right-looking blocked triangular solve. + // For an explanation of the algorithm, see the TRSM discussion in: + // Goto, Kazushige, and Robert Van De Geijn. "High-performance implementation + // of the level-3 BLAS." ACM Transactions on Mathematical Software (TOMS) 35.1 + // (2008): 4. + for (int64 i = 0; i < n; i += block_size) { + int64 k = std::min(block_size, n - i); + + // if k > 1: + // output[..., :, i:i+k] = triangular_solve( + // a[..., i:i+k, ..., i:i+k], b[..., :, i:i+k], side='Right', + // kind='Lower', transpose=True, block_size=1) + // else: + // output[..., :, i] = b[..., :, i] / a[..., i, i] + TF_ASSIGN_OR_RETURN(auto a_slice, + SliceInMinorDims(builder, a, {i, i}, {i + k, i + k})); + TF_ASSIGN_OR_RETURN(auto b_slice, + SliceInMinorDims(builder, b, {0, i}, {m, i + k})); + xla::ComputationDataHandle update; + if (k > 1) { + TF_ASSIGN_OR_RETURN(xla::Computation * solve, + get_base_triangular_solve(k)); + update = builder->Call(*solve, {a_slice, b_slice}); + } else { + update = builder->Div(b_slice, a_slice); + } + + TF_ASSIGN_OR_RETURN( + output, UpdateSliceInMinorDims(builder, output, update, {0, i})); + // b[..., :, i+k:] -= np.dot(output[..., :, i:i+k], + // np.transpose(..., a[i+k:, i:i+k])) + if (i + k < n) { + TF_ASSIGN_OR_RETURN(auto a_slice_2, + SliceInMinorDims(builder, a, {i + k, i}, {n, i + k})); + TF_ASSIGN_OR_RETURN(auto b_update, BatchDot(builder, update, a_slice_2, + /*transpose_x=*/false, + /*transpose_y=*/true)); + + TF_ASSIGN_OR_RETURN(auto b_slice_2, + SliceInMinorDims(builder, b, {0, i + k}, {m, n})); + b_update = builder->Sub(b_slice_2, b_update); + TF_ASSIGN_OR_RETURN( + b, UpdateSliceInMinorDims(builder, b, b_update, {0, i + k})); + } + } + return output; +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/triangular_solve.h b/tensorflow/compiler/tf2xla/lib/triangular_solve.h new file mode 100644 index 0000000000..501d026411 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/triangular_solve.h @@ -0,0 +1,46 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_TRIANGULAR_SOLVE_H_ +#define TENSORFLOW_COMPILER_TF2XLA_LIB_TRIANGULAR_SOLVE_H_ + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" + +namespace tensorflow { + +// Solves systems of linear equations with upper or lower triangular matrices by +// backsubstitution. +// +// `a` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form +// square matrices. The strictly upper triangular part of each inner-most matrix +// is assumed to be zero and not accessed. +// `b` is a tensor of shape `[..., M, K]`. +// +// The innermost matrices in the output satisfy matrix equations +// `output[..., i, j] * adjoint(a[..., k, j]) = b[..., i, k]`. +// +// Uses a blocked algorithm if `block_size` is > 1; if block_size == 1 then no +// blocking is used. +// TODO(phawkins): equivalent to the BLAS TRSM routine with side=right, +// kind=lower, and transposed_a=true. Implement the other possible combinations +// of side, kind and transposed_a. +xla::StatusOr TriangularSolve( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a, + xla::ComputationDataHandle b, int64 block_size = 256); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_LIB_TRIANGULAR_SOLVE_H_ diff --git a/tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc b/tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc new file mode 100644 index 0000000000..671d9aa4fe --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc @@ -0,0 +1,69 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/triangular_solve.h" + +#include +#include +#include + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tensorflow { +namespace { + +using TriangularSolveTest = xla::ClientLibraryTestBase; + +XLA_TEST_F(TriangularSolveTest, Simple) { + xla::ComputationBuilder builder(client_, TestName()); + + xla::Array2D a_vals({ + {2, 0, 0, 0}, + {3, 6, 0, 0}, + {4, 7, 9, 0}, + {5, 8, 10, 11}, + }); + xla::Array2D b_vals({ + {1, 2, 3, 4}, + {5, 6, 7, 8}, + {9, 10, 11, 12}, + }); + + xla::ComputationDataHandle a, b; + auto a_data = CreateR2Parameter(a_vals, 0, "a", &builder, &a); + auto b_data = CreateR2Parameter(b_vals, 1, "b", &builder, &b); + auto result = TriangularSolve(&builder, a, b, /*block_size=*/2); + TF_ASSERT_OK(result.status()); + + xla::Array2D expected({ + {0.5, 0.08333334, 0.04629629, 0.03367003}, + {2.5, -0.25, -0.1388889, -0.1010101}, + {4.5, -0.58333331, -0.32407406, -0.23569024}, + }); + + ComputeAndCompareR2(&builder, expected, {a_data.get(), b_data.get()}, + xla::ErrorSpec(2e-3, 2e-3)); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc new file mode 100644 index 0000000000..7ffe0aa6df --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/util.cc @@ -0,0 +1,107 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/util.h" + +#include +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder, + xla::Shape& shape) { + return builder->Broadcast( + builder->ConstantLiteral(xla::Literal::Zero(shape.element_type())), + xla::AsInt64Slice(shape.dimensions())); +} + +xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, + xla::PrimitiveType type, double value) { + switch (type) { + case xla::F16: + return builder->ConstantR0(static_cast(value)); + break; + case xla::F32: + return builder->ConstantR0(static_cast(value)); + break; + case xla::F64: + return builder->ConstantR0(value); + break; + case xla::C64: + return builder->ConstantR0(value); + break; + default: + LOG(FATAL) << "unhandled element type " << type; + } +} + +xla::StatusOr SliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + gtl::ArraySlice start, gtl::ArraySlice end) { + TF_RET_CHECK(start.size() == end.size()); + int64 n_minor_dims = start.size(); + + TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); + + const int64 n_dims = xla::ShapeUtil::Rank(*shape); + TF_RET_CHECK(n_minor_dims <= n_dims); + gtl::ArraySlice major_dims(xla::AsInt64Slice(shape->dimensions()), + /*pos=*/0, + /*len=*/n_dims - n_minor_dims); + + // Prepends 0s in the major dim + std::vector padded_start(n_dims, 0); + std::copy(start.begin(), start.end(), + padded_start.begin() + major_dims.size()); + + // Prepends the shape of the major dims. + std::vector padded_end(n_dims); + std::copy(major_dims.begin(), major_dims.end(), padded_end.begin()); + std::copy(end.begin(), end.end(), padded_end.begin() + major_dims.size()); + + std::vector strides(n_dims, 1); + return builder->Slice(x, padded_start, padded_end, strides); +} + +xla::StatusOr UpdateSlice( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const xla::ComputationDataHandle& update, gtl::ArraySlice start) { + // TODO(phawkins): make int64 work on all backends, remove the int32 cast. + std::vector start_as_int32(start.begin(), start.end()); + return builder->DynamicUpdateSlice( + x, update, builder->ConstantR1(start_as_int32)); +} + +xla::StatusOr UpdateSliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const xla::ComputationDataHandle& update, gtl::ArraySlice start) { + TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(*shape); + const int64 n_minor_dims = start.size(); + TF_RET_CHECK(n_minor_dims <= n_dims); + std::vector padded_start(n_dims, 0); + std::copy(start.begin(), start.end(), + padded_start.begin() + (n_dims - n_minor_dims)); + return UpdateSlice(builder, x, update, padded_start); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/util.h b/tensorflow/compiler/tf2xla/lib/util.h new file mode 100644 index 0000000000..8fba6b5cf2 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/util.h @@ -0,0 +1,54 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_UTIL_H_ +#define TENSORFLOW_COMPILER_TF2XLA_LIB_UTIL_H_ + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + +namespace tensorflow { + +// Returns a zero-filled tensor with shape `shape`. +xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder, + xla::Shape& shape); + +// Returns a floating point scalar constant of 'type' with 'value'. +// If 'type' is complex, returns a real value with zero imaginary component. +xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, + xla::PrimitiveType type, double value); + +// Performs a slice in the minor dimensions of a Tensor. +xla::StatusOr SliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + gtl::ArraySlice start, gtl::ArraySlice end); + +// Updates a slice of 'x', i.e., +// x[start[0], ..., start[n]] = update +xla::StatusOr UpdateSlice( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const xla::ComputationDataHandle& update, gtl::ArraySlice start); + +// Updates a slice of 'x', where 'start' contains a list of minor dimensions: +// x[..., start[0], ..., start[n]] = update +xla::StatusOr UpdateSliceInMinorDims( + xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, + const xla::ComputationDataHandle& update, gtl::ArraySlice start); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_LIB_UTIL_H_ diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 1df6173275..9c3e15d2fa 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -16,6 +16,7 @@ limitations under the License. // This file defines helper routines for Tla JIT compilation. #include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/lib/util.h" #include "tensorflow/compiler/tf2xla/literal_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" @@ -185,25 +186,9 @@ xla::ComputationDataHandle XlaHelpers::IntegerLiteral( xla::ComputationDataHandle XlaHelpers::FloatLiteral(xla::ComputationBuilder* b, DataType data_type, double value) { - xla::Literal literal; xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); - switch (type) { - case xla::F16: - return b->ConstantR0(static_cast(value)); - break; - case xla::F32: - return b->ConstantR0(static_cast(value)); - break; - case xla::F64: - return b->ConstantR0(value); - break; - case xla::C64: - return b->ConstantR0(value); - break; - default: - LOG(FATAL) << "unhandled element type " << type; - } + return ::tensorflow::FloatLiteral(b, type, value); } /* static */ Status XlaHelpers::ReshapeLiteral( diff --git a/tensorflow/compiler/xla/tests/test_macros.h b/tensorflow/compiler/xla/tests/test_macros.h index 3878ac1013..bea0b5ef92 100644 --- a/tensorflow/compiler/xla/tests/test_macros.h +++ b/tensorflow/compiler/xla/tests/test_macros.h @@ -96,7 +96,8 @@ string PrependDisabledIfIndicated(const string& test_case_name, test_name)::test_info_ = \ ::testing::internal::MakeAndRegisterTestInfo( \ #test_case_name, \ - PrependDisabledIfIndicated(#test_case_name, #test_name).c_str(), \ + ::xla::PrependDisabledIfIndicated(#test_case_name, #test_name) \ + .c_str(), \ nullptr, nullptr, \ ::testing::internal::CodeLocation(__FILE__, __LINE__), (parent_id), \ parent_class::SetUpTestCase, parent_class::TearDownTestCase, \ @@ -135,7 +136,8 @@ string PrependDisabledIfIndicated(const string& test_case_name, ::testing::internal::CodeLocation(__FILE__, __LINE__)) \ ->AddTestPattern( \ #test_case_name, \ - PrependDisabledIfIndicated(#test_case_name, #test_name).c_str(), \ + ::xla::PrependDisabledIfIndicated(#test_case_name, #test_name) \ + .c_str(), \ new ::testing::internal::TestMetaFactory()); \ return 0; \ -- GitLab From 8934ebc299bdbfdbc40af5ff9c3da2af8ec8ec64 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 10 Nov 2017 14:40:38 -0800 Subject: [PATCH 0288/1801] More gradient-related microbenchmarks. PiperOrigin-RevId: 175339062 --- tensorflow/python/eager/backprop.py | 2 + tensorflow/python/eager/benchmarks_test.py | 46 ++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 0a92ab38a8..1a8b6ec52a 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -120,6 +120,7 @@ _tracing = False # gradient function registration site, to be less error-prone # TODO(apassos) add ops other than those in nn_grad and math_grad _ops_which_dont_need_outputs = set([ + "Identity", "MatMul", "Conv2DBackpropInput", "Conv2DBackpropFilter", @@ -195,6 +196,7 @@ _ops_which_dont_need_outputs = set([ ]) _ops_which_dont_need_inputs = set([ + "Identity", "Softmax", "LogSoftmax", "BiasAdd", diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index b555f16f1d..435505edd7 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -37,6 +37,7 @@ from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -134,6 +135,10 @@ class MicroBenchmarks(test.Benchmark): func = lambda: m * m self._run(func, num_iters) + def _benchmark_tf_multiply_op(self, m, num_iters): + func = lambda: math_ops.multiply(m, m) + self._run(func, num_iters) + def benchmark_np_multiply(self): self._benchmark_np_multiply(self._m_2, 30000) @@ -149,6 +154,47 @@ class MicroBenchmarks(test.Benchmark): m = self._m_2.gpu() self._benchmark_tf_multiply(m, 30000) + def benchmark_tf_multiply_op_CPU(self): + with context.device(CPU): + m = self._m_2.cpu() + self._benchmark_tf_multiply_op(m, 30000) + + def benchmark_tf_multiply_op_GPU(self): + if not context.num_gpus(): + return + with context.device(GPU): + m = self._m_2.gpu() + self._benchmark_tf_multiply_op(m, 30000) + + def benchmark_tf_identity(self): + m = self._m_2 + self._run(lambda: gen_array_ops.identity(m), 30000) + + def benchmark_tf_gradient_function_identity(self): + m = self._m_2 + self._run( + lambda: backprop.gradients_function(gen_array_ops.identity, [0])(m), + 30000) + + def benchmark_tf_gradient_forward_identity(self): + with backprop.GradientTape() as tape: + m = self._m_2 + tape.watch(m) + self._run(lambda: gen_array_ops.identity(m), 30000) + + def benchmark_tf_gradient_tape_push_pop(self): + + def f(): + with backprop.GradientTape(): + pass + self._run(f, 30000) + + def benchmark_tf_gradient_function_no_op(self): + m = self._m_2 + self._run( + lambda: backprop.gradients_function(lambda x: x, [0])(m), + 30000) + def _benchmark_np_matmul(self, m, transpose_b, num_iters): a = m.cpu().numpy() b = a.T if transpose_b else a -- GitLab From bfcc0970d61952ee894eaa4ef3256033239359b7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 14:56:11 -0800 Subject: [PATCH 0289/1801] Change HloSharding to allow getting a ShapeTree for non-tuple types. Add reverse iteration to ShapeTree. PiperOrigin-RevId: 175341255 --- .../compiler/xla/service/hlo_sharding.h | 26 +-- .../compiler/xla/service/hlo_sharding_test.cc | 7 +- tensorflow/compiler/xla/shape_tree.h | 154 ++++++++++++++---- tensorflow/compiler/xla/shape_tree_test.cc | 36 ++++ tensorflow/compiler/xla/shape_util.h | 3 + 5 files changed, 179 insertions(+), 47 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index f8ef2a3d05..dbd16b7c9d 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -142,18 +142,22 @@ class HloSharding { bool HasUniqueDevice() const; // Returns the ShapeTree containing the shardings for each element of this - // tuple. Only the leaf elements are populated. This creates a new ShapeTree - // object so is not cheap. REQUIRES: IsTuple() - ShapeTree GetTupleShardingsAsShapeTree( - const Shape& tuple_shape) const { - ShapeTree result(tuple_shape, HloSharding::Replicate()); - CHECK_EQ(std::distance(result.leaf_begin(), result.leaf_end()), - tuple_elements_.size()); - auto it = tuple_elements_.begin(); - for (auto& index_to_sharding : result.leaves()) { - index_to_sharding.second = *it++; + // tuple, if IsTuple, or a ShapeTree with a single element containing this + // sharding. Only the leaf elements are populated. This creates a new + // ShapeTree object so is not cheap. + ShapeTree GetAsShapeTree(const Shape& shape) const { + if (IsTuple()) { + ShapeTree result(shape, HloSharding::Replicate()); + CHECK_EQ(std::distance(result.leaf_begin(), result.leaf_end()), + tuple_elements_.size()); + auto it = tuple_elements_.begin(); + for (auto& index_to_sharding : result.leaves()) { + index_to_sharding.second = *it++; + } + return result; + } else { + return ShapeTree(shape, *this); } - return result; } bool operator==(const HloSharding& other) const { diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index 00ea38480e..3161dda271 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -70,6 +70,11 @@ TEST_F(HloShardingTest, DevicePlacement) { /*num_devices=*/6)); EXPECT_IS_NOT_OK( sharding.Validate(ShapeUtil::MakeShape(U32, {4}), /*num_devices=*/5)); + + ShapeTree shape_tree = + sharding.GetAsShapeTree(ShapeUtil::MakeShape(U32, {4})); + EXPECT_EQ(shape_tree.element({}), sharding); + EXPECT_TRUE(shape_tree.IsLeaf({})); } TEST_F(HloShardingTest, Tile) { @@ -149,7 +154,7 @@ TEST_F(HloShardingTest, NestedTuple) { HloSharding::FromProto(proto).ConsumeValueOrDie(); ShapeTree shape_tree = - tuple_sharding.GetTupleShardingsAsShapeTree(nested_tuple_shape); + tuple_sharding.GetAsShapeTree(nested_tuple_shape); EXPECT_EQ(shape_tree.element({0}), HloSharding::Replicate()); EXPECT_EQ(shape_tree.element({1, 0}), HloSharding::AssignDevice(0)); EXPECT_EQ(shape_tree.element({2}), HloSharding::AssignDevice(1)); diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index a898a4d375..bf8d190150 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -155,28 +155,57 @@ class ShapeTree { using const_iterator = ShapeTreeIterator; // begin/end for iterating over all nodes. - iterator begin() { return iterator(&root_, /*iterate_leaves_only=*/false); } - iterator end() { return iterator(nullptr, /*iterate_leaves_only=*/false); } + iterator begin() { + return iterator(&root_, /*iterate_leaves_only=*/false, + /*reverse=*/false); + } + iterator end() { + return iterator(nullptr, /*iterate_leaves_only=*/false, + /*reverse=*/false); + } const_iterator begin() const { - return const_iterator(&root_, /*iterate_leaves_only=*/false); + return const_iterator(&root_, /*iterate_leaves_only=*/false, + /*reverse=*/false); } const_iterator end() const { - return const_iterator(nullptr, /*iterate_leaves_only=*/false); + return const_iterator(nullptr, /*iterate_leaves_only=*/false, + /*reverse=*/false); + } + + // rbegin/rend for iterating over all nodes in reverse. + iterator rbegin() { + return iterator(&root_, /*iterate_leaves_only=*/false, + /*reverse=*/true); + } + iterator rend() { + return iterator(nullptr, /*iterate_leaves_only=*/false, + /*reverse=*/true); + } + const_iterator rbegin() const { + return const_iterator(&root_, /*iterate_leaves_only=*/false, + /*reverse=*/true); + } + const_iterator rend() const { + return const_iterator(nullptr, /*iterate_leaves_only=*/false, + /*reverse=*/true); } // leaf_begin()/leaf_end() iterates over all leaf nodes (nodes with no // children). iterator leaf_begin() { - return iterator(&root_, /*iterate_leaves_only=*/true); + return iterator(&root_, /*iterate_leaves_only=*/true, /*reverse=*/false); } iterator leaf_end() { - return iterator(nullptr, /*iterate_leaves_only=*/true); + return iterator(nullptr, /*iterate_leaves_only=*/true, + /*reverse=*/false); } const_iterator leaf_begin() const { - return const_iterator(&root_, /*iterate_leaves_only=*/true); + return const_iterator(&root_, /*iterate_leaves_only=*/true, + /*reverse=*/false); } const_iterator leaf_end() const { - return const_iterator(nullptr, /*iterate_leaves_only=*/true); + return const_iterator(nullptr, /*iterate_leaves_only=*/true, + /*reverse=*/false); } // range-based iterator for leaf_begin()/leaf_end(). tensorflow::gtl::iterator_range leaves() { @@ -186,6 +215,22 @@ class ShapeTree { return tensorflow::gtl::make_range(leaf_begin(), leaf_end()); } + iterator leaf_rbegin() { + return iterator(&root_, /*iterate_leaves_only=*/true, /*reverse=*/true); + } + iterator leaf_rend() { + return iterator(nullptr, /*iterate_leaves_only=*/true, + /*reverse=*/true); + } + const_iterator leaf_rbegin() const { + return const_iterator(&root_, /*iterate_leaves_only=*/true, + /*reverse=*/true); + } + const_iterator leaf_rend() const { + return const_iterator(nullptr, /*iterate_leaves_only=*/true, + /*reverse=*/true); + } + // Recursively traverses the shape and calls the given function at each // element. The function has the following arguments: // @@ -280,42 +325,61 @@ class ShapeTreeIterator : public std::iteratorchildren.empty() && iterate_leaves_only) { - ++*this; + // interior tree nodes, only leaves. If reverse is true, the iterator will + // visit nodes in the reverse of pre-order traversal. + ShapeTreeIterator(NodeType* node, bool iterate_leaves_only, bool reverse) + : node_(node), + iterate_leaves_only_(iterate_leaves_only), + reverse_(reverse) { + if (node_) { + if (reverse_) { + while (!node_->children.empty()) { + const int child_index = node_->children.size() - 1; + stack_.push_back({node_, child_index}); + node_ = node_->children[child_index].get(); + } + } else { + if (!node_->children.empty() && iterate_leaves_only) { + ++*this; + } + } } } ShapeTreeIterator(const ShapeTreeIterator& other) : node_(other.node_), stack_(other.stack_), - iterate_leaves_only_(other.iterate_leaves_only_) {} + iterate_leaves_only_(other.iterate_leaves_only_), + reverse_(other.reverse_) {} ShapeTreeIterator& operator++() { CHECK_NE(nullptr, node_) << "walking off the end() of an iterator!"; - // We're doing a pre-order walk, so if our current node has children take - // the first child. - if (!node_->children.empty()) { - stack_.push_back({node_, /*child-index=*/0}); - node_ = node_->children[0].get(); - if (node_->children.empty() || !iterate_leaves_only_) { - return *this; - } else { - // This is a non-leaf; tail-recurse. - return ++(*this); + if (reverse_) { + while (!stack_.empty()) { + node_ = stack_.back().first; + int64 next_child_index = stack_.back().second - 1; + stack_.pop_back(); + if (next_child_index < 0) { + if (!iterate_leaves_only_) { + // All children are visited, yield . + return *this; + } + } else { + stack_.push_back({node_, next_child_index}); + node_ = node_->children[next_child_index].get(); + while (!node_->children.empty()) { + const int child_index = node_->children.size() - 1; + stack_.push_back({node_, child_index}); + node_ = node_->children[child_index].get(); + } + return *this; + } } - } - // Otherwise we are currently at a leaf. Walk back up until a node contains - // a child we haven't visited yet. - while (!stack_.empty()) { - node_ = stack_.back().first; - int64 next_child_index = stack_.back().second + 1; - stack_.pop_back(); - if (node_->children.size() > next_child_index) { - stack_.push_back({node_, next_child_index}); - node_ = node_->children[next_child_index].get(); - + } else { + // We're doing a pre-order walk, so if our current node has children take + // the first child. + if (!node_->children.empty()) { + stack_.push_back({node_, /*child-index=*/0}); + node_ = node_->children[0].get(); if (node_->children.empty() || !iterate_leaves_only_) { return *this; } else { @@ -323,6 +387,24 @@ class ShapeTreeIterator : public std::iteratorchildren.size() > next_child_index) { + stack_.push_back({node_, next_child_index}); + node_ = node_->children[next_child_index].get(); + + if (node_->children.empty() || !iterate_leaves_only_) { + return *this; + } else { + // This is a non-leaf; tail-recurse. + return ++(*this); + } + } + } } // We've walked off the end of the tree. Set node_ to nullptr to signify // end(). @@ -364,6 +446,8 @@ class ShapeTreeIterator : public std::iterator> stack_; // True if we should not include interior nodes in our walk. bool iterate_leaves_only_; + // True if we should yield the reverse of the pre-order traversal. + bool reverse_; // Placeholder for the current value. Ideally this wouldn't exist and would // just be an rvalue, but operator -> needs to return a pointer to something. // We cannot just use a plain old value_type as it contains a reference so diff --git a/tensorflow/compiler/xla/shape_tree_test.cc b/tensorflow/compiler/xla/shape_tree_test.cc index 7b4b5cb0fb..4b6ab77281 100644 --- a/tensorflow/compiler/xla/shape_tree_test.cc +++ b/tensorflow/compiler/xla/shape_tree_test.cc @@ -456,6 +456,26 @@ TEST_F(ShapeTreeTest, IterateOrder) { {2, 1}})); } +TEST_F(ShapeTreeTest, ReverseIterateOrder) { + ShapeTree t(nested_tuple_shape_, 42); + std::vector v; + for (auto it = t.rbegin(); it != t.rend(); ++it) { + v.push_back(it->first); + } + EXPECT_EQ(v, (std::vector{ + {2, 1}, + {2, 0, 1}, + {2, 0, 0}, + {2, 0}, + {2}, + {1, 1}, + {1, 0}, + {1}, + {0}, + {}, + })); +} + TEST_F(ShapeTreeTest, IterateOrderLeaves) { ShapeTree t(nested_tuple_shape_, 42); std::vector v; @@ -466,5 +486,21 @@ TEST_F(ShapeTreeTest, IterateOrderLeaves) { {0}, {1, 0}, {1, 1}, {2, 0, 0}, {2, 0, 1}, {2, 1}})); } +TEST_F(ShapeTreeTest, ReverseIterateOrderLeaves) { + ShapeTree t(nested_tuple_shape_, 42); + std::vector v; + for (auto it = t.leaf_rbegin(); it != t.leaf_rend(); ++it) { + v.push_back(it->first); + } + EXPECT_EQ(v, (std::vector{ + {2, 1}, + {2, 0, 1}, + {2, 0, 0}, + {1, 1}, + {1, 0}, + {0}, + })); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 8f8d4a73c9..82a513a65a 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -68,6 +68,9 @@ class ShapeIndex { const int64* data() const { return indices_.data(); } + int64 back() const { return indices_.back(); } + int64& back() { return indices_.back(); } + const int64& operator[](size_t i) const { return indices_[i]; } int64& operator[](size_t i) { return indices_[i]; } -- GitLab From 070b06a8ab37c8aa0ee2cdd0cc12defd009bff9b Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 15:16:21 -0800 Subject: [PATCH 0290/1801] Internal change PiperOrigin-RevId: 175344408 --- .../tools/ci_build/ci_parameterized_build.sh | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 2b9aec6c31..db02f6ef10 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -147,6 +147,38 @@ BAZEL_TARGET="//tensorflow/... -//tensorflow/compiler/..." if [[ -n "$TF_SKIP_CONTRIB_TESTS" ]]; then BAZEL_TARGET="$BAZEL_TARGET -//tensorflow/contrib/..." +else + BAZEL_TARGET="${BAZEL_TARGET} -//tensorflow/contrib/lite/..." + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite:context_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite:framework" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite:interpreter_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite:model_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/toco:toco" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite:simple_memory_arena_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite:string_util_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:activations_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:add_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:basic_rnn_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:concatenation_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:conv_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:depthwise_conv_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lstm_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:l2norm_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:mul_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:pooling_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:reshape_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:resize_bilinear_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:skip_gram_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:softmax_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:space_to_depth_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:svdf_test" fi TUT_TEST_DATA_DIR="/tmp/tf_tutorial_test_data" -- GitLab From cf0f384382d0c3645eb53cd27d38d15a9c3b28b5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 15:21:48 -0800 Subject: [PATCH 0291/1801] Create documentation for retrain.py to describe addition of fixed point layer. Add Readme file to tf.contrib.quantize to provide an overview of fake quantization PiperOrigin-RevId: 175345154 --- tensorflow/contrib/quantize/README.md | 73 ++++++++++++++++++ .../g3doc/drawings/Fake_Quantization.jpg | Bin 0 -> 32990 bytes .../examples/image_retraining/README.md | 12 +++ 3 files changed, 85 insertions(+) create mode 100644 tensorflow/contrib/quantize/README.md create mode 100644 tensorflow/contrib/quantize/g3doc/drawings/Fake_Quantization.jpg create mode 100644 tensorflow/examples/image_retraining/README.md diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md new file mode 100644 index 0000000000..782232e85f --- /dev/null +++ b/tensorflow/contrib/quantize/README.md @@ -0,0 +1,73 @@ +tf.contrib.quantize provides tools for transforming graphs to include ops to +model quantization of weights, biases and activations during both training and +inference. This is done using the +[fake quantization op] +(https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization), +which is described below: + +Recent literature has shown that fixed point networks provide comparable +performance to floating point networks [1]. This is achieved by modeling the +quantization operation during training in both the forward and backward passes. +The fake quantization operator achieves this by modeling the quantizer as a pass +through estimator [2]. Note that during back propagation, the parameters are +updated at high precision as this is needed to ensure sufficient precision in +accumulating tiny adjustments to the parameters. However, for the forward pass, +the parameters and activations are quantized to the desired lower precision. + +![drawing](g3doc/drawings/Fake_Quantization.jpg) + +###Forward pass + + + + +\begin{equation*} +f_Q(x) = \Delta\text{ }round\left(\frac{sat\left(x\right)-x_{min}}{\Delta}\right) +\end{equation*} + + +where + +$$ +\begin{equation*} +sat(x) = +\left\{ + \begin{array}{ll} + x_{min} & \mbox{if } x \le x_{min} \\ + x & \mbox{if } x_{min} \leq x \leq x_{max} \\ + x_{max} & \mbox{if } x_{max} \le x + \end{array} +\right. +\end{equation*} +$$ + + +where $$\Delta$$ is the Quantizer Step size, given by +$$\Delta =\frac{x_{max} - x_{min} }{255} $$ and $$x_{min} $$ and $$x_{max}$$ are +the minimum and maximum values of the variable under consideration. Note that +the rounding performed is deterministic and corresponds to asymmetric rounding, +which is supported in almost all hardware platforms. + +###Backward pass +For the backward pass, we model the quantizer as a piecewise linear block, with +derivatives that are non-zero only in the linear region. + + + +\begin{equation*} +\frac{df_Q(x)}{dx}=1, x_{min} \leq x \leq x_{max},\text{ 0 elsewhere } +\end{equation*} + +Therefore, the backward pass through the quantizer reduces to passing through +the gradients as long as the inputs to the quantizer are in the linear region. +Otherwise, the gradients are set to zero. + +Note that the quantizer is fully specified by the min and max values of the +variables being quantized. + + +[1] P.Gysel, "HARDWARE-ORIENTED APPROXIMATION OF CONVOLUTIONAL +NEURAL NETWORKS", https://arxiv.org/pdf/1604.03168.pdf + +[2] Y.Bengio, "Estimating or Propagating Gradients Through Stochastic Neurons +for Conditional Computation", https://arxiv.org/abs/1308.3432 diff --git a/tensorflow/contrib/quantize/g3doc/drawings/Fake_Quantization.jpg b/tensorflow/contrib/quantize/g3doc/drawings/Fake_Quantization.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fdc7ae40cec757cc0a93d50eca6c8698a4697d07 GIT binary patch literal 32990 zcmex=oIr{vTivYZ;lC8CV2ag%k}P*@OcV*_8@Kj2b5{E zCr+Nabot8FYu9hwy!G(W<0ns_J%91?)yGetzkL1n{m0K=Ab&A3FoS&sA|M_^^Oqn4 z6C)D~3o{El$X|?1{-i(KIlc21e7s zXd0k<8dzg5kzHJof5>XyW)@GKZ4+jSSsHKcbB_sNdn$S)D)x`(&Nj|}Cm%YBtXlt{ zLEc=tX!O!l*(qK$>F#*@RX6U?xy-sJ<>H#s<`;w=q(VZq-zNK9R*uyFD(xMT zk}0};W`L-~NlBZ=9d*gqCf$#^wC>B#Qtn*8Q`s7upT)RaZOLq$JdK^1;aP)zVCcDp zDMB}U@9v0R#Jban$H(Ku>oE+mx;Hd{oQi8~oeQ^sMRn@43GXb~&rP_uYA}b_H`- z*SVe7zhql)-MaJYn#7%%EKZVof-yT8S`S*DWRa=vk2>Jz_+QZ>t4LsCC%B~dzt4uPMmeV`^Ybr?;CD9uYI{G)@R|OR=o+k zn@?ylo;<&mA?sq~uH_viv4V?Z!^1aj%(Q#3bBD+2T64i!lb%Oidh_M#mAtLm7C8}2 z+l~BIOQ$cBm?Lw6^_5nS)Vb-U%eF1eUm1BiVZpSWo>vz!l%8g8^j!LSmfv>ilS{M8 z%1=rj-n!+rLh$=7a{^dL8wCU!MUVe!)t}V=$p5!W{+afM6?toQ*S}5Kx@UXWqkDnd zJeE1UczmJx-~v8>hY$HT7hcYwGN%ssfPVYkOG-T05P zvoiN-?EjFuMM+y<#5YyP@X+osli+Pn7_92vss3l!xBA+wV;A4)zLUDfHt%L+!h~bo zE)om}{X z^J(*eTeVNh8I2^L-1&HXOW!}A97p+kYvz`@`L|Vto+Wzzm3mIJ`w?y>3^MvJg zhcE4v?)YPNfAikcaoqlI6{UZk)rj9xySwt-n?JX|*FWBO_3ek7QPFqq#2n7%DHm7u z`Lra#WVf4mq0BaRW?R*W9wuH+@8Tu zV6iUvkR0=68{5S%y~~$pZq619;JEcuA?Qip!;ZU`yBMDNUb%ix_ao!x4HtCRy;a)Y z9=z{bN+q|vzP(B%V*pci!}0l5&-R$Jh~MM-&_8+2&8RGcSLVlkqjv{=oBZef)WnMx zmT|?`CEvc8eoyj3sr8Td@4w{Qy?SSpJ1;w>T;khhW-e6qbpgNnJa6u|+DCS|ZP=C9Udy*IRptAyitKL9!dGKd17wYe0#^X3ntaJx?fJ` zYMqkiG~RbpAyE4Hx^M~mUbU$Y-%2|P@v3`m+W6V-Kf}!h5#MVIEEn)<(Y^9P&gbjZ zFX@}!t@?bV`26FaCNth_oxR#3;kWI^&|c}MmZc|O_tk1JU(;A8eb~-S`$7KO`GPUZ zKg)}vT#NHL?emVb3O;r{srsAo{jIHE>Lp+9+P7M5`jhw@_m|jJY~Q~%L-O%CZoLWx zAr0n5i*{<*DgUT@^i0&bO6U3fHy>Wg{$Tw? zu%~os|It}4+p2W06mLGhW`kIT^R!dqDt!kuEYEu}++MWf{g23b@@I;3uDH!sStN4h z?3AV}8*>+bpYd{c?_Jwhcfa?2spc2h))MsXaiCjoO4qHcPj1LC$#zI^pE&t_0pF~y z&-=w|yfza^pPyh1QwO+oa3Hvwg_t^8FVRQVgEqWK9OJBDUoM-Ve`U^v#tH>zE5eDJy8RsAD zw)H<6Sh3pC-2Bq(?~`2KFY&ZCas3x#;e4z-dH(ab$F2+&TesYL*8Sl4v}>2%`(J!^ zz5S-ts|)ANW+%D-I@;T)%wuuF+PS(bVy)`cJ(V0AuY6tWrQIv_?WU=$lf0h2Y15yL z4+RnmJefaQSbXc!U_N7_FLL`mF=%ky?ob5?&^tqADS%cADwt5F8cOL0Q=-0%T7J|d~93U zuNtA#ODi_&O1%noZWVhc+g)E?;-YUiRlMcPSwDW^S7P}I@3S99iL)vKnb_eLzqHS5n^UMY5Mc1qBxSIX?%O>7ZQ9{2nE zfB3umKf|GuAD14Vc51V0g7Wly9v1&OazDs)X5t=qP3>$|z-Hp8b^ zE^g+AlTR0UR`n=+0y8Ro55r^Ick-yZD1^ z`?s_6cBend4sUs7c68cvzoclMharIasKxZkEMp7)g9sZgKLU%=L|d4syv4?mGS4lPEfG=Gs?Q-&((g8{f$?YgO4=;C57K zO3>4tr{>K#TU9C&!00-PN5fz=4WOiffLpPSwdt&P*R0mv^Ks2akG--HDS~etSFWD_ zLH*C<^St#pw66bWaMfif5eal<7{%m;f#~@k!v8#g*8dC_y&Ys(MFJfLE0)-&v3uTW&!*_iDDzymV@ij2*;H8{z0e*~3BCBA(SL>svfKZ*mfZiV z+XhNUqoGf?Vrew=2eJYf`k^2HQ=rEFKSRO~`!9^Kti}IW|1(TvKm4EJ8-MeE1_u$w z3xl|K$awz8>3V@|FkJJA&yxspsd)a@6mpP^v26y{9=08JY{S(Iz z{~7)^KlsmZfwe^R{15SeF7o{U8E)9||78eZcO1n119tnz`acZ)KSLt7{9o;^U0e(ri@F9a)}H@C|4)aV{C|cGHS)h0)-~-EF; z?ay~ieKyNmV&;>H-Nr(TRVJS_VN~z=&+sJ9|2Id(e}jsOG4HVQa+c>J{~5Mb9RAl3CI62v?LWhlD25jr>+0Xy8Q6d3xNv;?wSvWlMndC%ZK!+c(z>e_9|{~5Nd_`E0aKZBi>{699| z{|v`gFfUlN^Y1^9l3JeX%FptDC-wf*|IaW_O8)P^+5Z_j7w|Q8J^#<}Xv2So74rYw z7XQ=w&+x19Kf{wOumjS2?4R#z|IbjD_MhR=ikpx5f4*P%pW*(@{XbTL8yeWp6^xmPvbnQF0SKGV&Zm;0D6tXZ#^k(aQk%Q87{ z)j>DMbxmCi&;O+VXHfc7f5Ykce+I8d;9M*DpW(!w`QHRK{%5$fWA%asOkEm-7h{$( zpys~$pN?A!U$!{dYvC?)I2lsra6L15P!qUXZL7MK?_$Aa3tt+4l6vsFH00aqdV#}R zXUZRunp{zI^=57W+toDZ9>pgP{NLx4e(^60j+H4~61b*t(UJ_NRVxkzSTSfYUK!xd zbMZgp{|pl=?cXRS|7TcQ815MJp8>6C8vOP5{wHo1>tA2Ap9-qGrT@9)ivQay{1ZpT z9r5emTD|!^X4^P6XKi1Tw54<33;UzWkADah)iYQst zGQYE)&{QeZg*Vq2$3P=8GcV z$`3MMEH{tc$WmKW9AQCjO3On;xRROnpX1`u#V?{Fl#~RN8@RxwriP85o}ZXL!8zLZE}pe}<`#8PDG~G5hvg z6ZIM2)f?>~d@e}*F$_aFVw z@cFRwe}?^+>py~({AZZ`nDO|HFJ|9=#~l9h>8xCNkNxxG&i@$>oZSE8D%kwz6$k3i z*gMNR0 zNmu`8$Ud|GcTP4$5o+7P?&hP9*PSlf^QOi8XLzX#w&~k@vHAm?*7m4$yPq^ zWBOkPll6b3_y1>5zV7JCQ2d|amYkMsjqP&z@1@|3h~4;~;o!pm439%W zzLxLK|KoN`h;xgy#pSox(u-cX_PL&kkoa^wxN+Xivw8Jd71RHDqL;9_n~Z4{hfJd{xet}1>5wWfs=3Le+H+to&Oo? z*X;kPwEF&!eeZoPugiaLqrA>NJ6rgy@y}`LSsJQ+tjGKqY_(Uct3O`<$=}BNKSRRm z^?yaKFz)GzGfi`Ia!`$yj8E?AKQ$M}Bq6=J$l>-6@_!0$iU0M*|0$@i&i0>SqO|LO zhV?H0LeM+vj@6a<%Om|O%e4;YGfApU+@;mGtU=@Tt=*Yn6W4BWdK|c`UhUkSIhB(H z=gfBv;5K~2_`cEq!k5;IZ+Lt)=eYKAKj(Qm{~0!{ssH7g4f46%e})Oq zyY1g-%m14gb={GqL28-*488VGqUwMB-twQ}Qpf8B>*|lq|D3OP5kSg9b7G z8T$S+JegN9{oldx_+Of<7p$?Dsz1^FNd7ll%zuU#%NFpS(z39Bd{6&7`=^x}42!x3 z4zH^}aQ~BX#{RD_=AQx$$@2bZP%`WO&k)!72h{68a@o50e{OrTf8SD@ZT_&lWcgQ* z6MtgrH=g4ck1LWt7fcQCwykt4!>>NejZGywoX{}Pxp_7by!&0ui7bQWfOJG zJ}C5ATwk=}0T;tO8aBQ8>JQ)lB=a%-Z)@^@hD+MuVe;>P+HE5LGbBd*XZX@}jl>a$ z=YKN)Gk{8!>vxy`o8k^iS0?`%CT9L;c>V73e+Dn*bxpXJ$sYc={7=U|#s3T&QqTYE z%DS^AUtA-f_4TSMS*r)~zrS5!T+RNc`slw0|3XzAQ;x35z46sh(|)QyNhy`;Q!mB; z*#9$3)b^BZVRXMB1Rifv{^zn+|KHB~V5HfewCq{CUVLK;?YI+r=8<%Cszv5f`%?D> z4<=PJ_-E}eEaY9fWxKi1ngEy9Clk*xt&mii?-Rz>o^Q6cuEw)o`1oF)s?t5pY#jXI ziMQMjs9(s9+r5dg+VPUK*OrOvQpKfuFaNwgogw+mgr0;dtp-u|1ooO)S7|&_^!$(Q ze+I?#V)Zw|?Efll1@{~I>rWIPo&Qa=|38Dr+HBJLjeMwOM!5ZB|DR&>|LuEg|7DTY z3*6a$O}$wCiP`JwZ`eKl&)~W>C(OSqyuPJ$$vN`~Kl@M80j#h2f3io{Usw6(v?esx zbo{(n|lSLrsT0BW?OW6|?J;3)-^Q8@lHOw{-O&#*53$_3~srD*f|SK|6M zR&rC-&U$_HHaU>~W&(5Dl;7V1@9u3}z_s1}V`#Q-lI6vcm+rUvvsbrT3RW+Y5&N2> zpC)=GG}hXG+odfpbaEbldix-D#m zkR7zy#)G>eKAa(Cwq}F;2WAK1m-e67HvVT=U-6&ei(t6cya~=3+gN0p^Cj8;F=;&= z)G6eqibqnVN~y;Mv9#od1*Iw%SY^}0y4^0%dgYRtqNCiz@$b*2?OfgO{GS;4GhD4@ z_MGnDl)d_SWNGv@p}VK{m&nv?T>hKGLcY^xa>$m9`|($%A1U`Zoz<{zDtAvJKUe3|`ivOXtL1O+hwC{gn`RM#_uH*k1mV|A^(~!V2n#PV=f@q_qA>1?A zVRgIzGbnvq{+}Uk(tm~*Y+JPLWB)TutPcLqu->!&3$p`TD@cCke}=e;^)HyVXxlr1 zq<#J~toPXeg)s`Z$@FN}hTA{r|0(ok|F5TieL)#T8L2udmg@Y^(6TtJR?2v;+S$e{ zb4up^TD#-hi}P=s{xb+3Jui?|ZZN}StdYmOmE-nT`T?YZ0p-|k5&qB zOY!XKi)eL~z1hjY9~_&g=%~`bt}^fH^ZyL|Z&R;rIcpxWi}|cG2_GH@o=n(oXc#**mL@t@(i zZ`7*q{~12ub`TrgK_MhRGZgvuR zm2=L-B^Pai6e{j|ZQ5``;Vau*@c71urXT+QTK8)IXJ8cmqw$}iMf_0x7u}cfKRD(; zgbqF&;(q9NPv+9WuVO3sKP~m`R;mws_Lk|9h z4%Pf{vN8S7;Lu@j_Mbt@{vpSQ>wlRP{@nfp9i;i;WMlrvDkl5i<6Zw5l->U*+R6TB zSWy4ZZSsGHF7VvLe};#gAKw3E{?G8qWd5H}`425WYHDw5ivK;mGW@4$iKyTE;HGom zj$I0!aC$N_NUu`;MbZM9UL#_+;&#%@0;hFuPp&Mp6 z*M;A%6^k#HayQt!Fa9U6SNQkz`#+3!{%3eR6*Ri@pW%;UmHK!6{U1_`K|>?|8Cs0p z-`rXt*S$KH@h2zDm(C{r?_cfzVX^x^!{aUWha{KO*N6URIC%8_e+F5YO}v->GdzDD zBX>AY)BMk{{+a!G=xE*_ zrTt&!uG@bw{?D-19hno$nw&9%V8smoWLTpcegyUJW^#_vn}e{=BpuRQcM(LrYK ze}?ua*S1!gNXM2(W=l-BTGCr2k=TBk;nd$}?7A;@oWCbA{op<6N*m3T)9bSZ^<-H3 zQn(D>SV(KHz4&ECNv@f-MdTBO>C-YTE0-isE3-XmY5zCt+z0;(x!J|1S^IzRXZ~k6mhtZZXhg2yYW)Yf%l{dApl02$I-W27YJTt5 z<^LHZ7yssu{?D**+wcDjjJo@O+yxKF{cT?ha!~DmhK6bP|K#<*@hvE_iLCPf9X9R9 zUjeAAZ`}I+mpAx7!vhifKfdxPJcuFv>-*+uPMQC0O_K5#rv5kc1?w-I|IaW<@ION@ zM8V`o`oARqGkkKg|G4(dl{k@?flQMx#IA@rd{&? zxYhnMJpNKU`K17=TXOM@?2%mq<_@BX0mv{d=d87XfZ?X&-Pzy)?k*W%wOWBG39L8I}t{Qv%({m*a|G(A@Q z<72}=UEaOl&g{Q(5iyR>Acx+pp4vD2(QfIzUXRQ#My`M7dZcmB9c_-q-ORTInZK&7 zd-t*8@yQFf@1LDfH`|w?L?qlEWuV_A|6~2151{#jFZcgIp!ltyZo5{7i&iS598VY8G67D|IdIj z51{eezlZ(Lp%UY_e^(m(XSiJjo_T*e4-}hX?f)5?XYT(g){i;^09B$dGnd;%`S0AM zn19<2^>44({I5YO{2%XqP-Xv%l>%a;1hs}1efFf~|lY0v+6@uUA= z?rruzOzi(;^}m%D`Ok1+ck+LRW@k|G0WsyRzHtBEho_zYJzGBixy0h%!AJIg(RKXK z(5(8O;gJSZjla-;h6`KS|M5%*l}^9Ik0igZNag=Mt4aRZB8W}bK8pY4mIn>?2mW3A zNdK3Z{iizn4-1%&?f;~8r~Z0`{gG=FPic74`R3fXOIC$*?)%ZTkC%be~mx%2sB3!_;>&6{|p!A zulmpM0MzZ4gDLt7Qk4IPW6OVr$5-kPIj^d}Fmqk~{nz^cJfKP*DqgjJ;d9;o{nzRL z8B_xQGjNJ6zkhJo1G#;<6aR&SR>Q^oXZWL1{-5ECX7>EKppDv#{|Qy?W5n3F+vPvrlU zHRx8jHviCeOuX>&+LQYGTi4Ed^v86~{g?N|F6pFy*th+8bfh!)L&alH9rz;|Us-ou z`OnZY>p|^%p4VNrH7oi*O#o-M{|p}{pZ?F_^layUhC1Q@4Bd%_?;etM>5)b*$0L^{>88}PV|7H4m{*S0XXj<++17~Z0t$hsp<==8o{xfJm zqx^@Dj{Wn)Ag!j?|1(H0{wK`M|MSCj`wzaQ{~3T~H^Xql{53bdqwEmx=<+S&I zhWW9@{~1`q_y0))rCK(7@yt!9Ca&GKMRJCQ?xHx2=T~NIVBfj+6~Xyb?USW<^J6ak z$@#~%{o0}9YtKxMzJFRR_44D~+m9q)o!u<>{L0s(Rac9G{@7m1E4iC=?OpWh?%uG% zLetk9gq7F-+SJ1)qGlJ^XZd=vgKS7(N214JW0ru0bGI8FFY*tLNk8Jf*Guff)pfI1 zoK3iX>qyzA(|qQYxiT|fah#WwlyzAB{OGk^g=WDKKe}E;7eBRmy*S2V%6isjooc_r zIemN;k7gY9`?&RN%QD`ii(9%+yifk^+cWKuJJ%`p_X2q`a~@AT%1~@J%WkQ9|5khM z?Oj{%zgh7nX&?LB`29l0MhV`%2P@98*OZ+wsHl#=mL|u&edm?sGxn{lPcKxKk#6Zs z<=ijpe`@(F?F)$)bQgXMJ>MY~>3j9)s;Dg~y^W#WOcv*gj_*yo@k88i!*uUS*0Yyc z<-R@pBzk6D`s}g?cS@ws8TQ@HU~X-bEe`lmzrJGTpWxNUtCydwRb5i(Bc>V=dm=oh z=I1Ty%nr-#yh%?@~nvX1hNDtX)J532J3k^=_NE+`qo; zX+mq!4M~+RT%dIpvL~4~m+)FW@wD5zYC&w!7l!0>u4|O1w8-Z??JrZ`Ah9z2KZB^> zhFP)yj~s*0X;v+ttOOJs-Kf2O7&z|L4jr|2y+P!(yohD{N;E z*{nbIPx0Cw?MMA@Uh4*#bszEAczf%Wng7mAuUhQc|MQu13iGjtFOOEmwyxZG`R&`N zN7>W540=u8?tG$9bf(rf@$7?IwNJH0weDy4O^)bsHuiJ;v`D_p;se_S{%%2ZogT#G(~0g7=KR^@W?pls8Grh zz+UNEXf7{s`>oWLsPeY0v5qA(?`hvuG+ebHfKz*C+vom=GQS=x?OLlHyzSbSZePEX z-O;f}HoJHnymeSvdE)|ZQU~5J2DqO8$^OrvwD$Pl(&qmR?y9&JtUA6tTG;(j-Y;AK z$EV%@uKuq7R{t$b{H^)TtCV^diZ|L+drzSPp^7;_TStzDg8T|0jF+%f17*Y zgSg6rj~8aYe|$Q-n^!JZ-e=#HccvMvU1`(gKDw^D>su;j zvm*3Za!~R28Aa^NP0;81X+MM;Zoc-c-qscMhs;xzRjQp|l?%?1SZJ#heDnVNf}J)( zJNK^7dcQ2%Rdk8B-SV=3zH?YUobQ>I?-DpI`Gu_qTdc5$ue85FL^ z`+cr{f@}jrfd!sesqH@(aW!HJrU)^*>^u^n#J{=f&~vkez~_Vy!cKo z<1262lU;9bSJtY$vOMRIR_2n{Ow5-0*(?^tC%&*!;g&*!&J(9q1qv-fpt{b+o!MxADQwwu?d@a}uYvC&#LioT|tefMhae4LWYYY{N7E z)SlVD<&QWYng3xE+m!p+#0_jgPF-mh03zBbT-V~wOU&x4*% z2K8D`YaEx~z4PPy!S54p@0oV3w<3M2yWqQa?)g6Rw8B34uC~m7yI+uJ&V#JajQ<%H zhOL{p`1s<3DuMUn@3iM0U9UHRk^1+pWNy1|B-uTw)!gIh?1s9u(}i=w_BZ@7U-5%Eq?U7$`K;JK z^;zNG;1}^)HTrx?6*H zkv3mFs7o6&?@$EiuS)P7iy6r2zyIC*&+y_XzEvSaG(WL51Su0+v$s6_--(T3UHV(% z?pU;4x*;RE)RwK9fl=jKfPWUl;&pfLWQnhQc-Abut8((0y*qmz+`2udk3UnMHT*Kq z{ln*;E?v5}b&lDzjVfjz7W7$MNYH9*U=6&mrT@)r_0LOxyM8@9bG=?+zgh5=#+wd| zA!Q6)R}ky#cm8K!`S?)V^xx}K`=_k4UkYmYYX5VMuK)Gg{$C$xFoeb{J1E(r^ZZXP zD2;yqyZJxE#cog<9mEl>v};$>sY}_bR|ZK}1e{dqD-d-!sd7!MHQR1^)jj1aNf}S3 z@|1~P;*rka=kPh(b?-u+sPA&Cv|GpUPmFy9 zA|62tw|4$#V68u~I{!aIZ2EtOi|0WB!UdXLy#H_4`Tq=y=V9;0P?mb3#RU~|j^%#_ z>G~53YyUIk{Ac)*bRD#zpm+b1fPMcNw$=YKByoigO?Q!OF#Y{|<~pSdB|Ck}zkBYB zygq-E;~bOu@8x$j?|i;!*50*QA79)m+>voBb8bu6w-k&h3A?#P5GQ|DVB47gvV>EdXe}x*=F~nuTKJvWdxEraJGAGdet(RLxae zEPnrE$ZP(k>Q=Vh&2@LzsY+-wd<{Hozn=SUD}%Lbmv3>PoXVcwNdikvcE}_g7kq8Z zeDVJFf*mqKS1(<}Jm*35{12qE7POHl{7=`m{|sNR)&G_GLfbYnh2h6|{>SZquH5f` zr~hYIECyPkidq!YWf2ehDmPSqNNH^tue9%^PX*_Bf=~Hop07&>T>@W$Ke>kSBDjC#npfO@D1(nEOo!2sQ%mRK6}Un`TY~8ZfF0y zu<2iy!#_K#)PPl@4qR*uvaAfcxDHM!{>KekVr>8V-TVIxUMk>qsI31PPHdU~^?Uh$ zhNVKFDW*YP?$-Yl`s)Ad>0eh+OYiVw?0?y5-`X0l z=r+lSDYY+S>PsEvgF>sXz6}oAcJa&2_3LhQERq#ToiVvJRQARs2HBIM^A}`pH{9}T z--^Y*!@lso;B(#M{LX3lg_KFv8`YOFFzj8D8IW-+r!4I0486!F6P`XTvrJFEJ7bdn zwe@(`kBOfDq5scy324FR{=XWNK!uz5e+H#X_1Ev-|2M@GX{9x;WHNx_GXEKR|1&)C zzH0w^`d3lV?r=nmlRPAVHoi%T{^F=Bo3eLxet5cg)9qF5SJj=HCeEACeDhA}7oU0D znhV%&)E^CAy=>i|mtuZy^V*jlnJe|+*NUgX|5)x`m$fz0SllGir4sSjm8)Yhqr(FR zYpzyXw~`UCKTvmM5Q=JG|4D;avt(-~QF!%cBqHr%Ih& z6=&FWPV7arq~5)b#gpn9id=mcy)?|YRdidW*JRDH=Ag}HuThYe)J$0Ywy*=(Kx}We~i_y{;Zze!(4q-Mw(d9)?AN0~NRUGv!(M z9VB=2yt^Ikn4T@Vc8jNo0Bjh7;a3357Ca>_%mt9_x>fe*(;N1I#j2pH$@rh^^ZyJR zTEG9BdU~|P1!Y0va9G^#)pOO(+H|xY{d@YSS=r6wvlV5}r!t5o-(U`3_wMZ*ooBmu zPfgt3$TP{~Ib#UVq-M(95|2mOpF zhl_8#JT-UhnS17Xxk|!W%~vjHeN_*0O0u_$^Q&WpZ_!5aQgk9!Rslw z>BLrlV$XkuZvy-NGhEu4!Mp&=ZVZZtNU3Bd^Zbv;|8)F1`A#>Ye+Ag_GXEJ)^scSH zzW6_b)(iR$`!4Ew{`K|k*{fIlv0r-XV%(i&zq?yrXXeil|IbjnGvxcp7t0zH@_($_ zx@m5SuUY>5OYffg`=?JU#Wd{-0hNrs45CSpF<(M)2$CNL z@p%4E$F1vstL#4wY&7;&KH9c@(xXZ-jijJ=@4&->-y{}rS>0PV%WVGhN?+SE>lC+b za|X?}t+2J8RL$V|Af$?K!mrphlVZKLOHI8E+J={4{a7O2C9pN{g{`NlyGv7Sdcc2% zmPy~tTs4Ayw@*ko%=ucG@7xdeD|+ngWmTjdL!dqXL;9cY^ZywVx!?cQo(5_Mqvqkk zJ`Hf({*!Wc{MQ%%8NhpJi$Qy6xBh2{>;JQwwC#fU2NM^synpA0yL5D>`K-?5rfKWB zrhK={>9AMIHvcCi8~m^3Z1KPU43YJpTw#l_P&c;s|L`dF|0QgC{`cS1{|qPLtJJKP z|K*(Z{O`X_p!KKrs7u>G-OS?u3?C-En*T*Dd;a%dr~fld0l{(syL-~jNv!sjO~2fwphxR7``UKqI{lTvvZ^|0m|1{~0!{Tl$}2>1OaDLe2G0*o*%& zB*uFGXYfuNEn`4aJG8(%|1+@tXE?$4{NJHn)BjC78_SyXBkEeUty0B0rHs2 zZpN2-b7u6IyR-byAX|T;`4Rtb_VfQ47D&C&lHTp1|K{12U7e}h=kgS}-Ieq>C~8X8 wt|+WyI7$=lwvUFh!l0OS Date: Fri, 10 Nov 2017 15:23:14 -0800 Subject: [PATCH 0292/1801] Internal Change PiperOrigin-RevId: 175345353 --- tensorflow/contrib/lite/toco/BUILD | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 1c73ab8f4a..05e77c330c 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -249,23 +249,23 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":graph_transformations", - ":model_flags_proto_cc", - ":toco_flags_proto_cc", ":model", + ":model_flags_proto_cc", ":runtime", - "//tensorflow/core:protos_all_cc", + ":toco_graphviz_dump_options", + ":toco_flags_proto_cc", ":toco_port", ":tooling_util", - ":toco_graphviz_dump_options", "@protobuf_archive//:protobuf_headers", - "//tensorflow/contrib/lite/toco/tflite:export", - "//tensorflow/contrib/lite/toco/tflite:import", - "@com_google_absl//absl/strings", "@com_google_absl//absl/memory", - "//tensorflow/core:lib", + "@com_google_absl//absl/strings", "//tensorflow/contrib/lite/toco/tensorflow_graph_matching:resolve_cluster", + "//tensorflow/contrib/lite/toco/tflite:export", + "//tensorflow/contrib/lite/toco/tflite:import", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", ] + select({ # Placeholder for internal darwin rule. "//conditions:default": [], -- GitLab From 32ed29515c23bb9ff74d4343693d581754192922 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 10 Nov 2017 15:29:35 -0800 Subject: [PATCH 0293/1801] Improvement to benchmark. PiperOrigin-RevId: 175346269 --- tensorflow/python/eager/pywrap_tfe_src.cc | 66 ++++++++++++++--------- tensorflow/python/eager/tape.py | 5 +- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 77b49be8f8..372a6bb4b7 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -600,11 +600,33 @@ static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { return tensorflow::eager::TapeTensor{id, dtype, shape}; } +std::vector MakeTensorIDList(PyObject* tensors) { + PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); + if (seq == nullptr) { + return {}; + } + int len = PySequence_Fast_GET_SIZE(seq); + std::vector list; + list.reserve(len); + for (int i = 0; i < len; ++i) { + PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i); + if (EagerTensor_CheckExact(tensor)) { + list.push_back(EagerTensor_id(tensor)); + } else { + PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); + list.push_back(MakeInt(id_field)); + Py_DECREF(id_field); + } + } + Py_DECREF(seq); + return list; +} + void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, PyObject* output_tensors, - PyObject* input_tensor_ids, + PyObject* input_tensors, PyObject* backward_function) { - std::vector input_ids = MakeIntList(input_tensor_ids); + std::vector input_ids = MakeTensorIDList(input_tensors); std::vector output_info; PyObject* seq = PySequence_Fast(output_tensors, "expected a sequence of integer tensor ids"); @@ -619,9 +641,26 @@ void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, } } Py_DECREF(seq); + char* op_type_str = nullptr; + if (PyBytes_Check(op_type)) { + op_type_str = PyBytes_AsString(op_type); + } else if (PyUnicode_Check(op_type)) { +#if PY_MAJOR_VERSION >= 3 + op_type_str = PyUnicode_AsUTF8(op_type); +#else + PyObject* py_str = PyUnicode_AsUTF8String(op_type); + if (py_str == nullptr) return; + op_type_str = PyBytes_AS_STRING(py_str); + Py_DECREF(py_str); +#endif + } else { + PyErr_SetString(PyExc_RuntimeError, "op_type should be a string."); + return; + } + Py_INCREF(backward_function); reinterpret_cast(tape)->tape->RecordOperation( - PyBytes_AsString(op_type), output_info, input_ids, backward_function, + op_type_str, output_info, input_ids, backward_function, [backward_function]() { Py_DECREF(backward_function); }); } @@ -794,27 +833,6 @@ std::vector MakeTensorList(PyObject* tensors) { return list; } -std::vector MakeTensorIDList(PyObject* tensors) { - PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); - if (seq == nullptr) { - return {}; - } - int len = PySequence_Fast_GET_SIZE(seq); - std::vector list; - list.reserve(len); - for (int i = 0; i < len; ++i) { - PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i); - if (EagerTensor_CheckExact(tensor)) { - list.push_back(EagerTensor_id(tensor)); - } else { - PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); - list.push_back(MakeInt(id_field)); - Py_DECREF(id_field); - } - } - Py_DECREF(seq); - return list; -} PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, PyObject* target, PyObject* sources, diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index a06f5e1a67..afbad183b0 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -23,7 +23,6 @@ import contextlib import threading from tensorflow.python import pywrap_tensorflow -from tensorflow.python.util import compat def tid(tensor): @@ -87,9 +86,9 @@ class Tape(object): """Records an operation in the tape.""" pywrap_tensorflow.TFE_Py_TapeRecordOperation( self._tape, - compat.as_bytes(op_type), + op_type, output_tensors, - [x._id for x in input_tensors], # pylint: disable=protected-access + input_tensors, backward_function) def _delete_tensor_id(self, i): -- GitLab From cb720816116c7769ff6bf781a687af3155f29c8b Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Fri, 10 Nov 2017 15:33:52 -0800 Subject: [PATCH 0294/1801] Inherit container prefix in defun. Allows defun-wrapped functions to read/mutate variables captured in them. PiperOrigin-RevId: 175346915 --- tensorflow/python/eager/function.py | 6 ++++++ tensorflow/python/eager/function_test.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index b1b1de0c41..c542dd77a6 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -407,9 +407,15 @@ def _get_defun_inputs(args): def _defun_internal(name, func, args, kwds): """Defines and returns graph-mode version of func.""" + container_prefix = ops.get_default_graph()._container_prefix # pylint: disable=protected-access with context.graph_mode(): captures = {} tmp_graph = CapturingGraph(captures) + # Inherit the container prefix, since this is used for error checking when + # isolating eager execution (the container prefix at creation must match the + # container prefix when used, and variables accessed in the defun will be + # used in the outside context). + tmp_graph._container_prefix = container_prefix # pylint: disable=protected-access # Copy the graph collections to ensure summaries and other things work. This # lets the function access (but not mutate) collections of the containing # graph, such as the global step and the summary writer collections. diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 209715894e..65776ca177 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -68,6 +68,25 @@ class FunctionTest(test.TestCase): self.assertAllEqual(step(), 2.0) + def testDefunReadVariable(self): + v = resource_variable_ops.ResourceVariable(1.0) + + @function.defun + def f(): + return v.read_value() + + self.assertEqual(1.0, float(f())) + + def testDefunAssignAddVariable(self): + v = resource_variable_ops.ResourceVariable(1.0) + + @function.defun + def f(): + v.assign_add(2.0) + return v.read_value() + + self.assertEqual(3.0, float(f())) + def testDefunDifferentiable(self): v = resource_variable_ops.ResourceVariable(1.0) -- GitLab From 4df4fa6fe8bf37573210b15088c661b2181848c0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 15:41:53 -0800 Subject: [PATCH 0295/1801] Internal Change PiperOrigin-RevId: 175347960 --- tensorflow/contrib/lite/java/demo/app/src/main/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index 512a86affe..4fc6d99d8c 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -15,6 +15,10 @@ android_binary( ".tflite", ], resource_files = glob(["res/**"]), + # In some platforms we don't have an Android SDK/NDK and this target + # can't be built. We need to prevent the build system from trying to + # use the target in that case. + tags = ["manual"], deps = [ "//tensorflow/contrib/lite/java:tensorflowlite", "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", -- GitLab From 4d7d43663228126722e635434c4be7ba74accf69 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Fri, 10 Nov 2017 15:55:10 -0800 Subject: [PATCH 0296/1801] Raise RuntimeError when GradientTape.gradient is called more than once PiperOrigin-RevId: 175349623 --- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/backprop_test.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 1a8b6ec52a..f9d6d8aa5e 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -835,5 +835,5 @@ class GradientTape(object): for x in sources] grad = imperative_grad.imperative_grad( _default_vspace, self._tape, [target], sources) - self.tape = None + self._tape = None return grad diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index ec9a185b73..86c9cce3fd 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -304,6 +304,17 @@ class BackpropTest(test.TestCase): grad = g.gradient(y, [x])[0] self.assertEqual(grad.numpy(), 6.0) + def testGradientTapeGradientCalledMultipleTimes(self): + with backprop.GradientTape() as g: + x = constant_op.constant(3.0) + g.watch(x) + y = x * x + z = y * y + g.gradient(z, [x]) + with self.assertRaisesRegexp( + RuntimeError, 'GradientTape.gradient can only be called once'): + g.gradient(y, [x]) + def testGradientTapeVariable(self): v = resource_variable_ops.ResourceVariable(1.0, name='v') with backprop.GradientTape() as g: -- GitLab From 581a19836749e2d4b06df400992f776f926f7bf8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 16:28:20 -0800 Subject: [PATCH 0297/1801] Internal Change PiperOrigin-RevId: 175353828 --- .../lite/NativeInterpreterWrapperTest.java | 2 +- tensorflow/contrib/lite/java/src/testdata/add.bin | Bin 0 -> 476 bytes .../contrib/lite/java/src/testdata/float32.bin | Bin 0 -> 388 bytes .../contrib/lite/java/src/testdata/int32.bin | Bin 0 -> 396 bytes .../contrib/lite/java/src/testdata/int64.bin | Bin 0 -> 396 bytes .../lite/java/src/testdata/invalid_model.bin | 1 + .../contrib/lite/java/src/testdata/uint8.bin | Bin 0 -> 396 bytes .../contrib/lite/testing/tflite_driver_test.cc | 2 +- 8 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/lite/java/src/testdata/add.bin create mode 100644 tensorflow/contrib/lite/java/src/testdata/float32.bin create mode 100644 tensorflow/contrib/lite/java/src/testdata/int32.bin create mode 100644 tensorflow/contrib/lite/java/src/testdata/int64.bin create mode 100644 tensorflow/contrib/lite/java/src/testdata/invalid_model.bin create mode 100644 tensorflow/contrib/lite/java/src/testdata/uint8.bin diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index 9e4724b8e9..3d29877b5d 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -41,7 +41,7 @@ public final class NativeInterpreterWrapperTest { "third_party/tensorflow/contrib/lite/java/src/testdata/uint8.bin"; private static final String INVALID_MODEL_PATH = - "third_party/tensorflow/contrib/lite/java/src/testdata/invalid.model.tflite"; + "third_party/tensorflow/contrib/lite/java/src/testdata/invalid_model.bin"; @Test public void testConstructor() { diff --git a/tensorflow/contrib/lite/java/src/testdata/add.bin b/tensorflow/contrib/lite/java/src/testdata/add.bin new file mode 100644 index 0000000000000000000000000000000000000000..aef0fe3d82c9d92dc444076d3b46e05af1923f46 GIT binary patch literal 476 zcmb1OU|n29Q2%dO&Ukxfo;y2!reZnajWcaVrBuy)!EV z14CvW$Vfg00kCU8{^0@p52PMsHZC`%{Qv(S=Xc!~w2e||0evp0;8-(Bd|NkFE!!XFN ZEDRtB@h8Y13=C`x91I|vAYl%&8vuH^CRP9d literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/src/testdata/float32.bin b/tensorflow/contrib/lite/java/src/testdata/float32.bin new file mode 100644 index 0000000000000000000000000000000000000000..30b1264ca152740e1607651ce6cbc2a548319bc3 GIT binary patch literal 388 zcmb1OU|AUO~Q$+0ki zAOiy%0|P^TX-Pq82?GPjJpy18K=$*1-2+nVz`($ufMgEHZ6FMCE6D92AAtBE3{nrW z3uGFEu6Jf-U|`71gIEC)gJBVnB5)W$>|=+90RsbrFh~w$9*Bmi2hkuc=l=iy5ArXJ d528W#f_%flzz+5k0|SE;syGW+9OQqH834j;6c7La literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/src/testdata/int32.bin b/tensorflow/contrib/lite/java/src/testdata/int32.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6f3cf607a249e096921b12d848c4055a37d1168 GIT binary patch literal 396 zcmb1OU|fq{jAf#J~q|NlWkAX8Ns7#L)r;vlm?@*oV7 zV_^V61_m|;28R67l7i9_1_p?mI2hO&Kz2gh1T)Klfq_8*$t;i?K^Wv_kRL!k0r5c? zWIo6~kckkw-kFtwfgv*wVhufq{jAf#J~q|NlWukf|yR3=A?L4g|anAfXn~@VOtbe literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/java/src/testdata/invalid_model.bin b/tensorflow/contrib/lite/java/src/testdata/invalid_model.bin new file mode 100644 index 0000000000..8156ac741c --- /dev/null +++ b/tensorflow/contrib/lite/java/src/testdata/invalid_model.bin @@ -0,0 +1 @@ +This is an invalid model. \ No newline at end of file diff --git a/tensorflow/contrib/lite/java/src/testdata/uint8.bin b/tensorflow/contrib/lite/java/src/testdata/uint8.bin new file mode 100644 index 0000000000000000000000000000000000000000..f06c5cf58462ce56b012d163fb208329874f83ad GIT binary patch literal 396 zcmb1OU|fq{jAf#J~q|NlWoflO6lU|^7eii6Ao$%8OR zj)egP85r0Y7#Q+POA1O$7#JXK;$UEB0NDv~6U;0J1_lNNB(p$n1YwYyL4E-F1jGkn zkoh3{Kqf-ydS_M!28PT$h&3QF7#0C30*3|2-#iTL(6C@&U=Rk$fy@KZF!dlBq~+ZI j|NlY$hVel($X<|-Kt2HZi-Cbb3RRp1>|anAfXn~@UtJVc literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/lite/testing/tflite_driver_test.cc b/tensorflow/contrib/lite/testing/tflite_driver_test.cc index 79e8a86972..37010c468f 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver_test.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver_test.cc @@ -26,7 +26,7 @@ using ::testing::ElementsAre; TEST(TfliteDriverTest, SimpleTest) { std::unique_ptr runner(new TfLiteDriver(/*use_nnapi=*/false)); - runner->SetModelBaseDir("third_party/tensorflow/contrib/lite"); + runner->SetModelBaseDir("tensorflow/contrib/lite"); runner->LoadModel("testdata/multi_add.bin"); ASSERT_TRUE(runner->IsValid()); -- GitLab From 08114b6093f7c461483e2f466af49ed55689708c Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Fri, 10 Nov 2017 16:46:21 -0800 Subject: [PATCH 0298/1801] Fix TensorFlowLiteTest in Bazel PiperOrigin-RevId: 175356139 --- tensorflow/contrib/lite/java/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index e31e3abafe..74fb4fe001 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -45,6 +45,7 @@ java_library( javacopts = JAVACOPTS, visibility = ["//visibility:public"], deps = [ + ":libtensorflowlite_jni.so", "//tensorflow/contrib/lite/java/src/main/native", "@javax_validation", ], -- GitLab From 61aebf140e12e2ad834dc94a83f23fc574c79340 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 16:46:27 -0800 Subject: [PATCH 0299/1801] Hlo parser: support metadata. Also give metadata it's own format. PiperOrigin-RevId: 175356154 --- .../compiler/xla/service/hlo_instruction.cc | 21 +- .../compiler/xla/service/hlo_instruction.h | 2 + .../compiler/xla/tools/parser/hlo_lexer.cc | 24 ++ .../compiler/xla/tools/parser/hlo_lexer.h | 2 + .../compiler/xla/tools/parser/hlo_parser.cc | 344 ++++++++++++------ .../xla/tools/parser/hlo_parser_test.cc | 16 +- .../compiler/xla/tools/parser/hlo_token.h | 1 + 7 files changed, 299 insertions(+), 111 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 674d3e3836..1e83c69b50 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1826,7 +1826,7 @@ string HloInstruction::ToString(bool compact_operands, bool include_metadata, if (include_metadata && (!metadata_.op_type().empty() || !metadata_.op_name().empty() || !metadata_.source_file().empty())) { - StrAppend(&result, " # metadata=", metadata_.ShortDebugString()); + StrAppend(&result, ", metadata={", xla::OpMetadataToString(metadata_), "}"); } return result; } @@ -2910,6 +2910,25 @@ string PaddingConfigToString(const PaddingConfig& padding) { }); } +string OpMetadataToString(const OpMetadata& metadata) { + std::vector result; + using tensorflow::str_util::CEscape; + if (!metadata.op_type().empty()) { + result.push_back(StrCat("op_type=\"", CEscape(metadata.op_type()), "\"")); + } + if (!metadata.op_name().empty()) { + result.push_back(StrCat("op_name=\"", CEscape(metadata.op_name()), "\"")); + } + if (!metadata.source_file().empty()) { + result.push_back( + StrCat("source_file=\"", CEscape(metadata.source_file()), "\"")); + } + if (metadata.source_line() != 0) { + result.push_back(StrCat("source_line=", metadata.source_line())); + } + return Join(result, " "); +} + std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) { return os << ToString(kind); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index d174f05aa6..438d8bb35b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1239,7 +1239,9 @@ string ToString(HloInstruction::FusionKind kind); StatusOr StringToFusionKind( const string& kind_name); +// Custom stringification functions for protos that live inside HloInstruction. string PaddingConfigToString(const PaddingConfig& padding); +string OpMetadataToString(const OpMetadata& metadata); std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index b5befbf58b..098879155a 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/gtl/optional.h" #include "tensorflow/core/lib/strings/numbers.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/regexp.h" namespace xla { @@ -145,6 +146,8 @@ TokKind HloLexer::LexToken() { return TokKind::kRparen; case '/': return LexComment(); + case '"': + return LexString(); } } } @@ -340,6 +343,25 @@ TokKind HloLexer::LexComment() { return TokKind::kError; } +// Lexes quoted string with escaping characters. If matched, the quoted string +// will be unescaped and stored to str_val_. +TokKind HloLexer::LexString() { + auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); + static LazyRE2 escaping_pattern = {R"("([^"\\]|\\.)*")"}; + if (RE2::Consume(&consumable, *escaping_pattern)) { + current_ptr_ = consumable.begin(); + StringPiece raw = + StringPieceFromPointers(token_start_ + 1, current_ptr_ - 1); + string error; + if (!tensorflow::str_util::CUnescape(raw, &str_val_, &error)) { + LOG(ERROR) << "Failed unescaping string: " << raw << ". error: " << error; + return TokKind::kError; + } + return TokKind::kString; + } + return TokKind::kError; +} + string TokKindToString(TokKind kind) { switch (kind) { case TokKind::kEof: @@ -398,6 +420,8 @@ string TokKindToString(TokKind kind) { return "kDxD"; case TokKind::kPad: return "kPad"; + case TokKind::kString: + return "kString"; case TokKind::kShape: return "kShape"; case TokKind::kOpcode: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 79c4f271a1..2236c26619 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -46,6 +46,7 @@ class HloLexer { case TokKind::kDimLabels: case TokKind::kDxD: case TokKind::kPad: + case TokKind::kString: return str_val_; default: LOG(FATAL) << "This token does not have string value"; @@ -98,6 +99,7 @@ class HloLexer { TokKind LexConstant(); TokKind LexNumberOrPattern(); TokKind LexComment(); + TokKind LexString(); const tensorflow::StringPiece buf_; const char* current_ptr_; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index fed0492a54..ac7d9ff482 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -91,7 +91,9 @@ class HloParser { // Types of attributes. enum class AttrTy { kInt64, + kInt32, kFloat, + kString, kBracedInt64List, kHloComputation, kWindow, @@ -100,6 +102,7 @@ class HloParser { kInstructionList, kSliceRanges, kPaddingConfig, + kMetadata, }; struct AttrConfig { @@ -108,6 +111,8 @@ class HloParser { void* result; // where to store the parsed result. }; + // attributes ::= (',' attribute)* + // // Parses attributes given names and configs of the attributes. Each parsed // result is passed back through the result pointer in corresponding // AttrConfig. Note that the result pointer must point to a optional typed @@ -121,7 +126,7 @@ class HloParser { // attrs["foo"] = {/*required=*/false, AttrTy::kInt64, &foo}; // optional bar; // attrs["bar"] = {/*required=*/true, AttrTy::kWindow, &bar}; - // if (!ParseAttribute(attrs)) { + // if (!ParseAttributes(attrs)) { // return false; // Do not use 'foo' 'bar' if failed. // } // // Do something with 'bar'. @@ -129,6 +134,18 @@ class HloParser { // bool ParseAttributes(const std::unordered_map& attrs); + // sub_attributes ::= '{' (','? attribute)* '}' + // + // Usage is the same as ParseAttributes. See immediately above. + bool ParseSubAttributes(const std::unordered_map& attrs); + + // Parses one attribute. If it has already been seen, return error. Returns + // true and adds to seen_attrs on success. + // + // Do not call this except in ParseAttributes or ParseSubAttributes. + bool ParseAttributeHelper(const std::unordered_map& attrs, + std::unordered_set* seen_attrs); + // Parses a name and finds the corresponding hlo computation. bool ParseComputationName(HloComputation** value); // Parses a list of names and finds the corresponding hlo instructions. @@ -136,6 +153,7 @@ class HloParser { bool ParseWindow(Window* window); bool ParseConvolutionDimensionNumbers(ConvolutionDimensionNumbers* dnums); bool ParsePaddingConfig(PaddingConfig* padding); + bool ParseMetadata(OpMetadata* metadata); bool ParseSharding(OpSharding* sharding); bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); @@ -151,6 +169,7 @@ class HloParser { bool ParseParamList(); bool ParseName(string* result); bool ParseAttributeName(string* result); + bool ParseString(string* result); bool ParseShape(Shape* result); bool ParseOpcode(HloOpcode* result); bool ParseInt64(int64* result); @@ -303,6 +322,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, optional> predecessors; attrs["control-predecessors"] = {/*required=*/false, AttrTy::kInstructionList, &predecessors}; + optional metadata; + attrs["metadata"] = {/*required=*/false, AttrTy::kMetadata, &metadata}; HloInstruction* instruction; switch (opcode) { @@ -766,6 +787,9 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } } } + if (metadata) { + instruction->set_metadata(*metadata); + } return AddInstruction(name, instruction); } @@ -1284,129 +1308,194 @@ bool HloParser::ParseOperands(std::vector* operands, return true; } +// sub_attributes ::= '{' (','? attribute)* '}' +bool HloParser::ParseSubAttributes( + const std::unordered_map& attrs) { + if (!ParseToken(TokKind::kLbrace, "expects '{' to start sub attributes")) { + return false; + } + std::unordered_set seen_attrs; + if (lexer_.GetKind() == TokKind::kRbrace) { + // empty + } else { + do { + EatIfPresent(TokKind::kComma); + if (!ParseAttributeHelper(attrs, &seen_attrs)) { + return false; + } + } while (lexer_.GetKind() != TokKind::kRbrace); + } + // Check that all required attrs were seen. + for (const auto& attr_it : attrs) { + if (attr_it.second.required && + seen_attrs.find(attr_it.first) == seen_attrs.end()) { + return TokenError(Printf("sub-attribute %s is expected but not seen", + attr_it.first.c_str())); + } + } + return ParseToken(TokKind::kRbrace, "expects '}' to end sub attributes"); +} + +// attributes ::= (',' attribute)* bool HloParser::ParseAttributes( const std::unordered_map& attrs) { std::unordered_set seen_attrs; while (EatIfPresent(TokKind::kComma)) { - string name; - if (!ParseAttributeName(&name)) { - return TokenError("error parsing attributes"); - } - VLOG(1) << "Parsing attribute " << name; - if (!seen_attrs.insert(name).second) { - return TokenError(Printf("attribute %s already exists", name.c_str())); - } - auto attr_it = attrs.find(name); - if (attr_it == attrs.end()) { - return TokenError(Printf("unexpected attribute %s", name.c_str())); - } - AttrTy attr_type = attr_it->second.attr_type; - void* attr_out_ptr = attr_it->second.result; - bool success = [&] { - switch (attr_type) { - case AttrTy::kInt64: { - int64 result; - if (!ParseInt64(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; + if (!ParseAttributeHelper(attrs, &seen_attrs)) { + return false; + } + } + // Check that all required attrs were seen. + for (const auto& attr_it : attrs) { + if (attr_it.second.required && + seen_attrs.find(attr_it.first) == seen_attrs.end()) { + return TokenError(Printf("attribute %s is expected but not seen", + attr_it.first.c_str())); + } + } + return true; +} + +bool HloParser::ParseAttributeHelper( + const std::unordered_map& attrs, + std::unordered_set* seen_attrs) { + string name; + if (!ParseAttributeName(&name)) { + return TokenError("error parsing attributes"); + } + VLOG(1) << "Parsing attribute " << name; + if (!seen_attrs->insert(name).second) { + return TokenError(Printf("attribute %s already exists", name.c_str())); + } + auto attr_it = attrs.find(name); + if (attr_it == attrs.end()) { + return TokenError(Printf("unexpected attribute %s", name.c_str())); + } + AttrTy attr_type = attr_it->second.attr_type; + void* attr_out_ptr = attr_it->second.result; + bool success = [&] { + switch (attr_type) { + case AttrTy::kInt64: { + int64 result; + if (!ParseInt64(&result)) { + return false; } - case AttrTy::kFloat: { - double result; - if (!ParseDouble(&result)) { - return false; - } - if (result > std::numeric_limits::max() || - result < std::numeric_limits::lowest()) { - return TokenError("value out of range for float"); - } - static_cast*>(attr_out_ptr) - ->emplace(static_cast(result)); - return true; + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kInt32: { + int64 result; + if (!ParseInt64(&result)) { + return false; } - case AttrTy::kHloComputation: { - HloComputation* result; - if (!ParseComputationName(&result)) { - return false; - } - static_cast*>(attr_out_ptr) - ->emplace(result); - return true; + if (result != static_cast(result)) { + return TokenError("value out of range for int32"); } - case AttrTy::kWindow: { - Window result; - if (!ParseWindow(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; + static_cast*>(attr_out_ptr) + ->emplace(static_cast(result)); + return true; + } + case AttrTy::kFloat: { + double result; + if (!ParseDouble(&result)) { + return false; } - case AttrTy::kConvolutionDimensionNumbers: { - ConvolutionDimensionNumbers result; - if (!ParseConvolutionDimensionNumbers(&result)) { - return false; - } - static_cast*>(attr_out_ptr) - ->emplace(result); - return true; + if (result > std::numeric_limits::max() || + result < std::numeric_limits::lowest()) { + return TokenError("value out of range for float"); } - case AttrTy::kSharding: { - OpSharding sharding; - if (!ParseSharding(&sharding)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(sharding); - return true; + static_cast*>(attr_out_ptr) + ->emplace(static_cast(result)); + return true; + } + case AttrTy::kHloComputation: { + HloComputation* result; + if (!ParseComputationName(&result)) { + return false; } - case AttrTy::kInstructionList: { - std::vector result; - if (!ParseInstructionNames(&result)) { - return false; - } - static_cast>*>(attr_out_ptr) - ->emplace(result); - return true; + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kWindow: { + Window result; + if (!ParseWindow(&result)) { + return false; } - case AttrTy::kBracedInt64List: { - std::vector result; - if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, - TokKind::kComma, &result)) { - return false; - } - static_cast>*>(attr_out_ptr) - ->emplace(result); - return true; + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kConvolutionDimensionNumbers: { + ConvolutionDimensionNumbers result; + if (!ParseConvolutionDimensionNumbers(&result)) { + return false; } - case AttrTy::kSliceRanges: { - SliceRanges result; - if (!ParseSliceRanges(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kSharding: { + OpSharding sharding; + if (!ParseSharding(&sharding)) { + return false; } - case AttrTy::kPaddingConfig: { - PaddingConfig result; - if (!ParsePaddingConfig(&result)) { - return false; - } - static_cast*>(attr_out_ptr)->emplace(result); - return true; + static_cast*>(attr_out_ptr)->emplace(sharding); + return true; + } + case AttrTy::kInstructionList: { + std::vector result; + if (!ParseInstructionNames(&result)) { + return false; } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kBracedInt64List: { + std::vector result; + if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma, + &result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } + case AttrTy::kSliceRanges: { + SliceRanges result; + if (!ParseSliceRanges(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kPaddingConfig: { + PaddingConfig result; + if (!ParsePaddingConfig(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kString: { + string result; + if (!ParseString(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } + case AttrTy::kMetadata: { + OpMetadata result; + if (!ParseMetadata(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; } - }(); - if (!success) { - return TokenError(Printf("error parsing attribute %s", name.c_str())); - } - } - // Check that all required attrs were seen. - for (const auto& attr_it : attrs) { - if (attr_it.second.required && - seen_attrs.find(attr_it.first) == seen_attrs.end()) { - return TokenError(Printf("attribute %s is expected but not seen", - attr_it.first.c_str())); } + }(); + if (!success) { + return TokenError(Printf("error parsing attribute %s", name.c_str())); } return true; } @@ -1763,6 +1852,16 @@ bool HloParser::ParseAttributeName(string* result) { return true; } +bool HloParser::ParseString(string* result) { + VLOG(1) << "ParseString"; + if (lexer_.GetKind() != TokKind::kString) { + return TokenError("expects string"); + } + *result = lexer_.GetStrVal(); + lexer_.Lex(); + return true; +} + bool HloParser::ParseDxD(const string& name, std::vector* result) { if (!result->empty()) { return TokenError( @@ -1839,6 +1938,35 @@ bool HloParser::ParsePaddingConfig(PaddingConfig* padding) { return true; } +// '{' metadata_string '}' +bool HloParser::ParseMetadata(OpMetadata* metadata) { + std::unordered_map attrs; + optional op_type; + optional op_name; + optional source_file; + optional source_line; + attrs["op_type"] = {/*required=*/false, AttrTy::kString, &op_type}; + attrs["op_name"] = {/*required=*/false, AttrTy::kString, &op_name}; + attrs["source_file"] = {/*required=*/false, AttrTy::kString, &source_file}; + attrs["source_line"] = {/*required=*/false, AttrTy::kInt32, &source_line}; + if (!ParseSubAttributes(attrs)) { + return false; + } + if (op_type) { + metadata->set_op_type(*op_type); + } + if (op_name) { + metadata->set_op_name(*op_name); + } + if (source_file) { + metadata->set_source_file(*source_file); + } + if (source_line) { + metadata->set_source_line(*source_line); + } + return true; +} + bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << "ParseOpcode"; if (lexer_.GetKind() != TokKind::kOpcode) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index d19c6e1877..bed912d921 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -65,7 +65,7 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { R"(HloModule constant_pred_module: ENTRY %constant_pred () -> pred[] { - ROOT %constant = pred[] constant(true) + ROOT %constant = pred[] constant(true), metadata={op_type="const" op_name="\"it\'s not a problem\n" source_file="path/to/test.cc" source_line=68} } )" @@ -83,7 +83,8 @@ ENTRY %constant_s32 () -> s32[] { }, // f32 constant, but the value is not a decimal { -"ConstantF32", R"(HloModule ConstantF32_module: +"ConstantF32", +R"(HloModule ConstantF32_module: ENTRY %ConstantF32.v4 () -> f32[] { ROOT %constant = f32[] constant(42) @@ -841,6 +842,17 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 "expects padding_low and padding_high separated by '_'"); } +TEST_F(HloParserTest, CommaBetweenSubAttributes) { + const string original = R"(HloModule test_comma_module: + +ENTRY %test_comma.v4 () -> f32[] { + ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"} +} + +)"; + TF_EXPECT_OK(Parse(original).status()); +} + } // namespace } // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 9afd2fac23..78a72837ca 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -60,6 +60,7 @@ enum class TokKind { kDimLabels, // [0-9bf]+_[0-9io]+->[0-9bf]+ kDxD, // [0-9]+(x[0-9]+)+ kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* + kString, // "abcd\"\n" kShape, // f32[2,3]{1,0} kOpcode, // add kInt, // 42 -- GitLab From eabee43e39186d91956277a4fc8b0dd566a68e3b Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Fri, 10 Nov 2017 16:54:19 -0800 Subject: [PATCH 0300/1801] Update remote tflite_mobilenet dependency path PiperOrigin-RevId: 175357028 --- .../test/java/org/tensorflow/lite/InterpreterTest.java | 4 ++-- .../tensorflow/lite/NativeInterpreterWrapperTest.java | 10 +++++----- .../src/test/java/org/tensorflow/lite/TensorTest.java | 2 +- tensorflow/workspace.bzl | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index a60c63d4b8..424b3de6c9 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -36,10 +36,10 @@ import org.junit.runners.JUnit4; public final class InterpreterTest { private static final File MODEL_FILE = - new File("third_party/tensorflow/contrib/lite/java/src/testdata/add.bin"); + new File("tensorflow/contrib/lite/java/src/testdata/add.bin"); private static final File MOBILENET_MODEL_FILE = - new File("third_party/tensorflow/contrib/lite/java/src/testdata/mobilenet.tflite.bin"); + new File("tensorflow/contrib/lite/java/src/testdata/mobilenet.tflite.bin"); @Test public void testInterpreter() throws Exception { diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index 3d29877b5d..9a6894f49c 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -29,19 +29,19 @@ import org.junit.runners.JUnit4; public final class NativeInterpreterWrapperTest { private static final String FLOAT_MODEL_PATH = - "third_party/tensorflow/contrib/lite/java/src/testdata/add.bin"; + "tensorflow/contrib/lite/java/src/testdata/add.bin"; private static final String INT_MODEL_PATH = - "third_party/tensorflow/contrib/lite/java/src/testdata/int32.bin"; + "tensorflow/contrib/lite/java/src/testdata/int32.bin"; private static final String LONG_MODEL_PATH = - "third_party/tensorflow/contrib/lite/java/src/testdata/int64.bin"; + "tensorflow/contrib/lite/java/src/testdata/int64.bin"; private static final String BYTE_MODEL_PATH = - "third_party/tensorflow/contrib/lite/java/src/testdata/uint8.bin"; + "tensorflow/contrib/lite/java/src/testdata/uint8.bin"; private static final String INVALID_MODEL_PATH = - "third_party/tensorflow/contrib/lite/java/src/testdata/invalid_model.bin"; + "tensorflow/contrib/lite/java/src/testdata/invalid_model.bin"; @Test public void testConstructor() { diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java index e41e971159..94b6632bb8 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java @@ -29,7 +29,7 @@ import org.junit.runners.JUnit4; public final class TensorTest { private static final String MODEL_PATH = - "third_party/tensorflow/contrib/lite/java/src/testdata/add.bin"; + "tensorflow/contrib/lite/java/src/testdata/add.bin"; private NativeInterpreterWrapper wrapper; private long nativeHandle; diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index dfe332b091..2c9f067882 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -819,8 +819,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.new_http_archive( name = "tflite_mobilenet", build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), - sha256 = "eb71679d23a0cbdb173b36ea39f3d3096de0a9b0410d148a8237f20cc1157a61", + sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b", urls = [ - "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_1.0_224_quantized_2017_11_01.zip" + "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip" ], ) -- GitLab From 2627e3fcd363187a790dc736a38cc3d61ee429dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 17:42:25 -0800 Subject: [PATCH 0301/1801] Merged commit includes the following changes: 175361576 by wun: Convert the performance guide links to the models repo to direct GitHub links -- 175360999 by mrry: [tf.data] Eagerly construct the string handle for an Iterator. This enables users to call `Iterator.string_handle()` after the graph has been finalized, which makes it easier to use in distributed training among other settings. -- 175360918 by aselle: Fix schema compatibility test and upgrade test. - Handle python3 unicode idioms - Handle path differences -- 175357826 by wun: Convert the performance guide links to the models repo to direct GitHub links -- PiperOrigin-RevId: 175361576 --- tensorflow/contrib/lite/schema/BUILD | 2 +- .../schema/flatbuffer_compatibility_test.cc | 2 +- .../contrib/lite/schema/upgrade_schema.py | 13 +++++++-- .../lite/schema/upgrade_schema_test.py | 17 +++++++---- tensorflow/python/data/ops/iterator_ops.py | 9 ++++-- .../python/kernel_tests/iterator_ops_test.py | 28 +++++++++++++++++++ 6 files changed, 58 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD index 3e04d6f34f..54167ddd9a 100644 --- a/tensorflow/contrib/lite/schema/BUILD +++ b/tensorflow/contrib/lite/schema/BUILD @@ -16,11 +16,11 @@ py_binary( "schema_v1.fbs", "schema_v2.fbs", "schema_v3.fbs", + "@flatbuffers//:flatc", ], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:platform", - "@flatbuffers//:flatc", ], ) diff --git a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc index 17ee0af8dd..cd46a06f7d 100644 --- a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc +++ b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc @@ -15,7 +15,7 @@ limitations under the License. #include #include -#include "third_party/flatbuffers/include/flatbuffers/flatc.h" +#include "flatbuffers/flatc.h" #include "tensorflow/core/platform/platform.h" #ifdef PLATFORM_GOOGLE diff --git a/tensorflow/contrib/lite/schema/upgrade_schema.py b/tensorflow/contrib/lite/schema/upgrade_schema.py index 320c7138d2..94f5730be5 100644 --- a/tensorflow/contrib/lite/schema/upgrade_schema.py +++ b/tensorflow/contrib/lite/schema/upgrade_schema.py @@ -77,8 +77,13 @@ class Converter(object): def __init__(self): # TODO(aselle): make this work in the open source version with better # path. - self._flatc_path = resource_loader.get_path_to_datafile( - "../../../../flatbuffers/flatc") + paths_to_try = [ + "../../../../flatbuffers/flatc", # not bazel + "../../../../external/flatbuffers/flatc" # bazel + ] + for p in paths_to_try: + self._flatc_path = resource_loader.get_path_to_datafile(p) + if os.path.exists(self._flatc_path): break def FindSchema(base_name): return resource_loader.get_path_to_datafile("%s" % base_name) @@ -250,7 +255,9 @@ class Converter(object): # Upgrade the operator codes for operator_code in data["operator_codes"]: - if not isinstance(operator_code["builtin_code"], unicode): + # Check if builtin_code is the appropriate string type + # use type("") instead of str or unicode. for py2and3 + if not isinstance(operator_code["builtin_code"], type(u"")): raise ValueError("builtin_code %r is non-string. this usually means" "your model has consistency problems." % (operator_code["builtin_code"])) diff --git a/tensorflow/contrib/lite/schema/upgrade_schema_test.py b/tensorflow/contrib/lite/schema/upgrade_schema_test.py index 475cdb9d8b..754400e888 100644 --- a/tensorflow/contrib/lite/schema/upgrade_schema_test.py +++ b/tensorflow/contrib/lite/schema/upgrade_schema_test.py @@ -263,7 +263,7 @@ class TestSchemaUpgrade(test_util.TensorFlowTestCase): invalid_extension = tempfile.mktemp(suffix=".foo") with self.assertRaisesRegexp(ValueError, "Invalid extension on input"): converter.Convert(invalid_extension, invalid_extension) - with tempfile.NamedTemporaryFile(suffix=".json") as in_json: + with tempfile.NamedTemporaryFile(suffix=".json", mode="w+") as in_json: JsonDumpAndFlush(EMPTY_TEST_SCHEMA_V1, in_json) with self.assertRaisesRegexp(ValueError, "Invalid extension on output"): converter.Convert(in_json.name, invalid_extension) @@ -276,10 +276,13 @@ class TestSchemaUpgrade(test_util.TensorFlowTestCase): data_expected: TFLite model as a dictionary (upgraded). """ converter = upgrade_schema_lib.Converter() - with tempfile.NamedTemporaryFile(suffix=".json") as in_json, \ - tempfile.NamedTemporaryFile(suffix=".json") as out_json, \ - tempfile.NamedTemporaryFile(suffix=".bin") as out_bin, \ - tempfile.NamedTemporaryFile(suffix=".tflite") as out_tflite: + with tempfile.NamedTemporaryFile(suffix=".json", mode="w+") as in_json, \ + tempfile.NamedTemporaryFile( + suffix=".json", mode="w+") as out_json, \ + tempfile.NamedTemporaryFile( + suffix=".bin", mode="w+b") as out_bin, \ + tempfile.NamedTemporaryFile( + suffix=".tflite", mode="w+b") as out_tflite: JsonDumpAndFlush(data_old, in_json) # Test JSON output converter.Convert(in_json.name, out_json.name) @@ -287,7 +290,9 @@ class TestSchemaUpgrade(test_util.TensorFlowTestCase): # Convert to .tflite and then to .bin and check if binary is equal converter.Convert(in_json.name, out_tflite.name) converter.Convert(out_tflite.name, out_bin.name) - self.assertEqual(open(out_bin.name).read(), open(out_tflite.name).read()) + self.assertEqual( + open(out_bin.name, "rb").read(), + open(out_tflite.name, "rb").read()) # Test that conversion actually produced successful new json. converted_schema = json.load(out_json) self.assertEqual(converted_schema, data_expected) diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index d4f05a055a..b538caf281 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -49,6 +49,8 @@ class Iterator(object): self._initializer = initializer self._output_types = output_types self._output_shapes = output_shapes + self._string_handle = gen_dataset_ops.iterator_to_string_handle( + self._iterator_resource) @staticmethod def from_structure(output_types, output_shapes=None, shared_name=None): @@ -267,8 +269,11 @@ class Iterator(object): Returns: A scalar `tf.Tensor` of type `tf.string`. """ - return gen_dataset_ops.iterator_to_string_handle( - self._iterator_resource, name=name) + if name is None: + return self._string_handle + else: + return gen_dataset_ops.iterator_to_string_handle( + self._iterator_resource, name=name) @property def output_shapes(self): diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/kernel_tests/iterator_ops_test.py index b198fa1754..513c36d64f 100644 --- a/tensorflow/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/python/kernel_tests/iterator_ops_test.py @@ -396,6 +396,34 @@ class IteratorTest(test.TestCase): sess.run(next_element, feed_dict={handle_placeholder: iterator_4_handle}) + def testIteratorStringHandleReuseTensorObject(self): + dataset = dataset_ops.Dataset.from_tensor_slices([1, 2, 3]) + one_shot_iterator = dataset.make_one_shot_iterator() + initializable_iterator = dataset.make_initializable_iterator() + structure_iterator = iterator_ops.Iterator.from_structure( + dataset.output_types) + + created_ops = len(ops.get_default_graph().get_operations()) + + self.assertIs(one_shot_iterator.string_handle(), + one_shot_iterator.string_handle()) + self.assertIs(initializable_iterator.string_handle(), + initializable_iterator.string_handle()) + self.assertIs(structure_iterator.string_handle(), + structure_iterator.string_handle()) + + # Assert that getting the (default) string handle creates no ops. + self.assertEqual(created_ops, len(ops.get_default_graph().get_operations())) + + # Specifying an explicit name will create a new op. + handle_with_name = one_shot_iterator.string_handle(name="foo") + self.assertEqual("foo", handle_with_name.op.name) + self.assertIsNot(one_shot_iterator.string_handle(), handle_with_name) + + handle_with_same_name = one_shot_iterator.string_handle(name="foo") + self.assertEqual("foo_1", handle_with_same_name.op.name) + self.assertIsNot(handle_with_name, handle_with_same_name) + def testIteratorStringHandleError(self): dataset_int_scalar = (dataset_ops.Dataset.from_tensor_slices([1, 2, 3]).repeat()) -- GitLab From eb85d03241beacfa8b842a511f1a720d39729568 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 19:01:36 -0800 Subject: [PATCH 0302/1801] Add Py2And3 --- tensorflow/contrib/lite/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index b4aa032ff8..89e8693490 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -23,6 +23,7 @@ py_library( py_test( name = "lite_test", srcs = ["lite_test.py"], + srcs_version = "PY2AND3", deps = [ ":lite", "//tensorflow/python:array_ops", -- GitLab From 973987fbe2d9448e15f6efa613aae090703457e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 17:49:41 -0800 Subject: [PATCH 0303/1801] Remove dep to file/base:path PiperOrigin-RevId: 175362138 --- tensorflow/contrib/lite/BUILD | 8 ++++---- .../contrib/lite/models/speech_hotword_model_test.cc | 9 ++++----- .../contrib/lite/models/speech_speakerid_model_test.cc | 8 ++++---- tensorflow/contrib/lite/models/speech_tts_model_test.cc | 8 ++++---- tensorflow/contrib/lite/models/test_utils.h | 6 +++--- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index c58f77cb11..e0c674c295 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -178,7 +178,7 @@ cc_library( deps = select({ "//tensorflow:android": [], "//conditions:default": [ - #"//file/base:path", + "@com_google_absl//absl/strings", "//tensorflow/core:test", ], }), @@ -198,8 +198,8 @@ cc_test( deps = [ ":framework", ":models_test_utils", - #"//file/base:path", "//tensorflow/contrib/lite/kernels:builtin_ops", + "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], ) @@ -224,9 +224,9 @@ cc_test( deps = [ ":framework", ":models_test_utils", - #"//file/base:path", "//tensorflow/contrib/lite/kernels:builtin_ops", "//tensorflow/contrib/lite/tools:mutable_op_resolver", + "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], ) @@ -261,8 +261,8 @@ cc_test( deps = [ ":framework", ":models_test_utils", - #"//file/base:path", "//tensorflow/contrib/lite/kernels:builtin_ops", + "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], ) diff --git a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc index f5d1f436bc..0b8266447a 100644 --- a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc @@ -20,9 +20,9 @@ limitations under the License. #include #include "base/logging.h" -#include "file/base/path.h" #include "testing/base/public/googletest.h" #include +#include "absl/strings/str_cat.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" @@ -36,7 +36,7 @@ void RunTest(int model_input_tensor, int svdf_layer_state_tensor, int model_output_tensor, const string& model_name, const string& golden_in_name, const string& golden_out_name) { // Read the model. - string tflite_file_path = file::JoinPath(TestDataPath(), model_name); + string tflite_file_path = StrCat(TestDataPath(), "/", model_name); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to read model from file " << tflite_file_path; @@ -53,13 +53,12 @@ void RunTest(int model_input_tensor, int svdf_layer_state_tensor, // Load the input frames. Frames input_frames; - const string input_file_path = file::JoinPath(TestDataPath(), golden_in_name); + const string input_file_path = StrCat(TestDataPath(), "/", golden_in_name); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; - const string output_file_path = - file::JoinPath(TestDataPath(), golden_out_name); + const string output_file_path = StrCat(TestDataPath(), "/", golden_out_name); ReadFrames(output_file_path, &output_frames); const int speech_batch_size = diff --git a/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc index 687cfab0b2..9da0fb1fc6 100644 --- a/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc @@ -20,9 +20,9 @@ limitations under the License. #include #include "base/logging.h" -#include "file/base/path.h" #include "testing/base/public/googletest.h" #include +#include "absl/strings/str_cat.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/model.h" @@ -46,7 +46,7 @@ constexpr int kModelOutputTensor = 66; TEST(SpeechSpeakerId, OkGoogleTest) { // Read the model. string tflite_file_path = - file::JoinPath(TestDataPath(), "speech_speakerid_model.tflite"); + StrCat(TestDataPath(), "/", "speech_speakerid_model.tflite"); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to read model from file " << tflite_file_path; @@ -61,13 +61,13 @@ TEST(SpeechSpeakerId, OkGoogleTest) { // Load the input frames. Frames input_frames; const string input_file_path = - file::JoinPath(TestDataPath(), "speech_speakerid_model_in.csv"); + StrCat(TestDataPath(), "/", "speech_speakerid_model_in.csv"); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; const string output_file_path = - file::JoinPath(TestDataPath(), "speech_speakerid_model_out.csv"); + StrCat(TestDataPath(), "/", "speech_speakerid_model_out.csv"); ReadFrames(output_file_path, &output_frames); const int speech_batch_size = diff --git a/tensorflow/contrib/lite/models/speech_tts_model_test.cc b/tensorflow/contrib/lite/models/speech_tts_model_test.cc index e6f2673a42..8829177689 100644 --- a/tensorflow/contrib/lite/models/speech_tts_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_tts_model_test.cc @@ -20,9 +20,9 @@ limitations under the License. #include #include "base/logging.h" -#include "file/base/path.h" #include "testing/base/public/googletest.h" #include +#include "absl/strings/str_cat.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" @@ -45,7 +45,7 @@ constexpr int kModelOutputTensor = 74; TEST(SpeechTTS, RandomIOTest) { // Read the model. string tflite_file_path = - file::JoinPath(TestDataPath(), "speech_tts_model.tflite"); + StrCat(TestDataPath(), "/", "speech_tts_model.tflite"); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to mmap model " << tflite_file_path; @@ -59,13 +59,13 @@ TEST(SpeechTTS, RandomIOTest) { // Load the input frames. Frames input_frames; const string input_file_path = - file::JoinPath(TestDataPath(), "speech_tts_model_in.csv"); + StrCat(TestDataPath(), "/", "speech_tts_model_in.csv"); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; const string output_file_path = - file::JoinPath(TestDataPath(), "speech_tts_model_out.csv"); + StrCat(TestDataPath(), "/", "speech_tts_model_out.csv"); ReadFrames(output_file_path, &output_frames); const int speech_batch_size = diff --git a/tensorflow/contrib/lite/models/test_utils.h b/tensorflow/contrib/lite/models/test_utils.h index b2596babd0..1e14c26a35 100644 --- a/tensorflow/contrib/lite/models/test_utils.h +++ b/tensorflow/contrib/lite/models/test_utils.h @@ -30,12 +30,12 @@ using Frames = std::vector>; } // namespace tflite #ifndef __ANDROID__ -#include "file/base/path.h" +#include "absl/strings/str_cat.h" #include "tensorflow/core/platform/test.h" inline string TestDataPath() { - return string(file::JoinPath(tensorflow::testing::TensorFlowSrcRoot(), - "contrib/lite/models/testdata/")); + return string(StrCat(tensorflow::testing::TensorFlowSrcRoot(), "/", + "contrib/lite/models/testdata/")); } inline int TestInputSize(const tflite::models::Frames& input_frames) { return input_frames.size(); -- GitLab From 03c150d3d3cd00c8fbcb7e84b5fa1db08256ed3c Mon Sep 17 00:00:00 2001 From: Neal Wu Date: Fri, 10 Nov 2017 19:16:53 -0800 Subject: [PATCH 0304/1801] Minor cleanup of links in the performance guides PiperOrigin-RevId: 175368372 --- tensorflow/docs_src/performance/performance_guide.md | 2 +- tensorflow/docs_src/performance/performance_models.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/performance/performance_guide.md b/tensorflow/docs_src/performance/performance_guide.md index da556bd848..17f71a6d77 100644 --- a/tensorflow/docs_src/performance/performance_guide.md +++ b/tensorflow/docs_src/performance/performance_guide.md @@ -127,7 +127,7 @@ Reading large numbers of small files significantly impacts I/O performance. One approach to get maximum I/O throughput is to preprocess input data into larger (~100MB) `TFRecord` files. For smaller data sets (200MB-1GB), the best approach is often to load the entire data set into memory. The document -[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/master/research/slim#Data) +[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/master/research/slim#downloading-and-converting-to-tfrecord-format) includes information and scripts for creating `TFRecords` and this [script](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py) converts the CIFAR-10 data set into `TFRecords`. diff --git a/tensorflow/docs_src/performance/performance_models.md b/tensorflow/docs_src/performance/performance_models.md index fcda19e74c..359b0e904d 100644 --- a/tensorflow/docs_src/performance/performance_models.md +++ b/tensorflow/docs_src/performance/performance_models.md @@ -29,8 +29,8 @@ implementation is made up of 3 stages: The dominant part of each stage is executed in parallel with the other stages using `data_flow_ops.StagingArea`. `StagingArea` is a queue-like operator -similar to @{tf.FIFOQueue}. The difference is that `StagingArea` does not -guarantee FIFO ordering, but offers simpler functionality and can be executed +similar to @{tf.FIFOQueue}. The difference is that `StagingArea` does not +guarantee FIFO ordering, but offers simpler functionality and can be executed on both CPU and GPU in parallel with other stages. Breaking the input pipeline into 3 stages that operate independently in parallel is scalable and takes full advantage of large multi-core environments. The rest of this section details @@ -344,7 +344,7 @@ executing the main script `alexnet`. * **`num_gpus`**: Number of GPUs to use. * **`data_dir`**: Path to data to process. If not set, synthetic data is used. - To use Imagenet data use these + To use ImageNet data use these [instructions](https://github.com/tensorflow/models/tree/master/research/inception#getting-started) as a starting point. * **`batch_size`**: Batch size for each GPU. -- GitLab From c1fbf06b79f6067c4287afb1bc1a22f8c6584e98 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 19:54:30 -0800 Subject: [PATCH 0305/1801] Revert "Register int64 for GPU StridedSlice kernel" This reverts commit 6bc5375cb07d8d595411ec0516d29314053a8e83. --- tensorflow/core/kernels/strided_slice_op.cc | 1 - tensorflow/core/kernels/strided_slice_op_impl.h | 2 -- tensorflow/python/kernel_tests/array_ops_test.py | 13 +------------ 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 73b6d4cf6a..8fc40db3cc 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index a39fdff954..7d42887426 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -283,7 +283,6 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU); TF_CALL_complex64(DECLARE_FOR_N_GPU); TF_CALL_complex128(DECLARE_FOR_N_GPU); DECLARE_FOR_N_GPU(int32); -DECLARE_FOR_N_GPU(int64); #endif // END GOOGLE_CUDA TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); @@ -299,7 +298,6 @@ DECLARE_FOR_N_CPU(bfloat16); TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL); DECLARE_FOR_N_SYCL(int32); -DECLARE_FOR_N_SYCL(int64); #undef DECLARE_FOR_N_SYCL #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 612f2c0a72..76b80e60ea 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -505,7 +505,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): _ = checker2[...] _ = checker2[tuple()] - def testFloatSlicedArrayAndInt64IndicesGPU(self): + def testInt64GPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") with self.test_session(use_gpu=True, force_gpu=True): @@ -516,17 +516,6 @@ class StridedSliceTest(test_util.TensorFlowTestCase): s = array_ops.strided_slice(x, begin, end, strides) self.assertAllEqual([3.], self.evaluate(s)) - def testInt64SlicedArrayAndIndicesGPU(self): - if not test_util.is_gpu_available(): - self.skipTest("No GPU available") - with self.test_session(use_gpu=True, force_gpu=True): - x = constant_op.constant([1, 2, 3], dtype=dtypes.int64) - begin = constant_op.constant([2], dtype=dtypes.int64) - end = constant_op.constant([3], dtype=dtypes.int64) - strides = constant_op.constant([1], dtype=dtypes.int64) - s = array_ops.strided_slice(x, begin, end, strides) - self.assertAllEqual([3], self.evaluate(s)) - def testDegenerateSlices(self): with self.test_session(use_gpu=True): checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR) -- GitLab From 604e9af105aac81cafc3202dd1c284cc1ff3c750 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 19:54:50 -0800 Subject: [PATCH 0306/1801] Holdout dataset tests that from pip. --- tensorflow/contrib/data/python/kernel_tests/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 6723f92e08..81e63d8df9 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -365,6 +365,7 @@ py_test( size = "small", srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -429,6 +430,7 @@ py_test( size = "small", srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", -- GitLab From 9c6eebabc71906d240338adc89fa838bd5635aa0 Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Fri, 10 Nov 2017 19:55:36 -0800 Subject: [PATCH 0307/1801] Add comment on HloPtrComparator. PiperOrigin-RevId: 175370054 --- tensorflow/compiler/xla/service/hlo_instruction.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 438d8bb35b..90293016ab 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1250,6 +1250,9 @@ std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind); // To make the iteration order over the map deterministic, the comparator // should not be using the pointer values, but rather an intrinsic property of // the hlo. +// +// Note that this cannot be used for HLO instructions across multiple modules +// since the id of HLO instructions are only unique within each HLO module. struct HloPtrComparator { bool operator()(const HloInstruction* const& lhs, const HloInstruction* const& rhs) const { -- GitLab From e0249a0589cda4fec00a88b7b3e5de497c08d3ea Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 20:07:21 -0800 Subject: [PATCH 0308/1801] Further BUILD cleanup in contrib/... PiperOrigin-RevId: 175370768 --- tensorflow/contrib/bayesflow/BUILD | 4 ---- tensorflow/contrib/data/BUILD | 13 +++++-------- .../contrib/data/python/kernel_tests/BUILD | 17 ++++++++--------- tensorflow/contrib/data/python/ops/BUILD | 9 ++------- tensorflow/contrib/eager/python/BUILD | 12 +++++++----- .../eager/python/examples/rnn_colorbot/BUILD | 1 + .../contrib/eager/python/examples/rnn_ptb/BUILD | 2 ++ .../contrib/kfac/python/kernel_tests/BUILD | 1 - tensorflow/contrib/lite/toco/BUILD | 2 +- .../model_pruning/examples/cifar10/BUILD | 5 +++++ tensorflow/contrib/quantize/BUILD | 2 ++ tensorflow/contrib/rnn/BUILD | 10 ++++------ tensorflow/contrib/signal/BUILD | 1 - tensorflow/contrib/specs/BUILD | 1 + tensorflow/contrib/summary/BUILD | 2 ++ tensorflow/contrib/tensor_forest/BUILD | 6 ++---- tensorflow/contrib/tensorboard/db/BUILD | 2 -- tensorflow/contrib/timeseries/BUILD | 5 +---- .../python/timeseries/state_space_models/BUILD | 3 +++ tensorflow/tools/pip_package/BUILD | 4 ++++ 20 files changed, 50 insertions(+), 52 deletions(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 213ae01c3b..b024f158cd 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -21,7 +21,6 @@ py_library( deps = [ "//tensorflow/contrib/framework:framework_py", "//tensorflow/python:array_ops", - "//tensorflow/python:check_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:functional_ops", @@ -32,12 +31,9 @@ py_library( "//tensorflow/python:platform", "//tensorflow/python:random_ops", "//tensorflow/python:state_ops", - "//tensorflow/python:training", "//tensorflow/python:util", - "//tensorflow/python:variable_scope", "//tensorflow/python/ops/distributions", "//third_party/py/numpy", - "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index eaede0e00e..f7d8a084d9 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -17,7 +17,6 @@ py_library( deps = [ "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:iterator_ops", - "//tensorflow/contrib/data/python/ops:prefetching_py", "//tensorflow/contrib/data/python/ops:readers", "//tensorflow/contrib/data/python/ops:transformation_ops", "//tensorflow/python:util", @@ -27,12 +26,8 @@ py_library( tf_custom_op_library( name = "_prefetching_ops.so", - srcs = [ - "ops/prefetching_ops.cc", - ], - deps = [ - "//tensorflow/contrib/data/kernels:prefetching_kernels", - ], + srcs = ["ops/prefetching_ops.cc"], + deps = ["//tensorflow/contrib/data/kernels:prefetching_kernels"], ) tf_gen_op_libs( @@ -42,7 +37,9 @@ tf_gen_op_libs( filegroup( name = "all_files", srcs = glob( - ["**/*"], + include = [ + "**/*", + ], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index d811683ecd..552958b986 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -266,7 +266,6 @@ py_test( srcs_version = "PY2AND3", deps = [ "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/contrib/data/python/ops:transformation_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -274,7 +273,6 @@ py_test( "//tensorflow/python:data_flow_ops", "//tensorflow/python:dtypes", "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", "//tensorflow/python:functional_ops", "//tensorflow/python:io_ops", "//tensorflow/python:lookup_ops", @@ -282,7 +280,6 @@ py_test( "//tensorflow/python:random_ops", "//tensorflow/python:script_ops", "//tensorflow/python:string_ops", - "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//third_party/py/numpy", @@ -431,14 +428,10 @@ py_test( deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:training", - "//tensorflow/python/data/util:nest", "//third_party/py/numpy", ], ) @@ -450,20 +443,26 @@ py_test( srcs_version = "PY2AND3", tags = ["no_oss"], # b/68785503 deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:prefetching_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:function", "//tensorflow/python:resource_variable_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", ], ) filegroup( name = "all_files", srcs = glob( - ["**/*"], + include = [ + "**/*", + ], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 1b81cf5be9..6d64fb03e7 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -19,6 +19,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":transformation_ops", + "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", @@ -84,6 +85,7 @@ py_library( "//tensorflow/python:random_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_util", + "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", "//third_party/py/numpy", @@ -117,14 +119,7 @@ tf_custom_op_py_library( deps = [ ":prefetching_ops", "//tensorflow/contrib/util:util_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:math_ops", "//tensorflow/python:platform", - "//tensorflow/python:state_ops", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", ], ) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 2b84bc2e9b..6783f7beb0 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -12,16 +12,15 @@ py_library( visibility = ["//visibility:public"], deps = [ ":datasets", - ":evaluator", ":metrics", ":network", ":saver", - ":summary_writer", "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python:numerics", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:util", + "//tensorflow/python:variable_scope", "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/eager:core", @@ -165,11 +164,9 @@ py_test( ":metrics", "//tensorflow/contrib/summary:summary_ops", "//tensorflow/contrib/summary:summary_test_util", - "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:dtypes", - "//tensorflow/python:lib", - "//tensorflow/python:platform", + "//tensorflow/python:framework_ops", "//tensorflow/python:training", "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", @@ -219,8 +216,11 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ + "//tensorflow/python:framework_ops", "//tensorflow/python:layers_base", + "//tensorflow/python:training", "//tensorflow/python:variable_scope", + "//tensorflow/python/eager:context", "//tensorflow/python/estimator:util", ], ) @@ -232,11 +232,13 @@ py_test( deps = [ ":network", "//tensorflow/python:constant_op", + "//tensorflow/python:errors", "//tensorflow/python:framework_test_lib", "//tensorflow/python:layers", "//tensorflow/python:math_ops", "//tensorflow/python:nn_ops", "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:training", "//tensorflow/python:variable_scope", "//tensorflow/python/eager:test", ], diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD b/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD index b657d31f35..f83eb5c476 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD +++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/BUILD @@ -11,6 +11,7 @@ py_binary( deps = [ "//tensorflow:tensorflow_py", "//tensorflow/contrib/eager/python:tfe", + "//tensorflow/python/eager:context", "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD b/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD index db2587bf2c..4b4792cd49 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/BUILD @@ -10,7 +10,9 @@ py_binary( srcs_version = "PY2AND3", deps = [ "//tensorflow:tensorflow_py", + "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_py", "//tensorflow/contrib/eager/python:tfe", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index 5b7747b0a1..60c245166d 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -88,7 +88,6 @@ py_test( deps = [ "//tensorflow/contrib/kfac/python/ops:kfac_optimizer", "//tensorflow/contrib/kfac/python/ops:layer_collection", - "//tensorflow/contrib/kfac/python/ops:loss_functions", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_ops", diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 05e77c330c..77d381c1c5 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -75,7 +75,6 @@ cc_library( ":runtime", ":toco_port", "//tensorflow/core:lib", - "@com_google_absl//absl/base:core_headers", "@protobuf_archive//:protobuf_headers", ], ) @@ -144,6 +143,7 @@ cc_library( "toco_types.h", ], deps = [ + "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", ] + select({ diff --git a/tensorflow/contrib/model_pruning/examples/cifar10/BUILD b/tensorflow/contrib/model_pruning/examples/cifar10/BUILD index 299278ae75..e7848adcc5 100644 --- a/tensorflow/contrib/model_pruning/examples/cifar10/BUILD +++ b/tensorflow/contrib/model_pruning/examples/cifar10/BUILD @@ -39,6 +39,7 @@ py_library( deps = [ ":cifar10_input", "//tensorflow:tensorflow_py", + "//tensorflow/contrib/model_pruning:pruning", ], ) @@ -50,6 +51,8 @@ py_binary( srcs_version = "PY2AND3", deps = [ ":cifar10_pruning", + "//tensorflow:tensorflow_py", + "//third_party/py/numpy", ], ) @@ -61,6 +64,8 @@ py_binary( srcs_version = "PY2AND3", deps = [ ":cifar10_pruning", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/model_pruning:pruning", ], ) diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD index 45a98c7f85..389e26cca3 100644 --- a/tensorflow/contrib/quantize/BUILD +++ b/tensorflow/contrib/quantize/BUILD @@ -184,9 +184,11 @@ py_test( ":quantize", "//tensorflow/contrib/layers:layers_py", "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python:init_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:nn_ops", "//tensorflow/python:platform_test", ], diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD index b70a5bbcd1..7e5e35d0b5 100644 --- a/tensorflow/contrib/rnn/BUILD +++ b/tensorflow/contrib/rnn/BUILD @@ -188,6 +188,8 @@ tf_py_test( "//tensorflow/python:gradients", "//tensorflow/python:init_ops", "//tensorflow/python:platform_test", + "//tensorflow/python:rnn", + "//tensorflow/python:rnn_cell", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", ], @@ -227,9 +229,7 @@ tf_custom_op_library( "kernels/lstm_ops_gpu.cu.cc", "kernels/lstm_ops.h", ], - deps = [ - "//tensorflow/core/kernels:eigen_helpers", - ], + deps = ["//tensorflow/core/kernels:eigen_helpers"], ) tf_gen_op_wrapper_py( @@ -251,9 +251,7 @@ tf_custom_op_library( "kernels/gru_ops_gpu.cu.cc", "kernels/gru_ops.h", ], - deps = [ - "//tensorflow/core/kernels:eigen_helpers", - ], + deps = ["//tensorflow/core/kernels:eigen_helpers"], ) tf_gen_op_wrapper_py( diff --git a/tensorflow/contrib/signal/BUILD b/tensorflow/contrib/signal/BUILD index b67090dd50..a83fc20596 100644 --- a/tensorflow/contrib/signal/BUILD +++ b/tensorflow/contrib/signal/BUILD @@ -12,7 +12,6 @@ py_library( srcs = ["__init__.py"] + glob(["python/ops/*.py"]), srcs_version = "PY2AND3", deps = [ - ":test_util", "//tensorflow/python:array_ops", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/contrib/specs/BUILD b/tensorflow/contrib/specs/BUILD index 6102fac7bd..4b688690ae 100644 --- a/tensorflow/contrib/specs/BUILD +++ b/tensorflow/contrib/specs/BUILD @@ -45,6 +45,7 @@ tf_py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:init_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:variables", ], ) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index 3c60d2bb56..d1beafcb28 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -50,9 +50,11 @@ py_library( ":gen_summary_ops", "//tensorflow/python:array_ops", "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:layers_base", + "//tensorflow/python:math_ops", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:summary_op_util", "//tensorflow/python:training", diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 878415604e..f54daa7125 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -200,11 +200,8 @@ py_library( # Model Ops. cc_library( name = "model_ops_lib", - srcs = [ - "kernels/model_ops.cc", - ], + srcs = ["kernels/model_ops.cc"], deps = [ - "//third_party/eigen3", "//tensorflow/contrib/tensor_forest:tree_utils", "//tensorflow/contrib/tensor_forest/kernels/v4:decision-tree-resource", "//tensorflow/contrib/tensor_forest/kernels/v4:input_data", @@ -269,6 +266,7 @@ tf_custom_op_py_library( srcs_version = "PY2AND3", deps = [ ":gen_model_ops_py", + ":stats_ops_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform", diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD index 068e862650..9d3d60c24d 100644 --- a/tensorflow/contrib/tensorboard/db/BUILD +++ b/tensorflow/contrib/tensorboard/db/BUILD @@ -22,10 +22,8 @@ tf_cc_test( srcs = ["schema_test.cc"], deps = [ ":schema", - "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core/lib/db:sqlite", ], ) diff --git a/tensorflow/contrib/timeseries/BUILD b/tensorflow/contrib/timeseries/BUILD index b4ecb61a42..6ba069778c 100644 --- a/tensorflow/contrib/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/BUILD @@ -14,11 +14,8 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/contrib/timeseries/python/timeseries:estimators", - "//tensorflow/contrib/timeseries/python/timeseries:feature_keys", - "//tensorflow/contrib/timeseries/python/timeseries:input_pipeline", "//tensorflow/contrib/timeseries/python/timeseries:py_init", - "//tensorflow/contrib/timeseries/python/timeseries:saved_model_utils", + "//tensorflow/python:util", ], ) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index d0deedc50f..c86d06e923 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -92,10 +92,12 @@ tf_py_test( additional_deps = [ ":kalman_filter", "//third_party/py/numpy", + "//tensorflow/contrib/timeseries/python/timeseries:math_utils", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", + "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", ], ) @@ -210,6 +212,7 @@ tf_py_test( name = "varma_test", srcs = ["varma_test.py"], additional_deps = [ + ":state_space_model", ":varma", "//tensorflow/contrib/timeseries/python/timeseries:feature_keys", "//tensorflow/python:client", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index a3ab40ceef..e3cbd67721 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -153,7 +153,10 @@ sh_binary( "//tensorflow:tensorflow_py", "//tensorflow/contrib/boosted_trees:boosted_trees_pip", "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", + "//tensorflow/contrib/data/python/ops:prefetching_py", "//tensorflow/contrib/eager/python/examples:examples_pip", + "//tensorflow/contrib/eager/python:evaluator", + "//tensorflow/contrib/eager/python:summary_writer", "//tensorflow/contrib/gan:gan", "//tensorflow/contrib/graph_editor:graph_editor_pip", "//tensorflow/contrib/keras:keras", @@ -167,6 +170,7 @@ sh_binary( "//tensorflow/contrib/receptive_field:receptive_field_pip", "//tensorflow/contrib/session_bundle:session_bundle_pip", "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/signal:test_util", "//tensorflow/contrib/slim:slim", "//tensorflow/contrib/slim/python/slim/data:data_pip", "//tensorflow/contrib/slim/python/slim/nets:nets_pip", -- GitLab From d84e8eefc597b1f3391081d69ca7afe3728df65a Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Fri, 10 Nov 2017 20:17:48 -0800 Subject: [PATCH 0309/1801] Fix a bug where op name (instead of node name) should be used. PiperOrigin-RevId: 175371296 --- .../grappler/optimizers/layout_optimizer.cc | 2 +- .../optimizers/layout_optimizer_test.cc | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index e2e4bc3de8..ba5d13eeaf 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -673,7 +673,7 @@ class AgnosticNodeProcessor : public NodeProcessor { return true; } bool connected = - ops_format_agnostic.find(node->name()) != ops_format_agnostic.end(); + ops_format_agnostic.find(node->op()) != ops_format_agnostic.end(); if (!connected) { return false; } diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index e9febd7e18..b760cf2ff2 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -228,6 +228,37 @@ TEST_F(LayoutOptimizerTest, Pad) { test::ExpectTensorEqual(tensor_expected, tensor); } +TEST_F(LayoutOptimizerTest, Connectivity) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto i1 = ops::Identity(s.WithOpName("i1"), conv); + auto i2 = ops::Identity(s.WithOpName("i2"), i1); + auto i3 = ops::Identity(s.WithOpName("i3"), i2); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + // Make the graph not in topological order to test the handling of multi-hop + // connectivity (here we say two nodes are connected if all nodes in the + // middle are layout agnostic). If the graph is already in topological order, + // the problem is easier, where layout optimizer only needs to check + // single-hop connectivity. + NodeMap node_map_original(&item.graph); + auto node_i1 = node_map_original.GetNode("i1"); + auto node_i2 = node_map_original.GetNode("i2"); + node_i2->Swap(node_i1); + LayoutOptimizer optimizer; + optimizer.set_num_gpus(1); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + NodeMap node_map_output(&output); + auto node_i2_output = node_map_output.GetNode("i2"); + // Layout optimizer should process i2, as it detects i2 is connected with the + // Conv2D node two hops away. Similarly i1 is processed as well, as i1 is + // directly connected to the Conv2D node. The two added transposes between + // i1 and i2 should cancel each other, and as a result i2 is directly + // connected to i1. + EXPECT_EQ(node_i2_output->input(0), "i1"); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 736718d7acdae6f66b3726561048eaaa4eefb458 Mon Sep 17 00:00:00 2001 From: CSJY Date: Sat, 11 Nov 2017 12:32:18 +0800 Subject: [PATCH 0310/1801] remove redundant code --- tensorflow/core/kernels/slice_op.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 28a379774b..2faa1a24b1 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -262,7 +262,6 @@ class MklSliceOp : public OpKernel { HANDLE_DIM(1); HANDLE_DIM(2); HANDLE_DIM(3); - HANDLE_DIM(4); HANDLE_DIM(5); HANDLE_DIM(6); -- GitLab From 01e8710e3902a26692ac8bec32e41a8ffbf3b110 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 21:21:11 -0800 Subject: [PATCH 0311/1801] Change a hlo parser test. PiperOrigin-RevId: 175374316 --- tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index bed912d921..f41bb9e5cf 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -846,7 +846,7 @@ TEST_F(HloParserTest, CommaBetweenSubAttributes) { const string original = R"(HloModule test_comma_module: ENTRY %test_comma.v4 () -> f32[] { - ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"} + ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="const"} } )"; -- GitLab From 07ac134a4b2fa6f40f7fa8f7266b854a529ddcab Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Fri, 10 Nov 2017 22:14:36 -0800 Subject: [PATCH 0312/1801] Fix //tensorflow/contrib/lite/tools:gen_op_registration bazel build. PiperOrigin-RevId: 175376892 --- tensorflow/contrib/lite/tools/BUILD | 1 + tensorflow/contrib/lite/tools/gen_op_registration.cc | 3 ++- tensorflow/contrib/lite/tools/gen_op_registration.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 2d918fd4e8..3c9cec5d16 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -20,6 +20,7 @@ cc_library( hdrs = ["gen_op_registration.h"], deps = [ "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string", "@com_googlesource_code_re2//:re2", ], ) diff --git a/tensorflow/contrib/lite/tools/gen_op_registration.cc b/tensorflow/contrib/lite/tools/gen_op_registration.cc index 57c2567e3b..d80ea59170 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration.cc +++ b/tensorflow/contrib/lite/tools/gen_op_registration.cc @@ -15,8 +15,9 @@ limitations under the License. #include #include -#include "third_party/re2/re2.h" +#include "re2/re2.h" #include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/tools/gen_op_registration.h" namespace tflite { diff --git a/tensorflow/contrib/lite/tools/gen_op_registration.h b/tensorflow/contrib/lite/tools/gen_op_registration.h index 363bb2335c..318859e23d 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration.h +++ b/tensorflow/contrib/lite/tools/gen_op_registration.h @@ -16,6 +16,7 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_GEN_OP_REGISTRATION_H_ #include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string.h" namespace tflite { -- GitLab From 911784813cc6e188fcf06a3064b6d8bdbd665f30 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 20:49:54 -0800 Subject: [PATCH 0313/1801] Make linker script and platform visible to toco targets. --- tensorflow/BUILD | 7 +++++++ tensorflow/python/BUILD | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 54688e84d1..29b726c1a3 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -774,3 +774,10 @@ tf_cc_shared_object( "//tensorflow/core:tensorflow", ], ) + +exports_files( + [ + "tf_version_script.lds", + "tf_exported_symbols.lds", + ], +) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 76477384de..bec94cbcd5 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -45,6 +45,7 @@ py_library( "//tensorflow/compiler/aot/tests:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn/python/learn/datasets:__pkg__", # TODO(b/34059704): remove when fixed + "//tensorflow/contrib/lite/toco/python:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/debug:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/tools:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/tools/api/generator:__pkg__", @@ -133,6 +134,10 @@ py_library( "@absl_py//absl/flags", "@six_archive//:six", ], + visibility = [ + # TODO(b/34059704): remove when fixed + "//tensorflow/contrib/lite/toco/python:__pkg__", + ] ) py_library( -- GitLab From 677b9fcdcbae8d7fa1f9731327ff9b7982e463ca Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 10 Nov 2017 21:50:04 -0800 Subject: [PATCH 0314/1801] Fix XLA parsing failure. --- tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index bed912d921..f41bb9e5cf 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -846,7 +846,7 @@ TEST_F(HloParserTest, CommaBetweenSubAttributes) { const string original = R"(HloModule test_comma_module: ENTRY %test_comma.v4 () -> f32[] { - ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"} + ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="const"} } )"; -- GitLab From 930125481d244e9dfea73174f2b6eb2284b80586 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sat, 11 Nov 2017 01:02:08 -0800 Subject: [PATCH 0315/1801] Don't tromp on default visibility. --- tensorflow/python/BUILD | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index bec94cbcd5..3020a9b431 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -6,6 +6,7 @@ package( "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", "//tensorflow_models:__subpackages__", + "//tensorflow/contrib/lite/toco/python:__pkg__", ], ) @@ -134,10 +135,6 @@ py_library( "@absl_py//absl/flags", "@six_archive//:six", ], - visibility = [ - # TODO(b/34059704): remove when fixed - "//tensorflow/contrib/lite/toco/python:__pkg__", - ] ) py_library( -- GitLab From 88fdd17174cf9d0d34cc4ef3f71c499e6440cf5d Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sat, 11 Nov 2017 01:22:44 -0800 Subject: [PATCH 0316/1801] sort visibility rules --- tensorflow/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3020a9b431..629a16d851 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5,8 +5,8 @@ package( default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", - "//tensorflow_models:__subpackages__", "//tensorflow/contrib/lite/toco/python:__pkg__", + "//tensorflow_models:__subpackages__", ], ) -- GitLab From 1a70297c95643ab047f4ef069851523ea5d4d5b3 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Sat, 11 Nov 2017 07:46:20 -0800 Subject: [PATCH 0317/1801] Automated g4 rollback of changelist 175252067 PiperOrigin-RevId: 175401676 --- tensorflow/compiler/tf2xla/type_util.cc | 3 - tensorflow/compiler/xla/BUILD | 1 - tensorflow/compiler/xla/literal_util.cc | 99 +------ tensorflow/compiler/xla/literal_util.h | 23 -- tensorflow/compiler/xla/literal_util_test.cc | 62 ----- tensorflow/compiler/xla/primitive_util.cc | 8 +- tensorflow/compiler/xla/primitive_util.h | 7 - tensorflow/compiler/xla/service/backend.cc | 4 +- .../xla/service/cpu/cpu_runtime_test.cc | 4 +- .../compiler/xla/service/hlo_evaluator.cc | 4 - tensorflow/compiler/xla/service/hlo_runner.cc | 3 +- tensorflow/compiler/xla/shape_util.cc | 1 - .../compiler/xla/tests/literal_test_util.cc | 13 +- .../xla/tests/local_client_test_base.cc | 3 +- tensorflow/compiler/xla/types.h | 3 - tensorflow/compiler/xla/xla_data.proto | 13 +- tensorflow/core/framework/bfloat16.cc | 30 ++- tensorflow/core/framework/bfloat16_test.cc | 92 ------- tensorflow/core/framework/numeric_types.h | 251 +----------------- 19 files changed, 44 insertions(+), 580 deletions(-) diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc index c969212a1b..1efbe0ffb1 100644 --- a/tensorflow/compiler/tf2xla/type_util.cc +++ b/tensorflow/compiler/tf2xla/type_util.cc @@ -49,9 +49,6 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) { case tensorflow::DT_UINT64: *type = xla::U64; return Status::OK(); - case tensorflow::DT_BFLOAT16: - *type = xla::BF16; - return Status::OK(); case tensorflow::DT_HALF: *type = xla::F16; return Status::OK(); diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 515b572b0e..fa4d348ebd 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -77,7 +77,6 @@ cc_library( hdrs = ["types.h"], visibility = [":friends"], deps = [ - "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//third_party/eigen3", ], diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 93d3cd425f..0cb2223ae5 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -33,20 +33,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -namespace { -using tensorflow::int64; - -constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; - -// Converts between little and big endian, assuming elements in the array are 16 -// bits long. -void ConvertEndianShort(char* bytes, int64 size) { - CHECK_EQ(size / 2, 0); - for (int64 i = 0; i < size; i += 2) { - std::swap(bytes[i], bytes[i + 1]); - } -} -} // namespace namespace xla { @@ -183,8 +169,6 @@ Status Literal::Copy(const Literal& src_literal, return CopyRange(src_literal, src_base, dest_base, copy_size); case F16: return CopyRange(src_literal, src_base, dest_base, copy_size); - case BF16: - return CopyRange(src_literal, src_base, dest_base, copy_size); case F32: return CopyRange(src_literal, src_base, dest_base, copy_size); case F64: @@ -216,8 +200,6 @@ Status Literal::Copy(const Literal& src_literal, return *Literal::CreateR0(0); case F16: return *Literal::CreateR0(static_cast(0.0f)); - case BF16: - return *Literal::CreateR0(static_cast(0.0f)); case F32: return *Literal::CreateR0(0); case F64: @@ -303,9 +285,6 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(-std::numeric_limits::infinity())); - case BF16: - return *Literal::CreateR0( - static_cast(-std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no minimum value"; case OPAQUE: @@ -342,9 +321,6 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(std::numeric_limits::infinity())); - case BF16: - return *Literal::CreateR0( - static_cast(std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no maximum value"; case OPAQUE: @@ -452,7 +428,6 @@ std::unique_ptr Literal::Transpose( // The shape with affine layout resulting from that operation will be // F32[8,11]{0,1}, since it leaves the original most minor (the 8 sized), the // most minor. - // // Essentially, given MinMaj(Di) the position of the Di dimension within the // minor to major vector, and given T(Di) the index that the original Di // dimension has within the transposed array, a layout is affine if @@ -561,9 +536,6 @@ string Literal::GetAsString( } case F16: return tensorflow::strings::StrCat(Get(multi_index)); - case BF16: - return tensorflow::strings::StrCat( - static_cast(Get(multi_index))); default: return tensorflow::strings::StrCat( "[", PrimitiveType_Name(shape().element_type()), "]"); @@ -771,8 +743,6 @@ void* Literal::MutableInternalData() { return reinterpret_cast(c64s_.data()); case F16: return reinterpret_cast(f16s_.data()); - case BF16: - return reinterpret_cast(bf16s_.data()); default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -815,9 +785,6 @@ void Literal::Reserve(int64 num_elements) { case F16: Resize(num_elements, static_cast(0.0f)); break; - case BF16: - Resize(num_elements, static_cast(0.0f)); - break; default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -857,9 +824,6 @@ tensorflow::Status Literal::ValidateLiteral() const { case F16: actual = f16s().size() / sizeof(half); break; - case BF16: - actual = bf16s().size(); - break; default: return tensorflow::errors::Unimplemented( "unhandled element type for literal validation: " + @@ -956,7 +920,6 @@ StatusOr> ConvertIfDestTypeMatches( CONVERT_IF_TYPES_MATCH(F16) CONVERT_IF_TYPES_MATCH(F32) CONVERT_IF_TYPES_MATCH(F64) - CONVERT_IF_TYPES_MATCH(BF16) #undef CONVERT_IF_TYPES_MATCH case C64: return ConvertToC64(src_literal); @@ -986,9 +949,8 @@ StatusOr> Literal::Convert( CONVERT_IF_DEST_TYPE_MATCHES(F16) CONVERT_IF_DEST_TYPE_MATCHES(F32) CONVERT_IF_DEST_TYPE_MATCHES(F64) - CONVERT_IF_DEST_TYPE_MATCHES(BF16) #undef CONVERT_IF_DEST_TYPE_MATCHES - // Other types are not yet supported. + // Other types are not yet supported. default: return InvalidArgument("Unimplemented: Convert from type %s to type %s", PrimitiveType_Name(shape().element_type()).c_str(), @@ -1057,8 +1019,6 @@ bool Literal::operator==(const Literal& other) const { return EqualElements(*this, other, 0, &multi_index); case F16: return EqualElements(*this, other, 0, &multi_index); - case BF16: - return EqualElements(*this, other, 0, &multi_index); case C64: return EqualElements(*this, other, 0, &multi_index); default: @@ -1168,18 +1128,13 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { + // TODO - there is an endianess problem here. fix it, or wait for uint16 + // support in protobuf auto values = mutable_f16s(); return tensorflow::gtl::MutableArraySlice(values->data(), values->size()); } -template <> -tensorflow::gtl::MutableArraySlice -Literal::GetMutableArraySlice() { - auto values = mutable_bf16s(); - return {values->data(), values->size()}; -} - template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { CHECK_EQ(shape().element_type(), PRED); @@ -1250,12 +1205,6 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { f16s().size() / sizeof(half)); } -template <> -tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { - CHECK_EQ(shape().element_type(), BF16); - return {bf16s().data(), bf16s().size()}; -} - template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { @@ -1304,9 +1253,6 @@ bool Literal::IsAll(int8 value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); - case BF16: - return AllElementsEqualValue(*this, - static_cast(value)); case PRED: if (value == 0) { return AllElementsEqualValue(*this, false); @@ -1328,9 +1274,6 @@ bool Literal::IsAllFloat(float value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); - case BF16: - return AllElementsEqualValue(*this, - static_cast(value)); default: return false; } @@ -1367,8 +1310,6 @@ bool Literal::IsZero(tensorflow::gtl::ArraySlice indices) const { return Get(indices) == complex64(0.0f, 0.0f); case F16: return Get(indices) == static_cast(0.0f); - case BF16: - return Get(indices) == static_cast(0.0f); case PRED: return Get(indices) == false; default: @@ -1436,12 +1377,6 @@ void Literal::Resize(int64 num_elements, half value) { mutable_f16s()->resize(num_elements, value); } -template <> -void Literal::Resize(int64 num_elements, bfloat16 value) { - CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); - mutable_bf16s()->resize(num_elements, value); -} - template <> void Literal::Resize(int64 num_elements, complex64 value) { CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); @@ -1490,19 +1425,6 @@ LiteralProto Literal::ToProto() const { *proto.mutable_f16s() = string(reinterpret_cast(f16s_.data()), f16s_.size() * sizeof(half)); - if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto.mutable_f16s()->data()), - proto.f16s().size()); - } - break; - case BF16: - *proto.mutable_bf16s() = - string(reinterpret_cast(bf16s_.data()), - bf16s_.size() * sizeof(bfloat16)); - if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto.mutable_bf16s()->data()), - proto.bf16s().size()); - } break; case F32: CopyToRepeatedField(proto.mutable_f32s(), f32s()); @@ -1571,21 +1493,6 @@ void Literal::CopyFromProto(const LiteralProto& literal_proto) { CHECK_EQ(0, s.size() % sizeof(half)); f16s_ = std::vector(s.size() / sizeof(half)); memcpy(f16s_.data(), s.data(), s.size()); - - if (!kLittleEndian) { - ConvertEndianShort(reinterpret_cast(f16s_.data()), s.size()); - } - break; - } - case BF16: { - const string& s(literal_proto.bf16s()); - CHECK_EQ(0, s.size() % sizeof(bfloat16)); - bf16s_ = std::vector(s.size() / sizeof(bfloat16)); - memcpy(bf16s_.data(), s.data(), s.size()); - - if (!kLittleEndian) { - ConvertEndianShort(reinterpret_cast(bf16s_.data()), s.size()); - } break; } case F32: diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index f37e529caf..667f926c46 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -163,11 +163,6 @@ class Literal { const std::vector& c64s() const { return c64s_; } std::vector* mutable_c64s() { return &c64s_; } - int bf16s_size() const { return bf16s().size(); } - bfloat16 bf16s(int i) const { return bf16s_[i]; } - const std::vector& bf16s() const { return bf16s_; } - std::vector* mutable_bf16s() { return &bf16s_; } - int tuple_literals_size() const { return tuple_literals().size(); } const Literal& tuple_literals(int i) const { return tuple_literals_[i]; } Literal* add_tuple_literals() { @@ -627,7 +622,6 @@ class Literal { std::vector u16s_; std::vector u32s_; std::vector u64s_; - std::vector bf16s_; std::vector f16s_; std::vector f32s_; std::vector f64s_; @@ -680,9 +674,6 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; -template <> -tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; - template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; @@ -723,9 +714,6 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); -template <> -tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); - template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); @@ -759,9 +747,6 @@ void Literal::Resize(int64 num_elements, double value); template <> void Literal::Resize(int64 num_elements, half value); -template <> -void Literal::Resize(int64 num_elements, bfloat16 value); - template <> void Literal::Resize(int64 num_elements, complex64 value); @@ -1005,14 +990,6 @@ inline half Literal::Get( return GetArraySlice()[linear_index]; } -template <> -inline bfloat16 Literal::Get( - tensorflow::gtl::ArraySlice multi_index) const { - CHECK(shape().element_type() == BF16); - int64 linear_index = LinearIndex(multi_index); - return GetArraySlice()[linear_index]; -} - template void Literal::Set(tensorflow::gtl::ArraySlice multi_index, NativeT value) { diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 1e08101759..6d596da4ad 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -110,18 +110,6 @@ TEST_F(LiteralUtilTest, LiteralScalarToString) { auto c64_lit = Literal::CreateR0({3.14f, 2.78f}); ASSERT_EQ("(3.14, 2.78)", c64_lit->ToString()); - - auto bf16_lit = Literal::CreateR0(static_cast(0.5f)); - ASSERT_EQ("0.5", bf16_lit->ToString()); - - // 3.14 will be rounded to 3.125 in bfloat16 format (Round to nearest even). - auto bf16_lit_truncated = - Literal::CreateR0(static_cast(3.14f)); - ASSERT_EQ("3.140625", bf16_lit_truncated->ToString()); - - auto bf16_lit_truncated2 = - Literal::CreateR0(static_cast(9.001f)); - ASSERT_EQ("9", bf16_lit_truncated2->ToString()); } TEST_F(LiteralUtilTest, LiteralVectorToString) { @@ -409,18 +397,6 @@ TEST_F(LiteralUtilTest, IsAll) { EXPECT_FALSE(Literal::CreateR2({{h8}, {h9}})->IsAll(8)); EXPECT_FALSE(Literal::CreateR2({{h9}, {h8}})->IsAll(8)); - bfloat16 b8(8.0f); - bfloat16 b9(9.0f); - - EXPECT_TRUE(Literal::CreateR2({{b8}, {b8}})->IsAll(8)); - EXPECT_FALSE(Literal::CreateR2({{b8}, {b9}})->IsAll(8)); - EXPECT_FALSE(Literal::CreateR2({{b9}, {b8}})->IsAll(8)); - - // 9.001 will be truncated to 9.0 - bfloat16 b91(9.001f); - bfloat16 b90(9.00f); - EXPECT_TRUE(Literal::CreateR2({{b91}, {b90}})->IsAll(9.0)); - complex64 c8_9 = {8, 9}; EXPECT_FALSE(Literal::CreateR2({{c8_9}, {c8_9}})->IsAll(8)); @@ -715,30 +691,6 @@ TEST_F(LiteralUtilTest, PopulateR2C64) { EXPECT_EQ(output, *expected); } -TEST_F(LiteralUtilTest, PopulateWithValueR0BF16) { - Literal output; - bfloat16 h(0.25f); - output.PopulateWithValue(h, {}); - auto expected = Literal::CreateR0(h); - EXPECT_EQ(output, *expected); -} - -TEST_F(LiteralUtilTest, PopulateWithValueR1BF16) { - Literal output; - bfloat16 h(0.5f); - output.PopulateWithValue(h, {3}); - auto expected = Literal::CreateR1({h, h, h}); - EXPECT_EQ(output, *expected); -} - -TEST_F(LiteralUtilTest, PopulateWithValueR2BF16) { - Literal output; - bfloat16 h(2.0f); - output.PopulateWithValue(h, {2, 2}); - auto expected = Literal::CreateR2({{h, h}, {h, h}}); - EXPECT_EQ(output, *expected); -} - TEST_F(LiteralUtilTest, PopulateWithValueR0F32) { Literal output; output.PopulateWithValue(2.5f, {}); @@ -1023,14 +975,6 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { {{half(26.0), half(0.0), half(28.0), half(0.0)}, {half(0.0), half(31.0), half(0.0), half(33.0)}}, }}, layout_r4_dim0major_); - auto bf16 = Literal::CreateR4WithLayout({{ - {{bfloat16(10.0), bfloat16(0.0), bfloat16(12.0), bfloat16(0.0)}, - {bfloat16(0.0), bfloat16(15.0), bfloat16(0.0), bfloat16(17.0)}}, - {{bfloat16(0.0), bfloat16(19.0), bfloat16(0.0), bfloat16(21.0)}, - {bfloat16(22.0), bfloat16(0.0), bfloat16(24.0), bfloat16(0.0)}}, - {{bfloat16(26.0), bfloat16(0.0), bfloat16(28.0), bfloat16(0.0)}, - {bfloat16(0.0), bfloat16(31.0), bfloat16(0.0), bfloat16(33.0)}}, - }}, layout_r4_dim0major_); auto f32 = Literal::CreateR4WithLayout({{ {{10.0f, 0.0f, 12.0f, 0.0f}, {0.0f, 15.0f, 0.0f, 17.0f}}, {{0.0f, 19.0f, 0.0f, 21.0f}, {22.0f, 0.0f, 24.0f, 0.0f}}, @@ -1064,12 +1008,6 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { conv = s8->Convert(PRED).ConsumeValueOrDie(); EXPECT_EQ(*conv, *pred); - conv = bf16->Convert(S32).ConsumeValueOrDie(); - EXPECT_EQ(*conv, *s32); - - conv = bf16->Convert(F32).ConsumeValueOrDie(); - EXPECT_EQ(*conv, *f32); - conv = pred->Convert(S32).ConsumeValueOrDie(); EXPECT_EQ(*conv, *int32_pred); diff --git a/tensorflow/compiler/xla/primitive_util.cc b/tensorflow/compiler/xla/primitive_util.cc index 2bce56b7bd..2113b5e06f 100644 --- a/tensorflow/compiler/xla/primitive_util.cc +++ b/tensorflow/compiler/xla/primitive_util.cc @@ -78,11 +78,6 @@ PrimitiveType NativeToPrimitiveType() { return F64; } -template <> -PrimitiveType NativeToPrimitiveType() { - return BF16; -} - template <> PrimitiveType NativeToPrimitiveType() { return F16; @@ -94,7 +89,7 @@ PrimitiveType NativeToPrimitiveType() { } bool IsFloatingPointType(PrimitiveType type) { - return type == F16 || type == F32 || type == F64 || type == BF16; + return type == F16 || type == F32 || type == F64; } bool IsComplexType(PrimitiveType type) { return type == C64; } @@ -123,7 +118,6 @@ int BitWidth(PrimitiveType type) { case S16: case U16: case F16: - case BF16: return 16; case U32: diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h index 19c6a13888..a49c8b86fc 100644 --- a/tensorflow/compiler/xla/primitive_util.h +++ b/tensorflow/compiler/xla/primitive_util.h @@ -77,8 +77,6 @@ template <> PrimitiveType NativeToPrimitiveType(); template <> PrimitiveType NativeToPrimitiveType(); -template <> -PrimitiveType NativeToPrimitiveType(); // Complex template <> @@ -169,11 +167,6 @@ struct PrimitiveTypeToNative { using type = half; }; -template <> -struct PrimitiveTypeToNative { - using type = bfloat16; -}; - // Complex template <> struct PrimitiveTypeToNative { diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index 05f2d06278..9abe30e3f3 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS - #include "tensorflow/compiler/xla/service/backend.h" #include #include #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/platform_util.h" diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc index f385829cdf..f8e260dd90 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc @@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS + #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include #include #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/client/local_client.h" diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index a722d1b3d9..88b77ccdd0 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1450,10 +1450,6 @@ HloEvaluator::HloEvaluator() { typed_visitors_[F32] = MakeUnique>(this); typed_visitors_[F64] = MakeUnique>(this); typed_visitors_[C64] = MakeUnique>(this); - - typed_visitors_[BF16] = MakeUnique([](HloInstruction*) { - return Unimplemented("HloEvaluator: unhandled primitive type: BF16."); - }); typed_visitors_[TUPLE] = MakeUnique([](HloInstruction*) { return Unimplemented("HloEvaluator: unhandled primitive type: TUPLE."); }); diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 158fb9a546..f463e57d99 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/hlo_runner.h" @@ -20,6 +19,8 @@ limitations under the License. #include #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 4d0bafa908..b5eb81dfc6 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -263,7 +263,6 @@ StatusOr MakeShapeWithLayoutInternal( case S32: case S64: case F16: - case BF16: case F32: case F64: return true; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 75c9a0d3fb..95a52ecd2f 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -116,18 +116,16 @@ template ::testing::AssertionResult CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs) { auto ulhs = tensorflow::bit_cast(lhs); auto urhs = tensorflow::bit_cast(rhs); - auto lhs_double = static_cast(lhs); - auto rhs_double = static_cast(rhs); if (ulhs != urhs) { return ::testing::AssertionFailure() << tensorflow::strings::Printf( "floating values are not bitwise-equal; and equality testing " "was requested: %s=%g=%a vs %s=%g=%a", tensorflow::strings::StrCat(tensorflow::strings::Hex(ulhs)) .c_str(), - lhs_double, lhs_double, + lhs, lhs, tensorflow::strings::StrCat(tensorflow::strings::Hex(urhs)) .c_str(), - rhs_double, rhs_double); + rhs, rhs); } return ::testing::AssertionSuccess(); } @@ -151,10 +149,6 @@ template // Specializations for floating types that do bitwise comparisons when equality // comparison is requested. template <> -::testing::AssertionResult CompareEqual(bfloat16 lhs, bfloat16 rhs) { - return CompareFloatsBitwiseEqual(lhs, rhs); -} -template <> ::testing::AssertionResult CompareEqual(float lhs, float rhs) { return CompareFloatsBitwiseEqual(lhs, rhs); } @@ -244,9 +238,6 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, case U64: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; - case BF16: - match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); - break; case F32: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index d98875dbc2..c11e1df0a7 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -12,12 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/tests/local_client_test_base.h" #include +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/map_util.h" diff --git a/tensorflow/compiler/xla/types.h b/tensorflow/compiler/xla/types.h index 9fa4297523..3b19ca321c 100644 --- a/tensorflow/compiler/xla/types.h +++ b/tensorflow/compiler/xla/types.h @@ -19,7 +19,6 @@ limitations under the License. #include #include "third_party/eigen3/Eigen/Core" -#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/platform/types.h" #include @@ -33,8 +32,6 @@ using ::tensorflow::int16; using ::tensorflow::int32; using ::tensorflow::int64; -using ::tensorflow::bfloat16; - using ::tensorflow::uint8; using ::tensorflow::uint16; using ::tensorflow::uint32; diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index eac8f2ff07..7146604708 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -46,12 +46,6 @@ enum PrimitiveType { // converted to f16 from f32 at arbirary points in the computation. F16 = 10; F32 = 11; - - // Truncated 16 bit floating-point format. This is similar to IEEE's 16 bit - // floating-point format, but uses 1 bit for the sign, 8 bits for the exponent - // and 7 bits for the mantissa. - BF16 = 16; - F64 = 12; // Complex values of fixed width. @@ -69,8 +63,6 @@ enum PrimitiveType { // An opaque type used for passing context specific data to a custom // operation. OPAQUE = 14; - - // Next = 17 } // Describes the value held inside padding elements. @@ -318,10 +310,7 @@ message LiteralProto { repeated double f64s = 9; repeated float c64s = 12; // Stored as interleaved real, imag floats. repeated LiteralProto tuple_literals = 10; - // The F16s and BF16s are encoded in little endian byte order - bytes f16s = 11; - bytes bf16s = 13; - // Next = 14 + bytes f16s = 11; // Note: the F16s are encoded in little endian byte order } message WindowDimension { diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc index 1a6f355c77..a5ac0e1a8d 100644 --- a/tensorflow/core/framework/bfloat16.cc +++ b/tensorflow/core/framework/bfloat16.cc @@ -18,24 +18,32 @@ limitations under the License. namespace tensorflow { void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) { - for (int64 i = 0; i < size; ++i) { - dst[i] = bfloat16(src[i]); - } + const uint16_t* p = reinterpret_cast(src); + uint16_t* q = reinterpret_cast(dst); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + for (; size != 0; p += 2, q++, size--) { + *q = p[0]; + } +#else + for (; size != 0; p += 2, q++, size--) { + *q = p[1]; + } +#endif } void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) { const uint16_t* p = reinterpret_cast(src); uint16_t* q = reinterpret_cast(dst); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p++, q += 2, size--) { - q[0] = *p; - q[1] = 0; + for (; size != 0; p++, q += 2, size--) { + q[0] = *p; + q[1] = 0; } -#else - for (; size != 0; p++, q += 2, size--) { - q[0] = 0; - q[1] = *p; - } +#else + for (; size != 0; p++, q += 2, size--) { + q[0] = 0; + q[1] = *p; + } #endif } diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index a25b764ea2..af4e6a4411 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/core/framework/bfloat16.h" -#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -28,97 +27,6 @@ TEST(Bfloat16Test, Simple) { EXPECT_EQ(0x4140, a.value); } -float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa, - uint32_t low_mantissa) { - return bit_cast((sign << 31) + (exponent << 23) + - (high_mantissa << 16) + low_mantissa); -} - -struct Bfloat16TestParam { - float input; - float expected; -}; - -class Bfloat16Test : public ::testing::Test, - public ::testing::WithParamInterface {}; - -TEST_P(Bfloat16Test, RoundOrTruncate) { - bfloat16 a(GetParam().input); - if (std::isnan(GetParam().input)) { - EXPECT_TRUE(std::isnan(float(a))); - return; - } - EXPECT_EQ(GetParam().expected, float(a)); -} - -INSTANTIATE_TEST_CASE_P( - Bfloat16Test_Instantiation, Bfloat16Test, - ::testing::Values( - // More than half. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b1111010111000011), - BinaryToFloat(0, 0b10000000, 0b1001001, 0b0000000000000000)}, - - Bfloat16TestParam{ - BinaryToFloat(1, 0b10000000, 0b1001000, 0b1111010111000011), - BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, - - // Exact half. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // NaN stays at NaN. - Bfloat16TestParam{ - BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000001), - BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, - - // NaN stays at NaN -- no exponents overflow. - Bfloat16TestParam{ - BinaryToFloat(0, 0b11111111, 0b1111111, 0b1111111111111111), - BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, - - // More than half, round to an odd number. - Bfloat16TestParam{ - BinaryToFloat(1, 0b10000000, 0b1001000, 0b1100000000000000), - BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, - - // Less than half, truncate. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // Less than half, truncate. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0100000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // Exact at half, but result is already even. - Bfloat16TestParam{ - BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, - - // Denormal values. - Bfloat16TestParam{ - BinaryToFloat(0, 0b00000000, 0b1001000, 0b1000000000000000), - BinaryToFloat(0, 0b00000000, 0b1001000, 0b0000000000000000)}, - Bfloat16TestParam{ - BinaryToFloat(0, 0b00000000, 0b1111111, 0b1100000000000000), - BinaryToFloat(0, 0b00000001, 0b0000000, 0b0000000000000000)})); -TEST(Bfloat16Test, RoundWithFractionOverflow) { - // Still works with fraction overflow -- round to 4./ - // - // Input 3.9960938: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1100000000000000 - // - // Should round to 4.0: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 - bfloat16 a(3.9960938f); - EXPECT_EQ(4.0, float(a)); -} - TEST(Bfloat16Test, Conversion) { float a[100]; for (int i = 0; i < 100; ++i) { diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index d005de2af1..a630bee38d 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -44,262 +44,29 @@ typedef Eigen::QUInt16 quint16; // see framework/bfloat16.h for description. struct bfloat16 { EIGEN_DEVICE_FUNC bfloat16() {} - - explicit EIGEN_DEVICE_FUNC bfloat16(float v) { - uint32_t input; - memcpy(&input, &v, sizeof(uint32_t)); - - if ((~input & 0x7f800000) == 0 && (input & 0x007fffff) != 0) { - // If the value is a NaN, squash it to a qNaN with msb of fraction set, - // this makes sure after truncation we don't end up with an inf. - // - // qNaN magic: All exponent bits set + most significant bit of fraction - // set. - value = 0x7fc0; - } else { - // Fast rounding algorithm that rounds a half value to nearest even. This - // reduces expected error when we convert a large number of floats. Here - // is how it works: - // - // Definitions: - // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits - // with the following tags: - // - // Sign | Exp (8 bits) | Frac (23 bits) - // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT - // - // S: Sign bit. - // E: Exponent bits. - // F: First 6 bits of fraction. - // L: Least significant bit of resulting bfloat16 if we truncate away the - // rest of the float32. This is also the 7th bit of fraction - // R: Rounding bit, 8th bit of fraction. - // T: Sticky bits, rest of fraction, 15 bits. - // - // To round half to nearest even, there are 3 cases where we want to round - // down (simply truncate the result of the bits away, which consists of - // rounding bit and sticky bits) and two cases where we want to round up - // (truncate then add one to the result). - // - // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of - // 1s) as the rounding bias, adds the rounding bias to the input, then - // truncates the last 16 bits away. - // - // To understand how it works, we can analyze this algorithm case by case: - // - // 1. L = 0, R = 0: - // Expect: round down, this is less than half value. - // - // Algorithm: - // - Rounding bias: 0x7fff + 0 = 0x7fff - // - Adding rounding bias to input may create any carry, depending on - // whether there is any value set to 1 in T bits. - // - R may be set to 1 if there is a carry. - // - L remains 0. - // - Note that this case also handles Inf and -Inf, where all fraction - // bits, including L, R and Ts are all 0. The output remains Inf after - // this algorithm. - // - // 2. L = 1, R = 0: - // Expect: round down, this is less than half value. - // - // Algorithm: - // - Rounding bias: 0x7fff + 1 = 0x8000 - // - Adding rounding bias to input doesn't change sticky bits but - // adds 1 to rounding bit. - // - L remains 1. - // - // 3. L = 0, R = 1, all of T are 0: - // Expect: round down, this is exactly at half, the result is already - // even (L=0). - // - // Algorithm: - // - Rounding bias: 0x7fff + 0 = 0x7fff - // - Adding rounding bias to input sets all sticky bits to 1, but - // doesn't create a carry. - // - R remains 1. - // - L remains 0. - // - // 4. L = 1, R = 1: - // Expect: round up, this is exactly at half, the result needs to be - // round to the next even number. - // - // Algorithm: - // - Rounding bias: 0x7fff + 1 = 0x8000 - // - Adding rounding bias to input doesn't change sticky bits, but - // creates a carry from rounding bit. - // - The carry sets L to 0, creates another carry bit and propagate - // forward to F bits. - // - If all the F bits are 1, a carry then propagates to the exponent - // bits, which then creates the minimum value with the next exponent - // value. Note that we won't have the case where exponents are all 1, - // since that's either a NaN (handled in the other if condition) or inf - // (handled in case 1). - // - // 5. L = 0, R = 1, any of T is 1: - // Expect: round up, this is greater than half. - // - // Algorithm: - // - Rounding bias: 0x7fff + 0 = 0x7fff - // - Adding rounding bias to input creates a carry from sticky bits, - // sets rounding bit to 0, then create another carry. - // - The second carry sets L to 1. - // - // Examples: - // - // Exact half value that is already even: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000 - // - // This falls into case 3. We truncate the rest of 16 bits and no - // carry is created into F and L: - // - // Output: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 - // - // Exact half value, round to next even number: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000 - // - // This falls into case 4. We create a carry from R and T, - // which then propagates into L and F: - // - // Output: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 - // - // - // Max denormal value round to min normal value: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111 - // - // This falls into case 4. We create a carry from R and T, - // propagate into L and F, which then propagates into exponent - // bits: - // - // Output: - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 - // - // Max normal value round to Inf: - // Input: - // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) - // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT - // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111 - // - // This falls into case 4. We create a carry from R and T, - // propagate into L and F, which then propagates into exponent - // bits: - // - // Sign | Exp (8 bit) | Frac (first 7 bit) - // S E E E E E E E E F F F F F F L - // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 - // - // - // Least significant bit of resulting bfloat. - uint32_t lsb = (input >> 16) & 1; - uint32_t rounding_bias = 0x7fff + lsb; - input += rounding_bias; - value = static_cast(input >> 16); - } - } - - template - explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) - : bfloat16(static_cast(val)) {} - - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { - float result; - - uint16_t* q = reinterpret_cast(&result); - + EIGEN_DEVICE_FUNC explicit bfloat16(const float v) { + const uint16_t* p = reinterpret_cast(&v); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - q[0] = value; - q[1] = 0; + value = p[0]; #else - q[0] = 0; - q[1] = value; + value = p[1]; #endif - return result; - } - - EIGEN_DEVICE_FUNC explicit operator bool() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator Eigen::half() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator short() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator int() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator char() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator signed char() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned char() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned int() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned long() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator unsigned long long() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator long long() const { - return static_cast(float(*this)); - } - - EIGEN_DEVICE_FUNC explicit operator double() const { - return static_cast(float(*this)); } uint16_t value; }; -inline bool operator==(const bfloat16 a, const bfloat16 b) { - return a.value == b.value; -} - -inline bool operator!=(const bfloat16 a, const bfloat16 b) { - return a.value != b.value; -} - } // end namespace tensorflow namespace Eigen { template <> struct NumTraits : GenericNumTraits {}; -using ::tensorflow::operator==; -using ::tensorflow::operator!=; +EIGEN_STRONG_INLINE bool operator==(const tensorflow::bfloat16 a, + const tensorflow::bfloat16 b) { + return a.value == b.value; +} + } // namespace Eigen #ifdef COMPILER_MSVC -- GitLab From 4ded6f56f38f8ad5973bd58421e3a5f139f3c5de Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sat, 11 Nov 2017 08:18:42 -0800 Subject: [PATCH 0318/1801] Map graph building for toco testing on cpu, fix visibility. --- .../contrib/lite/testing/generate_examples.py | 13 +++++++------ tensorflow/python/BUILD | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 86540d58a6..7aadc8aab1 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -379,12 +379,13 @@ def make_zip_of_tests(zip_path, report["toco_log"] = "" tf.reset_default_graph() - try: - inputs, outputs = make_graph(param_dict_real) - except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, - ValueError): - report["tf_log"] += traceback.format_exc() - return None, report + with tf.device('/cpu:0'): + try: + inputs, outputs = make_graph(param_dict_real) + except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, + ValueError): + report["tf_log"] += traceback.format_exc() + return None, report sess = tf.Session() try: diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 629a16d851..f4dd565fc3 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -7,6 +7,8 @@ package( "//tensorflow:internal", "//tensorflow/contrib/lite/toco/python:__pkg__", "//tensorflow_models:__subpackages__", + # TODO(aselle): to pass open source test. + "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__", ], ) -- GitLab From 692ee62a63a54d9fe02b3ef6e4e62de490046719 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 11 Nov 2017 10:22:46 -0800 Subject: [PATCH 0319/1801] Saveable iterator for Dataset.from_sparse_tensor_slice(..). PiperOrigin-RevId: 175408013 --- .../contrib/data/python/kernel_tests/BUILD | 5 +- .../dataset_constructor_op_test.py | 174 +++++------------- .../dataset_serialization_test_base.py | 166 ++++++++++++----- tensorflow/core/kernels/dataset.h | 1 + .../kernels/sparse_tensor_slice_dataset_op.cc | 68 ++++++- tensorflow/core/util/sparse/group_iterator.h | 6 + 6 files changed, 247 insertions(+), 173 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 552958b986..78e1d4937a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -96,8 +96,8 @@ py_test( "nomac", # b/62040583 ], deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/contrib/data/python/ops:transformation_ops", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -109,7 +109,6 @@ py_test( "//tensorflow/python:resource_variable_ops", "//tensorflow/python:session", "//tensorflow/python:sparse_tensor", - "//tensorflow/python:training", "//tensorflow/python/data/util:nest", "//third_party/py/numpy", ], @@ -125,9 +124,11 @@ py_library( visibility = ["//visibility:private"], deps = [ "//tensorflow/contrib/data/python/ops:iterator_ops", + "//tensorflow/python:client_testlib", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", "//tensorflow/python:platform", + "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", "//tensorflow/python:util", "//third_party/py/numpy", diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py index c3d6bfc097..0f1c8838ca 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py @@ -17,14 +17,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import threading import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import dataset_ops -from tensorflow.contrib.data.python.ops import iterator_ops from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.util import nest @@ -36,7 +35,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test -from tensorflow.python.training import saver as saver_lib class DatasetConstructorTest(test.TestCase): @@ -574,135 +572,63 @@ class DatasetConstructorTest(test.TestCase): new = batching._RestructuredDataset(dataset, new_types, new_shape_lists) # pylint: enable=protected-access - def _iterator_checkpoint_prefix(self): - return os.path.join(self.get_temp_dir(), "iterator") - def _testSaveRestoreFromTensorsUtility(self, start, break_range, stop): - path = self._iterator_checkpoint_prefix() - step = 0 - meta_filename = path + "-%d.meta" % step +class DatasetConstructorSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): - components = (np.array(1), np.array([1, 2, 3]), np.array(37.0)) + def _build_tensor_dataset(self, variable_array): + components = (variable_array, np.array([1, 2, 3]), np.array(37.0)) - with ops.Graph().as_default() as g: - iterator = ( - dataset_ops.Dataset.from_tensors(components) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - saveable = iterator_ops.make_saveable_from_iterator(iterator) - ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) - for t in nest.flatten(get_next): - ops.add_to_collection("get_next", t) - saver = saver_lib.Saver() - with self.test_session(graph=g) as sess: - sess.run(init_op) - for _ in range(start, break_range): - result = sess.run(get_next) - for component, result_component in zip(components, result): - self.assertAllEqual(component, result_component) - saver.save(sess, path, step) - - with ops.Graph().as_default() as g: - saver = saver_lib.import_meta_graph(meta_filename) - with self.test_session(graph=g) as sess: - get_next = nest.pack_sequence_as(("a", "b", "c"), - ops.get_collection("get_next")) - saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir())) - for _ in range(break_range, stop): - result = sess.run(get_next) - for component, result_component in zip(components, result): - self.assertAllEqual(component, result_component) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) + return dataset_ops.Dataset.from_tensors(components) - def testRestoreFromTensors(self): - self._testSaveRestoreFromTensorsUtility(0, 0, 1) + def testFromTensorsCore(self): + # Equal length components + arr = np.array(1) + num_outputs = 1 + diff_arr = np.array(2) + self.run_core_tests(lambda: self._build_tensor_dataset(arr), + lambda: self._build_tensor_dataset(diff_arr), + num_outputs) - def testRestoreExhuatedIteratorFromTensors(self): - self._testSaveRestoreFromTensorsUtility(0, 1, 1) + def _build_tensor_slices_dataset(self, components): + return dataset_ops.Dataset.from_tensor_slices(components) - def _build_graph_tensor_slices(self, components): - iterator = dataset_ops.Dataset.from_tensor_slices( - components).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - saveable = iterator_ops.make_saveable_from_iterator(iterator) - ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) - for t in nest.flatten(get_next): - ops.add_to_collection("get_next", t) - return init_op, get_next - - def _testSaveRestoreFromTensorSlicesUtility(self, start, break_range, stop): - path = self._iterator_checkpoint_prefix() - step = 0 - meta_filename = path + "-%d.meta" % step - - components = (np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile( - np.array([[12], [13], [14], [15]]), 22), + def testFromTensorSlicesCore(self): + # Equal length components + components = (np.tile(np.array([[1], [2], [3], [4]]), 20), + np.tile(np.array([[12], [13], [14], [15]]), 22), np.array([37.0, 38.0, 39.0, 40.0])) - with ops.Graph().as_default() as g: - init_op, get_next = self._build_graph_tensor_slices(components) - saver = saver_lib.Saver() - with self.test_session(graph=g) as sess: - sess.run(init_op) - for i in range(start, break_range): - result = sess.run(get_next) - for component, result_component in zip(components, result): - self.assertAllEqual(component[i], result_component) - saver.save(sess, path, step) - - with ops.Graph().as_default() as g: - saver = saver_lib.import_meta_graph(meta_filename) - with self.test_session(graph=g) as sess: - get_next = nest.pack_sequence_as(("a", "b", "c"), - ops.get_collection("get_next")) - saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir())) - for i in range(break_range, stop): - result = sess.run(get_next) - for component, result_component in zip(components, result): - self.assertAllEqual(component[i], result_component) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testRestoreFromTensorSlices(self): - self._testSaveRestoreFromTensorSlicesUtility(0, 4, 2) - - def testRestoreExhaustedIteratorFromTensorSlices(self): - self._testSaveRestoreFromTensorSlicesUtility(0, 4, 4) - - def tesRestoreFromTensorSlicesWithDict(self): - - path = self._iterator_checkpoint_prefix() - step = 0 - meta_filename = path + "-%d.meta" % step - - components = {"foo": [1, 2, 3], "bar": [[4.0], [5.0], [6.0]]} - - with ops.Graph().as_default() as g: - init_op, get_next = self._build_graph_tensor_slices(components) - saver = saver_lib.Saver() - with self.test_session(graph=g) as sess: - sess.run(init_op) - for i in range(2): - results = sess.run(get_next) - self.assertEqual(components["foo"][i], results["foo"]) - self.assertEqual(components["bar"][i], results["bar"]) - saver.save(sess, path, step) - - with ops.Graph().as_default() as g: - saver = saver_lib.import_meta_graph(meta_filename) - with self.test_session(graph=g) as sess: - get_next = nest.pack_sequence_as(("a", "b"), - ops.get_collection("get_next")) - saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir())) - for i in range(2, 3): - results = sess.run(get_next) - self.assertEqual(components["foo"][i], results["foo"]) - self.assertEqual(components["bar"][i], results["bar"]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) + diff_comp = (np.tile(np.array([[1], [2], [3], [4]]), 20), + np.tile(np.array([[5], [6], [7], [8]]), 22), + np.array([1.0, 2.0, 3.0, 4.0])) + + dict_components = {"foo": [1, 2, 3], "bar": [[4.0], [5.0], [6.0]]} + + self.run_core_tests(lambda: self._build_tensor_slices_dataset(components), + lambda: self._build_tensor_slices_dataset(diff_comp), 4) + self.run_core_tests( + lambda: self._build_tensor_slices_dataset(dict_components), None, 3) + + def _build_sparse_tensor_slice_dataset(self, slices): + indices = np.array( + [[i, j] for i in range(len(slices)) for j in range(len(slices[i]))], + dtype=np.int64) + values = np.array([val for s in slices for val in s], dtype=np.float64) + dense_shape = np.array( + [len(slices), max(len(s) for s in slices) + 1], dtype=np.int64) + sparse_components = sparse_tensor.SparseTensor(indices, values, dense_shape) + return dataset_ops.Dataset.from_sparse_tensor_slices(sparse_components) + + def testFromSparseTensorSlicesCore(self): + slices = [[1., 2., 3.], [1.], [1.], [1., 2.], [], [1., 2.], [], [], []] + diff_slices = [[1., 2.], [2.], [2., 3., 4.], [], [], []] + + self.run_core_tests( + lambda: self._build_sparse_tensor_slice_dataset(slices), + lambda: self._build_sparse_tensor_slice_dataset(diff_slices), + 9, + sparse_tensors=True) if __name__ == "__main__": diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index 07fecf04fa..0a9e99fd99 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -25,6 +25,7 @@ import numpy as np from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.training import saver as saver_lib @@ -37,7 +38,7 @@ class DatasetSerializationTestBase(test.TestCase): def tearDown(self): self._delete_ckpt() - def run_core_tests(self, ds_fn1, ds_fn2, num_outputs): + def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False): """Runs the core tests. Args: @@ -45,34 +46,51 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn2: 0-argument function that returns a Dataset different from ds_fn1. If None, verify_restore_in_modified_graph test is not run. num_outputs: Total number of outputs expected from this Dataset. + sparse_tensors: Whether dataset is built from SparseTensor(s). Raises: AssertionError if any test fails. """ - self.verify_unused_iterator(ds_fn1, num_outputs) - self.verify_fully_used_iterator(ds_fn1, num_outputs) - self.verify_exhausted_iterator(ds_fn1, num_outputs) - self.verify_init_before_restore(ds_fn1, num_outputs) - self.verify_multiple_breaks(ds_fn1, num_outputs) - self.verify_reset_restored_iterator(ds_fn1, num_outputs) + self.verify_unused_iterator( + ds_fn1, num_outputs, sparse_tensors=sparse_tensors) + self.verify_fully_used_iterator( + ds_fn1, num_outputs, sparse_tensors=sparse_tensors) + self.verify_exhausted_iterator( + ds_fn1, num_outputs, sparse_tensors=sparse_tensors) + self.verify_init_before_restore( + ds_fn1, num_outputs, sparse_tensors=sparse_tensors) + self.verify_multiple_breaks( + ds_fn1, num_outputs, sparse_tensors=sparse_tensors) + self.verify_reset_restored_iterator( + ds_fn1, num_outputs, sparse_tensors=sparse_tensors) if ds_fn2: - self.verify_restore_in_modified_graph(ds_fn1, ds_fn2, num_outputs) + self.verify_restore_in_modified_graph( + ds_fn1, ds_fn2, num_outputs, sparse_tensors=sparse_tensors) - def verify_unused_iterator(self, ds_fn, num_outputs, verify_exhausted=True): + def verify_unused_iterator(self, + ds_fn, + num_outputs, + sparse_tensors=False, + verify_exhausted=True): """Verifies that saving and restoring an unused iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + sparse_tensors: See `run_core_tests`. verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. """ self.verify_run_with_breaks( - ds_fn, [0], num_outputs, verify_exhausted=verify_exhausted) + ds_fn, [0], + num_outputs, + sparse_tensors=sparse_tensors, + verify_exhausted=verify_exhausted) - def verify_fully_used_iterator(self, ds_fn, num_outputs): + def verify_fully_used_iterator(self, ds_fn, num_outputs, + sparse_tensors=False): """Verifies that saving and restoring a fully used iterator works. Note that this only checks saving and restoring an iterator from which @@ -83,13 +101,15 @@ class DatasetSerializationTestBase(test.TestCase): Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + sparse_tensors: See `run_core_tests`. Raises: AssertionError if test fails. """ - self.verify_run_with_breaks(ds_fn, [num_outputs], num_outputs) + self.verify_run_with_breaks( + ds_fn, [num_outputs], num_outputs, sparse_tensors=sparse_tensors) - def verify_exhausted_iterator(self, ds_fn, num_outputs): + def verify_exhausted_iterator(self, ds_fn, num_outputs, sparse_tensors=False): """Verifies that saving and restoring an exhausted iterator works. An exhausted iterator is one which has returned an OutOfRange error. @@ -97,24 +117,35 @@ class DatasetSerializationTestBase(test.TestCase): Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + sparse_tensors: See `run_core_tests`. Raises: AssertionError if any test fails. """ - self.gen_outputs(ds_fn, [], num_outputs, verify_exhausted=True) + self.gen_outputs( + ds_fn, [], + num_outputs, + verify_exhausted=True, + sparse_tensors=sparse_tensors) actual = self.gen_outputs( - ds_fn, [], 0, ckpt_saved=True, verify_exhausted=True) + ds_fn, [], + 0, + ckpt_saved=True, + verify_exhausted=True, + sparse_tensors=sparse_tensors) self.assertEqual(len(actual), 0) def verify_init_before_restore(self, ds_fn, num_outputs, + sparse_tensors=False, verify_exhausted=True): - """Verifies that retoring into an already initilized iterator works. + """Verifies that restoring into an already initilized iterator works. Args: ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. + sparse_tensors: See `run_core_tests`. verify_exhausted: See `gen_outputs`. Raises: @@ -125,12 +156,14 @@ class DatasetSerializationTestBase(test.TestCase): self.gen_break_points(num_outputs), num_outputs, init_before_restore=True, + sparse_tensors=sparse_tensors, verify_exhausted=verify_exhausted) def verify_multiple_breaks(self, ds_fn, num_outputs, num_breaks=10, + sparse_tensors=False, verify_exhausted=True): """Attempts to save/restore at multiple break points. @@ -139,6 +172,7 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs: See `run_core_tests`. num_breaks: The number of break points. These are uniformly spread in [0, num_outputs] both inclusive. + sparse_tensors: See `run_core_tests`. verify_exhausted: See `gen_outputs`. Raises: @@ -146,14 +180,16 @@ class DatasetSerializationTestBase(test.TestCase): """ self.verify_run_with_breaks( ds_fn, - self.gen_break_points(num_outputs), + self.gen_break_points(num_outputs, num_breaks), num_outputs, + sparse_tensors=sparse_tensors, verify_exhausted=verify_exhausted) def verify_reset_restored_iterator(self, ds_fn, num_outputs, break_point=None, + sparse_tensors=False, verify_exhausted=True): """Attempts to re-initialize a restored iterator. @@ -163,6 +199,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. + sparse_tensors: See `run_core_tests`. verify_exhausted: See `gen_outputs`. Raises: @@ -172,16 +209,24 @@ class DatasetSerializationTestBase(test.TestCase): # Collect ground truth containing all outputs. expected = self.gen_outputs( - ds_fn, [], num_outputs, verify_exhausted=verify_exhausted) + ds_fn, [], + num_outputs, + sparse_tensors=sparse_tensors, + verify_exhausted=verify_exhausted) # Skip some items and save checkpoint. - self.gen_outputs(ds_fn, [], break_point, verify_exhausted=False) + self.gen_outputs( + ds_fn, [], + break_point, + sparse_tensors=sparse_tensors, + verify_exhausted=False) actual = [] # Restore from checkpoint and then run init_op. with ops.Graph().as_default() as g: saver = self._import_meta_graph() - init_op, get_next_op = self._get_iterator_ops_from_collection(ds_fn) + init_op, get_next_op = self._get_iterator_ops_from_collection( + ds_fn, sparse_tensors=sparse_tensors) with self.test_session(graph=g) as sess: self._restore(saver, sess) sess.run(init_op) @@ -197,6 +242,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn2, num_outputs, break_point=None, + sparse_tensors=False, verify_exhausted=True): """Attempts to restore an iterator in a modified graph. @@ -209,6 +255,7 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn2: See `run_core_tests`. num_outputs: See `run_core_tests`. break_point: Break point. Optional. Defaults to num_outputs/2. + sparse_tensors: See `run_core_tests`. verify_exhausted: See `gen_outputs`. Raises: @@ -218,20 +265,30 @@ class DatasetSerializationTestBase(test.TestCase): # Skip `break_point` items and store the remaining produced from ds_fn1 # in `expected`. - self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) + self.gen_outputs( + ds_fn1, [], + break_point, + sparse_tensors=sparse_tensors, + verify_exhausted=False) expected = self.gen_outputs( ds_fn1, [], num_outputs - break_point, ckpt_saved=True, + sparse_tensors=sparse_tensors, verify_exhausted=verify_exhausted) # Generate `break_point` items from ds_fn1 and save checkpoint. - self.gen_outputs(ds_fn1, [], break_point, verify_exhausted=False) + self.gen_outputs( + ds_fn1, [], + break_point, + sparse_tensors=sparse_tensors, + verify_exhausted=False) actual = [] # Build graph for ds_fn2 but load checkpoint for ds_fn1. with ops.Graph().as_default() as g: - _, get_next_op, saver = self._build_graph(ds_fn2) + _, get_next_op, saver = self._build_graph( + ds_fn2, sparse_tensors=sparse_tensors) with self.test_session(graph=g) as sess: self._restore(saver, sess) for _ in range(num_outputs - break_point): @@ -246,8 +303,9 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn, break_points, num_outputs, - verify_exhausted=True, - init_before_restore=False): + init_before_restore=False, + sparse_tensors=False, + verify_exhausted=True): """Verifies that ds_fn() produces the same outputs with and without breaks. 1. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it @@ -261,8 +319,9 @@ class DatasetSerializationTestBase(test.TestCase): ds_fn: See `gen_outputs`. break_points: See `gen_outputs`. num_outputs: See `gen_outputs`. - verify_exhausted: See `gen_outputs`. init_before_restore: See `gen_outputs`. + sparse_tensors: See `run_core_tests`. + verify_exhausted: See `gen_outputs`. Raises: AssertionError if any test fails. @@ -270,14 +329,18 @@ class DatasetSerializationTestBase(test.TestCase): expected = self.gen_outputs( ds_fn, [], num_outputs, - verify_exhausted=verify_exhausted, - init_before_restore=init_before_restore) + init_before_restore=init_before_restore, + sparse_tensors=sparse_tensors, + verify_exhausted=verify_exhausted) + actual = self.gen_outputs( ds_fn, break_points, num_outputs, - verify_exhausted=verify_exhausted, - init_before_restore=init_before_restore) + init_before_restore=init_before_restore, + sparse_tensors=sparse_tensors, + verify_exhausted=verify_exhausted) + self.match(expected, actual) def gen_outputs(self, @@ -286,6 +349,7 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs, ckpt_saved=False, init_before_restore=False, + sparse_tensors=False, verify_exhausted=True): """Generates elements from input dataset while stopping at break points. @@ -306,6 +370,7 @@ class DatasetSerializationTestBase(test.TestCase): init_before_restore: Whether init should be called before saver.restore. This is just so that we can verify that restoring an already initialized iterator works. + sparse_tensors: Whether dataset is built from SparseTensor(s). verify_exhausted: Whether to verify that the iterator has been exhausted after producing `num_outputs` elements. @@ -317,9 +382,11 @@ class DatasetSerializationTestBase(test.TestCase): def get_ops(): if ckpt_saved: saver = self._import_meta_graph() - init_op, get_next_op = self._get_iterator_ops_from_collection(ds_fn) + init_op, get_next_op = self._get_iterator_ops_from_collection( + ds_fn, sparse_tensors=sparse_tensors) else: - init_op, get_next_op, saver = self._build_graph(ds_fn) + init_op, get_next_op, saver = self._build_graph( + ds_fn, sparse_tensors=sparse_tensors) return init_op, get_next_op, saver for i in range(len(break_points) + 1): @@ -368,7 +435,7 @@ class DatasetSerializationTestBase(test.TestCase): if nest.is_sequence(expected): self.assertEqual(len(expected), len(actual)) if isinstance(expected, dict): - for key1, key2 in sorted(expected, actual): + for key1, key2 in zip(sorted(expected), sorted(actual)): self.assertEqual(key1, key2) self.match(expected[key1], actual[key2]) else: @@ -385,29 +452,44 @@ class DatasetSerializationTestBase(test.TestCase): """Generates `num_samples` breaks points in [0, num_outputs].""" return np.linspace(0, num_outputs, num_samples, dtype=int) - def _build_graph(self, ds_fn): + def _build_graph(self, ds_fn, sparse_tensors=False): iterator = ds_fn().make_initializable_iterator() saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) init_op = iterator.initializer - get_next = iterator.get_next() - self._add_iterator_ops_to_collection(init_op, get_next) + if sparse_tensors: + get_next = sparse_tensor.SparseTensor(*iterator.get_next()) + else: + get_next = iterator.get_next() + self._add_iterator_ops_to_collection(init_op, get_next, sparse_tensors) saver = saver_lib.Saver(allow_empty=True) return init_op, get_next, saver - def _add_iterator_ops_to_collection(self, init_op, get_next): + def _add_iterator_ops_to_collection(self, + init_op, + get_next, + sparse_tensors=False): ops.add_to_collection("iterator_ops", init_op) # `get_next` may be a tuple e.g. in TensorSliceDataset. Since Collections # do not support tuples we flatten the tensors and restore the shape in # `_get_iterator_ops_from_collection`. - for el in nest.flatten(get_next): - ops.add_to_collection("iterator_ops", el) + if sparse_tensors: + ops.add_to_collection("iterator_ops", get_next.indices) + ops.add_to_collection("iterator_ops", get_next.values) + ops.add_to_collection("iterator_ops", get_next.dense_shape) + else: + for el in nest.flatten(get_next): + ops.add_to_collection("iterator_ops", el) - def _get_iterator_ops_from_collection(self, ds_fn): + def _get_iterator_ops_from_collection(self, ds_fn, sparse_tensors=False): all_ops = ops.get_collection("iterator_ops") - return all_ops[0], nest.pack_sequence_as( - self._get_output_types(ds_fn), all_ops[1:]) + if sparse_tensors: + init_op, indices, values, dense_shape = all_ops + return init_op, sparse_tensor.SparseTensor(indices, values, dense_shape) + else: + return all_ops[0], nest.pack_sequence_as( + self._get_output_types(ds_fn), all_ops[1:]) def _get_output_types(self, ds_fn): with ops.Graph().as_default(): diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index aa4f436b39..a90590fc7e 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -90,6 +90,7 @@ class GraphDefBuilderWrapper { // `*output` contains a pointer to the output `Node`. It is guaranteed to be // non-null if the method returns with an OK status. // The returned Node pointer is owned by the backing Graph of GraphDefBuilder. + // TODO(shivaniagrawal): Consider changing to gtl::ArraySlice? template Status AddVector(const std::vector& val, Node** output) { Tensor val_t = Tensor(DataTypeToEnum::v(), diff --git a/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc b/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc index 97240a066b..de5ab1a367 100644 --- a/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc +++ b/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc @@ -29,10 +29,12 @@ namespace { // description of the following op. template -class Dataset : public DatasetBase { +class Dataset : public GraphDatasetBase { public: - explicit Dataset(const sparse::SparseTensor& sparse_tensor) - : sparse_tensor_(sparse_tensor), + explicit Dataset(OpKernelContext* ctx, + const sparse::SparseTensor& sparse_tensor) + : GraphDatasetBase(ctx), + sparse_tensor_(sparse_tensor), dtypes_({DT_INT64, sparse_tensor.dtype(), DT_INT64}), shapes_({{-1, sparse_tensor.dims() - 1}, {-1}, @@ -53,6 +55,27 @@ class Dataset : public DatasetBase { return "SparseTensorSliceDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* indices_node; + TF_RETURN_IF_ERROR(b->AddTensor(sparse_tensor_.indices(), &indices_node)); + Node* value_node; + TF_RETURN_IF_ERROR(b->AddTensor(sparse_tensor_.values(), &value_node)); + Node* dense_shape_node; + std::vector dense_shape; + dense_shape.reserve(sparse_tensor_.shape().size()); + for (int i = 0; i < sparse_tensor_.shape().size(); i++) + dense_shape.emplace_back(sparse_tensor_.shape()[i]); + TF_RETURN_IF_ERROR(b->AddVector(dense_shape, &dense_shape_node)); + AttrValue val_dtype; + b->BuildAttrValue(sparse_tensor_.dtype(), &val_dtype); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {indices_node, value_node, dense_shape_node}, + {{"Tvalues", val_dtype}}, output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator> { public: @@ -106,7 +129,6 @@ class Dataset : public DatasetBase { ++iter_; } - if (i_ == next_non_empty_i_) { // The current position is non-empty in the input // `SparseTensor`, and we have already read the value from the @@ -129,6 +151,42 @@ class Dataset : public DatasetBase { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(Iterator::full_name("i"), i_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(Iterator::full_name("iter_loc"), iter_.loc())); + TF_RETURN_IF_ERROR(writer->WriteScalar( + Iterator::full_name("next_non_empty_i_"), next_non_empty_i_)); + if (i_ <= next_non_empty_i_) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + Iterator::full_name("next_indices_"), next_indices_)); + TF_RETURN_IF_ERROR(writer->WriteTensor( + Iterator::full_name("next_values_"), next_values_)); + } + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(reader->ReadScalar(Iterator::full_name("i"), &i_)); + int64 iter_loc; + TF_RETURN_IF_ERROR( + reader->ReadScalar(Iterator::full_name("iter_loc"), &iter_loc)); + iter_ = group_iterable_.at(iter_loc); + TF_RETURN_IF_ERROR(reader->ReadScalar( + Iterator::full_name("next_non_empty_i_"), &next_non_empty_i_)); + if (i_ <= next_non_empty_i_) { + TF_RETURN_IF_ERROR(reader->ReadTensor( + Iterator::full_name("next_indices_"), &next_indices_)); + TF_RETURN_IF_ERROR(reader->ReadTensor( + Iterator::full_name("next_values_"), &next_values_)); + } + return Status::OK(); + } + private: const int64 num_elements_; @@ -198,7 +256,7 @@ class SparseTensorSliceDatasetOp : public DatasetOpKernel { sparse::SparseTensor sparse_tensor( *indices, *values, TensorShape(dense_shape->vec()), std_order); - *output = new Dataset(sparse_tensor); + *output = new Dataset(ctx, sparse_tensor); } private: diff --git a/tensorflow/core/util/sparse/group_iterator.h b/tensorflow/core/util/sparse/group_iterator.h index 99f3eafc60..c0fce207e7 100644 --- a/tensorflow/core/util/sparse/group_iterator.h +++ b/tensorflow/core/util/sparse/group_iterator.h @@ -83,6 +83,11 @@ class GroupIterable { class IteratorStep; IteratorStep begin() { return IteratorStep(this, 0); } + IteratorStep at(int64 loc) { + CHECK(loc >= 0 && loc <= ix_.dim_size(0)) + << "loc provided must lie between 0 and " << ix_.dim_size(0); + return IteratorStep(this, loc); + } IteratorStep end() { return IteratorStep(this, ix_.dim_size(0)); } template @@ -109,6 +114,7 @@ class GroupIterable { IteratorStep& operator++(); // prefix ++ IteratorStep operator++(int); // postfix ++ Group operator*() const { return Group(iter_, loc_, next_loc_); } + int64 loc() const { return loc_; } private: GroupIterable* iter_; -- GitLab From 893801105ed825f13c4a9be8777709e88f342099 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sat, 11 Nov 2017 10:54:31 -0800 Subject: [PATCH 0320/1801] Disable GPU another way. Add explicit header dependency. --- tensorflow/contrib/lite/testing/generate_examples.py | 4 ++++ tensorflow/contrib/lite/toco/python/BUILD | 1 + 2 files changed, 5 insertions(+) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 7aadc8aab1..b122818221 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -36,6 +36,10 @@ import traceback import zipfile import numpy as np from six import StringIO + +# TODO(aselle): Disable GPU for now +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + import tensorflow as tf from google.protobuf import text_format # TODO(aselle): switch to TensorFlow's resource_loader diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 92246a8aed..0d6c8be01d 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -22,6 +22,7 @@ cc_library( tf_py_wrap_cc( name = "tensorflow_wrap_toco", srcs = ["toco.i"], + hdrs = ["toco_python_api.h"], deps = [ ":toco_python_api", "//tensorflow/contrib/lite/toco:model_flags_proto_cc", -- GitLab From b5a6259ff130528ff5922f8222d98573cf64d1f1 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sat, 11 Nov 2017 11:17:06 -0800 Subject: [PATCH 0321/1801] no pip testing of toco_from_prtos_test, undo improper hdrs --- tensorflow/contrib/lite/toco/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 0d6c8be01d..35b68e44b4 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -22,7 +22,6 @@ cc_library( tf_py_wrap_cc( name = "tensorflow_wrap_toco", srcs = ["toco.i"], - hdrs = ["toco_python_api.h"], deps = [ ":toco_python_api", "//tensorflow/contrib/lite/toco:model_flags_proto_cc", @@ -59,6 +58,7 @@ tf_py_test( "//tensorflow/contrib/lite/toco:model_flags_proto_py", "//tensorflow/contrib/lite/toco:toco_flags_proto_py", ], + tags = ["no_pip"], data = [ ":toco_from_protos", ], -- GitLab From 8b2fb2f37f7b6b9745e34888ffc8e15927370b90 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sat, 11 Nov 2017 11:32:14 -0800 Subject: [PATCH 0322/1801] Buildifier --- tensorflow/contrib/lite/toco/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 35b68e44b4..17115047d2 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -58,10 +58,10 @@ tf_py_test( "//tensorflow/contrib/lite/toco:model_flags_proto_py", "//tensorflow/contrib/lite/toco:toco_flags_proto_py", ], - tags = ["no_pip"], data = [ ":toco_from_protos", ], + tags = ["no_pip"], ) filegroup( -- GitLab From 743c7b17eeda2b4b13e5524168a096e871426a27 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 11 Nov 2017 12:27:40 -0800 Subject: [PATCH 0323/1801] Add support for Halton sequence for use with Monte Carlo estimation of integrals. PiperOrigin-RevId: 175412461 --- tensorflow/contrib/bayesflow/BUILD | 19 ++ tensorflow/contrib/bayesflow/__init__.py | 6 +- .../kernel_tests/halton_sequence_test.py | 131 +++++++++ .../bayesflow/python/ops/halton_sequence.py | 33 +++ .../python/ops/halton_sequence_impl.py | 264 ++++++++++++++++++ 5 files changed, 451 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py create mode 100644 tensorflow/contrib/bayesflow/python/ops/halton_sequence.py create mode 100644 tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index b024f158cd..f92b57869e 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -120,6 +120,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "halton_sequence_test", + size = "small", + srcs = ["python/kernel_tests/halton_sequence_test.py"], + additional_deps = [ + ":bayesflow_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + ], +) + cuda_py_test( name = "hmc_test", size = "medium", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index b98bc36954..beaf6f1854 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.bayesflow.python.ops import csiszar_divergence from tensorflow.contrib.bayesflow.python.ops import custom_grad +from tensorflow.contrib.bayesflow.python.ops import halton_sequence from tensorflow.contrib.bayesflow.python.ops import hmc from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo @@ -32,7 +33,8 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['csiszar_divergence', 'custom_grad', 'entropy', - 'metropolis_hastings', 'monte_carlo', 'hmc', 'special_math', - 'stochastic_variables', 'variational_inference'] + 'metropolis_hastings', 'monte_carlo', 'halton_sequence', + 'hmc', 'special_math', 'stochastic_variables', + 'variational_inference'] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py new file mode 100644 index 0000000000..0a85862abf --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py @@ -0,0 +1,131 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for halton_sequence.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.bayesflow.python.ops import halton_sequence as halton +from tensorflow.contrib.bayesflow.python.ops import monte_carlo_impl as monte_carlo_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import normal as normal_lib +from tensorflow.python.platform import test + + +mc = monte_carlo_lib + + +class HaltonSequenceTest(test.TestCase): + + def test_known_values_small_bases(self): + with self.test_session(): + # The first five elements of the Halton sequence with base 2 and 3 + expected = np.array(((1. / 2, 1. / 3), + (1. / 4, 2. / 3), + (3. / 4, 1. / 9), + (1. / 8, 4. / 9), + (5. / 8, 7. / 9)), dtype=np.float32) + sample = halton.sample(2, num_samples=5) + self.assertAllClose(expected, sample.eval(), rtol=1e-6) + + def test_sample_indices(self): + with self.test_session(): + dim = 5 + indices = math_ops.range(10, dtype=dtypes.int32) + sample_direct = halton.sample(dim, num_samples=10) + sample_from_indices = halton.sample(dim, sample_indices=indices) + self.assertAllClose(sample_direct.eval(), sample_from_indices.eval(), + rtol=1e-6) + + def test_dtypes_works_correctly(self): + with self.test_session(): + dim = 3 + sample_float32 = halton.sample(dim, num_samples=10, dtype=dtypes.float32) + sample_float64 = halton.sample(dim, num_samples=10, dtype=dtypes.float64) + self.assertEqual(sample_float32.eval().dtype, np.float32) + self.assertEqual(sample_float64.eval().dtype, np.float64) + + def test_normal_integral_mean_and_var_correctly_estimated(self): + n = int(1000) + # This test is almost identical to the similarly named test in + # monte_carlo_test.py. The only difference is that we use the Halton + # samples instead of the random samples to evaluate the expectations. + # MC with pseudo random numbers converges at the rate of 1/ Sqrt(N) + # (N=number of samples). For QMC in low dimensions, the expected convergence + # rate is ~ 1/N. Hence we should only need 1e3 samples as compared to the + # 1e6 samples used in the pseudo-random monte carlo. + with self.test_session(): + mu_p = array_ops.constant([-1.0, 1.0], dtype=dtypes.float64) + mu_q = array_ops.constant([0.0, 0.0], dtype=dtypes.float64) + sigma_p = array_ops.constant([0.5, 0.5], dtype=dtypes.float64) + sigma_q = array_ops.constant([1.0, 1.0], dtype=dtypes.float64) + p = normal_lib.Normal(loc=mu_p, scale=sigma_p) + q = normal_lib.Normal(loc=mu_q, scale=sigma_q) + + cdf_sample = halton.sample(2, num_samples=n, dtype=dtypes.float64) + q_sample = q.quantile(cdf_sample) + + # Compute E_p[X]. + e_x = mc.expectation_importance_sampler( + f=lambda x: x, log_p=p.log_prob, sampling_dist_q=q, z=q_sample, + seed=42) + + # Compute E_p[X^2]. + e_x2 = mc.expectation_importance_sampler( + f=math_ops.square, log_p=p.log_prob, sampling_dist_q=q, z=q_sample, + seed=42) + + stddev = math_ops.sqrt(e_x2 - math_ops.square(e_x)) + # Keep the tolerance levels the same as in monte_carlo_test.py. + self.assertEqual(p.batch_shape, e_x.get_shape()) + self.assertAllClose(p.mean().eval(), e_x.eval(), rtol=0.01) + self.assertAllClose(p.stddev().eval(), stddev.eval(), rtol=0.02) + + def test_docstring_example(self): + # Produce the first 1000 members of the Halton sequence in 3 dimensions. + num_samples = 1000 + dim = 3 + with self.test_session(): + sample = halton.sample(dim, num_samples=num_samples) + + # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional + # hypercube. + powers = math_ops.range(1.0, limit=dim + 1) + integral = math_ops.reduce_mean( + math_ops.reduce_prod(sample ** powers, axis=-1)) + true_value = 1.0 / math_ops.reduce_prod(powers + 1.0) + + # Produces a relative absolute error of 1.7%. + self.assertAllClose(integral.eval(), true_value.eval(), rtol=0.02) + + # Now skip the first 1000 samples and recompute the integral with the next + # thousand samples. The sample_indices argument can be used to do this. + + sample_indices = math_ops.range(start=1000, limit=1000 + num_samples, + dtype=dtypes.int32) + sample_leaped = halton.sample(dim, sample_indices=sample_indices) + + integral_leaped = math_ops.reduce_mean( + math_ops.reduce_prod(sample_leaped ** powers, axis=-1)) + self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.001) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/halton_sequence.py b/tensorflow/contrib/bayesflow/python/ops/halton_sequence.py new file mode 100644 index 0000000000..49d747d538 --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/ops/halton_sequence.py @@ -0,0 +1,33 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Support for low discrepancy Halton sequences. + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# go/tf-wildcard-import +# pylint: disable=wildcard-import +from tensorflow.contrib.bayesflow.python.ops.halton_sequence_impl import * +# pylint: enable=wildcard-import +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + 'sample', +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py b/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py new file mode 100644 index 0000000000..8cabf18903 --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py @@ -0,0 +1,264 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Quasi Monte Carlo support: Halton sequence. + +@@sample +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +__all__ = [ + 'sample', +] + + +# The maximum dimension we support. This is limited by the number of primes +# in the _PRIMES array. +_MAX_DIMENSION = 1000 + + +def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): + r"""Returns a sample from the `m` dimensional Halton sequence. + + Warning: The sequence elements take values only between 0 and 1. Care must be + taken to appropriately transform the domain of a function if it differs from + the unit cube before evaluating integrals using Halton samples. It is also + important to remember that quasi-random numbers are not a replacement for + pseudo-random numbers in every context. Quasi random numbers are completely + deterministic and typically have significant negative autocorrelation (unless + randomized). + + Computes the members of the low discrepancy Halton sequence in dimension + `dim`. The d-dimensional sequence takes values in the unit hypercube in d + dimensions. Currently, only dimensions up to 1000 are supported. The prime + base for the `k`-th axes is the k-th prime starting from 2. For example, + if dim = 3, then the bases will be [2, 3, 5] respectively and the first + element of the sequence will be: [0.5, 0.333, 0.2]. For a more complete + description of the Halton sequences see: + https://en.wikipedia.org/wiki/Halton_sequence. For low discrepancy sequences + and their applications see: + https://en.wikipedia.org/wiki/Low-discrepancy_sequence. + + The user must supply either `num_samples` or `sample_indices` but not both. + The former is the number of samples to produce starting from the first + element. If `sample_indices` is given instead, the specified elements of + the sequence are generated. For example, sample_indices=tf.range(10) is + equivalent to specifying n=10. + + Example Use: + + ```python + bf = tf.contrib.bayesflow + + # Produce the first 1000 members of the Halton sequence in 3 dimensions. + num_samples = 1000 + dim = 3 + sample = bf.halton_sequence.sample(dim, num_samples=num_samples) + + # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional + # hypercube. + powers = tf.range(1.0, limit=dim + 1) + integral = tf.reduce_mean(tf.reduce_prod(sample ** powers, axis=-1)) + true_value = 1.0 / tf.reduce_prod(powers + 1.0) + with tf.Session() as session: + values = session.run((integral, true_value)) + + # Produces a relative absolute error of 1.7%. + print ("Estimated: %f, True Value: %f" % values) + + # Now skip the first 1000 samples and recompute the integral with the next + # thousand samples. The sample_indices argument can be used to do this. + + + sample_indices = tf.range(start=1000, limit=1000 + num_samples, + dtype=tf.int32) + sample_leaped = halton.sample(dim, sample_indices=sample_indices) + + integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers, + axis=-1)) + with tf.Session() as session: + values = session.run((integral_leaped, true_value)) + # Now produces a relative absolute error of 0.05%. + print ("Leaped Estimated: %f, True Value: %f" % values) + ``` + + Args: + dim: Positive Python `int` representing each sample's `event_size.` Must + not be greater than 1000. + num_samples: (Optional) positive Python `int`. The number of samples to + generate. Either this parameter or sample_indices must be specified but + not both. If this parameter is None, then the behaviour is determined by + the `sample_indices`. + sample_indices: (Optional) `Tensor` of dtype int32 and rank 1. The elements + of the sequence to compute specified by their position in the sequence. + The entries index into the Halton sequence starting with 0 and hence, + must be whole numbers. For example, sample_indices=[0, 5, 6] will produce + the first, sixth and seventh elements of the sequence. If this parameter + is None, then the `num_samples` parameter must be specified which gives + the number of desired samples starting from the first sample. + dtype: (Optional) The dtype of the sample. One of `float32` or `float64`. + Default is `float32`. + name: (Optional) Python `str` describing ops managed by this function. If + not supplied the name of this function is used. + + Returns: + halton_elements: Elements of the Halton sequence. `Tensor` of supplied dtype + and `shape` `[num_samples, dim]` if `num_samples` was specified or shape + `[s, dim]` where s is the size of `sample_indices` if `sample_indices` + were specified. + + Raises: + ValueError: if both `sample_indices` and `num_samples` were specified or + if dimension `dim` is less than 1 or greater than 1000. + """ + if dim < 1 or dim > _MAX_DIMENSION: + raise ValueError( + 'Dimension must be between 1 and {}. Supplied {}'.format(_MAX_DIMENSION, + dim)) + if (num_samples is None) == (sample_indices is None): + raise ValueError('Either `num_samples` or `sample_indices` must be' + ' specified but not both.') + + dtype = dtype or dtypes.float32 + if not dtype.is_floating: + raise ValueError('dtype must be of `float`-type') + + with ops.name_scope(name, 'sample', values=[sample_indices]): + # Here and in the following, the shape layout is as follows: + # [sample dimension, event dimension, coefficient dimension]. + # The coefficient dimension is an intermediate axes which will hold the + # weights of the starting integer when expressed in the (prime) base for + # an event dimension. + indices = _get_indices(num_samples, sample_indices, dtype) + radixes = array_ops.constant(_PRIMES[0:dim], dtype=dtype, shape=[dim, 1]) + + max_sizes_by_axes = _base_expansion_size(math_ops.reduce_max(indices), + radixes) + + max_size = math_ops.reduce_max(max_sizes_by_axes) + + # The powers of the radixes that we will need. Note that there is a bit + # of an excess here. Suppose we need the place value coefficients of 7 + # in base 2 and 3. For 2, we will have 3 digits but we only need 2 digits + # for base 3. However, we can only create rectangular tensors so we + # store both expansions in a [2, 3] tensor. This leads to the problem that + # we might end up attempting to raise large numbers to large powers. For + # example, base 2 expansion of 1024 has 10 digits. If we were in 10 + # dimensions, then the 10th prime (29) we will end up computing 29^10 even + # though we don't need it. We avoid this by setting the exponents for each + # axes to 0 beyond the maximum value needed for that dimension. + exponents_by_axes = array_ops.tile([math_ops.range(max_size)], [dim, 1]) + weight_mask = exponents_by_axes > max_sizes_by_axes + capped_exponents = array_ops.where( + weight_mask, array_ops.zeros_like(exponents_by_axes), exponents_by_axes) + weights = radixes ** capped_exponents + coeffs = math_ops.floor_div(indices, weights) + coeffs *= 1 - math_ops.cast(weight_mask, dtype) + coeffs = (coeffs % radixes) / radixes + return math_ops.reduce_sum(coeffs / weights, axis=-1) + + +def _get_indices(n, sample_indices, dtype, name=None): + """Generates starting points for the Halton sequence procedure. + + The k'th element of the sequence is generated starting from a positive integer + which must be distinct for each `k`. It is conventional to choose the starting + point as `k` itself (or `k+1` if k is zero based). This function generates + the starting integers for the required elements and reshapes the result for + later use. + + Args: + n: Positive `int`. The number of samples to generate. If this + parameter is supplied, then `sample_indices` should be None. + sample_indices: `Tensor` of dtype int32 and rank 1. The entries + index into the Halton sequence starting with 0 and hence, must be whole + numbers. For example, sample_indices=[0, 5, 6] will produce the first, + sixth and seventh elements of the sequence. If this parameter is not None + then `n` must be None. + dtype: The dtype of the sample. One of `float32` or `float64`. + Default is `float32`. + name: Python `str` name which describes ops created by this function. + + Returns: + indices: `Tensor` of dtype `dtype` and shape = `[n, 1, 1]`. + """ + with ops.name_scope(name, 'get_indices', [n, sample_indices]): + if sample_indices is None: + sample_indices = math_ops.range(n, dtype=dtype) + else: + sample_indices = math_ops.cast(sample_indices, dtype) + + # Shift the indices so they are 1 based. + indices = sample_indices + 1 + + # Reshape to make space for the event dimension and the place value + # coefficients. + return array_ops.reshape(indices, [-1, 1, 1]) + + +def _base_expansion_size(num, bases): + """Computes the number of terms in the place value expansion. + + Let num = a0 + a1 b + a2 b^2 + ... ak b^k be the place value expansion of + `num` in base b (ak <> 0). This function computes and returns `k` for each + base `b` specified in `bases`. + + This can be inferred from the base `b` logarithm of `num` as follows: + $$k = Floor(log_b (num)) + 1 = Floor( log(num) / log(b)) + 1$$ + + Args: + num: Scalar `Tensor` of dtype either `float32` or `float64`. The number to + compute the base expansion size of. + bases: `Tensor` of the same dtype as num. The bases to compute the size + against. + + Returns: + Tensor of same dtype and shape as `bases` containing the size of num when + written in that base. + """ + return math_ops.floor(math_ops.log(num) / math_ops.log(bases)) + 1 + + +def _primes_less_than(n): + # Based on + # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 + """Returns sorted array of primes such that `2 <= prime < n`.""" + small_primes = np.array((2, 3, 5)) + if n <= 6: + return small_primes[small_primes < n] + sieve = np.ones(n // 3 + (n % 6 == 2), dtype=np.bool) + sieve[0] = False + m = int(n ** 0.5) // 3 + 1 + for i in range(m): + if not sieve[i]: + continue + k = 3 * i + 1 | 1 + sieve[k ** 2 // 3::2 * k] = False + sieve[(k ** 2 + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = False + return np.r_[2, 3, 3 * np.nonzero(sieve)[0] + 1 | 1] + +_PRIMES = _primes_less_than(7919+1) + +assert len(_PRIMES) == _MAX_DIMENSION -- GitLab From 4feb4f4cf6ef6cf723dcc8fcd8d2e31f2415e3a0 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sat, 11 Nov 2017 12:51:29 -0800 Subject: [PATCH 0324/1801] disable generated_examples_zip_test for parameterization. --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index db02f6ef10..55f32f40f8 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -165,7 +165,7 @@ else BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test" - BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" + # BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test" -- GitLab From f689a1a0c9f23a1f42a7f6c1c6c44cf2d7bb97e3 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Sat, 11 Nov 2017 21:41:38 -0800 Subject: [PATCH 0325/1801] Remove a TODO that will never be done, thanks to backwards compatibility. PiperOrigin-RevId: 175431383 --- tensorflow/python/ops/gradient_checker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py index 3addfefc99..1ff1968055 100644 --- a/tensorflow/python/ops/gradient_checker.py +++ b/tensorflow/python/ops/gradient_checker.py @@ -348,7 +348,6 @@ def compute_gradient_error(x, as the initial value. delta: (optional) the amount of perturbation. init_targets: list of targets to run to initialize model params. - TODO(mrry): Remove this argument. extra_feed_dict: dict that allows fixing specified tensor values during the Jacobian calculation. -- GitLab From 2eb51b6786ad8c81cd3e7eb2ab9b7db6d4343360 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 12 Nov 2017 12:15:25 -0800 Subject: [PATCH 0326/1801] Internal Change PiperOrigin-RevId: 175462406 --- tensorflow/contrib/lite/java/BUILD | 37 +++++++++++++++++++ tensorflow/contrib/lite/tools/BUILD | 4 +- .../lite/tools/gen_op_registration_main.cc | 1 + .../lite/tools/gen_op_registration_test.cc | 8 ++-- .../contrib/lite/tools/mutable_op_resolver.h | 6 +-- 5 files changed, 48 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 74fb4fe001..b0d20bac20 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -77,6 +77,43 @@ java_test( ], ) +java_test( + name = "NativeInterpreterWrapperTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java"], + data = [ + "src/testdata/add.bin", + "src/testdata/int32.bin", + "src/testdata/int64.bin", + "src/testdata/invalid_model.bin", + "src/testdata/uint8.bin", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", + deps = [ + ":libtensorflowlite_jni.so", + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + +java_test( + name = "TensorTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/TensorTest.java"], + data = [ + "src/testdata/add.bin", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.TensorTest", + deps = [ + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + filegroup( name = "libtensorflowlite_jni", srcs = select({ diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 3c9cec5d16..21b32d8434 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -4,7 +4,9 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 -cc_binary( +load("//tensorflow:tensorflow.bzl", "tf_cc_binary") + +tf_cc_binary( name = "generate_op_registrations", srcs = ["gen_op_registration_main.cc"], deps = [ diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc index 7b27066a21..1b28b8bcd9 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc +++ b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc @@ -24,6 +24,7 @@ limitations under the License. using tensorflow::Flag; using tensorflow::Flags; +using tensorflow::string; namespace { diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_test.cc b/tensorflow/contrib/lite/tools/gen_op_registration_test.cc index c65cffe340..28a98d68ab 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration_test.cc +++ b/tensorflow/contrib/lite/tools/gen_op_registration_test.cc @@ -43,25 +43,25 @@ TEST_F(GenOpRegistrationTest, TestNonExistantFiles) { } TEST_F(GenOpRegistrationTest, TestModels) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/test_model.bin"); + ReadOps("tensorflow/contrib/lite/testdata/test_model.bin"); EXPECT_THAT(builtin_ops_, ElementsAreArray({"CONV_2D"})); EXPECT_THAT(custom_ops_, ElementsAreArray({"testing_op"})); } TEST_F(GenOpRegistrationTest, TestEmptyModels) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/empty_model.bin"); + ReadOps("tensorflow/contrib/lite/testdata/empty_model.bin"); EXPECT_EQ(builtin_ops_.size(), 0); EXPECT_EQ(custom_ops_.size(), 0); } TEST_F(GenOpRegistrationTest, TestZeroSubgraphs) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/0_subgraphs.bin"); + ReadOps("tensorflow/contrib/lite/testdata/0_subgraphs.bin"); EXPECT_EQ(builtin_ops_.size(), 0); EXPECT_EQ(custom_ops_.size(), 0); } TEST_F(GenOpRegistrationTest, TestBrokenMmap) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/test_model_broken.bin"); + ReadOps("tensorflow/contrib/lite/testdata/test_model_broken.bin"); EXPECT_EQ(builtin_ops_.size(), 0); EXPECT_EQ(custom_ops_.size(), 0); } diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h index 9546c32427..cc1a8e27e6 100644 --- a/tensorflow/contrib/lite/tools/mutable_op_resolver.h +++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h @@ -15,7 +15,7 @@ limitations under the License. #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_MUTABLE_OP_RESOLVER_H_ #define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_MUTABLE_OP_RESOLVER_H_ -#include +#include #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/model.h" @@ -36,8 +36,8 @@ class MutableOpResolver : public OpResolver { void AddCustom(const char* name, TfLiteRegistration* registration); private: - std::unordered_map builtins_; - std::unordered_map custom_ops_; + std::map builtins_; + std::map custom_ops_; }; } // namespace tflite -- GitLab From 24e368f1a1f1edd4c1f6c13b165c8aa5057c7f11 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 12 Nov 2017 12:52:16 -0800 Subject: [PATCH 0327/1801] Use latest version of nsync in cmake builds; bring TensorFlow's CMakeLists.txt for nsync up to date for Mac. Update the git hash for nsync for use by cmake when fetching nsync. Update TensorFlow's copy of nsync's CMakeLists.txt file, which has fixed for the Mac. PiperOrigin-RevId: 175463772 --- tensorflow/contrib/cmake/external/nsync.cmake | 2 +- tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/nsync.cmake b/tensorflow/contrib/cmake/external/nsync.cmake index 2c42377f50..155c91cb97 100644 --- a/tensorflow/contrib/cmake/external/nsync.cmake +++ b/tensorflow/contrib/cmake/external/nsync.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public) set(nsync_URL https://github.com/google/nsync) -set(nsync_TAG 394e71f0ebeed6788ae6c84d42c1bedf6e1ee9f7) +set(nsync_TAG 93815892dddafe9146a5f7e7042281d59d0f4323) set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync) set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install) diff --git a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt index fbd89bad07..594c2492d4 100644 --- a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt +++ b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt @@ -61,9 +61,15 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") ) elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX") include_directories ("${PROJECT_SOURCE_DIR}/platform/macos") + include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") + # Some versions of MacOS, such as Sierra, require _DARWIN_C_SOURCE + # when including certin C++ standard header files, such as . + add_definitions ("-D_DARWIN_C_SOURCE") add_compile_options ("-std=c++11") set (NSYNC_OS_SRC ${NSYNC_OS_CPP_SRC} + "platform/posix/src/clock_gettime.c" + "platform/posix/src/nsync_semaphore_mutex.c" ) set (NSYNC_TEST_OS_SRC "platform/posix/src/start_thread.c" @@ -138,6 +144,10 @@ if (NOT "${NSYNC_LANGUAGE}X" STREQUAL "c++11X") elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX") include_directories ("${PROJECT_SOURCE_DIR}/platform/macos") set (NSYNC_POSIX ON) + set (NSYNC_OS_EXTRA_SRC + "platform/posix/src/clock_gettime.c" + "platform/posix/src/nsync_semaphore_mutex.c" + ) include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "LinuxX") include_directories ("${PROJECT_SOURCE_DIR}/platform/linux") -- GitLab From 08b3d55b65bb8cf621e4f3e9f25cc7779079c4e8 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Sun, 12 Nov 2017 16:47:56 -0800 Subject: [PATCH 0328/1801] [SE] Delete deprecated MachineManager. PiperOrigin-RevId: 175472763 --- tensorflow/stream_executor/machine_manager.cc | 291 ------------------ tensorflow/stream_executor/machine_manager.h | 212 ------------- 2 files changed, 503 deletions(-) delete mode 100644 tensorflow/stream_executor/machine_manager.cc delete mode 100644 tensorflow/stream_executor/machine_manager.h diff --git a/tensorflow/stream_executor/machine_manager.cc b/tensorflow/stream_executor/machine_manager.cc deleted file mode 100644 index 2b61c8a0bc..0000000000 --- a/tensorflow/stream_executor/machine_manager.cc +++ /dev/null @@ -1,291 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/stream_executor/machine_manager.h" - -#include "tensorflow/stream_executor/platform/port.h" - -#include "tensorflow/stream_executor/dso_loader.h" -#include "tensorflow/stream_executor/lib/error.h" -#include "tensorflow/stream_executor/platform/logging.h" -#include "tensorflow/stream_executor/platform/mutex.h" -#include "tensorflow/stream_executor/platform/port.h" - -namespace perftools { -namespace gputools { - -mutex MachineManager::mu_{LINKER_INITIALIZED}; - -MachineManager *MachineManager::singleton_ = nullptr; - -PlatformKind MachineManager::DetectPreferredPlatform() { -// TODO(leary) for KNC card experiments, figure out a legitimate way to -// determine this. For now, we use a compile-time hint so we can compile tests -// for both. -#if defined TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_PREFER_OPENCL - return PlatformKind::kOpenCL; -#elif defined TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_PREFER_HOST - return PlatformKind::kHost; -#else - return PlatformKind::kCuda; -#endif -} - -/* static */ port::StatusOr> -MachineManager::Create(PlatformKind kind, DeviceOptions options, - const PluginConfig &config) { - std::unique_ptr machine_manager{ - new MachineManager{kind, options, config}}; - auto init_status = machine_manager->Init(); - if (!init_status.ok()) { - return init_status; - } - - return std::move(machine_manager); -} - -MachineManager::MachineManager(PlatformKind platform, - DeviceOptions device_options, - const PluginConfig &config) - : platform_(platform), - device_options_(device_options), - plugin_config_(config), - min_numa_node_(0), - limit_numa_node_(0) {} - -port::Status MachineManager::Init() { - // Initialize the first StreamExecutor, then use that platform interface to - // grab the device count. - executors_.resize(1); - executors_[0].reset(new StreamExecutor{platform_, plugin_config_}); - auto status = executors_[0]->Init(0 /* = device_ordinal */, device_options_); - if (!status.ok()) { - return port::Status{ - port::error::FAILED_PRECONDITION, - port::StrCat( - "failed to initialize StreamExecutor for device ordinal 0: ", - status.ToString())}; - } - int device_count = executors_[0]->PlatformDeviceCount(); - if (device_count == 0) { - LOG(WARNING) << "no devices found for platform " - << PlatformKindString(platform_); - min_numa_node_ = limit_numa_node_ = 0; - return port::Status::OK(); - } - - streams_.resize(device_count); - streams_[0].reset(new Stream(executors_[0].get())); - if (!streams_[0]->Init().ok()) { - return port::Status{ - port::error::FAILED_PRECONDITION, - "failed to initialize default stream for device ordinal 0"}; - } - - min_numa_node_ = executors_[0]->GetDeviceDescription().numa_node(); - limit_numa_node_ = min_numa_node_ + 1; - - executors_.resize(device_count); - for (int device_ordinal = 1; device_ordinal < device_count; - ++device_ordinal) { - StreamExecutor *stream_exec = new StreamExecutor{platform_, plugin_config_}; - executors_[device_ordinal].reset(stream_exec); - auto status = stream_exec->Init(device_ordinal, device_options_); - if (!status.ok()) { - return port::Status( - port::error::FAILED_PRECONDITION, - port::StrCat( - "failed to initialize StreamExecutor for device ordinal ", - device_ordinal, ": ", status.ToString())); - } - - min_numa_node_ = std::min(min_numa_node_, - stream_exec->GetDeviceDescription().numa_node()); - limit_numa_node_ = std::max( - limit_numa_node_, stream_exec->GetDeviceDescription().numa_node() + 1); - - if (!stream_exec->GetDeviceDescription().ecc_enabled()) { - LOG(WARNING) << "ECC not enabled for device ordinal: " << device_ordinal; - } - - streams_[device_ordinal].reset( - new Stream(executors_[device_ordinal].get())); - if (!streams_[device_ordinal]->Init().ok()) { - return port::Status( - port::error::FAILED_PRECONDITION, - port::StrCat( - "failed to initialize default stream for device ordinal ", - device_ordinal)); - } - } - - return port::Status::OK(); -} - -int MachineManager::device_count() const { return executors_.size(); } - -port::Status MachineManager::EnablePeerAccess() { - auto peer_access_map = GetPeerAccessMap(); - for (const auto &access : *peer_access_map) { - auto devices = access.first; - if (access.second) { - StreamExecutor *from = executors_[devices.first].get(); - StreamExecutor *to = executors_[devices.second].get(); - auto status = from->EnablePeerAccessTo(to); - if (!status.ok()) { - return status; - } - } else { - LOG(INFO) << "cannot enable peer access from device ordinal " - << devices.first << " to device ordinal " << devices.second; - } - } - return port::Status::OK(); -} - -std::unique_ptr, bool>> -MachineManager::GetPeerAccessMap() { - auto *map = new std::map, bool>; - for (int i = 0; i < device_count(); ++i) { - for (int j = 0; j < device_count(); ++j) { - StreamExecutor *from = executors_[i].get(); - StreamExecutor *to = executors_[j].get(); - (*map)[{i, j}] = from->CanEnablePeerAccessTo(to); - } - } - - return std::unique_ptr, bool>>{map}; -} - -StreamExecutor *MachineManager::executor_for_device(int device_ordinal) const { - CHECK_GE(device_ordinal, 0) << "device ordinal must be non-negative"; - CHECK(0 <= device_ordinal && device_ordinal < device_count()) - << "device " << device_ordinal << " out of range with device count " - << device_count(); - StreamExecutor *executor = executors_[device_ordinal].get(); - CHECK(executor != nullptr); - return executor; -} - -int MachineManager::ExecutorToBus(const StreamExecutor *stream_exec) const { - return stream_exec->GetDeviceDescription().numa_node() - min_numa_node_; -} - -int MachineManager::DeviceToBus(int device_ordinal) const { - return ExecutorToBus(executor_for_device(device_ordinal)); -} - -int MachineManager::ExecutorToNumaNode( - const StreamExecutor *stream_exec) const { - return stream_exec->GetDeviceDescription().numa_node(); -} - -int MachineManager::DeviceToNumaNode(int device_ordinal) const { - return ExecutorToNumaNode(executor_for_device(device_ordinal)); -} - -StreamExecutor *MachineManager::first_executor_for_bus(int bus_ordinal) { - CHECK_LT(bus_ordinal, bus_count()) << "bus ordinal out of available range"; - for (auto &executor : executors_) { - if (ExecutorToBus(executor.get()) == bus_ordinal) { - return executor.get(); - } - } - - LOG(WARNING) << "could not find executor requested for bus ordinal: " - << bus_ordinal; - return nullptr; -} - -StreamExecutor *MachineManager::first_executor_for_numa_node(int numa_node) { - for (auto &executor : executors_) { - if (ExecutorToNumaNode(executor.get()) == numa_node) { - return executor.get(); - } - } - - LOG(WARNING) << "could not find executor requested for numa_node: " - << numa_node; - return nullptr; -} - -Stream *MachineManager::stream_for_device(int device_ordinal) { - CHECK(0 <= device_ordinal && device_ordinal < device_count()); - Stream *stream = streams_[device_ordinal].get(); - CHECK(stream != nullptr); - return stream; -} - -/* static */ port::StatusOr -MachineManager::CreateSingletonInternal(PlatformKind platform, - DeviceOptions options, - const PluginConfig &config) { - if (singleton_ != nullptr) { - return port::Status{ - port::error::ALREADY_EXISTS, - "cannot create machine manager singleton; one already exists"}; - } - - auto create_status = Create(platform, options, config); - if (!create_status.ok()) { - return create_status.status(); - } - - singleton_ = create_status.ConsumeValueOrDie().release(); - - VLOG(1) << "machine manager singleton is " << singleton_ << " with platform " - << PlatformKindString(platform) << " and device options " - << options.ToString(); - - return singleton_; -} - -/* static */ MachineManager *MachineManager::CreateSingletonOrDie( - PlatformKind platform, DeviceOptions options, const PluginConfig &config) { - auto status = CreateSingleton(platform, options, config); - if (!status.ok()) { - LOG(FATAL) << "failed to create MachineManager singleton: " - << status.status(); - } - return status.ValueOrDie(); -} - -/* static */ port::StatusOr MachineManager::CreateSingleton( - PlatformKind platform, DeviceOptions device_options, - const PluginConfig &config) { - mutex_lock lock{mu_}; - return CreateSingletonInternal(platform, device_options, config); -} - -/* static */ MachineManager *MachineManager::singleton() { - mutex_lock lock{mu_}; - if (singleton_ == nullptr) { - PlatformKind platform = DetectPreferredPlatform(); - DeviceOptions options = DeviceOptions::Default(); - auto status = CreateSingletonInternal(platform, options, PluginConfig()); - if (!status.ok()) { - LOG(FATAL) - << "failed to create MachineManager singleton: " - "singleton accessor attempted lazy construction but failed: " - << status.status(); - } - return status.ValueOrDie(); - } - - return singleton_; -} - -} // namespace gputools -} // namespace perftools diff --git a/tensorflow/stream_executor/machine_manager.h b/tensorflow/stream_executor/machine_manager.h deleted file mode 100644 index 65396dd1ff..0000000000 --- a/tensorflow/stream_executor/machine_manager.h +++ /dev/null @@ -1,212 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This interface provides a machine-wide resource management singleton -// interface as a convenience for users who will want to exploit all of the GPU -// resources present on the system. -// -// To use the singleton interface: -// -// // At start of program or in your module initializer. -// // Do not call this with different sets of arguments! -// MachineManager::CreateSingletonOrDie( -// MachineManager::DetectPreferredPlatform(), DeviceOptions::Default()); -// -// // At any point after that, this convenience interface avoids you having to -// // pass those two parameters: -// StreamExecutor *device0_executor = -// MachineManager::singleton()->executor_for_device(0 /* = ordinal */); -// ... - -// ----------------- THIS CLASS IS DEPRECATED - DO NOT USE ------------------ -// This class is not suitable for open-sourcing, as it does not support -// plugins and depends on hardcoded PlatformKind enums. MultiPlatformManager and -// Platform plugins are the replacements. -// ----------------- THIS CLASS IS DEPRECATED - DO NOT USE ------------------ - -#ifndef TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_H_ -#define TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_H_ - -#include -#include -#include -#include - -#include "tensorflow/stream_executor/device_options.h" // IWYU pragma: export -#include "tensorflow/stream_executor/lib/status.h" -#include "tensorflow/stream_executor/lib/statusor.h" -#include "tensorflow/stream_executor/platform/thread_annotations.h" -#include "tensorflow/stream_executor/stream.h" -#include "tensorflow/stream_executor/stream_executor.h" - -namespace perftools { -namespace gputools { - -// MachineManager is used to instantiate and manage singleton resources for -// all the GPUs present on a machine. This basically amounts to having a -// StreamExecutor-per-device pool. -// -// Thread-safe. -class MachineManager { - public: - // Inspects the host to determine the preferred GPU execution platform. - // To force OpenCL from a build target on a machine that has both OpenCL and - // CUDA capabilities, link against the :stream_executor_prefer_opencl target. - static PlatformKind DetectPreferredPlatform(); - - // Returns the machine manager singleton. - // If the singleton has not yet been created when this is invoked, this - // creates it with resonable default options, otherwise it returns the - // already-created singleton. If there are errors during creation, this call - // will terminate the program. - static MachineManager *singleton(); - - // Returns a singleton instance of the machine manager -- it's generally - // assumed that users will have one of these for a real-world application as a - // form of resource manager. - // - // This should only be called once, at the initialization of an application, - // if at all -- MachineManager::singleton() will return a value with sensible - // default as determined by DetectPreferredPlatform. Attempts to create the - // singleton with options multiple times will result in an error. - static port::StatusOr CreateSingleton( - PlatformKind platform, DeviceOptions device_options, - const PluginConfig &config = PluginConfig()); - - // Convenience "or die" wrapper around the above call. - static MachineManager *CreateSingletonOrDie( - PlatformKind platform, DeviceOptions device_options, - const PluginConfig &config = PluginConfig()); - - // Creates a new instantiation of the MachineManager. - // Warning: generally users will want to use the singleton form, see - // MachineManager::singleton(). - // - // The machine manager has a number of devices that it detects on creation - // that does not change over the course of its lifetime. This does not support - // things like hot-plugging of GPUs or the event of GPUs dropping off the bus - // in a recoverable manner. - static port::StatusOr> Create( - PlatformKind kind, DeviceOptions options, - const PluginConfig &config = PluginConfig()); - - // Returns the number of devices visible to the machine manager. - int device_count() const; - - // Returns the StreamExecutor for one of the machine-manager visible devices. - // Checks that device_ordinal is within device_count() bound. - StreamExecutor *executor_for_device(int device_ordinal) const; - - // Returns the bus ordinal count (as determined by the span of NUMA nodes - // associated with the available devices). - int bus_count() const { return limit_numa_node_ - min_numa_node_; } - - // Returns the bus ordinal associated with a given device ordinal. - int DeviceToBus(int device_ordinal) const; - - // Returns the NUMA node associated with a given device ordinal. - int DeviceToNumaNode(int device_ordinal) const; - - // Returns the first StreamExecutor (within device_count() ordinals that has - // the corresponding bus ordinal, or nullptr if none is found. - // - // The valid bus ordinals can be enumerated by scanning through the executors - // and seeing what bus number they are on. - StreamExecutor *first_executor_for_bus(int bus_ordinal); - - // Returns the first StreamExecutor associated with the specified - // numa_node, or nullptr if none is found. - StreamExecutor *first_executor_for_numa_node(int numa_node); - - // Returns the default stream for the default executor (that returned by - // executor_for_device()). The same stream will be returned for all calls to - // stream_for_device() (with the same device_ordinal). - Stream *stream_for_device(int device_ordinal); - - // Returns the platform that this machine manager was created to target. - PlatformKind platform() const { return platform_; } - - // Enables peer access between all possible devices on this platform. - // Only dies due to failure to enable peer access for devices in which - // GetPeerAccessMap() is true. - port::Status EnablePeerAccess(); - - // Returns a map that says, for pairs (device ordinal i, device ordinal j), - // whether i can access j's memory space. - std::unique_ptr, bool>> GetPeerAccessMap(); - - private: - // Guts of the singleton creation mechanism that requires the exclusive - // singleton lock to be held, in order to prevent deadlock due to method - // composition. - static port::StatusOr CreateSingletonInternal( - PlatformKind platform, DeviceOptions options, const PluginConfig &config) - EXCLUSIVE_LOCKS_REQUIRED(mu_); - - // Private constructor used in singleton creation. - MachineManager(PlatformKind platform, DeviceOptions options, - const PluginConfig &config); - - // Populates the executors_ vector with an executor per observable device - // ordinal on the platform. Logs and returns false if any of the - // Stream Executors cannot be created. - port::Status Init(); - - // Converts a StreamExecutor's NUMA node association into a bus ordinal for - // this machine. - int ExecutorToBus(const StreamExecutor *stream_exec) const; - - // Returns the NUMA node association for the StreamExecutor. - int ExecutorToNumaNode(const StreamExecutor *stream_exec) const; - - // Mutex that guards the initialization of the machine manager static - // variable. - static mutex mu_; - - // Singleton MachineManager value -- assignment to this is protected by a - // static singleton guard clause. - static MachineManager *singleton_ GUARDED_BY(mu_); - - // Holds an executor associated with each device ordinal present in the - // system, which are the indices. Immutable after initialization. - std::vector> executors_; - - // Holds an stream associated with each device ordinal present in the - // system, which are the indices. Immutable after initialization. - std::vector> streams_; - - // The platform that this is managing for the machine. - PlatformKind platform_; - - // Options used to create StreamExecutors on each of the respective devices. - DeviceOptions device_options_; - - // Plugin configuration to use for all StreamExecutors created by this object. - PluginConfig plugin_config_; - - // The smallest NUMA node value for any device managed by this machine - // manager. Used, along with limit_numa_node_, to convert NUMA nodes into bus - // ordinals. The NUMA node space occupied by GPUs is assumed to be dense. - int min_numa_node_; - - // Larger than the NUMA node value for any device managed by this machine - // manager. - int limit_numa_node_; -}; - -} // namespace gputools -} // namespace perftools - -#endif // TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_H_ -- GitLab From 95ed2e833abd80727164270fdc299e99ab86ffaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:35:14 +0800 Subject: [PATCH 0329/1801] TST: add test case --- .../python/kernel_tests/lookup_ops_test.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index 76c790a0a2..11778d8ddb 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -573,15 +573,19 @@ class IndexToStringTableFromFileTest(test.TestCase): return vocabulary_file def test_index_to_string_table(self): - vocabulary_file = self._createVocabFile("i2f_vocab1.txt") - with self.test_session(): - table = lookup_ops.index_to_string_table_from_file( - vocabulary_file=vocabulary_file) - features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64)) - self.assertRaises(errors_impl.OpError, features.eval) - lookup_ops.tables_initializer().run() - self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), - features.eval()) + vocabulary_path = self._createVocabFile("i2f_vocab1.txt") + # vocabulary_file supports string and tensor + type_funcs = [str, constant_op.constant] + for type_func in type_funcs: + vocabulary_file = type_func(vocabulary_path) + with self.test_session(): + table = lookup_ops.index_to_string_table_from_file( + vocabulary_file=vocabulary_file) + features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64)) + self.assertRaises(errors_impl.OpError, features.eval) + lookup_ops.tables_initializer().run() + self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), + features.eval()) def test_index_to_string_table_with_default_value(self): default_value = b"NONE" -- GitLab From 603a2f3db38753cb4281f367f413e8c1975835f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:37:00 +0800 Subject: [PATCH 0330/1801] BUG: don't check tensor --- tensorflow/python/ops/lookup_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index fa58ffc37e..10b7cd7001 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -1123,8 +1123,10 @@ def index_to_string_table_from_file(vocabulary_file, ValueError: when `vocabulary_file` is empty. ValueError: when `vocab_size` is invalid. """ - if not vocabulary_file: - raise ValueError("vocabulary_file must be specified.") + if vocabulary_file is None or ( + isinstance(vocabulary_file, str) and not vocabulary_file): + raise ValueError("vocabulary_file must be specified and must not be empty.") + if vocab_size is not None and vocab_size < 1: raise ValueError("vocab_size must be greater than 0, got %d." % vocab_size) -- GitLab From bd1074ab5d2bc87d4fc37e9f6941dc138a3fb961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:39:33 +0800 Subject: [PATCH 0331/1801] DOC: add docment --- tensorflow/python/ops/lookup_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index 10b7cd7001..cb5e9d08c0 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -1110,7 +1110,7 @@ def index_to_string_table_from_file(vocabulary_file, ``` Args: - vocabulary_file: The vocabulary filename. + vocabulary_file: The vocabulary filename, may be a constant scalar `Tensor`. vocab_size: Number of the elements in the vocabulary, if known. default_value: The value to use for out-of-vocabulary indices. name: A name for this op (optional). -- GitLab From 9e966e9e540d245950dcfccdb982304dac740294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:45:21 +0800 Subject: [PATCH 0332/1801] ENH: use six.string_types --- tensorflow/python/ops/lookup_ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index cb5e9d08c0..c489a8ab6b 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -20,6 +20,7 @@ from __future__ import print_function import collections import functools +import six from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -922,7 +923,7 @@ def index_table_from_file(vocabulary_file=None, than zero. """ if vocabulary_file is None or ( - isinstance(vocabulary_file, str) and not vocabulary_file): + isinstance(vocabulary_file, six.string_types) and not vocabulary_file): raise ValueError("vocabulary_file must be specified and must not be empty.") if num_oov_buckets < 0: raise ValueError("num_oov_buckets must be greater or equal than 0, got %d." @@ -1124,7 +1125,7 @@ def index_to_string_table_from_file(vocabulary_file, ValueError: when `vocab_size` is invalid. """ if vocabulary_file is None or ( - isinstance(vocabulary_file, str) and not vocabulary_file): + isinstance(vocabulary_file, six.string_types) and not vocabulary_file): raise ValueError("vocabulary_file must be specified and must not be empty.") if vocab_size is not None and vocab_size < 1: -- GitLab From 4d5e8057ec787e25530cba7011b72fb0093f4b11 Mon Sep 17 00:00:00 2001 From: James Qin Date: Sun, 12 Nov 2017 22:22:53 -0800 Subject: [PATCH 0333/1801] Fix build breakage Now the layout of tensorflow/contrib/cudnn_rnn is similar to tensorflow/contrib/layers Delete repeated tests in cudnn_rnn_ops_test (new tests in cudnn_rnn_test). PiperOrigin-RevId: 175487631 --- tensorflow/contrib/cudnn_rnn/BUILD | 41 +--- tensorflow/contrib/cudnn_rnn/__init__.py | 8 - .../python/kernel_tests/cudnn_rnn_ops_test.py | 199 ------------------ .../cudnn_rnn/python/layers/__init__.py | 7 + 4 files changed, 9 insertions(+), 246 deletions(-) diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD index d6d53d521b..fce2c03e69 100644 --- a/tensorflow/contrib/cudnn_rnn/BUILD +++ b/tensorflow/contrib/cudnn_rnn/BUILD @@ -54,49 +54,13 @@ tf_gen_op_wrapper_py( deps = [":cudnn_rnn_ops_op_lib"], ) -tf_custom_op_py_library( - name = "cudnn_rnn_ops_py", - srcs = [ - "__init__.py", - "python/ops/cudnn_rnn_ops.py", - ], - dso = [ - ":python/ops/_cudnn_rnn_ops.so", - ], - kernels = [ - ":cudnn_rnn_kernels", - ":cudnn_rnn_ops_op_lib", - ], - srcs_version = "PY2AND3", - visibility = ["//visibility:public"], - deps = [ - ":cudnn_rnn_ops", - "//tensorflow/contrib/rnn:rnn_py", - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:common_shapes", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:init_ops", - "//tensorflow/python:layers_base", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform", - "//tensorflow/python:random_seed", - "//tensorflow/python:rnn_cell", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python:util", - "//tensorflow/python:variable_scope", - ], -) - tf_custom_op_py_library( name = "cudnn_rnn_py", srcs = [ "__init__.py", "python/layers/__init__.py", "python/layers/cudnn_rnn.py", + "python/ops/cudnn_rnn_ops.py", ], dso = [ ":python/ops/_cudnn_rnn_ops.so", @@ -109,7 +73,6 @@ tf_custom_op_py_library( visibility = ["//visibility:public"], deps = [ ":cudnn_rnn_ops", - ":cudnn_rnn_ops_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", @@ -130,7 +93,7 @@ cuda_py_test( size = "large", srcs = ["python/kernel_tests/cudnn_rnn_ops_test.py"], additional_deps = [ - ":cudnn_rnn_ops_py", + ":cudnn_rnn_py", "//tensorflow/core:protos_all_py", "//tensorflow/contrib/rnn:rnn_py", "//tensorflow/python/ops/losses:losses", diff --git a/tensorflow/contrib/cudnn_rnn/__init__.py b/tensorflow/contrib/cudnn_rnn/__init__.py index 1f7efad71f..5d8c6191f8 100644 --- a/tensorflow/contrib/cudnn_rnn/__init__.py +++ b/tensorflow/contrib/cudnn_rnn/__init__.py @@ -29,19 +29,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import sys # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.cudnn_rnn.python.layers import * # pylint: enable=unused-import,wildcard-import -from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnCompatibleGRUCell -from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnCompatibleLSTMCell -from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnGRUSaveable -from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnLSTMSaveable -from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnRNNReluSaveable -from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnRNNTanhSaveable - from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py index 9156087f33..5a667485be 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py @@ -35,15 +35,11 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import rnn as rnn_lib -from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables -from tensorflow.python.ops.losses import losses from tensorflow.python.platform import googletest from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import gradient_descent from tensorflow.python.training import saver as saver_lib CUDNN_RNN_UNIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION @@ -123,45 +119,6 @@ def _CreateParamsSavable(params, return params_saveable -def _BuildCudnnForward(rnn_mode, - num_layers, - num_units, - input_data, - is_training=False): - input_data_shape = input_data.get_shape().with_rank(3) - batch_size = input_data_shape[1].value - input_size = input_data_shape[2].value - model = _CreateModel(rnn_mode, num_layers, num_units, input_size) - - # Set zero init input states - input_h = constant_op.constant( - np.zeros([num_layers, batch_size, num_units]), dtype=dtypes.float32) - has_input_c = (rnn_mode == cudnn_rnn_ops.CUDNN_LSTM) - if has_input_c: - input_c = constant_op.constant( - np.zeros([num_layers, batch_size, num_units]), dtype=dtypes.float32) - - # Set rnn params - params_size_t = model.params_size() - params = variables.Variable( - random_ops.random_uniform([params_size_t]), validate_shape=False) - args = { - "input_data": input_data, - "input_h": input_h, - "params": params, - "is_training": is_training - } - if has_input_c: - args["input_c"] = input_c - # Build cell - output_tuple = model(**args) - - # Create savable objects for params - _CreateParamsSavable(params, model) - - return output_tuple, model - - def _MinLSTMParamSize(num_layers, num_units, input_size, @@ -181,25 +138,6 @@ def _MinLSTMParamSize(num_layers, raise ValueError("%s direction is not supported.") -def _CreateCudnnCompatibleCanonicalRNN(cudnn_model, - inputs, - scope=None): - model = cudnn_model.rnn_mode - if model not in (cudnn_rnn_ops.CUDNN_LSTM, cudnn_rnn_ops.CUDNN_GRU): - raise ValueError("%s is not supported!" % model) - - num_units = cudnn_model.num_units - num_layers = cudnn_model.num_layers - # To reuse cuDNN-trained models, must use cudnn compatible rnn cells. - if model == cudnn_rnn_ops.CUDNN_LSTM: - single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units) - else: - single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units) - cell = rnn_cell_impl.MultiRNNCell([single_cell() for _ in range(num_layers)]) - return rnn_lib.dynamic_rnn( - cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope) - - class CudnnRNNTestSaveRestore(TensorFlowTestCase): def _CompareWeights(self, lhs, rhs): @@ -436,143 +374,6 @@ class CudnnRNNTestSaveRestore(TensorFlowTestCase): self._testSaveRestoreOutput(rnn_mode, direction, dtype) -class CudnnRNNTestCompatibleRnnCells(TensorFlowTestCase): - - @unittest.skipUnless(test.is_built_with_cuda(), - "Test only applicable when running on GPUs") - def testCudnnCompatibleRnnCells(self): - configs = [ - { - "num_layers": 1, - "seq_length": 3, - "num_units": 4, - "input_size": 5, - "batch_size": 6, - }, - { - "num_layers": 2, - "seq_length": 8, - "num_units": 4, - "input_size": 8, - "batch_size": 16, - }, - { - "num_layers": 2, - "seq_length": 3, - "num_units": 4, - "input_size": 5, - "batch_size": 6, - }, - { - "num_layers": 1, - "seq_length": 2, - "num_units": 2, - "input_size": 4, - "batch_size": 1, - }, - ] - for rnn, cfg in itertools.product((cudnn_rnn_ops.CUDNN_LSTM,), configs): - self._testCudnnCompatibleRnnCells(cfg["num_layers"], cfg["seq_length"], - cfg["num_units"], cfg["input_size"], - cfg["batch_size"], rnn) - # TODO(jamesqin): Add CudnnCompatibleGRUBlockCell. - for rnn, cfg in itertools.product((cudnn_rnn_ops.CUDNN_GRU,), configs): - self._testCudnnCompatibleRnnCells(cfg["num_layers"], cfg["seq_length"], - cfg["num_units"], cfg["input_size"], - cfg["batch_size"], rnn) - - def _testCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, - input_size, batch_size, rnn_mode): - has_state_c = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM - np.random.seed(0) - # Train graph - with ops.Graph().as_default(): - random_seed.set_random_seed(299) - input_data = array_ops.placeholder( - dtypes.float32, shape=[seq_length, batch_size, input_size]) - output_tuple, cudnn_model = _BuildCudnnForward( - rnn_mode, num_layers, num_units, input_data, is_training=True) - target_output = array_ops.placeholder(dtype=dtypes.float32, shape=None) - total_sum = sum(map(math_ops.reduce_sum, output_tuple)) - - loss_op = losses.log_loss(labels=target_output, predictions=total_sum) - optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) - train_op = optimizer.minimize(loss_op) - - saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) - - # Train Cudnn model - with self.test_session( - use_gpu=True, graph=ops.get_default_graph()) as sess: - sess.run(variables.global_variables_initializer()) - # Train 128 steps - num_steps = 128 - for _ in range(num_steps): - inputs = np.random.rand(seq_length, batch_size, - input_size).astype(np.float32) - targets = np.random.rand() - sess.run( - train_op, feed_dict={input_data: inputs, - target_output: targets}) - - save_path = os.path.join(self.get_temp_dir(), - ("cudnn-rnn-%s-test" % rnn_mode)) - save_v = saver.save(sess, save_path) - self.assertEqual(save_path, save_v) - - # cuDNN inference graph - with ops.Graph().as_default(): - random_seed.set_random_seed(299) - cudnn_inputs = array_ops.placeholder( - dtypes.float32, shape=[seq_length, batch_size, input_size]) - (cudnn_output_tuple, cudnn_model) = _BuildCudnnForward( - rnn_mode, num_layers, num_units, cudnn_inputs, is_training=False) - saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) - - inference_input = np.random.rand(seq_length, batch_size, - input_size).astype(np.float32) - with self.test_session( - use_gpu=True, graph=ops.get_default_graph()) as sess: - sess.run(variables.global_variables_initializer()) - saver.restore(sess, save_path) - - # Cudnn inference - cudnn_output = sess.run( - cudnn_output_tuple, feed_dict={cudnn_inputs: inference_input}) - - # Canonical RNN inference graph - with ops.Graph().as_default(): - random_seed.set_random_seed(299) - cell_inputs = array_ops.placeholder( - dtypes.float32, shape=[seq_length, batch_size, input_size]) - (output, states) = _CreateCudnnCompatibleCanonicalRNN( - cudnn_model, cell_inputs) - saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) - - with self.test_session( - use_gpu=True, graph=ops.get_default_graph()) as sess: - saver.restore(sess, save_path) - - # BlockCell inference - output_v, states_v = sess.run( - [output, states], feed_dict={cell_inputs: inference_input}) - - # output across timestamps are packed into one tensor. - self.assertAllClose(cudnn_output[0], output_v, atol=1e-6, rtol=1e-6) - - for i in range(num_layers): - if has_state_c: - # output_h - self.assertAllClose( - cudnn_output[1][i, :], states_v[i].h, atol=1e-6, rtol=1e-6) - # output_c - self.assertAllClose( - cudnn_output[2][i, :], states_v[i].c, atol=1e-6, rtol=1e-6) - else: - self.assertAllClose( - cudnn_output[1][i, :], states_v[i], atol=1e-6, rtol=1e-6) - - class CudnnRNNTestParamsSize(TensorFlowTestCase): def _testOneLSTMParamsSize(self, num_layers, num_units, input_size, diff --git a/tensorflow/contrib/cudnn_rnn/python/layers/__init__.py b/tensorflow/contrib/cudnn_rnn/python/layers/__init__.py index 5feee3d10d..f09466b631 100644 --- a/tensorflow/contrib/cudnn_rnn/python/layers/__init__.py +++ b/tensorflow/contrib/cudnn_rnn/python/layers/__init__.py @@ -22,3 +22,10 @@ import sys # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.cudnn_rnn.python.layers.cudnn_rnn import * # pylint: enable=unused-import,wildcard-import + +from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnCompatibleGRUCell +from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnCompatibleLSTMCell +from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnGRUSaveable +from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnLSTMSaveable +from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnRNNReluSaveable +from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnRNNTanhSaveable -- GitLab From 6c1ab6d34213057f5d70d194094ff48137815ae3 Mon Sep 17 00:00:00 2001 From: Daniel Zhang Date: Mon, 13 Nov 2017 14:43:54 +0800 Subject: [PATCH 0334/1801] Fix control input name for quantize node --- tensorflow/tools/graph_transforms/quantize_nodes.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc index 97e8f77616..5ccd88cfa1 100644 --- a/tensorflow/tools/graph_transforms/quantize_nodes.cc +++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc @@ -759,7 +759,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reshape_dims; reshape_dims.set_op("Const"); reshape_dims.set_name(unique_input_name + "/reshape_dims"); - AddNodeInput("^" + input_name, &reshape_dims); + AddNodeInput("^" + NodeNameFromInput(input_name), &reshape_dims); SetNodeAttr("dtype", DT_INT32, &reshape_dims); Tensor reshape_dims_tensor(DT_INT32, {1}); reshape_dims_tensor.flat()(0) = -1; @@ -769,7 +769,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reduction_dims; reduction_dims.set_op("Const"); reduction_dims.set_name(unique_input_name + "/reduction_dims"); - AddNodeInput("^" + input_name, &reduction_dims); + AddNodeInput("^" + NodeNameFromInput(input_name), &reduction_dims); SetNodeAttr("dtype", DT_INT32, &reduction_dims); Tensor reduction_dims_tensor(DT_INT32, {1}); reduction_dims_tensor.flat()(0) = 0; -- GitLab From ce20a1c47bb5dbce10d0c8b91317a1ab19dc2fb8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 06:09:31 -0800 Subject: [PATCH 0335/1801] Exposing IsNan op on Android to enable better error checking. PiperOrigin-RevId: 175522095 --- tensorflow/contrib/makefile/tf_op_files.txt | 1 + tensorflow/core/kernels/BUILD | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 5f06106c1d..fbcda0421e 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -172,6 +172,7 @@ tensorflow/core/kernels/cwise_op_logical_or.cc tensorflow/core/kernels/cwise_op_log.cc tensorflow/core/kernels/cwise_op_less.cc tensorflow/core/kernels/cwise_op_less_equal.cc +tensorflow/core/kernels/cwise_op_isnan.cc tensorflow/core/kernels/cwise_op_isfinite.cc tensorflow/core/kernels/cwise_op_invert.cc tensorflow/core/kernels/cwise_op_greater_equal.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 4169e842da..5e19effe3d 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4602,6 +4602,7 @@ filegroup( "cwise_op_greater_equal.cc", "cwise_op_invert.cc", "cwise_op_isfinite.cc", + "cwise_op_isnan.cc", "cwise_op_left_shift.cc", "cwise_op_less.cc", "cwise_op_less_equal.cc", -- GitLab From bd9d8064c8342a59db03301b4afd10238002e344 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 07:03:13 -0800 Subject: [PATCH 0336/1801] Small change to how damping constants get transformed. Switched from using tf.square to **0.5. This will preserve them as floats if they are already, so the dictionaries work properly. See b/62785471 PiperOrigin-RevId: 175526788 --- tensorflow/contrib/kfac/python/ops/fisher_blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index a6fdf01fe7..e822a1213a 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -468,8 +468,8 @@ class KroneckerProductFB(FisherBlock): pi = utils.compute_pi(self._input_factor.get_cov(), self._output_factor.get_cov()) - self._input_damping = math_ops.sqrt(damping) * pi - self._output_damping = math_ops.sqrt(damping) / pi + self._input_damping = (damping**0.5) * pi + self._output_damping = (damping**0.5) / pi self._input_factor.register_damped_inverse(self._input_damping) self._output_factor.register_damped_inverse(self._output_damping) -- GitLab From 23e94a4ceb5c0dd2fac8e53d6ceadd2d2b3e9b5e Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 13 Nov 2017 07:51:58 -0800 Subject: [PATCH 0337/1801] Moves tape.watch_variable to C. Prequel to moving the tape stack to C. PiperOrigin-RevId: 175531148 --- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/pywrap_tfe.h | 6 ++ tensorflow/python/eager/pywrap_tfe_src.cc | 97 ++++++++++++++++------- tensorflow/python/eager/tape.py | 12 +-- tensorflow/python/pywrap_tfe.i | 2 + 5 files changed, 81 insertions(+), 38 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index f9d6d8aa5e..33601a1edc 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -350,9 +350,9 @@ def implicit_val_and_grad(f): raise ValueError("Cannot differentiate a function that returns None; " "did you forget to return a value from {}?".format( f.__name__)) - variables = tape.top_tape_watched_variables() finally: popped_tape = tape.pop_tape() + variables = popped_tape.watched_variables() sources = [x.handle for x in variables] if not sources: diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 6705483f3b..a67519f9a2 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -114,6 +114,12 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, PyObject* target, PyObject* sources, PyObject* output_gradients, TF_Status* status); +// Watches the given variable object on the given tape. +void TFE_Py_TapeWatchVariable(PyObject* tape, PyObject* variable); + +// Returns the set of variables watched by the given tape. +PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape); + // Returns an EagerTensor of dimension [len(`tensor_list`)] containing // the `slice_dim`'th dimension of each tensor in `tensor_list`. In other words, // TFE_Py_TensorShapeSlice takes a slice of dimensions of tensors in diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 372a6bb4b7..c3685d6222 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -443,7 +443,53 @@ void TFE_DeleteContextCapsule(PyObject* context) { TF_DeleteStatus(status); } -using GradientTape = tensorflow::eager::GradientTape; +static tensorflow::int64 MakeInt(PyObject* integer) { +#if PY_MAJOR_VERSION >= 3 + return PyLong_AsLong(integer); +#else + return PyInt_AsLong(integer); +#endif +} + +static tensorflow::int64 FastTensorId(PyObject* tensor) { + if (EagerTensor_CheckExact(tensor)) { + return EagerTensor_id(tensor); + } + PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); + if (id_field == nullptr) { + return -1; + } + tensorflow::int64 id = MakeInt(id_field); + Py_DECREF(id_field); + return id; +} + +class GradientTape + : public tensorflow::eager::GradientTape { + public: + GradientTape() {} + + void WatchVariable(PyObject* v) { + watched_variables_.insert(v); + Py_INCREF(v); + PyObject* handle = PyObject_GetAttrString(v, "handle"); + if (handle == nullptr) { + return; + } + tensorflow::int64 id = FastTensorId(handle); + Py_DECREF(handle); + if (!PyErr_Occurred()) { + this->Watch(id); + } + } + + const std::unordered_set WatchedVariables() { + return watched_variables_; + } + + private: + std::unordered_set watched_variables_; +}; typedef struct { PyObject_HEAD @@ -487,14 +533,6 @@ PyObject* TFE_Py_NewTape() { return reinterpret_cast(tape); } -static tensorflow::int64 MakeInt(PyObject* integer) { -#if PY_MAJOR_VERSION >= 3 - return PyLong_AsLong(integer); -#else - return PyInt_AsLong(integer); -#endif -} - static std::vector MakeIntList(PyObject* list) { if (list == Py_None) { return {}; @@ -534,16 +572,7 @@ PyObject* TFE_Py_TapeShouldRecord(PyObject* py_tape, PyObject* tensors) { tensor_ids.reserve(len); for (int i = 0; i < len; ++i) { PyObject* item = PySequence_Fast_GET_ITEM(seq, i); - if (EagerTensor_CheckExact(item)) { - tensor_ids.push_back(EagerTensor_id(item)); - } else { - PyObject* id_field = PyObject_GetAttrString(item, "_id"); - if (id_field == nullptr) { - return nullptr; - } - tensor_ids.push_back(MakeInt(id_field)); - Py_DECREF(id_field); - } + tensor_ids.push_back(FastTensorId(item)); } Py_DECREF(seq); TFE_Py_Tape* tape = reinterpret_cast(py_tape); @@ -564,10 +593,8 @@ static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { tensorflow::int64 id = EagerTensor_id(tensor); return tensorflow::eager::TapeTensor{id, t->t.dtype(), t->t.shape()}; } - PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); - tensorflow::int64 id = MakeInt(id_field); - Py_DECREF(id_field); - if (PyErr_Occurred() != nullptr) { + tensorflow::int64 id = FastTensorId(tensor); + if (PyErr_Occurred()) { return tensorflow::eager::TapeTensor{ id, static_cast(0), tensorflow::TensorShape({})}; } @@ -610,18 +637,30 @@ std::vector MakeTensorIDList(PyObject* tensors) { list.reserve(len); for (int i = 0; i < len; ++i) { PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i); - if (EagerTensor_CheckExact(tensor)) { - list.push_back(EagerTensor_id(tensor)); - } else { - PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); - list.push_back(MakeInt(id_field)); - Py_DECREF(id_field); + list.push_back(FastTensorId(tensor)); + if (PyErr_Occurred()) { + return list; } } Py_DECREF(seq); return list; } +void TFE_Py_TapeWatchVariable(PyObject* tape, PyObject* variable) { + reinterpret_cast(tape)->tape->WatchVariable(variable); +} + +PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape) { + const std::unordered_set& watched_variables = + reinterpret_cast(tape)->tape->WatchedVariables(); + PyObject* result = PySet_New(nullptr); + for (PyObject* variable : watched_variables) { + PySet_Add(result, variable); + Py_DECREF(variable); + } + return result; +} + void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, PyObject* output_tensors, PyObject* input_tensors, diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index afbad183b0..fb6b62a3e0 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -59,7 +59,6 @@ class Tape(object): def __init__(self): self._tape = pywrap_tensorflow.TFE_Py_NewTape() - self._watched_variables = set() def should_record(self, tensors): """Returns true if any tensor should be recorded. @@ -78,8 +77,10 @@ class Tape(object): pywrap_tensorflow.TFE_Py_TapeWatch(self._tape, tid(tensor)) def watch_variable(self, v): - self._watched_variables.add(v) - self.watch(v.handle) + pywrap_tensorflow.TFE_Py_TapeWatchVariable(self._tape, v) + + def watched_variables(self): + return pywrap_tensorflow.TFE_Py_TapeWatchedVariables(self._tape) def record_operation(self, op_type, output_tensors, input_tensors, backward_function): @@ -177,11 +178,6 @@ def delete_trace(tensor_id): t.delete_trace(tensor_id) -def top_tape_watched_variables(): - t = _tape_stack.stack[-1] - return t._watched_variables # pylint: disable=protected-access - - def could_possibly_record(): """Returns True if any tape is active.""" return len(_tape_stack.stack) > 0 # pylint: disable=g-explicit-length-test diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index cbacf458a0..5ca0e57286 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -30,6 +30,8 @@ limitations under the License. %rename("%s") TFE_Py_TapeDeleteTrace; %rename("%s") TFE_Py_TapeRecordOperation; %rename("%s") TFE_Py_TapeGradient; +%rename("%s") TFE_Py_TapeWatchVariable; +%rename("%s") TFE_Py_TapeWatchedVariables; %rename("%s") TFE_NewContextOptions; %rename("%s") TFE_ContextOptionsSetConfig; %rename("%s") TFE_ContextOptionsSetDevicePlacementPolicy; -- GitLab From c869024d2e28c63e5448566620cd3755db2aae17 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 13 Nov 2017 09:08:05 -0800 Subject: [PATCH 0338/1801] Make Operation._create_c_op() a module-level function. It doesn't depend on the Operation at all, and this will allow it to be used in tests. PiperOrigin-RevId: 175540022 --- tensorflow/python/framework/ops.py | 95 +++++++++++++++--------------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 813b886775..aedd8d87d9 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1399,6 +1399,52 @@ _VALID_OP_NAME_REGEX = re.compile("^[A-Za-z0-9.][A-Za-z0-9_.\\-/]*$") _VALID_SCOPE_NAME_REGEX = re.compile("^[A-Za-z0-9_.\\-/]*$") +def _create_c_op(graph, node_def, inputs, control_inputs): + """Creates a TF_Operation. + + Args: + graph: a `Graph`. + node_def: `node_def_pb2.NodeDef` for the operation to create. + inputs: A list of `Tensor`s (corresponding to scalar inputs) and lists of + `Tensor`s (corresponding to sequence inputs, e.g. "int64 * N", + "list(int64)"). The length of the list should be equal to the number of + inputs specified by this operation's op def. + control_inputs: A list of `Operation`s to set as control dependencies. + + Returns: + A wrapped TF_Operation*. + """ + # pylint: disable=protected-access + op_desc = c_api.TF_NewOperation(graph._c_graph, + compat.as_str(node_def.op), + compat.as_str(node_def.name)) + # Add inputs + for op_input in inputs: + if isinstance(op_input, (list, tuple)): + c_api.TF_AddInputList(op_desc, [t._as_tf_output() for t in op_input]) + else: + c_api.TF_AddInput(op_desc, op_input._as_tf_output()) + + # Add control inputs + for control_input in control_inputs: + c_api.TF_AddControlInput(op_desc, control_input._c_op) + # pylint: enable=protected-access + + # Add attrs + for name, attr_value in node_def.attr.items(): + serialized = attr_value.SerializeToString() + # TODO(skyewm): this creates and deletes a new TF_Status for every attr. + # It might be worth creating a convenient way to re-use the same status. + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_SetAttrValueProto(op_desc, + compat.as_str(name), serialized, status) + + with errors.raise_exception_on_not_ok_status() as status: + c_op = c_api.TF_FinishOperation(op_desc, status) + + return c_op + + class Operation(object): """Represents a graph node that performs computation on tensors. @@ -1553,53 +1599,8 @@ class Operation(object): # If no OpDef is specified, assume all inputs are scalar. grouped_inputs = self._inputs - self._c_op = self._create_c_op(self._graph, self._node_def, - grouped_inputs, self._control_inputs) - - def _create_c_op(self, graph, node_def, inputs, control_inputs): - """Creates a TF_Operation. - - Args: - graph: a `Graph`. - node_def: `node_def_pb2.NodeDef` for the operation to create. - inputs: A list of `Tensor`s (corresponding to scalar inputs) and lists of - `Tensor`s (corresponding to sequence inputs, e.g. "int64 * N", - "list(int64)"). The length of the list should be equal to the number of - inputs specified by this operation's op def. - control_inputs: A list of `Operation`s to set as control dependencies. - - Returns: - A wrapped TF_Operation*. - """ - # pylint: disable=protected-access - op_desc = c_api.TF_NewOperation(graph._c_graph, - compat.as_str(node_def.op), - compat.as_str(node_def.name)) - # Add inputs - for op_input in inputs: - if isinstance(op_input, (list, tuple)): - c_api.TF_AddInputList(op_desc, [t._as_tf_output() for t in op_input]) - else: - c_api.TF_AddInput(op_desc, op_input._as_tf_output()) - - # Add control inputs - for control_input in control_inputs: - c_api.TF_AddControlInput(op_desc, control_input._c_op) - # pylint: enable=protected-access - - # Add attrs - for name, attr_value in node_def.attr.items(): - serialized = attr_value.SerializeToString() - # TODO(skyewm): this creates and deletes a new TF_Status for every attr. - # It might be worth creating a convenient way to re-use the same status. - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_SetAttrValueProto(op_desc, - compat.as_str(name), serialized, status) - - with errors.raise_exception_on_not_ok_status() as status: - c_op = c_api.TF_FinishOperation(op_desc, status) - - return c_op + self._c_op = _create_c_op(self._graph, self._node_def, grouped_inputs, + self._control_inputs) def _reconstruct_sequence_inputs(self, op_def, inputs, attrs): """Regroups a flat list of input tensors into scalar and sequence inputs. -- GitLab From 9581b8ee3a9088093c59b1c94801ecc613dd0226 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 09:53:38 -0800 Subject: [PATCH 0339/1801] Changed nodes_to_rewrite in quantize_training.cc from a nontrivially destructible static unordered_set to a trivially destructible static unordered_set pointer. PiperOrigin-RevId: 175545750 --- tensorflow/core/graph/quantize_training.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc index b74fa2127e..d9cb55f448 100644 --- a/tensorflow/core/graph/quantize_training.cc +++ b/tensorflow/core/graph/quantize_training.cc @@ -41,8 +41,8 @@ const uint32 kAllowedInputs = 2; const float kEMADecay = 0.999; // Node types to rewrite. Insert quantize_and_dequantize op for their inputs. -const std::unordered_set nodes_to_rewrite{ - "MatMul", "Conv2D"}; +const auto* nodes_to_rewrite = + new std::unordered_set{"MatMul", "Conv2D"}; // Contains necessary parameters to convert an edge. struct EdgeToConvert { @@ -602,7 +602,8 @@ Status DoQuantizeTraining(int32 num_bits, const string& quant_op_type, int potential_input = 0; std::vector target_edges; for (Node* node : graph->nodes()) { - if (nodes_to_rewrite.find(node->type_string()) != nodes_to_rewrite.end() && + if (nodes_to_rewrite->find(node->type_string()) != + nodes_to_rewrite->end() && !IsGradientNode(graph, node)) { // Find out which types are the inputs and convert them accordingly. // 1. Const/Variable OP: This is quantized as signed tensors with no given -- GitLab From 4b9238b2bce08dbf6dc433d1c4911043dd60403f Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Mon, 13 Nov 2017 09:56:12 -0800 Subject: [PATCH 0340/1801] Support non-scalar variant device copy. PiperOrigin-RevId: 175546097 --- tensorflow/core/common_runtime/copy_tensor.cc | 93 +++++++++---------- .../core/common_runtime/gpu/gpu_device.cc | 29 +++--- .../core/framework/variant_op_copy_test.cc | 67 +++++++------ 3 files changed, 99 insertions(+), 90 deletions(-) diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc index 65ffdba6b3..9084081119 100644 --- a/tensorflow/core/common_runtime/copy_tensor.cc +++ b/tensorflow/core/common_runtime/copy_tensor.cc @@ -52,15 +52,7 @@ void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator, Device* dst, Tensor* output, DeviceContext* recv_dev_context, StatusCallback done) { if (input->dtype() == DT_VARIANT) { - if (input->shape().dims() != 0) { - // TODO(b/67311047): Expand support to non-singleton variants? - Status err = errors::Unimplemented( - "CopyTensor::ViaDMA: Only singleton Variants are " - "supported. Tensor has shape: ", - input->shape().DebugString()); - done(err); - } - Tensor copy(cpu_allocator, DT_VARIANT, TensorShape({})); + Tensor copy(cpu_allocator, DT_VARIANT, input->shape()); auto* status_cb = new ReffedStatusCallback(std::move(done)); core::ScopedUnref status_cb_unref(status_cb); @@ -93,14 +85,19 @@ void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator, }, std::move(wrapped_done), std::placeholders::_1, std::placeholders::_2); - const Variant& v = input->scalar()(); - Variant* v_out = &(copy.scalar()()); - Status s_copy_init = - VariantDeviceCopy(VariantDeviceCopyDirection::HOST_TO_DEVICE, v, v_out, - std::move(copier)); - if (!s_copy_init.ok()) { - status_cb->UpdateStatus(s_copy_init); - } else { + const Variant* v = input->flat().data(); + Variant* v_out = copy.flat().data(); + Status s_copy_init; + for (int64 i = 0; i < input->NumElements(); ++i) { + s_copy_init = VariantDeviceCopy( + VariantDeviceCopyDirection::HOST_TO_DEVICE, v[i], &v_out[i], + (input->NumElements() == 1) ? std::move(copier) : copier); + if (!s_copy_init.ok()) { + status_cb->UpdateStatus(s_copy_init); + break; + } + } + if (s_copy_init.ok()) { *output = std::move(copy); } } else { @@ -114,15 +111,7 @@ void CopyDeviceToHost(const Tensor* input, Allocator* cpu_allocator, Device* src, Tensor* output, DeviceContext* send_dev_context, StatusCallback done) { if (input->dtype() == DT_VARIANT) { - if (input->shape().dims() != 0) { - // TODO(b/67311047): Expand support to non-singleton variants? - done(errors::Unimplemented( - "CopyTensor::ViaDMA: Only singleton Variants are " - "supported. Tensor has shape: ", - input->shape().DebugString())); - return; - } - Tensor copy(cpu_allocator, DT_VARIANT, TensorShape({})); + Tensor copy(cpu_allocator, DT_VARIANT, input->shape()); auto* status_cb = new ReffedStatusCallback(std::move(done)); core::ScopedUnref status_cb_unref(status_cb); @@ -155,14 +144,19 @@ void CopyDeviceToHost(const Tensor* input, Allocator* cpu_allocator, }, std::move(wrapped_done), std::placeholders::_1, std::placeholders::_2); - const Variant& v = input->scalar()(); - Variant* v_out = &(copy.scalar()()); - Status s_copy_init = - VariantDeviceCopy(VariantDeviceCopyDirection::DEVICE_TO_HOST, v, v_out, - std::move(copier)); - if (!s_copy_init.ok()) { - status_cb->UpdateStatus(s_copy_init); - } else { + const Variant* v = input->flat().data(); + Variant* v_out = copy.flat().data(); + Status s_copy_init; + for (int64 i = 0; i < input->NumElements(); ++i) { + s_copy_init = VariantDeviceCopy( + VariantDeviceCopyDirection::DEVICE_TO_HOST, v[i], &v_out[i], + (input->NumElements() == 1) ? std::move(copier) : copier); + if (!s_copy_init.ok()) { + status_cb->UpdateStatus(s_copy_init); + break; + } + } + if (s_copy_init.ok()) { *output = std::move(copy); } } else { @@ -180,15 +174,7 @@ void CopyDeviceToDevice(CopyTensor::CopyFunction copy_function, const Tensor* input, Tensor* output, StatusCallback done) { if (input->dtype() == DT_VARIANT) { - if (input->shape().dims() != 0) { - // TODO(b/67311047): Expand support to non-singleton variants? - done(errors::Unimplemented( - "CopyTensor::ViaDMA: Only singleton Variants are " - "supported. Tensor has shape: ", - input->shape().DebugString())); - return; - } - Tensor copy(cpu_allocator, DT_VARIANT, TensorShape({})); + Tensor copy(cpu_allocator, DT_VARIANT, input->shape()); auto* status_cb = new ReffedStatusCallback(std::move(done)); core::ScopedUnref status_cb_unref(status_cb); @@ -223,14 +209,19 @@ void CopyDeviceToDevice(CopyTensor::CopyFunction copy_function, }, std::move(wrapped_done), std::placeholders::_1, std::placeholders::_2); - const Variant& v = input->scalar()(); - Variant* v_out = &(copy.scalar()()); - Status s_copy_init = - VariantDeviceCopy(VariantDeviceCopyDirection::DEVICE_TO_DEVICE, v, - v_out, std::move(copier)); - if (!s_copy_init.ok()) { - status_cb->UpdateStatus(s_copy_init); - } else { + const Variant* v = input->flat().data(); + Variant* v_out = copy.flat().data(); + Status s_copy_init; + for (int64 i = 0; i < input->NumElements(); ++i) { + s_copy_init = VariantDeviceCopy( + VariantDeviceCopyDirection::DEVICE_TO_DEVICE, v[i], &v_out[i], + (input->NumElements() == 1) ? std::move(copier) : copier); + if (!s_copy_init.ok()) { + status_cb->UpdateStatus(s_copy_init); + break; + } + } + if (s_copy_init.ok()) { *output = std::move(copy); } } else { diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index fce8bc61f4..5a7d96445e 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -539,16 +539,9 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto, } if (parsed.dtype() == DT_VARIANT) { - if (parsed.shape().dims() != 0) { - // TODO(b/67311047): Expand support to non-singleton variants? - return errors::Unimplemented( - "GPUDevice::MakeTensorFromProto: Only singleton Variants are " - "supported. Tensor has shape: ", - parsed.shape().DebugString()); - } - const Variant& from = parsed.scalar()(); - Tensor copy(cpu_allocator(), DT_VARIANT, TensorShape({})); - Variant* copy_variant = &(copy.scalar()()); + const Variant* from = parsed.flat().data(); + Tensor copy(cpu_allocator(), DT_VARIANT, parsed.shape()); + Variant* copy_variant = copy.flat().data(); std::list notifications; Status copy_status; @@ -566,12 +559,22 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto, n.Notify(); }); }; - TF_RETURN_IF_ERROR( - VariantDeviceCopy(VariantDeviceCopyDirection::HOST_TO_DEVICE, from, - copy_variant, std::move(copier))); + Status s; + for (int64 ix = 0; ix < parsed.NumElements(); ++ix) { + s = VariantDeviceCopy( + VariantDeviceCopyDirection::HOST_TO_DEVICE, from[ix], + ©_variant[ix], + parsed.NumElements() == 1 ? std::move(copier) : copier); + if (!s.ok()) { + break; + } + } for (auto& n : notifications) { n.WaitForNotification(); } + if (!s.ok()) { + return s; + } *tensor = std::move(copy); return copy_status; } else { diff --git a/tensorflow/core/framework/variant_op_copy_test.cc b/tensorflow/core/framework/variant_op_copy_test.cc index 205f2a8370..85e014f804 100644 --- a/tensorflow/core/framework/variant_op_copy_test.cc +++ b/tensorflow/core/framework/variant_op_copy_test.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" @@ -108,12 +109,17 @@ class CreateTestVariantOp : public OpKernel { public: explicit CreateTestVariantOp(OpKernelConstruction* c) : OpKernel(c) {} void Compute(OpKernelContext* c) override { + // Take the scalar tensor fed as input, and emit a Tensor + // containing 10 Variants (StoredTensorValues), both containing + // the input tensor. const Tensor& stored_t = c->input(0); Tensor* out; - OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape({}), &out)); + OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape({10}), &out)); StoredTensorValue store{stored_t}; auto t = out->flat(); - t(0) = store; + for (int i = 0; i < 10; ++i) { + t(i) = store; + } CHECK_EQ("StoredTensorValue", t(0).TypeName()); } }; @@ -175,7 +181,7 @@ TEST(VariantOpCopyTest, CreateConstOnCPU) { TF_ASSERT_OK(root.status()); ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({create_const}, &outputs)); + TF_CHECK_OK(session.Run({create_const}, &outputs)); EXPECT_EQ(1, outputs.size()); EXPECT_EQ(DT_VARIANT, outputs[0].dtype()); EXPECT_EQ(0, outputs[0].dims()); @@ -212,7 +218,7 @@ TEST(VariantOpCopyTest, CreateConstOnGPU) { int copy_to_gpu_before = *GetCopyCPUToGPUCounter(); int copy_to_cpu_before = *GetCopyGPUToCPUCounter(); - TF_EXPECT_OK(session.Run({create_const}, &outputs)); + TF_CHECK_OK(session.Run({create_const}, &outputs)); int copy_to_cpu_after = *GetCopyGPUToCPUCounter(); int copy_to_gpu_after = *GetCopyCPUToGPUCounter(); @@ -261,7 +267,7 @@ TEST(VariantOpCopyTest, CreateConstOnGPUFailsGracefully) { TEST(VariantOpCopyTest, CreateCopyCPUToCPU) { Scope root = Scope::NewRootScope().WithDevice("/cpu:0"); Tensor t_42(DT_INT32, TensorShape({})); - t_42.scalar()() = 42; + t_42.flat()(0) = 42; Output create_op = CreateTestVariant(root, t_42); Output identity = ops::Identity(root, create_op); @@ -269,14 +275,17 @@ TEST(VariantOpCopyTest, CreateCopyCPUToCPU) { ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({create_op, identity}, &outputs)); + TF_CHECK_OK(session.Run({create_op, identity}, &outputs)); EXPECT_EQ(2, outputs.size()); - const Variant& r1 = outputs[1].scalar()(); - - EXPECT_EQ("StoredTensorValue", r1.TypeName()); - const StoredTensorValue* v1 = r1.get(); - EXPECT_NE(v1, nullptr); - EXPECT_EQ(42, v1->stored.scalar()()); + EXPECT_EQ(10, outputs[1].dim_size(0)); + auto output = outputs[1].flat(); + for (int i = 0; i < 10; ++i) { + const Variant& r1 = output(i); + EXPECT_EQ("StoredTensorValue", r1.TypeName()); + const StoredTensorValue* v1 = r1.get(); + EXPECT_NE(v1, nullptr); + EXPECT_EQ(42, v1->stored.scalar()()); + } } TEST(VariantOpCopyTest, CreateCopyCPUToCPUString) { @@ -290,14 +299,17 @@ TEST(VariantOpCopyTest, CreateCopyCPUToCPUString) { ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({create_op, identity}, &outputs)); + TF_CHECK_OK(session.Run({create_op, identity}, &outputs)); EXPECT_EQ(2, outputs.size()); - const Variant& r1 = outputs[1].scalar()(); - - EXPECT_EQ("StoredTensorValue", r1.TypeName()); - const StoredTensorValue* v1 = r1.get(); - EXPECT_NE(v1, nullptr); - EXPECT_EQ("hi", v1->stored.scalar()()); + EXPECT_EQ(10, outputs[1].dim_size(0)); + auto output = outputs[1].flat(); + for (int i = 0; i < 10; ++i) { + const Variant& r1 = output(i); + EXPECT_EQ("StoredTensorValue", r1.TypeName()); + const StoredTensorValue* v1 = r1.get(); + EXPECT_NE(v1, nullptr); + EXPECT_EQ("hi", v1->stored.scalar()()); + } } TEST(VariantOpCopyTest, CreateCopyCPUToGPU) { @@ -318,7 +330,7 @@ TEST(VariantOpCopyTest, CreateCopyCPUToGPU) { int copy_to_cpu_before = *GetCopyGPUToCPUCounter(); // Force the identity to run on GPU, and then the data to be copied // back to CPU for the final output. - TF_EXPECT_OK(session.Run({create_op, identity}, &outputs)); + TF_CHECK_OK(session.Run({create_op, identity}, &outputs)); int copy_to_cpu_after = *GetCopyGPUToCPUCounter(); int copy_to_gpu_after = *GetCopyCPUToGPUCounter(); @@ -326,12 +338,15 @@ TEST(VariantOpCopyTest, CreateCopyCPUToGPU) { EXPECT_GT(copy_to_gpu_after - copy_to_gpu_before, 0); EXPECT_EQ(2, outputs.size()); - const Variant& r1 = outputs[1].scalar()(); - - EXPECT_EQ("StoredTensorValue", r1.TypeName()); - const StoredTensorValue* v1 = r1.get(); - EXPECT_NE(v1, nullptr); - EXPECT_EQ(42, v1->stored.scalar()()); + EXPECT_EQ(10, outputs[1].dim_size(0)); + auto output = outputs[1].flat(); + for (int i = 0; i < 10; ++i) { + const Variant& r1 = output(i); + EXPECT_EQ("StoredTensorValue", r1.TypeName()); + const StoredTensorValue* v1 = r1.get(); + EXPECT_NE(v1, nullptr); + EXPECT_EQ(42, v1->stored.scalar()()); + } } TEST(VariantOpCopyTest, CreateCopyCPUToGPUStringFailsSafely) { -- GitLab From ac24c8fa779cdceb30ed656e2220a2ee04fead42 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 10:20:34 -0800 Subject: [PATCH 0341/1801] Clarifies py_func docs about copy/reference behavior in input args. PiperOrigin-RevId: 175549991 --- tensorflow/python/ops/script_ops.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 45d681c3d5..2c3667dffe 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -163,6 +163,12 @@ def py_func(func, inp, Tout, stateful=True, name=None): having element types that match the corresponding `tf.Tensor` objects in `inp`, and returns a list of `ndarray` objects (or a single `ndarray`) having element types that match the corresponding values in `Tout`. + Important Note: Input and output numpy `ndarray`s of `func` are not + guaranteed to be copies. In some cases their underlying memory will be + shared with the corresponding TensorFlow tensors. + In-place modification or storing `func` input or return values in + python datastructures without explicit (np.)copy + can have non-deterministic consequences. inp: A list of `Tensor` objects. Tout: A list or tuple of tensorflow data types or a single tensorflow data type if there is only one, indicating what `func` returns. -- GitLab From 92f40bfe3beaf087efbda8412bf129a12bcd9db2 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 13 Nov 2017 11:14:39 -0800 Subject: [PATCH 0342/1801] Supporting sparse tensors as inputs and outputs for user-defined functions passed into tf.data transformations. PiperOrigin-RevId: 175559045 --- .../contrib/data/python/kernel_tests/BUILD | 2 + .../kernel_tests/batch_dataset_op_test.py | 28 +++ .../kernel_tests/filter_dataset_op_test.py | 31 +++ .../kernel_tests/flat_map_dataset_op_test.py | 164 ++------------ .../interleave_dataset_op_test.py | 201 +++++++++++++++++ .../kernel_tests/map_dataset_op_test.py | 56 ++++- tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/batching.py | 10 +- .../contrib/data/python/ops/error_ops.py | 4 +- .../contrib/data/python/ops/grouping.py | 10 +- .../contrib/data/python/ops/interleave_ops.py | 10 +- tensorflow/contrib/data/python/ops/readers.py | 38 +--- .../contrib/data/python/ops/scan_ops.py | 9 +- .../core/kernels/serialize_sparse_op.cc | 109 +++++++++- tensorflow/core/ops/sparse_ops.cc | 28 +++ tensorflow/python/data/__init__.py | 4 +- tensorflow/python/data/ops/BUILD | 2 + tensorflow/python/data/ops/dataset_ops.py | 155 ++++++++----- tensorflow/python/data/ops/iterator_ops.py | 42 ++-- tensorflow/python/data/util/BUILD | 28 +++ tensorflow/python/data/util/sparse.py | 163 ++++++++++++++ tensorflow/python/data/util/sparse_test.py | 141 ++++++++++++ tensorflow/python/kernel_tests/BUILD | 24 +- .../kernel_tests/batch_dataset_op_test.py | 17 ++ .../kernel_tests/filter_dataset_op_test.py | 31 +++ .../kernel_tests/flat_map_dataset_op_test.py | 164 ++------------ .../interleave_dataset_op_test.py | 205 ++++++++++++++++++ .../kernel_tests/map_dataset_op_test.py | 52 +++++ tensorflow/python/ops/hidden_ops.txt | 1 + tensorflow/python/ops/sparse_ops.py | 24 ++ .../golden/tensorflow.data.-sparse-type.pbtxt | 13 ++ .../tools/api/golden/tensorflow.data.pbtxt | 4 + 32 files changed, 1351 insertions(+), 420 deletions(-) create mode 100644 tensorflow/python/data/util/sparse.py create mode 100644 tensorflow/python/data/util/sparse_test.py create mode 100644 tensorflow/python/kernel_tests/interleave_dataset_op_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 78e1d4937a..2b3843b97b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -187,6 +187,8 @@ py_test( "//tensorflow/python:errors", "//tensorflow/python:math_ops", "//tensorflow/python:script_ops", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", "//third_party/py/numpy", ], diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 951d4bb5f7..09416f8302 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -104,6 +104,15 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) + def testBatchSparseError(self): + + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + with self.assertRaises(TypeError): + _ = dataset_ops.Dataset.range(10).map(_map_fn).batch(10) + def testPaddedBatchDataset(self): seq_lens = array_ops.placeholder(dtypes.int32, shape=[None]) padded_shape = array_ops.placeholder(dtypes.int64, shape=[1]) @@ -238,6 +247,15 @@ class BatchDatasetTest(test.TestCase): self.assertEqual([None, None, None], dataset.output_shapes[1].as_list()) self.assertEqual([None, 37], dataset.output_shapes[2].as_list()) + def testPaddedBatchSparseError(self): + + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + with self.assertRaises(TypeError): + _ = dataset_ops.Dataset.range(10).map(_map_fn).padded_batch(10) + def testDenseToSparseBatchDataset(self): components = np.random.randint(12, size=(100,)).astype(np.int32) iterator = ( @@ -481,6 +499,16 @@ class BatchDatasetTest(test.TestCase): self.assertEqual([None], dataset.output_shapes[1][0].as_list()) self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list()) + def testBatchAndDropRemainderSparseError(self): + + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + with self.assertRaises(TypeError): + _ = dataset_ops.Dataset.range(10).map(_map_fn).apply( + batching.batch_and_drop_remainder(10)) + def testBatchAndMapDataset(self): """Test a dataset that maps a TF function across its input elements.""" # The pipeline is TensorSliceDataset -> diff --git a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py index 00323da311..67c49d77e2 100644 --- a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops @@ -124,6 +125,36 @@ class FilterDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testSparse(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + def _filter_fn(_, i): + return math_ops.equal(i % 2, 0) + + iterator = ( + dataset_ops.Dataset.range(10).map(_map_fn).filter(_filter_fn).map( + lambda x, i: x).make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(5): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[i*2], dense_shape=[1, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py index 2a582ae662..c950e4857e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py @@ -17,16 +17,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import itertools import random import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.client import session -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.ops import array_ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -123,154 +122,29 @@ class FlatMapDatasetTest(test.TestCase): sess.run(get_next) # pylint: enable=g-long-lambda + def testSparse(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) -class InterleaveDatasetTest(test.TestCase): + def _flat_map_fn(x): + return dataset_ops.Dataset.from_tensor_slices( + sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values)) - def _interleave(self, lists, cycle_length, block_length): - num_open = 0 - - # `all_iterators` acts as a queue of iterators over each element of `lists`. - all_iterators = [iter(l) for l in lists] - - # `open_iterators` are the iterators whose elements are currently being - # interleaved. - open_iterators = [] - for i in range(cycle_length): - if all_iterators: - open_iterators.append(all_iterators.pop(0)) - num_open += 1 - else: - open_iterators.append(None) - - while num_open or all_iterators: - for i in range(cycle_length): - if open_iterators[i] is None: - if all_iterators: - open_iterators[i] = all_iterators.pop(0) - num_open += 1 - else: - continue - for _ in range(block_length): - try: - yield next(open_iterators[i]) - except StopIteration: - open_iterators[i] = None - num_open -= 1 - break - - def testPythonImplementation(self): - input_lists = [[4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6], - [4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]] - - # Cycle length 1 acts like `Dataset.flat_map()`. - expected_elements = itertools.chain(*input_lists) - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 1, 1)): - self.assertEqual(expected, produced) - - # Cycle length > 1. - expected_elements = [4, 5, 4, 5, 4, 5, 4, - 5, 5, 6, 6, # NOTE(mrry): When we cycle back - # to a list and are already at - # the end of that list, we move - # on to the next element. - 4, 6, 4, 6, 4, 6, 4, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5] - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 2, 1)): - self.assertEqual(expected, produced) - - # Cycle length > 1 and block length > 1. - expected_elements = [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, - 4, 5, 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 2, 3)): - self.assertEqual(expected, produced) - - # Cycle length > len(input_values). - expected_elements = [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, - 4, 4, 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 7, 2)): - self.assertEqual(expected, produced) - - def testInterleaveDataset(self): - input_values = array_ops.placeholder(dtypes.int64, shape=[None]) - cycle_length = array_ops.placeholder(dtypes.int64, shape=[]) - block_length = array_ops.placeholder(dtypes.int64, shape=[]) - - repeat_count = 2 - - dataset = ( - dataset_ops.Dataset.from_tensor_slices(input_values) - .repeat(repeat_count) - .interleave(lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x), - cycle_length, block_length)) - iterator = dataset.make_initializable_iterator() + iterator = ( + dataset_ops.Dataset.range(10).map(_map_fn).flat_map(_flat_map_fn) + .make_initializable_iterator()) init_op = iterator.initializer - next_element = iterator.get_next() + get_next = iterator.get_next() with self.test_session() as sess: - # Cycle length 1 acts like `Dataset.flat_map()`. - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 1, block_length: 3}) - - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 1, 3): - self.assertEqual(expected_element, sess.run(next_element)) - - # Cycle length > 1. - # expected: [4, 5, 4, 5, 4, 5, 4, 5, 5, 6, 6, 4, 6, 4, 6, 4, 6, 4, 6, 5, - # 6, 5, 6, 5, 6, 5, 6, 5] - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 2, block_length: 1}) - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 1): - self.assertEqual(expected_element, sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Cycle length > 1 and block length > 1. - # expected: [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 5, - # 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 2, block_length: 3}) - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 3): - self.assertEqual(expected_element, sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Cycle length > len(input_values) * repeat_count. - # expected: [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, - # 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 7, block_length: 2}) - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 7, 2): - self.assertEqual(expected_element, sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Empty input. - sess.run(init_op, feed_dict={input_values: [], - cycle_length: 2, block_length: 3}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Non-empty input leading to empty output. - sess.run(init_op, feed_dict={input_values: [0, 0, 0], - cycle_length: 2, block_length: 3}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Mixture of non-empty and empty interleaved datasets. - sess.run(init_op, feed_dict={input_values: [4, 0, 6], - cycle_length: 2, block_length: 3}) - for expected_element in self._interleave( - [[4] * 4, [], [6] * 6] * repeat_count, 2, 3): - self.assertEqual(expected_element, sess.run(next_element)) + sess.run(init_op) + for i in range(10): + for j in range(2): + expected = [i, 0] if j % 2 == 0 else [0, -i] + self.assertAllEqual(expected, sess.run(get_next)) with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) + sess.run(get_next) if __name__ == "__main__": diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py index 0aa9ea88de..0299e3a1b7 100644 --- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py @@ -28,12 +28,187 @@ from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import script_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test +class InterleaveDatasetTest(test.TestCase): + + def _interleave(self, lists, cycle_length, block_length): + num_open = 0 + + # `all_iterators` acts as a queue of iterators over each element of `lists`. + all_iterators = [iter(l) for l in lists] + + # `open_iterators` are the iterators whose elements are currently being + # interleaved. + open_iterators = [] + for i in range(cycle_length): + if all_iterators: + open_iterators.append(all_iterators.pop(0)) + num_open += 1 + else: + open_iterators.append(None) + + while num_open or all_iterators: + for i in range(cycle_length): + if open_iterators[i] is None: + if all_iterators: + open_iterators[i] = all_iterators.pop(0) + num_open += 1 + else: + continue + for _ in range(block_length): + try: + yield next(open_iterators[i]) + except StopIteration: + open_iterators[i] = None + num_open -= 1 + break + + def testPythonImplementation(self): + input_lists = [[4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6], + [4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]] + + # Cycle length 1 acts like `Dataset.flat_map()`. + expected_elements = itertools.chain(*input_lists) + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 1, 1)): + self.assertEqual(expected, produced) + + # Cycle length > 1. + expected_elements = [4, 5, 4, 5, 4, 5, 4, + 5, 5, 6, 6, # NOTE(mrry): When we cycle back + # to a list and are already at + # the end of that list, we move + # on to the next element. + 4, 6, 4, 6, 4, 6, 4, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5] + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 2, 1)): + self.assertEqual(expected, produced) + + # Cycle length > 1 and block length > 1. + expected_elements = [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, + 4, 5, 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 2, 3)): + self.assertEqual(expected, produced) + + # Cycle length > len(input_values). + expected_elements = [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, + 4, 4, 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 7, 2)): + self.assertEqual(expected, produced) + + def testInterleaveDataset(self): + input_values = array_ops.placeholder(dtypes.int64, shape=[None]) + cycle_length = array_ops.placeholder(dtypes.int64, shape=[]) + block_length = array_ops.placeholder(dtypes.int64, shape=[]) + + repeat_count = 2 + + dataset = ( + dataset_ops.Dataset.from_tensor_slices(input_values) + .repeat(repeat_count) + .interleave(lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x), + cycle_length, block_length)) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + next_element = iterator.get_next() + + with self.test_session() as sess: + # Cycle length 1 acts like `Dataset.flat_map()`. + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 1, block_length: 3}) + + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 1, 3): + self.assertEqual(expected_element, sess.run(next_element)) + + # Cycle length > 1. + # expected: [4, 5, 4, 5, 4, 5, 4, 5, 5, 6, 6, 4, 6, 4, 6, 4, 6, 4, 6, 5, + # 6, 5, 6, 5, 6, 5, 6, 5] + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 2, block_length: 1}) + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 1): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Cycle length > 1 and block length > 1. + # expected: [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 5, + # 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 2, block_length: 3}) + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 3): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Cycle length > len(input_values) * repeat_count. + # expected: [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, + # 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 7, block_length: 2}) + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 7, 2): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Empty input. + sess.run(init_op, feed_dict={input_values: [], + cycle_length: 2, block_length: 3}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Non-empty input leading to empty output. + sess.run(init_op, feed_dict={input_values: [0, 0, 0], + cycle_length: 2, block_length: 3}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Mixture of non-empty and empty interleaved datasets. + sess.run(init_op, feed_dict={input_values: [4, 0, 6], + cycle_length: 2, block_length: 3}) + for expected_element in self._interleave( + [[4] * 4, [], [6] * 6] * repeat_count, 2, 3): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testSparse(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) + + def _interleave_fn(x): + return dataset_ops.Dataset.from_tensor_slices( + sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values)) + + iterator = ( + dataset_ops.Dataset.range(10).map(_map_fn).interleave( + _interleave_fn, cycle_length=1).make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + for j in range(2): + expected = [i, 0] if j % 2 == 0 else [0, -i] + self.assertAllEqual(expected, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + class ParallelInterleaveDatasetTest(test.TestCase): def setUp(self): @@ -547,5 +722,31 @@ class ParallelInterleaveDatasetTest(test.TestCase): def testTooManyReadersSloppy(self): self._testTooManyReaders(sloppy=True) + def testSparse(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) + + def _interleave_fn(x): + return dataset_ops.Dataset.from_tensor_slices( + sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values)) + + dataset = dataset_ops.Dataset.range(10).map(_map_fn) + iterator = dataset.apply( + interleave_ops.parallel_interleave( + _interleave_fn, cycle_length=1)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + for j in range(2): + expected = [i, 0] if j % 2 == 0 else [0, -i] + self.assertAllEqual(expected, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py index 8a1d99499b..8ccf92c17a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py @@ -20,15 +20,15 @@ from collections import namedtuple import os import threading -from collections import namedtuple import numpy as np -from tensorflow.contrib.data.python.ops import error_ops from tensorflow.contrib.data.python.ops import dataset_ops +from tensorflow.contrib.data.python.ops import error_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import functional_ops @@ -37,6 +37,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import script_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -616,6 +617,57 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testSparse(self): + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + iterator = (dataset_ops.Dataset.range(10) + .map(_sparse) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[i], dense_shape=[1, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSparseChain(self): + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + def _check(i): + self.assertTrue(isinstance(i, sparse_tensor.SparseTensor)) + return sparse_ops.sparse_concat(0, [i, i]) + + iterator = (dataset_ops.Dataset.range(10) + .map(_sparse).map(_check) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0]], values=[i, i], dense_shape=[2, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 6d64fb03e7..d6aaa12f5b 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -88,6 +88,7 @@ py_library( "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index d4ade7adfd..cc63baed81 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -325,6 +326,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): def __init__(self, input_dataset, map_func, batch_size, num_parallel_batches): """See `Dataset.map()` for details.""" super(_MapAndBatchDataset, self).__init__(input_dataset, map_func) + if sparse.any_sparse(self._output_types): + # TODO(b/63669786): support batching of sparse tensors + raise TypeError("Batching of sparse tensors is not currently supported") self._batch_size = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") @@ -340,7 +344,8 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): f=self._map_func, batch_size=self._batch_size, num_parallel_batches=self._num_parallel_batches, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) # pylint: enable=protected-access @@ -389,6 +394,9 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1): """ def _apply_fn(dataset): + if sparse.any_sparse(dataset.output_types): + # TODO(b/63669786): support batching of sparse tensors + raise TypeError("Batching of sparse tensors is not currently supported") return _MapAndBatchDataset(dataset, map_func, batch_size, num_parallel_batches) diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 238bb52b02..194b611513 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.ops import gen_dataset_ops @@ -63,7 +64,8 @@ class IgnoreErrorsDataset(dataset_ops.Dataset): return gen_dataset_ops.ignore_errors_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 6df7b22fb6..86337271bc 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops @@ -137,13 +138,17 @@ class GroupByWindowDataset(dataset_ops.Dataset): def _make_key_func(self, key_func, input_dataset): """Make wrapping Defun for key_func.""" - @function.Defun(*nest.flatten(input_dataset.output_types)) + @function.Defun( + *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) def tf_key_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): arg.set_shape(shape) + nested_args = nest.pack_sequence_as(input_dataset.output_types, args) + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types) # pylint: disable=protected-access if dataset_ops._should_unpack_args(nested_args): ret = key_func(*nested_args) @@ -197,5 +202,6 @@ class GroupByWindowDataset(dataset_ops.Dataset): key_func=self._key_func, reduce_func=self._reduce_func, window_size_func=self._window_size_func, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 74a919c1ff..4adec46706 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops @@ -35,7 +36,8 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): super(ParallelInterleaveDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun(*nest.flatten(input_dataset.output_types)) + @function.Defun( + *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. @@ -43,7 +45,8 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types) if nest.is_sequence(nested_args): dataset = map_func(*nested_args) else: @@ -75,7 +78,8 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): self._block_length, self._sloppy, f=self._map_func, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) @property diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 2e1c3153ca..08d6a7a605 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -23,7 +23,6 @@ from tensorflow.python.data.ops import readers from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import parsing_ops @@ -156,8 +155,7 @@ def read_batch_features(file_pattern, features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. See `tf.parse_example`. reader: A function or class that can be called with a `filenames` tensor - and (optional) `reader_args` and returns a `Dataset` of serialized - Examples. + and (optional) `reader_args` and returns a `Dataset` of Examples. reader_args: Additional arguments to pass to the reader class. randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the @@ -173,33 +171,15 @@ def read_batch_features(file_pattern, dataset = reader(filenames, *reader_args) else: dataset = reader(filenames) - if dataset.output_types == (dtypes.string, dtypes.string): - dataset = dataset.map(lambda unused_k, v: v) - elif dataset.output_types != dtypes.string: - raise TypeError("`reader` must be a dataset of `tf.string` values, " - "or `(tf.string, tf.string)` key-value pairs.") if num_epochs != 1: dataset = dataset.repeat(num_epochs) if randomize_input: dataset = dataset.shuffle(capacity) dataset = dataset.batch(batch_size) - dataset = dataset.map(lambda x: _parse_example(x, features)) + dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features)) iterator = dataset.make_one_shot_iterator() outputs = iterator.get_next() - index = 0 - result = {} - for key in sorted(features.keys()): - feature = features[key] - if isinstance(feature, parsing_ops.FixedLenFeature): - result[key] = outputs[index] - index += 1 - else: - result[key] = sparse_tensor_lib.SparseTensor( - indices=outputs[index], - values=outputs[index + 1], - dense_shape=outputs[index + 2]) - index += 3 - return result + return outputs def _get_file_names(file_pattern, randomize_input): @@ -233,18 +213,6 @@ def _get_file_names(file_pattern, randomize_input): return file_names -def _parse_example(serialized, features): - parsed = parsing_ops.parse_example(serialized, features) - result = [] - for key in sorted(features.keys()): - val = parsed[key] - if isinstance(val, sparse_tensor_lib.SparseTensor): - result.extend([val.indices, val.values, val.dense_shape]) - else: - result.append(val) - return tuple(result) - - class SqlDataset(contrib_dataset_ops.Dataset): def __init__(self, driver_name, data_source_name, query, output_types): diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 5acaed48a3..2cfc0709cd 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -21,6 +21,7 @@ import collections from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.ops import gen_dataset_ops @@ -43,6 +44,7 @@ class _ScanDataset(dataset_ops.Dataset): # Compute initial values for the state shapes and types based on # the initial state. These will be refined by running # `tf_scan_func` one or more times below. + # TODO(b/68937811): Allow the initial state to be a tf.SparseTensor. self._state_shapes = nest.pack_sequence_as( self._initial_state, [t.shape for t in nest.flatten(self._initial_state)]) @@ -65,8 +67,8 @@ class _ScanDataset(dataset_ops.Dataset): # Create a list in which `tf_scan_func` will store the s flat_new_state_shapes = [] - @function.Defun( - *(flat_state_types + nest.flatten(input_dataset.output_types))) + @function.Defun(*(flat_state_types + nest.flatten( + sparse.unwrap_sparse_types(input_dataset.output_types)))) def tf_scan_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the state and input_dataset. @@ -144,7 +146,8 @@ class _ScanDataset(dataset_ops.Dataset): nest.flatten(self._initial_state), self._scan_func.captured_inputs, f=self._scan_func, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) @property diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc index 2c7ad5bab0..ac58c3d1ea 100644 --- a/tensorflow/core/kernels/serialize_sparse_op.cc +++ b/tensorflow/core/kernels/serialize_sparse_op.cc @@ -207,6 +207,104 @@ class SerializeManySparseOp : public OpKernel { TF_CALL_ALL_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS +template +class DeserializeSparseOp : public OpKernel { + public: + explicit DeserializeSparseOp(OpKernelConstruction* context) + : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& serialized_sparse = context->input(0); + OP_REQUIRES(context, TensorShapeUtils::IsVector(serialized_sparse.shape()), + errors::InvalidArgument( + "Serialized sparse should be a vector but received shape ", + serialized_sparse.shape().DebugString())); + OP_REQUIRES( + context, serialized_sparse.shape().dim_size(0) == 3, + errors::InvalidArgument( + "Serialize sparse should have 3 columns but received shape ", + serialized_sparse.shape().DebugString())); + + Tensor output_indices(DT_INT64); + Tensor output_values(DataTypeToEnum::value); + Tensor output_shape(DT_INT64); + TensorProto proto_indices; + TensorProto proto_values; + TensorProto proto_shape; + + const auto& serialized_sparse_t = serialized_sparse.vec(); + + OP_REQUIRES( + context, ParseProtoUnlimited(&proto_indices, serialized_sparse_t(0)), + errors::InvalidArgument("Could not parse serialized_sparse[0]")); + OP_REQUIRES( + context, ParseProtoUnlimited(&proto_values, serialized_sparse_t(1)), + errors::InvalidArgument("Could not parse serialized_sparse[1]")); + OP_REQUIRES( + context, ParseProtoUnlimited(&proto_shape, serialized_sparse_t(2)), + errors::InvalidArgument("Could not parse serialized_sparse[2]")); + + OP_REQUIRES( + context, output_indices.FromProto(proto_indices), + errors::InvalidArgument( + "Could not construct Tensor serialized_sparse[0] (indices)")); + OP_REQUIRES( + context, TensorShapeUtils::IsMatrix(output_indices.shape()), + errors::InvalidArgument("Expected serialized_sparse[0] to represent an " + "index matrix but received shape ", + output_indices.shape().DebugString())); + OP_REQUIRES( + context, output_values.FromProto(proto_values), + errors::InvalidArgument( + "Could not construct Tensor serialized_sparse[1] (values)")); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(output_values.shape()), + errors::InvalidArgument("Expected serialized_sparse[1] to represent a " + "values vector but received shape ", + output_values.shape().DebugString())); + OP_REQUIRES(context, output_shape.FromProto(proto_shape), + errors::InvalidArgument( + "Could not construct Tensor serialized_sparse[2] (shape)")); + OP_REQUIRES(context, TensorShapeUtils::IsVector(output_shape.shape()), + errors::InvalidArgument("Expected serialized_sparse[2] to be a " + "shape vector but its shape is ", + output_shape.shape().DebugString())); + + OP_REQUIRES( + context, DataTypeToEnum::value == output_values.dtype(), + errors::InvalidArgument("Requested SparseTensor of type ", + DataTypeString(DataTypeToEnum::value), + " but SparseTensor.values.dtype() == ", + DataTypeString(output_values.dtype()))); + + int64 num_entries = output_indices.dim_size(0); + OP_REQUIRES(context, num_entries == output_values.dim_size(0), + errors::InvalidArgument( + "Expected row counts of SparseTensor.indices and " + "SparseTensor.values to match but they do not: ", + num_entries, " vs. ", output_values.dim_size(0))); + int rank = output_indices.dim_size(1); + OP_REQUIRES(context, rank == output_shape.dim_size(0), + errors::InvalidArgument( + "Expected column counts of SparseTensor.indices to match " + "size of SparseTensor.shape but they do not: ", + rank, " vs. ", output_shape.dim_size(0))); + + context->set_output(0, output_indices); + context->set_output(1, output_values); + context->set_output(2, output_shape); + } +}; + +#define REGISTER_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("DeserializeSparse") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("dtype"), \ + DeserializeSparseOp) + +TF_CALL_ALL_TYPES(REGISTER_KERNELS); +#undef REGISTER_KERNELS + template class DeserializeManySparseOp : public OpKernel { public: @@ -246,10 +344,11 @@ class DeserializeManySparseOp : public OpKernel { TensorProto proto_values; TensorProto proto_shape; - OP_REQUIRES(context, ParseProtoUnlimited(&proto_indices, - serialized_sparse_t(i, 0)), - errors::InvalidArgument("Could not parse serialized_sparse[", - i, ", 0]")); + OP_REQUIRES( + context, + ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)), + errors::InvalidArgument("Could not parse serialized_sparse[", i, + ", 0]")); OP_REQUIRES(context, ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)), errors::InvalidArgument("Could not parse serialized_sparse[", @@ -266,7 +365,7 @@ class DeserializeManySparseOp : public OpKernel { OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()), errors::InvalidArgument( "Expected serialized_sparse[", i, - ", 1] to represent an index matrix but received shape ", + ", 0] to represent an index matrix but received shape ", output_indices.shape().DebugString())); OP_REQUIRES(context, output_values.FromProto(proto_values), errors::InvalidArgument( diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index 646c379586..8b6106f2a4 100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -237,6 +237,34 @@ sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. )doc"); +REGISTER_OP("DeserializeSparse") + .Input("serialized_sparse: string") + .Attr("dtype: type") + .Output("sparse_indices: int64") + .Output("sparse_values: dtype") + .Output("sparse_shape: int64") + .SetShapeFn([](InferenceContext* c) { + // serialized sparse is [3] vector. + ShapeHandle serialized_sparse; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &serialized_sparse)); + DimensionHandle unused; + TF_RETURN_IF_ERROR( + c->WithValue(c->Dim(serialized_sparse, 0), 3, &unused)); + + c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, + InferenceContext::kUnknownDim)); + c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); + c->set_output(2, c->Vector(InferenceContext::kUnknownDim)); + return Status::OK(); + }) + .Doc(R"doc( +Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`) +object. + +serialized_sparse: 1-D, The serialized `SparseTensor` object. Must have 3 columns. +dtype: The `dtype` of the serialized `SparseTensor` object. +)doc"); + REGISTER_OP("DeserializeManySparse") .Input("serialized_sparse: string") .Attr("dtype: type") diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py index b5ee8120fd..504500d245 100644 --- a/tensorflow/python/data/__init__.py +++ b/tensorflow/python/data/__init__.py @@ -18,9 +18,10 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@Dataset @@Iterator -@@TFRecordDataset @@FixedLengthRecordDataset @@TextLineDataset +@@TFRecordDataset +@@SparseType """ from __future__ import absolute_import @@ -33,6 +34,7 @@ from tensorflow.python.data.ops.iterator_ops import Iterator from tensorflow.python.data.ops.readers import FixedLengthRecordDataset from tensorflow.python.data.ops.readers import TextLineDataset from tensorflow.python.data.ops.readers import TFRecordDataset +from tensorflow.python.data.util.sparse import SparseType # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index 5140510409..05acfe4de7 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -22,6 +22,7 @@ py_library( "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_util", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", "//third_party/py/numpy", ], ) @@ -50,6 +51,7 @@ py_library( "//tensorflow/python:framework_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", ], ) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 09f4349cf3..5f981e2670 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -26,6 +26,7 @@ import six from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -95,11 +96,12 @@ class Dataset(object): iterator_resource = gen_dataset_ops.iterator( container="", shared_name=shared_name, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) with ops.colocate_with(iterator_resource): - initializer = gen_dataset_ops.make_iterator( - self._as_variant_tensor(), iterator_resource) + initializer = gen_dataset_ops.make_iterator(self._as_variant_tensor(), + iterator_resource) return iterator_ops.Iterator(iterator_resource, initializer, self.output_types, self.output_shapes) @@ -141,7 +143,8 @@ class Dataset(object): return iterator_ops.Iterator( gen_dataset_ops.one_shot_iterator( dataset_factory=_make_dataset, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)), None, self.output_types, self.output_shapes) @@ -160,8 +163,9 @@ class Dataset(object): """Returns the type of each component of an element of this dataset. Returns: - A nested structure of `tf.DType` objects corresponding to each component - of an element of this dataset. + A nested structure of `tf.DType` (or `tf.data.SparseType`) objects + corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an + element of this dataset. """ raise NotImplementedError("Dataset.output_types") @@ -335,8 +339,8 @@ class Dataset(object): # pylint: disable=protected-access ret_arrays = [ script_ops.FuncRegistry._convert(ret, dtype=dtype.as_numpy_dtype) - for ret, dtype in zip(nest.flatten_up_to(output_types, values), - flattened_types) + for ret, dtype in zip( + nest.flatten_up_to(output_types, values), flattened_types) ] # pylint: enable=protected-access @@ -948,8 +952,8 @@ class SparseTensorSliceDataset(Dataset): rank = (indices_shape[1] - 1).merge_with(shape_shape[0] - 1) num_values = tensor_shape.Dimension(None) return (tensor_shape.TensorShape([num_values, rank]), - tensor_shape.TensorShape([num_values]), tensor_shape.TensorShape( - [rank])) + tensor_shape.TensorShape([num_values]), + tensor_shape.TensorShape([rank])) @property def output_types(self): @@ -992,15 +996,15 @@ class ZipDataset(Dataset): @property def output_shapes(self): - return nest.pack_sequence_as(self._datasets, [ - ds.output_shapes for ds in nest.flatten(self._datasets) - ]) + return nest.pack_sequence_as( + self._datasets, + [ds.output_shapes for ds in nest.flatten(self._datasets)]) @property def output_types(self): - return nest.pack_sequence_as(self._datasets, [ - ds.output_types for ds in nest.flatten(self._datasets) - ]) + return nest.pack_sequence_as( + self._datasets, + [ds.output_types for ds in nest.flatten(self._datasets)]) class ConcatenateDataset(Dataset): @@ -1027,7 +1031,8 @@ class ConcatenateDataset(Dataset): self._input_dataset._as_variant_tensor(), self._dataset_to_concatenate._as_variant_tensor(), output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) # pylint: enable=protected-access @property @@ -1062,7 +1067,8 @@ class RepeatDataset(Dataset): self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): @@ -1106,7 +1112,8 @@ class RangeDataset(Dataset): stop=self._stop, step=self._step, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): @@ -1132,7 +1139,8 @@ class CacheDataset(Dataset): self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access filename=self._filename, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): @@ -1146,7 +1154,10 @@ class CacheDataset(Dataset): class ShuffleDataset(Dataset): """A `Dataset` that randomly shuffles the elements of its input.""" - def __init__(self, input_dataset, buffer_size, seed=None, + def __init__(self, + input_dataset, + buffer_size, + seed=None, reshuffle_each_iteration=None): """See `Dataset.shuffle()` for details.""" super(ShuffleDataset, self).__init__() @@ -1176,7 +1187,8 @@ class ShuffleDataset(Dataset): seed2=self._seed2, reshuffle_each_iteration=self._reshuffle_each_iteration, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): @@ -1201,7 +1213,8 @@ class TakeDataset(Dataset): self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): @@ -1226,7 +1239,8 @@ class SkipDataset(Dataset): self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): @@ -1243,16 +1257,20 @@ class BatchDataset(Dataset): def __init__(self, input_dataset, batch_size): """See `Dataset.batch()` for details.""" super(BatchDataset, self).__init__() + if sparse.any_sparse(input_dataset.output_types): + # TODO(b/63669786): support batching of sparse tensors + raise TypeError("Batching of sparse tensors is not currently supported") self._input_dataset = input_dataset - self._batch_size = ops.convert_to_tensor(batch_size, dtype=dtypes.int64, - name="batch_size") + self._batch_size = ops.convert_to_tensor( + batch_size, dtype=dtypes.int64, name="batch_size") def _as_variant_tensor(self): return gen_dataset_ops.batch_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access batch_size=self._batch_size, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): @@ -1312,11 +1330,15 @@ class PaddedBatchDataset(Dataset): def __init__(self, input_dataset, batch_size, padded_shapes, padding_values): """See `Dataset.batch()` for details.""" super(PaddedBatchDataset, self).__init__() + if sparse.any_sparse(input_dataset.output_types): + # TODO(b/63669786): support batching of sparse tensors + raise TypeError("Batching of sparse tensors is not currently supported") self._input_dataset = input_dataset - self._batch_size = ops.convert_to_tensor(batch_size, dtype=dtypes.int64, - name="batch_size") - padding_values = (padding_values if padding_values is not None else - self._default_padding(input_dataset)) + self._batch_size = ops.convert_to_tensor( + batch_size, dtype=dtypes.int64, name="batch_size") + padding_values = ( + padding_values + if padding_values is not None else self._default_padding(input_dataset)) self._padded_shapes = nest.map_structure_up_to( input_dataset.output_shapes, _partial_shape_to_tensor, padded_shapes) self._padding_values = nest.map_structure_up_to( @@ -1374,7 +1396,8 @@ class MapDataset(Dataset): self._output_shapes = None self._output_types = None - @function.Defun(*nest.flatten(input_dataset.output_types)) + @function.Defun( + *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. @@ -1382,7 +1405,8 @@ class MapDataset(Dataset): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types) if _should_unpack_args(nested_args): ret = map_func(*nested_args) else: @@ -1401,14 +1425,17 @@ class MapDataset(Dataset): if isinstance(ret, list): ret = tuple(ret) - # Extract shape information from the returned values. - flattened_ret = [ops.convert_to_tensor(t) for t in nest.flatten(ret)] + # Identify components that hold sparse tensor values. + types = sparse.get_sparse_types(ret) + # Serialize any sparse tensors and convert result to tensors. + ret = nest.pack_sequence_as(ret, [ + ops.convert_to_tensor(t) + for t in nest.flatten(sparse.serialize_sparse_tensors(ret)) + ]) self._output_shapes = nest.pack_sequence_as( - ret, [t.get_shape() for t in flattened_ret]) - self._output_types = nest.pack_sequence_as( - ret, [t.dtype for t in flattened_ret]) - - return flattened_ret + types, [t.get_shape() for t in nest.flatten(ret)]) + self._output_types = sparse.wrap_sparse_types(ret, types) + return nest.flatten(ret) self._map_func = tf_map_func self._map_func.add_to_graph(ops.get_default_graph()) @@ -1419,7 +1446,8 @@ class MapDataset(Dataset): input_t, self._map_func.captured_inputs, f=self._map_func, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) @property @@ -1449,7 +1477,8 @@ class ParallelMapDataset(MapDataset): self._map_func.captured_inputs, f=self._map_func, num_parallel_calls=self._num_parallel_calls, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) # pylint: enable=protected-access @@ -1462,7 +1491,8 @@ class FlatMapDataset(Dataset): super(FlatMapDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun(*nest.flatten(input_dataset.output_types)) + @function.Defun( + *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. @@ -1470,7 +1500,8 @@ class FlatMapDataset(Dataset): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types) if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: @@ -1492,7 +1523,8 @@ class FlatMapDataset(Dataset): self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access self._map_func.captured_inputs, f=self._map_func, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) @property @@ -1513,7 +1545,8 @@ class InterleaveDataset(Dataset): super(InterleaveDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun(*nest.flatten(input_dataset.output_types)) + @function.Defun( + *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. @@ -1521,7 +1554,8 @@ class InterleaveDataset(Dataset): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types) if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: @@ -1538,10 +1572,10 @@ class InterleaveDataset(Dataset): self._map_func = tf_map_func self._map_func.add_to_graph(ops.get_default_graph()) - self._cycle_length = ops.convert_to_tensor(cycle_length, dtype=dtypes.int64, - name="cycle_length") - self._block_length = ops.convert_to_tensor(block_length, dtype=dtypes.int64, - name="block_length") + self._cycle_length = ops.convert_to_tensor( + cycle_length, dtype=dtypes.int64, name="cycle_length") + self._block_length = ops.convert_to_tensor( + block_length, dtype=dtypes.int64, name="block_length") def _as_variant_tensor(self): return gen_dataset_ops.interleave_dataset( @@ -1550,7 +1584,8 @@ class InterleaveDataset(Dataset): self._cycle_length, self._block_length, f=self._map_func, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) @property @@ -1570,7 +1605,8 @@ class FilterDataset(Dataset): super(FilterDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun(*nest.flatten(input_dataset.output_types)) + @function.Defun( + *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) def tf_predicate(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. @@ -1578,7 +1614,8 @@ class FilterDataset(Dataset): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types) if _should_unpack_args(nested_args): ret = predicate(*nested_args) else: @@ -1599,7 +1636,8 @@ class FilterDataset(Dataset): self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access other_arguments=self._predicate.captured_inputs, predicate=self._predicate, - output_types=nest.flatten(self.output_types), + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types)), output_shapes=nest.flatten(self.output_shapes)) @property @@ -1618,15 +1656,16 @@ class PrefetchDataset(Dataset): """See `Dataset.prefetch()` for details.""" super(PrefetchDataset, self).__init__() self._input_dataset = input_dataset - self._buffer_size = ops.convert_to_tensor(buffer_size, dtype=dtypes.int64, - name="buffer_size") + self._buffer_size = ops.convert_to_tensor( + buffer_size, dtype=dtypes.int64, name="buffer_size") def _as_variant_tensor(self): return gen_dataset_ops.prefetch_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access buffer_size=self._buffer_size, output_shapes=nest.flatten(self.output_shapes), - output_types=nest.flatten(self.output_types)) + output_types=nest.flatten( + sparse.unwrap_sparse_types(self.output_types))) @property def output_shapes(self): diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index b538caf281..987a9b53ad 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -40,8 +41,9 @@ class Iterator(object): iterator. initializer: A `tf.Operation` that should be run to initialize this iterator. - output_types: A nested structure of `tf.DType` objects corresponding to - each component of an element of this iterator. + output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) + objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) + component of an element of this dataset. output_shapes: A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. """ @@ -100,8 +102,9 @@ class Iterator(object): ``` Args: - output_types: A nested structure of `tf.DType` objects corresponding to - each component of an element of this iterator. + output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) + objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) + component of an element of this dataset. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. If omitted, each component will have an unconstrainted shape. @@ -129,7 +132,7 @@ class Iterator(object): iterator_resource = gen_dataset_ops.iterator( container="", shared_name=shared_name, - output_types=nest.flatten(output_types), + output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)), output_shapes=nest.flatten(output_shapes)) return Iterator(iterator_resource, None, output_types, output_shapes) @@ -167,8 +170,9 @@ class Iterator(object): Args: string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates to a handle produced by the `Iterator.string_handle()` method. - output_types: A nested structure of `tf.DType` objects corresponding to - each component of an element of this iterator. + output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) + objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) + component of an element of this dataset. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. If omitted, each component will have an unconstrainted shape. @@ -187,7 +191,7 @@ class Iterator(object): string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string) iterator_resource = gen_dataset_ops.iterator_from_string_handle( string_handle, - output_types=nest.flatten(output_types), + output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)), output_shapes=nest.flatten(output_shapes)) return Iterator(iterator_resource, None, output_types, output_shapes) @@ -252,13 +256,16 @@ class Iterator(object): Returns: A nested structure of `tf.Tensor` objects. """ - return nest.pack_sequence_as( - self._output_types, - gen_dataset_ops.iterator_get_next( - self._iterator_resource, - output_types=nest.flatten(self._output_types), - output_shapes=nest.flatten(self._output_shapes), - name=name)) + return sparse.deserialize_sparse_tensors( + nest.pack_sequence_as(self._output_types, + gen_dataset_ops.iterator_get_next( + self._iterator_resource, + output_types=nest.flatten( + sparse.unwrap_sparse_types( + self._output_types)), + output_shapes=nest.flatten( + self._output_shapes), + name=name)), self._output_types) def string_handle(self, name=None): """Returns a string-valued `tf.Tensor` that represents this iterator. @@ -290,7 +297,8 @@ class Iterator(object): """Returns the type of each component of an element of this iterator. Returns: - A nested structure of `tf.DType` objects corresponding to each component - of an element of this iterator. + A nested structure of `tf.DType` (or `tf.data.SparseType`) objects + corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an + element of this dataset. """ return self._output_types diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index a2b80590ba..41d8513b16 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -31,6 +31,34 @@ py_test( ], ) +py_library( + name = "sparse", + srcs = ["sparse.py"], + srcs_version = "PY2AND3", + deps = [ + ":nest", + "//tensorflow/python:dtypes", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:util", + "@six_archive//:six", + ], +) + +py_test( + name = "sparse_test", + size = "small", + srcs = ["sparse_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":nest", + ":sparse", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:sparse_tensor", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py new file mode 100644 index 0000000000..673fac095c --- /dev/null +++ b/tensorflow/python/data/util/sparse.py @@ -0,0 +1,163 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python dataset sparse tensor utility functitons.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.util import nest +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import sparse_ops + + +def any_sparse(types): + """Checks for sparse tensor types. + + Args: + types: a structure with tensor types. + + Returns: + `True` if `types` contains a sparse tensor type and `False` otherwise. + """ + return any([isinstance(ty, SparseType) for ty in nest.flatten(types)]) + + +def deserialize_sparse_tensors(tensors, types): + """Deserializes sparse tensors. + + Args: + tensors: a structure of tensors to deserialize. + types: a structure object the holds information about which tensors in + `tensors` represent serialized sparse tensors + + Returns: + `tensors` with any serialized sparse tensors replaced by their deserialized + version. + """ + # TODO(b/63669786): support batching of sparse tensors + ret = nest.pack_sequence_as(types, [ + sparse_ops.deserialize_sparse(tensor, ty.dtype) + if isinstance(ty, SparseType) else tensor + for (tensor, ty) in zip(nest.flatten(tensors), nest.flatten(types)) + ]) + return ret + + +def get_sparse_types(tensors): + """Gets sparse types for a structure of tensors. + + Args: + tensors: the tensor structure to get sparse types for. + + Returns: + a structure matching the nested structure of `tensors`, containing + `SparseType` at positions where `tensors` contains a sparse tensor and + `None` otherwise + """ + return nest.pack_sequence_as(tensors, [ + SparseType(tensor.dtype) + if isinstance(tensor, sparse_tensor.SparseTensor) else None + for tensor in nest.flatten(tensors) + ]) + + +def serialize_sparse_tensors(tensors): + """Serializes sparse tensors. + + Args: + tensors: a tensor structure to serialize. + + Returns: + `tensors` with any sparse tensors replaced by the their serialized version. + """ + + ret = nest.pack_sequence_as(tensors, [ + sparse_ops.serialize_sparse(tensor) + if isinstance(tensor, sparse_tensor.SparseTensor) else tensor + for tensor in nest.flatten(tensors) + ]) + return ret + + +def unwrap_sparse_types(types): + """Unwraps sparse tensor types as `dtypes.string`. + + Args: + types: a structure of types to unwrap. + + Returns: + a structure matching the nested structure of `types`, containing + `dtypes.string` at positions where `types` contains a sparse tensor and + matching contents of `types` otherwise + """ + ret = nest.pack_sequence_as(types, [ + dtypes.string if isinstance(ty, SparseType) else ty + for ty in nest.flatten(types) + ]) + return ret + + +def wrap_sparse_types(tensors, types): + """Wraps sparse tensor types in `SparseType`. + + Args: + tensors: a structure of tensors for which to wrap types. + types: a structure that holds information about which tensors in + `tensors` represent serialized sparse tensors + + Returns: + a structure matching the nested structure of `tensors`, containing + `SparseType` at positions where `tensors` contains a sparse tensor and + `DType` otherwise + """ + ret = nest.pack_sequence_as(types, [ + tensor.dtype if ty is None else ty + for tensor, ty in zip(nest.flatten(tensors), nest.flatten(types)) + ]) + return ret + + +class SparseType(object): + """Wrapper class for representing types of sparse tensors in tf.data.""" + + def __init__(self, dtype): + """Creates a new instace of `SparseType`. + + Args: + dtype: the sparse tensor type to wrap. + """ + self._dtype = dtype + + def __repr__(self): + return "SparseType({0!r})".format(self._dtype) + + def __eq__(self, other): + """Returns `True` iff `self == other`.""" + if not isinstance(other, SparseType): + return False + return self._dtype == other.dtype + + def __ne__(self, other): + """Returns `True` iff `self != other`.""" + return not self.__eq__(other) + + def __hash__(self): + return self._dtype.__hash__() + + @property + def dtype(self): + """Returns the wrapped sparse tensor type.""" + return self._dtype diff --git a/tensorflow/python/data/util/sparse_test.py b/tensorflow/python/data/util/sparse_test.py new file mode 100644 index 0000000000..e30ed639c2 --- /dev/null +++ b/tensorflow/python/data/util/sparse_test.py @@ -0,0 +1,141 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for utilities working with arbitrarily nested structures.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.platform import test + + +class SparseTest(test.TestCase): + + def testAnySparse(self): + test_cases = ( + ((), False), + ((None), False), + ((dtypes.string), False), + ((None, -1, dtypes.string), False), + ((sparse.SparseType(dtypes.string)), True), + ((None, sparse.SparseType(dtypes.string)), True), + ((sparse.SparseType(dtypes.string), dtypes.string), True), + ((((sparse.SparseType(dtypes.string)))), True) + ) + for test_case in test_cases: + self.assertEqual(sparse.any_sparse(test_case[0]), test_case[1]) + + def assertSparseValuesEqual(self, a, b): + if not isinstance(a, sparse_tensor.SparseTensor): + self.assertFalse(isinstance(b, sparse_tensor.SparseTensor)) + self.assertEqual(a, b) + return + self.assertTrue(isinstance(b, sparse_tensor.SparseTensor)) + with self.test_session(): + self.assertAllEqual(a.eval().indices, b.eval().indices) + self.assertAllEqual(a.eval().values, b.eval().values) + self.assertAllEqual(a.eval().dense_shape, b.eval().dense_shape) + + def testSerializeDeserialize(self): + test_cases = ( + (), + sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1]), + sparse_tensor.SparseTensor( + indices=[[3, 4]], values=[-1], dense_shape=[4, 5]), + sparse_tensor.SparseTensor( + indices=[[0, 0], [3, 4]], values=[1, -1], dense_shape=[4, 5]), + (sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1])), + (sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1]), ()), + ((), sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1])), + ) + for expected in test_cases: + actual = sparse.deserialize_sparse_tensors( + sparse.serialize_sparse_tensors(expected), + sparse.get_sparse_types(expected)) + nest.assert_same_structure(expected, actual) + for a, e in zip(nest.flatten(actual), nest.flatten(expected)): + self.assertSparseValuesEqual(a, e) + + def testGetSparseTypes(self): + s = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1]) + t = sparse.SparseType(dtypes.int32) + test_cases = ( + ((), ()), + (s, t), + ((s), (t)), + ((s, ()), (t, ())), + (((), s), ((), t)), + ) + for test_case in test_cases: + self.assertEqual(sparse.get_sparse_types(test_case[0]), test_case[1]) + + def testWrapSparseTypes(self): + c = constant_op.constant([1]) + d = dtypes.int32 + s = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1]) + t = sparse.SparseType(dtypes.int32) + test_cases = ( + ((), ()), + (s, t), + (c, d), + ((s), (t)), + ((c), (d)), + ((s, ()), (t, ())), + (((), s), ((), t)), + ((c, ()), (d, ())), + (((), c), ((), d)), + ((s, (), c), (t, (), d)), + (((), s, ()), ((), t, ())), + (((), c, ()), ((), d, ())), + ) + for test_case in test_cases: + self.assertEqual( + sparse.wrap_sparse_types(test_case[0], sparse.get_sparse_types( + test_case[0])), test_case[1]) + + def testUnwrapSparseTypes(self): + d = dtypes.string + t = sparse.SparseType(dtypes.int32) + test_cases = ( + ((), ()), + (t, d), + (d, d), + ((t), (d)), + ((d), (d)), + ((t, ()), (d, ())), + (((), t), ((), d)), + ((d, ()), (d, ())), + (((), d), ((), d)), + ((t, (), d), (d, (), d)), + (((), t, ()), ((), d, ())), + (((), d, ()), ((), d, ())), + ) + for test_case in test_cases: + self.assertEqual(sparse.unwrap_sparse_types(test_case[0]), test_case[1]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 8d6f863a4c..d3fa5cb778 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2852,6 +2852,7 @@ tf_py_test( "//tensorflow/python:errors", "//tensorflow/python:functional_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:sparse_tensor", "//tensorflow/python/data/ops:dataset_ops", ], ) @@ -2862,11 +2863,11 @@ tf_py_test( srcs = ["flat_map_dataset_op_test.py"], additional_deps = [ "//third_party/py/numpy", - "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:session", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", "//tensorflow/python/data/ops:dataset_ops", ], @@ -2900,6 +2901,23 @@ tf_py_test( ], ) +tf_py_test( + name = "interleave_dataset_op_test", + size = "small", + srcs = ["interleave_dataset_op_test.py"], + additional_deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:session", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:training", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + tf_py_test( name = "map_dataset_op_test", size = "small", @@ -2917,6 +2935,8 @@ tf_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:random_ops", "//tensorflow/python:script_ops", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:sparse_tensor", "//tensorflow/python:string_ops", "//tensorflow/python:variable_scope", "//tensorflow/python/data/ops:dataset_ops", diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py index 7cffa861ca..236c5bc4ff 100644 --- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py @@ -25,6 +25,7 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -100,6 +101,14 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) + def testBatchSparseError(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + with self.assertRaises(TypeError): + _ = dataset_ops.Dataset.range(10).map(_map_fn).batch(10) + def testPaddedBatchDataset(self): seq_lens = array_ops.placeholder(dtypes.int32, shape=[None]) padded_shape = array_ops.placeholder(dtypes.int64, shape=[1]) @@ -225,6 +234,14 @@ class BatchDatasetTest(test.TestCase): self.assertEqual([None, None, None], dataset.output_shapes[1].as_list()) self.assertEqual([None, 37], dataset.output_shapes[2].as_list()) + def testPaddedBatchSparseError(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + with self.assertRaises(TypeError): + _ = dataset_ops.Dataset.range(10).map(_map_fn).padded_batch(10) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/kernel_tests/filter_dataset_op_test.py index 489c0375f9..6eb445445f 100644 --- a/tensorflow/python/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/filter_dataset_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops @@ -124,6 +125,36 @@ class FilterDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testSparse(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + def _filter_fn(_, i): + return math_ops.equal(i % 2, 0) + + iterator = ( + dataset_ops.Dataset.range(10).map(_map_fn).filter(_filter_fn).map( + lambda x, i: x).make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(5): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[i*2], dense_shape=[1, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py index 76d568a0d9..895f36382a 100644 --- a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py @@ -17,16 +17,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import itertools import random import numpy as np from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.ops import array_ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -123,154 +122,29 @@ class FlatMapDatasetTest(test.TestCase): sess.run(get_next) # pylint: enable=g-long-lambda + def testSparse(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) -class InterleaveDatasetTest(test.TestCase): + def _flat_map_fn(x): + return dataset_ops.Dataset.from_tensor_slices( + sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values)) - def _interleave(self, lists, cycle_length, block_length): - num_open = 0 - - # `all_iterators` acts as a queue of iterators over each element of `lists`. - all_iterators = [iter(l) for l in lists] - - # `open_iterators` are the iterators whose elements are currently being - # interleaved. - open_iterators = [] - for i in range(cycle_length): - if all_iterators: - open_iterators.append(all_iterators.pop(0)) - num_open += 1 - else: - open_iterators.append(None) - - while num_open or all_iterators: - for i in range(cycle_length): - if open_iterators[i] is None: - if all_iterators: - open_iterators[i] = all_iterators.pop(0) - num_open += 1 - else: - continue - for _ in range(block_length): - try: - yield next(open_iterators[i]) - except StopIteration: - open_iterators[i] = None - num_open -= 1 - break - - def testPythonImplementation(self): - input_lists = [[4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6], - [4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]] - - # Cycle length 1 acts like `Dataset.flat_map()`. - expected_elements = itertools.chain(*input_lists) - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 1, 1)): - self.assertEqual(expected, produced) - - # Cycle length > 1. - expected_elements = [4, 5, 4, 5, 4, 5, 4, - 5, 5, 6, 6, # NOTE(mrry): When we cycle back - # to a list and are already at - # the end of that list, we move - # on to the next element. - 4, 6, 4, 6, 4, 6, 4, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5] - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 2, 1)): - self.assertEqual(expected, produced) - - # Cycle length > 1 and block length > 1. - expected_elements = [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, - 4, 5, 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 2, 3)): - self.assertEqual(expected, produced) - - # Cycle length > len(input_values). - expected_elements = [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, - 4, 4, 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] - for expected, produced in zip( - expected_elements, self._interleave(input_lists, 7, 2)): - self.assertEqual(expected, produced) - - def testInterleaveDataset(self): - input_values = array_ops.placeholder(dtypes.int64, shape=[None]) - cycle_length = array_ops.placeholder(dtypes.int64, shape=[]) - block_length = array_ops.placeholder(dtypes.int64, shape=[]) - - repeat_count = 2 - - dataset = ( - dataset_ops.Dataset.from_tensor_slices(input_values) - .repeat(repeat_count) - .interleave(lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x), - cycle_length, block_length)) - iterator = dataset.make_initializable_iterator() + iterator = ( + dataset_ops.Dataset.range(10).map(_map_fn).flat_map(_flat_map_fn) + .make_initializable_iterator()) init_op = iterator.initializer - next_element = iterator.get_next() + get_next = iterator.get_next() with self.test_session() as sess: - # Cycle length 1 acts like `Dataset.flat_map()`. - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 1, block_length: 3}) - - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 1, 3): - self.assertEqual(expected_element, sess.run(next_element)) - - # Cycle length > 1. - # expected: [4, 5, 4, 5, 4, 5, 4, 5, 5, 6, 6, 4, 6, 4, 6, 4, 6, 4, 6, 5, - # 6, 5, 6, 5, 6, 5, 6, 5] - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 2, block_length: 1}) - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 1): - self.assertEqual(expected_element, sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Cycle length > 1 and block length > 1. - # expected: [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 5, - # 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 2, block_length: 3}) - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 3): - self.assertEqual(expected_element, sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Cycle length > len(input_values) * repeat_count. - # expected: [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, - # 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] - sess.run(init_op, feed_dict={input_values: [4, 5, 6], - cycle_length: 7, block_length: 2}) - for expected_element in self._interleave( - [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 7, 2): - self.assertEqual(expected_element, sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Empty input. - sess.run(init_op, feed_dict={input_values: [], - cycle_length: 2, block_length: 3}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Non-empty input leading to empty output. - sess.run(init_op, feed_dict={input_values: [0, 0, 0], - cycle_length: 2, block_length: 3}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Mixture of non-empty and empty interleaved datasets. - sess.run(init_op, feed_dict={input_values: [4, 0, 6], - cycle_length: 2, block_length: 3}) - for expected_element in self._interleave( - [[4] * 4, [], [6] * 6] * repeat_count, 2, 3): - self.assertEqual(expected_element, sess.run(next_element)) + sess.run(init_op) + for i in range(10): + for j in range(2): + expected = [i, 0] if j % 2 == 0 else [0, -i] + self.assertAllEqual(expected, sess.run(get_next)) with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) + sess.run(get_next) if __name__ == "__main__": diff --git a/tensorflow/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/kernel_tests/interleave_dataset_op_test.py new file mode 100644 index 0000000000..0a3c4af9e0 --- /dev/null +++ b/tensorflow/python/kernel_tests/interleave_dataset_op_test.py @@ -0,0 +1,205 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.platform import test + + +class InterleaveDatasetTest(test.TestCase): + + def _interleave(self, lists, cycle_length, block_length): + num_open = 0 + + # `all_iterators` acts as a queue of iterators over each element of `lists`. + all_iterators = [iter(l) for l in lists] + + # `open_iterators` are the iterators whose elements are currently being + # interleaved. + open_iterators = [] + for i in range(cycle_length): + if all_iterators: + open_iterators.append(all_iterators.pop(0)) + num_open += 1 + else: + open_iterators.append(None) + + while num_open or all_iterators: + for i in range(cycle_length): + if open_iterators[i] is None: + if all_iterators: + open_iterators[i] = all_iterators.pop(0) + num_open += 1 + else: + continue + for _ in range(block_length): + try: + yield next(open_iterators[i]) + except StopIteration: + open_iterators[i] = None + num_open -= 1 + break + + def testPythonImplementation(self): + input_lists = [[4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6], + [4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]] + + # Cycle length 1 acts like `Dataset.flat_map()`. + expected_elements = itertools.chain(*input_lists) + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 1, 1)): + self.assertEqual(expected, produced) + + # Cycle length > 1. + expected_elements = [4, 5, 4, 5, 4, 5, 4, + 5, 5, 6, 6, # NOTE(mrry): When we cycle back + # to a list and are already at + # the end of that list, we move + # on to the next element. + 4, 6, 4, 6, 4, 6, 4, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5] + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 2, 1)): + self.assertEqual(expected, produced) + + # Cycle length > 1 and block length > 1. + expected_elements = [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, + 4, 5, 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 2, 3)): + self.assertEqual(expected, produced) + + # Cycle length > len(input_values). + expected_elements = [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, + 4, 4, 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] + for expected, produced in zip( + expected_elements, self._interleave(input_lists, 7, 2)): + self.assertEqual(expected, produced) + + def testInterleaveDataset(self): + input_values = array_ops.placeholder(dtypes.int64, shape=[None]) + cycle_length = array_ops.placeholder(dtypes.int64, shape=[]) + block_length = array_ops.placeholder(dtypes.int64, shape=[]) + + repeat_count = 2 + + dataset = ( + dataset_ops.Dataset.from_tensor_slices(input_values) + .repeat(repeat_count) + .interleave(lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x), + cycle_length, block_length)) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + next_element = iterator.get_next() + + with self.test_session() as sess: + # Cycle length 1 acts like `Dataset.flat_map()`. + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 1, block_length: 3}) + + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 1, 3): + self.assertEqual(expected_element, sess.run(next_element)) + + # Cycle length > 1. + # expected: [4, 5, 4, 5, 4, 5, 4, 5, 5, 6, 6, 4, 6, 4, 6, 4, 6, 4, 6, 5, + # 6, 5, 6, 5, 6, 5, 6, 5] + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 2, block_length: 1}) + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 1): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Cycle length > 1 and block length > 1. + # expected: [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 5, + # 5, 5, 6, 6, 6, 5, 5, 6, 6, 6] + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 2, block_length: 3}) + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 2, 3): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Cycle length > len(input_values) * repeat_count. + # expected: [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, + # 5, 5, 6, 6, 5, 6, 6, 5, 6, 6] + sess.run(init_op, feed_dict={input_values: [4, 5, 6], + cycle_length: 7, block_length: 2}) + for expected_element in self._interleave( + [[4] * 4, [5] * 5, [6] * 6] * repeat_count, 7, 2): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Empty input. + sess.run(init_op, feed_dict={input_values: [], + cycle_length: 2, block_length: 3}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Non-empty input leading to empty output. + sess.run(init_op, feed_dict={input_values: [0, 0, 0], + cycle_length: 2, block_length: 3}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + # Mixture of non-empty and empty interleaved datasets. + sess.run(init_op, feed_dict={input_values: [4, 0, 6], + cycle_length: 2, block_length: 3}) + for expected_element in self._interleave( + [[4] * 4, [], [6] * 6] * repeat_count, 2, 3): + self.assertEqual(expected_element, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testSparse(self): + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) + + def _interleave_fn(x): + return dataset_ops.Dataset.from_tensor_slices( + sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values)) + + iterator = ( + dataset_ops.Dataset.range(10).map(_map_fn).interleave( + _interleave_fn, cycle_length=1).make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + for j in range(2): + expected = [i, 0] if j % 2 == 0 else [0, -i] + self.assertAllEqual(expected, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/kernel_tests/map_dataset_op_test.py index 757191363c..c6c36d133c 100644 --- a/tensorflow/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/map_dataset_op_test.py @@ -26,6 +26,7 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import functional_ops @@ -33,6 +34,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import script_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -542,6 +544,56 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testSparse(self): + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + iterator = (dataset_ops.Dataset.range(10) + .map(_sparse) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[i], dense_shape=[1, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSparseChain(self): + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + def _check(i): + self.assertTrue(isinstance(i, sparse_tensor.SparseTensor)) + return sparse_ops.sparse_concat(0, [i, i]) + + iterator = (dataset_ops.Dataset.range(10) + .map(_sparse).map(_check) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0]], values=[i, i], dense_shape=[2, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt index a0fff9e16c..f834d9002c 100644 --- a/tensorflow/python/ops/hidden_ops.txt +++ b/tensorflow/python/ops/hidden_ops.txt @@ -354,6 +354,7 @@ DestroyTemporaryVariable AddSparseToTensorsMap AddManySparseToTensorsMap TakeManySparseFromTensorsMap +DeserializeSparse DeserializeManySparse SerializeManySparse SerializeSparse diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 404041dfe1..2ef6a0015b 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -1434,6 +1434,30 @@ def serialize_many_sparse(sp_input, name=None): sp_input.indices, sp_input.values, sp_input.dense_shape, name=name) +def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None): + """Deserialize `SparseTensor` from a string 3-vector (1-D `Tensor`) object. + + Args: + serialized_sparse: 1-D, The serialized `SparseTensor` object. + Must have 3 columns. + dtype: The `dtype` of the serialized `SparseTensor` object. + rank: (optional) Python int, the rank of the `SparseTensor` object. + name: A name prefix for the returned tensors (optional) + + Returns: + A `SparseTensor` representing the deserialized `SparseTensor` object. + + """ + output_indices, output_values, output_shape = ( + gen_sparse_ops._deserialize_sparse(serialized_sparse, dtype, name=name)) + + # Feed rank data back in, if available + output_indices.set_shape([None, rank]) + output_shape.set_shape([rank]) + + return sparse_tensor.SparseTensor(output_indices, output_values, output_shape) + + def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None): """Deserialize and concatenate `SparseTensors` from a serialized minibatch. diff --git a/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt new file mode 100644 index 0000000000..b25f9a029f --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt @@ -0,0 +1,13 @@ +path: "tensorflow.data.SparseType" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "dtype" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.pbtxt index 56fb270a49..b9f54a4d72 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.pbtxt @@ -12,6 +12,10 @@ tf_module { name: "Iterator" mtype: "" } + member { + name: "SparseType" + mtype: "" + } member { name: "TFRecordDataset" mtype: "" -- GitLab From 2ae9c6c7a20dbd8f05e4b60e921e60986e2968bf Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 13 Nov 2017 11:19:44 -0800 Subject: [PATCH 0343/1801] Fix a stray hyphen and add an SO link (#14465) * Fix a stray hyphen and add an SO link * Remove empty lines from table code --- tensorflow/docs_src/install/install_windows.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 4098ee5b2e..2e5d797958 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -84,7 +84,7 @@ install it now: * [Python 3.5.x 64-bit from python.org](https://www.python.org/downloads/release/python-352/) * [Python 3.6.x 64-bit from python.org](https://www.python.org/downloads/release/python-362/) --TensorFlow supports Python 3.5.x and 3.6.x on Windows. +TensorFlow supports Python 3.5.x and 3.6.x on Windows. Note that Python 3 comes with the pip3 package manager, which is the program you'll use to install TensorFlow. @@ -98,7 +98,6 @@ To install the GPU version of TensorFlow, enter the following command:
C:\> pip3 install --upgrade tensorflow-gpu
- ## Installing with Anaconda **The Anaconda installation is community supported, not officially supported.** @@ -219,6 +218,11 @@ ImportError: cannot import name 'descriptor' + +
38896424 + +
Could not find a version that satisfies the requirement tensorflow
+ + - -- GitLab From c4bb132f4b64ff6d5490fb5fe5225a95a94d2e47 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:17:04 -0800 Subject: [PATCH 0344/1801] Internal Change PiperOrigin-RevId: 175559457 --- tensorflow/contrib/lite/BUILD | 83 ------------------------------ tensorflow/contrib/lite/java/BUILD | 2 + 2 files changed, 2 insertions(+), 83 deletions(-) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index e0c674c295..96a9e281ad 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -184,89 +184,6 @@ cc_library( }), ) -cc_test( - name = "speech_hotword_model_test", - size = "small", - srcs = ["models/speech_hotword_model_test.cc"], - data = [ - "models/testdata/speech_hotword_model_in.csv", - "models/testdata/speech_hotword_model_out_rank1.csv", - "models/testdata/speech_hotword_model_out_rank2.csv", - "models/testdata/speech_hotword_model_rank1.tflite", - "models/testdata/speech_hotword_model_rank2.tflite", - ], - deps = [ - ":framework", - ":models_test_utils", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - -gen_selected_ops( - name = "speech_speakerid_ops", - model = "models/testdata/speech_speakerid_model.tflite", -) - -cc_test( - name = "speech_speakerid_model_test", - size = "small", - srcs = [ - "models/speech_speakerid_model_test.cc", - ":speech_speakerid_ops", - ], - data = [ - "models/testdata/speech_speakerid_model.tflite", - "models/testdata/speech_speakerid_model_in.csv", - "models/testdata/speech_speakerid_model_out.csv", - ], - deps = [ - ":framework", - ":models_test_utils", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "//tensorflow/contrib/lite/tools:mutable_op_resolver", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "speech_terse_am_model_test", - size = "small", - srcs = ["models/speech_terse_am_model_test.cc"], - data = [ - "models/testdata/speech_terse_am_model.tflite", - "models/testdata/speech_terse_am_model_in.csv", - "models/testdata/speech_terse_am_model_out.csv", - ], - deps = [ - ":framework", - ":models_test_utils", - #"//file/base:path", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "speech_tts_model_test", - size = "small", - srcs = ["models/speech_tts_model_test.cc"], - data = [ - "models/testdata/speech_tts_model.tflite", - "models/testdata/speech_tts_model_in.csv", - "models/testdata/speech_tts_model_out.csv", - ], - deps = [ - ":framework", - ":models_test_utils", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index b0d20bac20..1de28eb52d 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -58,6 +58,7 @@ java_test( javacopts = JAVACOPTS, test_class = "org.tensorflow.lite.TensorFlowLiteTest", deps = [ + ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -71,6 +72,7 @@ java_test( javacopts = JAVACOPTS, test_class = "org.tensorflow.lite.DataTypeTest", deps = [ + ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", -- GitLab From aefd2377aec2a970264e77a26aa2bcebdf4e2247 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:20:31 -0800 Subject: [PATCH 0345/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 175559991 --- .../core/ops/compat/ops_history.v1.pbtxt | 23 ++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 27 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 60f67543f1..6833c8e0ea 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -9862,6 +9862,29 @@ op { type: "type" } } +op { + name: "DeserializeSparse" + input_arg { + name: "serialized_sparse" + type: DT_STRING + } + output_arg { + name: "sparse_indices" + type: DT_INT64 + } + output_arg { + name: "sparse_values" + type_attr: "dtype" + } + output_arg { + name: "sparse_shape" + type: DT_INT64 + } + attr { + name: "dtype" + type: "type" + } +} op { name: "DestroyResourceOp" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 2a74c20707..68fc61150c 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -7131,6 +7131,33 @@ op { summary: "Deserialize and concatenate `SparseTensors` from a serialized minibatch." description: "The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where\n`N` is the minibatch size and the rows correspond to packed outputs of\n`SerializeSparse`. The ranks of the original `SparseTensor` objects\nmust all match. When the final `SparseTensor` is created, it has rank one\nhigher than the ranks of the incoming `SparseTensor` objects\n(they have been concatenated along a new row dimension).\n\nThe output `SparseTensor` object\'s shape values for all dimensions but the\nfirst are the max across the input `SparseTensor` objects\' shape values\nfor the corresponding dimensions. Its first shape value is `N`, the minibatch\nsize.\n\nThe input `SparseTensor` objects\' indices are assumed ordered in\nstandard lexicographic order. If this is not the case, after this\nstep run `SparseReorder` to restore index ordering.\n\nFor example, if the serialized input is a `[2 x 3]` matrix representing two\noriginal `SparseTensor` objects:\n\n index = [ 0]\n [10]\n [20]\n values = [1, 2, 3]\n shape = [50]\n\nand\n\n index = [ 2]\n [10]\n values = [4, 5]\n shape = [30]\n\nthen the final deserialized `SparseTensor` will be:\n\n index = [0 0]\n [0 10]\n [0 20]\n [1 2]\n [1 10]\n values = [1, 2, 3, 4, 5]\n shape = [2 50]" } +op { + name: "DeserializeSparse" + input_arg { + name: "serialized_sparse" + description: "1-D, The serialized `SparseTensor` object. Must have 3 columns." + type: DT_STRING + } + output_arg { + name: "sparse_indices" + type: DT_INT64 + } + output_arg { + name: "sparse_values" + type_attr: "dtype" + } + output_arg { + name: "sparse_shape" + type: DT_INT64 + } + attr { + name: "dtype" + type: "type" + description: "The `dtype` of the serialized `SparseTensor` object." + } + summary: "Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)" + description: "object." +} op { name: "DestroyResourceOp" input_arg { -- GitLab From 5be2065205be06e491be9d0676ee273f3aba218d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:24:47 -0800 Subject: [PATCH 0346/1801] Gauge API for monitoring. - Allowed value types are int64 and string. PiperOrigin-RevId: 175560740 --- tensorflow/core/BUILD | 3 + .../core/lib/monitoring/collected_metrics.h | 1 + .../core/lib/monitoring/collection_registry.h | 6 + .../monitoring/collection_registry_test.cc | 91 ++++++++ tensorflow/core/lib/monitoring/counter.h | 2 +- tensorflow/core/lib/monitoring/gauge.h | 215 ++++++++++++++++++ tensorflow/core/lib/monitoring/gauge_test.cc | 92 ++++++++ tensorflow/core/lib/monitoring/metric_def.h | 12 +- tensorflow/core/lib/monitoring/mobile_gauge.h | 72 ++++++ tensorflow/core/lib/monitoring/sampler.h | 5 +- 10 files changed, 493 insertions(+), 6 deletions(-) create mode 100644 tensorflow/core/lib/monitoring/gauge.h create mode 100644 tensorflow/core/lib/monitoring/gauge_test.cc create mode 100644 tensorflow/core/lib/monitoring/mobile_gauge.h diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 18d69fceb3..01ddbfc2d4 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -320,6 +320,7 @@ cc_library( "lib/io/table_options.h", "lib/math/math_util.h", "lib/monitoring/counter.h", + "lib/monitoring/gauge.h", "lib/monitoring/sampler.h", "lib/random/distribution_sampler.h", "lib/random/philox_random.h", @@ -1393,6 +1394,7 @@ LIB_INTERNAL_PUBLIC_HEADERS = tf_additional_lib_hdrs() + [ "lib/monitoring/collection_registry.h", "lib/monitoring/metric_def.h", "lib/monitoring/mobile_counter.h", + "lib/monitoring/mobile_gauge.h", "lib/monitoring/mobile_sampler.h", "lib/png/png_io.h", "lib/random/random.h", @@ -2369,6 +2371,7 @@ tf_cc_tests( "lib/math/math_util_test.cc", "lib/monitoring/collection_registry_test.cc", "lib/monitoring/counter_test.cc", + "lib/monitoring/gauge_test.cc", "lib/monitoring/metric_def_test.cc", "lib/monitoring/sampler_test.cc", "lib/random/distribution_sampler_test.cc", diff --git a/tensorflow/core/lib/monitoring/collected_metrics.h b/tensorflow/core/lib/monitoring/collected_metrics.h index 3dde55342e..fbef25619f 100644 --- a/tensorflow/core/lib/monitoring/collected_metrics.h +++ b/tensorflow/core/lib/monitoring/collected_metrics.h @@ -87,6 +87,7 @@ struct Point { // The actual metric value, dependent on the value_type enum. ValueType value_type; int64 int64_value; + string string_value; HistogramProto histogram_value; // start_timestamp and end_timestamp indicate the time period over which this diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h index 2eff468436..030f8e360a 100644 --- a/tensorflow/core/lib/monitoring/collection_registry.h +++ b/tensorflow/core/lib/monitoring/collection_registry.h @@ -218,6 +218,12 @@ inline void CollectValue(const int64& value, Point* const point) { point->int64_value = value; } +template <> +inline void CollectValue(const string& value, Point* const point) { + point->value_type = ValueType::kString; + point->string_value = value; +} + template <> inline void CollectValue(const HistogramProto& value, Point* const point) { point->value_type = ValueType::kHistogram; diff --git a/tensorflow/core/lib/monitoring/collection_registry_test.cc b/tensorflow/core/lib/monitoring/collection_registry_test.cc index 5b9c100690..ca25f508da 100644 --- a/tensorflow/core/lib/monitoring/collection_registry_test.cc +++ b/tensorflow/core/lib/monitoring/collection_registry_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/lib/monitoring/collection_registry.h" #include "tensorflow/core/lib/monitoring/counter.h" +#include "tensorflow/core/lib/monitoring/gauge.h" #include "tensorflow/core/lib/monitoring/sampler.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/protobuf.h" @@ -176,6 +177,96 @@ TEST(CollectMetricsTest, Counter) { } } +TEST(CollectMetricsTest, Gauge) { + auto string_gauge_with_labels = + std::unique_ptr>(Gauge::New( + "/tensorflow/test/string_gauge_with_labels", + "String gauge with labels.", "MyLabel0", "MyLabel1")); + auto inteter_gauge_without_labels = std::unique_ptr>( + Gauge::New("/tensorflow/test/integer_gauge_without_labels", + "Integer gauge without labels.")); + + string_gauge_with_labels->GetCell("Label00", "Label10")->Set("test1"); + string_gauge_with_labels->GetCell("Label01", "Label11")->Set("test2"); + inteter_gauge_without_labels->GetCell()->Set(7); + + for (const bool collect_metric_descriptors : {true, false}) { + SCOPED_TRACE(strings::StrCat("collect_metric_descriptors: ", + collect_metric_descriptors)); + + auto* collection_registry = CollectionRegistry::Default(); + CollectionRegistry::CollectMetricsOptions options; + options.collect_metric_descriptors = collect_metric_descriptors; + const std::unique_ptr collected_metrics = + collection_registry->CollectMetrics(options); + + if (collect_metric_descriptors) { + ASSERT_EQ(2, collected_metrics->metric_descriptor_map.size()); + + const MetricDescriptor& ld = *collected_metrics->metric_descriptor_map.at( + "/tensorflow/test/string_gauge_with_labels"); + EXPECT_EQ("/tensorflow/test/string_gauge_with_labels", ld.name); + EXPECT_EQ("String gauge with labels.", ld.description); + ASSERT_EQ(2, ld.label_names.size()); + EXPECT_EQ("MyLabel0", ld.label_names[0]); + EXPECT_EQ("MyLabel1", ld.label_names[1]); + EXPECT_EQ(MetricKind::kGauge, ld.metric_kind); + EXPECT_EQ(ValueType::kString, ld.value_type); + + const MetricDescriptor& ud = *collected_metrics->metric_descriptor_map.at( + "/tensorflow/test/integer_gauge_without_labels"); + EXPECT_EQ("/tensorflow/test/integer_gauge_without_labels", ud.name); + EXPECT_EQ("Integer gauge without labels.", ud.description); + ASSERT_EQ(0, ud.label_names.size()); + EXPECT_EQ(MetricKind::kGauge, ud.metric_kind); + EXPECT_EQ(ValueType::kInt64, ud.value_type); + } else { + EXPECT_EQ(0, collected_metrics->metric_descriptor_map.size()); + } + + ASSERT_EQ(2, collected_metrics->point_set_map.size()); + + const PointSet& lps = *collected_metrics->point_set_map.at( + "/tensorflow/test/string_gauge_with_labels"); + EXPECT_EQ("/tensorflow/test/string_gauge_with_labels", lps.metric_name); + ASSERT_EQ(2, lps.points.size()); + ASSERT_EQ(2, lps.points[0]->labels.size()); + EXPECT_EQ("MyLabel0", lps.points[0]->labels[0].name); + EXPECT_EQ("Label00", lps.points[0]->labels[0].value); + EXPECT_EQ("MyLabel1", lps.points[0]->labels[1].name); + EXPECT_EQ("Label10", lps.points[0]->labels[1].value); + EXPECT_EQ(ValueType::kString, lps.points[0]->value_type); + EXPECT_EQ("test1", lps.points[0]->string_value); + EXPECT_LT(0, lps.points[0]->start_timestamp_millis); + EXPECT_LT(0, lps.points[0]->end_timestamp_millis); + EXPECT_GE(lps.points[0]->end_timestamp_millis, + lps.points[0]->start_timestamp_millis); + ASSERT_EQ(2, lps.points[1]->labels.size()); + EXPECT_EQ("MyLabel0", lps.points[1]->labels[0].name); + EXPECT_EQ("Label01", lps.points[1]->labels[0].value); + EXPECT_EQ("MyLabel1", lps.points[1]->labels[1].name); + EXPECT_EQ("Label11", lps.points[1]->labels[1].value); + EXPECT_EQ(ValueType::kString, lps.points[1]->value_type); + EXPECT_EQ("test2", lps.points[1]->string_value); + EXPECT_LT(0, lps.points[1]->start_timestamp_millis); + EXPECT_LT(0, lps.points[1]->end_timestamp_millis); + EXPECT_GE(lps.points[1]->end_timestamp_millis, + lps.points[1]->start_timestamp_millis); + + const PointSet& ups = *collected_metrics->point_set_map.at( + "/tensorflow/test/integer_gauge_without_labels"); + EXPECT_EQ("/tensorflow/test/integer_gauge_without_labels", ups.metric_name); + ASSERT_EQ(1, ups.points.size()); + EXPECT_EQ(0, ups.points[0]->labels.size()); + EXPECT_EQ(ValueType::kInt64, ups.points[0]->value_type); + EXPECT_EQ(7, ups.points[0]->int64_value); + EXPECT_LT(0, ups.points[0]->start_timestamp_millis); + EXPECT_LT(0, ups.points[0]->end_timestamp_millis); + EXPECT_GE(ups.points[0]->end_timestamp_millis, + ups.points[0]->start_timestamp_millis); + } +} + void EqHistograms(const Histogram& expected, const HistogramProto& actual_proto) { Histogram actual; diff --git a/tensorflow/core/lib/monitoring/counter.h b/tensorflow/core/lib/monitoring/counter.h index 4b84e9d928..7240348a9b 100644 --- a/tensorflow/core/lib/monitoring/counter.h +++ b/tensorflow/core/lib/monitoring/counter.h @@ -48,7 +48,7 @@ namespace monitoring { // This class is thread-safe. class CounterCell { public: - CounterCell(const int64 value) : value_(value) {} + CounterCell(int64 value) : value_(value) {} ~CounterCell() {} // Atomically increments the value by step. diff --git a/tensorflow/core/lib/monitoring/gauge.h b/tensorflow/core/lib/monitoring/gauge.h new file mode 100644 index 0000000000..75471cfb22 --- /dev/null +++ b/tensorflow/core/lib/monitoring/gauge.h @@ -0,0 +1,215 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_GAUGE_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_GAUGE_H_ + +// We replace this implementation with a null implementation for mobile +// platforms. +#include "tensorflow/core/platform/platform.h" +#ifdef IS_MOBILE_PLATFORM +#include "tensorflow/core/lib/monitoring/mobile_gauge.h" +#else + +#include +#include +#include + +#include "tensorflow/core/lib/monitoring/collection_registry.h" +#include "tensorflow/core/lib/monitoring/metric_def.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace monitoring { + +// GaugeCell stores each value of a gauge. +// +// A cell can be passed off to a module which may repeatedly update it without +// needing further map-indexing computations. This improves both encapsulation +// (separate modules can own a cell each, without needing to know about the map +// to which both cells belong) and performance (since map indexing and +// associated locking are both avoided). +// +// This class is thread-safe. +template +class GaugeCell { + public: + explicit GaugeCell(const T& value) : value_(value) {} + ~GaugeCell() {} + + // Atomically sets the value. + void Set(const T& value) LOCKS_EXCLUDED(mu_); + + // Retrieves the current value. + T value() const LOCKS_EXCLUDED(mu_); + + private: + T value_ GUARDED_BY(mu_); + mutable mutex mu_; + + TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell); +}; + +// Explicit specialization of GaugeCell. Compared to the primary +// template, it uses atomic values as opposed to mutex. This class is +// thread-safe. +template <> +class GaugeCell { + public: + explicit GaugeCell(int64 value) : value_(value) {} + ~GaugeCell() {} + + // Atomically sets the value. + void Set(int64 value); + + // Retrieves the current value. + int64 value() const; + + private: + std::atomic value_; + + TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell); +}; + +// A stateful class for updating a gauge-like metric. Allowed ValueType are +// int64 and string. +// +// This class encapsulates a set of values (or a single value for a label-less +// metric). Each value is identified by a tuple of labels. The class allows the +// user to set each value. +// +// Gauge allocates storage and maintains a cell for each value. You can +// retrieve an individual cell using a label-tuple and update it separately. +// This improves performance since operations related to retrieval, like +// map-indexing and locking, are avoided. +// +// This class is thread-safe. +template +class Gauge { + public: + ~Gauge() { + // Deleted here, before the metric_def is destroyed. + registration_handle_.reset(); + } + + // Creates the metric based on the metric-definition arguments. + // + // Example: + // + // auto* string_gauge_with_label = Gauge::New( + // "/tensorflow/string_gauge_with_label", + // "String gauge with one label.", "MyLabelName"); + // + // auto* integer_gauge = Gauge::New("/tensorflow/integer_gauge", + // "Integer gauge") + template + static Gauge* New(MetricDefArgs&&... metric_def_args); + + // Retrieves the cell for the specified labels, creating it on demand if not + // already present. + template + GaugeCell* GetCell(const Labels&... labels) LOCKS_EXCLUDED(mu_); + + private: + explicit Gauge( + const MetricDef& metric_def) + : metric_def_(metric_def), + registration_handle_(CollectionRegistry::Default()->Register( + &metric_def_, [&](MetricCollectorGetter getter) { + auto metric_collector = getter.Get(&metric_def_); + + mutex_lock l(mu_); + for (const auto& cell : cells_) { + metric_collector.CollectValue(cell.first, cell.second.value()); + } + })) {} + + mutable mutex mu_; + + // The metric definition. This will be used to identify the metric when we + // register it for collection. + const MetricDef metric_def_; + + std::unique_ptr registration_handle_; + + using LabelArray = std::array; + std::map > cells_ GUARDED_BY(mu_); + + TF_DISALLOW_COPY_AND_ASSIGN(Gauge); +}; + +//// +// Implementation details follow. API readers may skip. +//// +template +void GaugeCell::Set(const T& value) { + mutex_lock l(mu_); + value_ = value; +} + +template +T GaugeCell::value() const { + mutex_lock l(mu_); + return value_; +} + +inline void GaugeCell::Set(int64 value) { value_ = value; } + +inline int64 GaugeCell::value() const { return value_; } + +template +template +Gauge* Gauge::New( + MetricDefArgs&&... metric_def_args) { + static_assert(std::is_same::value || + std::is_same::value, + "Gauge only allows int64 and string types."); + return new Gauge( + MetricDef( + std::forward(metric_def_args)...)); +} + +template +template +GaugeCell* Gauge::GetCell( + const Labels&... labels) LOCKS_EXCLUDED(mu_) { + // Provides a more informative error message than the one during array + // construction below. + static_assert( + sizeof...(Labels) == NumLabels, + "Mismatch between Gauge and number of labels " + "provided in GetCell(...)."); + + const LabelArray& label_array = {{labels...}}; + mutex_lock l(mu_); + const auto found_it = cells_.find(label_array); + if (found_it != cells_.end()) { + return &(found_it->second); + } + return &(cells_ + .emplace(std::piecewise_construct, + std::forward_as_tuple(label_array), + std::forward_as_tuple(ValueType())) + .first->second); +} + +} // namespace monitoring +} // namespace tensorflow + +#endif // IS_MOBILE_PLATFORM +#endif // THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_GAUGE_H_ diff --git a/tensorflow/core/lib/monitoring/gauge_test.cc b/tensorflow/core/lib/monitoring/gauge_test.cc new file mode 100644 index 0000000000..f98cfe2a3b --- /dev/null +++ b/tensorflow/core/lib/monitoring/gauge_test.cc @@ -0,0 +1,92 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/lib/monitoring/gauge.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace monitoring { +namespace { + +auto* gauge_with_labels = Gauge::New( + "/tensorflow/test/gauge_with_labels", "Gauge with one label.", "MyLabel"); + +TEST(LabeledGaugeTest, InitializedWithZero) { + EXPECT_EQ(0, gauge_with_labels->GetCell("Empty")->value()); +} + +TEST(LabeledGaugeTest, GetCell) { + auto* cell = gauge_with_labels->GetCell("GetCellOp"); + EXPECT_EQ(0, cell->value()); + + cell->Set(1); + EXPECT_EQ(1, cell->value()); + + auto* same_cell = gauge_with_labels->GetCell("GetCellOp"); + EXPECT_EQ(1, same_cell->value()); + + same_cell->Set(10); + EXPECT_EQ(10, cell->value()); + EXPECT_EQ(10, same_cell->value()); +} + +auto* gauge_without_labels = Gauge::New( + "/tensorflow/test/gauge_without_labels", "Gauge without any labels."); + +TEST(UnlabeledGaugeTest, InitializedWithZero) { + EXPECT_EQ(0, gauge_without_labels->GetCell()->value()); +} + +TEST(UnlabeledGaugeTest, GetCell) { + auto* cell = gauge_without_labels->GetCell(); + EXPECT_EQ(0, cell->value()); + + cell->Set(1); + EXPECT_EQ(1, cell->value()); + + auto* same_cell = gauge_without_labels->GetCell(); + EXPECT_EQ(1, same_cell->value()); + + same_cell->Set(10); + EXPECT_EQ(10, cell->value()); + EXPECT_EQ(10, same_cell->value()); +} + +auto* string_gauge = Gauge::New("/tensorflow/test/string_gauge", + "Gauge of string value."); + +TEST(GaugeOfStringValue, InitializedWithEmptyString) { + EXPECT_EQ("", string_gauge->GetCell()->value()); +} + +TEST(GaugeOfStringValue, GetCell) { + auto* cell = string_gauge->GetCell(); + EXPECT_EQ("", cell->value()); + + cell->Set("foo"); + EXPECT_EQ("foo", cell->value()); + + auto* same_cell = string_gauge->GetCell(); + EXPECT_EQ("foo", cell->value()); + + same_cell->Set("bar"); + EXPECT_EQ("bar", cell->value()); + EXPECT_EQ("bar", same_cell->value()); +} + +} // namespace +} // namespace monitoring +} // namespace tensorflow diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h index 116a73823d..3459c2ab82 100644 --- a/tensorflow/core/lib/monitoring/metric_def.h +++ b/tensorflow/core/lib/monitoring/metric_def.h @@ -28,15 +28,16 @@ namespace monitoring { // The different metric kinds available. // // Gauge indicates that the metric's values are instantaneous measurements of a -// (typically) continuously varying quantity. Examples: a process's current heap -// size, a queue's current length. +// (typically) continuously varying quantity or a string value. Examples: a +// process's current heap size, a queue's current length, the name of the binary +// used by a process. // // Cumulative indicates that the metric's values represent non-negative changes // over specified time periods. Example: the number of rpc calls to a service. enum class MetricKind : int { kGauge = 0, kCumulative }; // The type of the metric values. -enum class ValueType : int { kInt64 = 0, kHistogram }; +enum class ValueType : int { kInt64 = 0, kHistogram, kString }; // Everything in the internal namespace is implementation details. Do not depend // on this. @@ -73,6 +74,11 @@ inline ValueType GetValueType() { return ValueType::kHistogram; } +template <> +inline ValueType GetValueType() { + return ValueType::kString; +} + } // namespace internal // Abstract base class for a metric definition. diff --git a/tensorflow/core/lib/monitoring/mobile_gauge.h b/tensorflow/core/lib/monitoring/mobile_gauge.h new file mode 100644 index 0000000000..ac13ad35c0 --- /dev/null +++ b/tensorflow/core/lib/monitoring/mobile_gauge.h @@ -0,0 +1,72 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Null implementation of the Gauge metric for mobile platforms. + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_ + +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace monitoring { + +// GaugeCell which has a null implementation. +template +class GaugeCell { + public: + public: + GaugeCell() {} + ~GaugeCell() {} + + void Set(const T& value) {} + T value() const { return T(); } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell); +}; + +// Gauge which has a null implementation. +template +class Gauge { + public: + ~Gauge() {} + + template + static Gauge* New(MetricDefArgs&&... metric_def_args) { + static_assert(std::is_same::value || + std::is_same::value, + "Gauge only allows int64 and string types."); + return new Gauge(); + } + + template + GaugeCell* GetCell(const Labels&... labels) { + return &default_gauge_cell_; + } + + private: + Gauge() {} + + GaugeCell default_gauge_cell_; + + TF_DISALLOW_COPY_AND_ASSIGN(Gauge); +}; + +} // namespace monitoring +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_ diff --git a/tensorflow/core/lib/monitoring/sampler.h b/tensorflow/core/lib/monitoring/sampler.h index 5a4d49d5d4..c7a05428e2 100644 --- a/tensorflow/core/lib/monitoring/sampler.h +++ b/tensorflow/core/lib/monitoring/sampler.h @@ -159,9 +159,10 @@ class Sampler { // Registration handle with the CollectionRegistry. std::unique_ptr registration_handle_; - // We use a std::map here because we give out pointers to the SamplerCells, - // which need to remain valid even after more cells. using LabelArray = std::array; + // we need a container here that guarantees pointer stability of the value, + // namely, the pointer of the value should remain valid even after more cells + // are inserted. std::map cells_ GUARDED_BY(mu_); TF_DISALLOW_COPY_AND_ASSIGN(Sampler); -- GitLab From 659d8cbc3aaffc0249afee1ec437639beda8d243 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:26:23 -0800 Subject: [PATCH 0347/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 175561031 --- tensorflow/go/op/wrappers.go | 493 ++++++++++++++++++----------------- 1 file changed, 258 insertions(+), 235 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index eb79da5384..5a6ae4fa5f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -8054,146 +8054,6 @@ func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. -type MaxPool3DGradGradAttr func(optionalAttr) - -// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool3DGradGrad", - Input: []tf.Input{ - orig_input, orig_output, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FakeQuantWithMinMaxArgsGradientAttr is an optional argument to FakeQuantWithMinMaxArgsGradient. -type FakeQuantWithMinMaxArgsGradientAttr func(optionalAttr) - -// FakeQuantWithMinMaxArgsGradientMin sets the optional min attribute to value. -// If not specified, defaults to -6 -func FakeQuantWithMinMaxArgsGradientMin(value float32) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["min"] = value - } -} - -// FakeQuantWithMinMaxArgsGradientMax sets the optional max attribute to value. -// If not specified, defaults to 6 -func FakeQuantWithMinMaxArgsGradientMax(value float32) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["max"] = value - } -} - -// FakeQuantWithMinMaxArgsGradientNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxArgsGradientNumBits(value int64) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxArgsGradientNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxArgsGradientNarrowRange(value bool) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Compute gradients for a FakeQuantWithMinMaxArgs operation. -// -// Arguments: -// gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. -// inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation. -// -// Returns Backpropagated gradients below the FakeQuantWithMinMaxArgs operation: -// `gradients * (inputs >= min && inputs <= max)`. -func FakeQuantWithMinMaxArgsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsGradientAttr) (backprops tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxArgsGradient", - Input: []tf.Input{ - gradients, inputs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients of the maxpooling function. -// -// Arguments: -// input: The original input. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the -// output of `max_pool`. -// argmax: The indices of the maximum values chosen for each output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients w.r.t. the input of `max_pool`. -func MaxPoolGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "MaxPoolGradWithArgmax", - Input: []tf.Input{ - input, grad, argmax, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. type QuantizeAndDequantizeV3Attr func(optionalAttr) @@ -16849,6 +16709,29 @@ func FFT3D(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`) +// +// object. +// +// Arguments: +// serialized_sparse: 1-D, The serialized `SparseTensor` object. Must have 3 columns. +// dtype: The `dtype` of the serialized `SparseTensor` object. +func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + opspec := tf.OpSpec{ + Type: "DeserializeSparse", + Input: []tf.Input{ + serialized_sparse, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + // Elementwise computes the bitwise XOR of `x` and `y`. // // The result will have those bits set, that are different in `x` and `y`. The @@ -20670,6 +20553,146 @@ func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. +type MaxPool3DGradGradAttr func(optionalAttr) + +// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Computes second-order gradients of the maxpooling function. +// +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +// +// Returns Gradients of gradients w.r.t. the input to `max_pool`. +func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPool3DGradGrad", + Input: []tf.Input{ + orig_input, orig_output, grad, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// FakeQuantWithMinMaxArgsGradientAttr is an optional argument to FakeQuantWithMinMaxArgsGradient. +type FakeQuantWithMinMaxArgsGradientAttr func(optionalAttr) + +// FakeQuantWithMinMaxArgsGradientMin sets the optional min attribute to value. +// If not specified, defaults to -6 +func FakeQuantWithMinMaxArgsGradientMin(value float32) FakeQuantWithMinMaxArgsGradientAttr { + return func(m optionalAttr) { + m["min"] = value + } +} + +// FakeQuantWithMinMaxArgsGradientMax sets the optional max attribute to value. +// If not specified, defaults to 6 +func FakeQuantWithMinMaxArgsGradientMax(value float32) FakeQuantWithMinMaxArgsGradientAttr { + return func(m optionalAttr) { + m["max"] = value + } +} + +// FakeQuantWithMinMaxArgsGradientNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxArgsGradientNumBits(value int64) FakeQuantWithMinMaxArgsGradientAttr { + return func(m optionalAttr) { + m["num_bits"] = value + } +} + +// FakeQuantWithMinMaxArgsGradientNarrowRange sets the optional narrow_range attribute to value. +// If not specified, defaults to false +func FakeQuantWithMinMaxArgsGradientNarrowRange(value bool) FakeQuantWithMinMaxArgsGradientAttr { + return func(m optionalAttr) { + m["narrow_range"] = value + } +} + +// Compute gradients for a FakeQuantWithMinMaxArgs operation. +// +// Arguments: +// gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. +// inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation. +// +// Returns Backpropagated gradients below the FakeQuantWithMinMaxArgs operation: +// `gradients * (inputs >= min && inputs <= max)`. +func FakeQuantWithMinMaxArgsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsGradientAttr) (backprops tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FakeQuantWithMinMaxArgsGradient", + Input: []tf.Input{ + gradients, inputs, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes gradients of the maxpooling function. +// +// Arguments: +// input: The original input. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the +// output of `max_pool`. +// argmax: The indices of the maximum values chosen for each output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns Gradients w.r.t. the input of `max_pool`. +func MaxPoolGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + opspec := tf.OpSpec{ + Type: "MaxPoolGradWithArgmax", + Input: []tf.Input{ + input, grad, argmax, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // StringToNumberAttr is an optional argument to StringToNumber. type StringToNumberAttr func(optionalAttr) @@ -23222,6 +23245,101 @@ func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// RecordInputAttr is an optional argument to RecordInput. +type RecordInputAttr func(optionalAttr) + +// RecordInputFileRandomSeed sets the optional file_random_seed attribute to value. +// +// value: Random seeds used to produce randomized records. +// If not specified, defaults to 301 +func RecordInputFileRandomSeed(value int64) RecordInputAttr { + return func(m optionalAttr) { + m["file_random_seed"] = value + } +} + +// RecordInputFileShuffleShiftRatio sets the optional file_shuffle_shift_ratio attribute to value. +// +// value: Shifts the list of files after the list is randomly +// shuffled. +// If not specified, defaults to 0 +func RecordInputFileShuffleShiftRatio(value float32) RecordInputAttr { + return func(m optionalAttr) { + m["file_shuffle_shift_ratio"] = value + } +} + +// RecordInputFileBufferSize sets the optional file_buffer_size attribute to value. +// +// value: The randomization shuffling buffer. +// If not specified, defaults to 10000 +func RecordInputFileBufferSize(value int64) RecordInputAttr { + return func(m optionalAttr) { + m["file_buffer_size"] = value + } +} + +// RecordInputFileParallelism sets the optional file_parallelism attribute to value. +// +// value: How many sstables are opened and concurrently iterated over. +// If not specified, defaults to 16 +func RecordInputFileParallelism(value int64) RecordInputAttr { + return func(m optionalAttr) { + m["file_parallelism"] = value + } +} + +// RecordInputBatchSize sets the optional batch_size attribute to value. +// +// value: The batch size. +// If not specified, defaults to 32 +func RecordInputBatchSize(value int64) RecordInputAttr { + return func(m optionalAttr) { + m["batch_size"] = value + } +} + +// Emits randomized records. +// +// Arguments: +// file_pattern: Glob pattern for the data files. +// +// Returns A tensor of shape [batch_size]. +func RecordInput(scope *Scope, file_pattern string, optional ...RecordInputAttr) (records tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"file_pattern": file_pattern} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RecordInput", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Rounds the values of a tensor to the nearest integer, element-wise. +// +// Rounds half to even. Also known as bankers rounding. If you want to round +// according to the current system rounding mode use std::cint. +func Round(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Round", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Generates values in an interval. // // A sequence of `num` evenly-spaced values are generated beginning at `start`. @@ -24909,101 +25027,6 @@ func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...Approx return op.Output(0) } -// RecordInputAttr is an optional argument to RecordInput. -type RecordInputAttr func(optionalAttr) - -// RecordInputFileRandomSeed sets the optional file_random_seed attribute to value. -// -// value: Random seeds used to produce randomized records. -// If not specified, defaults to 301 -func RecordInputFileRandomSeed(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_random_seed"] = value - } -} - -// RecordInputFileShuffleShiftRatio sets the optional file_shuffle_shift_ratio attribute to value. -// -// value: Shifts the list of files after the list is randomly -// shuffled. -// If not specified, defaults to 0 -func RecordInputFileShuffleShiftRatio(value float32) RecordInputAttr { - return func(m optionalAttr) { - m["file_shuffle_shift_ratio"] = value - } -} - -// RecordInputFileBufferSize sets the optional file_buffer_size attribute to value. -// -// value: The randomization shuffling buffer. -// If not specified, defaults to 10000 -func RecordInputFileBufferSize(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_buffer_size"] = value - } -} - -// RecordInputFileParallelism sets the optional file_parallelism attribute to value. -// -// value: How many sstables are opened and concurrently iterated over. -// If not specified, defaults to 16 -func RecordInputFileParallelism(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_parallelism"] = value - } -} - -// RecordInputBatchSize sets the optional batch_size attribute to value. -// -// value: The batch size. -// If not specified, defaults to 32 -func RecordInputBatchSize(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["batch_size"] = value - } -} - -// Emits randomized records. -// -// Arguments: -// file_pattern: Glob pattern for the data files. -// -// Returns A tensor of shape [batch_size]. -func RecordInput(scope *Scope, file_pattern string, optional ...RecordInputAttr) (records tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"file_pattern": file_pattern} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RecordInput", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Rounds the values of a tensor to the nearest integer, element-wise. -// -// Rounds half to even. Also known as bankers rounding. If you want to round -// according to the current system rounding mode use std::cint. -func Round(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Round", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns the max of x and y (i.e. x > y ? x : y) element-wise. // // *NOTE*: `Maximum` supports broadcasting. More about broadcasting -- GitLab From 58f7858601b72aa3c5854571f2152b91d1795e29 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:34:15 -0800 Subject: [PATCH 0348/1801] [TF:XLA] Adding test coverage for more C64 operations, and ensuring they pass. Included here: - reduction ops (reduce_sum, reduce_prod) - unaries: tanh, sigmoid (currently GPU only) - binaries: pow (currently GPU only) PiperOrigin-RevId: 175562417 --- tensorflow/compiler/tests/binary_ops_test.py | 50 ++++- tensorflow/compiler/tests/reduce_ops_test.py | 30 ++- tensorflow/compiler/tests/unary_ops_test.py | 31 ++- .../xla/service/algebraic_simplifier.cc | 15 ++ .../xla/service/algebraic_simplifier_test.cc | 25 +++ .../xla/service/elemental_ir_emitter.cc | 208 ++++++++++-------- .../xla/service/elemental_ir_emitter.h | 12 +- .../xla/service/gpu/elemental_ir_emitter.cc | 164 +++++++++++--- .../xla/service/gpu/elemental_ir_emitter.h | 4 + .../compiler/xla/service/gpu/ir_emitter.cc | 33 +-- .../xla/service/gpu/ir_emitter_unnested.cc | 13 +- .../compiler/xla/service/llvm_ir/llvm_util.cc | 20 +- .../compiler/xla/service/llvm_ir/llvm_util.h | 3 + .../xla/tests/array_elementwise_ops_test.cc | 153 +++++++++++++ .../xla/tests/client_library_test_base.h | 10 +- .../compiler/xla/tests/dot_operation_test.cc | 32 +-- 16 files changed, 623 insertions(+), 180 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index d412c572ae..654dc15e86 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -366,16 +366,52 @@ class BinaryOpsTest(XLATestCase): self._testBinary( gen_math_ops._real_div, - np.array([3, 3j, -1.5j, -8, 2 + 3j, 2 + 4j, 44 + 3j], dtype=dtype), - np.array([2, -2, 7j, -4j, 4 - 6j, 1 + 2j, 0], dtype=dtype), + np.array([3, 3j, -1.5j, -8, 2 + 3j, 2 + 4j], dtype=dtype), + np.array([2, -2, 7j, -4j, 4 - 6j, 1 + 2j], dtype=dtype), + expected=np.array( + [1.5, -1.5j, -0.2142857, -2j, (2 + 3j) / (4 - 6j), 2], + dtype=dtype)) + + # Test inf/nan scenarios. + self._testBinary( + gen_math_ops._real_div, + np.array([4 + 3j, 4, 3j, -4, -4j, 2 - 3j], dtype=dtype), + np.array([0, 0, 0, 0, 0, 0], dtype=dtype), expected=np.array( [ - 1.5, -1.5j, -0.2142857, -2j, (2 + 3j) / (4 - 6j), 2, - float("inf") + dtype(1 + 1j) / 0, + dtype(1) / 0, + dtype(1j) / 0, + dtype(-1) / 0, + dtype(-1j) / 0, + dtype(1 - 1j) / 0 ], dtype=dtype)) - # TODO(b/65408531): support+test pow for cplx + atan2_supported = self.device == "XLA_GPU" + if atan2_supported: + self._testBinary( + math_ops.pow, + dtype(3 + 2j), + dtype(4 - 5j), + expected=np.power(dtype(3 + 2j), dtype(4 - 5j))) + self._testBinary( # empty rhs + math_ops.pow, + np.array([1 + 2j, 2 - 3j], dtype=dtype), + np.zeros(shape=[0, 2], dtype=dtype), + expected=np.zeros(shape=[0, 2], dtype=dtype)) + self._testBinary( # to zero power + math_ops.pow, + np.array([1 + 2j, 2 - 3j], dtype=dtype), + np.zeros(shape=[1, 2], dtype=dtype), + expected=np.ones(shape=[1, 2], dtype=dtype)) + lhs = np.array([1 - 2j, 4 + 3j, 2 - 3j, 3, 2j, 1, 4], dtype=dtype) + rhs = np.array([2, 3j, 3 + 4j, 2 + 3j, 3 - 2j, 2, 3 + 3j], dtype=dtype) + scalar = dtype(2 + 2j) + self._testBinary(math_ops.pow, lhs, rhs, expected=np.power(lhs, rhs)) + self._testBinary( + math_ops.pow, scalar, rhs, expected=np.power(scalar, rhs)) + self._testBinary(math_ops.pow, lhs, scalar, np.power(lhs, scalar)) lhs = np.array([4 + 2j, -3 - 1j, 2j, 1], dtype=dtype) rhs = np.array([5, -6j, 7 - 3j, -8j], dtype=dtype) @@ -385,7 +421,9 @@ class BinaryOpsTest(XLATestCase): self._testBinary( gen_math_ops._sigmoid_grad, lhs, rhs, expected=rhs * lhs * (1 - lhs)) - # TODO(b/65408531): support+test _rsqrt_grad for cplx (needs pow) + if atan2_supported: + self._testBinary( + gen_math_ops._rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2) self._testBinary( gen_math_ops._sqrt_grad, lhs, rhs, expected=rhs / (2 * lhs)) diff --git a/tensorflow/compiler/tests/reduce_ops_test.py b/tensorflow/compiler/tests/reduce_ops_test.py index efda2cc207..965fdf684b 100644 --- a/tensorflow/compiler/tests/reduce_ops_test.py +++ b/tensorflow/compiler/tests/reduce_ops_test.py @@ -67,25 +67,37 @@ class ReduceOpsTest(XLATestCase): np.arange(-10, -4).reshape(2, 3), np.arange(-4, 2).reshape(2, 3), ] - NONEMPTY_FLOAT_DATA = [ - np.arange(1, 7).reshape(2, 3), - np.arange(-10, -4).reshape(2, 3), - np.arange(-4, 2).reshape(2, 3), + COMPLEX_DATA = [ + np.zeros(shape=(2, 0)).astype(np.complex64), + np.zeros(shape=(0, 30)).astype(np.complex64), + np.arange(1, 13, dtype=np.float32).view(np.complex64).reshape(2, 3), + np.arange(-14, -2, dtype=np.float32).view(np.complex64).reshape(2, 3), + np.arange(-4, 8, dtype=np.float32).view(np.complex64).reshape(2, 3), ] + NONEMPTY_FLOAT_DATA = [x for x in FLOAT_DATA if np.size(x) > 0] + NONEMPTY_COMPLEX_DATA = [x for x in COMPLEX_DATA if np.size(x) > 0] BOOL_DATA = [ np.array([], dtype=np.bool).reshape(2, 0), np.array([], dtype=np.bool).reshape(0, 3), np.array([[False, True, False], [True, True, False]]), ] - def testReduceSum(self): + def testReduceSumF32(self): self._testReduction(math_ops.reduce_sum, np.sum, np.float32, self.FLOAT_DATA) - def testReduceProd(self): + def testReduceSumC64(self): + self._testReduction(math_ops.reduce_sum, np.sum, np.complex64, + self.COMPLEX_DATA) + + def testReduceProdF32(self): self._testReduction(math_ops.reduce_prod, np.prod, np.float32, self.FLOAT_DATA) + def testReduceProdC64(self): + self._testReduction(math_ops.reduce_prod, np.prod, np.complex64, + self.COMPLEX_DATA) + def testReduceMin(self): def reference_min(inp, axis): @@ -108,12 +120,16 @@ class ReduceOpsTest(XLATestCase): self._testReduction(math_ops.reduce_max, reference_max, np.float32, self.FLOAT_DATA) - def testReduceMean(self): + def testReduceMeanF32(self): # TODO(phawkins): mean on XLA currently returns 0 instead of NaN when # reducing across zero inputs. self._testReduction(math_ops.reduce_mean, np.mean, np.float32, self.NONEMPTY_FLOAT_DATA) + def testReduceMeanC64(self): + self._testReduction(math_ops.reduce_mean, np.mean, np.complex64, + self.NONEMPTY_COMPLEX_DATA) + def testReduceAll(self): self._testReduction(math_ops.reduce_all, np.all, np.bool, self.BOOL_DATA) diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 76644380bd..a9a3f4f97f 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -330,12 +330,22 @@ class UnaryOpsTest(XLATestCase): def testComplexOps(self): for dtype in self.complex_types: - # TODO(b/65408531): math_ops.acosh (needs pow) - # TODO(b/65408531): math_ops.asinh (needs pow) # TODO(b/65408531): Wider support for log (needs atan2). atan2_supported = self.device == "XLA_GPU" if atan2_supported: + self._assertOpOutputMatchesExpected( + math_ops.acosh, + np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype), + expected=np.arccosh( + np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype))) + + self._assertOpOutputMatchesExpected( + math_ops.asinh, + np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype), + expected=np.arcsinh( + np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype))) + self._assertOpOutputMatchesExpected( math_ops.atanh, np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype), @@ -392,19 +402,26 @@ class UnaryOpsTest(XLATestCase): expected=np.log1p( np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype))) - # TODO(b/34703906): math_ops.rsqrt (needs pow) + val = np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype) + self._assertOpOutputMatchesExpected( + math_ops.rsqrt, val, expected=1 / np.sqrt(val)) - # TODO(b/34703906): math_ops.sigmoid (needs tanh) + self._assertOpOutputMatchesExpected( + math_ops.sigmoid, val, expected=1 / (1 + np.exp(-val))) - # TODO(b/34703906): math_ops.sqrt (needs pow) + self._assertOpOutputMatchesExpected( + math_ops.sqrt, val, expected=np.sqrt(val)) + + self._assertOpOutputMatchesExpected( + math_ops.tanh, + np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype), + expected=np.tanh(np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype))) self._assertOpOutputMatchesExpected( math_ops.tan, np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype), expected=np.tan(np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype))) - # TODO(b/34703906): math_ops.tanh (as itself) - ctypes = {np.complex64: np.float32} self._assertOpOutputMatchesExpected( math_ops.abs, diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 35fe0d1a51..5c9b29f6e2 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -135,7 +135,10 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { Status HandleConvert(HloInstruction* convert) override; + Status HandleComplex(HloInstruction* complex) override; + Status HandleReal(HloInstruction* real) override; + Status HandleImag(HloInstruction* imag) override; Status HandleConvolution(HloInstruction* convolution) override; @@ -947,6 +950,18 @@ Status AlgebraicSimplifierVisitor::HandleConvert(HloInstruction* convert) { return Status::OK(); } +// Complex(Real(c), Imag(c)) -> c +Status AlgebraicSimplifierVisitor::HandleComplex(HloInstruction* complex) { + auto real = complex->mutable_operand(0); + auto imag = complex->mutable_operand(1); + if (real->opcode() == HloOpcode::kReal && + imag->opcode() == HloOpcode::kImag && + real->operand(0) == imag->operand(0)) { + return ReplaceInstruction(complex, real->mutable_operand(0)); + } + return Status::OK(); +} + // Real(Complex(r, i)) -> r Status AlgebraicSimplifierVisitor::HandleReal(HloInstruction* real) { auto operand = real->mutable_operand(0); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index c06e330bc1..620f0a54fa 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -371,6 +371,31 @@ TEST_F(AlgebraicSimplifierTest, DivOneArray) { EXPECT_EQ(root, param0); } +// Test that complex(real(c), imag(c)) is simplified to c. +TEST_F(AlgebraicSimplifierTest, ComplexOfRealImagC) { + Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 2}); + Shape r2c64 = ShapeUtil::MakeShape(C64, {2, 2}); + HloComputation::Builder builder(TestName()); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, r2c64, "param0")); + HloInstruction* real = builder.AddInstruction( + HloInstruction::CreateUnary(r2f32, HloOpcode::kReal, param0)); + HloInstruction* imag = builder.AddInstruction( + HloInstruction::CreateUnary(r2f32, HloOpcode::kImag, param0)); + HloInstruction* cplx = builder.AddInstruction( + HloInstruction::CreateBinary(r2c64, HloOpcode::kComplex, real, imag)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + HloInstruction* root = computation->root_instruction(); + EXPECT_EQ(root, cplx); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + root = computation->root_instruction(); + EXPECT_EQ(root, param0); +} + // Test that real(complex(r,i)) is simplified to r. TEST_F(AlgebraicSimplifierTest, RealOfComplex) { Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 2}); diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index a945657712..606868034a 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -93,14 +93,14 @@ StatusOr ElementalIrEmitter::EmitIntegerUnaryOp( auto to_ir_component_type = llvm_ir::PrimitiveTypeToIrType( primitive_util::ComplexComponentType(to_type), module_); if (primitive_util::IsSignedIntegralType(from_type)) { - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateSIToFP(operand_value, to_ir_component_type), nullptr); } if (primitive_util::IsUnsignedIntegralType(from_type) || from_type == PRED) { - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateUIToFP(operand_value, to_ir_component_type), nullptr); @@ -178,9 +178,9 @@ StatusOr ElementalIrEmitter::EmitFloatUnaryOp( PrimitiveType to_component_type = primitive_util::ComplexComponentType(to_type); if (from_type == to_component_type) { - return ComposeComplex(op, operand_value, nullptr); + return EmitComposeComplex(op, operand_value, nullptr); } - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateFPCast( operand_value, @@ -269,15 +269,8 @@ StatusOr ElementalIrEmitter::EmitFloatUnaryOp( StatusOr ElementalIrEmitter::EmitComplexUnaryOp( const HloInstruction* op, llvm::Value* operand_value) const { - auto real = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {0}); - }; - auto imag = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {1}); - }; switch (op->opcode()) { // TODO(b/65209142): Angle/Log require atan2. - // case HloOpcode::kAngle: // case HloOpcode::kLog: // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a) case HloOpcode::kConvert: { PrimitiveType from_type = op->operand(0)->shape().element_type(); @@ -291,24 +284,26 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( primitive_util::ComplexComponentType(to_type); auto to_ir_component_type = llvm_ir::PrimitiveTypeToIrType(to_component_type, module_); - return ComposeComplex( + return EmitComposeComplex( op, - ir_builder_->CreateFPCast(real(operand_value), to_ir_component_type), - ir_builder_->CreateFPCast(imag(operand_value), to_ir_component_type)); + ir_builder_->CreateFPCast(EmitExtractReal(operand_value), + to_ir_component_type), + ir_builder_->CreateFPCast(EmitExtractImag(operand_value), + to_ir_component_type)); } case HloOpcode::kExp: { // e^(a+bi) = e^a*(cos(b)+sin(b)i) auto exp_a = llvm_ir::EmitCallToIntrinsic( - llvm::Intrinsic::exp, {real(operand_value)}, - {real(operand_value)->getType()}, ir_builder_); + llvm::Intrinsic::exp, {EmitExtractReal(operand_value)}, + {EmitExtractReal(operand_value)->getType()}, ir_builder_); auto cos_b = llvm_ir::EmitCallToIntrinsic( - llvm::Intrinsic::cos, {imag(operand_value)}, - {imag(operand_value)->getType()}, ir_builder_); + llvm::Intrinsic::cos, {EmitExtractImag(operand_value)}, + {EmitExtractImag(operand_value)->getType()}, ir_builder_); auto sin_b = llvm_ir::EmitCallToIntrinsic( - llvm::Intrinsic::sin, {imag(operand_value)}, - {imag(operand_value)->getType()}, ir_builder_); - return ComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b), - ir_builder_->CreateFMul(exp_a, sin_b)); + llvm::Intrinsic::sin, {EmitExtractImag(operand_value)}, + {EmitExtractImag(operand_value)->getType()}, ir_builder_); + return EmitComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b), + ir_builder_->CreateFMul(exp_a, sin_b)); } case HloOpcode::kCos: { // cos(z) = .5(e^(iz) + e^(-iz)) @@ -318,8 +313,8 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( // cos(-x) = cos(x) and sin(-x) = -sin(x), so // cos(a+bi) = .5(e^-b*(cos(a)+sin(a)i) + e^b*(cos(a)-sin(a)i)) // = .5(cos(a)*(e^-b+e^b) + i*sin(a)*(e^-b-e^b)) - auto a = real(operand_value); - auto b = imag(operand_value); + auto a = EmitExtractReal(operand_value); + auto b = EmitExtractImag(operand_value); auto type = a->getType(); auto exp_b = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {b}, {type}, ir_builder_); @@ -331,7 +326,7 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( {type}, ir_builder_); auto sin_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {a}, {type}, ir_builder_); - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateFMul( cos_a, ir_builder_->CreateFAdd(half_exp_neg_b, half_exp_b)), @@ -348,8 +343,8 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( // cos(-x) = cos(x) and sin(-x) = -sin(x), so // = 0.5(e^b*(cos(a)i+sin(a)) - e^-b*(cos(a)i-sin(a))) // = 0.5(sin(a)*(e^b+e^-b) + i*cos(a)*(e^b-e^-b) - auto a = real(operand_value); - auto b = imag(operand_value); + auto a = EmitExtractReal(operand_value); + auto b = EmitExtractImag(operand_value); auto type = a->getType(); auto exp_b = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {b}, {type}, ir_builder_); @@ -361,7 +356,7 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( {type}, ir_builder_); auto sin_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {a}, {type}, ir_builder_); - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateFMul( sin_a, ir_builder_->CreateFAdd(half_exp_b, half_exp_neg_b)), @@ -370,33 +365,40 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( } case HloOpcode::kAbs: { auto sum_sq = ir_builder_->CreateFAdd( - ir_builder_->CreateFMul(real(operand_value), real(operand_value)), - ir_builder_->CreateFMul(imag(operand_value), imag(operand_value))); + ir_builder_->CreateFMul(EmitExtractReal(operand_value), + EmitExtractReal(operand_value)), + ir_builder_->CreateFMul(EmitExtractImag(operand_value), + EmitExtractImag(operand_value))); return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sqrt, {sum_sq}, {sum_sq->getType()}, ir_builder_); } case HloOpcode::kSign: { // Sign(c) = c / |c| auto sum_sq = ir_builder_->CreateFAdd( - ir_builder_->CreateFMul(real(operand_value), real(operand_value)), - ir_builder_->CreateFMul(imag(operand_value), imag(operand_value))); + ir_builder_->CreateFMul(EmitExtractReal(operand_value), + EmitExtractReal(operand_value)), + ir_builder_->CreateFMul(EmitExtractImag(operand_value), + EmitExtractImag(operand_value))); auto cplx_abs = llvm_ir::EmitCallToIntrinsic( llvm::Intrinsic::sqrt, {sum_sq}, {sum_sq->getType()}, ir_builder_); auto type = cplx_abs->getType(); auto zero = llvm::ConstantFP::get(type, 0.0); auto oeq = ir_builder_->CreateFCmpOEQ(cplx_abs, zero); return ir_builder_->CreateSelect( - oeq, ComposeComplex(op, zero, zero), - ComposeComplex( - op, ir_builder_->CreateFDiv(real(operand_value), cplx_abs), - ir_builder_->CreateFDiv(imag(operand_value), cplx_abs))); + oeq, EmitComposeComplex(op, zero, zero), + EmitComposeComplex( + op, + ir_builder_->CreateFDiv(EmitExtractReal(operand_value), cplx_abs), + ir_builder_->CreateFDiv(EmitExtractImag(operand_value), + cplx_abs))); } case HloOpcode::kNegate: - return ComposeComplex(op, ir_builder_->CreateFNeg(real(operand_value)), - ir_builder_->CreateFNeg(imag(operand_value))); + return EmitComposeComplex( + op, ir_builder_->CreateFNeg(EmitExtractReal(operand_value)), + ir_builder_->CreateFNeg(EmitExtractImag(operand_value))); case HloOpcode::kReal: - return real(operand_value); + return EmitExtractReal(operand_value); case HloOpcode::kImag: - return imag(operand_value); + return EmitExtractImag(operand_value); default: return Unimplemented("unary complex op '%s'", HloOpcodeString(op->opcode()).c_str()); @@ -424,7 +426,7 @@ StatusOr ElementalIrEmitter::EmitFloatBinaryOp( switch (op->opcode()) { // case HloOpcode::kAtan2: // TODO(b/65209142): CPU atan2 support case HloOpcode::kComplex: - return ComposeComplex(op, lhs_value, rhs_value); + return EmitComposeComplex(op, lhs_value, rhs_value); case HloOpcode::kAdd: return ir_builder_->CreateFAdd(lhs_value, rhs_value); case HloOpcode::kSubtract: @@ -479,54 +481,66 @@ StatusOr ElementalIrEmitter::EmitFloatBinaryOp( StatusOr ElementalIrEmitter::EmitComplexBinaryOp( const HloInstruction* op, llvm::Value* lhs_value, llvm::Value* rhs_value) const { - auto real = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {0}); - }; - auto imag = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {1}); - }; switch (op->opcode()) { case HloOpcode::kAdd: - return ComposeComplex( - op, ir_builder_->CreateFAdd(real(lhs_value), real(rhs_value)), - ir_builder_->CreateFAdd(imag(lhs_value), imag(rhs_value))); + return EmitComposeComplex( + op, + ir_builder_->CreateFAdd(EmitExtractReal(lhs_value), + EmitExtractReal(rhs_value)), + ir_builder_->CreateFAdd(EmitExtractImag(lhs_value), + EmitExtractImag(rhs_value))); case HloOpcode::kSubtract: - return ComposeComplex( - op, ir_builder_->CreateFSub(real(lhs_value), real(rhs_value)), - ir_builder_->CreateFSub(imag(lhs_value), imag(rhs_value))); + return EmitComposeComplex( + op, + ir_builder_->CreateFSub(EmitExtractReal(lhs_value), + EmitExtractReal(rhs_value)), + ir_builder_->CreateFSub(EmitExtractImag(lhs_value), + EmitExtractImag(rhs_value))); case HloOpcode::kMultiply: - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateFSub( - ir_builder_->CreateFMul(real(lhs_value), real(rhs_value)), - ir_builder_->CreateFMul(imag(lhs_value), imag(rhs_value))), + ir_builder_->CreateFMul(EmitExtractReal(lhs_value), + EmitExtractReal(rhs_value)), + ir_builder_->CreateFMul(EmitExtractImag(lhs_value), + EmitExtractImag(rhs_value))), ir_builder_->CreateFAdd( - ir_builder_->CreateFMul(real(lhs_value), imag(rhs_value)), - ir_builder_->CreateFMul(imag(lhs_value), real(rhs_value)))); + ir_builder_->CreateFMul(EmitExtractReal(lhs_value), + EmitExtractImag(rhs_value)), + ir_builder_->CreateFMul(EmitExtractImag(lhs_value), + EmitExtractReal(rhs_value)))); case HloOpcode::kDivide: { // (a+bi) / (c+di) = ((a+bi)(c-di)) / ((c+di)(c-di)) // = ((ac + bd) + (bc - ad)i) / (c^2 + d^2) auto rhs_sum_sq = ir_builder_->CreateFAdd( - ir_builder_->CreateFMul(real(rhs_value), real(rhs_value)), - ir_builder_->CreateFMul(imag(rhs_value), imag(rhs_value))); + ir_builder_->CreateFMul(EmitExtractReal(rhs_value), + EmitExtractReal(rhs_value)), + ir_builder_->CreateFMul(EmitExtractImag(rhs_value), + EmitExtractImag(rhs_value))); auto type = rhs_sum_sq->getType(); auto zero = llvm::ConstantFP::get(type, 0.0); auto oeq = ir_builder_->CreateFCmpOEQ(rhs_sum_sq, zero); + auto real_inf_or_nan = + ir_builder_->CreateFDiv(EmitExtractReal(lhs_value), zero); + auto imag_inf_or_nan = + ir_builder_->CreateFDiv(EmitExtractImag(lhs_value), zero); return ir_builder_->CreateSelect( - oeq, ComposeComplex(op, llvm::ConstantFP::getInfinity(type), zero), - ComposeComplex( + oeq, EmitComposeComplex(op, real_inf_or_nan, imag_inf_or_nan), + EmitComposeComplex( op, ir_builder_->CreateFDiv( ir_builder_->CreateFAdd( - ir_builder_->CreateFMul(real(lhs_value), real(rhs_value)), - ir_builder_->CreateFMul(imag(lhs_value), - imag(rhs_value))), + ir_builder_->CreateFMul(EmitExtractReal(lhs_value), + EmitExtractReal(rhs_value)), + ir_builder_->CreateFMul(EmitExtractImag(lhs_value), + EmitExtractImag(rhs_value))), rhs_sum_sq), ir_builder_->CreateFDiv( ir_builder_->CreateFSub( - ir_builder_->CreateFMul(imag(lhs_value), real(rhs_value)), - ir_builder_->CreateFMul(real(lhs_value), - imag(rhs_value))), + ir_builder_->CreateFMul(EmitExtractImag(lhs_value), + EmitExtractReal(rhs_value)), + ir_builder_->CreateFMul(EmitExtractReal(lhs_value), + EmitExtractImag(rhs_value))), rhs_sum_sq))); } // LLVM comparisons can be "unordered" (U) or "ordered" (O) -- ordered @@ -538,16 +552,20 @@ StatusOr ElementalIrEmitter::EmitComplexBinaryOp( // matches C++'s semantics. case HloOpcode::kEq: return ir_builder_->CreateAnd( - llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OEQ, real(lhs_value), - real(rhs_value), ir_builder_), - llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OEQ, imag(lhs_value), - imag(rhs_value), ir_builder_)); + llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OEQ, + EmitExtractReal(lhs_value), + EmitExtractReal(rhs_value), ir_builder_), + llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OEQ, + EmitExtractImag(lhs_value), + EmitExtractImag(rhs_value), ir_builder_)); case HloOpcode::kNe: return ir_builder_->CreateOr( - llvm_ir::EmitComparison(llvm::CmpInst::FCMP_UNE, real(lhs_value), - real(rhs_value), ir_builder_), - llvm_ir::EmitComparison(llvm::CmpInst::FCMP_UNE, imag(lhs_value), - imag(rhs_value), ir_builder_)); + llvm_ir::EmitComparison(llvm::CmpInst::FCMP_UNE, + EmitExtractReal(lhs_value), + EmitExtractReal(rhs_value), ir_builder_), + llvm_ir::EmitComparison(llvm::CmpInst::FCMP_UNE, + EmitExtractImag(lhs_value), + EmitExtractImag(rhs_value), ir_builder_)); // TODO(b/65209142): requires arg(z) -> requires atan|atan2 intrinsic // case HloOpcode::kPower: @@ -1565,25 +1583,25 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( TF_ASSIGN_OR_RETURN(llvm::Value * rhs_value, rhs_generator(rhs_index)); llvm::Value* next_accumulator; if (primitive_util::IsComplexType(primitive_type)) { - auto real = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {0}); - }; - auto imag = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {1}); - }; llvm::Value* product_real = ir_builder_->CreateFSub( - ir_builder_->CreateFMul(real(lhs_value), real(rhs_value)), - ir_builder_->CreateFMul(imag(lhs_value), imag(rhs_value))); + ir_builder_->CreateFMul(EmitExtractReal(lhs_value), + EmitExtractReal(rhs_value)), + ir_builder_->CreateFMul(EmitExtractImag(lhs_value), + EmitExtractImag(rhs_value))); llvm::Value* product_imag = ir_builder_->CreateFAdd( - ir_builder_->CreateFMul(real(lhs_value), imag(rhs_value)), - ir_builder_->CreateFMul(imag(lhs_value), real(rhs_value))); + ir_builder_->CreateFMul(EmitExtractReal(lhs_value), + EmitExtractImag(rhs_value)), + ir_builder_->CreateFMul(EmitExtractImag(lhs_value), + EmitExtractReal(rhs_value))); next_accumulator = ir_builder_->CreateInsertValue( current_accumulator, - ir_builder_->CreateFAdd(real(current_accumulator), product_real), + ir_builder_->CreateFAdd(EmitExtractReal(current_accumulator), + product_real), {0}); next_accumulator = ir_builder_->CreateInsertValue( next_accumulator, - ir_builder_->CreateFAdd(imag(current_accumulator), product_imag), + ir_builder_->CreateFAdd(EmitExtractImag(current_accumulator), + product_imag), {1}); } else if (primitive_util::IsFloatingPointType(primitive_type)) { next_accumulator = ir_builder_->CreateFAdd( @@ -1607,9 +1625,17 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( } } -llvm::Value* ElementalIrEmitter::ComposeComplex(const HloInstruction* op, - llvm::Value* real, - llvm::Value* imag) const { +llvm::Value* ElementalIrEmitter::EmitExtractReal(llvm::Value* value) const { + return ir_builder_->CreateExtractValue(value, {0}); +} + +llvm::Value* ElementalIrEmitter::EmitExtractImag(llvm::Value* value) const { + return ir_builder_->CreateExtractValue(value, {1}); +} + +llvm::Value* ElementalIrEmitter::EmitComposeComplex(const HloInstruction* op, + llvm::Value* real, + llvm::Value* imag) const { auto cplx_type = llvm_ir::PrimitiveTypeToIrType(op->shape().element_type(), module_); auto complex = ir_builder_->CreateInsertValue( diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h index 9d32436e38..cccb498f82 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h @@ -95,6 +95,13 @@ class ElementalIrEmitter { virtual StatusOr EmitReducePrecision(const HloInstruction* hlo, llvm::Value* x) const; + virtual llvm::Value* EmitExtractReal(llvm::Value* value) const; + virtual llvm::Value* EmitExtractImag(llvm::Value* value) const; + + // Composes a complex struct. imag may be nullptr for simple cast operations. + llvm::Value* EmitComposeComplex(const HloInstruction* op, llvm::Value* real, + llvm::Value* imag) const; + // A helper method for MakeElementGenerator. Given an elementwise op `hlo` and // the target array index, computes the source array index of its // `operand_no`-th operand. @@ -117,11 +124,6 @@ class ElementalIrEmitter { // compiled executable outside of the HLO code itself. const HloModuleConfig& hlo_module_config_; - protected: - // Composes a complex struct. imag may be nullptr for simple cast operations. - llvm::Value* ComposeComplex(const HloInstruction* op, llvm::Value* real, - llvm::Value* imag) const; - private: // Returns a ElementGenerator for a RNG HloInstruction. llvm_ir::ElementGenerator MakeRngElementGenerator( diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index 1b94499bc6..6bf00cfb8a 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -230,6 +230,66 @@ StatusOr GpuElementalIrEmitter::EmitFloatUnaryOp( } } +StatusOr GpuElementalIrEmitter::EmitComplexBinaryOp( + const HloInstruction* op, llvm::Value* lhs_value, + llvm::Value* rhs_value) const { + PrimitiveType input_type = op->operand(0)->shape().element_type(); + TF_RET_CHECK(primitive_util::IsComplexType(input_type)); + PrimitiveType component_type = + primitive_util::ComplexComponentType(input_type); + switch (op->opcode()) { + case HloOpcode::kPower: { + // (a+bi)^(c+di) = + // (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)) * (cos(q) + i*sin(q)), + // where q = c*atan2(b,a)+0.5d*ln(a*a+b*b) + auto a = EmitExtractReal(lhs_value); + auto b = EmitExtractImag(lhs_value); + auto c = EmitExtractReal(rhs_value); + auto d = EmitExtractImag(rhs_value); + auto aa_p_bb = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a), + ir_builder_->CreateFMul(b, b)); + auto one_half = llvm::ConstantFP::get(a->getType(), 0.5); + auto half_c = ir_builder_->CreateFMul(one_half, c); + + TF_ASSIGN_OR_RETURN( + auto aa_p_bb_to_half_c, + EmitLibdeviceMathCall("__nv_pow", {aa_p_bb, half_c}, + {component_type, component_type}, + component_type)); + auto neg_d = ir_builder_->CreateFNeg(d); + TF_ASSIGN_OR_RETURN( + auto arg_lhs, EmitLibdeviceMathCall("__nv_atan2", {b, a}, + {component_type, component_type}, + component_type)); + auto neg_d_arg_lhs = ir_builder_->CreateFMul(neg_d, arg_lhs); + TF_ASSIGN_OR_RETURN( + auto e_to_neg_d_arg_lhs, + EmitLibdeviceMathCall("__nv_exp", {neg_d_arg_lhs}, {component_type}, + component_type)); + auto coeff = + ir_builder_->CreateFMul(aa_p_bb_to_half_c, e_to_neg_d_arg_lhs); + TF_ASSIGN_OR_RETURN( + auto ln_aa_p_bb, + EmitLibdeviceMathCall("__nv_log", {aa_p_bb}, {component_type}, + component_type)); + auto half_d = ir_builder_->CreateFMul(one_half, d); + auto q = + ir_builder_->CreateFAdd(ir_builder_->CreateFMul(c, arg_lhs), + ir_builder_->CreateFMul(half_d, ln_aa_p_bb)); + TF_ASSIGN_OR_RETURN( + auto cos_q, EmitLibdeviceMathCall("__nv_cos", {q}, {component_type}, + component_type)); + TF_ASSIGN_OR_RETURN( + auto sin_q, EmitLibdeviceMathCall("__nv_sin", {q}, {component_type}, + component_type)); + return EmitComposeComplex(op, ir_builder_->CreateFMul(coeff, cos_q), + ir_builder_->CreateFMul(coeff, sin_q)); + } + default: + return ElementalIrEmitter::EmitComplexBinaryOp(op, lhs_value, rhs_value); + } +} + StatusOr GpuElementalIrEmitter::EmitComplexUnaryOp( const HloInstruction* op, llvm::Value* operand_value) const { PrimitiveType input_type = op->operand(0)->shape().element_type(); @@ -237,18 +297,12 @@ StatusOr GpuElementalIrEmitter::EmitComplexUnaryOp( primitive_util::IsComplexType(input_type) ? primitive_util::ComplexComponentType(input_type) : input_type; - auto real = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {0}); - }; - auto imag = [&](llvm::Value* x) { - return ir_builder_->CreateExtractValue(x, {1}); - }; switch (op->opcode()) { case HloOpcode::kLog: { // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a) - auto a = real(operand_value); - auto b = imag(operand_value); + auto a = EmitExtractReal(operand_value); + auto b = EmitExtractImag(operand_value); llvm::Type* llvm_ty = a->getType(); auto sum_sq = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a), ir_builder_->CreateFMul(b, b)); @@ -261,34 +315,33 @@ StatusOr GpuElementalIrEmitter::EmitComplexUnaryOp( {component_type, component_type}, component_type)); auto one_half = llvm::ConstantFP::get(llvm_ty, 0.5); - return ComposeComplex(op, ir_builder_->CreateFMul(one_half, log_sum_sq), - angle); + return EmitComposeComplex( + op, ir_builder_->CreateFMul(one_half, log_sum_sq), angle); } - // TODO(b/65408531): Implement kPower on GPU, where atan2 is available. - // case HloOpcode::kPower: - // // (a+bi)^(c+di) = exp(i(c+di)*arg(a+bi)) * (a*a+b*b)^(0.5(c+di)) case HloOpcode::kExp: { // e^(a+bi) = e^a*(cos(b)+sin(b)i) - auto b = imag(operand_value); + auto b = EmitExtractImag(operand_value); TF_ASSIGN_OR_RETURN( - auto exp_a, EmitLibdeviceMathCall("__nv_exp", {real(operand_value)}, - {component_type}, component_type)); + auto exp_a, + EmitLibdeviceMathCall("__nv_exp", {EmitExtractReal(operand_value)}, + {component_type}, component_type)); TF_ASSIGN_OR_RETURN( auto cos_b, EmitLibdeviceMathCall("__nv_cos", {b}, {component_type}, component_type)); TF_ASSIGN_OR_RETURN( auto sin_b, EmitLibdeviceMathCall("__nv_sin", {b}, {component_type}, component_type)); - return ComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b), - ir_builder_->CreateFMul(exp_a, sin_b)); + return EmitComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b), + ir_builder_->CreateFMul(exp_a, sin_b)); } case HloOpcode::kCos: { // cos(a+bi) = .5(cos(a)*(e^-b+e^b) + i*sin(a)*(e^-b-e^b)) - auto a = real(operand_value); + auto a = EmitExtractReal(operand_value); auto llvm_ty = a->getType(); TF_ASSIGN_OR_RETURN( - auto exp_b, EmitLibdeviceMathCall("__nv_exp", {imag(operand_value)}, - {component_type}, component_type)); + auto exp_b, + EmitLibdeviceMathCall("__nv_exp", {EmitExtractImag(operand_value)}, + {component_type}, component_type)); TF_ASSIGN_OR_RETURN( auto cos_a, EmitLibdeviceMathCall("__nv_cos", {a}, {component_type}, component_type)); @@ -299,7 +352,7 @@ StatusOr GpuElementalIrEmitter::EmitComplexUnaryOp( ir_builder_->CreateFMul(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b); auto half_exp_neg_b = ir_builder_->CreateFDiv(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b); - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateFMul( cos_a, ir_builder_->CreateFAdd(half_exp_neg_b, half_exp_b)), @@ -309,11 +362,12 @@ StatusOr GpuElementalIrEmitter::EmitComplexUnaryOp( case HloOpcode::kSin: { // sin(a+bi) = 0.5(sin(a)*(e^b+e^-b) + i*cos(a)*(e^b-e^-b) - auto a = real(operand_value); + auto a = EmitExtractReal(operand_value); auto llvm_ty = a->getType(); TF_ASSIGN_OR_RETURN( - auto exp_b, EmitLibdeviceMathCall("__nv_exp", {imag(operand_value)}, - {component_type}, component_type)); + auto exp_b, + EmitLibdeviceMathCall("__nv_exp", {EmitExtractImag(operand_value)}, + {component_type}, component_type)); TF_ASSIGN_OR_RETURN( auto cos_a, EmitLibdeviceMathCall("__nv_cos", {a}, {component_type}, component_type)); @@ -324,13 +378,71 @@ StatusOr GpuElementalIrEmitter::EmitComplexUnaryOp( ir_builder_->CreateFMul(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b); auto half_exp_neg_b = ir_builder_->CreateFDiv(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b); - return ComposeComplex( + return EmitComposeComplex( op, ir_builder_->CreateFMul( sin_a, ir_builder_->CreateFAdd(half_exp_b, half_exp_neg_b)), ir_builder_->CreateFMul( cos_a, ir_builder_->CreateFSub(half_exp_b, half_exp_neg_b))); } + case HloOpcode::kTanh: { + /* + tanh=(exp(x)-exp(-x)) / (exp(x)+exp(-x)) + e^(a+bi) = e^a*(cos(b)+sin(b)i) + so tanh=(((cos(b)+sin(b)i)e^a - (cos(-b)+sin(-b)i)e^-a)) / + (((cos(b)+sin(b)i)e^a + (cos(-b)+sin(-b)i)e^-a)) + cos(b)=cos(-b), sin(-b)=-sin(b) + so tanh=(((cos(b)+sin(b)i)e^a - (cos(b)-sin(b)i)e^-a)) / + (((cos(b)+sin(b)i)e^a + (cos(b)-sin(b)i)e^-a)) + =(cos(b)e^a+i*sin(b)e^a + cos(b)(-e^-a)+i*sin(b)e^-a) / + (cos(b)e^a+i*sin(b)e^a + cos(b)e^-a+i*sin(b)(-e^-a)) + =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) / + (cos(b)(e^a+e^-a) + i*sin(b)(e^a-e^-a)) + This is a complex division, so we can multiply by denom_conj/denom_conj + =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) * + (cos(b)(e^a+e^-a) - i*sin(b)(e^a-e^-a)) / + ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2) + =(cos(b)^2(e^(2a)-e^(-2a)) + sin(b)^2(e^(2a)-e^(-2a)) + + i*(cos(b)sin(b)(e^a+e^-a)^2 - cos(b)sin(b)(e^a-e^-a)^2)) / + ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2) + */ + auto a = EmitExtractReal(operand_value); + auto b = EmitExtractImag(operand_value); + TF_ASSIGN_OR_RETURN( + auto exp_a, EmitLibdeviceMathCall("__nv_exp", {a}, {component_type}, + component_type)); + TF_ASSIGN_OR_RETURN( + auto cos_b, EmitLibdeviceMathCall("__nv_cos", {b}, {component_type}, + component_type)); + TF_ASSIGN_OR_RETURN( + auto sin_b, EmitLibdeviceMathCall("__nv_sin", {b}, {component_type}, + component_type)); + auto exp_neg_a = ir_builder_->CreateFDiv( + llvm::ConstantFP::get(exp_a->getType(), 1), exp_a); + auto exp_2a_minus_exp_neg_2a = ir_builder_->CreateFSub( + ir_builder_->CreateFMul(exp_a, exp_a), + ir_builder_->CreateFMul(exp_neg_a, exp_neg_a)); + auto cos_b_sq = ir_builder_->CreateFMul(cos_b, cos_b); + auto sin_b_sq = ir_builder_->CreateFMul(sin_b, sin_b); + auto real_num = ir_builder_->CreateFAdd( + ir_builder_->CreateFMul(cos_b_sq, exp_2a_minus_exp_neg_2a), + ir_builder_->CreateFMul(sin_b_sq, exp_2a_minus_exp_neg_2a)); + auto cos_b_sin_b = ir_builder_->CreateFMul(cos_b, sin_b); + auto exp_a_plus_exp_neg_a = ir_builder_->CreateFAdd(exp_a, exp_neg_a); + auto exp_a_plus_exp_neg_a_sq = + ir_builder_->CreateFMul(exp_a_plus_exp_neg_a, exp_a_plus_exp_neg_a); + auto exp_a_minus_exp_neg_a = ir_builder_->CreateFSub(exp_a, exp_neg_a); + auto exp_a_minus_exp_neg_a_sq = + ir_builder_->CreateFMul(exp_a_minus_exp_neg_a, exp_a_minus_exp_neg_a); + auto imag_num = ir_builder_->CreateFMul( + cos_b_sin_b, ir_builder_->CreateFSub(exp_a_plus_exp_neg_a_sq, + exp_a_minus_exp_neg_a_sq)); + auto denom = ir_builder_->CreateFAdd( + ir_builder_->CreateFMul(cos_b_sq, exp_a_plus_exp_neg_a_sq), + ir_builder_->CreateFMul(sin_b_sq, exp_a_minus_exp_neg_a_sq)); + return EmitComposeComplex(op, ir_builder_->CreateFDiv(real_num, denom), + ir_builder_->CreateFDiv(imag_num, denom)); + } default: return ElementalIrEmitter::EmitComplexUnaryOp(op, operand_value); } diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h index 3defa1b696..6a537d0152 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h @@ -61,6 +61,10 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { const HloInstruction* op, llvm::Value* lhs_value, llvm::Value* rhs_value) const override; + StatusOr EmitComplexBinaryOp( + const HloInstruction* op, llvm::Value* lhs_value, + llvm::Value* rhs_value) const override; + StatusOr EmitErfcInv(PrimitiveType prim_type, llvm::Value* value) const override; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index 9d55c7859d..af2a92e11e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -293,29 +293,30 @@ Status IrEmitter::EmitAtomicOperationForNestedComputation( computation, {old_output_location, source_address}, new_output_location)); // (old_output, success) = atomicCAS(output_address, old_output, new_output); - llvm::Type* element_int_ir_type = - ir_builder_.getIntNTy(element_ir_type->getScalarSizeInBits()); - // cmpxchg accetps integer only, so we bitcast the operands (old_output and - // new_output) to integers of the same bit width, and bitcast the result - // back to the original element type. - llvm::Value* old_output = - ir_builder_.CreateLoad(old_output_location, "old_output"); - llvm::Value* new_output = - ir_builder_.CreateLoad(new_output_location, "new_output"); + int num_bits = llvm_ir::GetSizeInBits(element_ir_type); + llvm::Type* element_int_ir_type = ir_builder_.getIntNTy(num_bits); + // cmpxchg accepts integer only, and bitcast refuses to operate on aggregate + // types, so we bitcast load and store addresses to intN* of the same bit + // width. + llvm::Value* old_output = ir_builder_.CreateLoad( + ir_builder_.CreateBitCast(old_output_location, + element_int_ir_type->getPointerTo()), + "old_output"); + llvm::Value* new_output = ir_builder_.CreateLoad( + ir_builder_.CreateBitCast(new_output_location, + element_int_ir_type->getPointerTo()), + "new_output"); llvm::Value* ret_value = ir_builder_.CreateAtomicCmpXchg( ir_builder_.CreateBitCast(output_address, element_int_ir_type->getPointerTo()), - ir_builder_.CreateBitCast(old_output, element_int_ir_type), - ir_builder_.CreateBitCast(new_output, element_int_ir_type), - llvm::AtomicOrdering::SequentiallyConsistent, + old_output, new_output, llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering::SequentiallyConsistent); // cmpxchg returns a pair. The first element is the original value at // output_address and the second element is whether the swap is successful. ir_builder_.CreateStore( - ir_builder_.CreateBitCast( - ir_builder_.CreateExtractValue(ret_value, 0, "old_output"), - element_ir_type), - old_output_location); + ir_builder_.CreateExtractValue(ret_value, 0, "old_output"), + ir_builder_.CreateBitCast(old_output_location, + element_int_ir_type->getPointerTo())); ir_builder_.CreateCondBr( ir_builder_.CreateExtractValue(ret_value, 1, "success"), loop_exit_bb, loop_body_bb); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 7b4662fc80..db78f4b84d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1081,16 +1081,25 @@ Status IrEmitterUnnested::EmitRowReduction( // from the warp. llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.after_block, &ir_builder_); + int bit_width = llvm_ir::GetSizeInBits(element_ir_type); + // bitcast cannot be applied to aggregate types (even packed ones), so we + // instead bitcast addresses of load/store to intN* of the same bit-width. + llvm::Type* shuffle_ir_type = element_ir_type->isStructTy() + ? ir_builder_.getIntNTy(bit_width) + : element_ir_type; for (int shuffle_distance = 16; shuffle_distance >= 1; shuffle_distance /= 2) { llvm::Value* partial_reduction_result = ir_builder_.CreateLoad( - partial_reduction_result_address, "partial_reduction_result"); + ir_builder_.CreateBitCast(partial_reduction_result_address, + shuffle_ir_type->getPointerTo()), + "partial_reduction_result"); llvm::Value* result_from_other_lane = ir_builder_.CreateAlloca( element_ir_type, nullptr, "result_from_other_lane"); ir_builder_.CreateStore( EmitShuffleDown(partial_reduction_result, ir_builder_.getInt32(shuffle_distance), &ir_builder_), - result_from_other_lane); + ir_builder_.CreateBitCast(result_from_other_lane, + shuffle_ir_type->getPointerTo())); TF_RETURN_IF_ERROR(EmitCallToNestedComputation( *reducer, {partial_reduction_result_address, result_from_other_lane}, partial_reduction_result_address)); diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index d95409e399..086c8dae9e 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -163,8 +163,9 @@ llvm::Type* PrimitiveTypeToIrType(PrimitiveType element_type, // z, and reinterpret_cast(z)[1] shall designate the // imaginary part of z. return llvm::StructType::create( - "complex64", llvm::Type::getFloatTy(module->getContext()), - llvm::Type::getFloatTy(module->getContext())); + {llvm::Type::getFloatTy(module->getContext()), + llvm::Type::getFloatTy(module->getContext())}, + "complex64", /*isPacked=*/true); } return cplx_t; } @@ -178,6 +179,21 @@ llvm::Type* PrimitiveTypeToIrType(PrimitiveType element_type, } } +int GetSizeInBits(llvm::Type* type) { + const llvm::StructType* struct_ty = llvm::dyn_cast(type); + if (struct_ty) { + CHECK(struct_ty->isPacked()); + int bits = 0; + for (auto element_type : struct_ty->elements()) { + bits += GetSizeInBits(element_type); + } + return bits; + } + int bits = type->getPrimitiveSizeInBits(); + CHECK_GT(bits, 0) << "type is not sized"; + return bits; +} + llvm::Type* ShapeToIrType(const Shape& shape, llvm::Module* module) { llvm::Type* result_type = PrimitiveTypeToIrType(shape.element_type(), module); if (ShapeUtil::IsTuple(shape)) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index f70d9f88b3..063ead2b64 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -129,6 +129,9 @@ llvm::Value* EmitBufferIndexingGEP(llvm::Value* array, int64 index, llvm::Type* PrimitiveTypeToIrType(PrimitiveType element_type, llvm::Module* module); +// Returns the type size in bits. If "type" is a struct, it must be packed. +int GetSizeInBits(llvm::Type* type); + // Returns the LLVM type which represents the given XLA shape. For example, // if "shape" is [5 x [10 x f32]], the function returns [5 x [10 x float]]. llvm::Type* ShapeToIrType(const Shape& shape, llvm::Module* module); diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 0b700fbb6f..c6e8b24d12 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -82,6 +82,25 @@ XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS32) { {}); } +XLA_TEST_F(ArrayElementwiseOpTest, NegConstantZeroElementC64) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({}); + auto result = builder.Neg(a); + + ComputeAndCompareR1(&builder, {}, {}, error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, NegConstantC64) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1( + {{-2.5f, 1.0f}, {0.0f, 3.14f}, {2.25f, -1.0f}, {-10.0f, 0.0f}}); + auto result = builder.Neg(a); + + ComputeAndCompareR1( + &builder, {{2.5f, -1.0f}, {0.0f, -3.14f}, {-2.25f, 1.0f}, {10.0f, 0.0f}}, + {}, error_spec_); +} + XLA_TEST_F(ArrayElementwiseOpTest, IsFiniteZeroElementF32s) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({}); @@ -145,6 +164,28 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantZeroElementF32s) { ComputeAndCompareR1(&builder, {}, {}, error_spec_); } +XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1( + {{-2.5f, 0.0f}, {0.0f, 3.14f}, {2.25f, 0.0f}, {1.0f, -10.0f}}); + auto b = builder.ConstantR1( + {{100.0f, 0.0f}, {3.13f, 0.0f}, {2.75f, 1.0f}, {-2.0f, 10.5f}}); + auto add = builder.Add(a, b); + + ComputeAndCompareR1( + &builder, {97.5f, {3.13f, 3.14f}, {5.0f, 1.0f}, {-1.0f, 0.5f}}, {}, + error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantZeroElementC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({}); + auto b = builder.ConstantR1({}); + auto add = builder.Add(a, b); + + ComputeAndCompareR1(&builder, {}, {}, error_spec_); +} + TEST_P(ArrayElementwiseOpTestParamCount, AddManyValues) { const int count = GetParam(); ComputationBuilder builder(client_, TestName()); @@ -222,6 +263,28 @@ XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantZeroElementS32s) { ComputeAndCompareR1(&builder, {}, {}); } +XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1( + {{-2.5f, 0.0f}, {0.0f, 3.14f}, {3.0f, 2.25f}}); + auto b = builder.ConstantR1( + {{0.0f, 10.0f}, {3.13f, 0.0f}, {2.75f, -0.25f}}); + auto add = builder.Sub(a, b); + + ComputeAndCompareR1( + &builder, {{-2.5f, -10.0f}, {-3.13f, 3.14f}, {0.25f, 2.5f}}, {}, + error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantZeroElementC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({}); + auto b = builder.ConstantR1({}); + auto add = builder.Sub(a, b); + + ComputeAndCompareR1(&builder, {}, {}, error_spec_); +} + XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantF32s) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f}); @@ -385,6 +448,27 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivU32s) { } } +XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1( + {{-2.5f, 1.0f}, {-25.5f, 0.0f}, {2.0f, -1.0f}}); + auto b = builder.ConstantR1( + {{10.0f, 0.0f}, {0.0f, 1.0f}, {2.0f, -1.0f}}); + auto div = builder.Div(a, b); + + ComputeAndCompareR1( + &builder, {{-0.25f, 0.1f}, {0.0f, 25.5f}, {1.0f, 0.0f}}, {}, error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantZeroElementC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({}); + auto b = builder.ConstantR1({}); + auto div = builder.Div(a, b); + + ComputeAndCompareR1(&builder, {}, {}, error_spec_); +} + XLA_TEST_F(ArrayElementwiseOpTest, RemF32s) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1( @@ -496,6 +580,28 @@ XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantU32s) { ComputeAndCompareR1(&builder, expected, {}); } +XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1( + {{-2.5f, 0.0f}, {0.0f, 25.5f}, {2.0f, -10.0f}}); + auto b = builder.ConstantR1( + {{0.0f, 10.0f}, {5.0f, 1.0f}, {10.0f, -6.0f}}); + auto add = builder.Mul(a, b); + + ComputeAndCompareR1( + &builder, {{0.0f, -25.0f}, {-25.5f, 127.5f}, {-40.0f, -112.0}}, {}, + error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantZeroElementC64s) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({}); + auto b = builder.ConstantR1({}); + auto add = builder.Mul(a, b); + + ComputeAndCompareR1(&builder, {}, {}, error_spec_); +} + XLA_TEST_F(ArrayElementwiseOpTest, AndPredR1) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({false, false, true, true}); @@ -886,6 +992,53 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementS32s) { ComputeAndCompareR1(&builder, {}, {}); } +XLA_TEST_F(ArrayElementwiseOpTest, CompareEqC64s) { + SetFastMathDisabled(true); + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1({{-2.5f, 10.0f}, + {1.0f, 25.5f}, + {2.25f, -3.0f}, + {NAN, 0.0f}, + {1.0f, 6.0f}}); + auto rhs = builder.ConstantR1({{0.0f, 10.0f}, + {1.0f, 5.0f}, + {2.25f, -3.0f}, + {10.0f, 0.0f}, + {1.0f, NAN}}); + auto compare = builder.Eq(lhs, rhs); + + ComputeAndCompareR1(&builder, {false, false, true, false, false}, {}); +} + +XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementC64s) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1({}); + auto rhs = builder.ConstantR1({}); + auto compare = builder.Eq(lhs, rhs); + + ComputeAndCompareR1(&builder, {}, {}); +} + +XLA_TEST_F(ArrayElementwiseOpTest, CompareNeC64s) { + // Disable fast-math because we're operating on NaNs. + SetFastMathDisabled(true); + + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1({{-2.5f, 10.0f}, + {1.0f, 25.5f}, + {2.25f, -3.0f}, + {NAN, 0.0f}, + {1.0f, 6.0f}}); + auto rhs = builder.ConstantR1({{0.0f, 10.0f}, + {1.0f, 5.0f}, + {2.25f, -3.0f}, + {10.0f, 0.0f}, + {1.0f, NAN}}); + auto compare = builder.Ne(lhs, rhs); + + ComputeAndCompareR1(&builder, {true, true, false, true, true}, {}); +} + XLA_TEST_F(ArrayElementwiseOpTest, CompareNeF32s) { // Disable fast-math because we're operating on NaNs. SetFastMathDisabled(true); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index b578667735..1dc274c591 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -332,8 +332,9 @@ void ClientLibraryTestBase::ComputeAndCompareR0( ComputationBuilder* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || - std::is_same::value, - "Floating point type required when specifying an ErrorSpec"); + std::is_same::value || + std::is_same::value, + "Float or complex type required when specifying an ErrorSpec"); std::unique_ptr expected_literal = Literal::CreateR0(expected); ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, @@ -355,8 +356,9 @@ void ClientLibraryTestBase::ComputeAndCompareR1( ComputationBuilder* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || - std::is_same::value, - "Floating point type required when specifying an ErrorSpec"); + std::is_same::value || + std::is_same::value, + "Float or complex type required when specifying an ErrorSpec"); std::unique_ptr expected_literal = Literal::CreateR1(expected); ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index b72dd2707c..bfb04fd9f9 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -386,35 +386,39 @@ void DotOperationTest::TestNonsquareMatrixDot(bool lhs_row_major, } XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFF) { - constexpr bool kLhsRowMajor = false; - constexpr bool kRhsRowMajor = false; - TestNonsquareMatrixDot(kLhsRowMajor, kRhsRowMajor); + TestNonsquareMatrixDot(false, false); } XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFT) { - constexpr bool kLhsRowMajor = false; - constexpr bool kRhsRowMajor = true; - TestNonsquareMatrixDot(kLhsRowMajor, kRhsRowMajor); + TestNonsquareMatrixDot(false, true); } XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTF) { - constexpr bool kLhsRowMajor = true; - constexpr bool kRhsRowMajor = false; - TestNonsquareMatrixDot(kLhsRowMajor, kRhsRowMajor); + TestNonsquareMatrixDot(true, false); } XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTT) { - constexpr bool kLhsRowMajor = true; - constexpr bool kRhsRowMajor = true; - TestNonsquareMatrixDot(kLhsRowMajor, kRhsRowMajor); + TestNonsquareMatrixDot(true, true); } XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF64) { TestNonsquareMatrixDot(); } -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64) { - TestNonsquareMatrixDot(); +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorFF) { + TestNonsquareMatrixDot(false, false); +} + +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorFT) { + TestNonsquareMatrixDot(false, true); +} + +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorTF) { + TestNonsquareMatrixDot(true, false); +} + +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorTT) { + TestNonsquareMatrixDot(true, true); } XLA_TEST_F(DotOperationTest, MatrixVectorC64) { -- GitLab From 3db96abfc5432c190d3afa62ebfad3c1d82cd818 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:37:37 -0800 Subject: [PATCH 0349/1801] Allow assigning colors based on HLO sharding information, when generating Graphviz HLO graphs via a new --xla_hlo_graph_sharding_color option. When generating TF graphs, a new --xla_hlo_tfgraph_device_scopes option allows to prefix the instructions names with a device scope. This help the TF graph viewer to better isolate the parts of the graph which are targeted to different devices, and allow rendering of graphs which would not be able to due to size. Changed TF/XLA broadcast lowering to propagate the request metadata into the HLO broadcast instructions. PiperOrigin-RevId: 175563052 --- .../xla/legacy_flags/debug_options_flags.cc | 16 +- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_graph_dumper.cc | 201 ++++++++++-------- .../compiler/xla/service/hlo_graph_dumper.h | 5 +- .../xla/service/hlo_graph_dumper_test.cc | 2 +- .../xla/service/hlo_tfgraph_builder.cc | 29 ++- .../xla/service/hlo_tfgraph_builder.h | 7 +- .../compiler/xla/service/user_computation.cc | 3 + tensorflow/compiler/xla/xla.proto | 8 + 9 files changed, 169 insertions(+), 103 deletions(-) diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc index f2cdd9669c..bfafef0a40 100644 --- a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc +++ b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc @@ -31,7 +31,6 @@ std::vector* flag_objects; std::once_flag flags_init; void SetDebugOptionsDefaults(DebugOptions* flags) { - flags->set_xla_hlo_graph_path("/tmp/"); flags->set_xla_enable_fast_math(true); flags->set_xla_llvm_enable_alias_scope_metadata(true); flags->set_xla_llvm_enable_noalias_metadata(true); @@ -117,9 +116,22 @@ void AllocateFlags() { bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_graphdef), flag_values->xla_hlo_dump_as_graphdef(), "Dump HLO graphs as TensorFlow GraphDefs."), + tensorflow::Flag( + "xla_hlo_graph_sharding_color", + bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color), + flag_values->xla_hlo_graph_sharding_color(), + "Assign colors based on sharding assignments when generating the " + "HLO graphs."), + tensorflow::Flag( + "xla_hlo_tfgraph_device_scopes", + bool_setter_for(&DebugOptions::set_xla_hlo_tfgraph_device_scopes), + flag_values->xla_hlo_tfgraph_device_scopes(), + "When generating TensorFlow HLO graphs, if the HLO instructions " + "are assigned to a specific device, prefix the name scope with " + "\"devX\" with X being the device ordinal."), tensorflow::Flag( "xla_log_hlo_text", flag_values->mutable_xla_log_hlo_text(), - "HLO modules matching this regex will be dumped to LOG(INFO). "), + "HLO modules matching this regex will be dumped to LOG(INFO)."), tensorflow::Flag( "xla_generate_hlo_text_to", flag_values->mutable_xla_generate_hlo_text_to(), diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 7cf24641b5..c163a5f837 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1985,6 +1985,7 @@ cc_library( ":hlo", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_proto", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 04b3059fb1..e4c89cd8c1 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -312,11 +312,11 @@ optional MatchTrivialComputation(const HloComputation* computation) { class HloDotDumper { public: HloDotDumper(const HloComputation* computation, tensorflow::StringPiece label, - bool show_addresses, bool show_metadata, + const DebugOptions& debug_options, bool show_metadata, const HloExecutionProfile* profile, NodeFilter filter) : computation_(computation), label_(label.ToString()), - show_addresses_(show_addresses), + debug_options_(debug_options), show_metadata_(show_metadata), profile_(profile), filter_(std::move(filter)) {} @@ -382,7 +382,7 @@ class HloDotDumper { const HloComputation* computation_; // never null const string label_; // overall name for the graph - const bool show_addresses_; + const DebugOptions& debug_options_; const bool show_metadata_; const HloExecutionProfile* profile_; // may be null const NodeFilter filter_; @@ -414,6 +414,11 @@ class HloDotDumper { // appears before both the inner computation and the destination node are // defined. std::vector edges_; + + // When coloring by sharding information, we track the sharding string + // representation to color association, by round-robin the color schemes. + std::unordered_map sharding_colors_; + int64 next_shard_color_ = 0; }; string HloDotDumper::Dump() { @@ -734,15 +739,16 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { string trivial_subcomputation = GetInstructionTrivialComputationStr(instr); AddInstructionIncomingEdges(instr); - // Override the node's styling if it should be (de-)emphasized. - if (filter_.Deemphasized(instr)) { - color = kDashedBorder; - } - if (filter_.Highlight(instr)) { - node_shape = "diamond"; - color = kDarkRed; + if (!debug_options_.xla_hlo_graph_sharding_color()) { + // Override the node's styling if it should be (de-)emphasized. + if (filter_.Deemphasized(instr)) { + color = kDashedBorder; + } + if (filter_.Highlight(instr)) { + node_shape = "diamond"; + color = kDarkRed; + } } - // Build the text that will be displayed inside the node. string node_body = node_label; for (const string& s : @@ -827,6 +833,20 @@ string HloDotDumper::GetInstructionNodeInlinedOperands( } ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { + if (debug_options_.xla_hlo_graph_sharding_color()) { + if (!instr->has_sharding()) { + return kDashedBorder; + } + string shard_str = instr->sharding().ToString(); + auto it = sharding_colors_.find(shard_str); + if (it != sharding_colors_.end()) { + return it->second; + } + ColorScheme color = static_cast( + kBlue + (next_shard_color_++ % (kDashedBorder - kBlue))); + sharding_colors_.emplace(shard_str, color); + return color; + } const auto kParameterColor = kOrange; // Special case: If this instruction has a parameter merged into it, paint it @@ -1079,8 +1099,7 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) { } lines.push_back(instr_shape); } - - if (show_addresses_) { + if (debug_options_.xla_hlo_graph_addresses()) { lines.push_back(Printf("[%p]", instr)); } if (profile_ != nullptr) { @@ -1177,70 +1196,36 @@ const HloInstruction* HloDotDumper::GetNodeForEdge( return instr; } -tensorflow::mutex& RendererMutex() { - static tensorflow::mutex* mu = new tensorflow::mutex; - return *mu; -} +class GraphRendererRegistry { + public: + void AddRenderer(GraphRendererInterface* graph_renderer) { + tensorflow::mutex_lock lock(mu_); + graph_renderer_ = graph_renderer; + } -std::map* GraphRenderers() { - static auto* graph_renderers = new std::map(); - return graph_renderers; -} + GraphRendererInterface* GetDefaultRenderer() { + tensorflow::mutex_lock lock(mu_); + return graph_renderer_; + } -GraphRendererInterface* GetGraphRenderer() { - tensorflow::mutex_lock lock(RendererMutex()); - auto* graph_renderers = GraphRenderers(); - auto it = graph_renderers->rbegin(); - CHECK(it != graph_renderers->rend()) << "No registered graph dumpers"; - return it->second; -} + static GraphRendererRegistry* Default() { + static GraphRendererRegistry* registry = new GraphRendererRegistry(); + return registry; + } + + private: + tensorflow::mutex mu_; + GraphRendererInterface* graph_renderer_ = nullptr; +}; } // namespace -Registrar::Registrar(GraphRendererInterface* dumper, int priority) { - tensorflow::mutex_lock lock(RendererMutex()); - auto* graph_renderers = GraphRenderers(); - graph_renderers->emplace(priority, dumper); +Registrar::Registrar(GraphRendererInterface* dumper) { + GraphRendererRegistry::Default()->AddRenderer(dumper); } namespace { -class FileGraphRenderer : public GraphRendererInterface { - public: - string RenderGraph(const string& graph, GraphKind graph_kind, - const DebugOptions& debug_options) override { - static std::atomic output_num(0); - string file_extension; - switch (graph_kind) { - case DOT_GRAPH: - file_extension = ".dot"; - break; - case TF_GRAPHDEF: - file_extension = ".pbtxt"; - break; - } - string path = - JoinPath(debug_options.xla_hlo_graph_path(), - StrCat("hlo_graph_", output_num++, ".XXXXXX", file_extension)); - auto status = Status::OK(); - int fd = mkstemps(&path[0], file_extension.length()); - if (fd < 0) { - status = - Status(tensorflow::error::Code::UNKNOWN, - StrCat("Failed to create temporary file to dump HLO graph: ", - strerror(errno))); - } else { - status = tensorflow::WriteStringToFile(tensorflow::Env::Default(), path, - graph); - close(fd); - } - if (!status.ok()) { - LOG(WARNING) << "Saving HLO graph failed: " << status; - } - return path; - } -}; - // Gets a NodeFilter that includes roughly all instructions whose distance from // root is <= radius. NodeFilter MakeNodeFilter(const HloInstruction* root, int64 radius) { @@ -1350,7 +1335,54 @@ NodeFilter MakeNodeFilter(const HloInstruction* root, int64 radius) { }); } -XLA_REGISTER_GRAPH_RENDERER(FileGraphRenderer, 0); +string SaveGraph(const string& graph, + GraphRendererInterface::GraphKind graph_kind, + const string& dest_path) { + static std::atomic output_num(0); + string file_extension; + switch (graph_kind) { + case GraphRendererInterface::DOT_GRAPH: + file_extension = ".dot"; + break; + case GraphRendererInterface::TF_GRAPHDEF: + file_extension = ".pbtxt"; + break; + } + string path = JoinPath( + dest_path, StrCat("hlo_graph_", output_num++, ".XXXXXX", file_extension)); + auto status = Status::OK(); + int fd = mkstemps(&path[0], file_extension.length()); + if (fd < 0) { + status = + Status(tensorflow::error::Code::UNKNOWN, + StrCat("Failed to create temporary file to dump HLO graph: ", + strerror(errno))); + } else { + status = + tensorflow::WriteStringToFile(tensorflow::Env::Default(), path, graph); + close(fd); + } + if (!status.ok()) { + LOG(WARNING) << "Saving HLO graph failed: " << status; + } + return path; +} + +string ExportGraph(const string& graph, + GraphRendererInterface::GraphKind graph_kind, + const DebugOptions& debug_options) { + string path = debug_options.xla_hlo_graph_path(); + if (!path.empty()) { + return SaveGraph(graph, graph_kind, path); + } else { + auto graph_renderer = + GraphRendererRegistry::Default()->GetDefaultRenderer(); + CHECK(graph_renderer != nullptr) + << "No registered renderer for the HLO graph. " + "Use --xla_hlo_graph_path=PATH to export to local file system"; + return graph_renderer->RenderGraph(graph, graph_kind, debug_options); + } +} } // namespace @@ -1358,27 +1390,22 @@ string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, const HloExecutionProfile* hlo_execution_profile, bool show_metadata) { + GraphRendererInterface::GraphKind graph_kind; string graph; - string graph_url; if (debug_options.xla_hlo_dump_as_graphdef()) { - HloTfGraphBuilder builder; + HloTfGraphBuilder builder(debug_options); TF_CHECK_OK(builder.AddComputation(computation)); CHECK(tensorflow::protobuf::TextFormat::PrintToString(builder.GetGraphDef(), &graph)); - // TODO(b/37198616): Use the default registered renderers when all - // renderers support rendering GraphDefs. Always dump GraphDefs to files - // for now. - graph_url = FileGraphRenderer().RenderGraph( - graph, GraphRendererInterface::TF_GRAPHDEF, debug_options); + graph_kind = GraphRendererInterface::TF_GRAPHDEF; } else { - graph = - HloDotDumper(&computation, label, - /*show_addresses=*/debug_options.xla_hlo_graph_addresses(), - show_metadata, hlo_execution_profile, NodeFilter()) - .Dump(); - graph_url = GetGraphRenderer()->RenderGraph( - graph, GraphRendererInterface::DOT_GRAPH, debug_options); + graph = HloDotDumper(&computation, label, debug_options, show_metadata, + hlo_execution_profile, NodeFilter()) + .Dump(); + graph_kind = GraphRendererInterface::DOT_GRAPH; } + + string graph_url = ExportGraph(graph, graph_kind, debug_options); LOG(INFO) << "computation " << computation.name() << " [" << label << "]: " << graph_url; return graph_url; @@ -1391,12 +1418,10 @@ string DumpNeighborhoodAround(const HloInstruction& node, int radius, StrCat("Neighborhood of ", radius, " nodes around ", node.name()); NodeFilter filter = MakeNodeFilter(&node, radius); string graph = - HloDotDumper(node.parent(), label, - /*show_addresses=*/debug_options.xla_hlo_graph_addresses(), - show_metadata, /*profile=*/nullptr, filter) + HloDotDumper(node.parent(), label, debug_options, show_metadata, + /*profile=*/nullptr, filter) .Dump(); - return GetGraphRenderer()->RenderGraph( - graph, GraphRendererInterface::DOT_GRAPH, debug_options); + return ExportGraph(graph, GraphRendererInterface::DOT_GRAPH, debug_options); } void DumpText(const HloModule& module, const string& label, diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h index dd304ec76c..2704aae1e3 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h @@ -84,11 +84,10 @@ void DumpText(const HloModule& module, const string& label, // Internal implementation details below this point. -// Class that registers a graph renderer. Higher-priority renders are chosen -// first. +// Class that registers a graph renderer. class Registrar { public: - Registrar(GraphRendererInterface* dumper, int priority); + Registrar(GraphRendererInterface* dumper); }; #define XLA_INTERNAL_REGISTER_GRAPH_RENDERER(factory, ctr, ...) \ diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc index 7b0f937f38..8e1531c87f 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc @@ -45,7 +45,7 @@ class DotRenderer : public hlo_graph_dumper::GraphRendererInterface { string last_graph_; }; -XLA_REGISTER_GRAPH_RENDERER(DotRenderer, std::numeric_limits::max()); +XLA_REGISTER_GRAPH_RENDERER(DotRenderer); TEST(HloGraphDumperTest, NestedFusion) { HloComputation::Builder b("b"); diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc index 06abe00747..101a710d1c 100644 --- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc +++ b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc @@ -58,8 +58,6 @@ TensorShapeProto GetTensorShape(const HloInstruction* instruction) { string GetDeviceName(int device) { return StrCat("/device/XLA:", device); } -} // namespace - void CleanNodeName(string* name) { name->erase(std::remove(name->begin(), name->end(), '%'), name->end()); const string chars_to_replace = "<>[]"; @@ -70,6 +68,11 @@ void CleanNodeName(string* name) { std::replace_if(name->begin(), name->end(), pred, '_'); } +} // namespace + +HloTfGraphBuilder::HloTfGraphBuilder(const DebugOptions& debug_options) + : debug_options_(debug_options) {} + Status HloTfGraphBuilder::AddComputation(const HloComputation& computation) { VLOG(2) << "Adding computation " << computation.name(); for (auto embedded : computation.MakeEmbeddedComputationsList()) { @@ -90,24 +93,38 @@ const string& HloTfGraphBuilder::GetNodeNameForInstruction( if (ContainsKey(instruction_to_node_name_, instruction)) { return instruction_to_node_name_[instruction]; } + auto append = [](string* str, const string& other) { + if (str->empty()) { + *str = other; + } else if (!other.empty()) { + StrAppend(str, "/", other); + } + }; string node_name; + if (debug_options_.xla_hlo_tfgraph_device_scopes() && + instruction->has_sharding() && + instruction->sharding().HasUniqueDevice()) { + node_name = StrCat( + "dev", instruction->sharding().UniqueDevice().ConsumeValueOrDie()); + } // If an instruction is fused, put it in the subgraph of the fusion; // otherwise, put it in the computation subgraph. const HloComputation* computation = instruction->parent(); if (computation->IsFusionComputation()) { - node_name = GetNodeNameForInstruction(computation->FusionInstruction()); + append(&node_name, + GetNodeNameForInstruction(computation->FusionInstruction())); } else { - node_name = computation->name(); + append(&node_name, computation->name()); if (!instruction->metadata().op_name().empty()) { // Always make computations contain TF ops but not the other way around. - StrAppend(&node_name, "/", instruction->metadata().op_name()); + append(&node_name, instruction->metadata().op_name()); } } string instruction_name = instruction->name(); if (instruction->opcode() == HloOpcode::kParameter) { StrAppend(&instruction_name, ".", instruction->parameter_number()); } - StrAppend(&node_name, "/", instruction_name); + append(&node_name, instruction_name); CleanNodeName(&node_name); auto ret = instruction_to_node_name_.insert(std::make_pair(instruction, node_name)); diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.h b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.h index b2c578af91..9aa3e501d5 100644 --- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.h +++ b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.h @@ -17,6 +17,7 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TFGRAPH_BUILDER_H_ #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/xla.pb.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def.pb.h" @@ -26,6 +27,8 @@ namespace hlo_graph_dumper { // This constructs a tensorflow graph for HLO computations. class HloTfGraphBuilder { public: + HloTfGraphBuilder(const DebugOptions& debug_options = DebugOptions()); + // Adds a computation to the graph. Status AddComputation(const HloComputation& computation); @@ -42,6 +45,7 @@ class HloTfGraphBuilder { Status AddInstruction(const HloInstruction* instruction); + DebugOptions debug_options_; tensorflow::GraphDef graph_def_; // This records instructions that have been visited. std::unordered_set visited_instructions_; @@ -49,9 +53,6 @@ class HloTfGraphBuilder { std::unordered_map instruction_to_node_name_; }; -// Cleans the node name to make it a valid name in a tensorflow graph. -void CleanNodeName(string* name); - } // namespace hlo_graph_dumper } // namespace xla diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 8d5bb08e51..8f63c92e5b 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -2538,6 +2538,7 @@ HloInstruction* ComputationLowerer::ImplicitBroadcastToExplicitBroadcast( if (ShapeUtil::IsScalar(operand->shape())) { HloInstruction* broadcast = hlo_builder_.AddInstruction( HloInstruction::CreateBroadcast(broadcast_shape, operand, {})); + broadcast->set_metadata(operand->metadata()); if (operand->has_sharding()) { broadcast->set_sharding(operand->sharding()); } @@ -2558,6 +2559,7 @@ HloInstruction* ComputationLowerer::ImplicitBroadcastToExplicitBroadcast( ShapeUtil::MakeShape(operand->shape().element_type(), reshaped_dimensions), operand)); + reshaped_operand->set_metadata(operand->metadata()); if (operand->has_sharding()) { reshaped_operand->set_sharding(operand->sharding()); } @@ -2565,6 +2567,7 @@ HloInstruction* ComputationLowerer::ImplicitBroadcastToExplicitBroadcast( HloInstruction* broadcast = hlo_builder_.AddInstruction(HloInstruction::CreateBroadcast( broadcast_shape, reshaped_operand, broadcast_dimensions)); + broadcast->set_metadata(operand->metadata()); if (operand->has_sharding()) { broadcast->set_sharding(operand->sharding()); } diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 710bb6ff25..127e5e81ac 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -167,6 +167,14 @@ message DebugOptions { // computation will run 2! * 4! times. bool xla_test_all_input_layouts = 91; + // Assign colors based on sharding information when generating the Graphviz + // HLO graph. + bool xla_hlo_graph_sharding_color = 92; + + // Prefix the name scopes of the TF graph exports with "devX" device + // assignments, if available. + bool xla_hlo_tfgraph_device_scopes = 93; + // Extra options to pass to the compilation backend; specific interpretation // of these values is left to the backend. map xla_backend_extra_options = 500; -- GitLab From 73bc96ffc009283058c9d55b494745631a931814 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 13 Nov 2017 11:41:27 -0800 Subject: [PATCH 0350/1801] Implement __format__ for EagerTensor Allows the user to substitute EagerTensors into format strings. For example, print("{:.2f}".format(tf.constant(3.1415))) now works PiperOrigin-RevId: 175563766 --- tensorflow/python/eager/ops_test.py | 4 ++++ tensorflow/python/framework/ops.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index e86073d6b2..51550c9f51 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -345,6 +345,10 @@ class OpsTest(test_util.TensorFlowTestCase): with self.assertRaises(TypeError): float(x) + def testFormatString(self): + x = constant_op.constant(3.1415) + self.assertEqual('3.14', '{:.2f}'.format(x)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index aedd8d87d9..d2608845ac 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -646,6 +646,9 @@ class _EagerTensorBase(Tensor): def __array__(self): return np.array(self.numpy()) + def __format__(self, format_spec): + return self.numpy().__format__(format_spec) + def _numpy(self): raise NotImplementedError() -- GitLab From 0a79d4ab6ab961d7f36cbb3a14cfaff2152e70fd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:41:38 -0800 Subject: [PATCH 0351/1801] Moved tensorflow::StringPiece::Hasher out of tensorflow::StringPiece and renamed it tensorflow::StringPieceHasher. This allows tensorflow::StringPiece to be more easily replaced with absl::string_view (which does not contain a Hasher struct). PiperOrigin-RevId: 175563786 --- tensorflow/cc/framework/cc_op_gen.cc | 119 +++++++++++++++--- tensorflow/core/common_runtime/device_mgr.h | 2 +- .../core/common_runtime/direct_session.cc | 2 +- .../core/common_runtime/direct_session.h | 3 +- tensorflow/core/common_runtime/placer.cc | 4 +- .../common_runtime/step_stats_collector.cc | 4 +- .../distributed_runtime/master_session.cc | 4 +- .../core/framework/variant_op_registry.h | 9 +- tensorflow/core/graph/costmodel.h | 2 +- tensorflow/core/graph/graph_constructor.cc | 6 +- tensorflow/core/graph/quantize_training.cc | 4 +- tensorflow/core/graph/subgraph.h | 2 +- tensorflow/core/lib/core/stringpiece.cc | 2 +- tensorflow/core/lib/core/stringpiece.h | 8 +- .../graph_transforms/fold_constants_lib.cc | 4 +- 15 files changed, 128 insertions(+), 47 deletions(-) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index 38a17598b8..7a1b2a012d 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -297,7 +297,7 @@ string ToCamelCase(const string& str) { // argument to a function. std::pair AttrTypeName(StringPiece attr_type) { static const std::unordered_map, - StringPiece::Hasher> + StringPieceHasher> attr_type_map{ {"string", {"StringPiece", false}}, {"list(string)", {"gtl::ArraySlice", true}}, @@ -325,29 +325,112 @@ std::pair AttrTypeName(StringPiece attr_type) { } bool IsCPPKeyword(StringPiece name) { - static const std::unordered_set + static const std::unordered_set // Keywords obtained from http://en.cppreference.com/w/cpp/keyword kCPPReserved{ - "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", - "atomic_commit", "atomic_noexcept", "auto", "bitand", "bitor", "bool", - "break", "case", "catch", "char", "char16_t", "char32_t", "class", - "compl", "concept", "const", "const_cast", "constexpr", "continue", - "decltype", "default", "delete", "do", "double", "dynamic_cast", - "else", "enum", "explicit", "export", "extern", "false", "final", - "float", "for", "friend", "goto", "if", "import", "inline", "int", - "long", "module", "mutable", "namespace", "new", "noexcept", "not", - "not_eq", "nullptr", "operator", "or", "or_eq", "override", "private", - "protected", "public", "register", "reinterpret_cast", "requires", - "return", "short", "signed", "sizeof", "static", "static_assert", - "static_cast", "struct", "switch", "synchronized", "template", "this", - "thread_local", "throw", "true", "try", "typedef", "typeid", - "typename", "union", "unsigned", "using", "virtual", "void", - "volatile", "wchar_t", "while", "xor", "xor_eq", + "alignas", + "alignof", + "and", + "and_eq", + "asm", + "atomic_cancel", + "atomic_commit", + "atomic_noexcept", + "auto", + "bitand", + "bitor", + "bool", + "break", + "case", + "catch", + "char", + "char16_t", + "char32_t", + "class", + "compl", + "concept", + "const", + "const_cast", + "constexpr", + "continue", + "decltype", + "default", + "delete", + "do", + "double", + "dynamic_cast", + "else", + "enum", + "explicit", + "export", + "extern", + "false", + "final", + "float", + "for", + "friend", + "goto", + "if", + "import", + "inline", + "int", + "long", + "module", + "mutable", + "namespace", + "new", + "noexcept", + "not", + "not_eq", + "nullptr", + "operator", + "or", + "or_eq", + "override", + "private", + "protected", + "public", + "register", + "reinterpret_cast", + "requires", + "return", + "short", + "signed", + "sizeof", + "static", + "static_assert", + "static_cast", + "struct", + "switch", + "synchronized", + "template", + "this", + "thread_local", + "throw", + "true", + "try", + "typedef", + "typeid", + "typename", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "wchar_t", + "while", + "xor", + "xor_eq", // The following are not C++ keywords, but names of local variables // and parameters used in the op constructor. Treating them as // keywords, so that other parameter names don't conflict with these. - "builder", "node", "ret", "scope", "unique_name", + "builder", + "node", + "ret", + "scope", + "unique_name", }; return kCPPReserved.count(name) > 0; } diff --git a/tensorflow/core/common_runtime/device_mgr.h b/tensorflow/core/common_runtime/device_mgr.h index d16681ac59..cd93f76324 100644 --- a/tensorflow/core/common_runtime/device_mgr.h +++ b/tensorflow/core/common_runtime/device_mgr.h @@ -68,7 +68,7 @@ class DeviceMgr { StringPiece CopyToBackingStore(StringPiece s); - std::unordered_map device_map_; + std::unordered_map device_map_; core::Arena name_backing_store_; // Storage for keys in device_map_ std::unordered_map device_type_counts_; diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 316fb0ac16..d1dc826a6e 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1135,7 +1135,7 @@ Status DirectSession::GetOrCreateExecutors( if (run_state_args->is_partial_run) { ek->graph = std::move(run_state_args->graph); - std::unordered_set names; + std::unordered_set names; for (const string& input : inputs) { TensorId id(ParseTensorName(input)); names.emplace(id.first); diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 7fbabf6d81..780d0b46a8 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -64,8 +64,7 @@ class DirectSession : public Session { ~DirectSession() override; typedef std::vector> NamedTensorList; - typedef std::unordered_map - NameNodeMap; + typedef std::unordered_map NameNodeMap; ::tensorflow::Status Create(const GraphDef& graph) override; ::tensorflow::Status Extend(const GraphDef& graph) override; diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc index 73fdf60fd5..54f082e823 100644 --- a/tensorflow/core/common_runtime/placer.cc +++ b/tensorflow/core/common_runtime/placer.cc @@ -129,7 +129,7 @@ class ColocationGraph { // 'string' values stored in NodeDef attribute lists, as well as StringPiece // values that refer to 'string' values from NodeDef::name(), without // performing any string allocations. - std::unordered_map + std::unordered_map colocation_group_root; for (Node* node : graph_->nodes()) { @@ -171,7 +171,7 @@ class ColocationGraph { } Status ColocateNodeToGroup( - std::unordered_map* + std::unordered_map* colocation_group_root, Node* node, StringPiece colocation_group) { const Node*& root_node = (*colocation_group_root)[colocation_group]; diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index e6403df97f..ba8e555f36 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -139,7 +139,7 @@ void StepStatsCollector::BuildCostModel( const DeviceStepStats* hardware_stats; }; - std::unordered_map + std::unordered_map per_device_stats; std::unordered_map gpu_hardware_stats; @@ -179,7 +179,7 @@ void StepStatsCollector::BuildCostModel( CostModel* cm = cost_model_manager->FindOrCreateCostModel(graph); cm->IncrementUpdateTimes(); - std::unordered_map name_to_node; + std::unordered_map name_to_node; for (Node* n : graph->nodes()) { name_to_node.emplace(n->name(), n); } diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index f7fce1d0ec..7617158d66 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -208,7 +208,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { const bool is_partial_; const DebugOptions& debug_opts_; WorkerCacheInterface* const worker_cache_; // Not owned. - std::unordered_map name_to_node_; + std::unordered_map name_to_node_; // Graph partitioned into per-location subgraphs. struct Part { @@ -486,7 +486,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions( VLOG(2) << "RunPartitions step_id " << step_id << " execution_count " << execution_count; // Maps the names of fed tensors to their index in `req`. - std::unordered_map feeds(3); + std::unordered_map feeds(3); for (size_t i = 0; i < req.num_feeds(); ++i) { if (!feeds.insert({req.feed_name(i), i}).second) { diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index 831dbd3dff..13f6908cae 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -145,9 +145,8 @@ class UnaryVariantOpRegistry { static std::unordered_set* PersistentStringStorage(); private: - std::unordered_map - shape_fns; - std::unordered_map + std::unordered_map shape_fns; + std::unordered_map decode_fns; // Map std::pair to function. @@ -159,7 +158,7 @@ class UnaryVariantOpRegistry { ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x))); return ret; } - StringPiece::Hasher sp_hasher_; + StringPieceHasher sp_hasher_; }; std::unordered_map, @@ -177,7 +176,7 @@ class UnaryVariantOpRegistry { ret = Hash64Combine(ret, sp_hasher_(std::get<2>(x))); return ret; } - StringPiece::Hasher sp_hasher_; + StringPieceHasher sp_hasher_; }; std::unordered_map, VariantUnaryOpFn, TupleHash> diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h index a908a4843c..8afa4971ad 100644 --- a/tensorflow/core/graph/costmodel.h +++ b/tensorflow/core/graph/costmodel.h @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" namespace tensorflow { -typedef std::unordered_map +typedef std::unordered_map NodeNameToCostIdMap; class StepStats; diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 2ee409768b..ebaffeb50e 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -241,13 +241,13 @@ class GraphConstructor { }; // TODO(vrv): Profile this data structure to see if we should use an // alternative implementation of std::unordered_map. - std::unordered_map gdef_nodes_; + std::unordered_map gdef_nodes_; // Mapping from node name to the existing node in g_. - std::unordered_map existing_nodes_; + std::unordered_map existing_nodes_; // Prefixes already used in the graph. - std::unordered_set existing_prefixes_; + std::unordered_set existing_prefixes_; // Imported node names that have been uniquified. The key is the original // name, the value is the new unique name. diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc index d9cb55f448..cb0fc8a154 100644 --- a/tensorflow/core/graph/quantize_training.cc +++ b/tensorflow/core/graph/quantize_training.cc @@ -42,7 +42,7 @@ const float kEMADecay = 0.999; // Node types to rewrite. Insert quantize_and_dequantize op for their inputs. const auto* nodes_to_rewrite = - new std::unordered_set{"MatMul", "Conv2D"}; + new std::unordered_set{"MatMul", "Conv2D"}; // Contains necessary parameters to convert an edge. struct EdgeToConvert { @@ -563,7 +563,7 @@ Status ProcessTargetEdges(Graph* graph, const string& quant_op_type, const std::vector& target_edges) { // Remember previously converted ops to avoid duplicated conversion on the // same input. - std::unordered_map name_index; + std::unordered_map name_index; std::vector added_variables; for (const EdgeToConvert edge : target_edges) { Node* convert_node; diff --git a/tensorflow/core/graph/subgraph.h b/tensorflow/core/graph/subgraph.h index 8ccc27914b..3c1f8870f5 100644 --- a/tensorflow/core/graph/subgraph.h +++ b/tensorflow/core/graph/subgraph.h @@ -71,7 +71,7 @@ Status RewriteGraphForExecution( const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata); -typedef std::unordered_map NameIndex; +typedef std::unordered_map NameIndex; // Augment "*g" by adding special "fetch" nodes that connect to the // tensor outputs specified in "fetch_outputs" to retrieve the output diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 984f4404ce..29b727fc44 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -21,7 +21,7 @@ limitations under the License. namespace tensorflow { -size_t StringPiece::Hasher::operator()(StringPiece s) const { +size_t StringPieceHasher::operator()(StringPiece s) const { return Hash64(s.data(), s.size()); } diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 94f4a377f1..53af116465 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -103,10 +103,6 @@ class StringPiece { StringPiece substr(size_t pos, size_t n = npos) const; - struct Hasher { - size_t operator()(StringPiece arg) const; - }; - // Return a string that contains the copy of the referenced data. std::string ToString() const { return std::string(data_, size_); } @@ -133,6 +129,10 @@ class StringPiece { // Intentionally copyable }; +struct StringPieceHasher { + size_t operator()(StringPiece s) const; +}; + inline bool operator==(StringPiece x, StringPiece y) { return ((x.size() == y.size()) && (memcmp(x.data(), y.data(), x.size()) == 0)); diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc index f2934a79bd..250f54e20f 100644 --- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc +++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc @@ -39,9 +39,9 @@ limitations under the License. namespace tensorflow { namespace graph_transforms { namespace { -using StringPieceSet = std::unordered_set; +using StringPieceSet = std::unordered_set; template -using StringPieceMap = std::unordered_map; +using StringPieceMap = std::unordered_map; } // namespace Status ReplaceSendRecvs(const GraphDef& original_graph_def, -- GitLab From 6470669cc4cef9c8f23527121cf0e20b72d2bfa0 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 13 Nov 2017 11:47:50 -0800 Subject: [PATCH 0352/1801] Fix use-after-free bug. PiperOrigin-RevId: 175564747 --- tensorflow/python/eager/pywrap_tfe_src.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index c3685d6222..5cb1313c4b 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -680,7 +680,7 @@ void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, } } Py_DECREF(seq); - char* op_type_str = nullptr; + string op_type_str; if (PyBytes_Check(op_type)) { op_type_str = PyBytes_AsString(op_type); } else if (PyUnicode_Check(op_type)) { -- GitLab From bc4d2043b631b78365560fb893ac81e93935535e Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 13 Nov 2017 11:48:10 -0800 Subject: [PATCH 0353/1801] Add bfloat support to XLA. This is necessary in providing bfloat support in GPU backend. RELNOTES: bfloat support is now added to XLA infra. PiperOrigin-RevId: 175564791 --- tensorflow/compiler/tf2xla/type_util.cc | 3 + tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/literal_util.cc | 99 ++++++++++++++++++- tensorflow/compiler/xla/literal_util.h | 23 +++++ tensorflow/compiler/xla/literal_util_test.cc | 62 ++++++++++++ tensorflow/compiler/xla/primitive_util.cc | 8 +- tensorflow/compiler/xla/primitive_util.h | 7 ++ tensorflow/compiler/xla/service/backend.cc | 4 +- .../xla/service/cpu/cpu_runtime_test.cc | 4 +- .../compiler/xla/service/hlo_evaluator.cc | 4 + tensorflow/compiler/xla/service/hlo_runner.cc | 3 +- tensorflow/compiler/xla/shape_util.cc | 1 + .../compiler/xla/tests/literal_test_util.cc | 13 ++- .../xla/tests/local_client_test_base.cc | 3 +- tensorflow/compiler/xla/types.h | 3 + tensorflow/compiler/xla/xla_data.proto | 13 ++- tensorflow/core/framework/bfloat16_test.cc | 61 ++++++++++++ tensorflow/core/framework/numeric_types.h | 83 +++++++++++++++- 18 files changed, 374 insertions(+), 21 deletions(-) diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc index 1efbe0ffb1..c969212a1b 100644 --- a/tensorflow/compiler/tf2xla/type_util.cc +++ b/tensorflow/compiler/tf2xla/type_util.cc @@ -49,6 +49,9 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) { case tensorflow::DT_UINT64: *type = xla::U64; return Status::OK(); + case tensorflow::DT_BFLOAT16: + *type = xla::BF16; + return Status::OK(); case tensorflow::DT_HALF: *type = xla::F16; return Status::OK(); diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index fa4d348ebd..515b572b0e 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -77,6 +77,7 @@ cc_library( hdrs = ["types.h"], visibility = [":friends"], deps = [ + "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//third_party/eigen3", ], diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 0cb2223ae5..93d3cd425f 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -33,6 +33,20 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +namespace { +using tensorflow::int64; + +constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; + +// Converts between little and big endian, assuming elements in the array are 16 +// bits long. +void ConvertEndianShort(char* bytes, int64 size) { + CHECK_EQ(size / 2, 0); + for (int64 i = 0; i < size; i += 2) { + std::swap(bytes[i], bytes[i + 1]); + } +} +} // namespace namespace xla { @@ -169,6 +183,8 @@ Status Literal::Copy(const Literal& src_literal, return CopyRange(src_literal, src_base, dest_base, copy_size); case F16: return CopyRange(src_literal, src_base, dest_base, copy_size); + case BF16: + return CopyRange(src_literal, src_base, dest_base, copy_size); case F32: return CopyRange(src_literal, src_base, dest_base, copy_size); case F64: @@ -200,6 +216,8 @@ Status Literal::Copy(const Literal& src_literal, return *Literal::CreateR0(0); case F16: return *Literal::CreateR0(static_cast(0.0f)); + case BF16: + return *Literal::CreateR0(static_cast(0.0f)); case F32: return *Literal::CreateR0(0); case F64: @@ -285,6 +303,9 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(-std::numeric_limits::infinity())); + case BF16: + return *Literal::CreateR0( + static_cast(-std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no minimum value"; case OPAQUE: @@ -321,6 +342,9 @@ Status Literal::Copy(const Literal& src_literal, case F16: return *Literal::CreateR0( static_cast(std::numeric_limits::infinity())); + case BF16: + return *Literal::CreateR0( + static_cast(std::numeric_limits::infinity())); case TUPLE: LOG(FATAL) << "tuple element type has no maximum value"; case OPAQUE: @@ -428,6 +452,7 @@ std::unique_ptr Literal::Transpose( // The shape with affine layout resulting from that operation will be // F32[8,11]{0,1}, since it leaves the original most minor (the 8 sized), the // most minor. + // // Essentially, given MinMaj(Di) the position of the Di dimension within the // minor to major vector, and given T(Di) the index that the original Di // dimension has within the transposed array, a layout is affine if @@ -536,6 +561,9 @@ string Literal::GetAsString( } case F16: return tensorflow::strings::StrCat(Get(multi_index)); + case BF16: + return tensorflow::strings::StrCat( + static_cast(Get(multi_index))); default: return tensorflow::strings::StrCat( "[", PrimitiveType_Name(shape().element_type()), "]"); @@ -743,6 +771,8 @@ void* Literal::MutableInternalData() { return reinterpret_cast(c64s_.data()); case F16: return reinterpret_cast(f16s_.data()); + case BF16: + return reinterpret_cast(bf16s_.data()); default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -785,6 +815,9 @@ void Literal::Reserve(int64 num_elements) { case F16: Resize(num_elements, static_cast(0.0f)); break; + case BF16: + Resize(num_elements, static_cast(0.0f)); + break; default: LOG(FATAL) << "primitive type not supported in literals: " << PrimitiveType_Name(shape().element_type()); @@ -824,6 +857,9 @@ tensorflow::Status Literal::ValidateLiteral() const { case F16: actual = f16s().size() / sizeof(half); break; + case BF16: + actual = bf16s().size(); + break; default: return tensorflow::errors::Unimplemented( "unhandled element type for literal validation: " + @@ -920,6 +956,7 @@ StatusOr> ConvertIfDestTypeMatches( CONVERT_IF_TYPES_MATCH(F16) CONVERT_IF_TYPES_MATCH(F32) CONVERT_IF_TYPES_MATCH(F64) + CONVERT_IF_TYPES_MATCH(BF16) #undef CONVERT_IF_TYPES_MATCH case C64: return ConvertToC64(src_literal); @@ -949,8 +986,9 @@ StatusOr> Literal::Convert( CONVERT_IF_DEST_TYPE_MATCHES(F16) CONVERT_IF_DEST_TYPE_MATCHES(F32) CONVERT_IF_DEST_TYPE_MATCHES(F64) + CONVERT_IF_DEST_TYPE_MATCHES(BF16) #undef CONVERT_IF_DEST_TYPE_MATCHES - // Other types are not yet supported. + // Other types are not yet supported. default: return InvalidArgument("Unimplemented: Convert from type %s to type %s", PrimitiveType_Name(shape().element_type()).c_str(), @@ -1019,6 +1057,8 @@ bool Literal::operator==(const Literal& other) const { return EqualElements(*this, other, 0, &multi_index); case F16: return EqualElements(*this, other, 0, &multi_index); + case BF16: + return EqualElements(*this, other, 0, &multi_index); case C64: return EqualElements(*this, other, 0, &multi_index); default: @@ -1128,13 +1168,18 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice() { - // TODO - there is an endianess problem here. fix it, or wait for uint16 - // support in protobuf auto values = mutable_f16s(); return tensorflow::gtl::MutableArraySlice(values->data(), values->size()); } +template <> +tensorflow::gtl::MutableArraySlice +Literal::GetMutableArraySlice() { + auto values = mutable_bf16s(); + return {values->data(), values->size()}; +} + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { CHECK_EQ(shape().element_type(), PRED); @@ -1205,6 +1250,12 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { f16s().size() / sizeof(half)); } +template <> +tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { + CHECK_EQ(shape().element_type(), BF16); + return {bf16s().data(), bf16s().size()}; +} + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const { @@ -1253,6 +1304,9 @@ bool Literal::IsAll(int8 value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); + case BF16: + return AllElementsEqualValue(*this, + static_cast(value)); case PRED: if (value == 0) { return AllElementsEqualValue(*this, false); @@ -1274,6 +1328,9 @@ bool Literal::IsAllFloat(float value) const { return AllElementsEqualValue(*this, value); case F16: return AllElementsEqualValue(*this, static_cast(value)); + case BF16: + return AllElementsEqualValue(*this, + static_cast(value)); default: return false; } @@ -1310,6 +1367,8 @@ bool Literal::IsZero(tensorflow::gtl::ArraySlice indices) const { return Get(indices) == complex64(0.0f, 0.0f); case F16: return Get(indices) == static_cast(0.0f); + case BF16: + return Get(indices) == static_cast(0.0f); case PRED: return Get(indices) == false; default: @@ -1377,6 +1436,12 @@ void Literal::Resize(int64 num_elements, half value) { mutable_f16s()->resize(num_elements, value); } +template <> +void Literal::Resize(int64 num_elements, bfloat16 value) { + CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); + mutable_bf16s()->resize(num_elements, value); +} + template <> void Literal::Resize(int64 num_elements, complex64 value) { CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements); @@ -1425,6 +1490,19 @@ LiteralProto Literal::ToProto() const { *proto.mutable_f16s() = string(reinterpret_cast(f16s_.data()), f16s_.size() * sizeof(half)); + if (!kLittleEndian) { + ConvertEndianShort(const_cast(proto.mutable_f16s()->data()), + proto.f16s().size()); + } + break; + case BF16: + *proto.mutable_bf16s() = + string(reinterpret_cast(bf16s_.data()), + bf16s_.size() * sizeof(bfloat16)); + if (!kLittleEndian) { + ConvertEndianShort(const_cast(proto.mutable_bf16s()->data()), + proto.bf16s().size()); + } break; case F32: CopyToRepeatedField(proto.mutable_f32s(), f32s()); @@ -1493,6 +1571,21 @@ void Literal::CopyFromProto(const LiteralProto& literal_proto) { CHECK_EQ(0, s.size() % sizeof(half)); f16s_ = std::vector(s.size() / sizeof(half)); memcpy(f16s_.data(), s.data(), s.size()); + + if (!kLittleEndian) { + ConvertEndianShort(reinterpret_cast(f16s_.data()), s.size()); + } + break; + } + case BF16: { + const string& s(literal_proto.bf16s()); + CHECK_EQ(0, s.size() % sizeof(bfloat16)); + bf16s_ = std::vector(s.size() / sizeof(bfloat16)); + memcpy(bf16s_.data(), s.data(), s.size()); + + if (!kLittleEndian) { + ConvertEndianShort(reinterpret_cast(bf16s_.data()), s.size()); + } break; } case F32: diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 667f926c46..f37e529caf 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -163,6 +163,11 @@ class Literal { const std::vector& c64s() const { return c64s_; } std::vector* mutable_c64s() { return &c64s_; } + int bf16s_size() const { return bf16s().size(); } + bfloat16 bf16s(int i) const { return bf16s_[i]; } + const std::vector& bf16s() const { return bf16s_; } + std::vector* mutable_bf16s() { return &bf16s_; } + int tuple_literals_size() const { return tuple_literals().size(); } const Literal& tuple_literals(int i) const { return tuple_literals_[i]; } Literal* add_tuple_literals() { @@ -622,6 +627,7 @@ class Literal { std::vector u16s_; std::vector u32s_; std::vector u64s_; + std::vector bf16s_; std::vector f16s_; std::vector f32s_; std::vector f64s_; @@ -674,6 +680,9 @@ tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; +template <> +tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; + template <> tensorflow::gtl::ArraySlice Literal::GetArraySlice() const; @@ -714,6 +723,9 @@ tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); +template <> +tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); + template <> tensorflow::gtl::MutableArraySlice Literal::GetMutableArraySlice(); @@ -747,6 +759,9 @@ void Literal::Resize(int64 num_elements, double value); template <> void Literal::Resize(int64 num_elements, half value); +template <> +void Literal::Resize(int64 num_elements, bfloat16 value); + template <> void Literal::Resize(int64 num_elements, complex64 value); @@ -990,6 +1005,14 @@ inline half Literal::Get( return GetArraySlice()[linear_index]; } +template <> +inline bfloat16 Literal::Get( + tensorflow::gtl::ArraySlice multi_index) const { + CHECK(shape().element_type() == BF16); + int64 linear_index = LinearIndex(multi_index); + return GetArraySlice()[linear_index]; +} + template void Literal::Set(tensorflow::gtl::ArraySlice multi_index, NativeT value) { diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 6d596da4ad..816bb3c549 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -110,6 +110,18 @@ TEST_F(LiteralUtilTest, LiteralScalarToString) { auto c64_lit = Literal::CreateR0({3.14f, 2.78f}); ASSERT_EQ("(3.14, 2.78)", c64_lit->ToString()); + + auto bf16_lit = Literal::CreateR0(static_cast(0.5f)); + ASSERT_EQ("0.5", bf16_lit->ToString()); + + // 3.14 will be truncated to 3.125 in bfloat16 format. + auto bf16_lit_truncated = + Literal::CreateR0(static_cast(3.14f)); + ASSERT_EQ("3.125", bf16_lit_truncated->ToString()); + + auto bf16_lit_truncated2 = + Literal::CreateR0(static_cast(9.001f)); + ASSERT_EQ("9", bf16_lit_truncated2->ToString()); } TEST_F(LiteralUtilTest, LiteralVectorToString) { @@ -397,6 +409,18 @@ TEST_F(LiteralUtilTest, IsAll) { EXPECT_FALSE(Literal::CreateR2({{h8}, {h9}})->IsAll(8)); EXPECT_FALSE(Literal::CreateR2({{h9}, {h8}})->IsAll(8)); + bfloat16 b8(8.0f); + bfloat16 b9(9.0f); + + EXPECT_TRUE(Literal::CreateR2({{b8}, {b8}})->IsAll(8)); + EXPECT_FALSE(Literal::CreateR2({{b8}, {b9}})->IsAll(8)); + EXPECT_FALSE(Literal::CreateR2({{b9}, {b8}})->IsAll(8)); + + // 9.001 will be truncated to 9.0 + bfloat16 b91(9.001f); + bfloat16 b90(9.00f); + EXPECT_TRUE(Literal::CreateR2({{b91}, {b90}})->IsAll(9.0)); + complex64 c8_9 = {8, 9}; EXPECT_FALSE(Literal::CreateR2({{c8_9}, {c8_9}})->IsAll(8)); @@ -691,6 +715,30 @@ TEST_F(LiteralUtilTest, PopulateR2C64) { EXPECT_EQ(output, *expected); } +TEST_F(LiteralUtilTest, PopulateWithValueR0BF16) { + Literal output; + bfloat16 h(0.25f); + output.PopulateWithValue(h, {}); + auto expected = Literal::CreateR0(h); + EXPECT_EQ(output, *expected); +} + +TEST_F(LiteralUtilTest, PopulateWithValueR1BF16) { + Literal output; + bfloat16 h(0.5f); + output.PopulateWithValue(h, {3}); + auto expected = Literal::CreateR1({h, h, h}); + EXPECT_EQ(output, *expected); +} + +TEST_F(LiteralUtilTest, PopulateWithValueR2BF16) { + Literal output; + bfloat16 h(2.0f); + output.PopulateWithValue(h, {2, 2}); + auto expected = Literal::CreateR2({{h, h}, {h, h}}); + EXPECT_EQ(output, *expected); +} + TEST_F(LiteralUtilTest, PopulateWithValueR0F32) { Literal output; output.PopulateWithValue(2.5f, {}); @@ -975,6 +1023,14 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { {{half(26.0), half(0.0), half(28.0), half(0.0)}, {half(0.0), half(31.0), half(0.0), half(33.0)}}, }}, layout_r4_dim0major_); + auto bf16 = Literal::CreateR4WithLayout({{ + {{bfloat16(10.0), bfloat16(0.0), bfloat16(12.0), bfloat16(0.0)}, + {bfloat16(0.0), bfloat16(15.0), bfloat16(0.0), bfloat16(17.0)}}, + {{bfloat16(0.0), bfloat16(19.0), bfloat16(0.0), bfloat16(21.0)}, + {bfloat16(22.0), bfloat16(0.0), bfloat16(24.0), bfloat16(0.0)}}, + {{bfloat16(26.0), bfloat16(0.0), bfloat16(28.0), bfloat16(0.0)}, + {bfloat16(0.0), bfloat16(31.0), bfloat16(0.0), bfloat16(33.0)}}, + }}, layout_r4_dim0major_); auto f32 = Literal::CreateR4WithLayout({{ {{10.0f, 0.0f, 12.0f, 0.0f}, {0.0f, 15.0f, 0.0f, 17.0f}}, {{0.0f, 19.0f, 0.0f, 21.0f}, {22.0f, 0.0f, 24.0f, 0.0f}}, @@ -1008,6 +1064,12 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { conv = s8->Convert(PRED).ConsumeValueOrDie(); EXPECT_EQ(*conv, *pred); + conv = bf16->Convert(S32).ConsumeValueOrDie(); + EXPECT_EQ(*conv, *s32); + + conv = bf16->Convert(F32).ConsumeValueOrDie(); + EXPECT_EQ(*conv, *f32); + conv = pred->Convert(S32).ConsumeValueOrDie(); EXPECT_EQ(*conv, *int32_pred); diff --git a/tensorflow/compiler/xla/primitive_util.cc b/tensorflow/compiler/xla/primitive_util.cc index 2113b5e06f..2bce56b7bd 100644 --- a/tensorflow/compiler/xla/primitive_util.cc +++ b/tensorflow/compiler/xla/primitive_util.cc @@ -78,6 +78,11 @@ PrimitiveType NativeToPrimitiveType() { return F64; } +template <> +PrimitiveType NativeToPrimitiveType() { + return BF16; +} + template <> PrimitiveType NativeToPrimitiveType() { return F16; @@ -89,7 +94,7 @@ PrimitiveType NativeToPrimitiveType() { } bool IsFloatingPointType(PrimitiveType type) { - return type == F16 || type == F32 || type == F64; + return type == F16 || type == F32 || type == F64 || type == BF16; } bool IsComplexType(PrimitiveType type) { return type == C64; } @@ -118,6 +123,7 @@ int BitWidth(PrimitiveType type) { case S16: case U16: case F16: + case BF16: return 16; case U32: diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h index a49c8b86fc..19c6a13888 100644 --- a/tensorflow/compiler/xla/primitive_util.h +++ b/tensorflow/compiler/xla/primitive_util.h @@ -77,6 +77,8 @@ template <> PrimitiveType NativeToPrimitiveType(); template <> PrimitiveType NativeToPrimitiveType(); +template <> +PrimitiveType NativeToPrimitiveType(); // Complex template <> @@ -167,6 +169,11 @@ struct PrimitiveTypeToNative { using type = half; }; +template <> +struct PrimitiveTypeToNative { + using type = bfloat16; +}; + // Complex template <> struct PrimitiveTypeToNative { diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index 9abe30e3f3..05f2d06278 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS + #include "tensorflow/compiler/xla/service/backend.h" #include #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/platform_util.h" diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc index f8e260dd90..f385829cdf 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_test.cc @@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" #include #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/client/local_client.h" diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 88b77ccdd0..a722d1b3d9 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1450,6 +1450,10 @@ HloEvaluator::HloEvaluator() { typed_visitors_[F32] = MakeUnique>(this); typed_visitors_[F64] = MakeUnique>(this); typed_visitors_[C64] = MakeUnique>(this); + + typed_visitors_[BF16] = MakeUnique([](HloInstruction*) { + return Unimplemented("HloEvaluator: unhandled primitive type: BF16."); + }); typed_visitors_[TUPLE] = MakeUnique([](HloInstruction*) { return Unimplemented("HloEvaluator: unhandled primitive type: TUPLE."); }); diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index f463e57d99..158fb9a546 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/service/hlo_runner.h" @@ -19,8 +20,6 @@ limitations under the License. #include #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index b5eb81dfc6..4d0bafa908 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -263,6 +263,7 @@ StatusOr MakeShapeWithLayoutInternal( case S32: case S64: case F16: + case BF16: case F32: case F64: return true; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 95a52ecd2f..75c9a0d3fb 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -116,16 +116,18 @@ template ::testing::AssertionResult CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs) { auto ulhs = tensorflow::bit_cast(lhs); auto urhs = tensorflow::bit_cast(rhs); + auto lhs_double = static_cast(lhs); + auto rhs_double = static_cast(rhs); if (ulhs != urhs) { return ::testing::AssertionFailure() << tensorflow::strings::Printf( "floating values are not bitwise-equal; and equality testing " "was requested: %s=%g=%a vs %s=%g=%a", tensorflow::strings::StrCat(tensorflow::strings::Hex(ulhs)) .c_str(), - lhs, lhs, + lhs_double, lhs_double, tensorflow::strings::StrCat(tensorflow::strings::Hex(urhs)) .c_str(), - rhs, rhs); + rhs_double, rhs_double); } return ::testing::AssertionSuccess(); } @@ -149,6 +151,10 @@ template // Specializations for floating types that do bitwise comparisons when equality // comparison is requested. template <> +::testing::AssertionResult CompareEqual(bfloat16 lhs, bfloat16 rhs) { + return CompareFloatsBitwiseEqual(lhs, rhs); +} +template <> ::testing::AssertionResult CompareEqual(float lhs, float rhs) { return CompareFloatsBitwiseEqual(lhs, rhs); } @@ -238,6 +244,9 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, case U64: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; + case BF16: + match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); + break; case F32: match = ExpectLiteralsEqual(expected, actual, &multi_index, 0); break; diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index c11e1df0a7..d98875dbc2 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#define EIGEN_USE_THREADS #include "tensorflow/compiler/xla/tests/local_client_test_base.h" #include -#define EIGEN_USE_THREADS - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/map_util.h" diff --git a/tensorflow/compiler/xla/types.h b/tensorflow/compiler/xla/types.h index 3b19ca321c..9fa4297523 100644 --- a/tensorflow/compiler/xla/types.h +++ b/tensorflow/compiler/xla/types.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "third_party/eigen3/Eigen/Core" +#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/platform/types.h" #include @@ -32,6 +33,8 @@ using ::tensorflow::int16; using ::tensorflow::int32; using ::tensorflow::int64; +using ::tensorflow::bfloat16; + using ::tensorflow::uint8; using ::tensorflow::uint16; using ::tensorflow::uint32; diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 7146604708..eac8f2ff07 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -46,6 +46,12 @@ enum PrimitiveType { // converted to f16 from f32 at arbirary points in the computation. F16 = 10; F32 = 11; + + // Truncated 16 bit floating-point format. This is similar to IEEE's 16 bit + // floating-point format, but uses 1 bit for the sign, 8 bits for the exponent + // and 7 bits for the mantissa. + BF16 = 16; + F64 = 12; // Complex values of fixed width. @@ -63,6 +69,8 @@ enum PrimitiveType { // An opaque type used for passing context specific data to a custom // operation. OPAQUE = 14; + + // Next = 17 } // Describes the value held inside padding elements. @@ -310,7 +318,10 @@ message LiteralProto { repeated double f64s = 9; repeated float c64s = 12; // Stored as interleaved real, imag floats. repeated LiteralProto tuple_literals = 10; - bytes f16s = 11; // Note: the F16s are encoded in little endian byte order + // The F16s and BF16s are encoded in little endian byte order + bytes f16s = 11; + bytes bf16s = 13; + // Next = 14 } message WindowDimension { diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index af4e6a4411..6e45338751 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/bfloat16.h" +#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -27,6 +28,66 @@ TEST(Bfloat16Test, Simple) { EXPECT_EQ(0x4140, a.value); } +float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa, + uint32_t low_mantissa) { + return bit_cast((sign << 31) + (exponent << 23) + + (high_mantissa << 16) + low_mantissa); +} + +struct Bfloat16TestParam { + float input; + float expected; +}; + +class Bfloat16Test : public ::testing::Test, + public ::testing::WithParamInterface {}; + +TEST_P(Bfloat16Test, TruncateTest) { + bfloat16 a(GetParam().input); + if (std::isnan(GetParam().input)) { + EXPECT_TRUE(std::isnan(float(a)) || std::isinf(float(a))); + return; + } + EXPECT_EQ(GetParam().expected, float(a)); +} + +INSTANTIATE_TEST_CASE_P( + Bfloat16Test_Instantiation, Bfloat16Test, + ::testing::Values( + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1111010111000011), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(1, 0b10000000, 0b1001000, 0b1111010111000011), + BinaryToFloat(1, 0b10000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000001), + BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b11111111, 0b1111111, 0b1111111111111111), + BinaryToFloat(0, 0b11111111, 0b1111111, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(1, 0b10000000, 0b1001000, 0b1100000000000000), + BinaryToFloat(1, 0b10000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0100000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b00000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b00000000, 0b1001000, 0b0000000000000000)}, + Bfloat16TestParam{ + BinaryToFloat(0, 0b00000000, 0b1111111, 0b1100000000000000), + BinaryToFloat(0, 0b00000000, 0b1111111, 0b0000000000000000)})); + TEST(Bfloat16Test, Conversion) { float a[100]; for (int i = 0; i < 100; ++i) { diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index a630bee38d..2b080e13fd 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -44,6 +44,7 @@ typedef Eigen::QUInt16 quint16; // see framework/bfloat16.h for description. struct bfloat16 { EIGEN_DEVICE_FUNC bfloat16() {} + EIGEN_DEVICE_FUNC explicit bfloat16(const float v) { const uint16_t* p = reinterpret_cast(&v); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -53,20 +54,92 @@ struct bfloat16 { #endif } + template + explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) + : bfloat16(static_cast(val)) {} + + EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { + float result; + + uint16_t* q = reinterpret_cast(&result); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + q[0] = value; + q[1] = 0; +#else + q[0] = 0; + q[1] = value; +#endif + return result; + } + + EIGEN_DEVICE_FUNC explicit operator bool() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator Eigen::half() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator short() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator int() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator signed char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned char() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned int() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator unsigned long long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator long long() const { + return static_cast(float(*this)); + } + + EIGEN_DEVICE_FUNC explicit operator double() const { + return static_cast(float(*this)); + } + uint16_t value; }; +inline bool operator==(const bfloat16 a, const bfloat16 b) { + return a.value == b.value; +} + +inline bool operator!=(const bfloat16 a, const bfloat16 b) { + return a.value != b.value; +} + } // end namespace tensorflow namespace Eigen { template <> struct NumTraits : GenericNumTraits {}; -EIGEN_STRONG_INLINE bool operator==(const tensorflow::bfloat16 a, - const tensorflow::bfloat16 b) { - return a.value == b.value; -} - +using ::tensorflow::operator==; +using ::tensorflow::operator!=; } // namespace Eigen #ifdef COMPILER_MSVC -- GitLab From e8da93bde591c7cecdea50fd539a731031cc9ee2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 12:00:17 -0800 Subject: [PATCH 0354/1801] Internal change. PiperOrigin-RevId: 175566634 --- tensorflow/contrib/lite/toco/tflite/operator_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index 543a9bd06c..8e77c56d8a 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -319,10 +319,12 @@ TEST_F(OperatorTest, BuiltinMul) { TEST_F(OperatorTest, Svdf) { SvdfOperator op; op.fused_activation_function = FusedActivationFunctionType::kRelu; + op.rank = 1; auto output_toco_op = SerializeAndDeserialize(GetOperator("SVDF", OperatorType::kSvdf), op); EXPECT_EQ(op.fused_activation_function, output_toco_op->fused_activation_function); + EXPECT_EQ(op.rank, output_toco_op->rank); } TEST_F(OperatorTest, TensorFlowUnsupported) { -- GitLab From 333bdea9524dd2bacf626051dbdbbcfcc4b46122 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 12:33:52 -0800 Subject: [PATCH 0355/1801] Automated g4 rollback of changelist 175304150 PiperOrigin-RevId: 175571632 --- tensorflow/compiler/xla/service/shaped_buffer.cc | 8 -------- tensorflow/compiler/xla/service/shaped_buffer.h | 4 ---- 2 files changed, 12 deletions(-) diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index a57ebf59e7..a2a442eb1a 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -63,14 +63,6 @@ void ShapedBuffer::clear() { } } -void ShapedBuffer::AddBufferAtIndex( - const perftools::gputools::DeviceMemoryBase& buffer, - const ShapeIndex& shape_index) { - *mutable_shape_index_to_buffer_entry()->mutable_element(shape_index) = - buffers().size(); - mutable_buffers()->push_back(buffer); -} - const se::DeviceMemoryBase& ShapedBuffer::buffer( const ShapeIndex& index) const { return buffers_[shape_index_to_buffer_entry_.element(index)]; diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index b440948700..e5ea06fb13 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -75,10 +75,6 @@ class ShapedBuffer { // Set all device memory pointers in the object to null. void clear(); - // Adds a new buffer at the given shape index. - void AddBufferAtIndex(const perftools::gputools::DeviceMemoryBase& buffer, - const ShapeIndex& shape_index); - protected: // The shape of the device buffer with layout. const Shape shape_; -- GitLab From 90222dd7b29ff2597bc7f8d0f92db17324f591b0 Mon Sep 17 00:00:00 2001 From: James Qin Date: Mon, 13 Nov 2017 12:59:04 -0800 Subject: [PATCH 0356/1801] Fix CuDNNCompatibleGRU after GRUCell refactorization PiperOrigin-RevId: 175574730 --- .../cudnn_rnn/python/ops/cudnn_rnn_ops.py | 93 +++++++++++++------ 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py index 9f74899693..6c526b2c75 100644 --- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py +++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.cudnn_rnn.ops import gen_cudnn_rnn_ops -from tensorflow.contrib.rnn.python.ops import core_rnn_cell from tensorflow.contrib.rnn.python.ops import lstm_ops from tensorflow.contrib.util import loader from tensorflow.python.framework import common_shapes @@ -29,6 +28,7 @@ from tensorflow.python.layers import base as base_layer from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope as vs @@ -55,6 +55,11 @@ CUDNN_INPUT_LINEAR_MODE = "linear_input" CUDNN_INPUT_SKIP_MODE = "skip_input" CUDNN_INPUT_AUTO_MODE = "auto_select" +# pylint:disable=protected-access +_BIAS_VARIABLE_NAME = rnn_cell_impl._BIAS_VARIABLE_NAME +_WEIGHTS_VARIABLE_NAME = rnn_cell_impl._WEIGHTS_VARIABLE_NAME +# pylint:enable=protected-access + class CudnnCompatibleLSTMCell(lstm_ops.LSTMBlockCell): """Cudnn Compatible LSTMCell. @@ -87,9 +92,9 @@ class CudnnCompatibleGRUCell(rnn_cell_impl.GRUCell): Cudnn compatible GRU (from Cudnn library user guide): ```python r_t = sigma(x_t * W_r + h_t-1 * R_h + b_Wr + b_Rr) # reset gate - i_t = sigma(x_t * W_i + h_t-1 * R_i + b_Wi + b_Ru) # update gate + u_t = sigma(x_t * W_u + h_t-1 * R_u + b_Wu + b_Ru) # update gate h'_t = tanh(x_t * W_h + r_t .* (h_t-1 * R_h + b_Rh) + b_Wh) # new memory gate - h_t = (1 - i_t) .* h'_t + i_t .* h_t-1 + h_t = (1 - u_t) .* h'_t + u_t .* h_t-1 ``` Other GRU (see @{tf.nn.rnn_cell.GRUCell} and @{tf.contrib.rnn.GRUBlockCell}): @@ -112,33 +117,65 @@ class CudnnCompatibleGRUCell(rnn_cell_impl.GRUCell): reuse=reuse, kernel_initializer=kernel_initializer) + def build(self, inputs_shape): + if inputs_shape[1].value is None: + raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" + % inputs_shape) + + input_depth = inputs_shape[1].value + self._gate_kernel = self.add_variable( + "gates/%s" % _WEIGHTS_VARIABLE_NAME, + shape=[input_depth + self._num_units, 2 * self._num_units], + initializer=self._kernel_initializer) + self._gate_bias = self.add_variable( + "gates/%s" % _BIAS_VARIABLE_NAME, + shape=[2 * self._num_units], + initializer=( + self._bias_initializer + if self._bias_initializer is not None + else init_ops.constant_initializer(1.0, dtype=self.dtype))) + + self._candidate_input_kernel = self.add_variable( + "candidate/input_projection/%s" % _WEIGHTS_VARIABLE_NAME, + shape=[input_depth, self._num_units], + initializer=self._kernel_initializer) + self._candidate_hidden_kernel = self.add_variable( + "candidate/hidden_projection/%s" % _WEIGHTS_VARIABLE_NAME, + shape=[self._num_units, self._num_units], + initializer=self._kernel_initializer) + + self._candidate_input_bias = self.add_variable( + "candidate/input_projection/%s" % _BIAS_VARIABLE_NAME, + shape=[self._num_units], + initializer=( + self._bias_initializer + if self._bias_initializer is not None + else init_ops.zeros_initializer(dtype=self.dtype))) + self._candidate_hidden_bias = self.add_variable( + "candidate/hidden_projection/%s" % _BIAS_VARIABLE_NAME, + shape=[self._num_units], + initializer=( + self._bias_initializer + if self._bias_initializer is not None + else init_ops.zeros_initializer(dtype=self.dtype))) + def call(self, inputs, state): """Gated recurrent unit (GRU) with nunits cells.""" - with vs.variable_scope("gates"): # Reset gate and update gate. - # We start with bias of 1.0 to not reset and not update. - bias_ones = self._bias_initializer - if self._bias_initializer is None: - dtype = inputs.dtype - bias_ones = init_ops.constant_initializer(1.0, dtype=dtype) - # pylint: disable=protected-access - value = math_ops.sigmoid( - core_rnn_cell._linear([inputs, state], 2 * self._num_units, True, - bias_ones, self._kernel_initializer)) - r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) - # pylint: enable=protected-access - with vs.variable_scope("candidate"): - # pylint: disable=protected-access - with vs.variable_scope("input_projection"): - hi = core_rnn_cell._linear(inputs, self._num_units, True, - self._bias_initializer, - self._kernel_initializer) - with vs.variable_scope("hidden_projection"): - hh = r * (core_rnn_cell._linear(state, self._num_units, True, - self._bias_initializer, - self._kernel_initializer)) - # pylint: enable=protected-access - c = self._activation(hi + hh) - new_h = u * state + (1 - u) * c + gate_inputs = math_ops.matmul( + array_ops.concat([inputs, state], 1), self._gate_kernel) + gate_inputs = nn_ops.bias_add(gate_inputs, self._gate_bias) + + value = math_ops.sigmoid(gate_inputs) + r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) + + candidate = nn_ops.bias_add( + math_ops.matmul(inputs, self._candidate_input_kernel), + self._candidate_input_bias) + candidate += r * nn_ops.bias_add( + math_ops.matmul(state, self._candidate_hidden_kernel), + self._candidate_hidden_bias) + candidate = self._activation(candidate) + new_h = (1-u) * candidate + u * state return new_h, new_h -- GitLab From bac56b37be7736c9da9a3257696a9c1241327d60 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Mon, 13 Nov 2017 13:07:45 -0800 Subject: [PATCH 0357/1801] Validate shapes when updating edges from Python. Uses MergeInput from shape_inference to check if the new input is compatible with the preexisting shape. Also this changes the MergeInput method. Previously, MergeInput would only return true if the shapes differed *and* the merge was successful. Now, MergeInput returns true only if the merge is successful. PiperOrigin-RevId: 175576173 --- tensorflow/c/python_api.cc | 27 ++++++++++++ .../core/common_runtime/shape_refiner.cc | 3 +- .../core/common_runtime/shape_refiner_test.cc | 12 +++++- tensorflow/core/framework/shape_inference.cc | 7 ++-- tensorflow/core/framework/shape_inference.h | 13 ++---- tensorflow/core/graph/graph_test.cc | 7 ++++ tensorflow/python/framework/ops.py | 14 +------ tensorflow/python/framework/ops_test.py | 41 ++++++++++++++++--- 8 files changed, 93 insertions(+), 31 deletions(-) diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index c67007dca0..ba5a9268b4 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -46,6 +46,33 @@ void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device) { void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst, TF_Status* status) { mutex_lock l(graph->mu); + tensorflow::shape_inference::InferenceContext* ic = + graph->refiner.GetContext(&new_src.oper->node); + + if (ic->num_outputs() <= new_src.index) { + status->status = tensorflow::errors::OutOfRange( + "Cannot update edge. Output index [", new_src.index, + "] is greater than the number of total outputs [", ic->num_outputs(), + "]."); + return; + } + tensorflow::shape_inference::ShapeHandle shape = ic->output(new_src.index); + + tensorflow::shape_inference::InferenceContext* ic_dst = + graph->refiner.GetContext(&dst.oper->node); + if (ic_dst->num_inputs() <= dst.index) { + status->status = tensorflow::errors::OutOfRange( + "Cannot update edge. Input index [", dst.index, + "] is greater than the number of total inputs [", ic_dst->num_inputs(), + "]."); + return; + } + if (!ic_dst->MergeInput(dst.index, shape)) { + status->status = tensorflow::errors::InvalidArgument( + "Cannot update edge, incompatible shapes: ", ic_dst->DebugString(shape), + " and ", ic_dst->DebugString(ic_dst->input(dst.index)), "."); + return; + } status->status = graph->graph.UpdateEdge(&new_src.oper->node, new_src.index, &dst.oper->node, dst.index); } diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 1ed5eb3f22..8e314c7ea5 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -333,7 +333,8 @@ Status ShapeRefiner::UpdateNode(const Node* node, bool relax, bool* refined) { InferenceContext* c = iter->second->get_context(); DCHECK_GE(dst_input, 0); ShapeHandle existing_input = node_context->input(dst_input); - if (!relax && node_context->MergeInput(dst_input, c->output(src_output))) { + if (!relax && node_context->MergeInput(dst_input, c->output(src_output)) && + !existing_input.SameHandle(node_context->input(dst_input))) { *refined = true; } else if (relax) { if (node_context->RelaxInput(dst_input, c->output(src_output))) { diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc index 676fc7cced..ff32e855d5 100644 --- a/tensorflow/core/common_runtime/shape_refiner_test.cc +++ b/tensorflow/core/common_runtime/shape_refiner_test.cc @@ -1259,7 +1259,17 @@ TEST_F(ShapeRefinerTest, IncrementalUpdates) { EXPECT_FALSE(refined); ctx = m.GetContext(dequeue); EXPECT_EQ("[?,7]", ctx->DebugString(ctx->output(0))); - ASSERT_FALSE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0))); + EXPECT_FALSE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0))); + + // Inject a shape of the same handle and expect refined to not change. + ctx = m.GetContext(queue); + shape_inference::ShapeHandle shp2 = shp; + ctx->set_output_handle_shapes_and_types( + 0, std::vector{{shp2, DT_FLOAT}}); + refined = false; + TF_ASSERT_OK(m.UpdateNode(dequeue, /*relax=*/false, &refined)); + EXPECT_FALSE(refined); + EXPECT_TRUE(SameHandle(ctx->Dim(shp, 0), ctx->Dim(shp2, 0))); } void TestSimpleFunctionInference(bool enable_function_inference, diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 5d6bf559bb..fe0742e1db 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -544,9 +544,10 @@ Status InferenceContext::Merge(ShapeHandle s0, ShapeHandle s1, return_s1 = false; } else if (v0 != v1) { *out = nullptr; - return errors::InvalidArgument("Dimension ", i, - " in both shapes must be equal, but are ", - Value(d0), " and ", Value(d1)); + return errors::InvalidArgument( + "Dimension ", i, " in both shapes must be equal, but are ", Value(d0), + " and ", Value(d1), ". Shapes are ", DebugString(s0), " and ", + DebugString(s1), "."); } } diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index 485980e42e..b12d37b4c0 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -237,24 +237,19 @@ class InferenceContext { // - For any one dimension, if the values for that dimension in both shapes // are known, then the values must match. // - If one shape has equal or more information than the other shape in every - // dimension, the shape with more information will be returned. Otherwise a - // new shape holding the combined information of the input shapes will be - // returned. + // dimension, the new shape will become the shape with more information. // - Example: merging [2,?] and [?,2] results in [2,2] // - Example: [2,2] cannot be merged with [1,2] // // This requires idx to be in the [0, num_inputs) range. If the merge is - // successful and the new shape differs from the old one, store the new shape - // and return true. Return false otherwise. + // successful, return true. Return false otherwise. bool MergeInput(int idx, ShapeHandle shape) { ShapeHandle new_shape; - if (!Merge(inputs_[idx], shape, &new_shape).ok() || - inputs_[idx].SameHandle(new_shape)) { - return false; - } + if (!Merge(inputs_[idx], shape, &new_shape).ok()) return false; inputs_[idx] = new_shape; return true; } + // Relax the stored shape of the input in position idx with according // to the following rules: // diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index e5d57facaa..7686cef219 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -511,6 +511,13 @@ TEST_F(GraphTest, UpdateEdge) { EXPECT_EQ( s.error_message(), "Node 'A' (type: 'OneOutput', num of outputs: 1) does not have output 1"); + + // Update a's 1st input which is out of range. + s = graph_.UpdateEdge(c, 0, a, 0); + EXPECT_FALSE(s.ok()); + EXPECT_EQ( + s.error_message(), + "Node 'A' (type: 'OneOutput', num of inputs: 0) does not have input 0"); } TEST_F(GraphTest, InputEdges) { diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index d2608845ac..b0abbfc7dc 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1805,7 +1805,7 @@ class Operation(object): tensor._add_consumer(self) # pylint: disable=protected-access self._recompute_node_def() - def _update_input(self, index, tensor, dtype=None): + def _update_input(self, index, tensor): """Update the input to this operation at the given index. NOTE: This is for TF internal use only. Please don't use it. @@ -1813,8 +1813,6 @@ class Operation(object): Args: index: the index of the input to update. tensor: the Tensor to be used as the input at the given index. - dtype: tf.DType: type of the input; defaults to - the tensor's dtype. Raises: TypeError: if tensor is not a Tensor, @@ -1832,17 +1830,9 @@ class Operation(object): self._tf_input(index), status) else: - if dtype is None: - dtype = tensor.dtype - else: - dtype = dtypes.as_dtype(dtype) - if not dtype.is_compatible_with(tensor.dtype): - raise TypeError( - "Cannot convert a tensor of type %s to an input of type %s" % - (tensor.dtype.name, dtype.name)) self._inputs[index].consumers().remove(self) self._inputs[index] = tensor - self._input_types_val[index] = dtype + self._input_types_val[index] = tensor.dtype tensor._add_consumer(self) # pylint: disable=protected-access self._recompute_node_def() diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 4e931e00c5..1be306ddc5 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -492,8 +492,6 @@ class OperationTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(ValueError, "must be from the same graph"): z.op._update_input(0, x) # pylint: disable=protected-access - # TODO(nolivia): check the shape/type in _update_input() instead of depending - # on run to do that. def testUpdateInputTypeError(self): g = ops.Graph() with g.as_default(): @@ -509,6 +507,37 @@ class OperationTest(test_util.TensorFlowTestCase): "with expected int32"): sess.run(z) + def testUpdateInputShapeError(self): + # C-API throws the error differently. + if ops._USE_C_API: + return + g = ops.Graph() + with g.as_default(): + w = constant_op.constant(2, shape=[3, 1]) + x = constant_op.constant(0, shape=[3, 1]) + y = constant_op.constant(1, shape=[2, 2]) + z = w + x + z.op._update_input(0, y) # pylint: disable=protected-access + + with session.Session(graph=g) as sess: + with self.assertRaisesRegexp(errors.InvalidArgumentError, + r"Incompatible shapes: \[2,2\] vs. \[3,1\]"): + sess.run(z) + + def testUpdateInputShapeErrorC(self): + if not ops._USE_C_API: + return + g = ops.Graph() + with g.as_default(): + w = constant_op.constant(2, shape=[3, 1]) + x = constant_op.constant(0, shape=[3, 1]) + y = constant_op.constant(1, shape=[2, 2]) + z = w + x + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r"Cannot update edge, incompatible shapes: \[2,2\] and \[3,1\]"): + z.op._update_input(0, y) # pylint: disable=protected-access + def testUpdateInputOutOfRange(self): # C-API throws the error differently. if ops._USE_C_API: return @@ -524,9 +553,11 @@ class OperationTest(test_util.TensorFlowTestCase): g = ops.Graph() with g.as_default(): x = constant_op.constant(1) - with self.assertRaisesRegexp(errors.OutOfRangeError, - r"Node 'Const' \(type: 'Const', " - r"num of inputs: 0\) does not have input 1"): + with self.assertRaisesRegexp( + errors.OutOfRangeError, + r"Cannot update edge. Input index \[1\] is greater than the number of " + r"total inputs \[0\]." + ): x.op._update_input(1, x) # pylint: disable=protected-access def testOpDef(self): -- GitLab From 9642c1c0164ac87acfff2d52f82626eb38667dd8 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 13 Nov 2017 13:36:50 -0800 Subject: [PATCH 0358/1801] Re-introduce support for key-value records into tf.data.read_batch_features. PiperOrigin-RevId: 175580235 --- tensorflow/contrib/data/python/ops/readers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 08d6a7a605..632082b5f1 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -171,6 +171,8 @@ def read_batch_features(file_pattern, dataset = reader(filenames, *reader_args) else: dataset = reader(filenames) + if dataset.output_types == (dtypes.string, dtypes.string): + dataset = dataset.map(lambda _, v: v) if num_epochs != 1: dataset = dataset.repeat(num_epochs) if randomize_input: -- GitLab From 061c3597b84d45a9878b8adf831e39a5573859ec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 14:21:04 -0800 Subject: [PATCH 0359/1801] Upgrade gRPC version used in OSS. Fix-forward of #14262. Remove namespace-compatibility hack. PiperOrigin-RevId: 175586521 --- configure.py | 5 + tensorflow/contrib/cmake/external/grpc.cmake | 10 +- .../contrib/cmake/patches/grpc/CMakeLists.txt | 14415 ---------------- .../contrib/verbs/grpc_verbs_service_impl.cc | 9 +- .../contrib/verbs/grpc_verbs_service_impl.h | 11 +- tensorflow/core/distributed_runtime/rpc/BUILD | 11 - .../rpc/grpc_master_service_impl.cc | 50 +- .../rpc/grpc_master_service_impl.h | 15 +- .../rpc/grpc_namespace_compat.h | 32 - .../rpc/grpc_worker_service_impl.cc | 4 +- .../rpc/grpc_worker_service_impl.h | 1 - tensorflow/workspace.bzl | 23 +- third_party/grpc/grpc.patch | 105 - tools/bazel.rc | 1 + 14 files changed, 67 insertions(+), 14625 deletions(-) delete mode 100644 tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt delete mode 100644 tensorflow/core/distributed_runtime/rpc/grpc_namespace_compat.h delete mode 100644 third_party/grpc/grpc.patch diff --git a/configure.py b/configure.py index 51d860812e..0d1afbfe15 100644 --- a/configure.py +++ b/configure.py @@ -976,6 +976,10 @@ def create_android_bazelrc_configs(): write_to_bazelrc('build:android_arm64 --cpu=arm64-v8a') +def set_grpc_build_flags(): + write_to_bazelrc('build --define grpc_no_ares=true') + + def main(): # Make a copy of os.environ to be clear when functions and getting and setting # environment variables. @@ -1043,6 +1047,7 @@ def main(): set_mpi_home(environ_cp) set_other_mpi_vars(environ_cp) + set_grpc_build_flags() set_cc_opt_flags(environ_cp) set_mkl() set_monolithic() diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 464aad74c6..41ea0b48a4 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc) -set(GRPC_TAG 781fd6f6ea03645a520cd5c675da67ab61f87e4b) +set(GRPC_TAG 54e8f37e537794c2d814c1604c1282125f64f093) if(WIN32) set(grpc_STATIC_LIBRARIES @@ -28,10 +28,11 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/libcares.a) + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() +add_definitions(-DGRPC_ARES=0) + ExternalProject_Add(grpc PREFIX grpc DEPENDS protobuf zlib @@ -39,9 +40,6 @@ ExternalProject_Add(grpc GIT_TAG ${GRPC_TAG} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 - # TODO(jhseu): Remove this PATCH_COMMAND once grpc removes the dependency - # on "grpc" from the "grpc++_unsecure" rule. - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD} BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release --target grpc++_unsecure COMMAND ${CMAKE_COMMAND} --build . --config Release --target grpc_cpp_plugin INSTALL_COMMAND "" diff --git a/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt b/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt deleted file mode 100644 index 84722c5ca2..0000000000 --- a/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt +++ /dev/null @@ -1,14415 +0,0 @@ -# GRPC global cmake file -# This currently builds C and C++ code. -# This file has been automatically generated from a template file. -# Please look at the templates directory instead. -# This file can be regenerated from the template by running -# tools/buildgen/generate_projects.sh -# -# Copyright 2015 gRPC authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - - -cmake_minimum_required(VERSION 2.8) - -set(PACKAGE_NAME "grpc") -set(PACKAGE_VERSION "1.5.0-dev") -set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") -set(PACKAGE_TARNAME "${PACKAGE_NAME}-${PACKAGE_VERSION}") -set(PACKAGE_BUGREPORT "https://github.com/grpc/grpc/issues/") -project(${PACKAGE_NAME} C CXX) - -set(gRPC_INSTALL_BINDIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables") -set(gRPC_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries") -set(gRPC_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_PREFIX}/include" CACHE PATH "Installation directory for headers") -set(gRPC_INSTALL_CMAKEDIR "${CMAKE_INSTALL_PREFIX}/lib/cmake/${PACKAGE_NAME}" CACHE PATH "Installation directory for cmake config files") - -# Options -option(gRPC_BUILD_TESTS "Build tests" OFF) - -set(gRPC_INSTALL_default ON) -if (NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) - # Disable gRPC_INSTALL by default if building as a submodule - set(gRPC_INSTALL_default OFF) -endif() -set(gRPC_INSTALL ${gRPC_INSTALL_default} CACHE BOOL - "Generate installation target: gRPC_ZLIB_PROVIDER, gRPC_CARES_PROVIDER, gRPC_SSL_PROVIDER and gRPC_PROTOBUF_PROVIDER must all be \"package\"") - -set(gRPC_ZLIB_PROVIDER "module" CACHE STRING "Provider of zlib library") -set_property(CACHE gRPC_ZLIB_PROVIDER PROPERTY STRINGS "module" "package") - -set(gRPC_CARES_PROVIDER "module" CACHE STRING "Provider of c-ares library") -set_property(CACHE gRPC_CARES_PROVIDER PROPERTY STRINGS "module" "package") - -set(gRPC_SSL_PROVIDER "module" CACHE STRING "Provider of ssl library") -set_property(CACHE gRPC_SSL_PROVIDER PROPERTY STRINGS "module" "package") - -set(gRPC_PROTOBUF_PROVIDER "module" CACHE STRING "Provider of protobuf library") -set_property(CACHE gRPC_PROTOBUF_PROVIDER PROPERTY STRINGS "module" "package") - -set(gRPC_PROTOBUF_PACKAGE_TYPE "" CACHE STRING "Algorithm for searching protobuf package") -set_property(CACHE gRPC_PROTOBUF_PACKAGE_TYPE PROPERTY STRINGS "CONFIG" "MODULE") - -set(gRPC_GFLAGS_PROVIDER "module" CACHE STRING "Provider of gflags library") -set_property(CACHE gRPC_GFLAGS_PROVIDER PROPERTY STRINGS "module" "package") - -set(gRPC_BENCHMARK_PROVIDER "module" CACHE STRING "Provider of benchmark library") -set_property(CACHE gRPC_BENCHMARK_PROVIDER PROPERTY STRINGS "module" "package") - -set(gRPC_USE_PROTO_LITE OFF CACHE BOOL "Use the protobuf-lite library") - -if(UNIX) - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(_gRPC_PLATFORM_LINUX ON) - elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set(_gRPC_PLATFORM_MAC ON) - else() - set(_gRPC_PLATFORM_POSIX ON) - endif() -endif() -if(WIN32) - set(_gRPC_PLATFORM_WINDOWS ON) -endif() - -set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) - -if (MSVC) - include(cmake/msvc_static_runtime.cmake) - add_definitions(-D_WIN32_WINNT=0x600 -D_SCL_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_WARNINGS -D_WINSOCK_DEPRECATED_NO_WARNINGS) - # needed to compile protobuf - add_definitions(/wd4065 /wd4506) - # TODO(jtattermusch): revisit C4267 occurrences throughout the code - add_definitions(/wd4267) -endif() - -if (gRPC_USE_PROTO_LITE) - set(_gRPC_PROTOBUF_LIBRARY_NAME "libprotobuf-lite") - add_definitions("-DGRPC_USE_PROTO_LITE") -else() - set(_gRPC_PROTOBUF_LIBRARY_NAME "libprotobuf") -endif() - -if("${gRPC_ZLIB_PROVIDER}" STREQUAL "module") - if(NOT ZLIB_ROOT_DIR) - set(ZLIB_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/zlib) - endif() - set(ZLIB_INCLUDE_DIR "${ZLIB_ROOT_DIR}") - if(EXISTS "${ZLIB_ROOT_DIR}/CMakeLists.txt") - # TODO(jtattermusch): workaround for https://github.com/madler/zlib/issues/218 - include_directories(${ZLIB_INCLUDE_DIR}) - - add_subdirectory(${ZLIB_ROOT_DIR} third_party/zlib) - if(TARGET zlibstatic) - set(_gRPC_ZLIB_LIBRARIES zlibstatic) - endif() - else() - message(WARNING "gRPC_ZLIB_PROVIDER is \"module\" but ZLIB_ROOT_DIR is wrong") - endif() - if(gRPC_INSTALL) - message(WARNING "gRPC_INSTALL will be forced to FALSE because gRPC_ZLIB_PROVIDER is \"module\"") - set(gRPC_INSTALL FALSE) - endif() -elseif("${gRPC_ZLIB_PROVIDER}" STREQUAL "package") - find_package(ZLIB) - if(TARGET ZLIB::ZLIB) - set(_gRPC_ZLIB_LIBRARIES ZLIB::ZLIB) - endif() - set(_gRPC_FIND_ZLIB "if(NOT ZLIB_FOUND)\n find_package(ZLIB)\nendif()") -endif() - -if("${gRPC_CARES_PROVIDER}" STREQUAL "module") - if(NOT CARES_ROOT_DIR) - set(CARES_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/c-ares) - endif() - string(TOLOWER ${CMAKE_SYSTEM_NAME} CARES_SYSTEM_NAME) - set(CARES_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/cares/cares") - set(CARES_BUILD_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/cares") - set(CARES_PLATFORM_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/cares/config_${CARES_SYSTEM_NAME}") - if(EXISTS "${CARES_ROOT_DIR}/CMakeLists.txt") - if("${CARES_SYSTEM_NAME}" MATCHES "windows") - add_definitions(-DCARES_STATICLIB=1) - add_definitions(-DWIN32_LEAN_AND_MEAN=1) - else() - add_definitions(-DHAVE_CONFIG_H=1) - add_definitions(-D_GNU_SOURCE=1) - endif() - add_subdirectory(src/c-ares third_party/cares) - if(TARGET cares) - set(_gRPC_CARES_LIBRARIES cares) - endif() - else() - message(WARNING "gRPC_CARES_PROVIDER is \"module\" but CARES_ROOT_DIR is wrong") - endif() - if(gRPC_INSTALL) - message(WARNING "gRPC_INSTALL will be forced to FALSE because gRPC_CARES_PROVIDER is \"module\"") - set(gRPC_INSTALL FALSE) - endif() -elseif("${gRPC_CARES_PROVIDER}" STREQUAL "package") - find_package(c-ares CONFIG) - if(TARGET c-ares::cares) - set(_gRPC_CARES_LIBRARIES c-ares::cares) - endif() - set(_gRPC_FIND_CARES "if(NOT c-ares_FOUND)\n find_package(c-ares CONFIG)\nendif()") -endif() - -if("${gRPC_PROTOBUF_PROVIDER}" STREQUAL "module") - # Building the protobuf tests require gmock what is not part of a standard protobuf checkout. - # Disable them unless they are explicitly requested from the cmake command line (when we assume - # gmock is downloaded to the right location inside protobuf). - if(NOT protobuf_BUILD_TESTS) - set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests") - endif() - # Disable building protobuf with zlib. Building protobuf with zlib breaks - # the build if zlib is not installed on the system. - if(NOT protobuf_WITH_ZLIB) - set(protobuf_WITH_ZLIB OFF CACHE BOOL "Build protobuf with zlib.") - endif() - if(NOT PROTOBUF_ROOT_DIR) - set(PROTOBUF_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/protobuf) - endif() - set(PROTOBUF_WELLKNOWN_IMPORT_DIR ${PROTOBUF_ROOT_DIR}/src) - if(EXISTS "${PROTOBUF_ROOT_DIR}/cmake/CMakeLists.txt") - set(protobuf_MSVC_STATIC_RUNTIME OFF CACHE BOOL "Link static runtime libraries") - add_subdirectory(${PROTOBUF_ROOT_DIR}/cmake third_party/protobuf) - if(TARGET ${_gRPC_PROTOBUF_LIBRARY_NAME}) - set(_gRPC_PROTOBUF_LIBRARIES ${_gRPC_PROTOBUF_LIBRARY_NAME}) - endif() - if(TARGET libprotoc) - set(_gRPC_PROTOBUF_PROTOC_LIBRARIES libprotoc) - endif() - if(TARGET protoc) - set(_gRPC_PROTOBUF_PROTOC protoc) - endif() - else() - message(WARNING "gRPC_PROTOBUF_PROVIDER is \"module\" but PROTOBUF_ROOT_DIR is wrong") - endif() - if(gRPC_INSTALL) - message(WARNING "gRPC_INSTALL will be forced to FALSE because gRPC_PROTOBUF_PROVIDER is \"module\"") - set(gRPC_INSTALL FALSE) - endif() -elseif("${gRPC_PROTOBUF_PROVIDER}" STREQUAL "package") - find_package(Protobuf ${gRPC_PROTOBUF_PACKAGE_TYPE}) - if(Protobuf_FOUND OR PROTOBUF_FOUND) - if(TARGET protobuf::${_gRPC_PROTOBUF_LIBRARY_NAME}) - set(_gRPC_PROTOBUF_LIBRARIES protobuf::${_gRPC_PROTOBUF_LIBRARY_NAME}) - else() - set(_gRPC_PROTOBUF_LIBRARIES ${PROTOBUF_LIBRARIES}) - endif() - if(TARGET protobuf::libprotoc) - set(_gRPC_PROTOBUF_PROTOC_LIBRARIES protobuf::libprotoc) - else() - set(_gRPC_PROTOBUF_PROTOC_LIBRARIES ${PROTOBUF_PROTOC_LIBRARIES}) - endif() - if(TARGET protobuf::protoc) - set(_gRPC_PROTOBUF_PROTOC protobuf::protoc) - else() - set(_gRPC_PROTOBUF_PROTOC ${PROTOBUF_PROTOC_EXECUTABLE}) - endif() - set(_gRPC_FIND_PROTOBUF "if(NOT Protobuf_FOUND AND NOT PROTOBUF_FOUND)\n find_package(Protobuf ${gRPC_PROTOBUF_PACKAGE_TYPE})\nendif()") - endif() - if(PROTOBUF_FOUND) - include_directories(${PROTOBUF_INCLUDE_DIRS}) - endif() - set(PROTOBUF_WELLKNOWN_IMPORT_DIR /usr/local/include) -endif() - -if("${gRPC_SSL_PROVIDER}" STREQUAL "module") - if(NOT BORINGSSL_ROOT_DIR) - set(BORINGSSL_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/boringssl) - endif() - if(EXISTS "${BORINGSSL_ROOT_DIR}/CMakeLists.txt") - set(OPENSSL_NO_ASM ON) # make boringssl buildable with Visual Studio - add_subdirectory(${BORINGSSL_ROOT_DIR} third_party/boringssl) - if(TARGET ssl) - set(_gRPC_SSL_LIBRARIES ssl) - endif() - else() - message(WARNING "gRPC_SSL_PROVIDER is \"module\" but BORINGSSL_ROOT_DIR is wrong") - endif() - if(gRPC_INSTALL) - message(WARNING "gRPC_INSTALL will be forced to FALSE because gRPC_SSL_PROVIDER is \"module\"") - set(gRPC_INSTALL FALSE) - endif() -elseif("${gRPC_SSL_PROVIDER}" STREQUAL "package") - find_package(OpenSSL) - if(TARGET OpenSSL::SSL) - set(_gRPC_SSL_LIBRARIES OpenSSL::SSL) - endif() - set(_gRPC_FIND_SSL "if(NOT OpenSSL_FOUND)\n find_package(OpenSSL)\nendif()") -endif() - -if("${gRPC_GFLAGS_PROVIDER}" STREQUAL "module") - if(NOT GFLAGS_ROOT_DIR) - set(GFLAGS_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/gflags) - endif() - if(EXISTS "${GFLAGS_ROOT_DIR}/CMakeLists.txt") - add_subdirectory(${GFLAGS_ROOT_DIR} third_party/gflags) - if(TARGET gflags_static) - set(_gRPC_GFLAGS_LIBRARIES gflags_static) - endif() - else() - message(WARNING "gRPC_GFLAGS_PROVIDER is \"module\" but GFLAGS_ROOT_DIR is wrong") - endif() -elseif("${gRPC_GFLAGS_PROVIDER}" STREQUAL "package") - find_package(gflags) - if(TARGET gflags::gflags) - set(_gRPC_GFLAGS_LIBRARIES gflags::gflags) - endif() - set(_gRPC_FIND_GFLAGS "if(NOT gflags_FOUND)\n find_package(gflags)\nendif()") -endif() - -if("${gRPC_BENCHMARK_PROVIDER}" STREQUAL "module") - if(NOT BENCHMARK_ROOT_DIR) - set(BENCHMARK_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/benchmark) - endif() - if(EXISTS "${BENCHMARK_ROOT_DIR}/CMakeLists.txt") - add_subdirectory(${BENCHMARK_ROOT_DIR} third_party/benchmark) - if(TARGET benchmark) - set(_gRPC_BENCHMARK_LIBRARIES benchmark) - endif() - else() - message(WARNING "gRPC_BENCHMARK_PROVIDER is \"module\" but BENCHMARK_ROOT_DIR is wrong") - endif() -elseif("${gRPC_BENCHMARK_PROVIDER}" STREQUAL "package") - find_package(benchmark) - if(TARGET benchmark::benchmark) - set(_gRPC_BENCHMARK_LIBRARIES benchmark::benchmark) - endif() - set(_gRPC_FIND_BENCHMARK "if(NOT benchmark_FOUND)\n find_package(benchmark)\nendif()") -endif() - -if(NOT MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -endif() - -if(_gRPC_PLATFORM_MAC) - set(_gRPC_ALLTARGETS_LIBRARIES ${CMAKE_DL_LIBS} m pthread) -elseif(UNIX) - set(_gRPC_ALLTARGETS_LIBRARIES ${CMAKE_DL_LIBS} rt m pthread) -endif() - -if(WIN32 AND MSVC) - set(_gRPC_BASELIB_LIBRARIES wsock32 ws2_32) -endif() - -# Create directory for generated .proto files -set(_gRPC_PROTO_GENS_DIR ${CMAKE_BINARY_DIR}/gens) -file(MAKE_DIRECTORY ${_gRPC_PROTO_GENS_DIR}) - -# protobuf_generate_grpc_cpp -# -------------------------- -# -# Add custom commands to process ``.proto`` files to C++ using protoc and -# GRPC plugin:: -# -# protobuf_generate_grpc_cpp [...] -# -# ``ARGN`` -# ``.proto`` files -# -function(protobuf_generate_grpc_cpp) - if(NOT ARGN) - message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP() called without any proto files") - return() - endif() - - set(_protobuf_include_path -I . -I ${PROTOBUF_WELLKNOWN_IMPORT_DIR}) - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(FIL_WE ${FIL} NAME_WE) - file(RELATIVE_PATH REL_FIL ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL}) - get_filename_component(REL_DIR ${REL_FIL} DIRECTORY) - set(RELFIL_WE "${REL_DIR}/${FIL_WE}") - - add_custom_command( - OUTPUT "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.grpc.pb.cc" - "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.grpc.pb.h" - "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}_mock.grpc.pb.h" - "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.pb.cc" - "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.pb.h" - COMMAND $ - ARGS --grpc_out=generate_mock_code=true:${_gRPC_PROTO_GENS_DIR} - --cpp_out=${_gRPC_PROTO_GENS_DIR} - --plugin=protoc-gen-grpc=$ - ${_protobuf_include_path} - ${REL_FIL} - DEPENDS ${ABS_FIL} ${_gRPC_PROTOBUF_PROTOC} grpc_cpp_plugin - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Running gRPC C++ protocol buffer compiler on ${FIL}" - VERBATIM) - - set_source_files_properties("${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.grpc.pb.cc" "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.grpc.pb.h" "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}_mock.grpc.pb.h" "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.pb.cc" "${_gRPC_PROTO_GENS_DIR}/${RELFIL_WE}.pb.h" PROPERTIES GENERATED TRUE) - endforeach() -endfunction() - -add_custom_target(plugins - DEPENDS - grpc_cpp_plugin - grpc_csharp_plugin - grpc_node_plugin - grpc_objective_c_plugin - grpc_php_plugin - grpc_python_plugin - grpc_ruby_plugin -) - -add_custom_target(tools_c - DEPENDS - check_epollexclusive - gen_hpack_tables - gen_legal_metadata_characters - gen_percent_encoding_tables - grpc_create_jwt - grpc_print_google_default_creds_token - grpc_verify_jwt -) - -add_custom_target(tools_cxx - DEPENDS -) - -add_custom_target(tools - DEPENDS tools_c tools_cxx) - -if (gRPC_BUILD_TESTS) -add_custom_target(buildtests_c) -add_dependencies(buildtests_c alarm_test) -add_dependencies(buildtests_c algorithm_test) -add_dependencies(buildtests_c alloc_test) -add_dependencies(buildtests_c alpn_test) -add_dependencies(buildtests_c arena_test) -add_dependencies(buildtests_c bad_server_response_test) -add_dependencies(buildtests_c bdp_estimator_test) -add_dependencies(buildtests_c bin_decoder_test) -add_dependencies(buildtests_c bin_encoder_test) -add_dependencies(buildtests_c census_context_test) -add_dependencies(buildtests_c census_intrusive_hash_map_test) -add_dependencies(buildtests_c census_resource_test) -add_dependencies(buildtests_c census_trace_context_test) -add_dependencies(buildtests_c channel_create_test) -add_dependencies(buildtests_c chttp2_hpack_encoder_test) -add_dependencies(buildtests_c chttp2_stream_map_test) -add_dependencies(buildtests_c chttp2_varint_test) -add_dependencies(buildtests_c combiner_test) -add_dependencies(buildtests_c compression_test) -add_dependencies(buildtests_c concurrent_connectivity_test) -add_dependencies(buildtests_c connection_refused_test) -add_dependencies(buildtests_c dns_resolver_connectivity_test) -add_dependencies(buildtests_c dns_resolver_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c dualstack_socket_test) -endif() -add_dependencies(buildtests_c endpoint_pair_test) -add_dependencies(buildtests_c error_test) -if(_gRPC_PLATFORM_LINUX) -add_dependencies(buildtests_c ev_epollsig_linux_test) -endif() -add_dependencies(buildtests_c fake_resolver_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c fd_conservation_posix_test) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c fd_posix_test) -endif() -add_dependencies(buildtests_c fling_client) -add_dependencies(buildtests_c fling_server) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c fling_stream_test) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c fling_test) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c goaway_server_test) -endif() -add_dependencies(buildtests_c gpr_avl_test) -add_dependencies(buildtests_c gpr_backoff_test) -add_dependencies(buildtests_c gpr_cmdline_test) -add_dependencies(buildtests_c gpr_cpu_test) -add_dependencies(buildtests_c gpr_env_test) -add_dependencies(buildtests_c gpr_histogram_test) -add_dependencies(buildtests_c gpr_host_port_test) -add_dependencies(buildtests_c gpr_log_test) -add_dependencies(buildtests_c gpr_mpscq_test) -add_dependencies(buildtests_c gpr_spinlock_test) -add_dependencies(buildtests_c gpr_stack_lockfree_test) -add_dependencies(buildtests_c gpr_string_test) -add_dependencies(buildtests_c gpr_sync_test) -add_dependencies(buildtests_c gpr_thd_test) -add_dependencies(buildtests_c gpr_time_test) -add_dependencies(buildtests_c gpr_tls_test) -add_dependencies(buildtests_c gpr_useful_test) -add_dependencies(buildtests_c grpc_auth_context_test) -add_dependencies(buildtests_c grpc_b64_test) -add_dependencies(buildtests_c grpc_byte_buffer_reader_test) -add_dependencies(buildtests_c grpc_channel_args_test) -add_dependencies(buildtests_c grpc_channel_stack_test) -add_dependencies(buildtests_c grpc_completion_queue_test) -add_dependencies(buildtests_c grpc_completion_queue_threading_test) -add_dependencies(buildtests_c grpc_credentials_test) -add_dependencies(buildtests_c grpc_fetch_oauth2) -add_dependencies(buildtests_c grpc_invalid_channel_args_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c grpc_json_token_test) -endif() -add_dependencies(buildtests_c grpc_jwt_verifier_test) -add_dependencies(buildtests_c grpc_security_connector_test) -if(_gRPC_PLATFORM_LINUX) -add_dependencies(buildtests_c handshake_client) -endif() -if(_gRPC_PLATFORM_LINUX) -add_dependencies(buildtests_c handshake_server) -endif() -add_dependencies(buildtests_c hpack_parser_test) -add_dependencies(buildtests_c hpack_table_test) -add_dependencies(buildtests_c http_parser_test) -add_dependencies(buildtests_c httpcli_format_request_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c httpcli_test) -endif() -if(_gRPC_PLATFORM_LINUX) -add_dependencies(buildtests_c httpscli_test) -endif() -add_dependencies(buildtests_c init_test) -add_dependencies(buildtests_c invalid_call_argument_test) -add_dependencies(buildtests_c json_rewrite) -add_dependencies(buildtests_c json_rewrite_test) -add_dependencies(buildtests_c json_stream_error_test) -add_dependencies(buildtests_c json_test) -add_dependencies(buildtests_c lame_client_test) -add_dependencies(buildtests_c lb_policies_test) -add_dependencies(buildtests_c load_file_test) -add_dependencies(buildtests_c memory_profile_client) -add_dependencies(buildtests_c memory_profile_server) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c memory_profile_test) -endif() -add_dependencies(buildtests_c message_compress_test) -add_dependencies(buildtests_c minimal_stack_is_minimal_test) -add_dependencies(buildtests_c mlog_test) -add_dependencies(buildtests_c multiple_server_queues_test) -add_dependencies(buildtests_c murmur_hash_test) -add_dependencies(buildtests_c no_server_test) -add_dependencies(buildtests_c num_external_connectivity_watchers_test) -add_dependencies(buildtests_c parse_address_test) -add_dependencies(buildtests_c percent_encoding_test) -if(_gRPC_PLATFORM_LINUX) -add_dependencies(buildtests_c pollset_set_test) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c resolve_address_posix_test) -endif() -add_dependencies(buildtests_c resolve_address_test) -add_dependencies(buildtests_c resource_quota_test) -add_dependencies(buildtests_c secure_channel_create_test) -add_dependencies(buildtests_c secure_endpoint_test) -add_dependencies(buildtests_c sequential_connectivity_test) -add_dependencies(buildtests_c server_chttp2_test) -add_dependencies(buildtests_c server_test) -add_dependencies(buildtests_c slice_buffer_test) -add_dependencies(buildtests_c slice_hash_table_test) -add_dependencies(buildtests_c slice_string_helpers_test) -add_dependencies(buildtests_c slice_test) -add_dependencies(buildtests_c sockaddr_resolver_test) -add_dependencies(buildtests_c sockaddr_utils_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c socket_utils_test) -endif() -add_dependencies(buildtests_c status_conversion_test) -add_dependencies(buildtests_c stream_compression_test) -add_dependencies(buildtests_c stream_owned_slice_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c tcp_client_posix_test) -endif() -add_dependencies(buildtests_c tcp_client_uv_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c tcp_posix_test) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c tcp_server_posix_test) -endif() -add_dependencies(buildtests_c tcp_server_uv_test) -add_dependencies(buildtests_c time_averaged_stats_test) -add_dependencies(buildtests_c timeout_encoding_test) -add_dependencies(buildtests_c timer_heap_test) -add_dependencies(buildtests_c timer_list_test) -add_dependencies(buildtests_c transport_connectivity_state_test) -add_dependencies(buildtests_c transport_metadata_test) -add_dependencies(buildtests_c transport_pid_controller_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c transport_security_test) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c udp_server_test) -endif() -add_dependencies(buildtests_c uri_parser_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c wakeup_fd_cv_test) -endif() -add_dependencies(buildtests_c public_headers_must_be_c89) -add_dependencies(buildtests_c badreq_bad_client_test) -add_dependencies(buildtests_c connection_prefix_bad_client_test) -add_dependencies(buildtests_c head_of_line_blocking_bad_client_test) -add_dependencies(buildtests_c headers_bad_client_test) -add_dependencies(buildtests_c initial_settings_frame_bad_client_test) -add_dependencies(buildtests_c large_metadata_bad_client_test) -add_dependencies(buildtests_c server_registered_method_bad_client_test) -add_dependencies(buildtests_c simple_request_bad_client_test) -add_dependencies(buildtests_c unknown_frame_bad_client_test) -add_dependencies(buildtests_c window_overflow_bad_client_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c bad_ssl_cert_server) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c bad_ssl_cert_test) -endif() -add_dependencies(buildtests_c h2_census_test) -add_dependencies(buildtests_c h2_compress_test) -add_dependencies(buildtests_c h2_fakesec_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c h2_fd_test) -endif() -add_dependencies(buildtests_c h2_full_test) -if(_gRPC_PLATFORM_LINUX) -add_dependencies(buildtests_c h2_full+pipe_test) -endif() -add_dependencies(buildtests_c h2_full+trace_test) -add_dependencies(buildtests_c h2_full+workarounds_test) -add_dependencies(buildtests_c h2_http_proxy_test) -add_dependencies(buildtests_c h2_load_reporting_test) -add_dependencies(buildtests_c h2_oauth2_test) -add_dependencies(buildtests_c h2_proxy_test) -add_dependencies(buildtests_c h2_sockpair_test) -add_dependencies(buildtests_c h2_sockpair+trace_test) -add_dependencies(buildtests_c h2_sockpair_1byte_test) -add_dependencies(buildtests_c h2_ssl_test) -add_dependencies(buildtests_c h2_ssl_cert_test) -add_dependencies(buildtests_c h2_ssl_proxy_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c h2_uds_test) -endif() -add_dependencies(buildtests_c inproc_test) -add_dependencies(buildtests_c h2_census_nosec_test) -add_dependencies(buildtests_c h2_compress_nosec_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c h2_fd_nosec_test) -endif() -add_dependencies(buildtests_c h2_full_nosec_test) -if(_gRPC_PLATFORM_LINUX) -add_dependencies(buildtests_c h2_full+pipe_nosec_test) -endif() -add_dependencies(buildtests_c h2_full+trace_nosec_test) -add_dependencies(buildtests_c h2_full+workarounds_nosec_test) -add_dependencies(buildtests_c h2_http_proxy_nosec_test) -add_dependencies(buildtests_c h2_load_reporting_nosec_test) -add_dependencies(buildtests_c h2_proxy_nosec_test) -add_dependencies(buildtests_c h2_sockpair_nosec_test) -add_dependencies(buildtests_c h2_sockpair+trace_nosec_test) -add_dependencies(buildtests_c h2_sockpair_1byte_nosec_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_c h2_uds_nosec_test) -endif() -add_dependencies(buildtests_c inproc_nosec_test) -add_dependencies(buildtests_c api_fuzzer_one_entry) -add_dependencies(buildtests_c client_fuzzer_one_entry) -add_dependencies(buildtests_c hpack_parser_fuzzer_test_one_entry) -add_dependencies(buildtests_c http_request_fuzzer_test_one_entry) -add_dependencies(buildtests_c http_response_fuzzer_test_one_entry) -add_dependencies(buildtests_c json_fuzzer_test_one_entry) -add_dependencies(buildtests_c nanopb_fuzzer_response_test_one_entry) -add_dependencies(buildtests_c nanopb_fuzzer_serverlist_test_one_entry) -add_dependencies(buildtests_c percent_decode_fuzzer_one_entry) -add_dependencies(buildtests_c percent_encode_fuzzer_one_entry) -add_dependencies(buildtests_c server_fuzzer_one_entry) -add_dependencies(buildtests_c ssl_server_fuzzer_one_entry) -add_dependencies(buildtests_c uri_fuzzer_test_one_entry) - -add_custom_target(buildtests_cxx) -add_dependencies(buildtests_cxx alarm_cpp_test) -add_dependencies(buildtests_cxx async_end2end_test) -add_dependencies(buildtests_cxx auth_property_iterator_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_arena) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_call_create) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_chttp2_hpack) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_chttp2_transport) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_closure) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_cq) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_cq_multiple_threads) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_error) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_fullstack_streaming_ping_pong) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_fullstack_streaming_pump) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_fullstack_trickle) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_fullstack_unary_ping_pong) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_metadata) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx bm_pollset) -endif() -add_dependencies(buildtests_cxx channel_arguments_test) -add_dependencies(buildtests_cxx channel_filter_test) -add_dependencies(buildtests_cxx cli_call_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx client_crash_test) -endif() -add_dependencies(buildtests_cxx client_crash_test_server) -add_dependencies(buildtests_cxx client_lb_end2end_test) -add_dependencies(buildtests_cxx codegen_test_full) -add_dependencies(buildtests_cxx codegen_test_minimal) -add_dependencies(buildtests_cxx credentials_test) -add_dependencies(buildtests_cxx cxx_byte_buffer_test) -add_dependencies(buildtests_cxx cxx_slice_test) -add_dependencies(buildtests_cxx cxx_string_ref_test) -add_dependencies(buildtests_cxx cxx_time_test) -add_dependencies(buildtests_cxx end2end_test) -add_dependencies(buildtests_cxx error_details_test) -add_dependencies(buildtests_cxx filter_end2end_test) -add_dependencies(buildtests_cxx generic_end2end_test) -add_dependencies(buildtests_cxx golden_file_test) -add_dependencies(buildtests_cxx grpc_cli) -add_dependencies(buildtests_cxx grpc_tool_test) -add_dependencies(buildtests_cxx grpclb_api_test) -add_dependencies(buildtests_cxx grpclb_end2end_test) -add_dependencies(buildtests_cxx grpclb_test) -add_dependencies(buildtests_cxx health_service_end2end_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx http2_client) -endif() -add_dependencies(buildtests_cxx hybrid_end2end_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx interop_client) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx interop_server) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx interop_test) -endif() -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx json_run_localhost) -endif() -add_dependencies(buildtests_cxx memory_test) -add_dependencies(buildtests_cxx metrics_client) -add_dependencies(buildtests_cxx mock_test) -add_dependencies(buildtests_cxx noop-benchmark) -add_dependencies(buildtests_cxx proto_server_reflection_test) -add_dependencies(buildtests_cxx proto_utils_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx qps_interarrival_test) -endif() -add_dependencies(buildtests_cxx qps_json_driver) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx qps_openloop_test) -endif() -add_dependencies(buildtests_cxx qps_worker) -add_dependencies(buildtests_cxx reconnect_interop_client) -add_dependencies(buildtests_cxx reconnect_interop_server) -add_dependencies(buildtests_cxx secure_auth_context_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx secure_sync_unary_ping_pong_test) -endif() -add_dependencies(buildtests_cxx server_builder_plugin_test) -add_dependencies(buildtests_cxx server_builder_test) -add_dependencies(buildtests_cxx server_context_test_spouse_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx server_crash_test) -endif() -add_dependencies(buildtests_cxx server_crash_test_client) -add_dependencies(buildtests_cxx server_request_call_test) -add_dependencies(buildtests_cxx shutdown_test) -add_dependencies(buildtests_cxx status_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx streaming_throughput_test) -endif() -add_dependencies(buildtests_cxx stress_test) -add_dependencies(buildtests_cxx thread_manager_test) -add_dependencies(buildtests_cxx thread_stress_test) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) -add_dependencies(buildtests_cxx writes_per_rpc_test) -endif() - -add_custom_target(buildtests - DEPENDS buildtests_c buildtests_cxx) -endif (gRPC_BUILD_TESTS) - - -add_library(gpr - src/core/lib/profiling/basic_timers.c - src/core/lib/profiling/stap_timers.c - src/core/lib/support/alloc.c - src/core/lib/support/arena.c - src/core/lib/support/atm.c - src/core/lib/support/avl.c - src/core/lib/support/backoff.c - src/core/lib/support/cmdline.c - src/core/lib/support/cpu_iphone.c - src/core/lib/support/cpu_linux.c - src/core/lib/support/cpu_posix.c - src/core/lib/support/cpu_windows.c - src/core/lib/support/env_linux.c - src/core/lib/support/env_posix.c - src/core/lib/support/env_windows.c - src/core/lib/support/histogram.c - src/core/lib/support/host_port.c - src/core/lib/support/log.c - src/core/lib/support/log_android.c - src/core/lib/support/log_linux.c - src/core/lib/support/log_posix.c - src/core/lib/support/log_windows.c - src/core/lib/support/mpscq.c - src/core/lib/support/murmur_hash.c - src/core/lib/support/stack_lockfree.c - src/core/lib/support/string.c - src/core/lib/support/string_posix.c - src/core/lib/support/string_util_windows.c - src/core/lib/support/string_windows.c - src/core/lib/support/subprocess_posix.c - src/core/lib/support/subprocess_windows.c - src/core/lib/support/sync.c - src/core/lib/support/sync_posix.c - src/core/lib/support/sync_windows.c - src/core/lib/support/thd.c - src/core/lib/support/thd_posix.c - src/core/lib/support/thd_windows.c - src/core/lib/support/time.c - src/core/lib/support/time_posix.c - src/core/lib/support/time_precise.c - src/core/lib/support/time_windows.c - src/core/lib/support/tls_pthread.c - src/core/lib/support/tmpfile_msys.c - src/core/lib/support/tmpfile_posix.c - src/core/lib/support/tmpfile_windows.c - src/core/lib/support/wrap_memcpy.c -) - -if(WIN32 AND MSVC) - set_target_properties(gpr PROPERTIES COMPILE_PDB_NAME "gpr" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/gpr.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(gpr - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr - ${_gRPC_ALLTARGETS_LIBRARIES} -) - -foreach(_hdr - include/grpc/support/alloc.h - include/grpc/support/atm.h - include/grpc/support/atm_gcc_atomic.h - include/grpc/support/atm_gcc_sync.h - include/grpc/support/atm_windows.h - include/grpc/support/avl.h - include/grpc/support/cmdline.h - include/grpc/support/cpu.h - include/grpc/support/histogram.h - include/grpc/support/host_port.h - include/grpc/support/log.h - include/grpc/support/log_windows.h - include/grpc/support/port_platform.h - include/grpc/support/string_util.h - include/grpc/support/subprocess.h - include/grpc/support/sync.h - include/grpc/support/sync_generic.h - include/grpc/support/sync_posix.h - include/grpc/support/sync_windows.h - include/grpc/support/thd.h - include/grpc/support/time.h - include/grpc/support/tls.h - include/grpc/support/tls_gcc.h - include/grpc/support/tls_msvc.h - include/grpc/support/tls_pthread.h - include/grpc/support/useful.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS gpr EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(gpr_test_util - test/core/util/test_config.c -) - -if(WIN32 AND MSVC) - set_target_properties(gpr_test_util PROPERTIES COMPILE_PDB_NAME "gpr_test_util" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/gpr_test_util.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(gpr_test_util - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_test_util - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr -) - - -endif (gRPC_BUILD_TESTS) - -add_library(grpc - src/core/lib/surface/init.c - src/core/lib/channel/channel_args.c - src/core/lib/channel/channel_stack.c - src/core/lib/channel/channel_stack_builder.c - src/core/lib/channel/connected_channel.c - src/core/lib/channel/handshaker.c - src/core/lib/channel/handshaker_factory.c - src/core/lib/channel/handshaker_registry.c - src/core/lib/compression/compression.c - src/core/lib/compression/message_compress.c - src/core/lib/compression/stream_compression.c - src/core/lib/http/format_request.c - src/core/lib/http/httpcli.c - src/core/lib/http/parser.c - src/core/lib/iomgr/closure.c - src/core/lib/iomgr/combiner.c - src/core/lib/iomgr/endpoint.c - src/core/lib/iomgr/endpoint_pair_posix.c - src/core/lib/iomgr/endpoint_pair_uv.c - src/core/lib/iomgr/endpoint_pair_windows.c - src/core/lib/iomgr/error.c - src/core/lib/iomgr/ev_epoll1_linux.c - src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c - src/core/lib/iomgr/ev_epoll_thread_pool_linux.c - src/core/lib/iomgr/ev_epollex_linux.c - src/core/lib/iomgr/ev_epollsig_linux.c - src/core/lib/iomgr/ev_poll_posix.c - src/core/lib/iomgr/ev_posix.c - src/core/lib/iomgr/ev_windows.c - src/core/lib/iomgr/exec_ctx.c - src/core/lib/iomgr/executor.c - src/core/lib/iomgr/iocp_windows.c - src/core/lib/iomgr/iomgr.c - src/core/lib/iomgr/iomgr_posix.c - src/core/lib/iomgr/iomgr_uv.c - src/core/lib/iomgr/iomgr_windows.c - src/core/lib/iomgr/is_epollexclusive_available.c - src/core/lib/iomgr/load_file.c - src/core/lib/iomgr/lockfree_event.c - src/core/lib/iomgr/network_status_tracker.c - src/core/lib/iomgr/polling_entity.c - src/core/lib/iomgr/pollset_set_uv.c - src/core/lib/iomgr/pollset_set_windows.c - src/core/lib/iomgr/pollset_uv.c - src/core/lib/iomgr/pollset_windows.c - src/core/lib/iomgr/resolve_address_posix.c - src/core/lib/iomgr/resolve_address_uv.c - src/core/lib/iomgr/resolve_address_windows.c - src/core/lib/iomgr/resource_quota.c - src/core/lib/iomgr/sockaddr_utils.c - src/core/lib/iomgr/socket_factory_posix.c - src/core/lib/iomgr/socket_mutator.c - src/core/lib/iomgr/socket_utils_common_posix.c - src/core/lib/iomgr/socket_utils_linux.c - src/core/lib/iomgr/socket_utils_posix.c - src/core/lib/iomgr/socket_utils_uv.c - src/core/lib/iomgr/socket_utils_windows.c - src/core/lib/iomgr/socket_windows.c - src/core/lib/iomgr/tcp_client_posix.c - src/core/lib/iomgr/tcp_client_uv.c - src/core/lib/iomgr/tcp_client_windows.c - src/core/lib/iomgr/tcp_posix.c - src/core/lib/iomgr/tcp_server_posix.c - src/core/lib/iomgr/tcp_server_utils_posix_common.c - src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c - src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c - src/core/lib/iomgr/tcp_server_uv.c - src/core/lib/iomgr/tcp_server_windows.c - src/core/lib/iomgr/tcp_uv.c - src/core/lib/iomgr/tcp_windows.c - src/core/lib/iomgr/time_averaged_stats.c - src/core/lib/iomgr/timer_generic.c - src/core/lib/iomgr/timer_heap.c - src/core/lib/iomgr/timer_manager.c - src/core/lib/iomgr/timer_uv.c - src/core/lib/iomgr/udp_server.c - src/core/lib/iomgr/unix_sockets_posix.c - src/core/lib/iomgr/unix_sockets_posix_noop.c - src/core/lib/iomgr/wakeup_fd_cv.c - src/core/lib/iomgr/wakeup_fd_eventfd.c - src/core/lib/iomgr/wakeup_fd_nospecial.c - src/core/lib/iomgr/wakeup_fd_pipe.c - src/core/lib/iomgr/wakeup_fd_posix.c - src/core/lib/json/json.c - src/core/lib/json/json_reader.c - src/core/lib/json/json_string.c - src/core/lib/json/json_writer.c - src/core/lib/slice/b64.c - src/core/lib/slice/percent_encoding.c - src/core/lib/slice/slice.c - src/core/lib/slice/slice_buffer.c - src/core/lib/slice/slice_hash_table.c - src/core/lib/slice/slice_intern.c - src/core/lib/slice/slice_string_helpers.c - src/core/lib/surface/alarm.c - src/core/lib/surface/api_trace.c - src/core/lib/surface/byte_buffer.c - src/core/lib/surface/byte_buffer_reader.c - src/core/lib/surface/call.c - src/core/lib/surface/call_details.c - src/core/lib/surface/call_log_batch.c - src/core/lib/surface/channel.c - src/core/lib/surface/channel_init.c - src/core/lib/surface/channel_ping.c - src/core/lib/surface/channel_stack_type.c - src/core/lib/surface/completion_queue.c - src/core/lib/surface/completion_queue_factory.c - src/core/lib/surface/event_string.c - src/core/lib/surface/lame_client.cc - src/core/lib/surface/metadata_array.c - src/core/lib/surface/server.c - src/core/lib/surface/validate_metadata.c - src/core/lib/surface/version.c - src/core/lib/transport/bdp_estimator.c - src/core/lib/transport/byte_stream.c - src/core/lib/transport/connectivity_state.c - src/core/lib/transport/error_utils.c - src/core/lib/transport/metadata.c - src/core/lib/transport/metadata_batch.c - src/core/lib/transport/pid_controller.c - src/core/lib/transport/service_config.c - src/core/lib/transport/static_metadata.c - src/core/lib/transport/status_conversion.c - src/core/lib/transport/timeout_encoding.c - src/core/lib/transport/transport.c - src/core/lib/transport/transport_op_string.c - src/core/lib/debug/trace.c - src/core/ext/transport/chttp2/server/secure/server_secure_chttp2.c - src/core/ext/transport/chttp2/transport/bin_decoder.c - src/core/ext/transport/chttp2/transport/bin_encoder.c - src/core/ext/transport/chttp2/transport/chttp2_plugin.c - src/core/ext/transport/chttp2/transport/chttp2_transport.c - src/core/ext/transport/chttp2/transport/frame_data.c - src/core/ext/transport/chttp2/transport/frame_goaway.c - src/core/ext/transport/chttp2/transport/frame_ping.c - src/core/ext/transport/chttp2/transport/frame_rst_stream.c - src/core/ext/transport/chttp2/transport/frame_settings.c - src/core/ext/transport/chttp2/transport/frame_window_update.c - src/core/ext/transport/chttp2/transport/hpack_encoder.c - src/core/ext/transport/chttp2/transport/hpack_parser.c - src/core/ext/transport/chttp2/transport/hpack_table.c - src/core/ext/transport/chttp2/transport/http2_settings.c - src/core/ext/transport/chttp2/transport/huffsyms.c - src/core/ext/transport/chttp2/transport/incoming_metadata.c - src/core/ext/transport/chttp2/transport/parsing.c - src/core/ext/transport/chttp2/transport/stream_lists.c - src/core/ext/transport/chttp2/transport/stream_map.c - src/core/ext/transport/chttp2/transport/varint.c - src/core/ext/transport/chttp2/transport/writing.c - src/core/ext/transport/chttp2/alpn/alpn.c - src/core/ext/filters/http/client/http_client_filter.c - src/core/ext/filters/http/http_filters_plugin.c - src/core/ext/filters/http/message_compress/message_compress_filter.c - src/core/ext/filters/http/server/http_server_filter.c - src/core/lib/http/httpcli_security_connector.c - src/core/lib/security/context/security_context.c - src/core/lib/security/credentials/composite/composite_credentials.c - src/core/lib/security/credentials/credentials.c - src/core/lib/security/credentials/credentials_metadata.c - src/core/lib/security/credentials/fake/fake_credentials.c - src/core/lib/security/credentials/google_default/credentials_generic.c - src/core/lib/security/credentials/google_default/google_default_credentials.c - src/core/lib/security/credentials/iam/iam_credentials.c - src/core/lib/security/credentials/jwt/json_token.c - src/core/lib/security/credentials/jwt/jwt_credentials.c - src/core/lib/security/credentials/jwt/jwt_verifier.c - src/core/lib/security/credentials/oauth2/oauth2_credentials.c - src/core/lib/security/credentials/plugin/plugin_credentials.c - src/core/lib/security/credentials/ssl/ssl_credentials.c - src/core/lib/security/transport/client_auth_filter.c - src/core/lib/security/transport/lb_targets_info.c - src/core/lib/security/transport/secure_endpoint.c - src/core/lib/security/transport/security_connector.c - src/core/lib/security/transport/security_handshaker.c - src/core/lib/security/transport/server_auth_filter.c - src/core/lib/security/transport/tsi_error.c - src/core/lib/security/util/json_util.c - src/core/lib/surface/init_secure.c - src/core/tsi/fake_transport_security.c - src/core/tsi/gts_transport_security.c - src/core/tsi/ssl_transport_security.c - src/core/tsi/transport_security.c - src/core/tsi/transport_security_adapter.c - src/core/ext/transport/chttp2/server/chttp2_server.c - src/core/ext/transport/chttp2/client/secure/secure_channel_create.c - src/core/ext/filters/client_channel/channel_connectivity.c - src/core/ext/filters/client_channel/client_channel.c - src/core/ext/filters/client_channel/client_channel_factory.c - src/core/ext/filters/client_channel/client_channel_plugin.c - src/core/ext/filters/client_channel/connector.c - src/core/ext/filters/client_channel/http_connect_handshaker.c - src/core/ext/filters/client_channel/http_proxy.c - src/core/ext/filters/client_channel/lb_policy.c - src/core/ext/filters/client_channel/lb_policy_factory.c - src/core/ext/filters/client_channel/lb_policy_registry.c - src/core/ext/filters/client_channel/parse_address.c - src/core/ext/filters/client_channel/proxy_mapper.c - src/core/ext/filters/client_channel/proxy_mapper_registry.c - src/core/ext/filters/client_channel/resolver.c - src/core/ext/filters/client_channel/resolver_factory.c - src/core/ext/filters/client_channel/resolver_registry.c - src/core/ext/filters/client_channel/retry_throttle.c - src/core/ext/filters/client_channel/subchannel.c - src/core/ext/filters/client_channel/subchannel_index.c - src/core/ext/filters/client_channel/uri_parser.c - src/core/ext/filters/deadline/deadline_filter.c - src/core/ext/transport/chttp2/client/chttp2_connector.c - src/core/ext/transport/chttp2/server/insecure/server_chttp2.c - src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.c - src/core/ext/transport/chttp2/client/insecure/channel_create.c - src/core/ext/transport/chttp2/client/insecure/channel_create_posix.c - src/core/ext/transport/inproc/inproc_plugin.c - src/core/ext/transport/inproc/inproc_transport.c - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.c - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel_secure.c - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.c - src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c - src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c - third_party/nanopb/pb_common.c - third_party/nanopb/pb_decode.c - third_party/nanopb/pb_encode.c - src/core/ext/filters/client_channel/resolver/fake/fake_resolver.c - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c - src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.c - src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.c - src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.c - src/core/ext/filters/load_reporting/load_reporting.c - src/core/ext/filters/load_reporting/load_reporting_filter.c - src/core/ext/census/base_resources.c - src/core/ext/census/context.c - src/core/ext/census/gen/census.pb.c - src/core/ext/census/gen/trace_context.pb.c - src/core/ext/census/grpc_context.c - src/core/ext/census/grpc_filter.c - src/core/ext/census/grpc_plugin.c - src/core/ext/census/initialize.c - src/core/ext/census/intrusive_hash_map.c - src/core/ext/census/mlog.c - src/core/ext/census/operation.c - src/core/ext/census/placeholders.c - src/core/ext/census/resource.c - src/core/ext/census/trace_context.c - src/core/ext/census/tracing.c - src/core/ext/filters/max_age/max_age_filter.c - src/core/ext/filters/message_size/message_size_filter.c - src/core/ext/filters/workarounds/workaround_cronet_compression_filter.c - src/core/ext/filters/workarounds/workaround_utils.c - src/core/plugin_registry/grpc_plugin_registry.c -) - -if(WIN32 AND MSVC) - set_target_properties(grpc PROPERTIES COMPILE_PDB_NAME "grpc" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ZLIB_LIBRARIES} - ${_gRPC_CARES_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr -) - -foreach(_hdr - include/grpc/byte_buffer.h - include/grpc/byte_buffer_reader.h - include/grpc/compression.h - include/grpc/grpc.h - include/grpc/grpc_posix.h - include/grpc/grpc_security_constants.h - include/grpc/load_reporting.h - include/grpc/slice.h - include/grpc/slice_buffer.h - include/grpc/status.h - include/grpc/support/workaround_list.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h - include/grpc/grpc_security.h - include/grpc/census.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_library(grpc_cronet - src/core/lib/surface/init.c - src/core/lib/channel/channel_args.c - src/core/lib/channel/channel_stack.c - src/core/lib/channel/channel_stack_builder.c - src/core/lib/channel/connected_channel.c - src/core/lib/channel/handshaker.c - src/core/lib/channel/handshaker_factory.c - src/core/lib/channel/handshaker_registry.c - src/core/lib/compression/compression.c - src/core/lib/compression/message_compress.c - src/core/lib/compression/stream_compression.c - src/core/lib/http/format_request.c - src/core/lib/http/httpcli.c - src/core/lib/http/parser.c - src/core/lib/iomgr/closure.c - src/core/lib/iomgr/combiner.c - src/core/lib/iomgr/endpoint.c - src/core/lib/iomgr/endpoint_pair_posix.c - src/core/lib/iomgr/endpoint_pair_uv.c - src/core/lib/iomgr/endpoint_pair_windows.c - src/core/lib/iomgr/error.c - src/core/lib/iomgr/ev_epoll1_linux.c - src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c - src/core/lib/iomgr/ev_epoll_thread_pool_linux.c - src/core/lib/iomgr/ev_epollex_linux.c - src/core/lib/iomgr/ev_epollsig_linux.c - src/core/lib/iomgr/ev_poll_posix.c - src/core/lib/iomgr/ev_posix.c - src/core/lib/iomgr/ev_windows.c - src/core/lib/iomgr/exec_ctx.c - src/core/lib/iomgr/executor.c - src/core/lib/iomgr/iocp_windows.c - src/core/lib/iomgr/iomgr.c - src/core/lib/iomgr/iomgr_posix.c - src/core/lib/iomgr/iomgr_uv.c - src/core/lib/iomgr/iomgr_windows.c - src/core/lib/iomgr/is_epollexclusive_available.c - src/core/lib/iomgr/load_file.c - src/core/lib/iomgr/lockfree_event.c - src/core/lib/iomgr/network_status_tracker.c - src/core/lib/iomgr/polling_entity.c - src/core/lib/iomgr/pollset_set_uv.c - src/core/lib/iomgr/pollset_set_windows.c - src/core/lib/iomgr/pollset_uv.c - src/core/lib/iomgr/pollset_windows.c - src/core/lib/iomgr/resolve_address_posix.c - src/core/lib/iomgr/resolve_address_uv.c - src/core/lib/iomgr/resolve_address_windows.c - src/core/lib/iomgr/resource_quota.c - src/core/lib/iomgr/sockaddr_utils.c - src/core/lib/iomgr/socket_factory_posix.c - src/core/lib/iomgr/socket_mutator.c - src/core/lib/iomgr/socket_utils_common_posix.c - src/core/lib/iomgr/socket_utils_linux.c - src/core/lib/iomgr/socket_utils_posix.c - src/core/lib/iomgr/socket_utils_uv.c - src/core/lib/iomgr/socket_utils_windows.c - src/core/lib/iomgr/socket_windows.c - src/core/lib/iomgr/tcp_client_posix.c - src/core/lib/iomgr/tcp_client_uv.c - src/core/lib/iomgr/tcp_client_windows.c - src/core/lib/iomgr/tcp_posix.c - src/core/lib/iomgr/tcp_server_posix.c - src/core/lib/iomgr/tcp_server_utils_posix_common.c - src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c - src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c - src/core/lib/iomgr/tcp_server_uv.c - src/core/lib/iomgr/tcp_server_windows.c - src/core/lib/iomgr/tcp_uv.c - src/core/lib/iomgr/tcp_windows.c - src/core/lib/iomgr/time_averaged_stats.c - src/core/lib/iomgr/timer_generic.c - src/core/lib/iomgr/timer_heap.c - src/core/lib/iomgr/timer_manager.c - src/core/lib/iomgr/timer_uv.c - src/core/lib/iomgr/udp_server.c - src/core/lib/iomgr/unix_sockets_posix.c - src/core/lib/iomgr/unix_sockets_posix_noop.c - src/core/lib/iomgr/wakeup_fd_cv.c - src/core/lib/iomgr/wakeup_fd_eventfd.c - src/core/lib/iomgr/wakeup_fd_nospecial.c - src/core/lib/iomgr/wakeup_fd_pipe.c - src/core/lib/iomgr/wakeup_fd_posix.c - src/core/lib/json/json.c - src/core/lib/json/json_reader.c - src/core/lib/json/json_string.c - src/core/lib/json/json_writer.c - src/core/lib/slice/b64.c - src/core/lib/slice/percent_encoding.c - src/core/lib/slice/slice.c - src/core/lib/slice/slice_buffer.c - src/core/lib/slice/slice_hash_table.c - src/core/lib/slice/slice_intern.c - src/core/lib/slice/slice_string_helpers.c - src/core/lib/surface/alarm.c - src/core/lib/surface/api_trace.c - src/core/lib/surface/byte_buffer.c - src/core/lib/surface/byte_buffer_reader.c - src/core/lib/surface/call.c - src/core/lib/surface/call_details.c - src/core/lib/surface/call_log_batch.c - src/core/lib/surface/channel.c - src/core/lib/surface/channel_init.c - src/core/lib/surface/channel_ping.c - src/core/lib/surface/channel_stack_type.c - src/core/lib/surface/completion_queue.c - src/core/lib/surface/completion_queue_factory.c - src/core/lib/surface/event_string.c - src/core/lib/surface/lame_client.cc - src/core/lib/surface/metadata_array.c - src/core/lib/surface/server.c - src/core/lib/surface/validate_metadata.c - src/core/lib/surface/version.c - src/core/lib/transport/bdp_estimator.c - src/core/lib/transport/byte_stream.c - src/core/lib/transport/connectivity_state.c - src/core/lib/transport/error_utils.c - src/core/lib/transport/metadata.c - src/core/lib/transport/metadata_batch.c - src/core/lib/transport/pid_controller.c - src/core/lib/transport/service_config.c - src/core/lib/transport/static_metadata.c - src/core/lib/transport/status_conversion.c - src/core/lib/transport/timeout_encoding.c - src/core/lib/transport/transport.c - src/core/lib/transport/transport_op_string.c - src/core/lib/debug/trace.c - src/core/ext/transport/cronet/client/secure/cronet_channel_create.c - src/core/ext/transport/cronet/transport/cronet_api_dummy.c - src/core/ext/transport/cronet/transport/cronet_transport.c - src/core/ext/transport/chttp2/client/secure/secure_channel_create.c - src/core/ext/transport/chttp2/transport/bin_decoder.c - src/core/ext/transport/chttp2/transport/bin_encoder.c - src/core/ext/transport/chttp2/transport/chttp2_plugin.c - src/core/ext/transport/chttp2/transport/chttp2_transport.c - src/core/ext/transport/chttp2/transport/frame_data.c - src/core/ext/transport/chttp2/transport/frame_goaway.c - src/core/ext/transport/chttp2/transport/frame_ping.c - src/core/ext/transport/chttp2/transport/frame_rst_stream.c - src/core/ext/transport/chttp2/transport/frame_settings.c - src/core/ext/transport/chttp2/transport/frame_window_update.c - src/core/ext/transport/chttp2/transport/hpack_encoder.c - src/core/ext/transport/chttp2/transport/hpack_parser.c - src/core/ext/transport/chttp2/transport/hpack_table.c - src/core/ext/transport/chttp2/transport/http2_settings.c - src/core/ext/transport/chttp2/transport/huffsyms.c - src/core/ext/transport/chttp2/transport/incoming_metadata.c - src/core/ext/transport/chttp2/transport/parsing.c - src/core/ext/transport/chttp2/transport/stream_lists.c - src/core/ext/transport/chttp2/transport/stream_map.c - src/core/ext/transport/chttp2/transport/varint.c - src/core/ext/transport/chttp2/transport/writing.c - src/core/ext/transport/chttp2/alpn/alpn.c - src/core/ext/filters/http/client/http_client_filter.c - src/core/ext/filters/http/http_filters_plugin.c - src/core/ext/filters/http/message_compress/message_compress_filter.c - src/core/ext/filters/http/server/http_server_filter.c - src/core/ext/filters/client_channel/channel_connectivity.c - src/core/ext/filters/client_channel/client_channel.c - src/core/ext/filters/client_channel/client_channel_factory.c - src/core/ext/filters/client_channel/client_channel_plugin.c - src/core/ext/filters/client_channel/connector.c - src/core/ext/filters/client_channel/http_connect_handshaker.c - src/core/ext/filters/client_channel/http_proxy.c - src/core/ext/filters/client_channel/lb_policy.c - src/core/ext/filters/client_channel/lb_policy_factory.c - src/core/ext/filters/client_channel/lb_policy_registry.c - src/core/ext/filters/client_channel/parse_address.c - src/core/ext/filters/client_channel/proxy_mapper.c - src/core/ext/filters/client_channel/proxy_mapper_registry.c - src/core/ext/filters/client_channel/resolver.c - src/core/ext/filters/client_channel/resolver_factory.c - src/core/ext/filters/client_channel/resolver_registry.c - src/core/ext/filters/client_channel/retry_throttle.c - src/core/ext/filters/client_channel/subchannel.c - src/core/ext/filters/client_channel/subchannel_index.c - src/core/ext/filters/client_channel/uri_parser.c - src/core/ext/filters/deadline/deadline_filter.c - src/core/lib/http/httpcli_security_connector.c - src/core/lib/security/context/security_context.c - src/core/lib/security/credentials/composite/composite_credentials.c - src/core/lib/security/credentials/credentials.c - src/core/lib/security/credentials/credentials_metadata.c - src/core/lib/security/credentials/fake/fake_credentials.c - src/core/lib/security/credentials/google_default/credentials_generic.c - src/core/lib/security/credentials/google_default/google_default_credentials.c - src/core/lib/security/credentials/iam/iam_credentials.c - src/core/lib/security/credentials/jwt/json_token.c - src/core/lib/security/credentials/jwt/jwt_credentials.c - src/core/lib/security/credentials/jwt/jwt_verifier.c - src/core/lib/security/credentials/oauth2/oauth2_credentials.c - src/core/lib/security/credentials/plugin/plugin_credentials.c - src/core/lib/security/credentials/ssl/ssl_credentials.c - src/core/lib/security/transport/client_auth_filter.c - src/core/lib/security/transport/lb_targets_info.c - src/core/lib/security/transport/secure_endpoint.c - src/core/lib/security/transport/security_connector.c - src/core/lib/security/transport/security_handshaker.c - src/core/lib/security/transport/server_auth_filter.c - src/core/lib/security/transport/tsi_error.c - src/core/lib/security/util/json_util.c - src/core/lib/surface/init_secure.c - src/core/tsi/fake_transport_security.c - src/core/tsi/gts_transport_security.c - src/core/tsi/ssl_transport_security.c - src/core/tsi/transport_security.c - src/core/tsi/transport_security_adapter.c - src/core/ext/transport/chttp2/client/chttp2_connector.c - src/core/ext/filters/load_reporting/load_reporting.c - src/core/ext/filters/load_reporting/load_reporting_filter.c - src/core/plugin_registry/grpc_cronet_plugin_registry.c -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_cronet PROPERTIES COMPILE_PDB_NAME "grpc_cronet" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_cronet.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc_cronet - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_cronet - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ZLIB_LIBRARIES} - ${_gRPC_CARES_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr -) - -foreach(_hdr - include/grpc/byte_buffer.h - include/grpc/byte_buffer_reader.h - include/grpc/compression.h - include/grpc/grpc.h - include/grpc/grpc_posix.h - include/grpc/grpc_security_constants.h - include/grpc/load_reporting.h - include/grpc/slice.h - include/grpc/slice_buffer.h - include/grpc/status.h - include/grpc/support/workaround_list.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h - include/grpc/grpc_cronet.h - include/grpc/grpc_security.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc_cronet EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(grpc_test_util - test/core/end2end/data/client_certs.c - test/core/end2end/data/server1_cert.c - test/core/end2end/data/server1_key.c - test/core/end2end/data/test_root_cert.c - test/core/security/oauth2_utils.c - src/core/ext/filters/client_channel/resolver/fake/fake_resolver.c - test/core/end2end/cq_verifier.c - test/core/end2end/fixtures/http_proxy_fixture.c - test/core/end2end/fixtures/proxy.c - test/core/iomgr/endpoint_tests.c - test/core/util/debugger_macros.c - test/core/util/grpc_profiler.c - test/core/util/memory_counters.c - test/core/util/mock_endpoint.c - test/core/util/parse_hexstring.c - test/core/util/passthru_endpoint.c - test/core/util/port.c - test/core/util/port_server_client.c - test/core/util/slice_splitter.c - test/core/util/trickle_endpoint.c - src/core/lib/channel/channel_args.c - src/core/lib/channel/channel_stack.c - src/core/lib/channel/channel_stack_builder.c - src/core/lib/channel/connected_channel.c - src/core/lib/channel/handshaker.c - src/core/lib/channel/handshaker_factory.c - src/core/lib/channel/handshaker_registry.c - src/core/lib/compression/compression.c - src/core/lib/compression/message_compress.c - src/core/lib/compression/stream_compression.c - src/core/lib/http/format_request.c - src/core/lib/http/httpcli.c - src/core/lib/http/parser.c - src/core/lib/iomgr/closure.c - src/core/lib/iomgr/combiner.c - src/core/lib/iomgr/endpoint.c - src/core/lib/iomgr/endpoint_pair_posix.c - src/core/lib/iomgr/endpoint_pair_uv.c - src/core/lib/iomgr/endpoint_pair_windows.c - src/core/lib/iomgr/error.c - src/core/lib/iomgr/ev_epoll1_linux.c - src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c - src/core/lib/iomgr/ev_epoll_thread_pool_linux.c - src/core/lib/iomgr/ev_epollex_linux.c - src/core/lib/iomgr/ev_epollsig_linux.c - src/core/lib/iomgr/ev_poll_posix.c - src/core/lib/iomgr/ev_posix.c - src/core/lib/iomgr/ev_windows.c - src/core/lib/iomgr/exec_ctx.c - src/core/lib/iomgr/executor.c - src/core/lib/iomgr/iocp_windows.c - src/core/lib/iomgr/iomgr.c - src/core/lib/iomgr/iomgr_posix.c - src/core/lib/iomgr/iomgr_uv.c - src/core/lib/iomgr/iomgr_windows.c - src/core/lib/iomgr/is_epollexclusive_available.c - src/core/lib/iomgr/load_file.c - src/core/lib/iomgr/lockfree_event.c - src/core/lib/iomgr/network_status_tracker.c - src/core/lib/iomgr/polling_entity.c - src/core/lib/iomgr/pollset_set_uv.c - src/core/lib/iomgr/pollset_set_windows.c - src/core/lib/iomgr/pollset_uv.c - src/core/lib/iomgr/pollset_windows.c - src/core/lib/iomgr/resolve_address_posix.c - src/core/lib/iomgr/resolve_address_uv.c - src/core/lib/iomgr/resolve_address_windows.c - src/core/lib/iomgr/resource_quota.c - src/core/lib/iomgr/sockaddr_utils.c - src/core/lib/iomgr/socket_factory_posix.c - src/core/lib/iomgr/socket_mutator.c - src/core/lib/iomgr/socket_utils_common_posix.c - src/core/lib/iomgr/socket_utils_linux.c - src/core/lib/iomgr/socket_utils_posix.c - src/core/lib/iomgr/socket_utils_uv.c - src/core/lib/iomgr/socket_utils_windows.c - src/core/lib/iomgr/socket_windows.c - src/core/lib/iomgr/tcp_client_posix.c - src/core/lib/iomgr/tcp_client_uv.c - src/core/lib/iomgr/tcp_client_windows.c - src/core/lib/iomgr/tcp_posix.c - src/core/lib/iomgr/tcp_server_posix.c - src/core/lib/iomgr/tcp_server_utils_posix_common.c - src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c - src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c - src/core/lib/iomgr/tcp_server_uv.c - src/core/lib/iomgr/tcp_server_windows.c - src/core/lib/iomgr/tcp_uv.c - src/core/lib/iomgr/tcp_windows.c - src/core/lib/iomgr/time_averaged_stats.c - src/core/lib/iomgr/timer_generic.c - src/core/lib/iomgr/timer_heap.c - src/core/lib/iomgr/timer_manager.c - src/core/lib/iomgr/timer_uv.c - src/core/lib/iomgr/udp_server.c - src/core/lib/iomgr/unix_sockets_posix.c - src/core/lib/iomgr/unix_sockets_posix_noop.c - src/core/lib/iomgr/wakeup_fd_cv.c - src/core/lib/iomgr/wakeup_fd_eventfd.c - src/core/lib/iomgr/wakeup_fd_nospecial.c - src/core/lib/iomgr/wakeup_fd_pipe.c - src/core/lib/iomgr/wakeup_fd_posix.c - src/core/lib/json/json.c - src/core/lib/json/json_reader.c - src/core/lib/json/json_string.c - src/core/lib/json/json_writer.c - src/core/lib/slice/b64.c - src/core/lib/slice/percent_encoding.c - src/core/lib/slice/slice.c - src/core/lib/slice/slice_buffer.c - src/core/lib/slice/slice_hash_table.c - src/core/lib/slice/slice_intern.c - src/core/lib/slice/slice_string_helpers.c - src/core/lib/surface/alarm.c - src/core/lib/surface/api_trace.c - src/core/lib/surface/byte_buffer.c - src/core/lib/surface/byte_buffer_reader.c - src/core/lib/surface/call.c - src/core/lib/surface/call_details.c - src/core/lib/surface/call_log_batch.c - src/core/lib/surface/channel.c - src/core/lib/surface/channel_init.c - src/core/lib/surface/channel_ping.c - src/core/lib/surface/channel_stack_type.c - src/core/lib/surface/completion_queue.c - src/core/lib/surface/completion_queue_factory.c - src/core/lib/surface/event_string.c - src/core/lib/surface/lame_client.cc - src/core/lib/surface/metadata_array.c - src/core/lib/surface/server.c - src/core/lib/surface/validate_metadata.c - src/core/lib/surface/version.c - src/core/lib/transport/bdp_estimator.c - src/core/lib/transport/byte_stream.c - src/core/lib/transport/connectivity_state.c - src/core/lib/transport/error_utils.c - src/core/lib/transport/metadata.c - src/core/lib/transport/metadata_batch.c - src/core/lib/transport/pid_controller.c - src/core/lib/transport/service_config.c - src/core/lib/transport/static_metadata.c - src/core/lib/transport/status_conversion.c - src/core/lib/transport/timeout_encoding.c - src/core/lib/transport/transport.c - src/core/lib/transport/transport_op_string.c - src/core/lib/debug/trace.c -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_test_util PROPERTIES COMPILE_PDB_NAME "grpc_test_util" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_test_util.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc_test_util - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_test_util - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr - grpc -) - -foreach(_hdr - include/grpc/byte_buffer.h - include/grpc/byte_buffer_reader.h - include/grpc/compression.h - include/grpc/grpc.h - include/grpc/grpc_posix.h - include/grpc/grpc_security_constants.h - include/grpc/load_reporting.h - include/grpc/slice.h - include/grpc/slice_buffer.h - include/grpc/status.h - include/grpc/support/workaround_list.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(grpc_test_util_unsecure - src/core/ext/filters/client_channel/resolver/fake/fake_resolver.c - test/core/end2end/cq_verifier.c - test/core/end2end/fixtures/http_proxy_fixture.c - test/core/end2end/fixtures/proxy.c - test/core/iomgr/endpoint_tests.c - test/core/util/debugger_macros.c - test/core/util/grpc_profiler.c - test/core/util/memory_counters.c - test/core/util/mock_endpoint.c - test/core/util/parse_hexstring.c - test/core/util/passthru_endpoint.c - test/core/util/port.c - test/core/util/port_server_client.c - test/core/util/slice_splitter.c - test/core/util/trickle_endpoint.c -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_test_util_unsecure PROPERTIES COMPILE_PDB_NAME "grpc_test_util_unsecure" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_test_util_unsecure.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc_test_util_unsecure - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_test_util_unsecure - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr - gpr_test_util - grpc_unsecure - grpc -) - - -endif (gRPC_BUILD_TESTS) - -add_library(grpc_unsecure - src/core/lib/surface/init.c - src/core/lib/surface/init_unsecure.c - src/core/lib/channel/channel_args.c - src/core/lib/channel/channel_stack.c - src/core/lib/channel/channel_stack_builder.c - src/core/lib/channel/connected_channel.c - src/core/lib/channel/handshaker.c - src/core/lib/channel/handshaker_factory.c - src/core/lib/channel/handshaker_registry.c - src/core/lib/compression/compression.c - src/core/lib/compression/message_compress.c - src/core/lib/compression/stream_compression.c - src/core/lib/http/format_request.c - src/core/lib/http/httpcli.c - src/core/lib/http/parser.c - src/core/lib/iomgr/closure.c - src/core/lib/iomgr/combiner.c - src/core/lib/iomgr/endpoint.c - src/core/lib/iomgr/endpoint_pair_posix.c - src/core/lib/iomgr/endpoint_pair_uv.c - src/core/lib/iomgr/endpoint_pair_windows.c - src/core/lib/iomgr/error.c - src/core/lib/iomgr/ev_epoll1_linux.c - src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c - src/core/lib/iomgr/ev_epoll_thread_pool_linux.c - src/core/lib/iomgr/ev_epollex_linux.c - src/core/lib/iomgr/ev_epollsig_linux.c - src/core/lib/iomgr/ev_poll_posix.c - src/core/lib/iomgr/ev_posix.c - src/core/lib/iomgr/ev_windows.c - src/core/lib/iomgr/exec_ctx.c - src/core/lib/iomgr/executor.c - src/core/lib/iomgr/iocp_windows.c - src/core/lib/iomgr/iomgr.c - src/core/lib/iomgr/iomgr_posix.c - src/core/lib/iomgr/iomgr_uv.c - src/core/lib/iomgr/iomgr_windows.c - src/core/lib/iomgr/is_epollexclusive_available.c - src/core/lib/iomgr/load_file.c - src/core/lib/iomgr/lockfree_event.c - src/core/lib/iomgr/network_status_tracker.c - src/core/lib/iomgr/polling_entity.c - src/core/lib/iomgr/pollset_set_uv.c - src/core/lib/iomgr/pollset_set_windows.c - src/core/lib/iomgr/pollset_uv.c - src/core/lib/iomgr/pollset_windows.c - src/core/lib/iomgr/resolve_address_posix.c - src/core/lib/iomgr/resolve_address_uv.c - src/core/lib/iomgr/resolve_address_windows.c - src/core/lib/iomgr/resource_quota.c - src/core/lib/iomgr/sockaddr_utils.c - src/core/lib/iomgr/socket_factory_posix.c - src/core/lib/iomgr/socket_mutator.c - src/core/lib/iomgr/socket_utils_common_posix.c - src/core/lib/iomgr/socket_utils_linux.c - src/core/lib/iomgr/socket_utils_posix.c - src/core/lib/iomgr/socket_utils_uv.c - src/core/lib/iomgr/socket_utils_windows.c - src/core/lib/iomgr/socket_windows.c - src/core/lib/iomgr/tcp_client_posix.c - src/core/lib/iomgr/tcp_client_uv.c - src/core/lib/iomgr/tcp_client_windows.c - src/core/lib/iomgr/tcp_posix.c - src/core/lib/iomgr/tcp_server_posix.c - src/core/lib/iomgr/tcp_server_utils_posix_common.c - src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c - src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c - src/core/lib/iomgr/tcp_server_uv.c - src/core/lib/iomgr/tcp_server_windows.c - src/core/lib/iomgr/tcp_uv.c - src/core/lib/iomgr/tcp_windows.c - src/core/lib/iomgr/time_averaged_stats.c - src/core/lib/iomgr/timer_generic.c - src/core/lib/iomgr/timer_heap.c - src/core/lib/iomgr/timer_manager.c - src/core/lib/iomgr/timer_uv.c - src/core/lib/iomgr/udp_server.c - src/core/lib/iomgr/unix_sockets_posix.c - src/core/lib/iomgr/unix_sockets_posix_noop.c - src/core/lib/iomgr/wakeup_fd_cv.c - src/core/lib/iomgr/wakeup_fd_eventfd.c - src/core/lib/iomgr/wakeup_fd_nospecial.c - src/core/lib/iomgr/wakeup_fd_pipe.c - src/core/lib/iomgr/wakeup_fd_posix.c - src/core/lib/json/json.c - src/core/lib/json/json_reader.c - src/core/lib/json/json_string.c - src/core/lib/json/json_writer.c - src/core/lib/slice/b64.c - src/core/lib/slice/percent_encoding.c - src/core/lib/slice/slice.c - src/core/lib/slice/slice_buffer.c - src/core/lib/slice/slice_hash_table.c - src/core/lib/slice/slice_intern.c - src/core/lib/slice/slice_string_helpers.c - src/core/lib/surface/alarm.c - src/core/lib/surface/api_trace.c - src/core/lib/surface/byte_buffer.c - src/core/lib/surface/byte_buffer_reader.c - src/core/lib/surface/call.c - src/core/lib/surface/call_details.c - src/core/lib/surface/call_log_batch.c - src/core/lib/surface/channel.c - src/core/lib/surface/channel_init.c - src/core/lib/surface/channel_ping.c - src/core/lib/surface/channel_stack_type.c - src/core/lib/surface/completion_queue.c - src/core/lib/surface/completion_queue_factory.c - src/core/lib/surface/event_string.c - src/core/lib/surface/lame_client.cc - src/core/lib/surface/metadata_array.c - src/core/lib/surface/server.c - src/core/lib/surface/validate_metadata.c - src/core/lib/surface/version.c - src/core/lib/transport/bdp_estimator.c - src/core/lib/transport/byte_stream.c - src/core/lib/transport/connectivity_state.c - src/core/lib/transport/error_utils.c - src/core/lib/transport/metadata.c - src/core/lib/transport/metadata_batch.c - src/core/lib/transport/pid_controller.c - src/core/lib/transport/service_config.c - src/core/lib/transport/static_metadata.c - src/core/lib/transport/status_conversion.c - src/core/lib/transport/timeout_encoding.c - src/core/lib/transport/transport.c - src/core/lib/transport/transport_op_string.c - src/core/lib/debug/trace.c - src/core/ext/transport/chttp2/server/insecure/server_chttp2.c - src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.c - src/core/ext/transport/chttp2/transport/bin_decoder.c - src/core/ext/transport/chttp2/transport/bin_encoder.c - src/core/ext/transport/chttp2/transport/chttp2_plugin.c - src/core/ext/transport/chttp2/transport/chttp2_transport.c - src/core/ext/transport/chttp2/transport/frame_data.c - src/core/ext/transport/chttp2/transport/frame_goaway.c - src/core/ext/transport/chttp2/transport/frame_ping.c - src/core/ext/transport/chttp2/transport/frame_rst_stream.c - src/core/ext/transport/chttp2/transport/frame_settings.c - src/core/ext/transport/chttp2/transport/frame_window_update.c - src/core/ext/transport/chttp2/transport/hpack_encoder.c - src/core/ext/transport/chttp2/transport/hpack_parser.c - src/core/ext/transport/chttp2/transport/hpack_table.c - src/core/ext/transport/chttp2/transport/http2_settings.c - src/core/ext/transport/chttp2/transport/huffsyms.c - src/core/ext/transport/chttp2/transport/incoming_metadata.c - src/core/ext/transport/chttp2/transport/parsing.c - src/core/ext/transport/chttp2/transport/stream_lists.c - src/core/ext/transport/chttp2/transport/stream_map.c - src/core/ext/transport/chttp2/transport/varint.c - src/core/ext/transport/chttp2/transport/writing.c - src/core/ext/transport/chttp2/alpn/alpn.c - src/core/ext/filters/http/client/http_client_filter.c - src/core/ext/filters/http/http_filters_plugin.c - src/core/ext/filters/http/message_compress/message_compress_filter.c - src/core/ext/filters/http/server/http_server_filter.c - src/core/ext/transport/chttp2/server/chttp2_server.c - src/core/ext/transport/chttp2/client/insecure/channel_create.c - src/core/ext/transport/chttp2/client/insecure/channel_create_posix.c - src/core/ext/transport/chttp2/client/chttp2_connector.c - src/core/ext/filters/client_channel/channel_connectivity.c - src/core/ext/filters/client_channel/client_channel.c - src/core/ext/filters/client_channel/client_channel_factory.c - src/core/ext/filters/client_channel/client_channel_plugin.c - src/core/ext/filters/client_channel/connector.c - src/core/ext/filters/client_channel/http_connect_handshaker.c - src/core/ext/filters/client_channel/http_proxy.c - src/core/ext/filters/client_channel/lb_policy.c - src/core/ext/filters/client_channel/lb_policy_factory.c - src/core/ext/filters/client_channel/lb_policy_registry.c - src/core/ext/filters/client_channel/parse_address.c - src/core/ext/filters/client_channel/proxy_mapper.c - src/core/ext/filters/client_channel/proxy_mapper_registry.c - src/core/ext/filters/client_channel/resolver.c - src/core/ext/filters/client_channel/resolver_factory.c - src/core/ext/filters/client_channel/resolver_registry.c - src/core/ext/filters/client_channel/retry_throttle.c - src/core/ext/filters/client_channel/subchannel.c - src/core/ext/filters/client_channel/subchannel_index.c - src/core/ext/filters/client_channel/uri_parser.c - src/core/ext/filters/deadline/deadline_filter.c - src/core/ext/transport/inproc/inproc_plugin.c - src/core/ext/transport/inproc/inproc_transport.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.c - src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.c - src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.c - src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.c - src/core/ext/filters/client_channel/resolver/fake/fake_resolver.c - src/core/ext/filters/load_reporting/load_reporting.c - src/core/ext/filters/load_reporting/load_reporting_filter.c - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.c - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel.c - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.c - src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c - src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c - third_party/nanopb/pb_common.c - third_party/nanopb/pb_decode.c - third_party/nanopb/pb_encode.c - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c - src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c - src/core/ext/census/base_resources.c - src/core/ext/census/context.c - src/core/ext/census/gen/census.pb.c - src/core/ext/census/gen/trace_context.pb.c - src/core/ext/census/grpc_context.c - src/core/ext/census/grpc_filter.c - src/core/ext/census/grpc_plugin.c - src/core/ext/census/initialize.c - src/core/ext/census/intrusive_hash_map.c - src/core/ext/census/mlog.c - src/core/ext/census/operation.c - src/core/ext/census/placeholders.c - src/core/ext/census/resource.c - src/core/ext/census/trace_context.c - src/core/ext/census/tracing.c - src/core/ext/filters/max_age/max_age_filter.c - src/core/ext/filters/message_size/message_size_filter.c - src/core/ext/filters/workarounds/workaround_cronet_compression_filter.c - src/core/ext/filters/workarounds/workaround_utils.c - src/core/plugin_registry/grpc_unsecure_plugin_registry.c -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_unsecure PROPERTIES COMPILE_PDB_NAME "grpc_unsecure" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_unsecure.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc_unsecure - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_unsecure - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_ZLIB_LIBRARIES} - ${_gRPC_CARES_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr -) - -foreach(_hdr - include/grpc/byte_buffer.h - include/grpc/byte_buffer_reader.h - include/grpc/compression.h - include/grpc/grpc.h - include/grpc/grpc_posix.h - include/grpc/grpc_security_constants.h - include/grpc/load_reporting.h - include/grpc/slice.h - include/grpc/slice_buffer.h - include/grpc/status.h - include/grpc/support/workaround_list.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h - include/grpc/census.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc_unsecure EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(reconnect_server - test/core/util/reconnect_server.c -) - -if(WIN32 AND MSVC) - set_target_properties(reconnect_server PROPERTIES COMPILE_PDB_NAME "reconnect_server" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/reconnect_server.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(reconnect_server - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(reconnect_server - ${_gRPC_ALLTARGETS_LIBRARIES} - test_tcp_server - grpc_test_util - grpc - gpr_test_util - gpr -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(test_tcp_server - test/core/util/test_tcp_server.c -) - -if(WIN32 AND MSVC) - set_target_properties(test_tcp_server PROPERTIES COMPILE_PDB_NAME "test_tcp_server" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/test_tcp_server.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(test_tcp_server - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(test_tcp_server - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - - -endif (gRPC_BUILD_TESTS) - -add_library(grpc++ - src/cpp/client/insecure_credentials.cc - src/cpp/client/secure_credentials.cc - src/cpp/common/auth_property_iterator.cc - src/cpp/common/secure_auth_context.cc - src/cpp/common/secure_channel_arguments.cc - src/cpp/common/secure_create_auth_context.cc - src/cpp/server/insecure_server_credentials.cc - src/cpp/server/secure_server_credentials.cc - src/cpp/client/channel_cc.cc - src/cpp/client/client_context.cc - src/cpp/client/create_channel.cc - src/cpp/client/create_channel_internal.cc - src/cpp/client/create_channel_posix.cc - src/cpp/client/credentials_cc.cc - src/cpp/client/generic_stub.cc - src/cpp/common/channel_arguments.cc - src/cpp/common/channel_filter.cc - src/cpp/common/completion_queue_cc.cc - src/cpp/common/core_codegen.cc - src/cpp/common/resource_quota_cc.cc - src/cpp/common/rpc_method.cc - src/cpp/common/version_cc.cc - src/cpp/server/async_generic_service.cc - src/cpp/server/channel_argument_option.cc - src/cpp/server/create_default_thread_pool.cc - src/cpp/server/dynamic_thread_pool.cc - src/cpp/server/health/default_health_check_service.cc - src/cpp/server/health/health.pb.c - src/cpp/server/health/health_check_service.cc - src/cpp/server/health/health_check_service_server_builder_option.cc - src/cpp/server/server_builder.cc - src/cpp/server/server_cc.cc - src/cpp/server/server_context.cc - src/cpp/server/server_credentials.cc - src/cpp/server/server_posix.cc - src/cpp/thread_manager/thread_manager.cc - src/cpp/util/byte_buffer_cc.cc - src/cpp/util/slice_cc.cc - src/cpp/util/status.cc - src/cpp/util/string_ref.cc - src/cpp/util/time_cc.cc - third_party/nanopb/pb_common.c - third_party/nanopb/pb_decode.c - third_party/nanopb/pb_encode.c - src/cpp/codegen/codegen_init.cc -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++ PROPERTIES COMPILE_PDB_NAME "grpc++" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc++ - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++ - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc -) - -foreach(_hdr - include/grpc++/alarm.h - include/grpc++/channel.h - include/grpc++/client_context.h - include/grpc++/completion_queue.h - include/grpc++/create_channel.h - include/grpc++/create_channel_posix.h - include/grpc++/ext/health_check_service_server_builder_option.h - include/grpc++/generic/async_generic_service.h - include/grpc++/generic/generic_stub.h - include/grpc++/grpc++.h - include/grpc++/health_check_service_interface.h - include/grpc++/impl/call.h - include/grpc++/impl/channel_argument_option.h - include/grpc++/impl/client_unary_call.h - include/grpc++/impl/codegen/core_codegen.h - include/grpc++/impl/grpc_library.h - include/grpc++/impl/method_handler_impl.h - include/grpc++/impl/rpc_method.h - include/grpc++/impl/rpc_service_method.h - include/grpc++/impl/serialization_traits.h - include/grpc++/impl/server_builder_option.h - include/grpc++/impl/server_builder_plugin.h - include/grpc++/impl/server_initializer.h - include/grpc++/impl/service_type.h - include/grpc++/resource_quota.h - include/grpc++/security/auth_context.h - include/grpc++/security/auth_metadata_processor.h - include/grpc++/security/credentials.h - include/grpc++/security/server_credentials.h - include/grpc++/server.h - include/grpc++/server_builder.h - include/grpc++/server_context.h - include/grpc++/server_posix.h - include/grpc++/support/async_stream.h - include/grpc++/support/async_unary_call.h - include/grpc++/support/byte_buffer.h - include/grpc++/support/channel_arguments.h - include/grpc++/support/config.h - include/grpc++/support/slice.h - include/grpc++/support/status.h - include/grpc++/support/status_code_enum.h - include/grpc++/support/string_ref.h - include/grpc++/support/stub_options.h - include/grpc++/support/sync_stream.h - include/grpc++/support/time.h - include/grpc++/impl/codegen/async_stream.h - include/grpc++/impl/codegen/async_unary_call.h - include/grpc++/impl/codegen/call.h - include/grpc++/impl/codegen/call_hook.h - include/grpc++/impl/codegen/channel_interface.h - include/grpc++/impl/codegen/client_context.h - include/grpc++/impl/codegen/client_unary_call.h - include/grpc++/impl/codegen/completion_queue.h - include/grpc++/impl/codegen/completion_queue_tag.h - include/grpc++/impl/codegen/config.h - include/grpc++/impl/codegen/core_codegen_interface.h - include/grpc++/impl/codegen/create_auth_context.h - include/grpc++/impl/codegen/grpc_library.h - include/grpc++/impl/codegen/metadata_map.h - include/grpc++/impl/codegen/method_handler_impl.h - include/grpc++/impl/codegen/rpc_method.h - include/grpc++/impl/codegen/rpc_service_method.h - include/grpc++/impl/codegen/security/auth_context.h - include/grpc++/impl/codegen/serialization_traits.h - include/grpc++/impl/codegen/server_context.h - include/grpc++/impl/codegen/server_interface.h - include/grpc++/impl/codegen/service_type.h - include/grpc++/impl/codegen/slice.h - include/grpc++/impl/codegen/status.h - include/grpc++/impl/codegen/status_code_enum.h - include/grpc++/impl/codegen/string_ref.h - include/grpc++/impl/codegen/stub_options.h - include/grpc++/impl/codegen/sync_stream.h - include/grpc++/impl/codegen/time.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h - include/grpc++/impl/codegen/proto_utils.h - include/grpc++/impl/codegen/config_protobuf.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc++ EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_library(grpc++_cronet - src/cpp/client/cronet_credentials.cc - src/cpp/client/insecure_credentials.cc - src/cpp/common/insecure_create_auth_context.cc - src/cpp/server/insecure_server_credentials.cc - src/cpp/client/channel_cc.cc - src/cpp/client/client_context.cc - src/cpp/client/create_channel.cc - src/cpp/client/create_channel_internal.cc - src/cpp/client/create_channel_posix.cc - src/cpp/client/credentials_cc.cc - src/cpp/client/generic_stub.cc - src/cpp/common/channel_arguments.cc - src/cpp/common/channel_filter.cc - src/cpp/common/completion_queue_cc.cc - src/cpp/common/core_codegen.cc - src/cpp/common/resource_quota_cc.cc - src/cpp/common/rpc_method.cc - src/cpp/common/version_cc.cc - src/cpp/server/async_generic_service.cc - src/cpp/server/channel_argument_option.cc - src/cpp/server/create_default_thread_pool.cc - src/cpp/server/dynamic_thread_pool.cc - src/cpp/server/health/default_health_check_service.cc - src/cpp/server/health/health.pb.c - src/cpp/server/health/health_check_service.cc - src/cpp/server/health/health_check_service_server_builder_option.cc - src/cpp/server/server_builder.cc - src/cpp/server/server_cc.cc - src/cpp/server/server_context.cc - src/cpp/server/server_credentials.cc - src/cpp/server/server_posix.cc - src/cpp/thread_manager/thread_manager.cc - src/cpp/util/byte_buffer_cc.cc - src/cpp/util/slice_cc.cc - src/cpp/util/status.cc - src/cpp/util/string_ref.cc - src/cpp/util/time_cc.cc - third_party/nanopb/pb_common.c - third_party/nanopb/pb_decode.c - third_party/nanopb/pb_encode.c - src/cpp/codegen/codegen_init.cc - src/core/ext/transport/chttp2/client/insecure/channel_create.c - src/core/ext/transport/chttp2/client/insecure/channel_create_posix.c - src/core/ext/transport/chttp2/client/chttp2_connector.c - src/core/ext/transport/chttp2/transport/bin_decoder.c - src/core/ext/transport/chttp2/transport/bin_encoder.c - src/core/ext/transport/chttp2/transport/chttp2_plugin.c - src/core/ext/transport/chttp2/transport/chttp2_transport.c - src/core/ext/transport/chttp2/transport/frame_data.c - src/core/ext/transport/chttp2/transport/frame_goaway.c - src/core/ext/transport/chttp2/transport/frame_ping.c - src/core/ext/transport/chttp2/transport/frame_rst_stream.c - src/core/ext/transport/chttp2/transport/frame_settings.c - src/core/ext/transport/chttp2/transport/frame_window_update.c - src/core/ext/transport/chttp2/transport/hpack_encoder.c - src/core/ext/transport/chttp2/transport/hpack_parser.c - src/core/ext/transport/chttp2/transport/hpack_table.c - src/core/ext/transport/chttp2/transport/http2_settings.c - src/core/ext/transport/chttp2/transport/huffsyms.c - src/core/ext/transport/chttp2/transport/incoming_metadata.c - src/core/ext/transport/chttp2/transport/parsing.c - src/core/ext/transport/chttp2/transport/stream_lists.c - src/core/ext/transport/chttp2/transport/stream_map.c - src/core/ext/transport/chttp2/transport/varint.c - src/core/ext/transport/chttp2/transport/writing.c - src/core/lib/channel/channel_args.c - src/core/lib/channel/channel_stack.c - src/core/lib/channel/channel_stack_builder.c - src/core/lib/channel/connected_channel.c - src/core/lib/channel/handshaker.c - src/core/lib/channel/handshaker_factory.c - src/core/lib/channel/handshaker_registry.c - src/core/lib/compression/compression.c - src/core/lib/compression/message_compress.c - src/core/lib/compression/stream_compression.c - src/core/lib/http/format_request.c - src/core/lib/http/httpcli.c - src/core/lib/http/parser.c - src/core/lib/iomgr/closure.c - src/core/lib/iomgr/combiner.c - src/core/lib/iomgr/endpoint.c - src/core/lib/iomgr/endpoint_pair_posix.c - src/core/lib/iomgr/endpoint_pair_uv.c - src/core/lib/iomgr/endpoint_pair_windows.c - src/core/lib/iomgr/error.c - src/core/lib/iomgr/ev_epoll1_linux.c - src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c - src/core/lib/iomgr/ev_epoll_thread_pool_linux.c - src/core/lib/iomgr/ev_epollex_linux.c - src/core/lib/iomgr/ev_epollsig_linux.c - src/core/lib/iomgr/ev_poll_posix.c - src/core/lib/iomgr/ev_posix.c - src/core/lib/iomgr/ev_windows.c - src/core/lib/iomgr/exec_ctx.c - src/core/lib/iomgr/executor.c - src/core/lib/iomgr/iocp_windows.c - src/core/lib/iomgr/iomgr.c - src/core/lib/iomgr/iomgr_posix.c - src/core/lib/iomgr/iomgr_uv.c - src/core/lib/iomgr/iomgr_windows.c - src/core/lib/iomgr/is_epollexclusive_available.c - src/core/lib/iomgr/load_file.c - src/core/lib/iomgr/lockfree_event.c - src/core/lib/iomgr/network_status_tracker.c - src/core/lib/iomgr/polling_entity.c - src/core/lib/iomgr/pollset_set_uv.c - src/core/lib/iomgr/pollset_set_windows.c - src/core/lib/iomgr/pollset_uv.c - src/core/lib/iomgr/pollset_windows.c - src/core/lib/iomgr/resolve_address_posix.c - src/core/lib/iomgr/resolve_address_uv.c - src/core/lib/iomgr/resolve_address_windows.c - src/core/lib/iomgr/resource_quota.c - src/core/lib/iomgr/sockaddr_utils.c - src/core/lib/iomgr/socket_factory_posix.c - src/core/lib/iomgr/socket_mutator.c - src/core/lib/iomgr/socket_utils_common_posix.c - src/core/lib/iomgr/socket_utils_linux.c - src/core/lib/iomgr/socket_utils_posix.c - src/core/lib/iomgr/socket_utils_uv.c - src/core/lib/iomgr/socket_utils_windows.c - src/core/lib/iomgr/socket_windows.c - src/core/lib/iomgr/tcp_client_posix.c - src/core/lib/iomgr/tcp_client_uv.c - src/core/lib/iomgr/tcp_client_windows.c - src/core/lib/iomgr/tcp_posix.c - src/core/lib/iomgr/tcp_server_posix.c - src/core/lib/iomgr/tcp_server_utils_posix_common.c - src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c - src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c - src/core/lib/iomgr/tcp_server_uv.c - src/core/lib/iomgr/tcp_server_windows.c - src/core/lib/iomgr/tcp_uv.c - src/core/lib/iomgr/tcp_windows.c - src/core/lib/iomgr/time_averaged_stats.c - src/core/lib/iomgr/timer_generic.c - src/core/lib/iomgr/timer_heap.c - src/core/lib/iomgr/timer_manager.c - src/core/lib/iomgr/timer_uv.c - src/core/lib/iomgr/udp_server.c - src/core/lib/iomgr/unix_sockets_posix.c - src/core/lib/iomgr/unix_sockets_posix_noop.c - src/core/lib/iomgr/wakeup_fd_cv.c - src/core/lib/iomgr/wakeup_fd_eventfd.c - src/core/lib/iomgr/wakeup_fd_nospecial.c - src/core/lib/iomgr/wakeup_fd_pipe.c - src/core/lib/iomgr/wakeup_fd_posix.c - src/core/lib/json/json.c - src/core/lib/json/json_reader.c - src/core/lib/json/json_string.c - src/core/lib/json/json_writer.c - src/core/lib/slice/b64.c - src/core/lib/slice/percent_encoding.c - src/core/lib/slice/slice.c - src/core/lib/slice/slice_buffer.c - src/core/lib/slice/slice_hash_table.c - src/core/lib/slice/slice_intern.c - src/core/lib/slice/slice_string_helpers.c - src/core/lib/surface/alarm.c - src/core/lib/surface/api_trace.c - src/core/lib/surface/byte_buffer.c - src/core/lib/surface/byte_buffer_reader.c - src/core/lib/surface/call.c - src/core/lib/surface/call_details.c - src/core/lib/surface/call_log_batch.c - src/core/lib/surface/channel.c - src/core/lib/surface/channel_init.c - src/core/lib/surface/channel_ping.c - src/core/lib/surface/channel_stack_type.c - src/core/lib/surface/completion_queue.c - src/core/lib/surface/completion_queue_factory.c - src/core/lib/surface/event_string.c - src/core/lib/surface/lame_client.cc - src/core/lib/surface/metadata_array.c - src/core/lib/surface/server.c - src/core/lib/surface/validate_metadata.c - src/core/lib/surface/version.c - src/core/lib/transport/bdp_estimator.c - src/core/lib/transport/byte_stream.c - src/core/lib/transport/connectivity_state.c - src/core/lib/transport/error_utils.c - src/core/lib/transport/metadata.c - src/core/lib/transport/metadata_batch.c - src/core/lib/transport/pid_controller.c - src/core/lib/transport/service_config.c - src/core/lib/transport/static_metadata.c - src/core/lib/transport/status_conversion.c - src/core/lib/transport/timeout_encoding.c - src/core/lib/transport/transport.c - src/core/lib/transport/transport_op_string.c - src/core/lib/debug/trace.c - src/core/ext/transport/chttp2/alpn/alpn.c - src/core/ext/filters/http/client/http_client_filter.c - src/core/ext/filters/http/http_filters_plugin.c - src/core/ext/filters/http/message_compress/message_compress_filter.c - src/core/ext/filters/http/server/http_server_filter.c - src/core/ext/filters/client_channel/channel_connectivity.c - src/core/ext/filters/client_channel/client_channel.c - src/core/ext/filters/client_channel/client_channel_factory.c - src/core/ext/filters/client_channel/client_channel_plugin.c - src/core/ext/filters/client_channel/connector.c - src/core/ext/filters/client_channel/http_connect_handshaker.c - src/core/ext/filters/client_channel/http_proxy.c - src/core/ext/filters/client_channel/lb_policy.c - src/core/ext/filters/client_channel/lb_policy_factory.c - src/core/ext/filters/client_channel/lb_policy_registry.c - src/core/ext/filters/client_channel/parse_address.c - src/core/ext/filters/client_channel/proxy_mapper.c - src/core/ext/filters/client_channel/proxy_mapper_registry.c - src/core/ext/filters/client_channel/resolver.c - src/core/ext/filters/client_channel/resolver_factory.c - src/core/ext/filters/client_channel/resolver_registry.c - src/core/ext/filters/client_channel/retry_throttle.c - src/core/ext/filters/client_channel/subchannel.c - src/core/ext/filters/client_channel/subchannel_index.c - src/core/ext/filters/client_channel/uri_parser.c - src/core/ext/filters/deadline/deadline_filter.c - src/core/ext/transport/chttp2/server/insecure/server_chttp2.c - src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.c - src/core/ext/transport/chttp2/server/chttp2_server.c - src/core/ext/census/base_resources.c - src/core/ext/census/context.c - src/core/ext/census/gen/census.pb.c - src/core/ext/census/gen/trace_context.pb.c - src/core/ext/census/grpc_context.c - src/core/ext/census/grpc_filter.c - src/core/ext/census/grpc_plugin.c - src/core/ext/census/initialize.c - src/core/ext/census/intrusive_hash_map.c - src/core/ext/census/mlog.c - src/core/ext/census/operation.c - src/core/ext/census/placeholders.c - src/core/ext/census/resource.c - src/core/ext/census/trace_context.c - src/core/ext/census/tracing.c -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++_cronet PROPERTIES COMPILE_PDB_NAME "grpc++_cronet" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++_cronet.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc++_cronet - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++_cronet - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr - grpc_cronet - grpc -) - -foreach(_hdr - include/grpc++/alarm.h - include/grpc++/channel.h - include/grpc++/client_context.h - include/grpc++/completion_queue.h - include/grpc++/create_channel.h - include/grpc++/create_channel_posix.h - include/grpc++/ext/health_check_service_server_builder_option.h - include/grpc++/generic/async_generic_service.h - include/grpc++/generic/generic_stub.h - include/grpc++/grpc++.h - include/grpc++/health_check_service_interface.h - include/grpc++/impl/call.h - include/grpc++/impl/channel_argument_option.h - include/grpc++/impl/client_unary_call.h - include/grpc++/impl/codegen/core_codegen.h - include/grpc++/impl/grpc_library.h - include/grpc++/impl/method_handler_impl.h - include/grpc++/impl/rpc_method.h - include/grpc++/impl/rpc_service_method.h - include/grpc++/impl/serialization_traits.h - include/grpc++/impl/server_builder_option.h - include/grpc++/impl/server_builder_plugin.h - include/grpc++/impl/server_initializer.h - include/grpc++/impl/service_type.h - include/grpc++/resource_quota.h - include/grpc++/security/auth_context.h - include/grpc++/security/auth_metadata_processor.h - include/grpc++/security/credentials.h - include/grpc++/security/server_credentials.h - include/grpc++/server.h - include/grpc++/server_builder.h - include/grpc++/server_context.h - include/grpc++/server_posix.h - include/grpc++/support/async_stream.h - include/grpc++/support/async_unary_call.h - include/grpc++/support/byte_buffer.h - include/grpc++/support/channel_arguments.h - include/grpc++/support/config.h - include/grpc++/support/slice.h - include/grpc++/support/status.h - include/grpc++/support/status_code_enum.h - include/grpc++/support/string_ref.h - include/grpc++/support/stub_options.h - include/grpc++/support/sync_stream.h - include/grpc++/support/time.h - include/grpc++/impl/codegen/async_stream.h - include/grpc++/impl/codegen/async_unary_call.h - include/grpc++/impl/codegen/call.h - include/grpc++/impl/codegen/call_hook.h - include/grpc++/impl/codegen/channel_interface.h - include/grpc++/impl/codegen/client_context.h - include/grpc++/impl/codegen/client_unary_call.h - include/grpc++/impl/codegen/completion_queue.h - include/grpc++/impl/codegen/completion_queue_tag.h - include/grpc++/impl/codegen/config.h - include/grpc++/impl/codegen/core_codegen_interface.h - include/grpc++/impl/codegen/create_auth_context.h - include/grpc++/impl/codegen/grpc_library.h - include/grpc++/impl/codegen/metadata_map.h - include/grpc++/impl/codegen/method_handler_impl.h - include/grpc++/impl/codegen/rpc_method.h - include/grpc++/impl/codegen/rpc_service_method.h - include/grpc++/impl/codegen/security/auth_context.h - include/grpc++/impl/codegen/serialization_traits.h - include/grpc++/impl/codegen/server_context.h - include/grpc++/impl/codegen/server_interface.h - include/grpc++/impl/codegen/service_type.h - include/grpc++/impl/codegen/slice.h - include/grpc++/impl/codegen/status.h - include/grpc++/impl/codegen/status_code_enum.h - include/grpc++/impl/codegen/string_ref.h - include/grpc++/impl/codegen/stub_options.h - include/grpc++/impl/codegen/sync_stream.h - include/grpc++/impl/codegen/time.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h - include/grpc/byte_buffer.h - include/grpc/byte_buffer_reader.h - include/grpc/compression.h - include/grpc/grpc.h - include/grpc/grpc_posix.h - include/grpc/grpc_security_constants.h - include/grpc/load_reporting.h - include/grpc/slice.h - include/grpc/slice_buffer.h - include/grpc/status.h - include/grpc/support/workaround_list.h - include/grpc/census.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc++_cronet EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_library(grpc++_error_details - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/status/status.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/status/status.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/status/status.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/status/status.grpc.pb.h - src/cpp/util/error_details.cc -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++_error_details PROPERTIES COMPILE_PDB_NAME "grpc++_error_details" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++_error_details.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/status/status.proto -) - -target_include_directories(grpc++_error_details - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++_error_details - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ -) - -foreach(_hdr - include/grpc++/support/error_details.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc++_error_details EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(grpc++_proto_reflection_desc_db - test/cpp/util/proto_reflection_descriptor_database.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.grpc.pb.h -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++_proto_reflection_desc_db PROPERTIES COMPILE_PDB_NAME "grpc++_proto_reflection_desc_db" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++_proto_reflection_desc_db.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/reflection/v1alpha/reflection.proto -) - -target_include_directories(grpc++_proto_reflection_desc_db - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++_proto_reflection_desc_db - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc -) - -foreach(_hdr - include/grpc++/impl/codegen/config_protobuf.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - -endif (gRPC_BUILD_TESTS) - -add_library(grpc++_reflection - src/cpp/ext/proto_server_reflection.cc - src/cpp/ext/proto_server_reflection_plugin.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.grpc.pb.h -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++_reflection PROPERTIES COMPILE_PDB_NAME "grpc++_reflection" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++_reflection.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/reflection/v1alpha/reflection.proto -) - -target_include_directories(grpc++_reflection - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++_reflection - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc -) - -foreach(_hdr - include/grpc++/ext/proto_server_reflection_plugin.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc++_reflection EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(grpc++_test_config - test/cpp/util/test_config_cc.cc -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++_test_config PROPERTIES COMPILE_PDB_NAME "grpc++_test_config" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++_test_config.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc++_test_config - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++_test_config - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(grpc++_test_util - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/health/v1/health.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/health/v1/health.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/health/v1/health.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/health/v1/health.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_mock.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/duplicate/echo_duplicate.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/duplicate/echo_duplicate.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/duplicate/echo_duplicate.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/duplicate/echo_duplicate.grpc.pb.h - test/cpp/end2end/test_service_impl.cc - test/cpp/util/byte_buffer_proto_helper.cc - test/cpp/util/create_test_channel.cc - test/cpp/util/string_ref_helper.cc - test/cpp/util/subprocess.cc - test/cpp/util/test_credentials_provider.cc - src/cpp/codegen/codegen_init.cc -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++_test_util PROPERTIES COMPILE_PDB_NAME "grpc++_test_util" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++_test_util.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/health/v1/health.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo_messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/duplicate/echo_duplicate.proto -) - -target_include_directories(grpc++_test_util - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++_test_util - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc_test_util - grpc -) - -foreach(_hdr - include/grpc++/impl/codegen/async_stream.h - include/grpc++/impl/codegen/async_unary_call.h - include/grpc++/impl/codegen/call.h - include/grpc++/impl/codegen/call_hook.h - include/grpc++/impl/codegen/channel_interface.h - include/grpc++/impl/codegen/client_context.h - include/grpc++/impl/codegen/client_unary_call.h - include/grpc++/impl/codegen/completion_queue.h - include/grpc++/impl/codegen/completion_queue_tag.h - include/grpc++/impl/codegen/config.h - include/grpc++/impl/codegen/core_codegen_interface.h - include/grpc++/impl/codegen/create_auth_context.h - include/grpc++/impl/codegen/grpc_library.h - include/grpc++/impl/codegen/metadata_map.h - include/grpc++/impl/codegen/method_handler_impl.h - include/grpc++/impl/codegen/rpc_method.h - include/grpc++/impl/codegen/rpc_service_method.h - include/grpc++/impl/codegen/security/auth_context.h - include/grpc++/impl/codegen/serialization_traits.h - include/grpc++/impl/codegen/server_context.h - include/grpc++/impl/codegen/server_interface.h - include/grpc++/impl/codegen/service_type.h - include/grpc++/impl/codegen/slice.h - include/grpc++/impl/codegen/status.h - include/grpc++/impl/codegen/status_code_enum.h - include/grpc++/impl/codegen/string_ref.h - include/grpc++/impl/codegen/stub_options.h - include/grpc++/impl/codegen/sync_stream.h - include/grpc++/impl/codegen/time.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h - include/grpc++/impl/codegen/proto_utils.h - include/grpc++/impl/codegen/config_protobuf.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - -endif (gRPC_BUILD_TESTS) - -add_library(grpc++_unsecure - src/cpp/client/insecure_credentials.cc - src/cpp/common/insecure_create_auth_context.cc - src/cpp/server/insecure_server_credentials.cc - src/cpp/client/channel_cc.cc - src/cpp/client/client_context.cc - src/cpp/client/create_channel.cc - src/cpp/client/create_channel_internal.cc - src/cpp/client/create_channel_posix.cc - src/cpp/client/credentials_cc.cc - src/cpp/client/generic_stub.cc - src/cpp/common/channel_arguments.cc - src/cpp/common/channel_filter.cc - src/cpp/common/completion_queue_cc.cc - src/cpp/common/core_codegen.cc - src/cpp/common/resource_quota_cc.cc - src/cpp/common/rpc_method.cc - src/cpp/common/version_cc.cc - src/cpp/server/async_generic_service.cc - src/cpp/server/channel_argument_option.cc - src/cpp/server/create_default_thread_pool.cc - src/cpp/server/dynamic_thread_pool.cc - src/cpp/server/health/default_health_check_service.cc - src/cpp/server/health/health.pb.c - src/cpp/server/health/health_check_service.cc - src/cpp/server/health/health_check_service_server_builder_option.cc - src/cpp/server/server_builder.cc - src/cpp/server/server_cc.cc - src/cpp/server/server_context.cc - src/cpp/server/server_credentials.cc - src/cpp/server/server_posix.cc - src/cpp/thread_manager/thread_manager.cc - src/cpp/util/byte_buffer_cc.cc - src/cpp/util/slice_cc.cc - src/cpp/util/status.cc - src/cpp/util/string_ref.cc - src/cpp/util/time_cc.cc - third_party/nanopb/pb_common.c - third_party/nanopb/pb_decode.c - third_party/nanopb/pb_encode.c - src/cpp/codegen/codegen_init.cc -) - -if(WIN32 AND MSVC) - set_target_properties(grpc++_unsecure PROPERTIES COMPILE_PDB_NAME "grpc++_unsecure" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc++_unsecure.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc++_unsecure - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc++_unsecure - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr - grpc_unsecure -) - -foreach(_hdr - include/grpc++/alarm.h - include/grpc++/channel.h - include/grpc++/client_context.h - include/grpc++/completion_queue.h - include/grpc++/create_channel.h - include/grpc++/create_channel_posix.h - include/grpc++/ext/health_check_service_server_builder_option.h - include/grpc++/generic/async_generic_service.h - include/grpc++/generic/generic_stub.h - include/grpc++/grpc++.h - include/grpc++/health_check_service_interface.h - include/grpc++/impl/call.h - include/grpc++/impl/channel_argument_option.h - include/grpc++/impl/client_unary_call.h - include/grpc++/impl/codegen/core_codegen.h - include/grpc++/impl/grpc_library.h - include/grpc++/impl/method_handler_impl.h - include/grpc++/impl/rpc_method.h - include/grpc++/impl/rpc_service_method.h - include/grpc++/impl/serialization_traits.h - include/grpc++/impl/server_builder_option.h - include/grpc++/impl/server_builder_plugin.h - include/grpc++/impl/server_initializer.h - include/grpc++/impl/service_type.h - include/grpc++/resource_quota.h - include/grpc++/security/auth_context.h - include/grpc++/security/auth_metadata_processor.h - include/grpc++/security/credentials.h - include/grpc++/security/server_credentials.h - include/grpc++/server.h - include/grpc++/server_builder.h - include/grpc++/server_context.h - include/grpc++/server_posix.h - include/grpc++/support/async_stream.h - include/grpc++/support/async_unary_call.h - include/grpc++/support/byte_buffer.h - include/grpc++/support/channel_arguments.h - include/grpc++/support/config.h - include/grpc++/support/slice.h - include/grpc++/support/status.h - include/grpc++/support/status_code_enum.h - include/grpc++/support/string_ref.h - include/grpc++/support/stub_options.h - include/grpc++/support/sync_stream.h - include/grpc++/support/time.h - include/grpc++/impl/codegen/async_stream.h - include/grpc++/impl/codegen/async_unary_call.h - include/grpc++/impl/codegen/call.h - include/grpc++/impl/codegen/call_hook.h - include/grpc++/impl/codegen/channel_interface.h - include/grpc++/impl/codegen/client_context.h - include/grpc++/impl/codegen/client_unary_call.h - include/grpc++/impl/codegen/completion_queue.h - include/grpc++/impl/codegen/completion_queue_tag.h - include/grpc++/impl/codegen/config.h - include/grpc++/impl/codegen/core_codegen_interface.h - include/grpc++/impl/codegen/create_auth_context.h - include/grpc++/impl/codegen/grpc_library.h - include/grpc++/impl/codegen/metadata_map.h - include/grpc++/impl/codegen/method_handler_impl.h - include/grpc++/impl/codegen/rpc_method.h - include/grpc++/impl/codegen/rpc_service_method.h - include/grpc++/impl/codegen/security/auth_context.h - include/grpc++/impl/codegen/serialization_traits.h - include/grpc++/impl/codegen/server_context.h - include/grpc++/impl/codegen/server_interface.h - include/grpc++/impl/codegen/service_type.h - include/grpc++/impl/codegen/slice.h - include/grpc++/impl/codegen/status.h - include/grpc++/impl/codegen/status_code_enum.h - include/grpc++/impl/codegen/string_ref.h - include/grpc++/impl/codegen/stub_options.h - include/grpc++/impl/codegen/sync_stream.h - include/grpc++/impl/codegen/time.h - include/grpc/impl/codegen/byte_buffer_reader.h - include/grpc/impl/codegen/compression_types.h - include/grpc/impl/codegen/connectivity_state.h - include/grpc/impl/codegen/exec_ctx_fwd.h - include/grpc/impl/codegen/grpc_types.h - include/grpc/impl/codegen/propagation_bits.h - include/grpc/impl/codegen/slice.h - include/grpc/impl/codegen/status.h - include/grpc/impl/codegen/atm.h - include/grpc/impl/codegen/atm_gcc_atomic.h - include/grpc/impl/codegen/atm_gcc_sync.h - include/grpc/impl/codegen/atm_windows.h - include/grpc/impl/codegen/gpr_slice.h - include/grpc/impl/codegen/gpr_types.h - include/grpc/impl/codegen/port_platform.h - include/grpc/impl/codegen/sync.h - include/grpc/impl/codegen/sync_generic.h - include/grpc/impl/codegen/sync_posix.h - include/grpc/impl/codegen/sync_windows.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc++_unsecure EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(grpc_benchmark - test/cpp/microbenchmarks/helpers.cc -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_benchmark PROPERTIES COMPILE_PDB_NAME "grpc_benchmark" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_benchmark.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc_benchmark - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_benchmark - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - benchmark - grpc++ - grpc_test_util - grpc - ${_gRPC_GFLAGS_LIBRARIES} -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(grpc_cli_libs - test/cpp/util/cli_call.cc - test/cpp/util/cli_credentials.cc - test/cpp/util/grpc_tool.cc - test/cpp/util/proto_file_parser.cc - test/cpp/util/service_describer.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/reflection/v1alpha/reflection.grpc.pb.h -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_cli_libs PROPERTIES COMPILE_PDB_NAME "grpc_cli_libs" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_cli_libs.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/reflection/v1alpha/reflection.proto -) - -target_include_directories(grpc_cli_libs - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_cli_libs - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_proto_reflection_desc_db - grpc++ - grpc -) - -foreach(_hdr - include/grpc++/impl/codegen/config_protobuf.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - -endif (gRPC_BUILD_TESTS) - -add_library(grpc_plugin_support - src/compiler/cpp_generator.cc - src/compiler/csharp_generator.cc - src/compiler/node_generator.cc - src/compiler/objective_c_generator.cc - src/compiler/php_generator.cc - src/compiler/python_generator.cc - src/compiler/ruby_generator.cc -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_plugin_support PROPERTIES COMPILE_PDB_NAME "grpc_plugin_support" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_plugin_support.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc_plugin_support - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_plugin_support - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} -) - -foreach(_hdr - include/grpc++/impl/codegen/config_protobuf.h -) - string(REPLACE "include/" "" _path ${_hdr}) - get_filename_component(_path ${_path} PATH) - install(FILES ${_hdr} - DESTINATION "${gRPC_INSTALL_INCLUDEDIR}/${_path}" - ) -endforeach() - - -if (gRPC_INSTALL) - install(TARGETS grpc_plugin_support EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(http2_client_main - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.h - test/cpp/interop/http2_client.cc -) - -if(WIN32 AND MSVC) - set_target_properties(http2_client_main PROPERTIES COMPILE_PDB_NAME "http2_client_main" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/http2_client_main.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/empty.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/test.proto -) - -target_include_directories(http2_client_main - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(http2_client_main - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - grpc++_test_config -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(interop_client_helper - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - test/cpp/interop/client_helper.cc -) - -if(WIN32 AND MSVC) - set_target_properties(interop_client_helper PROPERTIES COMPILE_PDB_NAME "interop_client_helper" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/interop_client_helper.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) - -target_include_directories(interop_client_helper - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_client_helper - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(interop_client_main - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.h - test/cpp/interop/client.cc - test/cpp/interop/interop_client.cc -) - -if(WIN32 AND MSVC) - set_target_properties(interop_client_main PROPERTIES COMPILE_PDB_NAME "interop_client_main" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/interop_client_main.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/empty.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/test.proto -) - -target_include_directories(interop_client_main - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_client_main - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - interop_client_helper - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(interop_server_helper - test/cpp/interop/server_helper.cc -) - -if(WIN32 AND MSVC) - set_target_properties(interop_server_helper PROPERTIES COMPILE_PDB_NAME "interop_server_helper" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/interop_server_helper.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(interop_server_helper - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_server_helper - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(interop_server_lib - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.h - test/cpp/interop/interop_server.cc -) - -if(WIN32 AND MSVC) - set_target_properties(interop_server_lib PROPERTIES COMPILE_PDB_NAME "interop_server_lib" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/interop_server_lib.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/empty.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/test.proto -) - -target_include_directories(interop_server_lib - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_server_lib - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - interop_server_helper - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(interop_server_main - test/cpp/interop/interop_server_bootstrap.cc -) - -if(WIN32 AND MSVC) - set_target_properties(interop_server_main PROPERTIES COMPILE_PDB_NAME "interop_server_main" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/interop_server_main.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(interop_server_main - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_server_main - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - interop_server_lib -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(qps - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.grpc.pb.h - test/cpp/qps/benchmark_config.cc - test/cpp/qps/client_async.cc - test/cpp/qps/client_sync.cc - test/cpp/qps/driver.cc - test/cpp/qps/parse_json.cc - test/cpp/qps/qps_worker.cc - test/cpp/qps/report.cc - test/cpp/qps/server_async.cc - test/cpp/qps/server_sync.cc - test/cpp/qps/usage_timer.cc -) - -if(WIN32 AND MSVC) - set_target_properties(qps PROPERTIES COMPILE_PDB_NAME "qps" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/qps.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/payloads.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/stats.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/control.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/services.proto -) - -target_include_directories(qps - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(qps - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc++_test_util - grpc++ - grpc -) - - -endif (gRPC_BUILD_TESTS) - -add_library(grpc_csharp_ext SHARED - src/csharp/ext/grpc_csharp_ext.c -) - -if(WIN32 AND MSVC) - set_target_properties(grpc_csharp_ext PROPERTIES COMPILE_PDB_NAME "grpc_csharp_ext" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/grpc_csharp_ext.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(grpc_csharp_ext - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_csharp_ext - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr -) - - - -if (gRPC_INSTALL) - install(TARGETS grpc_csharp_ext EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_library(ares - third_party/cares/cares/ares__close_sockets.c - third_party/cares/cares/ares__get_hostent.c - third_party/cares/cares/ares__read_line.c - third_party/cares/cares/ares__timeval.c - third_party/cares/cares/ares_cancel.c - third_party/cares/cares/ares_create_query.c - third_party/cares/cares/ares_data.c - third_party/cares/cares/ares_destroy.c - third_party/cares/cares/ares_expand_name.c - third_party/cares/cares/ares_expand_string.c - third_party/cares/cares/ares_fds.c - third_party/cares/cares/ares_free_hostent.c - third_party/cares/cares/ares_free_string.c - third_party/cares/cares/ares_getenv.c - third_party/cares/cares/ares_gethostbyaddr.c - third_party/cares/cares/ares_gethostbyname.c - third_party/cares/cares/ares_getnameinfo.c - third_party/cares/cares/ares_getopt.c - third_party/cares/cares/ares_getsock.c - third_party/cares/cares/ares_init.c - third_party/cares/cares/ares_library_init.c - third_party/cares/cares/ares_llist.c - third_party/cares/cares/ares_mkquery.c - third_party/cares/cares/ares_nowarn.c - third_party/cares/cares/ares_options.c - third_party/cares/cares/ares_parse_a_reply.c - third_party/cares/cares/ares_parse_aaaa_reply.c - third_party/cares/cares/ares_parse_mx_reply.c - third_party/cares/cares/ares_parse_naptr_reply.c - third_party/cares/cares/ares_parse_ns_reply.c - third_party/cares/cares/ares_parse_ptr_reply.c - third_party/cares/cares/ares_parse_soa_reply.c - third_party/cares/cares/ares_parse_srv_reply.c - third_party/cares/cares/ares_parse_txt_reply.c - third_party/cares/cares/ares_platform.c - third_party/cares/cares/ares_process.c - third_party/cares/cares/ares_query.c - third_party/cares/cares/ares_search.c - third_party/cares/cares/ares_send.c - third_party/cares/cares/ares_strcasecmp.c - third_party/cares/cares/ares_strdup.c - third_party/cares/cares/ares_strerror.c - third_party/cares/cares/ares_timeout.c - third_party/cares/cares/ares_version.c - third_party/cares/cares/ares_writev.c - third_party/cares/cares/bitncmp.c - third_party/cares/cares/inet_net_pton.c - third_party/cares/cares/inet_ntop.c - third_party/cares/cares/windows_port.c -) - -if(WIN32 AND MSVC) - set_target_properties(ares PROPERTIES COMPILE_PDB_NAME "ares" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ares.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(ares - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(ares - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(bad_client_test - test/core/bad_client/bad_client.c -) - -if(WIN32 AND MSVC) - set_target_properties(bad_client_test PROPERTIES COMPILE_PDB_NAME "bad_client_test" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/bad_client_test.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(bad_client_test - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bad_client_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(bad_ssl_test_server - test/core/bad_ssl/server_common.c -) - -if(WIN32 AND MSVC) - set_target_properties(bad_ssl_test_server PROPERTIES COMPILE_PDB_NAME "bad_ssl_test_server" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/bad_ssl_test_server.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(bad_ssl_test_server - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bad_ssl_test_server - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(end2end_tests - test/core/end2end/end2end_tests.c - test/core/end2end/end2end_test_utils.c - test/core/end2end/tests/authority_not_supported.c - test/core/end2end/tests/bad_hostname.c - test/core/end2end/tests/bad_ping.c - test/core/end2end/tests/binary_metadata.c - test/core/end2end/tests/call_creds.c - test/core/end2end/tests/cancel_after_accept.c - test/core/end2end/tests/cancel_after_client_done.c - test/core/end2end/tests/cancel_after_invoke.c - test/core/end2end/tests/cancel_after_round_trip.c - test/core/end2end/tests/cancel_before_invoke.c - test/core/end2end/tests/cancel_in_a_vacuum.c - test/core/end2end/tests/cancel_with_status.c - test/core/end2end/tests/compressed_payload.c - test/core/end2end/tests/connectivity.c - test/core/end2end/tests/default_host.c - test/core/end2end/tests/disappearing_server.c - test/core/end2end/tests/empty_batch.c - test/core/end2end/tests/filter_call_init_fails.c - test/core/end2end/tests/filter_causes_close.c - test/core/end2end/tests/filter_latency.c - test/core/end2end/tests/graceful_server_shutdown.c - test/core/end2end/tests/high_initial_seqno.c - test/core/end2end/tests/hpack_size.c - test/core/end2end/tests/idempotent_request.c - test/core/end2end/tests/invoke_large_request.c - test/core/end2end/tests/keepalive_timeout.c - test/core/end2end/tests/large_metadata.c - test/core/end2end/tests/load_reporting_hook.c - test/core/end2end/tests/max_concurrent_streams.c - test/core/end2end/tests/max_connection_age.c - test/core/end2end/tests/max_connection_idle.c - test/core/end2end/tests/max_message_length.c - test/core/end2end/tests/negative_deadline.c - test/core/end2end/tests/network_status_change.c - test/core/end2end/tests/no_logging.c - test/core/end2end/tests/no_op.c - test/core/end2end/tests/payload.c - test/core/end2end/tests/ping.c - test/core/end2end/tests/ping_pong_streaming.c - test/core/end2end/tests/proxy_auth.c - test/core/end2end/tests/registered_call.c - test/core/end2end/tests/request_with_flags.c - test/core/end2end/tests/request_with_payload.c - test/core/end2end/tests/resource_quota_server.c - test/core/end2end/tests/server_finishes_request.c - test/core/end2end/tests/shutdown_finishes_calls.c - test/core/end2end/tests/shutdown_finishes_tags.c - test/core/end2end/tests/simple_cacheable_request.c - test/core/end2end/tests/simple_delayed_request.c - test/core/end2end/tests/simple_metadata.c - test/core/end2end/tests/simple_request.c - test/core/end2end/tests/streaming_error_response.c - test/core/end2end/tests/trailing_metadata.c - test/core/end2end/tests/workaround_cronet_compression.c - test/core/end2end/tests/write_buffering.c - test/core/end2end/tests/write_buffering_at_end.c -) - -if(WIN32 AND MSVC) - set_target_properties(end2end_tests PROPERTIES COMPILE_PDB_NAME "end2end_tests" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/end2end_tests.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(end2end_tests - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(end2end_tests - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_library(end2end_nosec_tests - test/core/end2end/end2end_nosec_tests.c - test/core/end2end/end2end_test_utils.c - test/core/end2end/tests/authority_not_supported.c - test/core/end2end/tests/bad_hostname.c - test/core/end2end/tests/bad_ping.c - test/core/end2end/tests/binary_metadata.c - test/core/end2end/tests/cancel_after_accept.c - test/core/end2end/tests/cancel_after_client_done.c - test/core/end2end/tests/cancel_after_invoke.c - test/core/end2end/tests/cancel_after_round_trip.c - test/core/end2end/tests/cancel_before_invoke.c - test/core/end2end/tests/cancel_in_a_vacuum.c - test/core/end2end/tests/cancel_with_status.c - test/core/end2end/tests/compressed_payload.c - test/core/end2end/tests/connectivity.c - test/core/end2end/tests/default_host.c - test/core/end2end/tests/disappearing_server.c - test/core/end2end/tests/empty_batch.c - test/core/end2end/tests/filter_call_init_fails.c - test/core/end2end/tests/filter_causes_close.c - test/core/end2end/tests/filter_latency.c - test/core/end2end/tests/graceful_server_shutdown.c - test/core/end2end/tests/high_initial_seqno.c - test/core/end2end/tests/hpack_size.c - test/core/end2end/tests/idempotent_request.c - test/core/end2end/tests/invoke_large_request.c - test/core/end2end/tests/keepalive_timeout.c - test/core/end2end/tests/large_metadata.c - test/core/end2end/tests/load_reporting_hook.c - test/core/end2end/tests/max_concurrent_streams.c - test/core/end2end/tests/max_connection_age.c - test/core/end2end/tests/max_connection_idle.c - test/core/end2end/tests/max_message_length.c - test/core/end2end/tests/negative_deadline.c - test/core/end2end/tests/network_status_change.c - test/core/end2end/tests/no_logging.c - test/core/end2end/tests/no_op.c - test/core/end2end/tests/payload.c - test/core/end2end/tests/ping.c - test/core/end2end/tests/ping_pong_streaming.c - test/core/end2end/tests/proxy_auth.c - test/core/end2end/tests/registered_call.c - test/core/end2end/tests/request_with_flags.c - test/core/end2end/tests/request_with_payload.c - test/core/end2end/tests/resource_quota_server.c - test/core/end2end/tests/server_finishes_request.c - test/core/end2end/tests/shutdown_finishes_calls.c - test/core/end2end/tests/shutdown_finishes_tags.c - test/core/end2end/tests/simple_cacheable_request.c - test/core/end2end/tests/simple_delayed_request.c - test/core/end2end/tests/simple_metadata.c - test/core/end2end/tests/simple_request.c - test/core/end2end/tests/streaming_error_response.c - test/core/end2end/tests/trailing_metadata.c - test/core/end2end/tests/workaround_cronet_compression.c - test/core/end2end/tests/write_buffering.c - test/core/end2end/tests/write_buffering_at_end.c -) - -if(WIN32 AND MSVC) - set_target_properties(end2end_nosec_tests PROPERTIES COMPILE_PDB_NAME "end2end_nosec_tests" - COMPILE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - if (gRPC_INSTALL) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/end2end_nosec_tests.pdb - DESTINATION ${gRPC_INSTALL_LIBDIR} OPTIONAL - ) - endif() -endif() - - -target_include_directories(end2end_nosec_tests - PUBLIC $ $ - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${ZLIB_INCLUDE_DIR} - PRIVATE ${BENCHMARK}/include - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(end2end_nosec_tests - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - - -endif (gRPC_BUILD_TESTS) - -if (gRPC_BUILD_TESTS) - -add_executable(alarm_test - test/core/surface/alarm_test.c -) - - -target_include_directories(alarm_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(alarm_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(algorithm_test - test/core/compression/algorithm_test.c -) - - -target_include_directories(algorithm_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(algorithm_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(alloc_test - test/core/support/alloc_test.c -) - - -target_include_directories(alloc_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(alloc_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(alpn_test - test/core/transport/chttp2/alpn_test.c -) - - -target_include_directories(alpn_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(alpn_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(arena_test - test/core/support/arena_test.c -) - - -target_include_directories(arena_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(arena_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(bad_server_response_test - test/core/end2end/bad_server_response_test.c -) - - -target_include_directories(bad_server_response_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bad_server_response_test - ${_gRPC_ALLTARGETS_LIBRARIES} - test_tcp_server - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(bdp_estimator_test - test/core/transport/bdp_estimator_test.c -) - - -target_include_directories(bdp_estimator_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bdp_estimator_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(bin_decoder_test - test/core/transport/chttp2/bin_decoder_test.c -) - - -target_include_directories(bin_decoder_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bin_decoder_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(bin_encoder_test - test/core/transport/chttp2/bin_encoder_test.c -) - - -target_include_directories(bin_encoder_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bin_encoder_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(census_context_test - test/core/census/context_test.c -) - - -target_include_directories(census_context_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(census_context_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(census_intrusive_hash_map_test - test/core/census/intrusive_hash_map_test.c -) - - -target_include_directories(census_intrusive_hash_map_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(census_intrusive_hash_map_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(census_resource_test - test/core/census/resource_test.c -) - - -target_include_directories(census_resource_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(census_resource_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(census_trace_context_test - test/core/census/trace_context_test.c -) - - -target_include_directories(census_trace_context_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(census_trace_context_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(channel_create_test - test/core/surface/channel_create_test.c -) - - -target_include_directories(channel_create_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(channel_create_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) - -add_executable(check_epollexclusive - test/build/check_epollexclusive.c -) - - -target_include_directories(check_epollexclusive - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(check_epollexclusive - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr -) - - -if (gRPC_INSTALL) - install(TARGETS check_epollexclusive EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_executable(chttp2_hpack_encoder_test - test/core/transport/chttp2/hpack_encoder_test.c -) - - -target_include_directories(chttp2_hpack_encoder_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(chttp2_hpack_encoder_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(chttp2_stream_map_test - test/core/transport/chttp2/stream_map_test.c -) - - -target_include_directories(chttp2_stream_map_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(chttp2_stream_map_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(chttp2_varint_test - test/core/transport/chttp2/varint_test.c -) - - -target_include_directories(chttp2_varint_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(chttp2_varint_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(combiner_test - test/core/iomgr/combiner_test.c -) - - -target_include_directories(combiner_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(combiner_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(compression_test - test/core/compression/compression_test.c -) - - -target_include_directories(compression_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(compression_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(concurrent_connectivity_test - test/core/surface/concurrent_connectivity_test.c -) - - -target_include_directories(concurrent_connectivity_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(concurrent_connectivity_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(connection_refused_test - test/core/end2end/connection_refused_test.c -) - - -target_include_directories(connection_refused_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(connection_refused_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(dns_resolver_connectivity_test - test/core/client_channel/resolvers/dns_resolver_connectivity_test.c -) - - -target_include_directories(dns_resolver_connectivity_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(dns_resolver_connectivity_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(dns_resolver_test - test/core/client_channel/resolvers/dns_resolver_test.c -) - - -target_include_directories(dns_resolver_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(dns_resolver_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(dualstack_socket_test - test/core/end2end/dualstack_socket_test.c -) - - -target_include_directories(dualstack_socket_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(dualstack_socket_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(endpoint_pair_test - test/core/iomgr/endpoint_pair_test.c -) - - -target_include_directories(endpoint_pair_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(endpoint_pair_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(error_test - test/core/iomgr/error_test.c -) - - -target_include_directories(error_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(error_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX) - -add_executable(ev_epollsig_linux_test - test/core/iomgr/ev_epollsig_linux_test.c -) - - -target_include_directories(ev_epollsig_linux_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(ev_epollsig_linux_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(fake_resolver_test - test/core/client_channel/resolvers/fake_resolver_test.c -) - - -target_include_directories(fake_resolver_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(fake_resolver_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(fd_conservation_posix_test - test/core/iomgr/fd_conservation_posix_test.c -) - - -target_include_directories(fd_conservation_posix_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(fd_conservation_posix_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(fd_posix_test - test/core/iomgr/fd_posix_test.c -) - - -target_include_directories(fd_posix_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(fd_posix_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(fling_client - test/core/fling/client.c -) - - -target_include_directories(fling_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(fling_client - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(fling_server - test/core/fling/server.c -) - - -target_include_directories(fling_server - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(fling_server - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(fling_stream_test - test/core/fling/fling_stream_test.c -) - - -target_include_directories(fling_stream_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(fling_stream_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(fling_test - test/core/fling/fling_test.c -) - - -target_include_directories(fling_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(fling_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) - -add_executable(gen_hpack_tables - tools/codegen/core/gen_hpack_tables.c -) - - -target_include_directories(gen_hpack_tables - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gen_hpack_tables - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr - grpc -) - - -if (gRPC_INSTALL) - install(TARGETS gen_hpack_tables EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(gen_legal_metadata_characters - tools/codegen/core/gen_legal_metadata_characters.c -) - - -target_include_directories(gen_legal_metadata_characters - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gen_legal_metadata_characters - ${_gRPC_ALLTARGETS_LIBRARIES} -) - - -if (gRPC_INSTALL) - install(TARGETS gen_legal_metadata_characters EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(gen_percent_encoding_tables - tools/codegen/core/gen_percent_encoding_tables.c -) - - -target_include_directories(gen_percent_encoding_tables - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gen_percent_encoding_tables - ${_gRPC_ALLTARGETS_LIBRARIES} -) - - -if (gRPC_INSTALL) - install(TARGETS gen_percent_encoding_tables EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(goaway_server_test - test/core/end2end/goaway_server_test.c -) - - -target_include_directories(goaway_server_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(goaway_server_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_avl_test - test/core/support/avl_test.c -) - - -target_include_directories(gpr_avl_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_avl_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_backoff_test - test/core/support/backoff_test.c -) - - -target_include_directories(gpr_backoff_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_backoff_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_cmdline_test - test/core/support/cmdline_test.c -) - - -target_include_directories(gpr_cmdline_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_cmdline_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_cpu_test - test/core/support/cpu_test.c -) - - -target_include_directories(gpr_cpu_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_cpu_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_env_test - test/core/support/env_test.c -) - - -target_include_directories(gpr_env_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_env_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_histogram_test - test/core/support/histogram_test.c -) - - -target_include_directories(gpr_histogram_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_histogram_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_host_port_test - test/core/support/host_port_test.c -) - - -target_include_directories(gpr_host_port_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_host_port_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_log_test - test/core/support/log_test.c -) - - -target_include_directories(gpr_log_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_log_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_mpscq_test - test/core/support/mpscq_test.c -) - - -target_include_directories(gpr_mpscq_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_mpscq_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_spinlock_test - test/core/support/spinlock_test.c -) - - -target_include_directories(gpr_spinlock_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_spinlock_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_stack_lockfree_test - test/core/support/stack_lockfree_test.c -) - - -target_include_directories(gpr_stack_lockfree_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_stack_lockfree_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_string_test - test/core/support/string_test.c -) - - -target_include_directories(gpr_string_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_string_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_sync_test - test/core/support/sync_test.c -) - - -target_include_directories(gpr_sync_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_sync_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_thd_test - test/core/support/thd_test.c -) - - -target_include_directories(gpr_thd_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_thd_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_time_test - test/core/support/time_test.c -) - - -target_include_directories(gpr_time_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_time_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_tls_test - test/core/support/tls_test.c -) - - -target_include_directories(gpr_tls_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_tls_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(gpr_useful_test - test/core/support/useful_test.c -) - - -target_include_directories(gpr_useful_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(gpr_useful_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_auth_context_test - test/core/security/auth_context_test.c -) - - -target_include_directories(grpc_auth_context_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_auth_context_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_b64_test - test/core/slice/b64_test.c -) - - -target_include_directories(grpc_b64_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_b64_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_byte_buffer_reader_test - test/core/surface/byte_buffer_reader_test.c -) - - -target_include_directories(grpc_byte_buffer_reader_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_byte_buffer_reader_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_channel_args_test - test/core/channel/channel_args_test.c -) - - -target_include_directories(grpc_channel_args_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_channel_args_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_channel_stack_test - test/core/channel/channel_stack_test.c -) - - -target_include_directories(grpc_channel_stack_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_channel_stack_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_completion_queue_test - test/core/surface/completion_queue_test.c -) - - -target_include_directories(grpc_completion_queue_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_completion_queue_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_completion_queue_threading_test - test/core/surface/completion_queue_threading_test.c -) - - -target_include_directories(grpc_completion_queue_threading_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_completion_queue_threading_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) - -add_executable(grpc_create_jwt - test/core/security/create_jwt.c -) - - -target_include_directories(grpc_create_jwt - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_create_jwt - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_create_jwt EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_executable(grpc_credentials_test - test/core/security/credentials_test.c -) - - -target_include_directories(grpc_credentials_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_credentials_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_fetch_oauth2 - test/core/security/fetch_oauth2.c -) - - -target_include_directories(grpc_fetch_oauth2 - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_fetch_oauth2 - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_invalid_channel_args_test - test/core/surface/invalid_channel_args_test.c -) - - -target_include_directories(grpc_invalid_channel_args_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_invalid_channel_args_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(grpc_json_token_test - test/core/security/json_token_test.c -) - - -target_include_directories(grpc_json_token_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_json_token_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_jwt_verifier_test - test/core/security/jwt_verifier_test.c -) - - -target_include_directories(grpc_jwt_verifier_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_jwt_verifier_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) - -add_executable(grpc_print_google_default_creds_token - test/core/security/print_google_default_creds_token.c -) - - -target_include_directories(grpc_print_google_default_creds_token - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_print_google_default_creds_token - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_print_google_default_creds_token EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_executable(grpc_security_connector_test - test/core/security/security_connector_test.c -) - - -target_include_directories(grpc_security_connector_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_security_connector_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) - -add_executable(grpc_verify_jwt - test/core/security/verify_jwt.c -) - - -target_include_directories(grpc_verify_jwt - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(grpc_verify_jwt - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_verify_jwt EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX) - -add_executable(handshake_client - test/core/handshake/client_ssl.c -) - - -target_include_directories(handshake_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(handshake_client - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX) - -add_executable(handshake_server - test/core/handshake/server_ssl.c -) - - -target_include_directories(handshake_server - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(handshake_server - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(hpack_parser_test - test/core/transport/chttp2/hpack_parser_test.c -) - - -target_include_directories(hpack_parser_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(hpack_parser_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(hpack_table_test - test/core/transport/chttp2/hpack_table_test.c -) - - -target_include_directories(hpack_table_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(hpack_table_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(http_parser_test - test/core/http/parser_test.c -) - - -target_include_directories(http_parser_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(http_parser_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(httpcli_format_request_test - test/core/http/format_request_test.c -) - - -target_include_directories(httpcli_format_request_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(httpcli_format_request_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(httpcli_test - test/core/http/httpcli_test.c -) - - -target_include_directories(httpcli_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(httpcli_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX) - -add_executable(httpscli_test - test/core/http/httpscli_test.c -) - - -target_include_directories(httpscli_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(httpscli_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(init_test - test/core/surface/init_test.c -) - - -target_include_directories(init_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(init_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(invalid_call_argument_test - test/core/end2end/invalid_call_argument_test.c -) - - -target_include_directories(invalid_call_argument_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(invalid_call_argument_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(json_rewrite - test/core/json/json_rewrite.c -) - - -target_include_directories(json_rewrite - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(json_rewrite - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(json_rewrite_test - test/core/json/json_rewrite_test.c -) - - -target_include_directories(json_rewrite_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(json_rewrite_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(json_stream_error_test - test/core/json/json_stream_error_test.c -) - - -target_include_directories(json_stream_error_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(json_stream_error_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(json_test - test/core/json/json_test.c -) - - -target_include_directories(json_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(json_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(lame_client_test - test/core/surface/lame_client_test.c -) - - -target_include_directories(lame_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(lame_client_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(lb_policies_test - test/core/client_channel/lb_policies_test.c -) - - -target_include_directories(lb_policies_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(lb_policies_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(load_file_test - test/core/iomgr/load_file_test.c -) - - -target_include_directories(load_file_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(load_file_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(memory_profile_client - test/core/memory_usage/client.c -) - - -target_include_directories(memory_profile_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(memory_profile_client - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(memory_profile_server - test/core/memory_usage/server.c -) - - -target_include_directories(memory_profile_server - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(memory_profile_server - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(memory_profile_test - test/core/memory_usage/memory_usage_test.c -) - - -target_include_directories(memory_profile_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(memory_profile_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(message_compress_test - test/core/compression/message_compress_test.c -) - - -target_include_directories(message_compress_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(message_compress_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(minimal_stack_is_minimal_test - test/core/channel/minimal_stack_is_minimal_test.c -) - - -target_include_directories(minimal_stack_is_minimal_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(minimal_stack_is_minimal_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(mlog_test - test/core/census/mlog_test.c -) - - -target_include_directories(mlog_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(mlog_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(multiple_server_queues_test - test/core/end2end/multiple_server_queues_test.c -) - - -target_include_directories(multiple_server_queues_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(multiple_server_queues_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(murmur_hash_test - test/core/support/murmur_hash_test.c -) - - -target_include_directories(murmur_hash_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(murmur_hash_test - ${_gRPC_ALLTARGETS_LIBRARIES} - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(no_server_test - test/core/end2end/no_server_test.c -) - - -target_include_directories(no_server_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(no_server_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(num_external_connectivity_watchers_test - test/core/surface/num_external_connectivity_watchers_test.c -) - - -target_include_directories(num_external_connectivity_watchers_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(num_external_connectivity_watchers_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(parse_address_test - test/core/client_channel/parse_address_test.c -) - - -target_include_directories(parse_address_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(parse_address_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(percent_encoding_test - test/core/slice/percent_encoding_test.c -) - - -target_include_directories(percent_encoding_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(percent_encoding_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX) - -add_executable(pollset_set_test - test/core/iomgr/pollset_set_test.c -) - - -target_include_directories(pollset_set_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(pollset_set_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(resolve_address_posix_test - test/core/iomgr/resolve_address_posix_test.c -) - - -target_include_directories(resolve_address_posix_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(resolve_address_posix_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(resolve_address_test - test/core/iomgr/resolve_address_test.c -) - - -target_include_directories(resolve_address_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(resolve_address_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(resource_quota_test - test/core/iomgr/resource_quota_test.c -) - - -target_include_directories(resource_quota_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(resource_quota_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(secure_channel_create_test - test/core/surface/secure_channel_create_test.c -) - - -target_include_directories(secure_channel_create_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(secure_channel_create_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(secure_endpoint_test - test/core/security/secure_endpoint_test.c -) - - -target_include_directories(secure_endpoint_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(secure_endpoint_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(sequential_connectivity_test - test/core/surface/sequential_connectivity_test.c -) - - -target_include_directories(sequential_connectivity_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(sequential_connectivity_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_chttp2_test - test/core/surface/server_chttp2_test.c -) - - -target_include_directories(server_chttp2_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(server_chttp2_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_test - test/core/surface/server_test.c -) - - -target_include_directories(server_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(server_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(slice_buffer_test - test/core/slice/slice_buffer_test.c -) - - -target_include_directories(slice_buffer_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(slice_buffer_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(slice_hash_table_test - test/core/slice/slice_hash_table_test.c -) - - -target_include_directories(slice_hash_table_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(slice_hash_table_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(slice_string_helpers_test - test/core/slice/slice_string_helpers_test.c -) - - -target_include_directories(slice_string_helpers_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(slice_string_helpers_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(slice_test - test/core/slice/slice_test.c -) - - -target_include_directories(slice_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(slice_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(sockaddr_resolver_test - test/core/client_channel/resolvers/sockaddr_resolver_test.c -) - - -target_include_directories(sockaddr_resolver_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(sockaddr_resolver_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(sockaddr_utils_test - test/core/iomgr/sockaddr_utils_test.c -) - - -target_include_directories(sockaddr_utils_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(sockaddr_utils_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(socket_utils_test - test/core/iomgr/socket_utils_test.c -) - - -target_include_directories(socket_utils_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(socket_utils_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(status_conversion_test - test/core/transport/status_conversion_test.c -) - - -target_include_directories(status_conversion_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(status_conversion_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(stream_compression_test - test/core/compression/stream_compression_test.c -) - - -target_include_directories(stream_compression_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(stream_compression_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(stream_owned_slice_test - test/core/transport/stream_owned_slice_test.c -) - - -target_include_directories(stream_owned_slice_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(stream_owned_slice_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(tcp_client_posix_test - test/core/iomgr/tcp_client_posix_test.c -) - - -target_include_directories(tcp_client_posix_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(tcp_client_posix_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(tcp_client_uv_test - test/core/iomgr/tcp_client_uv_test.c -) - - -target_include_directories(tcp_client_uv_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(tcp_client_uv_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(tcp_posix_test - test/core/iomgr/tcp_posix_test.c -) - - -target_include_directories(tcp_posix_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(tcp_posix_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(tcp_server_posix_test - test/core/iomgr/tcp_server_posix_test.c -) - - -target_include_directories(tcp_server_posix_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(tcp_server_posix_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(tcp_server_uv_test - test/core/iomgr/tcp_server_uv_test.c -) - - -target_include_directories(tcp_server_uv_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(tcp_server_uv_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(time_averaged_stats_test - test/core/iomgr/time_averaged_stats_test.c -) - - -target_include_directories(time_averaged_stats_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(time_averaged_stats_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(timeout_encoding_test - test/core/transport/timeout_encoding_test.c -) - - -target_include_directories(timeout_encoding_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(timeout_encoding_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(timer_heap_test - test/core/iomgr/timer_heap_test.c -) - - -target_include_directories(timer_heap_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(timer_heap_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(timer_list_test - test/core/iomgr/timer_list_test.c -) - - -target_include_directories(timer_list_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(timer_list_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(transport_connectivity_state_test - test/core/transport/connectivity_state_test.c -) - - -target_include_directories(transport_connectivity_state_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(transport_connectivity_state_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(transport_metadata_test - test/core/transport/metadata_test.c -) - - -target_include_directories(transport_metadata_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(transport_metadata_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(transport_pid_controller_test - test/core/transport/pid_controller_test.c -) - - -target_include_directories(transport_pid_controller_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(transport_pid_controller_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(transport_security_test - test/core/tsi/transport_security_test.c -) - - -target_include_directories(transport_security_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(transport_security_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(udp_server_test - test/core/iomgr/udp_server_test.c -) - - -target_include_directories(udp_server_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(udp_server_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(uri_parser_test - test/core/client_channel/uri_parser_test.c -) - - -target_include_directories(uri_parser_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(uri_parser_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(wakeup_fd_cv_test - test/core/iomgr/wakeup_fd_cv_test.c -) - - -target_include_directories(wakeup_fd_cv_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(wakeup_fd_cv_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(alarm_cpp_test - test/cpp/common/alarm_cpp_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(alarm_cpp_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(alarm_cpp_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(async_end2end_test - test/cpp/end2end/async_end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(async_end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(async_end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(auth_property_iterator_test - test/cpp/common/auth_property_iterator_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(auth_property_iterator_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(auth_property_iterator_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_arena - test/cpp/microbenchmarks/bm_arena.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_arena - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_arena - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_call_create - test/cpp/microbenchmarks/bm_call_create.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_call_create - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_call_create - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_chttp2_hpack - test/cpp/microbenchmarks/bm_chttp2_hpack.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_chttp2_hpack - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_chttp2_hpack - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_chttp2_transport - test/cpp/microbenchmarks/bm_chttp2_transport.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_chttp2_transport - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_chttp2_transport - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_closure - test/cpp/microbenchmarks/bm_closure.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_closure - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_closure - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_cq - test/cpp/microbenchmarks/bm_cq.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_cq - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_cq - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_cq_multiple_threads - test/cpp/microbenchmarks/bm_cq_multiple_threads.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_cq_multiple_threads - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_cq_multiple_threads - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_error - test/cpp/microbenchmarks/bm_error.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_error - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_error - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_fullstack_streaming_ping_pong - test/cpp/microbenchmarks/bm_fullstack_streaming_ping_pong.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_fullstack_streaming_ping_pong - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_fullstack_streaming_ping_pong - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_fullstack_streaming_pump - test/cpp/microbenchmarks/bm_fullstack_streaming_pump.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_fullstack_streaming_pump - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_fullstack_streaming_pump - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_fullstack_trickle - test/cpp/microbenchmarks/bm_fullstack_trickle.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_fullstack_trickle - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_fullstack_trickle - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_fullstack_unary_ping_pong - test/cpp/microbenchmarks/bm_fullstack_unary_ping_pong.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_fullstack_unary_ping_pong - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_fullstack_unary_ping_pong - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_metadata - test/cpp/microbenchmarks/bm_metadata.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_metadata - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_metadata - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bm_pollset - test/cpp/microbenchmarks/bm_pollset.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(bm_pollset - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(bm_pollset - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_benchmark - benchmark - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(channel_arguments_test - test/cpp/common/channel_arguments_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(channel_arguments_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(channel_arguments_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(channel_filter_test - test/cpp/common/channel_filter_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(channel_filter_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(channel_filter_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(cli_call_test - test/cpp/util/cli_call_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(cli_call_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(cli_call_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_cli_libs - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(client_crash_test - test/cpp/end2end/client_crash_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(client_crash_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(client_crash_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(client_crash_test_server - test/cpp/end2end/client_crash_test_server.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(client_crash_test_server - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(client_crash_test_server - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(client_lb_end2end_test - test/cpp/end2end/client_lb_end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(client_lb_end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(client_lb_end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(codegen_test_full - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.grpc.pb.h - test/cpp/codegen/codegen_test_full.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/control.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/payloads.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/services.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/stats.proto -) - -target_include_directories(codegen_test_full - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(codegen_test_full - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(codegen_test_minimal - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/control.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/payloads.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/services.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/stats.grpc.pb.h - test/cpp/codegen/codegen_test_minimal.cc - src/cpp/codegen/codegen_init.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/control.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/payloads.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/services.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/stats.proto -) - -target_include_directories(codegen_test_minimal - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(codegen_test_minimal - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(credentials_test - test/cpp/client/credentials_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(credentials_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(credentials_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(cxx_byte_buffer_test - test/cpp/util/byte_buffer_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(cxx_byte_buffer_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(cxx_byte_buffer_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(cxx_slice_test - test/cpp/util/slice_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(cxx_slice_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(cxx_slice_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(cxx_string_ref_test - test/cpp/util/string_ref_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(cxx_string_ref_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(cxx_string_ref_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(cxx_time_test - test/cpp/util/time_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(cxx_time_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(cxx_time_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(end2end_test - test/cpp/end2end/end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(error_details_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.h - test/cpp/util/error_details_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo_messages.proto -) - -target_include_directories(error_details_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(error_details_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_error_details - grpc++ - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(filter_end2end_test - test/cpp/end2end/filter_end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(filter_end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(filter_end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(generic_end2end_test - test/cpp/end2end/generic_end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(generic_end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(generic_end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(golden_file_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/compiler_test.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/compiler_test.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/compiler_test.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/compiler_test.grpc.pb.h - test/cpp/codegen/golden_file_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/compiler_test.proto -) - -target_include_directories(golden_file_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(golden_file_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpc_cli - test/cpp/util/grpc_cli.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(grpc_cli - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_cli - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_cli_libs - grpc++_proto_reflection_desc_db - grpc++ - grpc - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) - -add_executable(grpc_cpp_plugin - src/compiler/cpp_plugin.cc -) - - -target_include_directories(grpc_cpp_plugin - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_cpp_plugin - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_plugin_support -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_cpp_plugin EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(grpc_csharp_plugin - src/compiler/csharp_plugin.cc -) - - -target_include_directories(grpc_csharp_plugin - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_csharp_plugin - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_plugin_support -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_csharp_plugin EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(grpc_node_plugin - src/compiler/node_plugin.cc -) - - -target_include_directories(grpc_node_plugin - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_node_plugin - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_plugin_support -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_node_plugin EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(grpc_objective_c_plugin - src/compiler/objective_c_plugin.cc -) - - -target_include_directories(grpc_objective_c_plugin - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_objective_c_plugin - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_plugin_support -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_objective_c_plugin EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(grpc_php_plugin - src/compiler/php_plugin.cc -) - - -target_include_directories(grpc_php_plugin - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_php_plugin - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_plugin_support -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_php_plugin EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(grpc_python_plugin - src/compiler/python_plugin.cc -) - - -target_include_directories(grpc_python_plugin - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_python_plugin - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_plugin_support -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_python_plugin EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - - -add_executable(grpc_ruby_plugin - src/compiler/ruby_plugin.cc -) - - -target_include_directories(grpc_ruby_plugin - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_ruby_plugin - ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_plugin_support -) - - -if (gRPC_INSTALL) - install(TARGETS grpc_ruby_plugin EXPORT gRPCTargets - RUNTIME DESTINATION ${gRPC_INSTALL_BINDIR} - LIBRARY DESTINATION ${gRPC_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${gRPC_INSTALL_LIBDIR} - ) -endif() - -if (gRPC_BUILD_TESTS) - -add_executable(grpc_tool_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.h - test/cpp/util/grpc_tool_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo_messages.proto -) - -target_include_directories(grpc_tool_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpc_tool_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_cli_libs - grpc++_proto_reflection_desc_db - grpc++_reflection - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpclb_api_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.grpc.pb.h - test/cpp/grpclb/grpclb_api_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/lb/v1/load_balancer.proto -) - -target_include_directories(grpclb_api_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpclb_api_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpclb_end2end_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.grpc.pb.h - test/cpp/end2end/grpclb_end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/lb/v1/load_balancer.proto -) - -target_include_directories(grpclb_end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpclb_end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(grpclb_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/lb/v1/load_balancer.grpc.pb.h - test/cpp/grpclb/grpclb_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/lb/v1/load_balancer.proto -) - -target_include_directories(grpclb_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(grpclb_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(health_service_end2end_test - test/cpp/end2end/health_service_end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(health_service_end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(health_service_end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(http2_client - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(http2_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(http2_client - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - http2_client_main - grpc++_test_util - grpc_test_util - grpc++ - grpc - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(hybrid_end2end_test - test/cpp/end2end/hybrid_end2end_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(hybrid_end2end_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(hybrid_end2end_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(interop_client - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(interop_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_client - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - interop_client_main - interop_client_helper - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(interop_server - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(interop_server - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_server - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - interop_server_main - interop_server_helper - interop_server_lib - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(interop_test - test/cpp/interop/interop_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(interop_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(interop_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(json_run_localhost - test/cpp/qps/json_run_localhost.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(json_run_localhost - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(json_run_localhost - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(memory_test - test/core/support/memory_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(memory_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(memory_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(metrics_client - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.grpc.pb.h - test/cpp/interop/metrics_client.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/metrics.proto -) - -target_include_directories(metrics_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(metrics_client - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(mock_test - test/cpp/end2end/mock_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(mock_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(mock_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(noop-benchmark - test/cpp/microbenchmarks/noop-benchmark.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(noop-benchmark - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(noop-benchmark - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - benchmark - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(proto_server_reflection_test - test/cpp/end2end/proto_server_reflection_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(proto_server_reflection_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(proto_server_reflection_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_proto_reflection_desc_db - grpc++_reflection - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(proto_utils_test - test/cpp/codegen/proto_utils_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(proto_utils_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(proto_utils_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(qps_interarrival_test - test/cpp/qps/qps_interarrival_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(qps_interarrival_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(qps_interarrival_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - qps - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(qps_json_driver - test/cpp/qps/qps_json_driver.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(qps_json_driver - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(qps_json_driver - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - qps - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(qps_openloop_test - test/cpp/qps/qps_openloop_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(qps_openloop_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(qps_openloop_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - qps - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(qps_worker - test/cpp/qps/worker.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(qps_worker - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(qps_worker - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - qps - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(reconnect_interop_client - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.h - test/cpp/interop/reconnect_interop_client.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/empty.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/test.proto -) - -target_include_directories(reconnect_interop_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(reconnect_interop_client - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(reconnect_interop_server - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.h - test/cpp/interop/reconnect_interop_server.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/empty.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/test.proto -) - -target_include_directories(reconnect_interop_server - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(reconnect_interop_server - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - reconnect_server - test_tcp_server - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(secure_auth_context_test - test/cpp/common/secure_auth_context_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(secure_auth_context_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(secure_auth_context_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(secure_sync_unary_ping_pong_test - test/cpp/qps/secure_sync_unary_ping_pong_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(secure_sync_unary_ping_pong_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(secure_sync_unary_ping_pong_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - qps - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_builder_plugin_test - test/cpp/end2end/server_builder_plugin_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(server_builder_plugin_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(server_builder_plugin_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_builder_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.h - test/cpp/server/server_builder_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo_messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo.proto -) - -target_include_directories(server_builder_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(server_builder_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - gpr_test_util - grpc++ - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_context_test_spouse_test - test/cpp/test/server_context_test_spouse_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(server_context_test_spouse_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(server_context_test_spouse_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(server_crash_test - test/cpp/end2end/server_crash_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(server_crash_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(server_crash_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_crash_test_client - test/cpp/end2end/server_crash_test_client.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(server_crash_test_client - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(server_crash_test_client - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_request_call_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo_messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/echo.grpc.pb.h - test/cpp/server/server_request_call_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo_messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/echo.proto -) - -target_include_directories(server_request_call_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(server_request_call_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - gpr_test_util - grpc++ - grpc - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(shutdown_test - test/cpp/end2end/shutdown_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(shutdown_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(shutdown_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(status_test - test/cpp/util/status_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(status_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(status_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(streaming_throughput_test - test/cpp/end2end/streaming_throughput_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(streaming_throughput_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(streaming_throughput_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(stress_test - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/empty.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/messages.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/metrics.grpc.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.cc - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.pb.h - ${_gRPC_PROTO_GENS_DIR}/src/proto/grpc/testing/test.grpc.pb.h - test/cpp/interop/interop_client.cc - test/cpp/interop/stress_interop_client.cc - test/cpp/interop/stress_test.cc - test/cpp/util/metrics_server.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/empty.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/messages.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/metrics.proto -) -protobuf_generate_grpc_cpp( - src/proto/grpc/testing/test.proto -) - -target_include_directories(stress_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(stress_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(thread_manager_test - test/cpp/thread_manager/thread_manager_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(thread_manager_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(thread_manager_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++ - grpc - gpr - grpc++_test_config - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(thread_stress_test - test/cpp/end2end/thread_stress_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(thread_stress_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(thread_stress_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(writes_per_rpc_test - test/cpp/performance/writes_per_rpc_test.cc - third_party/googletest/googletest/src/gtest-all.cc - third_party/googletest/googlemock/src/gmock-all.cc -) - - -target_include_directories(writes_per_rpc_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include - PRIVATE third_party/googletest/googletest/include - PRIVATE third_party/googletest/googletest - PRIVATE third_party/googletest/googlemock/include - PRIVATE third_party/googletest/googlemock - PRIVATE ${_gRPC_PROTO_GENS_DIR} -) - -target_link_libraries(writes_per_rpc_test - ${_gRPC_PROTOBUF_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc++_test_util - grpc_test_util - grpc++ - grpc - gpr_test_util - gpr - ${_gRPC_GFLAGS_LIBRARIES} -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(public_headers_must_be_c89 - test/core/surface/public_headers_must_be_c89.c -) - - -target_include_directories(public_headers_must_be_c89 - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(public_headers_must_be_c89 - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(badreq_bad_client_test - test/core/bad_client/tests/badreq.c -) - - -target_include_directories(badreq_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(badreq_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(connection_prefix_bad_client_test - test/core/bad_client/tests/connection_prefix.c -) - - -target_include_directories(connection_prefix_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(connection_prefix_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(head_of_line_blocking_bad_client_test - test/core/bad_client/tests/head_of_line_blocking.c -) - - -target_include_directories(head_of_line_blocking_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(head_of_line_blocking_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(headers_bad_client_test - test/core/bad_client/tests/headers.c -) - - -target_include_directories(headers_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(headers_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(initial_settings_frame_bad_client_test - test/core/bad_client/tests/initial_settings_frame.c -) - - -target_include_directories(initial_settings_frame_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(initial_settings_frame_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(large_metadata_bad_client_test - test/core/bad_client/tests/large_metadata.c -) - - -target_include_directories(large_metadata_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(large_metadata_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_registered_method_bad_client_test - test/core/bad_client/tests/server_registered_method.c -) - - -target_include_directories(server_registered_method_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(server_registered_method_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(simple_request_bad_client_test - test/core/bad_client/tests/simple_request.c -) - - -target_include_directories(simple_request_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(simple_request_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(unknown_frame_bad_client_test - test/core/bad_client/tests/unknown_frame.c -) - - -target_include_directories(unknown_frame_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(unknown_frame_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(window_overflow_bad_client_test - test/core/bad_client/tests/window_overflow.c -) - - -target_include_directories(window_overflow_bad_client_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(window_overflow_bad_client_test - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_client_test - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bad_ssl_cert_server - test/core/bad_ssl/servers/cert.c -) - - -target_include_directories(bad_ssl_cert_server - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bad_ssl_cert_server - ${_gRPC_ALLTARGETS_LIBRARIES} - bad_ssl_test_server - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(bad_ssl_cert_test - test/core/bad_ssl/bad_ssl_test.c -) - - -target_include_directories(bad_ssl_cert_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(bad_ssl_cert_test - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_census_test - test/core/end2end/fixtures/h2_census.c -) - - -target_include_directories(h2_census_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_census_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_compress_test - test/core/end2end/fixtures/h2_compress.c -) - - -target_include_directories(h2_compress_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_compress_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_fakesec_test - test/core/end2end/fixtures/h2_fakesec.c -) - - -target_include_directories(h2_fakesec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_fakesec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(h2_fd_test - test/core/end2end/fixtures/h2_fd.c -) - - -target_include_directories(h2_fd_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_fd_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_full_test - test/core/end2end/fixtures/h2_full.c -) - - -target_include_directories(h2_full_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX) - -add_executable(h2_full+pipe_test - test/core/end2end/fixtures/h2_full+pipe.c -) - - -target_include_directories(h2_full+pipe_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full+pipe_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_full+trace_test - test/core/end2end/fixtures/h2_full+trace.c -) - - -target_include_directories(h2_full+trace_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full+trace_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_full+workarounds_test - test/core/end2end/fixtures/h2_full+workarounds.c -) - - -target_include_directories(h2_full+workarounds_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full+workarounds_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_http_proxy_test - test/core/end2end/fixtures/h2_http_proxy.c -) - - -target_include_directories(h2_http_proxy_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_http_proxy_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_load_reporting_test - test/core/end2end/fixtures/h2_load_reporting.c -) - - -target_include_directories(h2_load_reporting_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_load_reporting_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_oauth2_test - test/core/end2end/fixtures/h2_oauth2.c -) - - -target_include_directories(h2_oauth2_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_oauth2_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_proxy_test - test/core/end2end/fixtures/h2_proxy.c -) - - -target_include_directories(h2_proxy_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_proxy_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_sockpair_test - test/core/end2end/fixtures/h2_sockpair.c -) - - -target_include_directories(h2_sockpair_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_sockpair_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_sockpair+trace_test - test/core/end2end/fixtures/h2_sockpair+trace.c -) - - -target_include_directories(h2_sockpair+trace_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_sockpair+trace_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_sockpair_1byte_test - test/core/end2end/fixtures/h2_sockpair_1byte.c -) - - -target_include_directories(h2_sockpair_1byte_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_sockpair_1byte_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_ssl_test - test/core/end2end/fixtures/h2_ssl.c -) - - -target_include_directories(h2_ssl_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_ssl_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_ssl_cert_test - test/core/end2end/fixtures/h2_ssl_cert.c -) - - -target_include_directories(h2_ssl_cert_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_ssl_cert_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_ssl_proxy_test - test/core/end2end/fixtures/h2_ssl_proxy.c -) - - -target_include_directories(h2_ssl_proxy_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_ssl_proxy_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(h2_uds_test - test/core/end2end/fixtures/h2_uds.c -) - - -target_include_directories(h2_uds_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_uds_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(inproc_test - test/core/end2end/fixtures/inproc.c -) - - -target_include_directories(inproc_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(inproc_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_tests - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_census_nosec_test - test/core/end2end/fixtures/h2_census.c -) - - -target_include_directories(h2_census_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_census_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_compress_nosec_test - test/core/end2end/fixtures/h2_compress.c -) - - -target_include_directories(h2_compress_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_compress_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(h2_fd_nosec_test - test/core/end2end/fixtures/h2_fd.c -) - - -target_include_directories(h2_fd_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_fd_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_full_nosec_test - test/core/end2end/fixtures/h2_full.c -) - - -target_include_directories(h2_full_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX) - -add_executable(h2_full+pipe_nosec_test - test/core/end2end/fixtures/h2_full+pipe.c -) - - -target_include_directories(h2_full+pipe_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full+pipe_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_full+trace_nosec_test - test/core/end2end/fixtures/h2_full+trace.c -) - - -target_include_directories(h2_full+trace_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full+trace_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_full+workarounds_nosec_test - test/core/end2end/fixtures/h2_full+workarounds.c -) - - -target_include_directories(h2_full+workarounds_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_full+workarounds_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_http_proxy_nosec_test - test/core/end2end/fixtures/h2_http_proxy.c -) - - -target_include_directories(h2_http_proxy_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_http_proxy_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_load_reporting_nosec_test - test/core/end2end/fixtures/h2_load_reporting.c -) - - -target_include_directories(h2_load_reporting_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_load_reporting_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_proxy_nosec_test - test/core/end2end/fixtures/h2_proxy.c -) - - -target_include_directories(h2_proxy_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_proxy_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_sockpair_nosec_test - test/core/end2end/fixtures/h2_sockpair.c -) - - -target_include_directories(h2_sockpair_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_sockpair_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_sockpair+trace_nosec_test - test/core/end2end/fixtures/h2_sockpair+trace.c -) - - -target_include_directories(h2_sockpair+trace_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_sockpair+trace_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(h2_sockpair_1byte_nosec_test - test/core/end2end/fixtures/h2_sockpair_1byte.c -) - - -target_include_directories(h2_sockpair_1byte_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_sockpair_1byte_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) -if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX) - -add_executable(h2_uds_nosec_test - test/core/end2end/fixtures/h2_uds.c -) - - -target_include_directories(h2_uds_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(h2_uds_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif() -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(inproc_nosec_test - test/core/end2end/fixtures/inproc.c -) - - -target_include_directories(inproc_nosec_test - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(inproc_nosec_test - ${_gRPC_ALLTARGETS_LIBRARIES} - end2end_nosec_tests - grpc_test_util_unsecure - grpc_unsecure - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(api_fuzzer_one_entry - test/core/end2end/fuzzers/api_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(api_fuzzer_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(api_fuzzer_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(client_fuzzer_one_entry - test/core/end2end/fuzzers/client_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(client_fuzzer_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(client_fuzzer_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(hpack_parser_fuzzer_test_one_entry - test/core/transport/chttp2/hpack_parser_fuzzer_test.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(hpack_parser_fuzzer_test_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(hpack_parser_fuzzer_test_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(http_request_fuzzer_test_one_entry - test/core/http/request_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(http_request_fuzzer_test_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(http_request_fuzzer_test_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(http_response_fuzzer_test_one_entry - test/core/http/response_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(http_response_fuzzer_test_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(http_response_fuzzer_test_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(json_fuzzer_test_one_entry - test/core/json/fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(json_fuzzer_test_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(json_fuzzer_test_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(nanopb_fuzzer_response_test_one_entry - test/core/nanopb/fuzzer_response.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(nanopb_fuzzer_response_test_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(nanopb_fuzzer_response_test_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(nanopb_fuzzer_serverlist_test_one_entry - test/core/nanopb/fuzzer_serverlist.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(nanopb_fuzzer_serverlist_test_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(nanopb_fuzzer_serverlist_test_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(percent_decode_fuzzer_one_entry - test/core/slice/percent_decode_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(percent_decode_fuzzer_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(percent_decode_fuzzer_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(percent_encode_fuzzer_one_entry - test/core/slice/percent_encode_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(percent_encode_fuzzer_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(percent_encode_fuzzer_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(server_fuzzer_one_entry - test/core/end2end/fuzzers/server_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(server_fuzzer_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(server_fuzzer_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(ssl_server_fuzzer_one_entry - test/core/security/ssl_server_fuzzer.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(ssl_server_fuzzer_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(ssl_server_fuzzer_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) -if (gRPC_BUILD_TESTS) - -add_executable(uri_fuzzer_test_one_entry - test/core/client_channel/uri_fuzzer_test.c - test/core/util/one_corpus_entry_fuzzer.c -) - - -target_include_directories(uri_fuzzer_test_one_entry - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE ${BORINGSSL_ROOT_DIR}/include - PRIVATE ${PROTOBUF_ROOT_DIR}/src - PRIVATE ${BENCHMARK_ROOT_DIR}/include - PRIVATE ${ZLIB_ROOT_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib - PRIVATE ${CARES_BUILD_INCLUDE_DIR} - PRIVATE ${CARES_INCLUDE_DIR} - PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares - PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include -) - -target_link_libraries(uri_fuzzer_test_one_entry - ${_gRPC_ALLTARGETS_LIBRARIES} - grpc_test_util - grpc - gpr_test_util - gpr -) - -endif (gRPC_BUILD_TESTS) - - - - - - - -if (gRPC_INSTALL) - install(EXPORT gRPCTargets - DESTINATION ${gRPC_INSTALL_CMAKEDIR} - NAMESPACE gRPC:: - ) -endif() - -foreach(_config gRPCConfig gRPCConfigVersion) - configure_file(tools/cmake/${_config}.cmake.in - ${_config}.cmake @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${_config}.cmake - DESTINATION ${gRPC_INSTALL_CMAKEDIR} - ) -endforeach() diff --git a/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc b/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc index cff765d1e8..991f9a9d8b 100644 --- a/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc +++ b/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc @@ -43,22 +43,21 @@ VerbsService::Stub::Stub( const std::shared_ptr< ::grpc::ChannelInterface>& channel) : channel_(channel), rpcmethod_GetRemoteAddress_(grpcVerbsService_method_names[0], - ::grpc::RpcMethod::NORMAL_RPC, + ::grpc::internal::RpcMethod::NORMAL_RPC, channel) {} ::grpc::Status VerbsService::Stub::GetRemoteAddress( ::grpc::ClientContext* context, const GetRemoteAddressRequest& request, GetRemoteAddressResponse* response) { - return ::grpc::BlockingUnaryCall( + return ::grpc::internal::BlockingUnaryCall( channel_.get(), rpcmethod_GetRemoteAddress_, context, request, response); } VerbsService::AsyncService::AsyncService() { for (int i = 0; i < 1; ++i) { - AddMethod(new ::grpc::RpcServiceMethod( + AddMethod(new ::grpc::internal::RpcServiceMethod( grpcVerbsService_method_names[i], - ::grpc::RpcMethod::NORMAL_RPC, - nullptr)); + ::grpc::internal::RpcMethod::NORMAL_RPC, nullptr)); ::grpc::Service::MarkMethodAsync(i); } } diff --git a/tensorflow/contrib/verbs/grpc_verbs_service_impl.h b/tensorflow/contrib/verbs/grpc_verbs_service_impl.h index 6e2bf86dac..86431ca030 100644 --- a/tensorflow/contrib/verbs/grpc_verbs_service_impl.h +++ b/tensorflow/contrib/verbs/grpc_verbs_service_impl.h @@ -28,15 +28,6 @@ limitations under the License. #include "tensorflow/contrib/verbs/verbs_service.pb.h" namespace grpc { - -// ensure internal namespace exists -namespace internal { -// bring in contents of external namespace -using namespace ::grpc; -} // namespace internal -// bring in contents of internal namespace -using namespace internal; - class CompletionQueue; class Channel; class RpcService; @@ -70,7 +61,7 @@ class VerbsService GRPC_FINAL { private: std::shared_ptr< ::grpc::ChannelInterface> channel_; - const ::grpc::RpcMethod rpcmethod_GetRemoteAddress_; + const ::grpc::internal::RpcMethod rpcmethod_GetRemoteAddress_; }; static std::unique_ptr NewStub( const std::shared_ptr< ::grpc::ChannelInterface>& channel, diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD index 51e499d3f5..80640c806d 100644 --- a/tensorflow/core/distributed_runtime/rpc/BUILD +++ b/tensorflow/core/distributed_runtime/rpc/BUILD @@ -200,7 +200,6 @@ cc_library( srcs = ["grpc_worker_service_impl.cc"], hdrs = ["grpc_worker_service_impl.h"], deps = [ - ":grpc_namespace_compat", ":grpc_serialization_traits", "//tensorflow/core:worker_proto_cc", "//tensorflow/core/distributed_runtime:tensor_coding", @@ -247,22 +246,12 @@ cc_library( srcs = ["grpc_master_service_impl.cc"], hdrs = ["grpc_master_service_impl.h"], deps = [ - ":grpc_namespace_compat", ":grpc_serialization_traits", "//tensorflow/core:master_proto_cc", "@grpc//:grpc++_unsecure", ], ) -cc_library( - name = "grpc_namespace_compat", - srcs = [], - hdrs = ["grpc_namespace_compat.h"], - deps = [ - "@grpc//:grpc++_unsecure", - ], -) - cc_library( name = "grpc_serialization_traits", srcs = [], diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc index d998d51058..e2016e824c 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc @@ -49,75 +49,77 @@ MasterService::Stub::Stub( const std::shared_ptr< ::grpc::ChannelInterface>& channel) : channel_(channel), rpcmethod_CreateSession_(grpcMasterService_method_names[0], - ::grpc::RpcMethod::NORMAL_RPC, channel), + ::grpc::internal::RpcMethod::NORMAL_RPC, + channel), rpcmethod_ExtendSession_(grpcMasterService_method_names[1], - ::grpc::RpcMethod::NORMAL_RPC, channel), + ::grpc::internal::RpcMethod::NORMAL_RPC, + channel), rpcmethod_PartialRunSetup_(grpcMasterService_method_names[2], - ::grpc::RpcMethod::NORMAL_RPC, channel), + ::grpc::internal::RpcMethod::NORMAL_RPC, + channel), rpcmethod_RunStep_(grpcMasterService_method_names[3], - ::grpc::RpcMethod::NORMAL_RPC, channel), + ::grpc::internal::RpcMethod::NORMAL_RPC, channel), rpcmethod_CloseSession_(grpcMasterService_method_names[4], - ::grpc::RpcMethod::NORMAL_RPC, channel), + ::grpc::internal::RpcMethod::NORMAL_RPC, channel), rpcmethod_ListDevices_(grpcMasterService_method_names[5], - ::grpc::RpcMethod::NORMAL_RPC, channel), + ::grpc::internal::RpcMethod::NORMAL_RPC, channel), rpcmethod_Reset_(grpcMasterService_method_names[6], - ::grpc::RpcMethod::NORMAL_RPC, channel) {} + ::grpc::internal::RpcMethod::NORMAL_RPC, channel) {} ::grpc::Status MasterService::Stub::CreateSession( ::grpc::ClientContext* context, const CreateSessionRequest& request, CreateSessionResponse* response) { - return ::grpc::BlockingUnaryCall(channel_.get(), rpcmethod_CreateSession_, - context, request, response); + return ::grpc::internal::BlockingUnaryCall( + channel_.get(), rpcmethod_CreateSession_, context, request, response); } ::grpc::Status MasterService::Stub::ExtendSession( ::grpc::ClientContext* context, const ExtendSessionRequest& request, ExtendSessionResponse* response) { - return ::grpc::BlockingUnaryCall(channel_.get(), rpcmethod_ExtendSession_, - context, request, response); + return ::grpc::internal::BlockingUnaryCall( + channel_.get(), rpcmethod_ExtendSession_, context, request, response); } ::grpc::Status MasterService::Stub::PartialRunSetup( ::grpc::ClientContext* context, const PartialRunSetupRequest& request, PartialRunSetupResponse* response) { - return ::grpc::BlockingUnaryCall(channel_.get(), rpcmethod_PartialRunSetup_, - context, request, response); + return ::grpc::internal::BlockingUnaryCall( + channel_.get(), rpcmethod_PartialRunSetup_, context, request, response); } ::grpc::Status MasterService::Stub::RunStep(::grpc::ClientContext* context, const RunStepRequest& request, RunStepResponse* response) { - return ::grpc::BlockingUnaryCall(channel_.get(), rpcmethod_RunStep_, context, - request, response); + return ::grpc::internal::BlockingUnaryCall(channel_.get(), rpcmethod_RunStep_, + context, request, response); } ::grpc::Status MasterService::Stub::CloseSession( ::grpc::ClientContext* context, const CloseSessionRequest& request, CloseSessionResponse* response) { - return ::grpc::BlockingUnaryCall(channel_.get(), rpcmethod_CloseSession_, - context, request, response); + return ::grpc::internal::BlockingUnaryCall( + channel_.get(), rpcmethod_CloseSession_, context, request, response); } ::grpc::Status MasterService::Stub::ListDevices( ::grpc::ClientContext* context, const ListDevicesRequest& request, ListDevicesResponse* response) { - return ::grpc::BlockingUnaryCall(channel_.get(), rpcmethod_ListDevices_, - context, request, response); + return ::grpc::internal::BlockingUnaryCall( + channel_.get(), rpcmethod_ListDevices_, context, request, response); } ::grpc::Status MasterService::Stub::Reset(::grpc::ClientContext* context, const ResetRequest& request, ResetResponse* response) { - return ::grpc::BlockingUnaryCall(channel_.get(), rpcmethod_Reset_, context, - request, response); + return ::grpc::internal::BlockingUnaryCall(channel_.get(), rpcmethod_Reset_, + context, request, response); } MasterService::AsyncService::AsyncService() { for (int i = 0; i < 7; ++i) { - AddMethod(new ::grpc::RpcServiceMethod( + AddMethod(new ::grpc::internal::RpcServiceMethod( grpcMasterService_method_names[i], - ::grpc::RpcMethod::NORMAL_RPC, - nullptr)); + ::grpc::internal::RpcMethod::NORMAL_RPC, nullptr)); ::grpc::Service::MarkMethodAsync(i); } } diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h index 131de2863f..412395c526 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h @@ -25,7 +25,6 @@ limitations under the License. #include "grpc++/impl/codegen/stub_options.h" #include "grpc++/impl/codegen/sync_stream.h" -#include "tensorflow/core/distributed_runtime/rpc/grpc_namespace_compat.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_serialization_traits.h" #include "tensorflow/core/protobuf/master.pb.h" @@ -108,13 +107,13 @@ class MasterService final { private: std::shared_ptr< ::grpc::ChannelInterface> channel_; - const ::grpc::RpcMethod rpcmethod_CreateSession_; - const ::grpc::RpcMethod rpcmethod_ExtendSession_; - const ::grpc::RpcMethod rpcmethod_PartialRunSetup_; - const ::grpc::RpcMethod rpcmethod_RunStep_; - const ::grpc::RpcMethod rpcmethod_CloseSession_; - const ::grpc::RpcMethod rpcmethod_ListDevices_; - const ::grpc::RpcMethod rpcmethod_Reset_; + const ::grpc::internal::RpcMethod rpcmethod_CreateSession_; + const ::grpc::internal::RpcMethod rpcmethod_ExtendSession_; + const ::grpc::internal::RpcMethod rpcmethod_PartialRunSetup_; + const ::grpc::internal::RpcMethod rpcmethod_RunStep_; + const ::grpc::internal::RpcMethod rpcmethod_CloseSession_; + const ::grpc::internal::RpcMethod rpcmethod_ListDevices_; + const ::grpc::internal::RpcMethod rpcmethod_Reset_; }; static std::unique_ptr NewStub( const std::shared_ptr< ::grpc::ChannelInterface>& channel, diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_namespace_compat.h b/tensorflow/core/distributed_runtime/rpc/grpc_namespace_compat.h deleted file mode 100644 index c178927f5d..0000000000 --- a/tensorflow/core/distributed_runtime/rpc/grpc_namespace_compat.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef THIRD_PARTY_TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_NAMESPACE_COMPAT_H_ -#define THIRD_PARTY_TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_NAMESPACE_COMPAT_H_ - -// This file is a transitional place-holder until gRPC versions consistently -// use namespace grpc::internal for library-internal structures - -namespace grpc { -// ensure internal namespace exists -namespace internal { -// bring in contents of external namespace -using namespace ::grpc; -} // namespace internal -// bring in contents of internal namespace -using namespace internal; -} // namespace grpc - -#endif // THIRD_PARTY_TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_NAMESPACE_COMPAT_H_ diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc index 80a2f89337..348c6dc98b 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc @@ -58,9 +58,9 @@ namespace grpc { WorkerService::AsyncService::AsyncService() { for (int i = 0; i < kGrpcNumWorkerMethods; ++i) { - AddMethod(new ::grpc::RpcServiceMethod( + AddMethod(new ::grpc::internal::RpcServiceMethod( GrpcWorkerMethodName(static_cast(i)), - ::grpc::RpcMethod::NORMAL_RPC, nullptr)); + ::grpc::internal::RpcMethod::NORMAL_RPC, nullptr)); ::grpc::Service::MarkMethodAsync(i); } } diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h index c8a8b5778e..e9862a61a3 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h @@ -26,7 +26,6 @@ limitations under the License. #include "grpc++/impl/codegen/sync_stream.h" #include "grpc++/support/byte_buffer.h" -#include "tensorflow/core/distributed_runtime/rpc/grpc_namespace_compat.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_serialization_traits.h" #include "tensorflow/core/distributed_runtime/tensor_coding.h" #include "tensorflow/core/protobuf/worker.pb.h" diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2c9f067882..19e1deb95d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -416,6 +416,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): actual = "@protobuf_archive//:protobuf", ) + native.bind( + name = "protobuf_headers", + actual = "@protobuf_archive//:protobuf_headers", + ) + # We need to import the protobuf library under the names com_google_protobuf # and com_google_protobuf_cc to enable proto_library support in bazel. # Unfortunately there is no way to alias http_archives at the moment. @@ -528,15 +533,21 @@ def tf_workspace(path_prefix="", tf_repo_name=""): actual = "@grpc//third_party/nanopb:nanopb", ) - patched_http_archive( + native.http_archive( name = "grpc", urls = [ - "https://mirror.bazel.build/github.com/grpc/grpc/archive/781fd6f6ea03645a520cd5c675da67ab61f87e4b.tar.gz", - # "https://github.com/grpc/grpc/archive/781fd6f6ea03645a520cd5c675da67ab61f87e4b.tar.gz", + # "https://mirror.bazel.build/github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz", + "https://github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz", ], - sha256 = "2004635e6a078acfac8ffa71738397796be4f8fb72f572cc44ecee5d99511d9f", - strip_prefix = "grpc-781fd6f6ea03645a520cd5c675da67ab61f87e4b", - patch_file = str(Label("//third_party/grpc:grpc.patch")), + sha256 = "c2166b6d96daddf72fe45b2c594210c65ca17ec3c1b2e12089159a9529edb5e4", + strip_prefix = "grpc-54e8f37e537794c2d814c1604c1282125f64f093", + ) + + # gRPC wants the existence of a cares dependence but its contents are not + # actually important since we have set GRPC_ARES=0 in tools/bazel.rc + native.bind( + name = "cares", + actual = "@grpc//third_party/nanopb:nanopb", ) # protobuf expects //external:grpc_cpp_plugin to point to grpc's diff --git a/third_party/grpc/grpc.patch b/third_party/grpc/grpc.patch deleted file mode 100644 index c06d9b8aaf..0000000000 --- a/third_party/grpc/grpc.patch +++ /dev/null @@ -1,105 +0,0 @@ -diff --git a/BUILD b/BUILD -index 6552d5879e..59adb1ce1c 100644 ---- a/BUILD -+++ b/BUILD -@@ -287,6 +287,7 @@ grpc_cc_library( - "grpc++_base_unsecure", - "grpc++_codegen_base", - "grpc++_codegen_base_src", -+ "grpc++_codegen_proto", - "grpc_unsecure", - ], - ) -@@ -1519,13 +1520,13 @@ grpc_cc_library( - - grpc_cc_library( - name = "grpc++_config_proto", -- external_deps = [ -- "protobuf", -- ], - language = "c++", - public_hdrs = [ - "include/grpc++/impl/codegen/config_protobuf.h", - ], -+ deps = [ -+ "@protobuf_archive//:protobuf_headers", -+ ], - ) - - grpc_cc_library( -diff --git a/bazel/grpc_build_system.bzl b/bazel/grpc_build_system.bzl -index f793cae56d..0295adb8ab 100644 ---- a/bazel/grpc_build_system.bzl -+++ b/bazel/grpc_build_system.bzl -@@ -80,7 +80,7 @@ def grpc_cc_test(name, srcs = [], deps = [], external_deps = [], args = [], data - linkopts = ["-pthread"], - ) - --def grpc_cc_binary(name, srcs = [], deps = [], external_deps = [], args = [], data = [], language = "C++", testonly = False, linkshared = False): -+def grpc_cc_binary(name, srcs = [], deps = [], external_deps = [], args = [], data = [], language = "C++", testonly = False, linkshared = False, linkopts = []): - copts = [] - if language.upper() == "C": - copts = ["-std=c99"] -@@ -93,7 +93,7 @@ def grpc_cc_binary(name, srcs = [], deps = [], external_deps = [], args = [], da - linkshared = linkshared, - deps = deps + ["//external:" + dep for dep in external_deps], - copts = copts, -- linkopts = ["-pthread"], -+ linkopts = ["-pthread"] + linkopts, - ) - - def grpc_generate_one_off_targets(): -diff --git a/src/core/plugin_registry/grpc_unsecure_plugin_registry.c b/src/core/plugin_registry/grpc_unsecure_plugin_registry.c -index 7eb599d81a..4cc2e30af4 100644 ---- a/src/core/plugin_registry/grpc_unsecure_plugin_registry.c -+++ b/src/core/plugin_registry/grpc_unsecure_plugin_registry.c -@@ -28,18 +28,12 @@ extern void grpc_client_channel_init(void); - extern void grpc_client_channel_shutdown(void); - extern void grpc_inproc_plugin_init(void); - extern void grpc_inproc_plugin_shutdown(void); --extern void grpc_resolver_dns_ares_init(void); --extern void grpc_resolver_dns_ares_shutdown(void); - extern void grpc_resolver_dns_native_init(void); - extern void grpc_resolver_dns_native_shutdown(void); - extern void grpc_resolver_sockaddr_init(void); - extern void grpc_resolver_sockaddr_shutdown(void); --extern void grpc_resolver_fake_init(void); --extern void grpc_resolver_fake_shutdown(void); - extern void grpc_load_reporting_plugin_init(void); - extern void grpc_load_reporting_plugin_shutdown(void); --extern void grpc_lb_policy_grpclb_init(void); --extern void grpc_lb_policy_grpclb_shutdown(void); - extern void grpc_lb_policy_pick_first_init(void); - extern void grpc_lb_policy_pick_first_shutdown(void); - extern void grpc_lb_policy_round_robin_init(void); -@@ -64,18 +58,12 @@ void grpc_register_built_in_plugins(void) { - grpc_client_channel_shutdown); - grpc_register_plugin(grpc_inproc_plugin_init, - grpc_inproc_plugin_shutdown); -- grpc_register_plugin(grpc_resolver_dns_ares_init, -- grpc_resolver_dns_ares_shutdown); - grpc_register_plugin(grpc_resolver_dns_native_init, - grpc_resolver_dns_native_shutdown); - grpc_register_plugin(grpc_resolver_sockaddr_init, - grpc_resolver_sockaddr_shutdown); -- grpc_register_plugin(grpc_resolver_fake_init, -- grpc_resolver_fake_shutdown); - grpc_register_plugin(grpc_load_reporting_plugin_init, - grpc_load_reporting_plugin_shutdown); -- grpc_register_plugin(grpc_lb_policy_grpclb_init, -- grpc_lb_policy_grpclb_shutdown); - grpc_register_plugin(grpc_lb_policy_pick_first_init, - grpc_lb_policy_pick_first_shutdown); - grpc_register_plugin(grpc_lb_policy_round_robin_init, -diff --git a/test/cpp/util/BUILD b/test/cpp/util/BUILD -index 33240f6f69..d2e1f67f06 100644 ---- a/test/cpp/util/BUILD -+++ b/test/cpp/util/BUILD -@@ -29,6 +29,7 @@ package( - grpc_cc_binary( - name = "testso.so", - srcs = [], -+ linkopts = ['-Wl,--no-undefined'], - linkshared = 1, - deps = ["//:grpc++_unsecure"], - ) diff --git a/tools/bazel.rc b/tools/bazel.rc index ac6766b11b..2d7201ae57 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -19,6 +19,7 @@ build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt - build --define=use_fast_cpp_protos=true build --define=allow_oversize_protos=true +build --define=grpc_no_ares=true build --spawn_strategy=standalone build --genrule_strategy=standalone -- GitLab From b066496f625930bc00397ad9d000741d724598eb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 13 Nov 2017 14:35:27 -0800 Subject: [PATCH 0360/1801] Add support for grayscale bmp image (#14296) * Add support for grayscale bmp image This fix tries to address the issue raised in 13942 to support grayscale bmp image. Previously only channels of 3 or 4 are supported in bmp decoding. This fix adds the support to have 1 channel grayscale image. This fix fixes 13942. Signed-off-by: Yong Tang * Add test case for grayscale bmp image decoding Signed-off-by: Yong Tang --- tensorflow/core/kernels/decode_bmp_op.cc | 16 +++++---- .../python/kernel_tests/decode_bmp_op_test.py | 34 +++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc index 086369a9f1..cd7956e1cb 100644 --- a/tensorflow/core/kernels/decode_bmp_op.cc +++ b/tensorflow/core/kernels/decode_bmp_op.cc @@ -33,9 +33,10 @@ class DecodeBmpOp : public OpKernel { public: explicit DecodeBmpOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("channels", &channels_)); - OP_REQUIRES( - context, channels_ == 0 || channels_ == 3 || channels_ == 4, - errors::InvalidArgument("channels must be 0, 3 or 4, got ", channels_)); + OP_REQUIRES(context, channels_ == 0 || channels_ == 1 || channels_ == 3 || + channels_ == 4, + errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ", + channels_)); } void Compute(OpKernelContext* context) override { @@ -66,11 +67,11 @@ class DecodeBmpOp : public OpKernel { channels_ = bpp / 8; } - // Current implementation only supports 3 or 4 channel + // Current implementation only supports 1, 3 or 4 channel // bitmaps. - OP_REQUIRES(context, (channels_ == 3 || channels_ == 4), + OP_REQUIRES(context, (channels_ == 1 || channels_ == 3 || channels_ == 4), errors::InvalidArgument( - "Number of channels must be 3 or 4, was ", channels_)); + "Number of channels must be 1, 3 or 4, was ", channels_)); // if height is negative, data layout is top down // otherwise, it's bottom up @@ -117,6 +118,9 @@ uint8* DecodeBmpOp::Decode(const uint8* input, uint8* const output, dst_pos = (i * width + j) * channels; switch (channels) { + case 1: + output[dst_pos] = input[src_pos]; + break; case 3: // BGR -> RGB output[dst_pos] = input[src_pos + 2]; diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py index 783492a6f2..e7b472240e 100644 --- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py +++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py @@ -64,6 +64,40 @@ class DecodeBmpOpTest(test.TestCase): decoded = decode.eval() self.assertAllEqual(decoded, img_bytes) + def testGrayscale(self): + img_bytes = [[[255], [0]], [[255], [0]]] + encoded_bytes = [ + 0x42, 0x40, + 0x3d, 0, 0, 0, + 0, 0, + 0, 0, + 0x36, 0, 0, 0, + 0x28, 0, 0, 0, + 0x2, 0, 0, 0, + 0x2, 0, 0, 0, + 0x1, 0, + 0x8, 0, + 0, 0, 0, 0, + 0x10, 0, 0, 0, + 0x13, 0xb, 0, 0, + 0x13, 0xb, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0xff, + 0, + 0, 0, + 0xff, + 0, + 0, 0, + ] + + byte_string = bytes(bytearray(encoded_bytes)) + img_in = constant_op.constant(byte_string, dtype=dtypes.string) + decode = image_ops.decode_bmp(img_in) + + with self.test_session(): + decoded = decode.eval() + self.assertAllEqual(decoded, img_bytes) if __name__ == "__main__": test.main() -- GitLab From 7d197b3ab4b5389c19ab7742cf5d473d5fa91b08 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 15:00:07 -0800 Subject: [PATCH 0361/1801] Automated g4 rollback of changelist 175563786 PiperOrigin-RevId: 175593063 --- tensorflow/cc/framework/cc_op_gen.cc | 119 +++--------------- tensorflow/core/common_runtime/device_mgr.h | 2 +- .../core/common_runtime/direct_session.cc | 2 +- .../core/common_runtime/direct_session.h | 3 +- tensorflow/core/common_runtime/placer.cc | 4 +- .../common_runtime/step_stats_collector.cc | 4 +- .../distributed_runtime/master_session.cc | 4 +- .../core/framework/variant_op_registry.h | 9 +- tensorflow/core/graph/costmodel.h | 2 +- tensorflow/core/graph/graph_constructor.cc | 6 +- tensorflow/core/graph/quantize_training.cc | 4 +- tensorflow/core/graph/subgraph.h | 2 +- tensorflow/core/lib/core/stringpiece.cc | 2 +- tensorflow/core/lib/core/stringpiece.h | 8 +- .../graph_transforms/fold_constants_lib.cc | 4 +- 15 files changed, 47 insertions(+), 128 deletions(-) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index 7a1b2a012d..38a17598b8 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -297,7 +297,7 @@ string ToCamelCase(const string& str) { // argument to a function. std::pair AttrTypeName(StringPiece attr_type) { static const std::unordered_map, - StringPieceHasher> + StringPiece::Hasher> attr_type_map{ {"string", {"StringPiece", false}}, {"list(string)", {"gtl::ArraySlice", true}}, @@ -325,112 +325,29 @@ std::pair AttrTypeName(StringPiece attr_type) { } bool IsCPPKeyword(StringPiece name) { - static const std::unordered_set + static const std::unordered_set // Keywords obtained from http://en.cppreference.com/w/cpp/keyword kCPPReserved{ - "alignas", - "alignof", - "and", - "and_eq", - "asm", - "atomic_cancel", - "atomic_commit", - "atomic_noexcept", - "auto", - "bitand", - "bitor", - "bool", - "break", - "case", - "catch", - "char", - "char16_t", - "char32_t", - "class", - "compl", - "concept", - "const", - "const_cast", - "constexpr", - "continue", - "decltype", - "default", - "delete", - "do", - "double", - "dynamic_cast", - "else", - "enum", - "explicit", - "export", - "extern", - "false", - "final", - "float", - "for", - "friend", - "goto", - "if", - "import", - "inline", - "int", - "long", - "module", - "mutable", - "namespace", - "new", - "noexcept", - "not", - "not_eq", - "nullptr", - "operator", - "or", - "or_eq", - "override", - "private", - "protected", - "public", - "register", - "reinterpret_cast", - "requires", - "return", - "short", - "signed", - "sizeof", - "static", - "static_assert", - "static_cast", - "struct", - "switch", - "synchronized", - "template", - "this", - "thread_local", - "throw", - "true", - "try", - "typedef", - "typeid", - "typename", - "union", - "unsigned", - "using", - "virtual", - "void", - "volatile", - "wchar_t", - "while", - "xor", - "xor_eq", + "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", + "atomic_commit", "atomic_noexcept", "auto", "bitand", "bitor", "bool", + "break", "case", "catch", "char", "char16_t", "char32_t", "class", + "compl", "concept", "const", "const_cast", "constexpr", "continue", + "decltype", "default", "delete", "do", "double", "dynamic_cast", + "else", "enum", "explicit", "export", "extern", "false", "final", + "float", "for", "friend", "goto", "if", "import", "inline", "int", + "long", "module", "mutable", "namespace", "new", "noexcept", "not", + "not_eq", "nullptr", "operator", "or", "or_eq", "override", "private", + "protected", "public", "register", "reinterpret_cast", "requires", + "return", "short", "signed", "sizeof", "static", "static_assert", + "static_cast", "struct", "switch", "synchronized", "template", "this", + "thread_local", "throw", "true", "try", "typedef", "typeid", + "typename", "union", "unsigned", "using", "virtual", "void", + "volatile", "wchar_t", "while", "xor", "xor_eq", // The following are not C++ keywords, but names of local variables // and parameters used in the op constructor. Treating them as // keywords, so that other parameter names don't conflict with these. - "builder", - "node", - "ret", - "scope", - "unique_name", + "builder", "node", "ret", "scope", "unique_name", }; return kCPPReserved.count(name) > 0; } diff --git a/tensorflow/core/common_runtime/device_mgr.h b/tensorflow/core/common_runtime/device_mgr.h index cd93f76324..d16681ac59 100644 --- a/tensorflow/core/common_runtime/device_mgr.h +++ b/tensorflow/core/common_runtime/device_mgr.h @@ -68,7 +68,7 @@ class DeviceMgr { StringPiece CopyToBackingStore(StringPiece s); - std::unordered_map device_map_; + std::unordered_map device_map_; core::Arena name_backing_store_; // Storage for keys in device_map_ std::unordered_map device_type_counts_; diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index d1dc826a6e..316fb0ac16 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1135,7 +1135,7 @@ Status DirectSession::GetOrCreateExecutors( if (run_state_args->is_partial_run) { ek->graph = std::move(run_state_args->graph); - std::unordered_set names; + std::unordered_set names; for (const string& input : inputs) { TensorId id(ParseTensorName(input)); names.emplace(id.first); diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 780d0b46a8..7fbabf6d81 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -64,7 +64,8 @@ class DirectSession : public Session { ~DirectSession() override; typedef std::vector> NamedTensorList; - typedef std::unordered_map NameNodeMap; + typedef std::unordered_map + NameNodeMap; ::tensorflow::Status Create(const GraphDef& graph) override; ::tensorflow::Status Extend(const GraphDef& graph) override; diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc index 54f082e823..73fdf60fd5 100644 --- a/tensorflow/core/common_runtime/placer.cc +++ b/tensorflow/core/common_runtime/placer.cc @@ -129,7 +129,7 @@ class ColocationGraph { // 'string' values stored in NodeDef attribute lists, as well as StringPiece // values that refer to 'string' values from NodeDef::name(), without // performing any string allocations. - std::unordered_map + std::unordered_map colocation_group_root; for (Node* node : graph_->nodes()) { @@ -171,7 +171,7 @@ class ColocationGraph { } Status ColocateNodeToGroup( - std::unordered_map* + std::unordered_map* colocation_group_root, Node* node, StringPiece colocation_group) { const Node*& root_node = (*colocation_group_root)[colocation_group]; diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index ba8e555f36..e6403df97f 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -139,7 +139,7 @@ void StepStatsCollector::BuildCostModel( const DeviceStepStats* hardware_stats; }; - std::unordered_map + std::unordered_map per_device_stats; std::unordered_map gpu_hardware_stats; @@ -179,7 +179,7 @@ void StepStatsCollector::BuildCostModel( CostModel* cm = cost_model_manager->FindOrCreateCostModel(graph); cm->IncrementUpdateTimes(); - std::unordered_map name_to_node; + std::unordered_map name_to_node; for (Node* n : graph->nodes()) { name_to_node.emplace(n->name(), n); } diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 7617158d66..f7fce1d0ec 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -208,7 +208,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { const bool is_partial_; const DebugOptions& debug_opts_; WorkerCacheInterface* const worker_cache_; // Not owned. - std::unordered_map name_to_node_; + std::unordered_map name_to_node_; // Graph partitioned into per-location subgraphs. struct Part { @@ -486,7 +486,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions( VLOG(2) << "RunPartitions step_id " << step_id << " execution_count " << execution_count; // Maps the names of fed tensors to their index in `req`. - std::unordered_map feeds(3); + std::unordered_map feeds(3); for (size_t i = 0; i < req.num_feeds(); ++i) { if (!feeds.insert({req.feed_name(i), i}).second) { diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index 13f6908cae..831dbd3dff 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -145,8 +145,9 @@ class UnaryVariantOpRegistry { static std::unordered_set* PersistentStringStorage(); private: - std::unordered_map shape_fns; - std::unordered_map + std::unordered_map + shape_fns; + std::unordered_map decode_fns; // Map std::pair to function. @@ -158,7 +159,7 @@ class UnaryVariantOpRegistry { ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x))); return ret; } - StringPieceHasher sp_hasher_; + StringPiece::Hasher sp_hasher_; }; std::unordered_map, @@ -176,7 +177,7 @@ class UnaryVariantOpRegistry { ret = Hash64Combine(ret, sp_hasher_(std::get<2>(x))); return ret; } - StringPieceHasher sp_hasher_; + StringPiece::Hasher sp_hasher_; }; std::unordered_map, VariantUnaryOpFn, TupleHash> diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h index 8afa4971ad..a908a4843c 100644 --- a/tensorflow/core/graph/costmodel.h +++ b/tensorflow/core/graph/costmodel.h @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" namespace tensorflow { -typedef std::unordered_map +typedef std::unordered_map NodeNameToCostIdMap; class StepStats; diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index ebaffeb50e..2ee409768b 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -241,13 +241,13 @@ class GraphConstructor { }; // TODO(vrv): Profile this data structure to see if we should use an // alternative implementation of std::unordered_map. - std::unordered_map gdef_nodes_; + std::unordered_map gdef_nodes_; // Mapping from node name to the existing node in g_. - std::unordered_map existing_nodes_; + std::unordered_map existing_nodes_; // Prefixes already used in the graph. - std::unordered_set existing_prefixes_; + std::unordered_set existing_prefixes_; // Imported node names that have been uniquified. The key is the original // name, the value is the new unique name. diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc index cb0fc8a154..d9cb55f448 100644 --- a/tensorflow/core/graph/quantize_training.cc +++ b/tensorflow/core/graph/quantize_training.cc @@ -42,7 +42,7 @@ const float kEMADecay = 0.999; // Node types to rewrite. Insert quantize_and_dequantize op for their inputs. const auto* nodes_to_rewrite = - new std::unordered_set{"MatMul", "Conv2D"}; + new std::unordered_set{"MatMul", "Conv2D"}; // Contains necessary parameters to convert an edge. struct EdgeToConvert { @@ -563,7 +563,7 @@ Status ProcessTargetEdges(Graph* graph, const string& quant_op_type, const std::vector& target_edges) { // Remember previously converted ops to avoid duplicated conversion on the // same input. - std::unordered_map name_index; + std::unordered_map name_index; std::vector added_variables; for (const EdgeToConvert edge : target_edges) { Node* convert_node; diff --git a/tensorflow/core/graph/subgraph.h b/tensorflow/core/graph/subgraph.h index 3c1f8870f5..8ccc27914b 100644 --- a/tensorflow/core/graph/subgraph.h +++ b/tensorflow/core/graph/subgraph.h @@ -71,7 +71,7 @@ Status RewriteGraphForExecution( const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata); -typedef std::unordered_map NameIndex; +typedef std::unordered_map NameIndex; // Augment "*g" by adding special "fetch" nodes that connect to the // tensor outputs specified in "fetch_outputs" to retrieve the output diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 29b727fc44..984f4404ce 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -21,7 +21,7 @@ limitations under the License. namespace tensorflow { -size_t StringPieceHasher::operator()(StringPiece s) const { +size_t StringPiece::Hasher::operator()(StringPiece s) const { return Hash64(s.data(), s.size()); } diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 53af116465..94f4a377f1 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -103,6 +103,10 @@ class StringPiece { StringPiece substr(size_t pos, size_t n = npos) const; + struct Hasher { + size_t operator()(StringPiece arg) const; + }; + // Return a string that contains the copy of the referenced data. std::string ToString() const { return std::string(data_, size_); } @@ -129,10 +133,6 @@ class StringPiece { // Intentionally copyable }; -struct StringPieceHasher { - size_t operator()(StringPiece s) const; -}; - inline bool operator==(StringPiece x, StringPiece y) { return ((x.size() == y.size()) && (memcmp(x.data(), y.data(), x.size()) == 0)); diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc index 250f54e20f..f2934a79bd 100644 --- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc +++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc @@ -39,9 +39,9 @@ limitations under the License. namespace tensorflow { namespace graph_transforms { namespace { -using StringPieceSet = std::unordered_set; +using StringPieceSet = std::unordered_set; template -using StringPieceMap = std::unordered_map; +using StringPieceMap = std::unordered_map; } // namespace Status ReplaceSendRecvs(const GraphDef& original_graph_def, -- GitLab From 2401b0244816bc728cf67161257d6abe6881a5eb Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Mon, 13 Nov 2017 15:00:29 -0800 Subject: [PATCH 0362/1801] Include float16 benchmarks for Conv2D. PiperOrigin-RevId: 175593114 --- tensorflow/python/ops/conv2d_benchmark.py | 57 ++++++++++++++++------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/ops/conv2d_benchmark.py b/tensorflow/python/ops/conv2d_benchmark.py index 6992fa57ea..907df85cd9 100644 --- a/tensorflow/python/ops/conv2d_benchmark.py +++ b/tensorflow/python/ops/conv2d_benchmark.py @@ -22,6 +22,7 @@ import itertools import time from tensorflow.python.client import session as session_lib +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import nn_ops @@ -30,7 +31,8 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test -def build_graph(device, input_shape, filter_shape, strides, padding, num_iters): +def build_graph(device, input_shape, filter_shape, strides, padding, dtype, + num_iters, warmup_iters): """builds a graph containing a sequence of conv2d operations. Args: @@ -41,14 +43,18 @@ def build_graph(device, input_shape, filter_shape, strides, padding, num_iters): window for each dimension of input. padding: A string from: "SAME", "VALID". The type of padding algorithm to use. + dtype: Data type for the convolution. num_iters: number of iterations to run conv2d. + warmup_iters: number of iterations for warmup runs. Returns: An array of tensors to run() """ with ops.device("/%s:0" % device): - inp = variables.Variable(random_ops.truncated_normal(input_shape)) - filt = variables.Variable(random_ops.truncated_normal(filter_shape)) + inp = variables.Variable( + random_ops.truncated_normal(input_shape, dtype=dtype)) + filt = variables.Variable( + random_ops.truncated_normal(filter_shape, dtype=dtype)) outputs = [] conv2d_op = nn_ops.conv2d(inp, filt, strides, padding, data_format="NHWC") @@ -58,14 +64,25 @@ def build_graph(device, input_shape, filter_shape, strides, padding, num_iters): conv2d_op = nn_ops.conv2d( inp, filt, strides, padding, data_format="NHWC") outputs.append(conv2d_op) - return control_flow_ops.group(*outputs) + + warmup_groups = [] + warmup_conv2d_op = nn_ops.conv2d( + inp, filt, strides, padding, data_format="NHWC") + warmup_groups.append(warmup_conv2d_op) + for _ in range(1, warmup_iters): + with ops.control_dependencies([warmup_conv2d_op]): + warmup_conv2d_op = nn_ops.conv2d( + inp, filt, strides, padding, data_format="NHWC") + warmup_groups.append(warmup_conv2d_op) + return control_flow_ops.group(*warmup_groups), control_flow_ops.group( + *outputs) class Conv2DBenchmark(test.Benchmark): """Benchmark conv2d!""" def _run_graph(self, device, input_shape, filter_shape, strides, padding, - num_iters): + dtype, num_iters, warmup_iters): """runs the graph and print its execution time. Args: @@ -77,43 +94,46 @@ class Conv2DBenchmark(test.Benchmark): padding: A string from: "SAME", "VALID". The type of padding algorithm to use. num_iters: Number of iterations to run the benchmark. + dtype: Data type for the convolution. num_iters: number of iterations to run conv2d. + warmup_iters: number of iterations for warmup runs. Returns: The duration of the run in seconds. """ graph = ops.Graph() with graph.as_default(): - outputs = build_graph(device, input_shape, filter_shape, strides, padding, - num_iters) + warmup_outputs, outputs = build_graph(device, input_shape, filter_shape, + strides, padding, dtype, num_iters, + warmup_iters) with session_lib.Session(graph=graph) as session: variables.global_variables_initializer().run() # warmup runs - session.run(outputs) + session.run(warmup_outputs) start_time = time.time() session.run(outputs) duration = (time.time() - start_time) / num_iters - - print("%s inputshape:%s filtershape:%s strides:%s padding:%s " + print("%s %s inputshape:%s filtershape:%s strides:%s padding:%s " "%d iters: %.8f sec" % - (device, str(input_shape).replace(" ", ""), + (device, str(dtype), str(input_shape).replace(" ", ""), str(filter_shape).replace(" ", ""), str(strides).replace(" ", ""), padding, num_iters, duration)) name_template = ( - "conv2d_{device}_input_shape_{inputshape}_filter_shape_{filtershape}_" - "strides_{strides}_padding_{padding}") + "conv2d_{device}_{datatype}_input_shape_{inputshape}_" + "filter_shape_{filtershape}_strides_{strides}_padding_{padding}") self.report_benchmark( name=name_template.format( device=device, + datatype=str(dtype), inputshape=str(input_shape).replace(" ", ""), filtershape=str(filter_shape).replace(" ", ""), strides=str(strides).replace(" ", ""), padding=padding).replace(" ", ""), iters=num_iters, - wall_time=duration / num_iters) + wall_time=duration) return duration @@ -126,15 +146,18 @@ class Conv2DBenchmark(test.Benchmark): fw = 3 input_shapes = [] filter_shapes = [] + data_types = [dtypes.float32, dtypes.float16] for b, c in itertools.product([4, 16, 32], [i for i in range(3, 16)]): input_shapes += [[b, h, w, c]] filter_shapes += [[fh, fw, c, b]] strides = [[1, 2, 2, 1]] paddings = ["VALID", "SAME"] for ishape, fshape in zip(input_shapes, filter_shapes): - for stride in strides: - for padding in paddings: - self._run_graph("gpu", ishape, fshape, stride, padding, 80) + for dtype in data_types: + for stride in strides: + for padding in paddings: + self._run_graph("gpu", ishape, fshape, stride, padding, dtype, 80, + 2) if __name__ == "__main__": -- GitLab From a09c68eafb47e8f26c4f88ee36b03f5c916d967d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 15:49:08 -0800 Subject: [PATCH 0363/1801] Enable ImportGraphDef to import unbound graph when input_map maps all unbound inputs. tf.import_graph_def() in Python allows to import graphs with unbound input when proper input_map is provided, but tensorflow::ImportGraphDef() does not allow importing such graphs. Because of this difference, using the same graph in Python and C++ results in different output (ok vs error). PiperOrigin-RevId: 175600872 --- tensorflow/core/graph/graph_constructor.cc | 73 +++++++++++++------ .../core/graph/graph_constructor_test.cc | 37 ++++++++++ 2 files changed, 87 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 2ee409768b..e45828b7ba 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -447,6 +447,7 @@ Status GraphConstructor::InitFromEdges() { // Parse the inputs for each node. for (int n = 0; n < num_nodes; ++n) { const NodeDef& node_def = *node_defs_[n]; + int pending_count = node_def.input_size(); if (IsMerge(node_def)) { // Cycles in the graph are only allowed for while loops. A while loop is // identified by an edge from a NextIteration node to a Merge node. For @@ -467,28 +468,33 @@ Status GraphConstructor::InitFromEdges() { } } if (has_loop_back_edge) { - pending_count_.push_back(num_control_edges + 1); - } else { - pending_count_.push_back(node_def.input_size()); + pending_count = num_control_edges + 1; } - } else { - pending_count_.push_back(node_def.input_size()); - } - if (node_def.input_size() == 0) { - ready_.push_back(n); - continue; } for (int i = 0; i < node_def.input_size(); ++i) { StringPiece input_name = node_def.input(i); TensorId id(ParseTensorName(input_name)); - auto iter = gdef_nodes_.find(id.first); - if (iter == gdef_nodes_.end()) { - return errors::InvalidArgument("Node '", node_def.name(), - "': Unknown input node '", - node_def.input(i), "'"); + if (opts_.input_map.count(id) == 0) { + // If an input is not mapped, then the input should appear in the graph + // being imported. + auto iter = gdef_nodes_.find(id.first); + if (iter == gdef_nodes_.end()) { + return errors::InvalidArgument("Node '", node_def.name(), + "': Unknown input node '", + node_def.input(i), "'"); + } + outputs_[iter->second.gdef_index].push_back(n); + } else { + // This input is mapped to an existing edge. Therefore this input is + // as good as being already processed. + --pending_count; + DCHECK_GE(pending_count, 0); } - outputs_[iter->second.gdef_index].push_back(n); } + if (pending_count == 0) { + ready_.push_back(n); + } + pending_count_.push_back(pending_count); } return Status::OK(); } @@ -565,15 +571,36 @@ Status GraphConstructor::ValidateShape(Node* node) { const string& op = node->type_string(); const std::vector whitelist = { // To be removed after 2017/03/08. - "RandomShuffleQueue", "PaddingFIFOQueue", "FIFOQueue", - "PriorityQueue", "QueueSize", "Stack", "Barrier", "BarrierReadySize", - "BarrierIncompleteSize", "HashTable", "MutableHashTable", - "MutableHashTableOfTensors", "Mutex", "CuckooTable", "IndexTable", - "WholeFileReader", "TextLineReader", "FixedLengthRecordReader", - "TFRecordReader", "IdentityReader", "RefSwitch", "RefEnter", - "RefNextIteration", "RefMerge", "RefIdentity", "LMDBReader", + "RandomShuffleQueue", + "PaddingFIFOQueue", + "FIFOQueue", + "PriorityQueue", + "QueueSize", + "Stack", + "Barrier", + "BarrierReadySize", + "BarrierIncompleteSize", + "HashTable", + "MutableHashTable", + "MutableHashTableOfTensors", + "Mutex", + "CuckooTable", + "IndexTable", + "WholeFileReader", + "TextLineReader", + "FixedLengthRecordReader", + "TFRecordReader", + "IdentityReader", + "RefSwitch", + "RefEnter", + "RefNextIteration", + "RefMerge", + "RefIdentity", + "LMDBReader", // To be removed after 2017/04/24. - "ConditionalAccumulator", "SparseConditionalAccumulator", "Table", + "ConditionalAccumulator", + "SparseConditionalAccumulator", + "Table", }; if (std::find(whitelist.begin(), whitelist.end(), op) == whitelist.end()) { diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc index 893826da3e..0f88c80b85 100644 --- a/tensorflow/core/graph/graph_constructor_test.cc +++ b/tensorflow/core/graph/graph_constructor_test.cc @@ -1475,6 +1475,43 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) { EXPECT_EQ(results.unused_input_map_keys, expected_unused_keys); } +TEST_F(GraphConstructorTest, ImportGraphDef_InputMapWithUnboundInput) { + ShapeRefiner refiner(TF_GRAPH_DEF_VERSION, graph_.op_registry()); + + // Populate graph with node we'll use in input map + ExpectOK("node { name: 'input' op: 'TestInput' }", ImportGraphDefOptions(), + &refiner); + + // Create input_map and use it to import more nodes + ImportGraphDefOptions opts; + opts.input_map[TensorId("new_input", 0)] = TensorId("input", 1); + opts.input_map[TensorId("new_input", 1)] = TensorId("input", 0); + + // new_input exists in input_map but not in the graph being imported. + ExpectOK( + R"EOF( + node { name: 't1' op: 'TestMul' input: [ 'new_input:0', 'new_input:1' ] } + node { name: 't2' op: 'TestMul' input: [ 't1:0', 't1:0' ] } + )EOF", + opts, &refiner); + + EXPECT_TRUE(HasNode("input")); + EXPECT_TRUE(HasNode("t1")); + EXPECT_TRUE(HasNode("t2")); + EXPECT_FALSE(HasNode("new_input")); + + EXPECT_TRUE(HasEdge("input", 1, "t1", 0)); + EXPECT_TRUE(HasEdge("input", 0, "t1", 1)); + // Test that t2 is unaffected + EXPECT_TRUE(HasEdge("t1", 0, "t2", 0)); + + // Check that t1's NodeDef is consistent with graph + Node* t1 = FindNode("t1"); + ASSERT_EQ(t1->requested_inputs().size(), 2); + ASSERT_EQ(t1->requested_inputs()[0], "input:1"); + ASSERT_EQ(t1->requested_inputs()[1], "input:0"); +} + TEST_F(GraphConstructorTest, ImportGraphDef_SkipMappedNodes_FullyMapped) { ShapeRefiner refiner(TF_GRAPH_DEF_VERSION, graph_.op_registry()); -- GitLab From a5827f23427881175bfb42944a854ddfb8ab6e83 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 15:52:57 -0800 Subject: [PATCH 0364/1801] Add BF16 to ByteSizeOfPrimitiveType. PiperOrigin-RevId: 175601459 --- tensorflow/compiler/xla/shape_util.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 4d0bafa908..2202b6a2c1 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -594,6 +594,8 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return sizeof(uint64); case F16: return sizeof(float) / 2; + case BF16: + return sizeof(float) / 2; case F32: return sizeof(float); case F64: -- GitLab From bd4d623c9717f148e6f0b51a1cc2333b1e02f640 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 15:58:11 -0800 Subject: [PATCH 0365/1801] [tpu:profiler] Renaming some fields in tf_ops_stats.proto to make them more meaningful to public. PiperOrigin-RevId: 175602224 --- .../contrib/tpu/profiler/tf_op_stats.proto | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index 5b2dbb3124..d8ee243790 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -47,14 +47,14 @@ message OpMetricsResult { 14; // Total number of FLOPs incurred by this OP. optional double total_flops = 15; - // Total time in microseconds that the MXU is occupied by this OP. + // Total number of bytes accessed by this OP. optional double total_bytes_accessed = 16; - // Total time in microseconds that the MXU is occupied by this OP. - optional double mxu_occupancy_in_us = 17; - // Total time in microseconds that the XU is occupied by this OP. - optional double xu_occupancy_in_us = 18; - // Total DMA access stall time in microseconds. - optional double total_dma_stall_in_us = 19; + // Total time in microseconds that special hw unit 1 is occupied by this OP. + optional double unit1_occupancy_in_us = 17; + // Total time in microseconds that special hw unit 2 is occupied by this OP. + optional double unit2_occupancy_in_us = 18; + // Total memory stall time in microseconds. + optional double total_memory_stall_in_us = 19; } // Result proto for OpMetricsDb. @@ -86,8 +86,8 @@ message StepDatabaseResult { map step_sequence_per_core = 1; } -// Result proto for Dashboard data. -message DashboardResult { +// Result proto for looping-related metrics. +message LoopingResult { // The total iteration time in nanoseconds. optional double iteration_time_ns = 1; // The total number of iterations. @@ -120,8 +120,8 @@ message TfOpStats { optional OpMetricsDbResult hlo_metrics_db = 2; // The result for the step database. optional StepDatabaseResult step_db = 3; - // The result for the TPU dashboard. - optional DashboardResult dashboard = 4; + // The result for the looping-related metrics. + optional LoopingResult looping = 4; // The result for the HloExtraInfoMap. optional HloExtraInfoMapResult hlo_extrainfo_map = 5; } -- GitLab From a559b3ae8d2eaf6393ce0a964f24db3ff5d29010 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 16:44:04 -0800 Subject: [PATCH 0366/1801] [tpu:profiler] Interface and client side change for input pipeline and future tools. PiperOrigin-RevId: 175608761 --- .../tpu/profiler/capture_tpu_profile.cc | 1 + .../contrib/tpu/profiler/dump_tpu_profile.cc | 22 +++++++++++++++---- .../contrib/tpu/profiler/tpu_profiler.proto | 17 +++++++++++++- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index 5b51a72ece..bff23a447f 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -50,6 +50,7 @@ ProfileResponse Profile(const string& service_addr, int duration_ms) { ProfileRequest request; request.set_duration_ms(duration_ms); request.set_max_events(kMaxEvents); + request.add_tools("input_pipeline"); std::cout << "Limiting the number of trace events to " << kMaxEvents << std::endl; ::grpc::ClientContext context; diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc index 7541a5291d..120a38b6c2 100644 --- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc @@ -42,11 +42,11 @@ using ::tensorflow::io::JoinPath; using ::tensorflow::protobuf::util::JsonOptions; using ::tensorflow::protobuf::util::MessageToJsonString; -constexpr char kProfilePluginDirectory[] = "plugins/profile/"; +constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph."; constexpr char kJsonOpProfileFileName[] = "op_profile.json"; -constexpr char kProtoTraceFileName[] = "trace"; constexpr char kJsonTraceFileName[] = "trace.json.gz"; -constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph."; +constexpr char kProfilePluginDirectory[] = "plugins/profile/"; +constexpr char kProtoTraceFileName[] = "trace"; Status WriteGzippedDataToFile(const string& filename, const string& data) { std::unique_ptr file; @@ -97,6 +97,15 @@ Status DumpOpProfileToLogDirectory(StringPiece run_dir, return Status::OK(); } +Status DumpToolDataToLogDirectory(StringPiece run_dir, + const tensorflow::ProfileToolData& tool, + std::ostream* os) { + string path = JoinPath(run_dir, tool.name()); + TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, tool.data())); + *os << "Dumped tool data for " << tool.name() << " to " << path << std::endl; + return Status::OK(); +} + Status DumpGraphEvents(const string& logdir, const string& run, const ProfileResponse& response, std::ostream* os) { int num_graphs = response.computation_graph_size(); @@ -154,7 +163,12 @@ Status WriteTensorboardTPUProfile(const string& logdir, const string& run, TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir, response.op_profile(), os)); } - + if (!response.tool_data().empty()) { + for (const auto& tool_data : response.tool_data()) { + TF_RETURN_IF_ERROR( + DumpToolDataToLogDirectory(profile_run_dir, tool_data, os)); + } + } TF_RETURN_IF_ERROR(DumpGraphEvents(logdir, run, response, os)); return Status::OK(); diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto index 88e86eca3b..9c3fd45fd1 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto @@ -22,9 +22,21 @@ message ProfileRequest { // events. uint64 max_events = 2; + // required profiling tools name such as "input_pipeline_analyzer" etc + repeated string tools = 3; + // In future, the caller will indicate which TF session is being profiled, and // only data relating to that program will be returned. For now, we assume // all activity during the profiling period is relevant. + // next-field: 4 +} + +message ProfileToolData { + // The tool's name which this data is associated. (e.g. "input_pipeline".) + string name = 1; + + // The data payload (likely json) for the specific tool. + bytes data = 2; } message ProfileResponse { @@ -45,5 +57,8 @@ message ProfileResponse { // If the trace covers multiple programs, the longest-running one is analyzed. // See op_profile.proto for the detailed semantics of the returned profile. tpu.op_profile.Profile op_profile = 4; - // next-field: 6 + + // Data payload for each required tools. + repeated ProfileToolData tool_data = 6; + // next-field: 7 } -- GitLab From 21d830086efac5ee511949170d91dd927db33e4b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 16:53:46 -0800 Subject: [PATCH 0367/1801] Change `saved_model_builder` to `saved_model.builder`. PiperOrigin-RevId: 175610122 --- tensorflow/docs_src/programmers_guide/saved_model.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index 6bc2cbb9e3..8731cae0d7 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -238,7 +238,7 @@ For example, the following code suggests a typical way to use ```python export_dir = ... ... -builder = tf.saved_model_builder.SavedModelBuilder(export_dir) +builder = tf.saved_model.builder.SavedModelBuilder(export_dir) with tf.Session(graph=tf.Graph()) as sess: ... builder.add_meta_graph_and_variables(sess, -- GitLab From d44f37161d34f0de012e10d5aebc2acfdb292be2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 17:04:08 -0800 Subject: [PATCH 0368/1801] Remove unused using-declarations PiperOrigin-RevId: 175611524 --- tensorflow/compiler/xla/service/algebraic_simplifier.cc | 3 --- tensorflow/contrib/lite/kernels/optional_tensor_test.cc | 2 -- tensorflow/core/grappler/costs/graph_properties.cc | 5 +---- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 5c9b29f6e2..bc9a3ac43d 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -46,9 +46,6 @@ limitations under the License. namespace xla { namespace { -using tensorflow::gtl::nullopt; -using tensorflow::gtl::optional; - // Returns whether operand is a literal with the given value. bool IsLiteralWithValue(const HloInstruction* operand, int8 value) { return operand->opcode() == HloOpcode::kConstant && diff --git a/tensorflow/contrib/lite/kernels/optional_tensor_test.cc b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc index 8977d27f73..8e9cc07656 100644 --- a/tensorflow/contrib/lite/kernels/optional_tensor_test.cc +++ b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc @@ -28,8 +28,6 @@ limitations under the License. namespace tflite { namespace { -using ::testing::ElementsAreArray; - class LSTMOpModel : public SingleOpModel { public: LSTMOpModel(int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 35048a4fcf..26b8521771 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -25,16 +25,13 @@ limitations under the License. namespace tensorflow { namespace grappler { +namespace { -using shape_inference::Dimension; using shape_inference::DimensionHandle; using shape_inference::InferenceContext; -using shape_inference::Shape; using shape_inference::ShapeAndType; using shape_inference::ShapeHandle; -namespace { - template struct HashHandle { std::size_t operator()(const Handle& h) const { return h.Handle(); } -- GitLab From 4b4b51cdd9e8c3c748b76dd8649bcd5556e84d76 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Mon, 13 Nov 2017 17:11:23 -0800 Subject: [PATCH 0369/1801] Ios (#14521) * Initial implementation of Makefile build for TensorFlow Lite * Added support to TF Lite makefile build * Added iOS support to TF Lite makefile build * Added simple iOS example application for TF Lite * Added copyright header to file * Added optimization flag and cleaned up code --- .gitignore | 5 + tensorflow/contrib/lite/Makefile | 147 + .../contrib/lite/build_ios_universal_lib.sh | 16 + .../contrib/lite/download_dependencies.sh | 88 + .../lite/examples/ios/simple/AppDelegate.h | 21 + .../lite/examples/ios/simple/AppDelegate.mm | 44 + .../contrib/lite/examples/ios/simple/Podfile | 5 + .../examples/ios/simple/RunModel-Info.plist | 47 + .../ios/simple/RunModelViewController.h | 24 + .../ios/simple/RunModelViewController.mm | 219 + .../ios/simple/RunModelViewController.xib | 46 + .../examples/ios/simple/data/grace_hopper.jpg | Bin 0 -> 73746 bytes .../lite/examples/ios/simple/ios_image_load.h | 25 + .../examples/ios/simple/ios_image_load.mm | 85 + .../contrib/lite/examples/ios/simple/main.mm | 22 + .../simple/simple.xcodeproj/project.pbxproj | 359 ++ tensorflow/contrib/lite/ios_makefile.inc | 47 + .../contrib/lite/schema/schema_generated.h | 4521 +++++++++++++++++ .../contrib/lite/tools/benchmark_model.cc | 91 + .../contrib/lite/tools/mutable_op_resolver.h | 11 + 20 files changed, 5823 insertions(+) create mode 100644 tensorflow/contrib/lite/Makefile create mode 100755 tensorflow/contrib/lite/build_ios_universal_lib.sh create mode 100755 tensorflow/contrib/lite/download_dependencies.sh create mode 100644 tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h create mode 100644 tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/Podfile create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib create mode 100644 tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg create mode 100644 tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h create mode 100644 tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/main.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj create mode 100644 tensorflow/contrib/lite/ios_makefile.inc create mode 100755 tensorflow/contrib/lite/schema/schema_generated.h create mode 100644 tensorflow/contrib/lite/tools/benchmark_model.cc diff --git a/.gitignore b/.gitignore index 9ae0d9c96f..d11a504bdc 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,8 @@ Pods Podfile.lock *.pbxproj *.xcworkspacedata +/tensorflow/contrib/lite/downloads/** +/tensorflow/contrib/lite/gen/** +/tensorflow/contrib/lite/examples/ios/simple/data/*.txt +/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite +xcuserdata/** \ No newline at end of file diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile new file mode 100644 index 0000000000..8c65a0cc34 --- /dev/null +++ b/tensorflow/contrib/lite/Makefile @@ -0,0 +1,147 @@ + +# Find where we're running from, so we can store generated files here. +ifeq ($(origin MAKEFILE_DIR), undefined) + MAKEFILE_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) +endif + +# Try to figure out the host system +HOST_OS := +ifeq ($(OS),Windows_NT) + HOST_OS = WINDOWS +else + UNAME_S := $(shell uname -s) + ifeq ($(UNAME_S),Linux) + HOST_OS := LINUX + endif + ifeq ($(UNAME_S),Darwin) + HOST_OS := OSX + endif +endif + +ARCH := $(shell if [[ $(shell uname -m) =~ i[345678]86 ]]; then echo x86_32; else echo $(shell uname -m); fi) + +# Where compiled objects are stored. +OBJDIR := $(MAKEFILE_DIR)/gen/obj/ +BINDIR := $(MAKEFILE_DIR)/gen/bin/ +LIBDIR := $(MAKEFILE_DIR)/gen/lib/ +GENDIR := $(MAKEFILE_DIR)/gen/obj/ + +# Settings for the host compiler. +CXX := $(CC_PREFIX) gcc +CXXFLAGS := --std=c++11 +CC := $(CC_PREFIX) gcc +CFLAGS := +LDOPTS := +LDOPTS += -L/usr/local/lib +ARFLAGS := -r + +INCLUDES := \ +-I. \ +-I$(MAKEFILE_DIR)/../../../ \ +-I$(MAKEFILE_DIR)/downloads/ \ +-I$(MAKEFILE_DIR)/downloads/eigen \ +-I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/neon_2_sse \ +-I$(MAKEFILE_DIR)/downloads/farmhash/src \ +-I$(MAKEFILE_DIR)/downloads/flatbuffers/include \ +-I$(GENDIR) +# This is at the end so any globally-installed frameworks like protobuf don't +# override local versions in the source tree. +INCLUDES += -I/usr/local/include + +LIBS := \ +-lstdc++ \ +-lpthread \ +-lm \ +-lz + +# If we're on Linux, also link in the dl library. +ifeq ($(OS),LINUX) + LIBS += -ldl -lpthread +endif + +include $(MAKEFILE_DIR)/ios_makefile.inc + +# This library is the main target for this makefile. It will contain a minimal +# runtime that can be linked in to other programs. +LIB_NAME := libtensorflow-lite.a +LIB_PATH := $(LIBDIR)$(LIB_NAME) + +# A small example program that shows how to link against the library. +BENCHMARK_PATH := $(BINDIR)benchmark_model + +BENCHMARK_SRCS := \ +tensorflow/contrib/lite/tools/benchmark_model.cc +BENCHMARK_OBJS := $(addprefix $(OBJDIR), \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(BENCHMARK_SRCS)))) + +# What sources we want to compile, must be kept in sync with the main Bazel +# build files. + +CORE_CC_ALL_SRCS := \ +$(wildcard tensorflow/contrib/lite/*.cc) \ +$(wildcard tensorflow/contrib/lite/kernels/*.cc) \ +$(wildcard tensorflow/contrib/lite/kernels/internal/*.cc) \ +$(wildcard tensorflow/contrib/lite/kernels/internal/optimized/*.cc) \ +$(wildcard tensorflow/contrib/lite/kernels/internal/reference/*.cc) \ +$(wildcard tensorflow/contrib/lite/*.c) \ +$(wildcard tensorflow/contrib/lite/kernels/*.c) \ +$(wildcard tensorflow/contrib/lite/kernels/internal/*.c) \ +$(wildcard tensorflow/contrib/lite/kernels/internal/optimized/*.c) \ +$(wildcard tensorflow/contrib/lite/kernels/internal/reference/*.c) \ +$(wildcard tensorflow/contrib/lite/downloads/farmhash/src/farmhash.cc) +# Remove any duplicates. +CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) +CORE_CC_EXCLUDE_SRCS := \ +$(wildcard tensorflow/contrib/lite/*test.cc) \ +$(wildcard tensorflow/contrib/lite/*/*test.cc) \ +$(wildcard tensorflow/contrib/lite/*/*/*test.cc) \ +$(wildcard tensorflow/contrib/lite/*/*/*/*test.cc) \ +$(wildcard tensorflow/contrib/lite/kernels/test_util.cc) \ +$(BENCHMARK_SRCS) +# Filter out all the excluded files. +TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) +# File names of the intermediate files target compilation generates. +TF_LITE_CC_OBJS := $(addprefix $(OBJDIR), \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(TF_LITE_CC_SRCS)))) +LIB_OBJS := $(TF_LITE_CC_OBJS) + +# For normal manually-created TensorFlow C++ source files. +$(OBJDIR)%.o: %.cc + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ + +# For normal manually-created TensorFlow C++ source files. +$(OBJDIR)%.o: %.c + @mkdir -p $(dir $@) + $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@ + +# The target that's compiled if there's no command-line arguments. +all: $(LIB_PATH) $(BENCHMARK_PATH) + +# Gathers together all the objects we've compiled into a single '.a' archive. +$(LIB_PATH): $(LIB_OBJS) + @mkdir -p $(dir $@) + $(AR) $(ARFLAGS) $(LIB_PATH) $(LIB_OBJS) + +$(BENCHMARK_PATH): $(BENCHMARK_OBJS) $(LIB_PATH) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(INCLUDES) \ + -o $(BENCHMARK_PATH) $(BENCHMARK_OBJS) \ + $(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS) + +# Gets rid of all generated files. +clean: + rm -rf $(MAKEFILE_DIR)/gen + +# Gets rid of target files only, leaving the host alone. Also leaves the lib +# directory untouched deliberately, so we can persist multiple architectures +# across builds for iOS and Android. +cleantarget: + rm -rf $(OBJDIR) + rm -rf $(BINDIR) + +$(DEPDIR)/%.d: ; +.PRECIOUS: $(DEPDIR)/%.d + +-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(TF_CC_SRCS))) diff --git a/tensorflow/contrib/lite/build_ios_universal_lib.sh b/tensorflow/contrib/lite/build_ios_universal_lib.sh new file mode 100755 index 0000000000..e0f2ef768b --- /dev/null +++ b/tensorflow/contrib/lite/build_ios_universal_lib.sh @@ -0,0 +1,16 @@ +#!/bin/bash -x +set -e +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=x86_64 -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=i386 -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7 -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7s -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=arm64 -j 8 + +lipo \ +tensorflow/contrib/lite/gen/lib/ios_x86_64/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_i386/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_armv7/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_armv7s/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_arm64/libtensorflow-lite.a \ +-create \ +-output tensorflow/contrib/lite/gen/lib/libtensorflow-lite.a diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh new file mode 100755 index 0000000000..0d9842fefa --- /dev/null +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +DOWNLOADS_DIR=tensorflow/contrib/lite/downloads +BZL_FILE_PATH=tensorflow/workspace.bzl + +EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" +ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" +NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" +FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz" +FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/master.zip" +MODELS_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_224_ios_lite_float_2017_11_08.zip" + +# TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, +# so work around it by patching the source. +replace_by_sed() { + local regex="${1}" + shift + # Detect the version of sed by the return value of "--version" flag. GNU-sed + # supports "--version" while BSD-sed doesn't. + if ! sed --version >/dev/null 2>&1; then + # BSD-sed. + sed -i '' -e "${regex}" "$@" + else + # GNU-sed. + sed -i -e "${regex}" "$@" + fi +} + +download_and_extract() { + local usage="Usage: download_and_extract URL DIR" + local url="${1:?${usage}}" + local dir="${2:?${usage}}" + echo "downloading ${url}" >&2 + mkdir -p "${dir}" + if [[ "${url}" == *gz ]]; then + curl -Ls "${url}" | tar -C "${dir}" --strip-components=1 -xz + elif [[ "${url}" == *zip ]]; then + tempdir=$(mktemp -d) + tempdir2=$(mktemp -d) + wget -P ${tempdir} ${url} + unzip ${tempdir}/* -d ${tempdir2} + # unzip has no strip components, so unzip to a temp dir, and move the files + # we want from the tempdir to destination. + cp -R ${tempdir2}/*/* ${dir}/ + rm -rf ${tempdir2} ${tempdir} + fi + + # Delete any potential BUILD files, which would interfere with Bazel builds. + find "${dir}" -type f -name '*BUILD' -delete +} + +download_and_extract "${EIGEN_URL}" "${DOWNLOADS_DIR}/eigen" +download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp" +download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest" +download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl" +download_and_extract "${NEON_2_SSE_URL}" "${DOWNLOADS_DIR}/neon_2_sse" +download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash" +download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers" +download_and_extract "${MODELS_URL}" "${DOWNLOADS_DIR}/models" + +replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ + "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" +replace_by_sed 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \ + "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" +replace_by_sed 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \ + "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" + +cp ${DOWNLOADS_DIR}/models/models/* tensorflow/contrib/lite/examples/ios/simple/data/ + +echo "download_dependencies.sh completed successfully." >&2 diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h new file mode 100644 index 0000000000..75b1f1da38 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h @@ -0,0 +1,21 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@interface AppDelegate : UIResponder + +@property (strong, nonatomic) UIWindow *window; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm new file mode 100644 index 0000000000..1e808eb976 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm @@ -0,0 +1,44 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "AppDelegate.h" + +#import "RunModelViewController.h" + +@implementation AppDelegate + +- (BOOL)application:(UIApplication *)application + didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + + UITabBarController *bar = [[UITabBarController alloc] init]; + [bar setViewControllers: + @[[[RunModelViewController alloc] init]]]; + bar.selectedIndex = 0; + self.window = [[UIWindow alloc] initWithFrame:[[UIScreen mainScreen] bounds]]; + self.window.rootViewController = bar; + [self.window makeKeyAndVisible]; + return YES; +} + +- (void)applicationWillResignActive:(UIApplication *)application {} + +- (void)applicationDidEnterBackground:(UIApplication *)application {} + +- (void)applicationWillEnterForeground:(UIApplication *)application {} + +- (void)applicationDidBecomeActive:(UIApplication *)application {} + +- (void)applicationWillTerminate:(UIApplication *)application {} + +@end diff --git a/tensorflow/contrib/lite/examples/ios/simple/Podfile b/tensorflow/contrib/lite/examples/ios/simple/Podfile new file mode 100644 index 0000000000..1740ad6457 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/Podfile @@ -0,0 +1,5 @@ +platform :ios, '8.0' +inhibit_all_warnings! + +target 'tf_simple_example' + pod 'TensorFlow-experimental' diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist b/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist new file mode 100644 index 0000000000..1a3eaa8a2c --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist @@ -0,0 +1,47 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleDisplayName + tflite-simple-example + CFBundleExecutable + tf_simple_example + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + ios-app + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1.0 + LSRequiresIPhoneOS + + UILaunchStoryboardName + RunModelViewController + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h new file mode 100644 index 0000000000..4e1a83ccf5 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h @@ -0,0 +1,24 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@interface RunModelViewController : UIViewController + +- (IBAction)getUrl:(id)sender; + +@property (weak, nonatomic) IBOutlet UITextView *urlContentTextView; +@property (weak, nonatomic) IBOutlet UITextField *urlTextField; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm new file mode 100644 index 0000000000..965d830105 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm @@ -0,0 +1,219 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "RunModelViewController.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +#include "ios_image_load.h" + +#define LOG(x) std::cerr +#define CHECK(x) if (!(x)) { LOG(ERROR) << #x << "failed"; exit(1); } + +NSString* RunInferenceOnImage(); + +@interface RunModelViewController () +@end + +@implementation RunModelViewController { +} + +- (IBAction)getUrl:(id)sender { + NSString* inference_result = RunInferenceOnImage(); + self.urlContentTextView.text = inference_result; +} + +@end + +// Returns the top N confidence values over threshold in the provided vector, +// sorted by confidence in descending order. +static void GetTopN( + const float* prediction, + const int prediction_size, + const int num_results, const float threshold, + std::vector >* top_results) { + // Will contain top N results in ascending order. + std::priority_queue, + std::vector >, + std::greater > > top_result_pq; + + const long count = prediction_size; + for (int i = 0; i < count; ++i) { + const float value = prediction[i]; + + // Only add it if it beats the threshold and has a chance at being in + // the top N. + if (value < threshold) { + continue; + } + + top_result_pq.push(std::pair(value, i)); + + // If at capacity, kick the smallest value out. + if (top_result_pq.size() > num_results) { + top_result_pq.pop(); + } + } + + // Copy to output vector and reverse into descending order. + while (!top_result_pq.empty()) { + top_results->push_back(top_result_pq.top()); + top_result_pq.pop(); + } + std::reverse(top_results->begin(), top_results->end()); +} + +NSString* FilePathForResourceName(NSString* name, NSString* extension) { + NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension]; + if (file_path == NULL) { + LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." + << [extension UTF8String] << "' in bundle."; + } + return file_path; +} + +NSString* RunInferenceOnImage() { + std::string graph; + const int num_threads = 1; + std::string input_layer_type = "float"; + std::vector sizes = {1, 224, 224, 3}; + + NSString* graph_path = FilePathForResourceName(@"mobilenet_v1_1.0_224", @"tflite"); + + std::unique_ptr model(tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String])); + if (!model) { + LOG(FATAL) << "Failed to mmap model " << graph; + } + LOG(INFO) << "Loaded model " << graph; + model->error_reporter(); + LOG(INFO) << "resolved reporter"; + +#ifdef TFLITE_CUSTOM_OPS_HEADER + tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); +#else + tflite::ops::builtin::BuiltinOpResolver resolver; +#endif + + std::unique_ptr interpreter; + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + LOG(FATAL) << "Failed to construct interpreter"; + } + + if (num_threads != -1) { + interpreter->SetNumThreads(num_threads); + } + + int input = interpreter->inputs()[0]; + + if (input_layer_type != "string") { + interpreter->ResizeInputTensor(input, sizes); + } + + if (interpreter->AllocateTensors() != kTfLiteOk) { + LOG(FATAL) << "Failed to allocate tensors!"; + } + + // Read the label list + NSString* labels_path = FilePathForResourceName(@"labels", @"txt"); + std::vector label_strings; + std::ifstream t; + t.open([labels_path UTF8String]); + std::string line; + while(t){ + std::getline(t, line); + label_strings.push_back(line); + } + t.close(); + + // Read the Grace Hopper image. + NSString* image_path = FilePathForResourceName(@"grace_hopper", @"jpg"); + int image_width; + int image_height; + int image_channels; + std::vector image_data = LoadImageFromFile([image_path UTF8String], &image_width, &image_height, &image_channels); + const int wanted_width = 224; + const int wanted_height = 224; + const int wanted_channels = 3; + const float input_mean = 127.5f; + const float input_std = 127.5f; + assert(image_channels >= wanted_channels); + uint8_t* in = image_data.data(); + float* out = interpreter->typed_tensor(input); + for (int y = 0; y < wanted_height; ++y) { + const int in_y = (y * image_height) / wanted_height; + uint8_t* in_row = in + (in_y * image_width * image_channels); + float* out_row = out + (y * wanted_width * wanted_channels); + for (int x = 0; x < wanted_width; ++x) { + const int in_x = (x * image_width) / wanted_width; + uint8_t* in_pixel = in_row + (in_x * image_channels); + float* out_pixel = out_row + (x * wanted_channels); + for (int c = 0; c < wanted_channels; ++c) { + out_pixel[c] = (in_pixel[c] - input_mean) / input_std; + } + } + } + + if (interpreter->Invoke() != kTfLiteOk) { + LOG(FATAL) << "Failed to invoke!"; + } + + float* output = interpreter->typed_output_tensor(0); + const int output_size = 1000; + const int kNumResults = 5; + const float kThreshold = 0.1f; + std::vector > top_results; + GetTopN(output, output_size, kNumResults, kThreshold, &top_results); + + std::stringstream ss; + ss.precision(3); + for (const auto& result : top_results) { + const float confidence = result.first; + const int index = result.second; + + ss << index << " " << confidence << " "; + + // Write out the result as a string + if (index < label_strings.size()) { + // just for safety: theoretically, the output is under 1000 unless there + // is some numerical issues leading to a wrong prediction. + ss << label_strings[index]; + } else { + ss << "Prediction: " << index; + } + + ss << "\n"; + } + + LOG(INFO) << "Predictions: " << ss.str(); + + std::string predictions = ss.str(); + NSString* result = @""; + result = [NSString stringWithFormat: @"%@ - %s", result, + predictions.c_str()]; + + return result; +} diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib new file mode 100644 index 0000000000..93f334b985 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg b/tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d2a427810f679db537236c5430873a81a62ef412 GIT binary patch literal 73746 zcmex=zKMNGjWx(E@lO+TPE zg%l;`6{n>ZA=^~I!0;a{V15Cz7pv5NO5{K;V&PzOFU?Fz1$mW00PJQ4CI<#EW&npH ziyUugkdH!sS!z)cR3Rt^A#ze|!8w`95Z#q zjL?)KBJPt|nOdaa>7wACnwMIXSdw3);FMSlN^l_mFoMnC5%o(gR|wC{OUW-URtU)` z%}vTn%*5YF1;xxwsC9t_RE|9ZCl4PK`pH0#{JGoxAelq;`~InJWYyx znUcK2YPN#xIvMNi!?XW0xR%_H+x5>rGEH{sg+dw1>e^NJL;tm2R>}Ux&SAS) zSwk&F(9%F|<3#x%cdolkl%TTdesx5;x!AU~7|9tq@ z+^JLk?Y{c<)sfvCbjg}){UX?mJcV zpP~NpuC3YPN6aNzFRs(r`ux4-z27yv+jD=}f03JX{#AXL z@vnN1^{bah`Imb=$6h@Tt#&{1U*gM5*Y1`&ezBPE zS!8s#43Ep!H@ekflk2wqP03}KDAk$o(7SBIU$e%Szp6dHUEO^9rSTWcz%?YybM4$V@g^RhThdK<2L9-&0#QANm)k_3FG_ zR7&J@{s0cFZvts z%4A1)Xy`oYWVuthng5pkXZW@1o7$WU>JfT2TMk^RZ+~0yu5tG4=~w5yS<`K#vu!&2 zzU#Y=3&!2|znfToA?u}URhF0-yW8$%wW;f6_Fr8So_#ana@@Z zD%um3TV+{u#^a=W)m8Ro^8~+%9NU_9BmV!z=T+~o=iJ+P>Enmb*{cn=UO670y5@HJ zE!Mu;X@O~$t1qYS_%}^={)w-m=hxRq6jUAl`10V^HxlQ2<|JIus@*p``*&>l_PY~b znC?&foBUt6nJ3nLF{)NBfpZyJAn>X24%y`Mo z1iei?)qU0sbFb~z{BnKwuYJqDo_qZG{^SX#3|Ez2zm>G>hU$_34C^lyeL2^CA>(C4 z$aV#uo;y`hwqYL*=RW#=-DB>$FZ2J2-an@`=jMtdam$LP|1+ODf7)Urk4uZ}pC3H< z+dlO8`s=%H8*6W{2<(sD78)EElWn)z@V@VU?-&0j|1-=eshfSZAnx@$ld36OuSaNa zJKpcZ`Q`1(cPAEKt-p|McKQ8(2Cpr#-J$uvZF8#&zx8?;@0@=wIu!gbJ#N3+bj`2XWgxO&03Jkrl~ho$M>KE|sTBWrUD ze}(K%&0F;9-_-p3zw|%f*d?=m&5oa@QXjNW*7@>FZQAkWO-A{P_h0{-7KMsziqWKG;!y?=`a4?{?G6_>&7~#jk!AOzceilz9Stwdp2M3>KPmF-z>Y9`=5cS zU8+Xx`eC0wp_IHQ*{AhNl=Yg?y9T}umQTCwYL`06Qt)|ZC;n)-I}_5Td4-|C%Z$}z4|rzz~R#w6LMCr$)0^euiEYX zoAov?Z?1cJ^Xro$zmuyxXa}ZtQlKS^enoa#<%Ne!VEAL*r*?l#;*5?a%X7&Ny;#=jBrDE5vZMq-Z ze*fM5H{rML^qAi{;~tb%daicb{hd$V@s?j$WoFuQ%Zs!TDS03dECS3A_Mts_AN%cJ=J$_w>ov} z{G#?3j~+gLnsOlby7=C=IdAlCe_4I^-^_Ra8Ek{rzkbJ;eAMS~>9zfC+Y=h)=X|PN zwJ$#CtgO2Bs;8GOow^^nUP4$ed2MoR^1S=G_8+Y4?O!hM@HnT?C#{hEXj^2Gq9$*K z{Pe?%SLyD0Q&XN2`RYkk!mSSs)8$I?H~%?up?blWJAeF^_ZS(>-MjebF?gX~chq|Cb+BrHbCa zW>8opZ5ll_G~D2&!XCpnZOmi*0XDU&a6GVfiL2?_b16X}-F;`O4AV3TapET_TS2^z?++=ij;iLp$8K^xBiyG}m|c zOW&7H+w;Bp-1psoo+}y{2~-9Aj@Wcpitk$1T7I$Yqakx{z)8BmU*O+f+ zYvxRsiu|IPzG>rv{LeZ4OSgZXDm1|%xnaG><-AoV&pbQ%`OL%{RptL)U#agtV{N=^ zT^%dG$la-z-=><)UvzxaotmyWfB9d0zx+OWu7kPaw5W(zGLOZA-pU+j&)qg#>iwcu zYhtEfwOjLU;@5s=^ZMn%dAXH*Z(rP~)jqs`!`su>>UQsc^^N=0=~tG%M|Z1#$zKun zIyNKy;*Ma?>|1~8zx-#IAO9lfKLg_?n;&OZ1^#UM7j2SQIQ_KV-tL2slM<5+ub+-` zR(;PNUwHFBLtV6eY<9lquL)C8Czn!IAiZjnhZe^7Il6o&VrK;>dL*D*hkDf=JtqJ+hptYUzl(kgmDZ^-_qqgq;{V>DNEE6Spzic>aqrj9@_ToRo|10bN z8G5_Bud#kvyRdP-?0WtFHybPOtb6?@CHmJs*7*Io>-%4A?ETMhadp>`CGpc;`3jPA zxAqjDyv0!59P`}zbJG!K6rBP7dLbF%?Y;|YUW=3 zHT7a?z59#$2lF3XmD~7FPkXV@>0M#2+wMl(|5kn2_AFD}wL1snZcE)S-`VA6RZ+p_ z_~6AZy}9kz>h{zouUj*x-R^Z1>rK^7w=&k$<@(+__MhS4=0B?Mzlz)Nbjn^|RNvLN zzxVRpD}_ee4Cl=}^EH8e+BLH~$L&Ag4lav~t~ahdSrU-_H}^k-*p2#dE7iIF-)67d z+P`ksi(21|jpxfQ9oxD7&h@Rc-~DG;tEYIb`cB-ZnD0W*EsuGsoSySse#vva<$n@t zQ%}E^jefI!>vCz~u*<>Q4YyfT&-z{~t-o;oqV0cW_wCJ|re><9C}83kkvGpSbI!vx z1=|biR+Y;1R23CTyvjVCeYcY@?RnSSmrwGN@9x^~doKO*udMRF0eg}xf2=c$yI-;D zc-V!^>|FCZ^1EMN`}Hb)`9I++UN7Rd-e0W0(N0+3xB2kFqw;0%+kdWqE?0Tf)_lLb z(weR_k>0nH`Db%fhpyQ-@rt3L_SwBXBGHK}`}ni_9+(SmTwWcjaM@~!3a>!j!oGqj zoIMA7+9vh13jAmIfA!gahGT5O`L85=*B<=R6CbplkAKDY-?RVDny&vbY}e$|n;g`f zC8XFJRL?j5-uL`nsDl_? zm+x+Vxlohi$jltO)r*BS8D;N0aJ24TGS`2m#*Ld|kMEu7`u1z;4%^nVd<@MymnZ*S zVK+1T&h=x@CS7K8X_gZC<=1&jpuxk;MDa@bHN6e5^LJm**sS4tPBcutf6?7}=NJF% zvb2w|I_5p+^(N--d%1;QUI~3STNU#@H~Ej8sr@sZpNy{Oe<~;oXfUu{jne;KRLJ@9 zbWf;v$45T-X-j4+TkHL54UgQn_u$8^i|;VcxSp3C^?t$EICm|*!Xj4%h8qmqisw8@ zwUj*ZUQRKOW=W+&W?ou8Xp|!qG&Tbo za^h!jWJqDiXGj7MQw1<&Fyu3oFyu27gINU(3=F~qR46c*G3YTEfDDA#&Bo&u;Oedr zSegj35@Z`Y0~2VJlo9*r5C>7Is!vHWpT9Rz^W421RBTLq{RjKqa=s!bT(E zi3^?BHy#X1DpD5taIwi)^x?-zi<~zd3QkrLyHwn4^61m##T+iGYKKEoricqNFflT* zGP1I;v9qy(ox;c@$gF7SD8v$&SlFnob>#zM$<-(UXB>1t-+k%Toy7&%4r4KCrf;ldE};b^Lwg7 z@@8|7@~H{|CbyO@_2lqUV&XhA*Xy0&uGM>ELvFl$QQ?g-QO~w6{#&-nCZNJ#2o}!d@4qmYy1yt6BWr`ToAgzY<( znYz2@Fw8nL0xnf0UFQsXpy=myF}LXUWU|%-}5#x@&pLW5FE*u9qFp zOqK`FnEb8UBV5#RmHvI5@7=Z2ei!W9p>feg(Q?`HxEYC{dw`q zy3%UjNzFSm>q-wC?#M1YW+^=H`_8K}>omjzgWK$0oWK9w>i5yF#~#Q0OFe!1!jtnY zau1~rJX|uZ$BEgvza>2D)OL^8+t&6yxii;9J#hkyXQ7~`&4NZ7|LYQ8zG@zGmuYy{ zJ~eAu?Tl8zO_HL<)+r0O-FX+XGB31OJ6&+ir0yFVU)-C$>E&r{tMrVe9;+Fy8EVe# zH(0Kzr*%E`mUZ2mJhORA?`<7sG@+5uneOh}zW841S_H!c_p39if^Y-Jd^%<8lSvL6Y{rIfb z&*agab(^~WCNDnv&!k%Ty9ocRzuu)=-xpTal+@%o1+x+l-U<%XKI-LX zYkGaJt#jRtRZ+HUzuq`oqQ#=|?V-iz;FZ_5tq;4k;bLHavD^_x8NMRfjrYF%-EFqM zGiaIi*Sp~pzP~IrGT?C1f2)yq@44BxJBlkVhVF{COHrOu*d8Jk8Mudum0i+BjPXCi z>P4^Iz5ixa@v~=5{A0N7pGa-6X8V!d*Y~`&&AS^b9T8kMZJHtDQPK2=r)6AQ*hM%D zl_KPRyxp4{9PSxfJgZM9Z(C1SPc>u2!zaawEuCkoT?+m)+Wa z`(ApmqQhYwqsNk_QgPfKiVre%EqBNW91~RxeV46ia;136)-|k0Q%#s>>D^{OeQxEw zS+>^pmo6z}u8wo9=gR7Lcz3(&^Qm@A^+kH|%FVs9XOEn@D*Wb>?MD4`cGFgdPO0j; zxqL!Zc;jL*A(OpK#fBI2f0keP9xI<6H}!7V)!M^nT@w@-8rT+g*Ls;<7m3d=^$A&0 zn90RcB>P;>W!=%M;V(=sZ3whkyyZ*V@4eTS{*w0EB_bSpRZOMEI<@Gum#uhz)pzaG)pz}7X9g|1({RW_?D_tnd*AmSc4V7tE*7k-pphA>Q8(ko zhn?YiMKT#vVzT`%>#JP~E6B)mCD%J`M7*~k=@n{jg`L?|&_I^iD_6bxBjmB*qyJ`51p#B-yN!UCnx+$)}N3z?1#$F}?}Ta|}6_Z+W1=V6f0CWEqQy z9XG2?6th^?%!V2BPAGORa}MO#@$kS?K3z#>&6SV1IKLfG61u1$vs6S;z}eql`z{7=&;0UtW6n z?kqj89fxOK%{rN?JkRJHSHls{^lt$^)e9Cf=sC~UxxT~s&rVe8LIKQzrm}))ZbXsA%fon(V7oLE>3Bp&{~a3I+uzop~*0lg4)|B@*_2c6Ouy7Mdxf7O)UuBj}n z+@Hh1DUn=D6 zIdZ1qT@%-|`yKbvOwF18g)(cle(yM+7?tqMP~rSNp6r)r52m#4ulB+E8n2U=9nF|k-*qd_7G|}} z@mj`ek$ku(MSF&~#KIGzYZW8*oesDggWqf({S4o;I9`a*?&Z0tO) z7}B+SR)o}h{i73|3Iin`OkZ&H9AkyXw8F_DybQixuN*{#EmdYq99ZqCvU1M3EHUk! zfBadb_>-8N7}DF*B0qFa5fFaMI=}sA65FJL2TrZuM0%Q@vL0x-B-6|0;CkWo5tmRV zi}{*%$#>Z0-f!jSUg9q)dPK+Y1k;q@nNvEZSnx*q{G4;`jMgG%8ztV$#j1agFZosV zuiy7es`jgOJgHJvJNt};KK9!Cz7L;$b!zRc4-3}^zFY8MmBgO4e!3pd@1|b$*?rsW zZP&?@ZgUNO_ZqWod6`_;UGBJTuH6j&Wp8uKHa(l)di3&#b2i)8+`4@rs9xl~tNxN# zAEav9b~(=4Rd1G`v+kX4xpn=OsBN29UaYQ+xxiJ-o-A6o$=dE==*ZClI^TlJ6?w8_R-f`3@dByWZ#Akh)#{cjeTQspdWmJIc4T_FdmlbLeI7&7Rb~ zw=?%N*oqff$Z-VOY`G*@nsvsa#rTMuvnT61mUgCHjm|gPtc4yq<({bh!734(O2wmc=9(7f zI~T3foV8Snf1hPg|IEXSv-uAFDtT{OZzR7b_~q=6evw6BD=HI-( zOGBrse6^SqRBZUN_wd?Zrn4jW?7N$LGdHr`*Vgf?R$0)Zd*%Nb+HB76iM??rZ+dvO zuho=X*?@g_!Y{8~RdcZSX-@9LyFa71t2eLu&+sa=iZ5`HUPho4*CK@p$A2eTU0u7v zIJ9a_hULx!X^dJsRRUMVhO86Yn$^!c$@0!B$AioD4@*j?UEi6vop>0BG`^Aw zd;68kkbTAbO~+nk=$9Y)aaTXE>y+GOor=J0sXa*8>ch% z7`86tKeE_!zSyzOyWH2UBt!pNm(>OZu3N?#cJ{E>Za0~E%X;HkvbAr<xx7V|K&hD%mBmK#p8kW^FdFB8zsd%IpDHa8$7OM7N>%f9nH zUfyXQS{aQ?85ScBhk&Y4r*w@B-%>nsW1$G~9a+}&_x zM%fxBuIZdT{~1yYl{_yw8Z^lKUMP_Aa`_aIhy8DtN$^~7IVPbH`S2#cu0kL4Lu2JV zoKHDSURl&~ziI9|v5rAMhrv*N#RL`3pmv!DN*-rVE6;E$YHwaLML28ql){$QClgJK z<8K9U^h$ZDaWC_X<+6#MgpkX^*GkeYJ^g!Bm>QVaZyt@kw}B^1QQ_H=Nj|@= z6}F1(kbJ@@QKw_nyi~wslG99$ha4+pmU=DXNxf0{p@D6I>luD|Aw|9E8b<7lD$@)) ze6Bo8=(X~ilg4!Zm_+L-ktGLOdS{7fl~$B<6v`Y73U=X2zt6bY;icli+MgZ=R~%vB z<7Y^5v#2=0uAmaqqtbau$Wr7<^}?jZFP%FZm;_B`W=)!Sl84uq=fq#J54H-IO~(F zoYp%n)xM-Ozs9pnAd-Jk!<19<;Xhe+&-4;lY*4buVae6RG6g}K>Z0b@1Hmk!Gqehu z|1+FF*c~i3(Q4LYBb6qh1h-nlWfNIUvRGb4J~osPnHpBadAKDlxKQMTw&}kU>$^Yv z=yOqF2y*FZoG!B{D(hEqkT|17=K*m=t+#HD{F{4H4A*_}-pJkIvFZMiBcEJ+jaeBq zk2;=GbJ*J}k+ib&lwu-7>*+3rnJy2HTnS-TYi^pv^618s-6u9#cN%Qj=CPvM>AA$& zO9u+3G!?|1IJs(<$|GUZ_Yp_4TH{n6PwxpW{?WaoO331@pT+Wg$-gV+F4?(CuH-?3 zUSN;F?rDyxcCsI>JsDmK9jLq1o!i+{m*{zAW|G618IC7?I-8X)>J&tnI;i<-6mTp# z9QxuCm-#*~VSR~~5BWQi8zQmI?=EUE;N}XxQge_jvL7`W^onmTdpeP-XX0*52!%&3}fs`>QP8JzV~^ zTq;J6H+*qlg2g$-WU;*2DLXo9e-`fDAyu;bYQ-h?sWAc04jLRYIhLHhDSNw8@98_s z&u72)79PFP`q}@!X)nB_doLf>+%$c~ zcOfR8u)-r%hgW2m>U}QW6;krG$lPwk(+JL1;1=!>;66JP!7?c1?q=jyiYAsM;qGyg@``r7ka z^enku@6EDdT`hab{S*79-+rq(ZxR39_ov-g|Lk9!?|Wk@S4pq>;<~%x#kJEalcV?j zS#y6^(x3PDb@XB#-(}_=yYqd+9+MrmXBP)Ou-uwy?_3(VW95f>!+&m$dupD(n!E2! z+`JiU(>J!WT{i!+I&fu(@UySpwO0AMpN!|}l*wEDO4~bC`|PoE{~7X@hfLeFHoM%a zd5(B+W_Z%Jh1uyX;k)!jRbH=kTUeOvb=^7j)9vEdzQ@iN%Z&kz4zJM-)?=)^?j?B(aW>Nxi2?u{3`WpI_DA- z!+T-HpZ@N1uj0?VpP6s6GUj$v%;haRmT5=Fh9@jLwMN4++q*X{D|@T?)n#XW+v304 zUEF%}PM-PumHCOefmgKe+4Va5uFuRiyu9+Psq9O$#Zs^0mOp)URcQ6aZrSsvLblzX zbXPcg`=;9k+g8?BE~(nSer?$7?#ogoFKv64UHi|l>|Xw!??=^F)p@^@4$jJxHI7^M zJmYGuH`ju?6{ZU;_jTVg33vWCf7z+|$yd9>1OB|Z@m5#Mdb#fEcUPxwxml2LWqHP} z{!iPMy0=}q`d!+8mipq)@~d9l4%vUFO6})1hSgd@x?jIYZjYVaxyJsPozhc|=c<>* zJe{7EoOi3dJ9WO_hV^cXU&fg1Jh!65xl!5q)(-i;*^BOPc{l$`#ItJ|OG|xyy^>je z`~FsbYo=2h82|eo z%{RUIoq2nFbl#!vJn8(fxz1VNdQF{Huf21f`E=l{*{A)d|5%(aKiS%U)z$h$-`8Bv z_}RAVt>6-!e<*l5hwr5>mg#yIWpnomUW)bJ7M-`-aAmsRr^(uZD;m5Q9sAOhZZtYdhmaS0lB$e*&B_*DX)Tr+9L97G!97DNK@J5w&0l z@l{FGP!Vvs>JTTXqq(iJMM36aM}jfSlstt(%k&oE6%*tFRl@oHJg}&2I<2$0p|?n5 zW{9HEqYXR-MtR#kQZD{3Iuumt5h8T__Pf@l>x8t552UOTW6PM?Yc(;XbA1UDpMp1M z-_55t89B}}&Qs=GBG91ZbJD1(cisdJhNeS~arS=<7_OU&C}d{3ynSad$MB=Ws?~=* zTF)3c-LLE1+@~}+ka+vLudY7dy)@z~94sQ*YCYepf z&*GC*449@XT0RmtWER(CSTkkG`4pwc&7D?Giyqh{G&LSvBDp}by`*0vlwm>|V^D{i zyo;);)DkV0_>2-m8BdD^6Wawu3)NC4e|44!n4V~NSaap0#LF5Btdbm7`U~=eYFN(V z^yxXyQ6$7zB({pZaGIpkvBhrA>*8KHP5N}A?a5}Y^zOvQC$9om2p(7vw9_TU$hAT6 zAcv9dgr~|gorGHVaK?!1b9iZ{@LrjH&Y2ez*6Kene(&OuV zQqG>1lQuaOx=gW8+xj=|Wv0VV`Ow~~iOq95?S6Fal+Z9b{;Fs0gtm9vN~NErR;5h- zag5EXJT(1*qqzQzT7jyR3wyR^&)#4z}0zN1mWvqKu9CodRIV2~_xU%x$f(&?2} zg}+N5o0_eai&I$AC(-e#?e*5Ux3`zZPFt%S^s4)=_oZy_IUl$T-{#MrxvOU5yrX}= z-BFztqN>^(xH9Q==F!67rWxn*CLB$gC|&iJfB((gspj3c3-g_3TwVH9{!OoTO$)=~ z?}gnz&IVqV5}c$^ZhSFdj-$^Hy}}vSwp`>``e0J0!i=-&5+2^qcCKLXGP|AHd+lkV z-=&lDmbx=NYhgdF6l{}py2!h_LNJXX$RYHBe~lN<1uJ8H< z*Y>3=xxB6mJovWzt!Dhk%GolBEz1>RHFXM%jei7Lt2^#m|97+Z6<=L;=i>}sS-%f6 z%hmrqvHzmue+DxAi!2QFkK*LWB)Wd-f=S;eTws_v zvv<0lrMc!F@r{x~7qzZ0ROfh5|Ig{3#k<|CJ>8iBJJv|e1qck=QVcV1@42j1G7wdG22*izpG=5Ndv@8bv*I*mt0W5}55CVXDYjhq zpW(`NvENee`I{G~Z;3Wry42vop@BbQRa&{&@VfPjOr6_r?>tbsT=HOf z(=Wb`mv_G^?pf})>}K9=_uwnP9=$z!X2m{#{|i#HzkhXiK7G57mAh0kw9WF1X|3X= z{+Dv|mu`v)U%q2wf34TnT0sw?@3UrY5f2SFnQ-6yW>0swRG8wqTecB>#j&Vd#Ms~+4IYN zyI4bibFsUsn_{Bx2i^VNyZGvA53{9WcdjkF@V%<#>axhdE8gdyURis7`Qsj&6@hQN z7thVOSGcbJ;or%wt4>}wn}2xmzt1hDSNCps_4nP>sXMnVIlX*K0IyqnChv+5XQZ@3 zqUIkLeJ&E4u=7V|fui-kFS);#uadq}{q@_A)A^UUr*AR4AGCulSmdka347Cf^KG)u z-c^_N&)U-9F{5l*OZa+$7cXxAP>#72sy+S4@AGbd`tQuIy%@i21%q|z-Rv~=8+Eg{ z{by+ZYGvPK<=E%_pW*ReFZE^HqjD5%cXt-gULlgbb_LsfS-rk(i}=p92LG7Zy5nti z-D~GBw_7*sX{N}DZ27y)`pog#XA;l$x_-L%?&SKNH@x3#B>i@u_}YZ?lg&%fdhY$^ z{~3&|HnyZ|?7I4U+T27PE1Rz6dBzdzh0mU2?UK19cPQe)SJriBTbBynFO#~soLT3x z=tJ4ftE|ip*Dv~YW!CN36a1%lXV{$l5SHSE7J=T3&YNRo4B|zuOu|l zVe(T&)ujhM?cB5OKZCN>ojRYq<^DR;+O)ULF|1fD`&q-o_=uc*n^fcCX|95tA)zs0 z+_M@v=CLkwO=P;Marma?Ia85d)dilxZ0o%5OyG2_Pf}XGjUk}mnXsZ?ok3%eqvRs3 z0}4V?0WXyej|SUs=2K`5T$UP?u<%FXt_c%XcxZ@6{CMlsP&J0I$r@*F=H%<=uMFO};CI)V701Ocvn=2{7P#}}y@~r? zWpB7<5x9l(Rmh|XcIPwuT}%4?b8m~z*8O(cEO^3KEsJ8|nGrWnJXztpwEKuM+tm{D z+xaWZb|;7K%G%m{MfQfe1as0&zNf*}tJ8h2pUaycvL(yo?$Tg&h5VG{>&r5OiyN*i zik*1=*p7T5sT51rHewz!wU*FdGYf3wYxqlcfBuKn?<>$+&@ z6*-0_X%6a=M;#aLin-2Y?f>?stQ^zQZLg!YxqbeucZfmed8R}X|9*=X@tQ9;Ef2e- zmsxeG@*S7T#PiK=>k3Vsc$VImRe$nY=H1$5f%dAWESQ{J-UUC6d8a6&GC$X+ezrBk zEvw~Ur`(9n-eI!SVZw^bXKY)nGY}5Rfd|BRbSLk2=En(ZYbJpg(kGgqt zy7|L@YO9myf0=)B?Xj)VW@AF+bMGvJteILtk`rTe9pDjEsVDp~c zh0C*juf{Oy@vz=1nCU37^~IHiu~%O6>Sro!m-(Qq@g+TYd&1B9%Ujv*=K5|Im411F zh2gpFAy>nGtJsj*=;>i=Lf;n_KmTZzE#~%m$M&F38|f=StwP7e7czIMw>+79(NyTs z2Pv-gGh9?wRqs#f)_07!bk}OyrAr(5mOc3EZ8Nzga;J02ysjldtG!n*T)2Y6_(&38 z#9^jJ6Bmc4iq@)P_h-ztE93pQRjf((9m86O2Nt%AZ;EO%HTv{rcC_#Hb<2L6#A|Y^ zN=QN9hS6fq4)^4r`GNbteQqe*m;Ng8N6dJ=WId*%!t<BiSmVA?~mIb z`TL+MQ}=XRwshZwm0{BZBdj{5zAj`bnh@of_3hZKg{(Y>Gw zEqk-Yb#`vn6F*+M@#g$@x9*fBnh^n6%5(1KtN6yu56QCnz2?Q2ZTEjw@n@Gy&&o5I z^U|!hKm>Fzt9D^-^dO3n?u_AOe% zY}&0nbM;IwS6{2BIoDpT`=+I!`fql<;r-`*kym0fom$16tN}2POuXdllHPv;ld+_pr9oKi1e|~LwWV-D;nT0;#ceI3J z&wP3=Hoe$7cyr#8XXVoZQ+|Bczj%MziA;`X{}}{LtIoD=J5zo>a^K$8N3Wli{W{%S zEg|N+JZ$CSs0E_0*{(e0DO>B~m2-Ld^bbel3-6WNK70PPdh1i~T}SGTR!?c0yy8WM z{Iu1py`!ZLl?u7f#SGzX6{%UIX!e`^xccA1Gh$9-#68(&(x&h zRcU5uTD$T4qZhU(9d1eF-90Z`KHK=*)6-k0%+2&)EIfbf+faomKc)s~^>IqxPQANz z@6)YEzi!yOX<^{hJYAVb=R?;Rr>|)Dj#>G!PU5<`v!hDi(T9ist~imL^0A>nK{LQ| zvqQ4;(ZGibS?&hg8eXyW-@f_jTJGK5m1!Wb?=I=wp!CgWPVpUY%reARp)X4 z%Yi$uE^WKJF3YNQ?bFunE4N-xOnwowaru=3b-!tQ`?9l4&-`cbGWn~xujKVF=j61< zwduZJay7&@Jw1~){o?Wk`+c46>)*=BTd#ZbN=e?E{|qZ`U%jpWR@QO;lKSV`d%i}7 zSiZS*@#&=#k5{h~3hK&T?iH@GMZfUiv0klhPyQ`BBP?MUdE-s_-o`bz*R4DEe!Hn; zc5$F&xo1;Up3RnZQb*+H-1po5XZx{Sy=6D@zFfY&-P(F}*V^KjL8(XW5)Lt+*DT($ z{n69DMLWgC^=2+M-gbFO&DE>Y+H>=kMS7m)SKXJtX>pKRRn111n^x2I&X0H}Yq%ro z&aB^a??$iPw==`+(Y3jIb@j9F)T~?S`)AG7GcV3Bz9jPE{eOnymf8EsdOwuc?(!==%sOR@&EwC!)9k;uy>z>FYwqgI&`aM8n66ti zd=LHYd;L{q(u!XqbE2NLWodZGv^j5>wr0`o&$=tG{obiQ^{d1;Ue#5yUh@sEFr9zY zHL-k#h^m9sj@rQO{ePshV^`ahh@S79W;f||^tt}ryV~AXZzETHdi|fFOyOPp=06^PZq)ur7riZ5GsE}N?c3^8Qdi#o z+~s~oZ;tiU{;$<)YyWnHZ%Gd7m_5s4-~4&=KOXy@_$u1=+gyi(FYaAr{%W@5m9F}m zJw>7U_m)eRX6@})k>qjH;_Nlssih~LYqfrLNMO<7&(X6Mzc_EZ+Jm*tgG1usQvo5@ zfCEt8RzqPtHbfkDVG^Wh^>C$2# z+@Tt-vmjHC*??)4pNfp5mfBUd)XE(Pj_z^k)P8s@fMI9q@s}UkCJ5|uwchaExg$eC z^1R?ond*g{Q>QsBY42f@T03V7t5$V|bK4{jl@`Vrk12}A2A*8u8?6l*TrPWe6`rb5 z>`|C(pn9X3MO*KO)+L`4nMYO$-V>7YnJ`t6(@%CnSUjhSh$P=l2ZiapULI;L%Vphp z6ohZ3%(F9SSMHlUX*%l>;Ug?9X^b(&no|}?IC?%lA|m&56;9Px;$l{w(B#n!j=rd6j7&YS=0Xc>$A4ACR}dxYW-i_C5>7Fe=H zc>T@J>&iqq{uSs!l)s&kzv)%C>9T(-M2r$1}Ma9GxX) zov%rU9W8b{_iK0NtOqZz1O`j2{4;Y0?@L>Mt!q{b-aRusVfCM(rtCk%Rr9Tfx8A-` zxYV`aiBybxbxZQroMY-cP6cePYZDR~BW*vc0r4;Ym?icUpdx0@98wl6UM`-i*jve~zO=hvYeUJrWT z^?T>}Eq!rydE~+~1vAQX7R}xF{@P#LO;>mGGOm%zxF{4np=ZLAn69t|lXEYYiPvm- zt(nrNf7P1p{^e55n`RI0#0#vqXgs7ZugiURb6Bc_tzqE5*@qH#Sbx}M8T$3@xu=y; zzuw+j9bQ>|X9ttN#&5&Rbzv60``(z|y8lYtYRZz5rUg9n#Eh%Hr2c2K}E7hF^46_!r{65Q( z9PDwTAwd74Y@W>Ow9Efu>{BJyuZrEV-z)Cpd0(YuR+Vp0lGQVIF3j8!edo@(d%bSE zR{foEY~4q{S(BJnY01U}pFL&}^T2D;I%XT&ll+q{E*`f%x%`fHg~>0gd7UdiA77lK zY1X<|`}MCaFSoE?UUaLZyV-$PZ*AaIspV78eOcU5svh)C)_+InzCD$EJBzOJ1}rt% zxX0GYmp57ZTe8om<-u&z_A_vvw0_vITd@B^xWwU2nr^EfMu^oMf3{3|^Xof)x2wwy z-<6(bzH!!P*|(i>A0F7My$stFHUE|MH0$f9-u}9`-!t;OVu&`qg2xv&_S9^f9b6)~IIrt*$X?s@vALD~_fXIM4d_Y3CAAzRWm#kh6i%p`nM`mzL|by|}gIdsv>Vh0a<%Ewh)uSJX=RE%AtkJ_Jq$rOEvyZuV1X8(-sdQWQIP3Np#e&TIZ=4aP(-TU3Er_7m^ zd)f7#;8Lx*Q3s`YFE6)R*tco>i|=oLmtD_vxL7*%t$t|hudi8)u5CP@artA_p8Ls< z`|_{q|J;1}YHqpi>C1oZZhgPHeb0u)9=A>3Wv`gEb;dQb1%Gzks<)aQ`K@x%iT@1q zVs%cRUba1V_ja4$N{qNQ1@WQ+6*7dD*T=d{YW{k7u zw96+quKKDuO>9=~B_~ng8NN|HpHAtS+9gE9Z7?o)DciSV+O91bQl?ygd{-QLZup-; zB;0KG+Etee%*$^V+{wKmlMxlR>e;31|K_iH_W4S(`00-W4fuS&k{U7oXLW7g!%T-Ph5 zdg@oM1|B{B;ppA@wq=VGUQIPy-B`E4Y%;gJ%lVo2Hnp3smH+hT;uX!cCZqY0Lpl5qeCQB#W>dO@~o6Q_0yHxIxbFI61Tu0u2hELr~ z%CqhL^EOMHo|d`Jz1xjdH`Cql%9X9ZW`DHEy80(@viGxemv_sPGp^nG>Kr2WdZml? zmFoWtm)6vqd)w*T1ZKv@h24`ZJ>csVdgt5LN!iR+ub-ZHnY-m}wcgLn#Q4h9FXQum zxBB}%ymp^=?pg&~`|jv3`)0q||HCAITl}Cx$&#lUfp(n-=E{bTT2Zd#-Dw$satdJ*O}}6mwlg`^ZU-mxSLj|t6sdW zn)X}u@!qHd%{~TV8|LqN5V_XmLW|E74|nX}lt|Ezpl+>g^6 z-tITjl`8u!{d}?S)?}X@3#_!Z?6%`uDs%m-*}{X1cAM^)wk3E1 zdRA`0D-TCw=NbMbChRO>3t}veZD0Of{#A6Y`IUmwEcZ*R-l$zX^@;sf{?Dy$=RG4C zO%8djw0rT|Evqmn(lmH3sHy+rfPkQ)C4-UE<`#xa9wA32Y&yhj()XzA>RJC50Y<9; zNe!XzGm-=~IJ9k+&rwv@4rJ3xQJ-~?gR|gGWI}a2gkA(NKFiAcuzOX_yq~H|KEYTyH6F?*6 zp-;nhXlew1o5RGRZL}~|CGt7Lfp(!(qv)f;5-gcAsZ*YI{cvee;7D7@G>5;ugi*l!~d&~MGvw}R9IB^E>BQ*=R_8kj8EtL zCE7M{EjVe-^;@aBMC7wjh@_yx%i}@X-}lKq;(r+19{kd7+0qR)JNFgKxL&>+xFuSt z)b?o4ujCm_&*e(>oz^M%?4Bi@r!~ud*;lcy<^_UF9j{IiIIO_5ra|IE{9V@B)iLw$ zP4)D=&djvp&l6+YIW|gZS-J|3d@dXRP87Jta%V!r(-jNeo>NQS86x7@`J*~Xpf2VF2` zE=D~Ij%9D`Ew{xUcHQ<)Ha*~vKiAbiPMY!Nzb4+^_Vce^bN|aV47n*rrRhobt&1#o zF+94ocWzUWVeC4Y)J)5(=Fo7=WySMaMaK8-FPtb z%5t~e@^SN5`YxHi;qe6J{qK%lJ6v|hY+rWNSBc|ltFli-%~*KKG<@p(;;5zN4cD(| zJupyHmk!#_&i37HYk!Dakzb#M(cev9)BRqjn&0LUWt_B~?~(Q9zSEayZ@XWf$GbLc zufZip!>8=5!bY>E9AqhtpLA^X_t}jrb~Pkzn>9(wpY^fC`-nil!pdN~OE>P`_1OBx zy6&^r+HCvKW}f8sn(M1;cw3eKGi;dMRe!{BiLmVYRlE9gUY@op{m!*$uGKo8nP-o; zgjG#W+3;#_+veOkw|ZBd4{CK2)wO8nSas`WnYm`@O7$L@BAW*bb~HCW?$SE$_cJzd z|MES{mwRtrw|-ZYp#R;1{2z;F-nqDA%lfcu55%r5@U`l-iK^Rn-0Nq+Bj2y$%a?FC z2_6wT*`NN~pz8D8m)q~B#J^Umo87J@d;ZR;`pDH)v;H1gsh20I?XuYKkkp|V8HQ&n zPc9!+uD!lR+I{7FiPpGVd6A2E7Ihs>U(GDCP%*fnH^zF(!<+LzE%f`l)TZ_K@9r;8 z-%AEQTl`gjHiM|c$F>G8U&mNa<(7qBS$#%NPs~%k&9JcC(rEfO*W7KdKBs6pp1l;@ zw?M>af_t4dM=RsPI}5~FIa(g=DQLVU{z9OC8QX-exyxG=1Xy=XczQsx%!wt--7{VD zufUeh2`@dDJZQZa&li!mXoq*h-{lNK;=*Sxg)p^Nlso0RH+aYND!j^?qT-S+lD#DD zanr#NEl$fxojr_kZI}KtlWR85lY~4|yi1FXYQtMg4hpfe#He`{Hpo0xnCqfdcSymU zt8xZ|Fayi+m(8y>9+|2u$@=z9o2Zf0b)}{yUcobd>|U@XSbC;#Ff9wX#`MbF{M63Y@6NLeLs*24xjfZ4>~Lw{>fN4pCmnNxj%%#& zJkm7zee(v9-9DGgnrc=nIm}dl(V%tNl=+~^oo8njF;3ckT3;u5+p{^RS2hJqou@Z_ zTg--oMHeik+Ib8cmv|SSTAuV`_P?dC=1zONEqbn3^>MFXq1(Po_3P}}Ubnq&(zjV# zLNC9_NX@r@=I*-f{x0X0Rvz*1&TlQd8`xL(aC=-x_FT7J^8ZAY07tX zmswe}O4UD46)HWm?W^C(4Y}J(=WcjE#hlyydilakhtTSk7i`XyM!ieUv^@Li#g!M= za{fB+D7kz4y_xs)sO!bPR;??(?OfUXa&PAR9gn4qo?GZGZd&rd=eI>cFBf;(&7YHB zIo>(8F~nH!yY}u=>()jstc_}!e#N*_YwyXklatu4>EumQVEJ6A-1tyXkGp5Grqe06 z8a~fkiEBiKmRboE6_w@`7Iy}P2+TP1VS0+1`9qHI>FxWNPADxCcw?Y3DdssZ$L5mM zbn}AsIh=M&d3`fZo++Bx@-tRkR7LS%s^xcA@g-jGeXltEGA*rUJf?p(L#x%;L8U8Q zH70rUq0$YzrQ;*dKh?iizUr@YsgRS$3Sl9JgOSQzZBrg}RU1YWH*kmu_VmTLENEP6 z>ciOi_I+OlYmV>sQD9iEt{CIL zDR_Pk69?xKEv=d9_xb8L0$5x;zl#d+O;z#qIUv-)&|v9sSn>Dg2Iq4s>p13bm#7FA z2=sP0VAfAzQri@Fr}1$^o8r<10-Fz|Z~4!VBETdruwzckg+k#b=M+|7(v$tvOU0b~M$!&wtdZ!64z>FgxH> zqWb(2v&Gz93`-seObRG-T)M>cq@mqmS&xT>`wp>8xl^h+LDR1n!dZkC^tx~|zK=pcH6@PtNuUg@ECi6g*Z;*lRNQ?)Eb-8h626g)P%dfi#9v-GRMYG0$O@22Zk zU*Em6Y}MCSQd7l^-qg*=V*0jofyFZRqYe?t4v*5((zqV&IDBupzocL7Yq2#G#b?YB zX32JHSWxh;$LdL1sDYxo-HFqx9CqP6VioH*8D@)2oZt{>`$&6li(7|JCp$x^_3z|O zf4Y|)>pk}JhUtT9>9!S*RUI9bI{a&*cU7-i^~Tn|HaSzfd%H-*@fjI<|HO`29tsz` zu_ik1(+QKg@y_X=y;s@4aYJnPjK-HJ3CeG4%e9Lj}94bKF+E=>*(dJ5(gbut#RGEJ-GJD?6u2&*8laoHF06k z_J0o-^G43tRI_$f^v;-uN7pTTRXdgJu0N80?{3c?+$PECXT^T%`LuVX2bTDn8!o=$ zuf1IHp76v@uubknZY1JQ?Rq(V_puwkCPg`VpvCU4G z4tCBJ7L0v@B|m4~m34R&yYg7GhoZ~WgDXXrCoUG?c;@-=Xp)V29KUG2u940DS9$-` zR8_;h=ldVFqEd04We2VC$FsVuN zoV?ovxSC}AA`dD2d!fM@WKpIkAeuI5&Yr|p7ADR^LI<1}GT%z7s4zLEPtQ>hP-cGD zvdO{V{60ow&Gr;c>kXWT887*yzCTv*AnMVB3w!rYI3w`*_ruE!Q>S15JK?~FlAP_& zmTO&H(~@S=v41MVvxUZT2VOk4)e2sbb4#H473-TX_v%)>bP!s~F`>-x>I18r%RTK2 zyQl2=&+zcqF~yKL36F9wv8js1^JLPG$vMqoTEC&zz*%%h;w<5Lft!|P@$#rlv|~BM z(xlQgceBu}!>0t;=A5cfd8o)ct3{HJwYZHz_?gSm!WnPcGD}YRC}}XcsT{GGu;7ov z=}pXTib39Y-%eq03lUl%;u_fguAN6f=97%SAfJvC1E-OvtA+CxGk43%P60-lCkv*$ zi924)#K_0cG;@`o{8GLtyweRNW`2{bkaoQn;g@YEee>E=Ce?ErA)&Pzl1rd3_Q-yPm6qL<-71y(IQnx z(V6-G^~a$dGbQ}*a?M+CSkr^&$ZwYgSMw%>b52_P==-6Bu&k5`EguE1{;p%;%;5E$ zaVa9FKv#Y6_a zA5E*e9&{>cyzPu|2za*AsNkfRTSTJew@H->MjmcjZk9`Y0|dA?1o-To$n6jpu)w#b z&|~H+hosLU%)2GFNqYEHGsG$$^I-a+W9`+nS#w_O1?eS89$F!NpIHRD)gGAi+?kgz zu`{KkMVqN9KtR^e;^3-xnWr3Wl64saih>_YlrHV```VRXqwJF5d7@loj=wY0!w$u; znk1%%a}{3oF}^OVnmXTi?fB0iaZ^)fg-}rZ!lDj2#-c@fGVBs_FL4?yHuB$Pc<-uH zs~>w$bcKh*B!?LfCUrkQEj5+xePmDBtkho9%%-%K#s3*v>(7b^bTJ6a<*9Gd->>6p zyyvQltkaAO?+#VY?)hjSLpaNqb-2HS{QW`qqZ)s0`OX|Gs(CF6yO!6|WcA zDzGr6ZvVT6U`4}*cP)mK)-fLwIkhXx<@uaUW9}^;6YXy*Xsui1$vmZ^!(D)9a({V> z#^al$-`fB5uKrx9 z=~i9KmQ8WCSXI7|=We`9#`dE-wQMGyTY3BHD$6KOCg+|{J=1ch=g(jEXvs;DiX~;M z^ZGVz;a#S=LEmgTqXD1cro|g>W)yERT`Qa&#isC7a=z)cPHE+X+g*2VD`?A^zSX4d zlH#haR(81;lYjAkYk26h@ZhOt%Ng%Ywq*;icJ@BNqAay!%7G7U+mn`kEsbLAVT|I^ z^p`yJ=gMhuzC-TJb>~Y2GS&G`&Xc>rf9u_hdua~>zNx?O`&f86ET8d#$;z1wN@{X~ z!oQq4lU`iav=ry4pFQi+l6v*jZ!!uut}!qiV%|Jqdj2Ny7q4fE?O1Z?w84ItGWRcw zxxd&=$WUQ$S$dn}%TxuC4NY9OYbVYZ^{}`2CG@9v-E#SVvtw9vP6#+X)~Gh?Zr7VO zRYK`(Vc!9(ux`e(Ngov^O^Ca3V)^Be!;L0~J(ayvE&VyVpLwga|J2qto_P7?>h^}W zMh^@kPUd^Y>|r`+{Nb_e#Jjd$Q*Qe%=yB&v7v`PRo!P!`vj0?Be^Hr4JJ+*pJH82> zc(ned^nse?O^o)85>t1j{hPdHn&?Rbk9YnXwKDcEw%jo7c`` z-Q!xcPJPDGV_U8ts{FU_Zn1-W)a=>Ig(iJJraUW3{mAhJ@4iV}y{XpP=$LQlu;aPN z-p^0}nrGF{;Hl$Fs#ufJZfK>sUfA>;*W$AK&6aiCI#0X)Dv2d`B_-Pz-pc!SYuVJY zL<0xS!$K?xVmnu_P<+rSb!GBVs~PH?u9v);EY}&u89Zxn)M~Z;JApy_@T(@~J!f+c zXjNN$bLp6^+w|bVeI0EP`#Hx%H3XkuQWtEFmG#(Sl4+MVQEAhh&&MqOGCT~M$?iSt z!L#W?916{rW$vjf?CpaR9xLAnILT77*Y;=HE|dQZXB#I^+t~eT);GZkD=rCa4VYxV ziAyAisiJJMBA=6k(}J@V3k^CHd{4YQp=P+_6ce{wQ|M{6HG33`CK*mRXnaoY+DV_5 zsdv1*gx&PC7ATzYsw_XC*uruseO9o4`okth_eEz!W=!(q%;e(U%@}L(;V}b)W8pK- zIR*kwD?5Ydzcpi$+faNZ-~{t6^|u1=;u#IPzaE;Q zFj=Y4V$w_<1~<>RAdXN+)klx-v#bm`Kdbh0j=+VN%{u)(mTR?CWQ_IHRh_yj4lwh4 zVo?(LRWD*{v!KQ2Qr($H4hy#I-aebrj-ew%rRjm%sz<^W3C7`C*=NN!cO7b1*{IRI z{69mdq67Q?3r~ONT`k@hFYRr%*4Juj6aO;Puez_It(J1Vy&N@Dcg1m!CRIc6`BQXq z{Z{L$Prs=0(DGqV#J;=|=QVTgExf&Qt<)0kTghFYu1@&RAd*}D@?>G9#b4#y5uxe9 zleR8V(70rMu-!yx!3lxQ4|bemF1=E))Te4|tl7qeFfPJiQ0$0HH7kMw_oL1 z##zW9I$z^WlSlCZ8;{_LhjiNyKXP|g_|(9s@K9%xqhVVsL$Rm(`<4gnQ*@p`NO5ty zo@G44h5eAuS{84Wr^k7OvZ{R&uZGTP&UCsZonYb)a z?#gnQxi0n~i@$%A_7sawaXks|kYrz`W>W{A4?VMjjU*mUIP+Yz@Spk}^NGrB(;8bE z*%WG~^jJK!IBCQue0V?KjgIY17QXd)H!c?@xC)r`OQy>*KdsyU%3NW?&eopiXV*#X zztzR$aguS`VvYSZYt?_PJ&^s6ZMEIykTZ3{GVu}dzyA3@{PigJ%YTMfYs>fFOyfUu z>EachOK;OJX3aae+9${>O_;O5W6z%pS?^0YvY35NGp#oKy{Nb4@=+028TL5Y{@)oj zNBvyyA7P&IW0}FPkocg*nU@o5!zbJdxnbEoud$ceHfrGv?am2D`fnf8yKBEa=vIc+ zw{({kZYR?-OpGSvG|sRPsxQf#={n`MkEQ}kUhKMv zzV3ruOer%GC4H>!NiR)tU{*>tnYU}xmsP(7+jLij7p{t7RFz`W{OabN(IOQU%wl}F zCDrJTc+r9^CB2%&w`tRrZ#jtuHo1rt>J(qv$<;hd!ueLqdFSB!Z+qG};a?96AM`L4-)CN4iq?hgO?6lxe z?8avQ5}t)?IWBoM{|l5bwVJ@ISAC#(L(0Sjg%PE_nyW=sKDp&an_n(EQS7sjgJ-G4 zR}GUbTOS@c!E^9q!?~qfWTwV(R=q6b5Cm$9`iMlLq9`?ngD`{<1$*qp&nTux{es+&v)%w18$&z_j*evx9 zovprhi~VS+<2gx%isHRz`?fEc;HUk5PHC-erP{AuGVh{~X>Ul=VA*jpWv27N^8p#2 z3_KstI5=@|>siRp5xa!L?WjbemurE?;P{pS%-`tF`Rfb zfrCXqb0VjPtJi`6i}OdEm;+cvbo%T!FFdL~sjR84gCS$`lqtbF1yeYEOg$pmMFYIb z9A~O}CJHI@-8@u~Ij7@L_RTpArl-16(iZcC>mmlzf-4~W5dt98dZ|cwVO(kp1!dKkC7H|DGszLaM)DG9;XWxU(?wym_#w>)s2hUhBGP@9L!GI~qr{79B{R1LFS^2Y5<<#!>gx}S?wk5E zdT;*IsqZCnayeOJ{~X_z#q2*l=&_XIiS3_~Ew^kqzLL}NTE`N}E&V1_B)zvOvW76L z$lmZxUiIhTEvx#)+g4fU-F3fuM^B(?$x0r1Njsa3r(UuKEOVAS>a#Iomw4!rxiY~A zLxS4wXouPMU8tC_cWsCEib%nh*!x1Vsgqe=^VpNGqaQ&P{{ zMMGk)I!*3b#6Icq(se2mI2}?|%WRZhPZx^lROq#uWBPOR_oMHhR;GWgI zMbGXD(#Y)7a5+`|==Y}`q5oDEJzLhR%f~M3<>9LS-d*|X{eSx>+`sdmVWxDgZT;%4 z_x>I;KdWYwf8|!Zy+_eH7Hj!OY#MzW)rNKwM_)h7erdfzY|iNnQ-9IIOY?p#wrE&q z%pj}X`2IhGN#KEyhadklyj}3DKCpb7+u>aMWvfna_P?@||8lxZE7STv;mzt*ak=b| zyIrTQ_4=9-SnKrdQsIhHed7z013o^}yZBGzTFKvuJMT&yGUc|4x)bDj{@I&n&knyk zdhy$pwX6nLzIFvIYfY&+cgb@48>hX6?*gN=rrm4`%w6Qo9p$&;!iASVS$^*OJgF-6 z)Up2zA`zKQ&zX4cs4izHO}+ooZQ8w!+gH4DjSli#_aX4$Iyu+l!4vmPOx~QfElW{z z3)|&9hKkx1CyR}9_Xy5*nYQuQteEGM9ON>SYCmeOEP8lCWcjCCvvyx~PUh>qtg-Ry z9Pat8W(G|wd{TnVMH$Mr2N%xFp2LyxHsg?X<)Y|ViBtV|UbI=R;B{@MGa`&!f`2 zl3p$Lo?jvtt8jDWJ4I!?b6cxodRMOP>d#&_C*WvM@41Ig3w6`xYRMcp%FuI=$yoE= zy{)GlGhC#mB|Nv#extr|Pl0}k9Jkb^;wc{BN4x9z)~qklFsEuJ-L)-&bU zl+yi|3RJdb?T}-va69^7=FV436kMkFm}>Q|O32>%*8A39Q>L}%Wx^7n>wF!2W_@}T z<0j>#UXr8dDCOO;>|1lk#CdEc)~zR&Jd$X=!DMjIqr>c#XJv;u*EUofRM66s|| z6VhUCdJ(E&mbqP@JzX}M7iFxf_0p@VWbU(gBv9DdxmLsOeCZ9a|W6HmzJH>Z)KPu%al@74&&175=k>+lw zZRFz|adp{s@#RY%gw&|=&3msC{H^!&`u7>#clRCsDza)i+cgun>C=9;8a+3Tn_fF# z$9-q%^uWk)>%8~dHeK6lbTMl7%4t{fcCO4c3VqDu)J{NILG969fZv&!9gzE?H{iy_M@Y7J>G_UD}y@-{hCw*RxB#RxA&hX|_ON@*#aQ z^Q5^;Ikl$jZ{=oMu>X`(Lruv%A-J|DCpMQPq^nS?&+Im5=YvihjJz zqPpXnKHE8qTc&#JX8O(M^_uE8D{-OUnwBc}r}2L8tl5sdvhFfknl9D2n1QSCoJDi~ zw(htMOLyGhHS=4&#^8)`piJ_m-);|`i?%Kg>veeclEp$MRpe8i*|(>=&v@M_$hS(! zU8fm+C+fw{gcTAGqGSyg99vv|H#1kvbp7k4)>S%slX#4KeqC8-Yacr2%V)u}|MG6V z+b+#&^(~Zh%c2LbS0%{0HYj<&nX%~B{W8PJi`H`P5zZFdXmLI0eR=e|=kbNRRI@EJ zw@hM=;JhULP->P_Li6f_4h%=8^FPfLVoC1ld^p9`<8#@A{ZZ~!ot~~K6MJ_(zEYrC z*>a98SWqy3W!PLZh2jps&l)op82{LCK;%Qm&ekx&?ieni$#FM&-zfcVOtUV!z0PcR z|K&R@PqS}TG$|Ke{wu%d{rQZAE|(X`o{d_dF;lE#*Sjq`(6Y{;sdtKP#xTn@vKIcwOwH99&r#%L~gRB7pzWUW_O zI}dCL{joj-c-`@EelzwmGN%1u+fC)HXd2t*xX z+WT(a8k4R2v&_EtFU^nL`f{h1f$6k2B^TOSW29ov#m4h3&-`6D)Ay>`@3fmQMOIDN z%E+ZT@hOi_ip_qWFmvr)c{?(aIr-VW7JSUVd(JqoXx_7ofWoc=xz|3N-8ECsZt=|J zuIhp--|781)zT39pP~F=j@X}r_wJm(91(HJDc}lkt-Da?v)}Pq+P9~2O%K(~SP>t2 z#N<-(ti5l3q-|{ZefCr<|I1~Mp7m<^ub6i5T|)N#rCaTFD`WoZUR`Axy_M57Sh8&9 zyV#xVvjg9_y?egas;m3H+v=m6Z>b%>ta!L({`4H#eUI1unzVSu+xT_+;-7J+P1{g# zJs>l=zVo8l!ELemhvq%sAG*5S>C)ufoRwGfiZ*q1<^E?lwIt@PZeM@mp7^)R=H1-B zbJ`Y7#;|v*D`TW;wv=p*I&Nz(HqEtYXO{4-MH|jecRKrf!o6pgD%ApB*X)1&D>pUv z*zC8rzGp^#cMKJ0vp7=!O+DCf;iA39ftMA+6(1DNxp3_Die2x1{o2gFF5+}>;FdcE zibs-vB&{%)J@=oXVBO(LuXD@VrLMlu`jvQWM!e_#+LKFr8&5r1a&=emR^gv!YmZuA zp7#9W=A5eV^*N5U{1FYI`=;hPCGyXhk$7*{?u<)!58b_bU&HO4W{&)GRwcCH?6e!*Mx_H*Tb)rlY zPd&2#sU$r0#Gitm=}8N^Al6 zhFtHgP03qc=oTF9&u~#Px-%zhsl??wnY}wNO;-(FyiE1g((cUA+%Taf3$)TUpEA1h z%tT4lB|!D9VMOXxt$9;d=q_=Xn6`9UvS*KVyWy>s`*yBe@{;jfq2GtcI!V({_C$N> zOxmoO_OH2m=bf9E?K)T8&C5{Tb!h4v86i#SwhyUtIK2OEe>VKwORJk#7>=iYF3EvUbeS^LAqCGm6zO) z$Pc+&T>bKGbcWWeK7}dY{Y1C>ed)XDxAfh!&xKBtJFbSDTfD<`!DqMACHLOt<(}BL zTSUXqBiSXaNDSrr><8@itrx${PlP1(xXq=uikpOcdgjk`sJ%8bLcO9_3}w= z$iI~Ao~4B=j=t?WH$x<5Gk0L|tov%ePX}Aw&)Bvqa{JAIrTz~S6IdFL9uCRP`1-cD z=yK5f1zxPnDx)^3%y`;&cZGSidD-n!DYLkJyQ-tNhAg?|ecJWC>Q*iOZU15eCweVk zck5EN{OV{E4~8W>TZA6XPfZGbbTm2qQF!`{B`>$?8dxo@^2s%i%Qfw0KEFDw)yQOz z{$~l9wYmPgKOWuI@BPI*;?1G%sLLhC)L+|AFFHP9*J~4{&-E{l3*6j(>y1+1q9~un z6YFFzZr^_A{n~Z0<=d{6t}R#`?KdgP-B?!iR=r-Xr`r7X4ZA1iKX=TXDs{BJa@ni7 zlgnP-J^kzlY{Zi{{Av*MScW-!CvcN&b! z(`FR@IP8*VE@i#$X=2o)4_12d_x8$_aG$yMtYS&dci&Z8 z&ss^(RZ&{NzuJc{+4oW2qr35`doC{9w0LLq)zr%xS5_T*bltEet86Lj*^sAxv3%Xz zPHzv(jVjff5;8l|V=7N}KwwH|{-GcG_qz*bMJ#pqbx-eksqya6*5l6Mm&~TU-E~Ks z>3LUXmW=16SfAHXsRyN3nD$Ovu{CtXb@3~!j*Av8+Op&WqtwsZbMf0(nO)J0x?Z?6 za`C0f4y;R68sGBHGi=;(J9yKMm3i0pdkbH#tSH+y<i5W8)DmouCFfLf3ZjkzrFL;F{ zuSW0=_3Ipl2NDECrYyhD!n~}a#j3c0t3X!V}*&sW}{P?tFYmfXhLHN#*1bX9>?DMa|W!55Fyt z^<(i_$-02o^C)|kqndk=x&6%q78apK5vJhnJq#vn^B7LwXV}ubRJ_Gz9>TBcd6%T9l?_V3xK z>1Cy>c7A!;HjQbinCJUGleBlj-#T9ZXHeO4dG(I#g${QWeyhry^SYatAF6R-*XboI zV=i9(es1^8Us;dtoiAJ^{o1oat#Hcgxy$P}R&HJEw|8%uz^{a+1y!Cdl+Em64w<+pJj;+qSSMED)b$0G`|Aot{qP%B5y4rcX>F!pQm-Y+l&9oxc z@bqoEvFgjWpK%8{7xAymi8WksR{dMDw+oAew#x?A%aJDvd+!I8`d$w22qs8#ss37(#lm#qBorbCN)g@#??d|_cBHbs}e zA2}8Fu9%nDIrETu^@EJAAAL)nu|-Bo^%RMj2pM&~j|;ss>vdMDYX{@OgTX;_6}}(4 zdN(#PCwI;2hF^9wt`#!rB-meIQnFJudUNGZM?Aid3R6b zQO+ytCdSzrTo7D%prKR0X0dv~#`W7{uWsIv6|(4y%C{4OHzl5~;F!{!j>8&t_DpTQ0vWVz6*U_iCoS8!LY%ddGc_T2MCWT0((B z(%!Xw<*S1~$#nfazvlXZf2*!ne;4}tTVwZ%h->u+ufIyU_Sx{)ezvuKKW0m>Yq|Ym zI$){m+8um3QSl<+FdBuyChlH)KDt)u;`UTZ9yu5ma zL2IARjJxN5RN#HI>)ERfdse!1|4w_c%42Snuk-qkm-Bxny-fC-n!T*5B}C;@6W>Hr zyMP(Tu9t1S=s5KjcddH+teG!u`m^`Fn=DkmRBY+0l|fr2j~-le{Zi2ExL2pnPMh}Y zWmm{e-=*s=U3zqFuKQb7J%e@nzU!{Mwo<(+AA024n$Y-wMLTwXyDWKo?ZmIVwW$Rn zhdm1%q}IKel5yAJYRVm#3zkQXJJgoEew5X;{xy!hbq<4<+i$FAI`oq^l7-&-k|Tk3P?azmXpK77wQH(0ahGG&HPKYFNi3JA?kei>+Fi?aMr{5PzU@m&HwdiQu_VS^DsO4PwVnJI4)Lw` znWpq0=A7Z=oZCWD&w^^JrZN_LYfiY7Zt$Gt+tH1a*YRu$nG!T5_`uW9s_Hz^InlYH zCvEi#84XS;FW-D=(Uq->+!mEGt#f{)yq%jnedDsV8<#CjOG|IKEKz*urY~@!1CcX=~=PGb$RWB~~&s=q7$v2a2E1$7zo@s80 zczJt6ShmTw8I^@RMghr*@mYLZ^<23YY&pBaOZ`^YQcLdxJ9ixG)|xf#?Uk32TnT%< z`xKN+vZm#=^RBybBgj;fl_z`7n^UXz?Ko2_ZZvu3(skG8syf}wnttfXXSd*9tsc1v zPjr1P7AT*7$TV?Qw%w(LaaXsz;+nQf%ylV$=B#7UT6gYR`6(@)vsKpDfcL`9*5Bva zLKX&oXPewvEi!>Gs=%{TVq@gnHJg*29V;)b==1eF{%)7mjvs1!glRl_~_p1S-fFab+4p6 z@cHn~?bW?|OSewtu4AbFyE6x_j-6)u06*93)P9Sx~6pgt*{WTa8o@$9$70Cz+v2;3_3Z)rO zzv)rkz!el6V)$9&5**KBDy6`}KR!RG|OiHe6JU8c;6 zDOtd(ZsKZLUK1$s$BBDJ-^8$wTpm-BLsX3>Nft)TTV#1;o3}y1k>?WcXJ~Ll_WWna zQ8M3@k!{#tJde3EMDje}jdQolYftB_I5jQwt}p-8lR}uG+OC(~jorO>>N|;RcTTRWuKwIxs4Ex1+%}2}_lEFl%X<)V&_tVy3g9a=nd_tj-m+;Z1if6RKg`tORxivwDphP>rG zq;Iby{Ohbk?9yiw9409Ts{7x|Dc&14f8vumeppL^_vfUDv)?Aqd7E$2A;X~N_b1?Q zUQS7-8;6~ zFWixB>7gY&{fhr)i{t5MUxw{kadbUfUdy*%v-8zn{AW1jeQ{gK{L-kkZtU!QX)l8( z2~5n`b?erJ=YgWOvKk%tLRiG+x(3fEc2oDbvG3l}G_jY8QrDR$a`#tsmj5kzVR8A& zsU@O{N3>2A_MUye%4%{;s+H8*bz9x9>Dm>cz5)J*zi$bd+`emTD)H%o&ZOkO;l#jJ9qW7D1DH?HDocUFDQTc)d(w~X5(AT9RzhNE6btaI*Zzxi@-x!03f z-#VWj4)rR2EN=djahBwVnZ0Jew{6?1b|;u6Yzu_8;mw6% zfzxjvKAMt}`)jWA{z3TkTW5A_UNhHap4t#Dlg5>H9hXafmdeHOu{>-)Dl}EIv2=%9+3kw0 zjhkOt7nkmRJ7r?Vj;rg6xR{xjORAnNWT}kdc{XF>ob=esOCy4JGuXaa@bMt`oD20g zd!kBgogSU)apP>il4{9g^13)QdHsumyG2iL>Ni=3*W5hu_4bv=H#cl9%3XJ&{aV z)!A<*k7(rrMkb~P{>1paB%WnUD?}%qVhGmReVM~pSZjt?0E1ZO(STYDCx)3x3p#zY zI*ej1o^vdEX#e&UgOK8!>G!I8_>w$-%+?W*dr|O|b*4~DO5x)}(gu$HM`kSINnqTe zrVo4+B$;PD=DcI$^s$Ah@uT4V78M32uXVzQj~p;zwOE(^)v2k? zePbY>Rl)mK2KoP2AGP_ZNX}oqPrh$uL&?d?f;kWO?Amyq?=pWD(}R6ag&)hxdh}&2 z;*^sRQV6NESy1fJ#5FJU?+1yc;>`9E{>3F%t^_khO|D?&E|{h*H2D*Eu)tZdtip3f zj0$b9EO=7d3K{WAhVYXzbwRbC3!)?ou@RaE^P{(ommvCUwr^o|=X_ zg^gOt7uZ5=7Hp9bs_{H7aeSpwOovO0pU=`8ixgKZunt*zy65l2&^I#_*BxMqJ0c*( z#N)}elfg#erQ_;@MQvLa9*W8;`;2SOC)X$34Rv=n4?nI$r>-eXRTUCfng^GDJ?nZ;_2+<^iA*w>qctbO}BHf(9W z5tbMRu$3~nybG(Uo_$G z^QCXgqkmbqK0fQ{ViCL~jnOJB__vbY>{6@T+b=I(;8i%Qm*$e--kt1S@mA!4obZ(!vvaBjBRQubt`sH`$9OE>w_j9=H8&dtHc>9lJ z?_Ggwt3yl|mNKj;HcVbyaI1Q+8WTeX@2bZJOH&_Q7Pu45oqcib3QrY1mQ;;(Pr{zN zuiV{Vw$gmwty(p;`z&g?A>xx#39c87dQX4aK2+Con+EZ@T}xqOq}yoIH!P2}AA z&q;`Ot$n*Y^V+5V3}5*sHKf#kI8-UvGkf;kE7Q(BT5&OI_1%~3mTM9YUuN=qkS@5Z z`_;vj9-FT$hXxTBDX%TM)gt)b==+f+mmpZ+CZ|{^hI%c_V>E(YM!Mw`4 z*JbMpt#x{n9h7&OxG!G2NuT@R&2w&Um~LV6NaWS8$DU3%ub0La#`{`juRZ!{diuRqb(cwJw)bpX>A6cw z_1;NOwzKaFX7Vt1T#GA+J8?WY^p@wdsx1sHS{kP}sqa*LlN)f~ckB5T$7W7AU}9L? zSCZT$#W}%(andrwmwUzTol|?8ZfT!BwQiHm^|g2JpSit6DDJ-T_uk8m67wYaZzegK zIxLHAzRwu)L|x%cpehGTP|994cPP ztlA7bMO!A-ojiW`%i)VfMg~_I1-z9zJ*twVc1->EnL)6Dk>#vo`|gcmK`y@%MOhj+ z-fB)^Ho72HWB9Rw)ga)3noU?hhw}7$1)fd}NjsOfsC!J3{d?hnXWyg+g6;dHR&nHK zS=%I@V@xrbvq7!mfc`{2+Z`$ogvBPR>sS31xWve^=)*^Lj_R$PXDrGTV*EK6gpJi^ zGOIeyGvMV|q@Z|q-qau#bI-h>v&J5i5>~M2X&bm-6+AK5)5hm1594Z~{Q_Du{R$Rd zzBFrRaHmHAO`iBV_PQ*3NMS$ z5IUNE)LL=r4LNq!L+RfS2hZ5zlVa@T*}iG_%U`|A{xf*Z&8nLkpeA?Lw=#LMQpELo z9_IpHt9_aiHbwTQxV${kVl=Nt@!bsJ8Qg}qk6SuNanE6zc`lXBa^8t`k5@^Z<>>QG zIOUL1qu8s`4W*Z^b+{m)+~|yl{m~jPqBsiBHvq?BonypIu*hH||>8)m(wA zUQ_0Trgq0EtIazcb!zgAU&|i$&5ouph-EjZ3 zYYP~;{9NWNnt0Xv>|V9dT%}9jUh!V+=-u{u=ceA>odvNfmc0S$a~7?>TwT1q`}3|C zze~;C8CzCn&;Qn8&&?7o8hT@+PD|Ofpxp1XWj9y-o4Dy#(5~#2A=R+~>(m2PeXMS; zU!#3pWN+CzKK7|0smFAx`>v+$xLw!wa@R|1b_G5e?&?P?{xeLw9X&_pR{tfDsg7+- zQa8W7+TG#hyGH1v&a}Jkd4KsUH9O|TB&Xf|H~Hn(T^&lv5)1E^EWLNt#X5iWtG3ou ztxZCQeZsTzu9qFz_~dfQ?U$kxWuI@8YU9t@s&pr_Yvr}XOOxBpuAHvDq(e+EqkeT79@uC0INb7QC6EDZb?TIA`qUZZte|G8r; zFW*YI{5orQ>ua4info?(Pp}mZFbc5XxGwwe+4O*zx&>CVw{D#`bE0knZ^Vwbv(@fy z$i%o$=AdU?;uE`Pi7N~D9eXiV_+WXj<(h($bF&ux z-jerr=YsqG-T@5Hw#RW)RG4>eB2f% z@5DM@kxtI*4t<-nRpsX7#}fK4Ut9SXEQvYgS8&z0i-BYRKC`fCVp}vedt8!wZf#WZ zKrNwEx2TU#_hL$K>CcQEI*iOx*GvUOGUQzzO9xz%l2kSq)rvl_ewu9Lfh((*Gd+sh zeCCK-rb*#j`AzPd9^U?0lO6DqHSYVf^xmg;e$MIM{QFYf(Iz>Y;|l4&b(lnJ)Z94# zGjwsXDC)+#2FnGJm(MB|I*JZx}KN_iBlXra0){h3UW=GTJ?k38M|kKbfG!qAk! z>Lf5{_X9?TAV)S%-r^{+)Rj$4MfUrI=g2Hb5MfB0?2)OKpmM%UfZ=0f5oe-JKTjBA znM2FzrgDxvh8+TXocehbeHhf3_t$5xJ`^K-Ohhj?g4er$FKA=k z_4{A@y>5S1JNHB?Ut^11^JhNBk9f z@<2rKm(u3=W#6*3PyJph<#0FLYR;Q!F_%9btF}3wb!XAlj6Hjwt-N_(b$RflceTd( zm!rM&^5tLMj}egeNf0U1pQahwsdqf6AxQUc_0B7M@7>Ld+p^Y)ZH2?*n|}=bOYQ{o z9I%uLaQEViS2CLY*<1Wk`cB{9K?f6D7FfpWu^Fi>Tz>k9Z#vke9Fn<$!7Fxk=K;FZi`p!G&OW&SSF+? z@$SH~iXJw}=TfhqhFodYY?#+uc&Wh1?%WBdEQ3RJ0wN}hQ>P1f{a(mC&BnjQ(SXq< zWWwX$>C6TOmgOJcwW}q!70BPJZqIluFyn&U8>xpg4{sG!R?ofl?tAvJ+mQx)rVGSO zVzfB?ZSS_nW$SK*L@!}lUm+xuF591YV@=s^tN7&!rv0DgKZ^Ez+A(Kk%tpPirQ53` zQc_EfyTAR-JJ(#SYrCJ!`H~CEb0T5{cZFJSZZ`^Pd1$+kF=Ac5Rj-y6SA(lh;yRmk zSC7jHnlf;mQm&Y?F~;J!)qjQ$%YUT4GmHAM_+F3oy!2Z;CImh(`Rit0&amt33sWnP zOU4lw@0c(A;DCMTK!*9r7M@u{u6TQ+QWe8XuE}4j~+1^ zJ+tx)dGDUAZMKh=_8xWL zyROvSV&j|h9uro2Hizw%eEB=D)MnAPkAEU$1%ml?SFvllzCUx!HLUHT>c4`a(uC9Wf9 z;hOfF-QHnW*>ta!SFW6qbjw}8j5{-nPhk>M>O;}KdH3XZ-w4=U&1&^lN`~dY<;LLI z*L2>>i|lo|#Pd{@{h0QwEMJ?`B9lWS)>?P0lel&@c}m%i0^?tc#a{hqxX2&a=Vlfz z;;36T;oA=0nu4d(TS9hqz5H@J@PX%9H$lt)43DO0JnwV8w(*0Bb??>Kd$0QL=RB%) ze_4DzAlvhq;RUnTTGh2{xkMyiUS4*IgF(}$;LK6`rQ6=>*1Zj#wd#(=uY^a}R3_S8 ziPM!S+0v%G%g+NFb;otoZUlef(%Zgp|0S@KA{KZWJ!4UtO=5-rXw`c{=17~P-1 zE-X~#v~_8I_{3YD9E-OpZOq=X@YIeU56xw{sxzg&Rta)uq_XS^16{b)ISI+xFp3j;~4KA#?c=P3riNCU(=4uMJ zOv(t}r&v)?+#GU6fki^WCh5ZOv@I`}J!d<6{;G-p5uSwW+ZVqwo6U5kFz)kB!6jQ& zmd!ThTf*U9WVJBFS%Jl9YxBIb>Shxhg?yB=gBr4#1NSt*`T5`4+2j7S0PkrX%K4{xWjq<>JyT;hD#@ubVn+*j~zxIJA_X=kG+l zlUJ%s->yj3lXx}dCkMBZ(url)3+H96W=@J;^|JS=N>*i>RX}F`Hp6xAY^A*YMJ`+N zS{^Z%?w(&Cbnn#U)z@^Fu3Yn}ZBos9n=MD&OFb^tW|hv<3gay(n|k3ALzwZ~jI$T* zY}e1MyOTP3PVj#QG1i*yRd;8ev+-Yqrj<@!hSiw^JfGgkTCU&8R&c%|pzJ>Q+K z)a;%4{h`ITxyL7{1c@-KvZ~(hJjUQMc-}PVdmGAu9g0K~{6wmxPn5lI^#Zl(;{f`pD zk$uJu%(n_olr7tWXSlh&V`1Ef3^V?t)0f8{68HNiv6$zOpw-+mm73=ft}os`PJu=0S)gn54+c(k>gpQ(P}Z@?oJ&XCi6$}$N;A(B%jOzJ$$ zC@^_ms>K@3MxBa}ZKo2_&}6H*>UHuv#DtfQ>89N z@Ua=~%KEkJ>GtURVP(s07al2i-*dYA?CzpkFOyZ!5+KbEAtn4N3imD z9c7smQ_;tH%-ZYG+! zUA5gqSz(S(&@QLRuk%B`G@laLxg&9o-lElU%RUy|I^}C5b?`&m%#}S8+y1#qe>#}A zJlA{56rFdG+nuAry#!Z0TI$F$DgDPHrOEG{3XaG;e0s^p>3hQdeadRi9!fPUQiOX- zQl|<|;aauMscZAh4^0h1s|w$oVpA<-dCcw+GKop8`IR9Hb7!j20``W~6)p+$m0Ak9 z4Ch>a+4M-}k-@jW1uL0fnXX(R&MGO=5&I*)&_e5!_G<52r}d5(|FwJCe&YDlDyFW^ zmXf#29!`xn{blt1`r;Qmcdfeg>0IW`W0qR^Wv^aUdH<<+ z6WDc9R<||NWLwN~AsPo-OomuLYn`!B?J|jb7(#Dm$j&AOFye-$3J$lu7jns6u zNw$wQYWLi6S){e(S9b@i$D4-|`u2DIKb*dkwQbME_im~O^-MgcTU`En?AuH)=cc;T zQql_~GNzr4;rw_|x$qTZ-g1$UcRhAf{xe)&ek&;$e>ik#zszOz0Oq>cEi>Tv@qkX@=Exfa8=hBFmmYWY2`5Xz|?&UgF?y>KZ zJriX3dK0EHp0#ZG?AG$?R@HVVJw5FWTi;I6in~xC8kzU4S7GbJchBqv5-vT~@6xy% zyg78!&QhH2GldVS;Ly=s#8OX@|{RE;dId+E%&5jDz&vU-!JP0(odOXntig+%GrZqQIERhPp#?NSEY8mFWq+~x79H6ZARt< zhibRNxL3T(ua|0T@fOxPpAVR?ZD(#!yS!3eOj_&cdA^)xM~$10nztq^Efrn%oSpsJ zI`2+qhJ&Z=ejY2mvQMZmHRy;@>t3}>yo^1UU$$#zWL#>O6P%^@GWvF}FXNlSqimj& z+*aKPV|q7t*`#kT>z2>8D&5X*z4gqK-rQ@)_vW)~`t~P!>t&uLS6w45&nY|=>XrWN z)q1Jl$LI9CHK|;!TPou!VWV;ThmQx_iOiun(7@?gKDE%$0!)#e$!;T)N7+JKOa^*xc*lEYtQ) zj=CVo7~GfQGR5kC{<;UJ&Rtsa%2=VG_u|XkuDkP=M(I@FTBo>xpGUEGX?6~;wYA&E z!?FrO?OT<0dT~41@2*h&$~5g^X4UziYpde|d}gvvZ!;9#ck-{>yKOJsKg;_pUeTGA z?Roh9t7mQh8T`^Vm)&BUn&!DxibG`rL*U0`>9X(PWZita5jyhv9|`8kd$`SP;HKhI#3d zM-S~@99r@0SD#LSu9oQOrj>`}mij)*{C-?8J!VSQWk$htWf9p;2bUf&T^{Ss%WGtF zOt02E+_h(p=+m_ssw}$;uOz!}yrbCsrLdcIqm>GXNC z^%t>OBKvlxEz_D*_fMX?U%BqS{y~Yp>U+1gb|`LIa(oik&O3KD&vH9-soSKr)l zaf@}l=~K+=;{RnF-Miw@e+It^j8mpcshi8xA7aV>@;myc*tH3H`G@tdS?;-g%52%! z=~FA7B`O}hq{XP+VWA?a#ivoDut0W2K}671m6f5*$1{y32=*TGPQcGeGH<)$Sx}c$1 z$H_~E@i6nuBYihGm?l^susp!T7^AkjO#! zRuu%==csKg3Y_>xKq_FxGoPT^xf4>vJPc$vFnDILOgv+q&ZB7AIceQGZoVm<@plpv zWJ)IO5d8X(fmfZuT5#to4v!Xwy%SQzf{inetY8un32wioJw2g~IbcyjP^Fqd=Tiwa zg{dc;ju!Y7pWY`nRfV5tO2q-gUCUMhJ`$y;(Iux<@_4bR0JU-rE?yMIxAc;Kbw z9&_VcbPDrdM^Du~w(MnY!LM@DxU8LLRm`#)w(l|zkM)Ya6tYc=W!vfrp+Yi?QiOuf zikhcfayhgvbDE6OJ=>-7Gk4Ym)t#DlXUCu2ahf4}!(X4Oes|;T`W3ndD>Hf5sfZ-@ zPtxF5EZMLkL?O+{?nH%(&%(Hm7Rhs-TNbdG=rr&$JA_Q1l9ZGuxNTM&=VMcrQ z!;BRjSEY=OO!c#!sC|3kt+<6;76Ody$E3e`vPJT6eCS;tY;jflfG?j*M~Hy`g-+6Ji(G2k2IW&E%N1IQ^J*t?~||{|wp7 zvO}z1vgpchPl-6|m6xD#4;D-Q zdvW8s{g;Po-gPSuzwDcnFP8bNUbI5Q{8IGc)k1IhSd*>x-QAv-Upn=xk>n!3B-6*u zv+v#(xZ+&#+?=8C*J7>v{~4+S53HN!Qyb`S6x4T}?Xu^Vi|hM2P8kJ-`YN^WE)jd0 z^El}6zg;&^FTFBxJwx)UnTLPaT#TyNxv_esZQI1dbvvFWBb$}N2l3F6=s<~-kG{H{f*|{mm5pVZu>8`jGg!B+0`Xa zGG{QZ<81RWj(8Pqdt2*5toF)YMV_X4_K*51_m`a2UA3WhX>m~Zl(tRJdSYu)8b?5)9bniq@buhKJ@J+@S> z;To4tG3U_-w*E=Sf?r!*JQ=%e-kylGg_ePb{3CqD%)VFX7xup5ViDW?X+{IXs{J>G zH@!RgYi~GLsX@*$nl)9T?_aoa z@t;$T4<>O2>|-kw&0D%<+0#YO3c`~%hONBZw?^#VtIcYj+$(o_uHX|C|GPiz=C4`L zudQ%WH*|WVGO;w&SiCZM<&|w)Lw8K@`QZ0s!dKxr$1W{kD0Eo+D@-fYZ1>uPU#1Mq zLdPerw44@bE5*MxV^!uA*Gtc(@^~`sD}|J9MNRg!F);spn|pohIAd{mA;)y9JqVr&u25D0M%xy5qp^%^ZjK)-TWbbmgnyQO`*_i%x`Y*_v9k zsB!TtuKTNY=5FR%$ZpVQl=0DL=c3w0ow212Uou&j9rblF5wGXCEMaCgf6c1jovRBO z&U$inOFO=5zqssH759WadrsI?GM@Iy?z;2gqJ*BebKUKu%QiEMTQYvp*73qE8QSlx1D!2}hK!>q!G7#RdYbWSNY>bIq^F!=8i5Y~0pJEPj8 zWYIEd-jzcP94;=aO8I&@LlXcOroC|D{KH#hpRk9Zs^3 zZ?kBfSf+VMD%H@S*v}x!OK&j>j7VphDqO!|>~ z>Lz=4;`t|^%xx!}OsmrO`SNyR_`5&;s^7zd%#|2T<4d@_gp~?gK5K+rnRkB4vEU{1 z=WH%n_9CZT?cxf1t!)Y)KlUy4TWY>jV&Nu*lL8<56n&2_31!N0+B{M4>Nel2+jp0$ z2j#8m;rPTByZ6FlE&CF$^(-EzI{8e?7MB3BQxJsk3*Yx zt}NeUb*p3_Ecq8((j43Wu7YpR?L?(oL=pptiROUQ{d%<1M_~YFWFkX``yx|*=grP zr%ibkwr8bkDp#(%(6f`KS8~cM3X2{mE_>7$`$F5(%S+c&WnSK1?$x)R{n`?F=HXfY zFy}{?_DD-^S?92=cKQJn`TOuhTKRD;I2#n%$e%ttR9p8nE zOH7ha)oGkuTj=kZ&2#3Yd{AL;>De1gF8b`so1Nz9tS^(K%RYN$cX4E|>B=oz*H-cc zbahF8`RvAf_JP%Th!|+WBK=CUrs{&JJsG)rwA6dzb*J~%h}jXD>K3^Hcq+4 z&Yl%_<7n;c*q662F}_h$7u@_HFw^_uQ^xY8$S zkxTi3m44j~rK$xA+A~63z3j}qlU8cas(8rhrt)3*tMl{?79Znp z-)CUx{Gi>R*ucGI%^jhl_T3D3+PJ%ZIEQrZs-0O|aDd}kQ<25O$8!`sojYfE3e4@~ zc*gPWT#RDD%s8Vnk9!s}^c!DmifCkGdUhDJ?T5p5Z)kVuqK`fmqLZwTBp+ zMHsoe9@NcX*vNTYr0^7Xzo63O2cGX*m-$+13C#($aAsU#DEzQfeG?~Bwq}wl3sbxW(t$oTBMD zk5t$kG>oP*ir!<JvtUU6MDpf0)ZCY_-j+^=v z83)!BKDm1t&H~qcgq)Ypb7No?JbqZhLC9^UOCe4SyPHUj+9S2(6)eq$GxJ_E6pQj z2cN)mt;CjvYZU`J1!}7IB)$lYw=ybHSeAW|&oJr5*^}3ZZ_Y#A4gwVd|dvZ=*_X|Y5U#WZm+!a?N+GD z3I!&mm4U~mE@tLZ_F9)Mc5mr|tjH_6M|d91j9ML^c2~8eWt#2A^Iw*&v|GA4e%JPU z)_L_SL#9hUw0zKHlzSt(G;-B~Gbe@j8I&!0o6z%Y)ugw*bJ^L#S1v!hWUhtehi|TX zUR|?q+1$5JpIPqO43Rl8j8Z)lp1c#BS#aEP%hiQk4NR7Pu@jrKS6o`?O!eFHhWv_e$RC>)6Jb{UkWK)a(>IRpINmt_SLL-xyriS`F?&mgW2o)RJR~C z*J)S2US1-#_;1v#S3B=Z+v;zY5xaJ6+rvVqbN6RGdbU{iC39nKI9u?uKYsoGrF-A5 z_bR!&?q-(IR4tz^TM{NR%{JLG&nR!X`?A)K!Z+Fl7n6Q%KIir2r}>rn@meo>S>3w3 z^i%V7y^8(cebx!iz8`igFDoiWQ%BqPPE_xwW${&dYb#2*@&oSrWIBkSx$xrR`|4cT zdso-4xl+NpWwp+&r^2%T8AOT`UGBXJyp|Wa=8{RM)x>>mZGYI$&79n57qc@tHFNi* zXIEGj9%VW!{<@3xZtK~#M~`kS%viQy%h{l!-}|d%vrDV<15AH)7af@}saL;WY_<4I z|7R+5Rpo`(Z+bU3Drw!m0)Ykr58vt}v%A}`u$~X+yv!oxbd>4x%2-EWiG`rM3(MNgs^A9r4OWpASDy80!(@!J(XP=4)W6}~LRUUU7fZC$&aL*#Yg(E|2E2KMi| zmp-qkTy}G3$*y$&=_V1`z3ROj`+S$qwptqdz;eZ<7%!p6DiWQwf@}8eOO)HTFHPq7 zVUTES_t#c?zF3i(tr&Uzk^-!1r4B)gckPPekq$lUhs zK2N#Yyh}C98;>dny}kGB)r!>Jo_tSSB_^LEv~rY=IpgKOS6~mx{|vkdfU6k_nR`k#Cc}FG=F2c`n-p{{&D|V zR-$Wz(i@WoHd=RPc0=-SFsI(}fOW z&IA4&tSLP0nFq=aFdWEIXKrj!V{_)hW}R{^{x2Ty7Y;~ z#)XRQ6AFbq6(h7bjTk02Sbm(6dzB;CziQIrmJJ=9<)W%d z;Y#qHx*=suE z(sMhnn2G0B8U9;5VSua{V6WY-QCg8PNl!<>3HiK%-6Dj;pPb?A5r#wQV$kIBsHsaGFJxw zU8f}RaK%E+>D!bdzt!-hPj2|w>UktIGiwru1DEKV#(+r+9{)~cnx!=HAY*9fX5qRL zzbJP#KDLAf-{vU3mGOG|_@*F}u&U#2q4g<0w0kpK?VM!w z#phRFF<phPFX=g4Drvr@Tzx6!!mI3USKOwrN|ms6tzTOIab@2VvwQn} zlS3axe{Z<&^mJu|;hETg%bTkH=qXhDl&pQU+E#i4*IZVoZ|+KW%4;{>y0+Bny3beB zNtzqAY$o_A=p=rbT~su=D}Tqjpq`w{C5zoQzcY{K^$*y=Ie|<4D(~O)m*whr;;-j! zovKi}_X$Ht;?WzPYr0b9w$-*R+!k&6-f60y0n3MFheh9>F1ur1J-K+t^{=b^{d_VW z95bJ~@%H@X)@v>IuFm#f+4*aVZr+XU+owBS-2L@#Kz8&SZ?CED*8esh)DoNjBj?ck z$DfZHzSGmIz8CXdw6aiTlF!M)&*g_bjgNCUD88SwRC>+Fw-J4}1HD&Y{l0l-{p7OW zcZ+<#=}gT0S)6fu-JX}H{N1;I%~>_s{?ye#{vXO)YF;e){&w2dS*mWU_B=6Vy7=Dk zq+QSk>t||b{D0N$J}@mrUiSU7*x7p9xZc!B1qK@}kTGgK@k9Du-5Jl1v3ke1?RzE4 z+qXM^o9(`b>sV&-W}k?OSOdD&eYKU_1TBfrOUfj@_nF?;ThCiJYsswz!cJSO{HGi@ zU#Q*!vw7qWO;QGe$3<~1R^R{i!m_C14BJY$u~RdyjkmHBA~ zGZT)pD@U)eJewFgk&7>VmihNyKK=#n+J|=3-ARu3_jWg87kkAe$ds99@lZIZ)_vP9 zDebcxr>*Hf`K@-wi_n=qmsdE4cQI@_sF3tqQryl#R9$S&VJJsq%@VNzrRz=PXuUYg~&eVAL_v4QB+qXvso=UmG zJ<-BVdTqDmmW9R-LYYfzV^=Xzi+5Tw96-ZnF4#A5*m6d2!mU$5*c|wokh~>sF2ibEnD#fu?4C zi91&pT{B&}fNe|B`Xk}?Id;dUFZbOwWm@w=M%RwlSHEUSDYH$m7F8CR_w07skE5{eGw|#eYjiT|V2KJe!W=VfrwDa?tpVZ7nQ`ArU&N}jN>2(7f)JMrkZe;Z#}^O~%PvwAeacyZCx zS!oUIHdQ_U#JB0))<4s`?Ms#QQRBpjhq24AEQs+d`l|W(V|MR`C%5KtUi>yQHL38p z(UX8lt%eg_LryptGu9m4oN=V?G}GLu@Q~=j+`y?XEwr;P6;yL_H_tw`;nST`S%(Qy zPlMYpickIC!zk72pYhGXsY1kcJ~N-u5eD~?Jqk(_P0Q0bIh<1FNL@Z0qQi88*?6M% z^j$*V%O)P;b~oZ&f0cnjXEU>ik5SW%mSaKAk2M^Nr7AoV50;#ZXg%-<2z! zhg%HRxeDJF7WI7OveC)qVb`KWCjWg5QZ0%lhg$Vye;i;a+9~RJ$oY_*o5akd8BSW3 z3?C0kNlIz3czkorTGILUG)EDGn^aK6svl0DeI(Ue7z|cOFt}AN`c{5OPeDy}QI@}) zz?X><%=2OtclO-cvAk#ogRqYi)9QPo;)g`^Qm?L9l{8Z+5!8dc`lyKI*=8RWu}AHa zqHpFg94gRNu~0hU$+IMpah_3##muMDrh?KPA;u^C^?o|b)VS42Bnt^GXJuAp_;*o^ry$M zU*hLxO;=Pob4JNMDBG{SqT{IL=~5H7)rDS~ug==NdT#Ub#FQyPRrAwoH@m*wUFjjW zDDl4d>RnYGMvltI)K+KRx+bbIt-Y$(VWY2kXjbl{qIkJCdg`r(@rw$r&Nv#ano(fb zRdavOy28YJW?i>a56>*PbjpdEnw2w^wX+d;rVL zJQa!L{M>`HyLV*dL~q>Ml&)9)X7#+xqrqFw9}4oZQG9TQXM@MJ-KC2ztX=lJ;7XI> z>v!d%A%>cpn+ug#(~2%N&tFxTyWBIsN+qtUc!gPS*V?SvOOsCZJx$fh)vuh+`0q=? zoj*s`yBDmk-EwQ|(o+(@*Ckz@)E2Cpar~{FruIiB?{dD@4VObQW-@i=mH|I{sy zw`Esd+bL7$zVhDA!gD6Ap$2E}=G_gy68*UI&iYp&c_lMkr7{mY+nUX|?6E+x;*dns zsuigg?na^EyYyNVIo`I2guARrn(|yd()0A*7KH*O$CL^cCAFxV-=FtOI4)`a(4rbs zdq_~j@no{^D|<%PXE$&TwR%D*1io-rnjYbEP>_Rnu-w?pdt* zE%MO6h`6fRrGGbOgvCg=o~df=b$)PSXP$`U3)2Njs~ir8%ctuU-|f2gd$Cp5_ryC= z%r}GmSBaRuu|K`${LTR0_~Mh#ugc2pb&R_2!11Vb!{jbCp+0|A)iQr~=gr4gZ)U%d zp>6tO6@x?GLd&8Lf?cm%-#?sHR$#cPkWW8@>j86j!DsnzF%y|JgIRV6pE;l+(!%n4 zL)Ycyjir)nR@9m+9$8T&!`<>YKx*CE1(Pl~nRHH^wxpZ&tX!;r=4Hi0mE|+8&JK_| z5PjxB%qIbXIY~E6J2}_#nr2nad7cs0u=$aY!y4&o|Cn~mg`6^*CiI+l-8GNV;lh$h zJO18_6Wp+U!L6S2jAv~<80Y+x*m$aQVu0_HY}t%&rUG+41QJ@dJiRM9?c15cWDgf( zrXr5^mp#`nvWOhg**RDOi z%KXYlh*OtgYG}`|HJ7dh&pNT&M#)R6)@`?1^O@EC2^}i~IiIBlIxb*dGFh!r#sAH- z-nFfJv%X5Y%~0B!uBtrQjc>*LBGWBe+h*5#o%G#uE6esAM{mB_irruG?nhtvx3uc4 z=xeqvla7C(f`7LCGdG{N#5(Nm$rV2~^|>C+fFh?D|vMjJRhZ`RX=X4 z|H)YRpW)qZ+1dV|)@EN<-nKU2{C|d}vLBxaxjClayeXId^J=Z%=XLKgI_v$nZr?U9 zPxtGc`F|_kxaNB5@|n!8o_*!{jE1mep%kVA>{D7i6%C$G@KnhRP-$0b`?ASanBdm)o-QEKPV!SX_4Z!NYX*|z=BUJos?e0@8p^NE+sWjI4-3; zcYEpW?NxQ_t`_<|bkNK8w>JE|@mAdVM>#25OFHL>imGfp7#VnF71tJ-SEdr;u|`o3 zr!9DPUSo4^ZSCHh45zR6p6k$Dw$bb0{<0nMpI`MBJUzP0H@Dt#UQOCQ_TW{ot_1I4 zNGohwBGgjm|6AE}^Vhhud+TDQ_AlRU|8Le#-WB!CQ_rrLUuNFVxSqE_VabogjtPcK zO?86T7tTGpMry(3Gc5@_PVeJl30JffQcRww=DfqnH@G}A<4uE8ae<0=i@`kCW5Fj* zskjJDS}#!0Cb06RLXD4MFK3@n$m5$VK3c*G%a5@!>=aPfPyE}X#5qqxV4fyJd(w{P z4&^R~^GXZ!B854Y&GhigvKA7ayYxh;fVDpd8$;--r^*a&44MW!*F#QD`Ww`$?$a`9 z!KvC7$3jnQqooHs|JNQ(Qsh&4b;VzpLAd3&K-uBf z7i;&r2dv^KRf>2}7|0``-t}DSn#lssz>7Tv3TMtqacLe&KPL0+(VVoRH*GroZ&XEN zt_WV;7pS;xpR;aJ{3@%d25WWwZakf@(f{PW!jBEdza6t$s%ak7e4y^`$F_LkID5xc z@;uc&?w5)urZG;*$z5T3Ad7Q3-yJ1((~<^}c1~^kIQWKC_2^Fz20nA^|ZSugtm$ndkFW*mKQrUZwXPb+*B+jvf^mc-Rtru zTJP4^UB9eXE3LG4ZJpb!t<&Xw_rJLG`~I1lV*AdRrH1G43r}r%^5m{o#`agmn-gy} z1(mMKe>0nF_g<%_R)aGkJ667VddN_|Flx(d+oLHB3no~)D9o3!eyAm?utG>MCHI)6 zvhKZjrT51Rw_dbT+~MEH@R;vMIE$w85gFSFmYd!=T6lM`25U^5`%Lq9(}Hic0-P$X zDl>`%9vQeNcKo~He}R+d)11C$Z3w_pCyQp8@v~=R>W3A@Tp8M{5bZy>$hHT%wHJruGPT30zmtk z?&xmwf~^TGRlN)XT&um#zvz2foaW@ z#(NFi*|CCKbx%fxvg{L^Yg%{Kk z$7!3zW=GmBk4%``8j{*Eq3cD&SJn@V(#^*-RTzqOtS)f&18btm7J-k$w*WkhDFb=lurDOFqdAC!&jRm}HX!^ptoWb#-0^0nJ?x82S)?bx9G z%KFUrnfn%>zg4|+aZR-9EFR;&cBykGu7?#_hVM(+P@&T?=h@b!Uu7hHn4a%A!PcX#?wjvDT~>3ywm59x(t~}4Uv9Ol8Beoy{w!Z&ZSI=)J+~s&{i4|yu7%el zekIzs?6a9M$w8%KlICrxUy}3Ul;=EE(3!wv!IAaI^HA)*6S0>#4V#we#S0nCS)iaU zdAebKi-MP&&t5_ z?0T{|?n+yWz>)@$$(m^v5-JMrz6A%>J6kSsF)a7b+34wbsne5bt3b1z#zw`4CXdjN zIp;t8I5?4a$_E7xzWd#08B9!GURsy){AtL+3o)Emyq(iFNvL@~W?=RXTzEF-9LtYt z<wiy7*z`JY)}D#G{~C9Gx@&NG=I@7@ z=RY1-7MeCET;x=xAi40+wDsw@4kJf`(AR@xvg4nU8D4~ zkM~V_QGB`~#BqA!ISUP?nYmtmU+&%4brZVIw9JbwcH-U*uj3Zp{?c6fYnSizr6tT) z_ijy%{?D*e{P1-4mc@Q|xl)g2OPpFB$9si|g}ujARMasxO+4&i>Loc`59=f-* z)jc`TH!HqlN9Kx)e++Y%MztQ6jJvluJE(wlYUU2x7Ma7#HZC&mnVNK2bBa2DC%@C) z1q>FC54fbcGgUwFjF=c8Y_ybf63>%1sV6Q*S*_ZKoK#%$7FfFPIm_8H>8#IzCa0Na z-DlRnM=!e8xEQ=3xuC^{Jbzy5q~<+k44E3{u`@T{_RV>H^i zQ)zAUm-aOGOpjOp8BD)zpH*UR|Ff{Vw)&Y@ z#uMAa-(8nKYY*I=`tsY`Z(B{`-YvZ{dzoME?)o_&ii&q;^dAp?b4+Y?;o_i@u2k#w zbxARK(?!eXwmrO3wkzw@b(3?`V*~7b>^#I|x|m00`FH2e%1qXut@-754>9tb^;1mr zHZ%~>Gj3j@B$8gK>Yvi1t}$ta#vD}!Mz5*IZ!$az(3m2x@z}C)(jlo*nczJK7-Bvj zQTTSqhr#0EV{SQ(c_$p0X8BxUJeJMCtT5TL%(*;77k z-ufy~H9Jc(*h2vK2}yK&{_O{GT@eV7_n_oY=iyVslKnODEE4&5><>w1+@ zuclCphg8%>TfZ{ij+3jFY)<*I_`;RE?P~2y^Zh2Sua3Mockgb-D=}ZQo%p+~=AVAo zV$Hj7_kP~hyY95j`s#9+f#a6R55wj^7uiprpI$DeXX|`Lm;augqwbQI|7P~jUuN{R z?)Kvqo4O;I-IzIUCmj_kvYfLoG@BzOdD{dA1`Zj;tq}}9e6=@^C32`rnER*hHNO+8 zxaH;6^VNNfYgv!qmAcrp)k6M!+;Y=(YpcTUp7gu(Z`n`d6`fx5KbY)1W^qbMWM1vX z9k1qIcwhbP>8{<}tKJujTV+LEvR0L95-4K{5y;|wdBNkk=YmOH4Jis5!cET>Xn35y zJ;kC|S3xeQ`$l);+cuX(p-|Q$zk4?nyEwUZE(P?Qm?(GoA&0L4=S%~JXIplzV4BLc z<5^|xgm1E@76KkzY7?$%8mu&aYY@SxBG8tWt&t>mh24AE{QZI~4GXNN9(6pTbUv_c zi_)UZBQqB{&$+&J39FyN%ta~bLXixmJLY&wh^QqB$ykcbTe$R6A)`xIOGI*KaktY0 zk6BMmrYPLuIl&b!bK(r208eE&Uy=vQV$TWcO$Xk@{JSti{yxX59Xbb9*I70me%0c0 zK#MJ@K89I{FX7F^VmnU#kIpAmk2y3e%<$e7AkpwjMKe-O!a2ol(gVJ!;gUi#RTz{~ zygnDa{n*s&uBj#F;H~pONMM=p(J(=$rw5muS^4c01Eag(>M2?i0u~($ySp>b+BL9r z+ns%P4=bKIepc|y-BZu%d6HYMtUnqsxxZY3)ns)?_67!DM+@)v9E+1jujw^qt?CWm z^GeqKy4mcdVK?)7bLW0!>?!)^>pUU5-#EtqSH59(vA1}DYgu-GPl37e;R!-7eWsk> zyv-~+KH%ML+rX$x58q|le9^W&x8&oDVn6xPkDKlrdv_nLO{<=h^09g0%`Um4NB(kh zoDFVcSSQ%JId9tSdw1Onw=u3=;jMKkz}Kpm<5ix|Kl?*3&dgFvtUW0`Ei~fFiOYWd zjo|Mn&qqx}C1n@icQyO-b2)6MqTyHu?s`P}zwqB-ldZeGosen~Im zO7Il9V7VE^rK|n7Je1k}EZrxBb@$b&w!R7f8CLjjZWHvEdK`4)Q>C=t;gagW(i4lu7SEYY)(aXOG%WlziiEBzdK`GB z(wZn>IPrwe2f@Su4wbDQ&(8QfzRl&(GOOn^^N|Aarl*QEZW8Ah*afM9srN9_bUyIQ+N|39tou3OAe*n`k<{=I=^gPsJ$@T0NI^ zt(arL)1e`_$BmmsvA{mlKTmyQ;;t!LuO0~Z*9drJhHyE)^0rS8_-1^<~?BFa2}SC%EFlvm(*|3_MY9v$Rcii+1d3 z*STfH6R_*|qPe>o_vf7X&+xrFZK;<6gWgK>zsiR;*I%mtH~skO+7_pdbx)KncAO5_ zT2{7fInVXWHwxaa`nmOn+=Hnin^wzk$NFqPbG*4Rduy^ZchiNln#Lk#UyMN*BL!Gt~1$lUuFHvwg1`sJXfxJ`-)rjSMp=Bf))1N zyUZi7CA8w|f`4M}W--~Ts*_et+a+6iW~0W+lb3cbos}xIwwLSrl-FXO4W_eF?8G+6 z>~vcpzU|5lSNFZg=5p~B2`bP3IY;KkOZFw-)Yx8b-CLQdx<0dLos2Tq=fs<9Pv8A3 zT)g{=;H|hfEfZyK%zkw5&EYNLW>@PjFHdyYp1SY8-;7-2z(cczIueWTMs2)uZ40k| z_0Gyjv40&eych5I&v4h)-`V%JKyDeo;=ShJpI-x`e>Jb3I{(FghG*8(7VfCI{PNwL zJ%u-|au1gzUJP6MHbsYd>RZOFy<556<~RG_>sxw`!>a!AtW!>_~x${pF=(Jh}(zi#SN44A+) zMTmKu_-s9`#0KVY#;2~rhDwYk788~qw4HEQL!?-}|4QN=$5UywH=G?*KZ`B!?BP1Y zBjBjdYPby9}n#s&;{N(NSv;|vRuI9M!iG_=;=YtUl+DAug5E_htdKrM8Ymk(!Cp!GFI2akY9 z)hCSE0vd`v7|sWI6uO=eZChY`!b7-4Y}%ynI~tX`49gD*tNxDXF;LX-P`A6mF!7v9 zQ&_r9q6yoM7OQ8SHpUa4bzJ$m`SdZazR&ks)7g||JXF;lALYNuTh+F`Q}~c~pFo_N zvll}zM{BW`TL)i}Z=eJB?Z@0}t6NUjpT2wc+V7>Yo0hD&e>uu)mv5C~%KWVPE7$)> zUb;fhsij8wri|99Ps{G^h>pJ;7<6m3R?B(MaFc&(pN^Mi-wa%{*5g&G+tx34)VO1q zXF1j0cvuay(=vi*ANS{9luj-#(OhM8 z?pCnBr;yd^mA9;#Q?#c|Z=A8r(?4&<5#}vk4(UQ6T5mXYB_G}wJd@<(rPUc8B%Zp< zS7*|#_O`7aPOB$8=AG;%Y>?ugX3*VoK+CNl${~1?ryAFz<$JO^MSE3OxhZAr@h@_o z7UlI=^EAsLK|XzVKbJ>>#qoPKT%GLnAVg({)**%!F(Lvpp0}{F3c0WtGl^+*?U?$$ zWzw$%fgMv%Fz`1tFkIo&5?LS++rXltP_lp{sNjI?-Y4}rssZi%YYvt4Qj+=i@{ygfPygEAZWq9z;yEC@DTQ7Uo?dBu5*(^t^ z+y40eixn@jkN$eOuRi#Bc-+-tS&_aL0wa-?H1I?ggrF^&WgX(d5;J)mcGB#fBHxOQo({($ioZFA+77%Xbq; zWA%d-tE#R)J%1*4_quJL8n^A%pO_lYUHCE?Sw%=KEaqHcm<=a-RTNqI} z+ChWmO1U;e++{hNwLxpErfOfaS{+)w zDx~Oqbk}Z)*ZXa)W=`L|biZ%(6~3j3Kd(3VNUf8fa$NoHmH!MM+!gd9Ie#adkn^nQ zN|uyUIKW$c)ga}h&yJmnLJiC_KTct1ys@K?{g^;|(T*n~PlT8R^-g{~ZlM_D>g2<- z`+>*9rUbu$1Fj1utXRVO(ZJbBC`92GUsYmq&!GgPhJX_Zd<&}mB?Q`;56lbfQS00u zrtnCuXpR`Wz|IGLs{RR0iYyEVkMUF)|N=PkDwtY&e_H&fuuvl@;L6!RiqZdd$7$ zpfeBKG=V-H#bv%~+$&$Iws`pKI{$E6%rLF}g^&^7w|NJTad$TySs1`oFirCiV=u4P zq>!LM)&6Z#`#CgL8hLddzVNd{W663MyA{1h1iJ0)cL{I?Og!Xb&UYs9z{hXp&MHfG z)>u4cbLG?fd!eDy=|hWv1t3!ldc+&;^gCVgKSNukr~S{ol)?}5Ip-VzU5>o&fn<5*7MoK~ zn;zYftV^|j#`bM*-+zX;$JWWbEYCjWHBV#tM5)6ccFpCU?WZW*v(R~hTI;0bSyJ-}{r^k5CU(jx8NQxZu<4MI@q6Pr$>HoV4ea|YuO4`L*;u@|=z^}##4wBW z;2X1fuPwYJb*k&x)__NaefKMvXD{(hVSnG}|5$-F?w?s6>gcIIUF~zk zqKPZc70p`C%%`X8;K*g!=urE)G zgvA1z<}j%&Kegik^Bk$?yM1mg_f!?pbZywPt8j+qL=RQ36_FnDt2yM-8Cr{uYb0^1 zFgOXRt(ft##blA_oHnM)L(?XhdVH|=(G+rIS+e^?3zN!91;*q1nA7+fO(YM_aBAQG zx*=p6&e*XrMWaK-oMSk*6YG5v%LMXMa(7+ z0|!3a&n*Rf2RKE~)+p*s@ZbL3fl;#cDU)MRx&>Qyqr$QYtKYV?tPV)7vg?uPQIp(pusP8?>D4>32j=vnwc-DIh%-3x|WIo7Sd>&v(7S>mBv zk-X*Ebpek4gg4@E@0h)=UiK%~y-p$U-f?#KZYGhi6{_O=lE=Lleh&Jb`l8F4?`G)z zt9oDA@0v`=T`?(h$CmoPM`P`$|7YO;x&On%%XhrrT;9&~^mFmE&1-t=CcmtDS#A}* z?xDXF^P}wxKGc6_%UvD(mtWQX+`n+fPupfYTw?NACHa`~@)gg}aB&Nvn5D&S9t>S> z3{Q_S@szJv5hdynclf5=3AKfBiDy<69Zg9{YJDQGVy2(ai?XH9f>Ic{HZX8EFbXIf zOJk%|G@YpCR+S z>N(^83_6Y-Gu|)>PhF>5;bf?y__#$>N{Cm&~t%tNu%kR z!zGWR-`I#-l<_LG#@}$dQpLd((|y!U!cnWhPxjJd##1jRaXxNi682>GP%WOGV#u=4 z#;9RZkVoN3+tmgWx|V6NEbF#=Q6#YBnVMq5thX#OZYl}sw-i<=`&{T`Y8O4I$Y9d2 zbIP0s<{hins5PH*Idy`^(@rLFfy{&F)?tdPIt*9xJx};BLKShT6iL2Oj=d zxOC~JU)G8p3MNhKjMx=#-k!sklA(S^xTo{u<$qr|CzbR#D1UeQbi#l0wu%Fc#j`k^ zI_Jq)vzRo*ersn6b3fvwC)HVS_~F5jbu;}q3d{K1Qvx*``BxN3YI<>OMrX;ND)?mnb(6MgHN#lPfTo0Z&s=Ao*-i9=WBBck5T*Y$R zCV6Cg9^N1)XTakXae=Rbd7|w+0kPEOXB%cYcP-F9ZE&fPztD4uN#pYAvtM>>5PF#< z>B;f-QL=vCp4Hp#-F_$b=c>-VQx7}3q$+Pc{uXmD;t02i#iH$^Z$Gp?oi*vAW?A#` zT{0<`eKTG)u3D8*{{Cuo-Sl7WEB`ZizN}kv^*_Vp@`zz-5+AB=2>~9k({Z~_bIsMG9z0dTg{*JEU|9^RxP^rvvjkXof?6;?+ z%;e-L?ulBKzAIbiNy!=QO(immC)XTP-Cou!G2z{X8Sde$&vW`zt!`mTa`=uk?n7GD|pt* z-e-4$g@svFVx6QT%e;H{IXE~S57s0+xWq80>i|=>cXt%a2gcM}8#Wzj@-v=ZB6(t- z#SE6p2?<4QOaU>QCP}VjCRA$>1%)!nxw#3x=ZaXTCczo^YBY&dA~E$Lds` z&M}4iVT+Ezteyl0$;KHHItpu5FUwbP`aEe+G~o7X+T%3y@l763YEWe)<@|Iu4pq^yX5F=HvudX-JU(xBBVZx>B^ZrUpIXJ_fB z{|sGPzr(H<@5w8>9lPQA&bp=#S1+-fy8mZ*8O&F%Q#E_ZHt%XLlNHgyU)HXT&WPxYSyafHVV&bm$*1j zmpm;oq1mALjQ|e^r=~{s6k%1CMa$)GlpJJOFrD-F!gVv5&)#BDT$Px2CP;;O%hr9N1-w~1dhT5Cu5bNy>v~PVL%&6zn!GeFacUGBTx|B3 zE-iB+reMuS6J`Z@H3l?zEoj@Z<$+km^S0t985K_9iN=#2a|ce^xt4DNQ%X;J zvX|Vk19j8(G@dv&x{IkOA<5UxbUdY zl5>X4>J<%)S`G#b97lTm6lx3nCS7ote$=3uMIre<=hIsgTFO)%Cp?-U(=b6Oh^_eB zp;rdYMh1LSSgit17J9fWDE2qVQ**1GaI=Xc^v#=x96W7zujX!zc9^AgWqHWoxgoQI zX4wA9wi0n_bZ}fAKKoIt@XvKq{>tuOyCb@AeQsUv^8Z(M?{1u*l5};RL#ia7RJ*!ZuZ5i6A>O1y817hco-;uJ?g4=hA@7`75o^Mun<65F{ zr=I4Kb5bjw`Y7JBy&&Dw`RnfohKUYujpbc5&o@0}ygX4{b3T25L zOZwa;Q+Q-G+6DF*z4_R@(uwcr-yQ~L8#UV#8v-mWeV$KhVicGdGBbtiLni}ocHE;& zoS#~+blVg(aCqp2aB`gUoDiR5w`^{n$&`(Ly%h^3e3xhU)7Bi|XjWJ? z??*C&hB)I*K6i$7>>d+re>feFH1BPDwEa^XUa$~GrBolKbAdrD6 z!#aJ6I)h-Pljf{)pW}`%_?lPm3R}MCT}j8L1isDNtAt}7-RjT$6p%i1{zuvG3MaC3 zALRav{Iay#!Mu8x-qP~2W0$utd3ffOD&HnM@zAG#OlIBRF4uHhH##@xn34edMJe6> z6q7r*zIIKWXqd(9;W#Dy_LVuamWnkBEOcsUlud6l*zr{K?e)T~x8LTKOTU^ockNp9 z@E!GR!L09;-+O!8UH+MzxbyDTP9K3)3w|%|`t$TvzJKoam%G#drT!Eb|K%BWVOdzO zdHwTHsr_G)JSK&NTFU+26Z8J}-g{pw=52i&9PV|pS3@&MNNBN#>BO~+EVDY58a=ft zj;!VI2zdO>E!$K>v1rkh>^6x+o1_q>yM+>WgM%E60u}8HN>*^B-7k34t|Y|i#3Xq8 z5T}b)hzH-r8$N1EW-Hhdw3;6&{)+2dveMH+&49y=iRo3yJ2@r>-{2rd+Z|349*PGm zo^zN6)&$8&`%IAYHR$rn(lTK1KFcs&PAjDG5F@kaRppk8t9hL@|J^hAwo2&AoQCsT z6{b4&3)ttq$Y9%&w4?0sl1UQDxBu>);5!he`Fqn8;l>NIykbj;g`;KoeZUUE{Oj~5x-SS&)MsxQvmlqaXXxf^-M~yvD-RALy?N?j(PCZ-H z5VSaKnMG!H&*O7BpPHN|i12;vI=0T}$IRJNK6JfJP;z+q`(S(4QDYasKMyR_e-c?j zRph=lUEv6;c*Sxm{Wb%a$E>Dp8NvJ#g|n9DG@Oc2-tZ;wZbF&vtlqC{SEiP|UK{o7 zTr|7VzTmw#(+c=gh=Yqx4wd!?;D{VFCf%lzS2 z`yhwnKRr)B@6x<##lAR>DKh5rHT9QwcQ7y<>Dn@%)9S^cxhjPVCSUD1$)d2jiP=VZ z{eOo44Eis<`u^;mdu*bH!&eR4MX_6E6?MpeDm%2$JNDgHn=e2F|mo3=OU8Sj9c#9v_pFVLp9= zFTlg8^Lu~!&5jQr=KM~Wxllz(;}C&3@&xKI7Dyzpmy@(F-PVPkxtm{g2_xx$B+fn3@a% zn+jzoK5flpNm|G?)pc6oUAwri+}mUOZiH6dji|qPEpqY#X3yszFa2k*mS@>D&3WOi zoPZ$N%{?bBzcu%*H8ivn+#b-iQM^wjYOzQ{{5z4inL<2=o^$;!Naaf9uk4GyYU}rE zeJOXj&S|gmROXN~Q=Caf(l_@EKMSL(ExH|Lto_uYA0x1T-rzIc~r(92)V zg}TcwE}Lrjt=23vC;!*s(lwEOhavO|00(+nE{FO@LcKE*{Dk0&_n zn5vrIV=`fvzt*CU4mOG6pQcToc;3Qcszu@%gR3q3zNi_t`ycILWmx3h^!&CE(-kMq z0#2m{>yUE=-JZ&)90OWD9GQCf-4O@I=vhK(OsR|_k3!}!Jmy?$aYAe6l*1YY8gB%= zB0dN(cv+k%m~mF2cB=8W=S;B&SmosJa!inVY{DS$j(v-|iB-eS5bHA*oz6bzc}|{H z&W&F7{dU;}bFsW7`Wi;BZU@gk@3!#I@}KS6lR4Cve&5dPx9EsPd)P9L{|x3S(p&WV zg(sRuL{FL4tL(A1xYPEDg7AkX%c$Z?Bh?gk&Tq$_|GRRpL;b?CYTKynF9&4}_f0(Q z*t)R(>T*^2?oC`vyg$jT|0VoQs>mV3hd<}!+d0Zh-yQKeGr!91@xSZ$of;iibu_KD z{}Zs@D!T4=*|vQ1ZH=>YqmEtO`q#~(>X!Ewv5mKGtL>lt?(j><8ci|1<;Hzy14GT_%z2V) z+8H@yglA}moVWSm>>044v(rO0CZr|QD8`3Lpk!97l+9wBMBb=U1`nB4lUPsy{|df%?p+0g3{wRwJhi3yYA64~IOFcE8? z%NxTy@Aqgh{J*w+(u!80z@AS_M1*v3` zTgIR3vV8udKGvnGGp0P=TytjUm#V}sjdRZ))rw5Gy6R0mOW_tL=Lu7^xH@ho2ARG( zQ?oo$^rP-Ak;2$v54;vir6!%Sbk6jmPn@cZy_2n-~eU>eM^5b#Km$vUNXRItv+_fio+q!Q%!dKlt^)0rx zw2oy)`v#TV2z#Y<@ZIK3l?m&fUuC|$ZDFoQ z)fQdTp!CnBzv4wB7Dis)u{7VU_oir`)U~en8NAu6r%OWe8pN{a23dta~O~ z=T%I|o8lWaHS4B<-Hzo!585Zb3S06lB>PU>&7ePP?Yiy6KiL^)=C4iGxLnJ;cHPof zp;y*bXY9EuWV2_hR%ms-u-zG6501I74bLx+o0#f$;?j%)wM6%a9@(!A1=9}*PHFs5 zVt4P+WWC(1?ZGdjHqEuly5N1(z;8L*l(g-0qe_d+CqL+%Uv)U`+vWNNA3ZLAiMSN; z=oQm!H~Z6z3tnB_Yi|Ac$@3DOfPib$H}tx+o8JgEGF<*Dci!<8+pgb?vtYR7E8W&t z70OY-m*wyCpTTJUak1#Vhvs^1eKjkqW!uEUpl6m}v-Wz=dVeDGOY{qK=PyM@%V#M? zJC@xKeJi55ZLU@otJFf7XI*`|3741q>g)QSUHR-O5s_W#p?G8pO!f)R(m3yVH=1{vTZ?cx@ zo{fjr>uTqIpDq^8krjQVl&iHhdS%O&g&*1+R|#r(?y6G_Z{lE0Ka{xm`0s?4%)F(F zjE_0f3Zyy|=KWcCp<~M{V+COYKMx@e;oq(zW#=_i7gqW-F}2M6aiFk(S?}JJLkz+J zLc*r(`&2Siez$Cl$Yu_gR5{*;aG*%PUmRUH3eEbZy(Uxt^{U zuYB#hSoF-YL1@OrrjC1e;xm?1Zu<6HsrS)gJ(iI>HsvZddOPyaW2 z>94Z+zpCOE+dm4)UbXu_gXV;(sU1(BcAhln-Zi`RY4)ij?00rwVe~SPS5@%;Vh>Rz0a$ z-=n9nI9RB5=K+?p;+{TfhMg;-ukBc&5s;`KIj7~%<@Zf1LpL5WnlmM7cD|P2)DYe? zlYEw{bi5K?a9ZifhjUibPrsjD9rmAL=fBD2qS*mr@7FnNNhR!DzwCVbY>RDQx1ZYA zdt=+~tly=*{*hl#Yl+8PE7}sc>dW)$8EOYV90^%H>CBZM;h{eYmwKElIsCEIR5sf8 z%FW9qt9beHjwT)7c#zxM@$n&T*R|DA{XAFHmFhY+MZTS4)xFZ}SM<@lUoUg7j9%+K z*{aI>tX+*M~*Mb^S71SpUoLS6Abeqg%sG=e#}cT~hhi zLqB5f&ZA!QN@iKt&3$z(@p#!b&wVy2Prj5)oBzy|cL~#UJLfm+gcuI;F1xs5vQmo0 zvD2wH%DV3@xMB8c|H_rlg=;>>p4+uzU9v{#ijtR?m(QIte|3m%bNWql+0$9ucchkc z@4Av1n*Zumuh*=(O9Y($PTQq3o$u4^Hr^H1GhcKo|9kwnW_xMWT=Nx~bEeGt+stAj zkml9vGI2`ouT{-+Kh@oRv+7jN@;$$%N_}VB`8MjyUaLgW_g9|n_pZN`>i)-Y@|Kcw z7fU~Lm)*$I46VDHomn2$yZ@E8saD`hhv2epdqS+DLqA=W`g^Q*@vp+$scE6BQ+?0u zTj;j*sQa(dt%mD$injdB)2>`3Q*FF^SK%@dzx7*AzqVVP_a^yB;H1e@)aFf@lC?kQ z$>t!@V0IqEV~kfgC792IHHm-%G1lt=)I+)$9B#v3r6xtdWk|9M&4ew?gvNe}=98XN9)bnX4?Ayn3yT;PRO-YUb&- z*F9PASSqZ4<>>~MOolA(DG&Q5J-n~+Ha760tn!u{vv=oi_g;D5s59!j@RCoK{?ERB z+GVNtpCR2n|K4|N>9n0&bo2FeUu-=dn5`M2%Xm)1UvFN9PJHX{fW0hN(sxZgf8yJm z)7w&WckGplyYbHSfspzGi$j-B`AvQMV25Ut#7<3N$G%Dlt0=*<^H@2$yV@q$2`%Pn zlQ`Vq;CYzSky71`*DuP0th$ zsyLi;<+%OH~Yv3jyZz|%Y{8}&Y!L(t#}ws;CG@3-u6cMz>Ok$gzRVRY)A#pF?5;Vt<7@dVvjgpQ^X@jf zxxTADzjf(GkE~86C+|&GVpkO=p5NblYf(l?_@X1H+SxM?EO^M!#B!=fe3RG`4lX999}Z`Y{BKJnE+{OU8tZD&vi=t1oTW^Qw7WXXorcEO?evi>Z~lMb%N2`Jh3A80*XZM(?=0SLHrRnJyMOYpTDo zQdGUc+Sy)vw`!TsF4bOix#G6endPt9&O4vA_x6r&bq_Aqdhwqj_`HZx!{VpY-#2WQ zIW?2N#HzpA%I)^GySgcN&8GQp4!QR10x!$MGv7ki2j2KoxH&zhj&D%BN~Lal-pXyMyo2bGN^?n?2FGRn3IO=~;h4+4_>4++FjNcWS#A zEmoRbk>SVnq>sU;=i_7h6ej7OIeX{tlb(?G##YUxXG&x4>WgJ{O)YOwrhKc z9_OuRJx@FPjTtUZkf~4Y=@KuWb|Uv()k> zfAZ|t4*Oo7ZepDKw(Fa6US>bP-}?UyVP>_tSL}Mdwyfp1Wy|8Xw3uuEG^dDnbJ4C_ z<=VNIF5aJ?^D(fU*Wu)w`a9L!vtDevc4u~NanSR?FRNvJ*;2NAe=K^p#Cy-hw0@@Z zm#<6v#*OhR(4&k8xxoLgE*=F!%{BAp4ly*e+30ZhxmR~tP8ESO0qp3FiXa5 z=>sDt#rApLM@3F=W=Le}oiOD9d*|DxM|;kF>r^T@n3SsA+@Px=Xu!b9_rgd|BJ&LY zI~C^u^RP)C2V^Xg+Eg?=RFAYyR5{I~V$S*+-QtOASzQJ#0TV8ovND~R ztl`Jm>8sKtnzDE9NAVd4O4eKpp220iVYc$BNKMbCMQjUNeg^XVPM7A+-TJjTD}1Vr z#1Vz5k#bUtjAk{hei`&yrh582?XpF;e+hQyx`eH`w)a*<%{j(%ng$&Q=grufyJg$& zvbKf~mtIcvTB+W0n9s2IWww}E+l8J>9Lqkat%%>P?Em4CV96ct*6tgez9mwIGfY?Z zC{I53)qd)Ut;YlIem2{6{k`q-;>Ke!m7%#)zVEoYW~J^7(S41t<>uZp_nK z>}l_cU!j5Cw;o*RQRlc@`kx{8%eMF1qNCq#@7uNN*3);XR-q}E-Bz1w&tRYV(d*Tz z^Q(6T6{v+xJ6oVKv#M(Lzbly$-!-`cdYn$Guf8j-esW5>Mqt97+8MIma?|B}-c&|9|zAM|F`}NeydF_#R zbpCyHcauMJ%G}iadPr{ArhE5J-CKIsd{#eSs@K)bp!#L!^_K=7j$Zk5^U%$&7*_wDcd=I^{?w)fPn)~%OUTVB=CR!D!jI(TBX z<<3{eQ}>*0e{*%+x4EY?cHA{Pd+gMuE4Mc-JsK@_cG;op=5dP`rk{D|8@T-}=3HMi{pn%5!tS|;j_2Oav}{_M z^msq<=&-+gS`D>TS{483c_x9MMxyyD7Yql=+{P^!oOkQ}5 z$cDek@mufziZ}eux8SerQ{PQnx4z{v7kQg~g@28!R8wEug}5LaL1KY z#r)E(_k(KhnqA&p8(r0V?UO{Lzo*R6Uo)4O>B{T29+1t||$CsY|x+MDK^s`T+ zXNP1S(%vP|6S#BNtG!zy;sSRpEjo2P->mJ$17)LR57xZIm!w=2V+LA^OFYNhh|q{7%<6wRE@9 z?X17%izBWs-yIo$dD*R$$$oRK&a81ie0tTKeP0qTbFML2sZ-MN_!#%CM-LaSDm6X8 zu%wsa^iAQdrMGrUZ#<`Kvc>#!#l@#L=F7dA)pJc(uXXd9?YDD}C+%OoJ#gomQ{`b- z=LT;3${Zmlm}Pl*V%%N%K1=bSyBF?Q^_Sk!UAc41dcD=xmiw)l!|E=s$UZx0deL!@ zgv%HI#(WB>3e%e=f42U8pKr{>u!XJ)>sDNT`R#Fj*V^OS<*K6pOgT3nsk*u}eY&<` zX-NAUmz)dBfF)Tx!CHif8>^=;+k&VtUC!mzMhf7q;wNG?iEVjJxD9TPKH!!MBCN^{Wr3IZpV{ zk-gNGWloCnT<7)EWc?(sO+9-&Sox?=Ud zyim`l=B1r;E=L}|DfLq{C?LVE%7$bMk( zQt4FC_&&qIN2v3-{jC578|yPdGZ^MhIN%u5qB-Y$4a4Ob@wa~`UYdAW$JB{2T;xn2 z6N8i^gDKa)7oL4Whj$4^3Wcd2>B-H9OBIqXc`B}2F)8?6gTOZtRUw(^Q=G=r=C6}-s%#WtZl31C7bje3 zFv;bKil4EK?Sz=iD<(B}v9ueur6f*#dy0{t7ZIj&W4Vj zNxj9Mx9%L<$<($dja@Ivc>kIhbE(|Wz`oj7CC7FKrFzP4a^zP$puk}`f0xoj$*@(s zq;x*Lzj^5Dajrd?E3>!!Y~LFhyggOA+MU~M>z30qLsq1}>JF8=DSth*XLbLqTF=PV zsk&>LnO9zUn(7*rpBlFEYn`(HEX^*KFD;>r!Mhw4vs7c7WL?tlFb0d>dHC_2m$_b4 z&vL22FSjTBXRtoGy58{Wt{>ZX&Ypd5<=uR(uIoIDuD@97({PAwmXFBH&Ldm*TWf19cWk`<^?6A2jf=NeURk0Mv^eN$>z7&ai&ids{<;2TaQ?%#Qu)|Y`=Bs!>E5i8 z6H8xaZcl5Q#-7di=E{c!M{9O81v7*&aV>JZ;$iVpwyP^jsi<^m$yytq&aJWQrXHMq zsy6+~m$e#^W^ecPar{gvNV=`K!;n`*Xc_j*o~S>LTT-*}~0ytV#x@y=+8=>DLuY%k+Z?R|TqGYr^t7l%DcKcM6oVDs+EB#oBV8Rvvbdd0Z<`TjWW+Hx;Dw!kc0 z)^-2gu(hR8Pxfui>RTE5D~hrBWZ_fhX?Z=xd-7Udnl69pbfI%j;uI^-N2Z<}-wzyN z3fCz-r1)v=-z%9h?}9wFKYONj&ior+l6Xa1QH#;B$;tZKiFfi??p$!_7df{r zCD^mq!>XUDz@u`4(Q)CHzZVXOpW>XyB-PcmBvdpju;|G<@7j}=Cm)7)3)-a2kSO$# zvTT_3{GUpz5d%xxGq+%aM=5H%Jf;gCOyw12a%t)|nQ}&jA?pf<%BN<-A5(bh-Y79m zHV{ilWpZ7ydPTC$?nMa-^In>7<6F}>jn#0{L5Eq5Tuaoio3zA4UE8@tGPzN914Fo- z-eVQkM5n-~Z=Bvt2h9y!+_LG}y{&9_HXgjyV02>MiK}-)3s)VRZ&+YF&B8T$iTStN z7B}w}UoM}rt1xd};QKwrVROyI8Ppaq%RG7Ap7~;@O?lemki!<6&zG+=)z4ccx$|n4 z#dD+2#|$q#f820*m(26&mR|*m#YJs6HReBRley0GY$xNCl4;&MKEF4=$9Y0U@6Od= z4n1a-(=v(rJd7uc8#*Q~_lRNnyFsSn>0yrIg3yV^t^qR-6vP`%)-zz}h!b)--(xZ1 zQOqOt8DS6Sn6q3F-SOe@_G3KDKPr7^oMOem8RIplaiaEpj^yL}w*`cFsujqDaR|F> z$Z6DdFu1ueElIUI|E6U@p^O+q@hr!ju9cS$-)3nNJi4@7_3s4+KdwTH`oe}n2lWOC z$19dwYm~g_n7KDSVOn)sf`Q|K-q&A*r& zA){@2?yF1;H@9G8FE8(vTfOCLUs=aZf3opwMkDMzcLuNx2SLPp5w66=%!c21jkt}yMA^Qc+Rmn zGR;YN#-8Ur=ULl|7TOp+o_?LLxOqzpE7z+zKb$!frbzU!afwUj7W6PuNalBFS+r%% z-?ALLB&%-z_=RcfZD(d8MC5@RkEi>O1@u=FU8L&M%J9O~5q4rN845kGRdw z%PJEXt`tf|Nl95ZG$hTQ)yg4}BEGpt@)cjky!e}M8dqg8$=&cfDbC7Nz~kX4v&Tt2 z|JFHiEwR&GiSJwFSZ*sAA1j#iXl0YAa4MWt3-59YYL?%W z$b4#9%BNOVrvD5jirju24neJjk*VDN+H)2#bai*yhnVaXt+7#0BuD zl$9)}Sp)0^8d7^U2kR8Hu2=beSo7f=3yXEjS})ID)_wfl0hWA;4`+nuxEnuZwN?tB zmT}TiCAqy_qQlCw>+x?1C)u-4L+3Fwa_(K|wqP-j;_7>xN0x|YbuwFi`}cxHHbKU* z(3M?hI)~Rmi3%s%l=fkA+rnEYqFcaHjd|VV{!) zPM#`LT#e`7ZC?4H=?%|?3Co)f`zJN1wWPB;)UWs(^;I@PuRc@Jx%kTBKIPpU0U`YV zFFl`i?~>f%rKc7i()(>6aQo`bbs2G%S(~PM?*1@i(H_oKJeLni7KdD37_ni(mODJ3 zw7(oTGrcSKrFve9vUPCrZ>^P6!z!hiR`JGID=Mv;w4!k(m$TnYHM<{P9%{uwg>y>I z^9ia8xOo)*TBxbZGN*&(%qc;?B!f`bzaIn{r|jUr*T%@nc;LxOrM(mQg}?dYz z)pF{);b5@flne81mL&%*HT^Da=sJhqF>!D;z4Hf5k@he{1oT_X* zE;?h1dxN&&lxv0JGuS+qZ*?!xRardu^eTs$JYp=@_l7)ek(>fLZ`yK!2ZPMQun2{3 zjvUe7rv*(Ej;U}u>NRN(dyyk!z`Ov9G{*xzAKREXm(S4lpH|#9)g_df=b^QT(Q&yS z#w_a#nOhiGCaG*_a`BM}d?L?1DIB@#qoiuKGtFlw&k|&d} zKR-v3poSpVLqn#Dhx%DgDhhl@)m%D`#VC3#bMJWDz*MtphYbHdfxxF0=`E~U?T33b z`&URX6c%r3J+LBZ=7AXuJw8t@9r>$0oPrw;%+OrH*deU>NU=fGkyZ7hnm}XOk_lx@ zG1WE-XYYu&u`(Svm(don=66Lh1BZ~BVG!3hCzXT{Mk5~P#vsNA)(fpoCrQrv(7+-i zBCPt*kSnFei(_T_mtIh1yWMKS;QSpgyke6G)aRvVp z0|CC-E!}CWIaV@APFTsY>U_(x2{9(SJ&$OFwlFvpxy(GrX(XX@WaISjhaHU1_ehFN z;#}px^n3XpFF`H?wHR?j&dt*Tx&->>GfoR&;5;&?SpF{inTiELb6AwP^n{KizUy{( z&Jp{4?TuDmOYrTSFT0+epBxkS;F#g@H(AB8fpr|Mw^!X!wX87=eXpK(d#5VLvzzO0 zS$VJA)!4S}^p-pK0@?l7ok|G`?5+)0wT&+iNr=y_?zs{H@oYz|&mq*Q>;U!{K z%JQFqr)qiTTEBOfTKC;9cG|GwzTfO9?%BR)GAkDSit2xP?{(2#lfPy!FP{HkWb!vY zefz!nPFr=w@}CsvN%^veRAwgcH+d;C>%!C3+2_x{`k86%ZvLM^>#Fqj=5Mo=4+c#; zd!^#>-7B9pbUj%!=A1dBAkfN`lq|D2bAPVht?lB0PPatGma>NO#_JndPk;Y8^Rm>2 zOHUOq=LvYK3f{eJzz{BZ=7FCrA{@aHfJ9mh3wba+> zIy)#VJTt{8V8bzGT?|}G#z2|B@r|;N#bb-mMGW99Woa~CmUCG>mGg3ajiUZR?SIH z;7X{AQtgI0FLgTKHVB-o7XI!eEHFWxRraukE9Z;}$D5WgePWxeAj-ln;BVlRCMYJw zAmDQ$W#^ebONSZHRF4HIvpODC6h5oDjG2Sa@pgxC%K?36hly8pn$#;wOc-ABR4^?0 zFhM}(X<&oPH%Atklt#z5r#T#i7G73VefprwZU&>FgyJ0bf};%F%7;{z2d-Ta6Q#w( zT{16n!c2!2QyJ;TNtT?EBD0*GgcUYVkV$-3Ja27*Qv++LOUNOqc?SyqPLgKmjpPY+ zm}sXaCGMf&B+7DnPuW2Q4}}n?7NsBJj#iGGmOeiI#~7HVel?W|sXNUjGFfQC0_Oi$ zp0n^oaSFx#XSm@uzkJ2A+3Z4Z?Oxp8fBN<^XS@3nm);${zcQmhb3vxMNW{^!`G(;O zU)v`67Q9oLy}GNmYv-&#Oq2U$5|}#s3*R?2#z^_sD6k8@nUiZXn`L*3NaJyh;7Kbi zmKF3FY5(?36urdU)H?CJMKWhmF^?PHuS6Sx2Wl%+UJ7iEV3s?u`j&$Ho}F5cZ!)l) z=P|ZC;NGN1orp85(JiZ&X8ELQJ_6hF-4R|PELS7U-AHxM)N)QT|A#P++rt) z8pUyQJPQ6che0SnuxH8gyL=2jT1qD#^5*krPLXe2`k5<0IZhxvthrHbaiKP7vrPj|+T76y}_73Y$4_o~+XuH9a|eaVBVCbJG~ z*?Vl(_8FPVQ+u=j`o_$gy=cW#zZ*AlO7&jG{hBtVx8(AxiL#9U#`5fce+){ zl{Md9{bvY28GL@?M)UlIeouF&#_oI88uUE(+?|Zee|^({j}wX(U-Rxf%y z_sf3<{cB$}FF!dYwoU6~sMSmN8-0r=mupQ5y6u1Io5Xzsja~aLZ+}7{$9^YpXRUVl2cN;T@i zZDE(otXe)#tXqU8D;7@p^>8y=vE|{NJF7ztr!4lI+8t;ttj=;ywn10x>Eup^lg?c- zb2S`O=7mT+@i!I9yAf-k5QK!Clj(Ad{=x zsNhJMqlKu`DybG8H#P1l44d;DKDJGDJR+Z?*s$fyPLIdG7rc}di8F3_>UU%cyUyhAF)5x? zeA*0cQxDqCJXEx5s`F8C8%0^G;@>O39`@3|%5Z3pNW)J5yNt$i0_~N|7Rr*z3> + +std::vector LoadImageFromFile(const char* file_name, + int* out_width, + int* out_height, + int* out_channels); + +#endif // TENSORFLOW_EXAMPLES_IOS_IOS_IMAGE_LOAD_H_ diff --git a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm new file mode 100644 index 0000000000..789522d2a9 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm @@ -0,0 +1,85 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ios_image_load.h" + +#include +#include +#include +#include + +#import +#import + +std::vector LoadImageFromFile(const char* file_name, + int* out_width, int* out_height, + int* out_channels) { + FILE* file_handle = fopen(file_name, "rb"); + fseek(file_handle, 0, SEEK_END); + const size_t bytes_in_file = ftell(file_handle); + fseek(file_handle, 0, SEEK_SET); + std::vector file_data(bytes_in_file); + fread(file_data.data(), 1, bytes_in_file, file_handle); + fclose(file_handle); + CFDataRef file_data_ref = CFDataCreateWithBytesNoCopy(NULL, file_data.data(), + bytes_in_file, + kCFAllocatorNull); + CGDataProviderRef image_provider = + CGDataProviderCreateWithCFData(file_data_ref); + + const char* suffix = strrchr(file_name, '.'); + if (!suffix || suffix == file_name) { + suffix = ""; + } + CGImageRef image; + if (strcasecmp(suffix, ".png") == 0) { + image = CGImageCreateWithPNGDataProvider(image_provider, NULL, true, + kCGRenderingIntentDefault); + } else if ((strcasecmp(suffix, ".jpg") == 0) || + (strcasecmp(suffix, ".jpeg") == 0)) { + image = CGImageCreateWithJPEGDataProvider(image_provider, NULL, true, + kCGRenderingIntentDefault); + } else { + CFRelease(image_provider); + CFRelease(file_data_ref); + fprintf(stderr, "Unknown suffix for file '%s'\n", file_name); + *out_width = 0; + *out_height = 0; + *out_channels = 0; + return std::vector(); + } + + const int width = (int)CGImageGetWidth(image); + const int height = (int)CGImageGetHeight(image); + const int channels = 4; + CGColorSpaceRef color_space = CGColorSpaceCreateDeviceRGB(); + const int bytes_per_row = (width * channels); + const int bytes_in_image = (bytes_per_row * height); + std::vector result(bytes_in_image); + const int bits_per_component = 8; + CGContextRef context = CGBitmapContextCreate(result.data(), width, height, + bits_per_component, bytes_per_row, color_space, + kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big); + CGColorSpaceRelease(color_space); + CGContextDrawImage(context, CGRectMake(0, 0, width, height), image); + CGContextRelease(context); + CFRelease(image); + CFRelease(image_provider); + CFRelease(file_data_ref); + + *out_width = width; + *out_height = height; + *out_channels = channels; + return result; +} diff --git a/tensorflow/contrib/lite/examples/ios/simple/main.mm b/tensorflow/contrib/lite/examples/ios/simple/main.mm new file mode 100644 index 0000000000..d70550a730 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/main.mm @@ -0,0 +1,22 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +int main(int argc, char * argv[]) { + @autoreleasepool { + NSString *delegateClassName = @"AppDelegate"; + return UIApplicationMain(argc, argv, nil, delegateClassName); + } +} diff --git a/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj new file mode 100644 index 0000000000..9277c230b8 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj @@ -0,0 +1,359 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 1C0D734B1ECCC460008C1DAB /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */; }; + 1CA45FFF1ECCC356002FA6A4 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */; }; + 594C14AE1FB8F9B500EE8BFE /* libtensorflow-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */; }; + 594C14B11FB9037100EE8BFE /* labels.txt in Resources */ = {isa = PBXBuildFile; fileRef = 594C14AF1FB9037100EE8BFE /* labels.txt */; }; + 594C14B21FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = 594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */; }; + 59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; }; + 59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; }; + 59A3D0081CF4E68100C4259F /* ios_image_load.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */; }; + 59A3D0091CF4E68100C4259F /* main.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFC1CF4E68100C4259F /* main.mm */; }; + 59A3D00B1CF4E68100C4259F /* RunModelViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */; }; + 59A3D00C1CF4E68100C4259F /* RunModelViewController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 59A3D0001CF4E68100C4259F /* RunModelViewController.xib */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; }; + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; }; + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; }; + 5911579B1CF4011C00C31E3A /* tf_simple_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_simple_example.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libtensorflow-lite.a"; path = "../../../gen/lib/libtensorflow-lite.a"; sourceTree = ""; }; + 594C14AF1FB9037100EE8BFE /* labels.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = labels.txt; sourceTree = ""; }; + 594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_v1_1.0_224.tflite; sourceTree = ""; }; + 59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; + 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = ""; }; + 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = grace_hopper.jpg; sourceTree = ""; }; + 59A3CFFA1CF4E68100C4259F /* ios_image_load.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ios_image_load.h; sourceTree = ""; }; + 59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ios_image_load.mm; sourceTree = ""; }; + 59A3CFFC1CF4E68100C4259F /* main.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = main.mm; sourceTree = ""; }; + 59A3CFFD1CF4E68100C4259F /* RunModel-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "RunModel-Info.plist"; sourceTree = ""; }; + 59A3CFFE1CF4E68100C4259F /* RunModelViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RunModelViewController.h; sourceTree = ""; }; + 59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RunModelViewController.mm; sourceTree = ""; }; + 59A3D0001CF4E68100C4259F /* RunModelViewController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = RunModelViewController.xib; sourceTree = ""; }; + 73DBC33C5DD9A526EE6D1EF2 /* libPods-tf_simple_example.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-tf_simple_example.a"; sourceTree = BUILT_PRODUCTS_DIR; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 591157981CF4011C00C31E3A /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 594C14AE1FB8F9B500EE8BFE /* libtensorflow-lite.a in Frameworks */, + 1C0D734B1ECCC460008C1DAB /* CoreGraphics.framework in Frameworks */, + 1CA45FFF1ECCC356002FA6A4 /* UIKit.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 24D7686C331131624F4454A0 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */, + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */, + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */, + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */, + 73DBC33C5DD9A526EE6D1EF2 /* libPods-tf_simple_example.a */, + ); + name = Frameworks; + sourceTree = ""; + }; + 591157921CF4011C00C31E3A = { + isa = PBXGroup; + children = ( + 59A3CFF11CF4E68100C4259F /* AppDelegate.h */, + 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */, + 59A3CFF31CF4E68100C4259F /* data */, + 59A3CFFA1CF4E68100C4259F /* ios_image_load.h */, + 59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */, + 59A3CFFC1CF4E68100C4259F /* main.mm */, + 59A3CFFD1CF4E68100C4259F /* RunModel-Info.plist */, + 59A3CFFE1CF4E68100C4259F /* RunModelViewController.h */, + 59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */, + 59A3D0001CF4E68100C4259F /* RunModelViewController.xib */, + 5911579C1CF4011C00C31E3A /* Products */, + 24D7686C331131624F4454A0 /* Frameworks */, + ); + sourceTree = ""; + }; + 5911579C1CF4011C00C31E3A /* Products */ = { + isa = PBXGroup; + children = ( + 5911579B1CF4011C00C31E3A /* tf_simple_example.app */, + ); + name = Products; + sourceTree = ""; + }; + 59A3CFF31CF4E68100C4259F /* data */ = { + isa = PBXGroup; + children = ( + 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */, + 594C14AF1FB9037100EE8BFE /* labels.txt */, + 594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */, + ); + path = data; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 5911579A1CF4011C00C31E3A /* tf_simple_example */ = { + isa = PBXNativeTarget; + buildConfigurationList = 591157B21CF4011D00C31E3A /* Build configuration list for PBXNativeTarget "tf_simple_example" */; + buildPhases = ( + 591157971CF4011C00C31E3A /* Sources */, + 591157981CF4011C00C31E3A /* Frameworks */, + 591157991CF4011C00C31E3A /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = tf_simple_example; + productName = tf_ios_makefile_example; + productReference = 5911579B1CF4011C00C31E3A /* tf_simple_example.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 591157931CF4011C00C31E3A /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0830; + ORGANIZATIONNAME = Google; + TargetAttributes = { + 5911579A1CF4011C00C31E3A = { + CreatedOnToolsVersion = 7.2; + DevelopmentTeam = EQHXZ8M8AV; + ProvisioningStyle = Manual; + }; + }; + }; + buildConfigurationList = 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "simple" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 591157921CF4011C00C31E3A; + productRefGroup = 5911579C1CF4011C00C31E3A /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 5911579A1CF4011C00C31E3A /* tf_simple_example */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 591157991CF4011C00C31E3A /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 59A3D00C1CF4E68100C4259F /* RunModelViewController.xib in Resources */, + 594C14B11FB9037100EE8BFE /* labels.txt in Resources */, + 59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */, + 594C14B21FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 591157971CF4011C00C31E3A /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 59A3D0091CF4E68100C4259F /* main.mm in Sources */, + 59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */, + 59A3D00B1CF4E68100C4259F /* RunModelViewController.mm in Sources */, + 59A3D0081CF4E68100C4259F /* ios_image_load.mm in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 591157B01CF4011D00C31E3A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 591157B11CF4011D00C31E3A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 591157B31CF4011D00C31E3A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_DEBUG_INFORMATION_LEVEL = default; + CODE_SIGN_IDENTITY = "iPhone Developer"; + DEVELOPMENT_TEAM = EQHXZ8M8AV; + ENABLE_BITCODE = NO; + GCC_ENABLE_CPP_EXCEPTIONS = YES; + GCC_ENABLE_CPP_RTTI = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist"; + IPHONEOS_DEPLOYMENT_TARGET = 9.2; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; + OTHER_LDFLAGS = "$(inherited)"; + PRODUCT_BUNDLE_IDENTIFIER = "com.google.tflite-simple-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE = "1072bd47-ff19-4e5f-8107-d912748f83f1"; + PROVISIONING_PROFILE_SPECIFIER = "Google Development"; + SEPARATE_STRIP = NO; + }; + name = Debug; + }; + 591157B41CF4011D00C31E3A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_DEBUG_INFORMATION_LEVEL = default; + CODE_SIGN_IDENTITY = "iPhone Developer"; + DEVELOPMENT_TEAM = ""; + ENABLE_BITCODE = NO; + GCC_ENABLE_CPP_EXCEPTIONS = YES; + GCC_ENABLE_CPP_RTTI = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist"; + IPHONEOS_DEPLOYMENT_TARGET = 9.2; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + ONLY_ACTIVE_ARCH = YES; + OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; + OTHER_LDFLAGS = "$(inherited)"; + PRODUCT_BUNDLE_IDENTIFIER = "com.google.tflite-simple-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SEPARATE_STRIP = NO; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "simple" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 591157B01CF4011D00C31E3A /* Debug */, + 591157B11CF4011D00C31E3A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 591157B21CF4011D00C31E3A /* Build configuration list for PBXNativeTarget "tf_simple_example" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 591157B31CF4011D00C31E3A /* Debug */, + 591157B41CF4011D00C31E3A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 591157931CF4011C00C31E3A /* Project object */; +} diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc new file mode 100644 index 0000000000..bcff7ed988 --- /dev/null +++ b/tensorflow/contrib/lite/ios_makefile.inc @@ -0,0 +1,47 @@ +# Settings for iOS. +ifeq ($(TARGET), IOS) + BUILD_FOR_IOS_SIMULATOR := false + ifeq ($(IOS_ARCH), x86_64) + BUILD_FOR_IOS_SIMULATOR := true + endif + ifeq ($(IOS_ARCH), i386) + BUILD_FOR_IOS_SIMULATOR := true + endif + ifeq ($(BUILD_FOR_IOS_SIMULATOR), true) + IPHONEOS_PLATFORM := $(shell xcrun --sdk iphonesimulator \ + --show-sdk-platform-path) + IPHONEOS_SYSROOT := $(shell xcrun --sdk iphonesimulator \ + --show-sdk-path) + else + IPHONEOS_PLATFORM := $(shell xcrun --sdk iphoneos --show-sdk-platform-path) + IPHONEOS_SYSROOT := $(shell xcrun --sdk iphoneos --show-sdk-path) + endif + IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version) + MIN_SDK_VERSION := 9.0 + # Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64. + IOS_ARCH := x86_64 + CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \ + -DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \ + -fembed-bitcode \ + -Wno-c++11-narrowing \ + -mno-thumb \ + -fno-exceptions \ + -isysroot \ + ${IPHONEOS_SYSROOT} \ + -arch $(IOS_ARCH) \ + -O3 + CCFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \ + -fembed-bitcode \ + -mno-thumb \ + -isysroot \ + ${IPHONEOS_SYSROOT} \ + -arch $(IOS_ARCH) \ + -O3 + LDFLAGS := -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch $(IOS_ARCH) + OBJDIR := $(OBJDIR)ios_$(IOS_ARCH)/ + LIBDIR := $(LIBDIR)ios_$(IOS_ARCH)/ + BINDIR := $(BINDIR)ios_$(IOS_ARCH)/ + DEPDIR := $(DEPDIR)ios_$(IOS_ARCH)/ +endif diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h new file mode 100755 index 0000000000..df460ab9a3 --- /dev/null +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -0,0 +1,4521 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ +#define FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace tflite { + +struct QuantizationParameters; +struct QuantizationParametersT; + +struct Tensor; +struct TensorT; + +struct Conv2DOptions; +struct Conv2DOptionsT; + +struct Pool2DOptions; +struct Pool2DOptionsT; + +struct DepthwiseConv2DOptions; +struct DepthwiseConv2DOptionsT; + +struct ConcatEmbeddingsOptions; +struct ConcatEmbeddingsOptionsT; + +struct LSHProjectionOptions; +struct LSHProjectionOptionsT; + +struct SVDFOptions; +struct SVDFOptionsT; + +struct RNNOptions; +struct RNNOptionsT; + +struct FullyConnectedOptions; +struct FullyConnectedOptionsT; + +struct SoftmaxOptions; +struct SoftmaxOptionsT; + +struct ConcatenationOptions; +struct ConcatenationOptionsT; + +struct AddOptions; +struct AddOptionsT; + +struct MulOptions; +struct MulOptionsT; + +struct L2NormOptions; +struct L2NormOptionsT; + +struct LocalResponseNormalizationOptions; +struct LocalResponseNormalizationOptionsT; + +struct LSTMOptions; +struct LSTMOptionsT; + +struct ResizeBilinearOptions; +struct ResizeBilinearOptionsT; + +struct CallOptions; +struct CallOptionsT; + +struct ReshapeOptions; +struct ReshapeOptionsT; + +struct SkipGramOptions; +struct SkipGramOptionsT; + +struct SpaceToDepthOptions; +struct SpaceToDepthOptionsT; + +struct EmbeddingLookupSparseOptions; +struct EmbeddingLookupSparseOptionsT; + +struct OperatorCode; +struct OperatorCodeT; + +struct Operator; +struct OperatorT; + +struct SubGraph; +struct SubGraphT; + +struct Buffer; +struct BufferT; + +struct Model; +struct ModelT; + +enum TensorType { + TensorType_FLOAT32 = 0, + TensorType_FLOAT16 = 1, + TensorType_INT32 = 2, + TensorType_UINT8 = 3, + TensorType_INT64 = 4, + TensorType_STRING = 5, + TensorType_MIN = TensorType_FLOAT32, + TensorType_MAX = TensorType_STRING +}; + +inline TensorType (&EnumValuesTensorType())[6] { + static TensorType values[] = { + TensorType_FLOAT32, + TensorType_FLOAT16, + TensorType_INT32, + TensorType_UINT8, + TensorType_INT64, + TensorType_STRING + }; + return values; +} + +inline const char **EnumNamesTensorType() { + static const char *names[] = { + "FLOAT32", + "FLOAT16", + "INT32", + "UINT8", + "INT64", + "STRING", + nullptr + }; + return names; +} + +inline const char *EnumNameTensorType(TensorType e) { + const size_t index = static_cast(e); + return EnumNamesTensorType()[index]; +} + +enum BuiltinOperator { + BuiltinOperator_ADD = 0, + BuiltinOperator_AVERAGE_POOL_2D = 1, + BuiltinOperator_CONCATENATION = 2, + BuiltinOperator_CONV_2D = 3, + BuiltinOperator_DEPTHWISE_CONV_2D = 4, + BuiltinOperator_EMBEDDING_LOOKUP = 7, + BuiltinOperator_FULLY_CONNECTED = 9, + BuiltinOperator_HASHTABLE_LOOKUP = 10, + BuiltinOperator_L2_NORMALIZATION = 11, + BuiltinOperator_L2_POOL_2D = 12, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13, + BuiltinOperator_LOGISTIC = 14, + BuiltinOperator_LSH_PROJECTION = 15, + BuiltinOperator_LSTM = 16, + BuiltinOperator_MAX_POOL_2D = 17, + BuiltinOperator_MUL = 18, + BuiltinOperator_RELU = 19, + BuiltinOperator_RELU1 = 20, + BuiltinOperator_RELU6 = 21, + BuiltinOperator_RESHAPE = 22, + BuiltinOperator_RESIZE_BILINEAR = 23, + BuiltinOperator_RNN = 24, + BuiltinOperator_SOFTMAX = 25, + BuiltinOperator_SPACE_TO_DEPTH = 26, + BuiltinOperator_SVDF = 27, + BuiltinOperator_TANH = 28, + BuiltinOperator_CONCAT_EMBEDDINGS = 29, + BuiltinOperator_SKIP_GRAM = 30, + BuiltinOperator_CALL = 31, + BuiltinOperator_CUSTOM = 32, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33, + BuiltinOperator_MIN = BuiltinOperator_ADD, + BuiltinOperator_MAX = BuiltinOperator_EMBEDDING_LOOKUP_SPARSE +}; + +inline BuiltinOperator (&EnumValuesBuiltinOperator())[31] { + static BuiltinOperator values[] = { + BuiltinOperator_ADD, + BuiltinOperator_AVERAGE_POOL_2D, + BuiltinOperator_CONCATENATION, + BuiltinOperator_CONV_2D, + BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOperator_EMBEDDING_LOOKUP, + BuiltinOperator_FULLY_CONNECTED, + BuiltinOperator_HASHTABLE_LOOKUP, + BuiltinOperator_L2_NORMALIZATION, + BuiltinOperator_L2_POOL_2D, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + BuiltinOperator_LOGISTIC, + BuiltinOperator_LSH_PROJECTION, + BuiltinOperator_LSTM, + BuiltinOperator_MAX_POOL_2D, + BuiltinOperator_MUL, + BuiltinOperator_RELU, + BuiltinOperator_RELU1, + BuiltinOperator_RELU6, + BuiltinOperator_RESHAPE, + BuiltinOperator_RESIZE_BILINEAR, + BuiltinOperator_RNN, + BuiltinOperator_SOFTMAX, + BuiltinOperator_SPACE_TO_DEPTH, + BuiltinOperator_SVDF, + BuiltinOperator_TANH, + BuiltinOperator_CONCAT_EMBEDDINGS, + BuiltinOperator_SKIP_GRAM, + BuiltinOperator_CALL, + BuiltinOperator_CUSTOM, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE + }; + return values; +} + +inline const char **EnumNamesBuiltinOperator() { + static const char *names[] = { + "ADD", + "AVERAGE_POOL_2D", + "CONCATENATION", + "CONV_2D", + "DEPTHWISE_CONV_2D", + "", + "", + "EMBEDDING_LOOKUP", + "", + "FULLY_CONNECTED", + "HASHTABLE_LOOKUP", + "L2_NORMALIZATION", + "L2_POOL_2D", + "LOCAL_RESPONSE_NORMALIZATION", + "LOGISTIC", + "LSH_PROJECTION", + "LSTM", + "MAX_POOL_2D", + "MUL", + "RELU", + "RELU1", + "RELU6", + "RESHAPE", + "RESIZE_BILINEAR", + "RNN", + "SOFTMAX", + "SPACE_TO_DEPTH", + "SVDF", + "TANH", + "CONCAT_EMBEDDINGS", + "SKIP_GRAM", + "CALL", + "CUSTOM", + "EMBEDDING_LOOKUP_SPARSE", + nullptr + }; + return names; +} + +inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { + const size_t index = static_cast(e); + return EnumNamesBuiltinOperator()[index]; +} + +enum BuiltinOptions { + BuiltinOptions_NONE = 0, + BuiltinOptions_Conv2DOptions = 1, + BuiltinOptions_DepthwiseConv2DOptions = 2, + BuiltinOptions_ConcatEmbeddingsOptions = 3, + BuiltinOptions_LSHProjectionOptions = 4, + BuiltinOptions_Pool2DOptions = 5, + BuiltinOptions_SVDFOptions = 6, + BuiltinOptions_RNNOptions = 7, + BuiltinOptions_FullyConnectedOptions = 8, + BuiltinOptions_SoftmaxOptions = 9, + BuiltinOptions_ConcatenationOptions = 10, + BuiltinOptions_AddOptions = 11, + BuiltinOptions_L2NormOptions = 12, + BuiltinOptions_LocalResponseNormalizationOptions = 13, + BuiltinOptions_LSTMOptions = 14, + BuiltinOptions_ResizeBilinearOptions = 15, + BuiltinOptions_CallOptions = 16, + BuiltinOptions_ReshapeOptions = 17, + BuiltinOptions_SkipGramOptions = 18, + BuiltinOptions_SpaceToDepthOptions = 19, + BuiltinOptions_EmbeddingLookupSparseOptions = 20, + BuiltinOptions_MulOptions = 21, + BuiltinOptions_MIN = BuiltinOptions_NONE, + BuiltinOptions_MAX = BuiltinOptions_MulOptions +}; + +inline BuiltinOptions (&EnumValuesBuiltinOptions())[22] { + static BuiltinOptions values[] = { + BuiltinOptions_NONE, + BuiltinOptions_Conv2DOptions, + BuiltinOptions_DepthwiseConv2DOptions, + BuiltinOptions_ConcatEmbeddingsOptions, + BuiltinOptions_LSHProjectionOptions, + BuiltinOptions_Pool2DOptions, + BuiltinOptions_SVDFOptions, + BuiltinOptions_RNNOptions, + BuiltinOptions_FullyConnectedOptions, + BuiltinOptions_SoftmaxOptions, + BuiltinOptions_ConcatenationOptions, + BuiltinOptions_AddOptions, + BuiltinOptions_L2NormOptions, + BuiltinOptions_LocalResponseNormalizationOptions, + BuiltinOptions_LSTMOptions, + BuiltinOptions_ResizeBilinearOptions, + BuiltinOptions_CallOptions, + BuiltinOptions_ReshapeOptions, + BuiltinOptions_SkipGramOptions, + BuiltinOptions_SpaceToDepthOptions, + BuiltinOptions_EmbeddingLookupSparseOptions, + BuiltinOptions_MulOptions + }; + return values; +} + +inline const char **EnumNamesBuiltinOptions() { + static const char *names[] = { + "NONE", + "Conv2DOptions", + "DepthwiseConv2DOptions", + "ConcatEmbeddingsOptions", + "LSHProjectionOptions", + "Pool2DOptions", + "SVDFOptions", + "RNNOptions", + "FullyConnectedOptions", + "SoftmaxOptions", + "ConcatenationOptions", + "AddOptions", + "L2NormOptions", + "LocalResponseNormalizationOptions", + "LSTMOptions", + "ResizeBilinearOptions", + "CallOptions", + "ReshapeOptions", + "SkipGramOptions", + "SpaceToDepthOptions", + "EmbeddingLookupSparseOptions", + "MulOptions", + nullptr + }; + return names; +} + +inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { + const size_t index = static_cast(e); + return EnumNamesBuiltinOptions()[index]; +} + +template struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NONE; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; +}; + +struct BuiltinOptionsUnion { + BuiltinOptions type; + void *value; + + BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {} + BuiltinOptionsUnion(BuiltinOptionsUnion&& u) FLATBUFFERS_NOEXCEPT : + type(BuiltinOptions_NONE), value(nullptr) + { std::swap(type, u.type); std::swap(value, u.value); } + BuiltinOptionsUnion(const BuiltinOptionsUnion &) FLATBUFFERS_NOEXCEPT; + BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT + { BuiltinOptionsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; } + BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT + { std::swap(type, u.type); std::swap(value, u.value); return *this; } + ~BuiltinOptionsUnion() { Reset(); } + + void Reset(); + +#ifndef FLATBUFFERS_CPP98_STL + template + void Set(T&& val) { + Reset(); + type = BuiltinOptionsTraits::enum_value; + if (type != BuiltinOptions_NONE) { + value = new T(std::forward(val)); + } + } +#endif // FLATBUFFERS_CPP98_STL + + static void *UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver); + flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const; + + Conv2DOptionsT *AsConv2DOptions() { + return type == BuiltinOptions_Conv2DOptions ? + reinterpret_cast(value) : nullptr; + } + const Conv2DOptionsT *AsConv2DOptions() const { + return type == BuiltinOptions_Conv2DOptions ? + reinterpret_cast(value) : nullptr; + } + DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() { + return type == BuiltinOptions_DepthwiseConv2DOptions ? + reinterpret_cast(value) : nullptr; + } + const DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const { + return type == BuiltinOptions_DepthwiseConv2DOptions ? + reinterpret_cast(value) : nullptr; + } + ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() { + return type == BuiltinOptions_ConcatEmbeddingsOptions ? + reinterpret_cast(value) : nullptr; + } + const ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const { + return type == BuiltinOptions_ConcatEmbeddingsOptions ? + reinterpret_cast(value) : nullptr; + } + LSHProjectionOptionsT *AsLSHProjectionOptions() { + return type == BuiltinOptions_LSHProjectionOptions ? + reinterpret_cast(value) : nullptr; + } + const LSHProjectionOptionsT *AsLSHProjectionOptions() const { + return type == BuiltinOptions_LSHProjectionOptions ? + reinterpret_cast(value) : nullptr; + } + Pool2DOptionsT *AsPool2DOptions() { + return type == BuiltinOptions_Pool2DOptions ? + reinterpret_cast(value) : nullptr; + } + const Pool2DOptionsT *AsPool2DOptions() const { + return type == BuiltinOptions_Pool2DOptions ? + reinterpret_cast(value) : nullptr; + } + SVDFOptionsT *AsSVDFOptions() { + return type == BuiltinOptions_SVDFOptions ? + reinterpret_cast(value) : nullptr; + } + const SVDFOptionsT *AsSVDFOptions() const { + return type == BuiltinOptions_SVDFOptions ? + reinterpret_cast(value) : nullptr; + } + RNNOptionsT *AsRNNOptions() { + return type == BuiltinOptions_RNNOptions ? + reinterpret_cast(value) : nullptr; + } + const RNNOptionsT *AsRNNOptions() const { + return type == BuiltinOptions_RNNOptions ? + reinterpret_cast(value) : nullptr; + } + FullyConnectedOptionsT *AsFullyConnectedOptions() { + return type == BuiltinOptions_FullyConnectedOptions ? + reinterpret_cast(value) : nullptr; + } + const FullyConnectedOptionsT *AsFullyConnectedOptions() const { + return type == BuiltinOptions_FullyConnectedOptions ? + reinterpret_cast(value) : nullptr; + } + SoftmaxOptionsT *AsSoftmaxOptions() { + return type == BuiltinOptions_SoftmaxOptions ? + reinterpret_cast(value) : nullptr; + } + const SoftmaxOptionsT *AsSoftmaxOptions() const { + return type == BuiltinOptions_SoftmaxOptions ? + reinterpret_cast(value) : nullptr; + } + ConcatenationOptionsT *AsConcatenationOptions() { + return type == BuiltinOptions_ConcatenationOptions ? + reinterpret_cast(value) : nullptr; + } + const ConcatenationOptionsT *AsConcatenationOptions() const { + return type == BuiltinOptions_ConcatenationOptions ? + reinterpret_cast(value) : nullptr; + } + AddOptionsT *AsAddOptions() { + return type == BuiltinOptions_AddOptions ? + reinterpret_cast(value) : nullptr; + } + const AddOptionsT *AsAddOptions() const { + return type == BuiltinOptions_AddOptions ? + reinterpret_cast(value) : nullptr; + } + L2NormOptionsT *AsL2NormOptions() { + return type == BuiltinOptions_L2NormOptions ? + reinterpret_cast(value) : nullptr; + } + const L2NormOptionsT *AsL2NormOptions() const { + return type == BuiltinOptions_L2NormOptions ? + reinterpret_cast(value) : nullptr; + } + LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() { + return type == BuiltinOptions_LocalResponseNormalizationOptions ? + reinterpret_cast(value) : nullptr; + } + const LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() const { + return type == BuiltinOptions_LocalResponseNormalizationOptions ? + reinterpret_cast(value) : nullptr; + } + LSTMOptionsT *AsLSTMOptions() { + return type == BuiltinOptions_LSTMOptions ? + reinterpret_cast(value) : nullptr; + } + const LSTMOptionsT *AsLSTMOptions() const { + return type == BuiltinOptions_LSTMOptions ? + reinterpret_cast(value) : nullptr; + } + ResizeBilinearOptionsT *AsResizeBilinearOptions() { + return type == BuiltinOptions_ResizeBilinearOptions ? + reinterpret_cast(value) : nullptr; + } + const ResizeBilinearOptionsT *AsResizeBilinearOptions() const { + return type == BuiltinOptions_ResizeBilinearOptions ? + reinterpret_cast(value) : nullptr; + } + CallOptionsT *AsCallOptions() { + return type == BuiltinOptions_CallOptions ? + reinterpret_cast(value) : nullptr; + } + const CallOptionsT *AsCallOptions() const { + return type == BuiltinOptions_CallOptions ? + reinterpret_cast(value) : nullptr; + } + ReshapeOptionsT *AsReshapeOptions() { + return type == BuiltinOptions_ReshapeOptions ? + reinterpret_cast(value) : nullptr; + } + const ReshapeOptionsT *AsReshapeOptions() const { + return type == BuiltinOptions_ReshapeOptions ? + reinterpret_cast(value) : nullptr; + } + SkipGramOptionsT *AsSkipGramOptions() { + return type == BuiltinOptions_SkipGramOptions ? + reinterpret_cast(value) : nullptr; + } + const SkipGramOptionsT *AsSkipGramOptions() const { + return type == BuiltinOptions_SkipGramOptions ? + reinterpret_cast(value) : nullptr; + } + SpaceToDepthOptionsT *AsSpaceToDepthOptions() { + return type == BuiltinOptions_SpaceToDepthOptions ? + reinterpret_cast(value) : nullptr; + } + const SpaceToDepthOptionsT *AsSpaceToDepthOptions() const { + return type == BuiltinOptions_SpaceToDepthOptions ? + reinterpret_cast(value) : nullptr; + } + EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() { + return type == BuiltinOptions_EmbeddingLookupSparseOptions ? + reinterpret_cast(value) : nullptr; + } + const EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const { + return type == BuiltinOptions_EmbeddingLookupSparseOptions ? + reinterpret_cast(value) : nullptr; + } + MulOptionsT *AsMulOptions() { + return type == BuiltinOptions_MulOptions ? + reinterpret_cast(value) : nullptr; + } + const MulOptionsT *AsMulOptions() const { + return type == BuiltinOptions_MulOptions ? + reinterpret_cast(value) : nullptr; + } +}; + +bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); +bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); + +enum Padding { + Padding_SAME = 0, + Padding_VALID = 1, + Padding_MIN = Padding_SAME, + Padding_MAX = Padding_VALID +}; + +inline Padding (&EnumValuesPadding())[2] { + static Padding values[] = { + Padding_SAME, + Padding_VALID + }; + return values; +} + +inline const char **EnumNamesPadding() { + static const char *names[] = { + "SAME", + "VALID", + nullptr + }; + return names; +} + +inline const char *EnumNamePadding(Padding e) { + const size_t index = static_cast(e); + return EnumNamesPadding()[index]; +} + +enum ActivationFunctionType { + ActivationFunctionType_NONE = 0, + ActivationFunctionType_RELU = 1, + ActivationFunctionType_RELU1 = 2, + ActivationFunctionType_RELU6 = 3, + ActivationFunctionType_TANH = 4, + ActivationFunctionType_SIGN_BIT = 5, + ActivationFunctionType_MIN = ActivationFunctionType_NONE, + ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT +}; + +inline ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { + static ActivationFunctionType values[] = { + ActivationFunctionType_NONE, + ActivationFunctionType_RELU, + ActivationFunctionType_RELU1, + ActivationFunctionType_RELU6, + ActivationFunctionType_TANH, + ActivationFunctionType_SIGN_BIT + }; + return values; +} + +inline const char **EnumNamesActivationFunctionType() { + static const char *names[] = { + "NONE", + "RELU", + "RELU1", + "RELU6", + "TANH", + "SIGN_BIT", + nullptr + }; + return names; +} + +inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) { + const size_t index = static_cast(e); + return EnumNamesActivationFunctionType()[index]; +} + +enum LSHProjectionType { + LSHProjectionType_UNKNOWN = 0, + LSHProjectionType_SPARSE = 1, + LSHProjectionType_DENSE = 2, + LSHProjectionType_MIN = LSHProjectionType_UNKNOWN, + LSHProjectionType_MAX = LSHProjectionType_DENSE +}; + +inline LSHProjectionType (&EnumValuesLSHProjectionType())[3] { + static LSHProjectionType values[] = { + LSHProjectionType_UNKNOWN, + LSHProjectionType_SPARSE, + LSHProjectionType_DENSE + }; + return values; +} + +inline const char **EnumNamesLSHProjectionType() { + static const char *names[] = { + "UNKNOWN", + "SPARSE", + "DENSE", + nullptr + }; + return names; +} + +inline const char *EnumNameLSHProjectionType(LSHProjectionType e) { + const size_t index = static_cast(e); + return EnumNamesLSHProjectionType()[index]; +} + +enum CombinerType { + CombinerType_SUM = 0, + CombinerType_MEAN = 1, + CombinerType_SQRTN = 2, + CombinerType_MIN = CombinerType_SUM, + CombinerType_MAX = CombinerType_SQRTN +}; + +inline CombinerType (&EnumValuesCombinerType())[3] { + static CombinerType values[] = { + CombinerType_SUM, + CombinerType_MEAN, + CombinerType_SQRTN + }; + return values; +} + +inline const char **EnumNamesCombinerType() { + static const char *names[] = { + "SUM", + "MEAN", + "SQRTN", + nullptr + }; + return names; +} + +inline const char *EnumNameCombinerType(CombinerType e) { + const size_t index = static_cast(e); + return EnumNamesCombinerType()[index]; +} + +enum CustomOptionsFormat { + CustomOptionsFormat_FLEXBUFFERS = 0, + CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS, + CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS +}; + +inline CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] { + static CustomOptionsFormat values[] = { + CustomOptionsFormat_FLEXBUFFERS + }; + return values; +} + +inline const char **EnumNamesCustomOptionsFormat() { + static const char *names[] = { + "FLEXBUFFERS", + nullptr + }; + return names; +} + +inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) { + const size_t index = static_cast(e); + return EnumNamesCustomOptionsFormat()[index]; +} + +struct QuantizationParametersT : public flatbuffers::NativeTable { + typedef QuantizationParameters TableType; + std::vector min; + std::vector max; + std::vector scale; + std::vector zero_point; + QuantizationParametersT() { + } +}; + +struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef QuantizationParametersT NativeTableType; + enum { + VT_MIN = 4, + VT_MAX = 6, + VT_SCALE = 8, + VT_ZERO_POINT = 10 + }; + const flatbuffers::Vector *min() const { + return GetPointer *>(VT_MIN); + } + const flatbuffers::Vector *max() const { + return GetPointer *>(VT_MAX); + } + const flatbuffers::Vector *scale() const { + return GetPointer *>(VT_SCALE); + } + const flatbuffers::Vector *zero_point() const { + return GetPointer *>(VT_ZERO_POINT); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_MIN) && + verifier.Verify(min()) && + VerifyOffset(verifier, VT_MAX) && + verifier.Verify(max()) && + VerifyOffset(verifier, VT_SCALE) && + verifier.Verify(scale()) && + VerifyOffset(verifier, VT_ZERO_POINT) && + verifier.Verify(zero_point()) && + verifier.EndTable(); + } + QuantizationParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct QuantizationParametersBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_min(flatbuffers::Offset> min) { + fbb_.AddOffset(QuantizationParameters::VT_MIN, min); + } + void add_max(flatbuffers::Offset> max) { + fbb_.AddOffset(QuantizationParameters::VT_MAX, max); + } + void add_scale(flatbuffers::Offset> scale) { + fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale); + } + void add_zero_point(flatbuffers::Offset> zero_point) { + fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point); + } + explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateQuantizationParameters( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> min = 0, + flatbuffers::Offset> max = 0, + flatbuffers::Offset> scale = 0, + flatbuffers::Offset> zero_point = 0) { + QuantizationParametersBuilder builder_(_fbb); + builder_.add_zero_point(zero_point); + builder_.add_scale(scale); + builder_.add_max(max); + builder_.add_min(min); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateQuantizationParametersDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *min = nullptr, + const std::vector *max = nullptr, + const std::vector *scale = nullptr, + const std::vector *zero_point = nullptr) { + return tflite::CreateQuantizationParameters( + _fbb, + min ? _fbb.CreateVector(*min) : 0, + max ? _fbb.CreateVector(*max) : 0, + scale ? _fbb.CreateVector(*scale) : 0, + zero_point ? _fbb.CreateVector(*zero_point) : 0); +} + +flatbuffers::Offset CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TensorT : public flatbuffers::NativeTable { + typedef Tensor TableType; + std::vector shape; + TensorType type; + uint32_t buffer; + std::string name; + std::unique_ptr quantization; + TensorT() + : type(TensorType_FLOAT32), + buffer(0) { + } +}; + +struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorT NativeTableType; + enum { + VT_SHAPE = 4, + VT_TYPE = 6, + VT_BUFFER = 8, + VT_NAME = 10, + VT_QUANTIZATION = 12 + }; + const flatbuffers::Vector *shape() const { + return GetPointer *>(VT_SHAPE); + } + TensorType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + uint32_t buffer() const { + return GetField(VT_BUFFER, 0); + } + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + const QuantizationParameters *quantization() const { + return GetPointer(VT_QUANTIZATION); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SHAPE) && + verifier.Verify(shape()) && + VerifyField(verifier, VT_TYPE) && + VerifyField(verifier, VT_BUFFER) && + VerifyOffset(verifier, VT_NAME) && + verifier.Verify(name()) && + VerifyOffset(verifier, VT_QUANTIZATION) && + verifier.VerifyTable(quantization()) && + verifier.EndTable(); + } + TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TensorBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_shape(flatbuffers::Offset> shape) { + fbb_.AddOffset(Tensor::VT_SHAPE, shape); + } + void add_type(TensorType type) { + fbb_.AddElement(Tensor::VT_TYPE, static_cast(type), 0); + } + void add_buffer(uint32_t buffer) { + fbb_.AddElement(Tensor::VT_BUFFER, buffer, 0); + } + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(Tensor::VT_NAME, name); + } + void add_quantization(flatbuffers::Offset quantization) { + fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization); + } + explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + TensorBuilder &operator=(const TensorBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensor( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> shape = 0, + TensorType type = TensorType_FLOAT32, + uint32_t buffer = 0, + flatbuffers::Offset name = 0, + flatbuffers::Offset quantization = 0) { + TensorBuilder builder_(_fbb); + builder_.add_quantization(quantization); + builder_.add_name(name); + builder_.add_buffer(buffer); + builder_.add_shape(shape); + builder_.add_type(type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTensorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *shape = nullptr, + TensorType type = TensorType_FLOAT32, + uint32_t buffer = 0, + const char *name = nullptr, + flatbuffers::Offset quantization = 0) { + return tflite::CreateTensor( + _fbb, + shape ? _fbb.CreateVector(*shape) : 0, + type, + buffer, + name ? _fbb.CreateString(name) : 0, + quantization); +} + +flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Conv2DOptionsT : public flatbuffers::NativeTable { + typedef Conv2DOptions TableType; + Padding padding; + int32_t stride_w; + int32_t stride_h; + ActivationFunctionType fused_activation_function; + Conv2DOptionsT() + : padding(Padding_SAME), + stride_w(0), + stride_h(0), + fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Conv2DOptionsT NativeTableType; + enum { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FUSED_ACTIVATION_FUNCTION = 10 + }; + Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING) && + VerifyField(verifier, VT_STRIDE_W) && + VerifyField(verifier, VT_STRIDE_H) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Conv2DOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) { + fbb_.AddElement(Conv2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(Conv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Conv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateConv2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, + Padding padding = Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + Conv2DOptionsBuilder builder_(_fbb); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Pool2DOptionsT : public flatbuffers::NativeTable { + typedef Pool2DOptions TableType; + Padding padding; + int32_t stride_w; + int32_t stride_h; + int32_t filter_width; + int32_t filter_height; + ActivationFunctionType fused_activation_function; + Pool2DOptionsT() + : padding(Padding_SAME), + stride_w(0), + stride_h(0), + filter_width(0), + filter_height(0), + fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Pool2DOptionsT NativeTableType; + enum { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FILTER_WIDTH = 10, + VT_FILTER_HEIGHT = 12, + VT_FUSED_ACTIVATION_FUNCTION = 14 + }; + Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + int32_t filter_width() const { + return GetField(VT_FILTER_WIDTH, 0); + } + int32_t filter_height() const { + return GetField(VT_FILTER_HEIGHT, 0); + } + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING) && + VerifyField(verifier, VT_STRIDE_W) && + VerifyField(verifier, VT_STRIDE_H) && + VerifyField(verifier, VT_FILTER_WIDTH) && + VerifyField(verifier, VT_FILTER_HEIGHT) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + Pool2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Pool2DOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) { + fbb_.AddElement(Pool2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(Pool2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Pool2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_filter_width(int32_t filter_width) { + fbb_.AddElement(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0); + } + void add_filter_height(int32_t filter_height) { + fbb_.AddElement(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreatePool2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, + Padding padding = Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + int32_t filter_width = 0, + int32_t filter_height = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + Pool2DOptionsBuilder builder_(_fbb); + builder_.add_filter_height(filter_height); + builder_.add_filter_width(filter_width); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable { + typedef DepthwiseConv2DOptions TableType; + Padding padding; + int32_t stride_w; + int32_t stride_h; + int32_t depth_multiplier; + ActivationFunctionType fused_activation_function; + DepthwiseConv2DOptionsT() + : padding(Padding_SAME), + stride_w(0), + stride_h(0), + depth_multiplier(0), + fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DepthwiseConv2DOptionsT NativeTableType; + enum { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_DEPTH_MULTIPLIER = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12 + }; + Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + int32_t depth_multiplier() const { + return GetField(VT_DEPTH_MULTIPLIER, 0); + } + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING) && + VerifyField(verifier, VT_STRIDE_W) && + VerifyField(verifier, VT_STRIDE_H) && + VerifyField(verifier, VT_DEPTH_MULTIPLIER) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DepthwiseConv2DOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(Padding padding) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_depth_multiplier(int32_t depth_multiplier) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDepthwiseConv2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, + Padding padding = Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + int32_t depth_multiplier = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + DepthwiseConv2DOptionsBuilder builder_(_fbb); + builder_.add_depth_multiplier(depth_multiplier); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable { + typedef ConcatEmbeddingsOptions TableType; + int32_t num_channels; + std::vector num_columns_per_channel; + std::vector embedding_dim_per_channel; + ConcatEmbeddingsOptionsT() + : num_channels(0) { + } +}; + +struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ConcatEmbeddingsOptionsT NativeTableType; + enum { + VT_NUM_CHANNELS = 4, + VT_NUM_COLUMNS_PER_CHANNEL = 6, + VT_EMBEDDING_DIM_PER_CHANNEL = 8 + }; + int32_t num_channels() const { + return GetField(VT_NUM_CHANNELS, 0); + } + const flatbuffers::Vector *num_columns_per_channel() const { + return GetPointer *>(VT_NUM_COLUMNS_PER_CHANNEL); + } + const flatbuffers::Vector *embedding_dim_per_channel() const { + return GetPointer *>(VT_EMBEDDING_DIM_PER_CHANNEL); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_CHANNELS) && + VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) && + verifier.Verify(num_columns_per_channel()) && + VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) && + verifier.Verify(embedding_dim_per_channel()) && + verifier.EndTable(); + } + ConcatEmbeddingsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ConcatEmbeddingsOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_channels(int32_t num_channels) { + fbb_.AddElement(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); + } + void add_num_columns_per_channel(flatbuffers::Offset> num_columns_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); + } + void add_embedding_dim_per_channel(flatbuffers::Offset> embedding_dim_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); + } + explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateConcatEmbeddingsOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_channels = 0, + flatbuffers::Offset> num_columns_per_channel = 0, + flatbuffers::Offset> embedding_dim_per_channel = 0) { + ConcatEmbeddingsOptionsBuilder builder_(_fbb); + builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); + builder_.add_num_columns_per_channel(num_columns_per_channel); + builder_.add_num_channels(num_channels); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateConcatEmbeddingsOptionsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_channels = 0, + const std::vector *num_columns_per_channel = nullptr, + const std::vector *embedding_dim_per_channel = nullptr) { + return tflite::CreateConcatEmbeddingsOptions( + _fbb, + num_channels, + num_columns_per_channel ? _fbb.CreateVector(*num_columns_per_channel) : 0, + embedding_dim_per_channel ? _fbb.CreateVector(*embedding_dim_per_channel) : 0); +} + +flatbuffers::Offset CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LSHProjectionOptionsT : public flatbuffers::NativeTable { + typedef LSHProjectionOptions TableType; + LSHProjectionType type; + LSHProjectionOptionsT() + : type(LSHProjectionType_UNKNOWN) { + } +}; + +struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LSHProjectionOptionsT NativeTableType; + enum { + VT_TYPE = 4 + }; + LSHProjectionType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TYPE) && + verifier.EndTable(); + } + LSHProjectionOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LSHProjectionOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_type(LSHProjectionType type) { + fbb_.AddElement(LSHProjectionOptions::VT_TYPE, static_cast(type), 0); + } + explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLSHProjectionOptions( + flatbuffers::FlatBufferBuilder &_fbb, + LSHProjectionType type = LSHProjectionType_UNKNOWN) { + LSHProjectionOptionsBuilder builder_(_fbb); + builder_.add_type(type); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SVDFOptionsT : public flatbuffers::NativeTable { + typedef SVDFOptions TableType; + int32_t rank; + ActivationFunctionType fused_activation_function; + SVDFOptionsT() + : rank(0), + fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SVDFOptionsT NativeTableType; + enum { + VT_RANK = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t rank() const { + return GetField(VT_RANK, 0); + } + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_RANK) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + SVDFOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SVDFOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_rank(int32_t rank) { + fbb_.AddElement(SVDFOptions::VT_RANK, rank, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSVDFOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t rank = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + SVDFOptionsBuilder builder_(_fbb); + builder_.add_rank(rank); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct RNNOptionsT : public flatbuffers::NativeTable { + typedef RNNOptions TableType; + ActivationFunctionType fused_activation_function; + RNNOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RNNOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + RNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RNNOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + RNNOptionsBuilder &operator=(const RNNOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + RNNOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct FullyConnectedOptionsT : public flatbuffers::NativeTable { + typedef FullyConnectedOptions TableType; + ActivationFunctionType fused_activation_function; + FullyConnectedOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FullyConnectedOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + FullyConnectedOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FullyConnectedOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFullyConnectedOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + FullyConnectedOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SoftmaxOptionsT : public flatbuffers::NativeTable { + typedef SoftmaxOptions TableType; + float beta; + SoftmaxOptionsT() + : beta(0.0f) { + } +}; + +struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SoftmaxOptionsT NativeTableType; + enum { + VT_BETA = 4 + }; + float beta() const { + return GetField(VT_BETA, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BETA) && + verifier.EndTable(); + } + SoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SoftmaxOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_beta(float beta) { + fbb_.AddElement(SoftmaxOptions::VT_BETA, beta, 0.0f); + } + explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSoftmaxOptions( + flatbuffers::FlatBufferBuilder &_fbb, + float beta = 0.0f) { + SoftmaxOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ConcatenationOptionsT : public flatbuffers::NativeTable { + typedef ConcatenationOptions TableType; + int32_t axis; + ActivationFunctionType fused_activation_function; + ConcatenationOptionsT() + : axis(0), + fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ConcatenationOptionsT NativeTableType; + enum { + VT_AXIS = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_AXIS) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + ConcatenationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ConcatenationOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { + fbb_.AddElement(ConcatenationOptions::VT_AXIS, axis, 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateConcatenationOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + ConcatenationOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct AddOptionsT : public flatbuffers::NativeTable { + typedef AddOptions TableType; + ActivationFunctionType fused_activation_function; + AddOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef AddOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + AddOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct AddOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + AddOptionsBuilder &operator=(const AddOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateAddOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + AddOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct MulOptionsT : public flatbuffers::NativeTable { + typedef MulOptions TableType; + ActivationFunctionType fused_activation_function; + MulOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MulOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + MulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MulOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + MulOptionsBuilder &operator=(const MulOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMulOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + MulOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct L2NormOptionsT : public flatbuffers::NativeTable { + typedef L2NormOptions TableType; + ActivationFunctionType fused_activation_function; + L2NormOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } +}; + +struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef L2NormOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + verifier.EndTable(); + } + L2NormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct L2NormOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateL2NormOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { + L2NormOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable { + typedef LocalResponseNormalizationOptions TableType; + int32_t radius; + float bias; + float alpha; + float beta; + LocalResponseNormalizationOptionsT() + : radius(0), + bias(0.0f), + alpha(0.0f), + beta(0.0f) { + } +}; + +struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LocalResponseNormalizationOptionsT NativeTableType; + enum { + VT_RADIUS = 4, + VT_BIAS = 6, + VT_ALPHA = 8, + VT_BETA = 10 + }; + int32_t radius() const { + return GetField(VT_RADIUS, 0); + } + float bias() const { + return GetField(VT_BIAS, 0.0f); + } + float alpha() const { + return GetField(VT_ALPHA, 0.0f); + } + float beta() const { + return GetField(VT_BETA, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_RADIUS) && + VerifyField(verifier, VT_BIAS) && + VerifyField(verifier, VT_ALPHA) && + VerifyField(verifier, VT_BETA) && + verifier.EndTable(); + } + LocalResponseNormalizationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LocalResponseNormalizationOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_radius(int32_t radius) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0); + } + void add_bias(float bias) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f); + } + void add_alpha(float alpha) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f); + } + void add_beta(float beta) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); + } + explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + LocalResponseNormalizationOptionsBuilder &operator=(const LocalResponseNormalizationOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLocalResponseNormalizationOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t radius = 0, + float bias = 0.0f, + float alpha = 0.0f, + float beta = 0.0f) { + LocalResponseNormalizationOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + builder_.add_alpha(alpha); + builder_.add_bias(bias); + builder_.add_radius(radius); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LSTMOptionsT : public flatbuffers::NativeTable { + typedef LSTMOptions TableType; + ActivationFunctionType fused_activation_function; + float cell_clip; + float proj_clip; + LSTMOptionsT() + : fused_activation_function(ActivationFunctionType_NONE), + cell_clip(0.0f), + proj_clip(0.0f) { + } +}; + +struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LSTMOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8 + }; + ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField(verifier, VT_CELL_CLIP) && + VerifyField(verifier, VT_PROJ_CLIP) && + verifier.EndTable(); + } + LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LSTMOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f) { + LSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ResizeBilinearOptionsT : public flatbuffers::NativeTable { + typedef ResizeBilinearOptions TableType; + int32_t new_height; + int32_t new_width; + ResizeBilinearOptionsT() + : new_height(0), + new_width(0) { + } +}; + +struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ResizeBilinearOptionsT NativeTableType; + enum { + VT_NEW_HEIGHT = 4, + VT_NEW_WIDTH = 6 + }; + int32_t new_height() const { + return GetField(VT_NEW_HEIGHT, 0); + } + int32_t new_width() const { + return GetField(VT_NEW_WIDTH, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NEW_HEIGHT) && + VerifyField(verifier, VT_NEW_WIDTH) && + verifier.EndTable(); + } + ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ResizeBilinearOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_new_height(int32_t new_height) { + fbb_.AddElement(ResizeBilinearOptions::VT_NEW_HEIGHT, new_height, 0); + } + void add_new_width(int32_t new_width) { + fbb_.AddElement(ResizeBilinearOptions::VT_NEW_WIDTH, new_width, 0); + } + explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateResizeBilinearOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t new_height = 0, + int32_t new_width = 0) { + ResizeBilinearOptionsBuilder builder_(_fbb); + builder_.add_new_width(new_width); + builder_.add_new_height(new_height); + return builder_.Finish(); +} + +flatbuffers::Offset CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct CallOptionsT : public flatbuffers::NativeTable { + typedef CallOptions TableType; + uint32_t subgraph; + CallOptionsT() + : subgraph(0) { + } +}; + +struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CallOptionsT NativeTableType; + enum { + VT_SUBGRAPH = 4 + }; + uint32_t subgraph() const { + return GetField(VT_SUBGRAPH, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_SUBGRAPH) && + verifier.EndTable(); + } + CallOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CallOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_subgraph(uint32_t subgraph) { + fbb_.AddElement(CallOptions::VT_SUBGRAPH, subgraph, 0); + } + explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + CallOptionsBuilder &operator=(const CallOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCallOptions( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t subgraph = 0) { + CallOptionsBuilder builder_(_fbb); + builder_.add_subgraph(subgraph); + return builder_.Finish(); +} + +flatbuffers::Offset CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ReshapeOptionsT : public flatbuffers::NativeTable { + typedef ReshapeOptions TableType; + std::vector new_shape; + ReshapeOptionsT() { + } +}; + +struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ReshapeOptionsT NativeTableType; + enum { + VT_NEW_SHAPE = 4 + }; + const flatbuffers::Vector *new_shape() const { + return GetPointer *>(VT_NEW_SHAPE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NEW_SHAPE) && + verifier.Verify(new_shape()) && + verifier.EndTable(); + } + ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ReshapeOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_new_shape(flatbuffers::Offset> new_shape) { + fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape); + } + explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateReshapeOptions( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> new_shape = 0) { + ReshapeOptionsBuilder builder_(_fbb); + builder_.add_new_shape(new_shape); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateReshapeOptionsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *new_shape = nullptr) { + return tflite::CreateReshapeOptions( + _fbb, + new_shape ? _fbb.CreateVector(*new_shape) : 0); +} + +flatbuffers::Offset CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SkipGramOptionsT : public flatbuffers::NativeTable { + typedef SkipGramOptions TableType; + int32_t ngram_size; + int32_t max_skip_size; + bool include_all_ngrams; + SkipGramOptionsT() + : ngram_size(0), + max_skip_size(0), + include_all_ngrams(false) { + } +}; + +struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SkipGramOptionsT NativeTableType; + enum { + VT_NGRAM_SIZE = 4, + VT_MAX_SKIP_SIZE = 6, + VT_INCLUDE_ALL_NGRAMS = 8 + }; + int32_t ngram_size() const { + return GetField(VT_NGRAM_SIZE, 0); + } + int32_t max_skip_size() const { + return GetField(VT_MAX_SKIP_SIZE, 0); + } + bool include_all_ngrams() const { + return GetField(VT_INCLUDE_ALL_NGRAMS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NGRAM_SIZE) && + VerifyField(verifier, VT_MAX_SKIP_SIZE) && + VerifyField(verifier, VT_INCLUDE_ALL_NGRAMS) && + verifier.EndTable(); + } + SkipGramOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SkipGramOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_ngram_size(int32_t ngram_size) { + fbb_.AddElement(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0); + } + void add_max_skip_size(int32_t max_skip_size) { + fbb_.AddElement(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0); + } + void add_include_all_ngrams(bool include_all_ngrams) { + fbb_.AddElement(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, static_cast(include_all_ngrams), 0); + } + explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSkipGramOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t ngram_size = 0, + int32_t max_skip_size = 0, + bool include_all_ngrams = false) { + SkipGramOptionsBuilder builder_(_fbb); + builder_.add_max_skip_size(max_skip_size); + builder_.add_ngram_size(ngram_size); + builder_.add_include_all_ngrams(include_all_ngrams); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SpaceToDepthOptionsT : public flatbuffers::NativeTable { + typedef SpaceToDepthOptions TableType; + int32_t block_size; + SpaceToDepthOptionsT() + : block_size(0) { + } +}; + +struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SpaceToDepthOptionsT NativeTableType; + enum { + VT_BLOCK_SIZE = 4 + }; + int32_t block_size() const { + return GetField(VT_BLOCK_SIZE, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BLOCK_SIZE) && + verifier.EndTable(); + } + SpaceToDepthOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SpaceToDepthOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_block_size(int32_t block_size) { + fbb_.AddElement(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0); + } + explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSpaceToDepthOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t block_size = 0) { + SpaceToDepthOptionsBuilder builder_(_fbb); + builder_.add_block_size(block_size); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable { + typedef EmbeddingLookupSparseOptions TableType; + CombinerType combiner; + EmbeddingLookupSparseOptionsT() + : combiner(CombinerType_SUM) { + } +}; + +struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef EmbeddingLookupSparseOptionsT NativeTableType; + enum { + VT_COMBINER = 4 + }; + CombinerType combiner() const { + return static_cast(GetField(VT_COMBINER, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_COMBINER) && + verifier.EndTable(); + } + EmbeddingLookupSparseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct EmbeddingLookupSparseOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_combiner(CombinerType combiner) { + fbb_.AddElement(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast(combiner), 0); + } + explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateEmbeddingLookupSparseOptions( + flatbuffers::FlatBufferBuilder &_fbb, + CombinerType combiner = CombinerType_SUM) { + EmbeddingLookupSparseOptionsBuilder builder_(_fbb); + builder_.add_combiner(combiner); + return builder_.Finish(); +} + +flatbuffers::Offset CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct OperatorCodeT : public flatbuffers::NativeTable { + typedef OperatorCode TableType; + BuiltinOperator builtin_code; + std::string custom_code; + OperatorCodeT() + : builtin_code(BuiltinOperator_ADD) { + } +}; + +struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef OperatorCodeT NativeTableType; + enum { + VT_BUILTIN_CODE = 4, + VT_CUSTOM_CODE = 6 + }; + BuiltinOperator builtin_code() const { + return static_cast(GetField(VT_BUILTIN_CODE, 0)); + } + const flatbuffers::String *custom_code() const { + return GetPointer(VT_CUSTOM_CODE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BUILTIN_CODE) && + VerifyOffset(verifier, VT_CUSTOM_CODE) && + verifier.Verify(custom_code()) && + verifier.EndTable(); + } + OperatorCodeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct OperatorCodeBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_builtin_code(BuiltinOperator builtin_code) { + fbb_.AddElement(OperatorCode::VT_BUILTIN_CODE, static_cast(builtin_code), 0); + } + void add_custom_code(flatbuffers::Offset custom_code) { + fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code); + } + explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + OperatorCodeBuilder &operator=(const OperatorCodeBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateOperatorCode( + flatbuffers::FlatBufferBuilder &_fbb, + BuiltinOperator builtin_code = BuiltinOperator_ADD, + flatbuffers::Offset custom_code = 0) { + OperatorCodeBuilder builder_(_fbb); + builder_.add_custom_code(custom_code); + builder_.add_builtin_code(builtin_code); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateOperatorCodeDirect( + flatbuffers::FlatBufferBuilder &_fbb, + BuiltinOperator builtin_code = BuiltinOperator_ADD, + const char *custom_code = nullptr) { + return tflite::CreateOperatorCode( + _fbb, + builtin_code, + custom_code ? _fbb.CreateString(custom_code) : 0); +} + +flatbuffers::Offset CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct OperatorT : public flatbuffers::NativeTable { + typedef Operator TableType; + uint32_t opcode_index; + std::vector inputs; + std::vector outputs; + BuiltinOptionsUnion builtin_options; + std::vector custom_options; + CustomOptionsFormat custom_options_format; + OperatorT() + : opcode_index(0), + custom_options_format(CustomOptionsFormat_FLEXBUFFERS) { + } +}; + +struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef OperatorT NativeTableType; + enum { + VT_OPCODE_INDEX = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_BUILTIN_OPTIONS_TYPE = 10, + VT_BUILTIN_OPTIONS = 12, + VT_CUSTOM_OPTIONS = 14, + VT_CUSTOM_OPTIONS_FORMAT = 16 + }; + uint32_t opcode_index() const { + return GetField(VT_OPCODE_INDEX, 0); + } + const flatbuffers::Vector *inputs() const { + return GetPointer *>(VT_INPUTS); + } + const flatbuffers::Vector *outputs() const { + return GetPointer *>(VT_OUTPUTS); + } + BuiltinOptions builtin_options_type() const { + return static_cast(GetField(VT_BUILTIN_OPTIONS_TYPE, 0)); + } + const void *builtin_options() const { + return GetPointer(VT_BUILTIN_OPTIONS); + } + template const T *builtin_options_as() const; + const Conv2DOptions *builtin_options_as_Conv2DOptions() const { + return builtin_options_type() == BuiltinOptions_Conv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { + return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { + return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions ? static_cast(builtin_options()) : nullptr; + } + const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { + return builtin_options_type() == BuiltinOptions_LSHProjectionOptions ? static_cast(builtin_options()) : nullptr; + } + const Pool2DOptions *builtin_options_as_Pool2DOptions() const { + return builtin_options_type() == BuiltinOptions_Pool2DOptions ? static_cast(builtin_options()) : nullptr; + } + const SVDFOptions *builtin_options_as_SVDFOptions() const { + return builtin_options_type() == BuiltinOptions_SVDFOptions ? static_cast(builtin_options()) : nullptr; + } + const RNNOptions *builtin_options_as_RNNOptions() const { + return builtin_options_type() == BuiltinOptions_RNNOptions ? static_cast(builtin_options()) : nullptr; + } + const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { + return builtin_options_type() == BuiltinOptions_FullyConnectedOptions ? static_cast(builtin_options()) : nullptr; + } + const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { + return builtin_options_type() == BuiltinOptions_SoftmaxOptions ? static_cast(builtin_options()) : nullptr; + } + const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { + return builtin_options_type() == BuiltinOptions_ConcatenationOptions ? static_cast(builtin_options()) : nullptr; + } + const AddOptions *builtin_options_as_AddOptions() const { + return builtin_options_type() == BuiltinOptions_AddOptions ? static_cast(builtin_options()) : nullptr; + } + const L2NormOptions *builtin_options_as_L2NormOptions() const { + return builtin_options_type() == BuiltinOptions_L2NormOptions ? static_cast(builtin_options()) : nullptr; + } + const LocalResponseNormalizationOptions *builtin_options_as_LocalResponseNormalizationOptions() const { + return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions ? static_cast(builtin_options()) : nullptr; + } + const LSTMOptions *builtin_options_as_LSTMOptions() const { + return builtin_options_type() == BuiltinOptions_LSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { + return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions ? static_cast(builtin_options()) : nullptr; + } + const CallOptions *builtin_options_as_CallOptions() const { + return builtin_options_type() == BuiltinOptions_CallOptions ? static_cast(builtin_options()) : nullptr; + } + const ReshapeOptions *builtin_options_as_ReshapeOptions() const { + return builtin_options_type() == BuiltinOptions_ReshapeOptions ? static_cast(builtin_options()) : nullptr; + } + const SkipGramOptions *builtin_options_as_SkipGramOptions() const { + return builtin_options_type() == BuiltinOptions_SkipGramOptions ? static_cast(builtin_options()) : nullptr; + } + const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { + return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions ? static_cast(builtin_options()) : nullptr; + } + const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const { + return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions ? static_cast(builtin_options()) : nullptr; + } + const MulOptions *builtin_options_as_MulOptions() const { + return builtin_options_type() == BuiltinOptions_MulOptions ? static_cast(builtin_options()) : nullptr; + } + const flatbuffers::Vector *custom_options() const { + return GetPointer *>(VT_CUSTOM_OPTIONS); + } + CustomOptionsFormat custom_options_format() const { + return static_cast(GetField(VT_CUSTOM_OPTIONS_FORMAT, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OPCODE_INDEX) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.Verify(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.Verify(outputs()) && + VerifyField(verifier, VT_BUILTIN_OPTIONS_TYPE) && + VerifyOffset(verifier, VT_BUILTIN_OPTIONS) && + VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) && + VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && + verifier.Verify(custom_options()) && + VerifyField(verifier, VT_CUSTOM_OPTIONS_FORMAT) && + verifier.EndTable(); + } + OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +template<> inline const Conv2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Conv2DOptions(); +} + +template<> inline const DepthwiseConv2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_DepthwiseConv2DOptions(); +} + +template<> inline const ConcatEmbeddingsOptions *Operator::builtin_options_as() const { + return builtin_options_as_ConcatEmbeddingsOptions(); +} + +template<> inline const LSHProjectionOptions *Operator::builtin_options_as() const { + return builtin_options_as_LSHProjectionOptions(); +} + +template<> inline const Pool2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Pool2DOptions(); +} + +template<> inline const SVDFOptions *Operator::builtin_options_as() const { + return builtin_options_as_SVDFOptions(); +} + +template<> inline const RNNOptions *Operator::builtin_options_as() const { + return builtin_options_as_RNNOptions(); +} + +template<> inline const FullyConnectedOptions *Operator::builtin_options_as() const { + return builtin_options_as_FullyConnectedOptions(); +} + +template<> inline const SoftmaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_SoftmaxOptions(); +} + +template<> inline const ConcatenationOptions *Operator::builtin_options_as() const { + return builtin_options_as_ConcatenationOptions(); +} + +template<> inline const AddOptions *Operator::builtin_options_as() const { + return builtin_options_as_AddOptions(); +} + +template<> inline const L2NormOptions *Operator::builtin_options_as() const { + return builtin_options_as_L2NormOptions(); +} + +template<> inline const LocalResponseNormalizationOptions *Operator::builtin_options_as() const { + return builtin_options_as_LocalResponseNormalizationOptions(); +} + +template<> inline const LSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_LSTMOptions(); +} + +template<> inline const ResizeBilinearOptions *Operator::builtin_options_as() const { + return builtin_options_as_ResizeBilinearOptions(); +} + +template<> inline const CallOptions *Operator::builtin_options_as() const { + return builtin_options_as_CallOptions(); +} + +template<> inline const ReshapeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReshapeOptions(); +} + +template<> inline const SkipGramOptions *Operator::builtin_options_as() const { + return builtin_options_as_SkipGramOptions(); +} + +template<> inline const SpaceToDepthOptions *Operator::builtin_options_as() const { + return builtin_options_as_SpaceToDepthOptions(); +} + +template<> inline const EmbeddingLookupSparseOptions *Operator::builtin_options_as() const { + return builtin_options_as_EmbeddingLookupSparseOptions(); +} + +template<> inline const MulOptions *Operator::builtin_options_as() const { + return builtin_options_as_MulOptions(); +} + +struct OperatorBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_opcode_index(uint32_t opcode_index) { + fbb_.AddElement(Operator::VT_OPCODE_INDEX, opcode_index, 0); + } + void add_inputs(flatbuffers::Offset> inputs) { + fbb_.AddOffset(Operator::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset> outputs) { + fbb_.AddOffset(Operator::VT_OUTPUTS, outputs); + } + void add_builtin_options_type(BuiltinOptions builtin_options_type) { + fbb_.AddElement(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast(builtin_options_type), 0); + } + void add_builtin_options(flatbuffers::Offset builtin_options) { + fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options); + } + void add_custom_options(flatbuffers::Offset> custom_options) { + fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options); + } + void add_custom_options_format(CustomOptionsFormat custom_options_format) { + fbb_.AddElement(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast(custom_options_format), 0); + } + explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + OperatorBuilder &operator=(const OperatorBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateOperator( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t opcode_index = 0, + flatbuffers::Offset> inputs = 0, + flatbuffers::Offset> outputs = 0, + BuiltinOptions builtin_options_type = BuiltinOptions_NONE, + flatbuffers::Offset builtin_options = 0, + flatbuffers::Offset> custom_options = 0, + CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS) { + OperatorBuilder builder_(_fbb); + builder_.add_custom_options(custom_options); + builder_.add_builtin_options(builtin_options); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_opcode_index(opcode_index); + builder_.add_custom_options_format(custom_options_format); + builder_.add_builtin_options_type(builtin_options_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateOperatorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t opcode_index = 0, + const std::vector *inputs = nullptr, + const std::vector *outputs = nullptr, + BuiltinOptions builtin_options_type = BuiltinOptions_NONE, + flatbuffers::Offset builtin_options = 0, + const std::vector *custom_options = nullptr, + CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS) { + return tflite::CreateOperator( + _fbb, + opcode_index, + inputs ? _fbb.CreateVector(*inputs) : 0, + outputs ? _fbb.CreateVector(*outputs) : 0, + builtin_options_type, + builtin_options, + custom_options ? _fbb.CreateVector(*custom_options) : 0, + custom_options_format); +} + +flatbuffers::Offset CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SubGraphT : public flatbuffers::NativeTable { + typedef SubGraph TableType; + std::vector> tensors; + std::vector inputs; + std::vector outputs; + std::vector> operators; + std::string name; + SubGraphT() { + } +}; + +struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SubGraphT NativeTableType; + enum { + VT_TENSORS = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_OPERATORS = 10, + VT_NAME = 12 + }; + const flatbuffers::Vector> *tensors() const { + return GetPointer> *>(VT_TENSORS); + } + const flatbuffers::Vector *inputs() const { + return GetPointer *>(VT_INPUTS); + } + const flatbuffers::Vector *outputs() const { + return GetPointer *>(VT_OUTPUTS); + } + const flatbuffers::Vector> *operators() const { + return GetPointer> *>(VT_OPERATORS); + } + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TENSORS) && + verifier.Verify(tensors()) && + verifier.VerifyVectorOfTables(tensors()) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.Verify(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.Verify(outputs()) && + VerifyOffset(verifier, VT_OPERATORS) && + verifier.Verify(operators()) && + verifier.VerifyVectorOfTables(operators()) && + VerifyOffset(verifier, VT_NAME) && + verifier.Verify(name()) && + verifier.EndTable(); + } + SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SubGraphBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_tensors(flatbuffers::Offset>> tensors) { + fbb_.AddOffset(SubGraph::VT_TENSORS, tensors); + } + void add_inputs(flatbuffers::Offset> inputs) { + fbb_.AddOffset(SubGraph::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset> outputs) { + fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); + } + void add_operators(flatbuffers::Offset>> operators) { + fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); + } + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(SubGraph::VT_NAME, name); + } + explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + SubGraphBuilder &operator=(const SubGraphBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSubGraph( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> tensors = 0, + flatbuffers::Offset> inputs = 0, + flatbuffers::Offset> outputs = 0, + flatbuffers::Offset>> operators = 0, + flatbuffers::Offset name = 0) { + SubGraphBuilder builder_(_fbb); + builder_.add_name(name); + builder_.add_operators(operators); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_tensors(tensors); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateSubGraphDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *tensors = nullptr, + const std::vector *inputs = nullptr, + const std::vector *outputs = nullptr, + const std::vector> *operators = nullptr, + const char *name = nullptr) { + return tflite::CreateSubGraph( + _fbb, + tensors ? _fbb.CreateVector>(*tensors) : 0, + inputs ? _fbb.CreateVector(*inputs) : 0, + outputs ? _fbb.CreateVector(*outputs) : 0, + operators ? _fbb.CreateVector>(*operators) : 0, + name ? _fbb.CreateString(name) : 0); +} + +flatbuffers::Offset CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BufferT : public flatbuffers::NativeTable { + typedef Buffer TableType; + std::vector data; + BufferT() { + } +}; + +struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BufferT NativeTableType; + enum { + VT_DATA = 4 + }; + const flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DATA) && + verifier.Verify(data()) && + verifier.EndTable(); + } + BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BufferBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_data(flatbuffers::Offset> data) { + fbb_.AddOffset(Buffer::VT_DATA, data); + } + explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + BufferBuilder &operator=(const BufferBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBuffer( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> data = 0) { + BufferBuilder builder_(_fbb); + builder_.add_data(data); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateBufferDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *data = nullptr) { + return tflite::CreateBuffer( + _fbb, + data ? _fbb.CreateVector(*data) : 0); +} + +flatbuffers::Offset CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ModelT : public flatbuffers::NativeTable { + typedef Model TableType; + uint32_t version; + std::vector> operator_codes; + std::vector> subgraphs; + std::string description; + std::vector> buffers; + ModelT() + : version(0) { + } +}; + +struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ModelT NativeTableType; + enum { + VT_VERSION = 4, + VT_OPERATOR_CODES = 6, + VT_SUBGRAPHS = 8, + VT_DESCRIPTION = 10, + VT_BUFFERS = 12 + }; + uint32_t version() const { + return GetField(VT_VERSION, 0); + } + const flatbuffers::Vector> *operator_codes() const { + return GetPointer> *>(VT_OPERATOR_CODES); + } + const flatbuffers::Vector> *subgraphs() const { + return GetPointer> *>(VT_SUBGRAPHS); + } + const flatbuffers::String *description() const { + return GetPointer(VT_DESCRIPTION); + } + const flatbuffers::Vector> *buffers() const { + return GetPointer> *>(VT_BUFFERS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VERSION) && + VerifyOffset(verifier, VT_OPERATOR_CODES) && + verifier.Verify(operator_codes()) && + verifier.VerifyVectorOfTables(operator_codes()) && + VerifyOffset(verifier, VT_SUBGRAPHS) && + verifier.Verify(subgraphs()) && + verifier.VerifyVectorOfTables(subgraphs()) && + VerifyOffset(verifier, VT_DESCRIPTION) && + verifier.Verify(description()) && + VerifyOffset(verifier, VT_BUFFERS) && + verifier.Verify(buffers()) && + verifier.VerifyVectorOfTables(buffers()) && + verifier.EndTable(); + } + ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ModelBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_version(uint32_t version) { + fbb_.AddElement(Model::VT_VERSION, version, 0); + } + void add_operator_codes(flatbuffers::Offset>> operator_codes) { + fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); + } + void add_subgraphs(flatbuffers::Offset>> subgraphs) { + fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); + } + void add_description(flatbuffers::Offset description) { + fbb_.AddOffset(Model::VT_DESCRIPTION, description); + } + void add_buffers(flatbuffers::Offset>> buffers) { + fbb_.AddOffset(Model::VT_BUFFERS, buffers); + } + explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ModelBuilder &operator=(const ModelBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateModel( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + flatbuffers::Offset>> operator_codes = 0, + flatbuffers::Offset>> subgraphs = 0, + flatbuffers::Offset description = 0, + flatbuffers::Offset>> buffers = 0) { + ModelBuilder builder_(_fbb); + builder_.add_buffers(buffers); + builder_.add_description(description); + builder_.add_subgraphs(subgraphs); + builder_.add_operator_codes(operator_codes); + builder_.add_version(version); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateModelDirect( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + const std::vector> *operator_codes = nullptr, + const std::vector> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector> *buffers = nullptr) { + return tflite::CreateModel( + _fbb, + version, + operator_codes ? _fbb.CreateVector>(*operator_codes) : 0, + subgraphs ? _fbb.CreateVector>(*subgraphs) : 0, + description ? _fbb.CreateString(description) : 0, + buffers ? _fbb.CreateVector>(*buffers) : 0); +} + +flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +inline QuantizationParametersT *QuantizationParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new QuantizationParametersT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void QuantizationParameters::UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = min(); if (_e) { _o->min.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->min[_i] = _e->Get(_i); } } }; + { auto _e = max(); if (_e) { _o->max.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->max[_i] = _e->Get(_i); } } }; + { auto _e = scale(); if (_e) { _o->scale.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->scale[_i] = _e->Get(_i); } } }; + { auto _e = zero_point(); if (_e) { _o->zero_point.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->zero_point[_i] = _e->Get(_i); } } }; +} + +inline flatbuffers::Offset QuantizationParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateQuantizationParameters(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const QuantizationParametersT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _min = _o->min.size() ? _fbb.CreateVector(_o->min) : 0; + auto _max = _o->max.size() ? _fbb.CreateVector(_o->max) : 0; + auto _scale = _o->scale.size() ? _fbb.CreateVector(_o->scale) : 0; + auto _zero_point = _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0; + return tflite::CreateQuantizationParameters( + _fbb, + _min, + _max, + _scale, + _zero_point); +} + +inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new TensorT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = shape(); if (_e) { _o->shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape[_i] = _e->Get(_i); } } }; + { auto _e = type(); _o->type = _e; }; + { auto _e = buffer(); _o->buffer = _e; }; + { auto _e = name(); if (_e) _o->name = _e->str(); }; + { auto _e = quantization(); if (_e) _o->quantization = std::unique_ptr(_e->UnPack(_resolver)); }; +} + +inline flatbuffers::Offset Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTensor(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TensorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0; + auto _type = _o->type; + auto _buffer = _o->buffer; + auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name); + auto _quantization = _o->quantization ? CreateQuantizationParameters(_fbb, _o->quantization.get(), _rehasher) : 0; + return tflite::CreateTensor( + _fbb, + _shape, + _type, + _buffer, + _name, + _quantization); +} + +inline Conv2DOptionsT *Conv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new Conv2DOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = padding(); _o->padding = _e; }; + { auto _e = stride_w(); _o->stride_w = _e; }; + { auto _e = stride_h(); _o->stride_h = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateConv2DOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Conv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _padding = _o->padding; + auto _stride_w = _o->stride_w; + auto _stride_h = _o->stride_h; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateConv2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _fused_activation_function); +} + +inline Pool2DOptionsT *Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new Pool2DOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Pool2DOptions::UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = padding(); _o->padding = _e; }; + { auto _e = stride_w(); _o->stride_w = _e; }; + { auto _e = stride_h(); _o->stride_h = _e; }; + { auto _e = filter_width(); _o->filter_width = _e; }; + { auto _e = filter_height(); _o->filter_height = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset Pool2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreatePool2DOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Pool2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _padding = _o->padding; + auto _stride_w = _o->stride_w; + auto _stride_h = _o->stride_h; + auto _filter_width = _o->filter_width; + auto _filter_height = _o->filter_height; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreatePool2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _filter_width, + _filter_height, + _fused_activation_function); +} + +inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new DepthwiseConv2DOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = padding(); _o->padding = _e; }; + { auto _e = stride_w(); _o->stride_w = _e; }; + { auto _e = stride_h(); _o->stride_h = _e; }; + { auto _e = depth_multiplier(); _o->depth_multiplier = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DepthwiseConv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _padding = _o->padding; + auto _stride_w = _o->stride_w; + auto _stride_h = _o->stride_h; + auto _depth_multiplier = _o->depth_multiplier; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateDepthwiseConv2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _depth_multiplier, + _fused_activation_function); +} + +inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ConcatEmbeddingsOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ConcatEmbeddingsOptions::UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = num_channels(); _o->num_channels = _e; }; + { auto _e = num_columns_per_channel(); if (_e) { _o->num_columns_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->num_columns_per_channel[_i] = _e->Get(_i); } } }; + { auto _e = embedding_dim_per_channel(); if (_e) { _o->embedding_dim_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->embedding_dim_per_channel[_i] = _e->Get(_i); } } }; +} + +inline flatbuffers::Offset ConcatEmbeddingsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatEmbeddingsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _num_channels = _o->num_channels; + auto _num_columns_per_channel = _o->num_columns_per_channel.size() ? _fbb.CreateVector(_o->num_columns_per_channel) : 0; + auto _embedding_dim_per_channel = _o->embedding_dim_per_channel.size() ? _fbb.CreateVector(_o->embedding_dim_per_channel) : 0; + return tflite::CreateConcatEmbeddingsOptions( + _fbb, + _num_channels, + _num_columns_per_channel, + _embedding_dim_per_channel); +} + +inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new LSHProjectionOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void LSHProjectionOptions::UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = type(); _o->type = _e; }; +} + +inline flatbuffers::Offset LSHProjectionOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLSHProjectionOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSHProjectionOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _type = _o->type; + return tflite::CreateLSHProjectionOptions( + _fbb, + _type); +} + +inline SVDFOptionsT *SVDFOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new SVDFOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void SVDFOptions::UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = rank(); _o->rank = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset SVDFOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSVDFOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SVDFOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _rank = _o->rank; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateSVDFOptions( + _fbb, + _rank, + _fused_activation_function); +} + +inline RNNOptionsT *RNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new RNNOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void RNNOptions::UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset RNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRNNOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateRNNOptions( + _fbb, + _fused_activation_function); +} + +inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new FullyConnectedOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void FullyConnectedOptions::UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset FullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFullyConnectedOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FullyConnectedOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateFullyConnectedOptions( + _fbb, + _fused_activation_function); +} + +inline SoftmaxOptionsT *SoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new SoftmaxOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void SoftmaxOptions::UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = beta(); _o->beta = _e; }; +} + +inline flatbuffers::Offset SoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSoftmaxOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _beta = _o->beta; + return tflite::CreateSoftmaxOptions( + _fbb, + _beta); +} + +inline ConcatenationOptionsT *ConcatenationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ConcatenationOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ConcatenationOptions::UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = axis(); _o->axis = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset ConcatenationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateConcatenationOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatenationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _axis = _o->axis; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateConcatenationOptions( + _fbb, + _axis, + _fused_activation_function); +} + +inline AddOptionsT *AddOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new AddOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void AddOptions::UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset AddOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateAddOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateAddOptions( + _fbb, + _fused_activation_function); +} + +inline MulOptionsT *MulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new MulOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void MulOptions::UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset MulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMulOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateMulOptions( + _fbb, + _fused_activation_function); +} + +inline L2NormOptionsT *L2NormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new L2NormOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void L2NormOptions::UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; +} + +inline flatbuffers::Offset L2NormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateL2NormOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const L2NormOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateL2NormOptions( + _fbb, + _fused_activation_function); +} + +inline LocalResponseNormalizationOptionsT *LocalResponseNormalizationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new LocalResponseNormalizationOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void LocalResponseNormalizationOptions::UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = radius(); _o->radius = _e; }; + { auto _e = bias(); _o->bias = _e; }; + { auto _e = alpha(); _o->alpha = _e; }; + { auto _e = beta(); _o->beta = _e; }; +} + +inline flatbuffers::Offset LocalResponseNormalizationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LocalResponseNormalizationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _radius = _o->radius; + auto _bias = _o->bias; + auto _alpha = _o->alpha; + auto _beta = _o->beta; + return tflite::CreateLocalResponseNormalizationOptions( + _fbb, + _radius, + _bias, + _alpha, + _beta); +} + +inline LSTMOptionsT *LSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new LSTMOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; + { auto _e = cell_clip(); _o->cell_clip = _e; }; + { auto _e = proj_clip(); _o->proj_clip = _e; }; +} + +inline flatbuffers::Offset LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLSTMOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _cell_clip = _o->cell_clip; + auto _proj_clip = _o->proj_clip; + return tflite::CreateLSTMOptions( + _fbb, + _fused_activation_function, + _cell_clip, + _proj_clip); +} + +inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ResizeBilinearOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ResizeBilinearOptions::UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = new_height(); _o->new_height = _e; }; + { auto _e = new_width(); _o->new_width = _e; }; +} + +inline flatbuffers::Offset ResizeBilinearOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateResizeBilinearOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeBilinearOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _new_height = _o->new_height; + auto _new_width = _o->new_width; + return tflite::CreateResizeBilinearOptions( + _fbb, + _new_height, + _new_width); +} + +inline CallOptionsT *CallOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new CallOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void CallOptions::UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = subgraph(); _o->subgraph = _e; }; +} + +inline flatbuffers::Offset CallOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCallOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _subgraph = _o->subgraph; + return tflite::CreateCallOptions( + _fbb, + _subgraph); +} + +inline ReshapeOptionsT *ReshapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ReshapeOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ReshapeOptions::UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = new_shape(); if (_e) { _o->new_shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->new_shape[_i] = _e->Get(_i); } } }; +} + +inline flatbuffers::Offset ReshapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateReshapeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReshapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0; + return tflite::CreateReshapeOptions( + _fbb, + _new_shape); +} + +inline SkipGramOptionsT *SkipGramOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new SkipGramOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void SkipGramOptions::UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = ngram_size(); _o->ngram_size = _e; }; + { auto _e = max_skip_size(); _o->max_skip_size = _e; }; + { auto _e = include_all_ngrams(); _o->include_all_ngrams = _e; }; +} + +inline flatbuffers::Offset SkipGramOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSkipGramOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SkipGramOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _ngram_size = _o->ngram_size; + auto _max_skip_size = _o->max_skip_size; + auto _include_all_ngrams = _o->include_all_ngrams; + return tflite::CreateSkipGramOptions( + _fbb, + _ngram_size, + _max_skip_size, + _include_all_ngrams); +} + +inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new SpaceToDepthOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void SpaceToDepthOptions::UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = block_size(); _o->block_size = _e; }; +} + +inline flatbuffers::Offset SpaceToDepthOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSpaceToDepthOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToDepthOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _block_size = _o->block_size; + return tflite::CreateSpaceToDepthOptions( + _fbb, + _block_size); +} + +inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new EmbeddingLookupSparseOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void EmbeddingLookupSparseOptions::UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = combiner(); _o->combiner = _e; }; +} + +inline flatbuffers::Offset EmbeddingLookupSparseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const EmbeddingLookupSparseOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _combiner = _o->combiner; + return tflite::CreateEmbeddingLookupSparseOptions( + _fbb, + _combiner); +} + +inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new OperatorCodeT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void OperatorCode::UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = builtin_code(); _o->builtin_code = _e; }; + { auto _e = custom_code(); if (_e) _o->custom_code = _e->str(); }; +} + +inline flatbuffers::Offset OperatorCode::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateOperatorCode(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorCodeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _builtin_code = _o->builtin_code; + auto _custom_code = _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code); + return tflite::CreateOperatorCode( + _fbb, + _builtin_code, + _custom_code); +} + +inline OperatorT *Operator::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new OperatorT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Operator::UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = opcode_index(); _o->opcode_index = _e; }; + { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } }; + { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } }; + { auto _e = builtin_options_type(); _o->builtin_options.type = _e; }; + { auto _e = builtin_options(); if (_e) _o->builtin_options.value = BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver); }; + { auto _e = custom_options(); if (_e) { _o->custom_options.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->custom_options[_i] = _e->Get(_i); } } }; + { auto _e = custom_options_format(); _o->custom_options_format = _e; }; +} + +inline flatbuffers::Offset Operator::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateOperator(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _opcode_index = _o->opcode_index; + auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0; + auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0; + auto _builtin_options_type = _o->builtin_options.type; + auto _builtin_options = _o->builtin_options.Pack(_fbb); + auto _custom_options = _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0; + auto _custom_options_format = _o->custom_options_format; + return tflite::CreateOperator( + _fbb, + _opcode_index, + _inputs, + _outputs, + _builtin_options_type, + _builtin_options, + _custom_options, + _custom_options_format); +} + +inline SubGraphT *SubGraph::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new SubGraphT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void SubGraph::UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = tensors(); if (_e) { _o->tensors.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->tensors[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } }; + { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } }; + { auto _e = operators(); if (_e) { _o->operators.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operators[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = name(); if (_e) _o->name = _e->str(); }; +} + +inline flatbuffers::Offset SubGraph::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSubGraph(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubGraphT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _tensors = _o->tensors.size() ? _fbb.CreateVector> (_o->tensors.size(), [](size_t i, _VectorArgs *__va) { return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0; + auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0; + auto _operators = _o->operators.size() ? _fbb.CreateVector> (_o->operators.size(), [](size_t i, _VectorArgs *__va) { return CreateOperator(*__va->__fbb, __va->__o->operators[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name); + return tflite::CreateSubGraph( + _fbb, + _tensors, + _inputs, + _outputs, + _operators, + _name); +} + +inline BufferT *Buffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new BufferT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Buffer::UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = data(); if (_e) { _o->data.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->data[_i] = _e->Get(_i); } } }; +} + +inline flatbuffers::Offset Buffer::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBuffer(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BufferT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _data = _o->data.size() ? _fbb.CreateVector(_o->data) : 0; + return tflite::CreateBuffer( + _fbb, + _data); +} + +inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ModelT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = version(); _o->version = _e; }; + { auto _e = operator_codes(); if (_e) { _o->operator_codes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operator_codes[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = subgraphs(); if (_e) { _o->subgraphs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->subgraphs[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = description(); if (_e) _o->description = _e->str(); }; + { auto _e = buffers(); if (_e) { _o->buffers.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->buffers[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; +} + +inline flatbuffers::Offset Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateModel(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _version = _o->version; + auto _operator_codes = _o->operator_codes.size() ? _fbb.CreateVector> (_o->operator_codes.size(), [](size_t i, _VectorArgs *__va) { return CreateOperatorCode(*__va->__fbb, __va->__o->operator_codes[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _subgraphs = _o->subgraphs.size() ? _fbb.CreateVector> (_o->subgraphs.size(), [](size_t i, _VectorArgs *__va) { return CreateSubGraph(*__va->__fbb, __va->__o->subgraphs[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _description = _o->description.empty() ? 0 : _fbb.CreateString(_o->description); + auto _buffers = _o->buffers.size() ? _fbb.CreateVector> (_o->buffers.size(), [](size_t i, _VectorArgs *__va) { return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher); }, &_va ) : 0; + return tflite::CreateModel( + _fbb, + _version, + _operator_codes, + _subgraphs, + _description, + _buffers); +} + +inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type) { + switch (type) { + case BuiltinOptions_NONE: { + return true; + } + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return false; + } +} + +inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { + if (values->size() != types->size()) return false; + for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyBuiltinOptions( + verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver) { + switch (type) { + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + default: return nullptr; + } +} + +inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const { + switch (type) { + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(value); + return CreateConv2DOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(value); + return CreateDepthwiseConv2DOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(value); + return CreateConcatEmbeddingsOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(value); + return CreateLSHProjectionOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(value); + return CreatePool2DOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(value); + return CreateSVDFOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(value); + return CreateRNNOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(value); + return CreateFullyConnectedOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(value); + return CreateSoftmaxOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(value); + return CreateConcatenationOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(value); + return CreateAddOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(value); + return CreateL2NormOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(value); + return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(value); + return CreateLSTMOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(value); + return CreateResizeBilinearOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(value); + return CreateCallOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(value); + return CreateReshapeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(value); + return CreateSkipGramOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(value); + return CreateSpaceToDepthOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(value); + return CreateEmbeddingLookupSparseOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(value); + return CreateMulOptions(_fbb, ptr, _rehasher).Union(); + } + default: return 0; + } +} + +inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT : type(u.type), value(nullptr) { + switch (type) { + case BuiltinOptions_Conv2DOptions: { + value = new Conv2DOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_DepthwiseConv2DOptions: { + value = new DepthwiseConv2DOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + value = new ConcatEmbeddingsOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LSHProjectionOptions: { + value = new LSHProjectionOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_Pool2DOptions: { + value = new Pool2DOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SVDFOptions: { + value = new SVDFOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_RNNOptions: { + value = new RNNOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_FullyConnectedOptions: { + value = new FullyConnectedOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SoftmaxOptions: { + value = new SoftmaxOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ConcatenationOptions: { + value = new ConcatenationOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_AddOptions: { + value = new AddOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_L2NormOptions: { + value = new L2NormOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + value = new LocalResponseNormalizationOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LSTMOptions: { + value = new LSTMOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ResizeBilinearOptions: { + value = new ResizeBilinearOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_CallOptions: { + value = new CallOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ReshapeOptions: { + value = new ReshapeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SkipGramOptions: { + value = new SkipGramOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SpaceToDepthOptions: { + value = new SpaceToDepthOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + value = new EmbeddingLookupSparseOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_MulOptions: { + value = new MulOptionsT(*reinterpret_cast(u.value)); + break; + } + default: + break; + } +} + +inline void BuiltinOptionsUnion::Reset() { + switch (type) { + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + default: break; + } + value = nullptr; + type = BuiltinOptions_NONE; +} + +inline const tflite::Model *GetModel(const void *buf) { + return flatbuffers::GetRoot(buf); +} + +inline const char *ModelIdentifier() { + return "TFL3"; +} + +inline bool ModelBufferHasIdentifier(const void *buf) { + return flatbuffers::BufferHasIdentifier( + buf, ModelIdentifier()); +} + +inline bool VerifyModelBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(ModelIdentifier()); +} + +inline const char *ModelExtension() { + return "tflite"; +} + +inline void FinishModelBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.Finish(root, ModelIdentifier()); +} + +inline std::unique_ptr UnPackModel( + const void *buf, + const flatbuffers::resolver_function_t *res = nullptr) { + return std::unique_ptr(GetModel(buf)->UnPack(res)); +} + +} // namespace tflite + +#endif // FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ diff --git a/tensorflow/contrib/lite/tools/benchmark_model.cc b/tensorflow/contrib/lite/tools/benchmark_model.cc new file mode 100644 index 0000000000..f80949b23e --- /dev/null +++ b/tensorflow/contrib/lite/tools/benchmark_model.cc @@ -0,0 +1,91 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +#ifdef TFLITE_CUSTOM_OPS_HEADER +void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); +#endif + +#define LOG(x) std::cerr +#define CHECK(x) if (!(x)) { LOG(ERROR) << #x << "failed"; exit(1); } + +namespace tensorflow { +namespace benchmark_tflite_model { + +std::unique_ptr model; +std::unique_ptr interpreter; + +void InitImpl(const std::string& graph, const std::vector& sizes, + const std::string& input_layer_type, int num_threads) { + CHECK(graph.c_str()); + + model = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); + if (!model) { + LOG(FATAL) << "Failed to mmap model " << graph; + } + LOG(INFO) << "Loaded model " << graph; + model->error_reporter(); + LOG(INFO) << "resolved reporter"; + +#ifdef TFLITE_CUSTOM_OPS_HEADER + tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); +#else + tflite::ops::builtin::BuiltinOpResolver resolver; +#endif + + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + LOG(FATAL) << "Failed to construct interpreter"; + } + + if (num_threads != -1) { + interpreter->SetNumThreads(num_threads); + } + + int input = interpreter->inputs()[0]; + + if (input_layer_type != "string") { + interpreter->ResizeInputTensor(input, sizes); + } + + if (interpreter->AllocateTensors() != kTfLiteOk) { + LOG(FATAL) << "Failed to allocate tensors!"; + } +} + +int Main(int argc, char** argv) { + InitImpl("", {}, "", 1); + return 0; +} + +} // namespace benchmark_tflite_model +} // namespace tensorflow + +int main(int argc, char** argv) { + return tensorflow::benchmark_tflite_model::Main(argc, argv); +} diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h index 9546c32427..a51fdaee19 100644 --- a/tensorflow/contrib/lite/tools/mutable_op_resolver.h +++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h @@ -19,6 +19,17 @@ limitations under the License. #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/model.h" +// Needed to resolve unordered_set hash on older compilers. +namespace std +{ +template<> + struct hash { + size_t operator()(const tflite::BuiltinOperator &op) const { + return std::hash()(op); + } + }; +} + namespace tflite { // An OpResolver that is mutable, also used as the op in gen_op_registration. -- GitLab From 58c1aaf77721268a4ef87ebd2ab520a6d5a62f79 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 13 Nov 2017 17:20:26 -0800 Subject: [PATCH 0370/1801] Split up HloExecutionProfile into a set of re-usable components The end goal is to have Hlo profiling support in XlaJitCompiledCpuFunction and eventually AOT compiled XlaCompiledCpuFunction. This change leaves the HloExecutionProfile interface mostly intact -- internally it uses the new split out components to do what it did before. However, in future CLs: - I'll extract out a HloExecutionProfilePrototype that contains the HloProfilePrinter, the OwningHloProfilePrinterStaticData and the HloToProfileIndex. This will then live in the Executable (if profiling is enabled). - The HloExecutionProfile for a specific execution will have a pointer to the parent HloExecutionProfilePrototype, which it will use to paginate profile_counters_. - The CPU backend will use the HloToProfileIndex in the HloExecutionProfilePrototype to map hlo instructions to profile counter offsets. This will make the indices in the generated code "line up" with the indices that the HloProfilePrinter expects. These changes will allow the XlaJitCompiledCpuFunction (and later AOT) clients to pass in an appropriately sized zeroed buffer to the generated function and then pass that same buffer to the appropriate HloProfilePrinter to get a textual Hlo profile. PiperOrigin-RevId: 175613737 --- tensorflow/compiler/xla/service/BUILD | 23 ++++ tensorflow/compiler/xla/service/executable.h | 29 ++-- .../xla/service/hlo_execution_profile.cc | 130 ++++++++++++++---- .../xla/service/hlo_execution_profile.h | 87 +++++++++--- .../xla/service/hlo_execution_profile_test.cc | 99 +++++++++++++ .../xla/service/hlo_profile_printer.cc | 67 +++++++++ .../xla/service/hlo_profile_printer.h | 97 +++++++++++++ tensorflow/compiler/xla/service/hlo_runner.cc | 3 +- tensorflow/compiler/xla/service/service.cc | 27 +--- 9 files changed, 467 insertions(+), 95 deletions(-) create mode 100644 tensorflow/compiler/xla/service/hlo_execution_profile_test.cc create mode 100644 tensorflow/compiler/xla/service/hlo_profile_printer.cc create mode 100644 tensorflow/compiler/xla/service/hlo_profile_printer.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index c163a5f837..c9828d8641 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1360,6 +1360,7 @@ cc_library( deps = [ ":hlo", ":hlo_cost_analysis", + ":hlo_profile_printer", ":human_readable_profile_builder", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", @@ -1368,6 +1369,18 @@ cc_library( ], ) +tf_cc_test( + name = "hlo_execution_profile_test", + srcs = ["hlo_execution_profile_test.cc"], + deps = [ + ":cpu_plugin", + ":hlo_cost_analysis", + ":hlo_execution_profile", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + ], +) + tf_cc_test( name = "hlo_computation_test", srcs = ["hlo_computation_test.cc"], @@ -2159,6 +2172,16 @@ cc_library( ], ) +cc_library( + name = "hlo_profile_printer", + srcs = ["hlo_profile_printer.cc"], + hdrs = ["hlo_profile_printer.h"], + deps = [ + ":human_readable_profile_builder", + "//tensorflow/compiler/xla:types", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 7e0d182b36..2135707371 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -197,14 +197,14 @@ StatusOr Executable::ExecuteOnStreamWrapper( VLOG(1) << "enqueueing executable on stream..."; // If the profiling flag isn't enabled, we pass nullptr as the profile to // indicate profiling is not requested. - HloExecutionProfile hlo_execution_profile; - HloExecutionProfile* profile_ptr = + std::unique_ptr profile_ptr = module_config().debug_options().xla_hlo_profile() && hlo_profiling_enabled() - ? &hlo_execution_profile + ? MakeUnique(module(), *CreateCostAnalysis()) : nullptr; - auto return_value = ExecuteOnStream(run_options, arguments, profile_ptr); + auto return_value = + ExecuteOnStream(run_options, arguments, profile_ptr.get()); if (profile != nullptr) { VLOG(1) << "enqueueing 'stop timer' and blocking host until done..."; @@ -232,24 +232,11 @@ StatusOr Executable::ExecuteOnStreamWrapper( } if (profile_ptr != nullptr) { - std::unordered_set profiled_computations = - profile_ptr->profiled_computations(); - // To ensure we have print the profiles in a stable order, iterate over the - // computations in post order. - std::list all_computations = - module().MakeComputationPostOrder(); - for (xla::HloComputation* computation : all_computations) { - if (profiled_computations.count(computation) > 0) { - string profile_string = profile_ptr->ToString( - *computation, stream->parent()->GetDeviceDescription(), - CreateCostAnalysis().get()); - if (!profile_string.empty()) { - XLA_LOG_LINES(tensorflow::INFO, profile_string); - } - } - } + XLA_LOG_LINES( + tensorflow::INFO, + profile_ptr->ToString(stream->parent()->GetDeviceDescription())); hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute", - profile_ptr); + profile_ptr.get()); } return return_value; diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index bf19bc9309..ecce2bd4e5 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -26,45 +26,115 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" namespace xla { +HloToProfileIndex::HloToProfileIndex(const HloModule& module) { + size_t current_profile_index = 0; + for (xla::HloComputation* computation : module.MakeComputationPostOrder()) { + InsertOrDie(&computation_to_profile_idx_, computation, + current_profile_index++); + for (const HloInstruction* instruction : computation->instructions()) { + // For simplicity we track all instrutions here, but we could skip + // non-executing instructions like constants and parameters. + InsertOrDie(&instruction_to_profile_idx_, instruction, + current_profile_index++); + } + } +} + +static HloProfilePrinter CreateOwnedHloProfilePrinter( + const HloToProfileIndex& hlo_to_profile_index, + const HloCostAnalysis& cost_analysis) { + using HloComputationInfo = HloProfilePrinter::HloComputationInfo; + using HloInstructionInfo = HloProfilePrinter::HloInstructionInfo; + + HloComputationInfo* computation_infos = + new HloComputationInfo[hlo_to_profile_index.computation_count()]; + + // There are two "indices" in play here. The first one is the index of the + // HloComputationInfo or HloInstructionInfo in the array that contains said + // HloComputationInfo or HloInstructionInfo. The second index is the index of + // the HloComputationInfo or HloInstructionInfo in the profile counters array, + // as decided by hlo_to_profile_index. The latter index is always referred to + // as "profile_index". + + size_t computation_index_in_static_data = 0; + size_t max_profile_index = hlo_to_profile_index.total_count(); + for (const auto& pair : hlo_to_profile_index.computation_to_profile_idx()) { + CHECK_LT(pair.second, max_profile_index); + const HloComputation* computation = pair.first; + size_t current_computation_index = computation_index_in_static_data++; + HloComputationInfo* computation_info = + &computation_infos[current_computation_index]; + + computation_info->name = strdup(computation->name().c_str()); + computation_info->profile_index = pair.second; + computation_info->instructions = + new HloInstructionInfo[computation->instruction_count()]; + computation_info->instructions_size = computation->instruction_count(); + + size_t instruction_index_in_static_data = 0; + for (const HloInstruction* hlo : computation->instructions()) { + HloProfilePrinter::HloInstructionInfo* instruction_info = + &computation_info->instructions[instruction_index_in_static_data++]; + instruction_info->long_name = strdup(hlo->ToString().c_str()); + instruction_info->short_name = + strdup(hlo->ToString(/*compact_operands=*/true).c_str()); + instruction_info->category = strdup(hlo->ToCategory().c_str()); + instruction_info->flop_count = cost_analysis.flop_count(*hlo); + instruction_info->transcendental_count = + cost_analysis.transcendental_count(*hlo); + instruction_info->bytes_accessed = cost_analysis.bytes_accessed(*hlo); + instruction_info->seconds = cost_analysis.seconds(*hlo); + instruction_info->profile_index = + hlo_to_profile_index.GetProfileIndexFor(*hlo); + CHECK_LT(instruction_info->profile_index, max_profile_index); + } + } + + auto deleter = [](HloProfilePrinter::HloComputationInfo* computation_infos, + int64 computation_infos_size) { + for (int64 i = 0; i < computation_infos_size; i++) { + HloInstructionInfo* instruction_infos = computation_infos[i].instructions; + for (int64 j = 0; j < computation_infos[i].instructions_size; j++) { + // We can't make instruction_infos[j].long_name etc. non-const pointers + // since they may point into static storage, so we have a const_cast + // here. + free(const_cast(instruction_infos[j].long_name)); + free(const_cast(instruction_infos[j].short_name)); + free(const_cast(instruction_infos[j].category)); + } + delete[] instruction_infos; + free(const_cast(computation_infos[i].name)); + } + delete[] computation_infos; + }; + + return HloProfilePrinter(computation_infos, + hlo_to_profile_index.computation_count(), deleter); +} + +HloExecutionProfile::HloExecutionProfile(const HloModule& module, + const HloCostAnalysis& cost_analysis) + : hlo_to_profile_index_(module), + hlo_profile_printer_( + CreateOwnedHloProfilePrinter(hlo_to_profile_index_, cost_analysis)), + profile_counters_( + /*count*/ hlo_to_profile_index_.total_count(), + /*value*/ 0) {} void HloExecutionProfile::SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken) { - hlo_to_cycles_taken_[hlo] = cycles_taken; - profiled_computations_.insert(hlo->parent()); + profile_counters_[hlo_to_profile_index_.GetProfileIndexFor(*hlo)] = + cycles_taken; } uint64 HloExecutionProfile::GetCyclesTakenBy(const HloInstruction& hlo) const { - auto iter = hlo_to_cycles_taken_.find(&hlo); - if (iter == hlo_to_cycles_taken_.end()) { - return 0; - } - return iter->second; + return profile_counters_[hlo_to_profile_index_.GetProfileIndexFor(hlo)]; } string HloExecutionProfile::ToString( - const HloComputation& computation, - const DeviceDescription& device_description, - HloCostAnalysis* cost_analysis) const { - tensorflow::Status analysis_status = computation.Accept(cost_analysis); - if (!analysis_status.ok()) { - return ""; - } - - HumanReadableProfileBuilder builder(computation.name(), - total_cycles_executed(computation), - device_description.clock_rate_ghz()); - for (const auto& item : hlo_to_cycles_taken_) { - const HloInstruction* hlo = item.first; - int64 cycles = item.second; - - builder.AddOp(/*op_name=*/hlo->ToString(), - /*short_name=*/hlo->ToString(/*compact_operands=*/true), - hlo->ToCategory(), cycles, cost_analysis->flop_count(*hlo), - cost_analysis->transcendental_count(*hlo), - cost_analysis->bytes_accessed(*hlo), - cost_analysis->seconds(*hlo)); - } - return builder.ToString(); + const DeviceDescription& device_description) const { + return hlo_profile_printer_.ToString(profile_counters_.data(), + device_description.clock_rate_ghz()); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.h b/tensorflow/compiler/xla/service/hlo_execution_profile.h index cdce77cff4..f945b9d84c 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.h +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.h @@ -18,7 +18,9 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" +#include "tensorflow/compiler/xla/service/hlo_profile_printer.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/platform/types.h" @@ -27,6 +29,54 @@ namespace xla { class HloInstruction; +// Maps all HloInstructions and HloComputions in an HloModule to integers. +// These integers form the contiguous range [0, GetTotalCount()). +class HloToProfileIndex { + public: + // Scans `module` to populate this instance of HloToProfileIndex. + explicit HloToProfileIndex(const HloModule& module); + + HloToProfileIndex(const HloToProfileIndex&) = default; + HloToProfileIndex(HloToProfileIndex&&) = default; + + HloToProfileIndex& operator=(const HloToProfileIndex&) = default; + HloToProfileIndex& operator=(HloToProfileIndex&&) = default; + + size_t GetProfileIndexFor(const HloInstruction& instruction) const { + return FindOrDie(instruction_to_profile_idx(), &instruction); + } + + size_t GetProfileIndexFor(const HloComputation& computation) const { + return FindOrDie(computation_to_profile_idx(), &computation); + } + + size_t instruction_count() const { + return instruction_to_profile_idx().size(); + } + + size_t computation_count() const { + return computation_to_profile_idx().size(); + } + + size_t total_count() const { + return instruction_count() + computation_count(); + } + + const std::unordered_map& + instruction_to_profile_idx() const { + return instruction_to_profile_idx_; + } + + const std::unordered_map& + computation_to_profile_idx() const { + return computation_to_profile_idx_; + } + + private: + std::unordered_map instruction_to_profile_idx_; + std::unordered_map computation_to_profile_idx_; +}; + // Describes how much time each HLO operation took. // // Each HloComputation takes a certain number of cycles. This class helps break @@ -35,6 +85,9 @@ class HloExecutionProfile { public: using DeviceDescription = perftools::gputools::DeviceDescription; + HloExecutionProfile(const HloModule& module, + const HloCostAnalysis& cost_analysis); + // Record how many cycles this HLO took to execute. void SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken); @@ -44,17 +97,15 @@ class HloExecutionProfile { // Return the number of cycles this computation took to execute. uint64 total_cycles_executed(const HloComputation& computation) const { - auto it = total_cycles_executed_.find(&computation); - if (it != total_cycles_executed_.end()) { - return it->second; - } - return 0; + return profile_counters_[hlo_to_profile_index_.GetProfileIndexFor( + computation)]; } // Record how many cycles a computation took to execute. void set_total_cycles_executed(const HloComputation& computation, uint64 total_cycles_executed) { - total_cycles_executed_[&computation] = total_cycles_executed; + profile_counters_[hlo_to_profile_index_.GetProfileIndexFor(computation)] = + total_cycles_executed; } // Returns a version of the execution profile suitable for performance @@ -63,25 +114,19 @@ class HloExecutionProfile { // for the operations in a given computation. Returns an empty string if it // wasn't possible to generate a printable version. cost_analysis should be a // clean analysis that can be used to visit the computation. - string ToString(const HloComputation& computation, - const DeviceDescription& device_description, - HloCostAnalysis* cost_analysis) const; - - // Returns the computations we have profiled. - std::unordered_set profiled_computations() const { - return profiled_computations_; - } + string ToString(const DeviceDescription& device_description) const; private: - // Contains a mapping from HLO to the number of cycles it took to execute it. - std::unordered_map hlo_to_cycles_taken_; + // hlo_to_profile_index_ maps an Hlo entity (computation or instruction) to an + // index in profile_counters_. + HloToProfileIndex hlo_to_profile_index_; - // If non-empty, contains the total number of cycles a computation took to - // execute. - std::unordered_map total_cycles_executed_; + // Used to print profile_counters_ in a human readable form. + HloProfilePrinter hlo_profile_printer_; - // The computations we have profiled. - std::unordered_set profiled_computations_; + // Stores per-Hlo profile counters. This is the only thing that changes when + // we execute an XLA computation. + std::vector profile_counters_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc new file mode 100644 index 0000000000..0628444b34 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc @@ -0,0 +1,99 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_execution_profile.h" +#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" + +namespace xla { +namespace { + +class HloExecutionProfileTest : public HloTestBase { + protected: + static constexpr int64 kInstructionCyclesIndex = 0; + static constexpr int64 kInstructionNameIndex = 19; +}; + +// Splits `lines` into a sequence of lines delimited by newlines and then split +// each of those lines into a sequence of words delimited by spaces. Filter out +// empty words. +std::vector> SplitIntoLinesAndWords( + tensorflow::StringPiece lines) { + std::vector> result; + for (const string& line : tensorflow::str_util::Split(lines, '\n')) { + std::vector words; + for (const string& word : tensorflow::str_util::Split(line, ' ')) { + if (!word.empty()) { + words.push_back(word); + } + } + result.push_back(std::move(words)); + } + + return result; +} + +TEST_F(HloExecutionProfileTest, Basic) { + std::unique_ptr hlo_module = CreateNewModule(); + + HloComputation::Builder builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {30, 30}); + HloInstruction* param_lhs = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "lhs")); + HloInstruction* param_rhs = + builder.AddInstruction(HloInstruction::CreateParameter(1, shape, "rhs")); + HloInstruction* add_instruction = + builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kAdd, param_lhs, param_rhs)); + HloInstruction* dot_instruction = + builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, param_lhs, add_instruction)); + + hlo_module->AddEntryComputation(builder.Build()); + + auto shape_size_function = [&](const Shape& shape) { + const int64 pointer_size = 8; + if (ShapeUtil::IsOpaque(shape)) { + return pointer_size; + } + return ShapeUtil::ByteSizeOf(shape, pointer_size); + }; + + HloCostAnalysis cost_analysis(shape_size_function); + HloExecutionProfile execution_profile(*hlo_module, cost_analysis); + + const int64 add_cycles = 1000; + const int64 dot_cycles = 4000; + + execution_profile.SetCyclesTakenBy(add_instruction, add_cycles); + execution_profile.SetCyclesTakenBy(dot_instruction, dot_cycles); + + string rendered_profile = execution_profile.ToString( + backend().default_stream_executor()->GetDeviceDescription()); + std::vector> lines_and_words = + SplitIntoLinesAndWords(rendered_profile); + ASSERT_EQ(lines_and_words.size(), 8); + + const std::vector& line_2 = lines_and_words[2]; + const std::vector& line_3 = lines_and_words[3]; + + EXPECT_EQ(line_2[kInstructionCyclesIndex], std::to_string(dot_cycles)); + EXPECT_EQ(line_2[kInstructionNameIndex], dot_instruction->name()); + + EXPECT_EQ(line_3[kInstructionCyclesIndex], std::to_string(add_cycles)); + EXPECT_EQ(line_3[kInstructionNameIndex], add_instruction->name()); +} +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.cc b/tensorflow/compiler/xla/service/hlo_profile_printer.cc new file mode 100644 index 0000000000..071c5a6629 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_profile_printer.cc @@ -0,0 +1,67 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_profile_printer.h" + +#include "tensorflow/compiler/xla/service/human_readable_profile_builder.h" + +namespace xla { +string HloProfilePrinter::ToString(const int64* counters, + double clock_rate_ghz) const { + string result; + + for (int computation_idx = 0; computation_idx < computation_infos_size_; + computation_idx++) { + const HloComputationInfo& computation = computation_infos_[computation_idx]; + const HloInstructionInfo* instructions_begin = computation.instructions; + const HloInstructionInfo* instructions_end = + computation.instructions + computation.instructions_size; + bool any_instruction_profiled = + std::any_of(instructions_begin, instructions_end, + [&](const HloInstructionInfo& instruction_info) { + return counters[instruction_info.profile_index] != 0; + }); + + if (!any_instruction_profiled) { + continue; + } + + // Once we start using this in AOT for real, we will probably need a more + // minimal version of HumanReadableProfileBuilder. + HumanReadableProfileBuilder builder( + computation.name, counters[computation.profile_index], clock_rate_ghz); + + for (const auto* instruction = instructions_begin; + instruction != instructions_end; instruction++) { + builder.AddOp( + /*op_name=*/instruction->long_name, + /*short_name=*/instruction->short_name, instruction->category, + counters[instruction->profile_index], instruction->flop_count, + instruction->transcendental_count, instruction->bytes_accessed, + instruction->seconds); + } + + result += builder.ToString(); + } + + return result; +} + +HloProfilePrinter::~HloProfilePrinter() { + if (deleter_) { + deleter_(computation_infos_, computation_infos_size_); + } +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.h b/tensorflow/compiler/xla/service/hlo_profile_printer.h new file mode 100644 index 0000000000..45921c66f6 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_profile_printer.h @@ -0,0 +1,97 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PROFILE_PRINTER_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PROFILE_PRINTER_H_ + +#include +#include +#include + +#include "tensorflow/compiler/xla/types.h" + +namespace xla { +// Instances of this class can pretty-print profile counters gathered from +// running an XLA computation without having access to the backing module. +class HloProfilePrinter { + public: + // Holds meta information about an HloInstruction. + // + // The pointer-typed fields can be owning or non-owning -- this decision is + // manifested as the deleter_ function in the containing HloProfilePrinter. + struct HloInstructionInfo { + // Textual information for pretty printing. + const char* long_name; + const char* short_name; + const char* category; + + // Metrics computed by HloCostAnalysis. + float flop_count; + float transcendental_count; + float bytes_accessed; + float seconds; + + // The index into the profile counters array for the HloInstruction + // corresponding to this HloInstructionInfo. + int64 profile_index; + }; + + // Holds meta information about an HloComputation. + // + // The pointer-typed fields can be owning or non-owning -- this decision is + // manifested as the deleter_ function in the containing HloProfilePrinter. + struct HloComputationInfo { + const char* name; + + // The index into the profile counters array for the HloInstruction + // corresponding to this HloComputationInfo. + int64 profile_index; + + HloInstructionInfo* instructions; + int64 instructions_size; + }; + + HloProfilePrinter( + HloComputationInfo* computation_infos, int64 computation_infos_size, + std::function deleter = nullptr) + : computation_infos_(computation_infos), + computation_infos_size_(computation_infos_size), + deleter_(std::move(deleter)) {} + + HloProfilePrinter(HloProfilePrinter&& other) { + std::swap(other.computation_infos_, computation_infos_); + std::swap(other.computation_infos_size_, computation_infos_size_); + std::swap(other.deleter_, deleter_); + } + + HloProfilePrinter(const HloProfilePrinter&) = delete; + HloProfilePrinter& operator=(const HloProfilePrinter&) = delete; + + // Convert the profile counter sequence `counters` to a human readable string + // representation. + string ToString(const int64* counters, double clock_rate_ghz) const; + + ~HloProfilePrinter(); + + private: + // The `computation_infos_` field can be owning or non-owning -- this decision + // is manifested as the deleter_ function. + HloComputationInfo* computation_infos_ = nullptr; + int64 computation_infos_size_ = 0; + std::function deleter_; +}; +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PROFILE_PRINTER_H_ diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 158fb9a546..63f2b1296e 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -130,14 +130,13 @@ StatusOr HloRunner::Execute( run_options.set_intra_op_thread_pool( backend().eigen_intra_op_thread_pool_device()); - HloExecutionProfile hlo_execution_profile; ServiceExecutableRunOptions service_run_options( run_options, backend().StreamBorrower(), backend().inter_op_thread_pool()); TF_ASSIGN_OR_RETURN( se::DeviceMemoryBase result, executable->ExecuteOnStream(&service_run_options, arguments, - &hlo_execution_profile)); + /*hlo_execution_profile=*/nullptr)); TF_RET_CHECK(stream.BlockHostUntilDone()); allocations_.push_back(result); diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 71afbee456..ee9501dd48 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -572,30 +572,15 @@ Service::ExecuteParallelAndRegisterResult( // profile. for (auto& index_to_profiled_stream : index_to_profiled_streams) { int64 device = index_to_profiled_stream.first; + auto& module = executables[device]->module(); se::Stream* stream = index_to_profiled_stream.second; - HloExecutionProfile hlo_profile; + HloExecutionProfile hlo_profile(module, + *executables[device]->CreateCostAnalysis()); TF_RETURN_IF_ERROR(executables[device]->PopulateExecutionProfile( &hlo_profile, stream->parent())); - - std::unordered_set profiled_computations = - hlo_profile.profiled_computations(); - // To ensure we have print the profiles in a stable order, iterate over the - // computations in post order. - auto& module = executables[device]->module(); - std::list all_computations = - module.MakeComputationPostOrder(); - for (xla::HloComputation* computation : all_computations) { - if (profiled_computations.count(computation) > 0) { - string profile_string = hlo_profile.ToString( - *computation, streams[0]->parent()->GetDeviceDescription(), - executables[device]->CreateCostAnalysis().get()); - if (!profile_string.empty()) { - LOG(INFO) << "HLO profile for execution on device " << device - << ":\n"; - XLA_LOG_LINES(tensorflow::INFO, profile_string); - } - } - } + XLA_LOG_LINES( + tensorflow::INFO, + hlo_profile.ToString(streams[0]->parent()->GetDeviceDescription())); hlo_graph_dumper::MaybeDumpHloModule(module, "Service::Execute", &hlo_profile); } -- GitLab From f92692223dd19772ee2dec10707043807c19fa16 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 13 Nov 2017 18:02:43 -0800 Subject: [PATCH 0371/1801] Explicitly specify TFLite demo assets in BUILD file PiperOrigin-RevId: 175617982 --- tensorflow/contrib/lite/java/demo/app/src/main/BUILD | 10 ++-------- .../contrib/lite/java/demo/app/src/main/assets/BUILD | 6 ++---- third_party/tflite_mobilenet.BUILD | 5 ++--- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index 4fc6d99d8c..654fa9d6d2 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -6,7 +6,8 @@ android_binary( name = "TfLiteCameraDemo", srcs = glob(["java/**/*.java"]), assets = [ - ":assets", + "@tflite_mobilenet//:labels.txt", + "@tflite_mobilenet//:mobilenet_quant_v1_224.tflite", ], assets_dir = "", custom_package = "com.example.android.tflitecamerademo", @@ -27,13 +28,6 @@ android_binary( ], ) -filegroup( - name = "assets", - srcs = [ - "@tflite_mobilenet//:model_files", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD index 1a759f5652..dd0cd6c98f 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD @@ -2,15 +2,13 @@ package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 -filegroup( - name = "assets_files", - srcs = glob( +exports_files( + glob( ["**/*"], exclude = [ "BUILD", ], ), - visibility = ["//visibility:public"], ) filegroup( diff --git a/third_party/tflite_mobilenet.BUILD b/third_party/tflite_mobilenet.BUILD index 75663eff48..de47ed61f9 100644 --- a/third_party/tflite_mobilenet.BUILD +++ b/third_party/tflite_mobilenet.BUILD @@ -2,9 +2,8 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 -filegroup( - name = "model_files", - srcs = glob( +exports_files( + glob( ["**/*"], exclude = [ "BUILD", -- GitLab From 8997ae6271cd2c496988ceeedab1d31755d65da4 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 13 Nov 2017 18:11:54 -0800 Subject: [PATCH 0372/1801] Add multi_gpu_model utility in tf.keras. PiperOrigin-RevId: 175619595 --- tensorflow/python/keras/BUILD | 14 ++ .../python/keras/_impl/keras/activations.py | 2 +- .../keras/_impl/keras/engine/topology.py | 16 +- .../keras/_impl/keras/engine/training.py | 6 +- .../keras/_impl/keras/utils/__init__.py | 8 +- .../keras/_impl/keras/utils/training_utils.py | 188 ++++++++++++++++++ .../_impl/keras/utils/training_utils_test.py | 94 +++++++++ tensorflow/python/keras/utils/__init__.py | 1 + .../api/golden/tensorflow.keras.utils.pbtxt | 4 + 9 files changed, 312 insertions(+), 21 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/utils/training_utils.py create mode 100644 tensorflow/python/keras/_impl/keras/utils/training_utils_test.py diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 6a762ee5d2..55b5d7ff61 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -69,6 +69,7 @@ py_library( "_impl/keras/utils/io_utils.py", "_impl/keras/utils/layer_utils.py", "_impl/keras/utils/np_utils.py", + "_impl/keras/utils/training_utils.py", "_impl/keras/utils/vis_utils.py", "_impl/keras/wrappers/__init__.py", "_impl/keras/wrappers/scikit_learn.py", @@ -587,6 +588,19 @@ py_test( ], ) +py_test( + name = "training_utils_test", + size = "medium", + srcs = ["_impl/keras/utils/training_utils_test.py"], + srcs_version = "PY2AND3", + tags = ["multi_gpu"], + deps = [ + ":keras", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + py_test( name = "imagenet_utils_test", size = "small", diff --git a/tensorflow/python/keras/_impl/keras/activations.py b/tensorflow/python/keras/_impl/keras/activations.py index 4e35b79869..f017d2ae85 100644 --- a/tensorflow/python/keras/_impl/keras/activations.py +++ b/tensorflow/python/keras/_impl/keras/activations.py @@ -21,8 +21,8 @@ from __future__ import print_function import six from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object +from tensorflow.python.layers.base import Layer from tensorflow.python.platform import tf_logging as logging diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index 2bcbabf19c..1b7ddef9c4 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== # pylint: disable=protected-access -"""Base layer code and base model (Container) code. +"""Base layer code and base model (Network) code. """ from __future__ import absolute_import from __future__ import division @@ -450,7 +450,7 @@ class Layer(tf_base_layers.Layer): The config of a layer does not include connectivity information, nor the layer class name. These are handled - by `Container` (one layer of abstraction above). + by `Network` (one layer of abstraction above). Returns: Python dictionary. @@ -469,7 +469,7 @@ class Layer(tf_base_layers.Layer): This method is the reverse of `get_config`, capable of instantiating the same layer from the config dictionary. It does not handle layer connectivity - (handled by Container), nor weights (handled by `set_weights`). + (handled by Network), nor weights (handled by `set_weights`). Arguments: config: A Python dictionary, typically the @@ -637,10 +637,10 @@ def Input( # pylint: disable=invalid-name class Network(tf_base_layers.Network, Layer): - """A Container is a directed acyclic graph of layers. + """A Network is a directed acyclic graph of layers. It is the topological form of a "model". A Model - is simply a Container with added training routines. + is simply a Network with added training routines. # Properties name @@ -792,7 +792,7 @@ class Network(tf_base_layers.Network, Layer): node_conversion_map = {} for layer in self.layers: if issubclass(layer.__class__, Network): - # Containers start with a pre-existing node + # Networks start with a pre-existing node # linking their input to output. kept_nodes = 1 else: @@ -1197,10 +1197,6 @@ class Network(tf_base_layers.Network, Layer): print_fn=print_fn) -# Alias for legacy support. -Container = Network - - def get_source_inputs(tensor, layer=None, node_index=None): """Returns the list of input tensors necessary to compute `tensor`. diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 0b04c17ad7..b1e48439ba 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -28,7 +28,7 @@ from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module from tensorflow.python.keras._impl.keras import optimizers -from tensorflow.python.keras._impl.keras.engine.topology import Container +from tensorflow.python.keras._impl.keras.engine.topology import Network from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence @@ -559,8 +559,8 @@ def _standardize_weights(y, return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx()) -class Model(Container): - """The `Model` class adds training & evaluation routines to a `Container`. +class Model(Network): + """The `Model` class adds training & evaluation routines to a `Network`. """ def compile(self, diff --git a/tensorflow/python/keras/_impl/keras/utils/__init__.py b/tensorflow/python/keras/_impl/keras/utils/__init__.py index fa50b123b7..78f325cf61 100644 --- a/tensorflow/python/keras/_impl/keras/utils/__init__.py +++ b/tensorflow/python/keras/_impl/keras/utils/__init__.py @@ -18,11 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.keras._impl.keras.utils import conv_utils -from tensorflow.python.keras._impl.keras.utils import data_utils -from tensorflow.python.keras._impl.keras.utils import generic_utils -from tensorflow.python.keras._impl.keras.utils import io_utils -from tensorflow.python.keras._impl.keras.utils import np_utils from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import get_file from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer @@ -37,7 +32,6 @@ from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model from tensorflow.python.keras._impl.keras.utils.np_utils import normalize from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical +from tensorflow.python.keras._impl.keras.utils.training_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.vis_utils import plot_model - -# Globally-importable utils. diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils.py b/tensorflow/python/keras/_impl/keras/utils/training_utils.py new file mode 100644 index 0000000000..b993a16394 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/utils/training_utils.py @@ -0,0 +1,188 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utilities for multi-gpu training.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras.engine.training import Model +from tensorflow.python.ops import array_ops + + +def _get_available_devices(): + return [x.name for x in K.get_session().list_devices()] + + +def _normalize_device_name(name): + name = '/' + name.lower().split('device:')[1] + return name + + +def multi_gpu_model(model, gpus): + """Replicates a model on different GPUs. + + Specifically, this function implements single-machine + multi-GPU data parallelism. It works in the following way: + + - Divide the model's input(s) into multiple sub-batches. + - Apply a model copy on each sub-batch. Every model copy + is executed on a dedicated GPU. + - Concatenate the results (on CPU) into one big batch. + + E.g. if your `batch_size` is 64 and you use `gpus=2`, + then we will divide the input into 2 sub-batches of 32 samples, + process each sub-batch on one GPU, then return the full + batch of 64 processed samples. + + This induces quasi-linear speedup on up to 8 GPUs. + + This function is only available with the TensorFlow backend + for the time being. + + Arguments: + model: A Keras model instance. To avoid OOM errors, + this model could have been built on CPU, for instance + (see usage example below). + gpus: Integer >= 2, number of on GPUs on which to create + model replicas. + + Returns: + A Keras `Model` instance which can be used just like the initial + `model` argument, but which distributes its workload on multiple GPUs. + + Example: + + ```python + import tensorflow as tf + from keras.applications import Xception + from keras.utils import multi_gpu_model + import numpy as np + + num_samples = 1000 + height = 224 + width = 224 + num_classes = 1000 + + # Instantiate the base model + # (here, we do it on CPU, for better efficiency). + with tf.device('/cpu:0'): + model = Xception(weights=None, + input_shape=(height, width, 3), + classes=num_classes) + + # Replicates the model on 8 GPUs. + # This assumes that your machine has 8 available GPUs. + parallel_model = multi_gpu_model(model, gpus=8) + parallel_model.compile(loss='categorical_crossentropy', + optimizer='rmsprop') + + # Generate dummy data. + x = np.random.random((num_samples, height, width, 3)) + y = np.random.random((num_samples, num_classes)) + + # This `fit` call will be distributed on 8 GPUs. + # Since the batch size is 256, each GPU will process 32 samples. + parallel_model.fit(x, y, epochs=20, batch_size=256) + ``` + + Raises: + ValueError: if the `gpus` argument does not match available devices. + """ + # pylint: disable=g-import-not-at-top + from tensorflow.python.keras._impl.keras.layers.core import Lambda + from tensorflow.python.keras._impl.keras.layers.merge import concatenate + + if gpus <= 1: + raise ValueError('For multi-gpu usage to be effective, ' + 'call `multi_gpu_model` with `gpus >= 2`. ' + 'Received: `gpus=%d`' % gpus) + + target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in range(gpus)] + available_devices = _get_available_devices() + available_devices = [ + _normalize_device_name(name) for name in available_devices + ] + for device in target_devices: + if device not in available_devices: + raise ValueError('To call `multi_gpu_model` with `gpus=%d`, ' + 'we expect the following devices to be available: %s. ' + 'However this machine only has: %s. ' + 'Try reducing `gpus`.' % (gpus, target_devices, + available_devices)) + + def get_slice(data, i, parts): + """Slice an array into `parts` slices and return slice `i`. + + Arguments: + data: array to slice. + i: index of slice to return. + parts: number of slices to make. + + Returns: + Slice `i` of `data`. + """ + shape = array_ops.shape(data) + batch_size = shape[:1] + input_shape = shape[1:] + step = batch_size // parts + if i == gpus - 1: + size = batch_size - step * i + else: + size = step + size = array_ops.concat([size, input_shape], axis=0) + stride = array_ops.concat([step, input_shape * 0], axis=0) + start = stride * i + return array_ops.slice(data, start, size) + + all_outputs = [] + for i in range(len(model.outputs)): + all_outputs.append([]) + + # Place a copy of the model on each GPU, + # each getting a slice of the inputs. + for i in range(gpus): + with ops.device('/gpu:%d' % i): + with ops.name_scope('replica_%d' % i): + inputs = [] + # Retrieve a slice of the input. + for x in model.inputs: + input_shape = tuple(x.get_shape().as_list())[1:] + slice_i = Lambda( + get_slice, + output_shape=input_shape, + arguments={ + 'i': i, + 'parts': gpus + })(x) + inputs.append(slice_i) + + # Apply model on slice + # (creating a model replica on the target device). + outputs = model(inputs) + if not isinstance(outputs, list): + outputs = [outputs] + + # Save the outputs for merging back together later. + for o in range(len(outputs)): + all_outputs[o].append(outputs[o]) + + # Merge outputs on CPU. + with ops.device('/cpu:0'): + merged = [] + for outputs in all_outputs: + merged.append(concatenate(outputs, axis=0)) + return Model(model.inputs, merged) diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py new file mode 100644 index 0000000000..51fbd041a4 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py @@ -0,0 +1,94 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for multi-gpu training utilities.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + + +from tensorflow.python.keras._impl import keras +from tensorflow.python.platform import test + + +class TestMultiGPUModel(test.TestCase): + + def multi_gpu_test_simple_model(self): + gpus = 2 + num_samples = 1000 + input_dim = 10 + output_dim = 1 + hidden_dim = 10 + epochs = 2 + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(hidden_dim, + input_shape=(input_dim,))) + model.add(keras.layers.Dense(output_dim)) + + x = np.random.random((num_samples, input_dim)) + y = np.random.random((num_samples, output_dim)) + parallel_model = keras.utils.multi_gpu_model(model, gpus=gpus) + + parallel_model.compile(loss='mse', optimizer='rmsprop') + parallel_model.fit(x, y, epochs=epochs) + + def multi_gpu_test_multi_io_model(self): + gpus = 2 + num_samples = 1000 + input_dim_a = 10 + input_dim_b = 5 + output_dim_a = 1 + output_dim_b = 2 + hidden_dim = 10 + epochs = 2 + + with self.test_session(): + input_a = keras.Input((input_dim_a,)) + input_b = keras.Input((input_dim_b,)) + a = keras.layers.Dense(hidden_dim)(input_a) + b = keras.layers.Dense(hidden_dim)(input_b) + c = keras.layers.concatenate([a, b]) + output_a = keras.layers.Dense(output_dim_a)(c) + output_b = keras.layers.Dense(output_dim_b)(c) + model = keras.models.Model([input_a, input_b], [output_a, output_b]) + + a_x = np.random.random((num_samples, input_dim_a)) + b_x = np.random.random((num_samples, input_dim_b)) + a_y = np.random.random((num_samples, output_dim_a)) + b_y = np.random.random((num_samples, output_dim_b)) + + parallel_model = keras.utils.multi_gpu_model(model, gpus=gpus) + parallel_model.compile(loss='mse', optimizer='rmsprop') + parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs) + + def multi_gpu_test_invalid_devices(self): + with self.test_session(): + input_shape = (1000, 10) + model = keras.models.Sequential() + model.add(keras.layers.Dense(10, + activation='relu', + input_shape=input_shape[1:])) + model.add(keras.layers.Dense(1, activation='sigmoid')) + model.compile(loss='mse', optimizer='rmsprop') + + x = np.random.random(input_shape) + y = np.random.random((input_shape[0], 1)) + with self.assertRaises(ValueError): + parallel_model = keras.utils.multi_gpu_model( + model, gpus=len(keras.backend._get_available_gpus()) + 1) + parallel_model.fit(x, y, epochs=2) diff --git a/tensorflow/python/keras/utils/__init__.py b/tensorflow/python/keras/utils/__init__.py index a7c2179fe7..91cc860727 100644 --- a/tensorflow/python/keras/utils/__init__.py +++ b/tensorflow/python/keras/utils/__init__.py @@ -32,6 +32,7 @@ from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model from tensorflow.python.keras._impl.keras.utils.np_utils import normalize from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical +from tensorflow.python.keras._impl.keras.utils.training_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.vis_utils import plot_model del absolute_import diff --git a/tensorflow/tools/api/golden/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.utils.pbtxt index e840f33142..5a446c09d0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.utils.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.utils.pbtxt @@ -44,6 +44,10 @@ tf_module { name: "get_file" argspec: "args=[\'fname\', \'origin\', \'untar\', \'md5_hash\', \'file_hash\', \'cache_subdir\', \'hash_algorithm\', \'extract\', \'archive_format\', \'cache_dir\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'datasets\', \'auto\', \'False\', \'auto\', \'None\'], " } + member_method { + name: "multi_gpu_model" + argspec: "args=[\'model\', \'gpus\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "normalize" argspec: "args=[\'x\', \'axis\', \'order\'], varargs=None, keywords=None, defaults=[\'-1\', \'2\'], " -- GitLab From 80a3d011807e7b3a9de4d58e082acf2e091d7927 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Mon, 13 Nov 2017 18:19:59 -0800 Subject: [PATCH 0373/1801] Add a model comparison function to TPU test utilities. PiperOrigin-RevId: 175620458 --- tensorflow/contrib/tpu/BUILD | 1 + .../contrib/tpu/python/tpu/test_util.py | 137 +++++++++++++++++- 2 files changed, 130 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index e14c36ae43..64e9d0e765 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -16,6 +16,7 @@ package( "//cloud/vmm/testing/tests/tpu:__subpackages__", "//learning/brain:__subpackages__", "//tensorflow:__subpackages__", + "//third_party/cloud_tpu:__subpackages__", ], ) diff --git a/tensorflow/contrib/tpu/python/tpu/test_util.py b/tensorflow/contrib/tpu/python/tpu/test_util.py index f30c27f129..b83c72d0ff 100644 --- a/tensorflow/contrib/tpu/python/tpu/test_util.py +++ b/tensorflow/contrib/tpu/python/tpu/test_util.py @@ -18,14 +18,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.tpu.python.tpu import tpu +import os.path +import pickle +import tempfile + +import numpy as np -from tensorflow.python.client import session +from tensorflow.contrib.tpu.python.tpu import tpu +from tensorflow.contrib.tpu.python.tpu import tpu_config +from tensorflow.contrib.tpu.python.tpu import tpu_estimator +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session as tf_session +from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import saver as tf_saver def has_tpu(): @@ -38,8 +50,9 @@ def has_tpu(): Returns: boolean, True if a TPU device is available, otherwise False. """ + def _check(): - with session.Session() as sess: + with tf_session.Session() as sess: sess.run(tpu.initialize_system()) sess.run(tpu.shutdown_system()) @@ -61,6 +74,111 @@ def _available_devices(): return tuple(devices) +def copy_dir(src, tgt): + """Copy src to tgt.""" + gfile.MakeDirs(tgt) + seen_dirs = set() + for dirname, _, files in gfile.Walk(src): + for f in files: + src_f = os.path.join(dirname, f) + tgt_f = src_f.replace(src, tgt) + tgt_d = os.path.dirname(tgt_f) + if tgt_d not in seen_dirs: + gfile.MkDir(tgt_d) + seen_dirs.add(tgt_d) + gfile.Copy(src_f, tgt_f, overwrite=True) + + +def compare_model(model_fn, input_fn, params, master="local", temp_dir=None, + tolerance=1e-4): + """Compare the results of running `model_fn` on the TPU and CPU.""" + if not temp_dir: + temp_dir = tempfile.mkdtemp() + + cpu_model_dir = "%s/cpu-model" % temp_dir + tpu_model_dir = "%s/tpu-model" % temp_dir + initial_model_dir = "%s/initial-model" % temp_dir + + logging.info("Checkpoints and weights will be written to %s", temp_dir) + + num_steps = 1 + num_shards = 8 + + def _make_run_config(model_dir): + return tpu_config.RunConfig( + master=master, + model_dir=model_dir, + save_checkpoints_secs=10000, + session_config=config_pb2.ConfigProto( + allow_soft_placement=True, log_device_placement=False), + tpu_config=tpu_config.TPUConfig( + iterations_per_loop=num_steps, + num_shards=num_shards, + ), + ) + + def _make_estimator(use_tpu, model_dir): + return tpu_estimator.TPUEstimator( + model_fn=model_fn, + use_tpu=use_tpu, + config=_make_run_config(model_dir), + train_batch_size=num_shards, + params=dict(params, use_tpu=use_tpu), + ) + + def _extract_weights(checkpoint): + """Extract model weights from the given checkpoint file.""" + weights = {} + graph = ops.Graph() + with graph.as_default(): + model_fn( + *input_fn(params), + params=dict(params, use_tpu=False), + mode=model_fn_lib.ModeKeys.TRAIN) + saver = tf_saver.Saver() + with tf_session.Session(graph=graph) as sess: + saver.restore(sess, checkpoint) + all_vars = [] + all_vars.extend(graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) + all_vars.extend(graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) + all_vars.extend(graph.get_collection(ops.GraphKeys.MODEL_VARIABLES)) + + for var in all_vars: + weights[var.name] = sess.run(var) + return weights + + def _run_step(use_tpu, model_dir): + est = _make_estimator(use_tpu=use_tpu, model_dir=model_dir) + est.train(input_fn=input_fn, steps=num_steps) + weights = _extract_weights(est.latest_checkpoint()) + with gfile.Open(temp_dir + "tpu-%d.weights" % use_tpu, "wb") as f: + f.write(pickle.dumps(weights)) + return weights + + # initialize models to the same weights by running a single step on the CPU + _run_step(use_tpu=False, model_dir=initial_model_dir) + + copy_dir(initial_model_dir, cpu_model_dir) + cpu_weights = _run_step(use_tpu=False, model_dir=cpu_model_dir) + + copy_dir(initial_model_dir, tpu_model_dir) + tpu_weights = _run_step(use_tpu=True, model_dir=tpu_model_dir) + + bad_weights = False + for k in cpu_weights: + if k not in tpu_weights: + raise KeyError("Missing weight %s from TPU checkpoint.", k) + + if not np.allclose( + cpu_weights[k], tpu_weights[k], rtol=tolerance, atol=tolerance): + bad_weights = True + logging.error("Weights for layer %s have diverged.", k) + + if bad_weights: + raise ValueError("Some weights have diverged. Output pickle files have " + "been written to %s for inspection." % temp_dir) + + class TPUTestCase(test_util.TensorFlowTestCase): """Adds helpers for testing on TPU devices to `TensorFlowTestCase`. @@ -68,7 +186,7 @@ class TPUTestCase(test_util.TensorFlowTestCase): ``` def model_fn(features): - return tf.reduce_sum(features * 2) + return tf.reduce_sum(features * 2) class ModelTests(test_util.TPUTestCase): def test_sum(self): @@ -97,10 +215,10 @@ class TPUTestCase(test_util.TensorFlowTestCase): Returns: Output from the model function. """ + def _make_placeholders(): - return dict( - [(gen_array_ops.placeholder_with_default(v, v.shape), v) - for v in model_inputs]) + return dict([(gen_array_ops.placeholder_with_default(v, v.shape), v) + for v in model_inputs]) if device == "tpu": with self.test_session(graph=ops.Graph()) as sess: @@ -133,7 +251,10 @@ class TPUTestCase(test_util.TensorFlowTestCase): else: self.assertAllCloseAccordingToType(actual_outputs, expected_outputs) - def assert_device_output(self, model_fn, model_inputs, expected_outputs, + def assert_device_output(self, + model_fn, + model_inputs, + expected_outputs, devices=("cpu", "gpu", "tpu")): """Run `model_fn` on the given devices. -- GitLab From 579276a0d39127d221260697f0f34151f7e66f4c Mon Sep 17 00:00:00 2001 From: James Qin Date: Mon, 13 Nov 2017 18:30:07 -0800 Subject: [PATCH 0374/1801] Delete infeasible TODO in CudnnRNN Nvidia CuDNN7 still uses old GRU equations. PiperOrigin-RevId: 175621353 --- tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py index 6c526b2c75..dcd3d4732a 100644 --- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py +++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py @@ -105,9 +105,6 @@ class CudnnCompatibleGRUCell(rnn_cell_impl.GRUCell): ```python r .* (h * R) != (r .* h) * R ``` - - TODO(jamesqin): update the impl after Cudnn 7.1 when Nvidia would adopt the - canonical version compatible with other tf GRU cells. """ def __init__(self, num_units, reuse=None, kernel_initializer=None): -- GitLab From f9e3e8d8731daf338b6dc743aef84c35740ca037 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 18:34:37 -0800 Subject: [PATCH 0375/1801] Hlo parser: support fusion. Also, - Add a HloInstruction::CreateFusion interface that creates a fusion instruction with given fusion computation. Add a HloComputation::SetFusionInstruction interface to help do that. - Change how we print fusion kind. Before this change we print fusion kind together with the opcode, e.g., fusion:kLoop, which is not easy to parse. Now we append fusion kind as an attribute. - Print fusion computation the same way as other computations, instead of nested in an instruction. PiperOrigin-RevId: 175621768 --- .../compiler/xla/service/hlo_computation.cc | 5 --- .../compiler/xla/service/hlo_computation.h | 3 ++ .../compiler/xla/service/hlo_graph_dumper.cc | 9 +++-- .../compiler/xla/service/hlo_instruction.cc | 28 +++++++++------ .../compiler/xla/service/hlo_instruction.h | 10 +++--- .../xla/service/hlo_instruction_test.cc | 7 ++-- tensorflow/compiler/xla/service/hlo_module.cc | 6 ---- .../compiler/xla/tools/parser/hlo_lexer.cc | 10 ++++++ .../compiler/xla/tools/parser/hlo_lexer.h | 6 ++++ .../compiler/xla/tools/parser/hlo_parser.cc | 35 ++++++++++++++++++- .../xla/tools/parser/hlo_parser_test.cc | 20 +++++++++++ .../compiler/xla/tools/parser/hlo_token.h | 1 + 12 files changed, 107 insertions(+), 33 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 8f595b45e9..8056bcf0f7 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -385,11 +385,6 @@ string HloComputation::ToString(int nested_level, /*include_metadata=*/true, /*include_large_constants=*/include_large_constants) << "\n"; - if (instruction->opcode() == HloOpcode::kFusion) { - s << instruction->fused_instructions_computation()->ToString( - nested_level + 1, include_large_constants) - << "\n"; - } } for (int i = 0; i < nested_level; i++) { s << " "; diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index c9782cc981..2835dbbb84 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -326,6 +326,9 @@ class HloComputation { // Returns the owning fusion instruction, or nullptr if this is not a fusion // computation. HloInstruction* FusionInstruction() const { return fusion_instruction_; } + void SetFusionInstruction(HloInstruction* fusion_instruction) { + fusion_instruction_ = fusion_instruction; + } private: explicit HloComputation( diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index e4c89cd8c1..881b7e227c 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1001,10 +1001,13 @@ string HloDotDumper::GetInstructionNodeLabel(const HloInstruction* instr) { .starts_with(StrCat("%", HloOpcodeString(instr->opcode())))) { return Printf("%s", HtmlLikeStringSanitize(instr->name())); } - + string extended_opcode = + StrCat(HloOpcodeString(instr->opcode()), + instr->opcode() == HloOpcode::kFusion + ? "" + : StrCat(":", xla::ToString(instr->fusion_kind()))); // If the name does not contain the opcode, render both. - return Printf("%s
%s", - HtmlLikeStringSanitize(instr->ExtendedOpcodeStr()), + return Printf("%s
%s", HtmlLikeStringSanitize(extended_opcode), HtmlLikeStringSanitize(instr->name())); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 1e83c69b50..d3096231dc 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -648,6 +648,20 @@ HloInstruction::CreateSelectAndScatter( return instruction; } +/* static */ std::unique_ptr HloInstruction::CreateFusion( + const Shape& shape, FusionKind fusion_kind, + tensorflow::gtl::ArraySlice operands, + HloComputation* fusion_computation) { + auto instruction = WrapUnique(new HloInstruction(HloOpcode::kFusion, shape)); + for (auto operand : operands) { + instruction->AppendOperand(operand); + } + instruction->fusion_kind_ = fusion_kind; + instruction->called_computations_.push_back(fusion_computation); + fusion_computation->SetFusionInstruction(instruction.get()); + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateFusionForBackwardConvolution( const Shape& shape, FusionKind fusion_kind, const Window& window, @@ -1805,20 +1819,11 @@ string HloInstruction::SignatureString() const { return StrCat("(", operands, ") -> ", ShapeUtil::HumanString(shape())); } -string HloInstruction::ExtendedOpcodeStr() const { - string opc_name = HloOpcodeString(opcode()); - HloOpcode opc = opcode(); - if (HloOpcode::kFusion == opc) { - opc_name += ":" + xla::ToString(fusion_kind()); - } - return opc_name; -} - string HloInstruction::ToString(bool compact_operands, bool include_metadata, bool include_large_constants) const { string result = StrCat(name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ", - ExtendedOpcodeStr(), "(", + HloOpcodeString(opcode()), "(", OperandsToString(compact_operands, include_large_constants), ")"); for (const string& extra : ExtraAttributesToString()) { StrAppend(&result, ", ", extra); @@ -1882,6 +1887,9 @@ string HloInstruction::OperandsToString(bool compact, std::vector HloInstruction::ExtraAttributesToString() const { std::vector extra; + if (opcode() == HloOpcode::kFusion) { + extra.push_back(StrCat("kind=", xla::ToString(fusion_kind()))); + } if (CanHaveDimensionsField()) { extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 90293016ab..6b2762ff14 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -312,6 +312,11 @@ class HloInstruction { static std::unique_ptr CreateFusion( const Shape& shape, FusionKind fusion_kind, HloInstruction* fused_root); + static std::unique_ptr CreateFusion( + const Shape& shape, FusionKind fusion_kind, + tensorflow::gtl::ArraySlice operands, + HloComputation* fusion_computation); + // Creates a fusion instruction that represents backward convolution. This is // similar to CreateFusion, but with extra arguments indicating the window and // dimemsion mapping of the backward convolution. @@ -977,11 +982,6 @@ class HloInstruction { std::tuple, std::vector> ReshapeMerelyInsertsOrDeletes1SizedDimensions() const; - // Returns the opcode string for this instruction. This is the result from - // HloOpcodeString plus, for fusion nodes, the fusion kind, separated by a - // ':'. - string ExtendedOpcodeStr() const; - // Returns a string identifier for this instruction. If no string identifier // has been explicitly set, then the identifier is the serialized pointer to // this instruction. diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 4ead64d997..41b916e2c7 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1195,9 +1195,10 @@ TEST_F(HloInstructionTest, Stringification) { HloInstruction* fusion = computation->CreateFusionInstruction( {dot, reshape}, HloInstruction::FusionKind::kTransposeDot); - EXPECT_EQ(fusion->ToString(false, false), - "%fusion = f32[5,20]{1,0} fusion:kTransposeDot(f32[5,10]{1,0} %x, " - "f32[20,10]{1,0} %y), calls=%fused_computation"); + EXPECT_EQ( + fusion->ToString(false, false), + "%fusion = f32[5,20]{1,0} fusion(f32[5,10]{1,0} %x, " + "f32[20,10]{1,0} %y), kind=kTransposeDot, calls=%fused_computation"); HloInstruction* loop = builder.AddInstruction( HloInstruction::CreateWhile(sout, computation, computation, x)); diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 659f3d8c26..d9c223fbba 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -174,12 +174,6 @@ string HloModule::ToString(bool include_large_constants) const { std::ostringstream s; s << "HloModule " << name() << ":\n\n"; for (const HloComputation* computation : MakeComputationPostOrder()) { - // Fusion computations are emitted with their fusion instruction and - // therefore don't need to be emitted as a separate comptutation in the - // module. - if (computation->IsFusionComputation()) { - continue; - } if (computation == entry_computation()) { s << "ENTRY "; } diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index 098879155a..0140c121f8 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/util.h" @@ -226,6 +227,13 @@ TokKind HloLexer::LexIdentifier() { return TokKind::kOpcode; } + // See if this is an fusion kind. + auto kind = xla::StringToFusionKind(identifier.ToString()); + if (kind.ok()) { + fusion_kind_val_ = kind.ValueOrDie(); + return TokKind::kFusionKind; + } + { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); static LazyRE2 dim_labels_pattern = { @@ -426,6 +434,8 @@ string TokKindToString(TokKind kind) { return "kShape"; case TokKind::kOpcode: return "kOpcode"; + case TokKind::kFusionKind: + return "kFusionKind"; case TokKind::kInt: return "kInt"; case TokKind::kDecimal: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 2236c26619..5c9d1bf391 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/tools/parser/hlo_token.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -60,6 +61,10 @@ class HloLexer { CHECK(GetKind() == TokKind::kOpcode); return opcode_val_; } + HloInstruction::FusionKind GetFusionKindVal() const { + CHECK(GetKind() == TokKind::kFusionKind); + return fusion_kind_val_; + } int64 GetInt64Val() const { CHECK(GetKind() == TokKind::kInt); return int64_val_; @@ -110,6 +115,7 @@ class HloLexer { string str_val_; Shape shape_val_; HloOpcode opcode_val_; + HloInstruction::FusionKind fusion_kind_val_; int64 int64_val_; double decimal_val_; }; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index ac7d9ff482..3e3406e658 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -103,6 +103,7 @@ class HloParser { kSliceRanges, kPaddingConfig, kMetadata, + kFusionKind, }; struct AttrConfig { @@ -172,6 +173,7 @@ class HloParser { bool ParseString(string* result); bool ParseShape(Shape* result); bool ParseOpcode(HloOpcode* result); + bool ParseFusionKind(HloInstruction::FusionKind* result); bool ParseInt64(int64* result); bool ParseDouble(double* result); bool ParseBool(bool* result); @@ -761,10 +763,22 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, shape, operands[0], /*padding_value=*/operands[1], *padding)); break; } + case HloOpcode::kFusion: { + optional fusion_computation; + attrs["calls"] = {/*required=*/true, AttrTy::kHloComputation, + &fusion_computation}; + optional fusion_kind; + attrs["kind"] = {/*required=*/true, AttrTy::kFusionKind, &fusion_kind}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateFusion( + shape, *fusion_kind, operands, *fusion_computation)); + break; + } case HloOpcode::kCustomCall: case HloOpcode::kReducePrecision: case HloOpcode::kRng: - case HloOpcode::kFusion: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kTrace: @@ -1450,6 +1464,15 @@ bool HloParser::ParseAttributeHelper( ->emplace(result); return true; } + case AttrTy::kFusionKind: { + HloInstruction::FusionKind result; + if (!ParseFusionKind(&result)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } case AttrTy::kBracedInt64List: { std::vector result; if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma, @@ -1977,6 +2000,16 @@ bool HloParser::ParseOpcode(HloOpcode* result) { return true; } +bool HloParser::ParseFusionKind(HloInstruction::FusionKind* result) { + VLOG(1) << "ParseFusionKind"; + if (lexer_.GetKind() != TokKind::kFusionKind) { + return TokenError("expects fusion kind"); + } + *result = lexer_.GetFusionKindVal(); + lexer_.Lex(); + return true; +} + bool HloParser::ParseInt64(int64* result) { VLOG(1) << "ParseInt64"; if (lexer_.GetKind() != TokKind::kInt) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index f41bb9e5cf..8eeed339b8 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -540,6 +540,26 @@ ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] { ROOT %pad = f32[1,25,17,11]{3,2,1,0} pad(f32[1,25,7,7]{3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_0_0x2_2_1x2_2_0 } +)" +}, +// fusion +{ +"Fusion", +R"(HloModule fusion_module: + +%fused_computation (constant.param_0: f32[3,2,1,1], constant.1.param_1: f32[2]) -> f32[3,2,1,1] { + %constant.param_0 = f32[3,2,1,1]{3,2,1,0} parameter(0) + %constant.1.param_1 = f32[2]{0} parameter(1) + %broadcast = f32[3,2,1,1]{3,2,1,0} broadcast(f32[2]{0} %constant.1.param_1), dimensions={1} + ROOT %subtract = f32[3,2,1,1]{3,2,1,0} subtract(f32[3,2,1,1]{3,2,1,0} %constant.param_0, f32[3,2,1,1]{3,2,1,0} %broadcast) +} + +ENTRY %fusion.v3 () -> f32[3,2,1,1] { + %constant = f32[3,2,1,1]{3,2,1,0} constant(f32[3,2,1,1] { { /*i0=0*/ { /*i1=0*/ {-1} }, { /*i1=1*/ {4.1} } }, { /*i0=1*/ { /*i1=0*/ {2} }, { /*i1=1*/ {4.1} } }, { /*i0=2*/ { /*i1=0*/ {5} }, { /*i1=1*/ {4.4} } } }) + %constant.1 = f32[2]{0} constant({3.14, 4.25}) + ROOT %fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %constant, f32[2]{0} %constant.1), kind=kLoop, calls=%fused_computation +} + )" } }); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 78a72837ca..181760bdeb 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -63,6 +63,7 @@ enum class TokKind { kString, // "abcd\"\n" kShape, // f32[2,3]{1,0} kOpcode, // add + kFusionKind, // kLoop, kOutput, ... kInt, // 42 kDecimal, // 4.2 }; -- GitLab From 7f02a3cddf08fa63a279a89ada2600d18399c383 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Mon, 13 Nov 2017 19:08:03 -0800 Subject: [PATCH 0376/1801] Re-order arguments on the replicated model_fn. This supports the use cases that call Estimator's model_fn via positional arguments. The right order is defined by Estimator as follows: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/estimator/estimator.py#L102. PiperOrigin-RevId: 175624067 --- .../python/estimator/replicate_model_fn.py | 2 +- .../estimator/replicate_model_fn_test.py | 32 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index 0848c5f62f..dcc48d1fd9 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -145,7 +145,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None): 'server device is going to be {}.'.format( devices, local_ps_device)) - def replicated_model_fn(mode, features, labels, params=None, config=None): + def replicated_model_fn(features, labels, mode, params=None, config=None): """Replicated version of `model_fn` to be used instead.""" feature_shards, label_shards = _split_batch( features, labels, len(devices), device=local_ps_device) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index 7fb1065ac0..5a1982f5eb 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -189,8 +189,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.TRAIN, - features, labels, self.params) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) session.run(variables.global_variables_initializer()) # loss = feature * c - label @@ -219,8 +219,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): devices=['/gpu:0', '/gpu:1']) # This call is going to fail if `replicated_model_fn` is still passing # `params` inside `optimizer_fn`, even though the latter doesn't take any: - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.TRAIN, - features, labels, self.params) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) del estimator_spec def test_eval(self): @@ -230,8 +230,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, - labels, self.params) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.EVAL, self.params) session.run(variables.local_variables_initializer()) session.run(variables.global_variables_initializer()) @@ -259,8 +259,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.PREDICT, - features, labels, self.params) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) session.run(variables.global_variables_initializer()) self.assertAllClose({ @@ -274,8 +274,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, self.optimizer_fn) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.TRAIN, - features, labels, self.params) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) session.run(variables.global_variables_initializer()) # loss = feature * c - label @@ -296,8 +296,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, self.optimizer_fn, devices=['/gpu:0']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.EVAL, features, - labels, self.params) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.EVAL, self.params) session.run(variables.local_variables_initializer()) session.run(variables.global_variables_initializer()) @@ -324,8 +324,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, self.optimizer_fn, devices=['/gpu:0']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.PREDICT, - features, labels, self.params) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) session.run(variables.global_variables_initializer()) self.assertAllClose({ @@ -778,8 +778,8 @@ class MergeExportOutputsTest(test_util.TensorFlowTestCase): replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1']) - estimator_spec = replicated_model_fn(model_fn_lib.ModeKeys.PREDICT, - features, labels, {}) + estimator_spec = replicated_model_fn(features, labels, + model_fn_lib.ModeKeys.PREDICT, {}) session.run(variables.global_variables_initializer()) return estimator_spec -- GitLab From 2b3ed91c200f33c2edc5e0bf01d1b217c7db09fd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 19:35:19 -0800 Subject: [PATCH 0377/1801] Automated g4 rollback of changelist 149812908 PiperOrigin-RevId: 175626065 --- tensorflow/python/platform/app.py | 4 ---- tensorflow/python/platform/benchmark.py | 9 ++------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/platform/app.py b/tensorflow/python/platform/app.py index c01e1c9b1a..1d8acf3f00 100644 --- a/tensorflow/python/platform/app.py +++ b/tensorflow/python/platform/app.py @@ -25,10 +25,6 @@ from tensorflow.python.platform import flags from tensorflow.python.util.all_util import remove_undocumented -def _benchmark_tests_can_log_memory(): - return True - - def _usage(shorthelp): """Writes __main__'s docstring to stdout with some help text. diff --git a/tensorflow/python/platform/benchmark.py b/tensorflow/python/platform/benchmark.py index 392921abb4..837bca1dbd 100644 --- a/tensorflow/python/platform/benchmark.py +++ b/tensorflow/python/platform/benchmark.py @@ -43,8 +43,6 @@ GLOBAL_BENCHMARK_REGISTRY = set() # See also tensorflow/core/util/reporter.h TestReporter::kTestReporterEnv. TEST_REPORTER_TEST_ENV = "TEST_REPORT_FILE_PREFIX" -_benchmark_tests_can_log_memory = app._benchmark_tests_can_log_memory # pylint: disable=protected-access - def _global_report_benchmark( name, iters=None, cpu_time=None, wall_time=None, @@ -216,9 +214,8 @@ class TensorFlowBenchmark(Benchmark): store the trace of iteration in the benchmark report. The trace will be stored as a string in Google Chrome trace format in the extras field "full_trace_chrome_format". - store_memory_usage: Boolean, whether to run an extra - untimed iteration, calculate memory usage, and store that in extras - fields. + store_memory_usage: Boolean, whether to run an extra untimed iteration, + calculate memory usage, and store that in extras fields. name: (optional) Override the BenchmarkEntry name with `name`. Otherwise it is inferred from the top-level method name. extras: (optional) Dict mapping string keys to additional benchmark info. @@ -230,8 +227,6 @@ class TensorFlowBenchmark(Benchmark): A `dict` containing the key-value pairs that were passed to `report_benchmark`. """ - store_memory_usage &= _benchmark_tests_can_log_memory() - for _ in range(burn_iters): sess.run(op_or_tensor, feed_dict=feed_dict) -- GitLab From fe3a35a12304c4a6eeaffc208e3c8a006d80455f Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 13 Nov 2017 20:31:33 -0800 Subject: [PATCH 0378/1801] Avoid overwriting LLVM IR files dumped from the same process. PiperOrigin-RevId: 175629699 --- tensorflow/compiler/xla/service/llvm_ir/BUILD | 1 + .../compiler/xla/service/llvm_ir/llvm_util.cc | 20 ++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 8f24bb1718..d878061f72 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -48,6 +48,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:name_uniquer", "//tensorflow/core:lib", "@llvm//:core", "@llvm//:support", diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 086c8dae9e..cd0c4a371e 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -25,6 +25,7 @@ limitations under the License. #include "llvm/Target/TargetOptions.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/name_uniquer.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" @@ -644,14 +645,27 @@ std::map MergeMetadata( return result; } +static string GetProcessUniqueIrFileName(tensorflow::StringPiece prefix) { + static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED); + static NameUniquer* uniquer = new NameUniquer(/*separator=*/"-"); + + tensorflow::mutex_lock lock(mu); + return uniquer->GetUniqueName(prefix); +} + Status DumpIRToDirectory(const string& directory_name, const string& hlo_module_name, const llvm::Module& llvm_module, bool optimized) { - string safe_file_name_base = SanitizeFileName(hlo_module_name); + // We can end up compiling different modules with the same name when using + // XlaJitCompiledCpuFunction::Compile. Avoid overwriting IR files previously + // dumped from the same process in such cases. + string unique_and_safe_file_name = GetProcessUniqueIrFileName( + tensorflow::strings::StrCat("ir-", SanitizeFileName(hlo_module_name), "-", + optimized ? "with" : "no", "-opt")); + string ir_file_name = tensorflow::io::JoinPath( directory_name, - tensorflow::strings::StrCat("ir-", safe_file_name_base, "-", - optimized ? "with" : "no", "-opt.ll")); + tensorflow::strings::StrCat(unique_and_safe_file_name, ".ll")); std::unique_ptr f; TF_RETURN_IF_ERROR( -- GitLab From c408c299eb7b1d31ba64b11d8ccf149a43a9ae58 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Mon, 13 Nov 2017 20:53:37 -0800 Subject: [PATCH 0379/1801] Add correct gradle repository. (#14525) --- tensorflow/contrib/lite/java/demo/app/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/java/demo/app/build.gradle b/tensorflow/contrib/lite/java/demo/app/build.gradle index e1470fe717..b76eaad8bb 100644 --- a/tensorflow/contrib/lite/java/demo/app/build.gradle +++ b/tensorflow/contrib/lite/java/demo/app/build.gradle @@ -36,8 +36,8 @@ android { } repositories { - flatDir { - dirs 'libs' + maven { + url 'https://google.bintray.com/tensorflow' } } -- GitLab From b10d5b2545eb2302c370c3d472099b3afd6baba5 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 13 Nov 2017 21:05:09 -0800 Subject: [PATCH 0380/1801] [XLA:CPU/GPU] Implement multi-module compilation for the CPU and GPU backends For CPU and GPU this is a simple wrapper around the single-module Compile method since the CPU and GPU backends do not perform cross-module optimizations and analyses. PiperOrigin-RevId: 175631791 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/cpu/cpu_compiler.cc | 7 ---- .../compiler/xla/service/cpu/cpu_compiler.h | 12 +++--- .../compiler/xla/service/gpu/gpu_compiler.cc | 7 ---- .../compiler/xla/service/gpu/gpu_compiler.h | 12 +++--- .../compiler/xla/service/llvm_compiler.cc | 37 +++++++++++++++++++ .../compiler/xla/service/llvm_compiler.h | 11 ++++++ .../compiler/xla/tests/llvm_compiler_test.cc | 28 ++++++++++++++ 8 files changed, 91 insertions(+), 24 deletions(-) create mode 100644 tensorflow/compiler/xla/service/llvm_compiler.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index c9828d8641..4ff8302568 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -632,6 +632,7 @@ cc_library( cc_library( name = "llvm_compiler", + srcs = ["llvm_compiler.cc"], hdrs = ["llvm_compiler.h"], deps = [ ":compiler", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index f46764cba0..d2202252d9 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -663,13 +663,6 @@ StatusOr> CpuCompiler::Compile( return std::move(cpu_executable); } -StatusOr>> CpuCompiler::Compile( - std::vector> modules, - std::vector> stream_execs) { - return Unimplemented( - "Compilation of multiple HLO modules is not yet supported on CPU."); -} - StatusOr>> CpuCompiler::CompileAheadOfTime(std::vector> modules, const AotCompilationOptions& aot_options) { diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h index d091302474..963aced208 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h @@ -109,15 +109,17 @@ class CpuCompiler : public LLVMCompiler { CpuCompiler(); ~CpuCompiler() override {} + // Bring in + // StatusOr>> Compile( + // std::vector> modules, + // std::vector> + // stream_execs) + using LLVMCompiler::Compile; + StatusOr> Compile( std::unique_ptr module, perftools::gputools::StreamExecutor* stream_exec) override; - StatusOr>> Compile( - std::vector> modules, - std::vector> - stream_execs) override; - StatusOr>> CompileAheadOfTime(std::vector> modules, const AotCompilationOptions& options) override; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index b77f75ff79..6a0eacc66a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -520,13 +520,6 @@ std::vector GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx, return cache_value->cubin_data; } -StatusOr>> GpuCompiler::Compile( - std::vector> modules, - std::vector> stream_execs) { - return Unimplemented( - "Compilation of multiple HLO modules is not yet supported on GPU."); -} - StatusOr>> GpuCompiler::CompileAheadOfTime(std::vector> module, const AotCompilationOptions& options) { diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h index ee67e65caf..fe5fce615f 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h @@ -42,15 +42,17 @@ class GpuCompiler : public LLVMCompiler { GpuCompiler(); ~GpuCompiler() override {} + // Bring in + // StatusOr>> Compile( + // std::vector> modules, + // std::vector> + // stream_execs) + using LLVMCompiler::Compile; + StatusOr> Compile( std::unique_ptr module, perftools::gputools::StreamExecutor* stream_exec) override; - StatusOr>> Compile( - std::vector> modules, - std::vector> - stream_execs) override; - StatusOr>> CompileAheadOfTime(std::vector> module, AotCompilationOptions const& options) override; diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc new file mode 100644 index 0000000000..ba0304fb8c --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -0,0 +1,37 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_compiler.h" + +namespace xla { +StatusOr>> LLVMCompiler::Compile( + std::vector> modules, + std::vector> + stream_execs) { + std::vector> result; + for (size_t i = 0; i < modules.size(); i++) { + if (stream_execs[i].size() != 1) { + return Unimplemented( + "Model partitioning not implemented for the CPU/GPU compilers!"); + } + + TF_ASSIGN_OR_RETURN(std::unique_ptr executable, + Compile(std::move(modules[i]), stream_execs[i][0])); + result.push_back(std::move(executable)); + } + + return {std::move(result)}; +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h index b2e72871c1..c4f689eabe 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.h +++ b/tensorflow/compiler/xla/service/llvm_compiler.h @@ -57,6 +57,17 @@ class LLVMCompiler : public Compiler { void RemovePostOptimizationHook() { user_post_optimization_hook_ = nullptr; } + // Bring in + // StatusOr> Compile( + // std::unique_ptr module, + // perftools::gputools::StreamExecutor* executor) + using Compiler::Compile; + + StatusOr>> Compile( + std::vector> modules, + std::vector> + stream_execs) override; + protected: ModuleHook user_pre_optimization_hook_; ModuleHook user_post_optimization_hook_; diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 70d8b764a3..62fab6a224 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -82,6 +82,25 @@ class LLVMCompilerTest : public ::testing::Test { EXPECT_EQ(1, post_opt_hook_call_count); } + void TestMultiModuleCompilation(LLVMCompiler *compiler) { + HloComputation::Builder builder(TestName()); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42.0))); + + std::unique_ptr hlo_module = CreateNewModule(); + hlo_module->AddEntryComputation(builder.Build()); + + std::vector> modules; + modules.push_back(hlo_module->Clone()); + modules.push_back(std::move(hlo_module)); + + std::vector> executors; + executors.push_back({backend_->default_stream_executor()}); + executors.push_back({backend_->default_stream_executor()}); + + EXPECT_IS_OK(compiler->Compile(std::move(modules), std::move(executors))); + } + private: Platform *FindPlatform() { for (Platform *platform : @@ -128,5 +147,14 @@ TEST_F(GpuCompilerTest, HooksTest) { TestCompilerHooks(&compiler); } +TEST_F(CpuCompilerTest, MultiModuleCompilation) { + cpu::CpuCompiler compiler; + TestMultiModuleCompilation(&compiler); +} + +TEST_F(GpuCompilerTest, MultModuleCompilation) { + gpu::GpuCompiler compiler; + TestMultiModuleCompilation(&compiler); +} } // namespace } // namespace xla -- GitLab From 553727cf58f47b42718424f3a8492c6b723400e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 21:09:30 -0800 Subject: [PATCH 0381/1801] Fix a bug in ParallelInterleaveDataset, which expands dictionary output to dictionary keys. PiperOrigin-RevId: 175632077 --- tensorflow/contrib/data/python/ops/interleave_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 4adec46706..830642c040 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -47,7 +47,7 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( nested_args, input_dataset.output_types) - if nest.is_sequence(nested_args): + if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access dataset = map_func(*nested_args) else: dataset = map_func(nested_args) -- GitLab From c303f4e7411a9537b42237630710f9b7e27ac6fa Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 14 Nov 2017 05:36:05 +0000 Subject: [PATCH 0382/1801] Fix XLA compilation on OSX (#14288) The double versions of these functions are overloaded on OSX which means we need an explicit cast to disambiguate them. Fixes #14127. --- .../xla/service/cpu/simple_orc_jit.cc | 130 +++++++++--------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index fdf02e5b42..462614475f 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -210,71 +210,75 @@ bool RegisterKnownJITSymbols() { #undef REGISTER_CPU_RUNTIME_SYMBOL -#define REGISTER_LIBM_SYMBOL(name) \ - do { \ - /* Register both the F32 and F64 variants of the libm symbol. */ \ - registry->Register(#name "f", reinterpret_cast(name##f)); \ - registry->Register(#name, reinterpret_cast(name)); \ +// Register both the f32 (float) and f64 (double) versions of a libm symbol. +// Unfortunately the double versions are overloaded on some systems, e.g. +// Mac so we need an explicit cast. This requires passing the function signature +// for that case. +#define REGISTER_LIBM_SYMBOL(name, double_sig) \ + do { \ + registry->Register(#name "f", reinterpret_cast(name##f)); \ + registry->Register( \ + #name, reinterpret_cast(static_cast(name))); \ } while (false) - REGISTER_LIBM_SYMBOL(acos); - REGISTER_LIBM_SYMBOL(acosh); - REGISTER_LIBM_SYMBOL(asin); - REGISTER_LIBM_SYMBOL(asinh); - REGISTER_LIBM_SYMBOL(atan); - REGISTER_LIBM_SYMBOL(atan2); - REGISTER_LIBM_SYMBOL(atanh); - REGISTER_LIBM_SYMBOL(cbrt); - REGISTER_LIBM_SYMBOL(ceil); - REGISTER_LIBM_SYMBOL(copysign); - REGISTER_LIBM_SYMBOL(cos); - REGISTER_LIBM_SYMBOL(cosh); - REGISTER_LIBM_SYMBOL(erf); - REGISTER_LIBM_SYMBOL(erfc); - REGISTER_LIBM_SYMBOL(exp); - REGISTER_LIBM_SYMBOL(exp2); - REGISTER_LIBM_SYMBOL(expm1); - REGISTER_LIBM_SYMBOL(fabs); - REGISTER_LIBM_SYMBOL(fdim); - REGISTER_LIBM_SYMBOL(floor); - REGISTER_LIBM_SYMBOL(fma); - REGISTER_LIBM_SYMBOL(fmax); - REGISTER_LIBM_SYMBOL(fmin); - REGISTER_LIBM_SYMBOL(fmod); - REGISTER_LIBM_SYMBOL(frexp); - REGISTER_LIBM_SYMBOL(hypot); - REGISTER_LIBM_SYMBOL(ilogb); - REGISTER_LIBM_SYMBOL(ldexp); - REGISTER_LIBM_SYMBOL(lgamma); - REGISTER_LIBM_SYMBOL(llrint); - REGISTER_LIBM_SYMBOL(llround); - REGISTER_LIBM_SYMBOL(log); - REGISTER_LIBM_SYMBOL(log10); - REGISTER_LIBM_SYMBOL(log1p); - REGISTER_LIBM_SYMBOL(log2); - REGISTER_LIBM_SYMBOL(logb); - REGISTER_LIBM_SYMBOL(lrint); - REGISTER_LIBM_SYMBOL(lround); - REGISTER_LIBM_SYMBOL(modf); - REGISTER_LIBM_SYMBOL(nan); - REGISTER_LIBM_SYMBOL(nearbyint); - REGISTER_LIBM_SYMBOL(nextafter); - REGISTER_LIBM_SYMBOL(nexttoward); - REGISTER_LIBM_SYMBOL(pow); - REGISTER_LIBM_SYMBOL(remainder); - REGISTER_LIBM_SYMBOL(remquo); - REGISTER_LIBM_SYMBOL(rint); - REGISTER_LIBM_SYMBOL(round); - REGISTER_LIBM_SYMBOL(scalbln); - REGISTER_LIBM_SYMBOL(scalbn); - REGISTER_LIBM_SYMBOL(sin); - REGISTER_LIBM_SYMBOL(sincos); - REGISTER_LIBM_SYMBOL(sinh); - REGISTER_LIBM_SYMBOL(sqrt); - REGISTER_LIBM_SYMBOL(tan); - REGISTER_LIBM_SYMBOL(tanh); - REGISTER_LIBM_SYMBOL(tgamma); - REGISTER_LIBM_SYMBOL(trunc); + REGISTER_LIBM_SYMBOL(acos, double (*)(double)); + REGISTER_LIBM_SYMBOL(acosh, double (*)(double)); + REGISTER_LIBM_SYMBOL(asin, double (*)(double)); + REGISTER_LIBM_SYMBOL(asinh, double (*)(double)); + REGISTER_LIBM_SYMBOL(atan, double (*)(double)); + REGISTER_LIBM_SYMBOL(atan2, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(atanh, double (*)(double)); + REGISTER_LIBM_SYMBOL(cbrt, double (*)(double)); + REGISTER_LIBM_SYMBOL(ceil, double (*)(double)); + REGISTER_LIBM_SYMBOL(copysign, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(cos, double (*)(double)); + REGISTER_LIBM_SYMBOL(cosh, double (*)(double)); + REGISTER_LIBM_SYMBOL(erf, double (*)(double)); + REGISTER_LIBM_SYMBOL(erfc, double (*)(double)); + REGISTER_LIBM_SYMBOL(exp, double (*)(double)); + REGISTER_LIBM_SYMBOL(exp2, double (*)(double)); + REGISTER_LIBM_SYMBOL(expm1, double (*)(double)); + REGISTER_LIBM_SYMBOL(fabs, double (*)(double)); + REGISTER_LIBM_SYMBOL(fdim, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(floor, double (*)(double)); + REGISTER_LIBM_SYMBOL(fma, double (*)(double, double, double)); + REGISTER_LIBM_SYMBOL(fmax, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(fmin, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(fmod, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(frexp, double (*)(double, int*)); + REGISTER_LIBM_SYMBOL(hypot, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(ilogb, int (*)(double)); + REGISTER_LIBM_SYMBOL(ldexp, double (*)(double, int)); + REGISTER_LIBM_SYMBOL(lgamma, double (*)(double)); + REGISTER_LIBM_SYMBOL(llrint, long long (*)(double)); + REGISTER_LIBM_SYMBOL(llround, long long (*)(double)); + REGISTER_LIBM_SYMBOL(log, double (*)(double)); + REGISTER_LIBM_SYMBOL(log10, double (*)(double)); + REGISTER_LIBM_SYMBOL(log1p, double (*)(double)); + REGISTER_LIBM_SYMBOL(log2, double (*)(double)); + REGISTER_LIBM_SYMBOL(logb, double (*)(double)); + REGISTER_LIBM_SYMBOL(lrint, long (*)(double)); + REGISTER_LIBM_SYMBOL(lround, long (*)(double)); + REGISTER_LIBM_SYMBOL(modf, double (*)(double, double*)); + REGISTER_LIBM_SYMBOL(nan, double (*)(const char*)); + REGISTER_LIBM_SYMBOL(nearbyint, double (*)(double)); + REGISTER_LIBM_SYMBOL(nextafter, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(nexttoward, double (*)(double, long double)); + REGISTER_LIBM_SYMBOL(pow, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(remainder, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(remquo, double (*)(double, double, int*)); + REGISTER_LIBM_SYMBOL(rint, double (*)(double)); + REGISTER_LIBM_SYMBOL(round, double (*)(double)); + REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long)); + REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int)); + REGISTER_LIBM_SYMBOL(sin, double (*)(double)); + REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*)); + REGISTER_LIBM_SYMBOL(sinh, double (*)(double)); + REGISTER_LIBM_SYMBOL(sqrt, double (*)(double)); + REGISTER_LIBM_SYMBOL(tan, double (*)(double)); + REGISTER_LIBM_SYMBOL(tanh, double (*)(double)); + REGISTER_LIBM_SYMBOL(tgamma, double (*)(double)); + REGISTER_LIBM_SYMBOL(trunc, double (*)(double)); #undef REGISTER_LIBM_SYMBOL -- GitLab From fab6adb40b7279271c4015dbbd4626c62d8732a7 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 13 Nov 2017 21:50:15 -0800 Subject: [PATCH 0383/1801] Fix formatting in readme. (#14200) @flx42 thanks for letting me know! --- tensorflow/tools/docker/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index e35c58ff80..39b6655234 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -65,7 +65,8 @@ from a binary docker image such as for example `tensorflow/tensorflow:latest` wi not work. One needs to execute the script from a developer docker image since by contrast with a binary docker image it contains not only the compiled solution but also the tensorflow source code. Please select the appropriate developer docker -image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`. +image of tensorflow at +[tensorflow/tensorflow repository on dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/tags/). The smallest command line to generate a docker image will then be: ```docker run -it tensorflow/tensorflow:"right_tag"``` -- GitLab From 577a55777251dfd85bb285fd246a45e913ead6ca Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 13 Nov 2017 22:10:27 -0800 Subject: [PATCH 0384/1801] Add op `tf.contrib.ffmpeg.decode_video` (#13242) * Add `tf.contrib.ffmpeg.decode_video` This fix tries to address the request raised in 6265 where it was not possible to decode video like the existing op of `decode_audio`. This fix adds the support of `tf.contrib.ffmpeg.decode_video` by invoking ffmpeg the same fashion as `tf.contrib.ffmpeg.decode_audo` so that video could be stored in the tensor `[frames, height, width, channel]`. At the moment, the output format is `RGB24`. This fix fixes 6265. Signed-off-by: Yong Tang * Add python wrapper for `tf.contrib.ffmpeg.decode_video` This fix adds python wrapper for `tf.contrib.ffmpeg.decode_video` Signed-off-by: Yong Tang * Add test cases for `tf.contrib.ffmpeg.decode_video` This fix adds test cases for `tf.contrib.ffmpeg.decode_video`. Signed-off-by: Yong Tang * Address review feedback. Signed-off-by: Yong Tang * Move GetTempFilename to tensorflow/core/lib/io/path.h Signed-off-by: Yong Tang * Disable GetTempFilename on Windows and Android for now. Signed-off-by: Yong Tang * Remove `.` from tmp file template if no extension And sanitize with clang-format Signed-off-by: Yong Tang * Add missing header files. Signed-off-by: Yong Tang --- tensorflow/contrib/ffmpeg/BUILD | 44 ++++ tensorflow/contrib/ffmpeg/__init__.py | 3 +- tensorflow/contrib/ffmpeg/decode_audio_op.cc | 25 +- tensorflow/contrib/ffmpeg/decode_video_op.cc | 118 +++++++++ .../contrib/ffmpeg/decode_video_op_test.py | 68 +++++ .../contrib/ffmpeg/default/ffmpeg_lib.cc | 246 +++++++++++++----- .../ffmpeg/default/ffmpeg_lib_utility_test.cc | 4 +- tensorflow/contrib/ffmpeg/ffmpeg_lib.h | 25 +- tensorflow/contrib/ffmpeg/ffmpeg_ops.py | 17 ++ tensorflow/contrib/ffmpeg/testdata/small.mp4 | Bin 0 -> 383631 bytes .../contrib/ffmpeg/testdata/small_100.bmp | Bin 0 -> 537654 bytes tensorflow/core/lib/io/path.cc | 66 ++++- tensorflow/core/lib/io/path.h | 3 + 13 files changed, 524 insertions(+), 95 deletions(-) create mode 100644 tensorflow/contrib/ffmpeg/decode_video_op.cc create mode 100644 tensorflow/contrib/ffmpeg/decode_video_op_test.py create mode 100644 tensorflow/contrib/ffmpeg/testdata/small.mp4 create mode 100644 tensorflow/contrib/ffmpeg/testdata/small_100.bmp diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD index 7a5a4cb8c9..dc5a04a0b1 100644 --- a/tensorflow/contrib/ffmpeg/BUILD +++ b/tensorflow/contrib/ffmpeg/BUILD @@ -47,10 +47,25 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "decode_video_op_cc", + srcs = ["decode_video_op.cc"], + copts = tf_copts(), + linkstatic = 1, + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/contrib/ffmpeg/default:ffmpeg_lib", + "//tensorflow/core:framework_headers_lib", + "//third_party/eigen3", + ], + alwayslink = 1, +) + tf_custom_op_library( name = "ffmpeg.so", deps = [ ":decode_audio_op_cc", + ":decode_video_op_cc", ":encode_audio_op_cc", ], ) @@ -59,6 +74,7 @@ cc_library( name = "ffmpeg_op_lib", deps = [ ":decode_audio_op_cc", + ":decode_video_op_cc", ":encode_audio_op_cc", ], ) @@ -81,6 +97,15 @@ tf_gen_op_wrapper_py( ], ) +tf_gen_op_wrapper_py( + name = "decode_video_op_py", + require_shape_functions = True, + visibility = ["//visibility:private"], + deps = [ + ":decode_video_op_cc", + ], +) + tf_py_test( name = "decode_audio_op_test", srcs = ["decode_audio_op_test.py"], @@ -115,6 +140,24 @@ tf_py_test( tags = ["manual"], ) +tf_py_test( + name = "decode_video_op_test", + size = "small", + srcs = ["decode_video_op_test.py"], + additional_deps = [ + ":ffmpeg_ops_py", + "@six_archive//:six", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:platform", + "//tensorflow/python:image_ops", + ], + data = [ + ":test_data", + ], + tags = ["manual"], +) + py_library( name = "ffmpeg_ops_py", srcs = [ @@ -126,6 +169,7 @@ py_library( visibility = ["//visibility:public"], deps = [ ":decode_audio_op_py", + ":decode_video_op_py", ":encode_audio_op_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_for_generated_wrappers", diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py index 2bcb7284e1..871dff7bbe 100644 --- a/tensorflow/contrib/ffmpeg/__init__.py +++ b/tensorflow/contrib/ffmpeg/__init__.py @@ -27,8 +27,9 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio +from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['decode_audio', 'encode_audio'] +_allowed_symbols = ['decode_audio', 'encode_audio', 'decode_video'] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/ffmpeg/decode_audio_op.cc b/tensorflow/contrib/ffmpeg/decode_audio_op.cc index 4b1c8a337e..92fad70b1f 100644 --- a/tensorflow/contrib/ffmpeg/decode_audio_op.cc +++ b/tensorflow/contrib/ffmpeg/decode_audio_op.cc @@ -37,29 +37,6 @@ namespace { // https://www.ffmpeg.org/ffmpeg-formats.html const char* kValidFileFormats[] = {"mp3", "mp4", "ogg", "wav"}; -// Writes binary data to a file. -Status WriteFile(const string& filename, tensorflow::StringPiece contents) { - Env& env = *Env::Default(); - std::unique_ptr file; - TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file)); - TF_RETURN_IF_ERROR(file->Append(contents)); - TF_RETURN_IF_ERROR(file->Close()); - return Status::OK(); -} - -// Cleans up a file on destruction. -class FileDeleter { - public: - explicit FileDeleter(const string& filename) : filename_(filename) {} - ~FileDeleter() { - Env& env = *Env::Default(); - env.DeleteFile(filename_).IgnoreError(); - } - - private: - const string filename_; -}; - /* * Decoding implementation, shared across V1 and V2 ops. Creates a new * output in the context. @@ -69,7 +46,7 @@ void Decode(OpKernelContext* context, const string& file_format, const int32 samples_per_second, const int32 channel_count) { // Write the input data to a temp file. - const string temp_filename = GetTempFilename(file_format); + const string temp_filename = io::GetTempFilename(file_format); OP_REQUIRES_OK(context, WriteFile(temp_filename, file_contents)); FileDeleter deleter(temp_filename); diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc new file mode 100644 index 0000000000..d44032968d --- /dev/null +++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc @@ -0,0 +1,118 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#include + +#include +#include + +#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace ffmpeg { + +class DecodeVideoOp : public OpKernel { + public: + explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + OP_REQUIRES( + context, context->num_inputs() == 1, + errors::InvalidArgument("DecodeVideo requires exactly 1 input.")); + const Tensor& contents_tensor = context->input(0); + + OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()), + errors::InvalidArgument( + "contents must be a rank-0 tensor but got shape ", + contents_tensor.shape().DebugString())); + const tensorflow::StringPiece contents = contents_tensor.scalar()(); + + // Write the input data to a temp file. + string extension; + const string temp_filename = io::GetTempFilename(extension); + OP_REQUIRES_OK(context, WriteFile(temp_filename, contents)); + FileDeleter deleter(temp_filename); + + uint32 width = 0; + uint32 height = 0; + uint32 frames = 0; + + // Run FFmpeg on the data and verify results. + std::vector output_data; + const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data, + &width, &height, &frames); + if (result.code() == error::Code::NOT_FOUND) { + OP_REQUIRES( + context, result.ok(), + errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg " + "can be found at http://www.ffmpeg.org.")); + } else if (result.code() == error::UNKNOWN) { + LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message() + << "'. Returning empty tensor."; + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, TensorShape({0, 0}), &output)); + return; + } else { + OP_REQUIRES_OK(context, result); + } + OP_REQUIRES(context, !output_data.empty(), + errors::Unknown("No output created by FFmpeg.")); + OP_REQUIRES( + context, output_data.size() == (frames * height * width * 3), + errors::Unknown("Output created by FFmpeg [", output_data.size(), + "] does not match description [", frames, ", ", height, + ", ", width, ", 3]")); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output( + 0, TensorShape({frames, height, width, 3}), &output)); + auto output_flat = output->flat(); + std::copy_n(output_data.begin(), output_data.size(), &output_flat(0)); + } +}; + +REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp); + +REGISTER_OP("DecodeVideo") + .Input("contents: string") + .Output("output: uint8") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->UnknownShapeOfRank(4)); + return Status::OK(); + }) + .Doc(R"doc( +Processes the contents of an audio file into a tensor using FFmpeg to decode +the file. + +One row of the tensor is created for each channel in the audio file. Each +channel contains audio samples starting at the beginning of the audio and +having `1/samples_per_second` time between them. If the `channel_count` is +different from the contents of the file, channels will be merged or created. + +contents: The binary audio file contents, as a string or rank-0 string + tensor. +)doc"); + +} // namespace ffmpeg +} // namespace tensorflow diff --git a/tensorflow/contrib/ffmpeg/decode_video_op_test.py b/tensorflow/contrib/ffmpeg/decode_video_op_test.py new file mode 100644 index 0000000000..4d1fac4ef8 --- /dev/null +++ b/tensorflow/contrib/ffmpeg/decode_video_op_test.py @@ -0,0 +1,68 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Tests for third_party.tensorflow.contrib.ffmpeg.decode_video_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path + +import six + +from tensorflow.contrib import ffmpeg +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import image_ops +from tensorflow.python.platform import resource_loader +from tensorflow.python.platform import test + + +class DecodeVideoOpTest(test.TestCase): + + def _loadFileAndTest(self, filename, width, height, frames, bmp_filename, index): + """Loads an video file and validates the output tensor. + + Args: + filename: The filename of the input file. + width: The width of the video. + height: The height of the video. + frames: The frames of the video. + """ + with self.test_session(): + path = os.path.join(resource_loader.get_data_files_path(), 'testdata', + filename) + with open(path, 'rb') as f: + contents = f.read() + + bmp_path = os.path.join(resource_loader.get_data_files_path(), 'testdata', + bmp_filename) + with open(bmp_path, 'rb') as f: + bmp_contents = f.read() + + image_op = image_ops.decode_bmp(bmp_contents) + image = image_op.eval() + self.assertEqual(image.shape, (height, width, 3)) + video_op = ffmpeg.decode_video(contents) + video = video_op.eval() + self.assertEqual(video.shape, (frames, height, width, 3)) + self.assertAllEqual(video[index,:,:,:], image) + + def testMp4(self): + self._loadFileAndTest('small.mp4', 560, 320, 166, 'small_100.bmp', 99) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 545a4386d0..201774e1d0 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -16,6 +16,7 @@ #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/env.h" @@ -38,28 +40,45 @@ namespace { const char kFfmpegExecutable[] = "ffmpeg"; const int32 kDefaultProbeSize = 5000000; // 5MB -std::vector FfmpegCommandLine(const string& input_filename, - const string& output_filename, - const string& input_format_id, - int32 samples_per_second, - int32 channel_count) { - return { - "-nostats", // No additional progress display. - "-nostdin", // No interactive commands accepted. - "-f", input_format_id, // eg: "mp3" - "-probesize", StrCat(kDefaultProbeSize), - "-i", input_filename, - "-loglevel", "info", // Enable verbose logging to support debugging. - "-map_metadata", "-1", // Copy global metadata from input to output. - "-vn", // No video recording. - "-ac:a:0", StrCat(channel_count), - "-ar:a:0", StrCat(samples_per_second), - // Output set (in several ways) to signed 16-bit little-endian ints. - "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le", - "-sn", // No subtitle recording. - "-y", // Overwrite output file. - StrCat(output_filename) - }; +std::vector FfmpegAudioCommandLine(const string& input_filename, + const string& output_filename, + const string& input_format_id, + int32 samples_per_second, + int32 channel_count) { + return {"-nostats", // No additional progress display. + "-nostdin", // No interactive commands accepted. + "-f", input_format_id, // eg: "mp3" + "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename, + "-loglevel", "info", // Enable verbose logging to support debugging. + "-map_metadata", "-1", // Copy global metadata from input to output. + "-vn", // No video recording. + "-ac:a:0", StrCat(channel_count), "-ar:a:0", + StrCat(samples_per_second), + // Output set (in several ways) to signed 16-bit little-endian ints. + "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le", + "-sn", // No subtitle recording. + "-y", // Overwrite output file. + StrCat(output_filename)}; +} + +std::vector FfmpegVideoCommandLine(const string& input_filename, + const string& output_filename) { + return {"-nostats", // No additional progress display. + "-nostdin", // No interactive commands accepted. + "-i", + input_filename, + "-f", + "image2pipe", + "-probesize", + StrCat(kDefaultProbeSize), + "-loglevel", + "info", // Enable verbose logging to support debugging. + "-vcodec", + "rawvideo", + "-pix_fmt", + "rgb24", + "-y", // Overwrite output file. + StrCat(output_filename)}; } // Is a named binary installed and executable by the current process? @@ -106,7 +125,7 @@ bool IsBinaryInstalled(const string& binary_name) { ::execvp(kFfmpegExecutable, args_chars.data()); // exec only returns on error. const int error = errno; - LOG(ERROR) << "FFmpeg could not be executed: " << error; + LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error); ::_exit(error); } @@ -198,52 +217,100 @@ string BuildWavFile(int32 samples_per_second, int32 channel_count, return data; } -// Returns a unique number every time it is called. -int64 UniqueId() { - static mutex mu(LINKER_INITIALIZED); - static int64 id = 0; - mutex_lock l(mu); - return ++id; -} - -} // namespace - -string GetTempFilename(const string& extension) { - for (const char* dir : std::vector( - {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) { - if (!dir || !dir[0]) { +Status ReadInfoFile(const string& filename, uint32* width, uint32* height, + uint32* frames) { + string data; + ReadFileToString(Env::Default(), filename, &data); + bool in_output = false; + bool in_mapping = false; + uint32 frames_value = 0; + uint32 height_value = 0; + uint32 width_value = 0; + for (const string& line : str_util::Split(data, '\n')) { + // Output starts with the first line of `Output #..`. + // Further processing output region starts next line so we could continue + // the loop. + if (!in_output && line.find("Output #") == 0) { + in_output = true; + in_mapping = false; continue; } - struct stat statbuf; - if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) { - // UniqueId is added here because mkstemps is not as thread safe as it - // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows - // the problem. - string tmp_filepath = io::JoinPath( - dir, - StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.", extension)); - int fd = mkstemps(&tmp_filepath[0], extension.length() + 1); - if (fd < 0) { - LOG(FATAL) << "Failed to create temp file."; - } else { - close(fd); - return tmp_filepath; + // Stream mapping starts with the first line of `Stream mapping`, it also + // signals the end of Output section. + // Further processing of stream mapping region starts next line so we could + // continue the loop. + if (!in_mapping && line.find("Stream mapping:") == 0) { + in_output = false; + in_mapping = true; + continue; + } + if (in_output) { + // We only look for the first stream in output `Stream #0`. + // Once processed we will not further process output section. + if (line.find(" Stream #") == 0) { + size_t p = line.find(", rgb24, ", 24); + if (p != std::string::npos) { + string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); + rgb24 = rgb24.substr(0, rgb24.find(",")); + string rgb24_width = rgb24.substr(0, rgb24.find("x")); + string rgb24_height = rgb24.substr(rgb24_width.length() + 1); + if (strings::safe_strtou32(rgb24_width, &width_value) && + strings::safe_strtou32(rgb24_height, &height_value)) { + in_output = false; + } + } + } + continue; + } + if (in_mapping) { + // We only look for the first stream mapping to have the number of the + // frames. + // Once processed we will not further process stream mapping section. + if (line.find("frame= ") == 0) { + string number = line.substr(8, line.find(" ", 8)); + number = number.substr(0, number.find(" ")); + if (strings::safe_strtou32(number, &frames_value)) { + in_mapping = false; + } } + continue; } } - LOG(FATAL) << "No temp directory found."; + if (frames_value == 0 || height_value == 0 || width_value == 0) { + return errors::Unknown("Not enough video info returned by FFmpeg [", + frames_value, ", ", height_value, ", ", width_value, + ", 3]"); + } + *width = width_value; + *height = height_value; + *frames = frames_value; + return Status::OK(); } -Status ReadAudioFile(const string& filename, - const string& audio_format_id, - int32 samples_per_second, - int32 channel_count, +} // namespace + +FileDeleter::~FileDeleter() { + Env& env = *Env::Default(); + env.DeleteFile(filename_).IgnoreError(); +} + +Status WriteFile(const string& filename, StringPiece contents) { + Env& env = *Env::Default(); + std::unique_ptr file; + TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file)); + TF_RETURN_IF_ERROR(file->Append(contents)); + TF_RETURN_IF_ERROR(file->Close()); + return Status::OK(); +} + +Status ReadAudioFile(const string& filename, const string& audio_format_id, + int32 samples_per_second, int32 channel_count, std::vector* output_samples) { // Create an argument list. - string output_filename = GetTempFilename("raw"); + string output_filename = io::GetTempFilename("raw"); const std::vector args = - FfmpegCommandLine(filename, output_filename, audio_format_id, - samples_per_second, channel_count); + FfmpegAudioCommandLine(filename, output_filename, audio_format_id, + samples_per_second, channel_count); // Unfortunately, it's impossible to differentiate an exec failure due to the // binary being missing and an error from the binary's execution. Therefore, @@ -256,7 +323,8 @@ Status ReadAudioFile(const string& filename, // Execute ffmpeg and report errors. pid_t child_pid = ::fork(); if (child_pid < 0) { - return Status(error::Code::UNKNOWN, StrCat("fork failed: ", errno)); + return Status(error::Code::UNKNOWN, + StrCat("fork failed: ", strerror(errno))); } if (child_pid == 0) { ExecuteFfmpeg(args); @@ -285,5 +353,63 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, return Status::OK(); } +Status ReadVideoFile(const string& filename, std::vector* output_data, + uint32* width, uint32* height, uint32* frames) { + if (!IsBinaryInstalled(kFfmpegExecutable)) { + return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found.")); + } + + string output_filename = io::GetTempFilename("raw"); + string stderr_filename = io::GetTempFilename("err"); + + // Create an argument list. + const std::vector args = + FfmpegVideoCommandLine(filename, output_filename); + + // Execute ffmpeg and report errors. + pid_t child_pid = ::fork(); + if (child_pid < 0) { + return Status(error::Code::UNKNOWN, + StrCat("fork failed: ", strerror(errno))); + } + if (child_pid == 0) { + const int fd = + open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600); + if (fd < 0) { + const int error = errno; + LOG(ERROR) << "FFmpeg stderr file coule not be created: " + << strerror(error); + ::_exit(error); + } + close(STDERR_FILENO); + dup2(fd, STDERR_FILENO); + ExecuteFfmpeg(args); + } else { + int status_code; + if (::waitpid(child_pid, &status_code, 0) < 0) { + return Status(error::Code::UNKNOWN, + StrCat("waitpid failed: ", strerror(errno))); + } + if (status_code) { + return Status(error::Code::UNKNOWN, + StrCat("FFmpeg execution failed: ", status_code)); + } + + TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames)) + << "Could not read FFmpeg stderr file: " << stderr_filename; + + string raw_data; + TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data)) + << "Could not read FFmpeg output file: " << output_filename; + output_data->resize(raw_data.size()); + std::copy_n(raw_data.data(), raw_data.size(), output_data->begin()); + + TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename)) + << output_filename; + TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename)) + << stderr_filename; + return Status::OK(); + } +} } // namespace ffmpeg } // namespace tensorflow diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc index 7176f3b550..39e7e90ccc 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc @@ -20,6 +20,8 @@ #include #include + +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/mutex.h" @@ -49,7 +51,7 @@ TEST(FfmpegLibTest, TestTempDirectoryThreading) { pool.Schedule([&mu, &temp_filenames, environment]() { std::array buffer; for (int32 j = 0; j < kStringsPerItem; ++j) { - buffer[j] = GetTempFilename("mp3"); + buffer[j] = io::GetTempFilename("mp3"); TF_QCHECK_OK(environment->DeleteFile(buffer[j])); } mutex_lock l(mu); diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h index f64007c81d..c5ea1432bf 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h +++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h @@ -24,16 +24,24 @@ namespace tensorflow { namespace ffmpeg { -// Gets a temp filename in an appropriate location. -string GetTempFilename(const string& extension); +// Cleans up a file on destruction. +class FileDeleter { + public: + explicit FileDeleter(const string& filename) : filename_(filename) {} + ~FileDeleter(); + + private: + const string filename_; +}; + +// Writes binary data to a file. +Status WriteFile(const string& filename, tensorflow::StringPiece contents); // Reads an audio file using ffmpeg and converts it into an array of samples in // [-1.0, 1.0]. If there are multiple channels in the audio then each frame will // contain a separate sample for each channel. Frames are ordered by time. -Status ReadAudioFile(const string& filename, - const string& audio_format_id, - int32 samples_per_second, - int32 channel_count, +Status ReadAudioFile(const string& filename, const string& audio_format_id, + int32 samples_per_second, int32 channel_count, std::vector* output_samples); // Creates an audio file using ffmpeg in a specific format. The samples are in @@ -45,6 +53,11 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, int32 samples_per_second, int32 channel_count, const std::vector& samples, string* output_data); +// Reads an video file using ffmpeg adn converts it into a RGB24 in uint8 +// [frames, height, width, 3]. The w, h, and frames are obtained from ffmpeg. +Status ReadVideoFile(const string& filename, std::vector* output_data, + uint32* width, uint32* height, uint32* frames); + } // namespace ffmpeg } // namespace tensorflow diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py index 18b0b8b812..78ead471d2 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py +++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py +from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader @@ -89,3 +90,19 @@ def encode_audio(audio, file_format=None, samples_per_second=None): ops.NotDifferentiable('EncodeAudio') + + +def decode_video(contents): + """Create an op that decodes the contents of a video file. + + Args: + contents: The binary contents of the video file to decode. This is a + scalar. + + Returns: + A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output. + """ + return gen_decode_video_op_py.decode_video(contents) + + +ops.NotDifferentiable('DecodeVideo') diff --git a/tensorflow/contrib/ffmpeg/testdata/small.mp4 b/tensorflow/contrib/ffmpeg/testdata/small.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1fc478842f51e7519866f474a02ad605235bc6a6 GIT binary patch literal 383631 zcmZQzV30{GsVvAXFfn3aU|;~znZ^0JiDk)#3=9k{X+^22An^gitfyb(rX-dyFfehl zvHt(hddKzU-e)&GwolW!p>R|A-TMk7GZO_}h2;FAR0VTO1ziOXJtH#{ecu3AcU=<& z$1rDwWCdLX=lp`ooYb@u1tS9kV_hQy1CZ*Bl9B=|ef{$Ca=o(5l+^s3#5}$H zqI7+bv3eOLxj72D3i$;knfZCeRtm|9Nr}m}1`0)~X|_fRDXB?0`N`R~hE@hv1`3IJ zi8+(ATquv#nw>Iz{)_+Kp{7? zA~hu*WFAO;d{JUvdaA9VnL=_#QGRY>d~T|(p+ZSfYEDjOv8{oEMTJF5a*3^hLULiQ zt${*HYGO)NeqO4rk)e*EAxuwUL41B%T5)QLt*((mNk&m>VoI^CnL=I>SVwYZZej_@ z)ZC=Rv^0=W`FX`9MTwbtsVVW9c_l@Owgw7GX+?>-sURE63yM-x3TzD&vQsNF^Ga-u z4Peap+{`>%V*`can_vp{)74@{>T){F>z-{XfmOWYhe@Z)o=;FFzE9>PLL;dGfu>5}gi}i`d z!T)!%o9{K}MYb&NF0$tOG|w>ddFNF7|NpvH9bWQ4!(#e}YYav!?zC(-fAk^Mb(zuqQ5f%m1f8rkAz7?{8yZjr8$wQYq-T%N)oZk-D$Jx{u8* z;biNFH}%Z2iJ~*SwZ24M-oH=X_oDSi>8=k6Ut;|K_)U4@X{J=|vipe0v4qV|>3;Xm zw@bzfHoSN-i6`Khxp2V#E72>~2WNkNRC88rn|;3B%84JI;6fQ{+bzqyBmI11hW+Vg%T;v+An9>}B}3Rzl?f-`-92YfL@!M1=M-H!yzI zOrFrtrN_uHf%8H4S>tt%hgb9O{g5CP$dRU%!t(SzV~dZuSW2(y)Ni>z87`d>*&O)H z{=kut*)tt16`p0}{E=S0@u7`0TNes@i|fFK2L1;n9fj z+?Dcs=U%puU#?sB32@yL5IHtY;oGGJ4_0(!dglsV5xuR#>Sh)2mtV-xlf#~Y?b?R? z13cM@rxRSCfB)zvU;Ff5*#AYRW^P)4`_xp1{92<9Hk11?&;IS4ze7gi(FdIsSpwoh zhZ!BCB5xc}=zrOe&^ae{$>~%BiD`aJ31FOH+Gr#N4;`skTSM~EV$A)8~Rt8}wlFly)=X&(8WRD{Q=gdP4ee>j=-Z^wI zPVUsqh_9T=>YWor1(u07>i#|U_1LZ5M-)3(GN?y9+_ufpg@4QG-R2Lro|7tVUOQ#s zvYPYdWzvn)((YGkI(r+ukjp*nq*E8PZuzGL6&DN`G>$nkFqSW#Q*ho@>yh!3PwBer z8ZY=Z3FMxsP5SvGQdc@_MeO2m;qsGht0(TfIy+cAv@UGV*=m#L|Noz*`nksZJ#iudy7uRnUtF^7@b%7{6Bij+8P0Tb zdwpi>&yQtuUH{upQeLs!Z`I}3r_Ogi4D;Kj8gcjO%|E+)1EVk23vIW`ZvQ1)AX94c zt?q+ixYI+hBQuhgs&U z%&fQRu3e@_7pbT!FD*L!UH?&OVb)^}*HtHH)>-_qx-Y3?Xms13H8X~NX6AGwF&*Ce zW@DZ&^+qdi{oK$Konkv*YM1D>Z`ZtbhU^N+kA1Z=c+K}U;fDhsHoajfVqex7)>rhW z<>^eL*%seTLl}cO&A#3;GI%rN*g1dJyP3zIT@A{b6aJL%zx|KTIbH7sJA>Ay?J{`4 zEW_Er#Ivl%=YR>%s8qq>7X(_p=QUC8+?7eifNrE=gswd8<4YV8aOGZ($O{&DDU|Bw6fUke{dmD>94#~h7y6?4L+O1ZQ?O2`^; zPM7Ws^uM;Ml)H8Exm6dnG{5-io2*)X_4%jyWt~S0{nsUx9@llT5OuXU|Nlc=dz}55 zzwIsw3cVtx_kOS|tnK1TGqt^KbU^FB$mxG~rY0|&v|V5Br>pmm&l`VpaBul@bg%ib z^4DzN=Kq+IJ^yE5Vd>1ND+@&e?=x^5*_zmvE~0L1pu=GC@POxOo;_TgUAt@h-6L{} zW5q8ssPzdbPkU?-{lD&Q?F{`*9qUy3*Kg=jnmExz_TUjF&F+_19*Fk#ZdonHAiGY~ zp)!I&=wr}ytJF6U%c`30PB^{Hjp6KV#f9~I5+i3!s;)4yxYb-OqkeeKiaCX@o0TTj z2>&uKd--SgbkV7)D$YmVeR{M}OFPND=>0dR@*l!`Ppo10XILV8clwb>|CK%$-RnNG zW~Ej4*GDs^#(S-*zdYHaf4|qptejZ;>H3f7Z;m<2Vk2bR;qh++myVh}!<}hP3-qcV z@P1psY{3&}S!}XtJHr9D@RJheCJP!EZZL6a2?(rKGRa^_Gwhpjzwyli$CbP{xRWgy z%gr}mX`R||ae+N&4|8DiBp2@(Mum-ee^}a2*plBH9Pg$?@vj#=61`#TGUGpDeQxz1J73h+2AVxvAbx!1g!{22Mu$Z|?|UaSL4tjm zlV8Mo+sq@0ihmylq)W^y3stbPkea2joYm$oZAlg36#9tqdk9G zW`BYB8Pz6*tHSG87D#zNm}75T+EI7U$3c+gw=F~2{zuOno2NTeJji>T+2vr#Bf!U~ zz|PV8m`|W*(^bz$fd}O_C`)o3tZ9sKUiRR`!50lvm;SIg@FAlAq!9ZXk=@J*Ukb#v z^7b(NpOUhMf%DJIhcmoaf2sc9G2iA2+kx%5+ZZ-{uaMwtc(9t$eb4dMeMSvit2Z(# z?4380{j%Mnl7%ii3olx%>1V2WvZU+Czo{`xx{geGH#NrZjoXTL@#~#O%UxC$PvtmJ zQlc`i^RLjh&d|!jLan_O%WC4+#`qtqJU>~^{^jaJZ0m16V18}f$nj-$iM3xEg8*ko zfzdmLRu9R86Z-pDm%aG!y<^9u35v%~f7ri#3qw)zhMsWG$H|V<*X`f0I$tWNuP*2P zhN8J0+qE3pL|YZU-V0w}Cd8lWQ;>Z4|APq(0!#@81`OX+gcaKEtZWso5o{DGkKE+u za=?W>>dEOw{^OS)%zn0f|L%LMggw;{ty=k_p0VZ%`<_MYXpOx^l}=#hIB|g5(q@%h=#!jR$EvZ*p;);#m*?fY)I`t}K@hIbueIl!R6#m>TdK%!blTh6)0C-0>^ z17p;rme`ke=E>~}oxOWb@Z_(Y*v0s0fk@q>9<7GZ?`K_@UuD+EX>rVB^x3gf^2NFa zV`gEKFY}Cp(l@*=lL%Va{pI4s;+Z9UcLih?-m(*5;N|CG;!CWm(qfQED99+-Y5UfC zN|wlkCnpyOB`C&fFwavsc4e*>Gq1w*t1j6Q4l|u<8vJi1c;2^XxWAhD#Z+6zz3Y_g zjg6=O2o>G3*qiZaCEJs__{AJf2@C?lOe@OXEqKx7G;P5(r}J(FL6tift)8ZmA9pWG zclQ<7kaW3CUX1JNN^;W`7xnKtE7+WPN&n*nv-9%NmJf`t*!_trfA;@-t6bqkjV0!j zkLB}3_@A?nUipmiLYmzYhSjF;7C&m|Jg6lT>GyOV=auz~@*NbW`R!kp)hxyz#_OC_ z`R}q#hGXs49~&a}O_}-s@{2=LWuoUFh}gK`+UpWMW>>>`_8bg?G8qnnPV$n6^u8R} zd@#;XlP~^x#LX=mo(U~6WWCMevGd;Vh-2>-nZ{q`c$6s5y0%X09W%ouKBZTxhZ#;6 zx6RRYG_-AC(o`smzv%LA@rxZ+0r%eoBs0|)|2}eWeyg>%#dM!3k0iNgF)<`ekZ86o zxbl3(as!9ihJ_4FJ1@(ir^A1W zXk{K~U{T`c5EGr_7%arTW7Z9gNfQ_{tX?>9uFXye;oB4|x=lkwd`@j)YI*7ti@0;|6bzv1~UW^sv(O^Z~f&ztd!CA#;}%(}1Hvp+C76mm`vxV~YQ z_{JJ$ku^4-cfa9ncRGKkTJdn;2BXu{S8=d5TyxTiZrJHiHb=^>iAB51tRb&|Uslth z^RYW-WZub;iM*#J+4;@&+K;uLQv3w@Z#MPU=tl;3-Z|8ZhFVk?>mR%_3b>qf@%w{MZ(jiyBZE8s8pSO=fMB$fM;X_ z*Tvh#Z=yfjeBdsM$WoH}*0Am?laR(;`@gRW{~SC2&D)zvdh^u2&4DvY)lYnDIkb6; z026}%2XA8o!^&f63}LDtmNJ~OZfm-5h|96y*QF+pzm0D9q&vCK>`6U(=jySQ;><1& z)$BVI_N?50(0y{^qVGzb`ATnpd|JBkv8f<`T|rpwOkumP`?C~gq%!|MQn`sexcYP? z-_1O+9Rd2TDz{Jf^}gAXrGHbXKXT?cB75*J8#O#sdrvF03{R6CMZ{ zEm`0s(^}=r@Yq3zF(he~m+18^6)cJG4+s7{_Le>9^0J$fza>o`$8BOOV~*0fYq>~9 zJ9m>`(;m6*2v!4L9mk88oZlz@WHeXqZ=0ez$K#RW`8{8|H|oh>t1(Z$a9O_N{^Qx- z&xB6dR;l^K)QnZ8y21Hwc~8&k|Nk#4Rg|^FPh5IUseb>L`%xSW4(T5p4Sd&M9;4ooK@a<~j+> zBZV)Rn_~9x#BnV?!aC)6&b9wJn***mBuL9lv|Yhjz}Uvlz&MSQ`PaL{2Ud9NFv`D( z+O5N$T)^D7W(HdVb0X(jzVP!o9|Ac49#|Uq@9NReQdU8;B4!WiXm_{G``8>)k0p4N z-+TJ(h33H()!BZ&57gZ2_&Fm}HwN&mZ2JEBux6cv;JU+wn+RRw!%9M z|INde?p?6X^xp*s7M2B#Jk9e986;P~O^$d^Zm@1&d*@hVS~=rI^%kEEN^4Kt%#AZVEpzR7fAz*T2B(cI3>-(f zy#yZ}u*piSF=+hz;h2%Y^v!d`6-=3~BryK#w7F4xg^_0#uY~@EW%2(`o?I`*_C?{`lXWzl&syU2{tQzs}8@rT&>|oAX`9U;AGl5xB-%^Fh-!K|Esr4GmQb z8TO_+<|WS#FgVC`CUk#Lu=Te$+9GiF|C8r>dQIA~At7hI7qU3DSe9cevciAjGDlsSUiyPye4_e1Mr_8))XZ*lu zl`m82^gBB|R#$plkG_&qCRO@B=f3GJm##Tay?YO(x?T9SJjuPwZMl(3nMjs_$Hv>D z$*cbOnyS@*ws&OYjxN|4{CQd@vnKPcx36ZqF}l6(>iGYc|JACM+3RlC@8>?VUa&B4 zio-|dmWGvwPgKua#l|R7xj?bg_gw?)zZ9K^`OCK54d0~r|0%-}xwSGY7?>Z1?F?*Q z+bU_|dUb&l1JeS5gNh8UQPP}pw;HtEHar$d&TtT1IQOC9We4W16VBb{_qVaqo^8Bv zajRH?&4h@VGdOe(9C+uN#ITBq(Ll`pVVA+hM=?uJ8{4|RzPT;2yY}sKJ%^pw9RGA> zg-*_VdHG0F$|0$oUrTZv{WmY`Y-G79%~eY7m1tZjs9!dDd)}5S&1?Ssos;Ime6`f@QTP9U z+iPa!Zw<}gpe$?tk^5f2g);HVyEkSU`EP%i{_l)rUD?zv>vEk#4u-4opvv!6BX?BNQ{U%DV-`S&$@#DCA~ zt^alP_L>sI{VWa!77gRWHiS9t!P$SSRQkw5xxkjP<##oFI5?)Z87mn2Imh4AoWGVWVArFcDQZD?Pq(^$EeleRYRWqid%XUVi7Zd< zuj#)hDi(CU_-4mte}ki6Va1#0XAd(lGO97iNw6;M`XK4VyZOk6Jq&717dLY#J6T=` zF0eA2o{}ucAhwG?amAIVOl=zT-`6(YJ!gN!dJDs{uD8;xS`NDwOkRCz+m4cyzYez! z$5|}0zVgE9L7H@gv`yuI%`;U&TLf5qZ4{*Lek%*#cQc&vZ5(qNqkqMp*1bXe4S!^I z*i^g`uwFPxd68LIe&d-d#h2E-ce!*!((Ki`M|+%@oz`|RaCDf&~{F|7tXQLELz)YuKW#Iy@fiJqWxdJz^i#N{RcIs-y%ZE&o z$>$B`ck>E;II*m>;mP$0_9v<>*JNFO^?dSY6B%av&SZr~-lr@K4&^MHY7Rx7m6Dil zb2+x|kgA)edY{K*21W+v2{wlUUQS48WVVex)?Kae(>hHqp)hiW#{MVo=dpe}IFGf* zTFpUa&nNjyN42V!X|}C%UjCH%wpFN4RkB*srgDMn*ncDU9qgrrBDe1^=~{WeMQFl9 z27xfSyu#B>3u+c#w9VdNmcQNTF7N7}>@KYW?VA_=dUb>4qt z&qIMHAMej#HVB!)({FsfS&YwS<(JdejNA7XerRBqacBq(3jE!8AV*H*ds1)Fhmh^St)h}rZq)O`g<`P9n$`Ttj~b(eopC|CTW_}c1F zrE~N1|NmcjDKU6{`i?)k`=XXUa!Qu<$nb^Lewe1_ShG^;$Sd`=l}o#BKFnIQJG1IavZR+%{PWv_yQ(!D zd3cv|>fTx3v|o1ry&JCH7w0Lj6Y1b!Vlr!AEK)Hq|3`TNlT4MksrXh2rL(V@7^E09 zHXLFJxXd6_^)F4QBi-}CF~-HhqQ_=_)>*lV$M!(S{PR5g#pbLE;_d%RcoYO0<2l%GmfJ2+`p$6ga-ghq?5m?I zF9a1x7;U{P+hoUM%`-`VQ`Wx|HL-cOGutL!Q%>6RHAAZ?GFD@UuSbV*xw77U`CUoz ze5=e_PVURznYg7?Fur+1k%pT8KgsL_=KqZx;=O*)mi1cQf1J+nP3U&PhDEyj6&^=? zTDtr4L6&s~5_RS}aR(oq?$N+p{Z8c1Ua{aQ3Ee(imi03GI6|M;c?TP8*T4VnO{K&v z{|{mM%dTa=vR=(G*ZkkZC0zoK5||p}dE5CH zf7)?FW7Qmq#g~@{(GtMRyDUbH|DR_W0N!Af7<_ji5iRZvkb>KTem1Ct=yZ} z&X>42uI_XO!;MY1qJDL~P7bljx!-w2Sf#w{sDf3WYVUj7)3SlCP79xlYFezf=Q7nf z_JWPAgpGY8pW5xe??e?(JLT(IIanIxmHp`s@jQ7|{`j^&IFEaU^en!KKdYvOzVA`84*1s^s>pmsF5}BV zBW<=|qiY<37alY)K2!=&eRRn}izDoYNnLo8@MF=NYfo}YU9&zXdj9a1bMLqpya}A= zxcK1SPkVXppIK^|bT`d$V_3if1_ibS3XvaOthIX*-`LN*b92pw|IZm%nGY|}Ss z8SCMEJ>;*tDZ97q>)9>dYU&SoQXafhKD7Pan#X6gWRutz9AJ}vSR`{GIbsjna|a&A ztu{Q4WlEikzyIeKU4H8Di?6)twU-X)JUS=ZwnFY9^W=Z~kFYe@T6($J1u#U%`=4UZIDg1i|Y5w%Uv|$9wqU-jJ`dcadRf~BUu(k zKF>x5#tV$itWQ=&&tq~k>~G|MCO)yD#3N|xi31-j4Uez!VF|BS70{TFe4_8<-9&-y z><3zx8>GuOy|uVez}Vnk@Sy0IWqOF@jfZz7nRmot|3CdTH0pKK`Y*dWZ$EL33=?$c*V_Mo`Trut_mkJXZru4H zt~ucICkHui1zF{8kuMD8Ia?pky~HDX_kM5~``IKJN#_YRjVmYeO=gig%~PUow&}1e z=NZ%M{E3b$8kLT2<0;kpyC(2^=c63GU%S+q3x59Avku?ja3z_WrAX{vMgGirJxqVt zO)XOX*WQh=J(gDfEk1jC^F7BCeY)=z{p`4-3#ztU4t)9Y)s5x{3Jly92E06L5@n{^ zN`;sHy}U+TeS_wdg2nq8j$S_c(_YTP_J)PV%AW#kd%w!WCQad9YjpRB<5Pt?gRl4c znGT!ehbJ`nsKq`FFRizpt(Elu7L48UFP0TRUe2Rm#swaLxP7{NKXx;f}1= zUS+0hZqH=Ra94Wnd{*L(!ee$jx&EE^t&gui#K6pWQbvM7Ktp1?$hX85m$YwXEQoBA zKRdx#>s{LYjk~A1*1z)qoD(ek+00q-hE_W7^SF*Al=?BnI8V2+tF_xK+O1}A+!|L~&S^TlkTks+O)19|&njD`rnMu7V^C8##C;Ls#UeA_)IqOLLukS2-^VVPGVDa6t z|E~kXcW%GXOoogfC0@-ZKDgSId=O+3*J9+#7)@1{rQ~KJE=OB z@|=`62WIRyF*g@i`ZL#nkta#BH{#stWKjkU#@G)U+!4C#C%nA4h~chSX1PGhL7)0& zk^a9C7WVrOFflcKF=k(=^UN`bPKiQt!zrw`Spj4+jHInY##y)d`d(Eemc{R;<@Ov4wl8Q|ydi{j5_ec5*PyQAprtb@r2G z{ZbHO^ly`b*lWe*zm`q;+xcJINuuzi^U{oMdpLF+^fQ^d>1j@^+S<-3F*ZLdVw%@2 zmiSbmm5}j}(Q0CiXR`kSC$*M0eKG|M%jX{C$-aEmc+c*>t;@`k&J}EPTks|Q#=H}} z5qXY}&B9i^bQ4!dWZ-jT7cfxTQ6(@#;?x}xFXkDF_nDF=U8oUayTZESzw86U#j1t6 z`@eEJx@(x6F|JeKVH7_rz~|^WwZ6$&*HVAyZ`=OO2cH&HZ<_n=_+g%j32a>LEB0}- zrL&)wt_s+&(n3Q$Vlq=_x6Z*61>weaT5`*|qR$)|m3<^pLQVbko3Cg~9WoODh{pn%a$RL>f+4p&|?T@#Rbld!IRV8fu82+aoEO@VdP#>B$Qo29{=p&ITTdo5|Nyl^esf zCSCaRr!w!|7JrqW&F5yY>r}_FziiOd((amcGTf8rq1p+XD#afXhP-V3+RvFZ^esYk zVqDA?d7E)8;Fz|)^u+hVu0@kGvmL#sHqg2E%)xY4Ao;NuU0sT$tWz?YA>#%X_os1}>i;j} zZs*_sy|DU`)u$)C_nCAeuRLy@*RYn|WdmOoH%HwZ!z&uU7{AYNX|U(xo>3FidB42Q zbAx`b)9--l1J8Ja+q9M&u4du&R;cM+yL#Vsb7sy51+QlJ{jFbR_ueNO<#M*xp3aQyeWpF2R^2;xjd+xB7foH?Ug+n)*Y?sYwz6r z+htLpU{;$xn{`kQ>&?vjLcbS(p5iy-kkD?SPx9YxrzZNEv}~6iE&bFQgB_{6Nzz^a*ED}PZ?lrw?-=Mjf{_snM0bg#PlqLjn_ z`%8oBeLCE7e-6G1=X6neV6}eb`U}FPZ@!f99la_(dERc*C)-*xE-p5lCz8KGe_9*U zt0Mv1qZr~|+`6|uM|`5^kKYgSgHD}4VQA31vgqH#J1eZi^Q4wmXl3(CTt2GJ;ke+^ zjknfvzrNq@W@2RP&WpaR|FHhxvD3!fpXzgu@&!qp< z`CCV94cATlwWVhd8(YHpe}>-jJKp}fy{26z`MhCqeqnt9v&4%)uLHKsWpkyCmu{Bv zo@l(&MYe%aU=qXZJPG@>!wVdja-6);oAqzH!;FR`-zL?7J-dGGII_b{hquf@;oY8( z*PqS%TmQ@QzOa@DyENCH?Fsr*-Pn7l?_+LYWN6ROq*u3%-=1wqB?~~AbFmff^~o6y@k3Ayp^vk-*|PnPW>CZv|xTf*){`5 zEq4oxZ55^oUcq0&s%n-CsZ{UZ($f&)_V>MFRRaTqivoiU2g6@B#ch!i$G%RLkTIU~ znX@wpp!9d`=1hqd1V&BTHRh8t|`+$}7Yi&!Is*%Bm< z$e&{f{>iOb$*HE)k>f1!-)YT`@F@p`c8P?y)ZV+l^__dtWMR)w2kf6{v3!5>+l%$I zL!6m!2PCoKX?2Ss*Iq#~##~*N$ z``e+OSuY%ZbJrAU_P4S}8EubWlF#M0A?en+xe8h3x%WPl#m~7g`*8l+7DkOSHpw4m z+SzW-&%{hT#6&KBh`n%o$)#(bjM*=2kWJ56DX>DCxz3UE$9$dkEe!KC8!xnp|6@3- zsJY%HO0>P?>&)k@c|Vk%|6BCn%9@4Vb_}QE)9rc}h=)$ANY1}8>)!{}2#J}>3#Mc! zovk?T*Z%Cl)7f`;L^;2hd~X!p7i%y(x>hY*B127k)|#FRkyc%6CMBNGeH$JAO+Rww zrYBnuyzMeR`)he%XnXV>(+TPJs_AdfnKrQg@D`nxHCy^n-^YNZmK(m>-rOMeskLGE z*ZWVrQJ{>sGhNa98k!d0IRg8R{ojK7vJ9#)T{9Qou+`jsr96A?Ye^x(&h_geaPTp;52e- zU}#vuz`?xB`uD!Hr4MFU_Xabl&I$=qn&idsY+9n@-Ii%qCpq{&FbFW8FOEq!J#*tz z%>kV&RrUvF?zlI7>w)_m47?42F4n7l2l45fGCc@d{@~QI_POm3tw5cenD9%FHd?el z?|kZ6lrV2<*A(_*rk3gckIHtbXX#y#X4rRnAKS$*7bI=k7!ww%w8(9^)A-U!v4d%S ztZS&YqRI*V0>hk&2mK0rpS-`eU!`44^M!wJOvCT)j_2}PCX0HXB_tboAMrb=qG7Rv zhn<6uNlTn9aCU3b@=c7#)wtv(UH+x>9smEQqGYw|?^oOl)%LWkPe0ps_+Rp~s#iPM z9)4NC{BerXasdODHF05^WZpBn-~7bpaj->)uhU90?+L&8g=_KF->cS%MwoO&zmjfp z$eP1__{~OB|DJ~-Mt0F+^*~5Z|9?q){b2e}b2naP?7BafZ ze?wZ)bHnN%t2DFBn;h$;u8VU0d8yTonB zcPYulDTS99XG=@Hhze!@5&R=bF!I^^&xaWUEfu6(V<&bB+Az+&aLBAcbGLK<-v8w; zLb_k4sR-|!m!6)D{W@u$EWYL}tzL6PWIs{|k0}-=(UX*%AKoOLeIAF2Di?2V8miKjVE^Yh6G-KK1i~E01n7q#8_N&`Ex3||ZsjX}_{A|z= zBiP8<$tE)=v6SON@`Rwa&o|cTOV`vKy8ryFt7xdE#ezxhFD_-6n|!R^e5T3SCE$Jb zMvvsM=0`$%S?`qX@A<3CuXvM(K|8hQ2AA@l_>i`L8<=@G4n^2H{gpCs{=2zh#*{<- z3A-Ya{MCAP_5In#^=E$ezMdoRET*12k>~Vp*6IuGT4|P_9&_%q*}nKQmzL(0_m4Cg z8aRLNR?~kyXU&2oCQUYRzO`0I6V?PXYp54p{`pP8gHe3vq@xBBc_w!B55rzH!H@6HZ!i};%k8eZDu-1;;IW$h+Q)K=4d+kJzmd}{z#zcN?9{`s z;1Q!{o{+$Mei^^*KOa7k<=gVkoLSw6(_`WB7jO2mPh{M{5OYQHM?(GIi4&JPr>yz+ zP&IA&yO@6Q$jk-%|EstBpKrZw>RvsreB1x&K0AcV*%$B7+%SpLiF1~?f#;-8(om#!-%L<7H4=7Ao zcDPJoAFt6d$%?kV0`Z#PCK{_FZ@&G((w%f_7DHnF?l98>@`hz@W(6Mk|FauYBz_IXQ8kBSG9MX!d>@c ze%@U8CFx9K)7&rlrEecJGH9?0a4|T@IT-ZoG4bWyU;Xvr!l^Nd2G>>uBzLjBVA)$B zv802AZNaXmapEgf^Jlg{_n32XsnX3pi-uU;=9PhLYR@;`-yOR~=b+jCcMS8kU%%e& z?!n`DA>Ad1p|Rl%vp~ZC6F;AI_jrD0Y+!H@VbDHsfWbpZ>+?*82!Wt~j%^J3KRYf5 zEL{A{Y~y#G0%IN~#?J!xbB#nxjj9wU{K+?1Asf_{BE={3;<~*73+lczqhULjw*V=#H{b9WNVb})2^j*c%J~Hn}4Dk+W&bD^&Vv=EHFl>$% zEUGDbxPD9K-1YbRrYkTAuqVf>vIuNB=E-ZW_Ea{3({a^IV`gUN6;?l7%?x6fxrP4V zvhO-^%d_lP%k(>6k2rWXOf_Kr6Xmlz=?v>@u_sTtl;&r?Q0rjrDfumCx_81a!wr%C zhn^VlSQ*_t`PM^Pi&uYz9gAb*Q5MC7bsnWspWB&DjoS%O|-+|1PHtYsrsYwZJ0#OVu zth)D-12e)!`);L4ePc`fcr{63{b8OntaaCIB(g1}TFlw@9*FLazaaj%VCmhR)AC%F zYdAP9WSD6F#Pfs8R-L+tjokYjxu<+Ot+#H6$c)g|)%8m>*8j9;e5sIi+{ba%wfbaijPC0y}uHgLR11_q+_?91-IfHeZ#cR3OEw}O; z85kMwY-nIfUGe1t=Yb+684kT4H!@814Jr&1?q3z#rM)mHk>HBWBShu){l%O0t~WjVjEtF-ff9^Th}`M z^ct6K3GOTZYQ`AqK8+QhlX&6RGGXU+Q@)B^<PCf`wM4mf+7H9lM@{e9jMkXGI-T`BsgJ0&kQM_c!ei7Bm>sV$0qL+OZ+%JWj)uS zMXMf${w*-=>)yyWYr%?2Lq`7gleJre4rlN@dg#1{-SGOgB?pclfB22L^!2KB@*mc+ z>!h(39{(!(@6h3jre%DQ`tz9t8oeZTBpoSKlFM;c>kIe^?Nwe+T!f)sd&g9 z42hTiP*Ae{`hyjRKjjLBEij7_*xW34@8TP$NKLOND^|{SQ?RkZwyy1hrIUS6=d5GLF0Qegc*{^DUE#-r>8CbKti748 zcv1Gk)mZDBLgH8QKNzt-TE=>HzsJ96P1<*_e0t&EXP0R8qGXf1!FQjDIagMOeJ~Z$ z`mtT&%7-ahyuXqSo^Xf?BI;5G7-Kf(pPuZ z>`;tY59@Ka1oM5oi+->deu&flT-WxdcdDH0^@WwgSzDlT^QKT&7WJ!A?9GvZeGp&PqgXVrC+lXuZnK3f6wFbzA1Wd zN_D8(^A>JKSH?yW_a3GR77Ewi@I|HEY`kzOwBeMUkM4ylTax)JYOXoib6bZsH0=8s zBJ#Cr_kMNlPj)ZrMSgye|E;5$_2|VCUJZuVc3(X1F-T;e|0_FtZJYSFsj=49@s}Eo zR+t>%;b3Ucx^nrEiLENj8u15;^*1(1KC}xv$UdLNP&3}oF7eY@jeku4Qr^8^yr0wH ze#0K;*J^?6$181`Hy_yc-rnliY-x@9?)bH-d@>wNavuyBtQe#PN<<76>dUM!bA9lu zaSK;~{N+EQ%NzT{?}#s8%u$=bF=tbQ=7x)W>;F3IM`=wJv-@*m^TlP1yI!Rjoqf-9 zsHaIFzeqT6ZQ1l?MfXfSwQ~E|zRvb2`h6^S;`wvarnZJY{ro^Kcfg#2TR)p{5M68Ha%TcGin$>QPTJGEzhODD|y{7B8a*5Vy=&=a2B6)3MX%T$ zglm%G>T@RfTc)1i`C|4zGRjujFjC^d92f8FzCO+?!e2-VialDXeJYUAf;IR{_A~_x z0mC_^cAMhjzqppZFGyrB;*jlGHq9t0I#p5KR=#Ni`v02r* zvEllyC-+=*dRF$V6ZF1Uzh&wibCt>LayJG?p-%l%zh=ptQ#_^$2|~5U zc3%(ke*LJ&?4p*?B4%qjafSC{nrZIoD`b*m*)B~lU(k2D_M|rR&xpC^8R`o!Px~t! zuwG`vu`31L4Ts)*`0-*hYyQ7ZF_v^bgM5}-T;FOsXE2^&w%MC+vP?TJLix|Mbxu=) z7`pd4Y?^xJ!=HP_7Pnf}ed5Lbe)v$pl_Ng!D*tTxmCR)hTV6em_w7uh%CTKAv^x*$}+;n^1Z6VS~v!2YVl$l3CUGdzP__e*eugbGyV7 zRIaIeCvU!)XM3xk$AEDy55s{Eu19!|uqI3n788q2VeL{BKDcZFtHz1+z^D$U)m=<& zewQj2-3k?XvSoAU<-ghO5fU?Pp7ThR7ORx3JY_1`Cigkk^4ZjKNreZi|3A=OmvF^E zhD|M@(EGs*cOB*p6*XZxMU8(0=V>jpP<;N{l~bqLvm!R>e7?`~?#%g(2Ur!K8-K9i zDLZB}x4ZdFRL=by)dHp+OM6N?jz737^oVn2Wr|i>qX5gBju#u=Maq8v!uwNDIfa); zZ%gq5r=2oY7uKwlkT>UL+{>O`AOAcpXKgTj@F(I<_fC#r zgUhHLx90ShC9Nj!t2hF$BwV=ct7$n zzHQ0gx#nMK?AKRqhZz{QE)0EE?)XxPaq1y^`{TY3JEs5Ms=B4YN?xgBZGg4Vt;WVa zA<@Kugo9S>X0m(TXEmmTCtv^i{B=a1h47sZ7D@|?Px<$h81j{K{1HEvzH#^WA70n( z?L_1XEKi)dZXmlOI(^1IHV5<6&!UF@|Lk{NahGJf>LZwOz)$jLz0wJpf*A}ik1YB< zZ&&|o{%uO_O^#&;cC_nRC~M!46OH;G&T;MVuEcV~vn<~jr|2*Lk)&|cbwb)5&WT|z zrMIuW{Q5O8Ytme~>)8Q`U)Bj0DjZ%cSUuC^-E*;r!s}M6ia6}gthm(tDlfAxl5N6M z{r_iIUE8-_SRP6&M&eei&Gm{%>RFa_ebQ@6Jl5IeQ@5< zeDC@CMD1Arpz__%e}7;TU@(v{5v)mkx?!RpqwUs!FFXv3xmI$$?&9gcET6N)sKL{v z-h< z+Xb9KexvlUy0C*&@BfeyWH2G;*8CLj6PhEv9NIx{YQFxb#0|hDc}#g!ws#8a!)w%A?S*n(N`ieZdAY z=C7M}+&y>g^M1M82Nqme<)|RL{d93s-q|^&jsZ&H_X1MFS zpG4b^J2hV}WLz?C+kdL;X4#>xD`t02hUPTfe`**g%I%;UwM4#%Q$fO^kw=fMxt2L^ zMb5!FJ3hFEP7^9-ciUu8!N1tqa8}>84$V84-193p-bnE3z0mRH$3dCUDp}QbQ|T`s zzs@eNa8WZzaC4N+pO^jbTL0AaX$}kKBz@LP)4s6$dVK8lJZ8b?MLRSS@0~oIbn$BI z9I<;j&l}qXJa&4S=LhW#>a(1{`d>VD$_dMp2Q=e+gUffU(ONNe*4N#fMK+PU*MH1& zVmu(MYhbhF&?vd{G@+4Kq0rvF7X*r2gb}KVJ*pnM-^D22_THDqu3XL^(%*``88sjJ3 z%_>NI?Vl^AVHe%i2+$1INJq3ypA=SeLL3XDB$EyAoqjS3P%A7u7P zi&)6y%Za-FGk1&mSjpbUe5>Wju8G^78zcA|t7foieLes8xz26bjSf4$KQPYR7_d+K z=js2P!P{;(hyFQKq`$t4_wcj>2^oza>L)U8bPsP0fGys+za5$(}lO{t$}dZhR7`_}6LQ#$Re-A~U*9K~bS7_E!Y+TyL|ftd0|$-@I6Cb< zusnW8QtVg7yZ@vnepz$jKX1EgkA2g=)Xm3adfQz-_wkrWvhOW#TwnT{`9>Q9qg=5= zn0EUIgQ*+7?)Y(jDPKpP!OUWR>J9dG9u13;ZHsFripXe@fj&xw0hwZ7I|p-5<~N(mg^pK zo3eOYj{MKIacoHl4b6G><-ep8Y*_;{H%#S+tSR%9t29bigS47UC2LnnoHg5 zR&v1Qs)H6285kPYKWMwwy!np;ll3VMmQ=f91^vkO%?mHH=rrF@uvz;5(bre|mR3xQ zK4dZ1{=Z+i$gvf3#kW3;;rsOe-+YeLV|)KEn>VX;`M-;^{wcSe{~Z zWk}iKR=&8pt>}r}`7*I9%=TQnzeg@#cmI4jW84l^^&di3C**_W=YEq2@z7Cn2>R$C z?OuDWApGK#&wGzgTr$@&?5orPo<6~VhQ;OkR_Z347rAj^gFM??38y~=3?(n#a#SZ( zUbyBaSZ^+}-ajx^<3*;T!$LkkQ-0o=B4c-AX3`l) z)=a(SUD{{rIbuKXJmJ%j+A+Umwt__wqrvR0dFaOd;V{ui3E z{Qbr97V$CC)wlUd`%&y|G^vTG83_D9thvhx}GK43c_ z%l6`N_cv+Vwa*f>Sryuv&Q1C9vg47S$eAb6Yl^tOeXz_uY@zTrb*8V-fs!*SDI(d+ ze>d10RBp=g=44ts zMev-F=keEyuhlmDo#*>eVd>y9rAapU_=Wom4lFv%#Cmhl;w4undFe5&Tapvdu=q!q z;F)3u1AbA%t?BH(3O;(0SF?gzp9rkDwOm5}ke>7&wSpIO+jJ%shWtGzRO!d+&@i!u zp<$koj(LaFmpzZN6GU4+Jjgq_L12o@x>im-14e0$TcR_#pRJH}SnxhS?aL(1BXQ@$ z{OSx%w|Cp^S`qldl~a^qeP!4F7vfya4skQQU&NQ(X5zeJ;J{GAEW^^1srfvl_k+oS z(|b4yb#@q>FDqhOY@j3I$#Crg6Ic9AVVmX4dVbnm`)1*NadF@hotufAvxU~wi+VST zJ2~e5%r#zoKj(*oz@Z-k4Gayd8yFgvDc`u`Wx%7nYsRMK60>*TuVB{TRDI~T^Mk`@ zNzHc7b5-wuU6*U>P;7fTf7(ti?Z%}|=JPJ;Jmr&#V|8;lAN%P$gOP~j%O)|0$qbAP z4H|wHrVPP*6Q>#Q?8x^^I`Z_umjD)Xo=*aLa-EK6r#8q(Gk@R+_;RDJ#Je#%Emh!2 zvR_^-yrri2fa#Ef4pU+)@f7hJLBek0CVC&S|X>`op3y9-UH84q81nKN6%sOHayNf|un4m(becCFjC zsNjF%>;KN1`!pKoYk#n4z16hf{>MtmC5Pls)o;?^KXUHlH={fJ7y9M}GCwZ)zh>R^ zUn^EH_sM@gd9kT1$$Zg)jLI^JX6dzCP79x7JW$Ma$NTfQ>v?s8+|?gMx?|OD-e@g; z&ug)&>YP>YwTCg`YRr`fHa>67IDXv+X+S0DhcJ60}S8G3bgDyU}6(j!)S7v zV_~WHf4c|$M~w_LUTxtoaQA*+$8>^Ah|OYsAA967745l2lfy+5FY}1je!Wn5RGQJk z{lT3BvW4?!woGhX;r&5hs??yi;lYG1J>4IQP7i-f@BL7|N!Ol5@WAaOXNv3=DR$xN$U&6lFVyZmr4{S-k$3116Tzz+P z`03L9KQAuWw^8oFwMA||UpH$oh;nBcep*vf&HPME)@yd${6iC*npo-_3xDxulznP4 z+_G)qQqRd(WQ!8zHZks5mSC{VX~(`R@{TGS)||+fdcM6`XcW|{I+i&^dr6o%VmyJv`(~0uupRAl1Toen9B4)SwZRS>KDRW7E;KcN5$iHxvF4Iw7k&StyM6LNcC$GExHhIwB|>` zM_IMkzukZLF}iRY+-MILF)({OPjsv9We$fq2N@V2FcfvJGvBJy_&4qNgKOPV>}m&9 z-a8&r&{<~RnsHl{`M}>0wcXsFy>IVV^xaR?PJQcD#&<)#$BZfGjHB7VV+@m+HF#}R zMN3%{4fbS}NLsKkFex%9Bsk^zoDk-G_hiA{&_xeUSVS#c(tZ9RugA2*avCh^zYflw zD01}fy`SgmnrHXuuaa)RuAXZ+%YY$#%H`O`XCmdAXCoF|Rg(C9_~8l%?gn-)4hD`D z50hpIr{7w4ov&4AiH!B3|LrP5F>2Z^`&rH;Gp>DaUTmLq?C7!$8S|Pei`?#~WEcf^ zr1Z^kob~ZW^}Zdp{w#85|LsU;WPG$|_NM*+bDx<{{{JuI{;&I&@3VWozozQY8EUan zu;r_FfjWPiOXM=E!Yc*Ow@2)+oWJz{j$c*&&K_5;+Fy7up*7F+=ik<)hXS(!K%tS1duO1Mn;7ysxCkYQBaxUqkV%bbqa6jsfwkmu7v*$ zP2`uHVtwz{((?yrNI7l@kL=AkR4Cud#U7(_?#~@-?gp2G3S0Icx*YN-&PgD}MNss* z!VXD;f+IT^r>!}W@R#Ru!i}u8zR9AGe|2Tu2%XjXT3hewT2qD-way7DJ{sZAT5=^e zq;5=%(fH21@aWU}iHyfTHa4w)QD5u!bd}&4mnSnCngR_IN~H|A9!N`FW@0yHwt7CB z>EUF)4J%HYJSbPuKC&b3>rA+e>Qov)jV%n?0xUYm5_=5CqMf1a=G>A z!?)$nGc>JOrBm;HE@GPN58bbi^WWdKj@oh1p0)m78dJm1a@qg?kMceEvU>T-rzt#D zJ|{L*gg^aXE9JLu$Eux=s%js-QfvLEEzZY>G|&_uKoc@?S0781?^Ou>3F8s@HMX zH(UsR?V`#zu0nx)QV^J1#5M_2dGRK3o#pLzo}pMJCdk#^wYhuNRI?^{25 z-Xy zwu!br|H8t5yIu)1zbJXJV1}(=$7|;!nmikyS#8TVUe4^W!I5!F=lYr&KBb@bUs z?sY9pb~0&Q!a~JDj%;#Q)~B$POptD1xVNXZG3VHu{YS%Hr&){cE#0zbnbA}gz& z9*m9Kc`nb1IrUpR)D##S-@Uy5 zxBgphKcDXL)rL2I>)YzYY0GNowoJdDztQDGe%P<;SMUE?wlMzMzFo&PH|`1AGsFG$ zdf~5GD^$-M>wUgsU#n}a_`a?Q;Vg!S&cBb17oF1+_VjgVOO5jX)rP0D7i<44?5;^K zes#4o?&B53k1>0EZ{)X4Ie-7+oL{yPOGUS~*7k)@w|dpCYxV5&>KSWIG;7MQtzKJO z+7-XHa^L!|Qw!!CU{JiH@!R9BW!=3MBJZvU%yxb9;wDGNt?XA`?HX$i{JP#5X1Dcg za*Wc}t8uNFh1z;YweN=?uP=?be*Itms+{o0*{i2ttj=3>wvWeEf9(<{u~~I3*DtOP zZ?z2H$g#3jIfZ%u>UZIZuKP4ESZ+x6Pb{yJjQ7gCa@Ojf=DU4qHmBpGeqFP!Oqvq* zu}*E*@<5j#TYY}TEj<19?8<|&*=PSGF9|tTe$>t|S!Ckvc8$9(A6CxJ{I}3A<}t7J z+sN2CWy;s$ZrtK~3F7KX~eW-FH-WO#G^^A@8_x`u~7djgzGY z+fQs0%i)Z^t^ccx<9Cs_{*xcUeW`)&lTw1+CDcPq^gPmDG5r(Zd(t6X@-=72L{qn= zEw3hj{9|;SRsQO|kg|!hUq?LsGB^9o`bRZh`>dY^{u95k^ksG2QC;!p|2}JfkGkJI z-K_mjQRc^lgP#se-T&o&_jw7^pY^v7m1Ny|Y`UhVl*?sHrl9^UgLOK;U-s$Uc68nJ zLg@IDn5J2r*KcQTJ$!u2HKjb;Gog#cW@v4Ax+J_qMV~8@ZP|qX{eihDyuZ2QBp0Ng zIa)dQ%Mst`yDzUFbI-of_w3IK?>QV_&RXyt692{Oyr>}1Pkn{<-LunMe*ORaE6iq1 z@A;O58Gp}-zQ$P#l6Uu@7rgf_er^C>7o#K zlk5HKudkLR)Ek=m;EVsp;Awa?bfoBDcvxa<4My7E#@#mLTY+D9YY^d>b4 z9B8=ul|O@BLj6;lGPnAbr)sH9CSIFgnhDr!-aO&LdZ|XMyvCgFHPTs$%>T?=w*O|C zudS)-n3n!ug8%We#HeKPHM6#*EjV$J%k%iV_$IrYCx@4C+DOoNrQNwJ z%JYJ|)DB;K);5Vv@Q$mFJ+~d-%e^n`g(OeKN5w6e`dUDh?fZnkCBh#WuWj92yHjD2 z>{J0^t<&mN<^Ni>IS-y>mF4CFCeO@vOy@nPSpLtx=gH#3U01eB+MQ~ZU-FSPdX8dR>GRhc=UkO-UnlFp zm$Yz7$J3OvK@YrNPEEMG>u;^<@3JYGx*LutPCqi$m^M!{t?1L>;-$ z79amJ)v9W?uWaV^VwV9u?+4j-D^WY6@ zt_NR~$(5$TC&ucsC>-LVgU zI9V@LG-~bYkl*@$lPl!@9+Y4A_7Ia^$m<6y6qtgPI2(>n^nK`8y0Ls_$fgH8?e>R6 zKUye1t^LLSw@$W0%5lYiX^(~mqo_pb3Nh`MQRn9i^qQZ$`0ZyvbLTP(=l|>ZK8kB9 z)_#w!{*%XM{5a}w%#=127NaNU<7e}oIV`tu<){DtEj#~CFsn7$6#Jkd$0|nPp0?<* z+c#7`9{M|5OG&@xg!e&tHTJ|sf-_a(WSZaH(|=>wvwEY=j5NddRrl17{(L3!H(qwS z>dEMz8@-*UUAr>ZC~4QK^^d=AwwOL4;lTM*2DKNS{@-KG;+rtPgmq6*S&mkITYUAT z_`X$7;|~4Tsn%cgcj*lc)wDBVoV!hpZTsfmIMUH@IaJ2yMrKDQi~6tAf-6^wU(LJC zGAZrHKhExL^WIIZP*rG|Ao$m-?b*%xq)+Qbq80CR{)k?wFD%zhg3{t-5<@ z>C$;0Yg=lwcHb$~3YP6@IKBG;|Fp{bdk5WSB`Z%2KRW$?z!cS(>3()spX~3xAwB89 z@uZR^3J!Unv^ifg@#?RCxIggy`Du@zuRp)}hp_Fg`h}<7FQ0e%|Np3cTce^w9o^^M z{gZxBaowMzMV#f=4VRh)t1H_DH5clIgq>XU|K~bw+j`EJ-&^A*>F0e|C>6SSJ{Q;V zv-c(nNo>EFUdSRLcCn1}q}{}qJ9&=p*8jfrie63TWDZvuzuq||_3Nk4iJi?_qwLOk z!^ZLJtk*LOSZeQxW-kje`g`AD!G#~6IU+nhS!d?#+uu~&UbFc=$DU7B$q%CqS|&be z+fdK6Q+@jVFMhpIotO6IuUH?yo+Y?YZJx|@#ig&cS?}I4T+v|80fck3VjoQ=v9b=Hm0eAKb!N9l6$P zFDk#jO5pSBMSHpa?GK+A{$VL!h*olb%b9l`>y{TP`Tw$(kN3N> z?5fx7M?V;D*zOQ*@Z|g>57)p(#~`F9JWI1){nM;^x;ETpulV1>nn}<0Pj4@8W&S9jSY@L2{>S6~ zr@E6|98UlH#*zHy$4V{7T9%p}Nuu$~Z`V&h8UFp5VNuNwC1JkboA+N?nELn5gwE9| z%(tg=|2?&T@A>}|rrKGpN{&&ln*D0tf+_1IdRX);Sr#$B&Tk5eJM(7o_jP_z`DZuqot+w5*w1|i&rJ`?Tu{N_3P{F;;*mD*UA3ds-AsU`pcThR~EJ=t~n&WGClIu zPuu^C^QNb--;y}#Xo60i+QY5cFM4P59**kz?4=g3X=M58y3pDX-O11YKRcd!c#+1_ zy;U2^e?3+8dQ&u`>0s5>`&sveWBgxFS@K=Ciu=<2@_@_Tw||$j1i9qJ-@LVy)#Z#e)3~*_PK#buI{j6tJmmD}eM{GOgoUo2akOrqYSQ1N-7ejG z!b?xT*!cVB)fwLFuTJ+__c!Tp)!v9h-o@G9U)Rn4x@zC`*^ffI_JoIVD&JqY!PueV zQ&s7cMuk5g|KEQtUUKW|zPyj8w3|!vf8D)jt>^iQ^Xl)2-QoMRc1{ni{i3w@VeIFt zi|jM}VxRn2e@-!P_TT;CaeF_x)NlVA^JV+%ki{HJ6Z4;Z`pM_Wks4uG#f|zr{3Ta{lfs zyey^pgFmrq^V@j-l^hY_8}3!Sx>xh}<)5NQlYWOR+FP}1b(r17*e{!9Yu~?~bmQ0S zecvmZ3)@pov)+D~*HkvQW2JS?(k&IQ&Yn$v7F3n`BW|kJtHo&>4Q8LH^Oygy-QjS- zv|D>`OqqP|czxban@XckwZEUQHgOKxf9v4-1+K^M*A;%Xs@q%OeE*2I->!v`|F6YO zP}zN+XU`O6t@zm58Jn)}{!~-{g)!VM=qszc3At(^X5%gcZ3zi_XyxVgE+aXI^! z*Cl@s@{}1*Y&~}J`Rk}^mYTY-u&4YHW->G0ZOx62__g5X^sGOBSBI~y+w}66^6ss5 zv-e+L|97wbgWhi^6ABM3yfM3E?PUw+@X3w0qn3QX`tP8^?2N9ztu_1P~I<>t*Vkx~Df>VJKMm-FAo0BK3X z-u?GqY&7w>OZd-Cu9AB`a6vchH0zP;UUDtUGKduPK%+qTSSU7~9x z`?E~AZ^FF7l@-+?Qtz~Ch4oMG{JegFTiMpaPlDUOaa0x^xo$q=_pYypzt&fMa8oXu zfBoyBJqppG8f)J#p8h{{YL?K}rSrvKoj+gxa{fiLFX9^U@;=_MZ%3&*6m)0-67Ug&Sb!=19wk67|KIzNi7i)L|8JQ=q7gZmQwW8tkHt6yp@T2@(`-){K0y!+{`(1(-G?%qG` z*5vD7!#L*KKkChS(4c*|Dt6V?gFIi#pHK6@^tb$V{{Mr5SLS<2n{4?V_E)%IGRM`t z?)$5Me{p|RP^+_R&Hmq;i)|}ZbhV1s$NXRG`Jiuh7;o?E&B;Yu_pjge`v29=@E89! z-rC=?L^s|CHtPU#Spzw{@|kZ>2wVP1M5ASgw#u>Ie|KXgT%eEfg%`Tzf}>(=VVt_@#Xw>56-#lNe+T)^cHT!Sy?H%j3)|BV%n?7x8)ZZW#<7I)7#_rxFhM8ekS2!)tP3XGBq^QS# zIB)_(#*OLgCpz-hI$7K-nPzq2R=Ifki-vPE3bI0*Snm6UtbN&--#)k5yY6km$=BO2 zls$;Km{O6|D|jJuxo1a5*fJi~m;5@tj%U9|Vz8)Ny(YTr+pQ!m}xeztne?xULfeyb3sUx~gYyv(`_Iz5hDm{QcG936tlSmKEtA?by-^P> zCUQH8UcLJLenQLQh7aNV?+@=gC~vSLbyl%8|7|1jo4tKGD@y%%FS}jZwg2nkuU#>U ze+5@xUH)ueYTobFYk%K<&Hh^a)zqA)fj#w~n{uDdy>#$?qMhb3p2}~H)g>Gyiw}g@ zaeOP>BcFmM?&R$wyyZ2w)7=LPwZ~f}Jt*6!$ zXWbXRbv^5PV)v#j$*XqT+7kcYc-gz4BXil}nN#Po_HW(f3nKYs~^0*E*iwPFQ_bgzwdPU zzpi8Ng*A36mhZ1!^z^2ug@fVa()h{G|6H5&ZR^dqyK{ek&H8_BcjVRA(`!$@mOA;! z|JCy+r+!Ab{CxF&@2tz|8X^#6Qy?0fw;vU6tl7j^!_W!tZAKDuqm|3%*F_kXOo)+1r^(&|%X<`2J| zgo2H`wk-Mm|8j-i{A0YkUvAiacwS8Svi%<-@0^vAG+&?g;XwHtp+K2m%wM{nO)-eS zb?a!6)Akn|YxTDNI(RSs`jcn>zTqA z6HD%AY+E0;KI}u^Udh#0@9&vk7xd}>Y`4aF@-Y*=fB%2bZua(Y(EiwaZ$DN4<@m?(Qt$fC)$x_JQhFyUj{mq_o7uWCcS#BMSxwCL2wKDH`V7yi6yY4FY;pBwL!44K{7=zOQFg zTxmM>n3rz&?mN?3UKIVmWOu*$i(S&kPw#Ke{`{|h^`6wt)2CR!xWBn>kDcvQ_RX)a zep=UDulatSwMzPYD|ipmMr~Cb{y!SVi_$vO+W&FEuVpBu&{VNl? zoV;re#q7_$@=tBy|9`e2rCPzdf0~)t_w%ng^8XKmTtAz_;+hqj+mAlhS#acOD68gg zlPiKp|6A_n7-bDhCE&s}Y7cmL0Jn>1m$MecKU z!};MKC$KZBh)mHjkrre+;jm$Xj)*nup@j;aA;B?E*6isFa#?YDWn__%6@O#qgDTk7>C?BKS#7iR-bofLZe?^6iii=l=l#ga zvvJ8ggQpv&XbSnYTvvQ|K(pw8`)saTK2NgW-tAs+MRn`Hix+ft7gqnPFO{42&|<6A z>$=dlhdH;2Gt9rf#M#WQx_Z-%qA!+lj0)91E;DMj97}8caID5|Vd|vcQi`^vqJ_89 zt>@elxG{}ut%R55pDSyRCf%R6e>U^C(zKHt%?*rdB0U~%T&zuv4I3}yh-me)94uJj z;w30??dIA0Mxm}x*Q*4iZ0%h5&;HTBYPY)j)@S>Fyk7p}hK>5eSMy?;FNRhbZEBbQ zzHZ*RE3f*JCfApG=AZpOFZ|Sg=`;4+$16`6^mtyV^ZBx`)It51MR=6P`Gd3GRH?}S zSXy`B?!3hHj~ag`*J}1I=Zk(?ckkDeSH*7yLkhyV-OUUBa`|NKy7m0Uw4%Gah(0({lJ!)y6Z}>ADpw*MPlWoow}Khy|!1@ z-S61`vTwlyMir3>XH>X_m<~29i0}}TVr^ z|NeSx>5shTXV>4o+5g}6{>~%$5y}8`n>f=zJhP`n^_BL z9=VpfC0NOFcSp{hz5J<-|DEuSg|+YRvAw?A{&0tW`r$e+wj`~y`nC-ruDfGC{ho8Q z?Cz~yYql1ws(E{Vl>%d^L2ALS1eul2pH?bw_X>0DY7JZ)`tZJj1*4kCgf~}&IS(Cl zSP`Hj&ehto&|yMLzTJ}Y%F1AcEmw;VEe(C=tsQ#c(cg0uuUWmm{C{mlcJ}}9+Q;sx zbFwZkZv9huEcCFwlY#$3yY7wojNr! z>33L~*XB1?*N()^kBiu*S^ITr*krZ{NOIK><6?DcN_3c@Bf-tebg*H=1RWD@PNs#4 z8Ub30nyp%)DSPg1<6NZ`8x*Q{RqKcRqr2wfvNihxe|&#;Y2~ZpRE78l+j`0?8griu zT={9e|4HroZ8c3I#q#?Mc4i(f_#*Xii{9iEskE|38xyOqJk#9x?R?`w(K&WU=B{77 zX;D$)){E?px6~daIp)ADO(Ufo?;)NL~e+Fy_&{5^&IHYW_Z@+HQ)ZN#sR!!fzH08>( zxRa%&>JGc=`Jc!i{B!?b@9t8&@6$K`h`(mF{`>hj>(I3|GP{d?MNXfy@V@Zk)wEMp zz6SrfSNj&TW>+b{+CKGV{-u9em*ttJKa-FCY5uJ%>r3lfk3)a{U)!g_%fq}RB&R(- z-rx^coMr?|Ki5C2&V(P|A~}Q-lbzm&r}K!OxPGTNfA;64>QlHn=QvH?>=-j=<{bvh zsT`fZR!*8<{NUph-^Xw38>dfN#rC`;Y%)XuS5%*sJru%yKAT zU|`+rIE#s)i?t+v{e>C(>aYGY+PVMzj#*lNtKR07g!!M}|MmUVz59OuO?omh`_VPM zsDC{_=6!Ve^Y_d5W8d#*wEbPP*HOLi;qmMCF8??UmuT~4M?L7C);O)?S^VqhuQe)L zf4#bHGDX>3_0`7s)n$7>+SlH13cA3#c%#?VPyzE}@lV!Yx2m3H6Z5@h!?|r2t+zIR zxBXcgfA#K3*FWkXZG~^`2wQVpcb{5(m{pX_zQ55=*2nD*oAmqg7LM#qg&FRgeNvOZ{O)l@_7U;F=i{as(S)$iYRy{o4--TS&eD|%1Rqt5qV zwL)5Mya~Vl`tF+WY|*OS65H3u{jI+!Y4iUtXWYWm_FC6B)z;5n@#cAM{Ygi+b@|=L z>*d#ndzJi+-)j47s`cAyk@q6&+BNHUM^@?mto!J-`uhH`u-D%dQ?3YAyl>Tf)fIBI zswPs$>{kE6hkviRNOWpnx?&bXZ@wq0do@E-l%HFy)cDrVmzS{ck_3P$eP?_ITDv?EUX?$p{7M!{111bd!-s+Tngi~qOj&yn&^|2X=J z+=_ypKU2EC|N8!@zj1rBw;HUzdgI-@o``e5Bg^BqYF%fQxZvQ%dtinDYnga;+xLy@ zH|Xds;FN}Bo(l`(r$5`S?RTxesH)zz zS+VD2*w-gF{io}BTv^UpZZmD`-KwYYPSfI@wf@+d&)d4D?Y;bEbddglD9uGefh`OXUnIV-rK)EIVHiXzG~yos?{5uk1;WI8wl?Do_#g@ z)VK9cQ+94$xkr6bP1^pg*OPlrMfe=sKIPSisVv7n9Jo8bPTI~{SAR~TOG>`^hcnC0 z=g68$+s%xR>o$MX;TUN@S-fqV;^|qZPs?9lTedYkZfE(e{Ts9OuW9X{EwaVGwtT(Q z{Q8V+(FYq^Pjha~RH}=)TB#_`{qXTKJC2`+7&iHaCBF-?W@*X0&1i8WFEeq0>W|Ai z+pISW2+B ztckNS*squWLhyvbvH*v4>kVI*FW9MIwrgSKj^Dx28&-NPt8W*$^p$5VEtDJt#`l0vMN^!qU z=>M!=`}duVNl1xX@A7T`;+RQK{=Q!oTDAJ>`e$|~Hd50!+oWjPwmw*}MLB3z;YI(i zM}99Ze|p;gsdeCt_*ZU!JvRM~f4BYJzLQV?ah{Ocdb_mXoL^O_5VHIof)t=6Y+E@FpFZud@(%YEnS9iyq{Q3XabL+Uj@pF@7MK9da zt=q1$#c%r5*gJE!)~?pRrnObEZcVs&-{~9rQ7a4MwAW4T?+P!dkJ}pb$2aPlVfZua zsn+lIW@dNC?EWJ-b?#r6Z+{(mB4=|sX0q}9XAyp7*SO(WR_46dpWXJm+`N&VsrAdm z^{Dcb=DEMt=KtDnwU8A1{JTGtpKO`AJ~VNc$lBWMJ@f5< zJQcmJwKw+H{#S3pV!rIX%N6}~YM_bnt{r>-?fl3c9_M+4^K6>v7oR=xuXW0&yZqbw z>uKDY^6;>Aq2Jb-U;4Ms^FjCSr@u~h|No->{%O>NvY7v`{{HdWb=m$?-P&LKE0+F^ zU;WieJ8W-Xv@qB2@V5#3qmJsHzqV`P>5BPr>%(Ww<}Cg8Gj84bpL=G0O|5zwuBvtY z)w!BL6OCCRgZ#|9G zcPUw~zWVCyRmaknmoEC|KmU>Mo#$Tjx&55|!+I1}c)tF=|Bp!Qw(Y9#f2gb~liqJ= zmc9RcXiu;FzqLO$U0G?qcv0>DMWyjYzrMeI{_54rxc6VX{_?y(XIkgi)&BX)qscBy z>L0Z1xy}A$qwdy+H=fn7CaCdzzA|&F(PC+Zf1NMCCd`itjKBJPeec5HrPKd?xXL)^ z5|_}FA1^ug|4S-dxMbftyLGX`xAxnuU)}Y=dBf9yyR&EA;D7Q~Bx%p{c%w~EC)PZR zH=U!v|LN(SX`D7wuJ%3kzrOyO*8Ze-QD46=%)UBpZQS1J|30{ve_FlXQZz*?vH5?D zV@%7}|4F`nd!EFbZfgybd~Whr_pFKd8)G-Kz5frJslNJqO<+yvYrS8qLo5HrOxfrE z`rlPEfBR2Uum4h>zefC_ke7m{a>mof#IN;pUTwU{?)~zk+TE9H9Nu5ez7qf6wd(8X zPhmT2|Hqdt_rLOG-mdkweij~!YWAmZQN3^fYx9&=ZE@%C<{6BcHAY@(h1f6du9Ln(A9TO>^;43+PvTEYdI39 z?7x3~^`u`ad#}#A>nJXn_n_cz%yiYo-z;7D!d8b~lP`X~weg3_ z#cQ$uII7l%uD;%+bMNon{L{hz_RRg@p0&1mZT7ya@BT+-O%lBQ`b+EAzY%}qyd$0_ z8CUJ!p>&$}+b{DF1_l;p$63q_r&#M38vc*jI8XnY_S4 zM*I$Gug>~?@^}2deb%nYYoq5zDL7p>*?ypX^5y>_;h!pNIHHulttnR%y}mm1>gs)3 zJHuCZ#n$bO+uNb4{`%^r@8z%C->ekfa!odU#hPX4zIHv6~#>&pe_J_N51E00^^-#PjE?uGaDuO;rg+O>6R zY~<^Y&1TX6UN5a)?f*UK%KG5dDbtqYs~s=PgY{gu$~zrMeo{&&sp-A}`JuTqSA zTdMDUZO6RzyYw&rh+Z5r|EAohMZ1%o-AuP|$E^up6TddLcD4Ab?@@chqXU)qulgTh zAGg}PJxKa%@#i~>cmF>q`f}SdfBXN}c2_1G_`9lf^PMygiO$d8Z(7`qV^L)(oh39q z>Wlh|t*_&f%q3ahpp4^1@xNt$X>X?Gl-+Mq zTAKC0>-5IOt!JwLCamWD@v?7@&4H^m`?yb}YUVqy?-7xk^&vVWZbfB?%0G40($|-& zwDm9U+FP~fX}p7{T4DYAy1l=)Ka9S0?`#0iq^wiv>Ty*ICRydR|Bc@iwttp=|Jyka z&Hr&{FS~hjvsGQs^Vj>I&T46XeRR52`0Cm-Gwm;KTJQR5_t#h7JTu?#+MKrhOW2>= zYpV{PTIN;$t(X0YaLE2}d6ud7R$r8O_g8T4y3iLfNqe3}E?F=2_x@^*myutq9=;CS zeKKKEU)J5p@2{IZJ|x8c!OivZs;}?QzrL0KPio!1If)iVLW|;8uMgusD7x*n*j>-_ zzWeIW-<~X8|8;t{cy@eS@sg>^f9_i@vHt#I{pq`poJ3!*zpnLlRr%u3pWAY+{rqbe z?S1<8|B|S`(t0oE*X`VGzWR0O;j>Y*ri%Xln|f=8Y{|N*`=;r2hJ5l~ulF}5BQ*bq z;=A{SM;`ADUl+Q5x8cYCs|^?!m^V4jVs3cFT6rwtUw*RO+p9nYW(Bh$raQ!>{kJufMuK?6{s`gxbF~ z;p=PMgT80q*RA@pZ*SPzzUS<(UVpI;iC6!BkmKX@YnRML%GQ+ab>8{)*4L;#(|_%) zy1wdEbZmLunuXq7(T~!0Z+*43HzvG(-PiK0qg8vWj@~(_e=Yu`%Rk|LXPI^PfAFl< z`Mdhxy4X)Av-5t1UY-6>{(sb8pIepHVY8>kfAQHpx&Gqk6Ytb^Oz}Hf7_Po1^mY5| z*!ycjUwwV8_3=`V;D?1K)>XIS4;wCA{`!5doxOSJ>O)59({0bs53k!0u5VZr^)+fg z$9MbHfA0QU8?x;_OU%xz(U^pj3U$Se4Hx|;{CxUj#v}3b+|2t~ToqZ2d7eMLk(IN5(@U=ZAEHmI z$ptEz?>7|9`PF21?CXt?CtH{Bi0U+UT` zci~T?6YtKF>-YT8DQ_fd?UHxf(=D#)ZYf5i@e;sLW|NcVXe%IF@Hu}q#gnn*g zI>iP zElAK&k!EE%=x7k2W6H&Hs8O-QH`#c-WT9)U_sYc2(3(HfAKVXm__$7W!v5}c8!~;b=jTN(_?cZ||DbTKX8hCtR;#kw zx-3@A4O9O;Kjh8XSB0-LbS7s=T@9c5R=C;Y+T6^XgSN28I!j2Zf8IuxcE#7?o{{ucov6=sP)y{d$ESjXXY>Pzc zH&vmoq|~oRX1s4O&i@?K!1->Hq`Kwy44aiRj>$|7b~!O&<2%b*d8vHU|EHeqe*5;* z)ys$G+C?Qjv$9vccE+N5`MYI_bELY|TfGC7A|&^$-fMp~=5}}arse>#c%Q3x4w%L& zUEcMoW}4%<@GVi^9$sIAt_YkIzOp$zPkQdkjmw3UTUR;F(47{4FmPg9qfcI5vj4Kw zXIWcL_OgaPebU{T@#b}UobHM9oSnT>{3`Ny6ckzM6t03!_hpu9axo;l7vfJ>> zY#HrYvv|5UUwo?()&4bkMWfuq3m<1aE^m0isB~q*nkmwpPE866PI#ztak?lfboeL< zb4Q-_7oA@pQ!KV>XLw{$YsjNs3pu^3|K?>cn4DFk^{>9~+@1~c#($a87MB0N{b#ZJ zfBq@|)Bm6UU0-=ze94Ph;(X7ZAHH~@*gn!IYR&ekSIr-;bC^Bt?|%1+eP-VT&s6QP zV|V#-@5vty-|qkQn~uLWcUsl7S<*Cm-KGXzo#&G4^1}B$m75=b)%4l*YbElfZ+O4% zd64w(lit~)-wup=dPG~GyG=gnYbzJj0z%C zG}O2_XEX+E4Db>WW_3CkFu_Gdn3Kij*^Xz$)mz_|YOPx?yYy(N)YMWF*;(_y%n1*u z+4OGK*LPR;Zt_3ub9Ygb?fxUPs^c~HHMcF>reA9uY5aM=(?9!HzrX%JIoo~xzGU9F zwoKpo7j(R1JYIi=BX7!nOp(ii`BturIaMFM2|BQ;cILEsZkDHAY#1IeDv3<+5mDx4I_EecMng=BrA0BI zW9F)pGiHi(ww}Fpb6v_g&C*!D_n~I*@4ddO{8y?k=0)zD?qAEK*2mWV5%=4^`n}ES zeQArjcB-ge$`_g~@@P|ed7-oO$LUsQG8X@1xNg#}{(wX3rS0|$70b<5X>WdR`pVU! z{O|qWvrj*p`tse+D`#Z)AK|-vZ|+*p<$>!tkIt$3;rDkdhvFKm=U@L_e_wx#2UQdiqn!W+r4z37rY8hir>Gu zHg-qvy000pw(K}-zb`((f>A}JM?;j0x2Z94!2%r#Zb_y?P8y(*a+X7m0ux+(*G3-i zUVXqTR#$78YgUkz{M=u&AOG~7{g?CVU;UW3&%eiiwBKFKzpUnxY|P=`eVtXFb2*m1 z)u=l@Sz`Ku)FpMlHU@Z~KKnB5+RCez=AG-ltzx$*Ji@y+SU9`7Cx@^2XwiiWa(nkR zhA-GtrFXbE{AcC2ViW7AzsE)Ur+xf%ezjJQdOLSj{`G3cEQYYH2X)y}w!HG6AZhED z?610S!Tqb3Io^Lyyt|Mi_hod9cVF+sBfr+XiCjEK>ch!zM-`G!m=$SE`ChkW`?o&_ z9)KpYa#VylnGQJ|i0}{-Vr^+mSQw!p#?{K?=+NPjCUPrj`u)(Z4O2_iRJCGOO+WBN zO8NPm$%+4&f7Th7Pk&WA>B}_l9n){`+o^oy;P!1swQBQq1J}Cew`@&LNnSVkxBr!V z)!#e4o|f*;o+Is2&(?Ub-3bxw=#_0Jb-J@4nH?@PG(-)Elfy>>QZ-G_jIxUSthQT+OGC4jxUal{dsCPi}bzyuN>+=&Fx=* z&GOg&?BMHyuXVx$)@S)Ai{87oV{ed>R9dQz!ud8ug##aR+viCs&bXQ}!+;~4v;Lqe zdqV1&{al~RW-ZhTeabgiWO4oppNqM_u0OK5e7M6Yx=jN8!nSA6|-t>pEdFki{^@dv3mR#}wuIX%;3d&u`K>ys5%AuE_9xlg4NLQ>#N3Y+vWKb^6ISo6FYg_ins;$X$P5fYICT z6rlwcDz*0cd$`p3efF%Z@12$SB16b>rRK+L2kt8Ami+(wk%NC`*SritCZ70@;j_Zn z?9^3KvyziOUocc?SRb3H{IKcN&qvMMm*l_xzG~YpmZ0RGGfU3}gdgfz_3pilS;_OK zdv#W7oxa4kaIvb_*Ql#^KZpMD`L`kaO`^pG1!KFl@8!*;YO>e)_g@j3cEOc{J;tC` zntNTho|~U+ZIdwjM2(UPd!0^WCWiH?nV0YF)h(~y5HG(#<7dJ9Z_ocPo+$apQNR0% zQ~%H9{p$nf9W_0#|KRYkRFRM*>-FuyJ03)Rjk>dIXUL|g*ITD16^Z}X3(eB5xE6l- zhR4k6SJw_|79LJ!YABarjhDZ%h%e^j!RDCcH%lLu+^jl#oK0_U%pbwiFDkrVG|k?g zzn!z!!8J+B|B$R}o9 z{O{vGi)^gyb65SZci7$W)NtZ7lU0?kKTkV;tT%i0r|jH{{O~I`DsKM!&$3%uNl};C z>BoMqxP{EQaskSpZ`Xa@^=JR}?k7jw&j0M6VdQ4U%`r3mmn_q)8>SntR%$yM|9&C( z@IyesrC;BL73!q~^q$vzu9~+hPW*fL|GOMjUZ+q0(9jMza%Re`zR%Ymy-92oqc-y*Su@K&1Cudvx2mS4LvL>+y>*6q+-?Zr|VhgW?&fYqqW{(lcNGUtE**^R)**d z{TmVRievrs?~i(y8eI0>6;!=%N8iIHpSwS8ub;eMG1I^El6UR@@b}Rx)|+>Tsh{1v z>iyT(`>*d0pCtF>O3NX$Sk2mcMRULX*Pq#0zslIZzOwGu_DS(4_Z(t>^e3Aq_x+j{ zy{O9ieXEZoF8L~Q^sxy`&8NEcug>oKF8@9Hp`Bdlv>!k0Qr}&@bY=b9(pTTN{FU8! z*Frzy(%O=cJui-|t~&XZjJUq*+x{i5idWpew*Ehh8PCN0 ztM^Z>jb6R~-rtDNDnE-q{G0t?`mrdpOC{Hz&X<3+VZZyk$W`I{BX&n-sef4a{{Hp( zTY{JB|95$HcF*2gCGW4@XS}}4$FE>uV9s}(#nPz7T3##E`t-k(ym>|M_ghDQZLHgS z=ezytFzL0sRAc^MU-fg!;duFuNZYE|@ZI~vwXdJr_4SqPuT~xRwc)EhU%&oZe{Fx% z-$c;|?OB$o`&rB~J&N`3d+M)~Uy~FaU+q0LHfm?x+N&Sy*R8&~RPbh<_19NhSFV}o zle|03_twsj_R?kR?;p{wnZIU#Z0z;bld5)QO+C4``s@0uyQ9`cf6dyOFT470%-@*5 zQF~+lR(~!2T68q4z0Q5>`m5Jpi@z4F+NgWFXMOzo^O36S!{gW0l+Rq>{cgK?zoLG) zY5a|}+*h4b()Y{`U3K)=(Z{RKE9R@M3jbTxs{OWDer@Q{3YFt`qeS+zioX4S(u}Ed z{^6P{Ve(6=!*4zObu4_fP`&uAt&g|9`dXfK_0`w-Jp1zR7Ds!uua8x3@h$tNQ-Q`lw~KRa@rm z{dH6IU)7ljY4TrROf0__U;k$R_xp3+)&BbR{avT{>-S#X!P_r=z4bl&ivRkbwpIRN z^0svs1ILYTeK7hgVYa6}QQo`X`E#8~U01Ao>NiKO#v%#c$k&s)Uti-czrOn4)?e43DldBV z^|bo)Crv`jc1!Pzc1_;ueZRC$|I5~@CcXF5U(8yss(7HO_U#v+t?$F@x^=Jq750m} z_kaKTy8lyJ_Fg@G3Q2FkOFC-1YKs=f4oiFNhq*YV!^S5B6E ztz8%TD&=kT*HyAl!>_u2{TsK(^yx8qH>=#t{i|n%v;HaAe7sPo>2yrWIqxm+f5*Lz zytSukd(g_s`(Itt+q&+3=ih9JeY^igtIz*)Z{>d$JC{s@AF4e!{=Vg2Vjo$rx^DOH z@b~+E>Bhf|TX($NEB^oLzgegMAJPteb@Z3;n^>ygtZ_Ss(Fz3Sh)CySmgs*gBcRJn`I==zKG_ttYvQVq76c=2L^f~AX& zs-MX`_eUwQ{Rdx)abMS363(q#^wR3t%c!4MSH)l79kJx=%NmiKuFhQP>xCb@+ozT7 z(~kNh?8dZwS%k^|sS&dS%i|B4nH22oczWLW>-+Aw^|3V*!^3v1k1r1o@!J0S>a&}R znP;5`1&^dd; z0n?Q|QrCqa&M0!<_2=t@u15Y-AM2EM?EHL}+2n<7)5@)@`1KZi`_#s!_o(dG)ivcW z_r!;UBx(!nZ#g+*^V{_oAN&yvJpIbAx#7Tzs*~^k{|Hay>27!A^vPf=`su{v$QZZY zQvBoVIrFdoPnfLK7A-%?`{l3IQ~dANOv#wjZpXE8`SVYanf%S)Pq<9`n|tf)r~4dw zvF5L@XWhBAU%ZS%*DZ;Q_4wIYoB9Qu9(L{darke`!yjB0j1v@O*S5TBJH+S0%VWC# zYw?yc^Lxnw{eP!RrA&M4Dq6L(Zf#uE&xcjh=e^&#W8N#h_xrwm$%(3L#U+XtDxnuv<_ot8f-v0IE!@jj~ zd*gn&PTBs$;qsQwGA8GCbHPu->%;F>*{1$~Q>faWZkKiM#r{8c|E;mBTHt$lyY@fr z|HC$ z$`)c+ugbEf_WShf{;n;j|83JgHd)+k-nHEG^nBO8gXM+S+|NDxFT8taXQtloKa*$7NqD~h z`tEmcWAE2~zh4_0rS#uZE@^Y<>#Ogt?+<(9cJ6)MlP_-i2L+R@|7TvU`V+aCYxVnm z7jNX4+6vvNHoD3#e^Bg_N$d|5!*5e}>wOQuwavf(yI0)X&&mgY#6k=&wHVoVig|wcodIN)$VreIw7_Bl?u*ll@WOZ*s~=c)#Cw z_0^mG-xqzmeJDj~(&pWN>qOSOZ|D3ZC@}qG|Cag*t%mpO_x8Sg^>DTnui1w`U#IOa zy8Oq{m*2{T^AYpcBi>cz@An^6XwSZ08vg3}Dq9cjpQ$g4-k*H`_0W6u{eOSyd9}vI zIxsLWD>=?$Z3JzJ(Kk0!K2p&aBk=qGKbCvH|L_0*zp3trW%%EsU%%ttM*RB!b@j7% z{xR#XzA5iM$eJDFe>|(~>ZPaKC!gOPeBucA${89Uw_?_XbEx7#*c zd*5}guQR%yF8a59Qt7{^N3V8o?f$y@YxT|+E$?edf3rpQuYNQwdVR#Q{nyrCi+DG8 z`qfp>cCHl*nLq7Op7&qDQ~R%L+7?Z|7QdSHb=dsa@>|b6vyS?|)LvD!HSg)xS6{Qf zR&A}cjsM#HHUHXvJ~y{k{?$F5(r)~J{HpAN^#2CQ?@RLh8@*kuWK!Hl&$_Cwcc!fi zU)!~H)t9L6;Y;fuwA{HLzb3TO-u}tgx|i?Q#ftwr^?q*lH1FMi#dUwL6na!2Rvx$M zjq0&Odl>Ux9}0VU`L6cc-Cf&eFYep)dwpQRq|ViouIk-iy&^B|pMU+Y{a62+o7h)} z*1hw`P3mSNm&=~7@2)OetGhV$hKly$n?KV(wJ^SkRBzd|scOU3FV`O% zyTsRZ&5>_P+Wm92?=R*3EL$%fi`~-wTL1JU9<^uxh5aUMLU0~?T*kt?skDcPo4Sg{knx{OK7DT@5og zhnK%A&)F&3UK(ufBTGw2q^jPa&^Dx_`Duj^=;f7w?Pa)&E!A_@Pnc zXSC*9xxJE4_TSjOwrcA=>$Irnt0T+fgakDlM5_J&PPx5c`|G-`%fI~dpZ_VFW6!gT zoA$>|KAYv_arMc+Ka)iJzCP&b)w{p=ijx2Rm|52!wE2H6w_oe|U`B7s*}I+Zzm)yW z@($i!zuI24JZ`=Ej@ueLc4$WI3E$va{Omet`LdP>XfdzDh5#K2Zc(Pgjs_tbpq7(4Xi2aqSxBt)9YW&##ut zUiq=DxBdMu@5TE)#KP-dfBSkvY+vuEqs!kH#ILP!ez5Q(zhGAH#rsAj;r!(<%-N~~ z-e3B)U+$R-!>2{sr?YTAewUY0T2Gt*r{qMe0{aaO@P9}?m%!Y|sOFQ0vs?Fq&{TewXJ#P_2rNAceg!$e(p_re1UkV(z>r^*Y@7$(_I(&YmZ#azE+>vPoEgn z3*Pw?Tvt9fICo)aPr;wEuUCI-@xS@9^VOR6KRQ1R?cT~Iz6|*M)bsDJtriR4M(F)& zR&EH*T))+zWnnzu92dzi<+5zkeb)y+pK--4a(!w+`?NOpTP6yT48a|ByvEb_?c4t( ze%VLq#$NT!Jx#5D-3PrD9rlz#fUc4Hlrm${rEU zPAAZ+`we1ToK1}Z3qv&2xLP_C4LV}3ZC)R4wBz1Gwbd#swYt1#W^X+5?f;!D|H_L0 zYyX-bb)WC|{ML)Txpi|}uTR_feZRx`+HfC>&)iZ6&HkwArrp2LyL`!(1O6J)Y=MRc z=Z3qSFWzK+CXJ2LY|B$mOS79zPcBC8m>=w*d;LY)%D}r-j{9HTxLl(eFZ%1-(^MNh zw)q>*`t9elI?ug#;-8#-E_uwIvlEzm%R=I6xL(}3b6sxAbonf`V2J{8>EF8ytloR| zO|$CcTV_;q^{DghX>IPIvzO12VN?>Cq9G>4+SHt=5XlAFR?wKRVuF`ACy$e(pz-|t zm}32Do4HR<>Uy-&%VSqu^Y+v@&eQ*G-M{3||9|aX{8x*BWT))4ne)$V`YpG6KkurO zOKt=geqFP(!}Wgeq>qj(BA@lot$nw=RA^eB4x$HWemRDFAYYs$MF@BZD=RWlMa zc((ZYkqVc~v8Rg;&f#28Fv%>uKJxsFnJ#-TyxSd?RjB&>gx%Dg`yJ2U&;Iu7_-7|= zm0#;C#XrB@Hbpx>eZkIapZN}k3BQWB*uBatOlaz5?R^V=*Xno9O6f`h3X?K0|;tC^Lf|j&icRupB{;s+Irx)ux zGk2D~+Eve;`cUO{yx69Cm8YAR&i;`mRsFKtTUQopcK#9JFIza-;(LzEeaoD0Pnmws zeja?|gy@1=vU z{$88E`F6Lrt86~s?CI+kw(qX{{8mR?>(;{M>$ZzdR2KYOAJ;N_yBlNrvni+bKYUP{ zu0%O`W^!HadpvlEXm3cvB|_ix)3PfvEp-nnkq zvFDl6ttYP4-P2a(>4zN>z$ha!MWYzB zUm)o~M1~lbL6c&@gciM@*W&yto@qud+g>d6DlE=B^wb0EZSp4yez!gRF8{4=@q-)3 zJ$~}7ypvsXD_cM0PImowuG#hS51Tf>Zrb?z(j~9pzl?g`e5mP@@WpBR3_TB2&kMPqwr|&*B?`+3|+{1jE(l=H2 zl>YLwEZF(-+idsEy$`CFZx%S&Vn02#QuTbu^IpjX3ntbj-n!K9?)N=jFOPS!|!l_;S;}l>XCr{t*Zq$3%@$@RVUK6{#Di1%69$L z-IHD~?P|@d5BnSW>-E>vTUV<#^!=?~ANp_C-&Fmzo>^ayeAT?Y^<}Mx{`r_0_h*EE z{=B+w*W2J5OV_SjdirYA-l}8znyV*YoEL8~^;_4}HPdUgMPD7ge`$Mc)c4ogUz_%p za+K_iTNiqDb?oDvT3=tX>vQTxEcCwhFkIp3#$V60ukP+T`fO{LfB2-UCvCGoxZgPJ z;pzOh^w#O!U%TRCy|Z3@t-q$VwO)Guwf*6{*RvGK*a&Fxo_l{~`)j_xwPAb9Zz*5@ z&T)I)?srP>MJIFQ&sz6aY{|agyyY(2l5e|A{;$4f_xkWZOFWLq*Dikk=Sj75as0o% z;q`HQ%T?Ewzgn-lx_34IqK9!OwuNV`(fc!hfAppJ!$xh9Zi}a1Xi;Q%f5`COH;%_A zUM_KbGd->|{72^{E$h0UReyc;HTkRUuczv(?QNr%8Q$Al z_5W+>NBi}$Cnv`(Wq;9Qcj{LB_1%VgVb4Fu{jC4)6#sc*&AqNQ#;#Qh*A%q>FMiB- zFh?lA>TSf^s&#v_zZ%&7KC1sZer@d8w5RK9o4ntKKU`NnZPGv6E7Rw#40%=mDk1jY z8u5ASSI2*zSm&M8v0wexyLEo-NRyy;gIl<`@;=U8_9#O75U$pZ6vC~#oM0)q$YL}OLqecI%E%B}T z8k^>^CVWx(i$$xyYG~~Lx5NL%1dsi0hZz`{%N=L2HLe0peyx}v|M5=z|L>pbe?9%R z`SJeM(cjj@ua;jlSO20_eAJJvFZBgBRee7y-n>y?VSQKl|7S0!-fgltefrJnAG7up z-P;@Se(Rcf_kX;fUKamv-8D_yxV>?IBmPd=`srug{Rf9R*)>-c&o^ZDcK>qUQy zI_^fr$3$1m`_1wzYmVqkQKj7eJGI9DwN80IkNfL!sp@;}zqPlnR(&n5+iUu7eI3Ww zZI53af6!KuA;L9%wS3#B4S~DkWp@Alw`SF!-s-zqfw7l#&hT|Xd}p0au5F7w8?+O&xbk)QYn}P`|F?ds78JU-+%Mt8NA0Ko_T=|1 z^8PvRZsL!+Kmpa?g3EsIKU6-$c;mJA>sjVW9$5b)W1Y{YAQp)aAB*IFC@Onarv8r% zyIi%sDC+C4FW2-v->3BXzyIJ~6uj<9ywmgV_vW36tluPb)ad9o*CttG(-`lV#C;I6OhWsCXNn$&)yWW4k<`&)k^#2-@itMt|gf8(}f2)m_ z|9bzieBbSVHSt+yQ~yT%`TECr#ns6>eQWDx{k__MeSh4$zcGK~-d4Ril3%;%VfdYS zFGY?v{x9L(o?X9a=P%Xx*_+h<2>!SyQ5&=_n)4y{J$OlJ@@xt_5S=V zInEjXB~1IsM9WKIT`Q|Rx&QvJ->EiX+TKL^FMeE9oh_wL-k>tc5%{Q8=>@3gVt zljGkld;hCG3l1wt7h(3{qnDq{Y6yz7WcY1J3C%J zf4hC__6lqCyJvl^`ucjOcx;5)8|5=aQY$uJ-oN4M^5ln~FBx%Isb=)ZPS_>&_h$XC zt&?B>|B{)1|Lft+nWt_4$p{_$6MU=wkEvwlE9a1t(eLim2~)U=i3zZSc`?xbCP z%B75d9R8c?-U@}*?wzY-)#KOt*SPmx+x@rn*TGxHr?lUHiLcvzhk=3F&2bhx<0;nKg}KtN zWWY5>*O2?mmEi4n5cb|8A+0K*6 zS^KYUJaTVu_SQr35sPe#{wDpc-oLc?YnF)r^ZieEt>+7^tvNMq_Ad47>sSBNjd^rs zn^4fBzx!W(fA#n7zm1+HhTeVg!f~6!SBI|&j?a4cerer5)tDE%_JsY8*k!ghHaz^- z-l&~1Ti&Vf*#A>+-)pYwA6qMHg*4?qRq4jBeSP)y)$@m>S#^!pXB?5zomBkt@~>0> zcI@j4Po5I?W9?tvvNe-lcOK6dR?$rRdZun&{QA&EGeZ33KXYua{j&Ye7vG|A>Hogr zcRw0MUAKb?H^d-_S<`>QY3Uw?awLo~7O*RuB; zyq6S3Zcfbpe8qgJ_wRG-UhiE0b^TQ(*I)nk2)x_!Xx$$5$NJlZeCxLUnE8Iwj<;9l z+XgPm`1JMF|F$)E7fr0Jj&k;2|IIJvwq~btO5v;N-1mybMAg@Cv-tX7QShYn`<=RR zt2=M6e&-$ad-}zs-8`@44|I4xHI>uZ{`>yRwe``f_q$XDeSNXMZq4rECZ&9=CnFyFPesJoRGL{}>V3HBawskE=R< zV!Ougui@*XCjb69_1TxGKR@0Jh4=2&`y+Nw=ip$Ovp?|=r*Q1IHM6h2KH+=u@2BUlPJap7{#x?a z(eQhJBTZ75A3c|`^HCf5 zH@-~zwfn;}1A|FdSGh~rf5};XeShdJt&6YTRw*CkR9$|5a`|7M(APKX@9P$)U#yOP zazZ;}-g?u;7IEuqm3M8~8+vH#tFKe5u14+Jo3KB8efj?K;@GI|8cH+z-v2VK^Q*DE z!N9(JL)U)KbEzh9!6_1`j-&3t|DV%Mn6H+~21 zTNEbW@Aj`}`poa^H!hqh)SPzoX=3~1wN_hvc89;);JVN3Z~WKf@1@z{uf<>e-Z8a$k@esBuj!=*({*;ZJ55`+H(?|9ooQ?8HvD&O55IG*Q}oOF z=%A=C{yNjPuV%4%B4%OFGLwD&&R-92Z48eV+V7k7IsVcq?}$~``LC{8y>EnUsKZ?%%vmaH9RLSOlE_2xqnlKfA zw@3Zdv13`;=SqLg>HdGPm1*{+d3wiB88{nB`X0XY>vxw?beQQIfn%3y)~3vRoVZJt zv2JS}r*Yi7->d(Mi@&-Q^<`SJ*Rj9idcWiT9+&!GX(#`|d+%%Qoq4~$9{F4S_5Hz2 zGk5XCuC-6T>;Anu;q&EN-=?43<@Y!HVWf4~$>0Bfz4CUedw((Z{_~yD*}s>5(|5^x z6PT-V`F-7=K%^^@PGHGUw!s<*}*?TFdrgMK=U$^`JYtSH#LT4f1MMs`NMzh zU)9$6+p8Y2eKoN%e#_>xe9iInuQMxeRQ%dmZoF^uwzKaq?SoK)!K9= zg;)K{=BM)VxyoB_U#`8jT-JO4_Mk^qRYqUdJ~vsnZh!h|KJGSE?!!x_ywTDNI968A zxov$@$eg85H}3qP-tKfHef^t*2S9@?IwG8{ObZhgW)y(B>5WN>5*n)9oJ>v*6MDRE z-8{bc$ldI+Jrh>XZdH19mwm?DANeoL^8?M}*ME&aRpD~UU9SAktt!pe;XI!MPrKEa zmmj;d?EOD03%Ij^$bPV0?+C=xlcDs_wtD;zRj63)uC#I_Aa7{AJ_G+Nve^?gJOcOR% zT{Wi{70ZhLjE%kT{-tMiZiq+inP2^@{ZdVq)`@K2 zY0`C9mcMDVQ$KxR!#}YD&Y5exW&QGdUR_#T^ZZN3>_4}~&DWH>y<29%CpXbc+Rx|e z3ZsDjXug}Sr604FrUgb;9Zy zjvE4cRJd7L7CKC1(T}aZUA0ha)hpN7kQq}a?U*Whpu+R~g1!Cx7ylfO(qH@Uzv8K! zw!3$kcADmhpS$$9Zf~T1C->ke3<#4xZz_%O5|xFDLj$Tkrn*S*73p`*Ezyo0T1D`#!?| zOjtzOjAed0nF{Z`JeP{6`5cjabaI6ZqlUBT;ZW7sJQ#w z&b8&oBxaoq3JN(nWpbBWd;90}>tD#9&DwwRe@Dr(t?#-5>p5rC?9{F(y*i=nw9%cV zs{{RWbAzsLneNc~DM6R>TJA>Pm&v7t<w)|J-<}5kU6nPlS8OuZ0lm9t*W6{_yQ%Bn)Kd#AwKIQ7UQOPf=H#7wBE|Kl zR{!iL((jUG8&4~(UC#OV^Jks&9l;B&4!(K+<)f+Vt#@kg8!uT+UHa`sCr{yDfyNIX z^4LKh)Dm%KWjX>r6vwHRC-qriP zm!o!n+Q02j{7Fl_#PfCWZenjI$IrBtTFPbnWB&i1M$LH6EjLqblDGWY-ctIGQSRBg zyGQ%(JiW~z{$O{zKzDHFMsm%D7#d*Ue{X@!RHo=LS1yd9jw55N}I!!hr}4F)7Zb zg)RmGI-(q{PL6^S(LPh7KX294SDET5krdlL@#p;s+vm&WuiIZTPx{Hv_&fXF?xaJswJ+3!nj;$la!h?WZ>4lIAeKWx&QTQjAPZ+ph2?`dIsk4>$-NX&{##xDZEv~i%B+ft$O-1O+-erY;UT0+xA_X&Hul?-)^;Wze$(l|2gZ`c%?R^2C9GSDD}?1 z>wSNH$=ua{_N#lEX_x!)Ae|F?Q==|`@M z|334i@1OHp?y&gN{TKSCJ-J(@etq|T*VUh9m;cgkzW?On&W{x;T55CWhTc6{bhk=9 zcG0Uni^M&vbXK1aVR_NcAHOy0Yty=TmKJ{3)ql&nwZ3-k4bop*x4FFQXw9tMUn}#! zgsgwiz2tgXP~OwsUt(_m(hr;Zy*xaw$xx-{czxcTboMNvXiYlGU) zrADPm*cleT`oCy@?dtHgfi~BFzA<<5os{d#WI6r$-=sIf$CMQ0dcU5oPw1{H@Xk7% z(DysCyo`IrIR)EH$&2B)_VV5Q8vbkFy;m2N*4uMAw`JIWzZ)C*`lqkd^s4veKh@5% z{87Cq(sU+u|KA@69H)G$x-#R=jrp&yXM7Fo6}|r|Abiu__p|q1-KGBZrH$b1c{lX+ zPQAN$6SNSU^VhOfOM|bzx^%y^s7fug7Svn!aGb@_q{Uj)@%qBFN$;Mls*l=xb$873 z+W1eUrLRR}&ioQQS^9c<>8poT8_M?HnYZq^?e5jC$*UY?f2qXk#wpDDx+ZA<$&>3p zPp$d1$+Pm!Z^PHz?>F2_S?bJi=T92bL9u_*y)Q4UQ`!E?>Zoi>^vdfoEDlHJ>MhEP zk6Q1t{%XS+&D{|z!nEJ5SsiXSJ$_}@*PmRsz8#wKyudQ1fx6%Ln zM-yw&@0E7@Utib!8yVxXZ>xIuO5-1iaZ{==JsW*43ZNGb7Zu%(FPVh=2Nn_VZ7Fo!WDEX8l3l z$Y&hOv+}J)-c|jRSz6n4X+FQ*!q@Bn|5Dk_5@utY^pf-A|65z?XP3Oc9zV@lC{g|I z**2G^@3-{iCrsaWUH{tZE3M}1cJKTj*YhD!oAK821r}~c-5;*#(f+@)w6TAF_K^tA z#yJi-TfW7={QN5Pw)c}=U%UPmyv_1(-wIw&nYDA(yzI!jB*n8^pX!-ZUY{{()0KL@ zFK$gb*QMDx=ePrz2YyZB!dJ;O}5Qw`0Q_TK73N|ogK%A=Y|e9W(&^0 z+J1HQHT^@?`^)1LCVjs>wQJwehet2f6dV5Ry7!;O_TKJ40xuVruhuk~UO#VTUoUgZ zi~3I&_4WVP&vxE_a=K}uUH3}Q*ZaQKFS3eW{kLFyd05}m3Kc`Q;QUL~CqGqaXq~hQ zS<~^ga!soflO?B~ z_OD;Gr}f{TBju-GyubCe?{E3t*q^U@O=kb!y8YRBO3O}l8tyQYJvcvsQdD)poTJK)|UcK)f z=q!VmHi8`d&&ukcs%;;ycH_O&Ty-?Ih=2BtE{S)5E>ti^Zkwd{X= z`J=se{J&i%U;h99fAtr|?+4ds&8+(Wzw`am$=6n2UsL|_ZPKdLb=S^az5Y7%^_uYT zm{sR1ZL{_NpLY2kuD(XLNvTKtJYR)e=jVSOuk}o9|7Coxwf~y1Z>^BF=*jvui)&L= zPrqJ$ZN<8GtdTqBUA&Cv@dN*;-b&aLdeyM+bANsXw!>_OL#lMb*YL|X5b^WzJ`|_{v&(|+- z7X7!G#Z!*%a$eJyfty`m==d%tUQpS^O3c}Ms9h^5t^D{Hpg{mx&n_x0K8f9us3fBmIex4&ZTt)sf@ z!&fDLID0YH&&lNaPur~byT7>AwQl^8*IwoPa$2|CldVnhZhk*@>1~tKAs6{=>wl>4efPcjYx?t5f(rlt+_@6^I_U1}pRQt0l=JV`dD*YO z7_t1-dKH_}_}5oo9e=O-?#Wl9x*f0X-oO9q@5TDR6IHf9+^fERy71Bu?x#D{wGU5S z;>h?hvi!-eeXq^t{nFVVwvsz;cig9xudDW7l=F}Jo8=$3f`NhQu;VPwCeR8g!>u*3 zXJ7qaeX=;VD123kFX#TackAlo>$Zl6uf6^?#rNO3)!`aJ_t$%da=f1QKJ3l<-Jd7c z+5X#`wQl`ZBmdgfU$v*cj$a?X`tDlK*B8ZqzDn%gf23sXMf2>rp=*Mx_RQO|de;{B z4CjCSnz>nOC1I{nJM~wGublksZ^WOwf|LI2_Wif^*HhuFqlLk@?yNqdU-4dK`^}mg ze_E1~W{OPj*dH0b`o??FPe+UX+;Z+cL|i^M~Ss-tT{IrSq_fU+GxzNA8i+U%YBU$f43 ze3)rj`S9*^72_MLx%~a+KNmZ{Ve$HH@;_Yy@@CprZ+-FCdu4fC*Rx$GJRWX}E)W0d zA39_I{!P|#|0DJb_?aYky7<@an|@F*>)-4n{vCRs*6(|DG+)*+Tkh@mj%}wNi+#Si z(L(zD$$5p%fA3E_Wb*gN_wZ9YL)Ywa-x)|JzZUNLBqn!l-|Oq)ucyBb`eiKoI_SyuMOUV6oqYV?TG92_ z^_OOUm0hw^f7kwtQlQ}uI45-lESRAo%Ei*6Qn7l^p6AXg%VMXr`h-mqZixQM ze$;eb^F#jlk9EpntD1AKey}k78yx;$eACxknal3StWBA(tE^eS{jt>I2m32!*$;o~ z_0B)pbg1r2q2tPZ$YdV%^muo!J6wq*|%qtPE=3+oG!cK?X}Fi+TU#dsI68$ zdbs{n{5S6F$IGQcwtd*J_v+f}%zeLfTs}Vdzk~1oiCrf5XZ|lUn4k5};BE7>e@hM7 z1ztt)ZaTvsw8ybEcdK2-L$6&E{|lenC38F3MfpuTLjhWKYO(jZ1uYLwmfin#c=EsO--mKORxMnq zxXj?%r~64x5vLu!mn9b1xH#W(XJk(Lsu^*J6&Vzt2ASR@EWMG2ryG(b!tvn;GrQ3TCSux!zJ&vzOb6x8@*7^kg0}ct5+^u z`bRo%(qH|@_r>r3PQSXhw#p(*_0E&B9ozy(SLW|qe!F+>jk~YdT;@)Hs<->O315WB zwtw#V)}Pw0-Yu#9aR2RhnV_9hl3h60NA_1LR35T=Bk+7(PU!wN(QQ+H`9GiSCcVeG zKk;jLtj;N~$;X%Pc)s^X{O_=-<^Gy4*kCJ6`MAc4dZW z)rsj5bw?N6D@|Fx;nC&mRa2d!XS3~*e&^jVT2ffcA?v2P~MPQ7SOy z(Br+uyJt+boVqkdYpKtal`Fkx$*=R{&v~*Z<>&i4zj<+)7qW$JtDlOS`nq33-}dhO zVB6oDcIRbF28Uf7?;^?LiyEj^lmJ?`LQ4 zUq5B*WmogZzZBjv|+vC>fF*9HF zvT!WZjB?qsS9O1KBiGNIe97_lr4M^K11>C^{@N;khGS{vOJ&Wap||}zgmxDOwI${( zmGBTQnB9{&m*vod=6yXc%O}i9pECF2QjSTU(c6~E_em6-tDJvaYiZ%cwUKMe6ShT7 zPN~{3^TyK78A0c|^Y(vlVr6adIWU3!^uF5C)8=V4OsSqpca|;U^WjX;%!yp4t-f;O zT7f;{HXG)>T7Tf(w{H*2?qzMwJ+tJ2Oiy8A-;%d-;>?czp^MiyzSveYS^v&{Q?1$4 zwk$Y$?%#qs)~lDxzG1P6q=vPS6pNYHeywSg=4x zm1E|6KjYYwXYLiA>kXW+(q*lZ1%Ln3+mAn;UEWzW=hY+GqXixBGHc5p&y(`%Q}xxq z`@Qbk=0}> zrwa|@nDtC9?hecKFf?1ZCB(r{K7N&6N{2Rgl*r;T-`(%r%$JoYCvH+(r)sUKvOPwI zQAMOfBbAG_rLke-L>^EMe5lYF8Ln-)HrP0{-U*KXc>t$FtE>ds1pFE9Uh_e$@@V}GXb zAC9~5VD}Y|EwZ1#C+g?#|FbB-$wlQ+*|a^||F70axoaHxd;Zla!A;M+mm8dGFrED` z`P!qX#mk=T<7b!8N~@Yv`Ltr+svH4%l?G>~#JJo{A$B>&_l}*e-0H5H8p^j$xu0k? zT-=-DopQCHfPsNY%W)PLQx|LP^@Tdzm-ftCAGg(XefawL)l*+jxh?!zTYmL?!B~CY zS1M zcgF3HEb$M2{c6|l9UtSjdiVazn)hRMb6#z6ZKUupze7*ncTZ=6J zrdhw+8~=61+@Fs!c7M(Onk;(y#m4X^Vcp#mLwxW2+bWv9vS#umQ`7p@+8xt({GNQB zd)N1~d4IE%tDn#3{ChELZ`IzS^(^Mna}HRocbWcX>Ak&0e{KdU7loF-`oCtsOXbdY zl_x)m{5=x$Z?C=S-=MWFRYy7Y^#0d7w|r*m*UUdpV~yTVJ{%cwrIP(`tnJl_uYX-# zzhPd6=~}+OMZ4eajobfxpHH~@;@4k1gTJni4ZXhlev_Y7bICp5obBrCmMjhL{JQ7f z2H%oj*ERQi{ctKGAnMJus{K*lquzh*TG0M=MZaz8?)a@u@j5jM_x<*4d%H>R;pC~V zuYddt{qvuhC*aPb_%G@9ag(oWYW?~v`uf_slc9J2aq9i`O<(_T%D4UVRz7TbWB=-G z{QZncf4;v+{kK+3c+;2sO~Doi|0VP9x7#bu{Ut;1obJ-Q_V=F%&Jx>o@m_mwdHHtz zJyY+`kDs)jH~ztdizn}E?fkPT{+Dw6=6B!UtKKoL>$@12tXm(WeYI=Xm$J9Qp*xi3 z^Cf?Oaclb3^PNsvN0YW@ulsm!{q_CfY&-79?AcnWz`(#P5J&8ukW6$ z7rbb{+Vgenzm4&$cdz~%wKwK_xP9=|SIUH6|z4vQ%qc*(JNJU&pU^ z`*qy+$%an<*F5V&Pd?jv`s<6?Z==7~U0-CG{<2D~GwxOR?y&H+p+-l49S!jXZ8Z3@ zrmo{_dHIpkZFN)TTzc}a>r&PB(7Sz4gzNT(*Kw{1{Br)JOU3!C^Vf)a*X{Oy^>VM^ z@~KRL?VWwynzc#m!~Ryi+bgu_?{xh?LPZk&?pb?d>vB$?7O!7q@&2c2WNp&y zS=r$opX4m~tbca&`W*ip@HA=9l$Tmkn=VN1P5poE&rY74`Y@HRT|ahzt+dUmXEAxX z!0*qV{a@VI>m1XceyAkp-kekQPVpbr7ERZ${pN9U^MA90y)$l136?H3ak`}2Dq);?S_!DUCG&HSf7(rb2~{-u^h)kd(^^SWG*~W|LKOZ;sozUzIka}r&~1myZj$p?v&npYJ+`YyI^&CvdHtO|Z(kpBQK`+WIl4ge#!n8D89%wd{t}v*m;ELQMH+SMzkl`oLBIO%&jRz;JH_vNeNbLbYs$>fYQx>U8T5UuE3R$aSHJ%fc>JKlr$n?W|Del;-wU7q+Wy3} zD@xy0Lv{aA@r#GOzdhf4p&zoF-cJI^Qe}qN;?}~~2 zxAxaj_4WQQ+{?pSwjT}sw)gI>@1?J=zZRL-mEam%W&HlC)G7uBCMCyN+)Srfix#h+ zy?ow3{r{7{sQwQ7zyHdC|NH;{{`LE(b-4Bar+>57@4I?>*+%a>P1Bdfy`Q-+HUC%s z%$S!`r@y`?6!rf5>uZ{Mzuzwpn-G3!ZN&Q-f3xbrOOaSLbW>s?!Q z_54Bc@Yi2$^%sRkX_s-Vue<(Y(!A7Fi`M14AKLR-{G37W!cXzfl}_cZ{8V-Q^}VR4 z5zevtPIgtBR|eMKlrJrRu`zt|iU$$paeH6wJ+)UzyW?wWCFilm8rSpOXJ737_517l z>r1{~na8nl;_auPv#2K2b=_YryLdxfuXy?Em3vd|*ZaOIdG|MAb=h_G_1?viAKm_1 z|G%z((K~!``Rj}3>g$B=7yr^<^7YjhkEOf%_q$hJtv+P;$Y%Cs!PgR#?>0`ZJ^i5n z^u5I%S=V2@|8@1W>h%ee?_O`xbde78eI0buvQ2aH)yq})R&1@Fza@Ol)TXA`!|_-D z?Jes#TtBIh(d+E3gOeZaUwyM}Q;hrCx$KXc6-;Y)mXnZ&ARMu#QKPJFUnti?aE#se))6Ev^ABl&R^Yo?a06NzpiH; zxwo}$>#e<2yP}uop00ZxUh4X7ecXEemGcE(Pk%lAb){`;+16jz+g68e)%TA5w)NN1 zQ!CxGW52BnJ^5PX_L?Q1t_H|f?R>YcTxHei=Wnx?zu(9IKm6tObsxDk7G=HsR~N;-_mCKbJ1|tgZf<^!#t!-uOdx z|CHWld3T6xeRcimy6#tBpX{=Yndg^wf1Ui=uuY$m_Uu3T`s!;<-J(U6|L?E$em(tB z8JoT3{=aWOotSg~Yr@|6t=(Vq{>QAo{*|kCb!E}7FV}?jO}&x(N3cA%(e~qqMnk#1 zKVO~Ae}DCd;y=&d`@`NwDW8wmIOw2uTWC}1p+fJN>qS+2Z+!`$)8)QbUwzRg|FUI< ze*{DGwLV|@u>WNDYVXolN8H!P-d`U+E9Q6S)_3pU%WwY^zE`#`nrYtjS4XxBmVWxR zf7M&vNiSpeN#A>Qp!vl<`Tsn(dm67#f6l_#)5Q5V>d%zQm0vZzU&l{B>lE{C*YmJx zH_J<32mR#LKbi4v?L~!Y*Yq#OU7uzj_S9nDm;G*2=5hGlH2$+aZttzHzu!IkIz4G; z%}bMbo*Q3dGX8IuQg{=4`|bXRo%VCPq>gR)|89@i6T^j1moMMBcXD~S>f)o}LD$#B z-aYxCE~DV^;@n4?Y!r&|ENAjP5u4< zLb1;GSJyGhg-p>f;^J&+Y*-NSqLqucX|Y0Q>^txKa$3H<$&r;=Q^O<0#e=!m7~bh5ucr#dB{<_uu_z+sZWmMOleq+dThGRN~t-dBY`Z{yXa!Shl81 zUBA04V}-o@w>!IDJ@@$3zijcR@7XRo{z1`G|4)C`w=3-`=UT2l(dzXF)+NnvZ`R+P z7W|hrdONgzckiflJznn~9yxz=+x0WyE;_fkYHPW=P9C~e z`+M!39`$$gs_o5p6>jtq(wB&ed0tS!C^hq+bV1AMj4tmBs>1opbXXPBX0R(rJ-Kc* zTfD;F#YUaA$@|Kt1lQw{&qFq^*t&jtWP$K`^R)H+Tg|??*VS4Swbf1i?s?)@a@*cl zrBf&8Jy_fF^!0?sW{H?I9r5|u?31;mvQ`zSWC=WKf4m~yCrW1Xa=DX1e6QY1>NKD1 z`}KO&AJId_drQMJ?j}kkdkZX<&$xV0RI==&Rx!^uh9&Q{c)xfTrRHj1WIwAQ#dTN zeBXj z3EZ9+-*7*hk)_OYbG_dL#?YIJ=MJW=t?>PK#?3%@W7~qV7@z-s=NEOe2nyR3-e;b9 zbmr2TyKh2*E}Ohn3eINsm%6suNsUwSzTrs&`?>9<2c=AM z^43ZAtX-xkYPso>*_#`EQ#W*o?(pOfm)ZGLsz6J*Y)4$JL&Ji*e7XF|PKOm9Flt=s z$PxnenF2N>fYyjG9SWG}q9w@L;+7<=mAAZZ&)J7Y@0QP;)O~Z3%C5P5|0I9fTI$y8 zf7xDM82oeV#r{6SUai)Z{mT8S8iZ-OJtf-ym&CiF(wbOGh_~MH`45td{#FcTxH5ckQnN zOIp)IzO&{P3HV**=2rgBcz{t&WQvLi=wPZw0}mY)Zb4=yMU4(8C9W1H$AB3w$Hc3y zS6BU-r>(Z>x%pGkpq^O0pkI7-vnRLrF1a%=`Y*d{{=#{^NB``Xn)III(-MzM>S-@; zt*JUI_TCgrWD7(gja97cZIk?D&G0g;7^zo6DK< z^j7kGHV=robYi){Bgu6(t{T70c_&SMyYO$hwrD_T^~$t}vOMnh;Va7A7|z~dXH*jb zg>D;5B4`DU2{$K`qrrp#6-7Z7Cr5)W>DT*q&fk82_uB3~JCCu7?3wg^S^NC&+Uv`e zC#!f z&GM%YbiMxcJLyqkiZkX=PI|j??+)9QlLB@Iy>^vc7QqftQr95-5czyWW zYkz*%nKLemvY+4Rs-*w_>3*#6tANRd!#Azoh3syPiS^Db@pZm8`J}H*e-@1pOD zh%o&y-JdBk;WX>jze_<~X2utevv`<6YYuYn-rD-<+s^vB_2KHPWlxr0`tdvNZ;<|K zQ5Ku1&0q7bZ~Ff?YJb*|`B#7K{q?kN`=2{EZhfu#T3YqqdAICZ@3#Nz>$a~Ey!w0h zv7+U#!(U&0t$+2T{?{V^7&Y15Wt&z<-EUg8r}h7@eZOZt3ArnL{>O{?;kDXUnsF=o zq$hcA&DxpgAGR*_(E9m1^*n#SjoAMxOzU@;*8hmr*Vnzi9-4hQ_LoZh>d@~Md*e&{ zkA&K<3)X)1{OhSL`=d|4da`e??cIMnW7q9I{eOqw>Zc#eWwcjMz8Sk_U+4YR(_Lb| zXUF+$7k%<%&2-g&U7zn~2h~=cTDLWB&i?w{vSz0569sBB1*;CKZ{jtn^%u^*{=N46 z`?;011rzVb?TlY77<%k?uZ-Ou*DaDv3>PM6d6&mY_3}51M`upWzkBEM>DKIkx8HAF zpRd09zQ;!IS5Llet=sfhSnFdH=S2RtiS>4WHXA0d{+sky{(r`F%dj0^SL%PQ{t}aa z_1Tw9|5r!0|N46O_lx6^_U_YD->ts-^!nDKc(PP=yd#k=yJ&o(Szb1B#Nc{gvPd;B+)O7gIw5|8;vrgJ_z32_t7VaJT zD$)D3cxh49)yePwe@UOmp{c;Y!1&B@7EiMlYv~K`xn47DUQgMsdi7-XMX|lNwpMMu zrCXjPzE)^cP5qaxr%f+B-CK2K-kR0ntN%uS&E8tHwt7*RPVt>-ua;)IX@3v>^|kWU zed~*-GN%7hW_^8seZc$stCRPKuZ~^q@=<dDwuK%~DZsX}iVtUut6up-H{WW_=*N)kdm#V%-ef_fb z))v>3udBYqdcT+&wJx-N-J-uq`-4=wZi_#^ZmBr$>+A6Bm9<&=SD(z=b8SgNlEgn> z_1*i_GTi*5{#M0D++P#8Z)17f&hRzyt53eZZ~ALVWXk@?y{SL9$6uZPcYTW8)W$Xxjg}t->jHwZ0yk_xkGli&5TjvoA`9$8{x^bq3w7QY|;1(<^QX% zyqL5;LiOvXz13e|?=SscbZhh1?5(f9Cao9Q`s&Bhzmd0&s;@6wy7AQPy5(M9-*?Ww z7%}bLf0tLM-&P-zzrJK^{q-d)i*~>NdMaw`N2BTY?{BTYzUm2RF;MlQ(05l4eO(iD z^w*W?s7Oq@8FYkG zVOh7{vrm&14u-$x5w<^G^XqP0et7s=-`5-JIxkggZQt+u?&9vwfYzh4xg{5jnG`TARS z`=Z}}aOG9B__y?X^}oG$f9<)(bDadK)i#_51y;g4+dmfhPaoUtPz*!1&y87H@MFX!O&vZezUB z_lmyk?ys-DzSNCKU!~Pnx2UrI?w`*eMPIMX%MUpl6S_KlP1V=rty7EsZLIMLUmLRH z>HeTMwc4k%{(i6VFL$|mea*T@dab%ud%2G0Ut9HLYUG_i_g7zE>+;()$fIfJU)gup zR$p7KTC_0!_4M+ypX=7Eua;jE`&awA;IAvL$9_GHTPMH1ZbSL=f1Z)sj(_#*vYxg& zvTWD)s7b57zPcKeSw-ze&AYt;*~6)@}M*{k8gP_12ep)$gj;tCG*WuZsG50pjdVMXl^g(-6)Zf=C z;_K_y@YeoYulp}s_3qjFO-py??a@Cwg-^c2INnL`RQq$K<+^t7cOS`ref4B+T=w6j z>Gx)GM%D^(>UBQ-uYY~DTh*)jsQDKoPX7Go^z_sH+SQ?}L)W`h9?_{&U4Qbq)<28p zGEU3ltEIKy<2(}U`LFLRU9VbP_#mrav3r-)g%lsTskurJSw>Q5uuAOnSxl-D=g^NY4R>{bB)_;y)6SkL0y}fC= zxaha^MYoI)2AhSpQ9>(E>^}J5iut}}Kj(>W6gu9xl!$-Z+m za6!Y3Nf%S2zjPQbu$&+@Vco9IDd_Rz2Ex0|)STerVmzH8}evmf2Pta)lR3sR($N)8n;DqNYM zBErcDxOj_}vJom#+mA4z;cSvdQA9)ni)&zXHwA9+h+ap|EIl)y#K9}v-)nUG3%AD zkM~Ii?Ej!CS2APq#~sE3b57j12-&o)ALmnvJO?VL^-zXsWVBQNqJZM5ra; z)YX(fzeB^Mv>v@SHJUy>JZRl6neG*@KW?{E_utJ{FZFoWyQzqUE&H1}( z=e2}~hj*+sWiRw|J^ias^HubtV11?+YjeK)HSCeEOb?$Jqpa(p`FlZptGT+Q$2x{N zixgLSmohgn>WEBn5#{7`df2ccz(<3Nx9OpiK!}nNXG_yUgU&m}*M8pbzrJf#L?N3Yo)R?i}3r&foiY>Gyf}Kj(1laOIu1YW1^OL393#{$AF>=Xj~~)Nzq2 z>s;Fl_LF)y&XfLCqIUk?x#+nbcazPsSS&O%qhfQnS)4rF&t&oGoMPtgri910{G$)Q zSrIUAGhax}6rE@6JHCBVnsqbPW%8sM>?Lbi0xc(NmZUmmn=m&pYKTnHP~vK7I@qva zhK86_E8D|>86H|9934%G0xNv9ggNF~?)mod=kKejp{rgVbBzi)STu3LyL;yA_~wa! z_CNPCZl(Gq>xr{Z^=$H(;xr9Z5j@@1a?qrYxvGG1F=y?y!r{p*FrH(tk1 zy>UAD+SlaP-FnxOUv{xfW_qEqe9`BSy}uT=X{s~KzIi^JcFCP^K>3$}08 z&x|`^p)d2@+TaOq%kxaPKTJbDk_{UZHWOh zTGRv$r)t&BE#0H^YIX13C%aa9?akBM#b@VMXZo^GZOi|gg^w?Ndp$X$%|9l4{$HDX z^97xIkKcU$N8uG~RQ>bJ#r7fMEDP_hbBIZEKjC|Q>WRv2vDc?G&y^^e9w8huT{iC0 z%x?ZmoIky`?hZJi+RQ0``ETdR^(S9G%Z&=!nRBy#e#x2wuUi=_LylaTxAyHr)@gGV zZPXJky1@=wW}qd)$;z}a;lP9(5usL=g$*kLS_C;+og4!?e3V2vrW|@2TKe-(}v6E zUx6tbxZgB)hrVd$KXkh$hC!g(vwB1P;=L`ee|G;h>eicH7dGSK6$XAdqwZWt`M!g8Ee-TZ?5Uc-oqiJdBy485g7&s z#&3?Z_?Wv`OEY}SV?u+I>^s+ae_eO=k!{@judBYUzxqE+dhOL#t3`XG)RujHclGL} zqra|p#b5m$?oze!qv)$w`@6!|hv{F}=zbR;HT`P)bxzxuh4**iz58o|@~ppE zD)Zz1zdCSl@7>s6TVlWe+IVNT$&KE?t*>6+`kSTn`ufs&dsE{!i|n5L_1%-5wtn+o zUq7O4AHMPQ*Q)QQe|=fB^W9%R&b@xCzE01&zuNQl)I0Nb`o6lK_4xIw@BOC#^=Ek> zc|KjiL8R*I;jgd0u6(sWGJNexy;k9vz5llUT)Xve!T#{+*Oxp}`MQ4EI`6;!q2Jd1 zO?qeb`pU}cHKAF5%**eDuL~`Gx;Q<%{MF@1%jJTv?|xhJWA2{(?5S0I;@9lAt$n^W z{+0LUtYuF=2W|Fn6eg0b|7*r*8 z_`&vGN#_l{rnx)AC#_k@HP7GF+diXby=&jJC!ZpeyDtCC&Hc0erqJ`RD{GS`cTB&y zVAA{R`&n8QyG?w)sxJzE5v}^cDK~7{rdKi77wxUuzx3MLsP$n-d>{05b)T}n7W7MJ z_vEYnSAT!qA6d5d&i1&i;bEPuQ7VCV!nD_2nDEvV{_j;Q=3SWgH15~?|NDRc-2MOm z|G(M$|Nl+;|2F^E{=fg%UyR>cer$Ew*0|2=XK!71tz8@YX#cETcH3vyZ`snke`(jY zsjoB3>qFN@tlJyCHhcZnbr)mR1@*71R;k6WyezWm=8DyK|CeaL4PFZxcKyG)_4Ot3 z|1nO-s$PH1J1BRxVAjgLaa!-M1(oczzpyBM|JBvk@2?MEUvu~5>)@&_({`4BzPdi_ z`pH_|T5av6<=NM)|Evss^0n&!)vK=#{<{9Uy?(v-_w2jtR$brSz4i6){nxyh_T{C% zUh=p4(G19rtYSx@-HxR!@8XN^fsfe%#OX*H>Sic(3zn(W$-91iIH<+Z{F8 zHvZ}UxW7fyuN)2k^~yi&-Madyb;?`c>F>Y#>Sy)pdB4}YzRdIIyt%5Xm|x%RrK^6| zr_+VgqA%R=dwo5q>Xi2D>v~uByT13*y3nJ2@oC+zX`lYq3s0|GYx{4n?$f{j_7?4r zSa-^2rujnKTgvm-hOaS=t$J1Ol9~5cWP71`+)nQ;-S@p$nohd5?2E^%uU&s5`gYwF zZTea@%|Gnv@=RUN=j+?2JN|oh_5Nh_C*R&*<8Z&aCj4~qeeabvEgoMs$P$EsGxf#CWu z&{pr)yY^k*AGSX3ZS|LUJMCS&xA-URTetK5*3{S^DvPc~+P(kubXmLp?)7m;YgdQ- zSmat5y+2I5bA52DzF^JOS5KpM{Ms6|_1E|88Gozyy}r6n{blv-^>Oc?c5nR}b>+J0 zYR|h|uS#EkEyz>en!WpL^4FyLu-Vy<)?Ysws>YFh_3Zb$t+$S<{#)n$>T2oaFKbo@ zZ9iWVU!FbxYu0YTyWiHmEt>S|_us78m4CChZiw4zzP4^%xYysitNCYVF5A~}VBgY> zK@+;azSK>4=e<8_((m=*Kc-gNT$z98-g@1&@1AX)GQDnVtkO~G(l98;{dN56 ztEY=AHy!?^C;QJSDlJ*O^tskT{#l#;mp^}1$~&R;Sj~k0udWvPt#P{>Tj{x}ynelR z>8tN8Mpe6vt_8>J+j_V9b70jK->d(+ufD$i`f~h>b#{&-li#V|>wU=Y+P(YJzp6vb-{x|NeG^bv?W?KE%BJV%bgzs$E{g(7<$$qulG50!7 z))sVpdg<%@OS`;ot8O;yRrS@@??1Rcsn$9hazW+#q7vnxZI;t7-Xp zcfDuoa9`)>^~iqt+N)2$_=Ydq|2{Ky-cOa?JJ;{5jd;Ja_TA*~llsC}W$rClAGI^| z)Yq=HNuVTiZGUXo>%CPw1^r9^uAUq8Ysp8=bV`eVpW+iM`cqccZ_)dRf1}FFasfbj1FMWrE-1cKE-T82Vi$ zO1D;L+1WLQlit1hAG4n&#*{18D!gt(_-r{Ac8_U0>o(SH+8?t@F#K=!^yh!8{Bptt zi(_M4ddt>clwZGY_Se;4-y zUAwnt=B=?PnZ0Sp-?IwsH|zG#zRw(4QrrfJuLsy2$hIOqT3eNmveaQTa673a3zdMc&m z^6C5C*!UM~)fd^n*s-W!{zup+PB>(#E)$iv0+O)RlZIoZ=`)ljtC#|%by6S)2 zueddq@7l}DZNKddc$aphyyBha>WdQl!{3)*{kK)MwCGBg%R0@tr2j|i*CvZ!y7lE= z)gPCHr(aV`HpUBVoa=SDYWLUIUr+t5itg+VU+8`Q+uGodCjRSxHOl2j?9yKntX{vi zy!`I3-MhcO+WPL=(*0MvMX#?e1Kq#+?&_1>&wtFn+O_rLFSSL**|S!?^Yb)HD&0N( z>Vb8;kJnB5nQ}GyYu2uPPrk&ydiP{&_SSdTISb}i<%gG-o1QOy9pr!bzgqnNmYP{- zlJZ?%?k!qVzV%e>$~?VS?_Qq^wL6vl$ZpxY-_t{1?6%FhGv!l!p~$D0dz(5VEqAhV z{XPCZylRonc9+lp-|d;jkrS7??_!iy#Qd zbB+0~>wR(FR@2{yj{NvP=~HF>qIuJ=YwQ1CY;Ln}XWe=g1_s7ejv_QuV$ghp} zy3zMH`s=&dU-O^t+JE)es#ojw{g3)|_1yno@soe;iVt7E`)|_z@H$SD()0(D!y>C! z|J$!MU-tLbch|H;udl9KGi%n{h*uMees6t$oqPA!gmvCw*5NOvzTSFh!?N{XS61$N zwl!n-+xUI&q}Bxg+Zz7s>aG1c4om-Lsb78d`nuMC)%dS9<#RuQ7V`*yid|dc{yL8% zWoy;`(uzLieOKE)#_Zczx9?r_de>v_G4+c~Z~az3|GI3W@2is!Chc8XwQ5~<)cUZ$ z9-Qm`#(&OvrF1>^qwPtT`zt4{-@W5}#FDS;uW27m*?P+E{ngXEzE1tMdDo$P*H_h- zt^yr9^*j8lOGUp^>YeMql!gC3(ylX(3|+nP+?y7gt9$&{)~%^q`!`Yc*SF`-c7LtE z+WAiV+S09+deN%)!++n3T)(5~|9Sz<%J4$+F|jZ?ms zq;`GTmoV$DuX%Sif4|pi{^ISeFWWYhi@thZ(EKmE`-_KK{J)md_fPM4%joexx#<6g z-l{9}x-Yp3)~%1*nfhV*&%aeillN|Y2^tPQ;y6o?#f!D5=+D-Qj;V1QcfVV=V{7-{ z8rA#XUx#Kr^;;j~zyJEr`oH4)Bld2cGC!`f(l*NbsQ$vR{clqzKVLuT>gliR9@!R6 zdM2`|YN!5c>&cTv_Z00fy1st(HO`$WJ6UzYS9{m5p1USq{nGBI8}F>Vb8l_k)+BS) zYu)Ty1jRA>4$4Q{af`l;X;}A`uL__ z+h50i|Fz}bn(#HdCtnMF9lvJvwe`E2MDwp~jek}3w`xQG*3-)t-gznb`udBRo-ek( z+Vw5;)vUjfF>`AiX+p;9W{yF{;ES6Q^ZuEjLIsOsoF|GmH0yLK60Uo}hgi_elzIn(xv?|%30 ztCP_rtzGdKwojJt{91B7ZoTBI&oTFUgC-yUr+z*@=X;{+Yx5aSJ!}0Wmj30>=XmyY z{ne#AZT%)yef@P*@OAuZ@9b&XyVup0uJ?Vues$)*qE*)E{lC^G*$ROx#;uOC1Y5E| zxo-OFt7m_G_uH8A=l??43;nM8_4l*>|G%|;`>pS_>uWXNUyJ|#ZR^+Tr@A%UJof(D z{C?lFuhY-0+x>G_YT>fjxDF)mE;kmoe+(cJDX6|LbVn+W1>vW9F^z zdVO{M_5E?{!xnP?+S?Hxyx#R|OxIQUJJa^w`X==LZ@T0!h4NEs^S|bI{9Kj4Yxm+Q zU#l)P{;l5n>goE(y0v$GzwO(-I&PQ#*F(p3e=XJjzw%&T`1jXU`~HWmUN=p9|26Kf zFZ1`U`yJ_N_4Ud6UD5lm?T^?WHv7uSxV>?aH-7(3552!ORB6xDb>XFuaT+@%_Fh{%V&d{p(!)|6;1n*4NjMT(T3`xqGc()~~p? z@y{k-UHoV6t)ErRze*Y&jK%0zM9)mj$=SIK@9*I#d5 zzu49EyJW%q^`51#FU6#qMp>8NeI0JU$oT8(uLaRPJJ%)sf35mGZqwf^fURZBax4YXtYwm@g zPY*BgcA9&kV2}6hHQZjyugy3A^F-0}XCPg-%ckFwv{X8*a;ae`Tk7p;n{cQBH zkNxJgO}k|GUF@ss*VmprdzE>B`ib(Y^LHD|H-?+5m3^;Ur(8edSm}*fPHz_c%3FVH zVZzk(pG=NndsnX2Jk)h6^HV_ULJMy7V3RC9(4nGQB3vy^3mpywSam{R0vTjd$jrXufOJ@qFNTuwBkb7ty?h7V)Jz8Cud%M*lJ;&7k#8M>7sPhT;s$S zjF08}jS{Y&J@ifG@Bfe^@r=K(-+yAc>RsQ8*OO-n1+87b{!WW;4eQ#aoZGmjT#f8H zxbK{x+>r;XLqC1qp}9}yZ{+Ns%(gY(x4#$pROxoU{&i&PHT|dy)AKhbo$On&IB$21 zg#77`w^OF^y*zV1bn7bluFtpDh$nO&`Y>}TkE-CLik}gn14Jiu330KpG$k&G(GcTe zb!kjka7L@pdh4C<-+#?p8Zv3iuBkgK_4R9e(vXgzih?C%zv8e>kE!VK91M_^l6n3i{G7@F}F;<^6_61|G4O{ zoz*2(&*y&m#}u~nPbqx1cTVQ%p6K0MxKC6k3kt@rF|qvo;(>Ulz=GfD<_!G(QXlut z&omO+qGqPe{!+?4vU0)JrC(;OvM%#s37=Kxdhh_FhDe8&5I8nAOwqC6W_3C^VL^x= zXxe|Fp+=9EI>+2&d3kkn{fY$R`@WvbRD1fq@6V-0KVw#eOAHtPgk`rKVLKF zqT-|rwGp4CZU1Y!dbiJ*5x0P|f2ZSJ`Dc$EgqF46t3J3>dwy?b?2ee227x(x=C5WI zY;qD@sI;)r@}3ZT2+ZZ$oNR|2Rzw&HakjEK1fPR}J<;Ai zzpZ&uzRun=>+al&Ej`{6WqjbNeD?mB`&T1XuB(23m=-#F>BeIVgGAL2?6p!YuUh|$ zf7fTGH$PbZ73Az)+!H9eVNygqOPZ3{%FM1-y`b+F9M2mp7#J9P9A^o!bg>qFjQ#rk z!~Fk$W^_zfHGlPYZOQknyJ8zl6wCHrF7K4^)^nUf<>dN+8uiEqf zb*~Pyj&Ci~n0EK%t^Kcm|La@%^y~YpzgOGV%U@SmztgtpUAykb_}6!)-sMo)x#NGB ze8j4itopB~#!Oq&yQAB3`swz0KhwUH+bLGdF45hmmLIeFnt8Uki)Yc@yD^i$>V~~? z@U*(pygz>Rq^hGI?N_fi%?y~Au`*xnZN%H8A2Vy?zZUM^e|_oh{dS#i|Cep8TmS2O zX;IY`-@Av5ey#O=EfT&sB+mX_--o|m=Sxp-om#asZqxd>mcJ2iWA^J?D2Pp89a_3- z_xr7{MfR^gU9jCGe$mrk%8PO%{yfcpm&vLtwe)M%uAOu#CX^HR@y`h{)Q ztGjki?_BE|pE0#ucYR%p&Fhz(y1(`)zSdTsetq)g|6hyuJ=WW8@Z{k3M=sXP!Qofy z+P~hd`g-eo{bI-Py4EY<6BNUz|B}A`>ier7>%TVbt$OG0n)Stv$uedC)praGjQx(Y zgjztSU+N3Kjy?VA^RMf#{{CCC`r7`ex4w5zzYeOp4mxH&>($nkwb@&*zn(I0tN!Zm z_g~F1+v>dOQMl~o?e(kW*S_wIdYj+(_t*98__gug@oU3Ze^=kNZd2Iauyxl~FI{(S zqu*=s)qmrDg}sZ@znXnFE8bZ`?Du}>h>fvTyPy4v+8yivI`q}2%Pzn6d|Tgj{nh)c z(^nl;UlY65|8<`)=U=JS7hm5^nxwb4>c#J!dP~%|p7P%M(X{+^`0LeQU+PwW&HB3T z>eNeL?yaj^TeoTV?)4G&VVl zY}`htdAs(^v`y9fuqQTeZ`jsf+e<^U){DNrzbH}s#a><0s$GAxzh>qM?S7Q3C#`>d z&vf0{XS+|{J^gyhydCa&dq3K*^?a?p|6-ih#nt=%N9}%R^nR+`>g%9gs}t?kyx;fa z>*){Sd%jj}oApZhdhF@W)2~_Xb)4R*XZdCBFKz$VLGh*ai$d>AmtCcuyuIDfw*LLK zCHvzZYeh|BU|{TUoF&Y1inaFiu|M~}f4b5ClC#K9{r~#^@Bd%zzo!59_y7ODqyA=p z37_`+#k^_$E-TH${-(y9y1y>o^#AMZt+Tf#?5$GzrT^@2we0Oh)-Ps#t-ro>Yg7FC z@V^mni&jVdt=`e4ls)_6oVSuszUcm*RH^-5^zqVX9bc!$RxODSpMQPH7sac4pI%pw z&v;W4UKBHJZQRb21;=-Ehim@}eYv(~dgsy7*O#k~{(7pvZjqjc{*!lqm)3Xwt@4gr zJ^yOcuC3i)uS}1s*YWyWrSvFIedkW4*xDHXdHYV;zHwh8`X{zfvi{%S>S-Q3{zr8z zSshj$sV#C{wD{MZ*;oJnoEGGLH|nY29_8$7+S5A2K81zEf4#osYr2(j_~Kt;PhN_y zo>ac;=&yryol|0KkK75pyXZ_*)jR*%kh{{6Y3s7xWmjE!X}`F%>TCS#>o3;c+W!3a z+hw;E<9x$nHTB}%-dzv;m%aJyGntI(+@~4;vfT5I>)H26S7c?}a+cTq@2~ED@#>3r z(*Ell_uaKzqs+_J7BDa{UUQr!-13UGEdJM%E$Zw0e*XXAzW%_1#hLxBmXR{<=P>@f@{p>eNHFrk_*3{!4v7JN}n>y=h;4-Hy=j z-`3xmseS!*p!ny|*nfM!{@Pmgux^d$`u5T{^J`uh$C}uUT?>?#HVAuf?;c%-b5bzHFm+`Rj0f+qS@KxA=}pR)eL8Q>cJnpz>*80>|F`aE zUEHgG`j5jG%BzQ@>;KITuf1QsY`wXP{(04+UthL!9gdkXZ^8fBS0B~fz8n@+@;~b2 z->n~O7hesoUC1{j_}t$opFED=+8I~%r|wtP?k%c)XL7e3JO4}j-`=RTbz22r+$;~j z8(XEkTjSc-TPCqb?7k{2n6dvy#)Adr+*&FZ=QeXlEy=&&`s%U%&b`(5zr?JV_v!2P zg|jyO-nV6&{0W7Ly8j)mR<94&y1Gm2Z)x3n^)>NrMREIz-M9YRDtP+oy71RQ4xqEn zG=#WVni>N(Own=SYIAlBSm2>2A`EKo&T!FE^giiWq+|pm~?|D61f8|v>)vs@5``_t@Z`s%XG}h+KLH>^L#q(c%UcF{P z$ETOGAAb>WmfO0P@$sCk7F(_Mt$exQBs)jfcMZ{M^+`VsE^yt+Wi{;AK?w{*= zTt9zZzN+)0b4BJRzqud(+{T-6aMw%Ok5987RkgQR+eq>?FW!CqYnB!J#q7BE%(LHy zE_i%Du-^M>Oj40k1@nd7l9!qya^CrKe^=U6{?X^WynNr25Y9UDtXZd<7jgg3zq^V2yW{Cyw`DBf zi)^m9m18~~Sy}kHc+ED?sZ+|L%5vR%S+oED|7F*9yqtf%#MgMeNhi-e-+yH9*>^|X zZBokg?=Jdq&NlM>$$5u6K0ioqe$V;f`sS?{EDm^Y&7Po!C7%$B24~_^4~h1qqms%+1}u{QC-8w>?jg*{%5vCYqll3@7faJa zM-3k>VXlrti-mVST+e@g%ll~Wk54||ee88#@@#g?R13a+-x+_Mf1db1)^0zih1~D2 zsxxa=$gkP;W@2AoP0SvtuD{HmHKq$2zIM%v53EnIJHBhpiGaUJzwWO1yf#z6?Qj3} ztwlci9fc2N1j4=V9qo+e)7!W`?I_2DkN5unKNR|H`%DY#?o)ivtEW0o(I}p`Z{i)c z7sp%*0*c%;99>dosw`8q?Dko8h5>xVt(KAy7t=vUg8&^>UN)!3fC(WULZHr(p+JC^ zILFNQOS9|k*YDGxYyD%-=X*O>U%QiTx_WA9mCWij&) z?DW%{uC3>~-+bm5xV!pq+M3PRPVeK}&-q5;?#JVCHXZLgk60zGU)XM)dG(3hznao( zt+)CNi(~l2cCJ<`+;Q{e$<5y)V&;Z7tH<9BHgV?h3Y@pXaMQ&5>CF1|TLKKjCou%- z?Km88aIa>#PSc6AuCbR`!@=t_&X|DiGHKYb#7tC(wP~Tlh5!{2t`?`HKna)gQm<#; z-}`ZE==0dGb+unZWAfWx76w|#IEVcB>-K(+dfJ~#|21suL*+v+6;1czpK*EpV^^`8 z7t8YM&K_`J(TJ7Xc|N{x$);r|#9vMIX}@(!UYh@d(T4|HS`OVhviEn<>Vx+TKg||6 z@})O!ZNxbfp}?|U!^N>+g^yGEx%D!? z_HEpC?On{icTU;mT4SQ_K_aI`>HWhpkYz zxEf)d5cP1Dz^0=8k!)|fPt-bqs?2?kvqV@yBmDX5>qAdo+8?uTgJ-s=@zQtK^tb93 zul9WHQMc4w26$wd&8Sh~2vvp6(LZ`B!kg%X-&Sj^4G}cSB!m$?Wufb@|su-PPf1g3iBN=l%NX zYw_3D>(|%qtlQi5dwpc=kzZf-tyTTD_SfmHdDj-bQ`g&@bp31g{q^3jWA9#HEqpiT zx}I{@wb<2PUr*nfwd?C4t4D@1LEmp}e|4#9_t*8;G|s!d%#}ItH|cND>Sw_-lavq?5kEE|8;7w)Zztgd&PnZjbki}qO|L}N+w^l68ZYF^Yb5{eS4>Wk5j+C z>aNZIj0tW@k;j{+>^2elo20aV{>_A``o9F9UbJ7gKRpSkwG^DFPk7i0E^^=>`=(u?bE(ehgFd!No<=X)5i zWOrDW&_VZ{V%hgkD%NKGdh~08dw6+KOzg?pD+~;*XB=mVv}&=|Ui!J}nfm>|A31#M zcK&}c>ucA#wNZbo_Ew+X`MT>v#Koe?7q!;?jg2{d^=$0-;`&9q@87?-Jwhw=n)R1? zJ6}Eh;`MWNXxH8>|Chh^e%(7sGIPKF{*-?w^*rb6&OGVoS)tY#u3%8PALV z`fg|S+Hn2Q!&9r)?Tvp~w0dRr`luiCelPSdTN}6e>HpvQfA3a3RlUBpZmZM&t6Hk- z%hu|z_S|(;ef8h?E>WdVn$1s*Cw_~rTKYv_>qxlayY=Vxt9;FupZDx*)~DGW5tHBf zzrT3qev6Nf?8?*gE*R#l&N^~Di$7v#XM|nSnfudPcg1Dj6}}4UQ@!5xHT!G!*ZkOb z*SGH7_cvy}jL(mnv?6cYX%p*?1#LRJzb$9IoX*1~e>P6Pz9VVRF85j11r2YT>eI@9&%nm%6@@#5G2E`_qH zDd`MXw=YorS$ZvgeNJ=9xrO5YK6og#$_U6Uf8x4ADXVhA{{0W8tliIY_0IkM>WWJJ zUwI}nw?*+bhn>4Q|G!iGuCG@=oy?e8*8l1*SEgCpt5vI3EqXE8RONWj)ngx8_wB!+ zs2VbVW%WO&SAAbTvX(XUt~qZyd?o_z%jp*T1ON-Bx>Mj`Y%rx0p+RZ!zD`(st_b!Rfza{JxuN-oO83 zXUErdzo#_+ek;GPxrwvm>DJ$cB5~)7pPl;j_^;^o*PzDsySLSkl+0G&>`>_Gbr)Z# z`ZDOt{l1>*6IXY~|NedYb)CYxXwi8O*IA`BURM1u)8M!06ie|x!n_xEN5xKmeXdbp z;f(&D({FwKvt8-$>qJZAz)za$-SRr>PRDsIu9l3JD@d!#lp zF7T~8rI6slrvI|&Rrte)mY-9cxA%(At>@hQ>u*X#nB2S-{7e1z7ghZg-*H;eHUG-G zs%KaJt-t-`mCxE3SJ!uo{^#r8e{)4SukuF|K zj{oQsxUl~HTb`Kzs~Hy>dM4{VJW5n!9nk_D@ew z)4iJYVd-vbb%U7Il6|XpPwg;!*;FjqX;aAV`ZhD{+h=)ciC=GD{+$tKuO)T8cB9>q z_bf>fdtdp@-PE~Q{(JnC)#j!aIqDC6i*(EHmzKqU4dyVlKil85{-yYB^_NYPx-F-F zempz7_|r>yUG>c(-!AXVJ*oY`T;)O zM+KJ)eeHOBXxgf)E;k#Gvh7)@5qk6TuP^S$tDAp)S^Zt`eEi8*f3Nw>)z3ZsZUF-W z`xeJpqO4x5wf|ao{@Q1}{ngXL{eOb4pTD*JWXE}5lX)Ap?!S(`zCQK;!>#XILVsB% zUzb~Fx9@MTW&Q7P>u~G8@jtcJ#dn3Rdb0I>Qnv8N+S+&dYrGA&oz+qO7IFLRlKN;_ z`=xurYu*hm)PaoAh7Fe;s4E({z<7d_KJH=EdOnGP>$0s zPq=G~9mhju|CsYpQ3=7#UHvY5LeSlXJwlOTYh{1i z?@O7p;g?J{VTm`chmnb%r`w<`a1M=?4w=J)>WElIaU1I z6WMt7a3<>UsziH>*@7srR(*pWmWENjazGasZ&$1k6*}XvYB4t z@iyc1GTGc}ibW@4eh()>;39SIjV-CFpc>mE)pVfsao!w$`nw+Z!{r zWc~ZE>qQpT#(#;h=TNb}#9vrGRO})OQNm^(6)1~WH1uA5-2v3sJb8AaF z_4LdWmz=7tf6Q%5%MM*nSz&(6`Sc$7iR@28(&9eb-;JMs%O?EQrK&yG1O9EdVwtr< z=~MnYF-NtlA@SEWlC28`jvXlV=B$2IGmpDYqxGzc5cmJ96<1d=9&+CL)5dfRz=CQpEdWml;iW-}`p?^Ix&>X6ot8`tkPusic4z26Go~TKcKx z^fQHB61Mz`@16Zc+jOMdReDU1ZS49KcE{+fp6PFvr8zB?^>-FVIK8~fK2PMs@6RW+ zWbSVG|1lz^RsX7jflk@;=d0bj{Nw%>&9^`8_IA7e(i|2v<&)tZV|_-t-q_^7(|&$DV4C;2$*ijG+lFQqZ?P~8(U4K<7bi_9JWtY zbm0-IGVzRA|MD`+QkHuaC$>s$Y7KjTaPsMQH^uh8@Cxvs)3&H%LHPvhE1QeTWGz3K zosRnNP&-}k-m!_Scg^?zj?A-lR&%KMKmAln>imr%+O3gcS_=LgkDZ9_Kf+;OJKyr$zK<5R z_FqqBN7nfMy88a^y^C%EGgjGdZ@ZXRcjQ3QJl;!(qYvNwQmDVvKk>}MhGPY5r<_^! zKcQ#6Lx;A(;e?JDzB|vaYZN%MnLd%*lfQ4Ta^(51|09*Qy}rKd!*kVMt$$9!d52S9 z_Vc{`qf|TRdFk6HJi=3Z@=xuZEI;Xl07qSatbaV8($4F$@Bi)-t(vp?d3;LV@3#?q z#CTpmn|^=w-4jdAH$K`LRXi!qfNPqbs7B8Msjj5Tuky1NRNFtfI$M&(Om}ic{Pl0o z7M=QjW5=S;hRHdJY)6t^7gX(t7M{kldh2qx4(=Y=)A^@G+#g-@e)?(aoeA~+ABzrt zxqn4y(NR60njfKZ0SQ4q>*u5{InH!xLEw6eHXQwMDw(q2;ss?&Ph_ zHLtYqe0z|?+5X|VqID*n4FB)7YSx{J;z@DT_}BN#iuI9&%NqX2Nn!hpDtc?rFTM27 z@y1c>xzjsh`GS}V`iz_wOcj66R`hVg(}oukS(Soihd-9oCo;zDvuifqA@%I&`SlMA z6IXunn3sQm=WJf-uiyP{oLZ94-@UQ!Q8{$9WcT8q-uG7h;+~y#!#*J=Wb4tmH=mz> zj?u7tT{F*T-xHAo1#0;swNfGP4z{*^wMf{_v@*Tz>G`kUSKT`K_1?qF8w|`3DnGh; z_ny8$_MQLVBwsXYKYjVPM(yeA#M*ba6}xP#qB7(Tl}G;la5LNRjb%cm!J#b&)x>=M z`RD4E{8&G;?folfEyv3zzi)oM(#XJd*1V|gEJqICVZI`xc-0c;~nJ`D0JF&b0lzN^@P{uXs)76_Is^#J0RRYO}_C zl}b6MeeZ%dm!9jmYD zt>4SCx;6Asb+PdEwvLOFSFQha<;wdjcMX~6vs6~dZ_Ts(C3WR*=>9N^$zl9H;vW@O2xa z$^Sav#@^bVeOLBEmwx#+fh()lyPWclf84qK)z#Xo*Z)j4U!C?g^7tadufJ4QUzqnd z%0F&>-0rx)sc~z6{q2b_Tif@ec72lgPImpH$BmRfrhmB>GXGP`rN83$h4!AmsQt-5 zzWdke*`Zky`#>u^zMj6YYHQZ&Ey_!8hn;>}`i^r}(&7qp_2++Y3E#aYusU>o-OBx8 zZ&TIRm+kF}WYvw@_|bNC^0c3}|9&4TeciI_>EEc!f7i&b4(GTJI`I3b{`%1MeLp{nqt3O;kMCIyYBjHoTsGztT?pb$UOSvGAuvt4&hdmIhDd%=@~!!z}yl^8264 zHDx9wY4h=HH~SnsZO-EW(ewW&pAOkDwK>CJ&y$riz8sQUHK(QaM?+kd=(QI=|{R_xsMM&B^dN`Ec?>Bjd;u#t&Xc zaV`D6dwTCO`|eM*r@S^5i8gaiO*v4rQDpOse_ME_EZoYno%wrELy ziq4i7@An=OOOshejEod?$|qZ#=(yUj?wKD? z&$gXwe3mOXuKl`i=GyF6CZ$c6o~Q*jn_BGV{cw2d>Z{Y=G$mep^DQ>z#mn~f>hRB*_cypnsY!jBP=QQj8K?3?r{4NjW%sy}`>NL$U5(l= z)*tiBbjv-*DKc^Xo%MXl#)V;at8TFvsMs&gQBU5}$WZcb_eE9h-LC8Y^H%&>YL$F% z-3BX_O6hC=4i|1*pLp*64P~Fx+hw2Zd|tPQtp90K`l+&R)6_A)832{ygF47qUn6zqtRg z?ps#tc--{`&s4^R4ZpXsK1xjsv9JC7)84+|p^`+Fh3lskzwaHIBmGyL_ij~LWNh73 z`5T5gn@;|IyDdsWEng+P{ZjbTlb=Dq z(|?WgZ)x0);?HN^btpC+*1fsu=Hk8KU8lmg^!zOQo3SQqkN&R2)Wvf$xULFo37>o@ zcVFHtj^l33xm9tGb&oxHzxUFrYtI+>x$pV^aK&s9^YY5KQ_q@l_vSwoW_?*+J8Qxn z=Wi-KHAe(q<<{5indi@>vVY(2%L`?8XHULx?ftH;UEkA=r8<-vtyFw6%YMUsIS%2j zkIzHmH~8}D%rDz!&B}VprLty?eeJratwp=_|HkXD`}ekdca_+2u`u0|sp5CdR$t#U zPkzNl=hNYA+v1nr-#g{u9OqfT-S1a(f15tL@%z~`OKh%8zAF0Z$;V$EYs!=_ehtmO z#M-2`Y}#AXW!E3{otp4iWm^2b{TqCu!fP-8<23%rmlNJnzVXRF-Y=6sy*{M!q+$A7 z;R|MaJu+8*dNKdjzwK4ktF%9yJ1>59UXIjP)96K-(Rakk3vDhR*wP?1l|ezz?Y}|I z^?jlYA47!it^504_W!Td7ryV*o3_LGYxmcyDc2@)WJ%jC($$%AvirkRm93w@{kn88 z_pa{h%^zHE+&%m2AIFyN*fVjPLgQMdnp=Or$u6(>v*zi^j~Zqg`jZ@muYCwrD6##N z)P8*3mjgxy(`JbZO+96N$<}6GQ*PDjyZ2S5$8FudeC@T@Q+@}k)nObd2@dsyAxelBWXkS>LL_!vTOArGokAN1)$|T zOT>6soenx0X6UGLvoalWSP`Hm$l2nQBs}xDZhdU&uDy5jVy+tn_l7=J;aqs@i?8y{ zSLnV@LZUJ?dUw;d*$u~f{yi~w zKbXf`p0@huB%x2@A}fZXW#Q*RwFMlv#65R;~?M7|5@@wWd_urL;u7 zz3bc7pS!lzZ1F#kcgOP8{eOSwv7OI9)bjc7?n)az9;O?6b!Ygid4K(2`iiH0RkeCu z9uNN)ev`DP*K$rP-kMc$xc$qTt-8hwZ*1THPG?C@GS|Xu5{I6FYI zl_o0OtWHM*7KCJoa<=$sZTj!uR~o%~MeMu4V;fd#g{*!duO7WU^p%}1JO7vbLl?jF zAn#Sz-MR~hB-Pk*SE>~=8?*Hdx_c`v1R>s+#e_t5fb^Bzl6Kt`5 z{kHBUTU4h-yg9?b^5js}1I^uXC85#ktCv|ZR|W{hqPs z^*+_@dv&$-9Q@jzpFV!r|D*2b^;715|1AF6&FHM))%8>BcC}8v6#eb9NtDo%O zU0HY9KV+YOnCyGPwm5BwO&+Q`etuL!I1}s=Z!zktG=0>`S88+U;Cwx{GS&) zSFdWn@;D*pkM^wp%gr^TILc3mD=8(OQn+qYf0^^I^Q~_ZziqQ`pL%^NjkQen!PWCM zdPRo}E0*q?_qXc%KL2j+u6s|v-Cg!4e8taBM+UKr&N2GgybMkso;#)$UcKne?xyqF zvj2LDy12{RZ<;GL*6i1O6*aNwR9!T~0|o}prH-@2SV6mEqs3o`Ub^m9^7s3H?@Q}; zzu&zwIrabEAf>8}zc$9Lf49=gYVKdq8vE<(uWr1vz4X=bZ1H>k_5WU}t3O|RwUle2 z|B(p0&UH3cg{mZz$@k`&vEh$}h^;qXJ^>4-cn^iO))m(n? zGl+d+h{E+>{x#vBYJWbe@;>D@%dRX??EGq{=&x&n?mn6lb~Mi~dQnx}t9R?K{(g5Y zR?*M8$@!t&(fzzNW*kW=^9#SIuDdApsQ+*3{p|SFXKNpctzWu6T>I*)tNqu4oOs&2 z_ic&$%U$=5C(AVUZq)biS65$)m%hIKD&=p{{QqJ483|5B4jb0C9F(6ft0H%8hSsIZ z#UhW4KKu=fcfN7;Q*hbdU*EJvrxe}t51-8SOX+>}Gc_Bhdk+P@S$TF}ocp)x@4Nc$_|AsLSgHH(SNm%14>PLy_1JKV=Bd`+3_B~az`aMWXqyCh3e1Si$hBF- zl=JtO_$|F9Utd~Hm|y?9{%fV4f6V@{Mb*D|6wmA1WpJN=?}>neJFd@g*(3WP>GmOZ zt@QUb=5-b3kL_z5|K{B4f6(?$TlBU1_lp8m--Fg#KlVO&WrDn<;`id$x(7YaIC3Ah zw^WI$^ZoX#P?$UYFcXW^{#^6>8=q7>)+t?Joh!(dQe)V*XI}UhuB~xRfB*B|@;_?q zzx;625-#N@N^=+A-d+)XqN7zH`_$gR7tMxh3v3kT2Km2g`C7F%=-awie{1iD-mJ#tWz-DSf0ncJ$K zv>x;L%Y4dC*6(`3MYq$RFaDn<8MN~F-$R@GW=r_#AMfb$UfP!H|98&Z4B_hg!Nu3q zD~%p)jW6Z0uZ{gb`QEv}jX46bcg#%Pxt6xnB(9tEWkci28HdbPE?NBlAg4@N^wBtR zD~7hpS=V%O9q8&L!RHnlv;TJ!Ux0&96T^&(9Q_Z%Zrr{>pEe(n{9~ZC zX#JFW9aLez|ZvF6v=D{8$~UhR4?5G2#v$~J|xycDey-GjlCFS|I@Gp1BO_zm_jHZX* zzYz6_+3VAfCto-vU$vcdS&-5YuYc_>+m6!_+-tW>i`mDwo%4uHO>1V_HBZrV^6xeC zbAf!yt-AMpjNg3sY|S52p{-9o)m?vB z82WMTfgFyUHwSkyPd}vdr{nSCiiYo87uFq%YF^;f$8v84|a8?*F)dPkJ-|OkDT>(7qUsf_w506MknMnd`fR z$7YMgVZ|=h;|H0gZYI{W@%CxKujSKKJ+`hsp;R#C=7&4-Ph@R4cDVIL z8ELHlC$_Wxp!(TgQ;ovymV9^EfAPeI>`wl#FPV%V{@4&F{o171M(VHm&o4opJ=L#y zIlTQR9kG3_5USbT)AM)hjHfqVD$cq#^>U4#pspg*nw6>^{ySJ-bI$s8A>m?#m&75J z-@lIv=c=Fb-ah$z-PBM27k)l zYgO<0U-4hCa>2jWr5DWEHB;rTsTOlNPk->Ue(i?!@ynOj$fy?GkN@(;$CAZfW|_l} z@98Wx9z_P7zfZpYE4Oj=jC%Du^Co0?sAfgJmuf%zS4nif`qli_HL?!PKZ_dPZfI++DZQCB?eEhs zKMfq+np)QQ?kP1gIpzI~_4+GeL#^m#PdqqH*jTr{*q_S(@QQz@xa2*hV7<(((>F*P zGEAApvok;b$+vmGSVS}?M}K%M7q}sLY1>VcG;<;S?)olND zgy!GucL?3wzxBu3orXW0%nqMR{hU+zZjy3krkk4%mw#sJ7bo6@He%8&%5K%i z4_}=4@sFNQuIGv_&P=6WJcZMJwOE??gR1XZ}N#%ny#+Qg>XN#e4IZ&HXUD ziN^Ost=hj_c>3I^^YNEbi+r6o@dq4Va!eepFDcFAq)-Cx9UFxTpP=j7AVkL9hH;(V; zF7I-_H(g@&+KP+=i)yZ?{tGEdc2r~mJsuXn!q-*4eGqqjc{_U`t*_~U<1p}^g^y;fhl zwttNJQk$>)eriw2o^wHIXZCGW3=>XKeSPBazg4SC4qRq4cr9$!?EGl@cKzk%aZUQ+ zWjpKksxR7_x|*qKd+Eu^$N%k^{WCiI>4Xsdck|oNX}B*Fo@*7B{eXdiOT%%NSnDd* z`i1#5cWtJJhyOag>+9>Ur?>X3y}mzYf4KF>dF!uls`>x@`mJmJ`~P3nWtsZu%ij() z&);$H_g&u|wl z?W;(&c)f~0wK?-)>fgltzTV>16J~ua{;FNNK0N!1W%$ImQEN-@UpgAL{r%VS+VA)6 z%Nm{TzYyrKUE6nIb*&^@&9=ox?|ai$Fa9YIE-5rGPX@$qXUb*qxzjmAT zv^&kdY+L3r2TeP;_>EE%L;053?H7No*m_#@-`-nartJ(}eR|dNX;)vp>sT7&cxE5} zmt#9TC3fTm`2KGZ*(tC0vbE@cK9y}$T>{J*^qYrpJ`+vs2Vdg|T#XSUXDeLel@OyB1gt)E}l zZ@!gl_Vm*G@{<`+u6L`OLhCB^zt&xUdiU4$`n9#Xo?o-YUx$2Jm?mYxrL#-%roGPz z_8h*y><$O-9_dMVki#4(FVv*>?c1|M>F;%ym}hMWx!*e{p_p;!SB43OHX_+PUr{6YD7Cu|__?ZJ&ux*NRzwG4q{eR}o-*^4hlc~Ii-ucIMeC>L<`ghbX zG2ewVcIV`5O8s1|Eut~o&+mh|a>`fXH7C1HdH>p`xa-K_dba!SXJ6DDt2hZ`2G7UgXw$k-kqx%vgWhPX~!L|t9^b*eRVdC5<2x&B-Ui!u2p{5!i!>>tYx0T!{>+w)H#2zbSm*e^7t2#>75J^*?iz4jTW+ za7;UVKX8e2jnOO*$yXY0ULKtF!*FKh+j-aDC^Uf6JPk$YIHS5>~WnNDH<5@*14>CWTP>|F5UcGw{>lDk(e+KJCl4S1A@aUKF`u6(u z&Hi0?AHHQg*7MRr!SZl{U8B2ZKFg~1*Ztcy*(diZDBFCnSJDbu+~S++>v(_ZpZ%db z>bLK!7JbY7&7kx$&$;7&eU;jdod5o-aDl`Aggr}t-wFHvbITexxAjd$LM^5d3YF!r zEpPv1fB4%XvL|u%%YuFV^1d8D?baPJew@x1xxe~-`=+B&$`5*aQWd)TKHZ-t@n~b5 zv3ATdx5)noHT@mG{abuQ*mKt687)tGw#FGAq@y zMSmSUD7F8!)|R**_r2@IFWT$>0IwaoEtPj0^smww7>-S}{eI$vn=hK_}NY_&{fud42J+s8loUVcSL zsPjYShnGPH8j6RFEZ-=`-rKL?&?9|)^6lGeZ%x>~XP(q*hZWBIrDqDsZ8_Y&(`a^h z|K2MCou7Q(AMt10a&(FK{~wLa&;Ly_`o;d?&DZ{$z6EO*xW>JrhuO?l3VbH3TR?Yr^M_Q~ZvtABp~qI&f;t0V8T)j#WEtG-t4d!w;t zfBcvI?QM>7-=Ez}dHP4lZvGbT1*TeUJ*LjHUV6X(q4;3A-nmCz=fjTqTyojBe|=ct zfdj$L>_1xHe0|WKGym^G^L*LQ#h1Qb;e543K;CI;W&Np*b!)2sztiU|sJJ%y^_i(`cL|#|ZwmE4J z>OVZU>^A*(p-f}`oYjw6qkEpP%}qc3^@!)MqE7esmj9i0eLEbNx8d&Y|FI96Ckb9o zo8$6|^|0rw_- z1zj`y%`H1^MKTQ+PnL;qdwA4%*Plbr1E0FR?@(NEcX|4c&!ol)(gv6#l`S<5a z+#hk-Z#Rq{fBmt!AlARAVWs4S{l?y3!#sV;bPvqP5dUytD$FQ9d>pzT_~0oVN3Rrmh0JnapVdvY`=`TK(dFM^yOY@5^4@%-#zkG-P* zBp%HgyZ+W<@Q^&sOueHaLg*>Tm!WA`_En9Y~_S4_pk2(Io z=RCK$wBf|6R`2iW{o&##_d7qHsG`4zW54F%+n*Wxmrbwln-L_mw!S*s=S*Gv@5`&&uE6 z;-U29e3#qgnf7{hErPrT>(=#7UUGHi+}nHRUo2eV67YNX+jsMlO$Fj?*5|!@_v8Ie z+cVYEIbxNby?gg|cdfg#=erMW>#ikNYJcyK>uA{VewCt~nv) zxh?y@@A$s|gIl!6#gwJuZ|lEBrFpx4NwfZAm>Jele>YQ5IrP^@_N{t}Yb4{}^>Roq zvzzk8@A?z7FbVD_H`8CKRNLSCtrq1upT#KY()%O6y(+ubAI)a2Z`iYB%KYp48ZN#O zIdj6kSFBsKNkBQO<>Bt#-$kzo3p=m1Q$5IQoqA&WqAlM8O&@d|-}!5gUGQJN=60jY zE3VIe{9)hJ+9s>$oj>2$^;uMTMBRSSf1d5nk^VXL2Q8BytG3Qv}IVy`<){`7inFtF~TQdoegln|1Q9Q+uX#%ItAJYq}`4IPNBs$eZmA3=CWqjtC<``gynNYt){5TT8aH|6#u{|J2f5 z=F3=GA1GEPTSpiPNNXf0K8~^A2?#CWdwQ@;^WW5eitE|j#5d%nY^j)@(?99L_0{3l zVblIcv+Le(X4Lw>`gD1xT&swD(@OmZQy+c4&;4VC&PBocxopOBw75(}S*65erA*ib zZZ4jw5}#PV;D5Woo*jj}vlx#Qs%|KVH`!ppwNSNvvdSrgKJPAOIEI;S4* zq+bs;#3LUuZ0Hb|F_^fSdExqmM=5P`y0@B>KAgT4aKg6ulHI>`p7ra!wAY7EzOHE% z_O@t!yx8u4hn=`jUx`!C-FCa4Ls7ImbA7vDg8l#HlY?I0&R^!cCen$8D?Rk7r_)cp zhp)0UU#>TKwIPRN#>1uJQ-g1PovD6A{A)-jgG0cBuiZ)2XViEN3X48Phu|KeeHe?`;HZ=VITSngXgI*3XTZb@kL?lk(IL zlWlr@G=vy`@6cI4vumU9tBKN;Q|@_e)HYaon&oVxXAJk5;8umN2fa!nf7e{-sF~0^ znYHPypPGoxWY)%RhUpHmc_|Gnmfq(rQ#L;qIa+dCr+$C${i}}mPhJkax}Y~cJ2b@g z-`;n7ReyiV`1C1IUVqXhryJ@VGV)yss-2P{#Uj zM_yc$P}7G)Mt4j?EAF3X+1+Mgl%llcW@`H58ItY)w0=xpboZ5;!Gv>t8z*;sW8-9d zR%?}Snk;J}88q$6l1qx_L9@~yZ_{mP*c*56?xDE0O+K@p?cV44SK#=}xa8ym4Zm#* z4yj04{(d8T>gdXS!4>TnzwTF$da&a7GX2Az*-Go5PYBx*sH3;?XUgxdN6H250@9aV z3Mk*R?5>b)o8F6)4v$R=O%Cqz{>?g#Pxzy!2=8~v&zcKT7w-I~mc-4TxFX(7)hSWu zdwJLIx3cd&t1B$7H?)X*Kg>-y;ES%Uz4U zGAaBC&=IYu+?^{V`PnOcRx`tNmW_^`?BeSJ<=3yaQesQ_^7^+(1)JRU{rYdz^CC?6 z&Lrz6CM}X?ZdA2cAFzm5OQ58$!|?pe`}wc7a?gAjz3{~D7llX8KV?yJG85ScZN)3P!UV8_V^tln{X?Gc>%RgOpZ{tL^5hsUL_OnJ2a+nVoJ*C$M$)ATcQ?atDB zr}suXeR!m=`f%a5qRSC^-&1Q+HeFfxyYtB3i}__+Z(R+%lX^cuT{Pl;lI7AVvhOD@ z?hI2q{4(jk{_=0ZZtrSmy|hoV+45-fvoAklzUS{=f6G*(Ui|zlkyUnF8L68;g--00 zm{66=d{O&-^dC`y>|?qmyJl~DqP5`otO-&3uf<>8SeSnG!RplJy}!2>Tx?6STi4lU z`~Orm+vzp#;`d9m*x&DqQoVjdr9ExA|GnZt)u(^t$;9L@TO7a9ne*;WtE!tjD-tHl9aOw~?U=vm|9uDR%FL`- zT(3WFei9P$ee&TSlUPLeZ#(L=Tqn}E@sG>rk5^&Dn|L6Mo#br|0SXktX_SVVz$xnX0>!cy~u|~Cd4mU&FDxAs% z1^>*uz5jli%If%skC#g<3vdW}6@8`dh^YM6rfhCz-7azIzxVo1B&Zn`y0fv-bH;xZ3Tui}6)Za|D0oX}71Js;(>4{wJtQjWbw(}Jzjl6KF1&J;ZDji&PvvD=PJAu{bu4SztzaLR(@ zq~zSbEyA*XMlEPkiaT?%?<*Wv|Bj)FVSigm0!`A)ey>;Ig-q72vP@x?1ykG01?b(HzJb&lg{`vp!uTdvI% zQA#c`ozNuU_xh>ZY0n?Eld7Lg{TIa1?;^CiF0cQU(o4O=o4KYw?9$)&$wiUH=D|GA zw!?{ctf#-?+OcC^Qbms7;howk6v##E+H3WHKigsTdBHJ3of$qakNUFhZ*_SdST@;U(_y*nPxov41R`@Ko?QxC9V1du z8oVSaFf5Ye-Ta5%+trV3`>|NSvQ=i+Z;rBSi+Yb9z4Vv6GQ%xDeD&X$TEU+Eznb2g zX}!_h6C3qqnt%WQ!@uvBuT9pTw(!WM2US&y8SLV3!@l>&g}>wUsi~2EBxD$?{6Bxu z@&9Rj*=tjuoPQ|f_${;XoPFWW)AcHXaEr_31}k{0s~P z#L~@*()DL6WE4ygc(7o>r*of!Uh3`TU4LzV+~kX?aVuNybsTWly_+^6An-xK4~_qm z9;~Prm!EuJ`mm8yjr4Oj1_qw>jA`BYU8b)zy3_WzPjUQ_4}t`>!$v@ zruFsp*W#_|ub%pc*Dcu{w>oCk>uXEDu3x&{YwMS-pa1Qw+iknmC{fp|_1^ps4b9Sh zk6R{OF8UMNIg`(%`SbVJpDiB7om9#e49J`q_Rz4`>1L*>I7^EzJA3`RAKuP!8x&qo zj94A@x3nuJyzBnavaP>1mTgrR*!t`GrK|T}$F9v~-7sUnoQj1pOTSN}>yZRUYRf4BU*KDXq=*Z)WU>V|&XUaH$u zynlu2$^C0%t2X{RdS`ps+LW*F^}lGIU*P0A_x#nfi6M$io(7UGb$0s{GF`6(i-(H` z<#)f+)0uYgav__jNDzLDu=;VBH@uR1&#JWu}4DVc_)+tlIe3$bTf%_F! z>!#iQ`}gGOpoOM)i>|H|{+hRR&HVYF>g&zJpGj=}Gx`6TgpSm?_n)RKP4-FuwNLF= z^cKC<*Iru8`~C9iubaQGJ#A%kG;-hM`ZK$3<%D{pnR8;FHoaww4q3@?g72=0w|;AU z3LB%}WHI)K_i{SEIfb_!PX4g+mFJnAa#72d-_O%_I-HSP5`X&i)kFTae|#2CzrWTy zJX?I#^8B-pwG)o~vv99Iq9ewkDYQSJpmI^abK1AVbL9Ry?7YEKt$zLLMxJkr7R>x^ zzM#)5?8?5qvRRLw2XmbJ$QNfBaPQd5yH`TaAB3IGOB|Dw*e- z?X<}5{QS$pdVHH6T;-HFaP77Khovf0PCer;jOF?m^R4C8y6W{|hhnSrgWiWVw|Tm` zcyeyy`k;Q#^7PYB@e>BHr5N8x}uNN<^BaMZh9Ph+lW6sOPue$`1Rt6+U|PYRAKV-emzI z^IhFz*;N7)Lbr3w*dMF(=;NgChrS;E6*hgY)Q1PxB!Z5(E%`R-=l;*%cFCAuo;Y7h zUs{ay)Pn`ua~Hh5V)nsQWNFne)s}?+s`0DYIFIBl`5CodL*V4zTPyBf5~^CB^+wa~ z+HS7t<(|A3u0J;CN}GRBQ8iIQ^v1>5%~urJid6hXt}p-J2AqC2NddeHiMR%_p)60-+W7U#^jTY8eyachQl z_Me)$H;lCSG#+2&_WmEgsW@TPOIP97|G(6puF!3BpR{Uqgfx@N>wfnBgA#N0$1#iC zKC&>eV8L?HJAqxiN0UC(ZZF}}ak~2}VU>oV+o#g-Wp#zjMe@GsViO8JcZYwIQ`Jrp zxA4_owA7S0}+UaY;l4-wk=lyS5 z)E<0ZyRAU;zxkI0{R`_SPI$NX{DZ$z@!i7J?|vS3o7csjYczAQ_OtkRxB2F%XYWZ} zp8xw(cWPWrweZFp{m(Y$OXl@`swsOXdU3Jh_O`Xh<2M}6{QRG*gH!!tZ}IJ4f0IAj z`5#r^a3*Z_@w0KQ8IxZpYkjqox?%aUMSGJgxBoY(XPX2Azhy|?|9@y(&XtOnIw_m) z@PEna{+}+i|Cs*kt53eZX#ce@K7RcP!~1)@rUtLiaQ@3{q4=w0v3SW@j+s*<+t{QZ zv_5^myD(6#WBd0XB@3U;XWLis-*l2}&Yk@$PMr4*I8v~4*)zo_KmYRv1Wi_`j7T-^ zS^D2cV9H_^v4iu!rEXt3;obV5J(oC24Sv-YycLf9oVa;y;elfcfydLOvY$TXy)~=j z`lm@|x56gAtLK=XmdST6?B9*_Ew5w*l0~|!JGa?oONTGf?D<#qZ;9KR|DVJz28kAm z-_riKvu@uL!T0n1IVxFd=Oi<0XK5Um@V~uF-pQ*e$WSNiy1|=J`RLiV*%$ky-2 zEltUOn!nIfbg2pd*?O-PF*9wfCJ8ityR3DU#a`y*86~4j`hJ2>zOUL{erfXUuj)ER zb^jK0Ii*Q#|K`7ER>5D<7Gw3oh*I8P%~ekP&x+|7^{bPRD_xo4ZpVvX$-mCekvhwI=ht)}Z)FU&%u54b2ioD$P~`n<0b zdwaHjXq9@#vVfaj^6g0vN)L-&|DUF3RPitA)9?8#PEz+-gTFa?2@4*yncrpU*;KSl zwdF@do$V9-?F)8nlQG)gW3c-2=7Z6k6&@8Pf2*393#Tn!vt{xhhqlE+ZFL))nbtjY z-hIC}x9rNRfAM}V6>d)YlBWCO;NQAC>3=^Z7Al`#_i@qBuRl81M^t6YD<<#WS!e9~ zKiSs#xWBZm!N0EZ0)bf*2!*D^hlib4sHVSLuQjjqy+R^S>dg)L z-(*xS{F-&EjC0$k`;%XSNdMmNge2`f{hL^z2o9RuU$$Oe**o|Ea~-J(RD{ zJpRxB-n&CjeD=+?@cDP|cHXat`Tws!Ha%1s`C4etq%xNU65SJw7CbJJ+dC70&T-Sa7+^XM&e|;Q}Y&Ji&f3jhn+GkzsZFX9=nQC%RBUhP~x+pK8r)e@z!@j@1#p5?C-PB2Z`LXCHrvDuls`IGo|JLf3ZsiGy->UoX-S2m+C+27U zzN(V`>Gyh5-IexRw={ihJDzrC_UmUHJ5x4n`k{C2jj+6|QM3NUuo>#Rn|ji`QciKa z{?=TUzdSCu*QDFSCW~|bU&*=kLhkdd`3F?i?b;YI=i^$P&^fox zG}+9z{ke7VOQtzy9*=u74eRW-T~7Bm%-?cUu%qP6eD?MG3fy!){FoZ$rSRE?Wp{zd z&c|t`f7stA*Ew{~DK%vly=vXvJ1xKDc>DQ<$C4L%Cvs}dIq3bP>sr}o*88dR&p-Vu zv!`}P%=x)@=i~)@+&j}HpkwxOI`8SPo4Nm9n>|1OTDOouskOUzum0P2&bsQ7 z(dmwtyX8Mj{Nv9bzs39=*CDkzO=rX6zo~Lwespfz#kETL-MiiM#HQ`FdK>4xsBZd? zh5i-^79X^#GW9nuWqJDE`)8&v%gU#x&1xR6T;#X)puAnxc9B@)&=(h%?2P&I&;R`W zD_VZJ3%gGH)@ZU?Y>}CFI%J_-^wQc{{%H>nrJs7tbE3C*e!Qb%i9y?w>aH8N_NAmHW8v#oC|M0wLcz^It5# z?%S<<_q+GgnM#*e3yWUPdz)2r{oeM?c`;8ut|~exXj!adHNer_G!9T6n+n`&;{8uY0YxHg0M7{eq!jHmrfvZaQKhwA|f7Z{x7s|ffzHTct zfAReDCwFfZ?zf#?%2yQq`r$_fDTWVrE7mb5P8GYrXY*|TkNQ>r%ewb8D6YFC-+q_L zp7$o>-TV8V9;ka>X8HPvk3!T5v0Q3uVyk`d%leh>p{He4A0k52w{;$$zR@#u z>iacGCp+0PbIyCNwF+OoU3W`;#50X4)wdZQFe->lQ4->7${W6++CI|mUGlcQpYwzdJ(*X`^EEBX;NZ6MKYNc} zyBfNC(|S9Bz}tWBXq&udDPD2dKXS^}(--zd?EAmsgNeTWo3bRwcsBE+Tgq)0{@nWJ z%&WhCH$O0D@4k4lN1!_@z|EBNlEP(cr$ z;CbvF>pCBbY}@cHeo^r8jMMwHZ=L$K|4Ze82aIAOJvt(wW1t)k1n7uxYcd^l*bv|& z!Og~Wuwh4sk_ab@%g;%_@7FS=Z7*6VI=N`ptktI<)NOAm+Wz<~zyIm)JNK(ytB?69 z{whmzJ>Q`>{+@5N)%>bzbaE_yL_T1hmAz{E%ME;+u9&`iAf6?syS_H?Im6VnJU!z} zU#!w*-qX3!%~3DCwKTEt+ROH0yYF6Mm;dl}@r3iLEq1l^(BRn7`c>Nc$?7!IBu2x; z>9uCOYh7oo?s!-KPgi86ry8f(>`mP3?=;>IICVaF%4Iccp?CTAg`aEP8~gV>e7Ht~ zb?2b^ZoPgtN6NmEZbrlzj(#1*V6WqPkLvIC9l{cH7~z(_l7DJxya8A^`CTm zlx7twFW$vW6suP9GefS{w)tZ_cQ9E zCP&z=zgIS$k$P@$zCKy}*`f#LcVC|B`hWf`%PYfxlEs?Sty5#$!fUMT+RwL}FFh`D z^+LS-POa?c8^T@~BpSqRHS@XGSLS7sViA4g`k|PYSx;Y0{`;M|fl&bz`<$&!i4F&5 zOb`=cZDveb5TT)_-O|)3FyV+^*4Mq?zdzTnELs{HtNLiw)YZ*V^2gum|NVbpPQC4i z{Z0Gbl%K}ds(oyj7CURJ>DIG$+NVF=e|MoV;cNT+>q%eCW^$?>tn0aUK>z4ZQLleu z=iGmBZ(J1coHyrM%bwH)mcGIN)kQ=cyW7wB#+{2?_~*R;;o0x@vab$5Q_1^xlG42n z#`E7lr)DqxeSO6PR`H;qLoz>j|4Hb~c+arc>dhRz`MzRHjoDso&*3%AcF^kCoppS} z?FZNOH!)@fa@nS+7+*a9TxiX_H)WR|-~D%^Zwjl%2Zat{lWvdM^4}jD6oBI0OGKEJ znK9X6j?Ppr@BwI3R0JhioD$2|yx#S7ZGK;|g=gs6(_O(*o79~@-PitNSN+kBd$~<~ z=Xd+-E}3tauK1Hb``Oa(*FQfze|$l^2}j22hhqP~Ge<98dCH$Bd)pSBJ9$vSiSM3StYJI#BZInS+;gTc5ARo={X5P0`=m4f8fKhz*F53CySn@O z&AT0+9!&igY%zT*^Cw%;*I#XPkLDVCwJyy!^V47Sou92(?7MCD zvQGxZukU|L^=GqZoN2GcT6;sV<&}NW^gr>=dpDn9XH*fHpd%vK+RB)CV1 zgB1Z+V^;{`~#zUdz|zS}apVLyx2`cb-(Ff9Id_SE*O}m-f8c@=wF> z*K(uAzk79jKQB_rFMM|4b7J-`dI_7u)PbA~Va(*EmrU)KKlP+9zRm#mCWRp_^>z~?a=XE%JF``afa_e4!( zxP0Kdjp@g>*X++)Uv>8MuJ_roRR?QkzN#pA{PJsQ>&Z!}HqY8!Ei;;*C2U;CrgG-Z zr8yyM?v)+8EO$}+^sfwd1_sU!$5|3=T&$&~;ZZ{GUSHiG^<&=p{r`6V-~an>{Qv*| zzk2Omz5DeW&7uxz;Il zKCOKl60`yiGObC}&28}#O7YA+IaTHJYuEc9MB7F7?wEdI$LecKwpJQA?%t*RY+-Cr ziTlZO8&8Ssx!-!9Y2$K~;! zDr z(5Q6@8p)ULq<>j%XfJpvuQtp7&YzGg36~aa|IpZWynA{FzrKDM_w*^3X3lec$*ID( zb+YUJ8C6`rzB;bk75E^bit&`3w3`;Qd*eRIjT`oOdFX_t9Vk2Q5Nv(ac#m7s{>yIn zAA~tho89of{N4K+n{)Z||NoI$kfbmpDsxjyxPkV}ZK1MfwoJ2%Uv;IgYxk3N`PcUe z#caH>uKv32vK2}aTfcqCpAw(zvuq=KOi4v!-n)12zREHNi3+M!@A~b2ub=DQyYHo+ zX4?JW3+@hNTX47Dyzc+xtMk35%*lPVCiIKfmpNNs-&ndnoa>+3@_6%eJbWUuObOo~ z3oJXd?a21FgS}cy=S_E$3|*9eW#RLkZJ#bUD?|S8y0}Ak(VzYGs(j9c73sge?~VGJwdrrv zyXaNd!~Ex;{IKQNdG|-E;%(2Pmp@XNlX7fhu}19eBP(4xih8Fs_&-u++%$)$Z`0Jq zQ$=0sBL1OK9KZG^ANlp=-riqZHflv*64RdA7qkBA?XMD*0`4{4$}8_Vaj(35FaGNx z;WVy~4?2SLGDO~esSj+k%X=$%^rMa8oWklW!4jU^^e>kgB)jbrTxn(|Wx2=2ZE&{EL_FvU6sg<67|1O(--upYs zOPnM&dv2ShdU*fxMIPCyKlR?NmDeoDu{pXs%HfW}!rlinPBJy#?z*SJW5my~B=lqJ zqn|5$LiUIMh-;kEx%G$Av_oZFO<|7~+;so=pEX3hulcT1qjq_0T?ESY){6WXo`uq>BCu?l|Ts4;j z9X`2A-}mIp?@rSXKM{$#w*BFTc^^;c2r4X%XMEZewQlvdjZe7OCN)arbQ-EY7(Onve8*Z#@x5~WrhRB0*yAnVdJd-vV% zyO+nh-46IQW7i`l>74I>!h1RORx7pq`>Uwx)bKfmo#CbIRsD}R0R*1nKkkL%43g~a^x@816reo?eh ztM~Q6u*9Y!ul8nz&*sN6{9`u#?@yeOyi3_KzVwgz#{b-hgVr^EzZ-K{$4CBu)86F5 zrHel=`&Ihk$&CY1^BOZ0qZ9f6#83Lt?!;;?{`vl*y;WD%MY+|zKP>Dv%{lDKqnu{X zvV<8z#w*rpPcVJ>CHBPjzW)1hT>M!pvI1q#A6B(l8~IQ4&W{4eh^eYI1^%BLf3mqp zRo2UXSowIjss7#GMFu`$e`S?^-?yw5{{R2x?W7ck1Aih{H1N!>P2f7T>6(`Q($tih zbG+jJxEQ2uVq#soeRBOnDQVgNPlS#Y{(sOFEE#ca{`4pNUPpf^vyajO;Ol1ulK9|7qFL1Vc!3O@5!e8$x(f) z|K5u_%>6_Ap{mWKNp`C?*w(2Y`u&gVbU)9p0RLwzPV=1&m_5Dpz{D;=}Fy_yN}xb+Agj?=hQ*>BVC-)o))mQ%RUd?mCHr;2- zFXLI?T6(p*tC#-l?Y~>qKfyhPWxZ#t{$~)3e>(l3Z@OJ# zTzAmVBbjwqp0%IY&!Ti8*==s{WTGU*ifQDqiiW%!t`pep-Khg- zwJWkv#rpM9R+g;T(D?>+?b}~n`nBbruyIcAlHcpqj(vZw-Ep$t;>hNV_J_MAe^vdR zCy>unR&4pBmPbc)z0Ul#f4~1&eM~U#r^Lr)^=m)O`y4!9_4}sc2!mf$&CWqbuHP*C zf3temoffuR^PWvF{LXEa$p2CO_d^B8HT#@)9OCWS6ZUPcLiOvFx62#%_W#g+pS#&W z<5>BroX(kNHZ9B*F0M2dG}Vd|e)oK;%Dy6<Cy{X+2K`po4(+|0`F*9wL(3sLvw)f}LUk4Qzoc|)aQa66<>8plGI_Qvh~^|j;X?pfEhw_4Th3}3(g+R{apb(+#MErXN&k}-(R0J?QivlXNn)0&mHaWtQUW9;;?I;_UQnd(?^?yY)Ze(la4Ye?-;D(C4ls((Z*sx3sgV%!ZuCXi;VORXOF7j6-=Qr5{^_Mr#Qr#{f;&3qZ;A>Am zzP;%U-s}@DPcOQ)@z-}3&AhI+)mLld)?PLJy1u-lYTv6*b@`1V<`3qm>6G496aJgf zqxN8t+?AViB&*nY_4k}u-E!o~Zykkzs)D${jiGH#W)C;#PPukgr9!ZOV|U}bSu+z{ zqEc7<+`Z=N8Y@1g6c+K_ZL6n8AAf)4!aR;1O*fS{`kK;qcRPKf$`ouLoIaf?cqgo_ zj-B7Zz|v9mj~st|4A-vrvfLH*TwOe3{rc5Uc-j_jIqLr4+=TTj4=XXc-rwDCE*@D{ zs1U~x!@9P^_JNa@!SB1?cdxy=zcym&>h)psuWP-`UbZ~^C8vbelhE_>`VWoxO81D( zVAZ2xfi1&Q$5ZDlH%Wbp8L^7hPBRs<`&z zs(05{glUJHul9CXAHAsFWp&(bg=1VS%KJM~Z`S0COC0IaSi+ZZJwA~Hwx~s{JzsiiZ^`P5cI)&0{r)4!V;-{H{~*Wp{c%q=yqz_Doy`@W_Z2pF zXLo*{er=Ebf^%LU+j`k{IbYs?S(5Lz^CvI!hiAMUXC3gDddVZu{fGI8#D2Y;*#?I` z?<(aen=8Bcmi4>6^2`2D{+g+`w`#|fxO?mhVXO=d4`afiuxRIccr{UE#?GOkx>a1hy$MFHqp0pY`b(|D5Lc`E5HR{z`I+l=rq8-JQ}VKz zQQMZV)qktMzQ4ZYz%;RIjGRmk3T#TR^j0l7Kf{RAW7Vpy-+M*ZNVPM!uskb_(-tXz zo8PcDh1yyUZ2lO5c{ERpgN7gpybDKS+@5j7bb3seZpAvll5fW z>r*Zj@29u=_s*)`T-V#gJK^@P-+!YwJk+{cD>!5I2XB_L3BUaQ9rpVs->P)V$Kdoy z-gjl14<`#oIzIgT^vL|${XAF2b9d!SgkN$vIW_D@=fi2zzBSs%q69acd;NF)5{cOQ z^|KGxcO2DU)-C?(>Q%S(;imD2C)T$q z#OBONTDj)C>}me~JO6&Oq?Vt!TKZ_cNYaMtr#*YNSNI4#-MzQk&Ln2qtj{HRdyYO1 zS{-x9;`+9|zqPJKzu($aAzUYQJ#M>GhkM-zJ>CVkua`QdgIQ!YF{+Nd@xPIH-;QR;op6dP2 zHZOa9b@j!FX>m{EKWN$c+y8x~{J@=sq2+zn{e_{w)Z`bPzxzw*>vN;+{3~PIiw;gd z$g%&>BJ(t>AA3W;_50ob6R^A2^ePL-oqh4&4>B74vXHuUKz6r+q203uipmpD_43T{ zGN1fs{~w2qJU^J$JZe9lsc#p5|AbnU{YQ!1X}2#uzP@L>rCt4#T_2oJX4?Kxy7#j@ zj-#JG-YZz@HxVF+Wmxk;diH|DJdt2mH292KN=smSa0f*U#n+FJ`8lJVk~U^8hR_> zyU)&kP5x(_UjNHkvSiiiFPlyotc;rF7RhQi<5yW|M!V$is?MuNsEw5WV0*v zZGJO-@>$O+?bSat=FMW!D!$IU@}q6B-OL^PHn`a{HI-Uuy*jOtY4=y~{q%IDQd;b}|Db_vY8lKv8szcUO`o!0Y*Z#+364@V{8!L-#S^M)W^G@k4 z&KI-nyuTdsP_}SbJ7g&64 zf=4Pmrdi*Rh~#Cf+gK;5`|Rp(#nV^zK{QB+eTK6{s(i*RQG$$?6 zciiJ0FQvK4>sHbGKa(YO?L|4(?5cjhOl$XxWh@(XYm5Zv99X{lf$hqd;xim)*#&kM zguk9%b92pi(W=jnOLkct{>Ue;sah8ksAm!H7vC{IaCS}g={@)Uy|=sjK4PPowe^Gg zbsyvBN;FmWJ&SgJweaVjwi4d|QM0eMpIp23iHYv1$G6m~kNtfzKW^8ac6~c}ZB<9f zk9^VRS_BtAn;_p;WcO*o#P-vzY+>`;Ypy)_^zO*)O@I7^Lv0H#qnH-dRUq zO-?&)pHWk3o27k_=i9Y|l^hj$PL=;BY&M)dRXi-d_;1f0^J#1EY`%U?>Cw&)KbwDL zv$~x~X}`h$i-B>{0yQQE9k#}p1#Xea)!aH)E!@RqC;e4i-54Uu%U<*J%+9-y=WJc2 z9q{h?s;P$YMNj^%{q^zmcyD?y;~JF?a8XWS-YavM@_%BYk$-_#h6Uq>8F24^>MHk-R{UtQ`T4|nz7|TRi)wu zGo}dtwZc`tt#NC3eWYd>IP4Q@zx47^&(AbnE~`G@+A=}SBI zOpAES{(j=D4F5~3KDwpwypUJXvnc&j^}4jD*f!{QuY^r|d*vGGiu?aOw&uS1JH>E% zeuPUZ%Wq%X8S(Ep4m0jtxnqr(#I*SjT68#0SI;>bB_1z*_t*C9@Kw)$)`waDjrtq0 zkn4Z=s(osmuj|)WY*?}I>fX0=-_EI-9_ew;_icg10vCSSa`~m!_Jv}1!nDhm|DST< zejyXvj4;*X+n?Uv{YZ-6iswJ$0*-H;zYG3tP(Gm`;9c5voAuTTu37Kw4*Qi`aCCJ~ zxb1V6ZMt_$NZA1&v6&{i(?5ormdgKSI#-bBA$M(8WZ=>#Cj83r{8Rizg4IQCUb^>Q z$v$Fha`q?Z@I5)~sSQ11tNMZuOj%OxR4ubUsOb8d zA0T09`XWDkz4tonzxr!^Uxyae`Z!r$49yAd;d1+tZB(*l760@P=U)Dcm}emB`lxgr zThPy--iJO#HeIGhLc89u&O3Uhf4bCUqqr*-SqUm<_Wx(wzj{u1NUr$n{}VYu4&JiKQ%sNqFm6sS8wNXiMGv5-S_SAk}Eq` z@t)_>b=63nQ_6hU@#WX}Q^(KRR@z!xu{bf83WmuyE8boBeEYBOD>6ev4n-kz^RcYWXW{fKtxn*C89=I{Rcp1F{z?e53e z`;|NPNgq3KC7!up&i;cE_AF{r8nf0WsED~7_@(`*VppT`1h>fvyEu5)gkIQdEO=(| zb5WM+Md?o;oT*oStXNxf>z8f(8`0l~Q+Fsxd4AuLV|K6YMnPHdqdoKPKU7*AjD?=`?@&WAxtSZhs5+2FKeI@62nn_xsP1BDvdfN~oI9j34?sUqAacM;uuc z{#*Wyppf6eLeGq?C36J6*37-Do1l2myeYuXXwj=xAh2aM>w+GkmL@0u{*xQC*9f*B zxS-^*hDMlGL-~(bM&Xt_5X9DUqHN(|kf_M(>z(;nA&| z3p38kTc3BH@qPc~-{m)_ioU=4;9Sal|CA?NR#&m?ITAG0%U-Xzv+AbB&Bs>%JKsjj z-rx0g{ndYQyRXJ==+}R+fTQ4mhv1L=Jx7?iI9rrtnwPsXzZd3pi1~U!{od(4uih>1(5#)xX1Zkl{Krq1 z+oev{2o#hEc_(>msh;WmlaJzmX*#za{#rgUPBOQD^0%(IwwG5V`C1!}x1VN^o^GX3 z@uA>f+NagwtCoLR)xy~+uls#{%H3ag17Ba+GhtKMG7x5mk)MNaRC3AqcmTC0^=7wEW`~}}6JWQ=_ zImfJXtZMgP9_JE~>9+BtOG#bxcQezE+h?D5y572AnUHvkpZvj9?E3;kr-+#8vVO06 z_#^ySLATFKgKYsqe~fQxrT=va4Ve-DUokwssrLP;#c@yGoUc6ezPx|p?Pe|!t4dk`P{s@-PCXPy4C-SN{Q9&atp z2A7Ts!7$T(ua@(O<;;Jw_n^#5h1!n3_!(;rr}o8Hs!ac|)8y=NVavE?|L>;k@9J6Z z+?0}%Rry|WV)hGPwi8Mz>1hvEyq&y1{jG_kiJifzQxm4!9hJ2;VY0C*=MidVkeU5s zdjG$WV9tv_4<7NH_n4*pwJ!5b#Xs}cmOC~Iue-^x_N%n`E4S`I*Vh40=G5Pa@ibUg z!JM{r{>Jj_56$?_ezWR+F0}8{zkf)yf4Cf1i?w_n}mPmFJQC)LTM^0a-!KC8(74ju2j53*6RN(R!0_HURtSytF? zg;H)3Tc?Pe;p7;e>Z3{*1K%y=xPD%t*`n}{5o7hN*q;Ye`Xgm$d(At||KQi$r2Ahv zB@Q+34*P8P`jBX>(Y)Qy|9+bG>5^mfl!w2T{QWLqQ+ZbC!Ay;&KcV~@&wLL2U4KP0 z{X8T8Hj!oGcm2P+2tIuMv9RCjwZ`f)$Mp$4b?o2gB_*ret;$oL_`Xv3iOAvPgj3m% zO0NmbX1|yJBQA%t`qs_D=SMDdCZ2osu>amYG41?i60aLBXmMQ=TweD2f9IXg@2)?T z4lVv&8d<)xs4Ctz+pPAz!?LpI+m;D$sxv|kJ&*b$JfZ5*#kTTCcKc4g{(rN-;IQ4% zpq;ErO!5<(?!BM>j;q2@L3gcOwBN+1X7f4(t(AP4_cYJF^&JC%Uon?ydt^nN($PvX7%l7cz^p~q1WQ_*OLk- z-fn!(&Sc%|zr^I^?XFixi_X4(-uY!xdA-iv`@gOo?OxLHZEx+`%zsPYUwwZu%m3?F zjh0F8KR8y^Y|biZa8NxKx%}9I25sK>=0E%6+iP~}Ul7^Vb7HNq&T)bLxBF%OKUlET z!+PesKXY5=-+uqfXW~DFrK=|Ar^LqH78Ei6tBV7lzGx z&?0Lv`&v|s&z)pv;e|{|pZzb%eeOQyGU@QOu#3uTOB1injNiT6;B|Q2F^-G#Sc8}S z+4PkCx@c1*cigE(oIjSWPwiSMUy)>U{9v?hNMK~ameV0WuXC)>m@p}F&G&EdtrwRE zPOrOCv@k^VNFmqzH4eMDIM*#X_|0-!w`IREW^XY=arv}^wsOjjwp^X6~$2hS(n ztm+n&-dO_IJTg&q z+gi6?Rb*v(V`k&brpAzUG1H#=-1xDP!FacRzul)R(V~fw;Rerd-du3D@NIHk<0KA# zV*xdXtgVN>mUQSOJAZ$!;`29wRf0Prk0(Dzjj`+Et>y928|OP*9(vW`aa+88ZQO(@3c4SeuO?P@ z_8d(xjGKMCe|cD*GK1Nb%^UBAZ_c`^+jwz>l!orMlkU%Zv(vaG(xpy5S7>Tb{80DU zMs}l|;-RO_pLA1J7y9iz{qRLZ!=44b?XT}WsL3+sf4j_akuTFvJ72TyGW%=}3QkJw z@|C;!_VK+ruEy{GR2)b=-Ek+>M7^i?`TvIj3(DSene3~v*%rKM=GXl{Ty*62-MwF4 z{nv6YSJbNK8&A~!UbSlSs&#uK-udPI;doTuWH8%cR-7oirPlW5;{U%`_dRoscE0p!rsQ<>51sCZCOB0# z2uwDVNDEgI)8;Mzn6O&YzWyPjxB8#?)f*g5pZ`8wU;BSy-S@cF-^@OmcKOF0FJ85` z`swOfPwqx-@ZD53Cn3Y>!pEZ%WU?ch{#@tE7JB}yBgrBA($8D>OqEy-)a;{q{wfAu zc(i3t=6Tt8DZ3l0ZojslmR`o3(Pr>ft-k5b?1k@5w!V&E{cr8Z($ehk*P*Yz?Ae&K zbA6i4wGS6MCiPxBbxq(s$B9Odr|Wk93k&MJ-AB+WKotzP|brzWcB6 z&hkZuj++an>^JPrWARN-kN*)TQ=yYkA!qT!Qd{uJqm5BPf14QBKlv20M_}TOmH9_k zZMewt@uW+kQ))=5($Rh2?i74D@=L6`@q>iD>NAy?twkZ+sk8q#2@4g?zu*5je9|3cG%@ zuvn%2PyQKperT7hawt^kWlp%P@PCW$!S!3*-|a2cU7OGBki+Zx!I+zsNj|e|cLZnm z^exw!K5WWbU7;x7qqS<)s;h;i+>9Xy&0^;?>^Jy0G2+B84+UdC-ON5kZoj8Vzu#}y z;`g&MlHj#WX-~e+dHD|iLSvx|)eFQ{e2rtS-M8S&ck%z+6Am9#=UR{=!&ct#>r$uS z(XZEkE$H2?Jb5Db3E?#nrlA& z|G}kn{jJ|DY`vTAsUEtlD=^Xf@$?UmbzWV6Gl%8&)xE{>Kh@V6C;t|8{i^qTuWCie zQl-FKHM5?m{L$3efBU^uo5Ja1{D)n3SxvdR@%H{B0ox~El)oFl`|Fx{@t<|s7!(vd zRA1>wq%ra_cW4?c7Tt1rKFb^%i&c91w+>p&5t{IwJN-l1aj#{IuJJo79}G}XdocUE zjRzn5!9@N~|K`u=EP7M9&_e&g6cbU62nJrwHH;1&8y3#*l$zOP=izt6)O!DmqaR(j zK6n1l%iS8tT4j8HN2Th-yB75RrUP?$*UcRSMhEg8#ED)!g78e66Yc(T_<#zvkLV^VRWY z_ZXEN&D!Jq;N}Gfizf@1mNvZq#Cf#7>2Sn?vdilKcCvNH&sk+(?6=hZ*Ya48)MFl& zhd-BEZI|%XT~;t(_VCmByPV)XujGis|^( zTDeiyNbvg8i<_nAf1bz2A2I*y(QjMB)=z%^f7h;O>#`1f<1&AsbjVHhdc_j0c z?0+Novpsq7`pO?G*P}ri6{pmAE{b*TdZFKHyz%kBFaeqCZ#fKHZWM-OWI8QXIGAww z`+0){Cqty1FJ=5cRebPgS-tw%zY^D6dDz~6_!fQg0K3wG&yCI-Tf}5 z{osN}^Rs(<_!=FYEVI_G;Z|sM?_iH(-sie%L9r>Hgw}rk;^~JRnrAF!6FOMKdiVE= z2R8rACSQNPYO(g=sCJo2$9WR3L^r?T`({;ot7E|~p_IKJrnf$SeVeavlhdR_J14QX z{Hj}Pc(<{?zp450uhwAsmpdYk8ywRweRb|2%acn-CzRe-dAzI4RUU3ot`M#8_UijnJw6yy|6#RM!oLVY2I6R z+kdHt%qU0{Ul>+??*E3kWApB83HmWZA>Yc``L*MYBLBM1XV)V>W~|}-c;n2{E!x^L z0`g0^IoorV9DQD_#B1sCsP^@PxXX`QavMVuPt1rj+~D)+(dEy2KbL&h3_tY9+n-7O zf&I7j*&o`k-SdCCcEOut?oP)`Sba;y?{57f_O|6i-I4kA>$~o+cJVEL{`BvsgMyzP z@t*Z}T=BgB^7>0=Kjv?^*r<2>Fh|$6B^%4kUS&0zlbN zogP2a@81^3 zzkKzj>59#>>^CIpX|r&$pHR@@GX3dR@Tcv%m;9X@k6x)N7^q)7nY-qs$WQu#Ub zES6B@l)O22!q0_^79HPvqHQCik${bB-C}+LH+!kcJDbiwZZXKvIwiJCC+>h~?o8|Z zU(DCch-6;Nuq0Y~mYBu=r@viNH}3g7(S5o7!_`}g0!_`Ycdupbi4WgmU}q64UbsC( z#hNEy{&gYqvH7xD(r45Dy=geNF3#suri%jmUcqtH2`h8-F zwanx4)Bm4|oPFQ-s%9QXziyMa!I!U<8B4nB&R$!!p-({7&ftCftk;?0*H=XBzLPWk z$3B~5_v;VsEH2yn;X&uuXJ5B{*e|+xv8KVgb*Voj9&s}5kCW^E z%U*x>lA&}`_Qmq;r7q>aMl>)pi9{j4@G|DmdVgl*cr5A!D+ooVj!JNoL26GiVm z7Yp0S9hoek*ZYMf>HUAX&rME|TkKBjhBwuws@m?GTGl4H{6oiyGO;uHQ>$Yl(^acj z^|yFh%s$4~e6l+8UX((IxUKh`cJ=eqw`qy4S@T_J$H&%q8Iud9-Sr`~^MRF1jhMdY49y$woVudV?_Iop%a=d1Pf zFMr-gzcc%~^@BJ}7XR&7%d_Ifw8vdFca9~ie5OX$g*F7XwT2?!qw`%wLyByq-UPr@1&a{;=?9<6*k(m)` zrTKo-p-o@!ZCo2z@wm<;`dktpqnyYL4L2^c^|psS<1(hck%wV%X?<*_*-4>d--$N7u!d5=by#JnMzG)c=$W} z&64EvlXsu(>RGC{`orseKhCT;9bdR4KIQ7%?$b={#iCM9)3)j`=lx&Q5Pxv{U)>pR zl6o1L0#)Yq9f?2B^C8aI!1MHGi}e@M_uN#vwdIFs7iR#_m62jyHsjot{qg{ARC%>lK z*c10IBx~Md@9rN*DmmgOKTZCTahR2#f62LDGxu0*s)_tLE3M{QqyXRfQ$EbqC(ZXi zea+RJdMQ!EQ-QU*PWY^$fa2|aUbgpNxbFGadX2BAoa?`~o7x9OE(tCUy{FbY*1Ff6 zy>or~zStjr$G0cvEn(R8yGs72$m+-29)7Bmex}MjGyVIbaDkJy5hu3fea!Y0Des%H zh5b2~TbOa|*_$D$%Oxy8Ia5nSS+40|!$uJu5pGVdg$^q^l#Dn!&pq3HE;X$5^_s_e z>oyk1+*>fYad~a7^%;v}XXeCTz4Z2N_s8?)?VBHj|J&?db!7dg>82%qf6hPCseO4{ zDZ=gQFZ04jGXvP&!>8r-r(T~l|9Hm#h(CXm13xW!cE=`dZPQIAr&?Zl&1ZJ^(uMx+ zZ<+92u5RB_f2N7c*MISq?K)mEPx$0%bEdx7)vwO$^&FQAv}^c1Rlx4?nKv;eT|K|~ z-iGo|c)V&uv&15mm~E1m{P!eZs|~EtSJm8iW!0*X(zZQ<4mY1OH!#YHOi&Y%W@S3) zXt2OULRgaJP@uvT9dWLfCbe9{bJ^wlY-6q*S$*zn;w9lAEm=G(zEYl&0c`@H4r@>xFI)1E#-UVr;9b?e$-2w+Ns-`ddGFS)|B+u9d~XfPP+1N9h-5% zmJeUgrrDW%*Wayazwr56Z|=}rIz79e@iD51^k@llakerhIn2~h<6`Y-OxWS!BqZ4R z{N8t&fB%z>)`?-H|ze=bJx*v#sjCoLDZ ze!I5y|Ef=4rp^2K;(9P39$;YLFL0bC#n#1I{L%hf%Qxls2O5<3xc!gW zr~Z2XzkmDxzs=tNWMBQNue%?;>Wy0;e>GM-M)7{BfAN#=f4G^>r20RIvQSa@vRoi# z-cIE;Tt`{ec-(Hr?p^MoY0TUcVUVZwy7NVn9oMw@e;4lly~efq(prVh`Wabq`B&rK z?T!Ds{ng#oXG_msG_4g@Rp0&~Y))F2_u2SayLMa+zcMTKu5Xt9^kWPx3W+u{IWWMU-fRw{E0O1zs$VNIlAsn*6#G_PZn@ExUerM5h$>|XOSbT!Lu^z z(>;T%qNdI6QrDag1WguASst-%pYrs}Tr3Rp64uzcZtFB(9{A_~bf$zaf0gy7XU>lD z`1**+@$VVO6E0C{Gr5JPc?EPIK5bUJ>{ZrJcJ|f}iTWa{ubi*(N9xZy8{uv6?~52(&4(ZC9k~S!>ptrvB4$U;LGm{l|#xNk9N^x zZbi`jb*qwc%D*q$nmzfN{rdwwoF@dtg%9r9eQ?J?g{ema-!Aj(hiL{sPZw zvQu@FHH{_1*m5a>iC!K_F5f9Bpj*zP3k!aXI+KK+C6 z!law2-#$e?;Cs8_U*3~<>$Mj#PiG9#iQ=r#ixE2Q9bRVGqT{7ComHsh>#ATkrfYu_ zfA8qd))R?!JZQATNR>4%@!FwRtCnSGsI<;sZ`rS~rTeVdbRKrb@3qJNR;H@w^+q|cDc62V%p3FXIUjLo38vE*Yawe@zoQvKUFN@k?M-cSj2Cb>Sb}obLvBJngA!S#{r|2f^HhdXb(Bn^v%KnJQj-ov(h=o1-GEc?R1# zIbR0;`HY8;^F<$t;aJJ`F0y<@n)Tnc`3X}34Yb~fet-T%cHJE-L5W`~|0hpgI@{x; z-_nb9Zl7;U<(lRGZdqSy|6%1)dv}{D8@xrA|35UhPtv9DnbE;!$BUcyZ(rTI`|^Sb zHud-R^XX4M{ZlWZNX~z{V9I`{WtOVX`&)7y*1hpjJ$%mObnfq`$=eSP({Yl9)nA0EeerVZf%+>HlbXz5&CwN{T!L&xgcYOj_0+nZ-M zb-nKH^!T0WQ4-A0!s-w-|6ohO^myJyuXjlC22}Vw{UcxAzD>H9c`<8-569jNC>_2}B*u7^=q~Ty_rJt`HRU&7 zq2c2H*JS?oI7y9v@$vWUZyU^g)+iXZ@8tVmGv;V5Q~moVNAkoJsSO1hg}yi0@A&I&**^X6BA@K~q8^m?`rwZEIL;e7va@-cV(juyO$jdc zh;2V7wEBpRcfVx~k3x$`+YJScirlIyN5gj;RxR1d>9+Rg-j>IA@9WN5eC$X}mhu0o zOc4uP`&o_ROdY+r9gOd{wH@Xtm)mi^{=-j;G{)dz9+(k7yjM|EhVeShKz6cDe zxBe~VdCA3gt7^Euc1eYMKi8_L)k&eZxix$5-Aws+!mp?Lo`2RvvxACL_&%}BTRJPA zZ)#hDac=tFbG3KRY3zP&QBwcSTtIUxeyX_9TTjZeiiqo<93hV;{GA zb=1?S{SUYzWcTnc=xEFMc}?|Vi~ro5h8h8ZjZ+kYCMyU!X{9sN1*aTkKKP^NfuG%} z{pVuSo}Y=)-M@7HtNIg_9NKXl+)Q2$2a?;nSpRcBwFrnlFvrk1t! ze4?e(|1B(8{L3*=WB&Q+@fRK3-Y$)v;h1w{`^TLRix0QY5fjviU`zH}F1z#K!B<`j zmzK}^e`L1!^yMet-hQ}f58rCx|8G2_u0BuxI&H%Ia5EO^UroYOy3%epUolV)F+ZNP z{^P_);YKg#O{uXKa_)2=7Sdx@m8#H7y8e4n&IG5jRleif6esZ zY8E_i)!o-u%H;g?{~D_ZZoXLSqpzCxn|ME%aDUoQ_WsLXmvDI4sN3I)`_1cVr|!yg z^ke4yzXo0f%Zqm#cm1q;A9pxar8sbEl@y<6kF*_!$2a>r@i(p$BMvV2nZDYH^T_%? z6E4>>6zU<8E=ii*+X1yih zX!Y#S1%KPjn`Y<~7~F2DJ7qDGgZqwSQA(LaQEkxK8iNP(&o5bdX8yjo{d@jjU-cvI zulQ=G->z%sP3YfW{WWQO9dENjNA%wR>jN(CtU6Q4`%tg$@gJQu*_}J~Oyf>W(!RRa z;m2uSha(M(9m7}D?CD`M-{19xb=3r)@A5CVM!K{;?AS7+o{9PCVWaTBYj&t;UOMI{ zU)NPW)2?bcKS%Y?&(2yGk6ck+D)U~-r^X=Q(#*vX?r&mW+2v)m?K-vjuw(qb<=ija zeti*DXxrEpa_-zg#-Ee#mCQI+bs^6}@qZGxxgi23m*HRN2fG}r4DM}l<< zdJOk1`4p=wyzihygZVAq$Oqq))?`19wAb6t-Pvn&N2%bIeEAtYgRkGs!lV{hZrj&X zq1x=nzqk1L`Y#_9B^CO7JAQmA|C{RaasA1w#c%)2yY+SHa@Me;U+y3Gs4ldAd-&KA z*Ty$IEK2Vp*xu_0GWq>n#J)zLY~q9Ntmd$HzHlZOkjP4!T`>y7T&vf+@YKN)JLF&y$N+ z|0>RGV$Q~Y@a`O+X7$zQ&uGlJ!#GvkQZoLs#m(!}?*Eyep~iKvusO3=W!c898fRv) z$FXsQt(YD#rR2${ZXY@3LjkN_;@bS(Y!UCjytVz^vwu#BYsPM$8LStR4WxY@)_mwT zQruBBU*F{3`|@+Sf23aLJ{O+jdRzbHVR*fcuKrvb_ix|7Wrmqge*X?|bXcr2g}@xU4bZzi*H(HKJ=c6}-)PJW*n z6q3QE>?6P8tNPK__tPIrByF6to#*DSUAvSYn@yHmCjL_4?7k(x+HZLGN`&+5lF2Te z`SNqhdh-ibDSLbNo>7+&I1t;hkLzARS<}lEPnK_(Tz`rG0pBVO*#*pb+u3sFub8Yf z?|FzEi}qaS&&g3UGJc)@bx5gD_O$P;kB=N(-__i`aeZawi>YrOwY+}kz2g3bT`z8T zUtQ-v>3zV1j+ZkN_Zg_@ivKm6ws*rxkCKMD@1MRYU#;I(_`&DyEAve{C!{yr^0-^? z#(2NG>VnNKy)%);uQ;;2=RaeuSd_f9t#zLK`S`Z^TNRf75uaSH%RRR!^W@*R7Y^p{ zF__tQ@o%X4yBalLgDdqK2c?a^a9bH&(tfujDKGA!aA8X?+mV9hW)1h;(|FxJ{fN3V z@kWyEW&4Hm9Q|DO-cwXAI@&)W;`7s~AHHkVyss~mzH;M?#7isYi4UGWT(Pq-$>y<_ zh~e593w9>YGRd(#_%G$$tp62zCeGS1J*?lC`Dfo56?y*8wjVh%V{h%^dV1k`SHH&8 zkAGGw*t7rIH`_Z`efq0Mjt5*4Urq0@`SH=UkY%yzwF<+gv%YiHMVE5*9GiSMQmxU)8em)KRnlYkPk;?UxLyEv_yM?>g0$FSVp&ht=y1joM3J*Ghhv zcGEZLC&!DM*PnFsRn;;7e*MX%EbrBdq>|>f;X78G_OWi<`}e1Ph@-jiC7xr>g+-Mt zoclN}q_d*m&QKSAW>_KqRU`Ci)1`Hpw&&YED*j7dr*%YNoA##%)^&e)9vaR$b8l5% zPu}}EPLAKWJ_Tgu9a?Mfm?#Tw_S!rE`sH5Hw>&KG^pd%II(?Sq4RZu?DFFVCnC zefiHe>2K`)x>tuEOldzfp^%rUfA_B@y?4*1{}=R*jEsN$uE}%XYQK2}C%IQ&Kit_r zQ~#jm`M&`d{=R$lw2Jj@+^@eP+ox^%!s$aM#U(9sthy@-_AE}9#021tAOStIh8sF1_2kxSyFAQSW7d!Csl2) zy)U@F_s9MJ@%8`ze)((ue|5O%_g_o-|NqZ^{LqfcLG5qs$Ng)5SuW6ox+>bTmCnCD47SY&)U@_7Po7bl0j$kkvic#?RVTR>=sPn zk2-5^-B@bTs$BV{^M2ror+KqK9G`Akz+=OHo+bPSU&p79@7ret{gZg?TeC;nrr}&q zvD$;`+FAeDgWhp6H6BV`!goR|UsP&p#q+1jyZ>?~FA6>Px;O54$w86*>sQRb`uEq- z5U1_0{;$5ezHVdaw~e8H0@N?}T?%`dUvpr$Lf^us9qYDjeErtM=d>%&Dv9l--0crt zf>{d!5_|hD8cgZ%7Km?Ksic;8a6uTG)rO0cSQu?Y0!|kFTUaf*IH%^2gcWaTom8Ja zn<;-@kD#0W>0ifdR24XcRzH6(x^R2V{Edlhe2fyjg$m6Z8uw{CmLDiEUr`qGG9%gxU7U$DW!hNGeF(_@Y`ErP@r;pX5srl`-+OD+)N1b}uHa0BT*nhkKx1GFgZyImo ztqKP{Cl;}CR{_SP3Ri}rLi>fK;kUjUCC{6^?@?~j+pJYT-^&SGG={u%*}VJ1Zv77y zZBEPFc-eLvAeooJ;0*u8p6-Y{ z3*N53{#+wu!TiHKD$ftzx$)2U>5^COMH=((f8Brne)*xefN38$2rx4}RAjK>dz)3i zy+Y#9`-%vQ2d{usP49x5xgl(p{7+W@`OvXDU}2$oathm?4pHX+3RU&T|C+t?d-P$y z$__R)u_*2Z`m;LL-TGPk*YWXJ*3Ow_jcIe^+m0|B#O7I^{B(Ioz&EY1!W|!eFW7ce zH`iO|-Y>4DA_`ybN2ut@by%}Jk-Js)oAJX-@eY-WHS5%`|1p|Uwdtfsy72uUIlnHc z$DcpE@%jn{kqOp;U)T(_H|j8*SG{s~{prcEsyozfo_-v-;OHi%P(_zpL0xJa?ew<4 zdf&f$;|bsO*Y-!OKRnCr;H~aI>EEXGvYwsJ=eS%(Qe3rf@n6*lg#+(YgLk^@(fqn` z!MvNh*|Hs3%A{Xfa&TShJv^!M-;FZgBcHx&?XSAR>g{Hi@vi>TYXd6}o8%oALPR@w zSPp%>I~$VlOniIZ&-(u(#hqUZ zSfdnWk0=!^a2<6jO>klnFMbQQZ= z3u_qD{|M8T*}QFsbwdlLe%Q&}5g&f(-h(qxo_S<);7*t)}IetGq|;M(W@wtd)F+SbE|kd z4n^I!4SglQrOEg~pU)+k;6GL`em*8A0aZXSH0 z%~~3J-^_Zq*2&O>Id)u(3Ae6KXEgjhLuMLFi|6@26}GP%^R~AAcp+MUNl@IeC@Wy0 zbT`kk4LdGv;)oWr-go%1b48+#{EdQ1y#g7Rd!DS_lGtdz{mxe@m+l?Y)!+Y6xMgtk z@MoTK#cla-UVT;yQq8;Lac}XbTD9vP8s|b<-r9dPeihjC@84mT)(W4+0so}=86G}7 zF7IpXzt8pBH1QWxj(?2z;#=?kYg^M%`H-0bmLW4&IM(J0-v58ruKG{q;+Nm=ZA!VG zwe3z>%D*j1>wEX?IoR;}_Lud`4x6rJwZENpGLOqqZtDM=uYdS{^}lTElj-IDzj&_o zkGk8(FYM7>AGUf`^#1Afe`b9!Y*LB}6<@<*QNntn!cnTb`>N}c&r(b)_Q*5IAM4<~ z6gh)+(l_ZoC&!)yhq((Rr!cTEb}r!BRIJh8d~Sj9!LLlg8#bBMMC}vPVEOuQ{;EIK z6Q+l3uCKcwAubtg%^*)L7&HsdshpF*0o7y$&|IXQxy;rTdEqX&{`x^_@tNZic zf4+VsUinwP_>WG-sFmWYLo3CB+7~?y`moKdUF^iwD$`%lzi-afT@Y@4YeE+RmfYjpsHS(V){Ewe=K2AkJ)9J7K`j3}(bL9JTP0MbZcQm9gbxO(2 z_cH(f-92WrzQ8Z~-ZFLPX8pBQ+x9y09Ldx?{ox_sqEg1t_*D5rg3}_R4L=8~?~LD( z)A8^bQ$f=!_uSbo0zSs}Joy$4eI0#VE1q#M`Tnd|wGnXg{14oNxBSzE_>&gMeT=^BpP$5Z3(c)mIP zBf9P8^>qiYnqNO)*LAOen`iYr)wmqR)vH}ssZZ$)UwYEkKXT3O-`Dt<_4M_>SH0)C z6&rZm=l_=OKUyBOm##DlCAqCwBFDVtHLw2hMSt5?NQYk&co}PHF)8#HaS^Hx*fAYU;)1=cn%o`P$Tct@ghhdp~X!6vWBJ&)QwN{$7|vwU0e#>huA*170Ho%HnQ@7BaRdfCQm z?Y_yD^7Fs+P02FW4TcPgQ&Mtvo{Z#I|FKoH)gwd9`c(WTotI_&OwkXxZY=E0?XjwQ z@>IsMrqL`+&_qMw{*MaPyi1Igr+OS3UoSb>xZ0u2zVcO5KmgzTGug%(3#TjV?p+>x zTZ4;7?5@TdL#`*r@$2muwiJ8H9qay`p1gFP<5O2<6~mgVsk`M_T{b-VuDvxyKH)+F zN2gWG1cr!Lu`J&%)O)1P%)Mw_IJuaqQul z-Gz5Q%)K*Za{f==U7o+3^Y^F}Cgw`^Tz{y%Q)JU+K69@lte>j>O-O&!=6;oJ>ER0s zP4{m%ls^5G?ITc-J^!?r(BtL@$EIJ4oN{c-EUDS+FE}0YTf48b>5chI`|J-p3!`^! z;xs#R&NXk#{@;JZ-~F^Y9C9howbksh!EU1!|Egwh`Zw!fZr_%xg{7@00?wS?vGLU5 zHH(gUT)%D?f3-x!2mmM?f@_@+?TY&Ykuy$^*}%ZPI9TYdBIoQQdRt5+895$k?`^`Fx3y+4mc z?)xJ6`I&O~pSthN+xV*wG4yaAcUEw(z3Z)z%#gl7AdC6V^nLqS=W$5erXQc6+#35Z z!s`+HKk1^~_9qrhd}XrOwxDKGrq7}B>XV-f7BgPla7o~UeL~;VyNa*4*YAG!=&fo> zd41Clm2=O(-`wGHe?~$ti_k%lW7~OTj$cx(jcM)OFhx+LO+@~Vk#MZ_zb3P^b3#e+ zUs_7iTi%_TRu9l-s&}RA>2Y7I@T9 zc!&PqOJ;w6^{o1HC&pjXzHqm|nRPXGS>8N$%R;~O&YZeD{QARB`(@>dn?KE8*q^(2iG9f3`w>px&pKTG zYFM=KuK&;153hd7?c00EF5>Rj^#|o@l5f-mN^PzZeb@QX>xXh{h}17{xr;i2f^&{< zUnuH2Q=ruS%Z8Act?5(WeTY<4k`_+fQRKGgeC#=i{$}-^i#XI-7P;hrOHk{hj=1_4&YF`*-TQ`WO8O zdG9Bz&giL>vh($K7LkOvz8=}K-!t_u>0W)tnd#%M;#%4yaj7XdX+yTGjr_OIH3z$delRcyGC9tYW;?}NTKdBK>V@9fTeBb4{tvoYAMcvH|KIfeum3-q zdPi;V{iuz%RPuhm|IZo{{+khN=C4DuufD$fJtS(`PU#~pMzvo!TbmLWHga|N zhrQM_W~kpC`#fe#%oV)^jt>v^W~~l9b6xKJkBEi)1>|dX+&cP4kmt8^>4Of<n;WVk{)vP=l443kVO0Ld-Eu|yd#lmvIpz-O01NE(we{4=?Ix{y( zD*S!^>*M90{y0=P2t-&WS52+6ex|VAZ`N*4Jtx=5ZGSHL%=~sRrFQWHvxOJr=WEWm zC$ru1(CwPVUb0Our9uqf8}iD2*>wB%?gK|e*4OYxe~o{zeD^H7ckhm${%Em&wffxf zBFBlbe-#Vs_Nsebln#IOv$l(gGqEG#5kmya!Aa8}Ts{4VWygh!LTX8jN&+k^Ds;Ko z%KMrxGR%C)8gPmI1<#rC3k&QX9+~2B!6^P?;v=rY$yQG<2S~;7$!M`=crkEIoV#0j zQ=Yl2!LJ)>D;QOt9Qu~#bGEno?1%S>D<)e-@82qE&!}gUC%5sPj(uZa(4BKDe*JL{ ztJ%jS;Ipqnj6tx4K`_xaXKG6QLSBJ0uw>NJ4cF6`uhdUfR8b>7-i&V__VKH+{b6UaW(sQ6~ zE#qr0gZb>{Mhrh^Ee&&1c(UoIy+fZID+kL0HKu?XyBvvoYbQrzG|JXT;9zNH(Qs<`L`t0=G_D0p6O6yCt zQ*1Q)ydH4i+0}zvHjg=g#W)<`(N?jsXX^3 z_w)VN)+_zs*7Mt!pP|pYb-(`4xT1!Gk6b4+g?=-ilek_$R8BjK`zoh z&iwDK6etjF`{uETnfmtkx>vk^ZI!hWx^iqmU5$8Hnc4jGi|3ZVKl5|K>6c{-7ECR= zESV9duyy*?_xy)-Ho0XiyYxu(`mgT89(@@JOC&0?j;>KZVE-d8wM+fQt(l?|{=V~2 zR^gOi9{RPdO#1nPJKcwQdsFtl@{hV>N8fn^BxU>H{ zU&)ckoW}lNtMg0K%x;;b*EZX~zAE><_FE@=jdIA{wVq82|A?|Y6L|5vK(U`CrF^wy zy_02Yc=g%oja!sC7Nr^e@KMt|Zp6gAHsA>3ft*eJ*1Bp-Ki26?QMt2i2&rU#gN08v=g5m;yRZpZ&1@Gk!o@)BvdN*+W*M9!rH&Dq~F!a7)_bj zba53why2_By{D4yc?a*)~B~uJpUK* zLq9jb;mrPi&TGBmuMX`sSXss<-}WUW?!J%lzOB8p@1!>G_i^FQn{ODr`*-Tiy+&zg z7TpiD-LgITkD6!x!J9=@A|X2Jy0^A#HT9j{b4@P6h3m_l!p$j4+(M`J@!kBP@NCbT z{CjJUOYTjQa%^87T+i!QlaOp{IxR!B_4_~hvr7ci9+qwVu+-7bI`P=l;}K%l17I?8Tq4%aq93OM}uhnk?Q&lrmq%U^NZ~L*m{BWy#Z|9MtpRWIt7F;P_ zub?LwuGQEoe^h4Q|HMrXPpD*Sd{q3llIKp9Wa+<)GxvUY(9?bWwTi(sQH!-MJD(oj z*luw#>*2KjRy*y*s|3~d*S`DEG0Eudr9J69^8Z=H56s(sFXRy}EJn|HOq3HcKu)yrJ@bbK=G6Ph#hP zN->gM|M%F1g^fm+=jA;3Bi?weDeS9=l2OaCsLwvn5_(o-^ff*bvs&vK>V0Q!*#31L zF?+vRJ$;|Fz1IB3>h&8|&TH%}c+Md#H`l1fPVmQz-`mPguHMgLli-wj*i5E*LtH|G zhqT3k$2t#Wch~w){ylrG&y4s-p@xAmm;P-$_oL)NPe|mvg$)m{ERdP&DDR`NcspOn zN4d$2r>|Z}Un)Ie?x#6bA3vRX+J4T*rdVG6dTbMi$n|ft>et-fF-uZFeWgvnYt9?H z4`$YvYJ>+d$7#uZfA{iZBmd8Xj%+s-{a?SH@GX(o;D=knY4vCU{Rc{60lygd1(jyh zroR7?u=b2#^1{OUr&&!34bG18$7arVe%rP1+njsm_Zio^7IsX2@!a%rPW6^!Ro`_| z&rA_ERC|B5F!ciC6|c;`goWSU1=%Go6EezOzG3#w>fN!wpGP^^zUFV`o|5*`t*+;_ zymREteW}S&D>f~EoO-NT_~eo$+V9ubDBJg(y4^VI-SjK_`K;nMH8IV%r8vDVJUDqgbAT6fk()ho7+8`OFvk{_&IFK~tbP-#<`qvket zUd8Oce_1x@{nKB%rs4HvZV!Eb^TTcHUo|c`{Pz4$oB!wIrKf#~UlFru!-`;|*}Q%_ zYm(RfOP#51GmOm+@vZG5olVjhQner4RYUuM?RT9@-EV>-MnPEqc|5 zo2$!LJTv-rH(Bj>?yU`~FVr?1-&A=bV{6IVtsAA+X7S~}{{NRx_*nYWpC<%oIV{t7 z-+mx@S#djy|0em~#D>Ppi<$Bdz7g5- z^qUW7^D&mQ(UVLPH79huX_2_QQ{eq70pA(AuYU&02%g$);G`1c*Hr20da}L9fA?Wd znfSokMZt5tG=t-my2Tma$lUi{R}%JZ_12fB0;#c0OM=37b)TQy*?wekp7^0V zhH5IZ{hzyc@fg(2%`*tScukV{sl~o>kFY^7D zbFM$X)-BoB*e0xLbNZ{OX5X7LX+K>jM%A9=u$>a^cFgh}cZY?E`kUi_r$+>R+n(;7 z&(nRW+G`sJ&kep1SM>*(Jk}aX$E+T$Ivp2PxA=Z^$jWn>mUcI~yjn_jIDQLxxO%eg z={F72R$I2Q_I#RhQ^YU-iqH|>hkwsYo7Se!5k6u#bz0CLQNfkZn&zoDRTTH^-}5Hj zEGRAI`0-hVHfBNlJ}ileXU{R4tYqeG=r2@wbNbzq!uH&sMLDOZoz%J?-`{gA|M=RY zFaGI>lm>nO`XXXuWWgEb$Cjmz=8r;G+do+!vFdl!)tekrN4c{9-)2?$SZ*vgYn$iv ziEmAphyJK8{481fc+Irmwc^JrW9Mnsem^#+w#!o||JUMMKLt4==GjUFD!ClVm(6sW zX}&(-&aSn019}b@?n%2W=UBV+9p~C*FZV9_`XhT*=ud4O>lXd$yD5#ErhfN0JNbEj zNo3EwpqCFj6}NJ3VSD{NT=`R#={jdQxB1fFet5IV`KUbp_bT`HPurhtIhQ8#th(p_ zQbn1=^_tzkCCdXjyEmjxvdNwBZ~lpm+2@XanD~x8)WCXu>C)-P_I{i;D<~l#Yx6YI z`CPo-zqn#=vCmiY&Y%9#g`aEB?+;TFwO;yu7rj{!rkJv==)97oNK}`|g1&jOWje>- zmZ}G*m6&VqtQFIm9do|(`mSkf4{FZ&aA#+ERnzOebKl>6@~u8T;OcGV$MFUYbFxa! zZa?`K{rT&MrFQLqnIfhOZ#7%A?fZWB?qBNX_BlDf_%F#TIZxPBZ;9{!N!)wCiLJr`P_? zP@M8=>*P1`*Gv?21Ya`+ThvKMpLBiC(qzHO=y2eSaQlkT6>FUP4|V=fI@2W5@5sg9 zY4E^}lgT1_oA<``?|R?unDxc<_0^MkKc;5dR?m(39kXib`(hqrz1iPcC$#8a|FI_~ z*7fL_R=Yq)pDSk14(#R0cH8jAOnmxo+196zmsGxF;#_{VD^Aa1-Jwt4f2VoODXMHz zOlWWjX}r|?=&U@SX{KAWzEF+7>z~B@x-G4dSC)%x(elz*#QY)Q-pWVrTu-;L#}=(< zkGeHG`={m+Kb6Hr>01k5o|*73(`w<~u&Q!BU4!Nu4{io22YK`@epBj>v^*3 z^e)UzsOp}X0y-kLUBH)j!$FDrJd=V^J-4K zlzjTbPGe4h9{1&qR=&|K3<7-x4h((Yr-uG4DHKK}pyk{srB zEXpQJ*Ps2zeS)*2LQ3|@+3)3zW!&F)w6!i1l+tjRyu_w>pC8NRJYW_PhF3uOnBSo~o~l_YR6bS-xk*qQ(Fxv*MbH_@_IhFWgwByg#@% z?TVu#`ziT@Y15b(bMyC$H#M(boVrEjm+KYLlTsUMa-OYRUiO3Qn4|dX)jNN`tt;DE zx8`qrqn&?@>el3)x>21^|7|R&{q1yeM|w#@S#~&607I!%mcKu%%)SZNql+u+YUTti z3|9&(-W)UkiqqTgkuIN3X4IFfyn45F{q_s9LMS{MEN!~Ww6JGyrlWJQ?p+g2Q} z*~jbD(4r&2yTHlh+}q?+vr85T+OTezb5MAeneid12C3Hclp^6?0X7wp*~XW*Jl;|l zUml}j+ULo)aCYh<`3_f7m4fBO`3=z|3rP{?P#r*A?8JIiJ&2Fhoho%q5qeOv*sw*Ets;5pUHXMj|a;xE5zTlm8}uzQ2BY~(YMy(47Jn?IsYa$ z#(zA;Q8dZoX#ApY7p}d3_HO0GU3;s3OtroIbbZ_ZLEWmtY>Unv zotYMkbtD|xW_{~x^e>;1&=hwx_|W31wagEH7|an6`@}ZkIS*?-OYeG{`op&$&f2hK z`6lyyEw|35#VfI`U7+uLYR>VNPY>%GOqto1B>b`c#gcVxdyeMHym(Jb&R-(udP*-n z-(l?Ix_^Do!_S?`mJ(`Rn~s{il@{U+nfXO8^Va*2b20Vmu}tSUco(;&{!pl^GM(qG z5dGtZzGBYP-=)V+l)WqDxw-2+$A-0bK631P+Aqu8v*Nt+@Wg}b_B#va%j$_;c6EC+izX3o4TLT%l^P8`$_@Dg*TH#+HAB04P}2FadK2#6sET^ z=82#)A8Sj}H}0M@ZrApH?A^TcRmI=OLCHxy8)S_>ls;RNoFp7&Ve&q~rtNy5RsHAg z7k|(Ht?pf5a7d@&_TfU-2#2dp$D?9wC-B{=xFz6ZnpSxvXHy8bOO8R-%5!H*5_x|2 zR7L%hRWla;%_#Ij;-b`rCWo#4dFr?HpZrn&&Z?@I%;PEQrlY*G)8pUW=YKT3yk&Mq z-u*fG{-&v`Dr@cL7+x-yTIxHuTX4S4{{02pI%i}p+SswwbMD5;=Bp-doOeii_P_t( zS8hgx95j!cmZYbh(HiUhLQ%-JY(hDU^TDgRyH7QKoVxVNWs8RGXFE(U1*{X@kRf?y zMU9(BpWfp80peQ=|2YZXy#LH;$79p8@m{}f+i2hMC|z8rvh_!q{l zuUmgN>i@2<@BO~a6x)4GU7(dQ?PYwjlieaO4Yiq8 zaYAFG-hneuL-psqsGc2S<0rv4QS*4ah~fof*WT){`3w7`H!N{6*vtR8QstQx$6kR4 z!jt}4aquVg)N=p#j>uc$_ayxI4{i2KUC-vb1^<6{aj#SA?o%C7w-}DUd3?_EZ@Di^ zPU+N3Q@rw`Pna97d#``ue(&B}$91-KKld;*-RpJM`c+kpYPrLUF#R9N=bxMB7Hi98 z{O@j$-@!9;NmlHsz4yK=^JFdNNIhP;BKu(9`DD)CnDr;QD{iJ$m`>sA`lQX%FWvTY zZvDFY^z~~dER1ozzG(AB?Iymx_rLx;raMP1>2JNal0wPjLnlrjNWbr#ymdiDU5a?< z-Tz&E^|PvWKiE9|qL4g~mz2xHPLAJeL=1J=3N5e7O(-}Uy>fn?3u{}-`X`@$YdWWv zmy12zlF0hz-;Vdnb1iMVVhi5uMQfGH2;9B#C+B|nyR%h-zT9WkbAPq<&z$j!b+`GS zUwdqFmIyz%Q*HPrxn#qe%c83`D2po1iku&26ts2E{CQ?w9LlS{*p*D1d|&@?kD`qA zTZ71;x38rSNc`x(di2BA7x|CPkFja4=6&ncG0*+Pbk68j9#PHqiA}*1K5?gr{&l&i z{iXRIfBKV?f9BXei}wG|m&G58znaQ>-8B5em9Vuw zK41R+*>WfHPko)2TJ7r9pDJz3zW%Z2@!Xz~pvK?%=%^a|(W!HGv#6{1ai+N}*~I_t zk=jF#$|}=WCQE-DI9kNSvX=i2M~HV!*lO8c9<~J~2c$MQxV0YmV%i^ia_XVjZLF8} zZK^$eTO-fHq=HTH=))}q0Z+7QrS=Rk)o0b3Jox|c{lY)H(^&jdF zrWa^lmee~lHO=s^&~o()ns*-=9((mrVY!k{Ghct~H*txtl2>-z)#aMG=EK}iP9C0b zyC2=ic(i8ToOQ07c@|q$Uj1^|@Xx|hvA@>af3I|Gn7yr`i-+-*`~8+JTx>=v^>4&# zRB!J$Js@#(&ZN?Pf$XxUjb1)J{6+4xZqq`&_k8NJ zo$ndRZrpuaTH`~!_0-qOoBiTAI;@y~KmT@UZH@nm{P};yCj70HIXSK6QR?rq>j{f@ z-}_kk&-2vVm71TbxBgIhzVz?E!)|OAHZ7$&t4|6QY+JjJ^QGm{fZKmxCa!VI72E&w zC_8_>ctYWmRSx3**+tf1Jw|pZX+=YB9QOIl50hM9t*?gQtnTnP*J)JNvR~YhMod zb^4RXF%2V%VCVu*Sl1CxM*jQ)f=9eD-W+xsK?Z{Zl zFJbmuGw#ZDef{a@eBb8W%)GdA_jIGlOQs#FnUu+rRa8CUdPQ7C-=0&oi(lMOaG1HL zO#Ww^GLvKGyd@D4~JPGi+Az(e%*;j8)h|Bs=@ngLkePlKj{N1>bWx?9{ z2U#~Lyo>={i{}q_m(8fy!Y6$ZT|aL=9043 zj&j^rE7!}q?eMyMXUG2n|83fp`G>EVXYs7hZZE(8vQFvK#_b<>#T_zQ%Wo}S^mIwE z^;edrT$5fs|M_``o*fo0kNE%Z!(pC-2mdCs1^sY49>(dWU(UUC?XN#gFVai3U!AWm ztMoBG))rp0R>AJxO4BkUCFTo}PZr82r~eb^3D}!^;k=%>+^%^qUfDjl?_4f+takcO zC)S5c?F1gKte+!)I7ssy*B=ixT~qhWjkBuy)SmuE!PRX zaAK7WEWh)^{nO3tDCb~EoO8t&=E{b4cMN?_x|#}PewC@u1$Xa zi1Vr1k=Uy~*B?D?wr!T`hAaCg zwWx9B{n=7A_vvz5#pe|pduG1MPdRCNt-qsroBYww^%nkJOFeGX>@!gdTm1gw#M!Be6qwq!v03c-FuR$v;7V~ykA+ry&&>%{H6>V5$7>Utp3mt;mityR9F=i@A}hwcFkv3&gGXW_j#WwDn*J)R-~^|`Z+EXMS{!mu!vEa*wf~QX2DGVaoK`(;ADbN?(IhbI)yG=Xjm#rOt$jO6PApT`stJrP8P6v9Ht&wqL!& zaxl>|e3OId?0pBdD)twy3Kr@H+=#c!+eNWZPrfAY~IrhVMZu6MT= zIVbv-%3Gh{XER>oyl2m}=II{IYwv#h__~m*BH>kKf>g)rS5fC%bu3;w=>C_wvcrN= zN@PNg2RAR%!G;YFbY5_?9!hkWa7ORe{q5&0&e=$<4D!9}DlxgUYajc8`u~69{PrC@ zoWF6OVrAY_yZ4_jtADZkomS!X*^awjp7%?8&Nch;rQH`Mi9O&u@wevrRX6U?b-eNE zu43EY+J@ddzv=3RO8;E*V6(=3+zp<-Fa4ITtLmE8+;Al|^p%-$|Eb@eY-#4p7Ed}; z8ZY5>Y0B~qjAhr^*jv|^{W{(7``bRTfITm^3fC3-uQ|-QcHT*?rxn-QOL&&e?rL#l zs1muW@aE_Kr^Ou$$~|n3cg24&O)O5XphLWhTwFt^(FeYRiM{rNFfCpOgM^eX+x z)DQL-|G$5If9b{fzY|ZcmFu%jpVoIjeSOQkg1cwFRqwgszx}X$T6M|IjYX?l{-o@C z@cHkGSwFY!v|TZ0_t|F>Yx`Z>m*}1O`?^7V_orOTl^(U)#TOR3o#xXL$b6MG^X19E z#^;W|d}C7g_iu1~j!e*9{~vyv&KfF4Br&bta*=Ch`emk$KbPZ*@@Bl(T(fU=iRou% zpLc9ugCfrrt|)Roe%W?U7ZIzyuvB9@b{2M283uF(JVY$jK-iEr;&Kh87>y3X99i z)|eV-URjeA`l9A>NL~N0(knwyyEC)%Vvsxw|nlIX!iRi>1uz@$G%clD~cA_krn>!`~P!)x+lM%e^ULo z>;GM^;`j5t{!%}+O?Cg#pZeEM`852=YTH-rzCLfiq2O|>PoV;9noJTUI$a*7et9V| zZ=cNa>bNLZM$`XQcay*6Zz-v}cly}|1{b|KGi=;1elw1m>(-l=DspvlWJ-c$`ad>WF0K8p$hy_M`MR*2?y~N* zHCH}eb&PRiD^*(&vs?GE^s#n{g|D;I^n!msjN1NIX8W&KqLuSAsx9Vz*N8WJ!2b1j z-Mir2e<$8KKA%yN?!a*W`2?o>GO9Y8=DSv1TcvT|fx%uicACZBFXHFtGI5*~n_#$3 z{zyucnfS8ZleY%?CaO!CCNB=YaNRXc! zywbwYcool-vmKZ2H%+i^nLJx=MY7Pmyaj>V3c0l=_ul>g>=pC8MFHFQpSmnxb^HHv zPR(Bqk`j`Z`7@-x&bw^(;N+c$J*T%N@D!yoYNka_xmgvny{wOb-(s zZWxG3v34~k9EkAH;ATDKXfVOWO;qu;)T)!7ieX)=@5W}usxv=-zv91)^pCf9cb&1~ z-x=~XIP`ySs7ilGbDiI8Zz-3Zf4*=pbuth95qWpjnWFhI#k=*S%%93Q#jQFNweB!q z-*>Tle+-RsbWCn6wyosb&am^H%-i=9>Vi9;{)n6r$Z+4Q&P*;#Gt(i7VY#qG=nf0h z;*i3czmuBcN~eiQ{`O_)e7!gKme7w+a-Uy0+^>E8bNh#lSMwFT7wmhk*89*wsX{~a zz0cw4(_=&YP5$g(6dM+!a?P;vh}XTS33|V>_dR@jDM0P_C9hrVr{!4#a&F(e{;{_1 zf&~KuPrTzS8Fnt#@=Ibz@7I1!G7m55`u~4lhHujRYg@0a|F~ai_x}C=e}&hi7e(!A zE17)j#=ApY5~)4yl@odOW^R)xyk`G9_wDu@-a+?YyjUEONCYA4Y-ZiW2_cANDzs_8>{E>4~TXAZ3l`a3aO$&~lw8_}) z`Dk;}w4$FAeN?WPy?%Astj3|hp-?&EW5UODzbTWJ7$vVk*mV(aztl$_+18XG)y z0@S{XajLd9d1l%@Pn2Ra=v24WojrB3;?w4R(tZnOo%WrjF2TI6`17nVsniL_{_dEx zYgWXkTBG%oi`HmtYCQNt-tAt+>otbbJC8?~GDe8b;CSqRRjlQtw9b(wwLYwJ4?Xrj zzfeC%A@GL6H=R_qRn}|ftbTnq{=VFjUtcS$qD`7M8_Z~xtPs0j&A=aZn=yQ!5##BD zPs~p{?q``P5OObst$Yga>!fM#*K)rpE8^)|)w?=of0)X?cfVtn*2Pbp5Y&~W&#_+S zjs78y-lK2-Tl1%dx;s5h;A(C%muV1h73*WSWNbYD>|#%)%|iv1hfOSOH-9g7o8)S~ z=+WyQ1#XdDncI)&C0cc#GVSxZt9vCi{Qf0o4FQqiXRg`@>d;$>~_s{=wcRzc7>-ns!`b#=4 zR{N<7r6Dmwf%5)MYu5Hd=YUc4%f@u}w#|BxZz-KYs-3QbrACGJZN*DDv z*y=v(@@%`Dj7`5iE2r<2WO<&zF?}u1ouh^OB$KZTTF%RzeyXXhCM(NnVaa*6(zeDh z`P7T|(_YrAedy(%{(5ShWnuC@lj)`2Syx|+zrK2AeRzbOda z8&5N9keP{(9Al%YPXm+sWvK(#zAmgwO4zxpy8q9-`1&Z0uYb$--uk_>Ht6bg^H={@ z6@Jt@`fJN;F0R7GJMM4%)*q1kR8)oYPQSvo*9?;Wm1!KMD;LOD?n>^om(fcW^zvEo zwBUokf7&nmo_h|Zj28~@q^6xTjTZfMxw>{^?4M`sZyH!1j3wT_f@if2A1-?%Wxeu}XTmyezYMvT58r*cbhMT4RdN@b!SVRe z`wNZs$F1|{tX^XpHcd5dNtVpMzfphVzh-^#+&f#o_2d?l(}!YtyI!8wpZ>j+C-DhG z(6N*LhTA7<+txa~+x~CC?9f;Cz4x0I^4xuu(z@lG1;4(;ajT%uPK%%SSNw4BGI<#n z>i+j@hrqAk1;-=pj@R5M`v0SPahtuX<83{=t2LHt{nvgtSx>0q&0pDEucq}dW%Z}K zY&Yi0?>qZZw74!q|M}Fr%?Ve^;LUo%hOLTFd( z|6^avW!vtQJKnS1%aqc{dA({*L&Mcm2U%VS8(yw`$nZy};_GRz+0Xy4ia)=rE9GRp z{=9!2oTpb5{wgWnwC|jO*|PZi6E+-of1fYjaCc){vi!;$o_mWD_Z^c~yYx8sYyHL2 z-)e?$Q||uj4lU$6(!9Z6JoM!B_l!ou_f`Bpd^ljud-_9=u2N*sb zU|4g)@xX))oc;m3eLJ3Cn3)``^_=T|Yia{yg_+}pFU(>8`Vukt`bu=J;ghd-ydZHdvQDSZ>2H(Sje4%>x`s3S0~e{V zTx|dK+U3puKPPsl{{0*IlGj(i3y9$`KawW6YWal{RDQ`1NCYey;gH&J|qpE^YVz$u~U~ zF|bJxi40%$;bJUvc|bqgzgyY=7OdIv_th%-^!5J_D*Cr&ZE&dma6EKRslmS2>7TC} z|Noa%RCCdLRjgBh?En3375A^{A1;yjWw%nz*!=&Y?|Ydicq~`9nMeF#vQsNrC9DB$niqS^QP&$Cx^PP=HCAl~ho zI^|gN)_U7tj#EPNg8y@KJwC+TB-hLsU$rqcYR&(@vd6C`)kp95z1ySe zF|#mw+3SOf%e~9F&&N9yYiOOmSpBKu6@OEy`d@8-#2vW1 zRiWYK{%Ohwa*~|BKWI3-%z|a9()JS_-wz*PJ-l{0zgB0^TGcS4nX8g|LVL5eZ94bA zQt}{539vaFWwXyCJ zf^mH^482CiA4ECq=3Lf_yHYAX^L|F|lD^L1c~_q;yC0J}`P&^yzlUk0as9iOvY^=Ee4zQ>$K8dIKJmYKb5+n!}1o7-M7Y(3U0P`U8H&ePvs zUHUV*{>Hi253FDMUiRiK*&DHu=O*9weva!;fBencZu`E=}M|=KeB!~jrF0p_J#(A+=XV}Z||+0xwY!^gM@RU_pAFBq-@O0bxO(Yd+Ycv|_SS_Q{u2AMzjQszld?=5rl8G9 z{{6wV+$IaBh=_l1DpitNH0?8ApDWMqqlek-_ozxO|6Y1wPm_kp?3;YZ>s&Xtg6BIFGQM0q^Sb5S!E%-Zh8kr8mu>gBqzgPO_@SMbcj&4_R&|qMrbO->3e3ujc-% zE>l?c*3D?=?w!aInRi1si{bwT*ZD>|?X$dWs&@ole&PLZ)2lswi?9COX=9OD9CqX6 z7X2SNTgtD_nj7X_CR88hu|LT4N5V`yr|=MsTCacesyh@?=N|vgy!qUA8+YMe^~apt z*S2l@-PPT9L~r4-=xeWo-rLrzPLECfvE=T;zsvL3{-kc$Y~%gu*R4Y`J8qukY}Zx_ z`eb=%^@)RM+1ScZJI6x%bc~vYnOy6s9ro5U`K~kyuS;U$?A2g0I@%Je zr1j)ZTEQXFlDhW2@%8ezv^`(9XRrIuqL%UV=% z&ntNUhtLPt!_-s%UjLJ@LVoE}qeCCEW=56<)II$i;(I8@*-s?LTK_C-jrprhhZ~E# z`lmP67>Ou;xW9P9gDVyXuD{vP@G5)G7A_TTnZ@FBuJbUj&=TZb6tM4&)Ti}6|0Z9E z-5Z#-bYkW4Q}O@Thlfv?y))_6>8tVLlP`bp{NpZ>eN$*cWnH7fLB}vjIf2{_l^-@X zWNpidJh_Nn^@9nUvCW~Yb)3@}7l6&<2xGmay<4lUB zZ;I?w&lys?7M%Nbb?@FR77cfGpSep@v!5^X&uo9r9ChH3DDz3?J^5yr8QrfQdh4Bi zK%@036G{~FxZyf5!@nhMC|SM$wwFk8pSx@vr49HE~s7FXSSPR1EYeWj?0Jd z@3hY{%3rzq^GM4U$FL7mTyGyzvb%ZQ`_$j+tvhZ}pKW9uHmTK3emB z+3F9WYoqLRRy|{CXa3o=Yjx(Dh$H38bdz62E&X8hScRdFkNeje_9x5s@P4q4}8aRx{Jvi%u6ALMP1 zIb1CuhihD!N>w z;=%GZBkk28hZz}`<|5lfU2b%zv0T~X>s;>n|6-m0kw5+~;$v5TzqRrH*dvn(;t6kKmDmb|LUDh zzrM`dShu%4ZtuIjaeJp9KX0NgVa3VuFx|EEKJ(rO+fCkn`XnGaD>yUZ3Tu+qN{@e? zYs?Ho`<$b0WmqL7nB;7{>rjySP(u?d}4IZ}q-#vYI z?p&_<%rX8>YRH$_#%dAX!ZN3RmH2M&@Z+kU_2i4-`|I&q--SQ@TK#C+->AP?zLN3J zPCr~I^}lz9fk5ZsuJp?eX6{Sk8oN8>cAx2AuaYTp;_y5DAD1NE6gb5%H!?CFKF?n- z8YVjLSo#0!r}r(KBs6XL`oB*<`%Ti8@KxGmZG7n32DvrY4&8rR{z}d2Ld~>0=Brtr z&Ak(MVXkN0k?+yvN7q(eU+tOo_wJwg?D%=lJu>J26+CzLdGf=`Psi0?+wiUbSzFJ` zku3P^&nlb#|A(D!7<`Lff7zofzUsl2#mx6_MDNXx(Nef`<#d`vo1tpptVu6QJHGwj zX5V*QCE*JHo5&Bh6PkTCZF>4S)aclcjKi1D9kwjHb>yJ#*UV`yrwe}9sRsR#d{gu2 zjpSxCNlot!)eq(`X5A5_d+B!W?LEFL{ygka&W>zwB3W<<+Wn9hF;8-F_z- zc66cptXZi`_gTa#o@H$3<`ZJdxmsfOO?-j0;YP&^(p**?p$-?<+s=DocAWLx$+Rwq z11I0#O_XNb@aB#CIeTlfe;i)E%}U z^TU?y!aRvOr(W#ek}%!sUzU1IAIBfl+k{?$tF_y04tDi>(BnKpc6a^ZM%V)css6|5Ef3FeJ6IYXll#I-c|G{xN5 zq-;L{8VjrPJ?^ge z+h%1^m~k**=8AgNw{piy&A;4@RJeNi{v_+f$n-61mwx-ll{jOAcbg33t!=J#eJp%N z5%q0{wyH~~EM1$O7#DVvdFzqIC(lW|;{9}M`u{EAD|vn{eH6BD+WwezU+NoqnvQ6# zdbNE*L%7|BbWs7|y(|C7G($rt6oNUwz+9?hh{MwV&$3cf`$Z{#RhZ z{fm#GsAq%Z?S94eW&abE^B(P3`%h!l`A`2R&zir!OlP}ZTut2{&)r)$3!e6l7k;|% z^Ofzt^pkdNUElfb`ugeno7XcZ1k9Lj^*!xsJ|n|NmhvMzRL?)Od{bL_?;3 z9M679?_SRtCEe%Ef9~*q)%(A9-+wr1&r97eD(N?O9}#-xBl~sFRQvu*mRdRL)uKG% zVS=mWdvA(OWqJ4DU(Bo3Ngg?uzw7<7oE zOPQoz2k@5f6l~e0Cw4Z-dB2RbL~NwtBNaTolp9$aMX7RXwZ3j~RbV+By!s39OI0I)BmjN0#|IGdFgXL{}a4<+$r%L|EZUv`vNuy zPCgLNI`Q+d6(Vy?&g`iDq4P#ZXc4o!PxP}OqaD+~U*?#6xM=3>eBM)B4^)&Lws!Sj z*>_91%-}%T7L(%<50Y7)ebSlq#&>DHTtHFLKE}3ZAMYRAH2K7fSwael-}0_J4m#6s zoot!!^K&2jtXa2=xGz8aXZld>&hZ`Jm2M=gdi)^7Q*1)ejZew{kKPNA{`0~proKRa z!Hqehfv>LLblY^|Uz~09+A9CJ{VYL`;^Uhnb@la5x!3+WTGuWA;kbm>0^a*=e+@5} zfAG91>9U_y)_iMO_L?Qt|D?q4UbVUX{fo`z^)bo3+Ke*Cw&@*_&|o&RIN@H}wf)`A zpa!;z3+;%&`Wj~ z)E7HG|GE2L-uz_scPEOcX`T4Nn_HEb79H69HQFVa@8R~ymJPy>_B$P1=;|aZ4`1$cShc{C{m~FoK#)M6yG3v4s_oPYN*WF&k%p_8}eKVUmf5KMb z+4Wb_y+ktqw>;Z@Z|D5@-|FQre3smvd{g%SENemWA4>0zuRkA?A;o?1|M~PMU6=Fk z91Y9)AN2gig#5eD&c*BBt*-t*;f24#-mk1jJQ=4=FPhz{P%3}--BVAdW7hpW4xKX! z&dt8<|NPyVgEd!!W9w(mRlLzPNtk8-eG6u_3yh5dmkutp?S4 zu|)+B?2k{hJ@xp*e8!Du_Jr0bvWVAsH?hsIePDlVe)n(J+uylT+kRFB-w~;3DA-;5 zx0UZ;*VXlh3ROa`v)vWrNPhL!Re*^_L15MK8517N6Q9q#Dr%<-_dDwe^SpBQ1v341 zo3+N}XvSO9lTS?V?Fsy}B4^_2oBF%!UH1Lw`tCAk?)4NIzvaJXT;F&<(%A6y*Z&q_ z=PV)$S9*KT59_SI`$$z!@#cKX$Lfz~dr!XYxjBqa$G^O|N6z2T^CQ5zhv}O!$wsV{% z%kITmR3(1*lyUjbr7x3qX~ykMnzs6D_sh7oNzcFkO`iI?wd%^WtzXv1uAKEfc=_w= zO&ls8`PU|tG#0k8sAwPhT4_-tw#rkNuwCYioTTZ(oL38yc~O9hUtC`-^1@6{9>EqaXT5rzQ+e@AlgkF|X{M9VEA})ia%%4-^EMMve z%Q@z3&so>xD_(az%o?44Hu#R#G7G6Miw{ijJndk-aC5*!uX`_9)#eoa&=nBoWI3An z>ukcjIQM10n%8{{t+wwHy3L^KaaW_{>>f2CHW^O)x%q#WwS3ro`psAA3T=L-{i4nG zyJj`+d#AeDj`Q%rPmkF8_c-6Ho|s_z@KsoF&k|>+o$jtHXTAS={q^+X*jSg zI%`KPKmGsI`iH{o3vPY$FBkHA_p@QOX!aj5$3WJk?d98xZtX9Sn4eVm66R^PpQ_u9c%a*o%z_Py}-d>!%hYiv>I zJ?)ySubxcXwfcwiqyC6s{XhH5f7a{}*i?E=L}2w@&a0sTOXBadXZTmIbb27fcE%<^D`BAfYweuYVX^WP`m)H>^As&2d5tCqi* zWmDVLZ}G`0+#ly|;dI-#z;MpfePz3MKi;UnCVuh-w{=gyuH&fQrxtxgx9a*44}bfU zA1qnBlnbo$+5Q@IG`!<#-*~~twmqw-=V1D3y(K@-<(IOUef_<&mL=zJ{O+&sR+gx> z>#hpDvTy6J{jY9K{ZucutNKI$F0rT(LfOZ&Z^)i~1zD?;2%a`en>bYbqB%?0lu>S?jcvU2f@} z?kTA)9VPoe6|4Qfwvs83Ys#dxi=X_8^2peD`cA$778my2XXdUnTjKIFYt|K$8cmy- ziw~4Z|8+C%+7rZd>56H>j~5vt=N~<3h;#a$@cPr4^##)|3tayuc!YK94}(`xtNL$g zX4t9kt+Fjw72101>8+=Ad(D^_C1&iH7pKRb7G$^fQ{#=i4|%ez2kwN1hUYNnFTC!5 z(6CUQM?pa)&Y|~q;Xkbp(+(+leeut4dKa^H%Jvug{uqR8RX_UulDppN-@96RJm$^k z>DjV>dc_UFd(TXbHrJXntT}XX;~BOr-Ua^^H9c1dY+6#U6Q8}}&5i&EE%Bg?THen~ z|Jr_u*yM0GG5`7NJA0R{yChTL@ZEdEPSJm28KYPxlZ?JxH@o#0Jp5nnv zVejt0W=}MzlHfbi@L;iB`lFdY47g;^a7-u==&M%#+F*A3LiOZtr>8{7wXrx}eh{-^ zTE%-g?t^X<{%`JX{1QGCwx)o)tvVK<|`BL z?!UPYgPavqB^&K0XCB#SGu_{R_53HXC&Dk(3kKcunfTG{;sx9HO*1|8j0Gz{D8`EY z=Ct?zqkYF?%gX%1N0;VS{tk)874`f5)Kz*Z1|NTC@Kc zpD{GL^mAM7Pw(?jc0F6@ZTfu$U*p@WRg*4C++$i%p~KOy+$po;g&Ze?jP2n&>Raz| zIo;@dz!%T>YM)(l`2G4t-}7v>Sl3_7{uo)ms$+WEcIVD>t>F6 zO&06*vKE$X`D6M1me;o$1$|DbKYG@M`FShrN6W3%x5747oQSPyI{P?)O_f z4wuZoz0SYr*I8Nf0yEti+xN_$e?djy>=O=^{L}Nzd0IWIS5+O4t@!_X`hy9cZ+@9c zDcsujWIs<-;>o%GS05f)xFB+M)YDT_uKaH@jLUj+`~QTO!PB3MKK=UVX!+gM_O`im zb?U#qKasxwVbBx@Uv}gBA{jFLs}$Q;vPsl$*gm(mQAX~v*Pf)8Yx%?f9iGG@yrbf% zms#iY;NPa^|Fr$$^*5gUTN@CueUtjPU$5@}<4BnPW1;q4W`isKYrk>|DEdr!89l`) zgmazU&sPDjpPkxpOQqlG$0mjLr&S;Kd%v&KQvSB>Zl1`or3TMrSNq#;bh+imW!4=s zy?LLP_sfaK(_|ISHe5exo)CFWyGCVqzu(+v<*IvrbKiB9TRH!~`i@Ca6^A~2nj9y(*gyQpsQJ0gWOLh^67_Njf@ zdi|l7oUyZ-;rAD(EB~)O-=q|Crsyws;zu`^_fqC10&i`Zf|%6R@0g^z?@CJZ-@W#r z|M%86hXoDSPv3v@x$DQH`*p3148O&S*S>a0e=_$h@5bbc`qigj z$E*@;Ir3ZXVdDA5mJ`2KQad(Y-WP8zA)GS9(Yy6nXUMH*|Mnd%`mtd4k24ZV2CuGr zY0lt#Tm2#Y^jj-Cfg_zf!rZ#Ce>lIswmdttX7iJvyZ;{j-uGx?8x@`ZkX?x8@W8>2Od5{0yZ=5)7 z`jwSe+?IW?i?W%&XGWt~a<@q4x$Jo--)k#6ZEJY;Fplxms`N$Ie{5>E(Nfo0JO5UR zbYXXR^WWD=AD@)+cFfu_tJ~#3#;1OnKNH$bMIJ8rJ}py)w_YTE>5rbW!qU*)sW16r z-tuU)^*+omY`hn!!##J0RM+-~+|?Tj@1(Bzx%p-RPt+@C@12v@u8GXlNZXXfee~|n zfL*hN($w;I! zs^Us-^|JQs=G8lzy-nI5=bO5^^2T0`-Kkxn#$Qv$G`~Z>=ymw1wyU|W_g}nxWZyJPzr~Bw zFYyu6BBtZ#J``1O{a?+RYwkmn7`}G@BjbSy=tn^{-}3)@0pGHBz6|upR}@c|FicWbMK1GEI6_u zKkGJEYvS9)nyb!M;=U3Ro{Z0K%RD*$O?O&vd-e~e1*}P@r{*-j{mkjM|Niacg|S`t zcOAXkbvLtYe;SUifldmG=ASYe7Nn=mAU>g zf9*vsvIe};H=Mk!`s20_4L4V&Cue!Q^Vaxau|wK0=d0r5<-M*k-4f|immALWvf42$ zd}DF`$C4}G=U8x7&p7{-^+W52Lg_wJgFvA?d&+pc$|Q)ln` z{H{OlPwzTRay)ak{C`IEAFT~F+L2uwy-O<=Wp_@#w)&c8ZR*{RQ>RCWH90!8L>Vgw z+%!=9d*?$}jKdPOb-yE&Z#JEnX203sf<%F8R`j7N$%g)_eqOVbQ?EQq-_2WCTy$;E z?|rOm1N@hME&cRwd)d}dy?C$Zt9Jf6_G88>rA;;5jofukO$QGaFf5a0^pRs=k+9$v zaQ!LlV$S!2A;U(EpHD2|yJ&vOkr_2-*x!7K?SJq@;p#POr|I)H)~R={ceD)Gxwgi; z_8TYv@!G;}Rz}|)+RPaxLJWM(2S0VBF;+*i&PjMw^dxM`98ukypI=n(mo_-4$a=tH zpXP2cpR6tl~1qGxYAZ z1nBI1F|qXgwC;(9+l87x-nTlvBr1hBT4ip@YmP_p!uLa8#9WV3iC_0$I3#NCx%GF} zGBhv=*z!le7izd*f8d0D1XrR{n&9p`Y!iM6HFSvxty-lA0{Vf4g^CA4-Q2xxe{AB< zn8cv{j8`PjZrve)N44`392ZQRD7Y+n!879xp>tekUs_OlW7*_z{wF)8Tg}Ph;4j|b zdzCpVi^Eg0?s=k7|7`CEKMUXQ6`NQqZ5RLRU|8nVEjO$T7e3hakhiMu&zy^1e>P8y zn#OMWbzfebNNSB$;eR#v_bNSclYbXBhrKY)|KlhVt-5rPSt{4D(=FNVcaL634l31L zG;8MnFI78Vg;sBU_kP;S{pO)7GD6sY*eR;t*qd~Wu|}rfU~#=rfk*LlZQhNC8g;_$ z+YTiv8jEen66S6?xYheW=(;M0EQj_jHk=IX3|+UGWVqIu#Ld5YAVWf6&8wsvT)Mg~ zcXEwhCd)opA{bGn!64Vjv4qjbWMa7erYcQ~LuR!N4T}=H7=O(<+Pd^pb?p;jr(OB# z_urZCaF9snVPg!mc<_3UQlf|Z<;d@Dtlf2hqpIFIG!ml$@1usTztN6=~?|hw%g}y6Zcm5a_g$>sx4Ca z^S=u{n7%Nzzh{Z_8r5L&7aI#}{ImWFtgZaNHulOP=??;m&BCFRKdF?k{oLrJxAXIw z5B5#!h0*Tk9%k6q%-O?sTJl`OIR@q(LQO7`IS=Nw>UQ(Z?f4hJYL7wyb8q&7|91Uf z8Fwv+|6F^zN9VV#w3Ot%q`U1m^P6(6oW6d;V8`)>sa3^?+i&+x`u+Qtses|kdWR;l zkdMcX-gw2n>F8O>M`!O9PT{z0oBvPc_`j9w{N=yYY^@dl;jBN2b?NSRXZuzy7MgPX z)s9(TuOzGtn_$5xZE*YEl7@~F%bxi&I3Dg;A|iOGY0-@a#>f8(80`MMx)yfwl?3b5 zGP!vSC#Rj5z@X(Bv!r#-F57o&Uklm)7AU;dYTfBlWwU*r;?7<5E#|wQ8wNh&HvUnb z>+4W~#0f<~M{=_CIKQm3Di++V>x7x)&onu3UQQ|0;#E>rt@=4_NTyzc*D#~hC6U)EP{hVDCP zKF#i!vGH%$Z#F8G`Y!}36f@4RX)=prc(?z}#`}*x$678vaxl2Yq?288f1+VX`SJ4m zJJc=C*O~CG-+6^I%i{FJSz57TOTXs5zWTazV&?i)+Vj`fMZKO{@TNtA)hqP<=i=1L z<)6MtZJt_pT*hE;+@25?*}qHAZ`SOq|DmVs-P`EDqq^kkjBD}x3nHH!yw99$^|{3F z__>Odg|SNvTKgxw?E0KAiM=9DG1-|#>MS=$+)6{iZ>oQ`%n?4e_B8jhs?A}k=0`8d z?cHs4%`4f5JMI1kEp3;7nZK=H#CGkP7PS2Kqu}>y@b^P~o$K}a(pPUzq|M+dmx&NP!%%tk+;k^r2@Xlmy`hH@2sT8xat=r=Bi+);p zh`83Ax&4f%c>n3IyV_KGo28ev{4Pmmt5Fr%pP&4TmG`_^UvpAzfplH`m2d0TX6?+p}-&D=trWMb;mz(Mgvb z+(VpVQX-L9gBaurLI+p{~{=D=$SrcHicK7>_A0ygrm+bdc z+sO9GxPOOT-5EjCt~0jB%FC^1n+vD!=b!I%Ox80^;XP*u()fYIW_e$pI)kwsBuPxz`4}bN+ zMNP20q})1XlgZWGQkH-9EXQ8Foqjd;kedFLBRV}()E*cv+Pv&>WcWvG?HwoICUCKS z&lJhHf9*-w-$~#9xIa8LW$%@LR(^%=q&9WloRrD8&3zUppJmPF@_qhySTeLfJ`p~g z@SE@1qEEpO6dE1QskfbSJ3FIXNAixy-((I=RxY2I*g|!d4~`4da#A9arq!H$wNxds zy=QXH^V7#)UM-Yj6}8rKoz#A)Us1|_$E(?MpM6%pGcoM(&$FzR>xCXJ{4`-!2lJxTGC?@j(J z7+SBW_-6Z_P2Zod&%dKzos%biqkH9=3IF1Y?j8!ivn1+e?w|Qa;!l?uzdQKOW?K3= zf6>pLqJOIA@AxsD#a=Z`@xIn&^L6%;dg9)aQ!?*7XR+A%$4u_kr+?}*#G9B|60|Q= znbxo?*j;tmWF~fV#tx>M4X$;2`7^WLiY)kBw0i!Rai@h#? z-d}w)f8IZTra#`CJa&4QUwNyabGB37J?YZDcyUgC8y(JL2fj^7W~}50QTUTmH#{rZ3V=YS>mHV1yH(_ihcd@6hOwq_BV-R_=& z;@cg6i{JdgKkHO$(JQ$r94xnNThe=$)y({8=BZe}=t-U9{)@98KH0M7w075u`}dMu z!VelVWFP-BXU*5g7Ob0pdfnb|WViL__1izaf70n9lc?|9Z?r?f+B)SOBDum3?5aVoZYfM-XQ6MJ5I;k=0XMc$e@*H(Cj z2X)^$w$|s}=4NyY^u>LoA#aj8hk3tt7%Qf zit_jSy}y>&O5{!3eqH0F2Iqro>FU;jD`xVjel=Nh>e`o@wKi|3{ha4AK}zYT*v;R3 zt38*!_fBa$zND4cC9+7iBBT5L#ha$TXVrS_;b)Y4!+iVw*UCR%IrUO`z6zJ^^m?YS z`~J2c*~%st7f$rzoEG_HR{TS!1bh1_2M;ic%=~AO%yxRR?xUyXrWYPRaj87qc1Lcd zh0~{ithmgD!kvCA)%R`sn6sqXY5GLD{raNav39G2)!v=6Gl^?Gv-`@9Hfvf;{8KBAD-xcGR(uUv*}F&^>UJxgX;zuc;- zdS=N3fk*O_uANtkyRCZa`lNRy4-{3V>gLb+v3}O_OG@&g!3Vm16-kkL`qy^S@)h18p$TWFKJ`{MJ$tg$G$Qtpyzs;`_D7Fgp7?l954(*2lOu22FR!Zq zbFiK90Hegrf1(mw5*7wMo3?7@bEh(=ms}AxE_YQL85%8xG8fwPrkVNdDlWJZx}=)j zC@iSV?DrN`QxPv`zc+tQvRs<{K5k)^?K{z9ZvVbosGH6RO$=)ByjAyYdFz&`o^#Zy zRHJ9@H!u+8J*YV8(UX18A2v^AHYz@J$NNeW0=>b=RW7q583#j*EB9L zC`r7^^_g@avq#HJuY$nV4ZaI!KiK&F2)B*}r<&X2shLlgto7zJ(Oi7(m@b3FGOiNt z2}eZ^E#fK*eD3%0$D2Ql3~TJCeR>_1CEi=bobX;wBgSG?1$V&}g`MnoJzb*#F)LKhUD{CXki_uh2~SK@VZxCY;xpJ@IUZnCx-vmcjEmK&G5J8m z2?=f`rb7i2LX?y^I%1wztXMT!-!xEbz0qskqgPg6xE{asPEFcM6I^+r7VbuG631Zm?if3Ym~2%*ERB zFwr4mf|v+vFJrQW$0A|waCx~}=f&P4H0<<{WDXjL<}g>%<_PIOJb`cC6Rufhq}oSDq#D&eKP%ew03YWZ9j zIwclzvf`Y?d!Z#U2VA17oG(nCBJ^RuoSNVR9v7+NzQ{SEJiqParFU(zimKuaO$=?F zCvF-M-Ec|U@rXK8M+lprNX&(gN*gWzdzbs~I~uw_wX;xO%U-S5T6=+GuJRm-h@4K@ za`PpB{GDI?j6MFk{{8>Hwz#XAU)SD!b6PhqyIVn*)hp}y&)2OI3D+)dvv?R`ylsu% zWg`zxW?_Yb4M`vPr!QjYW|v}aU|`_abDSl|-o;u}C3L(n{9^t0Uq{tfUtL?X|NE7l zMXSq}`2T^0La#6>DbMouc+(z1mGPtJ1UH`9;5u4_MkX&t70UH|G0&-0hg+-hcIf)Ze1@ajzymC=rrvJtjYU z+k5k%4>NLDT7NA4=z3=HA&K>I`@^T-iha0Mcnx1`$dMoB&5T_$m(ON!^q23Ob7O5- zXvK_p_ijfW{>MM;zW%gZ+hGoj$hprwASTr(R+@Env~ETV62n0fF`^ZVoV zKlZUQRj=OfDE>3++NybftKO~JAG7M&t^=N|O^q7*HtD=}vJRnnvqKFulPt1++*!kX zvQ^`nvWDG|#h%eEN}O&OG;$jQNGlEKcnLA2zc)iJokjZh2(u=dfjBrh$r4ygT-`3v9lX z>u4YA8~@}JYwBSQTb4uZO-Wu1ZYuZe)z>N+?GJyU^jz!Lu81Yete->w zOXuv}sjo9{U8mr?V`;Bet8g{QZaZ|(Wl7SuC;zxQ?;dJ;)p_TLa=p|VhcD7^*Qfk4 zneMcb>w}CX?@cE4eGjg$`1kR?W=r+k&9AmUKfH)9{#E?9zOERn=qUab0k>9I#?9z; zj(DdaWw=(cV#=oU&a)TxY!M2XwDbH5i<2B4C-=WqVbtnr-~aPcY}$j|_t9V9Z>ih4 z`bg-;yH{UdeRVKU+g`u4_WMV>S6}{q*GgF=i+}%{`Tz%yz{upM zW^Q*aR+WBf{^#`MU%GvO0E5nm1AINRUf<} zWO4Xui}FKZb~DT-2b>qbGUa`Yma_Uy4-dunk*)y`m;af+>ruqn7HdxP)AG$fUszK5GT@@YTb_A!6S8UyD_I{d=3A4+z3#H2 z;zRQXX#x8VE=X1up6?d5H0rW%#^VbT-?)C9KNS$Gk#frHcExGe(UMO5>KJkND%8F8OADT*d!1+WeLA&XVJ- z!RL;aw%7PX?AM%npmk-AfJ5#p2JQM)m1mpZwd`a#%v~-1bQ;=l!ty`1Iex-}eF^i+dP$ zx;&`PvWZ{scIWmC_XX>J1gsIT)BBY$@u%#kJ-U()cU#}t{_vOKZX2^bi}tSGx~JvB zzKVqS-@jR&ypaswcziPJ&XoNZqoTv>Htp>^{kXiZzUd+Vt}dxg<`cJ`Fk8e#vNSgq z#@CsspJtv>8$~ zjMbJ$6m%>{fBE$IqSTde?Ld4CT_l1SJ%7$LCcSNW(UQqK2SU?$5s&fK;4l+Rq=!O z%y)cy)@CzF2s$N9?RvGFHI?PZL=~|UAq+G9MVQP3BvpA@tG5XK{{ClvXM<2}_`A97%7uEWkJW7JxH5$j_e~G2etyPeh4)u>o{0rc$0i6q zPOJYA$=ks0m>{vCo9TdYp_Sc~yXY)j92LE9<}J$!X^QyIb=0 zR)3$Kw!19$pk?mv%B@?LtrK8bV$jg8zIMv=$h9)R*dz5W2k}okmt|A&aq-6_BK=I;OR-{Knkf%jTY+UoSr z!M)Oxzw0lUFy-{TvroRPKfH~t;=F_J&)MG&fBsurldwIbNc?5o^gqfMZU1dZ{r_us z)cTdLpR6lcbus=^`dRyp&(1SF72M3eCcgH`xBn(TH2!b8$n{@z>g>`r`{VywC`mP6 zj{Ki6YxR>mHT!L^2{o$MM^@jGT+6s{-|XM4Y_lSJ?yAWhJ3ZyXK}qE=ENoh( zJu?4)+7)lobUXRnxYmm)K|+Sk8h5?fI|kf4&kw_y3>PxfgOLc7E9M|Hp$f`@Tp2-&iH( zwd;>y#D#f*bw`%{{pGOZ+Jow~ajES|7k#3xItH@)tSb9FmE)?XqVd@qKiMRlPV;Y% zi&&bzc$?D^ZA0%RwMjer zIQuh;i)ISfE|P9f{Mhqnew`whfKKf5J2#@jzh~W8{?4;A>ifwr|2S%%3F?*H z%c(g2WA>g$%zP{|YwKB5(`w}BUwQnX@!$FnC*5vJSeR+O{VDubQ{eIqC4(2Qf6J%5 zvT>RGX8q|H^X+MB%5A-OnI`5=i`{#7{V7|65Vt;qq8ZyS&S=`udi9}jhzWo0qZf@m z*Y-}zeAPH@PyAG^h!;n{T(MQf zE4K%~FXySaet7+5gY3T-iIX$O%yZYj7qx-MNPLF=S zS@7nk|FKOGm#~UJ`k9?y({^l`ekE@2 z7rV=k+!%ic-2Z+v<=n)DJn6Hg^rob5OS$t;S}ae~b~EP!?Iwd|T5l$cPLg&gGxeNN z@nG?r)1P)LET}S>@#KBtHwih%6}PU2$ZmMvl_45s_}8FTKm3=}_1Dv1?5ta}`u&Ob z2L-!a?~B(w{w{lRNB-{pYrZaf{qJDwm*sc+cYK^?yGY&S)4x+f~5l?_+<8!j>G zaVC_pCi=ZNy++WO{jkT;imy`)-ml3`-@w4YRqi-Tu6-42{X*TiHRa-^r8V)d#s7X= z`)hm8>he|duhu;OcD3#Q2mk-mzrMe|Pc6~wcWnRCmp1npU00rY@I%kUZS|^ln+EyG zA{8@zW4|Y;2YOHFY509KyLDOnWd`4Nb}NUtS+NhNRO(tWI2uSRw3w^D;BL@KVd&sJ zrLKJG!Io8GwkdzVt8)J6o3rED93d8y2OR+kWs-8MGp|dT*cV=QHrspp;F}JE{JhTR zzyGyh$BigDUTZ_Ru4X8*@uJ3p_BUtP2M_N_zDg5Gz2e17zJ z|BAOek|dn{m$jw^aSQeF&#=9%yYHX=Cik)h8A5Cg*ZjhS0yzyOxQe$}hJwE^^%P#nR^wGu zT5iN1Ii6KndxEYnv<(*8Yp%-^C+_-h-;e&StH0VtTwJ&?C{E2umsu;))BEGk>+4Iag+JcCxFjX;@a?{aN!|Q;$v#() zpW>hCqNyczjIHgIN87c9J0G4raJt{oc2!6D9tR&oE+NS^ehlAYGt!+L#rJRfd;ehc z%1_~Hs?zU&>&<-dGwZ*j)w}Fz*C$GKPA+|ETU}wSIYC3TRhDU061T4LjE!HS19T3C z8buT)ZM6S+SD(w#(XEN+P{Hh%oQ1`$+gp7?ERv>WSESH3)Oar z178Vw$t^R^Nv|sv=yK^ldMuxN)rvJIw#@wZ`ahRI!KJB>%`Xbo&D#G$rPKZ2Bh7+I zg5Un#uD4#HV0{1h|2eCF`nY@d?2F8b&@q{sdis2FZiEqQNXgUf#amgU!mW7|lRBDq zcZc#;X)~mhn*6)}AeQ~kVRnsG``t^g>^-afU14caaoL74PU~qm{tEWYub5n+(64cw zBj@lL<;l}p_-1WCdp1=dS3qW})+?8egE|wmB45d?{XO|@2jUIY^`2vuTyE(v$93(Rxf{UQ0da|EZ1<cLjO-G0&ZS*t%E zz2wR4aA`{4f2TWLqW?b!eF)LbKG47sICGuw!q~7aJGeS`?`J-K@cZXNJ~qdfb+3>8 zeBQ^_cxhd4vVnpLZ{5vw{%7rilHt~~-nKqdOIiJ`a^|wX6NCOq_U+d^;@I`)w$A#R zST@bMP0r4o3e|tMA6TvS`MK6@`O*XXAEgIvI{5XkaOkH%pbia*SH@~Oqz$_J(j0lHDJ-c7m*eA>oesT9l$wLWG zEkRx;>Fe(0Ee#V}m?Lj*a%W&r88$LVvA6i!5 zHhfSzkzRd|sr0%#%M-kmGdp?oGpwn$5Jm-~k>Cff$X9}lvRDAgHMwrX$D%%|DutTD#e+FfbJz6q>{E|HOuHF@;khCPyUgSy~RdE)kl=vZB1J zZR5A2@%oGQs+jp2uRrx@GiOgi!_4Ds$umt3_N2@Tyt!>s;PV^H)_>OTHF)f1o}utF z_WpgtJ5#5q%>QXyl+rE#ZHMVC#VIpa#vc>sn6*4FCSS5z^!>@Ohy0T#%<3rH-hcG; zu`VyIoQ0oO$%~!R*X8ck%U>P$H-2BR(EHnOx2iw?yZdM$U+c8mMNAJvAAOl|+f<_d zb^KnpnMJ?;%%2q*d4KilbE>sb%RjheoM^53^i{EWpNZ%6+_&E>CUbbs7N0l8>T|Z& ztIMqg%uK(XI=9bF=H7XKZoK+_s}%~(`)v>Ou*rOW{P4)ULazJv;(Al%uNTY+_FS~_ zzFe5>hr8Ie)=xq(or$p!-tT@B`EKg{=S8)_buCwX@3(UXU5?jiWmy(n9~S>U>g)QmXL3#~ z{ae=C%DVeIN5|pF>#`C1eArH3-7u+R(+z`tqUEAu{|kj~9=LR|NcVkMOOO`p|Jm`a zQLA-7c()6A%Ln~5eBH0m*0eon&5cW*r9a%>FF98&>V8UCf5*v>-@OZ?SG>_<6%eN*?!O+E*zlb*I! zuknj_&;0wDgL_Ho+B5b1^&F&S zmFfTXt&@FO`Sl_1dYzW)s&LMr8WSU#>!%+t>+I~CCOhTl9ltpp{(btouXR2hteW;W zkK;(MlFRLb#wUGGJBZ(X$x-p)qo{rN?Z0=Xm$g4tUteCgA@uau+VA(@TdMCma=bff zk>%;ftN0E*h?e8(h>Q3Wnk19I=^qCx^Mno!<{1GqGbZp{cNh7wV>!oq*$EpM7>J`(yy@D_1$9-SJQ((j9%yvYlz|}OH^{?@qzx(&_H%{Wde|+|Ic}9)$ zreuko^-Ye9O|c43E{jgr{UMmv6jbTc#cw##Z;vF0SQ_v1cKKyO$w5pX6f3l*@3G+D zFVKCzD)X1%yR|Vd<^+9S{cop!QGCXv7c+CS+V%9WZTk^(?3hDo$i_=4+xYVHe;V!C zusrJV?&rUbT1-C86L6YY#2~VVwQ!jIzvc-GKFM5v#FYP z;!8H_KW#d}a<*)C>>tmAHabR!g$ktBi%BN-YIv@+_heag;D!N5l2hB!;1B(m)aB*W ztS*{9j$+vKLoBUHBDD92g_i%L^sCDP)toyvY_zGpwUlv7&Q00a`j1QdboS3>T;jo! zWgaNSI#X|F>DsRsc81@%-NGPv?LY&gr0;Ukn?cvovsYWzKhIT8-}>XVJFoMVZ0V4F zJ$EKGUe$6}O=HSF_`uPHQ;Fl%M7i(T+Fw^Ksxyr%h_KLm!?mdT3?w{A}YJ2{Uinu~R#e)qQrw;6Yu)-kmf=vA6Uk8mpABbRi zWc?`iTkF+8X}fo8>q76Y_FVNazOuab%GXN!AdVx8=2W~;O`5l4y5GtNQcO>K5|=$- zIMq{ra;u1Y`JUtxzizSo+4V7{zP)zB{lANXwM_K_PhPybJd?*X*Bs`N*9;`pi>seSqqJ2S+C%rBk~{nviu>-4Mr*VaG#`m^5Z z;F=9PEmW!*u6WI5W-#3FVpqTpP7hViZ&`;G1pnObw1{m_&Nh##NSEcwaEbQd(Vnz`@hUBXd-Kv1R<4CHbdB zM%VV9|2qG?X!li5reE71w*GIe>;FzBkNKQe&!+BBQ0tK0xTqueO3G&$LAy;$H}@^A zFL+*Nvi`Y*+KMY*o#*V|6(YIn`m24sm;Z8ZF=Kn?ES=%}W&wi$+vdxkyfS3W4_fsY zWC{LIEr0lIefRc)IVHSWE4XW>@!h+fHG%)3{xx33+VAx{w2uEdxasw_L-KQ$r|y%Q zGvVzM>61GGe(~JeWv!?tAI~HC`cqw&n#aj3mEQ0ab6MHfpCUC|US0dK`d6=Z@H?*G z+q^c(z5EdM+_x>#NqlwF$%37%=h9V86jlo)Ny|ShnfYAEW#QF|z|j07IU9eMemL!1 z^!nuA-`5wF{@Pl8Kl|>r`i4C$szO`68y4^^l>M-N-Hu0{V(Ou>Pe0t@Hn44DaBf5hZ#1!{~*2L=D9oj?#oYp{hdX^{(#cE$Un7=Z4uMWlL{9$9)7Xa zdxL~Yq2hn;2^JUhqi^Jvye#Z(sZUTl$=g&sEVYGqR8Pbg%rO z^Xuo=^h_VA_x~89#SYr7Yc%|>_tb2<<*n-pi^IN#%=yZivRR|yrf|}J+lnpc+z(fs z%nlbmu9_KGVE=2n+Oc3!y>-{KoE9Xld2})9$@j27j)JM*_p@2638XO@YSuqF5;;Z2 zLtbKbk;&h?*QEH@S8xcbz196~rzl~<84^B}N+RMG_gYd5Dw+>y?Q@!xVH?wC!*)`5a?&KtfDhGzF?(aurq+ZXR?|x$W z$3-E`D`y&ca8Cd9@2|)+4mM-$S4tZaUjEYg#dIrOt^WB><*KGf()KozYk&CMT+eKD z;{T=@H(rE@{%aF%j_RJBwes}U(7e2eUrm!5uYEf_#jZW-Z|GM3V52QfmoM!xa(?^z zt$20JhIPV8EeBU+e~9dC{V@H_{yQ$xUu1q}g}(UesCM9O<>ux3YnO${U6$0V=jHj* zI^|{M(QgND=KIOMnEXPv)8TEP{M{?NL;`O857O42<)aw!fAzP$Azw@4d;T9jEI9j! ztng<3zdu4eKC3+qd%r$=uI~RH=cZFf*DqXj+<%gpb6%)ekpYWn^S!2Gxm7~1)|{Wb zxsq*(fCQIG;W4(=kw5B;a=-fj?6(aKOI;Lf*tOv6zZW}N);gzNy6uu!VS8AamHAjq zZxhr17=;^)mvGjeci~yGzB~SJ?TqbVf17SzIu2PT%*v&;R7T(Oxh9^?#nu6UX$| z#m3njUK=~xRZ`6_P2tV4)7q@;%9WBJuFn$v5ja{qu`|^UCLas9n6D(1%-d zrq^}9+82L){hgoxw%4t7{q|~2+Y!r~dwxu>@E5-%-siJgq;j!---3JR3x9sx{qvqh zW9!nh3HvP4zdHo=>Lxe2zyHW0`%zml{QKP}pI>b(N;tk-VA&$e*d3R|Ed)18)u`WD z#JXsz$Mwi5=PycVlx6-rSRwGE`RT@;_SY2DX50>V|6I#>Gn;{;c26|KU}dQK4DF@n4O5)lb;$i}jvnwUfzw6) zziZk0>w4)UO+8!f|1R&Z>7Trn`AvAkTY;wcZ&cf_ta&ldzx>}G;i-&MjueJJo-)D5 zZh6+%x8>i?)zHC%?Yd2Zn*5nz zPG{VNW$v(NPQSOVMyi7I5aTk(P$|*J`ImXhegD1PzxDr{W{x}FHFeq#Zme3Xn~-F) z_`Z|Vm;F)+_cadP)_wV~@Nm%)ZprE5`*f#V*>}9>tlCk|HGvcB3-9!&^DjOsv3UB0 zS&!IV5B#iH)6tq#+2~qrJ0rwKylFGseDYBl84V_ByE!#X|L)?%#?X`NBCKy=C5)v@t$TP(#61<6aUwcl*Zs zLX!eN8t_GCZmG}Nefno18*gvp{V12U@iOPazRoW?xRcX7|9@mkzjk2G)>$8{Pw_{D zXrx%BhW_P#^-lWphSattF)Kt$dX6!CwOo77fM2He)4%m`-S^kJ{8Fu3WBzMzh00ZC z*U*G+*_proC_fH7`7)sQV8r_mE}046?|6inEAk}m_;07>a{pzFJM-38;%xsN;#npx zXL?yTC;wEl*sSc@$xpBRpZs&x{rzXpx;*`RQ$^oaYWj;MwnxG?atrR9U!%cx;l??= z+}_U*jSB16<|qZO`?bt?hv}88HO?DXzB8_@wN;W4>)@`Gwf$InJtHT$>igBIuTgue zzVF_dIsM|?pZpe%d-hL%b^XQGXV*LS-kbdUpMd0nce~4rZ-4rkr{uA7h0)%P7d?MX zytt%iTh7nNXFoe{DtbOsU{ZV>r%T7&L+3?!WVhY=V!I~n_4Oq?75cn;@7Fg8x*tAn zm3>gdy+YvlME1_fL49d&8ro+#lrT0JvK`QW(RKgy_67z9t~-vitW43h6Q4*BUoOYAPU)`do1;^u9iF_XP_rud1m2|64m%VfC6@hs7?>yb~f58XvVUz;MQu{{l@jwokQwlcB#SwCCMU-5Rgn zzit^TwlX%nV`q$E>wj=>UyMVs*Xpj!yqm2eu_r@rupW@v_f+jl)2^zOHk+;FigP7> z=RH3A`A)Q#?vKykR%k7G!Q>+}k2%WordFAIcEkU_t-HP`$bQPPDEASw&o;fjJ zWP4C!+Ke|3r`^c$@sQAJuXtF(Je!Fjp+bx2#v?JtfJ=3ia?+Ybd;eV*W3&oC&E$3U zx^Uln=cVQQWA=wnzq+z?_1!;r*LrN*BylU|k3*R8lzkryeLb641se1k7aU9NdXSNG zVACFj7UP|cBIgbj&J&0fzkSL3Q5*Xzm5=9jKD=Oh+~UX8)Mci`DX`+2bwz_m3!@hE z+0*VTD@-mO-Ru3DCAYQtY3MU=vCT5F8QOxJDoWR*i}HU?O?qp(j*DB*+%(F_w$R>k zM`Uc#cBh#gd>+!~Z5lrgy4;gG-(C`?&E-(oc!zrl%kpNHFvYFmSX^6svx&x|HE9)3TL(6|YwGyEYw)-gf%mlzCh4+*`8o zZl`{Xi&kRa;@5U@%m*9}rh z>;-cPgZr~@E7MK6JQZvkI!!M<*``?&`Z?Kko0-B&GqITFO20}5{k@a6uQxfilkt<% zeOU&vLo<)x$=#q`U67^cbNIKI@(V2B95<*-lU1@4mF&^}EX6Ti>%qGu&NDT(-xpywJL?NdL*EC;P+&q-7+8M5N3&_Px~c z{k~`F?amM}u3QdDcDV~}d>@`&Wn_E0Wuu!=g`%nRy#*_bR_g6Ls{i^yyH90RMf>q9 zvKj4+8lN6r)8G2+{U3vvB?1qOZk}WOw=(?6Bdyf0!ix_!E;RqL*5Pw1!*mtWsXY_lf8$)gI6*IFzIe*d z_nV?pcHLg^^{XH26-(XB19LVvDIE$n>Jkb*{8e)!gXYBpj&h6t-M#;(-9o`CKFKg@ z-JLp{D4EZO-eH&BHM?S;JpSu1v3*LGjX|we5{^IP9E!G$}MXAidzppe$KBF^66%L9H=(;yfrM4MMf#Glzd1KmeZMf5dGCd@ z`_7eq6G;%T{Gq!$B<_8p*Q31us(%z*rYASQfBLmDC4ITug2ha8(%c>YYUPwI-Tg-3 z^6iJWzU$u7k2|;XwczTgJ?balACy&i_eH@ma@y`gY>V^WAJqE&N3-kXxyl1J8yHd! ztaxD~a?RVTaps=g)#lf|0#C#(j5pO2oyNeDa7$LMah>5$b%Xnl*KIvMYlo|#EHBe~ zv1#YSILd;TOHRLgQT^hDhrh*K*phuiPs%Ua{Kiu|x@hyhvomM^F$!$?Ew8`gqWb!m zzdKZfceYm@{GRZ8`rE^L`l%b{lx%MJ7c9QqWE8H-7(^q|HEI%o~zK3&yY@I>r4_ir|G9ERV`;si|flW34VE_fJaNE;|48_tGQRj-J*n4w3!7-N{Bt zK`3nB)fY3%Z|T;4zp3{7-5$Z|X>0yC3aAxDKX6z5Fz@}T6WiS~64t$4{bKXJbG4dl zCp1|9udmv_dsqFBVBJOgj~A)Oiy!@bze(p=`lT=R>yw7;a_qy?)Rd~ z|M%N=?~YdLuHW~yLe*r0UiJ2A_YS65SXr?Z9o(LObGqV=tP7=&GiQC`mvV7=s1{-V z|G?yL?-mCBWA%{JJvC|hzQ}E*HWsUnrSTP*Ru%wK1R4nzrzZh-y^x9lj6Gfh@EGxzf=9P18Yty9#a-AIG)2C-unN{qxboj-_8BEMX#-5X^(Tm zF)3AX)f+lj&lhTihPK;F72SPx!C#{M;fHN^u1)hYiq;BW_|tu@uSaNekgECVF6S1e zDGjoJF6e!JQ>}F~bz{Pp!>z4qCI_oxQ-6AR9~X3t>g?5$+#Ay9W0#lmDtqt4tL~qE zB>GjO*%nr`ubc)ci$D7dYNAyX?(%g@3Vp{HZAwpZy|SZdm`> zu4ymWyFY&3X}gdo3hypXVwOB;x_U8-`;WF+T>=waj~+90pDwuK$I8V|oK)Qo=Efuk zwz4gJFW=r5vSz`QPL=AG#jmEFo><-UQssR>`JwPT4;P+`+Uq7DAQzEv{zbP4kNj1O z4BMx_(*NsU7fNt4nzVMIsP%~>$A3(IB%@c-Cc5I&cfCaSizN-m)fUL~_x0b4Y-P2y zGVl6(Z^5NA&!c~`3VuK25O%I7o3rTC>GDV3d3WFb*>dMhz1eq{-)j3m_^vv+CGJhz z^y*W$Rc^l44wOCX7qesY91(vxv%evy|9*RTbLEkPA9LgHT$_Ki`@{5yb4++TekQGq zpKf|Y%+7E1=V{-Xeyut8ul?D-G7GDnwp!1Ri5G5FWm~;J#x%^w*(u=3n{^)|a)11u z+qL^>3g@fVJ@fX4?X2t8n(wN-_x{(UIqIvKs!WnMh3%iy{qV@;-*ti>exj@P&sy(Q z;~4j4?n{*iE#<2Jw+L{kh`Imy@gwk^`JDOvPg)Jk?H&gAmb}@1{>S%KwLd=hJzZeZ zzU;=?4o?hK{ zPEhr(^{uj76^t5JCa8&Uvoa+*8YSqs2(unK*687*qR7eObT;Hv_O%|B``eQBK5M#r zul7$|y+8WnJl}uu5C0YaP<`{h_8`t`v2?U(zl zC7iOvk9dXlTusly<-7;%dV@`EZ`R9;GnUu?V_?11yCVHz5_{dVtP=OUx1l%PPT749 zHfvOJh>I6c1^fv$+3dj&8os)x*xx$KI#;eBS>f{^Y%-y2>KX z^M%$fm24^5s>dU{b-fFp!efWCQOymEN+CTOqC%`qiH-&r48&wudqDSBX-RN%F&%Q4 z;L_7ucKVssyUtn9SFJ3aa3||hxbu%$ogZs{C_nyX-|=5_bzfL{^nT8z|BfHO^jv81 z>zBLZ_gK9Ce%{EcWg>4z#qZiX#&2o^7GA2I`CLck=;}?&cijrfTpoLm)n~UZ=c*_B z{}|q4Xc2k;+>%-Tg)5_5;j*tM3J$EB!jOM~x4ZP`>-(SI9Nb^-Q20hZf`P@_X|-Re zZp*>H#~2=2eyy9DcPO6Y{v(c<{j00qbOn9*w%X&~+r3uLY(J$UaC>*k z-%B0L4I5-~xYs7~s5s~+_e8CkoBhl<$o6sC%3Wt}h5ycEZeSFf`A=4@AZDS_d!c73 zpBGw`1bR8ysZQxzp-uO1nW%QRfT~*K(;P6@E zd~oB=l~r>X6|PKh6VVoAW=v6t&=BM5VtN>`VUCv|CyPsB?(we2dryR>t*<(HO!cX| zVNKeq63h8_{=1&7tN+x0vHtty<4?k3?>E2t7yax0tJNRxXU~_rRde^(r<>eQ7yXkH z=DSx@zuzp*UG2P9#+_}?JMX6cy?LxAJFZgt&eAohGopXJuw_^mYU||ca^(wKUvTfy zFaJsv&rg#2l`gaOo#K*{Q4hpcHnwOLef{FKzVz7DYbQ?3v-vlV!{>diy;k~$dmE0f z(=pX=3A&xH!n4T7?y>gY-TA)v3N`rHVy83D4sTw1?9ZCqoE@68uep{iNn3R1!>e1> zyJ8M#_?_-pdr?-xr$-*2tnkPxLgIYySvgHiaqvE=HaMU#2fajo#^T^t^-ul79d z?YC!v-~Z1(x_#x_vQ_mz;5|0gjqPgbkeWXLl-PR zY56ldtLvA#t!$Eiez~6Q!=t_bK6CC>e;FQtuCUV*QEcsXOjd~S64PpJWlY%N;vy(jyfXUKN^L4vgWnS@`lot7ye4B z{7N!#i_$oL$^ZVh1`9@okSQ8rLagmf2?rt!#H?DG79}kRQ4^AEjSLTe^-*MXaH#0@ zf?Jt65LzasI=`@1cdawZ!JvNf~Lo2vIm|7hfu51E`>*tnFh zPTm#G{_T&asod@jD_-8ZB>H2s>MnsE<3GtcpY}IQo)ICva>w&KujQ{7#e8sm-dobJ zOZxGWBz^OjY_;>I_g1`*ooljdqNRey&w~oOZ+pJZoO^R!2J0K2wlfP&78b1gbbu$= zvMT%i&#J(j6*s0h{Fz*-^J(Ylo3G-TTHc9Hxe>#eC~jFGlQ!j6|H*e(w){z1$j#%* z*x{*sRAN!VcFDyL%NY+aFmQ1>&QjpuVy$1O6MJ9$|H=Qg|EK@||NH&dq$u^R-~Y$| z|1Y%u`>);m_g@dnxqGd%M)AH&{QA1?J4{EE55#hOU@;N-p(m2h#QZ#KgD!{WVW#dw z3x0Plkvd>i#_CYJ^?||h09)D37nbZdTdcMJgC-YC>HA%i`|JMldL`}D|KhVhY~$&x zL7I{c^S0Kl?fW0Ux^C~ey)iRwGqm*G?z!EFago2HF!f#A&wvG`m-N0VzM8typ2Lwv z!$r&Ce_i}z_ZgG>>~-cp`WS3e^H}Wu0~QUwrrF+0ObqXMX?9gR%~|5pSz`MnPrZKf|CO;KQBjeW31a5?|VO+q=FhM7#838vwHQeSee4t0yIh@aMZLjR$Gvl@KeWPrC&f-z@mL_i*=?>nzLJtfwx^nCnocnZ3 zc-7auzHs~a@>^T(t-o8f{p@}F6KkT&zZ}e*@^9CgVh&~gt;M%r{$??a(@j1ip(Cet zHqAliZCA;6arsk=R27AtxT2h-v`Z1vkQWNWW9yK=h zNAKae%KUDQc&uM~Ox5a+Yr7vRYrpTUDt;#Z(O4nyGfTHk(hX(PH=GNq&I>;C-QdV@ zvoOMUMajA=@3~~P=FGUKkvT21X~Fe+_uF^&Xn&WUeyaJN)yXHV>Hl8tzQlVe_``}5 zQMx|`Ppo?#;;~-sMNfHY)awg7D{ij*=zadf(q|g#BF{cVcrX8VHnC65UoI|QJhCp+ z+Uf5B^Jez$rZ&cZYv(_>KK)DMOr7LA4C)SFLOD$S$>v4<=>PTb?bWs6s!(PIZQ$BUWW z_u1OIHkO^-_gF?N%F4t@&T2x>DTY(bR+(*HAO zw#!OIwvWH(m-k;WN$`?-ovU)laQS4mPW$_ZCJ96(3W#q$Iwg>na`@@Ew@o=FJWmqm}rq!?e=QOI+uxj~J+ZbFDNr?{l~+ao5<%K~0=wEpwYx-PG0 z(A`ldGf|OEUj5v3=e_KQV`5)Mx9%-A`)kLg_xJt!(y3Kq$7}z@%3Vu9<%zfP?sw@6@PI+YHO9v;~B4>s_#otwWw%!DEx3|UZ{y_ow@bml}%kC9!jVFM?b14 zv}+eKm-+m^RR+p;icU)0_-N~Tf!{}Y z+rB>tl;ieU?e+aVQ|FP;(o1^`8k&p`b~a7qw_tpJ_~4-f3_Sr1$~NrGJFXV?%y+BG z^=P>?x9Ldwhcy8V>Rf!x!G$I7o;F->sn4=wY!F^@Jox{gT)bl}3VBzv+iH z$8mbfn$-AqJe)1xB>DXK&0i0H+LY|5S`bqX3-yGd8 z*M?7AQ+&Xalf|X@xyB=Wm}uMk@r%e=ll7A?eETaM^+)vCnG2qC`MznN z49rXaTN<~a>R-|N53ia38C>6e_wKTF*FT(nAFpt*r}_EUmwHi3PuXm+F3noUCoof@88DGYG?a$@SE0# zCKjfs%ZK|z?!W%j?c^xbRLP}n>(USNd1F27euX5c*f05Omp?a3;l}7%Q%Er64>P#4L|X0i|EQezI?wN4yJuWoaJliljcosc_D9q2o>M#(A}7SH z)%jZCwnX{ur>oR|r=_$?u2tRXvemyfML5x`$=~Ss>EK?~ox;hwFT+bsPe)zQRMlYm zoX+i$uJ-cPf>Yh~U)4X={t8_lqO$gK%}zV}*Vkgd|N1ho^XjXw&-VUaZy73SIwx+= zlHLC{wC?wKwdZKjyjFjmEtTggbpJ}aXK}Ls=_Pl%<-hFXxP7Zv zxNLou+rpPi)?b?aQ0QN_`rcFyLCvy1ub!U17HZ+(Iir8}z8brbu)l|F_AXs|@am`k zKYM)-C!fA}>!{6y)T{rTg{`+#tiF0_^11(inpEZYpERrAS0k4({eQ9NyxoNbt@DbO zbiO{_x7s=5Sst@~e${{0E_}88hm+04f97wu#xF8l9~kO&=IQZ7-6wuN zKPO$d*1wbc|3|qsS9WrVrfTK=UwGGB=abOX2Mi3{>5j7$I<#18uj{{vi?@G$cfD`c z)2OHB>%zld-(Ow3CcJ)q+1|39;cNG=Qhua$b$@iYYSoo{d-uy&Sa^tfO>cOwQ@Q^A z)&~vg`}PPMUH%bdGDGb6gDoE(8D?HK3qH(sw(r1u%?py}oY$UgI~HUfwDOpE&9Am( zHyJkJOul^_o>#uT4?SqeocbXAtkVLMfXllNs~$W1@aE|gkE>GeeyLj+A9VEB(Tg?$ z@9Nd});sEdnxFrEYwG`^*iWZhU1m)7n$OuUwCTzjSijmvJ=J0*sxo-nst_|avx&3x{xZ5fk}G-^-%wq5z{#wpAB zw0>;sZuc^?;h!oTR1nq@w?Ha8*gD6{?p2jRfBnDm0|yQ@w@%#u=Q_7cl<>4I3;*mB z)(bO}_!pfLac{k`D%a$Bo!7(iyvg;m|Nmw8>c3zkGmG`d zbjQng4Z=dcZcAd#TD3KV|M@Mm7wd%_1D6~)v-CO$i6BVZXj>f5iUC@YUYgS6@BvJ^APR$>~RqlpH!_ zcqV^+c$9vuqV3C-?+vCzEVdO|{`Hi9_`ZKv_lK`L#-@6}W zmjvdpT?)0A{Bm)p?d?l8dGXr3VpoC^MSK5FH#lTH}sH~qcgko=fE z+*@_#%9qh4ohy4Lnua+ENq5cq`|k|bb=Sm&e1Z?{niuBO>NQ)gZI+l3C!#QK!?YzD z$zi_@wfI+w{a<9?Vk)-YY5LZx{Z~6}Bi4p%M|ia}^_!Y{@$5It*4Siw`|i`zaPh(fss4i$jUK z*XD}nv(JCN&wpxl!IMneD{z4Y5zX1O`+L`79Eic&077)Ws}xFRpG7YBWoPuiaV@WSw)U7cN1S~;d$y| z<>l7&V}5BmM`R8$&GCFHktDJ|K*`EAmt8z!4dbz*7-+%I_R(%V&s2XT&`uP6eU0;4K zdgXdLZ$fj3TFKYcAGh~7{n~N!%IQ?Gw&a|BGWqP^?GANko_zW?(bS(ZLu*nZ4=^`p!0PtSKu%&*?J@otIz&lYw$6_4oe z57xWfnsZ_5m*;Al9JNowyr(35 zyj=alnI)}SNjvx7O3uHj7iG=QhTpCCoqT)!)mCvu?wAXULd7jzckMsOKl%0NEP`2BwO>xX|u*J{+K?l|*&`m42$B0K8tuRj=HR=I3~&qj8skL*T5 zZf>sHpVXDy&uS{(kSDqVRBb@gISQyL)6-|E_OSKj8i8 zbE(^GvoMv$xrtxY6<00!{bVc01Z9@ESuO6Ktr}*J8=nTT3ZH(KqN!)DCm>w7=4ee# z0e9q`WBsw+J#FEydoC2WZj7ELcIw38X&N_QC$sLjR8qGoN_xtTx6|hxiV;4*61V%u z!u}GmC;QY_8vdH@^ru|4CqKR~`I?u;eU{2?yXSO;{0!|>oxg5Tey;TEZH>B*`hC&hI~K(0h5xmj2B@lyc&1mJ07`RsDK;M`L_T(T^tUz2Dz{ zoqXJ5HiwhVqwfu}Xe zlj7wUDsHendlu@{@NR2Aw`kN%xBvD1>Lqio`t5V$e))Ef6W6*AFIE3m{62VVlHoPOx^v_Ds zzY8Liw>YPG7{8ykCi%>n>c|-lQ<5HU{j!g1`}IjDl;xG!YgfNf3wZ0QQ`9v?7xQAE7sP8 z{gvIXxOlPf+sUhbzuywhb#=Gus&8E8vnH4RPES!R`gQKb%%Hfx-u81Rr`|j1`F+;9 zWaZj-p=Q!`zd25aY=7P=UukpYe&LLMueq+%WmUIym_G|z8oG9Je21FN<*UZ3wf|a@ zJYUxqJ?`(aINANy_VrxXp!u?yd-lDb5XoYl`D50vllvV-%J+9qKbNYu{8!bkx!T8z z|Ha?^mt_0j{q?+cou8IJ-f7Q${mZ6*DW3N>ODCzF`*VzCck90N;~sHQ8)mvr^skpV z<$uz+C`vZ&pI*Q7{jYtKugO++=e_*@?R))&_C2>V`u8v5Rui_Jpt9)i(++o~$-)0; zX{(hkWnkbaaGa&c;l)}Me@XmJ`kp&;b$@Mtb$@m2%1c>O_eboyzE6AK^>hFKMy-3d zH}=={?0Dh0y~#WCuKo|-&(hM`a$?2G*ZONWZIe)Vf93lFw&~|p_HVm%y#C7`JJFn! z`7ami_}u=oV?v3LX6cvDi{JbV<0*Qyk~^OFy41SA5pQdqHpW%{a`EbY<+48Nexr=v zhXe(!!z_zS=4Uodxg7stH{XXj=Qa7;W~Tmo5&HG~rXPO7<_0OF-OVnKeWamz<P&2HZ94}TrYY~w7Cn^^i{$-Hle zQ+b3Ku1NZnnJwF~v*zT+U;liSqK_mA+*=iDd$}Ot|C%__KXKMmluv%Dez(-+RP>MC zUyHxK`yu&#{m(PUKk8jm62JHPxws?$bPg_5qbG6M*%@4l5_~@&x>J^KscgL-d z+r47f*X*6LQzF@+j(>ZPde@*|7wL%cKcert@9Rs$WzLHKiPO% z+}5@Ld6iH$PRVzDM`oJ7I%!+S`Ff|q(^Gqcl>Or$W<6Se^3|((!H#>^P4|gsGoS5a zHT#>yI!|5s6-B@PR;|;%wq&<{>(bb!!-1AF7EJZ&opOCY-_(QlGgVCPznNfqtLo3M zyY*~Bhm8+4vAS%T%DhA%cj2pRS52PXl~@<=dF>#J@r=Ya`pvM6U+t9(WLK$dOZDG-a@xPBBRdV{XMd@ies%t8m#V#a zt&_eQtZCDX+v$1u&eex=X1eWr_3O8ysf@#3Zrkbq1DUwbw=Gufoy3so5EQ&J@y_4mwQ!etR-cNv=*Iyw2-@W&sRe^ZHvYq{c0f>{>k)l z-sildDSmOQtiXJgvf%6u{71e=>i;O%GuOjtV$$3H^Y-fm9{=F#(IRIt{p%NxO?B)4 zJAJM$>-<^QePz>K=Lu)*S)WQ~aM!-BWBpjZVzW z&7Y(`TmRzQyLaxbnar-dr@|*v+`s&dhx)5^y?^((#r~Mq`(CIqPy6WAoIkba=Wg}w zQ~$rvvM%nap?pU_#~B`e;?h$haQnRy}L z$eGPobk*M<)Q`FFDx~<1d7=5Q?=RNgEuH>|`#;C31JnGEbew;1^m1LDn&d(LIRfUU z@^h;H9{$|1Vt42>iFc2-K3e)c`Ekg4@BIfQZ%vm^uGEsa7bL?ud;a{_Gfr-`xUzj= zo%$wT#gxjO#R3WSwl)1zJ1cj#ZoFx}`je4mc&_H4bM3yvFG@`zQfiRuAeZ9 z`?sO>{nK|w%T4NPgg)M>)P3-sW%G=#pP9LFp_9ruT3)KmEoW7m&-e6VShD>ZO_%e2 z`=Vw|*I)Gg_0=Qm0~S@ke^kBRH7{xU?vFRM4c=An`uE%L?%k{3IDXo%KcTHH+hKc> zH)igZA8EBm1Fk;(|0iJ1&6c|k(sfIp?-A|RY+Kc4|I7QIYn%RpQl9G#2_ENO+bsLS zyu{GmCQQaD{ZmC`68UWtu|O*|72{Rnz8ot;A_Xi-S<{;FNnC5 zJpJq+Ume3~(k*{quaZ9?(RO8Sq38>CslOH~RnJ$e|E)|qmGN$}XJ}Aiz>TP=drzl_ z)a?(t6l;CTXaCmIyVUpJbJ{KakE3+*=Fb&pgQ~AT-qOC}ZJ2HE*MDb{HvZ!{w)ax< zhmd@k&EBh6Paf%he_w6;&D!O<2L%nLB+X*>xv(we*xB5~ODgY!URcDjN||y*|GM;j zq4uhObCRy6Z~xV|VCld3n^|qePv1SZI9>EdBaATA;vuM-dPZ!@` zlI#nKiL*4ldcQGUdVi5qhsne<$3Cl`?%$?y)23ub%*~rF!AfHG8^ia-T5R&qh_=yR zAMv%c^g&DK%hO-G-hXhvRAbNiZm#WVO(Mtab**s)PQU7E?dC3Z`0co2%?|eLRY6(x_q{}Cn7z8Q@~6<^c)o49 z$9DSck8U}+YRUBK9U5BeO{%Wn(C*z*`pA3c_TK5wCL2o>MHEi^zIkPlR;KF3rYgUm zi|&4L=2pA-x%r!y_I~&J>#--Fs9s*ZsIt61`|8st|C_qADh^HlQGWW~n$40udFm&> z+?3^@%`+)#~b;U{&}nyv-R%B zx{IG5EOF(1n!oPNdgth`mAYP&s`mU_7dx5HRQ}cXvV8{yPye-l#1?tK{4L9mU8tt9M4^qD=Teo z+kfrP<7A4_SSP{2z+>b%OR*!1wSHk?{Q9!Bcjjsf-8yDiOG<)7y zIoyta@}lG zll0!W=G|-=N2HUtCY;ub%a*^${7insA)UB&tB$?M=%FF*yh_|`mN_NkhddezMDkK(!X`Q>+&V!cH%@n?C(-o5{4uV?kH zG2fIYb%s}2>i;LwQ$w$Ymc}$#Uov}s?dOG>RlT91D${nq<@H`OYtqf!uWuIY|G?_i zW7T1`{pGGZZsDi*A6OHSGV|U3Z{a#B>kpOGsBB$XF?HSb-SeJY`xCJ2?e4t0kMGwD zs~rjVh_BV2C@y*HY4!<+g)5yx@12tI`egS1uJ(n0GwV0~&b-uWVI3eExI(Ul^=6Cj zngxw2TFb+)*?h^!&9$&OUw+NM=j03DxO>`v^8}a*+;W4{!*=1y)N$Z^Z(z7R6e1PHZd#OJL_|o@#tqx zJQ|Vo`Qoya`LECYf9M*Cg|^fa41T(t=!G?%G7Pwzp~Wa z?5^Gs7qjV_O-H0n1!EfhU5VdDRziI=#QxD8i4bbezvMS%6e($%5KY-~H_58LXB^Q&LlU+KL( zc!lzV_rKl*>J$~1=_}iF$i933@m{NH->fft7I8T_|9&SjC!uZmsZSqPw2S^Tl@zg= zude&=@5zK0hBEV9xKtz^9lsv@rts*UmUwVl66cA&iFT^Yyzi&AZvFCDHT=ke_IB~E z(+*d*%4KvhPoA~SKUaSDM(?his=21LrF7$4_Ok8LwJL09 zXUsFIzkYPF%k-*)9Z8$5?kWGt&pdm?RMbgv{ex|rZ-wRl{@~;I^jhw2wNq|>_uooZ zIEZ&FJIMZU(p-kMcaP7R8ocb^&T5DKPBIcdc?#yNnrY-~IlC$Tc2?1ErE4aQxk09x zJ6i0Q_xGrEACEL-_8;dkV3BQ;C_6FN?t8$=1pcb+tqrw1@?N#i zU!i^LZ2jInI-dHqmJz1~chCQqeDA-A@9n;2*}=Asri(0oUkm=xe=gLQ)%^D#wWQC5 zCx5=*GoL$7^l@C&HTfLwgUfHNZH>Pl5oP3J{@JUk^Ls$mHE|;qNz)ap?+D*jT;?+U zzVL1R-(I5c6lOG?UE@%;f6w_9zb(VhDVW(m3$L{Ei~lF~AxG|~%I>dKucK!!cDK=p z4{&m?wfgz`dE)A5{wa)a7zc!nNO*;Z8=Zjj^S|K~taEhke?7A|!bC$ztlq5P{Q8#nyQ)wBUr}_ZCuwdjpG1|$%NYWz z+jVDr(-w@Y(XF1g-Lk(|^+uXd{lvXLyuasv`7li~`|8P8dmmIVvHcOK5zyZC_4Lu} z>u1N6uSu-!Oe_?d5WpuH6!7@j)!uop*^X3fxVQM>+Q^qpYqndt|G#)@ZQ|7F{|_CB zQnzh*sLj6dYn+Nr6Z^lMw+ohh`?g_yMnU?DtnjQJgFFB4y;Ip^7qawX4jbb^sn!@UF8zn_cvet#|ODAi*>g`3_@7hoVD8*ZMiB}|7&)@ z-@WfV0^Ta<-8j3X^O2EVXVI@iu?G8pD1X(J3Jm?XHNx3xnt$Bt+tZ)kTd{Y2z`pI~ zGrzBW%og?kXqTSLj{0rJzwv!_5*{FU6&`t^qo1*Y;p6mQI2e(XNu8jDw761>-5&TW4Gar6G~{XB=) zn>TH6so-|sc%Sq7t@+NgKh@sfF_B~9V-H68%Ri@w&dt1k`P{7dmqFZfY?r^6X05Gb z;$30;e~Fem@2Nd`3-(-pma%>-=dRr|w0SR06aQnC;;m|5tkopAB`*)su?jhsJ9oD`^_DM%JJi4dl z*1o!o&*0j(HCi!BZgsyTUiN)0k-GcJEAi=X`-J9W+D&E$UixJn*wFVWYIcOAiGcai zpSJlQ9Qj^7l92*Za9Q|^6|L+qYeW!@UPWWcd{$@pU$sEfXXJ2m# zzYIpHb zxAqJ7$;3@C{53(=ar1?`l{ZT5f9p=pc%J;%x{3Sl;Xf0$Y<;!GdE)w`eKL7_*%oyA zWYt&&xT&i~)n8w>wl+R&Z(V2jzFURgzHFb87ACy(j=dH0(u*^a8Q)E_F6TJ#;Qozz z^?UO?yk!+tUre1dZ~8T>J5v(A7wUdZJhbFl?S>y^Qx4zAFKa5-*#Gy?OT+vfDjzQ& zi*Hp=c|LdYG?~Q~t^HSTT&{@e;MkRFk?M0U;%VQ9|Jz!Zb6>I&bHC4|{;+=knw5rb zXYQolIh1yLMp}ISvNnN^Wjm`H58mp1^)BV($|H5D?{7(#-#v5g@~PM}yDr{Kf4QsA z$~CD%)AH@HzeV%+#gtBbu-o(b@!!>spE^{ZU15)2^sSHQNW0#zUzOim{)xXn&C-6y zdG7k_ORlZ^&f*&Sx&D5Jd&-~Hp8`J4S@L~vVa-nJryZ_ zXQzl%^Z$IH?4HQ6Mk4%feQ)}v>VFG5cJD89VyXRe{=euYsf*`)Dr@%ZtG^eFNH~7* zTL00~zY8C2PS_Inh$;2&J;Rq>O02te72D=#z2DHetA{r#(!KV4PlSp6qt#ROZ@%x? z^fqg`V&eAuU)HXDCdmFj%1xQ^^y_`1v%2$kc|S_qKS^blSM=|Dhtt)x?6wQsEjzto z^@l`TyXUXG?(<)d_`Lhh&h2^AwrI2LsXu*Pttc-nUc~x|SmvjB+LCK)@18&ZuH~z} zM-#hb$<|-nc7OlLb*uht?Z4N{_g~xob>D}phkEr=W4a{&Pk+CZrF`%0`a6yb>|g3H zI#X@;$l{r|{o8}rZq}byV!f*uEwAVh+p}NOtGe&w`?!SIDJ(UAQ^g*=`*ApEM(g*- zM}AxGuwP_#y7=n0%l_9LwfFV?`YuskmG}Aa-FcCwPgs+rZ-e>_^f12m>jrDuxUq6(*+ePrg;uR;KXte&H=cMBOfPsPYk>e~S zjxN^Ph5B*pZ|%RdPx){5*HvGOqNDa@Z(V3HxzGI8GG z)mQ&t+Hm&tr@kK^*Yv0VQ9LO5)$+~XtR1VrR-cagTlF&(bp7l5t0!ike8=H+@1b($ zoGE;l^-mrCQ~XijYw^C;!o5NZuChv>66QF$!RztsEBb~~1`a24qe}kEKDE!PpMKt2 z=#Fhu=Swbz%g-+zeQJ@pLPx(r`1;L953|Z&YwP5d3M6>S>V03$v+HKzS5vkBiz~KA zd^BeKGG|~s3HmVL{wZ>n^QiGQecl6CLBSib+0I^PAJfBAmvs>)R+cdWD%Lyp&rL`7Y-`x4o* zTqsteS!dOp`R}8+O7FN<{+&r^>cp+ZUO! zwQ1e#uh|#tHr6F2?wKIn$ts!K`?JUBzVEa+rnNcWes`Roe6jlWTF#!Y^=I5Ru2ub8 z(^9Q}Y|gQRqQ<&q*{*r*$s3nH4m{XcGe_ZzdiZZ{6O(ZcP4i4%@;J^cz+_3$Lq&$4fdUAOxeMnJCS2v zXij&{^ma+sbvu8v->TWm9Q5w~F6A#(Z$0J;ep(x@ANBow_^(%gBUT9c?yY!ovi$xH zmFY^GmL{t|UFPo4T<}WvDvR>`IeEd}Qfa2;zh@~+%PI*iWcj=8_&Ft?gZ($Y+A$m! z$T28e!256Cm;1Fl;v@~keVDuE`gY!9nqHM3xqpMQdAK3(>BZean&0HQo`3pXexZEE zG21KMUr(fF9bdR2_OqI8-Pr8h3FTXo`&%e2q7u8DQYhU;5p4xkuXWyw~(=2X0 z;y)NEk#ue!SIW7x1oQZXu0MACJ^!5N*kswCsfSM$R@)woz5k#*%yg&gb3vA_3$p*t z@jU$-&$2muTUE4fu_MP9#k-r&9lm&?=lbP&UXR~*=3BKFJr!9tOQrVz>-)BM6K!vO z-?ZMduKZE_+}E0!9B(rs7j}1jF#Y%}+1E*8%f8pV&1vixUqAULUDKm2+8QsCtbg=- z>X8tup!&tX|1>3Ux7lBJq5RmNkAjo>7s#Z34dp!eSE63w!Gq%khd<2M5zRuSr*4zW#8>Qz!WErmXjG#O8gk4mbbd zGEa7y^>>Y#aq3?p_6sDwNI6w}^oDYf|I&SJ@53%{mU_#S$9G&)dYjqwxHmXzXX2PEFAJsXUi~YvkXz%`nf({scXwX6vd?^PU9j!H zj?e`m|DLD2&A-}zP%k7M=^8Mz=mZpduKlW#JOnA)PFL%G~t=wvLwf@D%O1pjYA0M;c{qtA&{skYM1dpX2 zaZ7($ard-=f#mGHk$rrwhhO;Sc>hrDZ(8oKe*J=(mUdz0;gjFH?r~dum}Qoem(I_R zthT>HGb@yI?w|f7Y$)(8`oXG6AyT`4@BSxmeSBBG8Nb=SnJ3q8*W57i;j{R^&MC)Z zr2ehDA;NzlDW>LO!@hmDwoOX^+jLuzDY?(f!7J?9ZvA^d3_dxm=l$!gsAliB@k8d1 z{QgtdFP^B<504JJ|Ksfch~C(>r$0)krLJGU@Y!)j&U=4sZT9m{Nz0NK_F5t=t(>#} z{srX;x{|DN+rMxBZ*uxM|KuZr$uGBMWZIok$>`Xld13#&og1FpKUfgXcj)3S8`J3< z&3yieP7rWT7JA)sZEL0Y;kWa>|70GN`Cl%jn%!Cxezo>u@!4;jHaDJE-J4kL-=w6m z@BZHVu@8e{efuto#$Uf;@b>$&ISM=n8Ll@u?|#|-?yA}S3C+q@(U0ya)Cs@ooEKTA zXa6vLXLaM}2f1@=G^^@N^zsbeO(H@{-{p6x@Q0XyD_Wx1TJUc@@fzEKmX|6uElHS-@j>a z*`zA?H1F2(f8Xm5I!4z{EPnMl-g5rDQ!Cr;{a=X9FF)-Q->>?3!hMIOKQ0`%m&x!C z4s&G8vURur;N!Z8=}zNiT_N?!zdzOr{N%J@F>+dKlX&;tuds>sxp)7z7(UJ{?)vWe z^+(Ml*HpO$sgJ8#uP&2+a>@Bswb0AnpnuF$xS!qo!FEIReEiw9g{&;?mpmFiOnCIY zxJuWh(k0~X%nutq=Q({^zHeApo@c-HmE^5x)U;gapoLpC&wnuDsn{Xyydaql>e!)im>0fkxj90#1bjN36 zx|aTqQ`~>NPygHZcXR5O_iuda_r1UNPUYRd_lKCL?@_Q27Z%+e{?4rL`{&q-X2;q0 zP3>o!WnWC1_{;6=&F`;@^pAHPK7Lhy){Si6*Q!UphD&Z=6LS52SGT%d()Kk@I)!oX z8YPncNuL!AcUa@G`@EN@{j;Y}PrbkY=%nAuIPduvZfIR!eNjUA`Xx;Dk%U*|n`5&?W)zi1x`}Xxs`ZzDVsOsuRuebNV-ujv*z`;5F>B+AfO#hfG zS$q!M-)6wMYQ_7vNAIzE?+=aAvbdG1lH;IOm+;&D{ndBB%pZji`xqZ1iho;+I~bA`j=jGI*KxlQ{|cfT;>DAwY?pn6WnSo~y4k?S3Shg$_w zSN{FFyQw!wnfdV154Vy}HBB^Aw`%yfaRT4Im#;7CYag~}yu_|$cJ=j#HCk@)8@Ip^*A(Nc;%9+msRVhMX#QHkkO>a_1jw$hU4Y7WmmT?R_xZ~-1PP6 zf8NtgQB`-uC+z=T+STQ{|B=6~UC!zE)Bk%2OqO$*es8DxX`@4XoeTaxk=pX5>aXSc zhk8BrcBb!}!aGUuHZixoFo40&J*OE@pJCC z$9V@Hw)lvi=1}-MXX(`L4Idi69SgsEuYmQY3)f$Ud7aYHALkqv_L#0_o_}Eajn_HH zPyNbr+{XS2xcT$?Pk#g- z8!vr**&?vMRzzsZgX@*bW(|7-Tg zO?w@B{q^;`@84;-RVfwU@G_leeBt?gHtF5dWLjRIUjF9)1W#rrtEmhB*jDp8-ZXi4 zwx%k=uuX++ZM*oh|9>iVU1i;WJQUZ=@BC=W z`dc&Ortg0iHT}5q+aJ8!o|N3bl~MJWzbSHM^hv2D+duqq)y~-bIG#7}>hUGU(*KIq z@69-C^v8Yu-tfJ(n>hCV`@LWH-v7iGc{8>go}^Z0_ggo&BxApt@QGP%YCk6?9rF>N z|3}z2LVV*J>18gPzitq=XJg&+LorRe%Qcx_>b>0`^RJ0NZs;(!8Y;2tuF0B8dFNX-@mxXYNz`9kHHmh9+zHQRwub9rF})Q_kaDr z4^KItjhpq4t>%2x*SbJf$=cxW+v}F88l4kAD0|y_$Fg5v^Q?~7?OONbgG-)4()2oI zhvdiHddWWff_ix7S*T>c5tzi@KchMK;qG_41S;+3|HuxFn#rkb{clhE-s`LFMclZ; zRFlH&or301I1+g?yy-{&Qt!!DC)fV}(0($`W{tQ!2g|j+cQf{{5B+of#oh(`T<__) z%NZV-zF6F527kKJA-g|&>^FOul`!{u)QcY$@_JSM`C8V*%Kx2DPlkJnzc{~tfs2c| zR_}k2I1?kiee3M||1=dx>v#9r{aN-&Nu&L#XeP_`qnv%ysU)#}HF>MwH-}i0v)+zC=pZxGn z*sQ%bIlSNAkNW=NNA^TV{qoa3aUH!^Uw9w+X&C(PZ0?HvEW6VTk2@`8t^4C z>$?4ZzuLVg<*V)9FG$_D^~sXyAEiQ{U48eyR$29(`h_3+BlZdwtPb;uU;k#`{>^#U zo;(VS{}x`WTYddgi>&;;c;~OlOZHZ6)Lp%7-^(Z8Utcs}U|{*;I7^x16!_MUwP9;} z{=5>nS~vZro93^L*Y#J1-p{(fdhWlC;j+Ju{y+Nt|LgBr_t)27TC-}!EdQ;q_fM@_ z(faQC_3W!pO-fV~e)9DTS1jn-o_+n~ugy>5cCFigjq7WYh5a6{M(byX!5z)zWjPu{QjnItyktPn!WGo*O#@` zH)ieor?#&9kKy_k$z`q2R=d3ZQnfo`$=0l|#b4h`My-$9=oRzpss3tV@xMh}Jg5Hu zy1sk2Q;civ>OJO@o_?)dx6bMJ|JScLe(=AGzN-I7>uBk!TYuw^e5$Em?|JlOX~?|b z;>z0Ve?R^E)g-&Y{l&nr+|LpK)C&6uw$$xy1ieS-t!} zLLJY~2^Bpp{r&0EUeAPgMi%=77Vnq#YTT0SV#@e?zq_$Z)Rpb(GhE~LD22_E6R%eM zIJsli)$c!9>NsC6lra#FQJ?fbdBgPI9}?G2Fc4gFe#v5oj(Zj+#`TAeChn3qeqtpt zXaBPPf7$y#crKe9`=h4oEW^VclPf;n3Q{TbvgFa&5^&`0#hP%L7kp{SpB``L+FNh# z_MheHteBrqf4+V@y?(}|jYW617lppq>HWfae&6oggf~Yr#FFF=zmG9dJ6@)`&|m%k zL497u1v`$KZ2EWcW1Yej*6C&^9H;;976|jT{&Kffz4iO%l=BY_&E|do6DV*z*z|+%b9~oI5oV66`zM z7kPZmn!AX};lQ68C;LDB{QoBV{by;(*dJ@E_7pki#(aNz>HXh`^?D|SudiOdb(QOP z)atm2iTZUd-XDU37HyN;Ec5h@%D?^UAC3OlJ=y;HaDngVEBp2fDqf$leX97$sk8pL zElb{CSkQCt-kLv~{(G-zQJmjo^f`I;8P!=n#(OI?MUQ7p6l#CU zdTveg;=L8x*1ic2-z@I@zrAhtdhux&i&jUh{k0~3wb)+Qdrz0W+hdYc@v+z2Y{mZv zU5%4uM4zu@{_)`G*C+OJ0`g5#vTKsJBlSs?Of7~lB6@2`8_@LFo+aLPp zX`Kl9=l*^AuMd8Hvax&<6>YO)u1Ox{I-=i_d3pNT{hw!_+JCfipH<<%ADQ;*9Dlw# zZkBkn-_x&USLBnaSsOa*W0rmr_;NwL==a9=pIvlnzja4nmvD|d77~}RIau(bJ6mm^ zQRC?!D}LOseLw4X@dTGUlTT(;|Kr#=VPV~Vft;Ps&tJXYrEcaVrrH1LTN?vY_Cd*~ zt=muk+y1()CU_cOg8aXeg_HC4a&}KyvOCiDWxf9ECx&$q%D=up-TUOz%c!5PpMBjQ zq3<=_Tzz5s-taa1tJXjA6wS$fvMIgv^w+CDl;8dRd15nTTq&qgU_Of4{Z!zRe=B_xq>cdbgvm?s}+O z$(-I3hmJ^c1{MGF-~W&I--O+(>#zQfd*>hi?0RTvp-i{mrr7Z9C;n~9a99{qW_S91 z%<=v7+RufOOCi?98zdSU5~NBS3Y!^8GUswJmfj6dT(|9$M-8~JhZ zX3;xzCjDJ;At=+<>u%??{%VC!qC2`@>{VZ1w{GP>Wy=_KHoJJZ1ql zQ?ITpzJ50%d$ZKstS_qn72QtmK9?2KWR~n{TIav~HRr4PVyVeLTtcsi`1toduucyC zEO>2UOvsIk9#iVey4<^3bp0fm;`hBaaB^c#c)pGImrZ5fmsg=HYqJ(*pURj&`zxE^ zp%CoZDpID)De5KiB_KyAb-&=OHO|}zqwx?Q?1DKIsNIz8TD5Jsuz_4E-X3z!{gdfDdn&B zI}7f-Y}xyk@mBi!$5Rc~A9PQ)u6X>4XWs80P4hM1oVoedqV}N9>HR^6qqqu}dF?rO zMBwo66B{bd#LVknKP^9M@eXBKX9;fw!!0jk{U;Ll3Gxh%}fvj5|39WuAd|MB;2^H=(p0#`K zhU6#n_F4a%7q7mY)!@c)ub$H{GHwb8$ZGj%U(qi7(Q8#J8y%jtMMsNIs9K(R#~gQ7 zxma%9_1||s{qCw%w^da0sFqH}#eYW*njPT&uGFM|>h~tLZ#wJj-}f+0UC8i8cav{n zc>1c0<30QL2YBdyPnsEx`58CzR5w(;F=gFt@a1ft!Fr81Ydyex&9GRcAs&4R26tHTz2di<~T(*-s;Pre5;6mxu6%#ck!X8Y>j z!+4V}g-52zJHr#<_3!oX?M?@{t~x3YR!&*_v;x~m1+bN zC-L56Uw4>EUY^x6;F)quNWj|<7ye!6U~g(YG;4Nq_8S3J3F5ebCZstDp<*eCvU`TT_f;i%IkFVUsYU>m-q9^E>0dGAmB z?p?Z`zfS(X-@APWZ}g>xM+Ryin-4PGzPZZCNiz8V$!0DScIk5r{oT!99(}){e7L6m zPNL!c%IfSte?Mg9hMoF#kmXlf>C1rLRehTm%YWN^=Kt^W{}r1(oa;YIywkR`T<=^#97B}TX-@cx1|6c8o>)g-Ar$4ot8-&RmeS7+G z=JN8gtjVlxH&T=Dp8R}ReO1(hp4@j+%xtba-1+gFYf_wBQGIzyaE9WGO?CGl`5w2i zNc~#+xZ7=|#rvHzt{+{TH@(+u!u6fi`%^SOuCJYNyjJOO?VnQt;aT_B=lzO(CdxTO z(=Na_Kc>?6an1a`wGU=2n619QE{wl!Q}zGb-~RqrpEW&8^sO{Y@%1Yi7YgS+d!M(6 zZ}!bS0s^m`_ODP;U1@k`o_BV~ok#4|PyR)}NZel2^kDX}+8wg->kk(G`>XvYcgAO( zNej0$HJ+c(J#*7)Py0BIod%CBt1qA4oRu^CYWC!ryqRAwiCxbQiZ6fEoAKegXNCIe zbz$aZk@i1}s*3*KU#k=QqI&Tkw~DHK{r`^D%Z+XJuXE-9xA0Eg?7P=5|5sGVU-IYs z;mZ7^Nk4w;UH|$#k=xpf+cf<5;zw~4o?hQC=EufU+jh_N!xABjcZ=Wr_E+yM{}%Ut z!?gE%*Ih5V6neBuFMhW0i~oy^^)F@jTfLX-+vU$_TeW3E*Y+nu@%6FI;&BQpvnz|g zvy|`GRZ?@9)cfw(UYFSK`-7yuz592yOZL@Y!4tEWercJhv}3RKq^bAC6cp9cw(Z|! z{fbd$=09#JLy3cvR?hY`ns-fRqj&eT`-fRiBnUF*-KjM!elqv|nY%&X|5cxgVmU0v z#mF8N@%i+<$~%k>vuu*quhfdQsjI(#W5F*LwO@~x6koa>_xoxQ%g1{fi=#OWS*EfH zt1R7<6y#?3`})4)%N}s(d9*uCRC4|Ch^KjH^m%1Q7H93LdApD8e#0_*j%j&dyyc7K ztJMzg`j}ttYgRsKrth2^41dk<^lW-{vsR(4dvU{QmBwjP_s;MA$SS&2qe%GmyBU`s zIb68f`05XbtY4K%k;QNJ=m(4Vrk`-S|LWh*MLh@RGMg(IusXLaduQCC{^aHLWz)DB zHkc}eG0d3Y!}#`RiVUOF%zxSvIte?2QqOC9oqX=7#l9p-R^pSpVQTwo=dP2#%FMHM zr!Gt1eQNrPAD@lSPqI95x!2oJ&UdY^Oz!Vv?_BFIuG)OGRPN=!lQVmx96C)Eqc1)9 z8o2&=&i(D@CuRNK^)*j@`hzdY&QH58-PH)KpSSqrHl`Pa3R0RYK3m**bYJ4bl&y0Q z-ab6}`QiC-Hrh}4Zoc4F+i01$#O(fz+f2==epjbEE?ITpbi-BGl5h>FRUafT6*UD< zJ367e=th51^ujrFAMM-!;*EUb{)E>CH!Q5pG~e&~JEdUnj+76k+j#x66W$)#n&zT! z(lzOQz)|DXcSX2mXWVG~TgumB^-D&=JK!xlqtcZLSt7!`Oa~ne5_}@KSUVgG4d&>G zbF?%mDRjhKkJ)ah7V3R6>h!AD`!>y!mbN8g}pC^7l;8&x8^6CJWfXJ)%2mj2iU0v$L zS$0u7Z?)f>R?%6aj7Mz0nFp|asE+!%>e#aNb|Kd%OI`b6FD$7ccfWhvhM0JDo)tXl zTB-#FjJ5X^Y!6*bwY#*^>iRBQm4fW1ed5X66@zA8V2y9xC-+~ak5Rt4?~G^5k)S-m zQy(XteA+??vUfDRt9x`eZE6e{5?gU{ny9p&`b_+18Zk5HUeaiM6RQU_pqE zt8hz`V!#9!zpvBJhR4jh_cXF(YH6vL-%8C3cJlVutRBzG-BSJb>F50&{_Y3Y&kt=j z*z(i;&ff6SYQv|$ChtD4UUTjj*Sz)%X>vO=cb#~u`iI-#d9#OndDM$P;-8{Eekh#d zV}5x-udKH(UJlB}T$|+fM*2gccNA})qJiw?TGGPtq0Lar08v=ApxLP|F7ECze zCdjqEbj{tURmH2OPg<9HWzn*wana758M5rZ?=N|}`C;MV2Qe$W%93u1A6oJ=yJz?5 z-TUoT<2Q?1dVl@>;={S6?3-KV=Z2lVxuEXg&)6kZF=8<{4qwju|LOg@)B^w7@c7!B zAJX;+{PGSx88A0wUsvpZ_JB8UKDW!o8noLlmCHLTDtbI1dqKlf;f6Kkya$gpGtHVj z^A=O3_kBADb1%`jt&EWl89pESYW3>QRy8eeY~9qOU0x?%E4%QN#NrvtCuQ-quqMow zh>ZMr?w620*L9cWvsZs)k2h;5V3ZM=;3Lekg{JooOjkv@_Llr z_;2RCOIx_!KJq{7;rKr0_3m2-{Wbp0o7EKgX7_GiRoN@?Od;NR0zGewCp+Ddy3p}Z z+Ubwwr$y5zZ_7z|z^EoNK}&?ImF0}Xh5#QGX-+Q{g^sz!XC80$*|S{cCa3qBsWMtm zEu;?LxwL-%lRu{K{5D=Znf-jR#@l60SGPYszI&-n-tyo6=56b}PcG}7wl{WHbZ`By zw-0MBZGSya{BFI~{Rh=YF6oKAIDNA;QgJ5Ry7mP&`VX%CGQXYad>~}eo)_Hz=PfK2 zEegNx(E2z?`oyOY<7@`@dev|7@vVE5{&pp4U#q--b4z;jxw?D%=G|}OIm18YQq4Zr z5B|&J?{7XHo#3^u^V^|*i61p15=;?gou zd8@s_%{=s6@UE!m@8?Fm3NPELl3%)Hjc?Zfb@6LLkF2j-=e@Gkc71^RdAFkX`~Gs= zpWt-5M=^WiZl|Kvd#~<}oA%^sxFNUxYL#o9pKHHn1fR@!|7GvT*wT!R#b2hrx_bKl zMxkF{w#G$m4BJ}#Y2n;!ZAV@HUHU)Ki|>cnzUxbO|C3YA{@S&Ek9)b@_PT%8F<)QD zwE4ac|5SA)?a|2%%{uSpCrW2+{;#Pjv8X+5%Aws$oC>D(JYU^)H)*{_U%pd`wrg4W z)dyRaF1q3F_D{Yk>%rAG@5|TQ)=pe7HdcB)`|2nJny~(?G$?5G2@U1>t zeCU6Z^ZD6x{~d`{*{9w1UA;$U<{v|eDX+@bE8bat%;#TAH;Wig;kTd@l93q)*Yz?- zobiv7nm6x|;El=bJZ7`c1#xmpG-n)pXMXYCy2bi2^RGuGD*KAf=Zsu4pL2gR$HR30 z1sZQ{mPsD=$PYaJvG7qf_ZeH|m%k)=*#(%UHnEv5zWOdw(N&nQ|J&Q%7hKG|TR%E8 zzB^hH*nYEi!}{wYFCBbVet3Al{6}q_-P*Hud#{--4@}`%^!mm=tJfP8q{Uh6jKW;s zaeQ8>H23d8wa0e5?oP5;Bak}lYchw{KlwQcduO-r^f2$6RyFC(PwnM5_*@RR|DT}d zEPc=M#^Mgn7hBbGZlwQE(bw-jJ5fbAVL|a0^Ev6Ng)%WJ{pHuoT3@TTKXvR@q`#Yw}Xvvm=$dE3DGLT1s3p5Z1UU*f-JCVB)dsZ;WT_F+@%>;N$-pH1E-> zjjW2@eGF+oUmMi12%md#JzV|!Ef?eWlby0Z30ckTHcFX)>f`LM;gxpv_nf7zzw~l|$~H)=T@osP9er;9$U z+AI5%hxHTl^&YOL9ERoGr;M0SKd|69b%Ud0Ven2fAL;LcJ=0<@bWc9D|Kz9tGj=!} zeWAbKRk2{_SK;079|u-nF}!MYp>2!MQ;+E1pZ@Es2F9+x;r9FU(Rb^2q)$nhuk<7B zt9|*M$?7`O>$<+z?6h6~LQBET|H5+JRcuw-Zx69$e|;z6vamjJ_TKAXB=Yt3UtLq4 z-g==SY3}F5xc9qP{I7rU;%WRt_33}T?sxP}_<4Hrq<0F}ssgGvU;4$ydUEaEyLA=& zUs!6jU)?FBfAZHaKM$`9QsJC>=bELJ1IiBk4l+vLYh1>f)u?bwR?&9ha^BnPyk!5B z{S2&nTl6>kk)7IGF}=E~{YNElme(In^Qz%n#Imp9ghAW1!{rZ~HubdJDwprPdDwO_ zi~a1|D=SLY*w0stuivz9{)M)>DCXqi?~giNr6k#U3SWd>-yYq1WZ|)yW*3&Pe32jT zkm)?pwwfhHETQ}T@;}>44BYjPUHmP%xo=C^q=kifGO2qK7~Ea=n{)2IRkLsUh4Pj* z@w|k0)6W*Dn@tRuyMBd?=-KK2e|bN$x~6HlnE!b#tLrB3m~J;+MPcfK*JnOf zA64GmAECOJefNhC_hZ+4KG^c{gOkzu`&m^Les$lCbg$2tc++md|CU`n`MW31N|$ru zJZ*89PpSOd7cS08i<7y8Hkh%`F*DLwRCu`GSkkU~zxwjHkLzm{jV@g^PuKdzyVGo% zUZcc>=F%#5jk)3bGZdG&3eBteOjr^`_WaHU{otma=v>$WL=U0mS=RfZ| z>khRyKjrr(mL8q_`#r1antk6pXT4c(dba9=N1W-lKjyl&$4`7xeCz#J=E3cI?>5z4 zf8^x+c=~0Q{rcJe*34h`zS3S&YkGA?*dG<^2%qzN4?Q!C{d1&L#P{~fcU9`&?XNG| zzh1vOUPQCu)6+v~%YU;;-+5*{AuE2O_^Y*3+Lo-#|6^`u;(h%?a{W*1U7x2$R9vrJ zKS_|kxcuqgyMbJhHQU3vz2_dgKRr&YZ`$>{4`%e&+}I=UsF-mmPW|1xzfDf5a-0Tx z`Cm5a^KSX|owaR6Uj34eu+9Y~^S504neTU6G?Hsmz2IJpE{@toA3uCNR^vYT<9}Xn zyEVtv>#qoF$3^;Ke_ZwX&LegDGXu;u%r`0e96ObqQ})0h7dD}H7w zc*|q^hUxD^xkYA7GkuryGOdAh!0`ZbT4 z`R(rKEGow(bI)Z|>zGd2e)8MtEZtSE>a)vttb1SnF;?N0BKJqVwZK2-0Hg*R33b9J>%qGch&Xo zzsY}8BKParfA?h7Txo0nw9ZS@aqp204eizMzL&p}RP4UD?`HX{jb|0vTeXs9m)YuN zA9%CZcIEt(^Bu3aT@I>!PgcKMCTzJg^Op48)pa||)|Qw5dMdgq~`YnpM+6UwyUvo*~%N^8fn%CCA-A zmaX~u@zne4G5hVqENkYi*1x{1H1FB8wK2P+CZAF7yWI6-JK+vUsmJGr`$@ z?<-kxPMdGP*xLlNYp*W+ziRdMWmhX5tB(JTt6m?k`BP^0O}mr3FSLEdT$@tgrFQU#C+6BB7Tx5j*0B74d6-%|oD7G+2c5T5ki_dVN{D+zO~c9cDPBKBy#+0A!9yvyU5pSC<#*c-Ej zY45|?ntOJB-{`aGy-=LmGK=<;Hq(P8KUMr+Wfxwom*;o%XR!-UlKO^+%#%&nRvQZ@ z&Dhzd`1Rml$GYUO=9f#WCO2eWE>4py`fzd5ltg(R1G_b?%WJ>M3N`YZKAzJgW40wf zp~dq4mgKn5ADkD7M@3rEV`cpEEV_mtbL%6^Iy{T_xvN9SRa>`^SHUNo<8Uh&;M`D zh5rZbA7{?BH=6Z8TTW?SQGSZc{eL=7lU0AT%cty?UdtJ9_ugU_{S}{G4sM?mq7?0L ziAzcNbC1%q6`zjP=(~k=aOXZ|`6G5te(E*<<#+GT+clpA_Pqb6wGyP8w>j!QeD6%N_C{^?F#m_?e&Y0u$Pcb&ejoxP@gXJJs%ckkc3dKJq4#x*6L4`JFi*Z95J{ZdPr z+$Wt@{X5_2M7%H&uKAR#AJ|o|;UFSdcKt@l`Mr17{4)<)^WlGv_x?3DE?0w@5Rr~Si>*<^#xaxbNzjFjIR8@r?fEos9fX{iIWe^X70FL z*TlrXZ|?gSyZ)wz_*s7~u3Hzj`u_U()dklBT;~1^yVuqwEg+htHf1AoW6j(%1-EpL z30-!QOaHpqrN`g8BUyTI4ER(d{6?^k+P%zrLD%l4eBwn%$X$uyrN z;m2Y(_%>Xc|CeQn?7Duul8B=T)9?wGwnp!uboaT|H^DVb%hW}7} z7I5&})(s(9o?3-Rq$3)CFJEVT<*8Gbw65#B_w_yR9#_`h__RUw_}gm!=wokKva(HC zQ_}g$PU!CRj$ZnqeNM`XC7Ya9JIqg6F+V}4X5XZDIsYer{8!>Rrxud)UUiOUH+&&Y1PXl?s<_5X_}{lBB4t9P4pseS&)n6tJ{cmHZkycpU32)Q=KY`NlYeAd&AIXBuF$GU zm)A>Ae*KH(>G_-U?Kah9R&r?9S8eCd!^N2>%iUgclk zdtmY7d%;<)x-9&grhh0;$eS)+%CNX#?c(3QtA70we#|*r>O^0@=*gGa4-U*Ua#<83 z`YLf%PX|9wbnByTjmD`ytJ2<|u6@#eZ;W|sVIpOuPMJ;SujMc1F|Gz&C6kYmp%)Ut6ufBE%~+iJSe0t+4G z<~&<|no0Pc!h>D?7j&1(r+iA;nId?1$$96CKW$#76tbF>OHO^UdP|`UWB(>k?afnX z?)k$#^_9egrocZn?O*@%Jlj-Xn{@1niO*r@kZ+r&b94QewR_*+>hJH>&u1odT+8J4 zxq2bb{-pZqrw^Za+pF)&Ugq&(y4<&WN-_Iwe(VYTvYu~awSHarzW4r<4x0+>Uzk@X zU~c^F?L%qXe!xKncVJ(Jb(^K9l7 zg}jrwcyL?9x|`)MGrisJL`6+jZ}dsz@6$0W5p@!qX0&MjnWG!`b~*A_fBY2t>!Ey% zlEth?!7NR&chBsbwKppB-9^cF+$n84k8yvydr++HZ_3B~ml2^456$eoe#|p+bI$*J zG1)2)ro~O4tg2t&;;2`&{NawdJjq8t=eDH$<2>@>?!7m8bsU{}@5;0*-$kjOe3`k` zQP?_5JLM8f|Mc1S>)SW2H98nB+rztNeHzQ!qMxeynulsuthD_;>2llcmG-y&^fl+| z`M)VW%AsFA{U%qE)%9ODyA`(0KQ5zj?sv%Cl=~jp=O0XQWb5n^%QC+0w`{ikAy&J9 zZyDE3#opb0ueN+?e6N(u^e2kxRi7$1T@6YUPWrP^{O-H##k=bRLVU_AXS|X-U71~z zVk!NSH~HE7g=aDkl-Jd5w7J%B_0#+{S{ z2(6!gWFG?q`(ejfs+?Y|wP#;=*SOdJzyJ69>HjIafB*mY`)|bC)c^nW-M`FT{r`Wh z?yu7qr@s3AW7faUb)l;kg{{A~FZ4Y}(`PO{@z!|P2ty$gA@(2fp{st(K6W|h_~lhG zzYUM;?wa`T&GQxSQ&yC}yP^Ex(?rWj%Re|e7k#{%^)<=y{_dFFhg`1TzOBC2Lu20S zYdbqvzs)`zEF@hW&*${WBGV@8#I}CTjB96R&9Ae5JZt&sM@|9@R1^{#-&r<%vFg-* zwlnj%;`FOkK|gCIISR3|a@85xOz8i)@9uhKu4X6K1#Ijk4~qM0B9?Z~XKj_%N$t_% zlb^vnyY@=;!|%fHVuc^F^fp-^?!RF7h8R)XqJZCj78>)7TSxk2fjG?7zh<&6W2i zOk1?;X!5`1g*Un7t&}#Y9ctCH%)NUjOuM@4Xx`(V`bk_~$6`fNx9$n-IregmZdH{KIC%BY0X2`sCa)|O zeJwn5Wq0TnYq?sB8K0c{6;^gl|MrJH{>qy$?5yg=w7`4@MUS! z%8H}f_T(&MM{1ffxzsB`r%Ua)Mi>+tOco!rat3Ai+ zkl=eOy*D4-7|U0FO}hK*O<-Ni_u@-Bp~Y2S{>ePu=6!ts_jNjR3Z6|%iRbyaNL`d& zU$<8L_eUqg$Upxat{+smeOKXcy#DN03XW$IyzA^`h1b3^`nzAUV`+`~pD7~Nf`=a- zls5F9y>I=+>Pf4j_5@vBU$<{(=$id)KTKb zr;N18|2rky7iQd_^TCbnW$XXe-Fxrtn{oE6eUm1q#?;>*RthL?{bjQwh-;c<;(xu{ z(SH=HpO~~1Y%Kq?`}WUM8yE4}|2kPOf6md{P<-KGt<>cI|J7H0{k3Pha)E=0>fIk= ztY)02A2hhP9?5P}*f3$js$-8|)qjW;6k+COs40v1I{A+fckS!1P4R|u`N z%RKzo_6I8#rY?v(KP5!SZTSnOr(a*mY&FyRo!qa(_H^mTUGMg2{C*o(qxiC-<%5^< zB72R7t;HMuPRVi3(>aiTP+)uZsXd2g2N(U{`~HJ#n59|#ggu!nYtpt(4GnxaJ0>c= zbfv;RAInw$RIB3j8`eM2{->|rX3w#p|II0%`%RzT{WxIzx?lM5*9VJtyr0_J%znhq zdiu5#R_DVm`lSErU-N#l+T?wE#HNL%y^oC8`B#Ylj87&@c`N6P9rg!PKV52i`~TCi zlK)pf{qtX~|1ti>-Ur9^qQ0N4>t4CX!g$(sqtx|(mToYdaOAb>Lfbf%^LAw}@t!}F zBxjcy{fIu@)~B7bCfgt(K&WMZT=AOS77hQ8-Z0$U`?0|3u5ZnvO*)*?=J`Kck9o`M z|Ia)1V%?#I@?EoDcTK*pSJ(CY_5CUB=l*`*w&%!v`zGPtyTqP9tYvhcy4}$I`_rDH zcL(!4O()i`54ih%eea7K`A=H_f>5zBPwybxoc1`|VpBv+edbSrpHI6MpPw*ZcJ` ztIwt-Hvg^uaI;kLhQVtQr*&qP4tLz2ufOM!I)(B3zU<3?gx};HT$Y{6ylMRh&$H{} z=X6^e@Bh}eGiHe-qyL&xqf~dPSEsM(Bu*;gR^DT~biH&S<6IT<-`Zy`-?ERKKK-i3 zj*0W$Y+Jix`M>I%1L2+6Gw&+6SbiWv>3uVVI{r*$? zbmiL9jq6YE{qaKa?yIcHvpmg;{;4>rIPWQ}y3fIRWPjAE?(XY5oMqmx*d18aL{;#e-H~OOz8St0;)%`U=KVN@ZDIqQOAuxI4*2zZ8U%yhg<(qLl>iUYE zoObqCt8N8~M8B(9yXNAz#?Zh2S?bs{Pv^hNvNXP|S-E**it_sywZ+d5-7hb)5k?{*9y`Qh&q0`CcCH15DtAN@2 zy+2o!r+$r_vvbu&^Hqv1v0qQ@Ix1}5aOv{m@)ydxMZOp7cTB%_QC#c8k?DF-`_(%W zl>>h}3N$o$2sl(L|2-kfD^L^T`YT4djy>%7P0j>equ|a=&?P#@9cQU_X0a9(YIy(u zvfauud*3hhUAwkcZM}N;*7e$N8-KseJ`(yr_W!G&_0iwf#_d|A{dIlMy3&<-@7vE6 zUJCW$c>R2G-iDh_P)NZfA!pzsa-+;*XJE*kYjP$5fF8{LV;V3Yh6@m&zol-{Q5VQ z=HwQp{5r}f&~`9-_3_uT#|_L*iG?jUQrW7qb%mH8Q|gK=L+1?X-Vcj%cigr$JF=N5#GT8K2EA ze-y3s@)1ki+4#ue`$CC7ot=!mI*;(=lCPclkeS$us-mB!KsCrfrpFtReeRfU%9Ggn{zvlxboy9 zCOHl&9o*+F68LAOd^~yFU2?g~($MHHQ7f z*@ja}6fe)c?EhRL!==ccb>XQt`{#RbLnS`kMdwpLy%IUax&U*U{pg+L^uIPk)F%aG1YBO_60jEBFISd5ILYwV()7>kHOuWM?0UOm-qxbm|E!x6^(L)<6cD!Rm-5!F z=k~^2ef58R?7zL?s^5bayZ*9SU8eNK?q7acj((@exb<>d&$zlA=2iM5KYg7B$tve!a6QyTfh6LSNthu%JLJ_I~xM`9JfOIN2rI>l;rf1WH8h zXLICn>b}jLZ8he|@)r!y%_SQPG2MTplJDo<4rpm*#+2a2A%KZA-dHEk6rv6%)`Ty*ar)S^Ce-_)akJ~qNRrUKAfopsAhx>j% z)qTxc|6RwbI{ib9yFNOtPY(HgT_!~Cp-#(Z|CyaLepY-q_-c=6VDNdF@Y;_$nnnL# zzY=|Rv^(y{$ye*^9lEuq>puP8)^9$wY@_J=-CI2F?N)nPW8t_(Y74u`^%sjpR&#Hb z*`h0ydSPLTP)nNb`S(@!hyLWvnyC2o$FKh_3p{_+E^QVwm6vR=|L|XO-H*Z*Zmu^T zIqXkb9`KsEX5QYa-|Kz<1TX$}x>M1-sM0{d+pUHU;myw z-E@7Jf=kxic@`Z0Pkug*Z?Zc3{)_O7_v28~EUQnBBRFFU_0WCx7Do`|t0I z9jcob{P-Zvvi?`p``!1hadU9oU-mxbrijQXzr>!0KR4bu^vmkHqNTR}n|VTWs~Tjs z9KGAWtK($&{^X`MM%T*>PdctyOEKnetc{(%=rN~79KVr^pX&hI~K z7iPC`^POLEer(%=b5$3Y%vCU!DlGc{iTjk>8y@R=*~`l~j(xW;-?lOQ$N%g*we>Id zmcOrSsoJCeaOxjj17nTU$XzD?;@=idHsVc|Wk305f_c5-^#CKG;BbYe?q5F&w)b@( z@1I(D{~uR+%BhowCHAZKl~w0|O`4qg&#zTd@pNml^zyymn;h5Ia4r2BetgFD)a1P% zR&!eZJLG0&K4VVK0fB0sGq%xs|GysV_PQ>S9ava;^zj3>%I`0Gp3REXW_-+{bL0K# zf4iiwxh>8+%Ren(>%CL2!=x_T=XB)j?7M56TiDy9wQ%;6BtftTd|r=<+(;cRGFJt?2O+ zYYwx`y)|Wn@7@~?pY`KvUte!}y?3Ya>9_?gb0(+PcL~pmet&(=H_RKw$H|F!ft=U)P;TWOfS*wENfgTej=Q-_&2G`}1mbL#s z?bkMY|F+5K#qr(y0@*^}bMFl6@4viX;7Pvrr$2GUUD{iterY(&-uV2~R__-|A?w4Y zT}*o48FY2k>#wKQN?v^(S-<>nxnA~`nxA$1tCza`Sbwtc_59sSF24V&81`b<+cPnY>VuH0q5s_k8pdHiLu*N2a>Nbitq z+mofyKBYmbzet8LhEZVc`bTlz`<>(Whc=&|v?R>)>Au}A*XNZ)KHZ%FK_!3Id|cGn*xTW2QmyC0joAt8oxB|-Sqwge4$jL!-fk6 zVmz#UjY$habVMb2oDv1K&YW6Z_()SI?%k3#dwRX}BK|aA{}S)K>gV?lc3ZcHl;8cO zz4iPZp|$dTS^jH&ty6H%{&Jw?goJmzozT~koM8VYYu4?b_jL`!nM41*&hYM6`CFh+ zbHm|0lW|GL%xMQ-7O^(zux!!wQ&AA(PyO!5^5=PycN71on%QBQtL|*gtKD7LZ})oN zU4{0d7)9&wI*yIIPa4kXE&Y1;Uxn<3^|qg(ER9_9;AAg<^b%3i7fPqI zy^4aIuD-vv=IY)bFTO-g^pk#a>h$wpjNk=-Y9h*-OotmbT+k8a=5#s~u)sq}h@$wX}2O)bWNCZZ5_*l3j4!9`Csm@zv*7ydgbHQ;4;6dj=vX|e}1z(&hFXr$ID%w zK7Vgt&R#$3VQ;zW`8oe)O}b*@)#x52s6YQ?$nEt8P0M@UOZq6i{*fm#@m{6Tg6tJ> z8#q@Tx>GB!FU`o=-z{bAC34O1xv(%(h{u%c%Qv?x$vjAOvYyo5ZsFzkRb$)9We@#0 zgcw-Tq}B!W+p%AFy6Rqb+T2LF&4O>qyNzF=0iAn~*@~!|?8PDXXr9&D2||YI;gf%a8eRJm0fR zHCxtSF1d5JZfAbHYE}FHV7_UQe-guO&YtW2t(RAye&peUi;sWI3isW6>5l56nKI`O zKYacF;hC0C_D5=^&*-Jet7lH7n*h5pIWEp zKM9BqYn7bg5vy_G`udHn-{K2|mcM-T>&#Wf%-gfr%k%TUfc-haM?{#Hqh!tW| ztWK?o8WCE8oGe~n*M9g~B07Cq_ugkSP0w_%7gvYU6nQbucT8~g)jX2 zq273fon-pcPj+9QY|XXjntew+^Z(^nGRNmR-k5k{*~;Z}swY*b?|vuzXy!@T`RwM= z8|U9Q&z0slZh9j}-$yy-`L%F<%OgAgoeuB#{N(G0MJ-xlygAGKPc2YUm@p}WE4(T_ z&-(3(>*tTl7pz{yV^_nL!R&CS^^M~BjUQXqE%Pt&I_YxjOh!w-b<~b`4#{%8PbIqno46|PpMBMzGcd{mq{nH~yTuFs23mk#xuqqSt|nbUVqu2E_H7#sHa zLu}lnZ<*`P&OW~8m+*_}Y>V?_ZfEaW^66Li(&*ol{`+&@TX$c}oN=4~m;ZJK)=%Q6 zMRVqS*(z|NVQ*MwPN@+8w-=K;zx-Qv%jmq^^k3{^W=db<)=$$pEhGLU>Mm#YTG76l zKhi3X|2z9%UjIq`?0+BPPNX|aKE3s5!5Pla6)TwTa5ePXZ+#-_!kFvhe!4a~O|P-( z!k4KnJdgS=ZdsiZo%%H~>Z8}Uxi5EV`re3+sh$4)Zv}G$qngMJH!&_&m&SwzGggRk zu{Jd&ESREW&c)i2m@uJ5MNpXg+NU4iuO@v8eYz?pFYoE8mJKzP`#Gwwgbr!s_X|Fwc zr2g0WS8|N`Ey21kw=IA4|6x(}a;drV=C!`5nm)7epQ-%YT@R!dZN4Vbo1DJy@~Wwq z!){uC>b~|?@Sl4R`;Y9~&)$39JU#1aw>z7o=}Wo0UQ^*~E~9PcC2GytHsJ?W z-kN35+`z!Vbr{T+`k4JOyCYUz?R!_c^Op9XQ=jil z4~={F!F}J=$IXlfW3KHi)d>~*)3CtLc>1xhyVeEzZr^pd+mE!a(rsK?Dlb%?$adQ6+5JY92<0Vd?HxQ1Oc*1Uo_zbi ztaZO{*v6V4-l9+byZ_r5zdn5R$$Hseo7@5_#3YhW9ez{$ou^0T&x1(kBhlL@e+W3@ zG1D=<|CDam>Hhqsx4-#2GEZ}4XPxk=Hfw3RXxGQ+BPAz)o$9s7;;>X>miyQg>z>CZ z?m5Y1>#Ze87tj5)?>t!8a3EmGHH%X(dc7A}SiG#LpXYHRh)eO|mtXOx&b;PPP!Y&o z@^x)>wAZJ&pS!yh5AHc|+9BktfqsEQ{`G8! zS3TKj<@_u2^+oxYaodzUCrwM3{+6xPVtt^on(?2i{!NStX}QdD@siUb;~z%vzWeU* zOW~?RPRF|WCGQ?jk`{fm#VE~M$?W-!H@iPuy%M!rcgmshWaEin;Wd1P&y{|hntp5h zhsRvr(&p1rzlz?EYUY=&N!WO_Eh-}C+~(tJgcFlb+9*lSIadAjOZmH9VOIYm{;$5O z8{>X$wdTA_O@-6D-q+4=vcC6T@Ops9uXU|`CV`9>G$(F9WPa&i+>@ufU&ZZ@d-^w` z^35>uWlo=aOCde@G?q0zlm?#WrNaR>1LwL*SF2rR~O~h|9bi{P4DTc4G^0*|lX+ZdoQ zZxZ`trm1`64$SA_TKy`9&1PH3{==&@Yj|xq?}&YNVrja-aVcwx$oleY@qb;@W?3`~ z+NwMU0DeN(wL+!IaVu2pZR8CJv02< zJ`rV(jaQFLda+#&v3R#8(Y)yIsa#>V>USOP)z1zJY~Qb6=vErCbP}(YLrzlHRD+Yr z`72-Szr93iztr`Uh9UDG@kW`PzxnBZXp?NZsMy(;4}%t;wicN%+l6Plm%Ll*fr9)0 z)+P)%99PhVQ_2WzmrqCBV=0Azts6(J@+d5P+HtQ)ikyDkvF7{U4Gv6MXQ}z8|NCOqpPu|Eo89)lr;#!K?BZgNeLNSf zre&yp=eZ`6Ic1B*C*hv?zgN$CddPnI`u)92*LI7%Kh5|2=CSv6f&BTgi8F4reE#~0 zt=;8!@VQ94O?CCHl}U0}+RDRbuJm3q|KIbe^@{QJvm<9r{~vH=N!zRqB~mj^JlX%B zUotpK>>%Gn`QBu0L2kC!(lTdi52ol!>~>$W|5sc2(XTTy8XGpWe_N-2W_R{KpIL|1>sy@x=XSvs>p=)iw8@{NPfQ=br1G z^YEdV`qOXC*H5ID*sNIpV_kNlK*IN*Yxw!q^1DS-8{(Jze)??c>7Vsm3!L=qoZhc~ zRHDD@XjZJ=6MOT!`bRhpJwN`*A}=+zcHh*C_3v3Gbx54-kDu`A@$0i`-}b#RJO1~_ z1oueE{WoG7zyDqK_S~{HLR>n3PhBu*pU7ffvd_fIwocMp|CI5EMx&?I`h88x=cla` zytuJJ?sHVudG`(0OV@b(k34Q-;2^wZd9)9+b;GfXr$rWLw7glXwKV&7P}G#KDekLJ zs2(e>$}hbtUj6Sr)7jSbd*>gyH*MB~RlBeI6x94|a|m&L)cJ4qLDw*a70bM4|J!@~ zOcAs83X@9{#iqfo+mf1wY$83>y@kZF-vyu z+`2j?^!ls{dHWWPEgsju!_4ixWZ_59?n0Z)$HW^y|rvM?9x>KR#pBnU{K#>rB_Si@9eWD=2HrJ^aM~ zwzyCK6R%{M$r*j!YHd#GgPtoNUo@TUC}H<+p5VO2UaMXwtFfjow(?vN^3>?H=c#Mk zHcsa}aL4(}hf_aolkQX+Yvq1Eq(1B1+r!>_f4J}GJXsxmP@rjY%0@^;zpqd?w=De7*ng6565H=@M*}JVPuSxA&e7yMk4xM+g zrmUY6vvOuUi=W`}ULtH`#cW=aGZW+Mf_OBh@9(ZEDG zmuZ@H${#w`{CGFDCvZi&+uG_Mr}oAw^ep#!#FEUSxorR1x}du%Oa8sA3oQP(>+jL( z^+Dpblc!tS)@_>b|Eo%p4Yy9Lz5bIgoJaq-N&igNncuw0GTD9ke{lgn!%u0-{59QT zUwuNu*3V~q7W^}!L;dRU>CfLEnId@a`|_C*i=K77U-EwU$DikPc4i*DI_v3)bgrh& zcb;hc7fIUQA;w}c^Ih%6IXk93y_V{jTA6T8#^v`nohEkamCp`8{F%%0=-oQ+ck92; zfBGdu<%37i?YQ`z#!DyoAN0@qw*Bi0iDn-6&f*<=gj>7%0>b967Z!6!eHz+R^`mWS zr_j^A8~x%fzuLsq-#3llQ<9*=@kmbm>E-D>4Qj4t8h6+DMgQfuDk^@u`szQQ>+koy z3vu69-4wfji3kVNhJ{XxE&ka4o6zaf#mZ#xaH?}#X-7fz;dWz5+Ydbp8E!b_@vUER zx-52cL&20#0pkK_tGgMcBzc4u+&O*ldBf+U$5Pk5 zGrm;$L)dSI#?skA7nyym4yRVW6JDY9hi_(iv-<5XW&7j)Myek9b@bQw+SPKeu5Z}L zB=q`6!0uh4cghq`UlyHqG-?07f@Nkq|MC3*t&U0BbYrn-*8vvs*n=+4&c8QCM0OP2 z44kqrs7aa0L!g~r+WYdw9{7Z3SNwP0N3Yg&fye zd26e~?E}lp_RGgsn!jeP+8Q738FXc}=;TZ0k3AeCGf%%`UBuA!h=nMg0U{YSG`9B^PYJbSx-`*QvEhDc`SCJirHjRc#(nOE0-UYWY{bLYg8 ztIsa-CO`ehp(n*18Xnaqq{Jb7it~i%mkm?TZkW*E@ZaiDwdIfU`i#xA*04DLl17L9TrE?yPw(oL#(*fT+pI=Pc-nwZInU>2L#-!ZX`pEO^hf3Q- z|No8>p)RMhS%d!VwO@TC-qy%vk59xo{+Er*N*WKG2)w})7W`m>`gVrGV8_C_kv04N3#c31 ztuNiLqR7U!FGi`k>Ee5VaPd6X{OK#%EEjlLdv}MF9A+vv+@kd|_He0G z#?@(}|0Yk|aQ$eMrf`qHZ&1c}r-+W3%0j6dlIKk?O5SG4xzzjGs{br=kJ$EBln8O} zzI%7^dqy@X-RFE%Vtg4G2Xv4!mQ_AJ3OcN|~vCy-C!%*PK5NBQjjPpdGD2>m7~^nQ7Ht?JXqJ&WQv z%KP@+iF*~Ccq>&_%}@Hn#iWag|E$-4JigCD`Be(vSH^mNlk2-2BO32)I-|V(@2Oa! zTCSjH_rGFcijAM;ID`U&* z6f3J8KhDMMu@w|K(kP*JaDh1^V~m=5-~N9JdG9{XwbK8e)EU&YUGZRN%)amO7arxk zuipQ`P50iL{5?WaEonC9Ulg{@Y`c?p^Y1CgbpQCKMN;!tKl1$i^x~the5VeyY?>eV zePYG-rRN+IqB$cr?f)-$xoDPM{Xv~4LQh!(tp92KYu<2PbJm7Qzc^Aq=hzjk-{X)| zTko?W?)6pIvrH3hxJw&Z{66S79MjgnUM43I9GuL-uVz}@Gi%bm<<6S(=e&Ni;`i|n zy)v0!MSeaLkaW)4%ssdJ>NAt_!&T}@cNSI}M)Q3$DB2Ko*rfk|y{WuI|LNQDb5{E- zTXlM|aXgQ-%_PZ-pDNDn-*i|>qn^cPi=>X>>3P~0v%j96|4{V$Av>-n(?@JsSEjt( zV^YW$w%oyD|BL*Vdg{y}l}l zN518($C0Ef`&MxrowH-I@s`Jyr~h6`*kmku)ob_PchBB6%48|E-&cRXx_7SXS*{cb zYp$$?U*>JT_Wwuy1QFA(M$G+x`@lX%8sz2lbr8{z5GIKQ9vPkDC!V0zok-3(TzZtwr?CCfJV=P@2O{=hd+ zZ*1g}uTZwLkUM^+<*3cGCq2b_+XG^BPk6VjTc3ETu+?!|SL6L8trLBjayK+MK4@^f zia+_aT=Zx5L6IklInO%VH@thl_xzv0M)?5K=Ya+d|B4)~dM{f~Pcr;nYe3VM(_weMGh&z;z3o@^c7NeB$-BaQ-DOLDFIQR2 zHrHX@WoF~^Ym34Z%xjNsT@$qF+`g(UyS^n(y#JSDR*6%vsG7#B{dsa< za6Dv|n4Z7)Q}~0;25shVY>r%EVa?rfrSR)I*)KiEs;ph4b~7GwjlVwoJp1eEPv1q= za?9@i%JP_Nw_@DMj*53z_J*F9`&kz%G+8FmY2q4YF_#arzl47C6l)%7<2 z{RvUYH13;a(=x;#olwx)6P4u`U8lY4>@4Q&%<#~y4R;D9rpjG>eB&0&j^6d{d!N2Z z&|W5+)svT)-5x)+`svRWNsES{@JB^K2X8-%P%9BK{ZUbxzC?e? zyBIHh17+(S?Yh0yi%#v0Dysc|!hio85vfS2NmrZd3YGOBB^Pjz_D!xNl=gl3H?@J4uT!j5UZmQ4TS+)L<+G7<9 z5oKfh36sC=zcc^#^rqV@gkMWvtzC4*UF%3y!E!@)^;QoK3)Qx-Y@3cXeEiEe`*yyF zg3ydzf+yejHT`~OfgPl_~@rUrL$*EG`v5zLn3@@V}J4F7v-XVckFST5VGc> zO5xKLKaVE4vwcq1@5#)*vGDU>|7k}Plf&Yl%n0CL9A-WD7RRe~`D#Ktc7K0&$XBvT z{k`(TmtW49TW{G_(SBU`jpPr%FSjcru3vxnd+*Obd%yp@d}fK#!D*9^X6>1OEB5*X z7wO3AlPB$F*|tyfm#kTnuD)*aGR{xeXZ<_*>2B5QVw=71G;jJI|9ib?dPfFZ@zR(d zO_uq6dp<}rA8D1EId3mFYoy7J^=hxy$Cu@tm|3?l``Bc?3RCgeWwWK^5}R6h@*EiE zi-~SpGiBOD_bq}I-BBH1{PvxXx6PjX`1jI}<{93>IhW^5vlOl0Tr)p4`#Gn^#ih6Z zE!5EqtyM_pf8s+Vb6wg`yzr`-U@8v;-1mRZMI}Q)?J$ABfvSN*qX>WMoeny5# zoUuXhW>!R(Yp_=LBH_QTVmzYH6WwnvVA>uVa4|FC?vAZ*`OgcKDaSrcblkzfz@y?g zOP%u+YkBDlpNR9*kA&;y?X~w7jVaUr8+G*VN25aRYZ|jV&0F@LJ!p{;ah&^A`1Zsc z=V==z$3b;*V8j|A{O#_T|8wg^B}VPP;v$nYea2? zL$~<#ip0}AKI;}1{8^+~F2Fa3?~HEqihmVZznd;E`EY5CjF1acjl`W*aWgD`8tYH~ z@N^Pm^g&ybZ_2es=>iNa3=Sy@404QD_RDYv2rx0kBzgzBE}0*=vo_&Af^QGm5`p=H)mzl)rNf%mk{eBds{__~GmWyTZ{o=&RG(EjVuN*9@ zTwh)-*`fGo=OM0(zc&7hp1=F+x>xU>$=+DOT9NOO<3DT1|Gwt7-QOGEG@QBO%{5`2 z+`fnicY}Xo4pW-kS^AQC^&{toI7PAS7kE}Rr72cecB}ZG;KK|d7c5x0gydhHmRKWo z=UrU<0e45;vuUcGCW)<+4Dmnckx43*#LuM>@T9&&64-==G6Txp2N_6;7Yw$|I?lJ`s@E* z6)qS4n(=q{Z?*Uj0uTK8T4XH#ZK|kPz-D)-?bE6iGB0?RU6}VFS$|!if)A@r0Y?lw zL;noX?JrGpJ=EnI8IQE8G({(5t+8*M{_tP-(-YdWoC0R;kDB*;ddH9cYis85a2_>M z>gQzq-7l8DNAz#s=`+WaXBtw&z z>#vUYzW!QMZ_Pc1oG_`)mZcBOm<5>*$wj-{8l-N~iaw*RG3Vgbwk^oQmjl_TCJFe}`Gl7)qUN>6hl_o^wR;=*s^Ib0%0e2W(p2@bvxy;o5Ey zvjPSiIUD}t4G*oBu2@uB`@QQOn<2-GXtm2Zzs)DVP!Q;vXJVtZ_?%0DGK0IMf z>HR$q4+?LT+Z!98ZF|4;)8o%sKWneXs6J*^zMyu!$>L@I{#3)6)A!y?`dLsnJM*28 zWsAtn`u0Wh|5p_LF}5qx*|LAu>x)I}^n&XCzur*&v0wkrw)4|}<_O9}JUxBBzoTfw z5?OApxW6GD76OX}Oq16e2ezesP%-=}XgJ}4TZWKNPW-w1mG$3x%mNvuB)mDc>Y1Hb z$yxoAcZ!63BGa-WNsGf;|Nq2pcH)cu|)!9vxdbqW3TFup#9IyKK`;Ohz*|qyi z)cQzYqf z!N`e?N%rENGKP)U<-07tzTh!i`0werww~Sdj(-eZ6MufquVu5(sF&1m9-NY{;`^({ zqi5>nbw|bR=NL%ao$xbIuy>si`%L3h_`l)}zY14Rs;vFr=8}5qp|rd2lz%6mekw7A%seSSX@rYu%DAZ5%Y`s~!hSyLbNFSmEI*7vymF=d|jB!{;4E1e%( zFWlN`^y#PFls`@)y4rXDEm&9JAo$Zm$Nc||d7s#-SGV`wH}}2M`tR$>Z|{~|emJM~ z>C*fO?FZvK|2V2myVAtG{Pn?-iRToQ3wjr_eU#&{`Ow$g@Wt1}$0+ZEwEmK)2}Ral znv6==+FL4ZH8$>=TK%y9MEV*Rw9G zuN6`#Ni%49u2?2hcWiOu-!CeXBz zL(HaYRd2?kV=Ct|1m-=HUR&w$jrZSP?>)jdQq64+`&{stXew~hZ^Ln2?JtfCIW&s` zYEpmN*B3^-cC60$F#V(HntfScpDkBBcRqSn{q-Fxy4ptJPzhT~+?GB3k zYt|i;ob^lj(S_6Bi=Hi+n08z$>m&DR=5MABFQukUNjaqIexqgQ-F=&!l$Cw{ui-x6 z!t7xz`Si-xP}?8#Z*y+_aox-~!fK|P_P>>WAtt5mn=+Oy398ZKV@)b|IHMiC!|qe< z2@#`+*LxYh?OV;2!fYB8@x^A%E#bu*wv?xO3hQ@s#1|IU+_*9`tU_b$4E@5=H}mVx zy6xV5dBvtn%$uX{cyoOSFk1hmY<_fj(wXL*ddU^vmmhJ@skh-fGB5f^#J2gyEIY1$ zNEY}%&FAe)^B-ALbGTA118e5HE`4Z}YvsJYfA4O-sLQb%oOgWvZ!k-Ed)y+A?|ar{ z$0nWpvwDl5X{q!1t@9bSU&Hb-1!7K5fs_kU%a~|8~{s_`jpYX`6kgrnl*`6);vfn4U6@L*w z+qw3Vu3DnsmRFN6`EJNbboG(Rv)Gw4au9Y0YiMs>;;oqvyRcCxW{>jBhJlJYZ=H&G!f6o4C@4ug8;(OI^$vt~>=dX*K zJE3)xUeu!i_wrAd?LT==77AUjB=4ae{`h109=Q)Qem~o>;(ye&X=*3Cil1D6#QpGb zjbG#C%+zzbf30eN97y2csj)wBntvmo*`H$z`)>wDW^kN-{dYlMtk9;vRrztVFG~8? z7CaX(KRvx_wb|`|oC}w)y}7Gu{}Y`I0kPQMP0+clbzxy_6Tled!m3r^~2r?x^h%u(`VoYS`cE6(^cx1!BpBB3` z&)9~4{29l-+53L8)U>j=|Myj$*6ua=IW^xP;9s-TtND_rbv!0qbh=YvI<>xrfq`q9 z<1F>gSFH65bG!sz|J}OKynJif#@O&nU#pHr)c=2Vm~H?1eN*p${~L2}?XNHM)~%24 z<8xb|ufvnj5wN_xoNYDx=L7qsxCHOjl-Hm8_Tzhxz~j$1*4fwbxUzru{qcSx|2qly z%y5;?JgcVl%XPM0%6v9Q-=jKt|8d4XuJEnDo_-2=x=%Cy^}VP~OXFVntH1xX{nn#h zRbOA$W`DhUYpeMG?@e-TwKGGMSG9&u7WkjNx73V>gO`!*w%C!cMjyk9cQB=}F5J9R zcKPI@^UND{4mB-$wV8P(v-<2D*?rmiYY+V3YkPI9Oug;kX3fNHOLugKR4q7QUoW4e z8Sj(*JwnX2hgqZU>#wYyoy!znHA$Ug@G&-xXSx$~p_*0vlTe`4oQ{3PlKBTGY}_?n zdRqVEUhT>q&PN|CW$ru>w2p6<$cAe{=YG~i`~7A#(9k@jUBA1#`I`8L4M&UlxB~AK z1T`;p^x&SOSMb-$uV&MNEeiD{9*C8c04!3m? ztec-U==@*fVWi1)>40&6T4^LZm-YW2OdfpJKe8`R|24tw)Ug%kGIOhbOud@Y%EV^D zakSMmps<6%JGzpG?+ioKep}(Gi~7$@c>YjJ_4uQ`#+!b&Y)tG-Hk`<2wV`Zjp-r)W z?uFOJVx6|F4Qi5EbGV-=WhpPpzTmn#bp88l@vDpet^M`A^tIme#*3{T2Q3)elbKrN zZ%%$-z_w%!_XK6}=1o6$ie?FNx~XJNT397fuXlK%*t`#$c@`R8JMdt3L!|%lQzv*L z)V}||c!YbS;+=@>hduY23}bKBe+`RRxOe)aH(p zOTV6ew>Rl;>UrC>{tsGi@2@jIV#EDxSxd$9gU65Sz2W@uoza8E;BuSVVYMcu*z4c8 z*2(H;{&`nAKcx9Dd&dR_0TpKf1_6N-35`nu7d9j;ir|p;zW9Ps-BG4ENtelS#X=z$ z&VZY7A~WS!Y|Z9>&55@Cc7)^6Z}}HT*U4QrSM@BpwMj^G)>pxYe?J>E*)G(Ne<=Iw zYt`3}a%=(Zjc;sp&eyGu;1FO#t?ob^SkE4X*?Y*e+ryMB7tUEQKj7pEVM zoM3h@tWRM7(YPaRFRH$%Zm+E}HqYa>-r@3np}|c-&H1}i-d%pM=$EnPrHcA?uVO(> z-5FIy|E!!Eu5uVHG5__=g_l_-zfI}&_m3J|73=?>5(xiPQ+dWnyU^X}YPfUA|MKdt zcel5D7cR57ynB1tlixfJ8{`;WnNo88KZ-ASSl-mdF5c+0Lu>EB3$sl3i(MDE+OB{7 zoyKj|TJZ~)Z++FYEPlANkR>Dg$x5A5RbPdRW+nW+|L{+d+vy8e_6oc>{Nc(JQH{s9 z-B#Y)WxJoH?M|lY{Wfnq!`jbEhSR^T{h_Mln!H(c_uO~NI(4F_i+0o-gAF%tJ(44d*AY{U713Oa*2~qDO zEC#+I1$+*>Zd$xrqCC&>e#^P48nb=+pWe0qd}U=fLn21)Oa0cjaY{b|FHL)Y$aQT| z`Xe+euqkNo-T6KnPgmOfkNV)^RIjyjo7%gdwcKyHuV-95 zRjv@}u-(SLQpU4ey9%=`8IGI)9BzvAqw z`MvklhR8c5CV>aU?|c5Qym%XY7GnYTBt^X~PYoi{h#eaaobd+OiRfA98lo}GG{|9$zNX_uaU>l0HI2sBVL z%bTnH;Xpt_|69ew(sS3YN#wY?Nn@SUv_-cbS{Z6R$hOaqR2Lb4|fe> zB`VJr-qtE=I3l3pb=z>pr29p+GP;%>9%`N61NMGv+ETvKfH&JR*vpzs0nyPH)%w^eZWU(!mRI@Avv_T(D&KBc7Y9 z%e{}37G@s@3;v2TtA8Jz zd{4M)L%W(*fMjw*<`G`TzFkFCIhUF{KCS%|o~3#6XM|wFkH;$i*^K^%)C=lGzGW$@ zH`x28wtMMts@2pl}J^8kZ0tHJt^s}+rbyS@n4Utjhm>`xPa|9Z7ERm%2l z<-3-dbsgot-tj!~rMLJ_-kr(n^Y%*d#>KBRYdtLBtVt2Cb{MITh^_MKKpTSV^>h-iO~6vgK%|2PggbtTyge0%>%&9(d2+dobsO5e*ppMFeonG;sv<`+}1bLIXP zAJI9Xr}L`~%FhXYuWMR!`v2G08a=Z!>z}WmcVF=J)p*v`)BIyBI5uwLD6ctl;2gWv zxmAz58-ImcJ~d`!5;=5xWCNa?p%eal`mHCV7H{1I&VoWG$-!ze?qesKe%mPn6| zD3@qcW5R+E9TRRgrh|?f0<=VgIzdM*x2OmzZ4I7hTpsl-&nd`td-3emSA`<}e0)=5 ze($*Z`}_7+%_c0})&Hr~=JvBYPcqH7E1%c1Km3z*cl9<^<)6ZWi6zs6#5yHEuAg1! zxbAk$x8wi!v9auG7hBW+*-c+0TzGGk%cH9s=lo!>Z(ky#pU}Q?)92oUt zxaQ6^JXu@0G;7P+TPu!qx9VS?<9L8kO=KdEiZCD3L5Bko9->^VPR)S|SM&spwniTB zO5Lbgx%Q4eS8{jPWan*<>-}$=+eI$g=e>3Me61^UieGeJe;2*$?Vf+)GfW;PmFK%zQyYUcOol~?T-aM-N#0zn|&I{k|ZZ8)5oBEl5N$fd~%?&et@LY`VC|?$u zxO2siYd_0mBe~{9sopg1z4LnAG!~d5C@z3;4nA(`3vt{@8 z&HAVLUNhNnyH`kG{y)D+PJR8R;7xDi-gSFzUOang`F(kzIcdQ!4qZR5ZqLL&<8Da< z$DKtE9;^R7G8HV2jI^nq6FlqXrTI&fFDO>3)Tgxu?X6hO(ldwQ0i%Y<1RW7&R;Ggv z8v=AxxJ3^&Caefh6X9k#w8&vbh*Ns4YO%rI!&PVF!d7(d3@;2Su*h>h_p|@f_Ll3P zVo(2)-uj^~{+s8ky4U6B{?C4rf4c3w-f8U%V#oG2-VJ{h!Ikw!P3hk!)4EIb=kKmf zoHTv*k=w67JURUA{>PcRA9+qxW?3g&`tRf3dM1chs6b2p#-?)RS4>Nb>m(kUb?z25_-+%L8_^bcjcT(Oxezfj>P{f0cRiA%z3upOm+3K_;An=3ViquF6 zSJj|B*E{_e$S`V&OvqBv=4Cq6upvQDMVOUok;94!T8_r^kDs6Q{k8S(IWt%7n!R(? zl2xVx7aq-Scl%gkHIM(~tNLel_AS{z?S+ls+poM=d*y7t`q_CR?VH7R+?ggDqcZ*W zCzYZ-;iepq1OA79wo~)5HP{+(!y@R)!rQZd{qx%GcBT8o(R#U4{k_#ZJ8y>{Yd^lY z^!}3o-EW7lDtz0R5WglpTUlN9(?0eV{dW&*cZkneSN+THU0=yM2AjXjE^;WAUW(kA zHOJ5^Y{HcFDT4Nqm#)RE(`asB)CigI#)Vsy>9E6w0Iy{3R;NZ`%eC4y`Lla`Urh^r zHT!Dlv(l6ccXvEjdb4dl|F>DproV=Z3Wuy21%+_m7;-*32 z?#JV#A2W;SKiu_2ks)y3e#i6gxc?r0y~q3h%^(F~TPyj(+Vdx0K4kc2w}Ab2kM-R$ zox?KfOBcqv#(FM&*%`XLMEW=A?EIPkj6^j;I*s0~va`PS<3fSM$vaPaIs`R0yi}dh zbh?(yNG104M!QL`&V*-X2~EqM!e%w`wg>YQ#j2^Rrf2xvc6;$dpqaBt!$Cr5`}}|J zK2N*gw}PR1? z@8#vCbpro?ZLeLoKkEJJ{k`#1U$4KuBI&o_)%D?P!&m>?`|F#({jUc1wz8%ja_&rw z;oGaX>D`GqRncRy?qPv-hxD#3@3^?9SqP~ee-yZ1*DSk(f%Tvg1Ea#{1XI4a3U=>? zkJ+yR*w$K@G&3a$=?dK8|F%t3kn!4a>67*c|M&f!qjI$C=$XY$jwx~<9{i5$FMb~J zTx;inzP&N=ukQYcEd9D>*RH2Kv)=3fzj`~|yz%3j3v(u#%Z4;wyj`^N$g;x8oefm##Fcr6KY2mWb}0d)emrtkZMFCV8jn7KcUUo#IjG z=H?I%;@WVuu~|2N!FGw2$Ldxrp8P|kTO*uRQNZyuXMw|o^(kRDg;rl)FY*0vlGBQZ zP7nHhzB`}o3hg$r3|v3?F*9{a+s=_E%l5WH2f zP;`UA3l_O@m&w*&w2Z)$kWbi;&p2Tkbo&VZx5Rgse+IJk)%;tKki~f5UG4h7rdm#~ z87C}K;!K3wJbX?ybULMq8WnO2+Epowerx+$7@`>*eqH=)_A1$56Mx$7=s)>GFv)|- zM*S6&gigT>$G5LTr+;R;)6?2Fe@d0qXTNML1iWgi0pEP-B zaqvDfH*7!#~ovlUt=9IS8q`lt?eI;*bq^yGKFiLU4m&=Ry_z+f%l8(^IT+qy6R%k#S$7=&0>)Su#0Ti-dK^VuO2yLJE6{HODseRzCFlFp3)#$`L0R=hDe zvoR#2F47c^>~? z_%~`@Xm{Jf&xm9*vKc6U5J=Sh!Ljik(=skryPP8*7#F{YyRWh7 z@7AX}7A?Z>cO-RcPtCf>xYna?32*(u@RoZT))Tv5>23Er9;XmFo4>_!l9SCHwem|w zvx8FpUKHn@w?6;jl)q7*g`3XIyd(O-`Tbdw86P~7}Z=E+;io{D5x`x+kZx-58Q_rYshI8M#}@xNq}rTV2k)6%D_ z)8a+ zSqv<)|5=W!_uM_R!|j*5=l!nZMc1`OLf4;D$bEOS=l;%?AS?gJX%}xl{J`nSpmkQS zGr!+1!l3fg!Fv|Y_xFEO>G`K|C;XhxzW=PptzyjhA`4RgzleEhcIT(X@rS3+PyN%@ zuAWjv(?+*ge^Gs-$ld5)7q(8eg7+Gi#(s$ z*L#*d)ShyDfm3m5zSfjI3P&!-ak8x2dqZ7xXN!nUXFy&?%~Ga*^9c`NZvTFb!zx+f;uJkk>$R+YElm*t|CZgo zWOn-N^G=Wk8=IsInhy3JX?u5NV6n({;Y$=|sB2Q8E8Oq#rXQnv>;1-(tO-NJkO zP=0~4%V}-HH|v_*I8VN9iLy-lV)E26^xX+I{z|)FA7YQqdd73U@7GJ$iM=dlcRQ=f ztj|sevzpOdbYs0$*I%q%qh-?1xjx7tZy9qt8RXO=ZuO|99AWt znD#UL-&1@hSM}+XUDFevSqFW5#?f6~nG&s9qo{su@01rOv#-9o?|t>f-d|tlwOFXP zTUR1&3yT1`!-d*%oCbtKdmeNu3BTouC$e&!H-Y%>i&I()!n`8RHRJo7FeW~ z208`&c+q_FXZOye1#d1*`c`n&WZ%p;tE=TgwH6CHPUcVT)Zu#Z%41RT~%4YZNtFL!&=Gnw(T=M7F zx<8JiYEm1D-be*=ZsB-%|>3R2K9xK=XzFS`6yh9~y`KS6dLPFezGiCgj-BGPs z?0CC;0aw1i)_r&h|^kLt$Z+z?uCx5QV;NG!1}(rrtV6Ae^Y+4ABdp4;|pmuQzf<+y52ck=Rc>^!Y2@9SL5&W#Hp+ob>B<@Eb-N2S6wE&JYYO1O3UlB6-ScSsd8Wtv@4nGzZ7r|fQ8-ukxt_hw z$wLBpQdz#|W0tw7)%maR)2V2fAD!&d(zz?*<#hG8*C)&=7y6m-q1<7*chQ?EQf*=$ zGn<=?nI8X4woKffCc9QkFpP)Eq~YqlBqpEb4)SqAp3K}%TLrHOc6TJ)|03A+Eb8&Q zmlb`xzCEg66aMDR`Z)d5p5L#;W&deyyJc7-aLp_2XQ1t>tY5GE9XmLb_}*9f{aKo1 z{`AQw(@GYB>-TQ13{+PBKI37=@2ULIwWmx3T=bOa1F#`;YuyJ#YWFedSmG-i?{OHukT!<^S+F)`@jYxlBdYU3Zt} z#;##`$^5u&w%fWl_XQp_9)A3p@pvMStD!ZKNVz$J6aAp3G@bz(JJ#&JFly-T*icp=MJ7T9?2!Cp) z*OH8Mp}Jw2VJd;_zRb~TtVmj3@L|AFIxm5hh12X8*J zSlkC&yYFHB=i@en#;^4(UG-FW@|o_CTLOYCZcqO{bYe>6FMDgJ{VYjB z&yPK47;C@pQ+Sv0I;!N#8IgnPTQoDb3R#<9Q(nLD_+*p)1&i0P^VD$jz7b*E*Qv06 z8&_LZdY;8=PsN*`b)&xWSu{D#-E=MLV8c~?ooz*@AIz}hJYRe@WVyg+gN(3gwg3Of zFgZ8~olln9Id@InOkM82^3SEKrygDF|2lrHZ=Lk_%f)L8l%-pyZT)C#XDRNBYQ0v?;vZ`KdXJYA(x^vYpa_4`)Q~&;J-7JBxE9+wR>{D`@ z+A0uG;8nR~=_5nu!}c{-*0y9gxwF}O+GgKZ|L8GOOW=4E^K@#^@}cU7zQmj3!$b-msE_3G}oF*>`>8*ZAUGsW*))5WGWg@J39o##nl z-u&*=Y{_%>I-b{K?zq%H{#qJ-(rMbOm!`ET3v1O^pL`wu`lkK<*YyDr1`}Rw+BD1J z(X>B8okqdam2JYV^B;^kJmbP~!#5KT{CGE8dXGrO`)$v4Q&${v3wU+G=vws!BdK|J z_Z}}+)xGq8XTZ!~s@jaMljnKtx#tmkQcLKgT-WtVyQQ0yl)3D@PFz}yz(?>V{%=eS-bUE8s z7&$%o^xw&jl4sHaUY(dC*LWyx-bQWV)3c>h=Y?qXh&*iHW&UZo{QbY2RZFM+(-zqj z|N44EyUg0@2mB9&eP;W?WMrXa^j$opd%9@X56+29CwG64`+B3tN@u44a}&GKEQ=4^ zQxcd{u3ih&diPIxQ&jRv`P0Ad_P$Wg?0s}b#rOo;m=dHi~_)s5Xm$2`~jsJHfxsF;>PE&c@58Jao4jboc}f1{r~3)?7_e0q&@W6G>dJg_TQ?N*{mAJ3l^+s z?`Qt>Uy4aUis6@lkl^n7(Fd;ZGK4Xyxz{NQaD=prCZw<&TBxi2X#4bY0;~*sI2h(| z_^9r%KP0*H)nwL%9~pnhEQ~N?TbRbc?!5i#eF0781u?vr&)l%@nW8aQQsWWNwyJv8 z$EO`$MOL5pdVFm1|JS#qZ?XKZJnrx!VdmqO1DnJi=~tg)J5rJ}`s{BUe~Xy@ zoNt?ViDkJ}@5$h8T}thDTxEI_u5;?GnO737+7!sRD{$wQd)4pm+GxP19gav>C3~f9GGbJHom_BOn?rbmix(ry zqK#~h=EoWx84qk%i4<77{zuaR85X8h_4)UcfBct?V{E$|U|P1tbFOZksK!m5{>7F5 z*6@C<`S^ZCotbSDk442?y_-9p>wgV-(4@lf@Iir8!-@!otm?;G_#TyQc@&%cw^PCH zbMu??-ZhRdwO(caIy(KjzV5{tSF}$bbXQ|vR=e}(_JW7So^tZbZj@=n^KkTXAFtx8 z5*L5GKJ;CyLgV!tIV+Yfdm3f&>2bq>zoq_v??lh8l$jNtEwcSyNB9+{9+4Ft)A?u5 z{#4JjF=4H(qU?{f7}kc?@I-b4y@Vr`ZfwW@7)9JTBCj=h^BKX<({wlgyP13Z%)grj z)5Rmha{V(6zPFrNYp!qNz_Wa>$A!m?8~$%!DEzQs?r;;=(o=JT?TqXe|9*YrkjFkNX|;! z_fNk5RWa_`se@rhCEKzl)lQIS3iGMl{xRiq%vKgTljz=rY%M*eFa`UVU&;0A{gn|L zT~igOui(F$Tv<{2HRa#?fFGOR1-^Xz>CofzVd1Ohf2n)_#cx;W`>Q8SU03NpT{WZi z?DZz4-Mjbgt^U#&&~b71<0OfP?0X+Pz4}x$#QIyZO@e4ujX>v}e_On}Od4lAn4a|i zVC4mc$t`k*{_Bsm=l$c}C3&`HuX&}t{Fm%kFK#Y;BT?|?r`E@wsH;s*K6&w%PCeMr zay8j|-Xe3QJgXSb+0{7*LzkRqUSZO+YerVa3(YM%A}4!KVXAam`o}JXZGGOpOLq@9 z%@gw0&De1`Z0Xh=BJT}8);@d%5m0!}%71M8jET7mA*HPW~KyBLBTaz8t zf8J)^de8r^)3@f2cT)8_PCa{|cQS9Mv(f{}@OM`qDJY81neu;0pvpA$*u^(e9&Vnj zEKvDa!+EK#>z=DSZo5{y*9n!@8BX7sw`!;9|1XNu-g6(bnRoiqLVukOf6>|5U+xr? zdOm%d_&aF&I(aF(*=+y#c0^R%>?(*&lwNZ7R+X=FrsIZY%~$(4+l5`Xb}Zyc%4P)-817 zRa3{`@BMjuTCUIi{!2ez@cg^3CtbPq5*mB&@6MA-ZDv~YHtEj$y9cGVEr0)EV`UyAK$j&(0i+&wcF}hVve{=&j|~k)BW*g#T%ssPhY5c_xc)0{%yKw zmbmxsxnt3>21^tk*U8?tzxVFUU!O~t%r9G)E!cO-Lvho4&4u@~-<(*IJ8AtV!^PbD ztoyd;FHpIEYtr?Tw=YjW^Y^4v+mny$1H}~93KrikQn%s`iL^J1XSEEOd&IuRFZb`g zn7Ott9BCX+A3du4(9kGQnqZw0m8IM?_vt34b?S~bXD3R_e1BWAsGswq(wRr`pQ8BA zI$k})<@JlDsa5j9)sTA=xSH?F?f&%PhMtC+G~4z@+n1v2=WSTR{~(Kn^NybS$&6n* zY~mL_p4hGB?smUvxH>DKN_C^`42FPhUw(v-D72owe6DXx)d#RIXDV zYuW7kqJHXLIsNXBQ2P6gaaI529q&F^(o^~Gvvl%Co$a|CySJp96wX=q`^StXW1scY z8jgM6>vr<6&ZS*upJFCf{q9)u>HGfI`S%vN{8;>B@#~Xv;hvvn*OwnUVc@tnQ@T)f z_q)%0YwA~g@%3%{Y25qYrqK8Ck)~*;81bjSO5U>kH(j*zmdfg-UF%N&-&^UCS-&^F zR>Nbf*W~|ero?FL_ipWc6A)1I|HlsJ^BPC8U;l~z^sgdxnnvjAryK9yZ~P(5cP#v5 zkNF`Tq}jUAubj*OjZLx{9CvsP%Vk_?q~&@q%$%f9Fct3w~FUaKK^e* z+bZ{+OGA}TW-$m>yWLrA>vczEW?1ZOHD<U=SH>PC%BwAV#8y@u5xYfx&Ub^S^4vuUmax>-7BVOTNC2 zY@H$2;u5Dl*EPQ`Lrr<>#K>2Tkw!B_=WMhNP~d#fu{EmH_`m!MQOA`WTU2I>UD>hd z;QITK4|1B~*YL<+@3Visu0(Cg)^|UDPv`hQ>0@l@uRYWAJr1+AF!-I4DD<;3P267) zVHP3SbbOJNgd|ItswIo!d4rAYJ#CsHe?#8QKWn&3e8%C2=EvL5E?OR>zI$_R&L+=k zU9IPq+0~txH&dmeJziydX=(8+oSVooX7%ZdjyUmU%3 zl5Luh?t)dVTKVhd@6m}^lc&5$DrdvL3Ag@whu5!-UoCGk>D%648aTZ+?48~o zvS$AL8%c*AE?15d*E?n#U!&=Osur zKS?(}_eXBN{`U9p1pCY{|Z+qXKSpMWuQANER zC-?5{Nk7_IEISHzZcIDfdTKvIgkz)kl9-ED5iV>SL=HMm4OI<#T)5?6#z)3xty_UH z?B{-L?^Pv5@ z{XqH7rmCg}A4Ua^WAFCdW31h3AajFHw&BVVc4i~_uf0-y(|;C=o*i^ew)BX5+_38iK9=oYLDM$L{yZ7bq+X93C%=@p(r{?c_ZMFF= zVZBAtr{C@o68rA4?t0zsIUPbXZ#Qk9ko)e}yo3A=x97<%+P&jm{GJazf*z@L);sFn z$}ib{YqHMFZ~2jaET1_{W@zhncjbNNsQ=gGroZ>@{Rn-*>I0rrI2LcJWw^v<`(Rer zl4KRnzJvhX#U4dTd+(=P9p-e%I@*8r@50=B*Ec>|uxeW6n!7GHrUq(lU8tiKd})f* zM-PYad*+YhuYbO-aN=OWUlZ?~c+-|&Zy)gZuHXIp|I5Gbk3KD$J89PCli&PTC_cVb zdiT?jU#E73FF(om+VP^%n`rkei|LMI=4SW^K>BMq&`6u6KXKoUf(ry(M^C zo26~R;`j63@164K{Pca3-d{QNv;J_?>`lHt3+J5SnP>Twb?HRkv;Pwp%I#Nl;n=-5 zML_PkHowIGCqLt6^SPZ~`mlw?YR1i}e=794o|zj*eVBgSvMhATkIyg6JJiw}OSM$B z%KP|z{vGXF>ESfxQI2$m`gHx;g*orue+k?DxI22fzJ2PJUtjY+RX^Q7`S5yy%=%?1 zv9jfp5AC|MaQ6g-JHPC<#BbYLcYW8UiqiXOCdK+2t_AnLIw%;k{p-{fm&8mgHdIb1 zdTn)3lg-k`;W9^E%$EC_9mWnP|9+0!z`BNE*_VTPTvNO3wp%19`7z78WKQFcVEy@! z$$=-8yEcI@FmSV+wW`V=jYg(`&|}e)Rzd#f~wlDj?>}L`$FV9WcC%>)ao_9#Z z>m29l4$P1}`Kw2*|}H z6{Y)^eA6DwuAW-Iw)}o%^}Z##vbQ^Ux&#H9gdYBVcJ^%H#>GXITr8@7EgZ@v&ldk{ zS(9ZrJ>-Pno`3AwLe1(=90lKr7=;@tr6-Ck_~Md3@3o1kn5xfYQL6V3vHq2<@dxA1(klz5-)mt zH|YM0_0ccam?@V>9JsNwewFw8-Jd!wPv5jzeZA;~^Q>~G-?um?E|U24{DY^@mir&# zly*IgJiPPK`X@g=g$O1{KHfH6#j!Y9J$J|LFY}usOWNbqcb@wcV)Xa!)5%XvOkyMp z*;1#4bF&|9f5q!^#MRJeJCElIN>uB+hm^B|13!;I(5I94U1xJn{G6`Gd|plNf&VMp$=%a+H%?dnrx(Y!r}OD! zms19Ft%{$76f+?lZLcRwjT z4O=Ga`teO*=8p;OW(ksaJ|0?EvcO)w>lMp{4ws8J_Qm|vj;Qd6@wn*{#Vg5wxJ#-h zfZ_OlgS=mR^WJ>EUA8|&$lc1x^7|f9BYE}mxTejkm#)@-_BS^E#a#9Eo-fuuwLkq! zS?HkYcQ<{{daduTu9uc(i|&aj5$^KXcwaBEb7n(=D%20La&28IWG`~I%J z>i9w8;XB^CnODCYEEjkp`ucf7XL2|YRM(m~}Lhvlp|9gpt3 zD|bD+>)1)bv7n%pD|Bw)k~H>pYpSZkKOQCf8tr++cS*UdRhLw zOD)(c@o8W2aV~{V3y#*VEVFyg9vO7)zY)(1HNEDK%XOV&!uAUB?En2h^6ZTV6BK-z z^HcT9-7Z^ymey#n|I2qPGdg-dZ!ep#(si?z#0@$do=q&#l|3X>qUhDQjDwAVC7+Y~G#ksKk+s1v%Ui`W7kMBaw z|JMg1x%fUOd-+@~C|-D^^NO6vOqj8%Th zPmWeB{jo-+<@)uvRWIwN?G#f9|8@1&{-ghPru;p`a8^RNsH%%eXZ?l-i#^%v9=esr zTzCEdZ{MmfIxp+?iWlXr{U}rV+~B|j)+rtzG91?LOgOb)-OuaCZeGCQq)slzb&#v!Mkp8SMe(;kML^jp6zs_68dnHhVVGNv$#Xp#C<>I`pTMP zYW=;nhaZ0kJ#+8-A@zn8#!LUSP1#(SQ>9a~MXW6$JN@pzo>_4Q|JNQ~$~BQKLg`+S zguP1A)lxgRq95 z<(7T?+VHjUVw~JZY`;A4WD4-Oy?f{T*Ef&e348Zd5-R_udA9b$a)8behvnOt4U~>?=CksFbI`s5-&ava$Z%{3FWQX?8jL-r6mm)Nhsa zK-oUMdifjuf=iDc6?c5N>9t<|PE;4q;ji}>#@ojp-qYs9@zjL>Mp5X6`^L?i}#D?m8(fvRUONFr}nP${ndE&RgQKy6b>|hzVqE< zx3j~koohF{`&^lAA@O1Ex^o+LZ(I^|sfb(fo%|~ceub0Ww>y)Y4tQMmvrYA=_FT3j zcm0nR=?BX$g?h~V7ypr)wJy4lTYt7k;WFD*&m{hAyze?^%Di{0n~bg=kPO|u?&qhI zpQcuF?(De!;85Zh-txJ=y{S2+TDAYx=l?$vuCnLu{e_0*+uE3fVm6+iQ(niEFtdh9 zmGOdKFw2g0A7&mX;1a(q`jM|d(COG+m;N=Lj~Y@RCOpwF+*w-i`B{13X8*5TQvw)N zg&0^?7#OmhXeei8xuJ4m>iiCo^Gl8F=l?&azyI7u?LPvE%J1HmaLJ$f)X!XcEGvhD zYvUU^U!$56E39>Yg~Hi~5Up)Li<+|L4q=aM{>U)z`uDuiu!c9eeis@s0l$;^q4f&zAph%}@4r$dSu^;~u36ag)j+1Z2wfA>UW(?q}h*CJQe)9F-Y(fd4Qy<^`#4RiHCh)KGqEIH5sdC~$ zuil-WUtFPL{&Zzaon6gWp6I>1SPFN~WwOY96Mwg_d``3mh+nCEt=e$t;u5`u5!`F6o^M!>u+>+OSGg z@vdHUbiH)8=4MtILtQJEtE_@dOy9KJOEYW*EMGnR|9xV@x-*~F>#t8Oc?3piHp)WeO`Os6|>a>M& zyAHgVaPQ-iqx&zt=s4`Gwsr33{V!4#ot=`W))=Y1Zp3r)TH-|s?^sQ2=IhU4pZ z-)GttYuI^vBR3aIZ(i-C3I7kW^Vv*f4C5(jeeLEOQ687maBY2vyVS4QhnXZ2<}Ul! z_GfA;$IGgat&z$exo2#?e zU$vPMP+ib?@VAU>#H4xNQR`i&mahKux4P@4miP4tRke5TuZvH9h%Dabm%8{~aS`7yjc<6>I)j$NpA+ z=laY45ArOoSZDaX_T{95p@(y`nF6Qq^V-!Y7nyEcv*fVRCefzV9IvF>gx7Fx{k{EV zmEnvh2bN!|e*HMLU$e$p?uSliXItjKFo{|2!mO5pU*s~E3+0v_SbO?fZ}8pc67|z> z6g!?|>quYu;pVd+T-qfZw=VzP!=>=x+~ze3&kvXsecT#VF77a0LZq?6?#e&^?M7i5 zxwmW|>e>XghnKDUASCrW-R|Me>mD=L)%`#DPI%%ZmBZW*&39H62Rbr++j{%M;;Ffh zA2?k%4q0~X)brWr#VZ1a2dwXH8^r2e1IAN$@y%g9NIi^E=wJBWjG zqOALW(=AV0_O164I%xI7hkyAdwHjZ!=u(CCw(&n-Kh_Ty_}e7?_TbNC-`q{HbN2mB zG+O&7E4F_9zlk6I1oX0fY~B$n@$a!`?8(FWPuclx3e`VIDJSj8oqnFxr07zx?j)Ig zfv?j01fI@m>6!FIJzwX=`-4~6uB3*xUecSo^vkq`5|d7Uu)ROIfMfdpvUNZC_ptgF ziJe=_UF$CR^Re1Wi@na1e$S}t6R;MZ0Prk-^6{4{S$udr^|~Y{TkbwS4*wl&!TAcCGJPp*Y}$HRQJzUG<#(CaU1Y9!!opg0P%M#MuK(>kfqh~c!mrJP|5>eH?(uN;;TsBvRtQ#> zZS#N7Jk5H(e9R*0MVo&abX(2en)u;i;C!w36ZK5zA1V|wy=z@{?S$is{#~c~GZT+|ng7&#({*Se( zKR0jp|3e0;0&Q0Xf3&}S99+3_u zijU`H#WQt$ROOn%bwt7_W?9VaORF~@UpIxx_28Fj{};?JkJ}rsXm9`5Gj-Y7|2H(v zYV0C2{(bkJ{wsImPW|(+w%O@ntzG!rfZ`w3j;q9Z1dda(@Wt+SfzC3p%Z}#*5 zv&-yG-@U%bdbVZkl*FV``;5M7mjkP_--{Yuixa)r+pjM8SjCICCNCy_8n;r8bE40& z8-FJ<$9`An?&9<)F*9+TVHfVc@w^;{f01tz^qJA5|k{Q5W1tn*NE>rK}A zxjWTs{dzX0JTO1cJ&9+1mxz$ff~5!dn4B)1&&eYga>=9Y&5s`Aq_QNr3g)SeQ!-Kt z&UogPzh0%=yzcPge~$zDt=<%~jpbpx)(a^7__6l))eYVP4~uG!<~?7jIpg}3 z+ne8Bo4;#gSnj=(&;S1t)^g*h+1+%=kg19F;Xkv>M)j3y?SC#7rAE}gs!h4}e}6xx zl=$!IW&NmA;&EB%=@3Cfec8P2Hx{Z#PUPsL1bX%43s{Hob?^i_3ICh-%w)=Wg z{d-`I*Tzayom2nS_v=0Xc>7m`=r3kNy&6&y3( zKDODrOXbKSv8c>|ZjD)i+yNDZP7mjutp4rL9JE=ZrzJaM759Q$J8x*pDm-A+5$W&{ zQfg&67_cEgM}(V;>1aX66em}Knfol~R=xfBdEM8!)n{7P+e@CWyBIkqD6T!ZPX!IF*v`LW zko|X9mLcrqv{=Q^iCu}&_mnb{MM7j4bwVbnh;XtxIXP+sXo+#PGA(pi5#b@h&3VXC zU`B+Npk%9)TJguOXM3_%mq)FAbT#~I-OiWmd^rz%u-$8?cW>S~{S%cd|L(r1&GzQ_ z>)7Kz+Pgir#{HT*=U~t6qGd{_zQ+GpSkD=4)!uhP=5xK>CU35U<8{8S^DFn}|9+Kx zV!^G=LM{<68s_}__wT{#o9;=?3t|@4%f8`mjuN}5Kc&L)v%Vcy_{XlvExY*J4>DZ- zb~Q9bb5tOk&B&wH?iK@ z>9y3<{kL!bZ;Gv(@}){MZ@SMnGiG(YD^rTO*Ut=HUz9JD8R}B{_PmKsx2I{ z2O~mVpIeHFR3vry#w7+`@%sEpJ8;vCi3ZBgFS1!@KXJ4AYZV_}@3T+m^{T!buNNGb z^odJ7_K(j`R(AWzmp`tavfcGuw_VR;P1IfWO{azLeLS*Z%NZtdWuwK%EEp9)-tvn$ zn6$)u=h>^D=2bByT^EjuJM@<|KjEfP;k1P8j^5mt?;db zvHKN7oGof%)fR2|u>Nh8jp3ej^GbEQ<{!&dW=}h`&e-g{#(dpXlJA(yUFI@(vR~2n zxZf!oV=-IdTeN%R2}YOgmlsX_=^TH1>aK53OaJ9ZE;@PV5~pUKim#-f*UZPfCxT@S zEVfN|{H*Lb-lX=B9QO3A4XBmnoonqEb`ocF~YI7?C z!@{JKu`ztz7k;ZZ_7*cO+S#+noH;JJ0p!U^Vp)+Zh2BmNJ!|oc?I7>Ht#{mSFo^lb zuXuJ__0t4Kg`0oZYV1G!`#-~<|Hs;=_{?woaeIrkTZhS!k~7X;Hk~YboJZQkez9v@ zE$WWT2%S7vDP@w$j#m@E%#F{yf35su_wkc8ukzaFiuDQfkFmul~SJGJR<%%rP3Uwl&OzFxi5xM5kU;IasH zjs6RQ|L^SnewcszuSp%%xorILhu=2`-)g)ZVEyC3o5#$nJL=vUnI3cB+^<;pgz=1r zvt!{cc1DdW9j}Cp1P(1Wm~cczgqxM+P{WQ8JwfSXJMQLvd)|9ByXxHRMLRzFo;f}J z$MFrHzg-QDv)L^x^P@8P>BQ?h*XPNE_GfyVg>Br|thZ_M!2kcU1w0+tqVF zd1gh6m1I7PDp;ZY?#yg$uW9!t?-fW<+9B4}&ab`x?cp4@ZEgL#WcN03{Qe^=p~1ja zz?8aRhXn%zU!CJDEv_!sT8`5h@l##@ZT%B^Rs87Z|My?refjV2?~|W@e=so5Ix=r< zXsF<=gX*jAuCEYsJ}7v5aqW}Y_n9;j1d1NFPI~&i?qh_?f}rj#C;gZ6a*E%P+&k6U zG0y#tnYz^f1Ve@xZf@m#rlpeG5A;cYSa2ZlvQBYQVA}s9f7`P;=BBqZRJb{nHX1Bj z-c|mFHMg2=)0g`{*R5(^@|y2e)Yb407KfN+Di5eRCCHvxA$d%(=9bveLhh-3e@@0( z=GiT~9`sfG|L=EB|9^cw|7+ttYrWVtKOHR&Jv`jRK8;zxW`m4(Gn@I0YChA~y_?P1 zKdFhHyWM!%(PLJ|V>VTN@!PTw^sRZz5}G(xzdVt=wfgUpDbk+~v5Iku`F*LhR9}62 zg~$xn#J2Bx+UGU6DlV`p7%Bhzd&S;;MyKhn=jNtM^#!gU-IjSx#ejJS*W>D!hntVM z%AT8MHS^rcy4KCdg_yH$yn3q6_UXoEW+p;@aKxzwcG#Gc(^I#Kq{ke@f~0W$GK(tr0)W${M{s@SE((tJ!Hud14Qi zehdEnVbR(b4;dyLZee+}N!sF7L3Quq+l8oifZk2mXpXzAtNBtN-Vb(^AH}Z2{5qc_ZH2{|VCGv$p!( z&a7+tSNBK#t$taRAFi@B`D@nQT2F)i2E)L`{Zow#wh#_qS^LtLv{dZL7b&+Q0Rx@EI-E z#*{)H4FjfUW@}`hGfz0^Au`F6vo3AT28($D-#B=ZmNa_m$u0HH_xg9u^PvwzveZPg z=ifUIOH4WSOuk(AdiMMs{)XDCXFl0?I?U#b^eNb&|CGl`m1iO&hB+pPgXwJ zf8}e1iatZgv7*O!_olKkZ~5R}a!7qk^<3?F%>9)GlLPB76+Kpw*ME9aWnb&p;Lv}| zSF5dh8zc90m;M)4hai>%PC5!3zCWsX)AMdZz0UpzMYg;jEH+GEFSS5#@?qse83~hr zSZ_69GVeJ0ha+Xdk+#N*N<|;+y#h_T6I0VQw4`g2g5`}vnF`HxFZ*GF&P>vS(ZUM8D)X-3Vr4!CE?rg5#n|()MYPEaSAFZT|FQhXhZp@C!jpwzzzi<1I znA3$l&vU0=o5qzPKQl|I=H45QsAcbeE~iPO%5$JJN4=3QXn{`+YRh zRQ#9S?sxBEV$o39qfuFkRi`9Vp-_W1=pD}7TX^Y*8=Ef$#; z>htCZWCkx0+@W)#IB$uJ?7n5^o&6VIDd1E0ThUwICqAS6Nc4guhTe1a1=}}nV`4E> zJM(O=vjNA8smq_R-B4M({r!*2Vh`n{xqj5#tj*K>{`_oBVaMA3n|&82_nf@PzkXrr zS*K^(m9<|svd-U^FS(F4`a#33@H<+Y-dIfHsb|TJ3kdqPzqjG(-|8yfbc)NFpZ zA@QA{ZH}M0l(}PX#9Z4hiRD}3KYYvP-*Q6z@`jfl%8zVU`MmxjxV8U>vdTYskyF?5 zB&Lam-z_{W|HrB<|K|Q_U)`LOQ~STK|6#Dl_RL;B{nMNF%>T8uZ2hnOllGdw=jyZg zuUYgX^+!x)yWXe2r`Qr3K6q&Fo3QJxWzqczFP)6r_ct?TpGuf`V8v7U@D1HEZ%mKO z5&8S>gNe4FxPVVZK|Oh&B$@lzW1B|go=?4f84@b2dmZi#9uWzbY1ST ztvmCnZPiDO$0ry2I^9>F|E@@INC zbLwBGe=&Adznwczmd|?_=dG)n93r6cT=?HVU0K`vN7mMOy{i}cC+2Xfd;N_%V*849X6--!*_w?7A}j29c5)dpak2$$;oT~h#(3($ zyER8M601KnPUC zw*K*lwoRS9*FWB`b@}>-hu^*aW8JEMl6O|Cq|j_Xo1a&h4;=k)UP@=~l~p-wEJYrz z>B@c3-)}tc{l;&0J9cKz`@N{*gThaqrGGQ{etgPX_hG?`8mXJ_ihewB)R`vQ$Mn;u zC|iwz-$s5x`WLCc<&{@*Y*ZICW*mHc&WE#Rr^d2;HkWNLbh00st@*Ivm+05jNy3Nq zuSa_7zpuGoe)r*gJDn%z|N2EOFZ}=Mr&-kN*__|YwS_ju&s*5W=yd~80_Wy2K-ad`_nq}?%s4MY(rwn_;cwc8#e>Y$4`C{wj!yi)A z8`emMWymQn`u}C?@(tI741F)p_ij?Vrux`5P18niW`+dUof?6;TJE|}re3sh|10}W7g#f+B z94xbBRgP%xfjVPpiATKCdN=>y&~WbUv8WUN_S;?s{f#Gnt?QQ+!-*-0r z7kb(l`}n}dSB(4 zL^aDYGZjG|&qFiB_1$tJm7>mxB|0o+>oa&GFx$vl+Q2T7qx{pq z|35NLl9j&yA+-D|M|aGO%OdBG{t~<8_3>Hu9vAKP*S{<^i;ryj$zwSo(Ol|K^z^Ww zHSJ#|6tpe}w7)-@kZrRsNTf{TlWf!RBhsHfhTpIL^rP|Ap&T{AZ3|rtC-_cX|M_~u z#M!pmj;s=qq0Tl>=KkToYoFe8_2gHc%%m)fnL%gvyf^#0Y_V=bqKV1Lg=)^ve=xq) zKGN{QNJiquhrD9HjZfOAtNa#G5aIf8y*&7J{bfh1rJ+AxZ>jwwrCDKr_T~BYM!TYS z?Yr>p{q@*1QI2V{lCMAgxWDi>=M3rJw4!X@5%#hr-*YRTDp6dg5#B{qP_wi`uMr zucBW)RXC@(_}$HQbAD*`?|Alh$HI@w^V$Dh@n06N{?d7lkuy{Ahe`H~RjW>S#{^~v zm~ZG?dVbZ`csZSf;bXT8K*CPXlMwZd(cL0#s5IYU$1$E6n?R; zVqiXCb4YyZ42F71uKWxCX1%*^YcgOMHd{*7PU(l}7ST86||In6*ZLfc+ zh+LIb6l|}Gn<&NeJ#(qg?JcoSKF4^6cxPbje zUA?|IEuJOy~y!ZV-tDf(#fBOHSXpw)oow~^0Z+q+Z-nv?~YjV?sAQ_X?QuF@$ zAJT^oI@^So>+A0CDq3^s_)eC`aeS@44^tP$Z-0Jdo{C9F!aN1R1qZs>8qUQYdj8hJ zzvw(a`Wy2%`m4`$ zujqrXSsuy!+569C$8O_5QHQH>izgIrj6IrvtD@pSgxBQNU(a8w%IIqOFaLb^ZvGF+ zt|nDWqdimn9&bMGwLdH_1GNqL{oKHL9)_OURu zIGAv?ami0tw@7w-pEXUN|4HGsrrMPiALZUAr_JiKmQ83_aFuBv&$gyJoJ+pleLU~R z?Gw!*jo-xYUa-o0G(ooGSmAGsP3)z@~6uWALLN9^N*hRK6w9& z>Ui;w_Ui+du8!Lu_ctnW_Oj{M_ga}auXnk_Atex}`17pLf$8j%rGF-U_gOgIhNF$e zz|Onqn%8=k9~LYudP&qsXU=S2^latIK%K-h?|<*UBx>|Gp?1|&+xJuE2{fg1 z@kWWB<~bH|pY6_uPsXR$FiFTaW=nLhQCMmC^6#hT8tPUJ%Q|eYh}RsxbFXz{Kypab z_u7-$_t(eXUthLYwXMhg;q^JJ4L)3_4!NX)r#?aR6si&t+41E2|kZXR6huKMQm!~t7s$y5{e#rAGlV?@bwhg{A z(^#ZszIM8~y|ugYY^w6!FK0j0^^4bv$DiA4`)#ed(B6>Pt5LI;L@3p+e=z5|jovoD z=Um4>eXLV4TweR0dFAroM+#QT6jl7b5`0nr$1DEzPUq_s&dPaj2yst}zt+=yJ$t`& zrAM>K>52OcK7D)oZ@%OM6Q1o0te40y^HqI$&hgK-Ztr-G8^%J0i#(2>?mn#g$J$Wy zw#w!Uf1bqm9=qt3R{MKP-Hy`goqPS(Uti7a5gj3OhU1~^`|7=48z%Fl2)w^HgX==u zXHI61gVT4v`{OVB_rV+E>rF*B=NQPWaZ+9{$gj{-mZU!)sGz2^*9Q969XTpH?-&k?@ShW>fL*@ zhINA#bHc--gCR4N=jp0E+P!fNZ!;guhJ`FU>#it=)ME2OoqNDtwH7 zS8wq%=34*%Cd<7a>rQl*>^l}Mv3RcM$&dEavV78NWse^fx?}C*x#raW^{WqRs(pSE zp853~S82_`q9>xv(`D|j2s=1&zC+3FPJ?Uvpi^mb`zJ7Xe z`tqOW<@Xl3U%$V)Zdb>|>Dyi}5N^2G%ElpbCEoVvm0-bNTh)6sg!!e|8)lreu2Xzo zy-j23w~evSip15ckH7YCZel+Yv+e2aeam{!e-PnN5WF`l^Y)Lr3|*a!nG=l-Q%`4h zEpFg)Fnbol;l3hKll8>WGs}-1xAlvDE&lM)h2*Dk(U)?(d6SE7Eq{HmVSDZOsNMR% z1XBJ~>KC{36;%98xD)=0wrc2qlJ|^K8?|*VuKHN0L_T;QbuT3-jL)*^CxNPR$n0Vm$jXm1ZTm1L`nRoyHrs)47 z(YMac@a;T)Fur_RM4D(^tyY~*=mRwo7kQa;Z|W_!`gmox^hi{%o<1+y^=d@V&gwh! zI8&XtFC-=;pEvy>P_A;!yZ-b?R!z3!d<#8PGheS$QmQ=_|J94&RX8))wUvR zs_tJUVh_2ytJ=M_v%VS?!2a5eL)`HH<$WACQvvG7$m|q`}q=9qX(iI$HZ!@1>DTE8LfPUBv>|t%J62L z-@jdRjttkoXkCM8YeoBiB#KWLT2py4X8R<=womu=PY&LD>+=1H3vI*JURv$*Vg34) zbB?ub%RObyvL%GX`Tm_Ys8`%F-!jZBQ*+)yKIP?Kzxn&te~NpzS75DwjEMK`a=oaD zcWRo_yLI#I-p%H^sImCcs_&CO7n>YXTIW^ZxWp>LhMy&7ErAzUIfzOzk*TtK`*; zw*}|qud|u3T=$30l7mm1e03z^AKf;ox9QM4Uw1{S)oZK3cY}>}51HL$w7wtw#{4Ja zfPdO@eKzI|k$O5jFa8N$WEBlt-FrdHAkL~|$=*+F%?fjFM3g5z5i^#2XI`$l@NeAS zx&X}V;@3pl+JF6Dd*=S>@9Q{PV(0On&u`?`+CEQn#9S?otJgxy`$}!uK(-h%Jogx=`Ph&{31Rdzp9iO5t@+zD|EJ_x_8$ z-}atXpR)AtojEJ+Dt}juk2-l!U|IkCJZVNDaaI>*>oC=)?{@Frz4NmA&cqGVA|B-& zT=gw#{q6${47?82>byz%1p z3TegP4=?@T_~7lls3Ly#|MIwX>%$Ml2LJO9k6k0bwo3TE{i|;iYz`W5ZaCO#=o9OC z>3ChBe(|L_Q&uz_;W(gtm_?QGwq#chBiE}$_etimI%^**2rcF|dYzhV_U_@0XPjE= z4k+5p)1Cd{1)GCwsH^jxyZQVd!Zs}|IbeB)hs(g|k>E?#dGB=YNUdn;RoVT{M%jp~{I zELg~>pk|)IxBTt5GSx!`x-3&WD%GF%zB4qc-uoc)K~0WkjY)w^z2urTAGX{tFiy5y zE^s0IKm-T>r50{>3DvUWHC^{67Hkvq;EwRIuRkvE_a_&}!;SJnk^KhhtgTnWCTFc$ zdrN%wu3byQ|HiG2$c_Ix{dLUV>#HA$?alsLnLjuFz!atvdWNU-vP(LjI+%F3pO+zUqIBc4>a;tN6=j1&+5gWG(L$UYj2{{cP}+vK5Q>%{as8 z9@YDqoo#7uC;y(^zY>xhk1lP!KUI=}^+AdLJAWBQCH?<1 z-9FA=_)x4A<8M~o!oyosMCPtOw=;xgqPy>#UB8qrYTi(}Q+@e=*Uw$`TkMzY+5i7z z_kE6lb-v*nWADr?ef=uRi<6b}LCiKTN!1A)&vqS5RC}{|$7c1(kN#+wEK#&8+B5NW zA{%pgZHLF4?zg@9tqNAt|FSYR6jUj%bNZjpsK4*{+$Hrz+Y~weS$_I&xzB&`$xtcp zLT;T&y~mR8TZyuBN<1v$OLYzZxW?q{k&H#DOQHk}XYB~OZSD}Y+x^}3)$<-oy{ z_qTe<&myLi-+Jfs*cZy0Mcd%8lr(at4_tgzWrHt^Tm&%U-2thYL5oQvb*kwfqay$8`IT<*`qH-czU~1_N3#vWkET~DD4x*? zdn6MR#h|vJL%fE?!$C;6!F~0mor%Y8SkOmKkH8~yd|^$`HFmwPM4d_KOgzNl6d##@xALGulV11w8Q$| z=7atIO%wzrcNPmgSf{zuxN(KB*lMx(3=UVeh9lBU0*+mG zIR3UM@Ub&Hs-A0e$}Rq8v&&hnYSLDQE_sbA4F(R@1Nw?9SZ8#}i&jbOW)3V&?>s8w z@nLeoD#zXJ=gxm~lC4a=W=TBLFsdVa!@R`jhJror)-#$-Ndquy&5w{;) zJdX|B&wW4tpT%bTef{ara~k&ll(S}ve|=Zs>+uO|&!n%6=#{=Ba3j_Kgnv+y#i!cv z6(^%v1lFIfE@X{<^eB3(GH;do?qkOztIZZ%?~$K4r|IA1&<>yfAH0vvWWQY^v4!hI zRN=d>sg*YB>sRlOn%@1v_g)#3ftlk(nb*t{${NGF?t1^eRr|nELY$51WcY`@sb7A| z*cFt1oTwyQdQViqxx&<9#+(BTu@_mF$Q=|v9&X>DIZMMY_rK$(M!StC|7hgenxFdk zykN?Utc8kgFO%LGs4VO*JS}*ws(%}IQRa#DdJ>QI#n!VFKlwW?`Hgt*2JZR0_l7)d z^#Ao%!9$^R)ykkd-ClX(iU#jED$jA$PM>!3yXxr7sw+TOm=LIHUZc#ovyiKT3aJ9!V%=v11qTpMGGGAlqXP zI~zCA^_!BvHkQ1zIl8@kv(!q@`Ki(s3|vVEW-A_$v0$FL#!yYeV$lupqVSBEmsWjJ z)0027`yK3Cd~8OA*Uvvqz5g5T+x_1lzcu0Hd-wk=*C(hdF!n1{B)raj7|*q;q9*X8 zBBP^#gU159Io^lkny0R<%Sej+CwsZLN!(U>MM|FC;-}?&d+N0mmu2kV*{go}k6>j) zt#-on-3H0k8gJ&$y7S7WmuJuX_8oI)CA~FSAKI5@7n9|GUw(o2xy%I-%l28Y&-r6t zdn&7CcAK_%#P@9z9%!zZa`S)n!<+w#l^3g+{%HIBYWl$+&$_+dI)CT?p0?TVjX>ap z#=lqAK0W{Ex?`Yz-1^mVA1#{xzt!HmDsGwH`?x16)$2}AI#{{w^5X|9*6Ge51i<%9;zp^&XJtP;){b#d+_nn8|?RCt3c6i=&o>=hT!|S4Y=lzR+?c~Jv;Qt}iX-~uFQ#MNLAm7J%4S~Us)+W`P1F+RaR_vE;Y6C``!FM{V#Pf2#Ixiv;BCY&E$PC zP6yt~Km8P2Fkdw%ufN_=k|#+nogrz8lYxN&@7GsNQOj$6DmQ&-nE3kb(WtOsyhF5 z&Vi|4o^Lm?YpVObb^Te^(ktcLLmY>t!@ZpzoOp~^Lec&wKsTJ4=XlTP1%C~SDr z!M-r4T=MJkupL3Q*H>uYE-=m3ZnO3}JEuU7o4@H~f$^-DFJ@X?3Dh{~HnV4bQ+~PD z{eQpimi}0id0vR)T9?@2G7;&SM&9qGcb4n3dD`pa6zarf>uPD-by zU!D8p-^-72`#%5MRTSSIk|-*vKmCu_#zO*S={j6H-KQQc-LTT({CkJFO>d0t*A~u< zOA*tbx^it!^YpaPPi@WZhs>()H}}m2*V-+`+~B zYW53=S!;MCyqmjsZ_@FOx^vv7HOID}Ewo&`rm0=>z++Xm{I*oOWtMX(r%?zh0>%4KE}T#@AbX^XZ_$1gTc4r`MDY| z#U{^-KmDSz{$W#F)zP;}CHQ+ke67AEn z{i=nFZ@+ea@;kir`o4W9f(?BB2`dQI8y|c0e_sF6EenN9d7is=oQb=$@qhdwWA&YF zVV|ta)pV1-%VeKYIym8|XaB3%_lJ~^1Xl>|x~Vi*`#sN_MF;D5wZ!pepv6G^!xq2=~)~thiY$c{Gs-KPHIija@!3(clK`kemAdq z<@MyM<##5{S?`c@+TBTV(xmQf;_oDGt>Tfo(->XmeC%q?t3=MgXBTHqKK#xhBEtUT zt%Pen$z-Nzu$huq0OIzGkz^q+IX{O9}0XO_mi39C`now@RP z?Y;YJvTf^qZeP+YY4?+z;;MS4WyR0qYi21M7JN0!nf7$^g){qY>Tj3wb9MdEuivAU zlepmR>($3<-Unw|Y`wD3Y3;^YJG0r$HZIY0bY~6z!ec$FuCDq-^oxK^DZlq06jxI@ zzR!RE*Y4^Y27C8KuYap=`fn!p>OVqrX8WFbSC#o@Ui{xhNAGGRY6#6OmHEEX(Z5ZQ}X&esI#UHhpl>(lcI3H{@Vf@`IK3o92jD#p|~oY1+)< zCoHAnAlvJ_b7kdo#bCkTf0K3}d-p_r_5Zv6r^D43tgY+3RQ~q!pDRDRKMFD#yx&** zBjZflv*i&2ZpW1j8yFaP6&z=2cfDdQeGyf%?r*{St*dt0#?)q?{=c<*>-zsO|NsBb zjuTxyC;tDpt>>2H{|tgPrv$K_J zKbzEtNUfewFPnWJS*P)O#;1#7jkmuk1o3ceRCw{-t-LGZykBO6wt}zak(IA^i!S_Q z)j#?0|5ZMg|BnT}3~Dr65c)U&{%eu#`)6kF+InS*dt-u-#I25crU^|2h7ZV0Jcev~9b^zUgO?>}9+cH{i4(Eqt5`;8P`XDV%ye)2LZ@uQd7uOHRA*K1`Q{u(d*HGwZg|Ng8`wI?4mR3t}v`CZ9g zx&QJOF0RG%GJo18r8a~&)nB;rb#K)*jd?6zlXq=h_vCBd?^V0>{wD2N(@641I(PklxqR6?orLu6wKoJ72i>(75fwZeE_@(q58Dsk zPuGv>Eq}gYHEOS^O~GaAqHO;UDkK8n3K=tXQ~o-}+Y< zB%}@$9C}*So?95kzB8?$dFvj-y{1Z(CKrm&a{AeRuk0Nxfq%jlIsvo^5s!McIZKuO@7r^!+iH z0h{vaXWEIrd2@t zUU+-j+v0yaUte2#H*0TnjQjtERa?chTiT_6z3fu25I*+x-iO5=Itp^UjR#nF@PAy* z$FQCKgVaSnv$KpZSxvP6d(_5WJ*jSc{&jfiGEP>0op1NUxIHIZ%!`Q+5s^D&w&K-l z?}hWHsIIS{vByzgz}x+yr$ea078CBf`&VdP`fB{MqD%VOw6_uoE_RQi7?s8#X()?`u5 z2~T5;UTjsl{^hZ=cOBiD4#n!RO5e!5;Bf1zK%#zW@$dQC?hGCw+5PZQN4Z@yPjNLV9D09#sghm#zda|k6q(=q z98ED;lW}ZO+QEZnpI>G!{{y$?vBf}T^FOgQ&t5XGUMNPiX%3=PjZEgnpsTg$!9Lg7j~4-5_c(3>G7T) z`@cS}@5j5D+OH!P6$m`L#`MN5Yt7CIBUT0FUb9aU-0wD2GQwv9~{bU-rx8n zT<_)~aURoe&D-znye+(Y*Pji0_T8^gINTZXM{Cn^Jw8*#8CPpIe!19S6Y}oJ(%bdN zCMFjjJEEKZJ)N;_W?r-XqKB{TAFEIISD&%aWy+TgeB!_N8Cv zEV~V#qONbfCjH=a&;ot;8ndN|57t@QZ9Lj*{Qf}b&0W#VI{&zJ?j79y?r-(Kb?d{| zM}IH=y1s5ltkBkVCY%Res6LVr?K+yY>%p?XCZmVN0uS2d{$|IBu(`Bw{N8)l^^%%z z|NjdiQ?~ERSNiu$@Q(AlcYoh6$q+Me2#sJp$s3el6TyBm`%3zkr)->28~-;wddBx z3e9<~^?#d$z6(6fjIr_6et6rr&*-JrqJ4sa3(s!(=}|4ZRfOa9%ezyiA93QhusXWD z{Hv{8!_34Mbs)j+ z-^P1+Ozs{TwJ8V1nk=ik)vw=3Fp!=n!uIZ;mG}B5(k`DgO#GjImC`EsvpJyb>5R{g z_V=#wJy;)>!RxpEk0esye^KZXedok|eenz!FVy&eOtsHGlRtF|=*l+mI__XDH!6Q@uSr=7~ z$M;IcNqylyHJLA2_NLYEDR1UK%I_;UaO(Vv{{5$)p6__p8Psz-e*cN^-EYc6%dgD* z{>OE8*sSdJQ}XO5u{}2Qi?~!B(aLIgE!3cNFOyW$bS)OKQ)LUoCv3Vek$$n|?c5*s zN7)`<-ma_nU-eSbAHgTQ*4g6EwsP)hvOWHHef(O_T{>ab-y0(Ye`F}v?ftgy{)GhR zJ9eGCD+JdsFyYAg82e9W?U{_!uj{%WG-T*oOYxZ~*d|Q;`fOV8KaMB*-_Fe0b#Ch` zz0XfyxCt0leLuOiz`4A-zv~QsZ~9F=Fg#ow=ar*y?U^oZ~5t-zIyZ76U!}B6gF{vdj0j?qtd_P`e(!H z&-jL4+W#=dd58L+ZicfnG$-&|W|V~F_X#G=`+q!tkBfk`#>s;gtm;o+Ep4Bup~>pD zk!jZzfysiW_(OLnoqc>I`GZg48j`gipN6+U#lwCs~zR(i6(>S*gk_FJ}&1y>@c=}S+||6BBpWu>C*H@08=6oxf75<^&`DpbPuI`(~ zw|0KqqHh-DRQWEuXojt1YU8dO&(F^3%-0mq`n5ctKO!#U|Hsl@;z2S|7Y;crhZ#mu&i> zsSpuSE3?tDE<5{d&8@tE&=1^3r)}mOh;VQH`0rQh!s)M=H`e^qV*K;}e5%Mshs>}Y z=W^JCRi>2wHdtp?y~)roe$&C}62*odRc#!XJ}uu-?aVeQ+{<_RVh@q&6K-0c>u2Ws zZZqe^_m6JTfpK*&!#NM7q??(n{xBypw5-Qb&Dbs|YsXT}Lf)UL59)uHhqZ1}ztOy* zzJ9if{+A$TugcfoBo0YWcKI21|DHdKYx_pMd&`wt-WuEg+i7QdODyTZnbjx7)-Shl zyVzS{ExBLEP0f4NhG@f`b%*?qIMlsrGC3h(V>7RF!xtONd8S9?0+l!v4`$Z4*QjY~ z{CId?$SB85=3(j4rSm>rF@LSMp^=G&<7LdYn48_s^K93jx^n7lE8AiN>*rST0s9L> zW8{8+)JXYxRdZL?-lBIuk4^~Jl~!LZ@UtmNTlg*a;i;1*8U{bwV-_<#-rV)tj<~pE zZ*r%0uiv-6b=sQw@hkQ1C+z>rw&`zOhuyaMM<-ir^A#l4|8W!D_~^{qo1b@tZ<6Y` z_K@qu;!C%(|ID8$xMPpq>7sJkSAoSL*Z!t-H+63IV%oY&@PGDEM>)xk7N?E(#dg^k zN_{)?{Lsas*Y)b(=SIwEXNmwh1TRxExrutypWSc5!;uhZ-RXrFh}@KRc_J z)UKRb7G!;Uf{5>r_=Ap*j%j=C@U&&As*em^U$-|rXwKYk{r~6RNbdjtX6pKbnm1mz zi*0^AZMW<6tdi9c^1bd_g_X9PF)0EqyfX@ylw5yU@jHIj{FQ3A;v@EPaO(bn_heC}yW+vxyO1)$xLZx^$PCsKm++}9Jrqrj{XUh85&xGRHPL|Eg zb`!bETUDliJuYw6^I+Bc+g~b5<}auaNfF)e(xA#1#;76!+AzuMv)Xs?q=temDW;T{a)o4cI|8Wu<-r%$Q8AUdaq(G{uS}Q=&@%v>+PeL zFLS@&-}r^+{H>oiyk_10_OUi@%k$p<%=`J%6@Qd05W9W<&niS& zZC$_a@A)v-@4=L(R#H(rkF{TAW7{xekrvzqJv!6t=Es5>lO8nxy3+ z_9&1eoMX*j7&9~6_mE_v_0~9W#qG)oap%te<2Q@U!HIe<-e@nHy$4@g>ch{E(_5X7%`Jpn=K=d+K><2Hs&x*F2*6(=U^wVpf zYAx$G@we7omzO;XI~$R=;-Jj*=GK2pnO~SkCsm!kZ(Ukz8fKXu8j*it*4apvIX})M z%T!5py%F5ouzbx^uJvbk$Szu-p4gg@d*lw^YQ+mr8dW%+$OXy0W4zAzo++ce$y3!p z$(%pNUx4Gz14flA6I4VwS(z3&tq9N(;bvuKOxTg2BhJm@vP57-%e#vA;h(1(KfY}9 zWA5GOlD5ltTU@R9tTgZN&d+5(OKZ;k=dODf`b+Np!DaPjZ(W|h{K2*=ar(A*pO@y8 z1s`qjx%2&mfwdj8pB6j&aqIPK9k}#aO83`B<*~)RI#b4+`}Im}p7`tLV#)VsA{bu% zuKmv!x~{givaI{d1J;VW;tzdiKMU~E%Mm{R_~9-8HnVk=uirLpT63wYCVt6Pj^msu zr-UbXT3-udn%#JSQBP!wimDWAOJl-yaoEcj)(jYdqtgaX**;+vv)^e8sDXX~`D*JvJ+Bxxx{) z_zZIcqn=2ImWog-%R+|@6I8^wSX&+j8iZ&lakM%;3h0=kRGMviU*0C{@9yi({ZiMh z?*8_dj19TB$aLY27X@!E^VMzt+|O<+*jzTja+hOJrtvr5rHv=IR+RlHJ@}{o%Y!TV z79YMeua*pd%W(0wvQW*|S#sZ8w>Yjf-5mhULHJS}&ZxspkLv_2oKSv!~Zy{T{c|v-b11?#KHiqiwEMeev`xeSP=4 z$-?uj>lb|7_js=Pzq%FT(%hC2GKq{kK2CUX=<|kSOL@Zkp9_`!@c8rLx@d(XiDDqK;}0Ozi-;Q`hU}d5RuNhJ)z~bcTcyZ zhi_cap<{e-s>V*+=1&ics)Wwn{ivYQm~c1p?2|s>@Z;T1mv=c`6nHt=d8=jR#);Oy zxtk_t3cZtX%lI6k^W@BB&BaD1W_D&CPF(O)p?+~%w~*L^Z!h+$9C>$g&efoYck+*P zH?P`Q(kU-@zu~QXo2SCH3)eqvy7o%U@6B1w6sfE?&u_jIf5!Gz=Qh`?xeHmYF7R3> z!{oZs^#0=yor+eW26Y{$zD#;6U%{>5QmFE^^XJ3;*TSr?%!=0aLtwpDQL{5wTta-&r z+w_#<@ui*LgI9R*Ozkr~zkIt_*4jIZu4h!==}!B$WpnC=gR>Mu)vHS**Y9{|_~+sD z7x}l22AfZxZMR|0L3ZJ`=kvwc|5X2Ix0Bu);gq0oFhJs|P|Ri(zM5@67Vm!l==V|G zDElz2jp3_>W3GRcUzztiGM-iGSn%@dFFH%g%h&KbYMAhfhcc$S9}HchTYAv0@`C*0 z+$f)WLR0Q2ni;HeIPmVriaqgniz998pI?<yJjz9{2J z;$NeH*?D3Df_Vu$8o7&8B&NB0I%L?21|H^YRr$2u<88JHi@+TvC%x9kZoKOz)m#yC znRLAUYJk+7ovT(J)R>YOx3kVtYTBFCdehBsaa^DMYvae*__b4a?fLY*Zm;U+)#dtA z+(hL0KmW96Sjgnhw z`QJi8k=)4#qO3P&*7^zEVl*n@d^TzAL9+#aY8K5)oL&^@X7=Zeet%v6-Aj*tfmUto zFu1CuzP@Pn`Bz_Ghraqh<2Fx9;=aa!#(5U(k78IS{65(bTFW_gpV3S^S$melq0O_8 z-xizD$A3`M&%<`tU$#a2qOKm{NDIjNv`U4y=)d2Dp462_V%PoUGJZIHE)8`veVVw- zWzWvw6}zX+;@EjVQLaXyiRsOaxQiZd6psd2KfCNa`+ohxig45Tvvr-*OXB}W|FW&@ z-_dv~DZxWrwCnyfZP6_`lP~{?XP?JWEbzeg`6-_rF%N>x8#=k9!}AtM8we}-@<@j4 zxbr~$Zck!`d+&}yubR|^7ZJ*znPb>2nv8QKYNPVcaj-vVb}Kk=lyA{H<&2gZ$!pCG z7uTPOlZj7$xbh%tt40pT_t*2XpUS+NG5F^+$UDgtgH!vn2Na+5aUXZ$aY2 z>kFcmCOq4p`Yb`VuJ@p3emQ4^&Yms%&lPELi~gDK{kLSRT5W8UPMG$6F*!M5)%UwU zToIahf7j~Q%nP(QHbh>Dy)JT;H|S)@ewGVYCUu*J7M4zb=rvn>)!NW(P6bW2gOza) z#E)G$^w6-Pu`*Ha5$k{EEkZ{a7|#f;VqhxJXO*lF_`=Py;8|$zRmLMW`<>3jw`PVU z>X|xt?T^vFv|xJY4dYUmxYb`gRv(Jo(_iE1g{5YHwnzN;bfHlI`xO{8Cx~pnf3bxuX@hc zi{!g19k1AE{X72m-A}*vpN!J2EHAHLIq98e_CdR^OUvTt-2G7%Kd)Y)XWrg!yLWR} z?3=)RMRixBoMg!k5&b4+r84F>9IAQzosBtB#L~t1n&-Q`$|Fo`a=UttyB(sn782e4@}!)U<_nQY8XA{;>t?8KQ_l14KR>TW zY*yxy6L}^Jm46<$TJrvNofG>e{jXK0?k;7E{kVkZn|RLd{}Pom0#h~#B};tUZx$>w zTlYv;&ScMvIzbi94<;8q%41$}Jv?pU|B#TDRo^ZU-ZuNZq-rqHV=dPup|3Y zCN>^+Nck49>h9MU=k`0re-_F#zCPpl#FKv&1KhrxS+*?y9=PD}tcv-b?nmnfZ?0J`;|i(0aVh@ZM1wE3=cZ~)AKZUm`b^1( zfQrR(70-9NR8_IM~ra?AVAR{AlI^MTve)s?k#j>#_6_&zzre#zIaciq4K{yecSB1OX9wx%ZU z)*-Q{pQLI|Y&9{i*%DopfAa4my{Z|P3jgn)+hnh1Td6l!d_#}^^4tJ0Y@a?%TV zqK{|R`ETz!+_Yc|`}c#f!QuiZ{Fi(%@u*y(V7%5ThO4$d{Y6Z;&iA_QtKWZdQh(6B zaj$pA()A(5r`PWn{H$i}G4o8Ls&c&eBaf9e#rGQd)vTot>=S2tQIcN1sdw_K24^3@@Rn@M29 z+HaR1bWEE1;?=nW3=D$H9B1itYq8dw_ z+)?4VUbx=&u2l^gO`GRxJ&1mG$J{GWoWG4FA@{HsV^?O$(|m_%6GHB3F>9*!>*$ET z@O*!Qc};>Svtsva!56cRo>G%7Qhjwn{OteGKVqkS+%{N96c{jjHa%F%aehO9w$nWI z(BlCwHs!xsxAc3xVEM1FeCpS;^{;jsE@X6`lV)GFZL`CisE0Xre=kk@vi^RfLBnSr zy)WflRXa~VW{P2d!!8l#mvM)G{@R+=%nXb_7#SWg6g50AU;q32nLZ|E#@I_IN*A!C z_uc(j_ULc9em3ix($^{>42{&gpxBgTD-|O68Z+6G-nKQ|9iM1e03un+zW0$J| zlQ&#tZ7^%lVwdShuW#2WdHa3u{H}NZKZbvHdC^z@!nEw|#`i{X(rQj9c9kc%G zqZ0KB(`JJXC6)Sndze`G8a3y)pYd*ZedbAA>!e~jfebhK0h*l-Rc0NL(G%%%c6d8Hyp_5(r<1kVq*|>@^!3W?YC&i;&DjJ z|KVU9da$X3k2lbX)h{UMPmJT`sWJN>M2Hk0w^04~|5MGC{9AhAXFr<0F5YQd8n^Xs zaahmRTbtEibTLWJb7+a*;vkyJVsypxRs9?7y`7;~&Z?Rg*IY{a{^GCd_4VGhCp(}1 z2{=4KKtRJncv7{4<)++y98SXD!X)&dD`q$2? z|I(f5ceYy}>tLRs&)TdVr`X`|Wy)5u&smWbDWPV1e&4q|;yz$KNvi(?_p>FgwU7VB zG5%A3pki%w;buUXg_#4>wmb8ttDg)llM}!7({-(hpF`Q4mmZ(4Og7lnIXix;Yr1*J zJ{P}-e1_UFzE6!iXaU)b{W-$04W z4*5P;lhT7!x?PTW@hmGT$a)lg`D}7q)-=iOpDM3aCH}m_zUD^L$wZfb4z{+jXFopt zeCkljZu?KVQ9_?$V+%Rvey&k`SpVv3=atp>jgCf^mag2ByXegd0pa$dD_jD6D?eEu zZ%obIAi*hj_xJQS)^8t4#k3hNiWe?f?;oD((CoCSt?rV>$?Suk-<}GViFO$uJntVA zB>(w9r#wgLf=7;lj(R>3`m@^Ew3e4moLZG|_VGUJ{qGYm9bOz~^G9djv8}p)q>lKk zc)b5#eEbm+>+>sWnf}VeuShxQ7ZIbJsK0#E;r}bDc+X#-mAdJ6>UmxE6O9Wa)AQzq zt?IGq%>0(xo_cp>mQQ@%tN6%u<-g@BPh0)^ug!gZZu`$oimac7Z+R75n|u6=fOEjS zNqYp+RO~oHBzSLxrzE4^A<7 zY1NN8F`gWFW7?bUf}x4Oq}A`t4}TbAI5a{wdqgW{|mlO zdz84LXS=B6Ub%qV4Yyt#vU_JPDEqzqfayW$WA*zUJ2Q6N_xY+}-}}Xa*=%TXu8Yuc#5-ekkt%(eJfn<=|#K#AlBUFWsM?cdkQN>kF6o&v&g<0Jv8YExtQJtb0r^s3uAOC!HzJ=e0 z-exwRIkt~NmKn$TYIJO1ytJ*M2(7BDg+dwRa(`~wc`KaMIa z7vl4u&F}8;X@ zPC@nf#!0;%dcS}D5vi2P4Obk;%nU#ESic;p%SMu=qUH;GM)mjAig*|eMS?fz3O z`?qnh6s(OiQ+*ydbBff>Y1d_#{Q0^qR^JJk@kM&}Hd*H~rIi=Fm+U)YqGv#~xtbbj`9Cyj`nVp+k8qE~kQ`)hLc-+gFoqdCLt zuxXWwp~j=YV>w@4Os}3+pI3k5qWDc21Gyi}xwbPO8=9@(_IKaZb;+0hZHwZS;h*oY z>lX`)Y;pC^DZ2B*nZhRg(fnuiw>j(MZqdoV7TN~%e2+|+)3kl7`0KdnJ-63iOnPn- zrnp@H>GTPww!fc#D*n*N389DdHZHjCuxI8PHnZLHTNNI?wvzE2qw^sQdrxVC#_49? zH{8FQten#1Rwo_0?SlM;{C%MtH`%Ikal8&a7r1m03-@-eB`f7tsf4#$uCiOT?#_yz z+gQIS-#YK;u|;mxmviCMzgtY6WB-yjBW?R9`(?+%w4Vgu&^XTXaW7NHTfe}EjcZ+J zd}P%NyBoaxgXGHnf9LqcY!$6P5~>lO|E{g{*Pf;Se_s`xx-NcQH23$kM)9DM8`6)C z3im3Q^@t>I`Xc&ri8<@ugKj(*G*?b5eV@*L^=QTU>1Wrv7d>YZzql^n_`<=)hV>t| z=Tz`HcKVe4)}H+J&)1)dQa&}>I_CF^V>ipV{#U={w?jprk>lKAr#cDch2P$up88LE z{_-Cx3-5m29pI(Xvp=)<_nS)Vf=e|oK1eNj_=NBG8o{6T`k9UzD;*B+5a&K>v8+LA zy9QI^qII6Pw?CQd`M~qYE3ULh8=}Kc6$XoIu8-QEFLmXR?8+CWzfxXEY z&pKov`BPuPdDcw96NN2N0Zzwtn7aZF% ze?5GpZ?<6pPc`TDX<2_hRWA=_?kHC>Hr;#J&FRZO$-Q^XRNK#NkWHHBthHzMRE0FP zhecvbu3kyM{=cg2&|_=E_XStqPx@(l!%y|fv5d<s!P7->UJB@4jcB zI(vH7d&93W%xie-XV^L0h7Jp`6e6b=hG*oyzyly>I!hF3?rUf2HDDbD3`zlb;tJ66UZ>T_3RB z?NhX#7RRpppQ=CU>AU=%_v?>X=d7fE+c$r&Q&;2gc@TH}>enR~64uu+>h+)gQ+NOE zGUs>R`b$GRbOdMpTxG_#wbg!QL*#_0XY)UweBW`*=j4wG&!U>*8XA*@K28!ab+-Sx zI`LKI;vMU^T|AN_DD%yx?ZtkN@0Yj^3T0gCd#l>D|5m_8F=oblT(z%tO``sUer0@N zQPi~3ct>{ETz1c@$4ecrc1RYV6I);;;}RpS&v)hct(L{fUHs4Q@ZaAV5M2FTdYg6P zhquj(Cgf?$hNVr?|8XiH&iO&vwSf74wg#-hU%l8pv~+fC%GzjVA^(c^hsHjg(xVSO zg7w~BnU|+0`h;uZ>P&}pult_OK8sFPJfGw(7kho7{+&tpCk17*=}*7ub@GVhj?h1E zmoAQeJ^k8tHi=jK=es}lpZfZW%j|a$`w6AdTH?HQ%-{b%I-rk=3 z`k>qAyJ62C9?ErF>^RNmxc!P0DM@ku>1Q%Kj~!TUqxe#_s^b^`=Edc|Bfo9v;+SGzvG&<&oBC^ytp7e; zb3J3SvE0rt%j*6Lhy=Aw4w&WiVS;+G>zZDL=nZ!(cy5aCSez#s=CgkK>nZIfT6g{* z6Ptd1W%u$+9d&g%)2}~bjE%U;)nBskhH|M&(#juES_l7J`quJqZ`}ST>s|f!YVVGX z`kUqNr0{2YdDS1wj>6I}5-*>}A1;aB`E`lzBlg&f+@XT)o)hFMN~HJctDW~-7=Kc2 z=~tE=8~pzAoxAMDw%198r}~XV{@shVg8E`()8yFab z5*=sha(l7XFSLwb>%Z#z_5c5N)~+p3TU~zsBiH-?@Bd$m|Ni~{-_>(CYrn0$e|5=@ zJ~IyvTMJ`eZqdq=CjRs4`Fr>to)L@t_TZ)Z#ize^o&E3rPQ!ZEZRUMZn@(BQsZ4n9 z$85=XSe#Y7Q-b$A+tfpiJ8H~ou2nwf%xF6Mu;I{l8+-_gD*Vh#ay+1Kqowod;3Z*9WQYgZ@#nppZ; zwCbVyx}d*NPan9mS2k3Hb*Tp`JUlP&{fgb-;G|HQM+>`kxPuz5&N!0dx9iY)sgSLf z9oiEAbB}w>TxNQxGx*~+>%-~%u`>^(yiiyvo!oKj>l(2V&0jZC7wrl<5OMf-*>|~_ z>f3&oHrhPM5)%2{yK2$Vr`3Vmz4`RgVwr|&8Cjk{MLFUKYOu0QD3|zE?-lQ`eJRSccdezn6;cu({ChguI zH~s3lt?zpdaoL@$PyNil{_%y3%g?GfzaKdv@|dOc&5x4IKZiD^2DZgnI4}2cxZDxG zhKD&Lah8#`6H8gdvE>1~mux;DF~PKH(eovnFFF3%spj=ra=utr4$JS`x6ag*UNw2> zTJkDQfMJG_6N5TehmdaYSq`Bbj+dX0?_;oQpD0;t^5ydXbMh}H@2|YvUZb%5!0pCg zGSfE2_ny|+SS`e!uWyUi%zN;K zdpUoa#3Mf0V+U=07X=`iI`{a6WhsU@pm{JZt!i~ z$1dZ&aSiv9?{gc%D|7`jjMW-Lt{y+^a?nvyM7FEt^wGn6=YQws=h}DVYx&*ct4psc z<^L*W4L#6gF!{yQm2I~5r&0>5UwpZLXSaT5)s8!*hEX%xEKV8S;QenNklc3M!F0WV zb=0=+N_!l3`Nm9twn^!=vWmTG4e#G`7yrw)e{@`|=wIu-JVZ5X+QMVEFDw@NwsL3P z^{sys{|i^VUugR3$i1C@e-}9JWYdl^IMTz=^5xR?^J^`yT{tjf#o?asZnx{V$f-|j zkiC?prm(@fNknSG0jZlBGmWdAcL}Ccv@bs!XO`HZp*5M;dsRHsj0pMGb9r$5? zYu*95hu_W5J~gR+$Kd+Wdy!w~iQ#n+@v~!17%_RQm)B0+$?|MV|5g9=+pIk~ zmNruplMlC?$?*T|5t~%L{ZJ&y1+liH4J0{2*9^x!4FZ%zDLqRuJe1>xWuh-R2 zGUJ!uyK}eP`i|Efx!9LgdHfBkvv%~D7|5{%>{Ncdh*|%CaxJs`!PUCC>QhBHd!7d- zFG)Q*<^CFnueaF~9alb|ucg>Mz2oJMncG!oslKRNIsK4(#0#(aAKZMNb9n3QU*E(O zc6scg=d5sIRlXGIE0Q#u=VHHH&M%H}whcd6V_t z?&GD`Jp1-7ygZ{$Z^7F4UynBBJ>U9R){T?L#9O1fY|ebGy{w)YsaZBW_4jyuesDY~ zXj?REWn;lV|J|t*6}CpLTr%k#r`xpFdHa8|DnI>nW9h6Xb^BCOZO$76ec!`>P(j#n zfnQ+szDFC~>zNNp3otxrkP%^!OyF^zbZ~z2^2Wro)7PuMTpq3xpJ#bC*scB-%TjZW zpj{JQ9DVUMV*Y-H=vua;51&NTeLh%!)H#RUsr~xDw3MemE%=M4yGZ#@?7aWR@_mhF z3(JAVBD3Pn{A*^MnJ#spsQTqwro%BU<~LIoR95V<+i~{Ouhr8;b*d9h&-x^?Z*lOds&k>gul?s=Ytvihy+vj||mfHF+evK%j(cvdL zk<3|a|Mpy;!T5*av`pD5#Vi{U^V6ex@&4Z@zmHwja(yow-+#F4eKIjKa>u+91;LMusm*7uMY!zGcxlKmVZE(w?;Fuv z6|RYFS|4V4GjcC*W%GY}=Lz3;(ffBZw4yH5@5y812sOR`NIdobn~K|)R6$NbzXB`NUdSeiHPG*VV_5lA^Pd}e? z5H<7bOS-$nHF27LXWY9Qg;z`o?pqIk67X(scpk;Pt7%qhs1qZL4FZylta!Pk>=zkOmpRIwIMsQ%VcaZW2c zZgx)|>$^LF;UPS+y1%;am5Qmmx-GnsG`lOpjHhtHlWu=&ab1T0FaB?QcICUj-0~RN zD=&^KG+x@$Uz`-WQ?Y8-hIemDjA9n+Z?R6v=D%}{?X#zZht9-`dhU-mBUjH^#dSEx zR#aB>-0WuuSwwt%fWK4 zYz=`%?UkPO({&@6Jr!aPV@Lw?=|8!_os%JDrLJZnSwXZGQf+dhgR| z8!r8_f4`mghVT@x>+dBz{$)-uFu%V;MSS<6)BhthMB4?+U!(+lJiS8fd3y-kZQieJ zt&&c!<#$&9JMGRd`)uv|x2KpTS=2vh8a~oZXfN?9Ez7Igod4g#!0xGGZc8S^+4bMQ zKZ~{CdUf&pl0`3h?QNIoB^Yg45dI+|^Tyo92_9zG56)P2VO8XZr!S0)`t75054$ED z|ML2cgqGy*4;>SZ@ZC_+Kie!n{rXuR@i|N_oi!7}?_90^7igLPF>1O+t9sp=OUXaB zzCW~OJHx*P|5G_VXXhra{&d)FV$Hnx?+fNnesL_~kGZTw^cAVB0}oG~e`#6pO_}d& zz{U^hiT9oKqN3KyZCrJ~`M>a4Z~N1~?3;W_*)C?ivh$n&T5}rPZ}Gd|BzrEc30t|E<&sF$F6zAffGvG(l8I|su6|BoeA(Mq|2XHUR~ery z-6oT=eX+0iY5Sv@GV_eLZ#WQqd*S+>Zt}PP*4~^ZT6~#HSSil}h-Ft+v0KHV4MrDYzW$nYxxM`cEFG*&erT=Sq2aC)=KTRlGD* zb$&}-qWv4Ohm4$0GR_uGou+v8(cApK4{}C-Lrd?ii&ek&O6INBe7DCseE&l?{7}(N zP|j<(ZIXQdaYjPx`$HQyv%J;)_o;TNs7%;%xm?)=LFfM0M)F*Ju)FFv%X7^I$#1s2 z{{8#M$92E&Y+WhNR7nf}eyKwZD!=f#d7=M>4M+F_IZ~gP{Z*FyKsmr67-2YpSEoiIV zcDd*H2?zH>p$^`^w(R!}R#J_h;NT{^YN!5fy#=bD`CjeXZpp%9)38$B?}FC1NrkUe zYwFd~r#+m!YW?K50$TDd8{YHJ$WdGMnZ5K`VxC6km4;W>6+W0Hx$|w{+H$aSL-K;p znf3=IQtBJoX4ZunImdnYG2ujvmT!1y%&+^`I3752v>jP0e=$&G<&n4c53CPd()r~O zb8JV*v$dxx-(Qs58yj?e(Q~aL4%f8b*Ta&WnN(~`r*m$Ydyw;wRl%>#>mAj8OZ#4p zcz)x@A$eK-^rxpr5xu+puJ+;wCx3i@IO8Dq>*q`LH#r(Fv)bbLwYBnd z+$ola&xAb8csDLS{*{g8bnCJ9zgvH~Jll8m@Eq%d>tiHmF7K58BiwmVJ1ytkAM-83 zW{O#Re}12D)pY1X?UZ#BrhiQP^T_{wkca@s=bXz1hQft?Sx(^|;=5|%-~3qjLHpmN zKYv@^zT)ySj(Cwh%egh|{IA>=PJ=IKC2rzdca1r5 z)6KfavsgnrB)3bK|2pzXHM71_(Xlah*`BMHIJPK$W4yFL_VlH9C(m>^MqmE7qsMV> zuM7WXna?Zr4@GDw=rnaM)AhHv7c)GH|d$B-mev$G4VwI2k$I9J6_Hvx65Vb zJr_Exik3{A_F;YYgTA%blU@6NN(WtAxXz`jIc(NW#+tq_Hw6@WwygL+d#~-|I}(us z_1-6fBNgs{pX6~$e3{U4LCJ(Wo`v;^I#W2d{Hm4QXXBR(xB&Ygn)uz|+Qx@{QPd}q&w=Mhs zj2-J#idUR^#n@N$#4#wz`?Hb0`ifis18t5i+!?+)Y176<>#wcvT>ZD|@;d*z>#Qu| z*_nZhi{qQ}mUjduiJH}QJo?Y#-rJk|ckS;jF;=^;>`+o<O@L+mCG1Vva_|Y<*E1K7G~d7yGaNySDCC)u+TI^_N$FfB$;_wH;q;BTC9M`YOWj zK7O%m>x6a7{g3c*r~F!9v7~-QvDuUS>A?>e7ZvcgnSMlwaqZrnRFId%&$IT#?t?oP z{MX}pB=O<9TxQ=X^@Arwcnu64bDRHl^aPy@44dM9a7TZL?WX_D6>`oNOFx*j`cH4{ z+qk3j=*qaYuGfQ}mT{D>PM&|Y#prZ=T!+i#sabntPQL#CVE!I!pCfml@vROG6Pt`@9Ci8tvmk~TX*dUe*SCzyknDe743KYZ#;NH!q#Z{4z0&Jk*~_RQgwFP z9Q1fGL8!TK_hNT5sSd~9BK2uATF+0@{Zw6Fq16}=akI$kg|*&}7#p_j%Qv>v-CuQn zdoV*_F@Hhu=gX!|_ny4fdw7pGw)eWnCh6H<*=v}M9bQqW+-%^vLK< zNqt+Tv!c|Z{zmzIEaqnw+`RPE?ci;vHSUGJSaiLqz^&@XBmZ5;`4-jw|Eu`-Zqzr; z-{I4*F4>uXH~Ve;*V+UP#sbb087`8451egS#+&*fJ^nyTg13>9!T0&D`OB^PWrU== z)J1Ym?ljo;Xk+A#cJ@6M%|*}t#O3QB7HT#K;Br)`xopy!_~Giw=m5>>H}95v9k|lu zJEQLRgIljZOxn|2@4b~nV{MV7pz($o;lJPg3!a-~kRPZce6sfY`TM0W-W9H1KW+Wj zm9{av%qnQ9WUx1=VqIJcH#RNgWVrIPnOm`+8SPNZghRsm%A_jb-ayNe{o`oks+@v=d6TF z1~=aMK5|T!)pU@KJ!do3rtqJozgYaW-1Xm%CCq%@+IFpifBiBx8+6zVTn_y^vfxgY(vO+8vgh7Cy4k#;A-P>HIEC3+^x`242H^sh1$hcavlu+s zw@hFVSY(~hz_3E`kP}nTtQjZYu zn92P?!^X<>M#axf88;L>2#_*baPG~FCFzn^;)H{ugJ0-oeD>gDzOP@{JYRlVtxMtq ziI+7u^#!J${n5;lKI7t)?M8{4%hk^+h#vWJ>9_i0^$vqF-Iw|e5_e6UyWc)|H%D;( z{_k(zi~L@+(e~f^5_hYsQF~_9H~E`xwa&;iT-eWNQ(#fux-ZLkPes=9xRx;Im8X4m z-^{Cdx#DH+{RdypR)2E7(k|+JfbmC)Ni=gs;e)^`pZrL#Ofgg6d&f$A&(zIzFTU?^ z@Wr&~d+~KI91n}{Y|txY=Z|}}*T5^=bEBFkv%_q!tv{++R^6~%^Dj_uMnmcMwACf& z*h{}2U*BdR{`yX?C2!b~T{RJ=FLV-piwe%|(-M|%t~he_bA++#gzATVWs+MH&bSs% ziTZF!WAE!GIUkAX;?IS)Jpcb}Vxc_u$-32XbIm_`KPctOX8sZV!K#(LXZPKI`W1`X z9T|S_-uLd?_Qo2)Wl8VyZ-oo}|Hi4j`l)aK!LU;)CI-8>TJmL|#0jt~^3T{WePW`o zk4Htp+DCzV^Xxx&aIaQcnDXJk{|_6Z>z=9Sf1cPcmHkdeQ}z1elV97rYRr75v3Onn z@cifGw&=AwIc+V*40`+L_}{m3WXW7w*xm7BW9eL%^vUbLoSI>@qMw6@^WdD6`%Df? zuJU%)mKXfho{{tYgGHtKk9vhG2N``T*3?Mv2A?6w-;6I!~0!@(su?+-siM~$40K(V;UtIJ=T zTMF_-6c&sA>|j9%bCWqWgBy;F*Bq_Z#g-j2+*mFAoh4Y$ z-+tfM!<%Mn?Mj?>d!7l~i$`ZW$y2I*Y{NZTxjIOPfp)!_I}y^>b~Iph8BxQ*1L~>FF*N!*@S(KfWnM| zhE)oWO{0&nPoE_E)U?dl(t7fqW$PtX^u*nZq9+=J3$$~m^O=UdID0T4VcD}Qmwk5K zeI-9b{YOY+LR+h;V3T~C_DsgBjv0;nPnWjr)q5{K`N*2^@Ab}JEc&YLbtNDFr5xQ7 z$hhaZgM9oe4!2vncbZt8VuWXPFFM%&kH>Tm_r)p4BVMV^zkWk`YJJ??y27P;XDW{W zIW)WH^4^@h_%2`P#zRX?br{l?EYtJgYj1d{l=a8r{37Y=d>=Y$BUgL*=Uu<5ch3Cw z-&Z=@7gtot*fJM+J9_e#l4!Yz z>(4itnQ33xTcLC%xLeTs`(-unst2>H{L7F3@R{_p@{)^=dC-lV=%9jpqax>`$euk< z{0#2B;^5fz?aHja%|8p$jP^Su+cN)1I#jLG-qL>Mx2o{JhjsRb%!_MlC#Yo3cd7s2 zxL5SWhW^Kb)~c&bxBr>9H6~`d{fxd}pE;kMx_wae$EV}wWwIi(cd8e2Sb11FGq>c& zRNg=ivhv|MHyrBcfH?h1*&WM7j6xo&`^H;Q(b++u9baG=>k9hiOlDi zm5_b0HPWH{vX*)QU!-0C-rw<@Ogt+U)-FH#Xx7H%r#Fk7_z};!+i~}q9htk<{_~5! zB-!xbl=83O&1|cq8rqA$-!NeOzEpef=H<6@tj+l|YWf=+B_^dE4mc6;+i>^&#A$JD zbvvguvgWQ^7F4c{i%N|KX3N4%BuZ)rnT5#+-c{( z>c9H^AKvLPAFl0HKU!?~I4WqGTxTq;Bp}|uo`qEKu1tz{G{byRn1}Cdu*gTy0R_3SKyVJqkY4MlC zr3H?ehVsiDzL%r1;Fw;r#~&{K)M)v>S%>+X({yH}zJGJc!|8EHbo~d*!(REHm3zy|T)Av-~`cZahZJYU~UrSUcB)EM2D(aj$`B`gVmCS?-oA)eQ z#ON2L^I2+DTb%IWW0Bb_n?8y$KsUfki9@KmlBY)_2mo-{rkp~oSUv2%s` ztnSH@Q_nW_d{cM&bMw83vi_fgjvU#dpR!fgh1Pz*=O2Gnvj22;Z2#VOze@l7QC@c6 z``?r4zh3Q~^y2%jc1y+ha~WSJ-~0ddX0FVunR@r%2yh&jI`w#y-=`l;Iu4)XI-~B- zzk1oQXujT`eJiqDlfC!KlrAzjYL*eVt0eZ=6HBFEuPmIp^zYn~Q#zX>IziKDo%?(R z%gNsa1;US{J=^`u$?vV6T2)J?aoLY&2R42R{H9~S<54R|WamTUg{gdNj^9vt#ItG9 z|5N><+yA8gGH7HsJTx!MbH?V5=?cGD3cqdB433x<5wOTYZ*#KO#59!$cmByg{8Pri zVV&`v`%Ab|jsD&am@8ym_Otzn<*RHvRyl#0>93Ov+I5!(ehPCqcC7l6jAiY*%*M#_ zvZtqSZ94eR^^Wg9pDouHr`}N1e7W#OZtVSM8{dEW^y{byP%BH8rH5G)sDXeQy7{I&Pj{?ga+ z>znsQ?cAjBHGSEt)oZ->-&-gx^ZK5MdH|z}NQZ|I7jM%-hYc5WRJhrg9tJG9p(V)0 z>X8`8IZr*>e}3Q3Z|~Pvmb5;7^(U_Iyrik8g|xj*e%{gTHMd*lp6C3q-#u_Gcjb%8 zWzY5(RlmC~|4HO(+u8reTa=~PZ*Ti6ld*2!dail@NxZd+^yVW_}iyI>O>;y8D zs#B(KesD>TB{6K9{f6)z+y6|ty0!HCmHb#IyT>>AZr&=nKk3#yulruH+UGtt6qHq2 zZFb#!eqrMA1-FmxZB6@e>yL8p6xleT85-`s_I;mvan-%uyLW|Nn>sn>Ufilz1vYkTk1yZ)YVY|Y zrB$D2|GS)*psYS!xAx^AGP4anU8^`TEY2 z{sq76@%Z~}``@gRKacN^`1>dAE z&E8NqZTZ7>l7;yXzkPkN{>SE+Ti&1d$v@YAdB3+tJSF(z75^0%&h4o_WoPoHN-XbI zwDqZOy~}o<^UKZ^3Z|4#KD+PU>I{Qj&9+|{?jD&}XErIoL4;XL^6^RIEi!7oVHsDy z?z-x&8X3HNVSeP5h1V^b=M>1KFG&t}FIJX#Vmz-^LtInf;Ip02*GXMVQaY@0N}W0N z6d$A3l?f^?T%Aph2{U3e#JJd)4h1U2Xb5w3Iwc9ra8Xk<-nU)!=gafQms>sW4N868 z`z&kgu7{zJaeZPzPR*@qkh`=qu&<9&)Dy`<>l6O%Qp$kxVi7U$GeB8Z%>!7 zt$&_)?*BE>Y1?<*>{H|wdVed(cmE+j$&Md~9Ll0gh`2qLv0GvomUiOi@u0BkF0t!q?09)z;R0ecUgbRlN4zp6}->r_Q{xbc^LKixRuV`QLm0tKt#KKIEe>>RxaChOKh9ycIS47J?1iDz?iJCV&U{ss= zcdCR=%u1)D9?NZCdv1BXKu;{>*ul(S!uKr7^BMl~CZGJh?DoSN({p#ub$*+$x!;O+ zhggQ!o>kA)at`e{>EfYlT6+4FS?`Gj&c5sJ&7D#jTzvAvd5vSqS%!zxjE#G9RyI#N z^L@^vu$Gr}()h^%^v?@3daHFl?hJP^|S9$gBS67d0X=RLK z4Pxd#@c*OxCy&GDk6JqjII%l39-LF%Qs#KLQcLyz!;PArcOU9H2G{TiZr{T-NpkyT z_wT#D3B~2zt`)z&Kkl!a!UOlX-Elwdl()XWx^JTO*Hhk6f2$v*?Am?7<>-T$W1H%n zH#kW#|8Qt*>{?YcmCd|YWwOVab?dI5IWu#^%`CN>*0ZKc?6vuqa;085bxZR%tHZwq zogPj}*kR0mOQXslfcM~A4dxXGq+F#BN*r}AdT&xwrr?oL*J^Q?O{(_xHJ9~&%2zXK zOwd-`HK(cEz1r*jrvQ1OKN~Z`AEczVEGqK;(|2#C@t;j+C;bpT-s+TcO7_F!yD4XP zbWM$)aq{ziG0tPtWd*iAJIZG6`RiHF4$oNMz%cnm+YfsM?YMc4^C0`IhV4HNn#`}Y zVSAtXEbi?y%Tkg5hZ1@W9hOf%X;`(6?{oTZr{7y&U9Iwtuk|goUR$@)F+s14!I}%5SfDE5zL1C-(i+Y1y6cW?PxPXNm~O`xrfMd2wL$ zv^)9%K4(O)uqTGJ1^%#J*lByPm}lh^Rh0#=l6W^}2p?Ee&~Q0UJLgQaWv<;Sx%vAI z{2 z54oK+(hEMirZ2v|yX*UvH%_xMr)~HxaHsZ|_%;sCsI_5R4Z6^hure8hvwfk$P`P-zw@#@$AU;Hh0dj8!6O=oqL2!(6sX8r$lPEJ8EqP@JHZNaiT zGT!YDh8tFgf3z!Kcb=(j!2>nt`1$+ZmHL+WY};A2x^A!Pwbz&K`M-Zzy`MQF{)mKz z6~o@m2ljlHX?f+7zsdwuoe{e4%T zd_Dd3(7LSejVbEg>I~a{TwMM1lZryxW_2l#C)1lJ#cYs#Z^J(C zSXO@Vu8!LJms%TZvb8q!Zg>Bp`zN*f>(%_KHR0#>mi}9_K5Sps*Y(%-M@>FgS)3ey z#K1wj;PgK>%K+N~4~89Q9)=uXS{2K{%ea{M(N$K*ICCXFykE>U+wrz%Xlhq@#sA(vpMB3glF9uR>1o5uz0hFN zw&=Aj2e(-BJxpW&_%mJcpBeuv%?&M#erYNN=j+#`^7&k=sjQ#(*0iUhNx9+JGDZW{ z!WI1QgH)5R?EPUo{O{Z|z0+gidroEBBu3$D1$PUV&ldTk&--HzfA{G(ODC+AUOGWE zto!4JWaEN%NyEII-}Rr?nN_6D3ibY{aVPvFqnMWY#g+E|iFE(G;fkxbR8N}L z#;M9y8?v{q_kZlQU`yP^iu{$Y98xnr-}gHz*y`FUsrQ%U=Kc@dbkN-CiQ&(B-cp{W{SOqU-r2a=A$RhF6DJnu)n%$2Tu@kl zlDGJhwxV2OVDtU^ZjKVKmF(^}B?`Z`m{@dHrKUqTY||q}u@cGUj8Eh1x-Z|Y+VsJo z?}L5EJtMY_Q3t%bCPXZ~U%rO5o=4tv$-4!LJL2wo9}_5;cy@)LGV31ht=D9o<~{qj zr$@ratI*iepED)Z2vDX?zY7yIj}tGDKA1btKcB1C6p((i?65+ZW3p_YH0hOOa@Q zJB3wX?Mv?UoOUmMxZPgZYUQf%I==2<(D_P_HmT2R-DmxesCvQwB5;Mcrod;h-@E?B z_9gvQSd+Te>9)Hcd&$#9a$6TJip^0@`*!jB{}&&t7kfAUnEHNtP57_PcdPg7@0|Z& z!XM{pb_Ib;_~rQ7r!=0B;}h7K9LJ&%k~lfZ{XbJeMWK+lr}j0UH4*}%OdB^=A5Chw z!+7`O?i~%x4S7186HHtDD;#{>ESCjN4!h|ZE6;vVV9f^x0nP_;J_i_{NiFz!=cfX{ zPIuFek_oam^tW05Xs`dHW%^!9=uC{ph1H3l^(@y|+Sh$>Y%rMG_-pRW72j^U3CMrE zqQk8f$Q*9}*0)>hfcuX6pKtH&-ecwSL`QAf83Qk#0+TIbhl1X_b-p-#Y{&D>EF!y( z9nG^#d~@9GGI zyVWDv9>v$a?DOq`!N`|&B;1Z|5m zdXKo6Z<)!zR&uXjOZT&?Wv#V|FTOkY)wxWoTDxw~+7{mJ4wtWY%a~qSAu0RAbxl>> zqAS*tMGsu(1t|nhd2Xj1b}b}i+k3t_E<%&#-pHMJzCZhv^E}u0_mqCF=*XOE!`V;DHp`bl& zK115;Tw6Ee=ayxA|IhYtem{#(xIDh+kH8koU$IKtbH3Mfe7KMic>7;E_sy68j^8@I zQP9A}XXzi--G9^THl*pZZ1`bYZZ2Y6eBiU$2hAB$?JNQwcAJiU-l6e&@)Z^ff&E;k z1^Ai%d`jStej3a8lb461E%FkxTk4wMjM0yF>}H&*@vynSldFnX#Cqlg>u-}PdZgZN zo)vsJ-u~o|UrjF3Z3z;|J2D=2EuH@G%gW07FGX4(6_@TWb)MbAqWa;{)Ay;hZxiGB zelES6By9i2RsHSVUmkn!t#fcbn%o(q7<1|Fe}28i@vi^>FMXNkv%kK?XEn<~k#f;R54tsC zHx=JV`MWpAE={sz76zw0-ngXjGVj$=B@#nx0yg*D!Ry`~AK4>$lb>-m02;D(XAR(xf*WDwCYN z&hvAp#MCCfr5~0peA9C2-50IOQ~Re2u8y1RALqJ8wLZWjV&AW?pNnd*<`sLb6@1t( zbNS!y74z-|KRj2kxr_&=@93%Bb3yOGWQu;GkkP2OML^}qgz9s3l=Usr$o z!`T;W*41!sU;E`@-F}wvn#WK7{o)kda_2!qrL^zEn*j@^q~7~`x2`eqC-03U!wH>h z_=A3*j=iL^VYb^((_r_N*-Sg_*7S8XJrQ=UGJ4f$xWVmWTl0gK4Xnb#*El?{o|0+W z@?o0M{`slr6DBsA{Ez2g;!Bcn;uJdBkhFiJnE1}q58Rg&eX`n6E&8`_v&-wt@rvt@ z<$k=sRb^j?!6c0v^L=xtpINfxuU`9x%9wN(fwMw(v+U1DCp_61xzV#HOXTFOl#wa4gWMWM|n7!XHX6Rjat=hzb7Jg!5VU72>> zs&}lp`26e{1NZqpa(>n)qH1?dyvjDiqIc)ld7qJ`)9uB?#T z_+?_SO;b+q3#Nta=YLKAyhHqcb?a^u=NoqatBZRSbHbNdGkN}aBrh{VZ0+I+1#d!K zW~FI=`eLx{eA_{roNXVO%uZUq&COg8rW?Cdf5HhDHxW&ta)BqM_gO#9l~~ZJ@OZ{t zmXEzJFWmEsW(awn)h8{?zCt1S+ljpmVN#eXOx*&q(q8`mqs62VsZA1v&4>e7EAqKg4gcs>%Y3|+yAHc-?#;f z71i1=VtvlR)+aq(n8W&abBo08u8mv&KkP}fOmb)|y--^!AYZg(-hxwq9ws%Y)UB{6 ztlv0UQQf1)?cSc-Wy+Nt7A@iyc|A?6`uhu}v|X;>P`fy-!HSjyEzqyDE0?+Rt{`K@}N|KT{#O%yW(tb^U*Uc=xeviNW z!O$;fOsBj*tM|4-S!?IL_)|wdII`_;mc3zG-*WQNq5cK7lik;^yzjNyo6Bj7- zwQW*eJ~8rNF8-C8BgMASAn|O0nfD{1DWA=L@CFoT$9F&QP#Ds4)8#31{!U zve^Iq#opj6d5b=>RzE*}+4W@7j>4Drg~1_$NhKGnmR+%ae&(aZdD%%!Uch`(XkBfV%@Rf9r$056>`a8FgqLs2k1tnWnB#rc&!px3S1-NSC4SFCmS26e z`o7iP#Pg-6{fp}bG-NBE{?nAX=`lSsDfy(tr#FX{%nBCYV_*=z;y6pMdlhS;xbEGB zwJ}$-&Ry-l+P&|8nD*p<{bAB!|NmF*+NtNg`rQZnmKq)jk2ETzv$Do!e0W=m~){K@YR9&BRM6gR#sdN`(4bB;mbmjcs2_2MdjJu;RZuX&?- zYW^Fq(yFgDDoaoOyXyVx)!w4r?;nZpQ}W&;&&u4Ed6;39?KA%m^`$SH`JNuUWN;(q z6sx9JdO{DofR*#K&f|$oY8nn|=5GBT^YLMThk3~nrUQ#b(mPHRl>C^Y%2)Bje)>h> zgEx;aj(@1NJ63bk>=Z3i_T-Fq!@{+S0xAvDSROvMyWjqy?4)4rxp=0J89a=)!k%fZ z<=O94nSF`tb}|`M-v`tl!#^!oqF; z`Ns2W8=kpIJ#gV+I~B7$w(sbPppQBgJ=K3X*I&~P|GirK`rY;GuWPMx?5?e^GJk!Q zMPu#Sf*p_N9F+ zA|x1B2#LfAwcp@oSbF1uz10IP(b+PEhbBh;SgiZTxpia2nje33l6tmjOC75#2#QqK ztB-2xvTHh~Es_+!O!~Hp@ud}4#J#$vd^O=aP(Cy4z*LX0uH&yYEe+$HB9`wD|C?G~ za_Z>5ReOux#vYizK>m-y%DwAKeKU@92(+@9uI^JtFE5nK74aMrod0(>n_}q8{cEQGd8=tHBVM1^ zIc>|lG`DcIbnj2cw`%U5w#D@7t4~q;z2mmtx~4H}^^WN?+h!ymSobnv#=B?Qb?ILu zB(948E#dNI4;HXEHT|ws@e0RJr(eI*D*f52aMk7ad-eJ2f7yf{S^fQfw$@?Mb%JZ| zy}G9Lb$@y7sZYiR$J3Thy|YsyHB4rH{5v@{(}^_)H5(wv((ebcMe;XlQ8M@{?V{y#KR zJ8a$6)mQ&bTdi~3BClrY51*r^2U*`s>lo-Y&N#^O%31pKSEFaJgvT z!q=S=vCpsG`L^~EW9iWxzu@&;C9n71mUcfjC#GHfhL_3hTcIiMtlnQ z>XU?~&zx=L>q=x-xW4_7I?MgikMISzKArz~&ZExz)I_sH0H;qJElTPO6-hd11d+nVKXfBXxtR%HK%M)hOOKWko| zHDVPoU774QD|*qyS@z5B#GCrQew6@aR}dR)%(Ml@d`-)z%f9N5<080lDH@|C}S=W9pV zfL(!R3%Aw2d>|5fyk6&cM`r#!1$+Lm9x0X=opA^D9^}s!RQdJKDg4%so((#;8eAXp z?Y|iRzP@Qri(sXVwYtE0`3rBX%#$8?KA5I^e_r5!^+wrP-VeNo_~m|B{3^(`n;^|O zEhIl6(9!1djyU(EvqyrvPkXO;c$Z`Q@t(b>c=Hl#8)MYY_m@*y9jvf z$nZCR{o$jWhuK+&o=~?^pNh777c44em4aidrhO4Ey8lPuORu{;tH_kD)U5gWr+Et1 z1#B#guH64oH1+Ae162+p8wBrp?R#bLO&fWMsiv;mx`_ z_C-F|UtcXhn|)w{1WRgp+5DITXPdfrEEXwGGtYkgT*O#UTsw}3iFMV@YaBdW%*kFp zf(QA;j$YZ>Vc~ovP(hygfjYC?2flZ|?gwn;zsEH$6PB_`qQ)$F;=tP5y1Ia}0T$H@F?U zS5O^0g(q*acIeKa%kd8yX0Zs)DdOOHV(}t1N-TU;`Mb+5UGh(UUthR2cjmmUQ3|R8 zDkgijEL3TmA-m)CXa4r&psNkV<&{zsCtQoZeCek?PoLzU^rR;{5f=xyuYBtle|*jBq#hff z^6tAle|_lTeKV(VEGXQuPuORAq2kkl|0^w``gGTSSgRAG7&x=gIk)-u(v@ZNpZ@8LV%c6%RlB^+ z!#&=X-ig=)J8ybES>L1|^j%66-jRD>3L^db9LPBk$qw zOTO!9JzleDZn2WLzt)-KUR_V})Gw5Aue+}^|NZ{P9j7WgNaH-53W{I|K$m_1UI zcjn~40{q{WEnHaj!C`{j{x=>%y+WTfV$S_;m*ee>dcR2|u*IJtINtWx>DCbYCXN{9 zz=X68Pcg>D^Ih#;_5~h1{~N`->#+M>!hC^xb!&5`NuY!{B@mj!OzR)?@ZQz`lH~Cribd;fv9v z`sjf_)(-P~UpTzxHM>^wGEUp^{Q|YQZ%a)dscyR=wsBV6tH+gl)7&S2Z}$AKefI?A zhTkn44$Q9+{+TrY%@d7ht#KvU79x8+ijk{NPK8F|MebI?Y&#N zQx^Q@$kUFKyw`MM*0TCJP0C^abZU3zgx|?d{Iqvfbk}d`v%b!k&!25KT6eN*QeEe+ zgSXF}X$+fid{VmWrdvNHyuN>1bVowKdfGRmHBW-8_PuZpXOBM3rg8MtoTmnpF3PLF z?@(&@T=%e3>94}X&$C|~{{7fYdRN~R`CI&+yDtiUIN}`k_0*K4_^03As?d&beWX!(XK+hkm=-)t|etn<3MI9F`z z>bA~=^9BC{w|3O8x2*s4x8z1mA73u_hTC#RdyIE}zFug-p?vh2_Rrc=@ocPzr+qvj zz*2MLE>m3NG6}!889T%@-JF9emP$@BwTPE_dhq6^H}!uScQO~)wU|EqaOn0ud2jdc zCK+-Y(%)3iDe+dae&a2bHs$_#<_mG{+LA&SQ(i95-V||!WxiD9^2cJ2e zpvN{NE)43*Rt_KKaxo%zR0h??c00 zZWa5zmnR&V^ds%esqeL#+r$rks7ts#&DA-A|7ZEX!%JC>FF!utQ?k>{qF%qcY?Frg z{mYtthYj3*UP-OH&-0y!<*8Og*{^$XcX!^}{X_Z}pTG;b_WP9&r{CDRCa*rzX~#4> z3snILJCA_&#hDQ*d@S=P#QuA}D*MSFj_otP{#UfPp_Bh2_kLkt$9f-62Fd2;Aj9(k z;frK57QDZ-$R&AGW0UIU{Mt1UCj`Bp@0nW0VY+WdMwjPy{o|g3&S_^q8co^xv+u7N z%dIM|)|H2Mcq!|iNjv}2j{XQMC@ip_a7r@T`B%t- zmOA4l`=%}wW?$REB)LrE;mxEw<>C*fURId+Wv`6cj`KXr{q~>ywoKhQ<3h4gr{%h9 zW;sy`=kL9A?%Q$r-BnZl>ig3~9{<}r#ojEzPxOv@#XspM>>&>8vqVmdebevip0?J* zn6+l5-FKJ6$#G9y>fe6b8v1?KhZp*S~ILd;#keGx2I)%^S?PCT(!XkGlYdk0xK&R1D|@>{<6VPLa)&x|zJ=1o0Q ztUAhTd6Sc7F!S+F=euuK zJUZ3A|55+C*qfyhFAj(8ToZrFVBVkl#av~lcst&8Pn&S<(w1}YTJ`UinKXT|dmoat z-=A=iDFIA}s&ac@sVIpVvPyOFpBfRxrhws05wNybv{Uadyc`9XE3?ZRT}w-D+g%NkLWZ+#3WeOBM|w{?**eu#@ZJlx zg6)lw0o&LWe9ZcaJ5y&HKmPb;oA!gB+w&itP(0G#x+k#9D*f~kiDxcyYbvlb7xs@--hGXx@Ia@B5UDSN88uEmP}0`Eu`{1#PGP zi`_aXqWYue@XNUUaSuAyiFvQTI$zI3!n<@$>J~r8zURl}ej8cVe{0$$5_9$O-Pr;E zs{U^?af}zQa9DKzkD%+JUuh=pm29PMnP2o)|L6Ib=pOxGntA$yYsG8+eeg~DmbEXg zkF#{%`bZvM>IBTq7!wpIo}{;o}(* zvh4lF&&4*@h`!^u$zx9rK6p-y@xlc8?67~DJenWA8%8Dvo_t_`u;)jijYnZngMG@X zZ8Hw^-S$6!A>Pb1a&rG9`>3t8sh)irU;7{Bs65*Dlc!nbtk)aod4IOPc>6wL{zmRg z5lSB#C9JCQ#r&UnnhNBzW%}kVj+tLKm5F<^sg6I>UW?U!b7vYV)IN6OH|Ke^X34QR zWqx~iO9sZ=5}DAOHP>^}`_TLMcJVE2`r>=j#`=zS?Wx1%owG~=PE9I0-yHO*;(Zb8 z)^!tuROEMN8kLw&`gZ#1|J$~;{wlGxV%q!q4ho8$^eW!qKmEAg&&uE{0_6$^d4lcB zOI5V`f4|o+F#fXQ|M3sgV@}Wa&l4)>kF9^O$y-HwnP$(iU2_lnedu5dc~kdjOVHuJ zuGY6gWAAD$ndJF5^TA@qs8`xc*__ro$SxMtEB{#Gw0>{SlE?Y0uDZ*9eDJ!pUtdqTbo_bs zuYUVozqRTelWn&ATOYUUvHnG=b?>{s{%KM@KmBRH%FM|ubrW>6q8~6Y2)a7X(&s+K zTEy}G!nAkuKYfhn{G)hl)3aT#udiPEerslJ{QiqU`s=%&b*?(9`{@T)f%D`$JOV0j z1QiavW4rhvphPTYdr^$VuaB+M-?L0`l)myLuDWmT!*5POpXO{zG}9+y6WB4&f1ujwd)TzS7={0n&A+*!TtZi z1rHbcSenlo_AJpiOemV_InGA7!B_0RKzWEy!GGi<5!_To;!;U z-k8U8f?#a z9^|l^UgPNMGHGpl#Z+4HH*w9Bvew9zodpNkFXpYwW4rCq8*$p>?)+shO3kAbTzfye zNZgpOejxeRH056_zIY3NnY(FMXU=UE=TCRDV&86G-+VARDW1J<8pD@qZ#=E1A3l&W zjlrRieTtCgbRh$?_3;-BUhQghpX9aiN&BB}{`#hc4?BB8R-c~ZuwstB`0ecZe$#vA zH*zZeFR#_zc-s5w-Mp1jU)*GWul^e`>DfVtCeEFm;p>hnym)kc8b4<~A8%avzZ(BY z%P-D*4{eZtZu;xoziQj9Q;&QpQq?X=i#hk>{}<`?+5a9CC!|mIJ?SMCoV(gck>PYr zVG6%s*4Ds6BW+2=2V6Q&ZMI3B_mw#Iq0woDX1x>d-`M6bO*^_7P@~fGNO3o7wf*-UH)3U{_c@mSI_*~w_$1aA)SEJ1!bQr_ddz-P`o4kvFP^irw285 z{NM4u|6J5NF|J8lLs$L$^7BXY$rC(R&o{mMZ}+oC>Ef^VdxHK%?RU{Hy1M#gX~@?w zY4JU6$G94-u9~Dbo$~345&gdG2ev~ zZPy>@y}Khnx9507>i5uJ9J_D5tzN&i_@Uj>|FQKfKW+X$vd#LNu-4Wjgv)Y6UViPA z$uF;;|Fpr!^P5r*)4dhyQnP#aTh(!CK1g2W8`k5mz3=tKsP}t~>W=Ku{HXduQ zvD5oy_cFp@+5?mQf18@iL~5tbbG~Qy$wgsd@(J}Te7Cf^4b9k%Jj1t5h*bJ&)V|Bf zbc;%41#*3+aHOjvS&+3@dQv6R)kRkI%{yw~6N^Jli`{;0Ch{tUDc2s*I%b9~`+Wy|ZW*5JA6_?ACt&+A;5Bs0Jw~i}v zx4U+EYf$%sjMFb;{~ofu!SB?pv-mN`(jPBXKm5xUS;DeDRw?q+!gWjU?pI&5C%vtQF5>6PUHy^9cU~MQN6{7@$`Y2WqU6j>|J&7 z!*><|_4QBwmHh2n&;DEd!B*}yDhaNir0vAI_shF;9BpL&aq;`*Ne5eQ|1=Gqu()pS z)F+CuI_cssI7A;`PgR(3=0|C@l+%R}8@0%phs>H~7JLw0%(y{^0Z&j|+>QK4fT^WPM-syDR!hyui(V%q!PnS$Q{E|~KYEb`hkCL<+-ewqB2ie|6jQp##}t58XJdh5@Mw&)d+ zhbH*dBnnBa^p?!(U$E};>i0~cTk6Zjzx8va?lL~C_@{LCcFXRF^JSuHuMOE|h5gyk zcjxB0u%B*RS$b7|q0Q5F?p_~%e@&ds_sPHYdq3CcerP{9qi28YW6zb+3Hx4iR-aDJ z44KO0t=n>i)67EhKy_Rj|C>m!Y2CI9+$VW{loUE~_xPqCb<8)-H@^RU$;{kghs@#I zuf0WUv^QFr94_^-y5JBYF-5@f>76*gD7RBR4|)3L3I~^Je%zEAGQ&t>>G7L2@-55? zUP)gT9=%lBocvGvqKJOF_UhLy&u%9wA3DE;`{?EU_K}lp?Z2}+3Hr0h?4L5>teLFa zBeq|$C$F5Ve3-QOmR?3)#3pAO-d*M5KI%E*|DXQnWZ&k#CT@2s_tj&!`?~UVj9vdH zCVfd-R{G1jb@B#H-TwhkKkoVPSMkNaJ5%54>8`H45wcY2=jZDlSANYoVc^AOkt@8w z?$||xa|?9rGi(K|V$S3%cCIY5`~399$;0BQN*qsC=p4GX?qlnuo+l!4@1K2*tXunk z{-^Jk7H?bNr+!5?i<5WN)J?yxyQ#ka5Gr!>YGcD&bBzrLYz`JRe@F-xll-OMW65`L zeaQBXx7x`^65oV_=^lU=3AP2OF&KYo0pcE`z9pZC-GnD{z_}ez2IPB3L@q=nruR8~BC@n2QDmy~Crh-j{G=IpBqKRsv3EQ#XaoAfTR z`8QXugnH7Px(4qB2Nh;EpO4;CJK^xRb(7Rne(n8m(?fuVFGJKksD&pg{{PXq3g^N~ zBeD0t{a>v4W0zhpXOm+&)mZCj%z0TMsn=8LdE8vsgDfVk<@I$?*Qeh!gdgoW`t7>GUY+A?Hc=5b9G`qIdv%aq`s~x23j%n| zPZ%y#SofT9L(0m9$DS0;U-x05M4VCm*%ussqCYJc{LOw?%PzJkrqnUpc43TJ_~!L< ztR%|frsmk13KfToPM5oPVeW&@uqpeW+t#-!PH?|H`}*#`RVRP0pWOZG%GZaN8UMxC zCgmw@?RfNKeva+z58FzW*cxM^SQ%e8>h5%kn`snj-K=|6R;9-JZohiN5jC@wMsM0B z@)l+W|Nq5%_@$qN)Q^t*pNARy_?y4;36#X;@ozA_)Skco`{e7>GxQ?cQlG{&KUw_k z%%Otm^DCamdx=avVLIhB>-FN7d!y>>IM>~EvRkp9x%YM6yPr0f`VQT z?U&CMEmao%?X2?C$H+$aye)^tq1ThYGBa7dS+6H;OY#C-zI?@dUT4KfbS;#b~_w@x#*&GrsZf z5WbOGqj)TOW(&(fQ;Vkp=8vu}+I4i2=@z%Ql_rrJ*JL&9zL?O}x4X4MaUVmG(O(<0 zMFslP4$6oImhHH*TFdlr(fY`mOl^V*nB`B*>wnX~HfmHy=n zY{8BdOA4!&FAYu2h?Z;06u7e__1JllZs$NLm8Q?->vIe}k~;m3xv$85a9{GwVfQx) zqmAWrfAz>p6z;edytP=7H}&Ygi*oxs|NfYqe6RY)zHjC3=e}-UA9OzbT>Q6>ffp}* zI$oI^c%EVZN`39SMQ#h6jwbd@yZ$8ZcG0Ax7KS%|-@d>8(Uu&m3+)na z3H#4qIVIW7BN)BZz2$50M8EGb`vQg3Te)_Mrz~DA#cg7iJpc8Y9HRYh&rS^u9i0CVwD#{_z^7nl6-X}-0bhPgL`1Y&EZT}xB zN?hv4%iRlV`}bVvaM`}!Od_UA=-2lBldrA%{dSi3w=CNyU;l2{_pYAbWx8+T>JKs2 zpDMiT)UAb|URDtNssH*$8sq)-;ZARAPlhhAZ?Vo&JTAq*>H0>#;D?9ana>pdIa%vJ zOU$ubcYmmE+ADnD@+U`Db(Pxlzfx^RLN#qOKWZLx4Ar*%+ud{b?t9_I+aIfm-Myi1 zJ+U)f*|xs?{ag0VN#WkrXLG*F&p-aZsp)RZhkKE4f3zm)2;IlGz`?l*J z`JdV(C=@04f4Z{!eme{M(&M&=?;K6qnRwvEvgozDE}7lhW2drl_jJAQD}Ee0Vxp5- z{885IQLI(;6$QuLcaJ%o zS^Q7yOPQ^b?&pAycWVCZlZjV!G>RzA6{uzmU3-`?4;%mJi^0CY6C7rh>Bk`9gVCrumeI>t&lxJXtFL#(I{|^x{ep-tb$&`S;5nO_gbT z_gLqfRC=50k^1=}yQcpr5t#sZv;H=64@uTB4vty`767p zyJvQ+-ZsD9;EnsAU2n`XZEN;(8VavCsPyCy_w=u3%Qj=A#btn=>e$2Tn2@$fk-Zx_$Nz!&N`OTYUSYxyPd z_%%WCS>J=MmTdGauU~uT)$#h+)%E|s?pnsbwru^aiB_wx?KA&cCAmI)UHsa*t-r4O ztO;LRxAoTEU;CRks)=S;d=h+h@aLfx9)YQB>hJw*-p@LwGutV4kL1*Q?|DyptSG;6 zi&fi4|IM!VTkKX}J+yWC<=nMpJL|S4@8(}ywrPF%?scl!PyhPXt*Kl8>w0!{!3Gb@ zJ8xY7nO?t z>8n+e%v*Rj$0=nPH)`m`%LN={bZB_keC+Ugr?mVZiARD$4wM935}Ex?H_F@m^w*{i z6CwGBH(x0Kd3W!A0sq!KhC6=GBmGyWAMSYg;k?NH9xLEBYm+=5cLyHVg`5C^%b!!V6OA?yS z{kX(<@Mw=iNY$e3PS?|iLhG*Wv)@{t9kW`@^5eN{;YX?kojTXQ`mp}i#DBhhW!h_& z+11b4^&x+9htKs>MY7X3d+z-5TmNU^O`l1zk7xL{s@rVS7VWwdrkx@CsU_lQS({;N zkz1#pH=D*wj)Lc`Y#K*)R6I=9No^L9cvBNS-+S%j)fG)03CkwPSKhQZrTzZEZ^o9R9J_)Z`XkXFj zXw2G{+LEq+@5P)6;jw?26xXs&eY>ViKpCaJ}K}g6vnKcR#&$ z`H{Rof6mK8M>T)H(*JS$>#D7pdihcN!{54!RvlFpzIt)K$oek^Dcg9mSa<>wEfz+e z{8Knf@aSu|dnFyQF>Ez^eBC%c+$&+~G+rDcQN?;tpSSYT_0LkzPZg!yUobbu*K5u6 z-nhS2)$60)JwE!aH~ix7(5Ff)mj7>aoH^Rx+x9fs^ZJq2oeO7e3BMfsYvbMEy0xM& zHrA~?{9wVHz3eNTj%~Tz*4Zv~ZRNCS@&0`}#!LEF3VIkwzPr96O2WaMDNoAIwl{BY z*o*!1Bm_GOq})!+%wx_!WF)Y4di25F{nq-oA{ROzHmYpTWqxBmtzSZ#P3PYZsqJUD zc7ADazdc`g)uCUvW-YlP5?mnrXThFJ=L$MFdH*lcn}5w^zs=S{>x|%kJEFhyNb9y& zJxXV&F8>wBv9C;j`rR*$^(K>i8NZxeDdw6JwnE?*mkita$E&SBA6EZkYg-oQvhZs1 ztgQcCTeLXs#W_W6Gw00|%Y9{MvPM#-w){#^&HFN*o!4Hc)>q9^>Rr8io&oEJkH(Y5 za=gSMFHi40o#)tiNPKx2pO8h~za^KvCj8s$BvfX&?3A5ryxzN)ywf>P2GmPGFq_0G z7d5jdcR`|7teIDJ&f#r)vTo10b-+>a>#@3py?Zv*T-Uo{9@ixEzaUbN??+Sfn=Qdo z1zM9PzAv1*ucNka&)S+Frn38{uuPs5_inbx=0GC{%fk#4qMts`O$cmd?-tp!GxHDU zQyytOj)Gqs84{AF99SwR@P3_aU`^)f)`QIL%&o_mycAg~4?9X*{je_){bJt1yD0v} zWp&-Uhi`iqUfNuc^q|Ggw(^p|+{#Nn$F;-GYuWErfx%d@`Bk-QvoS(ti!@@k-7|gLp0kr0lY% zUwNip4qf56c-f=+R~)-_Y^tsv41M?bN!FQtA7|e>{p+5*+Jo8E{r6u8A2grdso1an zW3h`bd+p=2>FeX0yiYBjzCwBGqz7d)B0l`tvqa#?^J_ms7Iw~@x@(pr!~JUZnn+8b z<3DS|3*K(s`aP`3-EZw;q3AagGTo!DPqU9(_$Z*}OS#Xv&M8McS5EB^SvKp;f7S0H zK92PtK4b`YK0Utj)9DXu45Sh>KBpe!%ez*;hD)*YlzZOC4u=AHy944p`4J8_-0iKb zUFS-EczwJh^1)-P&4I72DlH{bF4R5pIm`7SAfL7Qn_EYOy!;XwJ9=9zV7#hr$4!m=+uY`o0@nEZDoC>oBhE(QnI{`Q}0h?eLTzN zP>1$s&3c7v?$n67X8G=0W#aM8er9vvD#Q1m=gafo*PB@BmiqONc9=`^>!zJ16X&k) zIkq5l{|;5PoeR_Fu30m6_vfiK6*)dX?X&yV%TDLMFC374{O-j%wRrXNpFZ#2zuGYS zt5iViIZcj<=0TB*+$?XuWO>};(kA-K_0XReI(w3|)7zHyF#g&9m;HgyZ|1-04i9bL z%{_hTd;iIg8?&DHxn0<{m7}Dx$~nDn`h)oyjZ>9=Q{lD9| z{~cu5zVU~O*sn~-Je$yW{_CVyDla|#Pw?8;xc48Nw|$Jtzx&Ry|K6UbF&(}8_E<_w zK5n?w{Y~&)oqF%qigIVOyI)?NKDqk+v<>fa^n}A7D9(T6e#x8thR<8AYv+W7I6Ajn z?(aR)+$W!7nmt)*C+Cb8bC*PY(!X-*DC3kg!ABp3ICx~Q)VM27J8>=cf%*DHhq$&^VAl||JrnzeTPw{ z2wSz!pY`GL&6|w)YbJ~K&dfgH`JiOh*8V&5wuT4(GY`l<|4VrN#nt-n|Ng9>=W^}a z{vRn*Bo8qh+aG-RhnkK+t#b30yRY?StRsW}L@6-Pe1ujmPaXk3NrAc}H z^g}IwGXGxE-M#l-oK?>6e@pi@l)O4!*z^CxgDK}Yq*NcOHP(OVZ`;*z;jzN0khR$h zrP}O&)Ld$NARIJNjz4R|{G!uRTTWh|ACPvt%fw)MPY#FuAAza&yFdInR?g$OO=!_^ z{pH>pjpYvVihek>W$W(9*zYe+2d%E2V{G=-D|bunlfU0j8ZdY$ve?UCtJ(kM;h$=q z(>>A0kFb56bNO(|sdY_1)b?0(FU)sdDNbw|<%d7GwzDsIYjXUL&~-ICJMP^* zzxT)3^WOU!7VP}bBVj_T^sP7gVi~nu>$#pTKV-%#A?OfuW6F)+zeQhisB#!G39=ti zpLcUfMd+MFH{MTId*vDLD^84<-Oh4BSLB8E0j~!3r#^||m=gYOG2P>4EipR~KYB zh-I~G+)%;S;=;AcBE{oOylsT=KklvRS2iSnoo=aXym|FOu@r88zS17^AKDA23Pw!1 z>==9N^+y+D!Bg1+_0PrrNA8N3R%GL`IV*ax=j)kk^&1aP^2!uByfm!Sc3Sac7PeoEN%Mms3`KP_j5W z;qLkJr{W^p%2)H={WWXH_oO>2^_PF$-LS0A`_M1(&AU0iZdluP=H|u(MSaf$e#R5_ zCuIM%*z<9nwNzlC8MpDDJu!P659A-Tm{XW#ePmnr%D`z}seh_3UMm!APD$WBc12j? zSkTFzfq&+d`(1dZ`aDyzD*I2e%)XL$a_ycCfAh5&P1-n*sz1%Go!`G?-_acIuyg-^ zxK_BYFFjm&qQ&2}u65qb(g}exUapr3aQY$jDC@*zKCNqM5{2!TAFhiE{?G5U`c+<- z?SH|!zwgH_-~N2%M2Bx2=9=thXDV)fIAhwMSGzluPj$Vow6~4fy?3pR!`yr4e#g3Y z%(ju*FUh@gb?~uiiu+F`pZY%ee=wu`?-L6D4V%l>I9=xcuMsxWSY*#@rmk!Gu8+GF ze>QRbT`AP3Uv9tO!;wesD&LW9j8TU_w;f)8l9$XPHon=;EGpMJ zfBdXH_AHTM$(27l_&W8ok1S`p=oj_E#6ZZ6)N zKmFOCC9Imp0p))t&F0KnRyV0&-Hg0S`_rq8b&u^o`Ec+1NzS?{KXoPsb_l;vc>L{v z{lAHR7k_4Fco*9ieSWe}uyV(q+DTmN*gb#l?A2}EzFfe?-DHN}-Zv55o@?K{skiv! zyrKNb)Op{kZX9t~uu(?mDp}*T288ZO#0pUF%N&{{Q04AEA_|`^{hUX?|T+4ttV=9 zr?Yz>xH8xCxPrZ4`EBXQGYX7KpyQ9SA`TX*oUYz!KDDld;a2G0eU9EyvzyCjf6m$Z zd5Y!vbku66n7_ZvZ2S^H(VLq zDsCRG4NOVgtKD|YIF#qHr2K!&zxk`yul}?7{{Nupo;S}6ew|xAFHLoE>EEkG*MFD) zI#VOlc4wBy{)%T7tWLu7wB|YbToin$R>QEAY4UrvkN{&jRfEEtSDrgw$zWf8o4LGA zYIm``^b3!xw`Aw{GTqqK@AN15e1u`dT#G*$3d%hX#XH|D@>1t`z^E|u?<@(OkV8e9 z=VIf&&AY`IaIUB*?J3)I-HWBA*?V=%-^@y=VP7v6`}3rH_42OwzJVX>PXB#k_Wx5O zPnzJJP1l0HFtqvxxm{KI*pjk`~u+-*(~{4=bN9pd;PBBbnLJaM7j%qbc-oSvUZ z3H>lbXO?DQ;)YLkJ-qf+a@$(^-B-xI=VNc(#$1tEc1dii-`y=sqo-h{5(6Mxs1{KdibkZ{oa*!|4-J~pUQl*PtP#+TT`jYVzwzy5BQ0< ziEmDF=)Tc?H^b^Pn;ZM#f_Rxj@A(*0UcG$7apqrX>HqD$YuCNl5?C~yb*1L*#LPFl z-p$LbtF7l!fA{j^Q(wQ7?Xl)%eOJ#_xn=*o@YyQmyVKeHMlOpvmU8n8s?R7?zj;%f zap1~jj#KV(wmW^}#`3>~;$ZNVIu~$}nnNnV_N~)Y{UNup%HwgqxMg z$zZ_=4L2^<7WJUY?sN`2qI>j%x(Z9aJZXt+b>0;kIL<(SRDbSEf?(d7MpFO#TKcT)VOuQ{RoLN9vJf(MK$A``TP zgjkvmI&Pe!BO=Jk;^<@$pd}*Aa!9Rs?LNEPSJPz=d2N_nEEv2h){kAe;Pk(z^V=SN z>{C9gGC7oQ`QPf2vjHbh->S-;qWUj5@VVYi#~;P>u2;Lwc1`^IPW-#J)%#`x2l?OI zPIB{n-T&ckS;^Oypv2D)cIj)@2A})Lx6U{F%9IUP-Xu5uxvo9=@2mbFdtyI5i4Bh5 z-d>(3H6`qH_s5(~pZvSlX@9ttv06p;=vr2eOeQ)0_uC}iw@ut|pl#-n2Zq|uPwu#< zT<9a>;hk3fARO-yt(GD7yVyTS7A~Ze=GMNr|#|h|0}qz zus@8f?M*%QFXL`~++uYn>r?JqkL|wy+{|UN=DyVIb%DJl)k#UY$-jSBN=|6mC~;MN z&qbv+Io1D{JF=raf_o!#m#@8ea)rO((+?*e z{Cu|8^sCx-(GM~w*KE9c*Dg3;uSaX2z^3Ji51HP*s(&B<^47bZ;Ya6di#)nqpHlG3 z_}c$~FZ1uuIQqKkb5z>PIqs0EA8$7JN^FpM)CFE&4sm+XI{szt*!qko*nyd)9R>KAur}Fd;IEoQEXrlUwrJp zb)l=v@7`zJW4?gJ@?e1~4@a1Qys6eF#-(#Mg!jvOT;7mn&J-!Gu)ZbP%_u`_v7b$b zsJ>Y|6UR*Ewj6=N=&5cieV#{JKC8C)9uh0n=rcWP;uc{e)6ZL$`$gn^zO(-0ynikG zEOX26XWd^JdVkl~`1&>RtIOloOuD{%_4}>K5d|6(V)`>L3q2}}{*x_ZBO>9xXG?+h z$9LuRJev-jZ9bEyKSf0&U*^+VqtxlLOBmReo-tsMx^!KJfn!p`0|u8V<YEp3(ld z+PTK0JGRC5aVa&Jww!#3l4v#oy{#=18B-q^YN zvy+*?gcJ@fr^5$&71r9xWq90QRGqoGv5NcD)@>ULR&Y32NzY+O>Z#Q5>IBo30q+J>$$Z{ZnlX_m7&KnjD+QD;xRzY7UKJzoYR&#v zk6j$~?mjc*QgpW!iF+aPueHQ?UGC+(HoG?$#jE^TTNk_B$kj>WN+iGN4HvFmF7k$U z@dxIx205s0mGoZmSt>xccK#3f=KC9jZ=PO%Vg1Eb(~s0Un(uMj8ymK^{MMG(Ys+R$ z6Y|pjv}wZg$7<5kFSQf}n_Sz=-ub|HZc*CB(+}K#JH6WO_AdMK+tPBuil*5!CC$CUfRaEXjwy7lQJ9R94(sjkwUlzV;_q~*(VD`)C8Ux;p@-iQ#7u{ZQNs2AE=RDy}rC|uf@91MmKr>rsh?bLOnJ1 zNO$vb#W2q}Gu7xp>-s#7@ACdF+cnp?|1WQ1dOT%!cmSX7S<`P4iJJsO**7?b3wR2f zvle9TJP!|DesSXzIS>0ixrDM++}m(%(&(I z$Fk8t{t(-}5OZ}0v-WNJ=4bMAlN5k>BZ5OHROj*Dw8cKa9bu73XBK18IYqhXt_;k~>5 zhJVs`ADrONYRJIg%p}3Iv*S=KThqZKCo)%OzdX&t&bop5^PDJsyBvu^an8TO`#DY} zU-dD%YNh>0-9N?i)5hB8>-#3%+p3>@>V0bUS@nOMoSn}i>-Gy8b{xC#_22rF@8df* zCrmImi9CG!;mf=g##L;E%M31wdr!I)pV8vKUy19Sm~gDujMby(fD1w?rB-m z|2v=MWwuLYog^jOqqVl;MMAVz(N>W!I#Jo_H-Z*R^{slb z?`jHf?THK4&NyhGVEex3o^|)_{LN-er%bubXBoKMPbf2szpsCN>DTwycU}*&(o$&( z+xvOph497uU*)XVDcQ6jLs+&#ao>-bcJ*f+Hhew!C1yX%vRU#hzT!_O)ouM&q`#;C zPurcJ>X&@Ix@3j*-ghnAs2KQ2?C|W>33t>VOtn~QcUXG2?%l}T|9>)1oo$qHJe>Ku z(eB-H?&C)f_0+^K@{}}x&{V9)@yC8*UD>rg_qa-YP5xNVuD&wqgx40SE*mdr@2CGy zb4l*qsW5%jgURpjOPM8o!@dzFz&V*=jnaT=6*rtcCwL* z_33B+$j1F^v=K~KRq^+iF}cIM|AxV;&yzY;1(P2($Qf(i3u^kjC&R4%*rcaBN*-_S zdTL`J-`i!ih3Bc?g9g9p|2PyY<~AQ-^87G=qP=Lxs}!3(eIJgwakGX_e0F8)pM8bx zA}e_{_~gv>jvOicY#zLH$J1|Z4O)-7nmJMq-2AXSmWPG^LZHJJhE01ISlOj|ST-LK zKbhEa;9he$$6U!4IhKuE)@42pxc>C(R7K^I5DA5(wT~Uym@6LDzrJg}drO1U68^30 z+U>s|kCHqsW|{qL$)2uh{NnBH28MUv3bAiZX4GEUnVh4!UENE@-}Xb&N7Iaj;jRMa zN0j+1pp2 z3Y_;R^6j+0e^9P{)xxeeMO6{w<@$eSSd$xwf(_>Jm$8MWV2VmNF6>A|Jf`b$bG1qDawDlvm(!$Pq(m_|8>y9&8sGR8=F}Fy11tLz#Wy{JJWL| z6x9;v%ulh}k@?=kcG-(HK~vRv&y~x~7qH@rUb<&pcRWLxQQOHK3fy;}ORI%4aNM`3 zov84mzfu43R^umb9VX|l>f~EST)Wz_1w|IQKa+}Ruok#{DTBF$Z~G>W zq}vvc1e?A;_q&Ti_tnJ6t_z2l461_Alq`J5B{q4J5{O6Ao;#sCY6A-_9_bpraCr=L1zg#a) zIkuRekF;<(H)rjdl0%xy9<fe z?XsFyG0&PsX5Q(C1`QvNHmv3CIrjH-UY1jhsQ2>oM_ZVtm$m(Q7cF|Pf|=vWwl!}! z#Mk``ns;05$F9ratM|{O?&tI zJ^N!X%zE|q_VoL!SbzJ~tzQ3l&OxbrhebEO`}%jr$9adt_bTvBaFqR`y2bA}KW|*q zwV}k84^}IDhTQ`1>6_ z`)>KaIdHeOzBS|7tf}UObA1`tq<%ZSH|xPBVbjQ+J*D3dhl|ge|K3K_A^S8xmqq1! zUCI06E$0O9zI*rX-Pcxy4)@N)KHI}pZw!~LS6CP8ZMHQ`vgy-)qgi$DoNgOV&%ZiN zq4m*6tHPXr{#R6Hvbc%H&UnAe>t@NjTf0v`Gxc(65WSz6O z6^G=6-k+R^BOYq7VZavuKP^zOaS66ubN;)&;XVSe*P$Nnz^EmQEP`|!Nh;Fw>#e4?V@1tM!MaH_mAtrm(G88 zZtLBtTJ`?3k#LZ|H@8i0TgalbvEOVI1e9TOUSGVieeH~fNGc|X%y1qT;&wXq6D&gdBZ!`J?-b6p;d2cQ8!=h+o zOXIQT`~MX$t>2OQGyQYRY2I5$1vc(sJ>PfDXN^0X)sM}NwpUg}NuOG?!~5ou4yD8i zih`|udlfd2{jp^3R`}NZWlfTv-oGcZ`!x+NOAT?WoO?QFIJic5Nvm-4R ztPg|-C6uh=+jCnutM4ze z_Z6(Yx-Ry~=l}2bK3Ulo_w`Voqea|nACh0$y!-=J-zHeX84Bo$EW`+T;{`W(Af@kFtrX}3DqH(f; z$J9nysEdvLfB)U(58j79n-dsY8r1MHHli$=L1X>VV$QGc`Lh(S)v$=Q>FDMB53s*y zk#J;j#MgwPC0e_fy*RyFBL1zcDbjm?{)fMSifDjfoY;z%_0BiM9VF&2v0z!dg#U!{ z39G*mtN%vt`ug&1_Sd2#MS=3`JtuKlJgGW#UUehOw{=t2vV;hk{khq`{8QbIn1ziO zSN279{eRlV<5t)yaeUom3)4-t?W_K%t3F=(-LrV*tFL*PRgU);Y0UasFkQ1t;^Wj) z_uK6fkDYBTl)snN`@u{xls#~5eg*I#A}9(yyi+4v&2ndv(!W`$LCphlye09HOe@bneYBlo%$7LTM`>fTn zjZ^36brPR-ApFARx%<)=tT8ZESGZdC|Hm3XQ``R=H%r&AWsc^nzWd!UC#U70TT57e z$(f$t5AS|8I?0sv-=SRe>eia=&(+gxtZsSOD$EhnnwXlcSe^B3Luyw(SHGQm?vG=t z%#TBQUo2g`X0fN>?^kUyHfKj44I4!$En z>i2hwx_ErzWG}K4IPrWhhwZ}C7xSK|WglEEeSG$tCpRQH{A-^mJfE5LdR1D_rK-4f z8t+To1Fs)Tgi`mc$!vLo^PjM+5!h;B%lG~N<8*NiZMj^BjoPcEUYIEe%|33o z=e(U=&IJLMuO>hBO?DQ@zb*QF|9-{I4|A8Pgg*Yi;`R~GTGd*ScX}KD%bs;*{r2~Y z=#8|+#tZ(GwlmI_KKpV0-_Y~TC+Znv0uO46@$A3fdnIoZ2j7}WnNHGsw^oxEKa#awwN<#9v)-SW;ew{`~q zDEhwsMZr1C$yYzc2!!hV{n>1g%)wjX@%H$Wk43NfZ)i9j&{q%Gci=WV$4&!|xa*r+ zJ~h6WV4BRCa4xX-n2V{M=sU^KtYs#f-m}er@?GymHctX~tg4i`(vio0v%{>P1nqEs z)pg;+wLOX1=aqOojGAg&=PbV$*gf%1?Yzqh>*s#BRXhLK6s!4UiIYo z^!HWVldQEG@A;hiBKG5VysYoWV-r#Z&CfqwU2&jl#`VvRv)RHLzg@5Re70>@`n^9+ zT!+~7KA!GzzY)Kql{rT1?IorAr7xz}{EOdzc;^cDS5l{QHrUIwh?)x=z0_#pESKTx zdqbrvK3(QVZ1wZMtEYee@Qs;y>#pYx(LJ)q6jGn%u}osyx~z6J^Yw%II+qMygzhs7 z?da9#oS5)1{3ibi)#WQ?>w_;lSl;)WW_N~9d)>MJtreD?%%8ZD8@|3=c>c7(&WArN zQolS%`~F>S!>JALuXcVY68>AX`s0h%;`>K7bhO64FTY+_`s1FUe1o%=&kYHQ6+8ip zl$lxfH<(ynsjq*2_7vZy1xpsb>h!;6YA2tN+frvH_`rS|hw>z$**C%)lA{7@vp?Ng zQ{$A!!xz2TP1$$0vB3V976zK1RkQq$O%rTlYn5!Dz!C1?!nUqrx>WK~*GZ}0F5Y|7 z+wn~MQkzicf&Xh0BmCdpwF&eLZ2qu?|B-5rxqiv2Yvv2`Ef-$69edCJsgAndqPT}Y zew=*w`*d>KMlGwB*mwVS9iAKVXyMZI+C{&=mH6EkbXmg}yY+)lSjL7=_1o`${3kPQ zHmk(HfP;4TpKmxGuY1R|;oxoUxlbPUMKP*>{#N`&>B$s(ez|F_^Y*e^RJM;hf#ePj7TN8DGc#f8p!41G57>Z|=@pW8-!9P`0sv-e!czwZJ&(Vnx%XbI8L9$CrFoBnK>tqm(U%QytzUJo}B#gQliem^E=;uF&6te z>0jHao6Nh5ZkMl5mGaOyeqryTSH&iRF4~4Q{z*UN+Gh9%xPO-OjkuNkq(5*`-r=n0 z$G+W?4V3)%IO?InBukSGI+n|0s^{KfnS1${_uJER4trK9z2%>0p)7lPMp4Z6P)SP- zm4hZGY1huXsjqwFX;r$D%WQS|bLY0hA!aJS;+p(d+Rh1&=ChxqAAi4@egBQ}iyt?J zIo+K1IsB9K(uW-%0xr&3eB{urBCYf*lb`>Od-y-JNj#coioC+EkEgn}bW~*6PZ4T} zz8?Ma+Vtx}GnSk%j5cYU{b-wC?W-sj9w{H$wmZLT&xP*w*Ztmiv^GZlqix3C&=lpudw<=jpHS{L>dVgfGf)@|cj|bK;|V;o*bq&G~0KH}mJaZgjD=$(CQl)U;Zh zc`bXxHtng#UsIcEvUMG%ZcO`Jn=P@wc+s|RYXk(QdCM7;=@dAopI)yKu^{t&i{7n+ zbJhy{JG>|CSX-LGrsHjgr$6wS&35>O+otLbuDhCUO*tMl<#_ed(~kwidv(e^J{^=O z7muiaxW&KQk@L^N&yJ^EWTkC*m)4{u{dPFKX6iSW>Gkh?_J7#

a)C59@)6yE|+X zvl}%)47lT;m0zw7eD{7@BdnAd1d>b z`Q=|Yc#>}%n0#RUcCNW$Ke=R=pSM{r9rxQL=8nhg#I~TBUCR$W=j~j@UbT9DrSFwb zjc={~Gc$+Wp8vPbpGj!tv9lHJy!^JaTjV)wKi&Qowa2&6&^-5q)GX#nZx$u2v3*~k zVk!Ts+GW1?lzHz%;+9yoY^{FpKJWi_y{B*H>7O|E{OGOBP|Xh?PWSD*@T6sZhqC(G zn#M=r9NgPAc#B{iy)930X#nar*jy zflX`3v<(khqS9i1@c*5V6FL7;*Xg-^Jl69I9@MOl{K0-UuXD|@_g%N-=C?ikzEif{ zaT2@oIk%VzU!~m_{&~Ut=x%mGq^tMK)w$=J6c;>ucQpFK*%k3JrdrcZNQE7}`dh(t zp=n6b?Ec_?>BbxN`!DI9_;OP2)2~ohgR0Foisf$m^7(i+8o#bTsAzIz?c%m+x&Nbs zdUyR%H9qL5{l7Au?|I4U?-RGJ=}z@rmF=@Tdsc0xMtunDomn@Ee{-LE7OQ^d+3u5} zQXdyTDB%)0u=+~magVTrU8YxTtxlaS%{J$ny?f`M?h~udrPa)uuf8mL<@(K&Ute4y z5TV0UIj_^lz%9yR=cO};1sCuNV=gsw4U>GL-ISx~j*@8Rc~Ys{Ng zh<(qfdXXo7t}FJxyR4b_r&|?=e0)nT@*R4;P^WIW^pPb(kCesg7C-&@T=lryk$u+Q zQIA_WmHRbJcys!;P1cIt)U<7CyVXC3L$=P``mee*Zf{$xZt$&QU*4m|mziRI*S7rP z{byOYbLFp#hEuh*JGFe?e+~1laaz1jw6#tB6Vs+s87+olQ(fZI*k$~Hvt+=G(leF`Z7;aT6Ce`%-Qf6XMt`pD7uPHEthxo^MMZ2E7_ z$;rv4vMKNEayz|R&H3w%#3Cz_>i6EOUd|j)5S!Pyd`WV6;{nOiS6_8QI8LYijGZi( z^?mREmD5EO7TSC=ylDUNT>T&OFYQU(Sx0+1Wb0p8v*e1;{`M)&x%s5oepZ$^i7f;=_i*D-IM7`&Wvs!ia=}rG0Hd%wY?YB>Uzp_o# z=6k;E`GY@$;y3Sb^lK6oi`P}zwn4Od{>I=-bB?k|p3seaUZ(AoBv{#T>+j;+OE&_J z)~u8ByfE>+gLTx|UoCqh=T3Ief8H1tahL5`_|{sPlW#jWU(j6qBFJ5Q&H1%$`oICR6`+ouL(eF>7u8K}`e0U6UWQc7Y=|MoJ4jq9$6-uGSb+O*cPw{hOG!)3dKx$@rcPQGV1!|J5@?YH5JXV%SV znUi?$dS{|LOT()J<)?g(Iqi6z-MB?dZswjXbpGMno_&vdz4O1Vb-aDm zV9BygZL=hNem;&bnPT;nrSzH3n)B1%?cDP(?$yQDPZawb0>7{S%j&tYbK3lvFOwFn ze|Y*sW&I_klfR!lnDKnQ?38?`32*GUpS`?rJ9Bj>8_%qeyfvZuv2yz?+8& z&r@`zWbN6-3G0_|ah^GP`{oxZy={KG{PuqlReSl}UHtg5J7(6CF9hB*X4x{yE_;o_ znX~e1=DYWqwmgjbWEgK!UHIc%j z>b%wMCe1o)LD5T6+ zdlXdS+l&sct-tn;ySm=Y-RRHDRwso6&Hu9XJHMzGz57?NeE)|N7cUh?r(N9>cgkS% zrj4~BYt%CK=0#@qnpATLMO<0K7qaHgpT^WBN;wvjm5+vB(yrUJd(z9wx)oD@Z=TOT z>31kcA|9J{>Y}u91NZXrBe-rwg?Z15E#;ZH;zFD{X z?xP=zx0$+i@$KH=6|~`d(agQIO%m*d&PfJ0{#$1zO3c((U$vj(b-ezr|B4J7PUgK$ z&gsiPZd5bH_s8B#X`e5rRg_&0ocHPR4Fi!+@vmYROv;MAXOP+6)LAm?Tl;RQ0}FIE zINtWn+3~Q;g=!Ph+p1AN;od z+Hd1myVUQuyY%e+=J#{f+t}}FqGdlA7&tdL&NAfjVy$0jS+;KW^`)o(@0X1G|NsB{ zd2f^cR=3vd|NqkZZ~XhO+52Wo{;H3A=eyuO#~-#Q3x8~wkth85VCww`^AG+u4F1@; z@ARXb)yXy7_kPLkK6fF+%cAylLH6`3+Z5P4b!`g&IQ%*<*K#{w`1Mr5EmO1Rg~>lX zRuF%jrDV^3>9-7lkHSJdIqt3~)7CNmxncT^)Dn&P*YvJF*}6YFsA{9=UcILJ4UW2v zJ6|vwYyY?Se)jk6H72ib7VP={mwoALhJaj=`}$|6>9PFv6&90Wk*@o3`bWFezWv8J z`OdyGKi^m0qW3@f2>ba-m4a=nq}J44S-4u~rR0?DyY|;iF$>+sl=bo5qtD6A_nqwj z{uX-aFum^Af<5jQz0=%|NUKM#D`-%&c(QeA6HlAQ)rWW9ZaVJP=W#VTpzPV@SsH#8 zt3PgdHrqMk7O%t>zll+t@|)`~Ip2_vQMTQ^Y>RH?enXonk2n8QdL|O;u(EJrQ@rUD zh5K#YdqeBDb1Egr99$Km{%g1ABZc4QpA09g4Ac`++m(|1qm$#J%K!B`=5ORasCF`3 zma}!>sJ?$)VfF3P;taxux_>^GoZWKz>h8GJ!OOF6yr282W~=V6J<~(IkKA`$y?>c} zTjclCPyb0N@3!jo&Rn-P=?|ZW_U`g`e@?Py{7QB+@lE>>aZx$jmGueh((S&_;@Sd^ zJ~e#!_)fI-ciHdt(z(a=FYRitI^womv~I=td^jS2hXQu{nt^8T^8Oan$?n4e%iOZ>y)32a@PBNf0nb_{}-D#erRa* z*|+xB_Sbi|zrLbb{UYq$@w(M9OLu*(d)*Uu`uBsWf0Q_m1uO|Tv;XN1RqMUCYx@^G z{gD>!eYSaZMSH#bik1snEBNP|c+C9u+g&&O*4NNqU#8!Ujg1Y~^?ZHxwP=uj>i>Ts zeh;Ez_KQsPn4r}zXY~DoWSPK1=6QeahPoAPfBKO%{_3QLklsBjo=a922?Vd-nJhmc z#)bKXAM?%`HUbr8|FYCNO?RpNTmCw%<7w}NDdC?t9G|v}O+xtf{j*Dq_iJCz(rG?> zx6f(O zhL!rye#}h0AYi@l*IR?i#A&}LR-G(wD=v;-w72SD-1;919}gXyt+t`PAfe#f9kb)T z?*vMCEY2JKUnkwy)^x>AYHR`&P+=dt{rCW&O-Pj2yY{U6IRq+B20diD3CL8MgL zCEtlJ<*I^}%m078>}%cgKxZ2cgZj2p!9fyB*-wA! z2`K*(yS(mUC8z$Z#;tMu$GQy<96BEET4RGKi6!i{YP-p ztxta=_DB8OTl73w<@=r+lSJRYWO6cmHo5EeAMNYy)8cREix+IV7-Sf*>ePwoqzNTI z&h?}#e1GvPIq~paW5xW!b3o@1i_@8^NPzCW0C zRi$Xg-lISMSz0azRx$1`Wn%)IBvSz+~|`JIdQ zExrA=^`dwGtyfLk`r?oKTKFP^R|GK{I1`=9s9O}E`U@BeQSl9x}~V-dbQ$iO1=-yV?? zCTYc-Bbo8n<5#Tu8}T>dZ`C^gxP7W+KW3``xOH2feyx1$V*m5~FCm`Hr2Pf1X6Xy(|5lf;)KBf_WP1K#>aJ4v zKmUGQIG9yip)!sC(Pg{MXQRGLi$0h&?fbs!uc!8Wd${|1_rI+-RCMQD>i)1(-&^w8 z()crrECPgY8uljsE&Xlx)NGGQUWQ&=|36NCDcO4a2koZiTJL4w3TfJk?T=b-7oBzg zk8$kP9q;$ot^3nb{%`%yuPrjxQ_g((Tlu4J>)O{Pd0#v! zkKV27H_!I>rSJ>M@-f@a$7?FT|M2y2N3ZH3H(`TMGH#!??dLVs|9mk^<5Q2#syuda zy^EKtzTSPcE`0U%rqhexP5Jr$=ZD8{9{t;+ysT%9kzEA4g@cs*`LGib%9ZQc>wm<{ ziN;S_-QxFcHTORG`zs7S_x2uq_utTO;r<_4O=>~$%WnM<%4;~8^GB>w)%W!7eW4F` zTe(hcf6yH~annza-+#<>@7GUt{m){PBELk-)J4HX!w|}_nYWCO7EAHNZ zeb4L4y<*otr?(s1^Zc9<^5*-2_+NVF&n_?jeEpn3@SMK?yzjn#k~~|_vRnJ5)P%Ys zjVrqs%*xsATcKk2zWu?j_qQ?%%=W*we|T7-{lwhpZ6*6Z_|B=UzLfkWvaaLa>S_Dc zmhNA5s&cn=|b_6hb10){r@ey-}A+; z-4pizYWf%O=Z(FanY|ql2ZM^X; zS3V0?-`D^CXtl1^{)2H_vpx4FU5`+>_lwQv~__KdWgx#In|M~LXPv$9# zj{hsHnJatz>gsE|qjo=cJ=OXC|A+aL*2ZnDUOg#q>+86lC)PuqTVP^pe=jV`XYD|GxL#^&av6|5jhE+8eWX>ASTCoslP=vnPAlPk#RNPhpLc{b3gI$J0#TpKo2z zyjXYj0;O3!UG4lwjb6Om_P=#i(aYcOzr-DiKKt_5p{1+ip8veQ{+iyjzx8z+>-O7+ z+sD1%TGaV^y<>G^g+t=I^Xv< z*@t_tAyeC{)~gHqOjhat-DWLR!*@Hp|2wNuRkljZp6~|?RLrK#FPIqYF36_v*>C>e z+8t_J%*zb?Gf%Ic{OHiMH3AB~hp$#O2c7!#qe4Y#hxSjsCjQ_EURDlUvF^SH%MPg= zSLlhl9NWLv%%tphb@s>29cIFF3+L7EEI$wuuyBG<)0$~V`fpo2zEWFPq%?Kj3DdX# zf9-m|B{E=Xan#k30G6`-{1IFIkG*3XUu(i+BCE)1Gyg`Q5#HotujKt^ImB zbN^pG`j@w#|B>9e_p;ufo_y$Y%X&4d!0jQ+uNz*9eTuc!)0a$peO+_MPOW)c*X2*G zo?5k=rD=U_9XI#f>U&4kcYp1v>bN##ukI~<&!}Hn#T|-aM-saZRX4O*P@>O`=fcbCnQYgjkSly z!u8kAJYWA>ee3B*S`%YKCjI(;>7n`BnoB=lx;sh4OuL)kUbEvYZ;aw2)|j(GFRS(c zaTd&65IS#@n$C?cU(a9v(mZ?m!J6EU`+l!XIOK7x{=-zq(qGJWQD3i2EG>@unzb(W zi~p;;!nd}+Ze3fP{4ZJY=k>#}!h4e)F04}cf6#8~x9*ew<=t~`d{0eHmA@AIPjcSP z?3XKA|FcA%l*rTiqrN0b)soTQsnveodc(pO{XbT-bXYB$Uzhbq;O6I(*1wL%e(7GV z|9|xf>+QN#Uw?gnb#DJb#n8I%EHgJoa7gTDNeN>)_@4iKyfo|4Wz{YA+{X?(@3U|W zk!-%awXN`B!)qS@lnagjAKrNIDum(g!G>GBLOOfSCax@y(XD^Xd~?OV9d%WIMe0f` zP15xLSULm-hQBkGe6#u3@f#|Sn`KMmxEDJuzolanzq9@ET5gYPqTlb{*1A4q`zhA#I`ftI{?5nS?ukN1p`lisGdD@vp9D&>IBAISXJpFISrL~(E zq+Dv-=)_p^{>BUCuqLMEdKTd_I*b1|?OwF_-eL{*cnL|>_QS62(VGuioT&1j{QsFs z<%AFC#ZT96y!-R@lHixADd%tUK_4RsQw^Cs)Re(zwZ|6t0_Klis*U4QlJV@g#<`JB)S?Uy@WI#fJ3{Dp5fZ+hVR3(^Al z`hWCyUD|Rq=rarB|Ib&ZwZ{M6Gym(=>w@Ql!k_LuVSoMZ_0GF?6VpF9-%ySZU$|u7i{7e;tk65xzKEeb)b9R<3cc?oa4*@(}%d@+FDxv`uam>Yk5tUmF802uXKNeFWUHXeQw>x zx^?mM^`FH|)}PW>#Qi#8x+d#gWx@Mv*6d%tlY7-)!BdZ3U(AZx612R|XI}56%D-|Z zX7m5;tP|2VUAq5BkVI8=L0)*=uJ@;(sO`~muil*XCffA>hm#hY15aL?^-m@IZk5o- z$^RbD`+qKU&z1ixgLm9u)Dr3N2;*vPRctf}(G%lhb#g4!Xi-z;>~KmFUboqQ-p-l( z^Xqg2k7pK6clEgT_DSy5qt^Rwm;dEE{8nAxP04$I{<~+Kr+e2r{<m)mWt+x04 zCvz1&ejj=n)b-KKZ1%r@Ev0pG0){L0xtw2P@$mYI>lK&Y`ukse{8b{{UScbQ)2H_B z_x*RWMz_U>RUB)pdwuh}dbR3ZIT@jy*6I~Pk$w`6+FnYnVY5tmUfeQdXVekt&=S(( zb$Xa^A;3$CtHo(y!3rNIVUCt2ZNFbDKUdu=|M7R%mX*(+tzVlKdNFEYmF(Z%*f|?N zy!k5^|7E>e-RtiYb365F^q*SAeOmCnTKdxGDHq>0e7ds#PU)i!e|cQ~{x2@mo%&X1 zPTWQY}r*0XtGOgt=OdIR@+qQ5N&PTlIP8eY?6_pG&4cS{|Ej^fGekRX;XA8Ag=9y+aNZeY|C>Bth{WMy&;STI9N zlAHC=LWdO{8sc0nhm;IC&L|0U%vins_T#JX_uHH6J;^O!v+~%>m{iplDJNfE`t<2z zyx#O_)g7~nUrpQl`6qAGE7_NKPfTB0|8((pUbzLUZiRk((_sAbgNy&ci?ccX-S?|r znN~j8#p%=4FIAhT%kL0faw$HBBVbvLQr=5hSz}?7*7|}BUxkE8M<=pf5n=i=WeT5y z1!$+Nhma7fle58&2on+RR;NZog%Ay8ftDuS;`K7|x7VKialI%zSnYFc+@~L_&VF5c z!dY^UgoNg&b30LcH3K~r& z4WWt_3M;z4y2voQD5^7~y=qM1PC(6Ux^e|yXfSQPKtCOQ4hmVpfXOCB6_CB@h zztv~o_q{4qDh@vTzUtBLna`HGA1L@xwZ-zsxpk#iUi@nfd3-;-xWPJe-NrxD{#|rh zy0Ez85BHbf=D(l*u#dAl5Htv}~n{TR13midN7 zRqgr3m);kN%jpBkyv& z%cmLP{i+K$rp5);>+vOCSz`TWPZ6uk3KrQX77uOmUVd-0SQEMM2@_YxwZ=&GK$8ct zL7pv31DZIP8yFZ^Upvk+;^|_oUsxFK{i!Yc{q_BEf3vr~`uh57{`GZFcdEy|jau~k zZS~6NukUuRijV7f61QsoRimFzCBF!}c-w2TJ*~={ShZH%T7CYn?NQrbzk9tkZ|VCh z3*J_F$Ngyi_-o^>@2`&6USBQyPi9xtv4G3_r~jK`%29o&inlM~h|2P7)8-!*n|0Fh zSl_-$M)CUp9?o2z%yO%KYt0v@rTVWQ^d$xMOffaC6TBo>>lD}!wc7YN$K97TqQ?*1 zFF2hvvOz* z=)SApvx}v@{PSPlqkhFPTkhUI8`^xTN%Ca#zdx!h7hctzDti8q+vohsS5IGtJxw<4 zI{P}bs8}~F{`IP@_1BE%|9Q~TqrPtMIj4B(u=gi?Z`sDK^{rpGH#~OD?x^=qC+lWC zpDrK%N8O=VT8(Y_^)&83yL-G`lRj?G4$q#xIJAD9*4CQvU&oKvhyIyVbyV=x|3wFH zsqAOnX)GfY!gp`K@U6Wz=ECu3FL}O>QQH{0TD@^2;K`%$~?&e4R06{-d)| z@9rP^bIFl!Ua_2L>vWldJDvX}eI~WcPu=z{Vi)K7*T?uhW`5lIP_5v8UZPsv{S8_t z7F1;HQU1D)bNQFT=dE|+I86ed$o|mY>|$2zxMDfCl*|fK_M@T|%5xI#Pk(*4oqOA~ z5N3soU3P!I^{#uj^rfr*6TSCWgyVC+uD{T=<^NNi58tFLALO1$Y;by2y`OLWrPe>9 zMLo_hO)T9f{M#x1&s1xo@P~&I9?ffSY$*^47Fg{X{m=LL`GmvVsa0pMpO5{wrjFCh zwB*nlftxo~Lm$SPEj23CoS9%_@yF!8{U@(z%k@XgE-M_ow>M(<>UB!W7p>2)k@_#I zti!6Mb${0WGLe}-RCH>N7Buc$z0sQ0ET;0o{F3jkKMc0~xKn=p+@dbm>8-EKthAnN zT+()9$&O=_FK+$+waJ^Myk6qh!JnUsAFuvtzi;#X_tG}if0HI%5A_$WU;QmfGoW74 zKX&@nn>PQC37m;AY+>K~Y5&404)qRwI~y6Q%GdvRbn0}K=0v-4(+}=xyZ>#htM1oi zD`@CrI`#Ex)%Wa6Mc?l@*4Bsp5e$ygKWL~qQLTL{+kV5amg^@R4li!GoA#h*nLXd4 zKYSC+e`~-0;$a_F*JA%#wi6 zzW$)Np54xR{qnfH)gPSPYd`&;yjT0r9p|YJZ5j6-)^$;4=h|EswC&&PgPhVy)w1*U znW_Crsr&Qw3+I(7^#|Qd?lfMCnEJ<_P3U3fZl?5jz1N>^l$ig%@_f~;*WZr^{GamJ zXwTE`v*C~GrS?Y5`d(@K|3v!!hjH(xYd_oaf7;ZawXd)KS1+%hW*&Zh_8f&>J8eYe zwce#I*vr;)zW$)3;hEd)f8#%U?)~AFoGM|vY2)|8OVJ*8V=v0z_tn_6Ra3Iq^-W~) z^O7A)pZu}@ zZT1Gw30Lp@5fr?;`l7A<>wu5nWm{|$#is66{~fs5r#|hxq}=P>@RgN)GSO;p+?%hTq{Q}4n3?fE zUcA1|zxc4lek135FRS;nnD5uFdU5yH*UDFOUDPVJf3CctQyXO;d-m1W*Y{V?`~G_U z)n5_)EFsfA?st+{!N9=U<~Yl!XBBJhg}!x8HUIy$>@UxH&T9YpYt8@vNB;i*w*Kn< z|M6e-D>c^C-_QC~So)$r+;(k6+V)S5?s9T1XKm(pPhBSK9oHmh1fg zC2zm2@8Tr6Ab-itq2@y3_u4lXX+;oO8VESIes_-WA#U>-($o*H?W!-Tn35 zq$;JqF}sxZ)}LCl_Ui7i@CiN1hVx$?b<`~iH<>cE;$`fs>-!^izgib{aer*_zvW*0 zuCFeAcYXE2mW_&H-uDcSAI{z5_v1{yeN$g!Ps)dtPKUwCcd3N`np!>q-)_K3a_SR8eZWZ&p>$`WyO?r2IRsPoGxmD}7sI}IuzMg%l zYGa-LEY2v8n;)F--uMZ+zIb7-Vq_|1fL3=BHifbD5`A6*M@-WnvnHKaV zqkHFay;!XXdlr|N({et4{$}>M)PI;5yZ3hH{Hy_aNn+1^Fz9)KU%+M!@U({@ojAPO8lbCqE4?o{J-gK>Z#;3 zNp^YHtCc7A&pVlS*mdvHpUNh7PQEXq{|LI@-BI^9nSXWvzL+xq{a;J-HiWMBe0BVY z=H*@QpDumS;xo@|ncbRwJ6X?MRFD#u)m*`*C8Ro|k!yvrPHB~L{=|=){Ui@D{DiihvMeuq45&yJh%j6oni{~%>-hHK^p7mbMrrrr# z@3)@Xdt`l_{qryPgxBnF-`^RtK5c)J?DwZyp8s7>{So|LChzL8uk(J}`|`j?+!y!N z@3s_g)2t6me%|_P$B&)YFYuhQt)Cw1eJ}q1dKTMz;jbRW*Oq68KfHN!M9JhflvulwTe`9xRvU-ZqoC9BVPGJf0l(?fOkM-%k~_xDr$_6SDZTYr7|?=Mjk z_KE(lbMi{oT;K95@1m+heR-Asj(gVYS>{C^yfOP`+1&dty!q!Xwmf*#d7n2+imSnq zjI!|SH#uH>Z8@^f=Gr_l6+?}mzArXTTw1R-JMKIzU#}a|e zh}y^>S?iQnPyF^bHa50OFMh`4^wqMuzyHQeufNiHw^}pd+Ft2ZM?O9Mwy)~PPp>)g zZ9iVSJ~ip+{`y4s-P)?D|5u-UB3Rz#E}XQ#^RN7puhmanr^Z&DTEF{Osp_0xhiw)ke_~iPu#osFum)mr>%<*pM5^wZR@YU>bu|W zk63pt^wq)3*Vl)y4;3yi+w0wVv3k>|Xa0NtsQ9QRdVb^B@NbQs%#Q!NdYb149RL5I zZ;e6^@4r9a?W)(;_nkU6Df-Rz>AT;5ZCac9BfG?EURv;ijc0^q{)gF3)6}gN@qQY2 zH1lnB_`kjC(_RSHe{j8EcsXXjtHJSyR!r44+Y|b{v+^4Nj%Q_*Pf?mLN5G& zu%)B?-W~bXqNT6dpReDbIO(X?zP-P`##VjLo?7+&_5JmG!x!z%{%Drw5V+?ttNh2n zx!T<&vH!B!dNL#9TXrUgEil@9A03z7~I-8Z9*YKg;wD z=EWbJa+l_vkJn9Zjn>?7WFbSExshL`zuendUbi;f<=_9wN8F<9c9*{n z<9O?%p!54fBFj9ce}3!{%Rn*hnyFI1 zYFm1F>4QnL|Nc7qL*>b2%er-c%px;aTI=6kw>#>`e8KM#>o0V@&Dt9m8F*vTlP^I_ zWAA5&7eD^HIQvn?zWp3085meN9cLN$ykafQ@N)l}{Ot9S()!i%Yl9!}@BVTBpVi*f zxOJMpcWpiW_1(qTpqB@ElRk65pYSv}e7)zVLeUpfV^#->#<#t>n^U;))kf~SzgL$h z@17L(<-^)rTdTHAzxlB)N_BnsqT2e|QTroQ_7q)SX6?C8AS^WW^`&!LooyD^W`A8- z74`9J(cY|9{ntVJc5PjMZTGwEw-Il%;_c#PO@sBXw45xutfTk*YxmZ!b>8cv)?M3| zy6S^#q>F~k>e*k{&3YRfv^q>bdHak0aP6yo*R-Z)ZF+zD;gL`KS;}=HdIt@dR7 ztXJ1|N5;L5eYEdt-|Lx0ldlDH#a%sqQ0{U_|D!EanFQ2eF0L zVV(Nle(YI(!EBM8?9N8^OUvq6ukZVw{$O(Pir-&fe=1K~|FwJ9*LQK(W3Q~bdv@1R zeRD1Sx+PM3#3xURTD7$O(f0pc(>^u^x?C0I>`|Tm$2hSk=Ih7VRYet@F^PBk|pnukBiOeMxQBk$HM|TZ)A){5Z(i^yA-~thp?|?H5h7m%eG`{D5bN zqv0-wUb7$sUR{D0hWDf|)FfwRtb-+VH9C;wBr*lQ4`c=gpE;e4-eesx{X zU(6^Et7`kq)%wrB{`&5%1<}tVJzw2lDHU`3V{i+r=$j21=h}H#m2ztYuQj+|JnG&4 zVBWzQ&OBMUCim{o+wW!3zTUR(Y2crY6K`xX(vbe(dfG?Q_T-=G_2s3nj>HSAUcEkH zDzi1?hDl$-%X*$xyf@JmHasll2ml(@!op`kz`;n!o$clGIh7^V7esTw_u5Z{NqJ{a?J+$9-u3 zI%VJ1$)D7JeaeiAtNw4d^ox(ye{RtWjyoJDNWToWYMb80lv@8|m5aoNIbH`&7$p5# z%Db=Tk4Tw$dKuH<%~zfWO36<@sF>+1_vT5wR?H9onpeWNUU{2ke@l9}Y~^~lcRylf zuXnC@smST+ESdhqQ0K;y?dps6r>|Rj|A+4L#j!W@n8S*Wc0))znpq4Su1h!mbU#rRQ9>~^vs`k{DKji#Mk-r3TsZs zENEt}Ql8(W&$nPo(wiso(&_S1CtquQ2@CzUF?{v?MVqpt{wC~ntku|kw|4RSFMe<4 zUW)tj#N>~V=v~&t?{|KFUHAL)tLra%_S@FXnjPOH9n8P+V4Pl3RLtiX-_TcGS6_>1 zzP_lnZk_k^i>2mkR=a8!25zD57tLUmizwvc5J0{RJ&bN_E_PN$jwY=mYac79Tk^hW z=HO$L6X{qX%Ej6AFkwN0jyg9hQ)0k^7%gM24yVL`1tCi1Yj@qR%f4@X`?HUJxk0h2IU@D+{M|_pe8df=#qaoY=8^I7 zL*HXgHOdBVPW$uly`=T#t9KYw_Ie#mRJy=$FmtWJoC_Rlz$2|HqMW=gjtM)am`HH3 zIz4pM2+(ro>Hr1v-FxbN=5?#jpMHPacC}HV(dyer!`4R%FL*eA<#PYY%a@*7CwFnu zpV+O>4n+PqBy{_qo+0}sJ$>o-|MutWy!xlh_WGJd#@YQZ_VU+SKV6@2!g^bA$sY#S z+%0l*-|duY&VK6piI2I{bdgHA{M7r6v&FmLFee{hx$*9zii($oh6=`4wEQ@Q6$>V{ zs`h9o_%VHBXEYJ%=n|1?ZF=NzAjU|Po0aWRV26j6AZM#nVj$;SzhC!Dt^Vxz^5f*> znCE_$E00@h_iEbo&MwI*t*UXacpZK7t-5G+r|BQwe*y`Q@@n$~+uj`yQ1GAHP*VEv z-|U;KC+~GhzAkTkZqeSFr}}yu8}hgPRBk%RUmxZpDL&6UWb>C~#RD!aR)N9qtSzd$ z^s{>--l^U3KUOcg^7z5h{tU+J4;b`WMP~|qbeVg#Q7B6I z%nzw}_2oOR{V>g%^z`@5f8R})Z8*PC<^2EImo96BC>hxqu%_8K@u%OPe<#6uPXdQ% zK%!$PW62}6unw;hg}MaLxtSeaN?fd<6cG_1#?#7lIN(5xmIzlzQ)0l305ws8neUHz zzJGdNxBU4&74527XQpp`wq87j=f8Wnn(xVvAM>|g{H|>z{&B zOcoMYa#8tA5X<%48+@P>Sye>2Segwio9JG-Uh(&c&) z*5lJX8e{ISfBCJ9<#0{BJ*$B6wPQ*_wWk-$H2$3aH0zPmC;_!JLwDNt@L| z_B5$1a^z|}z`($~-Eo!)FBfa=LQCJ*_t)24_dU1w|C$Z{ueRAo)?chu`u_jh`v2Y^ zxt?b||My~D>HS6KMNz-j?XB8WRhb)8(31m<*De26GM-v{w<|ozzV@sC z*{gSd?fmui%Z9GGvF7s@`zHVMO|lH%r?=O)ZfE?;%j*08?Au$mGk$&eYW``z;`II| z{ol3df4Il~)e-AMSMMo5{4)CMslC-(D{Zqs&F_6xvul6nnzFr;=l&>$+f_Z=qr7|i zHLb53;yyPqoUSiic=~hHsl8c0E9VwXzIHNC`Rf0O{XG8s5ie(5);;?At~GP~&KFLR zH9|qPtL`#;?T#&1U0=TW_uK5P^`Tkn&%cKFR$Yy)dirz6lf(AM-)@qs-{3R7CyF_2 z^8cRcRlXaw#h&d~`ojO}=+D=WSN5;hwJNP&x8HoKb4UEORo8dFUw^e~ZAk1> z_Fv~ez4u?&lX)G-EA95*eg9+TxyARszIyz8{Z9VNzjnllAAfoG*Rj|Czb1Zt|LW?i z_t*X&`7x)YU=7#D3k>sF3{Rf8(%pZ*^^4bm9K#1e8?~kfB`)|g@7w#&g74$H-k+T= zb$!~;ja3itmwtU6?)z%1{_0q-tGkZ=@afv%bfcu``9Dr>wN%lH&2c|c-~K$cI9G1| ztlyKb#|Q3P_ebER_1B*U)7#yCy_ep?A{m>ysfTCw5$@&}&!wj}>~;wFz4)T-?~lQ$ zRTY|@U7L^JFE3ADzbMbnV*Z3JOXS1v7gcSUerfIfjuQf`a+jI;Jkr*d{#@+P zeZlx-w|(hF2Cj12v@$&WtMRHzsU9Gk@fwL+W&E+3glLQ{SnuB ze~tWFkM))j?;>mGGHGuux+>usXD#{NxAcipvclqjCl~dl?UmE|{Pmwf=hyQ0yZ=V5 z+aoaZF4NbDejB4pe{Oy*>t3}vD7}8xZ~Jy_!!FK?vEmojt^X41ovrt5OV<9lzoG~A z-T&?So4sTA>Zr+Af6w|Kwcqvd(Z8H;CEs`MkGsd$7Nk|;!N9DuZ@?q|G54Cd3DKscY(Zgy@P5p{_BsaeBb{{^ljFP*;|uVEcM&AUe!DH>vyNA zkEb1tuV3x?dU}25i{pKo$G@I_x4y~R{m-V#e|y_@Myy|UGV;~m_^+#E{;mA|`*q0Q z$5S7z%iGzavt97@-6l!K{Dar)*3|fC9gV$zZhhVQSIh6XtdH@%UtVmlBG|E9HL|W~ zLE#rS#lH@H9hA5B?&+fYYXg?9ewj7Vs(iTK}t{bJ+#+K8-4i=Te?+s_xW@yfN( z{l6ytnH2ozs|+jGmpFg5fPxxjhdXlzTdv;4PTictm z>L=ESmkA`YOqR0w{>L~tyWslii~qyw*3~U%f4+Bful;{EIVsN_2@k*iv+rMD_wL_+ zTW3L$)&Bpyp9{#_+_`7}r0V4Zl7vc@-O~iyJx1)p<6ny|4&$PGDQAEeCx#Z zyBQ69^=BVHv8?C2At`v4_99o-{fU}L+m>LHLa~HHTRXi4_z^z z`-SJd!06Isoe9zt>JPGfs_+UHTA*iOkhtKBzu2Zr>^*|fUd;a@&)Uavbnla|OG>;k zqh|m8HK7mJG@Bp(csGCf_1gS-yHfYx+FSLt`d(#i*1EM>@BXHm?4KORH_IErV%x$jep#PqH%1frluUmI*)wOl$ z)4kqi`J26a`M1gYtb5g_4?aJb{a@exw!{13%x6CPEDp@7>VJLu?zbY%dHer74>$Z4 zKkfHh;ibF3PM!Yu*XQfU*$);xudA&{yH{IvdV^EU|M=DWdGqGhz1;O#C_=XX>D~LO z#}BfUPpDg0r@ri3%-ajn&JIie%r@dX{4?i%-1@-PvEIQ^`Ysh2t*(pWm#eL}t)I*u zCBH@_{@T{&?a5Z&tKSI8UEM9e$o&0rH}QkDCYhIRtv%k@r~hlR{Pby`{#JGDm@EBs zdG)`qUFJO^-Qp5~dB3hkm+RO5TmNl|*Y~~mCpa9iDyUss*F8Ob{Z!HAagtj57Q2M? znC3j5EPqhG{PY)}{So$j-kr9^#cN;8@BaTfW{LK$f5GDV_v<^hHL3&(uG#-N_RD|M zlw*694@Rzz$?SN)`fuLki&goWy}i|zA+Izhf6sbcX>b1fi_iM=@tTv~?NMD?GU@kp zUGrye?f%y;+sov%ujYK*TGi99SACsw{pag9O<}xG`_q-?@x1+eXZkInqLtP6emvw@ zzxVvV(zVzAu0Hu-<1ABN zFV><$dn2)@u|@04|Nqsx{^zxL{r|9a6?JRM4;;S#|Eq4{{4IOGtusIJx4QoOuj{K` z@7neC{kw_TtJdrPYpD{f+sA#iY3r+9&vw5Gd%yl=)!vxbD?9#1fBkstn}7WOrSDdK z{boJ$_=&xHTH_ANeVhOFvVD`>2}`c(-mb^}7E?~fGXD{J!jpVhxK4Hb!Mf#=Zw=hH ze!G$8a<+sk4YM00D{kHFK(dyIxzn}iKQ+;*VtGi!yVuLC^<(kai81X#t zm$vM`9WC?se_xw8)zF#*qq$}ij z$?IJ^Uc_t-T_gVL^~=&&eeVa`w`9$%-*NNxrx(BcGPmY;ynm#m|Mewnd9BU79p&ch zJ*U^quAl2Q_jLW5K-;h>`_}#0;&W1p{p+f)tGCv1{>#2w^EdU^mDH6&{t>Is2kv0! zep2;ljrL3-VJD`0djtZ^*bW^2Fgr0iJ2|&BC#UuQ(r0N_g_z$w=vtkcW+H-e0l!7n-+?* zUSHQV%#W;}Z@+k=);U*=otLJ3W&Zk_{Z(SQ()HE0?(6c}FaO({H0%4-`17^@I!>Hk&Fr`}$Dee>Plf2FwhpMGBV z-!|@YoqyQ7jrXp93e1jQed*VqS*!lU-27^$A1-ej+&A<1>$8dC|5bhGUkk4KljqO% z(jawan;9=}cWOpn`me9)Pv4q;&MqylUn3~@q&dFQHsNnE>+_x8T&~rhoL7JK%6!4S zBKvP|oT)4PC0zSN+Lq}}pPug5m3b*%docL>K|@vJ_lMHoz28!?I=&>fY)9CW^Rqs9 zXnem>vuEc0jO9Fk6L#+}y)v)+{$j)FfB*fDn{+W|_21KTZ(Z?UoEX>VwKw(G-2GwU zbGkn-P0f0LFDmlgp6mBtuly_fCoE3=>(zG~%GN$TQu&&1$@f3=*4J&#uW8cz`uhHw z@Kr*urmtF9THh75KkVJ!=&$cvLQj8+Ff0Fkse1DDLt0;3OzW&eUi~$HF(HP5fmz3K zmT7MmYw<7f*!6W=>-PToo;^ML($%1Un^uSYjo#P!fBM(cTMHNVU)SFGwo3on>H^;@ zi?*>zJ9a%-A3kaJ>(#QS|A+s$)e*JRHuBZFl<)ulCQpjmU-NrC%k12*H`IHl+9#Tp zv3!2&Fj>C;|EsI7{;w+eo3#4<(a?Wp-f{1|<0e(@+V|wsgdf+p#FlML`TPHG_18?j zP2sElhqLV7y87#P{nuBsLtp1zo%`KUn~)=o^w=)?QmIL!0*EQ|HD=4{(k?x!TrVG zbwSx7r-QsNg`4YEU75GT^R;Mf^~-I0BUAscsTDk%ZesQ;VbzJu`G@ab)!8ho8qM3x zCg*CK@W4JUTDrVkJ$q`^mGCK(>R+>je&aa3skV;OdT*Wa?spdP+hUepnf|Cc!yiuvpE~U0sGS+$U$Fl5?Y~uTtG~WJo)xF( zA78gKev#K&v1OlxBHbL1M%nCE*I&;PG@+gzd-4}O%sjz36eV^;=>V0Q&?(FVjdDVZOL>#>)%KrN5vFqHQ<4*0B=Rb65>B7sYDba8G-|XY) zRnD0I>DWQ0zokv5bIR81oYP%D)%nyPp^ptA=UWwieUJZ`rm{r;)B5bF7k)A+`n|E= z%d@`E+p9mrnRCkY?5}UuRGfMD>fLqz^Uwaa*tzb$zp1u<#-__Zd#2fIKUtDwn?GS~ z(7QKBo~@aG>yO)tujWlkKF9UmUs+imw==xS&ck$SrLJ;HRXvP9JWZS9Y> zzfR9_{j&W3L2(-{|LUFZAXbDaJz%HOrS z{OaBBrW{Yc{on6&Zc^1$tvB=je~SGjQu0T28u#6M3=B-S9A}yFcCi*!iQixE`}%67 z-TwF2m#V-2wejxVU$@>yyx(aW*RTKo->$bYxsg9KZVL4%oZKgx9lySA>#hB-j&?=w zYstQ*^>lrB?7yv1@m5=ZYCI13e@*XN{G_Wa*Uz5zzrLI0PEB~$0qKi(EU)bkTfINF z{MXg0qnFEnZ9gr%>gcU6|33ZudfobK;aAg#{0ohuzWO`wWVf2y|LS$3{Uiv&7~^gVG>eW4Y8PT=#o%cKMS+B1LRee7#T6DMSYy6Y7 zv!hG+F1v3{`db=vx$27hqU%}DAMPvvniBL>)sy8-f6Bfi>C0A33~e`itG~o-^7&U^ zJN|VBRq2MVmS5=qOZCOP`ycHTDyMBvcq=pS+Wwf`f4<2jJ~rC3|Mlcq`xhAszk2oc z)t9ipN%J1r-q0*rL;zm)QsFX9eE;o9>d(UCY*^wOKNI>aV?pf^wX^$KBnu zK6sX2T2#F(=HHRs{|$|uKe$UtDy9k;)K#5Y7gcla*O8jg>_sU@o^9>^n)UV6-qgF5 zOX5y8{{0>lx$^z4PZJil#iZ3w`#tHmyO+GJTm6BYzrUW|e_}RixBI*Qwj~l(vHf*t zcg5?qMP*;T7XSUw(!^N7Kzl!CwUYmnUtQPew>^J+`n3OHyZ@HPRn3)N_J6%g;r=sf zhb6!LS*BNeJAPtkQEYO?o4U2asZ$J7{hz$ zT<=-j_u404{_l6`TDQ@n_0PS%zn=bj{rQ(g^Z%WB?zL6l6Shx|dzG|zkewE@#4@f(eGxfzP~y$J#HO;R!aS$kI$bk zUiS6pm*sMo3ypqlymfzHWk&bb;)`MX!?mxix_b51zw^o`pDkNw{q^+ecW>jn|Hjxy zb$qSsF@h-p^w(@or~s*3b9X4b@jqfA#fs=&Pqy z%FI7jX7oS3+c~Xuzi0fSd(W=!UEOSdE%f!pdF#XDw%$_a-888_d-g|`)3wGrd#-r~ z{ao|#`&;{-hEIrIStnDf|h0 zKmDThrnoEjHth}%ZK^R}x^PKVj$PG;WaA6I%m3`!{l~rd!k7Qv_g=I9`u_Fgt-V!8 zqgQ`$|Mg`b$EzZ~qQ4hpdiX^(X5LSf4+X~`}M&i zJb9tT{TEk%iOp93{)(d`Sp3u9bGseksr@4nk&V5K7JcR1JRgZtstYraOW z3H?6ZeDTqrJ->HH?GIZW*75mP$XfgBlaCz}KPGf#_WqAQSGMTd${$wmO}~CR_dkou z`8h%HHv78nulG&T+?Tggce1SYKi}2)=fdU6r#L3<-&a+)>racRP9N{@b^6DzWuGiQ zb#}(89~zf3zFBkpty8kLxTTk|>_u2mLcDxBU_Q#Mz2RcPJaIXROiZa@A;?y zyT>~$Sf7&ovm^Gtck8dG;omkN{rSqvn*YD8@4Eiqf0mSA?^c*|-aaeK=n-1f$GuD@vg^~$`igH^9TmH6!u{<$ehWZ&9@B0KNsv$Plrp0B(2 z-@bO*&yTnC%P0TZ>)V@}CVa8_YnPATU+_b>kYZ?~86O}bS=_ZMY<${^a*up?`nxQT{Qj!7IXrP(6WRjx*TnYZ=V&R<9G7yUZD|7z3MR|iXz zUab6Eoqm6PU5-U}@%-st@BUW3d$%`ca_2hrt?AkEYwOmno|?U8yH%6Mqwj?uWqUSn z=&!%u-@W_mAIlz-uRo43J(jEwt-n`&_upRC@A~WO*4M2R-uY+ROOIax@y2pr9{d#X z+`9gq{LBS=UyHAAoBD6+l3SCn%T{gK;x$$O+PYW&<92q&tryaoYgzmBZ;4KO-u*qZ zg}N`*hK6{3z5DBGY|-`Ap0Dq(i4*>}>2J~>^_nXiGPS;Do&HvxZGUz5 z?)ttf4?12(y-nR1_eVIrchkwwdFAF=0>=X~mh?w2Z{@e8KZ~m{HKQxxa%j{me;3X;&ioRQn|Bn`{dX$v>)=zq{r0`;-Q?}B&Pj%xS^cBm zbzSKCf{9wD5}QIY4(rALJ6O_YRbV>X*!yHz*sQpXH-?jZ`TPl_pPS5zUN$`v9@gHZE`Zw-BD6?mY(7)YLe_B0kXVo9c z`0@DlGNbtW7xWb0XR7dp>95|oG-}$#s_Cn)Om6yp_tUH}$xD28_UfO%H(d(z39LFA z$JNy||AW_$|4jTJHyv7LwVxw_qu{V!VfWL$EygxSR6X)LVu>F+meCyNtDS~d9hkvvkHu5{S&ox=$;{Oybiy8X`dxY&-`qd}rc6wwz(XY5zW$SJJj(v7kAGfiH!WJl@!*9PJCYZlcACfW*bx)%RwV!`x0 zQ#P+<9`{zaPm2D{;#Rx2EVwO<&1P8GX`-jK2-X1|Hi4*QR6y8`0hD0;X4 z+im#y|DlwuU6T9Ma{Fw&;65n*Hm0Gt%Lobv|4HYqm7#m z#Vu_Kx!m zyruNyOVm2oqWCbaYl}GF{{MB#gMoo1!*P~*?<>|qq5BI>f6djt#`im7>DMCPYwNpT zitc_JynfNnU(5Jc@87+8^**(&F0Y^bjoLB6BTlaF{Xt37eK&-!nZJJhwfJq!uejY| z9s94o`W<()h%Y?r>+KKcioMtGRQz*m_pKfC`;Tp{+5ea8^Q9=`f6M&B&)OdpnfO=# z>iV*^zx3<&t&jWbbMaosfqgr|SJ$oEy?*vJ{%d)U^uH!qU0=U+{Uk?kjdj(3Q*T{8 zTlzh2*ZU7s-+bTxp_hH~rDc^L@2{=f81`D~Z~WJc&M-~es<_a)m7U*z{aeGG-F~F9 zUNj_Wa{j(qqpE{%hS(Z-(Bo?w(@nz`S~A^ z5>&bL@TyvkVy3kA$89G00ur1R>&%1JynJ)kzvIUX^_zY_!W?6(w%2QYnQr>&_jC)# z$$!K?|4GSsAe;JskE5klh0(Hv>qmY{Xv^)DJ@p~M$6Bb>^76t}83Mm*>$jhXmD+!? z;BVV(Q^(^x_BG!wu6wu6?_$jCBNH+N;uvmglsl^Hzka`yuV&)1 z_#a9(88Z7~XUvGSNcr0Q_0*pDN!-ihV)nj&vir+=!Jt16U+=$qxoSgPk^ZUMfA5tA z{q%~NU>uhe?D=n-_1~mFty6vbU!A|%UVEYR;o|my$$z3}G(Fk>A&~2&VAbO952n4j zVdmB*zn^86M0I7HVh8^zZ9RX{W zf=4soKD>BbU|&VN{`VtHn!nS3C4UmT@Uh)JZ}mS;KA*Yj@ogFcFDk_NGZIRse>tYO zFIaNYqJ8oImfhG{E4KVoX!%3iIsbJ+r+?Z1sUm%kK8xi|1^?@>{< zkChiNS+RAMUVOal=!1?QUHNt2rhl}&!NYYriI>0bYexSK4ej&#b{lQfC;$HcC1&Yo zj=t{I*&prdmlf9^G<0X{U-o_ZAK#mQ?#90k`pG@xu6>h)a_#x{{+pIQ@eh^F<>n~t zm0tMPd~LbMmiOUxLbujUiZ^`z_THiLJGbAz+FSj#KD6Z8);Nw%fsGI2zL?yvpDiQ! z>(k52aXacbetA0nSo2}}jXPzN{;qG?Q*z>e&{IX_p56NUR~-$%q5SEY=!2g1o{iU^ zEcyG(ZS&shW#%sT|E;~Hec$VP=h>9qPqt*NJ}5t{=TGZ?^GaL$R~)4uHMXiRm6;%OP_S;AmG(TAkIT#4GwxY zb;hoVOUx?H_!#WkbK}o+#+xDWDt9*a$ArKBBDB$KqrAZ7d6#VGS}A^2i}SPJep#R; zLvpLc`*#U8wl(&47fQn4yv#hY!L#mPf`Oml6ye7y54G=lIjov9;ZHa_qfW>~12rRF zr?!LzAvz&~EpDst?tZm9x_r9kwM*;E&t47nE>&xLd+?K9`iqMBt{3F2s^))T*u(l_ zqIAm4FDG_$g?{oD<_-P)*Ob4pm|xkwPJ2IV-{DDn|L#zK7teimCTpDt^A+c^?LEJu za*mumm~SYeRmjvZPvj+o_7eXJzZcd^>Vs8ho!=(!&ig*$(RW`4-tf&QRT)d})vamk znrp48`#r8}OQ@KiaENa4x*u!OPvs~|`94p1wkfDuc-fDR%=MYg|0gVxyZ!yYaB2B( zYk7k!JoWZ(PaHh#^Z9D(xb*tzjXR9dH&R_z?#^vR@tlS_lHL5*niA>wPNYg zrS~%Gm^c2Iyk~K(dTFRY=LsPx3C1;3W=UmjKk@$Sz2@t7aW2=NXityb6EJ0STLI%$ zS5?sju{|dO9L%4IU(v`5+0k-yrR+z=UsAWU%vMWZ4N|yY*sf*RZ!-IF?qSIa;Wih; zrdJE?ZT#v|gCCc6{|L%F%J=BSbgpbC;q8S2oO%{QhIgiTrWtSjYb2idr^@;2&-TT) z^uKxf7Vy10c%b@t6Z@8^=X(@#W*%VFxiUpVl#A2pV8enL25KU#tc(s5LR5q~1)UTF z4g~0kaLinn`u^@Z>$N}c#XkKi()GNb%ilC}!PDb+XUSKU?f-w-_RHIHg%6uGR@}4o zme}aDw>2yER_~LoQr?g5^dGsX#-*R`Ao`^1;abb%ds%!<^JbqfE^IrU7sTssB(5zk z@mldgooIEw>XdykvhzN)osZyo{POL;d-5i2C)F-9tg+5=C|Vf3`;ye{*I~7{LS18y zW^r846XImBxWD*w_*>ntFVYEf`^8Mpd^?+1vvtqu%u*AJww~wKZrjV^@AEtVVcsKt z_1KYn^Pf&jh*JLi_5DV5{%3U(cjbNm-?$J}8un;fg6fgPSOrHNGk!(cV}BxUFIiRg zkpKI`huY%o**oUVdg7lm=RmQaU2rv}CeEW^BFMMmE+lgDNV;<{o+T8U0*QK)e za_qfRR!%<6P{61XGU0-XbZbXr!h#45BQB0Z3msNW$PzR@de!q+>2;m6Ue`RbUgcIV z$yzeaqFwE(%;f3+?!~7sJ@fzj?^@Y!u33NQ`&9qW*Qt#!IGgm^H~zZG2m6HUGMkhB zIlq+>2;cm^!E)OQv*r_*ioWIr-u+oW!)My&8zsvNTkn57oVq$~-9$a@a51$%%k&RD z&{(^&_TDNH@41^#t+J{V&9d2lA}&8c@5~7XYngkYr&B9@b8w~<&&D6*H0*U7xGe7UvEZ|CSS^2UlYtj1P z%c5D2L)ovMR4wA)E*rIS_v&qPviIy4YvXiM&g!nHzoo0tz#wSGbb!%3;cM+*h6fA` ztf`K(Ecm!s%l4Un(+&T$zW#sxzrFYWfARWm`v2d5fxVyCmS27S>-%f@t?#FAegFUc zzw6Jl-(Npj|8$c3TGiyQQ#UKD`&9K`y~FDzrJavj0zd|DE;vl(@i^E`g`kMt|M->FVR1!TYZEO^ko?(lGkPj`_1AZILQroTngukV?)_5H-yyO%>x9{XOm zH}(Hl-H&!>_I}x0e*V_ek0$&7sLv5#{k@89Z^G`^6T*)aMeY5l_4mHc_47gTUPq>> zCms(LUD@|z*6v-qXUF}m-hX|6#H#n#cK`MFeD(H3CzJQPuv@RVO=NOzR?ghh@<(rwd(J+rB7F{p0Tg<8<%=pA){SJ za^TAL2VFJy{otUqAK!w@ zKkL38G(4LAzT~uMoAPYyKcN@6lFb%s9A!T`VgApWe0h_3{y%dHuSo5?oBm_X|LxY= zlb=lTUOoTGtM#tkUsvuCDPdOYHA?Jdto@;*GN)idYGakA{i17z{zVfu&ba?kuHz7s z8CSM@+LKBD_9dBJzP`FH=-q$$yn8Bn-YS)5_OZRI|9rJFs{hp0-@*U%*B8H~uXe4w znDjAnZ_%&tQf<%IH*M{oeq17Fp;F?Nc2H;UoBy+HDnkV1WG5Z5^7?owIXtZVo9vnt zk5``!v)9~Fuyv|8?qZ)he{sPx;i3}XN`uwMjvfDFzWADlR%x^Iuix%ndjuxME*Fi| zda~u)#=5xpy;a{$mrj4SbbZ)6pRez)tc+V{SES3Nd&cy?c>11lyFETNH+LwA&l7RU zpS*iN=NY>quE;;${|)Av|4=e!ntc0H(TYF2rLW#snYY*W^n-5xBkDQ(%RS}x-MA!W zF85=WG~+5smz)**C$uKZ3rK&}yuoLF?|lCGO=~)Tf1NgG(%;m&POCk~ZG0G*{|Mdv zqx{t9!^!vYQyg#1oPS15F!MoetB1L&UC#FM2l@5aYE zY;3;kH#@y^-8$Xsr{IS_dY=2Ge9tF`+n;>@y6Ms;NtxX}Q*YGV%l~M)HvEr@^Fkvo zW`ndFKa}jAUwGnJ|DkU}skq`x_nX#V)z|+~&Ykk0Vd2JeNh?Bn8|C}k#Df_#WEPs- zkPu>jIenx1p;^qUJ#HL+mAroL)8ut&+TlU9F%#CrPqOCt@J97Mhlhe1x0%Q!*E)lo zpDJqKPQP>7S$DPm?)`V(cdySr&EN5Mi|P7@Qj#mon(y9!ef_u4_ggzX?)_I(3Aq0K z@qxy*PCwjIHs+l#`EtEe?{-e^+*$vEM7yt=D|0Wd??1Ktx8dh!-A}$={dMqAQN>1? zol)_EzHh7}>mRIH*lK({QQEn9j(@@>;i$*j2NjJvILq}9O3eIx`m5${bxZ)}o95xk^4o30;33bZ56vmyZAMutIyI>#ytn z$Itz?{??~&8><$Dzm8v9xAoWctgo+Q*9G~lzqR+))u`qCE2r6Qs9XPQ|Nqt3S4-Ob z{~vwQZ~OX~eQ_(Lzh1gpvgvKQ*0udnOS7teFVB88#pwMtf4whzzc^j9%nTB}zj|`j zm4EBSUrdN^N}e#=fx*&viFobmiAI{<1B9t7aVkU%zg@Q`TRh z{co$@e>L(idbNhz<<9G=wy}@JMEzHNnA)CJUvcVf+}^6UMH}y~68>9zYj4%zm8pV9 zel3r8z5b&AhT^$1Z%%&*-|O<~adRX8^P~g+pRN?zXwRK>_1wGXUy~f~9t-*+5V&OC z|EIf^{=c_Vs0)~*TEM0ue^pBT{JPI=Z1o>}HQA*clX~~ZEsW24JpJ675dWg6jWMS_ zO0NxHA3rThL0k37)D~-*d9VNUUzsCu*s1B){wIgG>)L%Sf5Rdhb#|rnuBY#JT3$W5 zH!JD?t6ism?bGfP$Su6rD4_YHqVd_j#!I}Gl5N*o6BpcY>InC1{OTwh(`EmxZ<*QB z(&yIoEj{u_e`FQ6NbUMt@MYHYYx)O8L)$g>TZ)=3O}<`qXJ5B?{hH7RlcwFCRHa(S zWwYNS;dbaF&6iJ8zdpGaA74LV)4#8WFT43qbV*d!oT=sdeD#f)Pd>8r&95=uqt}1( zN&i7{jYHK_g&)7R48Obiu&S{B>g(%6Hdk!&n`C0Q&`fh*@6Hb`57s0wnbgSHg)>@4>RSAT8pYZwrMOW#^WVf^XHl3`x{?K~6^XXFxb)07J9-c108?&!}`iq{4=9A@F z)WTcz0y;Mw-C;6cYNF~3;cMcCijR+3{V|zd{zKVb-pu9D-|X{-vw3Gc@>M)sqd!e5 z%vbqK{67Uj9{c{wH-9WNIx5`4a^dSU%d`D9K5YML#Pf_Y|Mv?X?>_wh@y@iL`CrO5 z{o9wlb$|HZs_83~l#V&vZ~l~K*c&MR{`=9d54w*{7N|RSZ%cgfd1>bPuWG0NUnJP8 z{^#ZqmVJ{xte@)W%=lD3+HiCHKW!tX89SY&6|Tr1lH~Q1;EcF7k$`#Blom z3f+ag=YG2hWawB{{%4&ny(+c6*rSu{)^**zEW!4wxBtcbk1JlkJ^Y7?+L1`B=_@{& zHmhrK?KC^X=jmyt8i|j$uFHM@!SkXVkJaw?)vLbj>-zfY@uQwiObo~5=N;5{ zy>fl>>3Bo$>DT=nrI+}-SK7wCpW5M2B(eGwn+ds#~9?`cCon=@;IHw?CDy?p*zT-}|dizhwQ{viMO6PS$Vs|9!x~z&6)$mL;DTYxyN%?Ok7AeO-5L$=#y)*R{S{ z*91NNwR`>l>$}vqzWSQHd)KaI)35E0p1W?{)g@c&uEh3zdzBY;m2+*0`q!k+g?D?( z?>f)){u+MK{Kc$aaepK1{C>at%3^z;FaDbTwbIHI7QTwFQLk1Bt$s11`)^hDq^w#m z&&Y*eU8nEb>fxlnd!1Tz*m2Ld7j2_9>aUMq8~VNcZq?QJ9ZE_?^|5zPXKZ>t@9EYR z-EWir%$0a~ShGB6{pnTz!=L}Hk6x?(>U+(8;k&=??2ns&jce=mS9kV)dSC3GY_hZB zl|aE6oBOXd*T!XCTlCI9Zau5SiH4~{x3VKArQT58ap3d=zW3{E@Bc{WkU9P9=ic=P z&CJd{Q%js|?j^lCHgxsetE;D<+Vr++`TD5NU3JSg={2fn6s1p=4rdi|c4UdrsDqW!Z4 zi$Wg>BY{>X;iEN4D-QHx?5Ls?>kb-QRh*P5E+{ZRbm>j&}GQI(#FXbJ|)*m4h3$3%H(XniM--+(z^M=3-~{=RXv$`JDFp z>$0jH9Y6SoMxnRZ3>02+Hr>_3WTp{C>@yPxf$jpc z>-(kuCt6AW-gi*3an_85p7ZNkziTvyGv8mh<^Q~$Y@FG@Rnm-qv&>eK_$&DO>g!8) zR_5Qk%eU+>Q?Kzp-tI&9?Jj{>P)EGScS_!|%x!u9EB< zo%}n#^ID%huJyssp_}#1vr^#|J5;Y8v5(p{eZx9|X(FAao0!zkaGQMF8FWVdr}yOJ z2g9HC-8``~JAUCTb-ts=7T=qA@`iGod*u7D1@nuAG@AKl-0piYWz`Y(`iC8}ZRa+< zb8~ap{CI`z55;wRey=o$aFJY`P;lxO+u8lH&vUCCb_w3}N#Fh9LzYj7-K`%kbGDrQ z2qHDt!CKC!x!kAJVv``~T7X2z)_r~lhHC$w)i{4TF~@(XiZ@1-Q8DqYXD+iUl7 zsq8#A>C~!#_LSv3oW7P-+pd26oPH>DOH^lF%J0p=?_z|)JvUX?yPmvN929&!n>|ft z-O~5<-{d|Q==bb8=9+ZbL}})oo8QmujceJk@ye0o=d?2Ki25aUCr%Oz-}rt~;7TSL z-j(7!dbA-7s` z_gAsW*Y%&h+xs(o_w855*T1-!74G!t^sC+9s-9f?=f6K(u|q=Ag{7qP?i0g#PwT_I zBfme`Y(D$ul;i!uyWLU1Fwx3SVR@e(d_Y>$_dO?EHHlR)2AqckSBi#kphl2hZ}Sb&mdf z7#Nrh9cNkgWwDmmZ`Au+bNYJut0#Mc_RL!!^rQO!|L_0*#(%&6>iyQDb&lM1%h+Fs zZVg)-?%#Xm`s>)${A-N$|3CT8VjEkus51PI^jW64w{(IFO*fjpnDt|7QoOod-PW)_ zNyd8dSO1qL-}yR8MX~w6%=){(y7EeKCEtK#9P)=#|T^C(hik z`^u^-?*Atr|M&50(%)sUKijRAzeoenKykmED%fGW-SHgwwzh-~U{;F(x>HE?y z|HvAzJ#+kz++8QK|C>^N#OluQw=ui-hpX(KT6K5*Q!;)`!G2Z`Z1emy_bZV|(W}%il>MVMc|f#dq&!x{LpxwKrkE=*?Z8wz|I#|F62x zwPU?&$-Js#GnfBAKE027W zvAV3|slOsmRfRu;jvM*BK74&=jc~=8r&1I9)}QrNVf>osPv<-C3WwYci}jvr|{|0n&JaWnOc-{cE(!zbLdnHTr|>#Df@YqLuKChhpC zzx%HE(w9Y(Px+x!&eeZJeL;=-d}hKrp)$NrzKHU0XWF8`GMTl80-N;mrT$(wJh zzW(m%DU;QETZU27uws(L2)>$roePPwgdOm)irx(QbwS}yne9rZ^#?OuWU-RtT zoAqaV{hE0j!kg@lmCX8M-q$=m=-&SQ*LC*)TJ*K*`Xf*4x{N-L0975#d8dU>VIlX&O0 zcky2f{KLX$+*h4 z>(3jfI!Jvu`@Z~m>+|v#le%|(aEe~fA6kDl^y=#A#lN(Jrq91qGx(x|e*Uk&DEmv?>Ts?1_sjkMUku)QR!uuRpi5ByqQ57j!OVX>Rt7UQ zol>>dEnOGERrb4reQT>@L)pa4RqkckN5uI>c(azN$9IQcS#)p09^+4Mjm?huZP8og z7b|>iQ|q?B)6&aVU3w8ebI;DKG?!;;$_9tB)w?V_&Q6$A#?Cxb(n|dCyn^D#et$L0 zx^ki&wPt7Tn$T3PQ_!O+zHZA>-gQ2ggE%)FNaYY{I=N)hm9FqzlQ>Tpya-5JzCyx5 z!_8~0bxXyWug`*RnA$a|EM2g=E%S$j<-E4J4XQi%C%8+lXbEwu+AZOKPE_>$t*^1M zn+0A!WeKvpbaU!7vllNGF3l||i_f%)w4H0cqp(+Ee)$7~D-Vv86`F3C!)SD+M@x*0 zv!gNbKtzU^m59?qL(ZFbuHE@l^=kXxIPX(i6qh|MIazw(&+*BV|I~Z`p6q+;pWW^H zl~>Bk3nv$*|NPt{*Y@&Pwf#)4;J340TNLWA`lqqjhghaGMo%4#zWW)~~I`qq|Gk$XBT=vLEv!;iJa~V7gTfXm1 zV+NgQzn8>TVKKh$1%qW5XVxkZyYEpGIA-H~B5 z5Sig2Cf&x=m~dc*ffx_#gw{X>9}Pi47LRA2`^w`gXFXF>nOU?{ZPnBzYK^ab>#voV zWFM|6SNVDWckP+H^pz59XMTIew{2Z+)`#ef4*h_u^+1<0AxK6$ISXz9PyVdH?9IU4p4U_MYp!@TFQrRO0d8DHf?$ zu3DFdEix7rf2zH4s&?tgiHatB?riv8UEim=+QR)iZ_qVai|dV^2`{rxW-k)C2y{2yK4hu#Jkr^6dT&=Cl4m%ko}cSAZKj}cu$GSg!m#`Qs@S)l_`WF4KkRm#ZqfbSJAdu|k(Kk&??s=P z*w(L`F+a9S{G_vZi44BZvmxlgrc-fxP3(%P`MSYU=<&La7m z{eH|-PTudmZe`nNYqB@=exGZ}S*17noGKdp*x_a!q-E=I*4qlLh{D+}?j7p;hOe)c!#6|C#%z-u;zOpBd}Z z&M-OmqExortNm;mQ>AAyd}3u!K6UZ#+qGHMJ;uz}7cKT&;lgYbu;q{b`ehgHEZzHh z_C%)!!^Sx}roy)?WV;_9`d_&4u6=x!o$$#QXPyVXd_5zbQDE`c=@kbYzNYPL_1}H0R3M#y z=U<`h7TILeT#vf{dRv}dDV=_o>F(cCj{18mFa3=3eEZvP&4exS^?x_<1mrL<*WOb& za{kC4{c;h%*Gn&6%CKLrze8HV`})?4r537gIT7u{B(iJ!^xE~Ump}g6`t17W z{{M4+Z>ZcK>ApdYt31BqR)f-q!q-n#(oQU^*j%6+2$A7MX!>*^ls ze|qCLQ?7OTk^_5vPl|qyulE(P{(hqJR$SY&8wx>%mwvgFc~*+I^cBzf`z@Z?@2^w-aqGk0mU^xgOFr<0WA_i$;N>so-hX{Xv3lL~{SmwWR&RZE`ft_Nc=`23 z7wz}Idr|ajd)!g|*VmhG{r;L$7I*T;)b#8Re@j2z7yh-$zxK}bTc0*@g~zS!`gpo- z&Ak7wt{?fc>3&G)_g}BSo<8Zi`ir~u>i1t?i|k)Le|`PiSly~$*=w)vk9!*%=cLo~ zYjx|3r@KQIc>n17nps=)rE1^rs9AOK*Oz?#=3jp;?rPW8RdcK2!|V1uXCp!`5YOt@_%%Xn%ytPTiz;f0N_ePWtj*tGNDe)AmY5bgt{;tm zeO1SIwfBR`yN>P;&gZ>$edVRS;^9x!tY(~keZAEF?DZq-Yc~D*@{d#b5%U54I<-qa zybSOCAM}OUhtJsaGwR`4zK~6YkHc*D$L#<0&*zo(^}?OuG57YGRb78Avp#0^LDk!L zrXQUsd_m@5+tOVhBQD+AWn^{%n_qXQy9caki{irQwK%1QRS}>haOkEuc=CD&W_XWlPPW1 z|EDr(#c63tXM4W%hf8K`Ww|A{`i1=sWtEDwU(44e^>lCx8EW}`{2085bIxaWDZ}}* zdu7V@@A1=)FY(*@diwu>hsh1ATh?${F8?p+*Bn!D{A-u%tEluZcDw#;F{;~Dus?kM zy(MYe{(n)mx_RRGSK0lagk?JR3AcpBIp0ta>fpJ*JD_Ll!TTqFA3wx&zsWLUf1r5P z;;&U-W4$MTssHH2F?qT1ulIkBZl2dNfA6kY@87Kv5}W%ueD$X4`d1$UeDA$~cXfYc zWc~X60^xPG!yxy?5hpEOO%Q zJ1*sO>aJ!KOX2!uwKrl1YbP;+Wqg|s^%?hS(3v0K9A{bef%l*M57(ak@9K^7zrJRF&3c&^zb5>(XkhKXb^m9@{j`rQ zzyEht;niQCl?t_HR=)0>EgiM1w8?f$0OW6M4t6490McPJfjakR9X7t_VSNz{vAKmZM?*|1|dCw`Ip}p~vecj%2 zQ>pd8Ht+lW{)2~8rR4SOm4C%{AJy02wL$f+_^<7+FBP9Xdu#63N9O&KW&iaPj)nbG zfAY3^$&d3~DU0mGR==FSerf*oB|G!4me%c>eB{}lr&&p>cw1Nhw2%88x6V_0b=>Ox zF-v#q?b(;F|N6n^Nj=4fEKA>9U3mSyeDD8)BMS3gALRSA!eIL=b%UNv!(uU}__)35 zYirl-42#+H>uRC){q?E;fAn(~yH!Nlh~HRv({`(F_AcGnAi_i-gF{|J3|oo@C1OVyv*_aC(M%9(dfztea9OVsIKKa@}0-(TBgop~v0vijG( zRjSo%yS7&D71REb#xXDZ|HE^Vt`qJ^IGot{`stUDrai~(|2y62_~JhKn%39#*8=4> z)>R$uD*pdenV-kAze#fc>Cee(_NPK0vtOv!FaOzn_Ii^MZ%+ygZ;yUny2!WXkMY7& zr_LPSV_+Xwys^sj_eUnZfb&Kj2m7a8uKGMRc-#JOUP_16n~WY`Gpk$kH#JDOJZxj# zUi~T84H($#iZ*zs+83t3-@Wg}{$JO3OJv8bSs(X)>pbuEQB(Hc z`ud`Wvwq$ClaDuby^UG4`_0$Cn_hpqbU)?Q`Pb}E?mc_-_5Gb`PEX(caf>Q`HhLV>PrkSsHM@G+?@7@&X6ALi{vR&*VypRt+nX8~7}zWw zXIb-|Vy$18d%{1wc5V6g|L+C=ZM_>c=|TJd|FQLxKmY$9dieF;`1`Nd@1MN%{mY2j z?0s8Tum4(UoBj3lSEKONcUmgk*TxDxnc*Jc`gnirx9*^_^?g5={(ADwb!yckt?6H{ z?As6~IO$DC_Cv{6OFlN&q&x_jfA#*f-SJ^sS6^S>ANRLGY+Q>QU_M7aqUlaOzr7cg@ z7uDASQD34ay_cGL(4hZD|KzQwcmA#Z+WmF)*6S?w6J|b`$obv;j_)<`e>+1q#`wg2 z_k1W=^jdY%O#N$%Y=fCUrYu*A`K#Zt;>RI}KaI>$U)Qr(m)(slt^G4~dS&USzn`wJ zuiL9WJ7cfj$`9^!72(hJ{uh-$^4aLV{<>FA=N$JJxbJ=X+xXs|IcaOAUh#FDTXg-Z z$KJS2f3tS}yS}sY-LBLBR~EkR;j0rr<5D4ImD2Zw?f8~?3nE;OE~?r8y7&l`ccXagUEhhm+p+6z)sDWp`p$LNbc@1| zl!PA*Z*o#@`zxIK^*yU#anP2FJWR{;_n1UF@g&Gsf4F0#SLc1PbN+EzTiY2kO7c#~ zpWJKxTR68_v}((-J*KOC0)xfB&ztZ1F8(Hau>4OWm39>;hn6V4prY%mubzB$|DNxU z!@>)fz4h&VB^{(0?9I5l^UpVfQ~QrOrHL-)&|3O_`g)l+Cwm(k7TDSQZCPzZmGuG z^9ey1E@u-5im;RrWInMn#RX|{~ z+K-vfCwG2Z8vE+kmezX@nm48@{+%vJz%P;cL|zxvP4of|LgU!{L- z9@od6(e}sMVloYvzRCZYc+~j(>xV7r*N--FSpP76dHTJ3RczAl>DP}<&K0!3_-o#d zbtyh=E^p<3JaYE)-2N}xMg7BO|NKA4o75-&`Okg$L*w?!eXl<*aoblsr_a7^^2$5P z;W7Jlt8Bye?X3S1KQ;fiYWUu=3H{uEtP_$Hl1yFpt#xLOyP@=O$5yGwJa!zdTqbUzS)>(_eH#NGA*r`tn?szkEZvhxYFo%ZHzmfDtJ%k2+IBM zVo@&7ve(w+rfnaWsX{iI3-_r)w-cjD}quk}$=!W6&nHe2O; z@#(#(*Ldx8?u3O&|6aW>a_i~uuf!IsJw3P2#4cp&l>eQ#w@)`+r}aH){`Z%1`(B82 z3D*B+J$jm7z5dYMvlqT?Z2qP2_3){ln>N>89!)t{X4~Dr;MdYyyHEFj4Xe6;Z&UR7 z=TGNf-tSaB=bql)e8c+dX?Op<&)X^x{GYGj@4we2QGce!znHW((BO|m;i1>%*KeD? zK51hYdTML5XZ=DwmR~!pUU%&J;BzgMrX-stuI z`l_XGWq(BNb>12FD{k^XudOTfHq868`^Nhi<0W6e+Pi-Z6W`Twfr~l1`GNX>?V=~& zzy6Nh?^1f-cJk-{U*E>x6PiAC|L^d>)%&h5+4_2?ZdUZ4Wsg0~e_2~kda{4%D^A_D z^?hmuPxfzJucj2oT3UYTtlA>igr#v>*75;v(f4;|yRCj#wQSdxsEyv`xAuNCfBGW- z!*u;Rf$d+vuB?rU4&NQsvGvNn^{Q)jN37WY`=7vRcdL(ql_FuF1F52R)@|*I{IT`L z(pUP2*7?tkYTB>goAh4#&iBVJ6lNY=B_{3DKU>o0nV)WAD(jV3=ii-QF>lpB?oE1D zGXuT1?)zG3De4w1nqj?l`s@3v^{y{_qhAmf`1LB8>EUfU6&DX0 z|8g^PliiV|o!w$8#ZtNQ`@7%L_V-L?zOE{_-!!do`=sW>x)~k3su_>m^MB1-+|WAz z_n+F?<#*r4C#kRInlH(|zHagEMaLMK?q1-OkNh6>@xS=r=(}0#y8nN=TanMQp21|z zqF1H=z7`x~u9u6DRl2upj#R~eos+KLxK2pkUHS3J9)X{0gzWRrA6)h3#QVhsYUVd? zIx}9o7%jKL<6-)C|2N84KVC~{F0OR{XGwamd5-R|;En8@S^56!x~IFouC&=B zEEo5Pb&8l%nCk1l%fdfCaLLzju>QyCoh=u_!L{qV>^TXMFA=J{4*Y!{z$F)^^zMU) zN8!;udk^M)fzxXs;B+8+uuGTcxi3+q78c5r^CZ_1iw%Jf41DE=AM83 zhnedZit{y`vHy5=TcVlGnl{6^Do)3Xrm{2DKfb^J?7yT;TU)-H*4dw0k1x0Xnta{A zOSdmgzh-|^vCY3bQeR$m{(jxBWBt}sXlDukEcto+ZkO&3@=~oo7<+ry#k+zB81xsH zt@Ermt$G@+=Vf16p3(b5>CC|m`|oe^zWg@!dtjCN{#QZI|13FOyJSbfrfsIK|M(x? z3Ea3?R%T(NQPdx?_eC35ym@di_rW(ZS~hL`@CN22F84kd(&2ay=~0|rP?~1UHTo%cX6hs zR;+z{=k)*T-6He$hR%C`tmMDzh9Cac8vF9~zrK2Y%S^WA@iO_jEfxvpC;uK4mz_NQ zp=1Ett&fd`JUj+hbx%m}A5A{MaiZ`f`}%{dZ4!}{vVu}Bd*-ss+32iafBN;Sw`c!x zEcP&0IT%-{{p06j{YB+gUylB*-{Ni`zCO&d>}QR_qh|fp|6e_tdN@jT{p}!~J*`t+ z?OQkNrHRauuD^bo-C@)J_{q=z?U{c45ogK0r#e-!IU85F7Nu5b&*Plhv*y70WmQI( zf_@z}UwkyK^HR0otkBJV^+StEd;zFu^$OW9P#wU9e|@nt_ItRXX6No{&%TuJo*w%8>#0qa@7{k?`XtrB zs3$U^M}(WLnK4-*B2Pq_mF=M8h5#2qPL3n1C+&Ry?&O_`#k2L6rW{!=%3EMp{x|!p z{@EAv9qq#&{alc0w(mt%cf<#Mzq)j@vi<>!#RuH#DVJA@-(<`6>dr1Kk)hS)Ax%Gw-(G< z9CK-HKvd7oq$_v3;#8xK$8>s!>Ly%V``*fCa(%Egc)+(uLrjRX*)i$B3@cz=1C_DZism!E3qcWhJJaG7=k9->^V%}fb1K*#BFwlXFvMyQE$@ir|KnBk%1_Ik~=>c7P{QQ1@0 zZ8SY@J^j@BfOv+gKgz%IFI`%>q*rgX`Ca`F8;ufG3k|E(}#o=|;x;$H=G?yE{nNvLs&mQJl{0Y&psw)`}_a%zTDsKGDli!NmE#I)0DtZOAl>XFLh|?_KVJ^mnA%4 z)DoGZqarQEa;R}Zh>nO*E7Kx}2?1In60J@ul%n z?T&`u&)@t{h1ErOT;25O+V$n_t}%KX6Q0iL+vFp2?%me?=c4*IPl}LZ-|xTY#EE4W z9&$&CbsK)4XX++byCqyHC^?gz(I8~PJ24^FR>s7D90L(sX>!+`;t-X9;`O{NzXRX#n zJId~^Z+g-VkZy1;12P*zJ|4H-TOU0mx?{&U3Z$f zY2281BuysA`BC`2+P|#P$)~5z(z?v$e(CzzhdEid#H(IKe{;FIJ6hwVi29D_ZLMeL zxi_uiteW$v&~I9#-t;LhL4Gc#pvk0}|3o!(?g|FoH$OIY>ahl$R>fNjD*C4|?lk=L zy=<28{#B<=uyP(XyJoVbmL*TipI0nXb)U)e83jJo*L9d3ibEnJjrXj-B&(r&@WUlO zrO*9}K|a4HMvEHQMWvNl_iuBmI#+bh$5@K*^tvGL8pr7!jAt}Xcw9T6mn9gbA{gU* z;>;}vFCUK!0%!9!G4?K&o>pusT%9@R&SBAXi~MOCEDSi{D`6|S$ z?VQmLKL!St6vtUM{9LTH3-#+Z)~yR&w?FRPy7-7CTi;*(9n`%4+WsK(*Wy{;Ti?Cz z|MuVYJ!iODe7NeW&Y%B(sr$aZ{wieo($!__%-H93E1V(}_3yOo zQqA{YHJ*6d*WWYC|F61j`>W@PJI}=5`K58lfAgn*<=b}bST%L-+S2RmReynHh8cW+C#@v_}ox7r*{*AnQ>s9pM?7ZvQ@vHT(o_~FHhWFcl zOTH%U%~HO)^y~gNQ+M=-Pkuj5>5;8y@2`0~b&l27uMMpGugE5BQxyL?*qiIu%~O8w zs%O_Z+f98`E4FGye{7j^;o}qcT~Doxo_{4g^?N*$ZuWY?{qg?Eym*Tn}{Bh6d z@qDrO*VbQ8>w2C)TIP36rj2_&%jG50-^^un)Ss{^IPUD}e}&nHJ)hKHoUz~J*d@0B z2YcJPBQh=qR~C0Z{;ztyJ)<;d=dQ<0g#|};i|GH@&Frg^($>?c`tUX2BPya@3UG)E+VEfea4td1zw){`&e4=dMP6oK^oizKxUT`>e;k#V@s0j!$r~kINGC`Qg7Ut?=vx>u^zb z>7MjMG3?9gI7_-eF0nrQH05{n|5K_N>c5o#?ftd??45mE)tiL#z6M=yJ@q1&r&lC1 zCf09R!EgPa1v5A9JISNTdsg;eMC{bRPycUalfE5MsP;O(kH0yg;nF(kZL{aL{t7KO zeWYD&Yk$IX)xQf@9)9rf<&E1TG^26mA*Sw`9j321R$jjGXKBfe$l7}0%80o$XZ#Tk zO1AQS_g;N%Rg=-ZV40oS*SPnG)?ZtFa`%ulWj{qLT> zJM@0dPMdx6H(KBA^M6Pm*C{_}o!YtQDpEyh3ptjP^} zEBd^D_xs&Z{iXFyMVWj4Cw{(uSZ{H{;k&nl*Sc9lfmQ1SN%hOudiEM@ay}lyMhM}|N7>y z|9btuU(@&Pj+f1Tzi;X5pWCLdpSEsO=#C5bWxwt3@DuYV*cNe|Y*O!azi%>eXuzhzoXT1Kx`|0aXEVS6ikz>B-)t#7!41pQK43n(X z*RTKkYx}F`*n9Y=*QdB%Qn5=vHNfI*LBZ!Km8pqo>emW8mDdc*QCAKU)MeV=EfFk#3TD} z>#wh=_v=erN>3jCZ~LG{{qI}-x~+BVMf~Ug+S>kWZ{_PRoxb|7w_Yvw&e(MQzv!2( z>VhBTP3Cn@nbsY@$o78P8_Bo7zQ2w?nf3ACrZ*~)9{cCTH_4T$tUmPjR?*+U&|3BX zmZFkzAO1hl`TVbHHb4LUNiV*>=zI5Z_n|tDw?7QIPnHUo$(-YJu(~QFm>&P<>n9Bb z!`_2ipS+s6_j_@CQStj<`>wC%Z^`<-N7=Q@pJ#&Alqamr*N^Fyy*}sq?U;u-_vDMI z=BsyCUs70CA2xa|ZsYDFNxHIqPrhZc znVo#S{$dR0{u4eqF85E`nz3!lH~Y9K@AZkwm;0Tx)xzWn6Mr`sjiAe{6cWZC^a+5A_f8{{$`l+O=+N(T`tS-?@hWNR7Pz zTJ&?>roUM}Q@gJHuU~C%o2>F}f8f(oQni(DeEKH;J0G92yl2vnsp~$v+?(|N{F~JH z?7LU5t-cyEYjxe)xK5FIzume&Uq8#duhTy~H1?;)rdj{vj_Ul|TdEztzHHNaw}QL> zj9SE4;U_n_jqYZ^WK?@%GC5AJr~>sUG*Y$~$5w=Q_0=J5EJ(f3Cc~J7WEn zl{J6w>xb70?Y$^_e|6aEsTIk-Qs$RJ0&h=PoZc3)`ijfZyNq{IwCmSKe|=lE*FXHv z>Fc+?M}2>N_131pSud|PZ+(Aleem^mLEHM`T+5!ixV8cY{*y%=l`b=%@)t7$PZuz@t!tzS66|rv7BfrXLlTHrBX*sNxec`?G6e%YKu7{RguS z7%cJs7VOU^ux#$nWr9`_CtF{9_5Y|Meez|w>)v12*LuF#x<6>roX)zK_+@(WdhwHl z|7IzjacE{$I`Z^J?DRt)U)7$`UdiLuKL3BJ#UTNhr>D>od!8& z-Jkr|iLL33%haF*{vuwU37^yAuNTWnPWu1R;z{@m*)9L7{%J|E_|9AZ^+i+FzisiK z3RxCUmc5mt*fC+r5Et3vUt?=G( zyj?j_n4SHPb+&U+8!LP8vE#0yrZXJeUEBY6y<~e7X*5m2=tJzGIeE%)3b#tkZd)DR zID4iaN8hZS>8q+9t`bS)jyU@M^(j@Ig2cU+e{Keaft8m<0tIjeGjqhR-frJ^Nso* zllS8LULQ0y+hFYLmAp4}@@&g$megY>?2EsfFFJZ_u4?QvNfuxJ*E){7KFiwv%f0r# zZfouL$+!RQxijVO)AO&6uG_yQZolE&`MWO6zxZmSWlft*ZjJT#Nj#$uZc}dq0|V1l$62=hS**2x6?5wL-nxEkqJ8-8 zw^@I)*2Ra{tqc9Pt!`)BYW=IP{;7J*dp-UC>Z{_P|LqLduhqITZ|kd_o)0EZmycar zwzg0X-3`BWwdmivy1n|XWh<{u z+plu}hu*uZ7w-hGP}w$LJbiin@h?B-URrJEulmpX0S3erXY+D@Kqo0YGh5G z{xV8P|Nr%s!C$XuovjwWdi{Ldx}Vuz^>O=y)K_gui+c5b>&ka~qnv8~Utir<8|z({ zB&+;C=A{0k|51Ol*0)&nJxRZNU0l5O{^~E^Pae^IER;qIs3{<%&+@AB>0ldi9;Hn_wYBzApp-kN*R_UGsNCL!Ce zUy6>`eAu)7^xeDpS082Hi}Afa#r5@PvE#`)fV4*UXRpy4t_V$-Pfbv&O7+Ug^))<45m#>)kmS z-(xCrL1OabV`fWtZ{YKv|NHhN;prED{fRBv9JsXhep`;eHCGt($$GD~8SQ^x{Xd=g z^|k)dcmKk8A3vUGZsM%Sy1rAEkFg-LY5qkCecL*l8UMrVor>1De@T0PSZn2S);vo=4Zz66H}*N?_Rvv>s!6;f8K~&Cts}d>{zq(gHzJ~xB1uC zh5pym{OI9T8@hXb=&G-WqGOZ{rH-UFFltGnu3anlX+4YEx%%LJQ{yY zi#mT~`H$(l_Qrib)Espwre>vH`l&>{Lk)3HzuNU#)tFl6FS+$f-TnMmQR!uTiyw3K z$Q!ri22NzY^3vri`wH!tiCy3FHZPd%c%u5b^tS9RDISjN+GBHSj}$*mdAqFBuuuQt zoj)bof%RvM_pB=vHrnEQae=U_HGAWt#e3iQshyrFFkrC?BH3?dtVnVdEd5bUD~Sr z$m!v#o(Hb|Kk(PSJ5cSdXZ!v|6}9){L8V$a>@5UyQe%~tbEd5KgC9R&Lxf;M}?0DM|&^$Y_n1J_C{@E z?~@;IPkQ6M?f9o9y6Fku&q?mz7XIV)9k1r!Q?BpP*N{)Vb^qk+^)BB}JZF@Za9)`3 zlJmZ`P4%TWb3%IaH)PM_x-nZ+N8yL?ZFWX2ksdD*VJ?lE@y?(3AN;ZZ)1%~F@1FdMvQzo~?|kmcL-%V; z-v6KZ>;9vPs-oCgJEht9AHU?9YoY1NC*!d1|FSK*lJ(nawj{3p)%WXM)BfheRnfMO z|98$1I>`1tK50q(?BkWoH_ixN|NhS1jWfDp%hd#5UvstI{~+I6tbTdC*4^BjNrg8n zp06+8&KJT`bS1>cWzU&MweM1oS$+(9Y!?{UFS*HP(Z1B$R{s_i_dx5&H`7%2Z&l23 zytg7|^5&NEn-wctCrxsaFS(N44X=pV)JK>*c)x zpYDLx+fP^~!Y#{maKeR%JTWegL(PE-Q=HPvqt46ReH8cHR`uOeeXUQcy;U|IvA@3R z$GL5P>yK}Lu6F9@(|VrQU(JIq?(g55!hiMuk8dv)&Mf*r@!zZku0?y5iyynyZ4eZ1 zUff~uFHbCE;o2Qnj{P+;Em<(@@Lu_yQ~qwB^Pfp1cKesCe_6qgQetDy=6v*)dp9$4 ztLW?AeKF5XE!Mf)27SCA`0L%Lo|2fw|NEoA$Uc9h`!wTEeM)ajQV(Z%8ue+t~ zZSmYM`{$ZIYt^~eHa?78Y<+GHTSoatyAQ|s7&SzCG{m@gK^r3?49rA1TN)DFP4hjwUw!Tve^NVV#Rk8ghLa?F0Cy)S2a;?8a+)eWzD8sBVLIGy85XZ7SaM|~zAkJ=g#_3u5Iu* zCH{K+-&_X?wvJsZ+r{^XSawfW{7|7De^zt>`@b*y_i-M}Qs(&Zp0;^l*16fk7`oD<4mP*f0(Yu(j1^F{x^7X7vJmu}r!*Z==#t@C$z znYUrv-c;}I>(T$0$Ndc|i`ShH^d&aFIQG}(MgODrN0#j@*S%>||Ecc(8SSG8mqS2&G*UJ<=bDhy7Rl!{Ua$au=WI*tqqt*8Po({2#7$_4L*~(^b#D)?DWw_k4Z$ zTJF`WcDP1G|BdMU`m#3Z{qyOiySJ{o8ymK^Y^&hAR{R=(QYvESp8ZcnqfX#K*qr2bsE&P{&a{ z@ACd))!yn)lT%(j{rYNi=le;k&Ux=sm0t2C-224^@8VnH61kGQIRm~vJwIV)_Wi_3 z7m7+h?%qA+`;VCqQhT;leTuvPDf{Zm>_uCvzU*tYTDNC;LPoK}wi=1*oaO&7D((K$ zRCZ?i3zzsc!MB8C|2fPwhM{Qvn^`%h)xf76v5e?H!B z`vC?9CQrv%cKxeZiwgJu4{Nz@TlM|b-K%TM!*9KQ6t-*s|NsB4{=U7x_TP4^saF3P zIfQCH6(>)>zuxuNuC+;TlP>GrH$U_4-_=XEHa|J}{(tsYzZ&IGy`ZXBd#e&-EA($~ zKmGCU-1-&WEqPw^k4+FdDBgGE;LqfQ2V1WHI{RzS^!<;rCSThh{(jfjm3rPRDSL1I z6+O1^+0M#WrS;eTudUl^`tx;|f8PGDtF{^i)_-*?@P0Y*(K^lStyN!(qE|)jzxt4S z_ui|M!%zR(U%zg#ZE4-sTUWjY*8YDRv-8=7sfQ;`zP3N=op;Rsxcw2UFKT(a*8Tpe zeQots-ON8*vo%)FzxK3BMd+92>Lu&EMX&4Ly8b$R*HNW#*1vZTmpop7#v|`quRVwG zC)&JK2dRn%oYg5Yp2W^*X zgsPs#Ir&azW0}@J_5bPcCu_sk*G$!MpK>g@`;*xByH$t9!m55gtSO7JiVaHnY4mE% zn!3G3?{}!+*?yZ-6y=vFz>$h!f@83Ogw?gIJbJof;+j~X-#;5Jc zamu^+_4k6ieerIm;yFYv&bU?*X87#wr@u0tuR4xQ39s7cAE9I`ynL7c)!SeHw3nY; z((`0{Sj$!KUq@B1-d|){KKb&WIY-=W)-0QC;%{4b`rn`HlP|xXd||qh-e1YPr(diS zdi_@LeSQ4twCdB@SL@fT*#Gs~3jgxBjp1wSHg$YaUVrh!e%I!m@%y&NSM7{nvbH>K zWBj^6bMYbv1_q`|$65CLr&vpmEX=E||6lw6|9#)PRqx7Gh3{Pdb;>(x)~nwyPOZ&; zwm)v#tC`-ldZ8I}!@9M$7VrCbvtZ?~qxN51Z@t>7eLa5l^w-m0U#(xwRMBUudtiIn ztbav1YvMmYp7z`}wq!%V%jh#=^8YrLIDJ1ErFy^i&i7XjuKCsc4YJw$G3#;dANN;R zFMX}ry*2CllhsS>Uww|P+8h7+uCUFkX**_C+$o>pUnpg=J#Jan)#O!IcYnINRQBKc znDtB7U)`5!8#C?o)!%=s@?-t`|E)>dzx4Iaye$3A6N}GW_!trs`^UBF>t+9x# zv&3Ir?^s{qthVaq-`M*v_UbP#FVEPk^Q!9oz8As&HZA&p`N*%QKa`i7#n`Ll*PSoV zH?#k`>TBh@<$Yh*pUphHlJk$>)~(?Wx@XOv^7Y#4YrFsavFkkcNI}K=^y$gh_U`_= zQZKsuOSsYhSBc_vzyGLT6nk@m!&E!g<=4?)+M%oMT~9?%x;SgE=!x$2W*qa3zkjco z^Ka{~t-nlNDl|^>Ym3Xf)*lqq^*OTJ_4@O5%lrRk?NLjv*Wr@j+q2{OzyER5{<~H* zdh4&Rdlz$k^5NohcPndlevbRL^+$(8<=huU;|8W$Y(7b0-{7K2~@6RUG{crNkxRx zZr}asdtJ-F&gp4~%RPU;Tenv9b@%$Hzj1rxx7J_N-uLutwal(}olpO5|MS&`t+W2$ z$-^gh-&?q8lGv&e-37P)z38|3TKZyr*U{>5-+EzFt^K7hT>b9;*>~~QyLIcoR@TP7 zTbVNPom16Dm+PIP)%Rw7QL8`UUhCIy{dN7d7^$*jj#2 zwrW$>{eJKf!|cOMifS4*A5Y%7VDE=(J3TAw%M}u$?zemo{w-6qewXpKV@FHHtJjAwdaAQ3 z>z#l2{_x%R*FRCtTz>z>`uw;(0uhQAkA1nIovP|^#&Ov(Q_b~H|HlYCFyW~_pLzRh z{k8aY>#qexU4QjoXV-e~r^nj7j>vOP3_2Y3yYcD&J(U(G+mmV&`Xt0T?EY)axwu|* z!}Au~{8v3S3O-fqo_OwmD9+Wjkufgf_F|7^`PcQ&&OiTM?Zxs;y}fboa$P5LtDV{~ z`NzN9lo$K!J?Dg< zx3>$v4y`@;!h6X_re(imGxpd2TeGREe)IOcHJ`Sh=ic!0^@oRx(|D%G$$G}WG2Uc< z{fqLCr+;h8PNfB$;F%+OUKK<_4F+Yjt z?8^Lidj#EHhL+W}9(DbHZBM|FC+WNFH}96d-Z|OFcxsode^~aude^FV)sqihmzT2d zdiwFN1@{+^75n~uIqTUL&iyyoZhwn!znECh=~+$k|2Vu3tx^8e@mcnM+}^NdtHoE( zKao}bbMxQ$XJ#;VYlE@St5pGtO zLk=4sn2B<6g4*yQ8m8Q=PK^c=T+{@W%~#bfIh&Mn_jSasb4F7Qj|g5Ukz31t(9YfQ zS8eD2{Ygh}Z8bBkj`Fq(Nv>-&)vi-Tu6eGM(GVZQw3|H&HpN74^WuGLE4Pw(Ft{dU{Ce|L=SdF)re-M)Ii zvd_UMPapOFu{2?4GhHJ5dfVB~jJswv0Xi9c7FBLChc7St^gMIzdp!nC#?o54r2swibj-zxuv> z_tvD8`2TimkJ-dM(9Tqq>kMdJ>%e^B{QhlOVpC5}YyP2WcrxnR z!=NN$E%3*nF)drXvzW0V^N`Ad3A6Sd@OT?2;h`RPU{0nf({Igp>)XwCKb~{=ciE%e z=AX9wr<;<-1_%Nvi3_>cj)Pt{okH^f8Y0Ke}4Zw;yb(a_5O@a zPZSq+FL?fG|J$q2X0aZ8$z6Y5{fpf6#S1?D=BSfZ34DxdA~V!fq@|b+Hf+c+6H{VsS?IVS zKu3hTrAe*g$5(U9SLx=Fl`BiSt|jGpE?O5MXTSc>x&9yWe>cCLy#BsR?xcx--mg2q z_M`3D&)@y?i_Nrj-D5v<`M;a9fO+qZ)bUE7 zNbhohh{#7&vdm_Uh*^b-#bJV%^by4-9;BU;aIMXsdwv=Xl-6DTS0!kV4^0(dgzFwL4uA6SBsOQzyufH)7SN7 zgT=#lU70jhA}}s#$%E*{OYSedT(ke#|4E1M{nP)u_o~cEhwt&ZldE@?ddBY-n;%@K zX{l(#>{u3Lf|Ja=6I;U{d(B4+4@0 zJr}F|5%>MfzjBA(^BasRPuZ?{T-LBIUBKMr`(499Gx)x8GILXQliMomr4rUn1wO}I zHZS4*{z9Qqrz2?wD_{4f86UUs&ksKP`B%`My*5JUcpVK}+I)pgM4B8|U-`MFg?++` z&mMCspDE{l+h74&m%B=ghqdWp;({3_(%dbF8WR?PPoi)-=x8uSLs03{x$19cPL`U! zzBO~zO0U_WuUzKv-~99cMb&rzU)>+7&3{k4`Q-1yb8AI@zdSzwRc+q=&xd{oForL# zj<2@qYEAjRs%zf#*ZYl*PyK)6&yBTn7V2Az-~T$*u{CGroZpF0&&B)PEDn#|dPRsK zZc27~$e(Y@oLlnWil4O7PyVhN74I2WU9|c(GnLa%ZQ2$=7=-^%hf#x7~_d+OkBo zx$@4GKMx)-FfcJW&T zUtOBK-}}|^?9d}RyTi1v{$3yUH|qV@LwojB*@tPr>)f^Kf4J(tr^gQhre_B|2OLU>+7#B#r}F2x54v~#$(>v8Mout zH~v4awR_s%h)D+>RVPOq1$V8R{59*q^tiou@7#Yl>GbK-rY$e8uZ~_{R{mDz!Mkji zcdxH5KmPCflCLjzzyBvBGZ1_{3}}j;LS4s&q;bpzvn&Oe~_>GUi{zc zBY%|FyJqHpzHxTj-dloGtM%%`f9;uG=cHD9_j_Do#fjFVzossW`@c}Y`^nwkVY|c1 zS|?7Z>)ol|WHYa8)pg18Ut9Kdi`V|j4tf{p_y6<-=Sx#hzWDVn^_`@?di`df-<$qF zGP-GaSOOtDjB2rholv(Vgvg`tR*oB>U-yvewq8Ve8cwz1mw9v(ER){*zO6 zy_d&H*(_rEbu`vqec@GIVdq7;b!&eejI25-p{7~4w|4a%ZS{3a8yFZE|2occn4kr^ zAJMo?f1G!md;~e?tUuGZqNEO`S<^q(O=86gYML?J9;)e z>(jd(b?c57#jXo~b#>Jo`PIU|zW(~Muf;1%y>(@~%V`(!f9k8($L$Z(zS{LQ!9Q;I z`)84!R^N|=o__VQ{MOOBt?K$)1xt?{z4bJ%<7w1U&|OER=la=;s;25))6Cy#vZyv@ z_3rhWA~z>rEcXqq+Tyxe@7n4!PWz|5{XKhk*O$o9>n6Ka?~YmWF=fi@s!Q+py*`;A z`r=*aw~evKqPAzumHW5;NJ;jrNmaVxQ)aFdESNv-_kOq2Tg>+-DIS@<>(izDpRXUU ztW}fxI<-nCuv~rh-K*cc-~El)RA2vZ(|Yy9dEetYHCDT?zqU7JXRXlg)k{mm<8&^5 zi$CAFzC$B-TYW;hK=x0y{U1fsQf4il;-1iEXI!~5_UJENw_hqj_cPY&uZjJ9<U`k#LH`f8@1$24B?|9iUob7}U24qv_}o;hJtb?@xk5kKSHuW9Qd_C@Vnw`Z^U zucP-k@1Au=x%XL|#2Q!5Q@g_d zR*P;izqbDB^4;GwW}FXTU|_FyoaHFs#adJ)cI#KT_I15yTVLHxn)FZo_5c4T|Ng&! zYE9YN|Je`!{o38}^<{0U>2<-+A~W)Xu3!0jtXyIFUr}wFy{o6cI&!b$|0Mr^+&MWs zKd0TgA9r!{<)39s_V1l?Z1ozuxWX@823H*4t4(RzXOi4t{Y!Q_Z~3Qy_cM;`tT@^C z(Q5MRUt3?e?Adzs{;HDG|6jO=Tr!>?ws)D%I{wss7vuI;f6cqTdQyAc`?{$TOp;=g%H+&5?LXGuNt=SyM9 zs^?Ed5zKIWi{)ok{fKYum7xo=SNpLb@`-t{TJ?;n)kUabH8nz7V*9TlUCH%_ZbwLf^8 zV0XT=yZD7dy6tgxXLi*tqY(F#zjQtQ)z#f~cE2sXf23}&-K&4Qe{oDMnowETeD|E; z+S1&)LO%Z|&Dr?v$W+HW)8<&57TsSZm{DhcBeUdF|LMOK&8$2tUO)J^V)dQUS?*__ zOgFtOd--3>9oHjOY%#At9Wi0wC3gPMln*^iT2vZl{rq{R_iam5sdKLWi3k6l+D~!U zb}IcfZQ93E6Q`@M6RNs?X64?F^+C-a)24R6`ns2EQKn&p-H+-uPk+6Z^W$45%lXRS z_!j=9>-Jp|&62c!{q>u`pFa{)1*+?hO_u+$(em5(`>Xc(ui7$2z&|l``olL{>YjE@ zRuN`jDxY%cQhj>0qp{j)o0LnrXW((B)vvOm1Nj%iN6qC&mDeoy&VZ{PjFEBa2c3UBa@D|cHe zR7$*dF|Mr)?b~#Fc3zv1?2M_4C6-)Ow%YvdU}b&n!JK20)!x@^ebIk(SKWm*|2Ll6 zv-<56%VeYZiL0yEA6|R@?&z_p+I@S|M<);sVkPhwsN=|8pE_|t$zGBwJicV zeDAh=me*ajTW)E1&pgS8O?{_#eqJImS@f9J*XjFq1&b=)XI#r~|4m@UyqEv2=Bq6K zAAQ%;KDO}1%e(LX&OVsNwL0Hc<6rRO$!p8o4Axc~>xlnwI9RcD<3xD1{{%QYv{pD}FblA^>rh6(CyFD;y&ig&>>#8|E+>hjBk#boS6fYU^x=S|579Ra>axC1+kQ}X)nneKiq$FpW&5IS9@os*n?KFKj{oiJ z&Hjw{>(|$H-~J^(S8+x*Q~ZwU|9)FPXR(WnR^R`!>6Eg>58llE(^~E9+4eQ1Y?F5A z{kc#5Y=o+APKs;y&Dxhiny!7n*H%P7Roq_l@7hhN)Bn}_uU^{p{`{)<*55h09MAqq z>Etb)TYEjyM)!|VK`R@BA;dLDEt9LJb9+;%hHl4F0nOb(+5X$d;N_U>`(MbE=>Fu{nOyeFYDC5 zHB-tb<==KaJZJa*@>id|e|?SHTP>1(`E<|w7@6Xy<-$!COb{*=}p`f8Kk4^7R;Ry{H=RkNVT6@0ubozq~8^tLv+;FV{@Z{>hW% zUHkKf>X&KXX0ES4pZ;!Q&+5H_)jw@zw%W#j>S@lLzO-P(h47tBh_q6Y? zN2{;Ludmx1ufND^t!%&FLe8kln*HiqjCUVkU|@aiILmQD7Hd&(veMsQJ8ylh+M2ib z>#wi3zP{QUxAxc8s4tUN*OtWpi4WTTf4%4FxVr&jHZ z+Z#M#O4}o?cTx?Kw)Nuw|Ht>4WUsw5=hNLCVODo?&PQuDzLHnd@10p$sK2!MLs%SV zUHykwY)e~z?q6KvcJ}_;S68DphJV&CTl?#KrOp5MS65%(A2xaEOTBNa{|2<3d>j8& z+46pEli<#kJ67H?=H2dn=xdn4m(T&uqN-+J9W?fSa-Yg)U$7OkKB zv*h33tL?$FHvD_}Kh*Q6r@PB9oBdJK-e2AQI$S>L*Zog<>-Ju~d-dL{Q#<;@Rp;$Z zeg8`@_Fo{g?!EpbgStKP>spgOKb+%wO}l!v{Gx;E!eQ&e{{Q3fw6WP=CT+gI-0tw)$(;&f2WK)zj}B*~;+m>B1=9d*><~k5&KLee;6e*7Dn$)APdrw9Xf> z+gtR19Vbt=RQBuYn=2H*?c4V0?WbS1;p<|(ZNt05Z@qey((-%a`d7C^yx7iskP+It z7?qO?{=MJer1HP&ZFYT2{`7hLwiR9)PvdxarE3*_ z&XP)9Fva}+zXIp;%{AwP4sUW2uzzw^|DfmN$|^R#&ElUqxaLH~9ya)S>B|&D`>FQ- z`BKu{l(tw+)ra+j<*4}^R zn73Zw?vK0DXJ6aBW|8&#eXlP5{#vbm|A(I7${_hozB_6=mNXncdg!sMX=m%EsSkb_ zHS9YgsB*vVca49@$@zC2bG2S=sw;~v>w2`Pz{`E-3DphBl|TM8fBZGU{F%(m9k$1W zt-K#~qP zqLrIXFYa((aA@NVql+IWTEFysxqL^$(FwnlIc9IoUh{9q-SbbreLpHQV~<+WyuY_Q z8o%tj_(kB(e7o;2_N`v>|JAeJFL}-y`}U-tgxpXT$ksJ`ymt;TtB3*67=SXWIbw%r^s{EQ)E}i;& z`kR)6-=rp0OKku2X zSQKSl`*q1J{U)dM^vOT{>K*HUX?xc%dU`MZ|8JKo zlRp2nc_NViD*WWfCDp&y#LoD=FYfor-~U6)f9iY>->%;BZRJws{Vnfa{i)@={R?tY`bj!9!4FJ86LviqKA%6xR3xE_UAb0 zura_ul)I%xsrc=_&2@VohDsjtS{WFu>8pC&d1~$1kL%Cdv){ij4p+LSADf4CNVUYNj>V8-j}cAjB}&%x`C z&Cl=sjfnUCs=K#A%JPx!yLXHl$2=y4xY_Ti`hIiASNV3^JFowiu8^pPEM*7Arw<8*i5|EbqHmx`QNC|t8=1y8`M#kub;K4)$KdtQu-wUsepMnr*_ zNL$lEhYb^aRD}7M4o$RhQBmf4T6pj0qt}yOsm$aINr((&)lSl%ub8*)gO$+|q5skwKi&D}uW`GBCp|6y z?Y?#AZ~cBJU3Yo6zxs^AkL%tZePI}W{4+~psfx?{SEhFMs}=hF*F4zUpto*PhkWl6 z!(YpufCiK&=%{eBG97GK5D_3I#o7!x@)k6*+^}JRpOQe!q327Jug|^r)Hmm~R#$elr{;f~Q^T&v!;+}!x>(!6bn*(@Gg}Ql-DSwIa_iydIo{pBFznxX561}; zyYKz9(Y-Ngq3%iTig=3!1{=cP>a4QS_|$j+v|3L`L|KgKNWq2xKQS)WwuKHGLNp|V zS&uAKwA8F?(C~RkJhEVubQ(gn@?qOJbs;d@Xe{VQ?tan z1@^zBz!PJe!XGw0#{2e;Mawzbt3#a}8g|5<+JU;Tyvu`}Px zJPn?!Zv1>I^!Cl=PnO1+zG609eADQcMM!P*cE?LGulDc%y!ZFJ*t*G1*7rC0G++Ot z|Mhjh-u>u}zmEU-<-axb$NUXTju;>C<$7=Bcu?}xVVNHx_xipu_dRSC(Nl`i{r2vl z{*|%^!6gN|Z0nd`!96m<}~;$S@F7Vr^wiQi#wH@;RUXTIh&))q&Z^7-qJ9SxI;Km8U7b!t&MEnAb` z=B2b$_{yW-g}f)y_7o{R6R zu{wFSrs!JX?9;Akn^Mndy($q-f4yJtg-vl&M)Y2<%X=E)Yv-FRD4Dszrel8Q=U>aG zzG(H6>IxD4{&iv&kEWyJbkAdX@8{brFTbbt^L6!!4YTu_bEf@!nJ%5)|7i6RDZYP( z2k*Vq<_ipwz0)^q|5Ve!6;E$Asw!Uo_Urso28Laec1Hd-lv%%Xe&ErU@r}Y3pS83R!ca^poESeP-Q#8`p?;tE@Q84chiDG9gDqScvIh!-(-N(1 z4+9psw2Aj-&p!9_^X_$ZyH{_y7vtl5YWfmCwt3fW9-3tMRYkvhEPwI05ce;u%NMMY z9mL*G_S)kWu>odA$GmcE1O&**l*z z&8s+E_jc*+6$--B{oXn?aG&7cxTSn&Ls-ndyK%4Q_n$Z<_@>f8{LhZ4@1mEFPgo{j zTy!Ahy4&xs$2RX=emHbu*`{aTAIfDYzx(*2xq*R!b&KOHCxI@|);&*;HRYey*7*DE z4ZmOdI`;0rO>e84Cm)*oM&N~p!jF6Br(eIalz;iUb?bM3&3dx>U3+CC07k z@~n5yTN55|pgp5khcD}BhGN*>`&x&8YW=;g-?4gq#QyNBd+u?hF8AxM`Tly{)k(#7 z>zYclW6w^1wWjFqlkfjoRC>29c$72G^qbJ)DesF`1m9h>H#RWL`pdrcHSVRww>I9` z7i+oqpwZ4}uYY}i-#Kx<`u#7$HCmUArk(F@xhFk-r)8?PBy&Tr=1K4O*Ir02y3-J`v1ZH1_(L;8 z0v78^n9V=VGdV_MdxcZ$G^cYrXEx28)O5O7sC@nWLo@Yu>R-&OS{twQ<=)!+X=*|n zs=40PM*Hu5{bBCBZ_nd*lq{Bue7TF$`~5nLtGcK6zp7cBcI)x~BNEd(((}%=sIRK} zKg;Cy&-!?lu+)aK*se`~e?EA(`t|D7?)6uHP2(3{GrwcU?AjEyB`0lf{Si2LBmL`- znBK(N%U7FQEjF}XlXjV$WdCDVgY(q|8y>0L^Y8zt6lovAtNV}ht?7v`xe}#6)C^gl zC2er2BEthe@Q*_wk}To zrM>E)WRqFZc_R+(DNP;AT=hSRZT+EVxnjD%H(zn|?l*CEzv{k~XIb2Cn`9x7vWLl= z=e7Qx?O!6ow&lqP^Bry6z5ehS=`XvQKJj(0|K#@mhHGNC^PTVEzwD#FFU^zs9CbD_ zu?YxSps<#oR;mOgBGWuFoMU+wOTtMPN14Enh*hI;L7={x&p z=l$9z|K99V4vCxoH*D3Hr&B^EJo)th^w)pu-z+V=zxn#dg&z`g)T>_JPggoNp?>AZ z$a<-z_wK)$bol+IqFJ%C>`p$ZyE{Ade%6;wMbq6RGIxK!eS7=%$tO>~SbOW#UeW6{ z%X@AY$G_j&z1Q_?PV&vYQBQt-`qA%nf6u)6U)IdK(K6{nt5-lSFf#}mZzGuJIwlx?Wf=!JCx`%j@#c+mCo z(xTX?hxcE{y*hr@zIDX}X~PR4c0qUjLi8W$X*qvU*0h9w+b3|67?R>2`2VQ@-naJ^zTEY0st~ov^>RS2p26`VV!r1IKdYitA*AE%Q49C`MiF+SC6}jo+R#;rq{xT)%dGIM){Cvd@xh$&UJ}>FYHw zmsvhvr?j{FYv#${{|tf!>p6_4y$iGOd8(UzGsVo=`8|`_UTNXl$Me3{*FO35w(67U z#uIbp4)U+^%l)-$$LyH>uCKqo{+ji*>rLAxqmp+Y!bN}n`}Eze!upMR_1nGGpDr%j z;1hdaSI7Kj&z=9fH&0F5_JaSL#OJ`KS@A1>vY#_3xN?5eN$pS3uCL!l@4vqEYx>%% zZL5D9?szF&+qU#?;+Ng~-|VS5HvRhZbx->*Kh=zI&Cj^?Mo8HHwfg+fRK2H7`wqE3 z-*jP_ZJf$Ai^2=7(Hp)mzZjFZxpVUMC6Dxevna)WGktx(_WS)r!>F4|`*Xg8-{d{} z{eD%|<-fDcGUhyHPrX#TPiu`icYydNVW(TiGPIl}+w+D0U!Q0DI`;dm$K|^pt55p< zM(|9|me^LS$-0)ddwo}{o9uoQnD_48-)X@Ds~UcW9TiyP|2V_?)-+DGyD!5ocKko# z8R)KVrqox*Y>yZ_qn_m`+Y5q;WA{xAM|XWx>neZLjDHotL9 z`+ND#tn8ba`}h6+qpxD5GJ8Lpon(HXnW+Ef%DSUBzIZL3R9e3&)Ba@juXRhezPfV! zuB_Mp)pu71&KjX)=jB)9 z>&3gmu3Hr}P0y-wuvuF%xfFaE`sled1;jB#HV zzb-Ior@^r=(}OOWW__yKXumF~+R$xv-Jidi;fqe+nDw9c@9M8FO~YqjU-duk-JeMI zV_xh5FD|TGefDJUdKT66-sybnFK$@9-u3m>s<=qos11F4#e=Hm>Yvc6aqwkf{%9s~ zoIN2W@ckFvWjkx5_sYb~T2zw0DRr_^rI*k5vRBjgCjG76yK9$P+4Ol!kNp1;KSzM| zl7%**-Vc7VHoA zd@=Qr-I3F}=fg!y;_pA%;>PrTr{(UyV%qw@{!ivry>KPp@O#PpN&iK1YM*l^Ys5dB z5Z04ttA2gaTk+*P=DS=k`lTXdeLmi2x5JX}e=qFWUZw6@Wp{du@v7@lb$_eg23@LC zJ{4neZl~S<*LPKR?|UmAcK_YCiCORO{fRh}eoOmE{tcDqXT)E4?-x^2uPiQ#y;~)I z^>yg0PgMu&_WtDMo3Z@#>aP`P#ntQkf?wZ_|M{vj`^nd-@2i{MT-*8C?CFBv-KX!JerNyF z_w#qm{#zPu9>03Zx+PyT*R2C><6)L`oaH>>6>I6yKXVtF-mm|D|L@=bzm9(W|Nr}~ z>#xQCypCU6fB*mQ@7M47umAr)eEpjJ*LFwk-IupCN;MZN0MoP0*F;CEcM>*UxNySrdNt*WPz~r~gwoegA8F!mgK*d*?^hw0M_i ztTC%PX7c=6@Ur-D&s%$AtG3rGeSdwg<*MngtuNyRQ~&?+pX~Mf{f_RiRlCDhg|2=U zwR(NjmoN&6XuEXD|F+W#3MQ;n*czq+|&o3oPkL&i{ zzrW~e?ETs!Uy_u|R_w2z@KUVDZtn??A19ywuT8l3_uES6gqG#{UL|j<_O$i9czHSh z-7Wi{x|ym!kF0-eZWJ4Q_22rdDxZEV3`*;1WQua}%sLeu7<)d=`sUrgi%RBfs`~P$ z?Xebed+${{X)8LUmCgYJ^xK1EXI~^@xR<%`~GE?Ec;>>6s3IjMc@6s zQKwG-y&Jzabl!fx&ChQs^ZtoH;{W19)YtWz`Fh&>lXOG+-}#F_b`w7TOvR>X-}6fv zSLE!?&$^`CTd%rMJ$swwYt`%1S808J{&c-%NJU@$-=zEZQ^c1&`{0v3wf9Aucl5WJ z>lXc7_xr8r?zdTc#h2{AzCVoX$rt7IXQHhB7VW(n1X*P?87r;2_OU0fvUfgdy6@qQ z|1sq?e^j~mg;!*JyrNUsUmE@E{<_cadusG;g>~L>-?^@Tb4kM4cMn>te07=`zU$9o zKEL(t&EWP096CA=gFAok_&D`t&gVyazl+V-CVKeNyZ&2M{&z0^m1jL{m=}99=tl$F zMdk@P|L3KgnrnVx&dccz0%yWB@7{c8vGePnM|GEe-cU5Ys4l>@+k4ZysL7X4KKyzy zu9|u61JU)-cp9|v!`K0{3Bg5K$!%RL{lzq?-_vHL~alxAPmIUFzMuy4BF+<0GsfA!Q4TNghT zI{rf{>;3EB=j1%C(|qxzJ|+2Gt#D<>6~)_+(jT6--G1Uu zzyF4~KNDEC#1-3oPF|J!XMysaUl+n-9S<;SiA?Ykk!EE&Xt3dpftXP5p@Rh*0<=^( zTb)2_UeyG(tgqay{+)mO^SqN$Gb8mw&pyrbT&cH1W_Re%9rq2NJwF`ZU;gacZ8LMZ z+WV_)b2{a9wtby)WnB;Nf=)Y*S0D9CziDrLw`ty<+YSGfg;cBFa zYrm5N9p&P=S#Juz=)J74>zCutRrTK|R2~y7TEBnwxt=78^*_v}vOYaAse^GrbmQ40 zrKX#-bjs!4|GIy@-uU}o_y6bSzgzqD=InU)&2qnAO;Y(`a`oT+RaNPSGG!mkcxh^; z(qvRid>QCKcY=axH1jUEdhLwk;2=`5K(A_xm<-eIil!|=-TrhFSAL4ES@3Yp zf9vA7k25x}irW+yQ>^<&X8rDog16p0K6TvkPv1L%>pyJoo}YW?@B6gkwa0_fTCYZ) zRBM!2Z?Jm->x|9?lB>4wn0RvCCxgO=vlWW}`nfDvWpZ9^{$qpAaBUrhMRy-R;Xila z0i&A8gf>v59^SAaM8i~=)#;$aiU2nitrn+;g7aj`!{2|sJ@;#A`I={2cCAX~GYxxD zUaxX@yZwd2v-cgAFJC7+U3R%ZvR+g_}*21 zF`M`;b&W6elkOF$B5=ifZOVpA0H@ zuzZ#H>GtN$ddAQ%R+lTzU)j6pU{(Ik`d@dyy!I`$`&pjw^Jl|4eu?-;%%^1-1zL@p zcW+=(ys}fvwNh?3BWONgLXIdG$DxM_2VxAQxFtc!G(^LdTaf9Yzz&ym%j?E9zbp2Y zA9__?JXK|3mqmAh#eRu<72?IQ8S;shwU)0%1#pzj4Sq~iJz`5 zXRzDq!1I6qe$F4c@l_{G0|M*VF8F%ijyf1OF|{pe(!J|De>V4SW^Q0q5t*VPD$L4s z7<7rF6t^JL!9s%=9TBNcmO}+AT(ktGkDj~xyZV*i=k8V2`C6~E(u{+{3Kx8@_pa49 zyZb5Q&Q|sb zuSHGtOXZed`xaj~Rd{szi#YcCcDK5xF#Wi#GVLZyZ`G>oYP}PA?+;)13Si&lz@*Fb z@#4N;CMzzPezn%%`n4_N_PM9~Ty;O)`XHKlZ6?R{soI5c6WJLU*bh3+auMWWEn2?* z!oIgrYpdRGJ@sP!zW;H5tEWGG8&UuN|NE`UTMPeR-Tk*(a{8XRdf}zDYs0C*m>kuOhZze)8eh^?2d+>vO&b_MHAdd!1Ul-*cBZ^{?+cpT4dC^{ZXW zXWw*9tuX7b((+%g_w9?`;4QD@P{U!``0viU{PpQCdh9rgnr=>XOA-0MTDB>B%flt- zjFTIF|BGPT^Xc>dwB7HncDm~R(iSbhbv6Fh*Qzh~))|HUcb@MbY5Vuh;eytazpp=N zFUwzl-&Lvl)zpn528}Xvw`-txi<7bO!76qt1 zyt9A9eb4GoOEXON$9-pY^BXItuycKzw=Svp!ttUGEZQKr$g3-my7@H{mm2e zq*6b=U2^%AA9>4`IsNwV$#RZeu(Culjs17Ug!~7c-0!8|?{8aFE|-zgJ?s6Z2~YPj z%V&M^@?QT_fAW{`qqSG#*H^Czl=vI``p%QcNwefnekuKtb*|;&S&bvtTO#)Nf8#tM z^l_G=9zkQ+btG@r`^Ye+u@4nZsnfL#Uq|9lkfZ=Mz8pNK1KEQhsV~-U+UXN_{#sEV|?KCE#7<2;xm4J zp1%97{66ok>67nl&8yl}``2P2Px9h}9|T_?`20&_`pMt1j~AIQ{C)IK5gY)xJK%lE6f?(=+AI2h!X;kohC z|1y`Fc+FLY;g!PE&P{wd$Nb?AInh72t{-kc{jt97gWA$FX=|i*Jztx(%Js^T53j#- z)rQCk?7Cr@vD)bM$Hm7QZ`W3&$rn|xZ2jc? zPpaX2(SdJsHh1VLtgx2P{-oZi@b29_ujYhxFs8jD34D6z2ZVy&_PNnXRL^ zt%G~U#NsGn*Za#Cbnf`^@V2FxfMnr-NZ{qw< zvI?im>f5qg&nZ>E__MES-_)BtJi#ukn|}CZRC~@)J^AD7L!)G!EjC{!-+N#5IJ`N> z`+c=Q;MUd!>oW?=dzJ|sd#}(F|DUKS@^epw!6!CmDM7i7Vr{c*CQXsQBXRG6c_kL59YWfIjW2Al%Dy!`oX%H`_n6`-4|%qb}j5R ziEx~eupvBs-oDFQZ@zx4ANt;Y^6x)imbdssKmGG?_mP}*(;w$^+xGQ^es1Z0@#@3W zw|6GWcQ5;)CiH#r-c#`tLT09_KMmdSmdWApnw>5up8sW%TEACL>(0M}=X>OjyeQw< zX(rE-u5A82x#ynRk(!pJi;q4_KK7?JI%ek@G0Cuxf!>pMR>$nxmG|WQ^GC0Ky#MIL z&iY)?rb5)tW6uroFQ@Dr~e%XVLSu=8$ZZG(IcgdbhhlR~6x3UyYe}DVIgjrjjKW00> zr({R<{qmZH)l2iYF(5q8~wQT_9ll{~xe9PRRAzWZ(7`6)_O#-mR&> zs@PfF-@88=!c|$<--Q};<_nxkQeRuWr(p&pq>;3*CD0%;V z@yt4zH+$=sADQx9Z|ViJrT^l??Kb_t|8?@a-RI-aI`p%eechX>;T^gA{C{y)qxARn zlX^`nwy%8{;B3f#FLs~di+DZnZ+`FdCzr1Y65qY!W=oRPnGce)( zKI!*)Z(NeX-*Hs$mbA?;f6jUD=lA;Xm*1m*UEh26_t`R`&hPW)t&My4J$$nD`iOng zmwa*Gy-shzB-c0dnmE^WirU98FtDz0oaHi6i?#fc&?TO43+0b$pL{)K^7mWc+ikt4 zXW#$)|Ne`O)nC6~j7eKpkpAn+)Vj_~A-g91QvSN?bjYH`&UMz$>|2Ze$p6S&JNf(n z<~84SH1acY{@>cDd!?m3X{vhp+0%Eqe($^?n&#H5Ed3VuT{rLZMb+@Hg|Es>9d~UyUjJE5ky~3+{-UZeE z|F>R!RefkveB9*g`&6U9zPr9BeD|Dxf3(j1U0m_x)y1dhqrY@tu`KP1`=j|YZGY!< z@6#WDf4y4ab7h^)_5Yl1?df;wf3duBub;lZrItU|bK2Gtul?ELXVyzz+JE+>-geu% z?&%p5Lpw~lLeDI={i=NRZsM%(|9pS#xt{uS|D@mVwz9N5J0`jQScJ2gfpkVx$ijtp znR{DUL)ogYaAe$WIHRD(He<>`$>QjZSAR?v<}5coxb<$Jr0xA*`BzI%hV{LA8@F`r zwIy5YuI-MNUHvnC-Nfy$H@VJKepc+$`$EraqW00a?sAFfuL5E1->-@LsD69+@=wP9 zLbl8~TaM1Y|6t#w;7O7h5!bIDmN5TmzxT!5d;U|Um+Lk!j&9a43Wx|u&swo%L2J$H zq6vA&>%2Wu*7h98nWdor@_K96qwxDS8|Ef_-u~(DKW^T3 zt$PW3MT%_C?6Xdrb&v5p&%Av<7r*-XSWT-+XN}!&mPc>*em8EN72j$4?%Yqi>UDeO z#r)jHdu3{!A; z&B@UB|LJ6&r0jnJg2z{;ZVEeX_&xZ?LdBjK#wy9;)yrJ2<#Xmo{L+sX)OezHWzFBX zs(-rwlKrDE`k((1al;4eT&g8VOe|k%;US1~UmEDys(gvp%eP6$Qhk;NF^ZQLc z%i4c9Y}XY1Y5Vb$`12-5z3<&SdP7eBy6(C6X888{wb}c>SGpECojoKXd4#oL($lwU z`7{1?#s58d@&41^Y4vOWpY^Eey!3lfZT;ku*JonhJ6{nzw@5TyPUz>ZHD*`YW<9K8 zQMu_>S(nA-lc?{$_SYs8%lCojKluDESo!tbA4%bB{g?i1*vX>0^}4U+#?w9G9|AAx zeA+zq@ay}lS)W+Ht-ZMa^Ys(cZm!MVy8eSxRQ~&ddEfGPy4c(HKVJQ(S)y>$Kwq_=!K)C%uf%`c(D(n!o+OSnZeDtJhEZa;;_SkH#6Vw$1n{?0x-{ z?3Z`1H{E-aeRkQDc_$JdHO+rr_5N+*2bx_?iM_kVCLzw_0l_}Z#H_nwMgd+}>hNXY^h7pMJ;4X^&P5xN_7@O)Uu zo&W2?85mef9A~)-da;&j3E#cG`rp=D|6c76*YDhA{=X}3^7Xyp{8!Ik*T25xYts6! z)rYLER{ixZTeEK3(od7c>ipKn{T1KUJhM}zdB@koQf4|cUM6O06td6%^;gzNb!W^$E>ulM@u~FLHACS|6FUE`P;=#DwFK@og$eQAMVv z{Ig$wxKp~iv#R&>^3Wxjg86MJ`g-iep=M{2rConW{n;S(Puptmf89kt>(b+Ms z>DI6Oz4PRA*MgrU*3z>lR?mu$ zbd8#`uiWLH z8mfA-^wrb2>1ULta73ND{pj_ZCY!x>YZTqr-mBktT+Sr^>sb-`8%EEL`1>_@8o0>c zXx2QLA=CUq_T<<1(<=Xdi#VA2_uJF6YZscHQ?`87Kb< zo^?3++g!1$dbeobGQBvpz5KP;J8t$)E6sVo!S&fZ*{%f@zk?3HH|KSB*7;T*)_%7B z&-3_x-aDsX%)RyX*7xi{vCiB3H*D3MbYf45&!NI+b2@gLJPSMU{g!y7{px?Glw!XB zP}=*>&Tq}X`l2PezrWP%ww(U)(a*2H-~C&9>*K%QKPP6ZpZ|4A>C6AbVC8wvF6@bX z3rm%Q7t2W4U;Tc+(x&#;*29|wt!kSLpPurxTEG9o8GF}LS{MCe*O$M_zWZJE>V$xg z{p+2o{zm-#73j>d%vEo$)xLuYJKr44(^7x_)5lG4(;q?3c{cHjEcEoBU*2d^HtBba z^>_Eg4{qtTeXm}fau{|_hm+OFBZBvxeJezouKww}J9D$=i(zwftg^foi~`tsWEahdk({GaZAv-IMe zeRpkbU)E3G^;x|9*3&coul}o^zZ<_d%hBJ@-#y{aB=0}sZ+rgE+`XDdAS1~Cf5tp( zKHoVjkH>2@so$UDJt?|=zsu>e-*ObJ`@&gY-=?%#?jr{7l9OnHb`nC#lSuWEY9pYtzeRsMBPHGW&9^|Z3?h4ata zdHb4je!u^G{e|c3x^=%&tB+JqnD%}3_oq+4*G6jG)wH+&Uwb!7=wWfdnjfpeK0iC6 z^|fnKe6;DWJxx=8IIBHToxQ&-W%9r1{R|8oxsJ13CuXr06~>B}D%#fV4fWl6J#1t6 z`q0?V(*IveKi=neO^>rmd)K4gyLYb+R=KJ6p?z<@`}zleu4{dNFe&hP!UUVW@2>9C ztrmT}-9me*QQ6)Qbw|C!Z+~j*Uw=LQY?NSlxMqFz`S{-7*M9UQzMo*1w*0l0jnL`T zofq_XOW*d;H#nE!K0i9t>TG>{lkTzT+buftOaDLoZBdZ6t0bf=zLsn5Z4dcgq4dCK z{6Dqq!tcNOZytWMB)#Uy_p&wX<3LxpCH?3={a0H5T$)|1-s}8Z&js|7!&dL#_H1JL z+r_V+tao|OVZF7q>i45M;h(GC1lq56c{4rg>&;g&+^PSA?v|fCUEZbsXn)*_@@F%P zi(bt0`ksAnmgVIY&4!=UKbGr!{MP^T_s@5;_R8*$n-Csz>gwzGwK2W-v-Muj`nhNS znH-DL7X8_$-`vao_GgvokH6vP!}?0rOr}kV?|6cBJ z#q?WZW%uoXsQ#&z3hPdDY%Oo@x#5&@+2Y}J=FTkFiuoZge;til8*chhcA*-p@OS-; z2}G4qWNj(Bee&)7>vbG|T0#W_!krg+|MKzZ z`#tqtNKo9TTFcNI^~rl1PJ1~wd|l5fo9pg={p8184~wO?X&oqEZX?;q{j=ccj~6zq zr~aPZQU0mCY~$s9%MuOLMKX`=KeyZE$HzVf4mruWUBzR~` zn%(F45Nk8<=DmkTE_U{ZPnOt4xLpYI>VKTf#r$}m{ZISS#iz{`1((R%Y~hz#YRBy_ z@%@lksYwpU>HH)^yP28^2X9S#Cv)=26VV0FmKXmQa4$Z%{;b|rd#?@g*PBix^xZzz z@nKzU)GH&ix_GUu57%raFUTtC*{t%vt?%I$TU)01jqDX)PyYV0RCz(*^C$DpE%oA2 zO-`G!<`J8znUfj!+5JyGDK9?~yChZOXl-p-hSQ`;IWDkd^c1?sHkVEc=~z8k6`g=YNs4e_wgO?61nZqwQ;Kzue7x zap3jSy!ER-3SDe^yzidJ)mM_!xm#mbpEQ45RX^ur?Yf?r_g}j971$Bj6ADmIK^Y@Au)MWdd_w8NK6a}6Xv4nWbDK#ENCYJyHhU@+2 znmRjVYbgJY{l8;&+ty1@yHd|}Fs?RO{m;QyOBb>I7Z239ytHQhPy4FcNjHpQ;w}Ez zP33HnNw~J)!omi}zbP}F{k;Dt^1jNdnfXWi+2yGjpEK8_8~4Q4>+jy>u=uRQC&M@? z{l7oo*sfT-zP`Zy@xJ#L=6$-_xw3lq)Abs?_upqt`hSs+ck4%%I%z9Dfd@}#e{i^T zZvV1%oUHq9CD*6R>R9 zwr*$l9X$QFBI1sLq4-*bgE7YOfA+;zsEf(`X{y@Ca=ZQFpZ%6oO@bVrIZWJtZsTR~ zck9IB#qXw^S#(}8_HEFTsCviXm$zBW>$j}`5z~>mif86i9pS&}r6=qC+g1uVlpf=i zv3L3U>-<%Z^ZqPe*I#)@is*~BJ@J(}Q~o1l16S!Y6JeXfb_=fF`Txgvr|s!^`*xpv zc}sB4DYv9kAzrSzO?mLHM_2R#u59qd6j_UrUzV4^Oet8yle>X$T3e}vr zV@rS4s9uSg`BwcmU&u|)l56{AJl058^L~-cm^w%KV9^I5Puc66rv$0_T>Sr3>8$?Y z+R!sw7Js+D;giXn|K!$&2`4H&Ypx}(@4Q|VQo8xSPB~X(^{2?To7bOym|v5hCcOLV zHK8fCq0^qU-Q2%w<+j?lS01s+SFV5iv&er>lPUM}FIEX1sqE_vRCLAvw>@TVQ*z;B zcv--6E&0&q|C7I09X6cSzxkiyf}Cl|?|+`!Kh+`2zdl^#`uDxsyiq-_5x){2>d)jp zvAZ_+iR!ICF;{+he+bcw-EDOEQr_N3t^MonIw^7AI5oNT{kr1jeYWerW}WJ2_HGv(3ww)CPaVTt$2R=Z-eD_89{d!R=vNQ^knt5-HIE$@^e3Tn&;p9 z^3&X3{k!Y5?n~4EZB9S_dd<{p{w1&0{5#7M+^7Cuz(>AbPT|cAkJBGtU%4La*;#wi z;rUm)NB8#5yZrm=jK1GL)%;JG-oJFEbIsJtg4g~%5v~5t5`5%XW%k!({ol{mFXD{; z^xyV)^M9Y##aAS?-|bsy|MHXgdV`IzQ4H3^?uY9(1sZ|5uw&r#)KUaL1IFzO$!qpLUcr=S)G~^ z7DVWYa!h&l^Yz=WpFh6Jw^^U-7pUeF7Pse}TD$1||K`U^(>v=ef{VEhJXC(WsRmX2%TYgA9;JZQ$xi(|EpQW=QR#myNPZs zlwEi=>C%=p5l@;pPS15b0Gjl@AjaL=%;<0ev_puqr7>W^6&=utYzG4^T>3>#=l#il zeD&FXIjwhFPX&DoR{fou*LHNpzvS<}hCd#@nB2YN-wKNdZ~i;}(2dVH(73PoVTI(y zf=^!#9S(o!6BfVPW2?;ala&>|SN0!gUO%60)~2i#9LvAIie$cU_gKKp*M0ICQhfEV znrjd5+`|8D^=ZF5>4_Kj`ui};#P#b%h=?D3VsPJpz2uqB_k}Bt>hU>FXzre9*TlXo z)Bb1S`B%I7uLUf)w%z+d?A5S_YggD9KdpTEX#(H6_ve;$oSNk_sp2E+-KBby4r=%F zX||>)Yk%%rPyq6=jtXaMOJl==4A8WGM`MyjfQ~5WsKg|VIZEyZ`?tP-|9)L|!!CVY zpQ*it)59UwpZ@X}R{M z&iS#oq#su;zB6^tq9>1U&S0tAe{-^R#fK?JKA!t(IzM?*`oiok&5GQkK0RlYa!m#4oj5PD0xe|w7+ZP{}WGYGQ znwfFM(9>A9S~q_m4`h}O%?~tY-fu@K5le&|G_ub#H@7?CJ*FWU1_f9eVw`fXb+N^2I|6W;F zS+}q4kZ9E5oUflQw=6u@k$wNI%6X}VnavL$-+uV^hgJelj@O@`Q43u5-9Iwv%+7@= zuOl~Zy0^;KC@eqZ0OTqL5pI@4hYLL-45UO@TN)Eqr0A${buuk<+%P4{ef6ndf4837 zUKx|`dd*|{)`01S7SgjHFSgMC*gog>_mJ;*pWcrC(=9(|)1Qe48ou8-KYM0g(f${5 z0{3qQ-U8>B|RBKtQix=-VOnN20W0k)0;+ob+rVr}7WPNQ9HkFq3xK*xS;-#&~6P3K& zsL@x|^oZV>I}Y|raqnKfde|PkkJ+x{){4)KN;CiNoszLzr}gR7RQvP_uMKImkvDkN0 zLyLSv9uz*ge6D=2^{##Eck!1$I5PR>zlBbluRYW1Ju&YdpStf~(o=GwT)V}sc4AprZcNj^;}uJ88Ldo-e6p|BfBCkx zes!yJ9J2OF&O5Sh!@Momt6mCE`O5iWoyO_{0sY&BANH|LSz@bQDR21W-~+LFHe6!C zELz?<5e|nn&($Bah)LK0I{tIQn%HlF^-#_02P+yD2Ux?a4fIVLRiSo24{ z#q4?2&V?s^cs-!7kR)OX9Js zr@j{D_`Q35-2RxO=eB;*p1-#K*TIYX*Vcc%|2lTH{Qvv4D@DS#W>1;w{aX8U_@{4= zYhqKiRAPdf>;xa~w#YpvbNBU+rzdK;UM_j!y6%6>>Qb+%I#cVm#%&c`we4fA@Qx2X zVivQuG@tK3S$Fib~CZFKgr8a<08!xAiX9ZRcHe>bt&n>Ay;idmH_=?nF*a z=bMY)_Ap%)$#1r@33vIc{ozvi^}r(G)BoP-{jBuzJ?6ExG<8Z-qnE**nU88rmL+u9 z`|Rr$nOc7S+*;kUuP;?S?Th|Ar^n)u{mXg#%;ta5ZZ(?yp0V}wsR-9q%oT~9X6O4) z@y-6!w9B>sKq}k2OU$PoXNIoy)GUwlmznOPkQc^TAl`9rzj)s=p@tuql2ul`)BTwq zw=3>{)ZgsgQ{(6Dt@>zu-75J0<3rCb=|#2hy;_~y%wcg{ev0A8$AYnni~HW+$Sa&$ z{(0R`d-2b&Iu_525yYIfK%@=-`-FI@=+J5dZ{1C2dklztxu%cAZbU~f6;D&0J z?ScizntxB1cY5^rP5Z8b#&?00w(=i3la(LIC(M&87P@R^{b~6j(Z-!=tJ=5oOv%+s zSoicx-0Ice!#};hzUZd_tAvW=l;`R+ z{eCmkvi*jmL2%;cyNhSXw_ksJJ@t)F_1&BKc~gEW-zw6w5$w36mOA<1$qxw*lVcUr zc!jRL)KNZ^SUAQ1MU46-Q@;I^4}DkJab$_!QS&J(WeHaIMStYZx;mxS^HBf0DBGWh z_DsKft&8`&-I8NrIG#W$A?XMx6Czl3l}L(Jowx8dBExTYf?5W(Vzc6 z|C0S#Pp1xiKeXcFb;^RKP|GVJBecwaBEB)Wdy2b3Yyo`w0mn?oiR*9=6IWG~l+jZ@F;6~APUl^6Tu#=eVC4lWRR`{Q@;e+}qvpW>w@kBIcZ%lY{iaC zM+=%PXkWH-*B<7?NA<2*Ar~L?eXrSB%{f){bGm!?X1Q;dU)A5Q68ic5k?ke!fTy~r z;^(v!z2Xbyp0|%9Dyi+<*S`x(d8WQ8vn%~(zbb0FzWUeVyG75}hwTpg8$UbT`pqNN zMKbamzO=Ni_%UJA25*~f(#DCC4DPchZt7vGUG}Qr3+Md4e~164n=QC5Bz17ZisIiz zHuLJA+)}IQS3mSObCH*N`BE(bw}j)_PqL3MpQGhF5VZEM(Mz3<5@vBSyqW0gkn>_wcSjhC{@#pI=c_vED%lP`IdhgdOp||u) zCf~194!qVg`I^AL_vtV5+uw2=F0@G4#bWd;HJW$b837-Ixwjg-jvP4D8P3|H@g!F0 zPW_kGIfbPREA!r6(=u(eWcK6XTi~Jl)?kzSh6jiBuOD}5w!XL0@XdzLg z;k}&nN#E(s(={ICAR6qr|`P?L%LgkT|TsI^D|Y>iuW%BUiug(xqE)Td(?iX{S&WEs7^31 zIOVV~?~{v5)&E-G;5q}9w{zKr6S@Ui*zX>%Z~1+j-{r|V7WSWA{!czd-Ddg4)Vyoj z-8XKVY6X9_E;Q`k^ZRdp$;98Ye$W3V*`K=4;(g2B>+>S2fAdP1OncIDbFY8kC4G-| z|J%)F1;jUOiR0Yzx&Fo$t{v8io2NUdepiqBALHb&*Kz5VpB-aaVcFW1mvebbSseGb zn|%>pKc_tN;X;w_$64;>|JU!Abo|Xy;{W|d))CX>HAVmbgm85~?iIh27k($v;AZOC znv^wHRBFoicfPGYqZA#t`~AKz&!${{9;lkwSEH+(yq@{cIlE>F9y=TRHQ_uLPT$bt z^=NH8elR5E!^c@WZL(hUm>zfHUlx$2=9FJ+cV)NwvAULuKsf6Pn+UPm4bCLmv;TmQT()hV@#yv_VW7LjXK>i z^RD>+6Ev0;co5&=RwlSmruo8!jurn`{Ve>b(6MCl$BJzp*Y`4~{)>q=)SC2*C4I-O zzy3$KIH#R`?|=R4szY|a{zukrt*nXU?V7HBQRwyL!=){;9P2`LUIAga!V6oN~McJm$#xeQ(v;!r$dCmAg8Q)C4``cKc=X&Hj?w$zS>Z ze%4-ov%LDr?|ZAS?|XM+_Wc*(Irh(4Qu=@IJo4mYNoW4vL*l>0{;w}DEwAfrnb!4- z$?k8^Hz#Ae3g4th_K|sii?&UFe|4s>_nm(X3>;4#XSq#W#adqdf7;GnRgbJ*y?gij z-THlhFKpAgt}(A}YWBWs`=kD5?Tz@cKl|zXuSs#I5BqHPth}~kt&r~Rq#r_i``67q z?jHaB&J@4zeeeE??GFE&weQFL>-?RQol^GduA9$w-1~n4%a0?Q_{#6C?A?;#%f8ut z&s5eY+w87;e*JY)@z2B956ogew)`(B;<7&`cj2L`oxlBw#&kKkxF;$P#ow>BvDW3E zH2tEDdG@!@+GSGpMSfy`avnF$nAmJC`1tIixrIQMqmp0K^5r(|zG%Dbjn z6{MQ$8=70UHg50PUz$gXl(V->2fnY}*Yi~TPuE|kMw}|{7cF*f~{k+A#;okqx6>D#`zrXnF<_a2Gx2L&Ae+49dJDJGzZ{aM*IF+8CHA$HiNDnLmuiU6^)+!V*X>`2zt;L% z^ls`673KNnSv4YQ&(7*Ty0)^XZPuslJ1gJJoSG`Nr*cii3Tv(_>L%&;FSs9^BE6V1 zinY3(e{p5KhtHLHyubPdjb^26O21=wWSiQI(}G`h{=VXt;F-Mm^w;++rmxz&-o1*S z>or64dy#OB*Z;3cd!xS|uA8A^i^_~+r`3GN7M}a{{^IVIynk6=v$nom^ljow&XQ`qqXEGiEqgcnoojj~tFu)} zM(0OT*O4dYpN_xgS=6~+vu3{SE6#mlkNP>Td7OWDSSj+}KKX4MxBZLrUwl%qz_CTD zY$}v;e|?SmuJzsZ!>KK^K7GF}@%??+ zbE{`7h5oB(U-J97GaxOXWyi$-oN~@bIoKLw#r7U_WIOIAciya8{o8L1g9WJ`*KK>e zK7CHetNs7Mdr?e4(YLQWr~lT@$yDC0eez-L3WX^9Gv5Z2fhY@WJw#nylR`o8DmGYbEI&Tqb`C+vReZ}r!e ztir#3Z~b2We{o5dH-GM#gW{~2k6IZX@c4&ctvht5NuNWZHjnLUsqJ~08_~=S^Gn#j zt6%Ir-PROvGOfggeTLbi`p^f5qS8;!SfH`C_w+^8>0c|~m`K_j|Le#x>2T`(#KXze z@lPMNUaZUcYWm9RzSrIA;Ie&LpW`F#zOA{Md^IBfj-_3$#V?+U-3A*DR6n^H_tm!Q zd)F8LT}OXy{Jb*g-Tqy#=c{j9GfiAD(Xr3Xj>985-OJc-e|b}+s^7vBi#P4!J9N6> zrvGDQvpx2@>|Z{&`E<=>iu#k_QFb)#ca8Sbd&*W@YUi`G={_5~`;Y;7`4Zmd`yeld8 z-%J({3CTV4%wNo&6wp5V_`3IpCdJLa*q!oim8!vwIXmMIZw^|0;xDU~yU_o~;W{yo zE$u=xPh2>1BWGz+!|VRnTQZmVxo`1%^`*HkLzk&%o|>er>wq~^Z&TeeD`qX zMG>YoPaiK$|Hfgn#{Ky5wXFUQ*WR0ozcJJ53gVjSzV`V4w$>%4Yuh6J@&5f*r1pn# zriJz4nKG+?@L#Ld(}{75*>zSmeEFx^*-IDiU$H3eO8m;4mc1|P)xSP}I4Mx;R84y7 z%N1Qgk8*PNNC-|e=Kb_JO>K(Z-pIQ9cOPxEXERC)oGdx{*7U>bk~aA-Ce@rV75Z&k z=kjaU{9C)jCmgsh;Bo5D&24P&{alP^wVgG5mD;}T$?j4;vqu71^<{c?7Duz+K3QNU zkWnHnJ;Oli>O;Ro#ePPu*IV{0|JQBd**;NuXZzjv)8-cnJ^Q>N({_Gqmq?OT+hd)* zbqtqyFFRZ*R&c8MFT62q??qenI^Psmv-e#Gj6xnGo)m08W% zBYv49efb>uu{M`B_y!6SR_=7z4^65`^fBOGQ>gl@;uWRpE|K4&?)9AvMyN~Yp zaev>veO-#Pim$;Xf1`@9{abXrcfVJZwBP=9?&Le)ufGuE=>NTEMpoLnE&pEqURkor z<4xK7S|`m3OZK095f-I?*Kex*>$@*68Jzf;_;pvi&5y$ur2b0OX3yF8kApg#|Fa7boOL;T7)7M{a+dZjETmRJgc%Lp=>+OgBDSrCV{&%f( z&YZr9YZ-F+Z*HQ%*PMimy~MPKh_wefGU7WxD2bf3L4QJgqBh``44- zU+wc`lG%CvWW?@w@tdmuUtPGNeDd@EU)E&k}rE|Eu!`A z^K-}ujJx0|*3yjly1jAh%gaqyi@uI~b$_L>{@<@wU;WirPk;3^Z{_5#uXgRbruXV! z)YexgANp;1EovzFc)j;$f4iC$XBRiDEUKQcQ>*ge@kvtig!WD4nb-YFWbeO?A$z}n z)V_XdXIRw1TN{5}*|#=s@3|taX*I?1>aYE8^w@U4X4!v#CXiL_=kJEeuUkiGe+sX$^$*}(aFQ0eeWbxi9NAm9oUP!LFcl_bLXh-XbkMsAx zx>$Sn?w6hGUd@UQ+ZS5&Zqx7A7R&6+*4~f5K3%DA-QlFXuUmEh`F#C&^5)kHMTM8e zwZ}NJF1~t~AG53eTAwC%+T_pYIW}_gv}c*Y)RIzqI-M-G57{R(*Zh-dO$Q z&Rs8;?mj*F`p)px*RR%b|0+m)BRG@UE8sw^+L3?VSF@^*{QerZI!x>OdX6hEZLfY; zySRh@R}|Ca_m^9|YIelD+w|V2ms8TJb9$$9+)UYb*Y#(ujhIxa8>zbb?!T=%ySBR= z_Pe0{R(*E#v?-_5WSds9_%Aiz(6nUF<35ha6Q+}|f0^qj#&Rk4rIq$K88gQRf4+og znq1W`{iY^obmq(|4KLBQPM1%2zyGogT@w_wE2*yj+t!PvLThr*@<`9t6)ioy@t33W zp0wGnuHFxK-%v39+0p;1Q@rlV&aXEoAND@<>EEOK2_{EwZ_av=*?aTw7n4U@c>cTj z>g~KSf3~qu#Wt_ZfA_g=*d?x6rn6jSg8$+h3;*9&zyHhlWjNQhEf(nycXS=de%Z4B z%bEF0Uhd50HTs%4NhWD^)UsB+o$5`Q)gMD_i;M2p>ulFwsJCbSA+^me)oacq$u%EI zJlQOv_@=(+YVr-1r$>J9ZQI2hQWd0{a_HG!!;jsf%<}qwV$FT0{5xNwttPdrbpKM% zfb-SgB!#4Yzy5f3#@#m-;k(ygcGg=M=lV1Gi0$q*H@?n3(IMjG&L)&uY?Rf2t`+{!D)$_Svl}zT>=NTEeP326?mI2+zszTHYId_vYkt+YhVU zn`(MC#&*#s`ET4`W+>c}(DNw zu6z8~qO0$q;JdrMM;{i-rKu%H#>@E4{JK4(@L1HoABW@4`JP~o%?A4pTk^^zyb&;X#`pN|ob^SQ=IY)5ZMm>z^^2k`m$@tp zasqALH-C5|P-!=Rxz_vBZyrVd*_BkyvZP4R;f3L~)lsi+aS3mp9PiR8D1VxlKl9zW zWaqa1>eH_k`qdrcUAF$_N3Y&ZOYchxzxknO!(qezH|xRf;~JkGlJ)+4{gTQFKg}vuKUOp0>EAhFYrNJy|IN#7;Yi6zJYT-wA-=!CASkwFUr)%Dyy;iNBJW9?C)smMA z!Y54c-Ft}nQ$=m@=Npj#pf;Z#mjAM=UjF3G}Mx`yU&~V)wgDTyHJI^_t%?+ADKQ?gg@Mq_{ZS) zIn9Nav?7167UaD7R^;VjpY4n1Ykj||@OGwtVw}O%urHpABZ{Z~yK9gTwpChh_d}OA zHI0e~Kjen%&-^{bTgP^~>f#Xh3Ga8E*!BDM8y)TP^X)AQeiZPGHJz!S^ISa&df?)_k6>C^~s-~g=W6A{o>oL7kab1pu79h z{e6dyMJJTc?w_|TD5L&V>nWFQzxjOAi=WyBIUX_I`z?qsc)od)>>-sp&%U#BKlEqc zS$%)e{*AW6zqX%#RQvsjx@6|U91A1!;&mzKSN@-VZvFa$Jjw^A)F-Ro``UI|O42j; zbxYrK`;%|$=j%TSF|pUb^ycaNd0i$|-#ET|9oy1%*yVh4UE(X>>`%3Vfx%5x?|Pf7 zbMG9{^Y5SU-Yxe2?xh>byYFg$<6?F8oc-}X*SeBlMrW&Ieut;>U8}EOpL^%X`MC)P z{NDY_s?h(lXCCMFZQWw+i@B!$dt`s!x!}q)M;AG^ee>h?oDhG$On}GG`2LSiX9_MJ z?@Z#)U$*@3k+0jQbap0tmfh|1-+uq|g24HE^Q(V1-F>~y7vBa zCzGz^gNy$D6nkHNe3ns5Nm{a^@}%n~Hq*Dhj@xSIKR>v7(eKw);qUZc3G8rsA6zT` z=Way#{TmUU_SNTXQVrI0{!jX=AFldKzo2jF^!wGTMNhxIUo?-?dfMF6U!-sD-tcGE zylZ+FrhQqqb=~W$zp7Izj2BJQGyk&I``6c`?VnS=u3NR$DtP->0R!*pyI(Odu;x3? za-aALbg@p+_m8&!@Bja|_x}IbzjZsy|7%?juNPYG|NC$D{{Qd)|KxuEkTv;hf|vj7 zXRA`Hp1nT#@c$>BO*Q|os@aR+Uk6gJQWCs^_I)(B5uHL4!S3G`g*}7>h zqVv7J_@DoOZQZ)-tFHE4Tl%&B+U~bPa{lt0PqN(dfBxqQ@9`)($5-acFaP=Xt~PXQq(Td&+4AF%3l*>i%(e=&At}#c8`jB|KHR_ z+jp7mcc1#BF)*oHP<&&Zy8|`?|ic>+9~V9{-+;BPyLQt8M^c7-KwZ*yhlSg zdEJCIRExJKEUG9z$Xl?f(EIfEL!S+vIZmA|EGO7Fvr272_OI`M1m;w(|2^fAmEA@D zf3^Gk(xi?(`SPE8y1YS;ShL?(^u6qKwd$Qe4@xfk+O58O zzvGEYp?eQ*F*-gJIQr|Kf4ZKJp2y<_3&M}j+8zEku_d%7SX-(8;qMFDEfQ+;oc(w0 zVt%ajQQPRvSX!}!g+6bPA9LM z^Y5U*m2@7qnDj&?2kU=N_w0Xj-LU+9xbC}8Rr~+ePL|!1$=cng@iBFK^}PP_r|V+= zJmPixp;S>Kd1KzeCGTt9pS(F=t)72B<752#&~-tDq9?=em)0%}1s$_h`~80H6hAAM z)wSQaYn&%eG`M`&r%deWBEI!4KW^{4YGo(zrQ&+J%>IZMKdj#@&ok4nv{SNwzhUBb z%>}Dc|9-!DWXIo)N7jBzK{dBo~hX3DhDm+kNz`m)g?e3$! z*0oa}UfKG8)4g}zTidIwudWSUe0^$JYq^!s-EV=?`|1K&+Jzo$y`U7~_weHUeZNhY zTcqEz+xE2eq(XvmTe9c8A8GUMtpCRCZDV`xxOm=ywllT!4qIRU)*Cz9Ww!2gULCu0 zP2ZU3R^(Of+k9qa{f%O){1snz?SH@T_Zz`(JL3(NzOS{;9=F;4I_`SyLdoLq_wAl- zzOu1*SN+YEi64d1e%sc`6n=ca?^DIUOWM3sT0Q*s9S*!ABRlinlz+cjT6Yv4-)8-$ zaE7qr6$`Gr7uJbC`7pU=bI*IXN8XhY%l^K(Q+B*~>%XwiKPKfJ<#?*Ax$=I7pYr_u zrcv)U{ykZ{(Er|gGhu_EDoxKliu-=El<%zOU38`_+xq*tFZbsN-F-h(*+--Fx#7I` z>TSO!#L3Qo|9AsiWFTJFhU$bl9-nh?MZ$rON ze!0G89{>M@j&sM}jLv!9 zo3ADxPX4=R=Eq(8^taYu-|=U@T=d&t`|e(z{FXCIWOJ3y>d>Y|SHAhn|NH&?;`%hv zY5&XX(>0@vCae|yyZX0#^H$sNK%2i4l8q)+FD|VY4zu_*mGjibNd2$tuEpLlP+t7% z$A9T?%jMFk^|dFj#$MU`(qGqiO_W?*oY4EsuI|OW(7#pMiAveof7hP=TAnp`q1Rv4 z>1WH<2CYA}C+KU;*Nh7}HNsP_e7^U)l>6Ml-aAv~Y5uBGT=%JR>le{E6O_~9a*lpF zcYnEDCa;kGoz%lYn`%5Sf8LaO@a~>d-2QHXm}iswJxjFkNU0dy?(kY%g)**B3#GU zt>rxRVr$m!)mw{Y)=d?9_UQVOubFawI(NiBuSwec#O=>%`wagtHz#a#eEnB9 zPCfHGYzKZdH9xqWvFC}%;+eMHe-f7j$s|2?JN-p)`6{j!d0l{l3%9D=z+PhoH1+CSQ}~reH;V$Kaexx)!%hd~DWz`?uF=TV`m& z@mXIJ-4?Ad<}i``=fB#@*O7O7xcG{5?{}|UsUyPwdv#HuqlS4;)?sH6voOKTvqe3I zulM{Zzk03ZYSs60|6fN1S6Sw(|Cv8mG(bf{^yS~Yk1otLB0U^gZd{UIHW=vleiOTQ zH}%w?c%zrg>MuLRPdfQ@$=oKV)P)b3R>?KCEPgJ@T&Jp|Dz_s#&}p5hmxfsXYi$*Q zFFf1MF7NDaOB9e_emms%{vAo5Cz(A~ab3&(L-%ptv<(v@`z`FAxSq6~`=|7<`0RaI zQvLM-*FCz#jF+ydd$dyP_095{iyZIY@7KR_B%LKKMf_=GfX2Jq+x4>Nn3UU?_SOHH z_wClCBK<2VOOBrWGwV@++wbG2{?Frmlagg7{o5yU>+F5+^S)dZND~WtypA_<{?EG3 zr>@Bx{4!o8*(d3gm_)ymv|RA$m*45~qw+Gv_UAY=W@W8;zGbJq&(=NjOmj6t&R@K> zW7eJNXX~OP|F7t^%4Ay+STrm9Qpm!H*YDqoy}9zNxyt_TlTda?``!**6{B*Ea6?oxUymq_D+5qutYQf$7|MY1UdbU-|a!&2c`4x*MZ5NY#Y!x== zhwGv6eLpo;KH2(GJ#KZ}nlNwo?xUp~zjHT)Se^R#$>q!BZOn@v`E z9?-j{LL)woN3qlJXKE>*{-ma?UEJ?KUvC#)`M)*y&dg9}jZHmFyd6J}^~`WO@^8kk z@Y=igy)zGbUYe@s5@r${^gJ=hAXTNe`9S>^_Rryy7JTJl^pUyi%mC3m23)H zw$yh1`S=CW0_S&`^@aY?TlDnnt4C_;=f6I={AXGD+rRu1cFvbOGS56x^6Y(s1eGv5 z73&o%o`wV*s(pQ={p90YbJU7;q#ilXUs2Yl&fliHzCCSe>{qb|6Z{?@T6uiIjnnU? zdam~HG^fmXDzg8}*L1Ueoq`F+uT09=boHlA_lfK3Z`S>LvmtycZ(QSix!X(FfHqe{pj?(pY8+eX7a-rOlE$=V*V;yg1h<-+!G8 z{kW&@`(d}b(3?{~?%vuLyIXJ1)9{|}pK9_$U&)x>c;)%<$UMGn26nt_1ByKOw*E+s zoXzz2dXt?|3*VkkC*S_%=*g31JH&eD(D5s?xD5%)W6)4yDzfu{mvQl z@g@75bMg0U^CqM$JGUd(>)$TRq#v0#ZVRv4?=^Sp=B$mbtY7u-zkbNSz55rvFMV?CUGm$iCM^l|PU|}_ z4_b5^AD@|EJ7K1W@1)vX`=|MOuC@Qe??sws{GELK#r5*quc>?Q$LqgX_bYx<+M;Po zYhA*57nRq}IsMEi_my|ej_m38i>`0_oAO|H+0UXwyS5g-5u1AJ^nB@6n}61%ZK{Zk zuiRx`xA)igZF|qHsVsfr*;QG+HjIDu%YvUa{Y^#5JO4ZBpF8!__Wz^YqPOv1Uu{v= z`@q1!Vd*%_W0DqY>5KTfy}!QxdcAtASGUtfK7_4T^0*L&Ap-Ph+{EG0ARw6bqWoV|WX!{Tbm!sa(G8Qqy` zAJ^>QGm9;^-tGGCV(vEo=Lze1N}8GG-tK#zcd&i>bl#(>4`%PUt#x%v`YsI;L>qC|*n!oG1 zSzpiV|GmJ}Y|X|0pC^8BnX_SDY0UgZ-IXM^%thCj{EXItZc9Onjp`j@A~U@|E=01Fe_MEJ}fuNK74)Yznw)TdsP1DhE2Zm z(tLer_L}+oU$YlQo&FO1zc9&2%8-vecVV{vl%Iz;s+H6CY<>Il;o)SbQt602LVusx?YAx2FlU*kW}Cm_Bdyi54vN0FlWMlw z8eH6Yv1b48w$ptkwUxaR+pP7hr=GOS5xal-)$i8MVxbnR_yf`FdrV5RuZR^M$f-Dz zvuU-%%Yed5@u`3O1CE_Lo?^=5`fqywwh1qnF5m3Zv(>x${N$r+Z*wG=2=|;)HJjHl z^QTqym%2~?{3h+KTDf|be|X5BYyNCS;hO6!?Y7K2Y&6UA6jNv_XXN+)>P@>IteBG~ zbh*I)t(x3*?Z5AGUfAEx^l0?=<(szuUSLP z@AcJ`SKNQuGcBn0d7Xfg!DEw96ZwBKt{0PwyfzBPTer5%h7KlW3}XBw}ti`KV3Jvq%Ic~0R( z=7cwu+f!dpNN96C^8eVksk|4`jeDex=UmBqG%2l>w{Y{TtFjt3l?G2Y{Bc$ocyv2{|zCLmtbkh^t_bpN0{lWi&FV7*htjq!qRk2TTQWMWN7?_9s-B5p{^|*>!^1=Hu_f9@m zy)4`IOXZ9A$I8!ZN;3YKu6_Aq|C`U<%`eXA*r8jcu==mp<0O~6 zCH?)DcDp2M_Ti5A>R)-M>$3YyT7M*HV*Evq`AwhmKD_!TzGeS$t|=O`N?CV2sf$A8_PI;Z>3O14>b(fNPl)fZo${L_Dl;q)WYlaANC{;t)p+*f^d^2x^?mbN9U zK2-?6{&{ZOscCio??1Tm9^(1^PiE277A8OD)boKs^#%9sB9cAk|9KjJc%wtP_0lSH-`)Fl)(@NRHKu{-(ZIe}!ZBe3{cv-cLBSPrs){$hLQn>Z}J(=bpOt z`p!~$;aNBG(+m$6Zcg7b-}Y&jzJ94^{|cj^YTNs#6pVkF7bh%xzh6f{!fJlp&D`C^ z>)KmNIxnUEHcMGJ@3{KjI?<;}fopH3*KWIQIMXJ4xvc%C6$QU$-khu%dZ9`p!8vnI zdRt?Mv*zAvq4shM{cn?d9xJrfiTGG_Jic=*-6gy8=M29W6R${pWP0-J`eD`BD@yB(^uI?bQ2?pI?>U zP&s^ZChO)6cY|XC3vb+P`jz=V@b%;6^WCr5cJpn2d}+PIj>-_b z-y3eSsMgvpDx0>^dW)HikXhpwb=MZ%!zL6DkZ{LFn|NrMZmEKU-(YM%l^`FG#Tp!O@8jc3Zcb}f_UT%Lw)zZHD z_LS@Mf2Zl*{i^6w*Ro~8`PzSbbL|!7QW8@Bo=Cd5&;P7<&HTFPCtv>a$!@v#_{d`Y z=KSE5k&|!gZN7eEv1Z?V+j0}mV=SWFi;M3lUy?Knc*`eoOS!tf>F!(k8`Jiy9MiJ> z{r)t|?EBj|yQ1fOls$dQaq^bE7wtYo>i+m^(f?HM-TJ*#7azUf^W&~l?fS4)QHv|? zdd22WaDBS8eA3M`@ypMrUr5}3ODw=S_V%3baz>N6f4SC&m;dX&yT01{{^}2&>$ZIQ zX)W%wTm9z#>1riUpM3iN{qg4V`i1&x#=rJ#zZ(}?*x^S%FH3w5B zYr<>e!=T&v- z^7~BvHFEbCea+hY9U%gKb5}Nwmn`mZPTNmnbO8@8Qw-bxuLf> z#WF1-h2FF;-!3+<+F7^b=;c#M+wCOgCF}mXAD<`bn)&rdg1?jPzpkT`um9h* z_04?wzyJ33{j6J4svW+%+|>L1_1$rArSvwRn{srI@ti;N%9r|E>}9qQtu8{Z$w0yT9J|XR21r-hX@VPrfMo z{nypI7e(t=|G)oScT<)9ivi|#{9Vnz9{+b=EwfD}{M*j^YeGA(3dVo_w)fqd(0@Ct=e+qZb?%YTh8ycVxTSMG zJKp~5xx!%I_a4E7_8oQmGqXfy)G;o}6RIs16gU%nOWx^TyQ(NlqmH7>BX{9~yboLM z&cAYTPU3L})7Y^lp2-6dd)XezyrRm2y{jvmk zpZUcPvpF?&)+M;CC`_faIy8yce4uJSMu+ZHLLi(<4mUSN<1=`v`w*U zO|&>&^=G#JrOy{8+D55G7V91{xN!e_^|WS#gi4#=N1T?q_A@NbN%E4kkFcj2x7 zMo6`%FOUE1Te(j_R9L%&If)u0N!2TesjSvz?dMgwrZMueb|$-ryBx+;RE0 zf3e%NgVxQT971;$PVSrEyE5Qy^Sv5Nb;FsV_KP*)BfMJzQ7^%%{=qZ{0-^VGmTv@b4I*on)XuU)WP!%tFM?XE(xkh_A(5p z;;6Ujc5In{>C>~N$GzKAnKat64=65kyJ7icHKSdbe9xQG@(Z@5pK53C)`)77{hFnk zr#0oo{^iZfHCMcv{33cMQ$c`>LSkIq-It+HckE(|Uf#2%t;lZM<(TIaRaeyhZGFY{>dH&eYW3L6`)WN?vz_D*Om~XU zm{OwmT%vVyo7`z*j@@yJ) zTjAxkjn^@^dBS7HfByGBB<6mepRkZmbaQlCGFxqG;8e{i-}Z@3`DOShdDcVC8)ipk z><;v&cH6~2_UsQ()_ylP{K1?X=3*N1nRA5aIT4}R8qF)~8b7?#t%-bkEB#s1j@;=^r&BXNek`#1)Op~_(&YBTyS7Vy)oRb~ zm@!L3t!KuL=1I$c{oeaww#|>jF==J?S8U}k*vx;-C#5(o`nh1-m1Fbga2QD3`|Zel zaQ-XNhsz>l|3xo2Q}gzB{CfBOZjJ}uulX}$-Cpsfn}j`e_Qr(lI{AB7mdVuHi7O8$ zK4w~fgE>9w`s1}%|IA!wFk$EXduuzJ^cidKUFbga&;Cl)J-sLFK|$YM57(lTDvXz$X(edpuP zE?&0xoN8Zg*pGb1lh3nP9?`#$CmSwYck%T@yZd(R->YI$R``4P+y6HVOIpRp!+)fu zBFRTroGnrDQ|;;>6|0YNpUpTfoKYYbW-32#(u2G87t8NY|F6V*UoY|86y=R`?N_L{ z^s=9vw^uF5yS`m%`|m$FN9WEq=5~)hSfJXPdMWeY(iU<41zEPjzgkWv9G1*)Zs$UZ{*Z-flVE>Lvjc*gY z7#Pc^9}WyyY1$RiyZPkT-_4BTmk*_hEIU2*)mizg5&BYGAsgG~vA%cftqs=~XYY#- zxz3{Y@Yw1+gO@K4-!pKzCUjv@%9rqc`QEMTr9bP;pFOS2Ykk<>R^J4-vkf0V6qL$7 zPS;uQaQ1NTyl=ASk}L8P!lzH&b@0@U<^R*QdN}7gZvItrn60Ntdfs`dCO!#}5QP)X4W z;*(09vDda%O7-#v`~GgWk`zN*?~cX&OMLF}|0{3tF|Cpkyi$|TV*lI1K=!%!{*?#i zR||bMQ-3_iKzg0glvC6HxSL9d1wGxj{m{m{D}}{(-(F)g*ZxPrf|I^pzROh7mVS9v zqtorh5+>W!w0!9_^YFEy*VlW#zRp=!{>!{D>reUQ^GgqhAJ)0@V2Y!xk$Zi5%kM@; zZk_&^hnjXz-a3j2TjXEf_gwwD`lb9GaVOd{E+|h-``B8+_v+NYhdVN-Eq?vs?bm-> zH&5fdv{wGcJC#1ZGgDiq_}LwGWsy=oF#qn!OLyy4wAH)5CQQhSSsHK9Q&XT;-uHOj zZl(;Q^1ugq1T=-)Qgk?>@X%;>GhLrQtUw8r_o&wm;EWxmZ`` zl;W$jr2(_`uc&{xcW=iE!Ku&pmcLhgkPt=DqU2B(=ap?fvPRsrR$)t-d}zJoL@_O+309x|KH+cC*d%s%h!$e!4AF zRekf#A1Pa`*DsnC7aVsm?Nyl3<9n00g{J(z|MhR>iT%7PpPzjDy>wdq^bgbf`M>Y` zTJ9(=Kkv8b%_)1|`%hYb#sAR_<=uZHjzr~ew_UtFep#~h&uZt$w^#38%h%WXp7lRY zbs_)r$)#ETRY%TW_fz`4_cwRR{mk`=M>QX3y4_oE8>X=T>%{8|OHbcAHA`vVPg{Pm ze~W&<4~c6lH&tI)`)g~}^{u|5-|I8x{<6LQbMLp6zb&ml>iygOR%Yeh>oxvXCb7S= z?>zrj8~^g4_#OrZE+fZTo|Cdz%g_ElQu4yzH~iMqQtg^or}kf6ecd){|Ld!-*DcMo z+-EC#@4so4dhOqJv5%(SUngsHYUgyVa@pXkUccjQ&p#3l-JjT$I5Tv)Z2F_$DwkEZ z$8}x4Z?j)t@Z+cWRZrizO}S%wIM;kLhugLKl`l-)YOeDx;`y(>cj_|Jwq>Q-QY~Au zf7*oamQFaZ_^|bM_Nkrv{dT(!nQ=~UlUZM%*cWsBEWu z=*&a+n4_2PSKb!2B>eO{-M%I<;fhqzP`&u9tF@<({P`zl@3$uQd(5iuuZ~Q*x_VB_ zybafCKU^;PckTbHlP}BvckTX}xH`0ahuXZuIoo8){N_|I%d7w zdU$JQ@ae1PBzL|4#^aQ}df!UhtiRd&en-7qx&E5g?rF>8Pkx>LxVF7hqBeg{)6)a* zx8z2)P5i%Ot-{hfPoyn8Wv_0j?%z^-YJbPPi&4?LCtY24Ez~sYZ_5r9HRerEd+g3m zyQj0L;MJ{f`@czxSj5&AmH)PX%l%^Rq;>08UyYd`;vT=c^z!Lk!PfIlH9J4Adpx!G zM5Ow~rT>^EQ;Lfj7khfvRoF~Arn*%8(hj{D8{?*$l;u7Xd#cW8a4OE~;^`ktZudM^ z{>A0<;gU2rcj+Y=#jvQR46~Vh-ge2)%M$Z`zwWrP?qbpN=W)~C&+?eMX5P$5O>T3( z#UHNd_p&_bEB0%(+qY|**+g@fIrF5q-O$wHusdCKRh&P{aHgN$2WQ^A*=x_QKeXho zq@RJS!j@@s*>5KMiS7S0p)Z;z*hl5I+-DR}Yu{-XJY4Z^(~Fm0QuS# zN;w%DBQ8kw33DjaB%T%D*R5p1&m^9q9mURAlb=vA>ykp+4wr*B8oJ-c`tnRu51JJ4 zQTpZSgr@w%Xn6>)j8h-tBXAV}J6wyzbrGi8^=YajU&pzw0EAw4FhX=o8m3 zkJtP8-~D5+aH6VyLF=|$kL6+UZXXX+$!<;9RZ%u+;Q^C~n}WALgddYyq1^mVOY{1* zH){kZ?2#9e@_DYOE&k+3V8%}S`hXynC-)p5u84|1FmcEG&qY7ZWt1u!3q;<$pmoD6 zX}RO6qkLgn-alq$>R&u@<>WdGLf56^skA9PUn z;r)+0zC4KR4`z;^Zq_X-e0}}$KXb0?7GKx9H&OfDw%=R=jSa0yWlvT-eO5istl~$m zSgNXv`Wsz_!1W)@_s{RU$M(9eCY=3*gt!Lp%*hD{WiRfVbNivs3i(a)%JDzf7m2Yx z;o(?z!PUw9-pBY4PP&udoqY5AnV=rug1BFkPikL@$>_A66t8}5&)klEpY1o7XB|?D znEd@n8}}h`(YP%>nbY6$&&=i%m$H0Q`2PGiE?(Ka>z!8xE>vf+3zI!3trzq9NlpLG z-%;yhR_y<`HEwHo+}gPAyCJ7M^sMLK`=JwI%*ns^jZwgh)}0|m`-7V*C(cTH_hjK7 zfvO98r!^FGHs8w-JDk$r;lJX?grX|(6&*9?DnI_TeZxwpcYjMm`4|6x#S*<r({7u z|FcWf+EGwt8})5bK4C$%ECGg

  • n|w{dDR073Ful7b=k|&N9)~arf=>7fvsJ<+3;Ncu7Ujl?|tqXU$w;`hM;y^W6#Gc(|7o z3%+;}+ut{}ec>8|uESfxxvGx5zP4sHe!Cam-G_CD?(~}>&u1suPS}6A_^zqJ&Q*u6O{Eu)lX-+JCpi7NZzTc=+yz5Y<^y88RDKG&`(@Bck=Qi=O~@Ndfz?X$b`5XO zj5?_F^`m~lltXVGX-}WL?dJXsjoWy*m>d$XIQZrL{P=fvdBT%_d7G*^IAeYYInpLQeT_n|fV^l`B4&DIWafa3G3v?;ZQSXQ!tl) zj$qjIZQo?n<;=7H6nreSXk1^ixL$AhvDj0|g1Pf7KJ8rh=Jd(e`;-h4i=PQ8G`!>f zG2L%o{K*eLv%EQ`e`Q(Z{+F@fv2Jbe&Lj?joxb*Lx1Edseu}TQ-x6ECY{Sghd+$#^ ztzG|mdi~l;i*+GkOM&?w0zMk^#-r?E1{lCs? zkA8T1Q;kHGYFqiIib;Rlw%gtM`{B|2Pt~XEZyKnsd-!jEN8j(uyNq~luH>wKUXs6G zT<*)i8qZI1U&5~C-hK1hb=gU6apBUs7t!C=yv;uQsq*L7`gD=S9rnK!lNYO>zNMS{ z>!Doe^u10Q&XP8^|LbbIq8_e(x_keHNkQ@JPjo%pZGYBf=~EV?w2y@zUD5N_M%q64 z_5Y)MAya?y|F5PuSDsjM<$nX{x+yI&F3H1;6D}n9i1KhYJxtg!K}$qvV$(v0&eoc` zto8eh%VKr+aYm{NT?&1XrEm25N59kG|L^VmzvgGjC&q;zf11}@If-G0{OPCnRa)Ns zj#~e^Z_NbB4(m7jR_VVxtIb(kb8B|`kw$5q9|xB{_Q^6Sv2eWcW_k5L^UNoYZ@nux zYH{}OHP(I254SnV{fXu7Xxn^K@Y??M&As8wHlZii=RG|qWMf>jsQ7F8s_%>c=0E3t z%6@*yNlQ)%kyqmPm;CR!r@!O<1NG1~D*fE?%A$>n@LjCosRv8_thn>H_g0js&e&X)@%-!iKl{)B zi<3@$Jy9a}opyBD-}Ixu4qqtbE#By*+s=Lev|f2^Yi=WJm2Kd)^0McT|Lp#Gd;XpO%zF-%2=Pu(ygN&=^|!i#^9r3t<~i%H2*1_hdiv_s z{zYX9M^5yHY?aYjzvS$eh6ZCLr=|Ikt-NO2YHC6)M^vk{itkI#so1qQDO>DurG(d&&V{>P`x);1c>UOZ{hF)t(lPcS_1*bW z?p1gGP0la){L5f<>(&6p>3c1Hgv|WI|5*Oe=XvY5SooP-UtJsh-}JG~m%4d;@)y5- z*|))>Qe*FRt+Y9e(~lTGQdq}-`Ay3C1@rYZ9F}cxa6h3c@%QBazTLm=KkRG&k@3}d z(pq7K0IpSEJeosYe(zFBZki=0HGk8t`j;0g>#Sb>e}1Ir*}+H;SHI~?wQb*) zdmr2NwcvC@0qE{KHxcPJrb7)I5^_p}SzQ_nR;1{-a!fsIx37HP_PDyz>1uDaEY>bf zv|1_4SN!?^)r@tyKiLnwx1Fvh)+0T|_=3IWXPqq}uU74-FZ%LAJ#I5k=)B&d_zRY&QkK|e|9e$^!E1YCEbk*B z>5}7SS97NPv;27LW60;eSC_t@s)wjj4xb}i zE{X0s)UmDh#?4;yNxhW^)ITw>U*$|&W7D2nrB=MaX0wKP<_m)h-tQcGK9}}}w?2P= zfxl#u_f?kkjq6t)mu)=2s30<-$C;ax>9E6w1P?JT)~43v9Ue;VJ#+FauYHcaa#e3> ztm(7B7# zUD@#><_T)`(Ki{hs!z|AF@ITCU0f3wC?j@b-wWop->LcczwUhBboarBW7Y0|njiMR z`@8Q=W$o{;m)|UUvqQ3(LsE)mg+tlN_+|OeDo^Mh(b{+P%lYWfht|g)dAQG4?7Mh^ z_k$loc3vMA_Z=P+;g(`L>bN05OGKKBoDsa+{2Uq)?S8iK>{_pX|f8y77YP_;P&%Ud7-~0CtN9HZLy}O0u^S<2;->Nh3 zrmb>XsdZb`v~s3TVlicE;^FmMWjr!NlcW+B6=K+S@=lu1APhNHnWA^S5BYr zbt2_`!#ieU)@GX&(N3GQhNTfLOCrmk+CN^`?RdI0_;#;pajyrXUiqn{gL@} zz0~TO?{Qm}eJc55rTr-O)-A7>2Mr1s7}&Ns&hirKVy$0T80#H-eLZO3sOg%R?S8xd z@Bjb*-~Zo1_v1fjo%Fj?HNb`ai?; z=DvW>Th_JzQY*A^axDB7|Iy-QbY^YQ{xJKvUFKg`EwYb{{kE=dxqoTV((o_pS1i1= z*B=kC+bUSwyOOuMt)R2*)E=oNhwJ|`d0XFzS*KL^ragXX)vwx!lvO>Qvf5MqukHUE zmAyY|b<~foRe$B9o_zcIskZvTv>VU&+t^Nin_uR_`+lzck}cjRZ(YwW-dZ18fA#$2 zhZ;LW`*~8k;^RZ#SM5|^bya`YmN`DtZGW!?t-0CXWRw@mA7oqpOW!w4Gwb&pr!DUn zrC$3h72khqZ?DCn^n$p+x>?`;27dTk{bm%i9qGAZ=#lTWr)TXy}LF~?xjlYjRM?tIwA*y2&H?>41m z{okO^I|Wz8?R#$3cW#^E{|_R7De81kBFO}tb zbN0B3|KFeG`8!@PB`unZ09eq{m{dxhr1I$e`mdR@&8H> z_D>uN{~X!x%-;WSp)SL*hswT*GY=g4zw^Wwnf?C{{l4%tSlV&TX>Wer(vzQSP6vzs zuk-u;rFq%}qoW7ETTaoF3=H^ZGH1gSQ_c@zo7$MGPQ@Ir3l&yaxQlg>*G|XJ?dE%a z+Pz%QJxScn@3+?(t7mb}|5@*v&0M&}!07${;7^++JkCElbba~L#E3;d>aTyCes5LZ zQO^BZGfqCbcKyRef9_As)e++V-gFiE|IuzsztvcAvF-B7r#0zn6BqtBFQ5MRifHn_ zzsqgyw|I+m<5ew;s7_VvY_yhYU~-~MH1;<7e=`ll!E z-I4P=R#WCb{dKBDBTaQ$i#3l_Ma7RfS1k6qtN&MTUtQ+^DBSwmy1rj`!}$-jMV~C% zSbJD2jeoE4^%Vs*AFfY2w`hv3{r4NoR>r@+X?y#tV0GAB@rPL~-|sBbe!OJgGUlJJ z-#T);aow8s-?jR7+)u6R>#r|aDZlH_tquNm)qC$o+dQ6<^O4cQ|EJCB7=h{2Z(iTF z>BB3%r+*?VukbS8T6lbgo&Ob!#92As_QrvCCZrU6@`!mSt?_1lyp`0I{!7ZQCboR4 z-1dmSZTl3J#KhSdGBI~~{@zV=OD(bbsAhPYTmODR_tfl1JDc7#vFo!t|7n`%aQ(T| z{uUnL`UgD+zs$0G_)$cA>5af{0=go4=_Hheq-@PxdIeK<|twd%^{$&=Qg>on+anbF{qBj~T-qX0(o<(yzwy*dI`(4L8)cbg=kK0;mwdI?_x}vH^r`bF z^Onm$;+^7G^x5FfTCW(>Yh0pD2Iu~QN=iPTX z{q=QyJ6D)S=|_p0_=Gj0_ki+P6c`4_6NsXqUS@eTQm|18UD zjb}VPUNgTsYt=D-+rOfH<#*q0e;@vjUlqCEn|kl+{i!uo>-M^SinrCiU#`DU&of&zZ12;zwq^To zZCn5K>)ML-7nzgRz143EJN~Wv`m}z%(EnL~zd7yw{e8vczR0f{|JUuGe_{K(kj?dr z$}d^wbG%*hZ;8p5{dE`5ai0%a89!Ok_Wz&IR22pW)~}AUye6$;tzVe`>#O+x*VjO& zO-6O?+S>ee^`p?ASO13gubX~<{i?NDanrRg%+$*dxBYu@jo#<$xB7T4E)VPbnms-H z&Zn5|WlQ%*towWQ{ngba?kOMY)$h!`pY_x3dw7g_h4S9_lMmMZHuwAzvv_~#qNQ)E zx8B}b%$c`8KWtfx{buP;X()L;Frtz46tnM|2lb?R6dcUZ;^XvCl|E9k>{#x%;PTrws_hKX5ch8k#tzL3&*&WS=t{t!c z3NkV-v#_w@z7$_4{L?gJ{pIYhtF~r+E&5xPAFjXj^x`s?z|Lc}8~oda1Y>vFpDX;= z(pVJZU1s36D=YiO6@fDor<&OqRs7yRjqAf|Pi=?O;lDVRX_WRDB~B4ee0Flt4M&ya zf9(^sr6>LTF>lgN>sRZY)|-o~eoGE8+LU@ZSS0FjS#0#D^YMIkf4iRQB_veZAK$4s zWz%2gA58+S+fL1X>Z!idzA4IVkxEwkn}KRsXdk(I~W(RoW%f0h5@!oA1kW9{PJcg&s?y|q4}x%{-Cj#;llvWkZH&kT#q z8y<7sXzfpGxhNIF_P)|qWdECESw8*=+a|rQSu>wg@4ad>6)Fn-u^Pbv&W8ibnkubCm>hIs8}|0`rW(7XLN4=p}1JO zXB$t_^XD<^1)uIweX;xLr0aWprFSoR+P?1g0*&NOekdaqjMD*y7s){2vRj($4$ zg*`|kp`D+{^!PJjBVciz9aHS@2&I$8GK`R}`5Tl6wW z^77^O4cn*oR4$H}dB@USoVBrTXUK}Zk)ro&7hSJ?^CIBI_4Oy5=Y(Hhyx`_Bf$XA% zyW7G(|64G#jWPWAk!N-s^-1sdJvF@1{Z{wY?+ZVTp6_+4$f%2cx>K+9_gk)aCwZ2; zS9Kq<-urQzBVV)1uiQhjPOJVd)K^otH$Shwx-ZJGTYYN$l+!a5u7tOop4cV*IH@p% z<0%KTg6GqFP5%y;&viZD$9>*dsAl@pCaZmiYri?gsxUAARFNNfUi_Gw*DI%c{_MV2 zJo@#UazxV8a7y0Vq?XOp+`JSD3ana{f>g6|7PW$$~yvNZh zU^XFBkI$j}rdOExrp*!s0d{7cm-ANGHH=CA&J^7&IMOdJ<> zym>wMY5nb~a(gVc%db(qbM1w|vQnd!?>F=mxU8`-Fy1)J{cPR5ui{QhzK_0LNPi=x z!lRgY%xIEaa^T-dP8^T^w&tCg@y%_+d-0z(wn3@qo}WHs=QFMQ>g%g@QJcKqt>1UZ zZIbjcw^UuB>fNvJC(Nr{z2o@l;$DMW^KN~=k*{K5-?peGDfV08n%b3HZNE7NZ+ttk z;bTi|OZw-Z1v~xwe|idM?7gL5CTc2aJm0kP;h*^0Pu;rbXPJKs+H3ynRGk0CgrC*H z%gc_Ru{KNjnf-oS=+gV&Vy^7_ee%`Uhec1nZ=e3*&8y3&vTwZ+6kMtK*8T7IU;1Th z?AaMeec+-7j5f*fBJ2) zM$0Q%iRs46i~AE7y*qZw@bnD*`?cQF^Q+$&@4f!n=gZ{htLlzS<_R{_*mmjsYTG*N z>HEt|*5C6s{9gP0?p>}aVSB~3e%c28p7iG4d)Hl((`&!0ertVyJ?i7u^>+U3SAYL4 zK3)1cZnaqa|8@2AKIfgBGx`2?y_f%E{L4%EJ~-yco5I}TU)m`Ze#qK^w%QSb-tIs*1x{K`(@Ssu*ui-ujyZ%{$g&RY0av`mTzoV zDK6Qb@%H}fRa?8ap4ywLd)?@2{Hx=q>(+#?TXn|S`_hsoS?L?kS>}A%JoWLR)fayk zISCuj@z`H}wtBu!&i-oo+EZ51Yr^k~w@kTPJJEK(dCbb3pcoy#nrt(#gF`3w%_g0>HpuOY~#I-XWcb_z0$Pj>-6mStn1H}j!*yf zC2Cq}x9q1i+ToMttxcMJo&W0j>#Kghjo-KQaM_C%?@woK*LQV`ayhaU@uqz14*Ie& z<;(w{?@KFflinunkNcbY>uc4{xD7My_kVpdW9AIAl=HUfCcOSPRE|5j&5QoZV%YQO zlmz#+1&7rqzm+%Dk?^_v=jqPy3re?iV}q)We%X8f)!qB6_lM0ofBYfeoZp=x8M|4E zn;6dSFsk{#`~8ORv!>^lm`pz$9L8<(t4V10{-~)Hn}p}O{kHwjA;)xK+Zyvb_kY)Rz{mscF|da;~oWXXcu#^6P5p^~55h!x#0ltIkJi{^mW;V(Zq~Td_Oi`TLEJ z?!9;U8S&)bSAF%y*MFNH+0C?1-aN7Jj?J~asonE-Tyd>9+n@3G_FN|QlTUVS6W>r7 z5_fFnv`HJEe0^8BHtuMR>)E?+;y2IY=8atcrv2`dAN2?8%=j$2y3Vh*Tr~aU)7Rgm zc3sl_aYJ)KOv>*I_gC%_o@6y+x=h51*c&OY*;Lt=Uf$ru|K;DEdquOSfAaLyuWj;u zH(UKo{>3B9_q^|0l)hp0i>W2^Hq@>Cb^eyJc<+ z7rteM()NalN9L#heip@CK52}vJh^$Uak#!o5T(&yY+?_A7M^XSK)HFY7T3mohM67Syq zCf6KrsZe91aguSB&^_Z{#uksvp1k-hUw-q5>yJzMo0s_JyfF$>xH3PxVAI*gS~Clk zVx1a~?B6CmYVTj)JU7Q?M@YeT-KLBFRfPxjbLM5I?8sT1F#mP`(&H9Myf5W0Zk_yO z>k((S^w**BH~C$D?XgfdoE~tx@c-Jv-NKs}DYCcCdw+`Ynn}ijgyVM~Z5Ha|GjC1z z^E{fBTDW`u9q;zGy&t?+nTSX_p8Wbp>9<&3z`^a?O6B%PNZbn6*f_b=+vNX3!x%q* zp-(P8iueAQs0ehI-&f!D)2eQb5RYdOw|iTa^ug~vo%RvRT70fDCSfwhyBNRf=yYxW z%bn)GZs%dQq84VQ*4~&){|*W!&a9h%df#N%6a$xCGoIUJXzK*O`zj;w&q~EU(J$xW zZs#EDpSK@Q{%Lob<5;V5tNz4*+j=db%+LC!Z~Po|F=Gk0;i2w=+@90dA8QycRy`yv zyIFRzv)`BRS=}=8_1B-Xi}~bu`~39NOQvdU%aA@<6#G!t`-Y;(*YDM(m!3swx@M@& znqZ)NXI|)}g;|YV@0YyZaIwY5zkl-CtXnDx5ADuuaOYj~zrOb5ilUjt@iQ|;f@Zb7 zXN~_`vn6fWbM=q2-^tiUJL`UK>GpL0S7b9mW<%?u+Rv?z7b%^utFK$v^t$`6<^Cq8 zt;Vy9W0Q5>x0mbsy1Zq3_c*1$!|0>p%uhd0URw30S~mLg<~M!2${zI0c$D_(>H9SQ z+0Q=ozu5Xq-?Oy(Q}v=+?q^R#0`?{RPw3yaG3C$oLl+wNpTD%czQ2EICO0SlQrot! z_p3z=?whl-#?AiwtL^u$e=XActM2_53Kgu2%AXs*dD1@VcenbrYSzyZbAF!p({|gQ zrU(7y_K*Hu{dtUQ_cKe)9?{IaH@PQEQr5{Vu`K+(cjEqURg;7NPkLXz|7hO(^7@53 ze+1uGJ=&Xgv&Gr#`7d$No7=1QUET3@o&EYv96Vb~_61zj&z#?`JALw_@F4HaaS*@L@e(HDkt0^-tpQ?0~eY#ZctH76cJaaT{_Nq17 z_Xa&n_MFuA<)Z3MgWd1XY^r&3`gP7`_dj!|{n`8Jy?e!7-Qw#P67OBU{h)vMj`K15R26qEt-YZnDs}vHVSUZ2Kf7f* zgucyvUoZHVGt=-PKga*@w+YWze0`O*w&?TZ>J|Tj1e;~!%R)93E7|P5vNBNQdid!Z ziV}Q#{+{D}-*|8L;+>Zn7}%aW&hno0ina7bY&t&=m@(cC{qn(MM_K-kb#M0G{d2YM z=;G^_uGSXHe%pF(>#vRKCim8sN2RC*3!nPEVU2fZ%)SF^yNVavzx}E9KeRYhL-B|? z%UbbJtNj_5c*W-Xh~{g2i<>HaUwx%{7I)r{%2~%&$<6-d_x^2MVEi#JtH_Iiwpsq+ z9n<~gTFw0V|HfvmnZ3V0#D6@b?bi}RawTtz0|kfwa7nw-xC?9ZCA6!Utj)M^jx_7^VNTXf_>Ffn#+X@_2S)A@9$b3A2s>f zuK3P*zn@1~>DR9>ty}x)r&Xbfm~V^O^^j9#EvNS#lvvC^?Ptu9@46>nybSw0>BYXf ztFOD??^?b7(e)McYo=Y;ly~y2v>lga&YC;3C+wG85NP_;MegNVBl@+np3A4L~^ z{kD8>_>%n{>m&VU%9uYr_b=B_((a4mDzWq$yK|p<6l`_V7VdG|GV%GF%ewYAUrFp~ zuDffn>~7`PS3(;6*G<^hEK2?efHq5+g6jrr+@mtBcMMep&`MQhczp5 z@%Cpg7kMxIAHB&*PSASA@oK{}f7@DizkdAt!qeSFUnaMH?UPoky%wgvb)uO}%u$Kg z;fwa(NfcTl7QVau-hYh*!VyOC(z8@XnQn1{B#q46W;Uk4|@ z`~P0OzpO-i(ah?y_aA%;PX<_+``$0rFH8A){yXOzo-N02mj6hL66+B0in;%5(j_aa zaGs}|;=sd*cR35UZv1{eZtwcvPv3v>TBr8s?w#4KXPp%sbU(B<|Nb?T$MR0`!Iyt@ zHKf_raP3h}-Zo`I()!b{zeqh%dUYdpX-4ALN3Qq2*Pm0Y$n~##|2{L%JOBUpyBF(> zKc-%Ae$aA_Pd2ZU{f~d`qA%Mv)~$WFHe7en*CS~X799Rd7X_Z%QK>WkjgyDMvnDG$ zo`W(ajSao?Ou~L`IC6`#r2OjoX9BK|Ip;svqV%qaulKA4?<@7l!;4jtdg zYg|9(sPd*>F5b8Q(Ov)S{z%sBMo#Nuy0H8*#Ohu_?(y=T7PueL4z#h-Om&DK~i z+PVMugx6aP#H_2&|9zj-Ir-$DdGDjYC0abaws}LJTJrNI_5JhCZ1Hp4yfoyLciC6F z)4OH%pM0pa`|a%;uT(DHZvAF8C{|NqAMUei)v{y+Qf z!pMq!@6LEPC9bmTUfS(fd1~K5Ii_UR4E-zh=cM)C?eCc~g?I1zqU-Y?G;z$(je6An zp}+R0&5QNtI9y}z#a`Kc?EdGtKaZ^UZohD;owet3wwP;Gz529A^|cdR@A)XdKl1zI z?~I+-t`^^!sO|sLrsw;Yti0&A_iO)E{ixMDUO#8kZ=*IQ12fI%ymx>2e{pW>tLt8T ziv7d_o9*>)L}uH+>wHsLe){zN>GzA`YrnN_jW2o|ySA`qpI(dg^yKyWiL>(eznUcc zE%v^D+v(j;e;lfJzVqL<=s zW#sC-m^GDe&K~tM)vA>f`u(~Z@i^NM(d!dLr>oq1{a&Kk;&fNcn-7ht+xcq!Or{)vy=v+|W1B@1#&=l! z(|&t4ef_6d@~~#+T|LpWyWM|2%xf%Kyy@QlbL&o-Mowkk=Nt2W-W*MX;l&Q<$;HU9qPAlUP1B3x}OO{oh*bVRr%kEr;Sp4++ldey4a zUfr{&M;z6>w(6ey+lu}d|6PCVZ;*;>-2b4qET?Mvp=W+|uTvJUT=wc>>Mir$+Pzbm zQ(7N7db(@oP5bwF%K3d67Vm%N>#MV$s`=z>%K!S-jtd;FyxF(^-uLflU7f~#c>n6_ zJwk$B8Nxgzo954!bPN74CwZHJn8bqjyM7x^4^O}I_}U!tcKiCdJsYS0-R`Pd_S*E? zl)aNP&tHsC%Afe`!kT-Me8;b;o|kOP6RD|;)tc8keeIm`{UMFD*0%ZjS}$fx@9=8+ z_;s(WL)5g*?52WU|NTCm`}#e~^Pey~qk%||Mwn1*J7dC*038i(PL_jC8xnLhxLJ>= zc)gze<;v4i&+xFG$J%_Q{;Jm}{_2%nUO8>S&#U(KH}ve#aG#={j{Z!bRm>lErF z9OO2;_wCd2j=EdSx#=YupBHLcIlPo+ZeY|CnXpWPyOn8Sib9A6XsZ69qrn3obAgUS ziayi6$8TR5(sDIq=CPMutFCkEJNsnk-zuJG{YUs)yyvg=d-qSBB7Z<#@6px#`bllG zSAU$UxG(R(BD>$OJoou-%w7ApGE=R0lF+J#U$a;9ZSPUN^h#Yl`~B7Xk)oxQFCNGE zsoq_mHh25^I~knXYqZ!msP3-&+2%GYb=&g<_Cf|Wu72I=$`fjH#}}c zd;MnZJ!Ly-MmO^XPu-14>)xaCPxyb|eU|I; zVn5PiEYIx9-u;Vh@|O#fY%;bTj+mWcZgB1BUc-}h%QrGk`T09!tN-?oyFRT^eUOx4 z68H60`N7(siDyiw$W|UT*W$=tmB?}N<^IJXsVfquU48#$Z>{~$Zv`sb*CeX&*+)P7 zS@>ba(qm4((jT-Lt{5sgrg$(guH9PB`7`OWzWWXrmRBZSCd=;~Ui#y&?V3F~`kYMv z=U;FTw&<9@X6Cs=lP#T>s_50Nof>whM6jLD;Q^zLNDnBc`yM)M63`J*W<7K?U`L3O zYvw+?(pmdL_Ds?eT9r51^oV}rA786EJ32qj?%5yTTB~? zEuT7l)zRa}>v=yd4GoOn5phN3xyiztjSU*-x7T|fl@&Gxwaay1PH|J~K* z6qp%)^^{uo>PeGdP0cLZlCypHZId4%`)m~+Fsg)1IOD-B$aL7rAi$@co8^#7)wH|$ zJ!>mo>jlLZ%=9Ps%dXw2|I+;7(}k+c*Um>xRu@hFcK*~a+y5Dp{y1;{ zC9he})U^KJ>Z?9W^F9Bs`Eu+3ywA4-JAS1<`JGeOf5_9~rBG3&55tms`u{Gd?T^T@ zaNtc3@BF%8qHIgaqsv;qgj*hb-qI}4ZMV>B<|@k==ZCG2uJCH9N{Ov|JG1YU#<7>L zxK01<;rq42kGGod#2&{AuJWG-_eEd46FzmVyh+x-Rxk7Q*00|}9CqveJjI|8pvda} z^5yk^jDa8bJ-xSNpGa zfAyGpb#>XU-BIhqcTc{iy>(aT-=DU>zK7q?Ff)oRcp~mEUKF44X)y^l}#-RHZ%l_$p8#|a)X7kD4{KK=XlbFQDIQfn6-y|sJy z&w^#kcfXh|dwj3ee&U7v(wyro3FeII2o(_HR;)@ zgPigP5#Lj0YJ)b>z7K!$<-g^08~Im%XLrxr_P)OEkbA$(UrCj9-qWvd`hQE^W_cwWEeG;C2 zt>>A$;Wo?pDZldX*M94~`)%*lU*GF>pVZybkIV7x7x=CG;nT%Cs^zKap>vt$aM`gk z-mh8C_atZjth;i1Os2)Zxqbgl^!JVh-tXVV-|B7q@BO~A`Rb*||E1T* zy^9T9v;Tis&#_Yn6BUhy~5RrAzRD^&EdI6TY4rK}t=HDc zy$^3PEWX&j<9?-WZ2cylqcy+QyC!c5TOT(mv&#MBrudR~9a&7}laHEVE&3d#d`nZeW+*dJzvwH%y!yISYud*|*{l;;gS1IskSs?Fw_jhUr zLbA^T&!3gs-#cB@eWG3e^!mjI&$X7XG_NaB-BP#SY4>&Y)<+e6%T8pK`t$k6X7k0~ zdd(6q-~T_!d#>_YA*+vy5uhz0#kCV3_TGJc< z@jRt|wovAF}X!DDhji=6~vjd%v$tf5~?Agx#+n z=L`62-#^vN#I)la%VPG|_6Vh|@GrhH@dS$TODwSzS#PY+ZaPMcU6dTN^0 zZ&|yw8kx-tqTdHvzCK)ZwN`X`k@Y=)?*DuHc3nN#7~5uNX}&>mjpe4o$=f0}z3Vuh zq{hP*R{V29%+9}mX3hKSRXD5edZ3#5Y?aT3uOq9jpM3s%p-$|-wdId8w*H#9H)mc_ z>CPs>2O-{z)<)jawr07$z1o_4dg|V{C*SXx_iO$Bu$(P5%a#Y#E-WnLNc{5KdgnX8 zv)?$52JYTBXU=^6fA`mx=LdI-pIo#1{T9WkFW#G9iQmb|-+kt9$*Yr|Uw)*ln%mjm zqmc7OWS-nl)#Nvq>eXM>r|!<*w0%|WJO_h+9lmNy+)}<>p8h6&R`2)w_IBshYCeB{ zaqs=UZ%_XC?VoHn|3zN7=(OKI!}t7+TkrC*=>N&*@3$Gf`zxSUUVf=wOT9d<|0>tY zpsi1LZ0afh{7vu0r)NL*UAVC~eAn`Mn`S+_zRR}oL)_X^>z=<6IO~1qlii1|YgT*sxqRa+kIpVK7!yZ&qO zr}x!U&!`(q*8jGxS#j+*=OKag*r@a8Jl}r_yZZI$7ylP?1+@Q0t;}AUe_iYA>aQzx zvqCG)cUJv)`hVB=qLP)TzOLLG)3wu9ij%c|@zuLAcUDi?^Zj*e)YX`In|i)heNC#5 zSbeqY>#DCwe|@;VWO_c;yWF{7HA`Ln>x%gzx%Y$JzP~T2v0n7=Uy$+sM?0_QthBzq z@B4p~<`-LC7r%PGvh!Vj)Q{G#zg2tXLmzIOzN`21MgLFr)&IK`{WF(|JpbGFa^0$) z%ME+;0;~6($;;pHOr|JmV$lkVhJW9F=16}2z5DCx37hNpKk7XBpDH2F&{y zAA0@C?yp}u9!%;sJv04S_{uwO-}@8}h<{o2FHg4f^W^8>-)~Vp_TTp6_g;Sg-@D)E z#jj*>cX8rbz5XC)?hCv9wtg9T@xQkId0IcmA*HtJ`>Q*Gw)bDR@A@)tZ)C>Iin$KX z--L}NZm8r*Wq#gwC+DWa-|Byfd-f-_#J!u?_xo+>?%!{|9uL&0`(L;G?;oqV?cuA6 z-ij?0wc}`sG`4vferw0x|EDG5m+nj2rJr^0^}qj4H)7wUO#FV&U%uvo(S%UD^M~)3 z>)sN+`s&H_U$%9vCDW?@*cQj98*NmUa{QmzG{xl&ZwuSSX{@p0VU{bpzpn^C`SUNg zzEwki@6lTR^2OiNCC>LuDc0am)lAG;ReqC|cfr*K-()m88ovH`=$2ZX^!HnG3tt@n z$(O&I0%i6j>oHbu*l{B?wMgRQwY}&2j_tOUWfD$)(^Fi&LwWBznNPR0>$_w%QyK2B{r_o<@l zor%%)C%?b#J8j8cJ7=fo^8arvqj|POiSXpi{CBr+r>)ukHHPKytCnpF{=WRe^pn4L zJu@q_ep5I@Os<>H(rw1sw|kHNo$!9&>0Pm#)UABisqe1)S^InU{<~jmzu)XM$2{drao|-+$qgv_`BWmrc4=;NpzOaesf-?EUop zmeAWT7n*k8+57g}_kI6kmi(W-w)$@T-0H8__oqq9*1pgCez|kbUU|p=_fzivKKbGQ z_xrWo%4HLLm)Cy3pMAGu-}}0HeYbdf{o3#Md9Mp!oL*Jo*Zt3Dp0DxG-xv4Se!si( z|AkM>%cmdt{Qce?zu9r~_mqEa+AF;JT2S`#->ct7Z%y73e0TMZSpV2-FXqO~{r;%B zz9nMWmIVw9tVxcue1*MO%a1J7y%_s z_C2rdudim^U4QQS?)UqzFIijVJ?YBJ$5LO_=T>dXkLnGxm^4}E$E>^Ji^}8PMof-; z8!_!S%Z|5J)8p^F-*>`y`Mve2XSZfwJ$hqN)sn9-x-zZy$8Gt^^ZxF4`%0Vo-<@C7 z)?d@RIw@*v)YnJ*R~KJ1+8BEBiKxf_cYP)AUy2?H->1KN^40TKcgO9Ho2KSBb=CD< z*SQ}3ULUdj{n{&wEX+BXJ$z;_-dc4u!fETTjd%WSTJ-&Lt)&FaY-iRhM)t4AOY10h+{@_q$gdN`%Hbd?`CNqBNKiwyM zcIzdz8sFn?ZQN>gEeBQC`-A4%ufD%J>DSXpHP!oP4Od9>SZtLSPmG>Dk7G_}o$%Hb zFOTm^d&Kvg?*q%}3jRkt20!ap%{?2o^hd|*psOO=4uwo#E#FiTSUdG!V)xshC+#2Q z?S4~d@ulJA)pOd%T`t*sU#}I6+qGx*XAT?tV=tb2O?Tq#tv#Rj*sZVXr!(W!GWo;% zBtEoU-%-e6&;I*q#fA31`4{8%h($IP&S^hCMz2>zw7K={$b)Jw)Ep+!9o-2lnyrS_mEw&b9Kk`>-(d+;y7OP)alFW%)VQA zt^eWbP0O!`DD%kwC|K7YxvFITpChMR+mD`R5jV-*^i$>hM=O7A@juVK0x~%`hUk_8~NM*?>YoE3L>_>l)PM$5j%8b9~^7`?UqqGEpjm7^zr%bIhY`Fk(Q z{{70hdw%P8#&2*^d2;FX$0Wri6E4Q>X1ROc)Ld#}zHN3;)mQya*T03eYs+7+)@jz1 zP~hJjW|!icMi;o8c{`=YkLjOP0+`D7-M7}RuzpnFI zx5iDzyXpI7c~hlX6!Hr$Gl{>drX5_kO2b~a{*j##dGE8os5Qu3$&?Nq((`6~RY z;l1DgymjBlMc-R1wmhh%?0e4PHEc}VBmV7QwE1h|`!696wtxNp_0CW3wo=NYP3+>otFy!VweOc1KYbtmy>L65>Fd~kj*{y8#it5? z_x^nSw4sj9s?&@9?VtYZ^PSvz^SfP&)xSS=*OqKyy~^AF=hny4mDamH6uqMroL)*{n_#U@$bK<`C>Vpem#`hcTMkF=#kT}?}uCI zP0cO(Y5Qc?k!rWoXa5%MU|?X;b)4lpIg7RQh4v9H^Sh`*7EGT*9Fa+3@@#m z>aBO8dVhJw&zhjGxiwQ`V`Ee9tqwo(UEMamMSGh6`9J&q&pG{c<@$*1`sbf@OZxTa zM(@ji8Mn9S)z6IZ_!6(9;nl0}FSfT5jul$}{l!%$yfhZ%dah& zn)>edD#f^shb#{{$$nqzd%gPNx&1rLPv75PK3Q$*=YU1oUEwcwYR%%%mr=fAk#Zs6 z?U~(gH>ZWa-}JsbWzUPhwSUaxzW+bFFK>(b`l&^;roE1ObZz&8?dt8%e>|#7WInLo zqnnxgSr7C1e*&9iqAgNI51qev|HZnK&%f+@FnJMY#KX0Ii%!`4fB2g^f8F;uo$33Y zpDW$=`fvT$?f0WgZT59*e*bf5b@cW9B@_Iv{#p9;?fSUyEXU=2vM) zl`yy1XbUJuZQAolto+FO-|-rM zo~bJQY3ft3e!4oGo0EfgdVApd$M^o%bx-~E^s~-7m$;+4?^C|>zb?3_98`W`>8=0Y zvbXwVpC;|^6)oF%h{q3`~F$#C*Qq(b-ecbyEUQH zzQ5nQt4XHL{f2O*trYhc)!*U!Bdn$_-FyC9vGwC6o2QvYzu#Yc{gZ>a)k&NA*Ejz* zQEPwmB(QPg-ug+uSxRp1-&tn=YtN)_-xuz8`nCV9n0}~LdClykMHWxq7-z~}fBfm= z)AxI&zwiFNJF=2RFXq+VPiuD8zKXr|E$~wO)QG>lP3yL(?Ty-7H2I>$^Q*kObieF> z{?UH*r%z`~v+w!yyomE)RGIltRKn+>rqMIcoRjMnixn>G2XW7kVwveT_1fzqt=|S8 zgx@{76%l>>i>axXPm9*0OR=YGT9a>id;G0DnBE)2@}auKT58>v#|fM*1q>Ns1<(C@ zR_@&_7Rq>OnNZ`+!qs0N1YXx|+Bczj-)f&5tM&YR_wQ^BIa8OL~ItRlK*+>{jq|t}=K~B(v$ErK{cwR?ADPd{UJ^u_>MS(LCkW{X^HvKFrwl zZc%){vhI!!Ddse_W5!A9509&J9$-|O`A<1rEi_y;)%1IQ!U-%S;WlhYua%a=_!y(Zw|LRtonjgCN z^J4AO^w>GI_WJH&WyhD-PY%5I-FJ=Z;{Ke)nNvPl@6!ekOI_V+D*VS07^di>w|j2Epn z@GRM{>=bAHy+7#i)yvK=&R;R^x_*O|$ujQr#V4H4=AJnCU~YTnosb7*j#VGJ3-6lo zCF$Ou{q^mP*R4NwCD}htzH)i?nHv4_%ze8zE%KgLJc;#4U(3y2Spj2mP}>*UijtT zi}u}FjF$I88|zMO%m@uvyX?nMz$hm&K}&?Im6a)BK}3NHS1ZdwSAzvwVq6^_KOYr` zzFx9l+;3;hs;O+DuJIEW9*eb!?=Jsv_1ZahwHIx-k6iMbVLye_T>Z<}6WsqdJbiB$ z{J1Fg@#5S{>h=+JD7NH}QV9eRKUZzvDS6daMHLp3mLPrmLQD(^b@FM$fnHjt$E_bq)sd zzfogU6cVpnSGRxt%m1Bc79=oDSoZOGpLs~t-!%t*EwR}vcfL`$Z#~;Gqq4Yl6W1>E zxVz88S!mha0Cv!R(>EeqtzC_Y3O6*|xLTQx1#-%5oj&{H(f4tt=azZThe~&$K)f?xVuD)dbTmD?d{VS*3 zf4O9Q^ZZ)EW#`xbBkkZCp0CrgSVEllrLW)Iaw6@&#w-!G4_hbogtOPTDHY$pa_8Cd ze|5nhu3T_WV`@;6mGWD>QnokuRNn0g5xPmsZf=_>ver~=t=qGY$=}Sfx9?A24`9}d z>)iN+`37?&hw!%KaQpp-WnR8rLKbY#}+8+?p9 zB2zSsgn5|`I&4^=VYI`B_m%LvN1cMb;Cbi{4Oeb9rlx`g5dmUCyiN}t zb_A>fO;9$1W~m_VUd>YyH<vkKcDUy;|}|WPjuDQ~M8Wf3^Gg!|&Ps>7v)>#m!ZnA8;Y$m95(8L(eTa zUK{a{~Y)D*XhrCffECjEH1v&Dm^~ed14M*==!=%eD-|%qisKxztvEXGjDwLyRJ)~ zlQCvRS<^RPmU~+`R>$ArV_;xwbDZTT+{Id&5nH#;GduRK_w;S+d|$`@+Zz6BNp;lU z?3LO3uId)v7ru2p>+#>ULAB{`v%gl_M(i@pioN=0|Fu7YFC()>$nUVS~T z{q_2y4R>#Cynk-%;s3>Vznd<)`mr=y zqx)}G^wRayfA9K~vhMlSHuKW#Yu3Noi&wp@kDIh^cemjd9`m`St8bR5J9Ue!vJv>M zdh(>2@VCCJXT$H8zP==U`v29KsK|r~cl=d%*39qz@n@nXyGqHmnH=h;7A^OF_SpK# z=TF`z->1YcT*~dZ<;4Gkw{P^%I?wF>Pyf#MTlO)3_TFD#w(gzpHLcTT@4lxd9%MhZ zklR?Fb3=Lgyy|0!jQ&6;(*_S@@P@s%ITbp@ZO{Ple( zxk%9VDx2N+-I5Ou7l-}WE(coS#_XFP#HGG#-|rtY_j0~&dC9u2bz_pz#Z7GD3+xWx zbX9!$uG7szK=Iy_xb-{abNcdUc~yR3m)ER2{Oh>*VS=V^w zou;Fl^n3N&D|hG~v%UN6I=a>5!r+4$yf-uqooRrlC!mpW^=SC;kT z6VVgemo0Wh3C^7UXM*Bp!GCY3PD$N8`QqR2mPx$R`W-B8Oj`L-a9Z2tX)XI#s&2Yj zotgGtw`0<$IbJ_rl^wsjzHF`j@3lXle0>@GU`5dMjjsYUPVeB9Utm5z|K8zAVvC%A z@G+M^owxbOnFopcxS!3-3j48cqM*SAmu`lY|6=B8-(R-x*YBJA-pnp<<2L`Xj{oV5 zgS(b~`X&E2uU}LDWXz`8|6jGgp7amvelCCiiOgFCJ)MbLzs5N6}Ac&^q5p%ezLgf^Ul5HWp!OPeNzLx(#k70-Y?7!WfRM}{_Es}-`6kl zso&)NJiGk;M_F~z{Y4KZ*=xzIGt}6zboWa+{p;bSk~8*%pLVkT;y3wFb$-v;wqM(4 zR@UGDoBA_x-EN2OiA&zU?D}}H<;Pq7wUsSAY8ATqRhw^I&wiEp`EF{km7&vONnwWTpP?kIm`g?BKiKz5La~R{T@!F`Vr72;t9r*P2<;v*macU-2Io`DYiamkYdOpML#q(dqv; zkDRMXIo9;2a{KBR^F!DD%(}4LqBDNay!pRt=GpG7TQ_C%fy1o(;(r$<%>1sq{N&xA z^-YiNJ$-Qh)mq7K9OnBC%+5@@Yd!6C&$lS&b>X2~zi_U-wjz0_lYU+2P21Z1k4_Qq z(>ZQL_dirpHo9kZ*DHSiVVf1sp06yH2V~T%FRF5_tbhOekI18vN0!r19sP0dzQ@kA z_-DFvzjZw?l-pQ-qtSog`|WqL@7`^a zJH_tCD{u467uw(U{&Rk_`hNGbL&^rHa-N>`);mAdWTN4oz0W3b^u^R~jq}{}WY3@f zX{tH@S~eGL+k5oIQdTDU=zCmWbnia-^?&#Kudn6=ZfKpW_x`>5)1Mk=vOb*5J%9b= z&Hrk1ZJR9rAJ+f+_t)_vHKU_RE&@cM9JW>kX_w;ZZ4n?4xzj zW_L-8{wMpzEBcyygneVg^A zYEu8doW%3*PtU$|sdiuX{;OiEbj7v5T4)JBeX=J>c>pS|}dtuoubB3Xo!it|IIq!j->3LqF>4-A(_~xu#qmZhC*_MS>8#)NC1&04b?RR&@2&}db>yd| zP1T=y@_#*wzY6T!oV&{|s=m-7KkZwMx!|-G`CZQ@6zOZNmwg_v{{Pg7f8HVc*My%g zcit@&*Jn57PyT)Hb^r9^*VnCc)zc4uF|Wg5QU9F!yIs$}sE6Mz4G-^9|N82s&-B-^ zCnGi&F5ANvvpK*`zx;iP+Z&Je+{cIiHUI8>y5Fh#-PZkaldiA6dQ$bqx8C-&u2U}m zcNTn`B$8{@D-k=v@T2&0OWoRqKkM!^$p)X#lW_=r^6`kx^a+!%<#{S)y}$VWYVe62 z>wdXb)y*hK>{WS_eeCz=l)Jyx%WwR={WLLS?L7NKuFPe%%#Y#?j_gcc5oVjq`Xke> zZ;My`zJrA_yB+5wHBOk~f4{bE#UXZqiXRgmu79}lNwvA}45QXP>azm8GiU(wP(w?(aWZ_tyQmdQ1QOSJ%h3 zZr@lB@rpSwDPQq`XR?pkuaX^hFWETR*)PA?pXAkfzxsW66X%=U`M3Ggc3o?76FPrE z;oNgsmN$2-HdWv5aOf*+a&oxu`?J!ccoJj3)4}}p0iTl2$X_o0eQJ4zciqMih0-Kb zec_-V-bU}Q*GFVH-@Pv9()iy;b)(#h28YlYj$djK(xL=!9XC9j* zVfp0qFCVeV76)sd&DWXFt3NsW(zJJ%U+xl9dtd#lOE7C?Qm2UFr;3*qiq>r&Hv8t6 zNwr_vy>DNK?Dy@(GtYPU{5I!}shX}4@y!0sEc53oQ|1Whi0wP4dcRWs*U^CJxV>t! z|2EbOEDc=LC28gHvd!R-M*j-o`AqB?|DSLEtI+?Dlhc0xg12oS)V{HN?mIPk-~TI3 z%+6^R@qe4@o=O<8u}M!B%v!}_skfuJOT2pT_dJbr|K5E2{6DVkitGEmZ#}w|{7)5|i-dG3Wf+Z(V=e*W?%T zn0Y;yu-a|hWB7MW*(|fC9A2{X=J$yGmf;`QzQ0)Yw`h<5?w7J!?^P>p z@-MyO;Ohxy2{CI_$SRq}GG|)VV~zhCGmbhQ?-Spmx|_de@74a?Eo*+8Tou21d+E1* zLIOu$MBnS{3BP~Q<x} zAdceDj}za2OSZk=a{OM*9QU*D<60i>_TS7={d3p;Nl&(_widJ9J13U1NA5dc)eZTl zEYX~&zfS)C>y2SA^Sa9?r|odbW_TR4#Ouqf!pJ=tM^5(DHz{o}yx>r`^PbsXhrT;K zm!Ew9y|Lx)*|W3ri&mW5|Ip%W{SKq+x6~r-Y(GWt-2G`|{_&|n!;T%lUD(Wi=PvSf zYfH#}zR=e9R?{hkKk>V7G=yv{GLKTy4SH_zU&e5;UirR*x>{@(cmGuve9JxS?620& z3-i{@`npnmmD=v7kIG(3xV_(N6P0lF4)rk^X~hd z>%*4yXFu5;sz&J=>{s@7~W7|9*9?`0v;1yVhOMXw#hIpZhLPrv6NeQ|TJ^ z!x3#y@809|toR(eMK+`EP;IvAK1=0EPd-8xzGfW$;$P^K zMHQuP^*VDtoct92%U8V-3s+vRQZHPwe801GZguXT?($Ec zel0Bcz3`UZ+xWY2f0NeE|5~+Bv`(Xcva5vAn&S7TUA%AY{8||@=Xz`w#}bpQ`}>nv zc7Cs8U|{>@ILlx76l>{`za{(akJhz*|GQfL|Nr|(b>g!{tIpId-yg9*V%_WO`@`4A zEV&!~wYYiy>;G|oQ{(m)*S)^G-h11w(;xRc^vv0kKUF+##y;!!dLiBK#Lc3V3%B|2 z`0ZgF?Cso9AHRD3`rr@W*4EZ-z4f&y_TSd9<(?K(->3g_d!I33&S&oW3%WC!8f}jB zIUdj17n|E!sLG(Ey;t>LUDRp*c=_+&WO}E^y?#04w4dvS^DR98pILr+75+1!Iq2{S z`#pD7yo-DN(zg2l?Ah;}D*vBc{yD_W+3wr+Io*#pJ^Ii!r(-7L%-+-dcecO&a?EA2 z$-WMklb>#%{MPzi>-QX8mhNRqYSVaYcCMKoaK}fgu-eMHyldh)?U&;B6!FiNxpiudhD&Ue@xx^GCGvt}vf@(feNQ-1$%GZ&c@l?pNF`ZgKuo zYASX)d_VbMYnZ3H<}!ixPN_P7*M(?i1pV3i>iLOvvG?^JO*Bk-tXuH>)|TnJZhsf@ z3w;{1Wcv&6`#Rh9-p{&JX#3MQX71TdWgKVE9$mS7;otjZSEk?bGc$9Fi^yuYvb8{O znNXtkx!CMapLlCG{;a>?C$vL8IdLHy!ORw(7&ay6L;a*Z#`Xe-KiIANP+-?PRJRoQdV{jNi=jDC*jtnzl*(?{^6*e4Avo zTy;udhnJ?gj{IC(o?UD|wyUP6D?B~@IdNum*6l}E`%IcYIrQ0`xCExY~Am zrN`%+FUz$bt?*X9C4Tj^{(4`%d%41Dfr3?+<}&fh%;Zx&lD=TFEYFAd)6I|70(LT` z)rZ{+a^#a0=KZtr*TX4S|L;E}r}}Gk{h1^4KX(>B50rEI7qsuw|49!81&(pV-aqGY zulLvs!(|QX*ER=7IfSn7wZ4>`R3n|$>v`~T<;t3`f8_*T%1nE|Y@7P_K-)V8i_eHB zJ>9(^U*Vv<-pb-bOZ$JX))QR3`A==;vfTfUQ7YRcxa)#A>=(7E7t8;zy}Af=J>aXo z)zhM1FZtObGO@?W_{RNPEO%Ab))u|GU;9|^R*X6KqZJydUR58~pI8`KxHQq}a@36L zEIGYh_P5nG{gv$tbSu1BG3$%*#y0{}AD>WPzd7IU;G9km+j}P}d#=7#Ybl$x!>de3 zWx|>*vKa}-W4sg2zVdzfm#^~m+ja@=8*^JL3~%jgo;9VA@vG+K%m3U@e*Lc}_$9t> zh5!EGP97}#aU$g09@&2;PPqRPO{GRI|FZm6cWYJ2r`5Xo zyZA3nQvYAO`pxT2vEqBXZl2n|nNO{LcJ-~uN zPQJE!@2w7PzL>U)|1a!aF1w*-Y0HM-+SKRk7b_`FvVW=y}yp$y8c@0 z(ci3fKWn`1Ec={s_EVkoG+`zt^EaD&JWO_YpRls3QuT@S%l~5i_~E9ux9PXPB(Idx z|IaZY&?agAuG9+)D=zEJ{V#f@K$qo)W&Tgw-CIs&&pMt}r7$Vsv-@vl{j){)!``cx zpMJC=`jzodz1ZlDxAz|r`S8NIXP?{mAB%Q;c=7jsM7qSy_UqTbU!QPv!I||R<}Grb zCg*YK{weM^Y^LrvuC%NTolt0d|LsR9o+o`?%zA-7!OMStQ4X)M-f7Ff?ECSS;~#~d zeV3Oo`Bb&t>1*9y!ItmB_iJ`nb8@LZZ+-bkJ?~@5tP=`Gm6`WfeL6pXzrL>4<&&>p z$4;r9ZdJA=FED!FO$m`w78UL+9#y><@DbD4m;NUegCk0^{OwWlP_^Jyrs6fL-A%!Sj;z$_0^x=DIHXM zsr_P4Q-0Z76UC=HJ)hTX|KfD~_Mds4`(K9iygHEEWG+dEeW*Gwu8ax7bVF-gi&l6>9x{(&qokyVruReixYk^#B6{%Vft{{*zy^mcQ`sU4MSo zx^;WAJof)y{cdmZ{k7$`r)#gCEc)N^Q++>svd@3r8!DR*yfrOfp`mIJm@c<`}?x3cl&(BbH(rBuJ=!V zPLE$HdSdqM>wB-Rx^mX#)Bm{daqpBqRn+bCy>9M3W#4k|!`3@a$KCVp6U?f3wkLIQ zQAOPCa4Vs&(iZDi#wlIX-nG@rds0$C{j3Xf>pJgDm?*xpH(q^e%HMA-+w0fIuAZk| zz4}Pho@rZk*Ozgz6sMcHo?0rtQ@MHZOMTb!M?FuMPv)Gar)F_Ve|CVmP3g`0fA4qg zeX=&TO7n8||2bV|8BrPk!**0n>#y-xQ8{bo&)3_<_l3*f^`DV(dtt4hcZ~lL&#Zf% zE^1;fZoiv6ZiVe||7XW}BD?ZX;mP>$hZFPO$4~A4b8L^*vL}D;AB@>j@JelFocjH$ zxx$j2{YOf8E}x$J`_+}!Y4Nq|)^xn<^=p~A{BCN1yY2T=IxffB8jmg97VVkxIlef# zp<(|=SL1fq^LJlgS6{z>>#IK|J~G`qe>^?EVPcNVm)+NA%-*}JwXj=tdAm^Kz1EtP z<8o_H+D_?PrQ_R<&K@1{RG=Au)0(EjB6-%Zlz7K=aZ zcs|{vX1RrD?Vf+V&S9(LkDZr`I}>xUYP-3yyxZP;dpCyfYk3-9T6*ovNxiwBn{0b_ z?!0BaKho`U?DxsHu7CS2P$}1Aztd|&eXz7u8}m;uk<0_;7iBeB&GXkS|8wYR@(nfP_8_C@ zic|Iq%-*;AdB+#i50kDh{~fU|F3Rhd>8cy&mYt1XynD7tT7T==`}ZFf9kTO3nlSBE zu!Q-kpYdPwR95MIy#IfN;+~rR=Gv|6CkRT^|7*BE*~~|trQYh{&Vt~oeR*5I=<_#uJZx^pJ(uJJRzzN@IX=7o`L+Ge%{M-M%In;fw&H)pvHAb1{y8@D zs5k!-`(9YQarUlzb5>8j`gP5Gk=@_V-&l0x-kX09W_^FS@>7>y==&)?Z(65r?F=a^ z;!>A(TGfuuzeHVji4~exM%>~l$|`7#uRi;}{Ka0+diPh? zmVbS!(S6U~H9mNG?f26+ZSE(v-us<*`hR4r?SJQ@PWA17y;puQ?2F}ZQd+p&_wY{6 z$3m;V+S}XpKYzd9e){+7koShN-`!5E&3b*|-&EQdbXE2HJw^?YDH@`jtSpBdH6jW`!259| zK)bJ6k0=|gu6p|RukHHsGj}?pW*Ubq=2QC>KjHbN52fG!e6-J>8|9o|^z*avgW{7{ zi(ghf{Z+{QgKbUUt4SM+-`+5C{BCa}Z|&l*za!s2qf2n9TvXqs_BVnKRa2WEZRNXj zKQ{j0tvBzbzi8T=?Rnkjaxi_<{^_;qOBJ{{-5gfN-fFrZe*WHcm8G|?Jbt*xfA3AF z_cbMLi?^InQaZGK){3czH>Lg(5LGD4;rS!p8MdaSVQ>7S4}0!)7&17C2HAILmuoJ%-H1BH zP^K^`^}A<9rvabHd5e4NDyn|WdRw}(mFHOPojEBJ`)c=vY~$KDUOFhS2N`BaUm|2mW7ERkMXp!GCFPuNE0z; zb$KW>y?x!ke=X16tKK`k>!@LIpt|#cg3phwYvOHwyqxdHdyD_gU*SuCF38O7pX{4e zuYdZ&0V$UkN4n#_&M`Ns`MFwUa;b;fodVW%#pl1=IdQqLnojr; zx;OH5@e&T}cEm|7ztfnP-NpWUPuZ2r4|v0mh6imq@ksD!?P=n;zZe-}=$i|NrNTW9v^a{ryIgrXLCrEGo8v7|cRKx9jrBJ;D#ty}-T zzQgJHQSUfYc|qy!It85_`)*9GKW)VBzIg(p_oWFND|arpRK4P7vcMbcrzt8XGOR2Q z9X13QhzN7C9CTdaqan)0+SVAz8F`-f^W#2f#LY~RBmRl>)`{p5w@-}i4t zr0Eo{bbjs2Vs_Ec_)Rl3=IJmb>4Yg=;xfu` zGGfy8_Tqi`Y?6oJ9+nByizhu{`~PnBnvlN_3+p%7R<2dGT>gnsb=fq@cHKk58jtjX zy1qonFlvc(XasSyvN9$dh%gW_YGrc_1npE6?C_bsH@#mr)c4eD-Vjd98D6h+lONYU z+RN|u!~Xw{&-Tk@Y98*$TPPoMdc7=fdx>av-}T6ke%5~l=lXxYeZOMO>aulyzu(@v z=Tsu-ZFcOPbH(4dt)lr7J9xT-Gx}6C^OTeXH@zyf{bjAQ@$Twq+qvEo#NIzB-bL&j=I4DrucGGpy7+5`hrZ?Zyh>fsdeSTW%8n(1 zM^;!cYKU}b7;&|>G!=l(dFNtvI_M}7&?d^o>7||@8NFZDziv-<)t*^-H%l!a7n{5a zDlz-m*SzETEA{l$nCy3N{wwpxzdieLiCp*6hj+HDznj0|YkBRz*1a76qjv9)S(Rk}AvCuB>-69M>;J#{|L<>o*xT%{ zb^HJS%@VjVk43r4Z)>f^l%D5Jg?5}rrXAe<-+b}rx211=b@uI5T|IwI{JQlQ7yZq% z<0!g&XZb#X^WKUNQrn*{|8z{I`ft)((cfYAaqFTFTCeUn{``=np6~10h~Ah#VcNf9 zy+baYd{xr%C9UfG3;%HS*B6%Ad)G8`^1bDKnOn6sX@A)9q|K&PN8|SDy}G|$Wv-(QPX?FnBK6kByLLg_>D-Me0`S$~#a-1SlIX;PSJ?ZUlx7H!#){$)+wnwqJv z#TA9Bw*J~^9dB^>`|s;r&sPhjDZleQyZ35F+>%ErPvYLjn%>$K_u=za*#sf8Y6I)?L}x!hPklcSh(h?^3i^Ny@g{dYfKi{G|Y2CFy&HA-x3+j%E%-0z3$LGru~ovNRrj(ddw-7o{OR9DUj_!| zdycaLrf9JiT~%LbtG{k{#JVSQlU6O;UVeA=^wQU_md(DlYxu{XWe{XG*=8Z(tMD??EN(X0bF?-D6@i%`>)T_hc z`xif7Uw<=UZe_E1a^l}8^^3pO?M$xC@^+r_wrbU?iTB=Rt-CNSe6`T8U3-)MW|?1D zH#sR$Hek;F@V8m{al1BE{lBrUdeXK1Vb;^HYi&&yJ^fm|-c|76k5gqYqC9J}_C`fk zN7wC*+w?b1IrP;bxvQ)GIx3t!VyVCGpy+8AKIXdrme;?ur@jwQa#8X5P}^zfeM_1( z>foL2<>u={@9m9^&j^XM{rB(ty>ka!=FYwHw`noA(Y=og7haP1%=PG+nj*{Z#gcX9 zn-}!7Ofc7#eX5-K~1la-}o$PyLtUf)#KugRj<~)^YP2xZy5Wv&805m zvy0Ono&7HFey`VE9p5qKmUFV*`+eKrZk{aoZ~uBj`O*t%vZqx~m+cEyPUc?yE>3wR z=enIHtA$sXKT!GCW0m-A;osw@&z`JjnU}BiiMu5JOj_e@@#+6Qoo+q<_1&~ro9=zR zCA>EC`jt=YykpPbx#gXL=fgg> zv)4T|EPl>%?|Ag;D<5NpR$qI)zFz0L=-0KOw|*J4#$SKbU46JKHS3pLP<>kbyO-}@ zuKsh#xPJMunbzVReO?Rhxt%k*D3KfZ>9gU!|0k>eG6}t1l{`sfN>}Xnv-*psYDMWc zUB9<)&ALtdwNJhe_g#4DtX)Y*@u;zSFK;a;aFwq zwbM(&RZe|>9s2#pMn2v*JN-_5ewtOOBVk;9r|MIO=B0a4b|=5yk1c%>E&Xlx?ukFb zpMQ^xTYJXvuF9(H%lE^>W3&DKpKN@x_rv_(UrlCA_nKZgeg6Be>-=9S{=dcX`uX3i z_ve?~pM3YtrmwGGpH^R6@krly{`1-&)9cp0j99wgWy;*eJAduo>Lnk1clFL!Go$xk z*R)=9|7DGm_kR6p|L;05Ft9K=&I%OqVy*S?{VN$ATi^f3n*aaf|I@Dicl`fb%c4ZyitpnH-^=}T_owUTFXpP-%rBVKM}Fi?W%u0p+M)j2t_ekV{#MI=I$j^{b@%(Lz4x<@t$)$_>ZWMt+th;> zZT`ev)j!Rm*0M)>HB;^FBM)sK{_X4Z-XCTkHUDb=((&r{^f~y|J?fNnQn%;Z;|Hu1Qnv`9V zda7{qDd%zTdB2Tkd0moy604Mc_g207(R%%bwMqLYFZocUxBvP2xYg5YXScJeO!*Ki z`Txx(j@upEcK!M4QYvKs_M}iydX)L(2m8OIZWgwgH+`RH$Be!`UoS~#{@(Djoc)xn z_OuJGC+6HQe^LGCp7hVk`hPFx>8*>|D?KOa!`rWa{hQYFnRK~vGEZw&Zeeeo<|O`b z^6@{qjh{8V?+3lgFtWPwD)B?Z^v5OFb%dYnPg|q%JLr!@P+v#)a%qRfCt~a$Ir7_y z_1vC)bo$9Rp^Baj%^|fNr|Yt$x@J9DpY&_uuZ{ORC#U?hTb~iJX5Y7%=c_lZe=EMJ zqRz9j`o@lonLe?>$2{gwnRWJC)kK#MzovxQH0@+PWi9@9-sRZ0*VXT@+Qhpl$mMU7 zR>bd!m4D~F|M+qH+~|2SqIvh!b_*|%F*L&{ENo22>aC*S`% zy8OJ;sllbo?X5L$-OT-^<@yOd(f3Y2>)ZG02$%N~uiBO230=QmZCxg|XP5oeQyzOorc|NAI6&%(*5}oxjelqORLC*EiFpz~PiW)5R|1)~+?h#FoTneZ zXlM8D*ZRY5nu}Yv%J2NY^6uIB>$f=ZY%vjFciD3!^joarpItUCTmGC$eZP@4+jOqm zr|NxA)(6_|SpUxbOWW6&33DzlypgwGz3q0&oxP!@dj9PH_bq?EYwhklb(N1S-~WHK z^nAH=UCVs+XZ!zJpQ_&W)BbGr_mx#!!p&Y!{{8>2^yly8Z{z;{>${j4dh+?@zjd3I z?fd&@-k-TUx3AdHa9Mxa#h?tb|E>+4^Clm5ou+Mab^z5dC!|A!<`7-s(aTfM(yeaw=t>Di&JTbcgt z5&cwEq1ZIhYR!J$AELcC+r1B7I$i4D_rCA+T1VBV_g}qw)%iE8`~UaX(~q9iw-Pz` zUHJR$PseL(zqRaY)$NO0`ZBNR`>msQKWk5|y?gcZ^@YBxzm?vv{noZOURYOX&Blne zQ9D)FRq9XQyZ>pqqIvDrg)>91W<;yp&9c9~zH8R+z4?;@?|Ezw{(d4hW_xAn{?}J# zTq@U#lWu$Ze&73jyT87lymN2d->T?GSNr1CHaOpTfBt^$_xroP#ztL<7Y@I*`D^9o zihr*_`!Sq%pM3DM!YIbl>}Eqe;YS^RKTrSJt*ozHgtS z@a;eEbTQT#^`xVJTXl0TO+P5Iq|5te@s&rGoBZEidj75^l>gNGODpEZ3oftg{`S+! zGk*2D&X@1v{A2d&wt8N-(%W}VIW=eci91~r*X!GQxJ>BT*Yx#C)c>P@Ui<1sGWm4# z>#q72yZ`5@_w)Lf*b1Gw`Rb2FW`6Xt`@3(4?hK!=zUcph7N2QN)!#$zUbBz=8Jq7@ zwe$Dulds=TDK&~){od_f{C~s3+q_9@PcGcED=uFA{)9Q|RaVmXS6^4!e`mg5wTOsC zg8AKbzu!EMKW6;(W$Cw7MMdd?xu^bCuY6XR_GOOi`Mch6O*$QuPX1nJ`g-Zb>!ISW z_rG|^`X%CFi@$ZryzGbT_Y15xKee-bY3;(dpTpOd3r~M~ZoApGADXVVi}ziRviO=_ z7ap_4DUobnYzJ8t1uV2qU&i9S0nzH4lY0&{jC6OsAB7&?=je!OUS`yrB zObZ<~1++wjS`IY^E(lR7UuRTYx8?3<!ahv0?s+HL>km(P9`A9;JL z*xu^%c{(<;4`f*_i~lLKUo88(QRchjYqr%G*BL!{WG?IbCHU#I>WFofb9a8>dVF@$ zN^!HlZ(apA^Bqq=vuEC}dfAt*x&Jg5+?abr;3>Pk{QQU)ah8SKGA*CaV~)FbL#uz? zk3+f}Cz`!q^v-4(LrB`8V|jr}7gbHximVL!Ryp;Y&YOmt0R@a&B2zS!q*@L&CMu+8 zh;y+VYBpGKMMH?QmC4azMSzx|)heSsmG^$U`@AYD@5q{|Mn9SgWGv+iA76d<{onq= zYpUfRrrUAb`fu6K_+$C+lKk%>_VJHi#_eDIGxf#4XGUi2&-GqTR@eT%{iNEKwyM`L zes5LJuble#Vx!WLiY4>Py?^u1Iv!&l@HuLS#-Db5gE_r-QgRimrfhvQohQD{Z_)Z! z+pAtPufBbn_g&os7SrVkP4g2?^j2;c5cBWgn-J(`0P?_u77=0CNP%W+Q&ZrADH_Hc zbETfz{&;TnaqY*TsL86&ob@|Btz_bxAKA(C-CyzE&-J2A|BKK>|9SuTT~>U$skqbp z@TuD7df)Jr|8BS2+drn5nc8ofn$o%N!H2@PE9#gI+&Xil*5s(~opT*o~?nnEZRRyYN zE4#OA83h?T^eP&xnm&W)&G}6S9x!TzOh^%tW@S3uupvQ1O^CCpB~UR$seFFrx;yuZ z-@fjvST5?(>3hmgDCEN1KYYIbp5MLi@}yesqwW7+q2Bw7O~s!6(a(Nxqj3GgN?Etn z;Ty`sH*3#nKD2~!P2rwI&Rp3EN8}ePpU?c_!#(Sn3eSo|vU}fe4OqG>y~F%&;gy4W z-D`7p$9^n0ICcKo6^E_`?sLz3cSTHZzwKA`UqARBc3m;osSN#c&T*c@ES^NkAeZby zN!Is;th;q@cHHF3&5dhq zgMJ!)^T|9MG3~I=B&+RV_cv%qW%PV0lP~Mr_V)eNu7jc1`VVe+&t0u2{d6YlCe^ih z5n{#{iWo$=Z4PdmK3CId(xh1jT+VqnF+S8Q3Q3e$ez_$0&w>eBHx8?QJ~m_0!n4O* zB=((td-qhZxoK%rRm|i$1>&5mH$J$#hrN=!GpzLHjNYZO_D8ckk&gUPyZ^5f2Pek7Zdo13+Xx0Tc`?I-u z_AkFR=pPO#-<~z?s!f!2L4DSojIHeJR9z>n40)=u*mRA^UB@8Ln4<|6j0Pc7G*m(3 zPX?edLT*;@Sha^L7fVY^!h$PKX`4Lnms?&ppA(oG>w7X(IDESH!n4Qs_#Ia$(2HMl zYtPYxcLls3^}e~gIQ~y%eBQ@-X5u1$J5Tw34-7t?%p&!hD{+on$?w%Af7gAE>Z~cd zu=}@IpR66Tn8a~Khus^0$Is*ND`hdw$gwTYebS&2m8TNgqGYc8Ux}yIZrTz>k+k5j94 zM7mh(J^aF7$Nt+}x3z3z*sJ$fCs*w+x^Ew@wl)0L-p}5z@0+hWu6KRc>T97VKUMUf zjG6sQIz=46+J!CbBX(Xh5A%=z`hNEQ&iPl*PmQUKSvB?PpZ)hIBq;v3+r?sG z*mS-4*Qet}zqja~e0Q|Grnltk>q|LzL%%+k*f+6!|Ne_|KC`|EzWVxZ^~#9X$A!-w z*I8*ATqwWV^W>-hmA2PjzgijjWv|(5@mI&QBc6XPuG<@P>b{`hYpuIgewAj6oV<&F zU47j7C12^_CyBD^jD#2emtA_dqwh())mQHr*Eds|@^`5p%f3JDkjs&{CyNC9^saMS z?o--u&ilputM^*23w{(el(F#VRJ$I%Yu`)J*x0Lg1@9mI99ncS=I{0QPa-SpcGjMZ zFWYHfzgvB6b{a>~uX_Kx-zVQqc^8-W?oZubmeiW*fAyZLd+ja1Z+rIC-JgGNIBdUb zUh`j|{cG%>NvDOiO|O1B^8Nm@f~NoSze?tY*EQMJhhL7hy|1$_|NZp&`+mPyZ{K}q zs`q`HeS6DQ_BmZWd)MmUuiq?eg-j z^)znnyRCKW!q>X|+HvdE--5r{J8iR9uT6UYY}fVO5!Tbsr5@ZD|2|T0{TJ0$|HJo( zPY&(g`g-e+JF`~XUemrJ(9>)Be)40N|54Y!R(;W3vNiwu?#Sd_nYKymQdfOlw|i^d z)0NeC z_|*S>O@#lo%r03TE z`TFyOztZxKuNwPRu1$M6!S`UT*F5XGo#AU;Qr^t^TfXi0zdr}HWbVJdx_)xi*^S0A z6U963`Ruyibz#v_tuMN&L2~&%Mz1<&t<&}~4SJPkGo~Bis6jB$l-c+3=KkNU+jQLgn{Oi5HJ=#~kEAIKa z$IIud{Z&lPm5pC?de;)CdrHE~pL~k*y=A-p>h4vZa<5gcuRN~5e75`j(pA>;_9_WI z4dnW}-qSNOHud+_D_>2Q>Xq-TtpB(E_PbZp|3~cEcYVh^z4gNsb$Q7LsUwCR_5bsOPi3uK=Baz!=nSu& z{hXVgElDD3YD%SVYx2#XzpSvjqZ=zLu{(Kl-}zd>k|0&@KfG$%J}7# z>+7of^QO(8t8(3`@6DPXt_k(uPNg)@$v3q1U<`cqc(T*t1a?M=ng611x`k|7qP2Wp z``+3o4E%=wJs9}E#Fa4ccmAKiATRx2fx-0e_dm4?3=9ml`yC(unZUrn@LT`w|KmLj z3=Fd2Z||Bvz58B-0R;XFKQQ?g0b()ydwc1B2?K-NopLajfq}V!fq}O zw=5$CBys%IyXD7Ey<5S+7(ao5eQ5&&BLfJ5q#&3PLcvKiz5`qWD}$MVfk7fOKcyHX zE6~u;z|P?R|382Ie+C9Nu9Bj}Yz78~n38O;TNvSPfjNPFW&lVZ3P!UZ)v*i=AonmC zFfcHJ7!0h-a#J!B85kH8a#O%|Fu?5q`*&tQ=n=5#iWw<6MPL>K14CJ6N-B&ErCDQh zGxO3I7#Kv#a=`{6J3}P}A}5nllnPP<(gTv`DJ{wY#Tp|61FLLtNm3321H)_xJz`(%FAi%)D%zWZw(mO^5){Jv(r$LSs zO3u$I%1h2kW?%$E1_p`Zk`i#dL7ldYfq{Dm0|V2V;*#PjBwLp;FodZyFmNj{FtF}n zU|@a3z`%Ntfq})Gfq{jcfq{7&0|T2Y0|Rq00|ToN0|T=f0|V1b1_tJ81_ov?1_tJA z1_q{W3=GUZ3=B+F3=B-i85o%K7#Ntv7#Ns9YU>#o7~e23Fur17VEoF!!1$Vhf$AKz`(eZfq`)X0|Vn^1_s6+1_s7k3=E8i7#LVD zFfg$1U|?XYVqoB0$-uy+!@$5*!@$6^nSp_`hk=1Jh=GBJg@J+hAOiz`2?K+G2Lpp3 z8v}ztA_D_YG6MsTAp--q69WSeF9QR=5d#BPH3I`LKLZ2T0|o{zJ_ZKvYz7975(Wky zGX@6EXABH%y$lR2KNuL;yBHYQ4>B-ter8}`KhD6wb((>J>kkJGmybKJ?yBQc*vKbhd z9T*sx4Hy`h?lCYhuVY|fJoo=jmJ9|4CJ?=qfr05J0|Qe%0|S#U0|Qen0|Rpp0|OHq0|OHy0|R?K)NasOG+Yb} zOfwl6m}MCl*pD(Wu&!ZXV5wkW;3#BZ;F!t4!0{47vlc_bg~OJCfjg3cfy<16fo&H9 z1M5!)26jD&e^`z&FtC8c(ij+63>g^Mx)>PP&NDEu{bOKYna;q#}xr z3=B-5G*!yLz@SlFQk)D;t4z>z%mSsg7#J8}VmpdUlJlYJK;kPH7>b`UFm!)mU|7Pz zz_2}qf#Jkb28Qc57#N-`Wng#{&%p2_o`I3Yk%3YEFau*?D+6Oo9RuT;WeiNr2N;-a zf*6?6N*S2CWEhyXEMs7Lp2one%*enTyM}>z-gE}$S1AlEYFrE~c9{$;KK~h5l5aAw z)HyS-%$UuAmf{L9@#VI8PAiuD<3xpJ7$q1OQ$H{; zx@5C9G&C>?cCa-xG%$ivHd+bJ$jZO~O8hLKT!B=AgTxuWGcYjRW?*1=$iTqxl!1Za zB?AM)3kC*;4-5z`$?~YW^Njg~`Ccu$h5@VJiaz1Bic) zfq`KQ0|Uc0s5pq;&cML1pMik^M4x3~U^vLYz_5#ffnhmR?M4O$hJ6eS3?Q>X;vjQC z_5Lvi28PoN3=AOlVFm_<6ATOtm!KHrhOZ0^48Ir{7``zufWz|-0|NsHgZu;X2gvUr z3<{@v3=9nSp<(llfq~&a0|O(d^#jt&0cHPXU;u?R$ShDSGm0>PB9DRLFQ`Swz`!U7 zm1kvOVB}(8U<4`p%)kJSo9_@lNH4<=1_m$&#mQR+28PEB3=D4=7#JQwp};r$aDk03XK-}%d7mI@101IyH-Y>G3M)`L0@(#}D=065nsK0b z2Bkkx`T&W6+zhez=00Iy*r>?B zaA-LL!=*k3hWmXC44*?782-IrV3cBIVDvi9z*wijz<6{a1LL1g1}4)O2BzjL2BvjM z3``H#F)+*KGB8K5Ffh;bWng}?k%2{VCIgFQIs;4WF$R_j2?mx)Sqv=8(-~NH)G@GJ z$YEf4&cndUevE-NIEI0>WDf&t-%JM9b&?FM2VOI>AHq@5x<_zSy#6s9m2P|rM`fFy!O)AcO$b!J`6X6V zvE>y6Rc?I^MlUP3zO35z3O}mZ@dk$8)$Bl^+MVxlp}Jik!0285?)Qyg)U@XV8p0<2 zsc8=gHShh56}9gF3PEiLzQIxZ!SBeZS0NAOvA|ocZ5z2CA~{)c@9#2sT8j z^X&hwbN{=~|L+8&_6z^pFZ}N!5%pa7-*e%A|9LPv`@iqZ|K5`b)N|s0&x!wi$N%>p zLq~nbLE><-|HS|P6aV`l2qJate;+o~bNoLVLQ~d%{67?d&4idd@g!7m;>rII)PL$f z2%Y@j4?_@fI0-QXWH`iCC;s;y2O+2jNCs?|ctObvxeSMB6{qZU0cW z^&K45Zg~qw)mvXzZ+%_0CK!K_cE8> z&RB9Ieerd8Wt6k*4!knTU49o*8Rf0Gho?S*)JK|(4ftd$hFN|L;bsjCxOkDkJ*WM}5bjRS$AG2B~ErRSc93 zE=%E+Q9rDng2=+_uc1;OO}PTCk0xLFKj9*%c|gVb2un=_shhxIh*=-?!K!AgwGU<` zg|$AKa0*|21gVUAj{bu%pycttJ;(k)s-&J{f4YwRLPn4(36b4Un}v{E2`6#YL=bU$ z)<-o+l@X*?8aee5sA9s=Kf+oc6|8vxsfi#Y*7^wEQ7T^l7^$O#=o}$;l(1GzkjkiH z%PZuX2*M_*KEhKaVb({G-VlU@bd-?U5D`c#sA(Q^1l>ye+Whkv&n`2#_n$3f+M_v!!Lr$L<~c>f6AIf5`C{iCMSupSe* z4+ZH-wVnj^kI=eEooD|;x=0=8KwTvA`bXX8K{Z<6x&J+9K%F>HR}NeyL3&0#C;oTC zJ4Z+V_aY%sIgi}^f%J)xNn}|hHd^PX=Q#Fm6v%ue)B0h`AXQTTaZrB&)DeJnj?n5O zxH6DAu$~UF{UeCI5M9V@hzL>_2}}P7saotm`5#m!p8}I-LH(mi7qRz`CSCnM@yh@H zs~|Mt%KzSr|9j8>?>PtRTana1g1841EZ~X;&VclfAS9%2g0jKNkn1CO=LOOjAgN;N zKLP5d!TU!OPC>g}a5LcyXzg?Cf6vi>Cp#T__;bdGYD-_BWfD-Vpoougani?63Gx`y08 z!qr8}S#byJcpaoZ%3pmSt&0TV(zHG*-~19*CB1-DLvRvaAK~aCq4bX+V|S2V5@y{5 zu9Du>?Rp2PcaTX)?Sm`=VZ+lQt||#@l15JaZ3n)=t0X-2QP+w89Vh;Go%-K#5`-W% z3#1x?lf=|V5RGl8Ktp*QXQ1^FCF6CF`l#m&sIG+ckKi>Cgh5h$1TSYmCC@QL{ROHN zU{w+o>m$$*)^X@~UGH(|xEZ`Af-pcL zPz)mVlJInt&}uG-8$gv2co-034m@GN8SrYT>+l~0!qy=|tA;=W5lB5Fw9JS#!?y1K zid+q0)Lc`;2vRp;tB*?8KQ3AKsA$cDqBW0-);xj?))lUPh;6X$0c5ZaUK!=BxPx3DLB{Lg z^-;=#D`@o*gbN?I19gs8+=JIgkXk8k)jfDkgvNl>L=Y0KJ}O%G2)SDXXT$qPr5m3k zBci)VFDkZx5WGHuFefLbe|aUqpav! z^%0znmUNr(F>VH>N@wIy#5iSCIU@Ao&Dc^28>Sr?*dO4L24pM=cxPm z|L$Xi5u}y@DL($c`veHVOP*d>O$4eMj)7)FAd2B6L>vnV>zaYeG$cbH6%Dj}22Tk= zIzAIm{GWK@fB!Kg1nE^l>JeCd1W^nkkAbJakW9wn28b-IR_Z+tn$G}L?Jf8Wjjeb@hYU->`b5~z#R zcao_3s1IBXK`R2J>Jm~J^_>J&M%`et=M<11q1ge|B)f~K9flL>I!W`mjq*@6g z2vLbfBKM>~QYZiSo%jba3OOb8BK3(7)eyW&0@p{73J94*>k~n899lMnaM7|VW?qJf zz%x690k3`FlSB{^cx8mOK7v#_Xe6i(+V%#Stls*%Zrhu>ZEtF~zD7fk+6P|mRBd^M zRue&_(E3NP%IJCJ#^;dPB4iRabM&-i-4n=oSMl11g{$vF5RPi-0i^CISaAmpLHb0H ziY9yM?W`rYvXow~G6T6XLSy5pn;?}DTJ2N0^(C4JxF&+Ec>q;LRomX6)kJVEa%BYXAJy-C+py~$ z8iLnB$P7qoL?a<77MVm#fK*A4Y6zKxbd(@nr1rC*DhV=P*Lfb_k_Jdms^i@McJNpqWW=ux zRzG%uU5KcLj)Mm4x{m$tI*J8hucW|Q(CQ;lAqFYc!6dvIf-`W{0?6f9AFQ_pSBzO7 zq0~f>T4`|9N1$%Vng0{dfqFd?FGK62?yK1Aqy8KJd$0YUe3hE@5vbBR4XueFY{;k` zgoM<3pi1Nf`l=O36o6t7TsJ|~U?nl@BhWMuY#9u^Hw0(EE2GXszu^dLy#uLz;3Ook z!AVFqgp-hLi)*+8t!D(ObdX8p3J8sj+$Tb-gCJbwItb2&)HrC>5S$CCZXi7%NM!_3 z4-o-ZLoZ?E^9t}tUHSUQ$n{a>#;3@Y5zlwFi(&3bgzHTp#5xzYRf< z$|!Hyt?Z?@VU^LX%*EGJ=U+~lcPVMk1$YI7#z5{OK`J9yee@s?wBqhT{_2OQ2(LW-m2~r<*T>Rg05lh7csgFQisx#1;EXaDl zu9N@a2&F#4Q5kg|#nEGe)JI4psB;9V6u^~H-?9Hamd3%4N>CB-G*)0U@n@=yE%7`8?q`s7E&O_pqszE^g8jSEk8^MG3 zgL+RORscc<=pZCy?h=`VtoDT0N1$0eu#cc23<)135>h2WV-u_rA_5_?=mM{l!>oNE z)excrI{2#t86En8wcddU!ZR9#0m*%E5|TF|H4dDO%s|fL@Qe>(AXhz*dIur`VWW|d z+6O|y>m6i9_14!AE~Ij)hINZ-w!Es@@~U?0YY3wTwyX|bE5X&ndr7d$2sD3GvFTaG zrf1;p&(qTNPs=wxFW(62B2^-h-meX5o8b# zJfj5dA3^FPG!ouRLSnqB-TAg21HmgJGzKI!qLIky7HdNbo&_KbNbYDm@C{NE!AZz; z5~MzAJ@ON>_PgcCzxLz*+fRZJq&{jn1*($Zl@WpgUJ8!fKO$Hk!K);!^%0~p0;xF* zo_+$)WI<}Wu9N>ePk>Oz@&8@NK;0t9$Q#z45mrG+cL~&)I)*Tz3p6+bZgPWb5Xf?f z?xX*^k3wrDC;1ywW<^^o!qG9m|ILr8Ev0_k22czrbC`2PvVp!E@`Rss*%A=iw( zCqT=#k?W)GtN**N{_nW*zw;^xL29M$>;JoN{OVoXx|XVMy`+G zRT88of{>Us5n6o&awNEO1g}A`R575~L|G1pzB&#ZJ5UqAD-(K-{)1FN5E5PubsqW+ zuZG$W{6rw!6GQM^24O(59-Kt3cd%AT@Jx-I*&&q;YNCowFCYv^7YWiif{0XZep#{U1-MoM^^ef%BajrVGJ;e?rE4EU zYNC=gk4ix`(Iap-1XLM8dnn-g2v%Q!)^g`9yOqEEHl!;AuZ9Yi-O5{X1BAe97(kWL zvKxgO2n`(}RrEb)NxEMZr5nkje;B6LlT?-+2Vf=p3jHI`Y2@jxa{>p!E?%99$)J z9sdtOpy4}6j|nmlg;XD5_LaJif^3192B{OFBt$Qa1QpJx{VYhu1R}wuB8YMP|HNbe zCmj6`sxpp)NJupS;vNH4A`_5Ec;~412(+$5b{kw4!stDM)UkrEPKPb10BuA%37ty< ztpfzFxad9ef6~SOJ(vIYUH#vMiaM|S@4NyM?6~&7;~J=Q)N}oR-xW}N^+D#mVD%1K zHH6HCjG;k9AaxKj*>wuEUZwLiDAhsw=G|xicY`;?^_~TFsUY=H-*JouZ?N@mkm?Ci z*TC6u21FcE`#{)`3J9_~4o(uB5`tDt;93b%@1T+J3JAi0WJb)&2wwYOGKj2?(5fd$ zH3T8yRSy~iUi%<3Ak_+T6$2N62!iVx=%5^23R$pX(+dbHN1d`S-S85Eiq}0aUjMXc z-IJ2F&x+SPEn52wgw{O)_lzDDz}7H82IU|t79f*E`QT~@(ldh8M-Z}L*{%GgH;a~m zQ1S9xB`a}K)O8(DMC7tKDA)V(91WUf_QJ6bga=hCx2!nM=^UQfX*CI|z$ zdV*9*xT+zzI9h!KseLfX0j!T2_I!fmg~q*~A+=KT{;!Z)sRh%lK zc_oN~cJS(j4x|A(mx{I)PD?AF(KDP5H_Tu={x$LjQVKe$^Vm1fzagBpe?bJ zPW_((Mv$FQkX7G(Xa7&W1R90w1y?=DRT88=>bwG~j5@FX?*XH}E1)&}`0FF&Q9E4q z5oETg8@wGGQU~>%1Fa;7kdS?6@cAR;`l##hA4tuDNn%PtYaj5kgw8|15nS;41W0{E zxH3BU9bO;7t073egGoXLE+BajxjuqaK=2K%5H2zaua6)-AP9+E`#>rkxCn%cOd?l9 z$W;%V4e92<$K2qx3?>6o@4%~@icK#-LvY|e5kwFo0wK#cg6f^(^-qh}KLrg*ZFmW) ziPk-b)IJ4k9u=*AQndO>;hLu)1nwLatRYYz<%6e+kh@5E;K90*RrktQ->X=2zhuSj z%()jb=A4i2KjdAz!8&(=MPi?lcZH~B5|>^$yaIwVvX|b5uyetkBUl&dUJhts1C+$p zMS_mk;p!g|>?^@*A4~?M8iJFQ*GE`;NtIh)L#B`*B&==%9qd5-cwNJ;5AgH}XCS9u zI2)4YA?Y8D#H^1X6%*RphStM>(dr{eAE)UwXtoH_KSHjRh^dd7!6!Vlf$JkkFRAVP z|F-l0TQ5LS8+alKbZ)~r&|wacnWOGgpxGiwHPn3q+KcHr{=XAa1;DEnI0LyR!o`Lw zh8S`TVJxVZge-dmG+qZ8wS(3>NV1UrQOD8$9dL7S)JG5#kb5ybN5Nx*kp2;v>^}yY z6@rWt!b!w@5M+i3Y1*jg7|3mCX7(T{099Cr|MwjM84It%AS>&pp8XF&Q_lRK21bxt zX~L=hlg@xnW`Ng6klqk-mDF<;w1%PU#{bUip!Icq*Fn=vUFV?D1nOXdmnAwrz7z z*#f7u31*@7%1$|oR`Fu`A%aS-Y~of-JUR?4Dv)&!a1v4zK}h8K2(wm#I1)}msv%?& zxdK9CLq_c&B(70AbyF^b#_M`vqjTNY zKx1@}TB-LMs6Ohv{=e(S{|Pt$_ul~3N1($uPJt%%dQSd_Ab5S$1>QRVseK@9lIkN! zWz+#aBnmQK2kL4a``>jKG2Me!(ID4BXl#@U2x+k#NF22eqGEm2xc4)p0)mtEyFWru z1Na0mv}y>xkQ<)OaaBghH4daELKdmn_6E5h1n&yLYae*k17SdVK$s-F#(^`cQ0pVm z=-hh98o4K+vAA_lO4ozN;tIi)5vWfD9+xXz^E7|eV+hJy`6y@kgPaxjz>_kdeWsB5 zsASCpWCW_A);=s*_o#Hkqw-BpO4r=WS$s8p=9%QFC!)J{dsnS?&Yx=>->c+ZA!MDw zZyd|49>^f(z`$?Gz^TW;q|U&k%D|w&z#z%MAPKLDAPh)N1R+VPk05I;kn1A|8?7dS za3M7jgoM{e5C(EJ1ZP94Av6+RD?x_r;3T9Pf^Z>hI0+uLqe*=Psg=OX@W`o;;8_4t zA7LG`gOA!ls-)I~-_3Ho0*Z)rfA6VCa9aQ!7fd}P!u7egZ zbYA=4c@4zqxbeROTpz*vM`$yBkR}9(JPBH*38{}DBxphnatsK#(**A#K{g!qodX>S zh1oyqIR{$j*?khU^0WKc{|==32(8jV=7K63w0Z}L3ty-JV!)1eM6QV-Y{-&1(5WU! zCqsfdN8tVuq$`A#g!GT_)JOH;1L)A|BX~6gXAn~#q18mll@Xo_2vQS4`a$rT2vYAL ztxo{;j39!g>z_jUN0s1?5`1*76kHz_uXzfY6#{jRHasm@|0I9iQE?s;v zcjnQwN&BK2R=MR&vyN`k^DEVGOp`K>5Yq7Al(%P=FkukXW#Cd`V3A>9mSkWMWnkcE zVBlk5;AUXpVqoB4U|@w*LvRvO6G2F-)JKqB9;6zAlaQJSPC{xTG!kAJp)rtaC9G8v zr1lx2^%10E!c{B5Ya$2(QXhea>|nD%>SlSAk=)~ ze=~SV1EfBJuWEqIA3^FP2nnf`poW0ENG<1}^-=qI&`A!ErQ@w9|F<9e-v+KeKz)!S z|2q!-Z#xX_l7I%vAa%`QXw3rY)PSldq}~w0`l#bDtd4^WyMgNz=qhn=C4yY?L)p0N zBlw&VgaIjgK@||{EE1#!K_>f;{s$rOs2#laflnks81TxdA3Ved(GKZnK@Px~a`yl9 z%OEuK8mNAmedGW1E1;UF_vHWn(@1L(kNxjE1*)y4UH(7o+W+~t|1Z4vf5F}Vvv2>O zb?g81+n_3G^6meVZ~vco3)Be$ZG8nV@a_WFN+9wUs6Ohy0UEOFzYIFZspl+cSw4K_ zN#_aB2p*_=OG`Uup?!ciZgb(HW_MxY7^dGYqaUy!N? zd2|k~8iJp@1sS7j+yA3=-*-IK5Tr6{#@#uBbdfN7L-1+{!oXD-)$MqPRtG_PLufS- zL=ax@z!}K34>B84??5Ue$jBRQoQzY3Aj&Gvi2EhVrcEN zg4Iv*);G9D%TrtysGfeg}43__L+JcbM$x(uwU z3`|N4jI!WPkT3&-Ft{dyR7S}45qs{^+h{0n*_}M_q2G`t4R8{?(C%LL^4qZGcF;qF zAUz{Y5>ms`zdnMD)WOeUg&iw|b}Bfumjpfm56Xs&-9d)zAahAzHp+M%eBiDQyeSFZ zErK&3i4%=PYgs|KkWo8G1%yUo)<=*LJoGNofuHS%ezzV5A>>|C^KsD11n6oy@Y?U@ zv;UjV{%<%1LQSVYy`-k&;9k=4|IOeoRXezbY6UNNfK)~h5>h3BYPU20Th9D%Jqzv` zo&Dc=2GpDeZG8gof$BU8?%+VGq(jj9sO>PUR)W+=U=rHX0rz@9RTV_zk^dcskq}rV zq=rIP-+2_=&w`L(6S^QhBSQ62H>~1=)IMk=s4_yWhCquMkg5_$g@Q~%`c@!ikfr0` z-AvF00LP$bto0rTEvo|^j&$;W-)T@kXv#%Uy)*sl|C!hS&%FG9>UmHN+qmz4*~b4_ zOaCX%`X4j(f8>P!5&i!oC;m^K{XcKz|H>`@8}|KgJ_uUOHtEv;c{l$ry8C~@!~csO z|DXHl|IB;;r-ISsyZnkiEZ9(npcQxB z;Bz^^{UgvqP{?x7j>CT-Te*=*%*qH$9fVvZA?+uE*F+Eoqyi#XAHgdi2m_K0;Unje zoCv9d;A9=rDcs0e8@Ucbn=nGIgCKPc8VRX!;DQh?G6}7HHodOg^tx>0tI`cGOE$eK z-3(=yZ+Ka;@m2YTm*pE?macnVvi4c&y65E^UKX!^Tm(LUyA*sVciB47>D*=O9u=>B zShVIr@#^~}YaW!Xc~rjkamBhv__@ney5 zU=THB5HMii)L>vyW?+7no#YV1OW21_l-+#LmEguRcOPr2*1c z%2`3b`UuyeJLLWm(%Imk72uHW5~ODYua6)Mv|0(73#o?SB)ATGgIphhrjWM1s@e7m zxiZ4Vh9^I~&wuiN_LKj!9{- z(7I{jz5f&M{qMOALcO>D_ud9=iR}Wf_wKp?I-;TH-2c9_p!%pAsaAsQUxSR)LFyf} z`Un))Cqd^W!dG{Ls+@?n*&~WP`dVE3Fxf32SwmXpR#q2 zsy9Bb-Tbm{%ZuubPs_HxDBkq6VD;Uc48pbyBDM@-_6%aS3=&{c*or~O9Ml71QDb0IVPKGBV31~DkO3o61_luZ z20;b}0dO@0sf-}wavb2%ISvK}P6kGpIF}@!lqOo|2*QQ*kI?EPNJlAq`EBGb5~Qz$ z**SuUfI3HO?-#5EO@!eZtSemm5N)Im&P5xkgK&whk2bw7M?w{w-#`)i^+|}%QPnn3 zHH1_Xq3oQ6Ew_X2OM;|22nm-$J=g(svl1jRqml5W4qC&r`%41`YTWY`iuQkRL`6*p zzd=y*A<$7mpjrvsKZ2b84XKY>j{R>t0UEV~^lsY0og?J>sOc1F$gb%)h=h#YffnYS z_}>a!@(tpG`$`}aPXBL%q4qN%Hl);ooVoz%mvtZc-*FJsE$RSwj@l3YLt}vEZVvwk z&Ep*U-*xDJ`w;|!^n+0PN07eKk^k+7p~~7IjHCbC!3bpOQIHf=?-6jn30%`bN9rI` zHsJBPo}-|7CCI7dVZ$0upaO^uJ=u|AICD6KDJn>iq9g{?97+ zi*53M!|)%9UVo)szDwBu7qc5!9e<_>)a_;}NL;ky0{EzMXpR@db$@>2_ z+x|E00S$_Fodg|5*LVH@59$|65Ois!mYH>iB=iarA9R5C@zRtbMu;{(_^9gTLAj{Om+R z(DmEMJ6^%;=6&BGsAd0;mi@n)_x*%$oA-PL)kol3sd>*=1OlJMjoc1I&Xve)L%l7+B;PIMf)VEjiQ!MT}!*oy+w?Iv^DgCJC8B!mN@Y^%0R(5@^V7 z%{{dG2vRY@Cy(F^cr^rPKq??K60JVM*+oLCjLJ8?hE_(K-@toD@D33s1JXr8BjJ?} zgaJ=)pz&eY1%Hs_icBIWW;h!;wPRLB4ZFWUE2BMM>-T~Xa+L(BjG7L9gV#qbhkmsj z`UUPH{cb+|xB2kj#>0PGj{R?hb&)XZqsEianyBgc|HfmW&J?I~1g?)DyJg|^5vYrF z@_!2qLFS&iPJm{sA#3M4j)Ce3P-O(JkJ=CZZ$J1C4MFOrj)RE$2r`rhsg>FefyeqF zbr2E>s*E7j&=JtMA4oB{GJ;e|ATIK--cjh}6t(IjZS{~4D+-IiHb|IfY#VoW*<8hfnR{6A~yr|4;K{5tR3RoyhqJ)@I+Q$FmBi2FGK z=TqXg_rz`Q@tMEkGk?Wn{Ef%>8=ujCe&hdw#{VU({!2Lim-qN@koe!F^nduI|7iv!DK-`Q-nM z$NwkY|3Bd_s6Ohw`M)20WYC05|0i7fKM{5v5~MN$RWY!`Q6ZHMa%I$c9CS<~Xg&x$ zpw|iRB|$VIkJ>@jTXvp<)<>XD7I-WdQYFDxFA!fJb%HAzQtKmdHT1K2-%m&;f>%bZ z`@SP8qdi|C-6D8>gq$VO*pO@s&(t+L-hr;egItQY{R5;9!c`wZCW#;4Le`g z?R;Im?M2zlu+>JbH#BH*e=2uuosdq6^9QUF{N@xy8*9`Hg1UIqpT5@2AJ zWaUxd5z~`Vvy-t1)^<-a3oi6ZZw)S*liai>p>0=u>rU|Ts<+@I+QNpcWp|Jfc-1$k ze?(G!l)nag9yp{Xf|Kx?2$=z|jF1_~RT5@>1QCG@)*+G5+ms;Vb#M}1AHipi;0)x7 z39T~1-K!XM3Bk|PC{#?17DG= zq~=54TMmORNP<^Mt%pIi5@?g`anP9w%_l);2Q{4m%^cO80xfK4I`h9CywI-kBxnk$ z@ffH|f=nmD$;MOv8&Caj0(Y_?f-T2EBYVxqK=Vx<;JyiD$O}4ibmV{Aq5rK=^sn_0 z0<|3c+j0<8p|l+WscAb1Dy!R${BJt~s>nc<3uG}iQcVQuD?#g{!=PRgRPiy;!ijc> zdaxVdYC6E3BhVTN$T|nugb}D;1{Q&i5W?0}bb=?IdX9m*MSbwSKSw}o6($}79l`*q zgL;obFY<$ILM)LpRwqF{>K01JO7s+{a<%Lh9sAdE1UicZuZ-{y%0Xn&xvD>m!u=Ai!sF!zYH|j4JT* z07!iVxqApkLiQhRhQcu76&z?ipm4Q2yGdd22xX zRg2ewj-G>IkDU%MwHm~)_n_(Hzr0kRQv2W`VO0|0`UrpBgjok6i$Lll zh)BiuHx=7KgL{w^i(Dn)s)-=t^}9b}Rzt`l@X83n098o`zBac$_QK^ftJRB*Un8k{(sVOP*(`l8-mqG6Hb86Ntg-l3Qax>x^1EL@c)`E|MQps zOq%*Ju;qbE$vyM*i`o(AR05Anxt|tyIU#I)MbPpRugN1`lSdrJpLxu`^O*eSHT^GO z1nL9{8~+zE`7dhrU)`@fjMe_>;ghy=Jw(hmP`oB7|n^?%fq z|0&BrRZ`(WP<>Q;>3{tdP<_;K^Z$f9|EE0oKl{=Dc~Ac@cmb-67QF%u<;{KZf6jA| z_>BAir{4KL>E{0l*FmWN3aCEnz5qIX7j(=N>@p)rEdyH80PfmA4(#nX_OIjEzqTWP zAl&xjptTLq+6r7Zb({jN@rK;<*l`wWX4?tSNx$u=^%10(1h17Kht6SEF_2+8G;w%+ z1gU_~NVNK>dEa+Pl|*EHRJZE`q*j7gMm4)XKoDAegw{W*+xDgbe788HN`e#%5E5P! z!5Gl-I&g;wdR=(sX3)k}NIil^Lh7U9wXY!cQR%vu#cQ6HtbJCv{(1gd(7qw~3Wti# zZ>qMwtKRxLXW5;MMK>ZQo$zno=2*JIEMum2Y@du*1-of7t9B@ZtUGAS534={vns6b zBL$x5kp}mAM8WllAh?eM8C!$T^{_E8ure^PgDW5|aFxUjo)Y2!uT0dopj%se*ZD z3Kv|?nSCyG+UfKeXCc)PoFrHuWi7jvwd@vr)DFUc)I^vhXclSpT~L*@8hUyIybi)- zAor4RO)9}VO33vQ=8<>cDhXO2LC#`@_m7Y+@`J9pL#~yu_LY$PO~~x3ZJ=rhQW-%= zNJpt^C;Bz$klF{6Bv>Co>Ly5K1R)#2^%1;Ef(+b&dPzw2QS;%yZQxNj(5eRTRwKv> zLXcfZkh-bi6sVU}fAW9b2@tA>^_Ut?{0AXeebfl<7D1|^rlbE`A?V2eRxoNg44P+e z0e5zq5B_gC23?!YR;dgqaur z&$Kk_|LBo>iY;7|K~OM&j&{QhX45u{|lJ@7c>J^I$~!370myO>;30e z`NS;qibwgEnC^cuqyM6Y|3ytf9VKap|C)aP&9nZySN-?u`5!*>fBcsJxrhE2T>f8t z?SKCD{{=VxSKs_!b>n~YjsNX;Kx1^%pZuTm;{Uu?pzhM*SN|8i_`l%U{{@f#&%XD6 z=I#H}Zv3Bk?SDV)Mx^eupxOj<Y< zNJwP_?;pXrAQ7bg5u^?RWy4(`AlVaA2f@keU7%Uu+MVwqO-*P9-|?Yh+q=B)eSQP10xtQGcdr} za1kc38pw(`4se~t52}C|gh7V$XfR4xi|Gc)*`*o;lv^cq`4-O&t6!7YwLQB3K+?41 zsdLVy&%2N^?^4RVO9^v8D1FxXl<8+6D0$}DPZsKXs7TUUP{|i_CNt*FBsN<<)$!+7bvznn76?{&JxSZy@nSTs?cNo~OGKjqu)c7x~52}v@!Bvu|)qh#1|5~yCt#bd{H~#nN`tQHwf6T`J zNk{+ZU;3YW^?%;A{}o`=bRASB_1px_8BKi%8o`_Y3^a(h^!5KGufPi)9{-ph%-GFS&5nXlOPu6*m;if!*ewG#MpPf!K1?G=2*0;Dnm_k%!VaggP6 zRhynwZ+>33=|$ofc<^V_pk?Ywf?ss0wHK03TccEqQ2`eXzd(iZ3Bdah{J0iP`3y? ze*`)G2DB6byytY{S;LCm{6FLD|A~kHw`}`gw)B6>TK|M}{`2Yn7dHAYW%^&j_`itJe<6eag8CpNVEA9a z5F{mP@?XOAzmz$M6gB`=E+PgXDM6$Ef`*_TlaMiZu+H*7kKTU{#d{3gCm1;T8Q6Lm z#g2+;-x4?YFJ%leP{ar{peJMdUp?@@dE$TT(*KUt|E;_JJ5T!Wy7GV2zWD_2(*y_TYZGJ8bThFgRs%6A!IIe z1rhkDF3kQBa&CpQAvqe}{zNVikk2B6jI=J!hs%#zd2dMg@md ze*F+OB^L$}GX`!gaJ?e}9(IE)E#L!JI`9EE$iNz8*o_6&(_sZOIKTsO5H>^#v=0cj zEeKNkaDyj|Aj=Yj7#O4&_zZ+}{B#4W{EB8L^=!+Vaj0C(mL^A?`ZU2qPxZFc^} z)VUW^=3Gdebvbd?xq1#2FlRY?#od|nBuRwA=L!a8sV7oYCj5EhXAQfkn5x71JL@Y=>SqS1gSodM%6%-1$Y>(9kOH&Z+!%@1yVz` z9|o_%gBS=N!2=m}1hinU8(cX;4TO#ObsPfqZXj2IL-wxrodhkto_Y~f4NblHfBNPB z(=Pp=couZnY4z6sS@Sm$_+VbCY=6|ns|NT#bs-%Rg|KqRx&$;|R_X?;!s<`~W^5XxB z3;!F>{ck!CUchkW|BM^|=imRoeDHtX-T$+0f~uq$SN?<0CD8cfq;sGHBKl8( zE{uTOiw6$9e~^7cklF-7Lh2(3yW{Y`j>G?u&k1P*-+|I{G-G2BZQ)CedmiI2TeA)$jTYVvtxLAwW#E z=fz8ICQLl$U9(Xyeu{{F0fR~?gSZQWpdACh1p|j61EU%PgA#bZj~uuMBn}>ngU|kO zfma|v>KaJ>0El!>xgfNgkXO5K9O zsrzado~l`Pp?u-#vLzP^=AFu(cP4w@ne=%V(&nB|nR5YNA0^JXm^kBN!i+NsGtR_M zKOGP54Z-Um$Vgqnv{P|YPGl~=2}huA(c+s~OQ4s@W`ny(kd6|Zgv=kIk?_h0je#_y z1UeWTUMs;F@X83n0C$ugKq@9k1%yO`?j3u+oeMn6KCtJWXM-`tg|1V+qU&`pef(59Gk+c3UZ3F7Ih?#(TMxy$l zMF{*l|G72(^J)DT)cr4L@L$pbv}{}4^1p=ke-7#Y3_@QSMSpN=f_g@L#{YSZ{&SoD z7qR;pzFsE(V5K3=F0WtX2$sWkSl`LQ1zJH6L=QeiPCMEdm!d|1ahAU)}$| zcG7>N%>Rak|4mE(>o@(kpY-2s+kelK|J|;DP{^hKVORdAUHPAS`G3~M|Cty5mz@1y za1z`UISM)*eFFFt)*079=MF7@@_*T55M$mAP$v;GN;&oX|0(A{X!6p+MM>#Sf764{$b!0Uqvy)I<=n=kT9y@FjYkhkmz%QQN^^ za0FS>0O=*c7tx{393ijehOm)qCFBL(jeEX=>LB>Cy4_zOxeT8B;9VgI1KyH^R6`K5 zax0dhlFBV_D>lD@Zgt!Y8h(LH4ZvoC-juF`T`IH@bie}SR3Fgc2^*hQY<}9X`)$kK z_pSRr)NOxWx#>~C;)@a8JH1Ng8Ai7%xa4x^g!0O}GVmCIjynJizDa=RfJDF(H<0Dl zkUk4!#8?19!OUqx{tmA(auf{t@>22wpM48IZcEaKqEW4NoDJ5rl+PNyya@G8_11e(BVKMdLr+kG4~*3@_E|AaH3F(}9Zag)x0 zdO)??|K~6NA3ODfSHlz2?DL8tM}%Cra$D|TG1|zWzK==kAfwu4PQBZFdLIQ0K8qUu z6F2@ZZuno?6f`0yWBy;p^uN5te?`mxDmMQWZ2!xHDY_Rkjg&} zm0w(He|WV2i<^P!BT@7J!fO8+g#I&dzF`pl#;pxnQzvNlpWo!afYpCV=l{x%|G6~& zGxBd{V3^LpV9dZ^!oZNkz*)$^x|f0H2m}8Ec9pMOdjEyZ|4TUhSMm9;8uMQ#`M*}y zf8E^w%4Pozn*XaU`|o)0zw^caE|>lXT>znkbN`dUl~K;+|K;E+so~iF>VyBA_Wx}? z^uPJg*S4cS`cD6ydg=fCJO7tF{J-qM|3!EHFTDAG{&i5rGy~jGg4ajg$NzR6`vbYz z0bVP09r*>VeZWTmLC&LtEO-D_J>d0ta0YTustf7C8tWK7Wb`A*OR26%l`v*R74GJwdFo+2iE#?4amg6U8Mx}WCOSoz`{nkeG%w2N-CQ>NV9nA?m5a_7 zF1}W{_*&M2D_IM!q|LjO241KDUaoKjQXj?7231DUv(CrQ1R+R$6hHlJ+_WZ9UK&mmP(>6VwcP|4;OkO~MF2~Ub7)kpBu4q>3w zN050{$QS}r`>k%zH#n-_`yB~^cF5wdk6^u|KP`v;G#~ukeDHU}A@Fn(5tsa4j8&5UaI8+QI5Oz&$L`8gKBZ z9pvcIiDyB_kWM)VIty;<+5c0{fUd^tJ@UVC>;Jq(|HE4TJLZ4W341T)a#z&y6|dnV zHno?`%Fmb;-?OQFVpsXgqxqjl<3F$Fe;(ccf=2%Z^g$CtLWci^3_-(XlHhGYpha%x z|K)7|tJ;FGESo5_D6z5l{m|9MsZb1A=MlYh!CdzW4IE|2mLDZ~GgW*{V` z^`AxLCj-wzHt{zCx}X^)F;h@&C1v(s&g#FC)qf6!*KC3{3=G+f?DC*xE?t##fyZ`6k04?X3diMW>Q~&!;{Odjbw+DPtIJ`1~oM;NU@dUCu z4!M5>seK?M%9@8mzuRDQLy!@L6qbc+nlC-huRqAS6Tt zGGYKBk^4uGx(T^Hssr!%11)I)XTZ9hpCEY?(t5_nKn_ojv z$tKX*K4lwUm23i`;*BpMTZBNngjPQ+Ui-Lc^Rv=zFB^8gtlab{sc)Bc`eX&?OeR%t zMqy*np`)OacBH_aAo%#30C*q{GK0ebp4Nd^E0C%OJ}U&NRv_bY5Gi=A1gW;*Bqw;z z2(oci8npJCS&MB$PboDrDLye#XVbnddn2DI z1tGH*-GCr?t%Oz|!7HYWCATs_>l~oZ*=ND@xS-f|Nb-oCtmakH{$>!l#lUxkf%hx}|9J+{SNv+f zMZi^(tnPmfiN6edk6FcD^J@JUHv!dpJlddokX7+No7_tVp6g7!6$}j7EL=(qY;p_? zI$-3_z!1W~(9bHjfm`Fhgehoytg7pO^^pJC5&v~!{_Dm4S5En_k@;V;=fCy3|3>FQ z^^x_(|5g|NyIug*N1mttyB_-=e)zxt?*B;}z9(<|QL*iN@y1V(J8`PEKCj#Px_QsL zmc8%04}F<_?*GE8p!#Up{r}7EgZfHyu7H*|OgQzw2i!S=R7Q}R2tq;z)*v+tq&|Yw zP>`AkLL%2k$m$_|AINM``+=Vw2Y>`))y&DZ}_xsRf+B8wM=D@c4y!+U|>*#orC}xkrRWRL@Erf zjQGG4I-u(qVD%9@QdbC4A3>@i$btk28!|lvmxV~NgJyy_#JPo)xTTCll&ytygA{EN zb$p8L;#z~t7bdrF%$a(me8Ktrwv@>Y+5o8cAcIs(J zWrR#-EV=<7yTh!TAl)UT;XBYWJO~$aF&^YR2XK`Ht&i|@kswn@pd*IBo0Y(I5OgjH z%qFKk!a8IJ=_Nr(cm;&UDBB8M3J$55;MGw1w%3q42tq>kBUJ2sR{>rK3gJS?>Rs=V z5u^@+kX5@sz!7AZU{gTaZu_czqpciQWGH zEeD_oGM5BdBi9PPgaT6UKxTy?b3Kr?bC8+{LUtVf54uR=IOyhu38(%~1mD>`^Wy&n z*Z$AF{D1oC|C5jZ@7VsoXyO0Z&j0Sk@AX4pOWI#$)wsr>c%DJ(HiP&L2C>JC@^9Fc ze{!k)5!C)KX$&26Q?U3iXZByl7=*+P|BLGX7t#MOYVcnGjD(H_)|D-2xg7+6;_ur6ib zSivB4mQVG8q|tvxtN+q^;Q1rL2MmG_*yUgFtAF5E{>m=>mRD1uKzUy{_BPQ*AM$|5dL31 z^1oKhfAO0C)(iftp8KzL`M=(^|GHN}Nc-}CqYM9aPyaVP@?U$`f3sEJ)n~tmn)4uI z)w|*~&r8<6C|UEMeEs9{O;4(~y{Oy%vSH`z_I)2F9RE4>-2b`P{x7@tfAQ`Av#x-; zNWI7ZLI&&F4}OQ#L=dtKT!}#HBk0Z`@K9Sb^1cHkHl#j+=z^$+?{9^#>qe`L@br%$ zyI&zS5rk|8pI-;5i6E;_A@i;93gPziZv}v~KB@tceH0tCkyuHc1*qf{r9*Q3bDy1KmRh9)*J(ae#aa1EgET z1+Iozz)P|rn}gsar0Rjp9I=7dFhJJSLC#EooMkEs9_5o|;L~MLauc?SQwSd8-~|ExVJqD}%NSbpgY0jnOxtAe~q&b(MbrWPY-K@(A(=WtNKN~;&EU1eFnm>XrW{97D zI%e9bm}#e?r=CPZ$Tbm!jlVvEbeF&#B}7dGsgE!h<7Ih)kSQeikR7Blf{7UrwUvd zVb(`gJKqsgA3^d3bj=Tr@j7^YRKNE-besY8TsyQb5~MzYEVqN#M@>im)kDx>@PT#U zJNuf?g6=bFJomr(0thvo|KD={Kj@0{lc4#d2H1FA{lWi@2T%~GdIDEK@G1#jAE8x7 zpepI$|2F7WS=b zLfgLoWo!P&PyOIhaYrlekht3x4)di9Ix84d7BEPzXOQ2@pm2^!^)jc%O9A~4!iN7O z%|W|LB~1P+TK`vb_^)F7U)ct{Ai?CnnC^ceP4J|T+J7GP|9qPNxwJr4lDPSQIlKQ- zR{xom|1*e#5WCuc1^fRdLH`Z?|7$schU^q<{wv!4SG4&rY4TrK9kft^f%6gr!!ZVi zMGOoJ85kBYuqmGsbn+D%9P!76i;A!=yC-vK2H12xSyyso_;qRcq&vVfF2sB%C2zuo= z;uvl4`Zq|Q1ya92Y7K07B0&;xj_RI?3K8C8K#W-i_EvS`yAFnU$I`E}vu*CpHD)$IDzxc__m!S4;bKGbb{ zQ@r|N`mFPQjoYl!XDRtN@tLMDCvXH4_*{As%i79Fo# zeW7;U^}6*ps@C4FT6?=>?fsIqpox;~<)BhMW67=D#W!;n-N;&SEekd%2dR?a^%0~) zlsFr-VgXL3%(;{@=MrRfcjAnT@zc&j`a!W%PsdC-6*mopVyB*rg;hgQQ%*oo#H3>o zl(yg+8p>F3Eo;$@3@}PtbR&K7O=JWawS%m|gI7av23i*hTnCX>A3@fK!)qm^fjdzD z2)qFAQ6X&94$eTEOCqU0!mO1bbr7UJf{>67JMx?n=zt)MJ`uczf-@i;BKY44?3#d`yC<+$#IZ&VEz8@4X`D4$ZNkL^$}!KQsd#j2z2OQ z%Tdq;NgZeYx1Rgoe&K%$7&V;-RY}bk|2LioZASuaN;>wx7JNWZ;}OuZfu=*KRT8*9 zf>uKZLH#C3-PCsQf6IPQ{nE7mfAc;NyJg>h5Q21?AcKD38meR8|L%kTdk%vx?}psb zFyZ9?sptMrKLa{Ju4DWEisk>)CjR%W_+c3JUfSg*yY6)cg;NZ|XBqfUGVoks5WdYI z`J7SlJ-6n6eqB&EN6hfQq!DPGO~T~Av^i)&f}-_*RT~gewgJ@+k|v-P+L8vKDoIf5 zKcD)4E|nh~O5b?Z|B4&_m$UeimC|oB!2r{#U*6 zU+u^#MLc2kIFe_}0GvYby-39ry;R7a(;+>%Q;sm2XW4el#Mj z`GmKW;3`3t2)F`*tm1}T>;$|G0?<%*vt=jqyr9RsH zvU>B&+O4naw!Nv@@(MKIx#d;Owl}pq-#737T7TecXlY6!w$2iH3+;7K3k z<9ay3%i*}e%k9J&SkxE-AqZUHLo1{CH!|m4hoFpkS2E^ZNt}Bz zaqh+BxffIBT}qmBArZ8t=wj;J%b;p#_QkY0mr`e6OrCi@X~wyv8RrtFor#@#I%?91 zm`TT?CmxNNbS!SliMXl9u0i&qNN24YmjhTEndeWiDNrxgP9z?5*(!jG+pt&kS z^-&gdItf&t!s{al1G!d$tpF#se?&}uRImoUN+P~Kf_Ic43`k{!Iimz0uftUz!D}33 z2BbrTECQ)?h^dd@6;t)@&)}@|t$O!YY*iAd?MG^T1lkyTtSf$2RgyDA5tC5=EmBPlab&q&q&zq;LjWh>AEIaxDMWh7zzU(x_{K98`@e{Plk%(5RD zB;T?rd=u3EFKZ6cBCPYDf&U`|^J50Vk75S@wO#)k`~KJS_^Y85ou^F!nPrO<-W@1=U9!>lvjE@@W5&wfQe@`5#mt37utN*vY`q z!@$r1-e*+9z!1d1;LpIO%fM;C#HYftzu=uq{Hb8E)}f24Y`$|VC8KPd)2)X(4fuJ^35;n zz}F}=?fKNa_jCLHugLXL>wakO19F=KWVHaKnt+U$VUm#A1VTcp5-1y~_5oEx5bl2H zIjFR#k03px>YX2}c6@-MbgIYL)h!X>c83Usp1A{gLgF3h$q{6@eStbYR z5J`bIuA-e62OpZ_0(WqbD<*F6KpZD{oi;?A6)eKfz$V2hq9LyBr0*PM9a|?)%|BYU;AG?KE46EH*Q~i!vHpJ1%A46sZ)PpIku>j0+M*lT%b*BS8D%cH zmbUn6+TyFp^RK3WyG2 z09iqoI{PA|K1!T^Hg3wP=!wT8CmxTPaw>l6NeGIYaw2-tkr;3_1gVT7Cmo5Jco0$_ zg-EK*YmA&c#oPkV29iqu7dw74MYylTe>2p5w0v6Ao#2-dXw0ucdMM$ldoq~1X$tM`6~quTvH z;i!K9&xQlP;HdH7Z^#;krbB-q8 z<^T2P|2Limb&)^^*`4^`aP)sYxJqgSZ}w?{^_5x<{BH(VNsaqqdteWN&RYN-iEsqE zlnyk|cNlbhbnp596V8DSrI~&SbScA}i~pyb{NK6#f6c1@S(E+;RlT)|f2ZVng-`1O zgXDe&?qgtdlR@+`qvSU(rT>EJ|3$Sy$H9pi{TDU{AzlN}2|WBp|M?96^XY^3nM#|2 z7HNy?{}Z2wTd>vQ|T?`;RZL3&1z`U2F!*#qh;HShi03PF3mw!*F!YTWa^anJW=7;4@3 z6I$Krv8AQQ5UZokB zl^9r67)DI1=PbFAx#U{rl4}`@KnOBDlsq3)F{RGCo-+4Z z%G_(obFL=OxtcWl3S_D%an3n7g6wjIkjXPaC(*(CM@cizB~Ck^F!fyA3f>tcF9Ri&Z)N}xJd=O|U0%RZ1QSd$8C;#`I0bR8- z>B@fyntTy-!9>mWze#f+yH%glPS`B!yp&OYB7^J%2FV@w|FTy9#f<;+fc6CaVBomUz;uy;a}NW@ZU*jSY_d0a zHUG<5{g*cS&nWSWf%6520?29|m;aEJb!zsYDoND|RH<>v{AXZ(#=yKDJO|Xuz}yP0 zk661v^%2`b2C1Xm5Udvi? zBWKyI{B@5@x4eYZN09yzsOo|BTp;yP^WM)OL89v;&_SlKSsq9o)C{^Z4|;1Jyc&Wt zAax%^CDcZ6eFQmu4l*kQAt5ypgsj>99j!j9-S@e6A9xEnc-I~%e}M<RY_Yv z)NK7wx#fM;RuH#g_m}4VU)v7-XgTn$6?_qG{m%Ers~^TrJ?&YuQ73+ih;tc(ULu2H z5QBgt1Cu@jgC=;3kR}6@CU{Cn5p-q(s|slQDvK0&RtU0p6|$}Xvd>f)TqOyBkCKDj z=>Q=iw*^5uN1)T;Kz%3}8?&9mYi?8P_zM8Z6Y8Dt}FS(Wl=0XJF;u#CBWGuLn zy5JHB&A$xULzFV_N)l*j?sD>+t0?@F|qWD>7;0SX5Pr{5d$+OR=&OV_Eh%?6*gUE!0C#LYYn>LN`(8aL@M zxIO~82jVI$B)G|BV;^*PjC&*8n=Y;W%imUBeO3 za=XT(;EvUC&<%YZC;xYz`rm!_fA9JKz2`uv=feN~%l{`_1fi)HKevdX<>k^jM~0XlU@*bsChgQ)p` z8QcGI_MnvmGUlLhHbEWG)=>_%|D5U|#Gwm5<-qvAfCXsDft2NcL5=^65`P#Z{_t!3 zSFro9@B81-=f9dGXb4Wp26Uo=jO~AUv;X{R|5?QzFtA@_U|j}YfiRVUVHyMLN@j^Y z94f!$to|#SgU90dzVNDoCZ2ShK|L&GoBvAopxRB|`aie&e+JRppfwDvGZ+}BFt9d* z5OWs;TQ37!I|F+M1N%}2iESL}pksw3&Hu9~{b%5Pz`$^sk@qN@&?yGd>kLwNSf!t_ zNIhkf*w4VahJneIfx()AK?1Y|iK&`TWv-C!KM7;d4HOb)AiwB%{WtXeujBDw!wocG zsOt7#)$PBc$A2Zy{|r_CwO9R@y!l_|&VQM^Af$Tdzv`X;a$qEX`@aI1A$8-w&Xxb% zoB!~8^)VQhDq5#1+onr93&53N^nlbxA*~)tbtDlr^cv-vSBc#BG z4#dHBl|n}6Q0gOatppKAQw-U}jV1`=euv+h2dReY4}6B8+I=69_kBYykAOG^vKj$G zA~))4_I#@a4;|F*|5*h_)qB5I@BLb_?{nFnj}^N=l<#_9x$|QM=xFW_727}ubXV{E z*tqL+^PaCA`+syF`aR|7p9u$lbnbXvv-)1qtTPb}n_N;S>H1ZR8%DD$dNYXGF|g=> zmmkQ2NjdOjkSwSN#3I4KBmzQ=0t|2jx~dye^+1l5gI}TwS?vbd-zpF7@~|j_7~EP6 z?3xU``V4YT0>d9+~Z<&u@xtJdAGUjLwQ&Ar?ew=$RB zOkaFGb;-@tB{$O--A-S0J9F{%j78VdAZP*T*aXOo3}{IJX!r|M+JhHtgO1uxnRf}& z8%mi6+Q*s*8jb@knFFnwgY<@GUx9XuU`x26_0i0;ap1#$5@()G22CNIPndBga^lg* z{=<>|hogEAMfDyE>Dm|AzT2&Oi)+Y+W@JI3P4BOL3bfx)<^J)39YXLsf<8l zci8$zplz_5UO?_lyWuM)3LwGHq176Es6wwd+$Acct`m35L zyZBvKGU!ZTknLv>Y-Zq`!XPx4L1H(f!eJiuo1zAfBn>~ynf;J41@!_z(>f;q#ZCW9 zfKN7%wFlKl5|*H4Y@!CBLwi80;57bosDX-iM*06t>i;=(|8pDv7q|bf;SSm~#31~g zf%PQ=|7St1|7tG(jRXIi2mROg`mbsaLUQ(?Q{aR^OAG!nu%2gNILg2iplp@4vA> zs6GNMv2*#a;tH*gRDJ%lxBWNX1geZ=@BWttS4p_)BfV??r4Rp?3Z2QIkt1aiCvTZ7 zW}PK&lPzVRC*xQk=Tl{tGAXoUZ|#PskkbY*s}e|!0I7GH_kM2L^Qi?1VfI`g>XAuU zO$3>K0*&lJL?E?K?Y@t2RJZpN2!RI;kWHxH^%=5AyM7mFSpt0I0W#TI4bBc#`+t`2 z`Bt&(Q~A!1CA;1i?|fIb^IgU64>fzf)bIb^eDHV6p+ArnfSreax9yz61@ z`)qQTDEK#VS>`aPhBF8_F)$l2Fz7NcYJ-s`1G6>*n;ICYg2v5Q6d9Nm7?{Ku7)8OA zkuaz-ViIIv5(F_omv=*!D+q#?x-kedu*$LsYjet&39C7XXuC@q_^8-KtJ=n>J01yaTQeQfX_iqoOKyAMhC8p;%8h0p_!ob7UE}~jh}fIau{9Qv{O-& zjs&;R{Z{+Z)rRp?WW4I7 zJ?qp$I!%(M+vO~@&0l1ZHPZ45X0*v&ZhQlAsewkm0+6HII<1AvhZ{WCtO!)<^Kp5qv!Z ze9j21e+20nA)inOU$g+NjKHn0%AFq}2()wu+)IMz7c>T>4niZ5XO19jls95G4O6+;9keTb(lfqEVJ|zE~VE(n*SsX{tIaR7uE&M1PSYd?gkPt_|K#L zpGy;T=9{PqXqS+p)qizcP^BPm{$JV{bO3^&F6jOR5rhAdhW|x%|MP2nXOn-&F872- z`K5p^sN14w|DR3eKLg8s28LS%Xe&e--Ed%1)r+J8ifBf`0$yqW`lj{I7QSzw})YlDP9<;?94$yC5WW z=RXMD{V#dvzu3M1Lbv`a-TW_h_P=ueHU{-PVciH3y-0DBXeskJ8Ou01>jW9cJPqF} zv*c;1la99U`PzNpXXE~#^?ScJ?*0Kn;8`I^y#T3qApN8Ey`S5`ouigLpCIB85|XeX z>Kk@{fpll!Tv$y9t&c$5y`LNRey#^~i$GOU&E5|+dq36egA^Haml zPjx##f!1m709`j$zw0w-vI*SEsy_@Gj;=lUw|f8Y>I1)O4}q?y>^%8@;;H}rC;s;z z{N1tZOUsgq9yRjxOSovh$g!l@n0AnwG#ZUpWGX@I7ISkxITdn!XO$Zsm9KkHw*Fz!>bp71 z?&L1No4xclX#Lot8=&ohscZqNk90i#YdM2fAQ*Uo z)@;jI{AX4C&%kq=f#D3OGGdqos*IRB7+BgE*gHY>5#Is^iATaF|5R;3N2jvL{^!^D zuVe$Nk3buaY(b-TvS$BzmH#tve`a7^$H2IXfqe=C+hhjzi3}W*K(|G3^fPd_Fz~c9 z2<+g~x+G%$U(WHrnjOd&tWrN1gg)>n{F2ZCEyLi^2Mspzn}F7+vnl>(7GJ}_G@XGn zhFvU^QSzv$?oC01|5BEq%1GAXzpBT7E#LpDuK$&tK_ioj4*xZrL5R=$zkbetiLL+j zPJ=2VDR5~!=6v@`lxo_M|gb%X~==Lje}27fOL*(w!W(aFSh{gW;*nv{qRrFhP4Zz z>oX>t2km8OJqhZnx19oYE0Hf3Nn0M$pVx(uwk3~ZVp#G%2!p#iFl*pxv>XRygJut|aHBWRTb**7E#>ezzn zBQXXBDFzND22m4XO%DynG`sMc;G#Ksz1yqio@!ljwQcR4?oAInwmjVys6K)pumGu#QeoR#Q)XXD0k2y~oP9BA&ZX$7 zr^EV=1a}_{>N*hAbs)I=U_i${pO#&ojoV$TH#$|UvnyF;nZML5XMsW5EThy}2FWuF zQ)WVtaoTK?v;`JfOKgi*I#;fDFI%Y<*&}3{#319zDB;E`MP1cbx+r)YE?Cf5D31&eiASeO57OcQWwgF|cPcurx5RHZt&R;7~Xs zr1M?Y9CRWax6XeKFk;pD&!YXGMdv@O?tfOD|7_r;2=Mz-d9*>N#t9mLPIZ&B0Zo@E zS%E4e&|(IQ|I%h4B&GkKTj4*0&;z8gfh=m->GOp4SZQ z&$v}U9T{ymP$xpo4zx5*&-uTq73iD=qLuR z$qd|6Kzcc+f~q9;i40ub41E0zA{T}9-$>d0mvj2B;RM5{~2Z8G6+6m5WmbIf16YFr>Nn7VN=j?tWtLWrS1PKI{jC2294S& zIe^A6W$peexqvRJU`qRMKKsA?MbL&L@jL&;@BA0N@n8HFbkt7b_J4`npq`P~z5jxD z|MT7YFMi{{=(YbcMLQVeQw3DKgw%Y5G(1GKyu@@p#r3>|4Z@|&<3z18rR?)`B6`af zUGLiet7-eE#@#;}c73nk^Q(T(ue#me>vlue&Vj3vZ;;+lF-V0m@VkQQAZ9ZLb}I%B z69!IG1}-BI;?`#1)@I<;03mi2P&EYU6LBjsa4Imc%P=sBgOBkM2h}?a(x6!+7G(xb z69yq$MwMVui*)_aPM55Cq4nFcr=BTYe4~Etqxy}n8#lkL-}1I*s|7F~l>J&-+~pi34O!>(XhbUS0=tt>Et zR6y`<5r{GWM(VulkU9u*nq1nvD{0^k5$KEs6vm~Dd6yt{Q~KP?5C*&hl|J`!(u{Md z;Kd1%{fGTpc6d~8aI0MBT)Eb%VvS4r8mH1#wgpQqvKLq(A&bm;CaJT`(`K5d%`{D( zVUjf6Flo9;>MY~b`Bpj0oJv=@R;+c&ou%Me#xCc`AYjbEug1Wy#wwt~!mrH4Db2tp z!N@Gez#xoV4Z+!%^$}7>3B7-WwLZ#Nas$>yf^JZPFKmD_ApIju5>hciNJxDIUiA%H zUk9m@FiA*#ghs*#?vUq_Ae9l?L{iD-7e!!Hf^oDRq&|X^kSeJXeDXZxlm<9SVSNOz zn6NS+%Ot9Ie}>dYkSYnYK7w?VAY|44pVhE|yL#~QZ>(h9fnQYz{!}0QTYC&tCDokx zUv=z%*-;RxJo3Nt2xzdb?JTIZnZ5q2Tk{2}@Ffho4GfYw4E(tad^rq!1q_144E){f zGBYF$uBh5Rlr;V)X7HOw7gQxd>LV6ya5bd&pG^-`8F3l@7cu`YX8E7b;6H=>e+H3X z3^M;kO+bx*H5br^Q+adHvN|Pe(5#J&Ip|0qUeJ<&w+sw>!S@8s1>erFh=FYtgXn!h zy&sZh|Ah_ybBljt;(N=l0qPcMx&Bvm0QF5Y9RBM#{#P*l&m;ezf$bRs!y52*p4KQgf1XW+QSDES&P^TlWU zpT`hXAMxt^7tsF0t@fQ)_ocA$dvW9cQf8oCw9>Z!A@z~09e7=bJ*cLZvi>h?_g~89 zzpVX##n%5JJ3w2F)NlTmxC^R}AaxLA774-@y9cV0_-;Y#BcaRx#d9|^$R_hBxbZ7{ z@~gV?tGaS4*>Ne_va5LWX$SEdCGr|23fUF=R<7$k^moe1|1JA|w;lM~eBfWx{=bm= zsA>1N<~`pb^%1D*+4Z?$=cmS9pPP1mY6i1GgK6M;q<-gn5CYdnHQQd*?|cj5?gZ6J zkfqd+`UpJq_py2J$A�>UO;YS4MB^c73P^S5;j{e)S*!+kgD;q!a(9p8nr=fJ{qnm4zV?Uxiiw+HWp1iQoG=M+nNVGo1S%Vebc-BUE7wI z%^ROpuYF#*`f16^M}^Do=Pv;vXuY%KZZ-_%F1-yA%w2jLvOfrU4KwH<>%})ArxRo> zyaht@Z)Aa|ia>P`a-V44bx3^#sf<#pZGA*rZP4HA@Cn zMywhPY_cH4DhZl^1FeULEHC7N*Fne(to0GBuXHVA(Ty}Hf-U3#FXw<)OvrsD;`>L) z6%(W~!aVE_vnGOwpw&bWE=l!K3HTaW$fzAw64G4)tpmq#V=UzUVt5w`(oq6&x4o;{ z@d3hxuJGCM0c)jGvFj5qR0$f&`&td|96_oiNEfMU&$o)b-yx_9ywDE5z6i3)sA}IY zI06l)@Ba-Y5B#bI_n|-o?0bGR9sFB)@PFmO|7C~%mmd6Ia};zDQs-&V+PRK{{}X0D zQwUtkpxMeGRscdGg$%+448m~?Lb0IE5pO<&OBMNEztZC$3_PJ8K5eOcQOOdBnIw&5aQUw zAby%l=fAe^e~X0wIwAkH?Lb#fun0e6V7kh{xSxUV46F1LA&rkbhM@Y0Pyat3_ykuz zUC?Zruraizmb3;{I&#*a1?qA(pu_8wZ9#*Fau%RNBpBTOi^u=xU-w_{?0?x?|K(u) zAV}>aeg{-NLFSUg?)?|N`(NnBe}QZNC9nVIJ^f!ab{T_Y5|4rduc9Njq63eTBaebP zlb9~ExH+$)qkv|Bh;9h4ZoImCWpu;->3e?9I{kmb!T$|AzSnL0TC?M4J*!{K%2{rF})36=1yB2)t!JGP>ZyR>J11*Hu{iSio z$EIEHYPUiUKxo?WrgiuGzJuSU9RD-r_@BwgeosC0wSUk1?v0NdmffhCdnUYMxkYG) zs#7ViN)Us%D+8YyXlxC1Eu9hryFG()G>3f`uS-9J^AraA2@G~U3>Iw+X3Y%dEez)M z3}$tVh9wMIxeOX9po&SvlY!rbf!BtC(*o4T;WT03He}$?1MMLK83-Q(6Gh8zR%mfXr+3R)uvUdfDD z&kWi7nZ4+C7Nkl7lWFsBAR|y^H1B!}_LW-;3Aq}AvvJi&xH?L?YaZsVc?hY7;3WR~2(60*pG$%>Fl!}vm4wWIA8iLY z>9=(2+tRIX%eK9PFiN++h1XM{x(QY>RbZ=!YfD zzo3;7c%HZVz%NLhRlDa$?VcZH`~H>f`&YdGfAw+DAwo^3|7WfFU{SS?$8{2edI^JA z5rc3(gGe3-3FR~JW-;(ZGO!0RFsHJ}H*y%<6105Grw7^`Bw-Ai7U9wQ&mi}OLF^Tq z!WS;B|2*J3z6JFEi|T>uBYC_3Vy56@c^K|7FzjIvy(wh@T195+3#v^F9R8cQfVu;6 zHvgry|1%2yXJ9$bz_1Hk8MT5dqe=#bW(MXC2F|_QDmNs}Ks$Z-6#lb{gQ^T|@aPuk zR3E$lT8{tKE&lV${b%4g&%khifpI#hK4R@*;GDp~GZj>u@b@$D^)YZwV-TLtrtn44 z8dM)CTmR=*{m&r>x=TXE8gzV|toeUg)Boc7{{u{y;MxpAg0mQeW-|!Q08N?k zOkfb4${>1@SL>dH)qkUq|K{=kH3I%Cn*3*%`p>{~pMl{Ls6OI8$slrLVF5&}LrV(Ek<<|K<1nH#q-a`UVJz z-UZb=5K`hUqCSF%2;YR(M@$F)Gq_J+5Q>J>M;vmt+=>poiWZEbIt)Ta?9$fUD&BnR ze(dUz%*vq*D#_9g^}coc+O~h6apC{Olm8p{{;S&dy>iF5s-53!c7Cnd`L$uk*ZS>W z>UVs`S|2s;degM~ZS$@-&AZ;z?RW?76Md-J_6mCZ&enI3`lxyL`>sPjCZ7C1>BOH2 z$9}c%dS9~qPGrwPtK3Bzp&g=5rM$+e3`!vkVy+Av<_yec3{0jB%!UkXCZKb8xQsvx zr1=aO1U(tm;i6C91oF%uAQ8s9F4pI|ANYG$i)}kBPi*G_k>L7Jc#==`^kO4YK zw+Kw8%)JH~uY=b|SqrXZ%)0^~uS=bMK5_c#xQWLidk*^4Z*eSHX;-q!zH~K&v@cz0 zQ?$%7XCY`fE@!@V{v!LrC6?LqOfqI0rOkv?M#d>KjZ@te`+}uTd5c{OmpRw0bFNuun$W_a>IvTM%LT56 zSQ!`?!3b6xF)+aEBVB8ODam4BBZB&=dW?jONBN+1Sok;6?01F|UzdCx4Q z0>VndYa(O@yjFrUAhC>0Li$Gp>m%eLyMpyk;G=e!wGw<@3BrK%n~=xxAeB+c)>m)@ z5rI@pkZP!8%bSudZy;>QI35}a=`NwwN|^OgC1`-|GlZ<(jov>h-}4Q$t`*ils@VG- zQomH}|5*Xvf>E{qC!{{A*!K&LAZjWP{Hi?gt78Ap$^*Yj5Bx3J|EJ;r=yagTr~mgJ z{a-lurA5jHZv6%Zp=1WWXa>&R_->Uj`;`2JU4-dMCLI z|Fi4;XV>}9XABxTV~~Bvz`uxrVG;wwBvzSqJZis$b^mkf{^!#B&!-3K2*_Ce7cuWdiCu@T!1LKo!#eujc$;6+G#qY71K4reOG=RqQVV z$6nA02`m#qE8;kM8Myip^^xF22KJQ02tH%rImp1ZpFwyX6bY?m;5)z|ev(D;m#EQyS)2drZXjRCTm2VT`OhNw zi-GG31M^u1?%hm6hZw}pv&vp$QTZzXK2%)L5W2Qd%p9~LUCR2ul=XiR3lM_bwWMhN zU&-RXq7`UYFN4E>HlP0t-TxIg{1?0QU-~*|R!IEDe}S7IBz)(;@SXo+cmIpt`7d-A zge30#7r*^q^d^YZy!oGD%})lSdp#ps`G5M+|DF5(H*Nn}v-Nw;*6&r@zgF+~ zRvaD5SK0k zw;t%IY5{!)2;$dd;MZgjuxC*XVYBVvcbmxIF$;t|XEC@t--* zV$dvNP|RWwiDKaNXJB?=;PPhR_FxbTW>AXaG0s-x^R>7?j>f znh3&x#5g9Iv-&~qnup-z@c?W82;NsBxV!;U4WW^kwNmkxmk?9}Tl|1rA3_ncx6qkV67o8A(`zDkFG(BxMe|zfa!uzr5*xX%o;nv1~s7wF>^rulsMX|G)U<|B_ch zOB6(}{pY^{LV~yc3*P!Kd<(rk62AUl`u=~_tN$4$-eXWsU=R)BQ?%ugw`P?wW0N)K zmNH@!)nyUY=a4k!lCuMi<0*LaDf@9Mhq1^7GKdB+FxfINIB@9Zxs`0MUHxv-@&A)g z{BPU;zi#`_@-3e#wtlYM`MF}pC+HfUoo_0(zpB~xs&2=d>a8!Ux4x|3`M!DY=eGSn zIu8EoI{2r3-_N?u@A8-2&zX5Sy5o>l`Yg?`PGQSz7OfZtVFw0QBk+N5s-TnKm~k~R?=kP(q!P)0wFFH25wabK3hnAB;?$^|!?*52#g@v?8< zo8G;zJ9ocn+wrn~+l!jbPb=0vDqVfQ2t4ct>F;E%yqB~3LDtGUkopKjt^nPt1Ch;J zaSvV@LDtAYCW+80qpZbuKvP2CB?_RiI9R0vsdvCvEr9m7!cT95h=Z;LpMNz8Tp7XV ziBiG$5JBprv^kgJrkskIbRx3qJXQ5g891zM} zV4ODBBz+EKskU+2Y=h((7TF8DYc>Wo@9?Tw?@+YVI(LB;xZ`7!JrB|evPhq0m@wHa zb%tB%ijd}QL5*9yD%Us_F0s#!n={`!XQ6H0V#mCN?j_5;8aKPut~ZNs2KSF3 zw+dpZj2IXgQs-WTAb4ejJaz}^86lI9g$*eSu3;j0#e{q+IA$*iULQdi7~Q2?kc~^= zgIMpPEw_WzN|3QxRKSCqn6%+E(J9vGB)=R?QQNpZ`;G=nv&Jmo1*F+Eoa%BYX z6IEd>2#2ithSW-!H4#h%w6d-YTn!=DMDQvJUMrRE`39+yK;)hu72wWM<^JDQ2mVwY z_)~M>Pwm0K5O(GMzf}kRmG1jraqxfo>W})xI~gA$r8f6&bf+)t(TK|5HL9YO2a;$S2RBS-&B1F~yGw^<3;Ml>yw26UpItX!3V&I+xS_mgN2{cm2IhBFu9Jl&QdAt9* zKA^6dzSn;h@IiL`YM}ERg!Do65ueU~Zq5JPYX4c}f3nEFVU&8wDD#L#{wcfC3to+P z+*+U|8IWF{x-GZ{(f-dN@t=wJ1q1sd2A;D_0v8!XPO?axWt6_lsr-~v0~8uUhM;jv z8LR)GK{?C+a+aWNu%Mfjz_%=kS%3!a#4P>`8vkdo{mJ5qB{4WSb!gv0Q-2E?j<-gS3|7M5&Go-Es^*f|o zI20UMWNcWZ&6%al_#{oZL=D(PbvY#r*rm+4WG$gc&V^Ueom0V&TPc7+!k2-=mVw=# zMLF6$dPdEHyK|2JUvT<==kD(<+dfrpdRw{aZQb@yjk`bxUe)gfAHKTx^Q04hrk?&k z`S`ECLti_$J}y~sF0gE|reC$NQ4Gi#0_F^Cx}a5R3{tSeb9lkm&xtUw$uO`hGq5Ux zmKJboFz{$HaH)gthvHNL9XZRT#K5f#VsI-lKo-AoD>3leGpdF&*tT+eP6Sm$zVjHo zW-&NTVzBRLueJblVQN1Y33|2aG_whhW_zc+Utj@|Fhf^}tEU0tPS_wBrVHx&pLT0v)pe>J)&s z7r_=Sq|OB$b_!W6mo(#CWdBjGhHdV(8(pi{+m)=aE?jC`yxcr@p;g{O`{HFT6{{Ue zmYZkKGtZu9nKj=kdjY7U1l}}cm@-o@eu_oryuhaI5uN+|8aBI>t+dTwV3RlBK5xEV z?mS4}$1HV*QQ{P{)EVw&D??kh2RChXDp_uwyTCGgo+WJ5&L($}ZQf#s{6%i1Dnkt%N*$hgLDchv^{o5n2}sQYArKk~==2 zRZNij2+~V}kg%EvGGs?leN+akm>{!ARR{i*?*3k~>qq5&(8{{(E#K9%H!^7EGl<4A zNF{+PBcVtJp+FE4^kv|3V-WHMA^rdckx&Ma2nNYm2I*c=pLqm47^hq_$Po6#}o#E zIgGORrOkip`~5cy{cqy`U(fTuq6O$+RRQ(?{A&OCwEqj}{^!;H&!hEUKpWgk()iA& z{!LKllZgITaibry7XKtIKsGAagH{R3nSc&}Wf%X?!2Oef?I8o}0|vhHEJByrr7!R) zUFT7IA*}meKp)gC5;p_wGXhmcHlWH#-U>t_*GFO&|AkEci<>vFkzE4Ws@{zmonp!apF~U=Ti;lQ4VBR4C7D=XOInL;B^HZB_SH5 z=+luk_2Q%>|0f*!*L4_#dXE0@J@&ur1PDzy@vnXV*NSzIvKCzNYuaj%GF`|nhe6UE zeCC@v_!2r9@QzVg@X|F&@D?612GGHwVhoJp44~8elo;5+$A_}3ftFsgt1z%Dfe>U3 zHk6bHt;FV0X5dq0;CF=7N1X2cpkX?<=?tDT8T{t4MXeIc+ND>2-hIZ4$YozrHvTWz z`M>fI=q&n{L;qX%{BPL)zj^z=hAqFVHhnB#|E_fX)6(@%i`G3VTnoBhym;+n2r6Cs zxNO~%vUN{N!IczfVrUiU@aL>mk1|#~098gv^%1kY5^n&nz%<>Mj3Og^B1{Ptah(n=Tx@RGIxPh-a@PF1=irX9_yTiW|{L0 zQ)cQXOts9K52=p=nzp)CtadD3;!w2MxoELd;UcHPB~FD)EHh@CBu|6%k0AAtOW8{M z!X>u(i)`{1Lh39~WmLG#xoD|-*-G!GE$($2K>Z_4U+{Px1Nd%1to0G-wpoaeASA?( zY4fh6gS`y)ICKgLQY8^nA0c;lFA3f=f~>Er01wlF`b1y`w3oE!N9peGn5cN?*V@DXJI;W1 z6}27y?^J)7$Elq`DV;$zmq9U;K{}a1B!)pKhCw)%K{1~}uZBUZmO;CYLAQa)xQ#)l zmBFx!N$U*1`AsIh|11We;#yGezqH8@4#lTTLYWKNHffP0c28L(`;e1x{ zRjlHxSY-BdDIH~&*~1{TiGgz-XdkH1T@?$^ayU7g|1wsf%1G7vzpU|pPQ^D2%;&&s z>be-1%0Y;^f`P4?fvpmXSSuK~b_nU+k+B5L8Oa#@XA=9*CJWkit7r!r5|p<99rz%u z_Md_GHE8~bc?SdU5(d7dpshyyvq2=^EC$xi3{ocqbwD+mMdW|0=>JB3|214dC*6ta z{O8vMApvbrH6*C}Ur_fypAPttJzdZ#6B7EM1$c61pkr4-%d>4jXWvN~{1;UG&n*0( zf%ho`=R*eG3k-Z08HBH}N#5dAcqX9!R!|ePE<)7gzoZ3dbWYmlzmzQq$=HDIK!@Cf zE^i4Mx8$?>&u#vn!R0?+)PIJq|3XXt^X~gEa`ZpnDG=hh@SpF}f9`YtdCr3v0$2Wv zT=_2w9>Ej4{$KFge<5&pN$~oAt=s>l&-|CI-oqdh%fPS6DPzVcW6daI%_3#RB4x!X zWx_0?$0BOTB5KGWYRD*N#4e`8DWS(9Z7!(dAgSvssp~7C8Nj3J!zS;`BI^yBSK@a9 z9ZSydD(_Sk-?XcD^Sfz#zPGM=l-7MXu70aa>TK(vP7V7q5%p*mac>4z69xta1_oa6 zWj&B9dmwklaWXJ)!tPdpT_?xL4pPa)4?cuPjDcAiwBnjo7Q7u*hJhW-V3%j$kO%dQ zxKtQ;)EIa)KuE})K`olWvW~&Ng~73hDP+D}!EVd$Ye7rjCT{tYf9QYdiT{O8GLd=X!m}niZxcbiy+7{XQ2gn zwKil;f_?EapN7rB?YrEn)>`K;vIbWlw)vorjBVaxi_Ce(;QoQ^z$6x)exM4TpvMtNszfD+7E zyG1v<ybjbCQu;zaTv4^1Bg&6x77&;l4E1(EoAF)<| zY9*lq(&kSUY(e#rviW~*`TxvPpu1lsOhKn`OPheMLlRa4ZyMq|462g&mNM`!V-TDR zMZ)tL#E**T-I2EfEk7^_{BIHpI=NKM33Nx2s1E3KBLQ%qNKgk<8A0kJAp_8T77|9F z%1G7(G$bf*{$I@EKffWkTO|9Rf&VW9>t_b``wScp7z8dei(F@yy3MEfNJ#U&q`@~y zL(pztNej?vtJ2_uUg7l-yh?)9N8INBh3)?f#r@Z<{Lef8zxc}koV)(>?f=hp41_q& z{O3ITpYzOruCt)}i0{&Wp-caTFM|g0M6UklyZm3^Dri}q@Xh}!SN}82eac{42b#MV z1RbFUsgGEtty!h5xui|mC5%|bj6n5~hyjDBA)BZ+o2WLcgbA0drI4DNgpQAxewesH zxPWFjr&0i@J`(d}5buVqnk%uR~A+lNt=5 zn}7tsoge{l1;h?sUIiJnVPjxm2bWGv;9F2xLDd#BM6oafqa+A1OEa*@fICex;EG8O zQ6I4>Ft8{vFv~OWIx%P`Ft~K`hs{>2IP5m#apLBGg@^x_90#GoBmZ*`{4dz|KY#E4 zqCNi$cKt8i@xOT6zsjwDOE&x{1Eb1KU&=RpD&6oJYkdT{a}BbV8?uHf54?&SUKwSr zc#ys9J_s$p3#xiB>LW>;RNseQpxtLz0P zsk4ldXISSf3~b&O*}X5Yc^ha7sbr~r@e=#uC3Z!N?F$##7cPPf%~@p4b*oq%)UrLa zeV1qTI;WE5kX7OiCCfpmc!g8xDzA#Q0d<>wTekVNZg(k}F6EWWBBBP4aYk^Y!`3mt zXh@$3jf7N2Y4bpfH^2+pk^4uWej=Xw2o#1$s~upQlAw$3AW@LM7<4UE){+|#l)V&m zQB?NQn>ov%lSi=n=vMY}$n~gj?j8Wpy?#=tx2UDUY2fnS-265Ha#zbb&ep%fI~>gURvaiQt{^3#hYJ4`bQ;O zphq{9K{g_Rk8yyk{f2jokQr6GK7qC=Ve2S?7k-0A?J9SDtK9XieE0Wq*ljNr-~|kj z9#i4oZ-slmJzxP}Ip6~g4zE|w|Rde89_lf^gF8-f#;(x)C$IjWS1uQBU1Vb5E zJQ#R_8N^~4q!Jk9QWylH8ARe4oaQiuu4i!H!r;7t!DKOm#&ibF9tO2e2Bl60wO$7O z`HYq;Idq@$8od%U0FAXz`$I{z@-WrSAmT1bILNXOM`YCaLcj_Xmbkb zGB7DIfaVLhz-dwt#OB+%+~%@e$|Ox6xG;KnNVpF!XV1H)zpmKFx)X4n{AJp)@k z1A93Lv9vI7buoxNkp?dzPp(Awy`k>8I ze7gU6bwG$m`@fJ5=xz%U?f;@W|0Q+)i>UwSSN+c_{hwLn3j@y^2F^PS9Jd+xE-(mQ zW)Qi{D*KE}>5GWYKVe-^MJ8?XU)BuNlTxyPc8DbG{)^i}Paub^^yahv&tv(YA>cn( z=6{B%|5=v&XIS%}aovB0&Hov;|7Y0ypJCsBhQt4vkNsyp^`GVRe~xqiInVzWJpW(l z!hf;L|HZHT=erIUd#25w6RK?4>Eb50oxE?G-XX)7)nYaS_c zZb>s%2_t4P113>@MiD(`F?AMk4OS5?W+4qGQ4LNReL-a_No^-t10NZEZ(bD#1`z`W z0RskJT?RpO1}*~z$cdv2dZ4Zlvpxfp9s`R$1DhcOvleJs0;qE&54(H;(o14z0M$Y8 zT8RbRU}gi=L<~IOxg;Ub*d4P31G5Y>n>+)PG!v^F1FHfEu_=HWIjkz6i6=%i24)Qg zZc_$nR|e|>#e}Jrtw%!_K8)Y=Bk$nQLye=!MbPpYoF$? zeVV)WdCr<=d262Ku6~-o>S^A}CwVKLf)QvfckZ(Lxy$b7Eq?%Epa^??=eJ5r9t+a(8GG+d)l=-(Z7F>fYqRRj;oXc5sBX7x# z{G~SwmfgyjcPVkoN&lwpE~RU1^OicmkbUkFhrFeBIg24D`8bxW4C~w%)wkcLVWVT| zGH@-k4Ahq?T54YmTIp?^Hd`-chEc|xfTpdH-TMMsw!2rab1q%xSiHokWT`{ZVz3ra zC&(&qp+(Momx|Q^E!%_JcX`)uaw=QtP_o>qY^8nK3J@w?=}@-HqjH09-B#DuZQk8` zJnNUsh175>*?h9$BwBrxw&)tTN+PyC z%3OLAg3ziYJoOQLUI|hYK}bk130@z;t0YqEqvCbXk?W&E@bTcFGle$2fK*0hn-FI^ zK>A9hTV6uyBg~Ex@?NB}ZSRn)AqX2Xg@kp~4%S`zQnBj`q&_O!^{s5zw@UCP*)rHV zhqB#Y^LKyA+x0nP+xwhdpGyz@tvUI>^X&hY6aNd>y|Js_$?Mp_AQ8^M?#{sL3z`X% zier#XVUSJ&RYpP)45A4pnOlq}yb>uq%@%c>!Eqgf-dqqen#y1_mBC;-gV9U|vt=wc zYdCbD@|(O7Hv`p241AYBCnvC%GB6b|aI1mJ4bY&90t2Tk1BVO)yCfrz94ntDGmkoW z)<7KG5@rCGJR%H?t5mF?$k>7guf!okSSFz3Od0qOgH~v>wlc7_F>o}35N8tuXA=Wg z6{tR9ZDkOh%A@*O)$YH#3#fmj;RqU?<5mJyN&H&>Mf5=%odh)h^D6)2R{X;y|CmAK z27~w^2FW8V3dcBAPxENp7dL(-Z}DH<;lH-qe;xP#TAu&aTtNruNg0Cp`Q$V-x#<)fsTM+f6l;l7qsC>_$q_M zEjGF5{OTV>bpA;j|CccXT|p~v0kT5L03#yWYFZ>s|_@C?Yf8J~Vr7rzv z*!Z6zY&ip8Jm{!uUTX$ET}Dw84rx%8#3^mXBWKGaWzHjI&MsxbDq+MbVZV}DgbYHg;Ybh>LW-^l)2#*V4)C$gTWnvj+%|8i zQ^9fvFzHmV+^Jx>eeM!?eH7VqFsg6AZ{sGHij@u}plZmzcp0cWQvzCdXO=lnFJ*># z?n3{@EfHON16sCw*KcwuTkcrA#JO~tbLldNlBHm@+^%S;b^apP$~D35yTZHo`8RKK zuU_X;vD%|%y>rzX=c+YM6>D6o)_GKJ@UGeH(z4a7YqxjfO5NmcDMN2i3^Fi-mwZEN zAp{9IwH1?uR7|k?2-;DCaKWn$FDK7OL68&9QORrIx(RfKL+X<2sY|ZIJ4a~!qpT%2 zvSD2$NM)3{^d`JM%2<92jvzfFIGM8oI-rNyKfmKK>dkh(@gLjVdz_-ex zb&-nKJuhDO9Mnc0|{t=uFS_cm9A7R!<$YXb9+dg1L6+1qa zZTnEN?OoZ94`n+(K*sJMB)CfYQoi$Z#m=u4JHM7f(5^3~Fa%Lnx$|T7?$2#U{!Kjp zf6AHv6&v3AHyn`k>SB~fumnv#FxWGIZdlL;i?nK( zACb2DFKzW-)a<{Q`F|P1|EyB~8TgJdFzy7MRth>7u8o1Kje)NYgt%%L*ee-WRx!vO zf$XBy_W!Tq0Xl+P)bKx#>VIw(&=o~|TF~=axwZatY5wO@{l_BzgF*fSlj0{fRnTe* zF#}NT02%AkaR*gNS`PnJtp3Xy{1?{(?b{X61#Ok$*9R?x(5wYxQ5o@;{^Pe>SK849Wkw>i;t={m--UKgXv3 ztn2?XuLYxZ|C!hQXIlTCb<=;2ZU4D;{Ab(spMCd#mIMD;5B_I5@}K3{f1cC-15T?-_y@F$h?LD+LJ#J~akjb!HJgHc3-f2@`fH6E0bEP8l-}X;ThqQ#MIs zR!Kc(aUE7sb!K5zCLU=9RuKkf0R|?15Q9^KiC2+BSe;i&PfXcdLElx$&|O%`f`L~V z-1iY>U{Pe?R0GvPpqo;`{UbIb24;N*Mjg zf7PG-UwPtx=@C#Jly~5N{{H`k2Vv;Y|3YwmRCwTj@&5mXdqLxMMO*)tZ2Sj8>;II1 z(AVO1Um&wcMQdL`>ZAO%PYc&PEm-{o(lg3k{WNFQll+y>z&#^S{|Hhaxc0YGH=zwm>Oho$9yATAahSHYYNdcpzC3g}R-%eV1H)-MBltp(E=ih{!w2(3X zR?gykIg9V)EWT5)2!tw^-^rhQDSOJ{*tV?!rHfp%raLBd83t4-yBEki=g4|hn zwll12Z*coA-^MNO)$81=*SS@#b*ownVS84s_paIK(X`FCeYbDx2Ah)k%C0F4LWbbJ zFdL#Cg0V1bBDe@>nhZ8%2dRnRBwA&ZJpVFkO$54qBV_^3`Y2<`Eyze6gamE)g4IVE zu!HZg)<Hx2gAPf)*-mQk*KO(z6DgY1bJR?xI`zN% z^#7_IpS(H_iTZXiD8?~xdo!@PfjUHDVGLs7AS4mVAQ=Th(lHF;aSVbH4BSzwK@07V z|M$A|U;oB`gWgZXL(tL+SSyCL-vkKun-t^c4! zYYd(2!sbkTMxZm0StJ-Zg+PcyoPk3eR2eY|GO(z#2%8B=#tTTs^GW52D3tI^goE#) ztCLXQB4!F&X(ns}t&f-`{xk3%1=UBKlNmTCGw^mY@O3c=HZlk_FmTs0@HH@q?iMt> zuIl{XBTeFY4@?rT zz?IQG2L5Z%`iT1igWx45;VUfSx47jVaw)&$*LWwY|6ke^bR?auC1{_loGs`~R2f@P zWh83#U&022l&wMgc4aI@RtY0ENn=n=1ZFUc>oAFFGYhLS2q-adN`O1C+zgC-pgVn7ML_o~ zuuFqFygUjF{HnY%Mv@wKQkwSSYIdNS4m1oa3SK*>%)q1z8n5Fp0UcP%q7UM-sDq9^ zV3G%4#~=(kF@^y$G{+6@6S081%xvH}1tHM71ra@N1uH%^HyM))+x^~s&zpC>YT5m|cF)I(?QcrytTH=1$A7f?sC2_ONPSei=3UX+7e#AdKt}2cRzHE)M>(sW z%bl%kF0_y_dc0KJrQicx40`vP)fjI~k0UmfT5Nawlc+ z{glP`Qy1S&TLL;wD0kWYg5{607u`*tcRhaE`RLx`{*Alsa~J8yPErr3mvkzWcF5&5 zif2>xXOQ<~kaK5{bLX|nHcpu6QnVaWAGsEYk$my z!vQVZ-Ky3)m92!=N02JUp=7yb!6NITC4LQ?BfIy7cJA?O+Ui!d+O={OGIFU{?OM6U zscfZt^}2wT?cv?~!n^nRHEs2%S&v*FdDN_j)JJX&TYTDf1$Aw8tzBUeRmq{{$;2ni zz%0bTzyaLhsWgULvpdpQ}5;MEYEku>iTT9t&YKDq+ArVu<{cO6VZN9{6D z>mx|51R+tbt5|j;bJ-0@=O}Z2RJ`>q z8iMqdN_KuO+4;G6I|!BU_)@n0bLrOil{-JT?Elqw^8bvp|J(O`&6s@FFlHK?Rw@I( z7Xzmo1CJ+zkUwY#x_AhKXb^)~AcJ@igLnuCNk%Zp$1zAJGYCX57}R*K{w@waDVpy* zXgG-J@PDSm|M@rkXPEn6uJSvB|5*l;4GhNX8H{!?nD1rK`N?JkI*FQH^FIUM31h9l^D3ptbj5_TbEvo~q_tyBx zrSzIz?lz0`4F<8348q44gbp!?9%T?c$sm53LHr7{%uPBM3Tu zlu4O^QwvlvF+--7j6hRMjJlwcd6+c7Gl{a`zLhZO3L$<~0Zms)BR@&|1f8H_r@~p$ zU7Hgo9Zs8ZGIzzT@=Z@__Pwv#^R|B1o2sqPE4RI<-1eek`b@+fC1=%~89rT6kc=hQvOS^6Mj83?5=y`Q@De#)X-3G=QcEx3`g2(<7x zYuTNG7555O+$&jeJ8#j|oY`lRCml}e+!0!{+A(daVOXoSSE;N`x~y5OkaiHisw<0> z6}yBHldvx6N+woWP#b_#gF`REGQQKTc$rJ#66eBYuEi^yidQ)nt+X##YMZy%Bz+F3 zJ}O%o-n}no!r|cdU66{yrFyk<)hegT)ehw=A;_k5xlQSE&)N-o zwPuZ5)oS}nsea=)R{|bI1Uq%&s1|B7FM~NG>sFH!3fk6PX8iI@9sgH1Uh(O&UNF9Vm zf-57?cpYS637+~W9ekk@bkq)9AEht5nX&v9uKEbMe?(z@gloJGtv<@%02;D`)JG5! zGMWcxgQ_Hq`lxvGYqUxVPi0iP^*wTBgxpao-tnn;$ET7VU&?lUtK0v(_1OQub0E~R z|5w87Yo=L?nJo$#1icxUY#7*`7zDf-gaa5v0zuV~Xb_AH0ByRL34FBt zBrX1{yZyKD{jX;IUqIz8lh}P0<^RG)pc}e*v_R_+_|*TiOWkGQKF7eZ5QGFLG6+p# z5T48+F@s5Audv}cDYO5oj-WGzKvj}6=!7?M-T$2Opvs6r_%nmZT?WBh3?kPUM6NMP zJY$o6&8_%fKn--{f`I0K0UZz$(E%NHCk?&?P|5&Q8A<4YuGGFo+*#l0MHYbAeUvHn++{VeM}c2EQeYL7Q8p z%|Kg#nz&$tp)B{8f4En{F<1+If2A}hd^ z(JIiW9mmH192>z>i~lps{>RYwk)iG;L*@~NfcXsCRSc}b3=9t73W5vN?gU|B25tpV zeFQq{Uz34PgMmi@)Y*Z|@o`Esfa(`n25xx<9%&F_7X)i(2KQN*z$6E#++Y%7WD{fL zkYeGM1!-Xt0atna3@m&MjNIVjg#%3Tg6kR?&|n?29s`>x2(cJ2up2XQ>VuIc1G6%y zCSnw4;8Eh0F|r8EE$&&-zxCSmz4s>Uf6>41dH=yreFr|a9{AL_=UwIQHznJiS8RS- zy5UjjrYGfFo|SHSS-$Oc@s3ZWyS^0e{RXLziVprOK<*rYN9qa={wp~64^$a}>!Y0A z{|mSO%ijvBj0!+28U7Tl{ROIwFzO>nWt6w(3A{eaS^fxuvQ|98tdAg5MOom=2(-ch zTpwjGxu3n{e%g|IDU0qTEVz}l=uZ05yP);qs~+X9ew@GhVfx~m$+Itp_aF7H-(;P? z$S84=QecyebCIxBCZ}F3gK7waqzi+f4TFd^1HT#Q9CB7w2GFuq$i6KBP-l}#kzF_3 zBCZ2oAAveb#jBi(S2+|e104;Ny}%@WjuW^(ik)~Qq+_>7&3aI^QUj@C*1A-$h0Oan zRIYZYTJ1^) zN6-qZVm)LEDX?j0IHbc>y~HMSl5tp}vRk5*rafqml1+-8Pl-oVQ%2PiUi)A&@YF{{ z_K!e!<{};a4X=!_)<=-D*5TuIpekt@sDA{hk08Ay$ao#nVBKxx`UtX;0dnRlq<;iq zLwZS&6?fTd9zyy@Icpywcab3dBa|v>{qw^0&+|7tg><06<8@CVB&Y%cuegKkOM#^4qE>Rtv&)zExjw*f`W>+yeo#)L9k>eFf2v{ws#*9|{xgdG=MuliE^?bo z^caWm5mupHO#Is!`3|!P9%B%>&m{JcLHHvG3I1Xh`_C-?pHucfhdk(zI)1hPBHEz! z58?(OH;5bk7c&CgCo5n8Iw4lX@ISBCe@?aUEQ+5PB(ir>`Oo0His)fze8Ey|Wxlt_F3BSl9h$TmN5l!+(+W|JB$2ms|2* zYt{$h=9|(vTiC+qbGbA!=;tuV1TgSAGB9d_)}Db*ssvR+NEkd0#-#w7@!?Yj^_4g! z7}!KX^#}_elw=lQU=s$7uX0L&qmK(boC2A8W`y;X*g-uYCSE2sK^9JN7EW^7t~i`RAgXOgx~T@XQe}1FN*Mkwr=9pY>B!fb&2P%qJ}X`KvV6nq@(r&mH@&Ue{H|*AyUHzZE4P47Ew0-2sdV@E zvOPbF_Wvr{|10;v|NKM$a}R=0-l6|_hhTl8L;v#*g19*c{^#ubpS9tZ#l506pgm2XjZ4dJWzN5xJoN-<4O`7Br@T2fiG5oB zH8M8IA|}y7x;`ArPMne!jDiNN0=kSmnhYFrpfOC)y$syo88^swEqL6rGB619>IT`y zHn|opbtzl|9;{pGR=&=?V!eCCT9;DLjYXC@3tcK!hjr~um~uR`#?ONxWbxzf5ovPP5R;}@C+8W(|C~nfxsJ?@cfjCH|<5~l%lH97+!e^Zzqjr#r z3DW0sty}{#8KS0qjcdhPuj)BXh#A!Zt) z;GAv{T#i`{!A0Qp5$32JW_<)7zC#LbYf5x6D-owkZr8I^2#SGf6Y;pVr+Ti+IKc>}453bwv21WzQDY z5kyeX9V`{hz~jvz8tp&%ro*lOPIvwbT>j5;{6FV~{{q+l%U%T`*17)~LJl%$uV+wS z&7iV~L3J^M(rO0v4Gc=}SoMF48h~mLZk2273YQd(W^qb2fp*h_R#I^>F!C@ks4(-H z3n(5{x46Tn2~sI-3OX)ZH{icX_isc@HsgUt{3C!NB*1QREY=#D6YXP-Vm^3tAw@sqkMw z9ai?|kz+-gN|1q?^Wyn3t5V3>7eky}v zIfGy%1G_&1XqAWo1A`)H>K}5-0lch2Fa$wccsS%hgCx943?L)}nk3>BX5bKFU=v_q z1}}bN;s*`vvx`CNBhZZi@ZF1$EkY2I6;vxR@G>w7Fmj2o@k_Gsi7;>pFt7@N`cMpP z;PnZRc>zvv2TGcOK^DBEK?yXz#2^FiJqds}BEg*qnN1L8;M0-zt&8c~-@NN{*TG-) zJ3g1JeNnXf8Kgd{-txX`>${rmAF6kMuHXN?a{up|L;s5Q|0+H7H}4>LDc#}!`G-NZ zPd<2z4!*uF_uzj>eU!QTfBw$@nVWy*Z~c?K@kidq-}#&VlyCb}yy-{L#`k$^U+1lR zmbdO1q%z80{Wu3SeD@48d6cv2N$%>W@G1#1UY8A?FiKx~H*@*@jAi#SmxDGUWi7o2 z8{&IZu=+{v^1G??uZB%H=G(s4u4IK~{ABsS79p2%HlqXvjW8zl5C$1f25~zEVJpyS zu{_$Ko0u5n89)IAnaJY<*V-(wz=E%rW(0?!D34B{ZETZU@iMpKW$vY`yerqam4lYI zc~x)ls9Xm+%DZrxYvr1tww+1SPC}M6c-L?8Y}f$m`_!#-uUqF*yWXXCJ)~ChuHO{g zzALnIPe{jZ-^MMF%E+T;J-j}0ty}}ClAOv`Lb^h(m1~gakDz_1inXp4Ydxzr`qgjq zYupypx+}DOS3uKN_lD(W`O^%ds+E27^!;)zBg$PfdVMj*;ep?Kxm|hodehpfxx8-Hc_7Clceor|5 zzvuA3`VH^G+K;LQ^f9O;G4OgZusSiY+cWUEFbH@u2>UPyc`^ulF^G7A5TpWv)IN|t z5wZwGT*8lm*^(h_oy)HOlGp#s-1^US;y?3A&{AQ(>;KiR{pa2LpCkD_gZW_wt*s22 zTNqRpFeuJtke$zYM>i_w)KuA>cznIp493y0oPjT# zTlJlY-ggdl(5f~eR2kWVhQdVj{|l=9=aBu+B>IYh{|STW5eAXN zEaJPFM0c_Xf)E4$Ar{dS4AL*zRX_8og9Zb|4E~E5{O1NEF+I@IZfV{BvihKd9i)x^ z%Nv4_v@xj9C8i6ir-Ze?@u<9Km%q;_c@xw#l03^Gd744|B%|m>Cb3J*QujFIAM+{y z7u5nC+8}1|U)1=&q&a8^PSWnbm;(q&Is6y51udl$xBf3;`CrWXKdTK0F@*o;D)`Sf z^S|Vp|AO283vK_exb?sM=Kosj{wpo}uf5>ERR0(8x_j(-=XfL5GC0p*&?{$lWm+jKvN`W0#M&YNq8Q4WYEo~kp20ldw0cA!$1qNPuc0oB-ei?3~AvIlN$al@4O$~7HpAJ0AXqkH$)=FM-**S;!W`>Jf) z_o`jLi}(L8J@~(9KL`~b04?V(I0W8pbOgMb?l6d52ww7CbPyz4eBgiaf&T@&|L5)e zm%HO{?vB6tTmNOQ|B=7xPws|adF#LDt^Z!Q=|ld8clqm|=dOJU8?1YlvFri7GJ@1Z znadw$E&=s@AS)2kmfTBUdOv6Rqx@A*E7v`*So^GM?UTwikE@p7%Aa#SrF&Ok#d5cd zNtV&AhJh6-F4=MxQ36^%Jj(VQ^46RZ226sQ9Q=xm98!#|0u0QMdWRL9gCIRqc;Mh+ z@CoYryCyc-pfyMq}{)oWdA*16S!2IU}KA#i=P z(xqahOWAVI>NSp~D_kqrc-C%!m;mV?fmBwm@~mFtRkPN+cAa&u{B9ojpk3tY zq`Xy-!{Hy0R3AZBhC>#IBUecvHn{gxu}IH z&;?1bOOfC+N}wfnJ3fFG%N_XIbofWx!Cw`d-vqQDk_(-{pq0VE>&?LAz`*9h!0!W^ z9uo8fjm`;pGVpsa2)Kigpc{j*2ZIP0LFyercLqUs6cJ%hkcfaAi)ys;%#T4QK=qOI z&Hrp?{_|Y=&vO-YUbOuA{|pnKGuWSIFxtnUx069_BZJaB2Km_x(sLPP<}t`#=G1w_ ztqQ6#1av_4k(kzhRm=af>I)eda$q^fno~T1PvfJo?l*2N&?+?v%l~Tb|BZwH8w7yr zBL&OUX#tc*$$=QI$<)rLEm65pPe`&}65_bQ^tUy(g z1bE1f$N9fv#D9gh|Ay25%WwKGzUn{2;{ObjelRq>V<>sRkanISa2JE~3I?O;463aR zQdJB>=?rYq3=AHi#UTuu;C-f&;9WWF;Q9g<-uP&w`ba>TL0FAVSd~*ugHJ-0PePST zM3I?S3Oe$}E(Thq&nXNV!~;zU!Ou^?G#xUp1X-=k#=ywU!YRqYE6X7u%g!&u!Y&H# z89^3Buz_Zym?c?xl{p19S$I^yJCfjYTX2&Z!My-(22M>)>u5fWXa-gb29+p_*vUC_ zuC;If*s+T=r+ke#U`(JY4f928t?mlZcgeB;CCC1k9{XQ%6jT|3cETP7 zE&oQVk8*eX&D;4eebe`>4L=LE{m)(p>LQhH`&Yc>Pucdbg_}MUZhV;w-UbU=@4fzI z-kN6x>t5undXl;PVaBot=}YftEPIf*@=4*E=lN?vs9@cT+|^G(+*MDr=HH0uJ?7i6 z-8yfneoVh|K(mZ<5wA%yn??|WqBnz_D`*->z=(m*fPq5=*8LF!FJZk{`9vxpJ*X&H8|r?XeS& zBuqIT+OyBEbvvX!@@m-PS-%kW1New~CeSm8(3eS9(-~Dl*5? zWo~6lTuT?bl`U~ATjE~1%DZ-5^6ZOngjq2q&AF5`=MubU1ZP0%BdjER;10q-u9d*s zkw8^a8t7Q{YZ;562v#@U%vy2_S|5RHqV(lA(w9Tm*FowdWD>T*0Z|`8Y9i3^>?+Vv zhwwTGGM5A)A+t!Bm-j(dgCmdNLD-Of6Ea!22DBw1f9=x(@ZN0@2_CzH*GCXWG3vk_ za(x6jbG3NeYXkz1(Uop}U$Ns;?XGXl2mW*&`rmWpfAjV)2~#dwrY+(!F9t0_U~>d@ zXSf^~M7=;r*o{HhjX}^6goGRzK*)(f2!;e5L2LnM&@DECE)4umP==rj0|>c+?#JW~ zl1o`FviHBj$^W9a|MP>-nGwA9pZD^AfouO|R{dv)yU3uoi$QA}gZe54rKJoC(-~x^ zFi1~jkeb3EvzkkH53lZT0fT?6TA)?moZ8^!cJlW@=PEFSF)&0iFwNwU+sdHI(4tM{MR_`i?^sH&1M0u37qX#eL|`^ql& zmP7g;o8%p4u?tM1=NUyWGl^YclDGvzQcv0CUvaDa6VV0jSCTRYU1%!@UP>ox2O6W3 zcm6N!3PQ3@pz2A+=D)1%e@UDFiZ1_^3;r9{{WqQXU!~!bY|e4ch|O%ylNj`B8Pv)c zq_P-g4Rj0@PT_S zkRdoUlObGYaLvfYz{JbMF3Q3w&c-dt&LhRiFU8C$09TMmt*6T1sB2) zy_h7kfVie#OiN^Xzno1DgMb5rq>qAkT}=PQ_FaGKPW-Pq_CM$N|BS=`vyT2RI{Ux) zGzuy_`M==A|GZ=WvycDJIPyQ^00?Cr{GYKOI-^v5=zrPX|K&UX*X;XWyYGL=*1v@t zzGbiakhthUa#m>bG9SY{z=g+mv zooAmn-ywg2OYsty@-=Ri(49(<+6`h6yfVVdfYe725?&dB4o#YSF%bzNS4@y<2#ti* zN{Caz!B;7PkFEt@I0fz;-NITQK?dvKH4&s5f^(s>Mc@um?((~k$_TVcZ560Wg499C zB<1x{(b{K4Ye5HzKyMVnrMM(4J_DcSy}Z0EbO zo$tU00e!CB`@QAJzmB8-E4F@2nsw8@e4Ds;2ZKr?Xkv)dm4VL_v{YNdk3q}_)cp}~ z1y?IBpvp+VfkD6yOMN8Z0IG)goe-p;3#e8CiMW7{{L(ClS@cq5?|+%2|3z+rsv+(R z|M{o5~}BV31nLr1XJb|1YQB ze^xEf;of`({{=N)GKgIhQf-se=@r*KqGWNNTN`v=xv0^91zXTfd8$tTHC;eQ^vIcl zE@0)?01ePd7=q546W0fw(!&ovGL8>?To1n{=wu#2-T%V+|Hbq`2y&8-pyq!*wg0?I z;E_794-EV-8TihF7C#8?1=UC5`Z;PPL_4O$;b+5eZZ`7dGdpWo;|kN$rK z=T{8o_ZZApFsRRDkgH@6D`en}Vc?Ep;EG~k3u9pQ2iFRK!t%%iG^R0nNI`_g}~!%hzq8XN=ZnQo)J9I!~yCJG4L?32(j^qa|_C_@k_AqiZO7C zf!x3>1X@SIAr9Ks1Ugz1$qWP&bW0AqsI5y-m2dWJm!wJ3Zj}sLu?&XAx(SQ?X525@ z^S}Dq|N2}1E3f=7Isd=x-2aLTAXIklf9cu(C1?H@p8TJG98@1=9|u)Tg(v?Pfl=Op z|M45YW^ekQyX9x$=AUUR-o?&)=F@xCrTL_N-4WZmJua<>!>67NnRLpsWUWs60=vpB z(bFy#u6kOr@lDm{cctrImacl5v+zz#|1p>Hb>>+MHKHepyH>H8WP+}6QV3#@^JI{8 zVHCG#5VU0A)nx#!g;Qi;Rsi=0MZm#Ao-H{ zBcG;i-i@IBLEiP7d>TMqAjQc@W zK7bb5RjzWYS`8Vmb1GR1IZ(*6dYxaxX7Ab!UN!5TOIO$zF0sp-W0yC_I(w!~&Mey; zXqDt%w%o09oofYX&#ZU-Cf~*_puMzp8(eEp`bTJ0QsT_>keUdMB%?lpRZmwzcN&7v zUxn94X$!7k_Kz|kGe^ro{UgX29i#$+R7ROAZfC8$13}P{I&ggiuZ)PPkKh#(gn`*V zf{4H`s(^QqK!bJbpFl?HAXQR6xJrW5LC7TL+6H*71g@msmTmh`v+GOa{-2$P{`MXJ z-@X4&(Xz)OEeF)XCNb-zF^Kvz@VSA;;>3L!B>Wh}y+BC9jX}(XLEH(1#2greY(NvB zBGwGTwqRt(AZ*7VWQ#z8_MobY-xfp)*nvn;W#j}hna718|CA+oD<$7)(A7Cy7yfgc z{m*j|w5*O{%YTOOmkeen81y$Vs4Ztuo6DdylR<6@gY-lOsZIv5RtBMJ2Hr9T=`9Ru z``NU9bLjl#(E1O$0!iy1hvr{y&2L;9U%9nG7sB%B{^!>DuW0sP*9mkw9B2Z_0(1z2 zsK$RmmH&bopi|%kb^r5ggSt_mo6U7V$7u6t{o~R4$EW@ugw#N_59Br?0oDHks{i@G zh+pMDoAgg6@w*HH*BJOtG4P&b5ID>rc!WXZJd^AVE-lajagejVK}WWlgRUi#Gx;xJ z0@^&Q=m1_}uKSHc={2kD3pTlzq6YsJZ9tbhaOr?fMrD=$#wzig6;>aC7I=$aVGzH~ zEOn1r<~5h{CvG)RH%in9bc2S3)qioz|6;cPr5yjuI{lZj|1atIU(W5nwA+6v*Z<-U z|AlQp3+?y~|MTlzWRTd+Adt$y6wbhI145jZ44jq>?A8ptj*Nm1pi_7FY{8@z1D_QG zuO$PwIfH-&1E(Pavm9t*1+-d#j7BvlXrTh91dD(?tB?v4zak62G7G;lE1v=zzamIb zP=Q@ofmKkBkxz+LNDZ_Q7qm1&lz~Bnkwb=!SCLObk6%)sLs*r8U6KLRyk`TixWhlk z!3gR!F>*6=ig5BvafvAMh$(Z5X>o{Za)@ZL3u`j)DSi>!>|68wvP}7C~XdfqzUPS326Fp zD!FsWSThJ4G4N?JaH}zK%7T4_c5*Dlb7aICL?aO-yR4e2w{^zEkj7QsONk7WfvP0Ph@3~wddMUZcqPLo7x1hQq%v}^+u#OXYv*3Q#-j$h;?ARHtxv3k zaFjUnJbINh`(onki)i&x(ws}kog*|hT3-pmMXrw^RT6T21e-s?K3)f}k06y1q#A;g zRH=_39jILJQU}N|9c0BFq)LLww-2bzB=jXiTk9^w>szgj>FfL$_2w~v$WDxLT5D#P!_h%3TcZkG%7(_il zNX!LPABj4EYaAQUbv8nlpejiS%oel(?Nfk|0@ffk&^pK#)WC++O1uuBjupGTka4#9 zlK&k3qDB zL9&B^tC&HcQaR`~pY1gU;U~=MKe%;4T_i5;|6JPt`E`E@=>6u=0aY>FI{*3fLG_V_ zE$FCHevSW}a{t++|FcW|=a2?1w-(X_RYv^U|GB}44_qJdfwuf0>LXro1tg&UUkHo@ z)Ip2lAoUT0=xqkRE1>#_?-aB?62HW+^;E**zpCqh6-UtB6G~?PmCgSvTY`|B6=?KN z!W>#3F-kvS;J?Knc!x#)r<5tETH)3I&!hC0LG%N&*i#nqXRH#pStV{WNnT@>xydT? zfJgZ`ui94;-9KW6|D{a-OIv`>`H`{vuk89?C-}c{)PEJv|I!ZsrJeuFx=V zX8(Eg{xeAYXW&`Sz_5UUF^qvBn1RijfytbK*_eUdoPo!IfzOEndJ3Ek1FsFJGU7G| z9S6r{#=vLBz+u3^A`d!&7H6LVf2qX@>W{$cBW3|5Rsj`eJ|zZDX$DSdMqU|aemQnw z1yGeFq{b$!&cvqz+StOt2W{B1NI;h7aR{q&il}jmDKPU%fR-^Z5ZT&iV_@KBU=?EF zlV%oB;S|&2lQIyLH5ODf6;w237m@*Y95CH4#Kf&8Z5S?Z8ZBy>q~cMi9W}!ueW^zN z7WuT*j4`YEGItpE-HqM#zx3+=+PhFxclUqoo&WW>L8$g7sA9_4^(S)mTaRhi^lJ8~ zqUo2|;W+g9%KYC9e^<5I@52g!5J>7-6)H%VtPOyM=n zlyE4KcP*E+FXS>vVo(WVkoRMda0fO2_$?W@^ch&SKx^Tc6u~P7#lTZ$Jm9fS4sd;q z=_wkCh)QbMX-71Amd%HZeR3hS#lrd(3+l3$gN|r`*F+FT=8C(Zi-VE+N03!^$n_MQ4Vh7bbci5z5Hbn6RBJVK!9(_% zhmgt$UOmAXkX_q_YahcAWG)F(JwZr#l?1PqAY*ip^Hdvmfv#_E-}$+G<>T1S111U6 zMXd4|r2QGVEkKCh5>(3wT0=P|9RH`XV~&z=p<;c4m3u0@;}p=|DrqpGqiqWaJa`{ za*4rUEraSJ29=o%3R4+mdKo0U7{uxsWZM`x6B%?{wKo5EIQgF?;}?U~83xUt44OX$ zwEhX{{O1BA9vx5>!=Vitp%OR!FK!CD%#K0g4+Hl{2F~{kLLV8VesU=O;!^&@qw!5Z z=a-P~KOxn}C)s zNZ9?CwEr*R^j|&jzgqZz#i0K(p8tg%K)ob+m;Vy3AS7n}pHb}}tI9?OuIUWiE(~n; z4BQ%^eI?v#pleTARYCi4cr6%sEf~0+L5Rx>bi@<4KLc+d1AiEUP$Yvy3aAr&5RO@0|Y5k+H3Wg`J8O$H`j3=eQJFpBakI*4m}2paec8wK)M=7>6#Duzz6 z$XRV!cgCdhxJ2$j@tgz39asGpe+XUiA#ll??A^c1j{VQx@il7s6R!#9Y?}`n7j4kZ zS!IyD%rIxURpBa!@(tF-AmmWK!LeeaL**8S$}P@yJN-J3WG%Uox%j$w$mnWhZWVI}RCZc4;e484Fek69zsF(7j8n(%_mJas(e_tP;cdbQNId5>u71Pj$-d z_pDyyUAqoaA3@eLK!)a!>m$#GO+L+AgW7ibG=NstfyoWt^&sR?vlcQI=Tx@brDCOP zgXr&03%8HC~l# z+)7t?R;>1^UgurA!Mk>YU+o4^Wz?|QuL0C$gA62k)@(#Z@zc-35oX;KKl3~sL24yP zcL}*NLSrM>O^I_aB3De1TPh$Va{ma<&RBFUZShrjl?1Jrz$boV_L3moCCI&3kQxUv zUWcbX%2@%vRv2=*FqkBxK7ydK)qiuV{$y4KjotC+fDSd~(*4h+3pxl$Ko4}W9h>5R2JycPg5Md0 zJ~D`YVUh+R7KL}5s-Jn(fADJjQf8pdO(3h) zA1MX>SFQbTG2y@I%`1O`Ge{sX$Mfn#BK4PLFEqv z*CPh@DWLnFIGq?coj{cl==4!VP<_Ov2A*ZmXW%kr;BjFR@nH}TXP1j(SIy+o$l=m1 z;?XVP(kWn&O9IzNO5jxi#20{&2_1IOrc?$I23AQ%9tB1|B}P6a&@mcJg5Vvg;A`@D zq}j!_*~PV4MDW)tGeo0S$Q2(q>Ab64ag;x0qd@X%vCNHfdFzylHYwz; zlFMG9k_AHASu4y+H#<~sv#Z=3k zIiG>wnnBQvLE3>))C9C4h+7MEc{+;%=(;5aad2QlcK*UQ1R*|#us{-!Za zP9;l0tE?M9%NU@=K6thWQXfGM^YL!l65Itn!^g80)OCUn$3YeH6D_U%wzreO| zk$v$J*UD8MHEW$qm)Ydax6GbnojcDucb1rmcS9Iw-JVb0C-juaCUzHX~O?o;4ferk#nKb_Q#egw|t%_MV_Urt^uj z&nL~f07nomgiM-qA$jh_S0pX zKD)G8%5J61YN4Rc3#SPKk2wRk83U&Ys4U_(WZ*Gk;4=Y{5H_z7103<2f$AL|GX@Yc zVc;==O7Vc%kjjYPiUEYceI-8d>;r`4H(}s60bPH?Y0ki4!ypsQGVQ(Q?*H6-|FiD| z9ZJKt^*`U4|GcL`_q%bN{x7`#KSTK&2FEK5=BF66_cEw#V^E&QAU}&iu8Tpwn?a(A zfjO1IYL3Bt&^gi)C;#)d|7Qq)%wY9`LH{m;@m&V1Ck&b|8C0J#sD5P7`5|ibU)TUz zj|jqUS>@6C&!PUGS?NEI!Y>Y)Z!FRun5Eux$-d#1d&@5OnMe5tzvf?1jRUTjK+EmG zwGyNz64eJC4JV-epG)a2gZLu`kuwZJrx*l}F$f%G5WK^y_(sa;zn&ZD=5Zrm(7{An zp8t(v|C=TL*NOfw>hhmK<0*sHcZT|(;tRp&?W_coi~qAF{b8`WB4zwv!x1#4#U%Nc zf$t3i-y3Gh&s++>c-6rxEA&Czi^MHKhb@R%f!6FvIDqDfSuvyJuy9Pv>bTLR3Fo4d=Rw24RVg}9KfF_%G7(ksMAyEH_ zMT&__fr($4fmackcL$Qv;&t%gB5m`jDfNCXX&=y)o z9yw+{1#W&Rj4l!zXm>5M61$QkgQ6FMya#BkOf`f>FICjOOvtTOA!M>b++!bbpYYeiNSrn}g>Ny$Kf5NAApMB+etCCgr6`*R! zscM6B^+uQKE$(&O13C}IO}~({{HA}`9!?E+1`c%=UPU&3WmaA(&>`obt8DSCEgX;v z2+dM(6Ghz0$TP<=XKqmKI=@EHVs9^Stpr)u?NI~TKm=K{4Vg;{?cC$vyv?(A17sP4 zW9bUpe9(1!Ryp%6vgTT6&9%v%XPGhEHfKKQ+VQHj?q$oJKv%>rvM*fXQnu2gYOQzG zS|9K-hJdE6$Oy95&cAUB2sLc=2P41wEs(m&uVJ%q{U+bK&E7Q|A*5H$das)GkopK$ zl?1JWW}Zhdkm{qv*@#*xarXK6nP=hk5u%;~FT;aWPmsN|uv+O7xP}6e@SA5LwNmEd z>)_2wpo=Qf7F|nUd=tDr{6^;Dn~>A(Ag4XRD=c`$l(h_c{Y3WiyU1+hHF(HtA+Xg- z(E2E6HLOZn{Wxz8;@A+-PHpgE4)FRYe>HSu53)@OQWI5ddsn^lW5do*4Ld)Ttb7#N zcig3DrJ{culU5vqkQ)P+B?E^!=zN~K;6Ll(|J*14GamWRe(Jy2rvD7JpBTLEGT5AF&^y4Oxtl?A34_Xf2E|?m>2?Nz zA~xk#$&OdfoBoR)`_HiOGegil&^9IWrwry#xx@df6#SQr`_JO}pF#gOgW_8T>96c6 z{{*%Fi|T`x$qDO&&L#!*mGnX5W@1|ZMKu1i%Y9}Le*;-Rz##UTMfwAq!WT~E?_6qs z__Y2*M(u>ai?oGxLFR}WfVxYd`bgq2v_29(!615yN$jDp`X?o;|AwCbjeJ2!*XO^U z?|;n@P<^Bs@t@E3KZEyw&65A3^Z!dN0iBY~u=YR4mj7Ja{+mtu&k*uX+yFGtCu#7X zS?V97&<7^rkLa#C`uud4rL| ze0KHlxE|V=ate^HFA?PaOIRX;FPpr6*b|~ zP4&oLY?im#Jb#Nz=~lP$?T$&a_zf}`cpX4{f<#={RYL_eyctByLAQ!Ch%m58@ri2k z@XNAtKrW<)IEEm}3Yu495o2JNWCbk?$J~)5D5UFS8&Pf?*`(%Es_0R!7usTxG}9q# zp>5#?^XyeRS*vxjR_o@i(ac(5Ub^0_WSx1@T8rX!7RBpK3s##IuF=n0YFoY~dg`Uv zY3DtgcH393cdXv%RK3lqdYenlCeS9G`W@a)yMudFi)$L!k+OKgNWabF8umQYg8&VTNj#F@}0&Pcv>=<&d zUguP{(!O|^S=L;$jM)}hb1k#yS>?>P&s%7px6rw0saxp^@2a(awHv&vR)bb7RIc%? zUgulC2|fx3uZaSix53!~O!aU5%2rP0BJOBt< zzXz$3vcNM+nQI;*q5GL@?&qunqqPsR*W3pk_^|GAG5EOe>aDNqcfN1h`l@`@{j6zc z{i`<_M|Sg@rZdR8Gw_--u_?E%q3WLU72EE4&+W#4JL02Ym8-OO2xWJdL^6CB;)&U*DAfWn(Q|>FP%xh+; zm(0>{nWf*d$baBe`O2gAOHdnBmx&qu7d8AZZVW;KI{!J9UNA`BXBI!jD0+fH>=c9K zS!T)CVmkknY(ZB@=(+#bb^EX7@n6U5znuGjNvHn|w!hhY{!2~$Z?XBm-roP3JN}#Q z`meSAzvZHzD!orMqTce@U6eNXuW9#RRQEr#^nV7C@2rx4xK%*|hR{Xjw%L*A_i`M2A%*00XNVd zWKMeq4m;3lI(}yc=_p3ER0fSE2CFFy?t2-6PB4UD0ilTN3^BJE;vX?2KV!&vz>xct zJz_6|Od|t>DtP}92Y5|2xIGLl0%2Qw7{F)#Fz|!sgBbW37zMx#K~R+hIub#IfkBvs zLz;zOkri~KgcxW?600}^hm@F_YgAUJe?k?flnI}N0l%ant44xp#00&>RT|Mtl!Io8 zy7Wj`S8-@(F^Ksya5yqBS}<@ra4AMf8DUHvB}+X^*Laq$aV}YH zle@&VdZSO%4v&UyHYKZUOIO;ItgtO!X;-q!u4I)>(F*hY#oEc!-RrkzExM65|6*{@ zK9`zJuC<$;Yd1U9Y;vvH>Rz|qyckdcvxk&y>{ zj1nu%or8gf>_LW*EZ~#?=?L?KLX5*mKtIwjrq{b{xqH=m(6Z|4^{&;>eXK5JDLcVTDQ?;+&xC>B6ok+!CWuDJtP{UIgSfkZ zL5iMVlV{=L+^NU&mOQN92&#-+Hh@Owq;LKgy8d7M&VP|J;A?bNf)+M_mU*xE&$bj) zAF-x?VDP=hV7i|{V-thMY6it63~F=uop(ytJ@?!ApK}}NRyh8x|9Lk3m)-bZdHa9a zP5D+%j^Y9&6c z{~U_X86@wsNu6aDKgA$%hDG)wm%=AGlmDs?pk)kN&i}QX|EoHKswDM*|C~nu8H_(F zr2SW4^j~h~e}?u~40#tBg10l6&Sa1-V&F+;VCZL%-5{y=U)2g!A8{!B=aBi&uM8S# zlrRDvjw%DLjAZTqL+T?bd(gUW3CI5uj{ilRKvj|qxO2qk_@6=VKZD9m2FY^_%2OHS zCo(9fGbpCAsYWx%1TpYBGqBq-@Hp@)gzy>_idk1N*v}LUUdxkkO(^XSQ|2?K%x4Ta zubB(pGZcPdDE+}u^@+3XpF-g^2IUU$Vg_aK-cmO3s2!x7WP){t_`tOfA83q=fd@Qv z#tH6Bae}q*LTV)@Hc4ha1$H47CO&BfPBGA69h(HVw3W7Xl%kOjqmT}#ur`mF9)n_} zyi=8+OD~Ig1A}HEgH#fOXb6K~0O%MHUN;6dTLuO*1_lcTUN2!I~*(5J5;Q(D_iAIw#K1sjZ?)s z`_k108FN%(Cb-vc%~^6QW8vkX?!Ayxbew87Il@Nj+#7a!Hti1TIsmDUyj!-2YCAE2 z>U40pfmXId!kGwv&{hfLlv)VM0A9QY9*O0U5s=iC({eJf2{8(+c2Aq&Rl3Zta+OQ@ z8mH1#_N6QBN>Tpz(JCgi#aQW?R?gc)bzUyT%%D`d7z@o>%q6I?i>Y#oQj|ONim|Kehf_StUcyz!BToYl|N1)+2Lk1p0 z=u{EBK7v$60v4b>caW8M{NO5&+Z430LE4j9KUUhW*`atvNdJlKW%tWAzN*~vHg)^& zs5Kw>*Z)^M_+Reff5j*NWgh%je*WL&DQFsqdDef<1>igQX8dQUd(TkynxXk8L*;vh zq&o~jXBgZLGWeZj$i6Q;^*_s=|4h5UmDdi?S_qE)|5=XwXFd9#^%xi({?C5&Kld>( zI{u&U)PMdX|9KAnSK9esVEKQRqW=tj_ZjpaGN|5QQ2WHB^Mf0*?MU}Ok1l9&IB5To z-hY1m|2*2DdtJHJf3qupXP5uTD*K*Q;UkCg7f#jB-0EMs)qiuU{^V46#w_)KP3k<0 z1n68-Ho5COia(UhLC3VJIsDhK2O(9b|2m%k>ip->I?kkWkV7Mxf!Bk9K?6KT zF9a^bLG$Je3}u3fvjlWNhkFa^gZ4lPX@L$D5;6QQVftUf3bcVp!v4Rc1L&{?N$}+n z(st0gNyzEHu=9U@um9X`{~6qVGT44#us_aVf1KHFC4<3i27_h>n|@)p4hEGp20m8? zZYKtzXaVCIPWQQj{!1CckFsQ3V<`B{Q1YFj{3k=@Z-$zG3=NISy?lz|Ie9dLlFCr&U4X+J|MFAlIz`59S6SombQM3g~y zqH#&G@X9mu$T9G0Fz~1|u!=LVh_P`?3y7++NcoCr#WE;mg02r1iDnQ8XW;V&U8lw2 z$iVKv!0ODv;>5sc!oXm}z;4FCXUinx&!rO1DsBrt)|CS^z$M8iA`d>IK^W{d z=tGa))&+X)N;)-jBIwyoa|h-#It6VXXSFIq6H2mOKl4mSrjcZ&0lO@xYV+E zxlPduyOLE_xl7Em7J@GHDP5I0|8mB>O935woU7M6Rc&yp+Td8Z-m!AMbM;1tigiX= z3zVWJcr{7dPXacjH!rv{}-gbxa1S3=%#JoVELZz~2#XLj9i6|WM<+}UwmJF@0ps#yJ?V)L8Y9UrT< zzN^~$E`Hp z{}~!TG1PrwsQJWG_JObBlVa@;gP#A2i~sAa`_HiTKf|W~%)9?{9Qe<%|3BBk|4fHK zi185U{yx^jps6UXqyO0s{%1P`M!Wy>9{R6+=)cDP|4tkJ8!!B?p8KD{<~@V@X9lHr zEGqxl)Ib|%`Sd}XXZiF%wGyA+e*t~))vn-G-$Gh{_|<=Ms(fWr{KTsGo>lQZliVjJ zxlgPzkC;IHqw{Q1=b5A~v&-M$Q~sr7{$Ilhgw*Z+YdQW`cL5y=Az=QWLFqpO?m$3)kXC!V18oLv6`Y-7CpVQ?(hx301_g@Uo-F063{Bu_sQo`f*MEjS@YLSa z|3WMOyDa?=I?E0;d87g^*ttN{E}-clE?AWWsfO6WTsVnVA3-W90Z4ts$ScFdC(ACN z#LB0@z@^Fny1Y*a6ckKCyh2Kh;$Dp6UZ84-Czt_(c)dVJ_;R~2a5^!tyMhp>4FiWY zXg-wL2sEk0W6v%LLJVAxd)64hj^SqE66F$*;ue%<;}8Nbv4EG@tlCa-L7A=5WwYZd z=lPc}^eLHVTQJWOyou7QYDH-0F3;L^mc`30ik3l;ZShL`($&^^OHI<}sYUj=SFO)m zawB)~_2BOPPF3rit2esVg3fJ&R62HLYmBoODn|BuHf}9ibuW9#wcwt8?)6)pt2a7U zZ*;ER>|C?et$qh&ye_EgKu%2g3peNat6Wg;$YB%2Zz0L(4AK zE~wBor8&50VnqGo#Ev!by_@2DH^udCOX%Mb*S9CGZ%=g3!Lar{KJ{DNDnTcE+ZQc& zC|==M0=k;qqh`HV^#;Fs&@zLd7Eq-V*t9*Uc}Gz5j)3MJL9M$2APlfbQ1cE%O$4cx znzsdlDda(pOmk8 zTE6CK_3G!POCJ`_x)Rg6-!)^NwqJ{sMFxYcFKF2tt3LQrW>wH+D1#ih`7Ou5qQJna z$iSumLL5pATq>X{iAMum8R;WZx|3b#Bj-t?|` z(>sebKTTJDW83y$_UwPzgZ~*Oe_<&9&rtuLZ}NYJ+V2cu?-{(GFeE*boBUsS>wl>= z{}~qjXP6H{j0^rVF8|ND{y*c!|4f_yGw=J)bNoN&DbTfjT*v=&pZL#p420MZgK8g! z{h+(E+4lTr+4Y}gI|xDQFVL-~i~h6C{m(J~zk26?hNK5f)<+mLHZmyhU{Kw~sri&g z=LL_>FFw6Le0t!DNe8^_!2q;k9JD;o0MudO(fZG>_MJ)b6NBt$2Fdpfl8+d~A23T@ zWtO_iAbFET;Q@~d=!`pMd(drts&=5`Ak>^eTUL41{xb-^V&u+XVDJQ&hLC0_18km} z8C-HUiD;}6)%-854>~dkbPPE77Ig_*=(wDu(|<|V|KiS|^HHT7{zH(UBd8`~0CkK0 zGX(r(2>itm@suI*8AIYNhJ>38!6z8J4@jr%v~IcS+^~fSQ6{AXzY&(H&|gC_oGnDU=tD!5@V`#;Zy|3(Y{GZ;()PaZ+`A3-`h zko$=s&3G1YhX`I1LHbIN`Upa@f$IWB1_n+>UKuViRnWx~JWA|BYApQ941CfIeA0|; z+~B}r5by_Sab@6j10fz)1`cP?&5r_Z3_MN@ z+_nsC77VJM4C>wtY_g!*4K#(s3hG?2^Ko#Au&@b$gPMVXK}bN^-l=erUG6-y-1#7s zv%oZazG?PCgS0u?iBm(m_9f3g@71&oG$mBJ(zbM^RnansvQ=3c+myLqQi(+;AL_G>>7(sw9%?!~O7*X=4-vr6iL9SON;0~{nYBp|!PAUz;9 zu)PAHnIKkGMp09CZ667nSY@9g^Mp2+;yFQ$s{&d#hIVa>>)VqsVPEXTLvfQ1M@~2t z+OsdT_h4x6!Ei8&>OB(PbugrTpMTR%pZYC+ji9?8Ak|H9+wS1D-9fFpLRxo)g4tk^ zy}@mJgIf0lG(+nnNUapmyu-g~yMNP8|E8UOjXV4rcLaj#qrj%^pv4JIppiQ0tWe_? z(8&+5{t=`;@~+tkse|CP54`H}1~1I>sswFQ@~T|pS+N>|;PnwQ1G#R3)JO11rP%4G zAqZX{#o_KK#m_t&JM(Njco`m~hC(JGwGw=BIBZQgXvH12DhZ`Nf>%k9*(C@GJ^5h~ zv{r)DM`&c~lIswZwiJ4|MaGgFkVz=`Iz4!OlmouHqF~j7;#H4}S3NFVc0X+>!N!S=vC4nj?Q)nkh&>UJT37COSj1d5Lg+ONn*)d2s^Qnib z+2>lvbcd8KNu6-8a>>=|b&u;dJ}ck+vUKC~{EaX3H@?i>@HltFL>Cj{owv{;NOwFL()5CGnm9&w1=W=TT67#C;r5AF=NK&%E;31}`pu^Di%H=dtNaUQ*=Ou>H`!!wFi78KReZ>+ z3OWW{+5W$h-G3EZ(EO3Q3uv^BMfMv5`vV4+L|A=i(B80G%r? z4X%vj?f%O+{+D<8FXQ@O*6qKvI|#|SK&vEa$N!?P|Ha(?Gy42z_WREe{huN3KSTN# zhU_0)W$)RFUNc0VV~9LwPx_i>Uo^Dz1s2X74wd(7M*6PdMVic z4O$aT0L|PnO#9C;0~}m)KuaG5w*1$h_n$$3A~;<^=8qtg{+yt4o`DHGm;gCCMeG%FM6I#HYe3qQNPm!N4!WARx=c&I1k?W;PyiMj=~9 z(7{vwpv&%sf*FKE7=!{r^G5>SpxTGu12mDu53ZQ_JV7F2J}kvN~= z5;JfI*GG`m7hr=yP6gLZ42+yI7GANHcDeJ+)28dE&9KN{Xqms*B5#pl`drQUsoIHC zlV+UBS#ra_eYb7tO30Y%SQXd6(?)3zpXzkg! z-KA!ePs^^r_PySX+pP>{j}MOW_j^`LylvY~JbJwA&k84f%H*^ljT8 z+;cR1;?dOkm$Q~$_wCpvrezNvuS2enh}>EQ@g^B0I|G9t3$LQ2l9`rifUQTeS6p3G zUSCYZ!i=7^*;95E%-o+b{XoX_196l0$4uNC-w#4jefy*O_DA#|4DUM-+`T`zdq1Q; zitITY*>gC&>tJx(UU)SW(!K|dg4=e7f)Thf+8xxo2ZH>YcEW2SSY?E$k9-@pL)OcI zx>pUL%E$+{p9oYL)o+6Ij}S}dz^mrql@X*X`c%)aEJ&p39pYJ46MDRlzErn{iD?RS0M|+A+--C z38{}Dog?^=9ddn?vE&w{GD=-?9o$R0mAVAFhCFTQjr3(V!51olCX%vN-pN}5LM6-Z z6)n9}Joi#^@1fwbrB<;$n$Ed`+M!J1R-iR|GfBL>>y0lJM&lz~y4 zfl(5KnB_pnyl^Wq@F;^iM_eilpw5vd19ZGjhe1G}fnOVhgbWyj4M80xVKW91V+K(Z z22oQ65fcUxGX_Cp1|DMuAxj1sPbT#cKI=T~h;IARl@Wc1lILH}UwyxL{o{%)FG{z( zEZzj#jSJd`3%>uNa?|V5bQAw6{Li-GzwEOAeAEBSRlQ;G zKgggrgF&T_LA#wnt%*S_l|eLxK_m;bo>`=gL86O6bTg0A0Y3F7LfS9bH9=<(vFm`B zZSy`OgsbpCSD}L*{>mqQ4Af{~4-&Gt~YAtu1W$ z#gTZDJLQZ^=heu0uRpvNH_z?|A$Ef;vRg!5yL* z;H8NR{xd8DRYnYpL5mm-*8XQOo&sCX0691gGWZ5*T|;Jw!1M2*(}Y-HwGyNm94t+m)>{&0Q!L*5y*OA!pgG)cKeFJNG)*ZnUr7=uorCwQ;*w+a8~e zeXfn$9l%pao{ih{mfgu+b|Z4q5udg_ejWR~+V*(2@AdCG?B8`bu=j9K-;vmjAmm~&8d2pRh=sfQCLpGcenTKpU{e65Ki3 z1tB4QB3QQw)V+eNmxIg`LFStvBxL#tIz0qgzYRW571A?;kdP_~(m8_ni69JkP2^dz z+O2d2avg-srm#MO58uUtyGY2@5S$IEhLA}}m6QzKJd5ZkfxAmrQs-ZV)J>Q*5kv&D zK1y431Al##w)7Tcxj4K&%3pmyZ|wunB8SD-6K9_f?>gvIy2d1Kvb1Xzmu?K`+$t^; z24-E*&C3i*;KMDY!8H-+*h$b9574Sw@LmrFF>qxh4LSppSq?OI!6wVVDi1>J$_!j; z3|yKB#H-1`tI5Ep!@#c#T4Kke4;szm(FQFD2Q7rqWe_%HP;eJDjnVL}a!i{XP`e>@ z($S20mkO8PF5URFV#|y2&Ckm>KQG<#vSjlM$eFH?10qV-Jug}N%xBGepEd8fH~tsc z_Mc(@f2O1Vd5`|*JN94d(0~5z|CKiWSKRR5Xx)F+h5s31-ZS{zX9#%05c!rNI6 zC;sP2`_EAHpSkouL*0Ld#{Zy^x3>SRz5f~7{&T0C5y-sgJ>gE=;`h!5yO6{~6|hE2D+~8J2)Agk21}zFc72f1~yP8Eoc) z$Lo|}eIm%(Iml`&$S@sdeFW(fL24ofNL>S}j~Ip71(cYDG`J-7Ii*Zk#SEaQq=I&R zf(lGFULghnGiG5+29aO}iEvP51i9K?IEXfDQYEA9k>Cx*P5ws|*i_h{JST)WY!W`lF}daI&k3K2c7 zwHtGm-O5~i-M4+WYyD=IhHb8m+ufUYdbjWO?cDF)ywkOIvsKXw_xi25OK;~bzZo_8 zs9(pv;NHW&o%{W|4g~fb4eU7@)ORGf|7gsV6RC4AR&IP+wCbKuW;=s~8F8z09IA~1-o+^SanFH5HXfs5RYRIE% zEppEYxf*gWTj^H10)mk1AY?YYK7ugdy`-3FryvMd7YWi`LaUG9TzF*!VIWsV$OCuS zsw6~x1YhI;sfHjVSnyg3`0#Kz3F#$4dQ7R{k-D^H*VC3=Pg;5-Y3YrWrMFU+-b!0` zFMav_%oTStmfrzgP`l__^31a#UHbwmR@tUbH}I4$ z*e3s1-tgaK+kd8({|sSI7$ToD#Q$N4{=pFP0knar z{wqBEul(e{>a+h!&;H9l{x5v{KgWgtjK}{o90m7}w*6;a_8&C#RP>4=^elt5 z3`#v9q}t4&(!`=y#-N_hAnDB@<^h@$XESBs&}LxOU|>-JPrw;6u-GuL6|##rams8L zQa!+{_DWRmlYlO0_>NQiKd1J89s>~KHvlb%;7|vx#}m;1FK+f<#`eFo&3|!l2S>*1 zzp&$f28(wL@>dx+^BEZ8K^L`42QtV-gYNQB%wmwwU=UitrnZMo<3FFle{Q4y0_LEl zbc)XZ6+QoJ2md!n{BM}_Up?->Lim5VfdAs2{~3J#vxfXk?w&Gptk_Rr$`}tfZG3eKd`pq@Y*{xS~#EK!$(kEI3u8EfYXIS~4VHN1~46Ysjb$9${@Lk6MTF;;is!%|aGLV%Fkhvj9eFPyv z6$*G>4A&477pJ%yue26}kUEoyCYz83yRarZzZ~fFZ;-Lf3=CXM+@cJSb0qlPKsOGH z1v7|;fYy5p1v2mjFmQTmE~P=AEjlX$(3NoRNc_OOQoKSJyhsE^EF^!BVHfMNWl_Ec2II<}bC*TWp=T zST|w1xL2b=+N_*K*9unL3F|xLUcc3=X@^(iF3*OY&NW+|YqmPoY_h9ZryM)kzH)uu zvO76TZiV!LIzC=4J0QrXZI@5$KA+Zop3OTQsy4VcZckfqt$fpyf0<=muW!-9@WyS)y@wJeo=Bf@K6%#V#2FXkW}J(eb|!l2$%skEA|{;x zp$W&}br7T`3hz4}4(=93^d5`oJr>>r?iO_(4(m7sLg3y|P{*F2jy;f?2vQjZw}Iw{ z0^9ZnwC;nVwp{^jXw?vCp2)X(yKnP$NbLjf4f!^JR&;|;w*%cC3pyCQ7PKS}bf^Ms z_mEf3ddP|e&+2tvHR~Z25Ts88se>RST89WSOov<Ct)=LmY%5Tt*E zyhjO{4X=jK7|>d2@wJr2*HV{WL#vNcmfT5QdN*z9t@wGDgL@A;RIM{en(OrSS3J+O_G6Kih)fUgjnT4J2=?YK&!z;ta;UZWo^>T!y4RjXT>yc$eMYo za>dQ^b@wXP-!EGCsA%1zy!B7u2y!|Eq$Yxr;0rD@*S=0){Zek-f302rg)aV=y7^!5 z3g`p|rlbEE4*zG^`JZ72xX_#NpP~3AL*;w575~k4{1@r^&ye_@A^ZzN@COFJR}4x2 z7?%7O+VAknFwx@((~&5~My-eEMJF!GD1p|2fZtZain$3$E$r{by+U z$dGi4A^0?d-Bt#(wG4){L4$Sry$sr&pvp)lg@Mb4fk7WU5(T*6BeT^^vMC=!jJ1kpD8l|Hb|OOZohl^#0Eg{$C{dKWp)So~r*W z&Hvd!tFr$y^!{g<07Dc1v(5a^FzLTY&MBFaOW`x`XRiL3vHY=b&k0`Vi3~bb;sNu` z3-%k4fp(Kh}zA-pa~8r zR`75flDm{YLqR>6up;r)j_8@GEjY=g}BxHs$sA@G_6owPajmFshs z+{#&UD`L_K|IYnBZM%Hicl);Q_UqW=-*GUY^N?T1KG*uKZVlU#=3Hvt_n~mjz38c@ zBPN{)n{X_&|0r^O6yARrw0?N*rTle|Gp8SxbT4LLmt$ZMWdR)(3ken?cOh}G@QMpb z87ONznz<9OQK=l!%4uX@Qg$m%x z$gg>iU-KSNrPHzlJ}w7dsIbky2~-*RG=YZZ{2I3**GHi3Mc^}6A(KR)OUY|CdR2pt zNQG279#v~049E--q&|Xlh#+i8eFW(fxtFa(Mv!h1oQ#=t3=PFiIT1JI1cmhxS}!Sa z=GjDyDhW~z!AVGc1R>#-5fbAv_@E)sARgpIRu~DoJRMdmU4;+XL1ZBn6Qn+ZjNPRz zy^*>EbV+*V;_JDKZ{{qzkv`*WWY_+n+I5aOa}7f3r0g;Tv;!E#tQj~p7+91Um}J2x z{6h*!NR7h{UTO=egCL7+xxig}hzPts0#`|(ItWs8fwsnjCe0Z{KvPT1k_;@;AjBxm z07CK%EJ_T#dJK|Q3~K(OwrQ$Ct=1W{1Dbb$y6tl>XDzz}Ibb|%?W62<%^U8kp7yH#;IQIURyZm4D&VQcE|5*hAvx9e)|(elmoAVhDf9)&8GwgSZ7Z~)9GU)U&sJ4mvFH%q3!eDTKL2Vne+AluC|9pm^%kl(FLFyG;{wult zS91Ta;QAlbQE~b@RIBv*sAOa{~0ENmm)wG#m)TBz3@Nt z%>SZ!CzPtM#?HP|wBcj+vWJmV&I`NsGiVgb1kSN5-e=x$#f%fM&otp-;^ ztD&bDF>LtHun~k68zZ1dCP1()JFHiBv* z$hb6UXD&AbXjTg{nZ?1tAT6WgZJD#sDtD1}{sIsxTxwmo)H)A@OtTlthjtrf%*$JH zD}VXz@czT@joUq%cK9^!@oC=U-n7@PagRsy9=FC_W`(P4D>kOgyHd2~QS9`yejWS# zy7qar?C|Z}>({j}u=`+O_d&m|1K_Em-C>iCWiP*-w&+^y^s^C@PK5R!4e2`))^{Yl z|0o27_8*R&emZXM1<-P)C09%f7BC2zGcao~b1Q=eXYcRZ~BL|Fd`2pd`lfgKsvdn~l)CZ9nXC*fTr zto%s)<0S3_-3{BY#@2uTX0 z47o}I9fb$3kCNwJjGlTjwELh}!#3-jg=*nFk}jnz`U#*@efTXIShc_d^-|!PNDMqK z2RV0z3%p(kQu{#G)56)@44~#EA8fD=QY#6;xm( zSmZ#99E1$mRh-1FV{`%vZBzTg>Xs)@I9xFILjI!bMN4nxt$L8Z`eF8(hdFB>Wq}c- zZi1Zb3OUmi!iJJ-p5(20lDX`@>NDk)h}%(o><@PW&7rXyo@CL{t-b?@aF8>!e@Sl74f3g1G450@Z ztYlVf<_?3r~jYZ0EC!q{`J@B5oFV0%+cUfn}f-Akro0RBErs&AD5&;bY#)Co$76DFn}9P|KGNnC(z{ zz`Ws%Q}=bD>3yH0ANB%QR`vJNDi$RWo zk%x&xn1M|KbmuteusC>q1PL@oa9~32!bYp&8NpQ*Hv@+h=z?;7MJ9el2IxJlLd-mJ ze8S3H{PHY((ky(^(E3Qql|j~nQ8|=TJC;o=j!i3$S2KxUJ4Hk{T39DaSSN~0IZ(p6 zET#WY!kkOBJKqsVAdnoy}N&t8~MI=xN8y!)pu!%e;fLB@`^!xI`I1 z*I}@O8%_)i44f={vXaV1N~Zo+zUkg+t+5r;)7#e-PTWyE<7oMulLfQS<<2;rH1$m4 zl+!U&PQ^?)6*lEe@Whjm6OTtuJRUXSSQM-W1gQ-}dXGV>AqW`)?h1iAM14m>dk-Vm zM8RE$gTV7dAzg^&3XsYOJV3V_QW-(2Ay7>O-cRJ$vctD|yFY00_I7YJw8OUn#DLU0 zpvnlmUJle1g4IFrYRD6OC4@)y1`wlm184^m`0P{6`pCU<4WwJ-UbzN*==U0zvXw5l zJ4BE=$fbB097Rt&3PI=<6KtO>WYi8aixfNcBq+*|NO-LjGwl?fiV0pHL8>P>2_L(I zFd&^HWHK3i94qMFiaD2(=72`+z_-(0PMUWGhAu%WBS^)R1gfyEq|UzzsfHjVc;nKw z^!Zm)=3Gjhb~?J}U`X9&=iEh>G2I&8RWfD?YzkhWRc&mV;NduNa8q0mJov^3?g~K$ zM%ZC(Jaz^K$i8?81F}0FQZYelC0@iJi23d;u*0lO@?&`-`s~%;88M&(;=dVIStMBK- zP~OT%c`F}fuYQ`n`f2Qn*Rd;Ji*Ed_d-A`;qyHlJ{xg8@HsZPPpZ_xG23V0Z|AnUh zXGnR@5cG^84ZvVG_^I!JLf0@hw zg|C8;;PwB)H~)*>1=UL85B`hZ`!90mzvx}i8AJkC|1(|zA-x;_r4DNBhbWx$guzRWz)3__E66c#edUEr_5Td&|Cx3EGa7tn(E7-rd6UoPjg0FLDd%qzCZOG~qI&;@!FOAT z=>Hco1D%p8>jpaVPQe{?u!Dm0e|e|>eD43n{Qoml{O9fj4a6}_f~|#vtU!P)MVJp> z5w{35_{P8bKlk$gvQ?Lq>aWMozEiT{W5K#-;Zx7*Brj#qFO>3`=u)=Zq4~7$gc~w5 ze`ziM&$__g{!QJm)e%C zur6I~QMAIcaJfy%Dyza(MwyEYGUg@BI9I&(Vf56KzMcC)eVn#~KJACR+YUmISL*@S zhTRU;+oC3)DP8xxao6X#=@K+ z0#!q>E>g&ZV-ZtMCC)jMx#UXmh6mXzZbbI&3hvkv-MC8DIfsEo8+5mef*XsvuZB;V zQ`&^^>b1%3J7c={rA|4XGW$yEoU8G(u0~J07&{eI0fkLE5ego53!88(dgAfuiN|C5 zkHz#u*CastK9HISROx`bKM)4;h+KFN%Bs2G?t{VI2ZOo}1c7@-NOcfsh6u8O2vQ9N zv}_M(*$%0LAhSZg&D-H)ao&wvy&JdsG;Q-~LK&9>ZC?d%AOh7p;NB2~;ZeN-g50at zLy$-1I?pQTp>rOU>ktUGXaQ0mxq){TA$N!%T_nip9HcUGEm;n)j9iMBK`J8%iCiT` zLDzL4W|1HhNs#&|YRU-+16O?nshi;S4zBtL-eZDyl#nYU2s;s6B|-W}$#X9w*GKUF z5u`E#FKM^}t$o0!f~PIIk+R@g^4!aDGtNa!IOg5F)4pJ-PV6KFuX;Xv*|1Q~yWjLt!3iXp9ZNHqizK_0JzR7pJG1|fLd12hT32RhS| zSqQYAfmNJ=T^4i=w1^(Jf|aaksG(=3O=LrG!StBMby@v;^5>kdTz0c)`Gcb65AwhW zdOg#sM>(q=WvzOY0Xn(pLCz{rr30yYK;)`>d2j@-kFr)h$y)UUQXi=t_-}jjzu1HS zTvz@xocPai>A%30|Drel3!es0npgf|NPoo;^^PI(Ekocn2KO@z0cRPKA27}SFS-ug z?4S9cee-|8z5h+0{Wp62U-AN|N)oyPt&aq+gT?^G?*13K`(OADXg!1A&HsYeK#1Ye ze}?n_jqm$i#mw{CmbXo=+nI85n997&b77onlk^Af)$)L-#+I{(mk#5E9e@9S_c={hvYYKd3U&{={PRh~4xl zpZP0Er?2wX|E0}Al@Xu%e;#$vbxXqLp!!J06;vO|yZx7U`>*KoU%?r)v|B#nKU>Rx zp$Xtc8q;BG;UJ3;AT<$WQQUISG?3`V{{n0NE7o08th*LH{YJ@#kA>@>r!2bV*>H%# zyi(A)!>MGabKALq$+u-^{nlFkpKa5Bo*n<$cKv7F3EH~FvJ1T0WH)G8KJ)JX^2h!w z9RIHrJq=W+vrEdUTMEc%GH{7AfKH!ggLQ}?=@W945Er;91({KTq*cfq6GRq#;Ssn> z0@X)6(hO`eEIjZVV`UjQWVqyPmCQmV>@q~HQ@KqNIgR4kjgt8+v&5{6rR>Y4919dZ z%H>@OnbbqXol5;G*SJ<~O`mhUe9N<#nWq^ftr-~PS-9mvQ>CDT;2?WoA;XCb3=F)A zhJg;H>+Fix*px1Js9bGVvCgJ!ja}Iq(1Aadn{0~LnCCBzpMJJ{fd$Luk(m+$6=rLLmtihTpMmx`tM<7c0Tpp-cmQoz?LfiAFs9elgOb~z##6j{eWX%$Ugv<^@*bowu>mjugq&|X>kopKha)384 z@PoTrBA^jJW<>^GZ3bxve*F*`mpuLOM$63U-qkCE`wk?|IhVHR8t76d@D+ieYid{C zgPaZlsgJV2l~K;BC!jNnR^H25{UC1@2tm${$Xk6EjBe+xzFn~DUcst+Ijf%Jta@Uz z>a*+SKO&d^D?R+rbK^hr`Twj}{&QUaFMI{Gl~rufe}>fi41Nz70v<8=J!bHJzz}dB zgxoJML_B2Z`YyiWKg0U}jGO-p9{jI$>%ZE=|B_e!^IrSUcl|%#wg3Fr|MT7gA^to6 z`C&DYw55?B9w9{SHU|D%@A~b59IVDRt@kvI%NjX zEzc6*!AxT7Ayjpct)T4SwZu-`^0^G^ci4)<=p>^xcwJ) z{m&HiUm^q4Ct{oh?gTA_t%zF+uGyFW2OU7M0!*$1twZ47_@8(Ef3@!WQuUV;=iVz` z{~>qvDzuWYRWzRMV~F#{AJzopM4kTY(SPh|5^5d(H>A` z#I^4~`|khZC;uy7{cqHNg@N6Nfk8@0%#2+`7M#W)M>Ig{8psMy5pc5#w&{RD4AdWI zkOJ@2Rs?q{6~I-wEHi^38>0v#moO`zI0KI)=y+8wanOn1Jlc$+CafCak~Z1=Ho3f3 z*_>AS+%|Hdqy`&6sz)e9N=+CD()u z0~i=27+EF2gR%VJ78*Nvs1UN`Pf*^-&!%{dL-_`WsturXd+N8kG;H%|+~v`@%d2sh zOZ67J(sjwRE;sG@RJ#6YVD~})&iw(MM*_Q!2KF2e>^UCPdm^ysIHY3oYd_%Eejv2x zC}=Ho|IzT?!;lg?sOvy5s8-q+(6uk9YhQ5p{=n}2UM)L zLZ*kn)b2WLJ(wsE84mRx6&2P zrOP15sbndS(!`+q zh|Tx~5BPWo&_X-Y|1zfkB@99Jk+{Ks3FH6LX8)y4|I3>Hm$w8l1l0dCN&aWx`NY8c znMEE{TS=Jzm$&<`;0{7^?x0?hD7Z>u3HdLR^Ph3nf2P^6`e?y_hQ2t}ZR~6gdweJ06lCzrGyv90zi*NhI z=&ASZH~qKW06G#7ULP^-`OgHdjM(=5=i2w5W6yt~BmdeUkzry|U}91L*F-8{QW>21MHm?PSQvyD zxrEsTBpEnFnfRoc`DGY5L>ag=C3Ssd?DOQE3S~U2g$+aTwA?^6fYbIejj$^~aWe-u<1^&NrsjKH-LWX(3TCW3T@ zz`I!?>*c`J5NNpqcw6gUNPPs^Llo2j+EoN!J{N$rhsd{iCvts+G-m{=k08gK`ZjC@ ztxTu`oqY;rfNLUn1%$N@g49Qlp*eVc1X{5Gu8iEu*SM9hL0-a*JT&K2vJ{RW)ewY4 zu8AOQNPPq+;Z+i(K7x$eftSRCDkf0f1RKYLNWm*6NRJ6JhzB9jW|U&5pN>XC@X83n zfLBS$bIv8tIhP1V@pI0_&p8)6=X~rOQ11zRmD1(Jxloim=W5E_YYFqNCCW017q()19qPSgx2Gl=VQEuQb!v^lD8 zf5Obu=?gAF`$R~02xc$44@Zz%DSgG=^c8nAm*3A^ejj|y_yb6t1=_B=;x>d07YA#( zle79>;>wqC%btmD_^*HJzr^GJ!uS3&U;NK?<3I1s|NOWAD z8LY1`SeyYNvl9$9=NPR9n!|Ic#`RJCzm z2a#OY{I{tP49v=)twyX0;08SM*(}7@N8r;2K+}0F4E!N7ev=t&o->#|pHbus1IG&n*82=R&o~wTOB()%*GEd8 z|E0Y^NZ9AUkoSLv@c&YU|2byd{DEA8b$t9O``KzF8t+G)}( zdJ&qzwPIlt#KR}bMfAx>_A5kBQ%#tyoxDINWuZytB9qKTIvI0J3zwS2^f9Q0^B8BE zL{HRCo|(J!cK-SY>C12UWOsu5Xzbt|0U2|Etgm1OPh)Y&Xt@Tp>w{5>) z=b^x!qrp8#Lwb*e_ni#wIS#2`A}5?opLacO>bZ#i6OfWTuLA!j(w0BmSAwf9^QM{zwe-L&wfZv6x@5*zjJ@&)RP%YZp6)IXGwL7S5dqBrlpQeM}jR!oMcKEjM4es0<(RV0z!tv<7qY-^aAk|9Dq!UpSPr>^{ zkkb;P!N(?q_Z<%dxgWGk2)RCjY!|{L!+Q^h_d-YHK!b9yRSK}0XkS<-s9O}$z6V|n zK@L?2Y=`!Yz{deWPgDqK2CbU&ZQcp0lE4SnA+MB!tXqKeh9IMJ5E8jQf(*)eRBwQf z%fT6-3aDzWTh&?+1Kc@st$^-tb+1?psf-}?5oB}@J~Rgzor70Kj>Su`)rCRTGx4*|B+fY(JL^pBtTWNG&c@9J zU800pA0^Gc5;fyO*pxFNy~n(o_c)cVGD)AK6WJx>S;c9Z$eYR`gc5%o)>JGW*`JCrr=eBPqVS&Oe`FS!OeG!OZ1rmUs+;3#AHos8vo(w5&z zU3NQT*}aTq_aLLcs(3}&YoEWpTmH-pO&hUSlQ>p>@Yi=F##_2R$! z)&G3wLG=;$RZ!K#1+J1fum0z}3aXFTu7W0~xUT#cxb$D>%72Cn{~0cV>LZ5IeW3es z7;Kn1jhHwLL6s4cDyTAIR%2k&03lXY26lA@P8HA*7NA`okj674B8Vg*fe${YO(oMH zX*rkMM+WmpoLZoj-ZIvp%7tI)KLgKa2G&;$obMR8-!brhVBq=6!2bh;c)l`ly<%W{ z#=!P~f&V49GJ1U^d7q^#HW)fZ(AknKgBS4g;vdRk6BNB zw*HSj_CN5@f1SA>3}R+6h=qVRG^v4B*RZLvOInz?rN(z|_AOs18#`SmXPHIO3agS; zM!73Y3s#%wuQASEVNC$jd8N%fsS`*mc;ec@OA<*RI1+lTJiUIuYJ~G`Ra(~>}1*&KQdk-egy_mK5a_qEY{_VTHo41E_>&QT?( zTLh_(TuN7=RYtC`<=jpsOC5?9I~Ff-C|V4ucO3E;AtSgTq#A;gMAk=;&Jpq+B}o4W zGH?f(SAthV$PCCj2Q(61AH~c7RYnOjPsf9+q}W+!KwTv8)Dq+#rNr46K?@sZo==>9 zHgU?y=)S|gEn6JQm)qsc(v9xa^e&aMNfXlXW0J9B;8$f}l?JbogESx^>x&q`%`-^8 zKwAn65rmWQDhX2Qz)48A2+|>f9DT+E8ua6kWf0V7RA! z2dj$d{pXPQ%fR-Uf$=^A%OeJ!=M2K{SY`gP%KT%I`OPZ(hgJG3gV+ZKvDfT!pLsPw zHy=UzM~WW*l{`U51Ic;+7Y8HGu>ayIp!Id26M_1{$NMY?FU$t7f&-m&vEx7UcF=+Z zhFzd_aZG#ua~=7wci=zAtXB?Q7gN`Nn{fVr@uuf3yWh^g_CK=!j8s6cQ^nqhzRP}d zo+hsUA-fHrjvFf19q)z|ms653WkO;0v zG+>i5;C?!zC_A4LgMc-MtOtXBjcC?(hUUj&vwm@|{4cl>v{*uD-+!S?{}mqm=Q{tN z>G*%K-TxUH?lQz|XRw~eVAH_sGr_WWdBLiCWm{i4HEvT(U94BM!K{3%amgn0vR!7S zJ57qWo0aagtk_~xz1^mElSAV+%j|g!@<9x`NopaTmPKpR=HE(Lc0FX;ac&JaaQ_HW zS2BV}6d_CdAR9}C8MqW2vgT(`Jq4+5K!?bIDx>3pT}Ok#2;44>2vQS4*pNC1LPGjPnDr5~4gzm$&*h4M8nrK=$8 zy5Zd-cufQug>x=i3_;-IIG|l2p9W-LkA;m1NBcfp|Z4qYRn{5--f=HAqv{laF=Po|^)#cuu=zx$u>3TXa_=f;2GOW?+Q+9w9@>kQ7f z8SJisDkIBFpvnj|WOtgu^aQ9fa^BA}?Z48N|KgYbt33i$Mp9Qm-6BYR#0~2c@qmZv zAmeqQ%INBU{u`hwiT~Pv;amS1(l;=01hNacG4VPwNZ2z-Ixt8%GRe9y$-02pLRJiX zW}uV$Kv%SDFtDqDDij9t>LYM_fPsg>Z?;L>WzNL^46e`M^^u_Fetpq>%$YZlSZ zd`f@Cwf{>RfRKa%=&n66o&Um`pd}4ry8i`rQR*WVZ$y10>HD8E?7u+le}>}U47K1x z80LWbN1&0q-Tzq*{O33fsxr9tf$Ag3Y6P|epnAmO)PI?E|D1cTgwB7~dE#&5fv@H3 z9&{c2oV)meLE2)yl$B8vu6j(nAF}AR`p*rnjF=9CDkD&pbO>Y)JGe^X zIPzcg;(zxY{~1D-GBD{gFsQIFa4~}por7#T5CWY;#h}f=YoS_oJbcYhi8+6im;Yy3 z@}C{D++q#rfI*Qfpsmmh2mY&``Y(Opf7qV?(v5c%6P7qN9!*S{qH?=cO-By)*&C7P#*X^>c+2Pi{$FXU_X#-5p;b*FSr&1kH|r4 zB1km^+B5{Ni9r1z@L6-9%BbTYqz4q(zB{M`Q5nH^5Bin)ZMY=wb-aNF97L z5xhPEt>s4DLj=(Ef>|Hg=Pg98j3WCFqt!>qH4%gj zsi7d15nBHUt&4t&Bz2 zQm0>x={sRobJ)E02*d23Vw?Xn9RJUK>%ZhJ(9{pxCD7!N?CJju)BiC9zh`iN%;0#1 z!R9>ZyaMyH4CW^otWGmn9b(Ym&S1Ght>U@h+W%^YKx^t0?|_Ejgsy;Qi#TEZBgl9i z?^V!3JD#imd9H$X$Z}o%FL3og&&B`xH~%v%d&i(#${?G{X$V5RCdq;p>B1JN{3eM! zx>3w3flM-Ppo^RYO&E9$82GdpxYa>5JE&ZR%raoc7o>TBDGu48EDqA1xY2mZ6W;v) z3|`MUwf_s5fG$E3()!OR{+~hgFSp`5AOt!A)O6~@+|3`W5B{&-{IX=t zqx5CBy*l^lq%8DlIT$kgUiiF+Ml1g~Z2vEI3}gb!A8D@UsPv4`{ax-?t(}dM8($+n;Z{K57xYoLAn?=n5 zi<$$*l?N>9j@q`IG_N~s-MHVe{itK>LD$Zs!Ci+X13DQrQvI6td$phT@41k&_*(qD zi;hVx46N#)o6wm=VAETW8Cyu*1l|oH>h6^_Cu+j6ppJdM9ft#ZjzgAngStpPppH^N z=Mmr5Lq07BLwk-#O*|7h;dCfyWgY0mxuD)-kfTujyAJzw9rA47?^?IrwQhU#l=B%2 z@5D?#=iheFyJ@dS{jR{eUG60tw6oXfCJ7U5y$oUDNL*&3mn}P=JP`gFY z%BTml%n!L5f>cJK;88?Sp9tI=g498fDWQq^V^&U0r-K*ERSFeNYY;}Y5 zfGXFxR<3cWSPeqpdIwSmK?T8gpu2!cXVAjA6)vT#oJ&?Zl`MBgTF4EliCl`8!TU$B z$_QE?VXchp^A<+*9g65Xgj_2@s-%bsM?zZnS|6t5E5&B z6gBM>q&|YL43C|8HhTK$$f+lT`wx4x>~N{tV3xB;Gon+{qmtJ=kwHC>LCh931Oz&P zQx-IC2HNriS*3tkb`dFpSq(u%Ae9jlxX;DTz#zgdpdzSZr)C+f?UQF6Q|FX3&8KWZ zP{a1<&I6GXPRCEX7(3%)(%fsw^RGexGFz0f4AkYxUUolg=>r5>b|+)Wt+WMKlIC29oqjrE(lNh|J|ChQAt&g~`{}Exm4)<=-pB1oSIvY3JI8lpbpz6Khv<2(o2d8Bgg zKSTCr2D3Ur@5$=^%?1%YI>AlaL5-Sz)v`|cBBn{)I+36a$YKtlYX|v_7=-i~1R-^`^_|KvCpIh}mr}BSJ)&D#i|2eh(3mEWmTl*+s{uS5SUFP{~e5YKG zT<}-E>ilI^o^D6fJaewsR=K6n7WmNl^p#yxj-W1SkK(0goB`<=`cxTuJs*HG#{#U;6 zpJCQ#;p_u?ZMVZ0-H%`OJYmIM&xyxus<%5g?Xzz_ZdG^4y!MEF>lsjG)O6gbgv_{-z38?_{SF4zB+J}Yfqj>}+E3@LeUP#IR&vWSMkN=}HRznu;4TuR`^E)} zM;`FrC~bB3N=SX=kF=0G1iX$e7*Zv59QAEI6gBZo?vne-v#y5qp78HF9Nc$2sP~wE z*J0m|gFfvCJlprXH0`pl*l1m{)}wJx_=M9@6VC*99rdW+ZiD$yWJBdKI zJb>?dfb3lj?K=)RIU%C|1Z2fRMDH>9NFAaw0@p|IItapm>>+|zM!~S#A0YJ+s4_yW zk08|$goI2IAu6NB-LRV=KzHO}t&gzQM9>}(xDIlwS_`R+++jDgLVG~q`Uqb8K)OPp z3aAWpy{uE|Dma4FN6sZHA@z|HY_9(>r}YNA%6j`n#ewHp?%)MsNO@!D7x=3 z9L4m5mV!g7CpZbIry#WwG6|`eaIJHI)Jl+=2ts15kD_6##1m$mNuGTkbiCoTQxOx6 z1a|KAtl0=UYcYO`l3%lgO&W(z7?Z3G1HT^l2rqtcIY!28IC7zf%tot#Am@O=>m$e% zk|>*y2EUqAl=#_j8xs&j+L2CHJ!y-btT#GiLI+ z;I6~|ZTr1zx7!u0vQD0^8``PpRViYdBdiKu4syKVh)H%wT;UG+_jqGdj*-bBaN4HG}yU*`5~`%l|8#`fvaI zzv4a69wJWg+z_Nbg7%EStL*r$g3bjJ0C$UcuYpGDxG(${zw_UA^%sVu)kc}CtqM2W zm#%gwTVtEQ$Rus1PEd=KLy@3aDu-?iyRt8XgcAe55eNzCfR5Z`5d#-o%&@h=Ot9s_ zkYI=G`GVI+knR!_c$5!vv>-o2(rUYIsA(F-nZP)|qN@V~SnXor!q)qi!{|Ekvi z6-@uj8UL3u_%EUNUsMM)Ovk7BpHt;8r}9rOm0x`7pq)yP`bft8zmhNLG;g_p|5E<{ zr33!+hk_78&OfF~Q2&T!4(OCQmaU+L2kb}w3!ejBQ)uw`zs7A)|3&!Bf3DO2*-wB- zzVrVL_x~4P`onwftEe?UYqor@*zi7k?Y;Dsw_>JWa%kNuXt(cbaj zm2;Ad=J!$5WVqV^u~YTrT-Z+E-@rOVaR>QQ2U8t;(y+Q|0U0Yjv->$_Mi3Sf9B)= z)vo?$+xnle@Pb&|)5yhllGi@V-}E$O`YD^nLoS`C>^lxSbRKc+J?lF0V%+?@2@CHR zEPs%*_)g}`t8sm&;_5b7M07C-xEX|YM@_r#)pk60?bE7VpAskT=e5m-?s^6_5h3R@ zaDW#%Ko;HcF)+yTC^$s)9*gQf;oW=Or|*P+|4F~T6Jfm|6w-Sl7`(6nGINwQ|5nc8 zdyx}P`*$4lY261ozs?K1SqU;u70|NPr(vUU@fMSkt(py|^;$0|*B=rq+REH|UuD^U z=_CK84*!=q_Fw$qf9<~i3`zg_EdDcS{pSTE7R~=Gn*Z5#{|lJ?w=cOG)czuT!pU&h zS_Q~%Bgo=9(6Vsw5krvb3ABV8wxABQvJQTn0{DD6cy9=@UJf!t1X`+%+Bt&uhFbT7 zCWgTG;=$H$gU_7IY@j9BO^!R<#y# z;v8gs0)zy0jw)6oDx=a>PGu`W2wWdQ`b5y#qLP)a#n3ITNF5^Rt|G|joJ-*%Cs0ka z08$@8svbCLm%G3&cY$rreA}G)$dyr4?;&_)6y0~2lKKczJt5C7#h~u1h15zAGIHuk z$dFysv{R5>O9?a1#7{pRJN0Db#G?Tndt7QZSY$8Iik+n7*DPdL%&8mBAnysfv5rF> zbnXEt!9z9*A=fFCu(8%h5JAWZV631mu3QRClBPnMt}-@>nx1)v5v?}K6Kr$lyOu0> ztJ&n+ywksZcX0Q9&^GN!C!(iaOq_E)W$_JA&uB5IK7v$0kSYmO4K2M5VP`D9lfLM7 z=AzrF^R6e)x*RhZ)Y$WG+Ur=c*(h^~dfarm&^|GbCT`nuE~7LCwI~KDPX=Kp1{OUA zDHrL!Ywo+iOOChvXWIQ=;3Q~65dV450&Ip8{}~qkXDE8b5crP4b~`2HhH{8iA?HY zjI!{Ff`l`?0)i|cf$Tem6l;+EEZpEKi5IjS zB6Ob1>MsH{{~7#VaqIjSGX5`T@?Xi~zq%a=sak{jM+&B(DoNfHBqe3|Ur6&mx7vRu z*>4QuZy7{ib1427(gt0AByIa&$`w%`Ne6)HBfb!n`iONdXrB@5Hc)-Re&oNCBStWR2lJ{1@%vDPW{)~@jqbE`{1RYsyBbC+VnAh630JN|HS|uF;UC1kS;=l66|0ZYu zYaaSoh~^Glr`740HZVocS+! z6ufD7+kduGp!!JZ;(w9D|Jlp0Np`*nnRh*H!^^C-4-yt%bLl+g)qf#+$^G~hj}z9s ziCFR^X!1p?>Ydt2v!#Q&8FVul#DW-D%orH785ne=>`P;3-tz4{mA33&#f}dN6ZTnW z&jhX6V~_?f`4$9|T;O&iJGjZg1zP0KrDa{PGJVd~@R^srdyo6|o$%{B0a;%c(0K&3 z0k-3?SK}V{n(Yz2C(`ELNS<}oul;~q{SL>Ptq#>&e46*T)@*gESZ80l+9YG9VrUz$ zZ=X-k*?{GrJQuvSo_@=5?gP66|84jGmpBMQg1f<4AmKlQ^J@l;{|uV{dB6x#A93h` z?)>$ry%*B?3Ua7vWZ&_q{u7WTUdZ(k{FEU`eFRxh2im>_*+T>-;k_Zygc0~sJmd}$ zWTXxAS{3EfWws(`@t5r_em$&Hot}em61jSc?mLV;jt5~w`bWsC z9U#3VNCgx=@hF}>O33Ftz)u{Co_aD8hQRgF+4$)o6gw4k@}U9U`o&?cM2K8Nf%o+V4YtJVfKYz}DK6WqB!9CYE=DbNDK zxffIBUrbwgIb-qF%q7>d7TwQWcrRno{ftHT)8^kwntd&C+Idhe=r|NqztgjLg>CvA zie%>CUk7 zKiB^M!WaMZf*b0fDTCSn8HztJgnwpmdBEU&ox%1ZgV}KqvN*(Gd6+?G5nsYJ@iqU= zPXD)k`(OP!=s;BdE1>mZBkAVYRM*Z%Wf1tG!9paDI$EB`sK|2MhvUtq&e z@6O9nGakiEyd2hj*1Ku9bN5(|7Vo^#vpi{fn^s1-!caN z-zS-x2&aYtI4y}*) z&Vj0ah1>rXZvU6O2CB#TF8t>?_n+?qXc@fnrT@~W{u^!jufF_C^qMaT8-HeRdX>87 zNy5_m;d8EePdx88_6p<&%~1neTRLkH|Pa7v1vzv4*CXNc%cHCt71_G@1c+YRVxglT=L!t(=NwNxfnR{ zbjp%@F*DBicJAf3&1BFBVUx3CV320zRR*0Q44NW@ba;6|Q+^DhBHEF0rHeDBo{#GW z9R%dle#o<9uV=?zpO(X(jR!pI_qf;Y@~qqLRlg&7($V5YSF`7yi|E=NP`Apva;bG( zx2i{}kYO~ZMi8&23xlv8gHmSb!WRm=|BGz|U5_Gr@IT`&(2XiwJ3)Owi3$H1QvS1; z|7S4#&!zpJ16&#LX#Qta2O&X+hgy-p0^81qbzgzhK9KqJ$O(rcCmf3EJrdOmKH9YR zaB%lQ$id*ClTN!1Ll$gw2q#A;dcHo{7yfU)QnGdgtAPj=_QF#C1@czT_S_#g8)JkFCDJS@-9i)t(6P_6VUtbqQlpgV8d1F}{tc2YrP8)(0!GmS8XoNO_UvLh zprzeh;B!tGAvcsk5Opsa$QctmhOHM)130>T(45IqX3iblJK4R7xO70~Z!7avd z{pM*i9r71BRjhNb-R#q{3v_@<|KW%!Ct_!uiUY4{Pn>fue)^S|DHp>goDJ$Z>C<|^ zv1+SL;VPr_1$xm_mHay;oXUC4b2xP37*xVQXO9b5F|cVfFsU=Fw24v=tc}4 z20^dXIag(l{nxqnU-km{0D%dh)yN#j{&O4y4IDA-`_DN4KSR}bhL|4=J}($tZ-Tl= zX2%&UPBPdWW6<5sV7}I<<(KZE{~mY$8$bRJng_Y^A5tH&gSQ>QYa$2(GJgcGk9cqY z*E#c_Y0>NGnU6y!-wp0K(A1PV=SF!o8X!~Em=D&jNe_@0F%+fy?gg-FIUY4}_ z;F|VN+vf?3{9|^dXM%?RLV$C&|sYexJnWX2G>WK|1j$#)wn=N%WP<#1d>z#E>fasHwJj*Ci-B7ktezLt zz~GQ#V3!6lK*JjB3=CSVGG6K-eUWVk0(*{ow;l9o-{aQ03sf039`J75=UKnUyJ4qu z#YU&XC7u#s4|i z{^wo~x~+p@>wo4={~2cgWr+XJ;PQk)3p7&4rv9Hz{Xdube@-<}WyGrfU(WZPecu0& z&PySkm*DkLWdGra{)6Ft2O@h8NA`efBFLepp#8SsiV3oB2(p_MvSbc2Ljb|q(gn!q9OQ%_NY4l|D@0^{1n(cgYa-{OrI0=myfOlnf%%IZLDNI?A@vcY z8nVrq52=Z4vgg53c-Q{$uKke82u{LlCCt?hVSR_f`VK*ArEt`WDH66U98x!-)k>hc z33bR0YxNX6^)v*%+inRR|{=Z^{bY4DU`BJ7Bq?C z)AnLja%7b;V-VG6;8A2?mjSQz<_DE#pl$n*vI{Z+1S!GDCLxs(oP?MInM>kkU>0ED zkYW~4VwbTH)^L$B2~)94(DW`b4R0_{=yl4T;a=&&Xp zJq9ivP|t{2iGf`ibW1z03InGC1DhzgaD$w`1epqi)cBBdK_O?PbA#Ie@TL2ZC}04O z0wEmyaR!aosk{F-4E(Pi_+Q%fKbO&e2E{K7 zN{7{gzl8TcOqlVidehtZX%}T&&$8-m6Eyg*VE8(UT>LL|{=dwn{|eXsD_!|7aq7SB=Km_I zenxKiox10L)1hCD2YDNO|3&xyXFc+t1-t^B z5mF`X`_Hrww8EU{C}=!Z;tXg$i0|}&&SM~D{OA75U->V60kjK@_X=nPRqQrM0mIt= z43mE{bi89|c+D{J5BJXh(kDS~7P$Cd=-PjUC;x?4{AK8UKVBpp=@<fyD}A$PTSauJJLicT$8jmRamcv{X?e<;hDhpput*v+@N06(nv1H~ zun222^2o6YC^1N-Sv8+$m=EdzGA;hkvGTvz;{SZJ{|goWX9)YxqW_;kO2Ih zhQKGCLh2yo86wE-c#u=(AS8622z*;Bgbf*~0}svZgpiQh2ewuLR0n~sXNBF42dRVL zq<7s`2!agCK~DRD*F^Bn5wuD|8l!`(TY%I=kb}6P^-<|6$ixt2iGnjy=Lpg*f{^w_ z%N+`rITS8)1|yfE(k zB6|)-^@5h(L8>G;iKjk_m~bQvj3D(9@|Yf|GJ>6k7Xf3S)klyKJV<2(A!DYT0^L;A zvDd$8yJy2Dhw_yU`3o%4XPCwJ=!P}x29&9J=Bqj-N?U|W8F~w8I&;YxGm2_4@F_5G zNHH)$&IRIt4e4MOVo2G61SVv>4z2-mdIKA{Y5)%_u_}TVFpFEV$~&@a1__zON;wy( z2ha63~}R?SO&$v2Oq{}t~2m%I)t041)12J4_Fd~O5H zbTBOW&oBeLaVGaKL)<$Czk3XJ=NL>6GguyH2)bt2_Fru4f7K)ZO&|U@d-PxY+JDI# zpjjav@S+7i@KFq$XuYJXp!$gQGU)IJ(JTL%5B=Bfy)IpQ+`DkQb>echkZuXLS{};#=lo}g`oL-YU()%%p%yEd{*>(A5i|lOEf2IS_`iT3;f8JyNMNa$|J^7#S^ndOXAZ0wK|I1wbFMR=YB_e!wPvOdc zu~Yx~HbO6E;#v1!$%`{(8@0r+diZ(y`Q}DPT=%Y;)XGx z)((RVGrzI8qLYSEuxmhNLiUvO=5;kQ4wo;vTDs_R(%fqav#*$BZc>g|=-+uFa`L6{ zsb@2n--pykaZ~oVR4ww(no>XSM9r$R)ho}Ww9W=M7vY(R0TjQiDvTO&N}lx^{_SG6 zrJykZ2`>h2Yw-M(DyVnHqyg$ygT@Jkz}Zcdfm@YPL{D1FiA&Oukw=b0Sd~FG)2#k9 z!*uY)h*Q8fB$j<;$av1+{(-^z34=PQJ`z+0)khpk|JfD)Gb{XORRY~>ZyfR8tLd&^ z+r#jl?!t^<&s5tI$tNrbdy4$>il)H@IoUi-iqkkL6v zZwOKoK}g8WM3CLAkn^`8B%~&S)Io64vt~1-KJo&eeF~|Mpmk8ydbi4Tpl(qmV!{Yg z8M(lY;C3ln3BMJ>xnw0;ePmy>92waaF0(IMZU?T6;PsJn;W8)Cz4Iu$Tp`sEgtSAd zh8%JiK&l}KiCh!eWzVzCnj6*$IwdI_+)IL!@LDOn_h5MML2%SS>n2Ej6o%AAf-Jp* z^q3;RtHg2DN03?xPQu6WAPiW2bP{>xL&T(GkrR)G^&Rr>23=p`(Xhp_a;<&w67%di zMoAO2qB>MVYGu4jq@6NEtP=$cgW1$QS>$aQB+M9vwLqgltm5GA5@gZ=vLg?(C_~N{ z1lgF{;UbVdv+SUDK8qLww=9EzDw~u(pO&k*v7d@#nwEQxs!N`VbFQLehKxC69`cR?$xw4VNVc>r3r&Cv6ochP@=^`L{P8QOm{^#2E~{%!cjQ1po*>lH)6 zD~49kBro44&^g{3H~t$w{jYo-bfkm8HRy^3-YXymq&EZ^ql2unE@#9hp`I>;IZ$QUdTf{x374Tt|a4*!kZ{+k5-w+#PplKI~v_rFH|fBCfUHf;~Qr@qhM z{3w6(qnwquGnQVD?0u#hxrtNjzk=g`Iaknmsp158XIS;0c?+mMVmthw=Pc+vCE=U@g>U{BxdB2#-~*Hd&w;8W$xHtwFM$>|aGnAU zgbE${&*nRcfx!_v?h5w@hyn4qC4n)4F9_Fv}ie~X9z8Kys0p7hju)!(Z9KXW#|PFwvnaoGcxwgdWw zt8FT`dbJ-;oOdU8)r*q#pYvC}3!nciWX|K**>?i_FL<;c_3J(D*l}93V6#j8VgIgk z&TYs2CtVJiawc}}RmfCn^7K3Cm;!@nxn=rE zmc;)Ik^h;3|1-G%XE6BBp!1(mc)p{byGE&!q65L;JCqA>X>K-Zh)y$g6sjSM?_En$3_p2+|?) zgk1*V0p86Ds*k{TsN5x8>% zt&Cvx5oC-GQVl^!c!vnWfK)^FIrEWgBHOIFa1;ivk02zxK7ueJdq9;DB!b|z5^_xh zse>T1evq07jf7N85Hbq&AXW$$QWHT)to2d!)RU04;t^9$giJUR+`ZqweYaoJX3yGn zZe>gC^XFP-Of-n^FpQ|v@Gn(%Nt3pTlQaq9*YXflw&Id8<`7k91a+50LE~wR+~65= z$Xq!jFHo0+4CR5d4`^&$0Mz1Tm15x2WDqlCl6PTIbYqb708Il)IfAxL2^xcrlHk#1 zU{zrN4S-04*6cBWPF)7~dRW1E7E%*IOoq4?BEkap3ooc&pc#|1_JQEh|Ef3t%U}M_ zbKpPsG0<8sgERjb=l*BNe8SN5mwVNJ{&nEnODFti=mGVTMEm|rO#Cl0=fBFD|H9k< z^KS-i!?b(%U+>X>rEC8M!BrAuMK}KXi2pk18a~bS-x(5CXar7?bMF$jE91~lXH}16 zQVCw=W+Zm?D5~Z>wnV9|85grc}#kfy7ET;=0}C=9^|dQn>6jcUeazJU1)tI z@AhBD15_W$`GM*qkst!~k<2)L`B@Ku zswAevpejk=)PJEfpu?azj{j#n1FDQT&x32FbN{){f$B=$bKuJ85~wl~Irm@eGHCIn z*tP$XH$iOAh!e9)+JkAB%sv%;;8H3zKg_M2AhbZOZW zGUZI_f=d~TuO?4F<=3<|b=EPT#?DF6$N$Ed_{C@`NzicZ16`X&YM*R1#e;nBQGNk7uygmx;-Va95 zDk-%4U}(qwuujk=MDPj-!a!a%M@&tGRvAG$L~s&P6G2GW2|u7gIdCTkQ4>L`BuIVa zS+x<;8-g6F;9jv7as)SKeFPbxgV#im*&;lZk!$e^cxB`W8>4e5TxMUe6x1OqSn80s z*dcE*gpD~;2bm;-cZlq==RqnXTs0A-N($-Phm63n0Er7Q*?TY)31N1QAXO6DL=vP{ zf>c9TNl0Y`CQm>v^95ZiKjl;u2!XC!ikNZ&g5XQev}a#X=WhSz zZQk{p+$&ev<}a|!m}!_WQ758JC7@2xy-?gXUBo1sTRVtb)tyz|ky+dfG(g0o%)knn zT@nCyJ=noh;b~|PC$brI8v_5v{~R0tb8Z5i-evL);4f~k78iG_Ma72XDQ$MFMa91Ow3#cgF+RvI4PYF z7DYD}MK@MuSAJc8VeDS*2|=JJFzQYmTtIFy5UCLv=?e| z8~FA9D?9yHbpEgG`d`80zmnH~dB6XP0sn=9K!_prKT82a&jn?U^|_9LJb zZo(HqcjF3O2X&4_Zu}R#23lRfc@|V7h+h6Lb_KMsf&JuvmSg|Hq9UK#1!Q=&T0T1OHj}|7X|-I`#*$){gJwf59{VdCvUjI`yCJ z^nZ@C|9QcP{|x9AKIrLR){Li5Qnj7L$|Ie%jLaa)lp*be`{~)CFhe7@)x5ZhNz-M-4C%v1mh4$?W zpKu_e?@)N}!N}fyk-ht%^F*D8K!b9SeozPal(~+BLG1^isAC^wv0PyLUI+tn)?7$C z;=mvLJtKHE1ZHf5R72oC5$FIq*oM}v;L2ztd<7k3-2!BE4njgl7 z4!QH}bLOJfL38Y~5y&=cHYP%@j6%Bh;jfRd)=iK~3Q{paY9i3V4zQ&T#MDQSQ+XpM zorstOs*Iw*l@aKs`$@;bCLM!RNs!4V&`{o_V=0#$-o4^BMef` zW6c6s3J4N~Ae9%Sc@Jsk@qwx*&{zayDIlaXh1~LoT%Z8Yr;thtGH?MQ!BR{D44}1V z;!Z(@YHR)*Z2Ql=`9IU9|DZ{tNq<0p{CC1yB6QLj zis#aQ-6#JUCfs0;D{eA2?QT5{@!;F<^$6p-;NxZff1g*F=q z31>mjYD3M<$}!LQZT|~7fT|%iyZ?~-NY3^@mnLW#gIx4~h3)?hj{o<+^j~`3bEm#@ zp$i`tt-V^b_GuOID_gVB^S_4le|0V$_Y8&{{jL31p`3~hMDvKGj#t4 z)kiD-GjIIQum`jdj_(X;X}uu$Zd~E(|AnuEPEX>y462NHF8oKUkN7V9=Z~5XI&78! zK2(J@WeI^U(es#Vz8y3}#C`-+8F3u~4b!nf7TST=fwLY4^@#aTf$Agf)1dl@{mg&v z(;x(|k9bf07dQ>tw2NFHiCp_Hat%7qC3FjP*Sy|NP?Z$E_;LK|*J&#rrLA}rzw~k7 z^sDX@u6j+r;W_D^_ml@7Qy+UweQeox)2jcLRsUn#NzV-Dezafx-}cb|pzHs2FaOs( z|KIb%fB9t}wFG#?m4XH5%+jWS?~Rp$owN^$Ur3`1G6M^#oiQ^bc#N8f zhf_#~M^qg&pv}O?z%IrntjZ;>$;cxI8XITkV_@QCU`-d*>EqM;&!q*bk2t}V5tAxt zl89LWTp7v#XHogbt^Hp$=(&B)f1j4?J}uXSd-i~;B+vlee#qSuX!Q}OD+I2OpfyqZ zfxx!?@MUsvMsVA1I0|aTx0@AQ0f7eSAX7zt%TP^5C&R(gj@|lmn*&9?y*89k*lQ8-h+_JD75z=q&~tV;WZTU z@&aWOFRF))fSaLKYs8Z(LNGZ+*(O}oLe=|9UR z=(<*h?Vts9TwDLMZTv61{6Fi={|q@F7`)FjxIbX!!ZpkX7BCl=5z%B+G%z>2HkS-FW8iI5WF~gq|v}%iY zvXbvYW`+WSG2NB($Bz-pX==@i5`mX{;@{XX2Nx|j6k{jp>BvHTr3<>|4v;Q-6{^tZA z2gke~wC|Yf@PEFu|Aj7s#yJJSm60&`oF#!vpacDQFM;YK@vG4O5#uqCAb;!<@H%Qt zUtowZTiF$^&_4NJ{U~TV68jO*z@6Y>(7-I$K2S%A?cjfoBcQ|BA$1UB7K!iFe?IV} zlJMF85F~gGbZIa5`TyMKK@JeShL|PfzxH4B)_<|v{}rzMXIS}7Wy&k_<$p6)eMnyV zI(E^m@Htn4C!F)`I^ooPE@0*zy_xq-=0D|~^-*g1KmMiv`Ir7@SoEKH<$osdfq2Y^ zLCqV*lm8j|J~HsRv1%nr8AmZmJAt;Cfo^MJ0C$cceq;f6ksy1VA+0sYEFok~EJO`> zr3&b_K~@GvZcY(7Idx0WF+>dPjBFz8f(l$B%FO(7ECTWj9DEFn>R2xY)aoaRewsmpEHbo;$C&uzv*gF*8vdf*%{QcGo*WO zXwSZo?!6!c9yJ10L!J8}=bb`GXnoXyvTPf&jR?GWdl%$NR=<`VzRla=Y`?~B;7eLT zJ6j>ANkB+%@ZAuQ6S=(_ws;7|W15G7xAN$XD6m-Oy@V@`N zJN`?q{V%uazsvUj3X}gc#C`+SM`5oRqTVq?JYopC!w__rA?_u^%>O)FKqqQCzWc9p z_rJ>3|NPfL2y#j)W_=`b1-yOoDrotZ=(YdKPyVwP?qXoD11|2qWA!D&m+UCEC>wi@%P<^E7@?YES zzl_a)7M1@De%}ozfa)WLo&UuT|2Mq*Kk(>(jX4kPdk#4C9yClj&F{2Q$Ou#)DLejG zcKk07MzW6o6Zx9Mk_ZEc*|skGPM3>LY>kIO`*^EB_^~fu@t# zPl67$1JzK;s~8x};jYKa&|;7bV3_(`?+mCT#(D^JSFFe}Xnn*4S=ew0R3)(=2aVTp zpZd>r5>y%Sp8PL(8dMpHock|w?!Um<|NLh_{WVB^#D4`;AMsrQMJ4YQP;XA~8fXeq z;>>@YIiJ*~y-8mBI&<~M#AWxQ7u<-Sb1Qb%4cESlX7z{UyDqqI`oYj~pKtajrWya4 zW`Gt~gO2{#0y+?n@gV4U5QP)}8TvngC&)CxbE7h#`9V;91nKWXZk>P+4zPmP%W;CM zCrBQIw80=NRoKAdtPBiX%zP4{{t;-k4<92NXbhZ{Uyg}S7E~W`@`L--0fH)}+=~CX z75}pcxzg>>%??%EU7xf@a# zLh2)E9Rwbx19gkQJtJ`Kvj>etR6yYR$hUbr1i@A*KuS1?v-oc!*P(B$h~Z(J8Wkwo;t{}co{OnRSm)WL0|@GPb;JXf-F&h zR6r0Ctpc*onG3IpAhnNe)@*pyW1TV6Dt!h7S*A^gAXvyldr1WABgjqkkRB7fuLP-& zu#(7?6mpdWVZ%F0a0YxeID`Qiri+>as(s*<5rh#nnPSJ_%(F$%-^s1C|$rZCn6*CFr*7W95wr7*KW)Rf@HR-v* z8+<^sIgn{qcyR}*HIPZXg$I=-IYH~qcof*gw0Kn9ocB+9 z2WTsb;MxB|SN{uK2HhsWbm%|xPH=Z=_J4-XA563U>mB>Au;o8P?JI`R&ma``pCRNg zL+~qx4A2aS_<{d6FaMiA`7eAGgm}Rh7(q{^1GD+B{^tiT?dHA$nrINZ@}K|ue~t_P zT^|0I*!q(}+MfZmXI~i9Hvz5U5Mp4JWZ;x#m(=4^Fy&RU5mI;L*Kpxecji=cVN-Tw zl(YaH^u({tz@-M7&SR1UO$UJPmE&b#;8!(r;g!;15|HPTP-kG~0f!}g0RyC?ggmLl z3@fQ1u>&dJ%9KqusM`Nma{z5SQnmT7~WTiNJ6iv=LQ2YZC*5 z71)FDlm_NuA;A3ww&b{G01Ugcc|KNY#{h>pp9LLx$A9`i@0tJn=Rgx%e3wBr5!XfV!Go~#pxCed=eh|x z;)7xSC;15vwdcHzS^O+`*4>~fH+=doN!IR@u0LRQ`oHg^{|x=_89HAwbpB^({>RY! zpJ6KKvW&e1gVB9*Lhd1^{xWl&^TnEbL_KbLy&F8 zOgOShp8-MEY182dtv-Tq;k8mw*FHFc)IrGgQE>NuVyYxaJw;^2gnX7bq)GxEK!l?{ zf(+t;x=vG0MZi$_#N(m;M}xZd`LyitYux5pv(dd|xkK&(yYy*h@qGqi^_u<_+Md}e zPASSZ5t2rJB3h0-N|wA*ddvbU44e`S%tGLnHe}-xq@Y71k+KwRnT!lfJWSl8TvB>6 zIu2^Kk;YzWCgIg?nZ1UED?_KA72Wh-3~J~9JHGp`e+N_!2kKko9$t`iS>3s6G-nKN*_;f-0tE;1xC7L8pl8 z9Qe=B{0KZPCWVo`rOa_`$2Yg+R>`kTS?FGj8x2OAe4;(2j3zP?lsD zW963v)kmOB91tl!HcoLyPBD-yH2mE7#%>v!NAV_AOu^A09ugHxjU$HH;4hMlJ*3)??$eM;58Ar_Spr!793O)?ec5c1*w31 zo45NkZG#~1#;x#yILO2hVA21IWNtv-TmBSNl^Ak`3r1oehsm63bJTCd7AUa++a zkSYm6LTVq#d=R7#g0PWmA2c?kGO`Cx41q`F7J>Ug2ogL!1Uh^hbY&j?5jj{jWScz` zf*{opgtSSYVVg12CVd9H_Q7O;n*or<0F;DONf0uyb1xb~tB;VYB=~d^qyj=C@z+O? z?h`IQHP2WN~47|$VyJZEygA9-{ zZO9lAq^%B3`n0DQ8JPH4g%x;J9OTV{R9$n-!fH)pI$d*SGSr;YS^1me6sQF*aTT<~ zh36FLOf&WapiA6DxBll?@n2%rf9~G@3~3)3{BAN#{bO+aztpDx4E0|aBL6e^|6(Y9 zBeL?p__6;M&p{U@$X^2$O1$9NZP3OUaD60r8Pp+ykdV5G=h}acOQ7zO?BoBT%N{c@ z*fM|)!QcavkY+8U*x>~&uxAqntt4a>0T(sGu(i!1;2mM`bsdmvaUhLhPEe7>Ar4xL z$SJ|XBMF)>15KPkf}I&`3m2G#6i1MzHDu}mVlNK^L$;L094WK^N_L>r>g28eOS*v8 zeluA9XVCx8ko8+(DQE=(!!FR3;tZSr^Y8z!cJ05`^#2Uz+Zg0O30eJDbo{U4@?XvQ zzcRRUr0M{=4oTJ?v{qce71Tds%KFdH{)>GosNiK>4LTl;y9!oIDnV*6L|}vC9 z7rzKPzLlx}KilO0(#!s4_Ig!h#|U8G4T;B!n-;93dRIXVHUi6A6o6BGQ5)~G2b z!X_Mv=sOhJy+5dJr%%IX-xCv5fD~3#rxdFIIBRRI-hcGzk{fa^X|9 z6_7V&6V+zsQvj{RV}z8PkOC93{s%mM!T_1tplPMV$iOPZCZfo#WG7|ltKg7s7+7Hx z*=8QsEiv`E$5GI^G!nN!lhka7LC2SI9{wRQb#~8%i{ree<1Y*FSsCrTpJ2mVF;=2Il#pWD|onupMg;T)JtU( z232a%K_1939lSJy6ljp)9Z0Rj2^-`|7gCraV(?$y`oEmje;JGaq7IHh{7{u^HW zZ+hv!`IY}l*Z(VA1vNo?=@|L*12{p;@scI^oO&kaHD1P3k5Zbj5bkjug0)eyW6f-|sHMmtHWkC2xu zKq?@3ZwOu!c~ozJ>?eX$M$m2%cxVo?iW^=Td4kVafDFn(CW#;>)2uV6gKDMB>9(2EA)|AU zaXAPH=@VI{PQ$E>EK{ap)<>WVb770_Ahi!BiCG^(M)Tkm6Py9Bn~=Lp@cIbOz*-+c zPG^PGN)fPAhTsgy-G8yuPQ*?-5jFLA^t2ODH1#BS7wu`tvIo$u_Y;qT&NAvd0-C<; z*aKSgP_-U(&2Hfm+pM|P$x{s@yFmxXxtGa0W(%7q^6Q7OtNXDjIxt9Cf)4*-Rb*h6 z1dl8U!g@@Q%MKuGk7-&Z@iB5rFo>!1s+kMx`YYSUGdNT!q|TSv_uu&DfBwh+8Ls?i zIQpOA#DC7SpmkI7hyQ~P?@Il{;PaTl`5uGQT?X%a48hMBVy=qx{FmMfI*Ngz?jJ+{ zf9CC=b7p+s|F^yi8nNNO0b0hu5AG%Lz-EgeeIf|Se)T{5RnVn-%;)}lKL5`!`wjzx zDhPqbSRj2Ic5r0YFQlS@R7nEh`Up~af#-%8 zxfno4=5m79XhCMiA*}+qIm`?Uc3fid+$x`xtp7`y{g*KLFJb*(*5SW^%YO!w{|qs& z)n@+ZJ@B7p`+ue_pjkGFIsX|7pE6i{V6gbWZSr5z>A#u_6sbD@*Kh*OL&-S&mvQ*d z?f##~<3B^fd#1d<46VP}CWFde=GCClQLY1^?WmHM|4ZBgb&eqQkq~&g4Y_~BclAHV zd2s*e(toYa3k(b{;1ZRKlmsL#X_O$f=$FkAaIJdaKhr_bNllCg|1%y0-Bb&@B>f0z z7K!u3f9_MD^9*@UgVu@*oQ2j$GFShLo&)Wc;yd$S^b%-+1n9_)@^1_UZx~V^GL$}H zX!*=A@jKIE&@wr;{r`DSf>!RapZd>n`ajE=|6G?qcL8%=`7eJH)a_zu{=krMmZ9{G z?5h8=d;T-b1D{E?2sB*Guho}O_+s8nuSN2fnAP) zO%{Y$Wf)i`K!_O}<4lmA5oEv|lEvW5dwAG|l^8fBcqG)tlwg|zSVcgGO>uHD@$xaS zMF?nCGpK%LQ2QoidqF4sl|$JPuZDB}?Rx{jqj15Udqdj~hO`}koSzWXx+fUhfpU-= zhyvQ6lSJ^E2-Y)#E>wV5MvyT&$b=ERTLf7$hrDVIQXj!)=Bz%Sl(m8_EN01JYL(x+BxE!Pcf|CyUi)_INQUO6o$a$xbn~5MK zq&|YLQh+d^l@WM95oozW=5(u!X`spowLY>=n{Jgl4PGB%t&+gQq`Q$x~4CWQ!vu>2ZB`Tzk136fa9oEx<)DaL85=xMM zC8SD%48cJ}AaxUbmlnKA;sb9JgS3+&hC?blcm)U14hc}mY6lhu21^DOUj~7DV)}nY z4E_u2{}(j*FJk_m!{$GW#east2O{nN1rCD{`%dt7t-_}a-uoF0UozNz6R?EVM@kO= zRh>ag8q^%1^^vS2s50Vq|IZNnh9T!SL-S9TiP-BSiCh0AZ-e_sS3#qx@cKvueC!hE z1!#Sw-*=UP(F435gA?Kbyd*nl7LA3IfkBOdQJ*1rwdThEtVco9g-i!QRT5}b+W}C0 z#JV4}s)7CRfA(Xb`bgm1fBv(em34yWK=qO270|IoB4?rXkas|6k=OXkHA|^*aozk~mKP=REVD{XA$L3uM)$_{IOS*Z-^R`VXp-8h=PE z1$VvMzB0`E&$#kG!zR!$rs)3v%4h#ub)07qiezArW#!YBRMcZ&6K3O+W#yHD)<>Yq zNQQw~jDblM6y4CuhzmTD36Ec9@C+KbO~NF|z#$G^P$vOymM}B0i*XApaf^ww2?{Z= zMF{KEF{pp%HTe&!jI#fEHJtNqJRi`p4^kh2dPZ#rK$nBJ?S*evMLx*~exy%8GiYK6 zxh8_J!NYW*`UqYJ!5Q$H2tIQJnIVE#L-0-zq)LKwA$=kU3F#cU!KR8lD%ZJJAl5Cw z&zghG3L!7yhF3%I=^;o>1nC*!s*m6uBKy3BHhBwe^B39YErj=m;0#D*1Uiu$wxJch z4uWkU0mI-QE#5U7T}oHkPapyndrT;8fLC2;EUH-3n>p$nd{|qJn8T{`uxIbiYxyN90o5A8b zgWVMdn@bEXr`XcoFf9Dfw*hn-1M418!`0yx=rC}p8~=H)gAn9&AP5Psk9ewNcbKrNNW~Sm_TYG$PzM0m4RGC!NnoV6_~-{ z2_D&F6aZDkpw1VhwT#>w!WD=R2MB`3g*d0n>F(y&{4c2epIhrcm*#&)t^aKL{}~)# zh{gZcT=d^=`+ubw{~0pAFqr*dF#ODE^k2a8zm&y)1-t(W4xsgQO7@^@k`x{OD?0p_ zaQrXr@}JZ9Kda|IhNwGCc^??szOzjNbw$}${%79ypJzYl_&D*4;Ny30;;fJOuYp!F zFdPT+b%~yX(K;zW>bo!INTpz~uh_j0gTRANkLI0<=Sx7kn&(@Hxlf+id#u7ZxIV3_iqq4oho(IbYgUkoe%vv2&*xEoYCavTGl?rXaCH-k+V z1BV`?pt-5NA85%Cg8&1Q5NMc=NenbF%OuFa4C(X2PjQE=afJj68VMf3;}&J$6$fon zV-{lIQs5BM;1pJ3<(FgNjFYwOlkq;QANRng?x#WM zas>oAV*x_CR;&e`J6EyRt$Ym>frsWGtL7jz5rl+vh@4ATKlPrrA^bIweGceEZb&s`pEVmY5C{Nt>LUmjQY%5~BM1qp ziI7RmS}AhUk;q9$AXQT2#G|mT5NJ~pm<`=R3ta*+2~-)yOga_=Vw{MZcp|#*XmroP zh|Yb1P1}9zHhWgA2E|m)EVHBu1`$n~fz@grg^JEOiZ+SjM&Ux5u3U2BHGM+ zN}y|8m;^z^CSsL1WWy#}4x*gP3Ld8t;+D75T=*>Y?tkg~pc4QX&i&`O3965TulyH3 z^PgeHD~7Nq4DMGM>~AvIUT3hr1*(#4FEiL)W^g>tknoIQ&VOF;jW8Vh|J%LzZ}SXP zTu594%}~Sddxeb7q18v+S3m_R@0I^j5B_ss{Ldg6#K52nE=wWx5xmlYFd#J%q>4c! zAqvn0Azbi$Gy^-hO5z2DDw7~+IRt3ABR>OZMAV?L4CrJ6hSM79gNiJuU{?D!Y zpG6t81BF597lZ0w2D7^i4$nac$rgTO2!G7rc$UHPH-pVTKJ)(~w*O_VLG_WmJ*YlX zw1d`1%1)q8lc?Q)M%P~q4qq7}ZZYP*XK4A#IuSHH#kLYO^vAsqG)E(L0aPDJ;qD(n z>LbRJ|G6*!H=XsAK_D7D2rLZq3}_;ZM^uwtK!t%_9)!3Q7y+gGU}9hh-tJ#Dj$ph-L&O1{OXRL1`u)5Mtm`VCGe3 z6Oae3zEdo7h}me7{?fVVOHj+{z~)o_9lQKHb_KWZ03q=C3E&E77pO97+ZoWj1B`aT zyG7vc&raC59Awb~V$~d^Cc^9ofh(hJklqlSgskF*bcH~*Qq4xM>J7;C5u_^wC*fx- zxK^wMRY~y5sC*4%k_dV29CCdGsfHk=18mWP1NZ_6_zV%~5+d*lI(UZ&&VW=#kggEC zA7q;|AATnhgaJ8s4r_g6i?)*pRv%fVO@Y)$SgRq3AfzUOlhD`!pKbwRgO;6uBQCgY zH@sqkGvL(|q)LK!m*5>GI0If2!5NUs2$_V`M93thCc-3sym0AEu**@186axI%vlYsJjH2*2mMd17#<2F(U(mvTDjq zvE%=B@BQb$|DXBtfA+Kg`L6tzx&}gA3;#3Z++%Qk$l!b*bkUyuWzcoxHW#3z(;0^7 zrwsG|3vB_NbfLx08UTl)>XFkopKhLh2f1HY5NbA`lXy22vkEW|6o+OCXr|!ShFuP=%}{ zf#@YjGBGe{u?ssfNNy6;KEk2)i&OnKtMY$V<^OETf7w<3F=%{c*8js{0J;c?&+xyn z*?$=;5RwO9)hB2BU)~OMbDyH^e?>dcss>T#|NOT98QgC&xLjk1y1NAR*`)_vdKf{cd3<53; z3?{5R>Y&~$17tW3GVY2Tn~*6;WD#UGr2h*Vxdffsz{$ z4xQ>>xig^oXh8E(PzBV!Bd}vvV8^atlu07cNj^cXyTCOOs6K+%K5z!)`~=88RyYaS zLj)lqi?_k#Hu%*KFb1eM1gUo*l@WvuC*c(koZ(iv4paewNtg095ad?A1~kWAx(dVq zZ*GOuL2jihAPW^-VMi-KdP9!I%ixufec=-LJQ1YcaRi^G-~_%G55k7rNd#dd&l5pv zA50Q*9|UN62)u^~vN!=wLS}^^Gen?jC~b;m>SU`l#Bdy>_JNaTus#to!!%(cqz*zO z;Z+j68iF(Y+jqmOBsc?7H$g~veFR~^DhcN3Sz=`i@sllhI^rgQ;MQ>tei=(gr2Xch7+%X6{naM6TdQOAOh4~;sfu7 zfQ(ro=O#+n;BJ>nWbT@W%2&XrD1hrD&hw!9Na-GEu`xr-H-@M)po_>I?l9P22Hiz& zbAiG7JcHFa5ZmbtQ|xnw`TzMgfXH3Cx{um3Kvq>pw&B&ss}EJ95RsECCF?M zq@4^|*$XilStWk90{GsX84P0U8Du_isQqG7`Ol&DpGWgQpZ0%d{XeXR|G16+^PB${ zG6l^pNt%NO>*TEe%US=IwfQdxxklCo#Fer9&!GE@LE|ff-3126%M6j{8FC&mG<|^7 zN9-%0^%2jp|H5ZM^^p{8NdsiPH{_Ccczq;#6?DL<@P+?|bKf&a=Yr0);LrzM*TpUg zx^!kN{|mrJ>P~^GB(bZY!@wEVfa{}G|5q}l=R5je@C4{a z4Bq|!+4lYCIRZKzLg*Z*KH@(5U+2ny>BIk7tnwHbO*sX$!JCq}z-a@L0FdJmSE~cE zhz>HEhqN4#g9lV42}?3?3o~=g*tyH$ssbo2%K7v<9X!Q}iGJ-5x zfY(Q$0Uz*n;mDN{5#hs-YtSuL)KYyv7n6E))})w(?hU*tdPnGUK2qpqtt2e z`p7bMGQkSSG+`n#LaT`&TzGvH(6$R1p$+Q=wxf5TAZz=PNl2}PfA|hkJt5aekXi|| zVuJLHkjaRNha)B)4x4y5e9{p}j|sZk0X(S$u9A)+lTcak-!8W}QY1QCI^Z{bQIwG~7bA_x(IlvEHlM4UX*jDg3E zL0~SA%0_m@hl1L#d9?rY>ilQZ`Oj_mU%>Fcu*rWx)Bi$dpe~Y#`F}}M5Rx(fFK_i< z$@;&d)qfdlka`BWBMeF>7@W2-xb0$yJjRfFi=pWQ$Ao_j^Zs+L_|LKh)IZ`s`Cse; zv_6u&4O(RfxwC`!>VIDF!Uo~1pxsHr;2wwI#s3CNelaMNF)->faF{T#DS)PjST#Un zaeO8WJVp#0dZ2C*i!uX~BDnUKVqnn+)kj>m44gI$tm+I5@(c{ZTpVf)nv+>w*E4us zV2HfWQ1zX+_dm~~{}Su|3vBx@wg+@6FxLUlay!0LpgAMnbN_|Tf{^%mP_4nR8oXY8 z6X=`=-h=;HcY@aJu^;-+bs999$A1}AH}PHh&vgM*sj(gT&wlbh2ly}|$PsyMVIFCW+tprZ}=R5JA55YX$~i28Iv@h8_m)8SKKRxy8>5DLfQbe8eC)iGd*-yrD@4d=e4(a%2W|QS1DW zs&%2=`~6yW1h?)FZr$(Gw#NswKn`4jHSYo~S^#nP1hj#UQ-EAQ1i5<%xf((~=@ecQ zAy-3iHmJ!8nixXd2LY>(HhENoY9&aA2vQ|MCWhQ9*SS=I5OPiAQohEuVlBKTg49PK zvTUVe=?cgU5qzxzoPpUVf)CB%nk0gBlpy1B$Rt{2WRpG58g?O(HMkCflUC`oAaxL= zR>D#fK~{06P6pLPDU&QxCRrs-u}T6>8-e;kNfXRq$TYDZ6PYCRfe-rI4krVecffnH zAO>oE6wtN{txAITn;;D2nh3&%*GI?<m@PZ|BkQzo7@FeMp9Qnm5~s*GU5YQKs?|T-JDlIEo2_Zme^~c`iSw|f4STL z8PALL1c*` zkxkPA4{bFuuufuNJ<1?_nL+v&o9cfCwf`(yp!!JM;=hQ+e^JZ-q86YkNz4eeK3v-D zzqHwZd5iy2#{caVvxCPKL;1%msHD8s8(=N4$GMi=QF&5$HxC zSbf9~u8erW2Qk6xBc3z=1uymxoB22Op@QaUzO zaD5~R>Qlh$BVGpvZe0cjMbJ4I3=#|+MGSf~80?RNrU-pcF+^QwsCo|CWwz))|JMJU z`$3595NORd_nH5E=Rvz-1<(8!JP8_AV^{@h$*}DJo%6u8546XYaS!MqM3$qV$_r8- z@m>bqqr`m@bPy2BvHz^cL0A5<9R=Nu&U6s80SZ(ff!9&6ffx9Jj(Y_U;(-o#JqlU| z&v^Vl>*@bNyZ$q{_k!vpWuT z*E&^zDj;~31nLUGxoB^+lAPh*)2)RCjR6TGKGD8F* z;WZJw8iFt&JtIhE1gVeU)ewYXl|Bo(KC*-z@&j2c2O*)$%vkJQ_XmD5Rc>n0z#R(h*3l1R*0P9fQJOMg)sC%z}$1d-tEnbbA zy=ynPRjhU`S!$az7qSvhBcw^erwXzXPt+og&oEqA*PmO}olDLZbm%Qo?vi1#Nd8{L2jC0=0mY#|8?&CH+uG8@hV8Kz;)0}0r!>v zJYd8Hnd|^pMtooepbJK?f+niCZ~srZ`k%qN8N8-W1l%)X1XoGi;88pH5DG77JBffh zgK`vuQ3iud5Ce-g=*9%lDh0S2WWO_l=bJEnjVX>Pf>js1rv@1qXA5wm5T|9KAm7drW0^bF{H2BFjcd5`>O+5I0hf3)d8+n)dYM?ojx zG4BDL>BoHVKjX3g>}UQ9T>{N^iJk`Cw9bDHv}TX>5a=Ey?&F{(<&1|w_oqW{Vq)0~ zI^>*j7wDi`j$@#EHzZDh>NuXQ|HTi0PJb0V{9p0Re}<{w8C0?u80?rpH^7Qv#vcPi z2$y7yfb35(WzZo%Tx$P0)&Fy8{O3{q&#U&IR|SN4)c^CS|L50v&ZqrM$@PF$?2Ul> zqtRU#f;&MMKm@e!3~1j8IYbUZLTVyNs}fSy!Fxc+4Dh`V&>M*0m5yKi7G&gGw;2^d zZfo5LK8SlW_+Gq?9@U%Nt2TnsdU&4*vW*BbNd&2V+%VQEz_+qu?GQo6=peNZ8VRY4 z>~XABz`CauK3imyJrA-<0WvoPxe*VvB@@;yg4IE@Y(PCD5NVw@6OL?BX26ko%5(@a zOP*$yJjE<|ib>)mlSEJ*WR^6+B5{IwLceh$2pJ{x871_Y#Y2&4TrV<$#1Vvq)I<;x zYkdT%p5WCGoPkyoLAZG8BRs=*$Qzf!`VNJ{uC##HO=u$U`UvZtfRKs_QY9hRN1(yI zNk@?TP4M~%>6{|aeN2&)j)e6e4C&q*)VVvPZKr?Z7Qfnc?q$neOP1T^F0f3UY!uf6 zS&66NT`2FAp==v3V-_J};4h@+Dx_e|Eos6bq`}Co2s-Kkx_1e_aS2OL2{yb|9{o1{|z64wjF_PfV~0Qa0Hnxf{ZD_@0H-W3X;%4qDa)DoI3Pl?dcq7x)}Bo1m(VflIW4PqRiyw_3t9<=DyE9{H?Fo-8cl;5ju& zO#~+)RSblS=52@+njls#L?wiT^u-{`Afttl)9oa{LtbVK3>FLw$qWp|4BXooB#$tN zK4Xx5$E5L}#|Tsz30i>aBVPUg+?xMcl>Rfyer6DS&LFmifo}_gI$MeVDrkldxjy2)^xt&je+G?O za5qPpfklCVLxq9e7&LarWx&91%D`a&>L0PFGcYJIuqcCW5azOE;IRhPN8C;fd{zw1 z`V5THpnemB9s{E>gLDsr-FgO_6QKIYU=M@QJ_hd-3>i-uy8kmR`_HuFKkM%QJcs{_ zoc=Fy5wssx@C4`_c!tfOvng11{TDg`s*hOq{%1Z2I_QJpDCiO;0r2pn+MWN>7eVVE z1kQt&c7v|=1J_5)M?m!v3wXSac@Jo_67wF=DX+Yz|Fa(j??0OHjiLE1L(ezR2JKb< zdA9%O-SOYL>HvdW3V0D6wp~{&=@RN)B8q>-RY3I-o6>(a6%gW70#`}k`iN8IKZo*v z5yLmi4!^DAZhGgv^s7DWUw6c}d1pY|9>3P@Ak?x0QW^QR?C@{e3BHmQIs^n>oB+Cb z2woW>_lDp#5rlzU8Tr+3fmcT03TPwjlGY92BTqMaRBmvu+UQof!41rSS4NPP+wR~c zbMQeq(3B8(dI-e8xQz&YzyiEO1X;O_Rue%MFhG_r*yb;S_l6)0kfjcA z5_#;7___&FB_Y>G;r)jpg2>lfK~LytDO19sWXh@d$qz^GyHGFxL zoY-V+m_!XhM?J91f{%LO1s`RATz*1IQB0DXMcR&g>U*7Spq0IhyFru7B3J&iU;EE^ z;XmKS|EwoKXH7H2JY?{F!r*=nG;?Hq5i~|;eVV~!H-q&d4x0lEe3Ka5mpbkHumAYJ z?n7{+_bRAH5b;5E>Bs$5t9GoAf!eD}Zj!T*ff37`^^L5Kl# z@F`?j9poYa$V7l5ld3nj`$UqVqvq9(@Xo($Yq1|=RJ?4Tp{|nss&vFhl@n^XCKZABFc<@u3 zfk}mdQ=5Uqj)B{ef!~fnz@CB6j)C8vf!~RN&yhjE8HD&;7A>$Q8(;tS};J(-f(8Lx) z^DPFwZ1A~us^IRSCHNG?90sOF2EGTvs^7U4{_`sRXI1~tq6VsrKvk0Je|GSFMqJAO zIdxwOn0!zTI%%19*R%Q%c$3jCzXs5hkZ<$$fEEz)Yu)bKvJFDQ%QtXM1UhOP&VY=? z!AVFRgiIpWLB4gHVbu_*Lj>7O1g?qJ!w>d>55XZbAoY=3`5Jfdej<34801q8bfVgZB!sd^yy zhJ)Flt6CxJw_)U5jLs40J_zJ_BKV>O%ap0s$y2S8rdTI~7?3^@gfvNgojH19yGk04x7tpuA_f_IT1H4%h_ z^p7B9aOd8j4(KMPz|Oq^NNflfxsrm+B|%8gToTgx;b{FPcr^rP5YthDS5ol02AKh= ziQsh;vQ)%`BM|e$`VIwk?elNn?boskv<|0!3urA()ml(5JAbZC#x$FhKI7}4 z&mwiFba|_IdDBn{JwHJe2TmCac418hE(HcQQPBPx&`G3_q7q&{W0u2W0qI?uoByk> z2MyLSZ2!-C6m$a-+tvRJ=l*k@{m*dtKSSGphJ>dKeovwGk<|rIePnTh!SVou-BAYp z%?y%rMJBupx$xiM$$zz5pc~0WVf7KDN`llx5E4=!K~^$wUir^*{=eCS{|uD}K}Q3D z_P`2)R$_y0HDv%-sj#zdq#bnw$|OSONrf%ojNKuSvY)y5wm`!R){b`~)Bk$Rf2CY? zRMk8jbU_lxi*Wx!_T7VosDUB%J7iZWq*j7VTCszt@K{0DNHL0nr(MiIrMh4)43(c5=75fK=Qs*lnj(DZzrcCW$*T<8 zK~qR9d;hcS_z#*wItZ$gm=A(FO8jR)lZL8S|BD?1%{TEK19gJ9kNg)s^`HL;XgH30 zFKDk5^MU_t2fzme&Hm5O{Ei|2Aw%YEhU5pJvlAoh~B6Qlxy)Jh0)GIYubw6J^vXowHeR|@Jq=HGSLuVbHA%PxWNRU&RrNP5)kQ27y%dq(vL=72S`$VD^Gx+ad2;9Yx zbe1{qCPUo|rtYr{)BZ3m{?D@NKl|GMvh)8lgl=VE(+ADwfTpuqVJi(h(pep1~lR#vqi&AehP^ z6c0ke;S54y41!S%LeUJO2@K*%43fDFnzan>i&^|uF@zsw&AtimAF-|Y&$#tJq<_SJ z7SunIy!~JLE~q{dzwux68fc_W5ZpO})JFn0pr`I=Ec?%(KZAkYg+aiVLEMf(z?^~8 zmVv{Dfy0u4&76VRh=J9Ffz^zG*#yL9&}U#)W?)hRcPLcAq#9^JJEIP0MFg`x=(r3< zBL>+bal6S3CMy}V7cgk7VbEH~puUAcV=IIHAqLwE3}N?0%R#k+^e)iAo!Hs`VrM~< zO2P+0=W;Wy`OmcWKi{tZLi<3=1^M=Y2J{3^{1>_OpYu3qLjm`3&`cBaPS9dJz61aH z5BwK8{9pRWe}SFRsZv-fp#%NZgk~P293_K zss3kD{Lg7{SK8^lLHud^yjwo?2Ynk31vYIDXxtXsv^BJ8t55T0pXSYeOmW#<2z2r`_+THQxa_}W{kSYl> zTLc=K1Fz+V_lzLjA_xiZ86np}_JvCz6%d4k56!{rBisB%HsEu9Z1SM@LBJ~`yW9o# zIrAOBB)Ct6xEc>}{3+_~c#yqCkhODG=`$b~UMCP{jlrp@mdQ5HhfRH-v#)D+P5xZV~~F?;+Pm zSgRy>^#mEjgOJE|6NC+~kKhbQeFPz~)<=-aC>+~iL=Y)RWrU@31U~-(vI+t&8#U=z z*u+zz6HbQq9}Vd{6415JzjJR;>n_lJTQ%$5E7y3IEOpAAXPY+FG_Kb;yiv=qLfs`> z(JocqJWNc_S6IV|N70&7QkOwM1+@1BbgVIawF6}M3sNF;%2n=5+5cbW{C~BZ{~0&^ zXJ~%RF#R{nuK&!Z|1%u_&vE!a!><3J+t6~}feuaey362nnZf2PgY8++GH?Cu3|gBQ zVy?L@`fq;gzuU|I@^?W8GC=Ahj_WA(5fAK?2F|OXDhYJ3>gE4@*Zxa9_|FhFoq@rO zfmN1)1#+S?2Lq@!fy{z2Fz{*VxhIRqF5?fK#}KfSA@&$^-c5n(XH4B+`DgrRTKu1D zHK;z~Soh!Y@PG52|D6kFf(NM>UiVf5WV&-Dt6d8R3C*M0o6x!A3%3`%=^!_0@QEi*aIrGA>(xtw?LJV z)Sdt0H$X_}256*C_~w5Qx((_gv7i6Xc=EsGbXa}F|&ZL%j)9CGj5vEo_iH{$Ktq_>_hT9~hSYXIu;FF)@MnF>xLIFLnjA zUy1W5XiE{>9?*Of$1c!TBax&3Id*~@NFARTvhOfNon{C=4XTfdUkUd7=U?()e#?J} zz5j)G{1@K=s*ia0|Cc)RU+U0*hS^^k0{CCK|?N<5_xjqVM+ybwUpmh*Ztpw>GL24p+?SssK zR6yQ!n;~@&GD)x+g7l0al@VyyDtId^WcLtcnH;1(g49G164W6oS?N%`0>nTXqjM-( zZjW?ZE2IK~kXWlBNR6(E(7Nzw%5nh0JE!5NVH$RG|hRfN_vf^?A}G3DC;>e#}2Nr;Y8^ES-- z2sAbetB*iyDPd!G(8ZQWTuAkV+<^ksSsl>fJLKvKUK2q&N)Qq<%>=1~FiCi21Ytlb zBM6Bpf>udE>L5%q5_}Rkylw(9km@7IsYKwVchLGMr2j-n|B0Z9#{wrD4emMW*M7ji zeYaN==n_De@-=pa%PeyiTBOf1h@Yev-liT!Sw=z-AM-1y$pss87ww2#9n7u^Iz)3fAd%Wjh_A&ybU_1 zj`jL~wj0p-Bc5xZu_MSx9pvOX#%uo>!0W-dF8p_V^q*<*a|RY81_mWYHeLoM_>Iz# zH4MTG+^P%~brMkv7(#Y11Z-i5J;#u97jzMA*LQ|#e?WIwuL7NB#<1-_M1{OO84sFmm44`WbHNo4vRKVe*!2qg;v_K?iZ>$yrvo>e}9` zINxE&{vbQ)zx1~Mayvmw;1o{&S6=a-A@e*#$uow!4-EC+m|FiZO#Uyt>_6mg3*KG- zMGyTK-SwYmWt@$HE#$AT!3k)$A7~(H8d)ZX~v#Eg)Xc3(n zXz2pC3aE?3sr;Wq^&f}oKVh32D!wnxviI8-9rJG3>D#nBxM_1p^Oo?Yt>H~uLmPI5 zHtYg*g<#bXq*j8JY@qeqKK0wc2)dOOQXgThi6DIRohQk?<`UuiFg3lI#Y9jD;Mexc9#7G3yM9B3Kp2`Sb6B)$y7{sBB z(HTZ{!VxUipeyZ>t0WK`9L11s5enH1TEp+#ydA!d1(N}(jF9_EkctWG2@mk@5~L1- zkdRslQWHT$AZ$1Zsdtb`|#31paUT#n0aMDQ!|3rITLn?9{8_u8FU1V=+*xcC;y8c`_HfvG>y)% z=|AJP|DeM#=ly4&^`D{dKSSv&`Pcc{>V(`4kQ}WJu_J6)T|23}tw|MmUk09f6@NNJ@^FA_F~!GAMb%mId+yPTD;7`nc*O$D7b%Ch`F+lK$V+d)$jd?)`4p95`v z@xJ)q`rv=_#BOkr%LrZr&jwyV4z6uL1iV&)GayIAKn?)s1g$e?;s@2;9O4XIl8n4E ztbz(W66*Xidcq1u!fNIc+BQGaFJ{nM#GtkoR3$;`BaK}Qn!6cvk1-gWWNITT!n zgU-_sHTtg;`q3isuS4lc=ZZ7l4LiLXb_O?X4sO~EsgFYHcZSsO^ljXNh9H#=xEcag zM&Kh9K%F3XH3VV6J4BG}tB?u^PQoi7_|P1@CW2H(kg+(o@-@!LTZdqEkW=vrNHqi} z9l@O=cr|2SybOYnYa&SP1DP8_W<&Z!$YXSnn#ekL0lYqfteu0LJ_o6cAWP;T{Udm- zWR*SxvjRe{i7eBmTOb|84Vf?k9k!4HUBV4n#SN*9AZzE06Z;|kAj9}xIKwc$*C4J3 zQW-(mkopK-0ULUn=wN^szB|+Fh zUHc#m2#K{;!aB7Csk0ze5`=_QN$~mzlYv|xK^8E8MUF*IJPtviW9}v#10UvaJbL2M zs0l}+CLV^M$O)jO>7X?jy@!K)4~K$LP}c#!HqdoT?zJ0T%GcNyEVav?V-!C@H>_Db zpjO?hT-G61#yVBVFq%)tk3-FaNzxRA#GOE!@xzw8Zu>8D_P^$>|8fuii{AJzedoXC ztN+S}|1-3H;F|hhZsmXGdH)&O-ZC`3XXyRK&<$GHz*P5}q2en;)d!BrprIAU?f+FT z{kMDlU;Y7TE`$Brf3~Zz-A3T4B6xiSVqEiF_LE^MxW-%oasb0F(4KwX zQ{XEP&Vp_(5IOr_^3s3pE&oA_?ih@~b4if9&IQ36Y9JT(A)j##7GV-&V3T6tkY(Uj zVBk|_5Y=IlF=kURXI8f1&~W6|b`v!85j2SqwTO{%$W(SOQ1mWU3#d{KuGfrgRg3P_ zPUtg9n{Jge*FI;yS@JYB&w4K1WCk&R23|MNiZzh{28mP#@e~G;GzP(B(6o_2B(y#f zie?awVGvDWkk4aqU(6P;h9PVZWA0^!y60>?Ul|tuXI=$bgwJ~LKil#DyyrmCBzgn1 z1xf4%h%Is*)DPvp4w_dIx%OZB#((iE|78w?hPedOuP}%lWa>AqWFr4MA!m$mUk$T}AMk2(21|)IpHhA_xiT8Cj%G1D(_j z?h`>OBgoo0vlP%&kx|kFBiPU!WQ!|gMK^p`ky(77aeOZtBBnlq$0J(B1nL|$ZiDoK z(JCVl7ko`TyjFrR{91Nk_L$HH@gS8^VEb-=FhU-(gRlcJZb*lVKq@0-5>n~FNl3i| z;UdpHLBx^QKVViy$Rgls=m>-i?>`Db5fhF<7-18Rg-tjHs*ENaiJkQhb`O``PFRO!GZv;6#j1AmB5*O8Q5(}QL5WA2e zcneeHE{4n-tQF50TE4SP0QHPmmxI<*vF`ZKe(*o&uF`X$HCFZ}J5OnNIKpyJie5TA(Tk zQXg@efNCYs_99*I5S$ixW2^>f-z$SAcz{k5EUpbY;EvmjLBxSUzy`D(i9rL@!vblv zVPJP+5Upo0n8l#I4pblMY+}&a1X|djv6Vr4D}(+{P<>>2n89QhgVg~B&%F$BmpF=U zF@)`7uv!VKkGu{s_#S2OKLYCh_#9*ap(CIg%6T1w^Ew9iouEEc+-=^9uYz;`Yi|87 zvH!o&&i`yXK*MxA2mcEm`!BTnKjW(ZJX2pY6rEr&-_M}AkwNwqgZvvveZ(gJAJhYa zE$3!e{m-ces**q(U{ygAMr?}zMU6rCPZ=gWvCMemT6x^P`lNsTPUQN?uVFI;1=eo~ z1YZaaseq7Yf^OJbTeFq__4ohR*zuoz%YTlopba1#t3Zd6 zF|Ys6v=y`v!r|3_!`uHkul?r%@1o_t4%$V_2VS(mbq#dl5D#n@66E|ko?D=MV1usx zXUN|NsuVyQjv(u(A){yDj*@JWl+Q$l@cj(IyBQ*nFr-~%sCdcH{)1uie}=iB5kVG!|Tkn&_u4&u}f6)=hsF^iY9PLZ-tm3PTf^3GQeDA5eA z(Tix*k7+eY>@rE2V4gnNJZF}5!93fNCH7^@oT}Ej*Kc-j-0sn|19C>0SNTSx_?c36 z)eH);pe;NcE}$xj!;yh2m_a;&K{As;G!-;qBoxaa5D8vl7senQ4qELXp3UH}fZJ~! zL&SEjs+*$Iei?24Z@v#SEn;=(zv0gR>f8RSZ2qsl+Jupap=F=f&UVR zK-Z+Ip8l_Q<-f({|E3rIYp(szQ2kas;Ut4%7ie7^n;8SEF6cl8W-ZXY-k@0|ZE(Ly z8&vy%PWd(f4bX9zfO<@9=AcD)EG7(WW(>^647|1sQoall-k^owj3yxUpeZM9&>{y` zS3c!r2Aw$!`tuodH-M1NItCEh#Gto@!C)t7d4tg|2BTdJR$Cd|4l+a?VF=mB;Jk^! zWix~8UIx#D3|>bVypMvaArN~ngU4o{YMh-F@6=zgcN@xSoV|2+Gkw;hS@ z2kld{KKx%{#dnL|Gg|fgyeiMwWbc!-e9oqJg<0$ytHcjB$^U$cpsI&c4b&~-0F&(C z?M0j#AaOS3{{n`e#4UbmMIN`xyzW+W(4+3Kf8$=?`rZBw+rgb3@R=`POgp z1|!ef4G`o}vmP=K2U(K5bzzW zh^vT-S2z|ecPLy2sfHl+kt3{UWM90@u4pL)L24z)xE#D1f=mxt=PrQshLB0j%E&%v zzFqb_NS_FP;1B3PRFq>DKr6Q)br6JvS30;D=CIBYXqXPXoEvoH0<2pEADx5LM;7sY z7T_ZnAeE76TrZ>!LM9lS+`j5}pw}TwNzfrGrccckhQ(QV>CSeFR}()=Cf& zNUa1XA$_ID{=<>|hr|1iVpc{G6OKXZAY>A`J_?_BJZ!=-NKF*dcPI>0uYqRGkky=fF+kSWabJh_kI?EP*316|@BcU0@}Ge_3cR{o2sEXG z!0cR-`jXxgbmNzRn$uB-7}73+ra~b10nG*7Jj=KdbmTSDzW*FY{_~sxZNHVg4yuws z*Lhz8ZT1&A^`GT9==5pLL;v-T|F>NHPIK}-{l&ixmi)0_@ZNIPOYfdjnprCp{F(*r zOE|QnICY|hOycAmvUL3`%pw|1BI`|~8Y~i9ZPU8!Gbg#^O?N4rVlB zm7aBLe3~~0v~P#BXZ$<&2Y`C12mD%gIpwcZ3-0AL%wrG_0dLcYI1|}m0 zE-TP%5t9}Jv_8@T4bX9#f@&X569z8u5FD>1=u8G)OVDx!Hgg6}O9pOh26l5$eI)M1 zAnpa)Hp}Y(8t7v;0{4})85nFBI6WDZCNUT;!+aRNDzV(~X z5N3S@IcFPI>1^_<-sD-e9>xV7ssLeQt%e|TLs)8}a?k~NkU9uLLTVxi>4ba%1h_*4 zuZG|Z$VeTeCbBD9Y74#?0&8UinH$1i6G7@ANIwX|2314g)445CDx+DDJ`rRr4y_Y} zSs$6FOobpwO$6#3fe-%yZFGfoiy$*ZMhVbyIrF$)bEK1hOkzPDBE#5j!`N>9=uRjj zwp%y46N1ocB7?{d{fKt`h;~qy7IaE2X#baY-DdP!sbRBE!)8P^L_vLoRx3dU?jSuT z2pdu>1$6F4M;(auc!8b!kx@|B0Z69_LPF{w2pQbDH>7JH1ci3*N1)z=$aN5ejXabG z={JRgE2Buz`42}R!*mcbtnWxzA4mi;Mi({lFoY2a?i@u-1l3>gDhyJuLF%Kh2}eTu z4h44~4D8$=)PBIPd5=&1Hn++RHl@q0ikCVSEwIj>DHAtCCT<2p!v(eF-$CmZR)G2% z3~T;#Z22#||G&wF|9YqXD=hiXkbQ;0mx78~^36|L48&pYP^>Xnk}Yv@4VMI<#}d4L+g))K>zJ*YTeR z9a1gi+z4u8gQ_7G*e)UPh9e#iS>I0P$i0l=yBVU-Fl63hD1FS({1H@F&i>D^0<=_^ zVJGOAO_t;Txz7FvT@-i&WT5C((2$+LSdHFy4>Hkc}{tF)c zFLvs`=9&L0$N$T0{m;JOKhLcHER%i-Rh?moUC-ds!(iXe=iRRmyU-$cg-hiY@5*(q z#VZ_&7JF2!_O4y$*RavAb$eh3XrDf43wGCm(B8x0y@wTV0UF;a$;a}V-SgDkWOI`OJfjB1YMTLAH^UT0lI5jIE6tr zkHLHzujdK|pH&PV%NR^%F{pMk=uKg;UCa=%i=p5WL)S}|8Q)nJ|KwlypKA*Uac={~ z0^`bG3=2Oo%>BeL@fkzWb%v<@4AvVM>^Ac_t!0pCW?-^rV6g?&N(|bddW6diGzY|I z1*%^lhcoawGVnTpxV&}@ymky+*3d-@oK_56_Mnp)xSbfdoj`0Zd(hF}oHn41w49dU zMR!IFBH03VQyEOxF&M05Fj&iAv>t>^HZquRXRz48V6~gUW-o*DZU(PI41wDjLU)5| zCBGdEKKnq({|H0qDF)x&44&H=LUu6(?O<@3&0sr)!KRPFs)r$CxAms~3WxtI9{F#1 z{J-M*|F%=_Dpw!&tUlmfdnmN^U`WdW)5P1Nj=LG8o-;^2XOjo*X64lc%@DDvfG3aC z{m$ffMUW~9P9iUUfOMLIy7qzAA;8Xp zfb^2!^%0~R3hzG(sfplZME}8v{)3Sd4#COLz5`+X2O;VqT`EX@1i!fnbbSVRokM8X z;gHTl!EJkkTKBltZE&hy>s7VFsc3;(#v-elZIZKIDKGiLumrq(ZsmW5wg1^S{}(^} z-|p6bpBMjwum9(p^qa%?0fY592IE5vx_cP(S2D=WV3423ptyiRaV|sVQ|5L5MfU$U zxbxrS0jNGwx&^9?cyEB}Bc2niEqux}( z%ibWz9WXL5u&@fK2sqT}$IoU6-3h9avhVO!ykuy6&)oNiVa9)!#h}WFWizNU;yC=D z^UQz#OaDc#f{^G{P%laF+<(5)prf8yPlGOr6FB#u>&kz&3;$V9{%6<^I(3J6`+wnW z|GC%w=Unt(WbJ?bz5fmN{rB7U-*Vw+)3$R$$*Z|NdicEijnWr+)$a;#-4)!pEud+0 zX#0+^j-BD1yFz>Rh4&qZ=sOhAcL)*-QJ`tQW6={%Mo%~y)PBIeWUYG01XiOw2C+Z} zPVmeTyECXVVs~X=abaL~X5a_}O&&>SGe~4W>m#8^2H_|M;S|s|C8LQPE{hrL<}f%f zWC+_MQgBUe(htj}|BZKo7O@*0`)_sbztv3;GQ15MBQ<^S-}C|KHaeqQ|4pugko(2| zax4A|6g_7MKEa^e2fjDK5>y{C8ZvNLfTn@?tr_^O!CfH-&=4Htb_ot^1`cZmb_)<< zGY9RKWi^2N+)(osRAjD=3W?L|D*)oW_GD>(b@H;aw=rb@zaGLiq*lcAmTFqd* zfx&zWgT+<`%WVu+yBMr@GdLY)a6Ja<0r?$c@ZSomlKgiv`0Zfu-4CjdLQXP-oo4Xa z#o)CAR38Oy2h~Sb6B*1q84Q~l{1$32`l@mIKktVBOcNe6G+t4vI;32E*t_;nK-2M% zmIE#&8x(wwahR+F)ko4V*cJW@Y5(Wb`p=^Q>KU;p{by1Eoe0FD{GUw~R2lK<|5tST ztRM2p3${Uv6JXasz`I3|+6S#Zg51yw zsgEGng*z85Me7Pdri37)bC5km$QxSWwGX6Lg3J@4)I{K}kTtgY$UJSPB^W`vKag4p z!nR1AW}Y$?LPF{tcx43Pg64_99U?Pu9b}p?(IkEXgoGdXV~Dg|!6*(?4M8d+NPT1+ z1L_$;DkFVZpGZHtQ!lCmf{^PVINKn+4Xr*xo(=S@+2~!r3DQ6EY6NwZd>UXCQv>Lx zc&~;nkSYm6`ZR5W_oN`BcJPBcFxNR?)<=**Ja~PCsFG0XBS>EfR4YO1Cio~HXq9#6 zUU;`CxN|S0PZZL%55jU6l7&Wc<;do(3y3IgS!s~cOQf-nS+zz zeFs3LU;jY}BeZ89q&$RISfRbpv-KeL5$vcvQ1>dd>u`9_(Qt4B-mhhccjH#~`pq^a zE5u878}(mfTK1o5*?&-dv;@3MaTRF4ywaur&QCx{a_N7D#48N$cR^Q6=AC-y_vyj z7K__dv54i46}y7F&V=Qtz7ne&1CK9gVS`*YsFx&^1RA&#j$;r^Vi3<@FzVy= zU(FDEKp_7TU&niudH*F={a4!ZpLf@Po}JLs41_L#X39jbf(~U8zW!h2#(&XU|Hbe8 zm%Q^|=Kg=RlmD5P{^xD_&QS1_Blrx1^%e&CdKC2H%|w{=1o?wlM^)V{o3uU^#)osFOjrg+XVHT+$<@~8qX{~)n?ns$4S|N~l>hT; z{FijOt`+*ix?rbM`2o+`b$(5o{pxlgP~&>wN2>m63Pt2Ctg+$OuvgK}ff% zwQvNfeK1K}6%eE{g7$;JN1j5~Cm@sXaXARX5!MNU)JG7~xoD{q_{?nx3E?{AFM`hy zq18mlYqpWAA>=v;QWGK15P`Zy;Bh&t^jVOjek{{w!n#E>k$Xds3J6{YK`J14?E_(e zj^l>S6TxdD(}anTwF;1VB1mrtGD&0{kC-GfiRm+m=`)V*g&@PoZV1wk>eh?wf+ARD zq#Fqunu83`=|{Bdg}1>GWY!NSS-Sz$5pLMzUI$`8*q#lWys*_rpjrtwpa-vypj8sl z`<49KcOq9wAU1d<9;EjK=?y_@A_xhok09f9@X83%8-i3oSnDH5O#~+))e{;S-g_{# zdq1Qyf{>w*-VK<9R6`LM^$|oRgoGFfxyT8^23Jx?!h4QJ^d5uMM2~gDQHzN!(vdCBzEk-)vNzOpZ}{J|Id(ojlpI;gVq5CjeQJ8`&e8r zf)-FIFXPo(#jdi1A>^pX%Kzf0{_8*bulopEAIaSNFL(#kMMAER_-_2?y8#-q;|AZ+ z!+8-@8Hqgj&)#u^fx#Q;>{D>1#KtdSU1$|MnImEsN5n3Mq)W`jkC~g_@b!IXnDL)= z(SL^3|5-PI)-bRg1l@2gZ~;^qiGV93&|-$mpv$s3Pr~XW@P*x?m;MV}{Lgb1G|j|* z_&?{W{|p5W8JxB;IB#bNKgp1Fhau|{L*hw>?6G36|LN(E2Fk z07JwPwzypk{;L_BW-?e!WYB8`A@$`9TB{gT=Q9|sQ3ze>SavF`<49=x;lT3!_Ni-S z-8S)>En<|r%^-E1LF65zKH}HLcXZ2bm42eL!1T z!PkOA7?8>c)DMDmh)P#L>K&{kq-O-_{=kp-u`gT#semANvqDyDLr8di1n(0;Y9a^; z8I;3X2f?c$$eA(Nox3gB*$aRT(tR+IP% zCh-#>^%0~df{>Rk84q`=+1r?C0 z5K;$)^+A?`gU#$Y8rFLZ)Hwq8k0SbxK#)uKIs3MgjD1gq=6_>Y{GSDaD-2GayC{FglcU-0;U z2GAOBW$>bIa7_dvl=<{Rc|$e{Mr>mUIm(cFjiK~8L(>VN4g|D`U2j_%|=16sqteC$8(DNx-caQ?sWrT@I=|8s(`Cty4B zU*X_?hADp-tamVIuV66S#9*_7!E7~y!BPgJRSYI;7%bN^*luKS+sI(Mg28GEgV_W& z`$nbcxgky4lO`MupKvH*;$g@Ne9#mgXzl%x@cyHa+CQZ0fNS+e!^GJF4pj_l2@KMq z4E$aUJnm4$?a9FF&A{i!AQ;HN7sSBr4;r=Ok6=>BW{^r{5J>?c{&WWMd=~fRGV%Kv zVh(d;Ut*|x&eQjualwC{)&E(xf=Bc{w}Y#Db!+kEVnGsv|w@JBMR`ZI9Yf)JMx z1Gg~)mk|hY8ZvMiGH~fKaO!|=>tol2c99rO7(grQ?7*jF2QV-M@)^$wseBaH{NKCc zmxRw-4x78|4*xlw{&PG2=XL%s;QXK834{ck|MR%~=X3in!UjG?v|1g+7V6c3` z;C+KJ=sxHOps?eN(fgTV4zNXSX7F0f;5?PVx|czIH-p?R29f&=0=F4tcC)D*mbPAE z5U|rKc9&V?HWk+c;uf3vRIhR?U1AXZ!XW$!gd~5kD*or!{?D)fU%=qMuNRx>kX~Y7d9oE9V>T0?zRIJGPN5ZMUYoDhyiIWL)OT_ODf31Irs_NkUPOa+t0xV z%E3EC$h8k-(-34`95U%%w$i<9C1ia9goLczMn2;dzJnFcfUHbFKIRm%QVw1dL3&1* zRTAc%M34#yxiW%mBeKq#YneUI5`1B+b=F*qthr_xv&}MQn}V;#vr3-@seK^z5rl-) zMDTGrI0I4>L8>A6Dg_7wd3p%BTLh|%!1v`rdP5Kr`5aV8pU5z#*D$6Ra@r4k6}Leo zbX-m+szV3dEkf%P8Ah}lgtuWLc>H2As8t_Vavg-i#9y~K z)I<;xtv*8TB0(wz%>EHteFR_30GUMsS4q$@I!Jv4>21L)BZ#r#UHc*RQFzyW2#V}E z7}mKDQXfIIfQ>pB4C-XT`dM9vgS!qx=8ryL ze69Y^F!MXpivJ9Y{xB5pXK?Ig5RGHt3}WEQW)LZ5;BDh~KcKt(zxUDq7EAv#q}*aK zJ1m&=o@dj4r4#>c-~2ay0xAq;ZvPj$145#=|AP?t)Eeaai0#UM=8K@}gXheDtq1=Z z8qR{(46B1@jzCor=oD`af2*iQ!H8`_k=q#(FR|x8U}*fz+54Yi)_;y=pgAM9t)LsL zSq}W?JPEq_6f#=`sgI;CgKpb_TyM>C{6EKO(7>JOrT;<~L6sEGIZ%DXdKh$BgZhd8 z43mF@=G>L$Gw7^f&|3vM6jXfZ8a5>{%BX>YnrV|6p1CpLY$YKH}Q_ zAGtmfy#^YJl(-J6k|b|}x=VtG|1&K5#!&fzA^Qs!XApE@ z5Dj6Hk7HJ@Gz(kfSA54I^_))RZ6*JUg09bm+@ABe{^$4jFW~lH$m745CjvogA`zee zB0m2GeE)O0{bzRn#OV5g!Q(1}-))A-Qw&k3nPU$!gzshuSjpfzkHKjQgUd7qwL=Ul z2N=X2G6>#b5ZlVYyM}?MltH|KLAilZrIA5)27}CW2LAmFB1af_-!SmMV-WhtApVU> z?k}Gfs4@~T_%Estu9CF>3+nt=^trDW`NpAQyHnLp$Q7&5zE3q`2^?f)f@jr6uj);h z#T0ZVsA@f=G6GE-RjhTdSPNMr2k!xa7!|9Lt073$<5ITLt#pMO(v%Qr&2}mDfK#;E z2T}(ilaM+HPTC`l(Lw4XNVf=52SI8a$P5vDN(ep*2WQyk%(sS241wz)3nVtAJ~GRg z4XJ$~RT8WYnqiR&MUdJDGCBugnkJN?>sLfE+KBgira zI2qEl56%t-Z?=V>`2eXeV7-_lknO8MT?fF+_CO2yAiG&1l?$XA!cRg}f`;I__Cc0a zppl{72Sd6Jgmxbc?%WSyK&(^mxNbhWE+MuHw7*rS}GW41*F|7fe4kxQ&mdjS;JQLAWuAW_#KAG`wUSJ7-AnX#64n2c*GF*fHCSUL+~*Mr$r1}T?}GL z3>+a0tey<)-VAKujvH$-1A8h1V+8|8Jp=c22I2V(!lxJ{&NE8g<57Jntp8WXeB)BGmS3r>MtIptR$hBlSq&|X>@FfcH(K*P79HbuvVMA&k_!u30 zjhsXNB6uAHStbXmj3Ctz%DoS{3m|}( z%(O|L0YTPj(=pd7K&l=Hi98}_kvs)7ACx!=UL_$j;FS?*=>oWC1RtG4u8fRhx=q0d z(kFtDhEeTsq!-nu7uBW{)utWU3PCy%Ejke`df^~s5Z+=C-l8AYjEwX`n;;0Y4nh|3 z0Iyns)6%)ADI1mV4OdbGU z!UrcIV}lTLh#}#%B-|ooMsVkT2ny=h2SI_I`+Zt=Y4_YVo%WPr&QJag|C!sLGnh_g z;D}^k@MU1}1C8FWTQjg(fsT}6*JToS0!?0f&h(!Do?+>KrZxZh*ZpT+^q+abf3eN~ zmCpQke*0hh`G47)(E5n~I_Q{g)+_&+FM)Q&a)A$06SxkpjKDi&*)IKOx%!{?-hYAm z!wd{o-~;7gw;D;<=m(VvL~Y{?UeAzlku&c;L+ux?ZqSXi91H(5t^d!t?LWiL|E&8# zl@ZT5P(Mrb>VK*0|0S=1&S4Nc4?4Jj`#7jF;yn4E=lp-cEB{3<|K~sVpX>C0&QqYv z1X+&%XFK|z}^nG1iBWy{SQO&3x>=m4C#*;5}q=|K4OTx2da-E?lHtZW{A4S z5P6p&>JCHXZHCA*3_(Zv0ypybuNQUhXESMFRn26OjAf8YW>U;zQ)=MVZ0FHjAf&%c z$Z)T?*&zvo=aPmmMD;$28+;Kr{x78WM?e=eN5iiVs*i+B|BG3IDkDipXniE>462nB zo&L)^fNp${2UkY$`bgN}zp%r9KIi|uPXF26|1&!NXK=p7V15o%8QCvmuwAa0e3n=1 zDFf#b28IF#hE&j|07e}KUJKBbwE75Ixm1B}{dNcUeIWfHh`3MHdLQuGIY=i6-Wx)zj36^XAQCnl z2Va%|scu|Lmb(-$gOHGgaF7}Yjf7M{Xe4A@4tYKZQU^i0Kae^InS@;ZU=Kd#6jx;g zse>SML!b>rumL)V2&5)5%K+6%Ht93q*W@ABN62*$WONQvADJXh0x`hr<w zoro6Qux4Ex)exjI(hF^Z&j4aFAe9js38{~et0c5q35^S>ong0tZr zB@hFAu_JPQ1e#QWof+=ew#%nw2c&L-kb&)c1KRckb?k$b^T;Gv5VYbBQl~&jNbLYy z7qJIC+_%@i9fbVa_Q23Czcwg>*|Hl_(GVn&T@KNUi-fGi@N7Tm)p3Zs=bp&a=lsk5 zi>&?++T6jP$iS`5z^V$m;(nJ!EqXI$V!H=gA7S$8FKG4)PDq> zk~bA>?<&we3!pip1EBkx2Imb7 zzPmsNi3e<9h+E5;v)iTNpnvz7sNO@-eTO0Yk%RjV1orF$4Yc+i4sPA=SiDvnAW_C3Rl(rC zQaJ7aw0~6doTu+6%cB1R>;7}^0Ig@>Jp7;U1ZWUa_|kvLtN-P0{a3&BU;5mCw&njB z+P*X7ePu{{%aHsCgyJ7EM8j&IhS+2!Fuf^_aosE`!rs z2FEvS)}I)RUNR^=WK(~^rSXkd`zNm^XyG)k=6_zz|2#S%#HRyVB*3TfUr_tMpw@p; zz5jxG|3wTzr|!sD{g=1>uW0{Y#TkUuT>h&%|5tPPui^Ax6^xXfK;nvy|K;ufOWXgK zariF_M&h>rCGG!<+Wr@?_|Iwio5|>lpxYbykYB7C2N)Q-7#O4(7$g{&d0DtbSOi2E zxcM141VM;dl7Ug2flYvcQ<#C>oI~E9N$e80{B0Jw|9l$%g$(}-8~qnB{m*X#>Lm#p z{Z|jVW0v^Zxq7{GHE7e2Th&_cs@2|At0DCcdL5rD2trzB%!DI&hsX>pXpuexf{;(%hTI1Msfm#5AW#Ja zKIg{-b(I3B`x6gZr~sNF0y97r5V#rw)kjD*5%LN;NIwWt9~nn=8iDGdPWb4YL1Y^Q zK{`RYVa<>}k#=~qb~vaSg4aawsz)!hNhi1g4PjP8a1pec2*Sl$A9;co;Nj~ufh!|; zt>oRf6dpB?QY1xH=pcZ*!OnZT9Ox%K?74N8CL-r;b+q-$YSJO88+U;H) zheW16S6uW(c-4P~vMUTqB@A3K47|Fa`iM=AfmIfi!5O%;^<4KKy2+)Jz^$98GXKBo z0`LO7)u0P&*f;-Y-UYhR#`XDst2_UN@B9~njnQ%5`Y-(CzwiUlN^g!!pd-G8Zh+42 z7P#JetYwHl#R|G#?*m82 zFNP`q+2(=HBVyYG>KU=@{m*{%Kkw=PVpl;^d*7aYu?t^sAJMpIX%6-4prvKKP|KD=SXVnS!Jz9=>x1WH_j45C2{pwT%`1|ClkgU=h(MFQ1L;L1oS5L6in2SMv2$yf%- zGzOVWQ0GXcl0mVaLAIK~X^BAeUWV{}p#D+yGoHSm%nMQKBi`ent{LW;%6mgFs?g>N2Cx+~=44K~;Qa&=oJ_FT99uFDp zZ!p-tV6cD1Zu6Pl=CiQLS7DQ{qQ?J)4gU-3f)J!W;?e>QeDP?37Ewd$BT>Eo!aATX zlCS}&K9V$t)<=+fN6iIvE*x5Yr0n!x*%?$H$v7bDBT4)JQV!6*m8k1~0f+x0p6_{` zUN9)`WMG}kz@^5(AO&i=F!3<3aDXQqIT%=Y85jf^KuL%T+|kx%5O81+I>)1M2f04t zHv!d0LdO4vjQ^_z-Zo8m9r-UGtk$c5jFEE1CL=e&q zGA;+(G2~pb(ye4Aq&|Xlh0sVyO#~s4Ya+B>54`(>#(=MmgH%Hh5?%qJF(7r2eGceU zAIO>8kje;N6Tzw>&~8?6S7;`r(y>UNVTy#z)271_T76^*J5d3x4uaQ2#^7sPO~KPc z@M;KBF&V}7LFyx;*gg;&Tpt<5K{vF*8Ibx2+9!%=htx;#a~1R>THy#%CFw-8Kq@0@ z)kjz>Cb!!45CpH7APjhAgvNl5Z-;b1u=ZRK z7q2&hu8H(**y`E1)eAbMwavS6t5?Go*V>IPH5)wZH+zA%Q+YLP@vPtMUc1q)W`kSJ z21p=4atbaIv>Ci+y=UzPP?cS^+Ov9%XZ0Gdnzi_mTkU$6>a{v$>m9ldbIkcHw(7s~ zn*R)8o57c{+A%XrfN$331zm5-z|O!X%)lzbEw0AEpde+NsIu_0#!}FD5&H^I?ZC7i zbXbtd!~g0JK{W#3P0;FY>0AG0ZvR(3_h0!I5C*Dibt+s2;RmJcaS0PE@Sm8hR)v%6aOrH|E!1pa~=jE z*2Di<4})!3xy*sFTZ$Rhn(C&kQ?R#x=mn#Hx z@t7Ag$VGvw8y;^4ZZ8H7cLpv`5QEo;fe+R>5)NVz31$!t0|mZBJcCps=m1r@Tn4!^ z2Gv>ytxgbfUdk4+iy>+sL*`kAnnyhSU)dJ^7hdmyZXP?e*v_EJNZIMXf;|W+I{b$qIfwrcr0D!#(dECK z<9{jp|00(E#f<;+>-=YsImN)Sje#MDfgv7z;;S430}H5n0!=+a7@Q0YBFy~iOrkr4 zR8O$T{O42qFK7Vj9|@WK7c~AaVDewk^uMz2ZIie+&eiK3E7y8dt#+?mOsE#8&U^(RjlHSQ9;^)b=p1By0%YqDygqWsoo5d^Rer904$AecHraEmvu48)yf*}|h9LEhMfwZ~ z145dmPJ=TbeIl#0DOPDyAm?vG`bUuZ2!1;r+68%#3J6jkLFyn#56Bo+2f=F}$W~Sq z5_U_G0r+SI&;mK|A*isG+tB(5QW@!mgC>m7>LdI;BkiDi?Vx(BWh|r+#!5meBe&Z1 zkZK5xM5~P8TrY5a1S#_&Y)Ijc++%{Up&OTARR+AOf^ieZC&m8(eBY$hCUCYxR1!nhmbtjNnkQ#;IbBbLCp+%C!z z-+zYCO$^K~;NuZQK=lz5KNFWYyQmTavj8Kn3U1^5;SDT zvGu?5h5t4$|I6R}FLMpF^;P6Ps50U@`JZ9lUxvyX3<+l#B9AhJon#0-!4Q3hA@?3b z_Y;N%KUt1}E=OQ2I>TVnC+*rT=h3I=UMcHbDj2*-EMhrB)Lw?PlMEHl`8$6wO#IId z=^1VK&#>)3^G?vD5BDihWh8MGG(shD<-gq3|1y_BmmCS60c|ycY>XAU1geif`(Drd zhtx;hCqbn**AY-P#IO-O?zbDXE0+H>xH7uXL9u{AzMMg$fx&nZ ztK$OJ@SW_DyBQ+)GUi-hsCvNB{gHLSf4-IfIktf=nc_bBpXb8tk{+Yq#KZ6OV zGD5D8Al)A>jsKWc61+a*1Mf5C)&#Ai76y;k3F(76QxazXdLMf)J!WlD7j@Nsx+3&f&kTJqXD;fa)X2Op}b`e+}3FO7{OXT|diNy^uCp zz#`kpAm{|1Hev@~^$WR3ksDk$i8Ax6Gm2~%P(H>i{hwR;zkoipJ`ytd&u~QG9QFYLV7^RH-p<{&w+65vgaVzN4P2@NY!JWHr*_B8YY5XkO#S~HF*kXSpw3~ z99lJmIQtYnJrvgu=?%g65WyKJbrARfI;aS^K7w?JAgBGnY9h#nR?u1n$P5vLgjYsd zVNE)rAOxw8kO$}>br3R%wLXFfqSZ&PHS1hqYhd8D5}binF`-Q`dDd@6tBi2fN6>|N z;MxRU8NnIIwU2+(cK;^Og@6HVpi|3yo59@1ZIC0%AvF=`;9u}oZjY+9kjqLSI}0F3 zPC-V4q18wJ0;j@7PKAr?^XA*;%(cy#3u(RBXV0|FoK6tgXU~Kno75?`X;UG{E`1sv zWR*U{JY}kM<|4bEqYSfu@~--?yX`+i>@EgwUj_zy21Y&xRv~r)DOoiGaYb_m1~Dcs zNe%&d@J>~J2H_~f@_md;{iJZCVN#-P#1U_FV!W-^1@B1Z4! z4E`G!!gnyFUtueK#L)Z^bRrO>?%VpGaVKcHhv5)t@J;mEf9dO>o{_?p|MHhXMX}%+ z&`Ck;r$E=Kvz`U@zl5*;=e+Qr<2+QH{rG>56QFCd88(CWF0BULi@~t#Klic!0;m5A zp8YR$4uk~GfCltf4*h323R?WYeHwH$E61t-%t!z8ANkL)>A!N%ZPB{3ZmkEsI*&qD zut0i7{%!mH+xGi4>@m+=s^HVbWmdwVl*Aws0@_=|69C#?#O24p>C3?72V(OCF!1_- zsw!ddNS#xp5b&4L<3EGje>SK8%=Z7; zZT_=Z{%1G(&t~|a!{9%w?teCI5aQ7N&#v>IT^EEnb^deefCl2Yv_M@RZdhFd8J&Z6 zjTknwsI$_|K~j;)14-v_a!_Li+#3P5w)n|ChA^b$L`={;RtFS9Jj) z6-YJY@E;dso4$6r9RdI+d*i zUH=WfcNLPs9g#M%qAgBa zfh(gqHn0i^a!WWe2`O9PHIZcoVt@`_4MA2ZKn+2+}8lS3q!vVQe?Ne*`%M6>{7h4igAEWqm{t*|Bt(g|(Sf=v}6*FF%oR$v_*xmK>hM3DLjjdZDA>r%ZIQb|GTCU|88 zXW*=r!1WQNR)Uc5UJ`o61m08&ses_qOvnuH#;wp=2|NS`R|)D~)q(DRgzQ#;tjTe& zTmxC#=~%oBvMkd&f01SG0?X`q7Flym(`T8a%`{4xYM4C51cnR~CK|*~Fo>U^AJ?xR z*N-di;E@Pnz$02eq8+^21T@nJ9cu-*D=^vmF}>Omo&2%W9Quwk%>KzZ?~ml7|Eztl zMZJ0$*gQb>5xWS7ptQJx7Q3K21A{07yBGt9I0KUiD4R3bs%CHE+X$+UxOV+ly7b@V z$$zzLpvp-4{(r#-;I2pS7tn#ykq;Om?lT0xVF-H75cC{WANfCI2z$Yh_KYFzpVefywlc3Ip;8{?WBzP9IFpvExXmpO_BxofbL>KGv|4c_f*S#^!`5{7zV&i{-L;uB2fa)X0Eug!f7`i?%n8@scMP`QK*;hBgXs?j zQElPc)0v@0Pb&J45bC4=Y(H>DBftKDmfEVDw z>LXAW$r&`YB;)X39?TH82USUsPL`qPe@(~#+D@k>^mhm=mNRh1gYU~w2F)e$sxSy| z=Tkh&Ed8Hf{lB38e<6eae8!;qNXYoVsM&udpF8@IFC8n_J5;Q5t61q)u@Y1nm92zy zc_1~BQ|T)B4d9ULzd@I=7DJCsa41>sSPHu5&Y^e(h*1I>)37gI2CsL}81QPyp=c@Q zxEy3A2u|98uU>`MK@f&j&V0yyte`Qv?0JwWA-n8(kWH-M%4nVqxD$k22SM1@88bmw zwPwx6Um2N!DSRLg+pm>>*jucrZeyE=&5u-T_!Gw2{ASUmck%>)OTHb_#JIt!*I;a3#^Pl_h ze~kzK74L&iPE&XQI!qgM_D!izt0eSg(3P5L)I&Xq(=-v zml*;tF?bweFj>c7w2;AY0fXgY2D=puW=k1t*D$zlW$@n35POcN>^b|y|Ki|^Tfb>d`mZU|6C^jIgS6b>4ADc+}fbd4iC7q1DO?qR3dPaPXkn^ z@PlWFAj){O5H%6JK7vSrDkHuB0$>E0EfUlLO(F4Xfo6_`wf~Fif=Yg2@M1Pe3($C- zg57^5m;cJ({*j6^sCrU#fDX+;W{VUZ|0{qejN~2vBO}NZlAHrb5ZZNe0j+b8vHve+ z`(Mfq)M=7(0EtN1gC>~7tp9Tx{O8kn$iR1;fuWXxp^Ab10E^632Fd?CY9KcY8T=P8 z0?i)@8vPeG`LF18UoZTPL&X}0iZw3fi(SeWdzP*AC|!YBAHfgsaf07-2fp6{vSk%{ zk1B)>sfHl64`ewUgbgPl4OB>#WCJQI7C^c@$Rwo8W0$)CQU_Us8CE&-t#al=rNH$O ztPYw7XCSXtut=W)uYI8Rv?A3mW#12vQU2fsXfS)s2KANY6+gTn*u>k3iKB zxa!dis@DihYx=3&m-bF%V!0RK> z+5m8^A0V|4a@Psc5Ato;3ON84a)b!v#8=N+(5eGhSXaoUc$r=9 z0*mxnCdt!GlBXCYPBKpHH;C(j?jw%r)s5)X4Qtg3Zqf*>Q}eG;@vBhuE>-d>QS>ZU z@F-IBELQX=Rd6qnbt#l}DO7ODlXJ=;h~%Af<(+fooN}aX(`4+@AxO?46AzMf%oelD zU~sCo=sL_i_Xo?opWO5R^Dg`^oUxffA)A3+k^!_%i4%O?0}FVy5<3F}gQIKI{KY#M z%$q`&e`471pK%N5Itk_5psklu;A7Jmw)|r#dCZV{g(2o9L)c}8@SC8GE%DD8q8>5C z-Itp1UuNomhTOLdrQgJQ{+rDCZ#3(_Y5O;hsFMsf%NPu2GZ-%dT@vcFiNSq4L%>0X zxbqBU&p9T6W~q30fJW-rkN)Sm0IH85tGgAhfO|%l{_~#&%^9&C1yx2u7ye6M11)S| zKMSgl*v^3JBc@}Z6Lr|O{AZZ*o1ynF!+h{0%SKRr#C{N58G-8~k#qkA&VZ(qSPnz$ zBc6-@`ObhA@-b}t&oK87L+KTU=v|^|>vZdmLF%KRmaRc8TZ7wn2et0?#rp1KnpN5CW>3_`?|Z!$36=pFgxp;tyjGiewN;0M$nt zbqpq53@!_KVs{DF-cggI7k#3`jKusdtb`mv#C|1wtpA@z|GWU$T|JU|B?rc;Cu**Sw|kz`>LM(`>L zGK&PQmFz*CCTT}d?IUCVU)l~-A4%E$mw+JK|Kc{FDoM)zzn;f`Rr?>x_Irf&*Dy(4 zW|OebWO?X@M3q7=gAR2^s%a@Vtk$K7z~yK}X?AK*Mo}Y6z)50*!kVgYLG2 z-*5+KK?qpo><)6%eEvf|HQm5TtViuZC=3XZt|vBk*E5 zl&%n@K7v$F@R|sD-2$WzGE1EX=?6hb$XW$RO#~UqL9UNX!K>!rBxtz;xPJtxgCHcl zCNhrig;Yau65a`dFd*kDKuCCHq#FrZt^nx`>4r5!_O!zLL@3=NaDAi|46TgRgX`6U zL6s4_KEkYnv;%74$faU6Cc>Ogw4Y?FAaV}cyQn<(|e}QApT$}V6R!LLL;wG5Jbb}5v z4{O#7Zh-V+wEZj9yh>Ev3sl_;l-+WaT(gy&(&g-uWo?sWY!aoc;-#$lQ2o+P+>h^VLe|FeLsRo$RLPE%ZEWD!LE5X>)an~^L}!# z`7gBjKU?$)29ZS203Cw}0|R7^7_y$0k%56p+A`ffwws~yvf@h6N&u!K|FxchcE9pm z`Y(3pKmVrx3>6O;!X7Y$KV%5F#o&JvG<6>Pj3NFpL&6h=DgRAR{|8koOaJrl`Y(Fm zKl@G)65I(MIxBw#I#knc1B1gR2D6o*Q(HavGXx%I2s_1)affljKiM_^#gBu=20@n@ zT>;fNQddEjy~TK$juQtjw}Y&*V?Ont;;W1#gCtVcjcH}ISWomR@U^FROW{|q_T z80=>=IM25#SntzxFtlxVXxr|fmhC|;+e6y-1vKw+DcfL_GEds8i`l4%K{kOwBpg)5 z2uFdcA)zn^L1YvGy52}I6x3bf4r1U5X5b2C5R73E&18@%V9@Shu$;yaw2344EPLA< zmBs&s8a^;YoM3a=!Ch{pU9N&!qXES?NEw)_*ah|Ki5~MNIw+8A10V$$Q+@3x8u*w#Ke( zjdR%|m-5AK;A0aYwU0wFXdn)B!8iD_Zzu4js~{3Q90#d!kXISm7cN1b4??bV(9Y9_ z)Io3(GX4gygCNV~Y{8u%NPPsUeITdFK}bj)1YtwQ{~;u#CPH1pJrhy^S!RGP4Yx*} zErQGw!K)$AB05-y$Q(902VFG>o>MSNoMZ%>ErPCF0I%f+RYUO2MDe|lE)t|G1h0(X zbr57k4%9P3noWRAG(b-Kfovv1?h`?JMvyuPavHF4b$DtJXlOA!HI!2f3_4r=`5%!S0YMd~z@_z9q{P-M4WSci5{qq<+Ms#m#^ zdx?Tep1gCej6;@~Rf>pNys&AUkZClZVK|>&Ft2V9w{`#oacKCmtNXC2d2^_Gv8#Bp zE4#8PIuk@}N-i8qF6@fV4ANGNGS-YT)=aWC_z{Dg1A~MWgRr++_F~q#KNzNe;$8RO z^xA*XtnCa8UZ4&UyS%)L2@9trXbCM8b1^U|s5liUuKcgG3Ur<}|Hc2x_d$ya1h4;R zI`N;O{XIj%35L-7p!&$`3aCnof5DLOk|FUack_RZegE~3{^#5IpJOwqRsxN`Z2(Qj zb8Ps}vEjeQ{QtZy-^ELxGbdhV@Y&B`v6{hfIfL~^2De=do_iVM&M>rpW!?H;>ePSP zyZ>eGfQI9wuYne3OP&WUoZ~zNni~?l2wH0=ei=L&bml+nng5I@Kqsm4od3_g_di3& zGls%@42|y?rv7JI0KSTS8)zLJ?z@A% z%lZ;z1F4UAwV}N#cufRZ%)kvE`r`u+)(L=nMv%%#L?3ie zHi*>wFQNxp$p+edYWiQy0#qN#fCttdgoM{VmazN5A?NwPNi*<`c(85}bUzWK9|T#b0J*0XxkCgw zg&R^IA@_!m>mWEAQW+V6k6AEHm}m^Stus?> zL6F)9KMARloU7J=Dj@Ls2M8Oo!U35?u9zTfP-RpLU0Ds;(E}mjl@S^PQXfHTA9#HP zudux9HoJqnMQ-JwNg~k7OmIzPkv0XirZKKdFRD#1tU)87O2e;01>6skcg#|>OO>}t zma&YJG>aB82;$T6<573xR&(W1a^zNY;8L_@m9b!!GJ_x{NmC|CV+Jt;22p(mQGF&+ zeMS*IqL8Q#gNQb0K@qE|NNK#%RKlma{a&5-T#W` z{xi(|3#xj;AArW_!fr4`U1zBL#xU(4!_>bFbN}=21s%e`vhhFj22e%7wBbMVM$l=f zpwnz1$BC~1PsexuW2pYXTKAcu{0&3cSq94u47y7h^p`W3Z(#C2!H{=TWBPxU{r?S4 z|5v>VLPF<3M|N`@2kmkdyZm4L%74Mj|9Q@V`b3;({<9wk9r`VG>c7a5{|rs98RCvJ z6uw~Z_|E`2MJ7U`oM0V^9Z`&T;wllJ0cVOdgx3UeoadSjmn;Fz|8N_2jJtNSF z9C*SAGHoOf#2^#`LZacI871*(&}j_9;SBr{p!>@Cqd|jp5;>syNUwvzX%0iu5uwJ% zLeu_>&HB%g|CGUE9fR~@2BlRD@~4;-&#=nB;#U2@p!$zV{XehTe;(C;Tq^&$RQ_|S zf^MCI)H}Qy|KaruuQq7097G&K3TXZp&;%*qhxL!Rwf^&fl|dKLX@j}i|GB`3SLZ)3 zc-e}bKd%93?2F&vKLiOGf+{1(`E`)}v%<#z#mqp9*@R90i<*KEpqQ7Fs!2 zRV8T)8oq+1MUXXe zkh4s&u9Smxh#>V5goM;bHqhz@v_=lHGQl!?4jfrz&4wd*ZwTBWLiB?mOXeW84}?Un zk05Mw@LF!j%|wuz2tp#Cu>cvJgUmZXHn)N<4F?aEZzI=7paCV+!?*Rq+jPTPAxJ;0Sr1kpLHa=uQYW|p(g}i-T0u2%q!U!5 z6I7!eSdEFa0;-S^WVIw3iM4uis$A_Sf$aK$Z2W=LN^lZU zD?!f31Fy$}3&Q7~JgPUiRjzX>UFA>+nk2H!m~D|h!z3A085zX&Xooke1=XqfRVaCt z$h+ptIOWLLrHfl8i5W+U8bu1}hw$qJa;SMQD>^gD+Av64F-e#+ikmTr8Zrp#GVp7I z5T6#Pj^I{h;8tbeQh_24B?e5yrGhTVsldP?&%h?jz%EA|Qf6RPWMI%_Pz`2izQ{G_ z6~nyW40C@e7ad~Yjb&h9WnmE)leS=BRxrt)Tfgl^)Us!GU8kIue{x^*oo(xX{(Ybu zcEoRk>La;_|M_9=13I;v13XLzxq(9L z*nfu3rwqQ^7}75=RDI;>1s}({2xL3suK%pZp!Jc^IZ$OJa0*l(aUA>4egcGePyFXO z4!U=Pq3jWZVs2yNLK+Oj>Wb5B6SF56ttG6qJY0tSgV z2EGu`a2#(418*Scd{oF{Z*E@(E+5c}dcFWqw@5S+R3C{)GYEl^Kn&;zQ^{OVeWcyW z=scGpOX$)ifdkV zl=_GdT-`usf*@54=&A+S{1I|}1R1R3)&h-dK~zFWQ1t|^jJUKxl@TxafFOQ-(A^Du zdjFB@BS;A^Yy?uqtqrQJ_;tZEO?v-@jsJ_7f+{Rd&HwD`pp_C3DGs&&?Arf%4gd3- z{ui?NFKh>@k3{VL3)_ND3W6-66SoH)90VGs1Fu+s*GKa9DD{!FJ)%Amw*}Ql!WRF9 zE&j_}fsnWbs6LV~|1WNaQXfe<-`5CyYgx1!QXe^#E`+R4fSg}ySF{weI03Z3uW*?| zA*eFKtdAg-5u`q{&0ho>FacLa@MUt4U8|7Qamb_{_*_#+O$6!tz-l7M$^`WK2wn}r z8Q^MYHiU!^%0Vh4i}V?gng~Kd>LBFa5Ts88pBsWt4?%iEkV{(O9U{;?ICxyn7_&D7 z?h}EkA@Hsu!x+RhdGMMDd2R?&`@l)$ZjnCt_Hg($5Rk*SAvF<%gzO=LkdT^4E4TqZ zHw3ASAiW`YWu%F%8iH3$$d!?XUj_1+w{SM{sqC0+cW@WUwGw*12b=*J%7c)QWq9t@ zpe;*SswePPCTQ0Qc~}o=n=O3HEu_wZ)I@F-s~wA%+2${@2CZ0_VVE${D6U5@szWCf zyf^_|6Dhjp$vS39+oecYCrX+{3+e?6==kwycyTE?F-u#s$(l1unJ|m%f%-B6stkO} z3_J=9oU#lYG9bh$&A=hWz$U@KA_hXN;vhDsGy|tJXn_H%I9O1GfmsNIm;@P^_|Xxg z0BF}LqYwjOB+9@b%)}tf$u7kZG0k??4Th;-8D{?$nfHyM>>Pt$Hu!W?QwFzA(M7){ zmi}W{@}F}fs0?RV_n&DWXn;%T#(%ym|0ThQsrNra^hJiiYYbtxL5t`TA2H1PCx7O@ z@}2*RxBhb<0#`a~{&Q^k&$;P8+j>xC1nnQK|Ie@%jMn@I^_A9xml@3b&(QUSq3$I^ z&v%9i{~0FyXKnq-5O9RSU^#=udIs-94Dr_(%AWE}{LjAnzvSNkG8g`<-2%-h34<>s z=LOd(tcO9HScT5~7wP-V;IWp$eXDT6J%+{~j6MGuW`h=xGi>?Kw&y?p@&Cf$dnkm? z{}(y~(!zcObS4AyanO1^&eNbhM?zEoGelovQ14>k&f_yHam!m1(7eO9VXI%$=Absv z9yH(P?M@}@)FLJc*wiqnrZR{|GYCb1hUo;t8TbPjczhXnyg*m)akwzBJAksRv(xxfb|@M(f87x3Y7yx?&*$Y>jg z1lK#rs}~@fjCeIbE9C?r!*}5AMFQHOEv}HEJV?C;SziY^k5tG2v_gR2^go};e?GJS z+{U1VeB9cgh2k6<|Jl?*NI(~KsDiZFe<{=dQfB`pOhBveIMx5ND*k7X{LdiwpIPHS zgV}#}hyScj2*hRgpUdvQsNH`td(fDh6nGhf6y!i5N6>lTGLE2w!KEFb>lq}$)s~zs zXuFfN?SCno|5BF!B`yAoTY->>#eWeC(BPq{`G3gy3_^yWDI^7#$2tM;?F&~s7O#cu z3Un!4?OL=3QXe7rgCKiXosh1&gAB?+NJupV?+`iUFLKOV2uF}zLXh4NgoKDdwhkfp ziLAlLCP4Z`)>*UR42U=y39f*kgL06zF=#1W8uY+z%e3iM8PhD&r$T0kAf#F9471c3 z<|)&`cSB4=n<0YKM3CMPq$Yy&i6E!_7{yO8il1N@*KY``cR&*p;OmD#45YoQ(6fEO zBKpytka;3V1q7~rK&PHUcBX*2?fT)J`r)01;hhFy9q`J?AiNDyA3;dH&=xcVseK@% zR$!fWP(5TsPA9NVJD?UgV(S4Lc42;5{{S*uW;O#$rpas|ryr5Yg1|IN{K0IJHH&~nxrdznomB#4tVWuA%`S#%-x#KU7n%1>dgf4vzhx%1GkMe}QYD1=Uhl!PnvCJYons!Qg*|A@mkQ%w2}kZ#+9ehc+nP{?C2@ zG(g9&`9Iqx5Mtl(pJO9vu#Ra1c)V^sxT=D%H-L;{-}hf|ALtrxksbfJ*8S&O`d@Ix zf5VOcEqDI6oBdxf?j6v^$ZS68KO>r>Z6A5tP?;3gX}B*Gwt}#bpTW!37rCUtc1>k`bYdHL6s59 zLC}pt?5F>8oB}N}XK47&;JAZ9q@01bK+dhjzkI8I({|s6t^Unh0$M=((cEh{nxrid z^Jrqy%VUs^W)KVqA^tGXif$fX1}<;VrR40c3@i>{UNz7? zr#$LV64J-v10R(D8DWF;bU@V*yjB9&N01Rb2nnf=_%uQF5p?)Y6SN2%QXfHjPrTZo zszk`(zlafN2u{KRG~fmrbF%?e3Zl0E+4cW3EC1!t0M$pJT1o3ak1l9$thfoNlOk{R zUk;3f4M8`wg6bow{|t)%x%B_D*n;ltV{`t`?gFZkSRDUz+JX?IJ_1!nj-XzWB>2Ep z&@p$g%1GJ)Bo19-X9wErBx?hju>w^|NcEAZ<$qywXniDX`d`o(R2c~y{g-oksO|sG zI(NB45oj*hsbslJ;cDlCRme3FWQhXgDhD_T8mk9i|7~Bm#3pYcWU2_#4}uKPL24Yw zyoHeC;~a9K%ef)7k7M3KNM&RXTeJXK8i#yx0$S|@=Yp#t(9j&H_5lsi%rZ}#2}aXF zBCz@hv}q`9ibdKK5CgWK$TVfTY07kSD4J>pUI1(YJ6Zu+?|@emUdNA`V$N;Zm{M0lahpR2hNC=%78Lum<(OYBj%d1&=~mmn?C+ z6mhFK0poCfgCJhL06uLmZVh)%6(=@Ddp22fMhPPZ0aXyB=}5PJU*zC_spJ3E&iq$D`(JI^ zR|b#e3`SEK+*UHgon$Gz!_fMhbqeT&aOTzj8F&5X2CrD)ISm@_<39)5PRo7lKg$6S zVn6<$`!u*`x9UH0`W*(%c?`U14B`bk;S+;uxBJy^325Br-wZ-NP222CS87Mi;I*k@ zP|9EsiDKXlVc-vA;PnI5L>z9Q4iTFZxF)h?V6kOjv1VYk0dEl@O={^`u{}@{>wN%Q}_93mA%rwV4Yj(GS`x&_65uB3zkFbBgn`br0)ai z2SIv3Xs2pJ#^oT@kRABI1bAHoXF%#5$N-%!_|gY^@Bya~(hfXf1fC2+94m)h0YTXC znh3Ib$|`*ZmYD_1UYLCHZBLMk4#ghn(Uz zR$~xWV-iwh7FK5yR_74aWDvAr5U^%&>Jn``$GqS_>!SY*bN({S{Kc^7KkG`+-9SvM zK&Qs>9{JC87F=bV10CkZavW42F)aPh5OajV>Hve?5eAnd44x+$ik@ok1g(M-xdK`~ z$FTA*<2umdYPL0?g#n!F{LE)+*dKgo?tJz&(QVq5UHjP>%*O%LA#2 zASC3NQ%J3ZwLap8t)SxoS2vv6|2eckRT8K6e;&R6y!xPJ36KLwL2CfaL8D%v;WDTH zQqCYG=?GqI0A6=3;P9W%?!SO7==LHhYtTFqi}rs`ga42)Mac5MkmY|iYY<|z{?A|zIzSJ4jGpO#7L)(%=7=gu)b+nO7)d&T>O@I! zl>}P6XA7#8(CQ;uYiNB08oRRrRYu~L|KatKupVeZpp^X+Rj>C}*()83Hn^28b17Z| zs*DO(IG`S$0O|gKDyF=JkWLV!4uTB6K|~;I+no8xbr6JYmpuSR*SSL=Bbk*XDL8dDS$UcPBQ^j zMxd>c_$wnweFPbxGekWr20k$aVSwtO*lx(CA-$*$NbQ46!fGOvwR5_l3yDAr72y3K z2m`qqg0LYq5rhQyj2g9r8bO<0k&gP&45)=7SbYRqR~AsEfwW=)tpY;kYWP*C`IJG{ zu{f11N3MpD*~m2!ob6P$5_!!7ygou^KsrjeY9g%SXcd!l)f$)TwQf~w?TVIK=PxqP zo@bdc+bng4al%C1sBYb`R?VPBwSX!WzY0aqVi}h_3A=O=t7H-LXd%-`A)`nEgK%Em zAPx;*J}n<^bx(FB2SynyRw*-f84Fe^6J`k`&?7ASNk8CTUZ41uH_xhE3j@P0p50&X!%lhC|VoQ^}4|%8Wt6UC26%y?Bq_ ztjAJo{!6X~^^f>B{}IX{@VYl=l=`u1&xd`EdI~9;y?2$(BVRyYe1C|=Q_}BdMq3NGj9FQvg1Gd zLC`>%$kqQMS3#E|@m&G+R>ZIUSGxOO;Wp?l389Psd9MEFzX3V|g8%G)<|ClPy+LRG zto<*%>_0=x7Y6SW4Ek#r%=a>c-C->G#8dy5A?FT*;X(%K8V2ni29v35E=w6=PO_Ff zWN81vJ{45MvaSEmv=>zSaGVAmZO4BGbf7oy@&6o${zK{`zLTKtk_hO^hOZ2+n;C=~ z83beGY}#FmR|Pd}hipmnY1-jgztyf_jatMM9`h;&`6LFB7zUvzQ0Iszn1RQSffKDh zvIE^=%4!4ZA~9Gpa5ypu2QbJ*Gbm(&#_PnRL3@$7+!#QeDL0T_E-walcLweN1_nn4 z!!FazL(uw2@-&P5MM!p4YYc} z;=iETe=b8%&BdYjpF`3t~k5%Rtv_%0#Hm>cN09@r=xyfT9H zeIOMOa^DBCjR;a1*?SQzUVLs3!eZZ$ESb)amrkNy8fgq#ANrvFl$sj!=c%KNF0k4ce zb4GDJke(5w4ubcFAY*ipqZJ@^5ZZaC@X81>Mh98N4X%vfHxq&H4G(SC3u!|^p`bY< zcr~OG+@u@Ss0$jY1Jy@}4pBfYT89W;6G0eS0aeIV51g&xSAmR>>mzu@gr{zT)Kf0t z0}LTc@{nsK%Lb&X=|=Gr zv?DroLR&P08bOtjcd5L4p_EgOgk8F*HE6s})FMXAELPMsM#w0fUq3{|AV^TxUqIWF zN8OcI$&OFio=4G|L)M%{(wI%!ltb2>L)M&2-jYk+l3URNffTH`!3a&9N70%`(VAP? zibn-X@~But5Wl*efTpvko(Cc1C93Bos_P@F>m#P`Evn}wqU$NB<;I{M#vtX%pq9@N zG@GI6Hvht3yc_=uZTm017gQf{>;QFVKxY)}{LiohG^-+T96S~mdzQgw4TJUy2K6Nj z#%n+~s|FooD0#uK0#xiX%m(juT>@gWEdS585>X$qZ3b0F9D6{Okp%b@Zoy0cxi9|b zzY6NJFzf==7XskBjX*cLUj8p|^S|VEP?aQb@xQ=DP%Xx=^S|=8|7tt`yRP}K(f^+z z@;-y}Ne1_$49O1}!jCiP&SlW)WiXz?U^t1vWGaLA4uXIXC@h*^j7? z_)deaOoG%$tOx&d9Qn_C0(8hK+lv37m38Jz8F;D~#FKQrr}=o!lYZkARPyq`Vou-jn{$pxB7ri0%UW6_K(<{7+CE=hz&es$70JM96zcFa(HDs_$#qPhV z{eLC9{|Yw$WvxL6{zzJY)=2XjfClbZwf-}y|7TDIA!fz@9BThL4MFvVuoL1B_{#Wt=F$Ast z3t9aa0WTVtasX9EQnvqPzz8&v1g?)Htp1C_>LY#w(8+s(+MxPK)$_Au)(ZQAb)d>9 ze}O~ZQir^yHhJ?Q^%3NV9ta6Pr3YRgL8>GO7cv$HuWOL|KJaP?(g}jAfp8%;k#)vw z5XzhmsgFRl54a98PoD}wAQA8d5Rf%;kP$ifOc1CVLaL9b_(1JXHy)JKpt+wf`#!hlsqZIEq5@cIb6p9nld z1gng6f?9NfT0j+3XbWU?4l+Qe4X%uIgBrC08^EXz+986~N08bFnN;_$RL7VkLau}0 zY*p`4T=fyWZh}-mXe4qk39X}qStTJ?Lj>87DhWb5RYDJra4K7AmAB9;XTD|jJoB`f zMu}65V*7L>I<>-D)PoyUe5;haD-^xTW!;LUopU7}vnA}(#ck8Xty9IUQiROn`Hf>B zYa4`&BKh^hg!O_2bpi!-{UMzu9t|%69Y0WQr57NmA1GuPfItR8LIyz~L4zQKpkaWp zVX&}au#icxuxSW{6gCZpAQ7_=QS&fy%Lx2P(ke#MDn`~SQO-I^)H(%(EaHSrqXjL( zc#H!XH2oP=y%}^9S#1m10;kDkuH&e>Alv_xYx*C)+5Z_9fjT=3t3l&z^6UOHVi zn#G_wkwK$_L8XO3a~gy30tTyH3=ub&Cj4ia^q*lm_#}kIpeY-Mh5uQX|7TtEpM4!@ zB?HSAP)|hQ3}~iF_%i4eAf5~Vxi0+Yy$q5CEvxGS>)rRC@7#Z$%OE6h^*{fW|NQ6w zi(L6He*M4bRnYz*wu7MSCm1&W=UDKcq4OU@_DhD?+YFIc7~BprSgd3)UB+OrfI)2< zgT{OYm)#8ckHq`_bFTa^vIW$?<39!(C*(Z|T2{w@@;}#M(CQ18ga6r&{1-hA+Cs}S z;Xgyn1qP)q2DUs#wN#7PIgs<)y&AT8HEeUQ+i9M+Qa)%3yHyQ?awce5oj@dLGZB9n z15YrhgTv+l8n0t_1(BeN$q7`~u-P-P+A;8YFe=6{Yo;-4rZLDTFi1o)i1;#ayD+dD zGq4yiFq(o8vpxfp9s`>x=wfcY2BW+K3H84M7t(TzdaGb)jdaa%e%TA$HCGEE@k=HUG2f{Abhs&t(8wZY^O28uC)M z{jY5MU&HagvCn@4-~XC!|K%+}NY?bfxc+}p-T%Vc{{__lbIAW^k^0ZT_l<%5H3Qc> z2G#$<_W!x<|8v;<=e7FJY4)Gf5rmkWJ~KOiVQ~M>;P{cj?k|Jme@0u-G2?u;{~2`t zGbn?OGlFcGl>?szq~!2l$>G16<9{vJ|0;I>6|F&5%bEX|GzHxN$ffb0UG+bkG6*q9 z|7DQ)#i0FP*!90i@PEO8|Gb|6*`5D$+kudf4XEn`n(Bfqw6g&n-5_Q4AF>NczyNf_ z1mu=m$atN3=3?9Ym5}<#K6i24`CzmVzC0UJ^+2u@f}Fq)sfN%xKJcms&OqMN z3aNmsAzh)Yxe%^x)?BOfSyt&YtTSdpR>Xm-o{VXbY6zLMOrHu`p8y$!1J4bCsv+2M z3T7#jK>Z`so)PH0Q^+ZEU=p%20ZJzJ!;j!LO6Y?ed5T6tIz$i>Pqzp@MhB^Skn1C) zYN!pilL*=^(g|&XtlZWKZh$b5?`%arYYueiTyU*!Fm%-%WZi;pKrIM?PwzxK-m|g`7HwM#AeOtPFVd1nD9nlW5%~mkQ99C3sKA zxoQnE8*B9hsf-}~BZz5^r7Pg|ky+{tqr@r3as7IcUE1NTp!uV~I`x2B&@ptrRZ3nZ z3LZuBu6c4Uprhnu9kV5E)5NTjrR`FrY?EbdKuFOhQNcPv&MHR6B1+mkQrbLH#v%&D zwTh9oj+L{GK_HtrS?gGkpiLY?&^AWSCQi;KPR1@)#x7RcHU`Xyk+B0IS^HQyhj=BI zWCF-JRoNv?-6dPYHAmhxPu?|8(IrdXDMQ{ZMaDT%!ZBUKF`dCUok2aCK{boPsGPx~ zkHK#VL;4wp+D8oi-x+59XP5`BUz(mWSk7UPY-A9xXOOI9kgjG>>|;=!#9+Q%vg8Z% z#QzN4{}}pyf~uss;F@Lre@5`w9m{IanmV>^pt=Bbmcm8QQK($!{&SoGS4PJ`D=0v^ z7JzzH3 zAGE%X^VolpWB*yU{b#88z~H)*LAaWMvrxda#5HTBU*k5P`Ym1!+q@gMJCtwIh@U0s z(#D`yz#yLrTFD>~$sia7-Fd|A%fRjlTFk)V2C9$PUBQ)+69c;gsNRu_6gJM|H!Wh* z$z)JS1YO3*>BPWb4!&JZje$WAj5I+T#Fz{j7!(;aszCLT{CWoI)eKUXS>pjCXn{9mK8Rf#ROzs2{AUDrWSG=Jb3>q-NbNtPGN@O=rUg1nN6_NG zu;qUdE6})>wC#T-8&E}{?(kpF{lAXqe`SaNvS$CKP5w(8gDN9IO;F#5PxU{$+cB-2Phx|F?_$ukQ(}85OPmL+T?TJN4*fEJ}g?ze^1M;2L2>|d+oSEKAz zuIN#!2tFKD#wAzQHBa8XP}VhH-Zfvzqe#iCM9Hf}*{fX1vrOK-5Po{9l6#4gdx?r? znX*Tzl6#4wTQLO5yA`7$$T_P@-c?H8Rf=AfaHQ;22|-Gp6{>#KkjXxcz&czA+~Ean zC4;C`^RH3$t5))^Q1&WUa4%7GFO+r8m2=3Fv`!T?O&2!LWVWpm^zLAYT`p3zN38Cq zSlvyIfHe%Vl?*(I41#G4{4or|2@JAz4CXU9%AT1n{?E|+ouTOsL(dO}{@&nqFK`KTB?QOC|IBAV2cI$=`_I1TKf{7Q3|;>jW`X)*tULZo-}*0n z3AD(8@8W;S%b*D?_AQ|6Xaz2S`c1s&L8mbYoc}L!`9El>_I}W^K!(}}41ot3{PrJ#?ZI}AR@8JzYqgq~xo`!2Tezs6xuKS<=*f3f5L1rGja+Xq6d zNB?u3`Y*osKf{v$+!^;7j8-!6r!oknsrk10m#y=z-R@nx-J^PwNA)Jd%oP#={oGbf z3~G4{GARt=@t~SWI0|%nD&#b8o&W|;PY~j81C83TIf3Se*qs~Foq<6)%f4_2gVb&Y(QOPO*O+APu_*rH z(frS$0Y;kt*)>4x*%(zo{SF4De++8B8MOX0=>BIg{Lf+z>NSYlfaYuXHU6_JfX;K{ z(FQGnmbCp3K~lE=K_`@g_ngYwfoEPULAytVO#XuwNE`j<)B;XFL-VFFSz_a!<7FF zx&IlWKQXxdXK??|ZVlRDByI6u#`3?i^?xO+|7te>HSPcFdw^;~4cq?;rvD`jK}bLk zROfPNfcDaIDF0`Y{m-HD3sj+)fo73}JpW7h{}&7SFB|<|C*{9h+JBAc|1$pn#hw0( zI)FCGO2GO@(pDfJ@#uqY(}1kCP;kDj<@?kkdx3S{V(m5i}2*R~Un*w1#>mbOu9GC=EL!eoqDIf+?eFQpo8(a;UB~5_TL=Y0P zW*bUk>k~n$Avg)|6Cu|`kf|c9H4&r&g6tuJuwi{7(7FY1Wz>pX6B&dy=><1H5TqJ{ zoU{Py5TPB!4L)E&C#YIGunJNKLHb0H8V8fq^{>=H?GtJFRcQE>Y50_>dzY$vmui4J zN06FG#j8Zwvsl@)7_zR?p=c?*4#H&Ms+HjN5rl!?DYd~g} zEC-$93$BuEz)KoH*McNZF-e?c6h8qHHM)`Qx{>WV5p5bFObW!z!8+3!}4MLj?LYs8K8@qIa>h*#f;Bkb; zfV4y4F|HTh21x@@GP2DusvSQvj_L%Vm`>xEPJ_rc{fJiGux2gru^S-w2UMwem8g3b zD>~=dYW-}NUFc{@C7}YRXHZyoGVenYYpxVg5k;1?j#=soGz!S+J7{?%2&*ivC ztmTVr_iu*U=b-wi`#VGTcZQyy&?;#*sAI*r0yJ|Zb?(2!El`!je&Ij+1yFs&diuZ6 z@&61{J}^}NXXpUcTx>i43!De7eB(L`S_dw0>OaG>-wZu(7-s!q*bQEfeHL^;kkCbt z8j*AV#V-6;JMo`y!+(yZPYiMA7{X67q+aJpy2}uFhQa@wX!a}h=6~|l-xyNwF+^Ws zD7?WiQTMPp?H-_%xqrQ78c`b2IXwfQf+vBBpL}i)*EuBH=iE^FL*yJmj?rf z3#dNgac5JBm$0i=@oJZKXynk&VUToTV9^I}H{%6YK+Fsbj9>&gH;@%X$t2rD>Lckr z3{rR46drM?{^Qm9&!Y05N$Ee6@_$Ap&|V$}rT@&DpdmPJ%l~37|HXX%O9g-mXBmIc z9#R2Q(B@ed+5b$6py?#gmK-b4w2>sZG6EeF=Lk9!PTB@E`66fgU&i{sh#44}{uea* z&#MbM)K1j!Kbz8j2D#S^A`cmOw=yuTVqj#djmSJ?$ZhHL-J&;GAi^`A8kw4$5A0JK$B)(To5sapS6wf?Vc^Iy^GzmoZX zWeZT(Oxyr;PXxa%$aY>`&?0TnQf3;^B|3Y5>rGx*gg#T9v z{;%WLa_HS#~+IY_mXB5@enTQXe5#J;-d!4EPZWplS%bcXbv>5LO?7m&t+GCxA!f zAhi#qD}+WOS4p6*5WF%cw@Y)B?(2wng zAUxF&_&x|wxd$nWAS9$jG5{AxklF{C)Cp|?qb5+51TNelH4%h_6mgLKM3DLjGA;)p zVcUo*;q{SDKow>+1nK^0`BmtE5u6LDi6Er9cd43JiJDi53JgIiBjoxBUNPaXn;^9n zq=tgrgaxULAS9^z0QZ&P42SZSmSOYFh3NT3r7 zK?_qd8iP*gW)gqd1OM0V(fw`qsBss}ehdra47`pE4EhWV zx(r;F3?j}9^2w~KnGCvZ;%WOCnx8Y&KW1op&(Qjbq4PUK*AIrie+*MWEAALqfEH9s zU;i(08?=C&`S5?ii~mJ0{g=4a0f5}sz{t?@O|7=G;r-r`WX$slu)nBQaui&_SaA_nx0v7p7?B2f$y zF;FBG#~>NOAR59T9K;|L$iU~#zzf=Q#H*hp>)ask+9YLN%pe=ez@x#yEYH9U+2G8A zR1<+JA_fK)5G9%Hn7@-j>NtbkX$HAx4D!zz%;Ud&;D3Y2|7!mKwLC#Lx~khfl{UI5skwc(+)fx8xXFd8~ z=)ix@?f)5O{AZ~A%MkgW!RsS~{eK4M|AKD+g+2aDdH$F4{V(GJS}&(+^IzEt)KO9} z{jX#OLSi~!nIs-E2y`$o6oPIz;+Vo9a)UwkA%oNl2Fd3Pa@QH;Z!oCcW6*fOp!SME z`#pokHwL{w3}*isEdKMDfR+LZ7=n)DW7PwNfOgmsllZe%dGjrE=h@`Vx6Yksoj1oi zZw_P=E4ceJ7gGB`NO*l@n+>|y4syvIgoIpm2fFPpW2SWmXum4jB5gPqykHx&i4|N8 z&4l!fEK;X|PE<&nVg*Cc)!J#GJ*~*q5QGh?j8Z0nvN>p#0+<2L?~tAmybgjeKs8Z9 zA83*&4s^N#q|7ml?}b-Da0axng4IWmwF+<&-Y0^zz#$}ZO@!GSg4aignh5E(R!BP> zTKj-&C`f$-se>RS^wd-Eu|HY?p!x_hG>2RtLFyfC|4MjG1g(#J%Cx{8B1laH?;Jtu zBUR60&}|Ez#i}rbTn%AmLntdcM*BU~czE|p8gYWK=D5ad#}(yep_^bkex zF+n!4jn&p!b1l+mL00uc&U!LSnqZMK$s}n4G^v1-he^@|Ncyo%ooJal5wsluykitsgX&@4g&CYY{;f;CLF&Y(m21g!J;EOSIm zGB`D3c_iE!*v%Li4M6i9Y+ek^?hL{?VyXKW%5E`~-)5+K4mwY}`6EN?XNGR@z}+m+ zk_OJB|3xqU7rgchSJzvyMq-9}tzK{Xe{ za`4pBED*_i?7#FC(5@(k86O#DeP-JHUj%#r9mC%LjQc?qitPFSI%odN@Bc5o+JgU*lm9c+|7Ixv%+T@*&iP6mTg2Gx8}eI${{ARGg# zk3_+zq)NszNXIkCB{9gxFvvtRNJlY9MS@Oa5D#IHixxC4khHH6wk+e+N@C!31sCZu z;QEM@fq@e|UI(gzV3--ifYe9I7Z~(zbL)TS*8k3=^_|1uFQ37G(7qw1|C}oSMf5;R zwMDJ|3z>tiBWKY3&!GN^LF7II`(+01>kLBA7?i(r>4R=Z)KKNgK z|9_=j|4o+sm+AShp7ozU=)Z{Pe{rAxiUI#sL;fpy{g-g~uVDUPP~$&`?0;s7|6Fqa z8AQG_FrQ&y*v7z63UZ9Z5;3b|A};^c!u~5I{MXL>Z(Q}?yzakw^M8w$|3=mSwM+i1 zr2UtS{?Ft6pTX`wv-W=m70?kz9Gd@G_5O34{MU^*W}0x$GIyRu&RomvIaWDyt#W5u z<<5pJ)t+OQJqJ|vWY2`uN7mWXZ9v_nSs=lz*^rtDPC{-Mf{>6yrj-1}exADRQ#N8sje zAG{7Si0eVChIAp#X7Euz$dwVK0zxApeIf{{8`i25+5)eGAY5=w1nL=S1%vKsg^-xz za*#8Bkn1C8O$5H5NDI1+s8TDS5>g*QY9B~N1F4TRe99p8krucbf|HN|Iyecbh9EVO zibs*Md!e#>p{i%GvU?$9*E2GSwN}Cu1RXF{vK(^wA%uikENiwI__$p&q!y7iOay-Hp+y#G+qF&h9GJ*li>$ej6BMCj z)_kk%1(5ku$XY&2h)B*ttLz1s$Tn{=8gj^63`b5#r)()>N1>v7ft*X8ltY%3O{$bls+@I#m~jM` zx*LO-DFc@osF)IoHO$z^P;!$Y{|ZCNU54r>4An0g8s0HcW5C!=Szu!#q%(C3y0` zq2e~K0Tmz(%swBtWR(|?B2zYLAvSmykf-uz!^)qk_kbB;N(RMj2GCJIQ4AuXpbL-0B0-%a zP|pZlCCMa!Y9)n42E`=Mx&`SN29;!PgM2~L0&e{*W;q`Q79-Hi5tA6WKH_3vDdb7`3|? zq^lTsiy1ic8CVM$xXKu~${EC4q$mFfI00H<&vX30%!&WP`~U0i|F5>=zv=4#sWJDRZ$Dj$>9vrUm3WbFtA+$A)&(zQl}a84>LGlXGnX(Q}au3 z+JDtm|Han+m)`VW?7)ASWB)}?|Cc!bU*Z~QYXIc@D8)`+tdP{~2;# zGMIj6F!?5C`CifGms#p+tIYL~*&>^qSzrX3A+pPw4?^ILLXhglK5Mo;xB>!IH{ey- z5D^Fo=@vnD20>eu=`%p5_kb@Of}CXvAt5bK2piHCg^*UM(=1b_TBS{aR7r3WUK2qW z=%XC4&XIA#M0i&SUI)P$pj+@DqjN~>x8cwn0aZwAwktqgB=GV%NKFJG z;q?)yNX0COx_?f^bm z7>#rUpMH-_x)d!(pyFkaYsCnXke(*wPBF;oC^mWXt#amCWX(2Cod!9}*C?(BlpwKmK3f#D9i}y$p7X848~;wEbi#d&yAzjG^P5_@4jL zS3y_4GHm?Maq>U^ssD_7{~Ug3#dL)$O0XODiX>d9D=BiAVYSLE|Ov@gGwrsdIqCfI)h>ovqn0bb{4B<3WIVS zgQzp8UkNJhMHrZPK$Q^_4|oa*TF`?jaQV+49@u@EJN_?&%OggopPXL*d7S=p+W+UX z0M$nvYX7-3K%0ytOuHJ^7>pzqCe?~9R;268t zf1%+2qGA8#WB)5B{?|(Xsh9oNEaRD4#5GQXnG6gu3=9UK@izu#1_mYYh@1!m=*nPj zu#?aTP<_Ouo?+5B;T3;0_Xk z2jbxMku6vRLPF{wNPPr8#%DUD#(|Tdd+yRef{^+M`D|^-cnG9Ef>%Ru2D~c-XCPNe z@bz+#`Up}18N;r_gL94HLv!GTSo$%&AcR``=tOluY9gJe4mg5O66r>Qj#fafjL_;M z?eJFZaL}R!cy9=6eFUk7Acy=QpMa_ZK4}5Ip9oY9K`JBv8V%I?NYlRpGHnE@iQq$U zkR=Mp^^vA`sit?S26(CnxekJl&Z&45!RsSPtpqKN!H2X%N@GkCQa+=Rpfv{Idr2Uj zBTN#xqeLVdQ6+(w)xql{m$H@cD#;OiJu##J2VKHdvc$GziGA@B+rmY5MT;RMsD>(B z1Yz43FR?3HY*)0{z8ItoqQRq%0t5H=KSKi;HT`Gk{LZ`{bi@$L z;r|R9|Fa+Z&v)cM>+b);C;!V_0-gB)I-&0Df02v-6)%IXixr;xpCRKFgW)p~sO|iOhB_ z42BI1>SYY_>EM-h0iYQokx&M)aL{HV$tVV?Xa?Ch2E`Nxm2?J;37&5!Bc;+Hp>flV7UY{^+#WH6equ=b&s_eWr{OYLosmcpcFW*=`np$S!xDP4*lIFtW~>3PRa4twDz*%(Kp% zXANGD4HRAL2e^52A{=k6x#C-siICTCL#iRjh#Z83^o$^E2nnf=KwDr!nxIt@cs>ZTPXwuqAg3#62GnW= z)`~5^~*!N7Y)->U9vdXZ1P=f-Kd5usy5SxmB%& zEa8AGCWKT>_QlIQikG?MFLKG5=ae%{{=7oXNWz? zAXv+wv0E|m3#dM7{?E`3UjH!hKkLT-;@AGO9|Nr?XFK>`^z?tB6QD!9Irf3Bc@;Ph zIuuCk@_)Gt|E13Sw>>i-Pxe;D+hF<5_Q2>Zkk{~J`=+n-~QoyNe?$iNWE zzz!O46Ev+9^JtOv@0R!P6m_jm$&B7#nK$g9)^x zfz!pR{gBLp|8k4|>&*YJIOV@=^?#9~|Dx&txnuuxh5zRa`!5vrUo`eVj}K_28N2;| z78_`N#BB40!T2?U#$^VX;|z-18KgEam@d%Gxa?SV(mZFsebE-X!p)9FE1im0xs}fI zty$`mGm(K)4_w#qfGbB%FbP?R&H`JA&IBH~V+M`XF@WnM24@C=5C-8~5aQ1TRY`m~ z47}M4JQ)n!=?pAcpdORndIpoN3>q82Xgh=M9tPv1T#lCoLmtV*zm?DXZ&3eVr|Z9N z_kY#y|4JSIh1&iLwESmj1tEqu@T$umaAnjFUJkJYbaXu9j{nTNKzpmX!IcsB>Hj=u zL5TnSe}N02%1Gz}XyK*k`Tw$K|BD^_uQBC6L-1LH$URoc#~|lvL&n@dl~K-AP-T=o z(=vSy2tlt~1uafMu7e=`AV|*$vpzBh_k$qy5egf;aTQ)GAy-DAOYxGYSf)%xu8Ghp zAP5&!2Z47ILDtJbDji5ogt(qHt{)w+XVhD7RXjB`71Pz8@>kUEb z9S8}pkKj{9@cKwAtVJuhNeguRDRg--yiWvSKx!g530lnyzMfS(pcYaMY5G+o*F>6r z)f&E_8%`k`S2e(uk%~_l zIF+q*tz6?!z7oDI7jjrDWWhOPA1?f~A_y09dLHN?zRGoOXs8NwupUSXl>w=J&`6J( z^=Kj>Zsi(}s)k5XIajVlMh<1G>`Is0mMnvImvR?aX3sNCpJkjn!#H)CVe%Bi z#D4wwUftMkjfghQphoq8dJX?N74IrFk3uDvTp{}mG3Oj^mo|QnE{2c|tns@UVoot9 zU0^7A#?bVIdEPmNbC6PV+!L zBZiIty>9$x=z7h-zLJ4^ErZKh-ojs?nyC9fLpQjjn*N`48|dP(e_CL?|{|pKL8BFgmX#Z!>2c6*wV#r=% zVA;vQ+sm!COw6`Z*s6`iFq_3No7J$8SwEjat$;xxi$OM%K{6F|vmU1}1D_j%fCp#= zop1o6N|K3XP)uY{PG(R{2lb4UGC-t44oH_sDCqQWMs)_zfEe-xBHZBdI`Etkv|3^Z zFLE&TYu(4Y{y)pI|E%Dn*SWX-=i2(8ck6$iE&ql0fY%4^|F3ZAzuwaS4Ax5-R8}(R zY-G^c#Gt>G!F&q{sV!lUo5P?nmqB?3gHfw#)?U!j#APep$`-kmEpjef;#{`Gu6UtK z<#N~j2@Je?Aj_H9z%>!1e*~$In8E!cMvyE5bAXhw+H*?=GYHp%kXS2&P$Pq20|Q4n z15+UbLkx|hp=`+v}q=rHpsWXG!@&KuWAQcd_ACxlHDs`G!@)U?5X!j6!<+fGQ z6sx2u773Fe$UJ@`1et>SK@bvN`(Ukr(CQ#aw+Q)S2>qyTC;~56(2ME-BY0)hid-KV zg0E_Y*FoCht&o}sUJXGQ&<;^(3#c+8u|5KIk-+s4q#uMrg6|}P^o$^tks7!fQt>H+ z)JO0t2~zt&>LW-U1eqIB@hE~Yz{?dtlSIfpBL$aS2vT&-Q*g$aM;&3j-k`2RL9Q;acFbkZuTksX3$?f{^gq2a^Hm zzo1t}m7snNM9LL*FCnOkf%S&a*qHT^Th&^3r25FE3Z+VNs#s%Rw!*Fyv|Pj1ViR+hPn@ITmDO*{LgU#R3C|6{m*$4Jmj(rJU}-Oe7?|BSOa(h zxE`4fokHR{{@-x_e+IpN28Q{J3Wpey-$>Vki>ZEak<|-20)lHds6Jvj0O}tJ9Qn_^ z`#;09FQ8*0rh#l>-48k|Pw32l;q(6m&iv;%`=9;Hf2pJYRnGr+Tn?_1)Il3aSxo*j z=>2EXf5M=8OU!ViuEz~ftG6UZu`%* z=fC*L|MuJdGlcGEFx^yHG^b_kntjwz~dIFx8*87829~Gp9MnFGyluZ{?9-8 zKUd#>(4p&)cU|1zI2RodX@m!GG+(&hq~ZdB=?s)>x!%f~-}5oFE5TlK`)ekQc0? zv2D_4Sf@=#Mvz${$VxfL=p2NER5!>Zygsr_0bM@?U78E7j7$=t)sRKPBuF&`Cz0zQ z2pdw-!23ZE21F2z#9kSJ`$u}v%BVv(vK0w|wo*dsAjnQ42wNutv<(D8LaHGM39pQF zLR&Pz=K~=3k05;_I0^3-K~^k4CXC?SA`RavoI|DlmM@MfDHE`S4I#vyncZ*AQcU+iV3a^&Oob=oGMpC zDkEePQXe^0uZ1IsF5>GW%pMb{>H&|-L8RQP*MYi4um$aKlU*wi19zaYyUNw}Wh)?l zhuk;@S^H;^GuJ$O4rC3Uaq2Yv#7PDTeFh1AkiL>maD!$*t*m#sv}Y-|T`_}BB7;gA zvsDd;_e$P?bqrCbI4WK+ZT>HD5;Tb;c<#U0+5c>N!NYOOK>LXp)`4!65IhMwM+kIe z>taw916tuQ7c@s8dG^0d>~#jlY6iijf^N4!86W_sU-j~Tr3;|TVMWgTS3CWmX*p<^i^1TVurX-Y zC%+MB)-CycxJWL6=8>?tXP- z;B;c(_GIAqWe^Eu5DSJ)CxO@6$;C0q#xh7Ig3bWviDY06Vqi#NV8~)%nZqcwidF0+ zm-IPK`6~iyw}rKzi0VG$(Rn6d@Jd+i5(CE;28IqfpIfq9{tK@H_5K((|A*Z94!J{$ zaSN!Mz_j~6-|_z*H~up;-(fJ_!r*#@-Tf$|<9-I$Jq!-pp*4}-VsL%bWSY4X(s*+z zUg8QGY6dqy@>jZ)tP3hzz#wc39**Mz&l#~ZFtCEg48he8D!~j^CJA2n?ajcS&Y&@e z!F)AC@Nv+&e>o3%T7U6O`!Bxmzu>a}A}jxME%?tg1KiA*3hn|=0Z#)>MIxtx+a%M$ zyUeD7Rl+Ny3E=34D4qdY*}||I6bP(4|8wmF9e&Gy3Uss-AEa}19@P2eyZm493V0_q z_%si})1Xaiy6gTilpeK8U1*oN6kZeALYB`V%@D!6JZQZh(1LAn#|Kigz;73_NSy{< zlK`FxLau}0Y_sGkklP<1B=Tx)2-_@ik||PG2*QR`KoAmA?|{g7(54K-*lx&?KFB2e zPzCsi9E1VC9S>3sq1@0K(F(aH4{|3FqymDkSb*#*f{)0dty|Cu1r5+aPT|%52QX)Myia^>LX?MLPghn z2!fuC=9;GnLnPHl_Ru2@AqVY3j(>-e@cPIWTrt@fE<&!79Euh@qSi`~wFI^$%WR65 z+LkOsL&)7FNPUDxLW*TH5?;{|!+=y?pn<+h#F2Vft0d>DHI7IKq7hVCRIEiIVVAbU z`$v%K28|05gphD4$kr+d2{F^9d<}?ExyGdedZiR}P!T+#=3Kf0w0fj)31|Uc;UX*W z0z6Attz?=y*(3#2E9u5|8%MS4hc#=4H!BC$O8YenyHzu2W-}kmuWNpGt2}RVw*u1*9x2iE%s*E3o4pG%k%m{QcP?9a~}FH zaPmL@>Hl2E{)=4%t-E8|@L%}gf5DUg`HuaUKk{ELXEe z&|RY9HZLV?Ux_*HVA5T}pp?fTmIOM8n^99vM9N@DT*c}+y>_JBZakw#X zdob{Lf-WoP^#e`xOT{tB#DH#(6^~~Sj%E;wXOK!~SLhPdnj)orSk>&Dn)xeDtGAl= zU$h*)YB>ByH1-d;YWR{qJ?_zwFxo3^uzN%(gPv>|!w8%wV;V!F(Nq)_exF*$nziAoY=X_HM|y zw{y{AXV8+HrJ%lW?sE6?jlty$8N?kybr5LrIb<0FJC6E@0n|C-G80wulX95hlyShK z@vBSkf0McY9ajF=Ujag53;y#?|Ia)FgczrTqih|DG6NJr3=nZh^#qZf0*YTeO z)knr#{xj5_vrb!No3X?;Yc6Aoqsg zwUUN^jk;eoq&|XFMw))rYTgx)Dhb2@-$?}N6G3Vs$nidqQ`aDs5#*>J_>wvN^^qKG zfDY0dLL=pzvf*o_?Q-Yg>L}U3DkeAs(p|F7UjSOkmcIZ}J;6z|x(UvOtZK6|^6%%|V1H5j6tac!tgfBIR58NR$Abll=%GD0AY71H)A=N&R9+PY3 zDm3I)wc4!;TARSstbx}-5E1AY9k_D@5rLB~70~_~yh?)fl0eP_FS>xNc!$i{*%d9e zhOJ$<$eL}EHXSlLVUaSyIKEpys#_<#Lp`KcEx1M_uuj+}6ST%mEtA1!ra;V2&ei`l zPlFad@tyn6a_~RHjGv&nfF+;{EI7~n7rF7D?-FP-TIlkB#f$%W_JH>st@!V9?mt7y zaR!D829?zeiMIsWz{S)wa0hDse}*NXsRV|7|7C9dXE+X8?=5>4w3~^!|GymgCPALV z|M|}R=RN(O;S30Q?D@}NGl{`wzI4@HhGn362hJ1!dC&Y8tN+hn^F&bZKQH)ZI$nLy z(QW*O|9SO5_inSRJY?WK#lT(7z*54%70AHh59$Z;crkE$GjMr<&P`=^Wni;oV6kCf zb^z_%VY3I_aK!7zARfRV70DnU#~_i+AeIOk$l|MI;GNGPwt_+QIbwk3MMGwxG#>2BrTDT<00M&Wld?Z?qP4t2@J%{|uW!)eysu z|BO39US`_!pJ^ZHDiEQg|CO%(XGp%rpf#7lauWzyuV=7Y2bwtojnpk+P@BPO)@hlw z*R^P=OW_i?q6Kb63tWpAx)v|AE}rL7vplqV4ug_2c%cF>Xm1pBxdkT!XcUp3fx#Gb zE0b6@gLw}__$rq2Q(S%DMQ8r!S@>UQ`G1z>;12IXaOZR;c-6r)aQA0AxE?_xXM)=j zGr-Cq6%(Z21nC4#0r!s}6Gr{u7>6vYn+dMS7J;?^Fm3|ftH^QiKhMeke5XMpbpjVb zM@R`?23=eNx=ZUSm~r_(_l5u5CqYPm+kb|JGgfJHZ8H}@&Xj}HK9GJ8WE2ik<3LEr z$^=LXhmer@AV|FfAtAL7gbiAj0PY8Y?{}DF3^^Kw1%$>nN$3Mz!-}*Z0a^{k z^}wqhc#VUH0qGFINk|<8A>lQVVMHqg8HBeWBgmzA@Ub{ZO$4173PYJ8f>cJJW!vC$ z#%0{b?kn1L7 zHoO`_X4rx&BS?K@i%})P>n3D|ZQ&Bz!X?&4AOxwKAoUTXhJujr`UsiffV#^G!i7{x z@O5`^2BdC+bKwk#IJ`c>$^aGqm7vN9R3(8=I)cwpKZF%`#>}+KpCe zlPppum?cgE9f2O*q7w-|s4BEc+Ovm2uaH5xfx&Vb!<4U@r$LKydCq_e5Qe>=o)N>^ z|4c{!^IiZ~Mi)Sn(fk+wvu*`(0>Nq2@FCrjpLsQw*MFH22UbDrj{0h zDkJ9Y|AjAtZbxF+4=S!TZv1Cxd%~bNiJ{@6$m##`*FYOmK=skl|9rRpvs9d8VDMqk z>XoU!$FSr-=w$EXpatPl&Howfp7Ck_=hpboq6WGnmERC_dYiDxe-6$64AM6k825oH zBZeFXW-kUNPiQ}g-5pduLk{s~wPj$oW?--dAr@OueI(+`t{lUtlFXo(0IH9K<3RP1 zP$PrDA_j?73}QD}6do}t|727D%b*H6Kmc?BgyDaFLuh@(qYt`Nh+FSJlfqL5mXi#H zFKo7hY9ax^k4PHf4#;38SFQKR;}A?Ww6}{8bec^1?m|Y zEMicZ#%0}SowMJyXsJ{FB99Ufaw%HiRzY~Mx(2x# zf{e&P>L5sk1z}GIS6lFI(G>9L9HeIiAtCdLQ^5w#gpSu?t&fDRfT|>ZNKJI*zrZz6 zUy18H=-?^tlmGR${%5E?X_YqDI(?o+`Ya3ZT@R3|#}YOx1mQyJ9n+MlXb5sD3uMd< zLPDp35+^~quznC)b%TowIabanegblRgfa~Tu8$xqw2`YJ{pe0KF62Z_2npXk1ZTk4 zDu7SCM5%@#Rg!*Kvwj%pc09;23y`@Xc((|$oEvmXCwK*&Cg_sZM)=$iygq`Q;S8;V zAh)%GFK>m^L=aNluNsap>mv>C3P^nfuY=&dAr-F@&@t3Vm5~y78xg!`1h0u;l~KMT zc)0?+K9YCIg4aioY6wokZW5Vmn====4nk&I=gvbzkctU3a0Ko;LApzD5>ifLl92ic zlZ4btcu3^B37PFsvK+FI4_@g&7!W~Z5+d$girAcswLZcWgbW5kY6nyjR3BkhNst;1 zjRakE3*LnU8Zbn?S{GgifuvxaGDtTKVhD15Ub5~1ZR+Pc`d{+Gf7!183=S{&wElCegLaY%>wr%321W1@ z=z?(!JUtA86B&3nGl=eHka)x(|B^xR7mFI`fNpLb&}|69#{b1lKnQX>D!cK2Zu9@r z`u`a?9x>RQ^#k|#7&e3U2!ldtH>hjLya%*$o_qg)-h==74*!?E^xyNue}GeQ{;2<3(2pd@(K3E5-caSHHAZzL%D;Xw%NARY>>Z3*fL8nV?0PV45*$+Ae zm-FO*&a>c)v@ZYWx(G^LTvz^cUHZ>+Lz#(2$=!t2SG?k1q3G{tK(qH-s**s*N>@SZAY>9!8L9bJse#upX!usC zc~^kO=)g6RhIfU!S2^-}IryqMNLL7%gmj1?B)BqyuA>93TX4-&a><1wMdutvmn=Af z?C6J+@{U=^^${8yQYG2u%tfn=;9Pip1gV}NB)nF_WLW1f!mNzYM6lLMkg^vm37HW_ zu6nStG3z79ay~Q?QXe4;Lh2)Ilx6!d+dk)GtB=7niX0CT6f33 z=fBv*zYKY|_*(z-wSw1QO#l~7v%#Zzi$O=HDqZ=naPdFyK2Yf-cK5%;gnJB30Sru* z;6nzi7!+a{q82jrUSY`Ez+hIxte(QaV8s)=E*X3)pTP0|d`JIFUHmWI0qP$K=z!`Y z4#oe%I{$gJ{xb-FXW+QQzm1g07R`@?zk22O}3y zp9nOCWDlB1Vs~KR_h66;231Bf;S2(?AjI1Xt&ha_GDtoK)kpF_m{tF=Yy5}QN1~?x zCCxxc$n?LE>3>es|6FGOmCgV2Dg9xPTCLRv+Ug6rsF(G?f40LQ#C-&`l0oPgXeNbW zBWNItulF;9&n5<=^`Lv?b(b@!F9g*`S_>J}W-zEu*Ns}_T(H9_Z;pNTOvk+WcG+`` zvlm(yuQV>+Vqbs2sr!QGr0d~}?q_WN;I;9;`?~+SOaJRE`OmlLKie#Dm_xRMfv5IB zCpgUocZ(q8Y;ZLM>G(_s*By{*2r>i*sf-|9BshC2I5sDOV-_+lHyJ!qHxb+m?gx|b zeiKA$8n`L46122~W&eMU!=ULx-n0L?&;RGY{GadYf8Hzqxvzo{-!;(Y0=~=txz2-5 zsbW9(YAh!=dDkI1OIq>2H zILNb2vie!m4iswLELDS5v1yYlaLj3kVOj$E})?~Mc9f3Wu*EDR2jkQ zBL(M75CWetqu`V+@0bNakn=ntBwBrBn={uYdk*~U9mokh5E5P`LFynIq(yoT`3rC% z$ZjU<{6(OO3F$-#t0X7bITaAE!B0MSDqP}}zrZPffn)w` zyPO%;ki+`YXIQ09gRH^RkMGrw@70d&(~j*^kL*#8?2(xH$p6BB^_!rh5|~c>X9aJl z=RO0vAC~{>e~x|s8D{(e)kiBpE9>}=|7V;3pP}(HLkoD1Qrmyf*2rG)pwMzq1tfR_ z)E)+P8CL(dI``l3=znhAL{JZiPmzH~UdldJ-6spY%|{bVN-{9;b8x9~7Vit%^I!7P zf1%U=1&{uhKL1~$@;`(5D>1GAd`kZX6#h$V|L2hZ#=v%$fn_NJ^AZN;RtAP@28L(` zhFAuccn0<)2F^Ip^-p{W415WoiiST1w7h}aA2fW&>dL_C2wHT<=L*G2~MeGF3fSQVbIDE;72`@^a6pI7I1?zX=G5 zn*L{y|H;6$n%Dk;$=3h&2mVX!`!99izt+zGiW~mRF8$9j;U8ngTZZHt48A8A>~}Gk zYycs{^$fbJ8MKx%D9>Y1nZ=+mjX|Q3L9JgPVXaiwc8>II66MDP>uyN2-skFmFER5k z-@^YQ%l~sM|IfA*JRk*GG`yJ4mersc0aJy?c?WB*@}PNMC6Zcz;F zFMsC0&V~Pam;P&C{;z-izurv{(!Kp(@6LbSTmKF3{g=G{U;5^M>C2$yD+W9NGc=vm zOIc`?vD7SehAF5@nhNO(K?dUBl@Vm{%_4blWGDFm2#-`J(+d=gaXn6k$XeOHH?0)U zKSTLThN3SFwf{k=5xiM)2559n^x}W1hu~!lOF^^wE;s%&WS#=exwBa@FpEiO+fChd zBe-H3sCr`103Svz2j0;m!Qj|qx%i#X>HqvE{)?RWFM9GnW72;HtGD9X|GDKs^^v6J ze+Kb)3=EeT80IrDECAO>jSQ^m4D6WKVi_PLk;)*J%pep7UIgv~ zTHyd1zHB-ULUc@KSr*P#0)_7k+=n@KH@k1&ujdj z*XTdD!GBiO{|vm(8Q7LHu&-b!ek42ZKSS?3hT3}!8CMx1jxu;|XRuktV6>dUd<}!` z4p4n$yAxC&X)I$A7h<>7nC-_lEO+m@fLG zu<*aa!vA~=|MSd8>g>z~jh8Y&dP9pql@Ze-a6JX7iQp>dplX5FN01&6q$YxpkeUe6 zZ-NZdO$HCWLdNC#VfE2u@B%!D7DzQT9XuX559A)kh5y+Xfe>hM=A!=$OaFn;lJCsR ze=*E{!!YkX!@{==i{CRW|G==~Bh&idT-*P%90ILo5jg`&OD22&Gjv?mOIfIwvJg@q z!7Cs%2BbcM)I<;xQZYf+znLaXgfL9vCqQ~V5H7N~F?dxrXo*5R=n@&oMGv6kwc|h+ zul1Ru_0X{2np&C!MZ|Fc0iSyUxk`)mAYTGiZ_U%=3AxWU7_k#uIg2; zhQv_wECcn42-Qc()ewAhE2J_)o+nar$yIdDfsl|YN!d9IQVqdLw8}`qAste7VI?uE zCrlB@m>wDluaeLhxc1&c*6Be=%uzmD=wb=bYG?RXT*yLMT(uHh9L|7OPw+uMcpZev zK^Yz{;aQ;b&mpIvn`O<0>|HWVpJA9dTRFH_IC-&Q<3W`bZyb;QSG)dS=`yH3 zVmtGn<@|pR@PHogng7yfL1)d$9Q@BP^$SDMBZkJ042|CyI{z^&293^1-1;wZ>p$qg zy3L@gUFEL*S6cg@!E`YLgA)UTB?F6!g;!Sfgf%jTzTkCYA`GnjppyhZdrmnSq!JCP z4l=C#&$17+jg({8e@0Kxh1ycO|9KVub1MJm*Zj{S^%!))0s8_5_5}<)6B)QCFbK4O zkZ=uyNG*e86$nXHGDuZ2$doWh6flUUFbKpm2!u26hk^FV@&tf-OngBgE^i0}dk6z} z1Os0?gK!T6-#P}#oeZ)!85Qp`Dt}>B|G};STE!<~^k2dZbYX;$`G0<65E3@}&#LmD zLF6n0*I@?M`3wxx8Q3PWYj0yP*v6o~mO*7LgT_V%wT;Z0TNw1WF`DdVu-wI9e}KX5 z0E72Y2H(RB-Uk_ik29p-W~h28-1|pj!GF1B|G5`}5Zh8vR|qr>1Rr^u4;}}Dbav)| z+4DhF0JJx>0L-2b?iN8*LdNJI9Vp0RImp`k8L$cka$nUHFc(riLAphdiV4DosDV^c z5D|!hu#?`oR{j@Q|6g|4K-Gq~cuxsgE>#t5m(pRXoc;$g4yZd|NBzfCb3GK9JEl2&v)$?HQ?g z6so!xsJa&@!^YwuT_MQa5Tq-FT>Buim7Q{wopRvSkdjj-W=*8vkdD7T!dgio3u0DJ zcos!~cQY-t&Ru|6D?#R=D5;O&^$souW_<*&n2;He+5~w&6uh?uVIYx9ol3AEhzZCf zq<%poAqqgJ)qz*>Ih3!o2k(A`ETw}hhF3#y269aVua6+U1N$9%=`(2cBlvUA z{wp2-uYBS^?;_A17S5&r#W(+#Is0Gl{(q@E{}~Sb2hD5m1nq3)J^G(*;(vy!7X|^d z83bb(7!();4H-lX7+4j-eIg-7c0o{;#3&3p5<#=TwEh^wd{BKPeHOfE+#XaPiKzb< zRQWHg`=3YcKeOZ`29Apie9IYxR!spex$tYC+dh$dxfj z7c)p^Fo>ryh(t38MS^ZEhg=aW7|OsK!oU-ZQXlcIh1W+6il5;1k&wZE5u^Wn#vsIR z{GVGNbhkJI&r1f5gA5Ei7#L=P=CD}$8Mvl0Xs!nx2c)!`L3SB~)KUiNWekcd7!0;E zc%R{mxyzaVQnLQ1TJL|8+5e3f{Wn?sUuy~I$|tdT|3&A5_NB5TS0#`R&TLo(1gUW# z_0c?V?E|Taz#S#V1^+?Uc0lSZWJBQf5xm}kbZ{Uf^1vNL!Ax)^1=)o(9W)BZvfw}0 zvj410!2^S{Kzz6)OXEqv*}=#}r{m;Os%`!9VRvg(7^@;4q!Ul}iYp+E15{Iq+L6K)vJdnDd+ zLO*qpPSSix?E~413Lzo=AUFvbcEcngl@6SQ^p8-s3xRt^$g2?`^$uEn1gVDLq+x6) z_~On^(84*?%E%xRaJkUkNpGV&~gteS%?PJqt`!5NSxb7~%i5Cj>5 z!&)CfY9-`42+}7~bk0$5%2sj&BaC4>NF9VsLgpHnkC1;g=Rc z81RY-R7mE{hmi0IDU6QNBFOk2au*5C1{cN9J_3F=*7Xu_K}g*M-w$P13c4yCQrbgE zGU_9E)dR0(K-CatD0;|IN<- zm%j*FH3zzBV(WkIl;WT>Gzb{lEUB|8lp%%f)wrX0(}i|Chb`U;G$&aaaF; z_7(pncY#Q@^fe5EVc@Ic#BTrq=e5rb4ZgG4HWcpPZXNH_}AT@nZZb(Ahz)1)B{=! z9%lpJatJ*V5K`$tDjN968>D`LkdPV&JVeH@08~dXE(DYC`Uo<+1X)K1U&a6#g@cgr z%|w&IYq=rS5TpW{3#$7-_1Z$vgcQT#|4hsNGq3v3vKn+iAH(c_0<-_{&-$&^{(?8_ zG=u8`2ID>k{Zi+R-Zwb2iym9q?>B;@fp1YU+jLc{ei`-G-pzHBYOyysFyr zs(kayvQ01Y*FMc%{kUM|liXzw6BgbLo^rvd^RQm&daJa#W=S(GlBd9{9%Kgcv89lc zOCd8s5E9bmfv(Mt?}gtk1gVc8vdA?NgblBB^kci><8qM6AV`-7%x;I*N60-OI2&B= zK+aMCT>t^8kC0FGF$iyiEZBySdZ8_F1UW(hPC_;cK}bkN10lg(p+>F11~96FFIRxb zs>Aw3kSQVPBoVAz1nLmMY9igj7b5g$nTHa|+Ix@=h7ZNX{`G8OhkEVy%hr2qITZkP#usng_I632R3Q za^el7GD0RH4R}bI39pZk8Q{7JS|34bB}iclua5{a;H5OY>cPrDtB-6;mO%=6Y~&KG z{UeA8$RxZ%L1w_uKS%D1!RsJo23!|JCD!@~;yZ{V;Uk$425h$%=x`@+FhWn*aLk+U zl)u0wZ?PNb)~D6>g{x(A*GgutWJp}ekiMRw?T*M=&?T)xNB*<#1>MBQeH?V5fY6Em z66gNQUidF@8nnn6R8?&T^=>3C{O3Ii9!i=5T9wDX;=kJQ|GMY?XP^1+zu~KnXC7z= z4FfaygaH;XDWqZ^DBSYQaM6Fhga1WO{O4c$pP~2#pVfZ`o&O9H|2b6ti|YPoU^vad zFp)uQ3!BX)cH7I`R!12$HZv$JV33~8AlDB<3SA5eT?~qy3<@0#3JnZ$bqw+)3^IkF z%1Af?)I}1E098i(;7KLkKyYOg&cNNwAk@Xcvx-4t8-v^}X65?~@}HU2ez9u&=h6jL zMnd3CPkaX8%830T1LJ-MhCSfxStm0vv@kHVGjR0?Yp#~_J#Ul!)~fBl$CUr3^Z(l{ z`>(P5zv6=beDnVE&i&60MxYzpA(acH?tqYxE)RSl4pK2emLfnD&j!y2!RsTiN@!)Y z5Y#yW)gzE93^K3=uZ$qWbdWJO__Dgm;FTwHz!N}_j@bPFObh>Wt^kd~32gk&zu`aI zDo}j{I#ze;UxrzK*;f2#Xnw-rx{N`gk%7N}L9UX)pqs(0hu34CX5@0Sv@MP`hy1%P zgipB?J>y!!>}zQY@8m4Km$&>u#l}~SyFT}y{NH!tU)`<`RomW_ZFy6;{(145=S8cZ zr7eFPH~+Rr|4H4Fb>_*lFzX{kKM3gzZFn67uYlkTNY#T}AAvRv#q~fcAhg8^`mxyW zT?JP|pbn89=rlRVxu&2_5ac``w0j@G6%eRCLhb>PQ6GWF;y|ml(dr{`byKeq*q|9u zuNeSc$PJkr0!x7g=pcO}NYw)&A+-{;Lj+#Jtp;8-2U@}nz8C^t8G*LAg0IGd49!7$ zMv#6Ga!mwb^kwBeDfBzR{EULQdikW~{-g^LIx$N~xYT0X4x5vCxd zkcSlb4n<2LTri2{m_EniCD>GAuhbxQ7QDKFFpz5^csB|n1@8?ZGr%qZnGbgx+yM{< zWS24AU-0{$Tnm@E7A}K?hGXdp+rlNzg)3Z(S2-3hvCN*MoxDgZX`yiGHp|&}w0Hft zJp)=91*&nj|7Tba-aoqfC?Hv;W2G{xkT377}nMfiAX_H2*KC zzJ-CImVv86(PtM^{7Z&_TMQ0o7!3C^D6L_TUdo_29kfngr?!v3r9~Y~XMMk=&jPyxt%L>Kwu9Bii5SLUj>OqFlgTHZ^~{O_{!zw565%4Js>R9hH$(;37Id96D6th+=#x-?_v*%z+$ zuH6~dzAvu-Sn`yUsnbp;O*@%B`&{Py%Q=g#Wi7s%GWUGU^pkNjPp2)unzj69-pbo~ zD{tp4znQcAX7=LiS&ObE&N=5)yB>0kwn@?iV_4M#=@UU}A7m0z>6pa!n8x**#P*oP zf~JJvH4X{`X`QxlOt*0iq8|k55W%Y+2*VI#L=N5$LaT-#=lMWLNKFK(gV0EDpQstU zr?puxw8;RB@ZZo1Ib{yg34+u+5E60^5!xsm{7?l*H3T6c{U9~iIX`eNF0mBM#ovin4@T5wy8APr)S*(k+72L=Y0KK9YCLQglEd6m}iR*GG_A2~Ii{E`qZm3@6w*dyolv89KV28>= zDhP+7rI1C{^)D=DhS_{Xz=Re1O z(4h+aCqd_#g!_8WuP zD+aE63?la!Us_{yQ%I@3iE<;)?$g^Zzr=`NuTpAH#f5 zhXu6W08-{dsu;-nw*~(h=YtTa-vOzKAS9%IK~@Rr>_B=?u={byMSt`u`lD^WlVwYdTp5WGlLCw3u z+V@3t?u+R?lrZIZ%8b()^DgBrxlywAe)X1TRa>8zZ+eow;#TIe8(B-QXDq&&y5LgM z-1CVu&&EzY88!8|Yr`h%Z9jAcA0M$g0yIBoFoAijd6c4#F z((tc=*GF2|=7u135S)bcgCI*3AXO5mPXylCid+*x>LWE+&j_+s0kbl~sEHs)p`z4B z;LZ`KK7!Oia1yODf(+J?R3FLOrl3_rXk17g1V3F0%mCd#0zLw9wsqEAtjH#N9uk@Z zuc2TJgsg4Od@ur?4gs&W;8RY>)f4LRdJFCI;0XPEa!3V)i-go-$RvTWzJjHod&~=# zBHveT59Y$fAzX+8&w?V}Vmw_%%hxO5*g5Z^P zaEsuAa1qFAa0m&Jg|HzcXyX#-I@$%cuoH0|!3W`j4sXw2;8486rF@NH{&J19c~ZG+ z%sNjPuK3`8?7!uW|ArU-^PU17%F2HBKj-=X!YBSStozRZ9;{>8{$K1Q=tw&Llc4D* ziBtcD{bw>TI503+F&btwtzgU+?%jb&3WXOQk^5Ncx(oXa4(5VXop zcr$~{K?aqR3}zP@obED&-Dk`B%Gdi}cFupPr644*{6EiP@Mt8YmVwLxL24iH0t3)` zYzP<9%>l1NK-5HVDI^AV*?G`D5%W2⁣*J}C~=OM=ZJF)acOwJ|LO_d_99 zCBjIiMgN%>|L0o{TAa_l_&@u6@N{0^Cx)4y+1CB%KJZ`i+<)0y{}t~3SGf6K`ObgM z^Zyx^{A0*^$l$gYbT5YCb_U%Q3@Y;(q*@uon;7Kl7?hhBG|Cv%N`x%3^g`Nfk~`gU zCqvHJ^Q~AN-n2EceNS}fzUaQAiBnFe&c2wp>Leb0>fK zmGt>%QsDgm0i-vCNkV!+klF`Q6M+ZiIt(Jw5pwMV+sE1pzXQTBqSYY01#;dgWQGV*AL)jH zt_X*adZ8`)VXcsw2ucPw!0RLAQ+*&65Hbm=gOEw+jd=c*S^-s>Fj5nCArYJbx|~saxK2fon7YISN6hTN8*iItM#oN$%B2b42dQlO$G6J2q0I!B1D;D6D z5qw+@vVu;*J`)+q*`^~SS(`L81UdH%jYO-Jz||1wI$$tk7UW7SYw*>LSnDH*AgoHl zS06#tz-uB1gQWThwVpz+o8Sx1;nfpz-wLx)s(&K#YQu@VX4tO)Fdi zsdSuR8sS_x!xnsX2)v628Q8QhTx6ZM*sgGydFfhhNL(~k-D3ue=3EBOS_Y;*P<>={Ny_JqV#1gV%H^$Wb41M8KX_M>KQ?l z!AY=Q#`*u5zzj46%-}IPt{Eux5$MVf$QU5Z5Kw)z>_5|r|BP!uh+!q@DkApf|Jj%S z=UV)qeF3O4VxIkw>E!9oV`B9q`enkXyoagX(pI>LDX? zkopK-2Z3(E1E25%?+`)8;vl3t?3QrI{ooK1vMd2kLMtHf^bmp#?ioS$7NOQeCE(r= zXlEq8G1Ysz`R&;}h=1^)PNPUE>GJ^DpAS)IWA)|A+>m#(92*QP&TZoGUS3on6 zWsxt&vPho=Ii43%0pTEL;~LO|b(cUD7NjOZBa!<@$ZWgZ1rTJPw-8wz+pN=k@K_%* zi9YTJ*`*7qeejd;ngUZ9M9LmE)(5E?;DY!Wn5H3%K=j(?FM?Db5H6$=gp%O9dLb1P zRJIVM<s|RTbRN`QVm|>Io#Q|IpZny0#$%uZh~y9bXGlBBpf{ht;34~>|595)n<6=n|7Si8 zI@3)3_GwA&kQ2#He0h+iG*8R^Q{g8p-D1**A$&_adRbLruzB7QTr1uO7 zcNx4-Fj#G4&|eMe7O75W(CT4OX=9K~Vqo)UU^Zl6&;@mf7!<&Jodm%v%q789k}7C_ zE|VF9PMt~B83wau3_?u|`in)vPO;a&lb8VNiSWz?54(ZK$`}`cXJ^1{(1L^o;H7bp zt`Ma5fz=Dpm2$9}2(5B~a*<^r+95R&oDE^X&7Ak23A#SvFT>2g40HZ6%mWR(aW4BW zy#Bw~rvH*#|I6(7FTLl#>^=|zF?Rk}-v3{A|9^!;pxQ@t?|)DYwHS29o#MIwYIpz3 zUimL@5rjC;gZi^P=l}DZ|IdH+Ki7%>oLm1hO#R4^dWFG#8-w{`29pI0ma|x#7PC0d zW3ZUaq*2Mh6U)F5%)k%;IxUbXh(Xw!LBxkaGMqsvfk7vO-Mm=9rBTACQ!aFZQsfl1 z_!*i>v$fM_>u1k5&Rbw!w8Xk(iCy_}*P3-M)oUH{7n!C^H%XjgoG=-(bOCa39Aw!x zWObZDY@cCVKV%;(a%BXmi6He3RuZ!sGK}hk)IOM`L1Y^Qfv1E(qjQE4?S>KUun{>> zR|s-!f>uO}Rs@Kv8`ca#I-#IyNH4TSCm4E=0_eI25VrwR>A(-%)(xuH39N(ENAQ{m z!a(i{LD=xABFI=Afa#bswRProS_9};r&Oz34Lx$$y zogg?v*}VXn4L@T6zK9OKr3f-l1SgRzBec#DuKEbMPXuQ}Dj*0cW1R{?kYl5eN#xoG z8+#_~2*jD@X&?lzl^}H!oPjWW@D?-kN;;q{GapUfBx&B#ymsfSqAk_ z3_9P1H2;g~{uk8#&m#AqLE;{R_+19?v&wlN7%IOo)c#fHfidkdL+}L#$GxES z)oL>s6eck!G%<)(Fz`n(Ft{-=XfiOUGcd@5Z>AFf?QdcbW?&FwU{C^gmrNM=oEV&@ zxOU$ah&sh!wv63#uXOB1hLXp;tzQ@>{AGb0TMa)Q4pKWT01wk4Pxrvt5Csqt(r<#W zv62vTkV%L+kU1boO}Fqr=p;JuDhRgupgs=g;{RMr{f_?G{Y+y0Ag{Li=Q zKhFvf;#~EgVG|4fY^c!KtU>Lau14B9;mO0^7P zg$(=&p!!HCn@PHaO+KGTwUk*cj!89^K{=6GJCnnrM98U5+P_CBY_fLJT;r^zHl^#F zs<*f`ZF8*M=v2Gjv1XlP)he6PrG^=^G!rIigm)AR-2lZQz;+GAjhCk09f3kP$fu38{}X!<#k3 zp*4|iSTlInDwg_4E4T?>8NnGkfpt28hy~k_nh4MFK9DL2QXfGkgEU|_5TTFD!R`%L z_AZ6oMg%3n^%0~df{>7PbnvPN&VbZMkZK4*DuTO3@M;KoEjMz#1F4VT19T7uXqXOm z1|6h-1X;QOuYDj4NbLhBA$1TkiChz5vLW>mTpY|rsgEGX#iNn%DhbS(38{b}Q&A8S zB7#XGS4=k9bI@uc2p3!*%>$3=fqF?0K{$z7A0dl?4=z{$ThxbTXD+;Af-^`h&q-BJ zd?o0RzsMmUebGCq))7?iP%saIoQf9ML(x3jf;lcl^IVGNITp-xESTw#H^(7wj&0sz ztLz1m2{Y95mPz%VvEA@q=hT1Wi=e~0g}~=LGavuYbnHLtNzk3Mpwmb9{^vXas;vah z{Wm%FpJx&HHdwVU{A&OCRH5g1vnl*#5dOd*{D58k3q#a9j)MOTmES<9;sZm$3x;PvE;;9FC985jgWdvY0+8Q7gz zRN@(;wktN>X9zyZV7Gz6XB%tGF@};`%pLC-rv71`1zuteS&as%ARtTT(CQkrIt5bu z%mc3(K<)=YCXyhskg5l+5pFm{1hP_Y(SOFJpeZ4~mH$Q7|JT~`Uvc$+jfLN&r@Rzt zzaiFnORe>aQsV{1+LMa4Cyn}Um`}cIGwX@%?59qPU)j!kYB~F<#jGbfv!3eAda5|@ zo$jh%;=BLL9s19)8?-x&VJ(>44!XCO?Kr5-AaNda1Bmn$(7>JSng0^U|Jxq;ufFQP zM&2(5vqKCb8yUD)FfcD>U|h(+y@Ww>6NBhs27!GH;>#H1*Rh(m^4aw%h0NBCUuIFT z-mPwLK<~-OX_pe`-%ML_FL%wug7r`G*FDTz`yg-K{oJ+p@>btXUve#Z_W6hjM}k^+ zfkuGSW*R0;hTMe*sgEF}K^*7|A9#HPsfOSrr0PMdcaXX8ItZBonF)f7!okQ^z3>)D zWdyn60hxrsUe31w zLgKHOupT7=ucy!$Sj%}dH578~^A?g(ACY9KZT=#Mf_e7&b0Nqke=Zo!w#lCj8nP>x z1F4Uk3g$T#%(Kl~>`=H&EqAG0>Rg7z1q@j$89Fa>ul&q?=)dS0&>_#<=l?Sw2O-|0 z|M`!DuG`}}`CsJHf6>d})1*DOG041P(Ecf*|6g41Key(82AO{h0&f{OpD~C$6fpS9 zkoXl;AC-M(DE-7x_>v*}F+;*FhQQMdPP-XQ)-q^J1RYGu7s@tJvb_VOU48GghVvjSF-(~Cm$T;Ia`#jJ( z0ni8=q?&+`^TBvDM^?ZZX)LVAVXvtbT-D zb}xh2HU^>9ASAGlLFfRp+EyO@eNtYtG$U79=WcN=-{;eEIHc!v_>>Frb8e(8x|6o# zUiz}zxoaNguDPGH`d-eOyLs#G7p{9yyx~#Vx<{pJAC|4WmofW7Xv=o{yoDx-lZ?Rq zBWN`Qsf^-!^kYDmq`<2oI0I{agk1H&*|2&C)a$`oA3-W3eeeSr9h<`Uq0>z)9pfNX9x9QU@WE<|&Aib>VEhbrYyQvPgxJkctV7gw#{8 z(YzV(nh3H?0?x%`pw&%i-6b?GWMq%X`Ut6>g6@HWS4Q|5kWoQQlB9B;DvBXX3L(8D z2ni8^kZ4vw>PIxMVfq3h0#OEG+vUx%&7BSD0omovfg?zT4poyo&jEa#IOI@qr~C!> znF}nEW@;qQ(@9?>)p*!^-UHQL|IN?**F5)M8+-w}#D)KwXaB38{;$3CKSTZ-2K9do z3g7wE{|jn?=8Pna{xhijXApeP!1aWI|0RR!TULkv3|T+fivBZ{y<;eO!;tfYA@wdp z#3csL!=StEjFy46#t0QMa5;e&D)509lf&+>L(qbtgCH2RKu5U>M2Xn8Gh|)jEWXX) zw1dH79fR*qro^)h<@Xu8J~7Pr&kV10AXN;c%7C!p)e5{$Sp>RRb{+lNcIbW0 zj;Ey?9u%*;SGeY0?uuKv%WkC2znC=TsAu&WlcWg-aXor*U2tR&*QFoZ2{}ap^AuCa zngn>IgT_FvgOJ&J;ca^1ZIHPk$YrdM$_UaAf>%cRVW36Y`ryh4JR%1=Sq@eofv#l5 zJYxa#d^zL}5xg3L_lC3sYPI}p;0SUB9vTVVIs~tbyg{8KNCgBYOO(A!zzp~p9jH&F z>{*0_iXj)aDti{g_7i~);)c{g5E4=oA(QYiI!KiS?-Ripkg5kxLhB&-0G(r&oI@rE zfjdXgkvh9nIlEMNO(biRMzAuHvP_28LAV%@x(O}|uZbWGw3-OQHBXyqmO2AcADO33 zw@3%=Z-O%{(r1{bO~obLN8wOXu0=%yr0} z=a@Gibksx6Vwb$7#(66>lIJlbEn&@B$53;eVbWcWwZFx7|7Y0)ItH9|nSD^YR z@g{@c2?odA45lj?H0Lr%S23`sfh!{|@V(xwh*}8BVg*ekG0QP9XoKn_`AjA6sT@VO zgle8JxbBA3N6bm*7^)tD>Z9rZQ0gOi6$7b&AQcUyZi3W7ka-^n8?G3x3?j1RKkF*! z2|S$Z{&TJW&$IqN-^Tx(>;5w>2aoUde_`nR2tuXT7=pJj7r($PcPF!#`ecA2&bq|X-J+0gOq2u_k_7lGw4}GoO`Mz@N z>!P*K3RXYKT=gt-)ic+{Z!M-hV3_oPeeP?9X>S=?ZZo7`VTe1;;BuM4_z;84CI->f z45GUjgm*9qTwoBp#vpK)LF_Su*gFQ%w+tfh7s^7wPTjr=56?Q6Rkc1S4N=vNH-dES^}g3f{>6& zAH67)McT;qj($Wtr0PK?;ewF4AxP~5sfi$@L3j(KCW3c=;FXb1DCp)pNR^}!(gg1l zK^TzANIR$z`FuI#`UtW_0n!_Sl77{Y$_T9vLgqs1BT%;peDSXiO$rfo-z-W?1+9Zp#DVFI|!CZ*o6!Y|{ zaAcM`4S6v+q;A3_;gu0G1G5f77J<}9kUhD$>LWC9E6`D*^DMKW2)RnaWTTYQuoZpK zLOT~R2?bF`(%>GXvVf2fjmWhOel~1*256Na+-hqK=Rpjy%UxgzM#w6$vaP@y!N9(Q z%+1Y1zK0QX(;xVbVAwe4Jgc0!HhJ@H^Fh_LZO(j1kXmQWwauAtmA%L^YoT7+9KEzT z3OP$v3RbEVZ%`^+rF%@|IcawLcIF_dG!8sYyTHC z{LgInpU?9@L-JSV^q&lQ&l$3xFeKe%2tC8#zKg+PHG{!q2IV#e(Ma&7S2+d-A!z-B zLUA#$DKN-HF(@Z8NaQexWijaXvxV(osC>ZR`GFx|KZDgu2JgLGiI*9wo-lNNW|;a9 zw4@DQp}>1L$P3{hbO04%i#G!4YC7|dSypAo!SiD%w_o_YVJ7XKHT z{aH}M&N_~!pt+4|r1*ngd^|Cu}&Fz}`@Fobg}lylhhaM<*S_;zR|&vUO@AJV=j zy6g!K>b!G_vrZ+?Ig>Q!Lh{^;S<7x^FTb6$>`u4NGqgvL*wr`ASSQuVCC$yl?H>1rZtHrOR&!>2TWkRh(O0QSmbeG&Yo<$4Yt5$f{ ztnzJG7umBbb?U*;){Ryf(+uKzjAJ^DV>*rEx{Ttwj3Mi{!Dl8IM|B!Sc3>jNqBsZ% z>FL00CFGh2d>eQhxEg}q39cX345^PGq;7a41nGn|z$+urK?>ltb2_0dTCkf~H9|lY z6Qmk~kg&=KbeIq1OgVT>q!~~PIadKv2SLW=G{H3ya=imxI|p0T4PCkbKj8LVGubU1?4N3ymlAOzlFgw`_xjnr8uNh2XCt3(Kb)IoSiNUa1{ zU=FU2VB|zdeS};YS)@%dOGB!X(xxNVM-VngI<(=O18vSR}{?DNLpGg-~83`Kym$dvZVF9X-xZVCU#J*=p{LGN~ zh#~y}L-ZvEpTnR71WcAQX!kM5HZTZX(!klUvRa&2GvLQ>#)~HkWLVUgj70^P7rL*DQJui()|HfOiW8a$G)+y{Li}b zKg$YmO|;-Y!^}SnQ-6RKiZA=my$pOqYvXH%^otBJM;OBQGeqoRh&jNJdx@dpDMR~h zhMWTo8ubjqSqv=EJc<>vfeYn>7we=hu&>zc*SIaHbyrCHzQ~@#;k^e#y7om*Iutkk zc+AwJY4gu#FTI(!;!ghR`+2MGWi7dtw%}UI;%kWuE{DxJ9Wdpn&!po)GtR0coCMv< z!LgM=_yB|a3r4Na3@U#aWWIvVpb&Zn>K2JzW0gC~t#Vqx@PLTv5m|@b%5DcW{CDXD z@6(UiU=+PsKWe2;_%d;~776!O-Gu2D*^A6FW}BwZGESRqSGY_gsD?!^f0T}7-TFNxHTA9bQw4dnY5#1?H)f^+Bg7 zfa)X2soJ396u^~{A!=m=8*PKG$A(oykfm|0kopL%GJ@AfkaY;iq+VzfXk0G5Q75bc zg3#(C_!0&6;6_Mg1nD2asv*!d5b%>u;q?)`LjcJ3t`LNT*GKT( ztSaDTa>`!i;7(8pa!sV<1?m$a*GJ%Ls1Qa%>mzVYgs6|Oc8lQsBL$aSV(KFqcs~ft zKvX~unQ*qWZ5ncYgj^FL&l5>oCqXJ8WKz;H0dmGWG6^{W9$o>VGg2m*rA&g?N9L&$ z;oT)T!wg&{nL%o$wCSL|Z>iHDB&0sVBq8+?8i`ybVX`3=lUc@0T$K^BxMenImy=EQ z9E+^E7FlyKt0A}utjL89>%mHC(Ao)PHkvYc{{p$j!Kwz?5K`H8xeH*Uj_~0}C<8K^ zhAay)99|z;f)CAr2*OEhE=1Hv@C$+=hNF>o*>mi3=Gy1XwF8AM=*$g^tl4k}K%~G8 zfI0Sg3vIIJIc3gw%A9YXH{BtBhHcg|i`0eM5qnjGwu{uK$PM{J(%H2r*dv zXR-ay5OJS5<}pLkH4utA!Qiut!EQN&;VcHtRtBju23}9_aX_-*O-9g42tlzqu^Hwv zIIm>z-OS*>mm%r|L(WB}y2oOz&l$4MGB~Yda9qLQznc{_UiXBd;}gTwf1tzMARU|~ z;A#b40WASvgs}`vE&<w9O<2>0sLox{UAtrY4kk=GnzP_?{pM#APW+jE z{{MuNzgzcxsM!3pVD;Ukd8gxNpNN`zG`MeHKN8CQ9m)+Ei$jo6-FlH-1`$055k0V;QJYaDeD(*uK7x$7!D}MO zzEn5~Ilu=}*Ffqc2pd8|Y9c5JJ<CtPTQIMv!$2NYxN@^#XED zgxoEHOch~PL%4?KApIa@61fhNu}hb>O@kmAn`8t6Z!AKoiNH(gkgFjnt3*l51PDT| zi6CsV#7Uq-;FG7I*F+Eza5^-Dk`v5QCcxP6N@}uc>SRcjWSTnJG-ax3%2Y^Ygq0++ zZbGiAkn1Kh@D-?V61fgSV?*jMNPUFAN`ll)kg^(HbfYmKvdAPv0kQ}t8={615?Vsjz?fx@tv&V%m39b{x`hy-{{bPhMsQ>A?Fy3*D`2MWl-)1 z4LQoxGe{OO$X77Pl`$w)F{r0A2nRDryD%tuv*`x%n?*^x=4%Gm8pd|pWK44{S?X1} z%DZ+$c;}wPsV6e$UdUQ>EqCe7eDI`9$@+&Co1f;axSF~2QqtVB;gb%z*X&b`n!~BL zh*fKzsO}+g{Uf4A`{b;TE4m+4^Esv-vQs@|r(VQby@<8uQ47tZ78=D(HH@7Cy}d4> z(>S3MvIbi}y3-_~53{j0aOz~L|}CgXoIRAbU7U2#2!dx1mCX;K0*PyC=OB$!AWRM z6xN^>QU^Ip0WuZ`sgEG*H4&s+1eqj4tB+*Cvqg}B zIC=Og1#oW&YfS{Hk|5O(oJ3ScHp!6w5%KjA%{0^_3;S5Nv z4KWSE#zWd<&w(vFnG3FuKz&b0ePoe6(;|DOZT3v-%;~mZ22^FvRNK7i_IcCoa;Mql z&9cdvVUarDB6YrI#5UECO(M>Bgq?1)n?GmJzt3R4lfmH-L%=SEz}*Z%J3u`hm(>iG z3mEkKKv64Z)fI+pI-Dw7c-$sVSBMjB|*}L8`O#8(!^Do2f-wg9W_ueur{|~xhaQ%PQ z&HvfA{^#2EpJ&H^zU}|HHvebe@SkPvf2NiHxi^6j!_xl@Gk-Bm`O3ZZzxLJtlGpwV zp8L;r9)Wny{pUXWUt&Ax*lnitn+%q#8B|&s6q^_n3m7!Y8BFTgY&+P^x)`+U8DtU| z*t{4Rv=~4edo>t1j2I-H85F%4)FU|!lLf5v1a0yKY|F)6>ZE-;W&FCN0(#{_CaNaP zHq2RWRl32xYO8boRUZhH&K7lRQSq6i8#>!Ga;{m_JoDID=CQMkw+x6Hj3>tjOjIx>$ObiH;?NzitI3m z=rWG(wN9TJ(y*>@?#YHVH@dbx>Dclpt?xio)0UdW7bfj|*1GC?`J9u*GmaNbJ(4nE zUr56```p<^34M@y#4xhiC|kJ0Z0XxH4)5PXj?F zgW4eGz)A4(Z0z+BtX9HaAAzQdz}+J4pn4rB12hl^sfi$~6jTFiA(fF@P#wJ9L0&JX z4ZfQd(iPG`s*E)Ksv$Lzx__lQ?8G^EH3X@8K=(etj{1SrLEzd4bXzM@WmH0BWdx~` zz+|C3aWxU7K0;p04X=ja4CFdU4m>Ca+13iFh9D%o8j`X}g&;ijk(6aJ6u~BpkUK|` zmI;uW2tq>YBT#2NaT26fLavFR?4$`MN$@@qN__;bp(dF_sv$TD?jJ#FCHx&Fm#ss&_EUy z8OU`ItgZpwOAfIenS|6w(Aoh!N{Q)NNHt`ZG20B(y8+cEcxoSLg#xSDAu261W?H1r zfYkDknFZ(wYW7UC%<0x(gjOF}=S&4vM!C~$b7$J-&a}x~Y?HazC~?1j+#WgKXObQd zxvgI_=-p$`+rXf`mcecXgWCpBW#qSo!F?Tr;|f;0xeO+g85I&4_yZW&^%xiw!L<<+ z1A`C?w=9FOkBC7gL&y%Pyh{ufZx|}yGE~1}?fS_-_a8&$Ee7)r27yEd)oKpsnG9ZQ z8R8By)IMZxd&My8ALn9F$A^8>f9{?C1^53KIPjly=YNiE|G9R6>Ld2e|GBsP7vA$< z?)ZO=i~n`6fROsR|Kj`pv#$niW#!-bU-metG7`V?U-Z_0!E68dE`!kp(EYjm>;5xL z`@@z0kimT?gKi&#atni2C4*Tjt4%wDc@u+51%q%ps4`;nV&Jr7;Id*6wiVC_;?fCY zQt@Mu4`fgXV$g_Y(NE&GC=z$66Z2@1@#|6ypQ08sQz3eqavX@1jhHMG(I*|=D;3xy zm$%X+2H!fEVjoYzRxtK+b|M@^rJg*9BKB_sE@Nl{UpJX@YKar%qG{XtPgbn^9B?a(x88O9-(`NH?MrQXlDs zb-?N%$Z7<{4R_!Q2yt2hmhj5YEXe;#KRTaGc!dfvQ_n07T>x`Lb$Tn*>K?Esvv69GzKN1^M z0oi5GBNAC>%_Xw7g48C+br3QeQu{zih-2)s=h%WT1cCPrAtpdbcnt!njLZntN02at zS4p612yD1n#!PSnV3uk6EXZzXaP>6P4BQ@o1~|A1Gs>K8l0DZxYqo6$w9{mrJ<~dS zrfuc|5XzctpEK1cWr0D`JdLPT3PFnmU8gdbv@n>oGnjWVIL&47UdG_PnjvTtL)322 z>S)Ue3<~88ykX#zf(#g#Ef_d`8Tf)21acVUs~H?OF(h4PsD8=N^ns!KJ;$Q|Qq$ft zgsx$bj$_~rV9?0tckW{FoX4HKgT3Xh?5xj9%l@mZ_^+_wyUd)=V*O88YA>->o?~W1tH_}phJG-`o1w_U*_@N%wRf~L3t{JY(InO33rhM zYsqu(WGs4^w)lS1{M$+MZ^zHM5jEpN*pxHA9fxfzHXCIwQH`0d9Wz-wX0l;izd>vt zq~6hw>(`6zgY6A*Jn7KnONrByEebw+PGV9IP6Gc8)BQ;WZJ2 z0jZ3{&Ep_75rl-CY6z)`AS9$FLM9N85}15Y8rdrY9!4v>x#gfvN>f~zJ% z7B@?shFk|BvmrGUW~~ITkKi>FoPk+GfeKQL`UqNDqKc!{P*`gsyn>)RIBc`#5{V$S z1MwuJ4ubW1KvOmlHnOuJ&Vz_RWFaEfS)frhNKFJG;gu1b0k3y38IXzwR1<+ZM9>-s zYkg#qKEot!x@kJ-OnA^N61a0}lsVfd6I2;N>LcsS=@uE&EHb89rOmZUn`@If$tH7> zaq2>ov_%FXN%pf;`L18hI+DcXh5aN{H%_0U~bj`bk zfqNx`;Cf!|X`;sSbV8MZ4 zJ~EE!HjVBzjqZi){Q*}-osiig$Wc7TQJrvP5Y=KB-3l3DGl&EaokcY2MKqd#k$Gg3 zVQ`IRRI^586Xc{F-7ru!WEj~DsgFR@J&;w_;BBXnxgJP;1S3I%au9LI3IrI5xL^o& zf*feSDy&K(P#QW?oR=fG8=LMkI!TX1D$mxA0a zg3J>^NVNJ0UI#%Kk`_rI1Y5u$X_)}8jKt03AaxKXiK{*`OzcNPMoAMOA|@%5Oj0Hp zB~5@M6X>!B^hydore~5e2{F?I9^!-4L|92k1%yUgq)tNM97z_kzLKzJ+Amgkuk>4>XIpciIk%!V*5)8|;E&4z_MND!jYCVd8|w#t|ZimdcG zmZ`H8L%PL08@Md0S&T{1mC`Z4hNGiVgadrV??>0)>3VsLEb z^zIi4pP`Vv#H4VoUG;YVuA@;?FN9COn6Thh=JNYlD<0&oew?@FN$T>4X)7M3t$C5S z{BiV>=RvdY*-gG@-g8}i(tV4~|BY_^*SYjx@AQB9J^$5ae`Uxx#9-3RAXUh~mj*(t z=?q*s48k1@qFoFUtJoDbu}GZdmcPQMa+6!}8jIXPM(KTw%1hWZR&pB8<+E7C?>JG) zXO3*>B%PFn<^?O=>UX#|>kR@;iksaU@vq3j{!}bTYfx9;CkmYb-L68V!pDAR_4YE8NA_A*l zn&AxafLk+UD-eXO6WRiD{A^3fH@LP(YwGz@D;&R~4t)RmeAX|#yq)nQ%RSKjIf|J-QBcv5{ zkPeZgWt_Nq3>p$Mi-w=9i_CymPmq2SGHH;|2dRBTwnuowYV zTM)K&+H~u*>CiSt>MVgQf`cD$HZ(=ZMV{n_n(0E&Y{ePvM|D`tm*O>QT zxbuxj!waRpZ!+`#t1SC3x#_>+uK!9q{_}yBz5QpH_?Khxe};*_7`*2&NM?a5BR+oy zg)DZ1DhAbT2Av`X$5xTBS(-U(O)GZV*X(d>-tXUaG-C3(@aY$0=3L8Ib}whuqk?tM z>i2%F*!87w^V_^FA5vF8k6-aRe#L9Q#qXSFJQkgBS9;oG=9Rx#R{aKD_LOso!Fwr# zNJQ1FMflaji4LZsu$H}9Mx$W(`_2lX%OCG7y+t}AS)1H zJsr@w2l`>4r3jF<2k;6AjRC2HNUDz@OW<(TN8kks^+VY6wn3>LWGZDtHA1VL*CAYOqrj;I$9r;vonZ&W6-PXhU<#;L1qR6MDQ4 zq(cND!G|g!DkJce5NJ>iTn)h)`Eo9K^3J)C`Uq0vK&m7N8&V(1I%dHckje;o1)ZD& z%4KXJJ)e5Xmw9S|a=@DQ`VHF|OMr6Mb z!-kwa2&qjlNwd@$W~nn^^$Tb?4tn}i`cz9alr|k6@^A(u3arv+*dQUeAgHO3KGhn0 z)`w;KRNFKV($AQnpE1EGeu7S9m#}XapGPZ0)@IYiZzK+aF7;#B^Pgeof6(FKJN`55 z`Ok6ybaOiIzW+R{{xeMd&oJRP!_5EuOaC*pykIbFXW&a=5C~=94q%XoVbRE8(28fb z%9jgjHBO)9P`TQtb$d|PzK97&;%A&nn0Y#J&bgHN7c-V#%~^S~bkn2e10R|WeXQB@ zx_I-Gymj~U);`Kx`zUky!<0pLe5YPDuir0~vrag5xmeUZ{^02g4}#o(2dQfyq!#$fAxIsh71*F11f3*; ztdRp9UA_u97ASApp0@X)O8IXw~ImdL! zS#uB)-Vc(qONG}*pm`$jY!Rpef>lW}Hp!5+3ZRn~z$+HuRg#otGH5u?B1r~};B}Cg zS*)aG9IkGWm{~LgA>T6sncsxmQxB=3AbSvyN&WaZ^iOj%NNkL|sFiA-7$tY<8 zq&|X^$Q2MSHgbK0rUqFGnGG)z(aK~<2MUcuQ$q}w$Z7(!iosPMA(zr{HheAyGKqty zZpG?m_!>nt{~~jd4j!BdJAn{!8X-g$a!Mg2NFjRBE2H$O5LsL#q#pz!k=0mZ)JF!H z6ZJDd+c3?Nr>MlumI<585VKUW{Q~=j{}KoPvmE-*bl^YdG0^$dtOx&d9{n$L_&?kB z|DX$1+dnb%{bZQ^UvT+L<=MNSbFEH^m}$vRTY@^Vk_?@iWchXBx-O zHIAKY7`4POYKckQJd?P2hHjk1a0LXa zl5`{6A-AULM6~EcK$aOGNJxbOI`;rv4e5k}7Cb=uK5!{mb%UsAu!`seHzC(Ypb7}J zKGF^Xb$>KrBmWEAbjAW~ZU|H>!N%f{D;nmVj0tL8gbq%wi#b zX^shAF+muZBYS8fu%jVB$LN9B*v=<6Pnn8b86mUbt0N!`NO6irA{XXpQkYz1#RS># zngz~)^c3J+Tnu=jjcf(HenA#QV@a*njS0|Je_LF85(P_@D0(ctyed{|uAh2C<3Pwbq0=b712Pt2 zlF+9c*{U|rga8tO zjIil}FA0LwK5!Cp8IX23&R!2>dAXiL~eW`F+ zP<0brtBnn52G?o?)o4Kw_}V+jnF+A7wLyI!a3`o1`7}95eFPyPwGX^21mQxKC_pBL z)KJfqgI7c7br9@C1++mq$PAI9XR(4u5d=Z%Bgo0ykeUcy4T08ayCZfFLHa(ZWFB~Q z4ob>9=fV(lwg^%s$vI_1)+fO0AP~bTL)Ixn#xWg%>_MF%jGW05$) zDtVGoTE9_RziH}3Fq&+XG(jn4j#|b7hWwr49hVpue&XHupJDfZmi_;k4uKHQf&Z*K zLAzF&R{v*N`Ja0OxIU`9!C*RxL86pFD3XCEm_a;*Nhw~$Dn-@1R4b^)Afm}AvCArR zl6}!UkLu;#b*uau*GBYgO`d+BaM9_e&3C%?J?}sKrsvS>rX3H9R$b2ppMd~6mZxW* zN8@J8{Ka}HGxSoYYb8$Bi|x~o>o*3UZ(x$xX_DBfo6rZYgL>f%wED;}t{sxsjN^Ot zqB_(9s zL<_t!!mN+r6$)~F1gVukl@ZdJr8>dj>M5)NUgKaen!v+upm`r~2d55F8A0nENbLhA zHG*m&^^s;^HKg8wpP2w!fdCzdtATZepbSV&1epecR6vluLJ&5*8bUC@r<+1*B6#-) zodF)5gVZ&Ug>w)#q&|YwL=X~E2f^zjcx9vru8F`?Mfqssa-bD)(DUWs6%dpm=ad6R z+3=bOvS#AW5@mNF9VsLf6=UhnFF%Zj58P(dr{etpp+U;(Fj+B+M!a ztuiveHUkBjjWWborw0*1tBEkVkje;7LUu)=)k<(Ka$yKz!)qc419{CkoQ+%`L6qSk z;q?NfzJQRp)I-Z&c&7kXEJJDvOj$G$OKgYJ!X1E>fouZ2+Xx8?I2Tn7v>Gx`nT{WU zs-cvr7RggU4AjcVGIcWC3W5wsutQoGmZ_61U~4k1k|$XtPOwRuW|K4xVu(q~1XK8V ziHWmq(-vqHZL(}WEI;j@-qN??TmLH^_%C$mKmY#!Y`Z{+d*^?aP5-&){$Z%U$KbJ? zL93HNDuqELjzQRufyowhZ!C*CgMcxMv>m6WpR8??qE|U+pecTWY5EMu(q-QD8+@8K zdpB+JYTg{&vnOHZ>4cf5<7b|YpLsT7(uvUiqi&7646>IPp06Ak`3rl(b0zO&EbIBdkZyLFywJ%LFO& zI0y-;kHFOstR^yxkN_h|^GFB-nG`n-7dH)uC|l^`V6S_v))seLd>NbQ43 z!aGoC49toNDv~(K1T;c21zrasGa%&{G6^aD@RMe#(+DcStZ|S_bxc`^2(lUo8`4c6 zvOdD>-$9JULqcqUknjo!IY2Pj_^KhZd1H)J>*FHkjm?Tdz zNuB~}V1N$ugw;oo{*g`6G^>OukXDCbVlQ~V+a%E7UgB)C_?cn}3uMxkG30M&tT@2X z_kelfXNI-^S+;;GBaS`)1^54F+wz}t_AiFU2kgO{7)&NJ$Y(MLMl%R_GjO^vFsp&i z8sgAq5HV+vc3@HQWYh|0)r)2@N@g}qjsOBzS!UU8)LRDP$Ns3A9!rc7jpt1jtR?df*%A z%wqaLyMDl{tdZ*@tJofs@HR!eWFAdFCNXW$5F7(LsG^VJrJ~)J3a^axLYpAf zkXA^YW^k=$a4o2>2dRBRpxqpJeS}&EA=gTfQ1=J9CW5db)ewY)48%d^gCPAQcr^r>DnhP?lp)tWz-u2c19a>*r0PMdeISC^ z7R$lwBjm0Sa(x6Df>U%wU%3ripMa=|(CQ!v7hDr%gGuP!5TF?%@HQgkItbDyf{-%y zsgTM@+75gr1ZZ^~O@v9p zt07DVWcUtFLe33FCQTFjO%waksv$TRTKiy*=7H-YNO^@!BG*dDZ1_SxA{me}6ODuv z?^Ge-^$~W{z@u1@z60d=bVywZserI+LF#;2q=I@D5bvXTfo!f>@)TsfX5ho#;nfhR zXOs$>+JlI|T@GhJf*rzz*FK>6#KcLUS}7S+^_V4103q<9RTjxpAV*i3CQO9vJAfn! zvy=%&2|b31y@rXsy73eA5++I~%+kzTA=7+Rf7%U|#c%Z1ep6iiOK!z?iMbyc+io$& zY+Bb-zz^)$2svXOrm%wh8DPUhD=2fd0+MyFa%_M!E zRnbPLs+}(NJ6!5_*fnmkuG?r_x>h%Txkma@x!Bpl0aKhCj<~lS(au_-6E{&WVKV3f z?zn!)+>mKpw@EDY>>cEDcp$YB{!`@o$TF_iCUJsO#zg1LN%rAo47@tvDu)H! zAz}f~6LEmMOUz(#H~~3fn~Q;gPf*jtEw0uewA&yQbTBFSY|>W9^pFAQUT#2{}apvM3HtBKLQ2 zu_4DPU@lZZu7==i=iq%JST&RfIc*!#GlJDbIgs5$NMmuJt`MXef|KBy2vi?II!97= z>G1jpQu`p2kX3W=%1GKSMZp$?@lP_1N@h*k~BTE)v+#miX6fe~n) z2+|=!?h`@kBT2JZFhUxX19y(#HIal_1iTu8FvLv4AbSvyNkhn;voeBBn}0tE3)K(=N8pAhr)u6G2FLePkNbX%X9F5!+)N*<=yd?vYT>AfOMfgLuFs zq&|WS-?6}}A{c`eG_}OQ!>8uz7FTN+)?*mf13N1lv`PUoItQcJy@-~Tb)(MC%l2trp6b`vh1g?f4l@VxM z4y_uJG>e6FizG~Az?D%f^4t)lCW2Hwn539V7`Q$H?L>g=N5DpcFAk3FhSW#;u*wKh zB|%7dHAFl^A3T%+SyGNh!s{b62Cj+;R6r$6gw#ryBxc}_> z;SAwI%56L(L>Vz8{I)6V)db9)5NDf%kFYRJMa<3+V+BDq_-hzf+79xV&BQ{K!XqYe&SsWsZOd2Kh8OHZQL?HE%abmAgLXR=H>tviT*(4Fvy)ufQ zpboiH7 z&;l%~qe5T>%~Y!uyZLO*<1e<6PW~a|v@Uq%63Sx%6hnk{c;YZzM0d9=rHP z$gK0pi*Nh)ANOtFYgxQfFMf)4%tVv;380yy*dFlK2~e#BsgEF|4Nx+s*DwZtJ`Z$T z5af6Sy{HcTux2ChQ9Sw)tIO1T1ZILp=O7F1AY3@f2p-mB;Zd=6iz_t> zZvmq=qwqH4h<0Pp(Qt^lA;_c;goKW^f!Dl2+2FNs&}t|QG(e{j(tw6QRZmd8E)s&Q zZNpymfGZ=dpc)-y6j-N&R1JaViTrCoCuT!thyp6e_&C!#4gWew zjRV;e1R0Totk6aohyzzZkRA|(g!GIc^$}#A2)<4m&VW=mm?T;Sq~uWqS(5;%jF9Uf zWHzKeQgX>dLZF%ma;6-Fgs;|yEKYz-34!(v!6t^lM<`^;IiR6TusGssJmhgXcufQu zqk}A#leSKRFyyS0;0RJf!RsJN%LGWR1X(hNTpvl9$4Hrj>K)V$QLMCCtdwbtlxdU{ zc*z{3N)k7Tgx5i0CSedZqz*zRAv+P!NTiAhR3GU^cOnqBDhX0CK}ce1B|P;JWJVZ` zM5~)nxbYJ(t0A}u{_+#KEJbFUrA$I_VMQ^)Qk^^n@cIb5cGUU^R)TYhVjzv6LTVFSB*FT~0M_?`)Ho0lx$g`~6%Y}u^%1yA0?j9C#Lv)( zpCOSrPda5GU(zy;m<8-1Ga1~v84L>;q#_yE92o?>1+^0ttn$U2%T$9}^%JJr7cBK| z*cse$Fueb0!i;m-i*M#Hzgx8CQR(`p6s{Thcct527H)c)x&BG|+DGZj z??=wO7~Ff4q!H2pxfKY~AtI(ef-gVN46N2hLXi3kxlaVCeIV0A zpe_>Z?r+Tc2r?f8BWumymmEXXJvq)I})kO-wRlCjGG z9cl``G#qlQoV0D4lx;F{H6(490_heZuav{AkL0YAWU*Za0k4lB)exM7^n;K|wE75C z6PZMcn?&NNhQv(5M2$m5jYH9DA_x~>eFUqUAQ=r0327PPs-7TgT%q@WA$6A^WeysN zQURf@*^38l-iORA;O{FzDk(%ogH({>6CAku2vi+lIud+BAV>=4FjCnB`a7t`8YfJ| zG#@g01_@7GwGV!ANL^!|&~Kj553OIo@d#nVNz+7_EG*Iu!N-P}B=i}_cN@odnLTcEE+*vdJ&w4$^7P- za=r~_sWUuk)`oQK3~1dL*|#fs^8U1Ghtj4WNS(GnX~LeUjxABm8xp&=XHVLnKkabl z)Wf+mk0(w&mN4aL_WVovi?8L*IUCZv#Uf*xZY<16v0ad35DcT*AsGbGoQv+%kLm^? z@CgZ!vwFaX^Rz)~B?ud`PTL`_fkDCwyu*ka-1lJxuj>X?I$#E*R)XBh$_-xR0G5)p zvk9-!k7x(o<^Vpi$0)qTFs#`iw8>Ed!tNL2v`Av;t7h=Yh-z!D^psI0Ldi z0bCi?K>9(DRoRff4+KzCPsdsRZkeUdxdkD@( zu7l)U^56(kAAzf(Jj^->QWHTA+=kadkj2}Oz7H}98j-V02Td4(t0BmJ5Rg6*WC=I) zR3Gqi1xUREsfplyA1SLuSulchjv)OYv;$7z=lsZ6CV+?IKz$;l$_RZkk%TFzTLkGG zNts5$D=;$h)s1D=|g`W+{fN&DhX+k6Q;-LGR z;I$H*fmRb?a?$E0!?=EUWrWFqmYK){dbpQNz>83n>It=c#w?sE6oJf=;4}oIK7wr2 z!l@pcYaxMvM+-y&)~W|Cgi(&essH0T$gbS=okb@ zl?2Hw$TdY6w#M$b%0)m2t>K?Gt6f>mx|*BZW~PN!z7Bb``;o zQ-IV#kYx!nHpx<`OB5jWkqmg#5d3T($fhA=5_H-Rczpu$?jhuTtdK!D2q|S6C20}~ zLGb!W%s2uvE(fWDBuv8Kbr3Q`HxhIj6&i_DJ%Q>YNKS#1kcON-tfK_UacCs|`UrA* z31kohUK2qWkX{uUiCHVbMIdDkoP-o#ct}XWNCJsmml0y)Df4kj;i->|6DC6X5fBpM zW>V`TNQHvMc1TT^I0>QvR2jujGEJBa=fWmsKy!2OiXJo=6+Z!)gwzzIlE@JRk4i(N zZZbSVAq==0czpy}y#`syW*pxQss*6hLG_eTTo)Xf#dScCd3>iuLZ@+bi*8u6oO_A9 zcO_`P23#)~$F})bE-IbARm>s`yefbdeEI7+1-XxgOoul28* zVU*aZ9o1wI*JY75$s%O}sD1(85CmP!7702J0iJ90qB?ZI2qFT>anN2&L@W5ZI`D}- z5nYCn-J0P&S`ob_ksS=mK@6-048nE{N?|-kY0_>DI^h$|(iYg~uXHQh;90rZziFpm z;|}lI?Y{LpT}n2Xq%6>gn4}okE$!VZ?cJ&x(5MmI0_ojAYof3Q(4HWq+kzlHCg}P% z#3mr<0&Ga_gGOqAr+$ztBgjx2eD()2<_1}Q0PhgNYa&qh$G;M}8bV?FSK_IU)ch(S z2sHKMU7-v+t_Qk28@w_B(gVUImAp!npqGG`Kx!OFl?3Sop^@-&wjq00mE8;B3}x66 zbI1(H#1Nc>^n(<@N1nnr4S^Y;vlK9UM(}C~!hqL15C-x|r=X2RNF5^h=p3YH1R0cr z_l6)0NM(dZN`VLEWG$0rEt4Vj5uAj~2Z2dwZwONLfOfE&MM#=QLXenwq!{jra^5ekuoBm+o{ z!xylb#KNl{NT(alg)&TI+f8HJ&Eh(Yqg#!lnvJ7dOyj!M{A-l_YE%R3RRe2H5_&v} zW_y>;leLd!U=Rb(*TB~gK(_oKZT4Xjb&tuQ*{mPg z3_b1uqdwA(Y}W=KPpS(dLG3`S^^rkDmu7g6Mp(D1SEYbyikefddvc#o(XxoTO>v!v z(fpZKaONRdt)38_;sNn*^0yB3dPh`%sNNL7Mf8G%=_fs6&u+CbDm>KCX=n5RH$ zU~@M{-~$KHtT2r4#){xI3myifW`RU5gbg7LW4j>?v|0%+2sx9@C=RlW4N?t3NyFGq zaL)*IXd7fF9h?Lm6b!l|q1!yJ+bFV4)vs2`yGqfk(jc-UxOz!)_ZG9*Y6eMj@L_R+ z;0_T#1A_nq10T4@#K*uO%qXL85Kw9yT%qh0t`L^gK&$BOC?^L_qy=jYA$ARF9 zCnKkykDPu!e(u%yxmOF;KPlP#B5TEi)Wx?UC}Yu$j72vx=Uzyjek!DKlUec%V0BsHe9~=h}L9U6Qb3@?yAP5_ITn^ITK_=C_O5h06Cqk}zR6UDT zJ&RR5iXfy4c(pd9Cc;_=L24Xi5>oqMlBhKis80l`gP^2yrabsSIa#L+c`(9QEC*Vy zAY+#Ssfi#Y=I$ZLas||(xddrzD3Y^IfFQ`GAutJ=FoIV`@cKx~ECGaI8(P7O6QC1A zpaD8?hX`I1K^WrT$_P>)q1Qp+mD`}o2*!Yn&Pf`FN*afX8wP`rQJ^?B1mBMUXF&21 z8VRY43_v%bwCRE`K!H_8ZIJN?WRhStgscEk4Z%rxO$1>;3M)J$T1|w>h1Wz72Bdg| zw71bnw6d2}E}qK57`)XNvogZ7{}ev;RBl5|ep6stxo2FyTVhFIPQ7btmqkRaesHa6Bxqqa{DuYSNeIv?26WFU zBnLriA23gGE~-<(vx3Dig~c$1->Fg~dXi<{O0W7|9?e_* zyY@gTqo^4dqGw)An0GC4{`JficS|S$9S8}zmjP1mAd`^l20}vWAWgq& z2m{nV@~?r<@1WI0pjrk}*FZ?58V6GEKuHW)NIwYDCqnM{K-u7v6QFAnkcQ(RwGzBC zQiUC!0I824vqJD{2r?FjwLU_wec;%AF#-VUFoFNKdu3#99t3DDl41gebHH5~1 zc`xCBYGItY^sDS_eQ z5H95;yn;Yu;5QjfF=*Z&T#sPtLQ@H^0w5lNs)tlJ;9WcraR>=f45^i%Dv@kKRs&Iq zOd{7u;4PnBxJYof1+?bP2)vpFQe_Y%A&Ma+vSLUbL<|Yl71ar;ydW3Mf$r&!=`@aN zH;QV}3v183^er=5tLa4325LBF~+`qAy`!Jx_rPkn@3<6yEOwGV_;Lq4Skd`<#rJF0&rE)r5J zAy-CfK4r*UWHzMBgGNHuCqM*ojmSZ2A25yJpcqd*7)PNG#qc({eMFD}uZi$82-Z*#1&~sgBoede#v_7OU%n8-}(Ug}3PjHJV1Wnntu5hqao5H<=now1bD*+9Bg_klcYx zVy%YYf{@)nphH~4n)SmQ4I`Tsy-Qj3BW1mc9rLDpR;=`^UFXxh)w^knbL~d=#;wjx zTV0yB`Su(PnS3H*`l;wyXXEBxh?;pOde+&fnP=kWUWlA=I&#vH@CkE(f`c6+Su#se>RpTajxb$O1W}Y6yBaD`wRL z5rM4MM&8g0?-@ZmL`a97f(}ox%aB6q7DCk(?Vf7Kb8j`e0f=m@j+9W|H zhNP{NzyonOt072zBo5ldikKvV%@f5!DkBNg7zywqZAg72X&Mfxk05hH$Ru)o1n(9> zsvcxg*f1E5Ae9lk>cPrDIyMAycnFAu*GjM(xY{5L&|(JA5m+tw>mxkfCFGh2D;u%~ z36sQCJ>jp6Fl8a-GcFQR#zNK=L)0Lb$&|2>OLAm3elw9(LfE*}LuAoN_-Gi!FStmE zuSq45YYJ@aPFMwm#sKXJ#aflX1tF_tAY6EzLXZI&QpQvU5r?#g6dR*>osEfO)_R%7B6?L-yAaGNW_#A(KAlP&N>r6=Un2v3kh@2Lr~1j(-Hj# zBPSdR@7`luxX2`Ff@*N1dO$5?k_c=62zFZ#=#n6`S_x7Cpkq~3vde9-D6NWBBEhTsh3ItVgShg=6i z*vLb3(D@+Pl|zt92OLl2aLuqADhAoUTrCQ1a8p!x_}^;pG;TVc79RnjaPynzUG-YKLSf{f0In?yo( z4@sItN`ZSulBVHU5oVtVvN8dgge+HpS4P5y!B{IG5yJps1AjQeS|7m$wZmJr!&@Ph z5rotY17%4_)`K_m&=|<|5iT~?%1AG&4YMY~C4#GlLKDZVlCX+E%4$fN3}Hi*LD*PH zTvY>BakQEO&P7&?X$!N^Cf zk78z?hSW!i^DZRKzX&0NyY~1rZ%vwZTsE*t+Ou3MqD{@eMm?ZbBM{t0LRvwGRuf^? zK3Hoa$m#+}Ed!~KAZ!SUToXar*s3AqAvk#L17W~x9|!|d2SM0ql@4-!1UbD2t^iUQ z!FxcEP7rdh2f02%o(zK5M{+KpW91+f5PaFToKp^@GJ;JE<-jW)JPhb!Ii&gsUKzpb zBT3tIDR37_!X{1HHeCubOlO@6sfi#xAV?jA%$BfDl(2?WLvS)4QW=462FFn!fe-nK zhSfx%o)Ki<5QK#89s<`y5+-42^$}#vHs;VAq$Yx_k%N;W20`%pNKhX!G$(A}53h!V z^!?zKk+6Y3_-bIth4oMpa`-%igjY;R48-Y+$hi!yO45mFf#h7cIFS_-WKFDgWIL{E z2riCR?~u)f7tZ<^!$(+4UAQ2mxQ3IEDh5(JV3KfU#53R)K^RyyLZtAJ5Z~ca0I3}y zvijh=4mW$K9zsIu5hw|%k3fg&fYuclgtusgcW6g+YK3=bhJo(s)(!`a8|a3$Leev&RR*tF zkQwm$1tO>&-mDb{JqZCWg_U6t(FD09NZGrTLDidq*O)=VmR;RP%r;pouv|ZJibK&- z$LjUI?YklMQS6LUaWhZH&pMMd_k7a4OG)!CB`yS^fX>~TNmKlrwr+5<;FEk_2D< z02`MBRY0iq5uzpnRYvA9ko&d-162p3Wf zA(Nm5+u##^Kn(EKA$Tme25B16nnKSrb7-(3BCw#k3St7p59C5v=}#)FuSyYv2mt42VYJNR;{r zUNJ#x983~Y*FeM}Y=cP9j1OvULSSQpegw2nq#FsUL?EXeKumxQK|`BKdZ2}Hh~5pj zbJPK;oAkq)4Z>SA!`d}M+cd-4HN)Dq!dl?2I&w%rhXt*4}?UkYqY@U zk)jDAb0M36G=u6PJ5qH*>I@^BZIe2T!fVCNW4N^gBGDZNE6oj2wwZZ8Ssh; zx;xdsQp2wTQsZcX+3@NHD+9U4fvkmtkjV85u2pc5`Uq0dK-drxQsY3x(MTm&9fY+S zf{(vJY9df&9=A3-}o;Mxb74cRV)Ai?z!WQ`o; zHgHINBx#!_fm97asw7G46i9CfLP9Pi0uRc8DkCW?Q0GX(IuWv14$&Z`M`UrBC0;B?xG!2Jzjv#dq{A?dMSKK%h()WSaKFAD6H3TOi^$~J4 z1ZNBB`ynGp?SqTd32lKRNEZo`l^|IUxqpPrhV+bxB=sXe8w26B5~K`)v}}<{OmSG* z1=~D-uc9Zm%{oF%7u4&*Kg2@ z>sJn}R|{!U52yuAAq9YDh~TvkG6OUv1YRHquYDj4;_D;i$_R3g17snbl2-|GeFUjx zAS7g8Duj(pLc}3GBS?LOOu`4^AY*Y5DYQNjyfOl9S#>Xj49$TD;_~EO^JI~bylbAE zOD;0PRUaYOK@c`%#RBp5k(5oUv~8NCT?!b1xX9DK1WK;X*pWO;{16 zhJsf{NDRc_1ElQ-A(87Nv&-5;RqKVT)1MAwLUnB<+hiCzv9SS~m2bPKui3-w@K}(amVa@QW2blp$_WEJ1(DWS9 zs1*)AU?&W84G^XwU~{n7N02H8Qb|GBct}u91Uah*d`^OPV3nqSg@9QCn@*UpO}18W zlWEFSr@}=pH5&pt_JmD10tD$_@%n_*ek#WhDamkf-&XIP`k#)|2R7SF3wyZPa1|mqM16d#kXG6G< zng~Kd>LcV15rhrt5J3)4fYd|~Hn=iMk+e&autmKS9NamA)I<f&|w` zkQpL439paf^$wgNtRD!PInoaV)kF{yykY^;4-(P$6V~%ZtB-{Bd?5%jT!2aHfRF!$ z)HNt1xWx!*H=>Ax$0Z;l5WNr*YZlfEZ-L}@%+do=0b!D8)exKunG3)}qE%JkA{0xJ zNK845Uk#*g!moyIvPj;9)E8L1jHHHGSxC19nbeB{?T<#TbTHXE5iLl9h#@t2BL2-@ob-@>35)&RL>TR*%J z*L)DAjl9LgbQIqNJxDIsdSJ@cm;&UK(2b=Y)I9EMnZ0a zfYd%{XUah;BM1qphTtUZ4u~WvThPtmkjw8t_pw5HLy$ViG8wHVg7l9d7qg0+$3TYW z#LZ(wE#g46l6f5X>{IB3k(e2DTu#gkbW1p7h6qA}t072b1R@~=bRZIP><{eZZAeW7 zs(MU<#o-9PtrdLK4_YS(UKyd+MBw@eUK62JLy)=&QWFX5`3mX!Kyv{+JHQx_8Vd9F z6OGV12m)0}NOcgTCW4TVng~K77YoR2NPVOo-i%QtVd)>CRZpnp7-*D+7WEONxTaeT zLS2g>jt7&7`UuauH8d$m2!pkwR8^3g2+}2itTaHXXi&yo(Zu1g3^5rkLUkfg`aZg0 zt>8KcS&-QJ2sw?x*_ic_PG}Rj8fu1-p!x_hPynfqATP*F2~J0Ef?ACRI=1GeTIT}4d_T!f6$F}kopKx^}tE^!UG5c zxdMW)F{>W92xR*YybeNUV6BhPcBg_C8^HFfLh2(32^o=t^nK7s0prJW%l?1AZQ0pV)EAS-2vqF-VagcrxT787v8y!(haxH?6VO?6kR{yk0XpP52+l_IiNG}xgo{}p!RsJohLElgWUdpF zgzTQftdF!p>fi_>3#qw~^Cng{yl{Xp;Pnv(1GK^%vd$b%VisR;5j-^!Rw+n%h)H6W zy|_fETX7(phRa6E#3A(&h(z@@qWXXbltE-0ysiN=2-NkExj%SaLp%f0ok3O)5rL5K zfm}#Z!B4`67z^E+@>t5 zM@5TmvgasySE++KN1!u&AgdJMy&-k~N=O|9A>p+Tg5gu9?p=zEAe|aC5q0opRpd?( z8XMBJ!Ahc4H;|eLLPDglovK}+?2dvIUGpGF!6g^MP;^BsOMuikaxS?L1~Mt*lr8I= z1F44KB)l4eR658cqymDIkeUe636il(N3MF1*^ntA1PMOx6tXe_a+ZR$bpmK04ps+2 zCWF9ZbcvuVhb%!Gh#(yz8Ou2Ez9G<=+mQMQLc*${STVC$2pe7jiJC@1M&zJp`#`E8 zaBm1&6M@e=h15Zi!8&M71nCXII!B$Q2Fp8F8S?xKTI$VD9}<3#ifvs?i7n)ko?fwJL#C zLRKk|`bagfUMFFqMb2D@s&$^t+x@%uhfFvUJ?%u?%+s+m&nAHSN0;O0U5=f1F>d~) z*cqoJL)z@~=G*1YQ}V6?9nj}nr3O1J4s<~(?8*i;$a-uD7q&|Xvp!PuDpB_?g;YjZ zNwf+G&V}@M&}t$`MT3=u)I^Zw2ap{@knuNU5?B^SOZ4!HtCX2UBW zGzPo^g4a9949I;$5E5P!K^Vw&5HcHH6G3K$AUjwg^^ufSJfupJvWl0qNCNeYtl*2~ z5~R%I(CQ<2Wh4b#K?kXQU?iwMLavDrm61s#ygmY5{Q$l$9K--uMxsWbrF58;5vU&o zJKhIZeS}>5z}e6`2woZK1qkT+2*PHF;B^p$0jY-IBxLOkG6}7rf@(nZPH+SM0X<0e z#6u#N1ITR1$R1{uq#52yB+`m#(*lk25U7tJ1s$A(mw{*lN~m0Tq1{fO+6S_!3VE$N zCL6UjLAa2RIHKM`RSDfn0}l{L5P`cz@G1#WA30~!1_nvY6#p30`+K6>LcAC(CMR~ln-uwLE2!D zDhZN1;9VPSaD4>NJ`i!uz-rLl4M8=Ufz_Bu!@ma7A%d(f0FmHR;2<|Os0UU_X0=-1g~@;T;#40 za>WF%kI)$K?hhIRUJXIkCm;@0z*xTxpDL2INduj63f?q?Tmd1o!9#PP3m{7s z2per5tE6QD{CZYN%LMRz5NK8i(p{3V1XWM)`UtW(LDDQ5QWHULg+QJg5(RB6ih=fw zz$+FYY-k+>z8oBW#scEJQ%Ii(OoA#Sqz(~$wg@6FW)LK15G1M}h>1k>0#FcmDV>n6 z52W@%CgF7ul%eG%q~!(AN^k}w$AOz_HJ~gB&T+&r@YG7kQjpREULT<`h^&m@${^(% z@g%(Xg)>lVB1q|tue7Ib4F#zqFl!%75r|4sNw_z`4CJ{v#0oU96zG;Za4-^{wSiX( z$U|gsLGl>T^#h@x4hy8#fTRrh%sL49emYPEgmfKJoK~9HL53GjNN1VE0JfHO#-B+10m6BBD6XPnG5L| zK}g7;oT6(UoB^qTAoUT1M5~O@svZazt=>WA$~tCYT`vb2h(jhJJs`LUWONR*K7y>H zgLH_bZPTP|QXy3j8VOmdja(l=DjhfpuZ-XfDdz(ZJwZUR9r@%06yCjwEAS^Hp$KvY6>A(N1)e@wq1i-0Q{l#V2_ zEb{4UFqM#62~y&8N&zJu#T`nDj*06t$N@$(*;&*!+JxId*~qj9XK1rK&yseTy_6S zkZeE|xZ0{z_pgAgf|IaG5tbD^!4S{++keUTXB5zZL&IG}vP`H?Nkg{u@qH~UtORgf) zI&FDp&^_Ra&N+}fSmm5E5y&YUQU_sHL&*Ijc)bJf3PI{4I0@+iLFyw238{l1B=Wc% zWFM;(_$F4!X+QAF2vQ%x>mXzXyc&WtAoUTXHv}OeHIb-U1O!3uArdzYhujhlAtAMo zgmDA}i5rDO5x55gse?qp+gh=nv4C6+!PzJ^5%Ojta0LXflHfIwpq?)T3F!J@MSR*` z0!Rqbu0tjv?LA1F5VJmlWJb*SS$GDAGawZca!~+h>xDKUS2VcT8X*l>r66?*=_F*O zL^DJqT7itrMXr?yRzPS9AhiZDB&1G(sD#u$kP%2+B&0rt=z`Qi$Rx7+G1;(M22mwJ z)Ifp)nS|9NkmGR(51PT$Lsdd65%8oxL`ny#%LA!0FiCjAKxRPF54IGukhp3exH!DtLGBqr1d;nbFgA494XMTAd9ylw?e?HAvgoACPJ={Ag7)}Y9dgF2)vaQR3%}YbShz;C~lPi zsgIEN4Z&+7c<#T2ckclC*`Up}1A=gLHng~)QA=O8? z&RBp~K#h+t)eTp#gkc|yhqFiFS(j2eM;@Tv+@ zPr)-L8UwTDf>cb%`5nTBlogP*5bzoY&V}fLkf81rxMV{v<L4YLLPhri2!af?p^->65vcov*6Tr&g^b84x#YqTygq_gKyU_pEDn0SHteP$$OWu$ z5`NtSNW?J<-W!5fI+zSl1!R{ZW1k8+=Ld2p5u|4XsgEFgS3%W~bqZv34*6Pe(DFGe z(BX2B9Yc`H2x;FCXr2hUGJ@1W@G42n40PnFq**MqPXwunz$Cmf5&>Th4!I9P%p^?8 zI6?}uTLil?9C&qzq89+El0aP{@Z1ozRsz>YplS$G8R_~%2Ia8! zi6DY}+FlSOpzX!4qQi?+MGa{>mE5OQtmH&vI1%44c^#YptaQA@Mo`LEh`1*D{42bQJ zvzw4fG{fQE#1uypfd>Ud9KJ{ZvOEB5m_wFKqe;OV50J=0CLuKrrU*nFjYMv=fY`yc z+QGGu?WhnEQ~`knArm?v61H>=>u4LiWd%=@$RhCc2~XJIK{L?M8HfR{Qy}#STmgiO zPGamch1NT;x(2kq8obU8az3`2Zxuv=nr{{4oHxj7H)Zcq6`wNDAe&!>oI@6iS^%S3 zu$Xm*if0+mV7sba<}^QXfGoBWb%7 zNPQ$@oeJMH1YyAIBgj}BWS9=TN&&Q82(o4yQW-%>=ss3ZeI#ZU3#pQjt06dB(hPC- zDdws<5z}yC@CG9I5(P+ogwiJh_l%HhA_yB^AHgdi2m@04V3MF}2we4m7~uK{Ix7UL zh9Fgvkd7Z(eFUj`FiBomO$4cru#75zTY6~K5S$Com1qpO0(h>6FyQ$ejiDA)2S>8=M%E?Q*-S&9I!j9@1!q)6MPK-SAiTcyCeMUa{Z(mz5z zc^g~}!I#V-_7K5pr8rRww9^$3V|0*eNCI>CJB*+kjSNkYGAEuU@g2#f-@ks7^DRYuYm9{kn1w6Y|Q!yE&^e|i(;${Ncm4Z ziB|HH#Kr0WG(kxJML(<+v~x2Qv5ON;nO0a6F}}mp1*v5qH4&ulAc@3UA3^FDfhJCG^~Q3t`qK_Y>5kZB!=7C0M{>L3Ga(3%Lmni`TyA;}w_p5cRX=#^1G z6^a_aYDi@S*D0LrheFRySfLRm4=70*pYqOz)a*(bN zygov!gU~7$OfG2ofeYx$Z>%*ArXZx=K_em65S)b6N0{}FoMXB)c-Rem*F!3TMBUAb zsENR5%0cQN$bGDkD_P-n5TqIc?HjU825le$uUmk0i{P_D;KkdZ6Mi6jS0N<0G6K~{ zBIck;B1i>m&^&kM&uyfBDDGlttR5rbjMX6fkwZECZI7O^$}*3gj^p%*tlkwlIDQ7C zWTrKVRxjXk60Vzc@KqV$^KH<~hjTG4f>$VzK{;F|Bd@fC)B>253z`Tdz91xgU=5-d zPU-~K!4XstX%`Qu>Otay=6AqzJ&=`bkZK4t$Oc}h2A72}P-+>Z{thG!L#|qYR5V%v zRaya6kopC3&jM83zXo3W!0QxHt%P(J1E@lQEvSZ6KxiTmt_HY8 zL8xrZm2#l@AjpglX1#+Zf?3hv7eT9lAY6C_1ZSY^8$#+6!K)s$`bf?$Rn|5If*?2F z;UXa`<)9?^U>``W1gVUWdqWU5tUdyb)IsVXOcJ>Qg0La`h9G?+ag%5WL&7u$az`Fy zdPp2RRRkZJgD~KGh#)l)sB>f(4DAgeZ6gvh41gnet%Q6#9{eC~$O%8lB%&HZtm5_& z*7Fh3@dhIw5gjOkS4Qyq2r@$i=@ubZLvS{vGUC&8hfJYjl8_#gs$Vs7<)Vg>ks;Yx z14m~Ht{$y2(hLA~j^I@igaPRyLE5em5?&uc7?7e2jf7M##E=k;aFv)vF;)>;*F+HW z;S~ggfz>4tL5O-Z5?8GOsed6FiL8hrp#&k}#zMGoQVYCG8eZK%D-J?O;y@x1LPE4) zrV4G?@@XCL5FAJWxW<9hDFjGJZyh|^22qJp2Z85zpd#3kvASP1sB!`Kbl~?aKMVG*LMj8Yzp^Km*zZNF4<497$Rv z!D=7q)eqw4ao}zddSwJzAP1?Kz-*Mx5u`GL_l(3%qTvX#ejBqUf{w+(?!^-acZ(pE zkvM971R0BiS4Lun0g(C#UI8I9kgFj$8!|BjCBb8KLb~2Ux{%7q8(tZS=zum8!MZ|T z@SYKPdI)87j$g|IUJbz+m^Bgd30JEAm2d>9q2TRCWCr1tuHfDlv@`&-RRXJ20;^Pm zs?iY2xC(f51-kM)s0JE_T*XRb-V?~gT45VfuNJ2U>7Ns#m#2Sc^&01hcd$*4Z;1()ta; z>dpQ0WlTbNqzxGu#Ti(HSUANwc*Hq*B{;YxIk+VmxWrlbqy^A4p9Et8S22W#@x0umcUUq4jzczzg9Z3$U@0 zkd<%9Qn>0JG;w(CBjcD3L6BA4GDw{u!~h+*mVwkYD04$7cj1BNgCIR4S=$syWrV8& zf=>*|StrR_B_bo_nh2>H0xeMhuTKD-xed7*4_pC3PgH=gA*;AyognB85qMkMV zK84grkggDfM6QXTY;ZqF+%Oo9M2!MP3PZ7UbOsqfBa(#dB!w6PPjA>6Nc9n< zu7S+;!0Q@h2B<3W22cHf>n0EvHZF(EhSx8U>PF3{3{3>9IHWRykg`6d(q6^?^&A_V4z`)PIz{&suj0_Bn3=B+QE*k>_ zI|Bm;0}~IkfE1^M0gtpXgQ~NlTMBCJ0~&1uRYtG_5FoV=GKty`BDOw4u4^#akopLj z#9Hsb1>w~YoB^qfkV!~21n>CB*n_$}kjhBTE)`NAffmTwrok7;!5Q$H2>DuY%vuRD zB?LL+6uB~zwSpcu2d;YHbr6&RUNVPrDIR3OHl#8F-x3aAt^k@Lf>b?V5>y|F8ApLp zgt$p0bUzVz5goMl0r!a@yIJ9DgcLULhg3Rn z281i5?+4jB1Sdh&kgh*y;T&x39Hc%H(((i&=m86m%1B7d3pN%9UC52UKH}8?RYv^k zZjiO6Xe7LnQt_*RBZwe!CPijLWRXeig#fIoQVytuS4PMT$ZB&m61f_Jvmvc($jBZZ z60JUhEWtu9LNVFMCP|z2vY8Z_gzxCVWI)a%#T18#K-OJD7PF~&l_+@?D|i%&8ijLf z_%ce{N|}WDXEb>x)z}54i)h#~aELQ7aDl2IVlfK?12==Xfx2s)rgy2bYd)k_f{@Ct zd2j^l2Voh7!|Vh>`azI`5ilzqxCnB+gNrTelmXwe3hDAdNaPv^A_A>45^Q#t&@=JBY5qD$&j@Iow+SznE*$SlM^uO zBS@tKAtlTrB+Vlw%pyR@6x0ubio+HyfXC$`!8H+-1aD=9)<m$g51jtraQBy>>2-FXP)I<;xTpvL?N)o1^8(P7YQM8ybs2>Ebh9Y1jN_`}56b`S6 zARQuP5^c>ka(x7?i6H08>HEPeBS`HdsOJms5TV_U2d##{RS#(49Hb_KkYGWq^%1-# zf-vycN4y$t@SYK$nk%20D_YM8jjQBS21gJnNPa{kLG4EHI#sM$9;+avGD0RH6%+CZ zA*2q16irx2aN!22bciR>icqZMONhc5TwFt@;Nb;6eg?eoCz65HaEKsQ64O|SYq2WU zfK?w*HK6MZYCzYIf+yxc)p|fJypD#34C?5aW?(gZjR95$WHmLu>INK5_!NLw8X(sr z$ZSN_1MAD6h`_2L=md~oHD+Ri?C8N1hloIG8BhX7AAc)V^MN8ryamB7{DND!NjY-+)GtFiWFQB9U{=-Z{VAQlw5M*brW)}1gVU0Ejhqi4MC2KL#}Rc zvE>{xAySZ9243mlVL()(RYp=6Q$mm`3DPZslaSg+(k4L)d`mdwEfY(G22BaF2GLMrqisdkFBC`m{>NqGH zat{%hgw;L~qDJ9Z5vZzy>}Ca@HHTac2^$7ut%)FqZ^P$YI;CwA7oNM!$UyB1Kch0;MH&kGe9f4 zA$=kU39pYJl@XtsE953eG!jzvs328FNGQ>LbV~86+g(BxK<>$@c$ZSZhr0iLw;#I8TUn=R6#i`=Vz#zxKAi%)DNpd9w(;zLbYUNdA>szYhUaH|- z2wsE$t&bol9zbdw2pd8|L?9&Ang}8&=afxSCkRsYppl>@a1NP}S_W4&gk1YTR>UET zNZO@H+JTlOK$_TkW5=OlM z0#Xe@NaQ*QnT=c@3F&&{ua98W5c~vGP<;fhj8N($NKFJG!POA7R)W+)e46f%Y6wp9 ztGmJLBjj8LVM9(V!Ig_4YS2hESZ#%=3!dL04N*83Qr94pke(5;2&8C&6jPWaL_K6V zx~g9_E`(NLL5f5)QWLh!7_$oisk4Z$@^IA)5OGp#6NoMdiAyhJMhv0`ZEzAY76P=a)R8X0A6nZ6+!RpltIMdeH;iEl5Qc5F(t1O2m`XN0GWhTF_5YU(u;u&yA`Q= z7D1{ZNSy+yW#DxXoC^_waFJC)R$oK*`#{8%yo!~*OH^HRL`8X8FPcPv2r0*4}^p-Fn}|V>m#&^24xcv z>_8r9?SsUHtTcd(wjtLt_}TDE2R#0k3?bna5S$@t17EETx)vNVE(foSAPfoXcnCt> zF$7w(4e1BLZXJq)R6}qQQu`p2kQ*N$cR)bc5K`PU9NHm*T*(R}Ar+9AF-D&Vwn_m$ zH-vl&H*|W)AXG#@1d6~%D?oZf&~Z5fe-WfXIZ-edGBk%qLi$9=B=VYV2pf4P5v(!- z)j_a6kq4|YLNEk0+>sHa4ieDt;8%CYh4|Fm_|)8Z)IkW=8Z3oZF_2t|i-apfV<4+X zu9e^!AB_R(a3L+0K(2=1Y+~vo72hfq-zr>{5oG@qvaE(b=cGBfU72gYz_rg zJ>V(^odK?oATbCbAypEj4kD&Lf}|o`H4(}P8>~K3^R9p-OGsjauRG#sgEF438Zp?R5~P)N*+au?u8I73Lb?D9) zvIcnn2(9YDteDUgKx!puO=Oo2Vt@zS@Yg|7_NkDr5L#~tUi-iqD5ofZHxNP2GKHLO z3aN%5ce6rjA_xiT`yi8$`UqYDAv2JxA;__E;04>DRSJ+g2)tlB5<)_IL$DpJ5U!X> zn7BzKa`y+phSf*#t`MXS5;6#atXKeTBZ5~(`XP|BeV{cFq;mwWfJmx_AXSo(E_635 zv?cw-e#2oX7R8kOSkdh5TVv0b-Ng|QE16bMc4h4dNZ3{43 z`46cnkV$wor;R#Dh+NA+`a8(Fkl7HGkmc66OhZ-=Wuwjtq5Bc3mjta=z_kfvWj>ye zgakg3B$k>5a%>LrDWi~@1*vy~ymA1O4ehgdm8-%KLBgxoXZCI-YrkE(r`g8sPef z2~yV(LkcL{T6m|byXPr8XURI{gOGDBs8({xl1D?}Y6w!};3DD6t>Fwv)dQ(tkV%L* z8VOeb=?x*1keUcy-N0)cOc6+>BVn5iM-UNY5>LWB1 zf-I|m)E)TiBZwO0nh4T=f%a~YY8Dg`urDB9!&1wDJ2(*a@cIZ>P?IVSk4MPv9!Spx zLV~7yyem|Y5a>uZZl5wC)knP;-Pd!CY0rmR!Gj3a2s4zoUzbIydu6-c2jD&R}L=avdA@_P9r)q;Pdaz0mHwT?52baZKA3 zT@AtNAVjwaUI8I91VLL_5$7rptbicZ4M7sE8iI2nISxX?vm^$i7&O!B1tOKaiopo` z$Oc3sd?+4&eS{_pDY&o-LZq;gka`NM6tW;>kvZthf27$8JXIB>=!P#vhRjvrCn4oL zq*RBje1@~}Fd!R0kx97Km<(iPa5kng{33XK4Us~waWFfo$g;?6m@R z*vN{}8aArlrI4Bhq8`GAlaQ(hK|*R7@Ja(ndWNWkD1g*N${wZ49;I+Lh~ZTNu3U2l8L3eK4zSr9i9xyC_e!!N3X3*xMgz(*y(S0+F@K9Kcq$Q2MW8#EbYn+Dyi zipBshRDkq-AcrSFNJw=9ziSmTPlP!E1mC*~uW`T(&^g=U=AgyckbV%j8iHC)B;v~(W3+EvHAmr^r5H_SH5;utit%w7!Sb(4KBWfH9Jy!u-AHjP@C>0Q_ zN)pl!5jF^goOB8y1@%D(aYIJu@Yh7bdcKg#2s{c0Su6)Sb{ov~MP4a~Rv&>k5P??8 zAy-C_c_P@(R#2^krw4?*jR>(TiM!Vr4_hWH<>?0~syEPeQ9aziK?S z52h5nT7fh0*Cx=jLy=Y;z@3Vf0d*C4{0-t4tjgemkXv{lDv{Zc(8H{r;Nn;r$gK{{L}N8T+5vUXb8BTv~QPth$0guv^oA>ybc==1|Q=K?wB0vVS)8J9fB z3IrvmbanR}^6Dc75d&SPI3?F?P>q9HAA$Gyq{%v^L2hIKb2DTevmg}>oRqRlm$FNj zama))AS8q?wc+&!%bOp#s3m_8S4?^o7;i``?DQPg76|27Xwm6LuB#SL=ahc z$xpCCfp@zgR;Ysy2~-cPhV;L%j19sE2~mgLAle}=#7`2h5>f|Yu0w2L-_C6?O9s{~TfAV`l=RgY50d?-TDvlzn0tcf6%i=t<-f=3Z@9fZn; z)Hqm3MR#bo2)Pb|)I<kVxPQb9 zCW#~XHFX`L6kW35^^vq=mb7D*tWyT~%(ql|SbYSkm1G>#CGAoq?9!ziGLfq!NPh?3 z*?}cXsCt6W1WB2LIzHe!2uuG6yi^<1{ee_Tl4jA!JtJf`as>ooL#Bk# zNO*k&KY1InQcljyznVFeb1s|#$-MANO39;08L2XYR7sd5N%avP#aN}VmWRaDP!Kg( z3wB72gP+8zk)R-?rhwE$1WC|Qe&C)CL>Gi4Xa&5YL{o{`*@09>AaU@)YnZhUvIwNk z!W!CeLF5R6S1~H!AvHDFN;XV2$Soqwng~+AV3I2CC6M|EvbY*e0i-@saxXz3P&HJf z-~mFAWDFr8^$UyytucV)2yg`iS_r4;4iW^D&^v(SUGpGy5Hbns5J9RTIEmChg5Ue2 z;F1gJ4S|*%IOU`DkCdG=^xcveL=3={5GnN$pN5Woq@oMxS~?l~92xr@DTho@T?0OE zN6s-7jMAaoe_)3mh}))tsw8mF2+|>f)h{TgBB0er@G3?IW)8eElCn?6Qy)p$q(W*U z$lVRdY~J z^>Uyk3TX8acq^-ifQCC-&j?Zt!AW@k2wnj}7?4^9lZ4ktm{kwAvJ)mk88?7kd=JZv z1>j5vt&AYK6+a2dvyeOvCo$_IWD(@z2sDC+I*gBX-U>|%+8SL*3mg{-FDlU(kjjW| zB)n>X)F}jOB8UPN@Z=Xn3));5+(0x2sB*zL0u;Vj8qPqjXdrA{0f;6J8F$5u5lj)x z`Uq0jAd5rTSnDHr{Q~LB!?|zIoE7?w;To6Yy@&aoQaKEALYt| z7rDthgBE7Xf)~QcI_D}nW+*vj>bs^eh?ym=398 zWE?@manMS~4%9i4v`c}ktcD-*CT*7lsaa$|)k-q5xQtCA9Ld@y$=bq?hJ$QSg>#c2 zXJ|v}EEyZnk#Ugod?2-tlufFnbqb_Df{;>{$smSx5>f>e4_me!59tlT>Z3$)s|0vu z1nK+0Nk~lu=Zc%hz;3|cHj60&ay(g}k0j0}Q6RgYdUv>MV27SIg>vGoE#wU53Z^7d5(8!RiR>yKO? zq4k5%xUkB|3$=d)nls|lbm!Od;MMZr(RAkpGvE~vpC+_JB&6Xkq~R{8?j{I6h#P-3 z#0Tyc38=a9tGZxCyvj~k5q0Y$NKS{>L=XmY?L&|a?`pvrkSYnhh@u#?l!J&s$}v17 zyc9*NhTvSf)kF}B;I#&XtKwGyKaT?53xiCJA(N2$2(A|w18OGHCS^z!13!WQ&c$Rv zmQX5t7Q+J*9&}h4knkslgc}I&AHnM*nsRE&LtO80b!De%E&bv*ZxyE7ifJX=adbp zj38?d6dcl(owE$wQW?d~NUo1Kl(cNak?SL9)dSnh1FCv#(1gU)>E9D@hq$OzT2f5yXup#vkysm-QMzBASd}Ctx|x@2SG?k9fVB6>mz7oq#pvRjPwIR19adjNkBIUflhA z6BQse5rhuZ(nmns3sMas)jo*bLl8m8`UK2=5Raxi9AVZ+keUcWLMkKVa}{v) zjIdTl{HiWkDr8?C`5aVOujt^S>f@vK{J<{rEK(fbDZ30w+jL3WbZMJRX`4(1n>0n+bP2n3aoaS=(rbC!czN4+ zHT!5a`)C!1NEL@jRr@ei`!IDdQnrs$w2hFri;=gBk+hAIw2hOpj+M5FgRDJ(^n)Nh zCP;5c!8S$S2DK&vb0H&gkeUcm2Z65qwoHcAM4+w^wqtxC<8q+6AxM3M-0Oj}Ma-ii zwGw1y0;E=g)I<;xUiF|c;A;{fRT8}R!DK+r@IfXaH59Za0-vG)sfpm*g+vX5A@vc2 zgw{mhbqfM|0fPF0keUcW!s{bw1q7ZR0iP>n4mgF80$M&o;QJsTRSy~o zJKYqtQjVDV2(n}jQU{@tXtP3?!*N(@A~h#|Fyd8lz>0X3?UCDW$fMs-HnLW-Eg=`4c0u!zPQh#BR1gj_NDuCBD@cmYx zx(2C716P2~Afi4}_9}-X7gFNGjjSafl2T~WYG`xNxo&m3kAPmg< z2vXNz)hpKL|3K2dakTZIb0t7bie#BFOm(mdTLH zNXjx9ULQf$!hyR&pn3<)g)$(u3}l_Qs6`wYfyUn;H4&t)fsl|oNWwGjgs)yguUB_2<|1 z7tjlU)I<;x()SV2^?_DB81<2Wu8)9@A98&ppydNa+UR{E$c^D>^$~nB2+|wk(RAn5 zaD#M_AcxCA`ax(l5rhk=k1(qtxCo>^Lg@z~RYN?=_E-_}W^H)W4$gpQPe`^!CgD=> zfex@NIGZEYN8llP^!^c~feInfDk<>bKV--SN`jAMfh;n|(m%ph5`xzF;Sz_`UuYy| z^+cWsd36?OiU;XL0_2_xCL3OVLAD8FwGr9znDq#x*8>qpu8Gk4L6881D8ouZ6u?Qe zc*I({kRb@!H;S#^0o5;%JLBN8;Eko&>m!ItNM(ddf~JZfiCNAi7mWeo%Dd*lr63}* zpot#%Tu%;+3$0%udwd`#yg~98Ln%M!3p+VMKE7+ze*rq7irYPH` zs@Wu|SS1+P1R2-_Iomrq**OL}nFTtT#W?H7IP0gmX{Nht<$5XQ`>2%qE0zT)RR$?m z1uInrsnmq1mG~JJ`RJ#6nx%P|`Z?SAINBK5`WxE?DBH#>gS%Jqw(*d;AvxrCcQ8pv&j>OT1h0LN4?BfaJqXe? zTm*)odxgM@v>{uEFzq()E$-H%fW(M9A!0*FR561hr3lY(%G3`}HYXz@r!)dSH2OGwaR zVOUB4jkdw-9S8$b#bA(#8|C2FD1h%)0PpbuZ|wmkQOFW&7nI5f($j$?X*dZHL?dAp z5aM&t z)WXu#!qZC6$JQ{yUOU=JFV$58xjrfnREE??H6dy>A!=m-#>Kt{8D8dT9;W^-_CAg_ zhBkhNHhxMraY{CEa$qEBog!hCj8-3kdP7!;kjhBPDpA5ZUfc>Y1cyd~Iz*6zhg<$m&=atU&!Qf`-5qU=)&AdRUMu2~JX2A3@4m<#8MwAxxgl9!1WPmvyTgChzvAJ25wd%kE%h^Hhi5AvJ_;fO#!_808tG& zXUc$&H~`f(NDSoD4^X7Q^^vS&wyYy`o(Qsr0n}-N?froqh9GU1BJY%;;$@k z9CRZ&!FeON0x?AcH)V%-Wruh<`$Sp01XYJPb*BVlrzk^*2y?pt19MM3ZBsR6U1L>Q z6E(SD9iDIlfjmq0B3tfyXU;}f?rtxxULT$gf9|e8-U-26lR|l>hI3C1=bjwKJ2irT zMmXp6FpmCUzTQBdMlbO?cacN~{X_?S9qV8n>tI=%Xjz+RF{@-TE6{upWEu!m2Z2ZD zAXk3NSjNeKPLKmtLlCx=r$q)e@JBn zsfHjVXsI@Q%{F*t0(5->r0;`T0YPdaNDl}^!UyQId{9T^Al)L+JQ2T^C;ZX}czwjL z?gp6~g49HCl26SQ!iKC*z^sV`RbAi+UK1f#G`uPf{78sT*`6OfOov$kaVyzjBIKMR z>ym|zz~d5VjY9|*k{@NAb5MseaMnkVMk$y#nyl%e39 z2|fM5DFeX(kIO;oBPj>y;oG1p34B5VNuoH&S~ye^`z;TUd+xy9BIr78c-4cI0jZCmRSdW$g7ko(q;Z6h zQ8)xa>LUn=+z&!N(G*e>LC)}jjK9H2VLj+D9jF2VcY+WM1AoXeInd?c;C0%Nu{cO? z2wwX@DkkWH1W08BUYP)?gM@TJH4dl_LY^A}*GK%C-XNp}s)P7I$IU?*kXa#kWyGfm zx*!jH^@AIv(m^I66%ZPUTpvN^gW#2spsEXz^${PosUpmp2qFRhgwqq3`z7m0`gct&06C@#u@sN;h%UBhl1s6ds?(ipE6G387*}VV~ z(L|7tY6y}-@*!0W5{amZAn6d2G9fE=AZw~+ok3|BE(_lTgk0A^*bu!CHk^cK7I+2$ zop=DPUqG`zkaMjORgY5!Lo0`z7d ztTo+I1$2GDw;;j$K147W1r+t|G#$e<971&*Lp1FI_03&04efMQH54SIRmFrA1ocY1anOeWuF|% zJ|&D}N*KqCFxKgzEK@?5CI&Ke__DNmF{V0*r#ngNTlpB;_(A$d64r^3{XtSz@h~=M z6b@1oK}g8@1k{=+2FwNB!72&8@D6c+53D`{^^BkukXeK%5<=_yfG&4{9j^_ogTNzl z!p0H8#-N+QA&cTfjl!XOg)qkDAhi!@X&iV;2)PpkXNwsG!4aYw(hr1O4h}g`4njgE zhD7uNAhi!@EgY=sfmBS8!8aitKWN1SzV`ugqz`&!r0oTt7=kk}>myj72viv%S3nRp zd~pJD)q}}~R7S`oXrThQsi52C!25t8>#jiu_}C_ExTXl|`7$u@5m5;tE8$nvx6^ixP_YS= zHgFc!uojck7LZVt7nhP1l~5GoR}|uh)JHZl><$XtK^ma?D8+~&)10xy7E~X#da|^6 zv9|hvs-(U^j{YFdNg-^I`eu39R*pB4e0^F*wFe2 zT;qW19klugvIqf8#!8sRV6Bg!)e!77IYb2nTa5q}F^L4#I3{6`1qou1Y6wO``$S0l zh9LD3a?c3DhSW#!nIK3V1mVJ~9!O;bIz&z{0C_|XQU@WE@IDb{eFUkSAoUTPgw#Y3 z5?&v{YacWQa#sk<2Hg)1?hv^{NJt$7At7BM%&G?>f?V|=vq9Amd@Cy{^$}(T#G`17 ziQrj`BnC8xf^#|~zrzRmErJjNp zznBaK@cn4Slcd%TkTuK%Nyvg{Rp{ZL_!bbNIRMi~a1pGv2}BmF6q+EUcY|Dakj;j4 zMzE5Q(KZMPQ2?n~FiEH^Y!i^IOO`YeLQ@Yq4*`;Tok4wk$h~#=N$AKn(ntd2L^w&O z3`ua8N6s-Fauyu4Pvn>-2b&LqthI)b;L{Qy_b)&c*r&p4C@H&iO!l5M0t}e>1CBbDT z!)&F%9-_q%p~sMJ!jNapRA$dumZuzAgXKWp>OR4ua876K+{9uDhX5*K~^R}XNAlX zq|6c^BX8ge2-FFJEI+_Uf@gvt^Fb135s=|HOcK#8Lb}#L$T$>)zyoyfZV_lv9C&>K zr1pW6pnJd#14Lj5QuUzRdh2XW&AjqkkxUxNJu{jTn%}^M(RNI5v2A(BO#R$pPH)>4um`|hs?%RAMvX=K&FZy z49w0ET786B6G22ss)pc-@zh7ME?JP00Wuf@pIt&L5O7sY)T)n=>nTO|LO2(#P=#ALk(io&Bf{+lEXbB0< zg{X(Li)5TLk?SKF*KA1t9hro*qvV})*qsR}*2=6z4RRX0niD z4bni=N2PWQ)h=wUUd*jt%$+{0oj$CP`e;fpE2KV}5z0P2m}`14*TfKJT=kKZWrVm@ z0_d_K=s~9N1#+-~IMaAZ(|F|i2(uazHvwI=3OO|nDvP@gf?w_cnJt2Ji{SN+s8Kkm zGBOT@_l!U_5oA#uYJCLh5W(vs=u!mmln`cpB%&7px(E+^3m&8gK{Bk+$1zpqb-C0uY!P-5AfoGNCwh$0CXq@ zGVZ50FSsJx40m;j1U(H3G&DY@_-m1E~l(8my8gnj1Z@iC(-=rignYj$ zyw^jphTtSpeFUkDL<|EU)exKknF)fE@Qx2M15y)- zAl>_bRvAGmAP}kL1+RuMYa;x05Y}o4QYG=LyYp&55MnbCa(x7;gCHbi^g|Kmx&_N7vmWO0~5DA`#g4Adz zWd*pN!X*MJTS%&p6kM|*x-dyd$qCuX2rs+9g3tj#{5>XkF)j~2V3T+ftvVpcg>0oE zo`gG$NCw=SNDR_yaB!fZXn}+_yvBhuAaw_%UkshQ0gr~kMIZ_wByt6Tiw&R0fiR#c z$|Vb)#*pW6pxa9!vd)>(P8kp+17^!QW?q zkg+&fr!;s?1YuBKA3;vqk+MletB)jX(!{J%lm`vFxkvZEWBLYf_(hq zg1q8_ykY`e!o2L#f}Ao!T+#yEl6;(!{OnQ!9MXaukjh9#h*MF7Ls5hSxjxD?1Jy@m zwhVQy?A?Csoj$DnJ`5B6n5G6W%nV_H)JL;ISs?Y%jBut&VW9e`(~qIm3tAuPS-28e zABmdAh?>SA*GI^;3>q7`x`9hUdO*lh$eV^B^%0^IWDqJ~7z{!Nq44?$dWf7s5TqJ{ zlaOi%Qole*gD?3qGAHgdo%%M57-VmfpLaUE3Ya)mUN=<}Z2O+b;^${#Tf>y4= z+3@-ZnSq@5Atec}0tZtZzGMQO0dDakHtNEr2_R)7CW)oibIAf}2k-EO^tm7;QbCPg zRe_~Yiek*d8QNKZ9rOp*g4G=Cg2dNE5S65w4^xlN7x2)-^c_S5vN#Q{3{s(>k!Vqg zRv$rRu?j+@;3T}(fG{B95H^GaRYuO4@R|tDfQ+6&>L5%Kp3dP36517V%0{k_;I)r} zbEZ5v*~>a*$T?@qIHp5a%|TXWL+)+>&;CGGEI|4}P&RmO2z*!^=sr7$6oiBbf);fn zsev473hoxAiGhypNfWh77qvCrwgBZa3W4T#aML0Mld3ogo z`Q!xoWcayc__?I`xg_~GB?P#ng?S|fA&6T-fJ;)4TT+l)T98v#m|IPRLtT{9Qj*0+ zhCNu7AzGI;%bX$KhPm9Hq1Kh9-J7k$hrP>(y~mGpVi5P#F#Z{lLNlX;=fsN4iWZs? z!#*W~c|sUtUkF2&A497rLy0q2i8Gg>m5Y(JD`c0DhT1^D0hTtT;bA;9p!Zi&9uZEC&Mvxi@lZ4knkTE(wMQaG+Rj|T^AlU?k z1mBPX6@jq9BxHCTk}Ki$5rm6ILdp%aS_vYEMnXy(WD>Gh9SaHWf?cKE`Ur9- z9i&QvaOOygm}OPKDG*qE_jU`bfeo zf`LmItv+I6VB}z77vkm-7Z8-@=LaD^ZW(?a8GbHV0d5&V9#FSPkXuTKM@onfTqS{O zA_)O5P$dQKH>rtmsEKeu*L$mh>Z43EhHMLl5*vmJ2Zjb0hGuuB4lmYDZ?;LnJhP%j zmZZupNtK%yFE%xdcXBw>QnY#TM+r1geTzN{IxeP3w4Xs=ttFlGRV@1rN z19afZD28Bt1h0S~40tsJXF#eUOcGubL8~Fy5FE4z1iog7Uq6H&%s>c&>m&F%a&QJ{ zqY&&oZCt$}c+~@EAgUYWt`N3;L-4sFv_27}CW2H<{OX`Zai}#BXw5d(Y6vnY$EOCW zfOynhxxk1AqdtOHLy(JycvW3+)kj!+J$%ackX=IDN_O!22(uc3i@@t62m@Cgge(rp z7bqn7-eIT+gpEW(Y9%-cshHq77g{la`&JMsXnBHdV<2*u3eLt-6M;|kf|NoO)<+=i z(Ao;J4+V5FG^E5vCXvg0%+eUY2(n&c*!XK7YRKZ8sexB3@Hzr6g2{lZ!ODQdGFB;w zAVdKk5>oqMl8}UfoUTyV;FJc@h0;F)--Rmam?Z)3$VfTB_N9W=%Y#?GLFR)L!1WR6 zs04>BP{rg3B0++n>r>Md9YF{(A_u9JKuZxEvSdID5;Ei*vfv1O!4PQSoD}TzY{>kP zvSYHMeWId$qOxPMvVEemeWH?mtdf1Kx?2teBT5E3$s17Sl+@Q5Fj4c%lh0}+A9Vx8r|Kf;E!eu1o5!zzVG5SmWF^#uwWIn6+1 zAtDeq5(%k~q#V`guf?WdS`UZL1cqO|8 zMca5qyBI~g7&ZH7Rl6u-0bUtFK6yb- zS$=k9K~^;p4l`+XYXu%pWj23Jo@gD0NG*m$U4~?Rh71FS9216e8-`jZmU2h-N@va{ zFM-ZL@%~Wh?m+R*0Ff4N{&p{}CRf%9d*)&*hCFkITr-9wQ^rgi4g(8UV`~q{ec#A6 zk%&n&7=iW#2^&Yi59)!}N61wV8XHm*K}bmLgGQp&M35~)5E63TD(HGUjQR*#8R-W? zY9a_JpzkMu+B3ph4PosHAy-Cx+FpDZl@VmS5U%$O;EQ>RCCNwbIj0jOwo2oHgHNbbWXBxjx%?PHFJzJbBwcc4!3X&g^*?r zVHQpi?oJ+Vj_!U=PQH$gzV`OM_V#`b4*rgg{*I1*4h|vq_96E6p^kQ;j&>0amQhaD zu`ZUeE|v+-W(m$_$aFMfo zhNWntF;BEEo2$BzhnA?Ssi%g84`ijBuvwIlX`~3+HdWJbVbc&1vrvdQ+9GY_ng~(> zp^@;K2(1o+b0O6boP<<5(1AEu-v?6lz)8qF5gLhWWdfvn!df3e>L6qi*A7|~8#YhGr)Z7bKY~;}SV{8gBN?Q+3C_k-Nr4Iz z@FWy616HJf7DzzO`vQ&ofzO$wU3~#&RbAp9) zf|YZ)wM&GxON5nkxS2zknL`+IeFUk7{2d(woSYyi+{p=qoE^fQ9b#N;V_a+#+-(xv zZBpE=Qa!BG-L2EytTNm!GCeHwyo~a^j0(N=3cd9Td^8JuH1oaH3VhV7y(O!>Bx-$x zYr&Nfq&{j65H0hR$+i=8RpBubWKiV+RY|hEEV8^T()?`F{A`e(5u_%P7T}f^;09Gj z{Ok}BNPVOs%%&^B?I6$Us=^hm&y!*ynqeiJW+51B%rE2!rSuaB@+J!pcEdIz~Cg0LYSB1{rbH3U0I0kl{SvSbcSLaQXKbr3`l zQsY2sA_xf)fsowFPFzZk@ZCc^N{$f3rEJed%K38ON(WRQ;b1^(B1n}4uaEEy&>`1K zT(aiUwrS|d4t!e*-u@B14nk%~+od3LWx%`6A(a%Y^hk#@5S!V-Cr?7UQQ-OrbPgC= zVFluXTf&g?1V(}uK7pi=25k^raDNJ|%7a!$&Y6&M5~3KwhLdQeF>;@a7&b&Z+%$p= ztd$NPL4pS2QUGxnCW%!&L>ZbCdVK_2fd(Cv`vHT@qyGz(oSj8PHE~6sTz)HhW1fFQWo4ivlkTWL8KD+$Rzj;t&_&fYw2REE<9==3)#s(u|%84E|~i z5jqU9hK#8u>{*sPu_inL>I_yg42HrC>Rb#;tPI**43-k?eo73Xnr!)Ij3w4=H4Y5T z?x2lG?Ox2C-V8l{OcTMV$CtO=gCo&eF4kPiR87%TO;JI|TFuB&+$31iEEGB-huJfN z9M@wSB5WE0S(^>-@1QY|57Wk9A3-Z1_(2Nb*&le{2Yz}FsA58rf>%9o2BZ!`CgBr9 zXtfV$st98&4pJpS4%vo~nAH$?vnuG!1jsE9Xe9FB8z!3%cF_amJReLFQtv=WczpyJ zlmpEi!8$~|%1)4)2ufnBjELzU;jfR7Ya&R`2)W+jk+{wMAgx?538|+rN#t?~g$*8Wfz?EyQ5tAngS9ep%!10o3Q2S>k}kAT z7mZ6y9fYg~Ye|k~CNdXUFEMOnz0_r6t(_rOK%_vo#W`fc*^pWW60?wA3?z^tBFH2> zmgSu?P~sH1enDb`sv&SU2gCr^N09SHL0ok4WEsa)Df?s)f=Qv(M^cWdQjV$W4yg*3 zaSD2#lIl*{I)>Uhh7NiPZYFAR7ShSK3I$G5#jdgy?ozcr@~yrypeo5%s?Arb(_b7^ z6Zwg?`G^p#kJ^2CT9Htv4^NjbF9c2S6P(~DIN48dq7VN>AHm6f!jt_3r}zuOY9ek( zebnd2395`>_0eR1wyA*}eSus({v7c7$WjLR^Z% z+}a}Sdg5G;3T&<_Tp`-b;d*TG#>`1(Y#HWUS(ZGZIxH^I40-|#n%oR(oS-U6hnvAf zn8{z6AzYgy-;A-so~yxyvDuxa)q}Clk)hd@q1}@KvO~7hi>uX*Ezw#o&O+K&Pt(Cr zN5jNf+uTjUG(^%YRKx^4PXwubFiF!8A(LQ8KM1WF5;qQojJzS&K@c`%H3GEu0k1|t zW+N&fq)G?0Ob&LQHt2E(*u)UL4iWvbr7s`1mdFBN031|Op;5{ z7J@LVA;`WV7-_|=07Z~K5u`GLu#u}Itd$X$thtPJDmnt+ZZB(-212$evbYhJ8Vb_e z!ssX=S6gs4yqH2^fajN?GtS6t=!ul6a$tr$X!#T9NPqa6C=dgws|6_=ags>&5n8h! zQUb$C$SgTzHi{PY1uMfw~5j;wvs^kU~^UDiPmFIGUY0@VF0fd zV{G+c?DApg3t*WR%sMTYbwVIVpFdk)08?K8(}W-v2%|56X<`7=1b;?M)a%dK>(AKj z&)DV1&<$pE`!n|huuKGNfsnoa%zXhYeE}>G#)Kd?kQM%Hef}JM{-86od;HlTsLP)X zfr7ZYg1BnDg^Haxebm`4LGwop8vG2ZybLNl49dJ9q{7Fb!pEQ{$e=F7pef3rCB|SV z$zUSG;I7Evt;!Uu&623kk!H$~X~CUq!&qR?Qe@9t=_Zn3Cg>r@U@ppFz{jA;4nn4U z431Kav6>8tx=h7Zpi>%Jy_nm)S*AqDZLPH5-x)YPQL`_YtHYP2&yTa!l_AttDa=k; z)!a|n)JNPn6toq{BwEZkO41}!!Z-r5IS6ytD&*WAto0G3GJ=!HH4aWTxMze}A3-Y} z-1QN>>VY%h3lfN_eIS()WWhGP-a%u4DLa)~y!IiM0U0MmB|+5_xEg{~Mlcd|Tt7$(UTq<* ztiUW4AtKNk3cRZkxunI#hSx_#GBC??{36J84KZw7?xdEu3{npW(@eOmj6)`*mVtyS zeiEV*N`fN`TGt@g9Y}1*{!mF<@D?6$or2syLKO%1jF9Rhh%AJZvQL$?OHs6o5j6@B zmQ`lu7nK%ZQ4r;{k!5yP;fmH{PB7*uwqmGuVr%taZ1-aB@?q%pXPz3wGBt<=xjyO( zV1iUPpt=dQGV1qZ==Woo0IrN6^%1NxV(j*3?)GPf)I<;xQW?SPBM=u<8KKul-F~c{ zFx2JG))Bzb?$2KBC0OLh;ibxICCZ>B$e_*#s*hB`l@X*$QsxEKM_Qtc`qE5Ba;(d9OZ7@)gEGX-jYQg%Gr)Gp~gZUYOHoLoMs}7 z)*=kfGR*PX45@}}g%%9ub_{jS3=OUfkostQqvwPur4C<~386fbf_OVUSwd}l zwef5nf>cb9+6P`438oR1Y0skYJJ43U<0prFd2~A z2bqM|IB*81j2R}9v`NB>pl6uaCQI2SL+)WgCXr?Fu;H~5gn_k6f=D68`cP{k#BvM! zWEn7mRYuT`610>rE?SQOt^f}MQv2XhKxZk4Pw`j^@2bc- zWXhv<8zH(N3P48+z=qTywFxAgkx5AWf{v=jw%i3Wcni^si-gxia0aAKf!@Dhn+6iM zO969}B<)ZTBq2dsP|#6myCfO=WOe&6VQnW#5h+0)Ar%1z4G|_s1qKgw<|ti;1Y@=$ zE2c6#wni70Ru7JDZ;l=x&WU~?)Dyr0L6B}vw?8waN}Axu+V9KK@5j^+nnq!mh*lp3 zFm%HZL_MS?njFA18P>7t_hXuXsHa#a_%lO#TM+SHf5sk=nIQAK{Fu7^nY;WsI{nzI zJOv>2k+m3uwh*X3(&T5*@h9TFODbIvC&73F6m?KV)Ax4KG#gH-6jLpeZ)7?r>!ZbwOBv{HM zOv)ro*f>hqI0`Zf2dRC~W`f{?PzJ{O1V}XmxxoQKLTemw?E^hQ4t%OMlx^TIYTysq zwF;?mAZ+NSA*8EUAxq=XNC9o=QF4%;j*u2Q7b1>aB_Y>7kopKh!Yd$N4R;6|ZQ&ev zWddZO0(5->ay100jF9_5$n_B~($E~dCW7{TzzgKi)@j{B&0M$ zR6uAI6Sy*hR7o=S&@Cb0x(VD%qM$y4l-Q6`mvRzPhSQED*@fWQI%0G|>LZ9si2EUI z2nnx`;8h8vCW6KqjtU4<7Oox6097%NiUunQt$o0C5JUk~1f?>Pv`c~{Ge~^|ISWo2 zB$%XT7b>LXh+H2zD>1mMF-7X2)JM&3Y+YXLJwBWhd^slga&-AIK@g;y)8)t1JSDf$F0^Ur-kbQjfqZqwWBZ;$DA7NPRTPpK%hbV+E;qCP1Y?^%10# z)#uCD54HufKI-!4sPYsjuxIm9WwjIm)kj)_3_3y#y21?lLJS7N45s1?7E%nZiVQv) ztU;Qbff^j1N=(i&3^pPRwxSHdstmFE?Ahkbkou_Ai?7W`s5e-$Gf=$6N2nuEv?EZo zz(F!vo5f#|AxwiY*OIl=k+;r`wb`4i#gn_vnYF}%J0`5=@U2SLZ&} zDVtP?I5G)t#HK*BL&^%OkeEdrX(EvF*)d(pJ{4LMfkhzuC?TYTeX0aZ7G*pTJk~^A z5lCrII}&u|fn7SLo6&5Pf~i3c69^kRR)#!ohQuZ<@a;iIhC(|pkfB{{q!c(sL6u>w zrIE5th9o4=b>23~kO~O0`4rBUw@H=LWD)1}za5XL$w>6^3YC zhD2k=JZq*BM~+%omO3}qHZRsrUyfcF>hWQRAc#o6FI&GaTaPbGw-56KP-Vn4!H0Pw zxUU56<1qL8G4+DFQOr=%4|Gifq~4hjzyL>(TCCq6D$?%{IT)M1Fw23^Tl?#kcnDOm5y+vv_)@5)`}$X8*{6K5_Ss>f%pDrqP$tgWDLqG=(c z=d57l4jPm*43;(ymN5wtHwqUuKv|O@ZWs*e<`@Qu8wJ9Vm|*}EA>FJhsuv7GkZK62 z*8{I^5DdKl(B*fqBNQOwDAf?gSqeg+!{vO0^?Zc&y@d2UA@vdTkZo;05p6#qO&<`_ z@`BVvpejZidb$E~1%$>H)btR9-2;wX6G6_|#;lJ7)!hZv-38R#kdQ0#03Do-T>C)S zkZT|WRU8Dt9U@3IghoPY9840q4#LBRS3PJ9cumBwU;{>0{II=6yowe)3g(bb5Ka=@ zGlGxQK^T~Wb(}J0SSurGYwUw~k~T@=U<9wNKn&{?N$V7dxVTLca+M?j9`1t|An;-Y zQc0na5LqHgcri$%dbl!NrK<$E4nig&DzWz0AcZ_GDa^tjj|f`5Kz%N_n~CWFW#JJa1HRrA!i9%676!HoNZK|HQU_s@a1+o{3xo@)hTx>EZ3=`9 zsgGoA5;5x|O<`t8eH5hws*eh6SxTKaYh0LXT|w8OcKUMk`EmBb$Q~bdNM+RP%Q3-^ z9j!i^=*u$6j}_M6VV(f1j36^ay|DVI-;V)O`}F%W^!YJN@CBiMe~=)Aoao0i!I!be zo1w>>q1OjQc6u{(`7m_*GWPf}wfQi#`7l(tu@~4u>mwr(P;baun!!ef!A6e3PJzKi znZZ#3)KOC90No?Y4!*rllz~B=9aJ9$DKkRqqY7(|W+$d*C#Fs(h8|aj9#4iYcZPNk zhE6ZWY8U=0XTEeR-e^6>Bm;(YQ-)G&=5kxsG8?8+YsMl=wtO?@C}ZJBBOxaP6=y?L zOFdg_1ABQxH$`I)$h?oFVW6~eFsL%ZsEZy{Z8P-TQ#AAzbNNM)pnxP(Yh(-W=g!ODd#P5_;#z^CZ}sfOSr zW_=`}=7y_2f>b&JYOW9l77~0R5n6qOwvQEF6A@YIpeaMEhTvR0^%1BtqF#L@WtE5@ zp;cItHVGIARC$TpB*2k`Z6X{&$_-Gp1)HdXh+rk9?NZRHGYD4_qmhiin8Io}q*{cR zrD)|U(lRaNDe`1UDNREXHYr2>nh0VtxIU7xONVwhVATPn4nig&euRe)G6T}p#U!P` zbr3u*h+@DT0I7rEuEMN|pdzp)4c0h?)IN|TDr1u@ZJi_sMheym3f2iSw(&By@yfRG zlE$HuVhRGh;wl0R>cWgpa-jZElrBS}2}6+$ONA3xtqWV7D|?$KXQvNOpC5OxFV_S= zc5ns6GQp1(j{1FBdiPEWSAVxG%=8&4~!-Ru}lbJ z=?!4$4FD~B08JJ7u}%c7=;od1$35AfW0D{H1bKWdmK>2RY`B zk%57WfkBOn!9;>1ScNHChpWVdq1u|g-I<}&m7&gsxze7Y%7LN5jkVpAq1}_A(Sxhr zjibm(Ak&gP(~2#_k~PnUBhQAT(2Bj#iapbuKhvDw-&EP(R9VZ&UdO~y+Q?7J&{xPX zK-efy*dSckAY9xqSR6bVgf{pF;X->u;K~Rx?*kD5U1kTVT;L?Q*CVFq2S?DEAXt|N zQ7eH}BCkn+uwk{24+aBKA3^4WAYCC+>myz*PhKrhZ3XTSf!1$>87P$zW=#Y+X#uHQ zG2@gq!HPI!jNu4>Wh7;l2+8bl5~DIofYdec z$_R}Ckwqr47A$Z5IiSxBi)Jc(Q*(1Hzj7#ag})&`;gUg<#UBRCh%kg!dLvq5z_Z1fDR zGQt@h;ITMpU4z7y1oyI$NvIm6`Up7{LD=wQ31^_yM^e_YkorhUT#=tgOo6-T;A#fqZ>_-0=Dcw0gl8R4aihkw6a6qBzhhZkDMb z9MeNMW`uHrE=vtzn-Rt`BaCHE6z9xvwkaXZ)5F=OM{!OLV+Ab{4`!Vl#yTY&blLZm zVAd%?oFEj)H#vZJk{|mdKlT~n+|$FjriSrO2;}JYW9<*(oEXB}AH?1023p(J=)uz9 z&Rp)unPJ2bs?HFkf?gka%Ys%iScx$xaWhCUGq8XwBvu9n0VW1@ZU$3H&MXdSL{ zbLJWwjtM@j{oXA7p(0zWt#;Pi%}!L962>(lh^@t&x5b;c#6_skk-x~9Ki`2T-g+fp#wQZUFuEyPMg&&*lR%vshXK-MHc)FfEcB$&wh2vQB9llp<6kvDi{ zq!$3GZbWr`;Yb)(6JcdQ>LB=1ZCv|?z}+QKeI%gc4np9uIL!J8J|85g=?ST2Al)Je z8$$AEdhlpMHWNW_28VZu;0$(h3p=Ju2=xi7C~-n z6;QSpfGyI7S3__H+HwVWO@yoV!4$`=i6A17`iM{78jLK_>LbVy93}}_!vI;aK(I36 zkTE7$AE7Cbu#U%y#H{1t2)RmvlpDm5QntzP@jysr1ShFgA4$R13QB@IV#wvHq#a6q z1gV?gv*c)nt+X9zUJO%^G!bx50o4uY6(y!Dyw1X`P_T+XY8gxtQ!zvhvW@Vn2blqH zAwU?AdIu69k~W}41rQOOBvKs&t#QC2$TblP8`iCcCm{*zLz*~LrP#L8PGN*G5;ODOU4h^YxMXp1ttDl&MgGQ=7%B%3ki+cK0mu~xgX z)O&KY`*C#z@^<)e_5|?u`SW-C@bm`=^alv^`0|2|BkRhoXz&uM_Y`dK z5NPlaX!7Q7^x~`c;;ZxIt@YrkcH^pWWCb00?aW^7#8&G9It05Fd>VGM3(M3nvAMBI zot}J64j@!v%T;Q{o@v68pv@4b#t^8;;49DIBgfz)3qpRf41Tf<9?}f15)5v#Aml8^ z;Hto2A;VxO%Ag_0pvKRjz{8-(%b+dBZ6Gb6BPXUUD=I0(Dager#l)b<2|EABLxDL# zn<34Rq1>FI$_jK!YPADHl|5H)kZgaLR)f2At(RbvzgSD4M4h)#sUvs3HEW?gQ;`F6 zkt0KaJwu@lLy;{*sU1_P9aDilTY)`Wyp2eby||5yzNMv(qKT`rnY*Y_peSr^2+|u8 zF$hAe$JP%7txo{Y2ccI+2LAB#6NK~vgduAWAhi!N8zQ3TE2QfKseLd>0bOr664C|L zJD8^^2x)nOkhZ&!wmbaB2T%p1;VlS9NY@Xct&S7WfY(7F222*TmK$FCpfO-I5n|ak zzp9IXnk!@`2(ot-rB4K}j9@cF$g30}JtMUG2(lCbP72BA z!y>}Qz{d{iB1tiUHYhnsF?cJprRX!H8-XgLDl3LcJBBKIhC(yOG;M}tErvu5&`LRf z8HNZ|hGYYVY;(qZYt{lg#zK3hLI;LI2T)}MuaEL0K;i`|2huzR>BBX8+qCOJP_7uB=EY2HOsC52+-iR41N< z)IN~O7f20)pTx4?8GIrKE?szB0`IOOhasdIg3R3@&*s1f(9juHDblD&$}(BXGFj3h z3Bo`lk+nl=9|$R7orqZ<$$%SI;^uMUW(gnzZIeaJW96;lC5$5!r4&SXh4sZ4EaW-8 z)EIp=S;F-hql{P}=jY^DvE`Dt+bs&jd%uzM=AxvR3cDl_?LatG=O1ZnXFY4OGCamDI! zWf?ORS+lizaI|}Ib$GK(3gw#-B|0gDe`2Umf3QG{k4Tf3P@#iNwv||fu1KI7ueUOr zyF8PtJcFw|gNq!4vn+$N41*htZ7j`XEX^du&cqBJl!FsY970?IQgALa1A`GigRK;( zY70?gC~@Ga_ZIK;7j5+xEO+BBb>XaY=Bsz%FS6oH)@E>%W^fW`aFSr~R%FOD22H4y z*fN&bGM3sil-h%;q(TRVe0zpGJBDHh5Gt}|D7Irrv*k*&I1KO&>6h2 zaXI)*5WlJmv_1ll!a?dCOcF8#2bm9okdVrVU&)Ri+a^{>O$3nzAD#d@KLK(dE5U;l z;Jb%#E|7!I3V~P3L3%yVnh0DaVI7O(1b2(L!Icr7`Up}JK}b;MHIKtZN?InMNr_s- zKoDB31h0(H7~)n5$Ox^vA;?85^D(&uO+Y9BH^Pw%RXAJ1HW{rlf^!F3eFW*z;Hph9 z#o_f4dMyAOC4*>@v`&FAFzX|DC}CwlVg$L?z|TgjZs1%{^#QJ!;37~4j`cRU>LX}! zHA@gPjhC=a6gG>Ivy786jZ&6Tk`NR(m0`11;_*^r^wD4r&|vUWV+d9U9Wk0@z>s6b zQtrTA<;>UOF4W;I)*C3iMJk(gd zG}xULm~13Lqhxks3_;2a;TkMi#tg+a?2RrgZJwO19*jNy920~2CI<8M2MhEC@plGF zw)u&bILqhRN`~tQ2deRUDzdo9FgU@GqcnpHxJq(YWC+&fuvh0(;bCB9U|@vTJs<`n zvVa5+i?O=Aiz2h59BAiJq&9Pr9ap(Ce~lBzlqiKQ%^uU!v}>Jso7{!+&DoN*8A7!f zBlTD#^cW)b8FS1S3aps0)<^mF3`JlB8No}n;Y_jSFfcPTFf-&c^x-q~5!d$@*GCzD zL#~_P)exKkscRs8B6xiS;)2KJgmnBs2((Ds7x`RM%=(B=+mjEYGJ^Dvz*9dS@cIa{ zHXBkOL2d(w_J-8G1mFnPIl@u%leFUj)aMedVpfh}IVU-c0K7v;_1R3PjN4P2@V(KGEH3TQc&0|PHQsyyI zU@elCal|0BYD?TI4%dhuOdNfD4^g3D4E>QNg}Xiim$ZoWf$+*m9I==Xw1yB;_Ck6D za5fr))cz5qPeD8h>9tAOCK8FHtrM^!SUrorS`r)-&{h3tZ1^}TvX^nyK2lck5F}|C z2VvkRG1C$r^>D?QsSCM261PkeHjR_Fj+HWt&{dRFkr1#|W4BRdGnVJokzmsnVAbMh zah78AR^m+3W6m_?s&e3{bKz4SGY^17_<4Ruy`pldMPn@DKWS!fcivk@*sw* z9D}PIgRdN@f{WDSi#8CjkzmkbV^HN_(BK9gUZ>0p8k$q)W-t?G@Ymrhag#4~S4cGA z2~uPTQvsngebD$}hCV})C0kdRQnkBqq7g%)5krzO2qhXbr~H&kquJ`7?s&DmDw=mTeId{v*y|`=h`sa8SB{@>%poRUlDy@7~2n% z0j+>wlR;3fuD`IZKcqf_kkHx(Rv#gEj*!@p86s~;R|uKp*8){QU=lQB2WCL~M}k^z zVB{j8=?qz?Eu`TNMjk@2Y6#bnKG3yrsEZT8l@4Sq&J9-exME{~wzWbwu|jGh2#HxA z!K)!S15y(~Rw+O#BS9rQK_xrnc^?=Xbk_r}4iT=322w-el7)*yCWDaI%fZL%@Ki>S z`iM)$7=qxVb8rT{8iF%Os*J?Jl@X*qLe>Z`A21o1B?hE>1tHkJjW#tua9t5C}`qXDg1>xyO+$s)|qQuN%LC7>-+$=%d zGD*ZN9#S9aDM@L`iJ8eW=!i0iGcyP=Fvv17sB$qlh%@*ob3y8(3Olws7oHAp&_cWZ zAnr+_eEmV3{Xv|)fqWf4oK4;mRn7tdI-Fi=%$_PNUaG9#T5RrWOjhy?`XUTsEDY=n zpq`5W1A_)HgP|CcrxHV;7E_KPs6J|NVD9kX?(t-4cVTF^XXtfhm>$Y9DS%<3FUtfU z<|0Rtcs)jUIR<+%1_ucca+YCml4fv_WU!Y2Atz}D4@m|uX$Ds*25$uhS4DO!F$M!+ z24gV>U2z5jDF#zn784m}e;vMb7qv=nm0Ev|d^@RlJ+2fZ?o?x*3`2%&6UIUd)_gPO zG$V#0JFa{Ou525&LMOg#8;%q+wpe|J6hnq|6NWqsh9VnKeN<}8P+$i_MAk>(O+cXf z2wKaaRXW(X(8>i|@4$LJp!>d&#i49nAMnT<59wA$1V9vNME%Msh2IjyZ+Y zK9HISLP8Ih!?9QnvP1zg3I`z}^^t(0t$-rR89tcx5u}zuCgC*;eg?eW!Nq`#)Ztk# zhg>~zz-l5+X(LY93=zEQL1wT?8A6blSu}oxTD|j6NT53gHHw|2ZfmUw73`Cd)u~)(-7CQp3C&X;4gAD}r5YeRI1vQZj zY$Z8p*%&TyNZ(D;CIJ=**q1xPWzn1m=fV}BF_5citZY%F$_TV>0mpb6#3FboK`Iw1 zt9WD)WHu;ikr0*W(Fd=OM9ibXC|1-W4pcFj#VXjwNSTMJ8@cg_YlCYH&&g#$yqJ6oGKONTF8lPg1uJ425j1LzFZ5YX{K_3mO74*V`E zY}V2Y=2A=+(#*y(YzC6dBJ60RXY5Lx44MMWK5`5ZT1+`6%mo(gjSlQxo&qh-JRR-= zQzBJ5d?jWEvdj!*>Gx&m_GHL+;z=-OP*o69RS=U@5R*}slu?pJLkbFV^7683GTcfM z>=sg>{jd%apu6)xm%7R_ILR?M$}%{~GC0dI1gbJc>T<>E3&rROW|<4;+elQ}a8_7z zG&*tj_=@!hOZEjxbO(vFfi7|aEepTYt4$>KN8gQ5!CZRtB>Fn3Qh*NK0=iR_j8f2V5Dsf;vZ<&X5{MP~8oT+y&Lq#@}4QvqGS{ z2DMf~)JKqm6kwxpp!M6R^%1B~1gV3NNj@bjuX zt%s4aN+c8)psE~M4X*S9Q3I*1;3P;ftV4v>)B#sU(IV!tBIclulDK)CyiK%}d6=$+ zkE(%-yt1y4m;wVMHv=QLG&_qDAGe(}qCTp2W@`54YV&64^keVw=jiffn*hFPq9=f* z)0eTzNwD0W&ry-tMux#kmeod{-Ata-Qi(@JoLg3ag_}=?lShh|ON4`!UzUYIjTf|1 zE=q?b-<++?j<>~`tIt=Y!(CuyhT;E9bGKEyEr{S=7|Am$f^BLrW1b^-x)qDPm4UsL zfvJs=p@p7-nW2$|v5A$bnT@%rwHb(EW@2h;Y;US!q9tjkz-%D}+7Ih24O;NvDi5xV z&OGfNg5Cb29X>*x0YYuQ{B8ci?ExZH z?xK}$BBhRE`PKrNCQMnT%=s1!g;orCHXsD4i6G=4sgIB=9r)Z3To6(Lp^>E4N061+ z@R|t11(S&3I9N>txq--)SJ{~x1tI!G$XDRODBKGlHBf2dRl5 zByxQOTP6p+cnJ9}JgmJTR@&fNT!EP9f|OC@k+8Z9qk(N5BaRJW zwFPTCA5!KM*)77`vjFvv;Omyr7`W;YT;f>WFKLP4eXBT#N=Pk(hZMDdw*)}-J6cTy z;fk9>O@o+;OhRHASp>p{q!u(1Qg%XZz!3M9EK!kewDM z(i_TM>M5S%z^7wmqi13#XXGhs=pkj~DQoH@ZW73=Mv2DMnnWMoifj95s&2jcy#tjta5XQbHy{VisW{ zrr`pH!NSH7!p0FoM&Uw6;i5*7U=%KD6fS8LF02zMui-4C>Y$^bq#_}uC&8gF$!Vd$ z>ZHYQBFhfik;KNJz{X%B#_FcPAF0U|uP;z&##nB{-s#TOaK|~;Z9LOG3@MI8VaRPJ&0=NPKcXPl;CBV6; zH4&&jg7knO<8lxZA_5_i`$6#eAP@uGSAx_;JUEtcL&oSJbr6iS=7rsP2k#Jp88+NV z=ba)~G!Qn{Dux(A`iMiy5Dnp}j382wOf3c;rh{i~TnxBk zF|%k$iGfxhp>fgbBd`|2^%1D>fsO5ewIHf0!dx8d)v;Da;?^;c(h`$|TZATsrzA&{ zf)w4jNEyontO#E3;H+3tJ&5K!I2ThpL<-H3m|P<3BY5~wg@IfXLD*=MLy*l~ks&~fA=waw@=QV7g^H~hiYytL zT}A301#y%dbhu-!rG!m`g-n7VcLNC+gu;<9_%t42gHR#;5HbA_af46|Lmw?;KYL?SV|7g% zH8C><0Yga!JuwCuRtA0s1~CQ(ITi*Z9tL|!jxaU$C@t_^IAgA4W9Bq7wj4{&Tq~|TE0#PfmV8Twd`pI6 zONJ6FhCFMgJZmP%dIo~^5w1!Hk2rWyoF~@G2z;g-=tu-ey@QK{)JKq-2tq?eMIE?ZFuJh(m#UtgCGpF z%7{bCP{K5dBqU`Tg@z#7;e`Mi15yE@35uCPyGsyR5pz(#39W>o5f`}zL}sJaN1~Qd z5CnGyq_~EYaB&C&A&$Df9kV=#i$H23td)+8Wr7?Af~dr-M{tQioCNU~gpEvM>Vn9^ zorlH{wTMGQ(3NahHVcr_JOFop#If~jAZZ2~yco?Ph%6=vNsVw4(r^+ti4*}NQOhVn z(=a8=P%cGVB>^Ta2`*1%248iCG((0QbI|tHVk?FcYql&yhDap_A5jKZAqFRY20IZ3 zA0-AGH4#r^MQP(8Ny7jkqi|v42*_zUkb6JB=rwauNO)s3OffwjtpG0Q?Q!;C-0 zR5-;{IN6dn%|;;GhCkbeKhK&w*NUsak}cPaIoF&a*PJ2Kf+5G6Da(dA%ZAz3SliZE zTU6IaRM$s9&r<+)PATTu2)HUB$gB{A#8n4D#9=G6(XU&DtVw{^P0*@G9kwh%!v%6W z9HcUWut6m1RdW|}2&F!PY+}V#9}!a-amkv)>m6JSE>N8%=t zcn~~a!x?Cmkrb>t!z>vfBEn`-!e)peK{2yvNUIfI6Tuk}^+b}8Vu}nBxeh{M!zv(1 zH3TQ&RzQktI0+YrFc9J-)<;t2aafUzc^n#oD1+A{D2|713PTozI1J(u2pgG1RtaH4 zWFeJ~h;ffX^f*LN`lH6JnT!GpQna-ju{=(I6Tuq+5^=@2Mjy$C{?73$A@fwW&G7LVF z3=U!pj^Yev3VhzC%F@O`(#Amo2BDylG?QR4(-6q5+&pIC+@@jj=6>S(ZZZ4Q>h9pCVWK)Jza|TO& zbx3_AqT>y(k8o8zXyW+$LC~E+h-KK&a}wb74x9n0kI<_jaD4=-Wxz+3f+`?1h8lrE zIe3)>sd11=NKFJGA>vf2k6_CZpljzKH4d(=tPoi^$t7=zr#>QB8R4pZ;Nl$8rVxZ& z`#{)Os~$8#NWFuLgjYsflKRMb7C#%3$1zFBupWej)IJarSp>p{)I<;xOL>Ai(*&=l zureS;C}<%BtbvVO#DUlrQQ~L_s*1Nq9JTZoh5A^DPsQN=B%q4Zmtw#qaJIm7Hh2* zWv&)(qU@}#Vy~`Xpsa4Bs;QxAuBB}$Z|I?5coyu`YL#5w~+dc62sojG#ALvTe7 zT*Z#u)h>ed9-<9iLJeL*jh=$_?)-HwJasNSg*IGSrp$p_T;3`yzG@7CT8xHTiWY_% zl6oFu+O9&-Q{x~zgFs8;AiW+mQV=#1gr5O9ApsXDq~QU+NC>hz4xNP8O`!S++z;YW zcST0rYA#Tu?!*m4kaOgS8H{W`HJ%lb%2qszpuP{J z0)p=tLS{e)<#-jW_!TYS2wwX@RxF?m$3eK@iV0dbamib9$eLqC>@sE$1g~+hGN5&k zv?-UgDWvvclQCw4o&3WtWr9FR%ef(AaS#$$9Rv~Ql+Z&%kUWJ<;;)r3Wg(>jgoM;Y z5E5Ag!WK1+f)ogFF1&=n9Kpj{4Iv9cN??cySV;3oF*F2Sral1m5uREAQvPF-khRK~ zBupjrlnh89;2|Ns3`_?ghZwRfd}SI&y#oq2O!pw017Snz4oJ;{Ov0-h$b=4ri(K_U z*l<}07oJoIGLY*dNs~xP@PrZA5S02z#3T|_ahQZDnguDE1^HPRc$?}58_NY7%Y~aN zM3^Z?nyH4HD0>=ec^PTj8kjg3o10s>nOnH2nE5Iidm-0HqDJAu2BD&01icXz%mxV> zg^L;ng9O31FvuDP$bu^)c^D~e7$60zR{Y`h5%dgF)Y?Z#-w(Maf^Z>c!9nik7S!_* z0_{Te71r~S)^`Ifv`|#$;Ni2DWpGpmozD=e&XA%Bnzza}0IjA-RcDBnWeAjIh)`jO zQ~(`eS8mEsVaAYQ&X8^fI;5i1nYYSWpw3;S(ObCDTe#Umx2@doHA5I>-bNoP>?VA;#-?lq~VrNAQ{m!hlys z{E8OH^%11@!6Z3k&B0ZYHHR#EWdx~!&`7M+5L^&feFWV)1g?+RB#l7`wt)yz4Z%rB zy@N)wOBz5BT4ls3p(hGjzyn&k17{%DO>j1R1YX2694itx4M#%~W)Tu*5txca%_2n2 zB81JMNXRT4&K9wVM6Pj&VMEG5EF@ZG1m}WlB`oSm5l1e-DP%*c65@OcTEGmck05;z zOkW_I1C@n%oM_)6*GJfDACMbR#nJb#K`IV#MSx{$64IW5EHHrdSuj;X)IdmxX}Cy8 z9fVAZn?#BkM?mx*PFw)`JXy+aX>-mU)yGVk1KD=6nxE@%%CT8)Fuh14=|68Gqw2U<-8;esn8cktFBXy*vtCxY)Af~-}5^m-t* z5;6&?ec+W5rz)tM1M2BO&XhwUA!`!2lpL`lcy9<^2O%@yJtJ_32r>`{=@}uBN|roI zmS6=SLF7sY)DME(1`Z}6QsC;w5?&Lbjm2@unSu~Tw+O8of^#8N4~MiVguy9e2C0LP zNl@nq)<1&OM3CV)v|0wt)#s3cB6dkVc1b-hNka(YlrVrZI3)~V2)%y0uE=;wq^iwH_u3Ev1PrpNUrw zshEgY4^fF+gWzXFY5`b%fnEz>ng;2oh?z%cigBo@aj3XSu!L!dq)9NON)k5;ls5{MH3*gjS4ooKh1L*_5EG!8 z1$_M+a!mvpaD%NnfQXBOZ|j!O4+0_ZiK7w*0pj}pkTEyV#E@YCqyoZKA3^#-@M;Kr z5(45h9(a8upzAGc;4fhsqNHUmBBN<8$>65JUEw0oBQ<`yr`W_Ok*H|9oHraC8v zTsy8bb7mK9Q3rKFeN`DF4S6Lka}^y+P-Ud$DWc^Gua96mRZ*77!D}C47?64g7YVy! z)fH0d2!IFWARQt&3Atz$YkdS7Y6Fk65v-3Ol@Sk)%7{zJk(l}jR@WfvBS_7{tpFOh z<5si)A?!=Itsu97Lr*RRO%hpBw?0Cycd)YArA#;>2?SsBr8&VlTY9a^; zsdu;}4IvB+Qjc9s2N{Wi^CL1TW)g-6K`J9;1>&aRaIU0jI2wXfOt|VJNM!^kh0P<7 z%QTAE;9eMJeMFI&u)SK4qMJezGM0tQ5KMPM3Ug%jm}-z~aHuS}iwb4K)iJqd*8-6uRpNy!Qv3N5oBn#X$-JM2(}zBaauX1w4#G-8sv#lp6{zsaNJQUH2zJCMWY-Tw3ZfWtLV}d8gSeKh zhNhmHx{ig8ij9FrpqWyzg-W=ELb!#3tDdrhrks|NjJ&v@f*`BB0E?*@gQF5hlpaI8 zF>{eELxnR#xdTIuD?_CNL#+!#vnx}xD^r&TLx&qfu04O61&5KAu923msHUB$rk$X= zn~;XPh=v~sX?clgc?oNJih#Bbc|f)WA(QYG2&fF`LIqq^4@4ZcIS6_@0=yc6GvL(_ ze2pA*HQNU4(gv9I4y5)$CRxRGSjBbVBXaCw2sW}Ha%IFWrUS`S$Ru*z z1ZTr*9L)Mi%p?r13^Fp0OhPn5bU{cF({N#vFhNrgg4aZF2E4Q($N&|9rl1Ka>?(1L zD?)S)1`=KiAUg))7i1AQ8=@X<0uC+A+JkdXRFP|p{v%U4j(N7leuM&HT6(9FQl%*H^& zQdiZ-P}bW(##c|wQ%lHIPuW>VQD0qAQ&C!mmr+5G)m(zXMwU5RpCQSNt;C+8#+|v| zgQ?kvZC$D1!VHzZ0N!q2u09{;HWyHRlx4$Xre|oPqc5syC#Gc&sgH!!eL)CZ6A5d2 z3WHWGps&hCt%Ka*!)pQ>*hl2hDj+m2yc&Wt;MEX(P!3WbK@QDEu7)6Nv}y>>MXP|g z6&(m5*wqj4O+%1X3NR8{AA#x}1j%jl@Wvl zO&B5FJ;W|+0z$Cm3ivA{Ht?!B$eILD#RRU8;MEYE0k4c8^${EDh#b5QLaU@8^%11H zfmcAB5_%93TqJT$1YyG~7c>TRr=DRjgaIdoO~T-8NbLh5A+-{O4XKHcNl1N!ObVNT zHm8G%HE@9jVL*x+WD+6{Cy}L4*cjuA5ZmDziDaOapAasj497}B6yPDzbU{jcXsrS6 z%V72;AtKP8CU{jdR2R5fK{gOs98!Z|m4*0S7&gy>>{^Hvy!Qby1nXFth)J{vc-g8k3er3yByHd)A+aW89139|lkilB%m7tAu=yR#`bZR9D?wHu zKuPeb1IXAKE>gfK5QGea1wcE4K$AYuwb|gIHb|ccLc%K-2p2*k&;CHVMQ{=_LjQ}M_AuWSl>$!j38_=T~Bd6F9BU|9!+-vZBHRx zZ^$koF@0}QJud-mJ6;WINj^?-9(D&w(D~tEx(ty93>lUTdG-uBHlW*%dLoqCgJo;o z`O6$Qs-2mtoEYP*L}M(3RrMTH^&EIK-NDF%R||T75M=!}ghZ=-AY6DIgq4BZ!6C?o zjKx7}95j+w*$E;dsOH44;sB|SAS9$F;#P5h*FKmG7%@8BZ26|9ge8ZLQDNbQ46LdNTO6cAV7@yJ{9Dp)~!Mp(``l`(_Q2SFI1>IU9F z05H>^{ULHaiC{+x2ngAjWCovU3MR3$dP+721 z#IQkyEx3TC76ZA0kbw8^AUzAbB&eu24ack?kR1jO7C3`=S%^xwMtI=BMKBmx>LXC) zg1ibEymA0ESp!}SAYmK@_k%Mabr3QMQ3)Yo zXYY7}D;>}^K;Y^L%7D~ILb_mCSbZd9;4Na{Eu`lurVB!_Gkm<^=e!AOxryj{3h6rV zY1+sN@k;S?IZ1TmJ`zy(g49IdBehXFM_6kgOhJNm5V8VDjf0hh47h=6B^3wE z`UtZELKC5SeFR#QpkM{BjG%oV`1N*3Q$o zH~4^4ZW%M^8KR%Q40Y`D-m~<1TP}sMG7WE7;FgWoGx6o2GQd15{y^|xU__f z=MY0eN@)^Er1n3g2Ek84`aUF(;Lam1_h3{Rpv}(ku`*0q%s_+pQsMOxCIe~?xb}pu z96+vOz#$Cn4T0AWAoq_jZG6VtGj(6Wb|mCdj2CZOTYuLU~82lFtF#fMK7y>xhAV(@kxA745$IwF%sL271X3A+DkjwB3TU+wT4h91eFVw%{02dg zh9ab5!bJ*!r+~2*BM@^S8sU`}gaPRykwjv(0*@dubr3-{xT*tC;f!1|V`HOLPnb0k ze(yu-0=NUvoQhm`KvW_tK$eBDF>42??ZosS!EFYp`N%$n)klzO2${s%KY~OSBvrtx zAvgmr3ui!7LRv=( zfUz=QLB~)>e}-)KQYqvX|6$ zfYe8j8V6nz@oBoFoEHZ^Bmq+I2&%im>l$PRq}PLI{0&|QAv2I`BFJ4rplS%dNSjc7 zgjN$lxR~`3kAgixL`;2zwE{vD1XVOB)eyMq0f`V*A3>L7OPj(gAT$QNx`8v$>LcU| z2#t+aAF+aV4}sQlqt!vkTw>}Y%*u$Q`UsNKA?-ut5&#byQihH-`jq-KHC zAdq#}q6U8G2wWc_*Gdpw(7Fjymm!V3LFyn(60$rSQokVAJCOQFK*vu|*B=={>K90D z0%1dHB^c=~q60^u+6tmZNXrYX050V%q#Yon9RR6`AUApoYPkq%x$tYd@oBm8Xt{D} zI16dm^Q+p(Dj6uK8XIb98fj~pYAYLSDjI7j8>uU4tEp?Lswt^zDXVEqshP@ZS_o;_ z3TxW&XgTv}IrC|`@@ct3Y99!RIRpo(iI8g_JZ$9ZhA>;%2~-V%N96cb96;Bng6kmU z-Vo$$1Y{978&dUfqjr%Xf{^|Z!D zBqU#Bl8~GZ5l1T@$lyZiBQz3T6QPNN#_e&85F&~}+&wb-s3Ez`2bbrV^$so(i1`p% z2wRNe`Uq>tfEG-G>lA1;hqQhY646+7K?Fex!0Xu{^$~QM1~x|nshH4eA4s}F6NGa? z27;GW!x@kih>L`nhDkzd6JhWW9Fz^NQy^n&BH$x;L=F7l4A5YmUVsP~3F!sE5hSNT zM&U4PA50Nq>LURiKL`@k^@r9kNVn91D;ghojRW0qs^tk0ha3tAIg`r8CQwTRQb!PP8uI!>h+h08#AGxz5JBuDq)wq~H4P75>^6ee zMZ?!ugDN9fO$4c5ApIkFS|Wx4I_w&Jlqfu1;b%b1grsW72|N%IB7#DK>m%6e0#N@* z1bpxglmypAkorhOFF;t=A5tYjO#q*q0I7W-H4cOgCm|goI0>m`AY7DQ60CD1paa@i z1W|@eg2w2yydc#LnDm6#M_3smI-tc20y^#jnttH=$dgygLsS!lAUDP#*GDK75Ylo5kT}x1IINWpyvD&?x_~SNuZhs=Ajrvbkei0!H4YjBQn_G~@V*ZR z(t$tV5jp6QKFF03ygp);)MFOcK}N`R5UaQjiefve!9TpUt3!xxniX&|INB1$n?^$}#98}jT7#4bG1fvgy$4BS5gRYnG( zkZK6AT@=Rv8l)1zO5(45;Ifcziy%_73R-bs6M@t$keMA=4FWp<4O$aHY9*xlNLbfj z7+i5cHR=Vx^+Ku`NDTsEVmxo*cjWqrThkl4KH^mO zV_3G9A^L_k;ReOm^BfkR>D;u;i-u*D zAm_`0j+=wjN|1U7QU@`M>%kGLxE>fGdPb1`5vQm&C+ZF($k+wE zu7NYCTOWZ6G*ZTANL7zkep1Lqt82*MV)jSyI|*9Xz-lpkiXr_J)Dbgu7b4X}kh%#z zJ_a`#62ed}YJCK`JPs-aZbg6;6I~y{(;Hg#K?zq-KTv?U^bG0az^WKfO@yaDf^0#B zR5T*`eh>yGiM$R0A`7pNKvfcI9R#n8Ablb{^%0~J0h2`4N8suSQt5zh_lER|AV>E= z&W?lBGW;4Y0-CNu8lVaYvp#~1!okE{`P5v6zzA}21ElW*se_QuMj*95f>%bM9Yc^& zIAteDrGtkARYppVcq$`)=r&d8J$H~Q2}fmQ3){PD3pzXjHV_A?ZU~aF3J7w*DGBuv z{JwAGItZB!KiU*>f*hyLz09 zBS;;DOp;n3L24PINbpfwLV5wh`hk#HUyTDjhKC z4(aB=N$5-vcwsi=XgJifvfW`^P$ea(?gkmD6Hs##P;=u^cjZ=tUbhPA8Nmmsdtb`NF4-U1&8Z;JMba|8&GFQ-i}w^4#a>i zQ~(Pq*ud%?P>sWj!bQ|XT(XuJ$Q;r$;+8Rk^pD^hg%Au`6D}EJ1Q)z$0aW`)n?kA^ z5F1<*aZ4G2kpZ`q0hnRLEos6nX~HF83}+y(mqTSkkKN{w(#Kp52dQfy^$v`LS4Pki zejukSK>A0hB&cV^E~dvWriXuY4tao%LqrQJBB~~WjHQEfE~Gv}&a=?$kI^Uv6-xSn z_-h}^Wr-;s>8XZb1%##mQmudn?g-U%5S3IW4MHK8Uqd=aq6VQt`XPdP!N|*6An}MC zh7b`5i7XChgQT!kF_7qnCK&K=87?;Bj2+mzYe+&xR7?i`$cYei&?wkINL@n=iB=y$ zxM(D#mO&;_IyFdD5@bgzwDy72N}4`m5QerNWXuiJMFKC(M&d$de<0ICAhv)uXb28U zYCvlfaJ>WV86mU5wG8MyQb^5$jf5_HgLHQI)j@R%q>2Hrhy&dy1gUEv{T&z^RQo_g zV7(!b2;>$atS4r}>myJ%2Rt7HshH4e96Veeq~SPtrGw0X*GHh52)q{+x!&Q0UG@!U zKx!fgNlbmjBX13=hRCgtFlr)5-Govfp;bCiF1S7dRXX6oH)IBKeZ(mZsf>)kl@X#o z;*u~%MyPcV^7-4)`UtHuf*hoPt3HApf(lua03jhY5u^tMTeqOgBBskG3PC7`{~%XS zXhU;ol@YtJCIms|YKbACMTcGhXjLjW??XkvYyrIhv~dzpp+txJ2x%uH3B4*xbm8rh zfd;Hd8E*t#3<^Fd0CEKn9un$1ytcz@1acVQTd<+AMoe!PtOj{qG)k2L?!Q3$I50L^ z4MGf8OwSL5bOXd-NL0sPRL37A2$6!3klqcXCKA^3MefKTS0c!4wAu%m3$KGO+bUXrNEgbabZlntuAoUJb5;VD^>H@E8AoUS4373K}xFL(N!SxZS zVp4O092|$0gfGknF_avMtdF3T5lYVpdfOdr*v%SJ2Vs)nP7wOSY`7rgL^x2Dgw!(v zcZ(1`ATSryMFKM*;-Eu&;FCmH8Qd~vT+*iS+6OWy2O%NzL7dV?5C)ut47lG7o3_mFEM2ph9L zg3b)|02>jX(+PCtYA4(bxTwv$>7A@_qss8k<8 zYbA{S3o*41rW#1S0I!V1bORuL5QsQ338_s$i`I~;4CLAeD;uH%>*XM|40!4XR3-6gVyTHBbr7Vp17Q=aj|9{} zm64#j8)Sry4)qa_k|VSq1m2?xsf>7(?coei1%$R^2v#3Ks~br72bJVkbl``JAk|0k zY6x89J6Ic1^}tseU}Zq+BXD(tsCU4%45Ytf36TQ#hCqEEuoQL% zr;Hiod^u=M1RjWkS3__HybeNUz-uB11707&sv&(C1ABc0>F>Zu^s^M;H4&Tv8Eu1+ zNEZ^pDLX~~gftot6$dYi z<=6G+*Y$_iP&BTOh_B-yr8*5saDRbd1%#%6`1%OyaYzm_MTCTK_vJevbyKr0$Z1q7*9;3Ry02Nwf8 z&W0F;L#>aXRS)vvJZS5#p`&f!D}f-L8e|eO$c9GpDm#HLutOuk_u0Wu%!V*PH+O?; z7D#;rsdq4!WJAP}NzefcN_KcEBVI)Z$O<}MMF-H%AaGp+u6iI#<3J?%P6zM}ci?;O zcoeN6B4849*8|qN23{+HCVD_C;^5T~lmV-8p!E^BCW5fR)ez`NANbim2nI?W1gVK2 zq^t=yWPlDlC4^iDAurnoS3?Gz(oh7ck6?8WyfV^<+_(xQ!M!AQ2_p~!9q9vKEQeAb z!RsJo2BhPILPBdI4K^_iQ0GWY3yowI)kK~TViwh67S&=A)&x~UqMEEQ#38B)LLyr1 z!kQdl5?TR?Xs}~LB-KZtIb3j!Lri@HsgOetLT5oDz_gyhxofFnrdf=NPFvLRJQC}&^8t06c8S2Y9?hfMuI z`bQ9w*7XsjW+9_K0v(hM?gSC6kGSET9eDQ#8v{}wfhr>fd)QbUs2YOTO5l12>0Tj} z8&?U}N1QTd;JOJ~4Po?yAaxKj39XMHOSRFq5uw#bkXa$nz#Wci2(2;#PXj?`d^n_# zDgTp39i!FooJDhX5{A=f@=Y!)%lqz_7EB&NeIt_`k$u-8Y> z>IUhYA4q)!s*FUmpt7QxEbtl!Ha!HbjD$6@Rz~b18j$=*JP8>OL9S(>MGCmAK<*_$ z*%a3}$aZ0}p|&HPicDP*+~u>ckUkm$?PtNQ9$i*I-%k)$(TGxMbPKKI2lX$&Rv?Q* z=2;*jkog@58$uHAeuzqt71#nFmpBz-N7oLy$EIu$l-mA_uCE;MEY9yd|8C#=tpJ2R_M%Pr;l|0k%I#9#kK}IziwX zN7fv?707~9#)4bMf=AYpN7j-X#5P0R?|`v=719ZU>|KShk-I|NQbs(|h|NUc-jES8 ziK9LOkIqSgNNh7cpt=cC6M?6J40t3!h)YZljC8ofuprF!*lc22u*yga+&O|&NgD84 z2F?I=d_+OL9u9cL1nw5Gh^VoMsG-z2$n_DtlETKo%%ONhKpQ2%yUQW95`=`529Q=O zv<`ywp&;ZCua7WmK$wUR5w3<0l35{RF^AKttA&L8=2{NRorRN!R^5H^B@j4D%HA3^#!)T)o*^$3Ik zuZ-Xo3Pc=E!fPVP6|0adt)X&X5`hMq;UtFe@Xp`UtJp!wXx7 z0Ga5491RCyL+ToWBqyw=1Fw}p^$Su3#G_~ptA^mUk{!H?0V}qH_JGK$k04bNghamQ z4&3p9ErH>iTQtioy{NIwWvAAu_)Gfo+3eFUzFKot|D2L$OHVUm#A z2XZ_D=5hrtNkeWalnMyBGUAjr!e1Xjsv9H{vH}57A3GSWk?j39LoG6|m; z0x?jA<}_GE)nN!R3J0l@FzX{05e+aRTpzIstFsBKqtsd8>IpU{3%VhN7&h#zdv6Q| z;`Dn+Jp~~_O<70*LL5nCc{OO1fo`IK?dS)wA!RkB+$N3$b+V9a1pI9BJPt7vt6qo{ zoJ3wBKnxqJnfL_}H4Bb91+G9?&sS8}2l<##FdMYg4YLP?yq{D^%L~4Q0KAP9w5|YB z0iltQ`UtZhgi8due*{?t2O;703mOA!ox%@35&_b|K_hXghlqnJBQ;k6b!Q~xD4_0$ z#Dz-ngZnuA;1zBVHe}?DSJ@eioM2oS18MC6qz;1AN02?I5E5R?pfMn_0?PKtyHK(2 z27=T$FcMTH@hjQzDOzLe`#^d?2ojY6!ka8+p7A)F(o!fWTD`Xum3m0Uo%6?pg&Ok^tHY#18Eiq0~pnbr6IN z9+X3$8-fc$>K!Nv=>&m$K+u{9Ha7%cG6$}VG}u9%BWV8!Yo!CJjIff#)IrEEyI_w&2eI%p>y`>aV2O*Pa)ewXWUipS)s7*k_9a0~` z84#|ZCg`A1$W#s*iB=5}?C+peLvSvn-hr;oMp~5(sfi#Ya_z&f<^V=ckV*%Qgw!lh z61*52Tp2l|S4PTM7G^{4ae&t~0AWOBu^GAl5l@X*91STOX;y~39p85z< z0ilubst1h$t%Ja|50p))K4KRJO&EdajMTtY5@N*y)~W}qAfC}V98#XhTgc&TTqTTv zmJdk?mpMa49MUT!SQEh&z|ZhRuGH|dNvo>h+i-}i+Te8!@)9*T8zKwmf|d;s>f=xv zaj=s@Asrbc61w0Gtzv@oU*OdToBCLt9HTmgK94L+oXl>yO(M)GQa7Pi5L&Y;sb zkSYdJgTPP2hF3QbS;#^-2pggut{xWyG^z&c`+#d7ob?g38iI9vpeMqC&%}oGebBl& z{3;HDDh}{^2U!+UD?wMm!3O1cQBRG7Z$yPxKxhm|bpuxhS%(0xkNCioL_CV%LwjH= zwBeNzH`1OUaD9YQ6QNBH@yT2AfmX^P2IwGl6NH4U!G_i~;CUZtj|RL&2v!q;M(3bv zz^9l(Yag&0(6R*h-c_Xa3CQ&kqymDR)dTPT!0RK(cpbNt5tk&qH-yPR9Hf9&8F5O0 zM&#H;b-|Sp^!x+}8?!#*6x9VI&^dBwl@aJm&Fo1LMbihvDG8s@iAY_N(Z?DLSvKR z4D9Jd8%z2D84f$Y8mT4%?H)z$`@q@Y{t;;N4y1B{)HRT51+4~wbCK0RRN^P$8X*kG zf;VF7BeePjQu|<%Xv)yIkhR(1Y6#vT!d@T2Dm9fVgn_H? z0}+RiNEHoYkPT8LL24pMX;TP-*GK3Ky!8>gv=OVM0eln=xjup}(#BmMK{`he61hHt)H|po zdjAM=S^~Hlf>cJjm^BeZ1i4#8ph_aQKEhh>;1VRhCW5HMsD|L(B`oC$sN@-u^%10( zM34lp2Y^;OxCcVvBcc!n4XY&3(mR|dt$}83&?+NH{|FSz7_|v#=?jJ+R+ab#(W)T` z7hH)Tj%$P0M}nHr6Rk1p6iD@fNkUZetGhuM5E9OXGa%vupz9c*N4i03A|7?<9jTZ# z3q%C1LVDGO>sV{7W;62~+FzX&dKaETMFgU}Sf`$vPJJ`w^g9)Q+6XyafIE~Gw! zupuP=(4$-yQUSrYmx5~|$czv6`UtYR0A78-8JIN@L( z!*pm>47`3pW5AW6?^i`Sp9gDY1Ui!kHUtN*dT`c9yebaJRT3HX5v2BkkITWiunNT% zYrVsxh*KP{5zGMXSA|q1B-KaYDhae!0iqX9LMj~)SJoU-A3;RWNc{B?xEg{~M&^*7 z5rjmmh9D=)fk_n;cufRhAlFCmItZBoR{*PqAe)8^AT<$$g!GTFRz?F@ zA3?_AD6fxLh1B4P_?if&5;PuyJPr?I<5fs?kus(v+LC7RrFYydmLc)cI4>aLiJ zt06TGq)tJrjNmucVd{dY$3;R`XoIUEc!!Am`UqaXKp1?myM!QIWRhC_Bgj}BxH7VZ zY!QMjhXW175!pWica+fQhL9^A$VuABBFJnC>myJd1i9Y +#include +#include +#include +#include +#if !defined(PLATFORM_WINDOWS) +#include +#endif + +#include + #include "tensorflow/core/lib/strings/scanner.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" namespace tensorflow { namespace io { @@ -60,8 +74,7 @@ std::pair SplitPath(StringPiece uri) { auto pos = path.rfind('/'); #ifdef PLATFORM_WINDOWS - if (pos == StringPiece::npos) - pos = path.rfind('\\'); + if (pos == StringPiece::npos) pos = path.rfind('\\'); #endif // Handle the case with no '/' in 'path'. if (pos == StringPiece::npos) @@ -112,7 +125,7 @@ StringPiece Extension(StringPiece path) { string CleanPath(StringPiece unclean_path) { string path = unclean_path.ToString(); - const char *src = path.c_str(); + const char* src = path.c_str(); string::iterator dst = path.begin(); // Check for absolute path and determine initial backtrack limit. @@ -229,5 +242,52 @@ string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) { return strings::StrCat(scheme, "://", host, path); } +// Returns a unique number every time it is called. +int64 UniqueId() { + static mutex mu(LINKER_INITIALIZED); + static int64 id = 0; + mutex_lock l(mu); + return ++id; +} + +string GetTempFilename(const string& extension) { +#if defined(PLATFORM_WINDOWS) || defined(__ANDROID__) + LOG(FATAL) << "GetTempFilename is not implemented in this platform."; +#else + for (const char* dir : std::vector( + {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) { + if (!dir || !dir[0]) { + continue; + } + struct stat statbuf; + if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) { + // UniqueId is added here because mkstemps is not as thread safe as it + // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows + // the problem. + string tmp_filepath; + int fd; + if (extension.length()) { + tmp_filepath = io::JoinPath( + dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.", + extension)); + fd = mkstemps(&tmp_filepath[0], extension.length() + 1); + } else { + tmp_filepath = io::JoinPath( + dir, + strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX")); + fd = mkstemp(&tmp_filepath[0]); + } + if (fd < 0) { + LOG(FATAL) << "Failed to create temp file."; + } else { + close(fd); + return tmp_filepath; + } + } + } + LOG(FATAL) << "No temp directory found."; +#endif +} + } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h index 955098f5b5..93151efcbe 100644 --- a/tensorflow/core/lib/io/path.h +++ b/tensorflow/core/lib/io/path.h @@ -89,6 +89,9 @@ void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host, // return the path. string CreateURI(StringPiece scheme, StringPiece host, StringPiece path); +// Creates a temporary file name with an extension. +string GetTempFilename(const string& extension); + } // namespace io } // namespace tensorflow -- GitLab From a9bd6d67022664db5ccd8128df46651d1d14ee8d Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Mon, 13 Nov 2017 22:13:54 -0800 Subject: [PATCH 0385/1801] Fix Python auto configure build support (#14467) 7ccfbdf caused our Python autoconf to no longer be auto. This was likely due to the evil helper _get_env_var() which has now been removed. This change blocks tensorflow/tensorboard#719 --- third_party/py/python_configure.bzl | 91 ++++++++++++----------------- 1 file changed, 36 insertions(+), 55 deletions(-) diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl index bbc07905fc..c16eb3a12a 100644 --- a/third_party/py/python_configure.bzl +++ b/third_party/py/python_configure.bzl @@ -1,11 +1,8 @@ -# -*- Python -*- """Repository rule for Python autoconfiguration. `python_configure` depends on the following environment variables: - * `NUMPY_INCLUDE_PATH`: Location of Numpy libraries. * `PYTHON_BIN_PATH`: location of python binary. - * `PYTHON_INCLUDE_PATH`: Location of python binaries. * `PYTHON_LIB_PATH`: Location of python libraries. """ @@ -23,32 +20,13 @@ def _tpl(repository_ctx, tpl, substitutions={}, out=None): substitutions) -def _python_configure_warning(msg): - """Output warning message during auto configuration.""" - yellow = "\033[1;33m" - no_color = "\033[0m" - print("%sPython Configuration Warning:%s %s" % (yellow, no_color, msg)) - - -def _python_configure_fail(msg): +def _fail(msg): """Output failure message when auto configuration fails.""" red = "\033[0;31m" no_color = "\033[0m" fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg)) -def _get_env_var(repository_ctx, name, default = None, enable_warning = True): - """Find an environment variable in system path.""" - if name in repository_ctx.os.environ: - return repository_ctx.os.environ[name] - if default != None: - if enable_warning: - _python_configure_warning( - "'%s' environment variable is not set, using '%s' as default" % (name, default)) - return default - _python_configure_fail("'%s' environment variable is not set" % name) - - def _is_windows(repository_ctx): """Returns true if the host operating system is windows.""" os_name = repository_ctx.os.name.lower() @@ -73,11 +51,10 @@ def _execute(repository_ctx, cmdline, error_msg=None, error_details=None, """ result = repository_ctx.execute(cmdline) if result.stderr or not (empty_stdout_fine or result.stdout): - _python_configure_fail( - "\n".join([ - error_msg.strip() if error_msg else "Repository command failed", - result.stderr.strip(), - error_details if error_details else ""])) + _fail("\n".join([ + error_msg.strip() if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else ""])) return result @@ -163,21 +140,23 @@ def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name, def _get_python_bin(repository_ctx): """Gets the python bin path.""" - python_bin = _get_env_var(repository_ctx, _PYTHON_BIN_PATH, - None, False) + python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) if python_bin != None: return python_bin python_bin_path = repository_ctx.which("python") if python_bin_path != None: return str(python_bin_path) - path = _get_env_var(repository_ctx, "PATH") - _python_configure_fail("Cannot find python in PATH, please make sure " + - "python is installed and add its directory in PATH, or set the " + - "environment variable PYTHON_BIN_PATH.\nPATH=%s" % (path)) + _fail("Cannot find python in PATH, please make sure " + + "python is installed and add its directory in PATH, or --define " + + "%s='/something/else'.\nPATH=%s" % ( + _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", ""))) def _get_python_lib(repository_ctx, python_bin): """Gets the python lib path.""" + python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH) + if python_lib != None: + return python_lib print_lib = ("< Date: Mon, 13 Nov 2017 22:10:16 -0800 Subject: [PATCH 0386/1801] (1) Make LIFO's GetCurrNode() returns the same node until RemoveCurrNode() is called, even when some other nodes are added. (2) Test cases for GetCurrNode(), AddNode(), GetCurrNode() sequence (both GetCurrNode() should return the same node). (3) Better test case for while loop; instead of checking time_schedule of each node, which may depend on node scheduling algorithms, check the actual dependency. PiperOrigin-RevId: 175635789 --- .../core/grappler/costs/virtual_scheduler.h | 30 ++++-- .../grappler/costs/virtual_scheduler_test.cc | 101 +++++++++++------- 2 files changed, 80 insertions(+), 51 deletions(-) diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h index c9a032d5f8..c74d80c2be 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.h +++ b/tensorflow/core/grappler/costs/virtual_scheduler.h @@ -138,7 +138,10 @@ class FIFOManager : public ReadyNodeManager { FIFOManager() : ReadyNodeManager() {} ~FIFOManager() override {} void AddNode(const NodeDef* node) override { nodes_.push_back(node); } - const NodeDef* GetCurrNode() override { return nodes_.front(); } + const NodeDef* GetCurrNode() override { + CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node"; + return nodes_.front(); + } void RemoveCurrNode() override { nodes_.pop_front(); } bool Empty() const override { return nodes_.empty(); } @@ -156,18 +159,23 @@ class LIFOManager : public ReadyNodeManager { ~LIFOManager() override {} void AddNode(const NodeDef* node) override { nodes_.push_back(node); } const NodeDef* GetCurrNode() override { - curr_pos_ = nodes_.end(); - curr_pos_--; - return nodes_.back(); + CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node"; + if (curr_pos_ == nodes_.end()) { + curr_pos_ = --(nodes_.rbegin().base()); // Last one in the list. + } + // Once curr_pos_ is set to a valid entry in the list, we keep using the + // cached curr_pos_ until RemoveCurrNode() is called. AddNode() will not + // change the GetCurrNode() return value. + return *curr_pos_; } void RemoveCurrNode() override { - if (curr_pos_ != nodes_.end()) { - nodes_.erase(curr_pos_); - } else if (!nodes_.empty()) { - nodes_.pop_back(); - } - curr_pos_ = nodes_.end(); - curr_pos_--; + // Make sure we have curr_pos_ ready to be removed. + GetCurrNode(); + // Note curr_pos_ may not be pointing the last element if some nodes are + // added. + nodes_.erase(curr_pos_); + + curr_pos_ = nodes_.end(); // Reset curr_pos_. } bool Empty() const override { return nodes_.empty(); } diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc index 40548b5a07..412b494be7 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc @@ -938,6 +938,18 @@ versions { ExpectSetEq(expected, nodes_at_peak_mem_usage); } + // Helper method for checking nodes dependency. + void ValidateDependencyChain( + const std::unordered_map& start_times, + const std::vector& nodes_in_dependency_order) { + int64 prev_node_time = -1; + for (const auto& node : nodes_in_dependency_order) { + int64 curr_node_time = start_times.at(node); + EXPECT_GE(curr_node_time, prev_node_time); + prev_node_time = curr_node_time; + } + } + // Helper method for converting shape vector to TensorProperty. OpInfo::TensorProperties ShapeToTensorProperty( const std::vector shape, const DataType& data_type) const { @@ -1033,11 +1045,15 @@ TEST_F(VirtualSchedulerTest, AddAndRemoveMultipleFIFOManager) { manager.RemoveCurrNode(); EXPECT_EQ("Node2", manager.GetCurrNode()->name()); manager.AddNode(&node5_); + // GetCurrNode() should return the same node even if some nodes are added, + // until RemoveCurrNode() is called. + EXPECT_EQ("Node2", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); EXPECT_EQ("Node3", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); EXPECT_EQ("Node4", manager.GetCurrNode()->name()); manager.AddNode(&node6_); + EXPECT_EQ("Node4", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); EXPECT_EQ("Node5", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); @@ -1110,11 +1126,15 @@ TEST_F(VirtualSchedulerTest, AddAndRemoveMultipleLIFOManager) { manager.RemoveCurrNode(); EXPECT_EQ("Node3", manager.GetCurrNode()->name()); manager.AddNode(&node5_); + // GetCurrNode() should return the same node even if some nodes are added, + // until RemoveCurrNode() is called. + EXPECT_EQ("Node3", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); EXPECT_EQ("Node5", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); EXPECT_EQ("Node2", manager.GetCurrNode()->name()); manager.AddNode(&node6_); + EXPECT_EQ("Node2", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); EXPECT_EQ("Node6", manager.GetCurrNode()->name()); manager.RemoveCurrNode(); @@ -1181,7 +1201,7 @@ TEST_F(VirtualSchedulerTest, GetCurrNodeFirstReadyManager) { // should return it. EXPECT_EQ("Node6", manager.GetCurrNode()->name()); // Now insret a few other nodes, but their time_ready's are even smaller than - // that of Node6. Befor calling RemoveCurrNode(), GetCurrNode() should return + // that of Node6. Before calling RemoveCurrNode(), GetCurrNode() should return // the same node, Node6, in this case. NodeDef node7; @@ -1505,19 +1525,18 @@ TEST_F(VirtualSchedulerTest, WhileLoop) { RunMetadata metadata; scheduler_->Summary(&metadata); - // Nodes in topological order (each node takes 1 usec) and possible start - // time usec: - // * const, ones: 0, 1 usec - // * while/Enter, while/Enter_1: 2, 3 usec - // * while/Merge, while/Merge_1: 4, 5 usec - // * while/Less/y: 6 usec - // * while/Less: 7 usec - // * while/LoopCond: 8 usec - // * while/Switch, while/Switch_1: 9, 10 usec - // * while/Identity, while/Identity_1, while/Exit, while/Exit_1: 11 - 14 usec - // * while/add/y, while/concat/Axis: 15, 16 usec - // * while/add, while/concat: 17, 18 usec - // * while/NextIteration, while/NextIteration_1: 19, 20 usec + // Nodes in topological order: + // * const, ones + // * while/Enter, while/Enter_1 + // * while/Merge, while/Merge_1 + // * while/Less/y + // * while/Less + // * while/LoopCond + // * while/Switch, while/Switch_1 + // * while/Identity, while/Identity_1, while/Exit, while/Exit_1 + // * while/add/y, while/concat/axis + // * while/add, while/concat + // * while/NextIteration, while/NextIteration_1 int num_next_iteration = 0; int num_next_iteration_1 = 0; @@ -1527,45 +1546,23 @@ TEST_F(VirtualSchedulerTest, WhileLoop) { int64 next_iter_1_start_micro; int64 exit_start_micro; int64 exit_1_start_micro; + + std::unordered_map start_times; for (const auto& device_step_stats : metadata.step_stats().dev_stats()) { for (const auto& stats : device_step_stats.node_stats()) { - std::cout << stats.DebugString() << std::endl; - // Start micro for while/Less/y, while/Less, and while/LoopCond are fixed - // regardless of scheduling method. - if (stats.node_name() == "while/Less/y") { - EXPECT_EQ(6, stats.all_start_micros()); - } else if (stats.node_name() == "while/Less") { - EXPECT_EQ(7, stats.all_start_micros()); - } else if (stats.node_name() == "while/LoopCond") { - EXPECT_EQ(8, stats.all_start_micros()); - } else if (stats.node_name() == "while/NextIteration") { + start_times[stats.node_name()] = stats.all_start_micros(); + if (stats.node_name() == "while/NextIteration") { ++num_next_iteration; - // Start time can be either 19 or 20 depending on how the scheduler - // picks a node among ready nodes. next_iter_start_micro = stats.all_start_micros(); - EXPECT_LE(19, next_iter_start_micro); - EXPECT_GE(20, next_iter_start_micro); } else if (stats.node_name() == "while/NextIteration_1") { ++num_next_iteration_1; - // Start time can be either 19 or 20 depending on how the scheduler - // picks a node among ready nodes. next_iter_1_start_micro = stats.all_start_micros(); - EXPECT_LE(19, next_iter_1_start_micro); - EXPECT_GE(20, next_iter_1_start_micro); } else if (stats.node_name() == "while/Exit") { ++num_exit; - // Start time can be between 11 and 14 (inclusive) depending on how - // the scheduler picks a node among ready nodes. exit_start_micro = stats.all_start_micros(); - EXPECT_LE(11, exit_start_micro); - EXPECT_GE(14, exit_start_micro); } else if (stats.node_name() == "while/Exit_1") { ++num_exit_1; - // Start time can be between 11 and 14 (inclusive) depending on how - // the scheduler picks a node among ready nodes. exit_1_start_micro = stats.all_start_micros(); - EXPECT_LE(11, exit_1_start_micro); - EXPECT_GE(14, exit_1_start_micro); } } } @@ -1581,6 +1578,30 @@ TEST_F(VirtualSchedulerTest, WhileLoop) { // different, so should be those of while/Exit and while/Exit_1. EXPECT_NE(next_iter_start_micro, next_iter_1_start_micro); EXPECT_NE(exit_start_micro, exit_1_start_micro); + + // Check dependency among the nodes; no matter what scheduling mechanism we + // use, the scheduled ops should follow these depedency chains. + // Note that currently, VirtualScheduler executes while/Merge twice; hence, + // we're not testing dependency chains related to while/Merge. + // TODO(dyoon): after fixing while loop behavior correctly (run nodes in the + // order of Enter, Merge, ...loop condition ..., ... loop body ..., + // NextIteration, Merge, ... loop condition ..., Exit), re-enable dependency + // chaing test w/ Merge nodes. + ValidateDependencyChain( + start_times, + {"Const", "while/Enter", // "while/Merge", + "while/Less/y", "while/Less", "while/LoopCond", "while/Switch", + "while/Identity", "while/add/y", "while/add", "while/NextIteration"}); + // ValidateDependencyChain(start_times, {"while/Merge", "while/Less"}); + ValidateDependencyChain(start_times, + {"ones", "while/Enter_1", // "while/Merge_1", + "while/Switch_1", "while/Identity_1", "while/concat", + "while/NextIteration_1"}); + ValidateDependencyChain(start_times, {"while/Switch", "while/Exit"}); + ValidateDependencyChain( + start_times, {"while/Identity", "while/concat/axis", "while/concat"}); + ValidateDependencyChain(start_times, {"while/Identity", "while/add"}); + ValidateDependencyChain(start_times, {"while/Switch_1", "while/Exit_1"}); } TEST_F(VirtualSchedulerTest, InterDeviceTransfer) { -- GitLab From c6d6ef7fd32a86a0653040b35ad32dc12cc14a4e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Nov 2017 17:49:41 -0800 Subject: [PATCH 0387/1801] Remove dep to file/base:path PiperOrigin-RevId: 175362138 --- tensorflow/contrib/lite/BUILD | 8 ++++---- .../contrib/lite/models/speech_hotword_model_test.cc | 9 ++++----- .../contrib/lite/models/speech_speakerid_model_test.cc | 8 ++++---- tensorflow/contrib/lite/models/speech_tts_model_test.cc | 8 ++++---- tensorflow/contrib/lite/models/test_utils.h | 6 +++--- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index c58f77cb11..e0c674c295 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -178,7 +178,7 @@ cc_library( deps = select({ "//tensorflow:android": [], "//conditions:default": [ - #"//file/base:path", + "@com_google_absl//absl/strings", "//tensorflow/core:test", ], }), @@ -198,8 +198,8 @@ cc_test( deps = [ ":framework", ":models_test_utils", - #"//file/base:path", "//tensorflow/contrib/lite/kernels:builtin_ops", + "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], ) @@ -224,9 +224,9 @@ cc_test( deps = [ ":framework", ":models_test_utils", - #"//file/base:path", "//tensorflow/contrib/lite/kernels:builtin_ops", "//tensorflow/contrib/lite/tools:mutable_op_resolver", + "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], ) @@ -261,8 +261,8 @@ cc_test( deps = [ ":framework", ":models_test_utils", - #"//file/base:path", "//tensorflow/contrib/lite/kernels:builtin_ops", + "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], ) diff --git a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc index f5d1f436bc..0b8266447a 100644 --- a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc @@ -20,9 +20,9 @@ limitations under the License. #include #include "base/logging.h" -#include "file/base/path.h" #include "testing/base/public/googletest.h" #include +#include "absl/strings/str_cat.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" @@ -36,7 +36,7 @@ void RunTest(int model_input_tensor, int svdf_layer_state_tensor, int model_output_tensor, const string& model_name, const string& golden_in_name, const string& golden_out_name) { // Read the model. - string tflite_file_path = file::JoinPath(TestDataPath(), model_name); + string tflite_file_path = StrCat(TestDataPath(), "/", model_name); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to read model from file " << tflite_file_path; @@ -53,13 +53,12 @@ void RunTest(int model_input_tensor, int svdf_layer_state_tensor, // Load the input frames. Frames input_frames; - const string input_file_path = file::JoinPath(TestDataPath(), golden_in_name); + const string input_file_path = StrCat(TestDataPath(), "/", golden_in_name); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; - const string output_file_path = - file::JoinPath(TestDataPath(), golden_out_name); + const string output_file_path = StrCat(TestDataPath(), "/", golden_out_name); ReadFrames(output_file_path, &output_frames); const int speech_batch_size = diff --git a/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc index 687cfab0b2..9da0fb1fc6 100644 --- a/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc @@ -20,9 +20,9 @@ limitations under the License. #include #include "base/logging.h" -#include "file/base/path.h" #include "testing/base/public/googletest.h" #include +#include "absl/strings/str_cat.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/model.h" @@ -46,7 +46,7 @@ constexpr int kModelOutputTensor = 66; TEST(SpeechSpeakerId, OkGoogleTest) { // Read the model. string tflite_file_path = - file::JoinPath(TestDataPath(), "speech_speakerid_model.tflite"); + StrCat(TestDataPath(), "/", "speech_speakerid_model.tflite"); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to read model from file " << tflite_file_path; @@ -61,13 +61,13 @@ TEST(SpeechSpeakerId, OkGoogleTest) { // Load the input frames. Frames input_frames; const string input_file_path = - file::JoinPath(TestDataPath(), "speech_speakerid_model_in.csv"); + StrCat(TestDataPath(), "/", "speech_speakerid_model_in.csv"); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; const string output_file_path = - file::JoinPath(TestDataPath(), "speech_speakerid_model_out.csv"); + StrCat(TestDataPath(), "/", "speech_speakerid_model_out.csv"); ReadFrames(output_file_path, &output_frames); const int speech_batch_size = diff --git a/tensorflow/contrib/lite/models/speech_tts_model_test.cc b/tensorflow/contrib/lite/models/speech_tts_model_test.cc index e6f2673a42..8829177689 100644 --- a/tensorflow/contrib/lite/models/speech_tts_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_tts_model_test.cc @@ -20,9 +20,9 @@ limitations under the License. #include #include "base/logging.h" -#include "file/base/path.h" #include "testing/base/public/googletest.h" #include +#include "absl/strings/str_cat.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" @@ -45,7 +45,7 @@ constexpr int kModelOutputTensor = 74; TEST(SpeechTTS, RandomIOTest) { // Read the model. string tflite_file_path = - file::JoinPath(TestDataPath(), "speech_tts_model.tflite"); + StrCat(TestDataPath(), "/", "speech_tts_model.tflite"); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to mmap model " << tflite_file_path; @@ -59,13 +59,13 @@ TEST(SpeechTTS, RandomIOTest) { // Load the input frames. Frames input_frames; const string input_file_path = - file::JoinPath(TestDataPath(), "speech_tts_model_in.csv"); + StrCat(TestDataPath(), "/", "speech_tts_model_in.csv"); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; const string output_file_path = - file::JoinPath(TestDataPath(), "speech_tts_model_out.csv"); + StrCat(TestDataPath(), "/", "speech_tts_model_out.csv"); ReadFrames(output_file_path, &output_frames); const int speech_batch_size = diff --git a/tensorflow/contrib/lite/models/test_utils.h b/tensorflow/contrib/lite/models/test_utils.h index b2596babd0..1e14c26a35 100644 --- a/tensorflow/contrib/lite/models/test_utils.h +++ b/tensorflow/contrib/lite/models/test_utils.h @@ -30,12 +30,12 @@ using Frames = std::vector>; } // namespace tflite #ifndef __ANDROID__ -#include "file/base/path.h" +#include "absl/strings/str_cat.h" #include "tensorflow/core/platform/test.h" inline string TestDataPath() { - return string(file::JoinPath(tensorflow::testing::TensorFlowSrcRoot(), - "contrib/lite/models/testdata/")); + return string(StrCat(tensorflow::testing::TensorFlowSrcRoot(), "/", + "contrib/lite/models/testdata/")); } inline int TestInputSize(const tflite::models::Frames& input_frames) { return input_frames.size(); -- GitLab From 315cc11c4c7da7d6f0849ed628863ccc4a01a2b5 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Fri, 10 Nov 2017 22:14:36 -0800 Subject: [PATCH 0388/1801] Fix //tensorflow/contrib/lite/tools:gen_op_registration bazel build. PiperOrigin-RevId: 175376892 --- tensorflow/contrib/lite/tools/BUILD | 1 + tensorflow/contrib/lite/tools/gen_op_registration.cc | 3 ++- tensorflow/contrib/lite/tools/gen_op_registration.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 2d918fd4e8..3c9cec5d16 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -20,6 +20,7 @@ cc_library( hdrs = ["gen_op_registration.h"], deps = [ "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string", "@com_googlesource_code_re2//:re2", ], ) diff --git a/tensorflow/contrib/lite/tools/gen_op_registration.cc b/tensorflow/contrib/lite/tools/gen_op_registration.cc index 57c2567e3b..d80ea59170 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration.cc +++ b/tensorflow/contrib/lite/tools/gen_op_registration.cc @@ -15,8 +15,9 @@ limitations under the License. #include #include -#include "third_party/re2/re2.h" +#include "re2/re2.h" #include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/tools/gen_op_registration.h" namespace tflite { diff --git a/tensorflow/contrib/lite/tools/gen_op_registration.h b/tensorflow/contrib/lite/tools/gen_op_registration.h index 363bb2335c..318859e23d 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration.h +++ b/tensorflow/contrib/lite/tools/gen_op_registration.h @@ -16,6 +16,7 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_GEN_OP_REGISTRATION_H_ #include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string.h" namespace tflite { -- GitLab From cf0642f4fc718cd2e03c45dfe1c4ac15dbb9ecdc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 12 Nov 2017 12:15:25 -0800 Subject: [PATCH 0389/1801] Internal Change PiperOrigin-RevId: 175462406 --- tensorflow/contrib/lite/java/BUILD | 37 +++++++++++++++++++ tensorflow/contrib/lite/tools/BUILD | 4 +- .../lite/tools/gen_op_registration_main.cc | 1 + .../lite/tools/gen_op_registration_test.cc | 8 ++-- .../contrib/lite/tools/mutable_op_resolver.h | 6 +-- 5 files changed, 48 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 74fb4fe001..b0d20bac20 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -77,6 +77,43 @@ java_test( ], ) +java_test( + name = "NativeInterpreterWrapperTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java"], + data = [ + "src/testdata/add.bin", + "src/testdata/int32.bin", + "src/testdata/int64.bin", + "src/testdata/invalid_model.bin", + "src/testdata/uint8.bin", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", + deps = [ + ":libtensorflowlite_jni.so", + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + +java_test( + name = "TensorTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/TensorTest.java"], + data = [ + "src/testdata/add.bin", + ], + javacopts = JAVACOPTS, + test_class = "org.tensorflow.lite.TensorTest", + deps = [ + ":tensorflowlitelib", + "@com_google_truth", + "@junit", + ], +) + filegroup( name = "libtensorflowlite_jni", srcs = select({ diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 3c9cec5d16..21b32d8434 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -4,7 +4,9 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 -cc_binary( +load("//tensorflow:tensorflow.bzl", "tf_cc_binary") + +tf_cc_binary( name = "generate_op_registrations", srcs = ["gen_op_registration_main.cc"], deps = [ diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc index 7b27066a21..1b28b8bcd9 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc +++ b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc @@ -24,6 +24,7 @@ limitations under the License. using tensorflow::Flag; using tensorflow::Flags; +using tensorflow::string; namespace { diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_test.cc b/tensorflow/contrib/lite/tools/gen_op_registration_test.cc index c65cffe340..28a98d68ab 100644 --- a/tensorflow/contrib/lite/tools/gen_op_registration_test.cc +++ b/tensorflow/contrib/lite/tools/gen_op_registration_test.cc @@ -43,25 +43,25 @@ TEST_F(GenOpRegistrationTest, TestNonExistantFiles) { } TEST_F(GenOpRegistrationTest, TestModels) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/test_model.bin"); + ReadOps("tensorflow/contrib/lite/testdata/test_model.bin"); EXPECT_THAT(builtin_ops_, ElementsAreArray({"CONV_2D"})); EXPECT_THAT(custom_ops_, ElementsAreArray({"testing_op"})); } TEST_F(GenOpRegistrationTest, TestEmptyModels) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/empty_model.bin"); + ReadOps("tensorflow/contrib/lite/testdata/empty_model.bin"); EXPECT_EQ(builtin_ops_.size(), 0); EXPECT_EQ(custom_ops_.size(), 0); } TEST_F(GenOpRegistrationTest, TestZeroSubgraphs) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/0_subgraphs.bin"); + ReadOps("tensorflow/contrib/lite/testdata/0_subgraphs.bin"); EXPECT_EQ(builtin_ops_.size(), 0); EXPECT_EQ(custom_ops_.size(), 0); } TEST_F(GenOpRegistrationTest, TestBrokenMmap) { - ReadOps("third_party/tensorflow/contrib/lite/testdata/test_model_broken.bin"); + ReadOps("tensorflow/contrib/lite/testdata/test_model_broken.bin"); EXPECT_EQ(builtin_ops_.size(), 0); EXPECT_EQ(custom_ops_.size(), 0); } diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h index a51fdaee19..8206a5481d 100644 --- a/tensorflow/contrib/lite/tools/mutable_op_resolver.h +++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h @@ -15,7 +15,7 @@ limitations under the License. #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_MUTABLE_OP_RESOLVER_H_ #define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOOLS_MUTABLE_OP_RESOLVER_H_ -#include +#include #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/model.h" @@ -47,8 +47,8 @@ class MutableOpResolver : public OpResolver { void AddCustom(const char* name, TfLiteRegistration* registration); private: - std::unordered_map builtins_; - std::unordered_map custom_ops_; + std::map builtins_; + std::map custom_ops_; }; } // namespace tflite -- GitLab From 7497fca0ab2e87b0a9194f99d421c2e78b30cb17 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 13 Nov 2017 22:29:49 -0800 Subject: [PATCH 0390/1801] Implement shape constraints for `Cross` (#14156) * Implement shape constraints for `Cross` This fix implements shape constrainsts for `Cross`, as was specified in the TODO: ``` // * Both inputs have the same shape. // * Input rank >= 1. // * input_shape[-1] == 3. ``` Signed-off-by: Yong Tang * Add unit test for shape constraint and inference for `Cross` Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 24 +++++++++++++++++++----- tensorflow/core/ops/math_ops_test.cc | 11 +++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index d30b847696..df75caca37 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -2331,11 +2331,25 @@ REGISTER_OP("Cross") .Input("b: T") .Output("product: T") .Attr("T: realnumbertype") - // TODO(cwhipkey): implement these shape inference constraints here: - // * Both inputs have the same shape. - // * Input rank >= 1. - // * input_shape[-1] == 3. - .SetShapeFn(shape_inference::UnchangedShape) + .SetShapeFn([](InferenceContext* c) { + ShapeHandle a_shape; + ShapeHandle b_shape; + // * Input rank >= 1. + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &a_shape)); + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &b_shape)); + + // * Both inputs have the same shape. + TF_RETURN_IF_ERROR(c->Merge(a_shape, b_shape, &a_shape)); + + // * input_shape[-1] == 3. + if (c->RankKnown(a_shape)) { + int rank = c->Rank(a_shape); + auto dim = c->Dim(a_shape, rank - 1); + TF_RETURN_IF_ERROR(c->WithValue(dim, 3, &dim)); + } + c->set_output(0, a_shape); + return Status::OK(); + }) .Doc(R"doc( Compute the pairwise cross product. diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc index 28f9969de5..3dfa776d26 100644 --- a/tensorflow/core/ops/math_ops_test.cc +++ b/tensorflow/core/ops/math_ops_test.cc @@ -515,4 +515,15 @@ TEST(MathOpstest, RequantizationRange_ShapeFn) { INFER_ERROR("must be rank 0", op, "?;?;[2]"); } +TEST(MathOpsTest, Cross_ShapeFn) { + ShapeInferenceTestOp op("Cross"); + + INFER_ERROR("Shape must be at least rank 1 but is rank 0", op, "[];[]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but", op, "[3];[5]"); + INFER_ERROR("Dimension must be 3 but", op, "[3,5];[3,5]"); + + INFER_OK(op, "?;?", "?"); + INFER_OK(op, "[?];[?]", "in0"); + INFER_OK(op, "[1,?,3];[?,?,?]", "in0"); +} } // end namespace tensorflow -- GitLab From 3f791113c036218c75049487355e4acd9be806c5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 11:17:04 -0800 Subject: [PATCH 0391/1801] Internal Change PiperOrigin-RevId: 175559457 --- tensorflow/contrib/lite/BUILD | 83 ------------------------------ tensorflow/contrib/lite/java/BUILD | 2 + 2 files changed, 2 insertions(+), 83 deletions(-) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index e0c674c295..96a9e281ad 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -184,89 +184,6 @@ cc_library( }), ) -cc_test( - name = "speech_hotword_model_test", - size = "small", - srcs = ["models/speech_hotword_model_test.cc"], - data = [ - "models/testdata/speech_hotword_model_in.csv", - "models/testdata/speech_hotword_model_out_rank1.csv", - "models/testdata/speech_hotword_model_out_rank2.csv", - "models/testdata/speech_hotword_model_rank1.tflite", - "models/testdata/speech_hotword_model_rank2.tflite", - ], - deps = [ - ":framework", - ":models_test_utils", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - -gen_selected_ops( - name = "speech_speakerid_ops", - model = "models/testdata/speech_speakerid_model.tflite", -) - -cc_test( - name = "speech_speakerid_model_test", - size = "small", - srcs = [ - "models/speech_speakerid_model_test.cc", - ":speech_speakerid_ops", - ], - data = [ - "models/testdata/speech_speakerid_model.tflite", - "models/testdata/speech_speakerid_model_in.csv", - "models/testdata/speech_speakerid_model_out.csv", - ], - deps = [ - ":framework", - ":models_test_utils", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "//tensorflow/contrib/lite/tools:mutable_op_resolver", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "speech_terse_am_model_test", - size = "small", - srcs = ["models/speech_terse_am_model_test.cc"], - data = [ - "models/testdata/speech_terse_am_model.tflite", - "models/testdata/speech_terse_am_model_in.csv", - "models/testdata/speech_terse_am_model_out.csv", - ], - deps = [ - ":framework", - ":models_test_utils", - #"//file/base:path", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "speech_tts_model_test", - size = "small", - srcs = ["models/speech_tts_model_test.cc"], - data = [ - "models/testdata/speech_tts_model.tflite", - "models/testdata/speech_tts_model_in.csv", - "models/testdata/speech_tts_model_out.csv", - ], - deps = [ - ":framework", - ":models_test_utils", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index b0d20bac20..1de28eb52d 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -58,6 +58,7 @@ java_test( javacopts = JAVACOPTS, test_class = "org.tensorflow.lite.TensorFlowLiteTest", deps = [ + ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -71,6 +72,7 @@ java_test( javacopts = JAVACOPTS, test_class = "org.tensorflow.lite.DataTypeTest", deps = [ + ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", -- GitLab From d915c3e34fa558bc0008f7a8a75239507e9765a4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 12:00:17 -0800 Subject: [PATCH 0392/1801] Internal change. PiperOrigin-RevId: 175566634 --- tensorflow/contrib/lite/toco/tflite/operator_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index 543a9bd06c..8e77c56d8a 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -319,10 +319,12 @@ TEST_F(OperatorTest, BuiltinMul) { TEST_F(OperatorTest, Svdf) { SvdfOperator op; op.fused_activation_function = FusedActivationFunctionType::kRelu; + op.rank = 1; auto output_toco_op = SerializeAndDeserialize(GetOperator("SVDF", OperatorType::kSvdf), op); EXPECT_EQ(op.fused_activation_function, output_toco_op->fused_activation_function); + EXPECT_EQ(op.rank, output_toco_op->rank); } TEST_F(OperatorTest, TensorFlowUnsupported) { -- GitLab From 59b3f9c9e3181753cb8a0e0cb00edfcaa3f20233 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 13 Nov 2017 18:02:43 -0800 Subject: [PATCH 0393/1801] Explicitly specify TFLite demo assets in BUILD file PiperOrigin-RevId: 175617982 --- tensorflow/contrib/lite/java/demo/app/src/main/BUILD | 10 ++-------- .../contrib/lite/java/demo/app/src/main/assets/BUILD | 6 ++---- third_party/tflite_mobilenet.BUILD | 5 ++--- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index 4fc6d99d8c..654fa9d6d2 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -6,7 +6,8 @@ android_binary( name = "TfLiteCameraDemo", srcs = glob(["java/**/*.java"]), assets = [ - ":assets", + "@tflite_mobilenet//:labels.txt", + "@tflite_mobilenet//:mobilenet_quant_v1_224.tflite", ], assets_dir = "", custom_package = "com.example.android.tflitecamerademo", @@ -27,13 +28,6 @@ android_binary( ], ) -filegroup( - name = "assets", - srcs = [ - "@tflite_mobilenet//:model_files", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD index 1a759f5652..dd0cd6c98f 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD @@ -2,15 +2,13 @@ package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 -filegroup( - name = "assets_files", - srcs = glob( +exports_files( + glob( ["**/*"], exclude = [ "BUILD", ], ), - visibility = ["//visibility:public"], ) filegroup( diff --git a/third_party/tflite_mobilenet.BUILD b/third_party/tflite_mobilenet.BUILD index 75663eff48..de47ed61f9 100644 --- a/third_party/tflite_mobilenet.BUILD +++ b/third_party/tflite_mobilenet.BUILD @@ -2,9 +2,8 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 -filegroup( - name = "model_files", - srcs = glob( +exports_files( + glob( ["**/*"], exclude = [ "BUILD", -- GitLab From 2c26c98f8d1f15d064c76548393137f058043dc1 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Mon, 13 Nov 2017 22:32:55 -0800 Subject: [PATCH 0394/1801] Add a checkpoint compatibility test for layout optimizer. PiperOrigin-RevId: 175637014 --- .../python/grappler/layout_optimizer_test.py | 58 +++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 87f07c4a52..bc9d910447 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -18,8 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.core.protobuf import saver_pb2 from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -32,9 +35,10 @@ from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import gradient_descent -from tensorflow.python.training import saver +from tensorflow.python.training import saver as saver_lib def weight(shape): @@ -83,9 +87,9 @@ def loop(): return outputs -def get_config(): +def get_config(layout_optimizer=True): rewrite_options = rewriter_config_pb2.RewriterConfig( - optimize_tensor_layout=True) + optimize_tensor_layout=layout_optimizer) graph_options = config_pb2.GraphOptions( rewrite_options=rewrite_options, build_cost_model=1) config = config_pb2.ConfigProto(graph_options=graph_options) @@ -95,6 +99,41 @@ def get_config(): class LayoutOptimizerTest(test.TestCase): """Tests the Grappler layout optimizer.""" + def _train(self, checkpoint_path, layout_optimizer=False, restore=False): + ops.reset_default_graph() + graph = ops.get_default_graph() + with session.Session( + config=get_config(layout_optimizer), graph=graph) as sess: + batch = 2 + height = 6 + width = 7 + input_channels = 3 + shape = [batch, height, width, input_channels] + image = array_ops.placeholder(dtype='float32', shape=shape) + conv1 = conv_layers.conv2d(image, 32, [3, 3]) + conv2 = conv_layers.conv2d(conv1, 32, [3, 3]) + optimizer = gradient_descent.GradientDescentOptimizer(0.01) + loss = math_ops.reduce_mean(conv2) + train_op = optimizer.minimize(loss) + saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) + + if restore: + saver.restore(sess, checkpoint_path) + else: + sess.run(variables.global_variables_initializer()) + + np.random.seed(0) + for _ in range(2): + image_val = np.random.rand(*shape).astype(np.float32) + sess.run([loss, train_op], feed_dict={image: image_val}) + + if restore: + all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + all_vars_values = [var.eval(session=sess) for var in all_vars] + return all_vars_values + else: + saver.save(sess, checkpoint_path) + def testTwoConvLayers(self): if test.is_gpu_available(cuda_only=True): random_seed.set_random_seed(0) @@ -152,7 +191,7 @@ class LayoutOptimizerTest(test.TestCase): train_op = optimizer.minimize(loss) graph = ops.get_default_graph() graph.add_to_collection('train_op', train_op) - meta_graph = saver.export_meta_graph(graph_def=graph.as_graph_def()) + meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def()) rewrite_options = rewriter_config_pb2.RewriterConfig( optimize_tensor_layout=True) @@ -165,6 +204,17 @@ class LayoutOptimizerTest(test.TestCase): self.assertEqual(node.attr['data_format'].s, 'NCHW') self.assertEqual(found, 5) + def testCheckpointCompatibility(self): + checkpoint_path = self.get_temp_dir() + self._train(checkpoint_path) + vars_expected = self._train(checkpoint_path, restore=True) + vars_layout_optimized = self._train( + checkpoint_path, restore=True, layout_optimizer=True) + + for var_expected, var_layout_optimized in zip(vars_expected, + vars_layout_optimized): + self.assertAllClose(var_expected, var_layout_optimized, atol=1e-6) + if __name__ == '__main__': test.main() -- GitLab From 43c428ada3cd80b8d269c34f3724aaaec08f12de Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 22:34:51 -0800 Subject: [PATCH 0395/1801] OOM error with allocation information. PiperOrigin-RevId: 175637128 --- .../core/common_runtime/direct_session.cc | 3 +- tensorflow/core/common_runtime/executor.cc | 15 ++++ .../common_runtime/step_stats_collector.cc | 90 +++++++++++++++++++ .../common_runtime/step_stats_collector.h | 7 ++ .../distributed_runtime/master_session.cc | 8 +- .../core/distributed_runtime/master_session.h | 1 + tensorflow/core/distributed_runtime/worker.cc | 3 +- tensorflow/core/framework/op_kernel.cc | 6 +- .../core/framework/tracking_allocator.cc | 11 +++ .../core/framework/tracking_allocator.h | 2 + tensorflow/core/protobuf/config.proto | 7 ++ tensorflow/core/protobuf/worker.proto | 1 + tensorflow/python/profiler/BUILD | 1 + .../python/profiler/model_analyzer_test.py | 59 ++++++++++++ .../api/golden/tensorflow.-run-options.pbtxt | 4 + 15 files changed, 213 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 316fb0ac16..d652b1004f 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -548,7 +548,8 @@ Status DirectSession::Run(const RunOptions& run_options, ((measure_step_count + 1) % build_cost_model_every == 0); } } - if (do_trace || update_cost_model) { + if (do_trace || update_cost_model || + run_options.report_tensor_allocations_upon_oom()) { run_state.collector.reset( new StepStatsCollector(run_metadata->mutable_step_stats())); args.stats_collector = run_state.collector.get(); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index ada29ff287..1896baaf66 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -1804,6 +1804,21 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, LOG(WARNING) << this << " Compute status: " << s; DumpState(); } + if (s.code() == error::RESOURCE_EXHAUSTED) { + if (stats_collector_) { + string err = stats_collector_->ReportAllocsOnResourceExhausted( + s.error_message()); + s = Status(s.code(), strings::StrCat(s.error_message(), err)); + } else { + s = Status( + s.code(), + strings::StrCat( + s.error_message(), + "\nHint: If you want to see a list of allocated tensors when " + "OOM happens, add report_tensor_allocations_upon_oom " + "to RunOptions for current allocation info.\n")); + } + } return s; } diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index e6403df97f..bfe7a32b1b 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -20,10 +20,21 @@ limitations under the License. #include "tensorflow/core/framework/tracking_allocator.h" #include "tensorflow/core/graph/costmodel.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { +namespace { +const int kMaxAllocReportNodes = 100; +const float kMaxAllocReportFraction = 0.99; + +struct AllocStats { + std::map> nodes_by_size; + int64 total_bytes = 0; + int64 total_nodes = 0; +}; +} // namespace NodeExecStatsWrapper::NodeExecStatsWrapper() : NodeExecStatsWrapper(new NodeExecStats) {} @@ -267,6 +278,85 @@ void StepStatsCollector::Save(const string& device, } } +string StepStatsCollector::ReportAllocsOnResourceExhausted(const string& err) { + mutex_lock l(mu_); + if (err.find("OOM") == err.npos) { + return ""; + } + // -> AllocStats + std::map, AllocStats> allocs_map; + string report = "\n"; + for (const auto& dev_stat : dev_stats_) { + const string& device = dev_stat.first; + // Only print the device that has OOM. + // TODO(xpan): Extract device from err first to speed it up. + if (err.find(device) == err.npos) { + continue; + } + // NodeExecStatsWrapper* + for (const auto& stats : dev_stat.second) { + // std::pair + for (const auto& alloc : stats->allocations_) { + // Only print the allocator that has OOM. + // TODO(xpan): Extract device from err first to speed it up. + if (err.find(alloc.first->allocator_name()) == err.npos) { + continue; + } + auto dev_allocator = + std::make_pair(dev_stat.first, alloc.first->allocator_name()); + AllocStats& dev_allocs_stats = allocs_map[dev_allocator]; + TrackingAllocator* tracking_alloc = alloc.second; + gtl::InlinedVector cur_records = + tracking_alloc->GetCurrentRecords(); + int64 cur_bytes = 0; + for (const auto& r : cur_records) { + cur_bytes += r.alloc_bytes; + } + if (cur_bytes > 0) { + dev_allocs_stats.total_bytes += cur_bytes; + dev_allocs_stats.total_nodes++; + dev_allocs_stats.nodes_by_size[cur_bytes].push_back( + stats->stats()->node_name()); + } + } + } + } + + for (const auto& dev_allocs_it : allocs_map) { + const auto& dev = dev_allocs_it.first; + const AllocStats& dev_allocs_stats = dev_allocs_it.second; + int64 reported_bytes = 0; + int64 reported_nodes = 0; + bool done = false; + strings::StrAppend(&report, "\nCurrent usage from device: ", dev.first, + ", allocator: ", dev.second, "\n"); + // Print allocations stats of the pair. + for (auto it = dev_allocs_stats.nodes_by_size.rbegin(); + it != dev_allocs_stats.nodes_by_size.rend(); ++it) { + for (const string& node_name : it->second) { + reported_bytes += it->first; + strings::StrAppend(&report, " ", + strings::HumanReadableNumBytes(it->first), " from ", + node_name, "\n"); + if (++reported_nodes > kMaxAllocReportNodes || + reported_bytes >= + dev_allocs_stats.total_bytes * kMaxAllocReportFraction) { + done = true; + break; + } + } + if (done) break; + } + int64 remain_nodes = dev_allocs_stats.total_nodes - reported_nodes; + int64 remain_bytes = dev_allocs_stats.total_bytes - reported_bytes; + if (remain_nodes > 0) { + strings::StrAppend(&report, " Remaining ", remain_nodes, " nodes with ", + strings::HumanReadableNumBytes(remain_bytes), "\n"); + } + } + return report; +} + void StepStatsCollector::Finalize() { mutex_lock l(mu_); FinalizeInternal(); diff --git a/tensorflow/core/common_runtime/step_stats_collector.h b/tensorflow/core/common_runtime/step_stats_collector.h index b1fd28a982..996dbb59bc 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.h +++ b/tensorflow/core/common_runtime/step_stats_collector.h @@ -82,6 +82,13 @@ class StepStatsCollector { void Save(const string& device, NodeExecStats* nt); void Save(const string& device, NodeExecStatsWrapper* stats); + // Generates a string reporting the currently used memory based + // on ResourceExhausted OOM `err` message. + // `err` message needs to contain device name and allocator name, E.g.: + // "ResourceExhaustedError: OOM when allocating tensor ... + // on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc" + string ReportAllocsOnResourceExhausted(const string& err); + // The following 2 Finalize methods populate the StepStats passed // from the constructor. Calling it more than once won't have any effect. // User shouldn't call Save() methods after Finalize. diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index f7fce1d0ec..5798ad09e8 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -498,6 +498,9 @@ Status MasterSession::ReffedClientGraph::RunPartitions( // Collect execution cost stats on a smoothly decreasing frequency. ExecutorOpts exec_opts; + if (pss->report_tensor_allocations_upon_oom) { + exec_opts.set_report_tensor_allocations_upon_oom(true); + } if (pss->collect_costs) { exec_opts.set_record_costs(true); } @@ -1368,6 +1371,8 @@ Status MasterSession::DoPartialRun(CallOptions* opts, const auto count = run_state->count; pss.collect_timeline = req.options().trace_level() == RunOptions::FULL_TRACE; + pss.report_tensor_allocations_upon_oom = + req.options().report_tensor_allocations_upon_oom(); // Build the cost model every 'build_cost_model_every' steps after skipping // an @@ -1528,7 +1533,8 @@ Status MasterSession::DoRunWithLocalExecution( TRACEPRINTF("stepid %llu", step_id); pss.collect_timeline = req.options().trace_level() == RunOptions::FULL_TRACE; - + pss.report_tensor_allocations_upon_oom = + req.options().report_tensor_allocations_upon_oom(); // Build the cost model every 'build_cost_model_every' steps after skipping an // initial 'build_cost_model_after' steps. const int64 build_cost_model_after = diff --git a/tensorflow/core/distributed_runtime/master_session.h b/tensorflow/core/distributed_runtime/master_session.h index 51ea92da68..eb696eb06a 100644 --- a/tensorflow/core/distributed_runtime/master_session.h +++ b/tensorflow/core/distributed_runtime/master_session.h @@ -146,6 +146,7 @@ class MasterSession : public core::RefCounted { bool collect_timeline = false; bool collect_rpcs = false; bool collect_partition_graphs = false; + bool report_tensor_allocations_upon_oom = false; Microseconds start_micros = Microseconds(0); Microseconds end_micros = Microseconds(0); std::vector step_stats; // per partition diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index b7c5793736..fcb1830197 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -132,7 +132,8 @@ void Worker::DoRunGraph(CallOptions* opts, RunGraphRequestWrapper* request, return; } StepStatsCollector* collector = nullptr; - if (request->exec_opts().record_timeline() || + if (request->exec_opts().report_tensor_allocations_upon_oom() || + request->exec_opts().record_timeline() || request->exec_opts().record_costs()) { collector = new StepStatsCollector(response->mutable_step_stats()); // TODO(mrry,pbar): GPU tracing for distributed steps. diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 30e3b7ef59..c23692409c 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -622,8 +622,10 @@ Status OpKernelContext::allocate_tensor( Tensor new_tensor(a, type, shape, logged_attr); if (!new_tensor.IsInitialized()) { - return errors::ResourceExhausted("OOM when allocating tensor with shape", - shape.DebugString()); + return errors::ResourceExhausted( + "OOM when allocating tensor with shape", shape.DebugString(), + " and type ", DataTypeString(type), " on ", params_->device->name(), + " by allocator ", a->Name()); } if (params_->log_memory) { LogMemory::RecordTensorAllocation(params_->op_kernel->name(), diff --git a/tensorflow/core/framework/tracking_allocator.cc b/tensorflow/core/framework/tracking_allocator.cc index db996e31b0..239dfd13ec 100644 --- a/tensorflow/core/framework/tracking_allocator.cc +++ b/tensorflow/core/framework/tracking_allocator.cc @@ -183,6 +183,17 @@ gtl::InlinedVector TrackingAllocator::GetRecordsAndUnRef() { return allocations; } +gtl::InlinedVector TrackingAllocator::GetCurrentRecords() { + gtl::InlinedVector allocations; + { + mutex_lock lock(mu_); + for (const AllocRecord& alloc : allocations_) { + allocations.push_back(alloc); + } + } + return allocations; +} + bool TrackingAllocator::UnRef() { CHECK_GE(ref_, 1); --ref_; diff --git a/tensorflow/core/framework/tracking_allocator.h b/tensorflow/core/framework/tracking_allocator.h index d10b0cca51..a6c26c89e5 100644 --- a/tensorflow/core/framework/tracking_allocator.h +++ b/tensorflow/core/framework/tracking_allocator.h @@ -85,6 +85,8 @@ class TrackingAllocator : public Allocator { // deallocated. After this call completes and all allocated pointers // have been deallocated the wrapper will delete itself. gtl::InlinedVector GetRecordsAndUnRef(); + // Returns a copy of allocation records collected so far. + gtl::InlinedVector GetCurrentRecords(); protected: ~TrackingAllocator() override {} diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 145311b59d..a956aab3dc 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -331,6 +331,13 @@ message RunOptions { // EXPERIMENTAL. Options used to initialize DebuggerState, if enabled. DebugOptions debug_options = 6; + // When enabled, causes tensor alllocation information to be included in + // the error message when the Run() call fails because the allocator ran + // out of memory (OOM). + // + // Enabling this option can slow down the Run() call. + bool report_tensor_allocations_upon_oom = 7; + reserved 4; } diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto index 137f9bc216..34a5cff366 100644 --- a/tensorflow/core/protobuf/worker.proto +++ b/tensorflow/core/protobuf/worker.proto @@ -169,6 +169,7 @@ message ExecutorOpts { bool record_costs = 1; bool record_timeline = 3; bool record_partition_graphs = 4; + bool report_tensor_allocations_upon_oom = 5; }; message RunGraphRequest { diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD index 26cc5f0b74..519b05975f 100644 --- a/tensorflow/python/profiler/BUILD +++ b/tensorflow/python/profiler/BUILD @@ -53,6 +53,7 @@ cuda_py_test( "//tensorflow/python:client", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:distributed_framework_test_lib", "//tensorflow/python:platform", "//tensorflow/python:variables", ], diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index 17c87bea92..698f8906d4 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -28,6 +28,8 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.client import session from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables from tensorflow.python.platform import gfile from tensorflow.python.platform import test @@ -635,6 +637,63 @@ class PrintModelAnalysisTest(test.TestCase): self._trainLoop(x, 10, time_dir, time_steps, memory_dir, memory_steps, profile_dir, dump_steps) + def testOOM(self): + if not test.is_gpu_available(): + return + ops.reset_default_graph() + with ops.device('/device:GPU:0'): + a = random_ops.random_normal([1, 10000, 20000], name='test_random1') + b = random_ops.random_normal([30000, 10000, 1], name='test_random2') + c = a * b + + try: + with session.Session() as sess: + sess.run(c, options=config_pb2.RunOptions( + report_tensor_allocations_upon_oom=True)) + except Exception as e: # pylint: disable=broad-except + exception_str = '%s' % e + # This trace reports allocations for to random tensor. + self.assertTrue( + 'OOM when allocating tensor with shape[30000,10000,20000]' in + exception_str) + mat = re.search('(.*)GiB from test_random2/RandomStandardNormal', + exception_str) + self.assertGreater(float(mat.group(1)), 0.0) + mat = re.search('(.*)MiB from test_random1/RandomStandardNormal', + exception_str) + self.assertGreater(float(mat.group(1)), 0.0) + + def testDistributedOOM(self): + if not test.is_gpu_available(): + return + ops.reset_default_graph() + + workers, _ = test_util.create_local_cluster(2, 0) + + with ops.device('/job:worker/replica:0/task:0/gpu:0'): + a = random_ops.random_normal([1, 10000, 20000], name='test_random1') + with ops.device('/job:worker/replica:0/task:1/gpu:0'): + b = random_ops.random_normal([30000, 10000, 1], name='test_random2') + c = a * b + + try: + with session.Session(workers[1].target) as sess: + sess.run(c, options=config_pb2.RunOptions( + report_tensor_allocations_upon_oom=True)) + except Exception as e: # pylint: disable=broad-except + exception_str = '%s' % e + # test_random2 is reported because it's allocated in worker 1. + self.assertTrue('Current usage from device: ' + '/job:worker/replica:0/task:1/device:GPU:0, ' + 'allocator: GPU_0_bfc' in exception_str) + mat = re.search('(.*)GiB from test_random2/RandomStandardNormal', + exception_str) + self.assertGreater(float(mat.group(1)), 0.0) + # test_random1 is not reported because it's allocated in worker 0. + mat = re.search('(.*)MiB from test_random1/RandomStandardNormal', + exception_str) + self.assertTrue(mat is None) + if __name__ == '__main__': test.main() diff --git a/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt index 5ad6804a78..2f3e7f1a84 100644 --- a/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt @@ -34,6 +34,10 @@ tf_class { name: "OUTPUT_PARTITION_GRAPHS_FIELD_NUMBER" mtype: "" } + member { + name: "REPORT_TENSOR_ALLOCATIONS_UPON_OOM_FIELD_NUMBER" + mtype: "" + } member { name: "SOFTWARE_TRACE" mtype: "" -- GitLab From 2e57e3f4952efc955cafa86a17c3d2b101924a64 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 22:49:31 -0800 Subject: [PATCH 0396/1801] Changes to the TF mobile docs for TFLite, and a new intro for TFLite. PiperOrigin-RevId: 175638087 --- tensorflow/docs_src/mobile/index.md | 274 +++------------------ tensorflow/docs_src/mobile/leftnav_files | 7 +- tensorflow/docs_src/mobile/mobile_intro.md | 247 +++++++++++++++++++ tensorflow/docs_src/mobile/tflite/index.md | 202 +++++++++++++++ 4 files changed, 490 insertions(+), 240 deletions(-) create mode 100644 tensorflow/docs_src/mobile/mobile_intro.md create mode 100644 tensorflow/docs_src/mobile/tflite/index.md diff --git a/tensorflow/docs_src/mobile/index.md b/tensorflow/docs_src/mobile/index.md index 06ad47bc62..a10db74364 100644 --- a/tensorflow/docs_src/mobile/index.md +++ b/tensorflow/docs_src/mobile/index.md @@ -1,238 +1,36 @@ -# Building Mobile Apps with TensorFlow - -TensorFlow was designed from the ground up to be a good deep learning solution -for mobile platforms like Android and iOS. This guide is to help you understand -how to integrate TensorFlow into your mobile apps effectively and efficiently. - -## About this Guide - -This guide is aimed at developers who have a TensorFlow model that’s -successfully working in a desktop environment, and who want to integrate it into -a mobile application. Here are the main challenges you’ll face during that -process: - -- Understanding how to use Tensorflow for mobile. -- Building TensorFlow for your platform. -- Integrating the TensorFlow library into your application. -- Preparing your model file for mobile deployment. -- Optimizing for latency, RAM usage, model file size, and binary size. - -## Why run TensorFlow on mobile? - -Traditionally, deep learning has been associated with data centers and giant -clusters of high-powered GPU machines. However, it can be very expensive and -time-consuming to send all of the data a device has access to across a network -connection. Running on mobile makes it possible to deliver very interactive -applications in a way that’s not possible when you have to wait for a network -round trip. - -Here are some common use cases for on-device deep learning: - -### Speech Recognition - -There are a lot of interesting applications that can be built with a -speech-driven interface, and many of these require on-device processing. Most of -the time a user isn’t giving commands, and so streaming audio continuously to a -remote server would be a waste of bandwidth, since it would mostly be silence or -background noises. To solve this problem it’s common to have a small neural -network running on-device @{$tutorials/audio_recognition$listening out for a particular keyword}. -Once that keyword has been spotted, the rest of the -conversation can be transmitted over to the server for further processing if -more computing power is needed. - -### Image Recognition - -It can be very useful for a mobile app to be able to make sense of a camera -image. If your users are taking photos, recognizing what’s in them can help your -camera apps apply appropriate filters, or label the photos so they’re easily -findable. It’s important for embedded applications too, since you can use image -sensors to detect all sorts of interesting conditions, whether it’s spotting -endangered animals in the wild -or -[reporting how late your train is running](https://svds.com/tensorflow-image-recognition-raspberry-pi/). - -TensorFlow comes with several examples of recognizing the types of objects -inside images along with a variety of different pre-trained models, and they can -all be run on mobile devices. You can try out -our -[Tensorflow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/index.html#0) and -[Tensorflow for Poets 2: Optimize for Mobile](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2/index.html#0) codelabs to -see how to take a pretrained model and run some very fast and lightweight -training to teach it to recognize specific objects, and then optimize it to -run on mobile. - -### Object Localization - -Sometimes it’s important to know where objects are in an image as well as what -they are. There are lots of augmented reality use cases that could benefit a -mobile app, such as guiding users to the right component when offering them -help fixing their wireless network or providing informative overlays on top of -landscape features. Embedded applications often need to count objects that are -passing by them, whether it’s pests in a field of crops, or people, cars and -bikes going past a street lamp. - -TensorFlow offers a pretrained model for drawing bounding boxes around people -detected in images, together with tracking code to follow them over time. The -tracking is especially important for applications where you’re trying to count -how many objects are present over time, since it gives you a good idea when a -new object enters or leaves the scene. We have some sample code for this -available for Android [on -Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android), -and also a [more general object detection -model](https://github.com/tensorflow/models/tree/master/object_detection/README.md) -available as well. - -### Gesture Recognition - -It can be useful to be able to control applications with hand or other -gestures, either recognized from images or through analyzing accelerometer -sensor data. Creating those models is beyond the scope of this guide, but -TensorFlow is an effective way of deploying them. - -### Optical Character Recognition - -Google Translate’s live camera view is a great example of how effective -interactive on-device detection of text can be. - -
    - -
    - -There are multiple steps involved in recognizing text in images. You first have -to identify the areas where the text is present, which is a variation on the -object localization problem, and can be solved with similar techniques. Once you -have an area of text, you then need to interpret it as letters, and then use a -language model to help guess what words they represent. The simplest way to -estimate what letters are present is to segment the line of text into individual -letters, and then apply a simple neural network to the bounding box of each. You -can get good results with the kind of models used for MNIST, which you can find -in TensorFlow’s tutorials, though you may want a higher-resolution input. A -more advanced alternative is to use an LSTM model to process a whole line of -text at once, with the model itself handling the segmentation into different -characters. - -### Translation - -Translating from one language to another quickly and accurately, even if you -don’t have a network connection, is an important use case. Deep networks are -very effective at this sort of task, and you can find descriptions of a lot of -different models in the literature. Often these are sequence-to-sequence -recurrent models where you’re able to run a single graph to do the whole -translation, without needing to run separate parsing stages. - -### Text Classification - -If you want to suggest relevant prompts to users based on what they’re typing or -reading, it can be very useful to understand the meaning of the text. This is -where text classification comes in. Text classification is an umbrella term -that covers everything from sentiment analysis to topic discovery. You’re likely -to have your own categories or labels that you want to apply, so the best place -to start is with an example -like -[Skip-Thoughts](https://github.com/tensorflow/models/tree/master/skip_thoughts/), -and then train on your own examples. - -### Voice Synthesis - -A synthesized voice can be a great way of giving users feedback or aiding -accessibility, and recent advances such as -[WaveNet](https://deepmind.com/blog/wavenet-generative-model-raw-audio/) show -that deep learning can offer very natural-sounding speech. - -## How does it fit with the cloud? - -These examples of use cases give an idea of how on-device networks can -complement cloud services. Cloud has a great deal of computing power in a -controlled environment, but running on devices can offer higher interactivity. -In situations where the cloud is unavailable, or your cloud capacity is limited, -you can provide an offline experience, or reduce cloud workload by processing -easy cases on device. - -Doing on-device computation can also signal when it's time to switch to working -on the cloud. A good example of this is hotword detection in speech. Since -devices are able to constantly listen out for the keywords, this then triggers a -lot of traffic to cloud-based speech recognition once one is recognised. Without -the on-device component, the whole application wouldn’t be feasible, and this -pattern exists across several other applications as well. Recognizing that some -sensor input is interesting enough for further processing makes a lot of -interesting products possible. - -## What hardware and software should you have? - -TensorFlow runs on Ubuntu Linux, Windows 10, and OS X. For a list of all -supported operating systems and instructions to install TensorFlow, see -@{$install$Installing Tensorflow}. - -Some of the scripts in this guide require you to compile TensorFlow from source, -so you’ll need more than just `pip install` to work through all the sample code. - -To try out the mobile examples, you’ll need a device set up for development, -using -either [Android Studio](https://developer.android.com/studio/install.html), -or [XCode](https://developer.apple.com/xcode/) if you're developing for iOS. - -## What should you do before you get started? - -Before thinking about how to get your solution on mobile: - -1. Determine whether your problem is solvable by mobile machine learning -2. Create a labelled dataset to define your problem -3. Pick an effective model for the problem - -We'll discuss these in more detail below. - -### Is your problem solvable by mobile machine learning? - -Once you have an idea of the problem you want to solve, you need to make a plan -of how to build your solution. The most important first step is making sure that -your problem is actually solvable, and the best way to do that is to mock it up -using humans in the loop. - -For example, if you want to drive a robot toy car using voice commands, try -recording some audio from the device and listen back to it to see if you can -make sense of what’s being said. Often you’ll find there are problems in the -capture process, such as the motor drowning out speech or not being able to hear -at a distance, and you should tackle these problems before investing in the -modeling process. - -Another example would be giving photos taken from your app to people see if they -can classify what’s in them, in the way you’re looking for. If they can’t do -that (for example, trying to estimate calories in food from photos may be -impossible because all white soups look the same), then you’ll need to redesign -your experience to cope with that. A good rule of thumb is that if a human can’t -handle the task then it will be difficult to train a computer to do better. - -### Create a labelled dataset - -After you’ve solved any fundamental issues with your use case, you need to -create a labeled dataset to define what problem you’re trying to solve. This -step is extremely important, moreso than picking which model to use. You want it -to be as representative as possible of your actual use case, since the model -will only be effective at the task you teach it. It’s also worth investing in -tools to make labeling the data as efficient and accurate as possible. For -example, if you’re able to switch from having to click a button on a web -interface to simple keyboard shortcuts, you may be able to speed up the -generation process a lot. You should also start by doing the initial labeling -yourself, so you can learn about the difficulties and likely errors, and -possibly change your labeling or data capture process to avoid them. Once you -and your team are able to consistently label examples (that is once you -generally agree on the same labels for most examples), you can then try and -capture your knowledge in a manual and teach external raters how to run the same -process. - -### Pick an effective model - -The next step is to pick an effective model to use. You might be able to avoid -training a model from scratch if someone else has already implemented a model -similar to what you need; we have a repository of models implemented in -TensorFlow [on Github](https://github.com/tensorflow/models) that you can look -through. Lean towards the simplest model you can find, and try to get started as -soon as you have even a small amount of labelled data, since you’ll get the best -results when you’re able to iterate quickly. The shorter the time it takes to -try training a model and running it in s real application, the better overall -results you’ll see. It’s common for an algorithm to get great training accuracy -numbers but then fail to be useful within a real application because there’s a -mismatch between the dataset and real usage. Prototype end-to-end usage as soon -as possible to create a consistent user experience. +# Overview + +TensorFlow was designed to be a good deep learning solution for mobile +platforms. Currently we have two solutions for deploying machine learning +applications on mobile and embedded devices: @{$mobile/mobile_intro$TensorFlow +for Mobile} and @{$mobile/tflite$TensorFlow Lite}. + +## TensorFlow Lite versus TensorFlow Mobile + +Here are a few of the differences between the two: + +- TensorFlow Lite is an evolution of TensorFlow Mobile. In most cases, apps + developed with TensorFlow Lite will have a smaller binary size, fewer + dependencies, and better performance. + +- TensorFlow Lite is in developer preview, so not all use cases are covered yet. + We expect you to use TensorFlow Mobile to cover production cases. + +- TensorFlow Lite supports only a limited set of operators, so not all models + will work on it by default. TensorFlow for Mobile has a fuller set of + supported functionality. + +TensorFlow Lite provides better performance and a small binary size on mobile +platforms as well as the ability to leverage hardware acceleration if available +on their platforms. In addition, it has many fewer dependencies so it can be +built and hosted on simpler, more constrained device scenarios. TensorFlow Lite +also allows targeting accelerators through the [Neural Networks +API](https://developer.android.com/ndk/guides/neuralnetworks/index.html). + +TensorFlow Lite currently has coverage for a limited set of operators. While +TensorFlow for Mobile supports only a constrained set of ops by default, in +principle if you use an arbitrary operator in TensorFlow, it can be customized +to build that kernel. Thus use cases which are not currently supported by +TensorFlow Lite should continue to use TensorFlow for Mobile. As TensorFlow Lite +evolves, it will gain additional operators, and the decision will be easier to +make. diff --git a/tensorflow/docs_src/mobile/leftnav_files b/tensorflow/docs_src/mobile/leftnav_files index 347c07d233..4d2c3b6234 100644 --- a/tensorflow/docs_src/mobile/leftnav_files +++ b/tensorflow/docs_src/mobile/leftnav_files @@ -1,8 +1,11 @@ -### TensorFlow for Mobile index.md +### TensorFlow Lite +tflite/index.md +>>> +### TensorFlow Mobile +mobile_intro.md android_build.md ios_build.md -#raspi_build.md until this section gets rewritten, or TFLite takes over linking_libs.md prepare_models.md optimizing.md diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md new file mode 100644 index 0000000000..948563292a --- /dev/null +++ b/tensorflow/docs_src/mobile/mobile_intro.md @@ -0,0 +1,247 @@ +# Introduction to TensorFlow Mobile + +TensorFlow was designed from the ground up to be a good deep learning solution +for mobile platforms like Android and iOS. This mobile guide should help you +understand how machine learning can work on mobile platforms and how to +integrate TensorFlow into your mobile apps effectively and efficiently. + +## About this Guide + +This guide is aimed at developers who have a TensorFlow model that’s +successfully working in a desktop environment, who want to integrate it into +a mobile application, and cannot use TensorFlow Lite. Here are the +main challenges you’ll face during that process: + +- Understanding how to use Tensorflow for mobile. +- Building TensorFlow for your platform. +- Integrating the TensorFlow library into your application. +- Preparing your model file for mobile deployment. +- Optimizing for latency, RAM usage, model file size, and binary size. + +## Common use cases for mobile machine learning + +**Why run TensorFlow on mobile?** + +Traditionally, deep learning has been associated with data centers and giant +clusters of high-powered GPU machines. However, it can be very expensive and +time-consuming to send all of the data a device has access to across a network +connection. Running on mobile makes it possible to deliver very interactive +applications in a way that’s not possible when you have to wait for a network +round trip. + +Here are some common use cases for on-device deep learning: + +### Speech Recognition + +There are a lot of interesting applications that can be built with a +speech-driven interface, and many of these require on-device processing. Most of +the time a user isn’t giving commands, and so streaming audio continuously to a +remote server would be a waste of bandwidth, since it would mostly be silence or +background noises. To solve this problem it’s common to have a small neural +network running on-device @{$tutorials/audio_recognition$listening out for a particular keyword}. +Once that keyword has been spotted, the rest of the +conversation can be transmitted over to the server for further processing if +more computing power is needed. + +### Image Recognition + +It can be very useful for a mobile app to be able to make sense of a camera +image. If your users are taking photos, recognizing what’s in them can help your +camera apps apply appropriate filters, or label the photos so they’re easily +findable. It’s important for embedded applications too, since you can use image +sensors to detect all sorts of interesting conditions, whether it’s spotting +endangered animals in the wild +or +[reporting how late your train is running](https://svds.com/tensorflow-image-recognition-raspberry-pi/). + +TensorFlow comes with several examples of recognizing the types of objects +inside images along with a variety of different pre-trained models, and they can +all be run on mobile devices. You can try out +our +[Tensorflow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/index.html#0) and +[Tensorflow for Poets 2: Optimize for Mobile](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2/index.html#0) codelabs to +see how to take a pretrained model and run some very fast and lightweight +training to teach it to recognize specific objects, and then optimize it to +run on mobile. + +### Object Localization + +Sometimes it’s important to know where objects are in an image as well as what +they are. There are lots of augmented reality use cases that could benefit a +mobile app, such as guiding users to the right component when offering them +help fixing their wireless network or providing informative overlays on top of +landscape features. Embedded applications often need to count objects that are +passing by them, whether it’s pests in a field of crops, or people, cars and +bikes going past a street lamp. + +TensorFlow offers a pretrained model for drawing bounding boxes around people +detected in images, together with tracking code to follow them over time. The +tracking is especially important for applications where you’re trying to count +how many objects are present over time, since it gives you a good idea when a +new object enters or leaves the scene. We have some sample code for this +available for Android [on +Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android), +and also a [more general object detection +model](https://github.com/tensorflow/models/tree/master/object_detection/README.md) +available as well. + +### Gesture Recognition + +It can be useful to be able to control applications with hand or other +gestures, either recognized from images or through analyzing accelerometer +sensor data. Creating those models is beyond the scope of this guide, but +TensorFlow is an effective way of deploying them. + +### Optical Character Recognition + +Google Translate’s live camera view is a great example of how effective +interactive on-device detection of text can be. + +
    + +
    + +There are multiple steps involved in recognizing text in images. You first have +to identify the areas where the text is present, which is a variation on the +object localization problem, and can be solved with similar techniques. Once you +have an area of text, you then need to interpret it as letters, and then use a +language model to help guess what words they represent. The simplest way to +estimate what letters are present is to segment the line of text into individual +letters, and then apply a simple neural network to the bounding box of each. You +can get good results with the kind of models used for MNIST, which you can find +in TensorFlow’s tutorials, though you may want a higher-resolution input. A +more advanced alternative is to use an LSTM model to process a whole line of +text at once, with the model itself handling the segmentation into different +characters. + +### Translation + +Translating from one language to another quickly and accurately, even if you +don’t have a network connection, is an important use case. Deep networks are +very effective at this sort of task, and you can find descriptions of a lot of +different models in the literature. Often these are sequence-to-sequence +recurrent models where you’re able to run a single graph to do the whole +translation, without needing to run separate parsing stages. + +### Text Classification + +If you want to suggest relevant prompts to users based on what they’re typing or +reading, it can be very useful to understand the meaning of the text. This is +where text classification comes in. Text classification is an umbrella term +that covers everything from sentiment analysis to topic discovery. You’re likely +to have your own categories or labels that you want to apply, so the best place +to start is with an example +like +[Skip-Thoughts](https://github.com/tensorflow/models/tree/master/skip_thoughts/), +and then train on your own examples. + +### Voice Synthesis + +A synthesized voice can be a great way of giving users feedback or aiding +accessibility, and recent advances such as +[WaveNet](https://deepmind.com/blog/wavenet-generative-model-raw-audio/) show +that deep learning can offer very natural-sounding speech. + +## Mobile machine learning and the cloud + +These examples of use cases give an idea of how on-device networks can +complement cloud services. Cloud has a great deal of computing power in a +controlled environment, but running on devices can offer higher interactivity. +In situations where the cloud is unavailable, or your cloud capacity is limited, +you can provide an offline experience, or reduce cloud workload by processing +easy cases on device. + +Doing on-device computation can also signal when it's time to switch to working +on the cloud. A good example of this is hotword detection in speech. Since +devices are able to constantly listen out for the keywords, this then triggers a +lot of traffic to cloud-based speech recognition once one is recognised. Without +the on-device component, the whole application wouldn’t be feasible, and this +pattern exists across several other applications as well. Recognizing that some +sensor input is interesting enough for further processing makes a lot of +interesting products possible. + +## What hardware and software should you have? + +TensorFlow runs on Ubuntu Linux, Windows 10, and OS X. For a list of all +supported operating systems and instructions to install TensorFlow, see +@{$install$Installing Tensorflow}. + +Note that some of the sample code we provide for mobile TensorFlow requires you +to compile TensorFlow from source, so you’ll need more than just `pip install` +to work through all the sample code. + +To try out the mobile examples, you’ll need a device set up for development, +using +either [Android Studio](https://developer.android.com/studio/install.html), +or [XCode](https://developer.apple.com/xcode/) if you're developing for iOS. + +## What should you do before you get started? + +Before thinking about how to get your solution on mobile: + +1. Determine whether your problem is solvable by mobile machine learning +2. Create a labelled dataset to define your problem +3. Pick an effective model for the problem + +We'll discuss these in more detail below. + +### Is your problem solvable by mobile machine learning? + +Once you have an idea of the problem you want to solve, you need to make a plan +of how to build your solution. The most important first step is making sure that +your problem is actually solvable, and the best way to do that is to mock it up +using humans in the loop. + +For example, if you want to drive a robot toy car using voice commands, try +recording some audio from the device and listen back to it to see if you can +make sense of what’s being said. Often you’ll find there are problems in the +capture process, such as the motor drowning out speech or not being able to hear +at a distance, and you should tackle these problems before investing in the +modeling process. + +Another example would be giving photos taken from your app to people see if they +can classify what’s in them, in the way you’re looking for. If they can’t do +that (for example, trying to estimate calories in food from photos may be +impossible because all white soups look the same), then you’ll need to redesign +your experience to cope with that. A good rule of thumb is that if a human can’t +handle the task then it will be difficult to train a computer to do better. + +### Create a labelled dataset + +After you’ve solved any fundamental issues with your use case, you need to +create a labeled dataset to define what problem you’re trying to solve. This +step is extremely important, moreso than picking which model to use. You want it +to be as representative as possible of your actual use case, since the model +will only be effective at the task you teach it. It’s also worth investing in +tools to make labeling the data as efficient and accurate as possible. For +example, if you’re able to switch from having to click a button on a web +interface to simple keyboard shortcuts, you may be able to speed up the +generation process a lot. You should also start by doing the initial labeling +yourself, so you can learn about the difficulties and likely errors, and +possibly change your labeling or data capture process to avoid them. Once you +and your team are able to consistently label examples (that is once you +generally agree on the same labels for most examples), you can then try and +capture your knowledge in a manual and teach external raters how to run the same +process. + +### Pick an effective model + +The next step is to pick an effective model to use. You might be able to avoid +training a model from scratch if someone else has already implemented a model +similar to what you need; we have a repository of models implemented in +TensorFlow [on Github](https://github.com/tensorflow/models) that you can look +through. Lean towards the simplest model you can find, and try to get started as +soon as you have even a small amount of labelled data, since you’ll get the best +results when you’re able to iterate quickly. The shorter the time it takes to +try training a model and running it in s real application, the better overall +results you’ll see. It’s common for an algorithm to get great training accuracy +numbers but then fail to be useful within a real application because there’s a +mismatch between the dataset and real usage. Prototype end-to-end usage as soon +as possible to create a consistent user experience. + +## Next Steps + +We suggest you get started by building one of our demos for +@{$mobile/android_build$Android} or @{$mobile/ios_build$iOS}. diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md new file mode 100644 index 0000000000..59daa2fe25 --- /dev/null +++ b/tensorflow/docs_src/mobile/tflite/index.md @@ -0,0 +1,202 @@ +# Introduction to TensorFlow Lite + +TensorFlow Lite is TensorFlow’s lightweight solution for mobile and embedded +devices. It enables on-device machine learning inference with low latency and a +small binary size. TensorFlow Lite also supports hardware acceleration with the +[Android Neural Networks +API](https://developer.android.com/ndk/guides/neuralnetworks/index.html). + +TensorFlow Lite uses many techniques for achieving low latency such as +optimizing the kernels for mobile apps, pre-fused activations, and quantized +kernels that allow smaller and faster (fixed-point math) models. + +Most of our TensorFlow Lite documentation is [on +Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite) +for the time being. + +## What does TensorFlow Lite contain? + +TensorFlow Lite supports a set of core operators, both quantized and +float, which have been tuned for mobile platforms. They incorporate pre-fused +activations and biases to further enhance performance and quantized +accuracy. Additionally, TensorFlow Lite also supports using custom operations in +models. + +TensorFlow Lite defines a new model file format, based on +[FlatBuffers](https://google.github.io/flatbuffers/). FlatBuffers is an +open-sourced, efficient cross platform serialization library. It is similar to +[protocol buffers](https://developers.google.com/protocol-buffers/?hl=en), but +the primary difference is that FlatBuffers does not need a parsing/unpacking +step to a secondary representation before you can access data, often coupled +with per-object memory allocation. Also, the code footprint of FlatBuffers is an +order of magnitude smaller than protocol buffers. + +TensorFlow Lite has a new mobile-optimized interpreter, which has the key goals +of keeping apps lean and fast. The interpreter uses a static graph ordering and +a custom (less-dynamic) memory allocator to ensure minimal load, initialization, +and execution latency. + +TensorFlow Lite provides an interface to leverage hardware acceleration, if +available on the device. It does so via the Android Neural Networks library, +released as part of Android O-MR1. + +## Why do we need a new mobile-specific library? + +Machine Learning is changing the computing paradigm, and we see an emerging +trend of new use cases on mobile and embedded devices. Consumer expectations are +also trending toward natural, human-like interactions with their devices, driven +by the camera and voice interaction models. + +There are several factors which are fueling interest in this domain: + +- Innovation at the silicon layer is enabling new possibilities for hardware + acceleration, and frameworks such as the Android Neural Networks API make it + easy to leverage these. + +- Recent advances in real-time computer-vision and spoken language understanding + have led to mobile-optimized benchmark models being open sourced + (e.g. MobileNets, SqueezeNet). + +- Widely-available smart appliances create new possibilities for + on-device intelligence. + +- Interest in stronger user data privacy paradigms where user data does not need + to leave the mobile device. + +- Ability to serve ‘offline’ use cases, where the device does not need to be + connected to a network. + +We believe the next wave of machine learning applications will have significant +processing on mobile and embedded devices. + +## TensorFlow Lite developer preview highlights + +TensorFlow Lite is available as a developer preview and includes the +following: + +- A set of core operators, both quantized and float, many of which have been + tuned for mobile platforms. These can be used to create and run custom + models. Developers can also write their own custom operators and use them in + models. + +- A new [FlatBuffers](https://google.github.io/flatbuffers/)-based + model file format. + +- On-device interpreter with kernels optimized for faster execution on mobile. + +- TensorFlow converter to convert TensorFlow-trained models to the TensorFlow + Lite format. + +- Smaller in size: TensorFlow Lite is smaller than 300KB when all supported + operators are linked and less than 200KB when using only the operators needed + for supporting InceptionV3 and Mobilenet. + +- **Pre-tested models:** + + All of the following models are guaranteed to work out of the box: + + - Inception V3, a popular model for detecting the the dominant objects + present in an image. + + - [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md), + a family of mobile-first computer vision models designed to effectively + maximize accuracy while being mindful of the restricted resources for an + on-device or embedded application. They are small, low-latency, low-power + models parameterized to meet the resource constraints of a variety of use + cases. They can be built upon for classification, detection, embeddings + and segmentation. MobileNet models are smaller but [lower in + accuracy](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html) + than Inception V3. + + - On Device Smart Reply, an on-device model which provides one-touch + replies for an incoming text message by suggesting contextually relevant + messages. The model was built specifically for memory constrained devices + such as watches & phones and it has been successfully used to surface + [Smart Replies on Android + Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) + to all first-party and third-party apps. + +- Quantized versions of the MobileNet model, which runs faster than the + non-quantized (float) version on CPU. + +- New Android demo app to illustrate the use of TensorFlow Lite with a quantized + MobileNet model for object classification. + +- Java and C++ API support + +Note: This is a developer release, and it’s likely that there will be changes in +the API in upcoming versions. We do not guarantee backward or forward +compatibility with this release. + +## Getting Started + +We recommend you try out TensorFlow Lite with the pre-tested models indicated +above. If you have an existing mode, you will need to test whether your model is +compatible with both the converter and the supported operator set. To test your +model, see the [documentation on +GitHub](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite). + +### Retrain Inception-V3 or MobileNet for a custom data set + +The pre-trained models mentioned above have been trained on the ImageNet data +set, which consists of 1000 predefined classes. If those classes are not +relevant or useful for your use case, you will need to retrain those +models. This technique is called transfer learning, which starts with a model +that has been already trained on a problem and will then be retrained on a +similar problem. Deep learning from scratch can take days, but transfer learning +can be done fairly quickly. In order to do this, you'll need to generate your +custom data set labeled with the relevant classes. + +The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) +codelab walks through this process step-by-step. The retraining code supports +retraining for both floating point and quantized inference. + +## TensorFlow Lite Architecture + +The following diagram shows the architectural design of TensorFlow Lite: + + + +Starting with a trained TensorFlow model on disk, you'll convert that model to +the TensorFlow Lite file format (`.tflite`) using the TensorFlow Lite +Converter. Then you can use that converted file in your mobile application. + +Deploying the TensorFlow Lite model file uses: + +- Java API: A convenience wrapper around the C++ API on Android. + +- C++ API: Loads the TensorFlow Lite Model File and invokes the Interpreter. The + same library is available on both Android and iOS. + +- Interpreter: Executes the model using a set of kernels. The interpreter + supports selective kernel loading; without kernels it is only 100KB, and 300KB + with all the kernels loaded. This is a significant reduction from the 1.5M + required by TensorFlow Mobile. + +- On select Android devices, the Interpreter will use the Android Neural + Networks API for hardware acceleration, or default to CPU execution if none + are available. + +You can also implement custom kernels using the C++ API that can be used by the +Interpreter. + +## Future Work + +In future releases, TensorFlow Lite will support more models and built-in +operators, contain performance improvements for both fixed point and floating +point models, improvements to the tools to enable easier developer workflows and +support for other smaller devices and more. As we continue development, we hope +that TensorFlow Lite will greatly simplify the developer experience of targeting +a model for small devices. + +Future plans include using specialized machine learning hardware to get the best +possible performance for a particular model on a particular device. + +## Next Steps + +For the developer preview, most of our documentation is on GitHub. Please take a +look at the [TensorFlow Lite +repository](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite) +on GitHub for more information and for code samples, demo applications, and +more. + -- GitLab From f5669d905a28893c71ff44245da6ed5e13d55d1c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Nov 2017 23:14:06 -0800 Subject: [PATCH 0397/1801] In the Grappler arithmetic optimizer, make sure the two new nodes have unique names when hoisting common factors out of aggregates. Fix a few missed optimization opportunities due to control dependencies. Fix a bug in counting the number of inputs for trivial aggregate rewriting. PiperOrigin-RevId: 175639520 --- .../optimizers/arithmetic_optimizer.cc | 97 +++++++---- .../optimizers/arithmetic_optimizer_test.cc | 164 +++++++++++++----- 2 files changed, 190 insertions(+), 71 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index e8ef0e94b5..5cce34e2a6 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -188,7 +188,7 @@ bool IsInnerMatrixTransposeNode(const NodeDef& transpose_node, // Follow a chain (through input(0)) of ops starting at `source->input(0)` as // long as they // 1. preserve the values of their first input, -// 2. have a single output, +// 2. have a single (non-control) output, // 3. are not in nodes_to_preserve. // Returns the last node in the chain satisfying these properties or source // itself if a chain of length zero was found. @@ -200,20 +200,55 @@ NodeDef* GetTailOfValuePreservingChain( const NodeDef* source, const NodeMap* node_map, const std::unordered_set& nodes_to_preserve) { const NodeDef* source_parent = source; - source = node_map->GetNode(source->input(0)); - while (IsValuePreserving(*source) && - node_map->GetOutputs(source->name()).size() == 1 && - // Do not skip over preserved nodes, because folding will change - // the results of these skipped data-reordering nodes. - // TODO(jingyue): A more elegant way is to copy this chain of - // data-reordering nodes and modify only the copy. - !nodes_to_preserve.count(source->name())) { - source_parent = source; + if (!IsControlInput(source->input(0))) { source = node_map->GetNode(source->input(0)); + while (IsValuePreserving(*source) && + node_map->GetOutputs(source->name()).size() == 1 && + // Do not skip over preserved nodes, because folding will change + // the results of these skipped data-reordering nodes. + // TODO(jingyue): A more elegant way is to copy this chain of + // data-reordering nodes and modify only the copy. + !nodes_to_preserve.count(source->name())) { + source_parent = source; + if (IsControlInput(source->input(0))) { + break; + } + source = node_map->GetNode(source->input(0)); + } } return const_cast(source_parent); } +bool MaybeAddControlInput(const string& new_input, NodeDef* node, + GraphDef* graph, NodeMap* node_map) { + bool already_exists = false; + for (const string& input : node->input()) { + if (input == new_input || AsControlDependency(input) == new_input) { + already_exists = true; + break; + } + } + if (!already_exists) { + const string ctrl_dep = + ConstantFolding::AddControlDependency(new_input, graph, node_map); + node->add_input(ctrl_dep); + node_map->AddOutput(NodeName(new_input), node->name()); + } + return !already_exists; +} + +int CopyControlInputs(const NodeDef& from, NodeDef* to, GraphDef* graph, + NodeMap* node_map) { + int num_copied = 0; + for (const string& input : from.input()) { + if (IsControlInput(input) && + MaybeAddControlInput(input, to, graph, node_map)) { + ++num_copied; + } + } + return num_copied; +} + // Returns the data type in attribute `attr_name` of `node`. If that attribute // doesn't exist, returns DT_INVALID. DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name) { @@ -848,7 +883,12 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // Mul(Const(N), x)) // bool all_equal = true; + int num_inputs = 1; for (int i = 1; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) { + break; + } + ++num_inputs; if (node->input(i) != node->input(0)) { all_equal = false; break; @@ -856,10 +896,9 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } if (all_equal && node_map->GetNode(node->name() + "_const") == nullptr) { // 1. Create constant node with value N. - const int N = node->input_size(); const auto type = GetDataTypeFromAttr(*node, "T"); Tensor t(type, TensorShape({})); - Status status = SetTensorValue(type, N, &t); + Status status = SetTensorValue(type, num_inputs, &t); if (!status.ok()) { LOG(WARNING) << "Failed to create const node: " << status.error_message(); @@ -885,6 +924,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( new_mul_node->add_input(node->input(0)); node_map->AddOutput(node->input(0), new_mul_node->name()); + CopyControlInputs(*node, new_mul_node, graph_def, node_map); AddFrameControlDeps(node, {new_const_node, new_mul_node}, node->input(0), {new_const_node}, graph_def, node_map, frame_map); return new_mul_node->name(); @@ -896,11 +936,12 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // where all the inputs are Mul nodes. This pattern occurs frequently in // regularization terms for the gradients during training. if (node->input_size() > 1 && IsAggregate(*node) && - node_map->GetNode(node->name() + "_hoist") == nullptr) { + node_map->GetNode(node->name() + "_hoist_add") == nullptr) { // Determine the set of common factors if the input nodes are all Mul nodes. std::set common_factors; int i = 0; - while (i < node->input_size() && (i == 0 || !common_factors.empty())) { + while (i < node->input_size() && (i == 0 || !common_factors.empty()) && + !IsControlInput(node->input(i))) { const NodeDef* input = node_map->GetNode(node->input(i)); if (input->op() == "Mul") { std::set factors_i{input->input(0), input->input(1)}; @@ -930,31 +971,34 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( NodeDef* new_mul_node = graph_def->add_node(); NodeDef* new_add_node = graph_def->add_node(); *new_add_node = *node; - new_add_node->set_name(node->name() + "_hoist"); + new_add_node->set_name(node->name() + "_hoist_add"); new_nodes->push_back(new_add_node); node_map->AddNode(new_add_node->name(), new_add_node); for (int i = 0; i < node->input_size(); ++i) { - NodeDef* mul_node = node_map->GetNode(node->input(i)); + const string& input = node->input(i); + if (IsControlInput(input)) { + MaybeAddControlInput(input, new_add_node, graph_def, node_map); + continue; + } + NodeDef* mul_node = node_map->GetNode(input); int unique_factor_index = mul_node->input(0) == common_factor ? 1 : 0; const string unique_factor = mul_node->input(unique_factor_index); new_add_node->set_input(i, unique_factor); // 2. Use a copy of the first Mul node for the outer multiplication. if (i == 0) { *new_mul_node = *mul_node; - new_mul_node->set_name(new_mul_node->name() + "_hoist"); + new_mul_node->set_device(node->device()); + new_mul_node->set_name(node->name() + "_hoist_mul"); new_mul_node->set_input(0, common_factor); new_mul_node->set_input(1, new_add_node->name()); node_map->AddNode(new_mul_node->name(), new_mul_node); } } - // 3. Set the device of the new nodes to that of the common factor "x". - NodeDef* common_factor_node = node_map->GetNode(common_factor); - new_add_node->set_device(common_factor_node->device()); - new_mul_node->set_device(common_factor_node->device()); - // 4. Add frame dependencies that the original node might have had. + // 3. Add frame dependencies that the original node might have had. AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor, {new_add_node}, graph_def, node_map, frame_map); + return new_mul_node->name(); } } @@ -1117,15 +1161,11 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( << consumer->name() << " to " << simplified_tensor; } node_map.UpdateInput(consumer->name(), node->name(), simplified_tensor); - if (!nodes_to_simplify.Exists(consumer)) { - nodes_to_simplify.PushBack(consumer); - } + nodes_to_simplify.PushBack(consumer); } } for (const NodeDef* new_node : new_nodes) { - if (!nodes_to_simplify.Exists(new_node)) { - nodes_to_simplify.PushBack(new_node); - } + nodes_to_simplify.PushBack(new_node); } } return Status::OK(); @@ -1136,7 +1176,6 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, GraphDef* optimized_graph) { *optimized_graph = item.graph; nodes_to_preserve_ = item.NodesToPreserve(); - GraphProperties graph_properties(item); TF_RETURN_IF_ERROR(graph_properties.InferStatically()); TF_RETURN_IF_ERROR(graph_properties.AnnotateOutputShapes(optimized_graph)); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 4fcbb0120e..354a306905 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -164,6 +164,37 @@ TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsWithChain) { EXPECT_EQ("c", output.node(2).input(0)); } +TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsWithControlChain) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output c = ops::Const(s.WithOpName("c"), {1.0f, 2.0f}, {1, 2}); + Output recip1 = ops::Reciprocal(s.WithOpName("recip1"), c); + Output id1 = ops::Identity(s.WithOpName("id1"), recip1); + Output squeeze = ops::Squeeze(s.WithOpName("squeeze"), id1); + Output recip2 = ops::Reciprocal( + s.WithOpName("recip2").WithControlDependencies(squeeze), c); + Output id2 = ops::Identity(s.WithOpName("id2"), recip2); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + // The optimizer should be a noop. + EXPECT_EQ(item.graph.node_size(), output.node_size()); + for (int i = 0; i < item.graph.node_size(); ++i) { + const NodeDef& original = item.graph.node(i); + const NodeDef& optimized = output.node(i); + EXPECT_EQ(original.name(), optimized.name()); + EXPECT_EQ(original.op(), optimized.op()); + EXPECT_EQ(original.input_size(), optimized.input_size()); + for (int j = 0; j < original.input_size(); ++j) { + EXPECT_EQ(original.input(j), optimized.input(j)); + } + } +} + TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); @@ -185,6 +216,9 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { EXPECT_EQ(5, output.node_size()); const NodeDef& new_const = output.node(3); EXPECT_EQ("add_const", new_const.name()); + EXPECT_EQ("^x", new_const.input(0)); + EXPECT_EQ(std::string("\0\0\0@", 4), + new_const.attr().at("value").tensor().tensor_content()); const NodeDef& new_mul = output.node(4); EXPECT_EQ("add_mul", new_mul.name()); EXPECT_EQ("add_const", new_mul.input(0)); @@ -194,6 +228,41 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { EXPECT_EQ("add_mul", new_id.input(0)); } +TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2}); + Output x = ops::Const(s.WithOpName("x"), {3.0f, 4.0f}, {1, 2}); + Output add = ops::Add(s.WithOpName("add").WithControlDependencies(y), x, x); + Output id = ops::Identity(s.WithOpName("id"), add); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(6, output.node_size()); + const NodeDef& new_const = output.node(4); + EXPECT_EQ("add_const", new_const.name()); + EXPECT_EQ("^x", new_const.input(0)); + EXPECT_EQ(std::string("\0\0\0@", 4), + new_const.attr().at("value").tensor().tensor_content()); + const NodeDef& new_mul = output.node(5); + EXPECT_EQ("add_mul", new_mul.name()); + EXPECT_EQ("add_const", new_mul.input(0)); + EXPECT_EQ("x", new_mul.input(1)); + EXPECT_EQ("^y", new_mul.input(2)); + const NodeDef& new_id = output.node(3); + EXPECT_EQ("id", new_id.name()); + EXPECT_EQ("add_mul", new_id.input(0)); +} + TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { // Test case from b/69059093. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -207,6 +276,13 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + const std::vector devices{ + "/device:CPU:0", "/device:GPU:0", "/device:CPU:0", "/device:GPU:1", + "/device:CPU:0", "/device:CPU:0", "/device:CPU:0", + }; + for (int i = 0; i < item.graph.node_size(); ++i) { + item.graph.mutable_node(i)->set_device(devices[i]); + } ArithmeticOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -216,36 +292,48 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(11, output.node_size()); - const NodeDef& new_id = output.node(4); - EXPECT_EQ("id", new_id.name()); - EXPECT_EQ("Add_6_mul", new_id.input(0)); - - // Add4 and add5 get deduped, and we rewrite each of the 3 remaining add nodes - // of the form Add(x,x) into Mul(Const(2), x). - const NodeDef& new_add_4_const = output.node(5); - EXPECT_EQ("Add_4_const", new_add_4_const.name()); - EXPECT_EQ("^Add", new_add_4_const.input(0)); - const NodeDef& new_add_4_mul = output.node(6); - EXPECT_EQ("Add_4_mul", new_add_4_mul.name()); - EXPECT_EQ("Add_4_const", new_add_4_mul.input(0)); - EXPECT_EQ("Add_mul", new_add_4_mul.input(1)); - - const NodeDef& new_add_6_const = output.node(7); - EXPECT_EQ("Add_6_const", new_add_6_const.name()); - EXPECT_EQ("^Add_4_mul", new_add_6_const.input(0)); - const NodeDef& new_add_6_mul = output.node(8); - EXPECT_EQ("Add_6_mul", new_add_6_mul.name()); - EXPECT_EQ("Add_6_const", new_add_6_mul.input(0)); - EXPECT_EQ("Add_4_mul", new_add_6_mul.input(1)); - - const NodeDef& new_add_const = output.node(9); - EXPECT_EQ("Add_const", new_add_const.name()); - EXPECT_EQ("^Placeholder", new_add_const.input(0)); - const NodeDef& new_add_mul = output.node(10); - EXPECT_EQ("Add_mul", new_add_mul.name()); - EXPECT_EQ("Add_const", new_add_mul.input(0)); - EXPECT_EQ("Placeholder", new_add_mul.input(1)); + EXPECT_EQ(17, output.node_size()); + // The graph gets optimized to + // Mul(p, + // Add(Add(Const(2), Const(2)), + // Add(Const(2), Const(2)))) + for (const auto& node : output.node()) { + if ("id" == node.name()) { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("Add_6_hoist_mul", node.input(0)); + } else if ("Add_6_hoist_mul" == node.name()) { + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("Placeholder", node.input(0)); + EXPECT_EQ("Add_6_hoist_add", node.input(1)); + } else if ("Add_6_hoist_add" == node.name()) { + EXPECT_EQ("Add", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("Add_4_hoist_add", node.input(0)); + EXPECT_EQ("Add_5_hoist_add", node.input(1)); + EXPECT_EQ("^Placeholder", node.input(2)); + } else if ("Add_4_hoist_add" == node.name()) { + EXPECT_EQ("Add", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("Add_const", node.input(0)); + EXPECT_EQ("Add_1_const", node.input(1)); + EXPECT_EQ("^Placeholder", node.input(2)); + } else if ("Add_5_hoist_add" == node.name()) { + EXPECT_EQ("Add", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("Add_const", node.input(0)); + EXPECT_EQ("Add_1_const", node.input(1)); + EXPECT_EQ("^Placeholder", node.input(2)); + } else if ("Add_const" == node.name()) { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^Placeholder", node.input(0)); + } else if ("Add_1_const" == node.name()) { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^Placeholder", node.input(0)); + } + } } TEST_F(ArithmeticOptimizerTest, HoistFactor) { @@ -272,16 +360,16 @@ TEST_F(ArithmeticOptimizerTest, HoistFactor) { EXPECT_EQ(9, output.node_size()); const NodeDef& new_add = output.node(8); - EXPECT_EQ("add_hoist", new_add.name()); + EXPECT_EQ("add_hoist_add", new_add.name()); EXPECT_EQ("y1", new_add.input(0)); EXPECT_EQ("y2", new_add.input(1)); const NodeDef& new_mul = output.node(7); - EXPECT_EQ("mul1_hoist", new_mul.name()); + EXPECT_EQ("add_hoist_mul", new_mul.name()); EXPECT_EQ("x", new_mul.input(0)); - EXPECT_EQ("add_hoist", new_mul.input(1)); + EXPECT_EQ("add_hoist_add", new_mul.input(1)); const NodeDef& new_id = output.node(6); EXPECT_EQ("id", new_id.name()); - EXPECT_EQ("mul1_hoist", new_id.input(0)); + EXPECT_EQ("add_hoist_mul", new_id.input(0)); } TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) { @@ -463,10 +551,6 @@ TEST_F(ArithmeticOptimizerTest, IdentityReshape) { item.graph = output; TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); - for (const auto& node : output.node()) { - LOG(INFO) << node.DebugString(); - } - EXPECT_EQ(0, std::count_if( output.node().begin(), output.node().end(), [](const NodeDef& node) { return node.op() == "Reshape"; })); @@ -492,10 +576,6 @@ TEST_F(ArithmeticOptimizerTest, NotIdentityReshape) { item.graph = output; TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); - for (const auto& node : output.node()) { - LOG(INFO) << node.DebugString(); - } - EXPECT_EQ(1, std::count_if( output.node().begin(), output.node().end(), [](const NodeDef& node) { return node.op() == "Reshape"; })); -- GitLab From cfa5c082dc2d7f71e26b75eb1829a490c378aeef Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 14 Nov 2017 00:04:36 -0800 Subject: [PATCH 0398/1801] Fix schema compatibility in oss 175360918 by aselle: Fix schema compatibility test and upgrade test. - Handle python3 unicode idioms - Handle path differences --- tensorflow/contrib/lite/schema/BUILD | 2 +- .../schema/flatbuffer_compatibility_test.cc | 2 +- .../contrib/lite/schema/upgrade_schema.py | 13 ++++++++++--- .../contrib/lite/schema/upgrade_schema_test.py | 17 +++++++++++------ 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD index 3e04d6f34f..54167ddd9a 100644 --- a/tensorflow/contrib/lite/schema/BUILD +++ b/tensorflow/contrib/lite/schema/BUILD @@ -16,11 +16,11 @@ py_binary( "schema_v1.fbs", "schema_v2.fbs", "schema_v3.fbs", + "@flatbuffers//:flatc", ], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:platform", - "@flatbuffers//:flatc", ], ) diff --git a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc index 17ee0af8dd..cd46a06f7d 100644 --- a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc +++ b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc @@ -15,7 +15,7 @@ limitations under the License. #include #include -#include "third_party/flatbuffers/include/flatbuffers/flatc.h" +#include "flatbuffers/flatc.h" #include "tensorflow/core/platform/platform.h" #ifdef PLATFORM_GOOGLE diff --git a/tensorflow/contrib/lite/schema/upgrade_schema.py b/tensorflow/contrib/lite/schema/upgrade_schema.py index 320c7138d2..94f5730be5 100644 --- a/tensorflow/contrib/lite/schema/upgrade_schema.py +++ b/tensorflow/contrib/lite/schema/upgrade_schema.py @@ -77,8 +77,13 @@ class Converter(object): def __init__(self): # TODO(aselle): make this work in the open source version with better # path. - self._flatc_path = resource_loader.get_path_to_datafile( - "../../../../flatbuffers/flatc") + paths_to_try = [ + "../../../../flatbuffers/flatc", # not bazel + "../../../../external/flatbuffers/flatc" # bazel + ] + for p in paths_to_try: + self._flatc_path = resource_loader.get_path_to_datafile(p) + if os.path.exists(self._flatc_path): break def FindSchema(base_name): return resource_loader.get_path_to_datafile("%s" % base_name) @@ -250,7 +255,9 @@ class Converter(object): # Upgrade the operator codes for operator_code in data["operator_codes"]: - if not isinstance(operator_code["builtin_code"], unicode): + # Check if builtin_code is the appropriate string type + # use type("") instead of str or unicode. for py2and3 + if not isinstance(operator_code["builtin_code"], type(u"")): raise ValueError("builtin_code %r is non-string. this usually means" "your model has consistency problems." % (operator_code["builtin_code"])) diff --git a/tensorflow/contrib/lite/schema/upgrade_schema_test.py b/tensorflow/contrib/lite/schema/upgrade_schema_test.py index 475cdb9d8b..754400e888 100644 --- a/tensorflow/contrib/lite/schema/upgrade_schema_test.py +++ b/tensorflow/contrib/lite/schema/upgrade_schema_test.py @@ -263,7 +263,7 @@ class TestSchemaUpgrade(test_util.TensorFlowTestCase): invalid_extension = tempfile.mktemp(suffix=".foo") with self.assertRaisesRegexp(ValueError, "Invalid extension on input"): converter.Convert(invalid_extension, invalid_extension) - with tempfile.NamedTemporaryFile(suffix=".json") as in_json: + with tempfile.NamedTemporaryFile(suffix=".json", mode="w+") as in_json: JsonDumpAndFlush(EMPTY_TEST_SCHEMA_V1, in_json) with self.assertRaisesRegexp(ValueError, "Invalid extension on output"): converter.Convert(in_json.name, invalid_extension) @@ -276,10 +276,13 @@ class TestSchemaUpgrade(test_util.TensorFlowTestCase): data_expected: TFLite model as a dictionary (upgraded). """ converter = upgrade_schema_lib.Converter() - with tempfile.NamedTemporaryFile(suffix=".json") as in_json, \ - tempfile.NamedTemporaryFile(suffix=".json") as out_json, \ - tempfile.NamedTemporaryFile(suffix=".bin") as out_bin, \ - tempfile.NamedTemporaryFile(suffix=".tflite") as out_tflite: + with tempfile.NamedTemporaryFile(suffix=".json", mode="w+") as in_json, \ + tempfile.NamedTemporaryFile( + suffix=".json", mode="w+") as out_json, \ + tempfile.NamedTemporaryFile( + suffix=".bin", mode="w+b") as out_bin, \ + tempfile.NamedTemporaryFile( + suffix=".tflite", mode="w+b") as out_tflite: JsonDumpAndFlush(data_old, in_json) # Test JSON output converter.Convert(in_json.name, out_json.name) @@ -287,7 +290,9 @@ class TestSchemaUpgrade(test_util.TensorFlowTestCase): # Convert to .tflite and then to .bin and check if binary is equal converter.Convert(in_json.name, out_tflite.name) converter.Convert(out_tflite.name, out_bin.name) - self.assertEqual(open(out_bin.name).read(), open(out_tflite.name).read()) + self.assertEqual( + open(out_bin.name, "rb").read(), + open(out_tflite.name, "rb").read()) # Test that conversion actually produced successful new json. converted_schema = json.load(out_json) self.assertEqual(converted_schema, data_expected) -- GitLab From cedb85f2cbda30b9dada94930af9ba40bbbdcf86 Mon Sep 17 00:00:00 2001 From: TTrapper Date: Tue, 14 Nov 2017 12:41:15 -0400 Subject: [PATCH 0399/1801] Removing labels_as_indices logic from _compute_sampled_logits. Now computing 0-index labels in sampled_sparse_softmax_loss. --- .../contrib/nn/python/ops/sampling_ops.py | 7 ++-- tensorflow/python/ops/nn_impl.py | 33 +++++++------------ 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index b26da52f01..02aa1efc5a 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -318,7 +318,7 @@ def sampled_sparse_softmax_loss(weights, A `batch_size` 1-D tensor of per-example sampled softmax losses. """ - logits, labels = nn_impl._compute_sampled_logits( + logits, _ = nn_impl._compute_sampled_logits( weights=weights, biases=biases, labels=labels, @@ -330,9 +330,12 @@ def sampled_sparse_softmax_loss(weights, subtract_log_q=True, remove_accidental_hits=remove_accidental_hits, partition_strategy=partition_strategy, - labels_as_indices=True, name=name) + # There is only one true label. _compute_sampled_logits puts the true logit + # at index 0. + labels = tf.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64) + sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( labels=array_ops.squeeze(labels), logits=logits) # sampled_losses is a [batch_size] tensor. diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 8e64259143..2bf5514c64 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -894,7 +894,6 @@ def _compute_sampled_logits(weights, subtract_log_q=True, remove_accidental_hits=False, partition_strategy="mod", - labels_as_indices=False, name=None): """Helper function for nce_loss and sampled_softmax_loss functions. @@ -932,18 +931,13 @@ def _compute_sampled_logits(weights, partition_strategy: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. - labels_as_indices: A `bool`. Whether the returned labels represent the - indices of the true classes. Default is `False`. name: A name for the operation (optional). Returns: out_logits: `Tensor` object with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or `nn.softmax_cross_entropy_with_logits` (sampled softmax). - out_labels: If `labels_as_indices` is `False`, a Tensor object with the same - shape as `out_logits`. Otherwise a `Tensor` of shape - `[batch_size, num_true]` with the indices of the target classes for each - row of `out_logits`. + out_labels: A Tensor object with the same shape as `out_logits`. """ if isinstance(weights, variables.PartitionedVariable): @@ -1054,21 +1048,16 @@ def _compute_sampled_logits(weights, # Construct output logits and labels. The true labels/logits start at col 0. out_logits = array_ops.concat([true_logits, sampled_logits], 1) - if labels_as_indices: - # We want each row of labels to be the indices of the targets, which - # start at col 0 and end at col num_true-1. - out_labels = gen_array_ops.tile( - [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) - else: - # true_logits is a float tensor, ones_like(true_logits) is a float - # tensor of ones. We then divide by num_true to ensure the per-example - # labels sum to 1.0, i.e. form a proper probability distribution. - out_labels = array_ops.concat([ - array_ops.ones_like(true_logits) / num_true, - array_ops.zeros_like(sampled_logits) - ], 1) - - return out_logits, out_labels + + # true_logits is a float tensor, ones_like(true_logits) is a float + # tensor of ones. We then divide by num_true to ensure the per-example + # labels sum to 1.0, i.e. form a proper probability distribution. + out_labels = array_ops.concat([ + array_ops.ones_like(true_logits) / num_true, + array_ops.zeros_like(sampled_logits) + ], 1) + + return out_logits, out_labels def nce_loss(weights, -- GitLab From 7ba5810c105640f218993d989142d7e91da6703e Mon Sep 17 00:00:00 2001 From: TTrapper Date: Tue, 14 Nov 2017 13:48:29 -0400 Subject: [PATCH 0400/1801] calling array_ops instead of erroneus tf --- tensorflow/contrib/nn/python/ops/sampling_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 02aa1efc5a..ca719ccaf3 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -334,7 +334,7 @@ def sampled_sparse_softmax_loss(weights, # There is only one true label. _compute_sampled_logits puts the true logit # at index 0. - labels = tf.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64) + labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64) sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( labels=array_ops.squeeze(labels), logits=logits) -- GitLab From c674e27bfd68a6c990e694b6afd901bfeeaa006d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 10:54:28 -0800 Subject: [PATCH 0401/1801] Merged commit includes the following changes: 175703479 by yifeif: Internal change. -- 175695370 by A. Unique TensorFlower: Implements _shared_embedding_columns and adds some tests. -- 175695349 by A. Unique TensorFlower: Implements tf.metrics.true_negatives, adds missing tests and does some cleanup in tf.contrib.metrics. -- PiperOrigin-RevId: 175703479 --- tensorflow/contrib/lite/README.md | 200 +++++++ .../lite/g3doc/TFLite-Architecture.jpg | Bin 0 -> 48710 bytes tensorflow/contrib/lite/g3doc/apis.md | 359 ++++++++++++ .../contrib/lite/g3doc/custom_operators.md | 91 ++++ tensorflow/contrib/lite/g3doc/ios.md | 67 +++ tensorflow/contrib/lite/g3doc/models.md | 22 + .../lite/g3doc/tf_ops_compatibility.md | 417 ++++++++++++++ tensorflow/contrib/lite/java/demo/README.md | 36 ++ .../lite/models/smartreply/g3doc/README.md | 146 +++++ .../lite/models/testdata/g3doc/README.md | 102 ++++ .../lite/models/testdata/g3doc/asr_am.svg | 4 + .../lite/models/testdata/g3doc/hotword.svg | 4 + .../lite/models/testdata/g3doc/speakerid.svg | 4 + .../lite/models/testdata/g3doc/tts.svg | 4 + .../models/testdata/smartreply_samples.tsv | 50 ++ tensorflow/contrib/lite/nnapi/README.md | 15 + tensorflow/contrib/lite/toco/README.md | 26 + .../lite/toco/g3doc/cmdline_examples.md | 509 ++++++++++++++++++ .../lite/toco/g3doc/cmdline_reference.md | 238 ++++++++ .../contrib/lite/toco/g3doc/python_api.md | 62 +++ .../contrib/metrics/python/ops/metric_ops.py | 191 ++----- .../python/feature_column/feature_column.py | 224 +++++++- .../feature_column/feature_column_test.py | 267 +++++++++ .../python/kernel_tests/metrics_test.py | 200 +++++++ tensorflow/python/ops/metrics_impl.py | 50 ++ .../tools/api/golden/tensorflow.metrics.pbtxt | 4 + 26 files changed, 3118 insertions(+), 174 deletions(-) create mode 100644 tensorflow/contrib/lite/README.md create mode 100644 tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg create mode 100644 tensorflow/contrib/lite/g3doc/apis.md create mode 100644 tensorflow/contrib/lite/g3doc/custom_operators.md create mode 100644 tensorflow/contrib/lite/g3doc/ios.md create mode 100644 tensorflow/contrib/lite/g3doc/models.md create mode 100644 tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md create mode 100644 tensorflow/contrib/lite/java/demo/README.md create mode 100644 tensorflow/contrib/lite/models/smartreply/g3doc/README.md create mode 100644 tensorflow/contrib/lite/models/testdata/g3doc/README.md create mode 100644 tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg create mode 100755 tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg create mode 100755 tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg create mode 100755 tensorflow/contrib/lite/models/testdata/g3doc/tts.svg create mode 100644 tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv create mode 100644 tensorflow/contrib/lite/nnapi/README.md create mode 100644 tensorflow/contrib/lite/toco/README.md create mode 100644 tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md create mode 100644 tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md create mode 100644 tensorflow/contrib/lite/toco/g3doc/python_api.md diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md new file mode 100644 index 0000000000..b173936f5b --- /dev/null +++ b/tensorflow/contrib/lite/README.md @@ -0,0 +1,200 @@ +# TensorFlow Lite +TensorFlow Lite is TensorFlow’s lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration. + +TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device. + +![image](g3doc/TFLite-Architecture.jpg) +# Getting Started with a Demo App + +This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using a quantized Mobilenet model. A device running Android 5.0 ( API 21) or higher is required to run the demo. + +There are 3 ways to get the demo app to your device + - Download the prebuilt binary or + - Use Android Studio to build the application or + - Download the source code for TensorFlow Lite and the demo and build it using bazel + +## Description +In the demo app, inference is done using the TensorFlow Lite Java API. The demo app classifies frames in real-time, displaying the top most probable classifications. It also displays the time taken to detect the object. + +## Downloading the pre-built binary +The fastest path to trying the demo, is to download the pre-built binary +[TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) + +Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera’s field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified. + +## Building in Android Studio using TensorFlow Lite AAR from JCenter +The simplest way to compile the demo app, and try out changes to the project code is to use AndroidStudio. + + - Install the latest version of Android Studio 3 as specified [here](https://developer.android.com/studio/index.html). + - Make sure the Android SDK version is greater than 26 and NDK version is greater than 14 (in the Android Studio Settings). + - Import the tensorflow/contrib/lite/java/demo directory as a new Android Studio project. + - Click through installing all the Gradle extensions it requests. + - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) + - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory: + tensorflow/contrib/lite/java/demo/app/src/main/assets/ + - Build and run the demo app + +## Building TensorFlow Lite and the demo app from source + +### Clone the TensorFlow repo +- git clone + [https://github.com/tensorflow/tensorflow](https://github.com/tensorflow/tensorflow) + +### Install Bazel +If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html) + +NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/demo/TfLiteCameraDemo.apk) instead. + +### Install Android NDK and SDK +Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system. + - Install the latest version of Bazel as per the instructions on the [Bazel website](https://bazel.build/versions/master/docs/install.html) + - The Android NDK is required to build the native (C/C++) TensorFlow code. The current recommended version is 14b, which may be found [here](https://developer.android.com/tools/revisions/build-tools.html). + - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TensorFlow Android demo (though it will run on API >= 21 devices). + + - The Android NDK is required to build the native (C/C++) TensorFlow Lite code. The current recommended version is 14b, which can be found [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads). + + - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TF Android demo (though it will run on API >= 21 devices). + - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.` + +``` + Android_sdk_repository ( + name = "androidsdk", + api_level = 23, + build_tools_version = "23.0.2", + path = "/home/xxxx/android-sdk-linux/", ) + +android_ndk_repository( + name="androidndk", + path="/home/xxxx/android-ndk-r10e/", + api_level=19) + +``` +Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md) + +### Build the source code +Run bazel with the following command to build the demo. + +Build the demo app: +bazel build --cxxopt='--std=c++11' //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo + +### More about the demo +The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app. + +# TensorFlow Lite Quick Start + +## Step 1. Decide which GraphDef to use + Depending on the use case, the developer may choose to use one of the popular + open-sourced models such as InceptionV3 or MobileNets, re-train these models + with their own custom data set or even build their own custom model. + +### Using a pre-trained model + +[MobileNets](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html) is a family of mobile-first computer vision models for [TensorFlow](https://www.tensorflow.org/) designed to effectively maximize accuracy while being mindful of the restricted resources for an on-device or embedded application. MobileNets are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as [Inception](https://arxiv.org/pdf/1602.07261.pdf), are used. Google provides 16 pre-trained [ImageNet](http://www.image-net.org/challenges/LSVRC/) classification checkpoints for MobileNets for use in mobile projects of all sizes. + +[Inception-v3](https://arxiv.org/abs/1512.00567) is an image recognition model which achieves fairly high accuracy in recognizing general objects with 1000 classes, like "Zebra", "Dalmatian", and "Dishwasher". The model extracts general features from input images using a convolutional neural network and classifies them based on those features with fully-connected and softmax layers. + +[On Device Smart Reply](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) is an on-device model which provides one-touch replies for an incoming text message by suggesting contextually relevant messages. The model is built specifically for memory constrained devices such as watches & phones and it has been successfully used to surface [Smart Replies on Android Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html). Note that this model only works on Android as of now. + +These pre-trained models can be downloaded from [here](models.md). + +### Retrain Inception-V3 or MobileNet for a custom data set +The above pre-trained models have been trained on the ImageNet data set, which consists of 1000 predefined classes. A model will need to be re-trained if these classes are not relevant or useful for a given use case. This technique is called transfer learning, which starts with a model that has been already trained on a problem and will then be retrained on a similar problem. Deep learning from scratch can take days, but transfer learning can be done fairly quickly. In order to do this, a developer will need to generate their custom data set labeled with the relevant classes. + +The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) codelab walks through this process step-by-step. The retraining code supports retraining for both floating point and quantized inference. + + +### Train a custom model +A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow’s Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. + +TensorFlow Lite currently supports a subset of TensorFlow operators. Please refer to [this document](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for details of supported operators and their usage. This +set will continue to expand in future releases of Tensorflow Lite. + + +## Step 2. Model format conversion + +The model generated in Step 1 is a standard Tensorflow model. After the completion of Step 1 a user should have a standard .pb or .pbtxt GraphDef file. If the application developer is using a pre-trained model (as defined in Step 1 above), they can download a ready to use, already converted model for use from [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/models.md). Models generated using retraining (aka transfer learning) or custom models will need to be converted using the steps mentioned below. + +A prerequisite to converting the model to the Tensorflow Lite format is to freeze the graph. + +Since we employ several formats, the following definitions may be useful: + - GraphDef (.pb) - a protobuf that represents the TensorFlow training and or computation graph. This contains operators, tensors, and variables definitions. + + - CheckPoint (.ckpt) - Serialized variables from a TensorFlow graph. Note, this does not contain the graph structure, so alone it cannot typically be interpreted. + + - FrozenGraphDef - a subclass of GraphDef that contains no variables. A GraphDef can be converted to a frozen graphdef by taking a checkpoint and a graphdef and converting every variable into a constant with the value looked up in the checkpoint. + + - SavedModel - A collection of GraphDef and CheckPoint together with a signature that labels input and output arguments to a model. A GraphDef and Checkpoint can be extracted from a saved model. + + - TensorFlow lite model (.lite) - a serialized flatbuffer, containing TensorFlow lite operators and Tensors for the TensorFlow lite interpreter. This is most analogous to TensorFlow frozen GraphDefs. + +### Freeze Graph +To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as “freezing” the graph. + +The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md) + +Graph freezing can be done using the command below (and modifying the arguments appropriately) + +``` +bazel build tensorflow/python/tools:freeze_graph + +bazel-bin/tensorflow/python/tools/freeze_graph\ + --input_graph=/tmp/mobilenet_v1_224.pb \ + --input_checkpoint=/tmp/checkpoints/mobilenet-10202.ckpt \ + --input_binary=true --output_graph=/tmp/frozen_mobilenet_v1_224.pb \ + --output_node_names=MobileNet/Predictions/Reshape_1 +``` + +The user has to first build the freeze_graph script using bazel and then run the script. The input_binary flag has to be enabled to ensure that the protobuf is read and written in binary format. The user has to input the .pb and the .ckpt files to freeze the graph The output_node_names may not be obvious outside of the code that built the model. The easiest way to find them is to visualize the graph, either with +graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2/#3). + +This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS. On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool. + +Here is a sample command line to convert the frozen Graphdef to '.lite' format for The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used. + +``` +bazel build tensorflow/contrib/lite/toco:toco + +bazel run --config=opt tensorflow/contrib/lite/toco:toco -- \ + --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ + --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ + --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \ + --input_type=FLOAT --input_arrays=input \ + --output_arrays=MobilenetV1/Predictions/Reshape_1 --input_shapes=1,224,224,3 +``` + +- The input_file argument should point to the frozen GraphDef file that holds the model architecture. +- The output_file argument should point to where the TensorFlow Lite model file should be generated. +- The input_type and inference_type arguments should be set to FLOAT, unless converted a [quantized](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/) model. +- Setting the input_array, output_array and input_shape arguments are a bit trickier. The easiest way to find these values is to explore the graph in tensorboard . The user should reuse the arguments that were used for specifying the output nodes for inference in the `freeze_graph`step. + +Note, it is also possible to use the Tensorflow Optimizing Converter through protos either from Python or from the command line see the +documentation [here](https://github.com/tensorflow/tensorflow/tree/mastertensorflow/contrib/lite/python:toco_from_protos target) A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example, + +``` +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +out = tf.identity(val, name="out") +with tf.Session() as sess: + tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) + open("converteds_model.tflite", "wb").write(tflite_model) + +``` +For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md). + +You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). + +## Step 3. Use the TensorFlow Lite model for inference in a mobile app + +After completion of Step 2 the developer should have a .lite model. + +### For Android +Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). + +The [demo app] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). + +Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). + +### For iOS +Follow the documentation [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app. diff --git a/tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg b/tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bc83946647c6a923a8a0bd3a041b42e4febe6a31 GIT binary patch literal 48710 zcmex=oIr{vTivovIz$!vMUve7&T5@$f4}C@t|nX z#SbdRNkvVZTw>x9l2WQ_>Kd9_CZ=ZQ7M51dF0O9w9-dyoA)#U65s^{JDXD4c8JStd zC8cHM6_r)ZEv;?s9i3g1CQq3GGAU*RJ2VdF$b$$4{OPfBE|D`;VW$K>lK6U=!0ld(M2vX z6_bamA3_l z2KW9s^}peD{?{gb|3uhd5yr+*!$*TZdX|KG2@sy;|tI(UwiTr}yrfW3cYnj`{8NpZM~>&;QThBzil5(NzQ$BbS-) zR=0j!|EDKP{r7){#|~0c0$mwIsD?vIyT4vGS>3((s`yj6wYM7d1LMr)wyro{u(@{E zgQ>YUjJDZpeLc;+W&ehzSN6M=*#1;^F>D08R+4?;q6RJ}rzTGZz9knpZ27XZ*Z=r3 z|KHB}PcoP+zG_P@5j41ILa^I?UG0IHJwGMgy^8$M**i8xwUzDabaA_=Ik!uNHy69T)>wZuEm%B-i zE@><3njF=TY z^yB(JH(lYs{~126d$l88>OaGYql@bI|1)7+3HCBt!XZ~IM!x^6?j^_NZg=$Gs~lgS z_~-S`Ss_jAvz(ZerX^J~#+j~&+xl<0>64ju$Ltrmwzz zm-*A`#N(^x@iJ8Tcgs#bT<+5o+91>1d?kGD%k5Ho4|`@Ns=WKmV|LAwq59i;$B)VE zANF3>JT*0lf5nM)$q&E(Y1ONL-TzgDarML1()amq$KQF?5oFNVeoorrT$R+br&mhr zMPAKMZ*vPx@;b66G_=-#ZF!tFPmO8ycFS9w$CtSl*)eFD&TKsPnDh3}E{2sWUi>=W zE~>ZhLfN!qKVm-AzFz;~-?m@pnhP#Jek-ujUHQk32iNXCzU7|h`?AG(;(TlA;vZKd zo?L#iao4}|*-1>Y|J-c(jQfu@*sS;b9sg|&)A4_H-GcHPmDlEE{gb)2CUcgB;( zIfC<#ZQPStXj7*=H7HZGOrPrr-&FO6`X{#Pe+%-TY3=X*&+sI5`ro7fLKrW_XPVDC z-d=Jzw@As*dFN@vpPSg17R;($vVGPRvyC1?k@8vF{Mq(Kd;MpaSakj0-u6%HO+P%h zQ{VKTVcq@54zk~7^X!xQ@a1j2`JCjm$qJJvI2?F>Z^!;^*>>7j&KPKnS{|$A zFuh$-94obL!gb-dk%>Q3kNeF$TWlUD|1tKBn)K?a({!F^pFLKuwR=fMRNLf5uU;1j z8y%j)aq|WLj|*&Si`QkvDP3h|~ zR;eDd=LulVFx8DdbZh<1cPizs$~8H+r?#(H@7t8Jqsel@Q(0%`H`(Xj{ZW6U{nBN7 zkh{)|1L@P}gqJUg_D)}NJNkUTk;S2Hd{qIgw?I7vN-?Zf^$a}!Z;}6Z|4$3J>iPaN zJdwHo-u_>U_O^BE5AXkJp8fave})UKWmgyiT}4oEIQQfDpAuXAzyD`AwtLmXn(Y4! z%3_`W{xkfzz_wH*(1DdoSi(;Ccg`-ytsA#b3ZJ=c-@!ThRYyLQx2HF~s-Ej~W!Y7( zZmqBCE7v{iVrYI-w*T{6{=dIP5C7PGc)C$mcJcCCCY?$(k4%3rssFk$_OA%ziAt-f z*Bh5?o^>g)T!KSjy$+*<(&`Frul*aYq*t$wObqL1*m2A+d&i;%e*F8QW;9b1no->#6 ziH{FMt0o`K40MnYZ#J3hbUboW^lHz)494V3Cg<=VAkZ%J3isYO=d z3okl|Y*`U!D|JmWWc3=YHOp5#+VkrB;akThuaaDw`*dR9cUK+f@;Zz4L5ms;OhWcu zHSz8!-EqHU$vVlhxjX(dNIiM?M}N(W>-RG*?>QOj7Zl-thJB&@lklY%9pnnW%Nbl* z{M)eaVQ3pChw|e)A7iIACYC2IS-SVjqfGX^-QmA?zK_}xz&rEqC9(%Dg9ZajUd$rP|KT zvox9v?wmeaB`U&rpyIg475%N}t*)JmO#XBwFZ9HD)9Wt^PUbI)JgOx!QHgQNSGFx0 z3wYuMUvRFA(g@=$l=I1}s^u*#`?gQxg6!h$Z3Ro7v;@4U{e69Tpo3g>e(IyWao;4< zCuNuRZfCDih+ovV^#zAya=`Io#*@dRN>MWmdIlnv4{!6V@`B}AhG4YjPN6+=*2Qde zA+>i*?;_{i22+hiT?|yk>&lNswO*UM#8r36F-^II*7lf$`7dLBc|Ouq4wpK}*j9gD zv^U39OVg_F*p?L`t3PPZYjv&sR^$4g!Ey4xdajrs-T#`^|NIj@|95HYzvBEAkL;&> z;nF_;`H}uFp821D1kM+%zmW2uVg8~1w>tWt+piSLpQ>UNeg5;K_+K{vpMNyYckF** zRDb@V`?qQ0KM%jM=znU<)b;s4!;cmJ86XmWLh8Rr+CTrW__sUvpNC&%+&|50bgldc zcY-m@s=td{{}ue|Q~v4iuxQVJ20Nwy3=2;FXRvRTgjx3Q;-UWx7Panzf0i!@jH`#c ztq1B0k^0QU`Z;!sJN^Y<&{%H|bDJC7F8eJ7`_IV-9;w%SAre0S`H^^t%Mez{-?E5* z&QGOK_yc#=QFmx~qdGwNkpll8KmMP8w9^k(O?|g8w@hE%*rbSINncgimqL@(@i%wh zIJTB+%0AQN6^pvGixn1iX)sU&uXz5SAyEDI@}D77W&Sg~Qu)tN@}EJAVflym{~XW$ z`~08bh12e=M|Gg;*>eBA`oA3ExtLWSAMKt=l{Jm z*YRjb%&8aWwxm4x8Qc4NK3`OnfZCO-+&m2@d;V?h%E>Di*!F4DSN6mwRoC8k|7W-! zUB4si-sTnOm%qH0oflHs*>>uA57*c1#1%WwMo)FSon1XIR>w6}!uqpi`6o+{w|Q#5 zA03rfyj$h5dRJ`g-q&Zg3wcU9P7tl1TN*esci&Nw&7wMmPY;Tps_lHy|taSn_PPj?sbNs=X8UN+j*- z$V=P&p{Cg3b_$P!#W}m=aFvMneiKa(wWa^;tDd{_U+MI3DXL3S*Y&?Q{dZwb>{D6O z_`>7T-c$R}WLL7(-_t7e?c!X=DRAhM;PtG@UGG(Pdc0rKLdOtvqWc~F;q;5?kV1KT{|YS*CKfBY1Y(~vV?TaOM7;2Sk$H6@&-H>tFfp- zaNo;c!FBTg8CLB7&#b=u#xKh>c5$NE3RgyQ-={~3M>th+U6#z0&dissq> zXPBTO|Mi3Y7qL~>{xf`hEB?FrXXt$S{|qOD_<#M7|HU1eUH{`-_uqqmRv$QTCn0w( za8IYkYB8rPcN#0sDIT}By6#}oEAsN$?=w3;u?Nqq*9q@;X16-IW9#y=s?v>ti{7f7 zi|e{L*+2U9q+KCJr#a#xj~Uc$W!aVcM4Xdlg3*cRT(LDU4N!)%Yvy2eT&w_ zM8rm&+UU!?dOMfloWEM-7fmG|evZ2Ot#O`#|5K;MVZU5o@NL!BKAGiMUSC{u>bg+A zmVpIR_gbfCUzPhR%hx1x8L(-7`xW~3j#7j|x>MoAu4|hvtaY1jdTc>z@1rCcMH$9e zlNCwX@ujt;maoDN2YILG9p(7rxYCuQH_t+rGr-p%dsf5DrNV#SER1*)=l(Q2dHY4x zT}{@R>SdK8qJ@n;p98XX<=stvo3l{;DT~(UsB;o44$t)ab=$pI!=LBrsfT&H7EQdO z*W0n#=-&gGx3_GIuTAsydKZz{xy)Me`1A(GnCi>#N@l-y;Wyo_HP84;klplZnTswL zWIz2qIPq+;X=jx6x7VMn!mrL-b-yv;M|-egW!mFy=DCt9a=upI7C0o?C-J0BQu6CE zz3Ek-y@d|ke)RjXa#wEat~;*;4yrlrPI$aFXrAgh^St*i-ffbuH?{MQ$G)0A{o!u!iam9P~ z)ElouS8mUK<>*=xW!I>d6ORRD2bc+nOu7Y^9FDx+@X~{%EPT&B^@j zzHFP8MTgg-<+3F^=iD#(tGS>5Kf{&y{|wh1|7M>3?tf|ahyM(JUH>!uEs?*rcTV@) zKmEq$x75>JMZz!4Qy{X2LFsK_(=)^xhEHvMfN8O~2S@1L?ixa0As_0qi2H;dJ_huQl0>SrxZ-@NnM z3u$SG(%#4=cWzHJyY>9zuaL{9{J&M%ZaMk;;nt-I{*TXP_cpzXPQTByR=R3+(p=TA zXD;bXiaBv^f5|jUd9N>;&%8_?%Y`pIY{OhCvvW&&jGo$mhL)`hKgS$>`uFUek}r$y zoLX5kKi%j+T3%xPoZ^=;4SRM7SKiN4 zyYKGb6Lk7^l(R5F zvmW^yMb5obqI2`8*R83Kk3O}%vu4II?pf>KYTWy?HoHDPu+}MTyV0%l{}``u_bWT> zn|1Zuqp%z9zpu=SeZ0Kwo!%0rbe<#a|Mcc&PZBp;qb6wF(I0a6;2+JPXDJ;=HhU=B z23Ruu6P^F9JIL>>cJ#B%udJsYmwerS;#PCm@4BU_@;4LPa}I85o*aE)%D=n&0~}@? zn_c|H^x%>k}LgimKmwX3W|1Nz(n}bjv@lG}44xa!t#NRwpEzzq8hsP3?+x zUJ@@GIy;k{$?fFJu$(-_>7LuKs6KtWIPu40_MhR>wtvrFYMuA+3G-%iQK`E=8zsWh z1?5&qytQx5mN`B3rP$S7?@~T}S6wsp{pF3fIbOUrQ4HJ;J#qEps?XvQRyJV}|=d_8;|>#Ba7&yEtcEQ#golJHOW3)r{jy}Dui zz1WwVOAhV&&+v8Shr2qL?mM-AFMA=t__}%i=eIHHxAkn^Tfa5>Ipf&h=k{}zV$Vw^ zO&8Mp7tdFhzqa&0L)PoM@az8=Pye0uHZTA6%BzBf`fa+?eRWh=@&)I}*I!#dy;uEm z(Az0@a}_4P-ah}@jt>2Gy2VfYJ~m&^4p!MQDYNFLYFhn!v1bd@0;5*!e!1M&TI2)o z?qcn%KVPl0HNLW5*SlBrpCL{8$;qF!8#HsyRG~MQxudWKN2_>#Le@w{C0UnMbp{bk~;O5472pn|0m)z?-GBOOMU!&(}%u{IsP- z_`=t9-i_z>laP-d}ng& z#uNL8jNAS*ENDIdMB_h03)Ag?P2%t=)74bgq<(~9$^U@-uQsegsd}3d#X|26!vAFuF zu6>IC878P6zff-&BU(~5z2o7&4b$!_O!R&{$zA5UWt^DR5-nc~>-phR16I0C?sd&| z+TG7ne?rv$e*W_q+Y%}N4#mrB#O^+r*pXDkaol7>sIuSECvVzk1d&nxkFvc zm)|%2V^@Cjo{D|R&foXFcE-I)S+cHw@*Ek})4GScG*+ol+V{eze`xi~L_(`?|Bkid zw|bY_W%HJG_k6E&{~21=yZmRE|K&f!KK~y9Y-dG#|1)sLb^d3tfBB!GX8wo8|Ah7m z|Es_J@2CAk2WgWq^FOLp>VN(-EdTqd{!sr9uP^>T|1kox5sz3j${=WJjP~k_s#olhLw)uw-evi4S zCcXNI_3!=<>q46NI1b9L-iGRdFtM$b3+}P)*tV}X{goRB|F6>Z=HkuIzQ24|y|XU! zbXMuSxsu(ndYum+tKFV`^-!rte|>S}FVl1Fd9Fv_zDW!Go_tWT_H0Q!vM=VSalgIt zyZel>_r1B|pUoH;7}yw=uE|I7x;(OmuO)UhHy3T(kK}t~pEIX*9Yb|>AZm~=;GLx{ zjTG$e$U)vy8I)zOpz|=XqqPpR{Z*LliYe>0o9Iz0tZFhyM%{WFP|* zYgU-77HcvyJK-7NoAm?xFkYNoX;49rWo&=6R!iE`GwZ_to!s!BVSdQs-)e{d)h4f= z65jZA*8Ow;zE#H^n*M41j-`JmC;n%!Tj~DIxBbs=_t>!R>YPi_H%~5iR7^WKaf7i=Eg7q@8uXEi2Q%%4X)(*qiRTz^LUZaz->IuG!S?y!7d_<=^{nC0gz* z`Q4Wf2^C1-1nv6I@Wbuj@j~IV#lroj?9;cuXF3ozZ9!*tfzW;Dwi4}02ifCK?MiGt zTC_cAaoE@Ti7g*3>QWZ6ynnIZ`22r{O*$UuZwmA;TmlL5h}XLsr{(eo7w1fn+5Yap z@BJEUPUrKU*4As}q}3*itsS0(O`Dy~GEZ{%%!j>8f^H-&nE2zOpT$hSkUO_BI^=%6=f8D8oCXL!Bf-`00ewyN9J zrPuZ=n=Y4F@oBZ}T#>-IkB_M}ugeM>&Rha7OafT#kymqt3!@DFW6V-%BQ58Oy^1{c zpTQhNt))j$bNGUf|7ZBb()@29`@g2EQTPAZF8a?Q3dL zB~vZeNNKN|rxmdL#PsMnR}D^;THbz{yl2we)R}p??{bbQcdei1w&~MxS48oHvllpI2RlG}(liucBwBohaqWY?Wn4<=m>ytwT0 z!yOTKtaCnF_dE%HvTN(cZI|EJU6^F$v2=!<2=lBb87m@Zdq2GXMf~ekG{nCpg9xRws zx%Fw7m9O0=E!dSC3y^D<15v@KHLwx7*Ck$p1gXx*b_s zk7tMYzTCb450hM^?~?VGf2D2jcz5k}XufWmP++&&@ZIAM*w-PVP&cX$2R=E-w%<0CE! zbIC_LrL;YguUcyrY}$Kb9#`wK*Sb*t2`@C&vgNE@bm*oI zkBjE#HDZJeyS}eF>3R2TwRx`ny5*gIdiJTmU2EO{GaRx_NHjJ}n^-@ij z4}bLJy&UUzQ9@>=LgDeM{omFqWTl6Sw``368u8rXXt;0Mt!Q2s>y}C7s=_i88`wXs znyRA`dn-0~rPealwbCzK?OM~eeTY3+svPa{FtXzbQ{k-OhfDumkB&F|(=7M*?8VagS?;Ur ze}tyU$!#y4RhcK7JF#me_p}_D^v{dp`IPQOttmE+_MafpBX}&URz7mORK(_K#kpF~ zb7q`)?Oz*qQ~%B3EM6mJv58#_>@0lE_tpj-+*O#Vo8!E)yXRK!M7!Rlk#~>FY(JBF z&(4SW3ro_HYhQL(_qyHVP7*4Q3(ynKw>M463NJXl-t>y^KHXP0zaG<0`y8?%*6DUo zSH0oXnr%INh38ETrb!jfk@AeKJ?drp=%MwtH~&7VeK@;1zfF53?uvISwSkjGXF!VlsZH-ntt;?7 z334YMd6CxjQskx43|AIWX&xVH4)c_cKh&LD8V~G{{Lip2YUhhR?r9U1B%YsI!OmrW zY_Cp(8wX2JW#-?be^$>HZdu;x8SWSV zuKxFv`ra!0Y4h*IpX%4O-?4vc;eUqYhr8|fy!>~0{a@vC_77Dz{b!h0{GZ{z@}G?V z3@y`6|EoOzpP{Dz<9hc`mw$Kv41Bu$yZEQYzvX#v{(bm&dHgT;IrWFUQ~xvAJpa$I z&;7?mWNYL4A*xUNztex}{>@H$^Y6pI{q}$9w?Hv^@jszx{h!b6|9-MRwpSja7~;k^ zHRkEx+rQ1Nen0 zMb#J9YrXt;{BT))s}0j4Gh1+!=%~0dF#`!JYV0nGVab_)~Yo5W@5Kv6RR(}ol6`S~a2vN_{ zJL;cs7JiXG6V4j@zQZ2Ij#Eqi<5KJ9y;S^_=$(q>e-11E{jRrq_O2|#^6#yfu}5d+zKmD>D^GvsxE3?%QvA%hVQ+HRtxpq(e)s+0f%sy-%Oxkj zYJPJuXnw!*#iHG_r8!J~7m8^A`z*Jn_Eo`-$dH=zE@6rNtsI|pQ;hvCFTVWXhQvw5 z-{u{E>(FFgFu@KE>Ke})%)^JgxGd*H>Q&9#;DJ~{Hv-IJWDW$}1c z&ObSgYwvkaon<*E;<0zL^RY*_p0x-&Y}k?VGhJ=w`Tq=XpSZ((XL_ankiPh>DE{s1 z=WZ`rcCYaL*|4o2exJ~jdz9Ly5wY6Lcux7^*A-VTJ&CW-vEyC+c{4NI{=R(StnUI_ zf?iDAWpI$)^upZ?BX=F9TPbtZEy84y?_8byaK>p9!~1U6y>=Qtntt7I)2?qB_2Gw( zo%NkFcYDXzYzr|76OXIx3vI64|1j-!{TFusGuIuHA3oc)by8-!z>GVq*<=l`&$@A0 z=h3#y?_4-VZ!ezPvch_Y=vp2(tuQ|=t^3zn!k4y(XZiU&Jl<`3sK6ve{Z(f4(W+N- z%z765lIeOKSADrSYr0g5*{jP7?kz}PYaV*($yL+ueMW*o29f<)sy*vZS7`6Poz~eW zH{EpEo3;0ztN&-<%-jB-!J*{OS$VjlobB7Ezq#a_UN)Uc=RHG_p~pSBe5;Gevx{PQ z-W|XD*mUx-zM+^~d?Q+VnC<^&Zn1 zi_>e>OhYAACLb-iCAaL>zNw;*k140Ah_Wj;cRelD|H*czen)ox!TL|Sj{iQ_TYdfX zCuL(m(WI^Y=F)9_a-ObtUT7@p`ogC`t!iSZRSArTe_a3PTor%)SJ%eSKiU5o6!+@C z{_7gCe%5*WPI>3Mr|dXcMFQgl->`}V`sj&XxPABb`j)5LGbi{Ix2VtCdcGws=+u(N zD~*>XZC@A_l6UED#G$Pk*EZ#+7cc+tZ8=L-&9q5A8%|FOoKsL7eCL&u+P1FfA1Z%e zP3Ks^z<93s)=nwo*n%_9rLx3sJewEGd8K&Y^MLEu_J5eBdtjcu`srhTKF$f*c71~M z>dkD6HlDa7v|5eJeE(0=6OX&%wmhEc$Mmj=Rf>Cp#H0nE0y}Yca~ib*fBj|n|Zk2#U-nbSAVd49m{b|XL8gW$;qc?nKC&PF-oqp zUS3t%7TV*LoY1~Fx&PdzgP9&;##^|dAjyp>GY~ALO&|IPR-An%4F1h ze&IU5`A}9Hn}1%v?8JYD zR{h`uXZLr?J1!iEx zuA8R0@Oh@pBE_a^!Dc z&}G?vE4koy^@p3QtJ-#E>Xf}??(tOGa;Jp(+U}hn-X03d`sn?PLtkPsB*tleEZL{sa6%4r-G)=eP3JlHtAuoAjgW;U$)KuzSHZzQ)72%PzlNq&(H$%;Lat)A+Ymb*mTMK3G_$`=nWtrHUuyyVcz(=GTH; z)n`g6D^IXwcs&2otTri=J0^iOdrOmw*K2h}eb)Y^wD?CFo6zz6invFHlh*7I`F(Nw z!D|nC1P;ibx%zUe2jiratJ6;>PVG%*dA4bN{@Z6C?ig@7&M}KDJRj_}rQpj$`}EY3 zUqAb%tiCd(^6O^j8jXcpEtwhIr}*0kMor%?b;PbR+QO#wj8m|WDb%R|q?QcF>(jT*>J$A>0 zlQU97nH;PHV>gv(=|!IQUVq|A*IM?otZU~#3*2=w|F$VS-8Mhl_Rb*-P&kY=;nFC;&1lSi?jVA*GFAU z4U69w{qbd>Yr%9Wu_YI6)=H@Gn|9i=mO1M#ee+&3_SKh9mZ|-#&u1Jv?fd6kP_%bP zlIN1vYv#Y#xJUIgEoVvieLY^qVCk|+*M+=Z#1`y4Z}DjM+MCh$Uw!*_a;JLgX)_DX z^RFXAPD}MZ_?`JGljrcgE!S&n`P|m{CKl%BiyoOcUn_8G;)!*0eiR*YR^6p~$YZfm z*zYA*W2MY?Y`S7{@|;Vh_M7^@NlQ~Nbw8aS@;2}Ff_rvGF&AF$Uq10{LZIhc@3`bJ zTh(<7A^pxE&Duz=GC=#EqBv;dG?T(_d?^QIWh6U)j`{~Z@<3h_13g4 zGv4!TIjbME>GIdiyDm*PuV+m@DIaxm(zD7$kGQam%Tt3YL`6&Yr0e=sWv~5u|DVK@ z2=B{JR{VCt^r&#*TfJ6~K*3!$XJ6HX@Bm3sb-rSWW8LG|md-KbWY|8ec(iPx%6O6<8c-{qvHIpdwW zl7|l*?!PXLZu`amZ?8=Sv!DHIy?3`?oU&h$bLHR4{m<}x zPyP2Bt5-kJm95%)xi&@qLdg6N%^CYYpUeOKBkP*0{PRB?|E|}cO3E@9*fzI-<>I7( z2W+DI5v@^^3T0RMPsf7)GvxZ_+_-W3<+je;oYth62g0+yY=|&z?d~eyd9drKjmwl- zTf0pHUCZV`^Caxh5%TGwl$93sJ>y(b@px+F<@Xx4T2Y?MH#`*=Pb<2oeNq3b_G<6h zqV5@tr#2_gX^wcaPo?>9-Xxi-!G2K9QK__C|}-!IV%^EqGSR9?RPp-leTj2gv-@;?iiUn+HZuzIhQ)`R;zhObhm9Q4<1Lh&Y zv5t8sHl0`z^U%@OSM5@Ga$r%J%a%Y_L+t8FJaS>sFTY`!z~=Aens7?eL!qb1`bv2J z)0;m38SXmOdp+4wFJRh|%X=VG=~%FA$ggnQ7?(_6w^+YZ5|-DOJ>_dpR$0lDtI{Vb zKIzB@?MYYvxP5-tbY@ET!n(I#zN~rc9lqnrlAq^pwBPM1k$)Wd_1UAsb#-sdY}fFG zN|nvcjn6ysZTjTMJ8M2{laZ_a+gm;>Z>iYQ9F_id8LM)(J)gFgXyniOnX*}etFA16 zcfWJwgO5TjKg)e?8y&p7+JEkD(<|>@Eb98+KN()3Axh#Yl*VFYITr0np+o{yf zGTHxDz&`5X>Z51k-wWBzR}EWqe@38d_K*5EYJYTZH68yO_kQ>Ms)x(@DmCsMUm3N0 z)3WE;H>P`j__Dn?YVa5YEJ*XN$_v|+OpL5-fRRpSBw9=`YJkWv2X2^oR6mS zZ`D`E=Hv>$i#o91`VVM9O%?we#NEz^gqKJexd&i7pmg_a7^k)VmH_= zShO!5Sv@j)RPV3|E5H4-<1gsvZhvjA_oiIa^L~$f)vsm8+-Cc4n|LnvQCRXT4-v*B z=xSf$4=!A}%HzXJ^G(N&#wwHrSqm)N7k*jg`q!J!KYg7&CB-u7JO8=5DU4s|ihlaf z@QuH-{)MgnKNe5-Z|pFpgM2_>y#9ZNgo^3^nlJrlc;GUn9*NEPl@+WNu6|VOPz|Q3 z%Rk%>y83pX^6kveufl~DbtY+homt!XB7^w__&hgytnQIFt=Rp~TlV_j^7y|j;jS_7 zyZ1lYc(?v~>3@a`t|cLO1}N<`Qs#yBN!;?3ZdhgH|ImSF(UMu5YnK}O3m;W@wQh<0 zi7O=xfv&a>rztG+xXaA5?y0f~!zr!oYVYptnz<~S4J4IJbR7O@sRgYGZDf%VsFq^D zw>{BfzI6Qwx8r{=|2bM~zwAH53BAkzcBubr(tgH|T|$HDDv7O6k*)K8GT#5saDB`C zU!3b_+Hn78kT>7&`fux(dz=3=C|Lhz`1Nh_e+Ea>&+3i!kDq;ivHx0wbrjCl`|^`K z8&n(e?U@~ro&vynmYg^353UBEb^7QKBJ^Piz{ z|C0@l{~2QCe}+4va0A#cl?1vn&}rwWUDn6)Khvz&|9ZFo7stA;o%hA-PbTmB&#>I3P$>fXC{UNMzk@pz(ej>gTC>MLJ5 z81J~gWw&nd;qO(p3*xs#x-E5(X=0Z@wZoqGhtu`Qi|f|ETYv9rLXz|o76x{e>yAtR zxNn*rpZ`pb!v3_e_ML_zb!^{1n7>-{3!=EcQ zm2L6j`&*v`omklxVB~qs{c-dz`{pfYxuY+i`E{&R=e}Pmd%aBZc0Pi@&~6wF8QrqlvSU*y?0;1DI;5t zwflu%Ouu#J7H|KpiGNO<7W>rnhJj}uziCvhzsrZ~t?8F?OhvD#?@01q?s|@`){1Ly z<)go9_d_l`?|PZmr!mX2i^->wA#H|ujsAknD7B5Hvh!Bt~tWtO6Fr+@T&R~ zQJ??qw*S|XeMM*8ALFI3ukYIQdZWkn?js2)`hJgJzKzRNXnG%C`&ldcbV0G^3z6+n zR{aZK++*3i^7E8U5` z{~5TxyJz1ldGYnQRYUA+LB|8%WnV3u{Xjdjd?t$6P*~adQ99#{C=c=v;mYL{k zAN?+M_1?P9f{Aa&9>{tb}7o%;tWi#9y0kId*t(% zy%fy4mHKh_rX)rk24$P{&+FXwG9Q_>ZsPJERLc{XWB3Sn*VbwoB#Eo{TIP_g9@zT4q_`7k#@-5(l!4XPVoI_ z__ue-e})UbH76g$|Mct^!n{B zAM;<1!5g9(&av;X%|4zl#lCmu+Qk_U(p3d_t@yG&@KXL3fBw_9mwqg`6S?8<-Ff9X zzj&U15q&Ohbo*7*@6$*3Zu$IV@jV!q3cybd=GE5ke{!% z^KJg27yGRH{dtnM?Wuk7%8JRT@W-`pn;-Y8`*vMA?!S54m9>u@Pk&h9?vqlwI&*EN zo${`WdXp+1T~V5@c7INT+HRC6YsdoUCwd0wd#-4vNd~kJ@&)4tu~*fv!lD) z&$Of`oH%GQ?Lgr_g%71$+q&h}FO8pl+DySP!N~Z2*AwjvrGLXcF55`TH7>ue`=-b3 zrSJmDllFhJKEHAh33N5U7+-q+hxb2&Y7_sj7ylVf?cze=HrP!Db;{}0xn}$vj?%UW zJ@*5t<{1kRiSSRO~OHYTuk@>`RDncp@z%;A=~!<4D(Ogzi)K^c1-x^ ze#Y9M^~s%!cIW?aT>GEl`6vJ14?u@%AT(?VMm#|GAF&*V4G- z!mefak-UVYJSVY!_6Kor5Nut3|NPSXM=Nted<|2_Lf;haTDQD# zP5j=sH=T_iTZc3EXKl@WWb1U@WwuA};@>Ja?{L-J`6cxNIbIN6(5%mX2sH;!MEVmO z`c#Za>_)jJRy4czyG;}EjPf@S^5#hvUCVQvKdNRz)h}TiTMxZVTu)eg-a?b&u)PV%1*pA$RH4JJXvU-P3exy}g$?dHveb`8R&$ z|7Ct#|Nhc{hSo}pHMb^lAxr*}Rr313y8 z{Br*n{;mHR_Iv*mLFMEJ{|nP6PMJM%%H#v57oU1{w)M-c+%M+Sp4QyxdwR#nIBxpi z{$>9eF4$fF_gDW%AS!3C|Idh8x7hO*SGPvbP5b*^@0;WdhB7^N(X{JF%D>H0`Q_tg zzv+x;L9p)6>$Rv_Q4HPd|5E~Fj`pIx!N28w|1&J8UH|u2{D;M;oT~YsHVlb)`_F(J zar1B7%K!5t>OaH%h5tnAK@kTI>eur>nE#o^VgGvZe+I4BUsqg`O?qWb-A&%o1j zJ>)~^Vr|vBT@vnsZ0oBZ^d~vvS;Y}C>!<(TGyV4hsq?R{=I4dQZ*4Ey zDw-E-_DbUWo*KR#mPbQYtn@0myKIfcIiqVj8f$0UYH8*iJ~`j8QTv?M+}1_jH3>6h zc%I9Ihini{dNBXijgv_V@1F!M+UB}$)_1oXI!%5v&x_nJIpG|$W@fVdUCX1He`kij z+47&^0r%^`Z7WVJYBDHvtvvG*qk)30`~0Y;_&cTE+X`{ZEvW|GoT|v~T|9`X}O<{~2P0|1&s>erCrd^Ub}F)H6?* zrs3-Q@0)$C{s}wiR)I_PFVyy>e9-?>bMLSCuM6y(D#ZWhe*eAlKf~tm^qBO<`fK;= zcg_E8T9!Py>&Km|@#3m4swKj+G`m)v{~ogaLfQn-@ipJqo<7+(GoPjG#exk_CWhvz z_XkB>ve;OBr;l;P>ZTUkbM>_)+n)%oSAQSvY}R$XR&n2f9sPgRZT?&poj27uhTqQq zF3ZN}$K#z_rdj;^qqgu;d9PKl)VqS?8Fo9?eV=-3`{rktuJ1hm zb$#)yV~Q-LEP>5E53W0Qrf-=2_M+AVzFX@n*uSZ;RCR?K-HX_{GGppDnZoeQ=(Rmr z8jJQUUW0F*0$J|U+v~oxcKs`KUUc>QI!`xB>yw)4ADHP ze5|PNRBfO3BMrYfDSwyb^=BK_PO1Ie;w958RVKg6d=$xMUva1U+I!QUxleiLUYvhbD#|$Q zaj@9;AeQa@6&ClZE`PZ>DNkR`lpUL(6&DkK6yW zR%}BOTZz1;^XO%Kv#_WgYX^t7KFWQxPH0n{GW#R{~2E2 z^8d?n{meYp{|x=J?=Sk#5XJD#pXoot+I{~SO6|X8z4^(KwBtOo!0X4x={Yg8KH1Ju z|1fdG=bNv@KCbyRV~N-L%EDYJm9pe#ahn*MBY!N~JkPGUJ7(4U^?}FvMZRC&xZu`o zY1S5#rhv+tD`nF^Ia}?S-IFgck>>FTi-sduITwP ztYmk0wdUL^C@L|_{W>MHW15Of-Wqlj*Ye!_+=7Xx zPaA3;T$81}Fm3AEwX?6s2b;IFJ4NcH&q#b)Cf9QH_UsiYmln0g8|fJtWSez8Z_`>5 zGWq&!=g1)6^$Lr+v}asFE(Y+ihhP!GUw@(uTtdVnl@OaQ3NF2-dr>9h#&*6PC;q&a z@&Aze;`H*5i?6I$bN|ih^}o1d|1;dz{LgUY;@_F2{}~*!_n)-?G3!TtI{$OqbLlrO zE!teaS#SRf-SVI6{~3hczolOP%ewAAgUWw~N14?Y_4dzXF1^VNjNE^7di)o$-G3(k zXXsq~+vxSbrt9^S>OY!5l$_tV)HUrtLt1S8g{b$R{Qs%-L#zh%p^sdA^YG7qh8e}) z#3-^mHdS;svPqFF5py>h!lyy7%acXQ}S7&MIvcN@5uiQ6J-B0{H^x<&#++D&&db&fBaniCH{51?`!){%=`Z{tk1OnA{=j2 zA^y)Uzpndl2J=lD?tj~U|Gn~`;pXbi>x>WVf1SVouK(XzyRCklx&8N@`{tH^;Dy7N zm*6Wx2+0k>B8KQz$csf=b#fD<-FNSZeNYnhqsLAz^!ceR8{dYkSg`KQF71H5t#@}% z=w|8uUims8^5xqinT;v8j4WP9on|#xmCE({+Yx$o_N1*{AtCd8W`BE=xBu>H^Ig?Z zZ*E0?=b2{}rp+IC^sKI6-Wt8B-!%i>x8~h?xuLexWtGi!mgaj~4n0)w%ynOAnyr;- zmKYc%mAlFI_fexfE`GMPi*EXsUAv#G<>vM5%64>!hQPcV3=%VTY%6 z+Qt{_yACJGa&FIFzVOY}Y+`YN@@5bV1?|$0az0cZQzv|Db4ORw;p9?Q-%vcfGr#vYj zzwmF^e50}(8&d`5y%pUdc~Wg#`u;86TX|%*)G+ajo)7fr@0-d|bTI4gJ5hsIOV-`I zxTbK~3iYtqb?r9aSD9Pwd1=zo$i~N9zk1l>&#Byw~-eYZQC! zy3gBbc}4zF#};)xUEfif$g%aqKea6{i#+!|bQK9V+(1krjNGvX-54VCpW*n``M-Pr z9IdK!|9$D_oE!Te8XvDAc^q^7CYvOBOugGq22}u%T<{oNGrR!xu}eSB-2VGYe>&$M z70@=b)Zc{)bcYeKX!h}i$z^G8U|`iDN5&L{_5MO)!ug9w02bgBzo+}e#8oo z_$B`tPWZ{MP43vfY3gsO!0;6lU&w!$7*T)u_}?G-pc8(j`F~aXXV}mCPt>XF$2~9m z(?9+*9I%`G>e9cY{|tXr%;dj({LfG)4L-HE?eM>O_U{kZw+1ZggR10T?frBA`aSzW zm-V*)wW+_~1{#rlsJP)j!}9*$AMQU4(O5jG|I{}~wU z*6Kg|x9`)Sb?MgYhp+1_Y}xRj=d1n0$k3IoOY<|A{`t@FK+wudUZe}B7t{Xqvb-I3CW*7ZNi z)8jv1lRxvHp;i7vUu^yPh>q^wXJ~<^Bzc4%R>vcB)E}}e z{m)>V_~-nOKuT<`EoQ#|@jpX@-1E=>SmP0i4Rq`>FFd_&{m;Ph^!lH#?Vs%*dLpsC zp~21Y-G0tmlpuRr6J5{x=i6DfKS~qI7smTXy-E4-_e`Kon@7Q_hzIlTw-($}`Onbb zVSm^D+}@md>*UrcTE*J@XLumW& z`KN#CAFv~wi7mBab0#jYV{s;|)I)P-SWeE8{WFeAYfad_>Bz7Ck1OYIXo*>}C9eJh zs}`TuI~UX}uMID|k+M8;dGYMmGxq(Ua`M=Pe?{B>sDvSx=J3Fe{Ljz=%jb(R3Jmsl z_4W_KP$LUof#ArqK`?v4<4N7#vgAP|AhpVUwLQY+4|1%&; zwovW2R@{WU7rt6s2?f<~GSkl@poptq@=~7$IY!xbOB|EW_R z|MhPD7s1%N$^RJ?E$r{DDU?4neX-fXKOVmpZ(Kbwb9(TNy^k~umX*u=3blIm<@`_K zb^jS2gnqgaWw=^x>CMeAK87y!(mcM)(rc20>W(R{T;jzSxL&7c>)IVU z`|?X`)`VS)Q|L_G=KA@vrTn4V)vGLvcKOBb7oHW_ynVOae}={*-ol5q-LbAYLU&Ei*3WmS~rTKm8?)n!}c zc4rlqgcr^7of36XiN)}wU~Wqa^V`rh4=;bsb!@u1rtsF2+z0HI@4M=6ZrbJN^``!o z;npj$6OUi*)LECb-MdpUwe#O=xwhThv!yN9sv6yna#yK2{&as#)d$U8tw)L~>hAxQ zH8c~^PdQ)c`}CCNXYXY#^%*-&Ps=SAb}c`>EGlwIz}mH&7xUkHV~OxG)0WRP;FmN9|R-s|OSuPs^^ zYqlmm`g3)^q1wEIt|DR1;JHFAvEAFew`|FLEUvfon$qEFjaLq@``k@U&1T(U>e5&O zS~|;OUjOmA{omsJr*WQ-{D1st|9kn*>uc}+Gdw<5|LfWPQ+p;Kng8)4|L^5LuV4Q# z|KpOF{|tARf3n$)dprx@Sl3^kPu_Hx4fyKMTe|ZnF3ag|eXcxfD%tm+cG7LmE_%8D=7-i#-^Gsy zAOCUf^6ABAGfv&LD>GVgCJ?5fW8T&~)Zoc(0m#pdHny-&}&=UpwfS>#*yt>@CNWiS8EH(P5r zt>i^h>$wkBv%2nGJ9XG&$E@s-*(awqoRGBn=>Dzxu5WZ~=?ne0qT99i&F?&Rd7_l3 zs6bwt1w*;%jhENTri)F=^3LR}e7`&2GY&#bbp0(~<;nnt~5!3xQ zPB@)!x^+zKumE7qM@+ctCI;bPTUeutm3XPo@L z&M7@oy*%{Rnj@`W*4em8-?(XBc5aQG`fc;nlwCb*>-p+WIQjp2Qa@u&*Yyw2|Fo$5 zXISU{v+L8x`|mD)F*#Dt|ZT~30 z5?tZTY^BQe-;!SqY?a1s} zrC1vW3D5cS%YA+)ZF;eO!t$VyyN}MVTjx}?|pZ0fqVxe1?~eo8zqEABtEeD;wd z?-kK^yGw%wZf`Fu=RdVkV{P5`uM} z=UTU-<0rd!?Hkv4Y0Wd*oO@?#^P;J1O1K}nOcRN|E%8ppf7g>M1{JLfC%rwos(uNB z!Pv-a@R#Y~O)t0Knh?E!kD1{FgZhiNaf>7DbU)0S|5#T3x809DsWP5C zA7yO(tOWmwK7YESBk^Ov>fqG+sh7il=2cGiJ$B;hKFuH5&g~D+e)*XBZQ7=rTDDsz z99eOqe8mCfjqDQt8MuyhAGxP@W$S+i?u;_2C1v%uta!|WPwVbF6?*RPC5B3sRvF>G z?8txOuXb(RzV)Mi&w5i|JvJU)7A9pitNh#}^Pj)dwm&K*`}F;>UMXFpSGk4@FaBpJ zEah>WrnFX1WaWdN!hYwtXJX4_FMku7wc^s8b(b?U)-IWPZ`FJMJ?A{{ecQVvqIXvq}1b83i#Ft)(|i%>-ke zOdd0CE<1L2?Up=Ai$DBpOTNW(zm4|3u6XUM=2q*!>G`+vZ8l2AF4x>x$;{4vxxuh& zU?)zNY>fX8Nw~^R9gZr;eCpR#>zW&UvY3JJF z|R}cKXv#0i{GjuWYTlFlrD!)|JJB0wr=C(l+AN)Yo6Wr!N)$xzJINE z;<}34$HJuccWlYcyYI7M(=3m-Ilik-K4)S0^QJPU$*TN~)%C#o2h}(Cx>=%F6I+MNXK@*qk+$45djl1Bs#Ds9= zKI`PQUdIyEB|dxEoHC3$_A>JKsXOb#)~2tI4Lv-~Vm{Xh83(zy^B^ExOd z`p)VjvdpWM*d$Lfq-s3Cg|EY2Q_wrxqmpiI|GNsP{y;ZMF&S=JkewD4*)^y+z zW7SRn=TQEi;pHeuI&;F_(MKikDmX7|LfXc2jHxqF{l4N zbHAMWpW)q$JCl#(JUjDu2LEQ6{`v0<`~5UK*Kd05zvAy4p_ALw{>`<1d~4&1HE~9p zukJVYUlkb3yIWsCI=g%Oq#K^Hr^PgPFV$V&;li!Mo7JE3WTIHzi!Z;zWJRu-ZQ45T zP=%HL!fm&{WS8gahzTB>ke=0i^2G6&pqJYheLnKC=#}z4&tt`X+I)Ret{E+!RCHCn z&_>o&Z1LVTrMKGli}o!nP~7acbwgj*4 zU2XPt(;KY{tpKlAZ?0W7J-%^!cILw9?y`Suk40i%Yii$~U2U--BD$#Y-lmKdzwYLR za=+;QwN_oPIxFw)kDjxIyl2c-iykVid0y~k`PoMnD*Wo-a-1GsXFYlT%euO*`q;JW zc2EAQWtF*cW$~J=$0mz5HfT8UeYjiCb$U_Pv8A3N1uk1FfV0{2;CTJ+^B3m0 ze^^rTCD65|^U*x-57+%SUAGB-^fx}VNqeuWDO+pJ7yhRTIt&b7UiL4#BD|zd;>YDm zzK+g(-fR6T-vwP5TqlNhSXc3d`v2khb=`Y)jn$5P!IXJG5wLe{M|p6a>i1BS40OLxj9?q=ppS1-rJLJGw`v; z%$gthV{-M%*HyKx%Qo$k+fr+#ee+%2vAj8Zx+L7h*&8H^kJnXkt$r|{G3(ZUhQqt9 zBW+yElb6PNye(foJ5^=sgumA9eaU%xt+n9|@ms+`X|}_35FRoC<F^j>!MeqXC~twpDf z&%60!e%!S!`=v7c*GZpvTXMOO^~chR-ckylD(%c6glc8hJKW{W}2=4yJuUntJ`exQeo6U;8@ki_UKen|m%l>u7>gu#jXN7ibi-_Z7v&wyCbo%=J z-+t~ zpzqo{5t`8^X0N&2{67kCp8Wnzwm{qc&^=y%-ps1C{kNaaE#+RR$J4P)t-q?VRbleP zK7P@Y;o>d5mPIR0EuFNhuRpw6ZR+0J+BYl9HQmBL*_?L$(Y*HT%4ogikFH$)%3QeZ zxzCl7gCdGXb)gd-6AJsH<>M^BM}78Q-+F7>%3QXYt1Gruzm=2Rb5LemS02YnYo8;N z?|l9hzLY`KL59IVd$!r?)>RL6Yh>8D7t>*ZSlyRDb-4{k!?k zuIt6_nGp()BiK<2A!4M@D^u6LLLY>vHF+qn=o@AnY){K_r@*e=8>gl`^<7f>CCvYiN?84!+0grMV9a>quQKkRj8FdB8~@aF#hUp) zJl6bY*lGUvll`%NB=-C(h4Lp3-w1g_D6R1)25(&Jn)09FkZbzillvhTsKA-E?t*`M zY+L<~Z55sTpMg_!{&zL_H8}_l$VCR+$ zw^Z~$!xSYxrgZBz?e&bUYk6E!HEv1=EDj9uU3~Vd{!flO_4~4?BJBrxwN)Nd@NZoE zhsQ4}w(jrv>kpc>zj8*)@;}2B&wnrfg{s1aFB&vauTk1DzrFqwU;g*`{~4S_Z(|
    X|=r zEo;@rxOD#~S@NGh=Kp7?>;G{PiGB5z_M?9r^-jODTYqES`}Z0@rP}}eiT=-UfAK$) zN(9@qs>=?fia%-h{ymG4^rM=k@$1NL{T;Sl@BV%2pSYs_{L%e?f7w5FM=--zEPgHY zX5WKv-n)OFvU6MbpTS=2Kg0JI{~3DaPIWQ^Lc)PqFEB{#U2_pW*$*{|r(noIMp5%IrH#0>5^a#TQho-u?SjUOC`D!~E|0 z-(TvF^&_#@2e|QZfSvjfzq(b{O`#Hv*O?~G-#$O}JNu^o^6yh>a5`YGtNt4QO~2dU zOedgBK4Ln^wAlOi0zaAZ|NOE2Kf`|aKN(2utgj-qNwL%K$ek>&uXI0g#s2xD`~UvR ze_V`UuD-HcI_>ZB{B;%!!)M>r-w}7t_Wr#9Br~v?XP-h$3{N65hW(IZIN|k4D}Ro8 zF2!3OCr$jDy=Gndk^3J%v;S`Yd41v3Yeh$vT>9N7b7)pC+q?5RI9Ao;T5Z4L=cOS~ zfP-2Bc=6(`OzGk)>rc2n{`)y*edB)k@g1Sq1aAd`#!e@${^S0tR^3%3Y*`|GPRn9` zwjSjKk2fayKe-P6`&#%qA!&@Hxl~t7pZ|+h@Pgm;RI@I!cN6A`m-l}8e z()|n^|n19lO;! zcFEfXf3^MFPH`XYdYb;2xAD}hrzS^vS4C?xI8G?2+j=?jZ?kXoo>zjFvsHMyo|Rnp z*s7gg#`V>p=~}%u z;^(znC1D@6PO7mp%kI8(_jTdz*RM^2I(2U{?Ylhj-kPkaqqE{7&dRJ*QkFf)SHQ9E z*8H#)%ic{}X}{-b`}TIV>37$EGQNG7%dt~`;<~1HhL2XBUSwnCxVO?HHEEem$#dE4 zrPBj@cFwc)x|yiF>_XHE*W%ojFFxJcd#BJVW>R!R<+;L$D~A$euas}zw*P~7o4W9x zZM*i(|G@M3;inBZckcMl(BH&<_vOS{(;gr8<`i7}CRbB|#isD}wJpo{Zt698G3lMT zzz!Z~J_d=iX|XYp-j7;mg}ZNmtb3(o#me?W2cu4hO?!H1pHlAYP5Ft>4;yYMY5OZz z60<33>5cG%>o|TkKlsmZKXl#nZx?+PtAFRKPXC^-IpYm7GVHVoK1_QO|Qp)^KYr=nqsvZ9suokF^ zUH^FgN^Xf3Zrl)=Jxfa>0TCz3c$%=KGp8H&0Uz)j) zSLu{^VW@v=>i4JW=|F?&66D zHTMNobJ=e+dA(((PF{+!^YwCP@84$oC)8&IezrU{^R1q8V|v95m%9>uy!oz+GB3WI z7#ruWQoXC8`dZJff-5E7(G~H6?)kigpWmq16Te}T*V{Qt zi+3KT1VOYYKuZ;;{80ksmHJDY4g4j zDcyUn$4vEF(WPr#8jE(E+WrWT^cuXE-x+JuX;GJwct(F1Wf&TPFCxC5B(XGrU+e=<5@jMAJ&iZzGWz;Tzn~N?* zXa4P5wDXqMywu2}>KCR?zvR1l(T*!QiPx3(Z=XBIT!^PcVs-kFZT}f=9ca(r^yc7C z|M;avA;EpAQTDvkwsWR_Su*wAmyH*e?U%~lbZ5fZyw{r*irCls&1##xtb69$lu40k z@+Ey!PhSj{IkxJq+Slz7n#cm1M zmg>a3GWha-(=&mdX`)l^Tzjkfo%cbe#R}W?#fz)&O1#Q0lXBcs^V?iA?CYatd#~y2 z-@LT>t$x`#j)2p>TTcgu{#_hA^{A`%E zm!EDk=Ez}Fc&D^;m$p{Ktt|@{@|x~k>!WJ5`T4x%VY`man6-0nqy&3+HgEg7Y^Kiv zJLXlE-8q$*roww%;#a7Z=Cxy2SIdPjU$8Ra<)!lbSMFRc)y~cpdUY#IDbjdLuFc1Y zlE8?o>$N8Kq<-_w4D7Y^@?H_>D%!XKN97Ld5RKODp!PuDl|YAi$A?Cb=syF}p@uIP z&hoAd^O&8z-Z`?)`a^$L{3F(PE0?_#7Z=>U`lw3gPJf25@^9rw>VGlD)<3XSn;e$x zyS${#{`Rc)Ki~f|9Cy|K_H5yQ2HXD(A1}t_BNWQtyf65l;RS2ye};yy?N>GGm45fu zZI1pq^JaZsmi*`Y{~2UOz^sq|8Tz^&*KgjZ{GZ_h*X#cbj4%G3`_ccO!9jcdANIws zrr1xcJ@|Fj{WIrI|1J*r&oJ*l!;cq>5zKYwzpWqH|7BRW|ASoroA(|68NM*>`pP#Uqg8O z4}Nzf4~H!L)BTQr&9#4NW%2OPkq5E<)IVOd<36(6?2*0uYbpPe_=4KB?VpV|{kwC; z{`q5g;DCZj^w@uf^cwg73=2fB|6{(0WH~rgn_rvmpE-9rA{N}iOndnsFNhDCAE1~7 zyA>R#h5M0W?@K+B?O(m*Pwac}>umnB^i)`YKtksDKP&BCmQ?}&T)(ql%dR)J z)_;4}`Cs*ahR2Kg;Ve?ZX8%7c4R|1t5Hz6hpfJ^d1Bry7ISzB`eiDMF&i>;?NDSi5 zQeSF=+CS;*;?6=v@rpP`PnQ62( z$m`PNj8izqurhXk=7*8OKdZPNPB#vA^@RWJUX_MZWg_ILbeKvs@g9Bh?;>iD&5|Cw`#5gPj6&i>B; zDZw7&jMl}kuGCLuzozw{!PuG~p#ko4?S7;*uo%hds{{Xq$dCFGM?Eqcmkf^=(LBXk zS*m2FWXe39@nqKX8l+=AC;zs7WDcp`x7Nx1@IET=p#5zC%gIY^ZywZa=l&M_d#^)(Y2l4GiGV>J-cz& PEh0SG0d`57{QsK( interpreter; +tflite::InterpreterBuilder(*model, resolver)(&interpreter); +// Resize input tensors, if desired. +interpreter->AllocateTensors(); +float* input = interpreter->typed_input_tensor(0); +// Fill `input`. +interpreter->Invoke(); +float* output = interpreter->type_output_tensor(0); +``` +### Data Alignment + +TensorFlow Lite data is usually aligned to 32-bit boundaries. It is recommended +that all data provided to TensorFlow Lite be aligned that way. + +### Error Reporting + +In many places TensorFlow Lite returns status information through +`TfLiteStatus` objects: + +```c++ +typedef enum { + kTfLiteOk = 0, + kTfLiteError = 1 +} TfLiteStatus; + +``` + +Failures can be easily verified with: +```c++ +if (status != kTfLiteOk) { + // ... error handling here ... +} +``` + +In order to obtain detailed error information an ErrorReporter must be +provided: + +```c++ +class ErrorReporter { + virtual int Report(const char* format, va_list args) = 0; +}; +``` + +The `DefaultErrorReporter` takes care of reporting to `stderr`. + +### Loading a Model + +The `FlatBufferModel` class encapsulates a model and can be built in a couple of +slightly different ways depending on where the model is stored: + +```c++ +class FlatBufferModel { +  // Build a model based on a file. Return a nullptr in case of failure. +  static std::unique_ptr BuildFromFile( +      const char* filename, +      ErrorReporter* error_reporter); + +  // Build a model based on a pre-loaded flatbuffer. The caller retains +  // ownership of the buffer and should keep it alive until the returned object +  // is destroyed. Return a nullptr in case of failure. +  static std::unique_ptr BuildFromBuffer( +      const char* buffer, +      size_t buffer_size, +      ErrorReporter* error_reporter); +}; +``` + +Note that if TensorFlow Lite detects the presence of Android's NNAPI it will +automatically try to use shared memory to store the FlatBufferModel. + +### Running a Model + +Running a model involves a few simple steps: + + * Build an `Interpreter` based on an existing `FlatBufferModel` + * Optionally resize input tensors if the predefined sizes are not desired. + * Set input tensor values + * Invoke inference + * Read output tensor values + +The important parts of public interface of the `Interpreter` are provided +below. It should be noted that: + + * Tensors are represented by integers, in order to avoid string comparisons + (and any fixed dependency on string libraries). + * An interpreter must not be accessed from concurrent threads + * Memory allocation for input and output tensors must be triggered + by calling AllocateTensors() right after resizing tensors. + +```c++ +class Interpreter { + Interpreter(ErrorReporter* error_reporter); + + // Read only access to list of inputs. + const std::vector& inputs() const; + + // Read only access to list of outputs. + const std::vector& outputs() const; + + // Change the dimensionality of a given tensor. + TfLiteStatus ResizeInputTensor(int tensor_index, + const std::vector& dims); + + // Returns status of success or failure. + TfLiteStatus AllocateTensors(); + + // Return a pointer into the data of a given input tensor. + template + T* typed_input_tensor(int index) { + return typed_tensor(inputs_[index]); + } + + // Return a pointer into the data of a given output tensor. + template + T* typed_output_tensor(int index) { + return typed_tensor(outputs_[index]); + } + + // Execute the model, populating output tensors. + TfLiteStatus Invoke(); +}; +``` + +### Writing Custom Operators + +All TensorFlow Lite operators (both custom and builtin) are defined using a +simple pure-C interface that consists of four functions: + +```c++ +typedef struct { + void* (*init)(TfLiteContext* context, const char* buffer, size_t length); + void (*free)(TfLiteContext* context, void* buffer); + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node); + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node); +} TfLiteRegistration; +``` + +Refer to `context.h` for details on `TfLiteContext` and `TfLiteNode`. The +former provides error reporting facilities and access to global objects, +including all the tensors. The latter allows implementations to access their +inputs and outputs. + +When the interpreter loads a model, it calls init() once for each node in the +graph. A given `init()` will be called more than once if the op is used +multiple times in the graph. For custom ops a configuration buffer will be +provided, containing a flexbuffer that maps parameter names to their values. +The buffer is empty for builtin ops because the interpreter has already parsed +the op parameters. Kernel implementation that require state should initialize +it here and transfer ownership to the caller. For each `init()` call, there +will be a corresponding call to `free()`, allowing implementations to dispose +of the buffer they might have allocated in `init()`. + +Whenever the input tensors are resized the interpreter will go through the +graph notifying implementations of the change. This gives them the chance to +resize their internal buffer, check validity of input shapes and types, and +recalculate output shapes. This is all done through `prepare()` and +implementation can access their state using `node->user_data`. + +Finally, each time inference runs the interpreter traverses the graph calling +`invoke()`, and here too the state is available as `node->user_data`. + +Custom ops can be implemented in exactly the same way as builtin ops, by +defined those four functions and a global registration function that usually +looks like this: + +```c++ +namespace tflite { +namespace ops { +namespace custom { + TfLiteRegistration* Register_MY_CUSTOM_OP() { + static TfLiteRegistration r = {my_custom_op::Init, + my_custom_op::Free, + my_custom_op::Prepare, + my_custom_op::Eval}; + return &r; + } +} // namespace custom +} // namespace ops +} // namespace tflite +``` + +Note that registration is not automatic and an explicit call to +`Register_MY_CUSTOM_OP` should be made somewhere. While the standard +`:builtin_ops` takes care of the registration of builtins, custom ops will have +to be collected in separated custom libraries. + +### Customizing the kernel library + +Behind the scenes the interpreter will load a library of kernels which will be +assigned to execute each of the operators in the model. While the default +library only contains builtin kernels, it is possible to replace it with a +custom library. + +The interpreter uses an `OpResolver` to translate operator codes and names into +actual code: + +```c++ +class OpResolver { + virtual TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const = 0; + virtual TfLiteRegistration* FindOp(const char* op) const = 0; + virtual void AddOp(tflite::BuiltinOperator op, TfLiteRegistration* registration) = 0; + virtual void AddOp(const char* op, TfLiteRegistration* registration) = 0; +}; +``` + +The regular usage will require the developer to use the `BuiltinOpResolver` and +write: + +```c++ +tflite::ops::builtin::BuiltinOpResolver resolver; +``` + +They can then optionally register custom ops: + +```c++ +resolver.AddOp("MY_CUSTOM_OP", Register_MY_CUSTOM_OP()); +``` + +before the resolver is passed to the `InterpreterBuilder`. + +If the set of builtin ops is deemed to be too large, a new `OpResolver` could +be code-generated based on a given subset of ops, possibly only the ones +contained in a given model. This is the equivalent of TensorFlow's selective +registration (and a simple version of it is available in the `tools` +directory). + +## Java + +TensorFlow Lite's Java API supports on-device inference and is provided as an +Android Studio Library that allows loading models, feeding inputs, and +retrieving inference outputs. + +The simplest usage of Tensorflow Lite Java API looks like this: + +```java +try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) { + interpreter.run(input, output); +} +``` + +### Loading a Model + +The `Interpreter.java` class drives model inference with TensorFlow Lite. In +most of the cases, this is the only class an app developer will need. + +#### Initializing an `Interpreter` Mith a Model Mile + +The `Interpreter` can be initialized with a model file using the constructor: + +```java +public Interpreter(@NotNull File modelFile); +``` + +or with a `MappedByteBuffer`: + +```java +public Interpreter(@NotNull MappedByteBuffer mappedByteBuffer); +``` + +In both cases a valid TensorFlow Lite must be provided or an +`IllegalArgumentException` with be thrown. If a `MappedByteBuffer` is used to +initialize an Interpreter, it should remain unchanged for the whole lifetime of +the `Interpreter`. + +### Running a Model + +#### Supported Data Types + +To use TensorFlow Lite, the data types of the input and output tensors must be +one of the following primitive types: + +* `float` +* `int` +* `long` +* `byte` + +If other data types, including boxed types like `Integer` and `Float`, are used, +an `IllegalArgumentException` will be thrown. + +#### Inputs + +Each input should be an array, a multi-dimensional array, or a `ByteBuffer` of +the supported primitive types. + +The use of `ByteBuffer` is preferred since it allows the `Interpreter` to avoid +unnecessary copies. Each `ByteBuffer` needs to be a direct byte buffer, and its +order must be `ByteOrder.nativeOrder()`. After it is used for a model inference, +it must remain unchanged until the model inference is finished. + +#### Outputs + +Each output should be an array, or a multi-dimensional array of the supported +primitive types. + +#### Running Model Inference + +If a model takes only one input and returns only one output, the following will +trigger an inference run: + +```java +interpreter.run(input, output); +``` + +For models with multiple inputs, or multiple outputs, use: + +```java +interpreter.runForMultipleInputsOutputs(inputs, map_of_indices_to_outputs); +``` + +where each entry in `inputs` corresponds to an input tensor and +`map_of_indices_to_outputs` maps indices of output tensors to the +corresponding output data. In both cases the tensor indices should correspond to +the values given to the `TensorFlow Lite Optimized Converter` when the model was +created. Be aware that the order of tensors in `input` must match the order +given to the `TensorFlow Lite Optimized Converter`. + +The Java API also provides convenient functions for app developers to get the +index of any model input or output using a tensor name: + +```java +public int getInputIndex(String tensorName); +public int getOutputIndex(String tensorName); +``` + +If tensorName is not a valid name in model, an `IllegalArgumentException` will +be thrown. + +### Releasing Resources After Use + +An `Interpreter` owns resources. To avoid memory leak, the resources must be +released after use by: + +```java +interpreter.close(); +``` diff --git a/tensorflow/contrib/lite/g3doc/custom_operators.md b/tensorflow/contrib/lite/g3doc/custom_operators.md new file mode 100644 index 0000000000..204a489a93 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/custom_operators.md @@ -0,0 +1,91 @@ +# How to use custom operators + +TensorFlow Lite currently supports a subset of TensorFlow operators. However, it +does support the use of user-provided implementations (as known as custom +implementations) if the model contains an operator that is not supported. + +Let’s walk through this via an example. Assume we are using the `Sin` operator +and that we are building a very simple model for a function `y = sin(x + +offset)`, where `offset` is trainable. + +The code to train the TensorFlow model will be something like: + +```python +offset = tf.get_variable("offset", [1,], tf.float32) +x = tf.placeholder(tf.float32, shape=(None,)) +y = tf.sin(x + offset) +y_ = tf.placeholder(tf.float32, shape=(None,)) +loss = tf.reduce_sum(tf.square(y - y_)) +optimizer = tf.train.GradientDescentOptimizer(0.001) +train = optimizer.minimize(loss) +``` + +If you convert this model to Tensorflow Lite format using the TensorFlow Lite +Optimizing Converter with `--allow_custom_ops` argument, and run it with the +default interpreter, the interpreter will raise the following error messages: + +``` +Didn't find custom op for name 'Sin' +Registration failed. +``` + +All we need to do to use the op in TensorFlow Lite is define two functions +(`Prepare` and `Eval`), and construct a `TfLiteRegistration`. This code would +look something like this: + +```cpp +TfLiteStatus SinPrepare(TfLiteContext* context, TfLiteNode* node) { + using namespace tflite; + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + + int num_dims = NumDimensions(input); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(num_dims); + for (int i=0; idata[i] = input->dims->data[i]; + } + + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) { + using namespace tflite; + TfLiteTensor* input = GetInput(context, node,0); + TfLiteTensor* output = GetOutput(context, node,0); + + float* input_data = input->data.f; + float* output_data = output->data.f; + + size_t count = 1; + int num_dims = NumDimensions(input); + for (int i = 0; i < num_dims; ++i) { + count *= input->dims->data[i]; + } + + for (size_t i=0; i SDK Tools -> + Android Support Repository`. + + 2. [Edit your `WORKSPACE`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#edit-workspace) + to add SDK and NDK targets. + + - Make sure the `api_level` in `WORKSPACE` is set to an SDK version that + you have installed. + - By default, Android Studio will install the SDK to `~/Android/Sdk` and + the NDK to `~/Android/Sdk/ndk-bundle`. + +2. Build the app with Bazel. The demo needs C++11: + + ```shell + bazel build -c opt --cxxopt='--std=c++11' \ + //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo + ``` + +3. Install the demo on a + [debug-enabled device](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install): + + ```shell + adb install bazel-bin/tensorflow/contrib/lite/java/demo/app/src/main/TfLiteCameraDemo.apk + ``` diff --git a/tensorflow/contrib/lite/models/smartreply/g3doc/README.md b/tensorflow/contrib/lite/models/smartreply/g3doc/README.md new file mode 100644 index 0000000000..cab5dcca43 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/g3doc/README.md @@ -0,0 +1,146 @@ +# Smart Reply Model + +## What is On-Device Smart Reply Model? + +Smart Replies are contextually relevant, one-touch responses that help the user +to reply to an incoming text message (or email) efficiently and effortlessly. +Smart Replies have been highly successful across several Google products +including +[Gmail](https://www.blog.google/products/gmail/save-time-with-smart-reply-in-gmail/), +[Inbox](https://www.blog.google/products/gmail/computer-respond-to-this-email/) +and +[Allo](https://blog.google/products/allo/google-allo-smarter-messaging-app/). + +The On-device Smart Reply model is targeted towards text chat use cases. It has +a completely different architecture from its cloud-based counterparts, and is +built specifically for memory constraints devices such as phones & watches. It +has been successfully used to provide [Smart Replies on Android +Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) +to all first- & third-party apps. + +The on-device model comes with several benefits. It is: + +* **Faster**: The model resides on the device and does not require internet + connectivity. Thus, the inference is very fast and has an average latency of + only a few milliseconds. +* **Resource efficient**: The model has a small memory footprint on + the device. +* **Privacy-friendly**: The user data never leaves the device and this + eliminates any privacy restrictions. + +A caveat, though, is that the on-device model has lower triggering rate than its +cloud counterparts (triggering rate is the percentage of times the model +suggests a response for an incoming message). + +## When to use this Model? + +The On-Device Smart Reply model is aimed towards improving the messaging +experience for day-to-day conversational chat messages. We recommend using this +model for similar use cases. Some sample messages on which the model does well +are provided in this [tsv +file](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv) +for reference. The file format is: + +``` + {incoming_message smart_reply1 [smart_reply2] [smart_reply3]} +``` + +For the current model, we see a triggering rate of about 30-40% for messages +which are similar to those provided in the tsv file above. + +In case the model does not trigger any response, the system falls back to +suggesting replies from a fixed back-off set that was compiled from popular +response intents observed in chat conversations. Some of the fallback responses +are `Ok, Yes, No, 👍, ☺`. + +The model can only be used for inference at this time (i.e. it cannot be custom +trained). If you are interested to know how the model was trained, please refer +to this [blog +post](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) +and [research paper](https://arxiv.org/pdf/1708.00630). + +## How to use this Model? + +We have provided a pre-built demo APK that you can download, install and test on +your phone ([demo APK +here](http://download.tensorflow.org/deps/tflite/SmartReplyDemo.apk)). + +The On-Device Smart Reply demo App works in the following way: + +1. Android app links to the JNI binary with a predictor library. + +2. In the predictor library, `GetSegmentPredictions` is called with a list of input + strings. + + 2.1 The input string can be 1-3 most recent messages of the conversations in + form of string vector. The model will run on these input sentences and + provide Smart Replies corresponding to them. + + 2.2 The function performs some preprocessing on input data which includes: + + * Sentence splitting: The input message will be split into sentences if + message has more than one sentence. Eg: a message like “How are you? + Want to grab lunch?” will be broken down into 2 different sentences. + * Normalization: The individual sentences will be normalized by converting + them into lower cases, removing unnecessary punctuations, etc. Eg: “how + are you????” will be converted to “how are you?” (refer for NORMALIZE op + for more details). + + The input string content will be converted to tensors. + + 2.3 The function then runs the prediction model on the input tensors. + + 2.4 The function also performs some post-processing which includes + aggregating the model predictions for the input sentences from 2.2 and + returning the appropriate responses. + +3. Finally, it gets response(s) from `std::vector`, and + returns back to Android app. Responses are sorted in descending order of + confidence score. + +## Ops and Functionality Supported + +Following are the ops supported for using On-Device Smart Reply model: + +* **NORMALIZE** + + This is a custom op which normalizes the sentences by: + + * Converting all sentences into lower case. + * Removing unnecessary punctuations (eg: “how are you????” → “how are + you?”). + * Expanding sentences wherever necessary (eg: “ I’m home” → “I am home”). + +* **SKIP_GRAM** + + This is an op inside TensorFlow Lite that converts sentences into a list of + skip grams. The configurable parameters are `ngram_size` and + `max_skip_size`. For the model provided, the values for these parameters are + set to 3 & 2 respectively. + +* **EXTRACT_FEATURES** + + This is a custom op that hashes skip grams to features represented as + integers. Longer skip-grams are allocated higher weights. + +* **LSH_PROJECTION** + + This is an op inside TensorFlow Lite that projects input features to a + corresponding bit vector space using Locality Sensitive Hashing (LSH). + +* **PREDICT** + + This is a custom op that runs the input features through the projection + model (details [here](https://arxiv.org/pdf/1708.00630.pdf)), computes the + appropriate response labels along with weights for the projected features, + and aggregates the response labels and weights together. + +* **HASHTABLE_LOOKUP** + + This is a custom op that uses label id from predict op and looks up the + response text from the given label id. + +## Further Information + +* Open source code + [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/smartreply/). diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md new file mode 100644 index 0000000000..d0c21d2833 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md @@ -0,0 +1,102 @@ +## Speech Model Tests + +Sample test data has been provided for speech related models in Tensorflow Lite +to help users working with speech models to verify and test their models. + +For the hotword, speaker-id and automatic speech recognition sample models, the +architecture assumes that the models receive their input from a speech +pre-processing module. The speech pre-processing module receives the audio +signal and produces features for the encoder neural network and uses some +typical signal processing algorithms, like FFT and spectral subtraction, and +ultimately produces a log-mel filterbank (the log of the triangular mel filters +applied to the power spectra). The text-to-speech model assumes that the inputs +are linguistic features describing characteristics of phonemes, syllables, +words, phrases, and sentence. The outputs are acoustic features including +mel-cepstral coefficients, log fundamental frequency, and band aperiodicity. +The pre-processing modules for these models are not provided in the open source +version of TensorFlow Lite. + +The following sections describe the architecture of the sample models at a high +level: + +### Hotword Model + +The hotword model is the neural network model we use for keyphrase/hotword +spotting (i.e. "okgoogle" detection). It is the entry point for voice +interaction (e.g. Google search app on Android devices or Google Home, etc.). +The speech hotword model block diagram is shown in Figure below. It has an input +size of 40 (float), an output size of 7 (float), one Svdf layer, and four fully +connected layers with the corresponding parameters as shown in figure below. + +![hotword_model](hotword.svg "Hotword model") + +### Speaker-id Model + +The speaker-id model is the neural network model we use for speaker +verification. It runs after the hotword triggers. The speech speaker-id model +block diagram is shown in Figure below. It has an input size of 80 (float), an +output size of 64 (float), three Lstm layers, and one fully connected layers +with the corresponding parameters as shown in figure below. + +![speakerid_model](speakerid.svg "Speaker-id model") + +### Text-to-speech (TTS) Model + +The text-to-speech model is the neural network model used to generate speech +from text. The speech text-to-speech model’s block diagram is shown +in Figure below. It has and input size of 334 (float), an output size of 196 +(float), two fully connected layers, three Lstm layers, and one recurrent layer +with the corresponding parameters as shown in the figure. + +![tts_model](tts.svg "TTS model") + +### Automatic Speech Recognizer (ASR) Acoustic Model (AM) + +The acoustic model for automatic speech recognition is the neural network model +for matching phonemes to the input autio features. It generates posterior +probabilities of phonemes from speech frontend features (log-mel filterbanks). +It has an input size of 320 (float), an output size of 42 (float), five LSTM +layers and one fully connected layers with a Softmax activation function, with +the corresponding parameters as shown in the figure. + +![asr_am_model](asr_am.svg "ASR AM model") + +## Speech models test input/output generation + +As mentioned above the input to models are generated from a pre-processing +module (output of a log-mel filterbank, or linguistic features), and the outputs +are generated by running the equivalent TensorFlow model by feeding them the +same input. + +## Link to the open source code + +### Models: + +[Speech hotword model (Svdf rank=1)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank1.tflite) + +[Speech hotword model (Svdf rank=2)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank2.tflite) + +[Speaker-id model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_speakerid_model.tflite) + +[TTS model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_tts_model.tflite) + +[ASR AM model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_terse_am_model.tflite) + +### Test benches + +[Speech hotword model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc) + +[Speaker-id model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc) + +[TTS model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc) + +[ASR AM model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc) + +## Android Support +The models have been tested on Android phones, using the following tests: + +[Hotword] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=25) + +[Speaker-id] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=36) + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg new file mode 100644 index 0000000000..ca96556422 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg b/tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg new file mode 100755 index 0000000000..36187aa321 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg b/tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg new file mode 100755 index 0000000000..dbe4312c46 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/tts.svg b/tensorflow/contrib/lite/models/testdata/g3doc/tts.svg new file mode 100755 index 0000000000..9664b78f16 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/tts.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv b/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv new file mode 100644 index 0000000000..dfdc783106 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv @@ -0,0 +1,50 @@ +any chance ur free tonight Maybe not +any updates? No update yet +anything i can do to help? No, but thanks No, but thank you No, but thanks for asking +be safe. I will be Will do my best Thanks, I will +congratulations Thanks thanks Congratulations +cool, let me know when you have time Cool Yes very cool Yeah, cool +drive safe Thank you, I will Home now I will thanks +hang in there, you'll be okay Doing my best Of course we will +happy birthday! Hey, thanks +happy new year! Wish you the same Thanks and same to you +have a safe flight Thanks, love you too Safe travels +hey What is up? How it going? Can I help you? +hey, got a sec? What is up? How it going? Can I help you? +how are you doing? Great and you? I am doing great +how are you feeling Feeling okay A little better Much much better +how was your weekend? It was real good +how you doing Okay and you +hugs. So sweet Thanks sweetie Take care of yourself +i'm bored Sorry to hear that Join the club No you are not +i'm planning on coming next week. let me know if that works. Works Perfect, thanks +i'm sick Sorry to hear that +i'm so happy for you Thanks me too +i'm so hungry Haha me too +i'm sorry No I am sorry Why sorry? No worries love +i'm sorry, i'm going to have to cancel. No I am sorry Why sorry? No worries love +is there anything i can do to help? No, but thanks No, but thanks for asking +lunch? Yes coming +okay. lemme know as soon as you find out. Any more questions? It is done +omg amazing So amazing +on my way Okay see you soon Cool, see you soon Oh wow, ok +oops, mistexted. Oops Haha, oh well That was funny +safe travels. Thanks, love you too Safe travels +so sorry So sorry +sorry, i can't. No worries at all Sorry what? +sorry, i can't do saturday No worries at all +thank you so much. You are so welcome You are so very welcome You are most welcome +thanks for coming It was my pleasure +thanks, this has been great. Glad to help So happy for you +tomorrow would be ideal. Yes it would +tried calling Try again? +ugh, my flight is delayed. Ugh indeed +what are you guys up to tonight? Nothing planned +what day works best for you Any day +what do you want for dinner Your call Whatever is fine +what time will you be home? Not sure why +where are you?!? At my house +wish you were here. I wish the same Me too honey +you're amazing You are too You are amazing I am +you're marvelous You are too +you're the best. I do my best You are the best Well, I try \ No newline at end of file diff --git a/tensorflow/contrib/lite/nnapi/README.md b/tensorflow/contrib/lite/nnapi/README.md new file mode 100644 index 0000000000..913467d176 --- /dev/null +++ b/tensorflow/contrib/lite/nnapi/README.md @@ -0,0 +1,15 @@ +# Android Neural Network API + +The Android Neural Networks API (NNAPI) is an Android C API designed for running +computationally intensive operators for machine learning on mobile devices. +Tensorflow Lite is designed to use the NNAPI to perform hardware-accelerated +inference operators on supported devices. +Based on the app’s requirements and the hardware capabilities on a device, the +NNAPI can distribute the computation workload across available on-device +processors, including dedicated neural network hardware, graphics processing +units (GPUs), and digital signal processors (DSPs). +For devices that lack a specialized vendor driver, the NNAPI runtime relies on +optimized code to execute requests on the CPU. For more information about the +NNAPI, please refer to the [NNAPI documentation](https://developer.android.com/ndk/guides/neuralnetworks/index.html) + + diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md new file mode 100644 index 0000000000..281b2ea5e4 --- /dev/null +++ b/tensorflow/contrib/lite/toco/README.md @@ -0,0 +1,26 @@ +# The TensorFlow Lite Optimizing Converter + +The TensorFlow Lite Optimizing Converter's most typical use is converting from the TensorFlow GraphDef to the TensorFlow Lite +format, but it supports much more than that. + +## Usage documentation + +Usage information is given in these documents: + +* [Command-line examples](g3doc/cmdline_examples.md) +* [Command-line reference](g3doc/cmdline_reference.md) +* [Python API](g3doc/python_api.md) + +## Design documentation + +Coming soon! + +## Where the converter fits in the TensorFlow landscape + +In the typical case, an application developer is using TensorFlow to design and +train models, then uses TensorFlow's freeze_graph.py to generate a frozen +inference graph, then uses the converter to convert that into a TensorFlow Lite flatbuffer file, +then ships that file to client devices where the TensorFlow Lite interpreter handles them +on-device. This is represented in the following diagram: + +![drawing](https://storage.googleapis.com/download.tensorflow.org/example_images/tensorflow_landscape.svg) diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md new file mode 100644 index 0000000000..b9f8c8d152 --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md @@ -0,0 +1,509 @@ +# TensorFlow Lite Optimizing Converter command-line examples + +This page is a guide to using the TensorFlow Lite Optimizing Converter by +looking at some example command lines. It is complemented by the following other +documents: + +* [README](../README.md) +* [Command-line reference](cmdline_reference.md) + +Table of contents: + +[TOC] + +## Convert a TensorFlow GraphDef to TensorFlow Lite for float inference + +In this example, we look at the most common task: we have an ordinary TensorFlow +GraphDef and want to convert it to a TensorFlow Lite flatbuffer to perform +floating-point inference. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +To explain each of these flags: + +* `--input_format` and `--output_format` determine the formats of the input + and output files: here we are converting from `TENSORFLOW_GRAPHDEF` to + `TFLITE`. +* `--input_file` specifies the path of the input file, to be converted. When + `--input_format=TENSORFLOW_GRAPHDEF`, this file should be a + *[frozen](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)* + *inference* graph. Being frozen means in particular that the input file is + self-contained, and does not reference any external "checkpoint" file. An + *inference* graph is a version of a graph meant to be used for inference, + typically not the same graph file as was used for training a given model. +* `--output_file` specifies the destination to write the converted file to. +* `--input_array` specifies the input activations, that is, the input "tensor" + in the input TensorFlow GraphDef file. The array designated by + `--input_array` is the one that the user will have to provide the contents + of as input to the runtime inference code. +* `--output_array` specifies the output activations, that is, the output + "tensor" in the input TensorFlow GraphDef file. The runtime inference code + will store its results in the array designated by `--output_array`. +* `--input_shape` specifies the shape of the input array. It is currently + required, but the plan is for a future version to no longer require it, + allowing to defer the specification of the input shape until runtime. The + format of `input_shape` is always a comma-separated list of dimensions, + always in TensorFlow convention. +* `--input_type` specifies what should be the type of the input arrays in the + **output** file. `--input_type` does not describe a property of the input + file: the type of input arrays is already encoded in the input graph. + Rather, `--input_type` is how you specify what should be the type of the + inputs to be provided to the output converted graph. This only affects + arrays of real numbers: this flag allows to quantized/dequantize + real-numbers inputs, switching between floating-point and quantized forms. + This flag has no incidence on all other types of input arrays, such as plain + integers or strings. +* `--inference_type` specifies what type of arithmetic the output file should + be relying on. It implies in particular the choice of type of the output + arrays in the output file. Like `--input_type`, `--inference_type` does not + describe a property of the input file. + +## Just optimize a TensorFlow GraphDef + +The converter accepts both TENSORFLOW_GRAPHDEF and TFLITE file formats as both +`--input_format` and `--output_format`. This means that conversion from and to +any supported format is possible, and in particular, same-format "conversions" +are possible, and effectively ask the converter to optimize and simplify a +graph. Example: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.pb \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +Here we did not pass `--input_type` and `--inference_type` because they are +considered not applicable to the TensorFlow GraphDef format (as far as we are +concerned, TensorFlow GraphDefs are technically always float, and the only +flavor of "quantized" GraphDef that the converter deals with is "FakeQuantized" +graphs that are still technically float graphs). + +Below in the section about passing arbitrary input/output arrays we give another +example, using the converter to extract just a sub-graph from a TensorFlow +GraphDef. + +## Convert a TensorFlow Lite flatbuffer back into TensorFlow GraphDef format + +As we mentioned that the converter supports file format conversions in any +direction, let us just give an example of that: + +``` +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/foo.lite \ + --output_file=/tmp/foo.pb \ + --input_format=TFLITE \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +## Convert a TensorFlow GraphDef to TensorFlow Lite for quantized inference + +Let us now look at a quantized model. As mentioned above, the only flavor of +quantized TensorFlow GraphDefs that the converter is concerned with, is +"FakeQuantized" models. These are technically float models, but with special +`FakeQuant*` ops inserted at the boundaries of fused layers to record min-max +range information allowing to generate a quantized inference workload that is +able to reproduce exactly the specific quantization behavior that was used +during training. Indeed, the whole point of quantized training is to allow for +both training and inference to perform exactly the same arithmetic, so that the +way that the training process about around quantization inaccuracy is +effectively helping the quantized inference process to be more accurate. + +Given a quantized TensorFlow GraphDef, generating a quantized TensorFlow Lite +flatbuffer is done like this: + +``` +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/some_quantized_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=QUANTIZED_UINT8 \ + --inference_type=QUANTIZED_UINT8 \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --mean_value=128 \ + --std_value=127 +``` + +Here, besides changing `--input_file` to point to a (fake-)quantized GraphDef, +the only other changes are: + +* To change `--input_type` and `--inference_type` to `QUANTIZED_UINT8`. This + effectively tells the converter to generate an output file that can take a + quantized uint8 array as input (`--input_type=QUANTIZED_UINT8`), and have + quantized uint8 internal and output arrays as well + (`--inference_type=QUANTIZED_UINT8`). +* To pass `--mean_value` and `--std_value` flags to describe how the quantized + uint8 input array values are to be interpreted as the mathematical real + numbers that the graph is concerned with (keep in mind that even a + "fake-quantized" TensorFlow GraphDef is still technically a float graph). + The meaning of `--mean_value` and `--std_value` is explained in the + command-line reference; it suffices for now to say that they are a property + of each model. + +## Use dummy-quantization to try out quantized inference on a float graph + +Sometimes, one only has a plain float graph, and one is curious as to how much +faster inference might run if one could perform quantized inference instead of +float inference. Rather than requiring users to first invest in quantizing their +graphs before they can evaluate a possible benefit, the converter allows to +simply experiment with what we call "dummy quantization": provide some vaguely +plausible values for the min-max ranges of values in all arrays that do not have +min-max information, so that quantization can carry on, certainly producing +inaccurate results (do not use that in production!) but with performance +characteristics that should be identical to those of an actually quantized +flavor of the model. + +In the present example, we have a model using Relu6 activation functions almost +everywhere, so a reasonable guess is that most activation ranges should be +contained in [0, 6] and roughly comparable to it. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.cc \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=QUANTIZED_UINT8 \ + --inference_type=QUANTIZED_UINT8 \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --default_ranges_min=0 \ + --default_ranges_max=6 \ + --mean_value=127.5 \ + --std_value=127.5 +``` + +## Multiple output arrays + +Some models have multiple outputs. Even in a model with only one output, you may +want for the inference code to return the contents of other arrays as well, or +to perform inference on a subgraph with multiple outputs (see the section below +on specifying arbitrary arrays as input/output arrays). + +Either way, using `--output_arrays` instead of `--output_array` allows to +specify a comma-separated list of output arrays. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,224,224,3 \ + --input_array=input \ + --output_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu +``` + +## Multiple input arrays + +Some models have multiple inputs; even in a model with a single input, you may +want for the inference code to implement only a subgraph with multiple inputs +(see the section below on specifying arbitrary arrays as input/output arrays). + +Either way, multiple input arrays are specified by using `--input_arrays` +instead of `--input_array` to specify a comma-separated list of input arrays. In +that case, one also needs to use `--input_shapes` instead of `--input_shape`. +The syntax for `--input_shapes` is a bit trickier, since already the singular +`--input_shape` was a comma-separated list of integers! Multiple input shapes +are delimited by a colon (`:`) in `--input_shapes`. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shapes=1,28,28,96:1,28,28,16:1,28,28,192:1,28,28,64 \ + --input_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_3/MaxPool_0a_3x3/MaxPool,InceptionV1/InceptionV1/Mixed_3b/Branch_0/Conv2d_0a_1x1/Relu \ + --output_array=InceptionV1/Logits/Predictions/Reshape_1 +``` + +## Specifying arbitrary arrays in a graph as input or output arrays + +Any array in the input file can be specified as an input or output array. This +allows to use the converter to extract a sub-graph out of the input graph file. +The converter then automatically discards any part of the graph that is not +needed for the subgraph identified by the specified input and output arrays. +Another use case for specifying multiple output arrays is to get inference code +to return the contents of some specified intermediate activations array, not +just the output activations. + +In order to know which array you want to pass as `--input_arrays` / +`--output_arrays`, it helps to have a visualization of the graph. See the +section below on graph visualization. When using graph visualization for that +purpose, make sure to use `--dump_graphviz=` to visualize exactly the graph as +it is in the actual final form being exported to the output file. + +Note that the final representation of an on-device inference workload (say, in +TensorFlow Lite flatbuffers format) tends to have coarser granularity than the +very fine granularity of the TensorFlow GraphDef representation. For example, +while a fully-connected layer is typically represented as at least four separate +ops in TensorFlow GraphDef (Reshape, MatMul, BiasAdd, Relu...), it is typically +represented as a single "fused" op (FullyConnected) in the converter's optimized +representation and in the final on-device representation (e.g. in TensorFlow +Lite flatbuffer format). As the level of granularity gets coarser, some +intermediate arrays (say, the array between the MatMul and the BiasAdd in the +TensorFlow GraphDef) are dropped. When specifying intermediate arrays as +`--input_arrays` / `--output_arrays`, it is generally at least desirable (and +often required) to specify arrays that are meant to survive in the final form of +the graph, after fusing. These are typically the outputs of activation functions +(since everything in each layer until the activation function tends to get +fused). + +Here is an example of extracting just a sub-graph, namely just a single fused +layer, out of a TensorFlow GraphDef, and exporting a TensorFlow GraphDef +containing just that subgraph: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --output_file=/tmp/foo.pb \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shapes=1,28,28,96:1,28,28,16:1,28,28,192:1,28,28,64 \ + --input_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_3/MaxPool_0a_3x3/MaxPool,InceptionV1/InceptionV1/Mixed_3b/Branch_0/Conv2d_0a_1x1/Relu \ + --output_array=InceptionV1/InceptionV1/Mixed_3b/concat_v2 +``` + +## Logging + +### Standard logging + +The converter generates some informative log messages during processing. The +easiest way to view them is to add `--logtostderr` to command lines. For the +previous example, that gives: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --logtostderr +``` + +After some initialization messages, we get the following informative messages: + +``` +I1101 21:51:33.297475 5339 graph_transformations.cc:39] Before general graph transformations: 416 operators, 583 arrays (0 quantized) +I1101 21:51:33.308972 5339 graph_transformations.cc:39] After general graph transformations pass 1: 31 operators, 89 arrays (0 quantized) +I1101 21:51:33.309204 5339 graph_transformations.cc:39] Before dequantization graph transformations: 31 operators, 89 arrays (0 quantized) +I1101 21:51:33.309368 5339 allocate_transient_arrays.cc:312] Total transient array allocated size: 1048576 bytes, theoretical optimal value: 786432 bytes. +I1101 21:51:33.309484 5339 toco_tooling.cc:249] Estimated count of arithmetic ops: 0.099218 billion (note that a multiply-add is counted as 2 ops). +``` + +### Verbose logging + +For debugging purposes, the converter supports two levels of verbose logging, +which can be set by passing a `--v=` flag: + +* At `--v=1`, the converter generates text dumps of the graph at various + points during processing, as well as log messages about every graph + transformation that did take place, typically answering questions of the + form "why was my graph transformed in this way"? +* At `--v=2`, the converter additionally generates log messages about graph + transformations that were considered but not actually performed, typically + answering questions of the form "why was my graph NOT transformed when I + expected it would be?". + +### Graph "video" logging + +When `--dump_graphviz=` is used (see the section on Graph visualizations), one +may additionally pass `--dump_graphviz_video`, which causes a graph +visualization to be dumped after each individual graph transformations, often +resulting in thousands of files. Typically, one would then bisect into these +files to understand when a given change was introduced in the graph. + +## Graph visualizations + +The converter is able to export a graph to the GraphViz Dot format, for easy +visualization. Combined with the converter's ability to transform the graph into +a simpler, coarser-granularity representation, that makes it a very powerful +visualization tool. + +There are two ways to get the converter to export a GraphViz Dot file, +corresponding to two separate use cases. Understanding the difference between +them is key to getting useful graph visualizations. + +### Using `--output_format=GRAPHVIZ_DOT` + +The first way to get a graphviz rendering is to pass +`--output_format=GRAPHVIZ_DOT`, instead of the `--output_format` that you would +otherwise use. This says: "I just want to get a plausible visualization of that +graph". The upside is that it makes for very simple command lines, and makes the +converter very lax about aspects of the graph or the command line that it would +otherwise complain about. Example: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.dot \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=GRAPHVIZ_DOT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +The resulting `.dot` file can be rendered into a PDF as follows: + +``` +dot -Tpdf -O /tmp/foo.dot +``` + +And the resulting `.dot.pdf` can be viewed in any PDF viewer, but we suggest one +with a good ability to pan and zoom across a very large page; Google Chrome does +well in that respect. + +``` +google-chrome /tmp/foo.dot.pdf +``` + +Example PDF files are viewable online in the next section. + +### Using `--dump_graphviz=` + +The second way to get a graphviz rendering is to pass a `--dump_graphviz=` flag +specifying a destination directory to dump GraphViz rendering to. Unlike the +previous approach, this one allows you to keep your real command-line (with your +real `--output_format` and other flags) unchanged, just appending a +`--dump_graphviz=` flag to it. This says: "I want visualizations of the actual +graph during this specific conversion process". Example: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --dump_graphviz=/tmp +``` + +This generates a few files in the destination directory, here `/tmp`. Most +important are these two files: + +``` +/tmp/toco_AT_IMPORT.dot +/tmp/toco_AFTER_TRANSFORMATIONS.dot +``` + +`toco_AT_IMPORT.dot` represents the graph as it was imported from +`--input_file`, before any transformation was applied to it (besides some +transformations that are applied immediately while importing). This tends to be +a complex visualization with limited information, but is useful especially in +situations where a conversion command fails (this file is generated even if the +conversion subsequently fails). + +`toco_AFTER_TRANSFORMATIONS.dot` represents the graph after all transformations +were applied to it, just before it was exported to the `--output_file`. +Typically, this is a much smaller graph, and it conveys much more information +about each node. + +Again, these can be rendered to PDFs: + +``` +dot -Tpdf -O /tmp/toco_*.dot +``` + +The resulting files can be seen here: + +* [toco_AT_IMPORT.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf) +* [toco_AFTER_TRANSFORMATIONS.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf). + +### Legend for the graph visualizations + +* Operators are red square boxes with the following hues of red: + * Most operators are + bright + red. + * Some typically heavy operators (e.g. Conv) are rendered in a + darker + red. +* Arrays are octogons with the following colors: + * Constant arrays are + blue. + * Activation arrays are gray: + * Internal (intermediate) activation arrays are + light + gray. + * Those activation arrays that are designated as `--input_arrays` or + `--output_arrays` are + dark + gray. + * RNN state arrays are green. Because of the way that the converter + represents RNN back-edges explicitly, each RNN state is represented by a + pair of green arrays: + * The activation array that is the source of the RNN back-edge (i.e. + whose contents are copied into the RNN state array after having been + computed) is + light + green. + * The actual RNN state array is + dark + green. It is the destination of the RNN back-edge updating + it. diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md new file mode 100644 index 0000000000..cc6d416959 --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md @@ -0,0 +1,238 @@ +# TensorFlow Lite Optimizing Converter command-line reference + +This page is complete reference of command-line flags. It is complemented by the +following other documents: + +* [README](../README.md) +* [Command-line examples](cmdline_examples.md) + +Table of contents: + +[TOC] + +## High-level overview + +A full list and detailed specification of all flags is given in the next +section. For now we focus on a higher-level description of command lines: + +``` +toco \ + --input_format=... \ + --output_format=... \ + --input_file=... \ + --output_file=... \ + [model flags...] \ + [transformation flags...] \ + [logging flags...] +``` + +In other words, the converter requires at least the following mandatory flags: +`--input_format`, `--output_format`, `--input_file`, `--output_file`. Depending +on the input and output formats, additional flags may be allowed or mandatory: + +* *Model flags* provide additional information about the model stored in the + input file. + * `--output_array` or `--output_arrays` specify which arrays in the input + file are to be considered the output activations. + * `--input_array` or `--input_arrays` specify which arrays in the input + file are to be considered the input activations. + * `--input_shape` or `--input_shapes` specify the shapes of the input + arrays. + * `--mean_value` or `--mean_values`, and `--std_value` or `--std_values`, + give the dequantization parameters of the input arrays, for the case + when the output file will accept quantized input arrays. +* *Transformation flags* specify options of the transformations to be applied + to the graph, i.e. they specify requested properties that the output file + should have. + * `--input_type` specifies the type that the input arrays should have + after transformations, in the output file. This is where you choose + whether you want runtime inference code to accept float or quantized + inputs. This flag only applies to float or quantized inputs, and allows + to convert between the two. This flag has no effect on all other types + of inputs, such as ordinary integer arrays. + * `--inference_type` or `--inference_types` specify the type that generic + intermediate and output activation arrays should have after + transformations, in the output file. This is where you choose whether + you want runtime inference code to perform float or quantized inference + arithmetic. + * Some transformation flags allow to carry on with quantization when the + input graph is not properly quantized: `--default_ranges_min`, + `--default_ranges_max`, `--drop_fake_quant`, + `--reorder_across_fake_quant`. +* *Logging flags* described below. + +## Command-line flags complete reference + +### Mandatory flags + +* `--input_format`. Type: string. Specifies the format of the input file. + Allowed values: + * `TENSORFLOW_GRAPHDEF` — The TensorFlow GraphDef format. Both + binary and text proto formats are allowed. + * `TFLITE` — The TensorFlow Lite flatbuffers format. +* `--output_format`. Type: string. Specifies the format of the output file. + Allowed values: + * `TENSORFLOW_GRAPHDEF` — The TensorFlow GraphDef format. Always + produces a file in binary (not text) proto format. + * `TFLITE` — The TensorFlow Lite flatbuffers format. + * Whether a float or quantized TensorFlow Lite file will be produced + depends on the `--inference_type` flag. + * Whether the produced TensorFlow Lite file will accept a float or + quantized input depends on the `--input_type` flag. + * `GRAPHVIZ_DOT` — The GraphViz `.dot` format. This asks the + converter to generate a reasonable graphical representation of the graph + after simplification by a generic set of transformation. + * A typical `dot` command line to view the resulting graph might look + like: `dot -Tpdf -O file.dot`. + * Note that since passing this `--output_format` means losing the + information of which output format you actually care about, and + since the converter's transformations depend on the specific output + format, the resulting visualization may not fully reflect what you + would get on the actual output format that you are using. To avoid + that concern, and generally to get a visualization of exactly what + you get in your actual output format as opposed to just a merely + plausible visualization of a model, consider using `--dump_graphviz` + instead and keeping your true `--output_format`. +* `--input_file`. Type: string. Specifies the path of the input file. This may + be either an absolute or a relative path. +* `--output_file`. Type: string. Specifies the path of the output file. + +### Model flags + +* `--output_array`. Type: string. Specifies a single array as the output + activations. Incompatible with `--output_arrays`. +* `--output_arrays`. Type: comma-separated list of strings. Specifies a list + of arrays as the output activations, for models with multiple outputs. + Incompatible with `--output_array`. +* `--input_array`. Type: string. Specifies a single array as the input + activations. Incompatible with `--input_arrays`. +* `--input_arrays`. Type: comma-separated list of strings. Specifies a list of + arrays as the input activations, for models with multiple inputs. + Incompatible with `--input_array`. + +When `--input_array` is used, the following flags are available to provide +additional information about the single input array: + +* `--input_shape`. Type: comma-separated list of integers. Specifies the shape + of the input array, in TensorFlow convention: starting with the outer-most + dimension (the dimension corresponding to the largest offset stride in the + array layout), ending with the inner-most dimension (the dimension along + which array entries are typically laid out contiguously in memory). + * For example, a typical vision model might pass + `--input_shape=1,60,80,3`, meaning a batch size of 1 (no batching), an + input image height of 60, an input image width of 80, and an input image + depth of 3, for the typical case where the input image is a RGB bitmap + (3 channels, depth=3) stored by horizontal scanlines (so 'width' is the + next innermost dimension after 'depth'). +* `--mean_value` and `--std_value`. Type: floating-point. The decimal point + character is always the dot (`.`) regardless of the locale. These specify + the (de-)quantization parameters of the input array, to use when the output + file will take a quantized input array (that is, when passing + `--input_type=QUANTIZED_UINT8`). + * The meaning of mean_value and std_value is as follows: each quantized + value in the quantized input array will be interpreted as a mathematical + real number (i.e. as an input activation value) according to the + following formula: + * `real_value = (quantized_input_value - mean_value) / std_value`. + * When performing float inference (`--inference_type=FLOAT`) on a + quantized input, the quantized input would be immediately dequantized by + the inference code according to the above formula, before proceeding + with float inference. + * When performing quantized inference + (`--inference_type=QUANTIZED_UINT8`), no dequantization is ever to be + performed by the inference code; however, the quantization parameters of + all arrays, including those of the input arrays as specified by + mean_value and std_value, all participate in the determination of the + fixed-point multipliers used in the quantized inference code. + +When `--input_arrays` is used, the following flags are available to provide +additional information about the multiple input arrays: + +* `--input_shapes`. Type: colon-separated list of comma-separated lists of + integers. Each comma-separated list of integer gives the shape of one of the + input arrays specified in `--input_arrays`, in the same order. See + `--input_shape` for details. + * Example: `--input_arrays=foo,bar --input_shapes=2,3:4,5,6` means that + there are two input arrays. The first one, "foo", has shape [2,3]. The + second one, "bar", has shape [4,5,6]. +* `--mean_values`, `--std_values`. Type: comma-separated lists of + floating-point numbers. Each number gives the corresponding value for one of + the input arrays specified in `--input_arrays`, in the same order. See + `--mean_value`, `--std_value` for details. + +### Transformation flags + +* `--input_type`. Type: string. Specifies what should be the type of the + entries in the input array(s) in the output file, after transformations, for + those input arrays that are originally either floating-point or quantized + real numbers in the input file. If there are multiple such input arrays, + then they all use this type. Input arrays of other types, such as arrays of + plain integers or strings, are not concerned with this flag. Allowed values: + * `FLOAT` — Keep floating-point input arrays as such. Dequantize any + quantized input array. entries ("float32"). + * `QUANTIZED_UINT8` — Quantize floating-point input arrays, to have + 8-bit unsigned integer entries. The quantization params are specified by + `--mean_value`, `--std_value` flags as explained in the documentation of + these flags. +* `--inference_type`. Type: string. Specifies what to do with floating-point + arrays found in the input file, besides input arrays. In other words, this + controls the possible quantization of floating-point weights, intermediate + activations, and output activations. Has no effect on arrays that aren't + floating-point in the input file. Allowed values: + * `FLOAT` — Keep floating-point arrays as floating-point in the + output file. This corresponds to what is commonly called "floating-point + inference". + * `QUANTIZED_UINT8` — Quantize floating-point arrays, changing their + storage data type from float to some integer type: + * All float activations are quantized as `uint8`. + * Almost all float weights are quantized as `uint8`. + * A few exceptions exist. In particular, the bias-vectors in + "Conv" and "FullyConnected" layers are quantized as `int32` + instead for technical reasons. +* `--default_ranges_min`, `--default_ranges_max`. Type: floating-point. The + decimal point character is always the dot (`.`) regardless of the locale. + These flags enable what is called "dummy quantization". If defined, their + effect is to define fallback (min, max) range values for all arrays that do + not have a properly specified (min, max) range in the input file, thus + allowing to proceed with quantization of non-quantized or + incorrectly-quantized input files. This enables easy performance prototyping + ("how fast would my model run if I quantized it?") but should never be used + in production as the resulting quantized arithmetic is inaccurate. +* `--drop_fake_quant`. Type: boolean. Default: false. Causes fake-quantization + nodes to be dropped from the graph. This may be used to recover a plain + float graph from a fake-quantized graph. +* `--reorder_across_fake_quant`. Type: boolean. Default: false. Normally, + fake-quantization nodes must be strict boundaries for graph transformations, + in order to ensure that quantized inference has the exact same arithmetic + behavior as quantized training --- which is the whole point of quantized + training and of FakeQuant nodes in the first place. However, that entails + subtle requirements on where exactly FakeQuant nodes must be placed in the + graph. Some quantized graphs have FakeQuant nodes at unexpected locations, + that prevent graph transformations that are necessary in order to generate a + well-formed quantized representation of these graphs. Such graphs should be + fixed, but as a temporary work-around, setting this + reorder_across_fake_quant flag allows the converter to perform necessary + graph transformaitons on them, at the cost of no longer faithfully matching + inference and training arithmetic. + +### Logging flags + +The following are standard Google logging flags: + +* `--logtostderr` redirects Google logging to standard error, typically making + it visible in a terminal. +* `--v` sets verbose logging levels (for debugging purposes). Defined levels: + * `--v=1`: log all graph transformations that did make a change on the + graph. + * `--v=2`: log all graph transformations that did *not* make a change on + the graph. + +The following flags allow to generate graph visualizations of the actual graph +at various points during transformations: + +* `--dump_graphviz=/path` enables dumping of the graphs at various stages of + processing as GraphViz `.dot` files. Generally preferred over + `--output_format=GRAPHVIZ_DOT` as this allows you to keep your actually + relevant `--output_format`. +* `--dump_graphviz_video` enables dumping of the graph after every single + graph transformation (for debugging purposes). diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md new file mode 100644 index 0000000000..440f9c367c --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md @@ -0,0 +1,62 @@ +# TensorFlow Lite Optimizing Converter (TOCO) Python API reference + +## High-level overview + +While the TensorFlow Lite Optimizing Converter can be used from the command +line, it is often convenient to use it as part of Python model build and +training script. This is so that conversion can be part of your model +development pipeline. This allows you to know early and often that you are +designing a model that can be targeted to devices with mobile. + +## API + +In Python you can run `help(tf.contrib.lite)` to get documentation on functions. +In particular, `tf.contrib.lite.toco_convert` presents a simple API and +`tf.contrib.lite.toco_from_protos` allows more detailed control of TOCO using +the protobuf interface to TOCO. + +## Example + +In particular, here we show creating a simple model and converting it to a +TensorFlow Lite Model. + +```python +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +out = tf.identity(val, name="out") +with tf.Session() as sess: + tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) + open("test.tflite", "wb").write(tflite_modeL) +``` + +**NOTE** Currently, the TOCO command will cause a fatal error to the Python +interpreter when TOCO conversion fails. This will be remedied as soon as +possible. + +## Example 2: Export with variables + +If a model has variables, they need to be turned into constants. This process is +known as freezing, and it can actually be accomplished with + +```python +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +var = tf.get_variable("weights", dtype=tf.float32, shape=(1,64,64,3)) +val = img + var + +def canonical_name(x): + return x.name.split(":")[0] + +out = tf.identity(val, name="out") +with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + out_tensors = [out] + frozen_graphdef = tf.graph_util.convert_variables_to_constants( + sess, sess.graph_def, map(canonical_name, out_tensors)) + tflite_model = tf.contrib.lite.toco_convert( + frozen_graphdef, [img], out_tensors) + open("converted_model.tflite", "wb").write(tflite_model) +``` diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 24692ff12f..6e2190cb7a 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -60,61 +60,6 @@ def _safe_div(numerator, denominator, name): name=name) -# TODO(ptucker): Move this somewhere common, to share with ops/losses/losses.py. -def _assert_weights_rank(weights, values): - """`weights` rank must be either `0`, or the same as 'values'.""" - return check_ops.assert_rank_in(weights, (0, array_ops.rank(values))) - - -def _count_condition(values, - weights=None, - metrics_collections=None, - updates_collections=None): - """Sums the weights of cases where the given values are True. - - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - - Args: - values: A `bool` `Tensor` of arbitrary size. - weights: Optional `Tensor` whose rank is either 0, or the same rank as - `values`, and must be broadcastable to `values` (i.e., all dimensions - must be either `1`, or the same as the corresponding `values` - dimension). - metrics_collections: An optional list of collections that the metric - value variable should be added to. - updates_collections: An optional list of collections that the metric update - ops should be added to. - - Returns: - value_tensor: A `Tensor` representing the current value of the metric. - update_op: An operation that accumulates the error from a batch of data. - - Raises: - ValueError: If `weights` is not `None` and its shape doesn't match `values`, - or if either `metrics_collections` or `updates_collections` are not a list - or tuple. - """ - check_ops.assert_type(values, dtypes.bool) - count_ = metrics_impl.metric_variable([], dtypes.float32, name='count') - - values = math_ops.to_float(values) - if weights is not None: - weights = math_ops.to_float(weights) - with ops.control_dependencies((_assert_weights_rank(weights, values),)): - values = math_ops.multiply(values, weights) - - value_tensor = array_ops.identity(count_) - update_op = state_ops.assign_add(count_, math_ops.reduce_sum(values)) - - if metrics_collections: - ops.add_to_collections(metrics_collections, value_tensor) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return value_tensor, update_op - - def streaming_true_positives(predictions, labels, weights=None, @@ -194,17 +139,13 @@ def streaming_true_negatives(predictions, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope(name, 'true_negatives', - (predictions, labels, weights)): - - predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access - predictions=math_ops.cast(predictions, dtype=dtypes.bool), - labels=math_ops.cast(labels, dtype=dtypes.bool), - weights=weights) - is_true_negative = math_ops.logical_and( - math_ops.equal(labels, False), math_ops.equal(predictions, False)) - return _count_condition(is_true_negative, weights, metrics_collections, - updates_collections) + return metrics.true_negatives( + predictions=predictions, + labels=labels, + weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, + name=name) def streaming_false_positives(predictions, @@ -294,34 +235,6 @@ def streaming_false_negatives(predictions, name=name) -# TODO(ptucker): Move this somewhere common, to share with ops/losses/losses.py. -def _broadcast_weights(weights, values): - """Broadcast `weights` to the same shape as `values`. - - This returns a version of `weights` following the same broadcast rules as - `mul(weights, values)`. When computing a weighted average, use this function - to broadcast `weights` before summing them; e.g., - `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`. - - Args: - weights: `Tensor` whose rank is either 0, or the same rank as `values`, and - must be broadcastable to `values` (i.e., all dimensions must be either - `1`, or the same as the corresponding `values` dimension). - values: `Tensor` of any shape. - - Returns: - `weights` broadcast to `values` shape. - """ - with ops.name_scope(None, 'broadcast_weights', (values, weights)) as scope: - weights_shape = weights.get_shape() - values_shape = values.get_shape() - if (weights_shape.is_fully_defined() and values_shape.is_fully_defined() and - weights_shape.is_compatible_with(values_shape)): - return weights - with ops.control_dependencies((_assert_weights_rank(weights, values),)): - return math_ops.multiply(weights, array_ops.ones_like(values), name=scope) - - def streaming_mean(values, weights=None, metrics_collections=None, @@ -423,8 +336,10 @@ def streaming_mean_tensor(values, updates_collections=updates_collections, name=name) -@deprecated(None, "Please switch to tf.metrics.accuracy. Note that the order " - "of the inputs of labels and predictions have been switched.") + +@deprecated( + None, 'Please switch to tf.metrics.accuracy. Note that the order of the ' + 'labels and predictions arguments has been switched.') def streaming_accuracy(predictions, labels, weights=None, @@ -592,53 +507,6 @@ def streaming_recall(predictions, name=name) -def _true_negatives(labels, - predictions, - weights=None, - metrics_collections=None, - updates_collections=None, - name=None): - """Sum the weights of true negatives. - - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - - Args: - labels: The ground truth values, a `Tensor` whose dimensions must match - `predictions`. Will be cast to `bool`. - predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will - be cast to `bool`. - weights: Optional `Tensor` whose rank is either 0, or the same rank as - `labels`, and must be broadcastable to `labels` (i.e., all dimensions must - be either `1`, or the same as the corresponding `labels` dimension). - metrics_collections: An optional list of collections that the metric - value variable should be added to. - updates_collections: An optional list of collections that the metric update - ops should be added to. - name: An optional variable_scope name. - - Returns: - value_tensor: A `Tensor` representing the current value of the metric. - update_op: An operation that accumulates the error from a batch of data. - - Raises: - ValueError: If `predictions` and `labels` have mismatched shapes, or if - `weights` is not `None` and its shape doesn't match `predictions`, or if - either `metrics_collections` or `updates_collections` are not a list or - tuple. - """ - with variable_scope.variable_scope(name, 'true_negatives', - (predictions, labels, weights)): - - predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access - predictions=math_ops.cast(predictions, dtype=dtypes.bool), - labels=math_ops.cast(labels, dtype=dtypes.bool), - weights=weights) - is_true_negative = math_ops.logical_and( - math_ops.equal(labels, False), math_ops.equal(predictions, False)) - return _count_condition(is_true_negative, weights, metrics_collections, - updates_collections) - - def streaming_false_positive_rate(predictions, labels, weights=None, @@ -696,16 +564,16 @@ def streaming_false_positive_rate(predictions, weights=weights) false_p, false_positives_update_op = metrics.false_positives( - labels, - predictions, - weights, + labels=labels, + predictions=predictions, + weights=weights, metrics_collections=None, updates_collections=None, name=None) - true_n, true_negatives_update_op = _true_negatives( - labels, - predictions, - weights, + true_n, true_negatives_update_op = metrics.true_negatives( + labels=labels, + predictions=predictions, + weights=weights, metrics_collections=None, updates_collections=None, name=None) @@ -1102,8 +970,10 @@ def streaming_curve_points(labels=None, return points, update_op -@deprecated(None, "Please switch to tf.metrics.auc. Note that the order of " - "the inputs of labels and predictions have been switched.") + +@deprecated( + None, 'Please switch to tf.metrics.auc. Note that the order of the ' + 'labels and predictions arguments has been switched.') def streaming_auc(predictions, labels, weights=None, @@ -1636,9 +1506,10 @@ def streaming_sensitivity_at_specificity(predictions, updates_collections=updates_collections, name=name) + @deprecated( - None, "Please switch to tf.metrics.precision_at_thresholds. Note that the " - "order of of the inputs of labels and predictions have been switched.") + None, 'Please switch to tf.metrics.precision_at_thresholds. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_precision_at_thresholds(predictions, labels, thresholds, @@ -1697,9 +1568,10 @@ def streaming_precision_at_thresholds(predictions, updates_collections=updates_collections, name=name) + @deprecated( - None, "Please switch to tf.metrics.recall_at_thresholds. Note that the " - "order of of the inputs of labels and predictions have been switched.") + None, 'Please switch to tf.metrics.recall_at_thresholds. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_recall_at_thresholds(predictions, labels, thresholds, @@ -1909,8 +1781,8 @@ def _at_k_name(name, k=None, class_id=None): return name -@deprecated("2016-11-08", "Please use `streaming_sparse_recall_at_k`, " - "and reshape labels from [batch_size] to [batch_size, 1].") +@deprecated('2016-11-08', 'Please use `streaming_sparse_recall_at_k`, ' + 'and reshape labels from [batch_size] to [batch_size, 1].') def streaming_recall_at_k(predictions, labels, k, @@ -2543,7 +2415,8 @@ def streaming_sparse_average_precision_at_top_k(top_k_predictions, updates_collections=updates_collections, name=name) -@deprecated(None, "Please switch to tf.metrics.mean.") + +@deprecated(None, 'Please switch to tf.metrics.mean.') def streaming_mean_absolute_error(predictions, labels, weights=None, diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 190a25d4d7..5ff7516246 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -233,6 +233,8 @@ def input_layer(features, ordered_columns = [] for column in sorted(feature_columns, key=lambda x: x.name): ordered_columns.append(column) + # TODO(b/67952670): Implement a column._var_scope_name property and use + # that instead of column.name. with variable_scope.variable_scope(None, default_name=column.name): tensor = column._get_dense_tensor( # pylint: disable=protected-access builder, @@ -340,6 +342,8 @@ def linear_model(features, ordered_columns = [] builder = _LazyBuilder(features) for column in sorted(feature_columns, key=lambda x: x.name): + # TODO(b/67952670): Implement a column._var_scope_name property and use + # that instead of column.name. with variable_scope.variable_scope(None, default_name=column.name): ordered_columns.append(column) if isinstance(column, _CategoricalColumn): @@ -489,15 +493,36 @@ def embedding_column( representation (e.g., to feed to a DNN). Inputs must be a `_CategoricalColumn` created by any of the - `categorical_column_*` function. Here is an example embedding of an identity - column for a DNN model: + `categorical_column_*` function. Here is an example of using + `embedding_column` with `DNNClassifier`: ```python video_id = categorical_column_with_identity( key='video_id', num_buckets=1000000, default_value=0) columns = [embedding_column(video_id, 9),...] - features = tf.parse_example(..., features=make_parse_example_spec(columns)) - dense_tensor = input_layer(features, columns) + + estimator = tf.estimator.DNNClassifier(feature_columns=columns, ...) + + label_column = ... + def input_fn(): + features = tf.parse_example( + ..., features=make_parse_example_spec(columns + [label_column])) + labels = features.pop(label_column.name) + return features, labels + + estimator.train(input_fn=input_fn, steps=100) + ``` + + Here is an example using `embedding_column` with model_fn: + + ```python + def model_fn(features, ...): + video_id = categorical_column_with_identity( + key='video_id', num_buckets=1000000, default_value=0) + columns = [embedding_column(video_id, 9),...] + dense_tensor = input_layer(features, columns) + # Form DNN layers, calculate loss, and return EstimatorSpec. + ... ``` Args: @@ -551,12 +576,144 @@ def embedding_column( dimension=dimension, combiner=combiner, initializer=initializer, + shared_embedding_collection_name=None, ckpt_to_load_from=ckpt_to_load_from, tensor_name_in_ckpt=tensor_name_in_ckpt, max_norm=max_norm, trainable=trainable) +def _shared_embedding_columns( + categorical_columns, dimension, combiner='mean', initializer=None, + shared_embedding_collection_name=None, ckpt_to_load_from=None, + tensor_name_in_ckpt=None, max_norm=None, trainable=True): + """List of `_DenseColumn`s that convert from sparse, categorical input. + + This is similar to `embedding_column`, except that that it produces a list of + embedding columns that share the same embedding weights. + + Use this when your inputs are sparse and of the same type (e.g. watched and + impression video IDs that share the same vocabulary), and you want to convert + them to a dense representation (e.g., to feed to a DNN). + + Inputs must be a list of `_CategoricalColumn` created by any of the + `categorical_column_*` function. They must all be of the same type and have + the same arguments except `key`. E.g. they can be + categorical_column_with_vocabulary_file with the same vocabulary_file. Some or + all columns could also be weighted_categorical_column. + + Here is an example embedding of two features for a DNNClassifier model: + + ```python + watched_video_id = categorical_column_with_vocabulary_file( + 'watched_video_id', video_vocabulary_file, video_vocabulary_size) + impression_video_id = categorical_column_with_vocabulary_file( + 'impression_video_id', video_vocabulary_file, video_vocabulary_size) + columns = shared_embedding_columns( + [watched_video_id, impression_video_id], dimension=10) + + estimator = tf.estimator.DNNClassifier(feature_columns=columns, ...) + + label_column = ... + def input_fn(): + features = tf.parse_example( + ..., features=make_parse_example_spec(columns + [label_column])) + labels = features.pop(label_column.name) + return features, labels + + estimator.train(input_fn=input_fn, steps=100) + ``` + + Here is an example using `shared_embedding_columns` with model_fn: + + ```python + def model_fn(features, ...): + watched_video_id = categorical_column_with_vocabulary_file( + 'watched_video_id', video_vocabulary_file, video_vocabulary_size) + impression_video_id = categorical_column_with_vocabulary_file( + 'impression_video_id', video_vocabulary_file, video_vocabulary_size) + columns = shared_embedding_columns( + [watched_video_id, impression_video_id], dimension=10) + dense_tensor = input_layer(features, columns) + # Form DNN layers, calculate loss, and return EstimatorSpec. + ... + ``` + + Args: + categorical_columns: List of `_CategoricalColumn`s created by a + `categorical_column_with_*` function. These columns produce the sparse IDs + that are inputs to the embedding lookup. All columns must be of the same + type and have the same arguments except `key`. E.g. they can be + categorical_column_with_vocabulary_file with the same vocabulary_file. + Some or all columns could also be weighted_categorical_column. + dimension: An integer specifying dimension of the embedding, must be > 0. + combiner: A string specifying how to reduce if there are multiple entries + in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with + 'mean' the default. 'sqrtn' often achieves good accuracy, in particular + with bag-of-words columns. Each of this can be thought as example level + normalizations on the column. For more information, see + `tf.embedding_lookup_sparse`. + initializer: A variable initializer function to be used in embedding + variable initialization. If not specified, defaults to + `tf.truncated_normal_initializer` with mean `0.0` and standard deviation + `1/sqrt(dimension)`. + shared_embedding_collection_name: Optional name of the collection where + shared embedding weights are added. If not given, a reasonable name will + be chosen based on the names of `categorical_columns`. + ckpt_to_load_from: String representing checkpoint name/pattern from which to + restore column weights. Required if `tensor_name_in_ckpt` is not `None`. + tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from + which to restore the column weights. Required if `ckpt_to_load_from` is + not `None`. + max_norm: If not `None`, embedding values are l2-normalized to this value. + trainable: Whether or not the embedding is trainable. Default is True. + + Returns: + A list of `_DenseColumn`s that converts from sparse input. The order of + results follows the ordering of `categorical_columns`. + + Raises: + ValueError: if `dimension` not > 0. + ValueError: if any of the given `categorical_columns` is of different type + or has different arguments than the others. + ValueError: if exactly one of `ckpt_to_load_from` and `tensor_name_in_ckpt` + is specified. + ValueError: if `initializer` is specified and is not callable. + """ + if (dimension is None) or (dimension < 1): + raise ValueError('Invalid dimension {}.'.format(dimension)) + if (ckpt_to_load_from is None) != (tensor_name_in_ckpt is None): + raise ValueError('Must specify both `ckpt_to_load_from` and ' + '`tensor_name_in_ckpt` or none of them.') + + if (initializer is not None) and (not callable(initializer)): + raise ValueError('initializer must be callable if specified.') + if initializer is None: + initializer = init_ops.truncated_normal_initializer( + mean=0.0, stddev=1 / math.sqrt(dimension)) + # TODO(b/67952670): Validate categorical_columns. + if not shared_embedding_collection_name: + # Sort the columns so the name is deterministic even if the user passes + # columns from an unsorted collection, such as dict.values(). + sorted_columns = sorted(categorical_columns, key=lambda x: x.name) + shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns) + shared_embedding_collection_name += '_shared_embedding' + + result = [] + for column in categorical_columns: + result.append(_EmbeddingColumn( + categorical_column=column, + dimension=dimension, + combiner=combiner, + initializer=initializer, + shared_embedding_collection_name=shared_embedding_collection_name, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable)) + return result + + def numeric_column(key, shape=(1,), default_value=None, @@ -1847,14 +2004,18 @@ class _EmbeddingColumn( _DenseColumn, collections.namedtuple('_EmbeddingColumn', ( 'categorical_column', 'dimension', 'combiner', 'initializer', - 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable' + 'shared_embedding_collection_name', 'ckpt_to_load_from', + 'tensor_name_in_ckpt', 'max_norm', 'trainable' ))): - """See `_embedding_column`.""" + """See `embedding_column`.""" @property def name(self): if not hasattr(self, '_name'): - self._name = '{}_embedding'.format(self.categorical_column.name) + if self.shared_embedding_collection_name: + self._name = '{}_shared_embedding'.format(self.categorical_column.name) + else: + self._name = '{}_embedding'.format(self.categorical_column.name) return self._name @property @@ -1877,14 +2038,47 @@ class _EmbeddingColumn( sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor - # Create embedding weight, and restore from checkpoint if necessary. - embedding_weights = variable_scope.get_variable( - name='embedding_weights', - shape=(self.categorical_column._num_buckets, self.dimension), # pylint: disable=protected-access - dtype=dtypes.float32, - initializer=self.initializer, - trainable=self.trainable and trainable, - collections=weight_collections) + embedding_shape = (self.categorical_column._num_buckets, self.dimension) # pylint: disable=protected-access + if self.shared_embedding_collection_name: + shared_embedding_collection = ops.get_collection( + self.shared_embedding_collection_name) + if shared_embedding_collection: + if len(shared_embedding_collection) > 1: + raise ValueError( + 'Collection {} can only contain one variable. ' + 'Suggested fix A: Choose a unique name for this collection. ' + 'Suggested fix B: Do not add any variables to this collection. ' + 'The feature_column library already adds a variable under the ' + 'hood.'.format(shared_embedding_collection)) + embedding_weights = shared_embedding_collection[0] + if embedding_weights.shape != embedding_shape: + raise ValueError( + 'Shared embedding collection {} contains variable {} of ' + 'unexpected shape {}. Expected shape is {}. ' + 'Suggested fix A: Choose a unique name for this collection. ' + 'Suggested fix B: Do not add any variables to this collection. ' + 'The feature_column library already adds a variable under the ' + 'hood.'.format( + self.shared_embedding_collection_name, embedding_weights.name, + embedding_weights.shape, embedding_shape)) + else: + embedding_weights = variable_scope.get_variable( + name=self.shared_embedding_collection_name + '_weights', + shape=embedding_shape, + dtype=dtypes.float32, + initializer=self.initializer, + trainable=self.trainable and trainable, + collections=weight_collections) + ops.add_to_collection( + self.shared_embedding_collection_name, embedding_weights) + else: + embedding_weights = variable_scope.get_variable( + name='embedding_weights', + shape=embedding_shape, + dtype=dtypes.float32, + initializer=self.initializer, + trainable=self.trainable and trainable, + collections=weight_collections) if self.ckpt_to_load_from is not None: to_restore = embedding_weights if isinstance(to_restore, variables.PartitionedVariable): diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index e57e9a9836..4b06a85ad3 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -27,6 +27,7 @@ from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.client import session from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column as fc_lib from tensorflow.python.feature_column import feature_column_lib as fc from tensorflow.python.feature_column.feature_column import _CategoricalColumn from tensorflow.python.feature_column.feature_column import _DenseColumn @@ -3403,6 +3404,7 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual('mean', embedding_column.combiner) self.assertIsNotNone(embedding_column.initializer) self.assertIsNone(embedding_column.ckpt_to_load_from) + self.assertIsNone(embedding_column.shared_embedding_collection_name) self.assertIsNone(embedding_column.tensor_name_in_ckpt) self.assertIsNone(embedding_column.max_norm) self.assertTrue(embedding_column.trainable) @@ -3426,6 +3428,7 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual(embedding_dimension, embedding_column.dimension) self.assertEqual('my_combiner', embedding_column.combiner) self.assertEqual('my_initializer', embedding_column.initializer()) + self.assertIsNone(embedding_column.shared_embedding_collection_name) self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from) self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt) self.assertEqual(42., embedding_column.max_norm) @@ -3456,6 +3459,7 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual(embedding_dimension, embedding_column.dimension) self.assertEqual('my_combiner', embedding_column.combiner) self.assertEqual('my_initializer', embedding_column.initializer()) + self.assertIsNone(embedding_column.shared_embedding_collection_name) self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from) self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt) self.assertEqual(42., embedding_column.max_norm) @@ -3979,6 +3983,269 @@ class EmbeddingColumnTest(test.TestCase): self.assertAllEqual(expected_lookups, input_layer.eval()) +class SharedEmbeddingColumnTest(test.TestCase): + + def test_defaults(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc_lib._shared_embedding_columns( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + self.assertIs(categorical_column_a, embedding_column_a.categorical_column) + self.assertIs(categorical_column_b, embedding_column_b.categorical_column) + self.assertEqual(embedding_dimension, embedding_column_a.dimension) + self.assertEqual(embedding_dimension, embedding_column_b.dimension) + self.assertEqual('mean', embedding_column_a.combiner) + self.assertEqual('mean', embedding_column_b.combiner) + self.assertIsNotNone(embedding_column_a.initializer) + self.assertIsNotNone(embedding_column_b.initializer) + self.assertIsNone(embedding_column_a.ckpt_to_load_from) + self.assertIsNone(embedding_column_b.ckpt_to_load_from) + self.assertEqual('aaa_bbb_shared_embedding', + embedding_column_a.shared_embedding_collection_name) + self.assertEqual('aaa_bbb_shared_embedding', + embedding_column_b.shared_embedding_collection_name) + self.assertIsNone(embedding_column_a.tensor_name_in_ckpt) + self.assertIsNone(embedding_column_b.tensor_name_in_ckpt) + self.assertIsNone(embedding_column_a.max_norm) + self.assertIsNone(embedding_column_b.max_norm) + self.assertTrue(embedding_column_a.trainable) + self.assertTrue(embedding_column_b.trainable) + self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual('bbb_shared_embedding', embedding_column_b.name) + self.assertEqual( + (embedding_dimension,), embedding_column_a._variable_shape) + self.assertEqual( + (embedding_dimension,), embedding_column_b._variable_shape) + self.assertEqual({ + 'aaa': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_a._parse_example_spec) + self.assertEqual({ + 'bbb': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_b._parse_example_spec) + + def test_all_constructor_args(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + combiner='my_combiner', + initializer=lambda: 'my_initializer', + shared_embedding_collection_name='shared_embedding_collection_name', + ckpt_to_load_from='my_ckpt', + tensor_name_in_ckpt='my_ckpt_tensor', + max_norm=42., + trainable=False) + self.assertIs(categorical_column_a, embedding_column_a.categorical_column) + self.assertIs(categorical_column_b, embedding_column_b.categorical_column) + self.assertEqual(embedding_dimension, embedding_column_a.dimension) + self.assertEqual(embedding_dimension, embedding_column_b.dimension) + self.assertEqual('my_combiner', embedding_column_a.combiner) + self.assertEqual('my_combiner', embedding_column_b.combiner) + self.assertEqual('my_initializer', embedding_column_a.initializer()) + self.assertEqual('my_initializer', embedding_column_b.initializer()) + self.assertEqual('shared_embedding_collection_name', + embedding_column_a.shared_embedding_collection_name) + self.assertEqual('shared_embedding_collection_name', + embedding_column_b.shared_embedding_collection_name) + self.assertEqual('my_ckpt', embedding_column_a.ckpt_to_load_from) + self.assertEqual('my_ckpt', embedding_column_b.ckpt_to_load_from) + self.assertEqual('my_ckpt_tensor', embedding_column_a.tensor_name_in_ckpt) + self.assertEqual('my_ckpt_tensor', embedding_column_b.tensor_name_in_ckpt) + self.assertEqual(42., embedding_column_a.max_norm) + self.assertEqual(42., embedding_column_b.max_norm) + self.assertFalse(embedding_column_a.trainable) + self.assertFalse(embedding_column_b.trainable) + self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual('bbb_shared_embedding', embedding_column_b.name) + self.assertEqual( + (embedding_dimension,), embedding_column_a._variable_shape) + self.assertEqual( + (embedding_dimension,), embedding_column_b._variable_shape) + self.assertEqual({ + 'aaa': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_a._parse_example_spec) + self.assertEqual({ + 'bbb': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_b._parse_example_spec) + + def test_deep_copy(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + original_a, _ = fc_lib._shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + combiner='my_combiner', + initializer=lambda: 'my_initializer', + shared_embedding_collection_name='shared_embedding_collection_name', + ckpt_to_load_from='my_ckpt', + tensor_name_in_ckpt='my_ckpt_tensor', + max_norm=42., trainable=False) + for embedding_column_a in (original_a, copy.deepcopy(original_a)): + self.assertEqual('aaa', embedding_column_a.categorical_column.name) + self.assertEqual(3, embedding_column_a.categorical_column._num_buckets) + self.assertEqual({ + 'aaa': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_a.categorical_column._parse_example_spec) + + self.assertEqual(embedding_dimension, embedding_column_a.dimension) + self.assertEqual('my_combiner', embedding_column_a.combiner) + self.assertEqual('my_initializer', embedding_column_a.initializer()) + self.assertEqual('shared_embedding_collection_name', + embedding_column_a.shared_embedding_collection_name) + self.assertEqual('my_ckpt', embedding_column_a.ckpt_to_load_from) + self.assertEqual('my_ckpt_tensor', embedding_column_a.tensor_name_in_ckpt) + self.assertEqual(42., embedding_column_a.max_norm) + self.assertFalse(embedding_column_a.trainable) + self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual( + (embedding_dimension,), embedding_column_a._variable_shape) + self.assertEqual({ + 'aaa': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_a._parse_example_spec) + + def test_invalid_initializer(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + with self.assertRaisesRegexp(ValueError, 'initializer must be callable'): + fc_lib._shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2, + initializer='not_fn') + + def test_parse_example(self): + a = fc.categorical_column_with_vocabulary_list( + key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) + b = fc.categorical_column_with_vocabulary_list( + key='bbb', vocabulary_list=('omar', 'stringer', 'marlo')) + a_embedded, b_embedded = fc_lib._shared_embedding_columns( + [a, b], dimension=2) + data = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'aaa': + feature_pb2.Feature(bytes_list=feature_pb2.BytesList( + value=[b'omar', b'stringer'])), + 'bbb': + feature_pb2.Feature(bytes_list=feature_pb2.BytesList( + value=[b'stringer', b'marlo'])), + })) + features = parsing_ops.parse_example( + serialized=[data.SerializeToString()], + features=fc.make_parse_example_spec([a_embedded, b_embedded])) + self.assertIn('aaa', features) + self.assertIn('bbb', features) + with self.test_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [0, 1]], + values=np.array([b'omar', b'stringer'], dtype=np.object_), + dense_shape=[1, 2]), + features['aaa'].eval()) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [0, 1]], + values=np.array([b'stringer', b'marlo'], dtype=np.object_), + dense_shape=[1, 2]), + features['bbb'].eval()) + + def test_input_layer(self): + # Inputs. + vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [0] + # example 1, ids [] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (3, 0)), + values=(0, 1), + dense_shape=(4, 5)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0: + # A ids [2], embedding = [7, 11] + # B ids [0], embedding = [1, 2] + (7., 11., 1., 2.), + # example 1: + # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + # B ids [], embedding = [0, 0] + (2., 3.5, 0., 0.), + # example 2: + # A ids [], embedding = [0, 0] + # B ids [], embedding = [0, 0] + (0., 0., 0., 0.), + # example 3: + # A ids [1], embedding = [3, 5] + # B ids [1], embedding = [3, 5] + (3., 5., 3., 5.), + ) + + # Build columns. + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, initializer=_initializer) + + # Provide sparse input and get dense result. + input_layer = fc.input_layer( + features={'aaa': sparse_input_a, 'bbb': sparse_input_b}, + feature_columns=(embedding_column_b, embedding_column_a)) + + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ['input_layer/aaa_shared_embedding/aaa_bbb_shared_embedding_weights:0'], + tuple([v.name for v in global_vars])) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertItemsEqual( + ['input_layer/aaa_shared_embedding/aaa_bbb_shared_embedding_weights:0'], + tuple([v.name for v in trainable_vars])) + shared_embedding_vars = ops.get_collection('aaa_bbb_shared_embedding') + self.assertItemsEqual( + ['input_layer/aaa_shared_embedding/aaa_bbb_shared_embedding_weights:0'], + tuple([v.name for v in shared_embedding_vars])) + with _initialized_session(): + self.assertAllEqual(embedding_values, trainable_vars[0].eval()) + self.assertAllEqual(expected_lookups, input_layer.eval()) + + class WeightedCategoricalColumnTest(test.TestCase): def test_defaults(self): diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py index 971dc9d553..3358b78efd 100644 --- a/tensorflow/python/kernel_tests/metrics_test.py +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -3857,6 +3857,56 @@ class MeanPerClassAccuracyTest(test.TestCase): self.assertAlmostEqual(desired_mean_accuracy, mean_accuracy.eval()) +class FalseNegativesTest(test.TestCase): + + def setUp(self): + np.random.seed(1) + ops.reset_default_graph() + + def testVars(self): + metrics.false_negatives( + labels=(0, 1, 0, 1), + predictions=(0, 0, 1, 1)) + _assert_metric_variables(self, ('false_negatives/count:0',)) + + def testUnweighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + tn, tn_update_op = metrics.false_negatives( + labels=labels, predictions=predictions) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(3., tn_update_op.eval()) + self.assertAllClose(3., tn.eval()) + + def testWeighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + weights = constant_op.constant((1., 1.5, 2., 2.5)) + tn, tn_update_op = metrics.false_negatives( + labels=labels, predictions=predictions, weights=weights) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(5., tn_update_op.eval()) + self.assertAllClose(5., tn.eval()) + + class FalseNegativesAtThresholdsTest(test.TestCase): def setUp(self): @@ -3906,6 +3956,56 @@ class FalseNegativesAtThresholdsTest(test.TestCase): self.assertAllEqual((0.0, 8.0, 11.0), fn.eval()) +class FalsePositivesTest(test.TestCase): + + def setUp(self): + np.random.seed(1) + ops.reset_default_graph() + + def testVars(self): + metrics.false_positives( + labels=(0, 1, 0, 1), + predictions=(0, 0, 1, 1)) + _assert_metric_variables(self, ('false_positives/count:0',)) + + def testUnweighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + tn, tn_update_op = metrics.false_positives( + labels=labels, predictions=predictions) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(7., tn_update_op.eval()) + self.assertAllClose(7., tn.eval()) + + def testWeighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + weights = constant_op.constant((1., 1.5, 2., 2.5)) + tn, tn_update_op = metrics.false_positives( + labels=labels, predictions=predictions, weights=weights) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(14., tn_update_op.eval()) + self.assertAllClose(14., tn.eval()) + + class FalsePositivesAtThresholdsTest(test.TestCase): def setUp(self): @@ -3957,6 +4057,56 @@ class FalsePositivesAtThresholdsTest(test.TestCase): self.assertAllEqual((125.0, 42.0, 12.0), fp.eval()) +class TrueNegativesTest(test.TestCase): + + def setUp(self): + np.random.seed(1) + ops.reset_default_graph() + + def testVars(self): + metrics.true_negatives( + labels=(0, 1, 0, 1), + predictions=(0, 0, 1, 1)) + _assert_metric_variables(self, ('true_negatives/count:0',)) + + def testUnweighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + tn, tn_update_op = metrics.true_negatives( + labels=labels, predictions=predictions) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(3., tn_update_op.eval()) + self.assertAllClose(3., tn.eval()) + + def testWeighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + weights = constant_op.constant((1., 1.5, 2., 2.5)) + tn, tn_update_op = metrics.true_negatives( + labels=labels, predictions=predictions, weights=weights) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(4., tn_update_op.eval()) + self.assertAllClose(4., tn.eval()) + + class TrueNegativesAtThresholdsTest(test.TestCase): def setUp(self): @@ -4006,6 +4156,56 @@ class TrueNegativesAtThresholdsTest(test.TestCase): self.assertAllEqual((5.0, 15.0, 23.0), tn.eval()) +class TruePositivesTest(test.TestCase): + + def setUp(self): + np.random.seed(1) + ops.reset_default_graph() + + def testVars(self): + metrics.true_positives( + labels=(0, 1, 0, 1), + predictions=(0, 0, 1, 1)) + _assert_metric_variables(self, ('true_positives/count:0',)) + + def testUnweighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + tn, tn_update_op = metrics.true_positives( + labels=labels, predictions=predictions) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(7., tn_update_op.eval()) + self.assertAllClose(7., tn.eval()) + + def testWeighted(self): + labels = constant_op.constant(((0, 1, 0, 1, 0), + (0, 0, 1, 1, 1), + (1, 1, 1, 1, 0), + (0, 0, 0, 0, 1))) + predictions = constant_op.constant(((0, 0, 1, 1, 0), + (1, 1, 1, 1, 1), + (0, 1, 0, 1, 0), + (1, 1, 1, 1, 1))) + weights = constant_op.constant((1., 1.5, 2., 2.5)) + tn, tn_update_op = metrics.true_positives( + labels=labels, predictions=predictions, weights=weights) + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAllClose(0., tn.eval()) + self.assertAllClose(12., tn_update_op.eval()) + self.assertAllClose(12., tn.eval()) + + class TruePositivesAtThresholdsTest(test.TestCase): def setUp(self): diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 67caf72621..717ee1254f 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -1511,6 +1511,56 @@ def false_positives_at_thresholds(labels, predictions, thresholds, weights=None, return values['fp'], update_ops['fp'] +def true_negatives(labels, predictions, weights=None, + metrics_collections=None, + updates_collections=None, + name=None): + """Sum the weights of true_negatives. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `labels` dimension). + metrics_collections: An optional list of collections that the metric + value variable should be added to. + updates_collections: An optional list of collections that the metric update + ops should be added to. + name: An optional variable_scope name. + + Returns: + value_tensor: A `Tensor` representing the current value of the metric. + update_op: An operation that accumulates the error from a batch of data. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + RuntimeError: If eager execution is enabled. + """ + if context.in_eager_mode(): + raise RuntimeError('tf.metrics.true_negatives is not ' + 'supported when eager execution is enabled.') + + with variable_scope.variable_scope( + name, 'true_negatives', (predictions, labels, weights)): + + predictions, labels, weights = _remove_squeezable_dimensions( + predictions=math_ops.cast(predictions, dtype=dtypes.bool), + labels=math_ops.cast(labels, dtype=dtypes.bool), + weights=weights) + is_true_negative = math_ops.logical_and(math_ops.equal(labels, False), + math_ops.equal(predictions, False)) + return _count_condition(is_true_negative, weights, metrics_collections, + updates_collections) + + def true_negatives_at_thresholds(labels, predictions, thresholds, weights=None, metrics_collections=None, updates_collections=None, diff --git a/tensorflow/tools/api/golden/tensorflow.metrics.pbtxt b/tensorflow/tools/api/golden/tensorflow.metrics.pbtxt index 85088834b7..e9b996c9f5 100644 --- a/tensorflow/tools/api/golden/tensorflow.metrics.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.metrics.pbtxt @@ -116,6 +116,10 @@ tf_module { name: "specificity_at_sensitivity" argspec: "args=[\'labels\', \'predictions\', \'sensitivity\', \'weights\', \'num_thresholds\', \'metrics_collections\', \'updates_collections\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'200\', \'None\', \'None\', \'None\'], " } + member_method { + name: "true_negatives" + argspec: "args=[\'labels\', \'predictions\', \'weights\', \'metrics_collections\', \'updates_collections\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } member_method { name: "true_negatives_at_thresholds" argspec: "args=[\'labels\', \'predictions\', \'thresholds\', \'weights\', \'metrics_collections\', \'updates_collections\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " -- GitLab From 7be5ab5ddbfd7d81ffd7e2022633908a14a52ff1 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 14 Nov 2017 11:39:36 -0800 Subject: [PATCH 0402/1801] Add tflite documentation Merged commit includes the following changes: 175703479 by yifeif: Internal change. PiperOrigin-RevId: 175703479 (This is 1 of the 3 commits in from staging c674e27bfd68a6c990e694b6afd901bfeeaa006d) --- tensorflow/contrib/lite/README.md | 200 +++++++ .../lite/g3doc/TFLite-Architecture.jpg | Bin 0 -> 48710 bytes tensorflow/contrib/lite/g3doc/apis.md | 359 ++++++++++++ .../contrib/lite/g3doc/custom_operators.md | 91 ++++ tensorflow/contrib/lite/g3doc/ios.md | 67 +++ tensorflow/contrib/lite/g3doc/models.md | 22 + .../lite/g3doc/tf_ops_compatibility.md | 417 ++++++++++++++ tensorflow/contrib/lite/java/demo/README.md | 36 ++ .../lite/models/smartreply/g3doc/README.md | 146 +++++ .../lite/models/testdata/g3doc/README.md | 102 ++++ .../lite/models/testdata/g3doc/asr_am.svg | 4 + .../lite/models/testdata/g3doc/hotword.svg | 4 + .../lite/models/testdata/g3doc/speakerid.svg | 4 + .../lite/models/testdata/g3doc/tts.svg | 4 + .../models/testdata/smartreply_samples.tsv | 50 ++ tensorflow/contrib/lite/nnapi/README.md | 15 + tensorflow/contrib/lite/toco/README.md | 26 + .../lite/toco/g3doc/cmdline_examples.md | 509 ++++++++++++++++++ .../lite/toco/g3doc/cmdline_reference.md | 238 ++++++++ .../contrib/lite/toco/g3doc/python_api.md | 62 +++ 20 files changed, 2356 insertions(+) create mode 100644 tensorflow/contrib/lite/README.md create mode 100644 tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg create mode 100644 tensorflow/contrib/lite/g3doc/apis.md create mode 100644 tensorflow/contrib/lite/g3doc/custom_operators.md create mode 100644 tensorflow/contrib/lite/g3doc/ios.md create mode 100644 tensorflow/contrib/lite/g3doc/models.md create mode 100644 tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md create mode 100644 tensorflow/contrib/lite/java/demo/README.md create mode 100644 tensorflow/contrib/lite/models/smartreply/g3doc/README.md create mode 100644 tensorflow/contrib/lite/models/testdata/g3doc/README.md create mode 100644 tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg create mode 100755 tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg create mode 100755 tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg create mode 100755 tensorflow/contrib/lite/models/testdata/g3doc/tts.svg create mode 100644 tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv create mode 100644 tensorflow/contrib/lite/nnapi/README.md create mode 100644 tensorflow/contrib/lite/toco/README.md create mode 100644 tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md create mode 100644 tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md create mode 100644 tensorflow/contrib/lite/toco/g3doc/python_api.md diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md new file mode 100644 index 0000000000..b173936f5b --- /dev/null +++ b/tensorflow/contrib/lite/README.md @@ -0,0 +1,200 @@ +# TensorFlow Lite +TensorFlow Lite is TensorFlow’s lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration. + +TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device. + +![image](g3doc/TFLite-Architecture.jpg) +# Getting Started with a Demo App + +This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using a quantized Mobilenet model. A device running Android 5.0 ( API 21) or higher is required to run the demo. + +There are 3 ways to get the demo app to your device + - Download the prebuilt binary or + - Use Android Studio to build the application or + - Download the source code for TensorFlow Lite and the demo and build it using bazel + +## Description +In the demo app, inference is done using the TensorFlow Lite Java API. The demo app classifies frames in real-time, displaying the top most probable classifications. It also displays the time taken to detect the object. + +## Downloading the pre-built binary +The fastest path to trying the demo, is to download the pre-built binary +[TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) + +Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera’s field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified. + +## Building in Android Studio using TensorFlow Lite AAR from JCenter +The simplest way to compile the demo app, and try out changes to the project code is to use AndroidStudio. + + - Install the latest version of Android Studio 3 as specified [here](https://developer.android.com/studio/index.html). + - Make sure the Android SDK version is greater than 26 and NDK version is greater than 14 (in the Android Studio Settings). + - Import the tensorflow/contrib/lite/java/demo directory as a new Android Studio project. + - Click through installing all the Gradle extensions it requests. + - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) + - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory: + tensorflow/contrib/lite/java/demo/app/src/main/assets/ + - Build and run the demo app + +## Building TensorFlow Lite and the demo app from source + +### Clone the TensorFlow repo +- git clone + [https://github.com/tensorflow/tensorflow](https://github.com/tensorflow/tensorflow) + +### Install Bazel +If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html) + +NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/demo/TfLiteCameraDemo.apk) instead. + +### Install Android NDK and SDK +Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system. + - Install the latest version of Bazel as per the instructions on the [Bazel website](https://bazel.build/versions/master/docs/install.html) + - The Android NDK is required to build the native (C/C++) TensorFlow code. The current recommended version is 14b, which may be found [here](https://developer.android.com/tools/revisions/build-tools.html). + - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TensorFlow Android demo (though it will run on API >= 21 devices). + + - The Android NDK is required to build the native (C/C++) TensorFlow Lite code. The current recommended version is 14b, which can be found [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads). + + - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TF Android demo (though it will run on API >= 21 devices). + - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.` + +``` + Android_sdk_repository ( + name = "androidsdk", + api_level = 23, + build_tools_version = "23.0.2", + path = "/home/xxxx/android-sdk-linux/", ) + +android_ndk_repository( + name="androidndk", + path="/home/xxxx/android-ndk-r10e/", + api_level=19) + +``` +Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md) + +### Build the source code +Run bazel with the following command to build the demo. + +Build the demo app: +bazel build --cxxopt='--std=c++11' //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo + +### More about the demo +The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app. + +# TensorFlow Lite Quick Start + +## Step 1. Decide which GraphDef to use + Depending on the use case, the developer may choose to use one of the popular + open-sourced models such as InceptionV3 or MobileNets, re-train these models + with their own custom data set or even build their own custom model. + +### Using a pre-trained model + +[MobileNets](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html) is a family of mobile-first computer vision models for [TensorFlow](https://www.tensorflow.org/) designed to effectively maximize accuracy while being mindful of the restricted resources for an on-device or embedded application. MobileNets are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as [Inception](https://arxiv.org/pdf/1602.07261.pdf), are used. Google provides 16 pre-trained [ImageNet](http://www.image-net.org/challenges/LSVRC/) classification checkpoints for MobileNets for use in mobile projects of all sizes. + +[Inception-v3](https://arxiv.org/abs/1512.00567) is an image recognition model which achieves fairly high accuracy in recognizing general objects with 1000 classes, like "Zebra", "Dalmatian", and "Dishwasher". The model extracts general features from input images using a convolutional neural network and classifies them based on those features with fully-connected and softmax layers. + +[On Device Smart Reply](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) is an on-device model which provides one-touch replies for an incoming text message by suggesting contextually relevant messages. The model is built specifically for memory constrained devices such as watches & phones and it has been successfully used to surface [Smart Replies on Android Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html). Note that this model only works on Android as of now. + +These pre-trained models can be downloaded from [here](models.md). + +### Retrain Inception-V3 or MobileNet for a custom data set +The above pre-trained models have been trained on the ImageNet data set, which consists of 1000 predefined classes. A model will need to be re-trained if these classes are not relevant or useful for a given use case. This technique is called transfer learning, which starts with a model that has been already trained on a problem and will then be retrained on a similar problem. Deep learning from scratch can take days, but transfer learning can be done fairly quickly. In order to do this, a developer will need to generate their custom data set labeled with the relevant classes. + +The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) codelab walks through this process step-by-step. The retraining code supports retraining for both floating point and quantized inference. + + +### Train a custom model +A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow’s Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. + +TensorFlow Lite currently supports a subset of TensorFlow operators. Please refer to [this document](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for details of supported operators and their usage. This +set will continue to expand in future releases of Tensorflow Lite. + + +## Step 2. Model format conversion + +The model generated in Step 1 is a standard Tensorflow model. After the completion of Step 1 a user should have a standard .pb or .pbtxt GraphDef file. If the application developer is using a pre-trained model (as defined in Step 1 above), they can download a ready to use, already converted model for use from [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/models.md). Models generated using retraining (aka transfer learning) or custom models will need to be converted using the steps mentioned below. + +A prerequisite to converting the model to the Tensorflow Lite format is to freeze the graph. + +Since we employ several formats, the following definitions may be useful: + - GraphDef (.pb) - a protobuf that represents the TensorFlow training and or computation graph. This contains operators, tensors, and variables definitions. + + - CheckPoint (.ckpt) - Serialized variables from a TensorFlow graph. Note, this does not contain the graph structure, so alone it cannot typically be interpreted. + + - FrozenGraphDef - a subclass of GraphDef that contains no variables. A GraphDef can be converted to a frozen graphdef by taking a checkpoint and a graphdef and converting every variable into a constant with the value looked up in the checkpoint. + + - SavedModel - A collection of GraphDef and CheckPoint together with a signature that labels input and output arguments to a model. A GraphDef and Checkpoint can be extracted from a saved model. + + - TensorFlow lite model (.lite) - a serialized flatbuffer, containing TensorFlow lite operators and Tensors for the TensorFlow lite interpreter. This is most analogous to TensorFlow frozen GraphDefs. + +### Freeze Graph +To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as “freezing” the graph. + +The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md) + +Graph freezing can be done using the command below (and modifying the arguments appropriately) + +``` +bazel build tensorflow/python/tools:freeze_graph + +bazel-bin/tensorflow/python/tools/freeze_graph\ + --input_graph=/tmp/mobilenet_v1_224.pb \ + --input_checkpoint=/tmp/checkpoints/mobilenet-10202.ckpt \ + --input_binary=true --output_graph=/tmp/frozen_mobilenet_v1_224.pb \ + --output_node_names=MobileNet/Predictions/Reshape_1 +``` + +The user has to first build the freeze_graph script using bazel and then run the script. The input_binary flag has to be enabled to ensure that the protobuf is read and written in binary format. The user has to input the .pb and the .ckpt files to freeze the graph The output_node_names may not be obvious outside of the code that built the model. The easiest way to find them is to visualize the graph, either with +graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2/#3). + +This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS. On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool. + +Here is a sample command line to convert the frozen Graphdef to '.lite' format for The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used. + +``` +bazel build tensorflow/contrib/lite/toco:toco + +bazel run --config=opt tensorflow/contrib/lite/toco:toco -- \ + --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ + --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ + --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \ + --input_type=FLOAT --input_arrays=input \ + --output_arrays=MobilenetV1/Predictions/Reshape_1 --input_shapes=1,224,224,3 +``` + +- The input_file argument should point to the frozen GraphDef file that holds the model architecture. +- The output_file argument should point to where the TensorFlow Lite model file should be generated. +- The input_type and inference_type arguments should be set to FLOAT, unless converted a [quantized](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/) model. +- Setting the input_array, output_array and input_shape arguments are a bit trickier. The easiest way to find these values is to explore the graph in tensorboard . The user should reuse the arguments that were used for specifying the output nodes for inference in the `freeze_graph`step. + +Note, it is also possible to use the Tensorflow Optimizing Converter through protos either from Python or from the command line see the +documentation [here](https://github.com/tensorflow/tensorflow/tree/mastertensorflow/contrib/lite/python:toco_from_protos target) A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example, + +``` +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +out = tf.identity(val, name="out") +with tf.Session() as sess: + tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) + open("converteds_model.tflite", "wb").write(tflite_model) + +``` +For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md). + +You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). + +## Step 3. Use the TensorFlow Lite model for inference in a mobile app + +After completion of Step 2 the developer should have a .lite model. + +### For Android +Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). + +The [demo app] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). + +Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). + +### For iOS +Follow the documentation [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app. diff --git a/tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg b/tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bc83946647c6a923a8a0bd3a041b42e4febe6a31 GIT binary patch literal 48710 zcmex=oIr{vTivovIz$!vMUve7&T5@$f4}C@t|nX z#SbdRNkvVZTw>x9l2WQ_>Kd9_CZ=ZQ7M51dF0O9w9-dyoA)#U65s^{JDXD4c8JStd zC8cHM6_r)ZEv;?s9i3g1CQq3GGAU*RJ2VdF$b$$4{OPfBE|D`;VW$K>lK6U=!0ld(M2vX z6_bamA3_l z2KW9s^}peD{?{gb|3uhd5yr+*!$*TZdX|KG2@sy;|tI(UwiTr}yrfW3cYnj`{8NpZM~>&;QThBzil5(NzQ$BbS-) zR=0j!|EDKP{r7){#|~0c0$mwIsD?vIyT4vGS>3((s`yj6wYM7d1LMr)wyro{u(@{E zgQ>YUjJDZpeLc;+W&ehzSN6M=*#1;^F>D08R+4?;q6RJ}rzTGZz9knpZ27XZ*Z=r3 z|KHB}PcoP+zG_P@5j41ILa^I?UG0IHJwGMgy^8$M**i8xwUzDabaA_=Ik!uNHy69T)>wZuEm%B-i zE@><3njF=TY z^yB(JH(lYs{~126d$l88>OaGYql@bI|1)7+3HCBt!XZ~IM!x^6?j^_NZg=$Gs~lgS z_~-S`Ss_jAvz(ZerX^J~#+j~&+xl<0>64ju$Ltrmwzz zm-*A`#N(^x@iJ8Tcgs#bT<+5o+91>1d?kGD%k5Ho4|`@Ns=WKmV|LAwq59i;$B)VE zANF3>JT*0lf5nM)$q&E(Y1ONL-TzgDarML1()amq$KQF?5oFNVeoorrT$R+br&mhr zMPAKMZ*vPx@;b66G_=-#ZF!tFPmO8ycFS9w$CtSl*)eFD&TKsPnDh3}E{2sWUi>=W zE~>ZhLfN!qKVm-AzFz;~-?m@pnhP#Jek-ujUHQk32iNXCzU7|h`?AG(;(TlA;vZKd zo?L#iao4}|*-1>Y|J-c(jQfu@*sS;b9sg|&)A4_H-GcHPmDlEE{gb)2CUcgB;( zIfC<#ZQPStXj7*=H7HZGOrPrr-&FO6`X{#Pe+%-TY3=X*&+sI5`ro7fLKrW_XPVDC z-d=Jzw@As*dFN@vpPSg17R;($vVGPRvyC1?k@8vF{Mq(Kd;MpaSakj0-u6%HO+P%h zQ{VKTVcq@54zk~7^X!xQ@a1j2`JCjm$qJJvI2?F>Z^!;^*>>7j&KPKnS{|$A zFuh$-94obL!gb-dk%>Q3kNeF$TWlUD|1tKBn)K?a({!F^pFLKuwR=fMRNLf5uU;1j z8y%j)aq|WLj|*&Si`QkvDP3h|~ zR;eDd=LulVFx8DdbZh<1cPizs$~8H+r?#(H@7t8Jqsel@Q(0%`H`(Xj{ZW6U{nBN7 zkh{)|1L@P}gqJUg_D)}NJNkUTk;S2Hd{qIgw?I7vN-?Zf^$a}!Z;}6Z|4$3J>iPaN zJdwHo-u_>U_O^BE5AXkJp8fave})UKWmgyiT}4oEIQQfDpAuXAzyD`AwtLmXn(Y4! z%3_`W{xkfzz_wH*(1DdoSi(;Ccg`-ytsA#b3ZJ=c-@!ThRYyLQx2HF~s-Ej~W!Y7( zZmqBCE7v{iVrYI-w*T{6{=dIP5C7PGc)C$mcJcCCCY?$(k4%3rssFk$_OA%ziAt-f z*Bh5?o^>g)T!KSjy$+*<(&`Frul*aYq*t$wObqL1*m2A+d&i;%e*F8QW;9b1no->#6 ziH{FMt0o`K40MnYZ#J3hbUboW^lHz)494V3Cg<=VAkZ%J3isYO=d z3okl|Y*`U!D|JmWWc3=YHOp5#+VkrB;akThuaaDw`*dR9cUK+f@;Zz4L5ms;OhWcu zHSz8!-EqHU$vVlhxjX(dNIiM?M}N(W>-RG*?>QOj7Zl-thJB&@lklY%9pnnW%Nbl* z{M)eaVQ3pChw|e)A7iIACYC2IS-SVjqfGX^-QmA?zK_}xz&rEqC9(%Dg9ZajUd$rP|KT zvox9v?wmeaB`U&rpyIg475%N}t*)JmO#XBwFZ9HD)9Wt^PUbI)JgOx!QHgQNSGFx0 z3wYuMUvRFA(g@=$l=I1}s^u*#`?gQxg6!h$Z3Ro7v;@4U{e69Tpo3g>e(IyWao;4< zCuNuRZfCDih+ovV^#zAya=`Io#*@dRN>MWmdIlnv4{!6V@`B}AhG4YjPN6+=*2Qde zA+>i*?;_{i22+hiT?|yk>&lNswO*UM#8r36F-^II*7lf$`7dLBc|Ouq4wpK}*j9gD zv^U39OVg_F*p?L`t3PPZYjv&sR^$4g!Ey4xdajrs-T#`^|NIj@|95HYzvBEAkL;&> z;nF_;`H}uFp821D1kM+%zmW2uVg8~1w>tWt+piSLpQ>UNeg5;K_+K{vpMNyYckF** zRDb@V`?qQ0KM%jM=znU<)b;s4!;cmJ86XmWLh8Rr+CTrW__sUvpNC&%+&|50bgldc zcY-m@s=td{{}ue|Q~v4iuxQVJ20Nwy3=2;FXRvRTgjx3Q;-UWx7Panzf0i!@jH`#c ztq1B0k^0QU`Z;!sJN^Y<&{%H|bDJC7F8eJ7`_IV-9;w%SAre0S`H^^t%Mez{-?E5* z&QGOK_yc#=QFmx~qdGwNkpll8KmMP8w9^k(O?|g8w@hE%*rbSINncgimqL@(@i%wh zIJTB+%0AQN6^pvGixn1iX)sU&uXz5SAyEDI@}D77W&Sg~Qu)tN@}EJAVflym{~XW$ z`~08bh12e=M|Gg;*>eBA`oA3ExtLWSAMKt=l{Jm z*YRjb%&8aWwxm4x8Qc4NK3`OnfZCO-+&m2@d;V?h%E>Di*!F4DSN6mwRoC8k|7W-! zUB4si-sTnOm%qH0oflHs*>>uA57*c1#1%WwMo)FSon1XIR>w6}!uqpi`6o+{w|Q#5 zA03rfyj$h5dRJ`g-q&Zg3wcU9P7tl1TN*esci&Nw&7wMmPY;Tps_lHy|taSn_PPj?sbNs=X8UN+j*- z$V=P&p{Cg3b_$P!#W}m=aFvMneiKa(wWa^;tDd{_U+MI3DXL3S*Y&?Q{dZwb>{D6O z_`>7T-c$R}WLL7(-_t7e?c!X=DRAhM;PtG@UGG(Pdc0rKLdOtvqWc~F;q;5?kV1KT{|YS*CKfBY1Y(~vV?TaOM7;2Sk$H6@&-H>tFfp- zaNo;c!FBTg8CLB7&#b=u#xKh>c5$NE3RgyQ-={~3M>th+U6#z0&dissq> zXPBTO|Mi3Y7qL~>{xf`hEB?FrXXt$S{|qOD_<#M7|HU1eUH{`-_uqqmRv$QTCn0w( za8IYkYB8rPcN#0sDIT}By6#}oEAsN$?=w3;u?Nqq*9q@;X16-IW9#y=s?v>ti{7f7 zi|e{L*+2U9q+KCJr#a#xj~Uc$W!aVcM4Xdlg3*cRT(LDU4N!)%Yvy2eT&w_ zM8rm&+UU!?dOMfloWEM-7fmG|evZ2Ot#O`#|5K;MVZU5o@NL!BKAGiMUSC{u>bg+A zmVpIR_gbfCUzPhR%hx1x8L(-7`xW~3j#7j|x>MoAu4|hvtaY1jdTc>z@1rCcMH$9e zlNCwX@ujt;maoDN2YILG9p(7rxYCuQH_t+rGr-p%dsf5DrNV#SER1*)=l(Q2dHY4x zT}{@R>SdK8qJ@n;p98XX<=stvo3l{;DT~(UsB;o44$t)ab=$pI!=LBrsfT&H7EQdO z*W0n#=-&gGx3_GIuTAsydKZz{xy)Me`1A(GnCi>#N@l-y;Wyo_HP84;klplZnTswL zWIz2qIPq+;X=jx6x7VMn!mrL-b-yv;M|-egW!mFy=DCt9a=upI7C0o?C-J0BQu6CE zz3Ek-y@d|ke)RjXa#wEat~;*;4yrlrPI$aFXrAgh^St*i-ffbuH?{MQ$G)0A{o!u!iam9P~ z)ElouS8mUK<>*=xW!I>d6ORRD2bc+nOu7Y^9FDx+@X~{%EPT&B^@j zzHFP8MTgg-<+3F^=iD#(tGS>5Kf{&y{|wh1|7M>3?tf|ahyM(JUH>!uEs?*rcTV@) zKmEq$x75>JMZz!4Qy{X2LFsK_(=)^xhEHvMfN8O~2S@1L?ixa0As_0qi2H;dJ_huQl0>SrxZ-@NnM z3u$SG(%#4=cWzHJyY>9zuaL{9{J&M%ZaMk;;nt-I{*TXP_cpzXPQTByR=R3+(p=TA zXD;bXiaBv^f5|jUd9N>;&%8_?%Y`pIY{OhCvvW&&jGo$mhL)`hKgS$>`uFUek}r$y zoLX5kKi%j+T3%xPoZ^=;4SRM7SKiN4 zyYKGb6Lk7^l(R5F zvmW^yMb5obqI2`8*R83Kk3O}%vu4II?pf>KYTWy?HoHDPu+}MTyV0%l{}``u_bWT> zn|1Zuqp%z9zpu=SeZ0Kwo!%0rbe<#a|Mcc&PZBp;qb6wF(I0a6;2+JPXDJ;=HhU=B z23Ruu6P^F9JIL>>cJ#B%udJsYmwerS;#PCm@4BU_@;4LPa}I85o*aE)%D=n&0~}@? zn_c|H^x%>k}LgimKmwX3W|1Nz(n}bjv@lG}44xa!t#NRwpEzzq8hsP3?+x zUJ@@GIy;k{$?fFJu$(-_>7LuKs6KtWIPu40_MhR>wtvrFYMuA+3G-%iQK`E=8zsWh z1?5&qytQx5mN`B3rP$S7?@~T}S6wsp{pF3fIbOUrQ4HJ;J#qEps?XvQRyJV}|=d_8;|>#Ba7&yEtcEQ#golJHOW3)r{jy}Dui zz1WwVOAhV&&+v8Shr2qL?mM-AFMA=t__}%i=eIHHxAkn^Tfa5>Ipf&h=k{}zV$Vw^ zO&8Mp7tdFhzqa&0L)PoM@az8=Pye0uHZTA6%BzBf`fa+?eRWh=@&)I}*I!#dy;uEm z(Az0@a}_4P-ah}@jt>2Gy2VfYJ~m&^4p!MQDYNFLYFhn!v1bd@0;5*!e!1M&TI2)o z?qcn%KVPl0HNLW5*SlBrpCL{8$;qF!8#HsyRG~MQxudWKN2_>#Le@w{C0UnMbp{bk~;O5472pn|0m)z?-GBOOMU!&(}%u{IsP- z_`=t9-i_z>laP-d}ng& z#uNL8jNAS*ENDIdMB_h03)Ag?P2%t=)74bgq<(~9$^U@-uQsegsd}3d#X|26!vAFuF zu6>IC878P6zff-&BU(~5z2o7&4b$!_O!R&{$zA5UWt^DR5-nc~>-phR16I0C?sd&| z+TG7ne?rv$e*W_q+Y%}N4#mrB#O^+r*pXDkaol7>sIuSECvVzk1d&nxkFvc zm)|%2V^@Cjo{D|R&foXFcE-I)S+cHw@*Ek})4GScG*+ol+V{eze`xi~L_(`?|Bkid zw|bY_W%HJG_k6E&{~21=yZmRE|K&f!KK~y9Y-dG#|1)sLb^d3tfBB!GX8wo8|Ah7m z|Es_J@2CAk2WgWq^FOLp>VN(-EdTqd{!sr9uP^>T|1kox5sz3j${=WJjP~k_s#olhLw)uw-evi4S zCcXNI_3!=<>q46NI1b9L-iGRdFtM$b3+}P)*tV}X{goRB|F6>Z=HkuIzQ24|y|XU! zbXMuSxsu(ndYum+tKFV`^-!rte|>S}FVl1Fd9Fv_zDW!Go_tWT_H0Q!vM=VSalgIt zyZel>_r1B|pUoH;7}yw=uE|I7x;(OmuO)UhHy3T(kK}t~pEIX*9Yb|>AZm~=;GLx{ zjTG$e$U)vy8I)zOpz|=XqqPpR{Z*LliYe>0o9Iz0tZFhyM%{WFP|* zYgU-77HcvyJK-7NoAm?xFkYNoX;49rWo&=6R!iE`GwZ_to!s!BVSdQs-)e{d)h4f= z65jZA*8Ow;zE#H^n*M41j-`JmC;n%!Tj~DIxBbs=_t>!R>YPi_H%~5iR7^WKaf7i=Eg7q@8uXEi2Q%%4X)(*qiRTz^LUZaz->IuG!S?y!7d_<=^{nC0gz* z`Q4Wf2^C1-1nv6I@Wbuj@j~IV#lroj?9;cuXF3ozZ9!*tfzW;Dwi4}02ifCK?MiGt zTC_cAaoE@Ti7g*3>QWZ6ynnIZ`22r{O*$UuZwmA;TmlL5h}XLsr{(eo7w1fn+5Yap z@BJEUPUrKU*4As}q}3*itsS0(O`Dy~GEZ{%%!j>8f^H-&nE2zOpT$hSkUO_BI^=%6=f8D8oCXL!Bf-`00ewyN9J zrPuZ=n=Y4F@oBZ}T#>-IkB_M}ugeM>&Rha7OafT#kymqt3!@DFW6V-%BQ58Oy^1{c zpTQhNt))j$bNGUf|7ZBb()@29`@g2EQTPAZF8a?Q3dL zB~vZeNNKN|rxmdL#PsMnR}D^;THbz{yl2we)R}p??{bbQcdei1w&~MxS48oHvllpI2RlG}(liucBwBohaqWY?Wn4<=m>ytwT0 z!yOTKtaCnF_dE%HvTN(cZI|EJU6^F$v2=!<2=lBb87m@Zdq2GXMf~ekG{nCpg9xRws zx%Fw7m9O0=E!dSC3y^D<15v@KHLwx7*Ck$p1gXx*b_s zk7tMYzTCb450hM^?~?VGf2D2jcz5k}XufWmP++&&@ZIAM*w-PVP&cX$2R=E-w%<0CE! zbIC_LrL;YguUcyrY}$Kb9#`wK*Sb*t2`@C&vgNE@bm*oI zkBjE#HDZJeyS}eF>3R2TwRx`ny5*gIdiJTmU2EO{GaRx_NHjJ}n^-@ij z4}bLJy&UUzQ9@>=LgDeM{omFqWTl6Sw``368u8rXXt;0Mt!Q2s>y}C7s=_i88`wXs znyRA`dn-0~rPealwbCzK?OM~eeTY3+svPa{FtXzbQ{k-OhfDumkB&F|(=7M*?8VagS?;Ur ze}tyU$!#y4RhcK7JF#me_p}_D^v{dp`IPQOttmE+_MafpBX}&URz7mORK(_K#kpF~ zb7q`)?Oz*qQ~%B3EM6mJv58#_>@0lE_tpj-+*O#Vo8!E)yXRK!M7!Rlk#~>FY(JBF z&(4SW3ro_HYhQL(_qyHVP7*4Q3(ynKw>M463NJXl-t>y^KHXP0zaG<0`y8?%*6DUo zSH0oXnr%INh38ETrb!jfk@AeKJ?drp=%MwtH~&7VeK@;1zfF53?uvISwSkjGXF!VlsZH-ntt;?7 z334YMd6CxjQskx43|AIWX&xVH4)c_cKh&LD8V~G{{Lip2YUhhR?r9U1B%YsI!OmrW zY_Cp(8wX2JW#-?be^$>HZdu;x8SWSV zuKxFv`ra!0Y4h*IpX%4O-?4vc;eUqYhr8|fy!>~0{a@vC_77Dz{b!h0{GZ{z@}G?V z3@y`6|EoOzpP{Dz<9hc`mw$Kv41Bu$yZEQYzvX#v{(bm&dHgT;IrWFUQ~xvAJpa$I z&;7?mWNYL4A*xUNztex}{>@H$^Y6pI{q}$9w?Hv^@jszx{h!b6|9-MRwpSja7~;k^ zHRkEx+rQ1Nen0 zMb#J9YrXt;{BT))s}0j4Gh1+!=%~0dF#`!JYV0nGVab_)~Yo5W@5Kv6RR(}ol6`S~a2vN_{ zJL;cs7JiXG6V4j@zQZ2Ij#Eqi<5KJ9y;S^_=$(q>e-11E{jRrq_O2|#^6#yfu}5d+zKmD>D^GvsxE3?%QvA%hVQ+HRtxpq(e)s+0f%sy-%Oxkj zYJPJuXnw!*#iHG_r8!J~7m8^A`z*Jn_Eo`-$dH=zE@6rNtsI|pQ;hvCFTVWXhQvw5 z-{u{E>(FFgFu@KE>Ke})%)^JgxGd*H>Q&9#;DJ~{Hv-IJWDW$}1c z&ObSgYwvkaon<*E;<0zL^RY*_p0x-&Y}k?VGhJ=w`Tq=XpSZ((XL_ankiPh>DE{s1 z=WZ`rcCYaL*|4o2exJ~jdz9Ly5wY6Lcux7^*A-VTJ&CW-vEyC+c{4NI{=R(StnUI_ zf?iDAWpI$)^upZ?BX=F9TPbtZEy84y?_8byaK>p9!~1U6y>=Qtntt7I)2?qB_2Gw( zo%NkFcYDXzYzr|76OXIx3vI64|1j-!{TFusGuIuHA3oc)by8-!z>GVq*<=l`&$@A0 z=h3#y?_4-VZ!ezPvch_Y=vp2(tuQ|=t^3zn!k4y(XZiU&Jl<`3sK6ve{Z(f4(W+N- z%z765lIeOKSADrSYr0g5*{jP7?kz}PYaV*($yL+ueMW*o29f<)sy*vZS7`6Poz~eW zH{EpEo3;0ztN&-<%-jB-!J*{OS$VjlobB7Ezq#a_UN)Uc=RHG_p~pSBe5;Gevx{PQ z-W|XD*mUx-zM+^~d?Q+VnC<^&Zn1 zi_>e>OhYAACLb-iCAaL>zNw;*k140Ah_Wj;cRelD|H*czen)ox!TL|Sj{iQ_TYdfX zCuL(m(WI^Y=F)9_a-ObtUT7@p`ogC`t!iSZRSArTe_a3PTor%)SJ%eSKiU5o6!+@C z{_7gCe%5*WPI>3Mr|dXcMFQgl->`}V`sj&XxPABb`j)5LGbi{Ix2VtCdcGws=+u(N zD~*>XZC@A_l6UED#G$Pk*EZ#+7cc+tZ8=L-&9q5A8%|FOoKsL7eCL&u+P1FfA1Z%e zP3Ks^z<93s)=nwo*n%_9rLx3sJewEGd8K&Y^MLEu_J5eBdtjcu`srhTKF$f*c71~M z>dkD6HlDa7v|5eJeE(0=6OX&%wmhEc$Mmj=Rf>Cp#H0nE0y}Yca~ib*fBj|n|Zk2#U-nbSAVd49m{b|XL8gW$;qc?nKC&PF-oqp zUS3t%7TV*LoY1~Fx&PdzgP9&;##^|dAjyp>GY~ALO&|IPR-An%4F1h ze&IU5`A}9Hn}1%v?8JYD zR{h`uXZLr?J1!iEx zuA8R0@Oh@pBE_a^!Dc z&}G?vE4koy^@p3QtJ-#E>Xf}??(tOGa;Jp(+U}hn-X03d`sn?PLtkPsB*tleEZL{sa6%4r-G)=eP3JlHtAuoAjgW;U$)KuzSHZzQ)72%PzlNq&(H$%;Lat)A+Ymb*mTMK3G_$`=nWtrHUuyyVcz(=GTH; z)n`g6D^IXwcs&2otTri=J0^iOdrOmw*K2h}eb)Y^wD?CFo6zz6invFHlh*7I`F(Nw z!D|nC1P;ibx%zUe2jiratJ6;>PVG%*dA4bN{@Z6C?ig@7&M}KDJRj_}rQpj$`}EY3 zUqAb%tiCd(^6O^j8jXcpEtwhIr}*0kMor%?b;PbR+QO#wj8m|WDb%R|q?QcF>(jT*>J$A>0 zlQU97nH;PHV>gv(=|!IQUVq|A*IM?otZU~#3*2=w|F$VS-8Mhl_Rb*-P&kY=;nFC;&1lSi?jVA*GFAU z4U69w{qbd>Yr%9Wu_YI6)=H@Gn|9i=mO1M#ee+&3_SKh9mZ|-#&u1Jv?fd6kP_%bP zlIN1vYv#Y#xJUIgEoVvieLY^qVCk|+*M+=Z#1`y4Z}DjM+MCh$Uw!*_a;JLgX)_DX z^RFXAPD}MZ_?`JGljrcgE!S&n`P|m{CKl%BiyoOcUn_8G;)!*0eiR*YR^6p~$YZfm z*zYA*W2MY?Y`S7{@|;Vh_M7^@NlQ~Nbw8aS@;2}Ff_rvGF&AF$Uq10{LZIhc@3`bJ zTh(<7A^pxE&Duz=GC=#EqBv;dG?T(_d?^QIWh6U)j`{~Z@<3h_13g4 zGv4!TIjbME>GIdiyDm*PuV+m@DIaxm(zD7$kGQam%Tt3YL`6&Yr0e=sWv~5u|DVK@ z2=B{JR{VCt^r&#*TfJ6~K*3!$XJ6HX@Bm3sb-rSWW8LG|md-KbWY|8ec(iPx%6O6<8c-{qvHIpdwW zl7|l*?!PXLZu`amZ?8=Sv!DHIy?3`?oU&h$bLHR4{m<}x zPyP2Bt5-kJm95%)xi&@qLdg6N%^CYYpUeOKBkP*0{PRB?|E|}cO3E@9*fzI-<>I7( z2W+DI5v@^^3T0RMPsf7)GvxZ_+_-W3<+je;oYth62g0+yY=|&z?d~eyd9drKjmwl- zTf0pHUCZV`^Caxh5%TGwl$93sJ>y(b@px+F<@Xx4T2Y?MH#`*=Pb<2oeNq3b_G<6h zqV5@tr#2_gX^wcaPo?>9-Xxi-!G2K9QK__C|}-!IV%^EqGSR9?RPp-leTj2gv-@;?iiUn+HZuzIhQ)`R;zhObhm9Q4<1Lh&Y zv5t8sHl0`z^U%@OSM5@Ga$r%J%a%Y_L+t8FJaS>sFTY`!z~=Aens7?eL!qb1`bv2J z)0;m38SXmOdp+4wFJRh|%X=VG=~%FA$ggnQ7?(_6w^+YZ5|-DOJ>_dpR$0lDtI{Vb zKIzB@?MYYvxP5-tbY@ET!n(I#zN~rc9lqnrlAq^pwBPM1k$)Wd_1UAsb#-sdY}fFG zN|nvcjn6ysZTjTMJ8M2{laZ_a+gm;>Z>iYQ9F_id8LM)(J)gFgXyniOnX*}etFA16 zcfWJwgO5TjKg)e?8y&p7+JEkD(<|>@Eb98+KN()3Axh#Yl*VFYITr0np+o{yf zGTHxDz&`5X>Z51k-wWBzR}EWqe@38d_K*5EYJYTZH68yO_kQ>Ms)x(@DmCsMUm3N0 z)3WE;H>P`j__Dn?YVa5YEJ*XN$_v|+OpL5-fRRpSBw9=`YJkWv2X2^oR6mS zZ`D`E=Hv>$i#o91`VVM9O%?we#NEz^gqKJexd&i7pmg_a7^k)VmH_= zShO!5Sv@j)RPV3|E5H4-<1gsvZhvjA_oiIa^L~$f)vsm8+-Cc4n|LnvQCRXT4-v*B z=xSf$4=!A}%HzXJ^G(N&#wwHrSqm)N7k*jg`q!J!KYg7&CB-u7JO8=5DU4s|ihlaf z@QuH-{)MgnKNe5-Z|pFpgM2_>y#9ZNgo^3^nlJrlc;GUn9*NEPl@+WNu6|VOPz|Q3 z%Rk%>y83pX^6kveufl~DbtY+homt!XB7^w__&hgytnQIFt=Rp~TlV_j^7y|j;jS_7 zyZ1lYc(?v~>3@a`t|cLO1}N<`Qs#yBN!;?3ZdhgH|ImSF(UMu5YnK}O3m;W@wQh<0 zi7O=xfv&a>rztG+xXaA5?y0f~!zr!oYVYptnz<~S4J4IJbR7O@sRgYGZDf%VsFq^D zw>{BfzI6Qwx8r{=|2bM~zwAH53BAkzcBubr(tgH|T|$HDDv7O6k*)K8GT#5saDB`C zU!3b_+Hn78kT>7&`fux(dz=3=C|Lhz`1Nh_e+Ea>&+3i!kDq;ivHx0wbrjCl`|^`K z8&n(e?U@~ro&vynmYg^353UBEb^7QKBJ^Piz{ z|C0@l{~2QCe}+4va0A#cl?1vn&}rwWUDn6)Khvz&|9ZFo7stA;o%hA-PbTmB&#>I3P$>fXC{UNMzk@pz(ej>gTC>MLJ5 z81J~gWw&nd;qO(p3*xs#x-E5(X=0Z@wZoqGhtu`Qi|f|ETYv9rLXz|o76x{e>yAtR zxNn*rpZ`pb!v3_e_ML_zb!^{1n7>-{3!=EcQ zm2L6j`&*v`omklxVB~qs{c-dz`{pfYxuY+i`E{&R=e}Pmd%aBZc0Pi@&~6wF8QrqlvSU*y?0;1DI;5t zwflu%Ouu#J7H|KpiGNO<7W>rnhJj}uziCvhzsrZ~t?8F?OhvD#?@01q?s|@`){1Ly z<)go9_d_l`?|PZmr!mX2i^->wA#H|ujsAknD7B5Hvh!Bt~tWtO6Fr+@T&R~ zQJ??qw*S|XeMM*8ALFI3ukYIQdZWkn?js2)`hJgJzKzRNXnG%C`&ldcbV0G^3z6+n zR{aZK++*3i^7E8U5` z{~5TxyJz1ldGYnQRYUA+LB|8%WnV3u{Xjdjd?t$6P*~adQ99#{C=c=v;mYL{k zAN?+M_1?P9f{Aa&9>{tb}7o%;tWi#9y0kId*t(% zy%fy4mHKh_rX)rk24$P{&+FXwG9Q_>ZsPJERLc{XWB3Sn*VbwoB#Eo{TIP_g9@zT4q_`7k#@-5(l!4XPVoI_ z__ue-e})UbH76g$|Mct^!n{B zAM;<1!5g9(&av;X%|4zl#lCmu+Qk_U(p3d_t@yG&@KXL3fBw_9mwqg`6S?8<-Ff9X zzj&U15q&Ohbo*7*@6$*3Zu$IV@jV!q3cybd=GE5ke{!% z^KJg27yGRH{dtnM?Wuk7%8JRT@W-`pn;-Y8`*vMA?!S54m9>u@Pk&h9?vqlwI&*EN zo${`WdXp+1T~V5@c7INT+HRC6YsdoUCwd0wd#-4vNd~kJ@&)4tu~*fv!lD) z&$Of`oH%GQ?Lgr_g%71$+q&h}FO8pl+DySP!N~Z2*AwjvrGLXcF55`TH7>ue`=-b3 zrSJmDllFhJKEHAh33N5U7+-q+hxb2&Y7_sj7ylVf?cze=HrP!Db;{}0xn}$vj?%UW zJ@*5t<{1kRiSSRO~OHYTuk@>`RDncp@z%;A=~!<4D(Ogzi)K^c1-x^ ze#Y9M^~s%!cIW?aT>GEl`6vJ14?u@%AT(?VMm#|GAF&*V4G- z!mefak-UVYJSVY!_6Kor5Nut3|NPSXM=Nted<|2_Lf;haTDQD# zP5j=sH=T_iTZc3EXKl@WWb1U@WwuA};@>Ja?{L-J`6cxNIbIN6(5%mX2sH;!MEVmO z`c#Za>_)jJRy4czyG;}EjPf@S^5#hvUCVQvKdNRz)h}TiTMxZVTu)eg-a?b&u)PV%1*pA$RH4JJXvU-P3exy}g$?dHveb`8R&$ z|7Ct#|Nhc{hSo}pHMb^lAxr*}Rr313y8 z{Br*n{;mHR_Iv*mLFMEJ{|nP6PMJM%%H#v57oU1{w)M-c+%M+Sp4QyxdwR#nIBxpi z{$>9eF4$fF_gDW%AS!3C|Idh8x7hO*SGPvbP5b*^@0;WdhB7^N(X{JF%D>H0`Q_tg zzv+x;L9p)6>$Rv_Q4HPd|5E~Fj`pIx!N28w|1&J8UH|u2{D;M;oT~YsHVlb)`_F(J zar1B7%K!5t>OaH%h5tnAK@kTI>eur>nE#o^VgGvZe+I4BUsqg`O?qWb-A&%o1j zJ>)~^Vr|vBT@vnsZ0oBZ^d~vvS;Y}C>!<(TGyV4hsq?R{=I4dQZ*4Ey zDw-E-_DbUWo*KR#mPbQYtn@0myKIfcIiqVj8f$0UYH8*iJ~`j8QTv?M+}1_jH3>6h zc%I9Ihini{dNBXijgv_V@1F!M+UB}$)_1oXI!%5v&x_nJIpG|$W@fVdUCX1He`kij z+47&^0r%^`Z7WVJYBDHvtvvG*qk)30`~0Y;_&cTE+X`{ZEvW|GoT|v~T|9`X}O<{~2P0|1&s>erCrd^Ub}F)H6?* zrs3-Q@0)$C{s}wiR)I_PFVyy>e9-?>bMLSCuM6y(D#ZWhe*eAlKf~tm^qBO<`fK;= zcg_E8T9!Py>&Km|@#3m4swKj+G`m)v{~ogaLfQn-@ipJqo<7+(GoPjG#exk_CWhvz z_XkB>ve;OBr;l;P>ZTUkbM>_)+n)%oSAQSvY}R$XR&n2f9sPgRZT?&poj27uhTqQq zF3ZN}$K#z_rdj;^qqgu;d9PKl)VqS?8Fo9?eV=-3`{rktuJ1hm zb$#)yV~Q-LEP>5E53W0Qrf-=2_M+AVzFX@n*uSZ;RCR?K-HX_{GGppDnZoeQ=(Rmr z8jJQUUW0F*0$J|U+v~oxcKs`KUUc>QI!`xB>yw)4ADHP ze5|PNRBfO3BMrYfDSwyb^=BK_PO1Ie;w958RVKg6d=$xMUva1U+I!QUxleiLUYvhbD#|$Q zaj@9;AeQa@6&ClZE`PZ>DNkR`lpUL(6&DkK6yW zR%}BOTZz1;^XO%Kv#_WgYX^t7KFWQxPH0n{GW#R{~2E2 z^8d?n{meYp{|x=J?=Sk#5XJD#pXoot+I{~SO6|X8z4^(KwBtOo!0X4x={Yg8KH1Ju z|1fdG=bNv@KCbyRV~N-L%EDYJm9pe#ahn*MBY!N~JkPGUJ7(4U^?}FvMZRC&xZu`o zY1S5#rhv+tD`nF^Ia}?S-IFgck>>FTi-sduITwP ztYmk0wdUL^C@L|_{W>MHW15Of-Wqlj*Ye!_+=7Xx zPaA3;T$81}Fm3AEwX?6s2b;IFJ4NcH&q#b)Cf9QH_UsiYmln0g8|fJtWSez8Z_`>5 zGWq&!=g1)6^$Lr+v}asFE(Y+ihhP!GUw@(uTtdVnl@OaQ3NF2-dr>9h#&*6PC;q&a z@&Aze;`H*5i?6I$bN|ih^}o1d|1;dz{LgUY;@_F2{}~*!_n)-?G3!TtI{$OqbLlrO zE!teaS#SRf-SVI6{~3hczolOP%ewAAgUWw~N14?Y_4dzXF1^VNjNE^7di)o$-G3(k zXXsq~+vxSbrt9^S>OY!5l$_tV)HUrtLt1S8g{b$R{Qs%-L#zh%p^sdA^YG7qh8e}) z#3-^mHdS;svPqFF5py>h!lyy7%acXQ}S7&MIvcN@5uiQ6J-B0{H^x<&#++D&&db&fBaniCH{51?`!){%=`Z{tk1OnA{=j2 zA^y)Uzpndl2J=lD?tj~U|Gn~`;pXbi>x>WVf1SVouK(XzyRCklx&8N@`{tH^;Dy7N zm*6Wx2+0k>B8KQz$csf=b#fD<-FNSZeNYnhqsLAz^!ceR8{dYkSg`KQF71H5t#@}% z=w|8uUims8^5xqinT;v8j4WP9on|#xmCE({+Yx$o_N1*{AtCd8W`BE=xBu>H^Ig?Z zZ*E0?=b2{}rp+IC^sKI6-Wt8B-!%i>x8~h?xuLexWtGi!mgaj~4n0)w%ynOAnyr;- zmKYc%mAlFI_fexfE`GMPi*EXsUAv#G<>vM5%64>!hQPcV3=%VTY%6 z+Qt{_yACJGa&FIFzVOY}Y+`YN@@5bV1?|$0az0cZQzv|Db4ORw;p9?Q-%vcfGr#vYj zzwmF^e50}(8&d`5y%pUdc~Wg#`u;86TX|%*)G+ajo)7fr@0-d|bTI4gJ5hsIOV-`I zxTbK~3iYtqb?r9aSD9Pwd1=zo$i~N9zk1l>&#Byw~-eYZQC! zy3gBbc}4zF#};)xUEfif$g%aqKea6{i#+!|bQK9V+(1krjNGvX-54VCpW*n``M-Pr z9IdK!|9$D_oE!Te8XvDAc^q^7CYvOBOugGq22}u%T<{oNGrR!xu}eSB-2VGYe>&$M z70@=b)Zc{)bcYeKX!h}i$z^G8U|`iDN5&L{_5MO)!ug9w02bgBzo+}e#8oo z_$B`tPWZ{MP43vfY3gsO!0;6lU&w!$7*T)u_}?G-pc8(j`F~aXXV}mCPt>XF$2~9m z(?9+*9I%`G>e9cY{|tXr%;dj({LfG)4L-HE?eM>O_U{kZw+1ZggR10T?frBA`aSzW zm-V*)wW+_~1{#rlsJP)j!}9*$AMQU4(O5jG|I{}~wU z*6Kg|x9`)Sb?MgYhp+1_Y}xRj=d1n0$k3IoOY<|A{`t@FK+wudUZe}B7t{Xqvb-I3CW*7ZNi z)8jv1lRxvHp;i7vUu^yPh>q^wXJ~<^Bzc4%R>vcB)E}}e z{m)>V_~-nOKuT<`EoQ#|@jpX@-1E=>SmP0i4Rq`>FFd_&{m;Ph^!lH#?Vs%*dLpsC zp~21Y-G0tmlpuRr6J5{x=i6DfKS~qI7smTXy-E4-_e`Kon@7Q_hzIlTw-($}`Onbb zVSm^D+}@md>*UrcTE*J@XLumW& z`KN#CAFv~wi7mBab0#jYV{s;|)I)P-SWeE8{WFeAYfad_>Bz7Ck1OYIXo*>}C9eJh zs}`TuI~UX}uMID|k+M8;dGYMmGxq(Ua`M=Pe?{B>sDvSx=J3Fe{Ljz=%jb(R3Jmsl z_4W_KP$LUof#ArqK`?v4<4N7#vgAP|AhpVUwLQY+4|1%&; zwovW2R@{WU7rt6s2?f<~GSkl@poptq@=~7$IY!xbOB|EW_R z|MhPD7s1%N$^RJ?E$r{DDU?4neX-fXKOVmpZ(Kbwb9(TNy^k~umX*u=3blIm<@`_K zb^jS2gnqgaWw=^x>CMeAK87y!(mcM)(rc20>W(R{T;jzSxL&7c>)IVU z`|?X`)`VS)Q|L_G=KA@vrTn4V)vGLvcKOBb7oHW_ynVOae}={*-ol5q-LbAYLU&Ei*3WmS~rTKm8?)n!}c zc4rlqgcr^7of36XiN)}wU~Wqa^V`rh4=;bsb!@u1rtsF2+z0HI@4M=6ZrbJN^``!o z;npj$6OUi*)LECb-MdpUwe#O=xwhThv!yN9sv6yna#yK2{&as#)d$U8tw)L~>hAxQ zH8c~^PdQ)c`}CCNXYXY#^%*-&Ps=SAb}c`>EGlwIz}mH&7xUkHV~OxG)0WRP;FmN9|R-s|OSuPs^^ zYqlmm`g3)^q1wEIt|DR1;JHFAvEAFew`|FLEUvfon$qEFjaLq@``k@U&1T(U>e5&O zS~|;OUjOmA{omsJr*WQ-{D1st|9kn*>uc}+Gdw<5|LfWPQ+p;Kng8)4|L^5LuV4Q# z|KpOF{|tARf3n$)dprx@Sl3^kPu_Hx4fyKMTe|ZnF3ag|eXcxfD%tm+cG7LmE_%8D=7-i#-^Gsy zAOCUf^6ABAGfv&LD>GVgCJ?5fW8T&~)Zoc(0m#pdHny-&}&=UpwfS>#*yt>@CNWiS8EH(P5r zt>i^h>$wkBv%2nGJ9XG&$E@s-*(awqoRGBn=>Dzxu5WZ~=?ne0qT99i&F?&Rd7_l3 zs6bwt1w*;%jhENTri)F=^3LR}e7`&2GY&#bbp0(~<;nnt~5!3xQ zPB@)!x^+zKumE7qM@+ctCI;bPTUeutm3XPo@L z&M7@oy*%{Rnj@`W*4em8-?(XBc5aQG`fc;nlwCb*>-p+WIQjp2Qa@u&*Yyw2|Fo$5 zXISU{v+L8x`|mD)F*#Dt|ZT~30 z5?tZTY^BQe-;!SqY?a1s} zrC1vW3D5cS%YA+)ZF;eO!t$VyyN}MVTjx}?|pZ0fqVxe1?~eo8zqEABtEeD;wd z?-kK^yGw%wZf`Fu=RdVkV{P5`uM} z=UTU-<0rd!?Hkv4Y0Wd*oO@?#^P;J1O1K}nOcRN|E%8ppf7g>M1{JLfC%rwos(uNB z!Pv-a@R#Y~O)t0Knh?E!kD1{FgZhiNaf>7DbU)0S|5#T3x809DsWP5C zA7yO(tOWmwK7YESBk^Ov>fqG+sh7il=2cGiJ$B;hKFuH5&g~D+e)*XBZQ7=rTDDsz z99eOqe8mCfjqDQt8MuyhAGxP@W$S+i?u;_2C1v%uta!|WPwVbF6?*RPC5B3sRvF>G z?8txOuXb(RzV)Mi&w5i|JvJU)7A9pitNh#}^Pj)dwm&K*`}F;>UMXFpSGk4@FaBpJ zEah>WrnFX1WaWdN!hYwtXJX4_FMku7wc^s8b(b?U)-IWPZ`FJMJ?A{{ecQVvqIXvq}1b83i#Ft)(|i%>-ke zOdd0CE<1L2?Up=Ai$DBpOTNW(zm4|3u6XUM=2q*!>G`+vZ8l2AF4x>x$;{4vxxuh& zU?)zNY>fX8Nw~^R9gZr;eCpR#>zW&UvY3JJF z|R}cKXv#0i{GjuWYTlFlrD!)|JJB0wr=C(l+AN)Yo6Wr!N)$xzJINE z;<}34$HJuccWlYcyYI7M(=3m-Ilik-K4)S0^QJPU$*TN~)%C#o2h}(Cx>=%F6I+MNXK@*qk+$45djl1Bs#Ds9= zKI`PQUdIyEB|dxEoHC3$_A>JKsXOb#)~2tI4Lv-~Vm{Xh83(zy^B^ExOd z`p)VjvdpWM*d$Lfq-s3Cg|EY2Q_wrxqmpiI|GNsP{y;ZMF&S=JkewD4*)^y+z zW7SRn=TQEi;pHeuI&;F_(MKikDmX7|LfXc2jHxqF{l4N zbHAMWpW)q$JCl#(JUjDu2LEQ6{`v0<`~5UK*Kd05zvAy4p_ALw{>`<1d~4&1HE~9p zukJVYUlkb3yIWsCI=g%Oq#K^Hr^PgPFV$V&;li!Mo7JE3WTIHzi!Z;zWJRu-ZQ45T zP=%HL!fm&{WS8gahzTB>ke=0i^2G6&pqJYheLnKC=#}z4&tt`X+I)Ret{E+!RCHCn z&_>o&Z1LVTrMKGli}o!nP~7acbwgj*4 zU2XPt(;KY{tpKlAZ?0W7J-%^!cILw9?y`Suk40i%Yii$~U2U--BD$#Y-lmKdzwYLR za=+;QwN_oPIxFw)kDjxIyl2c-iykVid0y~k`PoMnD*Wo-a-1GsXFYlT%euO*`q;JW zc2EAQWtF*cW$~J=$0mz5HfT8UeYjiCb$U_Pv8A3N1uk1FfV0{2;CTJ+^B3m0 ze^^rTCD65|^U*x-57+%SUAGB-^fx}VNqeuWDO+pJ7yhRTIt&b7UiL4#BD|zd;>YDm zzK+g(-fR6T-vwP5TqlNhSXc3d`v2khb=`Y)jn$5P!IXJG5wLe{M|p6a>i1BS40OLxj9?q=ppS1-rJLJGw`v; z%$gthV{-M%*HyKx%Qo$k+fr+#ee+%2vAj8Zx+L7h*&8H^kJnXkt$r|{G3(ZUhQqt9 zBW+yElb6PNye(foJ5^=sgumA9eaU%xt+n9|@ms+`X|}_35FRoC<F^j>!MeqXC~twpDf z&%60!e%!S!`=v7c*GZpvTXMOO^~chR-ckylD(%c6glc8hJKW{W}2=4yJuUntJ`exQeo6U;8@ki_UKen|m%l>u7>gu#jXN7ibi-_Z7v&wyCbo%=J z-+t~ zpzqo{5t`8^X0N&2{67kCp8Wnzwm{qc&^=y%-ps1C{kNaaE#+RR$J4P)t-q?VRbleP zK7P@Y;o>d5mPIR0EuFNhuRpw6ZR+0J+BYl9HQmBL*_?L$(Y*HT%4ogikFH$)%3QeZ zxzCl7gCdGXb)gd-6AJsH<>M^BM}78Q-+F7>%3QXYt1Gruzm=2Rb5LemS02YnYo8;N z?|l9hzLY`KL59IVd$!r?)>RL6Yh>8D7t>*ZSlyRDb-4{k!?k zuIt6_nGp()BiK<2A!4M@D^u6LLLY>vHF+qn=o@AnY){K_r@*e=8>gl`^<7f>CCvYiN?84!+0grMV9a>quQKkRj8FdB8~@aF#hUp) zJl6bY*lGUvll`%NB=-C(h4Lp3-w1g_D6R1)25(&Jn)09FkZbzillvhTsKA-E?t*`M zY+L<~Z55sTpMg_!{&zL_H8}_l$VCR+$ zw^Z~$!xSYxrgZBz?e&bUYk6E!HEv1=EDj9uU3~Vd{!flO_4~4?BJBrxwN)Nd@NZoE zhsQ4}w(jrv>kpc>zj8*)@;}2B&wnrfg{s1aFB&vauTk1DzrFqwU;g*`{~4S_Z(|X|=r zEo;@rxOD#~S@NGh=Kp7?>;G{PiGB5z_M?9r^-jODTYqES`}Z0@rP}}eiT=-UfAK$) zN(9@qs>=?fia%-h{ymG4^rM=k@$1NL{T;Sl@BV%2pSYs_{L%e?f7w5FM=--zEPgHY zX5WKv-n)OFvU6MbpTS=2Kg0JI{~3DaPIWQ^Lc)PqFEB{#U2_pW*$*{|r(noIMp5%IrH#0>5^a#TQho-u?SjUOC`D!~E|0 z-(TvF^&_#@2e|QZfSvjfzq(b{O`#Hv*O?~G-#$O}JNu^o^6yh>a5`YGtNt4QO~2dU zOedgBK4Ln^wAlOi0zaAZ|NOE2Kf`|aKN(2utgj-qNwL%K$ek>&uXI0g#s2xD`~UvR ze_V`UuD-HcI_>ZB{B;%!!)M>r-w}7t_Wr#9Br~v?XP-h$3{N65hW(IZIN|k4D}Ro8 zF2!3OCr$jDy=Gndk^3J%v;S`Yd41v3Yeh$vT>9N7b7)pC+q?5RI9Ao;T5Z4L=cOS~ zfP-2Bc=6(`OzGk)>rc2n{`)y*edB)k@g1Sq1aAd`#!e@${^S0tR^3%3Y*`|GPRn9` zwjSjKk2fayKe-P6`&#%qA!&@Hxl~t7pZ|+h@Pgm;RI@I!cN6A`m-l}8e z()|n^|n19lO;! zcFEfXf3^MFPH`XYdYb;2xAD}hrzS^vS4C?xI8G?2+j=?jZ?kXoo>zjFvsHMyo|Rnp z*s7gg#`V>p=~}%u z;^(znC1D@6PO7mp%kI8(_jTdz*RM^2I(2U{?Ylhj-kPkaqqE{7&dRJ*QkFf)SHQ9E z*8H#)%ic{}X}{-b`}TIV>37$EGQNG7%dt~`;<~1HhL2XBUSwnCxVO?HHEEem$#dE4 zrPBj@cFwc)x|yiF>_XHE*W%ojFFxJcd#BJVW>R!R<+;L$D~A$euas}zw*P~7o4W9x zZM*i(|G@M3;inBZckcMl(BH&<_vOS{(;gr8<`i7}CRbB|#isD}wJpo{Zt698G3lMT zzz!Z~J_d=iX|XYp-j7;mg}ZNmtb3(o#me?W2cu4hO?!H1pHlAYP5Ft>4;yYMY5OZz z60<33>5cG%>o|TkKlsmZKXl#nZx?+PtAFRKPXC^-IpYm7GVHVoK1_QO|Qp)^KYr=nqsvZ9suokF^ zUH^FgN^Xf3Zrl)=Jxfa>0TCz3c$%=KGp8H&0Uz)j) zSLu{^VW@v=>i4JW=|F?&66D zHTMNobJ=e+dA(((PF{+!^YwCP@84$oC)8&IezrU{^R1q8V|v95m%9>uy!oz+GB3WI z7#ruWQoXC8`dZJff-5E7(G~H6?)kigpWmq16Te}T*V{Qt zi+3KT1VOYYKuZ;;{80ksmHJDY4g4j zDcyUn$4vEF(WPr#8jE(E+WrWT^cuXE-x+JuX;GJwct(F1Wf&TPFCxC5B(XGrU+e=<5@jMAJ&iZzGWz;Tzn~N?* zXa4P5wDXqMywu2}>KCR?zvR1l(T*!QiPx3(Z=XBIT!^PcVs-kFZT}f=9ca(r^yc7C z|M;avA;EpAQTDvkwsWR_Su*wAmyH*e?U%~lbZ5fZyw{r*irCls&1##xtb69$lu40k z@+Ey!PhSj{IkxJq+Slz7n#cm1M zmg>a3GWha-(=&mdX`)l^Tzjkfo%cbe#R}W?#fz)&O1#Q0lXBcs^V?iA?CYatd#~y2 z-@LT>t$x`#j)2p>TTcgu{#_hA^{A`%E zm!EDk=Ez}Fc&D^;m$p{Ktt|@{@|x~k>!WJ5`T4x%VY`man6-0nqy&3+HgEg7Y^Kiv zJLXlE-8q$*roww%;#a7Z=Cxy2SIdPjU$8Ra<)!lbSMFRc)y~cpdUY#IDbjdLuFc1Y zlE8?o>$N8Kq<-_w4D7Y^@?H_>D%!XKN97Ld5RKODp!PuDl|YAi$A?Cb=syF}p@uIP z&hoAd^O&8z-Z`?)`a^$L{3F(PE0?_#7Z=>U`lw3gPJf25@^9rw>VGlD)<3XSn;e$x zyS${#{`Rc)Ki~f|9Cy|K_H5yQ2HXD(A1}t_BNWQtyf65l;RS2ye};yy?N>GGm45fu zZI1pq^JaZsmi*`Y{~2UOz^sq|8Tz^&*KgjZ{GZ_h*X#cbj4%G3`_ccO!9jcdANIws zrr1xcJ@|Fj{WIrI|1J*r&oJ*l!;cq>5zKYwzpWqH|7BRW|ASoroA(|68NM*>`pP#Uqg8O z4}Nzf4~H!L)BTQr&9#4NW%2OPkq5E<)IVOd<36(6?2*0uYbpPe_=4KB?VpV|{kwC; z{`q5g;DCZj^w@uf^cwg73=2fB|6{(0WH~rgn_rvmpE-9rA{N}iOndnsFNhDCAE1~7 zyA>R#h5M0W?@K+B?O(m*Pwac}>umnB^i)`YKtksDKP&BCmQ?}&T)(ql%dR)J z)_;4}`Cs*ahR2Kg;Ve?ZX8%7c4R|1t5Hz6hpfJ^d1Bry7ISzB`eiDMF&i>;?NDSi5 zQeSF=+CS;*;?6=v@rpP`PnQ62( z$m`PNj8izqurhXk=7*8OKdZPNPB#vA^@RWJUX_MZWg_ILbeKvs@g9Bh?;>iD&5|Cw`#5gPj6&i>B; zDZw7&jMl}kuGCLuzozw{!PuG~p#ko4?S7;*uo%hds{{Xq$dCFGM?Eqcmkf^=(LBXk zS*m2FWXe39@nqKX8l+=AC;zs7WDcp`x7Nx1@IET=p#5zC%gIY^ZywZa=l&M_d#^)(Y2l4GiGV>J-cz& PEh0SG0d`57{QsK( interpreter; +tflite::InterpreterBuilder(*model, resolver)(&interpreter); +// Resize input tensors, if desired. +interpreter->AllocateTensors(); +float* input = interpreter->typed_input_tensor(0); +// Fill `input`. +interpreter->Invoke(); +float* output = interpreter->type_output_tensor(0); +``` +### Data Alignment + +TensorFlow Lite data is usually aligned to 32-bit boundaries. It is recommended +that all data provided to TensorFlow Lite be aligned that way. + +### Error Reporting + +In many places TensorFlow Lite returns status information through +`TfLiteStatus` objects: + +```c++ +typedef enum { + kTfLiteOk = 0, + kTfLiteError = 1 +} TfLiteStatus; + +``` + +Failures can be easily verified with: +```c++ +if (status != kTfLiteOk) { + // ... error handling here ... +} +``` + +In order to obtain detailed error information an ErrorReporter must be +provided: + +```c++ +class ErrorReporter { + virtual int Report(const char* format, va_list args) = 0; +}; +``` + +The `DefaultErrorReporter` takes care of reporting to `stderr`. + +### Loading a Model + +The `FlatBufferModel` class encapsulates a model and can be built in a couple of +slightly different ways depending on where the model is stored: + +```c++ +class FlatBufferModel { +  // Build a model based on a file. Return a nullptr in case of failure. +  static std::unique_ptr BuildFromFile( +      const char* filename, +      ErrorReporter* error_reporter); + +  // Build a model based on a pre-loaded flatbuffer. The caller retains +  // ownership of the buffer and should keep it alive until the returned object +  // is destroyed. Return a nullptr in case of failure. +  static std::unique_ptr BuildFromBuffer( +      const char* buffer, +      size_t buffer_size, +      ErrorReporter* error_reporter); +}; +``` + +Note that if TensorFlow Lite detects the presence of Android's NNAPI it will +automatically try to use shared memory to store the FlatBufferModel. + +### Running a Model + +Running a model involves a few simple steps: + + * Build an `Interpreter` based on an existing `FlatBufferModel` + * Optionally resize input tensors if the predefined sizes are not desired. + * Set input tensor values + * Invoke inference + * Read output tensor values + +The important parts of public interface of the `Interpreter` are provided +below. It should be noted that: + + * Tensors are represented by integers, in order to avoid string comparisons + (and any fixed dependency on string libraries). + * An interpreter must not be accessed from concurrent threads + * Memory allocation for input and output tensors must be triggered + by calling AllocateTensors() right after resizing tensors. + +```c++ +class Interpreter { + Interpreter(ErrorReporter* error_reporter); + + // Read only access to list of inputs. + const std::vector& inputs() const; + + // Read only access to list of outputs. + const std::vector& outputs() const; + + // Change the dimensionality of a given tensor. + TfLiteStatus ResizeInputTensor(int tensor_index, + const std::vector& dims); + + // Returns status of success or failure. + TfLiteStatus AllocateTensors(); + + // Return a pointer into the data of a given input tensor. + template + T* typed_input_tensor(int index) { + return typed_tensor(inputs_[index]); + } + + // Return a pointer into the data of a given output tensor. + template + T* typed_output_tensor(int index) { + return typed_tensor(outputs_[index]); + } + + // Execute the model, populating output tensors. + TfLiteStatus Invoke(); +}; +``` + +### Writing Custom Operators + +All TensorFlow Lite operators (both custom and builtin) are defined using a +simple pure-C interface that consists of four functions: + +```c++ +typedef struct { + void* (*init)(TfLiteContext* context, const char* buffer, size_t length); + void (*free)(TfLiteContext* context, void* buffer); + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node); + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node); +} TfLiteRegistration; +``` + +Refer to `context.h` for details on `TfLiteContext` and `TfLiteNode`. The +former provides error reporting facilities and access to global objects, +including all the tensors. The latter allows implementations to access their +inputs and outputs. + +When the interpreter loads a model, it calls init() once for each node in the +graph. A given `init()` will be called more than once if the op is used +multiple times in the graph. For custom ops a configuration buffer will be +provided, containing a flexbuffer that maps parameter names to their values. +The buffer is empty for builtin ops because the interpreter has already parsed +the op parameters. Kernel implementation that require state should initialize +it here and transfer ownership to the caller. For each `init()` call, there +will be a corresponding call to `free()`, allowing implementations to dispose +of the buffer they might have allocated in `init()`. + +Whenever the input tensors are resized the interpreter will go through the +graph notifying implementations of the change. This gives them the chance to +resize their internal buffer, check validity of input shapes and types, and +recalculate output shapes. This is all done through `prepare()` and +implementation can access their state using `node->user_data`. + +Finally, each time inference runs the interpreter traverses the graph calling +`invoke()`, and here too the state is available as `node->user_data`. + +Custom ops can be implemented in exactly the same way as builtin ops, by +defined those four functions and a global registration function that usually +looks like this: + +```c++ +namespace tflite { +namespace ops { +namespace custom { + TfLiteRegistration* Register_MY_CUSTOM_OP() { + static TfLiteRegistration r = {my_custom_op::Init, + my_custom_op::Free, + my_custom_op::Prepare, + my_custom_op::Eval}; + return &r; + } +} // namespace custom +} // namespace ops +} // namespace tflite +``` + +Note that registration is not automatic and an explicit call to +`Register_MY_CUSTOM_OP` should be made somewhere. While the standard +`:builtin_ops` takes care of the registration of builtins, custom ops will have +to be collected in separated custom libraries. + +### Customizing the kernel library + +Behind the scenes the interpreter will load a library of kernels which will be +assigned to execute each of the operators in the model. While the default +library only contains builtin kernels, it is possible to replace it with a +custom library. + +The interpreter uses an `OpResolver` to translate operator codes and names into +actual code: + +```c++ +class OpResolver { + virtual TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const = 0; + virtual TfLiteRegistration* FindOp(const char* op) const = 0; + virtual void AddOp(tflite::BuiltinOperator op, TfLiteRegistration* registration) = 0; + virtual void AddOp(const char* op, TfLiteRegistration* registration) = 0; +}; +``` + +The regular usage will require the developer to use the `BuiltinOpResolver` and +write: + +```c++ +tflite::ops::builtin::BuiltinOpResolver resolver; +``` + +They can then optionally register custom ops: + +```c++ +resolver.AddOp("MY_CUSTOM_OP", Register_MY_CUSTOM_OP()); +``` + +before the resolver is passed to the `InterpreterBuilder`. + +If the set of builtin ops is deemed to be too large, a new `OpResolver` could +be code-generated based on a given subset of ops, possibly only the ones +contained in a given model. This is the equivalent of TensorFlow's selective +registration (and a simple version of it is available in the `tools` +directory). + +## Java + +TensorFlow Lite's Java API supports on-device inference and is provided as an +Android Studio Library that allows loading models, feeding inputs, and +retrieving inference outputs. + +The simplest usage of Tensorflow Lite Java API looks like this: + +```java +try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) { + interpreter.run(input, output); +} +``` + +### Loading a Model + +The `Interpreter.java` class drives model inference with TensorFlow Lite. In +most of the cases, this is the only class an app developer will need. + +#### Initializing an `Interpreter` Mith a Model Mile + +The `Interpreter` can be initialized with a model file using the constructor: + +```java +public Interpreter(@NotNull File modelFile); +``` + +or with a `MappedByteBuffer`: + +```java +public Interpreter(@NotNull MappedByteBuffer mappedByteBuffer); +``` + +In both cases a valid TensorFlow Lite must be provided or an +`IllegalArgumentException` with be thrown. If a `MappedByteBuffer` is used to +initialize an Interpreter, it should remain unchanged for the whole lifetime of +the `Interpreter`. + +### Running a Model + +#### Supported Data Types + +To use TensorFlow Lite, the data types of the input and output tensors must be +one of the following primitive types: + +* `float` +* `int` +* `long` +* `byte` + +If other data types, including boxed types like `Integer` and `Float`, are used, +an `IllegalArgumentException` will be thrown. + +#### Inputs + +Each input should be an array, a multi-dimensional array, or a `ByteBuffer` of +the supported primitive types. + +The use of `ByteBuffer` is preferred since it allows the `Interpreter` to avoid +unnecessary copies. Each `ByteBuffer` needs to be a direct byte buffer, and its +order must be `ByteOrder.nativeOrder()`. After it is used for a model inference, +it must remain unchanged until the model inference is finished. + +#### Outputs + +Each output should be an array, or a multi-dimensional array of the supported +primitive types. + +#### Running Model Inference + +If a model takes only one input and returns only one output, the following will +trigger an inference run: + +```java +interpreter.run(input, output); +``` + +For models with multiple inputs, or multiple outputs, use: + +```java +interpreter.runForMultipleInputsOutputs(inputs, map_of_indices_to_outputs); +``` + +where each entry in `inputs` corresponds to an input tensor and +`map_of_indices_to_outputs` maps indices of output tensors to the +corresponding output data. In both cases the tensor indices should correspond to +the values given to the `TensorFlow Lite Optimized Converter` when the model was +created. Be aware that the order of tensors in `input` must match the order +given to the `TensorFlow Lite Optimized Converter`. + +The Java API also provides convenient functions for app developers to get the +index of any model input or output using a tensor name: + +```java +public int getInputIndex(String tensorName); +public int getOutputIndex(String tensorName); +``` + +If tensorName is not a valid name in model, an `IllegalArgumentException` will +be thrown. + +### Releasing Resources After Use + +An `Interpreter` owns resources. To avoid memory leak, the resources must be +released after use by: + +```java +interpreter.close(); +``` diff --git a/tensorflow/contrib/lite/g3doc/custom_operators.md b/tensorflow/contrib/lite/g3doc/custom_operators.md new file mode 100644 index 0000000000..204a489a93 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/custom_operators.md @@ -0,0 +1,91 @@ +# How to use custom operators + +TensorFlow Lite currently supports a subset of TensorFlow operators. However, it +does support the use of user-provided implementations (as known as custom +implementations) if the model contains an operator that is not supported. + +Let’s walk through this via an example. Assume we are using the `Sin` operator +and that we are building a very simple model for a function `y = sin(x + +offset)`, where `offset` is trainable. + +The code to train the TensorFlow model will be something like: + +```python +offset = tf.get_variable("offset", [1,], tf.float32) +x = tf.placeholder(tf.float32, shape=(None,)) +y = tf.sin(x + offset) +y_ = tf.placeholder(tf.float32, shape=(None,)) +loss = tf.reduce_sum(tf.square(y - y_)) +optimizer = tf.train.GradientDescentOptimizer(0.001) +train = optimizer.minimize(loss) +``` + +If you convert this model to Tensorflow Lite format using the TensorFlow Lite +Optimizing Converter with `--allow_custom_ops` argument, and run it with the +default interpreter, the interpreter will raise the following error messages: + +``` +Didn't find custom op for name 'Sin' +Registration failed. +``` + +All we need to do to use the op in TensorFlow Lite is define two functions +(`Prepare` and `Eval`), and construct a `TfLiteRegistration`. This code would +look something like this: + +```cpp +TfLiteStatus SinPrepare(TfLiteContext* context, TfLiteNode* node) { + using namespace tflite; + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + + int num_dims = NumDimensions(input); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(num_dims); + for (int i=0; idata[i] = input->dims->data[i]; + } + + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) { + using namespace tflite; + TfLiteTensor* input = GetInput(context, node,0); + TfLiteTensor* output = GetOutput(context, node,0); + + float* input_data = input->data.f; + float* output_data = output->data.f; + + size_t count = 1; + int num_dims = NumDimensions(input); + for (int i = 0; i < num_dims; ++i) { + count *= input->dims->data[i]; + } + + for (size_t i=0; i SDK Tools -> + Android Support Repository`. + + 2. [Edit your `WORKSPACE`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#edit-workspace) + to add SDK and NDK targets. + + - Make sure the `api_level` in `WORKSPACE` is set to an SDK version that + you have installed. + - By default, Android Studio will install the SDK to `~/Android/Sdk` and + the NDK to `~/Android/Sdk/ndk-bundle`. + +2. Build the app with Bazel. The demo needs C++11: + + ```shell + bazel build -c opt --cxxopt='--std=c++11' \ + //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo + ``` + +3. Install the demo on a + [debug-enabled device](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install): + + ```shell + adb install bazel-bin/tensorflow/contrib/lite/java/demo/app/src/main/TfLiteCameraDemo.apk + ``` diff --git a/tensorflow/contrib/lite/models/smartreply/g3doc/README.md b/tensorflow/contrib/lite/models/smartreply/g3doc/README.md new file mode 100644 index 0000000000..cab5dcca43 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/g3doc/README.md @@ -0,0 +1,146 @@ +# Smart Reply Model + +## What is On-Device Smart Reply Model? + +Smart Replies are contextually relevant, one-touch responses that help the user +to reply to an incoming text message (or email) efficiently and effortlessly. +Smart Replies have been highly successful across several Google products +including +[Gmail](https://www.blog.google/products/gmail/save-time-with-smart-reply-in-gmail/), +[Inbox](https://www.blog.google/products/gmail/computer-respond-to-this-email/) +and +[Allo](https://blog.google/products/allo/google-allo-smarter-messaging-app/). + +The On-device Smart Reply model is targeted towards text chat use cases. It has +a completely different architecture from its cloud-based counterparts, and is +built specifically for memory constraints devices such as phones & watches. It +has been successfully used to provide [Smart Replies on Android +Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) +to all first- & third-party apps. + +The on-device model comes with several benefits. It is: + +* **Faster**: The model resides on the device and does not require internet + connectivity. Thus, the inference is very fast and has an average latency of + only a few milliseconds. +* **Resource efficient**: The model has a small memory footprint on + the device. +* **Privacy-friendly**: The user data never leaves the device and this + eliminates any privacy restrictions. + +A caveat, though, is that the on-device model has lower triggering rate than its +cloud counterparts (triggering rate is the percentage of times the model +suggests a response for an incoming message). + +## When to use this Model? + +The On-Device Smart Reply model is aimed towards improving the messaging +experience for day-to-day conversational chat messages. We recommend using this +model for similar use cases. Some sample messages on which the model does well +are provided in this [tsv +file](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv) +for reference. The file format is: + +``` + {incoming_message smart_reply1 [smart_reply2] [smart_reply3]} +``` + +For the current model, we see a triggering rate of about 30-40% for messages +which are similar to those provided in the tsv file above. + +In case the model does not trigger any response, the system falls back to +suggesting replies from a fixed back-off set that was compiled from popular +response intents observed in chat conversations. Some of the fallback responses +are `Ok, Yes, No, 👍, ☺`. + +The model can only be used for inference at this time (i.e. it cannot be custom +trained). If you are interested to know how the model was trained, please refer +to this [blog +post](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) +and [research paper](https://arxiv.org/pdf/1708.00630). + +## How to use this Model? + +We have provided a pre-built demo APK that you can download, install and test on +your phone ([demo APK +here](http://download.tensorflow.org/deps/tflite/SmartReplyDemo.apk)). + +The On-Device Smart Reply demo App works in the following way: + +1. Android app links to the JNI binary with a predictor library. + +2. In the predictor library, `GetSegmentPredictions` is called with a list of input + strings. + + 2.1 The input string can be 1-3 most recent messages of the conversations in + form of string vector. The model will run on these input sentences and + provide Smart Replies corresponding to them. + + 2.2 The function performs some preprocessing on input data which includes: + + * Sentence splitting: The input message will be split into sentences if + message has more than one sentence. Eg: a message like “How are you? + Want to grab lunch?” will be broken down into 2 different sentences. + * Normalization: The individual sentences will be normalized by converting + them into lower cases, removing unnecessary punctuations, etc. Eg: “how + are you????” will be converted to “how are you?” (refer for NORMALIZE op + for more details). + + The input string content will be converted to tensors. + + 2.3 The function then runs the prediction model on the input tensors. + + 2.4 The function also performs some post-processing which includes + aggregating the model predictions for the input sentences from 2.2 and + returning the appropriate responses. + +3. Finally, it gets response(s) from `std::vector`, and + returns back to Android app. Responses are sorted in descending order of + confidence score. + +## Ops and Functionality Supported + +Following are the ops supported for using On-Device Smart Reply model: + +* **NORMALIZE** + + This is a custom op which normalizes the sentences by: + + * Converting all sentences into lower case. + * Removing unnecessary punctuations (eg: “how are you????” → “how are + you?”). + * Expanding sentences wherever necessary (eg: “ I’m home” → “I am home”). + +* **SKIP_GRAM** + + This is an op inside TensorFlow Lite that converts sentences into a list of + skip grams. The configurable parameters are `ngram_size` and + `max_skip_size`. For the model provided, the values for these parameters are + set to 3 & 2 respectively. + +* **EXTRACT_FEATURES** + + This is a custom op that hashes skip grams to features represented as + integers. Longer skip-grams are allocated higher weights. + +* **LSH_PROJECTION** + + This is an op inside TensorFlow Lite that projects input features to a + corresponding bit vector space using Locality Sensitive Hashing (LSH). + +* **PREDICT** + + This is a custom op that runs the input features through the projection + model (details [here](https://arxiv.org/pdf/1708.00630.pdf)), computes the + appropriate response labels along with weights for the projected features, + and aggregates the response labels and weights together. + +* **HASHTABLE_LOOKUP** + + This is a custom op that uses label id from predict op and looks up the + response text from the given label id. + +## Further Information + +* Open source code + [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/smartreply/). diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md new file mode 100644 index 0000000000..d0c21d2833 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md @@ -0,0 +1,102 @@ +## Speech Model Tests + +Sample test data has been provided for speech related models in Tensorflow Lite +to help users working with speech models to verify and test their models. + +For the hotword, speaker-id and automatic speech recognition sample models, the +architecture assumes that the models receive their input from a speech +pre-processing module. The speech pre-processing module receives the audio +signal and produces features for the encoder neural network and uses some +typical signal processing algorithms, like FFT and spectral subtraction, and +ultimately produces a log-mel filterbank (the log of the triangular mel filters +applied to the power spectra). The text-to-speech model assumes that the inputs +are linguistic features describing characteristics of phonemes, syllables, +words, phrases, and sentence. The outputs are acoustic features including +mel-cepstral coefficients, log fundamental frequency, and band aperiodicity. +The pre-processing modules for these models are not provided in the open source +version of TensorFlow Lite. + +The following sections describe the architecture of the sample models at a high +level: + +### Hotword Model + +The hotword model is the neural network model we use for keyphrase/hotword +spotting (i.e. "okgoogle" detection). It is the entry point for voice +interaction (e.g. Google search app on Android devices or Google Home, etc.). +The speech hotword model block diagram is shown in Figure below. It has an input +size of 40 (float), an output size of 7 (float), one Svdf layer, and four fully +connected layers with the corresponding parameters as shown in figure below. + +![hotword_model](hotword.svg "Hotword model") + +### Speaker-id Model + +The speaker-id model is the neural network model we use for speaker +verification. It runs after the hotword triggers. The speech speaker-id model +block diagram is shown in Figure below. It has an input size of 80 (float), an +output size of 64 (float), three Lstm layers, and one fully connected layers +with the corresponding parameters as shown in figure below. + +![speakerid_model](speakerid.svg "Speaker-id model") + +### Text-to-speech (TTS) Model + +The text-to-speech model is the neural network model used to generate speech +from text. The speech text-to-speech model’s block diagram is shown +in Figure below. It has and input size of 334 (float), an output size of 196 +(float), two fully connected layers, three Lstm layers, and one recurrent layer +with the corresponding parameters as shown in the figure. + +![tts_model](tts.svg "TTS model") + +### Automatic Speech Recognizer (ASR) Acoustic Model (AM) + +The acoustic model for automatic speech recognition is the neural network model +for matching phonemes to the input autio features. It generates posterior +probabilities of phonemes from speech frontend features (log-mel filterbanks). +It has an input size of 320 (float), an output size of 42 (float), five LSTM +layers and one fully connected layers with a Softmax activation function, with +the corresponding parameters as shown in the figure. + +![asr_am_model](asr_am.svg "ASR AM model") + +## Speech models test input/output generation + +As mentioned above the input to models are generated from a pre-processing +module (output of a log-mel filterbank, or linguistic features), and the outputs +are generated by running the equivalent TensorFlow model by feeding them the +same input. + +## Link to the open source code + +### Models: + +[Speech hotword model (Svdf rank=1)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank1.tflite) + +[Speech hotword model (Svdf rank=2)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank2.tflite) + +[Speaker-id model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_speakerid_model.tflite) + +[TTS model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_tts_model.tflite) + +[ASR AM model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_terse_am_model.tflite) + +### Test benches + +[Speech hotword model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc) + +[Speaker-id model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc) + +[TTS model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc) + +[ASR AM model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc) + +## Android Support +The models have been tested on Android phones, using the following tests: + +[Hotword] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=25) + +[Speaker-id] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=36) + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg new file mode 100644 index 0000000000..ca96556422 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg b/tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg new file mode 100755 index 0000000000..36187aa321 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/hotword.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg b/tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg new file mode 100755 index 0000000000..dbe4312c46 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/speakerid.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/tts.svg b/tensorflow/contrib/lite/models/testdata/g3doc/tts.svg new file mode 100755 index 0000000000..9664b78f16 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/tts.svg @@ -0,0 +1,4 @@ + + + + diff --git a/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv b/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv new file mode 100644 index 0000000000..dfdc783106 --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/smartreply_samples.tsv @@ -0,0 +1,50 @@ +any chance ur free tonight Maybe not +any updates? No update yet +anything i can do to help? No, but thanks No, but thank you No, but thanks for asking +be safe. I will be Will do my best Thanks, I will +congratulations Thanks thanks Congratulations +cool, let me know when you have time Cool Yes very cool Yeah, cool +drive safe Thank you, I will Home now I will thanks +hang in there, you'll be okay Doing my best Of course we will +happy birthday! Hey, thanks +happy new year! Wish you the same Thanks and same to you +have a safe flight Thanks, love you too Safe travels +hey What is up? How it going? Can I help you? +hey, got a sec? What is up? How it going? Can I help you? +how are you doing? Great and you? I am doing great +how are you feeling Feeling okay A little better Much much better +how was your weekend? It was real good +how you doing Okay and you +hugs. So sweet Thanks sweetie Take care of yourself +i'm bored Sorry to hear that Join the club No you are not +i'm planning on coming next week. let me know if that works. Works Perfect, thanks +i'm sick Sorry to hear that +i'm so happy for you Thanks me too +i'm so hungry Haha me too +i'm sorry No I am sorry Why sorry? No worries love +i'm sorry, i'm going to have to cancel. No I am sorry Why sorry? No worries love +is there anything i can do to help? No, but thanks No, but thanks for asking +lunch? Yes coming +okay. lemme know as soon as you find out. Any more questions? It is done +omg amazing So amazing +on my way Okay see you soon Cool, see you soon Oh wow, ok +oops, mistexted. Oops Haha, oh well That was funny +safe travels. Thanks, love you too Safe travels +so sorry So sorry +sorry, i can't. No worries at all Sorry what? +sorry, i can't do saturday No worries at all +thank you so much. You are so welcome You are so very welcome You are most welcome +thanks for coming It was my pleasure +thanks, this has been great. Glad to help So happy for you +tomorrow would be ideal. Yes it would +tried calling Try again? +ugh, my flight is delayed. Ugh indeed +what are you guys up to tonight? Nothing planned +what day works best for you Any day +what do you want for dinner Your call Whatever is fine +what time will you be home? Not sure why +where are you?!? At my house +wish you were here. I wish the same Me too honey +you're amazing You are too You are amazing I am +you're marvelous You are too +you're the best. I do my best You are the best Well, I try \ No newline at end of file diff --git a/tensorflow/contrib/lite/nnapi/README.md b/tensorflow/contrib/lite/nnapi/README.md new file mode 100644 index 0000000000..913467d176 --- /dev/null +++ b/tensorflow/contrib/lite/nnapi/README.md @@ -0,0 +1,15 @@ +# Android Neural Network API + +The Android Neural Networks API (NNAPI) is an Android C API designed for running +computationally intensive operators for machine learning on mobile devices. +Tensorflow Lite is designed to use the NNAPI to perform hardware-accelerated +inference operators on supported devices. +Based on the app’s requirements and the hardware capabilities on a device, the +NNAPI can distribute the computation workload across available on-device +processors, including dedicated neural network hardware, graphics processing +units (GPUs), and digital signal processors (DSPs). +For devices that lack a specialized vendor driver, the NNAPI runtime relies on +optimized code to execute requests on the CPU. For more information about the +NNAPI, please refer to the [NNAPI documentation](https://developer.android.com/ndk/guides/neuralnetworks/index.html) + + diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md new file mode 100644 index 0000000000..281b2ea5e4 --- /dev/null +++ b/tensorflow/contrib/lite/toco/README.md @@ -0,0 +1,26 @@ +# The TensorFlow Lite Optimizing Converter + +The TensorFlow Lite Optimizing Converter's most typical use is converting from the TensorFlow GraphDef to the TensorFlow Lite +format, but it supports much more than that. + +## Usage documentation + +Usage information is given in these documents: + +* [Command-line examples](g3doc/cmdline_examples.md) +* [Command-line reference](g3doc/cmdline_reference.md) +* [Python API](g3doc/python_api.md) + +## Design documentation + +Coming soon! + +## Where the converter fits in the TensorFlow landscape + +In the typical case, an application developer is using TensorFlow to design and +train models, then uses TensorFlow's freeze_graph.py to generate a frozen +inference graph, then uses the converter to convert that into a TensorFlow Lite flatbuffer file, +then ships that file to client devices where the TensorFlow Lite interpreter handles them +on-device. This is represented in the following diagram: + +![drawing](https://storage.googleapis.com/download.tensorflow.org/example_images/tensorflow_landscape.svg) diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md new file mode 100644 index 0000000000..b9f8c8d152 --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md @@ -0,0 +1,509 @@ +# TensorFlow Lite Optimizing Converter command-line examples + +This page is a guide to using the TensorFlow Lite Optimizing Converter by +looking at some example command lines. It is complemented by the following other +documents: + +* [README](../README.md) +* [Command-line reference](cmdline_reference.md) + +Table of contents: + +[TOC] + +## Convert a TensorFlow GraphDef to TensorFlow Lite for float inference + +In this example, we look at the most common task: we have an ordinary TensorFlow +GraphDef and want to convert it to a TensorFlow Lite flatbuffer to perform +floating-point inference. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +To explain each of these flags: + +* `--input_format` and `--output_format` determine the formats of the input + and output files: here we are converting from `TENSORFLOW_GRAPHDEF` to + `TFLITE`. +* `--input_file` specifies the path of the input file, to be converted. When + `--input_format=TENSORFLOW_GRAPHDEF`, this file should be a + *[frozen](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)* + *inference* graph. Being frozen means in particular that the input file is + self-contained, and does not reference any external "checkpoint" file. An + *inference* graph is a version of a graph meant to be used for inference, + typically not the same graph file as was used for training a given model. +* `--output_file` specifies the destination to write the converted file to. +* `--input_array` specifies the input activations, that is, the input "tensor" + in the input TensorFlow GraphDef file. The array designated by + `--input_array` is the one that the user will have to provide the contents + of as input to the runtime inference code. +* `--output_array` specifies the output activations, that is, the output + "tensor" in the input TensorFlow GraphDef file. The runtime inference code + will store its results in the array designated by `--output_array`. +* `--input_shape` specifies the shape of the input array. It is currently + required, but the plan is for a future version to no longer require it, + allowing to defer the specification of the input shape until runtime. The + format of `input_shape` is always a comma-separated list of dimensions, + always in TensorFlow convention. +* `--input_type` specifies what should be the type of the input arrays in the + **output** file. `--input_type` does not describe a property of the input + file: the type of input arrays is already encoded in the input graph. + Rather, `--input_type` is how you specify what should be the type of the + inputs to be provided to the output converted graph. This only affects + arrays of real numbers: this flag allows to quantized/dequantize + real-numbers inputs, switching between floating-point and quantized forms. + This flag has no incidence on all other types of input arrays, such as plain + integers or strings. +* `--inference_type` specifies what type of arithmetic the output file should + be relying on. It implies in particular the choice of type of the output + arrays in the output file. Like `--input_type`, `--inference_type` does not + describe a property of the input file. + +## Just optimize a TensorFlow GraphDef + +The converter accepts both TENSORFLOW_GRAPHDEF and TFLITE file formats as both +`--input_format` and `--output_format`. This means that conversion from and to +any supported format is possible, and in particular, same-format "conversions" +are possible, and effectively ask the converter to optimize and simplify a +graph. Example: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.pb \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +Here we did not pass `--input_type` and `--inference_type` because they are +considered not applicable to the TensorFlow GraphDef format (as far as we are +concerned, TensorFlow GraphDefs are technically always float, and the only +flavor of "quantized" GraphDef that the converter deals with is "FakeQuantized" +graphs that are still technically float graphs). + +Below in the section about passing arbitrary input/output arrays we give another +example, using the converter to extract just a sub-graph from a TensorFlow +GraphDef. + +## Convert a TensorFlow Lite flatbuffer back into TensorFlow GraphDef format + +As we mentioned that the converter supports file format conversions in any +direction, let us just give an example of that: + +``` +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/foo.lite \ + --output_file=/tmp/foo.pb \ + --input_format=TFLITE \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +## Convert a TensorFlow GraphDef to TensorFlow Lite for quantized inference + +Let us now look at a quantized model. As mentioned above, the only flavor of +quantized TensorFlow GraphDefs that the converter is concerned with, is +"FakeQuantized" models. These are technically float models, but with special +`FakeQuant*` ops inserted at the boundaries of fused layers to record min-max +range information allowing to generate a quantized inference workload that is +able to reproduce exactly the specific quantization behavior that was used +during training. Indeed, the whole point of quantized training is to allow for +both training and inference to perform exactly the same arithmetic, so that the +way that the training process about around quantization inaccuracy is +effectively helping the quantized inference process to be more accurate. + +Given a quantized TensorFlow GraphDef, generating a quantized TensorFlow Lite +flatbuffer is done like this: + +``` +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/some_quantized_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=QUANTIZED_UINT8 \ + --inference_type=QUANTIZED_UINT8 \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --mean_value=128 \ + --std_value=127 +``` + +Here, besides changing `--input_file` to point to a (fake-)quantized GraphDef, +the only other changes are: + +* To change `--input_type` and `--inference_type` to `QUANTIZED_UINT8`. This + effectively tells the converter to generate an output file that can take a + quantized uint8 array as input (`--input_type=QUANTIZED_UINT8`), and have + quantized uint8 internal and output arrays as well + (`--inference_type=QUANTIZED_UINT8`). +* To pass `--mean_value` and `--std_value` flags to describe how the quantized + uint8 input array values are to be interpreted as the mathematical real + numbers that the graph is concerned with (keep in mind that even a + "fake-quantized" TensorFlow GraphDef is still technically a float graph). + The meaning of `--mean_value` and `--std_value` is explained in the + command-line reference; it suffices for now to say that they are a property + of each model. + +## Use dummy-quantization to try out quantized inference on a float graph + +Sometimes, one only has a plain float graph, and one is curious as to how much +faster inference might run if one could perform quantized inference instead of +float inference. Rather than requiring users to first invest in quantizing their +graphs before they can evaluate a possible benefit, the converter allows to +simply experiment with what we call "dummy quantization": provide some vaguely +plausible values for the min-max ranges of values in all arrays that do not have +min-max information, so that quantization can carry on, certainly producing +inaccurate results (do not use that in production!) but with performance +characteristics that should be identical to those of an actually quantized +flavor of the model. + +In the present example, we have a model using Relu6 activation functions almost +everywhere, so a reasonable guess is that most activation ranges should be +contained in [0, 6] and roughly comparable to it. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.cc \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=QUANTIZED_UINT8 \ + --inference_type=QUANTIZED_UINT8 \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --default_ranges_min=0 \ + --default_ranges_max=6 \ + --mean_value=127.5 \ + --std_value=127.5 +``` + +## Multiple output arrays + +Some models have multiple outputs. Even in a model with only one output, you may +want for the inference code to return the contents of other arrays as well, or +to perform inference on a subgraph with multiple outputs (see the section below +on specifying arbitrary arrays as input/output arrays). + +Either way, using `--output_arrays` instead of `--output_array` allows to +specify a comma-separated list of output arrays. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,224,224,3 \ + --input_array=input \ + --output_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu +``` + +## Multiple input arrays + +Some models have multiple inputs; even in a model with a single input, you may +want for the inference code to implement only a subgraph with multiple inputs +(see the section below on specifying arbitrary arrays as input/output arrays). + +Either way, multiple input arrays are specified by using `--input_arrays` +instead of `--input_array` to specify a comma-separated list of input arrays. In +that case, one also needs to use `--input_shapes` instead of `--input_shape`. +The syntax for `--input_shapes` is a bit trickier, since already the singular +`--input_shape` was a comma-separated list of integers! Multiple input shapes +are delimited by a colon (`:`) in `--input_shapes`. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shapes=1,28,28,96:1,28,28,16:1,28,28,192:1,28,28,64 \ + --input_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_3/MaxPool_0a_3x3/MaxPool,InceptionV1/InceptionV1/Mixed_3b/Branch_0/Conv2d_0a_1x1/Relu \ + --output_array=InceptionV1/Logits/Predictions/Reshape_1 +``` + +## Specifying arbitrary arrays in a graph as input or output arrays + +Any array in the input file can be specified as an input or output array. This +allows to use the converter to extract a sub-graph out of the input graph file. +The converter then automatically discards any part of the graph that is not +needed for the subgraph identified by the specified input and output arrays. +Another use case for specifying multiple output arrays is to get inference code +to return the contents of some specified intermediate activations array, not +just the output activations. + +In order to know which array you want to pass as `--input_arrays` / +`--output_arrays`, it helps to have a visualization of the graph. See the +section below on graph visualization. When using graph visualization for that +purpose, make sure to use `--dump_graphviz=` to visualize exactly the graph as +it is in the actual final form being exported to the output file. + +Note that the final representation of an on-device inference workload (say, in +TensorFlow Lite flatbuffers format) tends to have coarser granularity than the +very fine granularity of the TensorFlow GraphDef representation. For example, +while a fully-connected layer is typically represented as at least four separate +ops in TensorFlow GraphDef (Reshape, MatMul, BiasAdd, Relu...), it is typically +represented as a single "fused" op (FullyConnected) in the converter's optimized +representation and in the final on-device representation (e.g. in TensorFlow +Lite flatbuffer format). As the level of granularity gets coarser, some +intermediate arrays (say, the array between the MatMul and the BiasAdd in the +TensorFlow GraphDef) are dropped. When specifying intermediate arrays as +`--input_arrays` / `--output_arrays`, it is generally at least desirable (and +often required) to specify arrays that are meant to survive in the final form of +the graph, after fusing. These are typically the outputs of activation functions +(since everything in each layer until the activation function tends to get +fused). + +Here is an example of extracting just a sub-graph, namely just a single fused +layer, out of a TensorFlow GraphDef, and exporting a TensorFlow GraphDef +containing just that subgraph: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --output_file=/tmp/foo.pb \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shapes=1,28,28,96:1,28,28,16:1,28,28,192:1,28,28,64 \ + --input_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_3/MaxPool_0a_3x3/MaxPool,InceptionV1/InceptionV1/Mixed_3b/Branch_0/Conv2d_0a_1x1/Relu \ + --output_array=InceptionV1/InceptionV1/Mixed_3b/concat_v2 +``` + +## Logging + +### Standard logging + +The converter generates some informative log messages during processing. The +easiest way to view them is to add `--logtostderr` to command lines. For the +previous example, that gives: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --logtostderr +``` + +After some initialization messages, we get the following informative messages: + +``` +I1101 21:51:33.297475 5339 graph_transformations.cc:39] Before general graph transformations: 416 operators, 583 arrays (0 quantized) +I1101 21:51:33.308972 5339 graph_transformations.cc:39] After general graph transformations pass 1: 31 operators, 89 arrays (0 quantized) +I1101 21:51:33.309204 5339 graph_transformations.cc:39] Before dequantization graph transformations: 31 operators, 89 arrays (0 quantized) +I1101 21:51:33.309368 5339 allocate_transient_arrays.cc:312] Total transient array allocated size: 1048576 bytes, theoretical optimal value: 786432 bytes. +I1101 21:51:33.309484 5339 toco_tooling.cc:249] Estimated count of arithmetic ops: 0.099218 billion (note that a multiply-add is counted as 2 ops). +``` + +### Verbose logging + +For debugging purposes, the converter supports two levels of verbose logging, +which can be set by passing a `--v=` flag: + +* At `--v=1`, the converter generates text dumps of the graph at various + points during processing, as well as log messages about every graph + transformation that did take place, typically answering questions of the + form "why was my graph transformed in this way"? +* At `--v=2`, the converter additionally generates log messages about graph + transformations that were considered but not actually performed, typically + answering questions of the form "why was my graph NOT transformed when I + expected it would be?". + +### Graph "video" logging + +When `--dump_graphviz=` is used (see the section on Graph visualizations), one +may additionally pass `--dump_graphviz_video`, which causes a graph +visualization to be dumped after each individual graph transformations, often +resulting in thousands of files. Typically, one would then bisect into these +files to understand when a given change was introduced in the graph. + +## Graph visualizations + +The converter is able to export a graph to the GraphViz Dot format, for easy +visualization. Combined with the converter's ability to transform the graph into +a simpler, coarser-granularity representation, that makes it a very powerful +visualization tool. + +There are two ways to get the converter to export a GraphViz Dot file, +corresponding to two separate use cases. Understanding the difference between +them is key to getting useful graph visualizations. + +### Using `--output_format=GRAPHVIZ_DOT` + +The first way to get a graphviz rendering is to pass +`--output_format=GRAPHVIZ_DOT`, instead of the `--output_format` that you would +otherwise use. This says: "I just want to get a plausible visualization of that +graph". The upside is that it makes for very simple command lines, and makes the +converter very lax about aspects of the graph or the command line that it would +otherwise complain about. Example: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.dot \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=GRAPHVIZ_DOT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +The resulting `.dot` file can be rendered into a PDF as follows: + +``` +dot -Tpdf -O /tmp/foo.dot +``` + +And the resulting `.dot.pdf` can be viewed in any PDF viewer, but we suggest one +with a good ability to pan and zoom across a very large page; Google Chrome does +well in that respect. + +``` +google-chrome /tmp/foo.dot.pdf +``` + +Example PDF files are viewable online in the next section. + +### Using `--dump_graphviz=` + +The second way to get a graphviz rendering is to pass a `--dump_graphviz=` flag +specifying a destination directory to dump GraphViz rendering to. Unlike the +previous approach, this one allows you to keep your real command-line (with your +real `--output_format` and other flags) unchanged, just appending a +`--dump_graphviz=` flag to it. This says: "I want visualizations of the actual +graph during this specific conversion process". Example: + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ + --output_file=/tmp/foo.lite \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --input_type=FLOAT \ + --inference_type=FLOAT \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 \ + --dump_graphviz=/tmp +``` + +This generates a few files in the destination directory, here `/tmp`. Most +important are these two files: + +``` +/tmp/toco_AT_IMPORT.dot +/tmp/toco_AFTER_TRANSFORMATIONS.dot +``` + +`toco_AT_IMPORT.dot` represents the graph as it was imported from +`--input_file`, before any transformation was applied to it (besides some +transformations that are applied immediately while importing). This tends to be +a complex visualization with limited information, but is useful especially in +situations where a conversion command fails (this file is generated even if the +conversion subsequently fails). + +`toco_AFTER_TRANSFORMATIONS.dot` represents the graph after all transformations +were applied to it, just before it was exported to the `--output_file`. +Typically, this is a much smaller graph, and it conveys much more information +about each node. + +Again, these can be rendered to PDFs: + +``` +dot -Tpdf -O /tmp/toco_*.dot +``` + +The resulting files can be seen here: + +* [toco_AT_IMPORT.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf) +* [toco_AFTER_TRANSFORMATIONS.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf). + +### Legend for the graph visualizations + +* Operators are red square boxes with the following hues of red: + * Most operators are + bright + red. + * Some typically heavy operators (e.g. Conv) are rendered in a + darker + red. +* Arrays are octogons with the following colors: + * Constant arrays are + blue. + * Activation arrays are gray: + * Internal (intermediate) activation arrays are + light + gray. + * Those activation arrays that are designated as `--input_arrays` or + `--output_arrays` are + dark + gray. + * RNN state arrays are green. Because of the way that the converter + represents RNN back-edges explicitly, each RNN state is represented by a + pair of green arrays: + * The activation array that is the source of the RNN back-edge (i.e. + whose contents are copied into the RNN state array after having been + computed) is + light + green. + * The actual RNN state array is + dark + green. It is the destination of the RNN back-edge updating + it. diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md new file mode 100644 index 0000000000..cc6d416959 --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md @@ -0,0 +1,238 @@ +# TensorFlow Lite Optimizing Converter command-line reference + +This page is complete reference of command-line flags. It is complemented by the +following other documents: + +* [README](../README.md) +* [Command-line examples](cmdline_examples.md) + +Table of contents: + +[TOC] + +## High-level overview + +A full list and detailed specification of all flags is given in the next +section. For now we focus on a higher-level description of command lines: + +``` +toco \ + --input_format=... \ + --output_format=... \ + --input_file=... \ + --output_file=... \ + [model flags...] \ + [transformation flags...] \ + [logging flags...] +``` + +In other words, the converter requires at least the following mandatory flags: +`--input_format`, `--output_format`, `--input_file`, `--output_file`. Depending +on the input and output formats, additional flags may be allowed or mandatory: + +* *Model flags* provide additional information about the model stored in the + input file. + * `--output_array` or `--output_arrays` specify which arrays in the input + file are to be considered the output activations. + * `--input_array` or `--input_arrays` specify which arrays in the input + file are to be considered the input activations. + * `--input_shape` or `--input_shapes` specify the shapes of the input + arrays. + * `--mean_value` or `--mean_values`, and `--std_value` or `--std_values`, + give the dequantization parameters of the input arrays, for the case + when the output file will accept quantized input arrays. +* *Transformation flags* specify options of the transformations to be applied + to the graph, i.e. they specify requested properties that the output file + should have. + * `--input_type` specifies the type that the input arrays should have + after transformations, in the output file. This is where you choose + whether you want runtime inference code to accept float or quantized + inputs. This flag only applies to float or quantized inputs, and allows + to convert between the two. This flag has no effect on all other types + of inputs, such as ordinary integer arrays. + * `--inference_type` or `--inference_types` specify the type that generic + intermediate and output activation arrays should have after + transformations, in the output file. This is where you choose whether + you want runtime inference code to perform float or quantized inference + arithmetic. + * Some transformation flags allow to carry on with quantization when the + input graph is not properly quantized: `--default_ranges_min`, + `--default_ranges_max`, `--drop_fake_quant`, + `--reorder_across_fake_quant`. +* *Logging flags* described below. + +## Command-line flags complete reference + +### Mandatory flags + +* `--input_format`. Type: string. Specifies the format of the input file. + Allowed values: + * `TENSORFLOW_GRAPHDEF` — The TensorFlow GraphDef format. Both + binary and text proto formats are allowed. + * `TFLITE` — The TensorFlow Lite flatbuffers format. +* `--output_format`. Type: string. Specifies the format of the output file. + Allowed values: + * `TENSORFLOW_GRAPHDEF` — The TensorFlow GraphDef format. Always + produces a file in binary (not text) proto format. + * `TFLITE` — The TensorFlow Lite flatbuffers format. + * Whether a float or quantized TensorFlow Lite file will be produced + depends on the `--inference_type` flag. + * Whether the produced TensorFlow Lite file will accept a float or + quantized input depends on the `--input_type` flag. + * `GRAPHVIZ_DOT` — The GraphViz `.dot` format. This asks the + converter to generate a reasonable graphical representation of the graph + after simplification by a generic set of transformation. + * A typical `dot` command line to view the resulting graph might look + like: `dot -Tpdf -O file.dot`. + * Note that since passing this `--output_format` means losing the + information of which output format you actually care about, and + since the converter's transformations depend on the specific output + format, the resulting visualization may not fully reflect what you + would get on the actual output format that you are using. To avoid + that concern, and generally to get a visualization of exactly what + you get in your actual output format as opposed to just a merely + plausible visualization of a model, consider using `--dump_graphviz` + instead and keeping your true `--output_format`. +* `--input_file`. Type: string. Specifies the path of the input file. This may + be either an absolute or a relative path. +* `--output_file`. Type: string. Specifies the path of the output file. + +### Model flags + +* `--output_array`. Type: string. Specifies a single array as the output + activations. Incompatible with `--output_arrays`. +* `--output_arrays`. Type: comma-separated list of strings. Specifies a list + of arrays as the output activations, for models with multiple outputs. + Incompatible with `--output_array`. +* `--input_array`. Type: string. Specifies a single array as the input + activations. Incompatible with `--input_arrays`. +* `--input_arrays`. Type: comma-separated list of strings. Specifies a list of + arrays as the input activations, for models with multiple inputs. + Incompatible with `--input_array`. + +When `--input_array` is used, the following flags are available to provide +additional information about the single input array: + +* `--input_shape`. Type: comma-separated list of integers. Specifies the shape + of the input array, in TensorFlow convention: starting with the outer-most + dimension (the dimension corresponding to the largest offset stride in the + array layout), ending with the inner-most dimension (the dimension along + which array entries are typically laid out contiguously in memory). + * For example, a typical vision model might pass + `--input_shape=1,60,80,3`, meaning a batch size of 1 (no batching), an + input image height of 60, an input image width of 80, and an input image + depth of 3, for the typical case where the input image is a RGB bitmap + (3 channels, depth=3) stored by horizontal scanlines (so 'width' is the + next innermost dimension after 'depth'). +* `--mean_value` and `--std_value`. Type: floating-point. The decimal point + character is always the dot (`.`) regardless of the locale. These specify + the (de-)quantization parameters of the input array, to use when the output + file will take a quantized input array (that is, when passing + `--input_type=QUANTIZED_UINT8`). + * The meaning of mean_value and std_value is as follows: each quantized + value in the quantized input array will be interpreted as a mathematical + real number (i.e. as an input activation value) according to the + following formula: + * `real_value = (quantized_input_value - mean_value) / std_value`. + * When performing float inference (`--inference_type=FLOAT`) on a + quantized input, the quantized input would be immediately dequantized by + the inference code according to the above formula, before proceeding + with float inference. + * When performing quantized inference + (`--inference_type=QUANTIZED_UINT8`), no dequantization is ever to be + performed by the inference code; however, the quantization parameters of + all arrays, including those of the input arrays as specified by + mean_value and std_value, all participate in the determination of the + fixed-point multipliers used in the quantized inference code. + +When `--input_arrays` is used, the following flags are available to provide +additional information about the multiple input arrays: + +* `--input_shapes`. Type: colon-separated list of comma-separated lists of + integers. Each comma-separated list of integer gives the shape of one of the + input arrays specified in `--input_arrays`, in the same order. See + `--input_shape` for details. + * Example: `--input_arrays=foo,bar --input_shapes=2,3:4,5,6` means that + there are two input arrays. The first one, "foo", has shape [2,3]. The + second one, "bar", has shape [4,5,6]. +* `--mean_values`, `--std_values`. Type: comma-separated lists of + floating-point numbers. Each number gives the corresponding value for one of + the input arrays specified in `--input_arrays`, in the same order. See + `--mean_value`, `--std_value` for details. + +### Transformation flags + +* `--input_type`. Type: string. Specifies what should be the type of the + entries in the input array(s) in the output file, after transformations, for + those input arrays that are originally either floating-point or quantized + real numbers in the input file. If there are multiple such input arrays, + then they all use this type. Input arrays of other types, such as arrays of + plain integers or strings, are not concerned with this flag. Allowed values: + * `FLOAT` — Keep floating-point input arrays as such. Dequantize any + quantized input array. entries ("float32"). + * `QUANTIZED_UINT8` — Quantize floating-point input arrays, to have + 8-bit unsigned integer entries. The quantization params are specified by + `--mean_value`, `--std_value` flags as explained in the documentation of + these flags. +* `--inference_type`. Type: string. Specifies what to do with floating-point + arrays found in the input file, besides input arrays. In other words, this + controls the possible quantization of floating-point weights, intermediate + activations, and output activations. Has no effect on arrays that aren't + floating-point in the input file. Allowed values: + * `FLOAT` — Keep floating-point arrays as floating-point in the + output file. This corresponds to what is commonly called "floating-point + inference". + * `QUANTIZED_UINT8` — Quantize floating-point arrays, changing their + storage data type from float to some integer type: + * All float activations are quantized as `uint8`. + * Almost all float weights are quantized as `uint8`. + * A few exceptions exist. In particular, the bias-vectors in + "Conv" and "FullyConnected" layers are quantized as `int32` + instead for technical reasons. +* `--default_ranges_min`, `--default_ranges_max`. Type: floating-point. The + decimal point character is always the dot (`.`) regardless of the locale. + These flags enable what is called "dummy quantization". If defined, their + effect is to define fallback (min, max) range values for all arrays that do + not have a properly specified (min, max) range in the input file, thus + allowing to proceed with quantization of non-quantized or + incorrectly-quantized input files. This enables easy performance prototyping + ("how fast would my model run if I quantized it?") but should never be used + in production as the resulting quantized arithmetic is inaccurate. +* `--drop_fake_quant`. Type: boolean. Default: false. Causes fake-quantization + nodes to be dropped from the graph. This may be used to recover a plain + float graph from a fake-quantized graph. +* `--reorder_across_fake_quant`. Type: boolean. Default: false. Normally, + fake-quantization nodes must be strict boundaries for graph transformations, + in order to ensure that quantized inference has the exact same arithmetic + behavior as quantized training --- which is the whole point of quantized + training and of FakeQuant nodes in the first place. However, that entails + subtle requirements on where exactly FakeQuant nodes must be placed in the + graph. Some quantized graphs have FakeQuant nodes at unexpected locations, + that prevent graph transformations that are necessary in order to generate a + well-formed quantized representation of these graphs. Such graphs should be + fixed, but as a temporary work-around, setting this + reorder_across_fake_quant flag allows the converter to perform necessary + graph transformaitons on them, at the cost of no longer faithfully matching + inference and training arithmetic. + +### Logging flags + +The following are standard Google logging flags: + +* `--logtostderr` redirects Google logging to standard error, typically making + it visible in a terminal. +* `--v` sets verbose logging levels (for debugging purposes). Defined levels: + * `--v=1`: log all graph transformations that did make a change on the + graph. + * `--v=2`: log all graph transformations that did *not* make a change on + the graph. + +The following flags allow to generate graph visualizations of the actual graph +at various points during transformations: + +* `--dump_graphviz=/path` enables dumping of the graphs at various stages of + processing as GraphViz `.dot` files. Generally preferred over + `--output_format=GRAPHVIZ_DOT` as this allows you to keep your actually + relevant `--output_format`. +* `--dump_graphviz_video` enables dumping of the graph after every single + graph transformation (for debugging purposes). diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md new file mode 100644 index 0000000000..440f9c367c --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md @@ -0,0 +1,62 @@ +# TensorFlow Lite Optimizing Converter (TOCO) Python API reference + +## High-level overview + +While the TensorFlow Lite Optimizing Converter can be used from the command +line, it is often convenient to use it as part of Python model build and +training script. This is so that conversion can be part of your model +development pipeline. This allows you to know early and often that you are +designing a model that can be targeted to devices with mobile. + +## API + +In Python you can run `help(tf.contrib.lite)` to get documentation on functions. +In particular, `tf.contrib.lite.toco_convert` presents a simple API and +`tf.contrib.lite.toco_from_protos` allows more detailed control of TOCO using +the protobuf interface to TOCO. + +## Example + +In particular, here we show creating a simple model and converting it to a +TensorFlow Lite Model. + +```python +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +out = tf.identity(val, name="out") +with tf.Session() as sess: + tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) + open("test.tflite", "wb").write(tflite_modeL) +``` + +**NOTE** Currently, the TOCO command will cause a fatal error to the Python +interpreter when TOCO conversion fails. This will be remedied as soon as +possible. + +## Example 2: Export with variables + +If a model has variables, they need to be turned into constants. This process is +known as freezing, and it can actually be accomplished with + +```python +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +var = tf.get_variable("weights", dtype=tf.float32, shape=(1,64,64,3)) +val = img + var + +def canonical_name(x): + return x.name.split(":")[0] + +out = tf.identity(val, name="out") +with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + out_tensors = [out] + frozen_graphdef = tf.graph_util.convert_variables_to_constants( + sess, sess.graph_def, map(canonical_name, out_tensors)) + tflite_model = tf.contrib.lite.toco_convert( + frozen_graphdef, [img], out_tensors) + open("converted_model.tflite", "wb").write(tflite_model) +``` -- GitLab From fdf82fc5c26c848749b16e860925b92ed2d3d727 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 14 Nov 2017 11:45:24 -0800 Subject: [PATCH 0403/1801] Comment about python2 compatibility issue --- tensorflow/contrib/lite/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index b173936f5b..0c511f753a 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -77,6 +77,11 @@ Run bazel with the following command to build the demo. Build the demo app: bazel build --cxxopt='--std=c++11' //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo +### Note + +Currently, we only support building the Android demo app within a Python 2 +environment (due to a Bazel bug). + ### More about the demo The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app. -- GitLab From a2c3dab386857cd4fe63990c6bb3aa791e3fcaf3 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 14 Nov 2017 11:01:11 -0800 Subject: [PATCH 0404/1801] Tape stack in C++ instead of python. PiperOrigin-RevId: 175704617 --- tensorflow/python/eager/pywrap_tfe.h | 49 +++++---- tensorflow/python/eager/pywrap_tfe_src.cc | 121 ++++++++++++++++++---- tensorflow/python/eager/tape.py | 121 +++------------------- tensorflow/python/pywrap_tfe.i | 15 +-- 4 files changed, 153 insertions(+), 153 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index a67519f9a2..f96245f7a5 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -87,22 +87,36 @@ TFE_TensorHandle* EagerTensor_Handle(const PyObject* o); // newly created type, or nullptr on error. PyObject* TFE_Py_InitEagerTensor(PyObject* base_class); -PyObject* TFE_Py_NewTape(); -PyObject* TFE_Py_TapeShouldRecord(PyObject* py_tape, PyObject* tensors); -void TFE_Py_TapeWatch(PyObject* tape, tensorflow::int64 tensor_id); -void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id); - -// Records an operation in the gradient tape. `tape` should point to an object -// returned by TFE_Py_NewTape. op_type is a string for the operation type, used -// in the backprop code. output_tensors should be a list of python ops.Tensor -// objects. input_tensor_ids should be a list of python integers with the ids of -// the input tensors of the recorded operation. backward_function should be the -// function to be called during backprop to, given the gradients of the output -// tensors, produce the gradients of the input tensors. -void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, - PyObject* output_tensors, - PyObject* input_tensor_ids, - PyObject* backward_function); +// Pushes a new tape into the thread-local stack. +void TFE_Py_TapeStackPushNew(); + +// Pops the tape from the top of the stack and returns it. +PyObject* TFE_Py_TapeStackPop(); + +// Pushes an existing tape onto the stack. +void TFE_Py_TapeStackPush(PyObject* tape); + +// Returns true if the tape stack is empty. +PyObject* TFE_Py_TapeStackIsEmpty(); + +PyObject* TFE_Py_TapeStackShouldRecord(PyObject* tensors); +void TFE_Py_TapeStackWatch(PyObject* tensor); +void TFE_Py_TapeStackDeleteTrace(tensorflow::int64 tensor_id); + +// Records an operation in the gradient tape stack.type is a string for the +// operation type, used in the backprop code. output_tensors should be a list of +// python ops.Tensor objects. input_tensor_ids should be a list of python +// integers with the ids of the input tensors of the recorded +// operation. backward_function should be the function to be called during +// backprop to, given the gradients of the output tensors, produce the gradients +// of the input tensors. +void TFE_Py_TapeStackRecordOperation(PyObject* op_type, + PyObject* output_tensors, + PyObject* input_tensor_ids, + PyObject* backward_function); + +// Watches the given variable object on the given tape. +void TFE_Py_TapeStackWatchVariable(PyObject* variable); // Computes a gradient based on information recorded on the tape.`tape` must // have been produced by TFE_Py_NewTape. `vspace` must be a @@ -114,9 +128,6 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, PyObject* target, PyObject* sources, PyObject* output_gradients, TF_Status* status); -// Watches the given variable object on the given tape. -void TFE_Py_TapeWatchVariable(PyObject* tape, PyObject* variable); - // Returns the set of variables watched by the given tape. PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape); diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 5cb1313c4b..387eec1358 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include "tensorflow/python/eager/pywrap_tfe.h" #include "tensorflow/c/c_api.h" @@ -525,12 +527,65 @@ static PyTypeObject TFE_Py_Tape_Type = { "TFE_Py_Tape objects", /* tp_doc */ }; -PyObject* TFE_Py_NewTape() { +// xcode 7 doesn't define thread_local, so for compatibility we implement our +// own. TODO(apassos) remove once we can deprecate xcode 7. +#ifndef __APPLE__ +thread_local std::vector* tape_stack = nullptr; +std::vector* GetTapeStack() { + if (tape_stack == nullptr) { + tape_stack = new std::vector; + } + return tape_stack; +} +#else +static tensorflow::mutex stack_mu(tensorflow::LINKER_INITIALIZED); +static std::unordered_map*>* + tape_stack GUARDED_BY(stack_mu) = nullptr; +std::vector* GetTapeStack() { + tensorflow::mutex_lock ml(stack_mu); + if (tape_stack == nullptr) { + tape_stack = + new std::unordered_map*>; + } + auto it = tape_stack->find(std::this_thread::get_id()); + if (it != tape_stack->end()) { + return it->second; + } + return tape_stack + ->emplace(std::this_thread::get_id(), new std::vector) + .first->second; +} +#endif + +void TFE_Py_TapeStackPushNew() { TFE_Py_Tape_Type.tp_new = PyType_GenericNew; - if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return nullptr; + if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return; TFE_Py_Tape* tape = PyObject_NEW(TFE_Py_Tape, &TFE_Py_Tape_Type); tape->tape = new GradientTape(); - return reinterpret_cast(tape); + GetTapeStack()->push_back(tape); +} + +void TFE_Py_TapeStackPush(PyObject* tape) { + Py_INCREF(tape); + GetTapeStack()->push_back(reinterpret_cast(tape)); +} + +PyObject* TFE_Py_TapeStackIsEmpty() { + if (GetTapeStack()->empty()) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + +PyObject* TFE_Py_TapeStackPop() { + auto* stack = GetTapeStack(); + if (stack->empty()) { + PyErr_SetString(PyExc_RuntimeError, "tape stack is empty."); + return nullptr; + } + TFE_Py_Tape* top = stack->back(); + stack->pop_back(); + return reinterpret_cast(top); } static std::vector MakeIntList(PyObject* list) { @@ -557,10 +612,14 @@ static std::vector MakeIntList(PyObject* list) { return tensor_ids; } -PyObject* TFE_Py_TapeShouldRecord(PyObject* py_tape, PyObject* tensors) { +PyObject* TFE_Py_TapeStackShouldRecord(PyObject* tensors) { if (tensors == Py_None) { Py_RETURN_FALSE; } + auto* stack = GetTapeStack(); + if (stack->empty()) { + Py_RETURN_FALSE; + } PyObject* seq = PySequence_Fast(tensors, "expected a sequence"); if (seq == nullptr) { return nullptr; @@ -575,16 +634,22 @@ PyObject* TFE_Py_TapeShouldRecord(PyObject* py_tape, PyObject* tensors) { tensor_ids.push_back(FastTensorId(item)); } Py_DECREF(seq); - TFE_Py_Tape* tape = reinterpret_cast(py_tape); - if (tape->tape->ShouldRecord(tensor_ids)) { - Py_RETURN_TRUE; - } else { - Py_RETURN_FALSE; + for (TFE_Py_Tape* tape : *stack) { + if (tape->tape->ShouldRecord(tensor_ids)) { + Py_RETURN_TRUE; + } } + Py_RETURN_FALSE; } -void TFE_Py_TapeWatch(PyObject* tape, tensorflow::int64 tensor_id) { - reinterpret_cast(tape)->tape->Watch(tensor_id); +void TFE_Py_TapeStackWatch(PyObject* tensor) { + tensorflow::int64 tensor_id = FastTensorId(tensor); + if (PyErr_Occurred()) { + return; + } + for (TFE_Py_Tape* tape : *GetTapeStack()) { + tape->tape->Watch(tensor_id); + } } static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { @@ -646,8 +711,10 @@ std::vector MakeTensorIDList(PyObject* tensors) { return list; } -void TFE_Py_TapeWatchVariable(PyObject* tape, PyObject* variable) { - reinterpret_cast(tape)->tape->WatchVariable(variable); +void TFE_Py_TapeStackWatchVariable(PyObject* variable) { + for (TFE_Py_Tape* tape : *GetTapeStack()) { + tape->tape->WatchVariable(variable); + } } PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape) { @@ -661,10 +728,14 @@ PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape) { return result; } -void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, - PyObject* output_tensors, - PyObject* input_tensors, - PyObject* backward_function) { +void TFE_Py_TapeStackRecordOperation(PyObject* op_type, + PyObject* output_tensors, + PyObject* input_tensors, + PyObject* backward_function) { + auto* stack = GetTapeStack(); + if (stack->empty()) { + return; + } std::vector input_ids = MakeTensorIDList(input_tensors); std::vector output_info; PyObject* seq = PySequence_Fast(output_tensors, @@ -697,14 +768,18 @@ void TFE_Py_TapeRecordOperation(PyObject* tape, PyObject* op_type, return; } - Py_INCREF(backward_function); - reinterpret_cast(tape)->tape->RecordOperation( - op_type_str, output_info, input_ids, backward_function, - [backward_function]() { Py_DECREF(backward_function); }); + for (TFE_Py_Tape* tape : *stack) { + Py_INCREF(backward_function); + tape->tape->RecordOperation( + op_type_str, output_info, input_ids, backward_function, + [backward_function]() { Py_DECREF(backward_function); }); + } } -void TFE_Py_TapeDeleteTrace(PyObject* tape, tensorflow::int64 tensor_id) { - reinterpret_cast(tape)->tape->DeleteTrace(tensor_id); +void TFE_Py_TapeStackDeleteTrace(tensorflow::int64 tensor_id) { + for (TFE_Py_Tape* tape : *GetTapeStack()) { + tape->tape->DeleteTrace(tensor_id); + } } class PyVSpace : public tensorflow::eager::VSpace { diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index fb6b62a3e0..440c84b7ea 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -18,106 +18,24 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import contextlib -import threading from tensorflow.python import pywrap_tensorflow -def tid(tensor): - return tensor._id # pylint: disable=protected-access - - -class TapeEntry( - collections.namedtuple("TapeEntry", [ - "op_type", - "output_ids", "input_ids", "backward_function", - "output_shape_and_dtype", - ])): - """Entry in the gradient tape. - - Represents the execution of one op or function, with instructions for doing - its backward pass and useful information for it. - - Args: - output_ids: tensor_id(t) for each output tensor T - input_ids: tensor_id(t) for each input tensor T - backward_function: function to be called with the downstream gradients and - side outputs as arguments which computes the backward pass. - output_shape_and_dtype: a list of (shape_tuple, dtype) for every output - tensor_id - """ - - -def _tensor_shape(t): - return t._shape_tuple() # pylint: disable=protected-access - - class Tape(object): """Represents a gradient propagation trace.""" - def __init__(self): - self._tape = pywrap_tensorflow.TFE_Py_NewTape() - - def should_record(self, tensors): - """Returns true if any tensor should be recorded. - - Args: - tensors: some tensors. - - Returns: - True if any of the tensors is in the tape. - """ - return pywrap_tensorflow.TFE_Py_TapeShouldRecord( - self._tape, tensors) - - def watch(self, tensor): - """Adds a tensor to the tape.""" - pywrap_tensorflow.TFE_Py_TapeWatch(self._tape, tid(tensor)) - - def watch_variable(self, v): - pywrap_tensorflow.TFE_Py_TapeWatchVariable(self._tape, v) + def __init__(self, tape): + self._tape = tape def watched_variables(self): return pywrap_tensorflow.TFE_Py_TapeWatchedVariables(self._tape) - def record_operation(self, op_type, output_tensors, input_tensors, - backward_function): - """Records an operation in the tape.""" - pywrap_tensorflow.TFE_Py_TapeRecordOperation( - self._tape, - op_type, - output_tensors, - input_tensors, - backward_function) - - def _delete_tensor_id(self, i): - pywrap_tensorflow.TFE_Py_TapeDeleteTrace(self._tape, i) - - def delete_trace(self, tensor_id): - """Deletes any trace we have for this tensor.""" - self._delete_tensor_id(tensor_id) - - -class _TapeStack(threading.local): - - def __init__(self): - super(_TapeStack, self).__init__() - self._stack = [] - - @property - def stack(self): - return self._stack - - -# The global tape stack. -_tape_stack = _TapeStack() - def push_new_tape(): """Pushes a new tape onto the tape stack.""" - _tape_stack.stack.append(Tape()) + pywrap_tensorflow.TFE_Py_TapeStackPushNew() def watch(tensor): @@ -126,8 +44,7 @@ def watch(tensor): Args: tensor: tensor to be watched. """ - for t in _tape_stack.stack: - t.watch(tensor) + pywrap_tensorflow.TFE_Py_TapeStackWatch(tensor) def watch_variable(variable): @@ -136,48 +53,42 @@ def watch_variable(variable): Args: variable: variable to be watched. """ - for t in _tape_stack.stack: - t.watch_variable(variable) + pywrap_tensorflow.TFE_Py_TapeStackWatchVariable(variable) def pop_tape(): """Pops the top tape in the stack, if any.""" - if _tape_stack.stack: - return _tape_stack.stack.pop() - return None + return Tape(pywrap_tensorflow.TFE_Py_TapeStackPop()) @contextlib.contextmanager def stop_recording(): - old = _tape_stack.stack - _tape_stack._stack = [] # pylint: disable=protected-access + stack = [] + while not pywrap_tensorflow.TFE_Py_TapeStackIsEmpty(): + stack.append(pop_tape()._tape) # pylint: disable=protected-access try: yield finally: - _tape_stack._stack = old # pylint: disable=protected-access + for tape in reversed(stack): + pywrap_tensorflow.TFE_Py_TapeStackPush(tape) def should_record(tensors): """Returns true if any tape in the stack watches any of these tensors.""" - if not _tape_stack.stack: - return False - return any(x.should_record(tensors) for x in _tape_stack.stack) + return pywrap_tensorflow.TFE_Py_TapeStackShouldRecord(tensors) def record_operation(op_type, output_tensors, input_tensors, backward_function): """Records the operation on all tapes in the stack.""" - for t in _tape_stack.stack: - t.record_operation(op_type, output_tensors, - input_tensors, - backward_function) + pywrap_tensorflow.TFE_Py_TapeStackRecordOperation( + op_type, output_tensors, input_tensors, backward_function) def delete_trace(tensor_id): """Deletes traces for this Tensor from all tapes in the stack.""" - for t in _tape_stack.stack: - t.delete_trace(tensor_id) + pywrap_tensorflow.TFE_Py_TapeStackDeleteTrace(tensor_id) def could_possibly_record(): """Returns True if any tape is active.""" - return len(_tape_stack.stack) > 0 # pylint: disable=g-explicit-length-test + return not pywrap_tensorflow.TFE_Py_TapeStackIsEmpty() diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 5ca0e57286..82b154164e 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -24,13 +24,16 @@ limitations under the License. %rename("%s") TFE_Py_RegisterExceptionClass; %rename("%s") TFE_Py_Execute; %rename("%s") TFE_Py_UID; -%rename("%s") TFE_Py_NewTape; -%rename("%s") TFE_Py_TapeShouldRecord; -%rename("%s") TFE_Py_TapeWatch; -%rename("%s") TFE_Py_TapeDeleteTrace; -%rename("%s") TFE_Py_TapeRecordOperation; +%rename("%s") TFE_Py_TapeStackPushNew; +%rename("%s") TFE_Py_TapeStackPush; +%rename("%s") TFE_Py_TapeStackPop; +%rename("%s") TFE_Py_TapeStackIsEmpty; +%rename("%s") TFE_Py_TapeStackShouldRecord; +%rename("%s") TFE_Py_TapeStackWatch; +%rename("%s") TFE_Py_TapeStackDeleteTrace; +%rename("%s") TFE_Py_TapeStackRecordOperation; +%rename("%s") TFE_Py_TapeStackWatchVariable; %rename("%s") TFE_Py_TapeGradient; -%rename("%s") TFE_Py_TapeWatchVariable; %rename("%s") TFE_Py_TapeWatchedVariables; %rename("%s") TFE_NewContextOptions; %rename("%s") TFE_ContextOptionsSetConfig; -- GitLab From 6408777b225b742720fb6575addd3643fc57f0b1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 11:57:40 -0800 Subject: [PATCH 0405/1801] [tpu:profiler] Add matrix unit utilization to TfOpStats. PiperOrigin-RevId: 175713542 --- tensorflow/contrib/tpu/profiler/tf_op_stats.proto | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index d8ee243790..2d2207a43f 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -124,4 +124,6 @@ message TfOpStats { optional LoopingResult looping = 4; // The result for the HloExtraInfoMap. optional HloExtraInfoMapResult hlo_extrainfo_map = 5; + // Overall matrix unit utilization in percentage. + optional double matrix_unit_utilization_percent = 6; } -- GitLab From 7a346347ee5a4078e2bd1cca00247c4219af326c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 12:02:21 -0800 Subject: [PATCH 0406/1801] Adding an optimizer module for preconditioned stochastic gradient langevin dynamics. PiperOrigin-RevId: 175714379 --- tensorflow/contrib/bayesflow/BUILD | 21 ++ tensorflow/contrib/bayesflow/__init__.py | 3 +- .../kernel_tests/sgld_optimizer_test.py | 209 +++++++++++++++++ .../bayesflow/python/ops/optimizers.py | 34 +++ .../bayesflow/python/ops/sgld_optimizer.py | 216 ++++++++++++++++++ 5 files changed, 482 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py create mode 100644 tensorflow/contrib/bayesflow/python/ops/optimizers.py create mode 100644 tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index f92b57869e..9f3650e8f9 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -160,6 +160,27 @@ cuda_py_test( ], ) +cuda_py_test( + name = "sgld_optimizer_test", + size = "small", + srcs = ["python/kernel_tests/sgld_optimizer_test.py"], + additional_deps = [ + ":bayesflow_py", + "//third_party/py/numpy", + "//tensorflow/contrib/distributions:distributions_py", + "//tensorflow/contrib/layers:layers_py", + "//tensorflow/python/ops/distributions", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_seed", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index beaf6f1854..a638753f2f 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -27,6 +27,7 @@ from tensorflow.contrib.bayesflow.python.ops import halton_sequence from tensorflow.contrib.bayesflow.python.ops import hmc from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo +from tensorflow.contrib.bayesflow.python.ops import optimizers # pylint: enable=unused-import,line-too-long from tensorflow.python.util.all_util import remove_undocumented @@ -34,7 +35,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['csiszar_divergence', 'custom_grad', 'entropy', 'metropolis_hastings', 'monte_carlo', 'halton_sequence', - 'hmc', 'special_math', 'stochastic_variables', + 'hmc', 'optimizers', 'special_math', 'stochastic_variables', 'variational_inference'] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py new file mode 100644 index 0000000000..66793383fd --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py @@ -0,0 +1,209 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional test for GradientDescent.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import math +from tensorflow.contrib.bayesflow.python.ops.optimizers import SGLDOptimizer +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class SGLDOptimizerTest(test.TestCase): + + def testBasic(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.1, 2.1], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + decay_rate = 0.53 + sgd_op = SGLDOptimizer( + 3.0, preconditioner_decay_rate=decay_rate).apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + + (1 - decay_rate) * 0.1**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) + grads_scaled = (0.5 * 0.01 / math.sqrt( + decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval()) + + def testBasicMultiInstance(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.1, 2.1], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + vara = variables.Variable([1.1, 2.1], dtype=dtype) + varb = variables.Variable([3.0, 4.0], dtype=dtype) + gradsa = constant_op.constant([0.1, 0.1], dtype=dtype) + gradsb = constant_op.constant([0.01, 0.01], dtype=dtype) + decay_rate = 0.5 + sgd_optimizer = SGLDOptimizer(3.0, preconditioner_decay_rate=decay_rate) + sgd_op = sgd_optimizer.apply_gradients( + zip([grads0, grads1], [var0, var1])) + sgd_optimizer2 = SGLDOptimizer( + 3.0, preconditioner_decay_rate=decay_rate) + sgd_op2 = sgd_optimizer2.apply_gradients( + zip([gradsa, gradsb], [vara, varb])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + self.assertAllCloseAccordingToType([1.1, 2.1], vara.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], varb.eval()) + + # Run 1 step of sgd + sgd_op.run() + sgd_op2.run() + # Validate updated params + grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + + (1 - decay_rate) * 0.1**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) + self.assertAllCloseAccordingToType( + [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], vara.eval()) + + grads_scaled = (0.5 * 0.01 / math.sqrt( + decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval()) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], varb.eval()) + self.assertNotEqual(sgd_optimizer.variable_scope, + sgd_optimizer2.variable_scope) + self.assertNotEqual(sgd_optimizer.variable_scope.name, + sgd_optimizer2.variable_scope.name) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.1, 2.1], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + lrate = constant_op.constant(3.0) + decay_rate = 0.5 + sgd_op = SGLDOptimizer( + lrate, preconditioner_decay_rate=constant_op.constant( + decay_rate)).apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + + (1 - decay_rate) * 0.1**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) + grads_scaled = (0.5 * 0.01 / math.sqrt( + decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval()) + + def testGradWrtRef(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + opt = SGLDOptimizer(3.0) + values = [1.0, 3.0] + vars_ = [variables.Variable([v], dtype=dtype) for v in values] + grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_) + variables.global_variables_initializer().run() + for grad, _ in grads_and_vars: + self.assertAllCloseAccordingToType([1.0], grad.eval()) + + def testWithGlobalStep(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + global_step = variables.Variable(0, trainable=False) + var0 = variables.Variable([1.1, 2.1], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + decay_rate = 0.1 + sgd_op = SGLDOptimizer( + 3.0, preconditioner_decay_rate=decay_rate).apply_gradients( + zip([grads0, grads1], [var0, var1]), global_step=global_step) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + + # Validate updated params and global_step + grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + + (1 - decay_rate) * 0.1**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) + grads_scaled = (0.5 * 0.01 / math.sqrt( + decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval()) + self.assertAllCloseAccordingToType(1, global_step.eval()) + + def testSparseBasic(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([[1.1], [2.1]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant([0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant([0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + decay_rate = 0.9 + sgd_op = SGLDOptimizer( + 3.0, preconditioner_decay_rate=decay_rate).apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.1], [2.1]], var0.eval()) + self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + + (1 - decay_rate) * 0.1**2 + 1e-8)) + self.assertAllCloseAccordingToType([[1.1 - 3.0 * grads_scaled], [2.1]], + var0.eval()) + grads_scaled = (0.5 * 0.01 / math.sqrt( + decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) + self.assertAllCloseAccordingToType( + [[3.0 - 3.0 * 0], [4.0 - 3.0 * grads_scaled]], var1.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/optimizers.py b/tensorflow/contrib/bayesflow/python/ops/optimizers.py new file mode 100644 index 0000000000..ee32e6b5c3 --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/ops/optimizers.py @@ -0,0 +1,34 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Probabilistic optimizer modules. + +See ${python/contrib.bayesflow.optimizers}. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# go/tf-wildcard-import +# pylint: disable=wildcard-import +from tensorflow.contrib.bayesflow.python.ops.sgld_optimizer import * +# pylint: enable=wildcard-import +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + 'SGLDOptimizer', +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py b/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py new file mode 100644 index 0000000000..5d36ea7a2b --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py @@ -0,0 +1,216 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""An optimizer module for stochastic gradient Langevin dynamics.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope as varscope_ops +from tensorflow.python.training import optimizer +from tensorflow.python.training import training_ops + + +class SGLDOptimizer(optimizer.Optimizer): + """An optimizer module for stochastic gradient Langevin dynamics. + + This implements the preconditioned Stochastic Gradient Langevin Dynamics + optimizer [1]. The optimization variable is regarded as a sample from the + posterior under Stochastic Gradient Langevin Dynamics with noise rescaled in + each dimension according to RMSProp [2]. + + Note: If a prior is included in the loss, it should be scaled by + `1/num_pseudo_batches`, where num_pseudo_batches is the number of minibatches + in the data. I.e., it should be divided by the `num_pseudo_batches` term + described below. + + [1]: "Preconditioned Stochastic Gradient Langevin Dynamics for Deep Neural + Networks." Chunyuan Li, Changyou Chen, David Carlson, Lawrence Carin. + ArXiv:1512.07666, 2015. https://arxiv.org/abs/1512.07666 + [2]: http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf + + Args: + learning_rate: Scalar `float`-like `Tensor`. The base learning rate for the + optimizer. Must be tuned to the specific function being minimized. + preconditioner_decay_rate: Scalar `float`-like `Tensor`. The exponential + decay rate of the rescaling of the preconditioner (RMSprop). (This is + "alpha" in [1]). Should be smaller than but nearly `1` to approximate + sampling from the posterior. (Default: `0.95`) + num_pseudo_batches: Scalar `int`-like `Tensor`. The effective number of + minibatches in the data set. Trades off noise and prior with the SGD + likelihood term. Note: Assumes the loss is taken as the mean over a + minibatch. Otherwise if the sum was taken, divide this number by the + batch size. (Default: `1`) + burnin: Scalar `int`-like `Tensor`. The number of iterations to collect + gradient statistics to update the preconditioner before starting to draw + noisy samples. (Default: `25`) + diagonal_bias: Scalar `float`-like `Tensor`. Term added to the diagonal of + the preconditioner to prevent the preconditioner from degenerating. + (Default: `1e-8`) + name: Python `str` describing ops managed by this function. + (Default: `"SGLDOptimizer"`) + variable_scope: Variable scope used for calls to `tf.get_variable`. + If `None`, a new variable scope is created using name + `ops.get_default_graph().unique_name(name or default_name)`. + + Raises: + InvalidArgumentError: If preconditioner_decay_rate is a `Tensor` not in + `(0,1]`. + """ + + def __init__(self, + learning_rate, + preconditioner_decay_rate=0.95, + num_pseudo_batches=1, + burnin=25, + diagonal_bias=1e-8, + name=None, + variable_scope=None): + default_name = 'SGLDOptimizer' + with ops.name_scope(name, default_name, [ + learning_rate, preconditioner_decay_rate, num_pseudo_batches, burnin, + diagonal_bias + ]): + if variable_scope is None: + var_scope_name = ops.get_default_graph().unique_name( + name or default_name) + with varscope_ops.variable_scope(var_scope_name) as scope: + self._variable_scope = scope + else: + self._variable_scope = variable_scope + + self._preconditioner_decay_rate = ops.convert_to_tensor( + preconditioner_decay_rate, name='preconditioner_decay_rate') + self._num_pseudo_batches = ops.convert_to_tensor( + num_pseudo_batches, name='num_pseudo_batches') + self._burnin = ops.convert_to_tensor(burnin, name='burnin') + self._diagonal_bias = ops.convert_to_tensor( + diagonal_bias, name='diagonal_bias') + self._learning_rate = ops.convert_to_tensor( + learning_rate, name='learning_rate') + + with varscope_ops.variable_scope(self._variable_scope): + self._counter = varscope_ops.get_variable( + 'counter', initializer=0, trainable=False) + + self._preconditioner_decay_rate = control_flow_ops.with_dependencies([ + check_ops.assert_non_negative( + self._preconditioner_decay_rate, + message='`preconditioner_decay_rate` must be non-negative'), + check_ops.assert_less_equal( + self._preconditioner_decay_rate, + 1., + message='`preconditioner_decay_rate` must be at most 1.'), + ], self._preconditioner_decay_rate) + + self._num_pseudo_batches = control_flow_ops.with_dependencies([ + check_ops.assert_greater( + self._num_pseudo_batches, + 0, + message='`num_pseudo_batches` must be greater than zero') + ], self._num_pseudo_batches) + + self._burnin = control_flow_ops.with_dependencies([ + check_ops.assert_non_negative( + self._burnin, message='`burnin` must be non-negative'), + check_ops.assert_integer( + self._burnin, message='`burnin` must be an integer') + ], self._burnin) + + self._diagonal_bias = control_flow_ops.with_dependencies([ + check_ops.assert_non_negative( + self._diagonal_bias, + message='`diagonal_bias` must be non-negative') + ], self._diagonal_bias) + + super(SGLDOptimizer, self).__init__(use_locking=False, + name=name or default_name) + + def _create_slots(self, var_list): + for v in var_list: + init_rms = init_ops.ones_initializer(dtype=v.dtype) + self._get_or_make_slot_with_initializer(v, init_rms, v.get_shape(), + v.dtype, 'rms', self._name) + + def _prepare(self): + # We need to put the conversion and check here because a user will likely + # want to decay the learning rate dynamically. + self._learning_rate_tensor = control_flow_ops.with_dependencies([ + check_ops.assert_non_negative( + self._learning_rate, message='`learning_rate` must be non-negative') + ], ops.convert_to_tensor(self._learning_rate, name='learning_rate_tensor')) + self._decay_tensor = ops.convert_to_tensor( + self._preconditioner_decay_rate, name='preconditioner_decay_rate') + + super(SGLDOptimizer, self)._prepare() + + def _apply_dense(self, grad, var): + rms = self.get_slot(var, 'rms') + + with ops.control_dependencies([ + self._update_momentum(rms, grad, math_ops.cast(self._decay_tensor, + var.dtype.base_dtype))]): + new_grad = self._apply_noisy_update(rms, grad) + + return training_ops.apply_gradient_descent( + var, + math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), + new_grad, + use_locking=self._use_locking).op + + def _apply_sparse(self, grad, var): + rms = self.get_slot(var, 'rms') + + with ops.control_dependencies([ + self._update_momentum(rms, grad, math_ops.cast(self._decay_tensor, + var.dtype.base_dtype))]): + new_grad = self._apply_noisy_update(rms, grad) + + return training_ops.apply_gradient_descent( + var, + math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), + new_grad, + use_locking=self._use_locking).op + + @property + def variable_scope(self): + """Variable scope of all calls to `tf.get_variable`.""" + return self._variable_scope + + def _apply_noisy_update(self, mom, grad): + # Compute and apply the gradient update following + # preconditioned Langevin dynamics + stddev = array_ops.where( + array_ops.squeeze(self._counter > self._burnin), + math_ops.cast(math_ops.rsqrt(self._learning_rate), grad.dtype), + array_ops.zeros([], grad.dtype)) + + preconditioner = math_ops.rsqrt( + mom + math_ops.cast(self._diagonal_bias, grad.dtype)) + return ( + 0.5 * preconditioner * grad * math_ops.cast(self._num_pseudo_batches, + grad.dtype) + + random_ops.random_normal(array_ops.shape(grad), 1.0, dtype=grad.dtype) * + stddev * math_ops.sqrt(preconditioner)) + + def _update_momentum(self, mom, grad, decay): + # Keep an exponentially weighted moving average of squared gradients. + # Not thread safe + return mom.assign_add((1.0 - decay) * (math_ops.square(grad) - mom)) -- GitLab From 317c011b19c90c8aeed4ce200d33f68b56311150 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 12:02:25 -0800 Subject: [PATCH 0407/1801] Fixed a bug in tensorflow::str_util::CUnescape. Added a str util test that failed without this change. The CUnescape did a const_cast to the result string's buffer, which made it write the same buffer without copying. PiperOrigin-RevId: 175714391 --- .../xla/tools/parser/hlo_parser_test.cc | 2 +- tensorflow/core/lib/strings/str_util.cc | 28 +++++++++++++++---- tensorflow/core/lib/strings/str_util_test.cc | 13 +++++++++ 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 8eeed339b8..29ae3296ca 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -866,7 +866,7 @@ TEST_F(HloParserTest, CommaBetweenSubAttributes) { const string original = R"(HloModule test_comma_module: ENTRY %test_comma.v4 () -> f32[] { - ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="const"} + ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"} } )"; diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index 240e1454e5..d28857803d 100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -84,15 +84,32 @@ inline int hex_digit_to_int(char c) { return x & 0xf; } -bool CUnescapeInternal(StringPiece source, char* dest, +bool CUnescapeInternal(StringPiece source, string* dest, string::size_type* dest_len, string* error) { - char* d = dest; const char* p = source.data(); const char* end = source.end(); const char* last_byte = end - 1; + // We are going to write the result to dest with its iterator. If our string + // implementation uses copy-on-write, this will trigger a copy-on-write of + // dest's buffer; that is, dest will be assigned a new buffer. + // + // Note that the following way is NOT a legal way to modify a string's + // content: + // + // char* d = const_cast(dest->data()); + // + // This won't trigger copy-on-write of the string, and so is dangerous when + // the buffer is shared. + auto d = dest->begin(); + // Small optimization for case where source = dest and there's no escaping - while (p == d && p < end && *p != '\\') p++, d++; + if (source.data() == dest->data()) { + while (p < end && *p != '\\') { + p++; + d++; + } + } while (p < end) { if (*p != '\\') { @@ -192,7 +209,7 @@ bool CUnescapeInternal(StringPiece source, char* dest, p++; // read past letter we escaped } } - *dest_len = d - dest; + *dest_len = d - dest->begin(); return true; } @@ -215,8 +232,7 @@ bool SplitAndParseAsInts(StringPiece text, char delim, bool CUnescape(StringPiece source, string* dest, string* error) { dest->resize(source.size()); string::size_type dest_size; - if (!CUnescapeInternal(source, const_cast(dest->data()), &dest_size, - error)) { + if (!CUnescapeInternal(source, dest, &dest_size, error)) { return false; } dest->erase(dest_size); diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc index 5c735a87a3..d5909d17aa 100644 --- a/tensorflow/core/lib/strings/str_util_test.cc +++ b/tensorflow/core/lib/strings/str_util_test.cc @@ -43,6 +43,19 @@ TEST(CUnescape, Basic) { EXPECT_EQ("\320hi\200", ExpectCUnescapeSuccess("\\320hi\\200")); } +TEST(CUnescape, HandlesCopyOnWriteStrings) { + string dest = "hello"; + string read = dest; + // For std::string, read and dest now share the same buffer. + + string error; + StringPiece source = "llohe"; + // CUnescape is going to write "llohe" to dest, so dest's buffer will be + // reallocated, and read's buffer remains untouched. + EXPECT_TRUE(str_util::CUnescape(source, &dest, &error)); + EXPECT_EQ("hello", read); +} + TEST(StripTrailingWhitespace, Basic) { string test; test = "hello"; -- GitLab From 52467b8e68122560439534e3bc9ea266408fbfa7 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 14 Nov 2017 12:24:41 -0800 Subject: [PATCH 0408/1801] Docs fixes (#14561) --- tensorflow/contrib/lite/README.md | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 0c511f753a..feb35c850e 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -43,7 +43,7 @@ The simplest way to compile the demo app, and try out changes to the project cod ### Install Bazel If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html) -NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/demo/TfLiteCameraDemo.apk) instead. +NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead. ### Install Android NDK and SDK Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system. @@ -51,9 +51,6 @@ Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and - The Android NDK is required to build the native (C/C++) TensorFlow code. The current recommended version is 14b, which may be found [here](https://developer.android.com/tools/revisions/build-tools.html). - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TensorFlow Android demo (though it will run on API >= 21 devices). - - The Android NDK is required to build the native (C/C++) TensorFlow Lite code. The current recommended version is 14b, which can be found [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads). - - - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TF Android demo (though it will run on API >= 21 devices). - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.` ``` @@ -69,7 +66,7 @@ android_ndk_repository( api_level=19) ``` -Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md) +Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md). ### Build the source code Run bazel with the following command to build the demo. @@ -100,7 +97,7 @@ The demo is resizing each camera image frame to (224 width * 224 height) to matc [On Device Smart Reply](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) is an on-device model which provides one-touch replies for an incoming text message by suggesting contextually relevant messages. The model is built specifically for memory constrained devices such as watches & phones and it has been successfully used to surface [Smart Replies on Android Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html). Note that this model only works on Android as of now. -These pre-trained models can be downloaded from [here](models.md). +These pre-trained models can be downloaded from [here](g3doc/models.md). ### Retrain Inception-V3 or MobileNet for a custom data set The above pre-trained models have been trained on the ImageNet data set, which consists of 1000 predefined classes. A model will need to be re-trained if these classes are not relevant or useful for a given use case. This technique is called transfer learning, which starts with a model that has been already trained on a problem and will then be retrained on a similar problem. Deep learning from scratch can take days, but transfer learning can be done fairly quickly. In order to do this, a developer will need to generate their custom data set labeled with the relevant classes. @@ -135,7 +132,7 @@ Since we employ several formats, the following definitions may be useful: ### Freeze Graph To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as “freezing” the graph. -The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md) +The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)). Graph freezing can be done using the command below (and modifying the arguments appropriately) @@ -150,7 +147,7 @@ bazel-bin/tensorflow/python/tools/freeze_graph\ ``` The user has to first build the freeze_graph script using bazel and then run the script. The input_binary flag has to be enabled to ensure that the protobuf is read and written in binary format. The user has to input the .pb and the .ckpt files to freeze the graph The output_node_names may not be obvious outside of the code that built the model. The easiest way to find them is to visualize the graph, either with -graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2/#3). +graphviz, or in tensorboard. This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS. On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool. @@ -169,11 +166,11 @@ bazel run --config=opt tensorflow/contrib/lite/toco:toco -- \ - The input_file argument should point to the frozen GraphDef file that holds the model architecture. - The output_file argument should point to where the TensorFlow Lite model file should be generated. -- The input_type and inference_type arguments should be set to FLOAT, unless converted a [quantized](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/) model. -- Setting the input_array, output_array and input_shape arguments are a bit trickier. The easiest way to find these values is to explore the graph in tensorboard . The user should reuse the arguments that were used for specifying the output nodes for inference in the `freeze_graph`step. +- The input_type and inference_type arguments should be set to FLOAT, unless converting a quantized model. +- Setting the input_array, output_array and input_shape arguments are a bit trickier. The easiest way to find these values is to explore the graph in TensorBoard. The user should reuse the arguments that were used for specifying the output nodes for inference in the `freeze_graph`step. Note, it is also possible to use the Tensorflow Optimizing Converter through protos either from Python or from the command line see the -documentation [here](https://github.com/tensorflow/tensorflow/tree/mastertensorflow/contrib/lite/python:toco_from_protos target) A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example, +documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/README.md). A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example, ``` import tensorflow as tf @@ -188,18 +185,18 @@ with tf.Session() as sess: ``` For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md). -You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). +You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). ## Step 3. Use the TensorFlow Lite model for inference in a mobile app After completion of Step 2 the developer should have a .lite model. ### For Android -Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). +Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). -The [demo app] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). +The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). ### For iOS -Follow the documentation [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app. +Follow the documentation [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/ios.md) to integrate a TFLite model into your app. -- GitLab From a6a562f1174159143c5fcf85f494eaf511bcf168 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 12:06:05 -0800 Subject: [PATCH 0409/1801] Added support for multi-column vocab files to tf.contrib.lookup.index_table_from_file PiperOrigin-RevId: 175715120 --- .../python/kernel_tests/lookup_ops_test.py | 31 +++++++++++++++++ tensorflow/python/ops/lookup_ops.py | 33 +++++++++++++++---- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index 76c790a0a2..9944b5929f 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -281,6 +281,37 @@ class IndexTableFromFile(test.TestCase): lookup_ops.tables_initializer().run() self.assertAllEqual((1, 2, 3), ids.eval()) + def test_string_index_table_from_multicolumn_file(self): + vocabulary_file = self._createVocabFile( + "f2i_vocab1.txt", values=("brain\t300", "salad\t20", "surgery\t1")) + with self.test_session(): + table = lookup_ops.index_table_from_file( + vocabulary_file=vocabulary_file, + num_oov_buckets=1, + key_column_index=0, + value_column_index=lookup_ops.TextFileIndex.LINE_NUMBER) + ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"])) + + self.assertRaises(errors_impl.OpError, ids.eval) + lookup_ops.tables_initializer().run() + self.assertAllEqual((1, 2, 3), ids.eval()) + + def test_string_index_table_from_multicolumn_file_custom_delimiter(self): + vocabulary_file = self._createVocabFile( + "f2i_vocab1.txt", values=("brain 300", "salad 20", "surgery 1")) + with self.test_session(): + table = lookup_ops.index_table_from_file( + vocabulary_file=vocabulary_file, + num_oov_buckets=1, + key_column_index=0, + value_column_index=lookup_ops.TextFileIndex.LINE_NUMBER, + delimiter=" ") + ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"])) + + self.assertRaises(errors_impl.OpError, ids.eval) + lookup_ops.tables_initializer().run() + self.assertAllEqual((1, 2, 3), ids.eval()) + def test_string_index_table_from_file_tensor_filename(self): vocabulary_file = self._createVocabFile("f2i_vocab1.txt") with self.test_session(): diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index fa58ffc37e..f28eadf248 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -864,7 +864,10 @@ def index_table_from_file(vocabulary_file=None, default_value=-1, hasher_spec=FastHashSpec, key_dtype=dtypes.string, - name=None): + name=None, + key_column_index=TextFileIndex.WHOLE_LINE, + value_column_index=TextFileIndex.LINE_NUMBER, + delimiter="\t"): """Returns a lookup table that converts a string tensor into int64 IDs. This operation constructs a lookup table to convert tensor of strings into @@ -881,6 +884,16 @@ def index_table_from_file(vocabulary_file=None, The underlying table must be initialized by calling `tf.tables_initializer.run()` or `table.init.run()` once. + To specify multi-column vocabulary files, use key_column_index and + value_column_index and delimiter. + + - TextFileIndex.LINE_NUMBER means use the line number starting from zero, + expects data type int64. + - TextFileIndex.WHOLE_LINE means use the whole line content, expects data + type string. + - A value >=0 means use the index (starting at zero) of the split line based + on `delimiter`. + Sample Usages: If we have a vocabulary file "test.txt" with the following content: @@ -912,6 +925,11 @@ def index_table_from_file(vocabulary_file=None, assignation of out-of-vocabulary buckets. key_dtype: The `key` data type. name: A name for this op (optional). + key_column_index: The column index from the text file to get the `key` + values from. The default is to use the line number, starting from zero. + value_column_index: The column index from the text file ro get the `value` + values from. The default is 0 that represents the whole line content. + delimiter: The delimiter to separate fields in a line. Returns: The lookup table to map a `key_dtype` `Tensor` to index `int64` `Tensor`. @@ -944,19 +962,22 @@ def index_table_from_file(vocabulary_file=None, # Keep the shared_name: # ____ shared_name = "hash_table_%s_%d_%s_%s" % (vocabulary_file, vocab_size, - TextFileIndex.WHOLE_LINE, - TextFileIndex.LINE_NUMBER) + key_column_index, + value_column_index) else: # Keep the shared_name # ___ shared_name = "hash_table_%s_%s_%s" % (vocabulary_file, - TextFileIndex.WHOLE_LINE, - TextFileIndex.LINE_NUMBER) + key_column_index, + value_column_index) init = TextFileIdTableInitializer( vocabulary_file, vocab_size=vocab_size, key_dtype=dtypes.int64 if key_dtype.is_integer else key_dtype, - name="table_init") + name="table_init", + key_column_index=key_column_index, + value_column_index=value_column_index, + delimiter=delimiter) table = HashTable( init, default_value, shared_name=shared_name, name=hash_table_scope) -- GitLab From e0b662c1f6ba378e5c0d0da011d6f789ab6606b3 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 14 Nov 2017 12:10:00 -0800 Subject: [PATCH 0410/1801] Removing generator expression in args_to_matching_eager PiperOrigin-RevId: 175715743 --- tensorflow/python/eager/execute.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index c6457232e9..0316f33d7e 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -168,7 +168,10 @@ def make_tensor(v, arg_name): def args_to_matching_eager(l, ctx, default_dtype=None): """Convert sequence `l` to eager same-type Tensors.""" EagerTensor = ops.EagerTensor # pylint: disable=invalid-name - if all(isinstance(x, EagerTensor) for x in l): + for x in l: + if not isinstance(x, EagerTensor): + break + else: # note: intentional for-else return l[0].dtype, l # TODO(josh11b): Could we do a better job if we also passed in the # allowed dtypes when that was known? -- GitLab From 301a6c41cbb111fae89657a49775920aa70525fd Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 14 Nov 2017 12:13:19 -0800 Subject: [PATCH 0411/1801] Do not log a warning when `Rendezvous::Send()` fails in the Send op kernel. A failing call to `Send()` indicates that the step has been aborted by a corresponding call to `Rendezvous::StartAbort()`. As a result, the error logged by `Send()` is not particularly informative, and creates a non-deterministic amount of extra log spam for each step that fails as `Send()` calls are being issued. The failure that causes the step to be aborted is logged separately by the kernel that failed, unless that kernel deliberately does not log on failure. In particular, this change reduces log spam when using `Iterator.get_next()` in a multi-device setting. The `Iterator.get_next()` op deliberately does not log when an `OutOfRange` error (indicated the end of the dataset) is raised, because this is common and expected behavior, especially when using an initializable iterator that is reinitialized at the end of an epoch. Previously, when running in distributed mode or using a GPU, pending `Send()` calls may cause unwanted log messages to be printed. Fixes #12414. PiperOrigin-RevId: 175716290 --- tensorflow/core/kernels/sendrecv_ops.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc index 9c242052f7..542382872c 100644 --- a/tensorflow/core/kernels/sendrecv_ops.cc +++ b/tensorflow/core/kernels/sendrecv_ops.cc @@ -91,9 +91,9 @@ void SendOp::Compute(OpKernelContext* ctx) { if (frame_iter == FrameAndIter(0, 0)) { // Use the cached rendezvous key. VLOG(2) << "Send " << parsed_key_.buf_; - OP_REQUIRES_OK(ctx, - ctx->rendezvous()->Send(parsed_key_, args, ctx->input(0), + ctx->SetStatus(ctx->rendezvous()->Send(parsed_key_, args, ctx->input(0), ctx->is_input_dead())); + return; } else { Rendezvous::ParsedKey in_loop_parsed; GetRendezvousKey(key_prefix_, frame_iter, &in_loop_parsed.buf_); @@ -101,9 +101,9 @@ void SendOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, Rendezvous::ParseKey(in_loop_parsed.buf_, &in_loop_parsed)); - OP_REQUIRES_OK(ctx, - ctx->rendezvous()->Send(in_loop_parsed, args, ctx->input(0), + ctx->SetStatus(ctx->rendezvous()->Send(in_loop_parsed, args, ctx->input(0), ctx->is_input_dead())); + return; } } -- GitLab From 5844c589143cb59e55e24776f8eb9b757f75d226 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 12:15:36 -0800 Subject: [PATCH 0412/1801] fix a typo in the comment. PiperOrigin-RevId: 175716586 --- tensorflow/core/lib/core/threadpool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/lib/core/threadpool.h b/tensorflow/core/lib/core/threadpool.h index 251d58817e..b89b74b8de 100644 --- a/tensorflow/core/lib/core/threadpool.h +++ b/tensorflow/core/lib/core/threadpool.h @@ -30,7 +30,7 @@ class ThreadPool { // Constructs a pool that contains "num_threads" threads with specified // "name". env->StartThread() is used to create individual threads with the // given ThreadOptions. If "low_latency_hint" is true the thread pool - // implementation may use it as a hint that lower latency if preferred at the + // implementation may use it as a hint that lower latency is preferred at the // cost of higher CPU usage, e.g. by letting one or more idle threads spin // wait. Conversely, if the threadpool is used to schedule high-latency // operations like I/O the hint should be set to false. -- GitLab From 6c728c3f5304b2f664608d1692392c9036eba28f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 14 Nov 2017 12:16:20 -0800 Subject: [PATCH 0413/1801] Adding Python3.6 support for nightly binaries. PiperOrigin-RevId: 175716707 --- .../tools/ci_build/ci_parameterized_build.sh | 9 ++- .../install/install_python3.5_pip_packages.sh | 27 +------ .../install/install_python3.6_pip_packages.sh | 75 +++++++++++++++++++ 3 files changed, 84 insertions(+), 27 deletions(-) create mode 100755 tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index db02f6ef10..5f791d7bc7 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -546,8 +546,9 @@ echo "" TMP_DIR="" DOCKERFILE_FLAG="" -if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ]]; then - # Modify Dockerfile for Python3.5 build +if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ] || + ["${TF_BUILD_PYTHON_VERSION}" == "python3.6" ]]; then + # Modify Dockerfile for Python3.5 | Python3.6 build TMP_DIR=$(mktemp -d) echo "Docker build will occur in temporary directory: ${TMP_DIR}" @@ -563,10 +564,10 @@ if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ]]; then # Replace a line in the Dockerfile if sed -i \ - 's/RUN \/install\/install_pip_packages.sh/RUN \/install\/install_python3.5_pip_packages.sh/g' \ + "s/RUN \/install\/install_pip_packages.sh/RUN \/install\/install_${TF_BUILD_PYTHON_VERSION}_pip_packages.sh/g" \ "${DOCKERFILE}" then - echo "Copied and modified Dockerfile for Python 3.5 build: ${DOCKERFILE}" + echo "Copied and modified Dockerfile for ${TF_BUILD_PYTHON_VERSION} build: ${DOCKERFILE}" else die "ERROR: Faild to copy and modify Dockerfile: ${DOCKERFILE}" fi diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index 81bce95d54..479242aa43 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -18,33 +18,12 @@ # TODO(cais): Remove this file once we upgrade to ubuntu:16.04 docker images for # Python 3.5 builds. +# LINT.IfChange + # fkrull/deadsnakes is for Python3.5 add-apt-repository -y ppa:fkrull/deadsnakes apt-get update -set +e -# Upgrade swig to 3.0.8 -SWIG_VERSION="3.0.8" -swig_ver_flat=$(echo $SWIG_VERSION | sed 's/\.//g' | sed 's/^0*//g') -local_swig_ver=$(swig -version | grep -i version | awk '{print $3}') -local_swig_ver_flat=$(echo $local_swig_ver | sed 's/\.//g' | sed 's/^0*//g') -if [[ -z $local_swig_ver_flat ]]; then - local_swig_ver_flat=0 -fi -if (( $local_swig_ver_flat < $swig_ver_flat )); then - set -e - wget -q http://downloads.sourceforge.net/swig/swig-3.0.8.tar.gz - tar xzf swig-3.0.8.tar.gz - pushd swig-3.0.8 - apt-get install -y --no-install-recommends libpcre3-dev - ./configure - make - make install - rm -f /usr/bin/swig - ln -s /usr/local/bin/swig /usr/bin/swig - popd - rm -rf swig-3.0.8 swig-3.0.8.tar.gz -fi set -e # Install Python 3.5 and dev library apt-get install -y --no-install-recommends python3.5 libpython3.5-dev @@ -92,3 +71,5 @@ pip3.5 install portpicker pip3.5 install werkzeug pip3.5 install grpcio + +# LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh new file mode 100755 index 0000000000..c354aaa154 --- /dev/null +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Install packages required by Python3.6 build + +# TODO(amitpatankar): Remove this file once we upgrade to ubuntu:16.04 +# docker images for Python 3.6 builds. + +# LINT.IfChange + +# fkrull/deadsnakes is for Python3.6 +add-apt-repository -y ppa:fkrull/deadsnakes +apt-get update + +set -e +# Install Python 3.6 and dev library +apt-get install -y --no-install-recommends python3.6 libpython3.6-dev + +# Install pip3.6 +set +e +pip35_version=$(pip3.6 --version | grep "python 3.6") +if [[ -z $pip35_version ]]; then + set -e + wget -q https://bootstrap.pypa.io/get-pip.py + python3.6 get-pip.py + rm -f get-pip.py +fi + +set -e +# Install six. +pip3.6 install --upgrade absl-py +pip3.6 install --upgrade six==1.10.0 + +# Install protobuf. +pip3.6 install --upgrade protobuf==3.3.0 + +# Remove obsolete version of six, which can sometimes confuse virtualenv. +rm -rf /usr/lib/python3/dist-packages/six* + +# Install numpy, scipy and scikit-learn required by the builds + +# numpy needs to be installed from source to fix segfaults. See: +# https://github.com/tensorflow/tensorflow/issues/6968 +# This workaround isn't needed for Ubuntu 16.04 or later. +pip3.6 install --no-binary=:all: --upgrade numpy==1.12.0 + +pip3.6 install scipy==0.18.1 + +pip3.6 install scikit-learn==0.18.1 + +# pandas required by `inflow` +pip3 install pandas==0.19.2 + +# Install recent-enough version of wheel for Python 3.6 wheel builds +pip3.6 install wheel==0.29.0 + +pip3.6 install portpicker + +pip3.6 install werkzeug + +pip3.6 install grpcio + +# LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh) -- GitLab From 79cbc15a815d8a3e6f3a76df5f419c25301be4d6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 14 Nov 2017 12:21:22 -0800 Subject: [PATCH 0414/1801] Build the function graph only once when inferring shapes for function. This speeds things up quite a bit when ShapeRefiner::RunShapeFn needs more than one pass to compute the shapes. PiperOrigin-RevId: 175717420 --- .../core/common_runtime/shape_refiner.cc | 89 ++++++++++--------- .../core/common_runtime/shape_refiner.h | 19 ++-- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 8e314c7ea5..10901da192 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -129,80 +129,82 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner, // Maybe we won't support recursive functions at all in TF, because of // other maintanabilty issues. Status ShapeRefiner::InferShapesForFunction( - const tensorflow::FunctionLibraryDefinition& function_library, - const tensorflow::FunctionDef& function_def, bool keep_nested_shapes, + const tensorflow::FunctionDef* function_def, bool keep_nested_shapes, ExtendedInferenceContext* outer_context) { - InstantiationResult result; - TF_RETURN_IF_ERROR(InstantiateFunction( - function_def, outer_context->get_context()->attrs(), - [&function_library](const string& op, const OpDef** sig) { - return function_library.LookUpOpDef(op, sig); - }, - &result)); - - Graph graph(&function_library); - { + const Graph* graph; + auto it = functions_.find(function_def); + if (it != functions_.end()) { + graph = it->second.get(); + } else { + InstantiationResult result; + TF_RETURN_IF_ERROR(InstantiateFunction( + *function_def, outer_context->get_context()->attrs(), + [this](const string& op, const OpDef** sig) { + return this->function_library_->LookUpOpDef(op, sig); + }, + &result)); + + Graph* new_graph = new Graph(function_library_); GraphConstructorOptions options; options.allow_internal_ops = true; - TF_RETURN_IF_ERROR(ConvertNodeDefsToGraph(options, result.nodes, &graph)); + TF_RETURN_IF_ERROR( + ConvertNodeDefsToGraph(options, result.nodes, new_graph)); + functions_[function_def].reset(new_graph); + graph = new_graph; } - ShapeRefiner refiner(graph.versions().producer(), &function_library); - refiner.set_disable_constant_propagation(disable_constant_propagation_); - refiner.set_function_library_for_shape_inference(&function_library); - if (keep_nested_shapes) refiner.set_keep_nested_shape_inferences(); - + std::unordered_set function_nodes; + Status inference_status = Status::OK(); { - Status inference_status = Status::OK(); - auto node_shape_inference_lambda = [&refiner, &outer_context, + auto node_shape_inference_lambda = [this, &outer_context, &function_nodes, &inference_status](const Node* node) { if (!inference_status.ok()) return; inference_status = InferShapesForFunctionSubNode( - node, &refiner, outer_context->get_context()); + node, this, outer_context->get_context()); + function_nodes.insert(node); }; // Calls inference lambda for each node after visiting all predecessors. // Ensures that we are adding nodes to ShapeRefiner in the topological // order. - ReverseDFS(graph, {}, node_shape_inference_lambda); - - TF_RETURN_IF_ERROR(inference_status); + ReverseDFS(*graph, {}, node_shape_inference_lambda); } - if (keep_nested_shapes) { + if (keep_nested_shapes && inference_status.ok()) { // Fill the nested inferences map. // // The materialized function graph has extra nodes for arguments and // return values, which are not explicitly listed in the FunctionDef, // we filter out these special nodes here to not expose the implementation // details and keep only inferences for the nodes listed in the FunctionDef. - - auto stolen_contexts = refiner.StealInferenceContexts(); - std::unordered_map user_defined_nodes; - for (const auto& node_def : function_def.node_def()) { + for (const auto& node_def : function_def->node_def()) { user_defined_nodes[node_def.name()] = &node_def; } std::unordered_map> nested_inferences; - for (auto& stolen_kv : stolen_contexts) { - auto& stolen_name = stolen_kv.first->name(); - if (user_defined_nodes.find(stolen_name) != user_defined_nodes.end()) { - nested_inferences[stolen_name] = std::move(stolen_kv.second); - - // By default InferenceContext refers to a NodeDef from Graph, - // we have to change it to a NodeDef with longer lifetime, - // because the Graph is a temporary in this function. - nested_inferences[stolen_name]->get_context()->node_def_ = - user_defined_nodes[stolen_name]; + for (const Node* node : function_nodes) { + const string& node_name = node->name(); + if (user_defined_nodes.find(node_name) != user_defined_nodes.end()) { + nested_inferences[node_name] = std::move(node_to_context_[node]); + node_to_context_.erase(node); + // By default InferenceContext refers to a NodeDef from Graph. + // Change it to the publicly accessible NodeDef of the function + // definition. + nested_inferences[node_name]->get_context()->node_def_ = + user_defined_nodes[node_name]; } } - outer_context->set_nested_inferences(std::move(nested_inferences)); + } else { + // Delete the contexts created for the functions nodes to save memory. + for (const Node* node : function_nodes) { + node_to_context_.erase(node); + } } - return Status::OK(); + return inference_status; } Status ShapeRefiner::AddNode(const Node* node) { @@ -781,9 +783,8 @@ Status ShapeRefiner::RunShapeFn(const Node* node, auto* func_def = function_library_->Find(op_reg_data->op_def.name()); if (func_def) { - TF_RETURN_IF_ERROR(InferShapesForFunction( - *function_library_, *func_def, keep_nested_shape_inferences_, ec)); - return Status::OK(); + return InferShapesForFunction(func_def, keep_nested_shape_inferences_, + ec); } } diff --git a/tensorflow/core/common_runtime/shape_refiner.h b/tensorflow/core/common_runtime/shape_refiner.h index 570b4db163..da42c30ce9 100644 --- a/tensorflow/core/common_runtime/shape_refiner.h +++ b/tensorflow/core/common_runtime/shape_refiner.h @@ -159,6 +159,7 @@ class ShapeRefiner { // With this enabled, shape inference can take more time since it descends // into all function calls. It doesn't do inference once for each function // definition, but once for each function call. + // The function library must outlive the shape refiner. void set_function_library_for_shape_inference( const tensorflow::FunctionLibraryDefinition* lib) { function_library_ = lib; @@ -210,10 +211,9 @@ class ShapeRefiner { // - outer_context will contain output shapes inferred from input shapes // - outer_context will contain nested inferences collection, iff // keep_nested_shapes is true - Status InferShapesForFunction( - const tensorflow::FunctionLibraryDefinition& function_library, - const tensorflow::FunctionDef& function_def, bool keep_nested_shapes, - ExtendedInferenceContext* outer_context); + Status InferShapesForFunction(const tensorflow::FunctionDef* function_def, + bool keep_nested_shapes, + ExtendedInferenceContext* outer_context); // Tries to infer tensor output based on the input shapes of the node. In some // cases, the shapes of the inputs are sufficient for inferring the contents @@ -260,12 +260,6 @@ class ShapeRefiner { Status RunShapeFn(const Node* node, const OpRegistrationData* op_reg_data, ExtendedInferenceContext* ec); - // Destructive operation, which steals ownership of inference contexts map. - std::unordered_map> - StealInferenceContexts() { - return std::move(node_to_context_); - } - int32 graph_def_version_; const OpRegistryInterface* const ops_registry_; @@ -299,6 +293,11 @@ class ShapeRefiner { // defined functions. By default that info is discarded to save memory. bool keep_nested_shape_inferences_ = false; + // Cache the graph corresponding to each functin definition for which shapes + // are refined. + std::unordered_map> + functions_; + TF_DISALLOW_COPY_AND_ASSIGN(ShapeRefiner); }; -- GitLab From 6a4391c19bc8346df45862865cb4db3ba231bd86 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 14 Nov 2017 12:28:23 -0800 Subject: [PATCH 0415/1801] Remove experimental tpu.outside_all_rewrites() API. PiperOrigin-RevId: 175718301 --- tensorflow/contrib/tpu/__init__.py | 1 - tensorflow/contrib/tpu/python/tpu/tpu.py | 21 --------------------- 2 files changed, 22 deletions(-) diff --git a/tensorflow/contrib/tpu/__init__.py b/tensorflow/contrib/tpu/__init__.py index 6a5fe06ff0..ec4c4e1be6 100644 --- a/tensorflow/contrib/tpu/__init__.py +++ b/tensorflow/contrib/tpu/__init__.py @@ -24,7 +24,6 @@ @@initialize_system @@shutdown_system @@core -@@outside_all_rewrites @@replicate @@shard @@batch_parallel diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index d521297d99..bc3c888b1f 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -19,7 +19,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import contextlib from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.tpu.python.ops import tpu_ops @@ -81,26 +80,6 @@ def core(num): return "device:TPU_REPLICATED_CORE:{}".format(num) -# Experimental API to 'break out' of a tpu.rewrite() (or shard(), etc.) context. -# In -# -# XXX -# with tpu.rewrite(...): -# YYY -# with tpu.outside_all_rewrites(): -# ZZZ -# -# the Ops in ZZZ are added outside the scope of the rewrite(). -# TODO(phawkins): currently outside_all_rewrites() pops out of all nested -# control flow scopes, for example loops. It would make more sense if it only -# popped out of a single scope. -@contextlib.contextmanager -def outside_all_rewrites(): - """Experimental API to 'break out' of a tpu.rewrite() (or shard(), etc.).""" - with ops.control_dependencies(None): - yield - - class TPUReplicateContext(control_flow_ops.ControlFlowContext): """A ControlFlowContext for nodes inside a TPU computation. -- GitLab From 5ae3049e4b8317ee5247dc5ebc3f2bbc0a5b3869 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 12:36:22 -0800 Subject: [PATCH 0416/1801] Enabling default extend_export_strategy call to use the base_export_strategy name as this is a common case. PiperOrigin-RevId: 175719299 --- .../learn/utils/saved_model_export_utils.py | 43 +++++++--- .../utils/saved_model_export_utils_test.py | 79 +++++++++++++++++-- 2 files changed, 102 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 49413092a6..6ffd2a1339 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -33,6 +33,7 @@ from __future__ import division from __future__ import print_function import os +import tempfile import time from tensorflow.contrib.layers.python.layers import feature_column @@ -644,18 +645,22 @@ def make_best_model_export_strategy(serving_input_fn, # TODO(b/67013778): Revisit this approach when corresponding changes to # TF Core are finalized. -def extend_export_strategy(base_export_strategy, post_export_fn, - post_export_name): +def extend_export_strategy(base_export_strategy, + post_export_fn, + post_export_name=None): """Extend ExportStrategy, calling post_export_fn after export. Args: base_export_strategy: An ExportStrategy that can be passed to the Experiment constructor. post_export_fn: A user-specified function to call after exporting the - SavedModel. Takes the export directory as an argument, and returns - a string path to a (potentially different) SavedModel. + SavedModel. Takes two arguments - the path to the SavedModel exported by + base_export_strategy and the directory where to export the SavedModel + modified by the post_export_fn. Returns the path to the exported + SavedModel. post_export_name: The directory name under the export base directory where - SavedModels generated by the post_export_fn will be written. + SavedModels generated by the post_export_fn will be written. If None, the + directory name of base_export_strategy is used. Returns: An ExportStrategy that can be passed to the Experiment constructor. @@ -675,12 +680,24 @@ def extend_export_strategy(base_export_strategy, post_export_fn, Raises: ValueError: If `estimator` is a ${tf.estimator.Estimator} instance - and `default_output_alternative_key` was specified. + and `default_output_alternative_key` was specified or if post_export_fn + does not return a valid directory. """ - export_dir = base_export_strategy.export(estimator, export_dir_base, - checkpoint_path) - if post_export_fn: - export_dir = post_export_fn(export_dir) - return export_dir - - return export_strategy.ExportStrategy(post_export_name, export_fn) + tmp_base_export_dir = tempfile.mkdtemp() + tmp_base_export = base_export_strategy.export( + estimator, tmp_base_export_dir, checkpoint_path) + tmp_post_export_dir = tempfile.mkdtemp() + tmp_post_export = post_export_fn(tmp_base_export, tmp_post_export_dir) + + if not tmp_post_export.startswith(tmp_post_export_dir): + raise ValueError('post_export_fn must return a sub-directory of {}' + .format(tmp_post_export_dir)) + export_relpath = os.path.relpath(tmp_post_export, tmp_post_export_dir) + + gfile.Rename( + os.path.join(tmp_post_export_dir, export_relpath), + os.path.join(export_dir_base, export_relpath)) + return os.path.join(export_dir_base, export_relpath) + + name = post_export_name if post_export_name else base_export_strategy.name + return export_strategy.ExportStrategy(name, export_fn) diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py index 27f17b5422..ec3a88003f 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py @@ -743,12 +743,19 @@ class SavedModelExportUtilsTest(test.TestCase): None) def test_extend_export_strategy(self): - def _base_export_fn(unused_estimator, export_dir_base, + + def _base_export_fn(unused_estimator, + export_dir_base, unused_checkpoint_path=None): - return export_dir_base + "/e1" + base_path = os.path.join(export_dir_base, "e1") + gfile.MkDir(base_path) + return base_path - def _post_export_fn(orig_path): - return orig_path + "/rewrite" + def _post_export_fn(orig_path, new_path): + assert orig_path.endswith("/e1") + post_export_path = os.path.join(new_path, "rewrite") + gfile.MkDir(post_export_path) + return post_export_path base_export_strategy = export_strategy_lib.ExportStrategy( "Servo", _base_export_fn) @@ -758,9 +765,67 @@ class SavedModelExportUtilsTest(test.TestCase): self.assertEqual(final_export_strategy.name, "Servo2") test_estimator = TestEstimator() - final_path = final_export_strategy.export(test_estimator, "/path/to/orig", - "/path/to/checkpoint") - self.assertEqual("/path/to/orig/e1/rewrite", final_path) + tmpdir = tempfile.mkdtemp() + final_path = final_export_strategy.export(test_estimator, tmpdir, + os.path.join( + tmpdir, "checkpoint")) + self.assertEqual(os.path.join(tmpdir, "rewrite"), final_path) + + def test_extend_export_strategy_same_name(self): + + def _base_export_fn(unused_estimator, + export_dir_base, + unused_checkpoint_path=None): + base_path = os.path.join(export_dir_base, "e1") + gfile.MkDir(base_path) + return base_path + + def _post_export_fn(orig_path, new_path): + assert orig_path.endswith("/e1") + post_export_path = os.path.join(new_path, "rewrite") + gfile.MkDir(post_export_path) + return post_export_path + + base_export_strategy = export_strategy_lib.ExportStrategy( + "Servo", _base_export_fn) + + final_export_strategy = saved_model_export_utils.extend_export_strategy( + base_export_strategy, _post_export_fn) + self.assertEqual(final_export_strategy.name, "Servo") + + test_estimator = TestEstimator() + tmpdir = tempfile.mkdtemp() + final_path = final_export_strategy.export(test_estimator, tmpdir, + os.path.join( + tmpdir, "checkpoint")) + self.assertEqual(os.path.join(tmpdir, "rewrite"), final_path) + + def test_extend_export_strategy_raises_error(self): + + def _base_export_fn(unused_estimator, + export_dir_base, + unused_checkpoint_path=None): + base_path = os.path.join(export_dir_base, "e1") + gfile.MkDir(base_path) + return base_path + + def _post_export_fn(unused_orig_path, unused_new_path): + return tempfile.mkdtemp() + + base_export_strategy = export_strategy_lib.ExportStrategy( + "Servo", _base_export_fn) + + final_export_strategy = saved_model_export_utils.extend_export_strategy( + base_export_strategy, _post_export_fn) + + test_estimator = TestEstimator() + tmpdir = tempfile.mkdtemp() + with self.assertRaises(ValueError) as ve: + final_export_strategy.export(test_estimator, tmpdir, + os.path.join(tmpdir, "checkpoint")) + + self.assertTrue( + "post_export_fn must return a sub-directory" in str(ve.exception)) def _create_test_export_dir(export_dir_base): -- GitLab From 6d5793853cfdd27fe806bca4fad0f4e3c3a32b73 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 14 Nov 2017 12:53:47 -0800 Subject: [PATCH 0417/1801] The link fixer needs these to be all on one line. PiperOrigin-RevId: 175721637 --- tensorflow/docs_src/mobile/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/mobile/index.md b/tensorflow/docs_src/mobile/index.md index a10db74364..6bcd7d09d9 100644 --- a/tensorflow/docs_src/mobile/index.md +++ b/tensorflow/docs_src/mobile/index.md @@ -2,8 +2,8 @@ TensorFlow was designed to be a good deep learning solution for mobile platforms. Currently we have two solutions for deploying machine learning -applications on mobile and embedded devices: @{$mobile/mobile_intro$TensorFlow -for Mobile} and @{$mobile/tflite$TensorFlow Lite}. +applications on mobile and embedded devices: +@{$mobile/mobile_intro$TensorFlow for Mobile} and @{$mobile/tflite$TensorFlow Lite}. ## TensorFlow Lite versus TensorFlow Mobile -- GitLab From d045a51072eed09b3fcb990ccd3ad4872ce0ada3 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 14 Nov 2017 13:03:31 -0800 Subject: [PATCH 0418/1801] Enable prefetching on the resnet50 benchmark for eager. PiperOrigin-RevId: 175722984 --- tensorflow/contrib/eager/python/BUILD | 7 +- tensorflow/contrib/eager/python/datasets.py | 72 +++++++++++++++------ 2 files changed, 57 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 6783f7beb0..92746b866a 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -50,21 +50,22 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ + "//tensorflow/contrib/data/python/ops:prefetching_py", "//tensorflow/python:array_ops", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", "//tensorflow/python:resource_variable_ops", + "//tensorflow/python/data/ops:iterator_ops", "//tensorflow/python/data/util:nest", "//tensorflow/python/eager:context", ], ) -py_test( +cuda_py_test( name = "datasets_test", srcs = ["datasets_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":datasets", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index 98e6983658..b559cce6b1 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -20,11 +20,15 @@ from __future__ import print_function import threading +from tensorflow.contrib.data.python.ops import prefetching_ops +from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import function from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import resource_variable_ops @@ -32,12 +36,12 @@ _uid_counter = 0 _uid_lock = threading.Lock() -def _iterator_shared_name(): +def _generate_shared_name(prefix): with _uid_lock: global _uid_counter uid = _uid_counter _uid_counter += 1 - return "eager_iterator_{}".format(uid) + return "{}_{}".format(prefix, uid) class Iterator(object): @@ -72,11 +76,12 @@ class Iterator(object): with ops.device("/device:CPU:0"): ds_variant = dataset._as_variant_tensor() # pylint: disable=protected-access self._output_types = dataset.output_types + self._output_shapes = dataset.output_shapes self._flat_output_types = nest.flatten(dataset.output_types) self._flat_output_shapes = nest.flatten(dataset.output_shapes) self._resource = gen_dataset_ops.iterator( container="", - shared_name=_iterator_shared_name(), + shared_name=_generate_shared_name("eager_iterator"), output_types=self._flat_output_types, output_shapes=self._flat_output_shapes) gen_dataset_ops.make_iterator(ds_variant, self._resource) @@ -84,6 +89,35 @@ class Iterator(object): self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._resource, handle_device="/device:CPU:0") self._device = context.context().device_name + self._buffer_resource_handle = None + if not context.context().device_spec.device_type: + is_remote_device = False + else: + is_remote_device = context.context().device_spec.device_type != "CPU" + if is_remote_device: + with ops.device("/device:CPU:0"): + iter_string_handle = gen_dataset_ops.iterator_to_string_handle( + self._resource) + + @function.Defun(dtypes.string) + def remote_fn(h): + remote_iterator = iterator_ops.Iterator.from_string_handle( + h, self._output_types, self._output_shapes) + return remote_iterator.get_next() + + remote_fn.add_to_graph(None) + target = constant_op.constant("/device:CPU:0") + with ops.device(self._device): + self._buffer_resource_handle = prefetching_ops.function_buffering_resource( + string_arg=iter_string_handle, + f=remote_fn, + target_device=target, + buffer_size=10, + thread_pool_size=1, + container="", + shared_name=_generate_shared_name("function_buffer_resource")) + self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter( + handle=self._buffer_resource_handle, handle_device=self._device) def __iter__(self): return self @@ -93,20 +127,20 @@ class Iterator(object): def next(self): """Return the next tf.Tensor from the dataset.""" - try: - # TODO(ashankar): Consider removing this ops.device() contextmanager - # and instead mimic ops placement in graphs: Operations on resource - # handles execute on the same device as where the resource is placed. - with ops.device("/device:CPU:0"): - ret = gen_dataset_ops.iterator_get_next( - self._resource, - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - except errors.OutOfRangeError: - raise StopIteration - # Copies tensors from CPU to the current device if necessary. - # TODO(rohanj): This should be replaced by the mechanism to have the - # runtime's threads copy tensors to the destination device. with ops.device(self._device): - ret = [array_ops.identity(x) for x in ret] + try: + if self._buffer_resource_handle is not None: + ret = prefetching_ops.function_buffering_resource_get_next( + function_buffer_resource=self._buffer_resource_handle, + output_types=self._flat_output_types) + else: + # TODO(ashankar): Consider removing this ops.device() contextmanager + # and instead mimic ops placement in graphs: Operations on resource + # handles execute on the same device as where the resource is placed. + ret = gen_dataset_ops.iterator_get_next( + self._resource, + output_types=self._flat_output_types, + output_shapes=self._flat_output_shapes) + except errors.OutOfRangeError: + raise StopIteration return nest.pack_sequence_as(self._output_types, ret) -- GitLab From f4025a66715a027592d45c435f95cdbe467608f1 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 14 Nov 2017 13:09:50 -0800 Subject: [PATCH 0419/1801] Clear trace in C instead of python. PiperOrigin-RevId: 175723846 --- tensorflow/python/eager/pywrap_tensor.cc | 19 ++----------------- tensorflow/python/framework/ops.py | 5 ----- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 653f3ef84e..91192fea62 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -330,24 +330,9 @@ void EagerTensor_dealloc(EagerTensor* self) { // We have the global interpreter lock, so use this chance to perform delayed // refcount decrements. tensorflow::ClearDecrefCache(); - PyObject* id = PyLong_FromLongLong(self->id); - PyObject* func = PyObject_GetAttrString(reinterpret_cast(self), - "_delete_trace"); + auto id = self->id; Py_TYPE(self)->tp_free(self); - self = nullptr; - // Note that we run `func` after calling `tp_free`. Otherwise calling that - // function can potentially trigger garbage collection that observes `self` - // in this half deleted state and crashes. - // Note that `func` is a staticmethod and does not need `self` to be around - // for running. - // We clear (and later restore) any errors that have already been set. Else - // these erorrs may appear randomly as part of the function execution. - PyObject *a, *b, *c; - PyErr_Fetch(&a, &b, &c); - PyObject_CallFunctionObjArgs(func, id, nullptr); - PyErr_Restore(a, b, c); - Py_DECREF(func); - Py_DECREF(id); + TFE_Py_TapeStackDeleteTrace(id); } // Getter for `_id`. diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index b0abbfc7dc..09e0a83c76 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -605,11 +605,6 @@ class Tensor(_TensorLike): class _EagerTensorBase(Tensor): """Base class for EagerTensor.""" - @staticmethod - def _delete_trace(tid): - """Helper function to be called by __del__ of the subclass.""" - tape.delete_trace(tid) - @property def dtype(self): # Note: using the intern table directly here as this is -- GitLab From 7ad948134fc7fa376c4cea909316561d6f98ef96 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 14 Nov 2017 13:12:58 -0800 Subject: [PATCH 0420/1801] Do not create dtype objects in the eager hot path for execution, use enums PiperOrigin-RevId: 175724282 --- tensorflow/python/eager/execute.py | 8 ++++---- tensorflow/python/eager/python_eager_op_gen.cc | 5 ----- tensorflow/python/framework/constant_op.py | 4 ++-- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 0316f33d7e..1b5f3f7f9d 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -172,7 +172,7 @@ def args_to_matching_eager(l, ctx, default_dtype=None): if not isinstance(x, EagerTensor): break else: # note: intentional for-else - return l[0].dtype, l + return l[0]._datatype_enum(), l # pylint: disable=protected-access # TODO(josh11b): Could we do a better job if we also passed in the # allowed dtypes when that was known? @@ -196,7 +196,7 @@ def args_to_matching_eager(l, ctx, default_dtype=None): else: ret = [internal_convert_to_tensor(t, dtype, ctx=ctx) for t in l] - return dtype, ret + return dtype.as_datatype_enum, ret def convert_to_mixed_eager_tensors(values, ctx): @@ -205,7 +205,7 @@ def convert_to_mixed_eager_tensors(values, ctx): t, context=ctx._handle, device=ctx.device_name) # pylint: disable=protected-access for t in values ] - types = [t.dtype for t in v] + types = [t._datatype_enum() for t in v] # pylint: disable=protected-access return types, v @@ -243,5 +243,5 @@ def args_to_mixed_eager_tensors(lists, ctx): for j in range(len(lists)): lists_ret[j].append( ops.internal_convert_to_tensor(lists[j][i], dtype=dtype, ctx=ctx)) - types.append(dtype) + types.append(dtype.as_datatype_enum) return types, lists_ret diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index e57488cb64..371df563bb 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -589,8 +589,6 @@ void GenEagerPythonOp::AddEagerInferredAttrs() { strings::StrAppend(&result_, " ", VectorToTuple(p), " = ", inputs_var, "\n"); } - strings::StrAppend(&result_, " ", var_name, " = ", var_name, - ".as_datatype_enum\n"); } else if (attr.type() == "list(type)") { // NOTE: We ignore default values for these attrs, since it is // unclear how you would use it, and the one use case is @@ -617,9 +615,6 @@ void GenEagerPythonOp::AddEagerInferredAttrs() { } strings::StrAppend(&result_, " ", var_name, ", ", inputs_var, " = ", conversion, "(", inputs_var, ", _ctx)\n"); - strings::StrAppend(&result_, " ", var_name, - " = [_t.as_datatype_enum for _t in ", var_name, - "]\n"); } } } diff --git a/tensorflow/python/framework/constant_op.py b/tensorflow/python/framework/constant_op.py index d51e142da1..bf3be34d85 100644 --- a/tensorflow/python/framework/constant_op.py +++ b/tensorflow/python/framework/constant_op.py @@ -55,10 +55,10 @@ from tensorflow.python.framework import tensor_util def _eager_reshape(tensor, shape, ctx): """Eager-only version of Reshape op; requires tensor is an eager Tensor.""" - attr_t = tensor.dtype.as_datatype_enum + attr_t = tensor._datatype_enum() # pylint: disable=protected-access attr_tshape, (shape,) = execute.args_to_matching_eager( [shape], ctx, dtypes.int32) - attr_tshape = attr_tshape.as_datatype_enum + attr_tshape = attr_tshape inputs_flat = [tensor, shape] attrs = ("T", attr_t, "Tshape", attr_tshape) result, = execute.execute( -- GitLab From 144eaa8e273da43b7ca881d7dcac98b65f698f11 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 14 Nov 2017 13:29:14 -0800 Subject: [PATCH 0421/1801] Update tf.keras Dataset, Engine and Layers to the Keras 2.0.9 API. PiperOrigin-RevId: 175726451 --- tensorflow/contrib/cmake/tf_python.cmake | 1 + tensorflow/python/keras/BUILD | 2 + .../python/keras/_impl/keras/callbacks.py | 15 ++-- .../keras/_impl/keras/callbacks_test.py | 12 ++-- .../keras/_impl/keras/datasets/__init__.py | 5 +- .../_impl/keras/datasets/boston_housing.py | 7 +- .../keras/_impl/keras/datasets/cifar10.py | 2 +- .../keras/_impl/keras/datasets/cifar100.py | 2 +- .../_impl/keras/datasets/fashion_mnist.py | 59 ++++++++++++++++ .../python/keras/_impl/keras/datasets/imdb.py | 29 ++++---- .../keras/_impl/keras/datasets/mnist.py | 4 +- .../keras/_impl/keras/datasets/reuters.py | 14 ++-- .../keras/_impl/keras/engine/topology.py | 25 +++++++ .../keras/_impl/keras/engine/training.py | 68 ++++++++++++++----- .../keras/_impl/keras/engine/training_test.py | 13 ++++ .../keras/_impl/keras/layers/convolutional.py | 58 ++++++++++------ .../keras/layers/convolutional_recurrent.py | 10 +-- .../python/keras/_impl/keras/layers/core.py | 29 ++++---- .../keras/_impl/keras/layers/core_test.py | 6 ++ .../python/keras/_impl/keras/layers/merge.py | 33 ++++++++- .../keras/_impl/keras/layers/merge_test.py | 14 ++++ .../keras/_impl/keras/layers/pooling.py | 4 +- .../keras/_impl/keras/layers/wrappers.py | 4 -- .../keras/_impl/keras/utils/io_utils.py | 2 +- .../keras/_impl/keras/utils/layer_utils.py | 40 +++++++++-- tensorflow/python/keras/datasets/__init__.py | 1 + .../keras/datasets/fashion_mnist/__init__.py | 0 tensorflow/python/layers/core.py | 12 +++- tensorflow/python/layers/core_test.py | 10 +++ .../api/golden/tensorflow.keras.-model.pbtxt | 2 +- ...sorflow.keras.datasets.fashion_mnist.pbtxt | 3 + .../golden/tensorflow.keras.datasets.pbtxt | 4 ++ ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt | 2 +- ...flow.keras.layers.-separable-conv2-d.pbtxt | 2 +- ...ras.layers.-separable-convolution2-d.pbtxt | 2 +- ...rflow.keras.layers.-time-distributed.pbtxt | 4 -- .../tensorflow.keras.layers.-wrapper.pbtxt | 4 -- .../tensorflow.keras.models.-model.pbtxt | 2 +- 38 files changed, 382 insertions(+), 124 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/datasets/fashion_mnist.py create mode 100644 tensorflow/python/keras/datasets/fashion_mnist/__init__.py create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.fashion_mnist.pbtxt diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 9517aa4963..9b863f7bc6 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -238,6 +238,7 @@ add_python_module("tensorflow/python/keras/datasets") add_python_module("tensorflow/python/keras/datasets/boston_housing") add_python_module("tensorflow/python/keras/datasets/cifar10") add_python_module("tensorflow/python/keras/datasets/cifar100") +add_python_module("tensorflow/python/keras/datasets/fashion_mnist") add_python_module("tensorflow/python/keras/datasets/imdb") add_python_module("tensorflow/python/keras/datasets/mnist") add_python_module("tensorflow/python/keras/datasets/reuters") diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 55b5d7ff61..a46a92cd0c 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -30,6 +30,7 @@ py_library( "_impl/keras/datasets/cifar.py", "_impl/keras/datasets/cifar10.py", "_impl/keras/datasets/cifar100.py", + "_impl/keras/datasets/fashion_mnist.py", "_impl/keras/datasets/imdb.py", "_impl/keras/datasets/mnist.py", "_impl/keras/datasets/reuters.py", @@ -89,6 +90,7 @@ py_library( "datasets/boston_housing/__init__.py", "datasets/cifar10/__init__.py", "datasets/cifar100/__init__.py", + "datasets/fashion_mnist/__init__.py", "datasets/imdb/__init__.py", "datasets/mnist/__init__.py", "datasets/reuters/__init__.py", diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py index eb678c4d1d..40a996a03f 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -265,7 +265,7 @@ class ProgbarLogger(Callback): Arguments: count_mode: One of "steps" or "samples". Whether the progress bar should - count samples seens or steps (batches) seen. + count samples seen or steps (batches) seen. Raises: ValueError: In case of invalid `count_mode`. @@ -417,7 +417,7 @@ class ModelCheckpoint(Callback): self.epochs_since_last_save += 1 if self.epochs_since_last_save >= self.period: self.epochs_since_last_save = 0 - filepath = self.filepath.format(epoch=epoch, **logs) + filepath = self.filepath.format(epoch=epoch + 1, **logs) if self.save_best_only: current = logs.get(self.monitor) if current is None: @@ -427,7 +427,7 @@ class ModelCheckpoint(Callback): if self.monitor_op(current, self.best): if self.verbose > 0: print('Epoch %05d: %s improved from %0.5f to %0.5f,' - ' saving model to %s' % (epoch, self.monitor, self.best, + ' saving model to %s' % (epoch + 1, self.monitor, self.best, current, filepath)) self.best = current if self.save_weights_only: @@ -436,10 +436,11 @@ class ModelCheckpoint(Callback): self.model.save(filepath, overwrite=True) else: if self.verbose > 0: - print('Epoch %05d: %s did not improve' % (epoch, self.monitor)) + print('Epoch %05d: %s did not improve' % (epoch + 1, + self.monitor)) else: if self.verbose > 0: - print('Epoch %05d: saving model to %s' % (epoch, filepath)) + print('Epoch %05d: saving model to %s' % (epoch + 1, filepath)) if self.save_weights_only: self.model.save_weights(filepath, overwrite=True) else: @@ -519,14 +520,14 @@ class EarlyStopping(Callback): self.best = current self.wait = 0 else: + self.wait += 1 if self.wait >= self.patience: self.stopped_epoch = epoch self.model.stop_training = True - self.wait += 1 def on_train_end(self, logs=None): if self.stopped_epoch > 0 and self.verbose > 0: - print('Epoch %05d: early stopping' % (self.stopped_epoch)) + print('Epoch %05d: early stopping' % (self.stopped_epoch + 1)) class RemoteMonitor(Callback): diff --git a/tensorflow/python/keras/_impl/keras/callbacks_test.py b/tensorflow/python/keras/_impl/keras/callbacks_test.py index d9d7fb5a9f..9f578a0fab 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks_test.py +++ b/tensorflow/python/keras/_impl/keras/callbacks_test.py @@ -203,12 +203,12 @@ class KerasCallbacksTest(test.TestCase): callbacks=cbks, epochs=4, verbose=1) - assert os.path.exists(filepath.format(epoch=1)) - assert os.path.exists(filepath.format(epoch=3)) - os.remove(filepath.format(epoch=1)) - os.remove(filepath.format(epoch=3)) - assert not os.path.exists(filepath.format(epoch=0)) - assert not os.path.exists(filepath.format(epoch=2)) + assert os.path.exists(filepath.format(epoch=2)) + assert os.path.exists(filepath.format(epoch=4)) + os.remove(filepath.format(epoch=2)) + os.remove(filepath.format(epoch=4)) + assert not os.path.exists(filepath.format(epoch=1)) + assert not os.path.exists(filepath.format(epoch=3)) # Invalid use: this will raise a warning but not an Exception. keras.callbacks.ModelCheckpoint( diff --git a/tensorflow/python/keras/_impl/keras/datasets/__init__.py b/tensorflow/python/keras/_impl/keras/datasets/__init__.py index 22afb6a553..60db3766fb 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/__init__.py +++ b/tensorflow/python/keras/_impl/keras/datasets/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================== """Keras datasets: utilities for downloading and pre-processing common datasets. + """ from __future__ import absolute_import from __future__ import division @@ -21,7 +22,7 @@ from __future__ import print_function from tensorflow.python.keras._impl.keras.datasets import boston_housing from tensorflow.python.keras._impl.keras.datasets import cifar10 from tensorflow.python.keras._impl.keras.datasets import cifar100 +from tensorflow.python.keras._impl.keras.datasets import fashion_mnist from tensorflow.python.keras._impl.keras.datasets import imdb from tensorflow.python.keras._impl.keras.datasets import mnist from tensorflow.python.keras._impl.keras.datasets import reuters - diff --git a/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py b/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py index e4f7fb9d21..4359be8928 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py +++ b/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py @@ -48,9 +48,10 @@ def load_data(path='boston_housing.npz', seed=113, test_split=0.2): f.close() np.random.seed(seed) - np.random.shuffle(x) - np.random.seed(seed) - np.random.shuffle(y) + indices = np.arrange(len(x)) + np.random.shuffle(indices) + x = x[indices] + y = y[indices] x_train = np.array(x[:int(len(x) * (1 - test_split))]) y_train = np.array(y[:int(len(x) * (1 - test_split))]) diff --git a/tensorflow/python/keras/_impl/keras/datasets/cifar10.py b/tensorflow/python/keras/_impl/keras/datasets/cifar10.py index 672249ff20..4a68789015 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/cifar10.py +++ b/tensorflow/python/keras/_impl/keras/datasets/cifar10.py @@ -34,7 +34,7 @@ def load_data(): Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ dirname = 'cifar-10-batches-py' - origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' + origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) num_train_samples = 50000 diff --git a/tensorflow/python/keras/_impl/keras/datasets/cifar100.py b/tensorflow/python/keras/_impl/keras/datasets/cifar100.py index 1be7483d27..b69c0724c5 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/cifar100.py +++ b/tensorflow/python/keras/_impl/keras/datasets/cifar100.py @@ -43,7 +43,7 @@ def load_data(label_mode='fine'): raise ValueError('label_mode must be one of "fine" "coarse".') dirname = 'cifar-100-python' - origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' + origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) fpath = os.path.join(path, 'train') diff --git a/tensorflow/python/keras/_impl/keras/datasets/fashion_mnist.py b/tensorflow/python/keras/_impl/keras/datasets/fashion_mnist.py new file mode 100644 index 0000000000..17be684e4f --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/datasets/fashion_mnist.py @@ -0,0 +1,59 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Fashion-MNIST dataset. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gzip +import os +import numpy as np +from tensorflow.python.keras._impl.keras.utils.data_utils import get_file + + +def load_data(): + """Loads the Fashion-MNIST dataset. + + Returns: + Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. + """ + dirname = os.path.join('datasets', 'fashion-mnist') + base = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' + files = [ + 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', + 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' + ] + + paths = [] + for given_file in files: + paths.append( + get_file(given_file, origin=base + given_file, cache_subdir=dirname)) + + with gzip.open(paths[0], 'rb') as lbpath: + y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) + + with gzip.open(paths[1], 'rb') as imgpath: + x_train = np.frombuffer( + imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28) + + with gzip.open(paths[2], 'rb') as lbpath: + y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) + + with gzip.open(paths[3], 'rb') as imgpath: + x_test = np.frombuffer( + imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28) + + return (x_train, y_train), (x_test, y_test) diff --git a/tensorflow/python/keras/_impl/keras/datasets/imdb.py b/tensorflow/python/keras/_impl/keras/datasets/imdb.py index 0db9d61f6d..0e83473899 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/imdb.py +++ b/tensorflow/python/keras/_impl/keras/datasets/imdb.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -65,23 +65,24 @@ def load_data(path='imdb.npz', have simply been skipped. """ path = get_file( - path, origin='https://s3.amazonaws.com/text-datasets/imdb.npz') + path, + origin='https://s3.amazonaws.com/text-datasets/imdb.npz', + file_hash='599dadb1135973df5b59232a0e9a887c') f = np.load(path) - x_train = f['x_train'] - labels_train = f['y_train'] - x_test = f['x_test'] - labels_test = f['y_test'] + x_train, labels_train = f['x_train'], f['y_train'] + x_test, labels_test = f['x_test'], f['y_test'] f.close() np.random.seed(seed) - np.random.shuffle(x_train) - np.random.seed(seed) - np.random.shuffle(labels_train) - - np.random.seed(seed * 2) - np.random.shuffle(x_test) - np.random.seed(seed * 2) - np.random.shuffle(labels_test) + indices = np.arrange(len(x_train)) + np.random.shuffle(indices) + x_train = x_train[indices] + labels_train = labels_train[indices] + + indices = np.arrange(len(x_test)) + np.random.shuffle(indices) + x_test = x_test[indices] + labels_test = labels_test[indices] xs = np.concatenate([x_train, x_test]) labels = np.concatenate([labels_train, labels_test]) diff --git a/tensorflow/python/keras/_impl/keras/datasets/mnist.py b/tensorflow/python/keras/_impl/keras/datasets/mnist.py index 02be5e2a40..e98f29537f 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/mnist.py +++ b/tensorflow/python/keras/_impl/keras/datasets/mnist.py @@ -34,7 +34,9 @@ def load_data(path='mnist.npz'): Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ path = get_file( - path, origin='https://s3.amazonaws.com/img-datasets/mnist.npz') + path, + origin='https://s3.amazonaws.com/img-datasets/mnist.npz', + file_hash='8a61469f7ea1b51cbae51d4f78837e45') f = np.load(path) x_train = f['x_train'] y_train = f['y_train'] diff --git a/tensorflow/python/keras/_impl/keras/datasets/reuters.py b/tensorflow/python/keras/_impl/keras/datasets/reuters.py index c36bac5cc7..d05eb0ef8c 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/reuters.py +++ b/tensorflow/python/keras/_impl/keras/datasets/reuters.py @@ -64,15 +64,20 @@ def load_data(path='reuters.npz', have simply been skipped. """ path = get_file( - path, origin='https://s3.amazonaws.com/text-datasets/reuters.npz') + path, + origin='https://s3.amazonaws.com/text-datasets/reuters.npz', + file_hash='87aedbeb0cb229e378797a632c1997b6') npzfile = np.load(path) xs = npzfile['x'] labels = npzfile['y'] npzfile.close() np.random.seed(seed) - np.random.shuffle(xs) - np.random.seed(seed) + indices = np.arrange(len(xs)) + np.random.shuffle(indices) + xs = xs[indices] + labels = labels[indices] + np.random.shuffle(labels) if start_char is not None: @@ -129,7 +134,8 @@ def get_word_index(path='reuters_word_index.json'): """ path = get_file( path, - origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.json') + origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.json', + file_hash='4d44cc38712099c9e383dc6e5f11a921') f = open(path) data = json.load(f) f.close() diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index 1b7ddef9c4..814961bd1d 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -1422,6 +1422,31 @@ def preprocess_weights_for_loading(layer, weights[0] = np.transpose(weights[0], (3, 2, 0, 1)) if layer.__class__.__name__ == 'ConvLSTM2D': weights[1] = np.transpose(weights[1], (3, 2, 0, 1)) + + # convert the weights of CuDNNLSTM so that they could be loaded into LSTM + if layer.__class__.__name__ == 'LSTM': + # determine if we're loading a CuDNNLSTM layer from the number of bias + # weights: + # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) + units = weights[1].shape[0] + bias = weights[2] + if len(bias) == units * 8: + # reshape the kernels + kernels = np.split(weights[0], 4, axis=1) + kernels = [ + kernel.reshape(-1).reshape(kernel.shape, order='F') + for kernel in kernels + ] + weights[0] = np.concatenate(kernels, axis=1) + + # transpose the recurrent kernels + recurrent_kernels = np.split(weights[1], 4, axis=1) + recurrent_kernels = [kernel.T for kernel in recurrent_kernels] + weights[1] = np.concatenate(recurrent_kernels, axis=1) + + # split the bias into half and merge + weights[2] = bias[:units * 4] + bias[units * 4:] + return weights diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index b1e48439ba..e6d29c4968 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -71,6 +71,9 @@ def _standardize_input_data(data, if data is None: return [None for _ in range(len(names))] if isinstance(data, dict): + for key, value in data.items(): + if value.__class__.__name__ == 'DataFrame': + data[key] = value.values arrays = [] for name in names: if name not in data: @@ -78,6 +81,9 @@ def _standardize_input_data(data, '". Need data for each key in: ' + str(names)) arrays.append(data[name]) elif isinstance(data, list): + for key, value in enumerate(data): + if value.__class__.__name__ == 'DataFrame': + data[key] = value.values if len(data) != len(names): if data and hasattr(data[0], 'shape'): raise ValueError( @@ -100,6 +106,9 @@ def _standardize_input_data(data, ' Numpy arrays instead. ' 'The list you passed was: ' + str(data)[:200]) arrays = data + elif data.__class__.__name__ == 'DataFrame': + # test if data is a DataFrame, without pandas installed + data = data.values else: if not hasattr(data, 'shape'): raise TypeError('Error when checking model ' + exception_prefix + @@ -367,7 +376,7 @@ def _make_batches(size, batch_size): """ num_batches = int(np.ceil(size / float(batch_size))) return [(i * batch_size, min(size, (i + 1) * batch_size)) - for i in range(0, num_batches)] + for i in range(num_batches)] def _slice_arrays(arrays, start=None, stop=None): @@ -627,6 +636,7 @@ class Model(Network): self.sample_weight_mode = sample_weight_mode self.loss = loss self.loss_weights = loss_weights + self.sample_weight_mode = sample_weight_mode # Prepare loss functions. if isinstance(loss, dict): @@ -936,9 +946,28 @@ class Model(Network): trainable_weights = self.trainable_weights self._collected_trainable_weights = trainable_weights + def _check_trainable_weights_consistency(self): + """Check trainable weights count consistency. + + This will raise a warning if `trainable_weights` and + `_collected_trainable_weights` are consistent (i.e. have the same + number of parameters). + Inconsistency will typically arise when one modifies `model.trainable` + without calling `model.compile` again. + """ + if not hasattr(self, '_collected_trainable_weights'): + return + + if len(self.trainable_weights) != len(self._collected_trainable_weights): + logging.warning( + 'Discrepancy between trainable weights and collected trainable' + ' weights, did you set `model.trainable` without calling' + ' `model.compile` after ?') + def _make_train_function(self): if not hasattr(self, 'train_function'): raise RuntimeError('You must compile your model before using it.') + self._check_trainable_weights_consistency() if self.train_function is None: inputs = (self._feed_inputs + self._feed_targets + @@ -1258,7 +1287,7 @@ class Model(Network): for i, batch_out in enumerate(batch_outs): unconcatenated_outs[i].append(batch_out) if verbose == 1: - progbar.update(step) + progbar.update(step + 1) if len(unconcatenated_outs) == 1: return np.concatenate(unconcatenated_outs[0], axis=0) return [ @@ -1313,9 +1342,13 @@ class Model(Network): """ num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') outs = [] - if steps is not None: - if verbose == 1: + + if verbose == 1: + if steps is not None: progbar = Progbar(target=steps) + else: + progbar = Progbar(target=num_samples) + if steps is not None: for step in range(steps): batch_outs = f(ins) if isinstance(batch_outs, list): @@ -1329,7 +1362,7 @@ class Model(Network): outs.append(0.) outs[0] += batch_outs if verbose == 1: - progbar.update(step) + progbar.update(step + 1) for i in range(len(outs)): outs[i] /= steps else: @@ -1456,11 +1489,13 @@ class Model(Network): If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. + Can be `None` (default) if feeding from framework-native tensors. y: Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. + Can be `None` (default) if feeding from framework-native tensors. batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, it will default to 32. @@ -1621,8 +1656,8 @@ class Model(Network): validation_steps=validation_steps) def evaluate(self, - x, - y, + x=None, + y=None, batch_size=None, verbose=1, sample_weight=None, @@ -1637,11 +1672,13 @@ class Model(Network): If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. + Can be `None` (default) if feeding from framework-native tensors. y: Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. + Can be `None` (default) if feeding from framework-native tensors. batch_size: Integer. If unspecified, it will default to 32. verbose: Verbosity mode, 0 or 1. sample_weight: Array of weights to weight the contribution @@ -1877,8 +1914,7 @@ class Model(Network): Arguments: generator: A generator or an instance of Sequence (keras.utils.Sequence) - object in order to avoid duplicate data - when using multiprocessing. + object in order to avoid duplicate data when using multiprocessing. The output of the generator must be either - a tuple (inputs, targets) - a tuple (inputs, targets, sample_weights). @@ -1889,7 +1925,7 @@ class Model(Network): steps_per_epoch: Total number of steps (batches of samples) to yield from `generator` before declaring one epoch finished and starting the next epoch. It should typically - be equal to the number of unique samples if your dataset + be equal to the number of unique samples of your dataset divided by the batch size. epochs: Integer, total number of iterations on the data. verbose: Verbosity mode, 0, 1, or 2. @@ -1913,9 +1949,9 @@ class Model(Network): non picklable arguments to the generator as they can't be passed easily to children processes. - shuffle: Whether to shuffle the data at the beginning of each - epoch. Only used with instances of `Sequence` ( - keras.utils.Sequence). + shuffle: Whether to shuffle the order of the batches at the + beginning of each epoch. Only used with instances + of `Sequence` (keras.utils.Sequence). initial_epoch: Epoch at which to start training (useful for resuming a previous training run) **kwargs: support for legacy arguments. @@ -1944,7 +1980,7 @@ class Model(Network): ValueError: In case the generator yields data in an invalid format. """ - # Legacy support + # Legacy support if 'max_q_size' in kwargs: max_queue_size = kwargs.pop('max_q_size') logging.warning('The argument `max_q_size` has been renamed ' @@ -2142,8 +2178,8 @@ class Model(Network): generator: Generator yielding tuples (inputs, targets) or (inputs, targets, sample_weights) or an instance of Sequence (keras.utils.Sequence) - object in order to avoid duplicate data - when using multiprocessing. + object in order to avoid duplicate data + when using multiprocessing. steps: Total number of steps (batches of samples) to yield from `generator` before stopping. max_queue_size: maximum size for the generator queue diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index bc9ad6693e..e2a06e8e77 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -640,6 +640,19 @@ class LossMaskingTest(test.TestCase): class TestDynamicTrainability(test.TestCase): + def test_trainable_warning(self): + with self.test_session(): + x = np.random.random((5, 3)) + y = np.random.random((5, 2)) + + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_dim=3)) + model.trainable = False + model.compile('rmsprop', 'mse') + model.trainable = True + model.train_on_batch(x, y) + self.assertRaises(Warning) + def test_trainable_argument(self): with self.test_session(): x = np.random.random((5, 3)) diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional.py b/tensorflow/python/keras/_impl/keras/layers/convolutional.py index ce96bc66f7..1cbae91263 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional.py @@ -793,6 +793,7 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): strides=(1, 1), padding='valid', data_format=None, + dilation_rate=1, depth_multiplier=1, activation=None, use_bias=True, @@ -815,6 +816,7 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): strides=strides, padding=padding, data_format=data_format, + dilation_rate=dilation_rate, activation=activations.get(activation), use_bias=use_bias, depthwise_initializer=initializers.get(depthwise_initializer), @@ -831,30 +833,42 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): def get_config(self): config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'depthwise_initializer': initializers.serialize( - self.depthwise_initializer), - 'pointwise_initializer': initializers.serialize( - self.pointwise_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'depthwise_regularizer': regularizers.serialize( - self.depthwise_regularizer), - 'pointwise_regularizer': regularizers.serialize( - self.pointwise_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), + 'filters': + self.filters, + 'kernel_size': + self.kernel_size, + 'strides': + self.strides, + 'padding': + self.padding, + 'data_format': + self.data_format, + 'dilation_rate': + self.dilation_rate, + 'activation': + activations.serialize(self.activation), + 'use_bias': + self.use_bias, + 'depthwise_initializer': + initializers.serialize(self.depthwise_initializer), + 'pointwise_initializer': + initializers.serialize(self.pointwise_initializer), + 'bias_initializer': + initializers.serialize(self.bias_initializer), + 'depthwise_regularizer': + regularizers.serialize(self.depthwise_regularizer), + 'pointwise_regularizer': + regularizers.serialize(self.pointwise_regularizer), + 'bias_regularizer': + regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'depthwise_constraint': constraints.serialize( - self.depthwise_constraint), - 'pointwise_constraint': constraints.serialize( - self.pointwise_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) + 'depthwise_constraint': + constraints.serialize(self.depthwise_constraint), + 'pointwise_constraint': + constraints.serialize(self.pointwise_constraint), + 'bias_constraint': + constraints.serialize(self.bias_constraint) } base_config = super(SeparableConv2D, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py index 2335bd4df0..c88122ce18 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py @@ -536,7 +536,7 @@ class ConvLSTM2D(ConvRecurrent2D): conv_out = K.bias_add(conv_out, b, data_format=self.data_format) return conv_out - def reccurent_conv(self, x, w): + def recurrent_conv(self, x, w): conv_out = K.conv2d( x, w, strides=(1, 1), padding='same', data_format=self.data_format) return conv_out @@ -556,10 +556,10 @@ class ConvLSTM2D(ConvRecurrent2D): inputs * dp_mask[2], self.kernel_c, self.bias_c, padding=self.padding) x_o = self.input_conv( inputs * dp_mask[3], self.kernel_o, self.bias_o, padding=self.padding) - h_i = self.reccurent_conv(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_i) - h_f = self.reccurent_conv(h_tm1 * rec_dp_mask[1], self.recurrent_kernel_f) - h_c = self.reccurent_conv(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c) - h_o = self.reccurent_conv(h_tm1 * rec_dp_mask[3], self.recurrent_kernel_o) + h_i = self.recurrent_conv(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_i) + h_f = self.recurrent_conv(h_tm1 * rec_dp_mask[1], self.recurrent_kernel_f) + h_c = self.recurrent_conv(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c) + h_o = self.recurrent_conv(h_tm1 * rec_dp_mask[3], self.recurrent_kernel_o) i = self.recurrent_activation(x_i + h_i) f = self.recurrent_activation(x_f + h_f) diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py index b2e0e7b8ee..517129fab0 100644 --- a/tensorflow/python/keras/_impl/keras/layers/core.py +++ b/tensorflow/python/keras/_impl/keras/layers/core.py @@ -52,7 +52,7 @@ class Masking(Layer): Example: Consider a Numpy data array `x` of shape `(samples, timesteps, features)`, - to be fed to a LSTM layer. + to be fed to an LSTM layer. You want to mask timestep #3 and #5 because you lack data for these timesteps. You can: @@ -121,7 +121,11 @@ class Dropout(tf_core_layers.Dropout, Layer): return output def get_config(self): - config = {'rate': self.rate} + config = { + 'rate': self.rate, + 'noise_shape': self.noise_shape, + 'seed': self.seed + } base_config = super(Dropout, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -383,20 +387,18 @@ class Reshape(Layer): def _compute_output_shape(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape).as_list() - output_shape = [input_shape[0]] - output_shape += self._fix_unknown_dimension(input_shape[1:], - self.target_shape) + if None in input_shape[1:]: + output_shape = [input_shape[0]] + # input shape (partially) unknown? replace -1's with None's + output_shape += tuple(s if s != -1 else None for s in self.target_shape) + else: + output_shape = [input_shape[0]] + output_shape += self._fix_unknown_dimension(input_shape[1:], + self.target_shape) return tensor_shape.TensorShape(output_shape) def call(self, inputs): - # In case the target shape is not fully defined, - # we need access to the shape of x. - target_shape = self.target_shape - if -1 in target_shape: - # target shape not fully defined - target_shape = self._compute_output_shape(inputs.get_shape()) - target_shape = target_shape.as_list()[1:] - return K.reshape(inputs, (-1,) + tuple(target_shape)) + return K.reshape(inputs, (K.shape(inputs)[0],) + self.target_shape) def get_config(self): config = {'target_shape': self.target_shape} @@ -595,6 +597,7 @@ class Lambda(Layer): @classmethod def from_config(cls, config, custom_objects=None): + config = config.copy() globs = globals() if custom_objects: globs = dict(list(globs.items()) + list(custom_objects.items())) diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py index 9cdebd375c..dd768dc268 100644 --- a/tensorflow/python/keras/_impl/keras/layers/core_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/core_test.py @@ -111,6 +111,12 @@ class CoreLayersTest(test.TestCase): kwargs={'target_shape': (1, -1)}, input_shape=(3, 2, 4)) + with self.test_session(): + testing_utils.layer_test( + keras.layers.Reshape, + kwargs={'target_shape': (-1, 1)}, + input_shape=(None, None, 2)) + def test_permute(self): with self.test_session(): testing_utils.layer_test( diff --git a/tensorflow/python/keras/_impl/keras/layers/merge.py b/tensorflow/python/keras/_impl/keras/layers/merge.py index 84b65d87c2..5f26ce44e3 100644 --- a/tensorflow/python/keras/_impl/keras/layers/merge.py +++ b/tensorflow/python/keras/_impl/keras/layers/merge.py @@ -299,6 +299,21 @@ class Maximum(_Merge): return output +class Minimum(_Merge): + """Layer that computes the minimum (element-wise) a list of inputs. + + It takes as input a list of tensors, + all of the same shape, and returns + a single tensor (also of the same shape). + """ + + def _merge_function(self, inputs): + output = inputs[0] + for i in range(1, len(inputs)): + output = K.minimum(output, inputs[i]) + return output + + class Concatenate(_Merge): """Layer that concatenates a list of inputs. @@ -375,9 +390,8 @@ class Concatenate(_Merge): masks = [] for input_i, mask_i in zip(inputs, mask): if mask_i is None: - # Input is unmasked. Append all 1s to masks, - # but cast it to bool first - masks.append(K.cast(K.ones_like(input_i), 'bool')) + # Input is unmasked. Append all 1s to masks + masks.append(K.ones_like(input_i, dtype='bool')) elif K.ndim(mask_i) < K.ndim(input_i): # Mask is smaller than the input, expand it masks.append(K.expand_dims(mask_i)) @@ -584,6 +598,19 @@ def maximum(inputs, **kwargs): return Maximum(**kwargs)(inputs) +def minimum(inputs, **kwargs): + """Functional interface to the `Minimum` layer. + + Arguments: + inputs: A list of input tensors (at least 2). + **kwargs: Standard layer keyword arguments. + + Returns: + A tensor, the element-wise minimum of the inputs. + """ + return Minimum(**kwargs)(inputs) + + def concatenate(inputs, axis=-1, **kwargs): """Functional interface to the `Concatenate` layer. diff --git a/tensorflow/python/keras/_impl/keras/layers/merge_test.py b/tensorflow/python/keras/_impl/keras/layers/merge_test.py index a574658279..1f34c367e4 100644 --- a/tensorflow/python/keras/_impl/keras/layers/merge_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/merge_test.py @@ -116,6 +116,20 @@ class MergeLayersTest(test.TestCase): self.assertEqual(out.shape, (2, 4, 5)) self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4) + def test_merge_minimum(self): + with self.test_session(): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + o = keras.layers.minimum([i1, i2]) + self.assertListEqual(o.get_shape().as_list(), [None, 4, 5]) + model = keras.models.Model([i1, i2], o) + + x1 = np.random.random((2, 4, 5)) + x2 = np.random.random((2, 4, 5)) + out = model.predict([x1, x2]) + self.assertEqual(out.shape, (2, 4, 5)) + self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4) + def test_merge_concatenate(self): with self.test_session(): i1 = keras.layers.Input(shape=(4, 5)) diff --git a/tensorflow/python/keras/_impl/keras/layers/pooling.py b/tensorflow/python/keras/_impl/keras/layers/pooling.py index e773e39679..afe4ebfdc5 100644 --- a/tensorflow/python/keras/_impl/keras/layers/pooling.py +++ b/tensorflow/python/keras/_impl/keras/layers/pooling.py @@ -367,7 +367,7 @@ class GlobalAveragePooling1D(_GlobalPooling1D): Output shape: 2D tensor with shape: - `(batch_size, channels)` + `(batch_size, features)` """ def call(self, inputs): @@ -382,7 +382,7 @@ class GlobalMaxPooling1D(_GlobalPooling1D): Output shape: 2D tensor with shape: - `(batch_size, channels)` + `(batch_size, features)` """ def call(self, inputs): diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index a0cca9dc2f..6f786b7850 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -97,10 +97,6 @@ class Wrapper(Layer): return losses + super(Wrapper, self).get_losses_for(None) return super(Wrapper, self).get_losses_for(inputs) - @property - def constraints(self): - return self.layer.constraints - def get_weights(self): return self.layer.get_weights() diff --git a/tensorflow/python/keras/_impl/keras/utils/io_utils.py b/tensorflow/python/keras/_impl/keras/utils/io_utils.py index 5f2ba99be7..1c8299c27d 100644 --- a/tensorflow/python/keras/_impl/keras/utils/io_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/io_utils.py @@ -84,7 +84,7 @@ class HDF5Matrix(object): if start is None: start = 0 if stop is None: - stop = self.data.shape[0] + stop = self.shape[0] if stop + self.start <= self.end: idx = slice(start + self.start, stop + self.start) else: diff --git a/tensorflow/python/keras/_impl/keras/utils/layer_utils.py b/tensorflow/python/keras/_impl/keras/utils/layer_utils.py index 86c0264355..053c0600a3 100644 --- a/tensorflow/python/keras/_impl/keras/utils/layer_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/layer_utils.py @@ -24,6 +24,18 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.utils.conv_utils import convert_kernel +def count_params(weights): + """Count the total number of scalars composing the weights. + + Arguments: + weights: An iterable containing the weights on which to compute params + + Returns: + The total number of scalars composing the weights + """ + return int(np.sum([K.count_params(p) for p in set(weights)])) + + def print_summary(model, line_length=None, positions=None, print_fn=None): """Prints a summary of a model. @@ -46,12 +58,28 @@ def print_summary(model, line_length=None, positions=None, print_fn=None): sequential_like = True else: sequential_like = True - for v in model._nodes_by_depth.values(): # pylint: disable=protected-access + nodes_by_depth = model._nodes_by_depth.values() # pylint: disable=protected-access + nodes = [] + for v in nodes_by_depth: if (len(v) > 1) or (len(v) == 1 and len(v[0].inbound_layers) > 1): # If the model has multiple nodes or if the nodes have # multiple inbound_layers, the model is no longer sequential. sequential_like = False break + nodes += v + if sequential_like: + # search for shared layers + for layer in model.layers: + flag = False + for node in layer.inbound_nodes: + if node in nodes: + if flag: + sequential_like = False + break + else: + flag = True + if not sequential_like: + break if sequential_like: line_length = line_length or 65 @@ -61,7 +89,7 @@ def print_summary(model, line_length=None, positions=None, print_fn=None): # header names for the different log elements to_display = ['Layer (type)', 'Output Shape', 'Param #'] else: - line_length = line_length or 100 + line_length = line_length or 98 positions = positions or [.33, .55, .67, 1.] if positions[-1] <= 1: positions = [int(line_length * p) for p in positions] @@ -144,8 +172,12 @@ def print_summary(model, line_length=None, positions=None, print_fn=None): else: print_fn('_' * line_length) - trainable_count = int( - np.sum([K.count_params(p) for p in set(model.trainable_weights)])) + model._check_trainable_weights_consistency() # pylint: disable=protected-access + if hasattr(model, '_collected_trainable_weights'): + trainable_count = count_params(model._collected_trainable_weights) # pylint: disable=protected-access + else: + trainable_count = count_params(model.trainable_weights) + non_trainable_count = int( np.sum([K.count_params(p) for p in set(model.non_trainable_weights)])) diff --git a/tensorflow/python/keras/datasets/__init__.py b/tensorflow/python/keras/datasets/__init__.py index b76f278964..69e10bd63c 100644 --- a/tensorflow/python/keras/datasets/__init__.py +++ b/tensorflow/python/keras/datasets/__init__.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.python.keras.datasets import boston_housing from tensorflow.python.keras.datasets import cifar10 from tensorflow.python.keras.datasets import cifar100 +from tensorflow.python.keras.datasets import fashion_mnist from tensorflow.python.keras.datasets import imdb from tensorflow.python.keras.datasets import mnist from tensorflow.python.keras.datasets import reuters diff --git a/tensorflow/python/keras/datasets/fashion_mnist/__init__.py b/tensorflow/python/keras/datasets/fashion_mnist/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py index 76e8fbef2f..7be1fa5cfe 100644 --- a/tensorflow/python/layers/core.py +++ b/tensorflow/python/layers/core.py @@ -286,11 +286,19 @@ class Dropout(base.Layer): self.noise_shape = noise_shape self.seed = seed - def _get_noise_shape(self, _): + def _get_noise_shape(self, inputs): # Subclasses of `Dropout` may implement `_get_noise_shape(self, inputs)`, # which will override `self.noise_shape`, and allows for custom noise # shapes with dynamically sized inputs. - return self.noise_shape + if self.noise_shape is None: + return self.noise_shape + + symbolic_shape = array_ops.shape(inputs) + noise_shape = [ + symbolic_shape[axis] if shape is None else shape + for axis, shape in enumerate(self.noise_shape) + ] + return noise_shape def call(self, inputs, training=False): diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py index b67df89f81..2d47cc6979 100644 --- a/tensorflow/python/layers/core_test.py +++ b/tensorflow/python/layers/core_test.py @@ -387,6 +387,16 @@ class DropoutTest(test.TestCase): self.assertAllClose(np.ones((5, 5)), np_output) @test_util.run_in_graph_and_eager_modes() + def testDynamicNoiseShape(self): + inputs = array_ops.ones((5, 3, 2)) + noise_shape = [None, 1, None] + dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1) + dropped = dp.apply(inputs, training=True) + self.evaluate(variables.global_variables_initializer()) + np_output = self.evaluate(dropped) + self.assertAlmostEqual(0., np_output.min()) + self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :]) + def testCustomNoiseShape(self): inputs = array_ops.ones((5, 3, 2)) noise_shape = [5, 1, 2] diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index b6f9eea2de..64352508b5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -152,7 +152,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " } member_method { name: "evaluate_generator" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.datasets.fashion_mnist.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.datasets.fashion_mnist.pbtxt new file mode 100644 index 0000000000..791cfda233 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.datasets.fashion_mnist.pbtxt @@ -0,0 +1,3 @@ +path: "tensorflow.keras.datasets.fashion_mnist" +tf_module { +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.datasets.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.datasets.pbtxt index d4aa436f32..36e3aafbe4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.datasets.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.datasets.pbtxt @@ -12,6 +12,10 @@ tf_module { name: "cifar100" mtype: "" } + member { + name: "fashion_mnist" + mtype: "" + } member { name: "imdb" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index a0906e62cf..8c2b110c6d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -191,7 +191,7 @@ tf_class { argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { - name: "reccurent_conv" + name: "recurrent_conv" argspec: "args=[\'self\', \'x\', \'w\'], varargs=None, keywords=None, defaults=None" } member_method { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt index 7867e3c1fd..f289664ba2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -93,7 +93,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'depth_multiplier\', \'activation\', \'use_bias\', \'depthwise_initializer\', \'pointwise_initializer\', \'bias_initializer\', \'depthwise_regularizer\', \'pointwise_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'depthwise_constraint\', \'pointwise_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\', \'depth_multiplier\', \'activation\', \'use_bias\', \'depthwise_initializer\', \'pointwise_initializer\', \'bias_initializer\', \'depthwise_regularizer\', \'pointwise_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'depthwise_constraint\', \'pointwise_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'1\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index 0fb6e84f8d..d788728612 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -93,7 +93,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'depth_multiplier\', \'activation\', \'use_bias\', \'depthwise_initializer\', \'pointwise_initializer\', \'bias_initializer\', \'depthwise_regularizer\', \'pointwise_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'depthwise_constraint\', \'pointwise_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\', \'depth_multiplier\', \'activation\', \'use_bias\', \'depthwise_initializer\', \'pointwise_initializer\', \'bias_initializer\', \'depthwise_regularizer\', \'pointwise_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'depthwise_constraint\', \'pointwise_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'1\', \'1\', \'None\', \'True\', \'glorot_uniform\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "add_loss" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt index 34c9efb3ca..dedef65ff9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt @@ -9,10 +9,6 @@ tf_class { name: "activity_regularizer" mtype: "" } - member { - name: "constraints" - mtype: "" - } member { name: "dtype" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt index 9cee68874a..313b3a9e15 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt @@ -8,10 +8,6 @@ tf_class { name: "activity_regularizer" mtype: "" } - member { - name: "constraints" - mtype: "" - } member { name: "dtype" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index af9a44086f..8916925b3b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -152,7 +152,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " } member_method { name: "evaluate_generator" -- GitLab From 29d84f18369cfe08beae97cff0aa8bde601b4cfc Mon Sep 17 00:00:00 2001 From: Neal Wu Date: Tue, 14 Nov 2017 13:43:50 -0800 Subject: [PATCH 0422/1801] Remove wide_n_deep_tutorial.py in tensorflow/examples/learn in favor of wide_deep.py in the TensorFlow official models PiperOrigin-RevId: 175728483 --- tensorflow/docs_src/tutorials/linear.md | 2 +- tensorflow/examples/learn/BUILD | 8 - tensorflow/examples/learn/README.md | 2 +- tensorflow/examples/learn/examples_test.sh | 1 - .../examples/learn/wide_n_deep_tutorial.py | 252 ------------------ 5 files changed, 2 insertions(+), 263 deletions(-) delete mode 100644 tensorflow/examples/learn/wide_n_deep_tutorial.py diff --git a/tensorflow/docs_src/tutorials/linear.md b/tensorflow/docs_src/tutorials/linear.md index a6517549c3..d333d01279 100644 --- a/tensorflow/docs_src/tutorials/linear.md +++ b/tensorflow/docs_src/tutorials/linear.md @@ -175,7 +175,7 @@ the name of a `FeatureColumn`. Each key's value is a tensor containing the values of that feature for all data instances. See @{$input_fn$Building Input Functions with tf.estimator} for a more comprehensive look at input functions, and `input_fn` in the -[linear models tutorial code](https://www.tensorflow.org/code/tensorflow/examples/learn/wide_n_deep_tutorial.py) +[linear models tutorial code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py) for an example implementation of an input function. The input function is passed to the `train()` and `evaluate()` calls that diff --git a/tensorflow/examples/learn/BUILD b/tensorflow/examples/learn/BUILD index 23a42a60ba..aba7f600b5 100644 --- a/tensorflow/examples/learn/BUILD +++ b/tensorflow/examples/learn/BUILD @@ -113,13 +113,6 @@ py_binary( ], ) -py_binary( - name = "wide_n_deep_tutorial", - srcs = ["wide_n_deep_tutorial.py"], - srcs_version = "PY2AND3", - deps = ["//tensorflow:tensorflow_py"], -) - py_binary( name = "mnist", srcs = ["mnist.py"], @@ -153,7 +146,6 @@ sh_test( ":text_classification_character_cnn", ":text_classification_character_rnn", ":text_classification_cnn", - ":wide_n_deep_tutorial", ], tags = [ "manual", diff --git a/tensorflow/examples/learn/README.md b/tensorflow/examples/learn/README.md index 70d9db85ee..b74a8f39d9 100644 --- a/tensorflow/examples/learn/README.md +++ b/tensorflow/examples/learn/README.md @@ -23,7 +23,7 @@ processing (`pip install -U pandas`). ## Specialized Models * [Building a Random Forest Model](https://www.tensorflow.org/code/tensorflow/examples/learn/random_forest_mnist.py) -* [Building a Wide & Deep Model](https://www.tensorflow.org/code/tensorflow/examples/learn/wide_n_deep_tutorial.py) +* [Building a Wide & Deep Model](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py) * [Building a Residual Network Model](https://www.tensorflow.org/code/tensorflow/examples/learn/resnet.py) ## Text classification diff --git a/tensorflow/examples/learn/examples_test.sh b/tensorflow/examples/learn/examples_test.sh index b8763de471..ef5e8a5de2 100755 --- a/tensorflow/examples/learn/examples_test.sh +++ b/tensorflow/examples/learn/examples_test.sh @@ -56,4 +56,3 @@ test text_classification_builtin_rnn_model --test_with_fake_data test text_classification_character_cnn --test_with_fake_data test text_classification_character_rnn --test_with_fake_data test text_classification_cnn --test_with_fake_data -test wide_n_deep_tutorial diff --git a/tensorflow/examples/learn/wide_n_deep_tutorial.py b/tensorflow/examples/learn/wide_n_deep_tutorial.py deleted file mode 100644 index 072353392a..0000000000 --- a/tensorflow/examples/learn/wide_n_deep_tutorial.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Example code for TensorFlow Wide & Deep Tutorial using TF High Level API. - -This example uses APIs in Tensorflow 1.4 or above. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import shutil -import sys -import tempfile - -import pandas as pd -from six.moves import urllib -import tensorflow as tf - - -CSV_COLUMNS = [ - "age", "workclass", "fnlwgt", "education", "education_num", - "marital_status", "occupation", "relationship", "race", "gender", - "capital_gain", "capital_loss", "hours_per_week", "native_country", - "income_bracket" -] - -gender = tf.feature_column.categorical_column_with_vocabulary_list( - "gender", ["Female", "Male"]) -education = tf.feature_column.categorical_column_with_vocabulary_list( - "education", [ - "Bachelors", "HS-grad", "11th", "Masters", "9th", - "Some-college", "Assoc-acdm", "Assoc-voc", "7th-8th", - "Doctorate", "Prof-school", "5th-6th", "10th", "1st-4th", - "Preschool", "12th" - ]) -marital_status = tf.feature_column.categorical_column_with_vocabulary_list( - "marital_status", [ - "Married-civ-spouse", "Divorced", "Married-spouse-absent", - "Never-married", "Separated", "Married-AF-spouse", "Widowed" - ]) -relationship = tf.feature_column.categorical_column_with_vocabulary_list( - "relationship", [ - "Husband", "Not-in-family", "Wife", "Own-child", "Unmarried", - "Other-relative" - ]) -workclass = tf.feature_column.categorical_column_with_vocabulary_list( - "workclass", [ - "Self-emp-not-inc", "Private", "State-gov", "Federal-gov", - "Local-gov", "?", "Self-emp-inc", "Without-pay", "Never-worked" - ]) - -# To show an example of hashing: -occupation = tf.feature_column.categorical_column_with_hash_bucket( - "occupation", hash_bucket_size=1000) -native_country = tf.feature_column.categorical_column_with_hash_bucket( - "native_country", hash_bucket_size=1000) - -# Continuous base columns. -age = tf.feature_column.numeric_column("age") -education_num = tf.feature_column.numeric_column("education_num") -capital_gain = tf.feature_column.numeric_column("capital_gain") -capital_loss = tf.feature_column.numeric_column("capital_loss") -hours_per_week = tf.feature_column.numeric_column("hours_per_week") - -# Transformations. -age_buckets = tf.feature_column.bucketized_column( - age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) - -# Wide columns and deep columns. -base_columns = [ - gender, education, marital_status, relationship, workclass, occupation, - native_country, age_buckets, -] - -crossed_columns = [ - tf.feature_column.crossed_column( - ["education", "occupation"], hash_bucket_size=1000), - tf.feature_column.crossed_column( - [age_buckets, "education", "occupation"], hash_bucket_size=1000), - tf.feature_column.crossed_column( - ["native_country", "occupation"], hash_bucket_size=1000) -] - -deep_columns = [ - tf.feature_column.indicator_column(workclass), - tf.feature_column.indicator_column(education), - tf.feature_column.indicator_column(gender), - tf.feature_column.indicator_column(relationship), - # To show an example of embedding - tf.feature_column.embedding_column(native_country, dimension=8), - tf.feature_column.embedding_column(occupation, dimension=8), - age, - education_num, - capital_gain, - capital_loss, - hours_per_week, -] - - -FLAGS = None - - -def maybe_download(train_data, test_data): - """Maybe downloads training data and returns train and test file names.""" - if train_data: - train_file_name = train_data - else: - train_file = tempfile.NamedTemporaryFile(delete=False) - urllib.request.urlretrieve( - "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", - train_file.name) # pylint: disable=line-too-long - train_file_name = train_file.name - train_file.close() - print("Training data is downloaded to %s" % train_file_name) - - if test_data: - test_file_name = test_data - else: - test_file = tempfile.NamedTemporaryFile(delete=False) - urllib.request.urlretrieve( - "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test", - test_file.name) # pylint: disable=line-too-long - test_file_name = test_file.name - test_file.close() - print("Test data is downloaded to %s"% test_file_name) - - return train_file_name, test_file_name - - -def build_estimator(model_dir, model_type): - """Build an estimator.""" - if model_type == "wide": - m = tf.estimator.LinearClassifier( - model_dir=model_dir, feature_columns=base_columns + crossed_columns) - elif model_type == "deep": - m = tf.estimator.DNNClassifier( - model_dir=model_dir, - feature_columns=deep_columns, - hidden_units=[100, 50]) - else: - m = tf.estimator.DNNLinearCombinedClassifier( - model_dir=model_dir, - linear_feature_columns=crossed_columns, - dnn_feature_columns=deep_columns, - dnn_hidden_units=[100, 50]) - return m - - -def input_fn(data_file, num_epochs, shuffle): - """Returns an `input_fn` required by Estimator train/evaluate. - - Args: - data_file: The file path to the dataset. - num_epochs: Number of epochs to iterate over data. If `None`, `input_fn` - will generate infinite stream of data. - shuffle: bool, whether to read the data in random order. - """ - df_data = pd.read_csv( - tf.gfile.Open(data_file), - names=CSV_COLUMNS, - skipinitialspace=True, - engine="python", - skiprows=1) - # remove NaN elements - df_data = df_data.dropna(how="any", axis=0) - labels = df_data["income_bracket"].apply(lambda x: ">50K" in x).astype(int) - - return tf.estimator.inputs.pandas_input_fn( - x=df_data, - y=labels, - batch_size=100, - num_epochs=num_epochs, - shuffle=shuffle, - num_threads=1) - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - train_file_name, test_file_name = maybe_download(FLAGS.train_data, - FLAGS.test_data) - - # Specify file path below if want to find the output easily - model_dir = FLAGS.model_dir if FLAGS.model_dir else tempfile.mkdtemp() - - estimator = build_estimator(model_dir, FLAGS.model_type) - - # `tf.estimator.TrainSpec`, `tf.estimator.EvalSpec`, and - # `tf.estimator.train_and_evaluate` API are available in TF 1.4. - train_spec = tf.estimator.TrainSpec( - input_fn=input_fn(train_file_name, num_epochs=None, shuffle=True), - max_steps=FLAGS.train_steps) - - eval_spec = tf.estimator.EvalSpec( - input_fn=input_fn(test_file_name, num_epochs=1, shuffle=False), - # set steps to None to run evaluation until all data consumed. - steps=None) - - tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) - - # Manual cleanup - shutil.rmtree(model_dir) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.register("type", "bool", lambda v: v.lower() == "true") - parser.add_argument( - "--model_dir", - type=str, - default="", - help="Base directory for output models." - ) - parser.add_argument( - "--model_type", - type=str, - default="wide_n_deep", - help="Valid model types: {'wide', 'deep', 'wide_n_deep'}." - ) - parser.add_argument( - "--train_steps", - type=int, - default=2000, - help="Number of training steps." - ) - parser.add_argument( - "--train_data", - type=str, - default="", - help="Path to the training data." - ) - parser.add_argument( - "--test_data", - type=str, - default="", - help="Path to the test data." - ) - FLAGS, unparsed = parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) -- GitLab From b77b20f8e8ef5a670201032a8fc8daf157524b74 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 13:48:43 -0800 Subject: [PATCH 0423/1801] Automated g4 rollback of changelist 175571632 PiperOrigin-RevId: 175729221 --- tensorflow/compiler/xla/service/shaped_buffer.cc | 8 ++++++++ tensorflow/compiler/xla/service/shaped_buffer.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index a2a442eb1a..a57ebf59e7 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -63,6 +63,14 @@ void ShapedBuffer::clear() { } } +void ShapedBuffer::AddBufferAtIndex( + const perftools::gputools::DeviceMemoryBase& buffer, + const ShapeIndex& shape_index) { + *mutable_shape_index_to_buffer_entry()->mutable_element(shape_index) = + buffers().size(); + mutable_buffers()->push_back(buffer); +} + const se::DeviceMemoryBase& ShapedBuffer::buffer( const ShapeIndex& index) const { return buffers_[shape_index_to_buffer_entry_.element(index)]; diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index e5ea06fb13..b440948700 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -75,6 +75,10 @@ class ShapedBuffer { // Set all device memory pointers in the object to null. void clear(); + // Adds a new buffer at the given shape index. + void AddBufferAtIndex(const perftools::gputools::DeviceMemoryBase& buffer, + const ShapeIndex& shape_index); + protected: // The shape of the device buffer with layout. const Shape shape_; -- GitLab From 5302a66f01deaaf0774e127f79b0373da194529c Mon Sep 17 00:00:00 2001 From: Russell Power Date: Tue, 14 Nov 2017 13:55:42 -0800 Subject: [PATCH 0424/1801] Check for placeholder ops (other than features and labels) during graph validation. We can provide a more helpful error message at this point than if we detect this at XLA compile time. PiperOrigin-RevId: 175730190 --- tensorflow/contrib/tpu/python/tpu/tpu.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index bc3c888b1f..9aa5a9c78d 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -31,6 +31,10 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import variable_scope +_SUMMARY_OPS = ("ScalarSummary",) +_PLACEHOLDER_OPS = ("Placeholder",) + + def initialize_system(embedding_config=None, job=None): """Initializes a distributed TPU system for use with TensorFlow. @@ -103,6 +107,12 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext): def _AddOpInternal(self, op): # pylint: disable=protected-access + if op.type in _PLACEHOLDER_OPS: + raise ValueError("Placeholder %s is not supported." % op.name) + + if op.type in _SUMMARY_OPS: + raise ValueError("Summary operations are not currently supported.") + if any(x.dtype._is_ref_dtype for x in op.inputs): raise NotImplementedError( "Non-resource Variables are not supported inside TPU computations " -- GitLab From d3d907e0e9658469799aa24694240c5f99ddf5d2 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 14 Nov 2017 14:08:34 -0800 Subject: [PATCH 0425/1801] Fix speech hotword model links. --- .../lite/models/testdata/g3doc/README.md | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md index d0c21d2833..d0cdae6bdf 100644 --- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md +++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md @@ -72,31 +72,22 @@ same input. ### Models: -[Speech hotword model (Svdf rank=1)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank1.tflite) +[Speech hotword model (Svdf rank=1)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank1_2017_11_14.tflite) -[Speech hotword model (Svdf rank=2)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank2.tflite) +[Speech hotword model (Svdf rank=2)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank2_2017_11_14.tflite) -[Speaker-id model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_speakerid_model.tflite) +[Speaker-id model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_speakerid_model_2017_11_14.tflite) -[TTS model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_tts_model.tflite) +[TTS model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_tts_model_2017_11_14.tflite) -[ASR AM model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_terse_am_model.tflite) +[ASR AM model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_terse_am_model_2017_11_14.tflite) ### Test benches -[Speech hotword model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc) +[Speech hotword model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc) -[Speaker-id model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc) - -[TTS model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc) - -[ASR AM model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc) - -## Android Support -The models have been tested on Android phones, using the following tests: - -[Hotword] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=25) - -[Speaker-id] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=36) +[Speaker-id model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc) +[TTS model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc) +[ASR AM model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc) -- GitLab From 98b52cfd420fc054ad082bf1865d9eabee0b7a3e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 14:14:07 -0800 Subject: [PATCH 0426/1801] boosted_trees: Finalize the centering bias tree only after centering is done (to make sense). PiperOrigin-RevId: 175733336 --- .../boosted_trees/kernels/prediction_ops.cc | 19 +++++++++++++------ .../boosted_trees/kernels/training_ops.cc | 2 +- .../python/kernel_tests/training_ops_test.py | 6 ++---- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc index 766982b4f2..f8086b0c2b 100644 --- a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc @@ -63,19 +63,26 @@ const char* kPredictionsTensorName = "predictions"; void CalculateTreesToInclude( const boosted_trees::trees::DecisionTreeEnsembleConfig& config, const std::vector& trees_to_drop, const int32 num_trees, - const bool only_finalized, std::vector* trees_to_include) { + const bool only_finalized, const bool center_bias, + std::vector* trees_to_include) { trees_to_include->reserve(num_trees - trees_to_drop.size()); int32 index = 0; // This assumes that trees_to_drop is a sorted list of tree ids. for (int32 tree = 0; tree < num_trees; ++tree) { - if ((!trees_to_drop.empty() && index < trees_to_drop.size() && - trees_to_drop[index] == tree) || - (only_finalized && config.tree_metadata_size() > 0 && - !config.tree_metadata(tree).is_finalized())) { + // Skip the tree if tree is in the list of trees_to_drop. + if (!trees_to_drop.empty() && index < trees_to_drop.size() && + trees_to_drop[index] == tree) { ++index; continue; } + // Or skip if the tree is not finalized and only_finalized is set, + // with the exception of centering bias. + if (only_finalized && !(center_bias && tree == 0) && + config.tree_metadata_size() > 0 && + !config.tree_metadata(tree).is_finalized()) { + continue; + } trees_to_include->push_back(tree); } } @@ -250,7 +257,7 @@ class GradientTreesPredictionOp : public OpKernel { CalculateTreesToInclude( ensemble_resource->decision_tree_ensemble(), dropped_trees, ensemble_resource->decision_tree_ensemble().trees_size(), - only_finalized_trees_, &trees_to_include); + only_finalized_trees_, center_bias_, &trees_to_include); // Allocate output predictions matrix. Tensor* output_predictions_t = nullptr; diff --git a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc index 2a5c7949f2..c77d90e243 100644 --- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc @@ -237,6 +237,7 @@ class CenterTreeEnsembleBiasOp : public OpKernel { VLOG(1) << "Continuing to center bias, delta=" << total_delta; } else { VLOG(1) << "Done centering bias, delta=" << total_delta; + ensemble_resource->LastTreeMetadata()->set_is_finalized(true); } Tensor* continue_centering_t = nullptr; OP_REQUIRES_OK( @@ -260,7 +261,6 @@ class CenterTreeEnsembleBiasOp : public OpKernel { for (size_t idx = 0; idx < logits_dimension; ++idx) { leaf->mutable_vector()->add_value(0.0); } - ensemble_resource->LastTreeMetadata()->set_is_finalized(true); return leaf; } else if (num_trees == 1) { // Confirms that the only tree is a bias and returns its leaf. diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py index f0413fee5a..c2e65b643d 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py @@ -181,7 +181,6 @@ class CenterTreeEnsembleBiasOpTest(test_util.TensorFlowTestCase): tree_weights: 1.0 tree_metadata { num_layers_grown: 1 - is_finalized: true } growing_metadata { num_trees_attempted: 1 @@ -189,7 +188,7 @@ class CenterTreeEnsembleBiasOpTest(test_util.TensorFlowTestCase): } """ self.assertEqual(new_stamp, 1) - self.assertEqual(stats.num_trees, 1) + self.assertEqual(stats.num_trees, 0) self.assertEqual(stats.num_layers, 1) self.assertEqual(stats.active_tree, 1) self.assertEqual(stats.active_layer, 1) @@ -231,7 +230,6 @@ class CenterTreeEnsembleBiasOpTest(test_util.TensorFlowTestCase): tree_weights: 1.0 tree_metadata { num_layers_grown: 1 - is_finalized: true } growing_metadata { num_trees_attempted: 1 @@ -239,7 +237,7 @@ class CenterTreeEnsembleBiasOpTest(test_util.TensorFlowTestCase): } """ self.assertEqual(new_stamp, 2) - self.assertEqual(stats.num_trees, 1) + self.assertEqual(stats.num_trees, 0) self.assertEqual(stats.num_layers, 1) self.assertEqual(stats.active_tree, 1) self.assertEqual(stats.active_layer, 1) -- GitLab From 3fab2f9bbdf5745643d2dd0a390e1dd762c85bc2 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 14 Nov 2017 14:28:04 -0800 Subject: [PATCH 0427/1801] Make save/restore non-members of tfe.Network. This should make it easier to move to core. tfe.Network.save -> tfe.save_network_checkpoint tfe.Network.restore -> tfe.restore_network_checkpoint Some minor changes in the restore-on-load logic to make it work as a non-member of Network (particularly in _add_deferred_restoration). The other code changes are trivial, just moving code around. PiperOrigin-RevId: 175735659 --- tensorflow/contrib/eager/python/network.py | 860 +++++++++--------- .../contrib/eager/python/network_test.py | 83 +- tensorflow/contrib/eager/python/tfe.py | 7 +- 3 files changed, 485 insertions(+), 465 deletions(-) diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index 1a5c6e8aec..713ab1ee57 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -37,185 +37,6 @@ from tensorflow.python.training import training_util # functions in base.py which should be reused. -_DeferredRestoration = collections.namedtuple( - - "_DeferredRestoration", - [ - # The map_func to use (either user-specified or the default). - "map_func", - # Boolean, True if the user specified an explicit map_func, for error - # messages. - "map_func_is_user", - # A mapping from checkpoint names to initial values of not-yet-created - # variables which should be restored. These values come from parsing a - # checkpoint. - "checkpointed_variables_to_restore", - # A mapping from checkpoint name to variable objects of variables which - # have already been restored, for error checking. - "restored_variables", - # The session to restore with (if in graph mode). - "session", - # Names of the Network where the restore was requested, for error - # messages. - "network_name", - "network_scope_name" - ]) - - -def _default_naming_conflict_error_message( - mapped_name, first_variable, second_variable, - network_name, network_scope_name): - return ( - ("The default checkpoint variable name mapping strategy for Network " - "'%s' resulted in a naming conflict. We attempted to strip off the " - "variable prefix for the Network ('%s'), but this resulted in two " - "variables named '%s' (originally '%s' and '%s'). This should only " - "happen when using variable sharing (i.e. the Network contains Networks " - "or Layers which were first added to another Network, and therefore " - "have that Network's variable prefix). One solution is to pass " - "`map_func=lambda n: n` to Network.save and Network.restore to use " - "fully qualified variable names in the checkpoint, although this will " - "require that the variable prefix of the Network being restored into " - "is also '%s'. You may alternatively write an arbitrary mapping.") - % ( - network_name, network_scope_name, mapped_name, - first_variable._shared_name, - second_variable._shared_name, network_scope_name - )) - - -def _restore_custom_map_func_error_message( - mapped_name, first_variable, second_variable, - network_name, network_scope_name): - return ( - ("The map_func passed to Network.restore for the Network '%s' " - "resulted in two variables named '%s' (originally '%s' and '%s'). Since " - "this is also an error on Network.save, this Network was " - "probably not saved with this map_func. Note that map_func " - "always maps from full variable names to checkpoint names; " - "there is no need to specify an inverse mapping.\n\n" - "Try stripping less from the variable names, or renaming parts " - "of the Network. For reference, variables created by sub-Layers " - "of this Network are prefixed with '%s', but if they are " - "re-used after being added to another Network they will have " - "that Network's full variable prefix instead.") % ( - network_name, mapped_name, - first_variable._shared_name, - second_variable._shared_name, - network_scope_name)) - - -def _make_custom_getter_for_deferred_restorations(): - """Returns a custom getter which searches `deferred_restorations`. - - Returns: A tuple of (_custom_getter, deferred_restorations) - _custom_getter: The getter which should be added to variable_scopes where - variables will be created. - deferred_restorations: A list for _DeferredRestoration objects. Typically - empty when the getter is set, and expanded as deferred restorations are - requested. All new deferred restorations should be appended to the end of - the list, where they will have priority over older deferred restorations. - """ - deferred_restorations = [] - - def _custom_getter(getter, name, shape=None, dtype=None, - initializer=None, - *args, **kwargs): - """A custom getter which processes deferred restorations.""" - # Iterate over restorations, newest first (newer restorations will take - # precedence over older restorations, just like with immediate restorations - # into existing variables). - delayed_restoration = None - found_value = False - value_to_restore = None - for delayed_restoration in reversed( - deferred_restorations): - checkpoint_name = delayed_restoration.map_func(name) - if (checkpoint_name - in delayed_restoration.checkpointed_variables_to_restore): - found_value = True - value_to_restore = ( - delayed_restoration.checkpointed_variables_to_restore[ - checkpoint_name]) - if found_value: - break - # value_to_restore may be False because this variable is not in any - # checkpoint we are restoring, or None because we have explicitly set it to - # None when it was previously fetched. In either case, we don't need to - # set an initializer. - if found_value and value_to_restore is not None: - initializer = value_to_restore - shape = None - variable = getter(name, shape=shape, dtype=dtype, initializer=initializer, - *args, **kwargs) - if found_value and value_to_restore is not None: - # Mark as already restored from this checkpoint. - delayed_restoration.checkpointed_variables_to_restore[ - checkpoint_name] = None - if context.in_graph_mode(): - delayed_restoration.session.run(variable.initializer) - if found_value: - # Error checking should run even if we've already restored a value. - if delayed_restoration.restored_variables.setdefault( - checkpoint_name, variable) is not variable: - # Naming conflict. We've tried to initialize two variables with the - # same value from the checkpoint. - if delayed_restoration.map_func_is_user: - raise ValueError( - _restore_custom_map_func_error_message( - mapped_name=checkpoint_name, - first_variable=delayed_restoration.restored_variables[ - checkpoint_name], - second_variable=variable, - network_name=delayed_restoration.network_name, - network_scope_name=delayed_restoration.network_scope_name)) - else: - raise ValueError( - _default_naming_conflict_error_message( - mapped_name=checkpoint_name, - first_variable=delayed_restoration.restored_variables[ - checkpoint_name], - second_variable=variable, - network_name=delayed_restoration.network_name, - network_scope_name=delayed_restoration.network_scope_name)) - return variable - return _custom_getter, deferred_restorations - - -def _make_prefix_stripping_map_fn(scope_name): - """Closure for stripping the scope name of a Network. - - Implemented as a closure rather than a member function to avoid reference - cycles in deferred restorations (this function should not have a reference to - the Network which created it). - - Args: - scope_name: The Network.scope_name to strip from variables. - Returns: - A scope_name-stripping default `map_fn` for the Network. - """ - - def _strip_variable_prefix(original_variable_name): - """The default map_func for saving or restoring variables. - - Strips the variable prefix for the Network on which save/restore was called, - and leaves other variable names fully qualified in the checkpoint. - - Args: - original_variable_name: The _shared_name of the variable (no :0 - suffix) to map. - Returns: - The checkpoint name of the variable. - """ - scope_name_with_slash = scope_name + "/" - if original_variable_name.startswith(scope_name_with_slash): - return original_variable_name[len(scope_name_with_slash):] - else: - return original_variable_name - - return _strip_variable_prefix - - class Network(base.Layer): """Represents the composition of a set of Layers. @@ -250,8 +71,6 @@ class Network(base.Layer): # closed before build is called. self._variable_scope_counts_on_init = ( variable_scope._get_default_variable_store().variable_scopes_count) - self._custom_getter, self._deferred_restorations = ( - _make_custom_getter_for_deferred_restorations()) def _init_set_name(self, name): # Anonymous Networks (name=None) defer setting a final name until they are @@ -543,252 +362,6 @@ class Network(base.Layer): "at https://github.com/tensorflow/tensorflow/issues/new if this is " "important to you") - def save(self, save_path, global_step=None, map_func=None): - """Save variables from the Network to a checkpoint. - - Args: - save_path: Either a checkpoint prefix or the name of a directory to save - the checkpoint in (in which case the checkpoint will be named based on - the Network name). - global_step: The global step to use when naming the checkpoint. If None - (default), we will first try to get the default global step. If that - fails because no default global step exists, then the checkpoint is - created without a global step suffix. - map_func: A function mapping fully qualified variable names - (e.g. 'my_network_1/dense_1/kernel') to names in the checkpoint. By - default (if `map_func=None`), the variable prefix for the network being - restored (`Network.scope_name + '/'`, e.g. 'my_network_1/') is stripped - and all other variable names (shared with other Networks) are left - unchanged. - Returns: - The checkpoint prefix for the saved checkpoint, which may be passed to - `Network.restore`. - Raises: - ValueError: If the Network has not yet been called, or if map_func results - in a name collision. - """ - if not self.built: - raise ValueError( - "Attempt to save the Network before it was first called. This means " - "variables have not yet been created, so there is nothing to save.") - self._set_scope() # scope_name should be available to map_funcs - if global_step is None: - global_step = training_util.get_global_step() - if os.path.isdir(save_path): - # If we were passed a directory, default to naming based on the Network - # name. - save_path = os.path.join(save_path, self.name.replace("/", "_")) - user_map_func = map_func - if map_func is None: - map_func = _make_prefix_stripping_map_fn(self.scope_name) - variable_map = {} - for variable in self.variables: - mapped_name = map_func(variable._shared_name) - if variable_map.setdefault(mapped_name, variable) is not variable: - if user_map_func is None: - # Instead of erroring out, we could just re-try and silently use the - # full variable names in the checkpoint. This could be odd for deeply - # nested sub-Networks (since the full prefix from the nesting would - # get added), so for now we'll let the user deal with this case. - raise ValueError(_default_naming_conflict_error_message( - mapped_name=mapped_name, - first_variable=variable_map[mapped_name], - second_variable=variable, - network_name=self.name, - network_scope_name=self.scope_name)) - else: - # The user passed their own problematic map_func. - raise ValueError( - ("The map_func passed to Network.save for the Network '%s' " - "resulted in two variables named '%s' ('%s' and '%s'). Try " - "stripping less from the variable names, or renaming parts of " - "the Network. For reference, variables created by sub-Layers of " - "this Network are prefixed with '%s', but if they are re-used " - "after being added to another Network, they will have that " - "Network's full variable prefix instead.") % ( - self.name, mapped_name, - variable_map[mapped_name]._shared_name, - variable._shared_name, - self.scope_name)) - if context.in_eager_mode(): - sess = None - else: - sess = ops.get_default_session() - return saver_lib.Saver(variable_map).save( - sess=sess, save_path=save_path, write_meta_graph=False, - global_step=global_step) - - def _restore_existing_variables(self, save_path, map_func, user_map_func): - """Use a standard Saver to restore existing variables from a checkpoint. - - Args: - save_path: The checkpoint prefix or directory to read from. - map_func: The function to use when mapping from variable names to - checkpoint names. - user_map_func: The original map_func passed by the user, for error - checking. - Returns: - A dictionary mapping from checkpoint names to variable objects which have - been restored (for bookkeeping to avoid deferred restorations on these - variables). - Raises: - ValueError: If there is a name collision. - """ - existing_variables_by_checkpoint_name = {} - for variable in self.variables: - checkpoint_name = map_func(variable._shared_name) - if existing_variables_by_checkpoint_name.setdefault( - checkpoint_name, variable) is not variable: - if user_map_func is None: - raise ValueError(_default_naming_conflict_error_message( - mapped_name=checkpoint_name, - first_variable=existing_variables_by_checkpoint_name[ - checkpoint_name], - second_variable=variable, - network_name=self.name, - network_scope_name=self.scope_name)) - else: - raise ValueError(_restore_custom_map_func_error_message( - mapped_name=checkpoint_name, - first_variable=existing_variables_by_checkpoint_name[ - checkpoint_name], - second_variable=variable, - network_name=self.name, - network_scope_name=self.scope_name)) - if existing_variables_by_checkpoint_name: - if context.in_eager_mode(): - sess = None - else: - sess = ops.get_default_session() - saver_lib.Saver(var_list=existing_variables_by_checkpoint_name).restore( - sess=sess, save_path=save_path) - return existing_variables_by_checkpoint_name - - def _set_restore_on_create(self, save_path, map_func, user_map_func, - existing_variables_by_checkpoint_name): - """If necessary, request deferred restorations of variables.""" - checkpoint_reader = checkpoint_utils.load_checkpoint(save_path) - checkpointed_variables_to_restore = {} - for checkpoint_name, _ in checkpoint_utils.list_variables(save_path): - if checkpoint_name in existing_variables_by_checkpoint_name: - # This variable was already created and restored. - continue - # Save the variable for later restoration in a custom getter. - checkpointed_variables_to_restore[checkpoint_name] = ( - checkpoint_reader.get_tensor(checkpoint_name)) - # Only set a deferred restoration if there are checkpoint variables which - # have not been assigned to existing variables. Note that this loses out on - # some opportunity for error checking, but avoids creating - # _DeferredRestoration objects once a Network has been built (so that - # restoring in a loop does not take increasing amounts of memory). - if checkpointed_variables_to_restore: - if context.in_eager_mode(): - sess = None - else: - sess = ops.get_default_session() - # We need a name for error messages. If we haven't been added to another - # Network yet, we're top-level. - self._finalize_name(False) - self._set_scope() - # Save a record of this restoration for use in the custom getter. - deferred_restoration = _DeferredRestoration( - map_func=map_func, - map_func_is_user=(user_map_func is not None), - checkpointed_variables_to_restore=checkpointed_variables_to_restore, - restored_variables={}, - session=sess, - network_name=self.name, - network_scope_name=self.scope_name) - self._deferred_restorations.append(deferred_restoration) - # Add the deferred registration to non-Network children, and request that - # Networks propagate the request to their children. - self._add_deferred_restoration(deferred_restoration) - - def _add_deferred_restoration(self, deferred_restoration): - """Add a deferred restoration to this Network and all children. - - Restorations which are requested later have higher priority, and the highest - priority matching restoration is applied to a variable when it is created. - - Args: - deferred_restoration: A _DeferredRestoration object. - """ - # Networks don't create variables at the moment, so this append isn't - # strictly necessary. We could get by with only adding deferred restorations - # to non-Network Layers. - self._set_scope() - # We use set_custom_getter because it avoids recursively calling up the - # variable_scope tree. We've done the tree traversal ourselves and have - # added the request to each Layer which needs it. - self._scope.set_custom_getter(self._custom_getter) - self._deferred_restorations.append(deferred_restoration) - for layer in self.layers: - if isinstance(layer, Network): - # For Networks, request that they propagate this deferred restoration - # to all of their children recursively. - layer._add_deferred_restoration(deferred_restoration) - else: - # For non-Network Layers, make sure they have a deferred restoration - # queue and a custom getter, then add our request to it. - if not hasattr(layer, "_custom_getter"): - assert not hasattr(layer, "_deferred_restorations") - layer._custom_getter, layer._deferred_restorations = ( - _make_custom_getter_for_deferred_restorations()) - self._set_scope_for_nonnetwork_sublayer(layer) - layer._scope.set_custom_getter(layer._custom_getter) - layer._deferred_restorations.append(deferred_restoration) - - def restore(self, save_path, map_func=None): - """Restore the Network from a checkpoint. - - If variables have already been created (typically when some or all of the - `Network` is built), they are assigned values from the checkpoint - immediately, overwriting any existing values (in graph mode the default - session is used for the assignments). - - If there are checkpoint entries which do not correspond to any existing - variables in the `Network`, these values are saved for deferred restoration; - their initial values will be the checkpointed values once they are - created. Requests for multiple deferred restorations behave the same way as - immediate restorations, in that later requests will take priority over - earlier requests relevant to the same variable. - - If this `Network` shares `Layer`s with another network, those `Layer`s will - also have their variables restored from the checkpoint. - - Args: - save_path: The return value of `Network.save`, or a directory to search - for a checkpoint. - map_func: A function mapping fully qualified variable names - (e.g. 'my_network_1/dense_1/kernel') to names in the checkpoint. By - default (if `map_func=None`), the variable prefix for the network being - restored (`Network.scope_name + '/'`, e.g. 'my_network_1/') is stripped - and all other variable names (shared with other Networks) are left - unchanged. Note that this is the _same_ map_func as `Network.save`, not - an inverse mapping. - """ - self._finalize_name(parent_network=False) - self._set_scope() # scope_name should be available to map_funcs - if os.path.isdir(save_path): - # If we don't have a name yet, set no parent. - save_path = os.path.join(save_path, self.name.replace("/", "_")) - user_map_func = map_func - if map_func is None: - map_func = _make_prefix_stripping_map_fn(self.scope_name) - # Step one is to restore any existing variables from the checkpoint. - existing_variables_by_checkpoint_name = self._restore_existing_variables( - save_path=save_path, - map_func=map_func, - user_map_func=user_map_func) - # Step two is to set a custom getter which restores variables on creation, - # for those variables which have not been added to sub-Layers yet. - self._set_restore_on_create( - save_path=save_path, - map_func=map_func, - user_map_func=user_map_func, - existing_variables_by_checkpoint_name=( - existing_variables_by_checkpoint_name)) - # TODO(josh11b): Support other Layer methods needed for graph mode, such as for # losses and updates @@ -838,3 +411,436 @@ class Sequential(Network): else: inputs = l(inputs) return inputs + + +_DeferredRestoration = collections.namedtuple( + + "_DeferredRestoration", + [ + # The map_func to use (either user-specified or the default). + "map_func", + # Boolean, True if the user specified an explicit map_func, for error + # messages. + "map_func_is_user", + # A mapping from checkpoint names to initial values of not-yet-created + # variables which should be restored. These values come from parsing a + # checkpoint. + "checkpointed_variables_to_restore", + # A mapping from checkpoint name to variable objects of variables which + # have already been restored, for error checking. + "restored_variables", + # The session to restore with (if in graph mode). + "session", + # Names of the Network where the restore was requested, for error + # messages. + "network_name", + "network_scope_name" + ]) + + +def _default_naming_conflict_error_message( + mapped_name, first_variable, second_variable, + network_name, network_scope_name): + return ( + ("The default checkpoint variable name mapping strategy for Network " + "'%s' resulted in a naming conflict. We attempted to strip off the " + "variable prefix for the Network ('%s'), but this resulted in two " + "variables named '%s' (originally '%s' and '%s'). This should only " + "happen when using variable sharing (i.e. the Network contains Networks " + "or Layers which were first added to another Network, and therefore " + "have that Network's variable prefix). One solution is to pass " + "`map_func=lambda n: n` to save and restore to use fully qualified " + "variable names in the checkpoint, although this will require that the " + "variable prefix of the Network being restored into is also '%s'. You " + "may alternatively write an arbitrary mapping.") + % ( + network_name, network_scope_name, mapped_name, + first_variable._shared_name, + second_variable._shared_name, network_scope_name + )) + + +def _restore_custom_map_func_error_message( + mapped_name, first_variable, second_variable, + network_name, network_scope_name): + return ( + ("The map_func passed to restore_network_checkpoint for the Network '%s' " + "resulted in two variables named '%s' (originally '%s' and '%s'). Since " + "this is also an error when saving, this Network was " + "probably not saved with this map_func. Note that map_func " + "always maps from full variable names to checkpoint names; " + "there is no need to specify an inverse mapping.\n\n" + "Try stripping less from the variable names, or renaming parts " + "of the Network. For reference, variables created by sub-Layers " + "of this Network are prefixed with '%s', but if they are " + "re-used after being added to another Network they will have " + "that Network's full variable prefix instead.") % ( + network_name, mapped_name, + first_variable._shared_name, + second_variable._shared_name, + network_scope_name)) + + +def _make_custom_getter_for_deferred_restorations(): + """Returns a custom getter which searches `deferred_restorations`. + + Returns: A tuple of (_custom_getter, deferred_restorations) + _custom_getter: The getter which should be added to variable_scopes where + variables will be created. + deferred_restorations: A list for _DeferredRestoration objects. Typically + empty when the getter is set, and expanded as deferred restorations are + requested. All new deferred restorations should be appended to the end of + the list, where they will have priority over older deferred restorations. + """ + deferred_restorations = [] + + def _custom_getter(getter, name, shape=None, dtype=None, + initializer=None, + *args, **kwargs): + """A custom getter which processes deferred restorations.""" + # Iterate over restorations, newest first (newer restorations will take + # precedence over older restorations, just like with immediate restorations + # into existing variables). + delayed_restoration = None + found_value = False + value_to_restore = None + for delayed_restoration in reversed( + deferred_restorations): + checkpoint_name = delayed_restoration.map_func(name) + if (checkpoint_name + in delayed_restoration.checkpointed_variables_to_restore): + found_value = True + value_to_restore = ( + delayed_restoration.checkpointed_variables_to_restore[ + checkpoint_name]) + if found_value: + break + # value_to_restore may be False because this variable is not in any + # checkpoint we are restoring, or None because we have explicitly set it to + # None when it was previously fetched. In either case, we don't need to + # set an initializer. + if found_value and value_to_restore is not None: + initializer = value_to_restore + shape = None + variable = getter(name, shape=shape, dtype=dtype, initializer=initializer, + *args, **kwargs) + if found_value and value_to_restore is not None: + # Mark as already restored from this checkpoint. + delayed_restoration.checkpointed_variables_to_restore[ + checkpoint_name] = None + if context.in_graph_mode(): + delayed_restoration.session.run(variable.initializer) + if found_value: + # Error checking should run even if we've already restored a value. + if delayed_restoration.restored_variables.setdefault( + checkpoint_name, variable) is not variable: + # Naming conflict. We've tried to initialize two variables with the + # same value from the checkpoint. + if delayed_restoration.map_func_is_user: + raise ValueError( + _restore_custom_map_func_error_message( + mapped_name=checkpoint_name, + first_variable=delayed_restoration.restored_variables[ + checkpoint_name], + second_variable=variable, + network_name=delayed_restoration.network_name, + network_scope_name=delayed_restoration.network_scope_name)) + else: + raise ValueError( + _default_naming_conflict_error_message( + mapped_name=checkpoint_name, + first_variable=delayed_restoration.restored_variables[ + checkpoint_name], + second_variable=variable, + network_name=delayed_restoration.network_name, + network_scope_name=delayed_restoration.network_scope_name)) + return variable + return _custom_getter, deferred_restorations + + +def _make_prefix_stripping_map_fn(scope_name): + """Closure for stripping the scope name of a Network. + + Implemented as a closure rather than a member function to avoid reference + cycles in deferred restorations (this function should not have a reference to + the Network which created it). + + Args: + scope_name: The Network.scope_name to strip from variables. + Returns: + A scope_name-stripping default `map_fn` for the Network. + """ + + def _strip_variable_prefix(original_variable_name): + """The default map_func for saving or restoring variables. + + Strips the variable prefix for the Network on which save/restore was called, + and leaves other variable names fully qualified in the checkpoint. + + Args: + original_variable_name: The _shared_name of the variable (no :0 + suffix) to map. + Returns: + The checkpoint name of the variable. + """ + scope_name_with_slash = scope_name + "/" + if original_variable_name.startswith(scope_name_with_slash): + return original_variable_name[len(scope_name_with_slash):] + else: + return original_variable_name + + return _strip_variable_prefix + + +def save_network_checkpoint( + network, save_path, global_step=None, map_func=None): + """Save variables from the Network to a checkpoint. + + Args: + network: A Network object to save. + save_path: Either a checkpoint prefix or the name of a directory to save + the checkpoint in (in which case the checkpoint will be named based on + the Network name). + global_step: The global step to use when naming the checkpoint. If None + (default), we will first try to get the default global step. If that + fails because no default global step exists, then the checkpoint is + created without a global step suffix. + map_func: A function mapping fully qualified variable names + (e.g. 'my_network_1/dense_1/kernel') to names in the checkpoint. By + default (if `map_func=None`), the variable prefix for the network being + restored (`Network.scope_name + '/'`, e.g. 'my_network_1/') is stripped + and all other variable names (shared with other Networks) are left + unchanged. + Returns: + The checkpoint prefix for the saved checkpoint, which may be passed to + `Network.restore`. + Raises: + ValueError: If the Network has not yet been called, or if map_func results + in a name collision. + """ + if not network.built: + raise ValueError( + "Attempt to save the Network before it was first called. This means " + "variables have not yet been created, so there is nothing to save.") + network._set_scope() # scope_name should be available to map_funcs + if global_step is None: + global_step = training_util.get_global_step() + if os.path.isdir(save_path): + # If we were passed a directory, default to naming based on the Network + # name. + save_path = os.path.join(save_path, network.name.replace("/", "_")) + user_map_func = map_func + if map_func is None: + map_func = _make_prefix_stripping_map_fn(network.scope_name) + variable_map = {} + for variable in network.variables: + mapped_name = map_func(variable._shared_name) + if variable_map.setdefault(mapped_name, variable) is not variable: + if user_map_func is None: + # Instead of erroring out, we could just re-try and silently use the + # full variable names in the checkpoint. This could be odd for deeply + # nested sub-Networks (since the full prefix from the nesting would + # get added), so for now we'll let the user deal with this case. + raise ValueError(_default_naming_conflict_error_message( + mapped_name=mapped_name, + first_variable=variable_map[mapped_name], + second_variable=variable, + network_name=network.name, + network_scope_name=network.scope_name)) + else: + # The user passed their own problematic map_func. + raise ValueError( + ("The map_func passed to save_network_checkpoint for the Network " + "'%s' resulted in two variables named '%s' ('%s' and '%s'). Try " + "stripping less from the variable names, or renaming parts of " + "the Network. For reference, variables created by sub-Layers of " + "this Network are prefixed with '%s', but if they are re-used " + "after being added to another Network, they will have that " + "Network's full variable prefix instead.") % ( + network.name, mapped_name, + variable_map[mapped_name]._shared_name, + variable._shared_name, + network.scope_name)) + if context.in_eager_mode(): + sess = None + else: + sess = ops.get_default_session() + return saver_lib.Saver(variable_map).save( + sess=sess, save_path=save_path, write_meta_graph=False, + global_step=global_step) + + +def _add_deferred_restoration(layer, deferred_restoration): + """Add a deferred restoration to this Layer and all children. + + Restorations which are requested later have higher priority, and the highest + priority matching restoration is applied to a variable when it is created. + + Args: + layer: The Layer (may not be a Network) to operate on. + deferred_restoration: A _DeferredRestoration object. + """ + # Networks don't create variables at the moment, so this append isn't strictly + # necessary. We could get by with only adding deferred restorations to + # non-Network Layers. + if isinstance(layer, Network): + layer._set_scope() + # Make sure this Layer has a deferred restoration queue and a custom getter, + # then add our request to it. + if not hasattr(layer, "_custom_getter"): + assert not hasattr(layer, "_deferred_restorations") + layer._custom_getter, layer._deferred_restorations = ( + _make_custom_getter_for_deferred_restorations()) + # We use set_custom_getter because it avoids recursively calling up the + # variable_scope tree. We've done the tree traversal ourselves and have added + # the request to each Layer which needs it. + layer._scope.set_custom_getter(layer._custom_getter) + layer._deferred_restorations.append(deferred_restoration) + if isinstance(layer, Network): + for sublayer in layer.layers: + if not isinstance(sublayer, Network): + layer._set_scope_for_nonnetwork_sublayer(sublayer) + _add_deferred_restoration(sublayer, deferred_restoration) + + +def _restore_existing_variables(network, save_path, map_func, user_map_func): + """Use a standard Saver to restore existing variables from a checkpoint. + + Args: + network: A Network object to restore. + save_path: The checkpoint prefix or directory to read from. + map_func: The function to use when mapping from variable names to + checkpoint names. + user_map_func: The original map_func passed by the user, for error + checking. + Returns: + A dictionary mapping from checkpoint names to variable objects which have + been restored (for bookkeeping to avoid deferred restorations on these + variables). + Raises: + ValueError: If there is a name collision. + """ + existing_variables_by_checkpoint_name = {} + for variable in network.variables: + checkpoint_name = map_func(variable._shared_name) + if existing_variables_by_checkpoint_name.setdefault( + checkpoint_name, variable) is not variable: + if user_map_func is None: + raise ValueError(_default_naming_conflict_error_message( + mapped_name=checkpoint_name, + first_variable=existing_variables_by_checkpoint_name[ + checkpoint_name], + second_variable=variable, + network_name=network.name, + network_scope_name=network.scope_name)) + else: + raise ValueError(_restore_custom_map_func_error_message( + mapped_name=checkpoint_name, + first_variable=existing_variables_by_checkpoint_name[ + checkpoint_name], + second_variable=variable, + network_name=network.name, + network_scope_name=network.scope_name)) + if existing_variables_by_checkpoint_name: + if context.in_eager_mode(): + sess = None + else: + sess = ops.get_default_session() + saver_lib.Saver(var_list=existing_variables_by_checkpoint_name).restore( + sess=sess, save_path=save_path) + return existing_variables_by_checkpoint_name + + +def _set_restore_on_create(network, save_path, map_func, user_map_func, + existing_variables_by_checkpoint_name): + """If necessary, request deferred restorations of variables.""" + checkpoint_reader = checkpoint_utils.load_checkpoint(save_path) + checkpointed_variables_to_restore = {} + for checkpoint_name, _ in checkpoint_utils.list_variables(save_path): + if checkpoint_name in existing_variables_by_checkpoint_name: + # This variable was already created and restored. + continue + # Save the variable for later restoration in a custom getter. + checkpointed_variables_to_restore[checkpoint_name] = ( + checkpoint_reader.get_tensor(checkpoint_name)) + # Only set a deferred restoration if there are checkpoint variables which + # have not been assigned to existing variables. Note that this loses out on + # some opportunity for error checking, but avoids creating + # _DeferredRestoration objects once a Network has been built (so that + # restoring in a loop does not take increasing amounts of memory). + if checkpointed_variables_to_restore: + if context.in_eager_mode(): + sess = None + else: + sess = ops.get_default_session() + # We need a name for error messages. If we haven't been added to another + # Network yet, we're top-level. + network._finalize_name(False) + network._set_scope() + # Save a record of this restoration for use in the custom getter. + deferred_restoration = _DeferredRestoration( + map_func=map_func, + map_func_is_user=(user_map_func is not None), + checkpointed_variables_to_restore=checkpointed_variables_to_restore, + restored_variables={}, + session=sess, + network_name=network.name, + network_scope_name=network.scope_name) + # Add the deferred registration to non-Network children, and request that + # Networks propagate the request to their children. + _add_deferred_restoration(network, deferred_restoration) + + +def restore_network_checkpoint(network, save_path, map_func=None): + """Restore the Network from a checkpoint. + + If variables have already been created (typically when some or all of the + `Network` is built), they are assigned values from the checkpoint immediately, + overwriting any existing values (in graph mode the default session is used for + the assignments). + + If there are checkpoint entries which do not correspond to any existing + variables in the `Network`, these values are saved for deferred restoration; + their initial values will be the checkpointed values once they are + created. Requests for multiple deferred restorations behave the same way as + immediate restorations, in that later requests will take priority over earlier + requests relevant to the same variable. + + If this `Network` shares `Layer`s with another network, those `Layer`s will + also have their variables restored from the checkpoint. + + Args: + network: A Network object to restore. + save_path: The return value of `tfe.save_network_checkpoint`, or a directory + to search for a checkpoint. + map_func: A function mapping fully qualified variable names + (e.g. 'my_network_1/dense_1/kernel') to names in the checkpoint. By + default (if `map_func=None`), the variable prefix for the network being + restored (`Network.scope_name + '/'`, e.g. 'my_network_1/') is stripped + and all other variable names (shared with other Networks) are left + unchanged. Note that this is the _same_ map_func as + `tfe.save_network_checkpoint`, not an inverse mapping. + """ + network._finalize_name(parent_network=False) + network._set_scope() # scope_name should be available to map_funcs + if os.path.isdir(save_path): + # If we don't have a name yet, set no parent. + save_path = os.path.join(save_path, network.name.replace("/", "_")) + user_map_func = map_func + if map_func is None: + map_func = _make_prefix_stripping_map_fn(network.scope_name) + # Step one is to restore any existing variables from the checkpoint. + existing_variables_by_checkpoint_name = _restore_existing_variables( + network=network, + save_path=save_path, + map_func=map_func, + user_map_func=user_map_func) + # Step two is to set a custom getter which restores variables on creation, + # for those variables which have not been added to sub-Layers yet. + _set_restore_on_create( + network=network, + save_path=save_path, + map_func=map_func, + user_map_func=user_map_func, + existing_variables_by_checkpoint_name=( + existing_variables_by_checkpoint_name)) diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 1127055c05..e66486d165 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -46,8 +46,8 @@ class NetworkTest(test.TestCase): def _save_modify_load_network_built(self, net, global_step=None): checkpoint_directory = self.get_temp_dir() - checkpoint_path = net.save( - save_path=checkpoint_directory, global_step=global_step) + checkpoint_path = network.save_network_checkpoint( + network=net, save_path=checkpoint_directory, global_step=global_step) input_value = constant_op.constant([[42.0]]) original_output = self.evaluate(net(input_value)) for var in net.variables: @@ -56,13 +56,13 @@ class NetworkTest(test.TestCase): self.evaluate(net(input_value)), original_output) # Either the returned explicit checkpoint path or the directory should work. - net.restore(save_path=checkpoint_directory) + network.restore_network_checkpoint(net, save_path=checkpoint_directory) self.assertAllEqual( original_output, self.evaluate(net(input_value))) for var in net.variables: self.evaluate(var.assign(var + 2.)) - net.restore(save_path=checkpoint_path) + network.restore_network_checkpoint(net, save_path=checkpoint_path) self.assertAllEqual( original_output, self.evaluate(net(input_value))) @@ -91,7 +91,7 @@ class NetworkTest(test.TestCase): net = MyNetwork(name="abcd") with self.assertRaisesRegexp( ValueError, "Attempt to save the Network before it was first called"): - net.save(self.get_temp_dir()) + network.save_network_checkpoint(net, self.get_temp_dir()) net(constant_op.constant([[2.0]])) self.evaluate(net.trainable_variables[0].assign([[17.0]])) self._save_modify_load_network_built(net, global_step=None) @@ -105,7 +105,7 @@ class NetworkTest(test.TestCase): self.evaluate(net.variables[0].assign([[3.]])) default_global_step = training_util.get_or_create_global_step() self.evaluate(default_global_step.assign(4242)) - save_path = net.save(self.get_temp_dir()) + save_path = network.save_network_checkpoint(net, self.get_temp_dir()) self.assertIn("abcd-4242", save_path) # TODO(allenl): This test creates garbage in some Python versions @@ -116,10 +116,10 @@ class NetworkTest(test.TestCase): test_input = constant_op.constant([[2.0]]) net1(test_input) self.evaluate(net1.trainable_variables[0].assign([[17.0]])) - save_path = net1.save(save_dir) + save_path = network.save_network_checkpoint(net1, save_dir) # With a pre-build restore we should have the same value. net2 = MyNetwork() - net2.restore(save_path) + network.restore_network_checkpoint(net2, save_path) self.assertAllEqual(self.evaluate(net1(test_input)), self.evaluate(net2(test_input))) self.assertIsNot(net1.variables[0], net2.variables[0]) @@ -176,11 +176,12 @@ class NetworkTest(test.TestCase): "checkpoint_creator/first_layer/kernel": "owner_1/first_layer/kernel", "checkpoint_creator/second_layer/kernel": "second_layer/kernel", } - save_path = checkpoint_creator.save( + save_path = network.save_network_checkpoint( + checkpoint_creator, self.get_temp_dir(), map_func=lambda full_name: name_mapping[full_name]) load_into = User(use_layer=first_owner.first) - load_into.restore(save_path) + network.restore_network_checkpoint(load_into, save_path) self.assertEqual(0, len(first_owner.variables)) self.assertAllEqual(self.evaluate(checkpoint_creator(one)), self.evaluate(load_into(one))) @@ -201,7 +202,8 @@ class NetworkTest(test.TestCase): else: return "user_2/" + original_name with self.assertRaisesRegexp(ValueError, "garbage collected"): - load_into.restore(save_path, map_func=_restore_map_func) + network.restore_network_checkpoint( + load_into, save_path, map_func=_restore_map_func) @test_util.run_in_graph_and_eager_modes() def testRestoreIntoSubNetwork(self): @@ -221,17 +223,18 @@ class NetworkTest(test.TestCase): whole_model_saver(one) self.evaluate(whole_model_saver.variables[0].assign([[15.]])) self.evaluate(whole_model_saver.variables[1].assign([[16.]])) - whole_model_checkpoint = whole_model_saver.save(self.get_temp_dir()) + whole_model_checkpoint = network.save_network_checkpoint( + whole_model_saver, self.get_temp_dir()) save_from = MyNetwork() save_from(one) self.evaluate(save_from.variables[0].assign([[5.]])) - checkpoint = save_from.save(self.get_temp_dir()) + checkpoint = network.save_network_checkpoint(save_from, self.get_temp_dir()) save_into_parent = Parent() - save_into_parent.restore(whole_model_checkpoint) - save_into_parent.first.restore(checkpoint) - save_into_parent.first.restore(checkpoint) # deferred loading multiple - # times is fine + network.restore_network_checkpoint(save_into_parent, whole_model_checkpoint) + network.restore_network_checkpoint(save_into_parent.first, checkpoint) + # deferred loading multiple times is fine + network.restore_network_checkpoint(save_into_parent.first, checkpoint) save_into_parent(one) # deferred loading self.assertAllEqual([[5.]], self.evaluate(save_into_parent.variables[0])) self.assertAllEqual([[16.]], self.evaluate(save_into_parent.variables[1])) @@ -240,9 +243,9 @@ class NetworkTest(test.TestCase): # (deferred restoration should happen the same way non-deferred happens, # with later restorations overwriting older ones). save_into_parent = Parent() - save_into_parent.first.restore(checkpoint) # deferred loading multiple - # times is fine - save_into_parent.restore(whole_model_checkpoint) + # deferred loading multiple times is fine + network.restore_network_checkpoint(save_into_parent.first, checkpoint) + network.restore_network_checkpoint(save_into_parent, whole_model_checkpoint) save_into_parent(one) # deferred loading # We've overwritten the sub-Network restore. self.assertAllEqual([[15.]], self.evaluate(save_into_parent.variables[0])) @@ -250,12 +253,12 @@ class NetworkTest(test.TestCase): self.evaluate(save_into_parent.variables[0].assign([[3.]])) self.evaluate(save_into_parent.variables[1].assign([[4.]])) - save_into_parent.second.restore(checkpoint) + network.restore_network_checkpoint(save_into_parent.second, checkpoint) self.assertAllEqual([[5.]], self.evaluate(save_into_parent.variables[1])) with self.assertRaisesRegexp(errors_impl.NotFoundError, "not found in checkpoint"): # The checkpoint is incompatible. - save_into_parent.restore(checkpoint) + network.restore_network_checkpoint(save_into_parent, checkpoint) @test_util.run_in_graph_and_eager_modes() def testCustomMapCollisionErrors(self): @@ -277,25 +280,30 @@ class NetworkTest(test.TestCase): self.evaluate(make_checkpoint.variables[1].assign([[3.]])) with self.assertRaisesRegexp( ValueError, - "The map_func passed to Network.save for the Network 'parent_1' " - "resulted in two variables named 'foo'"): - make_checkpoint.save(self.get_temp_dir(), map_func=lambda n: "foo") - checkpoint = make_checkpoint.first.save( - self.get_temp_dir(), map_func=lambda n: "foo") + "The map_func passed to save_network_checkpoint for the Network " + "'parent_1' resulted in two variables named 'foo'"): + network.save_network_checkpoint( + make_checkpoint, self.get_temp_dir(), map_func=lambda n: "foo") + checkpoint = network.save_network_checkpoint( + network=make_checkpoint.first, + save_path=self.get_temp_dir(), + map_func=lambda n: "foo") loader = Parent() - loader.restore(checkpoint, map_func=lambda n: "foo") + network.restore_network_checkpoint( + loader, checkpoint, map_func=lambda n: "foo") with self.assertRaisesRegexp( ValueError, - ("The map_func passed to Network.restore for the Network" + ("The map_func passed to restore_network_checkpoint for the Network" " 'parent_2' resulted in two variables named 'foo'")): loader(one) loader = Parent() loader(one) with self.assertRaisesRegexp( ValueError, - ("The map_func passed to Network.restore for the Network" + ("The map_func passed to restore_network_checkpoint for the Network" " 'parent_3' resulted in two variables named 'foo'")): - loader.restore(checkpoint, map_func=lambda n: "foo") + network.restore_network_checkpoint( + loader, checkpoint, map_func=lambda n: "foo") @test_util.run_in_graph_and_eager_modes() def testDefaultMapCollisionErrors(self): @@ -323,7 +331,7 @@ class NetworkTest(test.TestCase): ValueError, ("The default checkpoint variable name mapping strategy for Network " "'parent_1' resulted in a naming conflict.")): - make_checkpoint.save(self.get_temp_dir()) + network.save_network_checkpoint(make_checkpoint, self.get_temp_dir()) class Compatible(network.Network): @@ -337,14 +345,15 @@ class NetworkTest(test.TestCase): successful_checkpoint = Compatible() successful_checkpoint(one) self.evaluate(successful_checkpoint.variables[0].assign([[-1.]])) - checkpoint_path = successful_checkpoint.save(self.get_temp_dir()) + checkpoint_path = network.save_network_checkpoint( + successful_checkpoint, self.get_temp_dir()) load_checkpoint = Parent() load_checkpoint(one) with self.assertRaisesRegexp( ValueError, ("The default checkpoint variable name mapping strategy for Network " "'parent_2' resulted in a naming conflict.")): - load_checkpoint.restore(checkpoint_path) + network.restore_network_checkpoint(load_checkpoint, checkpoint_path) def testNoReferenceCyclesAfterCall(self): @@ -494,17 +503,17 @@ class NetworkTest(test.TestCase): self.assertStartsWith( expected_start="scope1/scope2/my_network_1/dense_1/", actual=net.trainable_weights[0].name) - save_path = net.save(self.get_temp_dir()) + save_path = network.save_network_checkpoint(net, self.get_temp_dir()) self.assertIn("scope1_scope2_my_network_1", save_path) restore_net = MyNetwork() # Delayed restoration - restore_net.restore(save_path) + network.restore_network_checkpoint(restore_net, save_path) restore_net(constant_op.constant([[1.0]])) self.assertAllEqual([[42.]], self.evaluate(restore_net.variables[0])) self.evaluate(restore_net.variables[0].assign([[-1.]])) # Immediate restoration - restore_net.restore(save_path) + network.restore_network_checkpoint(restore_net, save_path) self.assertAllEqual([[42.]], self.evaluate(restore_net.variables[0])) diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index b6c687c829..577d3efef6 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -46,13 +46,16 @@ To use, at program startup, call `tfe.enable_eager_execution()`. @@seterr @@Iterator -@@Network @@Saver @@restore_variables_on_create @@Variable @@get_optimizer_variables @@EagerVariableStore +@@Network +@@save_network_checkpoint +@@restore_network_checkpoint + @@in_eager_mode @@in_graph_mode @@ -74,6 +77,8 @@ from __future__ import print_function from tensorflow.contrib.eager.python import metrics from tensorflow.contrib.eager.python.datasets import Iterator from tensorflow.contrib.eager.python.network import Network +from tensorflow.contrib.eager.python.network import save_network_checkpoint +from tensorflow.contrib.eager.python.network import restore_network_checkpoint from tensorflow.contrib.eager.python.saver import get_optimizer_variables from tensorflow.contrib.eager.python.saver import restore_variables_on_create from tensorflow.contrib.eager.python.saver import Saver -- GitLab From 21bb1160d37e8cd6e4ea6141497b91a6e9a0a529 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 14:33:31 -0800 Subject: [PATCH 0428/1801] Switch use of gRPC Generic API to Generic Unary, reducing CQ trips from 4->1 PiperOrigin-RevId: 175736600 --- .../rpc/grpc_remote_worker.cc | 22 ++-- .../rpc/grpc_remote_worker.h | 4 +- .../core/distributed_runtime/rpc/grpc_state.h | 107 +++++------------- .../core/distributed_runtime/rpc/grpc_util.cc | 21 ---- .../core/distributed_runtime/rpc/grpc_util.h | 23 ---- .../rpc/grpc_worker_cache.cc | 7 +- 6 files changed, 40 insertions(+), 144 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc index 2b9798d413..170c72deca 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc @@ -39,12 +39,10 @@ namespace tensorflow { class GrpcRemoteWorker : public WorkerInterface { public: - explicit GrpcRemoteWorker(GrpcCounter* live_rpc_counter, - SharedGrpcChannelPtr channel, + explicit GrpcRemoteWorker(SharedGrpcChannelPtr channel, ::grpc::CompletionQueue* completion_queue, WorkerCacheLogger* logger) - : counter_(live_rpc_counter), - channel_(std::move(channel)), + : channel_(std::move(channel)), stub_(channel_), cq_(completion_queue), getstatus_(Method(GrpcWorkerMethod::kGetStatus)), @@ -182,23 +180,21 @@ class GrpcRemoteWorker : public WorkerInterface { void IssueRequest(const protobuf::Message* request, protobuf::Message* response, const ::grpc::string& method, StatusCallback done, CallOptions* call_opts = nullptr) { - new RPCState(counter_, &stub_, cq_, method, *request, - response, std::move(done), call_opts); + new RPCState(&stub_, cq_, method, *request, response, + std::move(done), call_opts); } void IssueRequest(const protobuf::Message* request, TensorResponse* response, const ::grpc::string& method, StatusCallback done, CallOptions* call_opts = nullptr) { - new RPCState(counter_, &stub_, cq_, method, *request, - response, std::move(done), call_opts); + new RPCState(&stub_, cq_, method, *request, response, + std::move(done), call_opts); } // Helper function for initializing the RpcMethod objects below. const char* Method(GrpcWorkerMethod id) { return GrpcWorkerMethodName(id); } - GrpcCounter* const counter_; SharedGrpcChannelPtr channel_; ::grpc::GenericStub stub_; - ::grpc::CompletionQueue* cq_; const ::grpc::string getstatus_; @@ -218,12 +214,10 @@ class GrpcRemoteWorker : public WorkerInterface { TF_DISALLOW_COPY_AND_ASSIGN(GrpcRemoteWorker); }; -WorkerInterface* NewGrpcRemoteWorker(GrpcCounter* live_rpc_counter, - SharedGrpcChannelPtr channel, +WorkerInterface* NewGrpcRemoteWorker(SharedGrpcChannelPtr channel, ::grpc::CompletionQueue* completion_queue, WorkerCacheLogger* logger) { - return new GrpcRemoteWorker(live_rpc_counter, std::move(channel), - completion_queue, logger); + return new GrpcRemoteWorker(std::move(channel), completion_queue, logger); } } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.h b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.h index 174dfcc707..8ad4133540 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.h @@ -26,12 +26,10 @@ class CompletionQueue; namespace tensorflow { -class GrpcCounter; class WorkerCacheLogger; class WorkerInterface; -WorkerInterface* NewGrpcRemoteWorker(GrpcCounter* live_rpc_counter, - SharedGrpcChannelPtr channel, +WorkerInterface* NewGrpcRemoteWorker(SharedGrpcChannelPtr channel, ::grpc::CompletionQueue* completion_queue, WorkerCacheLogger* logger); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_state.h b/tensorflow/core/distributed_runtime/rpc/grpc_state.h index 087b49ba76..3f80bdfb70 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_state.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_state.h @@ -34,24 +34,18 @@ template class RPCState : public GrpcClientCQTag { public: // Default behavior is to set fail_fast = False and handle timeouts manually. - RPCState(GrpcCounter* counter, ::grpc::GenericStub* stub, - ::grpc::CompletionQueue* cq, const ::grpc::string& method, - const protobuf::Message& request, Response* response, - StatusCallback done, CallOptions* call_opts) - : RPCState(counter, stub, cq, method, request, response, std::move(done), + RPCState(::grpc::GenericStub* stub, ::grpc::CompletionQueue* cq, + const ::grpc::string& method, const protobuf::Message& request, + Response* response, StatusCallback done, CallOptions* call_opts) + : RPCState(stub, cq, method, request, response, std::move(done), call_opts, /*fail_fast=*/false, /*timeout_in_ms=*/0) {} template - RPCState(GrpcCounter* counter, ::grpc::GenericStub* stub, - ::grpc::CompletionQueue* cq, const ::grpc::string& method, - const Request& request, Response* response, StatusCallback done, - CallOptions* call_opts, bool fail_fast, int64 timeout_in_ms) - : counter_(counter), call_opts_(call_opts), done_(std::move(done)) { - // TODO(sanjay): The counter will no longer be needed once we - // get a GenericStub API which allows us to manage an entire - // RPC with a single completion event instead of four events. - counter_->Increment(); - + RPCState(::grpc::GenericStub* stub, ::grpc::CompletionQueue* cq, + const ::grpc::string& method, const Request& request, + Response* response, StatusCallback done, CallOptions* call_opts, + bool fail_fast, int64 timeout_in_ms) + : call_opts_(call_opts), done_(std::move(done)) { context_.set_fail_fast(fail_fast); if (timeout_in_ms > 0) { context_.set_deadline(gpr_time_from_millis(timeout_in_ms, GPR_TIMESPAN)); @@ -61,84 +55,43 @@ class RPCState : public GrpcClientCQTag { call_opts->SetCancelCallback([this]() { context_.TryCancel(); }); } - failure_.store(false); - remaining_callbacks_.store(4); // Init/Read/Write/Finish callbacks response_ = response; GrpcMaybeUnparseProto(request, &request_buf_); - // TODO(sanjay): When new enough grpc is available, enable the following: - // context_.set_initial_metadata_corked(true); - // We can then skip the extra state transition for init callback. - call_ = std::move(stub->Call(&context_, method, cq, this)); - call_initialized_.Notify(); + call_ = + std::move(stub->PrepareUnaryCall(&context_, method, request_buf_, cq)); + call_->StartCall(); + call_->Finish(&response_buf_, &status_, this); } - // Called multiple times: when init done, read done, write done, call done. void OnCompleted(bool ok) override { - if (!ok) failure_.store(true); - const int old_count = remaining_callbacks_.fetch_sub(1); - if (old_count > 1) { - if (old_count == 4) { - // Init callback finished. Issue remaining ops. - - // Annoyingly enough, the way the generic call API works is - // inherently racy. We can get the following sequence of events: - // 1. stub->Call() starts. - // 2. some stuff happens inside grpc - // 3. grpc delivers the completion event - // 4. tensorflow event handling thread calls init metadata callback - // 5. stub->Call() finishes - // 6. the result of stub->Call() is stored in call_ - // We are currently inside the callback and therefore need to - // wait for step 6 to finish before attempting to touch call_. - call_initialized_.WaitForNotification(); - - if (ok) { - // TODO(sanjay): Use WriteLast() when grpc version we are using - // is new enough. - call_->Write(request_buf_, this); - call_->Read(&response_buf_, this); - } else { - // Skip Write and Read. - remaining_callbacks_.fetch_sub(2); - } - call_->Finish(&status_, this); - } - // Still waiting for some more callbacks to finish. - return; - } else { // old_count == 1, i.e., all callbacks have finished - // Last callback finished; clean up. - if (call_opts_) { - call_opts_->ClearCancelCallback(); - } - Status s = FromGrpcStatus(status_); - if (s.ok() && failure_.load()) { - s.Update(errors::Internal("callback error")); - } - if (s.ok() && !GrpcMaybeParseProto(response_buf_, response_)) { - s.Update(errors::Internal("could not parse rpc response")); - } - if (!s.ok()) { - VLOG(2) << "Call returned with non-ok status: " << s; - } - done_(s); - counter_->Decrement(); - delete this; + if (call_opts_) { + call_opts_->ClearCancelCallback(); + } + Status s = FromGrpcStatus(status_); + if (s.ok() && !ok) { + // Since this function is only being used for processing the response + // to Finish for client-side unary calls, ok should never be false + s.Update(errors::Internal("unexpected ok value at rpc completion")); + } + if (s.ok() && !GrpcMaybeParseProto(response_buf_, response_)) { + s.Update(errors::Internal("could not parse rpc response")); + } + if (!s.ok()) { + VLOG(2) << "Call returned with non-ok status: " << s; } + done_(s); + delete this; } private: - GrpcCounter* const counter_; CallOptions* call_opts_; ::grpc::ClientContext context_; - std::unique_ptr<::grpc::GenericClientAsyncReaderWriter> call_; + std::unique_ptr<::grpc::GenericClientAsyncResponseReader> call_; Response* response_; ::grpc::ByteBuffer request_buf_; ::grpc::ByteBuffer response_buf_; ::grpc::Status status_; StatusCallback done_; - std::atomic failure_; - std::atomic remaining_callbacks_; - Notification call_initialized_; }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_util.cc b/tensorflow/core/distributed_runtime/rpc/grpc_util.cc index 9a97978c50..c80728544b 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_util.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_util.cc @@ -135,25 +135,4 @@ bool GrpcMaybeParseProto(const grpc::ByteBuffer& src, string* dst) { return true; } -void GrpcCounter::Increment() { - mutex_lock l(mu_); - counter_++; -} - -void GrpcCounter::Decrement() { - mutex_lock l(mu_); - DCHECK_GT(counter_, 0); - counter_--; - if (counter_ == 0) { - empty_.notify_all(); - } -} - -void GrpcCounter::WaitUntilUnused() { - mutex_lock l(mu_); - while (counter_ != 0) { - empty_.wait(l); - } -} - } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_util.h b/tensorflow/core/distributed_runtime/rpc/grpc_util.h index 04a54e672c..0ddcd89130 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_util.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_util.h @@ -84,29 +84,6 @@ class GrpcByteBufferSource : public ::grpc::protobuf::io::ZeroCopyInputStream { ::grpc::protobuf::int64 byte_count_; }; -// GrpcCounter is used to delay shutdown until all active RPCs are done. -class GrpcCounter { - public: - GrpcCounter() {} - - GrpcCounter(const GrpcCounter&) = delete; - GrpcCounter& operator=(const GrpcCounter&) = delete; - - // Increment the count of live RPCs. - void Increment(); - - // Decrement the count of live RPCs. - void Decrement(); - - // Wait until count of live RPCs is zero. - void WaitUntilUnused(); - - private: - mutex mu_; - condition_variable empty_; - int counter_ = 0; -}; - } // namespace tensorflow #endif // THIRD_PARTY_TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_UTIL_H_ diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc index 06695db779..a7b93e0460 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc @@ -51,9 +51,6 @@ class GrpcWorkerCache : public WorkerCachePartial { // Explicit destructor to control destruction order. ~GrpcWorkerCache() override { - // Wait until all live rpcs are done since otherwise the completion - // queue shutdown will interfere with rpc operation. - live_rpc_counter_.WaitUntilUnused(); completion_queue_.Shutdown(); delete polling_thread_; // Blocks until thread exits. delete channel_cache_; @@ -69,8 +66,7 @@ class GrpcWorkerCache : public WorkerCachePartial { } else { SharedGrpcChannelPtr channel = channel_cache_->FindWorkerChannel(target); if (!channel) return nullptr; - return NewGrpcRemoteWorker(&live_rpc_counter_, channel, - &completion_queue_, &logger_); + return NewGrpcRemoteWorker(channel, &completion_queue_, &logger_); } } @@ -94,7 +90,6 @@ class GrpcWorkerCache : public WorkerCachePartial { private: const string local_target_; WorkerInterface* const local_worker_; // Not owned. - GrpcCounter live_rpc_counter_; GrpcChannelCache* channel_cache_; // Owned. ::grpc::CompletionQueue completion_queue_; Thread* polling_thread_; // Owned. -- GitLab From e8b2049f5be7accae9f272972acfc5afb36c5ef2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 14:34:32 -0800 Subject: [PATCH 0429/1801] Fix docstring typo: "GraphKey" -> "GraphKeys". PiperOrigin-RevId: 175736748 --- tensorflow/python/training/optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 9f5e8ec938..b31d02eb8d 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -381,7 +381,7 @@ class Optimizer(object): loss: A Tensor containing the value to minimize. var_list: Optional list or tuple of `tf.Variable` to update to minimize `loss`. Defaults to the list of variables collected in the graph - under the key `GraphKey.TRAINABLE_VARIABLES`. + under the key `GraphKeys.TRAINABLE_VARIABLES`. gate_gradients: How to gate the computation of gradients. Can be `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. aggregation_method: Specifies the method used to combine gradient terms. -- GitLab From b8054c19b7d72cfb7eb07552a8d0385ffd8810d7 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Tue, 14 Nov 2017 14:41:12 -0800 Subject: [PATCH 0430/1801] Add `tf.contrib.bayesflow.layers`, a collection of probabilistic (neural) layers. PiperOrigin-RevId: 175737756 --- tensorflow/contrib/bayesflow/BUILD | 21 +- tensorflow/contrib/bayesflow/__init__.py | 19 +- .../layers_dense_variational_test.py | 304 +++++++ .../contrib/bayesflow/python/ops/layers.py | 37 + .../ops/layers_dense_variational_impl.py | 797 ++++++++++++++++++ 5 files changed, 1173 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py create mode 100644 tensorflow/contrib/bayesflow/python/ops/layers.py create mode 100644 tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 9f3650e8f9..a262d4aecd 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -19,6 +19,7 @@ py_library( srcs = ["__init__.py"] + glob(["python/ops/*.py"]), srcs_version = "PY2AND3", deps = [ + "//tensorflow/contrib/distributions:distributions_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", @@ -32,7 +33,6 @@ py_library( "//tensorflow/python:random_ops", "//tensorflow/python:state_ops", "//tensorflow/python:util", - "//tensorflow/python/ops/distributions", "//third_party/py/numpy", ], ) @@ -99,6 +99,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "layers_dense_variational_test", + size = "small", + srcs = ["python/kernel_tests/layers_dense_variational_test.py"], + additional_deps = [ + ":bayesflow_py", + "//third_party/py/numpy", + "//tensorflow/contrib/distributions:distributions_py", + "//tensorflow/python/ops/distributions", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:gradients", + "//tensorflow/python:linalg_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:nn_ops", + ], +) + cuda_py_test( name = "monte_carlo_test", size = "small", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index a638753f2f..95b9452b1a 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -25,6 +25,7 @@ from tensorflow.contrib.bayesflow.python.ops import csiszar_divergence from tensorflow.contrib.bayesflow.python.ops import custom_grad from tensorflow.contrib.bayesflow.python.ops import halton_sequence from tensorflow.contrib.bayesflow.python.ops import hmc +from tensorflow.contrib.bayesflow.python.ops import layers from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo from tensorflow.contrib.bayesflow.python.ops import optimizers @@ -33,9 +34,19 @@ from tensorflow.contrib.bayesflow.python.ops import optimizers from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['csiszar_divergence', 'custom_grad', 'entropy', - 'metropolis_hastings', 'monte_carlo', 'halton_sequence', - 'hmc', 'optimizers', 'special_math', 'stochastic_variables', - 'variational_inference'] +_allowed_symbols = [ + 'csiszar_divergence', + 'custom_grad', + 'entropy', + 'halton_sequence', + 'hmc', + 'layers', + 'metropolis_hastings', + 'monte_carlo', + 'optimizers', + 'special_math', + 'stochastic_variables', + 'variational_inference', +] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py new file mode 100644 index 0000000000..50358fd1c2 --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py @@ -0,0 +1,304 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for dense Bayesian layers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational_impl as prob_layers_lib +from tensorflow.python.framework import ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops.distributions import normal as normal_lib +from tensorflow.python.platform import test + + +class Counter(object): + """Helper class to manage incrementing a counting `int`.""" + + def __init__(self): + self._value = -1 + + @property + def value(self): + return self._value + + def __call__(self): + self._value += 1 + return self._value + + +class MockDistribution(normal_lib.Normal): + """Monitors DenseVariational calls to the underlying distribution.""" + + def __init__(self, result_sample, result_log_prob, loc=None, scale=None): + self.result_sample = result_sample + self.result_log_prob = result_log_prob + self.result_loc = loc + self.result_scale = scale + self.called_log_prob = Counter() + self.called_sample = Counter() + self.called_loc = Counter() + self.called_scale = Counter() + + def log_prob(self, *args, **kwargs): + self.called_log_prob() + return self.result_log_prob + + def sample(self, *args, **kwargs): + self.called_sample() + return self.result_sample + + @property + def loc(self): + self.called_loc() + return self.result_loc + + @property + def scale(self): + self.called_scale() + return self.result_scale + + +class MockKLDivergence(object): + """Monitors DenseVariational calls to the divergence implementation.""" + + def __init__(self, result): + self.result = result + self.args = [] + self.called = Counter() + + def __call__(self, *args, **kwargs): + self.called() + self.args.append(args) + return self.result + + +class DenseVariationalLocalReparametrization(test.TestCase): + + def testKLPenaltyKernel(self): + with self.test_session(): + dense_vi = prob_layers_lib.DenseVariational(units=2) + inputs = random_ops.random_uniform([2, 3], seed=1) + + # No keys. + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 0) + self.assertListEqual(dense_vi.losses, loss_keys) + + _ = dense_vi(inputs) + + # Yes keys. + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.assertListEqual(dense_vi.losses, loss_keys) + + def testKLPenaltyBoth(self): + def _make_normal(dtype, *args): # pylint: disable=unused-argument + return normal_lib.Normal( + loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)) + with self.test_session(): + dense_vi = prob_layers_lib.DenseVariational( + units=2, + bias_posterior_fn=prob_layers_lib.default_mean_field_normal_fn(), + bias_prior_fn=_make_normal) + inputs = random_ops.random_uniform([2, 3], seed=1) + + # No keys. + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 0) + self.assertListEqual(dense_vi.losses, loss_keys) + + _ = dense_vi(inputs) + + # Yes keys. + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 2) + self.assertListEqual(dense_vi.losses, loss_keys) + + def testVariationalNonLocal(self): + batch_size, in_size, out_size = 2, 3, 4 + with self.test_session() as sess: + seed = Counter() + inputs = random_ops.random_uniform([batch_size, in_size], seed=seed()) + + kernel_size = [in_size, out_size] + kernel_posterior = MockDistribution( + result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()), + result_sample=random_ops.random_uniform(kernel_size, seed=seed())) + kernel_prior = MockDistribution( + result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()), + result_sample=random_ops.random_uniform(kernel_size, seed=seed())) + kernel_divergence = MockKLDivergence( + result=random_ops.random_uniform(kernel_size, seed=seed())) + + bias_size = [out_size] + bias_posterior = MockDistribution( + result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), + result_sample=random_ops.random_uniform(bias_size, seed=seed())) + bias_prior = MockDistribution( + result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), + result_sample=random_ops.random_uniform(bias_size, seed=seed())) + bias_divergence = MockKLDivergence( + result=random_ops.random_uniform(bias_size, seed=seed())) + + expected_outputs = ( + math_ops.matmul(inputs, kernel_posterior.result_sample) + + bias_posterior.result_sample) + + dense_vi = prob_layers_lib.DenseVariational( + units=2, + kernel_use_local_reparameterization=False, + kernel_posterior_fn=lambda *args: kernel_posterior, + kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), + kernel_prior_fn=lambda *args: kernel_prior, + kernel_divergence_fn=kernel_divergence, + bias_posterior_fn=lambda *args: bias_posterior, + bias_posterior_tensor_fn=lambda d: d.sample(seed=43), + bias_prior_fn=lambda *args: bias_prior, + bias_divergence_fn=bias_divergence) + + outputs = dense_vi(inputs) + + kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + + [ + expected_outputs_, actual_outputs_, + expected_kernel_, actual_kernel_, + expected_kernel_divergence_, actual_kernel_divergence_, + expected_bias_, actual_bias_, + expected_bias_divergence_, actual_bias_divergence_, + ] = sess.run([ + expected_outputs, outputs, + kernel_posterior.result_sample, dense_vi.kernel.posterior_tensor, + kernel_divergence.result, kl_penalty[0], + bias_posterior.result_sample, dense_vi.bias.posterior_tensor, + bias_divergence.result, kl_penalty[1], + ]) + + self.assertAllClose( + expected_kernel_, actual_kernel_, + rtol=1e-6, atol=0.) + self.assertAllClose( + expected_bias_, actual_bias_, + rtol=1e-6, atol=0.) + self.assertAllClose( + expected_outputs_, actual_outputs_, + rtol=1e-6, atol=0.) + self.assertAllClose( + expected_kernel_divergence_, actual_kernel_divergence_, + rtol=1e-6, atol=0.) + self.assertAllClose( + expected_bias_divergence_, actual_bias_divergence_, + rtol=1e-6, atol=0.) + + self.assertAllEqual( + [[kernel_posterior, kernel_prior, kernel_posterior.result_sample]], + kernel_divergence.args) + + self.assertAllEqual( + [[bias_posterior, bias_prior, bias_posterior.result_sample]], + bias_divergence.args) + + def testVariationalLocal(self): + batch_size, in_size, out_size = 2, 3, 4 + with self.test_session() as sess: + seed = Counter() + inputs = random_ops.random_uniform([batch_size, in_size], seed=seed()) + + kernel_size = [in_size, out_size] + kernel_posterior = MockDistribution( + loc=random_ops.random_uniform(kernel_size, seed=seed()), + scale=random_ops.random_uniform(kernel_size, seed=seed()), + result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()), + result_sample=random_ops.random_uniform(kernel_size, seed=seed())) + kernel_prior = MockDistribution( + result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()), + result_sample=random_ops.random_uniform(kernel_size, seed=seed())) + kernel_divergence = MockKLDivergence( + result=random_ops.random_uniform(kernel_size, seed=seed())) + + bias_size = [out_size] + bias_posterior = MockDistribution( + result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), + result_sample=random_ops.random_uniform(bias_size, seed=seed())) + bias_prior = MockDistribution( + result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), + result_sample=random_ops.random_uniform(bias_size, seed=seed())) + bias_divergence = MockKLDivergence( + result=random_ops.random_uniform(bias_size, seed=seed())) + + expected_kernel_posterior_affine = normal_lib.Normal( + loc=math_ops.matmul(inputs, kernel_posterior.result_loc), + scale=math_ops.matmul( + inputs**2., kernel_posterior.result_scale**2)**0.5) + expected_kernel_posterior_affine_tensor = ( + expected_kernel_posterior_affine.sample(seed=42)) + expected_outputs = (expected_kernel_posterior_affine_tensor + + bias_posterior.result_sample) + + dense_vi = prob_layers_lib.DenseVariational( + units=2, + kernel_use_local_reparameterization=True, + kernel_posterior_fn=lambda *args: kernel_posterior, + kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), + kernel_prior_fn=lambda *args: kernel_prior, + kernel_divergence_fn=kernel_divergence, + bias_posterior_fn=lambda *args: bias_posterior, + bias_posterior_tensor_fn=lambda d: d.sample(seed=43), + bias_prior_fn=lambda *args: bias_prior, + bias_divergence_fn=bias_divergence) + + outputs = dense_vi(inputs) + + kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + + [ + expected_outputs_, actual_outputs_, + expected_kernel_divergence_, actual_kernel_divergence_, + expected_bias_, actual_bias_, + expected_bias_divergence_, actual_bias_divergence_, + ] = sess.run([ + expected_outputs, outputs, + kernel_divergence.result, kl_penalty[0], + bias_posterior.result_sample, dense_vi.bias.posterior_tensor, + bias_divergence.result, kl_penalty[1], + ]) + + self.assertAllClose( + expected_bias_, actual_bias_, + rtol=1e-6, atol=0.) + self.assertAllClose( + expected_outputs_, actual_outputs_, + rtol=1e-6, atol=0.) + self.assertAllClose( + expected_kernel_divergence_, actual_kernel_divergence_, + rtol=1e-6, atol=0.) + self.assertAllClose( + expected_bias_divergence_, actual_bias_divergence_, + rtol=1e-6, atol=0.) + + self.assertAllEqual( + [[kernel_posterior, kernel_prior, None]], + kernel_divergence.args) + + self.assertAllEqual( + [[bias_posterior, bias_prior, bias_posterior.result_sample]], + bias_divergence.args) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/layers.py b/tensorflow/contrib/bayesflow/python/ops/layers.py new file mode 100644 index 0000000000..dcead38af8 --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/ops/layers.py @@ -0,0 +1,37 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Probabilistic neural layers. + +See ${python/contrib.bayesflow.layers}. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# go/tf-wildcard-import +# pylint: disable=wildcard-import +from tensorflow.contrib.bayesflow.python.ops.layers_dense_variational_impl import * +# pylint: enable=wildcard-import +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + 'DenseVariational', + 'dense_variational', + 'default_loc_scale_fn', + 'default_mean_field_normal_fn', +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py new file mode 100644 index 0000000000..b05ce0ffc1 --- /dev/null +++ b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py @@ -0,0 +1,797 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Dense Bayesian layer using KL-divergence based variational inference. + +@@DenseVariational +@@dense_variational + +@@default_loc_scale_fn +@@default_mean_field_normal_fn +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.layers import base as layers_lib +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import standard_ops +from tensorflow.python.ops.distributions import kullback_leibler as kl_lib +from tensorflow.python.ops.distributions import normal as normal_lib + + +__all__ = [ + "DenseVariational", + "dense_variational", + "default_loc_scale_fn", + "default_mean_field_normal_fn", +] + + +def default_loc_scale_fn( + is_singular=False, + loc_initializer=init_ops.random_normal_initializer(stddev=0.1), + untransformed_scale_initializer=init_ops.random_normal_initializer( + mean=-3., stddev=0.1), + loc_regularizer=None, + untransformed_scale_regularizer=None, + loc_constraint=None, + untransformed_scale_constraint=None): + """Makes closure which creates `loc`, `scale` params from `tf.get_variable`. + + This function produces a closure which produces `loc`, `scale` using + `tf.get_variable`. The closure accepts the following arguments: + + dtype: Type of parameter's event. + shape: Python `list`-like representing the parameter's event shape. + name: Python `str` name prepended to any created (or existing) + `tf.Variable`s. + trainable: Python `bool` indicating all created `tf.Variable`s should be + added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. + add_variable_fn: `tf.get_variable`-like `callable` used to create (or + access existing) `tf.Variable`s. + + Args: + is_singular: Python `bool` indicating if `scale is None`. Default: `False`. + loc_initializer: Initializer function for the `loc` parameters. + The default is `tf.random_normal_initializer(mean=0., stddev=0.1)`. + untransformed_scale_initializer: Initializer function for the `scale` + parameters. Default value: `tf.random_normal_initializer(mean=-3., + stddev=0.1)`. This implies the softplus transformed result has mean + approximately `0.05` and std. deviation approximately `0.005`. + loc_regularizer: Regularizer function for the `loc` parameters. + The default (`None`) is to use the `tf.get_variable` default. + untransformed_scale_regularizer: Regularizer function for the `scale` + parameters. The default (`None`) is to use the `tf.get_variable` default. + loc_constraint: An optional projection function to be applied to the + loc after being updated by an `Optimizer`. The function must take as input + the unprojected variable and must return the projected variable (which + must have the same shape). Constraints are not safe to use when doing + asynchronous distributed training. + The default (`None`) is to use the `tf.get_variable` default. + untransformed_scale_constraint: An optional projection function to be + applied to the `scale` parameters after being updated by an `Optimizer` + (e.g. used to implement norm constraints or value constraints). The + function must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are not + safe to use when doing asynchronous distributed training. The default + (`None`) is to use the `tf.get_variable` default. + + Returns: + default_loc_scale_fn: Python `callable` which instantiates `loc`, `scale` + parameters from args: `dtype, shape, name, trainable, add_variable_fn`. + """ + def _fn(dtype, shape, name, trainable, add_variable_fn): + """Creates `loc`, `scale` parameters.""" + loc = add_variable_fn( + name=name + "_loc", + shape=shape, + initializer=loc_initializer, + regularizer=loc_regularizer, + constraint=loc_constraint, + dtype=dtype, + trainable=trainable) + if is_singular: + return loc, None + untransformed_scale = add_variable_fn( + name=name + "_untransformed_scale", + shape=shape, + initializer=untransformed_scale_initializer, + regularizer=untransformed_scale_regularizer, + constraint=untransformed_scale_constraint, + dtype=dtype, + trainable=trainable) + scale = (np.finfo(dtype.as_numpy_dtype).eps + + nn_ops.softplus(untransformed_scale)) + return loc, scale + return _fn + + +def default_mean_field_normal_fn( + is_singular=False, + loc_initializer=None, + untransformed_scale_initializer=None, + loc_regularizer=None, + untransformed_scale_regularizer=None, + loc_constraint=None, + untransformed_scale_constraint=None): + """Creates a function to build Normal distributions with trainable params. + + This function produces a closure which produces `tf.distributions.Normal` + parameterized by a loc` and `scale` each created using `tf.get_variable`. The + produced closure accepts the following arguments: + + name: Python `str` name prepended to any created (or existing) + `tf.Variable`s. + shape: Python `list`-like representing the parameter's event shape. + dtype: Type of parameter's event. + trainable: Python `bool` indicating all created `tf.Variable`s should be + added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. + add_variable_fn: `tf.get_variable`-like `callable` used to create (or + access existing) `tf.Variable`s. + + Args: + is_singular: Python `bool` if `True`, forces the special case limit of + `scale->0`, i.e., a `Deterministic` distribution. + loc_initializer: Initializer function for the `loc` parameters. + If `None` (default), values are initialized using the default + initializer used by `tf.get_variable`. + untransformed_scale_initializer: Initializer function for the `scale` + parameters. If `None` (default), values are initialized using the default + initializer used by `tf.get_variable`. + loc_regularizer: Regularizer function for the `loc` parameters. + untransformed_scale_regularizer: Regularizer function for the `scale` + parameters. + loc_constraint: An optional projection function to be applied to the + loc after being updated by an `Optimizer`. The function must take as input + the unprojected variable and must return the projected variable (which + must have the same shape). Constraints are not safe to use when doing + asynchronous distributed training. + untransformed_scale_constraint: An optional projection function to be + applied to the `scale` parameters after being updated by an `Optimizer` + (e.g. used to implement norm constraints or value constraints). The + function must take as input the unprojected variable and must return the + projected variable (which must have the same shape). Constraints are not + safe to use when doing asynchronous distributed training. + + Returns: + make_normal_fn: Python `callable` which creates a `tf.distributions.Normal` + using from args: `dtype, shape, name, trainable, add_variable_fn`. + """ + loc_scale_fn_ = default_loc_scale_fn( + is_singular, + loc_initializer, + untransformed_scale_initializer, + loc_regularizer, + untransformed_scale_regularizer, + loc_constraint, + untransformed_scale_constraint) + def _fn(dtype, shape, name, trainable, add_variable_fn): + """Creates a batch of `Deterministic` or `Normal` distributions.""" + loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn) + if scale is None: + return deterministic_lib.Deterministic(loc=loc) + return normal_lib.Normal(loc=loc, scale=scale) + return _fn + + +class DenseVariational(layers_lib.Layer): + """Densely-connected variational class. + + This layer implements the Bayesian variational inference analogue to: + `outputs = activation(matmul(inputs, kernel) + bias)` + by assuming the `kernel` and/or the `bias` are random variables. + + The layer implements a stochastic dense calculation by making a Monte Carlo + approximation of a [variational Bayesian method based on KL divergence]( + https://en.wikipedia.org/wiki/Variational_Bayesian_methods), i.e., + + ```none + -log p(y|x) = -log int_{R**d} p(y|x,w) p(w) dw + = -log int_{R**d} p(y,w|x) q(w|x) / q(w|x) dw + <= E_q(W|x)[-log p(y,W|x) + log q(W|x)] # Jensen's + = E_q(W|x)[-log p(y|x,W)] + KL[q(W|x), p(W)] + ~= m**-1 sum{ -log(y|x,w[j]) : w[j] ~ q(W|x), j=1..m } + + KL[q(W|x), p(W)] + ``` + + where `W` denotes the (independent) `kernel` and `bias` random variables, `w` + is a random variate or outcome of `W`, `y` is the label, `x` is the evidence`, + and `~=` denotes an approximation which becomes exact as `m->inf`. The above + bound is sometimes referred to as the negative Evidence Lower BOund or + negative [ELBO](https://arxiv.org/abs/1601.00670). In context of a DNN, this + layer is appropriate to use when the final loss is a negative log-likelihood. + + The Monte-Carlo sum portion is used for the feed-forward calculation of the + DNN. The KL divergence portion can be added to the final loss via: + `loss += sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`. + + The arguments permit separate specification of the surrogate posterior + (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` + random variables (which together comprise `W`). + + Args: + units: Integer or Long, dimensionality of the output space. + activation: Activation function (`callable`). Set it to None to maintain a + linear activation. + activity_regularizer: Regularizer function for the output. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + kernel_use_local_reparameterization: Python `bool` indicating whether + `kernel` calculation should employ the Local Reparameterization Trick. + When `True`, `kernel_posterior_fn` must create an instance of + `tf.distributions.Normal`. + kernel_posterior_fn: Python `callable` which creates + `tf.distributions.Distribution` instance representing the surrogate + posterior of the `kernel` parameter. Default value: + `default_mean_field_normal_fn()`. + kernel_posterior_tensor_fn: Python `callable` which takes a + `tf.distributions.Distribution` instance and returns a representative + value. Default value: `lambda d: d.sample()`. + kernel_prior_fn: Python `callable` which creates `tf.distributions` + instance. See `default_mean_field_normal_fn` docstring for required + parameter signature. + Default value: `tf.distributions.Normal(loc=0., scale=1.)`. + kernel_divergence_fn: Python `callable` which takes the surrogate posterior + distribution, prior distribution and random variate sample(s) from the + surrogate posterior and computes or approximates the KL divergence. The + distributions are `tf.distributions.Distribution`-like instances and the + sample is a `Tensor`. + bias_posterior_fn: Python `callable` which creates + `tf.distributions.Distribution` instance representing the surrogate + posterior of the `bias` parameter. Default value: + `default_mean_field_normal_fn(is_singular=True)` (which creates an + instance of `tf.distributions.Deterministic`). + bias_posterior_tensor_fn: Python `callable` which takes a + `tf.distributions.Distribution` instance and returns a representative + value. Default value: `lambda d: d.sample()`. + bias_prior_fn: Python `callable` which creates `tf.distributions` instance. + See `default_mean_field_normal_fn` docstring for required parameter + signature. Default value: `None` (no prior, no variational inference) + bias_divergence_fn: Python `callable` which takes the surrogate posterior + distribution, prior distribution and random variate sample(s) from the + surrogate posterior and computes or approximates the KL divergence. The + distributions are `tf.distributions.Distribution`-like instances and the + sample is a `Tensor`. + name: Python `str`, the name of the layer. Layers with the same name will + share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in + such cases. + reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous + layer by the same name. + + Properties: + units: Python integer, dimensionality of the output space. + activation: Activation function (`callable`). + activity_regularizer: Regularizer function for the output. + kernel_use_local_reparameterization: Python `bool` indicating whether + `kernel` calculation should employ the Local Reparameterization Trick. + kernel: `VariationalKernelParamater` instance containing all `kernel` + related properties and `callable`s. + bias: `VariationalParameter` instance containing all `kernel` + related properties and `callable`s. + """ + + def __init__( + self, + units, + activation=None, + activity_regularizer=None, + trainable=True, + kernel_use_local_reparameterization=True, + kernel_posterior_fn=default_mean_field_normal_fn(), + kernel_posterior_tensor_fn=lambda d: d.sample(), + kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda + loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), + kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), + bias_posterior_fn=default_mean_field_normal_fn(is_singular=True), + bias_posterior_tensor_fn=lambda d: d.sample(), + bias_prior_fn=None, + bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), + name=None, + **kwargs): + super(DenseVariational, self).__init__( + trainable=trainable, + name=name, + activity_regularizer=activity_regularizer, + **kwargs) + self._units = units + self._activation = activation + self._input_spec = layers_lib.InputSpec(min_ndim=2) + self._kernel_use_local_reparameterization = ( + kernel_use_local_reparameterization) + self._kernel = VariationalKernelParameter( + kernel_posterior_fn, + kernel_posterior_tensor_fn, + kernel_prior_fn, + kernel_divergence_fn) + self._bias = VariationalParameter( + bias_posterior_fn, + bias_posterior_tensor_fn, + bias_prior_fn, + bias_divergence_fn) + + @property + def units(self): + return self._units + + @property + def activation(self): + return self._activation + + @property + def input_spec(self): + return self._input_spec + + @input_spec.setter + def input_spec(self, value): + self._input_spec = value + + @property + def kernel_use_local_reparameterization(self): + return self._kernel_use_local_reparameterization + + @property + def kernel(self): + return self._kernel + + @property + def bias(self): + return self._bias + + def build(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + in_size = input_shape.with_rank_at_least(2)[-1].value + if in_size is None: + raise ValueError("The last dimension of the inputs to `Dense` " + "should be defined. Found `None`.") + self._input_spec = layers_lib.InputSpec(min_ndim=2, axes={-1: in_size}) + dtype = dtypes.as_dtype(self.dtype) + + # Must have a posterior kernel. + self.kernel.posterior = self.kernel.posterior_fn( + dtype, [in_size, self.units], "kernel_posterior", + self.trainable, self.add_variable) + + if self.kernel.prior_fn is None: + self.kernel_prior = None + else: + self.kernel.prior = self.kernel.prior_fn( + dtype, [in_size, self.units], "kernel_prior", + self.trainable, self.add_variable) + self._built_kernel_divergence = False + + if self.bias.posterior_fn is None: + self.bias.posterior = None + else: + self.bias.posterior = self.bias.posterior_fn( + dtype, [self.units], "bias_posterior", + self.trainable, self.add_variable) + + if self.bias.prior_fn is None: + self.bias.prior = None + else: + self.bias.prior = self.bias.prior_fn( + dtype, [self.units], "bias_prior", + self.trainable, self.add_variable) + self._built_bias_divergence = False + + self.built = True + + def call(self, inputs): + inputs = ops.convert_to_tensor(inputs, dtype=self.dtype) + + outputs = self._apply_variational_kernel(inputs) + outputs = self._apply_variational_bias(outputs) + if self.activation is not None: + outputs = self.activation(outputs) # pylint: disable=not-callable + if not self._built_kernel_divergence: + self._apply_divergence(self.kernel, name="divergence_kernel") + self._built_kernel_divergence = True + if not self._built_bias_divergence: + self._apply_divergence(self.bias, name="divergence_bias") + self._built_bias_divergence = True + return outputs + + def _apply_variational_kernel(self, inputs): + if not self.kernel_use_local_reparameterization: + self.kernel.posterior_tensor = self.kernel.posterior_tensor_fn( + self.kernel.posterior) + self.kernel.posterior_affine = None + self.kernel.posterior_affine_tensor = None + return self._matmul(inputs, self.kernel.posterior_tensor) + if not isinstance(self.kernel.posterior, normal_lib.Normal): + raise TypeError("`kernel_use_local_reparameterization=True` requires " + "`kernel_posterior_fn` produce an instance of " + "`tf.distributions.Normal` (saw: \"{}\").".format( + type(self.kernel.posterior).__name__)) + self.kernel.posterior_affine = normal_lib.Normal( + loc=self._matmul(inputs, self.kernel.posterior.loc), + scale=standard_ops.sqrt(self._matmul( + standard_ops.square(inputs), + standard_ops.square(self.kernel.posterior.scale)))) + self.kernel.posterior_affine_tensor = ( + self.kernel.posterior_tensor_fn(self.kernel.posterior_affine)) + self.kernel.posterior_tensor = None + return self.kernel.posterior_affine_tensor + + def _apply_variational_bias(self, inputs): + if self.bias.posterior is None: + self.bias.posterior_tensor = None + return inputs + self.bias.posterior_tensor = self.bias.posterior_tensor_fn( + self.bias.posterior) + return nn.bias_add(inputs, self.bias.posterior_tensor) + + def _apply_divergence(self, param, name): + if (param.divergence_fn is None or + param.posterior is None or + param.prior is None): + param.divergence = None + return + param.divergence = standard_ops.identity( + param.divergence_fn( + param.posterior, param.prior, param.posterior_tensor), + name=name) + self.add_loss(param.divergence) + + def _matmul(self, inputs, kernel): + if inputs.shape.ndims <= 2: + return standard_ops.matmul(inputs, kernel) + # To handle broadcasting, we must use `tensordot`. + return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]]) + + def _compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).with_rank_at_least(2) + if input_shape[-1].value is None: + raise ValueError( + "The innermost dimension of input_shape must be defined, " + "but saw: {}".format(input_shape)) + return input_shape[:-1].concatenate(self.units) + + +def dense_variational( + inputs, + units, + activation=None, + activity_regularizer=None, + trainable=True, + kernel_use_local_reparameterization=True, + kernel_posterior_fn=default_mean_field_normal_fn(), + kernel_posterior_tensor_fn=lambda d: d.sample(), + kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda + loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), + kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), + bias_posterior_fn=default_mean_field_normal_fn(is_singular=True), + bias_posterior_tensor_fn=lambda d: d.sample(), + bias_prior_fn=None, + bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), + name=None, + reuse=None): + """Densely-connected variational layer. + + This layer implements the Bayesian variational inference analogue to: + `outputs = activation(matmul(inputs, kernel) + bias)` + by assuming the `kernel` and/or the `bias` are random variables. + + The layer implements a stochastic dense calculation by making a Monte Carlo + approximation of a [variational Bayesian method based on KL divergence]( + https://en.wikipedia.org/wiki/Variational_Bayesian_methods), i.e., + + ```none + -log p(y|x) = -log int_{R**d} p(y|x,w) p(w) dw + = -log int_{R**d} p(y,w|x) q(w|x) / q(w|x) dw + <= E_q(W|x)[-log p(y,W|x) + log q(W|x)] # Jensen's + = E_q(W|x)[-log p(y|x,W)] + KL[q(W|x), p(W)] + ~= m**-1 sum{ -log(y|x,w[j]) : w[j] ~ q(W|x), j=1..m } + + KL[q(W|x), p(W)] + ``` + + where `W` denotes the (independent) `kernel` and `bias` random variables, `w` + is a random variate or outcome of `W`, `y` is the label, `x` is the evidence`, + and `~=` denotes an approximation which becomes exact as `m->inf`. The above + bound is sometimes referred to as the negative Evidence Lower BOund or + negative [ELBO](https://arxiv.org/abs/1601.00670). In context of a DNN, this + layer is appropriate to use when the final loss is a negative log-likelihood. + + The Monte-Carlo sum portion is used for the feed-forward calculation of the + DNN. The KL divergence portion can be added to the final loss via: + `loss += sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`. + + The arguments permit separate specification of the surrogate posterior + (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` + random variables (which together comprise `W`). + + Args: + inputs: Tensor input. + units: Integer or Long, dimensionality of the output space. + activation: Activation function (`callable`). Set it to None to maintain a + linear activation. + activity_regularizer: Regularizer function for the output. + trainable: Boolean, if `True` also add variables to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + kernel_use_local_reparameterization: Python `bool` indicating whether + `kernel` calculation should employ the Local Reparameterization Trick. + When `True`, `kernel_posterior_fn` must create an instance of + `tf.distributions.Normal`. + kernel_posterior_fn: Python `callable` which creates + `tf.distributions.Distribution` instance representing the surrogate + posterior of the `kernel` parameter. Default value: + `default_mean_field_normal_fn()`. + kernel_posterior_tensor_fn: Python `callable` which takes a + `tf.distributions.Distribution` instance and returns a representative + value. Default value: `lambda d: d.sample()`. + kernel_prior_fn: Python `callable` which creates `tf.distributions` + instance. See `default_mean_field_normal_fn` docstring for required + parameter signature. + Default value: `tf.distributions.Normal(loc=0., scale=1.)`. + kernel_divergence_fn: Python `callable` which takes the surrogate posterior + distribution, prior distribution and random variate sample(s) from the + surrogate posterior and computes or approximates the KL divergence. The + distributions are `tf.distributions.Distribution`-like instances and the + sample is a `Tensor`. + bias_posterior_fn: Python `callable` which creates + `tf.distributions.Distribution` instance representing the surrogate + posterior of the `bias` parameter. Default value: + `default_mean_field_normal_fn(is_singular=True)` (which creates an + instance of `tf.distributions.Deterministic`). + bias_posterior_tensor_fn: Python `callable` which takes a + `tf.distributions.Distribution` instance and returns a representative + value. Default value: `lambda d: d.sample()`. + bias_prior_fn: Python `callable` which creates `tf.distributions` instance. + See `default_mean_field_normal_fn` docstring for required parameter + signature. Default value: `None` (no prior, no variational inference) + bias_divergence_fn: Python `callable` which takes the surrogate posterior + distribution, prior distribution and random variate sample(s) from the + surrogate posterior and computes or approximates the KL divergence. The + distributions are `tf.distributions.Distribution`-like instances and the + sample is a `Tensor`. + name: Python `str`, the name of the layer. Layers with the same name will + share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in + such cases. + reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous + layer by the same name. + + Returns: + output: `Tensor` representing a the affine transformed input under a random + draw from the surrogate posterior distribution. + """ + layer = DenseVariational( + units, + activation=activation, + activity_regularizer=activity_regularizer, + trainable=trainable, + kernel_use_local_reparameterization=( + kernel_use_local_reparameterization), + kernel_posterior_fn=kernel_posterior_fn, + kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, + kernel_prior_fn=kernel_prior_fn, + kernel_divergence_fn=kernel_divergence_fn, + bias_posterior_fn=bias_posterior_fn, + bias_posterior_tensor_fn=bias_posterior_tensor_fn, + bias_prior_fn=bias_prior_fn, + bias_divergence_fn=bias_divergence_fn, + name=name, + dtype=inputs.dtype.base_dtype, + _scope=name, + _reuse=reuse) + return layer.apply(inputs) + + +class NotSet(object): + """Helper to track whether a `VariationalParameter` value has been set.""" + pass + + +class VariationalParameter(object): + """Struct-like container of variational parameter properties. + + A `VariationalParameter` is intitialized with Python `callable`s which set the + value of correspondingly named members. Corresponding values have "set once" + semantics, i.e., once set to any value they are immutable. + """ + + def __init__( + self, + posterior_fn, + posterior_tensor_fn, + prior_fn, + divergence_fn): + """Creates the `VariationalParameter` struct-like object. + + Args: + posterior_fn: Python `callable` which creates a + `tf.distribution.Distribution` like object representing the posterior + distribution. See `VariationalParameter.posterior_fn` for `callable`'s + required parameters. + posterior_tensor_fn: Python `callable` which computes a `Tensor` + which represents the `posterior`. + prior_fn: Python `callable` which creates a + `tf.distribution.Distribution` like object representing the prior + distribution. See `VariationalParameter.prior_fn` for `callable`'s + required parameters. + divergence_fn: Python `callable` which computes the KL divergence from + `posterior` to `prior`. See `VariationalParameter.divergence_fn` for + required `callable`'s parameters. + """ + self._posterior_fn = posterior_fn + self._posterior = NotSet() + self._posterior_tensor_fn = posterior_tensor_fn + self._posterior_tensor = NotSet() + self._prior_fn = prior_fn + self._prior = NotSet() + self._divergence_fn = divergence_fn + self._divergence = NotSet() + self._init_helper() + + @property + def posterior_fn(self): + """`callable` which creates `tf.distributions.Distribution`-like posterior. + + The `callable` must accept the following parameters: + name: Python `str` name prepended to any created (or existing) + `tf.Variable`s. + shape: Python `list`-like representing the parameter's event shape. + dtype: Type of parameter's event. + trainable: Python `bool` indicating all created `tf.Variable`s should be + added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. + add_variable_fn: `tf.get_variable`-like `callable` used to create (or + access existing) `tf.Variable`s. + + Returns: + posterior_fn: The Python `callable` specified in `__init__`. + """ + return self._posterior_fn + + @property + def posterior(self): + """`tf.distributions.Distribution`-like instance representing posterior.""" + return self._posterior + + @posterior.setter + def posterior(self, value): + """One-time setter of the `posterior` distribution.""" + if not isinstance(self._posterior, NotSet): + raise ValueError("Cannot override already set attribute.") + self._posterior = value + + @property + def posterior_tensor_fn(self): + """Creates `Tensor` representing the `posterior` distribution. + + The `callable` must accept the following parameters: + posterior: `tf.distributions.Distribution`-like instance. + + Returns: + posterior_tensor_fn: The Python `callable` specified in + `__init__`. + """ + return self._posterior_tensor_fn + + @property + def posterior_tensor(self): + """`Tensor` representing the `posterior` distribution.""" + return self._posterior_tensor + + @posterior_tensor.setter + def posterior_tensor(self, value): + """One-time setter of the `posterior_tensor`.""" + if not isinstance(self._posterior_tensor, NotSet): + raise ValueError("Cannot override already set attribute.") + self._posterior_tensor = value + + @property + def prior_fn(self): + """`callable` which creates `tf.distributions.Distribution`-like prior. + + The `callable` must accept the following parameters: + name: Python `str` name prepended to any created (or existing) + `tf.Variable`s. + shape: Python `list`-like representing the parameter's event shape. + dtype: Type of parameter's event. + trainable: Python `bool` indicating all created `tf.Variable`s should be + added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. + add_variable_fn: `tf.get_variable`-like `callable` used to create (or + access existing) `tf.Variable`s. + + Returns: + prior_fn: The Python `callable` specified in `__init__`. + """ + return self._prior_fn + + @property + def prior(self): + """`tf.distributions.Distribution`-like instance representing posterior.""" + return self._prior + + @prior.setter + def prior(self, value): + """One-time setter of the `prior` distribution.""" + if not isinstance(self._prior, NotSet): + raise ValueError("Cannot override already set attribute.") + self._prior = value + + @property + def divergence_fn(self): + """`callable` which computes KL-divergence `Tensor` from posterior to prior. + + The `callable` must accept the following parameters: + posterior: `tf.distributions.Distribution`-like instance. + prior: `tf.distributions.Distribution`-like instance. + posterior_tensor: `Tensor` representing value of posterior. + + Returns: + divergence_fn: The Python `callable` specified in `__init__`. + """ + return self._divergence_fn + + @property + def divergence(self): + """`Tensor` representing KL-divergence from posterior to prior.""" + return self._divergence + + @divergence.setter + def divergence(self, value): + """One-time setter of the `divergence`.""" + if not isinstance(self._divergence, NotSet): + raise ValueError("Cannot override already set attribute.") + self._divergence = value + + def _init_helper(self): + pass + + +class VariationalKernelParameter(VariationalParameter): + """Struct-like container of variational kernel properties. + + A `VariationalKernelParameter` is intitialized with Python `callable`s which + set the value of correspondingly named members. Corresponding values have "set + once" semantics, i.e., once set to any value they are immutable. + """ + + @property + def posterior_affine(self): + """`tf.distributions.Distribution` affine transformed posterior.""" + return self._posterior_affine + + @posterior_affine.setter + def posterior_affine(self, value): + """One-time setter of `posterior_affine`.""" + if not isinstance(self._posterior_affine, NotSet): + raise ValueError("Cannot override already set attribute.") + self._posterior_affine = value + + @property + def posterior_affine_tensor(self): + """`Tensor` representing the `posterior_affine` distribution.""" + return self._posterior_affine_tensor + + @posterior_affine_tensor.setter + def posterior_affine_tensor(self, value): + """One-time setter of the `posterior_affine_tensor`.""" + if not isinstance(self._posterior_affine_tensor, NotSet): + raise ValueError("Cannot override already set attribute.") + self._posterior_affine_tensor = value + + def _init_helper(self): + self._posterior_affine = NotSet() + self._posterior_affine_tensor = NotSet() -- GitLab From a5a192865e4c1732b414d6a503d07775f7163a5c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 14:44:28 -0800 Subject: [PATCH 0431/1801] Refactors the WALS estimator so that part of the control flow logic happens in the SweepHook. This fixes a bug that causes both input batches (rows and columns) to be fetched during any given sweep. PiperOrigin-RevId: 175738242 --- .../contrib/factorization/python/ops/wals.py | 449 +++++++++--------- .../factorization/python/ops/wals_test.py | 112 +++-- 2 files changed, 274 insertions(+), 287 deletions(-) diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py index 3976395d78..b2f22eb2fc 100644 --- a/tensorflow/contrib/factorization/python/ops/wals.py +++ b/tensorflow/contrib/factorization/python/ops/wals.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.factorization.python.ops import factorization_ops -from tensorflow.contrib.framework.python.ops import variables as framework_variables from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.contrib.learn.python.learn.estimators import model_fn from tensorflow.python.framework import dtypes @@ -32,175 +31,64 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary from tensorflow.python.training import session_run_hook +from tensorflow.python.training import training_util class _SweepHook(session_run_hook.SessionRunHook): """Keeps track of row/col sweeps, and runs prep ops before each sweep.""" - def __init__(self, is_row_sweep_var, train_ops, num_rows, num_cols, - input_row_indices, input_col_indices, row_prep_ops, - col_prep_ops, init_op, completed_sweeps_var): + def __init__(self, is_row_sweep_var, is_sweep_done_var, init_op, + row_prep_ops, col_prep_ops, row_train_op, col_train_op, + switch_op): """Initializes SweepHook. Args: is_row_sweep_var: A Boolean tf.Variable, determines whether we are currently doing a row or column sweep. It is updated by the hook. - train_ops: A list of ops. The ops created by this hook will have - control dependencies on `train_ops`. - num_rows: int, the total number of rows to be processed. - num_cols: int, the total number of columns to be processed. - input_row_indices: A Tensor of type int64. The indices of the input rows - that are processed during the current sweep. All elements of - `input_row_indices` must be in [0, num_rows). - input_col_indices: A Tensor of type int64. The indices of the input - columns that are processed during the current sweep. All elements of - `input_col_indices` must be in [0, num_cols). - row_prep_ops: list of ops, to be run before the beginning of each row - sweep, in the given order. - col_prep_ops: list of ops, to be run before the beginning of each column - sweep, in the given order. + is_sweep_done_var: A Boolean tf.Variable, determines whether we are + starting a new sweep (this is used to determine when to run the prep ops + below). init_op: op to be run once before training. This is typically a local initialization op (such as cache initialization). - completed_sweeps_var: An integer tf.Variable, indicates the number of - completed sweeps. It is updated by the hook. + row_prep_ops: A list of TensorFlow ops, to be run before the beginning of + each row sweep (and during initialization), in the given order. + col_prep_ops: A list of TensorFlow ops, to be run before the beginning of + each column sweep (and during initialization), in the given order. + row_train_op: A TensorFlow op to be run during row sweeps. + col_train_op: A TensorFlow op to be run during column sweeps. + switch_op: A TensorFlow op to be run before each sweep. """ - self._num_rows = num_rows - self._num_cols = num_cols + self._is_row_sweep_var = is_row_sweep_var + self._is_sweep_done_var = is_sweep_done_var + self._init_op = init_op self._row_prep_ops = row_prep_ops self._col_prep_ops = col_prep_ops - self._init_op = init_op - self._is_row_sweep_var = is_row_sweep_var - self._completed_sweeps_var = completed_sweeps_var - # Boolean variable that determines whether the init_ops have been run. + self._row_train_op = row_train_op + self._col_train_op = col_train_op + self._switch_op = switch_op + # Boolean variable that determines whether the init_op has been run. self._is_initialized = False - # Ops to run jointly with train_ops, responsible for updating - # `is_row_sweep_var` and incrementing the `global_step` and - # `completed_sweeps` counters. - self._update_op, self._is_sweep_done_var, self._switch_op = ( - self._create_hook_ops(input_row_indices, input_col_indices, train_ops)) - - def _create_hook_ops(self, input_row_indices, input_col_indices, train_ops): - """Creates ops to update is_row_sweep_var, global_step and completed_sweeps. - - Creates two boolean tensors `processed_rows` and `processed_cols`, which - keep track of which rows/cols have been processed during the current sweep. - Returns ops that should be run after each row / col update. - - When `self._is_row_sweep_var` is True, it sets - processed_rows[input_row_indices] to True. - - When `self._is_row_sweep_var` is False, it sets - processed_cols[input_col_indices] to True. - - Args: - input_row_indices: A Tensor. The indices of the input rows that are - processed during the current sweep. - input_col_indices: A Tensor. The indices of the input columns that - are processed during the current sweep. - train_ops: A list of ops. The ops created by this function have control - dependencies on `train_ops`. - - Returns: - A tuple consisting of: - update_op: An op to be run jointly with training. It updates the state - and increments counters (global step and completed sweeps). - is_sweep_done_var: A Boolean tf.Variable, specifies whether the sweep is - done, i.e. all rows (during a row sweep) or all columns (during a - column sweep) have been processed. - switch_op: An op to be run in `self.before_run` when the sweep is done. - """ - processed_rows_init = array_ops.fill(dims=[self._num_rows], value=False) - with ops.colocate_with(processed_rows_init): - processed_rows = variable_scope.variable( - processed_rows_init, - collections=[ops.GraphKeys.GLOBAL_VARIABLES], - trainable=False, - name="sweep_hook_processed_rows") - processed_cols_init = array_ops.fill(dims=[self._num_cols], value=False) - with ops.colocate_with(processed_cols_init): - processed_cols = variable_scope.variable( - processed_cols_init, - collections=[ops.GraphKeys.GLOBAL_VARIABLES], - trainable=False, - name="sweep_hook_processed_cols") - switch_ops = control_flow_ops.group( - state_ops.assign( - self._is_row_sweep_var, - math_ops.logical_not(self._is_row_sweep_var)), - state_ops.assign(processed_rows, processed_rows_init), - state_ops.assign(processed_cols, processed_cols_init)) - is_sweep_done_var = variable_scope.variable( - False, - collections=[ops.GraphKeys.GLOBAL_VARIABLES], - trainable=False, - name="is_sweep_done") - - # After running the `train_ops`, updates `processed_rows` or - # `processed_cols` tensors, depending on whether this is a row or col sweep. - with ops.control_dependencies(train_ops): - with ops.colocate_with(processed_rows): - update_processed_rows = state_ops.scatter_update( - processed_rows, - input_row_indices, - math_ops.logical_and( - self._is_row_sweep_var, - array_ops.ones_like(input_row_indices, dtype=dtypes.bool))) - with ops.colocate_with(processed_cols): - update_processed_cols = state_ops.scatter_update( - processed_cols, - input_col_indices, - math_ops.logical_and( - math_ops.logical_not(self._is_row_sweep_var), - array_ops.ones_like(input_col_indices, dtype=dtypes.bool))) - update_processed_op = control_flow_ops.group( - update_processed_rows, update_processed_cols) - - with ops.control_dependencies([update_processed_op]): - is_sweep_done = math_ops.logical_or( - math_ops.reduce_all(processed_rows), - math_ops.reduce_all(processed_cols)) - # Increments global step. - global_step = framework_variables.get_global_step() - if global_step is not None: - global_step_incr_op = state_ops.assign_add( - global_step, 1, name="global_step_incr").op - else: - global_step_incr_op = control_flow_ops.no_op() - # Increments completed sweeps. - completed_sweeps_incr_op = state_ops.assign_add( - self._completed_sweeps_var, - math_ops.cast(is_sweep_done, dtypes.int32), - use_locking=True).op - update_ops = control_flow_ops.group( - global_step_incr_op, - completed_sweeps_incr_op, - state_ops.assign(is_sweep_done_var, is_sweep_done)) - - return update_ops, is_sweep_done_var, switch_ops def before_run(self, run_context): """Runs the appropriate prep ops, and requests running update ops.""" - # Runs the appropriate init ops and prep ops. sess = run_context.session is_sweep_done = sess.run(self._is_sweep_done_var) if not self._is_initialized: - logging.info("SweepHook running cache init op.") + logging.info("SweepHook running init op.") sess.run(self._init_op) if is_sweep_done: sess.run(self._switch_op) + is_row_sweep = sess.run(self._is_row_sweep_var) if is_sweep_done or not self._is_initialized: - logging.info("SweepHook running sweep prep ops.") - row_sweep = sess.run(self._is_row_sweep_var) - prep_ops = self._row_prep_ops if row_sweep else self._col_prep_ops + logging.info("SweepHook running prep ops for the {} sweep.".format( + "row" if is_row_sweep else "col")) + prep_ops = self._row_prep_ops if is_row_sweep else self._col_prep_ops for prep_op in prep_ops: sess.run(prep_op) - self._is_initialized = True - - # Requests running `self._update_op` jointly with the training op. logging.info("Next fit step starting.") - return session_run_hook.SessionRunArgs(fetches=[self._update_op]) - - def after_run(self, run_context, run_values): - logging.info("Fit step done.") + return session_run_hook.SessionRunArgs( + fetches=[self._row_train_op if is_row_sweep else self._col_train_op]) class _StopAtSweepHook(session_run_hook.SessionRunHook): @@ -246,6 +134,9 @@ def _wals_factorization_model_function(features, labels, mode, params): Returns: A ModelFnOps object. + + Raises: + ValueError: If `mode` is not recognized. """ assert labels is None use_factors_weights_cache = (params["use_factors_weights_cache_for_training"] @@ -269,86 +160,156 @@ def _wals_factorization_model_function(features, labels, mode, params): use_gramian_cache=use_gramian_cache) # Get input rows and cols. We either update rows or columns depending on - # the value of row_sweep, which is maintained using a session hook + # the value of row_sweep, which is maintained using a session hook. input_rows = features[WALSMatrixFactorization.INPUT_ROWS] input_cols = features[WALSMatrixFactorization.INPUT_COLS] - input_row_indices, _ = array_ops.unique(input_rows.indices[:, 0]) - input_col_indices, _ = array_ops.unique(input_cols.indices[:, 0]) - - # Train ops, controlled using the SweepHook - # We need to run the following ops: - # Before a row sweep: - # row_update_prep_gramian_op - # initialize_row_update_op - # During a row sweep: - # update_row_factors_op - # Before a col sweep: - # col_update_prep_gramian_op - # initialize_col_update_op - # During a col sweep: - # update_col_factors_op - - is_row_sweep_var = variable_scope.variable( - True, - trainable=False, - name="is_row_sweep", - collections=[ops.GraphKeys.GLOBAL_VARIABLES]) - completed_sweeps_var = variable_scope.variable( - 0, - trainable=False, - name=WALSMatrixFactorization.COMPLETED_SWEEPS, - collections=[ops.GraphKeys.GLOBAL_VARIABLES]) - - # The row sweep is determined by is_row_sweep_var (controlled by the - # sweep_hook) in TRAIN mode, and manually in EVAL mode. - is_row_sweep = (features[WALSMatrixFactorization.PROJECT_ROW] - if mode == model_fn.ModeKeys.EVAL else is_row_sweep_var) - - def update_row_factors(): - return model.update_row_factors(sp_input=input_rows, transpose_input=False) - - def update_col_factors(): - return model.update_col_factors(sp_input=input_cols, transpose_input=True) - - (_, train_op, - unregularized_loss, regularization, sum_weights) = control_flow_ops.cond( - is_row_sweep, update_row_factors, update_col_factors) - loss = unregularized_loss + regularization - root_weighted_squared_error = math_ops.sqrt(unregularized_loss / sum_weights) - - row_prep_ops = [ - model.row_update_prep_gramian_op, model.initialize_row_update_op - ] - col_prep_ops = [ - model.col_update_prep_gramian_op, model.initialize_col_update_op - ] - init_ops = [model.worker_init] - - sweep_hook = _SweepHook( - is_row_sweep_var, - [train_op, loss], - params["num_rows"], - params["num_cols"], - input_row_indices, - input_col_indices, - row_prep_ops, - col_prep_ops, - init_ops, - completed_sweeps_var) - training_hooks = [sweep_hook] - if max_sweeps is not None: - training_hooks.append(_StopAtSweepHook(max_sweeps)) - - # The root weighted squared error = - # \sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij ) - summary.scalar("loss", loss) # the estimated total training loss - summary.scalar("root_weighted_squared_error", root_weighted_squared_error) - summary.scalar("completed_sweeps", completed_sweeps_var) - - # Prediction ops (only return predictions in INFER mode) - predictions = {} - if mode == model_fn.ModeKeys.INFER: - project_row = features[WALSMatrixFactorization.PROJECT_ROW] + + # TRAIN mode: + if mode == model_fn.ModeKeys.TRAIN: + # Training consists of the folowing ops (controlled using a SweepHook). + # Before a row sweep: + # row_update_prep_gramian_op + # initialize_row_update_op + # During a row sweep: + # update_row_factors_op + # Before a col sweep: + # col_update_prep_gramian_op + # initialize_col_update_op + # During a col sweep: + # update_col_factors_op + + is_row_sweep_var = variable_scope.variable( + True, + trainable=False, + name="is_row_sweep", + collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + is_sweep_done_var = variable_scope.variable( + False, + trainable=False, + name="is_sweep_done", + collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + completed_sweeps_var = variable_scope.variable( + 0, + trainable=False, + name=WALSMatrixFactorization.COMPLETED_SWEEPS, + collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + loss_var = variable_scope.variable( + 0., + trainable=False, + name=WALSMatrixFactorization.LOSS, + collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + # The root weighted squared error = + # \sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij ) + rwse_var = variable_scope.variable( + 0., + trainable=False, + name=WALSMatrixFactorization.RWSE, + collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + + summary.scalar("loss", loss_var) + summary.scalar("root_weighted_squared_error", rwse_var) + summary.scalar("completed_sweeps", completed_sweeps_var) + + # Increments global step. + global_step = training_util.get_global_step() + if global_step: + global_step_incr_op = state_ops.assign_add( + global_step, 1, name="global_step_incr").op + else: + global_step_incr_op = control_flow_ops.no_op() + + def create_axis_ops(sp_input, num_items, update_fn, axis_name): + """Creates book-keeping and training ops for a given axis. + + Args: + sp_input: A SparseTensor corresponding to the row or column batch. + num_items: An integer, the total number of items of this axis. + update_fn: A function that takes one argument (`sp_input`), and that + returns a tuple of + * new_factors: A flot Tensor of the factor values after update. + * update_op: a TensorFlow op which updates the factors. + * loss: A float Tensor, the unregularized loss. + * reg_loss: A float Tensor, the regularization loss. + * sum_weights: A float Tensor, the sum of factor weights. + axis_name: A string that specifies the name of the axis. + + Returns: + A tuple consisting of: + * reset_processed_items_op: A TensorFlow op, to be run before the + beginning of any sweep. It marks all items as not-processed. + * axis_train_op: A Tensorflow op, to be run during this axis' sweeps. + """ + processed_items_init = array_ops.fill(dims=[num_items], value=False) + with ops.colocate_with(processed_items_init): + processed_items = variable_scope.variable( + processed_items_init, + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + trainable=False, + name="processed_" + axis_name) + reset_processed_items_op = state_ops.assign( + processed_items, processed_items_init, + name="reset_processed_" + axis_name) + _, update_op, loss, reg, sum_weights = update_fn(sp_input) + input_indices = sp_input.indices[:, 0] + with ops.control_dependencies([ + update_op, + state_ops.assign(loss_var, loss + reg), + state_ops.assign(rwse_var, math_ops.sqrt(loss / sum_weights))]): + with ops.colocate_with(processed_items): + update_processed_items = state_ops.scatter_update( + processed_items, + input_indices, + array_ops.ones_like(input_indices, dtype=dtypes.bool), + name="update_processed_{}_indices".format(axis_name)) + with ops.control_dependencies([update_processed_items]): + is_sweep_done = math_ops.reduce_all(processed_items) + axis_train_op = control_flow_ops.group( + global_step_incr_op, + state_ops.assign(is_sweep_done_var, is_sweep_done), + state_ops.assign_add( + completed_sweeps_var, + math_ops.cast(is_sweep_done, dtypes.int32)), + name="{}_sweep_train_op".format(axis_name)) + return reset_processed_items_op, axis_train_op + + reset_processed_rows_op, row_train_op = create_axis_ops( + input_rows, + params["num_rows"], + lambda x: model.update_row_factors(sp_input=x, transpose_input=False), + "rows") + reset_processed_cols_op, col_train_op = create_axis_ops( + input_cols, + params["num_cols"], + lambda x: model.update_col_factors(sp_input=x, transpose_input=True), + "cols") + switch_op = control_flow_ops.group( + state_ops.assign( + is_row_sweep_var, math_ops.logical_not(is_row_sweep_var)), + reset_processed_rows_op, + reset_processed_cols_op, + name="sweep_switch_op") + row_prep_ops = [ + model.row_update_prep_gramian_op, model.initialize_row_update_op] + col_prep_ops = [ + model.col_update_prep_gramian_op, model.initialize_col_update_op] + init_op = model.worker_init + sweep_hook = _SweepHook( + is_row_sweep_var, is_sweep_done_var, init_op, + row_prep_ops, col_prep_ops, row_train_op, col_train_op, switch_op) + training_hooks = [sweep_hook] + if max_sweeps is not None: + training_hooks.append(_StopAtSweepHook(max_sweeps)) + + return model_fn.ModelFnOps( + mode=model_fn.ModeKeys.TRAIN, + predictions={}, + loss=loss_var, + eval_metric_ops={}, + train_op=control_flow_ops.no_op(), + training_hooks=training_hooks) + + # INFER mode + elif mode == model_fn.ModeKeys.INFER: projection_weights = features.get( WALSMatrixFactorization.PROJECTION_WEIGHTS) @@ -364,17 +325,45 @@ def _wals_factorization_model_function(features, labels, mode, params): projection_weights=projection_weights, transpose_input=True) - predictions[WALSMatrixFactorization.PROJECTION_RESULT] = ( - control_flow_ops.cond(project_row, get_row_projection, - get_col_projection)) + predictions = { + WALSMatrixFactorization.PROJECTION_RESULT: control_flow_ops.cond( + features[WALSMatrixFactorization.PROJECT_ROW], + get_row_projection, + get_col_projection) + } - return model_fn.ModelFnOps( - mode=mode, - predictions=predictions, - loss=loss, - eval_metric_ops={}, - train_op=train_op, - training_hooks=training_hooks) + return model_fn.ModelFnOps( + mode=model_fn.ModeKeys.INFER, + predictions=predictions, + loss=None, + eval_metric_ops={}, + train_op=control_flow_ops.no_op(), + training_hooks=[]) + + # EVAL mode + elif mode == model_fn.ModeKeys.EVAL: + def get_row_loss(): + _, _, loss, reg, _ = model.update_row_factors( + sp_input=input_rows, transpose_input=False) + return loss + reg + def get_col_loss(): + _, _, loss, reg, _ = model.update_col_factors( + sp_input=input_cols, transpose_input=True) + return loss + reg + loss = control_flow_ops.cond( + features[WALSMatrixFactorization.PROJECT_ROW], + get_row_loss, + get_col_loss) + return model_fn.ModelFnOps( + mode=model_fn.ModeKeys.EVAL, + predictions={}, + loss=loss, + eval_metric_ops={}, + train_op=control_flow_ops.no_op(), + training_hooks=[]) + + else: + raise ValueError("mode=%s is not recognized." % str(mode)) class WALSMatrixFactorization(estimator.Estimator): @@ -452,6 +441,10 @@ class WALSMatrixFactorization(estimator.Estimator): PROJECTION_RESULT = "projection" # Name of the completed_sweeps variable COMPLETED_SWEEPS = "completed_sweeps" + # Name of the loss variable + LOSS = "WALS_loss" + # Name of the Root Weighted Squared Error variable + RWSE = "WALS_RWSE" def __init__(self, num_rows, diff --git a/tensorflow/contrib/factorization/python/ops/wals_test.py b/tensorflow/contrib/factorization/python/ops/wals_test.py index 8bd72b7025..36b483c6d7 100644 --- a/tensorflow/contrib/factorization/python/ops/wals_test.py +++ b/tensorflow/contrib/factorization/python/ops/wals_test.py @@ -417,73 +417,67 @@ class WALSMatrixFactorizationUnsupportedTest(test.TestCase): class SweepHookTest(test.TestCase): - def setUp(self): - self._num_rows = 5 - self._num_cols = 7 - self._train_op = control_flow_ops.no_op() - self._row_prep_done = variables.Variable(False) - self._col_prep_done = variables.Variable(False) - self._init_done = variables.Variable(False) - self._row_prep_ops = [state_ops.assign(self._row_prep_done, True)] - self._col_prep_ops = [state_ops.assign(self._col_prep_done, True)] - self._init_ops = [state_ops.assign(self._init_done, True)] - self._input_row_indices_ph = array_ops.placeholder(dtypes.int64) - self._input_col_indices_ph = array_ops.placeholder(dtypes.int64) - def test_sweeps(self): - def ind_feed(row_indices, col_indices): - return { - self._input_row_indices_ph: row_indices, - self._input_col_indices_ph: col_indices - } + is_row_sweep_var = variables.Variable(True) + is_sweep_done_var = variables.Variable(False) + init_done = variables.Variable(False) + row_prep_done = variables.Variable(False) + col_prep_done = variables.Variable(False) + row_train_done = variables.Variable(False) + col_train_done = variables.Variable(False) + + init_op = state_ops.assign(init_done, True) + row_prep_op = state_ops.assign(row_prep_done, True) + col_prep_op = state_ops.assign(col_prep_done, True) + row_train_op = state_ops.assign(row_train_done, True) + col_train_op = state_ops.assign(col_train_done, True) + train_op = control_flow_ops.no_op() + switch_op = control_flow_ops.group( + state_ops.assign(is_sweep_done_var, False), + state_ops.assign(is_row_sweep_var, + math_ops.logical_not(is_row_sweep_var))) + mark_sweep_done = state_ops.assign(is_sweep_done_var, True) with self.test_session() as sess: - is_row_sweep_var = variables.Variable(True) - completed_sweeps_var = variables.Variable(0) sweep_hook = wals_lib._SweepHook( is_row_sweep_var, - [self._train_op], - self._num_rows, - self._num_cols, - self._input_row_indices_ph, - self._input_col_indices_ph, - self._row_prep_ops, - self._col_prep_ops, - self._init_ops, - completed_sweeps_var) + is_sweep_done_var, + init_op, + [row_prep_op], + [col_prep_op], + row_train_op, + col_train_op, + switch_op) mon_sess = monitored_session._HookedSession(sess, [sweep_hook]) sess.run([variables.global_variables_initializer()]) - # Init ops should run before the first run. Row sweep not completed. - mon_sess.run(self._train_op, ind_feed([0, 1, 2], [])) - self.assertTrue(sess.run(self._init_done), - msg='init ops not run by the sweep_hook') - self.assertTrue(sess.run(self._row_prep_done), - msg='row_prep not run by the sweep_hook') - self.assertTrue(sess.run(is_row_sweep_var), - msg='Row sweep is not complete but is_row_sweep is ' - 'False.') - # Row sweep completed. - mon_sess.run(self._train_op, ind_feed([3, 4], [0, 1, 2, 3, 4, 5, 6])) - self.assertTrue(sess.run(completed_sweeps_var) == 1, - msg='Completed sweeps should be equal to 1.') - self.assertTrue(sess.run(sweep_hook._is_sweep_done_var), - msg='Sweep is complete but is_sweep_done is False.') - # Col init ops should run. Col sweep not completed. - mon_sess.run(self._train_op, ind_feed([], [0, 1, 2, 3, 4])) - self.assertTrue(sess.run(self._col_prep_done), - msg='col_prep not run by the sweep_hook') - self.assertFalse(sess.run(is_row_sweep_var), - msg='Col sweep is not complete but is_row_sweep is ' - 'True.') - self.assertFalse(sess.run(sweep_hook._is_sweep_done_var), - msg='Sweep is not complete but is_sweep_done is True.') - # Col sweep completed. - mon_sess.run(self._train_op, ind_feed([], [4, 5, 6])) - self.assertTrue(sess.run(sweep_hook._is_sweep_done_var), - msg='Sweep is complete but is_sweep_done is False.') - self.assertTrue(sess.run(completed_sweeps_var) == 2, - msg='Completed sweeps should be equal to 2.') + # Row sweep. + mon_sess.run(train_op) + self.assertTrue(sess.run(init_done), + msg='init op not run by the Sweephook') + self.assertTrue(sess.run(row_prep_done), + msg='row_prep_op not run by the SweepHook') + self.assertTrue(sess.run(row_train_done), + msg='row_train_op not run by the SweepHook') + self.assertTrue( + sess.run(is_row_sweep_var), + msg='Row sweep is not complete but is_row_sweep_var is False.') + # Col sweep. + mon_sess.run(mark_sweep_done) + mon_sess.run(train_op) + self.assertTrue(sess.run(col_prep_done), + msg='col_prep_op not run by the SweepHook') + self.assertTrue(sess.run(col_train_done), + msg='col_train_op not run by the SweepHook') + self.assertFalse( + sess.run(is_row_sweep_var), + msg='Col sweep is not complete but is_row_sweep_var is True.') + # Row sweep. + mon_sess.run(mark_sweep_done) + mon_sess.run(train_op) + self.assertTrue( + sess.run(is_row_sweep_var), + msg='Col sweep is complete but is_row_sweep_var is False.') class StopAtSweepHookTest(test.TestCase): -- GitLab From 2eb758397f02e4234e38811c08723be29c83dbc4 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Tue, 14 Nov 2017 14:46:14 -0800 Subject: [PATCH 0432/1801] Make tf.make_template compatible with eager. PiperOrigin-RevId: 175738521 --- .../python/kernel_tests/template_test.py | 151 ++++++++++++-- tensorflow/python/ops/template.py | 186 ++++++++++++++++++ 2 files changed, 317 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py index 8b9c58ac3f..798bd0fe89 100644 --- a/tensorflow/python/kernel_tests/template_test.py +++ b/tensorflow/python/kernel_tests/template_test.py @@ -20,7 +20,9 @@ from __future__ import print_function import traceback from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import random_seed +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -50,6 +52,13 @@ def function_with_create(trainable): "dummy", shape=[1], initializer=init_ops.zeros_initializer()) +def function_with_side_create(trainable, name="side"): + """Creates a variable as a side effect using tf.get_variable.""" + variable_scope.get_variable(name, shape=[1], trainable=trainable) + return variable_scope.get_variable( + "dummy", shape=[1], initializer=init_ops.zeros_initializer()) + + def variable_scoped_function_with_local_variable(): variable_scope.get_local_variable( "local", shape=[1], initializer=init_ops.zeros_initializer()) @@ -99,6 +108,46 @@ class TemplateTest(test.TestCase): # Parameters are tied, so the loss should have gone down when we trained it. self.assertLess(final_test_loss, initial_test_loss) + def test_end_to_end_eager(self): + """This test shows a very simple line model with test_loss in eager mode. + + The template is used to share parameters between a training and test model. + """ + with context.eager_mode(): + # y = 2x + 1 + training_input, training_output = ([1., 2., 3., 4.], [2.8, 5.1, 7.2, 8.7]) + test_input, test_output = ([5., 6., 7., 8.], [11, 13, 15, 17]) + + random_seed.set_random_seed(1234) + + def test_line(x): + m = variable_scope.get_variable( + "w", shape=[], initializer=init_ops.truncated_normal_initializer()) + b = variable_scope.get_variable( + "b", shape=[], initializer=init_ops.truncated_normal_initializer()) + return x * m + b + + line_template = template.make_template("line", test_line) + + def train_loss(): + train_prediction = line_template(training_input) + return math_ops.reduce_mean( + math_ops.square(train_prediction - training_output)) + + def test_loss(): + test_prediction = line_template(test_input) + return math_ops.reduce_mean( + math_ops.square(test_prediction - test_output)) + + optimizer = gradient_descent.GradientDescentOptimizer(0.1) + initial_test_loss = test_loss() + optimizer.minimize(train_loss) + final_test_loss = test_loss() + + # Parameters are tied, so the loss should have gone down after training. + self.assertLess(final_test_loss.numpy(), initial_test_loss.numpy()) + + @test_util.run_in_graph_and_eager_modes() def test_skip_stack_frames(self): first = traceback.format_stack() second = traceback.format_stack() @@ -106,6 +155,7 @@ class TemplateTest(test.TestCase): self.assertEqual(1, len(result)) self.assertNotEqual(len(first), len(result)) + @test_util.run_in_graph_and_eager_modes() def test_template_with_name(self): tmpl1 = template.make_template("s1", variable_scoped_function) tmpl2 = template.make_template("s1", variable_scoped_function) @@ -118,15 +168,23 @@ class TemplateTest(test.TestCase): self.assertEqual("s1/dummy:0", v1.name) self.assertEqual("s1_1/dummy:0", v3.name) - def test_unique_name_raise_error(self): + def test_same_unique_name_raise_error(self): tmpl1 = template.make_template( "_", variable_scoped_function, unique_name_="s1") tmpl1() tmpl2 = template.make_template( "_", variable_scoped_function, unique_name_="s1") - with self.assertRaises(ValueError): + with self.assertRaisesRegexp( + ValueError, "Variable s1/dummy already exists, disallowed.*"): tmpl2() + def test_unique_name_raise_error_in_eager(self): + with context.eager_mode(): + with self.assertRaisesRegexp( + ValueError, "unique_name cannot be used in eager mode."): + template.make_template( + "_", variable_scoped_function, unique_name_="s1") + def test_unique_name_and_reuse(self): tmpl1 = template.make_template( "_", variable_scoped_function, unique_name_="s1") @@ -142,6 +200,7 @@ class TemplateTest(test.TestCase): self.assertEqual(v1, v3) self.assertEqual("s1/dummy:0", v1.name) + @test_util.run_in_graph_and_eager_modes() def test_template_in_scope(self): tmpl1 = template.make_template("s1", variable_scoped_function) tmpl2 = template.make_template("s1", variable_scoped_function) @@ -158,6 +217,7 @@ class TemplateTest(test.TestCase): self.assertEqual("scope/s1/dummy:0", v1.name) self.assertEqual("scope/s1_1/dummy:0", v3.name) + @test_util.run_in_graph_and_eager_modes() def test_template_with_internal_reuse(self): tmpl1 = template.make_template("s1", internally_variable_scoped_function) tmpl2 = template.make_template("s1", internally_variable_scoped_function) @@ -173,10 +233,13 @@ class TemplateTest(test.TestCase): with self.assertRaises(ValueError): tmpl1("not_test") + @test_util.run_in_graph_and_eager_modes() def test_template_without_name(self): - with self.assertRaises(ValueError): + with self.assertRaisesRegexp( + ValueError, "name cannot be None."): template.make_template(None, variable_scoped_function) + @test_util.run_in_graph_and_eager_modes() def test_make_template(self): # Test both that we can call it with positional and keywords. tmpl1 = template.make_template( @@ -199,10 +262,28 @@ class TemplateTest(test.TestCase): with self.assertRaises(ValueError): tmpl() + @test_util.run_in_graph_and_eager_modes() + def test_enforces_no_extra_trainable_variables_eager(self): + tmpl = template.make_template("s", + function_with_side_create, + trainable=True) + + tmpl(name="1") + with self.assertRaises(ValueError): + tmpl(name="2") + def test_permits_extra_non_trainable_variables(self): tmpl = template.make_template("s", function_with_create, trainable=False) self.assertEqual(tmpl(), tmpl()) + def test_permits_extra_non_trainable_variables_eager(self): + with context.eager_mode(): + tmpl = template.make_template("s", + function_with_side_create, + trainable=False) + self.assertEqual(tmpl(name="1"), tmpl(name="2")) + + @test_util.run_in_graph_and_eager_modes() def test_internal_variable_reuse(self): def nested(): @@ -241,11 +322,28 @@ class TemplateTest(test.TestCase): v1 = tmpl1() v2 = tmpl1() v3 = tmpl2() - self.assertEqual(v1, v2) + self.assertTrue(v1, v2) self.assertNotEqual(v1, v3) self.assertEqual("s1/nested_1/dummy:0", v1.name) self.assertEqual("s1_1/nested_1/dummy:0", v3.name) + def test_nested_eager_templates_raises_error(self): + + def nested_template(): + nested1 = template.make_template("nested", variable_scoped_function) + nested2 = template.make_template("nested", variable_scoped_function) + v1 = nested1() + v2 = nested2() + self.assertNotEqual(v1, v2) + return v2 + + with context.eager_mode(): + tmpl1 = template.make_template("s1", nested_template) + with self.assertRaisesRegexp( + ValueError, "Nested EagerTemaplates are not currently supported."): + tmpl1() + + @test_util.run_in_graph_and_eager_modes() def test_immediate_scope_creation(self): # Create templates in scope a then call in scope b. make_template should # capture the scope the first time it is called, and make_immediate_template @@ -270,6 +368,7 @@ class TemplateTest(test.TestCase): self.assertEqual("ctor_scope/a/dummy:0", inner_imm_var.name) self.assertEqual("call_scope/b/dummy:0", inner_defer_var.name) + @test_util.run_in_graph_and_eager_modes() def test_scope_access(self): # Ensure that we can access the scope inside the template, because the name # of that scope may be different from the name we pass to make_template, due @@ -294,6 +393,7 @@ class TemplateTest(test.TestCase): # Template is called at the top level, so there is no preceding "foo_2". self.assertEqual(tc.variable_scope.name, "blah") + @test_util.run_in_graph_and_eager_modes() def test_custom_getter(self): # Custom getter that maintains call count and forwards to true getter custom_getter_count = [0] @@ -326,6 +426,7 @@ class TemplateTest(test.TestCase): tmpl2() self.assertEqual(custom_getter_count[0], 2) + @test_util.run_in_graph_and_eager_modes() def test_fails_gracefully(self): for create_scope_now in [True, False]: def module_function_with_one_arg(inputs): @@ -336,7 +437,7 @@ class TemplateTest(test.TestCase): templatized_function = template.make_template( "f1", module_function_with_one_arg, create_scope_now_=create_scope_now) - data = array_ops.zeros(1) + data = array_ops.zeros([1]) try: # Try to connect with a kwarg which is unsupported. templatized_function(data, is_training=True) @@ -348,6 +449,7 @@ class TemplateTest(test.TestCase): templatized_function(data) self.assertTrue(templatized_function._variables_created) + @test_util.run_in_graph_and_eager_modes() def test_name_scopes_for_variable_scopes(self): # Test that name scopes are not unnecessarily uniquified (but are # still uniquified when necessary). @@ -374,12 +476,13 @@ class TemplateTest(test.TestCase): outputs_b, _ = linear1(inputs) self.assertEquals("foo", linear1.variable_scope.name) self.assertEquals("foo/w:0", w1.name) - self.assertEquals("foo/add:0", outputs_a.name, - "First application of template should get " - "same name scope as variables.") - self.assertEquals("foo_1/add:0", outputs_b.name, - "Second application of template should get " - "a freshly uniquified name scope.") + if context.in_graph_mode(): + self.assertEquals("foo/add:0", outputs_a.name, + "First application of template should get " + "same name scope as variables.") + self.assertEquals("foo_1/add:0", outputs_b.name, + "Second application of template should get " + "a freshly uniquified name scope.") linear2 = make_linear_module(output_size=2, name="foo") outputs_c, w2 = linear2(inputs) @@ -388,20 +491,26 @@ class TemplateTest(test.TestCase): "New template gets a freshly uniquified variable scope " "because 'foo' is already taken.") self.assertEquals("foo_1/w:0", w2.name) - self.assertEquals("foo_1_1/add:0", outputs_c.name, - "First application of template would get " - "same name scope as variables, but 'foo_1' is already " - "a name scope.") - self.assertEquals("foo_1_2/add:0", outputs_d.name, - "Second application of template should also get " - "a freshly uniquified name scope.") - + if context.in_graph_mode(): + self.assertEquals("foo_1_1/add:0", outputs_c.name, + "First application of template would get " + "same name scope as variables, but 'foo_1' is already " + "a name scope.") + self.assertEquals("foo_1_2/add:0", outputs_d.name, + "Second application of template should also get " + "a freshly uniquified name scope.") + + @test_util.run_in_graph_and_eager_modes() def test_global_variables(self): # Make sure global_variables are created. with variable_scope.variable_scope("foo"): # Create two templates with the same name, ensure scopes are made unique. ta = template.make_template("bar", variable_scoped_function, True) - tb = template.make_template("s", function_with_create, trainable=False) + if context.in_eager_mode(): + tb = template.make_template("s", function_with_side_create, + trainable=False) + else: + tb = template.make_template("s", function_with_create, trainable=False) # Initially there are not variables created. self.assertEqual([], ta.global_variables) @@ -413,6 +522,7 @@ class TemplateTest(test.TestCase): self.assertEqual(1, len(ta.global_variables)) self.assertEqual(2, len(tb.global_variables)) + @test_util.run_in_graph_and_eager_modes() def test_trainable_variables(self): # Make sure trainable_variables are created. with variable_scope.variable_scope("foo2"): @@ -430,6 +540,7 @@ class TemplateTest(test.TestCase): self.assertEqual(1, len(ta.trainable_variables)) self.assertEqual(1, len(tb.trainable_variables)) + # TODO(apassos) handle local variables in Eager def test_local_variables(self): # Make sure trainable_variables are created. with variable_scope.variable_scope("foo3"): diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py index 24ef70c6f4..98578b799a 100644 --- a/tensorflow/python/ops/template.py +++ b/tensorflow/python/ops/template.py @@ -21,6 +21,7 @@ from __future__ import print_function import functools import traceback +from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging @@ -138,6 +139,10 @@ def make_template(name_, func_, create_scope_now_=False, unique_name_=None, """ if kwargs: func_ = functools.partial(func_, **kwargs) + if context.in_eager_mode(): + return EagerTemplate( + name_, func_, create_scope_now=create_scope_now_, + unique_name=unique_name_, custom_getter=custom_getter_) return Template( name_, func_, create_scope_now=create_scope_now_, unique_name=unique_name_, custom_getter=custom_getter_) @@ -336,3 +341,184 @@ class Template(object): def var_scope(self): """Returns the variable scope object created by this Template.""" return self._variable_scope + + +class EagerTemplate(Template): + """Wrap a function to aid in variable sharing in Eager mode. + + Templates are functions that create variables the first time they are called + and reuse them thereafter. See `make_template` for full documentation. + + Note: By default, the full variable scope is captured at the time of first + call. If `create_scope_now` is passed as True to the constructor, the full + scope will be captured there, but no variables will be created until the first + call. + """ + + def __init__(self, name, func, create_scope_now=False, unique_name=None, + custom_getter=None): + """Creates a template for the given function. + + Args: + name: A name for the scope created by this template. The + name will be made unique by appending `_N` to the it (see how + `tf.variable_scope` treats the `default_name` for details). + func: The function to apply each time. + create_scope_now: Whether to create the scope at Template construction + time, rather than first call. Defaults to false. Creating the scope at + construction time may be more convenient if the template is passed + through much lower level code, and you want to be sure of the scope + name without knowing exactly where it will be first called. If set to + True, the scope will be created in the constructor, and all subsequent + times in __call__, leading to a trailing numeral being added to the + names of all created Tensors. If set to False, the scope will be created + at the first call location. + unique_name: When used, it overrides name_ and is not made unique. If a + template of the same scope/unique_name already exists and reuse is + false, an error is raised. Defaults to None. + custom_getter: optional custom getter to pass to variable_scope() + + Raises: + RuntimeError: if eager mode is not enabled. + ValueError: if the name is None or unique_name is provided. + """ + if not context.in_eager_mode(): + raise RuntimeError( + "{} objects can only be used when eager execution is enabled, use " + "tf.Template for graph construction". + format(type(self))) + if unique_name: + raise ValueError("unique_name cannot be used in eager mode.") + super(EagerTemplate, self).__init__(name, func, create_scope_now, + unique_name, custom_getter) + # Create an eager variable store only if the current variable store cannot + # store eager variables. This should allow for correct nesting. + default_vstore = variable_scope._get_default_variable_store() # pylint: disable=protected-access + if default_vstore._store_eager_variables: # pylint: disable=protected-access + raise ValueError("Nested EagerTemaplates are not currently supported.") + else: + self._eager_variable_store = variable_scope.EagerVariableStore() + + def _call_func(self, args, kwargs, check_for_new_variables): + try: + vars_at_start = self._eager_variable_store.variables() + trainable_at_start = self._eager_variable_store.trainable_variables() + + result = self._func(*args, **kwargs) + if check_for_new_variables: + trainable_variables = self._eager_variable_store.trainable_variables() + # If a variable that we intend to train is created as a side effect + # of creating a template, then that is almost certainly an error. + if len(trainable_at_start) != len(trainable_variables): + raise ValueError("Trainable variable created when calling a template " + "after the first time, perhaps you used tf.Variable " + "when you meant tf.get_variable: %s" % + list(set(trainable_variables) - + set(trainable_at_start))) + + # Non-trainable tracking variables are a legitimate reason why a new + # variable would be created, but it is a relatively advanced use-case, + # so log it. + variables = self._eager_variable_store.variables() + if len(vars_at_start) != len(variables): + logging.info("New variables created when calling a template after " + "the first time, perhaps you used tf.Variable when you " + "meant tf.get_variable: %s", + list(set(variables) - set(vars_at_start))) + return result + except Exception as exc: + # Reraise the exception, but append the original definition to the + # trace. + args = exc.args + if not args: + arg0 = "" + else: + arg0 = args[0] + trace = "".join(_skip_common_stack_elements(self._stacktrace, + traceback.format_stack())) + arg0 = "%s\n\noriginally defined at:\n%s" % (arg0, trace) + new_args = [arg0] + new_args.extend(args[1:]) + exc.args = tuple(new_args) + raise + + def __call__(self, *args, **kwargs): + if self._variable_scope: + if self._variables_created: + # This is not the first visit to __call__, so variables have already + # been created, and we want to reuse them. + with variable_scope.variable_scope(self._variable_scope, + reuse=variable_scope.AUTO_REUSE): + with self._eager_variable_store.as_default(): + return self._call_func(args, kwargs, check_for_new_variables=True) + else: + # This is the first visit to __call__, but the scope has already been + # created in the constructor. Set _variables_created after the inner + # function is successfully called so that subsequent calls take the if + # branch above. + with variable_scope.variable_scope(self._variable_scope, + reuse=variable_scope.AUTO_REUSE): + with self._eager_variable_store.as_default(): + result = self._call_func(args, kwargs, + check_for_new_variables=False) + self._variables_created = True + return result + else: + # The scope was not created at construction time, so create it here. + # Subsequent calls should reuse variables. + with variable_scope.variable_scope( + self._unique_name, self._name, + custom_getter=self._custom_getter) as vs: + self._variable_scope = vs + with self._eager_variable_store.as_default(): + result = self._call_func(args, kwargs, + check_for_new_variables=False) + self._variables_created = True + return result + + @property + def name(self): + """Returns the name given to this Template.""" + return self._name + + @property + def func(self): + """Returns the func given to this Template.""" + return self._func + + @property + def variable_scope(self): + """Returns the variable scope object created by this Template.""" + return self._variable_scope + + @property + def variable_scope_name(self): + """Returns the variable scope name created by this Template.""" + if self._variable_scope: + name = self._variable_scope.name + # To prevent partial matches on the scope_name, we add '/' at the end. + return name if name[-1] == "/" else name + "/" + + @property + def variables(self): + """Returns the list of trainable variables created by the Template.""" + # Currently there is no local variable in Eager mode. + return self._eager_variable_store.variables() + + @property + def trainable_variables(self): + """Returns the list of trainable variables created by the Template.""" + # Currently there is no local variable in Eager mode. + return self._eager_variable_store.trainable_variables() + + @property + def global_variables(self): + """Returns the list of global variables created by the Template.""" + # Currently there is no local variable in Eager mode. + return self.variables + + @property + def local_variables(self): + """Returns the list of global variables created by the Template.""" + # Currently there is no local variable in Eager mode. + return [] -- GitLab From bc0b26046fa729612b0017815b72a5faa4890e86 Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Tue, 14 Nov 2017 14:57:53 -0800 Subject: [PATCH 0433/1801] Add seed to tf.contrib.layers.dropout. This allows customizing the dropout seed via arg_scope. PiperOrigin-RevId: 175740302 --- tensorflow/contrib/layers/python/layers/layers.py | 6 +++++- .../contrib/layers/python/layers/layers_test.py | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index ad4a0b302f..46b3eeae91 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -1403,7 +1403,8 @@ def dropout(inputs, noise_shape=None, is_training=True, outputs_collections=None, - scope=None): + scope=None, + seed=None): """Returns a dropout op applied to the input. With probability `keep_prob`, outputs the input element scaled up by @@ -1421,6 +1422,8 @@ def dropout(inputs, Otherwise, inputs is returned. outputs_collections: Collection to add the outputs. scope: Optional scope for name_scope. + seed: A Python integer. Used to create random seeds. See + @{tf.set_random_seed} for behavior. Returns: A tensor representing the output of the operation. @@ -1430,6 +1433,7 @@ def dropout(inputs, inputs = ops.convert_to_tensor(inputs) layer = core_layers.Dropout(rate=1 - keep_prob, noise_shape=noise_shape, + seed=seed, name=sc.name, _scope=sc) outputs = layer.apply(inputs, training=is_training) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 2837a3172d..ff7f0e4462 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1345,11 +1345,20 @@ class DropoutTest(test.TestCase): num_elem_initial = math_ops.reduce_mean(math_ops.to_float(images > 0)) output = _layers.dropout(images) num_elem = math_ops.reduce_mean(math_ops.to_float(output > 0)) - sess.run(variables_lib.global_variables_initializer()) num_elem, num_elem_initial = sess.run([num_elem, num_elem_initial]) self.assertLess(num_elem, num_elem_initial / 2 + 0.1) self.assertGreater(num_elem, num_elem_initial / 2 - 0.1) + def testDropoutSeed(self): + """Test that providing the same seed produces the same result.""" + height, width = 10, 10 + with self.test_session() as sess: + images = random_ops.random_uniform( + (5, height, width, 3), seed=1, name='images') + output1 = _layers.dropout(images, seed=1) + output2 = _layers.dropout(images, seed=1) + self.assertAllEqual(*sess.run([output1, output2])) + def testCreateDropoutNoTraining(self): height, width = 3, 3 with self.test_session() as sess: @@ -1358,7 +1367,6 @@ class DropoutTest(test.TestCase): num_elem_initial = math_ops.reduce_mean(math_ops.to_float(images > 0)) output = _layers.dropout(images, is_training=False) num_elem = math_ops.reduce_mean(math_ops.to_float(output > 0)) - sess.run(variables_lib.global_variables_initializer()) num_elem, num_elem_initial = sess.run([num_elem, num_elem_initial]) self.assertEqual(num_elem, num_elem_initial) outputs, inputs = sess.run([output, images]) -- GitLab From f89cffd37c88e4d9fa0ee3ac191e6f5fd5c005c8 Mon Sep 17 00:00:00 2001 From: Anna R Date: Tue, 14 Nov 2017 15:00:48 -0800 Subject: [PATCH 0434/1801] Internal change. PiperOrigin-RevId: 175740778 --- tensorflow/core/framework/op_def_util.cc | 9 ++ tensorflow/core/framework/op_def_util.h | 4 + tensorflow/python/BUILD | 1 + tensorflow/python/eager/BUILD | 1 + .../python/eager/python_eager_op_gen.cc | 30 +++-- tensorflow/python/eager/python_eager_op_gen.h | 3 +- .../python/eager/python_eager_op_gen_main.cc | 23 +++- tensorflow/python/framework/python_op_gen.cc | 120 +++++++++++++----- tensorflow/python/framework/python_op_gen.h | 17 ++- .../python/framework/python_op_gen_internal.h | 6 +- .../python/framework/python_op_gen_main.cc | 26 +++- 11 files changed, 184 insertions(+), 56 deletions(-) diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc index 2f737a0f16..f7d4166f97 100644 --- a/tensorflow/core/framework/op_def_util.cc +++ b/tensorflow/core/framework/op_def_util.cc @@ -161,6 +161,15 @@ OpDef::AttrDef* FindAttrMutable(StringPiece name, OpDef* op_def) { return nullptr; } +const OpDef::ArgDef* FindInputArg(StringPiece name, const OpDef& op_def) { + for (int i = 0; i < op_def.input_arg_size(); ++i) { + if (op_def.input_arg(i).name() == name) { + return &op_def.input_arg(i); + } + } + return nullptr; +} + #define VALIDATE(EXPR, ...) \ do { \ if (!(EXPR)) { \ diff --git a/tensorflow/core/framework/op_def_util.h b/tensorflow/core/framework/op_def_util.h index c329e4627c..f9661dcedd 100644 --- a/tensorflow/core/framework/op_def_util.h +++ b/tensorflow/core/framework/op_def_util.h @@ -43,6 +43,10 @@ Status ValidateAttrValue(const AttrValue& attr_value, const OpDef::AttrDef* FindAttr(StringPiece name, const OpDef& op_def); OpDef::AttrDef* FindAttrMutable(StringPiece name, OpDef* op_def); +// Searches op_def for input argument with the indicated name. +// Returns nullptr if no such attr is found. +const OpDef::ArgDef* FindInputArg(StringPiece name, const OpDef& op_def); + // Produce a human-readable version of an op_def that is more concise // than a text-format proto. Excludes descriptions. string SummarizeOpDef(const OpDef& op_def); diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 76477384de..bc034e1902 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -444,6 +444,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:op_gen_lib", "//tensorflow/core:protos_all_cc", "//tensorflow/python/eager:python_eager_op_gen", ], diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index c36647b21c..912aa4c195 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -222,6 +222,7 @@ cc_library( ":python_eager_op_gen", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:op_gen_lib", "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index 371df563bb..374894733a 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include #include +#include "tensorflow/core/framework/api_def.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_def.pb_text.h" @@ -100,8 +101,9 @@ string TensorPBString(const TensorProto& pb) { class GenEagerPythonOp : public python_op_gen_internal::GenPythonOp { public: - GenEagerPythonOp(const OpDef& op_def, const string& function_name) - : python_op_gen_internal::GenPythonOp(op_def, function_name) { + GenEagerPythonOp(const OpDef& op_def, const ApiDef& api_def, + const string& function_name) + : python_op_gen_internal::GenPythonOp(op_def, api_def, function_name) { op_name_ = function_name_; op_name_.Consume("_"); } @@ -139,8 +141,9 @@ class GenEagerPythonOp : public python_op_gen_internal::GenPythonOp { std::unordered_map attr_expressions_; }; -string GetEagerPythonOp(const OpDef& op_def, const string& function_name) { - return GenEagerPythonOp(op_def, function_name).Code(); +string GetEagerPythonOp(const OpDef& op_def, const ApiDef& api_def, + const string& function_name) { + return GenEagerPythonOp(op_def, api_def, function_name).Code(); } string GenEagerPythonOp::FlattenInputs( @@ -662,7 +665,7 @@ void GenEagerPythonOp::AddEagerExecute(const string& num_outputs_expr) { WordWrap(return_prefix, return_args, kRightMargin), "\n"); } -string GetEagerPythonOps(const OpList& ops, +string GetEagerPythonOps(const OpList& ops, const ApiDefMap& api_defs, const std::vector& hidden_ops, bool require_shapes, const string& source_file_name = "") { @@ -698,6 +701,7 @@ from tensorflow.python.framework import common_shapes as _common_shapes from tensorflow.python.framework import op_def_registry as _op_def_registry from tensorflow.python.framework import ops as _ops from tensorflow.python.framework import op_def_library as _op_def_library +from tensorflow.python.util.tf_export import tf_export )"); @@ -727,7 +731,9 @@ from tensorflow.python.framework import op_def_library as _op_def_library continue; } - strings::StrAppend(&result, GetEagerPythonOp(op_def, function_name)); + const auto* api_def = api_defs.GetApiDef(op_def.name()); + strings::StrAppend(&result, + GetEagerPythonOp(op_def, *api_def, function_name)); if (!require_shapes) { strings::StrAppend(&result, "_ops.RegisterShape(\"", op_def.name(), @@ -760,19 +766,21 @@ from tensorflow.python.framework import op_def_library as _op_def_library } // namespace -void PrintEagerPythonOps(const OpList& ops, +void PrintEagerPythonOps(const OpList& ops, const ApiDefMap& api_defs, const std::vector& hidden_ops, bool require_shapes, const string& source_file_name) { - printf("%s", - GetEagerPythonOps(ops, hidden_ops, require_shapes, source_file_name) - .c_str()); + printf("%s", GetEagerPythonOps(ops, api_defs, hidden_ops, require_shapes, + source_file_name) + .c_str()); } string GetEagerPythonWrappers(const char* op_list_buf, size_t op_list_len) { string op_list_str(op_list_buf, op_list_len); OpList ops; ops.ParseFromString(op_list_str); - return GetEagerPythonOps(ops, {}, false); + + ApiDefMap api_def_map(ops); + return GetEagerPythonOps(ops, api_def_map, {}, false); } } // namespace tensorflow diff --git a/tensorflow/python/eager/python_eager_op_gen.h b/tensorflow/python/eager/python_eager_op_gen.h index 250623850f..f9dfdf0408 100644 --- a/tensorflow/python/eager/python_eager_op_gen.h +++ b/tensorflow/python/eager/python_eager_op_gen.h @@ -18,6 +18,7 @@ limitations under the License. #include #include #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -26,7 +27,7 @@ namespace tensorflow { // in the output. Prints the output to stdout. // Optional fourth argument is the name of the original C++ source file // where the ops' REGISTER_OP() calls reside. -void PrintEagerPythonOps(const OpList& ops, +void PrintEagerPythonOps(const OpList& ops, const ApiDefMap& api_defs, const std::vector& hidden_ops, bool require_shapes, const string& source_file_name = ""); diff --git a/tensorflow/python/eager/python_eager_op_gen_main.cc b/tensorflow/python/eager/python_eager_op_gen_main.cc index 9e4aa97ccc..cd74c438ec 100644 --- a/tensorflow/python/eager/python_eager_op_gen_main.cc +++ b/tensorflow/python/eager/python_eager_op_gen_main.cc @@ -20,15 +20,36 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" namespace tensorflow { namespace { +constexpr char kBaseApiDef[] = + "tensorflow/core/api_def/base_api/*.pbtxt"; +constexpr char kPythonApiDef[] = + "tensorflow/core/api_def/python_api/*.pbtxt"; +constexpr bool kUseApiDef = false; + void PrintAllPythonOps(const std::vector& hidden_ops) { OpList ops; OpRegistry::Global()->Export(false, &ops); - PrintEagerPythonOps(ops, hidden_ops, true /* require_shapes */); + + ApiDefMap api_def_map(ops); + if (kUseApiDef) { + Env* env = Env::Default(); + + std::vector base_api_files; + std::vector python_api_files; + TF_CHECK_OK(env->GetMatchingPaths(kBaseApiDef, &base_api_files)); + TF_CHECK_OK(env->GetMatchingPaths(kPythonApiDef, &python_api_files)); + + TF_CHECK_OK(api_def_map.LoadFileList(env, base_api_files)); + TF_CHECK_OK(api_def_map.LoadFileList(env, python_api_files)); + } + PrintEagerPythonOps(ops, api_def_map, hidden_ops, true /* require_shapes */); } } // namespace diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 3c62dfd133..c57f0a9842 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -447,23 +447,48 @@ static void AddDelimiter(string* append_to, const string& delim) { if (!append_to->empty()) strings::StrAppend(append_to, delim); } -GenPythonOp::GenPythonOp(const OpDef& op_def, const string& function_name) +const ApiDef::Attr* FindAttr(StringPiece name, const ApiDef& api_def) { + for (int i = 0; i < api_def.attr_size(); ++i) { + if (api_def.attr(i).name() == name) { + return &api_def.attr(i); + } + } + return nullptr; +} + +const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) { + for (int i = 0; i < api_def.in_arg_size(); ++i) { + if (api_def.in_arg(i).name() == name) { + return &api_def.in_arg(i); + } + } + return nullptr; +} + +GenPythonOp::GenPythonOp(const OpDef& op_def, const ApiDef& api_def, + const string& function_name) : op_def_(op_def), + api_def_(api_def), function_name_(function_name), num_outs_(op_def.output_arg_size()) {} GenPythonOp::~GenPythonOp() {} string GenPythonOp::Code() { + if (api_def_.visibility() == ApiDef::SKIP) { + return ""; + } // This has all the input args followed by those attrs that don't have // defaults. std::vector args_no_default; // The parameters with defaults (these have to be listed after those without). // No input args are included, just attrs. std::vector args_with_defaults; - for (int i = 0; i < op_def_.input_arg_size(); ++i) { - const auto& arg(op_def_.input_arg(i)); - args_no_default.push_back(arg.name()); + + for (int i = 0; i < api_def_.arg_order_size(); ++i) { + const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_); + const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_); + args_no_default.push_back(api_def_arg.rename_to()); if (!arg.type_attr().empty()) { gtl::InsertIfNotPresent(&inferred_attrs_, arg.type_attr(), arg.name()); } else if (!arg.type_list_attr().empty()) { @@ -474,14 +499,14 @@ string GenPythonOp::Code() { gtl::InsertIfNotPresent(&inferred_attrs_, arg.number_attr(), arg.name()); } } - for (int i = 0; i < op_def_.attr_size(); ++i) { - const auto& attr(op_def_.attr(i)); + for (int i = 0; i < api_def_.attr_size(); ++i) { + const auto& attr(api_def_.attr(i)); // Do not add inferred attrs to the Python function signature. if (inferred_attrs_.find(attr.name()) == inferred_attrs_.end()) { if (attr.has_default_value()) { - args_with_defaults.push_back(attr.name()); + args_with_defaults.push_back(attr.rename_to()); } else { - args_no_default.push_back(attr.name()); + args_no_default.push_back(attr.rename_to()); } } } @@ -515,6 +540,7 @@ string GenPythonOp::Code() { AddDelimiter(¶meters, ", "); strings::StrAppend(¶meters, "name=None"); + AddExport(); AddDefLine(parameters); AddDocStringDescription(); AddDocStringArgs(); @@ -530,18 +556,37 @@ string GenPythonOp::Code() { return prelude_ + result_; } +void GenPythonOp::AddExport() { + if (api_def_.visibility() != api_def_.VISIBLE) { + return; + } + strings::StrAppend(&result_, "tf_export("); + + // Add all endpoint names to tf_export. + bool first_endpoint = true; + for (const auto& endpoint : api_def_.endpoint()) { + if (!first_endpoint) { + strings::StrAppend(&result_, ", "); + } else { + first_endpoint = false; + } + strings::StrAppend(&result_, "'", endpoint.name(), "'"); + } + strings::StrAppend(&result_, ")\n"); +} + void GenPythonOp::AddDefLine(const string& parameters) { strings::StrAppend(&result_, "def ", function_name_, "(", parameters, "):\n"); } void GenPythonOp::AddDocStringDescription() { string comment; - if (op_def_.summary().empty()) { + if (api_def_.summary().empty()) { comment = "TODO: add doc.\n"; } else { - comment = strings::StrCat(op_def_.summary(), "\n"); - if (!op_def_.description().empty()) { - strings::StrAppend(&comment, "\n", Indent(2, 2, op_def_.description())); + comment = strings::StrCat(api_def_.summary(), "\n"); + if (!api_def_.description().empty()) { + strings::StrAppend(&comment, "\n", Indent(2, 2, api_def_.description())); } } strings::StrAppend(&result_, " r\"\"\"", comment, "\n"); @@ -552,9 +597,10 @@ void GenPythonOp::AddDocStringArgs() { } void GenPythonOp::AddDocStringInputs() { - for (int i = 0; i < op_def_.input_arg_size(); ++i) { - const auto& arg(op_def_.input_arg(i)); - StringPiece description = op_def_.input_arg(i).description(); + for (int i = 0; i < api_def_.arg_order_size(); ++i) { + const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_); + const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_); + StringPiece description = api_def_arg.description(); string desc; if (ConsumeEquals(&description)) { // Skip the generated type info. desc = strings::StrCat(param_names_[i], ": "); @@ -572,7 +618,9 @@ void GenPythonOp::AddDocStringInputs() { void GenPythonOp::AddDocStringAttrs() { for (const string& name : attrs_) { const auto& attr = *FindAttr(name, op_def_); - string desc = strings::StrCat(AvoidPythonReserved(name), ": "); + const auto& api_def_attr = *FindAttr(name, api_def_); + string desc = + strings::StrCat(AvoidPythonReserved(api_def_attr.rename_to()), ": "); static const char* const kAttrTypeName[][2] = { {"string", "`string`"}, @@ -596,7 +644,7 @@ void GenPythonOp::AddDocStringAttrs() { for (size_t i = 0; i < TF_ARRAYSIZE(kAttrTypeName); ++i) { if (attr.type() == kAttrTypeName[i][0]) { string s; - if (attr.has_default_value()) { + if (api_def_attr.has_default_value()) { s = strings::StrCat("optional ", kAttrTypeName[i][1]); } else { s = kAttrTypeName[i][1]; @@ -625,14 +673,13 @@ void GenPythonOp::AddDocStringAttrs() { strings::StrAppend(&desc, "."); - if (attr.has_default_value()) { - strings::StrAppend(&desc, " Defaults to `", - AttrValueToPython(attr.type(), attr.default_value()), - "`."); + if (api_def_attr.has_default_value()) { + strings::StrAppend( + &desc, " Defaults to `", + AttrValueToPython(attr.type(), api_def_attr.default_value()), "`."); } - - if (!attr.description().empty()) { - AppendWithinWidth(&desc, attr.description(), + if (!api_def_attr.description().empty()) { + AppendWithinWidth(&desc, api_def_attr.description(), kRightMargin - 4 /* indent */); } strings::StrAppend(&result_, Indent(4, 6, desc)); @@ -650,8 +697,8 @@ void GenPythonOp::AddOutputGlobals() { // Prepare the list of output names std::vector out_names(num_outs_); for (int i = 0; i < num_outs_; ++i) { - if (!op_def_.output_arg(i).name().empty()) { - out_names[i] = op_def_.output_arg(i).name(); + if (!api_def_.out_arg(i).rename_to().empty()) { + out_names[i] = api_def_.out_arg(i).rename_to(); } else { out_names[i] = strings::StrCat("output", i); } @@ -714,11 +761,14 @@ void GenPythonOp::AddBodyNoReturn(const string& apply_prefix) { } // namespace python_op_gen_internal -string GetPythonOp(const OpDef& op_def, const string& function_name) { - return python_op_gen_internal::GenPythonOp(op_def, function_name).Code(); +string GetPythonOp(const OpDef& op_def, const ApiDef& api_def, + const string& function_name) { + return python_op_gen_internal::GenPythonOp(op_def, api_def, function_name) + .Code(); } -string GetPythonOps(const OpList& ops, const std::vector& hidden_ops, +string GetPythonOps(const OpList& ops, const ApiDefMap& api_defs, + const std::vector& hidden_ops, bool require_shapes) { string result; // Header @@ -738,6 +788,7 @@ from tensorflow.python.framework import common_shapes as _common_shapes from tensorflow.python.framework import op_def_registry as _op_def_registry from tensorflow.python.framework import ops as _ops from tensorflow.python.framework import op_def_library as _op_def_library +from tensorflow.python.util.tf_export import tf_export )"); // We'll make a copy of ops that filters out descriptions. @@ -766,7 +817,8 @@ from tensorflow.python.framework import op_def_library as _op_def_library continue; } - strings::StrAppend(&result, GetPythonOp(op_def, function_name)); + const auto* api_def = api_defs.GetApiDef(op_def.name()); + strings::StrAppend(&result, GetPythonOp(op_def, *api_def, function_name)); if (!require_shapes) { strings::StrAppend(&result, "_ops.RegisterShape(\"", op_def.name(), @@ -799,16 +851,18 @@ from tensorflow.python.framework import op_def_library as _op_def_library return result; } -void PrintPythonOps(const OpList& ops, const std::vector& hidden_ops, +void PrintPythonOps(const OpList& ops, const ApiDefMap& api_defs, + const std::vector& hidden_ops, bool require_shapes) { - printf("%s", GetPythonOps(ops, hidden_ops, require_shapes).c_str()); + printf("%s", GetPythonOps(ops, api_defs, hidden_ops, require_shapes).c_str()); } string GetPythonWrappers(const char* op_list_buf, size_t op_list_len) { string op_list_str(op_list_buf, op_list_len); OpList ops; ops.ParseFromString(op_list_str); - return GetPythonOps(ops, {}, false); + ApiDefMap api_def_map(ops); + return GetPythonOps(ops, api_def_map, {}, false); } } // namespace tensorflow diff --git a/tensorflow/python/framework/python_op_gen.h b/tensorflow/python/framework/python_op_gen.h index f485044c5a..4d20888dc6 100644 --- a/tensorflow/python/framework/python_op_gen.h +++ b/tensorflow/python/framework/python_op_gen.h @@ -18,20 +18,23 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/api_def.pb.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { -// hidden_ops should be a comma-separated -// list of Op names that should get a leading _ in the output. +// hidden_ops should be a vector of Op names that should get a leading _ in the +// output. // The Print* version prints the output to stdout, Get* version returns the // output as a string. -void PrintPythonOps(const OpList& ops, const std::vector& hidden_ops, - bool require_shapes); -string GetPythonOps(const OpList& ops, const std::vector& hidden_ops, - bool require_shapes); -string GetPythonOp(const OpDef& op_def, const string& function_name); +void PrintPythonOps(const OpList& ops, const ApiDefMap& api_defs, + const std::vector& hidden_ops, bool require_shapes); +string GetPythonOps(const OpList& ops, const ApiDefMap& api_defs, + const std::vector& hidden_ops, bool require_shapes); +string GetPythonOp(const OpDef& op_def, const ApiDef& api_def, + const string& function_name); // Get the python wrappers for a list of ops in a OpList. // `op_list_buf` should be a pointer to a buffer containing diff --git a/tensorflow/python/framework/python_op_gen_internal.h b/tensorflow/python/framework/python_op_gen_internal.h index 92237ac81a..c1efbf9be2 100644 --- a/tensorflow/python/framework/python_op_gen_internal.h +++ b/tensorflow/python/framework/python_op_gen_internal.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "tensorflow/core/framework/api_def.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/platform/types.h" @@ -42,7 +43,8 @@ string DataTypeToPython(DataType dtype, const string& dtype_module); class GenPythonOp { public: - GenPythonOp(const OpDef& op_def, const string& function_name); + GenPythonOp(const OpDef& op_def, const ApiDef& api_def, + const string& function_name); virtual ~GenPythonOp(); virtual string Code(); @@ -62,9 +64,11 @@ class GenPythonOp { void AddDocStringOutputs(); void AddBody(const string& prefix); void AddBodyNoReturn(const string& apply_prefix); + void AddExport(); // From constructor arguments const OpDef& op_def_; + const ApiDef& api_def_; const string function_name_; const int num_outs_; diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc index f681daa7e4..61b1d02a5e 100644 --- a/tensorflow/python/framework/python_op_gen_main.cc +++ b/tensorflow/python/framework/python_op_gen_main.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/io/inputbuffer.h" #include "tensorflow/core/lib/io/path.h" @@ -33,6 +34,12 @@ limitations under the License. namespace tensorflow { namespace { +constexpr char kBaseApiDef[] = + "tensorflow/core/api_def/base_api/*.pbtxt"; +constexpr char kPythonApiDef[] = + "tensorflow/core/api_def/python_api/*.pbtxt"; +constexpr bool kUseApiDef = false; + Status ReadOpListFromFile(const string& filename, std::vector* op_list) { std::unique_ptr file; @@ -108,6 +115,19 @@ void PrintAllPythonOps(const std::vector& op_list, OpList ops; OpRegistry::Global()->Export(false, &ops); + ApiDefMap api_def_map(ops); + if (kUseApiDef) { + Env* env = Env::Default(); + + std::vector base_api_files; + std::vector python_api_files; + TF_CHECK_OK(env->GetMatchingPaths(kBaseApiDef, &base_api_files)); + TF_CHECK_OK(env->GetMatchingPaths(kPythonApiDef, &python_api_files)); + + TF_CHECK_OK(api_def_map.LoadFileList(env, base_api_files)); + TF_CHECK_OK(api_def_map.LoadFileList(env, python_api_files)); + } + if (op_list_is_whitelist) { std::unordered_set whitelist(op_list.begin(), op_list.end()); OpList pruned_ops; @@ -116,9 +136,11 @@ void PrintAllPythonOps(const std::vector& op_list, *pruned_ops.mutable_op()->Add() = op_def; } } - PrintEagerPythonOps(pruned_ops, {}, require_shapes, source_file_name); + PrintEagerPythonOps(pruned_ops, api_def_map, {}, require_shapes, + source_file_name); } else { - PrintEagerPythonOps(ops, op_list, require_shapes, source_file_name); + PrintEagerPythonOps(ops, api_def_map, op_list, require_shapes, + source_file_name); } } -- GitLab From 6357bafeb80523c45bee21a19def146d221cd295 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Tue, 14 Nov 2017 15:16:45 -0800 Subject: [PATCH 0435/1801] Use Toggle instead of bool to make the layout optimizer name and usage consistent with other optimizers. PiperOrigin-RevId: 175743440 --- tensorflow/core/grappler/clusters/cluster.cc | 2 +- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 5 +++-- tensorflow/core/protobuf/rewriter_config.proto | 2 +- tensorflow/python/grappler/layout_optimizer_test.py | 10 +++++++--- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/grappler/clusters/cluster.cc b/tensorflow/core/grappler/clusters/cluster.cc index ead44de1e2..e2db47b758 100644 --- a/tensorflow/core/grappler/clusters/cluster.cc +++ b/tensorflow/core/grappler/clusters/cluster.cc @@ -57,7 +57,7 @@ void Cluster::DisableOptimizer(bool disable) { // Disable Grappler optimizations. auto rewriter_config = options_.config.mutable_graph_options()->mutable_rewrite_options(); - rewriter_config->set_optimize_tensor_layout(false); + rewriter_config->set_layout_optimizer(RewriterConfig::OFF); rewriter_config->set_disable_model_pruning(true); rewriter_config->set_constant_folding(RewriterConfig::OFF); rewriter_config->set_memory_optimization(RewriterConfig::NO_MEM_OPT); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 6204a81f80..eb04bc6e9a 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -71,7 +71,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } - if (cfg_.optimize_tensor_layout()) { + if (cfg_.layout_optimizer() == RewriterConfig::ON) { optimizers.push_back( std::unique_ptr(new LayoutOptimizer())); } @@ -175,7 +175,8 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, } bool MetaOptimizerEnabled(const RewriterConfig& cfg) { - return !cfg.disable_model_pruning() || cfg.optimize_tensor_layout() || + return !cfg.disable_model_pruning() || + cfg.layout_optimizer() == RewriterConfig::ON || cfg.constant_folding() != RewriterConfig::OFF || cfg.arithmetic_optimization() != RewriterConfig::OFF || cfg.auto_parallel().enable() || cfg.memory_optimization() > 1 || diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 8f3457e97c..eb74d4b1c5 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -30,7 +30,7 @@ message RewriterConfig { } // Optimize tensor layouts - bool optimize_tensor_layout = 1; + Toggle layout_optimizer = 1; // Fold constants (default is ON) Toggle constant_folding = 3; // Arithmetic optimizations (default is ON) diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index bc9d910447..9ac33fbb4a 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -88,8 +88,12 @@ def loop(): def get_config(layout_optimizer=True): - rewrite_options = rewriter_config_pb2.RewriterConfig( - optimize_tensor_layout=layout_optimizer) + if layout_optimizer: + rewrite_options = rewriter_config_pb2.RewriterConfig( + layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) + else: + rewrite_options = rewriter_config_pb2.RewriterConfig( + layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF) graph_options = config_pb2.GraphOptions( rewrite_options=rewrite_options, build_cost_model=1) config = config_pb2.ConfigProto(graph_options=graph_options) @@ -194,7 +198,7 @@ class LayoutOptimizerTest(test.TestCase): meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def()) rewrite_options = rewriter_config_pb2.RewriterConfig( - optimize_tensor_layout=True) + layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) optimized_graph = tf_optimizer.OptimizeGraph(rewrite_options, meta_graph) found = 0 -- GitLab From 1bc367859c6dc3c3ab17fad25198f9fb25132e2f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 15:33:12 -0800 Subject: [PATCH 0436/1801] A few profiler improvements. 1. Op view proto copy uses too much memory and time, optimized. 2. Add a hint to use "bazel-bin" instead of "bazel run" 3. Make proto string parsing explicit (seems no longer throwing error) PiperOrigin-RevId: 175745677 --- .../core/profiler/g3doc/profiler_ui.jpg | Bin 220483 -> 190596 bytes .../core/profiler/internal/tfprof_op.cc | 18 +++++------ tensorflow/core/profiler/profiler.cc | 13 +++++++- tensorflow/python/profiler/model_analyzer.py | 28 +++++++++--------- 4 files changed, 33 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/profiler/g3doc/profiler_ui.jpg b/tensorflow/core/profiler/g3doc/profiler_ui.jpg index 36aa94502a8c3de7915fb0e388c861cd706c3af8..77346e61ae971725e163c561a813bb6c0153ad89 100644 GIT binary patch literal 190596 zcmex=9G120;$yXqGx=MkNL&K}Kdl#{Wkc&0Dr^+rDGxu0w~996fgY#K}{a zE?>EN?fQ+Iw;n!v{N(Ag=PzEq`uOSdm#^Qx|M>X}Pb?HxGHT=yahkYr<3UbkKbJlOVGogFS=BqON}%>|2`Vp89-mcC3C|4fCps zsm;gbZ{}Lx@iExnzP94#y!^(KE4&5!R!g{+m;KaRJ8xg~)a2r-iwm2UJXgP{{rB(0 zvw?9N%@5D#)zMte>$>mhbp; z&tS?nM^};Hy(hP81iD9ExU|Q1)$3!9%QB}|^-ul&?{wa$DRRFZkGgtod2xK+;!UQl znpY>ah~9qD^Ghpxb>y2om8Msmy3c#wRxESsTYjPW#EQ&A1+(^e7L?qb_s~@&bb?5r zE5k^~{;9>^tamKCwf^b0H&*Y0zly#LuNORfZa$M(NKv!F!2|ypUWZTr{@~fN&GLKy zXn&{|{^8e`-#jlfY{}%s;u&k_?Vf+}o?v0f2gz2QX(q|4VVOI(t_s^6m+^K_^x{`W zH=ZLyJcd#PZi$iSV3Xd8X9in-RT6oHk3k%%wXlEw%i!{ysNbtaW3TK#$0mX_Ia*+iid1 zU&YC6w?2MBOW)(UVe8k;Ud!?^{_vC^osasN4w_GW__jXw^3wT%H}ene(o8Mu)^0i= z^U2=pLBfyN^feXJZo6ODEIwsL!4i&#~pcNVc7(N6FP4m!4gI^S0;X zp6Nz*3|gy>D#kk3=jI>SC%)su{kMz{Td$a^U(?fHGwbU+Ri`!O z2Q_XLor&n(TYP?L<$s3uI{D=txm#niL#{5mU?S~3nfu;pZlCkUGA$n0S^F$)dbqxE z$ojwKe^91l_Mah1-TvFLtRp|lSD#%|V|k^T&1m-nZ0#-Ztvs9L!ou2mTe50>|J(WCo1~x>t_Y!{~0*{ z>`gx$c;mI;D=WGE7EPB!`q+Zj?on>7MpmpoQ02Bealwlfv$m8QzMSQ?%3Dy6F;mnt zly{!x$%E&;KCSOu8vV$=^G1>VmUWf?8MdvBdluPsEUD0-HEQ0g?yAV;tG(Xc-5v2^ zvyB-yZ`+)g!QXdIy7qGQ>fPGk-xtqZ)TRBlzlBR0u|LzZKuQ!(FX8sXR|2Eru zh3-Yxzt^tusi@4qSNfCPKR9yQKGyk13xDL6oR-$Li_LrX!Qt7nFo}L1!Gr7>frln| zTK?RXwz=eP-nsh^&uw42XLtU({I$!wYjHoa`pwPON5f=f!CUI~x85bY~8Lv?TTF-@KwEN*Uhk9M3hV`E_| zZk)CHKLgk4P~ZIE%g^VXx~6L?6us6d(*I_VIahMR1EJgB^)4`~+geyO>1JK5lZwfd z%hSIf-uK3J>&_lWC9%6b1!vfd=T(K*r+(CS%iddb`fY)0{)(5sirmYqm;YTQGV7iJ zmnXNTicmyesD&2CiIpom&%7+wSeN_ny!Y`~{qof>zTGZ*`Eb*!bGrMYZ|<$S`EvEG zm$lDzlOOQEdAY}vZ^gve*|j{c%)-qs8Fu-cXjE}B+>x;1FyGD-CzkH~ky*JStNh~a zxz?UdhYnqP_uzD?VrQ6Z{QP3QBJVuy+Y@&0VE6ic?3&S??839F9jkgDK3;z7WbX3X zX~K##RX$y0>e0XVbB4as_EX8QvU%#`ykk!u z$Q^kSZmack^2)M#+g`lc+O>D?F0*g%Nfiw_Z*&`?a=jDerD?y}R~4Vg@c}x5zGNO;=Gksb$#Ml)az3>r85Q zuhquyrPt0!$H%Uj*X_Mlf7y!98zW+NNqB}X);{kx^;+-ETi17c$L17<1iIE%ete$) zU^cI2jpC!;_<&2wSD$T|bTOeX@>y^AnM`K+ySpZTcaQgaxka8e!|K|~htfYy$~JAi zD)z&F=}DW`_>(QWJf)v_Fsd+rmUDczv!1Qu>>lk0{~1IaGgq8m{%U)F`nR;^g0zrK zoAWbx_#{e|EtgmDA2+^Lpe}}yLv`LtytD)4svfXdy`u?K<$3j+i z%r(0dzHQU>yHTp!e%;M3shuV+*wShxFspm-`jRc**WIq(E4t?7ti$df?VGpW^}6lw z@6e7**Cc};Nq45S<-S{yl&EdU+jhsGzU5bQ89oTVFQ$v%9a1 zAJfv?fqpNuChdFsa#OVS;%QySoj%NO+me0O-^1$ezg3%JIG*g9#POgpMPtVeN4=vF zPv$W8@tbZ2ZhskgWCShh^;i#V+h z&nBj>%vN^ae<{p6b?)1gS=uwCoL+?|oZ2)kv57&U&)sxw>PMwh$0M&U-?)3;DeiiH zUUi=xT8F|*GEO9KxAqn|d!5c%1tD^vR~D z7tO1!@0MQQf8t^3+n~cipIZCc*?PjuKJ$F`e3mY8e#Y6JgfGXVPAH2mUR%)}{88rP z*;Vo9Yjv`>N|oz`7SCa-PTRct&4vvnFEvirhy7;|Uu|bCY#OwBO4%3h4^fKMA7l5o zoP1V(>VR96(t(Y4mUi&ex9)ry z`iJ+!$r{)C#s3*HUll~$dR1`etc>HG7|{-k^S`zQm}}b13yqd}YC?@1CdTAyC~++HJg!%~wW{|>VT9N`#w%*GL;7Al zI`7r&JO9mfaq&x2d%P#HBxfX@Dcktd@j=Ri(7nkKmxB-6$z0Z5?|Nre>EA1pb{%=O z%-rxz;ZAqnjeS-B$cvqP6l<32|J#i7YxAnec-uYwGOI!-EMB$l>w_k~ z%Xef#oVMBZh`lw}t&Gkrj?UZe@0u%iV%3%c(e!V@T~TV2@@~D}wE6CwFZJAC-S%90 zIO*B3n>q?z&%9@@Gh~@?;*^-j9VSVSgj#4Q^W(bve79>~PDehOq;g5;%W2D$&Z+C7G_SeW@Vt?)XgI!r zdA;bFAB!WlT-jpJ|G`yy(1wq!$U9mnSWHud=@Se)O$-RWesa0^I>HDl{Hk zcl^MtZPU{qt!MpF?YSq{O~TY{@2{p{9IcNLyV6`~(!?3%GER4b(4 z+-Udf)29}H&;R^WKDBvy+iE|N=ciA-nRk76_Vev09f>t*6kOr-7A`xFj?r7U}ezasrozmuI$@6W7ou<lJw@fYw%2X7S^i7s{<o#d zY~Fbip*(ks7}xT1{n3jJKl;mU_KH{24*OqGs$8&p%fu6N!wa+}4d)o7TJo$uG(YD~ z`_WrA^M(I-ullR`?0ZeHKDvmEoajiydclx{yIS;Xe{=F!$AKbn_B|L8xkJve*1 z)s0K7N5Z#%I;c=T_cjl6(JAJ_``j1gw6$i;IeFCSe8l_H*Uo<1_A9?$Z|UX3sjvJN z7jDW_x%cX2_PV=%vNejP=Ng4?9M4^G)@<$H`Mg*6Y|5Y7lh7Jc8n|Wh+~d6SZmjsa zpCRtWuhOe;%5T*zJoZ7ta|?EM+f}_spSS-pet2FW@@3WLY~!qF8p&+8JX{2I76ql~ zG3a@%IP|2{a_!2b!i&?j{H}*xd^LOX{%qZ_TQg4cHWoh1&3T`;B{Md%bl;iom>>SD zGSq|mqW|hW`s~+#e&?obb7!S?D}SG%#b%dKIQM|u)FaoIXYDh+r1z)tajlp2-fOd_ z&9>dUWnOT+3bW1r;T*92(w^8w@iF$( zb8~B1E_-g-b|q4D#)FhO3{~M}#+xlx>*L_}-75lU^ z%L?7D?-RfBWbVS#2HG1eDk2-ty|`O=ed3lU&vxW89~A!4#9b*Kd|Btot|c`Y2FYf7GnH)KFzU#D4*}uIf^c=gFB*VtVn7gibOHX~t z$FBARb-EY7%;k3pe8l<3+H~&i(<=_MuHG`M>3z_Qls6)|w~jELWO%mEHT;Z*rt7j> zfu1*azk9#=ddipI{TshY)+rwIJf^wr)~>LdQE%6pXFN0c&(QY$dR4`qT~|u(D#>p> zndJKCP0waGkxdg;-6_7l;NUrnGqctodK>Q19wdC{<@?&@iqoF^wbgzXbd%sWX2?~3 z7Tj=?p{Fsdrtnd$dBnwM+c)p{oMQF%?F+eR{dY?_pYB&Ey;bqb@Pz2nl@;gRcYZwH zS1PjWZsw}Y&j+*Br#wDeesfE5o?F>G59Y2Hed>mVp;7nV-Pt?Kv*vRC(^tj*{L_}b zT)t}4-rvh^zg-;mbIzZ7-cnB;vG^^Kx{q(oe(~kljK51Ca79B_XWld6KIcTVl}aGp22f4NOUQTF!TKc;7vO?WLfYi--y6{=ka zzMWsRn+fBWFYaTQlNq8*!?4Q`bEPcP&%!(>wu!dS^44a1&{W`fUSid(^@m@-<}S9~d$(`H z%A#qT*3CAYx@hX>jS-@fCs~+w2Dur>_Q(sA{Ab`jxIkyunSDCDH(oh(c3+=v5y13>&(EJ} zXGW6Ymg`!6clBoPuZ^7*9rgFu;${2f+11Z#y6)SS85tRMGUI-==+^#rtG3Ch)%Led zexEG)#QJsa(u0$-bGF@|lgrR>L{lSzsdBZ{^h0O8eP{i;@k%Q7*ur%7Td5n{%8ezy zvFk88Eaxf;x87jfb!pEv8{38bBJnSG?U^R2? z=Up%Nq?a%Jk^D$+<9g54l3PVK2^?O!c5|E2$*he{=X+-})ZQ(=Eh-;(TvOw9+3dL2 z;aRs#FUD@IKfW@Xxv8-DqROSJWs}OU+O0cjEyFhV!{v}GYjRb*yEWFWPt2a}YZrOy z&K)=7&q{BPTSqf?==K@MOs)FceeDXcY-`U>!u5J*&vljb}OAgk-rdMcMNO+n07P)spDYbn-fDT5^4swZV)% zZftuBB{FxM(|fr*ZGOq=@LyAYO;+}K@J2e+;Q-UBHmnLmK_)sGu<2Zj?Q;+hZ zt}yQ>HhI>9Gs32+i<(=WynAx@_Rm}zi+1zbrHKT(>V@cKvWf(HPt#7C^HldnpsPsH z@eReNcLchMXdJWWu=I*<6$uRS2y_);AQCgC9S_=l>3L?(x@B9YPB@m*WVF;<88YwlfuFEb3ZFFt+^5s|#1e?D-yiy4d=G$z;p&90oSe=M4;hw79e^mCv8C zOV2Lh(pa>{(N!dXaZus9&i@RDwtu+)mnnDu`!4&3(LeJ4HJaW3ezea2-`@Jm{m1_? z{@(t_S^rn(e}WTAMJYH{|pz@ZvXqS_@C(hw*L$-gmeEh)C5AcFw`G0 z`p>Xp{-5dpIDg)B#c7B9>+63l{?tEU|KluAYYRyBfgy=LAlU*^g9vT`ONINvsO$Zv zo+fYEj=U1zcqBbzT1dZOTd2CuDXUE%=X{QxTzBR_gV2=XhtvNv95`M7&pJK{M>JP% z-Emu5^InhA;+uE(?=`8qQO~=iN1@!zgA`_u9}|1-?I`kz7Iw${JO+Sh;j`}RMa`A@a} z!j}IGKg4tYGaT3W&(N0sW9okfr_28t9>i|{&mgmYV?81%%70k<$NWEo!|eRukNUq} zHvAXy_xzvZR5$VPKgDgKe?I-b{!^Y3U(3Hua-{^h&-{~7wir(egNk;3z&`wR{DU(-AmySDWEz3(<}!rOLue)=1=W%2!) z>UGz)_MbbY{ly<8@BL@sx_`L-3+J}~4EwY<|E>DZuw1_X5A*r|3>~Te83OHp>i-e` zys7K2@PCHZ{D=F0vD~h|FH(<`_ph)2RrupS!vX#uZSh})|1(Vb&v4M*T4Vi1`yXNd zr2jK4@ZJ9RqdRimhgrffYR70;ji#&7`~h!AjFu&%W#VZ4FiV`X)o1;_&dk+S%3P|kyCYQ6LiV-%p?bkB@shTeB5jWtM=i3s z7uTI89nzSZJ!6{K zv76q#%U@K5)zs-+R=M(gqqu@MkKwsX=7J(+sWJ9Wp25dzHFr(uUH4tdB+GiM*qiK& zq4D9H`ZS(opDyk5c~h*_aN~9JysYruP3FB)CE@iumNw{q`MUhA@*Uf$t{0|ttuoD4 zexCG>`N@BVwSQmzne+JB^S;`wQyWTomKaT(;O^{a)Uz$AVA5S~*MMEub#jw-mhI|2 zyUAqIl03;P8b5c)TsG+!xRE_8we|0o2adHuXBj3xO%F1%u>Fu_yG8$Wt^1o*QGeFm zzihH%#k5!KGap6gFxkTsr-U zu1dun5xE5^MVeY$?ygl`cQ4gSRP??0vR^s5ff3j4X0(>_pM3RyUUXD6 z*3{Zl$lQBHq)KkbX$8gE7pi7$3|lUlCA1n zsuJ~I0j#IA=l;99>udMFANG&kzrDS_CjR@u{|r(LFIKE8|6TU_n*Q$x{~3DaZ*Py= zSbv}WPX_a%MLX}my}dqS|NG{DCiQuDtB(F>sFDA;fUm3TdHwdhy-EKW_VNF?_;+{d zYw^Dy>W?*8X^mPj8c-uSMLjCjmwsh7Yw5DN=}Ycpi*GkLU7)mCQ18U!jH}&}X^z)I z4zSGIH>Lhi;G_M&gi7Oo|4#nTaI%E)c3_PEA5}T+{|pP&L;o}E-)8@3x;wgngX~t( zBmWuD^wQcarwqfjgRqvIjz?J{k!o$!;@Y1XncmZD@Kj_=S^%_t+&5yQ~gLb>E`n- zAIetic$#c`bNkJ7Ay;L0g=a5HN)qR_OG=97{}Y-oT7O|GEM^w)UF&+Z|Doo`@V}f< zu!vZ>3>3|Kcc0IE@HQ-BQfBVW?KkyQy4?C4&t7=(;;=2Bk&y-{p0&{oTa3mB8UA^8 zeCPFFE!(Dji(>uy^v{2W?7Qg_+r!)6T-GsqpEgJF=MBcKvu(4TiAo{oul!Swe_@~T z@5TlFFYC4acU}oUC-$jwyvZC@XJ{ZLxzhLXD$s@coee)qNe z_F{Wn`)N6?4|Ddhe7M%}Yui+nZA`B|{AWmwyK_|S`Xp1q9fpgJE%n;SVR?G)P8;58 z`!+kJtADg6_G)Y?xAL3JKV7GzWyW)Ut*L1&iF}pE7d-F!Wz$e~>AKXZ#53>JPhDM` zTej_a^?9uou5R2jt?#{E`)=?Ypd#JS(mB^r1HQ$vKrOe{P4Xv&(!urhQ?> z+LaHIy|M~≫-N{`0Q*%9+%t*-L{oSKqZepTASyG-Dp$)jh6{d8=OgUJ|T3yEaQL zH{ev)rPXc$(U0E??rc2y)2+5R=)SQjA5-e=ujJqM zy2*O${;}_dcU?NQJou;C1wQL@IGKAxMS#t6aqfroqwWW%&#M2Hu+{%9cjeixJ5H)Q zdj+RnnZRRtGKgD4ce@_v(OO+s`!}gwG0yDxtI-9P88 zozRaiUTcTG?}snGT4rnhrs-bv)$TXOKi@n{I8}UV>NBJ0r$_UD2tRz@VPpQ$?LR~G z-pgXI{de!)Ex6mSb#34cqce-kJ=sDU%z5~(zTFZQa5~d1aO&O1*)?0P+>1P#c`UFn zY*OZ*yT4z4vI#%7pXY0A{;^s2HuFu{d@B92+_4uYB}yai%se&eW(-fd#rd4;CiB_$ zc=NvRe3s4mY|pN(`}?d9Rx)%wImJJ1sagAZW$QZ(ZjNu`-7mUqIu$f~a^BnRmsjt9 zC3r6Tkj%6&PrcnL(_e18ZddVf`a#{OZ@G*)CYP>>{i=$d9qm0M!EC$qcbk@7_UtxO zes0?OMMo=0G{{?k;m%!OtFBiITiWD`rqo^C&v#r*T{c+M&tUGBz1v^CU$@XzB)srB zcmg)5V&S&iPW3Mr{Bzs(dj7}m+zmbd87@2i>18M`-*o3>YQeT*SJvBZha}T`PD|O@ zg{AN;ZLm;b3v!o~%|5uMPVdM2L$}(G^m-qYy563@$M}J!_}dFF=1K2p`}EChL)U9V zlUdDca+W0+gl=cCS@+I%`}yzdgrk1AFUsCke}vn*d-a#@V@v%OB~N5_-FD47@yS!Z z`;%1d&EC(LcZY#1PXF&xllKe}&XtesxhtC8wi~SZu3MH9{AR@^BcA^ZOaA@bu=|wb zvrVh@9!RvVm8`RW@$J*A??x}BE>+}v&H8rl{gFdA0?a8yfD%Aoaf zMv@2n{2%?lZC=<5eT^;q^*-{N$?~D5|PnYd)vppMZCjMtwW65!9?H{kA z;JfClyZeA&Eyg_*qTk)KfiANcNcK=;w!p-ez%H69R-R@k!UA1q+g~Q)!-J&Cw zO^bNQ6#KC2_PKx)9qXUYUE?UZR{yR2(x|nM@BZ*V@~tp3;#Su6qO-i~6gKWG_sEMD zXL+lpS=1oOKJBL8%(a!jtt;vSw$|D`j=1*p!eaCF$%{=c{GIJT?Um-DX&(Mlp9zUQ zDrj;q^?lA|V7Pj1jrfJq+qG5P-FscdBQLDkV_%Rw%^>XF)pv4cJ_`>|E>?R~#KGNQ zP{erkkMqa5?_1xPOu6*-()X|{U#+X9v(qI%i%eGe@pDREbj9+-Y0O$ig|j}*=lOSC z;Oql!-Te3u)3%FVswn>_lIqjqe(YGbS4 zdU9F>^X(hEeVmSk34Yd?+qAjuf!^EWDh)Yz>n>K6PQ89Z{loE&#n=26W-dRzv@+~i zCGVM_(>x8G-&m9+8Fdb<4BqMRY{iew-Zr(nr za^UUcwDP}mnc20wTN8Xke{_HF{S*2qnm6C$+PW{ZKd;LCUOt^ui>*sLB5xuO!;uF& z&22gvgBV%LtbU0fjAeKEU462A#jA3w)}?pX-aY$fMaqo37S)}8M*?KOx$M{A5|cQz z>hT1=yw~53eOTURvR3(@P{z~CUn5JpW5a{Ci<@QSOiERe?JtmvX-(r~n0n~#3DySN zyt%&}Gp*Y8%gr;1&EmE+UER-reDz5YwX5$HBPAAaYffnsG`s%k+jE8q6>GfX8_L8C zj>|6Z-V>OTa{29-vrf_8vnx`rBs3Rvc%5W0+_iX9<%CMVaLFVG<;*(47u~X(xYs)8 z$SKZ0vdZNY=g#S8@66<$uTy!p!Qe3Sv#$*2K74=c{_woekGR^p>)u!Y9iAQW)~q{o z-mmCRmw>|(UFn>fMGKA__D(K~(BN_H-ha#5f9tNTm)TqQxSMyTU*(%rV=q&-KdUP- z>w3cG*^PXMxu)@Lns~d7g(qTv+I@cM?%li3=Eq*y>Y2E`jW_5~bm^{bpVT8CPVkh` zN}ZVEV*UJc#DgY}bl$CtYa}k2tQU-|XnGuab!%?b{h->QSf_6u<#)ZV<)*EAE#uKB zrmh%f>gVR?nbS~EC-!&69>@O-oVIVzcbDw06+YA{-|?hRbj$s!W}kgqx47PyJ+oTd z+FwCS(5_&ixBj_%?fh@~7vKKTciZvWm)!KjQfn?L?U7m({kF|gVaF`ZxsPVK-1yIs zyu%^1ldmyC@8O)?=Wl*Kw5!Wg_Rg)|BSC6gdL`$E|8pwe80U9j6&uf_L-Qh8a%Ib9 z1f!4GZ(i2?SeE~Yn4Qw~(zkiPqx&v#e(ikLq`hO|wuwu+H1@0rGKsz9JxN&i**4F| z3_`d5U8&OVwBc3T`tYrNz_f3%QjdeSZQkFSYu=ki*{v`B;uh<-$q&wp z-1T2~(`@DPm;YjC-SJIXEtoxd*Apd$%*76p%^U5tHvPGJDr^6icdJ)aT)UAW)+yff z@8H6?xn8gRg-;ii>bl%gTJ?9GT6^C0;9cekr=F}6b=jM{{vfZh$Avx0iHlzUy4PC% zuvaCctZM0$X>W^lm)&UW3sljUPB|={qM)$;RJe*n^56M3nz`AcYuD`RUArZ0mgy{4 znO8S_jefpQP}ww1=hKOMi7M}8&E{-nD8BIT#y$BD-+KA~sH!QhnSMoW+O1>N*@dD@ zq*~9e3Ci8M!&%v2d;TQdH(uLBQeCUUoBtU`RJ=aA>s;@W%%~;bW`$2uzjiY*S}iko z!Oq@bqslctuOTcG$V2?b)?Y&;53`rtWIW-uZ6N zHeK%Ao2qAi%h^uSDE|C>)(i26;w|r#KI-mUz4YKCYu>tFyiV^9$8=fWVA&XTFlR=j z$aBtI>C|qsN5^d%81%F1rAl%&esp-}*4)2Q^=JFl25Tvf?gn03+RY<8)MmL$s>kH6PdEbe~aF!R=y zulhHCAO1dT$>gZ4s&(Bn7rC$L zP`$j>!zApg>P6urMeYI{vTw}1`0zhNb zuWvbW-DKz1xqAN@0@_5j@7dD3FzEHBuu|P`9W#_yZR1z0IG6cDev#ce^~u3k(=JXA znI7%WcUr~mV17z^`E0rPS;Z>-YKIkl-}v2|@o=BqABB(CoA!7vc`bbR#;d@JYM1#g zmrQ2&X5M1$i_Gm*a!gq-v?!{9`R~Hp1#JG)RhX~Y8UJzK|D*b0bga{no6)t41GaCT zA9Cg5z4N|ZZi?&DLsM178EU^smY&u)x!z{WkGGHZ3sy)+Un}wa;yq)Jb$w}a{mPp! zzTfJ4#j4!qUAAR&0T(?V9&GV%v^}JMp#$oi%65 ziM7nXdwk29tN;bhEhz$gzYoOe)Js-KAHMaalf@`&R_)%$N3#nyAL&)eIvV8q@QRQ_ z;=yCt)#tyji$7E+cX^NS!@2qnmz2zYrOnQpw(aBQ=?Ps1M#rbwp0t@fIci`1UWUw{ z|GrfI`#SUa?~BiWUtpU*|GoSC_mT7J_sWe}N!GJ$zW*6Sw#?6!4*1V-@mt8Q?MEy3 zD6abP>vsJ1?$a0EEV}yTVAh)T)fq3sPajk-S@g(K_~~&W=VSh+HQH+b8M>mZuOs`eQT}jx}fK-v(NS_lt#Q0KhqZ|b71A2rU-=z432k&KKC>+ z9y~$L?KxTG`%n30cvXB?sYY1&h7X@l zOc&Q$6;wOzrS*&sdjrOAlL|j;^W9v>@yF}qxy}b~m5*25vEr^M`ngtR$Hg@dX2eB0 z7Aq+k?9BA?dHHN*s-S{J!rIHW?+@Q^yXAhkp6OOau)n$ZuH0Rx*8J-3Or3wGtlY?q zA<28*U5j;f#~=93%#@p*XtsZSmYn2ewXSM?zSui1*9vCLFwu%|QJbio8njYn-Quhd z1sY5uKfWfGW~8?t{&gkLzi>sw%kScvOs>`C8EX1mbw4&u`)zVG^3Y0-y2N9Jd*pvR znJr&>GbZa+=%rWTyW;u_^B3QYKK|UdxJr-j#uIZ^u8Dc)%-`H-n3m+pS$$wXQ~f5z zZS?}L&1bFLa^uBx672lhHdQy>@G}J$TX(x%}USeKOZdd-?q@>zc|sZq1g~ z_0m*|2v{QS^3yM=GAx`~vRKN(aqf|XYwO$W_+Q$Ie^lMyA@nGUJn6k@3L4i;lRV%4XAtJ@ zYWDbX@GQvNUfusy zuw(67^~1mNT64brs-0ps?Y;Jy#Lj}oyWNJ|7mJQeVq^2P^<2RI((2ya^-;6u8vVN- zxSc29Y^Kqv#x^6PB|V=kzOT`5*ID^+_q8NZw~ecXJP$Dcc6zjHYCtm=Q@?}u^dI^E z8I<0>|JzoY|JNcWfMx%O%{7Yu8JyEP|1&g8)c;tu^&!7x#y*>i%2&6@I`MiPy}8)P z>A{L32t>3sP5ufMRcFgLeWuqk~@!6b>> z?EyEe%k@Ggo?v;<+{7TeprYUJqigrQ%;`$cSY~;bD|&76lolyTRS`CLUK0ViP?MQlIQl4z8lQe4h&jaZVMPhl^v|Y z=hR)~-JZX{ZR^7S3`?dkzBPUN{&BuU=6?nulUk`yx>C<>2k+X+?zOQkZ1=_uu9Hvi zomKSP;LOC8{<@XZxy}SUzoMjiM?m9E)mO@>{D2%R2l2{VP4|1w1;z6 z{1Xj|*uWNkwEm~+KEwYE8%&@7>!{lO@Y;Wd&Xn9&(f=8oOuZlN|1;^n#D9hxQlI}b zxY$O1xSZ_X^6UA}SqwK8b!mXss%E&}{hxWx+`QOPm$HlMu!StWufSly@ zuRm(+AJ%h3`sXi?(raI!v1{u*|82*7&#e0PQGJH!w*I@5tlm8={L(72_}!_8`jx7G zOKjZk9~YlBy|?tPeZa?#P5Zk(eY$8P`^{p0?JS40vd5f$p4`p#uh_DCzJA^#-mt%u zYeHAQj$_|^`H#bM|0_CM)>^-rWe}n;Y?5PuYFG+gj9=CAwZFCGe|L#k0|L%H| z>36SR52(I>?mxq{dF>4^?u%{hKHop>meA{ozRT{sGQU-GOXZW3o^F_g?4@|s1RIYmDS*08hOOqxrHG$2~9Esl7IScJ@rL9jqx{`YJ(zf(ni*y!e zWpaJSDlH>g^((Jy%G`aPwYUF1`p;ngx2AvY*ZM9wquoE2E`2%e*0t#iZqE3!Gxv>Y zrR$Pgn|zxKg6`b*R^D)0?Zz{I-6`THo_xK}W}2~Ob-#G#ukBqaS4^27b??qt`&Ba` zGFf+~a&M?0|5*t=Ms|hNiuF;8)<&BBzW;Ii6K!*m-O(Sy{`j1>7tWMw*|st2)wFV> zSk4|!kypn8g;}O&us^$TgzdOfLAGC)rU-1a2aK*%KOQe#znm+f)4~KpqlUm=lc;Cml+vYBRWiCHrOfHT427|1;d`WL-b=Y2~EazdMhK_WK{&rZxBapZdu9>(2if7ML;= z@Bh#+|L1>({|qPW|4x-h=9FFujhH>B|Br(Dzxw|SpZNbXq`D(>VoOCL^os0}wV>$_ zOxStiKf@uW`sW}E6#g@O3#muugZ2gOAYqHnhn*8AG)GODv$BWl+<`yz$tS%r2eoE0hC3aN5HJY2 zBzdR)(0cnXBB1R*|4#M)2z~yqY5q_B{|q<7?EjedALRd~GoSxI!-o|c|1&(^Qjck+ zgS7mI)#v}R+JB1w&#-ARvY$iy|11fo@uO(fzQ0n}?iJgpocyeic}vNyt|CRYgF%+$w2Up1NpKN=sLa62+0^4_vF|5P^pHa&b^^^^(6y)^D4x)z;E`R|NBQrkYxo4Qc{$ayy3-0H;2s$ZKbCY^qB z?LEu16C126XYgz6D07_Wx@T@d;RS3(x`Xhm`7B@V7N7ceG27Sq)cNV(Rmr^%qlq z)c@SnI=#HFnxRYTG~M%@dA`lwUNNWFRD?wL)d%0|w6py!Ct) z?1$tOKIR^8U-Du5H`hmpZO^XRHLpEiD(*1PAxV{9m!4M(ZyG*ZCB`S|%rkN4!~YB| z^Z9KeWiMxx&y%<|?OMFj!!5s~D~}k}7k)Fmb?^ABCw4M5X+N?b>PuA2j=lb$A;Z6Q zpVI7zsWZIA7U$-c@^~vt=pN~U|5F-it(Nl&-`fG@xIZtvL^Ca=AxoIAPTaoF3-V$9US(2e4di~kvTo<7|Fi~Xhj56$jB+2Y6Kx3wQ!&zm_R_VypY4{4h| zWr>-&-8y_^o4=Ck(JKKqM>zRU3Y|(VzNuV%%s3iL)p3C(9xbu;qa~ssZ216Nbs&!B zL$boq71f6s{~6kfKHUGy{AK}SiqZWTAb zuv%>D@*k5TPU#&NFMa86(dO7nvuAUKEIYG2UC$y27)ptmQ+xf%wpC3ZAF!31- zS=Zu!lE~aO@$CO@$+PY8Uglq3_|f{{EaT%V;v>1U|1${PROy@cJ?Ow>h7@^~)dExb zOD6JLp3TRWSsVoG4_p4o|5sxIPQ)fCJS<5XYr~%9KBI|scXV}T!sVyiw%>hn=vPjv z(w(B)juw(Ob4pjBS0Q93;yu?M*2q72EB>f${fd`ivr^kz@^8D%%rSZKbbfHyrmeef zbeMg5_QZX|Bs<@u4eI&Jv3WP%W`DmO%b$SBkHzM%{}cUT-L$+v{-(z!u5fk%(A&T-U+yRJVZ*>5NnHQLQ77rT3s!I4RvrFT4U$2(qNpZ}lX(3B6? z|1z1(|0Cua*ZuHzy|$l6@po>$!k&j6D^}fW|IZ-z_TTUS3@_3e^e(!JFx-rs%IR|N z?sSW@j?eG>XE?w9pNnW(7sFZ@N2Kz*{pZ*J878tuE^5$*bL48z|M~Tw;fZ5d#saQ; zFpm5D;{ObG`+o|A&eCAo4&wyE%o11pYSL1@7h0%zinSd zrS|vRxYsT^ztZJ$l<1~iJ3K=p*bX1**~!w=^CXXb$GZ37KPD$W{LjD=Q?Wbbs@|5r zY`2d8cB}4gnHnWwmalX1afM>lx;uR{&Q|sx%V((3|MAL3%f^3Arn&rwzE>Y!eY>%@ z{dvmoi+28o`=zJvyZAEtzR1!K=Z~y!w^RAB{*US}zqx-mC+beW_3yICKEJQpyVJhj z?fG)z*9Deu_qWV%(JN)!DD`Y5@2S}6n`fq5#r{#c7u~cv|G9Vv$F!hVf_GN!vA5&Kp1&HnA{MIV`K=*eAsnY*mIv}~S5y5>`#7s?Yhxt%$Dy3Q`_IP;_VY(LVE zZ2zab^FKq&d(UnE?(e<0-t2wnpZ*R0GP{eP_1;UL-t%$0<>6%Z{tNq&u)jsnk`!y|Hm8SL58aw+G}_%D;_%5WYS0Kf^<% zE7w>zcL!yrTq3o)Yf<;fc_qLA1V_6T;2fywgde8G(PW<|o`X3QdKK8ffv34JfD}Oip)Q!#k zTi$+dD}G+T`o!=3)$^?NJ5De8vG4n#U(5O*&*gTzw#T;a`?bq)IeWHGu-¨gdP$ zi-xRfDguvL-2*2$nB~=H+~{JNa$y>_5$=n@~5_oGjimg}CJb*&;Vxk5!Qk3;9<#$M^uGR^7nKa3Yw zOgz7=6dx;NwmW8%W5rwcX}nczis$8P+NmiBaGn6zA8?S(ww`kH##_I8`hbvItxiCo+x zU;S{lr^;-9;dj%#?KKZ}y-J;*b@R`emkjySzsDOhzP0E0WBM?^&AjUDusyk}JkB9^?))LU9ytCUo~(Z5nBtH~^WlEj0e*bBKcK2PEL zDW~>#wT$F@UW zqQczmBYQ^)uU2i=>A*{QTWzyLuA45sqPu{}dbjp$j|ZVfGr5-(AC^d1T6TDnt0b=L}dnmP;$=1g>uyS}6* z`{LFck#DxjMtLgl`OS0f{IYw;i}MXz&ahnAlH@Vvm^piCXwC|jX#vIiA1>J=`NU-P zqidy7EO8o)LS<=}Q*t++StYI$@qpui{v@`0Zk6wO9J3d^{xxrJ;Ya4f+h^_DSjqg# zS4LYzH=Bo7rogG~X;1FX?K35poQue1=-ro__}k`((c#FA`{V_4HQhQR`n79A#qKj+kI{ABl@97SlCpVM=i|~{fzKYS6r57A z%gB0Vj^P6S)coFGTtB8{xX0`jKlU`I=5lWFx4@zhgYD+lHWM{j>fY>oGr@3j*Yo3V z4Hv$ulAFEyMftA2mCNtl=XTvW>)yLd`npNeDxbMcQhe6&s41z8d0IeAQ^n_A^9RqH zzr63@f28=RpY^oNt?S?ZT=HHVd3t5-f`_v#-m6UPa!Qpxa3dw=VTz34#)W_EKU|ym zFqYe8`tezzXQJ%e-_5XH?^2ZwrmK7l zdH0@~w&Cy6(6y)UXh}>=;hP;fhj~uA>618}ANmjZ1?2hexIQ}1?!O|lPQL%}di4&I z)ww}pT{a@KB<7`lO67@Zo*pF4v$iqvLycwM@0gGCI@No(*lHh$>Y3$Z=inW-V^7ny zYb=GrN40eKJr+C0ret8}Fh`2v%YO#Gf0xT<^=`R7>+Go)W(Cpr?|8+1%Ulw=?MmDc zfkg_^)k+hX^Y1)#+J0~C`5zhmyt7?rmgZ)8o;`ePu9dBFK=qf2%8?SL3tG-CP?@+b zlHu^e^Jk2o=za)4%%5>_`;lA|uTJaRr#>l2*Z8+gLr7hw~h|`>WhB@mBO!kC6 zxIJI;baL_PKhYol{@LJN@F{b1V zmCb&z|A$A(>eYYCG~ec|(-C8xWs%SO?CE)a@Z_>XNdUKaWMymEc33QeD7l~g`P<{ z_Ik&AZoyM;ey)%GA?`UncJ_%Mor_%`wqM`>P)*AFxb&Irhxrzt|9rsU=L4VM(|7#b zXNY$(yfU4CtJ&wnzsYOw#mB^R=G+f3n|AHcqo?Z>w-k$PKK<0~etP#gPvJ9@pB!qt zdyZ}1{NJuGCP!tm{bvZ(`w~_Z0U@HNWCqs=XAnli@f+cl{PEi9L5@ zrre(@slp@m>3-XN-YxNM@tHd>{YiY_9UG9@xhyL7+{l)xVFW1{? z-HgmGe(`?Q{gW#lMW=jfwlDib{vnG!?jQEM zU#O{kyjD#jw}jW(quF<(fy%YEPijr>lM1bu992#WxwXPBXWQfExaNnkN{^jC#;tx7 z{hy)pp464CwN9tKFUOzS5TW3m?fFinVCA=E)=#5XvOPMYoVabnPYvdJx4#Rw<^~_J zN|^3D?f#)%+qPCd{A(XBHQVM>?CqI{C3XL75K_;Myei{yAabX?o>2~?=$uYJ9N&bHMdK0?wmC{LS+>veC1mE+{(Q1ORWA8tH{a6f4Te< zy=1!kyg=qJi-=1lyLNm&tutM&kYC93%9|C+K}HRmSr4B0@p$R+_5B^?Cv+L1{O!+O-W#IKDg31%+8I|T(?%$$Tzn%=~&U=`Hh&f+Fq~_mw^0TVj7({Ql?P#s3-V zqyJ=|tP{BV(oXP0VSn&-lNnoMz5acV*wd*0D77hi^-Og`CTZTMKO4>Yb{pT@?f=8~ zkM(~B$G`gj8JLy-GdxI+{?>f|=KldNUod2VCPxe28&+sv1{)gEW<^P(0#s6b-|IctVfZhM^`uhJ2_Sfyd|J;9U_TSA2&d&8PcN)Xo z`JbWX{^9*!cJ_aUpZ|9LXV`!CpUM2&;s}>y&w{F!gR1U_8uu1% z9P`qDH?P<~|MvXfe+Kg(7yoWvQGfpJ`M>}2Kdwvpar^N8FGyNzKp}>q^%t_%uMn-- z|AgcD>n~LM{AW0D;y=TWRsKJGYi$2BIO5j5bw&Mol62p?V*mUf%x;kGzr@?U>>rZu zOZB-~@}K`RKw=LZX4enjNAWna)1??*)^FR&|MNcsJjkos|NLjT^Pl1Vv;PcI&-Onw z`SATO`4b{bS?OC8OgK;EC07^-T&3U+yDM^eXsoOsQy3y8Sd6Ybk;+3mVk8r z$*g~9{NX>tUv@ATWDwu<-`Rox8Rq|9|NGDV$L`;*E&k77|2-a*YIe^5;cH|6pTY4i zJZAqhv|K*?pW%xvH10tuWiOJ=Rme7j+>C735>(wE7x2ygosY2F^Y8W*{~6Bzy$((& z`TvCV`TsLqDEEa#_p!{uf6q#HapmtAAQ#zn;cCapsH%eeG+D z<3G%*sQ=fzEB_yx`#)3F{oN&&H;+`Eo39v>uBbQR0fVP)6{CYJ|F>t0|1&spgZ=OM zclV0_3>TbUgH!HxlbZC0{~3Pqz1;spQvN@KsQVwaI{E($3*OEC&(K)-pP`qbWdH45 z{eM~Xz#*^x?b%YeQ8Cy5sLg{b24~XxL#Z(1#N~fnVE6vJb7eiuvWp1L_8-@c|1)q- zZ{PpIa@l`|2Ql@J-T$b?fed>FwfSQRDl^2$qVW)6oUe9 z$A1P;;Mp?&Yo3+=kM;OJ6UNI3#cx5!zrc)n`_J^ue}+S;NLoSJ%x4S#FTTwEKfsRm zDZT6cuSq=q```V?CjTxE`p>YS_4?oc4EZ0|W&Akp@SkA;%#FSBA7*68f04Od|3MF0 zs(`iDBWdlu57Cz=;z#pYOl>SN6l)j>Wa0sDV3R0m!YOh=jTo zlu?^OZl4N|9ihMM|C+1H!O5roQ2c`b3>WhK{xci^B`#3({0;us6h0pmvHfom?48p- z);0fUaNKJRjSrBM_anTj|Lxz({|t`W5U>7Gky`$jWgSuw#lwT>KSRrYC*M2<`sQu>b8p)3fs8FWmnnM?u^>|HJf*_%BkBz+S+2|M0H8U(apM7rB1j z*yZ-7NnU4`KUH67dinnAFsazHbNAl86ZZL@=hNsVz0JzW7V-;zO|Y84Kd(anNR{t@ zhR&P^+pg-YQ?LD<)b&uZPq6a30G(FFa%y;kYTWz z#~$P#l)*gz$Jzf3%FhqV|CZ6S|8j8k^~K@S{{+vwmM>YuFE*XKn5RyEp6zS%Srs4t zt-so-GADUbaZttUZyg`v56?||>~p=pBVitALgTWnAJ(lb&$;9i;L>Shc3yb1Nk@aW z@Xl?2-)S&k)mBD+_Zn5EH%2|Xl3MVhiJC-J-#)QSbDD{w z^Qt43w_n~!f04mdt-W>c_Q#z*@FYpOC$yKg-Gk^I>DY_Qjdmh60~pT;kj3f@1%C3*CO_et?W z3ju}&=`*&T?~^m#^6}gHWk=WiEDQV3Ahu;b&r$K43DzYqx7<*h#xv#QUFFUB>J{gh z85<+-8U5{8^uhm-ujD`JS8>W${uDp>sq@vB>3Gmb)yp@$%ky4EvhKO`Q}E}?=k6Ds zelDM29JYQRPsQtykJA>ek>}gHMSQ;KmU@wvNuTa)%KVr)!P%r;NG`~w^p;)nb}bHt z*$Q8z&)@of@LN>xk>15WCi!zly=r&c@o`I4)m$m*h5D01m)?JtBQ#&B9-}wn2dbjKn{rdInmN*-$B}_L>PQH(NxTHF%=0C%hRn}?;mU1`T{usL@ zPX5Q{56wsFg}=OK`C)bW$hCh4*HW&R^b6d{DmctGv(R>r!HMk~c5}umXO#D=Se9DX z1z%uZ^YQxI@C6lW?~llZx9*926svrA;+kFeb}xMS@pS5P&CWE{n@`;)=lHF==XG+6 zTtUSeU;A$pclCd3`naxl<;}RLKUNoQ)Kk~%?Rc4^o8%sv#9UA8Ii#dJHB4c-~05y=7X619~D4sz5^ zYN_3syMtXW^RfP|A3wT&KV0?0*!QE_)WsFu$IPTsix8W`B79&Cf@<^~dVO>V@y-zv zH-10WCAW>u_9nCG3;C`4oBnt{wr8#I`4NBE_tERS(noLkZMT~+-R#@5NB8dK8-?;( zFG^|b5x%$MJzs_Lv3a(jRZafVaW(qee|RqW5q3X7e7kU#S@7LghaT~3>VCUryrNTD zXtHwclNg4|1^gcE-==)Hw)jDR`#qIQC2!-S>?{k#CN8<@%75FtyE1)ZO8C@3o2{)^ z&wXNMFe;w)d42Q!ty_uG-p}}Z~9QA-Mjto?0-u2hZsLx&9~8B#q0m@-r6oIP=8zZ<}_z|2`IPd^#gE zHIirI?%tipJe2m;w|}en$TE-Rqwd}BhgO(AUi+iz+^hG8>r_6*9$UQDJ2ju#ZL;1% z)q{Q7N6Sv-Owj1bxj%{H3x8u2`;q;v|HMAZOt+K0_Ep#HbM)3&?nArocPj3?VlrE3 z?Z~W*#!=o&jOJ%lF6;fLhmd(2U>rJho zYt8%r4BYWYK6edYfdTGu~X z4_dLjC;pGG{D-Z7LLm$1yZO+06Fa*opT%WC`TQ0+?0uKN0)q2oR4wZ-b<( zXXavfmub1K^|0>TzdyAr<=y`?ptbTA@A$aA{q2Iv_^6Fq%_{CzyX&}?O#Z?3b5d8# zn-~AiwtqH$p?$vlpX9cNld^ZV=W+bKVqlT{pP`mjermJ$Vr=r{|E}tr!@?=^Z=eeq|3FQ~(#U;8K^rW*UFJ0879ejjK zV^ISJ?t1=i`R~_%UPVC}FPNgRDI-x#W`0e0)AhaT_eCFUVK^K7&v`#v{hO*?{~4~7 zhpwJ4SyQn+y#HZ$KhuxZA=@`x*>cynR95M|N1x`BZ9AL3nKC{4X36xvGjO}xlHS|? zZmX)6|7ow|`Ok3UYw3T674KHZ+5h%f6Ic^^<*TO&KX=T%=vULe3A~J&-lF?{`E}7P zGuJ7U?JO}eoc8Gcn+#>;^l6^w85nAvKeW5{JAJ4s1-0I%O70iWC^Zqks-kpqHI#pU@U*p36499oZ@B7d2OLC1b^Wk0LEnhlJ)(ZP8 z>y*igoY|AZz)-4wY17(0_FO-Xol0J|V0)~>p_l_^Z1?u-74|%~sY+|GGG4*F;`VLD z)=L?23HJ)_IcTw6`KS7GXWpHvi@9rFexK|wy7%jrPq+Uwtn+vlmhqqA9>d2U*SQ~_ zRSp!W$TMd7cIo(nnCO`*z0*%l*kT>Jo+sC)cKOS1*9&@U^K(kRH+ep3 zIks`zRaTKN(+ed}oOo=T!Mw^}Ff%CfblI-Cv!jkSsyuqgrRJclJBO#q-fH&mpc4_b zyWdWz?aq6@<>%_BkFS>%c0JPFGSlQf-w&lqlbb2qH*}fp-qOW%NqL4@yJ6#`(9RRb z+=}}&mwQd}We%A6<@XESD|Klw zYaf;qzVJtQiR+Rd{~2ude|-4QRA))>qILBtck9pV z|EZ|&mA`2o|M~QP208i19;sGZ&%gOz|MTfT!}0bX7ZI9F82!3-B5@Jg8E_fqAbIU? z+JA+&-{+>ga0$Ytw+^g_^WNm-3Dt;;W8+cInS~y&w1gGboqXe|<6km+K*8s=+Cuk#T)wy{~&*ruWRQ22-;`#pJ9P^{R1EQKd}jy^OIk$2wmJK2+;x7!w=etW*1mX@vF`Rik?p;DmS&*VSD0>%Ff2WHs+*roHEef!csuiv{1yO!NYvk=XW^c%K&^`FOAS{&Hov?7o*tGZFlhP%X+KdMEEK48-Kp{&+GU4)Vkj8f9PFt`Cmi(e+H(6 z{|t{_pxLqL{D$hP_;dTd6q>&O&%j>)#H0D&ZqNS=3&UssXE-$VTe!dEE>40TE=3h(^k8Pv+-BuR=O7K&-k(UKZDb? z`TrR}gG2p+-}<}kzvyQEXZZfr{>RtsGZp_1{%0_$`%)ef&W$Q3kFNEM+z?b6Dti8| z|M%-ZuZw=0kL+_#(eFnrPR)ML6?Im!luKbxpYm?olV$%xiiKW8zt!I>yH)gDea4T= z|5_&hXK0Sd_;>N+^1qzw{~6deT;3^n@_T&Me+J{li*}xWD=$@lVN(5vY+m_Wb|(KB z7OMPbcyLWhZ&b|ygpa=De})T_?*A}t+gc+0h}X%pZRx}R3~#0caGBpuo?ZFQV6VoG zxt6Nc>-W~rtin;$8Y|5eE&i?c`d?-Jc)ID*&zX7>7@PyaLY^M72xwetJX3f)?^TeU}+tB-GW58e@eehPZSYu)4{ z{~2T#eE7HVQMHAQZT%|Oh5o6_OtyVGvh01xZi5**kHQnrot_c9Bx%LMUvtbX_oBAj z)@dKVbZEZRpY+w4R%MG<9(=em_iJp{sryG})oxu9p6nOe^)DmU*~ln*D#J9v?UQ=9 zOgX{uOa0;Z-rRqR6+yS7w{O{b`SfYqHQ&OkYhGt}s8y(Uew!8)6tN^`rNY}2DpIdY z;7usgqkoJ(?1|&~kh-j&ckbCq(Z0nKD@*j!=gxZMp53V;U->M}*nHuH9-gG#zWo1e zZ~y)NpW(t;hFEajJaKx#F^}&e=hgqsvVZO%6d3r^{>#Px3|2Pte`Wki*P(uQB_oa@_zi_40p))GUU!d{N!qtW%=gj->M) zk^jrJBwpaeE6K#hDLqZ*3I7=$U;WhocIt2G8~?79|6|@4nY?lD#jh&e0rRcnat&`A zyDk6yV1?#9w&!2hhyA$zAbW281NH72<%hFb&CR7`Zk5eVW?rMkKXsEyn2HwT0hS4@ z3{~g8-vXym-8w`UdcNY-vz4uoUi#wFy8Iox%<5NHLNQtW`Up+U+%Ku_}mvuPaB`ykjEuh#Q25%k4kX;`91QJ>pzCnA96MS^ZVdW z^ZyK@@d`&j#2-znj#&6%`HelE`K4=o*ZesCw(R-us#_I$m!xYtZJyux9Bp=UYLLD% zmq+M%=ZE}F=O46flK;?juzrc}gW1`<$zI#CZ_8hso$dX5MO~L3x%8$)&g4F^U{Ol+>F+a&L`q9|%{%w0MEB0nPKAx>P`1n0vw)j5V3%N;q`#nDH?=Zbv_U)$2 zsZTef)%r`5(+gWvIzDWB=bt{Oe@|h|znvfSk5sL1)6XrjZ&yDo*6y--bhu9|UsR}kNcg*)X+mZsL*zk3qEerx`RiD&;a*nI$Jg~0y|oGSM}zlS8v?+@hL zUM|VDT66i2t*`mv@>_d^GiNc^_N<$5I$b3=UgA=)OkHBK+zoHT`#aX3`|&B7|K{yS z`rqbG-TQRKWY4sCrn~I>dYgYlaVpKyo)&xOq|$L4)yl$eYbH2YF#N9mC*-;R`91!V z`+uxDX)ly>{V4atOCRcUOlR&FyBD+PfQ(}8&D-BQB+k`tjMrLzuwL}%whIRz+Hb6w z#634DH~E-+_V)h_onQBSh+Sg#Q>gO%&S%?lH)}14a}$}cdd}PAZ)w-Ry!fN4-d@5T zp;OY&6aAkdyP`IZI;*sJzQNp{SMw56?kr(`%m2eu{XfI}3UC3j z+CK6}YyJY&#qk{qzhlpo6)?*5rOa|}4G@|9%JT(-$^=H!Py6P72tTU(aq1q&`sLaB zMgNpz{dm))*J^23*IH+2XML*lul}^cV20qHWv}--%S3z#KcMUH^rQ0OT7`-ea`=Jwh+52;&sW3Tr8-Eh9hTzIQv4|8J!Dx3c^oZq`2R8Gi$m~{R> zgWb3P43q0WhJMWNtWTTkci#I$@7n2Ydh51y=ku&xdu;Ne`jqze2t}LQD|_GMwJu={ zx#Qxq;P}KHrcV3$zV;>`n`e1(%^zdE<&RcZ?y={wGn)5{Gu~^Owso-1TJ0H6bz9T! zip1|#&u$T6{Cd3YpTWy{dfI;{cW=qxl9a#kj!I-(+4iaB8@P9P^q4(Y;XNSwY#09z z&*lFa=7~X_wPiljpORmpS2UMaR1!c!?)6V51Y z-@dQiXP)+M?$y;Z@|IRy(pb{u8W}j_teCmRiHXtg=3H;e_|L#)Mea;d$4}1UZaN-Kx6B|BHJzdiBU{lBOTkMwi#M50r@ORpTe)M~O#Oiwc(hFa> z|GL>aMRULIu1r0;bpAx9GaEf#?tD7;sdQw6Sm%zWgBi^C|JZ+2x_&gu#cJ7?8A!xai(M`%S#x7O(P8Ef(>V z-4b|LWNwQ04poU6m7dQXl@t4HZW=Itb#Jp%yH#=8VdJ$gW!v{$)=&PN`~1^W-`8`} zvjq+>FVGhFcR@~8PX9kcOZwZw`mDc;-_(D6Ui>Yw@afO}>A(GK=3A$qeVTv2uQ1-K zM)ZgABmed)*{92UY*cEB#TGAeo2@qO;Y zzmINw6)q>=fB4g#%;m>UKe~GT(yKcz*Ipalt(4|zinlni&qLZQSFyWq3gi3m`+{3~ zvk(7M5m&nR*lUCE%~vVz23%q`rxr9%JG70{=kvt52O768t#O+ibLq`jZRLF1sZTTu zS@uP*`*GJ<^PoaclZCSBC-bg4jvq~r-M0O^_95%XqwM=Ucb#&DUfpz?x-RCd>d~X8 zW-bX>crRg5#CDG+!3O8Bko^ocspg%zdih`e#=U!(crn~(-{tovD;8b6#9bluNPSku zl3g90$+P^wwf)t>-ygh_y1V+lldY4 z!Cv=6X1~_V+TXOb`gUZA#+&tDs(;UwIJ)X?W!I4lC{WFX4z)3 z+4jeF>xzD<&P_h9{D`Hh`Sr%w+q0KkZZuk>FZQ2dOUI)gt%T!G{xlw*rXaw-%l@I+ zzW)sK!u~USKMk&Sb8G)I*sT4}@P68VhDWO(gIIAOE#2 zx6fu}cQ9oK#X4NPt@!EglGJb|r?k*d-`@HjnDuQgyYuy}?>lZi@2qkE@amPU)nnfe zZ(Eo4zP;M>Y3b3mZ{KW+nXuVNqv4LLhGMtC%z*f(408Juf4gPO+TJaHxa|7IeJj+a zT`f*6*!JnXfUb_=9Ra0DA&lK_eUW8m4!3T+sefpJXeNRFUc{rOn*nEHM{bBtvdBLsv0(!j< zU9WoosEE$kHHYo&vhDlMJlFXi%$&PrMuhF2)Z2TyH<~rnn&*F6L(zp z;<*2QU%Nxr-P$K}^i*d|;n^-+Dc0o>es+^ruxd!d0@J_75B7H{eC%youHSO#{+&BN zvi^OK{JfUCIa}=XjwkuXXUch)3nZgu3~d6~Z~SNYF#G<0hV#<-zn|{^vC96T#s2>c z^UCgn5?1JkulEo1OZtEK_1a_OhxTvjotF<;9hq3HAPo93a>iV zzIdI7s>c(#76CU$wmsaAAJn^!zd8SCJ=a{j+m6@Qtuf7x)w_2^#V&k~>Thc!ca@1p zeinGSJr%dtyZEVJ=cC?K_9N>hUl;zUK4|}8LQLEr^Gmv4cDK#qE?#)Y^w_eEyMOkr z7j}C*t;1yF3=i(f4)U2dYP@F$&iba27;r84@4R@kTM>=5UH=&rbJ%07{+ynA&aG$( zR~7rQ=zO6cN!O1=)gE24OZSX?X5TxHd#Z-X)*VG$J z2F~~G^TYPc{4(F_y1CKG{sZq?=c_sN@EyVvg0 zqo?nld#3bGl6cJXY3_y7-$j~MM5I=(wdH>NZTcgZOVWOOw$3e0*^zB~?Xz2of!WE$ ziW@h%nh5#+QdLQtgVV)Tz{hbfaUkw;ZX)rHCQUIz-{C`AP_5b;}^*_Tt>HiE}P=_YG0&DsK?((53Na|vE`Jdq-l8VXy z#KabZf}{R@8z^(lT-0Ed|05y_$=DT7KVJQNA~YknA&k#p?Uf??uAQrDYW*+#XV3zL zOCpMORsDZHz}=I$>rw!F;C}{g{yJp^lF+zn=gVp>W5xb@%j9&atOmFFb1{+ro;?gN&k4&Y>$=4^^UzP%<543OB@*rgMgqUU>Vq zR>o^hZz8H|_H--r5EZCm-DLGgaG{Wsg#`d8Mgt{p$3-#(w~ zkK-d>&pqBNqiJu~)-gBx#Hl=$<3wuW1V@=BZGY$g4E<&Q8EXGCd@*(q&sfyO zps`|IdUSN9T7uW2X$M}_uG(;=N6oQZqG1vHsf`7W7GGqZ-`>j*w`isRs-v8)0q5SF zFq@j#dxqVnpz<95Qjb#U(oDu8n+<19p+mbk#DzPTIspi=O!7 z6YKlO)Z*fnsn<&#iu0G7E&G)*w?RE~r)ygObp@Hnvb?2N{a*jGU$g)9!v72xq#Eq} zKa?Md?}*d;(fP=pQ~%(Kw|RTaZRbR7dH$&P@fz(yfvHl4-wK~48P7>6GMxAIotUAk}PRIh2S+xuJ<1nMg+Wiy!M zZ*D(6i~lYA!(Q)z>5kWJG*{j5=a}xf>!a+GPqnii-LszUd6I93&|#xF$xl;DPu`w& z@`v`vaMvFPS5-)z|0j4i+~Yq(%j|=nE7sTlQ8nLOJNxvmHvY|zwfEbVz5UOiXZASX zQo<^3{^S0}_$}9)zrL5snkT*c!k)^MeSA}XKm3;t875p8~ux^(NhjSHNs|1Q^U?QF=Ds=WNP zC+gmx#o49@w;#!$cBggYv8?R}?1ldrU9OX>QT{Nw!tQy*kNpdLcL!X`d3#GX+~-P= zr0dhCSLSO9?iOd+EjszE58Gn~dH0C>HTqX=BK2F>>`Biqf0WV9X%b-*ZG3NGT!e({ zv5!46PYUC*AN2S9WBxHS)8wDn&gNjTf1uubQfa=Nr*x&7r=Z^Bzb`-A+}E6?(Qc%n z>d!83_`=TQvT5lL-fyz*hjzJa`)79PNj>Y2a>qGw6*Zp5D%~sM&uZJxh;o!>i}-VV za_^ztf6YTLUCRIAo-|2g>sjM(O}|~A_MM${nD^+@E7Q&$X#BK`DddZSOaS}U&E|(Q zY+~hGQ$OYy{#fL=)5QH_%%iVu#uM3Zna{I)d1+PFuj7aM`Csk7Ics@ePRU*GxAsZD zWuNz`cI9u$asO0idcgYIN$K2U1+NO*L_a=TS@>|t-#Ip(AFe(2POHc2*qwNcyuVtuZeH2+IRbd>q@zNj1M=wlt{r)q^r);{iC){+;bx8}k zDBA=2=Vh4BX031iBd@k!t!3BR)7gc$bzi?+e8k-CsY05@rWt?U9^}^6$Vr&0Y$^It z{itrZ>jnAHD^EX)&CXq)ZO41b^?U1|+J~QOQy)!@HWD=X-EqfnQ_3czbsm$S-9GpD zTmRwrqCd(*uA21P)tcRQ{?YVLV$&6UlTIbw^7OQ)O-DXWJ+0-X8k4?ia^oz9+y5C_ zvlrD@hyG_cc=h>z23b&Diz2XYSr^08{STvF%)fWF{zGZye}>~3=$fdY<}Ao^RC9{a z?LtwLQMYUTKfZi>&_S(hP3&XC|1+?g&;L`lYnBG{wEqmPnG1it+W*6H&wtQiu!pu> z_*)wPN9JDr$6K#57Vt&?6J6H+dewi12hTqLXXpbhY`EPCz|&-~lsxm&N9X%(a@JZQYOuKv)Ti~kw+ud09W2xQ8&^=PXS zgHE}d{%7F&?EVk5+o8Gp{6A~=KiX&IfB#zkk5AwJ$J>%EL6T?DOaZwCSx2m{2qTed zb|bq6WX^jen?dHB>1WCem*l(E|L)w;+pF&C%l()<`F!wM{Xg1!rGMXoIZ6z^lH~$B z$o=o#{(W2dp8@86WTjgIh*a?$)r`4F)7}N!H{d(m;!-qmx z3M^jq>s9>^3s7R(dO6TR=KLS+z2et{|1%ssdjCIz?Bai-_qtz$ZGHcrq3^n{D?_pU zhjm}#WBva#u#4A&PNWRC(q9J(#p?b4tPyUlE%pB+eHU`}Wi;3wyzl-q{CHaeb;x~) zL-Nc2GaS#fe+Y3ZRLPeBc0+_)!Ad|m6XI6$-TxULZ@qlcLFV`$?Ox%3i|T)Xg|^fm z+HukTmHK~%X0S_aMHtVZ3c3H$u0^Pa8G$6UrT)-%Od~)-s!&rsY8V&adbs%Ch137u zyuYe9yZPX2#e>h+2j1G%1zSYPJoi6CYyP78E8hPZ*uH~9DD0Q|UzhzKz#hq7zNo>n z{|B-V$ZyCRc2#LG&j6J~3;#19o3cwD)s*@lVYlR$ErmMG{f~Ao|K*_n3En%rh$%HCWdF2-}6?j94trhlZ`$&0<0*;MveWu356nwf)t@MTaQ<(9c>J%vS6iRV zm;9&X`tW|&d|{K)X74NO^8Ygw>|*{=aL@M5{+nf+oX%_Uo=Ml#T^MvYX+{L&)dP0D zf9G#Wy=W)$GEn#3^cuZ9{ZigvvIll-?0#!LIi^y;gMnWrE`Q2$W+lS|FKq3RJ6n<8 zrcH>5ULtnkcs^5lPM|_)%$zxl$AhD!05knmEBC0N6t16pQMKHY<$bWJa4AWtP6`8H11@c*Ew_Az@<`o;+18dT8lL_#OQ~ z9Yz8e<@SOYl|zRx3QU6Rk17Y$s4-eW6Bjv445-(ihhbUAve4JbdO9p`xdu zA@iy=)~zG3sujF422&05+#mh_8B{(WmH)=I>p#Oq-SAiK7}dO^t4NSUpsNUMCmN&Z z{>c6J?N5Ykg)z1;1iDr}{JUTe(~{=(MfNYxJ^s(od&R4;{m2`a{P-0ulfE@^}_!Q$6kEf@NVB1xoff)UrY&~`=0?-@;^gc%Ln$q z?E63Yf!uKsY_jbd>%|u})?1@XqPyzW;a$HkR7aG%FA5~&WcRJ@rQa8P+wiX6WznAZ zsNQk^W1`+*|HAe^!=!TgKY9Jxe6Qa-zD>L<*Xde$A5HE@hCf67h3fwdD(~!n+?Bf~ zd;MPXt;4%)g}OfPN0pQRaA6+9e})C$|9Rf2|G3L`jrIDy>|5JQs#Hav*Q3hWqXl4f zM0tD_|7h?c1@EGrzgPbI^`F7&0a{>{%KT^8!rl<0X8Pvg!T${X*8TCno+D!0V>Gsh zT4p-^-d3o|K~kLwJ)Z{1ciofzcx-X*k{7d9?w=kDHrF|XqM^!a;ptnwzW zvwMDayU+;2OcT7KZ^TWHPywf|Q-GoFlb9=7@ zJeVLFq5a?sgP5P~kM|C;t!Vqn`26>2AI!ct`F+=iD26oD5!35)>Mlgxo&9>hz=C~O zY6^9-l_S?g>37^ZB<-PmDS6lKiU(m_r=@IsxSyQVy*~ZZs^++U)JDLo@cX9THFq7RZN7g#S^BJw$Ez1TM~fU%+!(&Q+8;7tT>9`VmpyOBv|TCT<<~FX zUKzUM%DQ>a9)9xt`J^ba-{Pb#!`i!uvDlE+RcLiP8-M?Qh9_@CzT>0+Z^h2NA6(@b&V_m%Q z&XehP8G<~&AN771xUhEGof}*3-)8o&-`;Z0`>pi8i}N4fos&Lq$D_S+T@0UI`x^UQ z-lO>7+OAfmi+ajkv7WQ7Rf_XJN&1)G?c0-{@iItc`{YAXO`W%HJO8`NcJfK@l7<6j zG8QN7JPRIM9^-f$H`|q==sw@Fwpg(z(GC-CFTK1EA(okYJEMQp+31f z8%jbLhN1oJ2R zxAWV7Jk4G4gYWr+_nq%dR+P$mn&bvu)yaJ)ByYM|_O_&QT1bpVUEy~FtLtn2WL_^_ z-@cz=PvokXRg1UnUy@Pm&9=OdC-1CHVWGB~nNUPZPx8DO%L^mE#lKx`|11ALLk<5w zt^T*GK^GFdZv{6ufsgFn7k)hY9lZ6!yY&xWBsM)ebkg-o`E`M#g)&DK zg14|IwVD}C{%qXC_1uKNPhMcFyiok#^+%*^*IwGUWqX{&=OfpvO2R(gk&u7$I!R7Q z%7TBk?ct*e#aq%`74yFL9sgYSQa_l={ABpd{ zCExtVarJz;d$ImY|5$GQxM=aqZHg&-lpf9#?l985eUdr<*TH!zqHq5*i2YNrKYU*B zkKN8aR~KBgF|1zgpSk*8TxDk7ik!SSL6voe62>YHGVA=r)yp^wHompLJq>hk1JvrB z=`Zd7e%Svfv_^Mr-9jr!bimBi|elWGPm++_dlVEFOfE;H92chfw=L{ zhwkXOD6O{2vpy%wD$XtMR-bpP{&M~AZ~Om*%HQ5r|KwCUbbCl*fy^pzSa#rtFbjieL{eGA9uPV7IY!cfeUT-Aeb+YHd zoR9Mb_Nl$HQ~1EPI643E!^gaOvC6(zzUqeDy#3gDztqwR5ncy6dry5fpQ!gz{LP>3 zXI&2+zqkMPwRossC;zTH4YnPW?!wRhWBj=E+V(ZCcIE6Bj1Ru3`?W&1q+USl#=EOa zt1acC6;29t#q0DNmw2uHl&kWlpk~_P$M28abq?Ni>DGs%rnmJQ&b&+%Oa7>0#Do!ZBL;g zt3A)UUBBLKH(fVxy7)Tb(j|%|Kc`+ZY!he|HkjTioiAR>-+NDU+jSdPo_~g2_m7(S zF6=cvxFOP|bH&NR{JigmtIi7SOx0DH%eBS%Sq&d!!NzaX|EP$?O_%@vwjQ*yMsd@Z z9ngpfKWHcaaoP5Vyn2;if?4-`yp?t4`>|7&OE(^JHbxhC`oDZ;;Qr4b5cMkT{*8xrM>8Tl-uFapZ`vpJ zvdV7eK}X|Bq%D;9mWui`A+$y&uW|Ksf==JtYr zbhgK-bb0ee|5ngzUD{o5!o50U=FPXuI@71=ewzN!gaT^>Wt|*+m~f7PDqR7aO>!uxMre| z%#K9ex96s2eq?)8`DAFiY^~3Mc%hcHifAQ^K z@oM^Xs~REK`dr;L{Yza=T%2*!%Tw(SU(}H{*4V~d@?tfPY#Z0t9}PMEs9XE@?p*VS zPpV3c{%*TxomO3GutHD7!|CMn_n)R)n>}l=I{{i^cluw&o%;LQpdtw3f{$<)bo~?f z6Y){J^X=F0kL*Ws6-_oSbgwO684>GTsMlX~N#oYjEk6>Q6;AL7&rD@?50JS0wZHq; z*ZoJdqcv;tOwPvnOCMddz1i+`&dp1wAFBUcZ2a#l)5+)gH$T5&Fym#YU5sYm;(x-~ z`ad7T0wkXCrMzrS=Bhs?;`ih}E}5tQG4*)M*VV6IbL8YkeyLuekhW!lyNttXlZ62) zyIXc|*EpcGH{9{Z`Q<-eKQh~yyX?oiYx>+#nfqj?-TQRnwoOpnow$2TracOG<4F^| z?Gz%j(!#gTa{>R#{|pb!HvMOqXDIu}8l5p61mOeW4ncD{uAcPL@85Z#TC()~c+u zYw7FR8+8s!h^F;KNAo!znAz^G&`=wj`(ah#`mGOv~QTaSVir+KlTNtoof=5-v-CrS+in^+SR0qixU_=CQts{b^1r}sSBS{yY9ia7NUYTl?En-f5Nf$|0-f2=`N`LQ10^j^W2c!_d);Wzju%7+b`WZ_11LR z%r#lFVq4A?#httNt0MPU_1ivE-@D(^_g1<3PFPd6|HJIe`=1Xz{`WH;w8zGG%ln@X zVaX>Z)a2`;6FPnO{|V<>BkcOma0qnk3COmG^*>fUT(RhcPPzSu*)RVyfD8gPcqac7 zo@@QD_FX-s#oNB9TVdm!`a{Lb>(B2!4{i&}f0&bb|MUA|Xx5FfHS1Q`WWD}}pUL~5 z-*-YY@Zx{My%1xjf|@d|i$H4T|L`lh|MMYCr~4mutNcG7U|QBmb?G!6FaFQKd2aT9 z1{=Nl_aLpH9aFFU{xi%Im;ZhUR6QTO=sF?dz5PS0%ln_V9*4SUE+iyC9@t@}v58xG z_kV_#&q&%r>JL>yf*I^_4N%~`|0jGF5;&j+52!M@hh*{UCeYfrP295Y|ESM`I#c{V zLl$%om#*_cF9%_F4YVZ7K#U^g$eW1mA%aEdP{>%Lkm+Jp# z5RI$5o2jvhTdw&(12;3&Tc98UE%8hL^E0Ix23n;z{Xc`<_jqta>GLeG zQJ9L%reaZ)oEZWG6%0>4vP4bWTIPpsH+dHMYEW>Md;r6sXw>!DV z4CN(m${Aih-7jBQSnu>vcU$8l%d=;K=Ud&fVet`cn{3JBC|ebrJ@2ahlVty2-{gM@ z2e2;(t<1Y@|NP#2aQdD6Pb_Tue+C=SIf^ZyEPwXp7RHzLA2xm2|MPJIi>trS38|{N zjlDoR>Ond}pY>jw)L?5SFB9=vPv=qF!_>(OCx+0*_usGo%X5MCmd2t6!A50<2fr?xdH!dxwf}f)QWryi z%KXolL6gi-dZoq!j&D%u!K|!Eo9A8b{Wi@jkU^M;d&*mP?(O2pdERyF>5`wS^MtkZ zL_1Hl1y!8*efZ`fYRyL*voG72&(0^FZ$xwS|d9u=~m_M!?Heby% zxmi=VWa-;|d)eDkyZ;Hk-n>0rZ5_U9@dl%I#AzCJPm&M6mKo96EOMmY?>+SdO& zeXR9A!?Nlu4!ceN8Cs%`{Ac(gcd7nEp#Fabm5cuwIJS5GXLw-?x_50+{6Ek26Sv9> z|Jam&b8+0?h4v5aKZ?kiY`pSrUBZRQ0f|f8WvsfR^=4diev*5lk$d8^_q7LialLC@ zw)$g!8`!ixvK4RY{_qYPVNRt=KxRkvuY{LCJk$U}RFBVLlt6Wrf zxyRU5f!Xq>Z_W0P_m4|m@5tGw^JDg*s_#d1EC1+bE4D{pFs&%@=YO%;F=etzn%ex* z&v|yq&ny{*Sb_{^`Boo^>Tmv2{IUAcvAKmS{U5E?Z~5i6n@4v=jO~n_6ZIR@w&_3J zlDWa6XWdon8w|0XVa%`Z|8TYO2A$yyy4F+ZKf@>AgddyUy*}u<vqk-SBAic8c*<*-HtphDyuQZb@9cf5ul5UkeOn&&F}(YW z^k1(--vf+ZUh=m4=@a(t_>+c1MT<}0p6brK_y>-1)@p3|0 zzuUGQ^L?+DmUo{k2)lJpf2+^M)mt(@xh?cNa{g{i<{y(EhZokwUVFDCpQqxz-;Qg! z3x0>pD*eyU`u@xB`RZl&4U=|GGfUxd&{@#naXv7lKKI8~%RQnWp4kdLx^_9Dz9Wk1 zrhm1tH%qbf{E16kQ*{<@@M>e}>FL{g!hhM1=Wc&j)kz=!7#?HEGd z(zW@HAJckQz6|G8axE3>H*hyRadDq5qX^UKveZIxbUWNWUZ z7b-hXh~DuvzM)(Gh(1rn@~K$zxXrbe`rkq&oC)_>!PVm z&qe<;aP)W9!^3>>KS6n+{|pzZz5X*CblLxBa_m}>2)#o44;>Yg|25wVhh|axhXNnf z|FUm^nr(VJ(6#Bg@P7u5_D*}aS!?#1KlEF=BwOm(%FI|INwUXI(tD zapA#TiUnPZpE57=x4FI)roA3X`{I8B{~3hpk({y>;*@rfQ+|d2<5B^Y7XCk+_jrPG zCp?tPz62)hJW_v1_K`oryP5S5`9Mwv1&8Nri|7SQDGXKxyn9d(8>mN<|$o$G|(<{!R{2oBB^}Z~kpoeq-P4 zSuZAE-)Xw?KLf}9j{giVEI}vxuk!!rxo%$+)?02$tCY*XWM-ClJb$hJAEkZDpkueE|7U2LY5!Ag*Y3K__5124tju8U2iHZ= zAPEHp$>o#}(>qdoCvH7!!p0)BD`r(alU|YCqTN~=%ncX*$!5)yy0~V`TH}yQxhB6Y zrg13vY6S_3B=F3hGk>MZyydg@{%7D=-?1ObbH{e-|A_i03OSze^nZpUmv*^LTyFVC zbCdlK*BV#QY25n%8CZkrKl#dk=&ERkMB5d|ewllnvIkMw`>WzPSh1xg=T@u*(82s&x; z(0=fVj+OO4X6fy2-?;M6`gbYuA3A?@|7)I={*Mdn6MIy1P4)li)^kB`r+@mN;ZgS7 zkMhTEXIsvamAk2bWr23FvXR#Ig-#3RpDA5>rn^sU&D|;SPxAgVpgJeh{-GeUb4?HY zXE^lf`u^wF{r)rTFRK3tntzwx<7lGS$=kzon91W&(&=|bN9qzTJ$HBiR$H%pVgEy+ zkNhb9RemIo@Nf3X=RX!d>h6eBzZ#?6yX3e?#{FhimRiR()^DqWuPO@0Wto>3vfUA? zym>`#=0V#^`?>yBW_{7-A3f!EuA2EJP-D->ws!8L4Ig>U+$N+;{A4|u_jvo>l>ZEe z0(1W}IQPx`&(Nej|DSDKxT{`S$#LD~`Bgq&yYmHHD~m-0EB5LW_)igZvOmblEA4-( z?e@}O?)uNrVt>RQeA@vyoq-y17$O>&gn*TN5f+nD@w;2og)c*{)^n@`VU|?1+8z{dV(+ITFFmlrUtdyN)P)P{xht% zWcc>}mG;&Z>*`+B@7sT_!QwsC1#itEnbB4Kk76CF=~Wuc9se0x^pD7Yk%gHa_@99z zzQg{7HRv9KRsR1x*4^u3cvSz83+(0lkZdGUe@OHZKlqvia5mE3wy43P{)f|6=-mlm zL5=?mE$Z=kYUOghPC}?Xh9Y2|KVhcV%W|_4Ho%7 z9KV7Ep}8Jpm^|1pXyhyHo~6MIG7MF;|A!-rkF=qNZGHcX@8x|+2?8=pIu7K^xv)r^ zrNP|#pP@wAnMeEG-gKPmrc|99}Z{4dSbhyQU^%sz6>YED(?+?7|h{0um!t2WQV!}msd z>ylLVIZquA9(;Ln{lfe=30HOWa^3~(-FhUKXB8i(8;jt?%T3v{Kh`%%M{c{c^kT}k zOIu?VuiQATpe=O#K*sD0^-Q_r%d96m$OcUc5nAdgvr=K=+vB3$r3>R<$EgIS*5;=^ zy{-43VN2AO-fMoc1nY}`9DJos%ZZYhpqR2@n=FqTYJx< z1`B&sF;EO}$NpzPHEPO-?=7Xd`VrfA?Yz2X-2&g+i@Nq+nN%MvO3{ju8 zzAyRxXwUS20=m`zYV1Fm)c*-(D4vCWqUzf??MHK!-n)L(KUkr!^eHu8`p%BngFF5+ zv{Z*mKA7(u7yaAuN1ko)qh)JuT@jBH`tYWcv0n6L`1V~>&n!RX;C|!5j=;2ViF+4( zT6mB29%qn`L*8dsdHldx`NLKGhvqUJKV+w}<70jMVo%>~TT54Pzs%e2_2%g^8AHC< z0*2e$^Vsf$FNTjGWH7xI33QN@GuXT1a*g)tySGk1i8-9DGV$~lvtwKy7B(`~;2TBS z7M8B~k#)^dOp>S7G(bdtPG#qYlmhOfiYE>p;yboG*69#$(6MUkC7*MbJIi!VGS`$) zR-X7H)bZh5gV@tWx+4C%hwfUp*_$jhW3&0sP_8U%sV-@`fUiovUE5aI7B6^deB}L7|Jq_E^C>~$y)s`;YsNg5?c?|P zx;kth$Hg`O8Lr;iDZh&`Wyiz=f*Uig^Bld zb_;o0wrN9R-vK#xSqA|()AKIpT+i>t(rC?H8 z=_Kn;p_uLW)f^7aF0Yw9&p0shht;Xu5xcLX+%?%0qkAOqeW8qt(?tHsW!Eg6oCPQD zmR-R-`_iBMHM&dZzP-Ce_`ogSrEd?Z+%A)pJ#6)7rb@%|Oa&gndCxDbmXhblH2c<{ zGwtjX6YiYQWo3UaZ8lnbdv(D)kD99t+b&()w2d=x0w2c-OP1!VS5YoJU^w@2d*c@Uj(vhxYqTHf+M4Pz zhiuP|-mRAAsFJ=RMRCfPgFze$0*<2RTtA2(*cc<-xM=se($)Ebk}3Okl;t|zV`-8+ zX(ThX&m?yF)11^sm04f7v`*M;d;BfoM`HQo@Q*S3L|VU=?~_`*{#LqJe}$#EZsxN} zUNZ-~#Qw6%bCoOir2jUnG5gW{Xl0G-`idm&w_$}scQ@Vj7coDi|265ENNL{#JqEd~ zeGJS?t5QW6pWM^=JJn9C;(p}Sue#ZX?q^SrNch#e>6Lh3RGfuFlV|Rv(AFo6;d}ub zZt)c{uANcGRKdqDm}&Lsh2mCQV>daurbxZ#rIY=+PASafo-)^={NNPJpPlD?t5`)C zpVcw`ooy#mV|e}HwLR7k&u_{K`>=QKlI`Nx&An5FAD5J!H0j&+s(>+~sfVXz&FlD^ z#t&_$cj?+V=08+v588aiZFPq7zMTS5v79!CRSYWkg}7|j5oE}J#J0Dri{Xi#MvdBs z?Z@JIk1qJJ+i!Z%Wu;wRzbqc^nRoG0#Yw{xo19)eSZOJd^?aXP-C23QeWDdjzV(M^ zu}pk@jBT5*ckq>UUnL*)%$ef8bDEp0x+Ird_4O5j4sy;P%pcnAe5fz@*kT&Yx{MNsIs5AQ&jwFO91-}^*3ifT(l81Z`uD)ymL$D`nHvd*(-0o zT+i-3xg_QDp9gF#^U@wzoU>Rh75?V^hyM&NANI_X-+bXox8Llb_VBrV3roJ6ix*m% z6rC=(B$=vkteNkLz(Nn@t}j)%4-tl@XeutRb}p>W&yYU9qf2&0O1|NthwfpPZ;ULz zFDTo0dqS=yXABc;Czq_J2Beng6Xi z{%7^GtY6{}Lnv@BbO*d*A=V4C*Sa zllK0=^694A_o{cB3kwdHOyUiCWDxQ7(CP;E4Wh?0{;7Z1w)10l^pDc)N4lANT$kK( zEjN;ld!F3(`^56IPbWy6=KqypILC~^b~UmU(;X~7svPIp|Kj3@Qz8Ac{1$xDKJzE~ zVfTBU7u&7&&0hOzWoc4x!OFrFzBWOul3FL;II=KlUoJsyMXZJ<4p?!>c=G#E-+<-! zMNJth(~srcGqw&_$3nz|6ch_%R(9=6FQr(8}OFz0Pd+NXSeR$rv*v|gyJCpY^_kWb!+|pUT zuwrkH;^u8)9h;_X31j`XVVd&}V?z}Nx2p^Im)6)nJga`F%5~q=g)hsZ%%KDqJo z+e;kEcaF}^Qs6Q=bL2shr|s)v>XYvKEyBE+V1hihw&-r&|_rG(W*cLw7e{^-7H6){k`ui8Zc*=>oPWnT+ib0_JwF|C=WXw${W@pve0{;b?fR@A z=?{IrZM!d;Q(C!T((z+fd%hRePMxu*?9+|tXn~&r%+DnG*xa~2&uL(Meoy2_^W%8d zS8+VMKgM_3Kh)UtF}_{jv~sQ8Vb%?o*DYw1?ww$gCAmmbrMr`RTcF=NkCkH z_gwj7G|y_+Mb%4x^cTjwnC9*_DQ$Xyu}YxnkwY$loPw_Xy57B)4DU9u+CSWW@IQl8 zrq%g|3%kFiZCA!^nxw~tG*C7efu*o`4o-lSXJg<}c8mR`vZUmX$U1qOfB4UEBe74{RAt-F9a}R}LWPVqwujUk&M^Dr5W^MA|0uur zS^uGbwmUuw%s%|J{O5c8zklt2>^@ig&-dnk|JMJweo}tx`nTSX z`fsie`p}eV&tL!0OP8fugMmx+TZjhcj3iSUaj*1^KQpvc6k3x?ow~{-So4kSN+_Y00z6T zng@Xw_h)5pz5Tm0*G72N>4%?6S+-qe-TP{#*-GR5nYS(`cswyMTCMWQES52F+N~I! zTHoi+{~7EBFsxONt-JhB>GBp^_s~5bMCXKElM`z`X54c9^rjo0&%_+;&KMUgQn$8y z_@7~3)86umYo$CFk|Cv;N_|I_ui2V1T^~d&wT>a0Gl|9$k`|?)hhY@SPobEJbnI3S( zR8#k@z^!{HbDwrM=3Uh3Q!+{_Yh_4a-X_ugPx0@vqz})JNPO|!((T=={5!N{X8u}J z^?A?4Gq!iiNj%7ySp4a2hh*~wgw26F|E|*CqO0w9ZR>vq*1zq} zSN9l~KB~2f)(w6eZsaN))zaiqGnG^O?IU2;1zWkUbtqIDipcDz4MKhhWTW}W=Q?bz|q zD>k<59a~NpuQfid6J!u9@~L)?#N2ZIyOAPK9(;5?bo`=SiT&q~?0^5oe_V`kf$68~ z-TO@ADxx3pT%TSZ?tDmW`**40=v!+niZhqoxgT@N+wI)X+=;p2KDOIq#KTYj==}Jf z;pi{>)|7@HWwOjirf(CgIyPZjRae}aZ_oZ+;#@f8B$rOmqj#nctp0tNTC-N1TR!F| zB7FO;gi$bbF}_W1f4{cokqS=;8Xy;Biou}6PRO#IE4 z;wx9??VOdJk(Vl0A{QX2(w%8`EgTZYGClXVT<s{I&dtZ38 z|FeSd`9iM4s#hOwk5}eabXs7c6B>LbEztGQ@tgIT&Hw5W{xg7*b?q|#pFg2KJ~OR; zOU|$PMco6zlx6shED` zANPlId*#}57xV@$zbbT#*ZFO+-P~-JNoP{-GOek85}|nNy<#47YIH-4PHpVB4*TcN z?t_E(A`<)h$#D5C|90nQFFI1g^!c^bw7yyY8E)w4<>$AU6(u-HzI~&jRHoz*IK|DZ zey!!&?DeEE?b`Lke0 z#IO3d0`=$5?gu9<1ap1w9^2n8*;20SZ<&_vysj6izj*t$-UUsT&+Qbfa+jnFrq=z@ ziZ5eo@-i-t-QQ(rQqh0#+O=u7wzqz%wk>-0Dk5onzQC4gpN~yce&2FD=ev z! zqSoZEc6hYfmMf;-$G254EaW<|j3LqB^wv3U!RLkY!VU>;FD%&j<^HYQ{|qp{Ui>z@ z{^!r_;Dl!SDgVvqkN4w$Y<{r1f78ngS-*}=Oc%Nr`zqZ2=CfBHPb!9~wr=FRP+udS zZM13CwrdIfAH|P4M@~MH9992j$;Vw4!T$vJZ+3nnU2Jq~=AN_j%IEIg^XC1{%};Os z35n6E)qmWve`}5O`iNV;`MOuHwjL2U{;ehU+{F!nlN=;fO4m*Pr}CfS^F2`7`_J(8 z_2b+9eR0~mF8*;b;Wd&?`nl!ydx6_$i_501%v{!)+H~TO&GgStrbKVKopNix9d|=W zuu1)``T+gJ}X!zFct^B|0+y5Ev|7ZBOx2E%P|1GCS#}CP=bzOF!b^S<{Z+l1j z6~*m)e18>XewgNYb)I;b(ac;agO87Mo_6H6M%U^0MKwFGeXV?$TfOOv_w3cyZ^M6k zx<1;v#pklewq*uuB}z05^*&V!K5DR=b9eE7hV#|+zd;sXUtHGzpTYL+e}?xUZrr}u zALSoUACVV0dqto^=(YduT+32v_I5#}wLcOB#d@-FUw{s+TX`wTVP-aUJz>A+<6bFsHu zft6*#l4a%r?7QC~+5EEpk?(&7d$Iov-~ZJ2*4$5hnG-! z{VrirM@<`e)~VOP#xJL)zxK>6zh1F%qJSKK<$BW)%LoJ+sJzK|bW&#g6|B{oVP$ z|JfhgsomdZ$Gm4=c)N``G7xP2E4Hg)4s4Y4a;RWIc4e zT5#f#RoC8dSU%5wlFJy8GAqny#i_uXH+~iW6AeH7{^ytN{}~SatpD*;{==$$^7ZGJ z?*GSr`ai>Q?PtI5-)ejB{lUdfZ|{dL_eELv>q=L_Dgs6VuApW3yp!YjCq{$=0gRBRIDW(tY2J9~zag)N~jO=ma%?T~-v z-#5R|{}F1>U;q5t{r?PXLjM^)p0a-!^2hc+!~E#_KN8-cAbDOZ{CM5+wLgxBeE8S6 ztuBhu1r6d>{AUyzY2i z_1c0#o6OORcC8cszDag*RzqYNe|GTC`}fW-0tJK(Bp~>w|7Yk6-v2Q44@e*z7BX_P zU8g5}+iE}e@0(ly8Ct_XeEwIv_CLdiME(B^GK>F-#veZa^BXKaH)bq4y`eg4{<(kO za_bMR{!#g#!S33Bh93qXvjhJ#aIJs%{Lk;`{|pa6evjA^n0mu@t^f1-YBO|uHSN(I z!q=sPq z(8?gMOOvy&d|7Yo`k!I`>ij<(@{iqTF8a@K{_6IBEc_qWn=YC;^JQFK_kRZau>C&- z?2q-EgH^_ZRK}%MU8uUizN~VW{-3{5{}~=QfI3!p#sAfNq7+CP7F{XYW}`;Y500v+V$KYKC%^VjVE42{fxGHu@S z!%giKVSIV7tR3#8)%Q*>p3;4K=Qj1TK9342PMojz`kVzjI`hkWdyt*G!FIaOUsQko zF4(&D`gvQXFut;@>-^7PUk33SlFGPPSB9#7g!0AbFWNtU2X)=_U=8N0Hh+cx)x8D# zQx0s%wEaJX?2qkP*TwMFA7YaKA5Qtl5IdLuW9k36UVBl4t^Zpo`9FVe)j#k61=e5w ze|2x|Kd99ARz?Q}u&?_03n3Z5C8PfH_sstcjmm$rPh~9N5Bu!~@^#sKkVX9{vYAsD zU)yI~_|NeC-R1ubOzuA}e(OaTZ7M>N?DbiHil+NbK2)A`u0nP4o9U5)UNVp0TrN9* zX+^mH<8>vq?*9&6um7dw#qel@^{74zQ?=2Z1&h0Y4Rhb%OUWx;*<}Lq7UZr{+<8F_2K(l-;e%h5Y97w zZ|`)~ax!vE&87n+m_vA<<&iz=PV`P#-tzl zJvE;Bb-vD=txHz#>{soYU~lB=zpDPkvH4%$*nd%5z@PBjt@S^H{WMUt{8(jod13we zx1jj`r?qF=YzMiW-{x}vtIq{R^2dGw3@X43==;;=>OX%F8?dx>gz-QEHeyV-828u z`AEs6V#$YHH}}kveSZ7wvmKkFk3N-{r@iBvqW0u$u^aP^j@-QC8Tf9#yQ1@h!y+x>9EgE?ds)O0k1#ZTI4Y;lXdI=6TH|lfI%O^CE9NUT}_$foUt^Rz}kx z#KL6Av?9lu6-#HdGViol@$V}CpKtyDeu*k;?V6e(A$KHg@oydb&&TY4KM7z2by~L6 zf4{K!ZNPtq^BMmc-Y;~&nfRY!p5cFn`%3+r4*zGcN&e5U&y_(wtNmZK`+tU7E<31! zBDK@xe?FG~`$=QeJW|51%4<@3^6JOyF3(Cw1oga$DwQ|7X1?B%)FA6B62KTSPor}2 z_SUs;Rrk&CFuQ%=#JWE+4|Z#Puzb=tZ%^3W;yTL@ZQYOW%I>j$q-(|-@48>jdbRe{ zyE3b{_DR$_*taqi?K(HzzN7B&-{lXyf4|>w*S$LT`N@=fJ}>7^(*NboV^mQ4a-QbJ zz|;sxt7gpdAF)Fu0U)|n&=%C@$8-8mhdhAQ$_;AWDM&|NHbF0$FuJ^HxsZr;9}X?>fbw~IUH9dosaVeX%;Y&x0$U@jYP*y8~IcB}KVwPP(Bw?>@Z zqF*GLr_#n;`PAtXr^=m=t2Ta?pFiVy_mA@59)J9I{fPeX_)&gO$$tiJ_lNZ&Sz=MS z*Jdt_zO(f1q`N-44|faOoLjfdt!GBfhEVyx$%4&q<+tP?&Dh8BNAP2N&)rAfy=F)E z9aBqK@!ij*?A4JEj8iWNRq@Qb@~D zJ>Jzy{b$(xzD?S5`(@=H`Rk%HKOD4OxqZvE7ETjkug~5*2ld_O3RbGv#mt_PpTNM< zwbA^*TejRh+kii|*QY(wb;19&(4*bC2jd}@}u^{ z>)m_Ab3gj~e>jvePw(@Cws?NsLuZ%79?$u%B-yj(P-(`asWnG9+X6p5c((M1>qqy; z>_@Ws*(+|v^Ht>TVSUsSqc$^Br*qZIX+g@id(Uk4RG(_8;J(kqRr$<=ZI}HH?L6{B z`dj>t3#Kde_BbxMSJCYsEB4%>**m1mI(5sAidmrIpFgv_F!|xu#VTC$*skZ^;?*qi zSi1FUQk%z{t5aLDUY1G7Ec)}{-Q)FYd;Wd@Hup!zd_GhCo^7}E+od16w*EPBEBfd2 z$l3l~FK&BQY-;;7b<!`JErJ6EbGP6~~PVXyJtdhkwj!mY?|*0XnmAKHHX-dST@EZ(NS;6h3DUiZMsUZq=S zzI(S`sATHA@IA_jzeA2JJ3Q$yi^i?Q_?!P34#(dN-<30;uk`e4@7)!-vUy(@TD#|q ztkx+w^1docV(P`{(><&HEbh~u{A1$p*otoVkDD@Q#a?gzBcE>8pSmj3^YO+)hL6nW zB_}T2z9-y6LG#lw@1O03=Pqc!^fikYzFV;%CZ;~+=)*{p9g(_^g={~?izln4oS$~# z%?&p3vIoa46oNObtK8QA;hiqy)Hi#iAF0fLdpK~?Gv7?rS&tG0JAE$MKFpk=cuUCX zguxQMP><8i52m-T&3~}B`(f{|Sl9JuAC*jgJMB+a&_Yl3RrP)HpjcdnFBUhis!_gFx|)Au+#ky;?{w=o9&1vreqphRXU)duZ)@6k zx(+#hZd%aBw)a+-j<}7Oee*7#I2GTL8vAQ!f8Evncl_$&?6h4W9NTsC@@r9@)L;YNF9Lu>bw#UA3l56g{epw_iVdn$& z2kY9dZ5Mm{;dW3(dQ)|J`2NOyp6hz=%s6|W_xWUn#J1qxB@7I!?lW$2`sicB`6y+- z`z+^`#~-PAzw}ux;n52X)HQed@#q3|7BHv8-KgPkJ>f7 z3@=|U?KSfa(p2y+DKFvbEpytCuHs?vN8xTufcl5?hi=<*ywY5ed5`7Ve}*j~XO4aQ z9NP7+QYuhrx2MYWyh~;`lmgCphQ_XPn#8~JqHELhvzPvz+b2CuGwDA=YwaV8MYAjy zOw*`wy0~P*X~l^TQllS83B7sZQq;Zo!{WlTuk#y!nJ)c1^-eth)+5uiuwDX)5$1jwC0>x}TTgh50>5Mlu%zA1p zb;wcwCQp;2wpz#`&t|>$ zTRDf_f}DHyPx0PdAuF}_S(lEuUD?O_L-G9gDyo=QSFlHJyYwYj^9|#st3o$=Z3FVQ zPd>RjY?|&0`?BAS$9-n0|ImK;yZd(b^dFV0o4KNnCHMvk-Fpz75dC+NV43m`!?&@^ zLW|7$Dn#G>XAu0)@MK=a;(rIjG{!Z%s$ta zicfoWz3h=$hoaJxoS!l)Q*Iq$J^3?S;N*vX*V%Ekaq;cD)@*j4f?<@5bGmk)c4=igsb_P*ld z0hPxUJ-j!bo9i`0p0NCc>GoOM9oSecG+v;=F8uZ?T;D z5A($}uDh=2ncip0-p;$?=DPVc({7!*b-Xff>8ax;1^1LriJzG)eNegZnD*U_1^iN1 z_DQ~sT72Ys9rNBR+iu^zy~($fdD-cut~+|KzBBlJBy&!aEx+2Pq#w!)T~o{R?lE6q z7x&`!^gX+;9r@n*#4y8V_w+qRN0if&7RowJOR}2tdn@!H!!3-T@18%v&l~HMl;8gA zSLFon++r20mwnzho)oKem%Of$dSYxW{9OF^*;iR#O$6~n`50{9UE(SSY$@yI#}-*DA3>tvd=8 zcfEuqEe^kpU%p}iKlhb?{4b@}-aUF}AM>@f@6-3zI=*{TnzD6{&xhm8ZzpMQO%3al zn7TMf;0?=4o6GO_3B9apKdkzrTEAgy#~-B$?peEP_br!Eap{o^@S2|Flj{Dl$ikAP z>${W&^TF#6p0s=Pj>!*w{m#35c=Gwa<8sd2nA2siUa3tA+o^M;@;Xm>%H|U?+rsCUE^4rl zw~zeKurNCKKg0fS^?&LnpS!mIMc9@94DbIg|IZ-5?q3(f1Dm(m|5|hX|NUpURsZo9 zf_L*j!^dCmUo7Ca|NM3RFVU^`zyC92{%1J;;&Ul7_w@x0kP;LzY)b6zul>((VeQKQ z4EKNg|Fd`dn~!Ac7by+q^V?t7f6>mW|Nigte}?```6w=3@3g4F=6l`N{|rvsmj7p{ ze{28auQZCYUSC+TfZyi++Il2AO85T~Erq%J^@SG;_-)Q@|IgsG4rYYfTNGD#X)vGr zjOt8~5pb`+zNoQ)-x_QL+!bnQDl!)ETm5I~y#JT$*7U#s8Mgdqc>LwZ{%z*wGav4C z65R4&&$Z?1GWx=D{{H?!+80)7&)+rw{rb-f`0da&B}ao|E(jEJ^Z(TCmFi;nJSQ6z zLh;ZLN{&HMAlJq4xhH%67ws&lf{WWh3gY1=fE3iudj6N|7u;bv*X>_~U)c{y4eLD? zHQ3xw+xeftY2Q*<7@MB|CHhq#8uhO)X)NG3`}7qg1`E~~sNC0=GBA|9v{_sHueH`2 zn#8`W(qKMYgCaywfxmj}e};t!zvy)_d`1)N$D+hu85{(G&_G=E)2jX3(~f-?*5@iF zyq@V}%zn!J^fpb|e#y5z%yDaDu6O-sc%rzp{(9JdhKuFpcS*%_NW zx;XV0yZ+Lez{mwloANO0j?FTs_9|5n@N{d@Or z_WqmaGMJPftX1=TFfG?QbXP`B8)wnw8U%a5}H(jmQ0rqLq*VMX< z<qgwL)&(5PIQ{Hm_bBp}X@cNehUoJ0(=XK}T{b#taeg!lGd49_+|JPdU4hz&* z8q8guPLBhHJuDFS+I2BJtkKS2t+qewdKB;5Da>UrpH90wwa&SxW$Nt7jR$PA zFRT*z{b<<&ew`m@|1&5*KPdlOM$i7s!PVCnhtK(gHc)nP1;c8jgAP}QA%>_-7|X!R zGf#i?z4J{@>Z)FD%BQ@-c> zj+cqsx+Gt0j^0G&?M9xa7a}>Q-V|qWZ*F3%DyWiLT|Yr~t>{_TM?V>7X}$@w?nvA< z$<_0_UZUm!Yd2?3#&c|j7p`zV*8bhy;$5pH-sjzz)%U&Ce@3FvlYX6t1s*3H51 z>+FBrt=V1pVUgncdoROw?U+>^-I06qo<)=Rqz6vMPfz_W@Oxt>z3pOTReQsw`z|}K zu3eup@2c*!iaj?3<(%@wgqEnR-!9$woA1%OcD|G! zGc_)*HMB{gZ#DluNVF^yx@&<{Ng?>zV4h= z*#0#&Dbr*2*E)Orn4FuLS(+$g7kqPx+59xslsmU){QXe2?hQt! zugf;3i+)tcd}Mw-%XNOkyFa|8B4zRE)l=p@((FC8ea63N{izK*@;t0}9Q+m3#qeQ2 zZ{4|h3iFS~iC?W#_~TSB7gZ6aIoW-~zY~2&Bvo4uT)t~C@y|~ezgHZBEA)=%W%akz zr~WNJTy{_X!`0(Q&icK7c)ndV{n8fcY{Tpd0p%qIF>y;~&eCZ9vr5_Lt?$u|&0j=C z7(YF0{Vnrjvd4vevOfy{sU7+!`ax}T>gvp@()DeRw^z^Kd~(9}qBY0Q7E6}i>y|%r z`({i}xO4t*?fVB;#|l4oo$mafq2-spT;$8QOB!|eEb~m=vE7){?d!y+(h(~(#3gDz zf-L!5`ut66eet6BACh(ocbq@8{SK;i%ID(MGA>fC?%#4UZTjv{+umv>Z$GWSwd?)S zvgz^+HGkzF$F_facmGGEzH#OJo>Xt_SDdu1@*&R)q2M{qFr~ z{vz4CM6b7RdEdD<`{7)F*R2=7>Z*rb6b@>sG^kX|Hu%(-^KOTVpwc_(jdPw~`6K%v z`#%F~%*$)DW5{SAS+S2aFN)nnG3$O1f&3|}%dDmm_tEsM&ELy&OYkibf zv}%D_rRog*>s<^lv+lM3XOMgQ@ArR(7ik+!Wb^n1GdpYZCc8>|s<>IV+<0~I#Kj%T z8+n$5AuDXqyXY#y$oqDVRqK*#+c}>(8cEzWatrAZoOm`^PUdphEANR}iz?NlrxzR% zR$6wT>OVvIc0bS8Q6T|rDRNSma~DJ;Z@Vou!z1=h{ng~t?{71|zHc(&N3zfM))|GCCbZ{8fUeA2V+19MYoTS%*9+#Eiw ziV|+_qBaY&DYM^lf8Kaxw#}Ze|K3Zy{Bf;SBu9Ft{fDm1c`16jcYRz$LuWaxZPPQ? zdt)at$(*_8W7L&E2UZaTmdpL^P*L^y$bSajt=7^Ke{N3;DfNG!Q(D7cBy#=8#f(|+ z=9f%5nz$l5=E%cI9*;^(5-&8dIm&JQWv`=YC0fd}eB+wsANr;$i)Fj0eEsSEBm8=* zjdQT}Vl6JNz`S4gH@P27zoY7IyPn78*<+2>nYoj9{hZIZ+@!+|L_5jkWXcPPac&UOf5!tSxzeQu4*#oL?us)mdQ9mVXI8Pu9KLao%G6 zzWRrJm+GHivj6?0zE%E1d#3&8m-2r<*&mwx=w0p$7xDHji?^w{KmX4#@lnFkn!VAP zZaqETy=!jYtZUtuWIeCyQdU64^r)%vf&HQTOHPFC^h`;d=f|D4-vQ0smoRfz7PdV4 zx!BEI_`|!NMUOrjZD07{Kf|W@man!V+2Y@?ZHjU)ONy8ldh3Wn&73lM`d?r}w~j(YvqyGYI9_*gxp*7tflt^xFIdwDWSlQ7n6%oN?d;Q8 zHu)_2p|>YqmRw#K@kL7eXzu<@dybe_yh)~KgQTbI+P-FCeJ1 z^osJ%(4vAR6S6-2sr|6~Rq)OF{5b6+c>yuDYcJjQ`S$9qO~JX~Q!_8PE2i|$Y@hb0 zW^L2M|ye6nq0w~_f=C6~`ePng67kG-9yP}sMGOHa&tmiDo{`i%RW zQO9}O3><(h>WDm%I|l|!Z}r1)4Fv)wV994p?kPxIwJi)r6v^#iViy*lA`U975g z?b+TV*=p}~&jc^BJ9guDw6v!i!=&Y`hmJ437-ud1mi=MoSL<%kkD?bQDHkm&-?l3{ z^VS{19UkeLPtSa8sXZxe%-etbRKYCyx10~x3*Cyj^K#d*)kPB`j<0NyO8s=&c1=5{ ziQtT8zn1?DB986;pf%$gFUBdatSR|-_D7eExyi(XNt4$u*){d-m(I?AM{S>MT*Yy- z&Lg?;WVz$Evp*UtCd+xP5VGUp7=g&9XC zYWC#uy;^lMfKz9|^ZLV{6~-qr!;h|cWx}y1fJx=e*<~kgC(jJgi`aNTupwTBcTdlB zZn>+j6?fvdsJAY=@=hoGNO+Qm@2Spvw_fHh&n`IjP;t)k>Bh$dzpLb&F|(J~JZ_%* zJN!r0`>1Um@3*pCShr>4is-D#x9=WRy=-#vs7ye#jg#k}^M+m~iOnSel_oJdwSumd z+BNwX!jCL=vU>VrpSgDOv)S46H+4^1*t&D)%_p56%HLDJr!@A=3AeM)?Bx%>@a1m2 z^WH0^zINY!MCqQn_PuOkWwyxX^F}5U`AytKgoQWAY-&*vK3K5veMorQ!QVa=L9acx zTv>2!>&mxz53^_ZMAYLf*Y+rfvAUG<5ChJ6aMGQ}|{_&S9RDZW5zY?JD~De0zQ7ZeO=s*=N(9mPlLZ zzJI5xUu|+p>vooY5=Wo_@7{?XjN9K8ER231_WX~Dd)wJXXLfDds`BjNTfez-6Ia;& z@|d`$N9ux8dVr9}ZJmb13(ucP>WSH|>iY1XLEztosIu7)_W$rGS-tvindaM^bvk0K zvn=v?pS;e`YATCuoD~*udeRFHojpCz4}Sa}zD;V{+|`-=*^g%P#vS*rH9H+V>6zrT z=gDh6dL_A~M)aIJmCQRshQUo@I=8$-V7zrj@was!{;hwon_uAm`E@V5OJ&a<+!AZ_ z?^#4bTTs7Aw~We~TDvmFB9D`|vR8ILUe{>ek$5rjdVXHs(~2aa+qYv*XKnJ9n5I%a z)tSqBu`+8qOM9GM;-rVkH#F9>d|-d8xV+-_p$)Hk3+q`wWnO$`zN@q==Kj`M)`^0% zG|t6t+@WSZ^=H%DXRG$V;DQ}y`pH(_-E(+t?Z0(jRNHci_uty; z^-IJD)U`qUWm*40Gne@<(kLOX!3mTf5ZPP|35=t{nr=&4*X|W z_MhR$^ZyKSd*qMWZ}DqgP@(razCE_`(BYL^_XN+Larkti%-NzzZ#(a)Iq5^zHPt3{^kCE zmj4;9FaFI8I&Adu{J&KnZM7f2Z~wZwdS}kp+BpBsQEOjzZn^zrR&|k6=Yrlv{5F-! z&pHgA6rWfY_wPcS)a5@uvv}?)?vrYM#ko{_>C)S`rH?ao*Kg61*mFl_%KfR5Dm+pQ z-~Kb;@D|(UZ}UHte(Nq>ef&}Ni!Y~ZtzL(j7vGF?mf5!LusJ_x#qqgM`}b^om-sT) z^!d?fZQZ_hXq{HzJQuSi8~Lc=OchT#J_&$t?_R zu$^???X%S5?>BC1#_xP%`|3Et+K1uuWxw2*@6&fUEK^Va-I~P)v0Kls*%x}*%2mTz zFnPz3hee{E!P8?xd6mCSx?JP?k=^mbvG&UP6~|Xa-Mh={I_(jM*X=FdGom~W#VAX1 z+?N)Wc2_T0z`xLbL&blFcyL;IeJ=W8dH;5q8b5`q`a|~lpha}dpi8CZX?)OIZ1O3? z=I6>dtV|1h)^ox^QX=5aM4_g6{~2b-PZVmJ2BvS+A6onIKf_;9 z#{Ud0fA@nH^!?GgFaMw6Vu1V)|J$GweP=$9?-ZA_`LK1{dXej4lV+-I+qI?gwCUC} zMq7*z`Acf*-zoAzl;Af)X)Bt@y+nvzwiGN z-z>iS_x+z!^FOSvssGR5>QMg>9O$4W&D)RvXZRAr{!jSZe}+Gy(f_XP11)`=|NGDV z$DV)J*Z*g*zi$8i=l)~;$#U=C*FTdxWqbeLe}*%w|1)r*YL!PbJaqlr>i-Ns|A7MM z>_3zG?E7Hv{X_Sj)_(?q(UJZ;=|2P9gSW;1GvM&yeKbd4`Y`g}_5C0p_Je%r{%!An zhCl!A)Zc&hpFzsDr+WY1{|qy#HNMwZ|7S4Tj+QDypb;VR5teR(|1)IY|Icv#>-oPR zfBlXFxpEyi8sv^a0?Kv@*wC5mf3)_4lh}WT)8N#wuKVCW?(16$OHA*J{AW1ibV>A6 z|F@E|dtt`&cN{M(vifp-Z{mN3+${Oe{~4gM`R~RR`{(~2|NGBi{^P3KtnOX=@7)Ys zw&>mZ@B2U7vLgybL`k{c{@*Q-p66gam+Fy?%MNzFP>-!l{rvvzTIYZD@4)_0|8{TT ze+K*S;6%`1%Z=3*n}4^i)Ib0C8f?qOzgt)QXE^`&Iw(vQ@H-<{42b&T^?CbkTjhWL zXV~?hVLvE+@w{xe{8+WxnD-T&3U+Ye6Zx2yX9{Aaiei9Lod{y0kQ z_rLQ4|1-@0y&e?ho`1I^yUrEab*tku_C>kWym=N9zw_1Mdpqpko^}7%Bpwe+|C4{m z{qvLh^|awhTAGFO!q*ORy?-0Qx8Qs{|8KAS4f!wk|1-$^XLx;m&VPoyEc-74P>;>O zy{rE(iyqj;3|02CtyL-idh6_%R0K8P6mlY4!&-fC_@QX-xR__1yZ0Uan zM{cmc+~J&m_a8gRO+-?Nr2qQt`n*>Nvr#yVFYOV&K(T21w|7vB{@s7hb@Z~&?RB`%ub?|?N1yJ)Bqwp5+ z`{J^#etW4svSr9ThA;2omc8!(cx_J3x-B!AJq?WoH)(X6|Jwe7&GXM{=HK;5_OAKe z>-Fh-?B$?GuPQHyE2d7^z@A)uash9>?SF>nm%jgJU;v#6nDP1kzgqqJ4}9_e)Mn<_ zvfjBK=<@r?wh68p2c;tx?X&$C9(;VQzk}e<_x~B}rtkm3uK%AwI7&`vqxnANovOEt zwp}cE;rMjPD?Wp4!}kmNeo33v#GIBs)!BS%VZd=)3%&jGuWi;_|1PJ-&F*ZW?!MQl zcb{6u{5jk6sKK@xYlheRcmK-%7s}=T8R~EU$+Ul{aOFS43snd=gZavSY=Nc|L$M4hnxDJp`|(GKf{8lumlnO52?k8lps!>?=bcM#Tg4r z4-XZPOkuvFeyQ%Xu=lffw{Pd!uMJtJe|Xp4uje-Bi(J2M>~ee4B(F2epQ&Am*?M+gk%Mix+a=|TIZw8C z-g2#;`OdQ@y=1e^8oP#_%s04hB98XB>a~60e}?0`>-YU<_$9f38+^1waO(XHKdZ~X z-MzHnKZEywhGp_UzA{8ES`9j)|1$rds{SwgerM3N6{9l9r498at5tXYv zYTnO2_v!Dp^j{ZPK)w}iesueHuF*N|*{5f$)t%(HFY}I)o+Go$2TO%I)4d_@_CNl0 z`rj`3zl?U`ms@VAP2-tz@~-mceD#WR%#4kGhjuQ0G@tE9`jPGbba(z| zXnF6s?ce>q7uTD;@BGuh!Cz)~@w480>C<~YZnr#~%-(-tAJY%@!@qhDM^$V;xG|FL zO3JpmCfSCo-tCT@o^ySf(bEyG?&NBm+xGU}=E~o3KkQBi?wGixUdZ#Ic5J|l z>vCrO{=ti#OH@C3ZQ1?jY*X;lnJ#Gtckp)ce(cYkD``Y>OtV!FqNC)s-2*S)B!DY}_? zDk-wFtTJ%PZQ<41CvD1QpChL&?EiO3-r~Ot_GB;4Dhr>spKpsl=gsWX>#U-=BPRa4 ztTNs2P_Sn2B~uqhBPsP$E40o;=vB^J{K43L&xdm}m##l5%Mcg2_J`D&X@$C9_e>8j zx-)ZadfPMeCdX$Fn$Am0T->wqgjwl-hGr}CPMcJ*jVo&WE}BYbY~6D8LcE*WUctuZ zjLoyxPCUA0gItW3JIlMD+pEJH>@)YWAForoRMPp!_EA2k=h|B*s~5%mm~>Eg;nl=m z=bbHUdYoAlzj+CMd%Ep`Oy%;P{n?KnX}@n<{ot(mky&BAkMt#OWKO8)`_JIjlX>gx zY>(GQ4sRl_7{s1BH*s31D(AsP2a8Xe@VAHWv+p~6c%I4ioRaxm8Ta`2emJ{ilJD}) z@7}8Or#v`XYIJ^U&cC#h2J_Ru*UxPDHov7#>cjTK(tFcB?s0xFWyh5uj-EJ~OKVNK zOCq*Em>Q{4o@QjIxPNekaIH!NRlInSRNC%^Yn{+IO~QX*DuGapv`v1Gop$XhU_ z;`aFz;gt$MD{e@X{=QRu|L||GitNA-?uXwCWm=W3Un?)Oqub+B&Z73WOONi^d}5RK zsTK304!wF_$=D!idA9QU!S}arJ^KA1zO}~lu`P4l?vHW%SKiIMl{azOt>}}btF1h> z({5Qj)7p4kYHIq_#Roe(EkJ3hIL_chUhgA*ffrICzphSOyyR|H)wPxDvb|SCJFt|O zg_})0uqxF^+~awh!9lOL?JCW!ORn3A-mX}8q(0|Q#D!&7-fTHxGFR`@rBfO~9FyMN zdii9QalxU;*fkF&Sp^avS}N@=_;+=lXgt@pWw+crf@6GhZ7l8yRjb6tmWubBnjmpz zUi?`BqfVRiItxw|N6+Fv?p-hNBk6VMwh#H;$@@25U3BsN7RfD7qJKxmF8=#if9BII zTAS9idE7mA=dp^QYtwVahaI{lvh~5&*V!pllzk4rrms-zxwti{b!}qf;y#XmI~#kq zn|)R`nR%Lbf#CMDYwXkKvqfZ^nIC?n+k8Cxk@%HAO&1z&rTs{4_dY-SNZ{Gryx%+T z2&{~st^C$YV?)eO&V8aUF5h^0OZE7Xy;A<%FTR_9pXEErRqD=B(=+A<{dV;>ok6C% zXJ@S`oZNEfZ1c1q{SWT9l>T1N^GfsaUA@yWAC5KhOW73b$UOT(pKm2{y&E3|?IPG~t_N0B8PeZv{&wI*0 zd$w=oqHfmneS0E5#2=DVu#?@ZRL!|_PHD$Tw^hoVchbk0O_hEJPU|!qpZ@eXBYy3QMF)$Y z+*N=0t$+C+#moET&VCeko9=g6XU(KzpEMTrRjkj=b3GEy{{3jAvpf5{rv4rG&16{j zK3vb4DHdlPFP{CMLBPZ}Ijnm6o>|r|-qH0Y(YkAQ_WFD>T2p9xLUHD4)6+650=9lt z1^K{zcusGd>@{Tv{TM7u$wZg$oA^oEoIj~FI`i(*db4O2jhb2r>z&yvg{DrwYqV8 z{*5hx2|GWDAG+UWWqvT%DmtFC#^u7AqgVT<*NUFu)ZKgZN6p#~Ju#Z$hLfe&J>H|^ zuD$u$-tq^rYO!Vc%8%4~#oOfL{xh`9$kdm~H@eSb)o*{E;qdcDLx253OSyzN?N`4* zsg>{h!*x^Bm)&xFc)!!iYE}7iHL~^kEN8DH z9cMRadH#JF!1&v*i{Z!WgZnuieUv}+SKjgC^S&)J61QG3eY{otttH*P{70+Scjo_7ugDI%`juN(-?v`tU_ztrTJ;?h4=ukn z&RGU|m(^+jLR3;mH_*L`z|gZgF3eBGSRGi4Ke zzkQbRSfZJvc&4@JriHTh;*yuISqlBXRkCQ!EBRN=DJ#Sh zvq~Y)Q}Wrq1>0Yn|9<|HYg&U{!biEr$~Aw&`j2k>&wKsu>4(oX9@{=P{aEf>oV)n^(r(rr-TKdbMRp&a$sqA* z+k^f`=iIdKXe{7o{^4A{aO9Xzf5vjh z>dpD8xAqUsKfM3T{$>9ko_^2`hv22V|Ag!L{xe*t_x{gt-~?!t()n>ds=$|78q8<+ zKQsgDT@TR>&FDI?{+dk$yc z7%iwv2~+)kJl4p2Y}?SgapR$xDt~Qa=AC4EbaW>n({Agw6x7C?x z5%=`9OwK*-TUdOo_(%AztLnwm_Q_S0P1!Xs5zVvmA7=dkoynd4kIns`>B}#nGRLR? zXE>Az3ZR$se}DtX@#Au&;HhdAVSKv(!>o$xf6cqV+D!I;nE9g~$^#AYuej(S7yX}s zb9wuJ2Jor)pkS5%FslMt(aJz9iq!w8&65Y6lRoo5L*vB%482uz!M4o=Yt08a(qul= zHV_YF+sca$a^VQu{xdvCsec?4{bRfH+&S)RUVajFD-wFi$G|fA`U25cqN3;T`hUOv z^8)+y`I2BqLyZAl!8ae`-v0~_V(K5qt#)Oo^oO};HpD$2L+THuJ_H{VzyF80{EzFa z10Cc*TBo<~e_;ta4L_#-vHKsjIFKivfix9@W*J_631FX6e<*oD{e@!R{|pCq{AV~e z`Jb>Y^S|a<`QRk`S|osd@_&XysYptGtn&ZivxWZ`Unaz^H5VP^d}@$Y9P9t#V**p~ zpFuQkvnxZT{vS0f_P^{V_y2JB|H=5z&~n@XbPovBF}+n+MHrvV|1jf){1=(a^&j-? zKQ8_!EQ{olk^uIJ{~1sm`(qV@>IXW(pW{?FjJ*ZMyL)8qdPQvN@D zOyqy@zP$fK9F(o%cDpiEivLml#s8P>%X^SpGI!POUiNC9>V~e)$$1g)Wzi?1YJHWeufELTUAm3`$`|$pc0W#+-tWAp z|L@2Z;}5^1F0k)6$JtvYUnZ0=B#$g|j!QAvgza#%oVCloN&Ow{A zy=R)-zBo-nm{E0|O5hFi{|u{p!nSvOt>)D}Qc%0?+r9H=w*9obsI}~kak`|M)g0cx5WRlZ2?D5_Kg<{_{~4WcYK-spF!y6 z!Q54mF@C?l9Th!fETb&kz*of_w-DDkyQ^wE_Pfsy+a$HFLp7tRTl}(I^dm1G)(M`y z&sWWzlUnuIH2<;rpZ@tQ^=}Gx{Aak#wmz`pvh($KUvI8kGdZa8+Edmk+pf=x5j>JR zhxg}ePGWb6D2 zoF1?_^$PEyqq1{1NZ;{TAEmRcmVVuJ>!j=!!J{hSJmEs@ zv4n{?R<%N*W6{Vrg!Gu3R@z(u#F|rM8W0L!Zh9XQ^$%JVuY-|uHCDA zM04%TMSb_GwOiUOuj)-(DpxwoDSxzVjH+S`AAj!b$(0f)K> zu(3&)$93D!XLb zhnVQwZilih78)ga@_jp@$nbmh>7D@gO4p)o_JSpUeVuOJz7u;_(k1B{d+UuqOSk6< zi7+#K&3WFF(v?-?bMaoSmClLvRTFlt-ja3k=W-57?{_Nmb~`7pJn*c+>Xi1h`i@;G zWyN<+${z7%>16KQ=H7YIvR&t(vxcOlb%7h_N}Zb8<*&lU0&X2HEXWJ6dvQom!pNlN z&&>o$X;o$hHcy6T2U%UDrHSBWij~igciKeF=PCX2LACKpO|t2dZNEa4DjwdpnYc-3 z>66p|<-5;h=1iC&slxTe3T1(tgS`5W)8CZ35888@r2oiYS%~wshWxbC~ z_fOQlyL)Ad*k|u2?$4Ibm3QTSHqYT~)V?2vkJknti)!0YWA*JqTus=;KS4s?r;AQ5 zKf273=aP&^rKjNajGujXRwy*pZCbRZ`LXup3&!v-I;mhbQY|-Ieo?$Z!7MXA`R5QDbk` zb}cHrXPo>euz~B2%_TTw=YS;gy zKTQ9q-}J|Q!L1kDwmZEJ{>XnsdTo}W&9wD7d~?}vr7#F;KYiw+Xu&;sLS{X`ox$-3 z{~4P8Gw|vCVa(c<-(dPz@%v)%1nZW4NbK7Ml*VSMg#2@y3^!l#!(OZ7o?Iuh&`}XY7y?gmap}f|MQW|@N@9lWcSD}1t zp6!qS3=jOJ<7)J`|L|P$BkX>F_;%qev*5e04n5-6)ctnLctxkQ&}8M>Cov3_>sMqb z2sWQsz(4UnL)v;4_sZWcHO^N|{(4=G_`u)1KJV=xgKtHg3T>}LKINBkTx~x+V>XA` zu>IVr2a=io;U3Cp*(ei8S^w_jh;tr+>@4Anpw)TDmm3I z{@t>S@4OElE8A!NNIq00-&E4EWj^1FTXoG#k3=rscv&~-7K={N5haENlTSXLb#(!M z&%?j-Y#h}aZM@5CKdj=dcwMKkdDrd__d9LE3s<}jx*2`o;*!%VJPVS$KE`gEt-_@& zZT_Ummia+g<4666^{so{RnNDV?)CQFbM0-o(}#82A9>_&{bBCGqTlHd!xj>dy8@T%DVW7d-ui{9_3ma0Fs?@1}> z&ahC(onhV3JHwD1aWc^Le!(}B{|pmrPrOELuSZ1P+gtlc`(Wvc^WVA~8qME?K1gTg zVtAKr*AVilh1y0$oLVow^`>d{^;v(s!jlXHRJ?+Gbayv8amy9cw|ig|5tkG8S-VU{Qvy z4aC=;|Iv46=9+buH<`t9nEdZ1UZ~Apu%zOe>tTk&7cQ`KldDT;mPku%T@%x@0!(Mvl*?;5Y4~3 zVtU}Uud{1)O_prku+zIB@6Ub1lTDhQKP!&EFZ>Z^kA8Chra$>BopT?>COvGv(OvP) z<6G2{X`5rFw5I*M!XvZTW7XYq_nrqI7+=^4Tz$87uRzSbC5CrZ&z{}Vbl)!0&|f7o z=5zCA``(gS@_(wg|9=0UVd1P(mrYLRO8YmRI;Y6HMSH7&P=W2qC-a`n^IRLt|IhaJ z-|zn!E}UhEUDTz)v~2A%*EKJ|qtAKF_Nf7rKhm26fz*SG%+ z>GqlNnoVn5-wB^iw02l>O4MUAm$w{)t*(u>Ou6B?S-rnbZWSw1mE6g7=JcMoC!WrJ zC;H&=0)}uq=8YBe)OYT#aPjBelX_ij)tNOl`={H67Va)HYrL$q%S(fK-j)xsTelv$ zE)*AgH^%pQ{-iXY?K38>E?C8IptG>?_1o@U;q4DEzLSbjo6**!!IKxV^3yTVUFl^O zHji&@)mXrnmd|`>+oj{YSC4ec6zk4i^KPH-W~)t;cEsM5Y`7E6Fu^dq#`@vGMT(Q6 zSFYE)$FZPNP0MK!^H%Y!^$eS3CQhE`xptNY^So<+^zu#CCSAMQK6BL>m1&+!w4coF zn0=Xh&Whd$=KDKV$RG4JTW_lC+haOg z!K?W5l4tI$GvAP7F73gP5OQ5kuiE-BbH3i(uE0MSmUg`BDIYz$cW=^^M8i^(3siMCSmq}RD|o#bJ(l>XLz*Ie*2UE4D$r) zeE%~X4J_EW*)_TTkfZsZ56z$Q|7rFA@KD2$b%x1K{wE~4|9Ol2<{5&J(( zIQyT$M&ZwF@M1V2O%&PrA13<#XR!MKvFbkqr^x-!@7bT+{}U?CEnN~WsD3op&R(RS z?}6v)oIbZMZSPJY(}`&*`btfm6Z7@&C7uYpdcowL+K-9p5B|#e28UPZ>L^drm=&)V ze%Mp#NHdq`)4T3|#*>em#ORcngvtL=DXu@i2kf}|L!Rb;ejog4{+~hA_0X}WqV4|~ zI8|V_r_Ap(%YM}Q;bn#WHmNiByYuzp68Hr6y?Oil2v54*jYSWWH!a$*|DnqKf3*+5 zK3nrQ{gLRthH1TZ3e9iJ4R0P`DYBfD$tUU%~$Gp_0Mv zITOqo!e_m(f2cP9Uqv)RU*SKc9}}}5+6sM+xO(r>ao^27r%gVwE4p70@>FMF;8bZ~ zR0$KC{qeKI)so1s9%+Knx9>YI^B29_dUfv^^PoMMB0-KyYgU{|>z1C9;<5I-U&fAz zFSoS&Km2Dn(j70FDe!7m@U43hJ~8fAIt^ymG;W+^f3GRuno)nqAKCpE0%OA4?9^^m zoOalF?MvDAJ(u;9Kj%LG^wjtDob+sg!^;b_1^!)-la{6Ej4z=hOe$*dN>B z{l4Rt+t!Ew8HBhe<`*uF;C%DdNO=Y;w_Iv~4Qdr*6De zf2gqbKLcnp<@;%%5n86d>t+_cwNpN)qVVVNh3YH+84%8i(cOP6TjJ~QT1mYGtGR!s zyGif}{W-?3Ev^J)HnhMMjF8AKhVP)z83`@Sd0yW{O1 zg-HyNNi(-<6x{0xZ+=lY>)L;YmPh{?{#D+tzpwkB;b<7oeyQB#Yk4IHwS4QdP4@^N z%+Iu0rZz`d?9+l)7ERTDE~|+XJ*P`*yqo-W|BLj$iy!s><&LWV``7=UZCv?(2Cnd9 z^%|&s6PCuG+u;Gt7Lr z|HIOn^#2SF!T%W=4eCFZ%>S@-Pv(CH2WJSs^qu>0^HZFLw;3!|R@Z+FoxiXq_&-C- ze2>jnw;XSJ?Z4YmckhADtEtWxud4_xDdKnHP*!O6o+$O7A>-GRt@?a2hmFckG zlJzaV|L45S{|sEw$L4=wfrg0tAI*HJ`U?X886Nn^|A{SI{X4j#{I^x~<#HvXxOH=` z31y0}uKzZz|7=9-On;k=J(6t7PZ{$XW?lX8pW!Xz(jPN2)BW~ro$)H^@talOKD(T` zVBq;U;fJP@nPcyU$Ju}ODHJ zpO15)>Bn73{9A9_>5u;zj1T{3c<5bm`Cmi(e+H(6{|t{_{AXy@eeC~>N&XLK^M8i! z^#!2SGXeh@=9z(`HKhJf`HK4U(h!3!1^zQYLqi^aXjIq#QQy`6=i|2j4Ev(Nm6h;a z=YJK@lo@!U`M2Bp)F1yDW5jTqe8Mxw)IsZ#o4)xdTYx^XA~3XOH3hd>=C|e8C4(x? zRe&yy=6nZA%ewm^S@u6e%XfsQL%lR5LXV!?QkgPwH`W3ovHnmkvYYG>ZVEh-_n!e< zK@<`HVSbkU=VQ13{R9_X{-7wzhDMQtG>-C)|Bnu@`@f1?{~5qpI`BUOH*+8;cJ_lZ zT?pe|tnN$r&v59#mHOws>%m2%`X3!xs3E-n8IA_9{bxWFsTUC@m^S`r;AV#!f4Ckr zCVvoN3PU~)kI8@7P%8iPF~~2V27u>(25xR7Ll`e0N><16WigA~d@9a#*?su>>zer= z9bF`cg%tgG_3w$+7PAA|>=I$GEai_n$f{#^>G}FYhY?Pb|FGe!{LhCl7dXiB6QfK0 zkB+bRzlx~;4EMpo!tRO)3Z&Ygc>jluruqoe-67gQQG!?7{0|#TQMJXQ*yfJiHcV}~ z#AvJkuyLzCA`;yH=zwjjhkDHuyJhtdH)j54sDVYDe9(V}d1CXwgW{j@680#Mx&B9| z7UU|3@t{;GA54U{^M7=D{r^=&A+#wYYrBj{C`dJU40*ba|IwL?YL_~ST{wJ(sx7Gg z(5Wl`8J_np2WKRPQY3dFHA}AR|7YOV1{rS#2{B6)@!@K^_X^?HLFx%`A4F_b~ zz}iAU;av|d1?mr-x@!Ntbw0RIWGKa4t9@Mj*WNE>#jZI@lPxvYNH_SG*-hQ+AQy?YtarVn zPH@HaxRP3>W1sFaAFVWN%8M~NDtPDf{71g8ZgAZWU_K|o8{c#PlU)oJtpK61H`>dDU)tw{cwqgEVjv2LQ?;G*R&$gd%fZc!j(PG{?o|OgD z!xd^iyNgd-dp`B#ak;O{k=FjMnwpP%L!g5=Xjo_En%I~dFU_}`h54_%=gfSPcS?~? zZoxy(h{J|lOFNi#93-E2E!S`Va<;&=a^;I@{naW3@5K#{>vSX+YANQ(yqSeTd>{gaxLam(xVt_7CcFV7CHHSs$j zyyecK&F-%!@D=v8HQ1&7-LyZsTK-nwe})6)r*+EAciAnOm?iu?`|L97w2UkN8EQmX zjSt-UxAoP1sXx5M>;JHP%x|eNPFVKrvGZ)d%WD!!TOB`bbL%?N>R`bziDA9Thw>xw z9dUX;Iv?3{>K|P3HgAu)?VPAB&mZ+ZUZY(oFjdO%Tj8@L<2flshV#C@vy-fE`)7J3 zf6IRc{)%n7RsRk=dUxIDW6XD>+jAa&@0!lC+@`pzO!PeaNB%c&zQrkgto`0@qneyi zzx9u8^+L~_#Et`sJ6i60@}IF;X1JK$UULdh%^$AkCj1R{hMPXB{=NHd{+gKAIlnI7 z&7PlgY0;J|$@8pRxAzx0e>>UuIq~n6cN>o-vqp4WkY7?`zxt2X$GnYVlNbJYbUyNG zT0^XR^70@1BA-`y+)?3}&e}Ec+yld|7hmI9f8@JddT%QId#&HKW9P(o%&RorePQFV zuE-hxzAs>yr6X?Vea$}g^^F?&M;s;V*>4_SZGL!mw64vP3EK6C93^(xWhDI&XY8?m z$?~7!zz@^6{~5%-ER&W6$oFq-84iOsx!< z5K}nG$ckNS(~0ud>zkhQ&|2mRx0d6dmW@ ze)Ugt*N3zH-Aa2usxDA_?7sbD$fr0b_Sg^IO|Sb4U7MaCiR1rpXP*7E)2<)*X6G*c zvF-Z9Z!4F-mQ&bpW$lySWeYytVEF7QY%}qsX5WMMoi4MV)hs{ZS}XnAy2f=HaePzak8(EWHX3kU=`Z-B=-&~=?a^;Jr>zr3RxI@GB{E{2@YxJ+$MC!M$ z*^{1K{wSlH(UpUt_Q7V5F5M^iD}mcPZ0+rCNaMExOd)_)Vtn{wBf_G;Z+TmL9| z#c?GWFF9Awl!Fs~t}Ceg+2>lFa`wY5esTAQku}jgbs7`$dEBZr^Ka(| z{UcTD+w^lw?Az53i?zFK-gsq8;)+|xN>5+8qN`@y-W9}mV&P7?lBKEj_V1pAUx`zl z{&0D-X|}BX5jnkPrz`J2e)HD#K3W`ows3Oe)5Uj~{(L-Ob8Xd+_=BIaKV^8Yy0dM& z?52DBX4yEGvcrG|8$I5bJLc52BuRi7beywKCM(I;|yRR4??PslFy7EQ8eV)`M_O`WKZr_d- z4%~f3PQCf;uB%TP*RH*KH{$(mp6|+q)*%uHEF|QE>JM$bXuo&We})IIKL2OvyNKo> z*QV#k-;-OPBx+CbKWxZjl0Aqum%?j8a2zg zJ-EvC$NHLSzf{F@x6L2U?3&-T_SuI&20u7?4qPeuPdTsus2)nyNwd>SB z%HNK16*J`-f34=HI(o|Dm|@Kg011q=a!6lrR<@EPlo-|Iod2 z$q&JW8RqlEZ$7To_PH3B>-6l^-G4W7Oj&n|$rbI)n=nT#P~x}4bL&HIUhHH1sNS_t z^UjaT@XUCS!pA;geY08@0Y~Hgt+kok!xj=+&E$@E@uFK9} zksXn_@Yk#TKPjG<@4ta?3( zn>LG7BW4wDX%fzAd;R4JKw1M9dvpJcADtKc0!|>y*lw741Ij`02^) z-dvw_(!^NP!h)L)#Tz_7eeUn`!|%n8uZ*dvv*$luxNN?(es_4h_v%97=ryNHpX3;E z);w1FsgQGXOPiim_@(*}YfbE9!~Zj|o6rAK*8d}Hmi)Sv{~4NRpZ{m=+VuRSoaPVZ zN8G(qw|_?;T>ZFneT{J`=N9kPsZwD(i%q7Bi)*D#bZW0`>P)(;#No!s8yWP6;p3Eh zs#|vMzx=g&cWb+AtkG}(J(m`ByL;$9@a~wGxoFZWCDp!6`-|Ik=H*@P*|>hqp41d%W$fNo`R)hm)Q%+oRth+Y2sEIAgP4_1-H}-K}MD416Mw9sa0( zlzLtD?>wjWk8R)ny?gBcqv`uc_ixgbx-%7eMC!N5YfSAwar3F{?(b>)rv7JWtppto z8uFjv;L+#*8Dtm#6MfeI_iylj26plJf65x{Vt+V)NDlnayKlAH`Uh*{zFmvuvey19 z_wTuzVb=Dzq>Y-gdkWXwxOK$8RB0!lW6UIX%{51to&UJ*v}>*0?TC*jel*H0`1K~d z?Ac3`2R^I0kM4h~WU-iQhT%_pg@UIY>~HS>2%lyD{j2{UnYj9ox9lIrW&CHjf7Sj& zAvm)zKaywoqkQe3!XMF%TR*&SRsL{f`ab?^TW+7t%I;vw4vKZScw6z)-6g5vN=|8^ zpT52IJuvIrTz2Q{TiJj*Svio*2u%K3R8VKi8kckJ3ROnO@uzi4ED`-ny-O z<$;`A+0}2(6`e0NFuU#9+I;(1?Cul0h4Zh>|FG^${rgw*e^`JsWc{Jt7ymQtUseC$ z5vYW^zx zx$5q}rKZn%KfM1ZIG;`XWqZH}`&&8Je`g$4d6srlW-Ys?;(vySS81QN?PG}f8)d_} zG$zG5%YL`Dc;4!gJ=Kr={B8YB_J3ISCI5XE%t4Hw#E$Kjld4BGGf$-_yi<=6s{kZ?Y^-lOc zjm;No^p$?({b%sK5dEgg=Ubt4_wDN4+jbW79xyobFFNYIFl%H8<9;o}FZcgw@0I?2 z3!3-j(E{LM6n~>ln0;EkXhq1&Z@2jNmL{%_dRg{uq2i}K5A|=}oMfrer+&IA$XsLR z&$9k0mFb-&E9_J&&PVqjzQ_5Wp{423_lNr?<}bZtav^#y-@|pLroH#ilvd0#m)YXD zwC9jR%&gza|3uFr`MUo{xRv?$t zxy-C-t5@4ieCEQV%6oJVcU>~4lt!0@&#JndjWs*7H&ztfimcJNu*Re|-tFq6k1D^e zp2{x1eYW=LnsT;J@5;AN@;E=|yCUDUV|gLDfd_uc{}V-ya;WF+A>{(YYTwoKKddXs zf1gzk&muc7?%%ueKLa$2uv%$7|D!!u{CY4_S$3}bHAyPCwEi>v(cTL+`d>v)~VeOarSpWYFY~Snu zluiC83Uw`1O{wVO{|sEuUB6!W&+s9?9NKm$Ui9l#{SWiqpmq%7T-S{M42O1Hw11`k zpP{*WKe&C@EBtQ}T*XY+OiUHEm@2lTnt`HXvVZ@|U-D0*>OZXg;{U4?>d#B^3>n{c zWc+8?V)UP3xAj-=We+}7ohYb$eWh#x&$ic|QTI`lgW7%fUH&tynEsFV-G7E3Zy8c9 z5>|3qJN=*Ny7vDJf%<=>@BU{1)euoH>c0y8XE+EB*#$gV+R6V!x3#}q33V69qT9}2 zuIz_~8M9p1jsFa-xeI?~#Y4lb{?N7ye@$T~G91hDx`kiG(M8+#Ka6@&Hw)%?`44ML z?0vnVQP*IYIdwOvU5jKbsF{akC1a~=-hYNeTaoOI{}HxJeA!ZH#5qWqguVZxZOeW+ z2<1lx@!%FXnX}uvVpCF&MTtDBDJV)T?q+H)`Onarz5v+)V6XqZ>;9jC?Rx#6vIdO` zqzm}MO&9-=_Pn@NB40P`i@#ji&t1JoR-aIK8t1gUsjBN`2J>k<1$|72YiVGKubGya|3_Mc&1{g03T8KkzlGK3=Y?KkJ{e?I@8P5rU8 zfexbea9%&cVS#B3^1_xYj356q z^qJ%fRCLGu-BNO%Yt5yKruSZ#7H#gkF;Sc^!O;6J_e8Wz+P>3^)--?aelWeOM)pzv z&BG7$8~2Fju^-v3FZgoSoM{C&cip+E75j7g_DkFDDNee7R{G@OPEoch{^k2~ZvSUVVP~Ot;GCytLqAt_g08fz8Kk_zy;z?SEw7{7>Yvjn-z* zN8I}xs`i8x-WF1wcy3|#6rbXqZ6V1!MSo?xGSmcSulphV$o@!w(}o(JE!(zUGvUau z-|}?BMgv}Df2*2}-O=}_Rulk#ZkNn%VOBLkbGJAZa zJm_fpH?@D)ribSGF5P}5D0b&37dP|IhjRp1#(Z1vb=^jFaaNrC-);HtO?F$ae*W8q zx6p*^{3ctbdmaZ=QgX|5Z24`o--2Ae{?WA6i(c@rdR6qJ+jrabt@GJlD*Zn6&+fy| zeVaadt)0m+tM*8qdZbaLWVnjw=iKLozug~x@A_lCDr=V8e7o7X3;#IQ^F(He&vKb4 zwtMsDqJv45UQeg03avA~>UrR7l`O#{o*B>masSUSsq#_(Z`RoQ7t>aMb$(?3XYzTG z`WsrW|1-F3jbE9nu&7Iu2ee)jhP&>sx&MCt6RjvX17lSvOewK61*=H$Sy5E`F*ro? z`%zzv*_HKf>n+i)e7N#Y@1xuL#aY{vbRBL*91(fC$F#U)g;|s5Ow~|jmCucR?F&s9 z?_WhKYEv@{;~bNPvcwJU-tBom>mDDCHvN99Qtx*rRdUq<|j7seoQW8kX=5>Lo#gb+hD)`u*=Uk&z!3` zx6FQ;gQ|bviFX!%vQ_`^FFkJToh<(?cy5cdh{u6amDO)=Ip!GiIkE_GEMDu>q4x~$QWcE?y$`q-O!q5Gb_W2;KIV*g>*%~hNCZoTAm>O9Yy)!QCAx2oFZ zd`Y3c$6rHm;(*5K4 zZQ~dJ6msK(b~oy#cxNo?idZMHPKA&21*h^yrbU}SOcC2I6ysT0T3V<+d!J`l%KL~b zA6Bt5nI2LqU-_|;){-CwpPU**!Gg_5Cn!lkz} zxjQ+qZ(Q;CgYJsW)sgG-^%j0gZBm_b%%*jMz|pb=Cl4Nrw)o>0x$cK zikn~aG3Tchj1FZl7*C$d>V4iQ<{iFGTD8S^QT6vsZ-In{E z>cvfgAEiCTCq{e4v8M+8OLpUNIsPm+m3Ozb#}mz%kF&Ne`q38~9lX2q&C*TVyKYHd zh+!2n`>bxSE!d-Q;6Yz;nEc^_y{FEY&_HUUlG(uS7*VmD}FEx;yEXcIYItm^b}p zMqE=Z?-X$;{0c7nw_hmqjc5JZQ?pW+T)Qx1UB0SEviH*C)6P`tFg|AxZm2xYKIbaS z$IzoK?-Q#tzW-`E|77{MDlMG_N1w4B>HqYB<6!dT^`?vFNp03LlFz4O1%w5h#Qc<0))`Bl4mTlZd4J~sK-Can_&2LvZhd>!-k zedEJi#V@bR^hF+VEUSES(BtvD-Qm;zv|GMyzM|F2z}R5H5d4nmqN_*)T1!o;=VX3i zrpoMXoP1?H^LJcftm|6(cWWm2n7sFmpe4j8ywI50;u)*rZ->=?x&QBn{GU+y+hKK+ z|1;d@1}QDrSk|@p@AhBJJ&+u~p-w*pitBu89tdhUI4eX})e_E4&S6v33 zHhX_7#Lf$gR@CQ5)?cpw{jmPWD*v~?^+6}#*62eP54VcW|GPI8;^bTZ89s*8=ijXI z|Ibi&{Xaw3qKC;&UFZMZ-}GhvzrX82=Z=2+JAKK2hVLLZ=!uzVtohII_P0LB6JWjb zKTL?MFRTCk4eTr_ld$qXDq?Zd<-zLhA1ZG8vg1F){%lBKEp|<>KjfVHvJh(7;(tQY z`riEr&G~;+!VsF3P&6-fP5aMq$Q4DmwLDz6B*q<$X+NgDJAXt@_NvK7)ua1wuT93@|m_4l>^GaL;{b_ETisBMe?eCqMPpYi{+rvDSp z*8lmm@IONhC?q^mwY1*m1*}3HZ`1!(meN* zyGQHGf$yv3Kg@~v&v0J5|M%1SAFCLa$5q_l!ru_uVfBr}!v00M{GYtN#{Yz?k<0^~ z4zRWV&&M6|A4LB%JX)o)bp&tRhm@ybg3hh~@mGdypFcs4z2|CxR7f2c&Q|E#~`v;B_@ ze}?)C)&Ch(-r4`SEC1obJcj=a3%>vJyi@;i*O&7@e7F2(`1uePj_!ZdZq@(!u;V|& zzHm^AyIMc9_WlQ_tJdoW(kM`99h?83!S1{K_d}pu0rK1Ke;iTEOTWuy5(uv8{~0(@ zG}j+0TwZ^EFC?_YR{uHu?mt75*vh-L$7j_acB)|h*L?q{I_Ng37ylXBT0XGxvu)pVDx+cgVt5$@#TMn&)Pp+Xv6rQ!SVf{$>7VX zPz`^5=|96mtIPYJxBO>-c&h$T>GJyXoUkHjYy7kL@;?Gw{a(Mbxr~|w-2a%UH`u?h z{m(E7oVfJ=39q&OSKRO-6J6@WYy zz+x&2Im_Yd{vQ!h<$pfz`p>XW{69n23cZ@*!~*5ewkM(!W1faDu(Utw!0qb&pMjg- z??1yl^LR)s?gyLx5acD0=?(%HgZ?udI()hQdFyp>1tI@oqsjfB2nB+HAO%MVDL9Iv zpaji?`a}5SF9s2oKL#@YKG=ooe{^ci|5boazy#H#0n8f9|1%sqj!+=~VdIzcKOdGu z)0+d&qWS+BxRrhWGt3jOhXh0Yp%a(wpSMB`WH9Qg|D)4uj4(5({?JJzd9$waKRR=f zTiyTYtkp-D7{Hvdy#COsOQ3=fVypTeowZ0t1~3JJB1_xn zKg0a${gB%CM`Tp|pO3r$Gwc)o&(O7isVn|RWEE5d6z7ZoiN#F^RT{8N$*2Ju6r8^F zKf`(M^S>Y3|5z3OBXU;!pAR52KspvMb@_wM_^=0R>f(Q5X;VR^^?!!aU*pA*2o3c1f4f_w!c!){lR0LBROe>N9NPPa8Z4(#z-&a-0854-jZllRhh z&nG|h^~z2POYLWzzJjH}{{g4z=daZ}%VK#_QmrO!ipr?jmpkjq0Y8(MN6oH&z45I! z|FL?@7g?+9)4e`F3)dAWOxCGp{PflI!LIjjJJ)zu-M{fmcZpfzzW8m^DkW>bO%VB4 z-&-(WV%^f)$qeVGM9%o@An`A4&#C?1r!110URFn4EY8o~D}FcmW1Yfdo3mlxL{b(k z&fc=M{CfF?-_8&D-^!H?tldmW0lyc`mzl79MNZ0tU#06a*p>F1e5roj zYaX^~_m;Hi6>6SJ+Sdyb++W8psVZ5Ptug1T(mwMpqbkeTic@V9XH}JD-EaJ*&+`06 z&ue~*q^)zLzOT(*zV%nESjM8RACtdqSk#5Noeo)SO>h0X+y5D^`~RJ~I&R}0_w3?o z5rLcMMYo>z{N!z;780W^Je6n3$BEzDA75&)^ZmQ|?LW)tEIa0%Kdc{B_iD+y2l+~e zZ1t9|ZfQT8bG+=B$IYqYeDZT@&+u)1Z7)@NH~ZoH1NvfFyW&>7UH?)_=F-8Hcbug+ z`^&_5o$PIyCaEm(e3`bp)5qTZo%A;iV4eKi1J8I$|uji;}KKi1juy*CGALZd6z8D-VPE1+!%KCLj(vH5Q37WT< zH#*Ih^!z#V7;Et?_T&5=QT?6z|E@pcmAm#bJ}P=WPvxWRy;j;4?|AsXm2TwciXVme3WLsBLU0G^3{ll~C$GZBL-DsWkp}VcZPvOM&d==jPuh}Xlgm3>7{iESO z1HX;?hhNV=hc&x=IQnj_?XvZil~0%MTDWm%h0CNJM-t9VWGLEc5i|FOUFUxWj_sZQ z8D7{f`OolRQT#v8HPz+&c72R}-tygV#Y9{aBqmy2I&gFQ$CB`f=0tw9du1C*~0BH6pMhg}XFZbJpm2`-%ngfo zibUwutd)Pu_}IIeTYIheG3R4(it`Wrnzw!BkK8Rk*uVKL6~56Z=#eC>(fwAqU(M-~ z?3`&T;p_jMyJvO%PyC9CS-jpyrQEx>oOan#o&8qtWB#68)ts3*d6sz#VUvI;)`q7g`Y?^>l6V_ z$yXX1Vm^87GpksBe(NN8(ZAb|OgE1@Vzs|{-^qN?#=Z)L-4%HT%AY4^AI{nREX}j# z&kE`3vivPJg^#4uHf{T8e}wbFrmHr>xsN!%cF)e%u-}}=)3Gke&ck2l=Ui1G!N&8Z zU)297)v19J;8bV=-1tZ3`aY`*xnK9PADv~kd})kHr|6X^n}agPmEC@vJ#m7k$N0iy zWu#^ng?Im8l0Zr%l9X1nq#6ilbX5qnR_m~m&9-W zblRnIPGqAf%oB zPf%Y7bVQrie};oD`~OUKZF=5S!G3Id)VfF$o?Y}P73bY7qgJf zUh!QDN74KjcbF%~C;ewQr1~iS7yrxn9~#sDGfV;}TB{tpvgT-&z#k?G~c z4{xqJzSG?Up ztnvQn{YbveWIxBz%E!B-IkwvKtlRbL-FDM;^QMch6E0n%Sn_k~HN!T6R$+tboznT@ zrTo43G`C&1apn1E*meJ?neW105sEX>dQZn)~Kz|K@%mAPD7oS)V3v0vT) zp`&8*zvf%v|F~5DGd!7f`f{Dft0=dPYqF-zS~r#V-+Gmzd#C@-w60t$lPx79y-1v6 zm0f^U%IC9U-18MH>W|I3-(s@!`9&YG&&M-Y8{e1KcsA{v_{U(^!s1CwyAE@o);m;| zW;4}d-R`6DQYn`fUjFh{`_YUw|3vnt#aPERq!m8zEkq-?Q|7brR)!wp4RCUo0rvD5_#CA>H{>{RF zqf%tFyY1=dTsGVJ*}s_#{3q8;dak=ee&zm$d>_^SvVU3sN5m79TIPRfM+q_=aXZP& z`wX-0as7y1`r=o5|GNcO-|CopuZ(*6<;M2drMFjXPoE|!6)Dn}`>6ZO-xsRKo`}AX zsz07DaOaQvhfQ}YCTZ%J=D%s!B{og>&jr1;i_+7#%~G8gvHev1Td~iZ+*UcOB~7*u zu0JIE$p05VERkl`qd6-=ucr66dByjT?H?2_*@Qm|eBiBN!tt)ey(M~!QDkS?>8i9@ zW^AbwS~5CrN}Uu`Jl!UA@6OAx`2u<=in)}XSUo4NPPVx7B$G<5Y`Ok3L z_UAvVU+4b`ya%OJZ%{cg|IcL4{|p@Yo%Szmmx8K4|9_r~4i-OtFI=%dXv2qn3;(Wu zH0^_Kd*>hP`bQP2lkTzA27P*Z|MbRWBg16g4O{z9b?5Z%Zu^g|*wqf&y1(&wWb$Xt&>qv0a?Jb*BcilK^df?t?*BNR3=~Jgku*_fk$8%A|HY19&a1mrI49Y{H2I(4e8K+=7m7iy^_l-?vPb%9d7*vE zKTbV9ykE3Ny|84{*$02w0fw%nK zb_&fA*Y`hKmHN?j>fgKDW`Fo(Yb9BCJ70FQ?VcGS;#?|wjoY{?9Ne)Bd64NBO_(Cj0-0oCKvA#?SVL39PMkbxdSZTq*FQ~v0lUT7dJCqj zeN~>OeSM2}Q>)F>t>()_C$s!{$oOQ=lgSS9W-sMsYcf~;F%iEf_i@QQ{g0`~TfVM- z{hA{uH}XsM3Wc;S6WnDSR+}sgP}$wGd%MN~rM=;fKh7`z@%oY3&fH}`-d)q@j>_C8 zJMG@56Sr-G;_k%VTQcoYxEoKJ;BBW6nUxm4eV*&XKb$|T>wfSSYhU-`S5>j|W~Rzd z&1e6X^L}Hkx#>mq^+nmi{{-vd(H1iQ&twOA6YYvL`&-(Fw(k?EHoqDjab<1kT|EmG z-`vwM&%$i^*K6F;W6 z-+F)eJ9EUWV@!5eP0yY>)#a2MrYU8{z%OQLZ?R^7tDV{2kH?RmZ~gvje|P=XKdxr2 zt9@7hC_m9;|3-S5+3vR+rL>n_-f3un%5>Rm$aG zGBZm&p1)TAkJ3J6(1DB7|1&hrwEwBrU>E=6@VXkatN)C*xASdZkSS;0_u6mQ{*P?i zS2^<@n=EByQ+AD8x&3om;Hl#+Olj#lk%7-jKWwhMa8F{_hl3ku2TcyUvexvkilfqN z$64CF-^Gh|Z?>JFEzzgu>OW7c_XJ1#%k@88_rSv{sQ#0$nLlSmTf|N2n9B|cj@nC4 zE&rhOT=|s-^LhL9-@7~i$$MvOZ@D%rZR4uUGGWV;)z;N7A8wa?zs=&RdHKzGUtfyO zUiioAs*UO5$jqp;l}m1Aek;@waF`XcL`ma-q`9Sikf*J`Y4tzBc)@x|J_aWUpcLHP4Fw$F&4hr}_VIwQ)sJW7U=aN2v~M z(9Hh~O%Q`z{(u!3|7TzY8)P-xovvxuv}LEV3q$r&lP!lEZ5#R@cB+f zx5xHY%S_eFvY~ZL?pz4hPPr1UU%CEhvbD2%&z7>no*5;TPvd{TOjr0<%Kt~z>_5X7 zi&$N1#E#3 zHS(xx@;mIoN93;Z|L3uQW5vq)hrA!z|FW0thdW975kL4;U9gihn6$L~e>m4T{%3H! zYYvGqSNRWZKbrqF@5+b7dly5~eEHxn_NPF{Rhl1>{~~v}9^$B=^(|XZ@TFWU`N_=G zpf+3SVL!uvhV_;V-`>B{U}Agi2^w33ShpV%0-E*@xxvo44{?qNBd@6WAH_Na&|$K( zAt^|y|A*5b6!$PV`>c-t&{~1)Ci{n6s0M{~ZT!!`5w;)IBQ3B0GklSSxi^5dO6&L^ zMXUb|f7wf5PN+X5ydD(#zL3zL=pZ_4(pq#u_dkkKD7qNKx;Fo3;BZ5?FB{dqtkjtQ z3@!5UApb#gh*JL#$FJbCX`#tNgK4g4@jroD$g#WNtkz}!knKA7RNns#pxofGfU6|* z{13;i?|<>VyblRWjsFZS(s3Yz=RyOnLG!YI)b9Tb9BO+(p-%5)nNT-tqAM{8eUGXFT-b2yw+YMG>^N{W--?U|~(S5g# zUFR;w>OMYq;gd=GB+s8SCRM7h#5lc-akA5NfjvPBCYcAVS!2Xu>stBrKf}SCq zno8;>sVkXxpEdz-W3uLe#f!?Vi7Zs;Ka!9c5;{B-LKhp z;_NHeTDRCEKhAI!md%NG+xBJS$H?_8Pj5~R4sV$r^1k%h%unhE^_!%oZG6P-8yvnZ zyIMP4Xo(U3EH{~+z5{Z#A793&L^~-oD$9x_&CX5dUDVoT>i9-0>tnIarG1&vx2{!Z zOuDK3VpGgy?uifo_Q~4D-mc#M{1>Z8V4UL@MbLmMJ{P7xIe|UXVPHxxU>-wr6`#aVjopJf;=4)xbdzK#d z65@1i2)NjzlRJ0Gquu&~{~1n(d}RMSy_fqxgU`3C-~4;`@Gh)z{m{4iVZ31G%as{Y zOLs@zy=iR3schS<$g_MI*91p~CXRJQ`VIS7yB^%(6}?xyHoBtYwCdNxo3q3|s0f_% zobY5#;i9+p+Q~YcDpx1eEMM_s{?W;GXQjTs<^0dU`JbUE4%)&96(?a4BK? zmZtixcWysXJuz`?IPm3z{Eo7I^8XnwS^#&vx~lJ?uMLZQ2x?)rIqQsy7#b(Ep$tG!P^GiSH z*(E=-WE5fvGMwdGeI%;C`A_l3>PN@s7OwPvv|7LAm)mY0-4!vmGj>kYZ%o^!|8z^{ z28*6`SFLX_#CHCQKRSQY@TdGdu%|)m z;VCg`$8!#*oB0p;Gah`Ts~!`}Vzzu)#^J*kyjAbK+m!h*(nVOYXz~iaA4cu=hL$H6 z@XxH1typ~g9>>S!jqld#)TzeZ%=%`Y933zBx~lf!C;!=^emxP+ zrzbq+HB6g!@hx6oWAS(PKGj$I1-`y5kNOzi{YCn(*P-tLMlUaU+x_$j`*!?E!=a+Z zr*BVn=iTwiZp{b&H#a{#e%QC4Z|lSRe6^E6()6f*C+Bm%*%O%gh`XLs#cJxjMW)e?vR_X$U3%g9Oj5D=?59PlI}jg|0 zPt@K1m{nY%{qRhc>e02!HvDH0*uuJL-Z!;r9M8<{l|8ynMLH);<>8c+l+B(n-}mFW z-rpgA%=;g$Tf4ZTDYo9@T1hO^^Fsk=mzmAhpDgp;$fNI*^}9aS(1|HbX7c4a619>a zogbRF?lbz4r^|lyR>`huGexh2Up&tHOwJx?4CxBc+>TgH#l537#| zW&e}8pwb^_V7e(QYTnZF73W1#o-DFGv&l%);zW?~y#^cUhwVKzfgjByKByn!^*E*jTy+2~e}sdt zT>xFC7@J!&HBi%H-ZXw*9(Qx&s)%38|HSsU)`QQX{q&#Vqe=at>mSyGkDZPGr?#(r z(dOsri~r8{-u=h^1VOcZW%4(-?B0L)PyZr&6l>l8=+<-nXSfjjpF#EMe}+fd&#N?^ zC&G-C{ijnA8QArzdIq2Pn@>Ie86Ho4`fK_zdBJbVcXSmAotJ@ryeo(w5^i5= ze?R_t&=n}dLFl5Z2m?rp5{&2M*qHZ61XixAX!*NEcg3CSc5dCJr7DhVmED82_;qAg zOE(;E5}Di_D~{f1TlwRjY|88J!cqP!RbCf$XKcIfsJmy|*4kw*cNmX6^q$WCRJ(7- z0)DT{?;l-x^JrIVTEr5`KpTDTzQR)$JobW7Rr2t2V^@ZKvH$Ts|NH&_49+HuWnCHz z*iP?H{PgYp+uchY|4skT;A8*s)n-?QF!-H~@-OSZ9RANBx7s9tH6MDrW8e@`88YGL zee=0bf9IV4x-tWD%*@KW^~b+X|65}Ji#hbv@-nA;k<(HGpUFiu*gY;cVJzy(pDX_7 zd-K14>wjE)?)sm>_Q8LK{j2|(rtmGAjl?&pyzrml{1N%@U+a%G$hw-JYySu8$%(Gn?~p|X~*2y_eTC_=!6>d@t+Cf#K72lj){4n@A}Jb;q1M1>S1V6fswzg z$J)KO?En0UuD}2BpXte%KSm!!pL{y6yLR_!?dfH|OPW>YOg=JY?k{mxw!h#0#eaD8 zpW#sag8vK`^8KI}yzEO@zwAH5!;nAr=l@hi|7Un`DE^9}@NdGsvuX zlAix-~Rcf+y5EZg#I&pJOwhr24q4$)PyIAf0z1i{$qdU zXJ6N^;(wyyhtL1~vh6>^!Iu4hiYBA+``qMjh5WWZ{O9~k4X_tNYmg1otbZ8##}?gm zZzR*>58p?a-v1-Cp1&T+ah4MGnZa-VGw^@5OJ1~Z&VPp1@DJY+o?HA+6h%M6HPEm@ zv0Kv~<``IDfxHTK%!m1H-!HzrXK_h&g0Z$&A|tzHsfInzhVxr%6(hy=^ZsF9e)n)O zi`mEX+-oN8zx~8ILZ-P{&8q9wp1bnpe`=$X|LyMoxK?SF&6?1P@JUmGjWyQB8@v8z zn7=yz4~P6?_nC|SGn~J={T~be$JG`+tF*(iT~MCiWi}pS{@s`D^xnhDPQ; z8O&N;S3j4;|NOP}Kf{5>eL7EYwn8wt6N+DL7~1^F0b=H zgMHZk9|HEr8jMz~jW>4z*%%MDarUD6^H=1cik1 zpA2R_sC`@hGaQ)s&!iG;U;P6QP(YkswB_H$h5s4O-(CKXrT^pN^B4XzJb!ojKLeBd zj|*(6qRXGZsQ>&u^FKqQ@}JB)B$W$zc4-H{m6HGS_ZGw`JK9M4Z_SAR{1f7U3}&OQ z%fG`~|1;Q6oBu<|{#ZXg$;oe}#Q*$@0=w?f3TCseOTXRVT7RsvyS%Xe{9BN(|7kTC zXROuV@*@88Z?Hw6BT3&{iU0Wr@fuV!*xqd*dw;C5yNae61PeT7QpbuIF?Y*veUG?ghZ0+igtFsN2?`0OZZ2xU}W8=x+ zC)v+P3g{*CUoLp8(hhgW&l>(KSN8CY?NnE7K>rajjwx@_eBo zZm?ywz*L@|c}WWrC+(>@P*r64ea(6C=)Lz_w^k_kE}P^X8JO&PCt|7%NpEXG5ysETKD*Q9IjxL=pd zyOr@yp>XOnHJ&xL9S^SSm#41uUYOx2^4|7Q?Bdzo-QhpO`P(YK=Si^eMcXYmZ8e>@ zStii!(%l=I_ntnlx^&68Wlv6+ZCaDJJTWf3F!lBBSz>SNV;R1z(muC7&(z~tkGc%t(-}1&h zyZ6@RZ;wkZz4Z9szQ-3|tP@PWs``=H=x&yQ(?Z`Z^=UH?AAZs&IrB*3_q=mo@6Rdj zy86L!r$DUAqpw#Z&CXUnTRGwJGey;()3-UOR{r#lO0vp=-tu9Y-#O3RzkMI?mC|j^ zZ*_B;Q>?eWPfJ+E`Nrt5>1UDoTkrS$x#Zf{w|X99Tax=f*Y#O{SNQ*liVgq!qW()w z>Bs9W&o95-`r6af)NO0PPov|{ly>45m+{LBu7$T8O~^{pzWjVr z{yR@=<&S)3=RW?BIW|w4H}nvn=HBgj*Yh{Nw*PRw&g?(Kd|ml}e|-Nl9M80Wxb9Ep ze+D}%`@cVC|7YmCuD#^dEs^l=Y|q|%WS*JUx~^}^v3nj-@_2;Mc z|7QTLV0yguT2x_|ITwarbQa z{@ovr{_XkJTbnSeiC^Y;4U)&CjV_I_Oc=V!+K?~n5TS-bx+{oY&u{5;HQhpz7Ne{i~2 z?BBIZr*2ty%02R!yXLA>+nzg}Q@g7+G%u4+V9pdh`^V>sNw4=2Bj1Hv7hXPX*{ho% z$QtxyokNrU!{h9g62FgaIrMaf)a8JAg2%pJy}xw7^*@%R;xm_8%`K8EXKm|TxBKrk zyY)Y~KhApnXz8>CvwD}9&1#*jP@LyK?bX9=6F61mr+l|veQovA_!~DKhkjK5z3u+* zuzjo6pV|CJS4S=}s8=*&b!63&{|v_8>qL)TKVvj2^!4dkdEaZ_ylFx`zVgUN=d-eV zbn`QHzZ`Phc8|@q{K0e9o8gV;RZ6}-+E?-G@iX~b*S{_O=>F#V$_hu5`R)5Z%*yU!ZGH6nucqGmExnpwxqFT7 zJ?HwQQawv`%6gwBX3_6QGqSB>{wenJZgl(+^s0eKvK)fp6Et{xf84?LX7oCRwg~B5~p!fehxe>+S3-x?QiC zcH71uEHk_CVprSL^cDWO>r9TQ{@Hj`_|USOHOJg0KM&Jjkk_^=(0FlwtI4gme~)aN z$9XmN!%wTG=&YuDUqfbx%-nwFZN>(bA_>XRNtM!i4J%IPe$fqo_;ub^yY;5;8%#5o zUR}Ov$DaJ{(d%ccE?zV3?a>RjH%{~uXL$VV#MzX?AEF(bLw8&eet2&4vs2go6mr9i zbGh%dMRzJT+^Y=O)8uJxleMPi`15C$43G5oXZ~jp%)BMHc-6fMu5;Si{M?R@B@bmk zy_uu(nOi|VO>#$|@Z9+Y{~69Z#_fJIU;018A=5|u*)sMg{r3N(Qhxd7&wqLMH~%b~ z9Fuo@%lq?_w@XUgoiiul;r$woOQpJc{~7#t{3u`TnO?h4y=|THzY7idm#0`n80md7 zP_3UM@Snl_-h;6DJ$qzqYIBqMHb2-~dh5`W*jq7?uL^G7i9Wq?a!Ijs*(6PkcguN< zB$_8=FrO){zcuT*&!sPKJ${5~*BU+YbyU4#%IoD_?P9ilQ*>q0gXk^Z${js3<}P%Q zV0hc6^q--lK6Az%%a3ffLb*zp9((+|8mkul+Sl{>&emJHkKEfVq*P-5?2ap)JfW<( z>uUYY?}vGp+w|3q#H)A+?f?c3K%_qMt46WY9@7kn!^5CQOM?XY6w};I8TPOQk&6D@E4*#lS*Dlx| zNZnD9U_LKIUtyBNd8yU1@6VqpUZ{RJ+s#ANhIifVEmA=*CVFSDI?6Ng=4x$@tC!vw zSi1f^c=vdmFGImRy&qRAALe%$$Hm@_<>!dHe<$;7e%6MEXO~Sbt}WKom9~Dm&2?+6 z!MD2wvGMwk)Q|sX5P$JcbQ1H!8zuZgaW^7gOF7+qrCGK4_UzvtYHh(P8XLX4cU*6H z{rgbpr>%Fg=5xMZzsLLG>=}CuYuTC?-+g7(Gb?!Qn)BtQ6_b~&`Ly?;H2(i~+>6O5h&?QPuR%I|QbmH5xB0X}CEGKSliAyngrAzeva&zCV+&jO zrVsfYlj^tgx_QquT{11Zv&Bu-*yDTavVissPi}=n6Ly9w>+v_m$!*==^i99b{AH=w ze)G;t{@bEIuJmeEnJ=Gp@PvKIpN3XN#{dU*8&JZ~G5MPZNf=3w{+;*O>x-E%_O#T@ zo?7JaB<=6AJ>JVJ&L8~vYL@8U`F~WaRLrH~+C~2}%w(8X-4Q0Kx1M*`Pl-q_QO1Kw ztL7yeb$$7t8%gZHy%3*?)@SY$rcuI@OPLqn`@%N&w~NZ%nmYp=f9PEI|82{^ z_A>vQ-CF5r(@P6ncV-t?J~cV$@b|T1O+$J5tnJHd3^!gXxgE_N-~MY&*|c3Lw@d_Q z|BybtWy;D)2EG^V4P8#xzMoQjU;)3`j=#&l-p-B{Q+^;mKk~|!lI#BtCKgxMC~lt2 zByc`IQBioK5L3@0!_9v)&t}KgU94(hj=!@1Azy`!<-+>3y?>9s{n_+W=$%BW#nw;$ z=LKfuyG^Wnc~JeStljPp#acK6M!m)Yt!IRbL;d8`**+VeJM z@icZxWz*L5-?lf^drj9?eLlEr?-lXQ5vOyPI#=@UUApU*vrNI_L(`iy4{b@jUM0}J z;IRqg(I3^{I?$8X7cUA!)4Q@neAuCyad@wpm@lG`5VlurHq5whc2yYl}aG zJ7-6Cm;K0HX=8lQcJu1{2kxs}cYEiP_P*`&$q5FBHIH3lUgB0@UbcD1-o0Jdd@sy6G_}QNoy7Jtj;td5v)0&Ie+--cXm0QU@3p7byt?@MR_Wep z>nm9}jiYRL?%COwDB>;WQucX4X;1RG?xw@Kw<2ofFMYYsBhTmf;dg`-m>uSm6I~e(Sb6!7IT5m-le4{m+oGa%S(_XRAf;d@@<2 zS~kgZ-Q6X*8(bF7n6zqlNUMqfzv89(%=5w-^*md0gQG5PsSS?Vxn`73PQHs8z@S=0M-zB@e1mot{~20qA6YD#Ww~IQMvc?OB@<37PJECW{Xk0S z%@dcR?!6xt7oL5c-}uXP>EEe$;`z58$u`?E$-4gS6tnZHo0OtYtH|ekdOFFpOLvpU zn#ZS|=d3uuF8SN$Px^7mP9 zomsPL?baE}e_THXTub>Vac%x@O`E)VBL5j$^50ze$#&<=wq^Oo>2sgw=UZEU%lTPW zFIsLkpRHuAm2}3N8s>GG{~6LR`}NCAi#p_}f0L)lQF5X_vzfeV#H7mYAZb8mH`=@wsu8@^#uzB_P@${~DTj~XQ znSI(L_T`%WK4SJc=I+u}QJIS(6E+$2tN!_^D)!dwrrG-QO%Fsx&dPtAe}(%-=EsWQ zkdMkYZf8z>ZT>B+Jnuh4#?Ea`WrjLt>lwMGbG(tu*foLi!1=;Q{~22L$?y5_g+1&8 z-{#z2fX!QFiI$z^MGq66yQerN8RY2yDO{P1`2ZP)cbd=6(5+1hhp z)*{ut2euv9{@X?Aje9}zt=&GWp5)4W&|seU@%UTC!sLhi!mgQ%&t^ZeRXcpd_?5uz zduo3zO@4T*ygJXlTGD4*uSCViZAC}S4z1lUeq7ga;kB<5AGS_DSfzUQ>fCRu|1Med zG3u6D=91`OiLf3k5sB%QHXR?MXH7qJS2=v|$J)Es5ASvIo-cNT*EniQT=eSL+a0$Q zTDm80ep0k_a<+iD`qLO^^+UE|xs_il>*QW-x?^+f>Dwn)l(Nld9DOR;yK>hjpQU#3 z2Fu=*FW|9nub0{5_>t$XBYUIXmfzbhPY>Ks{eIi+C?9T(PpR{&H4b^U^LDhHa>y~4 zS$qF@ZD8pR-{q}}J3pM6r!FV@?^12>>7C+C&LWfk&OfPqDm*;!_t`TJ)=qP;u3l6l zeQ}RN_rf6}_x@EjZTK=#IOz>8ayeBlkZ_$US*p_2`a?4F`))Du|~2U0Eut|L~2S-qk(Q zyi4c)YhL*}Dn%gcS*BXHg;X)m{T+b<69v0=y+3A`%FggB@JC~P<>SBphi>KdvmUu7 zl{a;jXJKJYQZVn`vsZ!|SaLtRP3w73QgqPMCvTng!~1VOF8!f==-aK>=gnK|1!DB( z+Re22etn{iKF{;-uVr`Nvnk-TxTnlqW_dP0L!P~4Jx@%`otO8xKk9^+E1gZ>HT?)% z=F@0TDesa@xi1@C(+fAuU9%>%S^uwdmPfX4Fsi9j1yUU~Gk@mh=wtsG*uLa#R^w)=r^*~6{UrkCGOx*ZuE@m}}J&WLAy z(nlhleojiRP|346%1|U{`r~%}1AoD&`}gj67cPjto&CK$EWlZQrq!+PWF~C^4&^fi zmJIKHb{?0q+J1a*_Tj28abj2hC}#8b=%1bcEp_g?4QZR_re}s5#Yl)Q`t+xhn^E}V z_RJy=*|Mb{&U;;zQ``MP_TAEL_uk!|l)XaMELKbRNZ$Iw_It`cPXu37nobr!D9fd- zy}Rs#+}@(^Zg=)Ry|+z$kH2EK_Q#hEkaTkOA=9~{2mhv}l*S!UrEE7+qv-&{JWD1YnZ)Ba6`IcMf)-+I^F z`-WTaph^I{WcBZWyD#fF+K*n}C+P9TH~R4Re+n0(Y=djA2j0FlduiW8n+dwNJlr0H zF6BO9VZk09{^R|+7tHZn_I~i6xh8Ylth7bje${^2UML-Xa#@F`wtG&BzsBuTF)Be1 z5}Mo!m+fcVW2U=RI{3ru@JW|`9eeGQ9a)-Mk+5y2#60bavk#+F3lDyBDv@j2y2tUu zsl!Lbw}m=XgTb6&q9b z#;su&zp~EFSK6_+sZy`z&Cjzsn_@QaHhVlRP5pk?gNJ|Trd;{@{;j;oOR4uAS1PJb zA3n=}Yta^Q!RXUPPuH=8FH9F&dx*XF%0k1*J$6?a(mFq055D#%c~wQd!=?J|TQ#?G zuhbO}-XCqDw6W>D-_#pc(VLm#13I(}%2emfWNKKw{ln`c`_w)jfAf05>q)NzGVPT2 zUsm3+kpxgC4swbX(QANo6NLcdkB?&?1gHtSVT zH&1T9bY-iAmZU?#&U<~gpRn8E*b(`1s4;I_f{e!sy`t4Ew*$|5MkXcQIto+ia+cTlF7**}!>V^$fdR13!OV z|4VeM{qO$_ng1D%zxZ5=!ezQE8hrNJ{ug0a{xiJ)yZk?c{9;t@0`8Jf+h6zpa^0H$ z_dmmy{|t}6>e_#X`u~{wpW(vVmG$?3`~S1|@BW=!TscqrilVe1_t~tX!b2Yo zpM3f9RjENOZ_Cu^f9fC;Feq*tY7(`0qj}Nr)UruRKr!bejd^+v!)H>&$mZ`HRHy*IfzOVx0 z(AYb#cIme5&V7<4R&dXS@4;q;+~rrQ@ zOI~N{y<2|c{L`aS)5>lwnB2g5faMq0`tnC-wT=f~e51BYvwU@@)GTl0UJr ziO-)c`Sz?_LiWXo>-%&!ZT?Vuyt6FEZZ2$#>p2#XF=~chu(G07yih=qAb*;9Lc`P~o;N`>Trt@Yci>&t(JYdzQh)&1lC z(=YwkH5KgsO4{|xi5{Abu7|97hVhtpBt|NLA2_t$@hH>=;=()iC1 zW$gd+_YeI~{9l!o{xi&%@BYtV{8;}->e}jm^}+xC)_)7BKdhU5|MRc;-(UY{*tC9g z#^S#jXX~H8f6$Du=Tm%RJjecrQIWAGo3?zP?z2{>wX8?O+ClE7{oJanBH`zf3QY?` zQ=``GT&jEX>kk`Q9$tkdPjTq$MY>F@p3QTJ_HS#Z^-(k`_u4lWX!r3 zPh-~<9Dc?4M|%nKiMdzLl*zM3|B?Qo`=R@h{#)fobKM`Fn_Ri{2a~RP@TE7&-I=Zb z88#m|nq>LY`;^F&=Pb|mHC?RJt_k>%-(SPNw8r~#jp5(vk26&_ZLR0$Zkg@1H@bXB zgqqQA&a0C&#m_j(NxGkIp7lb1f4Z!8{53lx?{)F}7A~pr+_q)Qt_^+inr|*i`)?N7 zl$U#^?uT`FCiD8Jb06y8{Cwo`C-tFj@$q`W)Q@}O1S0)9GiLdeS=UyPG*HbnmIyo4QvH z-ILyEeCE}wl#G+@r&$C4Gh8#2u)Fx%?N3CqX6?juu** zJ!`WxcrDo*-O4(HC;Y;E(S1VwUAMM3-ufOKVPemp?YZ`DX4>^L|1M6`xD&nDGC?rN zT}W{2c9Fp2Q4Bxte^dBq-@1qOalPOlr4QeGzw}Gru$!FvDBJt5eo)_++NF!VYkeiB z_Hpj$RpeO&v8zchPcjlh-lvu>%~x-4>KE!Q#rq_eH( zbDr%x@$vkD-~K`s?;n2eE{Wb@W4iiBaKN;(ik#4Ezs_YuX?KS)PR{$!aCbxT?#v$9 z<=f}{i2tT}Y0G)ujD2#~Z5)0*o0$H8A;L(v$e! z893-i)ujBG`r-A_d)(W8EMNX3(ZpZ8@`JN}`?QB?m-HTQtDeYnEG*G#OQF}kB$mS( zKMRV3{wV!e{IU6wX?ydYT>W+%vwKV*-SmWgwoG%mT5?)baNfOVkxM3@nlnN9zLe6$ zX9dxjtM^I$oonN0-?c}7)gQ+nE&pt`d{`5#)}Q!yqHKqeH)rGWU5Uy+KXuH#BA^@) z+jr-xJjcJ&zuvdK-DCf7b${oqx%Usx=kq$hG{fv#(zOo??j8v{_Ietd1hW4OahrK- zc6TxhbK{1;8*F@2Kgu7H@>{>U#%7e%O4m9@;9HaNU~ zy8ND^=h341{9E@6znrDy`RLgNn~*#^%ah9w)EzD5j`Gvj6N-GS_Hvh~zb2nc7Dv8u zldZDT-@Y2pB^mL2e=Pnp2q)Ha{Sc1abal&KtLujgZ|^;S^T~blO4i%u{6&v2+#ykF0P>iUg^v&Ki2hbncl6DuNkd%iNi}~#qpVw z!)+ozK7O>Ne805*AKUbYq96V<9F9D_rm9C=zj&3)t`hS}PLisjtq01F?2eH=VaB_3 zw$tZtPWyLW)c?aXUm#EMhjZ;pyM^YhY*Qt73hq6AD`m6y-Jj8AQ#RgyI{nts@<+S9 z`y1@)`aiBa{JT8=kIwX&d(LnC&v3%}%;iO!UOub)E*JKn;la7Ldp9}Q%zotlw)By% z{IQeEGRx!mKh(~-Wjk%MsNQ9So2ErMl{`w;;7zHa zt|9@5akLCZAx8wNFa07G_;WFO1z0wNLZ#8gjCvQb@77x&a8X)qUk3IoHFS)U(&cztA+{So`N z@7LqhKI(V9RcQK~=IeFtWa1{>%vRO4Ym>A*f4Y6Me0#R)dv8rJdy|dq+8=GNIM=P+ zvf45~rR`?1@|ui@R`#9=)*s^%AMy9@Q~mJ0;cISn@P})2m#y8qcI)3|3)5F@Te{qr zXY!J~wF-SgDxtZz9khx#(he>MaJ zpN=g)C+IJGBs1mrv6xc}j3#g!u)1HEc3*Oja_Pgj{!#xf&EmE-dz$lZ@vaR!nx0wC z&O7n%yh>8g`Dbbe1g76Hdv2QS`D63L*eq-IZC6gszL}@cXFWS|!9>+Y?z1v0m9$-6 zHEi@`+Q#Ov&@Q~*e#f+(YhPS{l3h_`Yi7KvK0WhO=I`}Ov*#OKUGVw&cb(@xrP*Z47e%ht7K=l(PJU6l{my8H6vu*S7H&!^72ddF@3)_@JCWpb6vd5?)a z-n8tS=YNJUsq)!^?4fq88(Do|JRxIa{mVVm;3)&{%5$pGF4fr>-xU~|3T-@ z{&@bMA+G-o|F8W241M)qU-Fz-wDms&TV8Y7YJazF-y4B!eS?w4+Wa>%^{4Cq6#r*fA6NFDf%{Kg#Ke#G%$NTO{;vP0 zwJ50YX8b!P5c%t~)hq3O&|2D$?Eku&KxZ@N|7SQF_@9Am{UiRrqOAWJ4%OZN&(O8z z?~?sf|H(h|-!lEv{=e6#|7U3VyB~Bc?jNoF9rj;B#Q!sJf;`lp zssFUT{b&1@`ct6w!PZ)2@1OAx|0TYaAACNV4+-YO5GST5x)$B4Kb$WU{mSgp z-5U4bv0I8(d*}acWvX1kp<2#%;>3v)FK4~~&#+pG(b&-wda@oou)SZ?A`6#?$k%E z#JJ;O)ja(zvz{McxW_tu)w(6y-fiD*Roz`tbNR^(pY;lcCtLUrge!l(IWhkKx-K)#P6>^@ZL{@fv=09= z4dyE~wsI@^dUIx-x%T;0g1nV?fJ@4g$CAf$QXZr{c~G+UEaFn0)XZ7A+xk;wmo7=u zuK2u_TkM#p`XtX+zF9BoKfb&Fe*eEV$g*R}eJWe7tpCFOyH@U(t$D~KvFPZX3x!pJ zlAr3d^d7c#a~FNM_Ji_~UA_A|d#dW&YUJgXv#yE1DaCu|Qe@0|*-Jj(i-q^B23Z{5NveB6@Di|z2kDRy^O zCp^wg`Z4{1d&i&Hhq9T=ub8~mJ{omy->fbBQ>J<9u6dYUvT2sa6OA=*a$V+Ix}4te z(A{+NNAt&L&mUU<=GirqxpEe_u1R;_H$NhHSX;cdUv`SG-0x+65$^<=xE@UM>Z-H< zarDt`dA>i9yDr#RFFP*ZympyKsr1{xW`C<&M+-a}6Y{5q1s|-RIfai|EHE@>P z@Llox`;+}2em~S3{czdx$I)+Xzr4O!ThzHI^0e|{*|jsuXD%w^IO;L$*^`C^z55~_ zjV@cqTN`oy+dA;yhGd8#Rze0-4xYjDX z>Flb@j0K+erP?|<9n2;?XL-`QSkmVEvAg9*_sRXJTyeKT-6uBsh;N7P+E32xlb?jg z-`({za%$fPVaDx(*H-ywf4MLta`n3XtN!w>_xo^f@{-u|rOC^pBJZ4^sB^(##ium) z3GWnAo<1pizI)><-;8Va&0D?y79YA5w|M0()z+_fxNW9woXOE$a3c7blAVLH^+dno z18cjaGC%EWj#`)Z&{ZTzA`s91$FR8V+K0cT_uiE8Jgj+8UOQ}Q!jEnLZu@NRdiLp= zws*>e{8{B3-4h&6+VFYE_cB!SADe&c{sH;T_m(b;`0@Ej`7NDaQ+!PqMr*HbnHt)V zW8-mpg2#z-ZV#%v-rD$n^g4Ygp6~si>=kkG?u%}|JRUqjq;$>^;fg@%#O8?&A`hNC zc(mv7WAg?(@js<7&m~2lPsmV@xmsiM&-lmMg%#bN7xXR%Y}vAT zOV_FB*)y(mEZIH%tq>=d*gcj^q3H}08@KJefAjkx_e1}req@(Ej+cH>lqDzj;nvHt zU9OuQ1-DG{co^w&S?80^n}>q3mwEWNbA?l7-Xb}zl4<9(PrerlnP;}S@9LkO zw+c?#Optg`IlKI;@8MaWYO^1_Zz|RAlv>{T=%anJPt>}V*{fs!Gh}3%mG5dPIy6zS z@7_CiyO2BupJwT)I!`6JnYNnhH^oU@d-`$dv4tP?{pUSDcI|TXkIu;_1n-C{ztQJ1 zS)=sZig$+fkyU>GDjDRC*k{D^teah3skeP)l(eNnTeS9Gt8d(wK2O@fvbEyKrOMVA zwIkMDcE(#C9h>~|@!_Z&JASL}FNwQ#s@Ca|M|*9M_-G*EsR&K{hkS{|q` zr>%T+yI1V^+hrQ>wydsL|6c4JM_$-h^M$>8n#>u(xsK$WJQ=X$+Twqg=W+I(57;yP zsJTJJ&8V38txv9P4?JkQhh5k^U)fFUVX(5A(2olx>-iUYg5?|o;=U8^3vFMF-c@o%%UPiN2e zTr9+toUfPSrggwff@yhn!iVfLCiidsJM``p?j^+iUvoq+^FpznP!WQE8u+J?-xP z`m?*=+8P*cUU^=oy~Rd!;g9&E+oZPa7chxF?Q0)&J^yWPXmanNSLI4iFFyHv!?4d; zfk(7x&4PxI%?xE9{$2XVwR4BZ!jHa3PTY#Ax_swoZppO^kyairO*XfDdc>nRfl*mg z)$4H?YgGT^@9k0)!MzAuWfzpj0&qvzz)r{&@f;zdt-4_nGH|6TD#U*d(F^qzUY zejWN9xpnJKvuht?V-IG3o36=px@cl$;rqna&s^!3_B=kgpX1AWhS=z=UGhiH^W1uU zA@)^}PP>2q8>f5U-<&mJx)aOBnzE&H;-52(6ApgzT)zAd&&84(HrgWg!e3NpyKLFC z)oM-HM8}m=n}Xgb?dW=Fwq&N_;;CtYJ2x;SX)wP$t9>*mTj0Fz)~i1v@>I9gZ*eza zugMGCpu!bd#3z#)k+4#sIWF_z`CEC%gSTGMFZEt_>CK|9OOGxl%zO0i**oo(@07P_ zb2(4ToWwdY$XL1N4F{vMUElh3x3cX7uYXx-Q@JQQOZ?+q=gbwiruc7MGP^}RamM?K zE9;_*+O+5B71+$(Be^W=pZ#@HZrObA4^!7oGV%ChSzK6}TPnPFQk=3_t2)b}my9GT#m=V8tL z;6DHH54+wU$b5WP`L~v@{)wiSdlnb-El92OefLl!h^1G0jicM8_GbzW4Ay0T7uQHl z+a9j_=)<&cyIS8(FVvQ7I$D@1?OC+O|1Ns zPb|*Q_*O2J`lWnd&d*OGQCpmnpNS^4DDs3fcuK67-0*kq9^o8Qk=c(_mu$S3A^38> z$^>QAe+t5{EcKi{^>6AHwIo#L7&%X{vi!rmJo}z@_mO`}KPE1?|9WNl(i2jL?&{{Y z?(La;YNAeVp>lrt&Iwyu&wprGEYnavPvGz3lKsN3X0^>NtSnu6?_h0lKJT8jh7;F5 z?6Z`6ma4JrrhSFT&)p6=dgU@9>wkM!ILRvg4&L!m^zYoMa|C0p@7}oQkkaHSrZP*l z;;^q|x#K>A)1P_f2*xGdXS0!*r}5EjXa3{4M%Nx66*phGc8$Np@3Q&D4E=A;)ZCL( z3;1~UkM$C1j%(bsHi)~J3ZSqYm7vU~=A)9+It;cP0LjPT3c9w>4n{dH3 zKjIHZU0Z9m*t~bk`Yop)McdX)R&mcMyQorloBv*com=kxdlUQaWzXF5!}`H){*Lc* z%FRdi@nvUzJ-mMB&PzGZuZk6w&$#sS&fD%Y(#MoPi!VCCX25vs_5H2W+jDEUS66hq zT*==$ed}etQg83dN4qxm{Z%+H!P#UZYwePT+vcL@mA2jg)_m#9&-{m1wrVfk*&e#% z(t|CR9_`t`bM8WyO6ik*vi})03L;jd#H>GmPWyn3?T6ew;UA{EU9d_1$nCOW+F{o{ zm*1>e`?zKAl9bsSrp`}F4N>Sx>`Xe&w)_4MN0YnSM~f>KJO1ch7@2)`MOL`s z-Y=ZRdAjezZl7&k9NlqqhUpv~6~4V3T8<0&U8jHR`LLv1%wwPOo_W&eH&vEzySBYJ zO8r*Z-#Oj|9Z4Pgz8si1{r&nCXW}$|TwHZK`&NAW9>YYlt&g~$?NyQudR1$5&tk33 zLYp?X)|+iVABJl5tckbYz`0)LWBI{qe~I`lx7v@M)egC9Hn%{}G`nQS&c?SjlMXR$ z*W~fmJ`~$@c(>8Xwqtkqe(%y1j=KJ3zhFd-%ZF$4AIWO2&wcZJ*46awzj@D3iQ-HW z*E;ubLF=BQrIP+Tn_L;3ANk+9Cd+?NO20GVvT5{-{4FOsXtcr0ki@_61|JG%`2N7A|Czq5n;zCC)HGx6z$J>KAHga& zw%z-Oy3^J^?zKM>HUG$8rhnPKWwM2F(XrRLH%o5obKjHN#lJ^1$l&nPJLlW(F}}1q z|JHW#(TCl8A=~@zRqI+^vi7cCw7uYt$#({y%CgBk5`7*qQjgx&Nx%HQ`Nru-@}0Zv)?E|ZR+YMc zB1_rtH4ArkJ#v};{rx`A-aT6H%+_61a5I;h_4t5P`L{1>mmm3c=RaKW$NxVAXZW|2 z3GS*Vcckm9#Akdy@M-4F8~pl3lY5Ww1h6mk{OwiYC7peHrj2K<=-jz$uCFauo%`sZ ziNM6?-Yjjl+qq`4_P8F9nP<7WPU*+fFeTEa`eFZp_e?dGySB9du0J?^mThm#uM)w7i*Me(ALFdR*}Z%3 zo6CZ7!5SO$?s&gjmbss~Mw!29OFsXrdV#m5<=d{N@4sToncA7Yzw^uKUNei%&wPib zeL7_R=cd8rE%^_auj%a1 zmkB#J^W$`{t8X6e+IB7f=w@_<^NIyH)R!?>NB7@KJ~*p% z&35A-^WI%kTz+)*#z~jna&DcL+GN>a_++)mDMp6p>m7WvUf!1}vGx0O>YQ%TKk3(d z-G8fDTkpe2PRcG3 zyyd7Pw&>wQ-?mDQ`Ykb>Ge0*a_ByR#xnLJPcYeozhRn76E*EROD|1)8x;5pN>fEoV zT}9{KI$CtCWzEOKmi?zEtnOXAadw`>99D~1`){*8oV|5!a<_eejwD<1dpS$loqBlgScl@FR!+BDZpViYtu zY0~79zRfKq?I7!Pn`e!3>+Cal8D5?(|GRnh>f>eGcoS}wzS#fyma*3In>=-uY6dYi zGqjfOV#;d0{?_)Pwy)i>w|hKSddfcCdiPEy+x%Lc)uL;fw5Lrn>DUt&(xTnY74k&D zAp6wugMX!DWp3ZBcz*Dg&81hJ^@XX6UkXNKmK*BsKC;0(pY@ct0?)s^xH9PjKg)VS zOK$(jeE8l{@_v(ihm^^GhL2j;OD;Zf6yex^D|~fHWPp#;8dt9tSx1?5ma7@xM7bZI z%YKBj>{i|8RdLcOTc=L4);{~5_e`6GT5rvs)7+c3zYj2CSmVvGq+|Jy{|t-NHmXe& z+q~lBil5WXKICuP-FNHJBbM@M9PfPUyiY9BG+OnfU-!pPKZ~>chi|2~nPzOYH9kH| zDfak+%qoW8UMd|ymono#4b+2Y3Z(@Ye-e;>{(I}O{Rj6NJwI+$-l1=&`eORo#8vmR z0=)|vCjGqjNP30~f5CNEm%_E@M3Z~P1=T_xn=0;nEzkDP>i(bTjk|aJHa$1>TXnA0 z^ZeVha_?ttYI~hmw$7~bw(z7&xhwk3n?ACQjdiBk(9BU&~ zHtQ4|JL>lKsJo@_+B>b!>v(@eAKuTYllS&N!$Z@{rtw>%Gq2s=ew^?Arkl>$yXBJ8 z>nHD*-JU!r<>$YN56`AMUfIK(yy(Ydhf5DWY`B&)J#^1B?h|TTr!LG6y0G?0kx)Qx zj)CIC}uNU{~%sjPukKY!b=keIE#${t#bJi+j zkBAK#J6PZTvHWqd^rO|~qrorhvpY+2!+fZTX{kvzqI`dlCk%?AqZZh9lq(0TEuBnszA$`>FrJcg1JmIXS zlbB|3{MkEklNyD;ch9^2yLrX_`LE~y{xg{WxcGPTiu&_k&;R|G|8ZURKjC=(`U}-Q z{}~RP08O;{|M0D`{mT*>w;BYPbXfk&&`tm{GXxtKSTZOKbiL1w(|e{&w#@P?tj$w z$$<{%o%x@kapHf5UY0i{`)@~qd~gTq1OK;so&VLptAGEwzPC93!>o$xf6c$r|FOCM zXE+*AlZosfhn*8I)#ql}fBw%<3bsAt->ob4&;Pv!hvs$T{|ubV+xLSH%1)_&?ERmC zbACJcP-KYrnXTgAu66%c|876XF8SM0{eS*5+^q+PAd%TI(#p735`!64)x z*AQU5@Zaq#^)D15A(&~u-BkV;M=UfH`g_o9{m;;H9^$L|4|?_=m!CR(;${8zQiK+N zBsc9`|HIb?)z$wQT23SBUn0-<-23m&mGu{#Adz+v!P));9D~!__rI`Q_MhQFO#Nf` zKWcFxKRpBasSq@U&3NfQL*A?SF9NVgK;nSXfAWI*3&pGePE2TTk5&FZe75la;>+CsLtOsH<);ZH$78SThlJYgKbiL1 zOZCB__U%7|)U*B`gwl)u893V-L5JR(|7T!&49d2kjDK$W@A9Dk3=3NC|NYO9|8dE` zyI1zVP%i(^P=EUmIPDyF0G+N6^=GgAhZ!&AzsOt$2l~qWW=B4|ysQVu)ZPCK_d%B0 zgChaN0~G_$>K__jutyFzEB3$aCinkv_y5W2-fZRn_OAE8Ch_|3fA=5jM`G_>|HG#Q z=FR^MqK+T?klilDbOsdL_P;nl7D25=;yv5{(BMM-3-jgmpxE*RdA<2RgX3OnsMlAB z9rymbf8~FM3vTcK{b$(z<3j0M|9?&9V0Y|X|HJ!>{4d^@_d!8?NnRqu{)_bG{h(yF zV5y{c#9Z@l|Caw}aFpKv```V??#P^&^MBN&*#EM9c@OenraiJN9Db_ac=?}U`&T4) z=70OQ6mAkIr%3U`G=uI!I_&tLVZpt*{}~z!K}D3TKC*IBD&<49B=K3GiUnsiw zqv&QR*?;@j^1m$W?ce{d@0CYl7wbOy%l@yqsvMje>klO^tVf(G*I*U@_HXdNrf_i3 z^#AZM_5Z~Qx&RUs=+E{))W5JFIhgBOk%C!@;iZ2EvU>Z63Rmr4sKQd0o&JUXKgYJf z!bhk6z+ppbfostx?wcn@fkwi+wcMYT3 zxu+&cmwBh;t1mnn`Qz+=2Ic1m<$ufQ*?&2>`tly%1wW<-TrAxc}l;)`@G@CuXP$oVd6n$Z%?@!b(|3 zc7^L)_Zh$3b-NzhANP@O@1y9s(cZNp_g)#@UbF35kEf>iWS;D~H*(*tIkM`>1jE>G zI~Tm1r+z7~WN)(HmaAJ%PrYqB^$WL&QBQN58;ijS_KCOlrF^*Gx@CRae+Dj_$aO!o zJud#yd=&n%^xQ|)5@&!gq1Sko~0jlyYIcU zCwJK$&r;p>-Lu@Th@4ok?svx98E#I07N=xYo;<0(jNwzhboM=k4}BXS&oDWg+qOD4 z`(by;wLG2NSr5;AzjRppgqUf#YQ)DaT2J0`&+7c+`FK5Fw$ZF@AiG`+#h=Ri~G(OcCzzb#16$4 z2VHeBE4(FG{iIP@QT<$^Oy=)nEX+N6B8l!Q_cmRAvU&IZneW%-O?wrn}7ut9o>qwbz?rd%&ypRGsMtyem>FKC)YM@|5T|L)1t9#8U(TlW5n zkhkmK2#rNu&RiOc8t~!=`^;;c{9FDr@YZ-fHs3BDf5R&G+M;bw9{7B`ed1Khw>t-T zeqXPS;Xlsbe)?nl!}*;y5|_J!*Jo~zs){i<(fMTi#LyG(L^~|LE#TMtu)Xu2>L0To z{U1*IeLUWy__4OXKWXQ-c{ef(B74;X`mU~<61^qC_)wVTWT9y}E!I1g_VuryQ=|QG ze`meWKIIytD|-qbyx1d~{J>&eUY?%)R)sxM>lW&+j?&y7(W%MdsaA7(>!yI*2eW=z z(Qtv>{-9HiX!FyIqWdHF-?u*zvK7YI!Vv!ae$Rh~Czcx}<1 zOn82T|JUao{~0cSx_aTCiEaIfUpmXogMZ2-{AZXD@SkB_^M3}n;Q8-w*I)nfZ_360 z3sfApf7^eC^&kIg_v`*=sG7g}&&AI(diTHm{!=I`zW9i|NBx)R;61Yc z8I-QxzgGX3$trK@cN@nq?f)5Ew^|>(|4BFYZ~A|R3(D(Pui1a|b=CTlg8qLG)jx_~ zSnlzAx_#(>hKrstz1RP=#Q)pt{%^9>>n;5|i@(PI6`6YCZv6?@{|v9M)W0-b&(8hF z>OX`2Nx5~Q|GxYUf0CVf|Krb<{~4|y|0)`n|Kr!S`qy9TUj&8rTK}^*wg2_w-=fuL z|1-!(o&Q_c|Ch__mkd|jx#f%gOi0*&Z=?Nn=j?;_pA3H(|J!x+Kf_YPYwzDm{%26V ze{}xWkNp3-+BWPF{?DK+-)#SSP5iH5*}Q)j=YNX*6ZxMZUf@5&ixt@%`u7k2v0r$! z@jt`#{|t|&KkGM+|MAQJ^Udt*ChLES>HlYVJN;i@+n(O~$1m;Ar>>8=wEs!O{{IZy z;(u95eKbC+?sxb4i^@Bm?|y5S)lYo(`9H()Zwvo3#Hs&haE{viDF4Tw%l5Aq{%5$* z8aDr*{Z{^8AMJlhU7asffBf0Z{|vGI|5~(P78u5KU;XFtF=PAJU-l2`C#!!Hzmxy# zx%wy3)A#C+uel#x|5qq`1vbHX-hXH9|9a)uJ+bf0-DjU!p8B6be%pVB_x;bKzuSK@ z$g=->&HpdsYNpfw>~sG!+?PMM{oVd2_ACA~T+g)s!nS_)p)X(E?>#f@-!lJ){Ie+g zkALi6FaOfCb@o5!sQABo>R+_3nf#wY{_wwD^&Z-h`=6LD{m*dw_`i;*ck_Q=m3RL+ ze_mRZoBhMTob7+AqW&}7@ZyK0I3$JdYbe|?od5xVg|gWR@j_x)!m`p>X^@z2H23VeS@*8FGqvLtB!Kl_~huOIpUT^XDu|MBb6{|q;L z>R-&r()`cx_|xfs`&9lj+?uNUpW#o<%lY4g<$tMZy*O~|Kf@347p4Ax|5hFUqH@Ll ziJ|`A+T;Hj7N@Pb>;BL2*Y&@(`~S7dW@gEMl9>K)-{b!b7dMCHE&k7-@HhM4zWe_f zocfl{-KqFuzwyPR{~7iK{o6I`^M8gD^Oyf;xSrkrSLn+=`Tq{^r|Q2y z|7Uo?8TyCwKZ6pB{0IKOVpo%KNxVw@&v56}e}*pmNp``8xoJjmg?~<(%>OC8mH)f_ zx0E$E>YoV8U)=x2XX?@={~1oqT~WXDKf`9#w8Q@y6gvL({%2T}vf{vh2BG*z@e{v_ z?0=FnefFP;@%}H<|1-p${m-ztH(>jJhW-=v_u`+2fB849{_C6jzbvkD*Z=s_{`cv> z(k~KE_D=Pm{;cX|{h!_}ef6KSy8o@a{O{2Ghf&+-m+pUkcmI^BQ}(}lYyU>`pXln! z@6rGE-u~$suj*gi_yp&;6JT&cNUM|1&syz1X_{ zr&R8LhSziE|FXGm{mB2vpDX_vUf;9-%d6eh{`)V#|CJ8~_M-9bKYKurQudS<=Q{6&Ue{J(Pk zXNdpspFwTat?9p3+kels|6=j#T<5=q_OJiRKe-wm^q(O#{omW=|Jr5CP4;d6T)*&} zsQ6y{=}$6$O#f$J+W+gR{J&P&+?Vp7^wj@u_|I@bZljI&e+Kzo$Nv`fgObj+Fa1Bo zh5y~Cf59JbIy-av*`NPb{Pa`%9-{jQAgR|NZ!scXQlsXS!@4#Q~e94E$`}|cwDl7y=MO} z>9Dk!{}~ju{eQig|4U%i>HiFk_8+}JC!bE5CtJ1ujKY70e(w11{Qugn?oR*Dpj@}} z-{blhN7iWAKR$H*@8SOp3%BOo{LgUW{<``d_kS5`YuO*^|9Vwkc=k&j{~z%$5-R5Z zbNX)o^^N_EHI?>PJwerjhm_B~=pP^qrtZwdM z!JbWBe6iD?bo&3P)&E`L;$@iHx#;F{ z)t%e?b)SE>xo@)0ApXqH@1=Lo6`#!vbdcO$CwcKJ=Q_1rm)7K^?)+5qQf-@iX3g~b ze2?7EZtP4we^*_Zt#gm*k{Id9dQ(#sckb-^RP^md&CSO*pPsr?b8cQtb;Kt#J&s%2 zUR?}p6PMn6>1h-h<(M|v>${Ndo~*_v#na0UJe@Jg(`NFuH5XkOKz=2JMg88i?%Aw! zYVjGHjSRKN-eo;a$^Oq!cK_e5`ZsGGr1q7>f4;~6``7-*?(mc2ub-6Py8f;AqyC%g zgFZB6+Vj^xG`al8y`bbrT7Qt)x#>%KC**#LS3P)Y&gZRd7SFHj)2#R%{G;$g^pQCI z$$m|~$zB(q?8_F^tPm^hSM`1%EVVuCfVHEA$UAMh)6Wry(wkI5kB$FYe{5gIHGP}( z56c5Tg#KInxbshH?z(KTTc-c!mM_0`P-y1iq-bNFtXF~!Nt^cU>@ip9xq82~UUsXz zoPOhyYc+;^jr=`Z+^>}!&R;qESU@-TmSTQ2+m*lucPXwu8MwX{;PHA+BZFuy(a87sw`X2=yB~Kcje?`H~3ra z^?r%Z-)fa<)!whOm7n{h%s;m6lWGfd*H260-te0%x}o7zy7P`F>Qm~1cU&yk_*H#A z{~yqc(3#Ib@y*ud*80-Z>LQ&FN)#x zH@+KrSm|eWzOvnoc!p(nk8Utu{NwDZn_08wO<(&X%GNyS@)qg6c_Gaq(H-+o&NwD0 z5r_ z+8%H9E-vfMPc-tF#J57 z`?hxq4t{Ixd&r~UaLM^I?^+EB#SV*;1slKi&qeYp==6N3Un}l6{;6GA(R?&c>BGJ4 zi*CR8wXL6Z=}ocnXP+b!cYJn|eWr0D;erR(Tjh5Xj@wkcZ?iM_6L9tGeV%Bwxl7{j z#%J5E6`VRZFH&dXK}GAyd5`=qJxqSZ zj|kk0&z=7>*bCI(fBK(6>Sytx{af0WKkh8KtM2+?iqMaFk7|9@w(P$0bla9LX|=|b zscF^uhTElg*RHy-(0aB7_vG_6c7Kx} zf_L|x3YNvb!ms7 z+n&~WT~E%;Pj|CcP1Bg|=^#0sMN=g&C*_BG>p$y?u5({*zA_7b_3PVuzKMFXm)-Rj zT_fPR!uHQ=;Y|*4hCE^qBl~AXpNi3Ybo6v<|GIsb+5h}G_@AME`k##7KJuSGLxXww zkK2!JvmfcVe~}e;+Uy&2?|Inet$XDj2XUuAcjTS((QA9s-6fh&?xaILG z_;Kr<{rqcea^}gne)w}g*?Y0>KRsR9_jYH?ce12LD#-7-C;y!BM-0=^L&q=5BVWvb z!l~(+zfHAgT2;@g`@kZfoMf2nT z3>`LxtM<3&|B+0oiMei-QV@CjbS32lTt8i>{^RJQ`}{w8lVkZ#eam=qd5dSs>{^fRz?E?x zZ@gno_s`rW5yocGlM*v|%haf{{@?ERU)?j4(yGu7n0HJzBBOj(Yft#*HRcX2QxE^` zaY*p=40>y{Ph|TvtH6SdU&U{M@)6|H3wI=Tyky1cZ?is1{gKbDEs>}(EMAup{{5Hi z6Y6Wy&>DJHEXS#0AsQF%~wa`6zXJ-u`cWeLhf0y(BGuWH`XZQ{(v$ov+&v5?iesCt9 zS`+%)=T1d%_~xy(b;s2s*SGbTeO&qXj-a=kve9~@vtl1OW15z3-;=(+ zVtwe3)`#oaqhGNf{P-&1)vo5OsK~Z=l?q$4Lpt4WDWtd+m+zgPETD8C&vVhkEqvXa}LhzYE&OdFHu=UKcZ$9nv8&_Zw9NdlOYAXAT0>jRH?hifa4N4auKewKcG(}-4Ii$aYfo0) z^J`0#&UG=X)pyS3TOZ$Hyy*7Ntl8)8*ql@|sM$CtNb;*c=;m+u-5zK+dBlrUO#im% zYWj!pPQCig!iS``P1AYTyJfP?uNH%?e+T^PM*SIJxR| zJ?A@{+T|5hH!_tj{rYFI>mdTqi<=LEh^fo?ArXWsZN; zKRS=Ocj1+-*FNUnTltWAU1aT|TO2M?{j+%tZy$fE^h`5lS7(UbcWJ-a%}0KvvKLTAC^Pl1C%lb#Y{~7GX{xf|4Q{P*&d~Hqb^*@>qSM&PEelR*{w&B~vjBmfn zmVEzLGPylpFSYB=X=x?Jv`>#_R%;usUCaMi_wV;6Q+|=GIb4-#eZf%OIT_h=_ zq43#d+r_i_(r4Bm`tw?h@7?YK2fNPAR$;GuKOeBKDtnfbl2Cnh{*TG<>sR7ce@yH16`o1@wNTLh2nsr!=mSZ=PxI^%mx`v&*Y9 z7f0Q`8GS!nD%(@{xA^Bb@1Jbxekxdd(j~|;=c!#Nlg~T3%0)GfA6HkXAG>yI-RgAx zZuYM5!@E*`)voZ!^*Na%I#Ir`kbkbql6~GBHzxK^ky>rK%cRxw@_wCJ`_ro9{}{&~ z;@3X?@5uI>a~}T=d%UjrKZEVwO#e4GKQz3xzj5t9gU-B+{|udLkKge*88&d%>-YS?4A$%i3;LapnDzu4m3!(kb^`raesQy;9kgSlMRQesl6R&-#qN z#SWLgzFr%BD8M&&f$Os9yD>Lk1^Mb;efui+tQ( z?oAf+%O3t+Sx~%#q0;C#*T<)S7k%aUV{3c$d&_@@O{OYW7F~Yi8};bbu}cqZbTumG zRlDq%(%Z9Cd3)MX2p4Lx17xm+|%|sv^u5vx7pj=v|i6` zdPna(EmxAMeXv|1>($xi;_B)9c6Xh0J(zR%?WMne{ma|GmH+T$NR;bmYJ1%q_IMt{ ze};Mc8bU4WQ~xtG=POQlQIM~>+5Xmb`|`Xy_3OSo+QHBEC-&N(#QMVJ%iDByP1!DO zxm$F}(lqI$>fAe<6hBKi{@%GbB6gcbY4R3MHgCSF-{1Nt+7!%_@m0J0=KJ*PzkK&C zPG7P0%!E79x9|AyoY+vQArbBQsq(FY3Y&v-OY6JkdAoo9n$GDZv?q3cNwrD+=JFx3vf4c0xOtP*{XJ%jw{iFMF{*hf**SFj| z?!NU}-kzjibN|kYi?8kCwL5orVxpTiPtVhd6K6b_@b3QMhFxx@%B7N~^#(n2qNl{p zzJLD8w`~2JryuIgwVm?qmy4ixnzO_GThrMSd1Okb&!6e|Y)!@F2jYj;i@p9Hb#Cp_ zgXJsl8dNU-cX_V4fC&H8REC1p7s}Spf3rU`(^NJx?zrd99rFV&Zzy~Z2=k>IWn>Kh}^H2)i zyltX@mmv3Io1WVIV2PwvM)`3^b8 z57)bNA7)pW?lrm{)F)XM* z#B-gDnZl=&|E?`Q_1xk=L+oq6+23-0I9FY?J=UFfR`8Vk+w8{XAHLxlYm>v4)Jd9t zHV%2PY}ul9Qzvg*S?0en>hZVyAC6@l)7`cgILdz#+aV*jS8Y}QZ{0uQ6Jw*D8&>C@ z_;-EhKS5Ad+^g-@H9ynZ^vs*@pTC+d{XM(dzU+3icU8pc?r*_AG(}cDcHVwxx$-}? z@AiMH*6e?C`a|J6mAP9Ycs2>0xqs`s%_I){Pq*(}-KA}^v+TmX&{t=-TW(*vy}Im+ zcKu<~8oAHq{~3~QKaqLWJ)BhQK_uq^^wKV_c^@q~_B8xq@3%Tx2uFpDu?CGug zr|TDps)~k}`~6zGXD^TX`_ldU-#^}CVXA+by4t?$EVdxZ!eo&d(3O9#kMP<_ovsM zkpGtR!{$u?-`>1fiCy#GX7_QPtAD!vx7#%Tn};9#Su1STpXXEb^W=X9;p{$n#^Qgw zFS_yXSLFF|Kt7ludGiO*Q?h4{(9MwqPlbT9I7);-E!SHVMkH@)^&CtRGR4y52k8WelET+t91RF*B`pBX*}xp+!o@v)!}NoP2S!A47Rly zix%Co_>%Yj?Dtgv`(e{o*Op%2aa;f9`G3?@$c#m`wE7t(=S+!egDqc zCy+U->c;IC-!9%-XDE3!kKe)J$?175SGI(zYb=|5@#faMYv-R{IAx_%@wZ-e_1#(T z-%HBBdHSJ^+kA_!(%pm!iIabaJ)Y@6=RFiL3AGe|(M3+cLFib&%&u&)q+#N_q*G3U8Sox4%;2 z?<_msbqD|U=Qw#k=l?M~rr_WH*Q?zB&iTiGVu{(r9LL!CpTu^&Jpb41KSRd9lEBEd zW}=>6YbLLFckS%!e5c6&4C(t@rkiUQs!h$gyh6eJ+mgpkxBKPt*PDht{+9j2y6V$O zv;LfCu3Y~aKAP{4Zcyl(T+KCYX27B;Q!ZW2m>;sIvnz1F*48gSIrD#;)<|!4nwYWe z)SBHK|E}+_NiYBN?W?HrzjN{Ix0o3(`6{~4z3Ynthr^W5y+?OIn0 zx3XRPSDdg<>u(NQC1xvigHd5h{gHbQ{u~BPK)v^!{4M#1p@G9K-Q6r4;{VjeZE_XX z|JnAoG3cWItt-#=8}F~4U3@<5$ycH0-*SGKR(-a)WxIOI0qyy3s~aa6xO?txjp+YY z`NNraN2AH<(2m@nZmaDR96(NZwtA~-y8S)(Nmc94=kGYSBsBk#UVd!7(fL2d*AE%g zKM~KVf2VE#pFwt+{hRk6S|63>YTamK_4s#n{=)wZ+qUwG&b2C7vX*1%i#3TdrV^fB z2A*q!H(f~hJHw9a@jr)c_kURI_|Ne8s`kIL``MOkH9Ncb)(p;*@o%&H^Zu1pbLARs z51Lf&^LA>~&D0Ff8x2?xdd8&MVp1yO%qQA53cn&Lh z%<^4PqBx~D?s1d-+pVgihDA+Jn>StkbNgza=_<#fW4`)LPd%OGj@1(~oU;k&&UCI7t z_Xi&aw%+W9rS(q?3oranY@xu06B5Hc#EIcKhzUt>1dN|JCc-e^9Ca7*f03 z=|98#+x~wz<^P0M>Y6pm?ff>E`(J%7HfdQA#@F++F6@8)4SdHL=roL5Tl9ba1>cSK zpW$fu#ETAcJHM?(*Z0k_G(H)1G^lXlIw^syu(UrnoKn^|4yxfa~*rME`TX zw*Q0vUzY0&0@$b3plzP=kEV&koEP&6^78gh^_J=$fL>(yIn;B zm{U8`ocfHGuUPe;;pVpcpU?kikh6arwqgO-egxP3TkiHhplQYaA6HjtFs(;$FaDjK z`=7z)Kf}kz|4g&G7*-?kP3qHb*PpllQ}LgnH)>IX)_(>BUmjujptS4ChTHD>yHCgN ze0|bX7jlDbn0l97#@mP4)t>pwgm|}oFw)-7ZlQN-4_~G7^7W$I;{tyN{x}`@qw<6N z@i>)>PYkkdrFB;qW*U9%O3YO|F8^+u^oIr9R_CVMchnvJyZnLo@An(-x>x5uKbdmR z=jGf<`oG+Hj0$RB-F=)@)w<-{_Bfs^>vEU7bw#92I(f-OnP<(i$1DOK3fH?b?{j~# zzxn>re})6cKGX!=^nNsL@5Mb%{~0nSIj??udFd|incofX8=YpWIs7@vUGm`3+St&f zybAp`JDwN&1g`09wY7fuZ%MYmR4bv5j@j%(U&%8$~v$9l7uBM7?G2L|y_fGbc7HnQsF}s>of~_Po35{8_>uST5_jc3 znb*6M`yNTE^hXPxE|)#bK36X#`oeduH;jpf1s>}czF2gy_~7IR&w9mHXRcU&_->|N zZrI5r?$vHeY?uGp&N%+OST_#cb?fY|X4RZ{ zXe2Q`bLTCynM7H7h40doj-BGD zyrO6o!=1}YKuBppAF~jz9 zfz8j#SLH>1Y?}H9_VVVcweDekWSKVY5%=PHv;EKFbtbe$c-=MqIFpG}g=^IVM&+KG zv-1~fEIL^Hyu*$;Lu{I7ZvKb=3`fF_>Z@I0y<_$5^nSb4(3$%#=({ZO5G-H!cAZ;< z@L99#-|9bH?UAb8yZ&%()~$1=q_aylwMT?(n`T{hm+36W-kWxZFP{xN?l<*|uSkSm zCGYi?r~5y=TfZ>2!aR1@rLXy0x=*BEc{t(3=4nPNic2Cz-Y1B9a9IQ_V2qjbpMgE! z^u9>N>adUUZCiDNbd%q3X6I*o`<9fFrB&l58DNk)0S(Kew3~~%`-9jz@pa?(ak&tUYlKc z)Vz-w9NDB~K3y{8r<)_Q=oed1cG-KrA!+y2r?U@Ktv>NVdod{1@@gJqjDjx$s3jU~FQ=c*W;EFBD!5vXsZ}>JnxLv#cq%i~gd#=0(GEc(y zTy$-Ee%g-v?~E<|yg&4G*+V|WZGXgWRdOrkySUUOEs15hmpPKm*KE6&$gq9tmTi`5 zyViO?_%?gho=qno=$74k<^6K2aC!0?)wPGt8)|v)AtZa(+%5iX zRgv_xX5)w3QQNlYX1z^LU$b`a;=P-WO?!92y=vp;MTZ6WLbkuVJby~qv%McS-9NHX z>;4bpBl$YzMfZ-D^6gUG+2rYVlOVP4Hl~h$zL+NpZ|^r7wD&ENVp*Lz9w@jcg`e&=`Rotxk8cl>Agn2Ot za9w}k-}Y89BlzL7?J7(n+q*QsHflv3+U#z1lxgon0rzLl+&SG@^K}_nG(>kNK9G*p z`}geK<%3dJcgNg2zrvrTYKqgn$Y=ehr_SQvTA5<-WM%B`lQsFO{Tl*3(pxj?*-G^J zrkd6Bf1N+^ZIu3&S1Svjtlcs%bx+{W>A6pjO8+@6^pKI^?StTV|M*{Azo+h77tioI z{uXE5tW~SBPE0%W(d}a4x6<8$l{+N#H>;!w6=mO9v4CIb!|_AC>IZk7%9`cgP+Fb6 zA}HeRBwsJh1)C!yOjhS}-`!HcrjX*YdQS3E_a%SiFO=wVyIovUCmnZiX{Pw)S7u@J zv$m)7RVL4~lxQw0Z<=nbckEQd0S2>mw#T{{K5*2rR1|S@=Gs4udtG$wHIuu}uM^X% zLmRsfZTqzKVRfmS%8Jej%$K(6?30YnwDP}U@}Gh2^|X`wo~2cqYAiQCZ6z|vyWi;L zzDpkVpKjMXzA$`S62N}4{6E8{yVq7P_Rp-$T@mAN)|FNJ=b zxAQ-P;^zMhF?+oKGc3+u5$l%|8{WS7t@3`Oc=i2H%BA9)ZG_ikivQEx_91R#1n$?j4-6!Rn zPL_S{n5fh-Q?9Jwl(K__K|+XGk#XGY510S>uKUmMZ_m2=FI+!`AKCwD0WE`>RR7{) zOo;ZWWyw6sKT>AbzT3~}wr0uWXX~oI^*=h_9QF6=xtWPee{|IboqGM-FZ(~kTkfS- z#PYRI+{^bpG2`FMp6Z%IW^l$=;jHf@BU}FuJNDY%fi?Dg`G@RU2Z9x_Mf5s=gjb~rII`TPF;R) z`jhiDA3zfbt@ruf<^?-l+Ujd3)^$tL?b`GwO`_XdYCg^1Eu&cRdb_&HlaFHMa!l8^ z@(Q`^yZ+?j9(%jxx^tIKI+<+4SrK|ByHNC$h2WGKJjoiT+TT6hGwbs|oAvwo|4N$X zfbQb%_jUg8pW%Yqx?MZ03f6LE?l$?)p!&ouQ?%*yJiTd~!tUpm+b(*2WIp>8Ud=tm z_4&^tZoa&DJb#z6jM=1&8u$f6}@{eGIqvPm-9BCswVljy}JL&{AT^U(=mVlGyHme{?||YUrgaT z8$R}WI@${TXHY(NFzI>6N;e^|%bxqAzu2C;e$1YyM4usv74K@#+WvGbq)~|IZL_|J!2S`-kO!CheF1&#*iHKf^-P%B|7MB~34?f4}iH z`tMcG{|q{J!WZ4#Z+!Rh-=%xh)$VS{N-s0~-SVH|(JRaP%=;WpE6(#RW8N6}uW!!; zM*HLcF8q1@Oj~Y3t=Q(T_CLOF`OmQK(|?8+g5g@L)gyynR?9@XW+s0WS{fMXYQB8p zSFgG&^LQuSxT8}wv%}E$etO)3^xyxcY>(e6-kh_rD{}FbOP0kO{?7F)yuSGQiEs6v zboc+QvH#a)@}EJZ{)D{Guao~77D|1$TQBZD@7Jy8t5&W|N_1a5yMOMay?^~zzulj8 zp5^WxR~GM;dp2!wiQmjP$&cabs?&9s;{>Cd8QX(}+6AY65`Qn?Azy#v`ms2f{|pn` zef~47pYfmJi(;(Ke}+#6-v1fm3;#2`(2Yxz{2dxR`^~Y}*Us1P_-?GTZyMinU9($H z^W$DWsg17v?fxU$Z`-8m?pi;K#mAm~3V8gK_vT+-kNsQh+vbII=9U`0TR5fC@Drcm zpSnl;l0WYM)4i+z5Bq-xm$eyD{~1n%UH-S@=zoSy*92FpH~zYQYj<2!^q=!*wbG_8 z4m0qK++F&nmY4sn>ci+xF^jif&B}NVRB~7_tN(bltL};&-?XyUopJpOrJcX^fAahPXNbS{pW)=|{j97Vt&#q3j+(yu{Zrmk(p**i zn&zKd`=8b3%D=UI$eCUdrt5QJO-TRd^(+GXAAfE5^YXj>mgOyP%_}!#Wo`{U|4!h6 zfr^~`jq9hR>i@K@+yDCEe}*p>tNt@|{EGR{aDD6ce~qeDrW>ZbJU1t}?$h;m{{DL& z_NnFV(KK0eDgW86zx*qf{9XJQBy; z)Jb(W6`Qsx`(65_u>D)Ex9FE_y^`1+Ty5rW;Q6goGOqGq%{SYXkH1a+u-GHOapgOf zm2ys{EX;rEmV}>*6Z_9FDKYmyLtOGt&+z2`4E;arUvJs}ODa50o1e?>`qcfUTlNb1 z$M3n+r&S!9H+|Qoc)RD9Kh<5T6Y0A9Jj2vbNNczGggI=-?mzx6;`!V2N04}FMXAAw zHAgBr55(<1{x;qq?mxqcz19C2;;!BQ#TArQ|M6?-e}))6`^n)m+v;wb{W9D4TK3C) zyZ+trs@XMP?dH_qN=>BJzQxa8wV$u9}-hY;?dvzt*uSIL^ldZqsls(MU zdRbg=x-b4&t?z#ZUdIQQi@rTH)wks2*`FR~dHnmTiv5}LT>iy2oNP-j9g;Y)d% z;6JOMg7!~RcK>In-T%ev-lh5{N!Rwjp7WpKm&CfLi2+jik z=k{11y8rPjXt8oc{p4$6z5mwn|N2?~SC+S8hEC2$(^tW>!n!@9&&J;Q`ATBf)J)4Q z>uOo-GvBkDUNL)Us&AEhFY52y%Hwh6As6c|*@?`PcNVqyuNRo5TeS;w{icuZ@;%#ZwyU!=hfPHPgj3$?EHZL z42NAR5?Yk*2Oa*jf2UvN@$Y|EDgO5S;k&5Ik#FgyNHG;Vr%%Th{QmlRQT^&I{~5~v zGo)vo?EdFimHxLS5wz~JAVKT2s5#dxm0ywpN4!>bPr7%%Yv)&~e;4EnCw05znHA+I{eS| z#{Sp$?*C_S@cQ&|$E_J9cZ4qaW`F-~HZ`+0F#NZlZ)K$1^XlIo6~X?w+XY?)&a?O= z@>Nn+{?aPn-$6gL?qw=X$nsV)43ke|XDO)f+L!Zj`k$IQ!TJ{)W0r0H&!E`-{?~i; ze=XTBH+5ypd(T_ffBIKS(0a{)P`S5TzeoIf`=24JpT%9sHirV`?hac|C8$?aO(T}MK3DfO9#*6 zR9*RH{U1~JCi_?F{}~z%|7Yl0ZU5oYJwdZYPuy2ls-&0SynbEQJbi6VYOGQHiPiGI z_x}#LzP0h@j(bz4ZM*rWfA!f79}YacnqhW3GyD0AT95r(&Ntr^*6{T>W<2>@ z>D!+F4B|UhpZ)mzaOd1>w_4bvYvv!=C@@STK@(bgS{}~P^cFoRpS8SSpXL;~{2IeLI8QK~?+)QBI zw|J6Q|DEN5{~4Iq&wKKpAzuG)dHic%|F0MSGh7ev+8+p;rJY$~n^|dCsR;L4ZTv^wr*nGOTyuklnIzs_}aJ;$t zuO0sxUQhnd5cTpO$A5;C6(5%WzW>|v<=;5l-~Zg}xaETd1CM3qUcOpC^Q-;MulIk6 zewF_Xy;SKp>a{dgxuOTEkI$@~AB%i#%;Yz>-Xq-+^Y!JG0QMPwXQQa;-<*9Pakb3# z<$(@ziQi(Y|Fzb_FSt34d^=6td{>6b_M4^eQCy?9_dn7lIaReHjE8=N-4FAvySi?R z^lc5cx0g@r?$-L#F3G8FuFl+0b$wxo=J%t%7`yA%yREmxSU4QO81}{f$M^j2_y03E zn+B~#Kk&3p!)5y6U6VXbu6^9Yx7LI!@`7N~0X{#gk1@PcuDvWy|E4`VTiRo*g;;d2 z;+06>U6u?76k2lLh6VWFi}O5Y>(#~Z>B+~sP1mLi-^seRt@&J|daJBlwC?r7V+U4t zut-ZDKNmBHiO)E8drPTisjXF|-1D~=RnJU3Sl%jElknlb23O^Y6AZFD*LvTUFqH_? zZuu;j(wAW5CM#n#&sNg$k5(0f#-c8wu=eba_ghWg%l@eQeYom~+GEday3050?vN4d zib!#1_-2uOwq&Yv;7r|jx+;^)4l16zIQhF&gH7|p?{B`ZS?aBTZy{SKiYH*HkY zP3O12JvUW$V)I!?;qwKr-abG3lwZGqV}bG9ojKRHhw8sw?f)zPKSK@wKdpWw_Npg0 zcDn}syS4d0!?OPj-w%LJ&;EC7^OyPme#HON>Td1c5-R_8w>?;^K4{SY+uiwp&Hpp( zWB<<}>U!wtE3N)-PvaXd82VUgR(T{|q%Cr9{ZKe|x(eWZM30(Ba>Icc=en z@cqy5z7^z(9TEFo1ODBGIt*<3{M*;&f4Tqf?|QJ^$67_5e^;Fboos%e`#;0cp!)oq z^_S~^Kdk?;N>9v0W6ghtx3Bdl{|qfnr+;t$&v1V$#Az27ZQlP-VH0Q% z0z}h)22S4TOOOBk4Gy(lDQ)Y2Lel!){b2j#KTNn;6;qF-x&EONs%B9n&05;#|Ae#= znkU})h0uH+{fzk6=0~=d)hT>246lD^29h_2 z%GV#tP5sYclV|^aD#-UtQ$@@F31{p7d|ddSp#~He?tj#-#eY5qv7rIefcYQhMEqxf z*}!maJM=jDCEGr>PGKv$=vRIsCNX>&``(EE3@wk2|Er9tzpo8CCV|!J$N8B53=?kt zXIK}f`k&$A_N(7^d_3CupJ5u*@uBj6)TXIRwiVUijsK_BQk;}oo7BH)?^eHmbNM-b zWd3JRyI%jFAuinh7i&%DNBMtFX8#$!p0xjxvHr5-<7mElzQ^oA8YkA|FJJY*vB>@b z$lh>OExGg3JgM(b&Dvi5b;o1xsmEid_MeV#{?G6!5VXTB?*7%x>;D-(S={;0aQ(r* zr2T=%K}Wv)d$B%2b zy71HY$-}blGW>3c%)vs@a5B@RV zw)^dqx-6{S=T`wA_clm;(kuM;3Y2{2f0+I9=lO}n(8LgP>BwiV;BC)mwjTT&x7709 zpRU{a|J3UVl>i<9~+V+d!LC!e6-mQTtW?=l2~*ya)Bm zynOyI0(2G#I66e*@2TxxvhTHA>)w-xe=d3YvTnZeXQSgwxCAR;QD5$ea{P+ ze5J?ge?bN5YKcov5B?4M8@+$+uA2U~d7)>6OfA2a-~Pw>bNS->6JGqk9-IGb(w@KR zKf{DC{~5mi&HpoRf3W>StIG|?C&MBYbmqvtxyRo@)8p#(-1e*83Zp)&?A5(~=DSGk zQ@xY=J0t$=x%{8uiD~J7hIsz}3=5Bj|8c+c{NLWS`@e8%e>wBxy`M50_k3{q?D+Vs z1(Qwp-2LxB6)X z?%TWbr{BZb?|eD)iN*83 zgN#^Sf2e+W{dw;7;3D?X;eQWJ_uj6r?YotIbNA2f-*5XL__G^FcXIs|vyb1vPJ7h- zpW$eTxSaWh9T8U(ce190TZX>i^Zyv`>-FzaPiFMP*Y~66Mr7?f7*?Y zpsafT_uKyr3$J~f{GUNtZ0mo9nCJf)E*!0!segpmr2aJb{DUC(E&eC`*V_J--o}5A z{xf_GZFzcLUGL@kefOW})cUVmsU7uF?|12oy1F0H|LnHP|9Z{;sp`%o!zX~PWtRafVF-=DdB+qJMi{$hRHHysbp z`p=;ML;Y!s{U_rW`#txs&3#$_iDTn`hIK*z8D5;!{&M-B7~AamU&Z4gEsOT!w=$Xk z$~_0SOZMit{M)E;Ixph($G!gicnDV$>c5)yy!!r=bZ6c-lfw6| z{h=*CD-1_>@GmFGhs*vmfT9%C-mrg|kU9VJp(lR>>wm0zaP)!QmRZlX-mb2U-nn>j zjn%^1+2vnfCV$pH7XR_%57++;i;fn}I{&9lfB)+}^M47hzxwebvyc3z_odL5llOlH zZhoKt4D-w(o)3NZbgTX5mA|cv>*SyLXcPs8tlymXZ`*$cerRgM&C9BJlTH!AH%B)-!_E2&z7GVHviSS z`qwl5GhDc~==#1d@!tfy|1+eozr6lO#H{o`A9h1C!;=3Dt({B%Go0Uh|L=$We?luS zta;ekrgWe8%lp#YbM`BKUz~1sFaCr7$*Px=|G7r|XSn;H;nuSK0sKdFRsJ)a-g+L~ zLYiZzl$q!t5X=9A36$PmC4F}9>PdQ-ZSwu?C%bmTBlQ^pA-UNm@An;lx^(}a#wWk_ z|GT{I-s^X#e_VgD{EtqovE`>cSUd4QLu)tKLD#`X@2fX#zy8!Oo?m-^_1onSDohRH zE~=iry#0^x=Vkt1KmBL8Uixp#mH!N}@_$R~U&b$%|FE%S{m<{P%)a=anBLU?40hik zAs6~W$9>}S`jq<1=MVqm{CQ&jtH=Kt*7^U4q|8dt z8qhWW8FuUkT`#k7%f|lT z3Q9xo!6|$ve}b20Jy_&@IW$>#{%7FU_68-3`tOJ8KZY;_f)a{8OaxRH*B?54=|98s zUP$CHsEW-0u<6VGpARelGt{X6XAlKxFa~LWr3{8Zkf~rZ#OH%etv__;(*EbI_y2yF z|0k5e7PP~2kDbNFYcY{Wc@LddcUD>vXZhy%r8U%9sKZv?vGunfHUNQp+ZV?ywi2-kIVoW! zW6~*Eiv#vY46kp~V1CMf+wT73soNGmu%6$%PkqOwg;z>dE^hS>*|>d&N6ob}m~M?nnD}>P>u@-(h98bj8x0yFKO~zCP{IGlqjc7fd#5otH>r`_ZkhJkPbt z{EyN-_5TbDKxKbZXZ=sLxvj-dZ_ED(Q$A|X?|!gx`G1B(;eXe&e_Q{}uI~EZxf}mo z{k!^43)Zb$>TilWM9=vA zrWj?xKylu;R{uj5_O-^5uS+?U0CI@D!2kHbba!WeX#blXNy}w`1X7jF(A2(d}+nBucMwampuZLe=9Qx6_cnar@ z85gGg7S|9{4K`}}&tM_?wIAJu{XbkmThATes{dzT75dNc$ybbfzGkMqq)pN-yT#9b zy!Y6Dfo=KH+c7`Zd++EDv~n@7JXW6Fn3$jaN9+}2@0qy5ObZR|u*Z+@>=WOy=ePe6 zmaAVhYphqEUYspnCUE;dLuUNp!fmE4t5|kUP-d0$;8Zi$DqX+K{-MZ6{lDy6=Km4# z{?BmIWd4V)is}EFZ-xKkQvJ{HWR{M&o#mxJfh$gb6ub92@FU|DQ{SlXhs4u;_BLH? zpRS^QW}eEYW(}b{#w&l8_A5N*z51UaLtg6UAG_RW-;Z6d;rIJb1g>si7Pw*?=1 zpIFE*H8tY!0#DEO+=IpH_hJ&R-<1E*`C~ScjrI?PKbrq#-?AR+q6odp{V))~&b4J90~I%NqT=_39Ic(oEiiyCRkUp2=A1xGl(NciS_Td4><-`M2Elj`?`L z^-H(E?5nU}OV@Y&DVDu^r|_nCWrqD0XO-$Gp^n%n&3zTOpH6>rYpL1a&i@P?^E>yy zuw7dJU{U-(&&B@)^@aX3T&VW?&v4LX|DVaOP0vsK;ywxOdO|w<~onH41w<{p3{WQ;Q4f5h$Cv*3PnG+TlaL-fXY4y8Ja;uy$G0 zt=F?mudaQ6bIFWTX`cCU=dPVIy)9ESW#VG}HlCE8@N4!TIzch;3l;fR+Kx$`bTb=eN2#B)8?nbZ{x_@Cg=0E{b$(I_o)2dM*SHI z(stkc%|D!f^la~g{|r2T3^&FLw_dvKb&2W4w!qswcUAJ-lADwHax_Y@z_Z=#8|5)8q{3sGO z!T9C-a=qoIPY+JK(6sC^$FkUy@^hl)tL>xf4=F#2|HYpf|3hQ?e}+lf!tOup>leJ< zb#8O|>X)xqzUGyVyW^W&Y*;9vWE${2MM>}3AvF=P^ZI{u`!gAQ-`%a7Ve-hOD(7d`t( zSiH03t=j3RDT{a^V6p-#N6!SDTcePDbfy#o4@e?QIb;um25~MWaEhz zLi6P!{?z?gxZ+lJX5N|a+qQ|@88%B5?TD3oC2?#%vqJ2-i+i@~l|8)u#zOtM!n^x5 z!5>Y(ZMr4iYNvGakuO(mu=7@b@q^jPCKFGV>YloLYq2}W_OggaZ4Nb?0!}gKv2gvG z{ZEizs2-72-Tx^5Lvd9%>-mW_%8%EJ|ByaD&uoK`r{0v(+L)I?KMQSU{o5$J#BbYn z76pNKp?ci{!OF@#D+2o6J%8N($n~G$;H|A!xA%Ujb$YtSR{|qV@6Lx+M@6r3? z&faD7SN~}E%F_HbFXOmli>k9D^&(a5GxkmlKPk|bsJko0z2VwjJ`NL;^`tsIs&tzs1Ys{Tju9?08}#<6m#teUd;I3D%CK`G z)81a4l%_iMVMN!ar#<=eUO#Gm-WQd5>3`d3b7tS;E8DbR^#5?)15TUzpxj&k$yfc4BIv}O1>dHF^0)m@wQkn)onJq$ zshIyDIVS#rOtoZqeh%|JK;) zytoz>-?M*9$w?buwr;(-CD%Bg-?mAtdD3Pvt4OkI*Bwda-tFsdoZ8g<wjp1f+v&b$MUEj zJ0JOr{NO)wD{kSsYi$z65J^Wl-MTRK^qId7&-WNLHkGf19g$(&!R^`>mw!FT@| zKJ@(P|JOV#{vTJ$e}*Ts{C~LBxPuOw1cjGT{U=|Bn$`tbyVT~*PyLZ@BH+I2hu>|F zD^D)}I=bd=%wg6o+b#YxOkWbIw`6ktd$VQn3szU%?|3r(n4Q!Gy*+*=H^0=&MP+V_ zV~bD-6r0NzcI!dSygFGY`x!CEKfmFU;QDp?pJ2b>e})Ui-v1d6hRpvn+4DaG$MKH; z3@SR~0{$s>Qhx$}EdMC6cTSwv#WjCCS7dy@?7diJb*Wu@RYRqx3e!Qm zb7IaId%x_p@k}m$#J=^SiS}uYTmE+U9j`2z&vWvb zN^hCeov8_Bw)KP_ASae4g4520(X zxBpW<`e=9e;$^)_zg}IscaNXH^31qD zTWQ|;T;ez_Bq1N0fAijtp%hx#bZb)BBce zx)fUzu$DzvboIXJY)f?n?<%?oGxIR1@U*0y;btpZQhB{`{U7Ct5+XsoS8hYI(4q$NhXWK#%4}`tN$}_Y=v#IX$$-QEGYwj7({rmCqysdLyxpD5a6!FDIE`LiE-6^Se+SVTZ zsD7+$9@ocZJ0I?sIQfXz{P4G5rt9Vlv|W6ACR`wD_Ux0XU3X{hSvtwn?LbIQ?uo}- zwucYz)BPy6_rv<2kMnPt?65nYU0D20f1}6kKTGtt*UvGWlWbk_eZA6_zpM7RmTa8= zX0C4e&%H}5Zp_}l|Nhc^3D1L9wsjYW`uXuKic)I&llM`!zjdkjK8uaJw#|<3_TT!E zZ*pm-jn}e^&n9V>a;{a_93UyqH{nFly)g5+r-Kje7q6*&T+bhARB_CkGk1%=NW6D> za(1S1?CHc6OZP6`Y$IKel(X%`WB%O=Y7Vk$?Z5phx}&_m{X2W<;cp#J^*a-{=LOyP z{a98z?)u~B9qSf;wmZ3Wy`RlY{*bW$4D;r{t($tjYmPy{x2^YeO0L&_yM5*H%DA~& z^Umd+JNakUF7bnP*4I_8Jl*_Jz4yu1ExSLwYhJf*p^n&uztf(*dX@2Hd%t1sr=^C+ zrP}W++$;Fqq1?Xf$J@18@*+83R@ivtm7H~sy85Mhuh^TL$5rSHJ(S6*zy!-pK zkBusq`3>w`94t7~1OBf2yc$<}GglRqPrg~F`u@i4e-2gK z|2ErL|7Td{ZS{Ohe8+Svy{R8w zD>+Yh?&x!^?p^g=P0^C`*<0yT+H!W^p8Y;|wZvE3+5-b*gRNGCSzX72TK>eUVPMi?IZ2YiK}xvYF8eco1v6{x3zaCm$?SljS~z~$LD{W z{ZYi|)cl*fk}C^TZr)V4o^J8`>qP#yrH?chMA~mGIXH>UzVhzIzpp}8o?5-twElg#@fxP0kdW#=hZZZ63; z`docl>C9`HRTq3eH5AAD)F@v!wHN-Q@gZ*G+G9!AN_oXjz5B}b_O{IgWe>yK&(ynq zeh>e<_^ZEo=B|0YD_=yV9-D9Q+xCd+vOpvAdttX8yj5;8ZsKHDm>5;If2(>&>P`V( zb#Dg=1;vyv!I$gzzYNyc;d^=7%lJ?K84TunIG!}$w*H;}SGzfj_J!JSJ>KEE_00Vn zyVBpqsM%MZxA^zfJMnkKkG3C2|1${H8rmE$`Oh$|T{^g|?)yVur=Z(=_m%D3a(#2V z&6ck4`u3mm0|VoYUe4cY-chR2{huKr_$5E34v5f%kv?6!!d`b?=bWsc+loecN~Z@|N(E+x|0b)9*;% z(Wd`K*RbYz$hnt%MumT0Z5Dr9_((Eu?ZSUYFSE}%wR^L=HD|V zkEAWR{xhUy_W$eEJyv$Y?s2kw{rX=_fT|d{_XTBHMd-sg|&8mb? z-o3X|o@Xok`_J&ZeyOwhTkl5_dHRd~ox03_GtYHjq59e16ZPu8%NF(@zqM;c&@`o2 zvv!Bo*LBxBp4w|w9P;FA)_(@=xc>}=U#^ON)&-sI#XbAy+x?ri9Q|}cdZXgG6wS-! zkHb{|E~{}An=bqOTiT5BPQJ673bz(tudh0~Y?a&OA4@kbc`lV(Z2LBUZvPUE_4Di> zF0OI>7VDS)=CAQzt|!rNZ>pc4_n)D@f4S+*%-_KkP47e}|5o>jJ+Z#WqiTx$z5fjP zUl!dL{~qmsReS$|8CP3wd%ju!>2mPPRoc&re+T|({+;&d@NaG3zvuol+)MH+TWs!o z|IOo%%U<5!s^4MCadH2qk{jO@bCR4dH2!^GrTkvy+S!;_CCe5|nlAQS@3wB{m%Vm& z%U{$_tvO_$HD5qb@ZX8cb!9W^Py3ni{AZ}%n|Z75(yxbC>rSdoUA`elq2x20OF;Nk23ae{BrSWCtrQ#xvT2G?|zK1*m3sq^dqz4%{RO>4S#Um`_JzA z0fF&Wx9cv+DeTzn-T&sTX}SBe+?$H;n&s>N?!Pp%@V8e*6Xyc|H+N0<+)V1XPI+8s z`TOtYfVr=0V{2;nKX_AcTGq`dpy>L={|pnq-dh(|;!~TRv^WwnD{o zU(bKv9h7x{t9^&bUQ@mQ44X=BmV2z+{JnWa@|<_y{${U}$lA1OrEB&1Wpjh-C$8CO zvaou0zwP;j`rqPa^G`{ZIMO@!VgL7b-~DTy{xfVf+c({O z!+(a9m&@NAJ@)=y&F$Fm-*;-?{1aV#;ClDw{zv=ItTJ7BeYKXz-6v~HUvFD}*(Chj z#JWpSf2Mzqu1|Zp_;e)S+fBurZMT0v>iC%dR(|}4d5wiD|1+e%+#cuuXVa0+`S)i0 zdS^HDWyFQUSC52;XlkGR^7Zn%cGH+zF5!Tc+Gdw-zWZi3G0W?xyVu_hU;p|$H3$AX zbUFF6H@|jGyM#*V`Rz;dkN);~{pqmC{F}2fJ}ZYGQ+cc)Q~SOuqRZ4^s;pJlrEuT; z&6{t$nHu3VecR;k`q@8keY5Ldw692h%esBe@^8+XH_qP;TCo+Z|5kea)=j>D=Pvtm z?qu(#GOqgl@8fEHT(#n*H=dTM%u8 zHp<`pmGQ9u&g4y#L4{mLvc{E3Evc(NrENNLscU!r%rCN{pVuw^?fLpw%_I9acg=jx zK26VQ-ue9fHU2-Q&-VXkaIiK1_iOT>?CWhpTU{%Te==We8YlDNs(eUbyy=a)D^g!y zIRBlxTD*iL(;zshyOE}KlsmZ z;+Im_hyM%=H3irAlzwD){c!wd$!q<$suTBJed?(`eWUEYXCLRhIUaaz@ArQKzSaMl zZU{V6pzsz#3Hl$yNSC zK>I*4SfzK^OT@IM2G*luKl z8Pc~$d0qU^(83)H8sVAypP^}`{ZFO-9}ZjZ|KiWQ|3eGZH`QSB=dM3db^RYJXm0Nd z*JB&ohqBS$*L2^lE6vtkAzb!i(dBz;IV+iY(h@8czSv*eZN>2Ee9!$)b~S?k88#kz z{GY)`YHyWk<@sazKjrN>{xjSFE#6qh8@E@gfouE1{|v`>*YEq!@Jn*l)O_dvh=bT2 z#4{FkG32a&x-fUmk2j0e7W^*%YkmGdLrrLtgUm;t>>2GJyE^?0f6VkUnsy{U??1!x z%Kr@SyX@}%XYl*{pW(i!`mKfk8Rj+hZ*~69V9V;iwf(c`N&BbDFk=G0z5i7{|KE?5 zb(i`7d=aT#rvJXnc8&cEOZ6MwZ|{G3Z~ynB`|bTN@74c)wB&b){pSlof0xPse4+7e z!@qW7&dxu^lZ$|Y^} z*5&V>^vmCmNm~j%==@{KG_891_JH&CC zq0B|JLZ(3b;;R#QPhZD#I=dytF1i&p?*ADkRX*zf%^F+(V%pmMZ)UIm`Sd?SKmW&- zsY-K2i+`)V{#RN5`0$^MeS${(O=zoT4zA<&9Ez0TcPg%__1N{oi8scAMN-UIA!Hl z(ekA1?Di>P-<|i#)t_8l(f{w@toDB$SwE#c|1*T=e%%-k^jZa<6)5%xtx_-XnU3teU-h8{qT^Y~eOME+Aa=#HEC&Gm+{ z@?G|i|1hjirNi`cbZ{9m^mfe|Gr&cijQKI2wd&VPpU@m@WDK8l{~454 z9|~G&HtqEn?=LpG{~4Z0g0?!O{%5$b>RRKq>;HD2{?D+;wQ6zQ$@!vZd*9T5==$@? zOVa&Y?{a1Puh*7cs!!$bn$~vz+=p#G-HilRaPkyZSHe&KJYF zZ(;#a+qT8)f3mrLSpMUi`|r;FXINnB`e^@8hJ*jU?*G@7_4O|Ix9}f}Q<-({iQn)# zW%zetR@F-8CHuaL?f79SiJr)!zC%FAlm)%o|* zr~P|x-0S&LF8lt{wR_LcZ+oc^C7cJhA)C(-4Z{}~ix&Hw(`|1IjvZ~vS8kM5XH zyJX(d;W9O{|6A{J2EqA1bHmr}Pubt~az&)aJ&{8$AE(>j5}T7O2|Cr~>f3;o5)VbB zuf6x5+Q0g2_NCQPOBSkJ_D+9R@K5*m`+ra7|FT{A>-wJ>`QL1J{xiHd^_5@n-|6|n z@4aW(81L75{LlWadt^7?re89bzxv;-f0TRV=;9CO9)6R2_@ANwci`vEPu+jrd(>6j zwVdVNO8W~xWLzyzO;wq;%I{yaob&4X6YS3a8P*;C&*17FZB_oKam{~*xBZ_d?Y{k= z;b!uqnqPiXnJVH-Hh4(6{;pkr%4UoF=j&H}GjIMovtMYzrMC;~c?Fd!s~h%hd(8Uz zKZ9=lEzw6;eUEwS{b!ijXEx_~Q;)LPedDs%>Dk7|7yf6Mux;tTFZ+KPNv*8@&mjNn z{oj`3|0b$d{i)yd{K%Ip`pfJjtW`3(0tz+IZK=S6VfFuS^Q>y6@X{pJ7wI*2(`2r{7ze?YTN_MMQ8FSs`@~S5KT&z!x?+TmfHt9!vqTr_aKW0SD^VWQ;zj4*VB?|NNKil;!_1G11 zscMDlXYbHgm0TrT|1;Em{?FjB_I3VG$w&Vg)@`hR;U9bdkJ0?2CIaz};`ZlmGbMK6Ab>?vB>M_bK}x-dV%`XX|&-$5$s+tn|$-UAc_?&W|mB zZ^hf)+vhI3>_5W^G5KHL?0<=biY@%lAYW$x^=tND?W=3{r`dOg{mLoyd^q_TN15Qa z*Lgd8K&9a~|EGcX>dwRqtL@m5`r+hlnQ!lozrA*c_1NQo+qV`6Up6a^Kb-q3_kQw| z?8H~61tm3hXI`AAy5K(pXsOYdM#OhTI}z$Z!a!wwmE)a{wMw`cH90l{9>8vmH(47yZ$}svV%!`yARFZ zbo{8S-lcce^SNi-3g)}`?RW6pRS&*QzZ05SpK9NgU%m2#jrJ$MOA_b5wJtw9GqnBB z)-P%q<2{xeMY`~3HG`QNrdpn)$Xv4vmc ze+6aVQ2aanpPKvXJLhVQJLaZw9r|^B?(L2L8FXup2LG7)TkntG^*yTP$;l6!ZLiF@ zH9v1#%)dX|gEi#KXMOrqHDT#2x3!+CBG=Zt?k_%g*5K-@q>zPT*Ntx4JwN+T^gqKy zspEft+J8}B_v`vUo80=>E%m<`*3CZrpJC(kqp$V3q9V^e6q>GaF4BI>nsY{H+?8_5 zMXyKyI~^~4>DpcCLqAeFMf$hj`ry=1|8#Bo+^elQCC`0Vs@+>1y7SA94zI|hWz+Yb ztuEcVwz_Iwg5Bf4_A6Ew{%1I0CjaZL`oE^BFXKO)zAt=e!+(aBgirR*|J}a+Z`YNN z|IWk<9qlQc&olRtdlK*0^-kjQpRV7yek3{bT<)i%`zL;$kz<>2)%WP%GoMz?JN-8i zd|%*~=H3wf zM~5!1mik!K?e%)Gt=E-RR=FDccYM+p{LgUwQ~mYlUri5&Uy{21X8%XCch76$Be&mgz{O2O5Y$MimVKdZKkD*5orIIO&S zj#@KlJK^X2zjE1O@w;>XGvwBPn_DF}8C3Xfi)h&=etX-EWA&Hc`k#F^?f9?zUKi?b z);~%Vczl@s>1X4AbF=TK**{&saXq`-m5+P@S0jGzspZVg^ZyKy=6_54pJZqMllcDV<*qTvy$pl_4Vo9R8@GUK z<@ck${<(IK823KDwRq(n%_)pe=S%K?QUh8DncRB(-we~LFIk`GV|K9_L{U1>p;&G* zJNzf*YM#k_=J=Dj%Ff2D4lfEE^cFn$%ej8q%x)o1m3d39U;57=TmQsm_kV^e`@a%f zKfBh69}iunQ=%-sbo%rE3=Ds(n_nJ^u=Mhd-hYx+^wuBw&-D+@S`HNIPc)zAXOXY- z;K>KewXb*Yp1J5k;&uJp^;;jVIdoAX&Qez4vEoc4gJ=nsXZxZ#8|JAst$DSz?E186 zckh?iiuPwq_DAHnZwz_AsQUHVU(u<#vFCLb?d?7;f6H@m#VX&r3O!w^_opshl$3om z!NY@Lo!3Lg6G{AAOrAAhkv{NmD`m-fl8DYN<` zrFS=|#ONG{(2BqYgSB7R%$A(@{E*ouZ;g4CHhya^J%0bpz`M>iTYZ|R#roXU;h$bE z`~LdI#k9_4f2VcD?A~u%w%xCHUaf1XoS}4`Qda!efAwFqUhUI)wW}%q)wKH`COVqV zpRuas2z%9fc6QA^j`R1{xi-&g%{;aEyX@ZbcUQN)EMEP3<*A-z{~uky_nN(#`~7uF zWp24VdujDt>*HbqN43`ozxPaLf10>q;>52^JE~so6R$tJ^vYg>8>iux*C@3 zs9C%8$Ci?3TUPGj%|8BS|A}8G{X(RoJpMiUtC{_)<6kw_F8A51>v-2}+q7!6=!6e9 zkKf7nx`ebGa|M&+o{(*C!~!#=1?N}`)PK>+z23d}@|(0hJFBg$Es~8r3$>^A3h+$R ziDqCtSF8QB{qv*~zp_2O5aVhioc_S{O|me%Y4{a`?_4e$Baafd(p zKU`FC|H1K&J={xdteoOAg)*nrYko?Ui@R}U#q-k-G?{-CX)rqL%l>w8q?s^&6aS+k zTz{U!UJbN!3$%?b|K~&YC-?t^*35kLz9~-g>Oa$0yWV`OI=1jzZ@&NDOIy38*Xl`_ z{I^KNQP(s&|HrhGy2od7`^W{#6mCdU)jiW>*R>>o{Zdg) z*k$Ewd0RHTwp*BOx<_2>+qcc95;G4P=4)|;DthWv1b**0vG)BBk7aO2h9940e_*~~ zrkvj8yu}}C0(~z3NP71;yv^HjLzL^&yDrbq2pMZ~-O&tVNSI_8A+`MgztsI2+gxe? z57n7<>fMK}rp;ad`gDEH@}uTOhXiG;PVYVKJ#+gVjx+V8RkIj=xBnB;-2eO@I0Zc0 z^1e4pZ0*X38-{mW}j|L)zTW1RS8)9jPGV!p6hGRAqFvy16^XeU==2^tQ4l;5j* z=}UF3^r7kBtP;1holz;+dr5lfj(SZ$XHl~Q8=1?*CvSOt@{jay3%7e}7jsPSxBsg8 zk?(iCCbY6(^3uh(VwXO>bvjjj(mL-(&WLX(8Wy(jFswMj{JQ^#$MgRT^DE$qq_6(x zcStnpU8qlcFMg-O`AB}{vg4_bMLxvMU7S(1^z4-&-)x)u+{njW2{DJFG?RKxuq(FT zyXESe`62kouK3okw`;Wz&T9Xbx3G3uP z-(9qW{bu}!iD&;a*nI$}lCbbLJC&P1P9Odzb+7M2TxR)2&b7}!J@tK2CA-F%{|MJl z@q_nSZTuJCe(8K9j&b^tIQ3Z|d@wX6IGkUY+`M?#iaJO!rx_dTZ}G3-7wS)Uz)3<@ZL0 z-}V25JYlhP=)?1)wf09h-BX)VcV*kUizl_CV%!B&r$t1l^f8@mZ*nLTJ`jBI%66TL z_g>uN{%EpK`opG|yADkH_vpM|T9fI%U3zaei@9sDoO7P^#Qgl{h9d3fWj~6(#~EDz zW4|DJdH0uo-rE&%=OZpndSp=QvR$fuZr;-yZYgIr_*^`3z?`vz{{;KB@ArjrzPuO6 ztWrIESZ?LY<2KtSR2I!QOuYF__P|?-&%!Gu&r2R$oBzWz9UhIVAFtTQm39Ba#Ehu6 z)gR9PPCxh|E9&Uk?Jaf}lhwcZ?)eb8#Bj!v84kCXwmpj1m7Qd7lJL~EH1oss82=W* zOrs2?dfCh<->FaAHeDCkKeI5q?0eX(@;T?92})}%EMw$4<&iv%f%)Zph?Uj#pc2jg zq3Zm9wQv73fXbTHkG$n?K7JJQZPwkT2ck=NZrJ%z#z@wI`S1REp2E0j?S;Bhi`L!{ zV(kC&X5~*0>FxKKw(<*S>*(6aAAji2e126%zIm(qv@`CC+owHU_imm>YK7CDB1g%9 zqe}#3PhVD$4|!29wC0b;tw}G}=NH||eS9O9UoZ62bBCpqpDA1L?CI&Ny0-q0%4N_F z9kAnmvitque{9cR@%qTDd9$;nUhZnWw>*8(?Vl5t)X9F%O^(j>x+~i7WUR?(jQxVZqM~y8}<2w;E6(kg4x~a%sK^(mKHh8TOK?A6RP~r@ciC= za3PrdvHU>VN7mD~k3X!|_#*mIZX%17kCMdO>26zi-p**`-g36EbjHVNVGQfD4qV#D zT+!_P7RPO??ccagFVALY{OMk|$ZaciD&8=fru%=q|83!Jf#`5z zj_(HNqS>*{`g3>iX)wQf|HBhh^gt5ObDPwU^SwXFlr0apPTf=1ck9}zW6!=lN?H|p z>M%nkN2p{PcZ1<1Bkd>iCI3i&ytj7ME&tpHE?f6retO})@7_)Ow(ic($o+Tj^5VF= zXTMEN)wW`7GZErlI%lT5+|KnsibF2eDZH3xc)^CZ(8j9rn$CKY-I{A2?%18ZZR>=1 z#mZ^M)3zME_gYOwWxa%gT=ygY-hYC265D16U&%?mXnNPqyCbzTtz^emt*&!YMjkfP3aqj}1or`DYCyLM#dPCg6MmBk;|ojdP+(NywTo3w9QhVGllx662SCi5IwTK7rF zeh-`VqkXwMf7CwCm)hHP??;qBzoU4&!iTwUitgR^{MM1abG!MaBgqr@muzr&$emks z>F?2lt9DI4__oZyW7_mhKQywUmc*I)X8br28?9%uXYREB3?@y*Pc=h6mF4_2Z;I|> z`1SgqaQ6G3PeDf_Z2!+78uEUhitlpYM4k&x-#7oAC$DuaQpA4Rr<$il%CQ@_pZO8< zHs)O_`{Vq!{hQskE7s~C-Bxv`Ts+G5+uU`p*Ox9+k(#opVYNwpdP}UIcUoxn^stV` zrX#a1*>l*b{#fpP(e!VcM#+2mJFoTf^%m^xo$R-*-Svvj+NVL%&%$=@Fm8?6^mok@ z-n9+<8~!sqG~4%|VP4pOhVQ5AKZevFDy;p_V6*l=!~1EVz2uMn`af8DztiK5wZ$Z;jyfU0?8wJk_V;{*c7OBQbvlc8M6^vWPq#U-+Wh9l zdEPT_Crn9A((!wCl%Y0o^&f9L`AhkwwqD-ax^}0;ckbLdUt1-S5|ZyGL34=GNP?Ws>W+mT#JJ;=A;%`zLpHPRk5A`E-{MsTGeRc_U zDV~zE?!hSy<~Q$u`04L|-n$)KR_XsyH~-J@`PA)yKS3>W{rF}%v0{9D;a*8dFcYu42GK8$#$ zwBf>@z|7R_TGJ(~^G(k!nsh^JP43f`0!$(cxaIEf&f#MCo&Qg`8WXftS|(v0iYrI#u|#`K{6j zy=9r}RQMJ6brKg%U78fRXXBRL4BEZ+oHp5oAG7_g|7Un8AXRhm^{g%1fAubo)F~G; zj*fF@Fy3;u)GW$7P2-^;$D!jbz6<0-f0#eMH}@kGXW9k5x9cBwX=l#)J1gVE2|r6$=-yyKp|dHaN4 zRBdmOK6d?=&f3x*=vo`Q= zb!I8&^Ifw@x9{BQPW}^TXWZ7}OqKRzFq$0i_G8C9g`HOyTz;}`$8@hfX5!sXPV5VL zWHLEjCGV$d&y$pz8|NHFKgrZH*Oc;3Q4YM!wl!O<|KSh)l7rRK+&|WC<#o#Z^5K#J z|I@>TdB$otp4{HDjXU}BKCU19;UBiQ{ZqJo>&5B^YFEGR?!GH`&0pVNf7h${lyL941Hf68pab!KWsgq}&PVx-P-X+QkH5+%! z$WL{9c0gg?)|$$?1A?L}*(W${xBv0AX6A$aZT}R1n6CNbaP?1U_0nuR;mL=^%H9{I zFS9XzRj#S~`SgVA!Ny)^3%pb7tFChhXS(sena}Y>*ZT@bfV&wYmRMFaOW*`4}X<@4ThKyy!ne%jbaq4D-zH zL(AIg74_$(^M60x|6|p&6$|(#{}aCJ{I4Rq{=P2gfSUgdobML?XRy)T4=IahFKV#z z|KYzLwBr_%r{q7(&yt5*u}cGVyj07t{|x^sZX?Y22QnkK9%9B!+YH+b=B57`S{qmV zXE-k%54PO?VM3Pu=VRBwS##$s4d!M48Cw1`1cKbU|NG(kk0JMeL`e1j`MC8z!#-)y zAq9C|3@_oje&qkt>PJ(M*2VC0Kg?L%3KAjqf)p&8e%$=KV(7wc(h(Miwg;a0r}gaR z$FKIze^!9q+YWW_!~H*2**^s7J9PaYNZ-!3*Paft?tgT2-TzfY{b#t(3+eqIxw8Lx z>-xVR?*9ob771YY2klZ1`p+;=4C15uLx->If8GmIaEjqo{llbJ@t>il_Wy|3HT};A zxQC;<&=gGmC#JXfKZD&juyLWMFM{2Wi{ysD{|wyXU>oOy!Z7r-sVYNh{D+OD`ad6L z{%5E$|IZ)_+72y`tR~Pw*8h)At@po*sCuwZK&HqCgWL@@C0K)b)qjT8{*|EKhW~F+ zc*OsRl$!tN!xmWV1a&dI+W#>5RsH8fm%(Aw4_5H|7R1PKZC8*(Hkl$h#Ql%XTz|Nw z;o82o8O&?`Gqg@dvc~^MWYqjWAK}*cbuqm9&+src3vSQ!e_~;1kzCi^rm=;=f&c2tcZdiVOY?#Ql`Hjk~5y+=-*y5VU)jc>kLNYBJ`6&9AOxBX`j+-hB&5t_86 zdWYgCrTrDgg;L3kp`ZI06=YKQzOMeyz?B^L?^Rt%{h=Ax_wSlNx8y%VVj)+}opt|K z@BPmZ%jZ7xiEq@ad;MGYtL&|sGiBG)Be!I%f_0MaY0fG*AbYgyt-_qg63kn@Gap&6 zw$+lg6*J}uU-Z*k^Ii&bX@M_qzF?q1AAGeW|^bJv0Qj&2yiB*2`#lUGBH*=dZ_pH|)!t zTt@+aE5voZPf-^~3T%{k!~spa0LW=+>713{Z``<=ggi=9}c^*}DCC`iT8> z=nGB>^-pKRLJ$3%%ag0MHSx5>NwatJI+tI(Q`s(%`n6!I=Hy!UospGv;r|^gU#J$7|2(Z4>Bth-K^*r`|n?^N7#r%RmT2%AXSx6cjYen&rl6E>p7CUqR~`kFRI5> z6!l_0+^9_ZhsfOQm5UlI^M8cxQvbd+{*P>2{l{BWR8S0ZJgOBk_mHhXQ7}t>-Rl1g z&9l${vv&WZ?aO~1nBXL!7&{?L{Se@p%UfGvELv4G#^fgVb|HA@g#Madx<^Op;@3Ma*3R>_Mwf@Sc ziD&B%?Ya1$VgIW72ai7gXXv~5pMmSM`@erH|1&gqpZ{kat1Hr09FJnm7Bnr_y- za{qn%6Cqm|&w@!<9uobNWh-+o_E?Xn4Pama>^u~w2t;`65Y&z;j$^v2)u zvc>u>uOHUmHvi{!_xRtY-2V(;vco3c+^>ICZe29Tm350%OPBp;P`+L#r}FFln~$r0 z%=#U8RsFichTZe5*T2e4{Mcgcg3^z6Dr`G1Cs*>T6X=BP&nP6>|s!d1|JeU8-S zSS#6Uy&s*AUayKhbLq$QkV+x-)cma$2cmeRrJkR)NYm^{e7Vn)WYsif3H- z^=op(Kbijw6I7r7Eu8$HA#Kfifs$?i84B&cgp{6oET60y9hd%d?o3~;;^pcZ%U9ox z-}xeZzOYTSpzl7Ft86|u|1)^iKU9}}ReiqWpH^4W>PNN#{9CrvfBg2Jp**Kuwl<_L z^kkN7@~=Df#$l!WKYrMM{c-;n%ht0J%cHbzMgDU?<>x<7>-Ww&t`~3AKK^IuoOa@b zjq@t@Pp8x07M0$X{Q5Qd=QY2DAI*<$sHl7N)W&mV=R{B5Z_6J)JMQynbxi#U%l)q> z{%5$LxJEbppKJ8~@BIH-qY{#qYZPnW^Zx99%2_b-a_-stGj;D>+uIv!{^)qmI+Ke! z^VF`~^1ayecWHaf^Sz$y|Ac<*owisc;K%G>kDDqgwM+Yzmj}nRP5jTGQ2L*tPXBdK z#p-{~0`=D|Gmf}vo0VMpUV3@i-TrXB)s|PArp&&-J|#cyPwdAvR%Y|9Di#L|ojNDG z@4}sj8?^o8yX^FHxuTmN#rmAtEB)60m37*l-+})bCRTg>XQ=c4%W>`O{y*Kq|K!hv z@AT?5KV7kQx^vc6by05fKVkb~u5Dl1*X;Xa?PKX~3psz`temsYQ~oY(D_iGYwYwe^ zBzNv9RZI@%mdsuJ*8i9!!~1_1|GaL$_@Cj4eC~gSeEDCjnOXBc{#@}l{y&3L-#&MN zJH@8HHC8V#yWjXFxnxyn*v%z-b+%aLnm@|#&eO?TGWp1?*;Yoa_F08J$J(!7nRW2m zK7)!|mjmZ%>}k@CI{wkwfMcT@WSs|pU-OlE$#nXz5d^n>y8iZ|74B+&#<@f z&!l--Q#9kGtV=F$jC>TbF!XZRmZ@u(Zn2X85oxFWo9X0#22Qc%dsfT*UHO~+`!|t_ z_DB9*Z+zytcWgB|lGy$$cwT+!tNL5{=XUP@*{c3~?Vlx!kJeYk|9WEotv9oyCt0dy z)%EUr)9l*gjg$OB3;j%GuU`8#*LV4mc#+eKC2t?GnmPT=@9(uM|9zkK+_u(XvGMEMJ5>WEj^-11qd7+x{?2Pr|9a~M4KKJ}xd7fKB`~JpN zV)Y_>;&m7Au9zIGqqaSu?$TXp>)qei?5+Q2Bm8eyZ2gOckzw*5AM5|EiK;h_4VhG; zaPRH@v+|x(*Jn=sd_J;RwS4coEjzEde{?^3_TlD|8qbw8|7^|u&ybn7FC%W|ac7?^ zb($6Xc*9Tn3uR@?Pq)tf?JXDjgn9p>{!=?w*gx^J|9Z0iOVO38{eRm2Gra9R|F2y) zUH-w!(*3WhZb&$lu3G78GH24|lxf{hs<>*LKRV96@}oWYz$8`XxBeFvsPA97F75cE z`#sOLht&&brS|Tu`MYr6w`Y=nS-s`|)SUd!5DPjdTx`c(^MAI%|Mv3#YrFdH(sH)t z6>D`bpZMu+S(|$7S7e=L{Cis`|BiVYV&O*o099aq<>}`&m3h(%GV>NM&j~Zx|A}w+e}?rl_kS^21HLz5W$J&1n6>-A zgchWJ?De?vEPV5}z3VqVxx6~?(axB;>#nZ9sCNBM#;g5eHK{qLls}p+*?IiW9>e9g zYr;PE%yK<*_ED_IsUD{7x1{^R|M(V3J--)!XvyaP3=?>t|9<`F+Oj=COU0(Gd;apL ztyE~i#;G35es3*a5#KFmWEDR5KLe-U(r2~v&)ogi_WNGhoqsXm;i-?$3;E`)`Sou} z#2uydxB1_FJdTupRok<${t27?>&yQc^0aqd-v9WL{jcBs{~27|V}3k6*%aK@KlSzX zsnQ`--`Pm+xz}_E{%9)6!rSn6o1h(feU{}&(Ty@p8Ph(_4v{EQj6|Q z{b;wuW{2SQ@6Vee8SQ_3{PQ~d5&NH}ulc|JwEx^%x%c^q;3{W&SVIUI|RRe{>=Y+EdH<9)nM2E3=*d2!BC8KAUM;F^y37Bw%{!IJhab<;vrJ)Ur*vgjq+qxCt%#IPiK<_W(d@_TR1W=TIPyE_X~=(uc}OXYO`GxS%~zkX1!wqDf9K77vFRo_5YWUhV%3t9SfoSXWrDx_BV3FGpu;H|M{J|CuP1L%a1js$N%u$ zD#JOI{r!38IoCJTouBn@?c;vwmr`}xSHCjfHu+$|C--kBlkbG=yF4Yq&8%rgeP37tOLz^$`M^$y+30aj0 zBUUwzSk%;`nL~z}*2O=hJuhyR$kz?~;xAYBb64+?)h865#yKr-s_J@~CGz{xvIYD) zKhFMVP=0<;{%!WpR}XouI$$QJl6pcVC0)6|_GsBWwI54UAL_T4|1-U!ufOn4 zPjFeCOXA$4(YxW`i33~(MFoX^k2by(32(mrC;Q{P&Q%qaHJJ|sAKulD+wE(2>D~Dl z^Gmm5JeC}ly;J9tWcwtC&3p>G7U#jc(fe3`e0{jT(?4OK{Pq02`Yk$ri&L(PO%GdL zp2yWTN!UnXf@TTtc5Cl-g%e#59bbDfF6uvnt@`sIHOXV%CTyt)-&wR|{xY|8bEudNmv-Jh#-qdx6k_kV`194*K6 zH~x&Pd#}-HfA#Mn>35$a@2US?wnzWrd!D&*x@n(-(l_kf-*)TLy;ts>jGBsbRC02s zrUXly9CR{BCJy=)s z^JDKR1LJcBg=aJVGi=qHui7)KZExY<#zda*NnXdEJ$mFKkd?c9b*@K2=pD^-Z&v2y zDu^$7nEawk``GMn?H_jCvRNK^)%8$(ecD{}-$uWGJKU*^D?6C^wDS3P_cJGaa(`~_ z%i|&>X^^2@uZm_4=J;iO z-;!^7>vb#s?(6A~a_#iBuIBOX;@-K~W6$ZOy^DIgyi`KUcn;^;eK3j9ss1YZ_*VJx ze*RaNUl~Tey0!Fl#KGdCvvPB*f=agMPM%%%>0?pohTVyJo(c@bQTxn)be2BkXS0!`w*y=Xv=lqqv?I{)pts> zBK7wdE0X?~Yyi9&|0}+pp@~ z)zKbXcE3!Y`?f>)O|O#9DI<;qvrT_aMOLo*P

    <*zUcT-@dJJFR%Xg;_cGDYVYWk zYad6qX|m5XQtT6Y!@=YhJnu@3PPMJ*!%1})exQME!&8OU1e69=cZeE&lCtLuXfxRm}D8*Tbdu_RTEJuGlUn9IbloyN|Yg=~Dhy+oJ5u{59c$6LXaJ>~6buckaZrS%F+X4aMA< zJf|6cvj6TY&Mo)bwc?GuY>j>Kql_;cU)Q?461wA`&OKXu>$Z81M9XGLyyK}0nWWL> z6d1Gp-TnS4kEegkf2+9irS3}qg_+{BuFYKZ!k}Vu__TMQqPaVA)joIW1b7Oj&rjWR zCTa4i+pR23H&_oHzZepJ-cIrFQeo$hZS$A7=dOGuX1;6cv`MpiXU+@QE_~a&=+udc z+n8K*Z%;lkXC6zA-yZud?JY~I_uf<4v$gHksmFol;nSvj|gCT>~%a%<_1 z%hG<_%kSL#dC9#{YGdrS14SZTg?hpJ&~&&FoTi%q{Ne zJ9l_zm*{ITv@V^vdSvx*6B@ zXX|O`k~BOrE8B| zH}6$FP&$cwVYW)fys(=L(>1s(lOAWSE?vN1^X~xoQjj0d|1-q(zu^bnq|sOZ_4Q%) zxBL&UZH)|%ye9YHj_vwyS()W;SLk$fE3<7DtbE1fDJ`bKrs6!u;OxtP<{4qP_`h9# z{oVe?{X}1vEN+>%%ueSwI-L;DoUl#Ag@5KGwtKD&)%xGq|C;}2IBx&zOMOE9<@!Ix z{~6ZDaoK0u^HhG@_MhQ))XDujXJsf~cALCH`o59I<4LaB`RZq#l&6{hoRS)0aN^ne z`a{1zoVfTkH?$^DWmetPg~uOxpL`^|>-h5h^S9-Ry!+44ANkMqbngr;BO}H))2hs8 zT$kT>?55Y>^NH`nkm6AAx#+yE=Q&?2@;qtoq>w^96>RwG)Jv z`Y~EC$X);2-2XHGKLaQT57`<2JIVg&eRs;W<6Ui2_g%7%SN?5%?$|Zm-Q~0P7GD+( zw9oWa_uaZJUNHVul-P=2K2}e!b~L?u5fORnl9sp4#$!PSHp}mM-fH#pyJI-}pOm!M z=4sz_okMpDA76PV^V+vxQE~C-%_ntyvh1@cXISK_b4pUY=+A!!GtK*MyJrWj4(i#t za<}o85B&$)J{&(3{hnoR{#*O3t@CcbJ#;zmzLC-FZI$g7=L&bv&-x_Ye?xXzm&WnF z0)d_9zq!YrV*m5~ZP)8P`X3iZe(XQESNph>+sx|v@|8D_7UwRBzW=<~<6wxS2ZP4B zXSsVcRG8fOOt-(~Uv)F;_4{=%kDZF=d-ZKs+IqG5t8MRYF}$PVvstB0<8+R(`!i)N z(b%)o4)AWZx|->EitlXGTGQZL>hH_HYTT$hv_3J>cK;u>eTfq#&1Kak&Q%_t|338n z{)FT@rT+|FFVv25|7TF>dCs-^KZEUmhMRx0Z|~pwe*g1t&;R|~{^OEt+z^4e{(blUqpXZ;6xWrzTl1I4ez$3R{7wAxxBm=BbGNfO|7TE`|33ZS z>~Cl7>Tgu8Q@K-S)Ust);-w0i| zWo6F(ho-lmF`55oNKKb{zI=Y%VPvyr|J_*spTYh**x;_umh~*z^25|1&hz zOZQq`y8cgi?>=Y4`+xiA-n4xA{9}01-xjWy{ptT1IQNy;OJ1rfdHei#_{RF2`2P&& z-=6>bFaP7>-%0-&{?+HzzyDm{n=ccaaqH6UlOivi!gk%f|LpGj@ALFif3ujb-4kfu z$9VHu@!yF(?_%oD*Imte{!Q52IK~rX^nL4c`J0xi-Oz ze_e}P{GWm4XRX^S&;JY$tp(qw$j*grHEs%()r{;e)?P<{FQ_o4T{aine8 z-~3N_g58zh-|JIq@6Ry*^ZohKYW;8L7Q&;r|LyGm3_t%f?D)^H|Li~0B`J@^MK|9G znA{p3WiMTxfA0JKjH{%xWf4Tp6{8Fj)ZwziN$p5jnX42{H{~RO# zJqH!s$zRt;{JV9<{`tS>|Nb+W|F~42n{m*}fyY=^gCUF zJ{AAZu;Fz40p1r*6FJx=UnltF3bN?X%hYo=u+Hb0rG55E>Ei%1+L*%Z#zy1HHOV@b{=YMmn`)&C8`M0~j!q@$0$OYw%zt=%| zW9h%!pm2K)4!0}1;`+{6rLQg(O_7~5J3sf{e}>9u%HQNurq2H-Y`3Pj|IPk)E95`l zzO`{}{e$^y^9B5W_?su`Yun%W@y@UQ`tR$T|1;#i0)>1jB;^0yzEc1EAH?wMPHV%= zH{ZMdGpF#k#plnrW9@nS_3Sr%-n(aG{h>65N$!*FZ`PU%6#myL0|84mv z`{MrXrt-fyVqwYWEC0{`4AAr=D!rON-8Omqy*0(*laIvTdR_aUVM*ojj{Qm7ujlG3 z{%2^}fAY7>Ne%m(v$wrHFaNr&>bkA|AK?e5HuwMVFMRH{asH3VUk(P(|MBBLL;BbC z8UOBF+5bWj77ur>?1yBI?LV$0Dr^vZdieM1_kZ4{i!5aI&6)cz=hKY(L}rnm`iJJ{ z0w@1-v|Hs?fBp9rv-%Cuk*_x1LGd>FfI2vnKv$n9&MOLW_Ur z2f^bcwp%zf?))QP+ilfdf+3d!Z{GXQU@Q4#Qr$t;Enm0&XJ~mY?P2|&VUzv+x6l7G zZ2uMF{*6`bTE_l|R{t3yi@ra<`xGbOj`2qE+}L`F~0rB)rW;sKV+^F+WP&X{kdlq zE8HamudLg-^nSix`_z~J88*zm@$%ZW#m7!7wtBt)ck)=#nY%Xs8NRLd_;=);(|?Ae zNqaQpKkWlc>v-2DS|GU0d{oB2z{}~*m_y7KP|8dYp(%7C^<#XRhs{iAB7j^RO`F@7G zij&K~*{?We_^s=k|J$?P|C-F}Az@Pv@=@=7aM-L_8dbDnmHB(U<-1!aA9%QLsk2x0 z?b@$*U;mgV`=5dD@$-LP^8W(ue=pjy092*c&i2a=e7yJe^t}HJZ+hwl^5)fVt@_XK z?H$O$++YWL{@uN@{({r{fBzY_|G4(mu=KV2w4z;KoO_;fYP!DK>fZmK;qd;ho0ak( z=JD^}U;fR0oBi{>nXmS5IC)hm!sjlaK%U&rtQ{{@c6$FsJvwJ?sCk zNgV8r4c|5g7VPVH(yIHlMf-f@!EN)?-j?U~OT_&#=5d>s$WYzi`u=*Jss#5#0TsVgK8Irl;R5yz#Am zo8kV?pFhMuskr%{p{d$(;{1QY2hTM5)F=N+@Bi`R`+CP8{=e1dx5=q&xZ25(g~>H?tfIImj7j02k|UJ)% zY&JhO|I6xNq`KP+XyLQiaR}d#QWtxF44v zQTgHj;rfwz#?^(Tf18)x&C<^EV>vIor)PzK?iq&jdl@S8kL5Ge=>K?Sqh;g2CevL0 zL*J_pufE;b+x|S|_eDE@!~N3J_g#D$8{en*NAM%}!CU9sN^9$VYmLLUOgi_#b%k4> z;B}VOc@Nu)_>&md1s}Sn{$u)w+mGxI>slX|GiX2h&Ca&#*{$ltr3ceY<~mu2?F!7h z*l?aR;+cUww=J!M8hxNQQrn^3_@BLzF%FkVqTe3S=>E?otTb?Qf zYZhfrs}w79Sf!TEBh!7UiYOW@n48G2GdA z>f5zv#s&W>Kc{MDDDgF^$|Y}fvtWK5rLiIAr}%Ms_CK_&A-jK#oHpHQa=|d2(gDbt!+HPUSe3IcFDw7`{ak`NBVhQY@a^Mhi5hqew~~8=*Cw4jw+E^?H%v7>^|My9dSux zQMF;7gidb3alQlz5H55uizS62If==R@sG0!B{`q8}h%q6dkGsR~uDZ1sZ zvpMF*@ocdl@oyDeCaF$QSgg|`GvTo5H&>Aey_)F{``f?C34ffn`jM{nBlmV)y}ZP| zg2yw{rhUBQZ_U=1{o?UX!5EX2QjdpaCr>|_RQ;bpEGvq=W3t1fdXdbc<4f+VEwx=@ zF7TgWxlN(x(*<`mI6G%qx_LfRW%)eoJ19N<*nYr{bH3k&q^r3$`HL#-k3_Xd^)6eO zzJkM4VA|(b=HUfzKfMbL%Q$+P$=Rc2BWpCn)!X{b`{XZedFy}pn!S)o#~XDz{oCvEV^!zXe{DZ~rt9tYXa5-@dp=HnyzBa#_YeOwaQ`v*$T#_6Z~ejV(+hso zefPR}>Ad%b%~#ShgLCS$_EqxFIJG$MsUX`){ayQhgA(!Nv=3sl*Zx?hFI>_5$gRD8 zo$9mc3e^_M+Zv)HPTF*G^xUz$_r6p!lUwk+g9E!sjsAyAw-1yVI6qw9^~-Ih%`Mw) zy({kS-D2#iZG;9g`Ivo34KJpMmvu#fn#x^pD)-_TTYQ zu9>azk$c-s$-Kw?=gw?faHoE;hwGUKdBzM}1zstClR;6UbEiVzq2K3{^5s9Ji*9E{ z)pyQ6;`FLu(s2!?h-qv+KFc}tm0Z^D{uik%P#N{8q`t8#w{Iou-}gst*#mdCd+fgy z^($Lq+3r2!S`DX5zKa{JIm5*ioOG<*;s&F1-5roeI-|c;eu&$;G%C|h`Qy6g6&Zoq zoU6=Qla}50ca4^w9XrA0d(al1s3%^L(F})n2l_?r?sa$SJ|f4^edyQyBY)L{e%pw> z*Jb{A)x)=(kO8Exr0pYl$%GVK)qA-ymwPX4lu;p!US zOL>3Pm;9*ueb}mW=@vofl4;uok4l+ro_RB6l1VFrk}2=WRMT&=B8;E>=6)zX^lhKa zN70!d^P9e$^$%#enzO#fa#7^Vm8pff>T_97E1f#8)0EKXI5AkTVb#o})IC4UA4+c7 zt99=F@oi$yeJ-vo`H`O9d3W7|eQiJWRnE*vx_m(Lvb@=|pKaN%r8JnIyqBmky8Neh z#mlJs9eZjYyseMDoU$uI@#>Zgy^Wtj&|E|Pr`Ih5hu^l?Z*APZoA#R-&|td z-)(v2pFitYR2Rcf{kY42 z{0lz{AF8?{FJxn0TAH2prls6R~p z(0t?`$DWVV=9ZgxJiOAjc%{xq_kG!qwS8X7ectLgr8Q=b)(Vw5lh56)KY9K9e}=@< z^?wybzm^7vDH%Q6KY8QFwb$C${kVT**O~46v@YyXPh9&$+xLQL*!}#f)R}5m>@Mk^ zI=3$}WUiO>YE5CDg{fM*cjX_pl|QuWSN1%~AMJ}?-dbZG{!F--3Yq6*iqZf$*V{wHf}{SB|*{~5fNu8DbT%J*{ruA}kMa-xX~T_2ur44cRN z;ro%d+TPdR<_I0r7qThz3g-e{|UzYVEHlqkomS* zOV5N|@?YZojdk1BOy;}3hO<6Bd_MJxoqXA2<&`HB}#U0d{F`k(%PR{t5+eXIZFGc_n?&%U&)^2$yB zlI8yi*Yo{nxKQu&pW(oX{|unrN#-Bk|7HJj{tr+8e}=BLXYK#l+f@E%h|m7du*B@^ z`MCO%dtToE#@TPNL$Ka0P`!(xhF8N4f4GPFKZB~i z&wqvut;hehuD*U@YH8+g&7ynHHn(q{+Qe|DgIq*$?0U;?JD_LsI@fgJ|S@-ufqtv+BS8 zT=$>hrSIIzTG$5vJ8=0w!%L%U^UmKd*#9+qxd%ZmfQQEfpdNP{uj2( z>K~-k{|Qz9qqa{Dbj0$^{|w*&4?R**{LfAOKg0U&{m)ll@BNeUpJAfZ=f8LP|1K_M1|Jd$4caHm-m!Cx4iiBSBF|bU&zCiSqsOb5-{@<_vyudzvz9iVu zP-8$LHXq{N{|pae>L16gc4esahq-4qG@3)|52Zf*&+v=y<^CVy@;|Px4s?(MX`SA_ z|Apl;h)(xEYH=V>JOgPe1mDs6C4hZO{h{Oq^%shL|1%ue@t@(?SBa<^N+n z{?GKaNC5lf{|tvxk(B&c<^RKH3;!>^Oo&};E;`8h)F7)k*8juD1g79WgJ|4lSB6Uc zKWbL&f7wm$|KaWjrNWlu4xl@UppNOSx+=o>Wd4U4FXX?-T(1A1Xa8~WKVexUmy`ss zPyEk-;@BUn_^F{XYX|Tl0Sg$Gz778JHgbXOQy$;bS8Ii}&UIAL5`K z7Ps4#p;G*h>M#DkY+v4k+>*)P16T0l`uaczIcJa|jkpZ?aee(o2RTQO8WfA{9~xY! ze__779u!TH{~5ME|1;^g{%b=yiNY{h{~;{~0di`#~e4|A+S%nA?9`-w+t1w!GiG8zBT52xx0XcIY=L z4d&zfAL?J&4|54Ej;yW^)n+tkZmztvN@TOy^g;O2jZ&?7k(-@mR;{&#BY zBtuKZTp_ggeKuRxCF_pJX6f7|rxXDq95 z{^vGdrv8SX{9naw>wf!xI5&40cmL*FKkI7F7OKCOcvtrO_~c`?>>{V-d0tKnw`x7Q zcVk=j%dqUW&ob`|x+N^s`=^Q&_RZV5Dlk*^hT;;L$R`v2Gn`c3@te#1Tg z876IA{GZ{5(D(lg?zgVQ3H@i7_@Ci;{MUQu|1)?_+RM8~ciG9+M^|oLm%pg-?IdUA zKO1)|8$Gsoa;)9ryxg-%mk-T~?ORx1&ARUN>St~{CZ8$Zv#K?Y^T447i=!d;&2@K4 zUyXUkTRXY&@_&ZMpO4S~`fdL&=lqzj&;QImH}^k7Lbm>2Hmk4KK0KS6Hf!#|?Kifk zoG!V2=lzkanXiwZ=(70obVF1h z|HJEl^UeROvSWS1zmr?RXWoO-%jqkBq?f!7k19R-boorPx%RhmBfMTeJ-Kmm8KdOS z*fu_Q(kE|%=jUB0j0>Z8qy$6GuVEqEjzu{*b(w0!>TY*9$_+&f`i z@82gjPv5Xw@^rM$p!|Ld%} z_d&j`#yI=oS-aPVr~NM3y<0mu=l4_V2N6@856?aPS&tS9ve+6X>IbyMduHg&Exy+-ak35S@cZq z4(aAjKc_pN>giy!?>i`73oU0kZ=!x}-&{I|kS9l0hcbyjIT?GK8W zrnYbPI`PByJoOJ{f9!nRFH@gcyZE0_wyx3nH*#+GH`n&W)Z9yZyubdIRc`EVt2rO< zhE5O(bY&n17w;GR&+uek#o~Vl!{dLcuHL(j`;q;h$>&AtZ)m;#&)~APe5=k9kw90) zR*^slRuEQv>FrS!zaL&MhxdP~t2+LB(tn1RBBo#8 z7e)l#{Iz+r@HbogFkO+r=sAf+^*@6`^?!z657qxQWph6a|1+5%be)je<^K#W zOY>gGe`4SNpJAO){R_1X`$X$c8hq&g+mvhn#Va7Dj(_Ggt|-S#)0&tjd|A50R)e|! zNBQL=^S?>${LgSRYkKScC+jNS|J!SF{x4hA?ho&mciO)(E&k82>8d_={mGb*_y1O9 z&i}>p<>ep!%U|6JZ_4KF|IeV{>-}%^ z(`YLH>)Yi2434Is^CtdhP;r0s{%>RM{x3(fql5p+{Iw4$3H+y08~AoV>wktP6Dzd; z9oSa?OHs?e$NrP&o^+5|=l=?Pl|NYjQzee;Kf?yy`Tsh+CO_u?Gbvy4Kf?{N&;J=* zY>V|;?H~O)Eob$gVNxNeqR@H_syP;Z@c-xWz41T88_x9q42y!c*p&ZgP`Q6Z{u|eh z{|py>BmXn7{AYO5U!nZ(z_j{bs@i@{(+~Xhd(@?|(g)+h}3=5^6*(b^WWWMvC;f59eU#;Eiv-JNq{`$`_@qG7xhUs$a zKe=BO33RPk?hxoI!T`mL;X(fy`YrAGpK0@LH`V{~dGd#UNgG};pSDBG+*hC5W1Oqa zpmnePKZD%cf4~1TyhvMxockGKN$h7ZfYh(K8~5qHpTXU`{<2%wNI#vHIY-N_TY6fW zdIJN?q-?L9_hmAr?Dj8=eK}9Di*t+g>Bk8RxLk2cgY2O@dxC$ zlpkK-CG{!ep3FyqJy(`Cu3P(X-nmCtVh=50n|60uuULtsWJWiidf8dY$zfB^JYVY65Z%en_ z{<;3@&$=7G@BO>_+BxRHf6$3fU>84$=dVBc=Jovd`u`?ezp~|R#`0HXk5(R=a;fXL z=ATHaxYG2uKm0B z{IC8$_5T^xhM1lgtxtZj{ri80)X)1)=d1jSpSiv4s&3DN#}=;x!pwW?pZHy`e|@O_ zuh`Xq@JT)^R-aVMsxUszezZ>LqUrvPwNqZ11>CEY%lpruf5zcdKSTb12B=o| zZ~qzo8vJKC{-5F3)rqc(v;H$6#Z4emoTUUhNYO(wH2ZD#8IShKJzl-v|4LMcN_9Pd zxBU0(KQEYz5lP-2krCE(X)wo+%HRouIX_N*oc&MKzUj$z(3-ogW&Seq>A(Fa&wVa= zcjK;Ft$3zCasL@wu02xucS5ZCOSNvaea2_QZ_Ngu{yq4;&;Q>0-3yM}#s5zH&u~&4 zG%WEu|1ZbY{f+e>|1;$NXGp5+{LirPUbv55|6nupR1NoAd=ZF0c@fDxCy}b73zo|h3Kj=5}<9~+clKzf5`CT8*?a#TsOKr8t zwCU4M+Dw?4RB^{BzjtbbvciKt-jb`$k9n))xiihB&K})+^^R1DaqQl`Q`9n_hP2*T zQ@CF5xdfM`N~41ee?xt$ecOJX8s}xN<76(IR$ITdjLdcUr+Vexf?l6ABb%A);*88T zGKg9{=n>%fdg^07V@=IRwzZG<^H(Gv+s82dptzmd<#ndIg2z@(KNi}2+URh>s*Z%2 zVJW$btNuRRuPpl3GzHu`1+`E~#4_(u2Mcc_-|%q+t-_0a>#ZsC{+sW0aM!v0$Hl*s zOToSEhyP432707hBlsruDR=8Zo$vbIl8^v)a%4{Ll0T#`cJy~(wwzwAx}4n9Lu;Sk zQhd_wuk&a2_rH((<}a=O{w)^fmg{XlvLDUw%G;lPUZ6rNH(-0)f{xR&{SyUl-B0h` zyg%>X8vlFimu#r~x@75@wKv`$JGLa1J?Pd^)4Xe&e!fWf&j5F>{7v(C@Zh5S<5*FV zbIfy*L&P3_Y054wjUga42iftbKiW0cuK1B1wsrgV&wA=xW@%im>OFJ9K~kN0^1aM^+UZG!5(d5amEIm(d`!(k~B9UCTf1uC(s!G4*jm~=An%Oh9zxOwkmMl}A;PJw> zN>o&34$cJSfIF?xR_cm)Rq82~l;b@o7Qg3@Ut4O5(ovAKN2CyUB!9ZPG8FG0l>&{b zwg0$|KQt=*-)erif27XtN2=N61uyUCUy*tz>0(z?_-EBkb-e?1Tg?yUx8M5D(Dq(2 z*W3TMfj{rdGM1kg_o=t8^XdGw@#QmhTfPc)w!O;7tCn8tU%>fR)!^LqX@&W>q@PyL zn0Smsd1{(`zs%KK^N2n3ALZ>|YO;HcZpmD`XBuAp(>&AmMoL?pJke&@RABb8XU2@1ypV5NZL9S>cGrfc>=j#1>r{TvnXsE<@pTyt?A$&~@3GPT$oy^12mS6kiC4eocWmYLJNHdrBI^E@>xLQI+;~5_|9((+ zSpQa->ZfGw{m*okOSj+H5*UWu(}neTiJ=+Kp>5&)wnM+;Kf@<~o524J39Hxt6}o!w z%BBf{t|D#laiQKz(%P@>4?O1Rd%rQa3*Gpqw$&F9ph2OR()CO8-ZT@@GbIX^1+&(IZG|3-QFe}+deBy!)*unOWLu&PT6PFTaYEpdq-; z)3j=bf6spge&I*w8@}4JpIm)EBOu$*bj|i#nSoYrA@4))a2(dqaAa5PWvH}2IKMwm z?Z@ih9@*C%mTo^>xyM*;<-;2_ONHVV?5W`WY#946RpXiIIi9-cWS#7nRzJHRet+}+ z@qN~Rm$&Ux+w<|>)^$IEE+5*MdFauz%RY_jsq=!ixmhT>d0a3$B^g@zvt01g^~+nS z)@IuuVh5V}xB2M)w`tw}FW#ZQrUh|nEShkr3vqA>|Fk`Z{~6RzKhpp8&Hk6KXsvg= z&X44O{*#Z`e|=N`Wt!HnS(}${{%Tuu{L%7%qCZ(h0;A-}&7I&R8xn3`YJWfedC(Oo z!$IgGVpt2Tgis0(mtXpy;bhE5^}h{Y_W#wmqP=0Av(JBqNjo3;|7Lty4@%P)ex$xg zw|)Go^h=;?QU-Efw6y!Na@C+Y0p`HRn!4PkaAg`N@rg$ zVWGfW*Y$Ju!}V`Ae%SnwZ)@`EAND>wE_{vF@DYeVw5`8jMR;*BZK@?T7{=l@jx&v0T(jLxVLgDoub(CWD3@Cl~ulV7!qSHHX-W?rXq zU3KfO&Ihw>Zt6WraOUX_aC5ZPcK+@6`(NAM;(tFk{AYOL<&*IRX`u&VVMq4a{|sB+ z?|+ef-TwV-{(mluQMEbSD#LBeK%Ub2T>sYk{V%Dn=YKyt{GVZ>#=pz^{xd9GAM>AK zUyl7xp$01G%e3G07P@9-&tp_)*&{o70nb`?j~|JJA0NkxO`QBu>YlvWTl1KP4HAz6 zD(yUf#@WsJ{B?cwzf)V~KS9=u#K|wO5casVr*uWdx!V!f9)HwZ$gq9azR$V-GbK0- zH{8yVe>Hi+fHZv!_lt;|*BF#cypZ%X9^M3t>>+9<8o5}y_bpJN@`(NHFnB|P4+D3z<|Ho)5 zf#hP0l@!-m|1Pin&)^)l^FKq)jrvcj?z&%}9CZG%JVr6X;L+g~mhzuoI>^n$u{xx3 zKWc8PN6v{3auY|jO&JZ6UYe&8(|Pm6E-PGpR~qrQu)dfj?9%CH_Ge{sm6aY_NEmw* zvH86#ZtRoCQyVTX+a}uYfm|Kg!;H3IcldC!R7(7wN%ebw-zkbr?lJf(Ol6zUiW0B_BMWGy4C&clIyA3HPO>eiz(kc=tT>wchZfv*cPIeqQ$~>fHS0FB+4JSKhs=5_UU9 zYvX*=ju^vKhUb$G%epcY?*FhP*ZxJ;NB@63?*ADC-T!Dx`~Q;K6a1f{S@1u@k?_SA z9pnoCGjO?G{@1o%JLra@}FVh>ZA5Q4CVj0R@sU$K9?t8Q2wGH-CiG6 z#Dz1Z=FVHqyZ6Yn$)2eNlXZfR-A?K3n%o{Hk!sU?P;?<->&mz7T08aa-Mee|J`oo; z>seXM6SUe!=k!kJ6_!DcEX+5atqowGgXTmu*TwE@e59*=w8}+lUFXW1uccy-y;uA0 zzr*w4`eF9*sN-)VSJb$F@a_GTeNW^=YwNX%+HS8?S9@$Pug|(1=aZ|e zqJPV`PZ~a59ZT}e-lz~p6)V}}E*!_=c*!?ej*X=*JR{v)>mcjh_{n25t)3s;C$)Ebi4xqyFSP2i$GA>Bv+nSGRPTljdt z-SJ0ZQ~4$?KWutC;^Y#GZPCl3>vyW|lkz>=v?8$MR=M!u7ylUqYhu-p$+OT)Y_;288}0?|7G5K{*PeX ze}<2Uheu>o~I(}`LQ^iy?ZA+TzsFWpEC8GbHvWJTh*1b#N!PXw;KvM z7KF2u+iAsjiQRhFHcw~vqic&Z_K9A$-=zM*gget_*7_Ouv#eSQ_iRvEQW+_{`2gD= znd>*|(PCvWS|Voru)N!b`=jx}f0{q;+!NUHao^nXH9z+I?%#Puq3_d6)r<2i&Yr%- zGLdDo%6v@^*;K)muBi;k)xMj5%+Fu?$2MLt>pz3YR()n!_ek^NmsP1(+cxnj|2rSK zIP&=}v)?UGqtbXkE6j9i@V~kL18VxfaLGgajXJZ|H`y`QSbhlas4;&es~>A-{yli> zyO}D5(YgDkKbhskw4_HREY!ZT$X(R@n10*i=)bG}v1VQP5&ia;@18nEtG!>U>Swa# ze4Efwcj2K|(-{f9&nMYM-=U{X47)$1&hOZ#d*jFbh^tGQABi*`k`R~%!tFr^9 z-8{`)%v~aI+roc=sn! z=TlTnR{xVNMv*25mB=+JRJ3OA9 z3yog5&itW$drj_#IDS48Df=V!Z|-V8GFx{~aZ8u=?~SF&S~1;`lXSMy zaCX|Web3O6EP605;BU9F{Lp{QUew0_gWi9J<6?34O&)t3AMJlDxlTX-?X$~k|1(^Y zJF}RJ zmK2KJ7qeDf?Dq9UkADHjs~IO9`OA%-&+XBQK?iA*FmnP~F6UyzeL~4_vuiSX$=?6N zYwi9Qvg_&}bbbHN&~t(9tZ473b71w}Xz0LN1dO9;2dM#oQOJ&l&S>a>8mMTs3d2w= zm67gJGCljB0j(>r{R?~aYSyXF$JU)W;4SU;Sf?U2fopz%gYsU6OhW5}u*rvb|7TFl zwf?uu{9kv~v_V`JZ8ZXZ@Gta8G1j0P8G` zMGYAHAqk0f)<4l*UVq)~Kf}fRpex9s%kTo E0Lz_{R{#J2 literal 220483 zcmex=9G120;#{CCsas8I>5A1R0qH8UG()kY`|EWMu>c1}I=;VrF4wW9Q)H;{Jbx zVXFWG6C*P-6ALrQ)eH=bwTw*63@n1ILW+itY{G$w>`H|qMvW5}awt1(JSZA;@q>zS zQc)8pmzcPOq?D?fx`w8fiK&^ng{76Vi>sTvho@I?NN8AiL}XNQN@`kqMrKxVNoiSm zMP*fUOKV$uM`zch$y26In?7UatVN5LEM2yI#mZHiHgDOwZTpU$yAB;ba`f2o6DLny zx_ss8wd*%--g@}x@sp>|p1*kc>f@)+U%r0({^RE_kiQrin8CgR5fG1|`Ad+2iIItg zg_(t&m4$_Yk*S=4k%?K5g;mjzO~^5jJ+V+&$*7S-#A)KfjR!fEje|ajCKX-e5>qjG zsQMA)HL%Z!^H>vEK7)G<;jdc^Jj{#?OoGgU4E78fi{^{y+jPrhT(Xv74V;wyYgbU9 zUr?FHKjEI3Sv{e*C0(od+wDZo+GLlm_5Q}4w=ckcPvZNR(bIpaH*Zyw3evc|F3^2y z0UP&U&(}Y$Kl-`7dye_k$H`4CPov|jLav)6e%`XApeMZPN18 zieu$L(@&kB{_o!QyDOiBggZXk&lXn^6l!ebo2z)F*sa1o@YaTfZQh-eG$t;c=El|0 ztaIc81M|xDS2x|c%sMAAx%WV5sHS(?w*L$b*F(O>`kyM6*(^EbQEKz?ppsYaCI1=L z2EVx0lPY?}^TYaM8XC_W6UaG2kX5G5o>y9tD_v7nV%jDQ+ zVL!`_3l?8zTJUjyyWZQy{h@skOSh#mwA4(D>6_8yc}T9uVeNc@*po{S+zOKYXnuZY zzd}{`$7RB;nNMp<|8`Cb3e8+H<*K*!<-+FK`=(9W`#5**n);H*JC;S<=gasgXSQd< z*C$UNyo{}WRotU|{qChp`+DP^)h%CtdSyiPtsT}$2k#tLnZWUH%X{|!3@2_cs=xk4 z{+D64(pDR7ld^vr7oSeOrm=WW>)bmJL)cj+`8&p6|C#68cwz%T^Yq{+%htVJef8Dd zyZ4rV_?JH~`A54!Wm|giyftgfQ?^8Ft4y-J{%GC&k74mWQkQ>}$%}L=mo2;IooIDC zIqOUQ&h3`pUeDV7*q*5(|M-6faT||aYk%L^H}m1s{a4K1;h^XG+kJuUhX^uG4}wz-}r_6$GVk6gF9w>`IZP3Aq$i@J}iSFRUQ-!m;e zX|+sZo#T!^<2mKaUd?0rux{f+?)f4WexJi89rxPNWgG5y_8;pHMr~Wl*HzlDRBh|@T#03#nuEigtt)mv8R5ZmtrvZ?@B^^Rs8(KfUzFzIkGMKFmKH%^iB}QRSM% zf9Lsa;$jMI7FbSUedc6*>Os0paEY zyQtfX$z6&r-j&Hs+hcGmkCad_6u>x=y^_?0osH3v2s& zD~ndTMU>3ibn)Wk>5o&Nru5ri_{08iuJ}Q#^3FY(m8*2Dwrp}={_>aAF}0GOhfmWc zH|%`Clk%TI_pkhevi=o+0zTZEWNLkSKKJWy`YWR1+{COKn#^;1I1+b878F>1Ti1NB zUZ_+y{$W}3q95hQuF3IT`;x2^F7|6lCy#ldah`jmq{p3|%K6uCYd_4FvZ2Hy*=Db0v))X8l35g4QvR%b z_v=MZ4<~%uE~8ZFXH=5+_sY7b@)=A1F1>Zld-}wvlW!~SB|mPF6WzMIiJ$xYs=F$V zlb$^ein!qQ;KRjLpH@{q?6lRMz5I;h)DutgOiumI3iwtKn7iM6=g)IWi&lpP2L{GY zJN=nUW6@U2L;EINvoSB%KeGF--uHu6x$7H`w5DF!zbWWqVsnC;>(0-B|;m;W*L_4|Fe&TZv?2BEug_o_SfrS8k8 z3QYgp7<=FBYQSB$a=xioUexh_SQB2)oxZJdefm}Xrn|1?87$=*dR*Hcq$HR<&+zC0?m{IIom#@5-@`~t2kv!ir!b-zx!ZIZc_BeG_t=g)7#=bWA}xfO<% z*vmREd}*i9C-L-m>9d9DwOL0UT|cEvJom5oPj*lG!k1B1?R_B42d*AjeMdo@G zZxy*!9zJc>+3H=d?k--u>w0$F`N&LCX(IAYC-{RP%yhGbA z+<4I3I47O??COh&KYEuPfBaWJ#vrzK(shq$c8Bg~lB^S(74nYVJ#Jgvck|JnSGx=9 zBmOf?f4f)dt9NXGUzPFUq_&s0+ucVyE92I_FW;Wqv^F!0ch#%1GLM6D z&r(_=zOc^V`L{J}$@U$SEJMH1 z&(OmBPjIgBKf(8J<=^;k`N{nD_xGRi8K3`sd;6ck%%BKt-g|ub`h2b$W&5s_s~)w%K~wBzP26q&r+RuAU%2Fn!%T(E zpXPl0R9O1LD$C}g->GP`CyUK)?ftF2Z@=<;wGBI#hMqi>XLG6VV^{Z7Q<1l)uJztB z|6a2$VSdYhh8x$VUjLXJ7OU)cX??iAx!Jd8PuHm>M7m_|EZWWX$y0)*R9!T;j{jD~ ze}+Tub{ij=ZoIr@VyaGg*tcB!TaR{zWy}>XdhkH=;8&ZT;?2n)pRUXM<27A8YJYp~ zR_m|(x9_~Lge}8t`81`{w18Iu>K}PT&ew$7udDIcdUZ>^_{}}P_WrrGW*XbA*TGX$ zr{_q2uk7JCR{lhTUDo)R-^FR^mQi)@4{x0x5xaW%1OH?7AoG zQ|&_Ds(gR4bxO_ctD1joKV(!acKRJRcjeu?ang}`4gFqIZZ12TW@vk^U{hjY@6sZN zyW!yn{Y4P0*g$79X=gbE{YI%oXC*OZtLn?@$>w>XPauLN1Tn$4XW8+`zvP4rd0u3LSkm! z`}9&P@7Mk^-{O~b(_W{&p5I<3V7GMTkLK8Z=MA-Cj}BdavyClm?&JJ>Mn@!Addu~T zDi5%WeEH!h+Wce5>seo-a`!c>uE<=lExWN;_CT`qF=qWxE(eQ;JC#L#pO^WOeY7lG zs=h<%YVO+wSM#=VbI-L^uC&>C)BC8BF@xQs2?yo~RhY_m)M-RjwEKRXqML5I`qih* z*Qr7RFL}&Xoe;e-p?G_`LQ(OhZQaNBm%YucRZiKtI6HIw&NFx4r{8v-7kFyw^NhW> zu7&4M%KepUKha;F7zsG4amjREt8tI`oY|=T%*9+rPA$JTB-xnrui;7D|qto$Nduw?#~Trf5hLmWwx)>wIkW# zP3MmYWDDPo@A!VI!{C&N;lYEc?&E{l$OdtLvVRo>pwhye8{zRo%33=0qRm^7lrE zC9DnSJQt1fZ(U`&P^Rig?!x~JslAC?-5aN@-Dq9et@8f^IWbAqF+wGX8xbyR#n(i>AibTJ0;ehxE(9YY3*V0!b72X*83XU#Qle=bnZ!f z_%`uT&_?5Iv+Dv&zTZpxc7nt2tMBZx8OPUEt}{OVPft!`-pSywYg;?sPHdmQ)??{* zL&+Om(Fq46fA$sLTK6*Qd5fH`(yMQ`!!B)&wGEBk(kSV@XzkPq(-M-452)n5oWpf# zi}sbn2e0^^dcXAdt(QgnZmo`dovdT?;YwktsM?e$&+NSGPxI&I*GiZ7UcJ%zkXPy# z_uHb^0kaR!;+g(FGnx7J?Ac8YJRuFI4{2-Y&Ux}ht>s5?W~$iI>eWA*xO0`Ry@_Qvyw%%F+!7Z9@NgKjQ0r?6FnC#yPt>QDoMdUrX1_idxbuKTE{M zXI+TkC(S#2#q;>G*2L+2IJbDo{fzQ$;=kg0zx`*}e#5j|=hGoCj+-(brz8wQEm;!! z*eq5C9aDW@toKVjb=mo{-}nBOUHYn#`Yya*``+1|ZttT%tGt!CpB=sI*UiHZW6HKm zz1)9o60g1Q=IvLM_B7pHDjwkTLU3vkhl<66?-D9}nX5l+&0D`_>E>(7vmJJ9m>!fH z6u~8t)Y)}PO2PATg7#tLU|YR*^sl)&a(RU16<<7w0CPFnRe-W!KatQ_u9>jqG)IEz*&bGQFo6 z+`lF3Tv*K6vVGq^?Yym1rd5&g;AxoOr1F4gH7obt>U}eRZc~@Wy3HLLi@F$wGrsVj zp`|+HKf{9D=YN0He^~ra@NMwF#_jXJKm5;dXusQkhK+0LpI?&y{UGpfZ}NWzy9xgp z?lbL|`Ogry(f;!@`)d1z^^YVT{AYNf7GGp?@_#f?jb49o|C?<2 zpI_Sl{Sc{7j`+`Teu@3}1B?Gw9r@32e#YN7_M_&EhRBavfyF>mnEWCWM{)cJ(KlAwH{|tzn{9Eeyzgorr z4EtD<|Lu6T|E&BUmKXmSI;toBXVBUIL5+Ol>tk9>iHTO)uaE%xKF4)CV6?Ef3dQ8jJQD*=LZv z*2Fka$@J-~=uRnBjdPI>=UI4)|9y@AcToQG@8bUqef1wr82K{J`u=UQfBxBshSxeD(2CEGT8nt1G9X7F>r#hD~mmW7EEIG)eC zwPM{`g?-zrt7jzdXmV`t3gua2%V8tM@cHATy>18H<}SVMUs%%i@JWBbRMm`{{rZMK zd$r!h-(%jhS%Blzyu%f{Be(6`v31k7-@d2Mm)fb9rn>Z>{>hjbz&QPBqs70M=VcfF zDE6FmPTK9#n_1Vy?#r;<A)KE{|wa=uiszG zP+T6Tcy-;wD{-7TI?26Hw?(9}F6Z0ub9N&Sv&?@64(6vnw-YA(o!zflb7H`()N8d)9zJUru3ijZA#qPH1`xIL)_<0U;YIB zdD5I(e(C*_$>o3e7p_=8*W`X*o#jWdrAsS}j@sC!KQ_Bw>Z|lSe8=W_xz?vQby@ms zjPAVVdx`U*G|%zG&z7}|?Fv8Ke%#;rNBzU|hu>REs;jGW(*w3#Gp)Y;@!?(X_$EWS zr7B4h4>lc{Z+)V^>W!#V|I}_q&n=T`zs6ICl+_ZzopvW$Pe}(81IlUh@ALp_k z+h>$r@rC(eJkRT2YLivBY@59G?UnLFS5p!-W~k&!$Gnr@op2!6y6)q4vE?VP|Kp0E z?V_nJm;Ay?`+0Bnx3;Ta;(Mghx>^6tTvt)``-uF9{*(F156`+vopY9+vu{PjEt{^F zTNiMxF{(T@F($mBKC8YXd{$q3<)aK6Q?u4e-nnN!ytJx46Ye!V%zV3s;Jk;|HLj`1 zPkArYlXLTP`k9@!i*Ns`U-e1v{LF)STYqN%^FQ<#ws_pHBbQugU?EWg&9T^>g~1srFb6KRs#*S1zwT=qZ1+T33POO|d3 zjEnrO|DWL`+lT(Y1(W|XxO;~GGp%^{{=hG}StWO~{V#sC{(Ui@Vb4W<)0OH>=4BB% zZ)Me{9*CJEqt&Lq)VWModtRq&&GU!(x7d&F7ql^dz^yBE+vAc=@FVZNQTHDGX9(G_ zGuu1m^0JDhrCTx;XV>%ytnpVeV#_@;<>2cY*T?>H`+N7;KhPC>eRx`&(qz}^kypNU z|FwGb=)x**86^wdX^+fLZ=U4sRmNaZl=*dg{YA5XPiE{gzg@rY{=D|}7vqh8-2c}9 zaas4{{|tf|yUdzyEvkrIldyNgrVsm8ukc>&t7CXp>tHE^!7WzL@ccP48_$*T7{1

    )*9C;l;}{r&ZM+Tl`R4diKfOwU^X)PJcVy`XTiQ`!{*4`@oRwz*V{W^Tr_U6(7`(_VBA;9*S%W{&}8v z%_OB5&w@NA7e>duHOo$fDNa|=xf?xu>Fl4{^|Ss>-fEgz^mSLfdHtvQ%i-U4X+O_z zlb8CF_>uX@uKx@z$CfVjT&dT|w|Uv?^$&T2eYORB+LF_D>s4uW>w%z6N3~Yoy^uVo zzAav8PyIvxo8bqno~>MyS*P)lzeCvV@|M%HS8I3tnW=ZrV$bc9QqSCWXzwr7ek!Ic zlXqgt>_0-Eb9dZ+?_2hHqj}WN{3mOc*6+N(^DD3D=W|;>UOu)@=11k@{|si=ov!@J zd}uH5@|VIaiL~vP-?v`AxykeC%-BbEvCp`E_P6;wZM+?K^`UgWP*mn7^)6Fg{g5j> zOP1Vno3A>>$%aSMQ+>uX_VTkkm<&I4iLH8_YcDeY$pz^t;Vm_JKOR2PXaD24;X`|y zZjkl1ODfmW_s%{XHEqW_oBMj&rCrB)CDZtP))g$^(VpBm^^yH8?W4QGc7FKnW+gXw z$*Xt)9o>}6x?c-Eo?aZwyS7~ROU{dTOgjWyIHFuX-DYiEDqnZ))tkMg@w4hLhZlX% zuj_h$+5M-Aa1crz`U$HXZX!xwG}&58-3`jDNI0=B@g4E|=Z^Ltg*dm=|jkZKqs%Wv<3Q z&9miEYM^qe&HQw`<3H&|-F%zCx{)8+U-Gi(3(9oc*B@{6~hulxU*`uAS@>Wgur z5BB%oQ~6jU&-3Whmf7C>MQ>;G+ssd&c!^WjPf255hH8=9gJ|^y+Md5n{XcSte%Rji z{3Cz2$=cM(@z-_U-2ScfFpe|%ih8fN@jcI7jN7?6ZiPCnpHVnz+S>Kn{~Eq{elPnY z{g=ON(Y}%o&yU(BANRNQHo%hbtojONPA4{Eh#d!a$ z*UyAJ6F5#BF4gbdFJRL7zD8f+!`-&bXUm~GcZ`>GaI*5O z;k$U^?x(HqO{RxUKYRN>!=(876}I|ybMBYq*Pop4b}@$k;C$(IpTh%Z0_Ma&TE{t>!Ry#U)%gApk#J^`mFy9-`>9eDR?KS)6T%4dGxYD( zbA9&D>;AK*xL3Ykw?97npW(#j*Ie4q&Psoq@G-r^PW61|@)yFf6?yW)ryuUrS@7Z5 zN|nX~NBS#mm9Cv44cu}Ds?#@Xyy z?-D0BH%|8Y_iB2{{CkuCx=nrZ_}TS4Z^}%1FCi;DLs{a{Uc6)xKc`hK7GH8Ey8ks410Lu`c`e^8XCk^}A=;*Uc8XJN16{pS|z@ zxz79SS}4!Hr?C1X_rcGvu7AAkYL)eCY17(O8=Pgge);xsUc}s^N?wKV9uV-+U+C3VimZzACql^I!B$ zdC!Y+{vRG6`)Bf_cYW5bGsYQaa@-rQ>3=D`EO#o?RI={0&IFTV4}6q;FCFvfKWVSV z_h(KRd&57oiuuRI_H!>3d?{vrPyfTUrK`>xZk^`w_FaP6X|XGx8x5a5?AFYg!ys98 zDQaons{agC`JdvSnJ<|W?`!uw{l~obt`*^L&f6c+O@6p-eet>H|(Bl^s~I6Q~8jdvVy>?^*bKtX8-ul5ODR&`5t@D2RmYg{xkUO z+p}x?kzFN=KHYknqnDETVD|Z2t4{4UGs!79`}vc>w8d4{mGQrR_eU+-=l{|Ct>#C0 zmyg#rvKah`{bQY6U$sbl!w&P@{OsD)oY&8|8)n~$d&j$(E9q%;{psBg-rsUQx=;T{ zw}{y;p6haA(~pE#aYs2n>qLYhd zH_h3=IxmtV;-cf7GgVr1m>g9ii)DO?r%Pr5YwY}J@ zJB@q73ci=B9Ji;I2^3F^uIfLwxBSpN`K!-A`Zq4WymfW{9sf0x_o6?p<@!hL-o16-rhOYXL`T;eWv53f zWaQp_a{j%5@%ys-lPn|-uBwsw$lkSwcV$MM(#P7hPw(DaxcuJIqir42rhU*|d&OK@ zOy6|QbVGs2r51N5Fi3l_>wXmecJRZ0h6bDLwHcSxChE>>E@WM|;(OE=vF*Q~MBJGj zZN|DzR47!ZTwQT$xv0{vzub?$?Ogw3`tfVq{_%e{iRZj?Hs$)oR~dQw>x6DexAtj7 ziiRsCEZNh*ynXWIZPM?XejQ!j`}J({;X2)Jr^}|lSLAQ?=G@Zp?&;@C7LU@mO})d> zeOx5#;3db26Btd4;+xFA&y)DEeBs?!pJ(UG-ix_&T6arb$h2(JDPl`S-dUdzcz2RP z_Qr8Z1-bhC-+r&^bS~IbF3;E|Cl@dF;uhyM-Tj+wyyX^WbxsQtIxWL~dYJ&StQ zc=i5JZF}BcXZ&MZJZFv7W%sr>U;i_7$SGd*TWh=P)&BI&UAIg-Opi?dJ74QuTD#yH z7q+s1vUQKs56@>P%_!aLAM{bbNAX(k`?lGKnQr@EyzJk`!JWs~rBY+AJE6$Pjj6G# zbpC<+f|*wCO?f8wrJ|!N_8*qY*`@XB{G#_;_iQYdf46ko=Yw}_IwGu2*`+4;m|b9u zioc~Fbmh$-N%8g?>AwrtS2TSOysUn?l-DTA! zepS~e|44oyBPX%{gYA>5JJ-wY9O|Pfqu4o#nFG**-rw|IU3F&zW)m_P+FQ z>|J>_-p~K`%hk^=E;XOHz5aCn*4+D(=gz!k+z|hV<#*ZAHL?F0^4|Qr@NsT?_qN)8 zyZD=nw)MaHdH>w|&FS?L_rLpDGL+a$m97mwGOH`&9>9h6D?9DE`e3homT-H- z(@EBjlKMP;KgvIgHn=N##LRhVpV56Xdj-`uiwPkW%y-7N903u^-1 z*{3nJ8?)XyXK+J8VVYFge+IU%ca@L)>Y1hAyY!Nc_R81H(R(f_Z3r$(*IBPqcsKv2 z;?(PU`=ZUyJ&bjH6j{e|E55@1_&vG3SJNtLLRX(%z4quHCrh1_e2wFqRhv!nxpt^9 z__G#XVX~EfY_~FV#p_@G-G5YmiEUk$ad!RT%%%5vJ@a!zHtp!$v*Y}%HKJak$70Pk zGatFnW@qq9UUWhB$}im3vR2U^>vc_KwIln3wiWz5V`Q{Q;&~jmnndwo8L70^57kjt z$F{Don13uN>z>Nqi|5)iS7e$o`^DPbne^tPVr8*m?V6Zv#!?ImnD`CF5AsX?2(^iQ z6svvYSLyfuuey>Y-O{IR<#eq?YoDyWdrEwYztOb2I~oimop`S>zOom#sm}j!`C$33 zzektcYIgc?`}WP~2`W2x%nE6fm@<#;rHbpx2PyNe9{SP#xV&?Z=!5k_ukI;*V7~ck zmd!LJ?Py;g-^{x0#%r{n#+rD3UZ`GV#8b29;o)0dH;bx7MpR(ut2X_IN(j$SHl#9~qTZcrz+%*`s^k zu02X?jZHIH7#8l%v&_z5hl)(!uk*+EXaCr^xqA9MTrR6dq>+i%@^UMO?JOT+kx%tM!S?e2+JDhVD*nOt1C z$#`Sh&kYP*to}=CEIyuZu?bxN<5q0C&&T_nUwQRTd-WZPE?#u&xQwvBVxrRDg>9~f zWkmBFpD-~sRP&0qK6w3~LGYDT%BG6>ZkNAUe>4o)e!0|qfA8X3JBpPZIe8}?&bxeo zUEM-@3j=6)5Z~~hp-t+;yW&UX!r5G6rOW) zE%;U5W+zgUb?J|D`QrG@I;|~NzG~!s<^Ibal^JDyL%GjliRXjqB|?kbCD_cI4CgJG z?Eh`U<#k`}8-Hynh&o@NF8gkN;mTXlZkIgm=F7g}uV@qzdT?OVQ}=t45|&2`|1*fy z=~TEMna6(7l%M^_p-c8`_8o85)C51`=ehq=>D=kl#qGOWni=jXxEGvC4!6|2qMH7F z>GvJ4{;bcRQ`5d}pYPw3FZTRrIDh8L#}Cp6IUk-icj@-txozv^bx-$g+qvz^yN7$d zJIy&$%`-LT&U1JtJIg7?@b0X&{~35*KakjCx-cVbmhZjk$qPBB`%d?|`aWmf+DYN} zwRF!I>)e*zsS&gC(36_Qa)J+IgM&V9y?*$wY)$wh?e%PN6_=wnU*4*noAyleYpj+w z=acr*TNcwYj;uPEde58V;&z!Q*XKWa-uX}RkJ<KkUVb4TDO4M**tlja%D^Rzv=F6(QZz>nr*`xHO) z{JPg%`si)YxATElzKC2qe*0c^YU|s(3nI&xU2PLP5Wo>L!O`;L{=RwYAN*VQ$S$lA z`Iz5R!mTg*`WB1XuKQg!kt+_1OpMSAi|#iop{zBO;{ z$LNDqehbrA{Bhj*qkYl67gnWvR_p%KihAk$?I^R(M4iG@QNgW;Hk`Lu-M=^N`o8s{ z*DiHkzV!Wb_R@9fYKvLk=9<--PzucYTw|@0yAxqsvyc zE0=!0FZM#_#NiV?B0)Cyj&Jx;`7!Bz*x!XU8oPA=o)O#i_T9$Z6)&aQd)96B-uh|L z%*WzV#mAq#JR_$N;`Xp>5{J#ybG%lD(@U$X+>9j0nm-&rSkIn(rSz|rt+UU@&6}sr<1~%FJ=@y4@b__nD4X>n+Mm*MH1-;>XT2?{ zjk+9nDQBxSXU%Wv8$!=G%ckC!UXhw3z;Qz1K|}V_{B}E~i#3iP>W_v?&F?L>4Suv{ zpV*c7Bl~3cU3)&^hX012t4f~L>s<}}=b|Z<7gzN2Y(dyQ>yPg3_383bw|^{;zw$+2 zCi;bx>$5o}-Y=)!vX$-K<*wVsBX8>MG1-vy^u%+=dR^C6UjD6VdOI%r=GR;Q=A2*f zWX!r{AXW;y2clv?9 zWY(_vttM~n?r!lHR9kQQW!m?(+_ftec2B%}Ls4Lx`GmfAdlFW>*=Lw37SE&eM>xIy z(R;xc_t>tQ9{bPGGCOwfN7F}pymX!!td29;y;f;bNsqkdVxNa@cb|yr+v^|y^=FOB zyDgvph=2WY=S$&l+aFx^9GUy1n%(A|_qnXEc10)dbnx!&ySMC^dH3lg?G;aj7bfe> zy!BYIc82_wBm1%QE+=&42jz%l7g(^-F4djE{c1?b~zF-G*n=rk^>U zw%)vZKBzoZ*}eUTn}6K-yW5xF5#N%kcYn>KvUls=*X=jH^HslnKA(;2BYys@jemE( zH_eW?cJ8y&)%=IsuIN7OuSt8jcB!CWBmb%NI}$Q_#a){YADCzLo;O4FkholCY2vrl-+#%y~mk#;9qWzKcS^bg+; zU$c|HR$}qx#BI-OX}jnByOf?8^zdEetjA@KDvI7tmEn27pna%LE~8H3L;G9fk7ie1 z&uWp=-d!DiSKi~wpTMg7w*HGy2G#8QKL7FkldqSb`DJ`?KI zw^^^fX371oV{y&H#aiWme%?J>9ugFlV|z~S_k;Z{@BhTu_*~o7x^2y+qc+D*N9@_w z^va__aPQGQL2H6HwFd4ANp59cAgU72{zp6iab-!M4qP0iNiWt}WIv) z`>AE_I@imM#iuqkEbw?^uppTsT=ut7#r8w<1-{Og{o{Pe#`)pv-FI^$fBw>IU2*&F zw(03y9=iLkKH1Dz@Nh}m8-@_pJ)8V4&3kXJQtb-l`!rPrQc4So}R-nixx=iO5^XDql1UugRujpP4t|LA<)D94q| zD>HLdtXk*Zn_H{9d-3|SA{XvByn0djaAKRI&4cU5taP^B(r>ks&(3@lxU3@V{Ly*J ziI2|*P_`EI;q8L#IfCXGp&-2PI68_g|l)g{j5{Hj?a{XzWU*RSupbZaaZtv$B0 zI`iRL=Of?a#HWXBsop;6gkv$oXY=FSYNw0Lc1(3VcCl64Rqyolr;9SncfDNn*?Xz0 z*_-V(_ibbEFW$CmZ`}L!SK_pPa6Z(Ry7yvw)#e{v(Q`#ZB0TdStqrS6Z&}#s4N2Tm(?fpnKFH1Tb$wsrwxiZ1R`;^$+heKXoJ{_cR2~=II$&g#bK+py z`7`z6`xGk{eLvdyyrNCN{abKFxc{CH8$a6jr?E~ciCla4tZ`#F zByU_=<@=wZv*e|n)vGXh!9Urx54%@aY&rkK^~-JFyvIIUuS8xp-cz`E!JcBfCx$yF zXRWGfF7xZs_58j&zHi@p-|#P+cV*hE{AXaz_wV+fH#`2?ja%|k3K%V|B^n#D!}d&mIBW0r5TL{~3g8 zv&27riT`l_`Xt@2we{;?U!DF>;QYhibMqgWEnfIzv+GBbsJ1qC z@9k?!<=>`VE0wK#bnTJ#wNrl=er2EG`^+Gz^;XQLxyzNi9@MAn)?Kp~_!IZB@}q2j z#~$^6&DuZuy|xNQ-1BE$EWL7($LS_9`_0FWMsB(&ctt?=x9g92r(-Vt6ZmoM@R6`8 z^A_pANk40RS435A3*)t$U9c^@__(5jfK3ybP;bzWjw%`(ghkd%lv(nd@H6llqYI*=Ae$_FKEUGned`xJ*@jmh+z7 z)*F8I6(1L6@2Qdf@cNN_&zc2SwyOGzam!wGUHqf%vEROVC$<$lx|VS-!#t(o_hgk{ zvwr$J&9djZU1R%^Z|mBKAKnK(`#;)tX{&GA0oyr&Foh_WsT#Uvyy26 z3A~>#+}duldwpp2+G<;S*=Y~fuZpX;w!J?6kKOWXR?830XV3Osz4zUOJhiL53wO?4 zw@j!x%W>PMCH%YOb=#hPI`y#I{9c->+7IK0o_mt+X+%f=jt%p?^s038)%Ut@^U@rc zPMfYNT4QX%wYa3?IG5#vpdaCn()#O{{E(7m@zzewhzSBDM<25b+`X-%D85X!wX zv@wUlmObn3s#%58XZ_pn6dcq)*=GKsU*U&xogW=rUHb5Bz5kAnOZS{FUYYUBzf&dE zv+HNR$*-E?pK)n?mz9t1w*4GfWBx(?khH#BT+9!%Z-;&#xy5uR@@#tcBhGxkN!x|A zPxIgGIk)Z9c?oMK4b}?gS)XgwAE_U{-~FFqi*9d!yN&O|T(j2)gI|Sxvnmn4_VHEK zvgIoNvgd9W+)HRuNYU~7_wm@nPsb#GAp0pnLxrfnxTC@SOxpClr}J)AMZSHSx_)xhmr`4q_4j}6pSP>> z&aW>0)_DRSTH8C{B~1^w@a1jzxm{O$s{X#+yN7k=l35uIb5vLAoO$!%XUZf`)002! zANC)-xlbhPO0HyB4aR?vPEp^HwEijck!$h~WLq z`vQAZAFbs-GJWE!X~i#osb0M{clFr{o`7gihW`v6o;(eDS7oE$J>IANxT7X|!H?hH zVn50*Z?y?t93T1eKZ9WE-fPkR)1<#_x-1_*$+1vJ%fqlT`K)x?F&FizY>R)d&USw? z+tc1R|F+Y-?wQ}qzi!TdUYZ|PEBP>*zo{%v{DS_MdV!4cT~@D;{W=_8FC=|_>#JpE z9h+lq-dr~pDT`hAuCUK8u# z7;|G|yve~Oui_Or6{_GNqSKV8}Ee~ zoS%-}6luG=`VoIec)ZYmhLrq{pgVWnS7yDl-@ImBPTr!OTc>>sZMd~`&Bmj0PQE6Y zP24ISuVS~ovszpBX8y@f|MXoyMP;suG+Tdl>u2Bp472Oa@7{hQ%)ZuJ;0OMt%K<)2|s{cf%M&(Qz#)_n!vE$eT8_|MS($B*kj!-Md~uh!iw-<9~AMK9Zt zIpgj*^O?`uGW+?Dz0_TpXO(l{fQsare>^{e{XVwrQ`@|C|K=;HJF-tIY`^+mdE=xq ziOgANQ^npYzKS`%W1i=E)7uxFE?s-M=#%rkf7Pv^8ML&D{v*H4o3~W=M#o=WQ(~)i zhuQSo(xPb}+$T5gywfT(!P0bjO!?8f#fSIF{3u-a%1(3tWs|dV(rNcgSD%#$d#Na$ zeA>gQO)o^hW@2Bq*5;Czr?MBW)u{GdQLgtoUTBZ8rX z`tttf`i=)b;(uFLNK~AAS~Al#*Du=E&VG}()W+R=XRdh^@gd68a8oXS?_!3>qE{`7 zuFMhJRu*r&FMr`_&Ye!ewW0T4e~R@_y8fs3VLw;athkwLfAo7^{Hhz~ukF8i|GqBH z_3{e2owAJ+K3N1*T|FCpWR_c_-CVZit0FTuXPGlyI%U1PrE}eKg$-Q_NAinA`>vim zU{e?`{qO4GH}kB1EX`bzRrbw%p5lkzxzie#--+aWe(ec&O?P_Q(Z}s~*Gt#>GBj<}=$$eCfTspmfo@v0e z*Pm1Sik81WCHP)t{cG*aXS>$hZZVy|W65vV{kt?@zy6h9D}E~^{7_Zi`bXON9h|HA zC4MY&`*+{@n`PUzd8H#&E7rQ$wf`J`VZNACh^~r4!fZC(dftX zfW5PWXRa?9hX~Cv)pxpME0z zT~*^hgSPkkfBWTb%@4T7yS;A<_v+fRr==dsTP{4W&UJfLl+<+3#kHKxozWw-a7UAa zB})^B^wmf68P=Pe6@Rd8TaABVdZygmwXfS_=LeN|Bqy$j@%ZF*{!^>Qsh@df6RJYC z?_d6E>zDrw4c57@ra$=4AZu@YX|0{&kFfs?hj<^Iju+1OZYLITD|nrAy>KK;nAX|n zZ8sYnS)Mm>tkA2|{GoUH;Khn!eN>ycA~TBrh^7A4ojS|=KIvLr+*-X-VaJYM=?+JOv!4@0d~e<|GG3!L z>(Fnnk2(LeFPpq)^?d9bRqZc%=f=WIV$Xc_BwYV9Oi2-36Ld%QgWLV>C5L&+d9KvH z%YK>npW)m4=VzBMn||$Z*!JlAAD;bZcv5=b{j&Gpx5>4+D{nehrWcCdU3w&2$N0d* z#}g$|BO?kNSp-~DD*BJ#llV~jY-Z;AAN`&e-hH*K>EG6Z`J0kXXf6|KNhq7%iTZQqt~zg8ZTdw?DSD=?!&*{N2>hhug`Ke zvx>d?B``LY=dq#78OdMryLh86?Fn7w58RKY&b?B8&u!*fUyH4qqfhia6Rz5{ck61;+|JZ#-peMxIb!_Q z{dU1Zr)zVz&FXz$cJ)8Qy1m!WpE&9D<>%YBY`>tR3>bksSo&2o|ulq-Xe>^`N@FVHr zpB1GmO{+^cP5t>ZEa^ArgAe@4QS(2{{gHkC;==z7E$v>Pw=OZtocMQ}?)BFV!eMFkxL_P1wju zN0_`iWUyQ0&hxTh>Ehf|e8xQ?Qy?cS@m${j#A5%~_v-&zrtXcfD*w|i{%`M-{|px< zZC}CM|6}Dxt;-MHTaN86U;4`G-tXJKcD(jp0<$}x@0_d98Gde(#j!Zolj#9$zJVXM zA3M+cqxtZycdJ)=syF{<*r3O|Y-{GD@UCmVX1-kyd>-D?T{B5pd-W!P!#tI1=HK!^ zJnMa@oowXshj+#51zv?&{p(*PYjrI{aazXOtsLr8l>Jo--`$i>FkYeFbG^S}_k-pA zd$LUDOK+Q9z2MdMeR`L-Uiol!>fM&@uhTQrawp`^@XW|F-#$mQTj54?Mg9yQ>8$Yg z`O|ljy@AOYG=;Yvaos{o8*W4TjZEDAAY-1I#>MYr5}?XAKqqOz?ryh)}wd+n_X4< zCTu=3-OI8x_~`_GpZmA^vyc7jyZ&|Sy?wFL7i*{IzSXW;mtFfUs`6XZFV*jF*&o`< zKN6{vyK1^t+WT_emTudi!%=&#F6@$iwqsVt<2KzJd>-uuTF1-Yu}j+C|D$SU{>}H_ z%D?Lq|ISQEi#~mS%ks%Ni7GxSH4n{16K*Q!U7nM)NB`0GLu;!a@vZ$R9oBx>e(!ZV zskwZIv`^;ldAc`!S8K%-)_P+fF-Ui4d`+;hHy;alQnzkYt^lK+#f)mgJI2YTMzwl%8u z&c?la-rbw$f8kfY_mBRi+4BYTYYdmYs#05M+S_$E_GtF%qpELrZ|3b*UDb4|HNs%R z#49|-v$ih#ZEDAF23E$k+Nqyv!p&p(knLi0IROgLHD%c zZ)NGG*2%rz|Dji{twjHAW9hP=jAt*Md|p|f{dv#Y#s4bb|7S?r`JaKKg8kU7@FTL> z-WN;WRv-GuzE^6`b^F`pQn_dMl$Yzps$ZVPmc8_@@=0aggItV<8nX0$icGq8sov!7 z^r=_ZE}vc&y7$@a*YZ1;)<@fae$$`W{OEd9-lLB^)As3I)USUi+xzsI(D{I_^2=8l z_0mImOw9QGCQv6T0-?jUQUS4{f_O_3N>&lAd!D7aHi!$u3nA z>rG*MpebQ_+R*gHKZDCQ^&it8^sRsB{hxv1?8CQd*X|wt7WXP_Qsk>kjy6H3S#(AE z4rz+1`Ce8@7vfoG9Az@O>i$2u@cJUZ+plv=QkLD_@)~qR_~)vh@y^$D{^|UX5BRX- zqwe~iJ@&;9FB@$CcfCEN@vY4z2hQj0H+QS>q+6PnyMO%NX5*T_EG9Ef`I4!HsoO*y z)y@05LU(5zG~(#m$QPn5#yuzD`O=#2^&gfW-DiIF>wUqP3OipW-uR>Yq^__v)|O^` zKF->+ad!90nw4>~NBBfzXROh@^H$1E@kjJyTl2&JjIM2w<<}xs96YRcc+l@arZoRcD+Oy3+AWQh}*6sfpGQa-``pfX22xq18N@Bdz-9sfRl z>u>#zdn%XK*u*}(FY&U9z2i@L^&;n$FQ3_Snl8C;J1@ia#15m=^6y0#1_(yiIrp~o znb`RLcCAo9e4jh#PwwBn%XGJ`Ub1iHqMNV8SEiaR@2I)jr8@bUp4-KSB~4Xj>n%Tu zx9(H;!T#WW(;m);@3~*wDg3+8<@PaesCNot zw|mZa)^lxr`Q>h&Ki?mtdB6Rx*U8o&Qcr&PGjEM@tyqY$XjjtZpLNlk$*c=23ld~2 zOie`=bGqiO&o19G`=@!zzVesV%TH^%?w$9q&olek)Z1}hcXMwY{vB80>fZBk_vNn{ zru>|lVN%-`=1p`vRIzShpM&m<9iMIXo0--gG>EKqt*ZDj{UO`O&JPnlhIQ10h*w@{ zTg-oZ+uOVImCOF*KDT&zwx<1~@wfCJ?W=zH9~0jt);6hP(xY0lYxi!hPw?M%!)#`1 zZpG~Gx3QC>*EASPnmv}};e7X}_JMp?oc52I%RPRTExP44|8X{N7-PyU7pw4ZHGVT^ z&TQOQJz@7@o>dbknDel`oYcK)vFz;4wdPTOrtB-LS~q7y(Knk<_pWcBJ8!<(zVPUI zJ0J4jQh)fquTCL*9?vD4;H9w@&0aGTvLn87msd~wwCDJzz!aT-5vTT_GTXGlRy+Lg ze$f(nf%-$?<$OCI{w%TQKl{{w<<0DXJ1W*m{N}gH9z;*EZVXF4^v>cOyY0hV?IUvH zKb}7BeD>k#(|e7N!Zz>PeqDE+?w;1k&JVRudvBliu5yyZBfiTjUgaJ;@4Rj(j+0$lcV1p# zkMD=skMt%!QkWK}WR<$UWsBsjSDJRWmql8Z%)G%KV(xmwpwxn~TvH}AU#v#-V|=HL z_#v6|L|_U!=+Mr(PfLag@@lNTlpxvIyiMx z-;2MlI&x2L+NT3? z{;aoSIPwHm_FX^cZ((*JIdJdCCI-{})4KmNt$|rcXsLh{|tHCpX@4)3;(XU z&nV;hv3YWr)~avZd*N#>*MEk%cmd~{-kb@yJ#*SwnAWV(^UlA=Ao%^9+noOl;x^p5 zD{o|8uDE>oVwO~sv+CT%uQqVzg~fk$Gtr;U|3`&+_qOcA|IGjJeRMzeUa-PAY~$r+ zAC^?*b*?z|O}seTGw=JdSEsV8tzSG&7dosz;hCdk>d(55kISVW%UU0k7yMKBG23zT zk7}K&=Up|*3nOBDzJIGOk-Yu3GIRc!(@RpO2Qy6UR`}^w#JXwU_xd?&*WFfqxwu|P z{=B#T-|fr(Mt{D&*lxu~@uU1~Kb!;m{jcwdUYIfclSEV0E2}AHpSI74aBnSRd9U%X z;>qG<3sc+uNA$Vw{@7N}b?{NFT)yx@`Pq7%vVVN5r4nS%a^yZpXr5$!U2(x5``z3A zE~p8AwEL`;rB2=-)1@!Nc6ColFYl{9zN26A&SD`4KL>VYmbVt4`T1)!KK8Ef{vs<@ z&po@^SN?cZf4Az#>9^kYgqK}sdpS+jQE~n`rH6_I7hAX`4{v9hQaY*o^}o!zxbNkg zVqMj)M0;n~zcl~z_rC7^=V?#>5&7}{Ke4*@{6}mb%sXw=A9~L<$=XoSd_?$VXSNCp8%syN>Qtka?dQ4} zf+z7Yepr9=^r3rdAH&=0B+iPrN;_ZtW47zfr)=HQyyPqYE`4^J=+f~lz}sSyzo`IQ zXF+_EZTPo>ANIu`ydT_ek9&9f$o6}hTQ4YV-+STdN0I9rIWL*&i|z@&GdrU1=`Aji z0E;_fQ_6C`#rK>0{?dOrRrk&A{|wV(cmHSbc=P6;YV4264}1SJbp6q1Z~x=+@nliX zwd?Uh5qF(M&dvUJoyRCQsdL8yw_87*Ds&^OmcQkHxSsP5>!mu)id7LMx@NyUn}fyt z%f)BB*I`P`<@QJxI@x=Gf6BH?1=`{d%qBld`M}>*!Y@!^ANpbH*=sA;#2qwSTAXZB zbiA^<*d(#;Y_TTKO07H|k(38wuOrLkI6mfg{AWnM>;CZV^|rhcd6B4!PHsH@QDJe$*?kwY|1+pPi`&hs@?7-NC;#^`>n~mZ8~G~v z(SL?U|K|2?|6UuLePa4g|1;!nTg|YEe@^b_?QibRGn}`zJmg3F!&rWoP1j2Gg)@HD zO|06zwv>DIk44*e$VGe3c#_6-IBD|-|5Ho8ukfTqC@9+r@o6QEI&_PNy&UvFvlq z6@}~1H%)qW#yh|6)EnJzhbP(|sSXoxN=a^2Te&uVRs4sV{|rk~=I@$)@9dweU;gMm zub%gLVfL-`E|c!qaJMVDrn!1AZBAF;mb{js7XMy(rPkp)Qu}>FLL@@DjQkjmt55b@ z_*49qX!vif)4zj%d%r3B&v47m|K`7&Pxa?M&Aa>b{jsv=bJ8WBtqt^GTJd_;?2KKu zPk%++&C1Ue_R@J17+2XLd84s|wcl|P`;OID{Ui3x_PMm~-PquQnlgQbJY*ik5t=t%W-Y}sMnq#Y)#^acOCC4qesb;)ex5sFDRL$FEF%l8N`<`Vinw(hf zd+O$M1u50q>(d{9|IcvXbwb};`}fgLx4(RS;o?X8V;3K0{4vhfZ~Eh1{V2DJ^IhVH zt1~uL-%x5$ocXl4NcVuhk>RH^N2W76%RG9!{P_I#&lT~9rT#we-qI^{_{av;V^d6L z-&nI`7Y`%XN^MOYbB2(V=A$)#%Rks3H>l8mxOLv{1+VuhU-~kmw6JjDkK`kn=__xT z9yxI1n10+NW|e^3(;Ba)F?IxRb7ctK>;16)$b7yZ`Ul=Gd=WLP#$&^WBUL)H=5^am z{oAuj#$fBdrUxqx)`@Jsb$rL4+l2>KhvnYi|L}eNq-*c1@4ghOs@Wg;O=5+6HoG)yE%~$K94C8x$dB%a zzWhh7$;n))@n4+zPjNrP&6nm;ErGFjnmpJnW;X9v_`<%l=J`kM2l-uF=Xc(-oK|5I zzN(`781M3plMjna=>$e|DD~Rxy!FetP$E)rsoVCn{|pfxp^4MJ`154gDSgmC+AsC< z9?y=Cax)+8O}ib`ex&TWS)x3f1;_sU1ynL$C@N@p5;$Ys&;Lm@q&5Pb% zJ9mD;m0$Nu#NWAla%p6o z%XY(qd`cVIG`IiER_K#h!m{n^BsZ0>tNt@E)XDzXd{|ELN8{ssfy3*(SH7Nb^LA9k zk4e8y#X7lc^(@=6>m;Ek1o9}z2bobw=zb0p`y)I2ME-WuN@lMcZle<*I#1lv7UUdjQ?-+A# z;m6enZ}lIF-Y=K6EB1Co#`PkVS)oto6vaEPh(4G2Nyzr7&!f~^OJkZm6gL#dd_R`Y zUvcjc`Yl+Q4E&?Ziua17G;#zN)GTnc4K68!VNAttqc3jwM z-5LL8*S4wEMJqPkxEmXBRz*{3%}%4)_4X6`y6St(v~M|{IPoO>wEu6LAKm{%<~O8X z)ve!RQ@JSPn^f!-E7iSow|tD&yE;!}UV6&ZBPk0bmt~hL&OR;4wAcG^o%vN8nfu%| z!mD3+{Q9>w(QuDc`E(hnoU#j=5uJo=|e9ce!p;TkG{$M%|Ck!CjZYSiJhfe}-c-O#S5#KjaNRbn8Du zYpM0=%6D5{nzn8=%o21up_Y%9_m$;+b!6);Mhcxcvx!YrRm3w=Ney|LY*td$SjvtSsj}6V;-o zAXLP(p~Esgb`n!~o`=E}uUB)gA5HtuAYDIg+P=5i;v#dsy&vw1Z~tSrYwgyZ)6L&n zFO8@uKeGMT^cI`IC9ihzlo|U!e7$4MzuP9|lYWMs7xbRi&|YwuS8M8x+f5(m+U7q# zQz!D_>bljBG@tfhaC@cqmFyC{04@QDzEDP5k2n)A4=!w@GQ&gj0tV`dY%znC;{Kcx?V}(~AELhs}Sn zUjKG(zu=`ek7sFT>Mfo1j^pjK9dqAuYrAZ+=~JCB(`E^q^Tb)-@+B*-+KIi~$9s9< z<#nd3=SGKaxsdy?_0WzxYvPjUt(GbDRIb)sm8o{=-K4Lzc2TvDd_OJwZT2s=uIlgc zQdg0XDI$Tc45DF|y*FKcr?0+sbJMNrlP=edoP`$qE4Vtmb2x2wCG_NTE_Mg4t0D~H zq95xY_3l4-pE>@=vd>P}On*zWRIXciQMvV5mxx>W)IB$oV^^&z{+zDDz?Z#sz24vD zI;L;SAH?^R$n#cY{bx98b#CXfn3n}n5&qeny~|p!cq_;F3bnBGyj9&P5qwT5cg0<) zV9&FfIoW?gejHs`BmX12`f+Za^~o)-D_BlvCJUV`E%18BDJ90R?ZT0#4U7?!7}R$2 z>Id|PUDnlK7#6WE(Q{%9!@)>L$>Vj|&))aibJ;2VN&1nutF0^dkMWAb4|CnU{n;yK z`*eLP+cit9H%srLK>^sP?hF)kgb)uTibk{Uha{ByzT{ zKCKyaOq%0}PE?`DB##)j5Z|shet&NsFx@)!$(~Dby<2~&SHGCI&GPbe9qBi>zk2C^ z&AJ

    8W>r&m{As-1k{C=W%>g+o?DAoAZinG5_UDr_Q>=6gdCfuD&$J(9TbfijOnQ z)0%t!t>A*FA7A%alS7Dy@{HuOL zoXY%1sZSUE$jj8p`;*Sz8Gf`m*fCqQ={)O`X=k&ay*Fp8E@*6V$w{a=c)7UHlk~T|=5>;M zr|V(uKU>N$<$3=MzU*iDjCJSl8CW4# zm*1-vXU?)++8Ft+)akm~y*tO&ZVWkfxTr06p+{QcIa}B1MH62c#@ke+KRo*Q-|1iF z&0lS~V|QHJqggQR+QgSa&e>I;Bk%lYSbX=^$)Yf^JF6x&Jx@Ftc3x@ux_xom_HFy@ zo&Bq33Tux(l*ktGb3@vUJYu%R0-YNLI@aoLv zVh>#`CZGAdO*Hkeki)v~5C1dteYxLhC$i=Gm!3wbc{Y|ZtBHC!LQO*T#2Q6T2l zX6bFnkZ(9)e??e#?Dy|=-=<#rwD)UubWrrId-bLF{#9pQd-KXpv&QX`o!skvf|J8O ztXsZD`mL9))%EbrTbn$-oLraUU8?uVV@>0kM*nFNk1ZwEyqEuXahCky`$Ab!vDZ4+ zy$ZW!x_9ff?W;3CI9;3N6V7*9*mqyZ#LpXNbObrAGICgR|5nlMqf+bIA6Cor{7?@6 zShJy`+4t%?dEZ?d8WXy%z0;f<{WjcS(z{g$o!TVj4dXaJ{b%U2=d>w)d|&$YuA|Fa zO=q9p8|-^o*kPV*+O=E9J!YvJdCc0neV*9Ips;&vrjof|r84{i2u`n$|B zR4lsm@5fArNiXaCbsJ8skGb?ui8uPuvfU3(l$@MdQfqYS*YglQn~6H_HKJ?xmbZ4- zU!1vn_)JT~PS+c9GFplfG9Ik4o`H-n|*Z z*B=X4Z*QsJ=6={-{O_K-wR195ULN|g?(V(3_uDENt&25sK5pB|-xDYB(f!zehQ?Wa zvnwAqFM2&O>%!fmi?*%k$yeO6-!p*o@u7wDG&UKz9ai9R<6P%|NRIi+9`%R$G7kS4 zSkrcW2=CY$(YyZejCCe^Zr)zINoUiHSr4KZRZi|c<>~)@-$$jTu`A6#{jop0ZrirI zUYA~0-T(M++R5+MYt6qc{IG6q_M-E-iK}E!dwqBoYvZ=~H#?rry?0BnsEg~gl&EB1s9T=Qt|!;#EqpLv zIR1~shviN0N-W>3%dZK{xb3=im+jQny&v{`&g)(9+sZ3>`z@dJY)_@flcRK_{CDkN zoc(Uo{qWp(X}3MMKL4SXvcVSKiDT z8u-eS`R9WG)>GP2w~tQams$4pKf@-qu0ZC4buXpawRbuwc5MrvxMg1Zh0Fb2U-rKZ z+Yo&sF5}^}%9*K78#rztVy4Yb=DeOSg?+1Pt3rvptS4>me+I>){~6X-+J8}5`|2R_EV_e?{jvIv z$u)XEYAe@8MXD`bX?k?s{KQP{DSgqaXLr1P$ag?UxMo#E;~eI`Y`M?J=PCaPe>me^ zO|@=+S81(tK)e5?oIUc(Y?o{=_TUugc&8Uz&b#iI+f?zaNo9|A`FF+h>)L7lko(VY zczSv8%YW6bwtK&BV2h6KNcCJe)0v6WQ`zm{gU9S^3vazY39sJhp5x|CRg4?33*w>yrk3#!t0- zUn+)$OC+tGb??LTliIab4**&CaKMyLj08yfBl`(kETQd>3qK7kn_u zGwblj_&3XJyg$r$`H)kmHQjI5O~ckAa(Ro^#l(2l*ITN}6TUU=5&t#|**f7E|0-dpm%b=!Rr8;x1A%Ujzf zvaK+&7rb$%)YkE-rFX_X%|p)0ZN0%yXH2gV742SKQ}^*%`}!ZZV|Hx6p!z|-^^a%b zg1ebXrz*ahc0H0-%Wf=wz9lE{y=DK&m^JCoPp&)tDE{DmiHh~d>!h#!v48ZJ+3VsK z4)z;cc=HbzXPJGx`=qgiC;ZyaQYS%XHqOtY2XlIAoxf#WE8Q^09iM%9=}e(# zPrcViSK02B-CMf%YM=Gt{|sC0_OGZ(wv)8_-t;xBUgB;=&JSz}q!4_E7$`yi(u| z&*QTm-?b(gcRhIUcUI~5k0RZbkA1EDIqMI_-81a=-*!Pt*HzEvy=9VlZ^`MW?-*n^ zXd68(GRj)t^q(Om$8EUu@Z$RXFY`s>oO5*6`*_@rQ~4YfBit5mroy*aS!vTB)87FvR(~jZe$2e= z*Sh`53tw*YUHNKP>KKg?%(CC8F4 zaVzeJVM*tgUbVYhlBQpqxNA!PxgEb9BadX1HEeYAIQGcj^x1wv`?T5n+vl^`L@xXx z_bD-P#miZ_n=`7?W==BYNNqCP-x%6==;H!?xoa%PqzkOKhvdxpZL;>>>sjAlZr%3! z^hImS?9+Mg{`F70zwWQ)t}Rtt-@oNw^x~et$9+53yr_ECUHIUI*z1ss>yjGZv*^u! zJjqbxymUL`#!llY2@c94f4v{CxcKF+UG#nCie>jhujaS5O|)=)RbTnz{xHAe+CRmQDdP8(KQ7&37W}F#DsbEOtxY1@T&g?g zo>bV=bL8}Aaf@3F%kA)^N-9&`fazax3Aq|Fa9d* zR${zBhPimLTY98~=kMJo%%*%gwIxM(&Y!~)>#BY~Xz$*m`uKiZ*gdh2jQv5|nvYEX zwY*b*i`%ZvlU{wQu3RGIH0k)$_VNkG76rIZu`H0b<=gw)qGFmp`yY!B_gnsCFU>Tw z=de?tccO{mYG?{fy3jef?s| zq}ZkT(bd~O-}-jBc+p4kx1vicoR9ok*IK{u_4LIqwN|q6QJc3n=H&W5KA`yrL&7|KWpFo^{x+h&#m)F5aX=Xa=YmMZ}(ZnGZz;W%3V0a zEa>ym|3GFF)Vs3uS6&w4UwK56a-S%56*WBN8 z!0orpZjY1JlNE~!dp~$TR`2+y_IjSd#%ndaD}Pko_qh5_M`7$M0X4#sVe~UU_iwji^`QRXt{5 z7hZ{H%9M*29$Bq(YW6nm9g+#VXEV$*mnk(Z>H=Tjw|vPt;UbZv4hlx3I)F61A>{ z`#1k*==)mVSc^NoIxrPL!TsvEJl|`-j=bOTCS0Ae)q59O>2RF7vqKg_k+4aplT5iEFTYbMe(`)EkLudw=$^y>HulLrx!u6$m%aC+c>ABq!iR6gZaaLO zHg)-Hv5PuYZ3|ar@K(yc&S6{W)oZ_At-V_pU%puV(RN?? zudl!VTD;?<`!Vi%{vZEtAG{VPb8V03n!S^#d^|zckH(Yu?Ke5ZQ+iR2Ik&R-8ZVy?y zVpgoN@0MpM-QQ#OOGWD1-iK4OJ(7f9306e*Eo42k@A-Vc9od_%rEXw2F{7u{H28@9 zrfsuV{t3GBCpMeCMNc?l_x62Pgpbq-b%*TrOzN}voL!RY%#qS*kurhpw4RZx#k(oH zcmI8LUpxNXw=XWcw%&|>cl~+&)V+GYSNnH=$hv2>@58%^57(AQZo9nh+Tps1;yE@VjUe6(f%E%l?< z|1)sf6t2DX%8G6GqrZB`Zv8s%zTv~UIo! z&*oM?`f1V~E9`eg{O~dB=^sw>)RL>o^;olz6!{e9oEtZ111R-*0}de{${Z zvh#~qMt$%9Y`^Q({gV98cWtlzXE<uF@c-ZSwMf2!C2NDloF-u}l_ZEEtWAB%&g9X0jMyXV~<9VokH z^PHnH8%_G|gW+$%;5QWeOuJ=zB=)j_av{F+Vf@4TH?umd;K2&hf}WPtWR+{d?Kgi z;woeFa|TcJeFQ^o%Rc>QX#QUM_0q4&vgV)vGc^8ZV7?qJXIcL{GVAgW@q@Rve>>h> zBg?n2a`nq!^EuzV3#jb>!aOGn2)tosDgR`={RU?QmVtbFMu9g<+Ig{id(STi)!EH*c}=O77VvpC$sCKkH6RkKi;pu4|#{)qt+{zwI-;`fo`s?oc!@joqtD}Ev+n;;= z{POzQ+uFB8#jJh0#++l5mRIA|)ZWDf9Oon>n3i7=;CLT&eRgJ?{Kx67TmLh(*U4Yb zue<%=ueRqr+5Dh=H@=HcRw;DPSv>b(r;*3m6NeJ!OpKcx^0wCO`sWSWw`0$1e>mS~ z6Z4-z-p2F6+1xFWB_1UirM-r4beHu#6@91LopSEerkzI|{;ZgxzBl2I<&Uk8dEFnk zEq|a}nX|RKI@c=uWvQ(7;pxA8!h%#TE^>Yr>!}pm&iBNVlcCaGd17yOeQTX*`{S+O z`=yUrNnXk;d3woyTleaYXHhd1Mh<@na&3+`;_~#uV{|vqV8BQl(v~e*lyYQZ^>6+&ni4$9EU1Qf?{VmSnEMWCOaNbPW?2|R} zA6G|S(zR1Q{&?D@uWBDkuZW#q^`q!ogyI#|X(EML6Fha+=!xD{zNL~gGkBp2-|>JC z{k?g6#2?+&lb1ctwtMBBn0*u1%F1$FQkc-MrLW?_a!hfKwb3*cA&Y}gY}9{SZU3Xa z!$$gXt-t@ZEvuz>tl#p}MmK*&Mcp&)mb9{Y(Jfb0XL&~6uB?6KEfig7$r*e9tp3s0 z_e0h{xgX8%njik3f%(b5ueY9a=Kl8hlYdGpXDVZZ+`^CXk7|B}A3U3UTuy4oOr;AmO(lGS_OU_NyZ^|vUO)>i68#Y!2YA%>EFdGrkSR;VgDIIwp}UZUfsQWuH`!^ z-?hwlPHTDwrW~B=k)9B_Q*Cq7-#PEU{AXyMb-iK1)smaKUthndyZ_BwZNt{46CFWM zbszBSU7utzXHq<`%DF%3qF(3jn)Kga`u@q>_ZQdCy>{+>_Njl<%eJ4++uyqPk|oQ= z)$6qm2!2@}F8%o8qyG%se^}i=PTY|ri^^EcP+`Om=q$M}If_m6Xjy_t z%J$Ws^+)S0KjydFSyxCOesxP{p1?<`IJXnI%gjHXX5BfxVES3XR(0n5X@1sSuk*IV zi^Tdq^b9}p?ONL*XSMCT+osv94Uvc}bCYw=P8FIuA#u*bvPnh0h4%&R#464o-7jh* z|7frB(O+^CmsjL2tJre;KHu4Lonskig7fC)CoyZ(#khZ(=lAhwc>L@4|F(YOo3H<} zKB{t(>*M)iH~w8T{NWssD;Cck|H3?K>-3vzmqp*0$GLWqQlwN+kM#NC)}G{yC!Zta z82_&QWBd3&gJ8xlA3L#+Zj(%dIkxY+vS4d;{e4H14?@X1%KkozKHsBxu%CB((Ysq$ z|A|$c`_J%r-|WXHY~mL>E{?e2y;QGTy*+Kx!?jB%NvfT_us8HG_W`s0leanx1ooSr z|DpXY`aeU99cV^S`N)5UY%Be5@u&aJ-+b=3?A9l;HJ7*7-!?D5{q*tNo5Iq+E$-Lt z{~@yVr`uhpc#*eT*X^wgn{OHOZlB1X`0YP-CzaY7AD(q>{(`gevy<0k$)|qOzg!r5 zaZT`^&db)R9=Bz8b-i1cCv@pnQ3_J#jV$2!j`9(j7#&rd%;zju-UWX2iE^K7dkCRNO{6tS|Jnrh2o>ngloedovM zkJs)mxVrQ1xBU+)c=sQR7j^!ieBhpjXO49I$79PgMd$pQW4Za#!+jIyF)FzwXR;KE zIGC?2SJpF*xwiCoU`5m6!}naDZG3Q+>9gO*((A2T*Gpyvo+$AS7TrEyW}oK{w_vpw zk8WLlC|{s+hxz3!{ey3V4nHud=UYDCVYgV|T)T6=>E2drr))Dk{jYDk*qM*{`6@~q z_!tr=_!jN5f3r}%Wy^cM*dO^vtkUK_+N&J0`)b`)^Y*J>cFH|s-Cl9&nB}5R6O}wJ z*-m=uR1{yFx;w7y_VU`t`8ze^_kQ&|5i4Gs|K0m@p4r=6^YmqZL_h9Z{89N~e)}Kq z)e%RR-_H^->6hL2Udxs5)MPj5XPio{6P=4h4_LmA%m29gad^ic#*cod-F@@jKJ>Tj z->kT2$F;BXxn4=FQA^)Zam#E+(<6zsAzCZ%noDNAu~GieAMt_x&|C3C^W;7jZrHo| zitE>V%YLO4>7KH^9TRh4g4k^?i}KzA&t8vXk1YLr>Us7=e{}c1vd2F4u{AIIA*rLy znZM@F&PaRq^>OWK&87!;v|J*k^Uw3lcxAa<{ryV4_x{tLmftSj+N4>uW8dvR{~4_H ze%rn)yr=o&?jv2{$9LvQ?zvtP&t2iFa(U6#9asKn*O&fXs`B%`&@0Vi*)4p2R~%j) znJ#1Qy!OHTKC7+wkJJ_)j?=t+wE5WEZ`$6mMz7y$*j-fdocy_a#w}A%-y>NYkMB>G z>A8BpL!QmX^YKIZ_Gcekd#4}XZ*=zQ#YGpnvv{VxmF{SLx#O1TJO$5B+zcK(R+smw z)t%oKXLa3VuDj2!jURqT`>K4mHaR-=$+j9m@4K)3_I0goOTX=Z^LqW8kBs|2&3^Of$wcv)&D-nll|7d{ zZm}-j$)VWi{5h;ls!q8II&{-;iBtDlh5pv`0@H!dAzWV|AX~n zrx(2X^fIb;-U8EjzFWT8BD&UIcBf5>&T4*UIwoUKIJLk_&#jjE=*J&jxm+n9wjP_A zy=JpQS8Wof`u5&(-ZK}Ey*O9)oWbhJHG*?cK78!g^zyik1FzH*QM^> zctL%`-d($kXX@#tPWo1;ZO+UXpuwOg>eMZ#`lIsk+3E*>pSM2x(JQ;Y>)qU|e}ez^ zu)mW^I%dcc?7rovi|gJL9>YBXH;(#0^vwGAZvXYUTkp*HzByaenP0*s$`m-Qg7}v*>pUt$xkHyQ_ zxIXmW&k}n!F`cb>b#%n(@^aH9XJsb&E3G-Px&6~ypTPMhl?(U0`I-5d=j!Y8&G(FU z{SaPRW7B=)pYUb1U0Wx6w|(~gu=GgSv}^i;y^EuhR~SoHCaD-bo;by3a^2$xTd(c^ zuG;H`;WN@u_@7vR#H`Uym zUUqrew$J_{)2=`J8M`h%T6^{Tyz`m+RBBxIeE2u_!F-{YlkT`Kx|{uNR^Gz&%9IP9 zCgw%8v&?R5aVn?G@`&FSV&-^az5jZ-{s*~lab>_Tzd+nw7UpnY25+ngS?Gp(zc1G4I zU3zvkM>#s(Wmi+hJEhOX8#Wo_uI(#uP*z@6d`>&4i{Yu_hqHgrZOvYFuXdW{(HHYP z6w69IuHA~5qAe;tyJU8!V#TlGiNz0i{a)R>cV+LAf}6K(483J8=}ex##o1{3p{ALi zJX9klL`;lpb!8~JCEuEMd26=y^)3k}=aNls+l}s+{frah`^gsF(dRGg#vUB9zDB&X z_)+xkr&}*iKeuoDuKqy1x~p7n6SdtIr@MW+QTr$Hoa2k?tmiegk7x6@+ey_H2X#HF zcGKl4l8)W1thPAj>ayp>&bR0D zE1x7TnJmft^meQ-Xd`II)veyK{9b#mtY7*`XU)Uv&ZoDcQ!V9d_wvx;5zA~Gh345Az zHR6bD)6Do{{)xrYl+_s?$W39a0}Uc!!EBlLKgq=Z`abzTgOk_2HL}Q;GF@SOx_`^| zuC4JLe^mGGytqEzYww;bCU5nBdl_&?x|de-?wP#Kk#VAG5~lf8o9J@8 zeQ5prZRzTY)p5;U+jf6!TUojKXv$lWNuHXY(>_O*IsE8Mafoc=>bbtS#zjsnpCw!K zab?K~8?|q5W=)xOW~JzYmJc<@i$vX(C%N%iU!VO$_~3sAHaoQn+Tf2;N*3d$1Io1tv+l1vb6Ncma6Y{&ei&J_vSAC z{Bn8fv*K&@GB(v~;(wGMHfugSZJ+Y=qfyh_N^QNeC7*6qx^?zl*dq>>SC0z>o29+V zCLJz3pn9wPt@(eD`EY>YPWGN!xd(22W#U zxRZT#i+uAufe&Y*m@;p9cDyJL7rz#CveaZ#cjv`RnM)?#*>di+_S2<0o;lAIq*ku^ z!}dYF!$vakp|87x_kR8xS@yTKeakO26MBBGvilwXj;2qZQ&m>-gfuM|@r+fOpS^$T zZ14AzqT}u|Mw;Ea9IZb)GrsQRzUx1K>L26w7s$5f+he%$PWG;Ey&oq>UQXD4af?0g ztLtLZ0$;h-9G~Q!@Km);_x|3@@9T~nV4T5x;>Y5U>wj!l{t5qhM^5Yde}?Tg=8tRD zeYRfy&#?7fyw9v#5hh2c-G1ZqOhD$&8D}kao&#-<_8xz9{ZM@SuhysEkHm|fUH5Xj zmEB~em{+sRJGRDJF51+wwl|tgn;!o*^KP0~K4qflmRmks$6Fnj9bGH#ebUx8DD(T?tuDTH zF0)_myMF2Xli6W!_r3YPN@LZus=aNC%WJch<288{m1Usen;^!>V7) zdrWk>4<{dyTDLxR$%|^`39qDjUgt8cX>}5-Mqh9~R)zvd?W~g4%f3(Fj{`abs1j)VaCX5#XeY!&CsZ27DR4$N=<#T4f(0nKO z1;e6ErTwCrReY-#Wp4i-=lX7I!MACGm3j9{RtldvJ%J;}U3gnW<1woVd|MenOWn|M zdPTddzu=4Mvr?v)Z!^5>mYm&d_i*hyfvP3lZO-rbvd`A1$~yfH{=xom=2ls^?_s<7 zD~02-!h>S&c&w~6()*dBI;S;MGRa@`g`Kgl;vdabdV5!Fo4i;;BKE%8Vzqe*{kyqm z+?CU*uZ;Bgx=#9G`=R-5*Y0zzFY#x)b9QZoaC}pWYK>>*s_3&KjgtadF8M1u9G!0$ ztbfMy_nk}EE^Yt1cm3`4g)f)>XISqn|84${gG>Kdet0%}!QG1O{vYBNKPsDbOEx`l zf8W`h2fgMo-ZGPn`pq*#dYJrv25xZLTIqYdQ|;sYH{KskAKO*mXF7Xs(xKbV!dX^+ zdkxIZ`HQo8Pu;c2pE*NVY10Y+GtmWSYt`4i{>S;l`@wH%+xpl`d$b?ENc6S3`E{}h>hFv0`^x?+`_Ir;=TwvOW9y^* z_I*~Db-&;EQGL+un&rA0*A*|!XU*Za_6b~j&$}`(ry;iYxsN30WSLycr+*@UdsJ+@ z{;0m=%Y6P9##vVW?WW$gdXN2^4@)imdTdc@A7|Pm>l^Gh*S2I9a6~dsyHONiH0vvG z_5=6knqaXRyOvZPTb`SmE4sot-8 z88}tGZU1&_^Nd;Lm$z1**2p_k|LW`!gHsdFg)p$sT6=(b!BLCfYvWVyKU@1FHadUS zZvT*5uckj;|MPzRwC?#IruXhifBc_8?#G0elQW|7TWW0AZ0ojNdUwj@Eb*R+Zl6wV z);TqyA(b)YsM>>_I=kC!<93{^EoZJ`*xbNYq-n;p0ftZ3ukt`j`+{Op%ZoN{?YrqeYMs-KIbbtjUV01qGIp*7OtwD zyY;P|q@2dWsc-*mc&WsyseNeTyIVgQlXEn4wi~k_oZtLiPWt2fHlEWfqGOf>{8;9_ zUij{v`^Tm9XNr`Y=NDWTRB(#WoalBfhsW@Yewo?7H^HAXqy9}Vul@BpE%^J={|xzN zdnT7}pF6EPyDFCd@2p2x;=MlFeuZ*2m6z_gmQa=JM?KrN8RR-#)AP_)-0l?D>QGH`gDoW4ZoM;pD@9>DRMF z->ocNlf6$Nu{vi}Tl)6xJ9kVtsd6u6WAyPhJ@I0`fY86%kJ9!pJ^LVDCNewxp8kio z@202N+}6ImySsAkS}C<{hAR0Y@A5*IRWAIT7o4ovpP;sH-bcQr4}0?uSWSJlc4g+L z3cJt#6W4uZ-xKjR>f&R?JLe==om&1pY-6`v!~ItK;rp&Hd!-LN58gEW#FnP-J{vY% zi<7?b_Hpb+f1Zh6n^|ukpRx9Yijbpa?!L@@Yu8WuzHaxM>ucxT>N-8mcFn)sPp`kP zpL%txH9y~LgCE_8f90*tH{J6g?zVf;q+1>-oma0IXdGcWeV(Op;*FBEQV-@oRGm|2 z`oV2|YNguT#Xr3LcWv5qQMWN-=F*78;bnI}b3E?7qRBqfJY8w`t-P~$EC1C$U7>wD z_{bmq)Y66juEuF!z8rJyeaY+hJkeQg$Nn<}&3bg}UiqFKP6?eemN;i=9AdnXZaVwJ ze%GCs>#VY-{ge1;-f8-tGjmqSws{J(y zLtrmk`vd#dUHgyqrpI5ZaV}i($7z$le$bY!ALo8Kn=35FEwjz(#c`SHnWq-^Xyi;_ ze75L^{)gWW-J8Dt_4fNw{Ybid_gm%Kz3)2Zd-Jz?tdYF4nN#QKu1OYiHmIvIr)=X_ zTQ;vt{KeAGyQl8+wBM_K>FkxO=kCVTv#kG|TYL7`-0er}JEh_e>9@~%-o3R`P^aXC z%^{VkN#SJ)6Bm{#1{ZHy_e_E3$pl8zPwdV6j9y7SZ>&>%Syg_N<4^MHdq4g&cwByZ zr9?MZ-E3O<$|SXD^>8~EHMgB@W-+>;OJ%10Vw+l6S-dW5 z`t6g^XZE}MBTT*>sY1&h|>2JKew_l@_AKj`;v&8YEym>Yj6YVWU@AI%}#uRor2 zG4Gm1pzQL&0L>#iHwan0Fn$lcd0 z%HL@x`GfnA9A~@hzG>fmF63`r&8c3Qw|sZ?=|&Upe38$-&+Mn2l`(ucgJEH?mc`nt zoBtU~_5R#a`&+g4cG)MzX!qCkrmO$$(wpla+suO(ko-M*bg`dFW)n8nOy-W9S}PaM3vfv0%Z8?Ex)pZ}PEW`!!7!ZzLa zKOX<1enz0H2#>}h$jtS}{|vXZ5B$^rxOI7J_@P6uo^i{%dC&Ft-}=$_dHbw){F}Wx z=4WkT^Ha^Zo$tJTPI&W=`;Tm&_ib6v!Os=_ghiYKMFs%J6`;wpZX|zulB)j2Di;7FOk^etk;#Nd!Kc!^HCdL zE5n0^8xxK^Y4Xdq3wt-c{`l0TpLhH354pT?QKrfL{KGTNf5e|&>?_y(VRyvjBl5g9 z>Z^QLxvulATyiVMbLBzFKI?56p*^dXFfHuVVPxp}BKqn5(RkU4eU~5c_uF`;maaQ8 zPxzzU^rbg-x2;ROys-7UUs_wW?t>W9>$jIPIrRjeie2hw{FV3kVSAp~lMmIcLJIr2 zPCk3F@%D+2?in?04GbLzBRK9((Pm;RVskJ(@uT_Re$K7?d&2Zr-Feyjk#(B(i7m}* z_7vT%ov+LDDPqk+^^BdG%EyixPH@v+6y&$xDk?sIR{7UWS+}E4JI~WRHeLJ8{!{m6 zXYc(br@HdT_M>(DKlU#5U18q4|DnqMk3TP~bS0PnXW;lVOXQgAc0uNU9kI!u?)_W?NjAIxXXVQ|dWkz6E7L1&D#~}1{=FS3P_(>a zdhcwz{D@7L{t0K*OQp@zt@poLr*!lE^xty%LY}3%-6A(Wxh7oR* zXS@Coe^Afwzsz^}i}~r3eShoUjDB0ZefqbIWnKJxo?8b!OFUP+UtNXCsP(GFdjDM~ z?e|^ZU-r#p-sah{^Y5?re_FEkrs~QQf4F~aeH1VCqdDM`O}c7d_alzCz4!kp{b#sg zKF!=LL-mHou36z-zYM3cMTmy0r0_JxovgRMb>oh&Oj>xKS*hdI4oRk+@~nSkKWuL( z?e2~C+_rym&@BE#d~|tirzDuDg)mgyeW58h0j9t)*xRB zJ+@_b_3Hl&@t5qPE{n&`|K{_fd{yS8>|)-mM~9<6`uvmmT)*Y%_FcucubrR#`(5_a zpl1C&79QaV+UM5TDOcP+I_c$n{x6D;vghj6OTOAwyYiap>b1&&vyR<6B)wwcj(r;6 z3)a27@$Oc-l8M_vJ@-peannD%I{x6<%A|Wr^Nt@Vn|?jX*DW?IR#T@dc4x(-UA+ra zx-6ZQHtjQ5SZ=VUH{R`s;L<-@7kHm{l<3MIoS&0@Rz22V#B^r$^85QTwu;V8t&A^e zoZ}U_-5{wb^KP84uI!rKvB&Ngb$eExj&+W9Ee-#9)9XKj+1>k#E<11jA^K=_y~H2A zi|-d~+qc=kVUx!GSfKA$gmIrc;Ok?lWbu3ffqeT|K}M|9}s+(eo(q~Fe+)UWnyJV7C z!rMs;{;a)lcF~Xj3`gp@_i1Nc+o!Z^&;H9>y=Mw(?{#|iC5lzt-S}rdckR^Vz6`}P z-rv%#BbSBcgfFYO{4o8n^P|1maUXs5r|;Rhe_PkF^8wf2C0>4~FTlTfRm7G;Yp zgjA+(+9}g>ZQ0I;`x*X3U-t_wRYW>yJ|ZYN5+do>t$?ezsr!cx-s_-c`4z&i~dow>0xx=UST|v-It5 ze)E=l;Pxa(#^9Xe*LCquKdMURch1xN*mG;m_ernKu9XGcm=LCIk-%5L_DFK>(=y%0 zV;66$FS)j*JGS0^-=+6c@16R_sk=PlX~m3VuRACZeLx%bqt zZ`jfPh`%TGg6Z4p=pUs#29eQsvVX1lHGTfd<0^|@d1t1l)oUu%esdv{91;nn_2cORNwX!Egq1>+>$k2;+@+3K=R{SbVp-)1Mh zZEyR-krE_R`@@R`6suh z(#G<^ZT0G#dg3uVH<}$?nmNmVo6c#S`2%~`+O4u-&waLEdXuNGv~9;u-_}^EvQq9Ru_iAL z3Cf(4^}H)|;<@OBS5oo^BI-D7r~K2tw8l>Q)%PHqTYKz`X5F&&DnBv7dxh$$lLcoO zdE~gI@20kZTWiU-&x1vof80&)o_&+h&UUXdEpvYQsZ%GO zr-^6AaAm#Re*Nz6OXa`cZ`-A4Z8F`z`dzN+_vrZe{S$voe60WWJHGecjSLKfBE@Fm$ju={9AYA z(Pt+d4BXA4%oMwJ#t6rjcT7GI|6$#psQcWhd+d0u`a1tJaB{tn{?D-4b;2(Byx*Gr z|DvR8e*TO6y|DOm-ukr~3-~A5WY`$YXR9f?&#tG|e>it#X8kSyWrtO6`IOx&sn0d* z9b`WsbX$XXE^EG4JwLlbvh6tk!H>nx*o| z`>fVxi%+LJZgZVJ!P;B9)|H{g_oKV-j;sI7Dz3>3RBU$sux+2IwerKtue@UKK8HjH z@~{~%vkB+mFy?IOn7HxW-=9GY@4n>ndtbW#ZQ{p86?M7UN6Mns{@o&ghXDOYj`&4uDR;AXCUsYWUKjI&Lo4BB2df<=thj%4@q#vo1y|RZbZ@&MBXA@1L zYNlWGy3NhGbc%mb>trK_^>b%WF%Z3wnfu}GAN3FZ|AbU8CI7IqF)dMX>Y5dB?Q{Nr z26+Y0;-JUhrH}tQ`=8->*KegRh9A!l?-Sbc@$?^&i+_BVpIudvmYaU0-Fx#j=4sQt zLucIRocc5MlZy-cxe&GNHwgv?R)H}h{}~weN!|I;`t@ZE`@@^2%XgWtU12s`aPD&b zJ*VRH%HRFmUV7ey@zb~754#VW|Ks{my3TVM>owE-j$6}a*VoFdn{?;q-hWHhXT6aS z=H|E}@$cnx<;inDFO8~5HC@m5vOsrQ`;tG27ZWePQ999e&AjVSTA%5QmCwADb7h#G zi+=qp!uYAT>xbe#)gK)d=e)1{SpA=Yvum!*&G%OGZ`9oAU9tU@-N`2(_3X?0t?SQ* zysJ?VJ}58$$9v%o|Njh}$v-B2I_B$iz&KuS{;rGfk{bGFdiHo6P~%MCIC-?T)Rm!T z9IfxRTd=aV>Ccgl?Vr2K-%Ph_ zOn)pJ?ET?W#;+-yi=u6h?S5E3OWV4WrPb+P)XqfFmy${4n+12585n=n6=4W;MaB5; zXS0d^b}BOZTjs|Njh8IXggr8!$gQ>gR?{V%nsJ*s?{?x7U=c}bt)^od20R~RRU)aZWPe7H{G!?)e5UIlqR_|Nc$yE$`h zRK?UsTPOQYPTC=CQP!QU(sAn5n-+t^a+$8L&+I$x`)5}DD*vnH(zmC6^{sum{`2;Q zpO0;Q{OzOpVc+n`{Elx?em(OAOf@d~rXTrkrMhi-Mw6rLnyM)(C-Y`%D@lBkaAV`z z8uan|k-gzz7x#(Wy%XpE;6CH4IQbnrEBClQ?mo+I{_s&mP_Caw%A_D=y%(EAgqb3n zb8p87$Nq`=pf~^FTl=_OAD6eU-Jy{jK+4E=3y4Jh&<@PAoc`Fmw#${bN z%KJIS*QD^U*c)~g=N+QSyUUq-=Dq(XG*|k!+JA)G9m>=s4_HVK4 zFv)d#b*Qo?`)~7#%mF^r`e@R(yEog8PP*!t>a<3}!{Zg3<=Lpqbt)C!ksp2E zx8>LbF1lYa>7;jT#>hj#A`?>MeeNksm zFPlF*`|8c5pW@n2NAY)*#0&g8UuVDL%AVYnuj)lpKcubtr}E+Jx9v7Q-93ES z!#?b9w+UQ#+w0rsWwRxB2A4lyC#9(4c6VN=!h)&i!t>l0oO~1;9r{Cg{qY4Ahohz+ zKKqf|`_dlUM;WD>>bvIqPWp9J<T z#nR|)J;Dkds|B7qZA{pCjISzm*}JE@v6t(AD%aO*Z_g{=dnjyE;HB%^|M~0>t^2cj z<7fWk{~7pVez+gKCD*&?$L3>Jr7dL!cUt{LUI|%ynr&SkZCNRoRxTHEX$6l*?2{EN zJVkNxAMea#zp8KJ`Dkyw|CT8~!rr@oIJWb#?f0_vw(Hi`OulWsWyK|5740;G6n?b> zb-7Ezug)|2v6H`Tt&ENCJ zM0>aPe+J1|o6om*dOXoy^fQ;~_J*3ZhmCgT7}zw_CduFWd`QbCo%i1S)s8DFyuCJD z-O63kWhQ*Il&6R>=jid7Pk(M){f@Kg^egSl$B*piFz@kiY46(N{Aj`Mb1kM4b$7St zz4~e2(|7xIf6Ki5e0l1;m)W=Pcz?VUfA#!i8^yH; z*VJVH2sPXI=&aZGN8CTQ$HgWe-flH#($Qq8==h~OqMeTFPH|DXaccFm)zege^>2NB zc8}t3mp`^2&mZ9}J~+$0t7P?hfy`hX-fX#rdGCwvh0plsrPDh9v}E{Cw`*bSOS68I zKU&xPi2sMzyNB0p?Dg6o{@k;HbW3}Vcesn%3$)Hu#2%MCaI{AI(tieB z@jIEiY4=Zm%>VVp+DqFkUg)LR_v~!F)v2mfe+k&u1$3DZDjuSzgjT_uLM4ll6bA zcGp!^wf*|f5FP*N%9m5xXaCrL{C;Rwd;8Y4;YWn$GxYNPXZUR#y6t1?(XERkV#(wB5+?hTE*;AwBkXA z)!N*XZhvoJsr@{0x&6`eOt-Vjq)P6YeE1U8wqU}^^o(C?n4+gn{~OrL{q*pE22O@| zZ)cVL+m~CH`S;NNuix#j%I1omspx*x-dv-&;`s8{_jE68@&3u}n1$&ADH_nlocmI)iYu=w|`__bydi~~|5Bkqecj^15wzt=S?^%o42Ls2GvM+VZ zq%RjQShTa2{ZV_1O>A{}^27Hc8T*v3tW7fZPF$K55aYLs&nPthspMmIX4}`3f0Taw zcl~($hiTp`et5@zFq`|ZbIGR;lj|O)=ipN><|v|D61)D> z`@fat(qX^fezs*kDmAZraYfy~{|raM!^Es)x41>`p6ln*cT3~A>XP(Bt#Zq|KGfe7 ze$?N*|KW^9S9AAxE{n^&C-?8-{E$h9p61mZmDo8i|DH~2*T46CY(|bO%(K2}$9MhX z*mrR&Z`|!;R;OYz$xz7H1m(@m|Kk`##K3flB7rb{jqxjB`=A1a0=`r)Hy))$^w*Q>utH)x@8olIc)qsxK=N_7i0T$%k;1d55H;OaoK!Ss!fGeYhl-0d*46ae;4dw z{V?CrH#+{K+HBF?S%EK|O!se(wD~sAVo8immwoB8_6g}4jh!+H-)@QRUskc$W82k| zT(`^Nm)=dU5#J`f=EIqt#{#cRO`K54zSU>Zvf0t`pS#0qzfApYt2c{PU0VJ2M_=o8 zx1&$T`gf*X-)i6bkL!m+jrSu-)qR^jOqUdTVpM440BiiIQHJhyW3cXSAF_2_nePeaw)xq z(#F{ zGDkLDGFlNV*jW_8KcS~5{9IU7-unE9>-%r%{NwtlHYae8{fg|MS7(!Lu4k?1+5XIB z>(1H5XXWRsT|M>o)FeK!eSg}$Hf_I>m;Y$<_O4$qg)HAqQWZM2a(B{h#%bI;H}Lx& zm-TgDmzBHd#rNqilS^0b-QugUd82FPGllddHU%+THV?T+ORFcwd1?P=U<~_{yXsZ= zQT1oHf8YJjz&kzp`TU!w3^CR2&NBdc$eoQ*-f5l`*+Pr5s*KBp|%~PDjd5=@{NZ!lC$L^PQ z9Mjw{_)qobAB!!Q_M9~jyk6=X^&uj2m)(SkrdOZbmUCMD?4(SVn8n@G)~g!!7B|Ga zxw=>U<>h_T-ahk@kGsDA>(jNrf6B{c6=ZLZ-;_3SNoE}PNAb=*?7VGzYwK4Ww(T*B zJ1KK{ws&XJj#+os?pbr}eU8TsP9aw9>r3`tF0JRO;D7l2NOnzymu9ZXeKXnOfbAZM zyUlzv^qx&ko~ql>{4`wOB;`cof^h5h9cO=gXBbyhAKum0zp$eDNY%6*c}Jx9=H_zj zZ-3Lf>GRphrF|x!;yDgis&=*9pE$2ezHdK&f7>6^b+JE=yIwN+&miP5`K5cZXzN>y%f8?y( ze+JJFCg#_RBVNmCTt0fWwQbSDyk$4JPU}5bC0?|e=>|g;-__gU{Eu|KAO4&9QTy=b z*Rzgwr#?>GHtV$e_I+1^v{x$4Tx_&PU~})&St^pD>Q9dqcv$^fek7hNqfTwZhi8i) z)k^z+zQ9&<+md(F-u|nuJuqEs^NWhO`5*ZYYX46C zG1=+TpTLJV_Ba+kw8&{~5&yt>?{D6_l!~6j?UJD$WiffolMLf>f9tH79%u6-Fgbly z^!-~upA?_jeQ1)o0LRb%+gFditQQIn|H$9F zCb#6|yPQ8dNln?^WuH}d?#f{Ks`V=AHMF~eZh2Yg#HwhpHn)Xv7Fdl z)Y^WwY40c3%KX;<3>mTfhwb=&bg%uf`{=cLfqk+SdC!jjRz5l@I%-jTjF;uMNac|2 z6BkcE#T3f5`dQ79oV4{G{oiI@c(dk@-#_IK?>qj;*Dr{0U3qp{*0&2=kDlGK?&6UT zwevCqPkSsi6XTI+IFPn$J_#oWibTH7JN^0U`ni9uN8enq>(bx*kGB0@`{Y-&^K)JK zhxev_RDLM7|4~2hkI63AO?BP=GX!5LUHp*OP5ZTX$d2vZ=PxhzxOn7Da@*Z#ewLo2 zb5neOSp8G@(OUXZ|C`&7R^78#s-~@b{M+i(vtzHcXGi9zM)0nRIxQ3UMCG#E424Y| zeM@iWJ$}DmXrJoM8k5Ugb}qkMDx3G8A#A^|vEQXFOM33a-Z8Rn3D%i-Y=KIVVR~tR z$ma{;oR7}m5ON9{jD%eT)5Z@t^O>}C07-FNe4r*4^E<$HEX-=;l_Jg1o&+T@rk zZ}a04J7Ma%Ht4qN%WPd+(Y<=>zKX3py7%4vweS8jh|OQSB}#7Yx*xkeKh#87y`I$* zW|UPv;pMb8pMU>eIh48Wkd4m#v$|-{=A&{(AKEs5^ge9Gwsvvc{j8|eH|1hho!sA| zyul$7Z*_ucoO;pva}pEhcpI$0LKZr}X1M;CQ{ znyde<`@+_EY3BvYy4N`;7Osh@sJnh7*qP(GuGhX`XLs*)o$9qc*7}FuOJ-Jmo4I7&`fuDjOQvOa+;i)*xcF#)Nu=x~-$iZ? zE;+?VwO{AI-v7dG`oGySR0)*|SS0GN-ruR_dkg z=<=Lo@ucmWM`OgIObL&dt=d;Mu)SNfGeR*(l0P#w zg}hDL%P3p%crEWzakVee|LP~G+?f*g=D~l4oV(SB_sRdAWvAf%NY~lta?3B>w`-eU z6}yYiEcTCxA|>E$owY%bbFK8P*$|H$6F?vE+!y45Si z#ht6BXS23h)?L?oe|PJyw|Y*zueV&k7R9zR^-(Ri-!+q$t;goCh;}c}HTqI<*IBzx zp=*DzRlP{+;@cmlMEzW8?;quX>#f~{H%H|df-pykM0NlTy>Z4v#;IiFI-XWel5(` z<<8;qdKPiU)GdYw_ZYJs-hXQ5s|rceFSVk3p1lewu8pp*oTB{Z!Gc}d_3=-C%-7Gl zFJveE$NTT1E%BT;D<&VhWqZBp>&|@D#`U=muYA>wT{y`&pT)YRHN@+z&WxWow0It# z_#EFj>wHJT$Nq-=^pEXbrovSvVLP^8*?M~Q>Poxo%d`9oXDpep^7L(o1A?DKewMw< zyn`ea`%nOP}uDFE0PO@2*|^->H96w%1u+v$0q23_pJT z?fB;8 z=AT!-1ez@8^VhDcFSxwr&h+a446)~@yeYYzX`j5l_12tS>$(@7erSH_EBF2%p6`6u zHs6c3pKfCwoSQ6YTshzF+HIwkw+g#X{z!l9tL^J?e(SO#L?4+VARv(?d{TJKr zv;?zm`Kb&OZtZ=eW-0h2a>HAxpF7h-rQ7W0Uw@ywPrf2O@Jh*fzL;0mDcRF6n`kHr z-#M-P^bUK*g^fCqGiQ{mTo+$kchM?r@&|3dA8Xm${`KzarMo9(Z|k}jyQ*Qq+?gqXTeLVG9`IbBTz5liO_{tv zO?tlG+C@K7vh0|m*^epqUqU;%&4NQUuW-4WHEAHa?{vq&FA-1B^y62 zwN8qZ7u zKaf8vt-WRMZ@Zamz3p%2v_5freA0Zi_U&lv+ewj2C%v2xzNhPE!9u6&YkzcqoABX# z`yTJ|<+9q5S4{R=MO&Eq3r4zfXa35W?#RQfnHd&#N2Ry3&pbp(GikwfKRcrz3_luP zI3Kr@wz{!POx@?-_3ghscBoEqewu&xnNW|}W6R*iFT^^_~BeHeK4fm(O25evSUB zmmjkVPu2yA+?IP`KFu{WuhHRW=c^<0-fdXPpXqyiSK5d9s;;UR7eCp5v*_LVxF6f) z7Ct;1bvp7w&K}YH<#+97r|B-^?d6@}IqNk0e>w&HIpLxcY94+bM2mo{2)Q%G$WOO^t}-Jex{8T$Dt>;HyQAhSKi4fC~(uxJb$Zb>!nM-pPqev{c3*ot5l9p zu{HZ%eEK(Uu5K>Bz>mz2+=p(JH~mw(_oMrfU;o3)YbrK*>h4o~*`3Qaz1IGwZs(+v zc`Y9naLn^&t1LJ2aGqwbZ~pK+t9RmWyNdE-Rnm8E-+b}b_)57E+$2UnNOyY|kb zruEapN}0R)?mce3r+gZ8YvVthf0Q@>qrLZ)yj-39FWq)Uixa<6BBu7(Q1>t zr1IW<;cl$5Q`%&{uG%kDr*o^uYR@H;Cl|i)B4sav$}~F|K56;ny(TRbG?7|{(JBC{b!haZJN#EBlooCAHUDB zPx9_@o9VNj-tRSe@iuSIi)Z3&_kQ*sxo}#fP-Vi_E#^GyHZFT{LWO76FSkA2N91Mx zXo~h9UN3y~;a&eDRpA}$bQfJVy=`qFWoxCXP%l1Xp zzARH;mA~oFlk2a)o0Xdcx_UagR^~VE&!}ZSzE5+@#kVGV(~jK_@?L3@8~2N2+WGM4 zbWY3d+R7HU{M=_voEEd`UEr*@HIyUN&&r@Lboj&pOY0G_m6QEcT8qe}jGJJ<^SK+qr$acvO<3NU*QT z5})Lfh~=%vCw(&JWV@QgA(~#c>p#Qc`dRz)mMlLrIktZNkG#M9S&Me+ch=eYx629E z3!YB@cGv3KqdV~i(-XIEs|;0{zwnN7_`UF+Rr6}A4ip(#1^wH<{`DvBZdZ{2W1+<} zWv_5N`83~emIY}ogsx5{?SliIXaQyTu_TrA#eK-H? zj-~l?!u};syK_F<AALbjMnE#W$S|?vI`Jvl_eIg(AcCU{6aXj?u(@UnO*Q#Xc zykB~1-N(N(7jJg^>HYSek+;yztT$`^NmLv^T+d;{5`6gWiq}^9-Ag~J9&gXddmwZ; z*YxO}`DtCAvWn@QdQ*yCEkCUwH}kEH=f}f`Hg0Ww|3}%XZU4IHAJbkRt_q!F5XHTG zU!SJUwIUcTp;?sp9O|Kag zcTC*>_{O>$eGx^jm%~;p7F~D0=I-@>EN}iZ*h;I_^BdRIP3sZ;Gj~$<)%$J}U#9K} z*}vsV)TuwV*Xtij`SQGozs>a8_Y?E%rIXKn`gi?#WwiUB&-ecwE&eC)fxmal`VOmW z(-*zG^}Y2Ef0b$Fy83PJOm5Gvc=s-5m;Z7Tp6lMb+$Xt3H;FK}GF)Gp6)*AVqD_AC zf~ANgjdu88k7I(@|WcutO8V(s26Op;fPpjI2KWGyc2M^tOCU)Z6>*dyI=SAOAJF?6zau z#*ZN!6S4&F$ExlrUeT1~w3*ZGn#PL*c@7q4&u-`btbKcOdEWQk*&nJVo?7&1&;8R6 z{NwI(?SC{g=s&|l|L&Un)vxy{?f6hLi$!XGca7-^UZ!8KEbrdZ3_Cb6u)GI!(8jazTk zJgv-`x8Pb^^Y5_8`!TP#Z+`jd?n}?yg|hPgmlxh$5qtL)w_a{XrKtJpW8#w<%XX$6 zIPog)KSL%jv)_-}6&1@qyIs$n?|V)0LjD)yT#;F=Yu{{k6>*MfIUy z=R54TC{5fGy{4k>+ac`}a?a20a@1WdX1H&&=@iSt{%N~AShqbkeiVObZB?y#^yI_) zd6w_Hy!C#2{D;mPKkA}abKG~G%5yYn#+HBa(+))(%!rGTV2|T1->3g~h3+0syZPB` zYivLEuKjyU(s!2HeC@-MJDYCGo{AGue7dvKAuP$ro0Z}5rG4^0Zk>Om-;!hFn4DXh z&U$^uvTI*M)0;e=WIX9TWUTkJB-J{yT}H@qsm7{_MK9I9eJR}ZDLni1vv+QOLf&O- zf3A%Exoz_2c;zSjo%1)_AF&NTRA;dF!WNUsN2UwE+_vr8*=nZRZ3nlU*3Le1CqC=V zth*_U9VfR-oJjC5fBc@|PteEsBWJDLFK-nC4M$2xWNqM9yQFw&|K>*}XU?4A{d}kO zoEiA`kALi+7TDCQpR#T1ezyJUKa@}H(Op{6eCR)e z%$`DDHs$C+1wZ)iF!{eBXHMWm-CT6@V)n02B(c_sayZgA(!E)uMqtOyUhFAKgzMB%}t2V1@*_!OK zx!2lHM1{Zk_>ud_ea?MKFXfatQy(cWi~8a1ICI^i$!~Y1PM+>Lx1rRzB(|A7xIw&V z;?b~==?CLQe@s4Dr*MV8ZBKRm+Iv~^WUG7agRYi%OP}tJmOgKKcFC-g3GTvcavpem zT0BQVx~%KBR9D`oEdTJuKbKekxwro6twT3W_WXOcHh+@Wy^f#OhvoVHsr(3bxO)5W zKiO+vc+~?xMrCQImewxMny0pXt68+_?6>B{zATfryqaNjR$Irl!A7g)ga09axgXAl z$Hk`uje)k9FJE_&$7@ufzJ}L+sWAZ4bF-ysdfixjC23RQkSz0`u3YBA2(_ zo%DNW{ZG*!ovWAZt(VLH=lM^r{Ac&$`UCGpYVtmcH=Ab5TIF-OFMBboXMJM1>6U5l ztMb+?+rD{UpIM=f(NDe??zt(atMkmlWQKJub9nvsT`X zbq`XT-sLb=+$p?rVT+fo-u?U4f4~0`pZn^4-L5*Z{|x#^e@34-k#GNAc7F?h*Prmp zyHqoOBe(3osd_8|Q7m7pk3@9jTV)@5JbsjDt7 z_n+ZUzxtE&{r?%#;`x6}j=Qu^G~WH8rb(^%p|fl6o?P9%I?XpA#?wf}VA08EOlJY*ejO*j?_AdQWev zy*O#Tp+WHT^R8d*o|kk?WbLGRjZWS7+t{vTo$WDAW3tNRtsx9Xo;Kxb>w>(lfA+Ii zyZl#t`ou+(eN8W}thxH1;q1?!^Hu9E$uVCw$>o2@k>O*dB>$GVmU9T!k z)A%`K<}T3Lu zl6YkAj;WtKg7vtL>`d@uUsHEspVH(*@f=&vCfO?cUW+?%`|Oj~fa^-9Qk((91qHQ?BDXdWBzlV~Ys(AW&ggvnI^#(!!!3`ACex4UZ!s3> zd}6R6x_9?<>nm)Bd-gv2wNm}_v)296`Ahf8ZVCwO-DPt9r0en@W!L_#6qkN@yth=p z>7P+sJg;Z=`nZa_HQt$vw!Yo%ZMvmu?%qqwXQ&wJ^vdWMoMT9CDm+`uyD%$UYTv}{ z%JQYLFQyCYJ-__^me@Sz`MaJ;s`SkJ&!D~G`P!PB#lQV3dg~;Aq{q$={ZQ7Ny7EWc zXOHQ}tL7BRa@=-OHMr+4xWnycK{Ch8`5EfgcSO%`{X_JjDZbjyQ*(BP|MQykM`%~o z{Y6*w-ycQ zey?)v)<3cR&zj<1XTIC=`#;0OeQ*68R#hGSaw~0Wd3o;gJFl(_xLk5iOuMLaorldO zBelfg_=0<{_Iz@9wk~{1jpfJuZac+4HrM|+FTI!fy0&&ryz`>tD;=V4UAo0RZCmUk z+x2P>xlcLfyqlc&G{9hf=+nP7{~1nXd)wQ8{$paeKb_-G;=}xwUq{=vK9CJ}zP9H5 zSL@&2vQEDaw*F_hrT0!R$z{^LM+xmhN`{j&cq;C&W%2jMaa{kWQc?c!-Tu-CzgPd> zYZj??ZI5#5s-wKuCzmGc?)m6k`pi{wlG)U{d_!i|9D@^Q-?LPFf5hLl#lIseaQrg>ltq@P7d1cv0h9j>e7$E638N4x8NmW;dgP5PLS>5d}5 z-zA^9p0mhseYNlV&mi$Np7F=z!*S9V-ha^*`yJZ!=v!>~k+9D(x1D8*ZXc9!(pb{l zecJYphIE}so{Blsj`LD=*R0mav;0wgG@s|iHUCGxao6q#?%TL8eHyRT^|yittqt9R zj-2`zQ`37~w@I+o!d6`GW<9 zKCoZRCi{7X(c>dk>0MuE=X%Kt-j2D&vZ+&dLHns4o~e=bB^Nd?pE8O0Sv}`jg&z-B zXY7~Q%Fp|w?O@KArpdD<9$eOodlfLxf4WNE(rK^Hd{$Le^u6#i-tyVrB{B`QwVBQy z$Go%aq%!_JeYWeX$h39(%RyV;<=uC^(5=*O*m5`dz&xcNd$}^USIv&is{b%CGtp2g zzH_aTGt;Rj3KP6@UO5}wk~1oh2w&qNaqJ*R_`zEdDa^N(mF?$N>~E?G|EO7K8B<|> zMCy5~jpLHYjOO5d^W8ptJvY~M%ciqWZyn#|{kt{yk#(DLhJxGvdrPK1xm1>GTK{ME ze}>%XU!|J6-QI`Hs$c#(dE$riX4Cf_bwVG%>=&w&u9$cFXq>{v_^lql+GeZ>Q#zZQ zynK)Nq!~wiymcnE@Vq&9Bt7e04dajUN9TDe+8?p^?s4YZskeNcb9!!O@+0rpnx&jV zS5^D7{lpZJzm_nRAsY zn;15!J@1{aY{`D)j`HG6)z6vQ@oR5|S7om+%-Okg>$`tmzkWu>osKoR?7Z~H*@y2X zYJ7hbKd@f;M{|Xvjn&j`%OA;ey_7m*kmb=NXmtC)XmFIkIE)}+Rb>=s5{p5m|`79sYlp|-{{JF2PciA-eKC#a2 zLK4ZB_i6v}pYQdPeaA5hDt=qq^t$Q5)tgHIC|E!|;*>(PYw~GEVh}KEmukrfuZt9~OHKway?$hkD z$X#+t;$?GS)Ah4TC7$JbjEoiScTFpAXg-y-IQ15Td$Yb z|6t2hn;g6LKLd;QllMG-WIpcgKepCBa>s{ni*~ix>F&RpZ{u#ao@bWIt(=ucZqvTW z6=tm4v#pV55ld3&X`fd|%XTJTtR~GA>AIi6UoogKS@p{)E_mo`I z^8&BlC2jp2?YgL6FMqasSKj~p$Mlcj zqkYUj^z}RHM6*2*=Kch~+^LARSwq)s5>p$Vwn7%yBuKWqBi%5^a>;#hixy+5p5zvhL}kLJK#doHg3x*}=2XU)=# z+_hfbPfxZ@KUc;rlzVs5p)FJXE}1j$?Z4iC{qHt??l?S6Q+LwepugewSAK03^>)3o z@R~_B_wVWz`U$(5EFbUOYT1^W%3>umXC=Gx57cP>*q{Hg z?8vLC^mfy=_Kvgc&5lQ0u{-@Fe%YhlizAj-3NI@=HF2ADsp#F9JE}{!Y(7;e;}P4x zfLX26&h@OWz@^=rjQ%9AjLWo>yrQ?|uh8!!;hl2ImrY(Ocs^Ho%ys#+NV4R~JFgE| zoIGG}5&fh3_{D#R&OZ?Qk-onEQ1X8Uj>i=Vnf3XXHl>)g*>Cx|Kj+h*-sAGsXZ|xR zt9Z7ny1r+hR{I0@#ytU(ZkX+V-0OEq+w!YLar)B3waPY3EPYrvndq=xF+Tcf8f zdA!u+UtP?${ddo;f4%HhNa|?~&ze2&t84$PyEp4y`LucJA6@q!{>%35@`3XI47Y{+ zb2Cd9zLIL4V|>dgMeww>r_81!2ZfDV+*RAyc?xV+f1CHib={9g??d_zS;cpT2Y)o{ zDA9Efxb)SUJJ;~xUG-_(Puu00EuX8*oVELgv0?&m)%An%vhja>euO?;FOpSNm05mk z?t*#V6*FfUpJehl%}{2+c-`i0;jBmh88YMhtkxg$<+^L5pPIVz^_vTKW0w9XYr1s& zt>9smb%ImgrA-ZFbMs7l;&y1=lFH?$O;_iC+8j4+cWrK6Tgd#)O;2yC{;*#Dx;pyY zU4M%o`+r-$mQ&nY&;29q{86!6X*&fQy>U6KBh(qH#HDs9217qyw%Q|~+OV<&rkE3cNX*Ryk?Q)8cgnq@6|e%=9> zHH;yr4bx3u)-&4_K3Xp>FO&ICKUXd``0%dZncildxw?CV+`mOv9@JLZWcDCkVNOF? zA8UD?W^Cpc?t>3+uF?HEY4Mv&uL^Cpy(%(_6`VRF*@jK#@ho4BgLZv}zt_zD8+HFm z`JP$-zU=$`&-3kziu(um^HnT<=zr)xgY@|&HRf9{{yFRSIPk;$hFrb3x=Z@hx)h(B zwn<&!J}GtTkMs~e@j?cM_kHvEzs@Z_HjnRP?edoQHu0=&^UIgtdr?)kQg-Soo5@iz z2enRpc3B#fZ5l59GyGu;)AHGS|E@57ZQppGyY%E_uWcXedzXsc%gWqdUN-H`#?vbO zvU7N@3(s-gn&^JVyJrK#wcg`zh1cG=^+P)-daiul~eof%&C?Ypq_VxEsH zEBSB8i3B~fZhEWCEcCE(RrV||^Q)#J{~6xiuG3!qGt2+bx@+I-tN+U9&zojwn{6V(Rr^ki&L%bo&RRH+o_H!Ig>rPznyA3VJZ6Z%l$n| z|Ec|zU0nJyyFOgg+Oza~(AV(#-uFLb*k*oYdw2c#E%rveFO7OD)!WalU2#9sOgHCd zoW(Q)j&mx-dgn7{?S2+}%6-LAT@kO#VUzBC|GE9Au3fmQUaZ)yI`7X_f8NwA{9yk; zzSBnYaX*v$%8I;PrP~J!?ztDQJGpY{5vkJ>zH1k5Yw?@94<42S=|`W_qf zIsBt}%bwiSbrmf4qE)7giS2USzkT}evNOjP*F~&yeU?{pQ&HmfNrvmCuA0@SE?xJo zKlt_Kt-D-)=U(ssve#bT_?bR)Mf$P-4BT7hch)I>us<}jguD7j&Sb|QwQ5U~=D*B- zrhD|J;2r6V(3Xipof`Vziu|r6M%79GF!^WvBd|Vo?T_%tOErG_ORqkgeN^4Ia%-&5 z(bkZEYP}TjjsxR~Io7V5%v|&r;&-)*?KmEDy-G2rH z{|EjO+uxjj^q+w}?nU^-tU9F~Qogf%Sy#ug?21U5_v-DM&w@AJ7W_-J?#Y;a?o;uX zCx#tK9tTgja&dlJelhIs*MD2$Km2E?hQ1+NBL&=u#S@WgNSFbz1TIGHIvgnta-)%3;omaKU zQ|*J*+dC^?aTJE-{i$8`a#re_>tf&TXKdLcwqAPuwCEDWs~a|11T5WjXa2;9k39kG z3ogbS??|jZ>woy#n*R>B(7Q4y2@MT+eG8zE0?fo%s$g{dvn8) z_Xkh-?T|XZK&7OLil@u6P6#qik9;E&aRhStp=?ni!Dc*TCxu|*s8Z~f`F zK7CWm^SilT@n%`~sPrT6+l?w-J73QIa@I+fJNVL9@7%akue|-YIBj;id1NAc&#q0= zf|yr6SNIsS{B5A^ts0}tZ{M2gx_kXD@7!X|`OakZ%QfqGgtNjvp6WKy*`)TQEGV?` zpw0DZv47I8|KrJ$GyIS@zr6mDWB#MP_O2K6zgAbD&B~T|SMr}>o7bi_mOFNTYAKo; zd;9#1#yN}=4QpobGhx;lFb`cCYUo6P}N_t>t@qa)%uV!s99?90(Lk$w-K7Nyy~nn9>`#4LA9vLzJj*73d31&Qk?X%||901YL1q5Peubuerx7N1D%tr4lEE%rbe~dn|&-{b<(evUz`dxNh(9#N_A@^h?_2+5$-lp$Z+}G0)V7PvUG)30{H{}{{>_bC zUiw>A_5M+Aeeu(4GlkA)K1zOMvS;?;*J|rtmDexa`rCKDV);YS{DsH)4d#T7g(ciI4pDvrPW4=LQr%Q!g*xYL_`qL#`lpD_(73|Ufxc^9C#rGpo z`8{%WU_dVruHAR$v^c= zfBn&Zw|{1?rSIN#myJ~{q{-x#X9C6?H)fe*Vb6&OGU+07{v;`cDa<4E7JYi z;)%0NTIRot+X{tb7w8mWa4Y4+{wqL)_2&Ho)*<9NkRCi|aSKL3^trysoj&mef?B!BBxZ;6tXk6TmP z?q$zj_%tp*T*Xts!N<2%u{nHU+~PmXKdj3i%UVZX`;)jXK03ld{)pAxvv-$#FBf*_ z@>5RJX3C#qtivdtae%ow>Q>;?d#~3FyAvPD%2}Fx)QNIEyV>ha&%T7uk0!oTS9R=ZRAk)2QnKwo z!{)DN?g{PRc;%1R^jY6lKU%FXniVz2s(v!ho?yf-1=#s`nkNa zs(0zTx3{lL{#HAa-1~cfJNt?snGgDZgx3jnJO9rA)^>XL>Gk%fK5c$}*W2#y?%D5t z?k=8J_Ppmo#rcjuzU)Wxo2=6N_5`o0Xm>WhbZ1-R(hB>~U6z@*Q|_PF2|jeuA-Uq3 z@(i_P&2UK#ri4jKK_xG@=C}Q-TCTpnG<)CmuzSZQ_S;8$x><*2~M$?&Zo6?y5U>ITt)V_0C0S&CJjTx(T-epVhAaSwC}C_;juJ$+uRlpQXP) z+xzAo`L#dRyI*{;?@~!Wcjm9&quZxz`~O{Ct#{(Br|%JthBIA)^Pk5Q8C0Kd+%HrU z{Mgs~h*W-O$xqpmA8(g$yEgf-RbOj<>9e1W4aIv&kinuZ4F)J??*6g=!%aKwA5xDGKl}4?vh?ie+trI1 z<_cw3KAxvAJmp**9vp;mt{>xi-Jl#~{`8ay^ ziQGHK-cNPEt;||{XcB+clN#P-aX+RX_-Fn>=UUX;>0LI~^^&tL?o-GL=5CwDGEH~Z zwu?m*4f4VhZXMjPBw=R%ttnOZM~n9VbI&c=aNp(E{dbw`Ki_w{y8O-Y)tTN)vc=>K zHpWXuyx!He=lb?4!Rg+a`O2T3xonDVt+=8Qw8SsM>{I0S4eJgcvgi2P&rqXxu}1UZ zdvR&*+qN$8v9I!%xhvgSe0S^7Gn|aM8y2@$TxXear6*5$zVe69NA=&_UY~VeaO>T= zT<3$^f35P(U0AU_c>9j+8Bg=Ij(P7;Y4SeCmE>Bd6yft;$H%RpI`=m#@jE zUp@72?#}wOvHfHB!|<+u+P7ZW$;V4Zyp&_V`qp*c(&fB7XX`#ra#n+JL5boU(R3oN8#h1{|r*X$F|Kgxu9}s?ZduHhuyF4F||Fjt8}0ETPDs+|5PK@ zlxIe%DX_+c2-I|cTktWwW38P+#lbDP?nnLQ+y9uRSBqY2UTM3uz3qAC^7K9XTT|k< zxm{e4uNm~9wY7bkdE1J820u<8jT87dZSNya^^db(ZG8Pp>qqLA%b*j_zWfSVn<;zf zjQC%X-Fv1hbndU6)_wZ1sL9tUxqJUJn4kWazg^#S*P>M!vugLd{AV~F`#$?i`m*D{ zb$^yWdj3x+=h0vNvcJv0cRkM9UHt4n!`3IyZ`awkZL_idAobq&$CdvK0{LHK`OinJ zTNrZTkNw}dr4^H=-qL?(^Zj@DG?Cn&!tC?2K6CTm@_*!9@i1{``=PfF53gRkuuYr0 zb@wOf*vVep&!n4Xe-MiOZFpGko-@DekIIkPM{b=zTI+wfPPyh|^f_?vjtgIxY&FkM<~?()oS!q}U`b`V>*2q5d*?jO zzV`Z<9aDt7RE@^W$bR`!R0a*Hgtl z?;ofAR&N#b<~`25@MZM=O=rLDRd4)y*Z9b-#$DG+dh0$N^ESVtdL`xZV%9kAv^`Tp z-+A=8i&8{K?uR3<4t`i#$-^o7e4ggt_8ReZ zf2_BEcsp(7`j=KyqH_CY+}gTtd%e&d_gwuM%d#W#l-k@>ms)Cn+S1yt5T5zlRY#VcXQ8m`WC~H6v(XYt@iY1Xs6>e9kV9){u-wb_m9+b zzW68qaeJ@Z+DEnOhwB8k)eAZ<_;KsE^Td2bebu|C9CL!S7jKr=>1lX-;B{BW*0o>4 zf9k6Ii_3O>ul8Mg>XIp?Ww%#f)t^84zI$$e%#YQ-ZC>89zo@fj%gr}mX3vQJ{ynO- zJBzo3b+^MLud`a7Dj(Z;WzM|`y<*6dy?j&iF@Zk;ZTHXk|8RI~BI>O3Tkg^Oz|4(v zns=~t8r@_toRMtpXA>OrqxTViyS!+=jbmwX?W&6WU{BtSJLY}OHM0pgEsz~#cC$)P&$R9rc!+Gm|iPz>Ma_R{$$KjVBfghW?2mM$bb!lDR+pjbCWY@2~d9OpY{~X)3 zhfPr)HZM9pU05l}_*+yqJzxD_{qpsBOYPhKW&hb9@ArI9MZWt-wwal0)z&`J_`14# z_UrC$PQQ5@bf@l~f9Lkpu)pu8|NZ2c_hX&A+Yf_#azAADF>Tv_xzzf2x?Whe4uv&V<6k6->%-g`N%*?(8x z%ePUQH$B{h)z8ZDf7DlAv3|aM&_}lU5A?+<%>6IziC+Ap+2xwa%0G^x{as)A1@Cz; z^;epivSHWU-UDhHPuWaD3b%GKo#E`2ee~^n_~YF8)IIaozI&gvHEhzm`>(gpJDGbi zuWJ3_`!~0{d?7e~vl6boTmSYe$HQvw?2hLCuT=SxDlh&zM zAzWYeyX1wGKmKQUbMi62*sWvRq?WfAu8MT2xw>Qf&TXIGC43HwSe#NWZL~l%WkPOw z7|W~qw?6g$XE?k#JZ@I>)4EI3R%!29`?_rV+pA@7?_aa2vWfrppMgF4hiC4?ExAd? zYww&BY>6szRkm6+iL<;=h-s&Cp?X91re#jgB9hBumrfDdHz)XIZt3%X@lStFnl;Hc zH~;nLKTTa4O9G(BG%-Glm;ERHqx;dZ=4CIX^gF-&WjbWN(Qaw_s!X9Qv0qEP?4m2f ztY?=AzB_Exez-xjG3dbKipdYwcbBeytA9kS&gkNn`I7F7V=|;#|JcctZ`!$Knrq?H zXw!QyQk)iN8lBjwJ2k;U%syM28+7C4d6`#YRp&M?yq_uMdT(iV>7p#-h%;(koLqSo zGb2{y2<<$mkZ#)j+adEq%nxS2{c;K)L)Uk`(>dDTzU4mO>)@Mv-z|E%%P7b!P>OHJ;njbJgSW&R zO!#?!w0^nEeAv)Evpm~&R*dJ$+wM6@ea4CkN1WeG*ws9Db^+r$|3~p}-ijZU@0gXh zdzJsgFPGM;Y@0gO#!@<4sM3g&ML96j(?jyAx6%<#N$CS>dxH-fWR=~MxwP=wI_2Hh zl(%&1=PAan)OpA-aY`BMCWr6qr_|ZhU6Qh~m$gw}==@RH_ruotZGVCjlUHS?M|E9# z^!HisykHa8!_pZO7fB{=oEJPVS@go~(#O*G{GxwtS^skV<7bJ|T6$rd0&ib5&yPRu zuY9X?f6F}0kNtfmwa!Q88Ex3}VO!^F*>t~)Z%T}Fi=S)lI<)z&bi0reC+8to$s{&E zBYx`#>u=`&6Zrj~f#*)Y@ZU8r_TQ}6|5kV2`c2vO38kO??t=D1-p`-jSDsQ?5@+?X z>5otIQMLE6hNnOB`STjdM$FZ@+1;tjI%C8AMg^DX_4Wx#e&&y#uHD~qf6MeE`6547 zADZy`V@2$j-h`L`QmS%W&zN)Q>%3R*=ljmLe|NSLU&wo*kDJraPPNTE=ACx^e#-T^ zZ?69lO?y}6S6C~&f9aF?r#*A;@4fWped~V4b$d8JN=JO`{kMGK>Dm>MfBPbA%oo(B zxEA>=axN^}SF^9?Pp(kS44%U>A75WOGWp2I6ALe1S*`iP@s;+`f2@C^KI-?@IIhT) zGoEzZ^&?B!vG@N3>)%M9mY!`Q^hPDtSNmwK#HnT-uGN!_*c{fU?4SEKCr_=Ro-e?|!CtM{#6?`+@J{qonkz(8HsHR1shxl`PQekQry+1OfiVBecs!_u4o8B)%-S1p^V zw%y>`IvvyXBIh5TH~;MyUA^4+-WEIm_uKO)v!D9=z~P`J)6VYR^3adrZ+3p%`slh% z<`VU0{jbgo?#H|eFRNNVS#**=*B!42zk_kfpH7uO4DdXmwDPXk((lnfZyx*4&>i;T zR&_*S%sI|n_2pBqto@-|wR-KlTGPUZ_a$R1>^(Qei)H>Q@6dg>_UWE0TkZ;{Zau=O zk*VOFnLgp@GdG*P_mV}`3*KmV{&@OZ=STYE6sfIew*NZUx<1R`)@|<OXl-cd_H{Z=+&uBv6<&rX7@kpexa+Y+w)O- z(@Txs{Gv_D2CAnvy=N&ov6AG2Sc7|iPI_Hg`>W>7e+GM9$1M+@Y}@pE{=-Fo>>l4X zQGU$1Ppx9%?yg(sWaShDPMiEF+c{-Ik@&MtrfqzI9S@qSzVie&39Rqq=4q%s zYPD0pd!OpQ59x>cC13waYQIvlR_xQSn~~+rItA9tWq$@9O%C!BD5_RDn<#F%V(s1i zC!X%({iFA>|L}Riio*dP`MbB+3r1J;{du)1`sF;i*?tvzJ^_ah?|t*&_$Jn$Ue62F z%j}|Fe!0th%&NX2^>XUR+`g5M&YViVp1&pkOX04t6#i_Hi|;rkLms)Rtb0_cw8#5f z*;O0YMSnu$`Qq=rzP>AO>BEp~l3U* z7x_=W?p<|zQI({VS5a>H{(C$3TK$UOb>)GL_HVO_#fRtf+XyeV@BPx*1_3FvR1&efVM#!EFYw`T-{%M-}v-SG3A7AT!Am4Vm!s&N-e%n9Q$?>xfPdC4( z{V`?Mq|A2;-ZR%FU(rvwt04AiVP`b^nTa9`7*|jJAb-SPFiW1zPN?F(|HrhYk7i5T zYF){ieCyS8;a$EWufNT=={a1gbK*4rRGTIj^%AdlWfzy+u9aQ)XUY0{Z`Idz-{n@_ zz4~UabbRfdy;gbsb^WB>vQ?pfq+hZ=ze4@t{yv=+gICw?>0W*tq@6x>TX*;R4R;;x z_K|hle;?}7Sg8R!b{$3^S@C>( z*VE9nPSw!V0$E4KDNp@v4>B~r((*d>(Qo>S>?q#5ebd+YE{OLL4G!AgX1SyxkF{$} z!$MU@h7Aho78XYtqNRL0B*e;o{C)msQ~YDg$=g55`LC~PyM6dp=p@$dmvtTl&d~{b zu^=Vki|lcEub-DaJ|6Ehk-zac^6KRHeS1HOShcNO=Daq3$E>HtYtCoNEScDJqP4(q zOD+QsPpEx|oZ|I;%qKr;yI=U@x+beYy;ira_t%a=#RIz3l#8pX8Y`ukYvEXKCNO=p*0ubuVtUx7Ba;xW1KBKeg+X_SDY1Difv! zDxF%gz}nG&3eT)>s~ApRw$WaeS)XYb<@D*!p7SP`T$XvVOfufIIeF@70me7%QQP?@ zCgw{m2@CtEw8yOuonx!H@tU2Z+gc;epu|D=?C#P6YfJ{w}U^1 zP0x6@zmoY>c)@qc6W>K2e2j|yBlnU0h_t;}{MLW_*ZD4xjgHuGJ^g~t(JNAjbx9!v;0uY{;0fXD}U3q zw-0Oc9*6ws&V0=4?Dyf=e7V(T%kx*U^{|hJ-W;lQNC;EnRLQ&5{5$-Z zoz)Km8~#T-(ss>1Wc4fd)jq?^Pp>>n|Lxjz$K6EfkQ{Hy7EgazV)BLN7>dlm!h_<&R>(Ex!_K$-9nqQ z<;xRKEu3<~Tll%<=i)u@P9`4>x?-RHpP~KNx~+Qa%OCDYzmmG^^7?rXmrqyAmg#ms z_9xB2<4fZA?crzc3zXcjW6OO1p!&UB_AcK2?KZsZe-wRFSDa2L&5iLaP~{f!5qsru zI!o+o^NL6d=Q&sJi&y-PZ9g{a`nT;}rn$1inr}i&t2)uyZ7y!iMON|=}g_T zdy3xo)hcV&BnkEy<+80aoAOrty8C~Ie3P|z?|s_#>ic~s)px5sORoQCVC*jcv~}vY zOSe_3{hA;8xBXN9ad%x-Sa8LPir>y2vp(j1_07A++7y{$*zstZ>-Jf9=SdoDTxUh~hs&-b7FOZ!kf%OAxD)w4TIx}{Gnn8m(Ts`l9G^5SG3 zJ;#iPTc?G!iCD<%WGt2`d9*U)Kf~tZ4O?v4!!Nx3(miuaY|xF!^ULmMZ7(mocz`cV zLwQ>t-_rSm0zGzV?CQsbk%W!trPGgP;jZx37| z5dCwW-_wX=$5V8cGZ=(2RGLlwXnn*p@uRWdhwmLva;0RYUG`_MRX=84r;*L^Z2u(f zpO5$K=!~-zyBXP&5t*zZ>7mjUSaa5WwfD!}n?G%R|M}&Lr|<5?F3q2n8C!XG{q%Y5 zm*)xp$o_C8=5?LqrMG$d`CEFUPKREZ_3PKSXRqI`*mycyWxDZhPqV8LAGkL)Fiq^! zUU0pR=ZE!?ec~Ve+e>vNVqW}H-<5qg>du`r%N`{uOCMg{*7NU~O^$Zcf*zS!pXT38 z^JVkzm@nzxX5(D^sFt_X)-rc1_gpVuj}5=#Req9VV> z&x(Jt@99lpv1_ZBzpVdUUv2aGm_5TD@y81vu74|S>vwgJx$way`?}Y~zs%3*>FQZ^ zZSyg|K+#I|p1$WgAMM}#e$X%ZvTU2w)wad;JW|Ckb+uoYAF7`1_WQz}GipLH3y*os zoIlAwDEs1yC4XPvU;p*j{yo`uw{F?^?AP}`lVuQyl3CY?A=QHooejqP=|3|aC)rYJ4 z=klKH|ERaLTIPv@!+DYKd_4z4Lxgq)Fi5QX)#dHm_Vv!gFZxY!>>utmKk60t|L}H8 zRe0xnJ^ioMo{>+zcP?k}@+h-*WJnC>+S{;}pQXm)Vx638soaEHOXg?ZerC6N&Alu? zrsr9!V@@l&N?L>}6kpoar4g0;WS@8Hrx*OOf9pO#r&r+9Eb2$&L@(9kK79R_KW0;Z z;HCA+pPp}@FhL?+p@~Dwk)e%6z(JvjalP3`^F!xbtFAmcxpa>jpT>>2X8a# zEnFB`V-bC$G-9jt+-$LObEOA9lRZl={rfvBmB**=_KBa{OV;vt*XjN6xy9D9Utmw{ z!?W6+yLUB*URrw9^u`QtJr<>NwW;o{!q&m^y2Qn_Fw{bBimeTrB1 zY4EpN{aw1gL;7&p^?+#e+c|pqN3MP3uI{)wr?N0Q$zt2dtqm-pUNPIIFaGj9Z_oQh z+d6ONmL|HH*X(__Uu^x$`=#aCU+evT?0?YTuutp$x}B>t->c7AM+=)3>eywBwM-b-)x-F&p3FUGtjy>oWVN6z*)t$J%z@7U~^(6DB;$o!A* z_2Pt{T${6J>r~bIA9qDnzOMThwmb53ad2^M@{jsy)7~!o%fIy9KZzQHOM4ubz5E^! z8I>J1<$Y;#s@-xW|5BCk{kQZkx}}_cvm@WMsfoeDUj1~ml>EW{TtDiMzJFAmo&EFc z>ruXOK-1Eh1Kdx>lV|Xg)Z~4JE@*|I&;RoI~{|r95Ez0-n zx-Rx>+RoC_eDzyj-g>?>7MUb!S)C!lWal(r@9x>%7Q0EHMHMbG!Vza0Mj zbp5BxU+iA53-imV)_piU~Z$3Rv{nDSv(p5J-?$uqG9C`ZpUaJSsl_WOp z4&9Z@v~T8q&m|joM3-?ET-?yxpLU%6^HTdVnQwb9|9rjuURS)l+O_)L?Z0O}D2wC# zA^D)bb^E{c&b&Xi52jyz5_YAuU+Pua#^d!em)6X4y?XRQ#?7V1DMuc7lnd`(TPnx& zA^ecMY>m=28^Z^;*J>ZR$97d^9zm#Ubia%o-e!p$50-Tt`tR#etS&HGZPHpNz!A9=@a)XANa zGV7^*X6K{!N5>EImIr^FQ^)jSPE_6+_D;3!+e)_YEzC&>xE{F2aL>dw2gIC?x^(PF zKU;Bc=i~W82dgt5uNVF?jh`udyFKq)l?!X1UeZ1FTKuHxY0p%daKW8&dAT#!KD=$r zseG7Ws^IkS`_Gq~KYF`7+IGg9weP-#ykAQ>+{QVWjX@h3wUhexGS+T?5B*3fNMna!(GlFBW+UuF4kzf?;I<1-lNmY znZvkplUKowq}xdWX1xogC-3f`SEt@v{7`S}@`|tPlrJp4+H&q%alA{@y~?$ZXT40C z7F{NIUO8$>rbtu48-rysljD7(D;~SeJ`!dpTYZpkfBACRAX~FkZS$K~r%IkU#wK~y z<9u+~)U|(3*GF6b`n~=7wq25UO`lwwH$CjjwOi$v<92USIw`GPdyPe|@MtCf!TY>_ z6q=9xTKb=%Ve4&up&w1>;i0uy6&-Nw#kpC$B@cgm$efp2`xhlF3eRN&;Qrt?dcVTAvw*L&b#KYd2_}X5% z_Tu*A78AoY9;Y~1=RB@zG`O~OdiqcM^VyZ#H>I9_u>0M9`i3z(fY7%?!*0p zFAby4r!LE$zWLRxiS2JE`>2|G zKJQtB+VUe}`}Ag2w@x$IVf-pS(^y#{l-a0habrVP`&+?B9d&ATXZMRd_~3iCjqtQSU21##UJU1==!)gD@(urN%Xrw&bAXj=X5i79 z-w*rQyg#}hj2ALp&FgnPcBgN7z@*nl{>e(0yv@3n&7E(n^VVZ})rK~U$gqOnkISMP zJVmcnG(Qr(eq{em=|>{l%>!RW?e2f`T6?ziigmfEd#5Itt<2Ubbe}NUS1O_Dq>|P2 z+}(Yv<16zsw!e6D>GzlL*O&K1X3RT#vgT{}-}vXZ@7vUW`10d>;D?s;tIjXKm0@+J z>d3Zl51K139#uLnY>>E>dDl(}>urpE#j&l4ulMO)s*xA#+x2X*XwR&b35yj8dC+{ZGz*?>ffnn|t5>i2b*nOJh-Et4N>&t4N@O%=RPm+h*|} zUC$j;QG8_M{qu`sp7`e{u8NJ2oU$@#lI@;}n-}spx~ck0vNP@3fBZiK_dc#4S+5Tj z{@8yse4fmeoZP3EzeIQcXXuEU+WKVTNrg=1$lh1pW)TWMXLfL{GGJi7m;HSHt^JSY zH`y4b^Ea7-R!ORN+QcW?c4gVA?P$7p@7RGqo6o)1jQw3+Gog8Uk(EYSME=y(y3xPy z&VPA*(SD~#JGWQqAC7G$nbY^#eOMz21d`ChNaSaZJDyy~0xJeC-2QN7#! zEGfB+VsVA19tNL(Vh>kKJ50rkj8VJ>5qF(N|35U$TN+b-zPC1XP+2v zSaJMF)cyuLn?J7mx{rLgRk7+n!x1Zmi#DpJt$Q|AG#y;Dwe#ENg)=9q{q|&IbM2gZ z_dDC#S${t@)s}zut+QHw^+nSBcRS~OyM6Nhhr4CAuj09C6gK|2&hzPhra+C`&ax|t z7nfYSnK~(6<=A!4XPk9S@W(vqezC0mf_Xvj59%l9rA_yo zUf3U?m;BpLx%u>MjmamunrwG(%GQ36w)v5SP4WZjn|~4?^}5`fE4ohit8US&SNHt4 zzPvU0PMW5QR?)W$1}rtLcCou>mj!%Zc`R05|K6qScYD|Wv|d_#sW^7>&!uPP-_PA` z`*!8m?M?sqencOzI(lsFlH2hoSNgKwth>4@>-((kqik>Qg}6P7v{~^z;Z1+mBz76D zx7knsGu(_n=3V}HZt~$z6;o?!>tRaa*V!!vj!wO` z_Hu^km$ScoReKNJ-r(KV+mcglW??_qa=TPk^o#!t=G)I-$c$Uueec)b)AbXd&Wa=D zM!2)p2mdp$yxYgTwDsH0^K$*63Y%dbK2$FP?MX^cAuH84mdu<>ttJy7_14!(FqpB|2xH`T1?zO;Nwg zJ>gSdiqBe8^it09TIpQzL-R!Te>}FmK6U*J-ecW~Dz|D14;R_I*Rb)`id}T(kZJk! zisLgFYgT=3uC6_IXJzg8vR6B+?&$r!ski-SrrN*P+WX&UZtdTgTPOCzI_3jk>q7pm z*>y6TcWk@zfP7Vy8>;5YL`CCR1G-j!SiMb!w&7E#iU&g7g4|Yeb<-uZS|Ya zx2>;nwa(ZT`fq(<{!*Q5pO_SPUs)*F zwsk+8xZ($Aj%4WzpG`k{rK+29N*?cY3D-V%F#&W66-%}>p^`X24FEk0bw_+$IJ8SB>Nn(hrhe5umh?3&c#)wkcL7PDR{IAN|V zF|j9@X~_a>KkX;^-H)F9=sx}{I8Lwkk-xxyh6L}2@A>cFs@&DKKukMzyV$2`ca~jy zsu$UOzU?t&-3)VnSR{$Ty+b?%k*du5+%%jQJgT>HeWm~(Gr+^wriwjJ4YTJ^H$ zwhvERN3$Q6>-+<YD>-lQEsJyUUp8px*EB9J_4hh^lJNwIDo#Op0 zd-Ol7cFm2;Z`=MWZ`H@2Rkd%nMcsbYo3%0T(_3eoO%lq!=eLR8T0OZ{v^@Mr`U8HB z8tvJKzhBzlwns8m`JU~8>v=i6n@x^|)o;@|ty4Mi+2z)}6|Q?%RR+ztvvY<oFX>su&>iqDrfKe&(NIxW!KlR{PJGQ zV%yzyn)ds{_W8SRI{jhke}+T%Tk3h0u5Q^ckWuB{^ghLW{_X3#E}dq5GwD-W_01*8 z#%6ZEt%XF28-v$1et7QsA*}Vo^S4pC%R1|`w%u}@@@r<~Iwc>Y+2ux+>%yNVyg1r; zkVQu7=lMhNLi^t^20#=zaNnE&i3)n|`m>Tl%1%wW8?z0V$KucO#DHuJ$iq za`U9lWYxDn+bSiOPjh&y(qM9_=^eL)?q|Wcsqy9&vyTS<`2A4)u=S4TlRn4n{Kecy1$EXkxkl}TdT+8$25W^z}OrEzk+$Hnx__$9OcD82u2`-%4I)n$KE zmp}ZzO(YO>_#*255jItkkYc}SucX{=EqQElmZ5U$^aHaEKSYQBD6e1TU7Wk({E9pE z8PP`+CyDLcJL{!n$TaCinc*yRgk~N%$CmMT{yH1q%*SFMpLgzmsCwm(`(m|*>tRwA zmoHtq=+nbf_;XU-hm)E`Pd9ii;9@vhb^hDj?SH=hXE^TvBk=F?-2V)=^&c<)6Jb2? zgZW|E<^?}?y*_HE{n4vpb!@Jct=g^VT9b&Qyqk7T+|=Z0?(r^Ua|-`NhtTyNtM=b2 zyZ`z6e+F6mhwca?16WsStw(TCY{=?bjbvec=I#3P@qbGGGqf^9En4-T0l}9?80{do zVo=y*k?!59E1efmI45Zdc%laXM9hhrkROvXKP*0^yFT`1Ro@ogr)yixqyk#*_?sSi z$>O!IWa2TSPkP=~4MxS0b;3U$KYTCnVxIC9)71F~^&jqDzwve0Jhe&OWtZ<9o3wJ0 z%hZQjCv|dUQ&O#WJy+WNpCMDf{a62y_if*P@y*V+J=4GLrMPi-^yPG|=%Xtfb>2@rKKwrhodgqZRkN?9<~#Urd|TH}T==*r4~`muuqFAKkWfvvs=mIOM9p?L9%a z_iX={Fs-_h$;$S`trHvdKAha)6ZhxtoBf}B>m{phT@SmuZ`#}XzuNwLRi9SBzI1&!{jwOU34!{@r5!*;(~W*(RUW+D9b=RT?=@hUztJkdVG^>ss^cKF8OW`xt(#_up{Y z^xH?imFwKs|7S?AnDyFMy!f>2(LLuHZsu<7IQc||p>XGEok4ZQ(hKy$9DChPv*CHT#a)M_7qw3zH*vZ;Jf3vYx(|v-X(kf zKF$95I+scA^YvfX>vzYWdhY#5zVDac{53Z-d}GO_(goc6^U>q0GerQ5H5^_Xo+ayn=iDIq^& zapSAdh8Nk-y!o5=3)ZkdaA`Vz*lOGI6|e6xTr%OF`r+SB-qv^Bt0Io;9XFQl*vy&P z_E@B+zU*-F>gyYRC_h}bdHJ8j_9L_2&MEH|Z~XFBaBHM$WSsYsdF4C({66s|Cmmt9 zDKm-B^r=^#`9<^f)Aw%w$29%#-qPABW5wpF!5f%uXXij{VYBUH#~*>(}Mwu79xk z%Hql|+q&*PlgrWG*_b=K;CA8lc~hU*seIr+sxR~>{KMb7_vWswPk9+mB|=KlGY|0+vk zQ3Dq-_`t{O-O>l;#P?pC?snzte}+vfk`tGxG~V6SMI zxzFD+E?M8Qhkwn>S!}!4->W!vHQ!_|zklDIxIJ!+)|+Rhc?2j4?wDCPiRqn68t49u zw|uW%J{*1C{>8TX=&b1-3+Gk)3VUsD%~$SY<1Dyb`RS-kTu6^{?j`;?56h)K=c*sL zr(w@%C%*Q4Q%&~5AD<6iyYOSjwdh4`c~E7-uib>8~5tlM|XF{eh+e$S!9&bb8_y3wm4CO;>J*-wA=^Sd15 zdcz;vKgd5STmMnt@1uG99?u0?Vw-;~_q`&={g%yb=jB}8N2Q^k_uZE|DYW_2zo;YY zqRK2E&Gx?SW}iM&?a#XDPu|zAnYUo}H{Y*MbE|IN{urIVZPvWWtN)bj{P3UQ2(S2| zU(*lI7mGZ;=2h;}8`*BYM&6b;lP3l1UD=y?&0bGEXNyCFCBvL>*9A4HAK1H1Gfegh zAGedfR%&^AO;}Xj`h1&9A_3<_1y5RsG!=u&-tzCb&rJGxv+wJ>@7q?_ zrWWQLS#?(0&EplL#G>+mXMOAL_w6sZy7Nxl^?!T)gqf#SMTTergs-L{(R za6v!Id**izXSYA=-C#T?SwW`p=|1Tn(U10v)ScbO^}#2qcYmvCJfCN7V%@A`6Lp`i z{S~pXN3%1%ah7D1#!{Y5iPN7dv?;Bt5&ZD{$o!UimKu*)$0jE}nlE~Mb=I%Gxr<&d zG8q_$06d~iW_oc8rn4fWmaUKf%Zmqk~YaV|?X*{Bd|bj)yfu{dLB zZ&B3qx3Ld>&HcArDLJbh`_XL1FP=K}J$o+cKGIyCxh_KIGiPdV|F50(qJW9p?+ zkLTMzUzKb*<|$f#HP_ZQ@AxB;^B?Wxnf6V)bk}vkvW>MXqj&ta-EDF&q~NWxWzl4# z`5wKQsU_21y}s^tJ1%Vh)0LC*r+vD4^HPA{R>6 zvIl&4GC}pwYeA=`(wVV~C+X{jTYETvKQwPnczK-chdmXW?EC*S2!6E&r|$VU(f9)z6-C+dbh`XUG}WG%eLyReYnyjGFN|{?wbYT8)HoW?vlS7 z!?c9s&-r(jtku`2{HXT&(fnce1F^*`U(OT#&(PBSXx{d6z4f(A-OKY9S?^w1bYRb< zgjsud}1;dM?^;wQpN=@!9Sj z7i}CfYf}r$*;X#`&u7?m>F6qV)gzPZexFWtnI-=B@`(r!Gq&fl?MK&(X8z;-P*bOL zWlM+YTw}kzTjxDEyWOmCW~lG2w&2e$(r4ZjEN7AWEX)4jRK@-We{SV1eVCiHlJ}%-g@|F%R9qG`PL`TO}lnCT>DeLulUq|nWg(K>?$k& z>Kz+@`pNxi{~4UK=Luf?W$%CJhoD0952$Ga_ za+7i?SYT#R#+GvH_OJg8?YEz0vdmel|N7Vc$26)KOOH|v!8h$pWoD{ zd)I&dx3|{RePyP+_}f3uk9Xha&yWek1B=-cI9q8rS`04^O;RMWi$qbA6oi z{=^9$7uS1&lka=~+OqDiaM|tUXJ6{7nr!pj`aJt))t2e^w>!V==c{r5FukWV*4^iV zP2i%}v!>0ut-t2WT_@?zwb2$wR`htuG}$Fsie3~F5%ObrGC7X*w{3;a6Y8!o|*bt+}`* zrEh^?mwG8*#;dnKUVrC}o_q9d{p%~AdgUMcwJ!OiyYXZ1{xw>SKBszvL1J1R=TiGE%evJ%lTEj`*Em-`vfdk3D_Wz@14Y% z&2GP!3ouM-{%-AI9^&lH%`>J~B_>x9Qp~RWj4QZGT;w ztUhUa*x9l$wy+Q*eq)UnPXvm)KJVtgdHiTq+Wh24wegNu|Ad!UKGO9MzPiWy@8*2D zRoAwcO^jYCuBjrx*&OZiX5%geA*+Ro4q2Bzg{i z@BGZY`ns*UG%Fyk@*k7Sm4Z=RcWOCbVCulGE}*(Z5LUE8%nNF3xBCsxS7( z`J;G;t_}Bp2F^3H?zcX=_+{tOw|m$fuOv-+xOk6UNk>T1Lyu_!pO#NMpnpDCJyTm( z?>_@;{jP7}H{Vvjd9+@<{MP*+qU*oM-Y&iQ=CAWH`z?9x5B7^@S=k=zTzBtQrhcm2 z#V2(KH}5R@8F+Gwfo|8m5AH|*Dj%Pxc;!pCw)gK$og@9CM{l_) z?dh|;ch6Jl)|&%58+RqSx>=lHk-2_eUg#hDAIEDp`72|4rGq~5ZM4c*y||+3c6i%; zk1b}qdCsnk+_EaKV~VfRWS%GN0c~cI>rOw)KPa2*|D!eakv#iL`CG+V=Dy{1XGNP1 z=!He}@Rka%V(%2VQ}2$mtD3| zT$1rm@mlF^Z`JPbYc>;W%XiLPCa88|`Xr}SO$Fl2I>#kdX3yHUHgiFy`L1~jt!~|j zbog#Ay#H>WNzj?&HyC`2qr{ImuK(ln(Z6NkW%Yakpu&yQ=Rg>zg;Oap`3l-S+;^ z5Z7+|;qc?W=^w3wXZ!WLT>E3Wf}6uMJ3F_CV{!Fq$!?B!%AcN0$W<)}{dQtW?}?qF z*M96jQuHI(X?9?<%auQ_Rz^yhw`>ac9$7j=p%v-0Q_FEj=Y_e7*&_eK0?(8db-kh*h&RXfJ7ymkJ`n@&p zm;GA0>C}9a+0Q=xnOtAI>u1#a^11SFbr<PKuhJm^tn&p9=tobt8DUb--__)e}~PcMeRPmVoTwj-M5d;zge{RcI29HtxIh8 zzfGy==GD)1LW_{of+f}drv3*$k@<;8X>e*-YqxSB){-rlMWW)BWTd(ffJdL}1 z#q(`@3>LqeYPNEg?n=qfTTE$x_rH0(#`{A>Z0wE7III4ED_>W~+Fgq-R-Jq=ZqHkp za35yQAoZI^R&($8d3dSr+&m30#i^(g=3nKXA#+P>CBx2(Ts zKh5~Jw)Jb={NDR7ra!-;v8cad5#$ zrVJ^yF)7XJ~XL0W7C;aW|Ot7CC?@|1+d^^Go}$OAg!+Yxh2K+eg{H z$tz#HtWV8evBaO}y^78QZ;=;|OOg}$cOHJ1r)D1h?A3n;{^Bpsf4-YHEk8PX(VyD) z*QY<->|e1@^-si=()+DJ6^jr3TDJMY+i;ibZ%yucd+gk{xzeS#GGAodcg9Jn$EO=4 zTsUDQ>rteAwKQM;~9bD+0^#PQhA#CZ-uGf6Bqj4UUHhT_i#_t zNB=|hqCX}Fe7Lt~p8jPWoA6~Xx8L%8`ov16H?x@aV&|OW%&}sfR~K}w%uJE5)+#da zdu8|RKf`+cCrh@y`SnU*bKUnZ>wn(<6(xJWaZhaeN9Kc7{GDH3EN?p;xv@Q9d*92@ z)QG1JuVge8%p}rTdcK?Pj5&UopLKq_JntXfht5@{$_rnX-O9hs6Bj=Jsmsd!X?}O! zo--<(w`^C{wdGo~pWON-DE(<&?x8M?6&SlNa3weGSwFf$%~i>KKxxTpx?Im zqv#u{-X))-41TN6S1DZN(`R&pVe=fG=Y^~5h3#}I*86=lYk&Bkfj6e2+A>J?j>xZ25uYA3I^5OJ*${@V}We%JZ=9E!pA1pomu?Q{Exbo&>dOh->h5r z|8PsYRq~%<^G|En{kK0qe>~s(%#zR(%V|?8r)t}G-S7YU^ZhQ9Ki-e`%U9?>THadn`=^cRs*3U>yVl&Q-8Vfs zyHIX3 z-*z+e@zgC%PvSW)sHJA3!rPan^{=~-MVTCYE;`q%B+@3~o%^x{R<-n@PGb-v4&xK3N?^wJN@ zAN}2b^tSNIm-A$r>iFLEP-t{KUed0IswwBwCw^zbTiwmYryL4Xj zNTFhxT?5aoPxch;LDiW1)AoGV{l0B~+18$Wzx6)@^9dXI$MzBx)4x?-{*%eKJUw^K zt8K@ld*7Q>Zk2eOtD3A+uy*Zgcbzj6HfeC(p4n*gTy*7cn?F8R|7riRpB{NFXWFet zvTm=AS{0&k{Foij57#9X?k*qYW-fVU zr93-Y*6Fo>t&^mVa$0oBG<_qn`%fzqx9vErc~Id}MfgMcP8-z+{NfRjKkn%F?s2ay zTzUGDhG1yu3fvl@74X*f7Qzu2d7;LzW3|f z>-uANzudaDeErArz>o8K7w%jBFgO0tFWI@Rd%joo-tT;?zN1G;BbEE9uh_wc-*dZj z4;-48ee~=8Tj57P-mB1i_p4U@;l9Sz6K=D({0E|>Ol`8r(>omw`-XSM9zog97K zZ|;Q7&U@h07+B-+Bl7WohE6+)*@w&HY%gy4D;#>6Z}vLX`EBv%=U1f8e4A2zOzii? zZH-Fjx35sS;`Xupe*DFMyjz!U*>v<u2#0UoL;OUt0b9yER>!A5Vu~vNMSPQG7&9 zJ9Dv--!;zsWq0;oSvAjpx{BY@X)ipVd3ia_UMN`eeAc%GMT}p+u2=a1I*DSxfJwBq zt)8#7%RKSFU#?pv?2c|w$Pm24shn)$*zBs9ym5+*)XOysKDOA4C~p5O{7=Bzmo?zD z-SHi}uSDgfE4*V=^4aFVlO}QQa#z@<{!@Ryy#&cJC!o^`=&SYMlCY)w6AKP{KNWT z^RYVdA3Go27k{-+|H9W;^CR9&-b-)(D9esqD4?w(ot}M@@zBouGyWx?xOr*PoptS= zAE!U!na^z}@uT^uK;*i`rHKq?*ZO{oW!{};F?B{dSL}6_63O*E`M2VaxgVTo)^)6& zzoP$0mF%%e-)wsJ7l6t?_or<%Bfee4z%9i3?L%tcz|u2{UW|(0{B-xnh1?yi8`f zoc(p)IsU6pK796@CG6V4mV$1_(ml`5Or5#sgSUsr0miJ-Kl>lA1N0v%!-;jzde7=f~JgZKg)unP8NK+xqF)8^10jG zw}|-|to!|F|B;_x);G^O-e+T4Xym%kD9gC>%}udS*Tlnj^x9mJp8DvXSjdAp0!Pv& zAF5}3c)7;*k;fnBRj%tRX2)MqznWLNLr3WPo9+I2(@v`{NnK))(`CX{zBxrG;@Qq0 z`_rxe+CO-<^?`qnofvNo1#Y1@q~xPx{nVrXTh+-Y$IuAl$24%Cuh6A5%> z0AbMw{~5N3AF~(wWA<@+`%f63)-Id4Sd4;schkN^O`87Vg;+?%(bFI+RuhJ23&jcPxP2zDepZ4^M zP+78s>DfQ7KfE9HcUGM#i<;k)llVdEl~`4G#wVAXyZ#Q!I{PtDNAipQ54ZbA-acHo zWQ}os)~e398}T#uoYy~Xo^kPKT!dm>?(=uoy-BN7Uta!Jv_sCsd-9{weAeTS+&|{j z$^Sd|_s5k;Ps?4O3lcv^E))D2z5PFf-2G`QE405w|7U2i`_FLLT=~dp;6`_JXw_di^W`DeM~ZF+w6snhSC z`u~&N_xhp#(cksm+thA(KY(|!kR{w=eiBH!WPiLNIzUD_VK zt@&B=^T#cfp9Yd`mXEJ5?Nsl4Jkfda%CO)I%~t}gKUL2c-7lN_i#K}i{p^424Y&WK zehjf^vf+KyGVS2j{|qu&WfLRMuRJ{4R!{n++VTjXSSP#Z-_R` znD*@6Bk#&Rn<_Qp)U@XnxMlbMU0P@HYCormcFBjmQ>QIozuCF8X!^rj+}`qA+<7ML za0}i!E7V0&Qg!x3X|5MF<{uV^e6T<4D}3mm)W_(T*RM@KSzWvOPK0~G_1o`!k3O1l zl51XJrP~8f-v>R9HrQF#owk$wag%LrvF^Xk3$t(am~FfCN~&(_3TN)4EccRhrfs-$ zTx7;w1=i_W@dcJl?H|>j|BXKN*L+i*k9XR>_xsn+-50)n=BYoCe;4m@e-PgtZYQ;Q z_Tc~ymCNG(AKopPWViN_>(1#Z=hIFbtw|ExZP^vS!#gL4x8xt^kH@QH|F~bbF?}@G zCc-A~e$tk$m-Y5tYJMx^GSl8-XYb;WDR^4*FJfRpYw!G7PpFBa^~dUy?^ocr>{R%ON4d@ZBNY) z-G6`Eed~EUBjRj6rpPgUu)*#Ol|#Go z`5jF(GnKrQc5a&Ct^B@XmdF08rOtWwN{{C!eypien(Z{xMz?TT)N7p+O7kO{Jl{T! zJzQ*3xXXC%&I5cqEuMt4epvlr-|7eJh5j?-|CoH#^U>b0+iuN=f7NVwxj#wZ=CP+I zCsb~#oNS$=_c&>8_g3*aw?670J};MjpZ&d!zW9-hw>9A<@76z>|0cn{;If!uhte$D z<)@T%Un?Z`K4?2;w)%AH$-h}|e?`!zp&s$KTJ^4p)n|K#US>&)0$&-17B`u&IR zJMv1sZH+@e`cB(#-&QjHQkX~1*4<0F1RuMnXIzXpq5AWF`psw79Z}Cr_-{oY{rtoD zVR@%%t-1G>%YU2`Q|%TP%T7Ihb6)1Mn?~;DGlH(4>i@pLC+75gUAwGzTh8CScRlRG zw8q6h&V9P|D4%b4uT(wrwxthSHg_%-oN~nc-J8N4LMaD%zZXx+{1N{x`jU_P1Gi3)3r8E_j-l7uC)( zyRF5koHENJep`r{*Yv1#6e2sZ|q)O={j@XyuJ3)`!5}I6$z4np6^<6%@ zIH#jq=>(U~P~F6Ks15_TJbOs3T;p|Bhw%$9?O(j zErBdBrI|qli^Oo&ea`|nt!qP2T&Oe)T5)!|?z5Dg{ zv0952Y9IbhFZi%$SMHMad^O7|L}q*{Dtl6R-X?Tg=eBmeeJa82#23@L<43MfuR6NP>h|GeyQ_{{b{k9795Qb^wBh;FR7sh(=b{(% z=4LHm-PXb&!{cVbQ}pOA?siN`rYdCdw&24&P1+~_@P14_E*0KUr}FBb;$RJ`u%hkQ=g?RKzr3rt3ogHX5-FF~#Zu;RLUsGbm+|2h^AB6ucjmt7+st&HZ+4~F za%PjY5?vL3PbN%Nw)VHMWZz+W()YvThtH38obOt@XKrd~aCWiS&PA^-t9{N>jYw&# zu5>%JD06yAjyZe6kv^^DieuiZr~VaP{A0WLUo*?dEx+}TeflGM{maCszQ04VtIQ8Z z_jg3io1Iyn{qS$yWA|%sl{fCV{4PH^(q_fetxZo4A6887>=AlragRBLso{*VJQEKb${OlxZm0GR=*X=h!#asmhUF@6r>Fc5{3vuru)^fSW&B06`1aOn*DibZE>Ld8fup6`oO6CoDmq-9ckG!~U6M~d-zvHU3yISS`?T+|&{;11ySExcgR>@ZXIfj9`Tu87DbK#T zFXfkRXzQxHyVCJL%{qTBUHtC*=g%5;>NU|H!dsqP)LV8Tug3kseAe3)?MI4V8=fpw zTdF*}C;F&LklV%$(cyP3BN@{qd{VBhTl?FsVpU}6e+HqKX0vSKxIVmFzBF_CuF|)A z)~<~GyU99IE@D}vlk3y9Zz|k+zfZ}ZX{3GWkN=0o58ofVxco=kuY0y-oNta8-g`M~ z$w$ZfjI(=OHMdUk0)149Y3(1CHtO!_mS(njO|uTiav8%_U)0d%_rGx zI@Pv%vKZS`NXxmT#jb8LFwERnm-+qo*8Jzord_&y@7JVj)7tYd*50~%|J=2Wr+$bZ zz7^K^JGjQX*4StB<+%S0hi9oiy&V<3#O+bp&MSw#)h?LtJaAfOa^u-**J7#philnG zKW=UBPHTE6TX#x+v+C7(3k!2L9pN_Ut9U#ysD$T@)2VeL8x@$cm#>WHioZV5Kk`)U z+n*}`85GbqhQl_9W22eR{Ab9pmghhG;d$ss@s4$0e`otLUCX}e%X~!qm(S!K@zad1 zO?FAqxg8q1`E=pQ^%_4qAIul1;Qto&p=MWmcTJ#t*B8w#+ji}4kK28}(B}KKM-Q|`t%=1SJzlywlPoJFZA`SxbuZu|L$eT$6O6M#F>{k!C&ZIy2BHb z#BV~5w@)bT?C;t1N9ITOLtBQ5_s3?HZf`GDkDjX+QIn`QIe*2R)ym4Z?;O!%P?W4p z;Nv*>Y)Z-Q*vCKW*8Q&3H{4nszxhwS>#xl>UO&&jCG7E0y>(Ce!}Ss`c1?XYeW`4+ zZ+qC)Je%Me-L;9;GahDydvaX+%<0TxB(!F&+WUuRt3LlR`SAArAEjV_)~kQ^Nldtt zX>J}h^~|SZ&%|1GpZFv;XU3DnncmMWUpL42cm5Il&>Zw*d(y0Lt25Pm*Qs>}ZTs-` zedls+n~jW5&+ST_f2u6Dq5V@QYw?_(%TXcSZ$KAcRlWRW=6y}*bNuy}>mM$BvQOkk z_apWnJ8K{97rOa6=!Ng&?)B_9&a-zuZ@YJM>O2m{OA}p;U5+qa_IT#-e!^}MshuBY zf1C89_tAXOiXhoKeeQh|*T~wR>G$2Rv+W`4)45MRKD_tv`uE%v$5V43#hK;&+$$t& z{UC4Q$DNPck4Ui|KT@(Md-;biaVokBdm_BHJogp;Sy#~^b1LaGQ_s6~yIp^XKd9&T zU*@~~#r*WizQ6TvM!zlIKK)z9vM&BT&#i-=C7vtZudc!*VzON1`{j3=p8ecjUT^n3 z_k85JyZQO*YxjA*j6R*$uKYp&(e=)n>_@!L9v|mSv(oQS-ez9^q5D5W!o7$)Hy2D# zRC<*er~Tqsh-5-&%)|*stktT2-e)HlniS5Av$%PcJxn2m>q(9Hhw?-B7_aG>=BnNL z=j)YWclz7f;P5?H;xx?jJ}ojgI{DOnH+xET$OMZA;n^pec$xBg0-y!ziMIjO6A+#gB*G168qPO6UGUC(^V`#PAEtAiT2rpM z#(O{WAN6Z>T(K{s_N`a=QNHZe=R39Qo`=o$>N=INUUr-68DDOWMf0`V3+0s_&StnC zy12&pgL~&bk=Tl&Ywvy^wd4B8y4%{n^pW-5fV+-beaU+!l}}HUP)=zmo_OWzy>~@j z7vJnE$^NwI&HBAxd}|Ez)8761>i>E7=WW-nYk%w9aB=@b;r`%jrsqX6FGauKetFA( z2JUQeF>$NbH#-E5EMg0r=6m|nYN2MGh+LNb`TLY#+Y7zTU2pO?Kk{08hmG`yU&q6C z?!EA}Fp#Q^6*(b$wXUPlO1m-{F+;ZPa zSL{>1M9h!cgDIg?+*QI2rdvBTPx4^D^K5zF9_0^C-{d*eySLso|EqRZ=Syd)zv!7s z*Z68Rti}x$pB$_Qst*^Xb{VuQ`u16`E3=*(bf0ie6IAx_p~w@t&W@ zYHHjec5C;3#S%&wEL}>*Qnow~{VqzocGFt8(!Ql5m`Tc2aQLVX@mn zjaza#?##WDb*s!XvgFI=-(N3(Ow-cS4lloc_HNzu`lc79(>HJ3I_I;Jq#Psxr;C1) zfAjFMz1aE3{%zs6mVVn=D&AMZz1KUyYT5d4r=wT8m`!J@jxgV_*<)U1?w_q8JWP`~ z4~O3-K4) zVUyI#BmNxa{WLA$Y1=y^wu-X_^Le-2e(Ub}@&4htL6?t6)pu_BUG3}f{^PfWRe3?B zlTT`{m>T(ZyX~HCCAY@#NsW!`<2L;WeHhKleq@$XCa2!yN7k!n-wnI;EK=QjW}f|Z zA=$nQ_wUANykRZ0ia&I}Wn)G1(Xz6r&Pk`Zw@STRwohx%q;Hq1Q&@wt9~Y#z9BE19 z=@N`yz+yIl2`cjw9G{bvw-_@jR9&HWwvi?7yLKjMBJ zerfI2+pl+;EXmGL+g4es*6VxeNYCZmU4};<6-=@)Y>b=y(fN@}o&DdLHi?gJM_fA` z^5LDSw?_Z0?PA}}?!}&w%kmb<>3f_O!n9@*@48n9<{dr|TqFJA{NZQKhpYZGwCCAq zu8%vNuli=zqx)(r%QbE**`C?7JI3n(tK!@l4eX9V6_X#%Z`h;wU`9#w%lwkFY`>#6 zOe&xDs(SmL9l|c#KKh&7Oa7U2@vxj^1DB}L;;!ql!v9{yKl&c}pCNR2Ri(h%>i-O# zTc4J_{Z$rU`Kn;&G>N;@GgDdCFBkds_z&^hsr|m!)wj+6Q1{-z>ss{7AFuy2@P9_$ zEzK(0{ph~bkLm+f&RP4^uWvcemznj*xO#P*dp1Mz#qtvei>i69+ds;npmr2$<`eWSGNBUszP(>zqPvSTWrMI*%$Af?$;U;z-8c>$ZhA7yfA{5ge}&)teZH>c%g8`?<3!~TkZKS6}xQ4N3V^Ca%c7!dVKm>7vLIc zD7{hD@$7lFKSkH~IGZ1C*}3KNgddZmu9v*tC{n~U@uu`f!G67h<8NN>nA1G%Kuk~M zg+0t4`aOS~T{&0%$mH999Cz-@P5RH^xb5TKZ!c3*C8x!j#QsbzYn8~@)*!;zpuOVK zKIRH#*N>&!JJ)=f`S|MEyu9_g-C5!OtXr-twjOrPyEJ3*XY-8-D;U+wjL)jpr%hk~ za{Khx<+)KZ`^>ISf4zU+>Zhrf-cR`<{xSQ=KGTnE3zt-!^OhAqB6wSR{}0#y3>)%w z@^UVCZ6s~?7T;~&WV^~kra*h^qU>2R8_UZ6GuZ!- z?#(p&9{z9tlA5i{^mg07nfc*;$Yqsl&!ny%ZQJ^2<{!&v{~0n*=a)RIzSe&7@3*&~ z5`M<-;gC>Q2%n!BFR&;0(Mp{?!YTKR_gr}VF>if#X0F*YE7ckyTr&}{Vo4E4u7cknpYd`e)$i3Zb|-|8dv^~dslQ%PY;`Gsxmjl$g)y& z{@FXfX9yL!TCYAAcJa;b_-Xfxx}Scny4gB^*RIun-kzK0lk+qdZN8cURcVSSJmJJOz2@?uYG7ruP|sR6A_Fy!QEr zWph=R{+rI*dwPxIg3@W6qK^w=@|5`PZq1GtjE#$bd28vJPSJY{i>q{*x);y zHlDlYvbLG_1sm@N`YpefAJx^nA9q!0$H$&$+PCfb<16MpS$o=HXJ6s6t;eToEA*Go zNJ`?$JLi_Y;JS_Z1NrW%mtC)(U5>t5W2-*7I5%;X)!u#I!VO~Gd76t1J0*@9Xn8Sj zdDQXZ^5^>E?XQ2_y!VnT>ge&4wXc8uT>Wz8z3ZwU%a3fo$MYlbv2DFiz^}fki|)lo zYwJBo?PD)_ykE;=#TWhd<(@xgKAJCacGdYxz0JAlsl1)6Nt@dko-^br&b$|%bVDXr zc2iZa%Pn5J7x!nl_D{K=VJ`9M&mP&^pO4*1w|p!*)#a)0k~g=~w%qEuob~GUvznW~ zt+VDSU98hu->(0@^z~iypbx9Ed2W5%P^vboWJko&CqA)zI^Kx|)l3KtKlM<2&M*15 z#_O|o=^x%LU90r&wg3F^>s$89W$%`(nccBq$L>DMnw|DfXK$L+_=#&ei-3d4`^Gr= zAKuH4u8RAkcje34Ttm^RSI^B|w{7jWipO`7wuNUd`IHRW0jfOhN(vRqKpPQ#J>-*uTr#mi}+`caz z7jf4`ZNt_blg>R3-Dc#g(fLl5v2(J8gC~RF=}bAzkI!a4uxI`;>p#O0tG;c&yiez@ zS~~UaMX^cISwX5tjLeR<{k+L^tM2F2lm2F^6K{*Yyml-9&F}gTd*8jW$z7`6{Kiy$ z+T$Zp`F+b<{%&)X{&#VuYne)=q}LNqO_kg?tYzP%)DOKiKd?)^Va>ae=y<-@0`hO> zTFH87-&^Giw*+CPZQ zTXVI~{*imn{_T5&Kioeq<|I*?oiFzC+e*>Rr*69*y?!HaLB5&A5*C$BdG5Vldun>t zR$r}LN7_bbZI zbz7gOctO9`nP^$gLyzZ2efjl)|F-l$mt5t4hrItYELyrgUuChs;EW4PL*w+K);&M_ zQB7%`u*Q*V3-uy3cAM_{ViR}$TicJ>D`MUAv)7$2EYxQ0lgLp>FREf-sZ?9v-}9eA z!f*563HqkOtLx9VfAfi}_xaDT`qTE3=SvI!GjNx3*V-Q`cgo)J<>$IBuXlTApPZlM zY%*cfLj6Aqled@W-h3GwHdklDtyjNT{%LNQ9l7IT;?`}ue@E#Eu?j4_J+u5_g&l#CmAs($#JsCRZCsYm(2*9R;RLg|Gw>4*5h*|%-muYWf;H>~{N%r{kd&&E&Yt2#EFKKP{Y;EMRi$B$-AbE^$|D`t6g z^|Z^0jA@of(!a-;vp;=(aILj(wC|NYw)sn3SKYdOKc=GYnC`J})&+;GcgkfK8`o^y z8MMayTi?+yE<`Z?J<&9h@V$0m`oykS9iTVm+;{|x?e-zINAx^MnZqubuskjx)Av=s>6OuIsjv6uT#8dMdyyJ(CO*r_JZJI(>nSNRPts-b zkA!W%QX{+IPSnwbXA~~2Sv2jPZEVyRqXl6-w=3^8cZR3h% z%M+`MWw%V+yQgsKy@Sq7U6w4(=d$0JT2GOW`t$rp?SBU4_ig*%sOSD?SRA(Bb>V-8 zkok`B!p9-+iEuPxo* zCMR<7Pcd`+annCx+ry+--x$X@?|OBqJR&*!XokF$nqb`z21g@7)+1;5Pp2VrKa%1tjmvdrtkmI{i7&QVeTJKl`#L5m)b5L z&v{RA<6Dwk`1Y*E?{)sQ>u+86t<&=QJ3H*%)At(3uibrjb?!c2>vGRKKg$nTU7O9; zx#ZT}irXQV_esW8JU`O({Yd9p-<0n>Ewfdo{gstZp8Le^|*Wp$+QcIt$7TCJ$)BP-u$!1soK6s*`eq+`PJK;Z${g0+Mys_~WU3Am6 zT=z_-kpFW%r5%$54u0!t`}Do!ahc_7mqm9HpG^NZ`>F5Wm$$d|?=YJ^CqC@`e}=}p zOTU}e&w5iNze!}Z{hQq%wz*ikfA~6E^DCQS++J7FrJluSPw!UJTr!zUW6^>Iq>R8G zv(M!J;hH=BkktH+e`@X@cD?uBH|w>}Rn=-&p;Ng}S{EO^WRv^J=f0k~+@0&weysl1 ze(CWK#Rs?RW$#s_yMFxMo1^z8H~){{#>{V>X);TuUt05qYq9%NFWrN8-YTtNU`zcK zeB4fd--p^3aWOgJYk%T(HszS@lH4lfW;p3hUV1CjUy1oy50?L3{>Qd>Q3YsR`j>cz zO|*T7j$WyEt-WUx>yD;dcc<#9Og}$MMOyZ%*=l2B`6;GpS7K6@Zm#+jdH-qdt!&nB=es6hiwxd$toomFU{IEF+R#CU}eP;hZi0;Jxvy- zXKEaNTwG9N@iD&nkL8Nv)upQ{7X660<(?|EquKt`wWNrg{xWf8mB|MzlB`sozP+;a zV{z`)S9AAPPO8_tZuw~XrYviXzqf2_buWLsbx%F}(QSE_nzC-s59&uM_JmeHk{69J z@A$)`v~A{>BN@@>cHWiZd?2{R@Uyl^?hR%Ysn7QtXPxgz_}Jf&pZ>AE%T&0kBy7j_ zD_c*mUR`N-eR-CD;fy5{R-V4?a6s^r$j`EOx%Y!!*-Pbq(U;HMCv~kxcm0cRzm~qv zyj8ZhWZSYspPsmV;<%jh*63++k>sOLi`OU4t!w-^{eYa#)diQn_{;prkG%3+Z)0TU zuf2OddjvXLgt)7`4dOiTK)^SZ^_|MD=yze?t8d-?y7_Ig=WEUTtLm%w?|of&clVu- z{FI+*mH4Chk$uaa~Xv(nlonx6gRZ_T>% z@Qev2i-l#+9>|%dZhG~P)yK2_N6s_;s6RR@McnLG-PUB$zkAHPRvW2wNroQUtdYQP zkz{)|=VE3ydpCD z+Pc*1x7_F6JM0%$_#|ef`j$7mM`ukpoa?{Ksx`hm-=lYQ-?_d|ItjvC z-JT~EFxsSBhf5X5oqw}8eCgb^pSPb*R+S4)-F5NT-T3F(f4;xUxVL6~YQYAB$TK<0 z$u~D1{PX%z|IO5tT_2wx>Xx<*y7!+U=)?QA{I@o)%Wp?^q+d+Y%$O7%w<9SoGGBRK z>OGN(I~v^2)ZJClP?^JcQf<%vhxgxdFZkpBk$2hj%%!ibQmW=Ix&BLU;YvrL=xno# zR!b&Fi%guMp)Yx4wX|r#L9frjylS^z?f1<6l=n9(c3sRq`PaMtO;0-B^-tyQkN$_p zd-DEC9H}{~th=a~bIke4G`pnr_h-FZuGYKw z?tSOwcVAsPC|SC3BS-q2{s+EqR!{zSnem-={|EhJvHHh6A4R+`x$Az&YJS@(*=XPE zds?Sml+!m=NpFfw(m26+>O|p|=c3$qC(r%n^`Al0_3!nlwQs&(jw+uN__^%;s_gfh zFW#=5w*35tOsna~{F#27dUX7V`_Wr&jrEnu%PTfrvM#Mn_nCHzeOHHTK117!nT*XP zYD_g7x49~A&-&5)h@>+q`)ilgUArfA%|HIUqvcXw{}@RXgc~W=FTpY z#e(ZXT3Jpx^SIAxyi*hPF>L;W`|QUb>3VxKM_x90eLM0U&+X!L$*CcCSESmTIPN)j zoU6cbZEmjI`Kfi`_4Yfh@4j=YTDHrw=IiYz-+yLq+`8_^ZqE-jQC6>K^@JH^l}~s% zt9;^4uxv*vw&|0DC){SPc_Q?_k?y#8hW5sgJ%Y>)W!~rfj#q%t`bH5*dD<{uVp?*Y6N^k4hyA`J-mR;N; z&9UHj&BM2658i#|HBs%vL7gav#SNS)%>MtRyq&LX+0PqOQS9)My?d){)vfFkCAnJn zM0Rg)6Bj&p`6uYSm6-O^TJfrlT+(bSk5ApT6wT5^y@yYaw6LZ|EcwsSUs?2F zy4t>n+ozmoDU!IS^h)Bm0^@Y$g~toys^yPa>es1f)$v}o5q#tuS+er&gDa-D%Qtv) zyUkLa>dmP)sf*oy|m)f;lowiTWdTQ-_8nuzbqpA`?L?dcJH1B-f>%+mKk1j?04j&(42zf%GOfs z$Kn|(HitEPU$iNHxHmm`&aR`W%U^x_d{)tw=V74C%7@3oC{4?r%Ox~>~x;seuiCXmy%PH=K+~XnZBBr18cYc{ux!)U6%3j*mtWX)5TR4rZ7KU zwCAVLq&E!w)9RMZnZIRTw_YpHhacrfjDK{+#>H~hsLO~w4-((#{_LcrUHJsjq$6$q z)~9c|pPj@Sd*9^A_FC)ML;o43gipPC_1?;*8Otw!^z^&0oAT%DwbOp}-)cX`P5p5G zTjhtV?>m;~ihJ$9`cC)D(pmQ$T+FMcEcWkF2nsPNIwT^%)MVVC&HiokkNyu|m*=<6 zdbj^!J#TiL_T_C^QTsdc-oAD1o4$9-t}RPzPIl)>$F7L6vasOvs4a-;ePj@dD{AWm=Q6(JHf6TsDiv6HG->s})Z_2lOmsg5T5t~)w zt@6ou18dV}4UUF)X(|bPPa1;bKKbwe!?N%1{m^BzTVf|qdRy;aegDa}Sozw1^&c-k z3P1ebY-jd|zdQJ2czen0A73?}PhG!NLTUb+wae}v<>`xTuFO?t4YLumkePMpV^o$s z!#>TMcin5BWX`&BxjWi)Psh1L^VQn7qxMX@F)cD>SNdfZC?0isO{7b)FMSJii zowfRJmuwcY2^Y_l==x;wyt-1QJ9{(3reiNVMI|092o(G2eG2|uKW+83+uwa(CVEEk z>1Vsv>D}+UmnqA-zQ-i{zUV7CsiuFLrnk8>^7YnCw|>+XR@!@B;;_UWzWSD{XR`x0 zUHnr0`mdKuyWiHvXFDgk-dQcQQqSYLGQ(A0<3mzz{f}hRW4COI*jk%>Qv7=L%vp&- zIh~a%S}W8U3yjVbPV&65vO05TY)$_xJEp9E@*nx%@;|N(KRnNH_9NG+=i1ZOuDd-u zICVMejv345t<4rv-XSD@T5O`jVL7+jNw*(8Z#A`Vo7LX0^VMbdhpkh@!-eks%xliw zZFTn34)(U*<9>z5Dpu`d`f+i=ACHgAd-j+wK3a8MPHdV)-g>ppuv-t8d}epp{ z)u!xkTP&VyQvL4Te*5{++YV`e?>)V5zsl78{L`*&yKi!@qO6|rkN%I!kMi+XZA=&b z=rz4PU-*^TKK(0v^OH@_{@rs*A-hqg%~G+?rI+v6BiS9#l_!7L-(cgsw1)k1O}VmT zLhky!uj<{rqHDG_M@)O~&&J+e6QDT7_Zg?Mru_ZBP-*tdQp=AsF8q=0x$nvz;YZcF z#o<@@W-tE{d+qF2>6I;uS9i;+(5j_3GIMp87c}=d#;tX}yp+9uJ&6QY|M| z^<2+&s(ZcdobR8vqWL@3KeO$9T6g;0p8XHjPY84sX@j&RAbV3l?40P;c!salw>=2quGiHq)0|!rkNrgSn$jUs`+Vy?Ao;p~mA4pAUzJw_Kg|$&Nq& z=J#V>9=}D>?^ph3;PU^G{$8^Fd{6!NoBtWOejgV}+PD92;o16G_CGj3$baN7vVS`L zAItgX{SW8wN&U}YXZWAte)=Dc{|s$)AE*EMar{3+?fehx_Y}Hm{AY+Mj(^twk7;0Z z_1lU6J71^ReatHL-LlTy>~yP`nk5_4f~U$_NHXm^vQZq^xG(!J|vn%gf7d1iFeonfDruCVoBE->vrh4k>XjPRhZT>+ zXMH`oZ*FO6;nqc4HdThdHQFVqY{V!~dSi{`b_R}DHg%^TY(Fxe=e79S6*nR(cKco4 zlb`umJ7P;j^5sW+g>IeZJtJ`YPORtCJ;5S7R;+ znf$g551AdlWz%ofSJs_L6aO<*tlT)IldDN&l0%Xa$DOW|52;6{3s({R&es6 zWvQ>R--omJ=1LmJ=$_o^Z1Uz(u13n%vY-apo;_y8uDa0$_g>b#XPbWZ=DPi2+oD8G zdY5mV{YPc??{)KicWs{kA^y;QiHfxEP9I&5H>75~*%SFl)_0F?Ze>ckhyLd$F`Ajv zcwZSw)*Nt{`EgFdulH|GK6o2_K%T|Ma>46aK2>7Nv+{a&|rnbFG~9)^pScOSYkoR);&voY z{(Qfw@1{KZdo*{g$X=1qeP6EsVc~x~y?Xbroz{QP|Jo7gD$;;!6F4sM&*ys!=K3FI z_SpF&)%>Wg)SoGPE-jjNg!OitTgvWfDv3Kvd8aA}@YeiZS;O;j`ya*qd^x4}IkT+J z&0O+g*VN`0b{y9pTw1r~W!hA`Grfh+qdOD&?o|4%dKx>$*j-=ZSMnqMKWf}dkE*w9 z_2=uIwFaufFX%DH(9;nQQvpPn({|WmSeuj=l9K*!y$UyIB|3rAynZ|FZw| zsn+lJ0nNw$EI-5__BF1^5Zkr)$CkF^@p~`7Q{1-wvd!+Knj_s=Kf6*wdvp?#%igd` ztk?TN`bmNN=B%H+PvPIieM%peAM5Vtx_$QHifOX$-kUakIC{(D*16rR7adLSuI_AF zQjj-U;i&qrcAXO(rt6KTR-AupskS@r4Az1YMysrANiy6 zgYmIE_v?8ayZ3x}Q#SwR{TuN)6Z3LU`ft9GRdA>Fy2{(?8R0$8>)afYtSt6t`9|*3 zQf;%)yMxS zUHmemM)y&#O4c&L{Mwa{S=;_I>}$$=pmMz6?e8Po=Su`P%Sq=wy*owc<+5$L_VfBL zN8h`9`*qRsk7sS)T>f&^es^rV%dUu(6rX4G@Sn(^`0YP-CzaY7AD(q>{(`gevy<0k z$)|qOzg!r5aZT`^&db)R9=Bz8b-i1cCv@pn_%Ta~TcI`=<=^qs=} z)`j=}Gd$h+q@J^F=Y#&6hg~nFe%QBiRrWpE%f~uy`OavQTsP~^-#MPbF`go+vH@Na zJ6QcCZ9@4Q&p*y*tULWnZ*K92?*aYC;-oJ=+Sq^i`mU)x>4CE}rleVSESqNHX302b zrG#aG;hg0^o)wefAbhwe`0f^^OA zZEyN6f2;ekw)|u8;VRHtkJ5V!7r*jdezJav+sCMQw+lxLS&sznwpOUfQ&QzN44$~- z>)P6vUQ54U`sZ8!yS{Jw<}EK-`R>|W+xD}1|MTp{`rnpa{^S3szbR^2^TYM*uk6&W zm6mqtZ;ptKJfd5+%OlEt=RAe*r`n(LER}nTjO?uAD$XCZvOm1?OML4;l`HFXO<&8F z{X45)GULgnK6cfG#j39sdpSNno$hB@7(4&CZTZo8{2%HTKFVc2GEeTqzuAi;W8z*h z$6QFiGAq>e4oh9=+kJ-*N+@m@lQ?Giaec@AEz6Jax7|u_e{?Qac_UW*>z*JsdTw-?Kl7D z@AdWe^7TKSf3P<=c3=0=I?0QA`lWZBxoa~vzC3tn4g1Ny(&>URPHKWPZH`U7!u(|7 z(W<5WM>OX@sN5SS`0q+a{aV_3^iOI^;`a=J}A3dW2)E8 z-&|7NYxO>Kda(D5t-G6fx2`d|?XqcSo4wKvV|I_eZ_L?6$6rOyZrQSZS|y)yFZaeT z3@O*o$urkyo=tva-}A>ddDXsF)AlZtms_Q+eYG?vy>?ISG?+e&OiS8?KZCE#Sb!jzfS#f?V40b z)zsblR%WeX@>}QO@#mh5Qk*0W8!_Hv~}{F}37Yc6ya zluhSy)Si>V#AkiI;bu+M#}<1Q@tFPnj`?EVk8ake>0P>&QF0<8t3PGCzTnLH8C(*} zXPFx^7d=;Wf3)fDr}g0_)9<~le!1?N^d9_{MGsucxs;(B^+EO0VUh3Qx#=_dj%T-j=Dd^aa<6>yjIaE+l8?;_oBYUw zcUkw!S9ZeJ@=Vr7hh2W77kBL8rF)zYdfYY}ZvI^JS=v{pUq)9rPsQN)@|y4;^X;a7 zD317eyltJD#}+kTw?f<5O824;z4|V=d{%SOp6ElT<_I@Q^jy`Q{NhUZsf*W7y}JJT z>4xmH%a2yt&%HNaef#shyYyu%{?;&7J~(S+YxFpL*GInfV*Mfo7QtLUgS=JV&Rfwb zZ1_ie^RnX#(@aaBMljh;d1ZaS`ai?8`PYj?*X%6Y{^j56TF{0t&<<|~?O8wit5-z) zP>!h8cHH+-_s_JqJW+YGRreHo#pIsI?o8@ooO@i)f?d*r?R$4k@bVw(hoky-ukuRT zu%UI?gcWzr_I0hkICeD>(-{SzP+QLGGWWN>w>XA6F8Hdn0=*1?@!=kF1{|m z>EG66(HT|S_J8Cb(8uGg4WnuwQe`((BDOxfAWeBBMHJv%)kZ=}m^ zsuh1QzoW+U(fgKv;@53rd8cmgFtt|Mv$O5c-M5c=JU=h$xaTPuq3C;Hcg@Xb+gDa? zp7M16e+HFVKjUM+`fu&Y+@H>I$h!WMZtb_rxl-?ombU~Sv*Y~Ge>k82-pjR*RzKa^ zcKC?Yh5asKy|OFs7CHa9{C+A^VqO0H!Us~H`MdA)}rzs+bIiS9{9z;tjE@uY8dgs#C6be^Baq``2nu8}k(v-Vs;7 z{MEbd@>})5)pcSI7VTMSKl^F3gppVBCy!5$l42_lKP%N*H8*Cz*q5K?zwSFc{Z>%$ z^7IeY^3R^DzuaCqL;hHu){k$~<1~KcKVp11m+$4n4{x9DeK+k_m9bmkSIsN;I8RSa zcX8x2Ty1zT-P(e`mG~nT-51}iyKX1D_hZuw^+WeG8vPdjo&R^Gj@bT`^SAH&3q6Su zzmXSw`uFU8skyt0+%q5O_lW&x5Zrp#@3Pm2XIo6VrB|<(|KXwjTVwOANB4q7Hm+Lp ztW@!8gT7GnmI)rpmPg~bZmoTPe|q(oyRW}jirT80{QA9by1(nE-GAc43hXI8{CMT- zLyA96KJwVd{NwQ>*DVHFlaAJ}Fq@og)zWtB(!B?_R$DsAUg3y)xKpQGG{|5_sIG>^ z`E2`ndw;+FuC%zWF5}&;pP4^1zi*zo^GErkv&r9rKcqJ=y10Jnz7KD&_RU}Yvh1^N z@7=F!dk&drTRzcHPXDxcnx*q{`O*sy`t0m)-j1Dh+~I2J`u6K}${+qse^l%D(}sUl zmf7@KQ?7}Zomg$cAoSuB(7JirQUiR$XyR-ipZeOeaoqy`x?FqB8G;`KXKlS=Q zL*CDu@!2xV53N-`R?qd1^GCjqopf$?q_6YoT*Xq4>%WZ-WgkzCxBRrJcG}%5xk2TX z(INAA3-@nTKd`@JzMy#I)f(mX8P|2q=4URy^`dH8&!^KI(I=kGeYo>?j>hb7ZZRe~ z9*rS_vtI1axZk^L{h{3O!{4Uey571+dD;D2ad%Fx|FO+dbz9{_!5MEByUY@Pc1rd{ z((#TH98Vst{Nwav{YU-7{~0*GyzdkXtCP9-X3<66QtkIVug$I--M-TUY_k3whF?7es@a)o#*g^_sP9w$F?HdK zTlWv&%|0@Z^%C#W$M-YDrEEnBoS`s^(2?kBUXD`eOs8j8*_sx~ovwNtMVyk_#B zp(89#@k8Cjzst=Qms~G@b1_1P>1k@mf$M=*3o0h^9)BMg_rmOqopJQH{j#OK`j4*O zZ~x=DtUm3fzsULYe6iU@-}8h|e!6EKbh`9e<)o*Iz88MRTRz*nMW&&)HuKm^x9qcj z|NeXWY|V6Ufm>4_Kh59$PwqcMm&IT1M+(>fsZ}gL6!3ELnkd$-b2ewS9-pdmxjkQj zZ}(JDncrSpH$Udp4!T~Wxh8X$-P*s+tM1fI&+*6mEvx9h&_t`+0Vo%O(%XOoVR$tDZ^8CT027Ek_k zE%@n^n%j~0fBoC{IYQHNUF^I4>-T?tlko5Se0H1i5BneWcc)(0-@?6~+k0_lRBg*F zw~4CL=UjV~c%b@rTWsUGM-P6sxhkx?#=H1q_rZN?Kkk2UyCmy<$ldq4Zs}@YuXrA_ zl_G7gq_Z!a<-aA;!j>U0aT0@`XvaZQ?{7Ijey;fwckTU}^``bb6|>IiDqRko9{6F6 z(NVG4rP>y?Ru610y_?C@rmj)=eQ`|B_x}tsb_!Ycd^OV9)l1w{N^PA(H(t_xXR2`Q z($#l)LV_j-BfYiWpSy5+$Ai3>C%J*1r)+NDjlMqHU%z`_f9+PI=CDc8(Xrp2UO%5d ztFB7kR{rLHhIpxps|i19OOqeYsPQkZ&i(M{bnv#VyS87>GuglK+y+n4L$?nr`$Q#5 z`K@9$lQ2A5vh6>^=Dv;k2OpM}CqAusW$PS#ZEeZ#$lNp2{41+IZz+s5;8FFMpDwgU z@9}fKseG${hi1>R?OV7Y_EntN){ky8A4SjQ`p&56_LE&^i|UP)jFb(2 z-m$Bz;R)TV|82oXmv8fA_FgerYjx=OZ?{b^da0eBkN3ao^qk{XX@d!S>tB)i2Ig-)F0s9QY&s!{bBd zRVvfM-H(c0pIjMsL1E*@&AYQt+_({^dQ( zkLf*l-v5WT9fC(-IL#3+;%g-Q(EPc)89=CH}Sbv)mq>6{^R#=Tkn4c z?mESkSzqEypZ;fHf10qUOOpqEc;v`3U-3B$%XbyeS$J^yMF-J+hyOG93;$=hUsBut zZ=U*phJCNjiZDjMvH$YX{_l798}%pP|1)>=ujK?>EX`*WP^X`DvFv`-$iN)4vZ(@5*~F*S5o6=-=6o_kT18Tr=TK zx_~B%HZ7kgYu4cxmtXD@ z@5sqF4ZY0r>}}XBUb}m)6Q5-)a8K!JimsNMvf%HvYtyFx^Ua9<@@4zHt=7vXzm`Ar zY43jq`RJdSZw%_rZ=Y17{V~3^lwV}+wUz5$2WKA+Yxny!>E6?ILXQj-jJ7>-?{(vD zI(^LX+1~KOAF{69GF=k&^zFRGtF@9Iz4qT+?V*x0FGzh_m}5^tO8)w+pXEpE#cQfR z@;?x`wk0?1^kJ!Evma~Q^8I?aCHi&QF6OYUt+5J|9%-(fI?eH!vtrSySR>mya{?XNE6`Xmx^UfT-*^;ep@o3iW zr(LfX{dDxKijB;ETDxVg?Mu7f?XRAvzh0g1{WGsN)3v7a!{o>HT{Ze2;zQc1mtJ4D z$Na;;d_mV0uY-Jxi%UzUn_q0SY4h2Ybj#lD$)CH1FLtF(UAF#h@WZoZ_q{i6x>`D) zInIAY?5*2tiefh1xxw;ThWGpy8NEjZ7Rx40xe|8n*DuZe$0v4fm)Q0_?DF-={~0E` z@5VMv4;re+g)65YuW$XQ^kd_Km%H4$AJy7BZ0LLAzcTXGESFiW`&J6|PrDb|_v~&< z^@cMGKRjSf2-#pcU(`z8^GjBgdtI^lk$%OL<-?Irj`owSdNXJ zney)PG&1hpJNI{FM2!D$yNc%lo3EAj^8NO>r1w=jdaZEatYe8L0Up7{M@}7ePMCU* zli6}j{4Hy*&C?(JdVMLrqSwZ4T4Cld8!Pk9t-88-`tKr@whQU5`PgY+^}@5@2=Cp^ zfvfyZuc}+WbYIrB8|yaizH2{qZS~*pJ6~RUQKz%Bp7X>%%`Mv|9ST<4E<10Mm~5nv z&TeHPueUiYttl+l^CxGj2xXu7lla@@-jCTw?NsL7K3ew8PUfTD`sJ^q*7nSDpIfJ= z%fiUI_4%AdB1*UHq%^rEJ@tC`_vV{m zR^F&{=7^A|%B1Y4_FZ*G*Wd0l`myNW`OuFJm%ro-ya?l7ck`7#U+cmn{~6NF&9;{% z1%8`8HTC=q#VPu?lusYsFzedMFfaY+_n%{}BCAVYEe*b5X{d)iD_BG4j zJT0)vn67^0)jjV1BmIwV{IUN#_xHyYnPK6`NZ*=8T*PuuhV+3wd3`VjtR`+o+G z+W!n~dG4M6u9d{U*+2caefr#Q*4ZblelE}Z&#*1;{r1zxtvA_R5B`|n^+*0=wb$%` zeChf_p7oDEFY^>R_WqyXPZQ0a?mQ*-zXH3CKi&E1KSQ0Y%=Xi>uKe)*?OEgY(Z6et zCf|PX*83jU_~t)zUv?wra6!|jbDt_cyf1kDd)p?3r+y#xrOj$C@@5`gyIy|Z#muTD zm%fI*=$F4K z)G3Jv6&+t(E-@!c_cW!pnJLG!h(w^{(Ju8e3tlFupkizxN`{%uD#~x)oHri9T zc@^IphZ7>JOr^r7ADaAo>;84yrFO4)yZ1kXwEk{yIor$+;sN!|a&jL+cAYVkn%sALxDuP>>DY{M_m?)QquH@d-L z`+k?+5wu;q_2BktPc}==Qr|Jj-e9wkKx%)~Rkf!U-gjR_h_j}DkD2y~iPP-O zPe~SjPARMx+n>JI{Skl1AIDXha_XBdCVy0Gm?wQr;YvyVTc&HPJ{x4GeO~-2SG`cFZt}rNKVExHf5h6~cD`Z3RU2uwZqqjtomVQ%efIBi#m$MQR^3ir zFS#gHNWfdjPxtuLojJ4fpNs7ml~&!pSWok=#<%#Esr#P2yZ0?N$oBc8{5LO`S3EvE zt8DJ_n46j9X1BwtPro&Zlim}(g;S@&CUtp_z{OJ$ZxZ+}$A(UkGiZO*V)ieo{t$cS zf>n~~H@=Hkb^G=_kv*W|Iqd{v&tb`1g%8;ug?HP@SA4bcT)X${z3HoNiG5lQ+De|@ zBsrBw&U;N}-o!g%^KPo1`}|zu+p~Qun^JbPStgg3l=7@;*K(9(k$bd#(xmcf->+Xg zoBea$^s6!-3#SA{NBes(|MB+Tt&F47OlO(*UNN%JI#t~qXZm6Hx60ig_&aQ@i&y+u zlD*5l<;&|{^}}J@%m2>v_O6^|^LJ6a>7SpXN`0oO5eEW_miA@e$WPDCwtVJdWCKaz z=d_>L-#mP*PIvyX`8?~ldcQ4tr7!bBylUElQs4Nu)3yh7b9a~irLaHqXYJ44x@MpJ>(l?(SAKcdyXeP!ze`_#ytnbrUG>U5e7f<= za^shcN*h}LGhA`_cwA{_OYQ9{^r{EvQkJ?H6o&IiZNpE=cRnzExxjR3SxPQ+Le7fH|{kqDwQ1RgD zsrmYXozjIqO>7RL();3G>hE3pb8GqTSHTxGOZ@)L^1T1f-?u;ENA+*B7xTD2*sWgd z|Bdsjuh+BaXIDzjYH4%4NzV|wD|L!-M>NMHmypAT4<;R%9BEj0uKZX1!L!*107^GpPqeE#?Mo~ZBlxjao1CC#*TZ`}f3HUKlzW+Z%n!iNF zwri#vuG(1l2AxuVw)W`Wv)iW^v(K7zR9bn*&8ZBlRhqMRZBP&Scj>M4e};Cq{rvm0 z-)DF~>|eXL^xvL+uYVoyckljp|H12ePTf6by=&Y`ch5Xz(Cz23Bt109QSY6SL%2$w z$_L)EkP|VddGbzg+0+z%xvN%p-Jbspr~m9XR9iARI{Mci@X;@tBA}K;@jSa9wTo(e zKYYAc`XlezhTksc-rr=M9vw@WrY_|6+H_B+($vy0iNq74&-StZowv2VWfp(O+daIG zBiEM7CdW+=&z>)|^x1ZyBM%<2^<`gpx|8M1CS^SB7g}E7IR4 zK78%@huv&>{)!)c=Z}V~=?XjbyMDA=y3FnScB77m>0Q?znM_W$`xGI(ReBDWbk<3| zRri~`KX~1)efP-o-ur#~c5VN;`~7z*d71wV>05q0TyfLAw)o-iTE*P6Yfs<#QDnV0 zG&!l}>XyBg?l-qD)bQv$$EnbKR&D8r>SLGwr29)n{y2U3?ULKhqVwi2&YCAR@0qXJ zeJ(w*dnx-n#XJkotQ8huTCc@nxBTHr?UD{ql#SGmM{C0Ro`L$YhC;EHQ@oPGC*fw?%Lz& z^;Z15T=n18r@74jbT*&gT6AB~WWC@I^~d*{N_XpeefIruZvE-bTVHm~MP}ta)yHY)XJzhoE&7sM^K}3HzmX<){U1%P4VwHV{QA?p`ExJ*+P5dJ z{^q~j;D_ejc5-*K3ZpX1XC1lKH?f?l@+_0>rF*M4|ysU;bxcU->jn@`v-I{d}&E=f+fQcbk1|pGJ1>_0Q>CUN5`$RY0o>Fs|%kccfyFX?hkgk>!K=l2Y#$?l|DG@%&y$O%Vs{-b^7$| z)yv0y9?5M#jACQj#RV%1=PobK)jqJ+w0qX8E%o2t>)o64>e|)q>z=INn73>F+_&|A zSYLc`UtHt!!*TVG?MHw4ckQu#B+V-wz4_9+EtzZ^7lZH0$St_Id1{9*r>C*d!|kF9 z6HXnLc>JzD?Y-Db-w)reAC9xR6zu#VZaVMWzL}5m`A)CYj$QQTcecd(+=CO;Pc3NU zoTlAWWUK$6x4+Bee#<|JZ6B8&nI5O!ced`P%DeR%Dw9?#>+>ot3{8_U)|UJFFn(*? z)<^!l*>QT;RCat^yKesSih#I^sCsVG8=Lw=de_ON?fX<;zC~l(e}?|`ciwz9{G7JT zH?Zu*?D}%!zp>NH?-ckgx?DEt_s{U3vETfcwXgaU_k%n4qqA>g!hG*brN)}mKD~-n ziE%GE#@6C-LdETK^4(pUlDAK%vh#MmAnIW0e$^invsYwWEpL3kg_9ZbgvWvb8Jy{d`QTKbZok+#`_{$=8ipMLGs=^c{E zjdv~HpX$?|x4I`$(=#^m?{QaCb&F$q=UzPiCt4$Tb&ueqD4q{1cN|eNZzgH!nMEGGyZ4~`w?0BaBcPxDgVyW)f2T&uFSS# z@=et;IXcZUXIf}@XwRCIe97Cp73PFb_;>Es@{TR>U8^6S{(ZOVyYH1fTC)zD?Oh}5 zxoobUm|>-`c)pT#>y-H_qM@I5xIVZ0`S?D+eOHa@%FK(lxBOLH7l}o!`IXZ5`m8p|R>=*t! z)qW^W{&MPvfBTnb{4?5cT|TZmXxoJ_Tf>y>;EddBhb)70cWl4a$g;38;P%OLW}ZovYm^MKa3YVD>KbXzbpUQ?k$}QJJ-Is9l&Hgd(*$7UHp5Fg*Z>zvF7&8I<+6d zhw7NF*Ep|yU2b&a(B^y5(kqXAyP*5BV7j#UoA1RZb3WWQI`!~yTg5bYib{58S?Cqy8YDd)wml!o@G8K3x0Cuf?{_NqL%g zcXf4y(IbmV#T*_1kM8#IsZTg%4y;USIC;)9S9Al@) z84)25h9b|m-Vt?1(I>xfu6dJWbmyL6j-}R$H3e_ZFy3N%_guEnL0as)VC?PVHbP4- zcrCo-al>fW#0icv0<~FQpI7L9m9?|@^7?i3ZC|5XkBWO`m(@=_P~o&mYh$Rc(gOyz ztLH^C-By0vy{@$XZA|^b_vdeZtDTd4_V~Na&+^XM<=!}c+ClcN=<{itv&=TR+Av4u znu%}CZY-H>bIku9XW2^DnUy^g7OekXeKljzwRdmd=bgOzc#?+Cwu{rI8vSR;nbQ#b zV}JRET+5#xk0loHyUqG`?;dCRw2DP-Ix|(u8G~hus_RU{eLwO|U$cAJ{FRaSt#~%w z_M0RsYkA90+x(E3{mt)EPg3;)XZ?M`RyJ|!)CPr>IZal7?=M`Veg3Vj^x;i$Q92uA zkFSnx*(gvm{Uc#jaI) z@To36f!n3%Zs>-GyMzqqeZqPtu_W@*pBqZv|K!vETYd6B!$Q^k05{#CN&XY69c+2PJ3DwOpQ||NG$<;*k{8)ASZDzUK;es+Ak4GE#XUhx4XV^3SaZF!)*!J4qNBr7{Z{==Xt#nrB zd!B+^WLYTBs&_xr0?bk-uw@_G!@E9fKTn-Z#d)7?mrU2Q9R6_m$cKt9Tc&pvHP=4; zJ!#}C({w8{sU)=e(JI-74CM!+2WD*fxqgwa^_P9q_s{avzi4-&XqVTm?WOs;xu5c0 zZ@s_r%iiS&&a>S3As_mo$I7MI@v7dvfA7Ti?Wj!InI6e=uX4hvP5G_`uO75XH?q$< zdvz;+>wM4KAuRqA@?rn($vbLh)MSiSROO& zajO@Pi~CXh@P3C)F6dyUIJS@T)~|eRzWwzY|K+-Cg509LwwPZ zJztfrw=Z3&s*E~kH#>8|-fQ#IfBe4v^X|=GKj*u}{X6@gfu|<);cac>#V76h|IXE$ zFR}6JyGMP`PNf-{1epZ$+ZmmSVAo+x3YT2BI`&8G!}Ee!^W?6qTf6Sz6%&E5L(dKe z?c6+3S(V%6El)>kXHVd?KHn3!-9oOPk{2ksVaJyF{z3J7x$IrM``c}J+5af|rmi@h zP?{U#S)j@-;v@FT;dGYR)#eqE7S407-WRX<9ov3v*7a}OyG(PHAGJvBYZY(Xqn^9c zmv`^mITLS5FVdO1XZIAn@2geTtVt5=G0J6IXIAvI?fT{a3{2Vn)_1q=^85Y5=XSi@ zx4%kX->kW`zcjD?sucT?TJ9tFbgtK!ChI=9w#Bu}`*!wTt7liVye{b!xqUje+Q_iY zLC)y(r+Hc4@g4Vs_kJw-C%XNkTd#eWg1=F&kgk*3-rmBwXBl!IB<~b`_MxKN`@{LS z@#}BCx;1s#^wLNCJedYB1><}+n*P~v=apKcW1{3I6?k`|^{2J6&tee|TqpxlZ)Q`$u}4*ZeqsL~3f=;zx6~?(0vs)-PK;eRugqvwhDN zFF*VDUX^@qW}Wt>t!s;;F1~rU=dHV2#o~SKr%OLYc^v657hq21u-CMBQIH+CujKEv zlnW_8IzLXSNIqh2r+(pj@a1W@19o0na;2MpgZtFzm6Eqoc_;3i+wrvc(}P;^kgIE7 z%)htJtDH+?(S$=?8Vi{4V7}-_{)c_pA6;)x$I3Z@8+rhUZksN>90a z9yc$vDJ&LR-qYL|bG&m)eVaVbAKQo8`C_lP?ed$s=oQm*xA{lwRA$b8n;4`2e%*3i zYmKSjR&Rd$vw-dD@fG2x>$k>jFwyU_6W(p7@Zmp0&hs~4=wM*^I++#Ms?EUw<|9t)9X4byeYk#B{ zR_qS_@N9E^?zhu>`5vy{a`Mt6*N7z*^OSzLaXmP$%wv#v&fx^xp3leS*nXJ(XAss& z{b>7bX7Z!EX^-wbl2v+sL?~+AS{qdtA=`$TNxMok8Y3SUnQ5>$cG~e)+;;ph|B!t1 zF0+NQ_uKVN&uU%vzqDnpi_La6k2ik~3-54j6Fhcg8t0r6mPdQkkJ%|*{lob2`6Jyg zaq4ZCkB0wKyvWx1P;-s$)5%A~ctzLvZ=K6?W}^8m4+A3|pLOqdn3h<4y8bPD@%?iz zYkw`j?vk|htoOXw{k-+u_fI|Fd*kJO&QkkcV_*IblegiIbZ1u9Z+!YuXL`_$js3|d zMVtLjGUup>drW!Q=dW%(OaH)r<{wqHM!EJbzinpa->f@zKYQ+;{hM7)0&1UI-kNUA zS3LKprGg;n5WGLZ5A}uXF4*%YUi_1--cj1CeI!@*d}GP^O>G;eINB7&p9z!SqO>4+ z?kA4>$}*4bia!`fe&mre{J{I>pTS4BMfrYR*TsHK+gVzguYT*xThDjKB9lZdt1~2+ z?40K7-95Wo;^slS>E3_##ZRxkt37Yh{2$SEv+mmO{uP%0I-m8A?1$|QrQN;Jp4;|M z4w}Wk>F?^0tM5(4yLfN!SsYb6z=w%nQU(WtA%Uim2d%|>HX^tD; zati9Lit|jaY`^#~KY#v$OMBhL_kR67?LUL^sV(n$YK(XMFkO`y9@SF($A0s^-Q_#4 z z)wFV)Ypj^ZtZ4ztTwl+gHbT?w!sWlU!%FdI8@;{@Jp#9o0^E z<_Y(n7QNIy*@F3Wk_W@Y8~@JN8C?3q`{Dkh`rfa5i|*yK>38UrT5SCt9W-rwz}mH& zV(uQ6ss7JUu6ioQ;zoq*k*4Cwp4wNstK2_s*MENY`p>G@i97YvuD$(x{n?+;UF9#A z{&jxTf4HBuV!D&}dGWikFL&AX=c>;v=I_uqS)siBmd!!y!#0VJ5A}3<#xTFip89cH z^9Q>}@$4_^l&+ijTJdi>$k+Ms+ehulsQBn6o2g=+R<5OT4!2}ws=^I_C!c*Jn}49_ z=8YfDVIS?AX1njdQfm2Sb+^WUhFTfRt)7#fJrXl$Q%n1Fc%#9hx0VV&-yi+YAo(Lc z>hH8Iy6jOOQ_nZ<@h*O7AyeV-OCGzOR4rrMGbpQ{TDfT>j!&^k?m=SJSR<{Bpi~i$2qzkSi)5_N}hWUXe8`;`7tX zYf8hn@7eAtu<$dBkWQ1iyH@DFaCR2K;DaCTD*jkA&*hL=Y#6Kdhd^vS-;I+^VW3P-dx?3$!98p?k+9e95x}(@Yu$-#7Uf@uf$ht-;B}W z5i5oz?H`N3MMmoFsr<0$*!hs{`>*V=emMKLzhLIKYe|w{BDtberd?krF@0L}iRUxR zTUGKD1V4-NW<_uMD|f$a-+uL?OY){K|CW8R+j6;v@!@&S7j-IIH}&Oe9gp0#>&jNm z#aGmO|GYewHZ|?Dx$|X(W2b~AW4Au5ub06GsK;_F17UQ`JQ!K ztu@jkdDd&}nDxprUt{8)VvLw+)V)iL zDrYa4%KLOj(1M+xzOO&Uk@Fzk^z-|UQjXHS%8$}JOx9+5f3$B`yJDl$d^F6gNmq&c zM1Gvxu1O|ok;>az*IWwR!K8H9q(U|Q^scU-W@|s)x~*@rRdcQ0@BMr8Ki~VkU+w&g zKVttGIQ|$vkja?!uf6nPy-=);?~1#x7yT`*?i4?rn!R#q*^UXTS`L<31X%F|*LZ$7 ze;`iagO8Q&m+iaKnrqiacv_z>wY_~WhI3JL#qwGChJUtYJnZK=z{)i<@Mz8Nhy85c zAKeee3z@Fw^}8Os)3-ce((5DtWTi{qW?jqX&bQTh>oL7*Lz_ipSi$edWzh|uqSq># zNjo^8uRAvETX)?olYM_hubf8(FuzHdYP;j4+S_vu}#kr(UR^=z?d z&#bvmtWp|wX+weGBpQk!=DPtJbtI>zdod*A+u{kI+Ebk?NT`}i)| z_^!+-|222{t#>8fn`;xI!nt{#sdt~b!4amYQee*9dsD)~bgL^v<@dg={%vO;ujh#R zu{!=@&YsA`lIycXTc2&%c3szG;vUsW@`gS)!%G=f^UPy&P`cl<{txS#iq(hbNq>~v zxTIgmxm<4jl1y=_Nr%fmyGZD4JS1b&kox0b+)kPBp1Ldl83g~y#=VaJu=4AE!5{I5 zey#7&T{u7D>ek!0tKGI-o)*0QRA!{l-F|;-gFH=r`Aui4E~6QY94yxm3?#h=bzeH``4~{zrx$|q`mv+ef_^)U3aT~-t>>} zNAv-!qsP`RxgCFUr7!!Dq3z>0?yYKz_d%Aw&(_QW@|D^BN$Y0sgneTeN;m!MZPqMpTw{7h^ z_-J=|BG2Q~hD(H2vrLSBdc)v3!*%&J6~^BxcK)bb@M?OU&gC_KblFy#Elhl5VN+V1 zmA=1JQ)PKT-`lv#u6IFkZV5)(=l?jT+g^VY{-Mj_-Lc7u%d@1kD*E14m(I+YCfO8w z+Q>qyW_9bmqXz>5Ha9Kk+MoN|D#x?ArzeM>eqR2vF4xY}`ls#G z^9#iv&TskS`5;%=ZNlBxyI7YV-L^d0_S)vsO68Lxiz-yj{k6R{ctm>79q3`6`e>(r zul|Ru$48=UmmfJV`cf)v!Hc!oi(mcI%(at#^4Tul==^z)ci|NWgp_+ex*uAbf8aj@ z+dt+CJ&l^!#5F$(_DJO}+qqoc?%JnUk7~P=K9x808cz-2}jPE}i62Vs|$FImuWpAvN`!=c9FTZ(VQC{B`gC zynAIYe8f#m?w!7L{m=GK`-69UjBl_LG(Tpservpt|Dr$6*Cz4n9lCt%`>mAk7Y|Pq z5G%NQY?IQTJC!Li+lnX8TOae`enXw^>t))jXZz-de)uxa_w3fT6|cViz8Q0{RK|$+ z#4(j*kpn9=_%3(-Z2s2X9J=YFc-IzPwf_ugL)fZpEB?BjaveeX{MyrqimI z?QeE%e(Qd_V4>5crfY_u>(!rrzW6iy$>s9yNpJSO|B<`z@e5h*hc7SHsZ}gLW}LP8 zp7)yDkvE-_93$2|Y_L2a-H}#4^~N@KY4@CYo1Wd%-d&k6&tw*7z>_P;htTaV%~R2MqV?wN#;o|u>2<+@t;Am4$ys ze0Mob*0HdePk%?9*>uc&vsujn28#!Uak9VlVsE~_FZTF~jpgdd zY%AXBO>4jWUVb^}=$0~}+6AWT<{R)7+X!|y+}?2bjAZDE^T{8-AG5MQ(%)#6-noVQ z;mV32d7;ehQoYx2&N;Si+pS5ur}yMA8Xmhit)?fpx8EeJwCqd+t+)xf2JLJ70+kKV18(=anSs@4VylMSgp=B`=`47d^HA1JdEy==o-yH@vV-$rR}?%enz!gu5D^WIB)u}*E~ z@(C*yr%pZTqj$-`@UxB7x8~(_=i|4??O*p}y7T@I?^YKU?DFqWi&*n!YxMg!6^YZT z9nJ)Q->zv8+jBGFhwhcHc7{!-kH&NE(f+7?Nb1?#m6^NJCMW3Yz13g5W6O^0$VnbS z4l8blNuJ()Vzb(T-o2{Ksk)~7?me!X)cq%P(#xe=oa@tW@5`=Qw%>aDt9iRO-z?Q< zsn9+=eILWcgpY{|kGFiLNEtFw*f}a1%LyhwtY_+^J7BH>4|1vCUl~UZz+xL{ro3A|FV(Mi%*sr7Mi{@tqxcvc}-Y^@r=b=COYG-j$O&-F^FYwMmz* z9ge!0U;b^zEl1Uz{grH?pHfsqCby_BFrE6_>xc7^{R01lUfBtK*i*=Tw(8M+zg@d_ z+`1SQX|wv<%vhd@GZ{Lp-h}boZeS^u3(fry{>ZKU;rdQ9VfSU(RZFbCF7LI`tz7D0 z@;dWj%bc~ZckP>7QXW^hJyvGg#K8OUCvSg@ z`e{AyhWs`v66vZ^1 z=*FIK<>U|NkH&Fbt`n=6_5N`Ck*cptzpY$V(d@Ks&sNWWw^z8m`{eZ5W1?llwC?x) zcc*F3N%%OQ#g5beqx|7`wo>lby3W2Ij?G_G(WX7^g6YXG?=|0Q{=^R7&q<}p)w zLQhSL*5sbGVv}lCKfm>7{fAwbZr{6QuL2^+N45F~_c3-Kk(0gtN07gzXArfCesI^{W%K3B?wdAW$<<$LD!$V^AjY#;>gU8QhClsyH>x^&*d=~0 zjD1>}c=79d{(l#~|9WF5zx(PQ`$xQdhka|^&Q+JjT4iUhS5Zru6jIc)UAtN6MA32u zzU(Wn!Z!LW|9Db&%l13#f7}7B`6d=?pWmnSCoZ4y$85(BU;CT?I6l@*kJz?rPuq2; zY>`h3w`YX8mF?cySarvZA;|N2pZ109qxKp98Q7vK&fPn8@9~2#^M&)mu07M%ez9&% zf>q4=&2zsCyM27JO5RA|jah+lSEt#UOZ)#A|Ml+jeN}C`=6qo;&n8FJ&AZq#QnliY z+`P733RhluQ2D~FPw$25E>?-R)uh%hJiF-ibhD_`x6@{?%KmoYP>#v(+57e1eDvHE zs4ez+;c0j2RX@9T^cAjrDDrW7(;n3iw;lFhKHqK1n9bYYQF^duVs}{Im3KR}{xg`| ze(EqIe)jj-{S4PTYJ7iKKCUf(^l#_tsEX>qEBSg`*D8H?xvqHiVY}O=E!#i&bk-(a z*4!~QGEY+GpfT^qHx*qUH+zN!{x<&`{^CEwgO}-D`8V&r&;HL4U%r0YcH3L$AKhJg z{^-en@@E&k4l_T!{PfC$t749F-HbgdePj~H46hUwlNWXwS4t0Btd3$Q?o6xazdn3S z{&!`T+IDm=CUZpSLG(`FHN$q2DRp#d%(}^9~2MJ$|=SJ^Rj~J?Yk;jF<)I#J)V{UpxOF zt7`qOv%jY6s+#Q5UjK*Hy7s&KWjUE2{SWmeUhV3exu9aP>vdCI({1sQFL$+=3rB4$ z>})~qZ&e@Kdu${hd4Ab;@#|jZhfk-?O4xY0#;$++7{7U-n+H;Z>%lJnnWw^(+v>6U#i>jL=AbT7I6p6aRjSwHa7ALS3z zJO7E+SbV&^y7~Hhf2jGjCO#u#@wTRG_wGe6`sAjPvMns_-UIR9XM3)$ zjw`>J`)s=IzTchd%L~*r+wAyr>U#oUthkY#_8kR`;EE^JGOnO+xzH9!Fz)X z|4#63QefFLKVwHrGJC_)84o_j)WmOTZQH&oL#$5tLw#pW&a7kIxsN#BeQ8eS35Y!R zX5*Im*}EDyPfZF9?Ku80_)pmeJB<%p_jRuPqd57|w5heyHoq!%Zs5$eU`{M7)k&5& zuK74E_27Xg;e8dKADr*lI=?ke=EK|PhgQ@$e^k1CSnOBs^gffQM^0P#&28p-`%!Ts*3N4 zC*4nXuM{)jytTM^qXok}vb$$F`6B?Q2r5?{P0$&9*3^-QU^l1V@Y4 z`^H`#yE&yJS3F z(^&IM$5qu$`}b~N*Sl@c+WON$(q+4&Y->$D)t^__Rh7K$h#K`XQJ*NY`b+$GIvpA z%whS7Ig-sw+^ZS4?o3*^=GL@pf90R1zOIg5wK7wB{p(-XFZ{AUcF*Y2Ke-?7VIS4D z8b`m3irszZj+J8Bt;LI0Je@Izfn8;S_Sxrumu*?C&y)SD_j$`6|Kzn9+oy?z%Sr6$ zx>zb+eKWc1>V&_GPiVYV_}OzwA^VsK5+hxc9k4F1S`yx)G`vH$R{gFC0mD(bCY zJkR<|Q^=%yX({ZdBE1q8Go9d9+VlMIelESmTR(o=zP3hb-<2=9sV1kT>zT5o=FWY4 z=VZRkcOkz|-bEMW@88+1Wpckv@q3Yo-~Q=${xif)dwEs#^0WU8{4clOUu^3h^W%H| zLtD1vfgi=2|F|!Vxubga%&gdx8>cSVO5d*e&X&7#bI++&c~^|)&r-OTp4@7oaJ|1m|69$zEC0lQbo*X4 z5&7_Rb#}Sb@wOWEQX6sZrRo#9d322`=eeavs{Utio#<6LJM`nk(B~i2d!oOkf2?m_ zU!r|ttG!?hui9C)rmO3jVs#AtOUtfRcDyOuQExE!n2GQ6O3SUQKgXNLnuRgl?sE?g1F<#3vW$xyyf=X%jtz{*_wGOc1**iMhiyWR$g13b0;;qTxRO z`KNa)cQ}OzpJbl(xXx<6WX0@bRq^fi4`W{cQ~!}KQh4u`(Tnn3NoGq6Hl^yOMz200 znc%zXeZ##(tvwnEU;dd^*hl_oe7u%*%l(!OC)dBYwY29~{t;fcSUV4$+({{u9%s$u zJ@;r{P9lqIxk2t>h9;>U_t$LwvdjPL_WP?iU!Kjo-~V;r-|e#g_?lNyx5j2=`!XxuS&BYZq59Wk>WGkgA%ew20 z9#u!WEHc|IJmZ4X!~LbZEpDx`To-&Go^Mb7;veT9zMEP3(DK2>KZffo+J3!`$$lNo z^5O2^wQFZ-TnpAbKW|~P(p1|SsZD-g9{sv`|EYTZ-pGH>( zIqIc+8L!^{c>SFTUWM)wKW5MWGM+Q|xUy5Nz4z6v>{fa6m%41a^QzzWq<5ttgG1^L z)s%xa`KQfUdaK`e-CEk;reo^=BdpGF#}&P$6|a@A?x~(fga_55Kp*+hcF~Z{>rvZilSTa;1sgzqK*qM8Z_%O0I>Ti+El|v=kf= zVpIvc{Kx*I^~3XrYvX-B{P5$~0e%%kX;7j(nxlggTv zGCdP#Zm+prHtpZ7ShLS@Z}^{+`M@Kba`o%)h(t4~nCsRxQ)X;gt$AXSkx${S1*JM6Qnx?2AN{L* ze4gT!FWuVSzcY1?^ot(7<)*Z!&+^_qPo-OL4(M#$mE`JXae_tWdi$S(ADoZmWnOQe zW!9VfC}Zs(ACqSv+19cxPUP#hzP%-J`82h&&*yABvM@|w<4(bL73S#q!uvR0OVuCI z)))9;@A2>Q?7&^wyT4_u5o0XcbR;2r#oPcJkG99(SFA6QPLr2hq#d#B*VdoeFRO3w zx?CHw?N0RTl>ZE8tG;}ybKmuHp1>uOdV$wgZ$aUNfqS3!9BnBV{8{8-Hfe{}@01|7Cox?w_9_1O?fWD1 zVdmGfe%7-)CPm~f+_I_1v#``=lTN^4p-GJ8<)M-a#giOZ`Xzrbf7pM-D*j0NN58EP zz5Cf!x3?dgth)8uO4-kG_fA`#F1B%1otz@sWzgj%KXZTCjOUAf%zyZwLA)mHhvV`X z--q#U`h{Qay1Q(Rjm57yLDC&>^PMY`9&$KbI@CI464T}tNd_Lb7I{yfzHguRtn2?7 z4wT-0vdw7O?fphyzCPW%*K*zVpL_3~et*l^cjJ%3NBUy&oE7sj_gEI|F5CR@$AWEE zt;W}cZj$0_`j#=}WrL9zv&7HfO`A5Uvv`f3;Nlxa~@7Kb*Sd%8}!&tC7O-cy0w zl{Y6C>TBNoy|s3U(CzS9A8*UI#c6&xe&9bt_r%P7OjrAsKGyvf72dk|%A04qH*SkH z)0U8WR=KXl=l;%nPtzuDILvy&^isv_hi$7rN>4Q?shPv!39gMxM+2pIGew`dzu&z5aeJ}xWB%jw+pg~l*;U_NCx5@rY!TW)#{!5q+Pb5V)3jeOFlJj zsa;w&mG#8kPkrsz>%@N?_xrfNMPcuyFRSOW-Md!#<@%-d%3YQ@Pqt58SkZOly#&|A z31)^d$!vStK7aGsdv(j)AYS)lorSe4=H@=z+3)%3g4tr-E$vyA$GPQieqcnD6!l!S3E6#5|z4y3v`QviQV+B@$vPzThF1i0> z%YTOEe~G%$**|B+KfHGS(|-o#6(rmS!vIsep6Bl(JG~#XA1?3sqo27nORBv6i}r2n z-)p^Nul&(nk-ah6L^n^_E!y+Zr_h#78z(t$;rJcC^E&TG{X_T6KH7NZKJ*uVIZtBC zEKc3`V*eSOta|#S8$KHB=x{rb{$8i6$Y@Wo(6fuje_i~y^w{<9eVbkP_lI0uqp!4Q z`<0{}O_pJw&Ll}5)=+%>bN17mCpE{LEN!z-*=haweEdHH-=4t5@dwr0w(@glMcwV| zXKT~Vv){VT=3B_5kojjneU?j^a{S4J}Sbe~d-S zriV=z>nv1To-Mnv%i>H+voPCD#Zq=_vj@yE`__M$eB6A#wEaWNZ6B94s?FpnymjNH z)cUsNJE!|KwElKz0&lzOu$;Omi6QiBeC~(#x9I;2Eq4DI4x1|<`OlDTrT;Dd^xyfL&;6F&`b4(o^7i`M=H<7aKAwA1 z^4OoQ->dbfhrhmdeS6-i=O2C7on0TBv)Amp`_H-856$oX_MbsK$8Y(visGotUnCcQ z&MED6ezVW@SDyAR)2{M~R_Qq_rIWV@#$G*pC@R|9X1U;z)*HVVjIvMs6Z^xq>x24{ z+~i~Czvi{;ep{IS%`)M`y~e*2MNb-a@*chQ^V84IAGdgZN-*XrKEB>t$Y1!lQoDQ5 zn&k`GS1c-j^4(6m&g%9TsqNPLuK(emAOCoMM@jz8=ZE5q=S6FlzL&Und$!n>r2h;q zmly6Xn`W$aeCu3=ZBKd+&RKp>dXu&P&EJo<-aqQk`&uZ==;igQw*Hl7^Hu*%ls=kS zEA1?LL|oAA&6~!DJ36N59QAX1d~K7i9n&lR56Oq`aZQhGI~??3?H_Y1w)H%250j!J zJ2xAK6H=yWA5Dc9_8&H&v)uQzw*w;XI9XR#N88x zOe)^Wxyi*89+z5W(&G^qF)8b^=h-j6?SJa-s~7!Sx^J3q{6zKj@qcFhXV7oE{WE&i zpW2VhJO0W2@W{TUwlr~h)UEgC_1Pb;{@bZCCskBw^F%qr!+8^%wLVUabAGIU>-f>A zyt&K#vlACqR0lL4npUTBrKGo1ZC3W_&PO|Dmb@N{%MDK+`QM@tUOv-`uxpw+y3_45_{f? zYq?6No*(+~!#!rwXMgeH`#0TEv{igmdK@i;&oNJ(=rH+R&4O!XllJa@`Q7`&e*K5b zzAt(7w)FDX{>Rz>8D?GBzo72Iua^6Q`!|1`YyT#S_3PrhFE_d_i`jc;;@ieWlfVZa zai*RrSoWf3{hZHn@Aw?-_HBI) z8Krf4WGvCC*%nYX>y^%ek1h5hirYU6{}ZtGWeqrOcYMe0D^WS=3hx+|e6~68q)D8+ z+#9c5v+>{5*}MPjtyWe&v$(S6Z{F1D_v>BP-n?<=j_T4qn@<$VWW@F_U{>2#@I(5y zU$(s9pY(^XWkbJcuCWPR?z`gVt;|55=~31?1u?GqCGVAW&Z=%bQ}#djFh==?~2#4qBQ zwyXDQ3&ZUw>o<|h%OW0yCCg}`N z2u4S<2_DjAz4m#&QCQmNgSv{!46G}!fBMhR_MgFCceb{7&Hb;RUq0Vuu{QWy%SXMr zA5Qy!l$*ZtMV-Fjo!Fx*v&!mZ_ibw8Ty*>CP2X(Q!>7~UMYPQ;yD4`e(%{hU_g()P zgr8s9$CoW<@!{%`lVU9=SDaJdoDzLHA~x=k$>)H)@a~Gp;HCm+!CC&Cf7si7KYZ_0 z{>ZmdZ11Do`hfYNJFh+1;2po|uI;X9ooN%NO*pto-DJ(EVY_fkNx;kvH$HqzU?0--9DUg>qvU^&N<2) zTb^{em4>Pay?dm>TIBY~pW%9{clCbzPyZS04|c{z+P?i)|MUmwGB*k6_9n)oH6c4c zy3Kz0($t^t$K=ERq?-M^qSrb-KO&`bYf7D$+M^KH^8C#VLY}%4Z-h7enEq|X$M-EZ ztPgaZkId?Cs*zopT_?Bm+Vg$ab)I`iuWmKmB9$ZXcZE&yo+74Vm3FOzfgc;ch#y_w ze)gmPf$w$VxgIa4S+#7?{k_I6**{~`YX3aD)Z5P%ZAsmEp!ndr;79ETW&MxI>GmI~ zll{mye_5%=GpIpjcpR;i1eE%s=-!3WPxhQ;RrD{Fb zC;RSru|3Hj9zRNNEBWoZ>6*#h{JZ9LEB)YPRPqd)%J7xjMUA;;hDKYIUJ%d>B}Csp{Lfs^~i+lLmD zoY%Q0>7H2}nrZWPo~J_1J`afmo3sBJbW7wI|E~RG`}jYDV8$;WJF$;$lT3p-w(q;L zU~6>!eMgfILdiSI{yvI6-=lf3pLcuFyFUAy^M2Wjt2{nhcKFcBN83Mky?nj+ap3Q4 zv1G};pEo4lIsWuh`J12SZ*uGR zpFhr+{NzBI#Pj~RX)5#7`&{|@mPYzJ@dvuRKl!dE>`&?T7p8gTwd;S#+r<65V0FCZ zKSSDko{G-_lWskZ`B-{)|F@#pL&xr2%ums0GIH3LvPgqNI^4+PX-R! zHkJ=oeyz~?D$kWA5c5K6O~LG`ow;dhXU+=e+^Lzc@x;Qmy4*Du%Irt%H~V*&_H*Bl zaDMz(H~;I|+O7A6YS$m0 zlF&Za)q`wqEcD^P{=b`exnheJIaapXOXyxc0T3=oQllpYsJDx8xSG z9x;qs5gNQJ&*xDZ=SLAcgwb;3fBW4%djCjm@!>el z%SW4!z5S-`9c%RZt%lu270=0^yJy@o_4GZGwek4=WSO38={^4$Qg`ck4F0-sxS+C-kmsnczX|o$wl{8%`-)2*q`#|E$jX%Zdg^!NRee&<#o(oeKOfuFsI(BX2nK>M%mW8>h=e=9f zrMgDEZh!shrTewFuMWKA`{m{LnqT+-OuAUnQoohG>;A*uzOB{j?SD*V_G!&N{Q1Z6 zBUZ6j+h+1^ayh3jcdT;pJl#v{CkS~?=lv{pkZFCU{g&h1sTckTuFI z_15{Gxwu4e)3h^Z0uHG>HCnoi7NH zkBJ|JJ|F&7-c;kg%)3ymyLA1n%w4T>pH6mExp4R7w~X64D^z+XWh&O)47kazwAXG= zm3`y(4HfHU{LXGg;t3JkCYL#L}qODoId2CK;q+^+7dN5TW#O^j47oLD%? z+BRM+&b*}psIvsXmL9FR>mI&8J8 zO@(16yQE43`(B2)MP14MQfDU26!Cf$nXF?jU^t0=!HGXd8SKEzx>AadYoF*;vUa&0 z8``K*z{kJvAp1vA*1pxLt2lY*KHYnxciYaU(Awn}9(>4Px(ZszCMt40;Pz^h&F(_$ zjA9#@)tT3?e6(}{QBs}4 zT?SeQz9{rfxs!F6jIdbXmsM~us$NXnlH7nwxYR#IB|GwyJe>s0-bv$Rxk36-H zYp+h7*O_0RUb6X>`2Lj6yu~|u-|ny6S^0UIjiyC%>%nt;cAGyOe)ylEZ|i=crB}Cz z&pLWIQZ^(i%jjq!%Z9e>$of)+3^lGNYgTSN_4t_rkB`3e&;5t*{AWhQL?Hq*_vzkS5({Px8sy_@!z{`_aKZPs8|Z5hOH;z3B~ z2kV1@HP+icm{_??yC=1E_FJ=S->zR5^vyS)lP=_1u!(bVp`wmtjNlw5cG1jRfzv}Z z_V2p>W#8`1btk5+mvrAX>G%8m-M(Kx+gLx&xT^kR;>YH&Z6EVHO0@gge@GpUx+Hcy zVn^F4>AAPMcc0FZKQY7p=A$qn<(k!vU0=BCMY64?ACqsLoMkr4_*QnE?B3m-(f(6Q zdF>KqS9`6toUzdTGlTY1CYz0)rS4zfpZ31}pXPPNt6yvN{xbya_O{No3*C9`{XOmz zx;JNu?rJ&z^lomZ%)6N}la5R(4*%o&!TiuX&5vv6ckBOW*uH3sL%+xp#){=8d%>#kGzl%3PRE~!_W|BrR!G53$h<0l_C z{Gt42R{kGV`47|WW-C<|RVKSf=WpX)u>13lLl=+kwywEU7gMwKUFd23!~8tzN2Y)L zacM%u&Mo)fOkP>pKRNqZt*)Aq*&6zcq z&zc=!G&<@+qRT#V`gO{&&t|d zi@EBjcs5KBlAiN<-p=)MKdwHg&UJoty&-2yZSm1r^&MaDM&`d~o!)Kgk*joh&DE40 z_9@Yg43W|&8v6BF&iTh>U(fr)`f$I1??YdwXvdO!vXkFN*<8GHLQE_}O7^~Xl0$Nt zyxqfdRS&LI?VaCJA}^D9Px-1|&X)fSLRkjAY=^3E-`8CpbS!`KVQI5VZ$dV5?4F@; z&Z(!wcX{;O%=hu7Q!dm_e;c)IedUTh0eN?yy5yx!KGk2w zcjDVhg~9{tu63@)*srS+8epZGW@$!#ZE1PoH$XkNui9 zTWxLS@|eG~US;afU6&VTxrxVLd9pxFQ{yqN2^?SM->ik;k7yXFMFI*=D_)`>?;QT%K*Kuh#2;zWVlW<=6AR zTlJpY(gxncB6{G{nM|HjE=Ag@WqQ{w*tYyJ|M2+X@veOWF+cL1qjQT7%z9HS`F!)$ z)Ls8B2yHYvalI*Ql6%^@l+WD4_q(>dd3Nc3>a9Q9;RgYtnwD{Pgv6cdq*z zd-dAv>`PbdGiLKj{}Z(MYMUJM(QU5kzTdUh{vWn&EKDq35qG+g$4enKGv&^Ht&JK+ zN82uVeDIg}(f%lO-PYVCKW3K37VZ&#U~3;k`;SsS! z{ke`?NcgUr{1597Wj_6|Z{bJjBZfbsat%)N^;Z1T+j${2@@kl?r{d0*omwI?lL~6j zC4Rhq_2e$Mdi*;-eYlZ8A*$6md7B^k*V(qzP8 zd$i!6=7;YM`_$v)xw68;rtj*Tuqgeq$=|@Y>)s}pXtqbFM2e}q-3s03#?B%bmhdBQ z!k;jI$@+{_irX(-ciDVNDQLnWo_jfy=9o!ryD^1#afaKIcaOeY($1SVt@?ZU<&P^oES<`Ply%J}6 zLGP=s!>d{QZ|mkQ6}T0{`8MI+p|eVrPq%-%8eqWXq0SqhczsHZ-iP)>vC_wGeVNC; z`_j8V?#cVE{W^C!D*M`{Yo0d8TXk=RN1JA{J(kIqNRw2U7|g47`|U6LbFcocy?1S8 zjlKQrf9yrq-)cWv8+e@9`o|lw9Ztr0Ie&fA<%B}{^^-CshsU=tuTZspyvzdimpb?B0J(VeibRZRK<7ml#*}l&xm`?j};kv}qq##(lmmf0=%L zet2wo`ns3jr`x}bTDxt_ufu1xv=)T#niLtkO(T>ep38uNX;S$9Mf+dv*)#t^aGCej z+h4wZjxV;0{@~wZ`kz7kkJz3aAI={+ywow%x^Tx#hQ_p{O6)wA>Up7EQicA2>s zZq{N9^JKeTn&7|a#jo~5f4L7`zYv)n#q^^-_Ud}YIVV@S>xh?sKK1OGz_}-qJZ%n1 z?9CHIl11k}@^7nS`q18#crkzJ-peM^Z?g`0-iu!O?%t;dzCyQ;+FY*ef4bS7ry`9( zBji+{*5tj@f8_qtFOB~i8#mA8t@h88`@Og4U;GmFbKY|9nTfqO%ccc*{VMcdXMg-Z z1HXTNsXT|zuQk7Fmo=|BzQ#L0kHfrICP=5ym`yssdiBKS-3b!UO>YF4J<$wHl zpQ)@(-8a8)|A+q!28+5hVC{RxXZ2fT``aJ>C|>oW`$%}z->u6UvrS(uYkb80SIzT| z{%Of<&qYRJxvN&)e7fVw_483j`;XX))ic&uPKvERGC9+ZXZn$-7RkNkBu>lBJ{+-{$7K;!)7{bH#fOzgzE{5z`VOm;9?LV>~4L*G-@hnkh;_7w({LeNSpL}lm=s&|2wTX}Xxhu{ejx)Sis(n`9%j(!{ zzO}3FR4n@u9d+ot;h_ubCJC;6Ce+4}q8b_Hd;jpPZ`SAA?n&*Re`vDZw2m5G^|^~x z_w@6W^*-4mqht^kD7~pQ!Qq_43AP>n!atNBvMv3XchvN4*d>4I7+lsCH>`JW1LJ-3uxyX5kkCEK3uKlOe4-Rl1glhZU!J&%6-Q@=X$pOo@3 z-J0}A%pXsdDz2(n{M&F(tzJ)!v)Q7@4%La=ZYEnde72b3XIA;oA?TI={K=cP)d|;K ziWl4y{AfLw^ZKI?%P*NWFaJ65nPFTS?q ze#zS>@3m{M$CqyN)Eg|-{%5`Bhw{QdK7VugkoaNio%z zV&fL>-p&@Wy?vt5f(aZFk2a)Se{VXUw<6uA*8cb}|F#m@>cC4ivFQu5XD!>j%DrSm z`}U)6^sZ03J#AvNmYcakYA6r;@@+FetoQw4S`(^PxuBsj-Ku6t=W2| zE3xaHdE+#bsq;(@PjKMk<_Rmk^@s7Yoj_*8kK3*nYnY4cRi@oKJHIz&Q{S&Q??0Ev zA1_*8s9g8CB2!`;-pz3_?(NH_`@E=6 ztrvUs&+Lcvf~=~?cTcZabA4M{tTWp)j|aPFaoh^BFcRr47reTu`da#1(aiG1b$<-6 zy(vAL?Kh)hyYJ;CTQ*)<)^zQWWQtwVSuTUfA_jR2W!cr6wtV1k*pvC;{o$&(+1bhU z4{n=nzGBk2D8(nX%--9O|0 zGc;Yi^RfSy^3i$PAJn_|WIkfAm)2XDSiI!)L&GJr_rLMfE#|uGUZnh=!AA?-6!@VTK|YYuk)kaB<4qRoNoDbud0|>ro+)EKUf z_s?BwH$A_8=}Yr{YI{GjT-KNuGU@&KU9K6$zn2&=o#J4TE2`~3zE7@Ry5_V_aeE+>KcFmfJvTC`MJs<&TCRT{_O!6fJv_H9zCC&kde^>2r58tSdZP z7JtK@&!nII#eW9=nzvfV-L5<^d^s!Nip|cvD}ss@(iIc$7z$N(dB3y%o<3`B@cz*J zy{_8(cfNRE@_z5hvaGwZ|6XrT{l4yY+sC~5Sreme z)^3T3t!;b7_BpxB!O2_gNyU}b$4{KPzBhc8Ncj+spHjA`0hPRBx=WMl;y4W`!aL%t??kqRII!~xJW*R+;UFG&HM&v|5`0u^182Q## zv%bCeHp)_|bJ^DZl8sOQT{a8|?!B*ea7**2iO0QOt+&a3_}A`B?xTQASL z?fWr=Q}p7rNVQa& zhD=$Zx8N(oSB3|5p}O|3fBoClc=YA3leF`X6~_rn_74j3hguiMU-xF8{`Jo1+fS}8=?&xjs-3HF$M@-p4>6|;>>|FUhE9v! zz2{Ay#qXUGNBvE=EtTDS`HuI>6QAPN_!|9uqLajGJxybVjQEM490zSCF^b*~bdbx> z&0eLG_Riz0;!6MJk(!0idN!SLY3P}-cFW4+F=Dniu53RXk)8c8uQYIl#5&iLf8S5P z{yonB!}Db}`)=MnTO})U;KA1F=+~lqnk-Gzw)VX*ag=)GHbElb?Pr0R4ELhhp9K_g z9MjGZHg@?X`uFJ8BO9aN86TTCb4l+l8#b+G9mi917ysCNX_uBJr99*Z-W=UF^?HmwUh z^3FI5c@^bqcEA4&6CV9%xV~rpFQKrn3>D>X`ybxc7q5}qa^b7SpSSkGAI)0R3*~(; zo?Ll*G5?8KKbPGJL6f& z_GB%+(DdIYPwU^77qwHlRip8tJNtT7+~T#&mD!2BZBrD@h0hjRISQ?xt5Vprr+d=5 z1w3=E@-zL(xbP=aZn5YU@i^hDU*7tfRG*bU!pjnF7`w+?r;u4@le=QY-N>5lInA#) zgjqkT-nHL7+28K`exv=fC*Rz6w)W5Ux>%|DyTA7pZ2DMxq>8<1(Z}>78HzvB%hzXl zs(0AfKIUHC@yK<1*yb&>BX0Aov$<@4Ciyl?vB{n(zvZ`-tOR?iyCm)&u; zxgOl{i0AV?mv^5$l{QWcS^nO5M{)Nv->OebpD&wle!u+rjcT>Wd z)sJ>pSI_5o`Tg6qdD0*G_FFnWJTLHCY(kc%P`l2@<06xF-f*oyDLqeGwJ}KR?SF=2 z@$J9D_Ug4)Kc3GU`8v>N+NaFzQhRsBELrE^VmSSdWVZ&hq@~1b*Y%6O{NDfYXm$SU zzqQAcT@&Ox?8M~sK`yDxs*r{dgy2IaE9JDyt{({BH;{;mAO_hJ?6 z&U>_bU(3lc)m*RpWp3R!iOK#~Rk!LsU!${WCfkKc0x7-B9XD1cXx7IUoDW~Lb3a#& z*N6Uo)g9f3?X)i1xT@|_nXEQ>#o?0Vu-`q0%q-a1S-!5DGm9If*JZt&P_d5*iFxXt?~;l|;!U2Pj^s?~XVz~1 zDq2`CVW;zAKFc1)l|OdJd#`+x6R=_H<;OxXcg#3%3W)`rZ8&?9fi=m4Vb-Zdepb8d zqw1_ymu_6EcIE2S{|xJ2R*BfCew6mQv`6#9tMf75{rs=)DK}aB+WYJcos>RP_sm+! zj3|$Tg2&#@{qb1xyy)lqZ@Ft9ZTz_Z;ICt=KYWhgzv1$p_+^n5Ro~9N_c7b#lw3Y{ zYTU=v$fc9H%8#uQx?7jC@%;5&aSP?vE{lqHTC*|c&}5|@j7^)GjQjpvpZ#O{xAqG* z`j5SP*&cge)6><-d-LV5fBg0~<>fE8hey|)=Q)!il$UR$7i1p#Ik;LcdsXfpuh~&6 z-ktd6fs*#XXiM&ac)q^>qYndPGH`qk3c3k>mp5=#on^&HTP1e! zyg$WjD(d`&*7vW!>~B6jPW{rK$kJ6eJ?_hZf_gIW%|wPe!Ktvsy8NY%~kK`?=F7)*OjM!(-oZa+zzFj)?+tkwJ zO1-)DOJ8Ml7N-`=nY${um#Fisk(8aXT9c(YO6`4+cV+z3I`6;N`g5=T;_{5*e6wG= zIQsqW>#=ip%`5!vU6b*l{P0|1_bpf6*O+HN(ie8M?dvUmRJm~b<|egJbrh7qu%>BJbV}WJ>iI5 zP^90%iRZn3u|MWNy4%0mPBOMS;6mxzeE&#MJ!!WtakEts`ZLvM>m=w!l&Jh-j||H-p_>-Os>>I~ODKjbg&`uMDN`1WlVw?^j0 z-RxV);d7Ry*zbd{>TWGx`?Df=aNzD&mDimu~_*HeshFE{{dZZl#+T=N^p5U3*Y`;$L3pd%^P~bsn{?}NUcFF> zFE3x_+Dq4Ld*NJjeNxpw-n;Q1_Mfj?*8A$zw{UJ|);Y%xnc2>kkw3RjW7l zVNWa%y74duX~nMjxl6VF+CTqoulEP7`{KR2?62;#E!Wd;{LFvYfB58|V(~lWkkmn=KvnzI)4j_M?l}?Obs*S#5bG-^`4Z^Y7m+VNc0!yt-}vsqNa*F(3N+ALTl` z?7OmN+NbBPyUlM^@7|_3=g7y(*^Y;0Cs{~09(&UxBXx7-`?~)OZS&XuXJGKU=AB(< z^>5lAr!I{JiQsNy@{i3&gesyPc3xR@$yBx~qO9femKm44XLcI|2v{q9KdEzyfn}0m z%=VT#vln&hAI^7{cHj1OKXm@#%q7iHm%r|K?A^`oF)exjsY>O9GozOUaG-hBCd;fcCSlN}%Wx*xb_Jv-{dzF9T)mBm6)KMn`x+MRlFCwfbI&Lc~% zGYdTfH*RNoeeBKk=7=Bv84mAb{5!`+{eiwjmYu|h=iN5x=_`-sRz3_}k&Qm|Yu@fh{~1JHL_IOfEmy8+I=Z0R)mnF&5f9h2v+DCM zDfC7KZtax(#sBF0aqa%*4|nd}{bT(xZvUcacj?osUtG#;6FB!!%uwe0MxXOLrz9p@ z>M)%3ezd=@>>lSw+ii2#RCph`w#(-Fv|W`S4YxJCym%)@Bw}L6>~jixIweax79=+v z+7eW}YE|j}`>V?SGaQ(7eO>puou5`*E_gBh?(^@qdt-kmXTRAmV4v|<+9N>a(jV^S z+qPF8bG(u_^}&|lcN4moAI~>3ei?7`hv!<2GkZ@BbG3bsP2h^zmlth4x^L;a*x2Zk z-}YJuRrefKY4wY{#m3M-VfpFp#d*J8-F~cp`1jfR-(2^P+eyuj{_VNx-`W1~?b-9U zFdaDhwEgaWjsFaK#-Fy!NbOzr$K%7a&5vf^=ebd_*md7W)k)KKU3|NhBW2HaW=VaH zq*rY20ajfdYlY%J{Mz=Pfo=LCSMRs+tF|wj7Tzn*{d-mE!E-K#sb#B6Zn4%H*G=|JF4*=)=;7i-X}Pm~ z{Lh20PmlRiwc+wV*&oM`eSRrEU5c-9MNGs2oj6N9Q%06WoU)>)l>YtkXfS@za(Cix zMqkatH-FlnUw&_ktFHRFU0Z+FAISOimhbwZ>+u`6AD&?o{8;oyb?)+IGcLa__GC%B z9<*g@vYPa@XZ~Y9fAhKKN4btRhL6*lPF*!U zA$E76P|G~C#!s71%yeb^v-bpB_I;Cn$@5DpghQ{_Ka8kocbI2Aed->Y&gG_WZ|`RM ztbg;}Jo%Y(JJY8|KTWC@zVOHD9de=pAWy=bK>dh7L7?Uz69Ur#HT&-2g7B%Z}a`O*H4a_*&Z zuQod;sG9{H(px&~!NF{W7N(0o9~}Gb&;6{(LGxu_tB>r;nJ3?U zG`xJ4@0Mtjzk0s6_0ojBrDa+syy;At%yD0Tjn`G#U$H+kF2^s~dfR=`W-alydGo6> z*B_tmd-vM9=V$kettpMQkNtSR{i|)#z5B;zo%&J#Ti$2h1@Fq$dkSu==x1x@tk}NW zdu4pqwhK>ZXIR?xKiYDojz&*7n$|8b&L7* zFz>k^owcowJ?}~E$z!)alv$IwBI{Oot7-RHcJF_;uf2V%I_^n;5mR&lTboFb$N__0Qw9FRIO{z~OtXeRK^}uP+rC}33tUoTR zek7{BPglRBH~G->gRf<7wd}eRu*in5U+Jv;bnUsblaoKQ=FJH+-mblSTYvoPU$63| zKWl5hUHIrf!=|FFTmHxXGj#4MTlSe7qeMeeANY%^Fc{Q^Xlbmzh8TP^g{M6zPZ0`>n>FN+x;k(??1!gU2BgXdTFOHGimdt ztNHskK5UD8%WUX>@6M8gWyYz^54Hzi`EiU}U$jQ;!}=rZIkIP!EnW6nSGhUJN4B+@xKdmYpOqdwTXTh&-!Ea5d*uWh0A)) zIC2&C@bdKaE9)LTlCi?8aOdHg!&-a}-EM!E*J!RkzQ8*gB@ny&*Z7KMJ(=AX^ik#c6|5YD4Xk>^k;-~O?)mr zkR&djt_23(>Xe0lIKbFz18t=ZhlPq&u}qsW4^F_aNk#5{HXNKACV7NuXoz1WUS6>Ii6WCy=cdaMhSyI zCzwC3-*zrD@zv9>(-ieW#r{e~nLe-WKl`I>pH^FT`!vCQ}(~H>}ofJJRER`qOizuB=Pn&7b){uZXPu*#Bt#F3vx8->%h{ zE-n9V{=+)wocO_bZkyQRNB6tX_VL=z^}a0M!#?d@yY=XjkGD^hSu-7- z%g7P2QhmeONdK03+Wt4cAF7j(@`#Q1``Ef{sqTUuEgWkeU$46qVfJ=Mbb*A1<>L&= zM_X#je>>j2^<(?Ni52-?*GfH9KH4pR_}1BD^Y(4#;+LXto_TyaGbO}@Z`JQx9zTNT zJHM{y|5LiW#`DA6=^Z9pcC=l4a8f$@?(wJRSvH=y=q=JciFrxKs{)TxbA${s`YvQ= zd@}tMSor%=_+O@Yk;!*I$3J^oe{^?MrSZ9acKjl772Oa2GkjxjG~F)}opI|))X}@w z=9VsZl{&T|#wD5Q*()`pG#C5wJpOwpj#io<;cu8{^3iSWV_mQ7haR83a%{Km^4Y~X zD*VOyOLZ9L8#H)Foim=puRn<~sP4d`j=HzocHRH@bpO9eO>tk>U%KCJ{+~hR{8SVB z#(5f-|Eb40u0Hta^jo=jDbGi{bKQM+UKI9`Uiw-2bI-)?4=>tJrZ}BCwS!0GEzgJa zL-Ep?r$4+uEVaHp+W%smTDhIh>YqHtmcHM)Z}%^=+ws=>FZ{Ir=K0}y$Jf2e z56vHdwrtkc%d@RXh?i>XHH(ZTh!6LSs?a z8PNVbf@7lVnf|V<x!^H?Tcpkp-U zjMpdaRh%-9c^tpr`aHka>VB_{{=?gA%MV9wZ{1RB?zL&&w}0_Mk>?9!l-`8|i(KS5 z$hhfwoy8%kQ_J6|8k$AZ|haE&I%V;26?=&3ZMNa^yBmXf3g*e-S#*6^JdA3UD>)@R*sh^>P~%j zOy!%EDl6@0KYeC%QnUZ0!f|1t*SA;IoEEIC@x6UIY|EFt`_Gf7&FbG6vs>ez=j+$` z)2E(_qUEuQ*PMU5Rdn4yHmzLz?86^>RBIn+{t*S>?U8wckZ+zaRs zl{5#cE6PPVSAU!#8op~_kLT-y6X17U!2PuCd(e){$A)_O`;!9 zMegqDWdhze-EL?6XOJtI?_0S(@7%G?Kc3gET>fHmbG=^p_xSX+{x@GAk~8>_-l=}+ zw)?d&{O!}CGMkS3el$HcyV!Q?@wpSGO}e?+I=bcV{4E=6-W;#`UVS@s-=-bgl0wsZ z7lTi)b#uj!0x}oer<1WMVwhJcdex9_9Sz*qWKkhC*Inpzwf$UFKT1H#&gB#<*~1% z?3UcBnf}WqUv%l0T%K@&7+2}do7mePDFtj6wUq9Ux>{@*@_xPf`knQEw}0IF_fcxf z%KLfq=6&54{ZD*bTl`z!hm1o)N%YHbH|4NPTnpdwy;?p+l+T0QKwq(MV{$d-4 z^{2w0+$nOGcv9JZe1BV=$&Zz5@4lA*&|J~=NZ0$&>E$oO#AZF6J2}(*S!9&|Thr+E zdhhN$yYn;Q&hbwcpE`5qC;VMvQ~c~8@-A&^Ae)bCNYr~f4XkY0DMn_nimqwhBw09kVbw zJ$=^{^~q09O0P`a_1Wg6r$=y!UHG}m^_lDn^6^ny-c8wMoBOG%GJEar7iWG3eXm+; zFZ%t(n)!3DJ87Rztjx6CEwoz8B&BbGVAs$52kZyd3;pmv7RR0&emLlb!4Gry9hcXA zjVv|U*&V&Z;C9qu-7VhQGm<~)HXD?5yiYLOu2Z*k=MHc0%Jd13EH3PpcVP3adHBKn z+nj55%0JE@zGk)hMQ}vLV!zpeo3~tAG*fR&S;T`KUH4wzbBmniojv2py>5vhuD6zK z%jBh>zFmLk?XTNQ|GWv=CtCkH``>iy`mQ+HkKxDUo99`7`2E80M|p&~(cvYkSHBb= zubbdCDf-NrhM(S=J|YQ`j8DR6X58QWz3We;e6QYH-TH?S(XaKF-gaG5(J%GdKR;Xd z=#kfjPww#?H`(sDJFMnsW|N57#dr55>l99Y*!-}+!zPe*)+_Tc0p{F{_Z!pZ-c0U{ zY%5rLx5(Xb*5~-P`Qm%BKg!m`KD=uk{-JH*qga*5JGZjGZ4>ui=9_Oa+3ok*o@f3> zmb!VuObOClMLQK#e@4HLtv|nX7w;cC-~F$jUS9iaGxve{o9}m*^0&!pU9ze8W&3XR ze+EvwwQFArMrFRv6+WqQX^+w7nd?+KW9P=!EboiB@>a(~<+y#LP160%_T7JEs~5(( zuKdyUAk!?ebsepK(v#diZC{JHti!)+bNh{daMm-DMle6|an5R@r{M zSUUT!-s_kR8+K0i+i&(-vd3*vP_9egCzs2om5*{C_}!h7e5p?RkN9v==D zR*4c;TWm4!?8JQC#M0`H%V!;T#_2vx_2WFq=Epey z=jF>!zWw>GB+lyNuOCdG56()dNQurhtzj@qE>}ppYr6?0;li|I2#6|D?FYu3pFb@UXunZ;PK-*SE(D*O>p7 z__6+=RD1jWhuR1Y3-P}DH`7It>cTUUAov}?s(oHmC((QuEyT#w8ehfW+ zKv(`zUawtmu7LLr{hZ_v-xjZ3`}FRfO-I5cbyMOuA9~B3lo>kJd0p+dS>J-Dq+VR| zFK({f`E|P_kAL6T+#mAxKSO@i*GsRL&3aL_{ir_2YdPzW{S78pHs5_{&Fi^!Z`_Mh z4JOl0@y?OvymNEzw3r5m-*%iVJss`m}aK)djH%NTi?dXUdc1<=YKi zW|P!*mX#_gRpIk0)*rXi{qX#FK66EFwCm(Ua?0K(Zt`w9_Rlh5yU{zLpNkBZJjnfY z@_3=U_HvW?Zuigce=uqLG-lwx9l$_x)w*lZw6D-~4{eQRB4z@!^fDd!wx9 z+r->{C&({q`&;Jy`9IYLlVvM!f8Q7NexGE8`!PAuAG?eBW-fX?OT9s5e$bx1+pe!G zxjj2<=e&Hw)XVqSvX_3AT~Z$1qruzC(6#fCes`SVhxNzT^ZoF0-E?hBZ_&%gV$~HdxF65b1`F^EMZ?-0zDVu0${T#u|Q%GyHlmurPc;@Wf|0(xBgRaf>uq~@*oy&{f zZNJmD?)ClCCttrSeJIEMVd}B&56(yJc(2t67d~dW{I%Cf{!JuH;JUThrjKKeecbbA zqQ$4@F;5TNmtLvR_0CS@PyF^DyOT<7jStVdHh;lc`Ps>9vgA`g>0d64y|^ZLPv>Rp zRFB)TySm=3%M-eEEApvDpNih`3yhP!%DVEZqksLG^>eD&yD8=Szw8%2@Q?pT@DX`l z8{?1Mft!ESu6S|FZgI8DE%C#@wQsBM+#0W{JaxJ`(=~a+?;gpK+ZvWM@7#9vKSPtz zkLL&EH?uV^_xJauy`-NX4Y z?w$P2{|vrAS|7Pr-<}(HC#!6mfs*!yU6akq0vp(#ADM83^U21OriZs5d*4}JziHmy zk6aoxy2+3Jvc;yZU%PDU&Ur@{Z``r7J0(la{mwln-lHE+%kC9hyPaKC@1(tU=8yQ; z{|t(*#YMMQmwn#1zj)hh`y&_rE~%5w-e>+Xyq9z8Sb4GJ!lT2uJ`#&KoS!kV+uY4_%J=j*qD~nIcyKMcM zUH417GVU*b_WD`uEj5Xc_e<9lUHZz;lwBuvu_kx1`_dcP+pkxZOwe}CHetP~xO-B7 z%qhwG@;S`A6CdrHzUIErtMDlEcGFm=Qzu@9Z`(HMJnN(#&V@<~Q+o_w?r1Q69})WW z$#k=8t==8g*%ZZza#We9TVrWB;-KmJct#3*M@+zLvf(Qs3mSm*Sd(x?6%LDNb;- zk)D+5lE$rYXruF-Sx@(;o$sG@{^;AhU#C6?e>mQ`=FufxQ^`_!uERQkncIUlw_jez zd{*z}1dXJkcKIsP6X!Qne0v?S@x!#Gr3+)DBC`x!(jwoNy!^^>HGKMi2KUBGp3xZ= zE(xB;j4$od+BWIg%l`}~&Hjb&pStaPPNJ{&R(buus;WPt&;5Q~d|;2@1Amzc>xb=K zCHitdm`*=hx@`NSUcYy~((c)ugLl{KBhFE?9WGof`8`+9Hm$Hxrs>*)E#F>9pSia#V%0(ACp}?ejlaDs z_SLi1=zUaMzwBk%ton`|o#j{Zk~i)6ZG6_`wwlMY&s|p4AA0X~s!qN5(RJ&S zd!bu6baHau-TGv<U5xH^Dz`o__a?lm0-JoR^ml#w z&(Qp7L!hfjn@Aw!3~Csi6zFRDB;r_XQ-4sQ1K*l_kso2lkKPKoCjMc~g%bv$$6T{& zSIPf;EC2V4#_tgO&$sMt=rL{m<~GHBiC9{`r>r?-$m)3(ia~EC_jhb;_b$ zX6My8C#T-vTDMKCB~ksV%KSh46Lm$!Z!P@KFz@1jhWk$aTb=(i*e?Fhu&=dp`37%S zSC}!2-v<0=IG^#K;r&7fp^JeS4h~@15)z-){;%5oKSM2--BtcS-}?Xk5@Ez;Is-M# z7P&GpMbeGO*72+r__R^Hw}iWQ>zK~OK z*XP}MJzwA-PmR%!`Hw2T%#FM3+x6(=9>WDc-bYz&ed5|v=$E>~yYq(Iy9bMA&RNHK zfIU~^-IHC>9}4T|&YRoz&3o_ld$az`j+$!{zq!0~js3>2(){vKu8(}JkKCHxS$ET} zDP`--cMm72H7*d>I%Fy&BdGRxOA){6D|xPe^2-nFvqrs$&#w)7;$K)iQgibHTK~bL%984E)O>ffzb` z%Af2X!H@QL&YIu$nFTl9Ou69ZpJ*#UDFRu>KDEpCC$k*>yFv30Ku#UhfZ%Y_+%ZeF=b6N&vWiA zlP6`jUi~Lj@$B>C^grrm_e?IRTvqI}j+Rvp{cz>G(aQ@0`!fW5rhimAX~=H0#m9kb z&kBX#qBU=SJ7vnLeAwRe$MmtRc+j3pUly;)-02(8m_F&x*5CgbWcS62ZTxNJ{3yO>+dlKFY9Bsq z-?Xdm8^@Rb47|tlSDx3&+@`wg3eTF$Eul?%Z53JyGdZW}NnmVt$n$!#4b#vQPBnvbZ?cl@<4cE~i{wZ}NKn=2&0V&s)!) zI&S9J`J^a`e?ox8!5~Jt$-niptIodbeyl4UQS17jVS|msH?ftg<7;=lPpq-~KNyd8g)@H9ztf zj`-tzMOjQv{L+`&wCwD%-7MSZo9;5`ihyPz^@v)3d~o5W^!-sQ=idw1=Wb*9M;$>!)I3xrfMA|3he zY_k+Saqkc7$ED92A4j+T=+Dl4d|pI#?vi_oTQ1Dg&|UNP6N_PV_u5xyRAw7-9%h>| z^>!=svcF4fLRVF2KgjPctrdH_<*%Q1N|*FihOHYXg>;-T;yA^wV`MmWz10uL<*%jY zciO4e*j!y-6TU1n>|0ds*IoB3y1dW&om^HuWn$0|k58-7ipD*dd{*HGx_T{QQ`D~NRAD-pha`m#n z(Wk4OCf$kIbZTevGj>^bS*ZUCnyBN6Ovy!p3>mAGIIp_*4JesN(R^Z5!9Nt$cVVYg(YZX`b%8 z+&#Nz*j$;WaZ~o#nH8*&D$=o68?Ej8{LjW;H2quow)QQTm6P4JPxaUD-rm2ag2zc<+@pa=#?5eE8X_*0S;T1p5jD4|R)!_t%9#>TBAk@SYuEi|8&|ci4bjf!Yby-by?L^&O;M(Lt;$~e{StSdbsEn9nI6V|pkK5i`{A?s z5B9TGygTl6@!F~E=%1$Y{JL*GpEllowouUXk;mLw?uuSkd0&g5f##~#@S+3HvS zL_hY;x_roL>A8jJ5AKG3dAT-!;Z6bf(qhX=@>^8=Wj0JXJyVjqBjvoTMSoeTa&T%&vDt?CR)crgBSyjOk-$ptK#~2|CX@FNA=b{=?~XSyx2AM+4QBd$-eDjSMzLw zYjoEpR?m2t5$?%x?K7t{i;>WpwQBDlo{e=?ji2TpuKVZq_Pc+ho&;xJde*h+>Fv|i zRsZajH(ak1s@Q%^&h+B@(*F!HQ85+KZrAL$c9=KmA;{hxv7PQUQqH81wxtk?fmci#F<+4TvfpZ)IsXUMsGKYw~(c}ivL z`kp@yb(VTdO#Mab)6DyqUhMBkyAqSZpQD&@Y}z)tbYae8=j~5$#d?1JR2FvrQ2s6N zN9J8Mp$|Puf9%ow<+h>ZKZB92*`a6BE#lM4Cm(OGZjZmabt1b~d1uAV&CiZWUG9!H z7yq1T`uS@8;mEz}OPyj6*}dM!dKBp{~4J6=~hfX zwol=TNx$UD_MV-^Stg{nZ0e(ZJ*ke|Eu==p?tmy>7)IH zmDvwh&v_YMWw*9``MHTsDc1+$*_fq!?@Ts!7b%Kc=_#5T_$*)XX5>w758s#{-VZx#(tm4JD(3dyd#jhfn60pL zdtX3GYwpu|!IF~{pHBZQu~N0+#p3=SPk-zDNPnCnwe`&QU*}raXBphO?Y$!N*R_Q_ z`Xa)W9h;&K7s~`Cmo$jQtgJoogzNU-Szp%%$Nf-z7(KT*T|S;j0#X-7{(b>@4#yUoO|Z>ClVHU8^g;eCgf&5C7~I-&6k~`N%%Ih(GrB zJ#W|kldGJTuQ%yd$mCZ=y1U&T8L#jxDw2J~I&)#BrM+F{@*1DN^K;ED7SpY z-Iw31QmUq$PBigc9(&3>noV$XeyH^fA+sLAn7r=C{}}}T=>1r`I_gK)t55$K{I2Sy zc&z)%J^NyYNUY5Hr+ak2h4xu4XgAcqIp=NK%*c&_Vs{qpy8HJ(!-tZee{KfFY|LLe z{qCwirax;zQZ)ho?`bHC+zhIPXwq?(Lk8Nyko{S-zR!NPpF>HS@)8-p{fVxwh8iWE8Jg zwdSI&Q&SA5OkJGNBT=*?iO=2Z+|s?XedFDi7kkwo@wLv{`lj;uq4iBx+na2n<|$s^ z>0twzvin=Y|ES1+SZDvp@$dTWzr9ZX z4*tzt|E>R9`7PnM@#&xbGgLmStloe1&yREGXN&F)FWb5+zi!s)@R!xwKks^x?5i8P zHg9g7toYissV|Cexxd-DEK4x%e#PzBt2U00s?WydW`2$3`uE)-LT%DL?W75+YfqH+ zICJUAxvM`@nRDq!{I}>!Hu@jbeSg%-ExLZIx3zI|_YtX6uhiB(>Z91QZ)E}ZUTySW6(8f7tsL|7x}f#RUDkU~ zYTa5=d~&Z7TW3mfS8MJzU*{)tV%NWzI(Pl^3v0JVZF%}R^L760{k~spNjw15^uc~1 zo7BI1AIxw2XR%4(@apJ_?fzRm*3JvNrKh*=a4B=;#-j>~s*{UsvO*s}NzJ~uYHL>f zqx#y#>+k08n5(zfKC*86??YW0D=}6H9$B_0TjB7E#20RvR{~h#Q``Sl9RJTy(`9#> z|If$%e?LWaMHt0zCjMubXZWAtzEc0D!~YpD5+_D@vq^}`<}-o0D0Yp2;q_a`SdE#A^NJtN#T&qJp8ShlTx4gbgc z2g?m^yt)U zc4^VPOD|pvE!q=y>1eUchApceEOvjU#KzmI<>&FruI!`vvH7AE?GLXlRGYiHqCW7# zTASGP#8p4uA2u(3u`6%!@~tb&C%+Bkp1%FIMjNMk=NyKLjGr2d-j_bxFE;i5{qN;B z&DKo*`bW5`_Wp<4^PT>b$ua#qelhy^#;l1^>lRAg$g=XOYVu7lR{RvY?`jImw1a!@ z&YpSE?~hP%*VYyP&i`Y+vPD+fEm!cZj!AcB&YJ6f`*&_GZHlSfXfxr&dL@q#@o*iP zh6v%a6}itAu89A4N=|j}?vG+KAJuA|kKej`^S(=^C%2w>yXD9F+|<(Qmr3qBJ~f^3 zX%P}&i0qmiR-ZAyGk?{O*$2g=>bp&5=eJq)O|LAN_3Fp1dj1=+@j4TAy#FrF7TMjk zaKakbsWI=){hggL$un~8{eQPh>us;6ec8K}W82>+-~KaLXFlEhDe9-Z&yVE8Rpre! zmP=n)*>1n_Wv<>eZEf$`H~&HmSGdHU@s`nGR+;=N>)_QlY-@H!?%ggm+ab?&;?$bQ z0tw3;W}drng!jfV57AFY|1)$&>i?6E{1g47U9I!s%^GFdGf~fa*TrO-MQvNXhdsGe zvUTpB^}3gox7g2ko_M=)^P|1&hphD9CO?c#j{PXMa>=r_53l*!W$zb>yLqs(VtUza z1((m4%ypIs=ZR-e@^rH_T{>r#s`~Cpzjy8zd%V2ldabg@!7bZ7|NhzE6m)&F|Hdz8 zMFImrYZHhXQ3Q=ThEM-f{^9-Mc(F_?zo{SfV`G&LJ$f5<>h2%^!bNNE9($@Wf9E`X z%Vrj><=4B1z-pWnZoe>6_y!}fz`|0!MlQmghU*WX`^cjcX!w1_K9#y z(An*adgf_b$Hwpab8P+E{)p)h``g#WA1aEuefQq^hnYUVLXSRMTx@gEBQeAKr1FWV zq`(`(nsqy0H^+!~?9u#?9{J;7e*W6*Y2tEL*&X?hKHRlM;Zzbo)mqs zf6M;PKdNggP9Ltaku#h0_{gmCjxRIQepD{w4lG@HVtb&~nVYTaLfl$BVr7$W3x*x@ z6j}c^Y1j78ce7t@`D1oxdVae4(;wOS)BZ)zYt6dPmcP$p!anVbU-dgDc&krec>d9} zo%1fmYjjG<+CEQd^4BhV)~A}gbW?$E$u9i^`9)AOF)72LdB{GaIO>gbcUbN@3)FL}P?mEOtyA?x?g_RhcX>-NEV z_8Q#>vPqA_F6FL&a%EZboBdf=O{>qkK58y|ucch9GcTliLz(+lPv$<>vaO%~GrU>< zSatWua~mK2wDHib0>{OXeU&yYO7dGqdWZC>esi}Q~N z$2dPbo_4`=)1&B?$3;g=7Ct%0@ae`R29+}V&G&mu_A}R*|Cs-{Ha}+j?1SBpxaa!m z)?K@@bnO-G?40T51;^$7Gu%B{G@Z{(jJ;4{&XY@CUzP4n{F8h7=jYOYal3BhdEeZ+ zyT0`2e$l+SzcT9`emsA;zOCwg^P7_T&Zs+8>g`KrDmH2U@hx2JUaZcToZeN@bGO4y z_XN8n!*%<|@-6Wkxizs5mwsgL_-ZSEG&piy?~>_18dpWU65r6t-7#hDBhP}{=RB=% z%t@KUlU8?op7w`z-M_6~mrFfcyX57idrLf**)6px*mFrTIcdXtg=sf96)O)|J_vJv zYxyz%;nuC~hjOzXABz*~vfeM6IsMzLmRWN*2uG?cIcZWP_H$v7+cWlz6DODKvsix3 z(_Z`K_nZ4CO!_X}cS?8twEd-*e$LzbXg+_1@T1b3AC|4G&R>~T<w_lK-mEnF&yd=y+7Q{*6Y{K z{by+X7u%{`V{26v8+|#t=F9bUe=FS&z2~S=y|^cLz2}4Ky?XDDlv%&cU-Q;<+1^~; zl*wl*g6=LY-5fR{&+ypBw!}%CqOZhPYu}8~;SnpA=D6`Kr=Z^I$Kr31k$QV7KP) zU+w7hyO^^F1NTQ0o&jGdQUwfMs#UPdCP+ z>2(+6)HeQ@``fz4xB1Xy@3q@zea@_!x_60DpA-$vR(I|m{963bb7_D z*v{~bu6y1ypRh)x3m&`e=gF*cd5`CZ^9SMtKKNMae%Zb&t+{q>gs1iCQrp}2VmKE? zS1g~EZ}?|h#>0M|1FT##1CLh7AGVeE{BiWR;t#v$hkiYsx3qY{OY<_TnV0gFcTTcM zak_lVgsXq=laP)aex3$_FxT_8=A>@xe)ImkcHY|0;bH$78ZZ5|KIC=Lf9*g2^?&5{ z)$aXF!oi(ka=RbBRX_MKR4vk~IGPdnMydsRyOG%Rj9>)w(D%xBl4a zFWVo8@11?`e(U|$1&g{gBtSKFn0}9q^70?A4{xthTlM{9VhPjfzq41|iSThyc;K6_ zI#7oT3czd61!=a2l-A945W1#Z9i6`8+L#ZLQbP<5%+ ziloS+=VvM1>SBrh##69ZZNr}054#_N90>%Zqe&(5pf zd;G9HPwdHu>Q*6z{ah!Xz1VpB#7Fmxnzjapj)M^#cc*AGu@$j7n4bD^{9*WEOPk!K zukWc(PFq{Q(&?I6)wKOZ)5^myu1hk!vE|V{&nfOZ3nlX>+qz}eNmlg9x5x9^2~{K? zGe4XDXl=01mP?PCQpzjeD{cO)>g1pxu+hUaiT(6-i}jp;7v}E?vorr;A91x#F8lj8 zsq*eGr}g>cZr**_cFn^l@AIbB_jFd9Y)bU|an&h${n=@jFQ(*n1o-{?y8g4*zul$# zV{;P4tMqcC@9y0zT=wPu>Ef$vP3#-R>m~19yB~jH`HyC~1-}|UT&Yt}PuV9p>AU>R zy83DAoYRadt&i)@`+6&b$kRC8J8%8icKo2c?4nCGoFC=%MdN<79}VYyWY5g8c3G{V zl-TEW2r;bXOF`Q)E^Zh~E=0_4X$q%G&{z-h)>vC_d=sMl6x<#*E-Sgl2 z^48=#X__ipMc*zMu++5L#qOS6X1IB|_~pCt=3DnncmL18s;j=_`IFgSy?+1Po%{S* zZSI5p&HKbY@NFzyooROcQ&zC!xA?7Dv-Un)^wDv5`BcH3)BLq;Z&)keTGDu&X?N7m zddjz7`{f>+HSL%E*|(p+NQJL>^nKs{qWe!Oe!ZyY&iyA46Lr6$&)5FIuCmo77hjo6 z9ogj+ab~+wTbK5VTVfHHoK>=%CpI>mVNJ}oVt@1X;r#Y0&b=SoCO_QI9e3wW?8{$A z;wG$&yJin-lVMm7g=)YV|k$Q?GD-@cm8xgL=V=y}8rwdtZL5bC3J7 z)aIh0-pN>|pnv9!1Itx?&Wh9CAK2eqe$+2q5gz~HKSQ^TP-tnhZF%cl8EH_o%Zn{-R{XLY>My?i6{$i(`4Ng=0G zJCdZ!in8q=n>X*1sCXRzVc+b>XX||J!?P#cQkyT{z3xk8o*r{*lt#kN?z2<<@8%fm z`Ei_Fn=A1n`&;FY_=mC5!B^@|E|2`%?b7zj$u$C zu|M7)jN{(@Bk7;gN4DmLuk1fe&z!DxYwi=X&$ZJmJ*PSDZTxg);?8|9PMi%{`!;Fv zu41o$xl8M#BfoF$?EO2tZ1dB+dtUGEP2cs^I=(ZW^PkLXJB1JHHa>bA9(?($w&$Y_ z7gg*GuarEvuCs2{#wFz*oBUp#Tk_zlPm+ZUyAndCuxB2vdHmb^)jidHKPEn`Eqi{T>fX+Wf31();;ECo zvSs&O!#B?(^^_)MhIm#?;!MrmsF`tywcq-{#MQ}1y;s&S|5%(_Hhq8kWz&`EzjY_< z>9|v=_BFLSJH0x`+hkL$@!uJ3wJUz>iD*QHyw^5g{A|*fvhMw>_wBZ>ik!S9KezVE z-_Tq2D%0jw$vbAm^ZQNyXnfSpaCYdtPqAsY&YhAL;yRkn!+4J2je#|TN{b*Ht+k*XFzGa7Q zNuOS=dhW-tFu4}Fxck(|&eC1lxvw_+-x9#_?6SMr!(DYCCNBr`b?CJOUw^qjs#v3o4w|qsv zp!d31#s0DrD)TazPGowQ;L~#A$kUi)3kjL<^Yb@7y*qF3$2?o-!?U(d+q~drh1PxlUA1OsqTLhKZ!t%` z{LFCN_@KlN-rYPl2dBpL{Mz3CPa#WQppMsW^@nZOejm+V6>KD#T@Wn;}F_oOn>1UJ`m zIo*99>O0=#)|jt*H7k30hw^_0=grk$+p;ehJY0KfLs9tCgo|@l9%PY``l)@$>OVt! zjp1*pn&fTQzj*v%UUKu*KGjW^6+Vid?NXTgWT&8`U|*&9*`S&|%%_<*?D*NA{8&O~ zPw0o)hxQBYQCydKYufB(nPyC%-d*}--R<4Bb*4(I3Rmye#5ue7bQ|Sx+`bfG7V_eK z?cQU5RmJY#P5rdt?&C%GK6_s?^8UEHZuPZ$r&m@eAB>aP`+=?3=6jgo?X1gpUz!zp ztDLN^OPuGH`E-J{rvk6Zg56E$xNDAtFAYC?kK~QqCU4$qf0-QL68oP)x~8?XH1GbJ zEr0C3VILn{Q!n~zdzD$wbu$C^;?n2NQm%>zYA!VaJ}GoX;ZNMfl||t@t^)Lh9^O*C-@g<78Y8?yl}}>HgYO^@ZWts-SXQ}gF4sh`yEzwvl+5iq;1>r zSx#;1%t!7)3%zBg->4|HF;p*SUMr{g(f>yMrZ`@=!~R?Mx=(%^)g&~7i>twW8Rz-p zAEBrFxen{zzI(X5G`;N8mU%U-<_xF0PPPS^&DZ7kc~ZK1yOoW2x!9%CyPTP95+j<{ z$uN|s6dzc6`Mm!RiQ{p8-A9y2dSIygPgl3jD~g!=p`GEBx@Gk*L# zWN|O@?YrEydnOl(d}iS^$`4_!%juaOlgEAVWk?)%E+RLdLQ zsplCUD>S*sTyyPjT$Vl4_05}Yfrp<&xr!XTCHPExwdSYmd6&8y%D=omYPY{)`cH9% zAC1dn99bqFEqInG#c|DcQr;9%xro%Hr5kSVusEP@BbEKf^^(xLPrT=j-AqxPxH$Ki zaXX`PUX0uuk@9nvtEbxucm0UodiC$yGuNK%3AR>9nJ~e*VHMj0Hg<`F$}+QR;-n(% zu58Q_-*C{dJR*6b)1f!FyA^NG+i8*Hu97Sh!1`7{P-^L-a|+XLtMpFJJ%2@1)x&PV z#IV=)t``?wcpJyDq_X_Pr9G2aS!aB;2v}uR#{BtgMli#oE=`={TxYHSId}hOi2GFk z!ZZ46^YZ!=h4ruR`TynAu4*v;2U*nRMx-S(}s4m(}{T5@Umzq>PaQZAi)85A~udZ^(R9jO`UtKdD)Lwr{v{c&NJNh@1)0;+m*>T>aL3f#Kd|8Uh=r+xX;JfcY?#+tzmfp$g_9r)g&&6{t;Hh z67^))9PVT_Pv)Gb+hiuVv#&7SyEN;X)w|ibD_(A&_N#4vsnluKyK7eK1S?AN^et&) zDN%T|FQdM7pR%0L?rZC8R3Amlmc2e!UZ;BL%YO!*EUAbqCjLgu3~9f8=BG^9@mcb6 zNrU7$bHQ2P^IHXe%)0;ZeEZ{%6Sl{T=&t&^Cu)^e<(zN+lgkWur=Q+!U2!z%YW(Go z;o0ZTyZvW4?^)$_I=7xZ^<`=Le+J(5k**>^;QkG$k3%dD=lq!d=b^1twA zM{L-+tLs{HubM`z&Y4Nag=~&_26M%Q%IiIQeO&bJ^}f2RW^$_6|4Ep2KFnu-Ri}GJ zUw_Rr&E6A#9ixW&AwS2>#|S%BlrH6d$$j6 zPvQ*<+O_Ile6+h+xBR4YQm*c$Gv0GcM$Fx~zs!SeUnM!4sRREhb>IHS^+)I4?#%0H z=I^%cuAl$->CSiC>c!sb)#>?v_?OEbwQ*mpU92B(&@+v&Q|~nHsZ4M=6s)MY-0`#9 z;&q%yqb}GP{a`*iYl~D|=i~XjudU`hy{4^eAHIEFCYMG!SI3jNs~jz4m^jzlUvKZ0 z|F-19*9tqi*W2SHHeY!6C9?FzyDt~>pT+oRYfN#wo>VNw5o7*td%ARJyXmw2+;$8V z#}EB_-lX^5##(myxAJe|w^gEN7X`ohCN;-gzVxJZ+%wU~miH%4km&5*YjN`Z=Kl-_ zmaP9Cxj$gqq*<||*Z(oi-S>O(%YMEZ_Yc#1N@Lx9F4zPvdOd5}tlRo)zT9<^?pzyf zab!i0mrRpgf~Dw1ArT=zh9{HbSby7AI3JVa`KaN+^4qVa$fqj z9iKM|U5YkK&n}p~`P(%8xij9!y|2#Nel1+=jMtirTfninCtAHF-r@>Ta%ANc+)@J=Sb`c3)3zLT?V#Vdttu+J8I zsa$(%$&{bdSG=qf{Na7%KLgi3nTq$3n?96$&aPUvSZ(>zA9l52`df4YZ#!n^=j&dq zR2DLd);7*B;QssUSk~3`)^E0b_MUdX_UU)sUCZmsZtwjwe{bohO;^9azP$U#;g5Nj zADnLskGoL{=|}GO-_k$yHZIogwa2xs z(>7h+zs)pqONY`VTdl*@>bKJGD+%rNG}y=~({ZQez}b|#E4S)9{{(*U4!KZc`rxno zkz421ZFK#X&Ar;9b=`GOZjbNAN4Wkht@AyuQrPD{`%n6h=tuYEvVR@#(6M1&argYA z^*qi?59gMyz8!7XSTC`+H+7HTohh^A_5PlW==pQ}JEy|BqG!?Cul_UW+J(RVwCiQz z)2z3*^UuaVPy6@!=9|BV56_qUVlPs0tDg5}^u2ek^G{rvoBUh%vXi2*PKssC@>@zz zFNT0mQ}PyXDe2e~UHGVUbNt(zZx&yk_hGl#Z!7B)8yoztO3&>)!18g<+nAcaAEUoj zUdhSXGS~UgteRcVHf{SD5yz3VAy~^fcN3$QpOEFl&y&pRElw)+e~tF{PulpS)%cHp z`+@aKw`>p0&|DLr752{VeBN2}sWR3c=VF$eS$;`5YOC~8@m0INUfjKY>VF1ZpWx}b zPqvihpAMN+Z(Uba^0t$UQ=Os6bG`nL^50G`>;?ZgXD+>!S#@rv-11D*f88sx0$iru zI=jVhrOLIVDrsEY#|pKM9gQ&v+H-yBy=d;!o^NkVjSPhB^gmtCV8>c9{ZO6J2kWeT zQqvvV5AL!(Hn)7$>6J^*MBP2ucI%pC?xM(;!}1ezB%7DGS2J$inJjQE*E_qe|I?)-4bwepYiB{S>!Ym`@4Opcnl_RCtmyYKR31$O5KUJNNZalk<_slGEmwK#uPs-kD z(+~Xzf4y%q^_Q=>{!c(R-6#6Bx3;eJq1;tb_p*#R;vPk&7~5WK$)EPgcj5Af$@1lj zE@nTYW7q$!{F%3F(eA3-zdqf)pMG(!!rx_k%wy9=LIVA)PC#E zTdUjh@lMPt!!DOu_dMsR@Em^L`)G=&g~jW*dZvn_`_t;he^`BY+S48UQS`v2oJ$$H zQ6E$<9Txc>o|`_S?|63mXU;pxF89hO&yD(iD^@D|d}!+5tnK?VquqX6FMs;{Z^nNH z`-NN2bG-PccE!Z|t=GwXy^`q7waa(!PLe$1FFtdNjNwh?#vL+wCk`H5JL|Dc>Eo{- zSf4kWocMCOH}3Tjt36RRfd?M?Xg~GTnY^uPk&4gp59`h?U%c)5U-6guFFE7=a^L=| zUwa6C4x~r`yXoWk;`b};4smz$bH1+ApB8B*9m{j8Q(59W$9nDU ziW3>zXNhe5;U021H)pL*%9Oo(r{2E9GvjessBzT7DI!J7pPxLtQ{`6p^)F>_mz%DC zS{uAE({;=IudDW-yS{zymaOMjcd2bnedH^4IP7ws*n7YI+xuSKdT94%qE(xl+!2B7 zbEWrhDLY-TNvvLUdP#Qh%Ufj&vzNSzQ*M&*o10i!q-=0d)2P$auEa8>cVf-eJEw0u zAKaz4{ln^m`+5FIFV8rB#H`G$AX94Is_mT&8~dbVIKSDS)yNbK&iZ8jROHN`WN4REq$GN zt88z{wq=JtJ#qWQaXIC!(bME2$w#3UuTPv?*Z6Vz0Xdzk3od=}m-&$&dF8p@#>mWH zd-r_y2z0gxaaVa8#ChO>fNy1d_a51`6~`Y>Z~gk3zvV+l)xPdk20K4KTc|f*W5;Ed zqJ?d;lS~h0GOuGk)@bDSK2H5%`T>54AMr6CT<<<_Pu}wH+`mJ=Q@V@uylUqi4sLt= zZl`+ookM%ltv?wt3r>lBTz*%7|N7JGy0>oGeJRQ-?8W-e;g_GM=Kg1p)?a30`BDB* zoZ;0{S!0KfZr$s$qOx=LZN6N}k*Bsb=dsWtLyaPipAApKT|5FjEtn_yR*1j(`jB6u z!uZJKm${2ySFs;Ht!r{HdD)Lyk9~K}yY=|o32)Dwn{(G5$~%86VqaPE-s-(?SKW_{ zi8j>Q>~?XIRfC(x+$)xgS8sp3{*Gy0x%k%q4C{l>)~J6px>w(N@yB8353Yaju3KGg z@+~U9{c-C${p3mMH%sm%yGltq2u*rtxF@x9)r5bSc8T6^TdTj;gkRw9>+4ZlZb?-H zX=k`C(@zZg>~e6azzUbxXU_W5x2%(S!hG&W+WSZRhwtA^cf4d{{NVT6^kctNx5UYb z`6M>zPq?3+eN{eD~h} z46^^qwofX*y7*NrckuMci*Md9zr5~C=kgxw8~1(psrl$dRxaThP*Igs@qeXRbWYp<`mok}3e;1!quU<82 zqES=TgS0Js1V7e$e0<(fYO8eW(!X2YhRR0^%O-ZLSMDp{KFMR-ZUb(eycrV%`-~Xb zO`px*!ruIKeaAfI%ef`{TLrSLzD=!9FI@EFd*F2c>ReNojVF!YR`MQD&|Y`6vsp&= z@QSq`tsnGlT>i&t(?|a1Fa2*Vw@NSDy+5t%V1l@|jvL#q&a2EvI`ZtRy1qMZOZ+%X z`Mqh~+P}A=f8Dy3ze=@a?sm`mgTMclf4cBJ*7%T=TmK{3^w=$%BDU5hpA^4dJ#$u~ zP)=v1iq;Bs#sZ@=g_At5&hl4NpPX%XRd*{}#N!QeTTi@*7i8NH8h8|H_nDh?L3hp5 z*&%v1E7j&$S2C2|S+U~ztjsebm9?HiB$(f+YM`(xDQ*h#$n z5g)!@>t1O#y*zpCaW2129gC(bxaxFoS#rY7!`6{mbnm0Lb>9z1scgSh-)?Fz8nald z*W6>$RXrKurUQ4*@>IzB#mGBkpZd>`e)MDP_U?VEryt2PWX}`Ya^3XUwVZWZt@m1` zRGeMAMlMq9da=>T8L>NKcNcz6?|QFWN}bEmCW zv6|VLHu3Ex`!_dLc4wRPJ&Zh@XRh&4P+~Hxr$vn6b5nc%ivJ7``48yx{761n#rwT; zk8jaI)3x5d^Umh!rp(xLefq|=S1ue^>DAtMRKFp{kVjh9<$>5SDa0{)uMNa-zzTNoBUce@B`49h?52@!CFLA8oeYeVF$5(glXPKLQrPt)_3@TJ^ zJ0gIJl3b-#o4y46 zXL!>!{m8ak=1p&Ff?pys6Mk?$-7vkI=V7IKqz#B^fHGPdt4tp*}m<{!dRul zZ%;pxHP>Et?2^(^+tnwxny)D6@v!D#khA8xn`$XNx&P|7vL8$D)>r>N{cG#brSl6- z5|)46eK-C=d#3%<`*-WFzP*JKK<+3G3}wo&QjqEps}lcK|Lxemw9VP9*}GR)gh~os zJyPl0Fo~h$fDLn#cH@uU!euY-@mV zhHPnnv5I-eJ@#FQ_x_#IB(dCky#;%6D@#rcQ-SgMsP7MojN|A0XW;jx}T^$mI@*n^8hgZJ3FCgUc)%vWu-*47E3b(pb zKP6@kE1lA<$L~sao;WmdPW;FBN44Xb?!DL*V`aAXQN2L6 zTy|u}Eu+KVI+Z&#rv+U;E#lF+mr?Ka=@pXdqNjg+e;D5Pr}jhI`-A3jrWX{i^x${FO@#;3jEv?LblpLENOq-hgcrTaqYUdSt zt;#RAhh1monP|Ik_h)v4L(beiZBA8&Q7dcwZpVN5&tUuOefEn)UsH~i`MI_A>o4cJ z{n-Ak^>U4>)$RSQd%RcPb6m1)=jx~%(FZPNT+7|TsaJkx!JOmz?;aXiewx#&kn$_H z_3yrwiMiF8TIaKirOxuKym+xe>+p>x-irJq{x>UaDj)6k7rEp6P&W98QD#79)RcXi zA6lxm?m47Z7Mc6l_}I1gGgD67d*v7qGL>!bao>&oK9|?#Jo0$lbx);JNa9GwLI(z| zK9Rx$GF;gY%kTYXXubb3-}e1e|3_iB?l1bA|D)#3MJ#7Iz|M5QMc2Ijxc*zyNAtsf z>yI0~Oh0^WmwaPMv~PaUwrfgL*KG}%`Rq*DC7l%;9yo!i0_{gTUl_*UrGzWJqUix3G`b1ZN>(1Wu?aI5ahMB3KjrP7irFM3eP)o<02a`B#uG)+JJ9}N~KSPJ< z!^B4!CF?mWCjC1cbnQ#=vdZm3XEPK-1!8`#QVfsI2tDks_|r3J$wmLtvs?c&#NMB} z{{986oS)To-TBt{e&_Gh+xzi&nEx&QLv@@N*Q{A*V_IBTy!_R#DG$#q`lxg7rD8)H zmzkF0gbn#$Pc3FKv&vZ=d!izwFxm5C0iH=s){n^}f|!{Ov!z8*}}AK0MpH{Dram zvm39a+Hcl$f4LK0k}dLHspQ+a+mif z-_?ZuDc$~p>(0C1@gMAJX?sND3EJ*9U+t@Ie)-LRh9}#8{t5qj?N_8}oy)wv^`El; z?32!anzvJ4_MhRaeas)#+xD0*y#FFf_uFdLqi=St?^ye6=e9PBr`ww>S^_<##iv#8 z2r&DjamOQ%wQ}kEx4Dmcvk!&Wsa@2qv0ZpGBaV0b$FkMQ>vT4qu8Dmogx5gsgre8k`vZv{^lGW7cKD0eofL~x{9xYlM>4y9(eVW@c9nn2zV~XMY=6MjZySzmx^;I; zmu|`gvC|y)S)MRBta+;!t-kjA%WrG`itjGHx^{cs zETI|?tFq0@|8_g<-BP{QOk>OG>ANS{ZZHXQdli|t=b+BcK9RfpQ$NqkzT0?${ra^% zlOJt=^Lgoy{|v61H(lzRz2G$Wzi*yjHkM3Y(YU}pRmja}qc&TY)A7@(?3wFko#K~p z{^%rKZ)pFJdCP@Rp3QHnyZ5>-J66Q^KuG0u3d6C)p0^fPKW#s?e`(sg{|x$ZCr%ct zR%YM7_A>MP{fpUUW#;CbnfW@OWZb31lV32LTtCN-?ce2D$B)!Y{-`=0TFaHqFOqpV z%dBixNU-X)zSA<|g3))aPn0gk2Y@c}D zdGk8PkL>MvrSc-ZH`GxLqz=Wp?Twtv3TlE7nYR!ea4g={?Q zVrPDK>wfoNx9-o~Qm)_b|N7Up=b0a;KjN9sZ71=g`KUnTy2Yi53})B*ev4(^on|q0 zMmksQb(Ipy^;SQkKQI<`ik$AKK4V6Z-Jl$FjYJkKXDBef+jxbnAwzOD@dS(OtMbOi*Eqc=YX=YH4;A zr#+8dV7g-cX?K49%c^yIts(!fS^Zr<2)NV`ftES)A%1?TI>Av-;O;3N`zx?NS=^y@*K1ZFb*QYkH z)M{Qm_Jha`;=z0G&;MCJ|G|-UYvz6X&tU)Q4``bO4`?qC2tU|o{Gq{lQ|3&ZCdrU8u`w=l<)4C&U+%No$<{b*zPu$?`FGTJ z-{y$RrPh&8rY_#Kx9w)seW8UNQ>UD;Iq)PkWS!&s*pKh}AInB@`~PtM-U-&35Nva4*d+}hfeucL(T zdsY`sUtPteQXZhAk;Xgq=RP+su8BJ)UcCFC!KCip{`TdI@5t>gtN&oX{;1r~c=H;^ zzl&>JANu!-%ZYBEeK=6Zs&9H~>cj3W6SvMS<6hL1y*pf()#JfkHODUhuY8dc1hQ|f z+Y&Dl>-*3%{K&U!ZHJuIw)1YAX0tX#BC^a)&OJL-XzGN-ISY+zAs#{ z{K(|n73=ODKRi$M>X*OSp&KWAd|rR_=z<1?ZF@dXacYTnKkTX)6tgn+j^L#w`CGsK zzW%cQ)6FIS*mK|ht&gjI*F8_+$J13ue>?mLJ{V;>b?Wi9b$WVhOl!Roo&{D+^Do(O zz?qd*RNzE}`LCyeP12%!n14tAs6PHr>Ef3eHM);_RkD@|=GU%t%-Z&!VP8|`1C`?i zZ+{=zK3^iZSx!3d>7C#I8E)zyTK>qtttNJ9mes$hYLi#reQhpP^`kj>(yeFJEt4xF zdaqCAF!G3PQoAncCLPhWmiRr~s@2->mp|V9v*gn|%Y5y!vai?f*G>P+_xhp#(cksm z+thA(KY(|!kR{w=eiBH!WPiLNIzUD_VKt@&B=^T#cfp9Yd` zmXEJ5?Nsl4Jkfda%CO)I%~t}gKUL2c-7lN_i#K}i{p^42Q)+BJ@`D=M){Y<5X0FII zsAxaJv1^G@wpgy|*11dGq;~0RiFGc~6n5LQYm%et>a(Wtx3rJHd{8U)P4(hhJ^n6J zUhaU8yvy6R@@5_K&dzwI^iJA4^IB^7B-5T}?p)4AhuL;7`=R~l$A5XeO6|GiP(0PzBr3po{}PS{;YZ9?&K-@ zXusTlhK#fM53l!>>dHG#c4)|$Wm0wYjMbH3PrcPDO)oE?csk3MN=|65~l ztT0ku%#P`moc;B!wa#uIBR*V-Q@qIDUixFU-|yg1>!+Dr7LRQ2oxS zX0N4p*7Lppbn#dG>1C^KPkFOA^IQC^{pYT~zV2QdKDXlYw_R6E?1i$-t)ep~RusGS z9g6O?4roieQfe|wBTrB2p34N4I2Un)eX=U|JSF|M-bI{uc)!bp;CYA9N9S)A|MqO{ z>gbpARAQA6-uiL5ZTG@$^PitC()p&7B;9XXGvVV=k)pglpH($WIn}n=snwm|7H4(c zWUjlCLC^~uyHooCPJt(zBm;Zs@Xi6Sv~X}@g$^10CucmLaKUHX1m zTlVamx8g;oFTMUXt?H>!zVWGg?fltW-rWCQeB18oi5Eoes{Z;;Vlj(iyo{J6!|$%M zZ!)%|&-lah@p#vsn3}@nKlW~^EcNGm{dTF&E!8dCK6uLC%ze(XX3~k$&$NfB1YP&ftoT^P{)?N5bwY@3&er?N@sHvdiSNh>U!=w5m@}2(~cuH-H&WC>Z&(ONgq&Mu< zwcOv`kJbJc-@f_! zW5LcHyPLgsHE|}-d$LNSZ0@zQkGC0~2}`RtopEeNV(g@U=c|^!Kin@Eirj57%A?jdp@AZ*b<)SOh+l4@MhmAFa=t&vXCB{oeV9_O6;~B6!Cw$c1nD3Z>l!6)PWX6gYA@ys-Sr=gl&_Hm5m**+aKV zuC~sr%e(dR*SWdnvPZXW{kryyj-+arxx1Szhw_B15BonRT&v%D^xP)X{Gb0BKC6EI zmHapT-pk`rf0^whF7^M}ari$&&Bgx=9LJ{qXRtH)&v4)L`=!TOm&&Kf=`WA%Kb`z= zn;h50qrTa-)BgVTKW{nth`46SrgclT?s{#G(eC7!<$8DPef!F`X9sQm9KUk;{rw}e zR!oW8wz@)fioe1RBfFnWZ~2O^Zan61@o2wO{X>aM`=1|^|NX50hnvazpC8-*{apOF zY;E=5IQ#yu7uo+f=U9hTto(cW*QXtYkVW&%rkIJhj zZFm|!`M~4J{#&lpL?%|3R%REkjgE_D($iC%lsYM)sy*>^o#`)DWIvhIA2M88f4;~5 z`&sYDUhBReJ{~l^-6T0X>G`s~w*2R_(>}`1zgc&#)Wh!7d)@uhpXpC}+i+&a?Y0jvIhbNhR0ZI|KhNWzgKJ9?8C<$ z;$#B53L|zd`SNo6m;Tm$B0uyauI{N_IOC1F$J+NwJ0^K5Ypq)GuwVUU*Y9_R9apNj zR%S1{abw>P-Nl`ILqc!;ta)s)dd}5euZe+A!#w4458ttMZeQWoE4j1SdfVGucbU30 z*3Y+F18O*8!+1BB%PIbj+21ky;kEY6hp}0o9k0Ew@mwBtuspTk!^X7fEk#R&r3IKZ zAGSG)K9W*DzFz)^_0f6y`>($J8c{l5EK5o!Dz9}_R9u!}MXg%j-yNH_pXs^hI&1TB zu}hcCb=V?mvp>q&b2=|@UR`nR({bO6CDA8L4eM9N{hf60TE-oTC6(M-4HA744<<3P z?d*={`lJ2har)})TcOvcKRR6-xOVZ5t6OYimoj+=on`FGQ2l#m;?FhCZlU|~{JjEp z1x4@6{?BlG-`nfwr~d1W?F%cb&di^`_xr8a{(Ex@njatUdip_j&GyYvKhlr%uQ|W$ zpzeQ$rmfv=R~PgdD$Kj?74E%(BSE^@z^VVU!9kNf*1sKoe1Bv+y)pRkg^GH&z1K=~ zvjg`}54-$sS;X@VyV`DWJp7zk8Y$(bnqoacaL!c4y-D$nb^`N#F6S(1k0`smeA2UP zfA=`9sNE~)dRzTh{QTz|QW-?kwHzd6xR!p{en6g&t#RS2c|4a7TsHY#>+Ey+snDH9 znc&#G-TgUhC(m5?>cGV9PZj;O!ZSbZt&a;^|DT~T@@D4K3(AXq*V;e0|LJmU&AHhR z>wBUf$sf+=`mxA6F%{(?-o+00f|efS=H}PQ=Z#zV z*yEi}ea88gT~p_Gn6_=b_V`NTuFIE$W@KJgDRX_CznSgR={)Q0qUy^{7k--*y-N>1*1l^O+aPe3cS^X%8V=Toj&lzTZZeBFyG)<`H~;hY<+GFL zZOT7c=XGCu*?$I=t@}+My7)-S#!3z-7OX|`_GV4 z-ORLP^2r@q$JlPCGcZQg@&0(azM?I^owGP|x#CB)t?OQ1Ptth8s=UcONm(xDVgJp` zg_i8P4Lc?7UHYd|BeVI(`qj5@Ry;q{{zz=qhnMBE3O=$J#akbUanI8|vvlH=J@b`T zSSER>ZTYgk;U34un%LY&*|usGU$diUuRfi0YwG2Lxuz!)3ob@)S*G#l?3)NJ2G6@a zpFQIzs&3m~?_INIzjT(+kJ+!6KVAE;{AcEcr*^D=qCV>P);O-nlrx@m-1Q?%*|GQk z1nb{OpO&6&BJ@Tj)>r#zt;DHj9j?`rjMyC3r|h5mHYZQ5&sh7owAF%FM|c`z`kVgo z{fItbb@bTUCAZ^GuJmQUS$B0+*7sT6N7>%q3vqiEX|v*c!khlAN$fIQZ!=~+YuAaL zyZ`(@wg+2gSFirh5P!+em)55*#$MiI_~G2+2iI0Ejm;7~|KZ=cUvKX{58P=s;f76k z-W#PIa*@4?Gd=oTbJiWb^_=0_{I-8GH5MPk4{^`ueUY8Ju0lArFWKf>)tc?`x0u4z zBBeffUGjKy^US=&r0S1hbLZTOezQA%+Uck1x!=pSZ4U~6;``?7=k2EVSMS<&Ikzgh z-Z6)uOX@+e-|7>M3 zcmDis^GE+<^GrUd&0O6Ze6(uY%ES-9biKUi^JeKLvQ4f?n=2f$&vhc}l_DMMKT|zV z2%PNj`;h%QzUQCJkMzQaZ?*C}X7x^MTz2F1lN+zXqn^xiKK8Da$FpeCksl?eHsv2~ zY~n4n7uhoZ=4sD;mrc$~RZKqgPk2YSN7pf4JAcvt49T_2eOOxxPV$rsa(CD7Z<%0K zbVFv=*^D*w<1fW`?YbSSUn{!S`rR)5bLLbPO5^Uw5%#cdYPi%mCqPgH;3Q(YV%c+e=uS!Uy|T~=*j&Qle7 zn)nN2#P%=zWBp@Q(ubo*pSCS7TzX{ly7^19*5()HvrK#=DfUUbBhLI*$e+F?OdP8( z)R=qJX{-0_-;yeD`$hJm$d`he9*J!EW}bf1JkdE`Y!|mE7nK*T-ZpXDTw9&*pZCr0 zHuPLtefR6vUAIqNuWx!$I(_rjt#dvrNy(2&V|%glkNw-iZ!P_{vsApV zgnO@dfYq|~-%dxbbTOOGR2^ZyVYA1)%G^I&LwJ}baUKr8QTkM#dynFW>24pl?Nx7l z_ht8f!5ic3kO) zX8&uZ_ChsYKPp$p{aEjFY3-XWw_lg;`zSJf*4)WH-!+w%ewg@#Tj|zu4e3wwiW#>r z_@+@OS>f#Z;d+(@9|8=vNRZ}FeOBki-q+c4&LJsNLV3+n_g+VMx$ z7YaEe z=Tt-JB*(3HBZO~Ousi+m|8V}{+`ichUvG>5(44*e#0IP7ovBxH^^UCG;+n5|<`dHz z{s+%W5<2h)~L*Vx@+Fs_j)WBb@v3HHJ|Y>+N9uk zTLM$7&x%Les?OeD9rkW%>Ccka+b42e>yB1=|7`#INx#nrBOl**UHdd`)%xb{6SleVSqxGn;u=$X-2h@a_hl;#qIB%y)nOWAdM&`Fmwk*rxmb z$K!w0&j@rCfsgEclD~E8(4+dBpO0I$&fB~uqx_cJ#PXF~>gx@JHS4-RmA~yST;m;-A2knJZPlUe+ntGch{GZBuUg zy`u-M19u)+AMs=Ihs%yXwm!`6=c|}z{qsxqoX)pzOubh#8%@;;a^F75tVFRxE_jJL^j=D{ttWTP3sa?%_NG zLrwL?ude@*F3Zj6%3W>oCFrkiy~?$H%NPDA_q1YbT$b7P?%CfpYZq@fFQ2q!^O-=W z_Y-eS0<9xDsdBxoChLRkwuK+F512*S?R<3B{z%okHQT>8ojy8iiP^-=YgwCO;^02LDP4WN>6>5w8JbGsMPAFvU)`$hTwJ^0&fa&kHRibpocd_vb2PM}W=WsH zgTCUPhHUnLtNY|K?Zw{A`p*z^ajl(VT;_j{C)dx+Zk@Vwv|`; z$Uc=>r(LeB-IKk-J2ykM;itgaC1TM9^LJ0?+kGM}C?vVy)DPd^o;7YC{k!&P^6eLI zz3*|2Z~im)WjA6D7c_l3_o?E;`-0cMw{22*>i1D!+N|c@^2`VGTRzmJ-2J;czw#rS z_~f8&*L@#Nk6xM9t2E)ez{^kPPI79UF+VQ!;rJY>`xon2f5;!c{Bm;UFZG*GF4vv@ z7WO;l?bE-rG#53$w>)=c=ZxYx?@yj&=^^f#jxAO11xAv;5 zF2`SZQa6w-fX7k3d$0aySpQOv{lk8rOQrQUoexKC@A%_fzcBKZRZ+#{sH{$=nAKq=-Xmh%}a*gkj!9*sFt?T(}M7R7XToU){p3N+N&XYEiUjF*~{79^0&GQMY zDc;}q-g{A2zgkH2!GqG}YD>>u>%4U@+Vu0K)2F`u*djJ(<*TdjpYN*I&HMBAcHHuB z+1IxERzGNO%_}_{lRfK_qw3bL4^-ycX~t

    p1A0IFYR1xMyMhqxA}Z7uZBUs{Ch= z{VnWU)zR;*f8-xV*9*qndTFOI+fjdC_IxAX51QGXOwkMzcB{nK7pJVPp1S@}J=3dt z{yp{&Ps{4v{G+(t0%if@0YM0+8?!JECyVgrAA?oHuwf?t(V#3KKCED5BrwaE`8}=xa@rG>N`Ic{o<~ccAqD3 z|D%>mcJKZ2(tfee{-gY%_g%NPuJ2g4UjNJ6pl2>!?*n%36>D_8c;;X9 zq5G*3k7scd3;msW?po&5*`jxT{5~+BJ^c7Amw5(P-mKYr*XKXTnhZFML z43#C8v3fd`%?a+E?3-Izd+z@9Wxw8e`%hh&pYiwAe}?(%R(s37Es7Vf@vePvzE#fn z`j?~pD_%zFKK;6B(>u0^2jza&pB8uZJN>9Cncq22^JCAgHQy(_HoI09aAQK4wnYM8 z0ox z)=QI`i@&Y2<|$pQ(^}uI|GxC~UGtz1tFn1+ecMo~HmhVu#L*`{v3ok+i3Qb62n|2= zP<+ns{|vo*jF(qzKD^ybYU{nl$%#iU`7h17Zk2LE{Mq^Wxhh2(k0(elSv?6C{@{J+ zKLgj6u&d90WY;fywO)Su@_VOK*X~U$i^2la-1 z!ms8@?GboU$7&F>^!;s9&BR`%9p#zZV{95P7<=7$FLAfAPdnf`$@`#63SuY6^7y<@BS=1pziL%V8&em(Mz zlzM({N9R4?i+A67hFw*j=le>)#9Mhz_|6~uBYp(depv2PFHq5Zctef#{5Q)^y~|Cq zuC*%^UNb?%j#m$KdwD~Nukv{!3dr^)W^ zpSFbVXaC-N@8!O0e^UQtXe{c&95K|KT)EqQ>gKZgev6}aT?}jg&HT^cWB>7${bl=? z?f-1UG8S;%e>wll;r|SBi~kz`n^FJxYTDpdmSUH8`PrMh^Ba0mC3oAjfKw67Jq?GIcf5{~@BgD-{Gr+9qj;xH_&WDh zuj?$Xefgced#~8HdXAUVk~CtIjbq()E<3bn^6Py|PwQo1X4?Dvk$L}?{|s$*{ChvD zt>is>w0H5Le%V>8y?iDu@oqS?_`QYtyqT{rZMAWHWUGEC%J$saby?G@%%*1Te!lOD zo)CwU;CbljvuSz`q5eYNS^0M*yW@D4EJ8#!!F*n z^{n0J=*=HZPpymyF74r8HIe6%i?`=CH?7o9k6E@A+Mz|Z{MGC1 ze)Ii5-=$ysxBkiPs!HAcr<1;l1O|a7rg83z#<9be@gx7+{FO)3KXO0PU9xN1#D#jx zA972ZU)~xK<$YgN>1@$|hLlV93qCE6J<828=iM~pr|T{pn|{==n`J#P`dHcIf z_X}r)@6v0uol*V!M`+1?r3%%%JCnC&o z?Ozn_E`56Si%Xep0_Prz8OnU$=yQJOl*D999fq^6YwIpn$+bVay;m+*JZ7^0hW%SE zZPgbz%A@Xo`MsV?nuMxapr-n{7BP>zA}4mw2=u-?C3p4ye|P3TPnmb?W!j--+w-@- znEqtj{e4Gu3)UZzzvb<6Nrcm6KXZg^-shg?6?fU2F8c3s(wi^4=fwG?mHmhNyQZ7} zGx-twF!E1mY3U=0J)Vy$O4k~`kMq2w@$XjdrKg-H0`>*F$v3T9Y7x^P-TtlT!n2PD zF0gg3TicxN`|#*TX-EEV)mdJxXE*$PKX?9=b^jT(Pv0@zZ+idMkNr;j%#?0NU0%og zXR~95((c}U7geJ7TeJNHg!jY+PvTfKm9 zA^&XI*^X+bJM)BlPm5k^pKQT=I?01!;*Edj>kKaa;r(#`QGM^%y+!x(+4MW~N-ehj zjt-i(Jz(wHO)+;5%T)hoC|5ldV{s!w_DECla5?gdbRY)lGlIr zgI&C5YtQ(&OOwfZp+#!Ki8o6lJyfJFnpaJVKJ}kL`&0b8E!BDJF9dmA@K615d)EEh zd%NDez49^tmUGyai~BVGSbY>*|A^QAaGaQw-}1a&=hkOzzIn7*Wt#s}qupBV^KM!1 zm@s+HW(H1cfBVHRfA#)ncyseZ^uy3E!na<<>3Ua3pN?(_%g$9|{MJ3Spfcvb#^?!E zk_XmT|CszS?efE`&n7NcYhP2bIQptsWo}6?mqpyeg{`5>X3SjL%cS;&U3XHfQ=7VI z%l$2R?JNH?9J1Rddg1GP8Q(RT6SKX=T3Hls-SjA1a(Q-VQs`-RMn*lK{8npUAG4=X zul8G7+r6&WR@=Ry{O-5&Nxv6*a;iSA3k>w@_^6vUvZJ4M%m~1a{o> z2#RhNnH0V0xXH0=7MH|@+ieuvRjyy$cF*GCvX7$u?QtKR*WZ14Jfz=urq|hTrKxU3 zhK+5dLB?0Esdav=_qbkDUHQ->=l!Fd4R@5{eI|BicPf0^^W!pKjL7FlCrd&te|jfe zcNHgQf34sp!&s}TXz!Z0pKdQ$n!RoN-@Nrd;-BvicKe!It=-z#bS^njTJCHg|MPf} zy3^XK*Zb{6B5uB#_WjrN_8Q~*%tyL*x6U}*bt`(iOuF^;u+XQ|l`^08J}l`y!ep2H z?WLll$^$-6)2II#`2I6w#IirK@5nXT?W-SoS^3Iawfz@MdA&F$-HzPWc~xcBjU_z? z9V^bMM%-B4#uYwGamnr2@Rz##eDAOJ-g;cW{nxwwOK&{;&yc|{9P!8R@7#TA2OoMa zy>{ETa?4(?dzU0_&R+0p`*cS$Am(TF^`|Um1;(=;@8hcIKiq!QUaTf|MaAmFx6U`% z9bM{P_$ahX+Ey)k#@T~;-difWBy|?0-Tl6p;X&`>7yG3vPJ92TekdD#WY(3l`Aaj) z-WM0VXuX})*R^Xbk>;{Kl`}xaITiC%hsN~_RC-1EO~dT>SpDChRHumcHjKwe%_wvORf52@i*-c ze$VHAJ*{f$x#lHzZ@fzP+|&2WVaVsMNUxA}5U;YVxfNBwVZ zKU#IqUa6Y4?(uJ{Q_qgQ(w-fepBlltD(bXM;1iY0ZZi}%dGsy4o%i_V;cv4(-274d zaK3o2dX$;f+n!sCZv70XU3Bh}0MBP1o4Y&ui=HN@C(oZ6J*QM&Y)|O&ALa)?{^$<6 zq;Rd|tkt(`)y-yiZXQbwHP4dtKP!-Lyz$T+f$a2;UVo$F`hNXqh}JE=Gv%MyuGn8c z>*t^L?Oj`b^p=0~ql?#Lt|V=o9K5I9XNyt#_wPZ>n@kV9y6yiodTQ^w3U~L9W(ya^ zXN7rJ7B9Lx`>pNW+XCm3D)yXR@UZWs4DW%*Wo~@DIakW}*B|!3_Mf3)($-10?)U9q zx?f-s^e8@1B^rL-erx?vdzOl%bMh?NR{Psb{Dms^9jX?&QsZ05x_57HY%q$vRk_GjZoZl43*N%* z{~5NV&6&q==}-2OAO1(~DSx`@5B|`q4U}4`#g&y)<{X$mdSDi+E4eUjmE^NICS>I_~B-u{#J@z|X2yJDx`wh4WBy6r>HN8{{yiUxlt zEWO&YyM4Nlfauhu$^UGho<0;a>vyv2HJv}sizBjj)f{?s=+`~#)s+njp3*B)bL89{ z*=K!TFZA!iFZR|ynS~4PWL6zLy0rK1y!k6%fBSHxuP)mi6?fbw=v!FdwSW7KkH%N^M7J@)ikC~~ zI^`O@KeB1xyj@lPQtS!hZ{sr*-UV>9uY6})rLzzA8m=~5wSQN|X%D9*^C~8YCo%mwbA-XS=$8A@ z`|a{<|9Fl+;X+Vc(e|zm*?ak&;(vyS$&z_Of(;=}n>HjI&U>}fT;0<4 z-apSB+xGpA{qnu+m42yfNqqN~Nq_X0$Ch8Z@=r9oj{S%E2kC=_&W~R6rXOy7a5>lX zti{i7+oy*MzX=qdyoV)o&Z-6v#|A%f(}(-Fv8|SEKO!%A`}FFJeUeiB4bu)zk83%t z-*IvI4z=Tc)>B-I_WU`RrgC2Jli!8x*ZS@MtS;?St1$QeXx4SvZ1##HDLpZ6$x}I! z*7Yraw`tR+iLaKgEsB?@Gxz^cbnJ8Rt{?S}`UPH?h3(JUdG$+X$$2$BkDHqwebS8l zRj_f+I|iW&u4S8d-+TYjx2m+xKfAo-OYgMZb-eZ$E7!ii{j6T1Vt&L2@uTrluLE8m zclv1hBu+k`=ZAT~rtPyH$8G<#$Wyo6tJkT^iOato2TvT{y_Yww)Kkl zdwPwdW))_qY@aB327I1f_@vuHr*)?L{AWnhI=@vu_fRW$+{2H|kIPy=+z$AVH-B~1 zEBzHemi^=Aj`oh2_+-10k@{@=b4nJF8yejT5rzwW!S$sv`SLZ3WB1N@jCq|SJ( zGPyYX>RJD*wIlSXbnX-wd4Vs#bc1Hhy1s&ZN2=Au`j*Jut2&@qOw_Q zr{wn+`_wC>kNjugePWWz++BXu#8c$ci>+ILfA;!bg;UHl#U1xo%i zw9S+Lcm0a)yZ1fQ4I-m%y!^QOPVAkt3Vj;MJg0WgoWZkMt8tPC!>ku;?p)Unt6D#M z^3Uz%+4;3{+YOeB%InTO{%U+yN z;hFWzZO8l$IgQJI)YnxQJrBI3u&3>Ns8meV;;ElaZZ7M+W)`#3V&Z|838#vczDlmU z)$;dx?wr{_Z)x2x@_)u#ed_P@yOs3^;}sTlX~HLwV5@wNEb2P?F@Mf2hrW9+bJHee zOf2_F{9IQ!<46~$(_sa{^Cpa2T`TWy+g-i36?wXZK&7OLh4 zu+ONw^iS;Cp7amFN2L7Qx7;-CO$ysOjay=Am-_6}ySm&$Gh+p0SKPRBp!&MzN6tTq zAMM>fJXNUs5%v}{3IWs~ujAh-02#vk@H4 z*S5D>Kl9$LR%6&XJu^r4UgvL_LkWN9*eLL>3G=$2JyG}V?GlgPN@ES@#woYlI3Dy! zo&UK1Xg^bp`l53knA#@9GruJK)UK6l}bn3vPS4eyx-$Tulv{DpIZay&&^Zk0snZn%?9MkiD%%BrT>c<#>*IR)*q8g{FYobP{$kd80R9dZ|6BPqm*YKwmV(o&>^O#)pO**_59yX73&{`H@s24x<_%{k3hv;JGWfc zG4V}4eBC@;Jbdz*jT*mQ%0FtYKfB}Ki>V>WXWc)>AB`9OBmJ14+i`LBKcQV4Kk{wQ zUKX8k-A-m#_C~SGs#n^cma@iA(pWY@UR_3O&U2OJA6?f!`%`=OKf|YZZ?!|!UjG?R z|IprRFMm|7H1|XHv5SB5eRKcVuC)E}an7v6Kl*&**e?8M*wm9Nb!?kl-opw-ZSmRP z3Vt`*6$Wp4{e%1A_1-V_l7DP3>)WW?NZGfUz8AP3o84QRl`v0w|JJRWU8PQ?T7JG_ zkP@_fGtY*1HJ*>x%U2jbTHk8&`lXHOs*3U>yVl&Q-8MZryHIXg_X@7ErRx{|;8`p4-%_gkyx>J;qXe7(gc@xgf^|E1gd*S?VF z*V%HJwQu&L>|V2H)!F^lin^+s)5^AU?zYcN`gybO>$~sUR@bH$<{VjdR@%+u6{EyD zd9wE(n1pU#o;5r2{>Qz?r9W?%eg7|7-sC^Sp$#A9HvcxRvAq!PJnwPb2iw0>%B7r7 zesjN)yeVzl^b=E0F+MxG%j160N>#hmS61;YbwVHQ53O&Pv%Z$IN7m-ny_G9Yu6$X? zKR>klEw@P7g{a%NPnX1b9O;>M>dfssO#c}wH_lq{pF!r2^WwVmc0wQLe7fnqzT()o zJ2$p17XPI-m-qITjWfe+*Suuiu9C;|)b7LGiMQ&`-xpo?Ppcw7VshYgzui|%YCX67 z)c?|PPuu&&Es1psJG#u%Q}}t0xGYT5Dl(Vby6Rcc%KLHo@kRd`e&;{V{(a=s$>4o= zZEauIZ2zO5%a{Im`Qg9mF&~#V?}^WRXnkJ%VB+$-FTd}So4d+AwWwd^+H?c2O?xhC zhRiw}dhB25k8p;SA8q}=`Fvz=|H2)iS0gOmzqNWj%fW@WV{XSg@>KDym)UN_y&-)f z*8=l+oeOPKYZkZue0|@}`cLxJ8umy2ycPM*`!8&XejCQ!eX}%PF#2q9E>GdK z=s&}O-lB{xZ_8aaPd*Z-F;C*{R!_yQgT4p*ZglLqGB?Qbf$=KipT|5!>#ye8+Lj%E zBy#?vojlXNX_xN0E?Bm)c4hRA-?qC=?u8V*RkkdeY&74ax6tfAgKVj-@uR2j+rPfH zc>BduX_l{TZprPLdg&pbYE~vq^wzyG{mhgpt>!LDKhK-KZFs```gK(7mL=b+{_*?k zmS*qkIdxJmbl2sN@pZG*pO;?f`g9{M%0KVOs#Cp^$}3os_C^0-{GfegpYX@&jkiNT zT-n8A$9wUe$?LsgvA%jq*WU4%UE6RvSLoC;i9g*5XJnPEZ}6@?Ex$4^Pw3LU$R`$k zCVIy&FiwuQsNwy)+_b;rpYeyUeXD+nT$9tdyzENOTay*byw}Q3v(Qu8zN>uVw6saL zo>`hNIwsPz+s|=j`>~(Z+25_}w!N&k7T0f$|8?s>!_ss2A5Cws3H{jq;CbI4yI(wY zve$UeY@z3*ae*mW|a_gs1!Y0qx_+6_k-TxC|bz53{qTZJ*V1NPNe%NS^a0iPBH^OwP#s5pn$PiNKflCF>MUe%So5zr!Ywb=E8M zFahS=jQ1PU=H5*1jBG1ddbh~kan|Sct@|ZxqCX1ROI1|e4*jrf`J-4R>FCwGWyg*^ zH97fc=Cj`B+gr{mWIWxv!@0BIM)0R6jY8YC{kQ+I{`&s%CRf|3-}b*+Td(pqZ{ITh z!~QK+=ezc&&FBB&ee|Bd?hns9Y|Mpgr=1U*cBtZ=UZ})TCEt9N^FnNipO1a?;+i8< zy57FgCh7iW`|dxo)eGZXSN`aFkZG1_BDS_d=w)J8_3CY#&$uUVJ^Zufo#CQ<>yu}< z|6TYeXFg*M@8k7v_L{!iYAva^)$`$o9g{Am#c(`eOjWy?YTbTIwX9Efzg;=k`+wSh z{O5c9uv}B|_;9$eN|dnLVvBiaC+6!WmR5IMKI^zMPWNG|ALl_Hx9spGOY7FEe_y@- z^QW7+mEqrY_pPn#{z1KTRE4EZDZag$2{M4jK^_;IR z9y9wN#?CdfWZAfSz19K2FUy18*S`N3RUdmYY}3lm`<+!J&||@(E)9l7T^h`% z_u0Rev)TCJ?9}##Kjtf6hg(fOe)IkNE!Eu`@3!2IU9xAx+B+W>pQ?NHv?Hi|!fmJR z^Zq!0h{~SNW>U{`@V8xjMZ2f3<)p(tSM#?_v+}*v-e)#<-pjUv@|lU55$`RfH)-G8 zr}UrUP{OX5pbGDh58rxKT%YXT`%!ex^*|M`oez~~$^DJk!NAwH$NSiShOOouU-ECx z_WrnT`ohfr3?Jsava&t4UT$_tO0~t?wO96B_hAe-*z~iS%~7{dKz-$kOpB9tuRoRj z)Q$aRy|-7nuHw`C{|xN^B zOhn0-u=gLVU7JIy)79v&Xp>Lk*|L(U3w<$!i`PQ zLYLRLFe`0YC7{IM_Mw2m^r5`aK87Ft@jqH0-H!I>JzJaps5b9#Nb^zdRVrn-Qg?NW z?(ND>*4i^kaJgc~8b4!oT{*tG)mx{W&*tf2NOw3cYbp9fUbLnXwAd?7EVf<#)^)jREl(}~XFn_Zjl7aSc^Do!vg_N;ZLc*;^PjGcsxr_2yzxA@ zmEF6O@$dZOqigo8E3(>tSo&@B<9WSnf0REgna8<*^L3TI*KG3gb*7qD+iSUGlsOyw z>d*2?&$+n3cPG0-`m`UBADEBXiC?!7EU$ccu(QnCSa-or zaJ=pI&hk`g4UI1s|K6^Qyq!Dmb+KpGv-=^Jzm)I4TVD3<^~T_DRUg`WY$P9fe%W^M z>t5!EPp8dF*m$|dw{Y2=^SOo3Q&g91%Q<)Lp?UgSop@`*dwcJFyPCgdo$idOk&81< z$~q{E9{!{GV}Jg~vLmmm(%Vhf+B?p&H#;72#qRWz_+^iFFOFDVDZH%g)WmJprJ{FZ z?x-%^viVe@j7Mz$0%oMa>6zA&N!_3uSGZg*uGd>K!87a8R{P%jg7V*rKQ8Yzx$@z_ z^*x%UE062G**0D1<*qeiiJx`%WE<_uPrZ3&jmYXxcS6s-J%1~I_wTF!!mHNpD+?^}qdvJJD=?Ji1nu-P5>?DoM8bIPuRBsW#Dt6lkef7X8nTiqXP&2#syI5l@E zuUv1P{IdS1dC$c^9)93{F8Bdi4It`6jD#^FOj3KVm0vseY^a6}?&S1B!W@qPwR~Q)Z4isGiuV-{Ahk0-&MljSO1~=)va%>59@i4-O4e%bN`2bz>Zk2%!8kk`u^qx z-Mib(>Jlm17gO+;?|M_6#jES}f?v<`RxI}yGfL{{C1hB=6_BvIpeCT zx!|hfa({b@jJ@XX&N#lizRCZS|K9Icm&APa3wks8=e_%-f9;-~d+qe2!}LEx=RD01 zQ>;>6i#6t^>)cvc_|((n;q4R5>z=y0+g(~4{bTM!@sIx*4(E&hNU}V-W!AN+rOS2B zUNY9vl}LVLp0+D1U6P^{BhmX)iu69 ztlmH3XV3Z--jT4|`*yCf^VOVt|L$ez73Z6X3mM&he@JI`igm$JX`8N%AB>O7X?-mG zcx?WoZx^3lwXt5mZOZ11X_syvRH;whK1XPiw{jj^5$6f^j0d6r8T#x6*Zq^MkTjJw z(~d07e(QGLYt!$#_nVLF9(gG+DPW1A3(vU_0go6aKfCdM-5mDs^RuwGt1oY# z+Vk?L>q*tEm;W_|O*p#yIthHM*-S)(2d!$t-`|{hvW(+s@R#Q&(l` zu8ml^$=m3%&F!3sUCQ~L%u1ai3aX52dDXuqTs1l0SSD~@WV@Z^#vNBoqbtH5aq20| z`t)}B#GsE8BhGBx8Po2OQonCad)MT;D{%(o$|>mIss zvj1!!^ABwS;nei$E_)qmK&T~2WKMU_pv>@>FRnSE+oBWrK(+}m+(*X7gXoV;H0XdJSia8mR3 zvFsO7pZPoG1@&$0mshAC*_Ejxn`)E9-%jP7 zP^5kN#ojCXkGy}KasR{iM`E7qUBAc0f2a$?e(GA+t4rLk%eU`rv+huls8||s>hOi; zgDbP;kJ=etu;E|E>wcu_+3bgZZB8hOS1r9$c2Qhm!tOhblNcXcKDfZTqr}(ho^{5f z4O=^PXYApcutL!AgsPieP*GLYRq6SxFScvvwEJ!AN$u07+P{oKN~hbMnE z#)W@us?EB6Y(LM|xBADXzh3YvPI2=_Dci=*Ts@^pl20<_Tw_1A-IyW$EqUX)?AtFG zCmfdD<0;~?@koQ#A|6)e1qC*AQZ_9CaU9%l~| z^LqBR#lNN>Ixksq{^0aDsgL#DU%b1yU$6Lg|33rA^Y&+#dCzvp@7=2UK3)5njJ5|~ znRtNF!RglD_FVbOtNq~KUo+ptJoW1(*hoJY4&~J+k0~U)D9~7j&lFmlk~5GmT~Y=093LYUcCQ2xl(% zyZ38!>#Rqg-u?49lJ@A?qw;x~OiWJ9D(jq{JV;1+Z2II^`Eh@ltXtykCQ~luZrLyP zdYo{+IcVTt?hX$drZpW6h}|iEm+gW~i-v7scX+_SDGF$z__XOgF&zMGgKAZRQ z&+S$NZIu%nSL?{X=Gu3u?lp5`#vh-L$9wmw-g+S?G}+waf(Llcmo)jy-Rkas0`jRqp3JmX@AA zf9d)k;Xg~}pFeq{?)B5x)pfdch4tmL_OHDdZ&joB<9h8!^W(=4+9_JucYWb4cG!94 z;jV2T_AOj?%Tvgk$6vs_=i_~s*F{Ub1-TnfJ!~zF-;~?BZTbhlQ>RYd)AqKmjgM@T zt`JZuRC+btF*Kxrfq{XYfopZf{Q8Sq{xgX1mrZxiKlI|86bVCtGWp$ha`NI8uMh5e zdTsG`zYlk|n|Ch#DJgvSr1(^kqu&dkE?{A2W#(4nz$q(SK6`8?+w5H-xW2Y`D?Z{^37iRW4+hM`p#1CSh1sD z@Qi|+h)mi3U6SM+h~l!Yfb_$t<${aF0CUi?PI_v3L&n|5Dc+C%_k zifkK_EW}r-KlYw4^+IZG+d}J_Hv zMaXDG)rtL`R~paz$g7x{7W0-qu}xR9mL}=&WC~a^B7^>H1B!2OEPtpWG2mX3pK* zz_8Yy?N8D5JOn(C*2 zwXXkq$e;J+SBkFXKl#tVp6{#sG;Oz(JVl3ngm?ep{;~B@yws28fJ-*%s(sy$INtW& z|D*Ju;fDD%bF&Q98y>r6g?IfjoXQp<8m^MU(-?QM-ul*!JH9e$;eBSMj#oP*nWnle z_WJZAsJ?9b*J}^fy|T`)KX&@vrQ`ikHLO?U9~^JrBdWKVw{7Xe=pT6{ym1n}hmLI9 zES#3s@#g;T2^&u|wiVruNDZqa{+8*&QO+#y%j@4H*h;-8&c zko$C}&at%4cPjm7bdI!0CSTj?xih3cwEE}%qW=tY%HFQsvTO3D<`-Y%=KbBTneqE& zZSI8{w;$1KD;7t8c;E5I@==A2^`pIV*Ipfnk9|2mcl*4158R(?JE?ZG-RJRJBhM8+ z;lRWg+2?O&t4&_$zT}7f(Z!D2c751WCv;8aipjxR-KUErJ*Jrn9aEZbE$$NWA2 z8N$lCrhjYMbKP`*=RM93rOny5leb$f1aX$(l%6IQm^i4mqPvmlqqUf>JV!e;O-?F7m<6N5gUhLk@N!HUc zLZx&18XhyR)7H4<_1S^Z=4 zq%W78Rr0lWope3R)Arg7y`zWL?sk;kA>iHQpkLB)Mzr4X`y1;RrymfsGr%rh4p{&q+Cv4vL zW8bT`K2KZo*WNbt%--7?Yu7GcdinCT`^DROB9kK*SKM`0Zd$W^hw0;Z?)~Yr_cQ!x zKD^zkzsqg+k_vyn9hYiM6L!_#e6nTx%*E$#seX<>KQTz-aad>%E4x<2v%PKK%C>Eu zm*T8^9I~hO(LU$Db8ONd-TagJ;neSteb@CCeLUA#zA}qF%1ve*0g ztv_Mc4u?&4D%r5D?QVoi(}mdlyp)+Uem>S!mv-O|v(Jnd&6>ycL;BI(c-P&z_Q72@ zmD;{K=;owMRWjyjGHN`{)03KgZl6HruVc#}`FFixvZiu)<{#~($-#kS2N+8Y;J){KPR8>oU&U`Y3hTVt^Xy4kAx_SW*>{&zCvn%IU&L9Upkdl;YI`s6M=$@}4wnfH4h?YVXA^RZp~-)#MG zI_mNs$2GC>$CvwDH|Je(SLdzXrR3eSo}_pvay&U+^pa=xhIapCQd( zqGH=M(+yW`tb2n_DL-3#bnn^i(~H?>O*$&AyyNCnhSe&~*}FEVhc9{Z=jvbK`0Mko z-@7!qrhE0?z4hyNZ?BK|_~K*z?LV^l4^_9fUEJDnZhNtXn_%Wik)BtpJXG%<@!)#W zdt@Htx>GZ`XD0UEESnbK^{dc->DKG>f7VZb=9T4Xx^@5h^Pjjh7EN??6$xN;6$xOU zSyTQa{*ivS6yNM68T-^Pf8pkMcjVX9c`KKAu84j$QR8Rs(};JuH{5sgoRa5b2xIkT zK2sC$ciNhL#ua+M?jQMe<=Z`z-4|6Z%Ex}Vdv5X4o0YC}Cz#Ebed!tK{bb3yhdZC} zFut4mX#SRa)3g3bMxXflUgT!Qyo`B=5ATX^`#ST@toMNhJ}$?FHKu1u&pvxsYSC_k z18NIz>Ta*MUAuhtt-E=d(mQo?^LJOjezHA3w(m!UeduN3Lt<6(9nUX+>6QF;|90H5 zTP?TdFp0VJu2UO)l$$W2Lz>k}c&WlyZAD^dkZF#-ax{d4|TQdHbY-4->o5@$teQVbv%R~Pe7R{8r z_tazd_nU^5HA&n4GfY1j_wM(vE&HQ?72AD(-aA!Rbn3Kuv;H&q@16L{{6E8{_*VZT z^Lc*k_PSE~SIbvDeD}3Hldm7ymaA?(^PN-e(?;p_S^_B(cC+}dGwLzhC>nh*e$)Lw zir@c9?vzvi9eJ^S)4%vPf9I9oc)MQV^|QIV|J~fZ@4mjwyA#jX=KOK|aQN~1Z|8e* zx2|>e_^98hw&|j&cD~drq1#cM33e{VnXJ?9sK0x1c-<$-DN`lx80{0TyA&_DC-~8N zF6Z?}AC_M-ZC?I!;>&H~26q(pY~C}^#`=!czqe~TEQ9xWA3HnWtniv(_V4}byW&>= zx^!&vrC|Tf`{$JI{!;bx&w0Ne(I2EtWIf?e-z(rXY^72fUWz(LqAp@+U0Y7+onvn z@4g?Fe2j8jzDLboOFMXl&?bh838%t)Dr;ZsKdhU%)4^96sj{bz96e5K^c zN42f1GjC0qRkK#-=gu9vHmR{9dwTe;1vfA#FRkqPKF>-{{)2ei^7%)%?+WiQ?M?c3 z*#FXky>3XkV+OPQY%lC(+1+DVs;wy~wAA zGnM2b>kYdmMeud0eO!O|{14wf;SbJwJrDTwi6SvRX|2D4Z5BX?+@U79~h`qCe_if#C#pM3>I|8XY`)+-!u6SBx_u0s0$@U`8 z_c7DoMn3pm8}5EN+V5IT;L?iI@%y%Z_&4d7c~_~eoy0tMU6p6|?mbj07SXm@vPsA# zL8gaGIxp+i-SvBK-S6Eq`FfGE$f9c})%JOP|D*qE%Z*od>VH%}`X80PS+e@R#Eqy5 zwnxvdJ$mfk?yUCrh>^;?sC&}AtNK1R)fYrCIV4_N^ke34yPFlSV=k4-8ofGx_-(Gq zWU{KWpXXo^ZvN}SYABip8k){)i0;tQ~dOY+iZIN z^4i&v8EYQJtKCZz54(N*)~%PzE6R;_#_I@#e5&RZKmJeoN9@Pwu&p0OuRi@ZcU8=Z ziLtN5%@@2UI#-IbrVc=~}Sf7bu{ zd-vDk+a=16IrphmEd8p;BFEiz>zu5dg1~8$A7wkIOehk6*2%PuFRUZx`-S;8=P`~8sUrFs(O4f>f`gJq1oLQ&9db#Y+z@w=_UIIncDrXhN zEmy3)yZ^+~eY}74KK36zFIaIn;3I$c7JI?yioQRuHbuXjCpX)#LeD4Q@Zr609vt7q z`qS%qp?aBJ)aFP0Ew`@s)QKN___q4!v;Pc7w#{m(TDrXB@#W+e*FxUww{$zVhuu2W z7-S~h_LcY4v=g8CZ{801DAxTkw8pZ&dVSo>UDI2GBhs#KZC+_|dFi2J`btS}pSTxn zn*5|`)xL0!Jbo!bvYDKZ zWvj!i)@@!D|D*8v=K0?_nOu1{-CcZS#Y7iIW6Kjs#k-r%e=E4GYbRvh()*>n>NNLC z=_BT08kbD2o9h-A|K8xHILR&H!fJ+qw|DRD^lzJ`w)La>H}?-AZJ#5qn|@4|d3FBL z>9MSgtt`5@o;$G85Gtb25=>eFpkgHFAB(e0_b zRa5AZq;%5Znm3KP_BE%TET1iO;%%2y%sKvC@l)4+Pl~$zENqoly#4$y_x@$d-IYIZ z`-YABgM997i_;4izm)oL?JK_)+cqcVY2Mw{)e%OIEG89maKyarQ&_OA;+xcm_)a;a zfA>E4AFh{)cgmS+ef(s!^?k_KtoTRu zwTsu^&EGLsZ?Ao1-S*$uZ&F?HtVz4&rcS2TbL&_~`L&*bXRV@FWJ+gE?s;s&cJK&O28PzddHrv#^MAhn&(Jsj!vgM*(0BxQ@o&G||Em5oJYN1sgXyYh zI1(4xV1})(gV~~xf^*ZkqRj3rcx-Xh6TBXZ=koq17W==xSO3>Cb#H`K`JZ<2e|w+& zXSgtF`wHeMKPKltl-|pA$zJO2jhio>gEmcL-@o^_lJtq^p1KaTr>A`mG?@9b;}~CI zgw^uH_uK9Z)VMy{9T|2$@cPU(ew%swRw~6tc?)g6<#kAtIr@xfXJmNt)Z=FyB%ao1 z-{*<0xPBl`;(|VBbguu=sMy83{yyQ?j6HlOUHMLU^U=)m57kHH1#L#9 zX>QWt*!d2d#e>ZjDpsyv%obf;AehAJ_PFEvk5$R(lIEAf;_@zB|9Ji1-OpP+Ew<)A z-Th_H{?ng-{!J|Y;C{&bo>2GU>1DfmpSGAg{gUopoAxdGM;^Q9q>szYIUc02MKUr> zoFk+ye*8ZJ`yTenhrH6hAI1Nu6lL5heZrYDZPvf_&$b@Eb1Z36SW~@Gns82{%F=BD z(M1iuHQyKf@%uP!;fJWZd-T`d$(k>iYm>Ml^Pa@z$5+#?y!|R&aZ+FQ$Y+jOukKmI z&hXdZ;X2}|(tB6`NPcIW-~~J8Kc?6CkF0e%94vKhTJo-|e>`EbY3&_F%ZuhP)y7>vB;`82^Vvrp zF>kT;rae*F=hhvlou^*jXE5`Y!KIxiB%ezjkoal+m~Xv(&p!i`TlT$QUwo0C|7J&S ze|wEY$%1^EWWpLk@1=bW%JdsViN`%3QDeu>yyKjug5{t)|Y zX2$h%(}knY%_`VcAs|q*tbR);SMN?e>Gi7Cr#{>~y7cqx`e*N-mS|1u_Agbf*LFMo zCEnJ5Me+~n2Q&T@)x_+wX^#3Gxc~U=b#^E8dg>x~ip@)(psxAp=f1c<^`hR%AL0*L zrFZFX{q4=Yx_bBZY0Zq*>q}Y>eMxR&TszN$9d;p+~Y}}Crv+v&0hVZ>-Hn2 zti#9bG$!ufe&KIlY|+cN9QmTBlY&neiQN2__UT)4KG%&q3;K6!B~4kW`K3JU%cgq$ zPg;IgQ~6fi`e(oL`9Gc?f)Ca+WPhu=*1bH=b!B9P-PCN;duuDpbl*xYPG-qAS+`Qc z(7iQ)>Fr~NId6WrKit>+Al5x%&qc-ETdcMoj*Yo(wzyb#)v0sm-iz)&anT`i?#w+) zV~;%Dl)Ze>s`tM>U;lgs>vRX`q8V(MPyD#m{KNNoe(XPN^}PG*UU}E+i-d1xY|8qz z=+<=O9HBLVyIaex1yVENv zo_Rg9swz*MIk?2It?$l~_Oouft)kwKWA`mhejJ;9=$GH(ht=nWDo!8S#kOx(aqNkE z3YVX>+|p**7I7j&G8k$m#ojM<<&W8OE%>o_&5f7;bUr+pFJ!9Hb? Z=Pl;TXbo;$G?WCZL>>uIu(iN zPr2CsVAV=Lai!2XBEN2b@x6UI_FH|GUzP9CxAmXB{*^`N_1AoeKX{+Jf<0o#)h)BL zf0}A8_^p3yt8d)rz={QTcc1FGb6S0>+#A`6Z-X2jH^#25+5cPn$H}}u#cL|+{Ds!{ zufObXK0QwT(x1rERX08E)m@kzdHVNWs|U}OBsT31-IdFrG*k+|$mbeC8Iu-}Rx^m8X8=`GYs_9-Z3OyCUQKBhGb-pOZIdsN5{n zJyf_eX;;9fbcV_tPX-}t`J3;LNyRtrlm6rXao_fJ(PtkV{m6Q5x!C^fte|VJM7M79 zp1mWvzy95mo!;%z+{`;B_!VWUR!z+J_-FR?&(B@|8T4aw%vImqy1ReZpZi_g{r;Nl zUsRL+;rU_fe*Vnh8kZ|MTX@Zm#j70;+I3y!lAL*oMw0iGY3hfUg-))M;StETK0crQ zpYZ9$ncMHFe=0f8dowyZbHi=V^>=P$3OPyW?lFp9^D^IPT54#d;N}UI55oC>dt_ak zWqwq?Jx=NBAIIfCvJV~hyz=&XROnS@ocbVn8#WSO{YbPoQZn$ebO*uo+ zJvQ#ot)Df~KW}EfUOR1F$)>DcD%bpo~b5ie?52B^s-%T%a`)o`I;RM zt*$-75gvi}Tv2lby;mwef^ec86l`(}N;zh2`< z>_h&RUFN-8U)u1OFTMMt?|ALnuXC4exxRMknn{)YT+v(BZOgjEd(7gR!5Je*rM*sH zQz_jWo3wDPcUk=6+wZPle{yF_``N1c*uDI#7yd|n$SZz0PUF&jpO%cOzen%>+sOLn zypHjn$A1-PpGn>t!N1F3@dnYOKipk@*rt9sd?dV1_M`3U3a@YeKW6=ybpGgfp_4nA z7Vxj^o~)G<(8Vw%S6<_Q#hUlGn2%hu7v7(q-?^b8D6@3i<+xn^pzT}Q{EbV57Vg>3 z6(VjI_w}bE?;M_IJKi_VQ~vPqqyN$Q;#qQ9AHHpVz`rpw@J_|lq_*>()1Da@L{EAv zm(=F1F>`zEivt$Q8EtPR?dGh0_p<)o?Y!2q+RFL&YjdC9U-|MLmCog+Z z^>+FngF}+56j>_my;19}VD!min}*dqk1TFyo>WMgRo-bQ`GfnA9A~@hzG>fm zF63`r&8c3Qw|sZ?=|&Upe38$-&+Mn2l`(ucgJEH?mc@GJAL)x<+Dq(Dv1h;YBR*#H zm31YuYVVE*TlqdIH*)9ch^lsL4E(;QYa(ys!wF1WCw4wx_L2RF_WI8HP3A{5^YRv7 z*;+1t^IEu>d0@<~)74rBa)Yi#JnC0en`Ls4b&^Y|-tBVPxj&!gUfq}V>i!3-_~jLz zQJP=(pZxltq51mx?fXDi(7pWlZt+9T`Jwauw_GTl@wGGe?LoGWi@RL=)DjM7v6mJZ zl;<$ZJloB6_0fBRw7o|B@m%r4a$G->mu6i&|8TE&{KoB@E|*kaRN1`dWahIie81~HspLQnZ*kqkl=|8E zXL9-(zU)PTPuDH1-ZR>F|M2$Zy1V`nlOMJp&J})CH*IJf_>gLvL zuS&k9Hfx$Wr)0NyUBR3MKhvb?cZTu2H*uK?ER4fiOU+$XSUF#jQ zYuEP4Z>!3de3$I<mS(<+Z#%|d!s$K?VlVpi+|JK)gf2kn~Hbw-rlpK_spisz;_34%3W3Avk<*? z{q%o^P5XPMy+3Ysy#2{XtslSsntZM-eN?M|bb8dAcb$H@@W*)LRwjnOxa^@n3%a`~{cxx{L4q`g_`c2IZrr^6Y<< zuI%HAc^UO=N8*q2#5Mj#ckg8jXByn{7k#{;&w9o!F~Oq-sXm7CN)NVO$jbFUG@o7k zp>6NCm5(->H!a#D)!N_rRbTp^*M&0&j>+)`c`w#V5%W=5AaKU;v&?5X?jLg>@;Be# zxWDm_>!YvRqHFd2w`{J^_sKfDYW=%;MwbLus?Sqlz0t7W*pNXuE?n_q$z17&-@9{a z>K}T1sa`LX9W`(5qgb*Pdg-ReMXQTE>L1;mJm-DKJi{O159_6Wq&+|OPv^t4=11A0vwY=v)1$jC#rW7P zuXtD~VyPg)yJg}uhTh{g@o#=V+N=F={+rJaSKk-(yQMd^e)XN`m)@p(8and6YP!vr za9FY;<7tbA0xL^8L%8{GzaR6xf0RCY>wL6I|Hz3w)*rR>`IkuLx5#m4PhR(CR-|H! z!;7?fBe#?@JZpBIv*}nOqAHK}Z^j%fl`-NyaZDvy@@xLkI2jXG5sL2 z?UDT!FPp%1x1X0!&rSuFHWxSU>umdu9Dz+2`7_IZ-#)K5;AN z+#4Bp>*|wjM>d^Sy=;H8Yx7(8+XV}qF0!Ue{5*Ac{oR^(^Oydasa~;j-RXalf7V=I z<$l3R{P5)^e|$dr_oQ7--!naO+vZJeN18T99%d+RFc&!UUVCE>pIP5c+t@n+F;Tvv zm)uo1FVEx3ob%{!%TC4ZBO0H}7`4RkZFaLp{3S*z` z`!0Q9H=3_^JY-kXL!L=n)&)IP6;{+cQ?$;2kBwdQN&TkzN8*`(EIwv0QSzSa$Lzzu zHp`2cz7d;c`$stK`Wv&rpDQMA{Cn?f${n`52YR0Ex-b97;fkHYAF0~#hq-INcU=$J z_3wPWhqcvX7O>bvRv*&a_e3t!Sl-uG3^@nuV9ysj2dDGSKzqZbmy{dawtNoO*yqD^x zyZhPQdD+kU+m;{NHlI_l#&um>4a=4f&yV<~)rx%&n(xh}dcR!!=E3wG?HZGiMs?SoPeR%plhKtMR zy*g%{8&$nm?VM-IO#S>b#wX6&Y+}z;VO%yvydd&m{*10KtA6e#eb4ONxl8LNWq;`2 z|2Tibx^-)Bf1NhJ{oaj0S5Z~;4Tus`t`+RfJl+@=`|SX>V!-|%el?!|8Juw$u=L-)#Qo2Y?E51BshER$H8=iW-TIyLvq1Xfv-|_&MgKFPx^(eB z;rT-K7Y?ra&+s5;{-03!*UzN?)p);m{I@*(Kg0ah@6vD0&+-p+zZHHZ{Am4$IX@=< zYvT?7&%m7WpW)Gp{|qh9kIesKkhTAZwEBMrQTJaz+x|1m7y5mne&yBw49{Qvp80Lp zskN8p$J_j2`gm{S%8Iu4K3DRe?`vFp=0sl1Zf&-}!*T`ACEwOBRJM%zy=Jx1^4W=D zT9Y!ZSKBK;DVAaRbjSJdmCUW_rO&LoG}gy_#x@$mmWjMNbB(O^Kj-fM3~`_8UwB4e zVLbcC`Qx|QA1)u-F7<7t?3oYlI%XfEBi*0*7usA}6L3^&5@Y(ri*6n=PbL^;OZ zzsCvNS!|!-rgGt0#lq?amEb9rlA^JNdp%2kPDz~d;CkPW{KNljKm2Dnw7%n7{D;mT ztG*o%%hav0eApWtv*n7ZwM5+Y(`D11WpA1nIlY+Ww@c5eihnQ8`L6I?d+G9rKXqT9 zF5Wl6^Bm`!d;72F@4g;* zJ$U|nAus$bKO=`*~zl{;_oii-P(r&-lL@>Jb=_uSfincyXPLT>JQ%&cWx9yUUZ zPptcSe2Tgl_NWgd5y&q}Me*1|B8Q)mr^os3wU}Mvi!nUdL$vclr z*j#^e|HJEmYc-}Hw#Q0|Zh9$JC-G5r=BFpUi?&aD9niY4&%i@xtxVCwE4jBUl?&Lj z?mbz4a+UAxce%CSyr1WuSy_Ft-)_3se}>xmcmHP8=JQ;=PVi zUF~-CH~y2m@<;hY`hi_?n;ExW{c@M@)VX)eN++M5mEl;od&*P`g`Ot&h3oFkd9+wv zgmWy^Zr?0bsyPI)L@HMOJ%$ZM_;ZHs)?4q5a#r<ob4$-RMqS(Zaj{po_vJsnOMX}#KI;AG z^i2J&)@+wlb}%j}+uCMxcc#kJeW^3nc+Zq{^H{lu^~399zwCSd$-SIDPk0`${9&o@ z9r^3F9^1Foz@PX2ty?!2^`;ul`T0u1NHh3mTZG2`yUYIR|KR>wwCUc-%z|l?Gjsnl zu*EOx{S~`6H~F58qCBt3gaV_U*;};UB!O*w64j)@7CjUC%ip#Zq8kM zDDV8Oh<#FiaGpr9j^(DXO z7u&=Sv-qW#w?El*dFxuw>E4%1W?Xz%8mXt}6@6k=&DPI)oI*8c8{~EV2wDASX#39~ zxW-QP#rE=T)533sUhRFdtMuRWm5Wr*PCZj`SENz)!W^RwDP=)xmI(1ywExlO7kq8M z+5GTb))LEirtg{W-*hirVlzLb*3DWiJF})UW`SHz^pf81T9bmG#Vua?!~LPY@E@%o zg^%~9oj!Q^?AlkedS=a={@~}=hysXjlR0b7?meLYeYWK4us!cL z-9D=qUHg6doo%n?{=J>|{Zrl7>!(hK-(TDEgo z8V{@6+C;-;h23vW}9azm&BYf^`837c(w}bhtu2llzfVNwaaebwZnehehsEa z4&~I`mYtrpz_RE_?m?N244?PQ+R4ARdfsaz`{=hn=e>%3+`M-0{r6t@@;1m?drGC+ zmYq$ZQ=fHuExDGp&ivE*LpxagC;iy`&Hu;J{13%Gm$$?VW|-evyRJET>0Ghb9+hW= zx{vJKm0i8KnoDal%iVoOk+~O`I9K-F`(A%Ie&4s~+}e95%Bsu$GwAQSUS<2H)^O3E zxJ#zz<>M-Xq`iNv%RPHY^~*|$Z969MYoB*HJe}3%$EH|Ozq+6srWcMrlvUCfyHhbY z=lbumYKVTXANy&80>E8CE5@WnW!Wr|9#aLHOYZ?U%W_SE9JG#cxgB z-M{(vX5Xjoe3y);YZ_~Q>9{C6>396R-fer<)}Ida_TD#t+Der_@AJ?5&D}`B0drq= zpH8gIwB0SVTFWG*Z-HRf&-@4M2i6Py@IMyEo*RBR=!L-#bN3yW*L{sFHQCu6y~E&k z)M4E%-r6&gKj}6blytmLFx#%wfp2kyZDuy_&P7_xjhr*)MA4ALmPE*7MgW zudbLJHFNEkwR(5o<;e={&JDa6Qgp_Tfq9aL=<$mB$5Z4KFYWPo`H){Ys-oH9lkff3 zcM9A7T}+)>?(}j~S?kne?`rOuS>!JDSo6LsPWEH?G5O|s)*pVqF#J&-VQzGIiR#ra z#mDO=cuk5vbEe^^x2BIsLL}ppaMKU{2Y!MhspBgiY{h9qhr_qt^Ap{Ytini+rK{Dy`O$@uEO7Cd(7(O zE7FfX_Po~Hw>#Nv#kPGR@)?`Av@qLT<~Pq%zOysp=8fl1!=B5!A6+k+`H%NQO`Xz} zEghzFjs5m+o%i7EcC*5np}x1;fn}-@5bG>b88m6SK;&%VpL*&v`05hoAR8nj&gp@j9-asp9DVw0iL$R^OfWbO(PF zJ#Z=KQig8S2h~f5MZSmUrqAd*p56YL^G>qMz4FO(qrTsYmC8OJn))|u`~J*mx8K&w zpFaPa@t?td;nwpUFaD`rG4X!ubuwSCBzkl0^4+_WB+vMZ&)gzocvHD?hfLmyg9q2n zdTdkr`0EGO=glT3zMSrjdws-ePn1pIfrmcYPd#-eZ>w6Q;&c4Nx^v4HZ@d0i{AK=2 z&UnAvxBu$b9_rFqkO*6GX!>}*`27mIL)_i`oUiNjr$w4c$MW3jRF*hdW^R0_v98B| zYr*yXTjou(;aysB*!{!wu08$_U!RsV<$Q0l+QTqWBxTLKf@v8iQ?(u}adP|V$0%{` z{;?f?%O5r_iTPtR`_Z)CImK;w&$i9WJA@CJ-^vNP9aXvZNoL{f?%9RS!a7BMGpz%g{+!>N9&Mdpns?jw z&t6;ovWZ#O!*8#@fBF1Bxof-a7F{guwinE}Cbe#^#m>v`*FN60eWoLm$6aS-p3f%~ zKF5_lioYfA@L_+io{4m?e#DL~8+UKM{MEMZoi)x_M+4(UytfwO+TeiVK{ zUhwsQ27x`%*_jV_u3z)Vf9;K|2^9-VUHWeOY!6J@zR@=)HGGnj$rD4-&SNQ07au&g zt2$c0?LWh+ztY!pt=@bJ|5jdo`_F%d&~4w+cisFGy=CW*o%V-+ZjsG8_Os+?sfDTc zjD^wKI~G?=aunwDX=$6wepae)hq`FC!;jMs_{Fj=WLBy7)tIk&RXjan&6dA$_bQiY zES#gIJgICY%Yy@XfBlia_U9xPtYw3T6hUNMDUi?ZvJYVpS>SNjP zz{@85A{o1$EnT+l^lMx5fVM~3Vdo6Bl!e+A`e*RDmbkRCOm47Lx!zcJK2G3cd$WzF z-LzYF+ZWW>Y`W|tW|iN$&gAyYx9e=)v}~DncIK@+YH$87?S7tSeSP=6TWh~gJ-S(F zwMj1cM2F71&afe?37>tOD}V5NP{h4E{*{@_GBSl@ z?jQNQA$pEwC$pEK;hj5dCzFap{ojf%iLKB+uUooWIW@`iOrs$Fr$X znNPP>mp}BVzsJPP!s@tOTd!0$=;`)Z*Z;9EJ+ki2y?5Wg{#*a5R`P+q^vn5zf0UP3 z^t)&Jwtf-|+0k=c$8SxL$CLPnC8eo}H&(9k?~oUJYbRULedt!~x8}9+&g<_VUUq!t zjicOAx6W$k_V}FF+HjKno%Wovb^Z^bE3vNS_%S>7-vy)iTYAg7zoqQn zTbVxj{1krklaVv#F-}fAz}o8i+w1V7wXS=3`P=qzuKBUE=`Cx8W#X~Sn;~5<&rJTZYyV009n&^UuQuB+U2{8H*1Uh8$-xizBb#KC z58e9pLMSFW>g1z}u&Zknrd5hrT)lEm-QIfDqzxM+pFCDuN5XM(*{An7KCN z!^{5+96^tdMCCr6daeI)Zd9(Bhsgw$F3)bwsmpnGl-`udTjQYE&f@;j`a$2u<$s(u zedKTc(*M?StMszn`_sA(CWve6xUuc(yvlr}BhS97>$~H&#E-LP=mt#>yZ$u)b#~^y z71Pch-&FtnN6q`o3*YTZC+3*B)h6?N{MD}2u8Pj$*?2s`HhbcQeM-FbkJ7tpRQYCF z9bFI|dD?76Ol6R-McF5Y*E<6P&u>{BTfNaGjVq>c!s?#+d6~BBg6Yh-t|Yb zeak#dY?BXPkJ4LmEB0>Rb{S2pQ%YSijUt=$E=^XHRQSrm6Yl(IZQ0|)RkBC_uIpU! zqwA6FLX)@JCx5#vnP3-h<#8d|Qgsdwb6ntWJN>S=dtyK8#S8pkj`=Wk+0J!ln^Q|y z-hFvl`B-jl!lx74cF&yVbLj1KMkXmgCs{c>e0+FHTAa*EOA+q}-LJ0Xzq+*R_xl&zk0w>CT|f2ie&hM&Z_O9^ zFS>m?|KWbYm>*radH#I;T*pPXe`|_&=bZZ}&A7WjOz@fb=WWT0g^zkKsk>Y!e(hZM zw_N)O?s_H1qLe0+e`ltymphrfgUx4}x#6DpT|x{Y8x=j09}Ct9e$0{M-`I5f;hSIV z%?m&F+&WXo`r+8p%;m4HZSzgvXtR4qTA#&JbJeGbvWF)0Z}4!qQKf7Dt?GLE5Amgk zE0;;ov_g;0IF1O-o%~`X|#ihN| zwfs~H6jhAooY?D@y>0!x^)-@@eBB?;Kito8_r;{hSb34?INzty4o%07oqM}pIlF%| zzoKeMn@FPkiIgdCADe#Bwa$2dcm34GzqV<=+G{Vp|Dyi+6@jiIZRlHO6Svu*#3GfY4T{yeAjpJAe(@*nk`6aO=uIMu)M^V9Np zZk^tXAGwb%b6j=LdHGD;1@mtBng8rC5@|lixlU}`9_vSGAHJ?zFXWVx>vZ8Zr z{>rVY=n7qAi`P&({iq|>?EZ@ET+4>)HQfQJF(zr-bFc)dDX=|*TU8>@LW|z1O*cxAs(pZG9wLe`w>2X<=p80%O)Ly%YH8VM>dP z@yR(Ga+fRoy0S6y&dne4hqp)NO@8=u&*{jNoznx`?wop~l9PN!adzWv({u4P?rjgU z#LI4}m{r|d%`KWk<$_F`Av z_T^c#@|tbeeB16_^vKuwtPn%-_P-Ig&)3yWJ+Jk{^0#F1(i4wvZN91z?4NPFRQLX@ zy=SlL_}H$gjD8m6c{lUD@4CIS{!OfYBymMI@ZpZjTeQ7{iw>z@PTnq@kj~hXvO(hO z`xw;{Ii+h`d?Qt>Wrb?(K6(1|noPe}G@;1j_QvXYJJ;3isX4Sn>ygYtz0}Y3bCQ3` z&EpbU?&?>Cd*x4+M|*o&Q_4|6j}0uRit#;iv91?0-^i|Mk^>hF?PAR}KCi zjNev}{--PXpQilekN$sp<}XrN)TRBde*(54D!eBja(-O=NcQ?+f0q9Y885E|yplTm z_V2vO%cEm+AFE-@IM@x=!}v8#$$qxpUv`ef088>W+PvOVYM%Ys{_L z+qJ>;NRKy*^^DV3H!(1axM#-K&6^edye|7^bX;@w@$G-?A8y_A&i5nx@i>`}ZjDPb z>r8)mu6lj_*0co|U&aYteN%G!cK5M9eZzHuQ4bgN3QBIebW)o67-It85qT+{g&)*< z*Hk1QxTk!5%kjQP>KiV;U8^|R`?qmr&`~X48!t)ONG7+KNxfSH7_~nhe-PS!EdGc4 zx~H*{TYb4IQ@6wmyg0e~hR3F5CxxQZDo&?!IcqQ03_Qol-uqGg&0PMDl78NZ+5McV z`=+K=W~xoqZ&9A?&R?i>A}@DM&4&qoI~1%XE7DG=OiJ9T@-saC@}DpDtrM=Um;TQX z^`F7F&fH{A_;1&&vU}=3lI2@%#8>^;e0ZM0wrL;r*Gio;pJu;l%f&TL)jPZ_7$f%_ zJ$AWOb%*pn#lH);`1Adk^`F6evg>udHzuoRN9!iXZfz4zUK$gVW9Vv_>C#iE3<|Tef_^vx5}1(bPoDhBJjiR`R}B6_kIWZ3;b}?_4}ySW${?;($i(m45^)V;hs+0 zB9hg%)vQ`O{c!oum!G!&d7szjJ^SXef7*5S&woU`RsC_9XTQ+ac-G8$!XL%9*Dl$* z{9EtWSy4-uE{f56@g#j(Ng4y2&WbgUGkvsPp6bW?LVtI^mH#kN+W1?% zdj8En9X0#+?s#|i`hw$rRo7WRTz@dXdk@R>DAnuTHkPY@Xgzx$xMR}M8ea*-TL+3^!rPG-M0&+ z{(Sk*a9jTFnm^xOWmepOwD)^=onVdU&WkD^`47(*|KWBnd*<7`2OFook`%jfUcmn2 z=e&<63{MGFC~G;ckN&Xq&8_LJTdGe_|E#B{^Ke@D0qL0&gbFNV=3RYK&u^c`EzkPm zzEH`@bz5fS?8)UiXgj@ai^(P3oGH;JyiZ+JCQswtt>byFs&C%(56>O1@6ms#EvtRR z>TKKY>Wn6qy_df{-{{sEqLtUXZ1(d5Pn#J$ABHbAO+B;!aDHt5uS(9lpX}~j|D*fU zc-f*ZEiY_EBD$De#gFa3J?>VxKRWYIXw%g#_52Yp#l&{CtzDV*OZVHdrRfu|hWwp5 zF;_XXU$@XYhxJOXh3U$V$&a4**vbC!`mncm!p3VQwdH}EH#K`SAD;fcAbILl)tOsO zC)_#`kUzog1-Dj$%!I7_N8@jP`_FKE*YnPGU+P7iAHKF1dvP(tPHFpW@82=g#Wv2| zwsp_EERNKhg<5&3Qwu`goO*KWz{PZx2jF`+0(Z_PW^R%yKnBIhlzhzY)${M zT5jI?h4qKb)jMw=R`!fY`*$@|Xzz;s#+CEpr{!H-{Ateyr$4vP-(4Saq0TbHPBEWL zcki;r@_$rjwJlyARk`t)%}$-3N5N_`5B+c6&y-Zxo?v<0Q2x#Hf_Y|rFMiC8K3G^W zbx*8(`+tVb`|C`_g8cIK+>xlcmf2Dl_V)Svw?8xYk#-`1a($}EKkd4Se=2wG*(*9p z_2af}{~4O~FHir+y}P#j(ERRi{~5${{FX1PD2}@PMRM`yoYG$BH~UtAU$U-jQa>7$vo($1ns#0A~nylH&6qhpHB zQ9rlG*EZ?eF}>pdkbL+a*YwD?!$BX`{xP>=ThHV6Fey5+bF;BaS^nP>5l**p8fLM{-^G~deOh7`=NKC@o@#vj=a^N-9G zx^`6etFNtUJ+FEDR$jkH&pwx3-yVIslBH}_G{>g1k};oZwXgnDuF<`=CvfeL@*{OJ zA8q7hKgRWEmnP`1`*MEU?eqBs%Acn$6guLZ-=-eM->Q8g!DC%;#NT-}9`kwL+R0Sp zA1}M5-_+M-;(cSiiT$=LVP$QdOGgWzFLGDuI&Qsa0#gyI)2{xx|6=RC<6geLTzx5X z%e`y=8T5~Rzxj6Q$+!P3UrYHPJ$;_<$7ElrvS}ZEzLkA=x!ovshrjyiiYGy*%b!(F zdaCGi^l8oWS>Fy6F@F8JUirt$-!?Dr*Fb-4ucjOS+v;_%`u@eNGIMjz%zT|sGVW61 z$uAgAuAgJa_V4nn<45Wxe^eb0t>wz*7swfEuSMhxFuYX;8?($>( zBa(JvHHIJAyA(FZy1hEYkh@m)o6hCkI)&N>=R|g|^LSw#SNdW5WA^SkCHKev{Lxwa zM6UetUgDfxnz-)9jXTR`+63EN+)!OMa|y4^w(T5+deU5VF%zu)1^;O8|9JoKeTg5z zN8{8chqOoU+)-`4J6hxIlFJEcy!k>69upU)vRD`0coJ6MS|{>j_2IRT-c~(6dMj?} z$?7g|L`JLj!Ya?H@%zTL>@OwGq>lY1Ait|)(6cJKbBud!8SIqUB2zg*{= z`(trK{Vn-pCqIZExM%h6^yRqM!5b&N4w)y--Ki^HVCc%O}Fp^@uCqb%djH#fyTT@w%A(Q9)>dg`NlVj&Oa2pmaUNz>y6 zww4u5UzPnn%3k&Q(tYJqQ%`^1zx?O7*FWT5$}~k>y*@R8<5%$NUSf{|SYEx~{?mU3 z`-7dak+yID)j$0KI`vZmbYV=Os|e%C8qhgI2TOJ=`7mwe!?#i%-6zYdA8A{zTjVjd z_>jAa?XQgm46?ixKJ{Cc_tp45Jj?Xld)tTet?%a_5|`86S6ip~eEY6G&6d3g#`Jd}GA#J~M#PVS`TSsRc%ddGOQEvFNX6|o0-`|=0xc*N2&+w3c z|A(_DKHA?p*Zp_;r~eG;Z$9rko?FQtZ}Rs3x3hP@{j7Li<`%GPx%B&`|DK-t=l7rC z!!@txAJ0$EwywGt7H@j`KLdZ+@Aa+r;&1=y-I(j|^WoXn7{xk27*&0>Jv;49BXx_AsC;ErJ?=_R%zWzR)U;gR`?cO};7wcwEu7l4GZE`sC zh;4#jB)|Qi-25YQ0@rQy zMU>xs3!A04@!{{;r@L>zTqfu$bzb(1V(2Uv`Byhq@Z7XzU-DzS+qJE+dYA4WzV+%y z-ZSmfnM-$=X_lyL_Y_^)pe!JM#^L0iqTQPql{Wp!{_y#qaK7jdQccpM$n6KeHwLkNv}S(`Vf}a_nR8ymj~cpYQwU z^I)IepMvW)ypPVa{wR0Za(S!t>bc=Q+dhh3d8s}fba!v|p(7>b1vLO5gH_&W_r6P5;Tp>t8h%>@quMz_7jVrBd%Hw-u_PcV@5@X}dp) zfAc-{ac{QA6_eAe<@vX+x#U~C-ACx8%6b*Qyu{LpHmf&2c`LkOLhHn1muCf37r!sB zcUzo4seF6%m(5pZU0m@k?5pqn_hCOP7hlhuTsbfJXLQZo0R+b0*()MtX=c);v+_$&HnA#GdoO*2Cp?wt<4|8;a8yE(>&Jr)C)n?V?Cpv& zK2yh5@%U(*o}I#!U&d*Zx;2-H9v5)23=P7+`Tlh#` zJflu&+eg`-S5vlMwy`yv^C4!&!uIy+n+N+-xIaxj_sH)}&DQiI`}u7u*VoAJ`LHKm zaPiiUv6px4@2cO}@b|D!#4&}gjBoQbI<~KAY%_0?mQbzp_`W6ob!z>w_kOdPHcioU0Pki!g{af!xxx$C0eGAWAWZwC1Ph{!C?%2HZzI%3EemCu&u+Daan@;Zw zwVBrPPnab9OuOa>+qQT18FTZFT|XE#>CTH^``b(PwY1OX-S*nOefy3XcO}w$y-(B~ z_^@cY;T`cz35n@Ey!q+}`UU?p9MXI!o%qO}_vJmM54AscUC1|?&vTk>K8xDjx3@TR z<-WO35Siw)>0Gx_Q4k`gzK(lHI$@Zhj9_uQl8IFFg0pn>Bl`r)AZ6{OGKF_}BWN zo$6)P9o??GuD><8>vZnZ$pw=#@}B%Q$=wzre5(Zun2MrIOrsB5M~zmq<#9IS%p`=NJimt#v>a}sK!~SFd5V{ zWna)ZMu|gL#BSA?HGj|U+8iI+a3J7xhK`+Vl=kzTpLd)|%_&Wf+r4K- zpsVN$8Q5t}jLvyn`ExLTjzJ^Am>N49ygV%MGr z_deaWRbOV)o=t9jy=p?@huD2K2JYb6{Bi9bM*YLLf6I4P9h`3^y>JF!JO6>`bTL=DZJy7B9^h5FSdG?B?!-ww~=F7h{%ja8I^=+T!{_E>GkBJ1y`T&|lQ9v4;mS^IK%b=7V6=j)x#eSSQD+%NDdD!f@| z^5JW7#3ryIOWyEvzVTbhrL`aJkJ=gkV15uU z{%TsCMCG~~@5IbiFN<#Y7c$R(yW;KQ>h#}E68DT)8s~U$Yy5Qe{n&o^AJ+%{L;E@O zCO*7xb;vi&dbalL(|_}hRiy1YHS_5eU4?niG*v3EMlQc4U#-18>*XH(-?~4F>|5dl zKg4$~*}un^|NBFe+_e7;KHGmg?)dGHuP6Bm#y{>L~?d$G)xl~r|^5I!iJ{`Iq| zYLc9t_xic^nxJKoWxWS>toQpUeo&vYVwtW?mf5exv6*IXv$I#+em(u#_j}&SB}K9B zMrXCmuIg;EW>XNJ9BO+1=KBM6Dp%F^Uon}zQf1z&Tfed^zDA!|o4RBBr$002-BlGd z>3-^BK2^s_L#5G6%Hsaw>u2NtGfcd6FZ_PMw3WA4T|Vvl>+bz?w`HC;*4Qr1kl$*4 z(~tg7|srqcpChb$lWxq4nO-pH-BBj`v2=bCK&hB~Ip>;K5s=0ALK>HQ(@l3gmt z9;Icw?hM-87xS~RSx&w{c4N@xr<=;E*88pgxc$`YmAT6`%{Onqe`?WxhWOe33!m@j ztuv_jeYC2*b#+q7}C*Nl(r0|1xpgJLA1Pk{XAZ6?*G$ z8ht)CYyF|svCc>DDR277Kew=A+I!KK3wP8`pS-x|X3Ea#XXLe78H1A=?DDqWsNjE4 z-}Fx^Ytt(`*-NRLcV!o6O3wTD=&5e*_k4rsC7X)yA0Rm59x<}nIC@n zHD?x&__V@T#(U3P>t7jjK3k^et@4b7Np5|-=iVG~S;cAdyzA{l0yRx-?d3Xh5$_Cq)0$f4siW#$&#OO|o`mcKAO25B%+Gw(g!?tA14Y zMSD(f!Dk;aQMFGF(|$yxrgU@MRGkr?_|e;c+V@BPEz3WYcmDCI$zF9oGK<~MH?IE8 zXv@I!WG#bv)OKe`{uvU`8jy}PCCW9IgI zs#ZC>rf!__v(P(l^YL#hdS-u^q`mCX0iMYb|CG1BjuW<4n4=5*W8Y>muyYY(?s?i4)A zwd1v9>iHSkm;XqAJRSFu=RTj%v&(+Yk9@OZFRwcic!JA4CGUuv&CbK}o0mN*;)`Z1 zGQ1U=y`Qfp_M^J@<@h5~YKtHD^Ld({T{oR?Rol0V^VGWBvP)}^qJ4`bVobu`YIfNhSBr5q2$d(>Tnc~UeY*bj z?)}SM_#fq#-+upB_j2sHU@DxTCVJwB{)6|edzjb6WZm-J`e)XZ3$@eVMlD-kxnfU1 z-rc7zd8w06^_TIT__k7^@W8rjo$E4+j@|?wWCkwApHFE0@Roo%JeHf9|@xFw0Fm{>qaDYML64aZTX(GXG|E^tJqwc*%@g zhhORcZG6POVXJS{@3+?rT)u6)&UP<)(wVf3d5=8)96ICSIL$We9WBpG*z)GtrTeM3 z{%niaw$9(Zb?dH4`<3$3*U#O#?r-eXYqPU2-En`*S-CE|UU-l2gV$`=>o@D0+;tA# zvHiohO(nBucPh<0C2F4$?siis*4wbh{QT1uio4{^KDs{d+mroUyCVFN+w7J1>ks*7 zE|~e=wDqI!yXpB^8{Vn?x#`f$`&wX`TbBbf1H&uU>i5U$`D%=JeE2ri{D`mHX@|=> zsh@I6wbvH;Dn2fEYE#+inN(wy8fpIUz(Ka1^E>RAen{#qeE3)RsMWgZOH7YW5tGVq zzkO6V^V;=qCkjR4le}B{o?5e}6jT*oTBxlTy7&9<`}fnP^}Q{099L+{RNEV^9@Zt9d`8ZDjVDjxxwE#LyeBGt z?x`-0_djS9<1Djr*DkBJFz2ZXJx%>ucNeqa00*Xz}#-$9qc&(#_o4yn%G z9dVaS;5AFP(y3R-B4zRB~1lazsXj*A9~MGqk3^q?t0G$)qC~cA1Skbo4@9*>9W1Kx+#;- zR0Q2!TDm!GLZ0EVjctjOI7JpNxm~;8{pa2It-Ehs)o*|PxBkGpsJ}~VLRVF2KgjPc ztrdH_<*%Q1N|*FihOHYXg>;-T;yA^wV`MmWz10uL<*%jYciO4e*j!y-6TU1n>|0ds z*IoB3y1dW&om^HuWn$0|k58-9kQL1m3po38F;-?bqqP%fNU;gd$eDmv@Ynk`H`Pa*KmAp8wvT@JHb?blY z))-ye@?IcYJpAL!%cf@!XPJ9jdvBTX?%7UBAr2=?!#lg2)!tb5@O;}ZVyEK%_&>wV z$;ae$xAx|k`tv;w+0^lKbzy#S?$bL5pDo$wcH*5`s)N=^gNo)mgZY5SySm5gDMM+&!Hp5$=S ziPmh_wUf#}$6r5L`+Vi0XA#L|u}h~2?VA()y8q*UhQpU1-*rDEY`5BOe&&MLhOdQg z-)`c4>@LzR<8!d>3g4?I3GZ&OS)MHumA?D)pH%(9-+K;fN9Heoy#9~<8I46+I0P>(F<`_a`nzU6GPU+%4#kj&U4-g*uk*z!8WT|Cw_E( z_|Nc=Z}!9ce77rZM@yy7@m6h{z2dc;{_btNc6x3|*>2unv^hrkVX^S;Hm*GhT-~<0 z_y05GX4bsBynWKvndMRO+yC8}y?yHBc(W??!}mpQRy;lsC-YHkmbz5k+U3#rqq`T+ zY$eOqyg)01yRK-nC*>-DZyI-4ltmQ1p zH`^NvuE*!i;p8&(*DRZAasEpAeBM8%EBqgN&i8it&(IR(Z|&w?DJSukf6|45j)Rdk z+f%s?G|p+9z*`un{o(k*-{*UzQg(e?xWxJK?`GDc+a{asxZqj!VN$}>H7*PF)?|WS1oBzyx*^QXP1x=sMeX98IzToxmZJQLH`hC=wHmkX}HhFoC>Xm;cXHD!B zE^m$R*p~YA-|^{3HpO};9kcCM3FEa^;oUGHRw=m6(q_)PjQvc1_#eLfa&pUG;WwW$ z_ox2Wu3cC5>EBtAi;mytJkN?bV|lLp$&&`Rf?dl+Uw)5N|9?dj3*s;{m4=V@>2a9>C@7)O@!X4#QJI{ti!c> zk`bH3`jq{1-{$10^%-kJkC|?a>2LbS_apj%)zM>Xm)wp&xzd;YX5H0QS>I=MA7y)c zFU0Lxq|J)&32*wdCb7$Kz0H{QtX(H|?*8-t*d9mu?!Ee-Vf{-v9J%)X{g>4zb@k@Gdh&N!(q=d3WwAFa6j~p5eRBO+-&^DPA^Th2hi7w( zS7lwlr*paHuix{3LY!Ytcb``};=L``QOxr(&+%%TgcC=Zp7R{3ZkxWm#`^L<-mG7h zA61Xdf1shCuQWZNHY!#%=VRHw6vNnK7k0T!de`f^ebw2hzuw%}lP`ktRs@G>q&Dfr(I6*V$PQsi?nLD>G$}{_P-+$-J zdk5C7*%XRItg%3_g=5F=Xc)9)Dh22Lr=1ES=xNr2#L1tqb zpQr9_b^RZU*Z(o!(+W0e@4nJg)wMHz zuHE+A`(IbT&Mmupcd~TR?%e#{@t-dJ%HOofuH?t($L9rpWXFGu@6OHplUVZM^y0fe zjB44UmtWA!i9h+C3VY^JjmV;JzJG7&@83Ur-#5;zS?{Lb{};ckz1nA% zy_I@@vD#DZoqQ7MJM6ZecoEO9it2qDWHtV(N)gQfG=SN;M-5slbv_CsjZ~Ka~ zwKj#)(w1hNs%Z(kpA?A{n9DH6Eq%0>$zG(Q?z{K2Z~qzm`<^mwJvX!L+jb5{_9s(0 zQq(0*C_I?;d47`|_mAjBS@V>x+R6Or%B_0#=)tmeuOCa_%`+@)WjQrD=G1C4<r*o!(QX__XXcJhkJ1g@jC? z$x}bW*Qchh_SF9Kops5R>z~bE%+7xwE$8~-vdgu7D*sOWJx4%q+h9gHu2GXxy*UYAM88V|8ZXNI)BNYZMRlM#@-doRNb~z zDaKhjCnA6D1l{HDXU0tVoD?a$F|O0Rr9S;U;~&$@YtlcSv0`sgypq;->{ETm^=q$1 zFELHJeYoWM!s~h0c)4~kw5b-)dTV1)#~1&{;$n^T!?)>&%k6}(UW+JM9cv!pF1}`x z*u(`>Rqm^Ft3{mXiLpQ9_bzMpo;i>2n!e84UqAPHQq=d5_*p+IYwyo0w>(eU38@x5q)_2K10Uk^QFnM+t%9No1Ct)$!GfZXOd6S=0!4ZnZ$6}i+hL8gY9Qrt3stz zF1@>d>SEX*`FnideZOsgtpC~3RU`;Dt;ztKUhQ;+9J3A@up<@|7})D8kP>HMGMmR+ zv-v@7ywipH2lMAi?z~>({GWkii~NTk^_%N2M^5&d{8mXVW2$ zwUUqbOTV<|t5IL^qd$7)!^=4V#yy>3igH!15S(8+#LLy6RcZqtldaos>58b>wfCoexD(}FbZOVLpA(&ppFByJ99h4` z|H$^MHNFqJ*V-SQ<-7f`th@J~9loVCx%TaE^wv-Fn5O#g;?}O%qX8$vluoTHPp><> z^^5NNqw;K-`vjYhme13>u(n3u&Sq_1t(VBA$tsQ8if;3HC?tGamw$Z!mh!{v+dlk= zs?9z+%SG&-$_|e;diGn^PBZVFI{U5lJXbcxEzC+!Qm0?rw(9Rvw{Ls4ea^b}XV#mm zXEN7$mEAjCubNx$e(%})Ly}*s*SGEynH_gk=j-0YFE^xK2W?2p(7UtX%nC0>#eLt( z*2gt3Exj7=e?_;nnmhZo^o;zRb;*$nc5a{GDYN3R)~UmaQ<^xGn|LFZn^&4=vxF|1 z5a=q($|@4*Knj-Et-BC$x*f24&!u?>b=N$c7n$U&xMNr6$umMi%I**NmONU% z>&L;oIz``yy4r`MzIUW=n#8fy*Wa&;F@2fMDoL^QX(?&ek}+$%6+=^1c)Aq7)kgo` z_GQbO-cNcpW${%*!Ae$FDm{m|ET^z%$+~Xo!;?Bck!Jc`Z1F} zWypQIC+i)2^_HZv*$3wx9)T`vE}iV3+qB@EOwavxc~QOmKbaqPAKLt5)vMP>3@aud z`S3a_`QDz%O9KCHxxMt1@Pvgm%X;itL%j-i%3WVkliBS2@!RZ&bCVC<`q!VoDsHa1 z-}QCVuCB?~bIDieW%9eM7iO+FPc^u2PtVtH`}8lY@9o%jZOiNCdYubV9P1wEZ}Aj+ zZ>oIo=S?%a2S(w^=@KTB{@#k)UpxKo(k=I{eNEYC{^|GD-(UCbeg9%#+25&uQnuGw zUbC@R?+ibF{pIr6r`HR=tkc2 znXVU|T%76*MV?DdmixMXyZ`=bnN->AN$*&mX*b_vqANw<{*;A6c#o z{M>kRi%`yvX%8*#Y>d^YG;XljrqaME+yCbKV_y9O_j&)R|F|E0UH93Cq#sSsgS+Fm zZq>{#<+^oS_3RzP6-O8zr9Fh8B&7I$J+n?<{@KcIRc`YjHWH%eso$(bC~3^|elFd3iNN zPBJMAyb|JeGBUURpXbf}VP9{ZT>I`+%ys{@`(?$tzr0U>Qkj4BxAys_eOk@Os^V6Q zF6*uP&YhEbMTtRDp)c~5=92BHp38pgWK6V}<5BmpUFG_vFoMTDUK4NsXOyu|_2bd= zZMnMZr+FNCBXIA<=sm05Lerrh#O(|_G7vI1PD-8#F) zZ>7q$qbg}!+{X&Fjvb9L2-vteU!aiBaY31ygyS?g(12^V9eBr#NyRq?;bE5&0SuJ8#<+I&zcGI2d8J|UrB3;hze(%P3P}==so!Q4}(?5JaWVOB1 zbmgr3UYFk9d-m`m_qMCDqV76J9xzy{G{)76ztvpm`tU!4K-{Zc{2eB;@vb|zY@L62 z`nM0;7V1pTS)H+wV_C)hb7{AhpE;n|{Pxz3wso^!r+(jSSM_H5=TzJ2+5309fBIZ) zcjCkRwko^HAG^Off2cj*`t`J}*K@~9d2dRMm)vtNRw+2qx_nE9sC@N_l>#^9BpDRe zbkD2G7pQUC^<(;PA3%Xe(mRpL$exx79%eTQ-G**R&U!DS1l{JF^X*;+>PAlufellvxZ ze{^l-x9q&R`8&sQ==o;U4N_1D|Jy=%*l-tupLbn$x3m86Z6gZH%iY%xmz{ym6! zlj(t1xBZ_+Pwib-;qLy?Y~iB#tT6A&;zf67zqP%4Ti{$$#h$YZ9`>D-;XUxU%#DvX z=gRl}^@shh{by*Hv~|+0`+fVD?iX0pr2%WEF+Qr_mbdreeg1!E`;-1~uK!Uv*^fT=bT8UJ8an&i&hjp+@oycbL)ABk9~?}X`H>-_;M__9j1?%j|n?%gKG0m%TE(7#Ho^WOHIkz>6C;JxzSPmFGL{Gxl;H zwlms);n~H~)3bGrzMb~HYWh1P(M+oL?EUHADwf>UnA%-=;pyVut2IaOxLdrMJ^9Gz zli9WV-#&T!{pGsp+1@vvHhJ~y#7(>ZbpPGk@z!#QAN7yg34Jtw%lzS+to*S}73~q1 z_PAZV^F!R{w??+4i|}#PKSvXPJ87|5-dE`TWO&Z_xjnP#e1U_H(P{Hn z-#%~BDq`H~Qsj44VGbW7%Qb_zN87{ZpY~qww)Ru@(%$qnOY5$E`LX72@c!K3*TqNd zQ|l#l@AsAV^PhYa`tj?p%x9loANdvEt+zE=Q~LZZp=`_g^5royPn^ZVv$p##2)}Ip z(qx)BciurgDKo*1Z*DyJSK9vYe8)ebkA16Gyqv|}WfEH)bU8Zp>%7@3oC{4?r%Ox~ z>~x;seuiCXmy%PH=K+~XnZBBr18cYc{u4~=6y#Kyl;Ya@#Q?z z-HNhHD%-E-=If+Jybn8YSa(ZP=_G@P>L+^|{`4fTt>t~l-|=m`J?FEJ+xj2;Ua6Cx zr@m*)WfPOmb@`7fZ|I(rInuaPV#d2X=9G<=tGCrln{cmh`erX2r~NvY>$7inP}{YL zDR(zab?g)9I_k6L%nA`h))@?$=fb~Do;Lky{^{~x`*%MPj5?+rdwt#MU+*vPZ2fTl zv9jw2=RrtCvRVtwdnhtxW4zFes9^Fd+XZE#Go>- z!0WC*|NgqGdhKss&Ev=Sg|p+BejI*$H_cu^uk%s&#s%hWd*mzA^KEYZk&E$EU6C2M zwA8capYBbboU_VL1Y`3dXE}PEdj&tsQ5W}Fj@Q@xX#O_o!@I^u)@$v5Ye!u%S!)-q zVd^g!d6;v<<&<31H5(X%CT96$CisrY=vYTm@u%=%o_851Av z*zIv7eByEI+b30`V`s2VGxzaWaa@w2c~-Wk>+Q7v47%mlzTMxId+WNfXSLb-rHj`; z`JJC#d9%!lJ6so+^zn!!_2 z-Y(vG(Ras{psszZ?q6T|Y5$v@<>~8dqWc3rJX@Q(=7m`t*Y?Zr=KF3IyLiL%Tf?-d)L<6{k$*A#lP)RTl`47<6Fz+E$h|a3QcnOS@TlavFFLO z0t4QX(3+E)djA=$Pi~ukcFV8tb`-;-G{Z$=$IZO>d&S0`R~)?RRZ zu8n-{!(*3feAh(2xaIr%*8aJrx}T5c<}Bh|Dk-$5(A?sj*R=Q-4#ApC0}3eYEmV?T7A%bMF+NtXvoSUpzXsjL$BiTu*QrN>uU#vA+2%D9`HdTMf~ z42LJrawGk~S#MYP-z~d&a{t^#Yrm`fOSryg{mt6z)BlKm!hQp`#-axF-DrgPcsKOU zt*QT@w(=wY!@1XHFZ{{7HtODuSK9*a+MQTF>(V>zshzjkGwQX|3JQ1IuAcRA+wPCb z$Le@D&-VP_w*H~_i_!|#t9v%TG`E_6lgY4ht@&Bi6$MWjjJB1%w|34I-TB+2#`}t$ zV2#R^8sBBHFRf~?h))x{pL;zyQ)lhsd7mUyW`#y3sRpX1{bz_+{PfAA9rGFgU5MJ= zxlg*r^_tJLI*aDR;kwzmkL(#-xuUZS|IV^ZnRU;4^|Wq-=?RC=7x<*`%%8UCrCa`z z*!!!V?*Dgd?w9Rnr%j&RQ~mP${eRqH*PbgreBV;1cgaplcy&ej(eH99+deK^xb9`S zlO|1dZkB*Avt}7(-!uI1Z)5tZ zSG&@t>uwS9TQ%v}Bct~+atT7K1@{I_&0Q|B?qi)|Y-EkihhtlpSCk*IGoKx@W&6~` z%a1pAy=*=te|GxWSrWmv2WnCtIO4)|+L^&OSLNmD$!lWjeyHaERgd?ZX5{NzTyHe}uJ-nyyX%&( zeEHod?nZQsZ>DL>reo{AO=snqFv%#e%UCn`dr{XT8F=YC~UYP@ODt0N)nE0^rrU8h-ly=;H2 zx!mcvuKx_zPsjU~+5YSL`Jds~{w*Qj?W{g5Yk#=bIvcnB+zdU*5_-PnzSP$B9Z~Dszs9=9eJJZ)STXx;(2IHE)2m*Wi(M4w zv^tS~TT_vFOE&+Bwck(5toZ$UX6}CmM*E|-cKNyP`lUDjGem9uVeRkD-w!7(@ z?`KT1&}X*tkepqRsN5sNWZ>}i)v`5S<=FB(wnInx=hSD`w@sffuvL3TiGJ&(EIGqz z=Oj$}J62Yw=b2QBxuq68OWhT(Idgta;Evj({=qTZ`|Z>B*41~#XJw^Cr}Lf*EMBuHG+aSnZuW1Nnxd=w#Mezseb_Cn>m4@f z+tIJeHL{B~ZJ6~bZD{o&i$+e@pbYJc8(HS>G?Kfb+_Hg7!ikJ59XcGS0$ zyQ$@3*txl@a?|UNedoA8cgOqsPd9!p|0mks@pb;q*$>|}?UMN}^}X@^o5edW$@y=x zTP!=-bjv=MbpiZlx|iI3PxaLNtRHykkMf7Wcw5u8d-tLjeR5Mt*%p>|?}7O5vprYYe>{IAEB&qV!}Ol#(@w=!A2Z+f?C4gB z{TEGMzBBF zjEZ-=aI}#1NZ@X3g^D~SRc^!Ji95co4L6?Z_v>@j@9F;;Zr4l4-%|_on)~hL%lFYw z_dl%8D7Gj4$Vb-V2mT#$;y-FZ3mRJAH~NO;4aXm zshsrbd^s1}{Z{Et-UDki?QDK*kNi-7@IQmZOR-(L(>fom^*Zc(^~>uUYV)3HsuxY- zP>mD}Q4Bk5w5{Snh;v-1?3_ul>u$&Y3ckMf(!OcC%RRU6>fT=~^e1=wv|AJYl>X3M znf0rsPW{8WZqt8TYif4ij(Bu=YLgE8iN4esI>Cq3B2NEYtG$B#q*h^n@Wvml#((_V z53FChWqV+T=9>7duy=On^Uj)2m9h3X7qjHd@=MAw2|pgL&e$)pm7n)V+rgYKO_OI! zJh-eE_bOnX|8$kSrPE%Y`K+p{=zHO3yydgKOJo{qYcr2O^sUj4Ke)Owd-dzuUtQi| zyVlx^et)^>WqRp_u1`1OqWtrYtUA>TKGtks^bf`l+DG;Yf1KWUJM_bqT|9QY7vGt@ z-WwL{tCw``9go?y4X1O3PCb+O)17ceR>}GX@7mMyEA#S%F5QcKV$o-!cl-k5X*nhIgQK9uH?KmS+UG}t?V=lJ*Dlt$|p`sn{?}$rTL;`B2Byf z9GCLdKbOA_d2)9ieBwlpN-FAcsu7dJgn>Zarz^hf9rgn8plT+I@bet zZD^3rcCYO=d3RdI>x^-$-s?$LJlET!>#jCAms&ZvG)S>opIjMpvS`QcU+clU&}lnFD;XDZCD;9;NP zS9J3a=VP(IGylXtczu`aldk`faAwcVo(r~SADHgV6MQ#z>%1gu*>&<~)C+rsEARPb z9Q_?yu`QnCmHD-8zw|bhWMAug9eer9>t&bUKdSbwUTw7T=h8JtUKVdTYtk>{rWCE% zT6O$redp(w@$9DkG8I{mQo5Z|uA~d!+@$+-jo$h~smc}R+DG^3tUPF;*l{*{+iz2m zzb4mr?b`b5XWS9p$ESDg-CKJ3{q5!R^SY16*~c@)RT%s9`|P;-ZjbZoql=Hq@QEDS z7c%Ljb^0nLmoxQTZ(eRZq1qc;8OG4R)M|S5`}$9pf4x)9){9;H*M8Uif(3Xww^55ua}7wotMs(! z!hVTgPJ8x0-rl(<|3N?ZD?8N>_m)1)wfDTZ<>&hK!bilW3-3Lxa7W3lFXKpb!?8~) zmWiL_exAF1I<@}f`s?1=)zQIT<$D+J-u`U=x@&vCnA8~R_tsecuw{Mko`}8{M zl75MAk!+kqNw&6ky~>H{*L00$cZN=TIPr|I!F{>o9qZ-&f24=ZkNNOy>dM%Nlgnb` zqFf7gdyebpy=$ChX1B_d{bgMBqx~{JwjZ7^yNCDT*IY4wj*4D=6tl6A6NgF@niFEU*6RbGw*%PZCyWmPfYD1-_|_~%y@pbzYEQ8ozGyC{IIt8 zpm_N_*}luat$%aBouqqqr{=e6UO(ynT~Fq&FXs9<=lzKjJnl<=bU(V@Q=|Ry_2GR& z6LYq%RgT$JadheOcKrvFj@@xqnOFVFUpeZOW3OXtX#8`QlBYJzzoUOtAOELx@ym=F z-ABDDS<3|TYgamEZTru#uPO6^%JG7?zmII6FA>}BHQS8e)o?bi45 z*TtT%Ew}IX``15n`;YDGPTkcB`8#iq;$wccib=J)TWa$iepIF|o1s&3UpOXnyYcOq z%A1^-y0eY9?(n_&+gtqHnez4Jd$&fv*6vPI4a>M?W8I5un)h5@&fT;mx9_fNdAK=eX0FbsIqpK>?fr|^&b1D? zb?!ey?>h@Sd6F1c!FoO!PM=GC23>0GDMWz@Gq zl_!R2dCxrUb2Y*r`XBl2|6u&Et?%OD5C0jWKE%$y`L3j1_~nIHAF{+bb2hqLN!vOy ztrDA@_Wj%=i3M8A_S!#u-uWl}aePBnepk->Eie9fu8XeEzBxDO%E|0Zmh;b!a=Qz= zo!=xDdhUTY&*ZbInf9`#)%$NAKDf`QT3dR))Q`nJ+p=YiSWC7|`xQ}h_|vp>fko{T z|0TH;ZdZ9UH8(UoxK{u5`>6ZY->;N>?`pesd+V0_r;hAr`?6lXM)U7nJK2n?w#NKN z`XV=?GH>O6ZJl!QrtTbhl_JaZX`vGr^s8<&fA1yRSk7Q!e=GYTf7hSN$G&mz&L6JY z->H-L$He5Uu- zN28X6eRvghuQzkWo#Pp6HmsR+Xx%|(V}YH?4W1V7vd`L?)?|H@Z?Mz6nelS%i+h3} zrhiK@pDuN8^TSx@(3Ugb`K8^i3&c2!u4)anUiMfwI@B+8t$ydpfBye?cinxz$#~hN zr4?^ee=h&$`DOc=Cw0buSU;NgZT0+;Yj*mG+2SK7m)UokTFbf}l4XlbFl?XsFy~(7 zs)-CMIU?>0wp+06;_tlAQrc_(NOXEfjr@}O%(E-*n%@0w62{H2Gcw7*^orjKITh!^ z-&ewS@owhtxnBIZU$$cNVR?yva+zl57kNMU&meT?e6mS?u6F#IjW_SauzR{JGT6T} zaLchHPd-_!>1X*Jl{F<<1jn=2%mYTWg#aT7?E?VT5 zCLgI|_@n=kZ~8~cKf!A&f*v0{nfvhLl9RcmtK4g@Z{Kuo)1C8zo+&rkl6sUTwLic)wS)z3`4#Lh1#v)JgvQ_mCY zq#xKH&n-V5XR`Cd`Hpuc+`o@sa$7BOPp&B==#tA6*Wy%zY3}zvnoO+w>eY6q&h@zM z_2cp#*Xw!q@XYF)ExKmc)^Cf`mqym^FTeSC+M|*VPmLgnMQrI>94dN@k|*wMuGtuL z^TocRs5quBQ7ze|N%%+8(j{{OK2LWchp2t|foa zZ!^{QwGZ36DciR5>D|L7g~v5_>T7en6KSb!a$-AtG4nse=4-p{5A4;GK5KBhVzcw4 zU%ul0UEaavnJn9VKCX^!WnI_8&T}YGW|GXQ6qb$o?fWy=i~QJ;X(xSYeU08drF|b6 zFC~9;j<{ns*>rQ};=r~?*1=XM_j$|>a7lmIEPI4O%a{M#s*h}|m;Q(kpLN)~-F?>H7!cz3*m$+B)kIKJp$^Q}kkz3{ZqTlQFf7olYiUfMV&XtBOuVPar zaa|2)4G!Mxm)m|^{_yPmqvZ{HCfvMHvV1@8-HdmvEl$gORBi}>s#t33ZPPIm{QP#_^mUP$d68#(*cUETB)GbA>BCl24%u3RHesxw_#mZ zu}Gh>Cil|g>bq*SLh*LT5APRPIyrXN=FQovWu8yHH>G;cOxJKdGn>)UMnvM|@tarn zo%;UylKCI0+l%ULGe1{*XuPlgy#33Uoj0=XZ|>XrD4ywuy5sdft`F~){?Y%vbo>4f zXC`NcU+7@be16mP;eD-yP5WnV5wlkfnEowjXZM3;tsfro#vl5AtF$?8X3F(FO4}zs ztDNUjoiXj1VXjJCGHXIaiM1QU+TQ#}xzF4F3H-6zamiFx{&3Z_jmcSwo9*QGUE8!j zRmi05n%N|sGva;a0$rBpMNj+xcB{A?{v-TBy;$b2)P23FEB@{+y_@*QdPUZUm+x~G zJJL>xcRdse^OUas&#;QSon=nntk?HfEdO)s*Y|(BzLyy+`g_Bo?DFMb|0b?}6o2Ek zcI7|1B$d zt{=?@gm2V1ZMg90;+GYFGD~^)t=P8l^@rIV%Fz)Q=SW`i@O|C!FE7Zr$zkpL-v11O zHo=ci>(-iVyQuWRulcb&UuMv86QjLfZ|}V~zbL&gQK45rx?_rBZ#To)y%i#E?-pg3 z>z~SuyZ^YlH0!;$$=pfaoBll7694U0)Sqd)fAP%Uyl$m@m&wUon|Rj!tvWW5`rVT~ za#i-8j`318I+R!Q!Dh3{CyT$=9(b@!(A+2TcaAAPcg6HWi8ax@$D97BuekdvO0TD6 z?)C{=KKhGK`d(KU-pYDrc8Js3Lrrh{D(uB8-XGmBV3YZv*DY{ybcNmJN7*m7ebZ)H z9KfG7Jz(a&WJNc=k0(2hc!he+eS2d4>Q$Nl88|(2>;G=!wcokkyF6#ttSg1JVf%lG zT#m0d{eAzWT=udW>0IlF`ki}(X8k?Gzt(^C8PjNsKQq_*em(Q~Pjlfjdwa@m~{>dF{gT);~tGQul5>JN0s`bIHl)C(?=)eQ(cJQ8VZH z_+_oAX3@UX{a^nj{C%2!?>~dt3i2TiM_aEu^eW?u(*m0q) zfBDwcHMf|~u05qtIx*2|$z$cx_k(|5K#-2cq9^LO-%c`BFwn5*~vFdTmtsj=^^!v%r<$A5)T-ZT~a%KZEGL**{yp z<$ewO&(QdzZT`!qkc_kGEAKQhm7!`=_uI#>T^$a#6owy!OBUCq<*h_LF# z1&>@D6)ij#>o}Y|=6LeM_BWqb{m4HSC%NBFuj%pOdoou|bNvo`#i}fMa-XwXWxHG@ z@6#=Bx2(xN&cpe$edZ7EN9rA_KlWBXS}%0-b+d}6cfjV~G8-!L9sZr@dNR|c?a|wspEW;!+*0{zAlYX5`1;aL z_0Go=ofoeR3%<~NCA|9&_m8cQ;-!8x2VAmASMBS5#PPQG{vW0P3^&ZDnVV&(-tgEp zE4=HM;Z(K=(QuU%p2oP7_13p;-0_u33x{q;W18x=*z41ep!%}yU#~q__sTlI{@Cev zmyY*G)v#WXe{j5gkEq^e-nOL=qkrU;@Wx5>9y+pZvv68k$D8}VCu}^?*j98qB1LfH z!}Be>zPIjSe{|cLU&1~A@qD4!^DFOE%rseQaJXDaCFA2Qn;x4TkNZqRG9@PL*{E{8 zZ{5z-5p}2Hxt?6s*H^l-Cx2z;rmX9;O3&Y1w`|hSThE>*&sI3{$*vF->LMU(K*s0nS5=l z=gyG+(CVN2i~cjrDSNwi%dW|rnqPd4oA-CWX2$QAwYe8++}+B^R-n?QJ$LSyr1@rH@VC~I+=;4JHcK3?`4`nBz3|b3k8*n-?Qa1c<tvlyc^-E#a)rGQ{lDq ze(kE7jO!tZemC_I94{N z;(phj!bkBf_a(mih6m339du#cnUAi|@5FT*@DzGORZ({H?R* z`sXJhT^3iTCNnyonr68^aqrxd)5S zdTUu@e|`EN{^0w2+&`@Q&v3k+zb5&izi7oQ-yqq%XAj@Jdz-U(+hpIy8K!%@7?s>w zrwF_~ojft7_xzjtD_?(;Q{MlPzej2B$9*eHm&M+GT_)v!bM2ei{*%vc;m*125m|Fs z*@I`k{j38y632aid;M4)@*(`deX&=+bZ=+=niFwzt?nH2C0RK2PnkZu(UlQW51{?~fJ6S;{T{$Q93DvEFCPl^VyCc@o#8By!%H zuI}#M&1^R_vyA2Yi`YEYC6oL=-8r@4gsk()PwQ&!AGIIp_*4JesN(R^Z5!9Nt$cVV zYg(YZX`b%8+&#Nz*j$;WaZ~o#nH8*&D$=o68?AIrqod6KGw}6R<}RE4w(j!k%+ft; zU$6gTKRxf?#>W>Q{b$(z!|MJa?|k0H*@AQPo)z>cU7n;Vc{OB$_wJ4fq9>(0ZJ1-H z8i^YnmfrMMXTj7jkLG)4uebm7pF#i3lq-|6v+LvMKM@IZRch+eSOA`I+cW#I`k{L9 z8rP5NZR@|pi)Y;Oo4VL?SFU*Uhd;Zf?mc@$;i&b`@@Y?IZ17s07jV4JTx)9gw@n*% z%>P#N@!Z1nHLs;)zfD{c`zmaEl-gp`ojc9BVYrmqRVzo=gQm*L4z z>qGSqU5~f^m%j4;i+gIjcYfG%v0~c2lR2qI*#?fwnB`V26N~NK=ayZdqOG7f=V95T zBHzOKf_7pR=a23ewUK|cSNZ5Kxrxgwa+g(XxqYASY`M;{j5EP`bMup!HR@vAKh5*| zc(m$h1^>~%LQm_oex%l@Kgt*S&v2-;eq~gg=d$B#d`fovs;Rr~u6*;oQpw!4BrMg) zWE1b&Pjk0yofBUEH}huRFO}J17xOp$+n1mB-lb&M`$PU>KMo(;W&3Xb<8JSM?u=W< zznz^nIdPG6_GYDaL3h)OJ4!1qS3XQ~x*?gj^Z6gq-@5-94$0R)oIUYT{nokSzxAK~ zJAdTnw5#(wb(6%6l$UukkFK5F{d3E#pRe{mTyxV_-Q<_r{H0Ik zKl%6XyZ7aN6rBjWUi^pn$Lu5fOh2+MTvBn)TUPvt;BD#sKV1JaY{=Ki%emmaVPdJt zdeJY*T09%H)_I(eWZFCFU-{OJJ7!s&Sud0Gs(H155vx|O+tjBYmi&90|9b7Qb?{1l z?ho~c?>qO1en`51uqtok(pyTgf!y1S^)Xj6CfGoy{44ytSjKEPqA z_WWQy-|Cf#gBchJh_iz%R?hlx=gl(@7(eES>moD$v{ipXI^WYL|2A= z4-K`A>-@RDdcE(wedV(Dl}C@~MtzE^n`>`;vxfUacF2#^57`I*v3@w;{Z8kdiDIhE zudQ!*Ypxd@IhEd_@@Z#hYQU+55gpCE&0MVZkL3^Ri~R|_{8gX-X2rR~jvv}KCfur9 zo_0%e=iZ9@MW+&Wm~%}ObG!Zdu$on0mi*1&qaS~~{#N*-a1C!^jcc)4Yuff%?YI72 zRhh0WwC0efk>>=9gxh@&4m_Xr@%-)NZ;cy2tnz-yZ7+ZGhjQrUMVqhX-FWr!#=X0% z?<%?Ga_Fr)<#bkMsg+LF@=o6T$f(sVcW>8!_|KrNeLL^gW-I?Y!F|)>*FP`Met+Mk z>iQA?7CF6->OD1_t19x3?viU>`oqiFc5B<>hcDMZo#nQD+THIy+c)&?3t7&f=FBrK z@zdphYIWzg#aUf9nd|PeYvYIC(Y`9*txb+jeKPe)=h-uQ>*j@C_*B+;qDahL+An+h zkHZrGzgT1QXaGULmMCp~v#ymQt*vEk)R+XxlHF<%4oLw#&59E&X zWC!ik@7|}n??d{be#zIrlG?A7tQGt8>tg{4(D{~>1vM8PCoN_yJ2^4SDD_G zxZ><1@|;`OOZ?GqK78wY_u2mpow=qQU%mYWk1spU(e+4r{g!CqytP}WGA+q5=Y1tT zRd?bu`OVu_KGGF`wCYFEzwdE-FPG|bE!%K*eKvbg=5p`Wo;a7Iw@>svOjSS08d|eX zu=4!cKczo5CVt3%`1IQD>e}TmKc6*;s_nV8_R#HHx>wF_GMnY5ppxZTrqVxETINBx zYxI&iTd(_vs=r)sl)rf2w=|Kp+AH2Jzk6ywUwHauU%fTwoSuIxzmi`g{>WO^IpV|h zjvDtzzKNx^QJ;_4iHhG!S+ik@rc9Y`5to*`h@-d96H%4PyG83xf6KmTa=%6U!R^f4 zb4xb*|7Q@sx#h>{ndx)R7yjOQyCbE@*{Ns2!kZHhuAg5g^W)a{WAQv&b|ihg-)X1t zBiZ-kxsBP6e4`IpO+ET-)wjfTbw>`aiMXS~^Us@+&CY4Y-+Ni-+iL0`@BAbDuwOjp z{*SiD{+CRpZ>#q`s_u9vcI$HJG~Mf5tDnUBnsCeR^qk`%A>NrXVPbIVlDM<(Pj-gi zO}VuF{6gQq>(2iDbNC$CXllCo_(t}@I+cpkzpD@0^ZqGJQEU&}_AzgM*2HL=wOe9hYujG2 zeNOIjaPn4rQgLPV@e`-6?+w1XvcB{0x4!+Q`yOVNzWjCi>;8Kpfv%p8uBh8AsXqod z<;T*8`P@H_AGp;w_p!OakHVVWyJwzTsM|gvablWh-9_oeM>jqWyCadDAN8b-wBSKqGi51V`>N^D~R-_q5Ya{7CB@7Oyr`R1nVFFP}rFmlL6dxWzu&8xd8rPjE< zM*XVpAMZ!3AGS>u`8M4>Vyft)ZPT=8JZ)VX&MIYZC}5DI8C2vR_@Mm0WU0Pzg;8o_khrpQ>TWFkyH0u|MHC`vvS%`K3Ke*Jf88Z~yY2L2yr<%+;xy+GF-(k+UNp_;kx!Y6BE*$jJdRq4Q?6*1P|GsYj9KLf`%e$uCzq{|A z-+k+OXP5s`|1fRJuajFZDzui?+VN_AGB}q!>5{}fiv=8hS6m}y5*dCUnKLhY_Kv?Z z_sQJZHh;g%PQe?0TNgE&R$pzscRu}Q$}~fR$R$0l5<(u59S)odNqg8IZ9c!ICamka z`^W8_k1p_3`L11f$;Q88!|7bT*{jaPSo!`wR<><>zQv^H2|ssQKK**XPio1_hqiIo z!**@@=)3zzlJ>;f$J#4Y_K0hzUEU+5<#r*);kET@U&SB^kC3LOIr>(8w?Dg9zOJ^P z{;)jXa_&OW+f(;zXUzI}`$N3`+I4kI6>^`Ce*333*>nEyu=|1kZgi+EE!lRmdc(0i z)(umfpNXo7-JEvvTygZrKkgr<+%u@>bbr(<`X{9G*|xS_Eowi5CGRBM-NREnr)2&7 z1s~@vxxb}jlb!Ol`iG(a8FFk4<7VDIl`rr!!m51!vplngJSCf)-wuZ94O7?t)~i^5 zXqH|tf8*ELf30pmnqqZsb>WJa=GNhX)5>uw_gnuN>R;EppS_y>C0F#*_0})%h3XGY{xR|KUj3MldfOk*&fYKL zy2R0JZL#T|xX`C}%08;j`_G_yDkpaKgbfw}Wetmk56fvZ2VeT99QlL)(b6yfgyT7G zomcNR?TyRz(p3*DpPBNX!DHghMfp0)od5NLwWbL^^Z2UK2ey^_#~CJBh2Dl z0sqpPvkQLQ(iix5zRuX!{OGlRqV2xda@XEIYA$E7_v)6_N6zj(F(K>Iql(27y`9eO zIrQeXr_#=Tu|Iy>CqLYO@V-FQ>s{f^YuDIRmS)ZR&(K}E^XprU){Xo9E%noiF^x}#vU)uR}9lSn9?)VLZP|&AWP`!vvJ4$cWv6bYwz}LJNHdEoXvA-mF?k{ zbZrTB9Vcx=W_2!qhwSItdsF}ER_uT1JL5+u-~Pp~r1JKvzFYX=bI5$Z4f`gEZHxGD zA?Cz`<9q%yoDM2yd%+p?@N4tZisc97H;4EAQ>d66dU@gG6svXHYCJ2K-d>n%^g3md zO|aX<6Q4wb^-rDMotVUTEB|YJclPJ_j>)^~`afLx?bP)lU+{(ehq?bQT(gpyr}~jc zPNMzb>Z)l~X1hz%CZEasrgBzCNP8pa#JTs)>T++0K9_3mE#nZOx6Mr!3{*XP=d?)X&Zc#d9n2~Xww_G; zY<>hjKD?^pKZE0@%X@OyzOFOe`SEy%PU%^`=Mg*R>Tbz+vXA3GLt45fPsYVt9nV79 z3vWpDtXRA%regIY{}wxi3cmjgfmij^eq=se8+7bn=JE-1!+xGNo2|2C{(Xb3jjAfo zpYLE_u_7pD&-tzYJY)C!2JH`fdtqPr`IFD{=iZ8+T<3Lu<+t8r8xzx4+80nnmp|mhdOgl&(~Gc?6fbf|04Ny>8{^))AQ0lugg!)PVI1dGih_> zLa!AX_YEI3?hxWI{W$-?eBrns%OADgTl@HL**9M9e}dWX#T-{;&Fa3@Yjr$g()ClF z>i3p03f=N!42zw;L*aVrgW7DDALoC||G4dP`A=-|qrYy4WD5;MCF^ZcCn?2#9E$!{G1r(<|5hP@K+>_zzac5AD<1ceUi?pYU|qyHDqL>gDGt zO#f~43Z+Dsk%el7IR0zx3^$ zUpGJfV!T#O){m7B`vrb1_S(Pe$7HjKAD%rwY`(q9Z2QBtm2cOs|C%y0_;hr4Ia8&e z`^h|ScGXXP;ml|DsnvLVUnn(A{ zLg((etmU~g`eZ0e&hO(EdiDG6_e{V4?%v&5wK>`PJ0 zy%`F;!~=>JHf=M$&MvEcu*UnN`5`-rzw7o`F7y1zbnDc!-%eL_zw!#bKC<=+_eQgM zy5ev0&8N$p>DRlvV$H)Df3A8TjFbN`zqLMlz0C3Y^;u?C-bP&+l3M;C-;Z12OZaU!c__|ZtRRvZ*!3WxhwHt@!p!WF>(B4V z?VclD!}lS$;zh-dmqPWq-0Qa|zr8M>@;iL`)4$8#%uAp6{LRmr`}%eEU5hT)u~bMq zeoWoprCYLd;Z>b?dGB-Ye$`&Kee1L*20JSjWcmdDoPA?M2*c#v>rG#n^4>iXHf^%M zb#9YN`hks};Te$Z1KCX{`b02Z_^eX zJf#V@Jik?hS57P!m@Gcgb$98#?5>aO9VYg@_FQ!a6|WD@`riJoR5$Ii=l02mf2m#( z|8(QUx9ib8(=Qx&GBc&id(y>ku7aOJSFO7fzWnZc)9;z}d+#nSvo5v|{d}=b>p#P! zO*_A=Z>mxKaQYGZq1)?uGo;S8_u5EJzqDztd5GtwQ}On1xkZAbH)QnqoeJiWW$=+> zT>IsR_Jb!s3RnIR{;lQy;oP##x?hCi?$~^H3-Vd7dT_#-cSk0r=tcaxelBdk$X1RT z>veZB=UM${XqkWD7JIjiRq(NENw;ok=BCWj-*!-ETc-@ah;l|67egf1zB1w9?0NzI zZX4FewNdh&W>IeK{6{<$n%;+fR6RB^y*6W8q5Q_ADwt{>U>sw!>f(%7gQ8PlTFwy(^* zc+@i0yI^AEnb~taq+^-_on=H6P(hqj;?*GYh^pW(8r|R$49+_2oDSGmM1}2@wf0xHS zTjf2i>~q}qz5D*%OL6@@Jyz+jZuZ~1e|qgZW|be#t$XkAA-}tHw#L@TMys^BcDugK zYP~eYaEH^THJ7zDQwyi@pL>uU#^3gm|7gm`>EC)5N!Rk0c-tzeYVs(YeRfjwaAJ3E zr^79?mOQ_AcD{cXJlfg+NdKszc*N zZ^Ld$OuNPBf0O@luaLd;;fHMdn2!{_e7kCC|FoZGrxkja-0j_Ct#PdQyiZW?t>9Ph z|1&Ik{n>N(H|cE~UrhRQb)WC8yGJ&C{N842FLLik+T|lvzSlc-77N|Y_z>^%eS+dK z$%njq-IXV?2gcQZ_3aCK>BnY;_mrM~>+j~RFnv~upQ*jsyGOA{XGu5O zHuM*@S_^09KRolp^r8O`|93Ww53_5X?uT62GM_U`%SGH;IowDn}i;Y(HSuc zQw_JbG4>V5wrzdhH4ci~qao zovulKm(<;J`&UI}T7@ltkZJOpb^j)D=HNn;XRrOYpXQ12-gjn2?3tQzrWp_mvwpjw7=qqzJA(s|7`xQ{(HO1ySBt8hg~Vz&6{-Scktz|wQr|x zz2s@SGI6TV0+nU%z6a7K9X|3aZTb4FS-N|#{SEesh=J*p@-6=${z!D)R@v_p6Ia>3cFfXO`n+xLk#KQ=s6Eq;m>xZHDK9!r$T+87 zFQegh+o-Lu&C>D?B-NOBIkEZF}t!JMeZ+&8%87^-AUFft(n^?o1zH?D&hP##*et+}#=Jk8(s~`LB`a8cPKcy*p<(Bs%S<@!ONQCG(c~`6p6AxfBo1#@z#OGHl z*R}ff*(+g}?wb9Sb58y6`6283qo+4#7bg}!?zV1Tdndy%`_hi>I}|kc=9>6ERcWvI z`?13;LiVQRianeU=Qmy7r|heLD5|f0W%fSV-v-&d*0JW{V)1F73);{0_?+1-_AK@1 zan+wEnPrx3{V0B*%Kz=ghj&$e%s#xW>didK{hM}d{9VIYbiL?q#1?~Hd3RDvfWUnOy$+{^OnRWQ>w!i3JcRT>0*s=x!U4Kxyru#E-l@@z0Y-$_T|iHI^Gk^4I=}ZT0R|~bG`LP zc+7|IU3&x{>vP=ukhv_ZihyKa7{gdTn z^mv>9HG21d2351xPmilRQ&Jol4ry{(DlqxhY+Y63czsXxf^C!YAHQ`!GEcny=%Ukp zAD6Wj{++G1lKq?Q)gu!tuhm6)Da{PY?_kYcAUIWRlS#dB*6I(N|Ec|F5IQ$AlW*mc z`+hZ{k6!Op$+FM5@L*na;L634xsCIjjaJp{3~F?m!|b;FX3_ogv#x!8{hxvD(N!<) z)z7k@-1`0Ry>-N|XZbC5LOVXVH9lm&H2a_6UDh{CF0Z-vD)p1p#i?um75u)P>H2WS zyW?l)%$okFc7JQBKKHuGj~P@inR=hjR^84PaQWP2%~tVKKR@pcnNz?t=|+HW*bmJ| zCLhZW)pKVH-h3rhQ(k6xV%x8^m+!`CyfF9q&tS6im`m?zCeGI}*N>T2>{F|e*uCRF z!vvo=#hraYb+7JtE{|2+Y5zg#l>M#JciEn?{!_nSKYLgGb9LDkL>$%{#dTgzGb%Xv8;h%EZ_Z@PwQ(o1-bQQXDkhyrj*>3kUi&G|?@wa1* z>j(MfI@=oY{^*b9?IpIcrr9&!h5h@-U3u;9m)9p2mESqoWYW9Q^TEag&ofQ85vjYns@ESyOh9y4JzVGFt_Eb!@_KkPP+yMQLDMxKnIo5sq za#ef1_v;^9wuah@%>EbskKg~Wzg~a(yIJCgFE9Dy^U=R2?P~g->5M6}-7+byP@I1zVveD-pHEOS%f#&yEY|n^6TS1J z-{<4~){^yX{9He7<+sQ2UEQ+ip4fi9{HJGkZ0q{^#N+YSS)NLFpG+^}n0lJAYp(pR zt5@frJGuSGl0EOAoVs4R_xh=C+aLa{T>Mg>`A7Cc_k%0<_}o%kyeu>L<%H!f?pT*_F&ug!)?0Wew@W{DK zo7eQ5i7@s!W2wyEXBe;iNB%?J&PV+6e}X@Jz236N@!@|4!M)QO*Z8h}^?g@~U#Z3O z9kbu^T>ErSFY>gJ`gA4M?&KV4<5%&U-FwqE?zz54zP@g|Kt+*cz)vIhZC6|iRr2zZ zIG;J{EcIEnR(tiaa;;-_Q5W}KnYX`n()Ew;b9H{bl+~LyJ#Kq-UHz<=>sQ{dk^Hds zeS6*)*KJpFOn&e6s*THkb2jPIskB`)?|H_bYfG8HmMJrZM|@7QglO>5sPx0}?e#nf zAD(xt+mg@uLqjX#u36KR*}pkcs%>pP>z&^>R^_zbvCd0yIPj#0>+}1DI+d+Il7HL&QM>#n z-e#BSx8dC7s=NEyyzFSd-HH4C!f){O`HETHUkFY1TEbtCnJSuKoPaFn8_UTek$} zK0JHv`p@`Jdw*+xwQHUKNWL|9PdtCC+DEf~@vgM~bKNf0KU!ZeGv1k?yQhb#h|Oa4 zF7>XGcwyU58Nc?nt@>s>JA3QKou0jW7OyGFRnO*oRgnGoeYcqTuJohP=h-r%wzqzb zO^^ThtoKp=X4NbG>u;>d&3@LlE#icLj)8UU!X*KMJ=L5E*-N(gdOy%V`d8Wgcivs= z2(z+j*EB1IW46wUwF!uhp1w^!GvePxl}!smraIu z{JrPy)cL30)@hwT_3g9WoQpMq%PQL5hi%=kOZd)9^Z2B_9gCHn&5lga^`9K|_CKosU3d*T(8LhV z{*n2iNxYE$!dU+!yPim~DgKUc*NwWFvwqJ`o1>p27iPA3Ft!veNLV1?Rut$`-^u#? zmq9(t{fYNrCg7q&W8J+sT~mnAfAe5(V`^V&-!s*TE*%dq9#H5#b!rWNfP?ZxR}sc6 z=n5#7RGuX=4|)m;BkhDf%$ximp7-T1w$`*=brTZ>+V0(qoSoEHnWj2plBdp*NuDZ` zG8Q$MzJNxJ*4k}TQ&TG|%k$IoS+73UzH?s0YrSPC*U}q28@U)HUoeOSI*63`b7acN zT={y|-EUKL_1nCc`KFxTjZKfN7IIlKO`Sm{W1$XlXO zw2e#$>(g1^aO)K7lpE@Fskrm?F|y=^g(g%rG0JplEZ`0abY%dYmM`ibc}cg%aY=k; z`Nb{U!Z+A%`M9Q6GUKk-2A(BGPlQh>^d0NcG?}$oDo^i)Nv_=K!?#@frY^hfp1M5Z zz_e@EF7l`ZDz%0(UtnN=7_PBs<#zQlFHdcSMO~UaphUSkd1+*3Z_%}~f+*7~#~u~4 fc(rAEFr4@yZ!EAtsPaKo=ql*aGN?!R|K9`v+D=2R diff --git a/tensorflow/core/profiler/internal/tfprof_op.cc b/tensorflow/core/profiler/internal/tfprof_op.cc index c04b0ea0c6..5a8429d489 100644 --- a/tensorflow/core/profiler/internal/tfprof_op.cc +++ b/tensorflow/core/profiler/internal/tfprof_op.cc @@ -109,7 +109,6 @@ const ShowMultiNode* TFOp::ShowInternal(const Options& opts, fprintf(stderr, "Only 'code' view supports pprof output now.\n"); return root_.get(); } - if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) { root_->formatted_str = FormatNode(root_.get(), root_.get(), opts); } @@ -130,7 +129,6 @@ const ShowMultiNode* TFOp::ShowInternal(const Options& opts, nodes.push_back(n.second.get()); } nodes = SortNodes(nodes, opts); - // pre keeps track of previous visited node. OpNode* pre = nullptr; std::vector account_nodes; @@ -166,10 +164,6 @@ const ShowMultiNode* TFOp::ShowInternal(const Options& opts, (*it)->AddSelfToTotalStats(); if (pre) (*it)->AggregateTotalStats(pre); } - if (pre) { - (*it)->mutable_proto()->add_children()->MergeFrom(pre->proto()); - pre->mutable_proto()->clear_children(); - } pre = *it; } if (opts.account_displayed_op_only) { @@ -178,11 +172,6 @@ const ShowMultiNode* TFOp::ShowInternal(const Options& opts, root_->AggregateTotalStats(pre); } } - if (pre) { - root_->mutable_proto()->add_children()->MergeFrom(pre->proto()); - pre->mutable_proto()->clear_children(); - } - if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) { string display_str = FormatLegend(opts); for (OpNode* node : show_nodes) { @@ -192,6 +181,13 @@ const ShowMultiNode* TFOp::ShowInternal(const Options& opts, // TODO(xpan): Is it the right choice? root_->formatted_str = display_str; } + // Populate the chidren field. + auto* pre_pb = root_->mutable_proto(); + for (auto& show_node : show_nodes) { + pre_pb->clear_children(); + pre_pb->add_children()->Swap(show_node->mutable_proto()); + pre_pb = pre_pb->mutable_children(0); + } return root_.get(); } diff --git a/tensorflow/core/profiler/profiler.cc b/tensorflow/core/profiler/profiler.cc index a5e513aa21..b280242df1 100644 --- a/tensorflow/core/profiler/profiler.cc +++ b/tensorflow/core/profiler/profiler.cc @@ -266,7 +266,18 @@ int Run(int argc, char** argv) { linenoiseSetCompletionCallback(completion); linenoiseHistoryLoad(".tfprof_history.txt"); - for (char* line = nullptr; (line = linenoise("tfprof> ")) != nullptr;) { + bool looped = false; + while (true) { + char* line = linenoise("tfprof> "); + if (line == nullptr) { + if (!looped) { + fprintf(stderr, + "Cannot start interative shell, " + "use 'bazel-bin' instead of 'bazel run'.\n"); + } + break; + } + looped = true; string line_s = line; free(line); diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py index 040a489163..46a921c0a1 100644 --- a/tensorflow/python/profiler/model_analyzer.py +++ b/tensorflow/python/profiler/model_analyzer.py @@ -20,6 +20,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import sys + import six from google.protobuf import message @@ -206,8 +208,8 @@ class Profiler(object): try: tfprof_node.ParseFromString( print_mdl.Profile('code'.encode('utf-8'), opts.SerializeToString())) - except message.DecodeError as _: - pass + except message.DecodeError as e: + sys.stderr.write('Cannot parse returned proto: %s.\n' % e) return tfprof_node def profile_operations(self, options): @@ -223,8 +225,8 @@ class Profiler(object): try: tfprof_node.ParseFromString( print_mdl.Profile('op'.encode('utf-8'), opts.SerializeToString())) - except message.DecodeError as _: - pass + except message.DecodeError as e: + sys.stderr.write('Cannot parse returned proto: %s.\n' % e) return tfprof_node def profile_name_scope(self, options): @@ -240,8 +242,8 @@ class Profiler(object): try: tfprof_node.ParseFromString( print_mdl.Profile('scope'.encode('utf-8'), opts.SerializeToString())) - except message.DecodeError as _: - pass + except message.DecodeError as e: + sys.stderr.write('Cannot parse returned proto: %s.\n' % e) return tfprof_node def profile_graph(self, options): @@ -257,8 +259,8 @@ class Profiler(object): try: tfprof_node.ParseFromString( print_mdl.Profile('graph'.encode('utf-8'), opts.SerializeToString())) - except message.DecodeError as _: - pass + except message.DecodeError as e: + sys.stderr.write('Cannot parse returned proto: %s.\n' % e) return tfprof_node def advise(self, options): @@ -331,9 +333,8 @@ def profile(graph, opts.SerializeToString()) try: tfprof_node.ParseFromString(ret) - except message.DecodeError as _: - pass - # sys.stderr.write('Cannot parse returned proto: %s.\n' % e) + except message.DecodeError as e: + sys.stderr.write('Cannot parse returned proto: %s.\n' % e) elif cmd == 'graph' or cmd == 'scope': tfprof_node = tfprof_output_pb2.GraphNodeProto() @@ -345,9 +346,8 @@ def profile(graph, opts.SerializeToString()) try: tfprof_node.ParseFromString(ret) - except message.DecodeError as _: - pass - # sys.stderr.write('Cannot parse returned proto: %s.\n' % e) + except message.DecodeError as e: + sys.stderr.write('Cannot parse returned proto: %s.\n' % e) else: raise errors.InvalidArgumentError( None, None, 'unknown cmd: %s\n' % cmd) -- GitLab From 3a63bce95f67854b6745cb43e0e9feb1e93587f1 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Tue, 14 Nov 2017 15:57:27 -0800 Subject: [PATCH 0437/1801] Make the definition of summary operations a warning instead of raising an exception. PiperOrigin-RevId: 175748972 --- tensorflow/contrib/tpu/python/tpu/tpu.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index 9aa5a9c78d..f3ddc09754 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -29,6 +29,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import variable_scope +from tensorflow.python.platform import tf_logging as logging _SUMMARY_OPS = ("ScalarSummary",) @@ -111,7 +112,8 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext): raise ValueError("Placeholder %s is not supported." % op.name) if op.type in _SUMMARY_OPS: - raise ValueError("Summary operations are not currently supported.") + logging.warning( + "Summary operations are not currently supported (%s)" % op.name) if any(x.dtype._is_ref_dtype for x in op.inputs): raise NotImplementedError( -- GitLab From 205ff0f7592c60ab09fc705f2c5501d8547e83be Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 16:37:32 -0800 Subject: [PATCH 0438/1801] [TF:XLA] Added tf_xla_cpu_global_jit flag to TF_XLA_FLAGS environment variable to enable global JIT compilation for CPU via SessionOptions. By default, global JIT compilation for CPU via SessionOptions is disabled. When TF_XLA_FLAGS=--tf_xla_cpu_global_jit is set, the value of enable_jit_by_default variable in mark_for_compilation_pass.cc is ignored allowing XLA to use JIT compilation for the whole graph according to SessionOptions setting . Unless tf_xla_cpu_dev_mode is explicitly set via TF_XLA_FLAGS, this code change should have no effect on Tensorflow or XLA execution. RELNOTES: n/a PiperOrigin-RevId: 175754729 --- .../mark_for_compilation_pass_flags.cc | 32 ++++++++++--------- .../mark_for_compilation_pass_flags.h | 2 ++ .../compiler/jit/mark_for_compilation_pass.cc | 13 ++++++-- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.cc b/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.cc index 09aee39d8c..4bc209b7ec 100644 --- a/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.cc +++ b/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.cc @@ -39,21 +39,23 @@ static void AllocateFlags() { flags->tf_xla_min_cluster_size = 2; flags->tf_xla_max_cluster_size = std::numeric_limits::max(); flags->tf_xla_clustering_debug = false; - flag_list = new std::vector({ - Flag("tf_xla_auto_jit", &flags->tf_xla_auto_jit, - "Control compilation of operators into XLA computations on CPU and " - "GPU devices. 0 = use ConfigProto setting; -1 = off; 1 = on for " - "things very likely to be improved; 2 = on for everything. " - "Experimental."), - Flag("tf_xla_min_cluster_size", &flags->tf_xla_min_cluster_size, - "Minimum number of operators in an XLA compilation. Ignored for " - "operators placed on an XLA device or operators explicitly marked " - "for compilation."), - Flag("tf_xla_max_cluster_size", &flags->tf_xla_max_cluster_size, - "Maximum number of operators in an XLA compilation."), - Flag("tf_xla_clustering_debug", &flags->tf_xla_clustering_debug, - "Dump graphs during XLA compilation."), - }); + flags->tf_xla_cpu_global_jit = false; + flag_list = new std::vector( + {Flag("tf_xla_auto_jit", &flags->tf_xla_auto_jit, + "Control compilation of operators into XLA computations on CPU and " + "GPU devices. 0 = use ConfigProto setting; -1 = off; 1 = on for " + "things very likely to be improved; 2 = on for everything. " + "Experimental."), + Flag("tf_xla_min_cluster_size", &flags->tf_xla_min_cluster_size, + "Minimum number of operators in an XLA compilation. Ignored for " + "operators placed on an XLA device or operators explicitly marked " + "for compilation."), + Flag("tf_xla_max_cluster_size", &flags->tf_xla_max_cluster_size, + "Maximum number of operators in an XLA compilation."), + Flag("tf_xla_clustering_debug", &flags->tf_xla_clustering_debug, + "Dump graphs during XLA compilation."), + Flag("tf_xla_cpu_global_jit", &flags->tf_xla_cpu_global_jit, + "Enables global JIT compilation for CPU via SessionOptions.")}); xla::legacy_flags::ParseFlagsFromEnv(*flag_list); } diff --git a/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h b/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h index 24f8050742..e1ccd7ddb8 100644 --- a/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h +++ b/tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h @@ -46,6 +46,8 @@ typedef struct { int32 tf_xla_max_cluster_size; // Maximum number of operators in an XLA // compilation. bool tf_xla_clustering_debug; // Dump graphs during XLA compilation. + bool tf_xla_cpu_global_jit; // Enables global JIT compilation for CPU + // via SessionOptions. } MarkForCompilationPassFlags; // Return a pointer to the MarkForCompilationPassFlags struct; diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 78d0aa86a8..74c9791f5e 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -290,9 +290,11 @@ Status MarkForCompilationPass::Run( global_jit_level = static_cast(flags->tf_xla_auto_jit); } + bool cpu_global_jit = flags->tf_xla_cpu_global_jit; const FunctionLibraryDefinition* fld = options.flib_def; - auto is_compilable = [global_jit_level, fld](const Node* node, - const DeviceType& device_type) { + + auto is_compilable = [global_jit_level, cpu_global_jit, fld]( + const Node* node, const DeviceType& device_type) { const XlaOpRegistry::DeviceRegistration* registration; if (!XlaOpRegistry::GetCompilationDevice(device_type.type(), ®istration)) { @@ -315,7 +317,11 @@ Status MarkForCompilationPass::Run( if (status.ok()) return compile; // Otherwise use the value of global_jit_level. - return registration->enable_jit_by_default && global_jit_level > 0; + // Ignore enable_jit_by_default if global jit compilation for CPU + // is explicitly requested via tf_xla_cpu_global_jit flag + bool ignore_registration = cpu_global_jit && device_type == DEVICE_CPU; + return (ignore_registration || registration->enable_jit_by_default) && + global_jit_level > 0; }; return RunImpl(options, is_compilable); } @@ -556,6 +562,7 @@ Status MarkForCompilationPass::RunImpl( if (cluster_sizes[cluster] >= min_cluster_size || marked_for_compilation || registration->requires_compilation) { string& name = cluster_names[cluster]; + if (name.empty()) { name = strings::StrCat("cluster_", cluster_sequence_num++); } -- GitLab From 94b275420b2c5310f37d0bda3d329d7e0d0b5e99 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 14 Nov 2017 16:54:35 -0800 Subject: [PATCH 0439/1801] Replace uses of @%ws% with @org_tensorflow in LLVM BUILD file. @%ws% existed to work around a bug in older Bazel versions. The minimum Bazel version no longer has this issue so we can just write @org_tensorflow. Fixes #14445. PiperOrigin-RevId: 175756784 --- third_party/llvm/llvm.BUILD | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/third_party/llvm/llvm.BUILD b/third_party/llvm/llvm.BUILD index 97b833e49d..5344525ba8 100644 --- a/third_party/llvm/llvm.BUILD +++ b/third_party/llvm/llvm.BUILD @@ -7,18 +7,18 @@ licenses(["notice"]) exports_files(["LICENSE.TXT"]) load( - "@%ws%//third_party/llvm:llvm.bzl", + "@org_tensorflow//third_party/llvm:llvm.bzl", "gentbl", "expand_cmake_vars", "llvm_target_cmake_vars", "cmake_var_string", ) load( - "@%ws%//third_party:common.bzl", + "@org_tensorflow//third_party:common.bzl", "template_rule", ) -package(default_visibility = ["@%ws%//tensorflow/compiler/xla:internal"]) +package(default_visibility = ["//visibility:public"]) llvm_host_triple = "x86_64-unknown-linux_gnu" @@ -145,11 +145,11 @@ darwin_cmake_vars = { # TODO(phawkins): use a better method to select the right host triple, rather # than hardcoding x86_64. all_cmake_vars = select({ - "@%ws%//tensorflow:darwin": cmake_var_string( + "@org_tensorflow//tensorflow:darwin": cmake_var_string( cmake_vars + llvm_target_cmake_vars("X86", "x86_64-apple-darwin") + darwin_cmake_vars, ), - "@%ws%//tensorflow:linux_ppc64le": cmake_var_string( + "@org_tensorflow//tensorflow:linux_ppc64le": cmake_var_string( cmake_vars + llvm_target_cmake_vars("PowerPC", "powerpc64le-unknown-linux_gnu") + linux_cmake_vars, -- GitLab From 95c46f90474b3ae3996a8596bf5a53af9c52b290 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 16:59:20 -0800 Subject: [PATCH 0440/1801] Add head name to tf.contrib.estimator.regression_head metrics PiperOrigin-RevId: 175757275 --- tensorflow/python/estimator/canned/head.py | 2 +- .../python/estimator/canned/head_test.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index eaed412c8b..62fea05867 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -1081,7 +1081,7 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): if mode == model_fn.ModeKeys.EVAL: # Estimator already adds a metric for loss. eval_metric_ops = { - metric_keys.MetricKeys.LOSS_MEAN: + _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN): metrics_lib.mean( # Both values and weights here are reduced, scalar Tensors. # values is the actual mean we want -- weights represents diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 4497cd26f2..f3afd84125 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -2325,6 +2325,24 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase): self.assertAllClose(expected_loss_mean, loss_mean) self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval()) + def test_eval_metric_ops_with_head_name_for_regression(self): + head = head_lib._regression_head_with_mean_squared_error_loss( + name='some_regression_head') + logits = np.array(((1,), (9,)), dtype=np.float32) + labels = np.array(((1,), (1,)), dtype=np.int64) + features = {'x': np.array(((42,),), dtype=np.int32)} + # Create estimator spec. + spec = head.create_estimator_spec( + features=features, + mode=model_fn.ModeKeys.EVAL, + logits=logits, + labels=labels) + + expected_metric_keys = [ + '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS_MEAN), + ] + self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys()) + def test_train_create_loss(self): head = head_lib._regression_head_with_mean_squared_error_loss() logits = np.array(((45,), (41,),), dtype=np.float32) -- GitLab From 48b25d0a1d71fb426b5765a88785b35a4327e4f5 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 14 Nov 2017 17:03:58 -0800 Subject: [PATCH 0441/1801] Update tf.keras to the Keras 2.0.9 API. PiperOrigin-RevId: 175757949 --- .../keras/_impl/keras/callbacks_test.py | 1 - tensorflow/python/keras/_impl/keras/losses.py | 2 +- tensorflow/python/keras/_impl/keras/models.py | 73 ++++-- .../keras/_impl/keras/preprocessing/image.py | 216 +++++++++++------- .../_impl/keras/preprocessing/image_test.py | 2 + .../_impl/keras/preprocessing/sequence.py | 2 +- .../keras/_impl/keras/utils/__init__.py | 1 + .../keras/_impl/keras/utils/data_utils.py | 108 +++++---- .../keras/_impl/keras/utils/generic_utils.py | 66 ++++-- .../_impl/keras/wrappers/scikit_learn.py | 4 +- .../golden/tensorflow.keras.-sequential.pbtxt | 4 +- .../tensorflow.keras.models.-sequential.pbtxt | 4 +- ...processing.image.-directory-iterator.pbtxt | 5 + ....keras.preprocessing.image.-iterator.pbtxt | 5 + ...ocessing.image.-numpy-array-iterator.pbtxt | 5 + ...tensorflow.keras.preprocessing.image.pbtxt | 2 +- ...flow.keras.utils.-generator-enqueuer.pbtxt | 2 +- 17 files changed, 329 insertions(+), 173 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/callbacks_test.py b/tensorflow/python/keras/_impl/keras/callbacks_test.py index 9f578a0fab..6924a8926b 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks_test.py +++ b/tensorflow/python/keras/_impl/keras/callbacks_test.py @@ -571,7 +571,6 @@ class KerasCallbacksTest(test.TestCase): loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) - tsb = keras.callbacks.TensorBoard( log_dir=temp_dir, histogram_freq=1, write_images=True, write_grads=True, batch_size=5) diff --git a/tensorflow/python/keras/_impl/keras/losses.py b/tensorflow/python/keras/_impl/keras/losses.py index 7c6b304622..da0984d3c3 100644 --- a/tensorflow/python/keras/_impl/keras/losses.py +++ b/tensorflow/python/keras/_impl/keras/losses.py @@ -91,7 +91,7 @@ def poisson(y_true, y_pred): def cosine_proximity(y_true, y_pred): y_true = K.l2_normalize(y_true, axis=-1) y_pred = K.l2_normalize(y_pred, axis=-1) - return -K.mean(y_true * y_pred, axis=-1) + return -K.sum(y_true * y_pred, axis=-1) # Aliases. diff --git a/tensorflow/python/keras/_impl/keras/models.py b/tensorflow/python/keras/_impl/keras/models.py index 06941e4bac..046fd11633 100644 --- a/tensorflow/python/keras/_impl/keras/models.py +++ b/tensorflow/python/keras/_impl/keras/models.py @@ -716,25 +716,46 @@ class Sequential(Model): metrics=None, sample_weight_mode=None, weighted_metrics=None, + target_tensors=None, **kwargs): - """Configures the learning process. + """Configures the model for training. Arguments: - optimizer: str (name of optimizer) or optimizer object. + optimizer: String (name of optimizer) or optimizer object. See [optimizers](/optimizers). - loss: str (name of objective function) or objective function. + loss: String (name of objective function) or objective function. See [losses](/losses). - metrics: list of metrics to be evaluated by the model + If the model has multiple outputs, you can use a different loss + on each output by passing a dictionary or a list of losses. + The loss value that will be minimized by the model + will then be the sum of all individual losses. + metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`. - See [metrics](/metrics). - sample_weight_mode: if you need to do timestep-wise - sample weighting (2D weights), set this to "temporal". - "None" defaults to sample-wise weights (1D). - weighted_metrics: list of metrics to be evaluated and weighted - by `sample_weight` or `class_weight` during training and testing. - **kwargs: These are passed into `tf.Session.run`. - + To specify different metrics for different outputs of a + multi-output model, you could also pass a dictionary, + such as `metrics={'output_a': 'accuracy'}`. + sample_weight_mode: If you need to do timestep-wise + sample weighting (2D weights), set this to `"temporal"`. + `None` defaults to sample-wise weights (1D). + If the model has multiple outputs, you can use a different + `sample_weight_mode` on each output by passing a + dictionary or a list of modes. + weighted_metrics: List of metrics to be evaluated and weighted + by sample_weight or class_weight during training and testing. + target_tensors: By default, Keras will create placeholders for the + model's target, which will be fed with the target data during + training. If instead you would like to use your own + target tensors (in turn, Keras will not expect external + Numpy data for these targets at training time), you + can specify them via the `target_tensors` argument. It can be + a single tensor (for a single-output model), a list of tensors, + or a dict mapping output names to target tensors. + **kwargs: When using the Theano/CNTK backends, these arguments + are passed into K.function. When using the TensorFlow backend, + these arguments are passed into `tf.Session.run`. + Raises: + ValueError: In case of invalid arguments for Example: ```python model = Sequential() @@ -754,18 +775,19 @@ class Sequential(Model): metrics=metrics, sample_weight_mode=sample_weight_mode, weighted_metrics=weighted_metrics, + target_tensors=target_tensors, **kwargs) self.optimizer = self.model.optimizer self.loss = self.model.loss - self.total_loss = self.model.total_loss - self.loss_weights = self.model.loss_weights self.metrics = self.model.metrics + self.loss_weights = self.model.loss_weights + self.sample_weight_mode = self.model.sample_weight_mode self.weighted_metrics = self.model.weighted_metrics + self.targets = self.model.targets self.metrics_tensors = self.model.metrics_tensors self.metrics_names = self.model.metrics_names - self.sample_weight_mode = self.model.sample_weight_mode self.sample_weights = self.model.sample_weights - self.targets = self.model.targets + self.total_loss = self.model.total_loss def fit(self, x, @@ -787,7 +809,11 @@ class Sequential(Model): (if the model has multiple inputs). y: labels, as a Numpy array. batch_size: integer. Number of samples per gradient update. - epochs: integer, the number of epochs to train the model. + epochs: integer. Number of epochs to train the model. + Note that in conjunction with initial_epoch, the parameter + epochs is to be understood as "final epoch". The model is + not trained for a number of steps given by epochs, but + until the epoch epochs is reached. verbose: 0 for no logging to stdout, 1 for progress bar logging, 2 for one log line per epoch. callbacks: list of `keras.callbacks.Callback` instances. @@ -814,8 +840,8 @@ class Sequential(Model): to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). - initial_epoch: epoch at which to start training - (useful for resuming a previous training run) + initial_epoch: Epoch at which to start training + (useful for resuming a previous training run). Returns: A `History` object. Its `History.history` attribute is @@ -1003,6 +1029,7 @@ class Sequential(Model): max_queue_size=10, workers=1, use_multiprocessing=False, + shuffle=True, initial_epoch=0, **kwargs): """Fits the model on data generated batch-by-batch by a Python generator. @@ -1026,6 +1053,10 @@ class Sequential(Model): be equal to the number of unique samples of your dataset divided by the batch size. epochs: Integer, total number of iterations on the data. + Note that in conjunction with initial_epoch, the parameter + epochs is to be understood as "final epoch". The model is + not trained for n steps given by epochs, but until the + epoch epochs is reached. verbose: Verbosity mode, 0, 1, or 2. callbacks: List of callbacks to be called during training. validation_data: This can be either @@ -1049,6 +1080,9 @@ class Sequential(Model): non picklable arguments to the generator as they can't be passed easily to children processes. + shuffle: Whether to shuffle the order of the batches at + the beginning of each epoch. Only used with instances + of `Sequence` (keras.utils.Sequence). initial_epoch: Epoch at which to start training (useful for resuming a previous training run) **kwargs: support for legacy arguments. @@ -1105,6 +1139,7 @@ class Sequential(Model): max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, + shuffle=shuffle, initial_epoch=initial_epoch) def evaluate_generator(self, diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/image.py b/tensorflow/python/keras/_impl/keras/preprocessing/image.py index 052a8addc4..12dc718cd7 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/image.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/image.py @@ -31,6 +31,7 @@ import numpy as np from six.moves import range # pylint: disable=redefined-builtin from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence from tensorflow.python.platform import tf_logging as logging @@ -47,6 +48,21 @@ except ImportError: ndi = None # pylint: enable=g-import-not-at-top +if pil_image is not None: + _PIL_INTERPOLATION_METHODS = { + 'nearest': pil_image.NEAREST, + 'bilinear': pil_image.BILINEAR, + 'bicubic': pil_image.BICUBIC, + } + # These methods were only introduced in version 3.4.0 (2016). + if hasattr(pil_image, 'HAMMING'): + _PIL_INTERPOLATION_METHODS['hamming'] = pil_image.HAMMING + if hasattr(pil_image, 'BOX'): + _PIL_INTERPOLATION_METHODS['box'] = pil_image.BOX + # This method is new in version 1.1.3 (2013). + if hasattr(pil_image, 'LANCZOS'): + _PIL_INTERPOLATION_METHODS['lanczos'] = pil_image.LANCZOS + def random_rotation(x, rg, @@ -172,10 +188,8 @@ def random_zoom(x, (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). cval: Value used for points outside the boundaries of the input if `mode='constant'`. - Returns: Zoomed Numpy image tensor. - Raises: ValueError: if `zoom_range` isn't a tuple. """ @@ -344,7 +358,7 @@ def img_to_array(img, data_format=None): return x -def load_img(path, grayscale=False, target_size=None): +def load_img(path, grayscale=False, target_size=None, interpolation='nearest'): """Loads an image into PIL format. Arguments: @@ -352,12 +366,19 @@ def load_img(path, grayscale=False, target_size=None): grayscale: Boolean, whether to load the image as grayscale. target_size: Either `None` (default to original size) or tuple of ints `(img_height, img_width)`. + interpolation: Interpolation method used to resample the image if the + target size is different from that of the loaded image. + Supported methods are "nearest", "bilinear", and "bicubic". + If PIL version 1.1.3 or newer is installed, "lanczos" is also + supported. If PIL version 3.4.0 or newer is installed, "box" and + "hamming" are also supported. By default, "nearest" is used. Returns: A PIL Image instance. Raises: ImportError: if PIL is not available. + ValueError: if interpolation method is not supported. """ if pil_image is None: raise ImportError('Could not import PIL.Image. ' @@ -369,14 +390,21 @@ def load_img(path, grayscale=False, target_size=None): else: if img.mode != 'RGB': img = img.convert('RGB') - if target_size: - hw_tuple = (target_size[1], target_size[0]) - if img.size != hw_tuple: - img = img.resize(hw_tuple) + if target_size is not None: + width_height_tuple = (target_size[1], target_size[0]) + if img.size != width_height_tuple: + if interpolation not in _PIL_INTERPOLATION_METHODS: + raise ValueError( + 'Invalid interpolation method {} specified. Supported ' + 'methods are {}'.format( + interpolation, + ', '.join(_PIL_INTERPOLATION_METHODS.keys()))) + resample = _PIL_INTERPOLATION_METHODS[interpolation] + img = img.resize(width_height_tuple, resample) return img -def list_pictures(directory, ext='jpg|jpeg|bmp|png'): +def list_pictures(directory, ext='jpg|jpeg|bmp|png|ppm'): return [ os.path.join(root, f) for root, _, files in os.walk(directory) for f in files @@ -401,7 +429,7 @@ class ImageDataGenerator(object): zoom_range: amount of zoom. if scalar z, zoom will be randomly picked in the range [1-z, 1+z]. A sequence of two can be passed instead to select this range. - channel_shift_range: shift range for each channels. + channel_shift_range: shift range for each channel. fill_mode: points outside the boundaries are filled according to the given mode ('constant', 'nearest', 'reflect' or 'wrap'). Default is 'nearest'. @@ -558,12 +586,10 @@ class ImageDataGenerator(object): x = self.preprocessing_function(x) if self.rescale: x *= self.rescale - # x is a single image, so it doesn't have image number at index 0 - img_channel_axis = self.channel_axis - 1 if self.samplewise_center: - x -= np.mean(x, axis=img_channel_axis, keepdims=True) + x -= np.mean(x, keepdims=True) if self.samplewise_std_normalization: - x /= (np.std(x, axis=img_channel_axis, keepdims=True) + 1e-7) + x /= np.std(x, keepdims=True) + 1e-7 if self.featurewise_center: if self.mean is not None: @@ -762,49 +788,76 @@ class ImageDataGenerator(object): np.dot(u, np.diag(1. / np.sqrt(s + self.zca_epsilon))), u.T) -class Iterator(object): - """Abstract base class for image data iterators. +class Iterator(Sequence): + """Base class for image data iterators. + + Every `Iterator` must implement the `_get_batches_of_transformed_samples` + method. Arguments: - n: Integer, total number of samples in the dataset to loop over. - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - seed: Random seeding for data shuffling. + n: Integer, total number of samples in the dataset to loop over. + batch_size: Integer, size of a batch. + shuffle: Boolean, whether to shuffle the data between epochs. + seed: Random seeding for data shuffling. """ def __init__(self, n, batch_size, shuffle, seed): self.n = n self.batch_size = batch_size + self.seed = seed self.shuffle = shuffle self.batch_index = 0 self.total_batches_seen = 0 self.lock = threading.Lock() - self.index_generator = self._flow_index(n, batch_size, shuffle, seed) + self.index_array = None + self.index_generator = self._flow_index() + + def _set_index_array(self): + self.index_array = np.arange(self.n) + if self.shuffle: + self.index_array = np.random.permutation(self.n) + + def __getitem__(self, idx): + if idx >= len(self): + raise ValueError('Asked to retrieve element {idx}, ' + 'but the Sequence ' + 'has length {length}'.format(idx=idx, + length=len(self))) + if self.seed is not None: + np.random.seed(self.seed + self.total_batches_seen) + self.total_batches_seen += 1 + if self.index_array is None: + self._set_index_array() + index_array = self.index_array[self.batch_size * idx:self.batch_size * + (idx + 1)] + return self._get_batches_of_transformed_samples(index_array) + + def __len__(self): + length = int(np.ceil(self.n / float(self.batch_size))) + return np.maximum(length, 0) + + def on_epoch_end(self): + self._set_index_array() def reset(self): self.batch_index = 0 - def _flow_index(self, n, batch_size=32, shuffle=False, seed=None): + def _flow_index(self): # Ensure self.batch_index is 0. self.reset() while 1: - if seed is not None: - np.random.seed(seed + self.total_batches_seen) + if self.seed is not None: + np.random.seed(self.seed + self.total_batches_seen) if self.batch_index == 0: - index_array = np.arange(n) - if shuffle: - index_array = np.random.permutation(n) + self._set_index_array() - current_index = (self.batch_index * batch_size) % n - if n > current_index + batch_size: - current_batch_size = batch_size + current_index = (self.batch_index * self.batch_size) % self.n + if self.n > current_index + self.batch_size: self.batch_index += 1 else: - current_batch_size = n - current_index self.batch_index = 0 self.total_batches_seen += 1 - yield (index_array[current_index:current_index + current_batch_size], - current_index, current_batch_size) + yield self.index_array[current_index:current_index + self.batch_size] def __iter__(self): # pylint: disable=non-iterator-returned # Needed if we want to do something like: @@ -814,6 +867,16 @@ class Iterator(object): def __next__(self, *args, **kwargs): return self.next(*args, **kwargs) + def _get_batches_of_transformed_samples(self, index_array): + """Gets a batch of transformed samples. + + Arguments: + index_array: array of sample indices to include in batch. + Returns: + A batch of transformed samples. + """ + raise NotImplementedError + class NumpyArrayIterator(Iterator): """Iterator yielding data from a Numpy array. @@ -883,33 +946,19 @@ class NumpyArrayIterator(Iterator): super(NumpyArrayIterator, self).__init__(x.shape[0], batch_size, shuffle, seed) - def next(self): - """For python 2.x. - - Returns: - The next batch. - """ - # Keeps under lock only the mechanism which advances - # the indexing of each batch. - with self.lock: - index_array, current_index, current_batch_size = next( - self.index_generator) - # The transformation of images is not under thread lock - # so it can be done in parallel - batch_x = np.zeros( - tuple([current_batch_size] + list(self.x.shape)[1:]), dtype=K.floatx()) + def _get_batches_of_transformed_samples(self, index_array): + batch_x = np.zeros(tuple([len(index_array)] + list(self.x.shape)[1:]), + dtype=K.floatx()) for i, j in enumerate(index_array): x = self.x[j] x = self.image_data_generator.random_transform(x.astype(K.floatx())) x = self.image_data_generator.standardize(x) batch_x[i] = x if self.save_to_dir: - for i in range(current_batch_size): + for i, j in enumerate(index_array): img = array_to_img(batch_x[i], self.data_format, scale=True) fname = '{prefix}_{index}_{hash}.{format}'.format( - prefix=self.save_prefix, - index=current_index + i, - hash=np.random.randint(1e4), + prefix=self.save_prefix, index=j, hash=np.random.randint(1e4), format=self.save_format) img.save(os.path.join(self.save_to_dir, fname)) if self.y is None: @@ -917,6 +966,20 @@ class NumpyArrayIterator(Iterator): batch_y = self.y[index_array] return batch_x, batch_y + def next(self): + """For python 2.x. + + Returns: + The next batch. + """ + # Keeps under lock only the mechanism which advances + # the indexing of each batch. + with self.lock: + index_array = next(self.index_generator) + # The transformation of images is not under thread lock + # so it can be done in parallel + return self._get_batches_of_transformed_samples(index_array) + def _count_valid_files_in_directory(directory, white_list_formats, follow_links): @@ -939,7 +1002,7 @@ def _count_valid_files_in_directory(directory, white_list_formats, samples = 0 for _, _, files in _recursive_list(directory): - for fname in files: + for fname in sorted(files): is_valid = False for extension in white_list_formats: if fname.lower().endswith('.' + extension): @@ -1006,7 +1069,7 @@ class DirectoryIterator(Iterator): to use for random transformations and normalization. target_size: tuple of integers, dimensions to resize input images to. color_mode: One of `"rgb"`, `"grayscale"`. Color mode to read images. - classes: Optional list of strings, names of sudirectories + classes: Optional list of strings, names of subdirectories containing images from each class (e.g. `["dogs", "cats"]`). It will be computed automatically if not set. class_mode: Mode for yielding the targets: @@ -1086,7 +1149,7 @@ class DirectoryIterator(Iterator): for subdir in sorted(os.listdir(directory)): if os.path.isdir(os.path.join(directory, subdir)): classes.append(subdir) - self.num_class = len(classes) + self.num_classes = len(classes) self.class_indices = dict(zip(classes, range(len(classes)))) pool = multiprocessing.pool.ThreadPool() @@ -1099,7 +1162,7 @@ class DirectoryIterator(Iterator): for subdir in classes))) print('Found %d images belonging to %d classes.' % (self.samples, - self.num_class)) + self.num_classes)) # second, build an index of the images in the different class subfolders results = [] @@ -1121,39 +1184,25 @@ class DirectoryIterator(Iterator): super(DirectoryIterator, self).__init__(self.samples, batch_size, shuffle, seed) - def next(self): - """For python 2.x. - - Returns: - The next batch. - """ - with self.lock: - index_array, current_index, current_batch_size = next( - self.index_generator) - # The transformation of images is not under thread lock - # so it can be done in parallel - batch_x = np.zeros( - (current_batch_size,) + self.image_shape, dtype=K.floatx()) + def _get_batches_of_transformed_samples(self, index_array): + batch_x = np.zeros((len(index_array),) + self.image_shape, dtype=K.floatx()) grayscale = self.color_mode == 'grayscale' # build batch of image data for i, j in enumerate(index_array): fname = self.filenames[j] - img = load_img( - os.path.join(self.directory, fname), - grayscale=grayscale, - target_size=self.target_size) + img = load_img(os.path.join(self.directory, fname), + grayscale=grayscale, + target_size=self.target_size) x = img_to_array(img, data_format=self.data_format) x = self.image_data_generator.random_transform(x) x = self.image_data_generator.standardize(x) batch_x[i] = x # optionally save augmented images to disk for debugging purposes if self.save_to_dir: - for i in range(current_batch_size): + for i, j in enumerate(index_array): img = array_to_img(batch_x[i], self.data_format, scale=True) fname = '{prefix}_{index}_{hash}.{format}'.format( - prefix=self.save_prefix, - index=current_index + i, - hash=np.random.randint(1e4), + prefix=self.save_prefix, index=j, hash=np.random.randint(1e7), format=self.save_format) img.save(os.path.join(self.save_to_dir, fname)) # build batch of labels @@ -1164,9 +1213,22 @@ class DirectoryIterator(Iterator): elif self.class_mode == 'binary': batch_y = self.classes[index_array].astype(K.floatx()) elif self.class_mode == 'categorical': - batch_y = np.zeros((len(batch_x), self.num_class), dtype=K.floatx()) + batch_y = np.zeros((len(batch_x), self.num_classes), dtype=K.floatx()) for i, label in enumerate(self.classes[index_array]): batch_y[i, label] = 1. else: return batch_x return batch_x, batch_y + + def next(self): + """For python 2.x. + + Returns: + The next batch. + """ + with self.lock: + index_array = next(self.index_generator) + # The transformation of images is not under thread lock + # so it can be done in parallel + return self._get_batches_of_transformed_samples(index_array) + diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py b/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py index 19693410e7..c0790b5a51 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py @@ -192,6 +192,8 @@ class TestImage(test.TestCase): _ = keras.preprocessing.image.load_img(fname) _ = keras.preprocessing.image.load_img(fname, grayscale=True) _ = keras.preprocessing.image.load_img(fname, target_size=(10, 10)) + _ = keras.preprocessing.image.load_img(fname, target_size=(10, 10), + interpolation='bilinear') # create iterator generator = keras.preprocessing.image.ImageDataGenerator() diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py b/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py index a5deec87af..642f4f2fac 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py @@ -169,7 +169,7 @@ def skipgrams(sequence, integers (eg. [0, 1, 1 .. ]), if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ] sampling_table: 1D array of size `vocabulary_size` where the entry i - encodes the probabibily to sample a word of rank i. + encodes the probability to sample a word of rank i. seed: Random seed. Returns: diff --git a/tensorflow/python/keras/_impl/keras/utils/__init__.py b/tensorflow/python/keras/_impl/keras/utils/__init__.py index 78f325cf61..370ae0dd0f 100644 --- a/tensorflow/python/keras/_impl/keras/utils/__init__.py +++ b/tensorflow/python/keras/_impl/keras/utils/__init__.py @@ -30,6 +30,7 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model +from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary from tensorflow.python.keras._impl.keras.utils.np_utils import normalize from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical from tensorflow.python.keras._impl.keras.utils.training_utils import multi_gpu_model diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils.py b/tensorflow/python/keras/_impl/keras/utils/data_utils.py index 0ede7f12f2..b3a1f64042 100644 --- a/tensorflow/python/keras/_impl/keras/utils/data_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/data_utils.py @@ -70,15 +70,15 @@ if sys.version_info[0] == 2: if content_type is not None: total_size = int(content_type.strip()) count = 0 - while 1: + while True: chunk = response.read(chunk_size) count += 1 - if not chunk: - reporthook(count, total_size, total_size) - break - if reporthook: + if reporthook is not None: reporthook(count, chunk_size, total_size) - yield chunk + if chunk: + yield chunk + else: + break response = urlopen(url, data) with open(filename, 'wb') as fd: @@ -262,9 +262,9 @@ def _hash_file(fpath, algorithm='sha256', chunk_size=65535): Example: ```python - >>> from keras.data_utils import _hash_file - >>> _hash_file('/path/to/file.zip') - 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' + >>> from keras.data_utils import _hash_file + >>> _hash_file('/path/to/file.zip') + 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' ``` Arguments: @@ -318,32 +318,35 @@ class Sequence(object): """Base object for fitting to a sequence of data, such as a dataset. Every `Sequence` must implements the `__getitem__` and the `__len__` methods. + If you want to modify your dataset between epochs you may implement + `on_epoch_end`. The method `__getitem__` should return a complete batch. + Notes: + `Sequence` are a safer way to do multiprocessing. This structure guarantees + that the network will only train once on each sample per epoch which is not + the case with generators. Examples: - ```python - from skimage.io import imread - from skimage.transform import resize - import numpy as np - - # Here, `x_set` is list of path to the images - # and `y_set` are the associated classes. - - class CIFAR10Sequence(Sequence): - def __init__(self, x_set, y_set, batch_size): - self.X,self.y = x_set,y_set - self.batch_size = batch_size - - def __len__(self): - return len(self.X) // self.batch_size - - def __getitem__(self,idx): - batch_x = self.X[idx*self.batch_size:(idx+1)*self.batch_size] - batch_y = self.y[idx*self.batch_size:(idx+1)*self.batch_size] - - return np.array([ - resize(imread(file_name), (200,200)) - for file_name in batch_x]), np.array(batch_y) + from skimage.io import imread + from skimage.transform import resize + import numpy as np + import math + # Here, `x_set` is list of path to the images + # and `y_set` are the associated classes. + class CIFAR10Sequence(Sequence): + def __init__(self, x_set, y_set, batch_size): + self.x, self.y = x_set, y_set + self.batch_size = batch_size + def __len__(self): + return math.ceil(len(self.x) / self.batch_size) + def __getitem__(self, idx): + batch_x = self.x[idx * self.batch_size:(idx + 1) * + self.batch_size] + batch_y = self.y[idx * self.batch_size:(idx + 1) * + self.batch_size] + return np.array([ + resize(imread(file_name), (200, 200)) + for file_name in batch_x]), np.array(batch_y) ``` """ @@ -372,7 +375,7 @@ class Sequence(object): def on_epoch_end(self): """Method called at the end of every epoch. """ - raise NotImplementedError + pass def get_index(ds, i): @@ -397,13 +400,13 @@ class SequenceEnqueuer(object): Examples: ```python - enqueuer = SequenceEnqueuer(...) - enqueuer.start() - datas = enqueuer.get() - for data in datas: - # Use the inputs; training, evaluating, predicting. - # ... stop sometime. - enqueuer.close() + enqueuer = SequenceEnqueuer(...) + enqueuer.start() + datas = enqueuer.get() + for data in datas: + # Use the inputs; training, evaluating, predicting. + # ... stop sometime. + enqueuer.close() ``` The `enqueuer.get()` should be an infinite stream of datas. @@ -549,28 +552,31 @@ class OrderedEnqueuer(SequenceEnqueuer): class GeneratorEnqueuer(SequenceEnqueuer): """Builds a queue out of a data generator. + The provided generator can be finite in which case the class will throw + a `StopIteration` exception. + Used in `fit_generator`, `evaluate_generator`, `predict_generator`. Arguments: - generator: a generator function which endlessly yields data + generator: a generator function which yields data use_multiprocessing: use multiprocessing if True, otherwise threading wait_time: time to sleep in-between calls to `put()` random_seed: Initial seed for workers, - will be incremented by one for each workers. + will be incremented by one for each worker. """ def __init__(self, generator, use_multiprocessing=False, wait_time=0.05, - random_seed=None): + seed=None): self.wait_time = wait_time self._generator = generator self._use_multiprocessing = use_multiprocessing self._threads = [] self._stop_event = None self.queue = None - self.random_seed = random_seed + self.seed = seed def start(self, workers=1, max_queue_size=10): """Kicks off threads which add data from the generator into the queue. @@ -589,6 +595,8 @@ class GeneratorEnqueuer(SequenceEnqueuer): self.queue.put(generator_output) else: time.sleep(self.wait_time) + except StopIteration: + break except Exception: self._stop_event.set() raise @@ -605,11 +613,11 @@ class GeneratorEnqueuer(SequenceEnqueuer): if self._use_multiprocessing: # Reset random seed else all children processes # share the same seed - np.random.seed(self.random_seed) + np.random.seed(self.seed) thread = multiprocessing.Process(target=data_generator_task) thread.daemon = True - if self.random_seed is not None: - self.random_seed += 1 + if self.seed is not None: + self.seed += 1 else: thread = threading.Thread(target=data_generator_task) self._threads.append(thread) @@ -661,4 +669,8 @@ class GeneratorEnqueuer(SequenceEnqueuer): if inputs is not None: yield inputs else: - time.sleep(self.wait_time) + all_finished = all([not thread.is_alive() for thread in self._threads]) + if all_finished and self.queue.empty(): + raise StopIteration() + else: + time.sleep(self.wait_time) diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py index 39a10c8650..efa79b1612 100644 --- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py @@ -43,7 +43,7 @@ class CustomObjectScope(object): Example: - Consider a custom object `MyObject` + Consider a custom object `MyObject` (e.g. a class): ```python with CustomObjectScope({'MyObject':MyObject}): @@ -271,6 +271,8 @@ class Progbar(object): self.total_width = 0 self.seen_so_far = 0 self.verbose = verbose + self._dynamic_display = (sys.stdout.isatty() or + 'ipykernel' in sys.modules) def update(self, current, values=None, force=False): """Updates the progress bar. @@ -294,18 +296,23 @@ class Progbar(object): self.seen_so_far = current now = time.time() + info = ' - %.0fs' % (now - self.start) if self.verbose == 1: - if not force and (now - self.last_update) < self.interval: + if (not force and (now - self.last_update) < self.interval and + current < self.target): return prev_total_width = self.total_width - sys.stdout.write('\b' * prev_total_width) - sys.stdout.write('\r') + if self._dynamic_display: + sys.stdout.write('\b' * prev_total_width) + sys.stdout.write('\r') + else: + sys.stdout.write('\n') - if self.target is not -1: + if self.target is not None: numdigits = int(np.floor(np.log10(self.target))) + 1 - barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) - bar = barstr % (current, self.target) + barstr = '%%%dd/%d [' % (numdigits, self.target) + bar = barstr % current prog = float(current) / self.target prog_width = int(self.width * prog) if prog_width > 0: @@ -318,17 +325,35 @@ class Progbar(object): bar += ']' sys.stdout.write(bar) self.total_width = len(bar) + else: + bar = '%7d/Unknown' % current + + self.total_width = len(bar) + sys.stdout.write(bar) if current: time_per_unit = (now - self.start) / current else: time_per_unit = 0 - eta = time_per_unit * (self.target - current) - info = '' - if current < self.target and self.target is not -1: - info += ' - ETA: %ds' % eta + if self.target is not None and current < self.target: + eta = time_per_unit * (self.target - current) + if eta > 3600: + eta_format = '%d:%02d:%02d' % (eta // 3600, (eta % 3600) // 60, + eta % 60) + elif eta > 60: + eta_format = '%d:%02d' % (eta // 60, eta % 60) + else: + eta_format = '%ds' % eta + + info = ' - ETA: %s' % eta_format else: - info += ' - %ds' % (now - self.start) + if time_per_unit >= 1: + info += ' %.0fs/step' % time_per_unit + elif time_per_unit >= 1e-3: + info += ' %.0fms/step' % (time_per_unit * 1e3) + else: + info += ' %.0fus/step' % (time_per_unit * 1e6) + for k in self.unique_values: info += ' - %s:' % k if isinstance(self.sum_values[k], list): @@ -342,7 +367,9 @@ class Progbar(object): self.total_width += len(info) if prev_total_width > self.total_width: - info += ((prev_total_width - self.total_width) * ' ') + info += (' ' * (prev_total_width - self.total_width)) + if self.target is not None and current >= self.target: + info += '\n' sys.stdout.write(info) sys.stdout.flush() @@ -350,17 +377,20 @@ class Progbar(object): if current >= self.target: sys.stdout.write('\n') - if self.verbose == 2: - if current >= self.target: - info = '%ds' % (now - self.start) + elif self.verbose == 2: + if self.target is None or current >= self.target: for k in self.unique_values: info += ' - %s:' % k - avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1])) + avg = np.mean( + self.sum_values[k][0] / max(1, self.sum_values[k][1])) if avg > 1e-3: info += ' %.4f' % avg else: info += ' %.4e' % avg - sys.stdout.write(info + '\n') + info += '\n' + + sys.stdout.write(info) + sys.stdout.flush() self.last_update = now diff --git a/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py b/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py index ac7bd49406..31ef4773ad 100644 --- a/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py +++ b/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py @@ -352,5 +352,5 @@ class KerasRegressor(BaseWrapper): kwargs = self.filter_sk_params(Sequential.evaluate, kwargs) loss = self.model.evaluate(x, y, **kwargs) if isinstance(loss, list): - return loss[0] - return loss + return -loss[0] + return -loss diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index 5076434dbb..04fe46cedc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -153,7 +153,7 @@ tf_class { } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'sample_weight_mode\', \'weighted_metrics\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\'], " } member_method { name: "compute_mask" @@ -177,7 +177,7 @@ tf_class { } member_method { name: "fit_generator" - argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " + argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], " } member_method { name: "from_config" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index 5034fdff2a..3946ff4d5f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -153,7 +153,7 @@ tf_class { } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'sample_weight_mode\', \'weighted_metrics\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\'], " } member_method { name: "compute_mask" @@ -177,7 +177,7 @@ tf_class { } member_method { name: "fit_generator" - argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], " + argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], " } member_method { name: "from_config" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt index 8ad1f32551..66cd37bb3a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.keras.preprocessing.image.DirectoryIterator" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" @@ -11,6 +12,10 @@ tf_class { name: "next" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "on_epoch_end" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-iterator.pbtxt index d30462a8eb..69488d63bf 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-iterator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-iterator.pbtxt @@ -1,11 +1,16 @@ path: "tensorflow.keras.preprocessing.image.Iterator" tf_class { is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" argspec: "args=[\'self\', \'n\', \'batch_size\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "on_epoch_end" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt index 841f1c5585..4ef6e6e99e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.keras.preprocessing.image.NumpyArrayIterator" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member_method { name: "__init__" @@ -11,6 +12,10 @@ tf_class { name: "next" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "on_epoch_end" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "reset" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt index 5652687033..d28fef6965 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt @@ -34,7 +34,7 @@ tf_module { } member_method { name: "load_img" - argspec: "args=[\'path\', \'grayscale\', \'target_size\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " + argspec: "args=[\'path\', \'grayscale\', \'target_size\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'nearest\'], " } member_method { name: "random_channel_shift" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.utils.-generator-enqueuer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.utils.-generator-enqueuer.pbtxt index bf27a97cf2..1c5868e711 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.utils.-generator-enqueuer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.utils.-generator-enqueuer.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'generator\', \'use_multiprocessing\', \'wait_time\', \'random_seed\'], varargs=None, keywords=None, defaults=[\'False\', \'0.05\', \'None\'], " + argspec: "args=[\'self\', \'generator\', \'use_multiprocessing\', \'wait_time\', \'seed\'], varargs=None, keywords=None, defaults=[\'False\', \'0.05\', \'None\'], " } member_method { name: "get" -- GitLab From 8ad5cc00f21eb9d6f1811d7ed771f6f042dba1ba Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 14 Nov 2017 17:08:49 -0800 Subject: [PATCH 0442/1801] [TFXLA] Add source node and make GetSwitchCluster more conservative. PiperOrigin-RevId: 175758538 --- .../tf2xla/functionalize_control_flow.cc | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 6ef4860f35..40a484da09 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -731,11 +731,12 @@ string DebugString(const Graph& graph, FunctionalizeCond::ClusterHandle::Vector* clusters) { string ret = "digraph {\ncompound=true;labeljust=\"r\";ranksep=0.24\n"; std::map subgraphs; + auto name = [](const Node* n) { + return strings::StrCat(n->type_string(), "_", n->id()); + }; for (Node* n : graph.nodes()) { - if (n->IsOp()) { - strings::StrAppend(&subgraphs[clusters->at(n).Get()], n->id(), - " [label=\"", n->name(), "\"];\n"); - } + strings::StrAppend(&subgraphs[clusters->at(n).Get()], n->id(), " [label=\"", + name(n), "\"];\n"); } for (auto kv : subgraphs) { strings::StrAppend(&ret, "subgraph cluster_", kv.first.ToString(), " {\n", @@ -743,16 +744,11 @@ string DebugString(const Graph& graph, kv.first.ToString(), "\";\n", kv.second, "}\n"); } for (Node* n : graph.nodes()) { - if (!n->IsOp()) { - continue; - } for (Node* in : n->in_nodes()) { - if (in->IsOp()) { - strings::StrAppend(&ret, in->id(), " -> ", n->id(), ";\n"); - } + strings::StrAppend(&ret, in->id(), " -> ", n->id(), ";\n"); } } - return strings::StrCat(ret, "}"); + return strings::StrCat(ret, "} // end"); } string DebugString(const FunctionalizeCond::ClusteredGraph& clustered_graph) { @@ -761,16 +757,24 @@ string DebugString(const FunctionalizeCond::ClusteredGraph& clustered_graph) { return cluster.representative.ToString(); }; for (auto kv : clustered_graph) { - strings::StrAppend(&ret, kv.first.ToString(), " [label=\"", name(kv.second), - " (", kv.second.switch_nodes.size(), ", ", - kv.second.merge_nodes.size(), ")\"];\n"); + if (!kv.second.switch_nodes.empty() || !kv.second.merge_nodes.empty()) { + strings::StrAppend( + &ret, kv.first.ToString(), " [label=\"", name(kv.second), + kv.second.switch_nodes.empty() + ? "" + : strings::StrCat(" switches=", kv.second.switch_nodes.size()), + kv.second.merge_nodes.empty() + ? "" + : strings::StrCat(" merges=", kv.second.merge_nodes.size()), + "\"];\n"); + } } for (auto kv : clustered_graph) { for (auto in : kv.second.in_nodes) { strings::StrAppend(&ret, name(*in), " -> ", name(kv.second), ";\n"); } } - return strings::StrCat(ret, "}"); + return strings::StrCat(ret, "} // end"); } bool IsDeadSwitch(const Node* node) { @@ -790,9 +794,6 @@ bool IsDeadSwitch(const Node* node) { void FunctionalizeCond::CreateClusters() { for (Node* node : graph_->nodes()) { - if (!node->IsOp()) { - continue; - } if (IsSwitch(node)) { switch_nodes_.insert(node); } else if (IsMerge(node)) { @@ -825,6 +826,10 @@ void FunctionalizeCond::CreateClusters() { clusters_.at(node).Merge(&clusters_.at(in)); } } + // Group all source clusters together. + if (node->IsSource() || node->in_edges().empty()) { + clusters_.at(node).Merge(&clusters_.at(ClusterHandle(Graph::kSourceId))); + } } } @@ -876,7 +881,7 @@ void FunctionalizeCond::CreateClusteredGraph() { for (const Node* in : node->in_nodes()) { ClusterHandle other_repr = Representative(in); // Skip source, sink and internal edges. - if (!in->IsOp() || other_repr == repr) { + if (other_repr == repr) { continue; } Cluster& cluster_node_in = clustered_graph_[other_repr]; @@ -887,7 +892,7 @@ void FunctionalizeCond::CreateClusteredGraph() { for (const Node* out : node->out_nodes()) { ClusterHandle other_repr = Representative(out); // Skip source, sink and internal edges. - if (!out->IsOp() || other_repr == repr) { + if (other_repr == repr) { continue; } Cluster& cluster_node_out = clustered_graph_[other_repr]; @@ -897,6 +902,7 @@ void FunctionalizeCond::CreateClusteredGraph() { } return cluster_node; }; + update_cluster_for_node(graph_->source_node()); for (Node* node : switch_nodes_) { update_cluster_for_node(node).switch_nodes.insert(node); } @@ -955,7 +961,7 @@ gtl::optional FunctionalizeCond::GetSwitchCluster( for (Cluster* in : merge_cluster.in_nodes) { Cluster* cluster = in; if (in->switch_nodes.empty()) { - if (in->in_nodes.size() != 1) { + if (in->in_nodes.size() != 1 || in->out_nodes.size() != 1) { return gtl::nullopt; } // There is only a single `in` cluster. @@ -1292,11 +1298,8 @@ std::vector> FunctionalizeCond::SortedMergeNodes() { VLOG(2) << "ProcessClusteredGraph"; std::stack> stack; - for (auto& c : clustered_graph_) { - if (c.second.in_nodes.empty()) { - stack.push({0, &c.second}); - } - } + // Initialize with the source node. + stack.push({0, &clustered_graph_[ClusterHandle(Graph::kSourceId)]}); // Perform a depth-first traversal of the clustered graph computing the // switch-merge depth. -- GitLab From d16d8495d024e531b34d88745f99679414992fc2 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 14 Nov 2017 17:10:33 -0800 Subject: [PATCH 0443/1801] python3 fixes PiperOrigin-RevId: 175758757 --- tensorflow/python/kernel_tests/template_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py index 798bd0fe89..40c0ade62a 100644 --- a/tensorflow/python/kernel_tests/template_test.py +++ b/tensorflow/python/kernel_tests/template_test.py @@ -513,8 +513,8 @@ class TemplateTest(test.TestCase): tb = template.make_template("s", function_with_create, trainable=False) # Initially there are not variables created. - self.assertEqual([], ta.global_variables) - self.assertEqual([], tb.global_variables) + self.assertEqual([], list(ta.global_variables)) + self.assertEqual([], list(tb.global_variables)) # After calling there are variables created. ta() tb() @@ -531,8 +531,8 @@ class TemplateTest(test.TestCase): tb = template.make_template("bar", variable_scoped_function, True) # Initially there are not variables created. - self.assertEqual([], ta.trainable_variables) - self.assertEqual([], tb.trainable_variables) + self.assertEqual([], list(ta.trainable_variables)) + self.assertEqual([], list(tb.trainable_variables)) # After calling there are variables created. ta() tb() @@ -550,8 +550,8 @@ class TemplateTest(test.TestCase): variable_scoped_function_with_local_variable) # Initially there are not variables created. - self.assertEqual([], ta.local_variables) - self.assertEqual([], tb.local_variables) + self.assertEqual([], list(ta.local_variables)) + self.assertEqual([], list(tb.local_variables)) # After calling there are variables created. ta() tb() -- GitLab From 5936396ceb7bf5e0f45d8c2364bba51901124b54 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 14 Nov 2017 17:56:15 -0800 Subject: [PATCH 0444/1801] Benchmark for tfe_py_execute(identity), for reference with the other identity benchmarks PiperOrigin-RevId: 175763756 --- tensorflow/python/eager/benchmarks_test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 435505edd7..9849f0f322 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -170,6 +170,18 @@ class MicroBenchmarks(test.Benchmark): m = self._m_2 self._run(lambda: gen_array_ops.identity(m), 30000) + def benchmark_tfe_py_execute_identity(self): + m = self._m_2 + ctx_handle = context.context()._handle + attrs = ("T", self._m_2.dtype.as_datatype_enum) + inputs = [m] + + def f(): + pywrap_tensorflow.TFE_Py_Execute( + ctx_handle, None, "Identity", inputs, attrs, 1) + + self._run(f, 30000) + def benchmark_tf_gradient_function_identity(self): m = self._m_2 self._run( -- GitLab From 4c9b4ebfcb40b8a7b3fe11411f6b91c9de326c56 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Tue, 14 Nov 2017 18:12:07 -0800 Subject: [PATCH 0445/1801] Fix a few issues with HloValue and dataflow analysis identified when debugging the causes for the rollback of the new copy insertion (cl/174423881): (1) Mark values for deletion during dataflow propagation and delete later, rather than delete immediately. It was possible for a value to be deleted (a phi is optimized away), and still have references to it in the value sets. (2) Make call/while and root instructions explicit uses of the values which reach them. This subsumes the need for the HloValue::live_out_of_computation_ property which was buggy (which computation is it live-out of?). (3) Delete unused methods HloValue::RecomputeUses and HloValue::RemovePosition. PiperOrigin-RevId: 175765613 --- .../xla/service/hlo_dataflow_analysis.cc | 82 ++++++++--- .../xla/service/hlo_dataflow_analysis.h | 22 +-- .../xla/service/hlo_dataflow_analysis_test.cc | 63 ++++---- .../compiler/xla/service/hlo_ordering.cc | 30 ++-- tensorflow/compiler/xla/service/hlo_value.cc | 135 ++++++------------ tensorflow/compiler/xla/service/hlo_value.h | 19 +-- 6 files changed, 165 insertions(+), 186 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index ff80f18bb5..3f34b9ceb3 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -75,11 +75,43 @@ HloValue* HloDataflowAnalysis::NewHloValue(HloInstruction* instruction, std::forward_as_tuple(value_id, instruction, index, is_phi)); CHECK(emplaced.second); + VLOG(4) << "NewHloValue = " << emplaced.first->second.ToShortString(); + return &emplaced.first->second; } -void HloDataflowAnalysis::DeleteHloValue(HloValue::Id value_id) { - values_.erase(value_id); +void HloDataflowAnalysis::MarkValueForDeletion(HloValue::Id value_id) { + HloValue& value = values_.at(value_id); + VLOG(4) << "MarkValueForDeletion(" << value.ToShortString() << ")"; + + value_ids_to_delete_.push_back(value_id); +} + +void HloDataflowAnalysis::DeleteMarkedValues() { +#ifndef NDEBUG + // Verify that no marked-for-deletion values are in any of the value sets. + tensorflow::gtl::FlatSet id_set(value_ids_to_delete_.begin(), + value_ids_to_delete_.end()); + for (const auto& pair : value_sets_) { + const HloInstruction* instruction = pair.first; + const InstructionValueSet& instruction_value_set = pair.second; + for (const auto& index_value_set : instruction_value_set) { + const HloValueSet& value_set = index_value_set.second; + for (const HloValue* value : value_set.values()) { + DCHECK(!ContainsKey(id_set, value->id())) + << "Value " << value->ToShortString() + << " marked for deletion, but still exists in value set for " + "instruction " + << instruction->name(); + } + } + } +#endif + + for (HloValue::Id value_id : value_ids_to_delete_) { + values_.erase(value_id); + } + value_ids_to_delete_.clear(); } string HloDataflowAnalysis::ToString() const { @@ -121,6 +153,7 @@ bool HloDataflowAnalysis::Phi( HloInstruction* instruction, tensorflow::gtl::ArraySlice inputs) { CHECK(ssa_form_); + VLOG(4) << "Phi(" << instruction->name() << ")"; for (const InstructionValueSet* input : inputs) { DCHECK(ShapeUtil::Compatible(instruction->shape(), input->shape())); @@ -183,7 +216,7 @@ bool HloDataflowAnalysis::Phi( } else if (current_value != &new_value) { if (current_value_defined_here) { // Remove the existing phi. - DeleteHloValue(current_value->id()); + MarkValueForDeletion(current_value->id()); } value_set.Clear(); value_set.AddValue(&new_value); @@ -193,7 +226,8 @@ bool HloDataflowAnalysis::Phi( // Multiple distinct values reach this point. A phi value is // necessary. CHECK_GT(input_value_ids.size(), 1); - if (current_value == nullptr || !current_value->is_phi()) { + if (current_value == nullptr || + !(current_value->is_phi() && current_value_defined_here)) { value_set.Clear(); value_set.AddValue(NewHloValue(instruction, index, /*is_phi=*/true)); changed = true; @@ -485,11 +519,13 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet( } } -void HloDataflowAnalysis::UpdateInstructionsAndPropagate( - tensorflow::gtl::ArraySlice instructions) { +void HloDataflowAnalysis::Propagate() { std::queue worklist; - for (HloInstruction* instruction : instructions) { - worklist.push(instruction); + + for (HloComputation* computation : module_->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + worklist.push(instruction); + } } while (!worklist.empty()) { @@ -662,20 +698,17 @@ StatusOr> HloDataflowAnalysis::Run( new HloDataflowAnalysis(module, ssa_form, bitcast_defines_value)); TF_RETURN_IF_ERROR(dataflow_analysis->InitializeInstructionValueSets()); + dataflow_analysis->Propagate(); - // Construct list of all instructions to initialize the worklist to propagate - // the data flow. For efficiency sort the instruction in post order so - // producers appear before consumers. - std::vector all_instructions; - for (const HloComputation* computation : module->MakeComputationPostOrder()) { - for (HloInstruction* instruction : - computation->MakeInstructionPostOrder()) { - all_instructions.push_back(instruction); - } - } - dataflow_analysis->UpdateInstructionsAndPropagate(all_instructions); + // Delete all values marked for deletion. + dataflow_analysis->DeleteMarkedValues(); - // Add in positions to all values. + // Gather and set all non-definition positions of all values. Value deletion + // is rare, so just use a vector indexed by Value::Id rather than a map from + // Value::Id to positions. There should be very few holes in the vector, and + // lookup is faster. + std::vector> value_positions( + dataflow_analysis->next_value_id_); for (const HloComputation* computation : module->computations()) { for (HloInstruction* instruction : computation->instructions()) { for (const auto& pair : @@ -684,13 +717,18 @@ StatusOr> HloDataflowAnalysis::Run( const HloValueSet& value_set = pair.second; for (const HloValue* value : value_set.values()) { if (value->defining_instruction() != instruction) { - dataflow_analysis->GetValue(value->id()) - .AddPosition(instruction, index); + value_positions[value->id()].push_back( + HloPosition{instruction, index}); } } } } } + for (auto& pair : dataflow_analysis->values_) { + HloValue::Id value_id = pair.first; + HloValue& value = pair.second; + value.SetPositionsAndComputeUses(value_positions[value_id]); + } // Construct vector of values. dataflow_analysis->values_vector_.reserve(dataflow_analysis->values_.size()); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index 63467f3206..dfd81ae951 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -126,13 +126,16 @@ class HloDataflowAnalysis { HloValue* NewHloValue(HloInstruction* instruction, const ShapeIndex& index, bool is_phi = false); - // Delete the HloValue with the given ID. - void DeleteHloValue(HloValue::Id value_id); + // Mark the HloValue with the given ID for deletion. + void MarkValueForDeletion(HloValue::Id value_id); + + // Delete all HloValues marked for deletion. Should be called after + // propagation is complete. + void DeleteMarkedValues(); // Constructs and initializes the InstructionValueSets of all instructions to // contain exactly the HloValues defined by each instruction. These values can - // then propagated throughout the HLO graph by calling - // UpdateInstructionsAndPropagate. + // then propagated throughout the HLO graph by calling Propagate. Status InitializeInstructionValueSets(); // Updates the value set of the given instruction based on the values flowing @@ -152,10 +155,8 @@ class HloDataflowAnalysis { bool UpdateTupleValueSet(HloInstruction* tuple); bool UpdateWhileValueSet(HloInstruction* xla_while); - // Update the value sets of the given instructions and propagate the - // changes to fixed point. - void UpdateInstructionsAndPropagate( - tensorflow::gtl::ArraySlice instructions); + // Propagate the dataflow through the module. + void Propagate(); // Return the result of the SSA Phi function applied to the given inputs at // the given instruction. If skip_top_level is true, then the top level of the @@ -191,6 +192,11 @@ class HloDataflowAnalysis { // A map from instruction to InstructionValueSet. std::unordered_map value_sets_; + // Values marked for deletion during construction. We don't delete them + // immediately because references to them may remain in ValueSets temporarily + // during propagation. After construction, these values are deleted. + std::vector value_ids_to_delete_; + // A vector containing all HloValues sorted by HloValue::Id. std::vector values_vector_; diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 66a538fc51..f08f0b1d68 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -211,10 +211,10 @@ TEST_P(HloDataflowAnalysisTest, NestedTuple) { HloPosition{nested_tuple, {0, 0}}, HloPosition{nested_tuple, {1, 0}}, HloPosition{nested_tuple, {2}}, HloPosition{gte_tuple, {0}}, HloPosition{gte_out, {}})); - // Constant values should have no uses though one is live out. The positions - // where they appear as operands are on instructions which do not use the - // values (eg, Tuple). - EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).uses().empty()); + // Constant values should have only a single use, which is the root of the + // computation. + EXPECT_THAT(analysis.GetValueDefinedAt(constant1, /*index=*/{}).uses(), + UnorderedElementsAre(HloUse{gte_out, 0, {0}})); EXPECT_TRUE(analysis.GetValueDefinedAt(constant2).uses().empty()); // The top-level tuple values are used in GTE instructions. @@ -274,12 +274,11 @@ TEST_P(HloDataflowAnalysisTest, SingleCall) { EXPECT_EQ(analysis.GetUniqueValueAt(call), analysis.GetValueDefinedAt(add)); EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(), - UnorderedElementsAre(HloUse{add, 0, {}})); + UnorderedElementsAre(HloUse{call, 0, {}}, HloUse{add, 0, {}})); EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(), - UnorderedElementsAre(HloUse{add, 1, {}})); + UnorderedElementsAre(HloUse{call, 1, {}}, HloUse{add, 1, {}})); EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_computation()); } TEST_P(HloDataflowAnalysisTest, ComputationCalledTwiceWithSameArguments) { @@ -323,18 +322,17 @@ TEST_P(HloDataflowAnalysisTest, ComputationCalledTwiceWithSameArguments) { EXPECT_TRUE(analysis.ValueIsDefinedAt(sub)); EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(), - UnorderedElementsAre(HloUse{add, 0, {}})); + UnorderedElementsAre(HloUse{call1, 0, {}}, HloUse{call2, 0, {}}, + HloUse{add, 0, {}})); EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(), - UnorderedElementsAre(HloUse{add, 1, {}})); + UnorderedElementsAre(HloUse{call1, 1, {}}, HloUse{call2, 1, {}}, + HloUse{add, 1, {}})); // The Add from the subcomputation is used as both operands of the Subtract. EXPECT_THAT(analysis.GetValueDefinedAt(add).uses(), UnorderedElementsAre(HloUse{sub, 0, {}}, HloUse{sub, 1, {}})); EXPECT_FALSE(analysis.GetValueDefinedAt(add).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_computation()); - EXPECT_TRUE(analysis.GetValueDefinedAt(sub).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(sub).live_out_of_computation()); } TEST_P(HloDataflowAnalysisTest, ComputationCalledTwiceWithDifferentArguments) { @@ -408,7 +406,7 @@ TEST_P(HloDataflowAnalysisTest, NestedCalls) { auto outer_param1 = outer_builder.AddInstruction( HloInstruction::CreateParameter(1, scalar_shape_, "param1")); // Swizzle parameters. - outer_builder.AddInstruction(HloInstruction::CreateCall( + auto nested_call = outer_builder.AddInstruction(HloInstruction::CreateCall( scalar_shape_, {outer_param1, outer_param0}, inner_computation)); HloComputation* outer_computation = module_->AddEmbeddedComputation(outer_builder.Build()); @@ -418,7 +416,7 @@ TEST_P(HloDataflowAnalysisTest, NestedCalls) { HloInstruction::CreateConstant(Literal::CreateR0(1.0))); auto constant2 = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(2.0))); - builder.AddInstruction(HloInstruction::CreateCall( + auto call = builder.AddInstruction(HloInstruction::CreateCall( scalar_shape_, {constant1, constant2}, outer_computation)); module_->AddEntryComputation(builder.Build()); @@ -431,10 +429,14 @@ TEST_P(HloDataflowAnalysisTest, NestedCalls) { // Verify that the uses of the constants are properly swizzled by parameter // permutation in nested_call. - EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(), - UnorderedElementsAre(HloUse{add, 1, {}})); - EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(), - UnorderedElementsAre(HloUse{add, 0, {}})); + EXPECT_THAT( + analysis.GetValueDefinedAt(constant1).uses(), + UnorderedElementsAre(HloUse{call, 0, {}}, HloUse{nested_call, 1, {}}, + HloUse{add, 1, {}})); + EXPECT_THAT( + analysis.GetValueDefinedAt(constant2).uses(), + UnorderedElementsAre(HloUse{call, 1, {}}, HloUse{nested_call, 0, {}}, + HloUse{add, 0, {}})); EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_module()); } @@ -469,7 +471,7 @@ TEST_P(HloDataflowAnalysisTest, SingleWhile) { HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); auto add = body_builder.AddInstruction(HloInstruction::CreateBinary( scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1)); - body_builder.AddInstruction( + auto body_root = body_builder.AddInstruction( HloInstruction::CreateTuple({body_element_0, add})); HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); @@ -496,8 +498,6 @@ TEST_P(HloDataflowAnalysisTest, SingleWhile) { bool ssa_form = GetParam(); const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - EXPECT_TRUE( - analysis.GetValueDefinedAt(cond_constant).live_out_of_computation()); EXPECT_FALSE(analysis.GetValueDefinedAt(cond_constant).live_out_of_module()); if (ssa_form) { @@ -517,14 +517,14 @@ TEST_P(HloDataflowAnalysisTest, SingleWhile) { EXPECT_THAT( analysis.GetValueDefinedAt(constant1).uses(), - UnorderedElementsAre(HloUse{add, 0, {}}, HloUse{xla_while, 0, {0}})); + UnorderedElementsAre(HloUse{add, 0, {}}, HloUse{body_root, 0, {}}, + HloUse{xla_while, 0, {0}})); // Constant1 passes through the body and out of the module. EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{1}) .live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_computation()); EXPECT_FALSE(analysis.GetValueDefinedAt(add).live_out_of_module()); } else { // While instruction and subcomputation parameters should not define values @@ -538,7 +538,6 @@ TEST_P(HloDataflowAnalysisTest, SingleWhile) { EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_computation()); } } @@ -915,9 +914,11 @@ TEST_P(HloDataflowAnalysisTest, TupleSelect) { HloUse{select12, 1, {}})); // The two constant values just pass through the Selects and are not - // used. They are live out however. - EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).uses().empty()); - EXPECT_TRUE(analysis.GetValueDefinedAt(constant2).uses().empty()); + // used except at the root. They are live out however. + EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(), + UnorderedElementsAre(HloUse{select1234, 1, {0}})); + EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(), + UnorderedElementsAre(HloUse{select1234, 1, {0}})); EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); EXPECT_TRUE(analysis.GetValueDefinedAt(constant2).live_out_of_module()); } @@ -1318,7 +1319,7 @@ TEST_P(HloDataflowAnalysisTest, WhileParameters_Sequential) { auto entry = module_->AddEntryComputation(builder.Build()); bool ssa_form = GetParam(); - const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + RunAnalysis(ssa_form); SequentialHloOrdering::HloModuleSequence sequence; sequence.insert({entry, {param, xla_while}}); @@ -1329,12 +1330,6 @@ TEST_P(HloDataflowAnalysisTest, WhileParameters_Sequential) { SequentialHloOrdering ordering(module_.get(), sequence); - // 'add' is the body root even though later instructions follow in the order - // like 'dead_negate'. Only 'add' should be live out of the computation. - EXPECT_TRUE(analysis.GetValueDefinedAt(add).live_out_of_computation()); - EXPECT_FALSE( - analysis.GetValueDefinedAt(dead_negate).live_out_of_computation()); - // 'add' is live out of the body and will interfere with an later instructions // such as 'dead_constant' and 'dead_negate'. EXPECT_TRUE(InstructionsMayInterfere(ordering, add, dead_constant)); diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc index 3700936979..6f6e679a21 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering.cc @@ -173,6 +173,19 @@ bool HloOrdering::UseIsBeforeValueDefinition( return true; } } + + // The use at a call occurs before values that are defined in the called + // computation. + if (use.instruction->opcode() == HloOpcode::kCall) { + const HloInstruction* call = use.instruction; + if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), + call->to_apply())) { + VLOG(4) << " use is call " << use.instruction->name() + << " and def is in called computation"; + return true; + } + } + VLOG(4) << " use is not before value"; return false; } @@ -187,23 +200,6 @@ bool HloOrdering::LiveRangeStrictlyBefore( return false; } - // Live-out values from the module can never have ranges strictly before any - // other value. - if (a.live_out_of_module()) { - VLOG(4) << "a is live out of module"; - return false; - } - - // Live-out values of computations can never have ranges strictly before any - // other value in the computation (including values nested in - // subcomputations). - if (a.live_out_of_computation() && - call_graph_->InstructionIsNestedIn(b.defining_instruction(), - a.defining_instruction()->parent())) { - VLOG(4) << "a is live out of computation containing b"; - return false; - } - // All uses of 'a' must be before 'b' is defined. for (const HloUse& use : a.uses()) { if (!UseIsBeforeValueDefinition(use, b, dataflow)) { diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc index e6cf0d37b8..05b7dce3d1 100644 --- a/tensorflow/compiler/xla/service/hlo_value.cc +++ b/tensorflow/compiler/xla/service/hlo_value.cc @@ -71,7 +71,7 @@ HloValue::HloValue(HloValue::Id id, HloInstruction* instruction, const ShapeIndex& index, bool is_phi) : id_(id), is_phi_(is_phi) { // The defining position is always the first element in the positions_ vector. - AddPosition(instruction, index); + positions_.push_back(HloPosition{instruction, index}); } bool HloValue::operator==(const HloValue& other) const { @@ -130,18 +130,14 @@ bool MayUseOperandValue(int64 operand_number, const ShapeIndex& index, CHECK_LE(operand_number, 2); return operand_number == 0 || index.empty(); - case HloOpcode::kCall: case HloOpcode::kTuple: // These instructions always pass through their operands transparently. return false; + case HloOpcode::kCall: case HloOpcode::kWhile: - // Though the while instructions passes through its operands, we return - // true because in SSA form there may be a Phi at the parameter of the - // while which is considered a use of its incoming value because the Phi - // input values are not passed through into the body computation. Because - // this function is used in both SSA and non-SSA forms of the analysis - // conservatively return true. + // Although call and while instructions pass through their operands, they + // are considered uses. return true; default: @@ -151,103 +147,58 @@ bool MayUseOperandValue(int64 operand_number, const ShapeIndex& index, } // namespace -void HloValue::AddPosition(HloInstruction* instruction, - const ShapeIndex& index) { - HloPosition new_position{instruction, index}; - - // The new position must not already exist in positions_. - for (const HloPosition& position : positions_) { - DCHECK_NE(position, new_position); - } - - positions_.push_back(std::move(new_position)); - - // Update uses. - for (HloInstruction* user : instruction->users()) { - for (int64 operand_number : user->OperandIndices(instruction)) { - if (MayUseOperandValue(operand_number, index, user)) { - HloUse new_use{user, operand_number, index}; - - // The new use must not already exist in uses_. - for (const HloUse& use : uses_) { - DCHECK_NE(use, new_use); - } - - uses_.push_back(std::move(new_use)); +void HloValue::SetPositionsAndComputeUses( + tensorflow::gtl::ArraySlice positions) { + CHECK_EQ(positions_.size(), 1) << "SetPositions should only be called once."; + + // The positions must be unique and should not contain the defining position + // as this is added at construction time. + for (const HloPosition& position_a : positions) { + DCHECK_NE(position_a, defining_position()); + for (const HloPosition& position_b : positions) { + if (&position_a != &position_b) { + DCHECK_NE(position_a, position_b); } } } - // Update liveout status of this HloValue. - const HloModule& module = *instruction->parent()->parent(); - if (instruction == module.entry_computation()->root_instruction()) { - live_out_of_module_ = true; - } - - if (instruction == instruction->parent()->root_instruction()) { - live_out_of_computation_ = true; - } -} + positions_.insert(positions_.end(), positions.begin(), positions.end()); -void HloValue::RemovePosition(HloInstruction* instruction, - const ShapeIndex& index) { - // The defining position cannot be removed. - CHECK(!(instruction == defining_instruction() && index == defining_index())); - - int64 size_before = positions_.size(); - positions_.erase( - std::remove_if(positions_.begin(), positions_.end(), - [instruction, &index](const HloPosition& position) { - return position.instruction == instruction && - position.index == index; - }), - positions_.end()); - // Only a single position should have been removed. - CHECK_EQ(positions_.size(), size_before - 1); - - // Update uses which referred to this position. - uses_.erase(std::remove_if(uses_.begin(), uses_.end(), - [instruction, &index](const HloUse& use) { - return use.instruction->operand( - use.operand_number) == instruction && - use.operand_index == index; - }), - uses_.end()); - - // Returns whether this value is contained in the given instruction's output. - auto is_contained_in = [this](const HloInstruction* instruction) { - for (const HloPosition& position : positions()) { - if (position.instruction == instruction) { - return true; - } + // Gather the computation roots at which this value appears. + tensorflow::gtl::FlatSet root_positions; + for (const HloPosition& position : positions_) { + if (position.instruction == + position.instruction->parent()->root_instruction()) { + root_positions.insert(position.instruction); } - return false; - }; - - const HloModule& module = *instruction->parent()->parent(); - if (instruction == module.entry_computation()->root_instruction()) { - // Value has been removed from a position in the entry root instruction. - live_out_of_module_ = - is_contained_in(module.entry_computation()->root_instruction()); - } - if (instruction == defining_instruction()->parent()->root_instruction()) { - // Value has been removed from the root of the computation the value has - // been defined in. - live_out_of_computation_ = - is_contained_in(defining_instruction()->parent()->root_instruction()); } -} -void HloValue::RecomputeUses() { - uses_.clear(); - for (const HloPosition& position : positions()) { + // Build vector of HloUses for the value. + for (const HloPosition& position : positions_) { for (HloInstruction* user : position.instruction->users()) { for (int64 operand_number : user->OperandIndices(position.instruction)) { - if (MayUseOperandValue(operand_number, position.index, user)) { - uses_.push_back(HloUse{user, operand_number, position.index}); + // Root instructions of computations are considered to be uses whether + // or not the root instruction itself actually uses the value. + if (MayUseOperandValue(operand_number, position.index, user) || + ContainsKey(root_positions, user)) { + HloUse new_use{user, operand_number, position.index}; + + // The new use must not already exist in uses_. + for (const HloUse& use : uses_) { + DCHECK_NE(use, new_use); + } + + uses_.push_back(std::move(new_use)); } } } + + // Update liveout status of this HloValue. + const HloModule& module = *position.instruction->parent()->parent(); + if (position.instruction == + module.entry_computation()->root_instruction()) { + live_out_of_module_ = true; + } } } diff --git a/tensorflow/compiler/xla/service/hlo_value.h b/tensorflow/compiler/xla/service/hlo_value.h index 6872bc76a8..2a711e8b42 100644 --- a/tensorflow/compiler/xla/service/hlo_value.h +++ b/tensorflow/compiler/xla/service/hlo_value.h @@ -121,6 +121,12 @@ class HloValue { HloValue(Id id, HloInstruction* instruction, const ShapeIndex& index, bool is_phi = false); + // Sets the positions in the module at which the HloValue appears. Updates + // uses. Should be called once and only once. The defining position should not + // be included in 'positions' as this is set at construction time. + void SetPositionsAndComputeUses( + tensorflow::gtl::ArraySlice positions); + // Return a unique identifier for this HloValue. This value is used for stable // sorting and iteration Id id() const { return id_; } @@ -143,28 +149,15 @@ class HloValue { // Return the shape of this HloValue. const Shape& shape() const { return defining_position().shape(); } - // Add or remove a position at which the HloValue appears. The definition - // position can not be removed. The uses of the HloValue are updated. - void AddPosition(HloInstruction* instruction, const ShapeIndex& index); - void RemovePosition(HloInstruction* instruction, const ShapeIndex& index); - - // Remove all positions except the defining position. Updates uses. - void ClearPositions(); - // Return all positions of the HloValue in the module. const std::vector& positions() const { return positions_; } // Return all uses of the HloValue. const std::vector& uses() const { return uses_; } - void RecomputeUses(); - // Get whether this HloValue is live out of the module. bool live_out_of_module() const { return live_out_of_module_; } - // Get whether this HloValue is live out of the computation it is defined in. - bool live_out_of_computation() const { return live_out_of_computation_; } - bool operator==(const HloValue& other) const; bool operator!=(const HloValue& other) const; -- GitLab From 952030285da7b265c2748506d9613a2ef34e214e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 18:26:41 -0800 Subject: [PATCH 0446/1801] Added support for multi-column vocabs to the int-to-string lookup API. PiperOrigin-RevId: 175767044 --- .../python/kernel_tests/lookup_ops_test.py | 33 ++++++++++++- tensorflow/python/ops/lookup_ops.py | 48 ++++++++++++++----- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index 9944b5929f..d4bc71f1c8 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -597,10 +597,10 @@ class IndexTableFromTensor(test.TestCase): class IndexToStringTableFromFileTest(test.TestCase): - def _createVocabFile(self, basename): + def _createVocabFile(self, basename, values=("brain", "salad", "surgery")): vocabulary_file = os.path.join(self.get_temp_dir(), basename) with open(vocabulary_file, "w") as f: - f.write("\n".join(["brain", "salad", "surgery"]) + "\n") + f.write("\n".join(values) + "\n") return vocabulary_file def test_index_to_string_table(self): @@ -614,6 +614,35 @@ class IndexToStringTableFromFileTest(test.TestCase): self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), features.eval()) + def test_index_to_string_table_from_multicolumn_file(self): + vocabulary_file = self._createVocabFile( + "f2i_vocab1.txt", values=("brain\t300", "salad\t20", "surgery\t1")) + with self.test_session(): + table = lookup_ops.index_to_string_table_from_file( + vocabulary_file=vocabulary_file, + key_column_index=lookup_ops.TextFileIndex.LINE_NUMBER, + value_column_index=0) + features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64)) + self.assertRaises(errors_impl.OpError, features.eval) + lookup_ops.tables_initializer().run() + self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), + features.eval()) + + def test_index_to_string_table_from_multicolumn_file_custom_delimiter(self): + vocabulary_file = self._createVocabFile( + "f2i_vocab1.txt", values=("brain 300", "salad 20", "surgery 1")) + with self.test_session(): + table = lookup_ops.index_to_string_table_from_file( + vocabulary_file=vocabulary_file, + key_column_index=lookup_ops.TextFileIndex.LINE_NUMBER, + value_column_index=0, + delimiter=" ") + features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64)) + self.assertRaises(errors_impl.OpError, features.eval) + lookup_ops.tables_initializer().run() + self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), + features.eval()) + def test_index_to_string_table_with_default_value(self): default_value = b"NONE" vocabulary_file = self._createVocabFile("f2i_vocab2.txt") diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index f28eadf248..156e415735 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -561,9 +561,9 @@ class TextFileStringTableInitializer(TextFileInitializer): The path must be accessible from wherever the graph is initialized (eg. trainer or eval workers). The filename may be a scalar `Tensor`. key_column_index: The column index from the text file to get the keys - from. The default is 0 that represents the whole line content. + from. The default is to use the line number, starting from zero. value_column_index: The column index from the text file to get the - values from. The default is to use the line number, starting from zero. + values from. The default is to use the whole line content. vocab_size: The number of elements in the file, if known. delimiter: The delimiter to separate fields in a line. name: Optional name for the op. @@ -613,9 +613,9 @@ class TextFileIdTableInitializer(TextFileInitializer): The path must be accessible from wherever the graph is initialized (eg. trainer or eval workers). The filename may be a scalar `Tensor`. key_column_index: The column index from the text file to get the `key` + values from. The default is to use the whole line content. + value_column_index: The column index from the text file to get the `value` values from. The default is to use the line number, starting from zero. - value_column_index: The column index from the text file ro get the `value` - values from. The default is 0 that represents the whole line content. vocab_size: The number of elements in the file, if known. delimiter: The delimiter to separate fields in a line. name: Optional name for the op. @@ -926,9 +926,9 @@ def index_table_from_file(vocabulary_file=None, key_dtype: The `key` data type. name: A name for this op (optional). key_column_index: The column index from the text file to get the `key` + values from. The default is to use the whole line content. + value_column_index: The column index from the text file to get the `value` values from. The default is to use the line number, starting from zero. - value_column_index: The column index from the text file ro get the `value` - values from. The default is 0 that represents the whole line content. delimiter: The delimiter to separate fields in a line. Returns: @@ -1095,7 +1095,10 @@ def index_table_from_tensor(vocabulary_list, def index_to_string_table_from_file(vocabulary_file, vocab_size=None, default_value="UNK", - name=None): + name=None, + key_column_index=TextFileIndex.LINE_NUMBER, + value_column_index=TextFileIndex.WHOLE_LINE, + delimiter="\t"): """Returns a lookup table that maps a `Tensor` of indices into strings. This operation constructs a lookup table to map int64 indices into string @@ -1109,6 +1112,16 @@ def index_to_string_table_from_file(vocabulary_file, The underlying table must be initialized by calling `tf.tables_initializer.run()` or `table.init.run()` once. + To specify multi-column vocabulary files, use key_column_index and + value_column_index and delimiter. + + - TextFileIndex.LINE_NUMBER means use the line number starting from zero, + expects data type int64. + - TextFileIndex.WHOLE_LINE means use the whole line content, expects data + type string. + - A value >=0 means use the index (starting at zero) of the split line based + on `delimiter`. + Sample Usages: If we have a vocabulary file "test.txt" with the following content: @@ -1135,6 +1148,11 @@ def index_to_string_table_from_file(vocabulary_file, vocab_size: Number of the elements in the vocabulary, if known. default_value: The value to use for out-of-vocabulary indices. name: A name for this op (optional). + key_column_index: The column index from the text file to get the `key` + values from. The default is to use the line number, starting from zero. + value_column_index: The column index from the text file to get the `value` + values from. The default is to use the whole line content. + delimiter: The delimiter to separate fields in a line. Returns: The lookup table to map a string values associated to a given index `int64` @@ -1155,15 +1173,19 @@ def index_to_string_table_from_file(vocabulary_file, # Keep a shared_name # ____ shared_name = "hash_table_%s_%d_%s_%s" % (vocabulary_file, vocab_size, - TextFileIndex.LINE_NUMBER, - TextFileIndex.WHOLE_LINE) + key_column_index, + value_column_index) else: # Keep a shared_name ___ - shared_name = "hash_table_%s_%s_%s" % (vocabulary_file, - TextFileIndex.LINE_NUMBER, - TextFileIndex.WHOLE_LINE) + shared_name = "hash_table_%s_%s_%s" % (vocabulary_file, key_column_index, + value_column_index) init = TextFileStringTableInitializer( - vocabulary_file, vocab_size=vocab_size, name="table_init") + vocabulary_file, + vocab_size=vocab_size, + name="table_init", + key_column_index=key_column_index, + value_column_index=value_column_index, + delimiter=delimiter) # TODO(yleon): Use a more effienct structure. return HashTable(init, default_value, shared_name=shared_name, name=scope) -- GitLab From 9a9b18ca6a865a2cd65ec49421eea7b788d7d856 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 18:28:34 -0800 Subject: [PATCH 0447/1801] Remove a Done TODO from Svdf op. PiperOrigin-RevId: 175767193 --- tensorflow/contrib/lite/kernels/svdf.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/svdf.cc b/tensorflow/contrib/lite/kernels/svdf.cc index dd414d53bd..72f705fe42 100644 --- a/tensorflow/contrib/lite/kernels/svdf.cc +++ b/tensorflow/contrib/lite/kernels/svdf.cc @@ -183,8 +183,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // Reduction sum - // TODO(ghodrat): Consider not reusing state for the temporary output, this - // way ReductionSum operates on row-vector instead of column vector. for (int b = 0; b < batch_size; b++) { float* output_ptr_batch = output->data.f + b * num_units; float* scratch_ptr_batch = scratch->data.f + b * num_filters; -- GitLab From bd5b3acc1481ba14ab86757b107bd25f4fb1aef3 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Tue, 14 Nov 2017 18:46:55 -0800 Subject: [PATCH 0448/1801] Improve the model comparison utility. Add session reset, verify input function is consistent and freeze random variables. PiperOrigin-RevId: 175768806 --- .../contrib/tpu/python/tpu/test_util.py | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/test_util.py b/tensorflow/contrib/tpu/python/tpu/test_util.py index b83c72d0ff..a5d4ff9722 100644 --- a/tensorflow/contrib/tpu/python/tpu/test_util.py +++ b/tensorflow/contrib/tpu/python/tpu/test_util.py @@ -32,6 +32,7 @@ from tensorflow.python.client import session as tf_session from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import variables @@ -89,7 +90,12 @@ def copy_dir(src, tgt): gfile.Copy(src_f, tgt_f, overwrite=True) -def compare_model(model_fn, input_fn, params, master="local", temp_dir=None, +def compare_model(model_fn, + input_fn, + params, + master="local", + temp_dir=None, + num_shards=2, tolerance=1e-4): """Compare the results of running `model_fn` on the TPU and CPU.""" if not temp_dir: @@ -102,7 +108,17 @@ def compare_model(model_fn, input_fn, params, master="local", temp_dir=None, logging.info("Checkpoints and weights will be written to %s", temp_dir) num_steps = 1 - num_shards = 8 + + def _model_adapter(features, labels, mode, params): + """Run users model function with random seeds fixed to known values.""" + random_seed.set_random_seed(0) + np.random.seed(0) + return model_fn(features, labels, mode, params) + + def _input_adapter(params): + random_seed.set_random_seed(0) + np.random.seed(0) + return input_fn(params) def _make_run_config(model_dir): return tpu_config.RunConfig( @@ -119,7 +135,7 @@ def compare_model(model_fn, input_fn, params, master="local", temp_dir=None, def _make_estimator(use_tpu, model_dir): return tpu_estimator.TPUEstimator( - model_fn=model_fn, + model_fn=_model_adapter, use_tpu=use_tpu, config=_make_run_config(model_dir), train_batch_size=num_shards, @@ -131,8 +147,9 @@ def compare_model(model_fn, input_fn, params, master="local", temp_dir=None, weights = {} graph = ops.Graph() with graph.as_default(): + features, labels = _input_adapter(dict(params, batch_size=num_shards)) model_fn( - *input_fn(params), + features, labels, params=dict(params, use_tpu=False), mode=model_fn_lib.ModeKeys.TRAIN) saver = tf_saver.Saver() @@ -148,10 +165,15 @@ def compare_model(model_fn, input_fn, params, master="local", temp_dir=None, return weights def _run_step(use_tpu, model_dir): + """Create an estimator and run a single step on the given device.""" + tf_session.Session.reset(target=master) + + logging.info("Running step. TPU=%d. model_dir=%s", use_tpu, model_dir) est = _make_estimator(use_tpu=use_tpu, model_dir=model_dir) - est.train(input_fn=input_fn, steps=num_steps) + est.train(input_fn=_input_adapter, steps=num_steps) weights = _extract_weights(est.latest_checkpoint()) - with gfile.Open(temp_dir + "tpu-%d.weights" % use_tpu, "wb") as f: + with gfile.Open(os.path.join(temp_dir, "tpu-%d.weights" % use_tpu), + "wb") as f: f.write(pickle.dumps(weights)) return weights @@ -159,9 +181,9 @@ def compare_model(model_fn, input_fn, params, master="local", temp_dir=None, _run_step(use_tpu=False, model_dir=initial_model_dir) copy_dir(initial_model_dir, cpu_model_dir) - cpu_weights = _run_step(use_tpu=False, model_dir=cpu_model_dir) - copy_dir(initial_model_dir, tpu_model_dir) + + cpu_weights = _run_step(use_tpu=False, model_dir=cpu_model_dir) tpu_weights = _run_step(use_tpu=True, model_dir=tpu_model_dir) bad_weights = False -- GitLab From 79733d756285b92c49cbd6315a91933cffac774f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 19:07:13 -0800 Subject: [PATCH 0449/1801] Update BUILD visibility. PiperOrigin-RevId: 175770716 --- tensorflow/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 95be0bc8df..6a66d1d44b 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -366,6 +366,7 @@ config_setting( package_group( name = "internal", packages = [ + "//learning/meta_rank/...", "//tensorflow/...", "//tensorflow_fold/llgtm/...", ], -- GitLab From c798e04fbd24809d0bc52d4b80f30e74418b8f4d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 19:36:08 -0800 Subject: [PATCH 0450/1801] Remove unused BUILD dependencies PiperOrigin-RevId: 175772549 --- tensorflow/compiler/tf2xla/BUILD | 1 - tensorflow/contrib/lite/toco/BUILD | 4 ---- 2 files changed, 5 deletions(-) diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 376c8108ed..5a81438b1c 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -179,7 +179,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 77d381c1c5..eb08b5d1e5 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -75,7 +75,6 @@ cc_library( ":runtime", ":toco_port", "//tensorflow/core:lib", - "@protobuf_archive//:protobuf_headers", ], ) @@ -88,9 +87,6 @@ cc_library( "toco_graphviz_dump_options.h", ], visibility = ["//visibility:public"], - deps = [ - "@com_google_absl//absl/strings", - ], ) cc_library( -- GitLab From 24a6162d2d5fad078157e2ec514f2fbb7ee0c676 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 14 Nov 2017 20:12:00 -0800 Subject: [PATCH 0451/1801] [XLA:GPU] Mark loads as invariant where appropriate. If we read a value within an HLO that isn't modified by that HLO, mark it as invariant in LLVM IR. LLVM can perform more aggressive optimizations on invariant loads, but I don't expect this will help much in our case, because XLA already emits pretty aggressive noalias information on loads and stores. However, on nvidia GPUs, marking loads as invariant has the additional benefit of allowing LLVM to lower the load as ld.global.nc (equivalent to CUDA's __ldg). This instruction uses a special cache on the GPU, and it's usually faster than a regular load. PiperOrigin-RevId: 175774979 --- .../compiler/xla/service/buffer_assignment.cc | 36 +++++++++++++ .../compiler/xla/service/buffer_assignment.h | 6 +++ .../xla/service/gpu/hlo_to_ir_bindings.cc | 35 +++++++++++++ .../xla/service/gpu/hlo_to_ir_bindings.h | 9 +++- .../compiler/xla/service/gpu/ir_emitter.cc | 25 +++++---- .../compiler/xla/service/gpu/ir_emitter.h | 10 +++- .../xla/service/gpu/ir_emitter_nested.cc | 3 +- .../xla/service/gpu/ir_emitter_unnested.cc | 51 +++++++++++-------- .../xla/service/llvm_ir/alias_analysis.cc | 2 +- .../compiler/xla/service/llvm_ir/ir_array.cc | 4 +- .../compiler/xla/service/llvm_ir/ir_array.h | 32 ++++++++++-- 11 files changed, 171 insertions(+), 42 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 3c5b360c8e..033034b421 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -265,6 +265,42 @@ bool BufferAssignment::SharesSliceAtIndex( GetUniqueSlice(hlo_b, shape_index_b).ConsumeValueOrDie(); } +bool BufferAssignment::HaveDisjointSlices(const HloInstruction* hlo_a, + const HloInstruction* hlo_b) const { + using SliceSet = + FlatSet; + // Gets the slices all of instr's subshapes. If any subshape doesn't have an + // assigned slice, returns the empty set. + auto collect_slices = [&](const HloInstruction* instr) -> SliceSet { + SliceSet slices; + Status status = ShapeUtil::ForEachSubshapeWithStatus( + instr->shape(), + [&](const Shape& /*subshape*/, const ShapeIndex& index) { + auto shape_slices = GetAllSlices(instr, index); + if (shape_slices.empty()) { + return InvalidArgument("No slices assigned to part of instr."); + } + slices.insert(shape_slices.begin(), shape_slices.end()); + return Status::OK(); + }); + if (!status.ok()) { + return {}; + } + return slices; + }; + + SliceSet slices_a = collect_slices(hlo_a); + SliceSet slices_b = collect_slices(hlo_b); + // hlo_a and hlo_b have disjoint slices if collect_slices succeeded (i.e. + // didn't return the empty set) for both HLOs, and the two resulting sets of + // slices are disjoint. + return !slices_a.empty() && !slices_b.empty() && + std::none_of(slices_a.begin(), slices_a.end(), + [&](const BufferAllocation::Slice& slice) { + return slices_b.count(slice) > 0; + }); +} + StatusOr BufferAssignment::GetUniqueTopLevelOutputSlice() const { return GetUniqueTopLevelSlice( diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h index 08a53af8ba..08a40bfeb2 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.h +++ b/tensorflow/compiler/xla/service/buffer_assignment.h @@ -327,6 +327,12 @@ class BufferAssignment { return SharesSliceAtIndex(hlo_a, {}, hlo_b, {}); } + // Returns true if hlo_a and hlo_b both have at least one buffer assigned for + // their top-level and each of their nested shape indices, and if hlo_a's + // buffers are all different from hlo_b's buffers. + bool HaveDisjointSlices(const HloInstruction* hlo_a, + const HloInstruction* hlo_b) const; + // Returns the underlying points-to analysis used for this assignment. const TuplePointsToAnalysis& points_to_analysis() const { return liveness_->points_to_analysis(); diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 163a161353..c2115c4999 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -166,11 +166,46 @@ void HloToIrBindings::BindHloToIrValue(const HloInstruction& hlo, *(base_ptrs_[&hlo].mutable_element(shape_index)) = typed_ir_value; } +// Determines whether hlo's buffers are never modified within the execution of +// consumer. +static bool BuffersInvariantWithinConsumer( + const HloInstruction& hlo, const HloInstruction& consumer, + const BufferAssignment* buffer_assignment) { + // Check if consumer is inside a fusion node -- if so, "dereference" it until + // we get to a non-fusion node. + const HloInstruction* c = &consumer; + while (c->IsFused()) { + c = c->parent()->FusionInstruction(); + } + + // If, after dereferencing c, we end up with a node that's not inside our + // module's top-level computation (say our node is inside a while loop), we + // give up on marking array as invariant, because this HLO may be run multiple + // times (e.g. multiple while loop iterations, or multiple invocations of a + // reducer's computation). TODO(jlebar): We could relax this constraint if we + // emitted an llvm.invariant.group.barrier at the end of the computation. + return c->parent() == c->GetModule()->entry_computation() && + buffer_assignment->HaveDisjointSlices(&hlo, &consumer); +} + llvm_ir::IrArray HloToIrBindings::GetIrArray(const HloInstruction& hlo, + const HloInstruction& consumer, const ShapeIndex& shape_index) { llvm_ir::IrArray ir_array(GetBasePointer(hlo, shape_index), ShapeUtil::GetSubshape(hlo.shape(), shape_index)); alias_analysis_.AddAliasingInformationToIrArray(hlo, &ir_array); + + // The GPU backend emits one kernel per top-level HLO, and LLVM views + // execution of one kernel as the "whole program" executed on the GPU. + // Therefore if hlo's output buffer is not modified within consumer, and if + // consumer runs hlo only once (so that it doesn't create two different + // outputs), then we can mark ir_array as invariant over the whole program. + if (BuffersInvariantWithinConsumer(hlo, consumer, buffer_assignment_)) { + VLOG(2) << "Marking " << hlo.name() << " as invariant within " + << consumer.name(); + ir_array.MarkInvariantOverWholeProgram(&module_->getContext()); + } + return ir_array; } diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index a3120f15bc..62ae1769a1 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -76,8 +76,15 @@ class HloToIrBindings { return it->second.element(shape_index); } - // Return the underlying IrArray of the output of the given instruction. + // Returns the IrArray which contains the output of hlo. + // + // consumer is the HLO in which this IrArray is used -- we use this to (try + // to) add metadata indicating that the array is invariant within consumer. + // + // To get the buffer into which hlo should write its own output, call + // GetIrArray(hlo, hlo). llvm_ir::IrArray GetIrArray(const HloInstruction& hlo, + const HloInstruction& consumer, const ShapeIndex& shape_index = {}); private: diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index af2a92e11e..6e2bd4e11d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -68,7 +68,8 @@ Status IrEmitter::DefaultAction(HloInstruction* hlo) { ElementalIrEmitter::HloToElementGeneratorMap operand_to_generator; for (const HloInstruction* operand : hlo->operands()) { operand_to_generator[operand] = [=](const llvm_ir::IrArray::Index& index) { - return GetIrArray(*operand).EmitReadArrayElement(index, &ir_builder_); + return GetIrArray(*operand, *hlo) + .EmitReadArrayElement(index, &ir_builder_); }; } return EmitTargetElementLoop( @@ -145,7 +146,8 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) { for (const HloInstruction* operand : tuple->operands()) { base_ptrs.push_back(GetBasePointer(*operand)); } - llvm_ir::EmitTuple(GetIrArray(*tuple), base_ptrs, &ir_builder_, module_); + llvm_ir::EmitTuple(GetIrArray(*tuple, *tuple), base_ptrs, &ir_builder_, + module_); return Status::OK(); } @@ -334,7 +336,8 @@ Status IrEmitter::HandleSelect(HloInstruction* select) { TF_RET_CHECK(pred->shape().element_type() == PRED); if (ShapeUtil::IsTuple(select->shape())) { - llvm_ir::EmitTupleSelect(GetIrArray(*select), GetIrArray(*pred), + llvm_ir::EmitTupleSelect(GetIrArray(*select, *select), + GetIrArray(*pred, *select), GetBasePointer(*on_true), GetBasePointer(*on_false), &ir_builder_, module_); return Status::OK(); @@ -349,9 +352,9 @@ Status IrEmitter::HandleSelect(HloInstruction* select) { Status IrEmitter::HandleDot(HloInstruction* dot) { auto lhs_instruction = dot->operand(0); auto rhs_instruction = dot->operand(1); - const llvm_ir::IrArray& target_array = GetIrArray(*dot); - const llvm_ir::IrArray& lhs_array = GetIrArray(*lhs_instruction); - const llvm_ir::IrArray& rhs_array = GetIrArray(*rhs_instruction); + const llvm_ir::IrArray& target_array = GetIrArray(*dot, *dot); + const llvm_ir::IrArray& lhs_array = GetIrArray(*lhs_instruction, *dot); + const llvm_ir::IrArray& rhs_array = GetIrArray(*rhs_instruction, *dot); const Shape& lhs_shape = lhs_instruction->shape(); const Shape& rhs_shape = rhs_instruction->shape(); @@ -571,7 +574,8 @@ Status IrEmitter::HandleReduce(HloInstruction* reduce) { // Apply the reduction function to the loaded value. llvm::Value* input_address = - GetIrArray(*arg).EmitArrayElementAddress(input_index, &ir_builder_); + GetIrArray(*arg, *reduce) + .EmitArrayElementAddress(input_index, &ir_builder_); TF_RETURN_IF_ERROR(EmitCallToNestedComputation( *function, {accumulator_addr, input_address}, accumulator_addr)); @@ -587,7 +591,7 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { std::vector parameter_arrays; for (HloInstruction* operand : fusion->operands()) { - parameter_arrays.push_back(GetIrArray(*operand)); + parameter_arrays.push_back(GetIrArray(*operand, *fusion)); } GpuElementalIrEmitter elemental_emitter(hlo_module_config_, module_, &ir_builder_, GetNestedComputer()); @@ -622,7 +626,8 @@ Status IrEmitter::HandleRng(HloInstruction* random) { ElementalIrEmitter::HloToElementGeneratorMap operand_to_generator; for (const HloInstruction* operand : random->operands()) { operand_to_generator[operand] = [=](const llvm_ir::IrArray::Index& index) { - return GetIrArray(*operand).EmitReadArrayElement(index, &ir_builder_); + return GetIrArray(*operand, *random) + .EmitReadArrayElement(index, &ir_builder_); }; } // Emits a single-threaded loop because the loop body generated by the element @@ -631,7 +636,7 @@ Status IrEmitter::HandleRng(HloInstruction* random) { GpuElementalIrEmitter(hlo_module_config_, module_, &ir_builder_, GetNestedComputer()) .MakeElementGenerator(random, operand_to_generator), - GetIrArray(*random), &ir_builder_) + GetIrArray(*random, *random), &ir_builder_) .EmitLoop(IrName(random)); } diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h index 61fdeaa0ee..9c01f5b7c7 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h @@ -105,10 +105,16 @@ class IrEmitter : public DfsHloVisitorWithDefault { explicit IrEmitter(const HloModuleConfig& hlo_module_config, IrEmitterContext* ir_emitter_context, bool is_nested); - // A convenient helper for calling HloToIrBindings::GetIrArray. + // Helper for calling HloToIrBindings::GetIrArray. + // + // Gets the IrArray which contains inst. This array has metadata that makes + // it valid only within the IR that implements consumer. If you are + // implementing an HLO and want to get its own output buffer, call + // GetIrArray(hlo, hlo). llvm_ir::IrArray GetIrArray(const HloInstruction& inst, + const HloInstruction& consumer, const ShapeIndex& shape_index = {}) { - return bindings_.GetIrArray(inst, shape_index); + return bindings_.GetIrArray(inst, consumer, shape_index); } // A convenient helper for calling HloToIrBindings::GetBasePointer. llvm::Value* GetBasePointer(const HloInstruction& inst) const { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc index 5da1a130d5..5225ff36ff 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc @@ -115,7 +115,8 @@ Status IrEmitterNested::HandleParameter(HloInstruction* parameter) { Status IrEmitterNested::EmitTargetElementLoop( const HloInstruction& hlo, const llvm_ir::ElementGenerator& element_generator) { - return llvm_ir::LoopEmitter(element_generator, GetIrArray(hlo), &ir_builder_) + return llvm_ir::LoopEmitter(element_generator, GetIrArray(hlo, hlo), + &ir_builder_) .EmitLoop(); } diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index db78f4b84d..1b863c9e3c 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -282,7 +282,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { MakeUnique(std::move(thunks), fusion)); std::vector parameter_arrays; for (HloInstruction* operand : fusion->operands()) { - parameter_arrays.push_back(GetIrArray(*operand)); + parameter_arrays.push_back(GetIrArray(*operand, *fusion)); } GpuElementalIrEmitter elemental_emitter( hlo_module_config_, ir_emitter_context_->llvm_module(), @@ -344,7 +344,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { thunk_sequence_->emplace_back(BuildKernelThunk(fusion)); std::vector operand_arrays; for (HloInstruction* operand : fusion->operands()) { - operand_arrays.push_back(GetIrArray(*operand)); + operand_arrays.push_back(GetIrArray(*operand, *fusion)); } GpuElementalIrEmitter elemental_emitter(hlo_module_config_, ir_emitter_context_->llvm_module(), @@ -355,7 +355,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { // Array to write into. Because this is an in-place operation, this is the // same as operand 0's array. - llvm_ir::IrArray output_array = GetIrArray(*fusion); + llvm_ir::IrArray output_array = GetIrArray(*fusion, *fusion); LaunchDimensions launch_dimensions = CalculateLaunchDimensions( update_shape, ir_emitter_context_->device_description()); @@ -693,9 +693,10 @@ Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) { constexpr int64 tile_size = 32; constexpr int64 num_rows = 8; int64 num_tiles = EmitTranspose021Tiled( - GetIrArray(*(copy->operand(0))) + GetIrArray(*copy->operand(0), *copy) .CastToShape(reduced_input_shape, &ir_builder_), - GetIrArray(*copy).CastToShape(reduced_output_shape, &ir_builder_), + GetIrArray(*copy, *copy) + .CastToShape(reduced_output_shape, &ir_builder_), tile_size, num_rows, &ir_builder_); UpdateLaunchDimensions(LaunchDimensions(num_tiles, num_rows * tile_size), LastThunk(), ir_emitter_context_->llvm_module()); @@ -850,9 +851,11 @@ Status IrEmitterUnnested::EmitColumnReduction( &ir_builder_); const HloInstruction* output = reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce; - llvm::Value* output_address = GetIrArray(*output).EmitArrayElementAddress( - llvm_ir::IrArray::Index(x, output->shape(), &ir_builder_), &ir_builder_, - "output_element_address"); + llvm::Value* output_address = + GetIrArray(*output, *output) + .EmitArrayElementAddress( + llvm_ir::IrArray::Index(x, output->shape(), &ir_builder_), + &ir_builder_, "output_element_address"); return EmitAtomicOperationForNestedComputation( *reducer, output_address, partial_reduction_result_address); }; @@ -1116,9 +1119,11 @@ Status IrEmitterUnnested::EmitRowReduction( "lane_id_is_zero", &ir_builder_); llvm_ir::SetToFirstInsertPoint(if_lane_id_is_zero_data.true_block, &ir_builder_); - llvm::Value* output_address = GetIrArray(*output).EmitArrayElementAddress( - llvm_ir::IrArray::Index(y, output->shape(), &ir_builder_), &ir_builder_, - "output_element_address"); + llvm::Value* output_address = + GetIrArray(*output, *output) + .EmitArrayElementAddress( + llvm_ir::IrArray::Index(y, output->shape(), &ir_builder_), + &ir_builder_, "output_element_address"); return EmitAtomicOperationForNestedComputation( *reducer, output_address, partial_reduction_result_address); }; @@ -1258,11 +1263,12 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) { MakeUnique(std::move(thunks), reduce)); return EmitReductionToVector( reduce, input->shape(), - [this, input](const llvm_ir::IrArray::Index& index) { - return GetIrArray(*input).EmitReadArrayElement(index, &ir_builder_); + [&](const llvm_ir::IrArray::Index& index) { + return GetIrArray(*input, *reduce) + .EmitReadArrayElement(index, &ir_builder_); }, - [this, init_value](const llvm_ir::IrArray::Index& index) { - return GetIrArray(*init_value) + [&](const llvm_ir::IrArray::Index& index) { + return GetIrArray(*init_value, *reduce) .EmitReadArrayElement(index, &ir_builder_); }, dimensions_to_reduce, reducer); @@ -1426,7 +1432,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter( ir_builder_.CreateStore(operand_index[i], selected_index_address_slot); } }; - llvm_ir::IrArray operand_array(GetIrArray(*operand)); + llvm_ir::IrArray operand_array = GetIrArray(*operand, *select_and_scatter); llvm::Value* operand_data = operand_array.EmitReadArrayElement(operand_index, &ir_builder_); ir_builder_.CreateStore(operand_data, selected_value_address); @@ -1479,9 +1485,10 @@ Status IrEmitterUnnested::HandleSelectAndScatter( ir_builder_.CreateLoad(selected_index_address_slot)); } llvm::Value* source_value_address = - GetIrArray(*source).EmitArrayElementAddress(source_index, &ir_builder_); + GetIrArray(*source, *select_and_scatter) + .EmitArrayElementAddress(source_index, &ir_builder_); llvm::Value* output_value_address = - GetIrArray(*select_and_scatter) + GetIrArray(*select_and_scatter, *select_and_scatter) .EmitArrayElementAddress(selected_index, &ir_builder_); return EmitAtomicOperationForNestedComputation( *select_and_scatter->scatter(), output_value_address, @@ -1758,7 +1765,7 @@ Status IrEmitterUnnested::EmitInitializer(const HloInstruction* hlo, return EmitTargetElementLoopInThunk( *hlo, [=](const llvm_ir::IrArray::Index& index) { - return GetIrArray(*init_value) + return GetIrArray(*init_value, *hlo) .EmitReadArrayElement(index, &ir_builder_); }, thunk); @@ -1859,7 +1866,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk( UpdateLaunchDimensions(launch_dimensions, thunk, ir_emitter_context_->llvm_module()); if (!hlo.IsMultiOutputFusion()) { - return ParallelLoopEmitter(element_generator, GetIrArray(hlo), + return ParallelLoopEmitter(element_generator, GetIrArray(hlo, hlo), launch_dimensions, &ir_builder_) .EmitLoop(IrName(&hlo)); } @@ -1867,7 +1874,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk( // For multiple outputs fusion, we need to emit each operand and the root. std::vector output_arrays; for (int64 i = 0; i < ShapeUtil::TupleElementCount(hlo.shape()); ++i) { - output_arrays.push_back(GetIrArray(hlo, {i})); + output_arrays.push_back(GetIrArray(hlo, hlo, {i})); } TF_RETURN_IF_ERROR(ParallelLoopEmitter(element_generator, output_arrays, launch_dimensions, &ir_builder_) @@ -1878,7 +1885,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk( tuple_operand_ptrs.push_back(output_arrays[i].GetBasePointer()); } ir_builder_.SetInsertPoint(ir_builder_.GetInsertBlock()->getTerminator()); - llvm_ir::EmitTuple(GetIrArray(hlo), tuple_operand_ptrs, &ir_builder_, + llvm_ir::EmitTuple(GetIrArray(hlo, hlo), tuple_operand_ptrs, &ir_builder_, module_); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc index bdddc232ef..21bca1d6be 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc @@ -83,7 +83,7 @@ void AliasAnalysis::AddAliasingInformationToIrArray(const HloInstruction& hlo, if (std::find(parameter_instructions.begin(), parameter_instructions.end(), &hlo) != parameter_instructions.end()) { - array->AddInvariantLoad(llvm::MDNode::get(*context_, /*MDs=*/{})); + array->MarkInvariantOverWholeProgram(context_); } } } diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index e3f98ac13e..7224bd6898 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -256,10 +256,10 @@ void IrArray::AnnotateLoadStoreInstructionWithMetadata( llvm::Instruction* instruction) const { CHECK(llvm::isa(instruction) || llvm::isa(instruction)); + CHECK(!llvm::isa(instruction) || !is_invariant_) + << "Trying to create a store to an invariant IRArray."; for (const auto& kind_md_pair : metadata_) { - CHECK(kind_md_pair.first != llvm::LLVMContext::MD_invariant_load || - llvm::isa(instruction)); instruction->setMetadata(kind_md_pair.first, kind_md_pair.second); } } diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h index 1ed7e99a82..387d462912 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h @@ -229,9 +229,33 @@ class IrArray { AddMetadata(llvm::LLVMContext::MD_noalias, noalias); } - void AddInvariantLoad(llvm::MDNode* invariant_load) { - CHECK_NE(invariant_load, nullptr); - AddMetadata(llvm::LLVMContext::MD_invariant_load, invariant_load); + // Promises LLVM that the data pointed to by this IrArray never changes after + // it's first loaded. + // + // The temporal scope of this promise is the "whole program" from LLVM's point + // of view, but how this translates to HLOs differs between backends. + // + // In the single-threaded CPU backend, we emit one function that + // runs all the HLOs in sequence, so the whole program is the whole HLO + // module. + // + // In the GPU backend, we emit one GPU kernel per top-level HLO (i.e. per HLO + // in the entry computation). From LLVM's perspective, launching a new kernel + // is like launching a new program, and so the whole program is one top-level + // HLO. Since the scope of the promise is smaller than in the CPU backend, we + // can mark more things as invariant in the GPU backend. + // + // Marking loads as invariant is particularly helpful on GPUs because + // invariant loads can be lowered to PTX ld.global.nc (equivalent to CUDA's + // __ldg intrinsic). These loads use a special cache, and can be + // significantly faster than regular loads. + void MarkInvariantOverWholeProgram(llvm::LLVMContext* context) { + if (is_invariant_) { + return; + } + is_invariant_ = true; + AddMetadata(llvm::LLVMContext::MD_invariant_load, + llvm::MDNode::get(*context, {})); } const std::map& metadata() const { return metadata_; } @@ -261,6 +285,8 @@ class IrArray { // loads/stores for this array. They keys are the metadata kinds and the // values are the metadata nodes. std::map metadata_; + + bool is_invariant_ = false; }; } // namespace llvm_ir -- GitLab From 0c19b61a4073aaf191340a02a766ebe238bc7e56 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Nov 2017 21:36:38 -0800 Subject: [PATCH 0452/1801] Add a control dependency optimizer to Grappler. The first two rewrites implemented are: 1. Turn nodes with only control outputs into NoOps, if we know that they are safe to remove. Such nodes can be produced, e.g., by rewrite rules in the arithmetic optimizer. 2. Completely disconnect NoOp nodes with at most 1 input or at most 1 output by rerouting their inputs to their outputs. The restriction on fan-in/fan-out guarantees that we reduce the number of control dependencies in the graph. The two (slightly) non-trivial cases are: // Case a) // x --^> +------+ x --^> +---+ // y --^> | NoOp | --^> a ==> y --^> | a | // ... | | ... | | // z --^> +------+ z --^> +---+ // // Case b) // +------+ --^> a +---+ --^> a // x --^> | NoOp | --^> b ==> | x | --^> b // | | ... | | ... // +------+ --^> c +---+ --^> c PiperOrigin-RevId: 175780178 --- tensorflow/core/grappler/optimizers/BUILD | 42 +++ .../optimizers/arithmetic_optimizer.cc | 8 +- .../optimizers/arithmetic_optimizer.h | 6 +- .../grappler/optimizers/constant_folding.cc | 1 - .../optimizers/dependency_optimizer.cc | 278 ++++++++++++++++++ .../optimizers/dependency_optimizer.h | 68 +++++ .../optimizers/dependency_optimizer_test.cc | 201 +++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 15 +- tensorflow/core/grappler/utils.cc | 7 +- tensorflow/core/grappler/utils_test.cc | 8 + .../core/protobuf/rewriter_config.proto | 2 + 11 files changed, 626 insertions(+), 10 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/dependency_optimizer.cc create mode 100644 tensorflow/core/grappler/optimizers/dependency_optimizer.h create mode 100644 tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 54004a5e07..dbfa8ae503 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -194,6 +194,47 @@ tf_cc_test( ], ) +cc_library( + name = "dependency_optimizer", + srcs = ["dependency_optimizer.cc"], + hdrs = [ + "dependency_optimizer.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":arithmetic_optimizer", + ":constant_folding", + ":graph_optimizer", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/utils:frame", + ], +) + +tf_cc_test( + name = "dependency_optimizer_test", + size = "small", + srcs = ["dependency_optimizer_test.cc"], + deps = [ + ":constant_folding", + ":dependency_optimizer", + ":model_pruner", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", + ], +) + cc_library( name = "model_pruner", srcs = ["model_pruner.cc"], @@ -311,6 +352,7 @@ cc_library( ":arithmetic_optimizer", ":auto_parallel", ":constant_folding", + ":dependency_optimizer", ":graph_optimizer", ":layout_optimizer", ":memory_optimizer", diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 5cce34e2a6..0cd0d4351e 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -512,8 +512,10 @@ bool UniqueNodes::SameNode(const NodeDef& node1, const NodeDef& node2) const { return true; } -bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { - if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { +// static +bool ArithmeticOptimizer::CanDedup( + const NodeDef& node, const std::unordered_set& nodes_to_preserve) { + if (nodes_to_preserve.find(node.name()) != nodes_to_preserve.end()) { return false; } if (IsEnter(node) || IsExit(node) || IsPlaceholder(node)) { @@ -551,7 +553,7 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { continue; } NodeDef* node = optimized_graph->mutable_node(i); - if (!CanDedup(*node)) { + if (!CanDedup(*node, nodes_to_preserve_)) { continue; } NodeDef* rep = nodes.FindOrAddRepresentative(node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 4d2e160ff4..c8cc292295 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -28,6 +28,11 @@ namespace grappler { // run a model. class ArithmeticOptimizer : public GraphOptimizer { public: + // Returns true if it is safe to dedup node from the graph. + // TODO(rmlarsen): Refactor to op_types.{h,cc}. + static bool CanDedup(const NodeDef& node, + const std::unordered_set& nodes_to_preserve); + ArithmeticOptimizer() : opt_level_(RewriterConfig::ON) {} explicit ArithmeticOptimizer(RewriterConfig::Toggle opt_level) : opt_level_(opt_level) {} @@ -42,7 +47,6 @@ class ArithmeticOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: - bool CanDedup(const NodeDef& node) const; void DedupComputations(GraphDef* optimized_graph) const; // Runs peep-hole optimizations on `optimized_graph`, e.g., removing inverse // transposes. diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 02a732b092..993831c412 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -122,7 +122,6 @@ string ConstantFolding::AddControlDependency(const string& input_name, auto outputs = node_map->GetOutputs(node->name()); for (const NodeDef* node : outputs) { if (IsIdentity(*node)) { - CHECK_EQ(1, node->input_size()); if (IsSameInput(node->input(0), input_name)) { return AsControlDependency(*node); } diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc new file mode 100644 index 0000000000..49eb29d037 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -0,0 +1,278 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" + +#include + +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/utils/frame.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/util/device_name_utils.h" + +namespace tensorflow { +namespace grappler { + +namespace { +// A vector with a set. The set stores the same elements as the vector, and +// quickly answers whether a value is in the vector. Duplicated elements are not +// allowed for now. +template +class SetVector { + public: + // Returns false if value already existed in the set, true otherwise. + bool PushBack(const T& value) { + if (!set_.insert(value).second) { + return false; + } + vector_.push_back(value); + return true; + } + + T PopBack() { + T back = vector_.back(); + set_.erase(back); + vector_.pop_back(); + return back; + } + + bool Exists(const T& value) const { return set_.count(value); } + + bool Empty() const { return vector_.empty(); } + + void Reserve(int64 size) { vector_.reserve(size); } + + private: + std::unordered_set set_; + std::vector vector_; +}; + +bool HasRegularOutputs(const NodeDef& node, const NodeMap& node_map) { + for (const NodeDef* output : node_map.GetOutputs(node.name())) { + for (const string& input : output->input()) { + if (input == node.name()) { + return true; + } + } + } + return false; +} + +int FindInputSlot(const NodeDef& node, const string& input) { + for (int i = 0; i < node.input_size(); ++i) { + if (node.input(i) == input) { + return i; + } + } + return -1; +} + +} // namespace + +bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) { + if (!has_fetch_ || HasRegularOutputs(node, *node_map_)) { + return false; + } + + if (IsMerge(node)) { + return false; + } + if (!ArithmeticOptimizer::CanDedup(node, nodes_to_preserve_)) { + return false; + } + + const OpDef* op_def = nullptr; + Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); + if (!status.ok() || op_def->output_arg_size() == 0) { + return false; + } + + // TODO(rmlarsen): We have to skip Const nodes to make + // core/debug/debug_gateway_test pass. See if we can fix that test. + // TODO(rmlarsen): We have to skip Identity nodes to make an obsolete test in + // python/training/session_manager_test.py pass. See if we can fix or get rid + // of that test. + const std::unordered_set do_not_rewrite_ops = { + "Assert", "CheckNumerics", "Const", "Identity", "_Retval", + "_Arg", "_ParallelConcatUpdate", "_TPUExecute"}; + return do_not_rewrite_ops.find(node.op()) == do_not_rewrite_ops.end(); +} + +string DependencyOptimizer::TryOptimizeDependencies( + NodeDef* node, GraphDef* graph, std::vector* new_nodes) { + // Change ops that only have control dependencies as outputs to NoOps. + if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) { + VLOG(2) << "***** Replacing " << node->name() << " (" << node->op() + << ") with NoOp."; + // The outputs of this node are not consumed. Replace its inputs with + // control dependencies and replace the op itself with the NoOp op. + for (int i = 0; i < node->input_size(); ++i) { + const string& old_input = node->input(i); + if (IsControlInput(old_input)) { + continue; + } + const string ctrl_input = ConstantFolding::AddControlDependency( + old_input, graph, node_map_.get()); + node->set_input(i, ctrl_input); + node_map_->UpdateInput(node->name(), old_input, ctrl_input); + new_nodes->push_back(node_map_->GetNode(old_input)); + } + node->set_op("NoOp"); + node->clear_attr(); + new_nodes->push_back(node); + return ""; + } + + // Remove NoOp nodes if their fan-in or fan-out is less than 2. + // The non-trivial rewrites take the following form: + // + // Case a) + // x --^> +------+ x --^> +---+ + // y --^> | NoOp | --^> a ==> y --^> | a | + // ... | | ... | | + // z --^> +------+ z --^> +---+ + // + // Case b) + // +------+ --^> a +---+ --^> a + // x --^> | NoOp | --^> b ==> | x | --^> b + // | | ... | | ... + // +------+ --^> c +---+ --^> c + if (node->op() == "NoOp" && + nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) { + auto outputs = node_map_->GetOutputs(node->name()); + const int num_outputs = outputs.size(); + const int num_inputs = node->input_size(); + if (num_inputs > 1 && num_outputs > 1) { + return ""; + } + + for (auto consumer : outputs) { + for (int i = 0; i < num_inputs; ++i) { + const string& input = node->input(i); + // Forward dependencies from inputs to consumer if it doesn't already + // depend on it. + if (node_map_->GetOutputs(input).count(consumer) == 0) { + consumer->add_input(ConstantFolding::AddControlDependency( + input, graph, node_map_.get())); + node_map_->AddOutput(NodeName(input), consumer->name()); + } + new_nodes->push_back(node_map_->GetNode(input)); + } + // Remove dependency on node from consumer. + int pos = FindInputSlot(*consumer, AsControlDependency(node->name())); + if (pos >= 0) { + consumer->mutable_input()->SwapElements(pos, + consumer->input_size() - 1); + consumer->mutable_input()->RemoveLast(); + node_map_->RemoveOutput(node->name(), consumer->name()); + new_nodes->push_back(consumer); + } + } + + // Clear all control inputs to node. + node_map_->RemoveInputs(node->name()); + node->clear_input(); + return ""; + } + + return ""; +} + +Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { + // TODO(rmlarsen,bsteiner): The folloing code is similar to the control loop + // in the ArithmeticOptimizer. Dedup this. + SetVector nodes_to_simplify; + for (int i = 0; i < optimized_graph->node_size(); ++i) { + const NodeDef& node = optimized_graph->node(i); + if (node.op() == "NoOp" || SafeToConvertToNoOp(node)) { + nodes_to_simplify.PushBack(optimized_graph->mutable_node()->Mutable(i)); + } + } + while (!nodes_to_simplify.Empty()) { + NodeDef* node = nodes_to_simplify.PopBack(); + std::vector new_nodes; + const string simplified_tensor = + TryOptimizeDependencies(node, optimized_graph, &new_nodes); + if (simplified_tensor.empty()) { + continue; + } + if (NodeName(simplified_tensor) != node->name()) { + // Always consider simplified_tensor for further optimizations. + NodeDef* simplified_node = node_map_->GetNode(simplified_tensor); + if (simplified_node != nullptr) { + nodes_to_simplify.PushBack(simplified_node); + } + // When `node` is simplifed to another node rather than in-place, the + // consumers of `node` are already redirected to `simplified_tensor`. + // Re-push the consumers into `nodes_to_simplify` for further + // optimizations. + std::set consumers = node_map_->GetOutputs(node->name()); + for (NodeDef* consumer : consumers) { + // Update `consumer`'s use of `node` to `input`'s operand. + for (int i = 0; i < consumer->input_size(); ++i) { + int operand_pos; + string operand_node_name = + ParseNodeName(consumer->input(i), &operand_pos); + if (operand_node_name == node->name()) { + *consumer->mutable_input(i) = + (operand_pos < 0 + ? AsControlDependency(NodeName(simplified_tensor)) + : simplified_tensor); + } + VLOG(2) << "Update input " << consumer->input(i) << " of " + << consumer->name() << " to " << simplified_tensor; + } + node_map_->UpdateInput(consumer->name(), node->name(), + simplified_tensor); + nodes_to_simplify.PushBack(consumer); + } + } + for (auto new_node : new_nodes) { + nodes_to_simplify.PushBack(new_node); + } + } + return Status::OK(); +} + +Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + *optimized_graph = item.graph; + nodes_to_preserve_ = item.NodesToPreserve(); + node_map_.reset(new NodeMap(optimized_graph)); + has_fetch_ = !item.fetch.empty(); + VLOG(2) << "Graph before optimization:\n" << optimized_graph->DebugString(); + TF_RETURN_IF_ERROR(OptimizeDependencies(optimized_graph)); + VLOG(2) << "Graph after optimization:\n" << optimized_graph->DebugString(); + + return Status::OK(); +} + +void DependencyOptimizer::Feedback(Cluster* /*cluster*/, + const GrapplerItem& /*item*/, + const GraphDef& /*optimized_graph*/, + double /*result*/) { + // Nothing to do for DependencyOptimizer. +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h new file mode 100644 index 0000000000..13ece87aff --- /dev/null +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h @@ -0,0 +1,68 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DEPENDENCY_OPTIMIZER_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DEPENDENCY_OPTIMIZER_H_ + +#include +#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" + +namespace tensorflow { +namespace grappler { + +// Optimize TF computations by removing control dependencies or re-arranging +// them to shorten the critical path for a model step or enable other +// optimizations, such as removing nodes that are effectively noops. +class DependencyOptimizer : public GraphOptimizer { + public: + DependencyOptimizer() : opt_level_(RewriterConfig::ON) {} + explicit DependencyOptimizer(RewriterConfig::Toggle opt_level) + : opt_level_(opt_level) {} + ~DependencyOptimizer() override {} + + string name() const override { return "dependency_optimizer"; }; + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, double result) override; + + private: + // Returns true if it is safe to convert node to NoOp. + bool SafeToConvertToNoOp(const NodeDef& node); + + Status OptimizeDependencies(GraphDef* optimized_graph); + // Tries to simplify the expression that roots at `node` and replaces the uses + // of `node` to the simplified expression. Returns the name of the simplified + // tensor (e.g. "split:1") or an empty string if no simplification is + // performed. + string TryOptimizeDependencies(NodeDef* node, GraphDef* graph, + std::vector* new_nodes); + + bool HasOnlyControlOutputs(const NodeDef* node); + + bool has_fetch_; + RewriterConfig::Toggle opt_level_; + std::unordered_set nodes_to_preserve_; + std::unique_ptr node_map_; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DEPENDENCY_OPTIMIZER_H_ diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc new file mode 100644 index 0000000000..d54d7b2093 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc @@ -0,0 +1,201 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/model_pruner.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class DependencyOptimizerTest : public ::testing::Test {}; + +void VerifyGraphsEqual(const GraphDef& original_graph, + const GraphDef& optimized_graph, const string& func) { + EXPECT_EQ(original_graph.node_size(), optimized_graph.node_size()) << func; + for (int i = 0; i < original_graph.node_size(); ++i) { + const NodeDef& original = original_graph.node(i); + const NodeDef& optimized = optimized_graph.node(i); + EXPECT_EQ(original.name(), optimized.name()) << func; + EXPECT_EQ(original.op(), optimized.op()) << func; + EXPECT_EQ(original.input_size(), optimized.input_size()) << func; + for (int j = 0; j < original.input_size(); ++j) { + EXPECT_EQ(original.input(j), optimized.input(j)) << func; + } + } +} + +TEST_F(DependencyOptimizerTest, NoOp) { + // This trivial graph is so basic there's nothing to optimize. + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + VerifyGraphsEqual(item.graph, output, __FUNCTION__); +} + +TEST_F(DependencyOptimizerTest, ChangeToNoop) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); + Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2}); + Output add = ops::Add(s.WithOpName("add"), x, y); + Output id1 = + ops::Identity(s.WithOpName("id1").WithControlDependencies(add), x); + Output id2 = + ops::Identity(s.WithOpName("id2").WithControlDependencies(add), y); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch.push_back("id1"); + item.fetch.push_back("id2"); + + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(item.graph.node_size(), output.node_size()); + for (int i = 0; i < item.graph.node_size(); ++i) { + const NodeDef& original = item.graph.node(i); + const NodeDef& optimized = output.node(i); + EXPECT_EQ(original.name(), optimized.name()); + if (original.name() == "add") { + EXPECT_EQ("NoOp", optimized.op()); + } else { + EXPECT_EQ(original.op(), optimized.op()); + } + EXPECT_EQ(original.input_size(), optimized.input_size()); + for (int j = 0; j < original.input_size(); ++j) { + if (original.name() == "add") { + EXPECT_EQ(AsControlDependency(original.input(j)), optimized.input(j)); + } else { + EXPECT_EQ(original.input(j), optimized.input(j)); + } + } + } +} + +TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); + Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2}); + Output add = ops::Add(s.WithOpName("add"), x, y); + Output id1 = + ops::Identity(s.WithOpName("id1").WithControlDependencies(add), x); + Output id2 = + ops::Identity(s.WithOpName("id2").WithControlDependencies(add), y); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + VerifyGraphsEqual(item.graph, output, __FUNCTION__); +} + +TEST_F(DependencyOptimizerTest, RemoveNoOps_EmptyInputOrOutput) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Const(s, {1.0f, 2.0f}, {1, 2}); + auto noop1 = ops::NoOp(s); + auto noop2 = ops::NoOp(s.WithControlDependencies(x)); + Output id = ops::Identity(s.WithControlDependencies({noop1.operation}), x); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch.push_back("Identity"); + + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(item.graph.node_size(), output.node_size()); + for (const NodeDef& node : output.node()) { + if (node.name() == "NoOp" || node.name() == "NoOp_1") { + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "Identity") { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("Const", node.input(0)); + } + } +} + +TEST_F(DependencyOptimizerTest, RemoveNoOps_SingleInputOrOutput) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); + Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2}); + // NoOp with a single input- and two output dependencies. + auto noop = ops::NoOp(s.WithControlDependencies(x)); + // NoOp with a two input- and a single output dependency. + auto noop_1 = + ops::NoOp(s.WithControlDependencies(x).WithControlDependencies(y)); + Output id = ops::Identity(s.WithControlDependencies({noop.operation}), x); + Output id_1 = ops::Identity( + s.WithControlDependencies({noop.operation, noop_1.operation}), y); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch.push_back("Identity"); + item.fetch.push_back("Identity_1"); + + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(item.graph.node_size(), output.node_size()); + for (const NodeDef& node : output.node()) { + if (node.name() == "NoOp" || node.name() == "NoOp_1") { + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "Identity") { + EXPECT_EQ("x", node.input(0)); + } else if (node.name() == "Identity_1") { + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("^x", node.input(1)); + } + } +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index eb04bc6e9a..1e93900e6a 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" #include "tensorflow/core/grappler/optimizers/auto_parallel.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" #include "tensorflow/core/grappler/optimizers/memory_optimizer.h" @@ -53,6 +54,10 @@ std::unique_ptr MetaOptimizer::NewOptimizer( graph_optimizer.reset( new AutoParallel(cfg_.auto_parallel().num_replicas())); } + if (optimizer == "dependency") { + graph_optimizer.reset( + new DependencyOptimizer(cfg_.dependency_optimization())); + } return graph_optimizer; } @@ -71,6 +76,10 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } + if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new DependencyOptimizer(cfg_.dependency_optimization()))); + } if (cfg_.layout_optimizer() == RewriterConfig::ON) { optimizers.push_back( std::unique_ptr(new LayoutOptimizer())); @@ -92,9 +101,9 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, new AutoParallel(cfg_.auto_parallel().num_replicas()))); } } else { - std::set available_optimizers = {"pruning", "constfold", - "layout", "memory", - "autoparallel", "arithmetic"}; + std::set available_optimizers = { + "pruning", "constfold", "layout", "memory", + "autoparallel", "arithmetic", "dependency"}; for (const auto& optimizer : cfg_.optimizers()) { if (available_optimizers.find(optimizer) != available_optimizers.end()) { optimizers.push_back(NewOptimizer(optimizer)); diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 11bd8fa5cb..66f8c537ed 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -221,8 +221,11 @@ string AsControlDependency(const NodeDef& node) { return strings::StrCat("^", node.name()); } -string AsControlDependency(const string& node) { - return strings::StrCat("^", node); +string AsControlDependency(const string& node_name) { + CHECK(!node_name.empty()); + return (!node_name.empty() && node_name[0] == '^') + ? node_name + : strings::StrCat("^", node_name); } int NumOutputs(const NodeDef& node) { diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index 3193b3ec4a..9d747fe7dc 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -181,6 +181,14 @@ TEST_F(UtilsTest, NumOutputs) { EXPECT_EQ(1, NumOutputs(CreateDequeueNode())); } +TEST(AsControlDependency, BasicTest) { + NodeDef node; + node.set_name("foo"); + EXPECT_EQ("^foo", AsControlDependency(node)); + EXPECT_EQ("^foo", AsControlDependency(node.name())); + EXPECT_EQ("^foo", AsControlDependency("^foo")); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index eb74d4b1c5..96b55ce04b 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -35,6 +35,8 @@ message RewriterConfig { Toggle constant_folding = 3; // Arithmetic optimizations (default is ON) Toggle arithmetic_optimization = 7; + // Control dependency optimizations (default is ON). + Toggle dependency_optimization = 8; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; -- GitLab From c154d4719eea88e694f4c06bcb1249dbac0f7877 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 15 Nov 2017 08:56:17 -0800 Subject: [PATCH 0453/1801] Do not log an error when `tf.py_func()` raises `StopIteration`. This reduces log spam when using `tf.py_func()` to wrap a Python generator, such as when using `tf.data.Dataset.from_generator()`. PiperOrigin-RevId: 175831781 --- tensorflow/python/lib/core/py_func.cc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index a62847614c..b30125761f 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -176,7 +176,8 @@ string PyExcFetch() { } // Calls the registered py function through the trampoline. -Status DoCallPyFunc(PyCall* call) { +Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { + *out_log_on_error = true; PyObject* trampoline = GetPyTrampoline(); if (trampoline == nullptr) { return errors::InvalidArgument( @@ -196,6 +197,7 @@ Status DoCallPyFunc(PyCall* call) { PyErr_ExceptionMatches(PyExc_TypeError)) { return errors::InvalidArgument(PyExcFetch()); } else if (PyErr_ExceptionMatches(PyExc_StopIteration)) { + *out_log_on_error = false; return errors::OutOfRange(PyExcFetch()); } else if (PyErr_ExceptionMatches(PyExc_MemoryError)) { return errors::ResourceExhausted(PyExcFetch()); @@ -426,11 +428,19 @@ class PyFuncOp : public OpKernel { PyGILState_STATE py_threadstate; py_threadstate = PyGILState_Ensure(); - Status s = DoCallPyFunc(&call); + bool log_on_error; + Status s = DoCallPyFunc(&call, &log_on_error); PyGILState_Release(py_threadstate); // Ensures that GIL is released even when !s.ok(). - OP_REQUIRES_OK(ctx, s); + if (!s.ok()) { + if (log_on_error) { + ctx->CtxFailureWithWarning(s); + } else { + ctx->CtxFailure(s); + } + return; + } OP_REQUIRES(ctx, static_cast(call.out.size()) == ctx->num_outputs(), errors::InvalidArgument(token_, " returns ", call.out.size(), -- GitLab From ab815ecae8cad8f6776d4e0f38a0b6a24cee23ba Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 15 Nov 2017 09:41:46 -0800 Subject: [PATCH 0454/1801] Remove `FunctionDefLibrary` argument from `Device::MaybeRewriteGraph()`. No existing subclass of `Device` reads that argument, and it incurs a protobuf serialization of the function library each time a new subgraph is created in `DirectSession::Run()`. PiperOrigin-RevId: 175837162 --- tensorflow/core/common_runtime/device.h | 5 +---- tensorflow/core/common_runtime/direct_session.cc | 6 +----- tensorflow/core/common_runtime/renamed_device.h | 5 ++--- tensorflow/core/distributed_runtime/graph_mgr.cc | 3 +-- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h index 674111dbe6..3912cd177b 100644 --- a/tensorflow/core/common_runtime/device.h +++ b/tensorflow/core/common_runtime/device.h @@ -110,12 +110,9 @@ class Device : public DeviceBase { // prototyping of TensorFlow device implementations that need to modify // the GraphDef before execution. // - // 'library' provides access to the function library which is shared - // between all device partitions. // 'graph' supplies the partition of the graph assigned to this // device. - virtual Status MaybeRewriteGraph(const FunctionDefLibrary& /*library*/, - std::unique_ptr* /*graph*/) { + virtual Status MaybeRewriteGraph(std::unique_ptr* /*graph*/) { return Status::OK(); } diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index d652b1004f..2f57164dcd 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1419,11 +1419,7 @@ Status DirectSession::CreateGraphs( Device* d; s = device_mgr_->LookupDevice(partition_name, &d); if (!s.ok()) break; - // TODO(pbar) The library is currently shared and immutable. There - // may be possible use cases where a device may want to modify - // function definitions - in which case the library would need to be - // replicated per device. - s = d->MaybeRewriteGraph(client_graph->flib_def->ToProto(), graph); + s = d->MaybeRewriteGraph(graph); if (!s.ok()) { break; } diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h index 22a70fbdfa..3103ca0751 100644 --- a/tensorflow/core/common_runtime/renamed_device.h +++ b/tensorflow/core/common_runtime/renamed_device.h @@ -104,9 +104,8 @@ class RenamedDevice : public Device { Status Sync() override { return underlying_->Sync(); } - Status MaybeRewriteGraph(const FunctionDefLibrary& library, - std::unique_ptr* graph) override { - return underlying_->MaybeRewriteGraph(library, graph); + Status MaybeRewriteGraph(std::unique_ptr* graph) override { + return underlying_->MaybeRewriteGraph(graph); } Status FillContextMap(const Graph* graph, diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index 391ffda25c..60d58af61d 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -208,8 +208,7 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef, } // Give the device an opportunity to rewrite its subgraph. - TF_RETURN_IF_ERROR( - unit->device->MaybeRewriteGraph(gdef.library(), &subgraph)); + TF_RETURN_IF_ERROR(unit->device->MaybeRewriteGraph(&subgraph)); // Top-level nodes in the graph uses the op segment to cache // kernels. Therefore, as long as the executor is alive, we need -- GitLab From b29cf93407acaa8a2f32d118e88d5d9b440a1d5c Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 15 Nov 2017 10:55:12 -0800 Subject: [PATCH 0455/1801] Correct the logging message as (very likely) we will support queue-based input pipeline. PiperOrigin-RevId: 175848995 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 07877fcc76..97b2d25e0c 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -946,23 +946,14 @@ class _InputPipeline(object): # user code, so, log a warning. if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS): err_msg = ('Input pipeline contains one or more QueueRunners. ' - 'These are not supported via TPUEstimator. You must convert ' - 'your input pipeline to use `tf.data` instead (see ' + 'It could be slow and not scalable. Please consider ' + 'converting your input pipeline to use `tf.data` instead (see ' 'https://www.tensorflow.org/programmers_guide/datasets for ' 'instructions.') if _WRAP_INPUT_FN_INTO_WHILE_LOOP: raise RuntimeError(err_msg) else: logging.warn(err_msg) - elif ops.get_default_graph().get_collection(ops.GraphKeys.SUMMARIES): - # Queue Runner has summary Ops by default. So here we use elif to do - # necessary checks for Dataset input pipeline only. - err_msg = ('Input pipeline contains `tf.summary` operations. ' - 'These are not currently supported.') - if _WRAP_INPUT_FN_INTO_WHILE_LOOP: - raise RuntimeError(err_msg) - else: - logging.warn(err_msg) class _ModelFnWrapper(object): -- GitLab From 0c9b03ca3d6017734fc6ea7517556c65d7bb9f90 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 11:12:03 -0800 Subject: [PATCH 0456/1801] Update layout_optimizer gradient test to use a variable for the input instead of a constant. Otherwise the dependency optimizer eliminates the Conv2DBackpropInput. PiperOrigin-RevId: 175851832 --- tensorflow/python/grappler/layout_optimizer_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 9ac33fbb4a..99a4d23b6a 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -187,7 +187,8 @@ class LayoutOptimizerTest(test.TestCase): self.skipTest('GPU required') random_seed.set_random_seed(0) - x = random_ops.truncated_normal([1, 200, 200, 3], seed=0) + x = variables.Variable( + random_ops.truncated_normal([1, 200, 200, 3], seed=0)) y = conv_layers.conv2d(x, 32, [3, 3]) z = conv_layers.conv2d(y, 32, [3, 3]) optimizer = gradient_descent.GradientDescentOptimizer(1e-4) -- GitLab From 289af8e7460e69edc106e834b7fbeee17811f1ea Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 15 Nov 2017 11:16:39 -0800 Subject: [PATCH 0457/1801] Move dot-related helpers into dot_op_emitter.cc This keeps related logic together in a single file. PiperOrigin-RevId: 175852532 --- tensorflow/compiler/xla/service/cpu/BUILD | 4 +- .../compiler/xla/service/cpu/cpu_compiler.cc | 1 + .../xla/service/cpu/dot_op_emitter.cc | 115 ++++++++++++++++- .../compiler/xla/service/cpu/dot_op_emitter.h | 20 +++ .../xla/service/cpu/ir_emission_utils.cc | 117 ------------------ .../xla/service/cpu/ir_emission_utils.h | 21 ---- .../xla/service/cpu/layout_assignment.cc | 1 + .../service/cpu/parallel_task_assignment.cc | 1 + 8 files changed, 140 insertions(+), 140 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 4f6e69ebd4..89e8d07200 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -83,6 +83,7 @@ cc_library( ":cpu_options", ":cpu_parallelization_preparation", ":disassembler", + ":dot_op_emitter", ":ir_emission_utils", ":ir_emitter", ":layout_assignment", @@ -282,7 +283,6 @@ cc_library( deps = [ ":cpu_options", ":cpu_runtime", - ":ir_emission_utils", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:types", @@ -619,6 +619,7 @@ cc_library( srcs = ["layout_assignment.cc"], hdrs = ["layout_assignment.h"], deps = [ + ":dot_op_emitter", ":ir_emission_utils", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:computation_layout", @@ -706,6 +707,7 @@ cc_library( srcs = ["parallel_task_assignment.cc"], hdrs = ["parallel_task_assignment.h"], deps = [ + ":dot_op_emitter", ":ir_emission_utils", ":shape_partition", "//tensorflow/compiler/xla/service:hlo", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index d2202252d9..def801d9d6 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -54,6 +54,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_options.h" #include "tensorflow/compiler/xla/service/cpu/cpu_parallelization_preparation.h" #include "tensorflow/compiler/xla/service/cpu/disassembler.h" +#include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/cpu/ir_emitter.h" #include "tensorflow/compiler/xla/service/cpu/layout_assignment.h" diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 2a447a54b0..4c40dae512 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -23,7 +23,6 @@ limitations under the License. #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" -#include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" @@ -950,5 +949,119 @@ llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest( return index; } +// Return whether the given shape is a matrix with no padding. +static bool IsRank2WithNoPadding(const Shape& shape) { + return ShapeUtil::Rank(shape) == 2 && !LayoutUtil::IsPadded(shape); +} + +// In a gemm operation where output = lhs * rhs, check whether the given shapes +// are valid for the operation. +static bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, + const Shape& output_shape) { + // The inputs and the output must + // 1) be matrices with no padding, and + // 2) have an allowed element type. + return output_shape.element_type() == F32 && + IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && + IsRank2WithNoPadding(output_shape); +} + +bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { + // For certain types of Dot, we can call Eigen + if (hlo.opcode() == HloOpcode::kDot) { + const Shape& lhs_shape = hlo.operand(0)->shape(); + const Shape& rhs_shape = hlo.operand(1)->shape(); + + if (ShapeUtil::HasZeroElements(lhs_shape) || + ShapeUtil::HasZeroElements(rhs_shape)) { + return false; + } + + if (ProfitableToImplementDotInUntiledLlvmIr(hlo) == + DotInLlvmIrProfitable::kYes || + ProfitableToImplementDotInTiledLlvmIr(hlo)) { + return false; + } + + // If gemm can accept the operand shapes, use it rather than a custom + // kernel. + if (AreValidGemmShapes(lhs_shape, rhs_shape, hlo.shape())) { + // The size of the reduction dimension should match. The shape inference + // guarantees this invariant, so the check here is for programming + // errors. + CHECK_EQ(lhs_shape.dimensions(1), rhs_shape.dimensions(0)); + return true; + } + } + + if (hlo.opcode() == HloOpcode::kFusion && + hlo.fusion_kind() == HloInstruction::FusionKind::kTransposeDot && + hlo.fused_expression_root()->opcode() == HloOpcode::kDot) { + auto* dot = hlo.fused_expression_root(); + const Shape& lhs_shape = dot->operand(0)->shape(); + const Shape& rhs_shape = dot->operand(1)->shape(); + if (ShapeUtil::HasZeroElements(lhs_shape) || + ShapeUtil::HasZeroElements(rhs_shape)) { + return false; + } + return true; + } + + return false; +} + +DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( + const HloInstruction& dot) { + if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) { + const Shape& result_shape = dot.shape(); + // kReductionDimensionThresholdBytes was chosen to be 1/4 of a typical L1 + // cache line size, so that we can have the reduction dimension of both the + // LHS and RHS matrices and still have some space "left over". This needs + // to be tuned further. + const int64 kReductionDimensionThresholdBytes = 8 * 1024; + const bool single_threaded_eigen = + !dot.GetModule()->config().debug_options().xla_cpu_multi_thread_eigen(); + + // This is the point at which it is better to call into Eigen and shard the + // dot across multiple worker threads. This is a rough estimate by running + // a matmult benchmark on my local machine, and it can be tuned further. + const int64 kMaxSingleThreadedFlops = 16 * 1024; + + const int64 M = result_shape.dimensions(0); + const int64 N = result_shape.dimensions(1); + const int64 K = dot.operand(1)->shape().dimensions(0); + const int64 primitive_type_size = + ShapeUtil::ByteSizeOfPrimitiveType(result_shape.element_type()); + if (M == 1 && + K * primitive_type_size <= kReductionDimensionThresholdBytes && + (single_threaded_eigen || M * K * N <= kMaxSingleThreadedFlops)) { + // Heuristics: + // + // - Look for a configuration where we will likely be able to keep LHS in + // L1 and do a cache-optimal traversal of RHS. + // + // - Bail out on matrices that are large enough that Eigen can profitably + // shard the computation across multiple cores. This only applies when + // multi-threading is enabled. + return LayoutUtil::IsMonotonicWithDim0Major( + dot.operand(1)->shape().layout()) + ? DotInLlvmIrProfitable::kWithColumnMajorRhs + : DotInLlvmIrProfitable::kYes; + } + } + return DotInLlvmIrProfitable::kNo; +} + +bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) { + // Any Matrix-Vector product of floating point or integral type, or + // a transpose-dot fusion of the same can be lowered to a tiled LLVM + // IR implementation. + const Shape& shape = dot.shape(); + return shape.dimensions_size() == 2 && + (shape.dimensions(0) == 1 || shape.dimensions(1) == 1) && + (primitive_util::IsFloatingPointType(shape.element_type()) || + primitive_util::IsIntegralType(shape.element_type())); +} + } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index 470bf6ffb4..c9168ccc0f 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -30,6 +30,26 @@ limitations under the License. namespace xla { namespace cpu { +bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo); + +enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs }; + +// Returns a value to indicate if (and under what conditions) will lowering +// |dot| as a untiled LLVM IR dot operation be profitable over calling into +// Eigen or emitting a tiled LLVM IR implementation. Possible return values +// are: +// +// * DotInLlvmIrProfitable::kYes - always profitable. +// * DotInLlvmIrProfitable::kNo - never profitable. +// * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make +// the Rhs layout column major. +DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( + const HloInstruction& dot); + +// Returns true to indicate that we can generate a tiled LLVM IR implementation +// for |dot|. +bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot); + // Helper class for emitting LLVM IR to perform the dot operation. class DotOpEmitter { public: diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc index 7149a19310..cb5cb8a6dd 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc @@ -74,122 +74,5 @@ bool PotentiallyImplementedAsEigenConvolution( kernel_shape.dimensions_size() - 1; } -namespace { - -// Return whether the given shape is a matrix with no padding. -bool IsRank2WithNoPadding(const Shape& shape) { - return ShapeUtil::Rank(shape) == 2 && !LayoutUtil::IsPadded(shape); -} - -// In a gemm operation where output = lhs * rhs, check whether the given shapes -// are valid for the operation. -bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, - const Shape& output_shape) { - // The inputs and the output must - // 1) be matrices with no padding, and - // 2) have an allowed element type. - return output_shape.element_type() == F32 && - IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && - IsRank2WithNoPadding(output_shape); -} -} // namespace - -bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { - // For certain types of Dot, we can call Eigen - if (hlo.opcode() == HloOpcode::kDot) { - const Shape& lhs_shape = hlo.operand(0)->shape(); - const Shape& rhs_shape = hlo.operand(1)->shape(); - - if (ShapeUtil::HasZeroElements(lhs_shape) || - ShapeUtil::HasZeroElements(rhs_shape)) { - return false; - } - - if (ProfitableToImplementDotInUntiledLlvmIr(hlo) == - DotInLlvmIrProfitable::kYes || - ProfitableToImplementDotInTiledLlvmIr(hlo)) { - return false; - } - - // If gemm can accept the operand shapes, use it rather than a custom - // kernel. - if (AreValidGemmShapes(lhs_shape, rhs_shape, hlo.shape())) { - // The size of the reduction dimension should match. The shape inference - // guarantees this invariant, so the check here is for programming - // errors. - CHECK_EQ(lhs_shape.dimensions(1), rhs_shape.dimensions(0)); - return true; - } - } - - if (hlo.opcode() == HloOpcode::kFusion && - hlo.fusion_kind() == HloInstruction::FusionKind::kTransposeDot && - hlo.fused_expression_root()->opcode() == HloOpcode::kDot) { - auto* dot = hlo.fused_expression_root(); - const Shape& lhs_shape = dot->operand(0)->shape(); - const Shape& rhs_shape = dot->operand(1)->shape(); - if (ShapeUtil::HasZeroElements(lhs_shape) || - ShapeUtil::HasZeroElements(rhs_shape)) { - return false; - } - return true; - } - - return false; -} - -DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( - const HloInstruction& dot) { - if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) { - const Shape& result_shape = dot.shape(); - // kReductionDimensionThresholdBytes was chosen to be 1/4 of a typical L1 - // cache line size, so that we can have the reduction dimension of both the - // LHS and RHS matrices and still have some space "left over". This needs - // to be tuned further. - const int64 kReductionDimensionThresholdBytes = 8 * 1024; - const bool single_threaded_eigen = - !dot.GetModule()->config().debug_options().xla_cpu_multi_thread_eigen(); - - // This is the point at which it is better to call into Eigen and shard the - // dot across multiple worker threads. This is a rough estimate by running - // a matmult benchmark on my local machine, and it can be tuned further. - const int64 kMaxSingleThreadedFlops = 16 * 1024; - - const int64 M = result_shape.dimensions(0); - const int64 N = result_shape.dimensions(1); - const int64 K = dot.operand(1)->shape().dimensions(0); - const int64 primitive_type_size = - ShapeUtil::ByteSizeOfPrimitiveType(result_shape.element_type()); - if (M == 1 && - K * primitive_type_size <= kReductionDimensionThresholdBytes && - (single_threaded_eigen || M * K * N <= kMaxSingleThreadedFlops)) { - // Heuristics: - // - // - Look for a configuration where we will likely be able to keep LHS in - // L1 and do a cache-optimal traversal of RHS. - // - // - Bail out on matrices that are large enough that Eigen can profitably - // shard the computation across multiple cores. This only applies when - // multi-threading is enabled. - return LayoutUtil::IsMonotonicWithDim0Major( - dot.operand(1)->shape().layout()) - ? DotInLlvmIrProfitable::kWithColumnMajorRhs - : DotInLlvmIrProfitable::kYes; - } - } - return DotInLlvmIrProfitable::kNo; -} - -bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) { - // Any Matrix-Vector product of floating point or integral type, or - // a transpose-dot fusion of the same can be lowered to a tiled LLVM - // IR implementation. - const Shape& shape = dot.shape(); - return shape.dimensions_size() == 2 && - (shape.dimensions(0) == 1 || shape.dimensions(1) == 1) && - (primitive_util::IsFloatingPointType(shape.element_type()) || - primitive_util::IsIntegralType(shape.element_type())); -} - } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h index cbe07a7c2b..ac361ddfb4 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h @@ -23,27 +23,6 @@ namespace cpu { bool PotentiallyImplementedAsEigenConvolution( const HloInstruction& convolution); - -bool PotentiallyImplementedAsEigenDot(const HloInstruction& dot); - -enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs }; - -// Returns a value to indicate if (and under what conditions) will lowering -// |dot| as a untiled LLVM IR dot operation be profitable over calling into -// Eigen or emitting a tiled LLVM IR implementation. Possible return values -// are: -// -// * DotInLlvmIrProfitable::kYes - always profitable. -// * DotInLlvmIrProfitable::kNo - never profitable. -// * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make -// the Rhs layout column major. -DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( - const HloInstruction& dot); - -// Returns true to indicate that we can generate a tiled LLVM IR implementation -// for |dot|. -bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot); - } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc index b75ca34e0a..3f2d101959 100644 --- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/map_util.h" +#include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc index 4a62a80fac..4b44ac8941 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h" +#include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/cpu/shape_partition.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" -- GitLab From b7b183b90aee8a4f4808f7d90a2c7a54a942e640 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 11:23:55 -0800 Subject: [PATCH 0458/1801] Add support for Squeeze Op quantization PiperOrigin-RevId: 175853708 --- .../lite/toco/graph_transformations/hardcode_min_max.cc | 5 +++-- .../contrib/lite/toco/graph_transformations/quantize.cc | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc index d44b5dc7b0..9cb26c8752 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -143,7 +143,7 @@ bool HardcodeMinMaxForAverageOrMaxPool(Model* model, Operator* op) { return true; } -bool HardcodeMinMaxForReshape(Model* model, Operator* op) { +bool HardcodeMinMaxForReshapeOrSqueeze(Model* model, Operator* op) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.minmax) { return false; @@ -201,8 +201,9 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { changed = HardcodeMinMaxForAverageOrMaxPool(model, op); break; + case OperatorType::kSqueeze: case OperatorType::kTensorFlowReshape: - changed = HardcodeMinMaxForReshape(model, op); + changed = HardcodeMinMaxForReshapeOrSqueeze(model, op); break; case OperatorType::kLogistic: diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index 5551755ea7..d33597d381 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -42,6 +42,7 @@ bool SupportsQuantization(const Operator& op) { type == OperatorType::kL2Normalization || type == OperatorType::kAdd || type == OperatorType::kAveragePool || type == OperatorType::kMaxPool || type == OperatorType::kLogistic || type == OperatorType::kSoftmax || + type == OperatorType::kSqueeze || type == OperatorType::kTensorFlowReshape || type == OperatorType::kMul || type == OperatorType::kSpaceToDepth || type == OperatorType::kDepthToSpace; -- GitLab From 6fb721d608c4cd3855fe8793099a629428b9853c Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 15 Nov 2017 11:31:43 -0800 Subject: [PATCH 0459/1801] Add graph writer op to contrib/summary This change also defines a simple SQL data model for tf.GraphDef, which should move us closer to a world where TensorBoard can render the graph explorer without having to download the entire thing to the browser, as that could potentially be hundreds of megabytes. PiperOrigin-RevId: 175854921 --- tensorflow/contrib/summary/BUILD | 29 +- tensorflow/contrib/summary/summary.py | 3 + tensorflow/contrib/summary/summary_ops.py | 149 +++++++++- .../contrib/summary/summary_ops_graph_test.py | 52 ++++ .../contrib/summary/summary_ops_test.py | 47 ++- .../contrib/summary/summary_test_internal.py | 59 ++++ tensorflow/contrib/tensorboard/db/schema.cc | 141 +++++---- .../tensorboard/db/summary_db_writer.cc | 272 +++++++++++++++--- .../tensorboard/db/summary_db_writer_test.cc | 78 +++++ tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/summary_interface.cc | 10 + tensorflow/core/kernels/summary_interface.h | 4 + tensorflow/core/kernels/summary_kernels.cc | 25 ++ tensorflow/core/ops/summary_ops.cc | 13 + .../tools/pip_package/pip_smoke_test.py | 3 + 15 files changed, 751 insertions(+), 135 deletions(-) create mode 100644 tensorflow/contrib/summary/summary_ops_graph_test.py create mode 100644 tensorflow/contrib/summary/summary_test_internal.py diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index d1beafcb28..3892654f25 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -25,13 +25,12 @@ py_test( srcs_version = "PY2AND3", deps = [ ":summary_ops", + ":summary_test_internal", ":summary_test_util", "//tensorflow/python:array_ops", "//tensorflow/python:errors", "//tensorflow/python:framework", "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:ops", "//tensorflow/python:platform", "//tensorflow/python:state_ops", "//tensorflow/python:training", @@ -41,6 +40,20 @@ py_test( ], ) +py_test( + name = "summary_ops_graph_test", + srcs = ["summary_ops_graph_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":summary_ops", + ":summary_test_internal", + "//tensorflow/python:client_testlib", + "//tensorflow/python:ops", + "//tensorflow/python:platform", + "//tensorflow/python:training", + ], +) + py_library( name = "summary_ops", srcs = ["summary_ops.py"], @@ -98,3 +111,15 @@ py_library( "//tensorflow/python:platform", ], ) + +py_library( + name = "summary_test_internal", + testonly = 1, + srcs = ["summary_test_internal.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/python:lib", + "//tensorflow/python:platform", + ], +) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index 813e8b2b09..a73193f460 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -32,11 +32,14 @@ from tensorflow.contrib.summary.summary_ops import create_summary_db_writer from tensorflow.contrib.summary.summary_ops import create_summary_file_writer from tensorflow.contrib.summary.summary_ops import eval_dir from tensorflow.contrib.summary.summary_ops import generic +from tensorflow.contrib.summary.summary_ops import graph from tensorflow.contrib.summary.summary_ops import histogram from tensorflow.contrib.summary.summary_ops import image from tensorflow.contrib.summary.summary_ops import import_event +from tensorflow.contrib.summary.summary_ops import initialize from tensorflow.contrib.summary.summary_ops import never_record_summaries from tensorflow.contrib.summary.summary_ops import record_summaries_every_n_global_steps from tensorflow.contrib.summary.summary_ops import scalar from tensorflow.contrib.summary.summary_ops import should_record_summaries from tensorflow.contrib.summary.summary_ops import summary_writer_initializer_op +from tensorflow.contrib.summary.summary_ops import SummaryWriter diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index f6be99f6ae..a72c0c80aa 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -27,6 +27,7 @@ import time import six from tensorflow.contrib.summary import gen_summary_ops +from tensorflow.core.framework import graph_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -99,25 +100,32 @@ def never_record_summaries(): class SummaryWriter(object): - """Encapsulates a summary writer.""" + """Encapsulates a stateful summary writer resource. - def __init__(self, resource): + See also: + - @{tf.contrib.summary.create_summary_file_writer} + - @{tf.contrib.summary.create_summary_db_writer} + """ + + def __init__(self, resource): self._resource = resource if context.in_eager_mode(): self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._resource, handle_device="cpu:0") def set_as_default(self): + """Enables this summary writer for the current thread.""" context.context().summary_writer_resource = self._resource @tf_contextlib.contextmanager def as_default(self): + """Enables summary writing within a `with` block.""" if self._resource is None: - yield + yield self else: old = context.context().summary_writer_resource context.context().summary_writer_resource = self._resource - yield + yield self # Flushes the summary writer in eager mode or in graph functions, but not # in legacy graph mode (you're on your own there). with ops.device("cpu:0"): @@ -125,6 +133,43 @@ class SummaryWriter(object): context.context().summary_writer_resource = old +def initialize( + graph=None, # pylint: disable=redefined-outer-name + session=None): + """Initializes summary writing for graph execution mode. + + This helper method provides a higher-level alternative to using + @{tf.contrib.summary.summary_writer_initializer_op} and + @{tf.contrib.summary.graph}. + + Most users will also want to call @{tf.train.create_global_step} + which can happen before or after this function is called. + + Args: + graph: A @{tf.Graph} or @{tf.GraphDef} to output to the writer. + This function will not write the default graph by default. When + writing to an event log file, the associated step will be zero. + session: So this method can call @{tf.Session.run}. This defaults + to @{tf.get_default_session}. + + Raises: + RuntimeError: If in eager mode, or if the current thread has no + default @{tf.contrib.summary.SummaryWriter}. + ValueError: If session wasn't passed and no default session. + """ + if context.context().summary_writer_resource is None: + raise RuntimeError("No default tf.contrib.summary.SummaryWriter found") + if session is None: + session = ops.get_default_session() + if session is None: + raise ValueError("session must be passed if no default session exists") + session.run(summary_writer_initializer_op()) + if graph is not None: + data = _serialize_graph(graph) + x = array_ops.placeholder(dtypes.string) + session.run(_graph(x, 0), feed_dict={x: data}) + + def create_summary_file_writer(logdir, max_queue=None, flush_millis=None, @@ -192,10 +237,10 @@ def create_summary_db_writer(db_uri, Experiment will not be associated with a User. Must be valid as both a DNS label and Linux username. name: Shared name for this SummaryWriter resource stored to default - Graph. + @{tf.Graph}. Returns: - A new SummaryWriter instance. + A @{tf.contrib.summary.SummaryWriter} instance. """ with ops.device("cpu:0"): if experiment_name is None: @@ -240,7 +285,16 @@ def _nothing(): def all_summary_ops(): - """Graph-mode only. Returns all summary ops.""" + """Graph-mode only. Returns all summary ops. + + Please note this excludes @{tf.contrib.summary.graph} ops. + + Returns: + The summary ops. + + Raises: + RuntimeError: If in Eager mode. + """ if context.in_eager_mode(): raise RuntimeError( "tf.contrib.summary.all_summary_ops is only supported in graph mode.") @@ -248,7 +302,14 @@ def all_summary_ops(): def summary_writer_initializer_op(): - """Graph-mode only. Returns the list of ops to create all summary writers.""" + """Graph-mode only. Returns the list of ops to create all summary writers. + + Returns: + The initializer ops. + + Raises: + RuntimeError: If in Eager mode. + """ if context.in_eager_mode(): raise RuntimeError( "tf.contrib.summary.summary_writer_initializer_op is only " @@ -367,21 +428,72 @@ def audio(name, tensor, sample_rate, max_outputs, family=None, return summary_writer_function(name, tensor, function, family=family) +def graph(param, step=None, name=None): + """Writes a TensorFlow graph to the summary interface. + + The graph summary is, strictly speaking, not a summary. Conditions + like @{tf.contrib.summary.never_record_summaries} do not apply. Only + a single graph can be associated with a particular run. If multiple + graphs are written, then only the last one will be considered by + TensorBoard. + + When not using eager execution mode, the user should consider passing + the `graph` parameter to @{tf.contrib.summary.initialize} instead of + calling this function. Otherwise special care needs to be taken when + using the graph to record the graph. + + Args: + param: A @{tf.Tensor} containing a serialized graph proto. When + eager execution is enabled, this function will automatically + coerce @{tf.Graph}, @{tf.GraphDef}, and string types. + step: The global step variable. This doesn't have useful semantics + for graph summaries, but is used anyway, due to the structure of + event log files. This defaults to the global step. + name: A name for the operation (optional). + + Returns: + The created @{tf.Operation} or a @{tf.no_op} if summary writing has + not been enabled for this context. + + Raises: + TypeError: If `param` isn't already a @{tf.Tensor} in graph mode. + """ + if not context.in_eager_mode() and not isinstance(param, ops.Tensor): + raise TypeError("graph() needs a tf.Tensor (e.g. tf.placeholder) in graph " + "mode, but was: %s" % type(param)) + writer = context.context().summary_writer_resource + if writer is None: + return control_flow_ops.no_op() + with ops.device("cpu:0"): + if step is None: + step = training_util.get_global_step() + else: + step = ops.convert_to_tensor(step, dtypes.int64) + if isinstance(param, (ops.Graph, graph_pb2.GraphDef)): + tensor = ops.convert_to_tensor(_serialize_graph(param), dtypes.string) + else: + tensor = array_ops.identity(param) + return gen_summary_ops.write_graph_summary(writer, step, tensor, name=name) + +_graph = graph # for functions with a graph parameter + + def import_event(tensor, name=None): - """Writes a tf.Event binary proto. + """Writes a @{tf.Event} binary proto. When using create_summary_db_writer(), this can be used alongside - tf.TFRecordReader to load event logs into the database. Please note - that this is lower level than the other summary functions and will - ignore any conditions set by methods like should_record_summaries(). + @{tf.TFRecordReader} to load event logs into the database. Please + note that this is lower level than the other summary functions and + will ignore any conditions set by methods like + @{tf.contrib.summary.should_record_summaries}. Args: - tensor: A `Tensor` of type `string` containing a serialized `Event` - proto. + tensor: A @{tf.Tensor} of type `string` containing a serialized + @{tf.Event} proto. name: A name for the operation (optional). Returns: - The created Operation. + The created @{tf.Operation}. """ return gen_summary_ops.import_event( context.context().summary_writer_resource, tensor, name=name) @@ -390,3 +502,10 @@ def import_event(tensor, name=None): def eval_dir(model_dir, name=None): """Construct a logdir for an eval summary writer.""" return os.path.join(model_dir, "eval" if not name else "eval_" + name) + + +def _serialize_graph(arbitrary_graph): + if isinstance(arbitrary_graph, ops.Graph): + return arbitrary_graph.as_graph_def(add_shapes=True).SerializeToString() + else: + return arbitrary_graph.SerializeToString() diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py new file mode 100644 index 0000000000..8f85f67a25 --- /dev/null +++ b/tensorflow/contrib/summary/summary_ops_graph_test.py @@ -0,0 +1,52 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six + +from tensorflow.contrib.summary import summary_ops +from tensorflow.contrib.summary import summary_test_internal +from tensorflow.core.framework import graph_pb2 +from tensorflow.core.framework import node_def_pb2 +from tensorflow.python.framework import ops +from tensorflow.python.platform import test +from tensorflow.python.training import training_util + +get_all = summary_test_internal.get_all + + +class DbTest(summary_test_internal.SummaryDbTest): + + def testGraphPassedToGraph_isForbiddenForThineOwnSafety(self): + with self.assertRaises(TypeError): + summary_ops.graph(ops.Graph()) + with self.assertRaises(TypeError): + summary_ops.graph('') + + def testGraphSummary(self): + training_util.get_or_create_global_step() + name = 'hi' + graph = graph_pb2.GraphDef(node=(node_def_pb2.NodeDef(name=name),)) + with self.test_session(): + with self.create_summary_db_writer().as_default(): + summary_ops.initialize(graph=graph) + six.assertCountEqual(self, [name], + get_all(self.db, 'SELECT node_name FROM Nodes')) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index 6e1a746815..09169fa6d7 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -12,20 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - from __future__ import absolute_import from __future__ import division from __future__ import print_function -import functools -import os import tempfile import six -import sqlite3 from tensorflow.contrib.summary import summary_ops +from tensorflow.contrib.summary import summary_test_internal from tensorflow.contrib.summary import summary_test_util +from tensorflow.core.framework import graph_pb2 +from tensorflow.core.framework import node_def_pb2 from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import dtypes @@ -36,6 +35,9 @@ from tensorflow.python.ops import state_ops from tensorflow.python.platform import gfile from tensorflow.python.training import training_util +get_all = summary_test_internal.get_all +get_one = summary_test_internal.get_one + class TargetTest(test_util.TensorFlowTestCase): @@ -108,22 +110,7 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(events[1].summary.value[0].tag, 'scalar') -class DbTest(test_util.TensorFlowTestCase): - - def setUp(self): - self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite') - if os.path.exists(self.db_path): - os.unlink(self.db_path) - self.db = sqlite3.connect(self.db_path) - self.create_summary_db_writer = functools.partial( - summary_ops.create_summary_db_writer, - db_uri=self.db_path, - experiment_name='experiment', - run_name='run', - user_name='user') - - def tearDown(self): - self.db.close() +class DbTest(summary_test_internal.SummaryDbTest): def testIntegerSummaries(self): step = training_util.create_global_step() @@ -186,13 +173,15 @@ class DbTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): self.create_summary_db_writer(user_name='@') - -def get_one(db, q, *p): - return db.execute(q, p).fetchone()[0] - - -def get_all(db, q, *p): - return unroll(db.execute(q, p).fetchall()) + def testGraphSummary(self): + training_util.get_or_create_global_step() + name = 'hi' + graph = graph_pb2.GraphDef(node=(node_def_pb2.NodeDef(name=name),)) + with summary_ops.always_record_summaries(): + with self.create_summary_db_writer().as_default(): + summary_ops.graph(graph) + six.assertCountEqual(self, [name], + get_all(self.db, 'SELECT node_name FROM Nodes')) def get_tensor(db, tag_id, step): @@ -205,9 +194,5 @@ def int64(x): return array_ops.constant(x, dtypes.int64) -def unroll(list_of_tuples): - return sum(list_of_tuples, ()) - - if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/summary/summary_test_internal.py b/tensorflow/contrib/summary/summary_test_internal.py new file mode 100644 index 0000000000..54233f2f50 --- /dev/null +++ b/tensorflow/contrib/summary/summary_test_internal.py @@ -0,0 +1,59 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Internal helpers for tests in this directory.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os +import sqlite3 + +from tensorflow.contrib.summary import summary_ops +from tensorflow.python.framework import test_util + + +class SummaryDbTest(test_util.TensorFlowTestCase): + """Helper for summary database testing.""" + + def setUp(self): + super(SummaryDbTest, self).setUp() + self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite') + if os.path.exists(self.db_path): + os.unlink(self.db_path) + self.db = sqlite3.connect(self.db_path) + self.create_summary_db_writer = functools.partial( + summary_ops.create_summary_db_writer, + db_uri=self.db_path, + experiment_name='experiment', + run_name='run', + user_name='user') + + def tearDown(self): + self.db.close() + super(SummaryDbTest, self).tearDown() + + +def get_one(db, q, *p): + return db.execute(q, p).fetchone()[0] + + +def get_all(db, q, *p): + return unroll(db.execute(q, p).fetchall()) + + +def unroll(list_of_tuples): + return sum(list_of_tuples, ()) diff --git a/tensorflow/contrib/tensorboard/db/schema.cc b/tensorflow/contrib/tensorboard/db/schema.cc index 98fff9e0ae..d63b2c6cc2 100644 --- a/tensorflow/contrib/tensorboard/db/schema.cc +++ b/tensorflow/contrib/tensorboard/db/schema.cc @@ -135,8 +135,7 @@ class SqliteSchema { /// the database. This field will be mutated if the run is /// restarted. /// description: Optional markdown information. - /// graph: Snappy tf.GraphDef proto with node field cleared. That - /// field can be recreated using GraphNodes and NodeDefs. + /// graph_id: ID of associated Graphs row. Status CreateRunsTable() { return Run(R"sql( CREATE TABLE IF NOT EXISTS Runs ( @@ -147,7 +146,7 @@ class SqliteSchema { inserted_time REAL, started_time REAL, description TEXT, - graph BLOB + graph_id INTEGER ) )sql"); } @@ -205,46 +204,78 @@ class SqliteSchema { )sql"); } - /// \brief Creates NodeDefs table. - /// - /// This table stores NodeDef protos which define the GraphDef for a - /// Run. This functions like a hash table so rows can be shared by - /// multiple Runs in an Experiment. + /// \brief Creates Graphs table. /// /// Fields: /// rowid: Ephemeral b-tree ID dictating locality. - /// experiment_id: Optional int64 for grouping rows. - /// node_def_id: Permanent >0 unique ID. - /// fingerprint: Optional farmhash::Fingerprint64() of uncompressed - /// node_def bytes, coerced to int64. - /// node_def: BLOB containing a Snappy tf.NodeDef proto. - Status CreateNodeDefsTable() { + /// graph_id: Permanent >0 unique ID. + /// inserted_time: Float UNIX timestamp with µs precision. This is + /// always the wall time of when the row was inserted into the + /// DB. It may be used as a hint for an archival job. + /// node_def: Contains Snappy tf.GraphDef proto. All fields will be + /// cleared except those not expressed in SQL. + Status CreateGraphsTable() { return Run(R"sql( - CREATE TABLE IF NOT EXISTS NodeDefs ( + CREATE TABLE IF NOT EXISTS Graphs ( rowid INTEGER PRIMARY KEY, - experiment_id INTEGER, - node_def_id INTEGER NOT NULL, - fingerprint INTEGER, - node_def TEXT + graph_id INTEGER NOT NULL, + inserted_time REAL, + graph_def BLOB ) )sql"); } - /// \brief Creates RunNodeDefs table. + /// \brief Creates Nodes table. /// - /// Table mapping Runs to NodeDefs. This is used to recreate the node - /// field of the GraphDef proto. + /// Fields: + /// rowid: Ephemeral b-tree ID dictating locality. + /// graph_id: Permanent >0 unique ID. + /// node_id: ID for this node. This is more like a 0-index within + /// the Graph. Please note indexes are allowed to be removed. + /// node_name: Unique name for this Node within Graph. This is + /// copied from the proto so it can be indexed. This is allowed + /// to be NULL to save space on the index, in which case the + /// node_def.name proto field must not be cleared. + /// op: Copied from tf.NodeDef proto. + /// device: Copied from tf.NodeDef proto. + /// node_def: Contains Snappy tf.NodeDef proto. All fields will be + /// cleared except those not expressed in SQL. + Status CreateNodesTable() { + return Run(R"sql( + CREATE TABLE IF NOT EXISTS Nodes ( + rowid INTEGER PRIMARY KEY, + graph_id INTEGER NOT NULL, + node_id INTEGER NOT NULL, + node_name TEXT, + op TEXT, + device TEXT, + node_def BLOB + ) + )sql"); + } + + /// \brief Creates NodeInputs table. /// /// Fields: /// rowid: Ephemeral b-tree ID dictating locality. - /// run_id: Mandatory ID of associated Run. - /// node_def_id: Mandatory ID of associated NodeDef. - Status CreateRunNodeDefsTable() { + /// graph_id: Permanent >0 unique ID. + /// node_id: Index of Node in question. This can be considered the + /// 'to' vertex. + /// idx: Used for ordering inputs on a given Node. + /// input_node_id: Nodes.node_id of the corresponding input node. + /// This can be considered the 'from' vertex. + /// is_control: If non-zero, indicates this input is a controlled + /// dependency, which means this isn't an edge through which + /// tensors flow. NULL means 0. + Status CreateNodeInputsTable() { return Run(R"sql( - CREATE TABLE IF NOT EXISTS RunNodeDefs ( + CREATE TABLE IF NOT EXISTS NodeInputs ( rowid INTEGER PRIMARY KEY, - run_id INTEGER NOT NULL, - node_def_id INTEGER NOT NULL + graph_id INTEGER NOT NULL, + node_id INTEGER NOT NULL, + idx INTEGER NOT NULL, + input_node_id INTEGER NOT NULL, + is_control INTEGER ) )sql"); } @@ -297,11 +328,27 @@ class SqliteSchema { )sql"); } - /// \brief Uniquely indexes node_def_id on NodeDefs table. - Status CreateNodeDefIdIndex() { + /// \brief Uniquely indexes graph_id on Graphs table. + Status CreateGraphIdIndex() { return Run(R"sql( - CREATE UNIQUE INDEX IF NOT EXISTS NodeDefIdIndex - ON NodeDefs (node_def_id) + CREATE UNIQUE INDEX IF NOT EXISTS GraphIdIndex + ON Graphs (graph_id) + )sql"); + } + + /// \brief Uniquely indexes (graph_id, node_id) on Nodes table. + Status CreateNodeIdIndex() { + return Run(R"sql( + CREATE UNIQUE INDEX IF NOT EXISTS NodeIdIndex + ON Nodes (graph_id, node_id) + )sql"); + } + + /// \brief Uniquely indexes (graph_id, node_id, idx) on NodeInputs table. + Status CreateNodeInputsIndex() { + return Run(R"sql( + CREATE UNIQUE INDEX IF NOT EXISTS NodeInputsIndex + ON NodeInputs (graph_id, node_id, idx) )sql"); } @@ -350,20 +397,12 @@ class SqliteSchema { )sql"); } - /// \brief Indexes (experiment_id, fingerprint) on NodeDefs table. - Status CreateNodeDefFingerprintIndex() { - return Run(R"sql( - CREATE INDEX IF NOT EXISTS NodeDefFingerprintIndex - ON NodeDefs (experiment_id, fingerprint) - WHERE fingerprint IS NOT NULL - )sql"); - } - - /// \brief Uniquely indexes (run_id, node_def_id) on RunNodeDefs table. - Status CreateRunNodeDefIndex() { + /// \brief Uniquely indexes (graph_id, node_name) on Nodes table. + Status CreateNodeNameIndex() { return Run(R"sql( - CREATE UNIQUE INDEX IF NOT EXISTS RunNodeDefIndex - ON RunNodeDefs (run_id, node_def_id) + CREATE UNIQUE INDEX IF NOT EXISTS NodeNameIndex + ON Nodes (graph_id, node_name) + WHERE node_name IS NOT NULL )sql"); } @@ -387,22 +426,24 @@ Status SetupTensorboardSqliteDb(std::shared_ptr db) { TF_RETURN_IF_ERROR(s.CreateRunsTable()); TF_RETURN_IF_ERROR(s.CreateExperimentsTable()); TF_RETURN_IF_ERROR(s.CreateUsersTable()); - TF_RETURN_IF_ERROR(s.CreateNodeDefsTable()); - TF_RETURN_IF_ERROR(s.CreateRunNodeDefsTable()); + TF_RETURN_IF_ERROR(s.CreateGraphsTable()); + TF_RETURN_IF_ERROR(s.CreateNodeInputsTable()); + TF_RETURN_IF_ERROR(s.CreateNodesTable()); TF_RETURN_IF_ERROR(s.CreateTensorIndex()); TF_RETURN_IF_ERROR(s.CreateTensorChunkIndex()); TF_RETURN_IF_ERROR(s.CreateTagIdIndex()); TF_RETURN_IF_ERROR(s.CreateRunIdIndex()); TF_RETURN_IF_ERROR(s.CreateExperimentIdIndex()); TF_RETURN_IF_ERROR(s.CreateUserIdIndex()); - TF_RETURN_IF_ERROR(s.CreateNodeDefIdIndex()); + TF_RETURN_IF_ERROR(s.CreateGraphIdIndex()); + TF_RETURN_IF_ERROR(s.CreateNodeIdIndex()); + TF_RETURN_IF_ERROR(s.CreateNodeInputsIndex()); TF_RETURN_IF_ERROR(s.CreateTagNameIndex()); TF_RETURN_IF_ERROR(s.CreateRunNameIndex()); TF_RETURN_IF_ERROR(s.CreateExperimentNameIndex()); TF_RETURN_IF_ERROR(s.CreateUserNameIndex()); TF_RETURN_IF_ERROR(s.CreateUserEmailIndex()); - TF_RETURN_IF_ERROR(s.CreateNodeDefFingerprintIndex()); - TF_RETURN_IF_ERROR(s.CreateRunNodeDefIndex()); + TF_RETURN_IF_ERROR(s.CreateNodeNameIndex()); return Status::OK(); } diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index a26ad61660..ae063d24ef 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -15,17 +15,29 @@ limitations under the License. #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" #include "tensorflow/contrib/tensorboard/db/schema.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/db/sqlite.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/snappy.h" #include "tensorflow/core/util/event.pb.h" namespace tensorflow { namespace { +double GetWallTime(Env* env) { + // TODO(@jart): Follow precise definitions for time laid out in schema. + // TODO(@jart): Use monotonic clock from gRPC codebase. + return static_cast(env->NowMicros()) / 1.0e6; +} + int64 MakeRandomId() { + // TODO(@jart): Try generating ID in 2^24 space, falling back to 2^63 + // https://sqlite.org/src4/doc/trunk/www/varint.wiki int64 id = static_cast(random::New64() & ((1ULL << 63) - 1)); if (id == 0) { ++id; @@ -33,10 +45,201 @@ int64 MakeRandomId() { return id; } +Status Serialize(const protobuf::MessageLite& proto, string* output) { + output->clear(); + if (!proto.SerializeToString(output)) { + return errors::DataLoss("SerializeToString failed"); + } + return Status::OK(); +} + +Status Compress(const string& data, string* output) { + output->clear(); + if (!port::Snappy_Compress(data.data(), data.size(), output)) { + return errors::FailedPrecondition("TensorBase needs Snappy"); + } + return Status::OK(); +} + +Status BindProto(SqliteStatement* stmt, int parameter, + const protobuf::MessageLite& proto) { + string serialized; + TF_RETURN_IF_ERROR(Serialize(proto, &serialized)); + string compressed; + TF_RETURN_IF_ERROR(Compress(serialized, &compressed)); + stmt->BindBlobUnsafe(parameter, compressed); + return Status::OK(); +} + +Status BindTensor(SqliteStatement* stmt, int parameter, const Tensor& t) { + // TODO(@jart): Make portable between little and big endian systems. + // TODO(@jart): Use TensorChunks with minimal copying for big tensors. + // TODO(@jart): Add field to indicate encoding. + // TODO(@jart): Allow crunch tool to re-compress with zlib instead. + TensorProto p; + t.AsProtoTensorContent(&p); + return BindProto(stmt, parameter, p); +} + +class Transactor { + public: + explicit Transactor(std::shared_ptr db) + : db_(std::move(db)), + begin_(db_->Prepare("BEGIN TRANSACTION")), + commit_(db_->Prepare("COMMIT TRANSACTION")), + rollback_(db_->Prepare("ROLLBACK TRANSACTION")) {} + + template + Status Transact(T callback, Args&&... args) { + TF_RETURN_IF_ERROR(begin_.StepAndReset()); + Status s = callback(std::forward(args)...); + if (s.ok()) { + TF_RETURN_IF_ERROR(commit_.StepAndReset()); + } else { + TF_RETURN_WITH_CONTEXT_IF_ERROR(rollback_.StepAndReset(), s.ToString()); + } + return s; + } + + private: + std::shared_ptr db_; + SqliteStatement begin_; + SqliteStatement commit_; + SqliteStatement rollback_; +}; + +class GraphSaver { + public: + static Status SaveToRun(Env* env, Sqlite* db, GraphDef* graph, int64 run_id) { + auto get = db->Prepare("SELECT graph_id FROM Runs WHERE run_id = ?"); + get.BindInt(1, run_id); + bool is_done; + TF_RETURN_IF_ERROR(get.Step(&is_done)); + int64 graph_id = is_done ? 0 : get.ColumnInt(0); + if (graph_id == 0) { + graph_id = MakeRandomId(); + // TODO(@jart): Check for ID collision. + auto set = db->Prepare("UPDATE Runs SET graph_id = ? WHERE run_id = ?"); + set.BindInt(1, graph_id); + set.BindInt(2, run_id); + TF_RETURN_IF_ERROR(set.StepAndReset()); + } + return Save(env, db, graph, graph_id); + } + + static Status Save(Env* env, Sqlite* db, GraphDef* graph, int64 graph_id) { + GraphSaver saver{env, db, graph, graph_id}; + saver.MapNameToNodeId(); + TF_RETURN_IF_ERROR(saver.SaveNodeInputs()); + TF_RETURN_IF_ERROR(saver.SaveNodes()); + TF_RETURN_IF_ERROR(saver.SaveGraph()); + return Status::OK(); + } + + private: + GraphSaver(Env* env, Sqlite* db, GraphDef* graph, int64 graph_id) + : env_(env), db_(db), graph_(graph), graph_id_(graph_id) {} + + void MapNameToNodeId() { + size_t toto = static_cast(graph_->node_size()); + name_copies_.reserve(toto); + name_to_node_id_.reserve(toto); + for (int node_id = 0; node_id < graph_->node_size(); ++node_id) { + // Copy name into memory region, since we call clear_name() later. + // Then wrap in StringPiece so we can compare slices without copy. + name_copies_.emplace_back(graph_->node(node_id).name()); + name_to_node_id_.emplace(name_copies_.back(), node_id); + } + } + + Status SaveNodeInputs() { + auto purge = db_->Prepare("DELETE FROM NodeInputs WHERE graph_id = ?"); + purge.BindInt(1, graph_id_); + TF_RETURN_IF_ERROR(purge.StepAndReset()); + auto insert = db_->Prepare(R"sql( + INSERT INTO NodeInputs (graph_id, node_id, idx, input_node_id, is_control) + VALUES (?, ?, ?, ?, ?) + )sql"); + for (int node_id = 0; node_id < graph_->node_size(); ++node_id) { + const NodeDef& node = graph_->node(node_id); + for (int idx = 0; idx < node.input_size(); ++idx) { + StringPiece name = node.input(idx); + insert.BindInt(1, graph_id_); + insert.BindInt(2, node_id); + insert.BindInt(3, idx); + if (!name.empty() && name[0] == '^') { + name.remove_prefix(1); + insert.BindInt(5, 1); + } + auto e = name_to_node_id_.find(name); + if (e == name_to_node_id_.end()) { + return errors::DataLoss("Could not find node: ", name); + } + insert.BindInt(4, e->second); + TF_RETURN_WITH_CONTEXT_IF_ERROR(insert.StepAndReset(), node.name(), + " -> ", name); + } + } + return Status::OK(); + } + + Status SaveNodes() { + auto purge = db_->Prepare("DELETE FROM Nodes WHERE graph_id = ?"); + purge.BindInt(1, graph_id_); + TF_RETURN_IF_ERROR(purge.StepAndReset()); + auto insert = db_->Prepare(R"sql( + INSERT INTO Nodes (graph_id, node_id, node_name, op, device, node_def) + VALUES (?, ?, ?, ?, ?, ?) + )sql"); + for (int node_id = 0; node_id < graph_->node_size(); ++node_id) { + NodeDef* node = graph_->mutable_node(node_id); + insert.BindInt(1, graph_id_); + insert.BindInt(2, node_id); + insert.BindText(3, node->name()); + node->clear_name(); + if (!node->op().empty()) { + insert.BindText(4, node->op()); + node->clear_op(); + } + if (!node->device().empty()) { + insert.BindText(5, node->device()); + node->clear_device(); + } + node->clear_input(); + TF_RETURN_IF_ERROR(BindProto(&insert, 6, *node)); + TF_RETURN_WITH_CONTEXT_IF_ERROR(insert.StepAndReset(), node->name()); + } + return Status::OK(); + } + + Status SaveGraph() { + auto insert = db_->Prepare(R"sql( + INSERT OR REPLACE INTO Graphs (graph_id, inserted_time, graph_def) + VALUES (?, ?, ?) + )sql"); + insert.BindInt(1, graph_id_); + insert.BindDouble(2, GetWallTime(env_)); + graph_->clear_node(); + TF_RETURN_IF_ERROR(BindProto(&insert, 3, *graph_)); + return insert.StepAndReset(); + } + + Env* env_; + Sqlite* db_; + GraphDef* graph_; + int64 graph_id_; + std::vector name_copies_; + std::unordered_map name_to_node_id_; +}; + class SummaryDbWriter : public SummaryWriterInterface { public: SummaryDbWriter(Env* env, std::shared_ptr db) - : SummaryWriterInterface(), env_(env), db_(std::move(db)), run_id_(-1) {} + : SummaryWriterInterface(), + env_(env), + db_(std::move(db)), + txn_(db_), + run_id_{0LL} {} ~SummaryDbWriter() override {} Status Initialize(const string& experiment_name, const string& run_name, @@ -76,7 +279,7 @@ class SummaryDbWriter : public SummaryWriterInterface { // TODO(@jart): Check for random ID collisions without needing txn retry. insert_tensor_.BindInt(1, tag_id); insert_tensor_.BindInt(2, global_step); - insert_tensor_.BindDouble(3, GetWallTime()); + insert_tensor_.BindDouble(3, GetWallTime(env_)); switch (t.dtype()) { case DT_INT64: insert_tensor_.BindInt(4, t.scalar()()); @@ -85,22 +288,41 @@ class SummaryDbWriter : public SummaryWriterInterface { insert_tensor_.BindDouble(4, t.scalar()()); break; default: - TF_RETURN_IF_ERROR(BindTensor(t)); + TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t)); break; } return insert_tensor_.StepAndReset(); } - Status WriteEvent(std::unique_ptr e) override { + Status WriteGraph(int64 global_step, std::unique_ptr g) override { mutex_lock ml(mu_); TF_RETURN_IF_ERROR(InitializeParents()); - if (e->what_case() == Event::WhatCase::kSummary) { - const Summary& summary = e->summary(); - for (int i = 0; i < summary.value_size(); ++i) { - TF_RETURN_IF_ERROR(WriteSummary(e.get(), summary.value(i))); + return txn_.Transact(GraphSaver::SaveToRun, env_, db_.get(), g.get(), + run_id_); + } + + Status WriteEvent(std::unique_ptr e) override { + switch (e->what_case()) { + case Event::WhatCase::kSummary: { + mutex_lock ml(mu_); + TF_RETURN_IF_ERROR(InitializeParents()); + const Summary& summary = e->summary(); + for (int i = 0; i < summary.value_size(); ++i) { + TF_RETURN_IF_ERROR(WriteSummary(e.get(), summary.value(i))); + } + return Status::OK(); } + case Event::WhatCase::kGraphDef: { + std::unique_ptr graph{new GraphDef}; + if (!ParseProtoUnlimited(graph.get(), e->graph_def())) { + return errors::DataLoss("parse event.graph_def failed"); + } + return WriteGraph(e->step(), std::move(graph)); + } + default: + // TODO(@jart): Handle other stuff. + return Status::OK(); } - return Status::OK(); } Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { @@ -136,33 +358,8 @@ class SummaryDbWriter : public SummaryWriterInterface { string DebugString() override { return "SummaryDbWriter"; } private: - double GetWallTime() { - // TODO(@jart): Follow precise definitions for time laid out in schema. - // TODO(@jart): Use monotonic clock from gRPC codebase. - return static_cast(env_->NowMicros()) / 1.0e6; - } - - Status BindTensor(const Tensor& t) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - // TODO(@jart): Make portable between little and big endian systems. - // TODO(@jart): Use TensorChunks with minimal copying for big tensors. - TensorProto p; - t.AsProtoTensorContent(&p); - string encoded; - if (!p.SerializeToString(&encoded)) { - return errors::DataLoss("SerializeToString failed"); - } - // TODO(@jart): Put byte at beginning of blob to indicate encoding. - // TODO(@jart): Allow crunch tool to re-compress with zlib instead. - string compressed; - if (!port::Snappy_Compress(encoded.data(), encoded.size(), &compressed)) { - return errors::FailedPrecondition("TensorBase needs Snappy"); - } - insert_tensor_.BindBlobUnsafe(4, compressed); - return Status::OK(); - } - Status InitializeParents() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - if (run_id_ >= 0) { + if (run_id_ > 0) { return Status::OK(); } int64 user_id; @@ -195,7 +392,7 @@ class SummaryDbWriter : public SummaryWriterInterface { )sql"); insert_user.BindInt(1, *user_id); insert_user.BindText(2, user_name); - insert_user.BindDouble(3, GetWallTime()); + insert_user.BindDouble(3, GetWallTime(env_)); TF_RETURN_IF_ERROR(insert_user.StepAndReset()); } return Status::OK(); @@ -249,7 +446,7 @@ class SummaryDbWriter : public SummaryWriterInterface { } insert.BindInt(2, *id); insert.BindText(3, name); - insert.BindDouble(4, GetWallTime()); + insert.BindDouble(4, GetWallTime(env_)); TF_RETURN_IF_ERROR(insert.StepAndReset()); } return Status::OK(); @@ -276,6 +473,7 @@ class SummaryDbWriter : public SummaryWriterInterface { mutex mu_; Env* env_; std::shared_ptr db_ GUARDED_BY(mu_); + Transactor txn_ GUARDED_BY(mu_); SqliteStatement insert_tensor_ GUARDED_BY(mu_); SqliteStatement update_metadata_ GUARDED_BY(mu_); string user_name_ GUARDED_BY(mu_); diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index c1af51e7b7..3431842ca2 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/db/sqlite.h" @@ -212,5 +214,81 @@ TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) { kTolerance); } +TEST_F(SummaryDbWriterTest, WriteGraph) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "R", "", &env_, &writer_)); + env_.AdvanceByMillis(23); + GraphDef graph; + NodeDef* node = graph.add_node(); + node->set_name("x"); + node->set_op("Placeholder"); + node = graph.add_node(); + node->set_name("y"); + node->set_op("Placeholder"); + node = graph.add_node(); + node->set_name("z"); + node->set_op("Love"); + node = graph.add_node(); + node->set_name("+"); + node->set_op("Add"); + node->add_input("x"); + node->add_input("y"); + node->add_input("^z"); + node->set_device("tpu/lol"); + std::unique_ptr e{new Event}; + graph.SerializeToString(e->mutable_graph_def()); + TF_ASSERT_OK(writer_->WriteEvent(std::move(e))); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Runs")); + ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Graphs")); + ASSERT_EQ(4LL, QueryInt("SELECT COUNT(*) FROM Nodes")); + ASSERT_EQ(3LL, QueryInt("SELECT COUNT(*) FROM NodeInputs")); + + int64 graph_id = QueryInt("SELECT graph_id FROM Graphs"); + EXPECT_GT(graph_id, 0LL); + EXPECT_EQ(graph_id, QueryInt("SELECT graph_id FROM Runs")); + EXPECT_EQ(0.023, QueryDouble("SELECT inserted_time FROM Graphs")); + EXPECT_FALSE(QueryString("SELECT graph_def FROM Graphs").empty()); + + EXPECT_EQ("x", QueryString("SELECT node_name FROM Nodes WHERE node_id = 0")); + EXPECT_EQ("y", QueryString("SELECT node_name FROM Nodes WHERE node_id = 1")); + EXPECT_EQ("z", QueryString("SELECT node_name FROM Nodes WHERE node_id = 2")); + EXPECT_EQ("+", QueryString("SELECT node_name FROM Nodes WHERE node_id = 3")); + + EXPECT_EQ("Placeholder", + QueryString("SELECT op FROM Nodes WHERE node_id = 0")); + EXPECT_EQ("Placeholder", + QueryString("SELECT op FROM Nodes WHERE node_id = 1")); + EXPECT_EQ("Love", QueryString("SELECT op FROM Nodes WHERE node_id = 2")); + EXPECT_EQ("Add", QueryString("SELECT op FROM Nodes WHERE node_id = 3")); + + EXPECT_EQ("", QueryString("SELECT device FROM Nodes WHERE node_id = 0")); + EXPECT_EQ("", QueryString("SELECT device FROM Nodes WHERE node_id = 1")); + EXPECT_EQ("", QueryString("SELECT device FROM Nodes WHERE node_id = 2")); + EXPECT_EQ("tpu/lol", + QueryString("SELECT device FROM Nodes WHERE node_id = 3")); + + EXPECT_EQ(graph_id, + QueryInt("SELECT graph_id FROM NodeInputs WHERE idx = 0")); + EXPECT_EQ(graph_id, + QueryInt("SELECT graph_id FROM NodeInputs WHERE idx = 1")); + EXPECT_EQ(graph_id, + QueryInt("SELECT graph_id FROM NodeInputs WHERE idx = 2")); + + EXPECT_EQ(3LL, QueryInt("SELECT node_id FROM NodeInputs WHERE idx = 0")); + EXPECT_EQ(3LL, QueryInt("SELECT node_id FROM NodeInputs WHERE idx = 1")); + EXPECT_EQ(3LL, QueryInt("SELECT node_id FROM NodeInputs WHERE idx = 2")); + + EXPECT_EQ(0LL, + QueryInt("SELECT input_node_id FROM NodeInputs WHERE idx = 0")); + EXPECT_EQ(1LL, + QueryInt("SELECT input_node_id FROM NodeInputs WHERE idx = 1")); + EXPECT_EQ(2LL, + QueryInt("SELECT input_node_id FROM NodeInputs WHERE idx = 2")); + + EXPECT_EQ(0LL, QueryInt("SELECT is_control FROM NodeInputs WHERE idx = 0")); + EXPECT_EQ(0LL, QueryInt("SELECT is_control FROM NodeInputs WHERE idx = 1")); + EXPECT_EQ(1LL, QueryInt("SELECT is_control FROM NodeInputs WHERE idx = 2")); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 5e19effe3d..b7386abdea 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6247,6 +6247,7 @@ tf_kernel_library( "//tensorflow/contrib/tensorboard/db:summary_db_writer", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:summary_ops_op_lib", "//tensorflow/core/lib/db:sqlite", ], diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc index cd366f8c13..ad28d77ffd 100644 --- a/tensorflow/core/kernels/summary_interface.cc +++ b/tensorflow/core/kernels/summary_interface.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/summary.pb.h" @@ -393,6 +394,15 @@ class SummaryWriterImpl : public SummaryWriterInterface { return WriteEvent(std::move(e)); } + Status WriteGraph(int64 global_step, + std::unique_ptr graph) override { + std::unique_ptr e{new Event}; + e->set_step(global_step); + e->set_wall_time(GetWallTime()); + graph->SerializeToString(e->mutable_graph_def()); + return WriteEvent(std::move(e)); + } + Status WriteEvent(std::unique_ptr event) override { mutex_lock ml(mu_); queue_.emplace_back(std::move(event)); diff --git a/tensorflow/core/kernels/summary_interface.h b/tensorflow/core/kernels/summary_interface.h index ccf3459e56..da1c28709f 100644 --- a/tensorflow/core/kernels/summary_interface.h +++ b/tensorflow/core/kernels/summary_interface.h @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/util/event.pb.h" @@ -46,6 +47,9 @@ class SummaryWriterInterface : public ResourceBase { virtual Status WriteAudio(int64 global_step, Tensor t, const string& tag, int max_outputs_, float sample_rate) = 0; + virtual Status WriteGraph(int64 global_step, + std::unique_ptr graph) = 0; + virtual Status WriteEvent(std::unique_ptr e) = 0; }; diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index 1fe2fc5b66..3706f51cf4 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" +#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/kernels/summary_interface.h" @@ -268,4 +269,28 @@ class WriteAudioSummaryOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("WriteAudioSummary").Device(DEVICE_CPU), WriteAudioSummaryOp); +class WriteGraphSummaryOp : public OpKernel { + public: + explicit WriteGraphSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &t)); + const int64 global_step = t->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); + std::unique_ptr graph{new GraphDef}; + if (!ParseProtoUnlimited(graph.get(), t->scalar()())) { + ctx->CtxFailureWithWarning( + errors::DataLoss("Bad tf.GraphDef binary proto tensor string")); + return; + } + OP_REQUIRES_OK(ctx, s->WriteGraph(global_step, std::move(graph))); + } +}; +REGISTER_KERNEL_BUILDER(Name("WriteGraphSummary").Device(DEVICE_CPU), + WriteGraphSummaryOp); + } // namespace tensorflow diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc index 5efbac7ad7..7f6d8b06cd 100644 --- a/tensorflow/core/ops/summary_ops.cc +++ b/tensorflow/core/ops/summary_ops.cc @@ -256,4 +256,17 @@ sample_rate: The sample rate of the signal in hertz. max_outputs: Max number of batch elements to generate audio for. )doc"); +REGISTER_OP("WriteGraphSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tensor: string") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `GraphDef` protocol buffer to a `SummaryWriter`. + +writer: Handle of `SummaryWriter`. +global_step: The step to write the summary for. +tensor: A scalar string of the serialized tf.GraphDef proto. +)doc"); + } // namespace tensorflow diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index cc46dd5162..3677aaa886 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -66,6 +66,9 @@ BLACKLIST = [ "//tensorflow/contrib/timeseries/examples:data/period_trend.csv", # pylint:disable=line-too-long "//tensorflow/contrib/timeseries/python/timeseries:test_utils", "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils", # pylint:disable=line-too-long + + # TODO(yifeif): Remove when py_library(testonly=1) is ignored. + "//tensorflow/contrib/summary:summary_test_internal", ] -- GitLab From 9adc48d3083d33c3674b02787a2f1beeb66a4583 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 11:45:06 -0800 Subject: [PATCH 0460/1801] Don't enable dependency optimizer by default. PiperOrigin-RevId: 175857095 --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 3 ++- tensorflow/core/protobuf/rewriter_config.proto | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 1e93900e6a..1fa639ad33 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -76,7 +76,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } - if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + if (cfg_.dependency_optimization() == RewriterConfig::ON) { optimizers.push_back(std::unique_ptr( new DependencyOptimizer(cfg_.dependency_optimization()))); } @@ -187,6 +187,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { return !cfg.disable_model_pruning() || cfg.layout_optimizer() == RewriterConfig::ON || cfg.constant_folding() != RewriterConfig::OFF || + cfg.dependency_optimization() == RewriterConfig::ON || cfg.arithmetic_optimization() != RewriterConfig::OFF || cfg.auto_parallel().enable() || cfg.memory_optimization() > 1 || !cfg.optimizers().empty(); diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 96b55ce04b..3b5d1563a2 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -35,7 +35,7 @@ message RewriterConfig { Toggle constant_folding = 3; // Arithmetic optimizations (default is ON) Toggle arithmetic_optimization = 7; - // Control dependency optimizations (default is ON). + // Control dependency optimizations (default is OFF). Toggle dependency_optimization = 8; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; -- GitLab From 51e8b01c126d76a161b1957e8e1c6e87e5409910 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 12:13:13 -0800 Subject: [PATCH 0461/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 175861269 --- tensorflow/go/op/wrappers.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 5a6ae4fa5f..b43c978245 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -20553,6 +20553,27 @@ func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// Writes a `GraphDef` protocol buffer to a `SummaryWriter`. +// +// Arguments: +// writer: Handle of `SummaryWriter`. +// global_step: The step to write the summary for. +// tensor: A scalar string of the serialized tf.GraphDef proto. +// +// Returns the created operation. +func WriteGraphSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteGraphSummary", + Input: []tf.Input{ + writer, global_step, tensor, + }, + } + return scope.AddOperation(opspec) +} + // MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. type MaxPool3DGradGradAttr func(optionalAttr) -- GitLab From b0a49cd0f46cbc4d326ee87ab92c28b4b7b9ead7 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 15 Nov 2017 12:24:51 -0800 Subject: [PATCH 0462/1801] TensorArray changes: respect infer_shape argument inside the TensorArray object. This adds a new attr to the TensorArrayV3 construction op: identical_element_shapes (default False). If True, then shape for all elements is inferred at runtime when any single element is written. The Python TensorArray constructor's "infer_shape" argument is piped through to this attribute. Since it is true by default, this enables runtime element consistency checking on top of the existing graph build time static shape checking. PiperOrigin-RevId: 175862771 --- tensorflow/core/framework/node_def_util.cc | 4 +++ tensorflow/core/framework/node_def_util.h | 3 +++ tensorflow/core/framework/op_kernel.cc | 4 +++ tensorflow/core/framework/op_kernel.h | 3 +++ tensorflow/core/kernels/tensor_array.h | 23 ++++++++++++---- tensorflow/core/kernels/tensor_array_ops.cc | 27 ++++++++++++++----- tensorflow/core/ops/data_flow_ops.cc | 7 +++++ .../kernel_tests/tensor_array_ops_test.py | 23 +++++++++++++++- tensorflow/python/ops/tensor_array_ops.py | 10 ++++++- 9 files changed, 90 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc index f039497f13..477184022d 100644 --- a/tensorflow/core/framework/node_def_util.cc +++ b/tensorflow/core/framework/node_def_util.cc @@ -243,6 +243,10 @@ DEFINE_GET_ATTR(Tensor, tensor, "tensor", emplace_back, t, Tensor t; DEFINE_GET_ATTR(NameAttrList, func, "func", emplace_back, v, ;); #undef DEFINE_GET_ATTR +bool HasNodeAttr(const NodeDef& node_def, StringPiece attr_name) { + return node_def.attr().find(attr_name.ToString()) != node_def.attr().end(); +} + static const string& kEmptyString = *new string(); const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name) { diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h index 523b538295..f6f28aac48 100644 --- a/tensorflow/core/framework/node_def_util.h +++ b/tensorflow/core/framework/node_def_util.h @@ -157,6 +157,9 @@ class AttrSlice { const AttrValueMap* attrs_; }; +// Return true if the attr with the name attr_name is defined in node_def. +bool HasNodeAttr(const NodeDef& node_def, StringPiece attr_name); + // Look up the attr with name attr_name and set *value to its value. If no // attr with attr_name is found in node_def, or the attr does not have // a matching type, a non-ok status will be returned. diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index c23692409c..4d410809e7 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -192,6 +192,10 @@ OpKernelConstruction::OpKernelConstruction( graph_def_version_(graph_def_version), status_(status) {} +bool OpKernelConstruction::HasAttr(StringPiece attr_name) const { + return HasNodeAttr(def(), attr_name); +} + void OpKernelConstruction::SetStatus(const Status& status) { status_->Update(status); } diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 7eec84e26c..da0dc54943 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -301,6 +301,9 @@ class OpKernelConstruction { template Status GetAttr(StringPiece attr_name, T* value) const; + // Return true if the attr_name is defined in def(). + bool HasAttr(StringPiece attr_name) const; + // Return the device type. const DeviceType& device_type() const { return device_type_; } diff --git a/tensorflow/core/kernels/tensor_array.h b/tensorflow/core/kernels/tensor_array.h index 2a41d4c419..90b71e370c 100644 --- a/tensorflow/core/kernels/tensor_array.h +++ b/tensorflow/core/kernels/tensor_array.h @@ -138,8 +138,9 @@ class TensorArray : public ResourceBase { // users to construct this many Tensors for storage in a TensorArray. TensorArray(const string& key, const DataType& dtype, const Tensor& handle, int32 N, const PartialTensorShape& element_shape, - bool dynamic_size, bool multiple_writes_aggregate, bool is_grad, - int32 marked_size, bool clear_after_read) + bool identical_element_shapes, bool dynamic_size, + bool multiple_writes_aggregate, bool is_grad, int32 marked_size, + bool clear_after_read) : key_(key), dtype_(dtype), handle_(handle), @@ -151,6 +152,7 @@ class TensorArray : public ResourceBase { is_grad_(is_grad), marked_size_(marked_size), element_shape_(element_shape), + identical_element_shapes_(identical_element_shapes), tensors_(N) {} // Write PersistentTensor 'value' to index 'index'. @@ -320,6 +322,8 @@ class TensorArray : public ResourceBase { return !gradients_disallowed_; } + bool HasIdenticalElementShapes() const { return identical_element_shapes_; } + // Copy the TensorShapes from another TensorArray into this one. // The sizes of the two TensorArrays must match and this one // may not have any entries filled in. This performs a "soft copy", @@ -379,7 +383,7 @@ class TensorArray : public ResourceBase { // Multiple writes to the same index will result in summation of the // values (used by backprop) - bool multiple_writes_aggregate_; + const bool multiple_writes_aggregate_; // If multiple Writes were attempted (e.g. via attribute // multiple_writes_aggregate), then gradients are disallowed. @@ -387,10 +391,10 @@ class TensorArray : public ResourceBase { // After a read at an index, clear away its PersistentTensor to // release memory. - bool clear_after_read_; + const bool clear_after_read_; // True iff this is a gradient tensor array. - bool is_grad_; + const bool is_grad_; // The size of the TensorArray after a (legacy) unpack or split is performed. // -1 if there has been no unpack or split performed on the TensorArray. @@ -400,6 +404,13 @@ class TensorArray : public ResourceBase { // known at all. PartialTensorShape element_shape_ GUARDED_BY(mu_); + // Whether all elements in the TensorArray have identical shapes. + // This allows certain behaviors, like dynamically checking for + // consistent shapes on write, and being able to fill in properly + // shaped zero tensors on stack -- even if the initial element_shape + // was not fully defined. + const bool identical_element_shapes_; + // TensorAndState is used to keep track of the PersistentTensors // stored in the TensorArray, along with their shapes, and a boolean // that determines whether they have already been read or not. @@ -463,6 +474,8 @@ Status TensorArray::LockedWriteOrAggregate(OpKernelContext* ctx, " which is incompatible with the TensorArray's inferred element " "shape: ", element_shape_.DebugString(), " (consider setting infer_shape=False)."); + } else if (identical_element_shapes_ && !element_shape_.IsFullyDefined()) { + element_shape_ = PartialTensorShape(value_t->shape().dim_sizes()); } if (t.read) { diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index 2191e4e8c5..cca6d0e35f 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -162,6 +162,14 @@ class TensorArrayOp : public TensorArrayCreationOp { OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_)); OP_REQUIRES_OK(context, context->GetAttr("element_shape", &element_shape_)); OP_REQUIRES_OK(context, context->GetAttr("dynamic_size", &dynamic_size_)); + // The HasAttr check is for backwards compatibility with older op + // versions which do not have this attribute. + if (context->HasAttr("identical_element_shapes")) { + OP_REQUIRES_OK(context, context->GetAttr("identical_element_shapes", + &identical_element_shapes_)); + } else { + identical_element_shapes_ = false; + } OP_REQUIRES_OK(context, context->GetAttr("clear_after_read", &clear_after_read_)); OP_REQUIRES_OK(context, @@ -196,8 +204,9 @@ class TensorArrayOp : public TensorArrayCreationOp { TensorArray* tensor_array = new TensorArray( key, dtype_, *tensor_array_output_handle, size, element_shape_, - dynamic_size_, false /* multiple_writes_aggregate */, - false /* is_grad */, -1 /* marked_size */, clear_after_read_); + identical_element_shapes_, dynamic_size_, + false /* multiple_writes_aggregate */, false /* is_grad */, + -1 /* marked_size */, clear_after_read_); TF_RETURN_IF_ERROR( rm->Create(ctx->step_container()->name(), key, tensor_array)); @@ -210,6 +219,7 @@ class TensorArrayOp : public TensorArrayCreationOp { private: DataType dtype_; PartialTensorShape element_shape_; + bool identical_element_shapes_; bool dynamic_size_; bool clear_after_read_; string tensor_array_name_; // The name used to create the TensorArray. @@ -322,7 +332,8 @@ class TensorArrayGradOp : public TensorArrayCreationOp { output_handle](TensorArray** ret) -> Status { *ret = new TensorArray( key, tensor_array->ElemType(), *tensor_array_output_handle, - array_size, tensor_array->ElemShape(), false /* dynamic_size */, + array_size, tensor_array->ElemShape(), + tensor_array->HasIdenticalElementShapes(), false /* dynamic_size */, true /* multiple_writes_aggregate */, true /* is_grad */, marked_size /* marked_size */, true /* close_after_read */); TF_RETURN_IF_ERROR((*ret)->CopyShapesFrom(tensor_array)); @@ -1003,8 +1014,9 @@ class TensorArrayUnpackOrScatterOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->input("value", &tensor_value)); TensorShape element_shape(tensor_value->shape()); - OP_REQUIRES(ctx, FastBoundsCheck(element_shape.dim_size(0), - std::numeric_limits::max()), + OP_REQUIRES(ctx, + FastBoundsCheck(element_shape.dim_size(0), + std::numeric_limits::max()), errors::InvalidArgument("tensor dim0 too large to unpack")); OP_REQUIRES( @@ -1204,8 +1216,9 @@ class TensorArraySplitOp : public OpKernel { errors::InvalidArgument( "Expected lengths to be a vector, received shape: ", tensor_lengths->shape().DebugString())); - OP_REQUIRES(ctx, FastBoundsCheck(tensor_lengths->NumElements(), - std::numeric_limits::max()), + OP_REQUIRES(ctx, + FastBoundsCheck(tensor_lengths->NumElements(), + std::numeric_limits::max()), errors::InvalidArgument( "Expected lengths to have < max int32 entries")); diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc index 3b1ed217ce..ac2dc601f1 100644 --- a/tensorflow/core/ops/data_flow_ops.cc +++ b/tensorflow/core/ops/data_flow_ops.cc @@ -1346,6 +1346,7 @@ REGISTER_OP("TensorArrayV3") .Attr("element_shape: shape = { unknown_rank: true }") .Attr("dynamic_size: bool = false") .Attr("clear_after_read: bool = true") + .Attr("identical_element_shapes: bool = false") .Attr("tensor_array_name: string = ''") .Output("handle: resource") .Output("flow: float") @@ -1374,6 +1375,12 @@ dynamic_size: A boolean that determines whether writes to the TensorArray clear_after_read: If true (default), Tensors in the TensorArray are cleared after being read. This disables multiple read semantics but allows early release of memory. +identical_element_shapes: If true (default is false), then all + elements in the TensorArray will be expected to have have identical shapes. + This allows certain behaviors, like dynamically checking for + consistent shapes on write, and being able to fill in properly + shaped zero tensors on stack -- even if the element_shape attribute + is not fully defined. tensor_array_name: Overrides the name used for the temporary tensor_array resource. Default value is the name of the 'TensorArray' op (which is guaranteed unique). diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index 0f3b11e7f9..835fdbe2aa 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -43,6 +43,10 @@ import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test +# TODO(ebrevdo): Delete this line after Dec. 4, 2017. +tensor_array_ops._ENABLE_IDENTICAL_ELEMENT_SHAPES = True + + def _make_converter(tf_dtype): def _converter(x): if tf_dtype == dtypes.string: @@ -186,6 +190,22 @@ class TensorArrayTest(test.TestCase): def testTensorArrayReadOrPackNotAllValuesAvailableFillsZeros(self): self._testTensorArrayReadOrPackNotAllValuesAvailableFillsZeros() + def _testTensorArrayReadOrPackNotAllValuesAvailableInferShapeFillsZeros(self): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, + tensor_array_name="foo", + size=3) + self.assertAllEqual( + [[0.0, 0.0]], self.evaluate(ta.write(1, [[4.0, 5.0]]).read(0))) + self.assertAllEqual([[[0.0, 0.0]], [[4.0, 5.0]], [[0.0, 0.0]]], + self.evaluate(ta.write(1, [[4.0, 5.0]]).stack())) + self.assertAllEqual([[0.0, 0.0], [4.0, 5.0], [0.0, 0.0]], + self.evaluate(ta.write(1, [[4.0, 5.0]]).concat())) + + @test_util.run_in_graph_and_eager_modes() + def testTensorArrayReadOrPackNotAllValuesAvailableInferShapeFillsZeros(self): + self._testTensorArrayReadOrPackNotAllValuesAvailableInferShapeFillsZeros() + def _testTensorArrayUnpackRead(self, tf_dtype): with self.test_session(use_gpu=True): convert = _make_converter(tf_dtype) @@ -739,7 +759,8 @@ class TensorArrayTest(test.TestCase): def testTensorArrayGradientSplitConcat(self): with self.test_session(use_gpu=True) as session: ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, tensor_array_name="foo", size=2) + dtype=dtypes.float32, tensor_array_name="foo", size=2, + infer_shape=False) value = constant_op.constant( [[1.0, -1.0], [10.0, -10.0], [100.0, -100.0]]) diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py index ea5354c1d6..605654d9be 100644 --- a/tensorflow/python/ops/tensor_array_ops.py +++ b/tensorflow/python/ops/tensor_array_ops.py @@ -36,6 +36,9 @@ from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.util import tf_should_use +# TODO(ebrevdo): Set to True in Dec. 4, 2017. +_ENABLE_IDENTICAL_ELEMENT_SHAPES = False + # _GraphTensorArray accesses many of the hidden generated ops, but is in # fact built to wrap these methods. @@ -146,6 +149,10 @@ class _GraphTensorArray(object): # write into the TensorArray from a Tensor with a set device # will retroactively set the device value of this op. def create(): + """Create the TensorArray op.""" + ta_kwargs = {} + if _ENABLE_IDENTICAL_ELEMENT_SHAPES: + ta_kwargs["identical_element_shapes"] = infer_shape return gen_data_flow_ops._tensor_array_v3( dtype=dtype, size=size, @@ -153,7 +160,8 @@ class _GraphTensorArray(object): dynamic_size=dynamic_size, clear_after_read=clear_after_read, tensor_array_name=tensor_array_name, - name=scope) + name=scope, + **ta_kwargs) if colocate_with_first_write_call: with ops.device(None), ops.colocate_with(None, ignore_existing=True): self._handle, self._flow = create() -- GitLab From 50b1bc79f640b08633ed970719ee46c17509af98 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 15 Nov 2017 12:36:51 -0800 Subject: [PATCH 0463/1801] Add test util for setting init_value of SumReduce, ReduceWindow, and SelectAndScatter ops to a Constant 0.0f. PiperOrigin-RevId: 175864310 --- tensorflow/compiler/xla/tests/BUILD | 3 + tensorflow/compiler/xla/tests/test_utils.cc | 69 ++++++++++++++++++++- tensorflow/compiler/xla/tests/test_utils.h | 11 ++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 3e62481629..63c3541e14 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -69,7 +69,10 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_verifier", + "//tensorflow/compiler/xla/service:transfer_manager", "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_headers_lib", ], ) diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc index cdd3d66bbb..0d56c9f483 100644 --- a/tensorflow/compiler/xla/tests/test_utils.cc +++ b/tensorflow/compiler/xla/tests/test_utils.cc @@ -14,8 +14,9 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/tests/test_utils.h" - #include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" namespace xla { @@ -46,6 +47,44 @@ void PopulateWithRandomIntegralData(Literal* literal) { })); } +bool LooksLikeSum(const HloInstruction& instruction) { + return instruction.opcode() == HloOpcode::kAdd && + instruction.operand(0)->opcode() == HloOpcode::kParameter && + instruction.operand(1)->opcode() == HloOpcode::kParameter && + instruction.operand(0) != instruction.operand(1); +} + +// Given an instruction and operand number, replace the given operand with +// a Literal Constant Zero. Handle the case of a fusion instruction by +// replacing the fusion's parent's parameter with a Literal Constant Zero, +// unless the fusion's parent is itself a fusion. +Status MaybeReplaceParameterInputWithZero(HloInstruction* const instruction, + const int64 operand_number) { + CHECK_LT(operand_number, instruction->operand_count()); + if (instruction->operand(operand_number)->opcode() != HloOpcode::kParameter) { + return Status::OK(); + } + + HloComputation* const computation = instruction->parent(); + std::unique_ptr zero = HloInstruction::CreateConstant( + MakeUnique(Literal::Zero(instruction->shape().element_type()))); + + if (computation->IsFusionComputation()) { + HloInstruction* const fusion_instruction = computation->FusionInstruction(); + if (fusion_instruction->IsFused()) { + return Unimplemented( + "Unable to replace fused parameter of fusion instruction"); + } + TF_RETURN_IF_ERROR(fusion_instruction->ReplaceOperandWith( + instruction->operand(operand_number)->parameter_number(), + fusion_instruction->parent()->AddInstruction(std::move(zero)))); + } else { + TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith( + operand_number, computation->AddInstruction(std::move(zero)))); + } + return Status::OK(); +} + } // namespace StatusOr> MakeFakeLiteral(const Shape& shape) { @@ -117,4 +156,32 @@ StatusOr>> MakeFakeArguments( return std::move(arguments); } +Status ReplaceInitsWithConstants(HloModule* const module) { + for (HloComputation* const computation : module->computations()) { + for (HloInstruction* const instruction : computation->instructions()) { + const HloOpcode opcode = instruction->opcode(); + if ((opcode == HloOpcode::kReduce || + opcode == HloOpcode::kReduceWindow) && + LooksLikeSum(*instruction->to_apply()->root_instruction())) { + TF_RETURN_IF_ERROR(MaybeReplaceParameterInputWithZero(instruction, 1)); + } else if (opcode == HloOpcode::kSelectAndScatter && + LooksLikeSum(*instruction->scatter()->root_instruction())) { + TF_RETURN_IF_ERROR(MaybeReplaceParameterInputWithZero(instruction, 2)); + } + } + } + return Status::OK(); +} + +Status VerifyHloModule(const perftools::gputools::Platform& platform, + HloModule* const module) { + return HloVerifier( + std::bind( + &TransferManager::GetByteSizeRequirement, + TransferManager::GetForPlatform(&platform).ConsumeValueOrDie(), + std::placeholders::_1)) + .Run(module) + .status(); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h index 12d5255fce..9aca162a18 100644 --- a/tensorflow/compiler/xla/tests/test_utils.h +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/stream_executor/platform.h" namespace xla { @@ -62,6 +63,16 @@ StatusOr> MakeFakeLiteral(const Shape& shape); StatusOr>> MakeFakeArguments( const HloModule& module); +// Reductions using Adds, ReduceWindow, and SelectAndScatter, require their +// init_value to be replaced with the constant 0.0f when testing, otherwise we +// may generate a bad init_value when looking at the op in isolation. +Status ReplaceInitsWithConstants(HloModule* const module); + +// Check that a given module satisfies various constraints before trying to +// execute it. +Status VerifyHloModule(const perftools::gputools::Platform& platform, + HloModule* const module); + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_TESTS_TEST_UTILS_H_ -- GitLab From 38526cfaebf42f00da5a745ce0647f67f6076c58 Mon Sep 17 00:00:00 2001 From: Neal Wu Date: Wed, 15 Nov 2017 12:39:56 -0800 Subject: [PATCH 0464/1801] Add a link to the guide on "Using savedmodel with estimators" to the documentation for export_savedmodel PiperOrigin-RevId: 175864747 --- tensorflow/python/estimator/estimator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 2d036e2cfb..f267f4a54e 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -461,8 +461,12 @@ class Estimator(object): assets_extra=None, as_text=False, checkpoint_path=None): + # pylint: disable=line-too-long """Exports inference graph as a SavedModel into given dir. + For a detailed guide, see + @{$saved_model#using_savedmodel_with_estimators$Using SavedModel with Estimators}. + This method builds a new graph by first calling the serving_input_receiver_fn to obtain feature `Tensor`s, and then calling this `Estimator`'s model_fn to generate the model graph based on those @@ -506,6 +510,7 @@ class Estimator(object): ValueError: if no serving_input_receiver_fn is provided, no export_outputs are provided, or no checkpoint can be found. """ + # pylint: enable=line-too-long if serving_input_receiver_fn is None: raise ValueError('serving_input_receiver_fn must be defined.') -- GitLab From 04a63c763e25c4f21f22d6d27757f4022d138b8d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 12:41:43 -0800 Subject: [PATCH 0465/1801] Adds a SetInvalidOutput method to XlaOpKernelContext. PiperOrigin-RevId: 175865046 --- tensorflow/compiler/tf2xla/kernels/const_op.cc | 5 +++++ tensorflow/compiler/tf2xla/xla_op_kernel.cc | 10 ++++++++++ tensorflow/compiler/tf2xla/xla_op_kernel.h | 4 ++++ 3 files changed, 19 insertions(+) diff --git a/tensorflow/compiler/tf2xla/kernels/const_op.cc b/tensorflow/compiler/tf2xla/kernels/const_op.cc index 9833323d85..8f78b4c8f9 100644 --- a/tensorflow/compiler/tf2xla/kernels/const_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/const_op.cc @@ -40,6 +40,11 @@ class ConstOp : public XlaOpKernel { void Compile(XlaOpKernelContext* ctx) override { TensorShape shape(proto_.tensor_shape()); + if (proto_.dtype() == DT_STRING) { + LOG(WARNING) << "Not computing Const of type DT_STRING"; + ctx->SetInvalidOutput(0); + return; + } xla::ComputationBuilder* b = ctx->builder(); // To avoid blowups for large constants filled with the same value, diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index b948dfee6a..a052bb105e 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -345,6 +345,16 @@ void XlaOpKernelContext::SetConstantOutput(int index, const Tensor& constant) { expression->set_constant_value(constant); } +void XlaOpKernelContext::SetInvalidOutput(int index) { + const TensorShape shape; + Tensor* output = nullptr; + OP_REQUIRES_OK(context_, context_->allocate_output(index, shape, &output)); + XlaExpression* expression = CastExpressionFromUninitializedTensor(output); + xla::ComputationDataHandle handle; + handle.set_handle(0); + expression->set_handle(handle); +} + void XlaOpKernelContext::SetResourceOutput(int index, XlaResource* resource) { Tensor* output = nullptr; // The shape of the output tensor is the shape of the resource itself diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index 5519e89252..76bcf594e6 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -142,6 +142,10 @@ class XlaOpKernelContext { // SetConstantOutput where possible. void SetConstantOutput(int index, const Tensor& host_tensor); + // Sets output 'index' to an invalid value. + // Any subsequent attempt to consume this output will cause an error. + void SetInvalidOutput(int index); + // Status handling. void SetStatus(const Status& status) { context_->SetStatus(status); } Status status() { return context_->status(); } -- GitLab From 82bc287dd183118f5048b10ec473e5b4ea939c7f Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 15 Nov 2017 12:43:59 -0800 Subject: [PATCH 0466/1801] Internal change. PiperOrigin-RevId: 175865309 --- tensorflow/core/api_def/api_test.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/api_test.cc b/tensorflow/core/api_def/api_test.cc index d95d958d5a..f222d345ab 100644 --- a/tensorflow/core/api_def/api_test.cc +++ b/tensorflow/core/api_def/api_test.cc @@ -272,7 +272,10 @@ void RunApiTest(bool update_api_def, const string& api_files_dir) { for (auto new_api_entry : new_api_defs_map) { const auto& file_path = new_api_entry.first; - const auto& golden_api_defs_str = golden_api_defs_map.at(file_path); + std::string golden_api_defs_str = ""; + if (golden_api_defs_map.find(file_path) != golden_api_defs_map.end()) { + golden_api_defs_str = golden_api_defs_map.at(file_path); + } string new_api_defs_str = new_api_entry.second.DebugString(); new_api_defs_str = PBTxtToMultiline(new_api_defs_str, multi_line_fields); if (golden_api_defs_str == new_api_defs_str) { -- GitLab From 2411c68c35849559efb97ce2392d4505ac4d8cf0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 12:50:05 -0800 Subject: [PATCH 0467/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 175866123 --- .../core/ops/compat/ops_history.v1.pbtxt | 57 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 8 +++ 2 files changed, 65 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 6833c8e0ea..ffb608d600 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -40245,6 +40245,63 @@ op { } is_stateful: true } +op { + name: "TensorArrayV3" + input_arg { + name: "size" + type: DT_INT32 + } + output_arg { + name: "handle" + type: DT_RESOURCE + } + output_arg { + name: "flow" + type: DT_FLOAT + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "element_shape" + type: "shape" + default_value { + shape { + unknown_rank: true + } + } + } + attr { + name: "dynamic_size" + type: "bool" + default_value { + b: false + } + } + attr { + name: "clear_after_read" + type: "bool" + default_value { + b: true + } + } + attr { + name: "identical_element_shapes" + type: "bool" + default_value { + b: false + } + } + attr { + name: "tensor_array_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} op { name: "TensorArrayWrite" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 68fc61150c..30b293a28a 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -31796,6 +31796,14 @@ op { } description: "If true (default), Tensors in the TensorArray are cleared\nafter being read. This disables multiple read semantics but allows early\nrelease of memory." } + attr { + name: "identical_element_shapes" + type: "bool" + default_value { + b: false + } + description: "If true (default is false), then all\nelements in the TensorArray will be expected to have have identical shapes.\nThis allows certain behaviors, like dynamically checking for\nconsistent shapes on write, and being able to fill in properly\nshaped zero tensors on stack -- even if the element_shape attribute\nis not fully defined." + } attr { name: "tensor_array_name" type: "string" -- GitLab From f4b6effba238fbce2c3c66d24ab276c61eda9fc1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 12:57:13 -0800 Subject: [PATCH 0468/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 175867164 --- tensorflow/go/op/wrappers.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index b43c978245..869213eb17 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -14797,6 +14797,21 @@ func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { } } +// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. +// +// value: If true (default is false), then all +// elements in the TensorArray will be expected to have have identical shapes. +// This allows certain behaviors, like dynamically checking for +// consistent shapes on write, and being able to fill in properly +// shaped zero tensors on stack -- even if the element_shape attribute +// is not fully defined. +// If not specified, defaults to false +func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["identical_element_shapes"] = value + } +} + // TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. // // value: Overrides the name used for the temporary tensor_array -- GitLab From a86ce6a3255ce0aa1e0d237c6235ea3e4cafd739 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 15 Nov 2017 13:17:31 -0800 Subject: [PATCH 0469/1801] Removes eager memory trace. Has been broken and unrunnable for a while (since EagerTensor went to C). If we want its functionality we can recover it from the C tape code I believe. PiperOrigin-RevId: 175869936 --- tensorflow/contrib/eager/python/tfe.py | 4 -- tensorflow/python/eager/BUILD | 7 -- tensorflow/python/eager/core.py | 26 -------- tensorflow/python/eager/execute.py | 8 --- tensorflow/python/eager/memory_trace.py | 89 ------------------------- tensorflow/python/framework/ops.py | 5 -- 6 files changed, 139 deletions(-) delete mode 100644 tensorflow/python/eager/memory_trace.py diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index 577d3efef6..1697c879de 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -30,9 +30,6 @@ To use, at program startup, call `tfe.enable_eager_execution()`. @@value_and_gradients_function @@GradientTape -@@enable_tracing -@@flush_trace - @@run @@enable_eager_execution @@ -91,7 +88,6 @@ from tensorflow.python.eager.context import in_eager_mode from tensorflow.python.eager.context import in_graph_mode from tensorflow.python.eager.context import list_devices from tensorflow.python.eager.context import num_gpus -from tensorflow.python.eager.core import enable_tracing from tensorflow.python.eager.custom_gradient import custom_gradient from tensorflow.python.eager.execution_callbacks import add_execution_callback from tensorflow.python.eager.execution_callbacks import clear_execution_callbacks diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 912aa4c195..b491a637ba 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -61,7 +61,6 @@ py_library( visibility = ["//tensorflow:internal"], deps = [ ":context", - ":memory_trace", "//tensorflow/python:errors", "//tensorflow/python:pywrap_tensorflow", ], @@ -88,12 +87,6 @@ py_library( visibility = ["//tensorflow:internal"], ) -py_library( - name = "memory_trace", - srcs = ["memory_trace.py"], - srcs_version = "PY2AND3", -) - cuda_py_test( name = "tensor_test", srcs = ["tensor_test.py"], diff --git a/tensorflow/python/eager/core.py b/tensorflow/python/eager/core.py index 3f3d38b951..483b717210 100644 --- a/tensorflow/python/eager/core.py +++ b/tensorflow/python/eager/core.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.python import pywrap_tensorflow -from tensorflow.python.eager import memory_trace from tensorflow.python.framework import errors # Trace of execution and memory usage. @@ -48,28 +47,3 @@ class _NotOkStatusException(Exception): pywrap_tensorflow.TFE_Py_RegisterExceptionClass(_NotOkStatusException) - - -def enable_tracing(): - """Enables tracing of execution and memory usage. - - WARNING: tracing is not thread-safe. - """ - # TODO(alive): Add code example in doc string. - global _active_trace - _active_trace = memory_trace.MemoryTrace() - - -def flush_trace(): - """Flushes the active trace, if it exists. - - WARNING: tracing is not thread-safe. - """ - # TODO(alive): Add code example in doc string. - if _active_trace is not None: - _active_trace.flush_trace() - - -def active_trace(): - """Returns the current global active trace of execution and memory usage.""" - return _active_trace diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 1b5f3f7f9d..e392c6bb53 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -65,15 +65,7 @@ def execute(op_name, num_outputs, inputs, attrs, ctx, name=None): message = e.message six.raise_from(core._status_to_exception(e.code, message), None) - # TODO(alive, cais): Use the execution callback mechanism. - if core.active_trace() is not None: - for t in tensors: - core.active_trace().record_tensor(op_name, - ops.tensor_id(t), - t.device, - t.shape.num_elements()) # pylint: enable=protected-access - # TODO(cais): Optimize this, perhaps by replacing this execute function with # a different one when there are execution callback(s). for callback in ctx.post_execution_callbacks: diff --git a/tensorflow/python/eager/memory_trace.py b/tensorflow/python/eager/memory_trace.py deleted file mode 100644 index 094bcab9e2..0000000000 --- a/tensorflow/python/eager/memory_trace.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility to trace per-device memory consumption across time over execution.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -TraceEntry = collections.namedtuple( - "TraceEntry", ["op_name", "tensor_id", "mem_usage", "device", "size"]) -TensorData = collections.namedtuple( - "TensorData", ["op_name", "tensor_size", "device"]) - - -class MemoryTrace(object): - """Records a trace of memory usage over operation execution.""" - - def __init__(self): - - self.trace = [] - self.tensor_to_data = {} - self.current_device_mem_usage = collections.defaultdict(int) - - def record_tensor(self, op_name, tensor_id, device, size): - self.current_device_mem_usage[device] += size - self.tensor_to_data[tensor_id] = TensorData(op_name, size, device) - self.trace.append(TraceEntry(op_name, - tensor_id, - dict(self.current_device_mem_usage.items()), - device, - size)) - - def delete_tensor(self, tensor_id): - if tensor_id not in self.tensor_to_data: - return - data = self.tensor_to_data.pop(tensor_id, None) - if data is None: return - self.current_device_mem_usage[data.device] -= data.tensor_size - self.trace.append(TraceEntry(data.op_name, - tensor_id, - dict(self.current_device_mem_usage.items()), - data.device, - -data.tensor_size)) - - def flush_trace(self): - """Prints the formatted trace recorded so far.""" - longest_op_name = max(len(t.op_name) for t in self.trace) - longest_op_name = max(longest_op_name, len("op_name")) - longest_heap_size = max(max(len(str(d)) for d in t.mem_usage) - for t in self.trace) - longest_heap_size = max(longest_heap_size, len("d0")) - longest_id_len = max(len(str(t.tensor_id)) for t in self.trace) - longest_id_len = max(longest_id_len, 2) - first_line = [] - first_line.append("+/-") - first_line.append("op_name".ljust(longest_op_name)) - first_line.append("id".ljust(longest_id_len)) - for i in range(len(self.current_device_mem_usage)): - first_line.append(("d"+str(i)).ljust(longest_heap_size)) - first_line.append("size") - print(" | ".join(first_line)) - for t in self.trace: - line = [] - if t.size > 0: - line.append("+ ") - else: - line.append("- ") - line.append(t.op_name.ljust(longest_op_name)) - line.append(str(t.tensor_id).ljust(longest_id_len)) - for d in t.mem_usage: - line.append(str(d).ljust(longest_heap_size)) - line.append(str(t.size)) - print(" | ".join(line)) - self.trace = [] - print() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 09e0a83c76..0e647a27f5 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -715,11 +715,6 @@ class _EagerTensorBase(Tensor): new_tensor = self._copy_to_device(context=ctx._handle, device=device_name) except core._NotOkStatusException as e: six.raise_from(core._status_to_exception(e.code, e.message), None) - if core.active_trace() is not None: - core.active_trace().record_tensor("COPY", - tensor_id(new_tensor), - new_tensor.device, - new_tensor.shape.num_elements()) # Record the copy on tape and define backprop copy as well. if not context.in_graph_mode(): -- GitLab From 3caceafb9e6db900e11eccf697a56144e019cf9c Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 15 Nov 2017 13:30:20 -0800 Subject: [PATCH 0470/1801] Final steps for updating tf.keras to the Keras 2.1.1 API. PiperOrigin-RevId: 175871749 --- tensorflow/python/keras/BUILD | 12 ++ .../python/keras/_impl/keras/__init__.py | 2 +- .../python/keras/_impl/keras/backend.py | 25 ++- .../python/keras/_impl/keras/backend_test.py | 49 +++++ .../keras/_impl/keras/callbacks_test.py | 4 +- .../keras/_impl/keras/datasets/cifar10.py | 9 +- .../keras/_impl/keras/engine/training.py | 196 +++++++++++------- .../python/keras/_impl/keras/layers/merge.py | 2 +- .../keras/_impl/keras/layers/recurrent.py | 113 ++++------ .../_impl/keras/layers/recurrent_test.py | 29 ++- .../keras/_impl/keras/layers/wrappers.py | 3 +- tensorflow/python/keras/_impl/keras/losses.py | 5 +- .../python/keras/_impl/keras/losses_test.py | 54 +++++ tensorflow/python/keras/_impl/keras/models.py | 195 ++++++++++------- .../python/keras/_impl/keras/models_test.py | 18 ++ .../keras/_impl/keras/utils/data_utils.py | 78 +++++-- .../_impl/keras/utils/data_utils_test.py | 66 +++++- .../keras/_impl/keras/utils/generic_utils.py | 3 +- .../keras/_impl/keras/utils/np_utils.py | 8 +- .../keras/_impl/keras/utils/np_utils_test.py | 52 +++++ .../keras/_impl/keras/utils/training_utils.py | 10 +- .../golden/tensorflow.keras.-sequential.pbtxt | 6 +- .../tensorflow.keras.models.-sequential.pbtxt | 6 +- 23 files changed, 671 insertions(+), 274 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/utils/np_utils_test.py diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index a46a92cd0c..e4992afbca 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -590,6 +590,18 @@ py_test( ], ) +py_test( + name = "np_utils_test", + size = "small", + srcs = ["_impl/keras/utils/np_utils_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":keras", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + py_test( name = "training_utils_test", size = "medium", diff --git a/tensorflow/python/keras/_impl/keras/__init__.py b/tensorflow/python/keras/_impl/keras/__init__.py index f0e8d91a92..74cc9d0488 100644 --- a/tensorflow/python/keras/_impl/keras/__init__.py +++ b/tensorflow/python/keras/_impl/keras/__init__.py @@ -40,4 +40,4 @@ from tensorflow.python.keras._impl.keras.layers import Input from tensorflow.python.keras._impl.keras.models import Model from tensorflow.python.keras._impl.keras.models import Sequential -__version__ = '2.0.8-tf' +__version__ = '2.1.1-tf' diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index f9a53c4eb4..b029e5161f 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -2486,11 +2486,21 @@ def print_tensor(x, message=''): class Function(object): """Runs a computation graph. + It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`. + In particular additonal operations via `fetches` argument and additional + tensor substitutions via `feed_dict` arguments. Note that given + substitutions are merged with substitutions from `inputs`. Even though + `feed_dict` is passed once in the constructor (called in `model.compile()`) + we can modify the values in the dictionary. Through this feed_dict we can + provide additional substitutions besides Keras inputs. + Arguments: inputs: Feed placeholders to the computation graph. outputs: Output tensors to fetch. updates: Additional update ops to be run at function call. - name: a name to help users identify what this function does. + name: A name to help users identify what this function does. + session_kwargs: Arguments to `tf.Session.run()`: `fetches`, `feed_dict`, + `options`, `run_metadata` """ def __init__(self, inputs, outputs, updates=None, name=None, @@ -2518,12 +2528,18 @@ class Function(object): updates_ops.append(update) self.updates_op = control_flow_ops.group(*updates_ops) self.name = name + # additional tensor substitutions + self.feed_dict = session_kwargs.pop('feed_dict', {}) + # additional operations + self.fetches = session_kwargs.pop('fetches', []) + if not isinstance(self.fetches, list): + self.fetches = [self.fetches] self.session_kwargs = session_kwargs def __call__(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') - feed_dict = {} + feed_dict = self.feed_dict.copy() for tensor, value in zip(self.inputs, inputs): if is_sparse(tensor): sparse_coo = value.tocoo() @@ -2531,11 +2547,10 @@ class Function(object): np.expand_dims(sparse_coo.col, 1)), 1) value = (indices, sparse_coo.data, sparse_coo.shape) feed_dict[tensor] = value + fetches = self.outputs + [self.updates_op] + self.fetches session = get_session() updated = session.run( - self.outputs + [self.updates_op], - feed_dict=feed_dict, - **self.session_kwargs) + fetches=fetches, feed_dict=feed_dict, **self.session_kwargs) return updated[:len(self.outputs)] diff --git a/tensorflow/python/keras/_impl/keras/backend_test.py b/tensorflow/python/keras/_impl/keras/backend_test.py index 5eaae31d92..e45e566dca 100644 --- a/tensorflow/python/keras/_impl/keras/backend_test.py +++ b/tensorflow/python/keras/_impl/keras/backend_test.py @@ -165,6 +165,55 @@ class BackendUtilsTest(test.TestCase): for y in ys: self.assertEqual(y.op.name[:12], 'StopGradient') + def test_function_tf_fetches(self): + # Additional operations can be passed to tf.Session().run() via its + # `fetches` arguments. In contrast to `updates` argument of + # keras.backend.function() these do not have control dependency on `outputs` + # so they can run in parallel. Also they should not contribute to output of + # keras.backend.function(). + with self.test_session(): + x = keras.backend.variable(0.) + y = keras.backend.variable(0.) + x_placeholder = keras.backend.placeholder(shape=()) + y_placeholder = keras.backend.placeholder(shape=()) + + f = keras.backend.function(inputs=[x_placeholder, y_placeholder], + outputs=[x_placeholder + y_placeholder], + updates=[(x, x_placeholder + 1.)], + fetches=[keras.backend.update(y, 5.)]) + output = f([10., 20.]) + assert output == [30.] + assert keras.backend.get_session().run(fetches=[x, y]) == [11., 5.] + + def test_function_tf_feed_dict(self): + # Additional substitutions can be passed to `tf.Session().run()` via its + # `feed_dict` arguments. Note that the feed_dict is passed once in the + # constructor but we can modify the values in the dictionary. Through + # this feed_dict we can provide additional substitutions besides Keras + # inputs. + with self.test_session(): + x = keras.backend.variable(0.) + y = keras.backend.variable(0.) + x_placeholder = keras.backend.placeholder(shape=()) + y_placeholder = keras.backend.placeholder(shape=()) + + feed_dict = {y_placeholder: 3.} + fetches = [keras.backend.update(y, y_placeholder * 10.)] + f = keras.backend.function(inputs=[x_placeholder], + outputs=[x_placeholder + 1.], + updates=[(x, x_placeholder + 10.)], + feed_dict=feed_dict, + fetches=fetches) + output = f([10.]) + assert output == [11.] + assert keras.backend.get_session().run(fetches=[x, y]) == [20., 30.] + + # updated value in feed_dict will be modified within the K.function() + feed_dict[y_placeholder] = 4. + output = f([20.]) + assert output == [21.] + assert keras.backend.get_session().run(fetches=[x, y]) == [30., 40.] + class BackendVariableTest(test.TestCase): diff --git a/tensorflow/python/keras/_impl/keras/callbacks_test.py b/tensorflow/python/keras/_impl/keras/callbacks_test.py index 6924a8926b..97a650a992 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks_test.py +++ b/tensorflow/python/keras/_impl/keras/callbacks_test.py @@ -273,12 +273,12 @@ class KerasCallbacksTest(test.TestCase): stopper = keras.callbacks.EarlyStopping(monitor='acc', patience=patience) weights = model.get_weights() - hist = model.fit(data, labels, callbacks=[stopper], verbose=0) + hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20) assert len(hist.epoch) >= patience # This should allow training to go for at least `patience` epochs model.set_weights(weights) - hist = model.fit(data, labels, callbacks=[stopper], verbose=0) + hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20) assert len(hist.epoch) >= patience def test_RemoteMonitor(self): diff --git a/tensorflow/python/keras/_impl/keras/datasets/cifar10.py b/tensorflow/python/keras/_impl/keras/datasets/cifar10.py index 4a68789015..7905da66c1 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/cifar10.py +++ b/tensorflow/python/keras/_impl/keras/datasets/cifar10.py @@ -39,14 +39,13 @@ def load_data(): num_train_samples = 50000 - x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8') - y_train = np.zeros((num_train_samples,), dtype='uint8') + x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') + y_train = np.empty((num_train_samples,), dtype='uint8') for i in range(1, 6): fpath = os.path.join(path, 'data_batch_' + str(i)) - data, labels = load_batch(fpath) - x_train[(i - 1) * 10000:i * 10000, :, :, :] = data - y_train[(i - 1) * 10000:i * 10000] = labels + (x_train[(i - 1) * 10000:i * 10000, :, :, :], + y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath) fpath = os.path.join(path, 'test_batch') x_test, y_test = load_batch(fpath) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index e6d29c4968..b4205bf4a3 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -108,7 +108,7 @@ def _standardize_input_data(data, arrays = data elif data.__class__.__name__ == 'DataFrame': # test if data is a DataFrame, without pandas installed - data = data.values + arrays = data.values else: if not hasattr(data, 'shape'): raise TypeError('Error when checking model ' + exception_prefix + @@ -271,12 +271,13 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): is incompatible with an output. """ key_losses = { - 'mean_squared_error', 'binary_crossentropy', 'categorical_crossentropy' + losses.mean_squared_error, losses.binary_crossentropy, + losses.categorical_crossentropy } for y, loss, shape in zip(targets, loss_fns, output_shapes): if loss is None: continue - if loss.__name__ == 'categorical_crossentropy': + if loss is losses.categorical_crossentropy: if y.shape[-1] == 1: raise ValueError('You are passing a target array of shape ' + str( y.shape) + ' while using as loss `categorical_crossentropy`. ' @@ -286,14 +287,14 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): 'If your targets are integer classes, ' 'you can convert them to the expected format via:\n' '```\n' - 'from keras.utils.np_utils import to_categorical\n' + 'from keras.utils import to_categorical\n' 'y_binary = to_categorical(y_int)\n' '```\n' '\n' 'Alternatively, you can use the loss function ' '`sparse_categorical_crossentropy` instead, ' 'which does expect integer targets.') - if loss.__name__ in key_losses: + if loss in key_losses: for target_dim, out_dim in zip(y.shape[1:], shape[1:]): if out_dim is not None and target_dim != out_dim: raise ValueError('A target array with shape ' + str(y.shape) + @@ -584,7 +585,7 @@ class Model(Network): """Configures the model for training. Arguments: - optimizer: String (name of optimizer) or optimizer object. + optimizer: String (name of optimizer) or optimizer instance. See [optimizers](/optimizers). loss: String (name of objective function) or objective function. See [losses](/losses). @@ -623,9 +624,7 @@ class Model(Network): can specify them via the `target_tensors` argument. It can be a single tensor (for a single-output model), a list of tensors, or a dict mapping output names to target tensors. - **kwargs: When using the Theano/CNTK backends, these arguments - are passed into K.function. When using the TensorFlow backend, - these arguments are passed into `tf.Session.run`. + **kwargs: These arguments are passed to `tf.Session.run`. Raises: ValueError: In case of invalid arguments for @@ -1413,10 +1412,8 @@ class Model(Network): output_shapes = [] for output_shape, loss_fn in zip(self._feed_output_shapes, self._feed_loss_fns): - if loss_fn.__name__ == 'sparse_categorical_crossentropy': + if loss_fn is losses.sparse_categorical_crossentropy: output_shapes.append(output_shape[:-1] + (1,)) - elif getattr(losses, loss_fn.__name__, None) is None: - output_shapes.append(None) else: output_shapes.append(output_shape) x = _standardize_input_data( @@ -1484,60 +1481,76 @@ class Model(Network): """Trains the model for a fixed number of epochs (iterations on a dataset). Arguments: - x: Numpy array of training data, - or list of Numpy arrays if the model has multiple inputs. - If all inputs in the model are named, - you can also pass a dictionary - mapping input names to Numpy arrays. - Can be `None` (default) if feeding from framework-native tensors. - y: Numpy array of target data, - or list of Numpy arrays if the model has multiple outputs. - If all outputs in the model are named, - you can also pass a dictionary - mapping output names to Numpy arrays. + x: Numpy array of training data (if the model has a single input), + or list of Numpy arrays (if the model has multiple inputs). + If input layers in the model are named, you can also pass a + dictionary mapping input names to Numpy arrays. + `x` can be `None` (default) if feeding from + TensorFlow data tensors. + y: Numpy array of target (label) data + (if the model has a single output), + or list of Numpy arrays (if the model has multiple outputs). + If output layers in the model are named, you can also pass a + dictionary mapping output names to Numpy arrays. + `y` can be `None` (default) if feeding from + TensorFlow data tensors. Can be `None` (default) if feeding from framework-native tensors. batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, it will default to 32. - epochs: Integer, the number of times to iterate - over the training data arrays. + epochs: Integer. Number of epochs to train the model. + An epoch is an iteration over the entire `x` and `y` + data provided. + Note that in conjunction with `initial_epoch`, + `epochs` is to be understood as "final epoch". + The model is not trained for a number of iterations + given by `epochs`, but merely until the epoch + of index `epochs` is reached. verbose: 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = verbose, 2 = one log line per epoch. - callbacks: List of callbacks to be called during training. + 0 = silent, 1 = progress bar, 2 = one line per epoch. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during training. See [callbacks](/callbacks). - validation_split: Float between 0 and 1: - fraction of the training data to be used as validation data. + validation_split: Float between 0 and 1. + Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. - validation_data: Data on which to evaluate - the loss and any model metrics - at the end of each epoch. The model will not - be trained on this data. - This could be a tuple (x_val, y_val) - or a tuple (x_val, y_val, val_sample_weights). - shuffle: Boolean, whether to shuffle the training data - before each epoch. Has no effect when `steps_per_epoch` - is not `None`. - class_weight: Optional dictionary mapping - class indices (integers) to - a weight (float) to apply to the model's loss for the samples - from this class during training. - This can be useful to tell the model to "pay more attention" to - samples from an under-represented class. - sample_weight: Optional array of the same length as x, containing - weights to apply to the model's loss for each sample. - In the case of temporal data, you can pass a 2D array - with shape (samples, sequence_length), + The validation data is selected from the last samples + in the `x` and `y` data provided, before shuffling. + validation_data: tuple `(x_val, y_val)` or tuple + `(x_val, y_val, val_sample_weights)` on which to evaluate + the loss and any model metrics at the end of each epoch. + The model will not be trained on this data. + This will override `validation_split`. + shuffle: Boolean (whether to shuffle the training data + before each epoch) or str (for 'batch'). + 'batch' is a special option for dealing with the + limitations of HDF5 data; it shuffles in batch-sized chunks. + Has no effect when `steps_per_epoch` is not `None`. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) value, used for weighting the loss function + (during training only). + This can be useful to tell the model to + "pay more attention" to samples from + an under-represented class. + sample_weight: Optional Numpy array of weights for + the training samples, used for weighting the loss function + (during training only). You can either pass a flat (1D) + Numpy array with the same length as the input samples + (1:1 mapping between weights and samples), + or in the case of temporal data, + you can pass a 2D array with shape + `(samples, sequence_length)`, to apply a different weight to every timestep of every sample. In this case you should make sure to specify - sample_weight_mode="temporal" in compile(). + `sample_weight_mode="temporal"` in `compile()`. initial_epoch: Epoch at which to start training - (useful for resuming a previous training run) + (useful for resuming a previous training run). steps_per_epoch: Total number of steps (batches of samples) before declaring one epoch finished and starting the - next epoch. When training with Input Tensors such as + next epoch. When training with input tensors such as TensorFlow data tensors, the default `None` is equal to the number of unique samples in your dataset divided by the batch size, or 1 if that cannot be determined. @@ -1546,8 +1559,10 @@ class Model(Network): to validate before stopping. Returns: - A `History` instance. Its `history` attribute contains - all information collected during training. + A `History` object. Its `History.history` attribute is + a record of training loss values and metrics values + at successive epochs, as well as validation loss values + and validation metrics values (if applicable). Raises: ValueError: In case of mismatch between the provided input data @@ -1667,25 +1682,40 @@ class Model(Network): Computation is done in batches. Arguments: - x: Numpy array of test data, - or list of Numpy arrays if the model has multiple inputs. - If all inputs in the model are named, - you can also pass a dictionary - mapping input names to Numpy arrays. - Can be `None` (default) if feeding from framework-native tensors. - y: Numpy array of target data, - or list of Numpy arrays if the model has multiple outputs. - If all outputs in the model are named, - you can also pass a dictionary - mapping output names to Numpy arrays. - Can be `None` (default) if feeding from framework-native tensors. - batch_size: Integer. If unspecified, it will default to 32. - verbose: Verbosity mode, 0 or 1. - sample_weight: Array of weights to weight the contribution - of different samples to the loss and metrics. - steps: Total number of steps (batches of samples) + x: Numpy array of test data (if the model has a single input), + or list of Numpy arrays (if the model has multiple inputs). + If input layers in the model are named, you can also pass a + dictionary mapping input names to Numpy arrays. + `x` can be `None` (default) if feeding from + framework-native tensors (e.g. TensorFlow data tensors). + y: Numpy array of target (label) data + (if the model has a single output), + or list of Numpy arrays (if the model has multiple outputs). + If output layers in the model are named, you can also pass a + dictionary mapping output names to Numpy arrays. + `y` can be `None` (default) if feeding from + framework-native tensors (e.g. TensorFlow data tensors). + batch_size: Integer or `None`. + Number of samples per evaluation step. + If unspecified, `batch_size` will default to 32. + verbose: 0 or 1. Verbosity mode. + 0 = silent, 1 = progress bar. + sample_weight: Optional Numpy array of weights for + the test samples, used for weighting the loss function. + You can either pass a flat (1D) + Numpy array with the same length as the input samples + (1:1 mapping between weights and samples), + or in the case of temporal data, + you can pass a 2D array with shape + `(samples, sequence_length)`, + to apply a different weight to every timestep of every sample. + In this case you should make sure to specify + `sample_weight_mode="temporal"` in `compile()`. + steps: Integer or `None`. + Total number of steps (batches of samples) before declaring the evaluation round finished. - Ignored with the default value of `None`. + The default `None` is equal to the number of unique samples in + your dataset divided by the batch size. Returns: Scalar test loss (if the model has a single output and no metrics) @@ -1694,7 +1724,7 @@ class Model(Network): the display labels for the scalar outputs. Raises: - ValueError: In case of invalid argument values. + ValueError: In case of invalid arguments. """ # Backwards compatibility. if batch_size is None and steps is None: @@ -1926,7 +1956,7 @@ class Model(Network): to yield from `generator` before declaring one epoch finished and starting the next epoch. It should typically be equal to the number of unique samples of your dataset - divided by the batch size. + divided by the batch size. Not used if using `Sequence`. epochs: Integer, total number of iterations on the data. verbose: Verbosity mode, 0, 1, or 2. callbacks: List of callbacks to be called during training. @@ -1941,7 +1971,7 @@ class Model(Network): for the class. max_queue_size: Maximum size for the generator queue workers: Maximum number of processes to spin up - when using process based threading + when using process-based threading. use_multiprocessing: If True, use process based threading. Note that because this implementation relies on multiprocessing, @@ -1949,9 +1979,9 @@ class Model(Network): non picklable arguments to the generator as they can't be passed easily to children processes. - shuffle: Whether to shuffle the order of the batches at the - beginning of each epoch. Only used with instances - of `Sequence` (keras.utils.Sequence). + shuffle: Whether to shuffle the data at the beginning of each + epoch. Only used with instances of `Sequence` + (`keras.utils.Sequence`). initial_epoch: Epoch at which to start training (useful for resuming a previous training run) **kwargs: support for legacy arguments. @@ -2061,6 +2091,8 @@ class Model(Network): ' and multiple workers may duplicate your data.' ' Please consider using the`keras.utils.Sequence' ' class.')) + if is_sequence: + steps_per_epoch = len(generator) enqueuer = None try: @@ -2182,9 +2214,10 @@ class Model(Network): when using multiprocessing. steps: Total number of steps (batches of samples) to yield from `generator` before stopping. + Not used if using `Sequence`. max_queue_size: maximum size for the generator queue workers: maximum number of processes to spin up - when using process based threading + when using process-based threading. use_multiprocessing: if True, use process based threading. Note that because this implementation relies on multiprocessing, @@ -2230,6 +2263,8 @@ class Model(Network): ' and multiple workers may duplicate your data.' ' Please consider using the`keras.utils.Sequence' ' class.')) + if is_sequence: + steps = len(generator) enqueuer = None try: @@ -2309,8 +2344,9 @@ class Model(Network): steps: Total number of steps (batches of samples) to yield from `generator` before stopping. max_queue_size: Maximum size for the generator queue. + Not used if using `Sequence`. workers: Maximum number of processes to spin up - when using process based threading + when using process-based threading. use_multiprocessing: If `True`, use process based threading. Note that because this implementation relies on multiprocessing, @@ -2351,6 +2387,8 @@ class Model(Network): ' and multiple workers may duplicate your data.' ' Please consider using the`keras.utils.Sequence' ' class.')) + if is_sequence: + steps = len(generator) enqueuer = None try: diff --git a/tensorflow/python/keras/_impl/keras/layers/merge.py b/tensorflow/python/keras/_impl/keras/layers/merge.py index 5f26ce44e3..888be27369 100644 --- a/tensorflow/python/keras/_impl/keras/layers/merge.py +++ b/tensorflow/python/keras/_impl/keras/layers/merge.py @@ -318,7 +318,7 @@ class Concatenate(_Merge): """Layer that concatenates a list of inputs. It takes as input a list of tensors, - all of the same shape expect for the concatenation axis, + all of the same shape except for the concatenation axis, and returns a single tensor, the concatenation of all inputs. Arguments: diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 2bc74d5f80..8df1840b4c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -756,6 +756,8 @@ class RNN(Layer): @property def trainable_weights(self): + if not self.trainable: + return [] if isinstance(self.cell, Layer): return self.cell.trainable_weights return [] @@ -763,6 +765,8 @@ class RNN(Layer): @property def non_trainable_weights(self): if isinstance(self.cell, Layer): + if not self.trainable: + return self.cell.weights return self.cell.non_trainable_weights return [] @@ -1048,7 +1052,6 @@ class SimpleRNN(RNN): unroll=unroll, activity_regularizer=regularizers.get(activity_regularizer), **kwargs) - # self.activity_regularizer = regularizers.get(activity_regularizer) def call(self, inputs, mask=None, training=None, initial_state=None): self.cell._generate_dropout_mask(inputs, training=training) @@ -1114,36 +1117,25 @@ class SimpleRNN(RNN): def get_config(self): config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), + 'units': self.units, + 'activation': activations.serialize(self.activation), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), + 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout } base_config = super(SimpleRNN, self).get_config() del base_config['cell'] @@ -1597,40 +1589,28 @@ class GRU(RNN): def get_config(self): config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), + 'units': self.units, + 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), + 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout, + 'implementation': self.implementation } base_config = super(GRU, self).get_config() del base_config['cell'] @@ -2125,42 +2105,29 @@ class LSTM(RNN): def get_config(self): config = { - 'units': - self.units, - 'activation': - activations.serialize(self.activation), + 'units': self.units, + 'activation': activations.serialize(self.activation), 'recurrent_activation': activations.serialize(self.recurrent_activation), - 'use_bias': - self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), + 'use_bias': self.use_bias, + 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': - initializers.serialize(self.bias_initializer), - 'unit_forget_bias': - self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), + 'bias_initializer': initializers.serialize(self.bias_initializer), + 'unit_forget_bias': self.unit_forget_bias, + 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': - regularizers.serialize(self.bias_regularizer), + 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), + 'kernel_constraint': constraints.serialize(self.kernel_constraint), 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': - constraints.serialize(self.bias_constraint), - 'dropout': - self.dropout, - 'recurrent_dropout': - self.recurrent_dropout, - 'implementation': - self.implementation + 'bias_constraint': constraints.serialize(self.bias_constraint), + 'dropout': self.dropout, + 'recurrent_dropout': self.recurrent_dropout, + 'implementation': self.implementation } base_config = super(LSTM, self).get_config() del base_config['cell'] diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py index b1f89a30bb..7dc4c1db9b 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py @@ -359,19 +359,38 @@ class RNNTest(test.TestCase): layer.build((None, None, 5)) # Test regularization losses - assert len(layer.losses) == 1 + self.assertEqual(len(layer.losses), 1) # Test weights - assert len(layer.trainable_weights) == 6 + self.assertEqual(len(layer.trainable_weights), 6) cells[0].trainable = False - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 3 + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 3) # Test `get_losses_for` x = keras.Input((None, 5)) y = keras.backend.sum(x) cells[0].add_loss(y, inputs=x) - assert layer.get_losses_for(x) == [y] + self.assertEqual(layer.get_losses_for(x), [y]) + + def test_rnn_dynamic_trainability(self): + layer_class = keras.layers.SimpleRNN + embedding_dim = 4 + units = 3 + + layer = layer_class(units) + layer.build((None, None, embedding_dim)) + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 0) + layer.trainable = False + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 0) + self.assertEqual(len(layer.non_trainable_weights), 3) + layer.trainable = True + self.assertEqual(len(layer.weights), 3) + self.assertEqual(len(layer.trainable_weights), 3) + self.assertEqual(len(layer.non_trainable_weights), 0) if __name__ == '__main__': diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index 6f786b7850..0e82005caa 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -336,7 +336,8 @@ class Bidirectional(Wrapper): output = [y, y_rev] # Properly set learning phase - if 0 < self.layer.dropout + self.layer.recurrent_dropout: + if (getattr(y, '_uses_learning_phase', False) or + getattr(y_rev, '_uses_learning_phase', False)): if self.merge_mode is None: for out in output: out._uses_learning_phase = True diff --git a/tensorflow/python/keras/_impl/keras/losses.py b/tensorflow/python/keras/_impl/keras/losses.py index da0984d3c3..19212aeee8 100644 --- a/tensorflow/python/keras/_impl/keras/losses.py +++ b/tensorflow/python/keras/_impl/keras/losses.py @@ -22,6 +22,7 @@ import six from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object +from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object def mean_squared_error(y_true, y_pred): @@ -105,7 +106,7 @@ cosine = cosine_proximity def serialize(loss): - return loss.__name__ + return serialize_keras_object(loss) def deserialize(name, custom_objects=None): @@ -122,6 +123,8 @@ def get(identifier): if isinstance(identifier, six.string_types): identifier = str(identifier) return deserialize(identifier) + if isinstance(identifier, dict): + return deserialize(identifier) elif callable(identifier): return identifier else: diff --git a/tensorflow/python/keras/_impl/keras/losses_test.py b/tensorflow/python/keras/_impl/keras/losses_test.py index b295356ec1..1884c0fdca 100644 --- a/tensorflow/python/keras/_impl/keras/losses_test.py +++ b/tensorflow/python/keras/_impl/keras/losses_test.py @@ -18,11 +18,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os +import shutil + import numpy as np from tensorflow.python.keras._impl import keras from tensorflow.python.platform import test +try: + import h5py # pylint:disable=g-import-not-at-top +except ImportError: + h5py = None ALL_LOSSES = [keras.losses.mean_squared_error, keras.losses.mean_absolute_error, @@ -39,6 +46,20 @@ ALL_LOSSES = [keras.losses.mean_squared_error, keras.losses.categorical_hinge] +class _MSEMAELoss(object): + """Loss function with internal state, for testing serialization code.""" + + def __init__(self, mse_fraction): + self.mse_fraction = mse_fraction + + def __call__(self, y_true, y_pred): + return (self.mse_fraction * keras.losses.mse(y_true, y_pred) + + (1 - self.mse_fraction) * keras.losses.mae(y_true, y_pred)) + + def get_config(self): + return {'mse_fraction': self.mse_fraction} + + class KerasLossesTest(test.TestCase): def test_objective_shapes_3d(self): @@ -83,6 +104,39 @@ class KerasLossesTest(test.TestCase): loss = keras.backend.eval(keras.losses.categorical_hinge(y_true, y_pred)) self.assertAllClose(expected_loss, np.mean(loss)) + def test_serializing_loss_class(self): + orig_loss_class = _MSEMAELoss(0.3) + with keras.utils.custom_object_scope({'_MSEMAELoss': _MSEMAELoss}): + serialized = keras.losses.serialize(orig_loss_class) + + with keras.utils.custom_object_scope({'_MSEMAELoss': _MSEMAELoss}): + deserialized = keras.losses.deserialize(serialized) + assert isinstance(deserialized, _MSEMAELoss) + assert deserialized.mse_fraction == 0.3 + + def test_serializing_model_with_loss_class(self): + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir) + model_filename = os.path.join(tmpdir, 'custom_loss.h5') + + with self.test_session(): + with keras.utils.custom_object_scope({'_MSEMAELoss': _MSEMAELoss}): + loss = _MSEMAELoss(0.3) + inputs = keras.layers.Input((2,)) + outputs = keras.layers.Dense(1, name='model_output')(inputs) + model = keras.models.Model(inputs, outputs) + model.compile(optimizer='sgd', loss={'model_output': loss}) + model.fit(np.random.rand(256, 2), np.random.rand(256, 1)) + + if h5py is None: + return + + model.save(model_filename) + + with keras.utils.custom_object_scope({'_MSEMAELoss': _MSEMAELoss}): + loaded_model = keras.models.load_model(model_filename) + loaded_model.predict(np.random.rand(128, 2)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/models.py b/tensorflow/python/keras/_impl/keras/models.py index 046fd11633..ba202827ce 100644 --- a/tensorflow/python/keras/_impl/keras/models.py +++ b/tensorflow/python/keras/_impl/keras/models.py @@ -31,6 +31,7 @@ from tensorflow.python.keras._impl.keras import layers as layer_module from tensorflow.python.keras._impl.keras import optimizers from tensorflow.python.keras._impl.keras.engine import topology from tensorflow.python.keras._impl.keras.engine.topology import Input +from tensorflow.python.keras._impl.keras.engine.topology import InputLayer from tensorflow.python.keras._impl.keras.engine.topology import Layer from tensorflow.python.keras._impl.keras.engine.topology import TFBaseLayer from tensorflow.python.keras._impl.keras.engine.training import Model @@ -456,38 +457,48 @@ class Sequential(Model): 'an instance of class Layer. ' 'Found: ' + str(layer)) if not self.outputs: - # first layer in model: check that it is an input layer - if not layer._inbound_nodes: - # create an input layer - if not hasattr(layer, '_batch_input_shape'): - raise ValueError('The first layer in a ' - 'Sequential model must ' - 'get an `input_shape` or ' - '`batch_input_shape` argument.') + # First layer in model: check that it is an input layer. + if not isinstance(layer, InputLayer): + # Create an input layer. + # First, we need to infer its expected input shape and dtype. + if isinstance(layer, (Model, Sequential)): + # We were passed a model as first layer. + # This requires a specific way to figure out the + # input shape and dtype. + if not layer.layers: + raise ValueError('Cannot add an empty model ' + 'to a `Sequential` model.') + # In case of nested models: recover the first layer + # of the deepest model to infer input shape and dtype. + first_layer = layer.layers[0] + while isinstance(first_layer, (Model, Sequential)): + first_layer = first_layer.layers[0] + batch_shape = first_layer._batch_input_shape + dtype = first_layer.dtype + else: + # We were passed a regular layer, and it should + # know about its input shape. Otherwise, that's an error. + if not hasattr(layer, '_batch_input_shape'): + raise ValueError('The first layer in a ' + 'Sequential model must ' + 'get an `input_shape` argument.') + batch_shape = layer._batch_input_shape + dtype = layer.dtype # Instantiate the input layer. x = Input( - batch_shape=layer._batch_input_shape, - dtype=layer.dtype, - name=layer.name + '_input') + batch_shape=batch_shape, dtype=dtype, name=layer.name + '_input') # This will build the current layer # and create the node connecting the current layer # to the input layer we just created. layer(x) - if len(layer._inbound_nodes) != 1: - raise ValueError('A layer added to a Sequential model must ' - 'not already be connected somewhere else. ' - 'Model received layer ' + layer.name + ' which has ' + - str(len(layer._inbound_nodes)) + - ' pre-existing inbound connections.') - - if len(layer._inbound_nodes[0].output_tensors) != 1: + if len(layer.inbound_nodes[-1].output_tensors) != 1: raise ValueError('All layers in a Sequential model ' 'should have a single output tensor. ' 'For multi-output layers, ' 'use the functional API.') - self.outputs = [layer._inbound_nodes[0].output_tensors[0]] + self.outputs = [layer.inbound_nodes[-1].output_tensors[0]] self.inputs = topology.get_source_inputs(self.outputs[0]) # We create an input node, which we will keep updated @@ -741,21 +752,18 @@ class Sequential(Model): If the model has multiple outputs, you can use a different `sample_weight_mode` on each output by passing a dictionary or a list of modes. - weighted_metrics: List of metrics to be evaluated and weighted - by sample_weight or class_weight during training and testing. - target_tensors: By default, Keras will create placeholders for the + weighted_metrics: list of metrics to be evaluated and weighted + by `sample_weight` or `class_weight` during training and testing. + target_tensors: By default, Keras will create a placeholder for the model's target, which will be fed with the target data during training. If instead you would like to use your own - target tensors (in turn, Keras will not expect external + target tensor (in turn, Keras will not expect external Numpy data for these targets at training time), you - can specify them via the `target_tensors` argument. It can be - a single tensor (for a single-output model), a list of tensors, - or a dict mapping output names to target tensors. - **kwargs: When using the Theano/CNTK backends, these arguments - are passed into K.function. When using the TensorFlow backend, - these arguments are passed into `tf.Session.run`. - Raises: - ValueError: In case of invalid arguments for + can specify them via the `target_tensors` argument. + It should be a single tensor + (for a single-output `Sequential` model). + **kwargs: These arguments are passed into `tf.Session.run`. + Example: ```python model = Sequential() @@ -790,10 +798,10 @@ class Sequential(Model): self.total_loss = self.model.total_loss def fit(self, - x, - y, - batch_size=32, - epochs=10, + x=None, + y=None, + batch_size=None, + epochs=1, verbose=1, callbacks=None, validation_split=0., @@ -801,47 +809,86 @@ class Sequential(Model): shuffle=True, class_weight=None, sample_weight=None, - initial_epoch=0): + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + **kwargs): """Trains the model for a fixed number of epochs. Arguments: - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - y: labels, as a Numpy array. - batch_size: integer. Number of samples per gradient update. - epochs: integer. Number of epochs to train the model. - Note that in conjunction with initial_epoch, the parameter - epochs is to be understood as "final epoch". The model is - not trained for a number of steps given by epochs, but - until the epoch epochs is reached. - verbose: 0 for no logging to stdout, - 1 for progress bar logging, 2 for one log line per epoch. - callbacks: list of `keras.callbacks.Callback` instances. + x: Numpy array of training data. + If the input layer in the model is named, you can also pass a + dictionary mapping the input name to a Numpy array. + `x` can be `None` (default) if feeding from + TensorFlow data tensors. + y: Numpy array of target (label) data. + If the output layer in the model is named, you can also pass a + dictionary mapping the output name to a Numpy array. + `y` can be `None` (default) if feeding from + TensorFlow data tensors. + batch_size: Integer or `None`. + Number of samples per gradient update. + If unspecified, it will default to 32. + epochs: Integer. Number of epochs to train the model. + An epoch is an iteration over the entire `x` and `y` + data provided. + Note that in conjunction with `initial_epoch`, + `epochs` is to be understood as "final epoch". + The model is not trained for a number of iterations + given by `epochs`, but merely until the epoch + of index `epochs` is reached. + verbose: 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = one line per epoch. + callbacks: List of `keras.callbacks.Callback` instances. List of callbacks to apply during training. See [callbacks](/callbacks). - validation_split: float (0. < x < 1). - Fraction of the data to use as held-out validation data. - validation_data: tuple (x_val, y_val) or tuple - (x_val, y_val, val_sample_weights) to be used as held-out - validation data. Will override validation_split. - shuffle: boolean or str (for 'batch'). - Whether to shuffle the samples at each epoch. + validation_split: Float between 0 and 1: + Fraction of the training data to be used as validation data. + The model will set apart this fraction of the training data, + will not train on it, and will evaluate + the loss and any model metrics + on this data at the end of each epoch. + The validation data is selected from the last samples + in the `x` and `y` data provided, before shuffling. + validation_data: tuple `(x_val, y_val)` or tuple + `(x_val, y_val, val_sample_weights)` on which to evaluate + the loss and any model metrics at the end of each epoch. + The model will not be trained on this data. + This will override `validation_split`. + shuffle: Boolean (whether to shuffle the training data + before each epoch) or str (for 'batch'). 'batch' is a special option for dealing with the limitations of HDF5 data; it shuffles in batch-sized chunks. - class_weight: dictionary mapping classes to a weight value, - used for scaling the loss function (during training only). - sample_weight: Numpy array of weights for - the training samples, used for scaling the loss function + Has no effect when `steps_per_epoch` is not `None`. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) value, used for weighting the loss function + (during training only). + This can be useful to tell the model to + "pay more attention" to samples from + an under-represented class. + sample_weight: Optional Numpy array of weights for + the training samples, used for weighting the loss function (during training only). You can either pass a flat (1D) Numpy array with the same length as the input samples (1:1 mapping between weights and samples), or in the case of temporal data, - you can pass a 2D array with shape (samples, sequence_length), + you can pass a 2D array with shape + `(samples, sequence_length)`, to apply a different weight to every timestep of every sample. In this case you should make sure to specify - sample_weight_mode="temporal" in compile(). + `sample_weight_mode="temporal"` in `compile()`. initial_epoch: Epoch at which to start training (useful for resuming a previous training run). + steps_per_epoch: Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of unique samples in your dataset divided by + the batch size, or 1 if that cannot be determined. + validation_steps: Only relevant if `steps_per_epoch` + is specified. Total number of steps (batches of samples) + to validate before stopping. + **kwargs: Used for backwards compatibility support. Returns: A `History` object. Its `History.history` attribute is @@ -850,10 +897,12 @@ class Sequential(Model): and validation metrics values (if applicable). Raises: - RuntimeError: if the model was never compiled. + RuntimeError: If the model was never compiled. + ValueError: In case of mismatch between the provided input data + and what the model expects. """ if not self.built: - raise RuntimeError('The model needs to be compiled ' 'before being used.') + raise RuntimeError('The model needs to be compiled before being used.') return self.model.fit( x, y, @@ -866,7 +915,9 @@ class Sequential(Model): shuffle=shuffle, class_weight=class_weight, sample_weight=sample_weight, - initial_epoch=initial_epoch) + initial_epoch=initial_epoch, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps) def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None): """Computes the loss on some input data, batch by batch. @@ -889,7 +940,7 @@ class Sequential(Model): RuntimeError: if the model was never compiled. """ if not self.built: - raise RuntimeError('The model needs to be compiled ' 'before being used.') + raise RuntimeError('The model needs to be compiled before being used.') return self.model.evaluate( x, y, @@ -949,7 +1000,7 @@ class Sequential(Model): RuntimeError: if the model was never compiled. """ if not self.built: - raise RuntimeError('The model needs to be compiled ' 'before being used.') + raise RuntimeError('The model needs to be compiled before being used.') return self.model.train_on_batch( x, y, sample_weight=sample_weight, class_weight=class_weight) @@ -972,10 +1023,10 @@ class Sequential(Model): RuntimeError: if the model was never compiled. """ if not self.built: - raise RuntimeError('The model needs to be compiled ' 'before being used.') + raise RuntimeError('The model needs to be compiled before being used.') return self.model.test_on_batch(x, y, sample_weight=sample_weight) - def predict_proba(self, x, batch_size=32, verbose=1): + def predict_proba(self, x, batch_size=32, verbose=0): """Generates class probability predictions for the input samples. The input samples are processed batch by batch. @@ -997,7 +1048,7 @@ class Sequential(Model): '(like softmax or sigmoid would).') return preds - def predict_classes(self, x, batch_size=32, verbose=1): + def predict_classes(self, x, batch_size=32, verbose=0): """Generate class predictions for the input samples. The input samples are processed batch by batch. @@ -1126,7 +1177,7 @@ class Sequential(Model): raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) if not self.built: - raise RuntimeError('The model needs to be compiled ' 'before being used.') + raise RuntimeError('The model needs to be compiled before being used.') return self.model.fit_generator( generator, steps_per_epoch, @@ -1193,7 +1244,7 @@ class Sequential(Model): raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) if not self.built: - raise RuntimeError('The model needs to be compiled ' 'before being used.') + raise RuntimeError('The model needs to be compiled before being used.') return self.model.evaluate_generator( generator, steps, diff --git a/tensorflow/python/keras/_impl/keras/models_test.py b/tensorflow/python/keras/_impl/keras/models_test.py index fd6b20e0ed..86acac4604 100644 --- a/tensorflow/python/keras/_impl/keras/models_test.py +++ b/tensorflow/python/keras/_impl/keras/models_test.py @@ -315,6 +315,24 @@ class TestSequential(test.TestCase): with self.assertRaises(TypeError): model.build() + def test_nested_sequential_trainability(self): + input_dim = 20 + num_units = 10 + num_classes = 2 + + inner_model = keras.models.Sequential() + inner_model.add(keras.layers.Dense(num_units, input_shape=(input_dim,))) + + model = keras.models.Sequential() + model.add(inner_model) + model.add(keras.layers.Dense(num_classes)) + + self.assertEqual(len(model.trainable_weights), 4) + inner_model.trainable = False + self.assertEqual(len(model.trainable_weights), 2) + inner_model.trainable = True + self.assertEqual(len(model.trainable_weights), 4) + class TestModelCloning(test.TestCase): diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils.py b/tensorflow/python/keras/_impl/keras/utils/data_utils.py index b3a1f64042..4f335af62e 100644 --- a/tensorflow/python/keras/_impl/keras/utils/data_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/data_utils.py @@ -378,17 +378,27 @@ class Sequence(object): pass -def get_index(ds, i): - """Quick fix for Python2, otherwise, it cannot be pickled. +# Global variables to be shared across processes +_SHARED_SEQUENCES = {} +# We use a Value to provide unique id to different processes. +_SEQUENCE_COUNTER = multiprocessing.Value('i', 0) + + +def get_index(uid, i): + """Get the value from the Sequence `uid` at index `i`. + + To allow multiple Sequences to be used at the same time, we use `uid` to + get a specific one. A single Sequence would cause the validation to + overwrite the training Sequence. Arguments: - ds: a Holder or Sequence object. + uid: int, Sequence identifier i: index Returns: The value at index `i`. """ - return ds[i] + return _SHARED_SEQUENCES[uid][i] class SequenceEnqueuer(object): @@ -459,17 +469,17 @@ class OrderedEnqueuer(SequenceEnqueuer): Arguments: sequence: A `keras.utils.data_utils.Sequence` object. - use_multiprocessing: use multiprocessing if True, otherwise threading - scheduling: Sequential querying of datas if 'sequential', random - otherwise. - shuffle: Whether to shuffle the data at the beginning of each epoch. + use_multiprocessing: Use multiprocessing if True, otherwise threading + shuffle: Whether to shuffle the data at the beginning of each epoch """ - def __init__(self, - sequence, - use_multiprocessing=False, - shuffle=False): + def __init__(self, sequence, use_multiprocessing=False, shuffle=False): self.sequence = sequence + + # Doing Multiprocessing.Value += x is not process-safe. + with _SEQUENCE_COUNTER.get_lock(): + self.uid = _SEQUENCE_COUNTER.value + _SEQUENCE_COUNTER.value += 1 self.use_multiprocessing = use_multiprocessing self.shuffle = shuffle self.workers = 0 @@ -493,15 +503,24 @@ class OrderedEnqueuer(SequenceEnqueuer): self.executor = multiprocessing.Pool(workers) else: self.executor = ThreadPool(workers) + self.workers = workers self.queue = queue.Queue(max_queue_size) self.stop_signal = threading.Event() self.run_thread = threading.Thread(target=self._run) self.run_thread.daemon = True self.run_thread.start() + def _wait_queue(self): + """Wait for the queue to be empty.""" + while True: + time.sleep(0.1) + if self.queue.unfinished_tasks == 0 or self.stop_signal.is_set(): + return + def _run(self): - """Submits requests to the executor and queues the `Future` objects.""" + """Function to submit request to the executor & queue `Future` objects.""" sequence = list(range(len(self.sequence))) + self._send_sequence() # Share the initial sequence while True: if self.shuffle: random.shuffle(sequence) @@ -509,9 +528,18 @@ class OrderedEnqueuer(SequenceEnqueuer): if self.stop_signal.is_set(): return self.queue.put( - self.executor.apply_async(get_index, (self.sequence, i)), - block=True) + self.executor.apply_async(get_index, (self.uid, i)), block=True) + + # Done with the current epoch, waiting for the final batches + self._wait_queue() + + if self.stop_signal.is_set(): + # We're done + return + + # Call the internal on epoch end. self.sequence.on_epoch_end() + self._send_sequence() # Update the pool def get(self): """Creates a generator to extract data from the queue. @@ -520,17 +548,29 @@ class OrderedEnqueuer(SequenceEnqueuer): Yields: Tuples (inputs, targets) - or (inputs, targets, sample_weights) + or (inputs, targets, sample_weights) """ try: while self.is_running(): inputs = self.queue.get(block=True).get() + self.queue.task_done() if inputs is not None: yield inputs except Exception as e: self.stop() raise StopIteration(e) + def _send_sequence(self): + """Send current Sequence to all workers.""" + _SHARED_SEQUENCES[ + self.uid] = self.sequence # For new processes that may spawn + + self._close_pool() + if self.use_multiprocessing: + self.executor = multiprocessing.Pool(self.workers) + else: + self.executor = ThreadPool(self.workers) + def stop(self, timeout=None): """Stops running threads and wait for them to exit, if necessary. @@ -544,9 +584,13 @@ class OrderedEnqueuer(SequenceEnqueuer): self.queue.queue.clear() self.queue.unfinished_tasks = 0 self.queue.not_full.notify() + self._close_pool() + self.run_thread.join(timeout) + _SHARED_SEQUENCES[self.uid] = None + + def _close_pool(self): self.executor.close() self.executor.join() - self.run_thread.join(timeout) class GeneratorEnqueuer(SequenceEnqueuer): diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py index 45322f1f29..14b2f08442 100644 --- a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py +++ b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py @@ -115,15 +115,19 @@ def threadsafe_generator(f): class TestSequence(keras.utils.data_utils.Sequence): - def __init__(self, shape): + def __init__(self, shape, value=1.): self.shape = shape + self.inner = value def __getitem__(self, item): - return np.ones(self.shape, dtype=np.uint8) * item + return np.ones(self.shape, dtype=np.uint32) * item * self.inner def __len__(self): return 100 + def on_epoch_end(self): + self.inner *= 5.0 + class FaultSequence(keras.utils.data_utils.Sequence): @@ -228,6 +232,64 @@ class TestEnqueuers(test.TestCase): with self.assertRaises(StopIteration): next(gen_output) + def test_on_epoch_end_processes(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=True) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(200): + acc.append(next(gen_output)[0, 0, 0, 0]) + # Check that order was keep in GeneratorEnqueuer with processes + self.assertEqual(acc[100:], list([k * 5 for k in range(100)])) + enqueuer.stop() + + def test_context_switch(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=True) + enqueuer2 = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3], value=15), use_multiprocessing=True) + enqueuer.start(3, 10) + enqueuer2.start(3, 10) + gen_output = enqueuer.get() + gen_output2 = enqueuer2.get() + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + self.assertEqual(acc[-1], 99) + # One epoch is completed so enqueuer will switch the Sequence + + acc = [] + for _ in range(100): + acc.append(next(gen_output2)[0, 0, 0, 0]) + self.assertEqual(acc[-1], 99 * 15) + # One epoch has been completed so enqueuer2 will switch + + # Be sure that both Sequence were updated + self.assertEqual(next(gen_output)[0, 0, 0, 0], 0) + self.assertEqual(next(gen_output)[0, 0, 0, 0], 5) + self.assertEqual(next(gen_output2)[0, 0, 0, 0], 0) + self.assertEqual(next(gen_output2)[0, 0, 0, 0], 15 * 5) + + # Tear down everything + enqueuer.stop() + enqueuer2.stop() + + def test_on_epoch_end_threads(self): + enqueuer = keras.utils.data_utils.OrderedEnqueuer( + TestSequence([3, 200, 200, 3]), use_multiprocessing=False) + enqueuer.start(3, 10) + gen_output = enqueuer.get() + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + acc = [] + for _ in range(100): + acc.append(next(gen_output)[0, 0, 0, 0]) + # Check that order was keep in GeneratorEnqueuer with processes + self.assertEqual(acc, list([k * 5 for k in range(100)])) + enqueuer.stop() + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py index efa79b1612..025e5d30a5 100644 --- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py @@ -271,7 +271,8 @@ class Progbar(object): self.total_width = 0 self.seen_so_far = 0 self.verbose = verbose - self._dynamic_display = (sys.stdout.isatty() or + self._dynamic_display = ((hasattr(sys.stdout, 'isatty') and + sys.stdout.isatty()) or 'ipykernel' in sys.modules) def update(self, current, values=None, force=False): diff --git a/tensorflow/python/keras/_impl/keras/utils/np_utils.py b/tensorflow/python/keras/_impl/keras/utils/np_utils.py index a23172d342..896016d4d8 100644 --- a/tensorflow/python/keras/_impl/keras/utils/np_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/np_utils.py @@ -33,12 +33,18 @@ def to_categorical(y, num_classes=None): Returns: A binary matrix representation of the input. """ - y = np.array(y, dtype='int').ravel() + y = np.array(y, dtype='int') + input_shape = y.shape + if input_shape and input_shape[-1] == 1: + input_shape = tuple(input_shape[:-1]) + y = y.ravel() if not num_classes: num_classes = np.max(y) + 1 n = y.shape[0] categorical = np.zeros((n, num_classes)) categorical[np.arange(n), y] = 1 + output_shape = input_shape + (num_classes,) + categorical = np.reshape(categorical, output_shape) return categorical diff --git a/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py new file mode 100644 index 0000000000..9680c295cd --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py @@ -0,0 +1,52 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for np_utils.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras._impl import keras +from tensorflow.python.platform import test + + +class TestNPUtils(test.TestCase): + + def test_to_categorical(self): + num_classes = 5 + shapes = [(3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)] + expected_shapes = [(3, num_classes), + (4, 3, num_classes), + (5, 4, 3, num_classes), + (3, num_classes)] + labels = [np.random.randint(0, num_classes, shape) for shape in shapes] + one_hots = [ + keras.utils.to_categorical(label, num_classes) for label in labels] + for label, one_hot, expected_shape in zip(labels, + one_hots, + expected_shapes): + # Check shape + self.assertEqual(one_hot.shape, expected_shape) + # Make sure there is only one 1 in a row + self.assertTrue(np.all(one_hot.sum(axis=-1) == 1)) + # Get original labels back from one hots + self.assertTrue(np.all( + np.argmax(one_hot, -1).reshape(label.shape) == label)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils.py b/tensorflow/python/keras/_impl/keras/utils/training_utils.py index b993a16394..8939c814cf 100644 --- a/tensorflow/python/keras/_impl/keras/utils/training_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/training_utils.py @@ -77,8 +77,11 @@ def multi_gpu_model(model, gpus): width = 224 num_classes = 1000 - # Instantiate the base model - # (here, we do it on CPU, for better efficiency). + # Instantiate the base model (or "template" model). + # We recommend doing this with under a CPU device scope, + # so that the model's weights are hosted on CPU memory. + # Otherwise they may end up hosted on a GPU, which would + # complicate weight sharing. with tf.device('/cpu:0'): model = Xception(weights=None, input_shape=(height, width, 3), @@ -97,6 +100,9 @@ def multi_gpu_model(model, gpus): # This `fit` call will be distributed on 8 GPUs. # Since the batch size is 256, each GPU will process 32 samples. parallel_model.fit(x, y, epochs=20, batch_size=256) + + # Save model via the template model (which shares the same weights): + model.save('my_model.h5') ``` Raises: diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index 04fe46cedc..f69800b918 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -173,7 +173,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\'], varargs=None, keywords=None, defaults=[\'32\', \'10\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " } member_method { name: "fit_generator" @@ -241,7 +241,7 @@ tf_class { } member_method { name: "predict_classes" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'1\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], " } member_method { name: "predict_generator" @@ -253,7 +253,7 @@ tf_class { } member_method { name: "predict_proba" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'1\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], " } member_method { name: "reset_states" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index 3946ff4d5f..8397b373f4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -173,7 +173,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\'], varargs=None, keywords=None, defaults=[\'32\', \'10\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " } member_method { name: "fit_generator" @@ -241,7 +241,7 @@ tf_class { } member_method { name: "predict_classes" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'1\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], " } member_method { name: "predict_generator" @@ -253,7 +253,7 @@ tf_class { } member_method { name: "predict_proba" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'1\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\'], varargs=None, keywords=None, defaults=[\'32\', \'0\'], " } member_method { name: "reset_states" -- GitLab From b0bcf675a4b5d6217f3b58fd27b344f20e7bf25d Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 15 Nov 2017 13:50:54 -0800 Subject: [PATCH 0471/1801] Use a static "linker initialized" tensorflow::mutex when possible. There is no need to use a lazily created tensorflow::mutex since the tensorflow::LINKER_INITIALIZED constructor is a no-op. PiperOrigin-RevId: 175874749 --- tensorflow/compiler/xla/service/compiler.cc | 16 ++++------------ tensorflow/compiler/xla/service/compiler.h | 3 +-- .../compiler/xla/service/computation_placer.cc | 12 +++++------- .../compiler/xla/service/computation_placer.h | 7 ++----- .../compiler/xla/service/transfer_manager.cc | 13 +++++-------- .../compiler/xla/service/transfer_manager.h | 7 ++----- 6 files changed, 19 insertions(+), 39 deletions(-) diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index 3b1900428a..e2e9d2a0c0 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -27,14 +27,8 @@ namespace se = ::perftools::gputools; namespace xla { -/* static */ tensorflow::mutex* Compiler::platform_compiler_mutex_; - -/* static */ void Compiler::LazyInitMutex() { - static std::once_flag mutex_init_flag; - std::call_once(mutex_init_flag, []() { - Compiler::platform_compiler_mutex_ = new tensorflow::mutex; - }); -} +/* static */ tensorflow::mutex Compiler::platform_compiler_mutex_( + tensorflow::LINKER_INITIALIZED); /* static */ std::map* @@ -55,8 +49,7 @@ Compiler::GetPlatformCompilers() { /* static */ void Compiler::RegisterCompilerFactory( se::Platform::Id platform_id, std::function()> compiler_factory) { - LazyInitMutex(); - tensorflow::mutex_lock lock(*platform_compiler_mutex_); + tensorflow::mutex_lock lock(platform_compiler_mutex_); auto* factories = GetPlatformCompilerFactories(); CHECK(factories->find(platform_id) == factories->end()) << "Compiler factory already registered for platform"; @@ -65,8 +58,7 @@ Compiler::GetPlatformCompilers() { /* static */ StatusOr Compiler::GetForPlatform( const se::Platform* platform) { - LazyInitMutex(); - tensorflow::mutex_lock lock(*platform_compiler_mutex_); + tensorflow::mutex_lock lock(platform_compiler_mutex_); auto* compilers = GetPlatformCompilers(); // See if we already instantiated a compiler for this platform. diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 4c2d9600d9..5f021900c8 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -157,8 +157,7 @@ class Compiler { private: // Mutex that guards the platform-compiler map. - static tensorflow::mutex* platform_compiler_mutex_; - static void LazyInitMutex(); + static tensorflow::mutex platform_compiler_mutex_; // Map from platform kind to compiler factory. static std::map* diff --git a/tensorflow/compiler/xla/service/computation_placer.cc b/tensorflow/compiler/xla/service/computation_placer.cc index cdfa30dd9a..6b7b0d25e8 100644 --- a/tensorflow/compiler/xla/service/computation_placer.cc +++ b/tensorflow/compiler/xla/service/computation_placer.cc @@ -94,7 +94,7 @@ StatusOr ComputationPlacer::AssignDevices( se::Platform::Id platform_id, ComputationPlacerCreationFunction creation_function) { tensorflow::mutex_lock lock( - *ComputationPlacer::platform_computation_placer_mutex()); + ComputationPlacer::platform_computation_placer_mutex_); auto* computation_placers = GetPlatformComputationPlacers(); CHECK(computation_placers->find(platform_id) == computation_placers->end()); (*computation_placers)[platform_id].creation_function = creation_function; @@ -103,7 +103,7 @@ StatusOr ComputationPlacer::AssignDevices( /* static */ StatusOr ComputationPlacer::GetForPlatform( const se::Platform* platform) { tensorflow::mutex_lock lock( - *ComputationPlacer::platform_computation_placer_mutex()); + ComputationPlacer::platform_computation_placer_mutex_); auto* computation_placers = GetPlatformComputationPlacers(); auto it = computation_placers->find(platform->id()); @@ -122,11 +122,9 @@ StatusOr ComputationPlacer::AssignDevices( return it->second.placer.get(); } -/* static */ tensorflow::mutex* -ComputationPlacer::platform_computation_placer_mutex() { - static tensorflow::mutex* m = new tensorflow::mutex; - return m; -} +/* static */ tensorflow::mutex + ComputationPlacer::platform_computation_placer_mutex_( + tensorflow::LINKER_INITIALIZED); /* static */ std::map* diff --git a/tensorflow/compiler/xla/service/computation_placer.h b/tensorflow/compiler/xla/service/computation_placer.h index 7d9abcd100..737ccabaa7 100644 --- a/tensorflow/compiler/xla/service/computation_placer.h +++ b/tensorflow/compiler/xla/service/computation_placer.h @@ -89,11 +89,8 @@ class ComputationPlacer { const perftools::gputools::Platform* platform); private: - // Routine that returns the mutex that guards the platform-to-computation - // placer map. Done as a routine to ensure correct initialization ordering, - // since RegisterComputationPlacer can be called during program initialization - // time. - static tensorflow::mutex* platform_computation_placer_mutex(); + // The mutex that guards the platform-to-computation placer map. + static tensorflow::mutex platform_computation_placer_mutex_; // State kept for each kind of ComputationPlacer. Registration functions set // up creation_function, and then we use that to lazily create "placer" the diff --git a/tensorflow/compiler/xla/service/transfer_manager.cc b/tensorflow/compiler/xla/service/transfer_manager.cc index 4da0a0d368..fef131d19f 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.cc +++ b/tensorflow/compiler/xla/service/transfer_manager.cc @@ -28,12 +28,9 @@ limitations under the License. namespace se = ::perftools::gputools; namespace xla { - -/* static */ tensorflow::mutex* -TransferManager::platform_transfer_manager_mutex() { - static tensorflow::mutex* m = new tensorflow::mutex; - return m; -} +/* static */ tensorflow::mutex + TransferManager::platform_transfer_manager_mutex_( + tensorflow::LINKER_INITIALIZED); /* static */ std::map* @@ -47,7 +44,7 @@ TransferManager::GetPlatformTransferManagers() { se::Platform::Id platform_id, TransferManagerCreationFunction creation_function) { tensorflow::mutex_lock lock( - *TransferManager::platform_transfer_manager_mutex()); + TransferManager::platform_transfer_manager_mutex_); auto* managers = GetPlatformTransferManagers(); CHECK(managers->find(platform_id) == managers->end()); (*managers)[platform_id].creation_function = creation_function; @@ -56,7 +53,7 @@ TransferManager::GetPlatformTransferManagers() { /* static */ StatusOr TransferManager::GetForPlatform( const se::Platform* platform) { tensorflow::mutex_lock lock( - *TransferManager::platform_transfer_manager_mutex()); + TransferManager::platform_transfer_manager_mutex_); auto* managers = GetPlatformTransferManagers(); auto it = managers->find(platform->id()); diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h index 057bdffe93..d7f85f5765 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.h +++ b/tensorflow/compiler/xla/service/transfer_manager.h @@ -158,11 +158,8 @@ class TransferManager { const perftools::gputools::Platform* platform); private: - // Routine that returns the mutex that guards the - // platform-to-transfer manager map. Done as a routine to - // ensure correct initialization ordering, since RegisterTransferManager - // can be called during program initialization time. - static tensorflow::mutex* platform_transfer_manager_mutex(); + // The mutex that guards the platform-to-transfer manager map. + static tensorflow::mutex platform_transfer_manager_mutex_; // State kept for each kind of TransferManager. Registration functions // set up creation_function, and then we use that to lazily create -- GitLab From fdff4048d4d0fdf7c12f927b92bb5e2fb812df12 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 15 Nov 2017 14:07:41 -0800 Subject: [PATCH 0472/1801] Add `WorkerService.DeleteWorkerSession` method to fix a memory leak. The new method is the counterpart to `WorkerService.CreateWorkerSession`, and is called in all cases where worker sessions have been explicitly created (i.e. when using ClusterSpec propagation). PiperOrigin-RevId: 175877407 --- .../distributed_runtime/master_session.cc | 60 +++++++++++++++++++ .../core/distributed_runtime/master_session.h | 4 ++ .../rpc/grpc_remote_worker.cc | 8 +++ .../rpc/grpc_worker_service.cc | 11 ++++ .../rpc/grpc_worker_service_impl.cc | 2 + .../rpc/grpc_worker_service_impl.h | 1 + tensorflow/core/distributed_runtime/worker.cc | 7 +++ tensorflow/core/distributed_runtime/worker.h | 4 ++ .../distributed_runtime/worker_interface.h | 9 +++ tensorflow/core/protobuf/worker.proto | 16 +++++ tensorflow/core/protobuf/worker_service.proto | 4 ++ 11 files changed, 126 insertions(+) diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 5798ad09e8..91a1fa7d1e 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -1044,6 +1044,7 @@ Status MasterSession::Create(GraphDef* graph_def, graph_def, execution_options, &execution_state_)); } if (options.cluster_def != nullptr) { + should_delete_worker_sessions_ = true; return CreateWorkerSessions(options); } return Status::OK(); @@ -1122,6 +1123,59 @@ Status MasterSession::CreateWorkerSessions( return status; } +Status MasterSession::DeleteWorkerSessions() { + WorkerCacheInterface* worker_cache = get_worker_cache(); + std::vector worker_names; + worker_cache->ListWorkers(&worker_names); + + struct WorkerGroup { + // The worker name. (Not owned.) + const string* name; + + // The worker referenced by name. (Not owned.) + WorkerInterface* worker = nullptr; + + // Request and responses used for a given worker. + DeleteWorkerSessionRequest request; + DeleteWorkerSessionResponse response; + Status status = Status::OK(); + }; + BlockingCounter done(worker_names.size()); + std::vector workers(worker_names.size()); + + // Release the workers. + auto cleanup = gtl::MakeCleanup([this, &workers, worker_cache] { + for (auto&& worker_group : workers) { + if (worker_group.worker != nullptr) { + worker_cache->ReleaseWorker(*worker_group.name, worker_group.worker); + } + } + }); + + Status status = Status::OK(); + // Create all the workers & kick off the computations. + for (size_t i = 0; i < worker_names.size(); ++i) { + workers[i].name = &worker_names[i]; + workers[i].worker = worker_cache_->CreateWorker(worker_names[i]); + workers[i].request.set_session_handle(handle_); + } + + for (size_t i = 0; i < worker_names.size(); ++i) { + auto cb = [i, &workers, &done](const Status& s) { + workers[i].status = s; + done.DecrementCount(); + }; + workers[i].worker->DeleteWorkerSessionAsync(&workers[i].request, + &workers[i].response, cb); + } + + done.Wait(); + for (size_t i = 0; i < workers.size(); ++i) { + status.Update(workers[i].status); + } + return status; +} + Status MasterSession::ListDevices(ListDevicesResponse* resp) const { if (worker_cache_) { // This is a ClusterSpec-propagated session, and thus env_->local_devices @@ -1604,6 +1658,12 @@ Status MasterSession::Close() { ClearRunsTable(&to_unref, &partial_run_graphs_); } for (ReffedClientGraph* rcg : to_unref) rcg->Unref(); + if (should_delete_worker_sessions_) { + Status s = DeleteWorkerSessions(); + if (!s.ok()) { + LOG(WARNING) << s; + } + } return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/master_session.h b/tensorflow/core/distributed_runtime/master_session.h index eb696eb06a..4bd4e1367a 100644 --- a/tensorflow/core/distributed_runtime/master_session.h +++ b/tensorflow/core/distributed_runtime/master_session.h @@ -201,6 +201,10 @@ class MasterSession : public core::RefCounted { // workers. Status CreateWorkerSessions(const WorkerCacheFactoryOptions& server_def); + // TODO(b/36574172): Always use Create/DeleteWorkerSession. + bool should_delete_worker_sessions_ = false; + Status DeleteWorkerSessions(); + Status StartStep(const BuildGraphOptions& opts, int64* count, ReffedClientGraph** graph, bool is_partial); void ClearRunsTable(std::vector* to_unref, diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc index 170c72deca..b3b05408b1 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc @@ -47,6 +47,7 @@ class GrpcRemoteWorker : public WorkerInterface { cq_(completion_queue), getstatus_(Method(GrpcWorkerMethod::kGetStatus)), createworkersession_(Method(GrpcWorkerMethod::kCreateWorkerSession)), + deleteworkersession_(Method(GrpcWorkerMethod::kDeleteWorkerSession)), registergraph_(Method(GrpcWorkerMethod::kRegisterGraph)), deregistergraph_(Method(GrpcWorkerMethod::kDeregisterGraph)), rungraph_(Method(GrpcWorkerMethod::kRunGraph)), @@ -71,6 +72,12 @@ class GrpcRemoteWorker : public WorkerInterface { IssueRequest(request, response, createworkersession_, std::move(done)); } + void DeleteWorkerSessionAsync(const DeleteWorkerSessionRequest* request, + DeleteWorkerSessionResponse* response, + StatusCallback done) override { + IssueRequest(request, response, deleteworkersession_, std::move(done)); + } + void RegisterGraphAsync(const RegisterGraphRequest* request, RegisterGraphResponse* response, StatusCallback done) override { @@ -199,6 +206,7 @@ class GrpcRemoteWorker : public WorkerInterface { const ::grpc::string getstatus_; const ::grpc::string createworkersession_; + const ::grpc::string deleteworkersession_; const ::grpc::string registergraph_; const ::grpc::string deregistergraph_; const ::grpc::string rungraph_; diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc index 4ee5ae0901..eee93ec657 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc @@ -114,6 +114,7 @@ class GrpcWorkerService : public AsyncServiceInterface { // types. ENQUEUE_REQUEST(GetStatus, false); ENQUEUE_REQUEST(CreateWorkerSession, false); + ENQUEUE_REQUEST(DeleteWorkerSession, false); ENQUEUE_REQUEST(CleanupAll, false); ENQUEUE_REQUEST(RegisterGraph, false); ENQUEUE_REQUEST(DeregisterGraph, false); @@ -192,6 +193,16 @@ class GrpcWorkerService : public AsyncServiceInterface { ENQUEUE_REQUEST(CreateWorkerSession, false); } + void DeleteWorkerSessionHandler( + WorkerCall* + call) { + Schedule([this, call]() { + Status s = worker_->DeleteWorkerSession(&call->request, &call->response); + call->SendResponse(ToGrpcStatus(s)); + }); + ENQUEUE_REQUEST(DeleteWorkerSession, false); + } + void CleanupAllHandler( WorkerCall* call) { Schedule([this, call]() { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc index 348c6dc98b..05a9db10d3 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc @@ -32,6 +32,8 @@ const char* GrpcWorkerMethodName(GrpcWorkerMethod id) { return "/tensorflow.WorkerService/GetStatus"; case GrpcWorkerMethod::kCreateWorkerSession: return "/tensorflow.WorkerService/CreateWorkerSession"; + case GrpcWorkerMethod::kDeleteWorkerSession: + return "/tensorflow.WorkerService/DeleteWorkerSession"; case GrpcWorkerMethod::kRegisterGraph: return "/tensorflow.WorkerService/RegisterGraph"; case GrpcWorkerMethod::kDeregisterGraph: diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h index e9862a61a3..fb23f8631f 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h @@ -110,6 +110,7 @@ namespace tensorflow { enum class GrpcWorkerMethod { kGetStatus, kCreateWorkerSession, + kDeleteWorkerSession, kRegisterGraph, kDeregisterGraph, kRunGraph, diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index fcb1830197..8bf87923ed 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -48,6 +48,13 @@ void Worker::CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request, done(s); } +void Worker::DeleteWorkerSessionAsync(const DeleteWorkerSessionRequest* request, + DeleteWorkerSessionResponse* response, + StatusCallback done) { + Status s = env_->session_mgr->DeleteSession(request->session_handle()); + done(s); +} + void Worker::RegisterGraphAsync(const RegisterGraphRequest* request, RegisterGraphResponse* response, StatusCallback done) { diff --git a/tensorflow/core/distributed_runtime/worker.h b/tensorflow/core/distributed_runtime/worker.h index 07300338c3..c62347926f 100644 --- a/tensorflow/core/distributed_runtime/worker.h +++ b/tensorflow/core/distributed_runtime/worker.h @@ -52,6 +52,10 @@ class Worker : public WorkerInterface { CreateWorkerSessionResponse* response, StatusCallback done) override; + void DeleteWorkerSessionAsync(const DeleteWorkerSessionRequest* request, + DeleteWorkerSessionResponse* response, + StatusCallback done) override; + void RegisterGraphAsync(const RegisterGraphRequest* request, RegisterGraphResponse* response, StatusCallback done) override; diff --git a/tensorflow/core/distributed_runtime/worker_interface.h b/tensorflow/core/distributed_runtime/worker_interface.h index c9db28ec67..4c58bf41a4 100644 --- a/tensorflow/core/distributed_runtime/worker_interface.h +++ b/tensorflow/core/distributed_runtime/worker_interface.h @@ -44,6 +44,10 @@ class WorkerInterface { const CreateWorkerSessionRequest* request, CreateWorkerSessionResponse* response, StatusCallback done) = 0; + virtual void DeleteWorkerSessionAsync( + const DeleteWorkerSessionRequest* request, + DeleteWorkerSessionResponse* response, StatusCallback done) = 0; + virtual void RegisterGraphAsync(const RegisterGraphRequest* request, RegisterGraphResponse* response, StatusCallback done) = 0; @@ -118,6 +122,11 @@ class WorkerInterface { return CallAndWait(&ME::CreateWorkerSessionAsync, request, response); } + Status DeleteWorkerSession(const DeleteWorkerSessionRequest* request, + DeleteWorkerSessionResponse* response) { + return CallAndWait(&ME::DeleteWorkerSessionAsync, request, response); + } + Status RegisterGraph(const RegisterGraphRequest* request, RegisterGraphResponse* response) { return CallAndWait(&ME::RegisterGraphAsync, request, response); diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto index 34a5cff366..e7b3f36fcc 100644 --- a/tensorflow/core/protobuf/worker.proto +++ b/tensorflow/core/protobuf/worker.proto @@ -64,6 +64,22 @@ message CreateWorkerSessionRequest { message CreateWorkerSessionResponse { } +//////////////////////////////////////////////////////////////////////////////// +// +// DeleteSession method request/response messages +// +// Deletes all worker-side state associated with the given session handle. +// +//////////////////////////////////////////////////////////////////////////////// + +message DeleteWorkerSessionRequest { + // Sessions are identified by a given handle. + string session_handle = 1; +} + +message DeleteWorkerSessionResponse { +} + //////////////////////////////////////////////////////////////////////////////// // // RegisterGraph method request/response messages diff --git a/tensorflow/core/protobuf/worker_service.proto b/tensorflow/core/protobuf/worker_service.proto index 3de9e48b78..e1bfb04d7c 100644 --- a/tensorflow/core/protobuf/worker_service.proto +++ b/tensorflow/core/protobuf/worker_service.proto @@ -43,6 +43,10 @@ service WorkerService { rpc CreateWorkerSession(CreateWorkerSessionRequest) returns (CreateWorkerSessionResponse); + // See worker.proto for details. + rpc DeleteWorkerSession(DeleteWorkerSessionRequest) + returns (DeleteWorkerSessionResponse); + // See worker.proto for details. rpc RegisterGraph(RegisterGraphRequest) returns (RegisterGraphResponse); -- GitLab From 2c36ab6cd733876cb2d25696d61a936939cf606b Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 15 Nov 2017 14:09:21 -0800 Subject: [PATCH 0473/1801] Fast-path zeros and ones for backprop PiperOrigin-RevId: 175877719 --- tensorflow/python/eager/backprop.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 33601a1edc..a2a7f1c022 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -733,20 +733,28 @@ _last_shape_dtype = [None, None] _last_zero = [None] +def _fast_fill(value, shape, dtype): + return array_ops.fill(shape, constant_op.constant(value, dtype=dtype)) + + def _zeros(shape, dtype): """Wraps array_ops.zeros to cache last zero for a given shape and dtype.""" if [shape, dtype] != _last_shape_dtype: _last_shape_dtype[:] = [shape, dtype] - _last_zero[0] = array_ops.zeros(shape, dtype) + _last_zero[0] = _fast_fill(0, shape, dtype) return _last_zero[0] +def _ones(shape, dtype): + return _fast_fill(1, shape, dtype) + + _default_vspace = imperative_grad.VSpace( num_elements_fn=_num_elements, aggregate_fn=_aggregate_grads, tensor_id=ops.tensor_id, zeros=_zeros, - ones=array_ops.ones) + ones=_ones) class GradientTape(object): -- GitLab From a6626a8271123d30c9c61ba25e2fa5294ff149e5 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 15 Nov 2017 14:16:39 -0800 Subject: [PATCH 0474/1801] Switch tfe.Network to zero-based naming to match Layer variable names. The plan is that Layer names will transition to match their variable names (currently Layer names are one-based, but their variable names are zero-based). Since we can't change variable names, it's better to match those. Layers added to Networks will follow this convention now. PiperOrigin-RevId: 175878910 --- tensorflow/contrib/eager/python/network.py | 6 +- .../contrib/eager/python/network_test.py | 257 ++++++++++-------- tensorflow/python/layers/base.py | 22 +- 3 files changed, 163 insertions(+), 122 deletions(-) diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index 713ab1ee57..f7303cb5b4 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -108,7 +108,8 @@ class Network(base.Layer): for name in self._variable_scope_counts_on_init.keys() if name) self._name, self._base_name = self._make_unique_name( name_uid_map=name_uid_map, avoid_names=avoid_names, - namespace=self._default_parent_variable_scope.name) + namespace=self._default_parent_variable_scope.name, + zero_based=True) if self._first_parent is None or (self._first_parent # False = no parent and self._first_parent() is None): # Save a pointer to the parent Network so that we can later check that the @@ -258,7 +259,8 @@ class Network(base.Layer): # name, and we should respect it (subject to error checking). layer._name, layer._base_name = layer._make_unique_name( name_uid_map=self._sub_layer_name_uids, - avoid_names=self._owned_layers + avoid_names=self._owned_layers, + zero_based=True # No namespace required, since we've specified our own UID map. ) layer._first_parent = weakref.ref(self) diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index e66486d165..555c6e048d 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -126,6 +126,33 @@ class NetworkTest(test.TestCase): self.assertAllEqual(self.evaluate(net1.variables[0]), self.evaluate(net2.variables[0])) + @test_util.run_in_graph_and_eager_modes() + def testNetworkMatchesLayerVariableNames(self): + zero = constant_op.constant([[0.]]) + layer_one = core.Dense(1, use_bias=False) + layer_one(zero) + layer_two = core.Dense(1, use_bias=False) + layer_two(zero) + + class TwoLayerNet(network.Network): + + def __init__(self, name=None): + super(TwoLayerNet, self).__init__(name=name) + self.first = self.track_layer(core.Dense( + 1, use_bias=False)) + self.second = self.track_layer(core.Dense( + 1, use_bias=False)) + + def call(self, x): + return self.second(self.first(x)) + + net = TwoLayerNet() + net(zero) + self.assertEqual("two_layer_net/" + layer_one.variables[0].name, + net.first.variables[0].name) + self.assertEqual("two_layer_net/" + layer_two.variables[0].name, + net.second.variables[0].name) + @test_util.run_in_graph_and_eager_modes() def testLoadIntoUnbuiltSharedLayer(self): @@ -173,7 +200,7 @@ class NetworkTest(test.TestCase): # Re-map the variable names so that with default restore mapping we'll # attempt to restore into the unbuilt Layer. name_mapping = { - "checkpoint_creator/first_layer/kernel": "owner_1/first_layer/kernel", + "checkpoint_creator/first_layer/kernel": "owner/first_layer/kernel", "checkpoint_creator/second_layer/kernel": "second_layer/kernel", } save_path = network.save_network_checkpoint( @@ -197,10 +224,10 @@ class NetworkTest(test.TestCase): del first_owner gc.collect() def _restore_map_func(original_name): - if original_name.startswith("owner_1"): - return original_name.replace("owner_1", "owner_2") + if original_name.startswith("owner/"): + return original_name.replace("owner/", "owner_1/") else: - return "user_2/" + original_name + return "user_1/" + original_name with self.assertRaisesRegexp(ValueError, "garbage collected"): network.restore_network_checkpoint( load_into, save_path, map_func=_restore_map_func) @@ -281,7 +308,7 @@ class NetworkTest(test.TestCase): with self.assertRaisesRegexp( ValueError, "The map_func passed to save_network_checkpoint for the Network " - "'parent_1' resulted in two variables named 'foo'"): + "'parent' resulted in two variables named 'foo'"): network.save_network_checkpoint( make_checkpoint, self.get_temp_dir(), map_func=lambda n: "foo") checkpoint = network.save_network_checkpoint( @@ -294,14 +321,14 @@ class NetworkTest(test.TestCase): with self.assertRaisesRegexp( ValueError, ("The map_func passed to restore_network_checkpoint for the Network" - " 'parent_2' resulted in two variables named 'foo'")): + " 'parent_1' resulted in two variables named 'foo'")): loader(one) loader = Parent() loader(one) with self.assertRaisesRegexp( ValueError, ("The map_func passed to restore_network_checkpoint for the Network" - " 'parent_3' resulted in two variables named 'foo'")): + " 'parent_2' resulted in two variables named 'foo'")): network.restore_network_checkpoint( loader, checkpoint, map_func=lambda n: "foo") @@ -309,7 +336,7 @@ class NetworkTest(test.TestCase): def testDefaultMapCollisionErrors(self): one = constant_op.constant([[1.]]) - first = core.Dense(1, name="dense_1", use_bias=False) + first = core.Dense(1, name="dense", use_bias=False) first(one) class Parent(network.Network): @@ -330,7 +357,7 @@ class NetworkTest(test.TestCase): with self.assertRaisesRegexp( ValueError, ("The default checkpoint variable name mapping strategy for Network " - "'parent_1' resulted in a naming conflict.")): + "'parent' resulted in a naming conflict.")): network.save_network_checkpoint(make_checkpoint, self.get_temp_dir()) class Compatible(network.Network): @@ -352,7 +379,7 @@ class NetworkTest(test.TestCase): with self.assertRaisesRegexp( ValueError, ("The default checkpoint variable name mapping strategy for Network " - "'parent_2' resulted in a naming conflict.")): + "'parent_1' resulted in a naming conflict.")): network.restore_network_checkpoint(load_checkpoint, checkpoint_path) def testNoReferenceCyclesAfterCall(self): @@ -423,25 +450,25 @@ class NetworkTest(test.TestCase): # Naming happens in the order of first build rather than the order of # construction, but for clarity they're the same here and construction is # annotated. - outside_net_before = MyNetwork() # name=my_network_1 + outside_net_before = MyNetwork() # name=my_network outside_net_before(one) captured_scope = variable_scope.get_variable_scope() with variable_scope.variable_scope("outside_scope"): - net1 = MyNetwork() # name=outside_scope/my_network_1 + net1 = MyNetwork() # name=outside_scope/my_network net1(one) name_conflict1 = MyNetwork(name="name_conflict") # fine, unique so far name_conflict2 = MyNetwork(name="name_conflict") # error on build with variable_scope.variable_scope("inside_scope"): # No issue here since the name is unique within its scope. name_conflict3 = MyNetwork(name="name_conflict") - net2 = MyNetwork() # name=outside_scope/my_network_3 to avoid the - # variable_scope my_network_2 below. + net2 = MyNetwork() # name=outside_scope/my_network_2 to avoid the + # variable_scope my_network_1 below. vs_name_conflict = MyNetwork(name="vs_name_conflict") # conflict below with variable_scope.variable_scope("intervening_scope"): with variable_scope.variable_scope(captured_scope): with variable_scope.variable_scope("outside_scope"): name_conflict4 = MyNetwork(name="name_conflict") # error on build - with variable_scope.variable_scope("my_network_2"): + with variable_scope.variable_scope("my_network_1"): pass with variable_scope.variable_scope("vs_name_conflict"): pass @@ -461,35 +488,35 @@ class NetworkTest(test.TestCase): self.assertEqual("outside_scope/name_conflict", name_conflict1.name) self.assertStartsWith( - expected_start="outside_scope/name_conflict/dense_1/", + expected_start="outside_scope/name_conflict/dense/", actual=name_conflict1.variables[0].name) self.assertEqual("outside_scope/inside_scope/name_conflict", name_conflict3.name) self.assertStartsWith( - expected_start="outside_scope/inside_scope/name_conflict/dense_1/", + expected_start="outside_scope/inside_scope/name_conflict/dense/", actual=name_conflict3.variables[0].name) - self.assertEqual("outside_scope/my_network_1", net1.name) + self.assertEqual("outside_scope/my_network", net1.name) self.assertStartsWith( - expected_start="outside_scope/my_network_1/dense_1/", + expected_start="outside_scope/my_network/dense/", actual=net1.trainable_weights[0].name) - self.assertEqual("outside_scope/my_network_3", net2.name) + self.assertEqual("outside_scope/my_network_2", net2.name) self.assertStartsWith( - expected_start="outside_scope/my_network_3/dense_1/", + expected_start="outside_scope/my_network_2/dense/", actual=net2.trainable_weights[0].name) net3(one) - self.assertEqual("outside_scope/my_network_4", net3.name) + self.assertEqual("outside_scope/my_network_3", net3.name) self.assertStartsWith( - expected_start="outside_scope/my_network_4/dense_1/", + expected_start="outside_scope/my_network_3/dense/", actual=net3.trainable_weights[0].name) outside_net_after = MyNetwork() outside_net_after(one) - self.assertEqual("my_network_1", outside_net_before.name) + self.assertEqual("my_network", outside_net_before.name) self.assertStartsWith( - expected_start="my_network_1/dense_1/", + expected_start="my_network/dense/", actual=outside_net_before.trainable_weights[0].name) - self.assertEqual("my_network_2", outside_net_after.name) + self.assertEqual("my_network_1", outside_net_after.name) self.assertStartsWith( - expected_start="my_network_2/dense_1/", + expected_start="my_network_1/dense/", actual=outside_net_after.trainable_weights[0].name) @test_util.run_in_graph_and_eager_modes() @@ -499,12 +526,12 @@ class NetworkTest(test.TestCase): net = MyNetwork() net(constant_op.constant([[2.0]])) self.evaluate(net.variables[0].assign([[42.]])) - self.assertEqual(net.name, "scope1/scope2/my_network_1") + self.assertEqual(net.name, "scope1/scope2/my_network") self.assertStartsWith( - expected_start="scope1/scope2/my_network_1/dense_1/", + expected_start="scope1/scope2/my_network/dense/", actual=net.trainable_weights[0].name) save_path = network.save_network_checkpoint(net, self.get_temp_dir()) - self.assertIn("scope1_scope2_my_network_1", save_path) + self.assertIn("scope1_scope2_my_network", save_path) restore_net = MyNetwork() # Delayed restoration network.restore_network_checkpoint(restore_net, save_path) @@ -532,7 +559,7 @@ class NetworkTest(test.TestCase): one = constant_op.constant([[1.]]) net = ParentNetwork() net(one) - self.assertStartsWith(expected_start="parent_network_1/explicit_name/", + self.assertStartsWith(expected_start="parent_network/explicit_name/", actual=net.trainable_weights[0].name) self.assertEqual("explicit_name", net.first.name) @@ -587,15 +614,15 @@ class NetworkTest(test.TestCase): # locally so that previous Layer consutrciton does not interfere with # variable naming (e.g. add a Layer construction before the Network, # suddenly your previously saved checkpoint is incompatible). - self.assertEqual("dense_1", net1.l1.name) - self.assertEqual("dense_1", net2.l1.name) + self.assertEqual("dense", net1.l1.name) + self.assertEqual("dense", net2.l1.name) self.evaluate(net1.trainable_weights[0].assign([[1.]])) self.evaluate(net2.trainable_weights[0].assign([[2.]])) self.assertEqual(2., self.evaluate(net2.trainable_weights[0])) self.assertEqual(1., self.evaluate(net1.trainable_weights[0])) - self.assertStartsWith(expected_start="my_network_1/dense_1/", + self.assertStartsWith(expected_start="my_network/dense/", actual=net1.trainable_weights[0].name) - self.assertStartsWith(expected_start="my_network_2/dense_1/", + self.assertStartsWith(expected_start="my_network_1/dense/", actual=net2.trainable_weights[0].name) @test_util.run_in_graph_and_eager_modes() @@ -616,31 +643,31 @@ class NetworkTest(test.TestCase): one = constant_op.constant([[1.]]) net = ParentNetwork() net(one) - self.assertStartsWith(expected_start="parent_network_1/my_network_1/dense", + self.assertStartsWith(expected_start="parent_network/my_network/dense", actual=net.trainable_weights[0].name) - self.assertStartsWith(expected_start="parent_network_1/my_network_1/dense", + self.assertStartsWith(expected_start="parent_network/my_network/dense", actual=net.first.trainable_weights[0].name) - self.assertStartsWith(expected_start="parent_network_1/my_network_2/dense", + self.assertStartsWith(expected_start="parent_network/my_network_1/dense", actual=net.trainable_weights[1].name) - self.assertStartsWith(expected_start="parent_network_1/my_network_2/dense", + self.assertStartsWith(expected_start="parent_network/my_network_1/dense", actual=net.second.trainable_weights[0].name) - self.assertEqual("parent_network_1", net.name) - self.assertEqual("my_network_1", net.first.name) - self.assertEqual("my_network_2", net.second.name) + self.assertEqual("parent_network", net.name) + self.assertEqual("my_network", net.first.name) + self.assertEqual("my_network_1", net.second.name) net2 = ParentNetwork() net2(one) - self.assertStartsWith(expected_start="parent_network_2/my_network_1/dense", + self.assertStartsWith(expected_start="parent_network_1/my_network/dense", actual=net2.trainable_weights[0].name) - self.assertStartsWith(expected_start="parent_network_2/my_network_1/dense", + self.assertStartsWith(expected_start="parent_network_1/my_network/dense", actual=net2.first.trainable_weights[0].name) - self.assertStartsWith(expected_start="parent_network_2/my_network_2/dense", + self.assertStartsWith(expected_start="parent_network_1/my_network_1/dense", actual=net2.trainable_weights[1].name) - self.assertStartsWith(expected_start="parent_network_2/my_network_2/dense", + self.assertStartsWith(expected_start="parent_network_1/my_network_1/dense", actual=net2.second.trainable_weights[0].name) - self.assertEqual("parent_network_2", net2.name) - self.assertEqual("my_network_1", net2.first.name) - self.assertEqual("my_network_2", net2.second.name) + self.assertEqual("parent_network_1", net2.name) + self.assertEqual("my_network", net2.first.name) + self.assertEqual("my_network_1", net2.second.name) @test_util.run_in_graph_and_eager_modes() def testNestableExplicit(self): @@ -701,26 +728,26 @@ class NetworkTest(test.TestCase): one = constant_op.constant([[1.]]) net = MixedLayerNetwork() net(one) - self.assertEqual("dense_1", net.first.name) - self.assertEqual("dense_2", net.second.name) - self.assertEqual("dense_3", net.third.name) - self.assertEqual("dense_4", net.fourth.name) - self.assertEqual("dense_5", net.fifth.name) + self.assertEqual("dense", net.first.name) + self.assertEqual("dense_1", net.second.name) + self.assertEqual("dense_2", net.third.name) + self.assertEqual("dense_3", net.fourth.name) + self.assertEqual("dense_4", net.fifth.name) # Note that this is _not_ the default naming behavior for Layers. Layers # which are added to Networks follow Network variable naming conventions # (i.e. variable names = network name unless variable sharing). Nested # Layers revert to Layer behavior. - self.assertStartsWith(expected_start="mixed_layer_network_1/dense_1/", + self.assertStartsWith(expected_start="mixed_layer_network/dense/", actual=net.trainable_weights[0].name) - self.assertStartsWith(expected_start="mixed_layer_network_1/dense_2/", + self.assertStartsWith(expected_start="mixed_layer_network/dense_1/", actual=net.trainable_weights[1].name) - self.assertStartsWith(expected_start="mixed_layer_network_1/dense_3/", + self.assertStartsWith(expected_start="mixed_layer_network/dense_2/", actual=net.trainable_weights[2].name) - self.assertStartsWith(expected_start="mixed_layer_network_1/dense_4/", + self.assertStartsWith(expected_start="mixed_layer_network/dense_3/", actual=net.trainable_weights[3].name) - self.assertStartsWith(expected_start="mixed_layer_network_1/dense_5/", + self.assertStartsWith(expected_start="mixed_layer_network/dense_4/", actual=net.trainable_weights[4].name) - self.assertEqual("mixed_layer_network_1", net.name) + self.assertEqual("mixed_layer_network", net.name) @test_util.run_in_graph_and_eager_modes() def testNestableExplicitCollisions(self): @@ -773,24 +800,24 @@ class NetworkTest(test.TestCase): net = ParentNetwork() net(one) self.assertStartsWith( - expected_start="parent_network_1/first_unique_child_name/dense_1/", + expected_start="parent_network/first_unique_child_name/dense/", actual=net.trainable_weights[0].name) self.assertStartsWith( - expected_start="parent_network_1/second_unique_child_name/dense_1/", + expected_start="parent_network/second_unique_child_name/dense/", actual=net.trainable_weights[1].name) - self.assertEqual("parent_network_1", net.name) + self.assertEqual("parent_network", net.name) self.assertEqual("first_unique_child_name", net.first.name) self.assertEqual("second_unique_child_name", net.second.name) net2 = ParentNetwork() net2(one) self.assertStartsWith( - expected_start="parent_network_2/first_unique_child_name/dense", + expected_start="parent_network_1/first_unique_child_name/dense", actual=net2.trainable_weights[0].name) self.assertStartsWith( - expected_start="parent_network_2/second_unique_child_name/dense", + expected_start="parent_network_1/second_unique_child_name/dense", actual=net2.trainable_weights[1].name) - self.assertEqual("parent_network_2", net2.name) + self.assertEqual("parent_network_1", net2.name) self.assertEqual("first_unique_child_name", net2.first.name) self.assertEqual("second_unique_child_name", net2.second.name) @@ -848,15 +875,15 @@ class NetworkTest(test.TestCase): net2(one) self.assertStartsWith( - expected_start="first_parent_network_1/my_network_1/dense_1/", + expected_start="first_parent_network/my_network/dense/", actual=net2.trainable_weights[0].name) self.assertStartsWith( - expected_start="second_parent_network_1/my_network_1/dense_1/", + expected_start="second_parent_network/my_network/dense/", actual=net2.trainable_weights[1].name) - self.assertEqual("second_parent_network_1", net2.name) + self.assertEqual("second_parent_network", net2.name) self.assertTrue(net2.first is net.first) - self.assertEqual("my_network_1", net2.first.name) - self.assertEqual("my_network_1", net2.second.name) + self.assertEqual("my_network", net2.first.name) + self.assertEqual("my_network", net2.second.name) # No name collision; the owned Network is added first and has a different # name than the shared Network. @@ -874,15 +901,15 @@ class NetworkTest(test.TestCase): net3(one) self.assertStartsWith( - expected_start="third_parent_network_1/my_network_1/dense", + expected_start="third_parent_network/my_network/dense", actual=net3.trainable_weights[0].name) self.assertStartsWith( - expected_start="first_parent_network_1/my_network_2/dense", + expected_start="first_parent_network/my_network_1/dense", actual=net3.trainable_weights[1].name) - self.assertEqual("third_parent_network_1", net3.name) + self.assertEqual("third_parent_network", net3.name) self.assertTrue(net3.second is net.second) - self.assertEqual("my_network_1", net3.first.name) - self.assertEqual("my_network_2", net3.second.name) + self.assertEqual("my_network", net3.first.name) + self.assertEqual("my_network_1", net3.second.name) # "Unavoidable" same-name Layer. The owned name is added first (fixed), then # a shared Network is added with the same name. @@ -900,15 +927,15 @@ class NetworkTest(test.TestCase): net4(one) self.assertStartsWith( - expected_start="fourth_parent_network_1/my_network_1/dense_1/", + expected_start="fourth_parent_network/my_network/dense/", actual=net4.trainable_weights[0].name) self.assertStartsWith( - expected_start="first_parent_network_1/my_network_1/dense_1/", + expected_start="first_parent_network/my_network/dense/", actual=net4.trainable_weights[1].name) - self.assertEqual("fourth_parent_network_1", net4.name) + self.assertEqual("fourth_parent_network", net4.name) self.assertTrue(net4.second is net.first) - self.assertEqual("my_network_1", net4.first.name) - self.assertEqual("my_network_1", net4.second.name) + self.assertEqual("my_network", net4.first.name) + self.assertEqual("my_network", net4.second.name) @test_util.run_in_graph_and_eager_modes() def testRecursiveLayerRenaming(self): @@ -939,28 +966,28 @@ class NetworkTest(test.TestCase): net(one) self.assertStartsWith( - expected_start=("parent_network_1/network_with_layer_children_1/" - "dense_1/"), + expected_start=("parent_network/network_with_layer_children/" + "dense/"), actual=net.trainable_weights[0].name) self.assertStartsWith( - expected_start=("parent_network_1/network_with_layer_children_1/" - "dense_2/"), + expected_start=("parent_network/network_with_layer_children/" + "dense_1/"), actual=net.trainable_weights[1].name) self.assertStartsWith( - expected_start=("parent_network_1/network_with_layer_children_2/" - "dense_1/"), + expected_start=("parent_network/network_with_layer_children_1/" + "dense/"), actual=net.trainable_weights[2].name) self.assertStartsWith( - expected_start=("parent_network_1/network_with_layer_children_2/" - "dense_2/"), + expected_start=("parent_network/network_with_layer_children_1/" + "dense_1/"), actual=net.trainable_weights[3].name) - self.assertEqual("parent_network_1", net.name) - self.assertEqual("network_with_layer_children_1", net.first.name) - self.assertEqual("network_with_layer_children_2", net.second.name) - self.assertEqual("dense_1", net.first.first.name) - self.assertEqual("dense_2", net.first.second.name) - self.assertEqual("dense_1", net.second.first.name) - self.assertEqual("dense_2", net.second.second.name) + self.assertEqual("parent_network", net.name) + self.assertEqual("network_with_layer_children", net.first.name) + self.assertEqual("network_with_layer_children_1", net.second.name) + self.assertEqual("dense", net.first.first.name) + self.assertEqual("dense_1", net.first.second.name) + self.assertEqual("dense", net.second.first.name) + self.assertEqual("dense_1", net.second.second.name) @test_util.run_in_graph_and_eager_modes() def testCallInDifferentOrderThanConstruct(self): @@ -994,23 +1021,23 @@ class NetworkTest(test.TestCase): net1(one) self.assertStartsWith( - expected_start="first_network_1/my_network_1/dense_1/", + expected_start="first_network/my_network/dense/", actual=net1.trainable_weights[0].name) self.assertStartsWith( - expected_start="first_network_1/my_network_2/dense_1/", + expected_start="first_network/my_network_1/dense/", actual=net1.trainable_weights[1].name) self.assertStartsWith( - expected_start="first_network_1/my_network_1/dense_1/", + expected_start="first_network/my_network/dense/", actual=net2.trainable_weights[0].name) self.assertStartsWith( - expected_start="second_network_1/my_network_1/dense_1/", + expected_start="second_network/my_network/dense/", actual=net2.trainable_weights[1].name) self.assertTrue(net1.trainable_weights[0] is net2.trainable_weights[0]) - self.assertEqual("first_network_1", net1.name) - self.assertEqual("my_network_1", net1.first.name) - self.assertEqual("my_network_2", net1.second.name) + self.assertEqual("first_network", net1.name) + self.assertEqual("my_network", net1.first.name) + self.assertEqual("my_network_1", net1.second.name) self.assertTrue(net2.first is net1.first) - self.assertEqual("my_network_1", net2.second.name) + self.assertEqual("my_network", net2.second.name) @test_util.run_in_graph_and_eager_modes() def testLayerCallInDifferentOrderThanConstruct(self): @@ -1047,23 +1074,23 @@ class NetworkTest(test.TestCase): net1(one) self.assertStartsWith( - expected_start="first_network_1/dense_1/", + expected_start="first_network/dense/", actual=net1.trainable_weights[0].name) self.assertStartsWith( - expected_start="first_network_1/dense_2/", + expected_start="first_network/dense_1/", actual=net1.trainable_weights[1].name) self.assertStartsWith( - expected_start="first_network_1/dense_1/", + expected_start="first_network/dense/", actual=net2.trainable_weights[0].name) self.assertStartsWith( - expected_start="second_network_1/dense_1/", + expected_start="second_network/dense/", actual=net2.trainable_weights[1].name) self.assertTrue(net1.trainable_weights[0] is net2.trainable_weights[0]) - self.assertEqual("first_network_1", net1.name) - self.assertEqual("dense_1", net1.first.name) - self.assertEqual("dense_2", net1.second.name) + self.assertEqual("first_network", net1.name) + self.assertEqual("dense", net1.first.name) + self.assertEqual("dense_1", net1.second.name) self.assertTrue(net2.first is net1.first) - self.assertEqual("dense_1", net2.second.name) + self.assertEqual("dense", net2.second.name) @test_util.run_in_graph_and_eager_modes() def testLayerAlreadyBuilt(self): @@ -1092,13 +1119,13 @@ class NetworkTest(test.TestCase): # do not match their layer names. actual=net.trainable_weights[0].name) self.assertStartsWith( - expected_start="first_network_1/dense_1/", + expected_start="first_network/dense/", actual=net.trainable_weights[1].name) self.assertTrue( net.trainable_weights[0] is shared_layer.trainable_weights[0]) - self.assertEqual("first_network_1", net.name) + self.assertEqual("first_network", net.name) self.assertEqual("dense_3", net.first.name) - self.assertEqual("dense_1", net.second.name) + self.assertEqual("dense", net.second.name) class SequentialTest(test.TestCase): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index c71e8382e9..55da959a49 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -402,10 +402,11 @@ class Layer(object): return input_shape def _make_unique_name(self, name_uid_map=None, avoid_names=None, - namespace=''): + namespace='', zero_based=False): base_name = _to_snake_case(self.__class__.__name__) name = _unique_layer_name(base_name, name_uid_map=name_uid_map, - avoid_names=avoid_names, namespace=namespace) + avoid_names=avoid_names, namespace=namespace, + zero_based=zero_based) return (name, base_name) def _set_scope(self, scope=None): @@ -2371,7 +2372,8 @@ def _get_default_graph_uid_map(): return name_uid_map -def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): +def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace='', + zero_based=False): """Makes a layer name (or arbitrary string) unique within a TensorFlow graph. Arguments: @@ -2383,6 +2385,8 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): namespace: Gets a name which is unique within the (graph, namespace). Layers which are not Networks use a blank namespace and so get graph-global names. + zero_based: If True, name sequences start with no suffix (e.g. "dense", + "dense_1"). If False, naming is one-based ("dense_1", "dense_2"). Returns: Unique string name. @@ -2401,6 +2405,14 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): proposed_name = None while proposed_name is None or proposed_name in avoid_names: name_key = (namespace, name) - name_uid_map[name_key] += 1 - proposed_name = name + '_' + str(name_uid_map[name_key]) + if zero_based: + number = name_uid_map[name_key] + if number: + proposed_name = name + '_' + str(number) + else: + proposed_name = name + name_uid_map[name_key] += 1 + else: + name_uid_map[name_key] += 1 + proposed_name = name + '_' + str(name_uid_map[name_key]) return proposed_name -- GitLab From 9642c81ef1e25094b6e775204f5392d6cf2eb32b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 15 Nov 2017 14:22:42 -0800 Subject: [PATCH 0475/1801] Make per-platform test disabling mechanism more flexible by allowing regular expressions. PiperOrigin-RevId: 175879989 --- tensorflow/compiler/xla/tests/build_defs.bzl | 1 + tensorflow/compiler/xla/tests/test_macros.cc | 14 +++++++------- tensorflow/compiler/xla/tests/test_macros.h | 6 ++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl index 36d10fff54..f594c609db 100644 --- a/tensorflow/compiler/xla/tests/build_defs.bzl +++ b/tensorflow/compiler/xla/tests/build_defs.bzl @@ -248,5 +248,6 @@ def generate_backend_test_macros(backends=[]): deps = [ "//tensorflow/compiler/xla:types", "//tensorflow/core:lib", + "//tensorflow/core:regexp_internal", "//tensorflow/core:test", ]) diff --git a/tensorflow/compiler/xla/tests/test_macros.cc b/tensorflow/compiler/xla/tests/test_macros.cc index 173fb1b000..978a669bca 100644 --- a/tensorflow/compiler/xla/tests/test_macros.cc +++ b/tensorflow/compiler/xla/tests/test_macros.cc @@ -21,12 +21,13 @@ limitations under the License. #include #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/regexp.h" namespace xla { namespace { // Mapping from test name; i.e. MyTest.MyTestCase to platforms on which it is -// disabled. +// disabled - a sequence of regexps. using ManifestT = std::unordered_map>; ManifestT ReadManifest() { @@ -66,9 +67,6 @@ ManifestT ReadManifest() { string PrependDisabledIfIndicated(const string& test_case_name, const string& test_name) { - // TODO(leary): this code reads the manifest for every test case instantiated - // in every file. Consider switching to a singleton or using a compile-time - // genrule instead. ManifestT manifest = ReadManifest(); // First try full match: test_case_name.test_name @@ -83,11 +81,13 @@ string PrependDisabledIfIndicated(const string& test_case_name, } } + // Expect a full match vs. one of the platform regexps to disable the test. const std::vector& disabled_platforms = it->second; string platform_string = XLA_PLATFORM; - if (std::find(disabled_platforms.begin(), disabled_platforms.end(), - platform_string) != disabled_platforms.end()) { - return "DISABLED_" + test_name; + for (const auto& s : disabled_platforms) { + if (RE2::FullMatch(/*text=*/platform_string, /*re=*/s)) { + return "DISABLED_" + test_name; + } } // We didn't hit in the disabled manifest entries, so don't disable it. diff --git a/tensorflow/compiler/xla/tests/test_macros.h b/tensorflow/compiler/xla/tests/test_macros.h index bea0b5ef92..28a2d0198a 100644 --- a/tensorflow/compiler/xla/tests/test_macros.h +++ b/tensorflow/compiler/xla/tests/test_macros.h @@ -66,8 +66,10 @@ limitations under the License. namespace xla { -// Reads a disabled manifest file (and retains it as a singleton) to resolve -// whether test cases should be disabled on a particular platform. +// Reads a disabled manifest file to resolve whether test cases should be +// disabled on a particular platform. For a test that should be disabled, +// returns DISABLED_ prepended to its name; otherwise returns the test name +// unmodified. string PrependDisabledIfIndicated(const string& test_case_name, const string& test_name); -- GitLab From bf3b0a8c541bf3d1f7ccbd98375ecc3b92d1537f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 14:24:28 -0800 Subject: [PATCH 0476/1801] Use optimized reverse_row funciton to flip uint8 images. In the imagenet preprocessing pipeline, we can always flip the image first then resize it. It's cheaper to flip a small uint8 image. PiperOrigin-RevId: 175880318 --- tensorflow/core/kernels/reverse_op.cc | 66 ++++- tensorflow/core/kernels/reverse_op_test.cc | 320 +++++++++++++++------ 2 files changed, 280 insertions(+), 106 deletions(-) diff --git a/tensorflow/core/kernels/reverse_op.cc b/tensorflow/core/kernels/reverse_op.cc index 4f2afa5257..7ac34d1c62 100644 --- a/tensorflow/core/kernels/reverse_op.cc +++ b/tensorflow/core/kernels/reverse_op.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/lib/core/status.h" @@ -35,7 +36,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; #ifdef TENSORFLOW_USE_SYCL typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL namespace { @@ -43,7 +44,7 @@ namespace { // NUM_CHANNELS can be <= 0 to compute it dynamically from // Otherwise, it must equal input.dim_size(2) and is used as a compile-time // constant. -template +template void ReverseRows(OpKernelContext* context, const Tensor& input, Tensor* result) { auto work = [&input, result](int64 start, int64 end) { @@ -53,8 +54,8 @@ void ReverseRows(OpKernelContext* context, const Tensor& input, const int64 row_size = inner_size * middle_size; DCHECK_EQ(input.dim_size(2), inner_size); - const int32* in_ptr = input.bit_casted_tensor().data(); - int32* out_ptr = result->bit_casted_tensor().data(); + const T* in_ptr = input.bit_casted_tensor().data(); + T* out_ptr = result->bit_casted_tensor().data(); in_ptr += start * row_size; out_ptr += start * row_size; @@ -64,7 +65,7 @@ void ReverseRows(OpKernelContext* context, const Tensor& input, int remaining = middle_size; while (remaining > 0) { out_ptr -= inner_size; - memcpy(out_ptr, in_ptr, inner_size * sizeof(float)); + memcpy(out_ptr, in_ptr, inner_size * sizeof(T)); in_ptr += inner_size; --remaining; } @@ -81,6 +82,48 @@ void ReverseRows(OpKernelContext* context, const Tensor& input, std::move(work)); } +template +struct data_type_can_memcpy { + static constexpr bool value = + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value; +}; + +template +typename std::enable_if::value>::type +DoHandleReverseCase(OpKernelContext* context, const Tensor& input, + Tensor* result) { + if (sizeof(T) == 1) { + static_assert(sizeof(uint8) == 1, "uint8 must be 1 byte."); + ReverseRows(context, input, result); + } else if (sizeof(T) == 2) { + static_assert(sizeof(uint16) == 2, "uint16 must be 2 bytes"); + ReverseRows(context, input, result); + } else if (sizeof(T) == 4) { + static_assert(sizeof(uint32) == 4, "uint32 must be 4 bytes"); + ReverseRows(context, input, result); + } else if (sizeof(T) == 8) { + static_assert(sizeof(uint64) == 8, "uint64 must be 8 bytes"); + ReverseRows(context, input, result); + } else if (sizeof(T) == 16) { + static_assert(sizeof(complex128) == 16, "complex128 must be 16 bytes"); + ReverseRows(context, input, result); + } else { + context->CtxFailure( + errors::InvalidArgument("%s has unexpected size of %d bytes", + DataTypeString(input.dtype()), sizeof(T))); + } +} + +template +typename std::enable_if::value>::type +DoHandleReverseCase(OpKernelContext* context, const Tensor& input, + Tensor* result) {} + } // namespace template @@ -91,15 +134,14 @@ void HandleReverseCase(OpKernelContext* context, // Use optimized reverse if possible. if (NDIMS == 3 && std::is_same::value && - std::is_same::value && (!dims(0) && dims(1) && !dims(2))) { + data_type_can_memcpy::value && (!dims(0) && dims(1) && !dims(2))) { if (input.dim_size(2) == 3) { - ReverseRows<3>(context, input, result); + DoHandleReverseCase(context, input, result); } else { - ReverseRows<-1>(context, input, result); + DoHandleReverseCase(context, input, result); } return; } - typename Eigen::array axes_di; for (int i = 0; i < NDIMS; i++) { axes_di[i] = dims(i); @@ -168,11 +210,11 @@ void HandleReverseV2Case(OpKernelContext* context, // Use optimized reverse if possible. if (NDIMS == 3 && std::is_same::value && - std::is_same::value && (!axes[0] && axes[1] && !axes[2])) { + data_type_can_memcpy::value && (!axes[0] && axes[1] && !axes[2])) { if (input.dim_size(2) == 3) { - ReverseRows<3>(context, input, result); + DoHandleReverseCase(context, input, result); } else { - ReverseRows<-1>(context, input, result); + DoHandleReverseCase(context, input, result); } return; } diff --git a/tensorflow/core/kernels/reverse_op_test.cc b/tensorflow/core/kernels/reverse_op_test.cc index 9829e40fe8..e8285fb0e2 100644 --- a/tensorflow/core/kernels/reverse_op_test.cc +++ b/tensorflow/core/kernels/reverse_op_test.cc @@ -46,69 +46,132 @@ class ReverseOpTest : public OpsTestBase { .Finalize(node_def())); TF_ASSERT_OK(InitOp()); } + + template + void Reverse_0() { + MakeOp(DataTypeToEnum::value); + AddInputFromArray(TensorShape({}), {3}); + AddInputFromArray(TensorShape({}), {true}); + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DataTypeToEnum::value, TensorShape({})); + expected.scalar() = expected.scalar().constant(3); + test::ExpectTensorEqual(expected, *output); + } + + template + void Reverse_234() { + MakeOp(DataTypeToEnum::value); + // Feed and run + // [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] + // [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]] + AddInputFromArray(TensorShape({2, 3, 4}), + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}); + AddInputFromArray(TensorShape({3}), {true, false, true}); + + TF_ASSERT_OK(RunOpKernel()); + + // Check the new state of the input + Tensor* params_tensor = GetOutput(0); + Tensor expected(allocator(), DataTypeToEnum::value, + TensorShape({2, 3, 4})); + // Should become + // [[[15, 14, 13, 12], [19, 18, 17, 16], [23, 22, 21, 20]] + // [[3, 2, 1, 0], [7, 6, 5, 4], [11, 10, 9, 8]]] + test::FillValues(&expected, + {15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20, + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8}); + test::ExpectTensorEqual(expected, *params_tensor); + } + + template + void Reverse_1234() { + MakeOp(DataTypeToEnum::value); + // Feed and run + // [[[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] + // [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]] + AddInputFromArray(TensorShape({1, 2, 3, 4}), + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}); + AddInputFromArray(TensorShape({4}), {true, true, false, true}); + + TF_ASSERT_OK(RunOpKernel()); + + // Check the new state of the input + Tensor* params_tensor = GetOutput(0); + Tensor expected(allocator(), DataTypeToEnum::value, + TensorShape({1, 2, 3, 4})); + // Should become + // [[[[15, 14, 13, 12], [19, 18, 17, 16], [23, 22, 21, 20]] + // [[3, 2, 1, 0], [7, 6, 5, 4], [11, 10, 9, 8]]]] + test::FillValues(&expected, + {15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20, + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8}); + test::ExpectTensorEqual(expected, *params_tensor); + } }; -TEST_F(ReverseOpTest, Reverse_0) { - MakeOp(DT_FLOAT); - AddInputFromArray(TensorShape({}), {3}); - AddInputFromArray(TensorShape({}), {true}); - TF_ASSERT_OK(RunOpKernel()); +TEST_F(ReverseOpTest, Reverse_0_uint8) { Reverse_0(); } - Tensor* output = GetOutput(0); - Tensor expected(allocator(), DT_FLOAT, TensorShape({})); - expected.scalar() = expected.scalar().constant(3.f); - test::ExpectTensorEqual(expected, *output); -} +TEST_F(ReverseOpTest, Reverse_0_int8) { Reverse_0(); } -TEST_F(ReverseOpTest, Reverse_234) { - MakeOp(DT_FLOAT); - - // Feed and run - // [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] - // [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]] - AddInputFromArray(TensorShape({2, 3, 4}), - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23}); - AddInputFromArray(TensorShape({3}), {true, false, true}); - - TF_ASSERT_OK(RunOpKernel()); - - // Check the new state of the input - Tensor* params_tensor = GetOutput(0); - Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3, 4})); - // Should become - // [[[15, 14, 13, 12], [19, 18, 17, 16], [23, 22, 21, 20]] - // [[3, 2, 1, 0], [7, 6, 5, 4], [11, 10, 9, 8]]] - test::FillValues( - &expected, {15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20, 3, 2, 1, 0, 7, - 6, 5, 4, 11, 10, 9, 8}); - test::ExpectTensorEqual(expected, *params_tensor); -} +TEST_F(ReverseOpTest, Reverse_0_uint16) { Reverse_0(); } -TEST_F(ReverseOpTest, Reverse_1234) { - MakeOp(DT_FLOAT); - - // Feed and run - // [[[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] - // [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]] - AddInputFromArray(TensorShape({1, 2, 3, 4}), - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23}); - AddInputFromArray(TensorShape({4}), {true, true, false, true}); - - TF_ASSERT_OK(RunOpKernel()); - - // Check the new state of the input - Tensor* params_tensor = GetOutput(0); - Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 3, 4})); - // Should become - // [[[[15, 14, 13, 12], [19, 18, 17, 16], [23, 22, 21, 20]] - // [[3, 2, 1, 0], [7, 6, 5, 4], [11, 10, 9, 8]]]] - test::FillValues( - &expected, {15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20, 3, 2, 1, 0, 7, - 6, 5, 4, 11, 10, 9, 8}); - test::ExpectTensorEqual(expected, *params_tensor); -} +TEST_F(ReverseOpTest, Reverse_0_int16) { Reverse_0(); } + +TEST_F(ReverseOpTest, Reverse_0_float) { Reverse_0(); } + +TEST_F(ReverseOpTest, Reverse_0_int32) { Reverse_0(); } + +TEST_F(ReverseOpTest, Reverse_0_int64) { Reverse_0(); } + +TEST_F(ReverseOpTest, Reverse_0_double) { Reverse_0(); } + +TEST_F(ReverseOpTest, Reverse_0_complex64) { Reverse_0(); } + +TEST_F(ReverseOpTest, Reverse_0_complex128) { Reverse_0(); } + +TEST_F(ReverseOpTest, Reverse_234_uint8) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_int8) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_uint16) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_int16) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_float) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_int32) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_int64) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_double) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_complex64) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_234_complex128) { Reverse_234(); } + +TEST_F(ReverseOpTest, Reverse_1234_uint8) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_int8) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_uint16) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_int16) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_float) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_int32) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_int64) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_double) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_complex64) { Reverse_1234(); } + +TEST_F(ReverseOpTest, Reverse_1234_complex128) { Reverse_1234(); } static SessionOptions GetOptions(int intra_threads) { SessionOptions opts; @@ -119,10 +182,11 @@ static SessionOptions GetOptions(int intra_threads) { // Creates a Graph which "reduce"s a 3D float tensor of "num" elements // into a scalar. +template static Graph* Reverse(const TensorShape& shape, int reverse_axis) { Graph* g = new Graph(OpRegistry::Global()); - Tensor data(DT_FLOAT, shape); - data.flat().setRandom(); + Tensor data(DataTypeToEnum::value, shape); + data.flat().setRandom(); Tensor axes(DT_INT32, TensorShape({1})); axes.flat()(0) = reverse_axis; test::graph::Reverse(g, test::graph::Constant(g, data), @@ -130,81 +194,149 @@ static Graph* Reverse(const TensorShape& shape, int reverse_axis) { return g; } +template static void RunReverseRowsBenchmark(int iters, int outer_dim, int middle_dim, int intra_threads, int channels) { SessionOptions opts = GetOptions(intra_threads); TensorShape shape{outer_dim, middle_dim, channels}; const int64 num_items = static_cast(iters) * shape.num_elements(); testing::ItemsProcessed(num_items); - testing::BytesProcessed(num_items * sizeof(float)); + testing::BytesProcessed(num_items * sizeof(T)); testing::UseRealTime(); - test::Benchmark("cpu", Reverse(shape, 1), &opts).Run(iters); + test::Benchmark("cpu", Reverse(shape, 1), &opts).Run(iters); } -static void BM_ReverseRowsOf1Channel_1T(int iters, int outer_dim, - int middle_dim) { - RunReverseRowsBenchmark(iters, outer_dim, middle_dim, 1 /* intra_threads */, - 1 /* channels */); +static void BM_ReverseRowsOf1Channel_1T_float(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 1 /* intra_threads */, 1 /* channels */); } -BENCHMARK(BM_ReverseRowsOf1Channel_1T) +BENCHMARK(BM_ReverseRowsOf1Channel_1T_float) ->ArgPair(288, 288) ->ArgPair(1024, 1024) ->ArgPair(10 * 1024, 1024); -static void BM_ReverseRowsOf1Channel_4T(int iters, int outer_dim, - int middle_dim) { - RunReverseRowsBenchmark(iters, outer_dim, middle_dim, 4 /* intra_threads */, - 1 /* channels */); +static void BM_ReverseRowsOf1Channel_1T_uint8(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 1 /* intra_threads */, 1 /* channels */); } -BENCHMARK(BM_ReverseRowsOf1Channel_4T) +BENCHMARK(BM_ReverseRowsOf1Channel_1T_uint8) + ->ArgPair(288, 288) + ->ArgPair(1024, 1024) + ->ArgPair(10 * 1024, 1024); + +static void BM_ReverseRowsOf1Channel_4T_float(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 4 /* intra_threads */, 1 /* channels */); +} + +BENCHMARK(BM_ReverseRowsOf1Channel_4T_float) + ->ArgPair(288, 288) + ->ArgPair(1024, 1024) + ->ArgPair(10 * 1024, 1024); + +static void BM_ReverseRowsOf1Channel_4T_uint8(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 4 /* intra_threads */, 1 /* channels */); +} + +BENCHMARK(BM_ReverseRowsOf1Channel_4T_uint8) + ->ArgPair(288, 288) + ->ArgPair(1024, 1024) + ->ArgPair(10 * 1024, 1024); + +static void BM_ReverseRowsOf3Channels_1T_float(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 1 /* intra_threads */, 3 /* channels */); +} + +BENCHMARK(BM_ReverseRowsOf3Channels_1T_float) + ->ArgPair(288, 288) + ->ArgPair(30, 30) + ->ArgPair(1024, 1024) + ->ArgPair(10 * 1024, 1024); + +static void BM_ReverseRowsOf3Channels_1T_uint8(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 1 /* intra_threads */, 3 /* channels */); +} + +BENCHMARK(BM_ReverseRowsOf3Channels_1T_uint8) + ->ArgPair(288, 288) + ->ArgPair(30, 30) + ->ArgPair(1024, 1024) + ->ArgPair(10 * 1024, 1024); + +static void BM_ReverseRowsOf3Channels_4T_float(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 4 /* intra_threads */, 3 /* channels */); +} + +BENCHMARK(BM_ReverseRowsOf3Channels_4T_float) + ->ArgPair(288, 288) + ->ArgPair(30, 30) + ->ArgPair(1024, 1024) + ->ArgPair(10 * 1024, 1024); + +static void BM_ReverseRowsOf3Channels_4T_uint8(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 4 /* intra_threads */, 3 /* channels */); +} +BENCHMARK(BM_ReverseRowsOf3Channels_4T_uint8) ->ArgPair(288, 288) + ->ArgPair(30, 30) ->ArgPair(1024, 1024) ->ArgPair(10 * 1024, 1024); -static void BM_ReverseRowsOf3Channels_1T(int iters, int outer_dim, - int middle_dim) { - RunReverseRowsBenchmark(iters, outer_dim, middle_dim, 1 /* intra_threads */, - 3 /* channels */); +static void BM_ReverseRowsOf4Channels_1T_float(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 1 /* intra_threads */, 4 /* channels */); } -BENCHMARK(BM_ReverseRowsOf3Channels_1T) +BENCHMARK(BM_ReverseRowsOf4Channels_1T_float) ->ArgPair(288, 288) - ->ArgPair(224, 224) ->ArgPair(1024, 1024) ->ArgPair(10 * 1024, 1024); -static void BM_ReverseRowsOf3Channels_4T(int iters, int outer_dim, - int middle_dim) { - RunReverseRowsBenchmark(iters, outer_dim, middle_dim, 4 /* intra_threads */, - 3 /* channels */); +static void BM_ReverseRowsOf4Channels_1T_uint8(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 1 /* intra_threads */, 4 /* channels */); } -BENCHMARK(BM_ReverseRowsOf3Channels_4T) +BENCHMARK(BM_ReverseRowsOf4Channels_1T_uint8) ->ArgPair(288, 288) - ->ArgPair(224, 224) ->ArgPair(1024, 1024) ->ArgPair(10 * 1024, 1024); -static void BM_ReverseRowsOf4Channels_1T(int iters, int outer_dim, - int middle_dim) { - RunReverseRowsBenchmark(iters, outer_dim, middle_dim, 1 /* intra_threads */, - 4 /* channels */); +static void BM_ReverseRowsOf4Channels_4T_float(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 4 /* intra_threads */, 4 /* channels */); } -BENCHMARK(BM_ReverseRowsOf4Channels_1T) +BENCHMARK(BM_ReverseRowsOf4Channels_4T_float) ->ArgPair(288, 288) ->ArgPair(1024, 1024) ->ArgPair(10 * 1024, 1024); -static void BM_ReverseRowsOf4Channels_4T(int iters, int outer_dim, - int middle_dim) { - RunReverseRowsBenchmark(iters, outer_dim, middle_dim, 4 /* intra_threads */, - 4 /* channels */); +static void BM_ReverseRowsOf4Channels_4T_uint8(int iters, int outer_dim, + int middle_dim) { + RunReverseRowsBenchmark(iters, outer_dim, middle_dim, + 4 /* intra_threads */, 4 /* channels */); } -BENCHMARK(BM_ReverseRowsOf4Channels_4T) +BENCHMARK(BM_ReverseRowsOf4Channels_4T_uint8) ->ArgPair(288, 288) ->ArgPair(1024, 1024) ->ArgPair(10 * 1024, 1024); -- GitLab From 459153ab91ede37a3175a4dee5aa3f38690d3ebb Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 15 Nov 2017 14:36:49 -0800 Subject: [PATCH 0477/1801] Improve ptxas-related error handling. * Don't crash on cubin file cleanup as the file may not have been created. * Log only one error message if ptxas is not found. PiperOrigin-RevId: 175882482 --- .../compiler/xla/service/gpu/gpu_compiler.cc | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 6a0eacc66a..23fb308ec6 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h" #include +#include #include #include @@ -258,7 +259,9 @@ StatusOr> CompilePtx(const string& ptx, int cc_major, return InternalError("couldn't get temp CUBIN file name"); } auto cubin_cleaner = tensorflow::gtl::MakeCleanup([&cubin_path] { - TF_CHECK_OK(tensorflow::Env::Default()->DeleteFile(cubin_path)); + // CUBIN file may never be created, so the failure to delete it should not + // produce TF error. + tensorflow::Env::Default()->DeleteFile(cubin_path).IgnoreError(); }); tensorflow::SubProcess ptxas_info_dumper; std::vector ptxas_args = {ptxas_path, ptx_path, "-o", cubin_path, @@ -500,10 +503,24 @@ std::vector GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx, VLOG(2) << "Compiled PTX size:" << ptx.size() << " CUBIN size: " << cache_value->cubin_data.size(); } else { - LOG(WARNING) - << "Failed to compile ptx to cubin. Will attempt to let " - "GPU driver compile the ptx. " - << maybe_cubin.status(); + bool log_warning = true; + if (maybe_cubin.status().code() == + tensorflow::error::Code::NOT_FOUND) { + // Missing ptxas is expected in some environments where CUDA SDK + // binaries are not available. We don't want to spam logs with + // identical warnings in this case. + + // TODO(zhengxq): we should implement a LOG_FIRST_N and LOG_EVERY_N + // for more general usage. + static std::atomic warning_done(false); + log_warning = !warning_done.exchange(true); + } + if (log_warning) { + LOG(WARNING) + << "Failed to compile ptx to cubin. Will attempt to let " + "GPU driver compile the ptx. " + << maybe_cubin.status(); + } } } cache_value->compilation_done = true; -- GitLab From 4634ee62ed5628ac8a1962f9172907f4b7289710 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 16:02:29 -0800 Subject: [PATCH 0478/1801] Merged commit includes the following changes: 175895735 by gunan: Move IODataType to a new types.proto in preparation for using it in ModelFlags -- 175893178 by fchollet: Fix multiprocessing issue in keras.utils.data_utils. -- 175891377 by jart: Use redundant download URLs and fix protobuf def I uncommented all the GitHub URLs that turned out to have consistent SHAs. The ones that didn't, I left them commented. We can't re-mirror those URLs because doing so would cause past revisions of the repository to become broken. So workspace.bzl is going to have to evolve back to not having these comments, over time, as we continue to upgrade stuff. -- 175889431 by apassos: Fixed order for EagerVariableStore. -- 175882680 by A. Unique TensorFlower: Move IODataType to a new types.proto in preparation for using it in ModelFlags -- PiperOrigin-RevId: 175895735 --- .../keras/_impl/keras/utils/data_utils.py | 6 +- tensorflow/python/ops/variable_scope.py | 5 +- tensorflow/workspace.bzl | 78 ++++++++++--------- 3 files changed, 48 insertions(+), 41 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils.py b/tensorflow/python/keras/_impl/keras/utils/data_utils.py index 4f335af62e..1f2e9ac440 100644 --- a/tensorflow/python/keras/_impl/keras/utils/data_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/data_utils.py @@ -381,7 +381,7 @@ class Sequence(object): # Global variables to be shared across processes _SHARED_SEQUENCES = {} # We use a Value to provide unique id to different processes. -_SEQUENCE_COUNTER = multiprocessing.Value('i', 0) +_SEQUENCE_COUNTER = None def get_index(uid, i): @@ -477,6 +477,10 @@ class OrderedEnqueuer(SequenceEnqueuer): self.sequence = sequence # Doing Multiprocessing.Value += x is not process-safe. + global _SEQUENCE_COUNTER + if _SEQUENCE_COUNTER is None: + _SEQUENCE_COUNTER = multiprocessing.Value('i', 0) + with _SEQUENCE_COUNTER.get_lock(): self.uid = _SEQUENCE_COUNTER.value _SEQUENCE_COUNTER.value += 1 diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 9a0ff75594..2cdf585503 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1225,11 +1225,12 @@ class EagerVariableStore(object): return with_variable_store(self._store) def variables(self): - return self._store._vars.values() # pylint: disable=protected-access + return sorted(self._store._vars.values(), key=lambda x: x.name) # pylint: disable=protected-access def trainable_variables(self): # pylint: disable=protected-access - return [x for x in self._store._vars.values() if x._trainable] + return sorted([x for x in self._store._vars.values() if x._trainable], + key=lambda x: x.name) # pylint: enable=protected-access diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 19e1deb95d..cd4ea8a7d0 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -152,7 +152,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "mkl", urls = [ "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.9/mklml_lnx_2018.0.20170720.tgz", - # "https://github.com/01org/mkl-dnn/releases/download/v0.9/mklml_lnx_2018.0.20170720.tgz", + "https://github.com/01org/mkl-dnn/releases/download/v0.9/mklml_lnx_2018.0.20170720.tgz", ], sha256 = "57ba56c4c243f403ff78f417ff854ef50b9eddf4a610a917b7c95e7fa8553a4b", strip_prefix = "mklml_lnx_2018.0.20170720", @@ -211,7 +211,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "libxsmm_archive", urls = [ "https://mirror.bazel.build/github.com/hfp/libxsmm/archive/1.8.1.tar.gz", - # "https://github.com/hfp/libxsmm/archive/1.8.1.tar.gz", + "https://github.com/hfp/libxsmm/archive/1.8.1.tar.gz", ], sha256 = "2ade869c3f42f23b5263c7d594aa3c7e5e61ac6a3afcaf5d6e42899d2a7986ce", strip_prefix = "libxsmm-1.8.1", @@ -238,7 +238,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "com_googlesource_code_re2", urls = [ "https://mirror.bazel.build/github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz", - # "https://github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz", + "https://github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz", ], sha256 = "bd63550101e056427c9e7ff12a408c1c8b74e9803f393ca916b2926fc2c4906f", strip_prefix = "re2-b94b7cd42e9f02673cd748c1ac1d16db4052514c", @@ -247,8 +247,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "gemmlowp", urls = [ - "https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip" - # "https://github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip", + "https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip", + "https://github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip", ], sha256 = "dd2557072bde12141419cb8320a9c25e6ec41a8ae53c2ac78c076a347bb46d9d", strip_prefix = "gemmlowp-010bb3e71a26ca1d0884a167081d092b43563996", @@ -258,7 +258,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "farmhash_archive", urls = [ "https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz", - # "https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz", + "https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz", ], sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45", @@ -274,7 +274,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "highwayhash", urls = [ "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", - # "https://github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", + "https://github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", ], sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", @@ -296,7 +296,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "jpeg", urls = [ "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", - # "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", + "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", ], sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77", strip_prefix = "libjpeg-turbo-1.5.1", @@ -308,7 +308,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "png_archive", urls = [ "https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.2.53.tar.gz", - # "https://github.com/glennrp/libpng/archive/v1.2.53.tar.gz", + "https://github.com/glennrp/libpng/archive/v1.2.53.tar.gz", ], sha256 = "716c59c7dfc808a4c368f8ada526932be72b2fcea11dd85dc9d88b1df1dfe9c2", strip_prefix = "libpng-1.2.53", @@ -351,6 +351,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "absl_py", urls = [ + "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/231e3870b976c1dc61dce1749138661d21556028.tar.gz", "https://github.com/abseil/abseil-py/archive/231e3870b976c1dc61dce1749138661d21556028.tar.gz", ], sha256 = "8ea2b23bfdb9ae7622f3e5d95236bc600c8d8509a2f38c84732b3145585d4f73", @@ -372,7 +373,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "com_github_andreif_codegen", urls = [ "https://mirror.bazel.build/github.com/andreif/codegen/archive/1.0.tar.gz", - # "https://github.com/andreif/codegen/archive/1.0.tar.gz", + "https://github.com/andreif/codegen/archive/1.0.tar.gz", ], sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", strip_prefix = "codegen-1.0", @@ -395,12 +396,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): actual = "@six_archive//:six", ) - # TODO(gunan): Add github mirror back if/when sha256sum issues are resolved. - # See https://github.com/libgit2/libgit2/issues/4343 for contetxt. patched_http_archive( name = "protobuf_archive", urls = [ "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz", + "https://github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz", ], sha256 = "e178a25c52efcb6b05988bdbeace4c0d3f2d2fe5b46696d1d9898875c3803d6a", strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9", @@ -424,31 +424,31 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # We need to import the protobuf library under the names com_google_protobuf # and com_google_protobuf_cc to enable proto_library support in bazel. # Unfortunately there is no way to alias http_archives at the moment. - # TODO(gunan): Add github mirror back if/when sha256sum issues are resolved. native.http_archive( name = "com_google_protobuf", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz", + "https://github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz", ], - sha256 = "6d43b9d223ce09e5d4ce8b0060cb8a7513577a35a64c7e3dad10f0703bf3ad93", - strip_prefix = "protobuf-0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66", + sha256 = "e178a25c52efcb6b05988bdbeace4c0d3f2d2fe5b46696d1d9898875c3803d6a", + strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9", ) - # TODO(gunan): Add github mirror back if/when sha256sum issues are resolved. native.http_archive( name = "com_google_protobuf_cc", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz", + "https://github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz", ], - sha256 = "6d43b9d223ce09e5d4ce8b0060cb8a7513577a35a64c7e3dad10f0703bf3ad93", - strip_prefix = "protobuf-0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66", + sha256 = "e178a25c52efcb6b05988bdbeace4c0d3f2d2fe5b46696d1d9898875c3803d6a", + strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9", ) native.http_archive( name = "nsync", urls = [ "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", - # "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", + "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", ], sha256 = "e3bd4555415ace511338fc27e595351738eea4e9006f1612b76c82914770716b", strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323", @@ -458,7 +458,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "com_google_googletest", urls = [ "https://mirror.bazel.build/github.com/google/googletest/archive/9816b96a6ddc0430671693df90192bbee57108b6.zip", - # "https://github.com/google/googletest/archive/9816b96a6ddc0430671693df90192bbee57108b6.zip", + "https://github.com/google/googletest/archive/9816b96a6ddc0430671693df90192bbee57108b6.zip", ], sha256 = "9cbca84c4256bed17df2c8f4d00c912c19d247c11c9ba6647cd6dd5b5c996b8d", strip_prefix = "googletest-9816b96a6ddc0430671693df90192bbee57108b6", @@ -468,7 +468,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "com_github_gflags_gflags", urls = [ "https://mirror.bazel.build/github.com/gflags/gflags/archive/f8a0efe03aa69b3336d8e228b37d4ccb17324b88.tar.gz", - # "https://github.com/gflags/gflags/archive/f8a0efe03aa69b3336d8e228b37d4ccb17324b88.tar.gz", + "https://github.com/gflags/gflags/archive/f8a0efe03aa69b3336d8e228b37d4ccb17324b88.tar.gz", ], sha256 = "4d222fab8f1ede4709cdff417d15a1336f862d7334a81abf76d09c15ecf9acd1", strip_prefix = "gflags-f8a0efe03aa69b3336d8e228b37d4ccb17324b88", @@ -536,11 +536,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "grpc", urls = [ - # "https://mirror.bazel.build/github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz", + "https://mirror.bazel.build/github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz", "https://github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz", ], - sha256 = "c2166b6d96daddf72fe45b2c594210c65ca17ec3c1b2e12089159a9529edb5e4", - strip_prefix = "grpc-54e8f37e537794c2d814c1604c1282125f64f093", + sha256 = "c2166b6d96daddf72fe45b2c594210c65ca17ec3c1b2e12089159a9529edb5e4", + strip_prefix = "grpc-54e8f37e537794c2d814c1604c1282125f64f093", ) # gRPC wants the existence of a cares dependence but its contents are not @@ -567,7 +567,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "7f51f45887a3d31b4ce4fa5965210a5e64637ceac12720cfce7954d6a2e812f7", urls = [ "https://mirror.bazel.build/github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", - # "https://github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", + "https://github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", ], strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3", build_file = str(Label("//third_party:linenoise.BUILD")), @@ -591,7 +591,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "lmdb", urls = [ "https://mirror.bazel.build/github.com/LMDB/lmdb/archive/LMDB_0.9.19.tar.gz", - # "https://github.com/LMDB/lmdb/archive/LMDB_0.9.19.tar.gz", + "https://github.com/LMDB/lmdb/archive/LMDB_0.9.19.tar.gz", ], sha256 = "108532fb94c6f227558d45be3f3347b52539f0f58290a7bb31ec06c462d05326", strip_prefix = "lmdb-LMDB_0.9.19/libraries/liblmdb", @@ -602,7 +602,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "jsoncpp_git", urls = [ "https://mirror.bazel.build/github.com/open-source-parsers/jsoncpp/archive/11086dd6a7eba04289944367ca82cea71299ed70.tar.gz", - # "https://github.com/open-source-parsers/jsoncpp/archive/11086dd6a7eba04289944367ca82cea71299ed70.tar.gz", + "https://github.com/open-source-parsers/jsoncpp/archive/11086dd6a7eba04289944367ca82cea71299ed70.tar.gz", ], sha256 = "07d34db40593d257324ec5fb9debc4dc33f29f8fb44e33a2eeb35503e61d0fe2", strip_prefix = "jsoncpp-11086dd6a7eba04289944367ca82cea71299ed70", @@ -618,6 +618,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "boringssl", urls = [ "https://mirror.bazel.build/github.com/google/boringssl/archive/a0fb951d2a26a8ee746b52f3ba81ab011a0af778.tar.gz", + "https://github.com/google/boringssl/archive/a0fb951d2a26a8ee746b52f3ba81ab011a0af778.tar.gz", ], sha256 = "524ba98a56300149696481b4cb9ddebd0c7b7ac9b9f6edee81da2d2d7e5d2bb3", strip_prefix = "boringssl-a0fb951d2a26a8ee746b52f3ba81ab011a0af778", @@ -653,7 +654,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "snappy", urls = [ "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.4.tar.gz", - # "https://github.com/google/snappy/archive/1.1.4.tar.gz", + "https://github.com/google/snappy/archive/1.1.4.tar.gz", ], sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94", strip_prefix = "snappy-1.1.4", @@ -665,7 +666,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "nccl_archive", urls = [ "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", - # "https://github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", + "https://github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", ], sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", @@ -676,8 +677,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): temp_workaround_http_archive( name = "aws", urls = [ - "http://bazel-mirror.storage.googleapis.com/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz", - # "https://github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz", + "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz", + "https://github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz", ], sha256 = "f599b57aec4f03ad696044dd430b2d201864113937353adc346f53ad47991319", strip_prefix = "aws-sdk-cpp-1.0.90", @@ -714,7 +715,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "jemalloc", urls = [ "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", - # "https://github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", + "https://github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", ], sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", strip_prefix = "jemalloc-4.4.0", @@ -761,7 +762,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "com_google_pprof", urls = [ "https://mirror.bazel.build/github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz", - # "https://github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz", + "https://github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz", ], sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4", strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650", @@ -772,7 +773,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "cub_archive", urls = [ "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip", - # "https://github.com/NVlabs/cub/archive/1.7.4.zip", + "https://github.com/NVlabs/cub/archive/1.7.4.zip", ], sha256 = "20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31", strip_prefix = "cub-1.7.4", @@ -799,7 +800,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "bazel_toolchains", urls = [ "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz", - # "https://github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz", + "https://github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz", ], sha256 = "d58bb2d6c8603f600d522b6104d6192a65339aa26cbba9f11ff5c4b36dedb928", strip_prefix = "bazel-toolchains-af4681c3d19f063f090222ec3d04108c4e0ca255", @@ -832,6 +833,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b", urls = [ - "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip" + "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", + "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", ], ) -- GitLab From 77652ffc8a8433e79a3d675a0f13f574f62328c6 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 16 Nov 2017 09:27:09 +0800 Subject: [PATCH 0479/1801] Fix a build error in windows debug build (#14404) Required by std::_Debug_range<_FwdIt> in std::lower_bound function. --- .../contrib/boosted_trees/lib/utils/sparse_column_iterable.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc index bc0a93db8c..0d46565a19 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc @@ -96,6 +96,10 @@ class IndicesRowIterator return (row_idx_ != other.row_idx_); } + bool operator<(const IndicesRowIterator& other) const { + return (row_idx_ < other.row_idx_); + } + bool operator==(const IndicesRowIterator& other) const { QCHECK_EQ(iter_, other.iter_); return (row_idx_ == other.row_idx_); -- GitLab From 0adc3bf00c6b7073be7f15bdb9b556f02c87c4fb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 16:12:59 -0800 Subject: [PATCH 0480/1801] Added ability to specify estimation mode in constructor of KfacOptimizer. This functions analogously to the estimation_mode argument to FisherEstimator (and is just passed directly to the FisherEstimator that the optimizer constructs). PiperOrigin-RevId: 175897318 --- tensorflow/contrib/kfac/python/ops/optimizer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index bfa15e0948..88299e495c 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -44,7 +44,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): momentum=0., momentum_type="regular", norm_constraint=None, - name="KFAC",): + name="KFAC", + estimation_mode="gradients"): """Initializes the KFAC optimizer with the given settings. Args: @@ -72,6 +73,10 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): specified value. May only be used with momentum type 'regular'. (Default: None) name: The name for this optimizer. (Default: 'KFAC') + estimation_mode: The type of estimator to use for the Fishers. Can be + 'gradients', 'empirical', 'curvature_propagation', or 'exact'. + (Default: 'gradients'). See the doc-string for FisherEstimator for + more a more detailed description of these options. Raises: ValueError: If the momentum type is unsupported. @@ -86,7 +91,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): variables = tf_variables.trainable_variables() self._fisher_est = est.FisherEstimator(variables, cov_ema_decay, damping, - layer_collection) + layer_collection, + estimation_mode=estimation_mode) momentum_type = momentum_type.lower() legal_momentum_types = ["regular", "adam", "qmodel"] -- GitLab From c91d870d44a1766f60444addfe1f1c7bfb5ae5d3 Mon Sep 17 00:00:00 2001 From: Joe Castagneri Date: Wed, 15 Nov 2017 19:06:34 -0700 Subject: [PATCH 0481/1801] Fix strides format for data format in contrib.layers.separable convolution2d (#12273) * Strides must change form if data format is switched * Fixed stride mismatch on channels first data format for separable conv * Fixed sepconv2d test and added stride testing --- tensorflow/contrib/layers/python/layers/layers.py | 2 +- .../contrib/layers/python/layers/layers_test.py | 13 +++++++------ tensorflow/python/layers/convolutional.py | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index dab5a5297c..160d9eb303 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -2558,7 +2558,7 @@ def separable_convolution2d( regularizer=weights_regularizer, trainable=trainable, collections=weights_collections) - strides = [1, stride_h, stride_w, 1] + strides = [1, 1, stride_h, stride_w] if data_format.startswith('NC') else [1, stride_h, stride_w, 1] outputs = nn.depthwise_conv2d(inputs, depthwise_weights, strides, padding, rate=utils.two_element_tuple(rate), diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 7ccd9d8868..f2406205f3 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -3322,16 +3322,17 @@ class SeparableConv2dTest(test.TestCase): for model_variable in model_variables: self.assertEqual(trainable, model_variable in trainable_variables) - def testConvNCHW(self): - for num_filters, correct_output_filters in [(None, 6), (8, 8)]: + def testSepConvNCHW(self): + for num_filters, correct_output_filters in zip((None, 5), (6, 5)): with self.test_session(): - batch, height, width = 4, 5, 6 + batch, height, width = 4, 10, 12 + kernel_dim, stride = 3, 2 images = random_ops.random_uniform((batch, 3, height, width), seed=1) - output = layers_lib.separable_conv2d( - images, num_filters, [3, 3], 2, padding='VALID', data_format='NCHW') + output = layers_lib.separable_conv2d(images, num_outputs=num_filters, kernel_size=[kernel_dim, kernel_dim], + depth_multiplier=2, stride=stride, padding='VALID', data_format='NCHW') self.assertListEqual( output.get_shape().as_list(), [batch, correct_output_filters, - height - 2, width - 2]) + (height - kernel_dim + 1) // stride, (width - kernel_dim + 1) // stride]) class ScaleGradientTests(test.TestCase): diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 8c327d7e27..ad2988fb89 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -920,6 +920,7 @@ class SeparableConv2D(Conv2D): trainable=trainable, name=name, **kwargs) + self.data_format = data_format self.depth_multiplier = depth_multiplier self.depthwise_initializer = depthwise_initializer self.pointwise_initializer = pointwise_initializer -- GitLab From 4f30c8a5c6eddea0d89ea1bc808900c830bd95b5 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 15 Nov 2017 16:36:34 -0800 Subject: [PATCH 0482/1801] Improve docstring on contrib summary module PiperOrigin-RevId: 175900586 --- tensorflow/contrib/summary/summary.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index a73193f460..f783179f61 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +"""TensorFlow Summary API v2. -"""Contrib summary package. - -The operations in this package are safe to use with eager execution turned or on -off. - +The operations in this package are safe to use with eager execution turned on or +off. It has a more flexible API that allows summaries to be written directly +from ops to places other than event log files, rather than propagating protos +from @{tf.summary.merge_all} to @{tf.summary.FileWriter}. """ from __future__ import absolute_import -- GitLab From 6d313682bb41a33eb53a47b3b8de9618f1278194 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 17:08:22 -0800 Subject: [PATCH 0483/1801] Add missing Unref of TensorReference in async CheckNumericsOp. PiperOrigin-RevId: 175904805 --- tensorflow/core/kernels/check_numerics_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/check_numerics_op.cc b/tensorflow/core/kernels/check_numerics_op.cc index 56cb50d2d1..534527c6bd 100644 --- a/tensorflow/core/kernels/check_numerics_op.cc +++ b/tensorflow/core/kernels/check_numerics_op.cc @@ -168,10 +168,10 @@ class CheckNumericsOp : public AsyncOpKernel { abnormal_detected_host, context, done]() { ::perftools::gputools::cuda::ScopedActivateExecutorContext scoped_activation{stream->parent()}; - auto abnormal_detected_host_flat = abnormal_detected_host.flat(); int is_nan = abnormal_detected_host_flat(0); int is_inf = abnormal_detected_host_flat(1); + abnormal_detected_ref.Unref(); if (is_nan || is_inf) { string status; LOG(ERROR) << "abnormal_detected_host @" -- GitLab From 94f76e76f277565504b6f80f245152c7ff5f10e1 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 15 Nov 2017 17:13:09 -0800 Subject: [PATCH 0484/1801] Expose an Orc JIT memory mapper registry. XLA clients can use this registry to inject client-specific behavior into how Orc JIT's manages virtual memory. PiperOrigin-RevId: 175905401 --- tensorflow/compiler/xla/service/cpu/BUILD | 23 ++++++-- .../xla/service/cpu/orc_jit_memory_mapper.cc | 40 +++++++++++++ .../xla/service/cpu/orc_jit_memory_mapper.h | 56 +++++++++++++++++++ .../xla/service/cpu/simple_orc_jit.cc | 7 ++- tensorflow/compiler/xla/xla.bzl | 2 + tensorflow/workspace.bzl | 8 +-- 6 files changed, 125 insertions(+), 11 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.cc create mode 100644 tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 89e8d07200..78216f2ffb 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -17,6 +17,7 @@ package_group( load(":build_defs.bzl", "runtime_copts") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cc_binary") +load("//tensorflow/compiler/xla:xla.bzl", "ORC_JIT_MEMORY_MAPPER_TARGETS") # Filegroup used to collect source files for dependency checking. filegroup( @@ -157,21 +158,23 @@ cc_library( ":custom_call_target_registry", ":disassembler", ":external_constant_pool", + ":orc_jit_memory_mapper", ":runtime_conv2d", ":runtime_fork_join", ":runtime_matmul", ":runtime_single_threaded_conv2d", ":runtime_single_threaded_matmul", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:util", - "//tensorflow/core:lib", - "@llvm//:core", "@llvm//:execution_engine", + "@llvm//:core", "@llvm//:mc", # fixdeps: keep "@llvm//:orc_jit", "@llvm//:support", "@llvm//:target", # fixdeps: keep - ], + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ] + ORC_JIT_MEMORY_MAPPER_TARGETS, ) cc_library( @@ -737,6 +740,16 @@ cc_library( visibility = ["//visibility:public"], ) +cc_library( + name = "orc_jit_memory_mapper", + srcs = ["orc_jit_memory_mapper.cc"], + hdrs = ["orc_jit_memory_mapper.h"], + deps = [ + "//tensorflow/core:lib", + "@llvm//:execution_engine", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.cc b/tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.cc new file mode 100644 index 0000000000..e624e5cc7e --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.cc @@ -0,0 +1,40 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" + +namespace xla { +namespace cpu { +namespace orc_jit_memory_mapper { + +static tensorflow::mutex mapper_instance_mutex(tensorflow::LINKER_INITIALIZED); +static llvm::SectionMemoryManager::MemoryMapper* mapper_instance + GUARDED_BY(mapper_instance_mutex) = nullptr; + +llvm::SectionMemoryManager::MemoryMapper* GetInstance() { + tensorflow::mutex_lock lock(mapper_instance_mutex); + return mapper_instance; +} + +Registrar::Registrar( + std::unique_ptr mapper) { + tensorflow::mutex_lock lock(mapper_instance_mutex); + mapper_instance = mapper.release(); +} +} // namespace orc_jit_memory_mapper +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h b/tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h new file mode 100644 index 0000000000..2d29550fd5 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h @@ -0,0 +1,56 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_ORC_JIT_MEMORY_MAPPER_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_ORC_JIT_MEMORY_MAPPER_H_ + +#include + +#include "llvm/ExecutionEngine/SectionMemoryManager.h" + +namespace xla { +namespace cpu { + +namespace orc_jit_memory_mapper { +// Returns the registered memory mapper if there is one. Returns nullptr if no +// memory mapper is registered. +llvm::SectionMemoryManager::MemoryMapper* GetInstance(); + +class Registrar { + public: + // Registers the `mapper` as a memory mapper. This is a no-op if `mapper` is + // null. Precondition: no other memory mapper has been registered yet. + explicit Registrar( + std::unique_ptr mapper); +}; +} // namespace orc_jit_memory_mapper + +#define XLA_INTERNAL_REGISTER_ORC_JIT_MEMORY_MAPPER(mapper_instance, ctr) \ + static ::xla::cpu::orc_jit_memory_mapper::Registrar \ + XLA_INTERNAL_REGISTER_ORC_JIT_MEMORY_MAPPER_NAME(ctr)(mapper_instance) + +// __COUNTER__ must go through another macro to be properly expanded +#define XLA_INTERNAL_REGISTER_ORC_JIT_MEMORY_MAPPER_NAME(ctr) \ + __orc_jit_memory_mapper_registrar_##ctr + +// Registers the std::unique_ptr +// returned by the `factory` expression. `factory` is allowed to evaluate to +// a null unique_ptr in which case this macro does nothing. +#define XLA_REGISTER_ORC_JIT_MEMORY_MAPPER(factory) \ + XLA_INTERNAL_REGISTER_ORC_JIT_MEMORY_MAPPER(factory, __COUNTER__) +} // namespace cpu +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_ORC_JIT_MEMORY_MAPPER_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index fdf02e5b42..db6c201876 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h" #include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" +#include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h" #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h" #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" @@ -125,8 +126,10 @@ SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options, /*MAttrs=*/DetectMachineAttributes()))), disassembler_(*target_machine_), data_layout_(target_machine_->createDataLayout()), - object_layer_( - [] { return std::make_shared(); }), + object_layer_([] { + return std::make_shared( + orc_jit_memory_mapper::GetInstance()); + }), compile_layer_( object_layer_, CompilerFunctor(target_machine_.get(), &disassembler_, opt_level, diff --git a/tensorflow/compiler/xla/xla.bzl b/tensorflow/compiler/xla/xla.bzl index 3fa5bcc1df..6b136d333b 100644 --- a/tensorflow/compiler/xla/xla.bzl +++ b/tensorflow/compiler/xla/xla.bzl @@ -17,3 +17,5 @@ def xla_proto_library(name, srcs=[], deps=[], visibility=None, testonly=0): protoc="@protobuf_archive//:protoc", testonly=testonly, visibility=visibility,) + +ORC_JIT_MEMORY_MAPPER_TARGETS = [] diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index cd4ea8a7d0..8e62228c1b 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -578,11 +578,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): temp_workaround_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/618cf290880ae9cd87b4bbf6c9b1759476f422eb.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/618cf290880ae9cd87b4bbf6c9b1759476f422eb.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/823bedeb8e23a095173389fa05680597eba3f569.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/823bedeb8e23a095173389fa05680597eba3f569.tar.gz", ], - sha256 = "ec2e032e58372c614c41b539c0309baa91843c30d7a9c6dee647dcd24be02e3c", - strip_prefix = "llvm-618cf290880ae9cd87b4bbf6c9b1759476f422eb", + sha256 = "93464bc760fd0319ebd0a5831fe477fdc4954f3612a29cc64d7405eaee8e00b2", + strip_prefix = "llvm-823bedeb8e23a095173389fa05680597eba3f569", build_file = str(Label("//third_party/llvm:llvm.BUILD")), repository = tf_repo_name, ) -- GitLab From 05fe77d24a22f7f43362f94ebe1949e58f014e00 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 17:40:19 -0800 Subject: [PATCH 0485/1801] Fix MarkDown formatting of code block. PiperOrigin-RevId: 175908614 --- tensorflow/python/eager/backprop.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index a2a7f1c022..25f7ae785e 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -305,6 +305,7 @@ def implicit_val_and_grad(f): is not known ahead of time. Example: + ```python dense_layer = tf.layers.Dense(1) def loss(x, y): @@ -378,6 +379,7 @@ def implicit_grad(f): is not known ahead of time. Example: + ```python dense_layer = tf.layers.Dense(1) def loss(x, y): -- GitLab From 1542d977f410eddf2896553fbbd5f697605d57c9 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 15 Nov 2017 17:44:18 -0800 Subject: [PATCH 0486/1801] Rename layers.base.Network -> layers.network.GraphNetwork Splits GraphNetwork out into a new file, moves some shared utility functions to layers.utils. Should have no functional changes. PiperOrigin-RevId: 175909000 --- tensorflow/python/BUILD | 36 +- .../keras/_impl/keras/engine/topology.py | 30 +- .../keras/_impl/keras/integration_test.py | 4 +- .../keras/_impl/keras/layers/wrappers.py | 6 +- tensorflow/python/layers/base.py | 951 +---------------- tensorflow/python/layers/base_test.py | 490 --------- tensorflow/python/layers/layers.py | 2 +- tensorflow/python/layers/network.py | 957 ++++++++++++++++++ tensorflow/python/layers/network_test.py | 525 ++++++++++ tensorflow/python/layers/utils.py | 17 + .../api/golden/tensorflow.keras.-model.pbtxt | 2 +- .../golden/tensorflow.keras.-sequential.pbtxt | 2 +- ...tensorflow.keras.layers.-input-layer.pbtxt | 2 +- .../tensorflow.keras.models.-model.pbtxt | 2 +- .../tensorflow.keras.models.-sequential.pbtxt | 2 +- 15 files changed, 1566 insertions(+), 1462 deletions(-) create mode 100644 tensorflow/python/layers/network.py create mode 100644 tensorflow/python/layers/network_test.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index bc034e1902..970f3ecaff 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3850,15 +3850,15 @@ py_library( deps = [ ":array_ops", ":control_flow_ops", - ":framework", ":framework_for_generated_wrappers", - ":init_ops", + ":platform", + ":tensor_util", ":util", ":variable_scope", ":variables", + "//tensorflow/python/eager:context", "//tensorflow/python/estimator:util", "//third_party/py/numpy", - "@six_archive//:six", ], ) @@ -3869,12 +3869,14 @@ py_library( "layers/core.py", "layers/layers.py", "layers/maxout.py", + "layers/network.py", "layers/normalization.py", "layers/pooling.py", ], srcs_version = "PY2AND3", deps = [ ":array_ops", + ":array_ops_gen", ":control_flow_ops", ":framework", ":framework_for_generated_wrappers", @@ -3882,12 +3884,18 @@ py_library( ":layers_base", ":math_ops", ":nn", + ":nn_ops", + ":platform", + ":resource_variable_ops", + ":resource_variable_ops_gen", ":standard_ops", + ":state_ops", ":training", ":util", ":variable_scope", ":variables", "//tensorflow/python/eager:context", + "//tensorflow/python/estimator:util", "//third_party/py/numpy", "@six_archive//:six", ], @@ -3900,14 +3908,36 @@ py_test( main = "layers/base_test.py", srcs_version = "PY2AND3", deps = [ + ":array_ops", ":client_testlib", ":framework_for_generated_wrappers", ":framework_test_lib", ":init_ops", ":layers", + ":layers_base", ":math_ops", ":random_ops", ":variable_scope", + "//tensorflow/python/eager:context", + ], +) + +py_test( + name = "layers_network_test", + size = "small", + srcs = ["layers/network_test.py"], + main = "layers/network_test.py", + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":client_testlib", + ":framework_for_generated_wrappers", + ":framework_test_lib", + ":layers", + ":layers_base", + ":sparse_ops", + "//tensorflow/python/eager:context", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index 814961bd1d..4a7bb2e838 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -36,6 +36,8 @@ from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary from tensorflow.python.layers import base as tf_base_layers +from tensorflow.python.layers import network as tf_network +from tensorflow.python.layers import utils as tf_layers_util from tensorflow.python.platform import tf_logging as logging @@ -485,7 +487,7 @@ class Layer(tf_base_layers.Layer): self._activity_regularizer = activity_regularizer -class InputLayer(tf_base_layers.InputLayer, Layer): +class InputLayer(tf_network.InputLayer, Layer): """Layer to be used as an entry point into a graph. It can either wrap an existing tensor (pass an `input_tensor` argument) @@ -636,7 +638,7 @@ def Input( # pylint: disable=invalid-name return outputs -class Network(tf_base_layers.Network, Layer): +class Network(tf_network.GraphNetwork, Layer): """A Network is a directed acyclic graph of layers. It is the topological form of a "model". A Model @@ -681,8 +683,8 @@ class Network(tf_base_layers.Network, Layer): for x in self.inputs: mask = x._keras_mask if hasattr(x, '_keras_mask') else None masks.append(mask) - mask_cache_key = (tf_base_layers._object_list_uid(self.inputs) + '_' + - tf_base_layers._object_list_uid(masks)) + mask_cache_key = (tf_layers_util.object_list_uid(self.inputs) + '_' + + tf_layers_util.object_list_uid(masks)) masks = [] for x in self.outputs: mask = x._keras_mask if hasattr(x, '_keras_mask') else None @@ -798,8 +800,8 @@ class Network(tf_base_layers.Network, Layer): else: kept_nodes = 0 for original_node_index, node in enumerate(layer._inbound_nodes): - node_key = tf_base_layers._make_node_key(layer.name, - original_node_index) + node_key = tf_network._make_node_key(layer.name, + original_node_index) if node_key in self._network_nodes: node_conversion_map[node_key] = kept_nodes kept_nodes += 1 @@ -809,8 +811,8 @@ class Network(tf_base_layers.Network, Layer): layer_config = layer.get_config() filtered_inbound_nodes = [] for original_node_index, node in enumerate(layer._inbound_nodes): - node_key = tf_base_layers._make_node_key(layer.name, - original_node_index) + node_key = tf_network._make_node_key(layer.name, + original_node_index) if node_key in self._network_nodes: # The node is relevant to the model: # add to filtered_inbound_nodes. @@ -834,8 +836,8 @@ class Network(tf_base_layers.Network, Layer): inbound_layer = node.inbound_layers[i] node_index = node.node_indices[i] tensor_index = node.tensor_indices[i] - node_key = tf_base_layers._make_node_key(inbound_layer.name, - node_index) + node_key = tf_network._make_node_key(inbound_layer.name, + node_index) new_node_index = node_conversion_map.get(node_key, 0) node_data.append( [inbound_layer.name, new_node_index, tensor_index, kwargs]) @@ -852,8 +854,8 @@ class Network(tf_base_layers.Network, Layer): model_inputs = [] for i in range(len(self._input_layers)): layer, node_index, tensor_index = self._input_coordinates[i] - node_key = tf_base_layers._make_node_key(layer.name, - node_index) + node_key = tf_network._make_node_key(layer.name, + node_index) if node_key not in self._network_nodes: continue new_node_index = node_conversion_map[node_key] @@ -862,8 +864,8 @@ class Network(tf_base_layers.Network, Layer): model_outputs = [] for i in range(len(self._output_layers)): layer, node_index, tensor_index = self._output_coordinates[i] - node_key = tf_base_layers._make_node_key(layer.name, - node_index) + node_key = tf_network._make_node_key(layer.name, + node_index) if node_key not in self._network_nodes: continue new_node_index = node_conversion_map[node_key] diff --git a/tensorflow/python/keras/_impl/keras/integration_test.py b/tensorflow/python/keras/_impl/keras/integration_test.py index 871a8c7329..15c3d14727 100644 --- a/tensorflow/python/keras/_impl/keras/integration_test.py +++ b/tensorflow/python/keras/_impl/keras/integration_test.py @@ -22,8 +22,8 @@ import numpy as np from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils -from tensorflow.python.layers import base as tf_base_layers from tensorflow.python.layers import core as tf_core_layers +from tensorflow.python.layers import network as tf_network_layers from tensorflow.python.ops import nn from tensorflow.python.platform import test @@ -275,7 +275,7 @@ class KerasIntegrationTest(test.TestCase): y_train = keras.utils.to_categorical(y_train) y_test = keras.utils.to_categorical(y_test) - inputs = tf_base_layers.Input(shape=(10,)) + inputs = tf_network_layers.Input(shape=(10,)) x = tf_core_layers.Dense(32, activation=nn.relu)(inputs) outputs = tf_core_layers.Dense(2, activation=nn.softmax)(x) model = keras.models.Model(inputs, outputs) diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index 0e82005caa..aefa5a1c02 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -26,7 +26,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg -from tensorflow.python.layers import base as tf_base_layers +from tensorflow.python.layers import utils as tf_layers_util class Wrapper(Layer): @@ -77,7 +77,7 @@ class Wrapper(Layer): # get the updates from the inner layer. inner_inputs = inputs if inputs is not None: - uid = tf_base_layers._object_list_uid(inputs) + uid = tf_layers_util.object_list_uid(inputs) if uid in self._input_map: inner_inputs = self._input_map[uid] @@ -223,7 +223,7 @@ class TimeDistributed(Wrapper): input_length = K.shape(inputs)[1] # Shape: (num_samples * timesteps, ...). And track the # transformation in self._input_map. - input_uid = tf_base_layers._object_list_uid(inputs) + input_uid = tf_layers_util.object_list_uid(inputs) inputs = K.reshape(inputs, (-1,) + input_shape[2:]) self._input_map[input_uid] = inputs # (num_samples * timesteps, ...) diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 55da959a49..9677db2bce 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -30,6 +30,7 @@ from tensorflow.python.estimator import util as estimator_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.layers import utils as layers_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as tf_variables @@ -250,7 +251,7 @@ class Layer(object): if inputs is not None: # We compute an ID that uniquely identifies the list of tensors. # This ID is order-sensitive. - inputs_hash = _object_list_uid(inputs) + inputs_hash = layers_util.object_list_uid(inputs) else: inputs_hash = None if inputs_hash not in self._per_input_updates: @@ -279,7 +280,7 @@ class Layer(object): if not inputs: inputs = None if inputs is not None: - inputs_hash = _object_list_uid(inputs) + inputs_hash = layers_util.object_list_uid(inputs) else: inputs_hash = None return self._per_input_updates.get(inputs_hash, []) @@ -326,7 +327,7 @@ class Layer(object): if inputs is not None: # We compute an ID that uniquely identifies the list of tensors. # This ID is order-sensitive. - inputs_hash = _object_list_uid(inputs) + inputs_hash = layers_util.object_list_uid(inputs) else: inputs_hash = None if inputs_hash not in self._per_input_losses: @@ -357,7 +358,7 @@ class Layer(object): if not inputs: inputs = None if inputs is not None: - inputs_hash = _object_list_uid(inputs) + inputs_hash = layers_util.object_list_uid(inputs) else: inputs_hash = None return self._per_input_losses.get(inputs_hash, []) @@ -1267,9 +1268,9 @@ class Node(object): # Following 2 properties: input and output shapes. # List of shape tuples, shapes of input_tensors. - self.input_shapes = [_static_shape(x) for x in input_tensors] + self.input_shapes = [layers_util.static_shape(x) for x in input_tensors] # List of shape tuples, shapes of output_tensors. - self.output_shapes = [_static_shape(x) for x in output_tensors] + self.output_shapes = [layers_util.static_shape(x) for x in output_tensors] # Optional keyword arguments to layer's `call`. self.arguments = arguments @@ -1327,926 +1328,6 @@ class _DeferredTensor(object): self.dtype.name) -class InputLayer(Layer): - """Layer to be used as an entry point into a Network (a graph of layers). - - It can either wrap an existing tensor (pass an `input_tensor` argument) - or create its a placeholder tensor (pass arguments `input_shape` - as well as `dtype`). - - It is generally recommend to use the functional layer API via `Input`, - (which creates an `InputLayer`) without directly using `InputLayer`. - - Arguments: - input_shape: Shape tuple (not including the batch axis), or `TensorShape` - instance (not including the batch axis). - batch_size: Optional input batch size (integer or None). - dtype: Datatype of the input. - input_tensor: Optional tensor to use as layer input - instead of creating a placeholder. - sparse: Boolean, whether the placeholder created - is meant to be sparse. - name: Name of the layer (string). - - Raises: - RuntimeError: If created in Eager mode. - """ - - def __init__(self, - input_shape=None, - batch_size=None, - dtype=dtypes.float32, - input_tensor=None, - sparse=False, - name=None): - super(InputLayer, self).__init__(dtype=dtype, name=name) - self.built = True - self.sparse = sparse - self.batch_size = batch_size - - if isinstance(input_shape, tensor_shape.TensorShape): - input_shape = tuple(input_shape.as_list()) - - if input_tensor is None: - if input_shape is not None: - batch_input_shape = (batch_size,) + tuple(input_shape) - else: - batch_input_shape = None - - if context.in_eager_mode(): - # In eager mode, create a temporary placeholder to call the layer on. - input_tensor = _DeferredTensor( - shape=batch_input_shape, - dtype=dtype, - name=self.name) - else: - # In graph mode, create a graph placeholder to call the layer on. - if sparse: - input_tensor = array_ops.sparse_placeholder( - shape=batch_input_shape, - dtype=dtype, - name=self.name) - else: - input_tensor = array_ops.placeholder( - shape=batch_input_shape, - dtype=dtype, - name=self.name) - - # For compatibility with Keras API. - self.is_placeholder = True - self._batch_input_shape = batch_input_shape - else: - # For compatibility with Keras API. - self.is_placeholder = False - self._batch_input_shape = tuple(input_tensor.get_shape().as_list()) - - # Create an input node to add to self.outbound_node - # and set output_tensors' _keras_history. - input_tensor._keras_history = (self, 0, 0) # pylint: disable=protected-access - Node( - self, - inbound_layers=[], - node_indices=[], - tensor_indices=[], - input_tensors=[input_tensor], - output_tensors=[input_tensor]) - - -def Input( # pylint: disable=invalid-name - shape=None, - batch_size=None, - name=None, - dtype=dtypes.float32, - sparse=False, - tensor=None): - """`Input()` is used to instantiate an input tensor for use with a `Network`. - - For instance, if a, b and c are tensors created via `Input`, - it becomes possible to do: - - `network = Network(inputs=[a, b], outputs=c)` - - Example: - - ```python - # This is a logistic regression - x = tf.layers.Input(shape=(32,)) - y = tf.layers.Dense(16, activation='softmax')(x) - network = tf.layers.Network(x, y) - ``` - - Arguments: - shape: A shape tuple (integer), not including the batch size. - For instance, `shape=(32,)` indicates that the expected input - will be batches of 32-dimensional vectors. - batch_size: Optional input batch size (integer or None). - name: An optional name string for the layer. - Should be unique in a model (do not reuse the same name twice). - It will be autogenerated if it isn't provided. - dtype: The data type expected by the input, as a string - (`float32`, `float64`, `int32`...) - sparse: A boolean specifying whether the placeholder - to be created is sparse. - tensor: Optional existing tensor to wrap into the `Input` layer. - If set, the layer will not create a placeholder tensor. - - Returns: - A tensor: either a new placeholder (with history metadata) or - `tensor` (if passed), with added history metadata. - - Raises: - RuntimeError: If called in Eager mode. - """ - input_layer = InputLayer( - input_shape=shape, - batch_size=batch_size, - name=name, - dtype=dtype, - sparse=sparse, - input_tensor=tensor) - # Return tensor including `_keras_history` metadata. - # Note that in this case train_output and test_output are the same pointer. - outputs = input_layer._inbound_nodes[0].output_tensors # pylint: disable=protected-access - if len(outputs) == 1: - return outputs[0] - else: - return outputs - - -class Network(Layer): - """A Network is a directed acyclic graph of layers. - - It is the topological form of a "model". - A Model is simply a Network with added training/evaluation routines. - - A Network instance implements the full Layer API. In particular, a network - can be called on new inputs. - - Example: - - ```python - # This is a logistic regression - x = tf.layers.Input(shape=(32,)) - y = tf.layers.Dense(16, activation='softmax')(x) - network = tf.layers.Network(x, y) - - # It is then possible to call the network on compatible inputs: - z = tf.layers.Input(shape=(32,)) - w = network(z) - - # It is possible to retrieve the same properties as a layer: - weights = network.trainable_weights - ``` - - Arguments: - inputs: Input tensor or list of input tensors. - Must come from `tf.layers.Input`. - output: Output tensor or list of output tensors. Must come from - tf.layers Layers or Keras layers. - name: Optional name of the model (string). - - Attributes: - Network has the same attributes as Layer. On top of it, it also has: - - layers: a list of the children layers of the network, - a list of layer instances, ordered from "earlier in the graph" - to "later in the graph". - - Methods: - Network has the same methods as Layer. On top of it, it also has: - - get_layer: retrieves a child layer by name or index in the graph. - - Raises: - RuntimeError: If created in Eager mode. - """ - - def __init__(self, inputs, outputs, name=None): # pylint: disable=super-init-not-called - if context.in_eager_mode(): - # TODO(fchollet): check that all inputs and outputs are DeferredTensors. - pass - - self._init_set_name(name) - self._activity_regularizer = None - with vs.variable_scope( - None, default_name=self._base_name) as captured_scope: - self._scope = captured_scope - call_fn_args = estimator_util.fn_args(self.call) - self._compute_previous_mask = ('mask' in call_fn_args or - hasattr(self, 'compute_mask')) - self._call_has_scope_arg = 'scope' in call_fn_args - - # This acts just like the `trainable` attribute of any layer instance. - # It does not affect users of the underlying layers, only users of the - # Network instance. - self.trainable = True - # A Network does not create weights of its own, thus it is already built. - self.built = True - # A Network does not create weights of its own, thus has no dtype. - self._dtype = None - # The following are implemented as property functions: - # self.trainable_weights - # self.non_trainable_weights - # self.input_spec - - # Private attributes to implement compatibility with Layer. - self._per_input_losses = {} - self._per_input_updates = {} - self._updates = [] - self._losses = [] - self._scope = None - self._reuse = None - self._graph = ops.get_default_graph() - - # Network-specific properties. - if isinstance(inputs, (list, tuple)): - self.inputs = list(inputs) # Tensor or list of tensors. - else: - self.inputs = [inputs] - if isinstance(outputs, (list, tuple)): - self.outputs = list(outputs) - else: - self.outputs = [outputs] - # All layers in order of horizontal graph traversal. - # Entries are unique. Includes input and output layers. - self.layers = [] - - # Check for redundancy in inputs. - if len(set(self.inputs)) != len(self.inputs): - raise ValueError('The list of inputs passed to the model ' - 'is redundant. ' - 'All inputs should only appear once.' - ' Found: ' + str(self.inputs)) - - # # List of initial layers (1 to 1 mapping with self.inputs, - # # hence the same layer might appear twice) - # self._input_layers = [] - # self._input_layers_node_indices = [] - # self._input_layers_tensor_indices = [] - # # list of layers (1 to 1 mapping with self.inputs, - # # hence the same layer might appear twice) - # self._output_layers = [] - # self._output_layers_node_indices = [] - # self._output_layers_tensor_indices = [] - - self._input_layers = [] - self._output_layers = [] - self._input_coordinates = [] - self._output_coordinates = [] - - # This is for performance optimization - # when calling the Network on new inputs. - # every time the Network is called on a set on input tensors, - # we compute the output tensors, - # output masks and output shapes in one pass, - # then cache them here. When any of these outputs is queried later, - # we retrieve it from there instead of recomputing it. - self._output_mask_cache = {} - self._output_tensor_cache = {} - self._output_shape_cache = {} - - # User-provided arguments validation. - for x in self.inputs: - # Check that x has appropriate `_keras_history` metadata. - if not hasattr(x, '_keras_history'): - cls_name = self.__class__.__name__ - raise ValueError('Input tensors to a ' + cls_name + ' ' + - 'must come from `tf.layers.Input`. ' - 'Received: ' + str(x) + - ' (missing previous layer metadata).') - # Check that x is an input tensor. - # pylint: disable=protected-access - layer, node_index, tensor_index = x._keras_history - if len(layer._inbound_nodes) > 1 or ( - layer._inbound_nodes and layer._inbound_nodes[0].inbound_layers): - cls_name = self.__class__.__name__ - logging.warning(cls_name + ' inputs must come from ' - '`tf.layers.Input` (thus holding past layer metadata), ' - 'they cannot be the output of ' - 'a previous non-Input layer. ' - 'Here, a tensor specified as ' - 'input to "' + self.name + '" was not an Input tensor, ' - 'it was generated by layer ' + layer.name + '.\n' - 'Note that input tensors are ' - 'instantiated via `tensor = tf.layers.Input(shape)`.\n' - 'The tensor that caused the issue was: ' + str(x.name)) - # pylint: enable=protected-access - for x in self.outputs: - if not hasattr(x, '_keras_history'): - cls_name = self.__class__.__name__ - raise ValueError('Output tensors to a ' + cls_name + ' must be ' - 'the output of a TensorFlow `Layer` ' - '(thus holding past layer metadata). Found: ' + str(x)) - - # Build self._output_layers: - for x in self.outputs: - layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access - self._output_layers.append(layer) - self._output_coordinates.append((layer, node_index, tensor_index)) - - # Build self._input_layers: - for x in self.inputs: - layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access - # It's supposed to be an input layer, so only one node - # and one tensor output. - assert node_index == 0 - assert tensor_index == 0 - self._input_layers.append(layer) - self._input_coordinates.append((layer, node_index, tensor_index)) - - # Network_nodes: set of nodes included in the graph - # (not all nodes included in the layers - # are relevant to the current graph). - network_nodes = set() # ids of all nodes relevant to the Network - nodes_depths = {} # dict {node: depth value} - layers_depths = {} # dict {layer: depth value} - layer_indices = {} # dict {layer: index in traversal} - nodes_in_decreasing_depth = [] - - def build_map_of_graph(tensor, - finished_nodes, - nodes_in_progress, - layer, - node_index, - tensor_index): - """Builds a map of the graph of layers. - - This recursively updates the map `layer_indices`, - the list `nodes_in_decreasing_depth` and the set `network_nodes`. - - Arguments: - tensor: Some tensor in a graph. - finished_nodes: Set of nodes whose subgraphs have been traversed - completely. Useful to prevent duplicated work. - nodes_in_progress: Set of nodes that are currently active on the - recursion stack. Useful to detect cycles. - layer: Layer from which `tensor` comes from. If not provided, - will be obtained from `tensor._keras_history`. - node_index: Node index from which `tensor` comes from. - tensor_index: Tensor_index from which `tensor` comes from. - - Raises: - ValueError: if a cycle is detected. - """ - node = layer._inbound_nodes[node_index] # pylint: disable=protected-access - - # Prevent cycles. - if node in nodes_in_progress: - raise ValueError('The tensor ' + str(tensor) + ' at layer "' + - layer.name + '" is part of a cycle.') - - # Don't repeat work for shared subgraphs - if node in finished_nodes: - return - - node_key = _make_node_key(layer.name, node_index) - # Update network_nodes. - network_nodes.add(node_key) - - # Store the traversal order for layer sorting. - if layer not in layer_indices: - layer_indices[layer] = len(layer_indices) - - nodes_in_progress.add(node) - - # Propagate to all previous tensors connected to this node. - for i in range(len(node.inbound_layers)): - x = node.input_tensors[i] - layer = node.inbound_layers[i] - node_index = node.node_indices[i] - tensor_index = node.tensor_indices[i] - build_map_of_graph(x, finished_nodes, nodes_in_progress, layer, - node_index, tensor_index) - - finished_nodes.add(node) - nodes_in_progress.remove(node) - nodes_in_decreasing_depth.append(node) - - finished_nodes = set() - nodes_in_progress = set() - for x in self.outputs: - layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access - build_map_of_graph(x, finished_nodes, nodes_in_progress, - layer=layer, - node_index=node_index, - tensor_index=tensor_index) - - for node in reversed(nodes_in_decreasing_depth): - # If the depth is not set, the node has no outbound nodes (depth 0). - depth = nodes_depths.setdefault(node, 0) - - # Update the depth of the corresponding layer - previous_depth = layers_depths.get(node.outbound_layer, 0) - # If we've seen this layer before at a higher depth, - # we should use that depth instead of the node depth. - # This is necessary for shared layers that have inputs at different - # depth levels in the graph. - depth = max(depth, previous_depth) - layers_depths[node.outbound_layer] = depth - nodes_depths[node] = depth - - # Update the depth of inbound nodes. - # The "depth" of a node is the max of the depths - # of all layers it is connected to. - for i in range(len(node.inbound_layers)): - inbound_layer = node.inbound_layers[i] - node_index = node.node_indices[i] - inbound_node = inbound_layer._inbound_nodes[node_index] # pylint: disable=protected-access - previous_depth = nodes_depths.get(inbound_node, 0) - nodes_depths[inbound_node] = max(depth + 1, previous_depth) - - # Build a dict {depth: list of nodes with this depth} - nodes_by_depth = {} - for node, depth in nodes_depths.items(): - if depth not in nodes_by_depth: - nodes_by_depth[depth] = [] - nodes_by_depth[depth].append(node) - - # Build a dict {depth: list of layers with this depth} - layers_by_depth = {} - for layer, depth in layers_depths.items(): - if depth not in layers_by_depth: - layers_by_depth[depth] = [] - layers_by_depth[depth].append(layer) - - # Get sorted list of layer depths. - depth_keys = list(layers_by_depth.keys()) - depth_keys.sort(reverse=True) - - # Set self.layers and self._layers_by_depth. - layers = [] - for depth in depth_keys: - layers_for_depth = layers_by_depth[depth] - # Network.layers needs to have a deterministic order: - # here we order them by traversal order. - layers_for_depth.sort(key=lambda x: layer_indices[x]) - layers.extend(layers_for_depth) - self.layers = layers - self._layers_by_depth = layers_by_depth - - # Get sorted list of node depths. - depth_keys = list(nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - - # Check that all tensors required are computable. - # computable_tensors: all tensors in the graph - # that can be computed from the inputs provided. - computable_tensors = [] - for x in self.inputs: - computable_tensors.append(x) - - layers_with_complete_input = [] # To provide a better error msg. - for depth in depth_keys: - for node in nodes_by_depth[depth]: - layer = node.outbound_layer - if layer: - for x in node.input_tensors: - if x not in computable_tensors: - raise ValueError('Graph disconnected: ' - 'cannot obtain value for tensor ' + str(x) + - ' at layer "' + layer.name + '". ' - 'The following previous layers ' - 'were accessed without issue: ' + - str(layers_with_complete_input)) - for x in node.output_tensors: - computable_tensors.append(x) - layers_with_complete_input.append(layer.name) - - # Keep track of the network's nodes. - self._network_nodes = network_nodes - self._nodes_by_depth = nodes_by_depth - - # Ensure name unicity, which will be crucial for serialization - # (since serialized nodes refer to layers by their name). - all_names = [layer.name for layer in self.layers] - for name in all_names: - if all_names.count(name) != 1: - raise ValueError('The name "' + name + '" is used ' + - str(all_names.count(name)) + ' times in the model. ' - 'All layer names should be unique.') - - # Layer parameters. - # The new network starts with a single inbound node - # for its inputs, and no outbound nodes. - self._outbound_nodes = [] # Will be appended to by future calls to __call__ - self._inbound_nodes = [ - ] # Will be appended to below, and by future calls to __call__ - # Create the node linking internal inputs to internal outputs. - Node( - outbound_layer=self, - inbound_layers=[], - node_indices=[], - tensor_indices=[], - input_tensors=self.inputs, - output_tensors=self.outputs) - - def get_layer(self, name=None, index=None): - """Retrieves a layer based on either its name (unique) or index. - - Indices are based on order of horizontal graph traversal (bottom-up). - - Arguments: - name: String, name of layer. - index: Integer, index of layer. - - Returns: - A layer instance. - - Raises: - ValueError: In case of invalid layer name or index. - """ - # TODO(fchollet): We could build a dictionary based on layer names - # since they are constant, but we have not done that yet. - if index is not None: - if len(self.layers) <= index: - raise ValueError('Was asked to retrieve layer at index ' + str(index) + - ' but model only has ' + str(len(self.layers)) + - ' layers.') - else: - return self.layers[index] - else: - if not name: - raise ValueError('Provide either a layer name or layer index.') - for layer in self.layers: - if layer.name == name: - return layer - raise ValueError('No such layer: ' + name) - - @property - def updates(self): - """Retrieve the network's updates. - - Will only include updates that are either - unconditional, or conditional on inputs to this model - (e.g. will not include updates that depend on tensors - that aren't inputs to this model). - - Returns: - A list of update ops. - """ - updates = [] - for layer in self.layers: - if hasattr(layer, 'updates'): - # Collect updates that are dependent on inputs - # that are part of the model. - for node_index, node in enumerate(layer._inbound_nodes): # pylint: disable=protected-access - node_key = _make_node_key(layer.name, node_index) - if node_key in self._network_nodes: - # The model owns this layer node. - inputs = node.input_tensors - updates += layer.get_updates_for(inputs) - # Collect unconditional updates. - updates += layer.get_updates_for(None) - return updates - - @property - def losses(self): - """Retrieve the network's losses. - - Will only include losses that are either - unconditional, or conditional on inputs to this model - (e.g. will not include losses that depend on tensors - that aren't inputs to this model). - - Returns: - A list of loss tensors. - """ - losses = [] - # Retrieve losses for all internal layers. - for layer in self.layers: - if hasattr(layer, 'losses'): - # Collect losses that are dependent on inputs - # that are part of the model. - for node_index, node in enumerate(layer._inbound_nodes): # pylint: disable=protected-access - node_key = _make_node_key(layer.name, node_index) - if node_key in self._network_nodes: - # The model owns this layer node. - inputs = node.input_tensors - losses += layer.get_losses_for(inputs) - # Collect unconditional losses. - losses += layer.get_losses_for(None) - # Add any potential unconditional model-level loss. - losses += self.get_losses_for(None) - return losses - - @property - def trainable_weights(self): - if not self.trainable: - return [] - weights = [] - for layer in self.layers: - weights += layer.trainable_weights - return weights - - @property - def non_trainable_weights(self): - weights = [] - for layer in self.layers: - weights += layer.non_trainable_weights - if not self.trainable: - trainable_weights = [] - for layer in self.layers: - trainable_weights += layer.trainable_weights - return trainable_weights + weights - return weights - - @property - def input_spec(self): - """Gets the network's input specs. - - Returns: - A list of `InputSpec` instances (one per input to the model) - or a single instance if the model has only one input. - """ - specs = [] - for layer in self._input_layers: - if layer.input_spec is None: - specs.append(None) - else: - if not isinstance(layer.input_spec, list): - raise TypeError('Layer ' + layer.name + - ' has an input_spec attribute that ' - 'is not a list. We expect a list. ' - 'Found input_spec = ' + str(layer.input_spec)) - specs += layer.input_spec - if len(specs) == 1: - return specs[0] - return specs - - def call(self, inputs, mask=None): - """Call the model on new inputs. - - In this case `call` just reapplies - all ops in the graph to the new inputs - (e.g. build a new computational graph from the provided inputs). - - Arguments: - inputs: A tensor or list of tensors. - mask: A mask or list of masks. A mask can be - either a tensor or None (no mask). - - Returns: - A tensor if there is a single output, or - a list of tensors if there are more than one outputs. - """ - inputs = nest.flatten(inputs) - if mask is None: - masks = [None for _ in range(len(inputs))] - else: - masks = nest.flatten(mask) - - if context.in_graph_mode(): - # Try to retrieve cached outputs if the layer has already been called - # on these exact inputs. - cache_key = _object_list_uid(inputs) + '_' + _object_list_uid(masks) - if cache_key in self._output_tensor_cache: - # Cache hit. - return self._output_tensor_cache[cache_key] - # Actually apply the network graph to the new inputs. - outputs, _ = self._run_internal_graph(inputs, masks) - return outputs - - def _compute_output_shape(self, input_shape): - if isinstance(input_shape, list): - input_shapes = [] - for shape in input_shape: - if shape is not None: - input_shapes.append(tuple(tensor_shape.TensorShape(shape).as_list())) - else: - input_shapes.append(None) - else: - if input_shape is not None: - input_shapes = [tuple(tensor_shape.TensorShape(input_shape).as_list())] - else: - input_shapes = [None] - - if len(input_shapes) != len(self._input_layers): - raise ValueError('Invalid input_shape argument ' + str(input_shape) + - ': model has ' + str(len(self._input_layers)) + - ' tensor inputs.') - - cache_key = _object_list_uid(input_shapes) - if cache_key not in self._output_shape_cache: - # Cache miss. We have to run the network graph manually (recursive calls - # to `_compute_output_shape`). - layers_to_output_shapes = {} - for i in range(len(input_shapes)): - layer = self._input_layers[i] - input_shape = input_shapes[i] - # It's an input layer: then `_compute_output_shape` is identity, - # and there is only one node and one tensor output. - shape_key = layer.name + '_0_0' - layers_to_output_shapes[shape_key] = input_shape - - depth_keys = list(self._nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - # Iterate over nodes, by depth level. - if len(depth_keys) > 1: - for depth in depth_keys: - nodes = self._nodes_by_depth[depth] - for node in nodes: - # This is always a single layer, never a list. - layer = node.outbound_layer - if layer in self._input_layers: - # We've already covered the input layers - # a few lines above. - continue - # Potentially redundant list, - # same size as node.input_tensors. - input_shapes = [] - for j in range(len(node.inbound_layers)): - inbound_layer = node.inbound_layers[j] - node_index = node.node_indices[j] - tensor_index = node.tensor_indices[j] - shape_key = inbound_layer.name + '_%s_%s' % (node_index, - tensor_index) - input_shape = layers_to_output_shapes[shape_key] - input_shapes.append(input_shape) - - if len(input_shapes) == 1: - output_shape = layer._compute_output_shape(input_shapes[0]) # pylint: disable=protected-access - else: - output_shape = layer._compute_output_shape(input_shapes) # pylint: disable=protected-access - if isinstance(output_shape, list): - output_shapes = [ - tuple(tensor_shape.TensorShape(shape).as_list()) - for shape in output_shape - ] - else: - output_shapes = [ - tuple(tensor_shape.TensorShape(output_shape).as_list()) - ] - - node_index = layer._inbound_nodes.index(node) # pylint: disable=protected-access - for j in range(len(output_shapes)): - shape_key = layer.name + '_%s_%s' % (node_index, j) - layers_to_output_shapes[shape_key] = output_shapes[j] - - # Read final output shapes from layers_to_output_shapes. - output_shapes = [] - for i in range(len(self._output_layers)): - layer, node_index, tensor_index = self._output_coordinates[i] - shape_key = layer.name + '_%s_%s' % (node_index, tensor_index) - output_shapes.append(layers_to_output_shapes[shape_key]) - - # Store in cache. - self._output_shape_cache[cache_key] = output_shapes - else: - # Cache hit. - output_shapes = self._output_shape_cache[cache_key] - - if isinstance(output_shapes, list): - if len(output_shapes) == 1: - return tensor_shape.TensorShape(output_shapes[0]) - else: - return [tensor_shape.TensorShape(shape) for shape in output_shapes] - else: - return tensor_shape.TensorShape(output_shapes) - - def _run_internal_graph(self, inputs, masks=None): - """Computes output tensors for new inputs. - - # Note: - - Expects `inputs` to be a list (potentially with 1 element). - - Can be run on non-Keras tensors. - - Arguments: - inputs: List of tensors - masks: List of masks (tensors or None). - - Returns: - Three lists: output_tensors, output_masks, output_shapes - """ - # Note: masking support is relevant mainly for Keras. - # It cannot be factored out without having the fully reimplement the - # network calling logic on the Keras side. We choose to incorporate it - # in Network because 1) it may be useful to fully support in tf.layers in - # the future and 2) Keras is a major user of Network. - # If you don't use masking, it does not interfere with regular behavior - # at all and you can ignore it. - if masks is None: - masks = [None for _ in range(len(inputs))] - - # Dictionary mapping reference tensors to tuples - # (computed tensor, compute mask) - # we assume a 1:1 mapping from tensor to mask - # TODO(fchollet): raise exception when a `.compute_mask()` call - # does not return a list the same size as `call` - tensor_map = {} - for x, y, mask in zip(self.inputs, inputs, masks): - tensor_map[str(id(x))] = (y, mask) - - depth_keys = list(self._nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - for depth in depth_keys: - nodes = self._nodes_by_depth[depth] - for node in nodes: - # This is always a single layer, never a list. - layer = node.outbound_layer - - reference_input_tensors = node.input_tensors - reference_output_tensors = node.output_tensors - - # If all previous input tensors are available in tensor_map, - # then call node.inbound_layer on them. - computed_data = [] # List of tuples (input, mask). - for x in reference_input_tensors: - if str(id(x)) in tensor_map: - computed_data.append(tensor_map[str(id(x))]) - - if len(computed_data) == len(reference_input_tensors): - # Call layer (reapplying ops to new inputs). - with ops.name_scope(layer.name): - if node.arguments: - kwargs = node.arguments - else: - kwargs = {} - if len(computed_data) == 1: - computed_tensor, computed_mask = computed_data[0] - # Ensure mask propagation if applicable. - if 'mask' in estimator_util.fn_args(layer.call): - if 'mask' not in kwargs: - kwargs['mask'] = computed_mask - - output_tensors = nest.flatten( - layer.call(computed_tensor, **kwargs)) - if hasattr(layer, 'compute_mask'): - output_masks = nest.flatten( - layer.compute_mask(computed_tensor, computed_mask)) - else: - output_masks = [None for _ in range(len(output_tensors))] - computed_tensors = [computed_tensor] - computed_masks = [computed_mask] - else: - computed_tensors = [x[0] for x in computed_data] - computed_masks = [x[1] for x in computed_data] - if 'mask' in estimator_util.fn_args(layer.call): - if 'mask' not in kwargs: - kwargs['mask'] = computed_masks - output_tensors = nest.flatten( - layer.call(computed_tensors, **kwargs)) - if hasattr(layer, 'compute_mask'): - output_masks = nest.flatten( - layer.compute_mask(computed_tensors, computed_masks)) - else: - output_masks = [None for _ in range(len(output_tensors))] - - # Apply activity regularizer if any: - if layer.activity_regularizer is not None: - regularization_losses = [ - layer.activity_regularizer(x) for x in computed_tensors - ] - layer.add_loss(regularization_losses, computed_tensors) - - if context.in_graph_mode(): - # Update model updates and losses: - # Keep track of updates that depend on the inputs - # (e.g. BN updates). - self.add_update(layer.get_updates_for(computed_tensors), inputs) - # Keep track of unconditional updates (e.g. a counter). - self.add_update(layer.get_updates_for(None), None) - # Keep track of losses that depend on the inputs - # (e.g. activity regularizers). - self.add_loss(layer.get_losses_for(computed_tensors), inputs) - # Keep track of unconditional losses - # (e.g. weight regularizers). - self.add_loss(layer.get_losses_for(None), None) - - # Update tensor_map. - for x, y, mask in zip(reference_output_tensors, output_tensors, - output_masks): - tensor_map[str(id(x))] = (y, mask) - - output_tensors = [] - output_masks = [] - output_shapes = [] - for x in self.outputs: - assert str(id(x)) in tensor_map, 'Could not compute output ' + str(x) - tensor, mask = tensor_map[str(id(x))] - output_shapes.append(_static_shape(x)) - output_tensors.append(tensor) - output_masks.append(mask) - - if len(output_tensors) == 1: - output_tensors = output_tensors[0] - if output_shapes is not None: - output_shapes = output_shapes[0] - if output_masks is not None: - output_masks = output_masks[0] - - if context.in_graph_mode(): - # Update cache; - # keys are based on ids on input tensors and inputs masks. - cache_key = _object_list_uid(inputs) + '_' + _object_list_uid(masks) - self._output_tensor_cache[cache_key] = output_tensors - if output_masks is not None: - self._output_mask_cache[cache_key] = output_masks - if output_shapes is not None: - input_shapes = [_static_shape(x) for x in inputs] - cache_key = _object_list_uid(input_shapes) - self._output_shape_cache[cache_key] = output_shapes - - return output_tensors, output_masks - - def _is_tensor_or_tensor_list(v): v = nest.flatten(v) if v and isinstance(v[0], ops.Tensor): @@ -2297,24 +1378,6 @@ def _add_elements_to_collection(elements, collection_list): collection.append(element) -def _object_list_uid(object_list): - object_list = nest.flatten(object_list) - return ', '.join([str(abs(id(x))) for x in object_list]) - - -def _make_node_key(layer_name, node_index): - return layer_name + '_ib-' + str(node_index) - - -def _static_shape(x): - if x is None: - return None - try: - return tuple(x.get_shape().as_list()) - except ValueError: - return None - - def _is_all_none(iterable_or_element): if not isinstance(iterable_or_element, (list, tuple)): iterable = [iterable_or_element] diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 509ad5a7af..1eea20deef 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -20,8 +20,6 @@ from __future__ import print_function import copy -import numpy as np - from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -33,7 +31,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -431,115 +428,6 @@ class BaseLayerTest(test.TestCase): layer.apply(array_ops.placeholder('int32')) layer.apply(array_ops.placeholder('int32', shape=(2, 3))) - def test_get_updates_for(self): - a = base_layers.Input(shape=(2,)) - dense_layer = core_layers.Dense(1) - dense_layer.add_update(0, inputs=a) - dense_layer.add_update(1, inputs=None) - - self.assertEqual(dense_layer.get_updates_for(a), [0]) - self.assertEqual(dense_layer.get_updates_for(None), [1]) - - def test_get_losses_for(self): - a = base_layers.Input(shape=(2,)) - dense_layer = core_layers.Dense(1) - dense_layer.add_loss(0, inputs=a) - dense_layer.add_loss(1, inputs=None) - - self.assertEqual(dense_layer.get_losses_for(a), [0]) - self.assertEqual(dense_layer.get_losses_for(None), [1]) - - def testTopologicalAttributes(self): - # test layer attributes / methods related to cross-layer connectivity. - a = base_layers.Input(shape=(32,), name='input_a') - b = base_layers.Input(shape=(32,), name='input_b') - - # test input, output, input_shape, output_shape - test_layer = core_layers.Dense(16, name='test_layer') - a_test = test_layer(a) - self.assertEqual(test_layer.input, a) - self.assertEqual(test_layer.output, a_test) - self.assertEqual(test_layer.input_shape, (None, 32)) - self.assertEqual(test_layer.output_shape, (None, 16)) - - # test `get_*_at` methods - dense = core_layers.Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - - self.assertEqual(dense.get_input_at(0), a) - self.assertEqual(dense.get_input_at(1), b) - self.assertEqual(dense.get_output_at(0), a_2) - self.assertEqual(dense.get_output_at(1), b_2) - self.assertEqual(dense.get_input_shape_at(0), (None, 32)) - self.assertEqual(dense.get_input_shape_at(1), (None, 32)) - self.assertEqual(dense.get_output_shape_at(0), (None, 16)) - self.assertEqual(dense.get_output_shape_at(1), (None, 16)) - - # Test invalid value for attribute retrieval. - with self.assertRaises(ValueError): - dense.get_input_at(2) - with self.assertRaises(AttributeError): - new_dense = core_layers.Dense(16) - _ = new_dense.input - with self.assertRaises(AttributeError): - new_dense = core_layers.Dense(16) - _ = new_dense.output - with self.assertRaises(AttributeError): - new_dense = core_layers.Dense(16) - _ = new_dense.output_shape - with self.assertRaises(AttributeError): - new_dense = core_layers.Dense(16) - _ = new_dense.input_shape - with self.assertRaises(AttributeError): - new_dense = core_layers.Dense(16) - a = base_layers.Input(shape=(3, 32)) - a = base_layers.Input(shape=(5, 32)) - a_2 = dense(a) - b_2 = dense(b) - _ = new_dense.input_shape - with self.assertRaises(AttributeError): - new_dense = core_layers.Dense(16) - a = base_layers.Input(shape=(3, 32)) - a = base_layers.Input(shape=(5, 32)) - a_2 = dense(a) - b_2 = dense(b) - _ = new_dense.output_shape - - def testTopologicalAttributesMultiOutputLayer(self): - - class PowersLayer(base_layers.Layer): - - def call(self, inputs): - return [inputs**2, inputs**3] - - x = base_layers.Input(shape=(32,)) - test_layer = PowersLayer() - p1, p2 = test_layer(x) # pylint: disable=not-callable - - self.assertEqual(test_layer.input, x) - self.assertEqual(test_layer.output, [p1, p2]) - self.assertEqual(test_layer.input_shape, (None, 32)) - self.assertEqual(test_layer.output_shape, [(None, 32), (None, 32)]) - - def testTopologicalAttributesMultiInputLayer(self): - - class AddLayer(base_layers.Layer): - - def call(self, inputs): - assert len(inputs) == 2 - return inputs[0] + inputs[1] - - a = base_layers.Input(shape=(32,)) - b = base_layers.Input(shape=(32,)) - test_layer = AddLayer() - y = test_layer([a, b]) # pylint: disable=not-callable - - self.assertEqual(test_layer.input, [a, b]) - self.assertEqual(test_layer.output, y) - self.assertEqual(test_layer.input_shape, [(None, 32), (None, 32)]) - self.assertEqual(test_layer.output_shape, (None, 32)) - @test_util.run_in_graph_and_eager_modes() def test_count_params(self): dense = core_layers.Dense(16) @@ -582,383 +470,5 @@ class BaseLayerTest(test.TestCase): self.assertEqual(len(layer.get_losses_for(x)), 1) -class NetworkTest(test.TestCase): - - def testBasicNetwork(self): - # minimum viable network - x = base_layers.Input(shape=(32,)) - dense = core_layers.Dense(2) - y = dense(x) - network = base_layers.Network(x, y, name='dense_network') - - # test basic attributes - self.assertEqual(network.name, 'dense_network') - self.assertEqual(len(network.layers), 2) # InputLayer + Dense - self.assertEqual(network.layers[1], dense) - self.assertEqual(network.weights, dense.weights) - self.assertEqual(network.trainable_weights, dense.trainable_weights) - self.assertEqual(network.non_trainable_weights, dense.non_trainable_weights) - - # test callability on Input - x_2 = base_layers.Input(shape=(32,)) - y_2 = network(x_2) - self.assertEqual(y_2.get_shape().as_list(), [None, 2]) - - # test callability on regular tensor - x_2 = array_ops.placeholder(dtype='float32', shape=(None, 32)) - y_2 = network(x_2) - self.assertEqual(y_2.get_shape().as_list(), [None, 2]) - - # test network `trainable` attribute - network.trainable = False - self.assertEqual(network.weights, dense.weights) - self.assertEqual(network.trainable_weights, []) - self.assertEqual(network.non_trainable_weights, - dense.trainable_weights + dense.non_trainable_weights) - - def test_node_construction(self): - # test graph topology construction basics - a = base_layers.Input(shape=(32,), name='input_a') - b = base_layers.Input(shape=(32,), name='input_b') - - self.assertEqual(a.get_shape().as_list(), [None, 32]) - a_layer, a_node_index, a_tensor_index = a._keras_history - b_layer, _, _ = b._keras_history - self.assertEqual(len(a_layer._inbound_nodes), 1) - self.assertEqual(a_tensor_index, 0) - node = a_layer._inbound_nodes[a_node_index] - self.assertEqual(node.outbound_layer, a_layer) - - self.assertEqual(node.inbound_layers, []) - self.assertEqual(node.input_tensors, [a]) - self.assertEqual(node.input_shapes, [(None, 32)]) - self.assertEqual(node.output_tensors, [a]) - self.assertEqual(node.output_shapes, [(None, 32)]) - - dense = core_layers.Dense(16, name='dense_1') - dense(a) - dense(b) - - self.assertEqual(len(dense._inbound_nodes), 2) - self.assertEqual(len(dense._outbound_nodes), 0) - self.assertEqual(dense._inbound_nodes[0].inbound_layers, [a_layer]) - self.assertEqual(dense._inbound_nodes[0].outbound_layer, dense) - self.assertEqual(dense._inbound_nodes[1].inbound_layers, [b_layer]) - self.assertEqual(dense._inbound_nodes[1].outbound_layer, dense) - self.assertEqual(dense._inbound_nodes[0].input_tensors, [a]) - self.assertEqual(dense._inbound_nodes[1].input_tensors, [b]) - - # Test config - config_0 = dense._inbound_nodes[0].get_config() - self.assertEqual(config_0['outbound_layer'], dense.name) - - def testMultiInputNetwork(self): - a = base_layers.Input(shape=(32,), name='input_a') - b = base_layers.Input(shape=(32,), name='input_b') - - class AddLayer(base_layers.Layer): - - def call(self, inputs): - assert len(inputs) == 2 - return inputs[0] + inputs[1] - - c = AddLayer()([a, b]) # pylint: disable=not-callable - network = base_layers.Network([a, b], c) - self.assertEqual(len(network.layers), 3) # 2 * InputLayer + AddLayer - - # Test callability. - a2 = base_layers.Input(shape=(32,)) - b2 = base_layers.Input(shape=(32,)) - c2 = network([a2, b2]) - self.assertEqual(c2.get_shape().as_list(), [None, 32]) - - def testMultiOutputNetwork(self): - x = base_layers.Input(shape=(32,)) - y1 = core_layers.Dense(2)(x) - y2 = core_layers.Dense(3)(x) - network = base_layers.Network(x, [y1, y2]) - - self.assertEqual(len(network.layers), 3) # InputLayer + 2 * Dense - - # Test callability. - x2 = base_layers.Input(shape=(32,)) - outputs = network(x2) - - self.assertEqual(type(outputs), list) - self.assertEqual(len(outputs), 2) - self.assertEqual(outputs[0].get_shape().as_list(), [None, 2]) - self.assertEqual(outputs[1].get_shape().as_list(), [None, 3]) - - def testMultiInputMultiOutputNetworkSharedLayer(self): - a = base_layers.Input(shape=(32,), name='input_a') - b = base_layers.Input(shape=(32,), name='input_b') - - dense = core_layers.Dense(2) - - y1 = dense(a) - y2 = dense(b) - network = base_layers.Network([a, b], [y1, y2]) - self.assertEqual(len(network.layers), 3) # 2 * InputLayer + Dense - - # Test callability. - a2 = base_layers.Input(shape=(32,)) - b2 = base_layers.Input(shape=(32,)) - outputs = network([a2, b2]) - - self.assertEqual(type(outputs), list) - self.assertEqual(len(outputs), 2) - self.assertEqual(outputs[0].get_shape().as_list(), [None, 2]) - self.assertEqual(outputs[1].get_shape().as_list(), [None, 2]) - - def testCrossDataFlows(self): - # Test the ability to have multi-output layers with outputs that get routed - # to separate layers - - class PowersLayer(base_layers.Layer): - - def call(self, inputs): - return [inputs**2, inputs**3] - - x = base_layers.Input(shape=(32,)) - p1, p2 = PowersLayer()(x) # pylint: disable=not-callable - y1 = core_layers.Dense(2)(p1) - y2 = core_layers.Dense(3)(p2) - network = base_layers.Network(x, [y1, y2]) - - self.assertEqual(len(network.layers), 4) # InputLayer + 2 * Dense + PLayer - - # Test callability. - x2 = base_layers.Input(shape=(32,)) - outputs = network(x2) - - self.assertEqual(type(outputs), list) - self.assertEqual(len(outputs), 2) - self.assertEqual(outputs[0].get_shape().as_list(), [None, 2]) - self.assertEqual(outputs[1].get_shape().as_list(), [None, 3]) - - def testNetworkAttributes(self): - x = base_layers.Input(shape=(32,)) - z = core_layers.Dense(2, kernel_regularizer=lambda x: 0.01 * (x**2))(x) - dense = core_layers.Dense(2, name='dense') - dense.add_update(1) - y = dense(z) - net = base_layers.Network(x, y) - - # losses - self.assertEqual(len(net.losses), 1) - - # updates - self.assertEqual(len(net.updates), 1) - - # get_layer - self.assertEqual(net.get_layer('dense'), dense) - self.assertEqual(net.get_layer(index=2), dense) - with self.assertRaises(ValueError): - net.get_layer('dense_unknown') - with self.assertRaises(ValueError): - net.get_layer() - with self.assertRaises(ValueError): - net.get_layer(index=4) - - # input, output - self.assertEqual(net.input, x) - self.assertEqual(net.output, y) - - # input_shape, output_shape - self.assertEqual(net.input_shape, (None, 32)) - self.assertEqual(net.output_shape, (None, 2)) - - # get_*_at - self.assertEqual(net.get_input_at(0), x) - self.assertEqual(net.get_output_at(0), y) - - # _compute_output_shape - self.assertEqual(net._compute_output_shape((3, 32)).as_list(), [3, 2]) - - def testInvalidNetworks(self): - # redundant inputs - x = base_layers.Input(shape=(32,)) - y = core_layers.Dense(2)(x) - with self.assertRaises(ValueError): - base_layers.Network([x, x], y) - - # inputs that don't come from Input - x = array_ops.placeholder(dtype='float32', shape=(None, 32)) - y = core_layers.Dense(2)(x) - with self.assertRaises(ValueError): - base_layers.Network(x, y) - - # inputs that don't come from Input but have a layer history - x = base_layers.Input(shape=(32,)) - x = core_layers.Dense(32)(x) - y = core_layers.Dense(2)(x) - with self.assertRaises(ValueError): - base_layers.Network(x, y) - - # outputs that don't come from layers - x = base_layers.Input(shape=(32,)) - y = core_layers.Dense(2)(x) - y = 2 * y - with self.assertRaises(ValueError): - base_layers.Network(x, y) - - # disconnected graphs - x1 = base_layers.Input(shape=(32,)) - x2 = base_layers.Input(shape=(32,)) - y = core_layers.Dense(2)(x1) - with self.assertRaises(ValueError): - base_layers.Network(x2, y) - - # redundant layer names - x = base_layers.Input(shape=(32,)) - z = core_layers.Dense(2, name='dense')(x) - y = core_layers.Dense(2, name='dense')(z) - with self.assertRaises(ValueError): - base_layers.Network(x, y) - - def testInputTensorWrapping(self): - x = array_ops.placeholder(dtype='float32', shape=(None, 32)) - x = base_layers.Input(tensor=x) - y = core_layers.Dense(2)(x) - base_layers.Network(x, y) - - def testExplicitBatchSize(self): - x = base_layers.Input(shape=(32,), batch_size=3) - y = core_layers.Dense(2)(x) - self.assertEqual(y.get_shape().as_list(), [3, 2]) - - def testNetworkRecursion(self): - # test the ability of networks to be used as layers inside networks. - a = base_layers.Input(shape=(32,)) - b = core_layers.Dense(2)(a) - net = base_layers.Network(a, b) - - c = base_layers.Input(shape=(32,)) - d = net(c) - - recursive_net = base_layers.Network(c, d) - self.assertEqual(len(recursive_net.layers), 2) - self.assertEqual(recursive_net.layers[1], net) - self.assertEqual(len(recursive_net.weights), 2) - - # test callability - x = array_ops.placeholder(dtype='float32', shape=(None, 32)) - y = recursive_net(x) - self.assertEqual(y.get_shape().as_list(), [None, 2]) - - def testSparseInput(self): - - class SparseSoftmax(base_layers.Layer): - - def call(self, inputs): - return sparse_ops.sparse_softmax(inputs) - - x = base_layers.Input(shape=(32,), sparse=True) - y = SparseSoftmax()(x) # pylint: disable=not-callable - network = base_layers.Network(x, y) - - self.assertEqual(len(network.layers), 2) - self.assertEqual(network.layers[0].sparse, True) - - @test_util.run_in_graph_and_eager_modes() - def testMaskingSingleInput(self): - - class MaskedLayer(base_layers.Layer): - - def call(self, inputs, mask=None): - if mask is not None: - return inputs * mask - return inputs - - def compute_mask(self, inputs, mask=None): - return array_ops.ones_like(inputs) - - if context.in_graph_mode(): - x = base_layers.Input(shape=(32,)) - y = MaskedLayer()(x) # pylint: disable=not-callable - network = base_layers.Network(x, y) - - # test callability on Input - x_2 = base_layers.Input(shape=(32,)) - y_2 = network(x_2) - self.assertEqual(y_2.get_shape().as_list(), [None, 32]) - - # test callability on regular tensor - x_2 = array_ops.placeholder(dtype='float32', shape=(None, 32)) - y_2 = network(x_2) - self.assertEqual(y_2.get_shape().as_list(), [None, 32]) - else: - a = constant_op.constant([2] * 32) - mask = constant_op.constant([0, 1] * 16) - a._keras_mask = mask - b = MaskedLayer().apply(a) - self.assertTrue(hasattr(b, '_keras_mask')) - self.assertAllEqual(self.evaluate(array_ops.ones_like(mask)), - self.evaluate(getattr(b, '_keras_mask'))) - self.assertAllEqual(self.evaluate(a * mask), self.evaluate(b)) - - -class DeferredModeTest(test.TestCase): - - def testDeferredTensorAttributes(self): - x = base_layers._DeferredTensor(shape=(None, 2), dtype='float32', name='x') - self.assertEqual(str(x), - 'DeferredTensor(\'x\', shape=(?, 2), dtype=float32)') - self.assertEqual(repr(x), - '<_DeferredTensor \'x\' shape=(?, 2) dtype=float32>') - - @test_util.run_in_graph_and_eager_modes() - def testSimpleNetworkBuilding(self): - inputs = base_layers.Input(shape=(32,)) - if context.in_eager_mode(): - self.assertIsInstance(inputs, base_layers._DeferredTensor) - self.assertEqual(inputs.dtype.name, 'float32') - self.assertEqual(inputs.shape.as_list(), [None, 32]) - - x = core_layers.Dense(2)(inputs) - if context.in_eager_mode(): - self.assertIsInstance(x, base_layers._DeferredTensor) - self.assertEqual(x.dtype.name, 'float32') - self.assertEqual(x.shape.as_list(), [None, 2]) - - outputs = core_layers.Dense(4)(x) - network = base_layers.Network(inputs, outputs) - self.assertIsInstance(network, base_layers.Network) - - if context.in_eager_mode(): - # It should be possible to call such a network on EagerTensors. - inputs = constant_op.constant( - np.random.random((10, 32)).astype('float32')) - outputs = network(inputs) - self.assertEqual(outputs.shape.as_list(), [10, 4]) - - @test_util.run_in_graph_and_eager_modes() - def testMultiIONetworkbuilding(self): - input_a = base_layers.Input(shape=(32,)) - input_b = base_layers.Input(shape=(16,)) - a = core_layers.Dense(16)(input_a) - - class AddLayer(base_layers.Layer): - - def call(self, inputs): - return inputs[0] + inputs[1] - - def _compute_output_shape(self, input_shape): - return input_shape[0] - - c = AddLayer()([a, input_b]) # pylint: disable=not-callable - c = core_layers.Dense(2)(c) - - network = base_layers.Network([input_a, input_b], [a, c]) - if context.in_eager_mode(): - a_val = constant_op.constant( - np.random.random((10, 32)).astype('float32')) - b_val = constant_op.constant( - np.random.random((10, 16)).astype('float32')) - outputs = network([a_val, b_val]) - self.assertEqual(len(outputs), 2) - self.assertEqual(outputs[0].shape.as_list(), [10, 16]) - self.assertEqual(outputs[1].shape.as_list(), [10, 2]) - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/layers/layers.py b/tensorflow/python/layers/layers.py index d3f532e79c..0a52b1e8d9 100644 --- a/tensorflow/python/layers/layers.py +++ b/tensorflow/python/layers/layers.py @@ -65,8 +65,8 @@ from tensorflow.python.util.all_util import remove_undocumented # Base objects. from tensorflow.python.layers.base import Layer -from tensorflow.python.layers.base import Input from tensorflow.python.layers.base import InputSpec +from tensorflow.python.layers.network import Input # Core layers. from tensorflow.python.layers.core import Dense diff --git a/tensorflow/python/layers/network.py b/tensorflow/python/layers/network.py new file mode 100644 index 0000000000..9a33a5c726 --- /dev/null +++ b/tensorflow/python/layers/network.py @@ -0,0 +1,957 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Contains Network, a composition of layers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from tensorflow.python.eager import context +from tensorflow.python.estimator import util as estimator_util +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.layers import base +from tensorflow.python.layers import utils as layers_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import nest + + +class InputLayer(base.Layer): + """Layer to be used as an entry point into a Network (a graph of layers). + + It can either wrap an existing tensor (pass an `input_tensor` argument) + or create its a placeholder tensor (pass arguments `input_shape` + as well as `dtype`). + + It is generally recommend to use the functional layer API via `Input`, + (which creates an `InputLayer`) without directly using `InputLayer`. + + Arguments: + input_shape: Shape tuple (not including the batch axis), or `TensorShape` + instance (not including the batch axis). + batch_size: Optional input batch size (integer or None). + dtype: Datatype of the input. + input_tensor: Optional tensor to use as layer input + instead of creating a placeholder. + sparse: Boolean, whether the placeholder created + is meant to be sparse. + name: Name of the layer (string). + + Raises: + RuntimeError: If created in Eager mode. + """ + + def __init__(self, + input_shape=None, + batch_size=None, + dtype=dtypes.float32, + input_tensor=None, + sparse=False, + name=None): + super(InputLayer, self).__init__(dtype=dtype, name=name) + self.built = True + self.sparse = sparse + self.batch_size = batch_size + + if isinstance(input_shape, tensor_shape.TensorShape): + input_shape = tuple(input_shape.as_list()) + + if input_tensor is None: + if input_shape is not None: + batch_input_shape = (batch_size,) + tuple(input_shape) + else: + batch_input_shape = None + + if context.in_eager_mode(): + # In eager mode, create a temporary placeholder to call the layer on. + input_tensor = base._DeferredTensor( # pylint: disable=protected-access + shape=batch_input_shape, + dtype=dtype, + name=self.name) + else: + # In graph mode, create a graph placeholder to call the layer on. + if sparse: + input_tensor = array_ops.sparse_placeholder( + shape=batch_input_shape, + dtype=dtype, + name=self.name) + else: + input_tensor = array_ops.placeholder( + shape=batch_input_shape, + dtype=dtype, + name=self.name) + + # For compatibility with Keras API. + self.is_placeholder = True + self._batch_input_shape = batch_input_shape + else: + # For compatibility with Keras API. + self.is_placeholder = False + self._batch_input_shape = tuple(input_tensor.get_shape().as_list()) + + # Create an input node to add to self.outbound_node + # and set output_tensors' _keras_history. + input_tensor._keras_history = (self, 0, 0) # pylint: disable=protected-access + base.Node( + self, + inbound_layers=[], + node_indices=[], + tensor_indices=[], + input_tensors=[input_tensor], + output_tensors=[input_tensor]) + + +def Input( # pylint: disable=invalid-name + shape=None, + batch_size=None, + name=None, + dtype=dtypes.float32, + sparse=False, + tensor=None): + """`Input()` is used to instantiate an input tensor for use with a `Network`. + + For instance, if a, b and c are tensors created via `Input`, + it becomes possible to do: + + `network = Network(inputs=[a, b], outputs=c)` + + Example: + + ```python + # This is a logistic regression + x = tf.layers.Input(shape=(32,)) + y = tf.layers.Dense(16, activation='softmax')(x) + network = tf.layers.Network(x, y) + ``` + + Arguments: + shape: A shape tuple (integer), not including the batch size. + For instance, `shape=(32,)` indicates that the expected input + will be batches of 32-dimensional vectors. + batch_size: Optional input batch size (integer or None). + name: An optional name string for the layer. + Should be unique in a model (do not reuse the same name twice). + It will be autogenerated if it isn't provided. + dtype: The data type expected by the input, as a string + (`float32`, `float64`, `int32`...) + sparse: A boolean specifying whether the placeholder + to be created is sparse. + tensor: Optional existing tensor to wrap into the `Input` layer. + If set, the layer will not create a placeholder tensor. + + Returns: + A tensor: either a new placeholder (with history metadata) or + `tensor` (if passed), with added history metadata. + + Raises: + RuntimeError: If called in Eager mode. + """ + input_layer = InputLayer( + input_shape=shape, + batch_size=batch_size, + name=name, + dtype=dtype, + sparse=sparse, + input_tensor=tensor) + # Return tensor including `_keras_history` metadata. + # Note that in this case train_output and test_output are the same pointer. + outputs = input_layer._inbound_nodes[0].output_tensors # pylint: disable=protected-access + if len(outputs) == 1: + return outputs[0] + else: + return outputs + + +class GraphNetwork(base.Layer): + """A GraphNetwork is a directed acyclic graph of layers. + + It is the topological form of a "model". + A Model is simply a GraphNetwork with added training/evaluation routines. + + A GraphNetwork instance implements the full Layer API. In particular, a + GraphNetwork can be called on new inputs. + + Example: + + ```python + # This is a logistic regression + x = tf.layers.Input(shape=(32,)) + y = tf.layers.Dense(16, activation='softmax')(x) + network = tf.layers.GraphNetwork(x, y) + + # It is then possible to call the network on compatible inputs: + z = tf.layers.Input(shape=(32,)) + w = network(z) + + # It is possible to retrieve the same properties as a layer: + weights = network.trainable_weights + ``` + + Arguments: + inputs: Input tensor or list of input tensors. + Must come from `tf.layers.Input`. + output: Output tensor or list of output tensors. Must come from + tf.layers Layers or Keras layers. + name: Optional name of the model (string). + + Attributes: + GraphNetwork has the same attributes as Layer. On top of it, it also has: + - layers: a list of the children layers of the network, + a list of layer instances, ordered from "earlier in the graph" + to "later in the graph". + + Methods: + GraphNetwork has the same methods as Layer. On top of it, it also has: + - get_layer: retrieves a child layer by name or index in the graph. + + Raises: + RuntimeError: If created in Eager mode. + """ + + def __init__(self, inputs, outputs, name=None): # pylint: disable=super-init-not-called + if context.in_eager_mode(): + # TODO(fchollet): check that all inputs and outputs are DeferredTensors. + pass + + self._init_set_name(name) + self._activity_regularizer = None + with vs.variable_scope( + None, default_name=self._base_name) as captured_scope: + self._scope = captured_scope + call_fn_args = estimator_util.fn_args(self.call) + self._compute_previous_mask = ('mask' in call_fn_args or + hasattr(self, 'compute_mask')) + self._call_has_scope_arg = 'scope' in call_fn_args + + # This acts just like the `trainable` attribute of any layer instance. + # It does not affect users of the underlying layers, only users of the + # GraphNetwork instance. + self.trainable = True + # A GraphNetwork does not create weights of its own, thus it is already + # built. + self.built = True + # A GraphNetwork does not create weights of its own, thus has no dtype. + self._dtype = None + # The following are implemented as property functions: + # self.trainable_weights + # self.non_trainable_weights + # self.input_spec + + # Private attributes to implement compatibility with Layer. + self._per_input_losses = {} + self._per_input_updates = {} + self._updates = [] + self._losses = [] + self._scope = None + self._reuse = None + self._graph = ops.get_default_graph() + + # GraphNetwork-specific properties. + if isinstance(inputs, (list, tuple)): + self.inputs = list(inputs) # Tensor or list of tensors. + else: + self.inputs = [inputs] + if isinstance(outputs, (list, tuple)): + self.outputs = list(outputs) + else: + self.outputs = [outputs] + # All layers in order of horizontal graph traversal. + # Entries are unique. Includes input and output layers. + self.layers = [] + + # Check for redundancy in inputs. + if len(set(self.inputs)) != len(self.inputs): + raise ValueError('The list of inputs passed to the model ' + 'is redundant. ' + 'All inputs should only appear once.' + ' Found: ' + str(self.inputs)) + + # # List of initial layers (1 to 1 mapping with self.inputs, + # # hence the same layer might appear twice) + # self._input_layers = [] + # self._input_layers_node_indices = [] + # self._input_layers_tensor_indices = [] + # # list of layers (1 to 1 mapping with self.inputs, + # # hence the same layer might appear twice) + # self._output_layers = [] + # self._output_layers_node_indices = [] + # self._output_layers_tensor_indices = [] + + self._input_layers = [] + self._output_layers = [] + self._input_coordinates = [] + self._output_coordinates = [] + + # This is for performance optimization when calling the GraphNetwork on new + # inputs. Every time the GraphNetwork is called on a set on input tensors, + # we compute the output tensors, output masks and output shapes in one pass, + # then cache them here. When any of these outputs is queried later, we + # retrieve it from there instead of recomputing it. + self._output_mask_cache = {} + self._output_tensor_cache = {} + self._output_shape_cache = {} + + # User-provided arguments validation. + for x in self.inputs: + # Check that x has appropriate `_keras_history` metadata. + if not hasattr(x, '_keras_history'): + cls_name = self.__class__.__name__ + raise ValueError('Input tensors to a ' + cls_name + ' ' + + 'must come from `tf.layers.Input`. ' + 'Received: ' + str(x) + + ' (missing previous layer metadata).') + # Check that x is an input tensor. + # pylint: disable=protected-access + layer, node_index, tensor_index = x._keras_history + if len(layer._inbound_nodes) > 1 or ( + layer._inbound_nodes and layer._inbound_nodes[0].inbound_layers): + cls_name = self.__class__.__name__ + logging.warning(cls_name + ' inputs must come from ' + '`tf.layers.Input` (thus holding past layer metadata), ' + 'they cannot be the output of ' + 'a previous non-Input layer. ' + 'Here, a tensor specified as ' + 'input to "' + self.name + '" was not an Input tensor, ' + 'it was generated by layer ' + layer.name + '.\n' + 'Note that input tensors are ' + 'instantiated via `tensor = tf.layers.Input(shape)`.\n' + 'The tensor that caused the issue was: ' + str(x.name)) + # pylint: enable=protected-access + for x in self.outputs: + if not hasattr(x, '_keras_history'): + cls_name = self.__class__.__name__ + raise ValueError('Output tensors to a ' + cls_name + ' must be ' + 'the output of a TensorFlow `Layer` ' + '(thus holding past layer metadata). Found: ' + str(x)) + + # Build self._output_layers: + for x in self.outputs: + layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access + self._output_layers.append(layer) + self._output_coordinates.append((layer, node_index, tensor_index)) + + # Build self._input_layers: + for x in self.inputs: + layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access + # It's supposed to be an input layer, so only one node + # and one tensor output. + assert node_index == 0 + assert tensor_index == 0 + self._input_layers.append(layer) + self._input_coordinates.append((layer, node_index, tensor_index)) + + # Network_nodes: set of nodes included in the graph + # (not all nodes included in the layers + # are relevant to the current graph). + network_nodes = set() # ids of all nodes relevant to the GraphNetwork + nodes_depths = {} # dict {node: depth value} + layers_depths = {} # dict {layer: depth value} + layer_indices = {} # dict {layer: index in traversal} + nodes_in_decreasing_depth = [] + + def build_map_of_graph(tensor, + finished_nodes, + nodes_in_progress, + layer, + node_index, + tensor_index): + """Builds a map of the graph of layers. + + This recursively updates the map `layer_indices`, + the list `nodes_in_decreasing_depth` and the set `network_nodes`. + + Arguments: + tensor: Some tensor in a graph. + finished_nodes: Set of nodes whose subgraphs have been traversed + completely. Useful to prevent duplicated work. + nodes_in_progress: Set of nodes that are currently active on the + recursion stack. Useful to detect cycles. + layer: Layer from which `tensor` comes from. If not provided, + will be obtained from `tensor._keras_history`. + node_index: Node index from which `tensor` comes from. + tensor_index: Tensor_index from which `tensor` comes from. + + Raises: + ValueError: if a cycle is detected. + """ + node = layer._inbound_nodes[node_index] # pylint: disable=protected-access + + # Prevent cycles. + if node in nodes_in_progress: + raise ValueError('The tensor ' + str(tensor) + ' at layer "' + + layer.name + '" is part of a cycle.') + + # Don't repeat work for shared subgraphs + if node in finished_nodes: + return + + node_key = _make_node_key(layer.name, node_index) + # Update network_nodes. + network_nodes.add(node_key) + + # Store the traversal order for layer sorting. + if layer not in layer_indices: + layer_indices[layer] = len(layer_indices) + + nodes_in_progress.add(node) + + # Propagate to all previous tensors connected to this node. + for i in range(len(node.inbound_layers)): + x = node.input_tensors[i] + layer = node.inbound_layers[i] + node_index = node.node_indices[i] + tensor_index = node.tensor_indices[i] + build_map_of_graph(x, finished_nodes, nodes_in_progress, layer, + node_index, tensor_index) + + finished_nodes.add(node) + nodes_in_progress.remove(node) + nodes_in_decreasing_depth.append(node) + + finished_nodes = set() + nodes_in_progress = set() + for x in self.outputs: + layer, node_index, tensor_index = x._keras_history # pylint: disable=protected-access + build_map_of_graph(x, finished_nodes, nodes_in_progress, + layer=layer, + node_index=node_index, + tensor_index=tensor_index) + + for node in reversed(nodes_in_decreasing_depth): + # If the depth is not set, the node has no outbound nodes (depth 0). + depth = nodes_depths.setdefault(node, 0) + + # Update the depth of the corresponding layer + previous_depth = layers_depths.get(node.outbound_layer, 0) + # If we've seen this layer before at a higher depth, + # we should use that depth instead of the node depth. + # This is necessary for shared layers that have inputs at different + # depth levels in the graph. + depth = max(depth, previous_depth) + layers_depths[node.outbound_layer] = depth + nodes_depths[node] = depth + + # Update the depth of inbound nodes. + # The "depth" of a node is the max of the depths + # of all layers it is connected to. + for i in range(len(node.inbound_layers)): + inbound_layer = node.inbound_layers[i] + node_index = node.node_indices[i] + inbound_node = inbound_layer._inbound_nodes[node_index] # pylint: disable=protected-access + previous_depth = nodes_depths.get(inbound_node, 0) + nodes_depths[inbound_node] = max(depth + 1, previous_depth) + + # Build a dict {depth: list of nodes with this depth} + nodes_by_depth = {} + for node, depth in nodes_depths.items(): + if depth not in nodes_by_depth: + nodes_by_depth[depth] = [] + nodes_by_depth[depth].append(node) + + # Build a dict {depth: list of layers with this depth} + layers_by_depth = {} + for layer, depth in layers_depths.items(): + if depth not in layers_by_depth: + layers_by_depth[depth] = [] + layers_by_depth[depth].append(layer) + + # Get sorted list of layer depths. + depth_keys = list(layers_by_depth.keys()) + depth_keys.sort(reverse=True) + + # Set self.layers and self._layers_by_depth. + layers = [] + for depth in depth_keys: + layers_for_depth = layers_by_depth[depth] + # GraphNetwork.layers needs to have a deterministic order: + # here we order them by traversal order. + layers_for_depth.sort(key=lambda x: layer_indices[x]) + layers.extend(layers_for_depth) + self.layers = layers + self._layers_by_depth = layers_by_depth + + # Get sorted list of node depths. + depth_keys = list(nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + + # Check that all tensors required are computable. + # computable_tensors: all tensors in the graph + # that can be computed from the inputs provided. + computable_tensors = [] + for x in self.inputs: + computable_tensors.append(x) + + layers_with_complete_input = [] # To provide a better error msg. + for depth in depth_keys: + for node in nodes_by_depth[depth]: + layer = node.outbound_layer + if layer: + for x in node.input_tensors: + if x not in computable_tensors: + raise ValueError('Graph disconnected: ' + 'cannot obtain value for tensor ' + str(x) + + ' at layer "' + layer.name + '". ' + 'The following previous layers ' + 'were accessed without issue: ' + + str(layers_with_complete_input)) + for x in node.output_tensors: + computable_tensors.append(x) + layers_with_complete_input.append(layer.name) + + # Keep track of the network's nodes. + self._network_nodes = network_nodes + self._nodes_by_depth = nodes_by_depth + + # Ensure name unicity, which will be crucial for serialization + # (since serialized nodes refer to layers by their name). + all_names = [layer.name for layer in self.layers] + for name in all_names: + if all_names.count(name) != 1: + raise ValueError('The name "' + name + '" is used ' + + str(all_names.count(name)) + ' times in the model. ' + 'All layer names should be unique.') + + # Layer parameters. + # The new network starts with a single inbound node + # for its inputs, and no outbound nodes. + self._outbound_nodes = [] # Will be appended to by future calls to __call__ + self._inbound_nodes = [ + ] # Will be appended to below, and by future calls to __call__ + # Create the node linking internal inputs to internal outputs. + base.Node( + outbound_layer=self, + inbound_layers=[], + node_indices=[], + tensor_indices=[], + input_tensors=self.inputs, + output_tensors=self.outputs) + + def get_layer(self, name=None, index=None): + """Retrieves a layer based on either its name (unique) or index. + + Indices are based on order of horizontal graph traversal (bottom-up). + + Arguments: + name: String, name of layer. + index: Integer, index of layer. + + Returns: + A layer instance. + + Raises: + ValueError: In case of invalid layer name or index. + """ + # TODO(fchollet): We could build a dictionary based on layer names + # since they are constant, but we have not done that yet. + if index is not None: + if len(self.layers) <= index: + raise ValueError('Was asked to retrieve layer at index ' + str(index) + + ' but model only has ' + str(len(self.layers)) + + ' layers.') + else: + return self.layers[index] + else: + if not name: + raise ValueError('Provide either a layer name or layer index.') + for layer in self.layers: + if layer.name == name: + return layer + raise ValueError('No such layer: ' + name) + + @property + def updates(self): + """Retrieve the network's updates. + + Will only include updates that are either + unconditional, or conditional on inputs to this model + (e.g. will not include updates that depend on tensors + that aren't inputs to this model). + + Returns: + A list of update ops. + """ + updates = [] + for layer in self.layers: + if hasattr(layer, 'updates'): + # Collect updates that are dependent on inputs + # that are part of the model. + for node_index, node in enumerate(layer._inbound_nodes): # pylint: disable=protected-access + node_key = _make_node_key(layer.name, node_index) + if node_key in self._network_nodes: + # The model owns this layer node. + inputs = node.input_tensors + updates += layer.get_updates_for(inputs) + # Collect unconditional updates. + updates += layer.get_updates_for(None) + return updates + + @property + def losses(self): + """Retrieve the network's losses. + + Will only include losses that are either + unconditional, or conditional on inputs to this model + (e.g. will not include losses that depend on tensors + that aren't inputs to this model). + + Returns: + A list of loss tensors. + """ + losses = [] + # Retrieve losses for all internal layers. + for layer in self.layers: + if hasattr(layer, 'losses'): + # Collect losses that are dependent on inputs + # that are part of the model. + for node_index, node in enumerate(layer._inbound_nodes): # pylint: disable=protected-access + node_key = _make_node_key(layer.name, node_index) + if node_key in self._network_nodes: + # The model owns this layer node. + inputs = node.input_tensors + losses += layer.get_losses_for(inputs) + # Collect unconditional losses. + losses += layer.get_losses_for(None) + # Add any potential unconditional model-level loss. + losses += self.get_losses_for(None) + return losses + + @property + def trainable_weights(self): + if not self.trainable: + return [] + weights = [] + for layer in self.layers: + weights += layer.trainable_weights + return weights + + @property + def non_trainable_weights(self): + weights = [] + for layer in self.layers: + weights += layer.non_trainable_weights + if not self.trainable: + trainable_weights = [] + for layer in self.layers: + trainable_weights += layer.trainable_weights + return trainable_weights + weights + return weights + + @property + def input_spec(self): + """Gets the network's input specs. + + Returns: + A list of `InputSpec` instances (one per input to the model) + or a single instance if the model has only one input. + """ + specs = [] + for layer in self._input_layers: + if layer.input_spec is None: + specs.append(None) + else: + if not isinstance(layer.input_spec, list): + raise TypeError('Layer ' + layer.name + + ' has an input_spec attribute that ' + 'is not a list. We expect a list. ' + 'Found input_spec = ' + str(layer.input_spec)) + specs += layer.input_spec + if len(specs) == 1: + return specs[0] + return specs + + def call(self, inputs, mask=None): + """Call the model on new inputs. + + In this case `call` just reapplies + all ops in the graph to the new inputs + (e.g. build a new computational graph from the provided inputs). + + Arguments: + inputs: A tensor or list of tensors. + mask: A mask or list of masks. A mask can be + either a tensor or None (no mask). + + Returns: + A tensor if there is a single output, or + a list of tensors if there are more than one outputs. + """ + inputs = nest.flatten(inputs) + if mask is None: + masks = [None for _ in range(len(inputs))] + else: + masks = nest.flatten(mask) + + if context.in_graph_mode(): + # Try to retrieve cached outputs if the layer has already been called + # on these exact inputs. + cache_key = (layers_util.object_list_uid(inputs) + + '_' + layers_util.object_list_uid(masks)) + if cache_key in self._output_tensor_cache: + # Cache hit. + return self._output_tensor_cache[cache_key] + # Actually apply the network graph to the new inputs. + outputs, _ = self._run_internal_graph(inputs, masks) + return outputs + + def _compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + input_shapes = [] + for shape in input_shape: + if shape is not None: + input_shapes.append(tuple(tensor_shape.TensorShape(shape).as_list())) + else: + input_shapes.append(None) + else: + if input_shape is not None: + input_shapes = [tuple(tensor_shape.TensorShape(input_shape).as_list())] + else: + input_shapes = [None] + + if len(input_shapes) != len(self._input_layers): + raise ValueError('Invalid input_shape argument ' + str(input_shape) + + ': model has ' + str(len(self._input_layers)) + + ' tensor inputs.') + + cache_key = layers_util.object_list_uid(input_shapes) + if cache_key not in self._output_shape_cache: + # Cache miss. We have to run the network graph manually (recursive calls + # to `_compute_output_shape`). + layers_to_output_shapes = {} + for i in range(len(input_shapes)): + layer = self._input_layers[i] + input_shape = input_shapes[i] + # It's an input layer: then `_compute_output_shape` is identity, + # and there is only one node and one tensor output. + shape_key = layer.name + '_0_0' + layers_to_output_shapes[shape_key] = input_shape + + depth_keys = list(self._nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + # Iterate over nodes, by depth level. + if len(depth_keys) > 1: + for depth in depth_keys: + nodes = self._nodes_by_depth[depth] + for node in nodes: + # This is always a single layer, never a list. + layer = node.outbound_layer + if layer in self._input_layers: + # We've already covered the input layers + # a few lines above. + continue + # Potentially redundant list, + # same size as node.input_tensors. + input_shapes = [] + for j in range(len(node.inbound_layers)): + inbound_layer = node.inbound_layers[j] + node_index = node.node_indices[j] + tensor_index = node.tensor_indices[j] + shape_key = inbound_layer.name + '_%s_%s' % (node_index, + tensor_index) + input_shape = layers_to_output_shapes[shape_key] + input_shapes.append(input_shape) + + if len(input_shapes) == 1: + output_shape = layer._compute_output_shape(input_shapes[0]) # pylint: disable=protected-access + else: + output_shape = layer._compute_output_shape(input_shapes) # pylint: disable=protected-access + if isinstance(output_shape, list): + output_shapes = [ + tuple(tensor_shape.TensorShape(shape).as_list()) + for shape in output_shape + ] + else: + output_shapes = [ + tuple(tensor_shape.TensorShape(output_shape).as_list()) + ] + + node_index = layer._inbound_nodes.index(node) # pylint: disable=protected-access + for j in range(len(output_shapes)): + shape_key = layer.name + '_%s_%s' % (node_index, j) + layers_to_output_shapes[shape_key] = output_shapes[j] + + # Read final output shapes from layers_to_output_shapes. + output_shapes = [] + for i in range(len(self._output_layers)): + layer, node_index, tensor_index = self._output_coordinates[i] + shape_key = layer.name + '_%s_%s' % (node_index, tensor_index) + output_shapes.append(layers_to_output_shapes[shape_key]) + + # Store in cache. + self._output_shape_cache[cache_key] = output_shapes + else: + # Cache hit. + output_shapes = self._output_shape_cache[cache_key] + + if isinstance(output_shapes, list): + if len(output_shapes) == 1: + return tensor_shape.TensorShape(output_shapes[0]) + else: + return [tensor_shape.TensorShape(shape) for shape in output_shapes] + else: + return tensor_shape.TensorShape(output_shapes) + + def _run_internal_graph(self, inputs, masks=None): + """Computes output tensors for new inputs. + + # Note: + - Expects `inputs` to be a list (potentially with 1 element). + - Can be run on non-Keras tensors. + + Arguments: + inputs: List of tensors + masks: List of masks (tensors or None). + + Returns: + Three lists: output_tensors, output_masks, output_shapes + """ + # Note: masking support is relevant mainly for Keras. + # It cannot be factored out without having the fully reimplement the network + # calling logic on the Keras side. We choose to incorporate it in + # GraphNetwork because 1) it may be useful to fully support in tf.layers in + # the future and 2) Keras is a major user of GraphNetwork. If you don't + # use masking, it does not interfere with regular behavior at all and you + # can ignore it. + if masks is None: + masks = [None for _ in range(len(inputs))] + + # Dictionary mapping reference tensors to tuples + # (computed tensor, compute mask) + # we assume a 1:1 mapping from tensor to mask + # TODO(fchollet): raise exception when a `.compute_mask()` call + # does not return a list the same size as `call` + tensor_map = {} + for x, y, mask in zip(self.inputs, inputs, masks): + tensor_map[str(id(x))] = (y, mask) + + depth_keys = list(self._nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + for depth in depth_keys: + nodes = self._nodes_by_depth[depth] + for node in nodes: + # This is always a single layer, never a list. + layer = node.outbound_layer + + reference_input_tensors = node.input_tensors + reference_output_tensors = node.output_tensors + + # If all previous input tensors are available in tensor_map, + # then call node.inbound_layer on them. + computed_data = [] # List of tuples (input, mask). + for x in reference_input_tensors: + if str(id(x)) in tensor_map: + computed_data.append(tensor_map[str(id(x))]) + + if len(computed_data) == len(reference_input_tensors): + # Call layer (reapplying ops to new inputs). + with ops.name_scope(layer.name): + if node.arguments: + kwargs = node.arguments + else: + kwargs = {} + if len(computed_data) == 1: + computed_tensor, computed_mask = computed_data[0] + # Ensure mask propagation if applicable. + if 'mask' in estimator_util.fn_args(layer.call): + if 'mask' not in kwargs: + kwargs['mask'] = computed_mask + + output_tensors = nest.flatten( + layer.call(computed_tensor, **kwargs)) + if hasattr(layer, 'compute_mask'): + output_masks = nest.flatten( + layer.compute_mask(computed_tensor, computed_mask)) + else: + output_masks = [None for _ in range(len(output_tensors))] + computed_tensors = [computed_tensor] + computed_masks = [computed_mask] + else: + computed_tensors = [x[0] for x in computed_data] + computed_masks = [x[1] for x in computed_data] + if 'mask' in estimator_util.fn_args(layer.call): + if 'mask' not in kwargs: + kwargs['mask'] = computed_masks + output_tensors = nest.flatten( + layer.call(computed_tensors, **kwargs)) + if hasattr(layer, 'compute_mask'): + output_masks = nest.flatten( + layer.compute_mask(computed_tensors, computed_masks)) + else: + output_masks = [None for _ in range(len(output_tensors))] + + # Apply activity regularizer if any: + if layer.activity_regularizer is not None: + regularization_losses = [ + layer.activity_regularizer(x) for x in computed_tensors + ] + layer.add_loss(regularization_losses, computed_tensors) + + if context.in_graph_mode(): + # Update model updates and losses: + # Keep track of updates that depend on the inputs + # (e.g. BN updates). + self.add_update(layer.get_updates_for(computed_tensors), inputs) + # Keep track of unconditional updates (e.g. a counter). + self.add_update(layer.get_updates_for(None), None) + # Keep track of losses that depend on the inputs + # (e.g. activity regularizers). + self.add_loss(layer.get_losses_for(computed_tensors), inputs) + # Keep track of unconditional losses + # (e.g. weight regularizers). + self.add_loss(layer.get_losses_for(None), None) + + # Update tensor_map. + for x, y, mask in zip(reference_output_tensors, output_tensors, + output_masks): + tensor_map[str(id(x))] = (y, mask) + + output_tensors = [] + output_masks = [] + output_shapes = [] + for x in self.outputs: + assert str(id(x)) in tensor_map, 'Could not compute output ' + str(x) + tensor, mask = tensor_map[str(id(x))] + output_shapes.append(layers_util.static_shape(x)) + output_tensors.append(tensor) + output_masks.append(mask) + + if len(output_tensors) == 1: + output_tensors = output_tensors[0] + if output_shapes is not None: + output_shapes = output_shapes[0] + if output_masks is not None: + output_masks = output_masks[0] + + if context.in_graph_mode(): + # Update cache; + # keys are based on ids on input tensors and inputs masks. + cache_key = (layers_util.object_list_uid(inputs) + + '_' + layers_util.object_list_uid(masks)) + self._output_tensor_cache[cache_key] = output_tensors + if output_masks is not None: + self._output_mask_cache[cache_key] = output_masks + if output_shapes is not None: + input_shapes = [layers_util.static_shape(x) for x in inputs] + cache_key = layers_util.object_list_uid(input_shapes) + self._output_shape_cache[cache_key] = output_shapes + + return output_tensors, output_masks + + +def _make_node_key(layer_name, node_index): + return layer_name + '_ib-' + str(node_index) diff --git a/tensorflow/python/layers/network_test.py b/tensorflow/python/layers/network_test.py new file mode 100644 index 0000000000..af7813e264 --- /dev/null +++ b/tensorflow/python/layers/network_test.py @@ -0,0 +1,525 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tf.layers.network.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import test_util +from tensorflow.python.layers import base as base_layers +from tensorflow.python.layers import core as core_layers +from tensorflow.python.layers import network as network_layers +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.platform import test + + +class BaseLayerCompatibilityTest(test.TestCase): + + def test_get_updates_for(self): + a = network_layers.Input(shape=(2,)) + dense_layer = core_layers.Dense(1) + dense_layer.add_update(0, inputs=a) + dense_layer.add_update(1, inputs=None) + + self.assertEqual(dense_layer.get_updates_for(a), [0]) + self.assertEqual(dense_layer.get_updates_for(None), [1]) + + def test_get_losses_for(self): + a = network_layers.Input(shape=(2,)) + dense_layer = core_layers.Dense(1) + dense_layer.add_loss(0, inputs=a) + dense_layer.add_loss(1, inputs=None) + + self.assertEqual(dense_layer.get_losses_for(a), [0]) + self.assertEqual(dense_layer.get_losses_for(None), [1]) + + def testTopologicalAttributes(self): + # test layer attributes / methods related to cross-layer connectivity. + a = network_layers.Input(shape=(32,), name='input_a') + b = network_layers.Input(shape=(32,), name='input_b') + + # test input, output, input_shape, output_shape + test_layer = core_layers.Dense(16, name='test_layer') + a_test = test_layer(a) + self.assertEqual(test_layer.input, a) + self.assertEqual(test_layer.output, a_test) + self.assertEqual(test_layer.input_shape, (None, 32)) + self.assertEqual(test_layer.output_shape, (None, 16)) + + # test `get_*_at` methods + dense = core_layers.Dense(16, name='dense_1') + a_2 = dense(a) + b_2 = dense(b) + + self.assertEqual(dense.get_input_at(0), a) + self.assertEqual(dense.get_input_at(1), b) + self.assertEqual(dense.get_output_at(0), a_2) + self.assertEqual(dense.get_output_at(1), b_2) + self.assertEqual(dense.get_input_shape_at(0), (None, 32)) + self.assertEqual(dense.get_input_shape_at(1), (None, 32)) + self.assertEqual(dense.get_output_shape_at(0), (None, 16)) + self.assertEqual(dense.get_output_shape_at(1), (None, 16)) + + # Test invalid value for attribute retrieval. + with self.assertRaises(ValueError): + dense.get_input_at(2) + with self.assertRaises(AttributeError): + new_dense = core_layers.Dense(16) + _ = new_dense.input + with self.assertRaises(AttributeError): + new_dense = core_layers.Dense(16) + _ = new_dense.output + with self.assertRaises(AttributeError): + new_dense = core_layers.Dense(16) + _ = new_dense.output_shape + with self.assertRaises(AttributeError): + new_dense = core_layers.Dense(16) + _ = new_dense.input_shape + with self.assertRaises(AttributeError): + new_dense = core_layers.Dense(16) + a = network_layers.Input(shape=(3, 32)) + a = network_layers.Input(shape=(5, 32)) + a_2 = dense(a) + b_2 = dense(b) + _ = new_dense.input_shape + with self.assertRaises(AttributeError): + new_dense = core_layers.Dense(16) + a = network_layers.Input(shape=(3, 32)) + a = network_layers.Input(shape=(5, 32)) + a_2 = dense(a) + b_2 = dense(b) + _ = new_dense.output_shape + + def testTopologicalAttributesMultiOutputLayer(self): + + class PowersLayer(base_layers.Layer): + + def call(self, inputs): + return [inputs**2, inputs**3] + + x = network_layers.Input(shape=(32,)) + test_layer = PowersLayer() + p1, p2 = test_layer(x) # pylint: disable=not-callable + + self.assertEqual(test_layer.input, x) + self.assertEqual(test_layer.output, [p1, p2]) + self.assertEqual(test_layer.input_shape, (None, 32)) + self.assertEqual(test_layer.output_shape, [(None, 32), (None, 32)]) + + def testTopologicalAttributesMultiInputLayer(self): + + class AddLayer(base_layers.Layer): + + def call(self, inputs): + assert len(inputs) == 2 + return inputs[0] + inputs[1] + + a = network_layers.Input(shape=(32,)) + b = network_layers.Input(shape=(32,)) + test_layer = AddLayer() + y = test_layer([a, b]) # pylint: disable=not-callable + + self.assertEqual(test_layer.input, [a, b]) + self.assertEqual(test_layer.output, y) + self.assertEqual(test_layer.input_shape, [(None, 32), (None, 32)]) + self.assertEqual(test_layer.output_shape, (None, 32)) + + +class NetworkTest(test.TestCase): + + def testBasicNetwork(self): + # minimum viable network + x = network_layers.Input(shape=(32,)) + dense = core_layers.Dense(2) + y = dense(x) + network = network_layers.GraphNetwork(x, y, name='dense_network') + + # test basic attributes + self.assertEqual(network.name, 'dense_network') + self.assertEqual(len(network.layers), 2) # InputLayer + Dense + self.assertEqual(network.layers[1], dense) + self.assertEqual(network.weights, dense.weights) + self.assertEqual(network.trainable_weights, dense.trainable_weights) + self.assertEqual(network.non_trainable_weights, dense.non_trainable_weights) + + # test callability on Input + x_2 = network_layers.Input(shape=(32,)) + y_2 = network(x_2) + self.assertEqual(y_2.get_shape().as_list(), [None, 2]) + + # test callability on regular tensor + x_2 = array_ops.placeholder(dtype='float32', shape=(None, 32)) + y_2 = network(x_2) + self.assertEqual(y_2.get_shape().as_list(), [None, 2]) + + # test network `trainable` attribute + network.trainable = False + self.assertEqual(network.weights, dense.weights) + self.assertEqual(network.trainable_weights, []) + self.assertEqual(network.non_trainable_weights, + dense.trainable_weights + dense.non_trainable_weights) + + def test_node_construction(self): + # test graph topology construction basics + a = network_layers.Input(shape=(32,), name='input_a') + b = network_layers.Input(shape=(32,), name='input_b') + + self.assertEqual(a.get_shape().as_list(), [None, 32]) + a_layer, a_node_index, a_tensor_index = a._keras_history + b_layer, _, _ = b._keras_history + self.assertEqual(len(a_layer._inbound_nodes), 1) + self.assertEqual(a_tensor_index, 0) + node = a_layer._inbound_nodes[a_node_index] + self.assertEqual(node.outbound_layer, a_layer) + + self.assertEqual(node.inbound_layers, []) + self.assertEqual(node.input_tensors, [a]) + self.assertEqual(node.input_shapes, [(None, 32)]) + self.assertEqual(node.output_tensors, [a]) + self.assertEqual(node.output_shapes, [(None, 32)]) + + dense = core_layers.Dense(16, name='dense_1') + dense(a) + dense(b) + + self.assertEqual(len(dense._inbound_nodes), 2) + self.assertEqual(len(dense._outbound_nodes), 0) + self.assertEqual(dense._inbound_nodes[0].inbound_layers, [a_layer]) + self.assertEqual(dense._inbound_nodes[0].outbound_layer, dense) + self.assertEqual(dense._inbound_nodes[1].inbound_layers, [b_layer]) + self.assertEqual(dense._inbound_nodes[1].outbound_layer, dense) + self.assertEqual(dense._inbound_nodes[0].input_tensors, [a]) + self.assertEqual(dense._inbound_nodes[1].input_tensors, [b]) + + # Test config + config_0 = dense._inbound_nodes[0].get_config() + self.assertEqual(config_0['outbound_layer'], dense.name) + + def testMultiInputNetwork(self): + a = network_layers.Input(shape=(32,), name='input_a') + b = network_layers.Input(shape=(32,), name='input_b') + + class AddLayer(base_layers.Layer): + + def call(self, inputs): + assert len(inputs) == 2 + return inputs[0] + inputs[1] + + c = AddLayer()([a, b]) # pylint: disable=not-callable + network = network_layers.GraphNetwork([a, b], c) + self.assertEqual(len(network.layers), 3) # 2 * InputLayer + AddLayer + + # Test callability. + a2 = network_layers.Input(shape=(32,)) + b2 = network_layers.Input(shape=(32,)) + c2 = network([a2, b2]) + self.assertEqual(c2.get_shape().as_list(), [None, 32]) + + def testMultiOutputNetwork(self): + x = network_layers.Input(shape=(32,)) + y1 = core_layers.Dense(2)(x) + y2 = core_layers.Dense(3)(x) + network = network_layers.GraphNetwork(x, [y1, y2]) + + self.assertEqual(len(network.layers), 3) # InputLayer + 2 * Dense + + # Test callability. + x2 = network_layers.Input(shape=(32,)) + outputs = network(x2) + + self.assertEqual(type(outputs), list) + self.assertEqual(len(outputs), 2) + self.assertEqual(outputs[0].get_shape().as_list(), [None, 2]) + self.assertEqual(outputs[1].get_shape().as_list(), [None, 3]) + + def testMultiInputMultiOutputNetworkSharedLayer(self): + a = network_layers.Input(shape=(32,), name='input_a') + b = network_layers.Input(shape=(32,), name='input_b') + + dense = core_layers.Dense(2) + + y1 = dense(a) + y2 = dense(b) + network = network_layers.GraphNetwork([a, b], [y1, y2]) + self.assertEqual(len(network.layers), 3) # 2 * InputLayer + Dense + + # Test callability. + a2 = network_layers.Input(shape=(32,)) + b2 = network_layers.Input(shape=(32,)) + outputs = network([a2, b2]) + + self.assertEqual(type(outputs), list) + self.assertEqual(len(outputs), 2) + self.assertEqual(outputs[0].get_shape().as_list(), [None, 2]) + self.assertEqual(outputs[1].get_shape().as_list(), [None, 2]) + + def testCrossDataFlows(self): + # Test the ability to have multi-output layers with outputs that get routed + # to separate layers + + class PowersLayer(base_layers.Layer): + + def call(self, inputs): + return [inputs**2, inputs**3] + + x = network_layers.Input(shape=(32,)) + p1, p2 = PowersLayer()(x) # pylint: disable=not-callable + y1 = core_layers.Dense(2)(p1) + y2 = core_layers.Dense(3)(p2) + network = network_layers.GraphNetwork(x, [y1, y2]) + + self.assertEqual(len(network.layers), 4) # InputLayer + 2 * Dense + PLayer + + # Test callability. + x2 = network_layers.Input(shape=(32,)) + outputs = network(x2) + + self.assertEqual(type(outputs), list) + self.assertEqual(len(outputs), 2) + self.assertEqual(outputs[0].get_shape().as_list(), [None, 2]) + self.assertEqual(outputs[1].get_shape().as_list(), [None, 3]) + + def testNetworkAttributes(self): + x = network_layers.Input(shape=(32,)) + z = core_layers.Dense(2, kernel_regularizer=lambda x: 0.01 * (x**2))(x) + dense = core_layers.Dense(2, name='dense') + dense.add_update(1) + y = dense(z) + net = network_layers.GraphNetwork(x, y) + + # losses + self.assertEqual(len(net.losses), 1) + + # updates + self.assertEqual(len(net.updates), 1) + + # get_layer + self.assertEqual(net.get_layer('dense'), dense) + self.assertEqual(net.get_layer(index=2), dense) + with self.assertRaises(ValueError): + net.get_layer('dense_unknown') + with self.assertRaises(ValueError): + net.get_layer() + with self.assertRaises(ValueError): + net.get_layer(index=4) + + # input, output + self.assertEqual(net.input, x) + self.assertEqual(net.output, y) + + # input_shape, output_shape + self.assertEqual(net.input_shape, (None, 32)) + self.assertEqual(net.output_shape, (None, 2)) + + # get_*_at + self.assertEqual(net.get_input_at(0), x) + self.assertEqual(net.get_output_at(0), y) + + # _compute_output_shape + self.assertEqual(net._compute_output_shape((3, 32)).as_list(), [3, 2]) + + def testInvalidNetworks(self): + # redundant inputs + x = network_layers.Input(shape=(32,)) + y = core_layers.Dense(2)(x) + with self.assertRaises(ValueError): + network_layers.GraphNetwork([x, x], y) + + # inputs that don't come from Input + x = array_ops.placeholder(dtype='float32', shape=(None, 32)) + y = core_layers.Dense(2)(x) + with self.assertRaises(ValueError): + network_layers.GraphNetwork(x, y) + + # inputs that don't come from Input but have a layer history + x = network_layers.Input(shape=(32,)) + x = core_layers.Dense(32)(x) + y = core_layers.Dense(2)(x) + with self.assertRaises(ValueError): + network_layers.GraphNetwork(x, y) + + # outputs that don't come from layers + x = network_layers.Input(shape=(32,)) + y = core_layers.Dense(2)(x) + y = 2 * y + with self.assertRaises(ValueError): + network_layers.GraphNetwork(x, y) + + # disconnected graphs + x1 = network_layers.Input(shape=(32,)) + x2 = network_layers.Input(shape=(32,)) + y = core_layers.Dense(2)(x1) + with self.assertRaises(ValueError): + network_layers.GraphNetwork(x2, y) + + # redundant layer names + x = network_layers.Input(shape=(32,)) + z = core_layers.Dense(2, name='dense')(x) + y = core_layers.Dense(2, name='dense')(z) + with self.assertRaises(ValueError): + network_layers.GraphNetwork(x, y) + + def testInputTensorWrapping(self): + x = array_ops.placeholder(dtype='float32', shape=(None, 32)) + x = network_layers.Input(tensor=x) + y = core_layers.Dense(2)(x) + network_layers.GraphNetwork(x, y) + + def testExplicitBatchSize(self): + x = network_layers.Input(shape=(32,), batch_size=3) + y = core_layers.Dense(2)(x) + self.assertEqual(y.get_shape().as_list(), [3, 2]) + + def testNetworkRecursion(self): + # test the ability of networks to be used as layers inside networks. + a = network_layers.Input(shape=(32,)) + b = core_layers.Dense(2)(a) + net = network_layers.GraphNetwork(a, b) + + c = network_layers.Input(shape=(32,)) + d = net(c) + + recursive_net = network_layers.GraphNetwork(c, d) + self.assertEqual(len(recursive_net.layers), 2) + self.assertEqual(recursive_net.layers[1], net) + self.assertEqual(len(recursive_net.weights), 2) + + # test callability + x = array_ops.placeholder(dtype='float32', shape=(None, 32)) + y = recursive_net(x) + self.assertEqual(y.get_shape().as_list(), [None, 2]) + + def testSparseInput(self): + + class SparseSoftmax(base_layers.Layer): + + def call(self, inputs): + return sparse_ops.sparse_softmax(inputs) + + x = network_layers.Input(shape=(32,), sparse=True) + y = SparseSoftmax()(x) # pylint: disable=not-callable + network = network_layers.GraphNetwork(x, y) + + self.assertEqual(len(network.layers), 2) + self.assertEqual(network.layers[0].sparse, True) + + @test_util.run_in_graph_and_eager_modes() + def testMaskingSingleInput(self): + + class MaskedLayer(base_layers.Layer): + + def call(self, inputs, mask=None): + if mask is not None: + return inputs * mask + return inputs + + def compute_mask(self, inputs, mask=None): + return array_ops.ones_like(inputs) + + if context.in_graph_mode(): + x = network_layers.Input(shape=(32,)) + y = MaskedLayer()(x) # pylint: disable=not-callable + network = network_layers.GraphNetwork(x, y) + + # test callability on Input + x_2 = network_layers.Input(shape=(32,)) + y_2 = network(x_2) + self.assertEqual(y_2.get_shape().as_list(), [None, 32]) + + # test callability on regular tensor + x_2 = array_ops.placeholder(dtype='float32', shape=(None, 32)) + y_2 = network(x_2) + self.assertEqual(y_2.get_shape().as_list(), [None, 32]) + else: + a = constant_op.constant([2] * 32) + mask = constant_op.constant([0, 1] * 16) + a._keras_mask = mask + b = MaskedLayer().apply(a) + self.assertTrue(hasattr(b, '_keras_mask')) + self.assertAllEqual(self.evaluate(array_ops.ones_like(mask)), + self.evaluate(getattr(b, '_keras_mask'))) + self.assertAllEqual(self.evaluate(a * mask), self.evaluate(b)) + + +class DeferredModeTest(test.TestCase): + + def testDeferredTensorAttributes(self): + x = base_layers._DeferredTensor(shape=(None, 2), dtype='float32', name='x') + self.assertEqual(str(x), + 'DeferredTensor(\'x\', shape=(?, 2), dtype=float32)') + self.assertEqual(repr(x), + '<_DeferredTensor \'x\' shape=(?, 2) dtype=float32>') + + @test_util.run_in_graph_and_eager_modes() + def testSimpleNetworkBuilding(self): + inputs = network_layers.Input(shape=(32,)) + if context.in_eager_mode(): + self.assertIsInstance(inputs, base_layers._DeferredTensor) + self.assertEqual(inputs.dtype.name, 'float32') + self.assertEqual(inputs.shape.as_list(), [None, 32]) + + x = core_layers.Dense(2)(inputs) + if context.in_eager_mode(): + self.assertIsInstance(x, base_layers._DeferredTensor) + self.assertEqual(x.dtype.name, 'float32') + self.assertEqual(x.shape.as_list(), [None, 2]) + + outputs = core_layers.Dense(4)(x) + network = network_layers.GraphNetwork(inputs, outputs) + self.assertIsInstance(network, network_layers.GraphNetwork) + + if context.in_eager_mode(): + # It should be possible to call such a network on EagerTensors. + inputs = constant_op.constant( + np.random.random((10, 32)).astype('float32')) + outputs = network(inputs) + self.assertEqual(outputs.shape.as_list(), [10, 4]) + + @test_util.run_in_graph_and_eager_modes() + def testMultiIONetworkbuilding(self): + input_a = network_layers.Input(shape=(32,)) + input_b = network_layers.Input(shape=(16,)) + a = core_layers.Dense(16)(input_a) + + class AddLayer(base_layers.Layer): + + def call(self, inputs): + return inputs[0] + inputs[1] + + def _compute_output_shape(self, input_shape): + return input_shape[0] + + c = AddLayer()([a, input_b]) # pylint: disable=not-callable + c = core_layers.Dense(2)(c) + + network = network_layers.GraphNetwork([input_a, input_b], [a, c]) + if context.in_eager_mode(): + a_val = constant_op.constant( + np.random.random((10, 32)).astype('float32')) + b_val = constant_op.constant( + np.random.random((10, 16)).astype('float32')) + outputs = network([a_val, b_val]) + self.assertEqual(len(outputs), 2) + self.assertEqual(outputs[0].shape.as_list(), [10, 16]) + self.assertEqual(outputs[1].shape.as_list(), [10, 2]) + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py index 7c71d3c952..766a6800d4 100644 --- a/tensorflow/python/layers/utils.py +++ b/tensorflow/python/layers/utils.py @@ -24,6 +24,7 @@ from tensorflow.python.ops import variables from tensorflow.python.ops import control_flow_ops from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util +from tensorflow.python.util import nest def convert_data_format(data_format, ndim): @@ -232,3 +233,19 @@ def constant_value(pred): else: raise TypeError('`pred` must be a Tensor, a Variable, or a Python bool.') return pred_value + + +def object_list_uid(object_list): + """Creates a single string from object ids.""" + object_list = nest.flatten(object_list) + return ', '.join([str(abs(id(x))) for x in object_list]) + + +def static_shape(x): + """Get the static shape of a Tensor, or None if it is unavailable.""" + if x is None: + return None + try: + return tuple(x.get_shape().as_list()) + except ValueError: + return None diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 64352508b5..07b8d900da 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.Model" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index f69800b918..546bac44e4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt index b2df5fba8f..49841237ce 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.InputLayer" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index 8916925b3b..4e522813a5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.models.Model" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index 8397b373f4..ddbb358c84 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" -- GitLab From 7d126c49aea63a283386cd73d04ab1bed5eae2f0 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 15 Nov 2017 17:58:51 -0800 Subject: [PATCH 0487/1801] Refactor Operation.__init__ to create some state after creating _c_op This change moves around the Operation.__init__ logic to create the TF_Operation before initializing _outputs and before adding the op to the control flow context (if any). This is in preparation for creating Operation objects around TF_Operations indirectly created by the C API (e.g. ops created by TF_ImportGraphDef). This also disables running HessianTest with the C API enabled, since it's broken for now (but will be fixed soon). PiperOrigin-RevId: 175910443 --- tensorflow/python/framework/ops.py | 49 ++++++++++++------------- tensorflow/python/ops/gradients_test.py | 4 +- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 0e647a27f5..6ac3b862c8 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1526,13 +1526,6 @@ class Operation(object): raise TypeError("input needs to be a Tensor: %s" % a) # Mark that we consume the inputs. a._add_consumer(self) # pylint: disable=protected-access - if output_types is None: - output_types = [] - self._output_types_val = output_types - self._outputs = [ - Tensor(self, i, output_type) - for i, output_type in enumerate(output_types) - ] if input_types is None: input_types = [i.dtype.base_dtype for i in self._inputs] else: @@ -1562,25 +1555,6 @@ class Operation(object): self._original_op = original_op self._op_def = op_def self._traceback = self._graph._extract_stack() # pylint: disable=protected-access - # Define self._c_op before calling self._control_flow_context.AddOp(), since - # that will call methods on this op that check if self._c_op is set. - self._c_op = None - # Add this op to the current control flow context: - self._control_flow_context = g._get_control_flow_context() # pylint: disable=protected-access - if self._control_flow_context is not None: - # TODO(skyewm): consider refactoring this to call self._create_c_op() - # first. This would require updating the TF_Operation's ID (see the - # comment and self._id_value update below). The disadvantage of calling - # AddOp() first is that we need to maintain Operation state that is - # accessed by AddOp() in Python, e.g. the input Tensors. - self._control_flow_context.AddOp(self) - # NOTE(keveman): Control flow context's AddOp could be creating new ops and - # setting op.inputs[index] = new_op. Thus the new ops' id could be larger - # than this op's id even though this op depend on them. Therefore, delaying - # assigning id to this op until all ops this could be dependent on are - # created. - self._id_value = self._graph._next_id() # pylint: disable=protected-access - self._recompute_node_def() if self._graph._c_graph: # pylint: disable=protected-access if self._op_def: @@ -1594,6 +1568,29 @@ class Operation(object): self._c_op = _create_c_op(self._graph, self._node_def, grouped_inputs, self._control_inputs) + else: + self._c_op = None + + # Initialize self._outputs + if output_types is None: + output_types = [] + self._output_types_val = output_types + self._outputs = [ + Tensor(self, i, output_type) + for i, output_type in enumerate(output_types) + ] + + # Add this op to the current control flow context: + self._control_flow_context = g._get_control_flow_context() # pylint: disable=protected-access + if self._control_flow_context is not None: + self._control_flow_context.AddOp(self) + # NOTE(keveman): Control flow context's AddOp could be creating new ops and + # setting op.inputs[index] = new_op. Thus the new ops' id could be larger + # than this op's id even though this op depend on them. Therefore, delaying + # assigning id to this op until all ops this could be dependent on are + # created. + self._id_value = self._graph._next_id() # pylint: disable=protected-access + self._recompute_node_def() def _reconstruct_sequence_inputs(self, op_def, inputs, attrs): """Regroups a flat list of input tensors into scalar and sequence inputs. diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 1211b2e923..dacc2947fe 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -573,7 +573,9 @@ class HessianVectorProductTest(test_util.TensorFlowTestCase): self.assertAllClose(hess_v_value, hess_v_actual) -@test_util.with_c_api +# TODO(skyewm): reenable C API once +# ControlFlowContext._RemoveExternalControlEdges works with C API enabled +# @test_util.with_c_api class HessianTest(test_util.TensorFlowTestCase): def testHessian1D(self): -- GitLab From c0662f1620c2b97abb79b8ae6a8a30f7c7719475 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 15 Nov 2017 18:29:20 -0800 Subject: [PATCH 0488/1801] Fix the mac build (#14602) --- tensorflow/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d3fa5cb778..9ad5733f34 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2360,7 +2360,7 @@ cuda_py_test( cuda_py_test( name = "slice_op_test", - size = "medium", + size = "large", srcs = ["slice_op_test.py"], additional_deps = [ "//third_party/py/numpy", -- GitLab From b1d89c147d079ab97356b6a677cbf5ee726313d6 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 15 Nov 2017 18:00:52 -0800 Subject: [PATCH 0489/1801] Support user-space DNS caching for the GCS filesystem. In some environments, DNS resolution is unreliable. This change adds an optional userspace caching mechanism to radically reduce the amount of DNS queries sent to upstream resolvers. PiperOrigin-RevId: 175910642 --- tensorflow/core/platform/cloud/BUILD | 24 ++++ .../core/platform/cloud/curl_http_request.cc | 17 +++ .../core/platform/cloud/curl_http_request.h | 4 + .../core/platform/cloud/gcs_dns_cache.cc | 135 ++++++++++++++++++ .../core/platform/cloud/gcs_dns_cache.h | 74 ++++++++++ .../core/platform/cloud/gcs_dns_cache_test.cc | 113 +++++++++++++++ .../core/platform/cloud/gcs_file_system.cc | 37 ++++- .../core/platform/cloud/gcs_file_system.h | 2 + tensorflow/core/platform/cloud/http_request.h | 8 ++ 9 files changed, 408 insertions(+), 6 deletions(-) create mode 100644 tensorflow/core/platform/cloud/gcs_dns_cache.cc create mode 100644 tensorflow/core/platform/cloud/gcs_dns_cache.h create mode 100644 tensorflow/core/platform/cloud/gcs_dns_cache_test.cc diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 901fb79d6a..624145da75 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -41,6 +41,17 @@ cc_library( deps = ["//tensorflow/core:lib"], ) +cc_library( + name = "gcs_dns_cache", + srcs = ["gcs_dns_cache.cc"], + hdrs = ["gcs_dns_cache.h"], + visibility = ["//tensorflow:__subpackages__"], + deps = [ + ":http_request", + "//tensorflow/core:lib", + ], +) + cc_library( name = "gcs_file_system", srcs = ["gcs_file_system.cc"], @@ -51,6 +62,7 @@ cc_library( ":curl_http_request", ":expiring_lru_cache", ":file_block_cache", + ":gcs_dns_cache", ":google_auth_provider", ":http_request", ":retrying_file_system", @@ -231,6 +243,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "gcs_dns_cache_test", + size = "small", + srcs = ["gcs_dns_cache_test.cc"], + deps = [ + ":gcs_dns_cache", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + tf_cc_test( name = "curl_http_request_test", size = "small", diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index e2d935f35e..d01734ba3a 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -131,6 +131,9 @@ CurlHttpRequest::~CurlHttpRequest() { if (curl_headers_) { libcurl_->curl_slist_free_all(curl_headers_); } + if (resolve_list_) { + libcurl_->curl_slist_free_all(resolve_list_); + } if (put_body_) { fclose(put_body_); } @@ -212,6 +215,17 @@ Status CurlHttpRequest::AddHeader(const string& name, const string& value) { return Status::OK(); } +Status CurlHttpRequest::AddResolveOverride(const string& hostname, int64 port, + const string& ip_addr) { + TF_RETURN_IF_ERROR(CheckInitialized()); + TF_RETURN_IF_ERROR(CheckNotSent()); + // Resolve values are hostname:port:IP.add.ress + resolve_list_ = libcurl_->curl_slist_append( + resolve_list_, + strings::StrCat(hostname, ":", port, ":", ip_addr).c_str()); + return Status::OK(); +} + Status CurlHttpRequest::AddAuthBearerHeader(const string& auth_token) { TF_RETURN_IF_ERROR(CheckInitialized()); TF_RETURN_IF_ERROR(CheckNotSent()); @@ -376,6 +390,9 @@ Status CurlHttpRequest::Send() { if (curl_headers_) { libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_); } + if (resolve_list_) { + libcurl_->curl_easy_setopt(curl_, CURLOPT_RESOLVE, resolve_list_); + } libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERDATA, reinterpret_cast(this)); libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION, diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h index c7a555de10..2396593d6d 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.h +++ b/tensorflow/core/platform/cloud/curl_http_request.h @@ -71,6 +71,9 @@ class CurlHttpRequest : public HttpRequest { /// Sets a request header. Status AddHeader(const string& name, const string& value) override; + Status AddResolveOverride(const string& hostname, int64 port, + const string& ip_addr) override; + /// Sets the 'Authorization' header to the value of 'Bearer ' + auth_token. Status AddAuthBearerHeader(const string& auth_token) override; @@ -146,6 +149,7 @@ class CurlHttpRequest : public HttpRequest { std::vector* response_buffer_ = nullptr; CURL* curl_ = nullptr; curl_slist* curl_headers_ = nullptr; + curl_slist* resolve_list_ = nullptr; std::vector default_response_buffer_; diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc new file mode 100644 index 0000000000..63f2da065d --- /dev/null +++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc @@ -0,0 +1,135 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/cloud/gcs_dns_cache.h" + +#include +#include +#include + +namespace tensorflow { + +namespace { + +constexpr char kStorageHost[] = "storage.googleapis.com"; +constexpr char kWwwHost[] = "www.googleapis.com"; + +} // namespace + +GcsDnsCache::GcsDnsCache(Env* env, int64 refresh_rate_secs) + : env_(env), refresh_rate_secs_(refresh_rate_secs) {} + +Status GcsDnsCache::AnnotateRequest(HttpRequest* request) { + // TODO(saeta): Blacklist failing IP addresses. + mutex_lock l(mu_); + if (!started_) { + DCHECK(!worker_) << "Worker thread already exists!"; + // Perform DNS resolutions to warm the cache. + std::vector www_addresses = ResolveName(kWwwHost); + std::vector storage_addresses = ResolveName(kStorageHost); + www_addresses.swap(www_addresses_); + storage_addresses.swap(storage_addresses_); + + // Note: we opt to use a thread instead of a delayed closure. + worker_.reset(env_->StartThread( + {}, "gcs_dns_worker", std::bind(&GcsDnsCache::WorkerThread, this))); + started_ = true; + } + if (!storage_addresses_.empty()) { + std::uniform_int_distribution<> storage_dist(0, + storage_addresses_.size() - 1); + size_t index = storage_dist(random_); + TF_RETURN_IF_ERROR(request->AddResolveOverride(kStorageHost, 443, + storage_addresses_[index])); + } else { + LOG(WARNING) << "No IP addresses available for " << kStorageHost; + } + if (!www_addresses_.empty()) { + std::uniform_int_distribution<> www_dist(0, www_addresses_.size() - 1); + size_t index = www_dist(random_); + TF_RETURN_IF_ERROR( + request->AddResolveOverride(kWwwHost, 443, www_addresses_[index])); + } else { + LOG(WARNING) << "No IP addresses available for " << kWwwHost; + } + return Status::OK(); +} + +/* static */ std::vector GcsDnsCache::ResolveName(const string& name) { + addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; // Only use IPv4 for now. + hints.ai_socktype = SOCK_STREAM; + addrinfo* result = nullptr; + int return_code = getaddrinfo(name.c_str(), nullptr, &hints, &result); + + std::vector output; + if (return_code == 0) { + for (addrinfo* i = result; i != nullptr; i = i->ai_next) { + if (i->ai_family != AF_INET || i->ai_addr->sa_family != AF_INET) { + LOG(WARNING) << "Non-IPv4 address returned. ai_family: " << i->ai_family + << ". sa_family: " << i->ai_addr->sa_family << "."; + continue; + } + char buf[INET_ADDRSTRLEN]; + void* address_ptr = + &(reinterpret_cast(i->ai_addr)->sin_addr); + const char* formatted = nullptr; + if ((formatted = inet_ntop(i->ai_addr->sa_family, address_ptr, buf, + INET_ADDRSTRLEN)) == nullptr) { + LOG(ERROR) << "Error converting response to IP address for " << name + << ": " << strerror(errno); + } else { + output.emplace_back(buf); + } + } + } else { + if (return_code == EAI_SYSTEM) { + LOG(ERROR) << "Error resolving " << name + << " (EAI_SYSTEM): " << strerror(errno); + } else { + LOG(ERROR) << "Error resolving " << name << ": " + << gai_strerror(return_code); + } + } + if (result != nullptr) { + freeaddrinfo(result); + } + return output; +} + +void GcsDnsCache::WorkerThread() { + while (true) { + { + // Don't immediately re-resolve the addresses. + mutex_lock l(mu_); + if (cancelled_) return; + cond_var_.wait_for(l, std::chrono::seconds(refresh_rate_secs_)); + if (cancelled_) return; + } + // Resolve DNS values + std::vector www_addresses = ResolveName(kWwwHost); + std::vector storage_addresses = ResolveName(kStorageHost); + + { + mutex_lock l(mu_); + // Update instance variables. + www_addresses.swap(www_addresses_); + storage_addresses.swap(storage_addresses_); + } + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.h b/tensorflow/core/platform/cloud/gcs_dns_cache.h new file mode 100644 index 0000000000..7a4d3847a5 --- /dev/null +++ b/tensorflow/core/platform/cloud/gcs_dns_cache.h @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_PLATNFORM_CLOUD_DNS_CACHE_H_ +#define THIRD_PARTY_TENSORFLOW_PLATNFORM_CLOUD_DNS_CACHE_H_ + +#include + +#include "tensorflow/core/platform/cloud/http_request.h" +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { +const int64 kDefaultRefreshRateSecs = 60; + +// DnsCache is a userspace DNS cache specialized for the GCS filesystem. +// +// Some environments have unreliable DNS resolvers. DnsCache ameliorates the +// situation by radically reducing the number of DNS requests by performing +// 2 DNS queries per minute (by default) on a background thread. Updated cache +// entries are used to override curl's DNS resolution processes. +class GcsDnsCache { + public: + // Default no-argument constructor. + GcsDnsCache() : GcsDnsCache(kDefaultRefreshRateSecs) {} + + // Constructs a GcsDnsCache with the specified refresh rate. + GcsDnsCache(int64 refresh_rate_secs) + : GcsDnsCache(Env::Default(), refresh_rate_secs) {} + + GcsDnsCache(Env* env, int64 refresh_rate_secs); + + ~GcsDnsCache() { + mutex_lock l(mu_); + cancelled_ = true; + cond_var_.notify_one(); + } + + // Annotate the given HttpRequest with resolve overrides from the cache. + Status AnnotateRequest(HttpRequest* request); + + private: + static std::vector ResolveName(const string& name); + void WorkerThread(); + + // Define a friend class for testing. + friend class GcsDnsCacheTest; + + mutex mu_; + Env* env_; + condition_variable cond_var_; + std::default_random_engine random_ GUARDED_BY(mu_); + bool started_ GUARDED_BY(mu_) = false; + bool cancelled_ GUARDED_BY(mu_) = false; + std::vector www_addresses_ GUARDED_BY(mu_); + std::vector storage_addresses_ GUARDED_BY(mu_); + std::unique_ptr worker_ GUARDED_BY(mu_); // After mutable vars. + const int64 refresh_rate_secs_; +}; + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_PLATNFORM_CLOUD_DNS_CACHE_H_ diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc new file mode 100644 index 0000000000..cba6caff22 --- /dev/null +++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc @@ -0,0 +1,113 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/cloud/gcs_dns_cache.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class TestHttpRequest : public HttpRequest { + public: + Status Init() override { return Status::OK(); } + Status SetUri(const string& uri) override { return Status::OK(); } + Status SetRange(uint64 start, uint64 end) override { return Status::OK(); } + Status AddHeader(const string& name, const string& value) override { + return Status::OK(); + } + Status AddResolveOverride(const string& hostname, int64 port, + const string& ip_addr) override { + EXPECT_EQ(port, 443) << "Unexpected port set for hostname: " << hostname; + auto itr = resolve_overrides_.find(hostname); + EXPECT_EQ(itr, resolve_overrides_.end()) + << "Hostname " << hostname << "already in map: " << itr->second; + + resolve_overrides_.insert( + std::map::value_type(hostname, ip_addr)); + return Status::OK(); + } + + Status AddAuthBearerHeader(const string& auth_token) override { + return Status::OK(); + } + + Status SetDeleteRequest() override { return Status::OK(); } + + Status SetPutFromFile(const string& body_filepath, size_t offset) override { + return Status::OK(); + } + Status SetPutEmptyBody() override { return Status::OK(); } + + Status SetPostFromBuffer(const char* buffer, size_t size) override { + return Status::OK(); + } + Status SetPostEmptyBody() override { return Status::OK(); } + + Status SetResultBuffer(std::vector* out_buffer) override { + return Status::OK(); + } + + string GetResponseHeader(const string& name) const override { return ""; } + uint64 GetResponseCode() const override { return 0; } + Status Send() override { return Status::OK(); } + string EscapeString(const string& str) override { return ""; } + + std::map resolve_overrides_; +}; + +// Friend class for testing. +// +// It is written this way (as opposed to using FRIEND_TEST) to avoid a +// non-test-time dependency on gunit. +class GcsDnsCacheTest : public ::testing::Test { + protected: + void ResolveNameTest() { + auto response = GcsDnsCache::ResolveName("www.googleapis.com"); + EXPECT_LT(1, response.size()) << str_util::Join(response, ", "); + } + + void AnnotateRequestTest() { + GcsDnsCache d; + { + mutex_lock l(d.mu_); + d.started_ = true; // Avoid creating a thread. + d.www_addresses_ = {"192.168.1.1"}; + d.storage_addresses_ = {"172.134.1.1"}; + } + + TestHttpRequest req; + Status s = d.AnnotateRequest(&req); + EXPECT_TRUE(s.ok()) << s; + EXPECT_EQ("192.168.1.1", req.resolve_overrides_["www.googleapis.com"]); + EXPECT_EQ("172.134.1.1", req.resolve_overrides_["storage.googleapis.com"]); + } + + void SuccessfulCleanupTest() { + // Create a DnsCache object, start the worker thread, ensure it cleans up in + // a timely manner. + GcsDnsCache d; + TestHttpRequest req; + Status s = d.AnnotateRequest(&req); + EXPECT_TRUE(s.ok()) << s; + } +}; + +TEST_F(GcsDnsCacheTest, ResolveName) { ResolveNameTest(); } + +TEST_F(GcsDnsCacheTest, AnnotateRequest) { AnnotateRequestTest(); } + +TEST_F(GcsDnsCacheTest, SuccessfulCleanup) { SuccessfulCleanupTest(); } + +} // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 17fe704b79..9287de7237 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -89,6 +89,10 @@ constexpr char kMatchingPathsCacheMaxEntries[] = constexpr size_t kMatchingPathsCacheDefaultMaxEntries = 1024; // The file statistics returned by Stat() for directories. const FileStatistics DIRECTORY_STAT(0, 0, true); +// Some environments exhibit unreliable DNS resolution. Set this environment +// variable to a positive integer describing the frequency used to refresh the +// userspace DNS cache. +constexpr char kResolveCacheSecs[] = "GCS_RESOLVE_REFRESH_SECS"; Status GetTmpFilename(string* filename) { if (!filename) { @@ -434,8 +438,8 @@ class GcsWritableFile : public WritableFile { std::unique_ptr request(http_request_factory_->Create()); TF_RETURN_IF_ERROR(request->Init()); TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat( - kGcsUploadUriBase, "b/", bucket_, "/o?uploadType=resumable&name=", - request->EscapeString(object_)))); + kGcsUploadUriBase, "b/", bucket_, + "/o?uploadType=resumable&name=", request->EscapeString(object_)))); TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token)); TF_RETURN_IF_ERROR(request->AddHeader("X-Upload-Content-Length", std::to_string(file_size))); @@ -624,6 +628,12 @@ GcsFileSystem::GcsFileSystem() } matching_paths_cache_.reset(new ExpiringLRUCache>( matching_paths_cache_max_age, matching_paths_cache_max_entries)); + + int64 resolve_frequency_secs; + if (GetEnvVar(kResolveCacheSecs, strings::safe_strto64, + &resolve_frequency_secs)) { + dns_cache_.reset(new GcsDnsCache(resolve_frequency_secs)); + } } GcsFileSystem::GcsFileSystem( @@ -678,6 +688,11 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token)); TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1)); TF_RETURN_IF_ERROR(request->SetResultBuffer(out)); + + if (dns_cache_) { + TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get())); + } + TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://", bucket, "/", object); return Status::OK(); @@ -821,6 +836,11 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket, "?fields=size%2Cupdated"))); TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token)); TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer)); + + if (dns_cache_) { + TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get())); + } + TF_RETURN_WITH_CONTEXT_IF_ERROR( request->Send(), " when reading metadata of gs://", bucket, "/", object); @@ -959,12 +979,12 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname, uri = strings::StrCat(uri, "&delimiter=%2F"); } if (!object_prefix.empty()) { - uri = strings::StrCat(uri, "&prefix=", - request->EscapeString(object_prefix)); + uri = strings::StrCat(uri, + "&prefix=", request->EscapeString(object_prefix)); } if (!nextPageToken.empty()) { - uri = strings::StrCat(uri, "&pageToken=", - request->EscapeString(nextPageToken)); + uri = strings::StrCat( + uri, "&pageToken=", request->EscapeString(nextPageToken)); } if (max_results - retrieved_results < kGetChildrenDefaultPageSize) { uri = @@ -973,6 +993,11 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname, TF_RETURN_IF_ERROR(request->SetUri(uri)); TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token)); TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer)); + + if (dns_cache_) { + TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get())); + } + TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading ", dirname); Json::Value root; StringPiece response_piece = diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 36a1d42fde..4b4853c838 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/platform/cloud/auth_provider.h" #include "tensorflow/core/platform/cloud/expiring_lru_cache.h" #include "tensorflow/core/platform/cloud/file_block_cache.h" +#include "tensorflow/core/platform/cloud/gcs_dns_cache.h" #include "tensorflow/core/platform/cloud/http_request.h" #include "tensorflow/core/platform/cloud/retrying_file_system.h" #include "tensorflow/core/platform/file_system.h" @@ -141,6 +142,7 @@ class GcsFileSystem : public FileSystem { std::unique_ptr auth_provider_; std::unique_ptr http_request_factory_; std::unique_ptr file_block_cache_; + std::unique_ptr dns_cache_; using StatCache = ExpiringLRUCache; std::unique_ptr stat_cache_; diff --git a/tensorflow/core/platform/cloud/http_request.h b/tensorflow/core/platform/cloud/http_request.h index 8182b63d5b..02d9e9054a 100644 --- a/tensorflow/core/platform/cloud/http_request.h +++ b/tensorflow/core/platform/cloud/http_request.h @@ -64,6 +64,14 @@ class HttpRequest { /// Sets a request header. virtual Status AddHeader(const string& name, const string& value) = 0; + /// Sets a DNS resolve mapping (to skip DNS resolution). + /// + /// Note: because GCS is available over HTTPS, we cannot replace the hostname + /// in the URI with an IP address, as that will cause the certificate check + /// to fail. + virtual Status AddResolveOverride(const string& hostname, int64 port, + const string& ip_addr) = 0; + /// Sets the 'Authorization' header to the value of 'Bearer ' + auth_token. virtual Status AddAuthBearerHeader(const string& auth_token) = 0; -- GitLab From 7d3e728369ed0ee8c982202ea64547488ed9aa1a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 15 Nov 2017 18:01:52 -0800 Subject: [PATCH 0490/1801] Don't assert when processing invalid dimensions PiperOrigin-RevId: 175910804 --- tensorflow/core/grappler/costs/graph_properties.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 26b8521771..d33d86df3a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -91,8 +91,15 @@ struct Processor { *result = -counter; counter++; } else { - CHECK_LE(0, InferenceContext::Value(d)); - *result = InferenceContext::Value(d); + int64 val = InferenceContext::Value(d); + if (val >= 0) { + *result = val; + } else { + // A shape inference function generated an invalid dimension handle. + // Use a symbolic dimension to encode this. + *result = -counter; + counter++; + } } } -- GitLab From 8c16cf3ff96ccc17a5953c62bf23616472f7cffc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 18:05:34 -0800 Subject: [PATCH 0491/1801] Hlo parser: support infeed and outfeed. PiperOrigin-RevId: 175911331 --- .../compiler/xla/service/hlo_instruction.cc | 9 ++++++- .../compiler/xla/tools/parser/hlo_parser.cc | 26 ++++++++++++++++--- .../xla/tools/parser/hlo_parser_test.cc | 14 ++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index d3096231dc..045abdac8b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -43,6 +43,7 @@ limitations under the License. namespace xla { +using tensorflow::str_util::CEscape; using ::tensorflow::str_util::Join; using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; @@ -1965,6 +1966,13 @@ std::vector HloInstruction::ExtraAttributesToString() const { }), "}")); } + if (opcode() == HloOpcode::kInfeed && !infeed_config_.empty()) { + extra.push_back(StrCat("infeed_config=\"", CEscape(infeed_config_), "\"")); + } + if (opcode() == HloOpcode::kOutfeed && !outfeed_config_.empty()) { + extra.push_back( + StrCat("outfeed_config=\"", CEscape(outfeed_config_), "\"")); + } return extra; } @@ -2920,7 +2928,6 @@ string PaddingConfigToString(const PaddingConfig& padding) { string OpMetadataToString(const OpMetadata& metadata) { std::vector result; - using tensorflow::str_util::CEscape; if (!metadata.op_type().empty()) { result.push_back(StrCat("op_type=\"", CEscape(metadata.op_type()), "\"")); } diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 3e3406e658..a65e5a856f 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -776,11 +776,31 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, shape, *fusion_kind, operands, *fusion_computation)); break; } + case HloOpcode::kInfeed: { + optional config; + attrs["infeed_config"] = {/*required=*/false, AttrTy::kString, &config}; + if (!ParseOperands(&operands, /*expected_size=*/0) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateInfeed(shape, config ? *config : "")); + break; + } + case HloOpcode::kOutfeed: { + optional config; + attrs["outfeed_config"] = {/*required=*/false, AttrTy::kString, &config}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateOutfeed( + shape, operands[0], config ? *config : "")); + break; + } case HloOpcode::kCustomCall: case HloOpcode::kReducePrecision: case HloOpcode::kRng: - case HloOpcode::kInfeed: - case HloOpcode::kOutfeed: case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); @@ -805,7 +825,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, instruction->set_metadata(*metadata); } return AddInstruction(name, instruction); -} +} // NOLINT(readability/fn_size) // ::= '{' (single_sharding | tuple_sharding) '}' // diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 29ae3296ca..0ebc0ca44b 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -560,6 +560,20 @@ ENTRY %fusion.v3 () -> f32[3,2,1,1] { ROOT %fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %constant, f32[2]{0} %constant.1), kind=kLoop, calls=%fused_computation } +)" +}, +// infeed/outfeed +{ +"InfeedOutfeed", +R"(HloModule outfeed_module: + +ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) { + %infeed = (u32[3]{0}, pred[]) infeed() + %outfeed = () outfeed((u32[3]{0}, pred[]) %infeed) + ROOT %infeed.1 = (u32[3]{0}, pred[]) infeed() + %outfeed.1 = () outfeed((u32[3]{0}, pred[]) %infeed.1) +} + )" } }); -- GitLab From 0dfcc34c954513ff26d20729712baade9dda93ed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 18:11:54 -0800 Subject: [PATCH 0492/1801] Cast offsets into int64 to allow for cases where tag_indices is of type int64 (e.g. the output of a call to tf.lookup.*). PiperOrigin-RevId: 175912063 --- .../crf/python/kernel_tests/crf_test.py | 26 +++++++++---------- tensorflow/contrib/crf/python/ops/crf.py | 9 ++++--- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py index 9174c5eb98..964ec75441 100644 --- a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py +++ b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py @@ -23,7 +23,6 @@ import itertools import numpy as np from tensorflow.contrib.crf.python.ops import crf -from tensorflow.python.framework import dtypes from tensorflow.python.framework import constant_op from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -58,18 +57,19 @@ class CrfTest(test.TestCase): def testCrfUnaryScore(self): inputs = np.array( [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32) - tag_indices = np.array([1, 2, 1, 0], dtype=np.int32) - sequence_lengths = np.array(3, dtype=np.int32) - with self.test_session() as sess: - unary_score = crf.crf_unary_score( - tag_indices=array_ops.expand_dims(tag_indices, 0), - sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), - inputs=array_ops.expand_dims(inputs, 0)) - unary_score = array_ops.squeeze(unary_score, [0]) - tf_unary_score = sess.run(unary_score) - expected_unary_score = sum(inputs[i][tag_indices[i]] - for i in range(sequence_lengths)) - self.assertAllClose(tf_unary_score, expected_unary_score) + for dtype in (np.int32, np.int64): + tag_indices = np.array([1, 2, 1, 0], dtype=dtype) + sequence_lengths = np.array(3, dtype=np.int32) + with self.test_session() as sess: + unary_score = crf.crf_unary_score( + tag_indices=array_ops.expand_dims(tag_indices, 0), + sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), + inputs=array_ops.expand_dims(inputs, 0)) + unary_score = array_ops.squeeze(unary_score, [0]) + tf_unary_score = sess.run(unary_score) + expected_unary_score = sum(inputs[i][tag_indices[i]] + for i in range(sequence_lengths)) + self.assertAllClose(tf_unary_score, expected_unary_score) def testCrfBinaryScore(self): tag_indices = np.array([1, 2, 1, 0], dtype=np.int32) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 7166e38b28..4282be5ec8 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -193,6 +193,9 @@ def crf_unary_score(tag_indices, sequence_lengths, inputs): offsets = array_ops.expand_dims( math_ops.range(batch_size) * max_seq_len * num_tags, 1) offsets += array_ops.expand_dims(math_ops.range(max_seq_len) * num_tags, 0) + # Use int32 or int64 based on tag_indices' dtype. + if tag_indices.dtype == dtypes.int64: + offsets = math_ops.to_int64(offsets) flattened_tag_indices = array_ops.reshape(offsets + tag_indices, [-1]) unary_scores = array_ops.reshape( @@ -305,7 +308,7 @@ def viterbi_decode(score, transition_params): Returns: viterbi: A [seq_len] list of integers containing the highest scoring tag - indicies. + indices. viterbi_score: A float containing the score for the Viterbi sequence. """ trellis = np.zeros_like(score) @@ -385,7 +388,7 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell): """Initialize the CrfDecodeBackwardRnnCell. Args: - num_tags + num_tags: The number of tags. """ self._num_tags = num_tags @@ -434,7 +437,7 @@ def crf_decode(potentials, transition_params, sequence_length): Returns: decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32. - Contains the highest scoring tag indicies. + Contains the highest scoring tag indices. best_score: A [batch_size] tensor, containing the score of decode_tags. """ # For simplicity, in shape comments, denote: -- GitLab From 2800d6e92b57caeb68cdda24c58eeffb57219b53 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Wed, 15 Nov 2017 18:18:50 -0800 Subject: [PATCH 0493/1801] Minor change in VirtualScheduler logging: there's sometimes a difference between device total uptime and the sum of per-op computation time, because uptime includes waiting for channel communications. PiperOrigin-RevId: 175912780 --- .../core/grappler/costs/virtual_scheduler.cc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 2ab3a9144c..0bb98d3793 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -677,10 +677,10 @@ Costs VirtualScheduler::Summary() const { critical_path_costs.estimated_max_memory_per_device[name] = max_memory_usage; + const Costs::NanoSeconds wall_time_ns = state.GetCurrTime(); VLOG(1) << "Device = " << name << ", num_nodes = " << state.nodes_executed.size() - << ", execution_time = " << state.GetCurrTime().count() - << ", memory usage: " + << ", wall_time_ns = " << wall_time_ns.count() << ", memory usage: " << "persistent = " << strings::HumanReadableNumBytes(persistent_memory_usage) << ", peak = " @@ -698,9 +698,11 @@ Costs VirtualScheduler::Summary() const { op_to_memory[node->op()] += CalculateOutputSize(node_map_.at(node).output_properties, port); } + Costs::NanoSeconds total_compute_time_ns; for (const auto& op_cost_pair : state.op_to_cost) { const auto& op = op_cost_pair.first; const auto& cost = op_cost_pair.second.execution_time.count(); + total_compute_time_ns += op_cost_pair.second.execution_time; int64 op_mem_usage = 0; auto it = op_to_memory.find(op); if (it != op_to_memory.end()) { @@ -718,6 +720,15 @@ Costs VirtualScheduler::Summary() const { << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")"); } } + + int utilization = 0; + if (wall_time_ns.count() > 0) { + utilization = total_compute_time_ns.count() * 100 / wall_time_ns.count(); + } + VLOG(1) << "Device = " << name + << ", total_compute_time_ns = " << total_compute_time_ns.count() + << ", utilization = " << utilization << "%"; + if (critical_path_costs.execution_time <= state.GetCurrTime()) { critical_path_costs = state.device_costs; } -- GitLab From d8af56e3b4cd0ac5096e32c3eee2d2cfb4d4137d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 18:21:58 -0800 Subject: [PATCH 0494/1801] Fix data race in tensorflow/core/kernels/map_stage_op.cc, and use tf locking/formatting conventions. - Fix data race in which a routine mysteriously releases a lock which was held when it was called, and which the caller expects will still be held afterwards.(!) See the changes to the routines notify_inserters_if_bounded() and notify_removers(). The data race was in put() on current_bytes_ near the end, after the call to put_complete(). put_complete() calls notify_removers(), which released the lock, thus causing the race. Any reader of the code would have expected that the lock would have been held at that point, but it wasn't. The same pattern in notify_inserters_if_bounded() didn't cause a race in the current code, but is a pitfall for future maintainers, who would not mornally expect something called notify_inserters_if_bounded() to mean "and in some circumstances, unlock". Even in the absense of this bug, and the weird, uncommented spec for those routines, it is almost always a bad idea to release the lock before calling a condition-variable wakeup. First, it complicates the code, and makes it hard to reason about object deletion if the deletion is predicated by one of the signalled conditions. Second, it's an _unnecessary_ complication: a well-written condition variable implementation will requeue the woken thread(s) on the mutex queue, so the expensive part of the wakeup will be deferred until the waking thread unlocks anyway. - Avoid the C++11 unique_lock mechanism that allowed the bug to happen, and instead use tensorflow::mutex, tensorflow::mutex_lock, and tensorflow::condition_variable. - Use while-loops instead of lambdas for condition-variable waits, because while-loops are much easier to read. - Fix the routines would_exceed_memory_limit() and is_capacity_full() to include their own validity check, so it's not needed at each of their call sites. This improves the readability at the call sites. - Use annotalysis locking annotations so that other violations can be found statically by the compiler. - Reformat with clang-format. PiperOrigin-RevId: 175913085 --- tensorflow/core/kernels/map_stage_op.cc | 238 ++++++++++++------------ 1 file changed, 121 insertions(+), 117 deletions(-) diff --git a/tensorflow/core/kernels/map_stage_op.cc b/tensorflow/core/kernels/map_stage_op.cc index 7b5a464b72..bdc3b5778f 100644 --- a/tensorflow/core/kernels/map_stage_op.cc +++ b/tensorflow/core/kernels/map_stage_op.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" namespace tensorflow { namespace { @@ -36,16 +37,14 @@ namespace { // Partial Ordering Comparator for Tensor keys containing scalar int64's struct KeyTensorLess { bool operator()(const Tensor& lhs, const Tensor& rhs) const { - return std::less{}(lhs.scalar()(), - rhs.scalar()()); + return std::less{}(lhs.scalar()(), rhs.scalar()()); } }; // Key Equality operator for Tensor keys containing scalar int64's struct KeyTensorEqual { bool operator()(const Tensor& lhs, const Tensor& rhs) const { - return std::equal_to{}(lhs.scalar()(), - rhs.scalar()()); + return std::equal_to{}(lhs.scalar()(), rhs.scalar()()); } }; @@ -93,24 +92,23 @@ class StagingMap : public ResourceBase { private: // Private variables - DataTypeVector dtypes_; - std::size_t capacity_; - std::size_t memory_limit_; - std::size_t current_bytes_; - std::mutex mu_; - std::condition_variable not_empty_; - std::condition_variable full_; - IncompleteType incomplete_; - MapType map_; + DataTypeVector dtypes_ GUARDED_BY(mu_); + std::size_t capacity_ GUARDED_BY(mu_); + std::size_t memory_limit_ GUARDED_BY(mu_); + std::size_t current_bytes_ GUARDED_BY(mu_); + tensorflow::mutex mu_; + tensorflow::condition_variable not_empty_; + tensorflow::condition_variable full_; + IncompleteType incomplete_ GUARDED_BY(mu_); + MapType map_ GUARDED_BY(mu_); private: // private methods // If map is configured for bounded capacity, notify // waiting inserters that space is now available - void notify_inserters_if_bounded(std::unique_lock* lock) { + void notify_inserters_if_bounded() EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (has_capacity() || has_memory_limit()) { - lock->unlock(); // Notify all inserters. The removal of an element // may make memory available for many inserters // to insert new elements @@ -120,23 +118,29 @@ class StagingMap : public ResourceBase { // Notify all removers waiting to extract values // that data is now available - void notify_removers(std::unique_lock* lock) { - lock->unlock(); + void notify_removers() { // Notify all removers. This is because they are // waiting for specific keys to appear in the map // so we don't know which one to wake up. not_empty_.notify_all(); } - bool has_capacity() const { return capacity_ > 0; } + bool has_capacity() const EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return capacity_ > 0; + } - bool has_memory_limit() const { return memory_limit_ > 0; } + bool has_memory_limit() const EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return memory_limit_ > 0; + } - bool would_exceed_memory_limit(std::size_t bytes) const { - return bytes + current_bytes_ > memory_limit_; + bool would_exceed_memory_limit(std::size_t bytes) const + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return has_memory_limit() && bytes + current_bytes_ > memory_limit_; } - bool is_capacity_full() const { return map_.size() >= capacity_; } + bool is_capacity_full() const EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return has_capacity() && map_.size() >= capacity_; + } // Get number of bytes in the tuple std::size_t get_tuple_bytes(const Tuple& tuple) { @@ -157,7 +161,8 @@ class StagingMap : public ResourceBase { } // Check that the index is within bounds - Status check_index(const Tensor& key, std::size_t index) { + Status check_index(const Tensor& key, std::size_t index) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (index >= dtypes_.size()) { return Status(errors::InvalidArgument( "Index '", index, "' for key '", key.scalar()(), @@ -169,7 +174,7 @@ class StagingMap : public ResourceBase { Status copy_or_move_tensors(OptionalTuple* map_tuple, const Tensor& key, const Tensor& indices, Tuple* output, - bool copy = false) { + bool copy = false) EXCLUSIVE_LOCKS_REQUIRED(mu_) { auto findices = indices.flat(); // Return values at specified indices @@ -201,11 +206,12 @@ class StagingMap : public ResourceBase { // Check that the optional value at the specified index // is uninitialized Status check_index_uninitialized(const Tensor& key, std::size_t index, - const OptionalTuple& tuple) { + const OptionalTuple& tuple) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (tuple[index].has_value()) { - return Status(errors::InvalidArgument("The tensor for index '", - index, "' for key '", key.scalar()(), - "' was already initialized '", dtypes_.size(), "'.")); + return Status(errors::InvalidArgument( + "The tensor for index '", index, "' for key '", key.scalar()(), + "' was already initialized '", dtypes_.size(), "'.")); } return Status::OK(); @@ -228,7 +234,7 @@ class StagingMap : public ResourceBase { } // Check bytes are within memory limits memory limits - Status check_memory_limit(std::size_t bytes) { + Status check_memory_limit(std::size_t bytes) EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (has_memory_limit() && bytes > memory_limit_) { return Status(errors::ResourceExhausted( "Attempted to insert tensors with combined size of '", bytes, @@ -241,8 +247,8 @@ class StagingMap : public ResourceBase { // Insert incomplete data into the Barrier Status put_incomplete(const KeyType& key, const Tensor& indices, - OptionalTuple* tuple, - std::unique_lock* lock) { + OptionalTuple* tuple, tensorflow::mutex_lock* lock) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { auto findices = indices.flat(); // Search for the key in our incomplete set @@ -252,11 +258,9 @@ class StagingMap : public ResourceBase { std::size_t tuple_bytes = get_tuple_bytes(*tuple); TF_RETURN_IF_ERROR(check_memory_limit(tuple_bytes)); - if (has_memory_limit()) { - full_.wait(*lock, [tuple_bytes, this]() { - // Stop waiting if we don't exceed the memory limit - return !would_exceed_memory_limit(tuple_bytes); - }); + // Wait until we don't exceed the memory limit + while (would_exceed_memory_limit(tuple_bytes)) { + full_.wait(*lock); } // This key isn't present in the incomplete set @@ -282,8 +286,7 @@ class StagingMap : public ResourceBase { // Found an entry in the incomplete index // Update with given data and insert complete entries // into the main map - else - { + else { // Reference existing incomplete tuple OptionalTuple& present = it->second; @@ -312,7 +315,7 @@ class StagingMap : public ResourceBase { // Remove from incomplete incomplete_.erase(it); - TF_RETURN_IF_ERROR(put_complete(key, &insert_tuple, lock)); + TF_RETURN_IF_ERROR(put_complete(key, &insert_tuple)); } } @@ -320,12 +323,12 @@ class StagingMap : public ResourceBase { } // Does the insertion into the actual staging area - Status put_complete(const KeyType& key, OptionalTuple* tuple, - std::unique_lock* lock) { + Status put_complete(const KeyType& key, OptionalTuple* tuple) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { // Insert key and tuples into the map map_.insert({key, std::move(*tuple)}); - notify_removers(lock); + notify_removers(); return Status::OK(); } @@ -340,7 +343,7 @@ class StagingMap : public ResourceBase { current_bytes_(0) {} Status put(KeyType* key, const Tensor* indices, OptionalTuple* tuple) { - std::unique_lock lock(mu_); + tensorflow::mutex_lock lock(mu_); // Sanity check the indices TF_RETURN_IF_ERROR(check_index_ordering(*indices)); @@ -354,22 +357,13 @@ class StagingMap : public ResourceBase { // Check that tuple_bytes fits within the memory limit TF_RETURN_IF_ERROR(check_memory_limit(tuple_bytes)); - // If map capacity is bounded wait until map is not full - if (has_capacity() || has_memory_limit()) { - full_.wait(lock, [tuple_bytes, this]() { - // If there's a memory limit, check if there's space for insertion - bool memory_limit_valid = - has_memory_limit() ? !would_exceed_memory_limit(tuple_bytes) : true; - // If we're configured for capacity check if there's space for insertion - bool capacity_valid = has_capacity() ? !is_capacity_full() : true; - - // Stop waiting upon success for both conditions - return memory_limit_valid && capacity_valid; - }); + // Wait until there's space for insertion. + while (would_exceed_memory_limit(tuple_bytes) || is_capacity_full()) { + full_.wait(lock); } // Do the put operation - TF_RETURN_IF_ERROR(put_complete(*key, tuple, &lock)); + TF_RETURN_IF_ERROR(put_complete(*key, tuple)); // Update the current size current_bytes_ += tuple_bytes; @@ -378,7 +372,7 @@ class StagingMap : public ResourceBase { } Status get(const KeyType* key, const Tensor* indices, Tuple* tuple) { - std::unique_lock lock(mu_); + tensorflow::mutex_lock lock(mu_); // Sanity check the indices TF_RETURN_IF_ERROR(check_index_ordering(*indices)); @@ -386,8 +380,9 @@ class StagingMap : public ResourceBase { typename MapType::iterator it; // Wait until the element with the requested key is present - not_empty_.wait( - lock, [&, this]() { return (it = map_.find(*key)) != map_.end(); }); + while ((it = map_.find(*key)) == map_.end()) { + not_empty_.wait(lock); + } TF_RETURN_IF_ERROR( copy_or_move_tensors(&it->second, *key, *indices, tuple, true)); @@ -399,7 +394,7 @@ class StagingMap : public ResourceBase { } Status pop(const KeyType* key, const Tensor* indices, Tuple* tuple) { - std::unique_lock lock(mu_); + tensorflow::mutex_lock lock(mu_); // Sanity check the indices TF_RETURN_IF_ERROR(check_index_ordering(*indices)); @@ -407,8 +402,9 @@ class StagingMap : public ResourceBase { typename MapType::iterator it; // Wait until the element with the requested key is present - not_empty_.wait( - lock, [&, this]() { return (it = map_.find(*key)) != map_.end(); }); + while ((it = map_.find(*key)) == map_.end()) { + not_empty_.wait(lock); + } TF_RETURN_IF_ERROR( copy_or_move_tensors(&it->second, *key, *indices, tuple)); @@ -422,19 +418,21 @@ class StagingMap : public ResourceBase { // Update bytes in the Staging Area current_bytes_ -= get_tuple_bytes(*tuple); - notify_inserters_if_bounded(&lock); + notify_inserters_if_bounded(); return Status::OK(); } Status popitem(KeyType* key, const Tensor* indices, Tuple* tuple) { - std::unique_lock lock(mu_); + tensorflow::mutex_lock lock(mu_); // Sanity check the indices TF_RETURN_IF_ERROR(check_index_ordering(*indices)); // Wait until map is not empty - not_empty_.wait(lock, [this]() { return !this->map_.empty(); }); + while (this->map_.empty()) { + not_empty_.wait(lock); + } // Move from the first element and erase it @@ -454,29 +452,29 @@ class StagingMap : public ResourceBase { // Update bytes in the Staging Area current_bytes_ -= get_tuple_bytes(*tuple); - notify_inserters_if_bounded(&lock); + notify_inserters_if_bounded(); return Status::OK(); } Status clear() { - std::unique_lock lock(mu_); + tensorflow::mutex_lock lock(mu_); map_.clear(); incomplete_.clear(); current_bytes_ = 0; - notify_inserters_if_bounded(&lock); + notify_inserters_if_bounded(); return Status::OK(); } std::size_t incomplete_size() { - std::unique_lock lock(mu_); + tensorflow::mutex_lock lock(mu_); return incomplete_.size(); } std::size_t size() { - std::unique_lock lock(mu_); + tensorflow::mutex_lock lock(mu_); return map_.size(); } @@ -539,10 +537,9 @@ class MapStageOp : public OpKernel { } }; -REGISTER_KERNEL_BUILDER(Name("MapStage").Device(DEVICE_CPU), - MapStageOp); +REGISTER_KERNEL_BUILDER(Name("MapStage").Device(DEVICE_CPU), MapStageOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapStage").Device(DEVICE_CPU), - MapStageOp); + MapStageOp); #if GOOGLE_CUDA REGISTER_KERNEL_BUILDER( @@ -553,7 +550,7 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapStage") .HostMemory("indices") .Device(DEVICE_GPU), MapStageOp); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("MapStage") @@ -601,30 +598,34 @@ class MapUnstageOp : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("MapUnstage").Device(DEVICE_CPU), - MapUnstageOp); + MapUnstageOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstage").Device(DEVICE_CPU), - MapUnstageOp); + MapUnstageOp); #if GOOGLE_CUDA REGISTER_KERNEL_BUILDER(Name("MapUnstage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_GPU), MapUnstageOp); + .HostMemory("key") + .HostMemory("indices") + .Device(DEVICE_GPU), + MapUnstageOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_GPU), MapUnstageOp); + .HostMemory("key") + .HostMemory("indices") + .Device(DEVICE_GPU), + MapUnstageOp); #endif #ifdef TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("MapUnstage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), MapUnstageOp); + .HostMemory("key") + .HostMemory("indices") + .Device(DEVICE_SYCL), + MapUnstageOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), MapUnstageOp); -#endif // TENSORFLOW_USE_SYCL + .HostMemory("key") + .HostMemory("indices") + .Device(DEVICE_SYCL), + MapUnstageOp); +#endif // TENSORFLOW_USE_SYCL template class MapPeekOp : public OpKernel { @@ -682,7 +683,7 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapPeek") .HostMemory("indices") .Device(DEVICE_SYCL), MapPeekOp); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL template class MapUnstageNoKeyOp : public OpKernel { @@ -715,7 +716,7 @@ class MapUnstageNoKeyOp : public OpKernel { " vs. ", indices_tensor->NumElements())); for (std::size_t i = 0; i < tuple.size(); ++i) { - ctx->set_output(i+1, tuple[i]); + ctx->set_output(i + 1, tuple[i]); } } }; @@ -749,7 +750,7 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstageNoKey") .HostMemory("indices") .Device(DEVICE_SYCL), MapUnstageNoKeyOp); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL template class MapSizeOp : public OpKernel { @@ -770,23 +771,24 @@ class MapSizeOp : public OpKernel { } }; -REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_CPU), - MapSizeOp); +REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_CPU), MapSizeOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapSize").Device(DEVICE_CPU), MapSizeOp); #if GOOGLE_CUDA -REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_GPU) - .HostMemory("size"), MapSizeOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapSize").Device(DEVICE_GPU) - .HostMemory("size"), MapSizeOp); +REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_GPU).HostMemory("size"), + MapSizeOp); +REGISTER_KERNEL_BUILDER( + Name("OrderedMapSize").Device(DEVICE_GPU).HostMemory("size"), + MapSizeOp); #endif #ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_SYCL) - .HostMemory("size"), MapSizeOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapSize").Device(DEVICE_SYCL) - .HostMemory("size"), MapSizeOp); -#endif // TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_SYCL).HostMemory("size"), + MapSizeOp); +REGISTER_KERNEL_BUILDER( + Name("OrderedMapSize").Device(DEVICE_SYCL).HostMemory("size"), + MapSizeOp); +#endif // TENSORFLOW_USE_SYCL template class MapIncompleteSizeOp : public OpKernel { @@ -813,17 +815,21 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapIncompleteSize").Device(DEVICE_CPU), MapIncompleteSizeOp); #if GOOGLE_CUDA -REGISTER_KERNEL_BUILDER(Name("MapIncompleteSize").Device(DEVICE_GPU) - .HostMemory("size"), MapIncompleteSizeOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapIncompleteSize").Device(DEVICE_GPU) - .HostMemory("size"), MapIncompleteSizeOp); +REGISTER_KERNEL_BUILDER( + Name("MapIncompleteSize").Device(DEVICE_GPU).HostMemory("size"), + MapIncompleteSizeOp); +REGISTER_KERNEL_BUILDER( + Name("OrderedMapIncompleteSize").Device(DEVICE_GPU).HostMemory("size"), + MapIncompleteSizeOp); #endif #ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("MapIncompleteSize").Device(DEVICE_SYCL) - .HostMemory("size"), MapIncompleteSizeOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapIncompleteSize").Device(DEVICE_SYCL) - .HostMemory("size"), MapIncompleteSizeOp); -#endif // TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER( + Name("MapIncompleteSize").Device(DEVICE_SYCL).HostMemory("size"), + MapIncompleteSizeOp); +REGISTER_KERNEL_BUILDER( + Name("OrderedMapIncompleteSize").Device(DEVICE_SYCL).HostMemory("size"), + MapIncompleteSizeOp); +#endif // TENSORFLOW_USE_SYCL template class MapClearOp : public OpKernel { @@ -839,14 +845,12 @@ class MapClearOp : public OpKernel { } }; -REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_CPU), - MapClearOp); +REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_CPU), MapClearOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapClear").Device(DEVICE_CPU), MapClearOp); #if GOOGLE_CUDA -REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_GPU), - MapClearOp); +REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_GPU), MapClearOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapClear").Device(DEVICE_GPU), MapClearOp); #endif @@ -855,7 +859,7 @@ REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_SYCL), MapClearOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapClear").Device(DEVICE_SYCL), MapClearOp); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL } // namespace } // namespace tensorflow -- GitLab From 1ddd7bdda493b8212437c2e26f15993ef3186b52 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 15 Nov 2017 18:40:57 -0800 Subject: [PATCH 0495/1801] Add necessary shape util support for bfloat16 RELNOTES: Add necessary shape util support for bfloat16. PiperOrigin-RevId: 175914798 --- tensorflow/compiler/xla/shape_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 2202b6a2c1..c0a0e13f07 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -592,10 +592,10 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return sizeof(uint32); case U64: return sizeof(uint64); - case F16: - return sizeof(float) / 2; case BF16: return sizeof(float) / 2; + case F16: + return sizeof(float) / 2; case F32: return sizeof(float); case F64: -- GitLab From 542716812332210915d0dfb4dd141c6b768718f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 18:42:53 -0800 Subject: [PATCH 0496/1801] Verify file content before constructing the model PiperOrigin-RevId: 175914923 --- tensorflow/contrib/lite/model.cc | 16 ++++++++++++++-- tensorflow/contrib/lite/model_test.cc | 9 +++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index f8208f6f98..e2f3560e61 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -30,6 +30,17 @@ limitations under the License. namespace tflite { +namespace { +inline const tflite::Model* VerifyAndGetModel(const void* buf, size_t len) { + ::flatbuffers::Verifier verifier(static_cast(buf), len); + if (VerifyModelBuffer(verifier)) { + return ::tflite::GetModel(buf); + } else { + return nullptr; + } +} +} // namespace + const char* kEmptyTensorName = ""; std::unique_ptr FlatBufferModel::BuildFromFile( @@ -64,7 +75,7 @@ FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file, if (!allocation_->valid()) return; if (!CheckModelIdentifier()) return; - model_ = ::tflite::GetModel(allocation_->base()); + model_ = VerifyAndGetModel(allocation_->base(), allocation_->bytes()); } bool FlatBufferModel::CheckModelIdentifier() const { @@ -84,7 +95,8 @@ FlatBufferModel::FlatBufferModel(const char* ptr, size_t num_bytes, : DefaultErrorReporter()) { allocation_ = new MemoryAllocation(ptr, num_bytes, error_reporter); if (!allocation_->valid()) return; - model_ = ::tflite::GetModel(allocation_->base()); + + model_ = VerifyAndGetModel(allocation_->base(), allocation_->bytes()); } FlatBufferModel::~FlatBufferModel() { delete allocation_; } diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc index ae823650d6..6104386642 100644 --- a/tensorflow/contrib/lite/model_test.cc +++ b/tensorflow/contrib/lite/model_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include #include +#include #include "tensorflow/contrib/lite/model.h" @@ -245,6 +246,14 @@ TEST(BasicFlatBufferModel, TestNullErrorReporter) { ASSERT_NE(interpreter->Invoke(), kTfLiteOk); } +// Test what happens if we cannot bind any of the ops. +TEST(BasicFlatBufferModel, TestBuildModelFromCorruptedData) { + std::string corrupted_data = "123"; + auto model = FlatBufferModel::BuildFromBuffer(corrupted_data.c_str(), + corrupted_data.length()); + ASSERT_FALSE(model); +} + // TODO(aselle): Add tests for serialization of builtin op data types. // These tests will occur with the evaluation tests of individual operators, // not here. -- GitLab From 106d1960f4acb926c72e185b684bdffb0ebc06d7 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 15 Nov 2017 18:46:31 -0800 Subject: [PATCH 0497/1801] Switch the op naming of tfe.Network to match its variable naming scheme. e.g. uses my_network_1/dense_1/ consistently rather than also using my_network_1_1/dense_1/ sometimes. PiperOrigin-RevId: 175915162 --- tensorflow/contrib/eager/python/network.py | 22 +++++++++++++ .../contrib/eager/python/network_test.py | 32 +++++++++++++++++++ tensorflow/python/layers/base.py | 8 +++-- 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index f7303cb5b4..97eded7dca 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -37,6 +37,20 @@ from tensorflow.python.training import training_util # functions in base.py which should be reused. +def _network_name_scope_naming(current_variable_scope): + """Name scope naming to match operation names to variable names. + + Used in Networks and also applied to non-Network Layers which are added to + Networks before being built. + + Args: + current_variable_scope: A VariableScope object. + Returns: + A name scope name. + """ + return current_variable_scope.name + "/" + + class Network(base.Layer): """Represents the composition of a set of Layers. @@ -72,6 +86,11 @@ class Network(base.Layer): self._variable_scope_counts_on_init = ( variable_scope._get_default_variable_store().variable_scopes_count) + def _name_scope_name(self, current_variable_scope): + """Overrides Layer op naming to match variable naming.""" + return _network_name_scope_naming( + current_variable_scope=current_variable_scope) + def _init_set_name(self, name): # Anonymous Networks (name=None) defer setting a final name until they are # (1) added to another Network, or (2) built/called (where (2) is only used @@ -205,6 +224,9 @@ class Network(base.Layer): None, use_resource=True, default_name=sublayer.name) as sub_scope: sublayer._scope = sub_scope + # Also switch op naming for this Layer to match Network conventions, + # i.e. op naming matching variable naming. + sublayer._name_scope_name = _network_name_scope_naming @base.Layer.name.getter def name(self): diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 555c6e048d..8718a8b522 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -19,9 +19,11 @@ from __future__ import print_function import gc from tensorflow.contrib.eager.python import network +from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.layers import core from tensorflow.python.ops import math_ops @@ -434,6 +436,36 @@ class NetworkTest(test.TestCase): self.assertIsInstance(net.trainable_weights[0], resource_variable_ops.ResourceVariable) + def testGraphOpNames(self): + """Network operation names should match variable naming.""" + + def _check_op_prefixes(expected_prefix, checked_ops): + for operation in ops.get_default_graph().get_operations(): + if operation.name == "ignore": + continue + if operation.name in checked_ops: + continue + checked_ops.add(operation.name) + self.assertStartsWith(expected_start=expected_prefix, + actual=operation.name) + self.assertNotIn("my_network", operation.name[len(expected_prefix):]) + self.assertNotIn("dense", operation.name[len(expected_prefix):]) + + with context.graph_mode(): + net = MyNetwork() + zero = constant_op.constant([[0.]], name="ignore") + net(zero) + checked_ops = set() + _check_op_prefixes(expected_prefix="my_network/dense/", + checked_ops=checked_ops) + net.net2 = net.track_layer(MyNetwork()) + net.net2(zero) + _check_op_prefixes(expected_prefix="my_network/my_network/dense/", + checked_ops=checked_ops) + MyNetwork()(zero) + _check_op_prefixes(expected_prefix="my_network_1/dense/", + checked_ops=checked_ops) + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testDuplicateNameError(self): one = constant_op.constant([[1.]]) diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 9677db2bce..74b85da845 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -379,6 +379,10 @@ class Layer(object): """ return inputs + def _name_scope_name(self, current_variable_scope): + """Determines op naming for the Layer.""" + return current_variable_scope.original_name_scope + def _compute_output_shape(self, input_shape): """Computes the output shape of the layer given the input shape. @@ -474,7 +478,7 @@ class Layer(object): self._set_scope(None) with vs.variable_scope( self._scope, reuse=(self.built or self._reuse)) as scope: - with ops.name_scope(scope.original_name_scope): + with ops.name_scope(self._name_scope_name(scope)): variable = vs.get_variable(name, shape=shape, initializer=initializer, @@ -577,7 +581,7 @@ class Layer(object): scope_context_manager = vs.variable_scope( self._scope, reuse=self._reuse) with scope_context_manager as scope: - with ops.name_scope(scope.original_name_scope): + with ops.name_scope(self._name_scope_name(scope)): if not self.built: if not in_graph_mode: # Activity regularization is currently unsupported in Eager mode. -- GitLab From 2efb07ffe5d1f12a4eaef3d673f11615a8ddd6e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 18:49:29 -0800 Subject: [PATCH 0498/1801] Fix a bug when printing fusion_kind in hlo_graph_dumper. PiperOrigin-RevId: 175915347 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 881b7e227c..3d963a4b1e 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1003,7 +1003,7 @@ string HloDotDumper::GetInstructionNodeLabel(const HloInstruction* instr) { } string extended_opcode = StrCat(HloOpcodeString(instr->opcode()), - instr->opcode() == HloOpcode::kFusion + instr->opcode() != HloOpcode::kFusion ? "" : StrCat(":", xla::ToString(instr->fusion_kind()))); // If the name does not contain the opcode, render both. -- GitLab From fa15669fefdbe7e9a26ac2dd00bc7ce469ca60e1 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 15 Nov 2017 18:50:58 -0800 Subject: [PATCH 0499/1801] Rename HloToProfileIndex to HloProfileIndexMap Also fix a typo in a nearby comment. PiperOrigin-RevId: 175915436 --- .../xla/service/hlo_execution_profile.cc | 28 +++++++++---------- .../xla/service/hlo_execution_profile.h | 28 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index ecce2bd4e5..755374b91d 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -26,7 +26,7 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" namespace xla { -HloToProfileIndex::HloToProfileIndex(const HloModule& module) { +HloProfileIndexMap::HloProfileIndexMap(const HloModule& module) { size_t current_profile_index = 0; for (xla::HloComputation* computation : module.MakeComputationPostOrder()) { InsertOrDie(&computation_to_profile_idx_, computation, @@ -41,24 +41,24 @@ HloToProfileIndex::HloToProfileIndex(const HloModule& module) { } static HloProfilePrinter CreateOwnedHloProfilePrinter( - const HloToProfileIndex& hlo_to_profile_index, + const HloProfileIndexMap& hlo_profile_index_map, const HloCostAnalysis& cost_analysis) { using HloComputationInfo = HloProfilePrinter::HloComputationInfo; using HloInstructionInfo = HloProfilePrinter::HloInstructionInfo; HloComputationInfo* computation_infos = - new HloComputationInfo[hlo_to_profile_index.computation_count()]; + new HloComputationInfo[hlo_profile_index_map.computation_count()]; // There are two "indices" in play here. The first one is the index of the // HloComputationInfo or HloInstructionInfo in the array that contains said // HloComputationInfo or HloInstructionInfo. The second index is the index of // the HloComputationInfo or HloInstructionInfo in the profile counters array, - // as decided by hlo_to_profile_index. The latter index is always referred to - // as "profile_index". + // as decided by hlo_profile_index_map. The latter index is always referred + // to as "profile_index". size_t computation_index_in_static_data = 0; - size_t max_profile_index = hlo_to_profile_index.total_count(); - for (const auto& pair : hlo_to_profile_index.computation_to_profile_idx()) { + size_t max_profile_index = hlo_profile_index_map.total_count(); + for (const auto& pair : hlo_profile_index_map.computation_to_profile_idx()) { CHECK_LT(pair.second, max_profile_index); const HloComputation* computation = pair.first; size_t current_computation_index = computation_index_in_static_data++; @@ -85,7 +85,7 @@ static HloProfilePrinter CreateOwnedHloProfilePrinter( instruction_info->bytes_accessed = cost_analysis.bytes_accessed(*hlo); instruction_info->seconds = cost_analysis.seconds(*hlo); instruction_info->profile_index = - hlo_to_profile_index.GetProfileIndexFor(*hlo); + hlo_profile_index_map.GetProfileIndexFor(*hlo); CHECK_LT(instruction_info->profile_index, max_profile_index); } } @@ -109,26 +109,26 @@ static HloProfilePrinter CreateOwnedHloProfilePrinter( }; return HloProfilePrinter(computation_infos, - hlo_to_profile_index.computation_count(), deleter); + hlo_profile_index_map.computation_count(), deleter); } HloExecutionProfile::HloExecutionProfile(const HloModule& module, const HloCostAnalysis& cost_analysis) - : hlo_to_profile_index_(module), + : hlo_profile_index_map_(module), hlo_profile_printer_( - CreateOwnedHloProfilePrinter(hlo_to_profile_index_, cost_analysis)), + CreateOwnedHloProfilePrinter(hlo_profile_index_map_, cost_analysis)), profile_counters_( - /*count*/ hlo_to_profile_index_.total_count(), + /*count*/ hlo_profile_index_map_.total_count(), /*value*/ 0) {} void HloExecutionProfile::SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken) { - profile_counters_[hlo_to_profile_index_.GetProfileIndexFor(*hlo)] = + profile_counters_[hlo_profile_index_map_.GetProfileIndexFor(*hlo)] = cycles_taken; } uint64 HloExecutionProfile::GetCyclesTakenBy(const HloInstruction& hlo) const { - return profile_counters_[hlo_to_profile_index_.GetProfileIndexFor(hlo)]; + return profile_counters_[hlo_profile_index_map_.GetProfileIndexFor(hlo)]; } string HloExecutionProfile::ToString( diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.h b/tensorflow/compiler/xla/service/hlo_execution_profile.h index f945b9d84c..84702680c0 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.h +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.h @@ -29,18 +29,18 @@ namespace xla { class HloInstruction; -// Maps all HloInstructions and HloComputions in an HloModule to integers. -// These integers form the contiguous range [0, GetTotalCount()). -class HloToProfileIndex { +// Maps all HloInstructions and HloComputations in an HloModule to integers. +// These integers form the contiguous range [0, total_count()). +class HloProfileIndexMap { public: - // Scans `module` to populate this instance of HloToProfileIndex. - explicit HloToProfileIndex(const HloModule& module); + // Scans `module` to populate this instance of HloProfileIndexMap. + explicit HloProfileIndexMap(const HloModule& module); - HloToProfileIndex(const HloToProfileIndex&) = default; - HloToProfileIndex(HloToProfileIndex&&) = default; + HloProfileIndexMap(const HloProfileIndexMap&) = default; + HloProfileIndexMap(HloProfileIndexMap&&) = default; - HloToProfileIndex& operator=(const HloToProfileIndex&) = default; - HloToProfileIndex& operator=(HloToProfileIndex&&) = default; + HloProfileIndexMap& operator=(const HloProfileIndexMap&) = default; + HloProfileIndexMap& operator=(HloProfileIndexMap&&) = default; size_t GetProfileIndexFor(const HloInstruction& instruction) const { return FindOrDie(instruction_to_profile_idx(), &instruction); @@ -97,14 +97,14 @@ class HloExecutionProfile { // Return the number of cycles this computation took to execute. uint64 total_cycles_executed(const HloComputation& computation) const { - return profile_counters_[hlo_to_profile_index_.GetProfileIndexFor( + return profile_counters_[hlo_profile_index_map_.GetProfileIndexFor( computation)]; } // Record how many cycles a computation took to execute. void set_total_cycles_executed(const HloComputation& computation, uint64 total_cycles_executed) { - profile_counters_[hlo_to_profile_index_.GetProfileIndexFor(computation)] = + profile_counters_[hlo_profile_index_map_.GetProfileIndexFor(computation)] = total_cycles_executed; } @@ -117,9 +117,9 @@ class HloExecutionProfile { string ToString(const DeviceDescription& device_description) const; private: - // hlo_to_profile_index_ maps an Hlo entity (computation or instruction) to an - // index in profile_counters_. - HloToProfileIndex hlo_to_profile_index_; + // hlo_profile_index_map_ maps an Hlo entity (computation or instruction) to + // an index in profile_counters_. + HloProfileIndexMap hlo_profile_index_map_; // Used to print profile_counters_ in a human readable form. HloProfilePrinter hlo_profile_printer_; -- GitLab From 4916c64836d5f51d6b8878f429bc1622c465fcdf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Nov 2017 19:58:36 -0800 Subject: [PATCH 0500/1801] [XLA] Adding kConditional opcode that represents a conditional HLO instruction. PiperOrigin-RevId: 175919301 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 1 + tensorflow/compiler/xla/service/hlo_instruction.cc | 3 +++ tensorflow/compiler/xla/service/hlo_opcode.h | 1 + tensorflow/compiler/xla/service/instruction_fusion.cc | 1 + tensorflow/compiler/xla/tools/parser/hlo_parser.cc | 1 + 5 files changed, 7 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 3d963a4b1e..d71a4b42c7 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -970,6 +970,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kOutfeed: case HloOpcode::kCrossReplicaSum: return kBrown; + case HloOpcode::kConditional: case HloOpcode::kCustomCall: case HloOpcode::kWhile: case HloOpcode::kCall: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 045abdac8b..f7b5b265d9 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1210,6 +1210,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( new_operands[2], new_operands[3], new_operands[4], epsilon(), feature_index()); break; + case HloOpcode::kConditional: case HloOpcode::kRecv: case HloOpcode::kRecvDone: case HloOpcode::kSend: @@ -1603,6 +1604,7 @@ bool HloInstruction::IdenticalSlowPath( return dimensions() == other.dimensions(); // These opcodes are not yet supported. + case HloOpcode::kConditional: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kSort: @@ -2355,6 +2357,7 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleSendDone(this); // These opcodes are not handled here. + case HloOpcode::kConditional: case HloOpcode::kTrace: break; } diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index e0d02e0665..7b07027441 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -58,6 +58,7 @@ namespace xla { V(kClamp, "clamp") \ V(kComplex, "complex") \ V(kConcatenate, "concatenate", kHloOpcodeIsVariadic) \ + V(kConditional, "conditional") \ V(kConstant, "constant") \ V(kConvert, "convert") \ V(kConvolution, "convolution") \ diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index dea47b1fd7..de4804996f 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -92,6 +92,7 @@ namespace xla { case HloOpcode::kBatchNormInference: case HloOpcode::kBatchNormGrad: case HloOpcode::kCall: + case HloOpcode::kConditional: case HloOpcode::kConvolution: case HloOpcode::kCrossReplicaSum: case HloOpcode::kCustomCall: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index a65e5a856f..0159d03b11 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -798,6 +798,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, shape, operands[0], config ? *config : "")); break; } + case HloOpcode::kConditional: case HloOpcode::kCustomCall: case HloOpcode::kReducePrecision: case HloOpcode::kRng: -- GitLab From 67e4add65243bf10fb09201d93f7be1f10762066 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 15 Nov 2017 20:26:08 -0800 Subject: [PATCH 0501/1801] Make MapDataset saveable. Also integrate functionality of GraphDefBuilderWrapper::AddDatasetWithInputAsList into AddDataset since MapDatasetOp requires 2 inputs of type single tensor and tensor list. PiperOrigin-RevId: 175921068 --- .../contrib/data/python/kernel_tests/BUILD | 6 + .../kernel_tests/map_dataset_op_test.py | 516 ++++++++++++++++++ tensorflow/core/graph/graph_def_builder.h | 14 + tensorflow/core/kernels/batch_dataset_op.cc | 4 +- tensorflow/core/kernels/captured_function.h | 2 + .../core/kernels/concatenate_dataset_op.cc | 6 +- tensorflow/core/kernels/dataset.h | 161 ++++-- tensorflow/core/kernels/iterator_ops.cc | 52 +- tensorflow/core/kernels/map_dataset_op.cc | 57 +- tensorflow/core/kernels/repeat_dataset_op.cc | 4 +- tensorflow/core/kernels/shuffle_dataset_op.cc | 4 +- tensorflow/core/kernels/skip_dataset_op.cc | 4 +- tensorflow/core/kernels/take_dataset_op.cc | 4 +- tensorflow/core/kernels/tensor_dataset_op.cc | 2 +- .../core/kernels/tensor_slice_dataset_op.cc | 2 +- tensorflow/core/kernels/zip_dataset_op.cc | 8 +- 16 files changed, 768 insertions(+), 78 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 2b3843b97b..badabed701 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -269,6 +269,7 @@ py_test( srcs_version = "PY2AND3", deps = [ "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/contrib/data/python/ops:transformation_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -276,15 +277,20 @@ py_test( "//tensorflow/python:data_flow_ops", "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", "//tensorflow/python:functional_ops", "//tensorflow/python:io_ops", "//tensorflow/python:lookup_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:platform", "//tensorflow/python:random_ops", "//tensorflow/python:script_ops", "//tensorflow/python:string_ops", + "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "//tensorflow/python/data/ops:iterator_ops", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py index 8ccf92c17a..d8e7f9d593 100644 --- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py @@ -25,9 +25,13 @@ import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import error_ops +from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops +from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import function +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops @@ -40,7 +44,10 @@ from tensorflow.python.ops import script_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile from tensorflow.python.platform import test +from tensorflow.python.training import saver as saver_lib from tensorflow.python.util import compat @@ -668,6 +675,515 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testCaptureResourceInMapFn(self): + + def _build_ds(iterator): + + def _map_fn(x): + get_next = iterator.get_next() + return x * get_next + + return dataset_ops.Dataset.range(10).map(_map_fn) + + def _build_graph(): + captured_iterator = dataset_ops.Dataset.range( + 10).make_initializable_iterator() + ds = _build_ds(captured_iterator) + iterator = ds.make_initializable_iterator() + init_op = iterator.initializer + return captured_iterator.initializer, init_op + + with ops.Graph().as_default() as g: + captured_init_op, init_op = _build_graph() + with self.test_session(graph=g) as sess: + sess.run(captured_init_op) + with self.assertRaises(errors.UnimplementedError): + # CapturedFunction does not support capturing IteratorResource. + sess.run(init_op) + + +class MapDatasetSerializationTest(test.TestCase): + + def setUp(self): + self._tensor_slice_len = 7 + self._num_epochs = 14 + self._num_outputs = self._tensor_slice_len * self._num_epochs + + def tearDown(self): + # Remove all checkpoint files. + prefix = self._ckpt_path() + pattern = prefix + "*" + files = gfile.Glob(pattern) + map(gfile.Remove, files) + + def _build_ds(self, multiplier=37.0): + components = (np.arange(self._tensor_slice_len), np.array([[1, 2, 3]]) * + np.arange(self._tensor_slice_len)[:, np.newaxis], + np.array(multiplier) * np.arange(self._tensor_slice_len)) + + def _map_fn(x, y, z): + return math_ops.square(x), math_ops.square(y), math_ops.square(z) + + return (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) + .repeat(self._num_epochs)) + + def _build_graph(self, multiplier=37.0, build_saveable=True): + ds = self._build_ds(multiplier) + iterator = ds.make_initializable_iterator() + + if build_saveable: + saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) + init_op = iterator.initializer + get_next = iterator.get_next() + self._add_iterator_ops_to_collection(init_op, get_next) + saver = saver_lib.Saver(allow_empty=True) + return init_op, get_next, saver + + def _build_empty_graph(self, output_types, output_shapes): + iterator = iterator_ops.Iterator.from_structure(output_types, output_shapes) + saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) + saver = saver_lib.Saver() + get_next = iterator.get_next() + return get_next, saver + + def _add_iterator_ops_to_collection(self, init_op, get_next): + ops.add_to_collection("iterator_ops", init_op) + ops.add_to_collection("iterator_ops", get_next[0]) + ops.add_to_collection("iterator_ops", get_next[1]) + ops.add_to_collection("iterator_ops", get_next[2]) + + def _get_iterator_ops_from_collection(self): + init_op, get_next_1, get_next_2, get_next_3 = ops.get_collection( + "iterator_ops") + return init_op, (get_next_1, get_next_2, get_next_3) + + def _ckpt_path(self): + return os.path.join(self.get_temp_dir(), "iterator") + + def _latest_ckpt(self): + return saver_lib.latest_checkpoint(self.get_temp_dir()) + + def _save(self, sess, saver): + saver.save(sess, self._ckpt_path()) + + def _restore(self, saver, sess): + saver.restore(sess, self._latest_ckpt()) + + def _import_meta_graph(self): + meta_file_path = self._ckpt_path() + ".meta" + return saver_lib.import_meta_graph(meta_file_path) + + def _testReadWithBreaks(self, break_points, init_before_restore=False): + expected = [] + actual = [] + # Generate the ground truth. + with ops.Graph().as_default() as g: + init_op, get_next_op, _ = self._build_graph() + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(self._num_outputs): + expected.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + # Run and checkpoint after first break_point. + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph() + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(break_points[0]): + actual.append(sess.run(get_next_op)) + self._save(sess, saver) + + # Load from checkpoint and continue running while stopping at each + # subsequent checkpoint. + for i in range(len(break_points)): + with ops.Graph().as_default() as g: + saver = self._import_meta_graph() + init_op, get_next_op = self._get_iterator_ops_from_collection() + with self.test_session(graph=g) as sess: + if init_before_restore: + sess.run(init_op) + self._restore(saver, sess) + start = break_points[i] + end = break_points[ + i + 1] if i < len(break_points) - 1 else self._num_outputs + for _ in range(end - start): + actual.append(sess.run(get_next_op)) + self._save(sess, saver) + if end == self._num_outputs: + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + self._match(expected, actual) + + def _match(self, expected, actual): + self.assertEqual(len(expected), len(actual)) + for expected_tuple, actual_tuple in zip(expected, actual): + self.assertEqual(expected_tuple[0], actual_tuple[0]) + self.assertSequenceEqual(expected_tuple[1].tolist(), + actual_tuple[1].tolist()) + self.assertEqual(expected_tuple[2], actual_tuple[2]) + + def _does_not_match(self, expected, actual): + with self.assertRaises(AssertionError): + self._match(expected, actual) + + def testSaveRestore(self): + self._testReadWithBreaks([4]) + self._testReadWithBreaks([13]) + self._testReadWithBreaks([18]) + self._testReadWithBreaks([23]) + + def testSaveUnusedIterator(self): + self._testReadWithBreaks([0]) + + def testSaveFullyUsedIterator(self): + self._testReadWithBreaks([self._num_outputs]) + + def testMultipleBreaks(self): + self._testReadWithBreaks([0, 5, 9, 15, 25, 32]) + + def testIdempotence(self): + # Attempt to save iterator immediately after restoring. + self._testReadWithBreaks([1, 1, 5, 5, 5, 25, 32]) + + def testInitThenRestore(self): + self._testReadWithBreaks([0, 5, 9, 15, 25, 32], init_before_restore=True) + + def testRestoreExhaustedIterator(self): + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph() + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(self._num_outputs): + sess.run(get_next_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + self._save(sess, saver) + + with ops.Graph().as_default() as g: + saver = self._import_meta_graph() + init_op, get_next_op = self._get_iterator_ops_from_collection() + with self.test_session(graph=g) as sess: + self._restore(saver, sess) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + def testResetRestoredIterator(self): + expected = [] + # Collect ground truth containing all outputs. + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph() + break_point = self._num_outputs // 2 + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(break_point): + expected.append(sess.run(get_next_op)) + self._save(sess, saver) + for _ in range(self._num_outputs - break_point): + expected.append(sess.run(get_next_op)) + + actual = [] + # Restore from checkpoint and then run init_op. + with ops.Graph().as_default() as g: + saver = self._import_meta_graph() + init_op, get_next_op = self._get_iterator_ops_from_collection() + with self.test_session(graph=g) as sess: + self._restore(saver, sess) + sess.run(init_op) + for _ in range(self._num_outputs): + actual.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + self._match(expected, actual) + + def testRestoreInModifiedGraph(self): + expected = [] + actual_without_restore = [] + actual = [] + break_point = 10 + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph(multiplier=15.0) + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(break_point): + expected.append(sess.run(get_next_op)) + actual.extend(expected) + self._save(sess, saver) + for _ in range(self._num_outputs - break_point): + expected.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + # Collect outputs by running modified graph. + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph(multiplier=30.0) + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(self._num_outputs): + actual_without_restore.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + # Restore the checkpoint in the modified graph. + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph(multiplier=30.0) + with self.test_session(graph=g) as sess: + self._restore(saver, sess) + for _ in range(self._num_outputs - break_point): + actual.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + # Ensure the modified graph gets overridden when restoring checkpoint. + self._does_not_match(expected, actual_without_restore) + # Expect that the outputs are what we would expect if we ran the old + # graph. + self._match(expected, actual) + + # TODO(srbs): Add this test to dataset_serialization_test_base.py. + def testRestoreInEmptyGraph(self): + expected = [] + actual = [] + break_point = 10 + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph(multiplier=15.0) + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(break_point): + sess.run(get_next_op) + self._save(sess, saver) + for _ in range(self._num_outputs - break_point): + expected.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + with ops.Graph().as_default() as g: + ds = self._build_ds() + output_types = ds.output_types + output_shapes = ds.output_shapes + + with ops.Graph().as_default() as g: + get_next_op, saver = self._build_empty_graph(output_types, output_shapes) + with self.test_session(graph=g) as sess: + self._restore(saver, sess) + for _ in range(self._num_outputs - break_point): + actual.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + # Expect that the outputs are what we would expect if we ran the old + # graph. + self._match(expected, actual) + + def testDoNotBuildSaveable(self): + break_point = 10 + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph(multiplier=15.0) + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(break_point): + sess.run(get_next_op) + self._save(sess, saver) + + expected = [] + # Collect ground truth by running modified graph. + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph(multiplier=30.0) + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(self._num_outputs): + expected.append(sess.run(get_next_op)) + + actual = [] + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = self._build_graph( + multiplier=30.0, build_saveable=False) + with self.test_session(graph=g) as sess: + # Since the SaveableObject was not added to Saver's list + # of saveables, iterator state is not restored by saver.restore(). + self._restore(saver, sess) + with self.assertRaises(errors.FailedPreconditionError): + sess.run(get_next_op) + sess.run(init_op) + for _ in range(self._num_outputs): + actual.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + self._match(expected, actual) + + def testSaveStatefulFunction(self): + + def _build_ds(): + + def _map_fn(x): + return random_ops.random_uniform( + (), 0, 10, dtype=dtypes.int32) * math_ops.to_int32(x) + + return dataset_ops.Dataset.range(100).map(_map_fn) + + def _build_graph(): + ds = _build_ds() + iterator = ds.make_initializable_iterator() + + saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) + init_op = iterator.initializer + get_next = iterator.get_next() + saver = saver_lib.Saver(allow_empty=True) + return init_op, get_next, saver + + break_point = 10 + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = _build_graph() + with self.test_session(graph=g) as sess: + sess.run(init_op) + for _ in range(break_point): + sess.run(get_next_op) + with self.assertRaises(errors.InvalidArgumentError): + self._save(sess, saver) + + def testCaptureVariableInMapFn(self): + + def _build_ds(): + counter_var = variable_scope.get_variable( + "counter", (), dtypes.int32, use_resource=True) + return (dataset_ops.Dataset.from_tensors(0).repeat(10).map( + lambda _: counter_var.assign_add(1))) + + def _build_graph(): + ds = _build_ds() + iterator = ds.make_initializable_iterator() + + saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) + init_op = iterator.initializer + get_next = iterator.get_next() + saver = saver_lib.Saver(allow_empty=True) + return init_op, get_next, saver + + break_point = 10 + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = _build_graph() + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for _ in range(break_point): + sess.run(get_next_op) + with self.assertRaises(errors.InvalidArgumentError): + self._save(sess, saver) + + def testCaptureDefunInMapFn(self): + num_outputs = 100 + + def _build_ds(): + + @function.Defun(dtypes.int64) + def defun_fn(x): + return constant_op.constant(1000) + math_ops.to_int32(x) + + return dataset_ops.Dataset.range(num_outputs).map(defun_fn) + + def _build_graph(): + ds = _build_ds() + iterator = ds.make_initializable_iterator() + + saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) + init_op = iterator.initializer + get_next = iterator.get_next() + saver = saver_lib.Saver(allow_empty=True) + return init_op, get_next, saver + + break_point = 10 + expected = [] + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = _build_graph() + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for _ in range(break_point): + sess.run(get_next_op) + self._save(sess, saver) + for _ in range(num_outputs - break_point): + expected.append(sess.run(get_next_op)) + + with ops.Graph().as_default() as g: + ds = _build_ds() + output_types = ds.output_types + output_shapes = ds.output_shapes + + actual = [] + with ops.Graph().as_default() as g: + get_next_op, saver = self._build_empty_graph(output_types, output_shapes) + with self.test_session(graph=g) as sess: + self._restore(saver, sess) + for _ in range(num_outputs - break_point): + actual.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + self.assertSequenceEqual(expected, actual) + + def testBuildDefunInMapFn(self): + num_outputs = 100 + + def _build_ds(): + + @function.Defun(dtypes.int64) + def defun_fn(x): + + @function.Defun(dtypes.int32) + def defun_fn_deep(x): + return constant_op.constant(1000) + math_ops.to_int32(x) + + return constant_op.constant(11000) + defun_fn_deep(math_ops.to_int32(x)) + + return dataset_ops.Dataset.range(num_outputs).map(defun_fn) + + def _build_graph(): + ds = _build_ds() + iterator = ds.make_initializable_iterator() + + saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) + init_op = iterator.initializer + get_next = iterator.get_next() + saver = saver_lib.Saver(allow_empty=True) + return init_op, get_next, saver + + break_point = 10 + expected = [] + with ops.Graph().as_default() as g: + init_op, get_next_op, saver = _build_graph() + with self.test_session(graph=g) as sess: + sess.run(variables.global_variables_initializer()) + sess.run(init_op) + for _ in range(break_point): + sess.run(get_next_op) + self._save(sess, saver) + for _ in range(num_outputs - break_point): + expected.append(sess.run(get_next_op)) + + with ops.Graph().as_default() as g: + ds = _build_ds() + output_types = ds.output_types + output_shapes = ds.output_shapes + + actual = [] + with ops.Graph().as_default() as g: + get_next_op, saver = self._build_empty_graph(output_types, output_shapes) + with self.test_session(graph=g) as sess: + self._restore(saver, sess) + for _ in range(num_outputs - break_point): + actual.append(sess.run(get_next_op)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next_op) + + self.assertSequenceEqual(expected, actual) + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/graph/graph_def_builder.h b/tensorflow/core/graph/graph_def_builder.h index 4d9fe1dee9..b389cd8053 100644 --- a/tensorflow/core/graph/graph_def_builder.h +++ b/tensorflow/core/graph/graph_def_builder.h @@ -165,6 +165,20 @@ class GraphDefBuilder { // by name), and makes sure the resulting graph is valid. Status ToGraph(Graph* graph) const; + // Adds the function and gradient definitions in `fdef_lib` to this graph's op + // registry. Ignores duplicate functions, and returns a bad status if an + // imported function differs from an existing function or op with the same + // name. + Status AddFunctionLibrary(const FunctionDefLibrary& fdef_lib) { + return graph_.AddFunctionLibrary(fdef_lib); + } + + // Returns whether a user-defined function with `name` already exists in the + // graph. + bool HasFunction(const string& name) { + return graph_.flib_def().Find(name) != nullptr; + } + private: Graph graph_; Status status_; diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/batch_dataset_op.cc index 6a5fd17a9e..46412a554b 100644 --- a/tensorflow/core/kernels/batch_dataset_op.cc +++ b/tensorflow/core/kernels/batch_dataset_op.cc @@ -80,10 +80,10 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); Node* batch_size = nullptr; TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size)); TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/kernels/captured_function.h b/tensorflow/core/kernels/captured_function.h index 55d337d707..9430127600 100644 --- a/tensorflow/core/kernels/captured_function.h +++ b/tensorflow/core/kernels/captured_function.h @@ -71,6 +71,8 @@ class CapturedFunction { ResourceMgr* resource_manager() const { return device_->resource_manager(); } + const std::vector& captured_inputs() { return captured_inputs_; } + static int64 generate_step_id() { // Choose a step ID that is guaranteed not to clash with any // Session-generated step ID. DirectSession only generates diff --git a/tensorflow/core/kernels/concatenate_dataset_op.cc b/tensorflow/core/kernels/concatenate_dataset_op.cc index c3bd89c479..ad78ba0186 100644 --- a/tensorflow/core/kernels/concatenate_dataset_op.cc +++ b/tensorflow/core/kernels/concatenate_dataset_op.cc @@ -79,13 +79,13 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel { string DebugString() override { return "ConcatenateDatasetOp::Dataset"; } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { Node* input_graph = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph)); + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph)); Node* to_concatenate_graph = nullptr; TF_RETURN_IF_ERROR( - b->AddParentDataset(to_concatenate_, &to_concatenate_graph)); + b->AddParentDataset(ctx, to_concatenate_, &to_concatenate_graph)); TF_RETURN_IF_ERROR( b->AddDataset(this, {input_graph, to_concatenate_graph}, output)); return Status::OK(); diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index a90590fc7e..b9b0e5a7c6 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -137,6 +137,23 @@ class GraphDefBuilderWrapper { const std::vector& inputs, const std::vector>& attrs, Node** output) { + std::vector> enumerated_inputs( + inputs.size()); + for (int i = 0; i < inputs.size(); i++) { + enumerated_inputs[i] = std::make_pair(i, inputs[i]); + } + return AddDataset(dataset, enumerated_inputs, {}, attrs, output); + } + + template + Status AddDataset( + const DatasetType* dataset, + const std::vector>& inputs, + const std::vector< + std::pair>>& + list_inputs, + const std::vector>& attrs, + Node** output) { const string& op_type_name = dataset->op_name(); std::unique_ptr opts( new GraphDefBuilder::Options(b_->opts())); @@ -161,8 +178,22 @@ class GraphDefBuilderWrapper { } NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name, opts->op_registry()); - for (auto node_out : inputs) { - node_builder.Input(node_out); + { + size_t total_size = inputs.size() + list_inputs.size(); + auto inputs_iter = inputs.begin(); + auto list_inputs_iter = list_inputs.begin(); + for (int i = 0; i < total_size; i++) { + if (inputs_iter != inputs.end() && inputs_iter->first == i) { + node_builder.Input(inputs_iter->second); + inputs_iter++; + } else if (list_inputs_iter != list_inputs.end() && + list_inputs_iter->first == i) { + node_builder.Input(list_inputs_iter->second); + list_inputs_iter++; + } else { + return errors::InvalidArgument("No input found for index ", i); + } + } } *output = opts->FinalizeBuilder(&node_builder); if (*output == nullptr) { @@ -172,35 +203,56 @@ class GraphDefBuilderWrapper { return Status::OK(); } - // TODO(shivaniagrawal): Single method for AddDataset for - // NodeOut/ArrraySlice - template - Status AddDatasetWithInputAsList(const DatasetType* dataset, - gtl::ArraySlice input, - Node** output) { - const string& op_type_name = dataset->op_name(); - std::unique_ptr opts( - new GraphDefBuilder::Options(b_->opts())); - bool has_output_types_attr = HasAttr(op_type_name, "output_types"); - bool has_output_shapes_attr = HasAttr(op_type_name, "output_shapes"); - if (has_output_shapes_attr) { - opts.reset(new GraphDefBuilder::Options( - opts->WithAttr("output_shapes", dataset->output_shapes()))); + // Adds a user-defined function with name `function_name` to the graph and + // recursively adds all functions it references. If a function with a matching + // name has already been added, returns with OK status. If a user-defined with + // name `function_name` is not found in the FunctionLibraryDefinition, returns + // and InvalidArgumentError. If the function with name `function_name` or any + // of its dependent functions are stateful, returns an InvalidArgument error. + Status AddFunction(OpKernelContext* ctx, const string& function_name) { + if (b_->HasFunction(function_name)) { + LOG(INFO) << "Function with name " << function_name << "already exists in" + << " the graph. It will not be added again."; + return Status::OK(); } - if (has_output_types_attr) { - opts.reset(new GraphDefBuilder::Options( - opts->WithAttr("output_types", dataset->output_dtypes()))); + TF_RETURN_IF_ERROR(EnsureFunctionIsStateless(ctx, function_name)); + const FunctionLibraryDefinition* flib_def = + ctx->function_library()->GetFunctionLibraryDefinition(); + const FunctionDef* f_def = flib_def->Find(function_name); + if (f_def == nullptr) { + return errors::InvalidArgument("Unable to find FunctionDef for ", + function_name, " in the registry."); } - if (opts->HaveError()) { - return errors::Internal("AddDataset: Error building Options."); + FunctionDefLibrary def; + *def.add_function() = *f_def; + const string gradient_func = flib_def->FindGradient(function_name); + if (!gradient_func.empty()) { + GradientDef* g_def = def.add_gradient(); + g_def->set_function_name(function_name); + g_def->set_gradient_func(gradient_func); } - NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name, - opts->op_registry()); - node_builder.Input(input); - *output = opts->FinalizeBuilder(&node_builder); - if (*output == nullptr) { - return errors::Internal("AddDataset: Failed to build ", op_type_name, - " op."); + TF_RETURN_IF_ERROR(b_->AddFunctionLibrary(def)); + + // Recursively add functions in inputs of function_name. + for (const NodeDef& node_def : f_def->node_def()) { + const OpRegistrationData* op_reg_data = nullptr; + TF_RETURN_IF_ERROR(flib_def->LookUp(node_def.op(), &op_reg_data)); + if (op_reg_data->is_function_op) { + TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name())); + } + } + + // Recursively add functions in attrs of function_name. + for (auto iter = f_def->attr().begin(); iter != f_def->attr().end(); + iter++) { + const AttrValue& attr_value = iter->second; + if (attr_value.has_func()) { + TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name())); + } else if (attr_value.has_list()) { + for (const NameAttrList& name_attr_list : attr_value.list().func()) { + TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name())); + } + } } return Status::OK(); } @@ -217,6 +269,28 @@ class GraphDefBuilderWrapper { b_->opts().WithAttr("dtype", val.dtype()).WithAttr("value", val)); } + Status EnsureFunctionIsStateless(OpKernelContext* ctx, + const string& function_name) const { + const FunctionLibraryDefinition* lib_def = + ctx->function_library()->GetFunctionLibraryDefinition(); + const FunctionDef* function_def = lib_def->Find(function_name); + if (!function_def) { + return errors::InvalidArgument("Unable to find FunctionDef for ", + function_name, " in registry."); + } + for (const NodeDef& node_def : function_def->node_def()) { + const OpDef* op_def; + TF_RETURN_IF_ERROR(lib_def->LookUpOpDef(node_def.op(), &op_def)); + if (op_def->is_stateful()) { + return errors::InvalidArgument( + "Op[name: ", node_def.name(), ", type: ", node_def.op(), "] ", + "in function ", function_name, " is stateful. ", + "Saving stateful functions is not supported yet."); + } + } + return Status::OK(); + } + bool HasAttr(const string& op_type_name, const string& attr_name) { const OpDef* op_def = nullptr; Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def); @@ -306,7 +380,7 @@ class IteratorBase { virtual const std::vector& output_shapes() const = 0; // Saves the state of this iterator. - virtual Status Save(IteratorStateWriter* writer) { + virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) { return SaveInternal(writer); } @@ -377,7 +451,7 @@ class DatasetBase : public core::RefCounted { virtual string DebugString() = 0; // Serializes the dataset and writes it to the `writer`. - virtual Status Save(IteratorStateWriter* writer) const { + virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) const { return errors::Unimplemented("DatasetBase::Save"); } @@ -389,11 +463,18 @@ class DatasetBase : public core::RefCounted { class DatasetGraphDefBuilder : public GraphDefBuilderWrapper { public: DatasetGraphDefBuilder(GraphDefBuilder* b) : GraphDefBuilderWrapper(b) {} - Status AddParentDataset(const DatasetBase* dataset, Node** output) { - return dataset->AsGraphDefInternal(this, output); + Status AddParentDataset(OpKernelContext* ctx, const DatasetBase* dataset, + Node** output) { + return dataset->AsGraphDefInternal(ctx, this, output); } }; + virtual Status AsGraphDefInternal(OpKernelContext* ctx, + DatasetGraphDefBuilder* b, + Node** node) const { + return AsGraphDefInternal(b, node); + } + virtual Status AsGraphDefInternal(DatasetGraphDefBuilder* b, Node** node) const { return errors::Unimplemented("AsGraphDefInternal"); @@ -408,10 +489,11 @@ class GraphDatasetBase : public DatasetBase { const string op_name() const { return op_name_; } - Status Save(IteratorStateWriter* writer) const override { + Status Save(OpKernelContext* ctx, + IteratorStateWriter* writer) const override { string serialized_graph_def; string output_node; - TF_RETURN_IF_ERROR(Serialize(&serialized_graph_def, &output_node)); + TF_RETURN_IF_ERROR(Serialize(ctx, &serialized_graph_def, &output_node)); TF_RETURN_IF_ERROR( writer->WriteScalar(kDatasetGraphKey, serialized_graph_def)); TF_RETURN_IF_ERROR( @@ -427,11 +509,12 @@ class GraphDatasetBase : public DatasetBase { static const char kDatasetGraphOutputNodeKey[]; private: - Status Serialize(string* serialized_graph_def, string* output_node) const { + Status Serialize(OpKernelContext* ctx, string* serialized_graph_def, + string* output_node) const { GraphDefBuilder b; DatasetGraphDefBuilder db(&b); Node* node = nullptr; - TF_RETURN_IF_ERROR(AsGraphDefInternal(&db, &node)); + TF_RETURN_IF_ERROR(AsGraphDefInternal(ctx, &db, &node)); *output_node = node->name(); GraphDef graph_def; TF_RETURN_IF_ERROR(b.ToGraphDef(&graph_def)); @@ -480,9 +563,9 @@ class DatasetIterator : public IteratorBase { return GetNextInternal(ctx, out_tensors, end_of_sequence); } - Status Save(IteratorStateWriter* writer) final { - TF_RETURN_IF_ERROR(dataset()->Save(writer)); - return IteratorBase::Save(writer); + Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) final { + TF_RETURN_IF_ERROR(dataset()->Save(ctx, writer)); + return IteratorBase::Save(ctx, writer); } protected: diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/iterator_ops.cc index ae77ae6433..b48da5b326 100644 --- a/tensorflow/core/kernels/iterator_ops.cc +++ b/tensorflow/core/kernels/iterator_ops.cc @@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/dataset.h" - #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/graph_runner.h" #include "tensorflow/core/framework/iterator.pb.h" @@ -22,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/variant_op_registry.h" #include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/kernels/dataset.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/cleanup.h" @@ -79,10 +78,12 @@ Status VerifyShapesCompatible(const std::vector& expected, class IteratorResource : public ResourceBase { public: IteratorResource(const DataTypeVector& output_dtypes, - const std::vector& output_shapes) + const std::vector& output_shapes, + const int graph_def_version) : iterator_(nullptr), output_dtypes_(output_dtypes), - output_shapes_(output_shapes) {} + output_shapes_(output_shapes), + graph_def_version_(graph_def_version) {} Status GetNext(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) { @@ -97,10 +98,10 @@ class IteratorResource : public ResourceBase { } } - Status Save(IteratorStateWriter* writer) { + Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) { std::shared_ptr captured_iterator(iterator_); if (captured_iterator) { - return captured_iterator->Save(writer); + return captured_iterator->Save(ctx, writer); } else { return errors::FailedPrecondition( "Save() failed because the iterator has not been initialized. " @@ -125,8 +126,21 @@ class IteratorResource : public ResourceBase { TF_RETURN_IF_ERROR(ImportGraphDef({}, graph_def, &graph, nullptr)); std::vector outputs; GraphRunner graph_runner(ctx->env()); - TF_RETURN_IF_ERROR(graph_runner.Run(&graph, ctx->function_library(), {}, - {output_node}, &outputs)); + + // Build a new FLR that knows about the functions in the graph. + std::unique_ptr flib_def( + new FunctionLibraryDefinition( + *ctx->function_library()->GetFunctionLibraryDefinition())); + TF_RETURN_IF_ERROR(flib_def->AddLibrary(graph_def.library())); + std::unique_ptr pflr( + new ProcessFunctionLibraryRuntime(nullptr, ctx->env(), + graph_def_version_, flib_def.get(), + {}, nullptr)); + FunctionLibraryRuntime* lib = + pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); + + TF_RETURN_IF_ERROR( + graph_runner.Run(&graph, lib, {}, {output_node}, &outputs)); TF_RETURN_IF_ERROR(GetDatasetFromVariantTensor(outputs[0], &dataset)); TF_RETURN_IF_ERROR(set_iterator(dataset->MakeIterator("Iterator"))); @@ -166,6 +180,7 @@ class IteratorResource : public ResourceBase { std::shared_ptr iterator_; const DataTypeVector output_dtypes_; const std::vector output_shapes_; + const int graph_def_version_; }; // Helper class for reading data from a VariantTensorData object. @@ -319,11 +334,12 @@ class IteratorStateVariant { } // Initializes this object with the current state of the iterator so // that it can be written on the next call to Encode(). - Status InitializeFromIterator(IteratorResource* iterator_resource) { + Status InitializeFromIterator(OpKernelContext* ctx, + IteratorResource* iterator_resource) { data_.reset(new VariantTensorData()); data_->set_type_name(TypeName()); VariantTensorDataWriter writer(data_.get()); - TF_RETURN_IF_ERROR(iterator_resource->Save(&writer)); + TF_RETURN_IF_ERROR(iterator_resource->Save(ctx, &writer)); TF_RETURN_IF_ERROR(writer.Flush()); return Status::OK(); } @@ -375,7 +391,8 @@ REGISTER_UNARY_VARIANT_DECODE_FUNCTION(IteratorStateVariant, class IteratorHandleOp : public ResourceOpKernel { public: explicit IteratorHandleOp(OpKernelConstruction* ctx) - : ResourceOpKernel(ctx) { + : ResourceOpKernel(ctx), + graph_def_version_(ctx->graph_def_version()) { OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_dtypes_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); } @@ -383,7 +400,8 @@ class IteratorHandleOp : public ResourceOpKernel { private: Status CreateResource(IteratorResource** ret) override EXCLUSIVE_LOCKS_REQUIRED(mu_) { - *ret = new IteratorResource(output_dtypes_, output_shapes_); + *ret = new IteratorResource(output_dtypes_, output_shapes_, + graph_def_version_); return Status::OK(); } @@ -398,6 +416,7 @@ class IteratorHandleOp : public ResourceOpKernel { private: DataTypeVector output_dtypes_; std::vector output_shapes_; + const int graph_def_version_; }; class MakeIteratorOp : public OpKernel { @@ -460,7 +479,8 @@ class OneShotIteratorOp : public AsyncOpKernel { ctx->env(), ThreadOptions(), strings::StrCat("one_shot_iterator_initialization_thread_", SanitizeThreadSuffix(name())), - 1 /* num_threads */, false /* low_latency_hint */)) + 1 /* num_threads */, false /* low_latency_hint */)), + graph_def_version_(ctx->graph_def_version()) { string shared_name; @@ -544,7 +564,8 @@ class OneShotIteratorOp : public AsyncOpKernel { ctx->resource_manager()->LookupOrCreate( cinfo->container(), cinfo->name(), iterator, [this](IteratorResource** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - *ret = new IteratorResource(output_dtypes_, output_shapes_); + *ret = new IteratorResource(output_dtypes_, output_shapes_, + graph_def_version_); return Status::OK(); })); @@ -634,6 +655,7 @@ class OneShotIteratorOp : public AsyncOpKernel { Status initialization_status_ GUARDED_BY(mu_); std::vector> done_callbacks_ GUARDED_BY(mu_); + const int graph_def_version_; }; class IteratorGetNextOp : public AsyncOpKernel { @@ -787,7 +809,7 @@ class SerializeIteratorOp : public OpKernel { Tensor* variant_t; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &variant_t)); IteratorStateVariant v; - OP_REQUIRES_OK(ctx, v.InitializeFromIterator(iterator_resource)); + OP_REQUIRES_OK(ctx, v.InitializeFromIterator(ctx, iterator_resource)); variant_t->scalar()() = v; } }; diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/map_dataset_op.cc index ac458701fe..4ba09bc335 100644 --- a/tensorflow/core/kernels/map_dataset_op.cc +++ b/tensorflow/core/kernels/map_dataset_op.cc @@ -53,18 +53,21 @@ class MapDatasetOp : public UnaryDatasetOpKernel { std::move(other_arguments), &captured_func)); - *output = new Dataset(input, std::move(captured_func), output_types_, - output_shapes_); + *output = new Dataset(ctx, input, func_, std::move(captured_func), + output_types_, output_shapes_); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(const DatasetBase* input, + Dataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, std::unique_ptr captured_func, const DataTypeVector& output_types, const std::vector& output_shapes) - : input_(input), + : GraphDatasetBase(ctx), + input_(input), + func_(func), captured_func_(std::move(captured_func)), output_types_(output_types), output_shapes_(output_shapes) { @@ -88,6 +91,37 @@ class MapDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "MapDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + + DataTypeVector other_arguments_types( + captured_func_->captured_inputs().size()); + std::vector other_arguments( + captured_func_->captured_inputs().size()); + for (const Tensor& t : captured_func_->captured_inputs()) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + other_arguments.emplace_back(node); + other_arguments_types.emplace_back(t.dtype()); + } + AttrValue f; + b->BuildAttrValue(func_, &f); + AttrValue other_arguments_types_attr; + b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); + + TF_RETURN_IF_ERROR(b->AddDataset( + this, {std::make_pair(0, input_graph_node)}, // Single tensor inputs. + {std::make_pair(1, other_arguments)}, // Tensor list inputs. + {std::make_pair("f", f), + std::make_pair("Targuments", other_arguments_types_attr)}, // Attrs + output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: @@ -133,11 +167,24 @@ class MapDatasetOp : public UnaryDatasetOpKernel { } } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + return Status::OK(); + } + private: const std::unique_ptr input_impl_; }; const DatasetBase* const input_; + const NameAttrList func_; const std::unique_ptr captured_func_; const DataTypeVector output_types_; const std::vector output_shapes_; diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/repeat_dataset_op.cc index 0167b9ea64..3d977a0fa3 100644 --- a/tensorflow/core/kernels/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/repeat_dataset_op.cc @@ -73,10 +73,10 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "RepeatDatasetOp::Dataset"; } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); Node* count = nullptr; TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/kernels/shuffle_dataset_op.cc b/tensorflow/core/kernels/shuffle_dataset_op.cc index dd0ab57e9d..72facb3a0d 100644 --- a/tensorflow/core/kernels/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/shuffle_dataset_op.cc @@ -308,10 +308,10 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel { } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); Node* buffer_size = nullptr; Node* seed = nullptr; Node* seed2 = nullptr; diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/skip_dataset_op.cc index 7ee945dd4c..1fe49271e2 100644 --- a/tensorflow/core/kernels/skip_dataset_op.cc +++ b/tensorflow/core/kernels/skip_dataset_op.cc @@ -72,10 +72,10 @@ class SkipDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "SkipDatasetOp::Dataset"; } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); Node* count = nullptr; TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/take_dataset_op.cc index fb294a96b1..7a6d20d6c7 100644 --- a/tensorflow/core/kernels/take_dataset_op.cc +++ b/tensorflow/core/kernels/take_dataset_op.cc @@ -73,10 +73,10 @@ class TakeDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "TakeDatasetOp::Dataset"; } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(input_, &input_graph_node)); + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); Node* count = nullptr; TF_RETURN_IF_ERROR(b->AddScalar(count_, &count)); TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/kernels/tensor_dataset_op.cc b/tensorflow/core/kernels/tensor_dataset_op.cc index db7c947328..1f69082031 100644 --- a/tensorflow/core/kernels/tensor_dataset_op.cc +++ b/tensorflow/core/kernels/tensor_dataset_op.cc @@ -78,7 +78,7 @@ class TensorDatasetOp : public DatasetOpKernel { components.emplace_back(node); } TF_RETURN_IF_ERROR( - b->AddDatasetWithInputAsList(this, components, output)); + b->AddDataset(this, {}, {std::make_pair(0, components)}, {}, output)); return Status::OK(); } diff --git a/tensorflow/core/kernels/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/tensor_slice_dataset_op.cc index fd36bf524c..4d0cbdd67c 100644 --- a/tensorflow/core/kernels/tensor_slice_dataset_op.cc +++ b/tensorflow/core/kernels/tensor_slice_dataset_op.cc @@ -94,7 +94,7 @@ class TensorSliceDatasetOp : public DatasetOpKernel { components.emplace_back(node); } TF_RETURN_IF_ERROR( - b->AddDatasetWithInputAsList(this, components, output)); + b->AddDataset(this, {}, {std::make_pair(0, components)}, {}, output)); return Status::OK(); } diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index f466c8b268..96080863ea 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -78,17 +78,17 @@ class ZipDatasetOp : public DatasetOpKernel { string DebugString() override { return "ZipDatasetOp::Dataset"; } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { std::vector input_graph_nodes; input_graph_nodes.reserve(inputs_.size()); for (const auto& input : inputs_) { Node* input_node; - TF_RETURN_IF_ERROR(b->AddParentDataset(input, &input_node)); + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input, &input_node)); input_graph_nodes.emplace_back(input_node); } - TF_RETURN_IF_ERROR( - b->AddDatasetWithInputAsList(this, input_graph_nodes, output)); + TF_RETURN_IF_ERROR(b->AddDataset( + this, {}, {std::make_pair(0, input_graph_nodes)}, {}, output)); return Status::OK(); } -- GitLab From 2011c2011ae30c3a40801f0543969fa8f373156a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 05:56:59 -0800 Subject: [PATCH 0502/1801] Implements specifying default approximations for layer_collection. Currently, the default approximation to use for each layer is hard-coded as a default argument to each registration function. This CL instead specifies these default values as properties which the user can modify. Additionally, the user can identify groups of linked parameters that should always use a specified approximation when registered. This should make it easier for users to experiment with different approximations. PiperOrigin-RevId: 175955141 --- .../contrib/kfac/python/kernel_tests/BUILD | 2 + .../kernel_tests/layer_collection_test.py | 76 ++++++- .../kfac/python/ops/layer_collection.py | 199 +++++++++++++++--- 3 files changed, 244 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index 60c245166d..7d65ac9a43 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -68,6 +68,7 @@ py_test( srcs = ["layer_collection_test.py"], srcs_version = "PY2AND3", deps = [ + "//tensorflow/contrib/kfac/python/ops:fisher_blocks", "//tensorflow/contrib/kfac/python/ops:fisher_factors", "//tensorflow/contrib/kfac/python/ops:layer_collection", "//tensorflow/python:array_ops", @@ -75,6 +76,7 @@ py_test( "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:linalg_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:random_ops", "//tensorflow/python:random_seed", "//tensorflow/python:variable_scope", diff --git a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py index 524e8338fd..c5ad90d1dc 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.kfac.python.ops import fisher_blocks from tensorflow.contrib.kfac.python.ops import fisher_factors from tensorflow.contrib.kfac.python.ops import layer_collection from tensorflow.python.framework import dtypes @@ -25,6 +26,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -105,8 +107,10 @@ class LayerCollectionTest(test.TestCase): array_ops.constant(4), [1, 1, 1, 1], 'SAME', array_ops.ones((1, 1, 1, 1)), array_ops.constant(3)) lc.register_conv2d( - array_ops.constant(4), [1, 1, 1, 1], 'SAME', - array_ops.ones((1, 1, 1, 1)), array_ops.constant(3), + array_ops.constant(4), [1, 1, 1, 1], + 'SAME', + array_ops.ones((1, 1, 1, 1)), + array_ops.constant(3), approx=layer_collection.APPROX_DIAGONAL_NAME) lc.register_generic( array_ops.constant(5), 16, approx=layer_collection.APPROX_FULL_NAME) @@ -122,8 +126,8 @@ class LayerCollectionTest(test.TestCase): random_seed.set_random_seed(200) lc = layer_collection.LayerCollection() key = array_ops.constant(1) - lc.register_fully_connected(key, - array_ops.constant(2), array_ops.constant(3)) + lc.register_fully_connected(key, array_ops.constant(2), + array_ops.constant(3)) with self.assertRaises(ValueError): lc.register_generic(key, 16) @@ -191,8 +195,8 @@ class LayerCollectionTest(test.TestCase): lc.register_block((x, y), MockFisherBlock('foo')) self.assertEqual( - set([MockFisherBlock('2'), MockFisherBlock('foo')]), - set(lc.get_blocks())) + set([MockFisherBlock('2'), MockFisherBlock('foo')]), set( + lc.get_blocks())) def testRegisterTupleVarSomeRegisteredInOtherTuples(self): x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) @@ -464,6 +468,66 @@ class LayerCollectionTest(test.TestCase): use_count_map = lc.get_use_count_map() self.assertDictEqual({'a': 4, 'b': 2, 'c': 4}, use_count_map) + def testIdentifyLinkedParametersSomeRegisteredInOtherTuples(self): + x = variable_scope.get_variable('x', shape=()) + y = variable_scope.get_variable('y', shape=()) + z = variable_scope.get_variable('z', shape=()) + lc = layer_collection.LayerCollection() + lc.define_linked_parameters((x, y)) + + with self.assertRaises(ValueError): + lc.define_linked_parameters((x, z)) + + def testIdentifySubsetPreviouslyRegisteredTensor(self): + x = variable_scope.get_variable('x', shape=()) + y = variable_scope.get_variable('y', shape=()) + lc = layer_collection.LayerCollection() + lc.define_linked_parameters((x, y)) + + with self.assertRaises(ValueError): + lc.define_linked_parameters(x) + + def testSpecifyApproximation(self): + w_0 = variable_scope.get_variable('w_0', [10, 10]) + w_1 = variable_scope.get_variable('w_1', [10, 10]) + + b_0 = variable_scope.get_variable('b_0', [10]) + b_1 = variable_scope.get_variable('b_1', [10]) + + x_0 = array_ops.placeholder(dtypes.float32, shape=(32, 10)) + x_1 = array_ops.placeholder(dtypes.float32, shape=(32, 10)) + + pre_bias_0 = math_ops.matmul(x_0, w_0) + pre_bias_1 = math_ops.matmul(x_1, w_1) + + # Build the fully connected layers in the graph. + pre_bias_0 + b_0 # pylint: disable=pointless-statement + pre_bias_1 + b_1 # pylint: disable=pointless-statement + + lc = layer_collection.LayerCollection() + lc.define_linked_parameters( + w_0, approximation=layer_collection.APPROX_DIAGONAL_NAME) + lc.define_linked_parameters( + w_1, approximation=layer_collection.APPROX_DIAGONAL_NAME) + lc.define_linked_parameters( + b_0, approximation=layer_collection.APPROX_FULL_NAME) + lc.define_linked_parameters( + b_1, approximation=layer_collection.APPROX_FULL_NAME) + + lc.register_fully_connected(w_0, x_0, pre_bias_0) + lc.register_fully_connected( + w_1, x_1, pre_bias_1, approx=layer_collection.APPROX_KRONECKER_NAME) + self.assertIsInstance(lc.fisher_blocks[w_0], + fisher_blocks.FullyConnectedDiagonalFB) + self.assertIsInstance(lc.fisher_blocks[w_1], + fisher_blocks.FullyConnectedKFACBasicFB) + + lc.register_generic(b_0, batch_size=1) + lc.register_generic( + b_1, batch_size=1, approx=layer_collection.APPROX_DIAGONAL_NAME) + self.assertIsInstance(lc.fisher_blocks[b_0], fisher_blocks.FullFB) + self.assertIsInstance(lc.fisher_blocks[b_1], fisher_blocks.NaiveDiagonalFB) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 7300a7998c..2139a261e0 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -38,12 +38,26 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest - # Names for various approximations that can be requested for Fisher blocks. APPROX_KRONECKER_NAME = "kron" APPROX_DIAGONAL_NAME = "diagonal" APPROX_FULL_NAME = "full" +_GENERIC_APPROX_TO_BLOCK_TYPES = { + APPROX_FULL_NAME: fb.FullFB, + APPROX_DIAGONAL_NAME: fb.NaiveDiagonalFB, +} + +_FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES = { + APPROX_KRONECKER_NAME: fb.FullyConnectedKFACBasicFB, + APPROX_DIAGONAL_NAME: fb.FullyConnectedDiagonalFB, +} + +_CONV2D_APPROX_TO_BLOCK_TYPES = { + APPROX_KRONECKER_NAME: fb.ConvKFCBasicFB, + APPROX_DIAGONAL_NAME: fb.ConvDiagonalFB, +} + # Possible value for 'reuse' keyword argument. Sets 'reuse' to # tf.get_variable_scope().reuse. VARIABLE_SCOPE = "VARIABLE_SCOPE" @@ -51,6 +65,14 @@ VARIABLE_SCOPE = "VARIABLE_SCOPE" # TODO(jamesmartens): need to add find_canonical_output back into this somewhere +def ensure_sequence(obj): + """If `obj` isn't a tuple or list, return a tuple containing `obj`.""" + if isinstance(obj, (tuple, list)): + return obj + else: + return (obj,) + + class LayerParametersDict(OrderedDict): """An OrderedDict where keys are Tensors or tuples of Tensors. @@ -110,9 +132,14 @@ class LayerCollection(object): def __init__(self, graph=None, name="LayerCollection"): self.fisher_blocks = LayerParametersDict() self.fisher_factors = OrderedDict() + self._linked_parameters = dict( + ) # dict mapping sets of variables to optionally specified approximations. self._graph = graph or ops.get_default_graph() self._loss_dict = {} # {str: LossFunction} self._subgraph = None + self._default_generic_approximation = APPROX_FULL_NAME + self._default_fully_connected_approximation = APPROX_KRONECKER_NAME + self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME with variable_scope.variable_scope(None, default_name=name) as scope: self._var_scope = scope.name @@ -122,6 +149,70 @@ class LayerCollection(object): """LossFunctions registered with this LayerCollection.""" return list(self._loss_dict.values()) + def is_variable_registered(self, variable): + """Checks whether the variable has already been registered. + + Args: + variable: A single variable or tensor. + Returns: + True if the variable has been registered either by itself or as part of a + tuple. + """ + return any([ + variable in key if isinstance(key, (tuple, list)) else variable == key + for key in self.fisher_blocks.keys() + ]) + + @property + def linked_parameters(self): + """Groups of parameters with an optionally specified approximation. + + Linked parameters can be added using `define_linked_parameters`. + If an approximation is specified, then this approximation will be used + when registering a layer with exactly these parameters, unless an + approximation is specified when calling the registration function. + + Returns: + A `dict` mapping tuples of parameters to an optional string. + """ + return self._linked_parameters + + @property + def default_generic_approximation(self): + return self._default_generic_approximation + + @default_generic_approximation.setter + def default_generic_approximation(self, value): + if value not in _GENERIC_APPROX_TO_BLOCK_TYPES: + raise ValueError( + "{} is not a valid approximation for generic variables.".format( + value)) + self._default_generic_approximation = value + + @property + def default_fully_connected_approximation(self): + return self._default_fully_connected_approximation + + @default_fully_connected_approximation.setter + def default_fully_connected_approximation(self, value): + if value not in _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES: + raise ValueError( + "{} is not a valid approximation for fully connected layers.".format( + value)) + self._default_fully_connected_approximation = value + + @property + def default_conv2d_approximation(self): + return self._default_convolution_2d_approximation + + @default_conv2d_approximation.setter + def default_conv2d_approximation(self, value): + if value not in _CONV2D_APPROX_TO_BLOCK_TYPES: + raise ValueError( + "{} is not a valid approximation for 2d convolutional layers.".format( + value)) + self._default_convolution_2d_approximation = value + def register_block(self, layer_key, fisher_block, reuse=VARIABLE_SCOPE): """Validates and registers the layer_key associated with the fisher_block. @@ -187,7 +278,8 @@ class LayerCollection(object): # Find all keys that are either supersets or subsets of 'layer_key'. inclusions = { fisher_elt - for layer_elt in layer_key for fisher_elt in self.fisher_blocks + for layer_elt in layer_key + for fisher_elt in self.fisher_blocks if self._equal_or_subset(layer_elt, fisher_elt) } @@ -294,6 +386,49 @@ class LayerCollection(object): def subgraph(self): return self._subgraph + def define_linked_parameters(self, params, approximation=None): + """Identify a set of parameters that should be grouped together. + + During automatic graph scanning, any matches containing variables that have + been identified as part of a linked group will be filtered out unless + the match parameters are exactly equal to the ones specified in the linked + group. + + Args: + params: A variable, or a tuple or list of variables. The variables + to be linked. + approximation: Optional string specifying the type of approximation to use + for these variables. If unspecified, this layer collection's default + approximation for the layer type will be used. + + Raises: + ValueError: If the parameters were already registered in a layer or + identified as part of an incompatible group. + """ + params = frozenset(ensure_sequence(params)) + + # Check if any of the variables in 'params' is already in + # 'self.fisher_blocks.keys()'. + for registered_params, fisher_block in self.fisher_blocks.items(): + registered_params_set = set(ensure_sequence(registered_params)) + for variable in params: + if (variable in registered_params_set and + params != registered_params_set): + raise ValueError( + "Can't link parameters {}, variable {} was already registered in " + "group {} with layer {}".format(params, variable, + registered_params, fisher_block)) + + # Check if any of the variables in 'params' is already in + # 'self.linked_parameters'. + for variable in params: + for other_linked_params in self.linked_parameters: + if variable in other_linked_params: + raise ValueError("Can't link parameters {}, variable {} was already " + "linked in group {}.".format(params, variable, + other_linked_params)) + self._linked_parameters[params] = approximation + def create_subgraph(self): if not self.losses: raise ValueError("Must have at least one registered loss.") @@ -307,11 +442,19 @@ class LayerCollection(object): return math_ops.add_n( tuple(loss.evaluate_on_sample() for loss in self.losses)) + def _get_linked_approx(self, params): + """If params were linked, return their specified approximation.""" + params_set = frozenset(ensure_sequence(params)) + if params_set in self.linked_parameters: + return self.linked_parameters[params_set] + else: + return None + def register_fully_connected(self, params, inputs, outputs, - approx=APPROX_KRONECKER_NAME, + approx=None, reuse=VARIABLE_SCOPE): """Registers a fully connnected layer. @@ -332,15 +475,15 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ - approx_to_block_types = { - APPROX_KRONECKER_NAME: fb.FullyConnectedKFACBasicFB, - APPROX_DIAGONAL_NAME: fb.FullyConnectedDiagonalFB, - } + if approx is None: + approx = self._get_linked_approx(params) + if approx is None: + approx = self.default_fully_connected_approximation - if approx not in approx_to_block_types: + if approx not in _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES: raise ValueError("Bad value {} for approx.".format(approx)) - block_type = approx_to_block_types[approx] + block_type = _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES[approx] has_bias = isinstance(params, (tuple, list)) block = self.register_block(params, block_type(self, has_bias), reuse=reuse) @@ -352,7 +495,7 @@ class LayerCollection(object): padding, inputs, outputs, - approx=APPROX_KRONECKER_NAME, + approx=None, reuse=VARIABLE_SCOPE): """Registers a convolutional layer. @@ -377,15 +520,16 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ - approx_to_block_types = { - APPROX_KRONECKER_NAME: fb.ConvKFCBasicFB, - APPROX_DIAGONAL_NAME: fb.ConvDiagonalFB, - } - if approx not in approx_to_block_types: + if approx is None: + approx = self._get_linked_approx(params) + if approx is None: + approx = self.default_conv2d_approximation + + if approx not in _CONV2D_APPROX_TO_BLOCK_TYPES: raise ValueError("Bad value {} for approx.".format(approx)) - block_type = approx_to_block_types[approx] + block_type = _CONV2D_APPROX_TO_BLOCK_TYPES[approx] block = self.register_block( params, block_type(self, params, strides, padding), reuse=reuse) block.register_additional_minibatch(inputs, outputs) @@ -393,7 +537,7 @@ class LayerCollection(object): def register_generic(self, params, batch_size, - approx=APPROX_DIAGONAL_NAME, + approx=None, reuse=VARIABLE_SCOPE): """Registers a generic layer. @@ -413,15 +557,16 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ - approx_to_block_types = { - APPROX_FULL_NAME: fb.FullFB, - APPROX_DIAGONAL_NAME: fb.NaiveDiagonalFB, - } - if approx not in approx_to_block_types: + if approx is None: + approx = self._get_linked_approx(params) + if approx is None: + approx = self.default_generic_approximation + + if approx not in _GENERIC_APPROX_TO_BLOCK_TYPES: raise ValueError("Bad value {} for approx.".format(approx)) - block_type = approx_to_block_types[approx] + block_type = _GENERIC_APPROX_TO_BLOCK_TYPES[approx] block = self.register_block(params, block_type(self, params), reuse=reuse) block.register_additional_minibatch(batch_size) @@ -560,10 +705,10 @@ class LayerCollection(object): try: hash(args) except TypeError: - raise TypeError(( - "Unable to use (cls, args) = ({}, {}) as a key in " - "LayerCollection.fisher_factors. The pair cannot be hashed." - ).format(cls, args)) + raise TypeError( + ("Unable to use (cls, args) = ({}, {}) as a key in " + "LayerCollection.fisher_factors. The pair cannot be hashed.").format( + cls, args)) with variable_scope.variable_scope(self._var_scope): return utils.setdefault(self.fisher_factors, (cls, args), -- GitLab From 2a3429d702699012425eb3fa9cd2a1d796a14b20 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 06:06:49 -0800 Subject: [PATCH 0503/1801] Add docstring note about .transform() backprop behaviour. PiperOrigin-RevId: 175955706 --- tensorflow/contrib/image/python/ops/image_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py index 011ddeaa9a..faedee6f87 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -224,7 +224,8 @@ def transform(images, transforms, interpolation="NEAREST", name=None): `(x, y)` to a transformed *input* point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to - the transform mapping input points to output points. + the transform mapping input points to output points. Note that gradients + are not backpropagated into transformation parameters. interpolation: Interpolation mode. Supported values: "NEAREST", "BILINEAR". Returns: -- GitLab From d43d00be13ff271eb8a2e6a14eb7ac01a51934ff Mon Sep 17 00:00:00 2001 From: dariavel Date: Thu, 16 Nov 2017 17:12:06 +0200 Subject: [PATCH 0504/1801] Renaming and comment fix Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma.cc | 6 +++--- tensorflow/contrib/verbs/rdma.h | 6 +++--- tensorflow/contrib/verbs/rdma_mgr.cc | 2 +- tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 1fa98a1f01..59bc65f937 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -569,7 +569,7 @@ int RdmaChannel::PingPostRecv() { memset(&wr, 0, sizeof(wr)); wr.sg_list = &ping_sge_list_; wr.num_sge = 1; - wr.wr_id = PingRecvWrid; + wr.wr_id = kPingRecvWrid; return ibv_post_recv(qp_, &wr, &bad_wr); } @@ -592,13 +592,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_sge list; - mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize, + mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, kPingBuffSize, IBV_ACCESS_LOCAL_WRITE); CHECK(mr_) << "Failed to register memory region"; memset(&list, 0, sizeof(list)); list.addr = (uintptr_t)ping_buff_; - list.length = PingBuffSize; + list.length = kPingBuffSize; list.lkey = mr_->lkey; ping_sge_list_ = list; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 92391d6a57..fea2327d77 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -162,11 +162,11 @@ class RdmaChannel { void RemoveRecvCallback(const string& key); void RunRecvCallback(const string& key); static const int kNumMessageBuffers = 4; - static const int PingRecvWrid = 0; + static const int kPingRecvWrid = 0; private: - static const int PingBuffSize = 1024; - char ping_buff_[PingBuffSize]; + static const int kPingBuffSize = 1024; + char ping_buff_[kPingBuffSize]; struct ibv_mr* mr_; struct ibv_sge ping_sge_list_; int PingPostRecv(); diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index e7df0528b5..9cb307bcfa 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -143,7 +143,7 @@ bool RdmaMgr::ConnectivityCheck() { for (i = 0; i < ne; ++i) { ibv_wc_status s = rdma_adapter_->wc_[i].status; // recv complete - if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::PingRecvWrid) { + if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::kPingRecvWrid) { CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( rdma_adapter_->wc_[i].status) << "(" << rdma_adapter_->wc_[i].status diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc index dbb3d25f45..74f6681af3 100644 --- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc @@ -63,7 +63,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( &unused) || !DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name, &unused)) { - s = errors::Internal("Could not parse src name."); + s = errors::Internal("Could not parse src or dst name."); } if (!s.ok()) { LOG(ERROR) << "s is not ok, error code " << s.error_message(); -- GitLab From 8dccbde8ab5fe0c7dd2ee0af0e4a91c1a807c004 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 09:02:22 -0800 Subject: [PATCH 0505/1801] MultiHead adds individual head loss in eval_metric_ops. PiperOrigin-RevId: 175970818 --- tensorflow/contrib/estimator/BUILD | 1 + .../estimator/python/estimator/multi_head.py | 22 ++++++++++++------- .../python/estimator/multi_head_test.py | 2 ++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index bc67ef8354..008ca7a5d1 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -208,6 +208,7 @@ py_library( "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:metrics", "//tensorflow/python:summary", "//tensorflow/python/estimator:head", "//tensorflow/python/estimator:metric_keys", diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index 73bae5acf9..f2a6eae03e 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.saved_model import signature_constants from tensorflow.python.summary import summary @@ -342,14 +343,19 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access predictions = {} metrics = {} losses = [] - for head, spec in zip(self._heads, all_estimator_spec): - losses.append(spec.loss) - head_name = head.name - # Metric keys already contain head.name. - metrics.update(spec.eval_metric_ops or {}) - for k, v in six.iteritems(spec.predictions): - predictions[(head_name, k)] = v - loss = _merge_losses(losses, self._head_weights) + with ops.name_scope('merge_eval'): + for head, spec in zip(self._heads, all_estimator_spec): + losses.append(spec.loss) + head_name = head.name + # Loss metric is not added by default. + loss_name = head_lib._summary_key( # pylint:disable=protected-access + head_name, metric_keys.MetricKeys.LOSS) + metrics[loss_name] = metrics_lib.mean(spec.loss, name=loss_name) + # Metric keys already contain head.name. + metrics.update(spec.eval_metric_ops or {}) + for k, v in six.iteritems(spec.predictions): + predictions[(head_name, k)] = v + loss = _merge_losses(losses, self._head_weights) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 8d51a298b2..68f2d5d1cd 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -297,6 +297,8 @@ class MultiHeadTest(test.TestCase): keys = metric_keys.MetricKeys expected_metrics = { + keys.LOSS + '/head1': expected_loss_head1, + keys.LOSS + '/head2': expected_loss_head2, # Average loss over examples. keys.LOSS_MEAN + '/head1': expected_loss_head1 / 2, keys.LOSS_MEAN + '/head2': expected_loss_head2 / 2, -- GitLab From 1e3c712e32d5796ff4c93aa64570fb454b2c499e Mon Sep 17 00:00:00 2001 From: Ian Langmore Date: Thu, 16 Nov 2017 09:18:49 -0800 Subject: [PATCH 0506/1801] linear_operator_test_util.py. Adding hooks for turning off placeholders/adjoint in tests. PiperOrigin-RevId: 175972993 --- .../ops/linalg/linear_operator_test_util.py | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/ops/linalg/linear_operator_test_util.py b/tensorflow/python/ops/linalg/linear_operator_test_util.py index 3d0ea3e11b..2c11f90e6d 100644 --- a/tensorflow/python/ops/linalg/linear_operator_test_util.py +++ b/tensorflow/python/ops/linalg/linear_operator_test_util.py @@ -66,11 +66,23 @@ class LinearOperatorDerivedClassTest(test.TestCase): rtol = self._rtol[dtype] self.assertAllClose(x, y, atol=atol, rtol=rtol) + @property + def _adjoint_options(self): + return [False, True] + + @property + def _adjoint_arg_options(self): + return [False, True] + @property def _dtypes_to_test(self): # TODO(langmore) Test tf.float16 once tf.matrix_solve works in 16bit. return [dtypes.float32, dtypes.float64, dtypes.complex64, dtypes.complex128] + @property + def _use_placeholder_options(self): + return [False, True] + @abc.abstractproperty def _shapes_to_test(self): """Returns list of tuples, each is one shape that will be tested.""" @@ -151,7 +163,7 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_to_dense(self): self._skip_if_tests_to_skip_contains("to_dense") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: with self.test_session(graph=ops.Graph()) as sess: @@ -166,7 +178,7 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_det(self): self._skip_if_tests_to_skip_contains("det") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: with self.test_session(graph=ops.Graph()) as sess: @@ -183,7 +195,7 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_log_abs_det(self): self._skip_if_tests_to_skip_contains("log_abs_det") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: with self.test_session(graph=ops.Graph()) as sess: @@ -200,11 +212,11 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_matmul(self): self._skip_if_tests_to_skip_contains("matmul") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: - for adjoint in False, True: - for adjoint_arg in False, True: + for adjoint in self._adjoint_options: + for adjoint_arg in self._adjoint_arg_options: with self.test_session(graph=ops.Graph()) as sess: sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED operator, mat, feed_dict = self._operator_and_mat_and_feed_dict( @@ -228,11 +240,11 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_solve(self): self._skip_if_tests_to_skip_contains("solve") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: - for adjoint in False, True: - for adjoint_arg in False, True: + for adjoint in self._adjoint_options: + for adjoint_arg in self._adjoint_arg_options: with self.test_session(graph=ops.Graph()) as sess: sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED operator, mat, feed_dict = self._operator_and_mat_and_feed_dict( @@ -257,7 +269,7 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_trace(self): self._skip_if_tests_to_skip_contains("trace") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: with self.test_session(graph=ops.Graph()) as sess: @@ -274,7 +286,7 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_add_to_tensor(self): self._skip_if_tests_to_skip_contains("add_to_tensor") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: with self.test_session(graph=ops.Graph()) as sess: @@ -293,7 +305,7 @@ class LinearOperatorDerivedClassTest(test.TestCase): def test_diag_part(self): self._skip_if_tests_to_skip_contains("diag_part") - for use_placeholder in False, True: + for use_placeholder in self._use_placeholder_options: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: with self.test_session(graph=ops.Graph()) as sess: -- GitLab From 9d737356147a730326cfcbdc08b0b876dd0766e6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 16 Nov 2017 09:44:06 -0800 Subject: [PATCH 0507/1801] Simplify reductions in more cases. PiperOrigin-RevId: 175975917 --- .../grappler/optimizers/constant_folding.cc | 234 ++++++++++++------ .../grappler/optimizers/constant_folding.h | 6 + .../optimizers/constant_folding_test.cc | 41 ++- 3 files changed, 207 insertions(+), 74 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 993831c412..b722905032 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -339,92 +339,180 @@ bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties, } } // namespace -Status ConstantFolding::MaterializeConstants( - const GrapplerItem& item, const GraphProperties& properties) { - const int node_count = graph_.node_size(); - for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_.mutable_node(i); - const string& op = node.op(); - if (op != "BroadcastGradientArgs") { - continue; - } - const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); - const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); - if (shape_node1 == nullptr || - (shape_node1->op() != "Shape" && shape_node1->op() != "Const") || - shape_node2 == nullptr || - (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) { - continue; - } - int64 min_id = 0; - BCast::Vec shape1; - if (!ExtractShape(*shape_node1, properties, &shape1, &min_id)) { - continue; +Status ConstantFolding::MaterializeBroadcastGradientArgs( + const NodeDef& node, const GraphProperties& properties) { + const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); + const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); + if (shape_node1 == nullptr || + (shape_node1->op() != "Shape" && shape_node1->op() != "Const") || + shape_node2 == nullptr || + (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) { + return Status::OK(); + } + int64 min_id = 0; + BCast::Vec shape1; + if (!ExtractShape(*shape_node1, properties, &shape1, &min_id)) { + return Status::OK(); + } + BCast::Vec shape2; + if (!ExtractShape(*shape_node2, properties, &shape2, &min_id)) { + return Status::OK(); + } + // A value of -1 means we don't known anything about the dimension. Replace + // the -1 values with unique dimension ids since we don't want two '-1' + // dimensions to be considered equal. + for (auto& id : shape1) { + if (id == -1) { + id = --min_id; } - BCast::Vec shape2; - if (!ExtractShape(*shape_node2, properties, &shape2, &min_id)) { - continue; + } + for (auto& id : shape2) { + if (id == -1) { + id = --min_id; } - // A value of -1 means we don't known anything about the dimension. Replace - // the -1 values with unique dimension ids since we don't want two '-1' - // dimensions to be considered equal. - for (auto& id : shape1) { - if (id == -1) { - id = --min_id; + } + BCast bcast(shape1, shape2); + if (!bcast.IsValid()) { + return Status::OK(); + } + BCast::Vec reduce_dims[2]; + reduce_dims[0] = bcast.grad_x_reduce_idx(); + reduce_dims[1] = bcast.grad_y_reduce_idx(); + + const DataType type = node.attr().at("T").type(); + NodeDef* out[2]; + for (int j = 0; j < 2; ++j) { + if (!reduce_dims[j].empty()) { + // This is the case when a tensor dimension of 1 is matched against an + // unknown dimension. The unknown dimension could also be equal to 1, in + // which case there would be no reduction. + out[j] = nullptr; + } else { + string const_name = AddPrefixToNodeName( + strings::StrCat(node.name(), "-", j), kConstantFoldingConst); + out[j] = node_map_->GetNode(const_name); + if (out[j] == nullptr) { + out[j] = graph_.add_node(); + Tensor value(type, TensorShape({0})); + *out[j] = CreateNodeDef(const_name, TensorValue(&value)); + out[j]->set_device(node.device()); + node_map_->AddNode(const_name, out[j]); + string ctrl_dep = + AddControlDependency(node.name(), &graph_, node_map_.get()); + *out[j]->add_input() = ctrl_dep; + node_map_->AddOutput(NodeName(ctrl_dep), const_name); } } - for (auto& id : shape2) { - if (id == -1) { - id = --min_id; + } + + auto outputs = node_map_->GetOutputs(node.name()); + for (const auto& output : outputs) { + for (int k = 0; k < output->input_size(); ++k) { + int port; + string node_name = ParseNodeName(output->input(k), &port); + if (node_name == node.name() && port >= 0 && port < 2 && out[port]) { + *output->mutable_input(k) = out[port]->name(); + node_map_->UpdateInput(output->name(), node_name, out[port]->name()); } } - BCast bcast(shape1, shape2); - if (!bcast.IsValid()) { - continue; - } - BCast::Vec reduce_dims[2]; - reduce_dims[0] = bcast.grad_x_reduce_idx(); - reduce_dims[1] = bcast.grad_y_reduce_idx(); - - const DataType type = node.attr().at("T").type(); - NodeDef* out[2]; - for (int j = 0; j < 2; ++j) { - if (!reduce_dims[j].empty()) { - // This is the case when a tensor dimension 1 is matched against an - // unknown dimension. The unknown dimension could also be equal to 1, in - // which case there would be no reduction. - out[j] = nullptr; + } + + return Status::OK(); +} + +Status ConstantFolding::MaterializeReductionIndices( + NodeDef* node, const GraphProperties& properties) { + if (node->input_size() < 2) { + return Status::OK(); + } + const NodeDef* indices = node_map_->GetNode(node->input(1)); + if (!indices || IsConstant(*indices)) { + // The reduction indices are already constant, there's nothing to do. + return Status::OK(); + } + + const OpInfo::TensorProperties& input_prop = + properties.GetInputProperties(node->name())[0]; + if (input_prop.shape().unknown_rank()) { + // We can't do anything if we don't know the rank of the input. + return Status::OK(); + } + const int rank = input_prop.shape().dim_size(); + if (rank == 0) { + // Unexpected graph, don't try to change it. + return Status::OK(); + } + const OpInfo::TensorProperties& output_prop = + properties.GetOutputProperties(node->name())[0]; + PartialTensorShape output_shape(output_prop.shape()); + if (output_shape.num_elements() != 1) { + bool full_reduction = false; + for (const NodeDef* fanout : node_map_->GetOutputs(node->name())) { + if (!IsReshape(*fanout)) { + continue; + } + const OpInfo::TensorProperties& reshape_prop = + properties.GetOutputProperties(fanout->name())[0]; + PartialTensorShape shape(reshape_prop.shape()); + if (shape.num_elements() != 1) { + return Status::OK(); } else { - Tensor value(type, TensorShape({0})); - string const_name = AddPrefixToNodeName( - strings::StrCat(node.name(), "-", j), kConstantFoldingConst); - out[j] = node_map_->GetNode(const_name); - if (!out[j]) { - out[j] = graph_.add_node(); - *out[j] = CreateNodeDef(const_name, TensorValue(&value)); - out[j]->set_device(node.device()); - node_map_->AddNode(const_name, out[j]); - string ctrl_dep = - AddControlDependency(node.name(), &graph_, node_map_.get()); - *out[j]->add_input() = ctrl_dep; - node_map_->AddOutput(NodeName(ctrl_dep), const_name); - } + full_reduction = true; } } + if (!full_reduction) { + return Status::OK(); + } + } - auto outputs = node_map_->GetOutputs(node.name()); - for (const auto& output : outputs) { - for (int k = 0; k < output->input_size(); ++k) { - int port; - string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port >= 0 && port < 2 && out[port]) { - *output->mutable_input(k) = out[port]->name(); - node_map_->UpdateInput(output->name(), node_name, out[port]->name()); - } - } + const OpInfo::TensorProperties& reduction_prop = + properties.GetInputProperties(node->name())[1]; + DataType dtype = reduction_prop.dtype(); + if (dtype != DT_INT32 && dtype != DT_INT64) { + return Status::OK(); + } + // We know it's a full reduction. We can generate the set of indices to + // reduce. + string const_name = + AddPrefixToNodeName(strings::StrCat(node->name(), "-reduction_indices"), + kConstantFoldingConst); + if (node_map_->GetNode(const_name)) { + return Status::OK(); + } + NodeDef* reduction_indices = graph_.add_node(); + Tensor value(dtype, TensorShape({rank})); + for (int i = 0; i < rank; ++i) { + if (dtype == DT_INT32) { + value.vec()(i) = i; + } else { + value.vec()(i) = i; } } + *reduction_indices = CreateNodeDef(const_name, TensorValue(&value)); + reduction_indices->set_device(node->device()); + *reduction_indices->add_input() = + AddControlDependency(node->input(1), &graph_, node_map_.get()); + node_map_->AddNode(const_name, reduction_indices); + + node->set_input(1, reduction_indices->name()); + node_map_->UpdateInput(node->name(), indices->name(), + reduction_indices->name()); + + return Status::OK(); +} +Status ConstantFolding::MaterializeConstants( + const GrapplerItem& item, const GraphProperties& properties) { + const int node_count = graph_.node_size(); + for (int i = 0; i < node_count; ++i) { + NodeDef& node = *graph_.mutable_node(i); + const string& op = node.op(); + if (op == "BroadcastGradientArgs") { + TF_RETURN_IF_ERROR(MaterializeBroadcastGradientArgs(node, properties)); + } else if (IsReduction(node)) { + TF_RETURN_IF_ERROR(MaterializeReductionIndices(&node, properties)); + } + } return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index dd988f336c..f04f413c10 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -53,6 +53,12 @@ class ConstantFolding : public GraphOptimizer { private: Status MaterializeShapes(const GrapplerItem& item, const GraphProperties& properties); + + Status MaterializeBroadcastGradientArgs(const NodeDef& node, + const GraphProperties& properties); + Status MaterializeReductionIndices(NodeDef* node, + const GraphProperties& properties); + Status MaterializeConstants(const GrapplerItem& item, const GraphProperties& properties); bool IsFoldable(const NodeDef& node) const; diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 43f84b1ddf..428376c02c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -840,7 +840,7 @@ TEST_F(ConstantFoldingTest, Packing) { EXPECT_GT(8000, output.ByteSizeLong()); } -TEST_F(ConstantFoldingTest, ConstantMaterialization) { +TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output a = ops::Placeholder(s.WithOpName("a"), DT_FLOAT, @@ -918,6 +918,45 @@ TEST_F(ConstantFoldingTest, ConstantMaterialization) { EXPECT_EQ(7, found); } +TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output input = + ops::Placeholder(s.WithOpName("input"), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({-1, -1}))); + Output indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32); + Output sum = ops::Sum(s.WithOpName("sum"), input, indices); + Output size = ops::Const(s.WithOpName("size"), 1, {1}); + Output reshape = ops::Reshape(s.WithOpName("reshape"), sum, size); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + // Run a second time to make sure the optimization is idempotent. + item.graph.Swap(&output); + status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int found = 0; + for (const auto& node : output.node()) { + if (node.name() == "ConstantFolding/sum-reduction_indices") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ("^indices", node.input(0)); + EXPECT_EQ(2, TensorShape(node.attr().at("value").tensor().tensor_shape()) + .num_elements()); + } else if (node.name() == "sum") { + ++found; + EXPECT_EQ("ConstantFolding/sum-reduction_indices", node.input(1)); + } + } + EXPECT_EQ(2, found); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 42cf0e045ced26734aa20c4de8d6a0b9e73f15c5 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 16 Nov 2017 10:19:47 -0800 Subject: [PATCH 0508/1801] [Speech commands] Add `num_classes=label_count` when constructing the confusion matrix (#14600) This seems like it should fix an issue where you get unlucky, the batch doesn't contain the largest label, and the returned matrix is smaller than other runs. --- tensorflow/examples/speech_commands/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py index a54bcbdb32..f46d5e59b4 100644 --- a/tensorflow/examples/speech_commands/train.py +++ b/tensorflow/examples/speech_commands/train.py @@ -156,7 +156,7 @@ def main(_): predicted_indices = tf.argmax(logits, 1) expected_indices = tf.argmax(ground_truth_input, 1) correct_prediction = tf.equal(predicted_indices, expected_indices) - confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices) + confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.scalar('accuracy', evaluation_step) -- GitLab From d56ff503a6f12b30f1126d3476483f687660ca42 Mon Sep 17 00:00:00 2001 From: brett koonce Date: Thu, 16 Nov 2017 10:20:05 -0800 Subject: [PATCH 0509/1801] minor spelling tweaks in headers (#14570) --- tensorflow/contrib/lite/g3doc/apis.md | 2 +- tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/g3doc/apis.md b/tensorflow/contrib/lite/g3doc/apis.md index 311fc69696..662ae2032c 100644 --- a/tensorflow/contrib/lite/g3doc/apis.md +++ b/tensorflow/contrib/lite/g3doc/apis.md @@ -267,7 +267,7 @@ try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) The `Interpreter.java` class drives model inference with TensorFlow Lite. In most of the cases, this is the only class an app developer will need. -#### Initializing an `Interpreter` Mith a Model Mile +#### Initializing an `Interpreter` With a Model File The `Interpreter` can be initialized with a model file using the constructor: diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index 121c4c2c95..9ade04eb8c 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -54,7 +54,7 @@ counterparts: * [tf.sigmoid](https://www.tensorflow.org/api_docs/python/tf/sigmoid) * [tf.space_to_depth](https://www.tensorflow.org/api_docs/python/tf/space_to_depth) -## Straighforward Conversions, Constant-Folding and Fusing +## Straightforward Conversions, Constant-Folding and Fusing A number of TensorFlow operations can be processed by TensorFlow Lite even though they have no direct equivalent. This is the case for operations that can -- GitLab From ec6ae0092530f032f4db9330b4b6843bcb605da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeroen=20B=C3=A9dorf?= Date: Thu, 16 Nov 2017 19:20:17 +0100 Subject: [PATCH 0510/1801] Add missing dependency (#14587) --- tensorflow/contrib/mpi/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/mpi/BUILD b/tensorflow/contrib/mpi/BUILD index 20ceef5004..d9d55faf50 100644 --- a/tensorflow/contrib/mpi/BUILD +++ b/tensorflow/contrib/mpi/BUILD @@ -72,6 +72,7 @@ cc_library( "//tensorflow/core:worker_proto_cc", "//tensorflow/core/distributed_runtime:base_rendezvous_mgr", "//tensorflow/core/distributed_runtime:session_mgr", + "//tensorflow/core/distributed_runtime:tensor_coding", "//tensorflow/core/distributed_runtime:worker_env", "//third_party/mpi", ], -- GitLab From aa4162ac9f1812a0966d3cd9b5e441e47f035828 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 16 Nov 2017 10:23:48 -0800 Subject: [PATCH 0511/1801] contrib/summary: refactor summary_test_util A logdir may contain files other than summary event files, e.g., checkpoints. So add a method "events_from_file" to load events from a single file. The existing "events_from_logdir" method now calls the new method. PiperOrigin-RevId: 175981886 --- .../contrib/eager/python/evaluator_test.py | 4 +-- .../examples/resnet50/resnet50_graph_test.py | 2 +- .../python/examples/resnet50/resnet50_test.py | 2 +- .../contrib/eager/python/metrics_test.py | 2 +- .../contrib/summary/summary_ops_test.py | 6 ++-- .../contrib/summary/summary_test_util.py | 35 +++++++++++++++---- 6 files changed, 36 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/eager/python/evaluator_test.py b/tensorflow/contrib/eager/python/evaluator_test.py index 02f82cb216..7d2274db9b 100644 --- a/tensorflow/contrib/eager/python/evaluator_test.py +++ b/tensorflow/contrib/eager/python/evaluator_test.py @@ -87,7 +87,7 @@ class EvaluatorTest(test.TestCase): e.all_metric_results(logdir) - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].simple_value, 6.0) @@ -136,7 +136,7 @@ class EvaluatorTest(test.TestCase): variables.global_variables_initializer().run() e.run_evaluation(init_op, call_op, results_op) - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].simple_value, 6.0) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py index 736a75332f..14c82c87a7 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py @@ -95,7 +95,7 @@ class ResNet50GraphTest(tf.test.TestCase): sess.run([train_op, tf.contrib.summary.all_summary_ops()], feed_dict={images: np_images, labels: np_labels}) - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'loss') diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index d6389f2e38..582f4837c6 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -103,7 +103,7 @@ class ResNet50Test(tf.test.TestCase): images, labels = random_batch(2) train_one_step(model, images, labels, optimizer) self.assertEqual(320, len(model.variables)) - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'loss') diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index b4f5973bd1..96eb1b4f2a 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -72,7 +72,7 @@ class MetricsTest(test.TestCase): name="t0").as_default(), summary_ops.always_record_summaries(): m.result() # As a side-effect will write summaries. - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].simple_value, 37.0) diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index 09169fa6d7..c5ca054f77 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -79,7 +79,7 @@ class TargetTest(test_util.TensorFlowTestCase): summary_ops.scalar('scalar', 2.0) write() - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].simple_value, 2.0) @@ -92,7 +92,7 @@ class TargetTest(test_util.TensorFlowTestCase): summary_ops.scalar('scalar', 2.0) - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') @@ -105,7 +105,7 @@ class TargetTest(test_util.TensorFlowTestCase): summary_ops.scalar('scalar', 2.0, global_step=global_step) - events = summary_test_util.events_from_file(logdir) + events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py index 37b546d3ab..794c5b8bab 100644 --- a/tensorflow/contrib/summary/summary_test_util.py +++ b/tensorflow/contrib/summary/summary_test_util.py @@ -26,16 +26,37 @@ from tensorflow.python.lib.io import tf_record from tensorflow.python.platform import gfile -def events_from_file(logdir): - """Returns all events in the single eventfile in logdir.""" - assert gfile.Exists(logdir) - files = gfile.ListDirectory(logdir) - assert len(files) == 1, "Found more than one file in logdir: %s" % files - records = list( - tf_record.tf_record_iterator(os.path.join(logdir, files[0]))) +def events_from_file(filepath): + """Returns all events in a single event file. + + Args: + filepath: Path to the event file. + + Returns: + A list of all tf.Event protos in the event file. + """ + records = list(tf_record.tf_record_iterator(filepath)) result = [] for r in records: event = event_pb2.Event() event.ParseFromString(r) result.append(event) return result + + +def events_from_logdir(logdir): + """Returns all events in the single eventfile in logdir. + + Args: + logdir: The directory in which the single event file is sought. + + Returns: + A list of all tf.Event protos from the single event file. + + Raises: + AssertionError: If logdir does not contain exactly one file. + """ + assert gfile.Exists(logdir) + files = gfile.ListDirectory(logdir) + assert len(files) == 1, "Found not exactly one file in logdir: %s" % files + return events_from_file(os.path.join(logdir, files[0])) -- GitLab From de8453ff5d72ab64408e627ac9f4f184be3f9173 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 16 Nov 2017 10:31:07 -0800 Subject: [PATCH 0512/1801] Do not change the default graph in variable_scope when building a function. When building a TensorFlow function, we need precise control over the default graph. This change ensures that, when a function is being built, variable_scope preserves the default graph. PiperOrigin-RevId: 175983226 --- tensorflow/contrib/eager/python/BUILD | 1 + .../contrib/eager/python/network_test.py | 18 ++++++++++++++++++ tensorflow/python/ops/variable_scope.py | 10 ++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 92746b866a..bf2e883bc5 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -241,6 +241,7 @@ py_test( "//tensorflow/python:resource_variable_ops", "//tensorflow/python:training", "//tensorflow/python:variable_scope", + "//tensorflow/python/eager:function", "//tensorflow/python/eager:test", ], ) diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 8718a8b522..e7835a63e6 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -20,6 +20,7 @@ import gc from tensorflow.contrib.eager.python import network from tensorflow.python.eager import context +from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors_impl @@ -87,6 +88,23 @@ class NetworkTest(test.TestCase): result = net(constant_op.constant([[2.0]])) self.assertEqual(34.0, self.evaluate(result)) + # TODO(akshayka): This test should be changed once an API for compiling + # `call` into a defun is implemented. + def testReplacingNetworkCallWithDefun(self): + net = MyNetwork(name="abcd") + x = constant_op.constant([[2.0]]) + net(x) # Force variables to be created. + self.evaluate(net.trainable_variables[0].assign([[17.0]])) + + net.call = function.defun(net.call) + result = net(x) # Build and execute the TensorFlow function + self.assertEqual(34.0, self.evaluate(result)) + + # Force the creation of another TensorFlow function by changing input shape + y = constant_op.constant([[1.0], [2.0]]) + result = net(y) + self.assertAllEqual([[17.0], [34.0]], self.evaluate(result)) + # TODO(allenl): This test creates garbage in some Python versions @test_util.run_in_graph_and_eager_modes() def testNetworkSaveRestoreAlreadyBuilt(self): diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 2cdf585503..91dea12da2 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1828,7 +1828,13 @@ class variable_scope(object): # pylint: disable=invalid-name self._current_name_scope = None def __enter__(self): - if self._in_graph_mode: + # If the default graph is building a function, then we should not replace it + # with the cached graph. + if ops.get_default_graph().building_function: + self._building_function = True + else: + self._building_function = False + if self._in_graph_mode and not self._building_function: self._graph_context_manager = self._graph.as_default() self._graph_context_manager.__enter__() if self._cached_pure_variable_scope is not None: @@ -1907,7 +1913,7 @@ class variable_scope(object): # pylint: disable=invalid-name type_arg, value_arg, traceback_arg) if self._current_name_scope: self._current_name_scope.__exit__(type_arg, value_arg, traceback_arg) - if self._in_graph_mode: + if self._in_graph_mode and not self._building_function: self._graph_context_manager.__exit__(type_arg, value_arg, traceback_arg) -- GitLab From b20d11d36c3baa2e7c9b49d423a39c1e5cc0ceac Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 16 Nov 2017 10:34:42 -0800 Subject: [PATCH 0513/1801] Ops with no outputs in eager should return None instead of [] PiperOrigin-RevId: 175983704 --- tensorflow/python/eager/ops_test.py | 4 ++++ tensorflow/python/eager/python_eager_op_gen.cc | 2 ++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index 51550c9f51..70e23b9311 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.layers import core from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops @@ -349,6 +350,9 @@ class OpsTest(test_util.TensorFlowTestCase): x = constant_op.constant(3.1415) self.assertEqual('3.14', '{:.2f}'.format(x)) + def testNoOpIsNone(self): + self.assertTrue(control_flow_ops.no_op() is None) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index 374894733a..956fbdac50 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -531,6 +531,8 @@ string GenEagerPythonOp::Code() { strings::StrAppend(&result_, " _result = _", op_def_.name(), "Output._make(_result)\n"); } + } else { + strings::StrAppend(&result_, " _result = None\n"); } strings::StrAppend(&result_, " return _result\n\n"); return prelude_ + result_; -- GitLab From c47aabb187d26be89d03901e8b8da9540a3709f0 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 16 Nov 2017 10:40:57 -0800 Subject: [PATCH 0514/1801] Watch variables when executing defun-compiled functions. Variables are automatically watched upon creation of a _GraphModeFunction; this change ensures that the same variables are watched during subsequent executions of it. PiperOrigin-RevId: 175984583 --- tensorflow/python/eager/function.py | 47 ++++++++++++++++++----- tensorflow/python/eager/function_test.py | 11 ++++++ tensorflow/python/eager/graph_callable.py | 35 +++-------------- 3 files changed, 54 insertions(+), 39 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index c542dd77a6..9bcd9c23c7 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -211,7 +211,7 @@ def _map_sequence_obj_to_idx(sequence): return {id(x): i for i, x in enumerate(sequence)} -class _GraphModeFunction(object): +class GraphModeFunction(object): """Callable object representing a graph-mode function. Args: @@ -232,10 +232,19 @@ class _GraphModeFunction(object): func_outputs structure. output_shapes: List of shapes of all tensors which are output by the internal function. + variables: (optional) List of variables to watch during function execution. """ - def __init__(self, input_placeholders, extra_inputs, fdef, graph, operations, - func_outputs, func_outputs_to_fdef_outputs, output_shapes): + def __init__(self, + input_placeholders, + extra_inputs, + fdef, + graph, + operations, + func_outputs, + func_outputs_to_fdef_outputs, + output_shapes, + variables=None): assert len(input_placeholders) == len(fdef.signature.input_arg), "%s %s" % ( len(input_placeholders), len(fdef.signature.input_arg)) self._input_placeholders = input_placeholders @@ -251,6 +260,11 @@ class _GraphModeFunction(object): func_outputs, (ops.Tensor, type(None))) else list(func_outputs) self._returns_to_fedf_outputs = func_outputs_to_fdef_outputs self._output_shapes = output_shapes + self._variables = variables if variables is not None else [] + + @property + def variables(self): + return self._variables def _compute_backprop(self): """Computes the backprop function object for this function.""" @@ -282,7 +296,7 @@ class _GraphModeFunction(object): ] + list(sorted(c.known_ops, key=lambda x: x.name)), all_inputs, backward_outputs) _register_with_name(_backward_name(self._func_name), backward_function_def) - self._backward_function = _GraphModeFunction( + self._backward_function = GraphModeFunction( all_inputs, [], backward_function_def, self._graph, c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes) @@ -332,10 +346,15 @@ class _GraphModeFunction(object): def __call__(self, *args): """Executes the passed function in eager mode.""" + for v in self._variables: + if v._trainable: # pylint: disable=protected-access + tape.watch_variable(v) + tensor_inputs = [ x for x in nest.flatten(args) if isinstance(x, ops.Tensor) ] + if tape.should_record(tensor_inputs) or tape.should_record( self._extra_inputs): if not self._has_backprop: @@ -427,7 +446,11 @@ def _defun_internal(name, func, args, kwds): func_inputs = _get_defun_inputs(args) with capture_tensors(captures): - func_outputs = func(*func_inputs, **kwds) + tape.push_new_tape() + try: + func_outputs = func(*func_inputs, **kwds) + finally: + variables = tape.pop_tape().watched_variables() ids = list(sorted(captures.keys())) if ids: extra_inputs, extra_placeholders = zip(* [captures[x] for x in ids]) @@ -452,10 +475,16 @@ def _defun_internal(name, func, args, kwds): _register_with_name(f.name, f.definition) _register_with_name(_inference_name(name), inference_function_def) - return _GraphModeFunction( - all_inputs, extra_inputs, inference_function_def, tmp_graph, - tmp_graph.get_operations(), func_outputs, - _map_sequence_obj_to_idx(func_def_outputs), output_shapes) + return GraphModeFunction( + all_inputs, + extra_inputs, + inference_function_def, + tmp_graph, + tmp_graph.get_operations(), + func_outputs, + _map_sequence_obj_to_idx(func_def_outputs), + output_shapes, + variables=variables) # Defun uses this instead of Tensor as a cache key. Using dtype because diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 65776ca177..c55f2f1d59 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -96,6 +96,17 @@ class FunctionTest(test.TestCase): self.assertAllEqual(backprop.implicit_grad(f)()[0][0], 2.0) + def testDefunCanBeDifferentiatedTwice(self): + v = resource_variable_ops.ResourceVariable(1.0) + + @function.defun + def f(): + return v * v + + self.assertAllEqual(backprop.implicit_grad(f)()[0][0], 2.0) + # Ensure that v is watched again. + self.assertAllEqual(backprop.implicit_grad(f)()[0][0], 2.0) + def testGraphModeCaptureVariable(self): with context.graph_mode(), self.test_session() as sess: diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index ce51d17cfc..837a75c808 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -165,32 +165,6 @@ class _VariableCapturingScope(object): yield -class _FunctionObject(function._GraphModeFunction): # pylint: disable=protected-access - """Captured graph-mode function with read-only variables. - - Calling this function object will read the current values of the variables and - pass them to the graph mode function, which will use them as constants. - """ - - def __init__(self, variables, placeholder_inputs, extra_inputs, fdef, - graph, operations, outputs, func_outputs_to_fdef_outputs, - output_shapes): - self._variables = variables - super(_FunctionObject, self).__init__( - placeholder_inputs, - extra_inputs, - fdef, - graph, - operations, - outputs, - func_outputs_to_fdef_outputs, - output_shapes) - - @property - def variables(self): - return [x.variable for x in self._variables] - - class _InitializingFunctionObject(object): """Responsible for deciding which version of func-to-object to call. @@ -355,7 +329,7 @@ def _graph_callable_internal(func, shape_and_dtypes): function._register_with_name(f.name, f.definition) # pylint: disable=protected-access function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access initializer_function_def) - initializer_function = function._GraphModeFunction( # pylint: disable=protected-access + initializer_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, initializer_function_def, @@ -374,8 +348,8 @@ def _graph_callable_internal(func, shape_and_dtypes): capture_func_def_outputs) function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access captured_function_def) - captured_function = _FunctionObject( - sorted_variables, + + captured_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, captured_function_def, @@ -383,7 +357,8 @@ def _graph_callable_internal(func, shape_and_dtypes): capturing_operations, captured_outputs, function._map_sequence_obj_to_idx(capture_func_def_outputs), # pylint: disable=protected-access - output_shapes) + output_shapes, + variables=[x.variable for x in sorted_variables]) return _InitializingFunctionObject(captured_function, initializer_function, shape_and_dtypes) -- GitLab From 230abb7565225c9cc3f19d6c3a67b636b27792a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 10:51:11 -0800 Subject: [PATCH 0515/1801] Adds _var_scope_name property to FeatureColumn. PiperOrigin-RevId: 175986319 --- .../python/feature_column/feature_column.py | 26 +++++++---- .../feature_column/feature_column_test.py | 45 +++++++++++++++++-- 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 5ff7516246..5ee93be7c3 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -233,9 +233,8 @@ def input_layer(features, ordered_columns = [] for column in sorted(feature_columns, key=lambda x: x.name): ordered_columns.append(column) - # TODO(b/67952670): Implement a column._var_scope_name property and use - # that instead of column.name. - with variable_scope.variable_scope(None, default_name=column.name): + with variable_scope.variable_scope( + None, default_name=column._var_scope_name): # pylint: disable=protected-access tensor = column._get_dense_tensor( # pylint: disable=protected-access builder, weight_collections=weight_collections, @@ -342,9 +341,8 @@ def linear_model(features, ordered_columns = [] builder = _LazyBuilder(features) for column in sorted(feature_columns, key=lambda x: x.name): - # TODO(b/67952670): Implement a column._var_scope_name property and use - # that instead of column.name. - with variable_scope.variable_scope(None, default_name=column.name): + with variable_scope.variable_scope( + None, default_name=column._var_scope_name): # pylint: disable=protected-access ordered_columns.append(column) if isinstance(column, _CategoricalColumn): weighted_sum = _create_categorical_column_weighted_sum( @@ -659,7 +657,8 @@ def _shared_embedding_columns( `1/sqrt(dimension)`. shared_embedding_collection_name: Optional name of the collection where shared embedding weights are added. If not given, a reasonable name will - be chosen based on the names of `categorical_columns`. + be chosen based on the names of `categorical_columns`. This is also used + in `variable_scope` when creating shared embedding weights. ckpt_to_load_from: String representing checkpoint name/pattern from which to restore column weights. Required if `tensor_name_in_ckpt` is not `None`. tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from @@ -1463,9 +1462,14 @@ class _FeatureColumn(object): @abc.abstractproperty def name(self): - """Returns string. used for variable_scope and naming.""" + """Returns string. Used for naming.""" pass + @property + def _var_scope_name(self): + """Returns string. Used for variable_scope. Defaults to self.name.""" + return self.name + @abc.abstractmethod def _transform_feature(self, inputs): """Returns intermediate representation (usually a `Tensor`). @@ -2018,6 +2022,10 @@ class _EmbeddingColumn( self._name = '{}_embedding'.format(self.categorical_column.name) return self._name + @property + def _var_scope_name(self): + return self.shared_embedding_collection_name or self.name + @property def _parse_example_spec(self): return self.categorical_column._parse_example_spec # pylint: disable=protected-access @@ -2063,7 +2071,7 @@ class _EmbeddingColumn( embedding_weights.shape, embedding_shape)) else: embedding_weights = variable_scope.get_variable( - name=self.shared_embedding_collection_name + '_weights', + name='embedding_weights', shape=embedding_shape, dtype=dtypes.float32, initializer=self.initializer, diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 4b06a85ad3..9981f358b1 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -169,6 +169,8 @@ class NumericColumnTest(test.TestCase): def test_defaults(self): a = fc.numeric_column('aaa') self.assertEqual('aaa', a.key) + self.assertEqual('aaa', a.name) + self.assertEqual('aaa', a._var_scope_name) self.assertEqual((1,), a.shape) self.assertIsNone(a.default_value) self.assertEqual(dtypes.float32, a.dtype) @@ -370,6 +372,11 @@ class BucketizedColumnTest(test.TestCase): b = fc.bucketized_column(a, boundaries=[0, 1]) self.assertEqual('aaa_bucketized', b.name) + def test_var_scope_name(self): + a = fc.numeric_column('aaa', dtype=dtypes.int32) + b = fc.bucketized_column(a, boundaries=[0, 1]) + self.assertEqual('aaa_bucketized', b._var_scope_name) + def test_parse_spec(self): a = fc.numeric_column('aaa', shape=[2], dtype=dtypes.int32) b = fc.bucketized_column(a, boundaries=[0, 1]) @@ -557,6 +564,7 @@ class HashedCategoricalColumnTest(test.TestCase): def test_defaults(self): a = fc.categorical_column_with_hash_bucket('aaa', 10) self.assertEqual('aaa', a.name) + self.assertEqual('aaa', a._var_scope_name) self.assertEqual('aaa', a.key) self.assertEqual(10, a.hash_bucket_size) self.assertEqual(dtypes.string, a.dtype) @@ -819,6 +827,14 @@ class CrossedColumnTest(test.TestCase): crossed2 = fc.crossed_column([crossed1, 'd1', b], 10) self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2.name) + def test_var_scope_name(self): + a = fc.numeric_column('a', dtype=dtypes.int32) + b = fc.bucketized_column(a, boundaries=[0, 1]) + crossed1 = fc.crossed_column(['d1', 'd2'], 10) + + crossed2 = fc.crossed_column([b, 'c', crossed1], 10) + self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2._var_scope_name) + def test_parse_spec(self): a = fc.numeric_column('a', shape=[2], dtype=dtypes.int32) b = fc.bucketized_column(a, boundaries=[0, 1]) @@ -2189,6 +2205,8 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): column = fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file='path_to_file', vocabulary_size=3) self.assertEqual('aaa', column.name) + self.assertEqual('aaa', column._var_scope_name) + self.assertEqual('aaa', column.key) self.assertEqual(3, column._num_buckets) self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.string) @@ -2572,6 +2590,8 @@ class VocabularyListCategoricalColumnTest(test.TestCase): column = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) self.assertEqual('aaa', column.name) + self.assertEqual('aaa', column.key) + self.assertEqual('aaa', column._var_scope_name) self.assertEqual(3, column._num_buckets) self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.string) @@ -2581,6 +2601,8 @@ class VocabularyListCategoricalColumnTest(test.TestCase): column = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=(12, 24, 36)) self.assertEqual('aaa', column.name) + self.assertEqual('aaa', column.key) + self.assertEqual('aaa', column._var_scope_name) self.assertEqual(3, column._num_buckets) self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.int64) @@ -2934,6 +2956,8 @@ class IdentityCategoricalColumnTest(test.TestCase): def test_constructor(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) self.assertEqual('aaa', column.name) + self.assertEqual('aaa', column.key) + self.assertEqual('aaa', column._var_scope_name) self.assertEqual(3, column._num_buckets) self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.int64) @@ -3218,11 +3242,15 @@ class IndicatorColumnTest(test.TestCase): a = fc.categorical_column_with_hash_bucket('a', 4) indicator_a = fc.indicator_column(a) self.assertEqual(indicator_a.categorical_column.name, 'a') + self.assertEqual(indicator_a.name, 'a_indicator') + self.assertEqual(indicator_a._var_scope_name, 'a_indicator') self.assertEqual(indicator_a._variable_shape, [1, 4]) b = fc.categorical_column_with_hash_bucket('b', hash_bucket_size=100) indicator_b = fc.indicator_column(b) self.assertEqual(indicator_b.categorical_column.name, 'b') + self.assertEqual(indicator_b.name, 'b_indicator') + self.assertEqual(indicator_b._var_scope_name, 'b_indicator') self.assertEqual(indicator_b._variable_shape, [1, 100]) def test_1D_shape_succeeds(self): @@ -3409,6 +3437,7 @@ class EmbeddingColumnTest(test.TestCase): self.assertIsNone(embedding_column.max_norm) self.assertTrue(embedding_column.trainable) self.assertEqual('aaa_embedding', embedding_column.name) + self.assertEqual('aaa_embedding', embedding_column._var_scope_name) self.assertEqual( (embedding_dimension,), embedding_column._variable_shape) self.assertEqual({ @@ -3434,6 +3463,7 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual(42., embedding_column.max_norm) self.assertFalse(embedding_column.trainable) self.assertEqual('aaa_embedding', embedding_column.name) + self.assertEqual('aaa_embedding', embedding_column._var_scope_name) self.assertEqual( (embedding_dimension,), embedding_column._variable_shape) self.assertEqual({ @@ -4016,6 +4046,10 @@ class SharedEmbeddingColumnTest(test.TestCase): self.assertTrue(embedding_column_b.trainable) self.assertEqual('aaa_shared_embedding', embedding_column_a.name) self.assertEqual('bbb_shared_embedding', embedding_column_b.name) + self.assertEqual( + 'aaa_bbb_shared_embedding', embedding_column_a._var_scope_name) + self.assertEqual( + 'aaa_bbb_shared_embedding', embedding_column_b._var_scope_name) self.assertEqual( (embedding_dimension,), embedding_column_a._variable_shape) self.assertEqual( @@ -4065,6 +4099,10 @@ class SharedEmbeddingColumnTest(test.TestCase): self.assertFalse(embedding_column_b.trainable) self.assertEqual('aaa_shared_embedding', embedding_column_a.name) self.assertEqual('bbb_shared_embedding', embedding_column_b.name) + self.assertEqual( + 'shared_embedding_collection_name', embedding_column_a._var_scope_name) + self.assertEqual( + 'shared_embedding_collection_name', embedding_column_b._var_scope_name) self.assertEqual( (embedding_dimension,), embedding_column_a._variable_shape) self.assertEqual( @@ -4231,15 +4269,15 @@ class SharedEmbeddingColumnTest(test.TestCase): # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertItemsEqual( - ['input_layer/aaa_shared_embedding/aaa_bbb_shared_embedding_weights:0'], + ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], tuple([v.name for v in global_vars])) trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) self.assertItemsEqual( - ['input_layer/aaa_shared_embedding/aaa_bbb_shared_embedding_weights:0'], + ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], tuple([v.name for v in trainable_vars])) shared_embedding_vars = ops.get_collection('aaa_bbb_shared_embedding') self.assertItemsEqual( - ['input_layer/aaa_shared_embedding/aaa_bbb_shared_embedding_weights:0'], + ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], tuple([v.name for v in shared_embedding_vars])) with _initialized_session(): self.assertAllEqual(embedding_values, trainable_vars[0].eval()) @@ -4254,6 +4292,7 @@ class WeightedCategoricalColumnTest(test.TestCase): key='ids', num_buckets=3), weight_feature_key='values') self.assertEqual('ids_weighted_by_values', column.name) + self.assertEqual('ids_weighted_by_values', column._var_scope_name) self.assertEqual(3, column._num_buckets) self.assertEqual({ 'ids': parsing_ops.VarLenFeature(dtypes.int64), -- GitLab From 7e2bac6b8d75b810493415f0b06c8d9408f7858c Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 16 Nov 2017 10:59:47 -0800 Subject: [PATCH 0516/1801] Fix ci_parameterized_build.sh --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index c27f4953e3..2217b110e3 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -546,8 +546,8 @@ echo "" TMP_DIR="" DOCKERFILE_FLAG="" -if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ] || - ["${TF_BUILD_PYTHON_VERSION}" == "python3.6" ]]; then +if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ]] || + [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.6" ]]; then # Modify Dockerfile for Python3.5 | Python3.6 build TMP_DIR=$(mktemp -d) echo "Docker build will occur in temporary directory: ${TMP_DIR}" -- GitLab From 9700f7f716be8c4650a5dbdb48d510327fb2336e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 11:06:43 -0800 Subject: [PATCH 0517/1801] Fixed typo in comment. PiperOrigin-RevId: 175988884 --- tensorflow/core/ops/nn_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index de059a3e7e..e245c8ba91 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -2290,7 +2290,7 @@ REGISTER_OP("NthElement") return Status::OK(); }) .Doc(R"doc( -Finds values of the `n`-th order statistic for the last dmension. +Finds values of the `n`-th order statistic for the last dimension. If the input is a vector (rank-1), finds the entries which is the nth-smallest value in the vector and outputs their values as scalar tensor. -- GitLab From e47032ece9b5fb8f5683e1eedb8ee8870bd48022 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 16 Nov 2017 11:18:10 -0800 Subject: [PATCH 0518/1801] Execution callbacks monkey-patch the execute function. PiperOrigin-RevId: 175990829 --- tensorflow/python/eager/execute.py | 14 ++++++++++---- tensorflow/python/eager/execution_callbacks.py | 2 ++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index e392c6bb53..306cf07aab 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -30,7 +30,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.util import compat -def execute(op_name, num_outputs, inputs, attrs, ctx, name=None): +def quick_execute(op_name, num_outputs, inputs, attrs, ctx, name=None): """Execute a TensorFlow operation. Args: @@ -64,16 +64,22 @@ def execute(op_name, num_outputs, inputs, attrs, ctx, name=None): else: message = e.message six.raise_from(core._status_to_exception(e.code, message), None) - # pylint: enable=protected-access - # TODO(cais): Optimize this, perhaps by replacing this execute function with - # a different one when there are execution callback(s). + return tensors + + +def execute_with_callbacks(op_name, num_outputs, inputs, attrs, ctx, name=None): + """Monkey-patch to execute to enable execution callbacks.""" + tensors = quick_execute(op_name, num_outputs, inputs, attrs, ctx, name) for callback in ctx.post_execution_callbacks: callback(op_name, name, attrs, inputs, tensors) return tensors +execute = quick_execute + + def record_gradient(unused_op_name, unused_inputs, unused_attrs, unused_results, unused_name): """Import backprop if you want gradients recorded.""" diff --git a/tensorflow/python/eager/execution_callbacks.py b/tensorflow/python/eager/execution_callbacks.py index 6b0e7f5c3f..2f1654dda4 100644 --- a/tensorflow/python/eager/execution_callbacks.py +++ b/tensorflow/python/eager/execution_callbacks.py @@ -25,6 +25,7 @@ import numpy as np from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import core +from tensorflow.python.eager import execute from tensorflow.python.platform import tf_logging as logging _DEFAULT_CALLBACK_ACTION = "raise" @@ -249,6 +250,7 @@ def add_execution_callback(callback): `outputs` is the `list` of output `Tensor`(s) from the op. Return value(s) from the callback are ignored. """ + execute.execute = execute.execute_with_callbacks context.get_default_context().add_post_execution_callback(callback) -- GitLab From 7065160c6c67499df859012c55545218aa6a549a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 11:20:21 -0800 Subject: [PATCH 0519/1801] Plumb worker max_wait_secs arguments up to tf.contrib.train.train. PiperOrigin-RevId: 175991159 --- .../training/python/training/training.py | 10 +++++++-- .../python/training/monitored_session.py | 22 +++++++++++++++---- ...orflow.train.-worker-session-creator.pbtxt | 2 +- .../tools/api/golden/tensorflow.train.pbtxt | 2 +- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py index 6a4d79796d..eee2b88812 100644 --- a/tensorflow/contrib/training/python/training/training.py +++ b/tensorflow/contrib/training/python/training/training.py @@ -483,7 +483,8 @@ def train(train_op, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, - config=None): + config=None, + max_wait_secs=7200): """Runs the training loop. Args: @@ -506,6 +507,10 @@ def train(train_op, `save_summaries_steps` is set to `None`, then the default summary saver isn't used. config: An instance of `tf.ConfigProto`. + max_wait_secs: Maximum time workers should wait for the session to + become available. This should be kept relatively short to help detect + incorrect code, but sometimes may need to be increased if the chief takes + a while to start up. Returns: the value of the loss function after training. @@ -532,7 +537,8 @@ def train(train_op, chief_only_hooks=chief_only_hooks, save_checkpoint_secs=save_checkpoint_secs, save_summaries_steps=save_summaries_steps, - config=config) as session: + config=config, + max_wait_secs=max_wait_secs) as session: loss = None while not session.should_stop(): loss = session.run(train_op) diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 1f6016a91b..e931555470 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -281,7 +281,8 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name save_summaries_secs=USE_DEFAULT, config=None, stop_grace_period_secs=120, - log_step_count_steps=100): + log_step_count_steps=100, + max_wait_secs=7200): """Creates a `MonitoredSession` for training. For a chief, this utility sets proper session initializer/restorer. It also @@ -320,6 +321,10 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name `close()` has been called. log_step_count_steps: The frequency, in number of global steps, that the global step/sec is logged. + max_wait_secs: Maximum time workers should wait for the session to + become available. This should be kept relatively short to help detect + incorrect code, but sometimes may need to be increased if the chief takes + a while to start up. Returns: A `MonitoredSession` object. @@ -335,7 +340,10 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name scaffold = scaffold or Scaffold() if not is_chief: session_creator = WorkerSessionCreator( - scaffold=scaffold, master=master, config=config) + scaffold=scaffold, + master=master, + config=config, + max_wait_secs=max_wait_secs) return MonitoredSession(session_creator=session_creator, hooks=hooks or [], stop_grace_period_secs=stop_grace_period_secs) @@ -434,7 +442,11 @@ class ChiefSessionCreator(SessionCreator): class WorkerSessionCreator(SessionCreator): """Creates a tf.Session for a worker.""" - def __init__(self, scaffold=None, master='', config=None): + def __init__(self, + scaffold=None, + master='', + config=None, + max_wait_secs=30 * 60): """Initializes a worker session creator. Args: @@ -442,11 +454,13 @@ class WorkerSessionCreator(SessionCreator): not specified a default one is created. It's used to finalize the graph. master: `String` representation of the TensorFlow master to use. config: `ConfigProto` proto used to configure the session. + max_wait_secs: Maximum time to wait for the session to become available. """ self._scaffold = scaffold or Scaffold() self._session_manager = None self._master = master self._config = config + self._max_wait_secs = max_wait_secs def _get_session_manager(self): if self._session_manager: @@ -463,7 +477,7 @@ class WorkerSessionCreator(SessionCreator): self._scaffold.finalize() return self._get_session_manager().wait_for_session( self._master, config=self._config, - max_wait_secs=30 * 60 # Wait up to 30 mins for the session to be ready. + max_wait_secs=self._max_wait_secs ) diff --git a/tensorflow/tools/api/golden/tensorflow.train.-worker-session-creator.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-worker-session-creator.pbtxt index 140407651a..ac26358068 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-worker-session-creator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-worker-session-creator.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'scaffold\', \'master\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\'], " + argspec: "args=[\'self\', \'scaffold\', \'master\', \'config\', \'max_wait_secs\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'1800\'], " } member_method { name: "create_session" diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index e73f6f6e63..3ffc640730 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -234,7 +234,7 @@ tf_module { } member_method { name: "MonitoredTrainingSession" - argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'600\', \'\', \'\', \'None\', \'120\', \'100\'], " + argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'600\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\'], " } member_method { name: "NewCheckpointReader" -- GitLab From 7a73ce7a147a37cfb84479d3f67fed0c06115b6b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 11:29:24 -0800 Subject: [PATCH 0520/1801] Revise tf.substr so that it returns an empty string when "pos" argument is equal to the string length. The updated behavior is consistent with strings::substr() PiperOrigin-RevId: 175992569 --- tensorflow/core/kernels/substr_op.cc | 8 ++++---- .../python/kernel_tests/substr_op_test.py | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc index 5c72c9e1ae..743f113150 100644 --- a/tensorflow/core/kernels/substr_op.cc +++ b/tensorflow/core/kernels/substr_op.cc @@ -66,7 +66,7 @@ class SubstrOp : public OpKernel { for (size_t i = 0; i < input_tensor.NumElements(); ++i) { string in = input(i); OP_REQUIRES( - context, FastBoundsCheck(pos, in.size()), + context, FastBoundsCheck(pos, in.size() + 1), errors::InvalidArgument("pos ", pos, " out of range for string", "b'", in, "' at index ", i)); output(i) = in.substr(pos, len); @@ -80,7 +80,7 @@ class SubstrOp : public OpKernel { const T pos = tensorflow::internal::SubtleMustCopy(pos_flat(i)); const T len = tensorflow::internal::SubtleMustCopy(len_flat(i)); OP_REQUIRES( - context, FastBoundsCheck(pos, in.size()), + context, FastBoundsCheck(pos, in.size() + 1), errors::InvalidArgument("pos ", pos, " out of range for string", "b'", in, "' at index ", i)); output(i) = in.substr(pos, len); @@ -146,7 +146,7 @@ class SubstrOp : public OpKernel { const T pos = tensorflow::internal::SubtleMustCopy(pos_bcast(i)); const T len = tensorflow::internal::SubtleMustCopy(len_bcast(i)); OP_REQUIRES( - context, FastBoundsCheck(pos, input_bcast(i).size()), + context, FastBoundsCheck(pos, input_bcast(i).size() + 1), errors::InvalidArgument("pos ", pos, " out of range for string", "b'", in, "' at index ", i)); output(i) = in.substr(pos, len); @@ -197,7 +197,7 @@ class SubstrOp : public OpKernel { tensorflow::internal::SubtleMustCopy(pos_bcast(i, j)); const T len = tensorflow::internal::SubtleMustCopy(len_bcast(i, j)); - OP_REQUIRES(context, FastBoundsCheck(pos, in.size()), + OP_REQUIRES(context, FastBoundsCheck(pos, in.size() + 1), errors::InvalidArgument( "pos ", pos, " out of range for ", "string b'", in, "' at index (", i, ", ", j, ")")); diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py index 854394b0dd..73ac71e1f5 100644 --- a/tensorflow/python/kernel_tests/substr_op_test.py +++ b/tensorflow/python/kernel_tests/substr_op_test.py @@ -38,6 +38,17 @@ class SubstrOpTest(test.TestCase): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) + # position is equal to the length of string. + test_string = b"" + position = np.array(0, dtype) + length = np.array(2, dtype) + expected_value = b"" + + substr_op = string_ops.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + def _testVectorStrings(self, dtype): test_string = [b"Hello", b"World"] position = np.array(1, dtype) @@ -136,7 +147,7 @@ class SubstrOpTest(test.TestCase): # Vector/Scalar test_string = [b"good", b"good", b"bad", b"good"] - position = np.array(3, dtype) + position = np.array(4, dtype) length = np.array(1, dtype) substr_op = string_ops.substr(test_string, position, length) with self.test_session(): @@ -155,7 +166,7 @@ class SubstrOpTest(test.TestCase): # Matrix/Matrix test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"], [b"good", b"good", b"good"]] - position = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]], dtype) + position = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 3]], dtype) length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype) substr_op = string_ops.substr(test_string, position, length) with self.test_session(): @@ -164,7 +175,7 @@ class SubstrOpTest(test.TestCase): # Broadcast test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"]] - position = np.array([1, 2, 3], dtype) + position = np.array([1, 2, 4], dtype) length = np.array([1, 2, 3], dtype) substr_op = string_ops.substr(test_string, position, length) with self.test_session(): -- GitLab From 1962f68c61f063b9b3d377e2d2ff875a7b5785f4 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 16 Nov 2017 11:36:20 -0800 Subject: [PATCH 0521/1801] Make CPU's IrEmitter::hlo_to_profile_idx_ a value I think the performance advantages of keeping it as a maybe-null pointer are minimal, and it instead complicates the signature generation code. For example, the code to generate calls to __xla_cpu_runtime_ParallelForkJoin is buggy when hlo_to_profile_idx_ is nullptr today. This bug isn't visible today because we always have hlo_to_profile_idx_ as nullptr in JIT mode and in AOT mode we don't parallelize Hlo operations. PiperOrigin-RevId: 175993645 --- .../compiler/xla/service/cpu/cpu_compiler.cc | 8 +++--- .../compiler/xla/service/cpu/ir_emitter.cc | 25 +++++++------------ .../compiler/xla/service/cpu/ir_emitter.h | 14 +++++------ 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index def801d9d6..2b8927f953 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -538,7 +538,7 @@ StatusOr> CpuCompiler::Compile( } IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - &hlo_to_profile_idx, jit->target_machine(), + std::move(hlo_to_profile_idx), jit->target_machine(), jit->external_constant_pool()); std::unique_ptr> function_names( @@ -618,7 +618,7 @@ StatusOr> CpuCompiler::Compile( // GetEmbeddedComputations guarantees that a called computation occurs // before a caller computation. IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - &hlo_to_profile_idx, jit->target_machine(), + std::move(hlo_to_profile_idx), jit->target_machine(), jit->external_constant_pool()); for (auto embedded_computation : @@ -787,7 +787,9 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, } IrEmitter ir_emitter(*module, *assignment, &llvm_module, - /*hlo_to_profile_idx=*/nullptr, target_machine.get(), + /*hlo_to_profile_idx=*/ + std::unordered_map{}, + target_machine.get(), /*external_constant_pool=*/nullptr); HloComputation* computation = module->entry_computation(); for (auto embedded_computation : diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index e547f291b8..ba4cf14d64 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -76,14 +76,14 @@ namespace cpu { IrEmitter::IrEmitter( const HloModule& hlo_module, const BufferAssignment& assignment, llvm::Module* llvm_module, - const std::unordered_map* hlo_to_profile_idx, + std::unordered_map hlo_to_profile_idx, llvm::TargetMachine* target_machine, ExternalConstantPool* external_constant_pool) : assignment_(assignment), module_(llvm_module), arch_type_(llvm::Triple(llvm_module->getTargetTriple()).getArch()), ir_builder_(llvm_module->getContext()), - hlo_to_profile_idx_(hlo_to_profile_idx), + hlo_to_profile_idx_(std::move(hlo_to_profile_idx)), alias_analysis_(hlo_module, assignment, &llvm_module->getContext()), hlo_module_config_(hlo_module.config()), parallel_cpu_backend_( @@ -214,9 +214,7 @@ void IrEmitter::InitializeIrFunction(const string& function_name) { if (num_dynamic_loop_bounds_ > 0) { (++arg_iter)->setName("dynamic_loop_bounds"); } - if (hlo_to_profile_idx_) { - (++arg_iter)->setName("prof_counters"); - } + (++arg_iter)->setName("prof_counters"); // We know a-priori that the function arguments are guaranteed to point to // disjoint objects. @@ -2642,19 +2640,16 @@ Status IrEmitter::FinishVisit(HloInstruction* root) { llvm::Value* IrEmitter::GetProfileCounterFor(const HloInstruction* hlo) { string counter_name; size_t prof_counter_idx; - if (!hlo_to_profile_idx_) { - return nullptr; - } if (hlo) { - auto it = hlo_to_profile_idx_->find(hlo); - if (it == hlo_to_profile_idx_->end()) { + auto it = hlo_to_profile_idx_.find(hlo); + if (it == hlo_to_profile_idx_.end()) { return nullptr; } prof_counter_idx = it->second; counter_name = IrName("prof_counter", hlo->name()); } else { - prof_counter_idx = hlo_to_profile_idx_->size(); + prof_counter_idx = hlo_to_profile_idx_.size(); counter_name = "prof_counter.computation"; } return ir_builder_.CreateGEP(GetProfileCountersArgument(), @@ -2733,7 +2728,7 @@ void IrEmitter::ProfilingState::RecordCompleteComputation( Status IrEmitter::Preprocess(HloInstruction* hlo) { VLOG(3) << "Visiting: " << hlo->ToString(); - if (hlo_to_profile_idx_ && hlo_to_profile_idx_->count(hlo)) { + if (hlo_to_profile_idx_.count(hlo)) { profiling_state_.RecordCycleStart(&ir_builder_, hlo); } return Status::OK(); @@ -2785,9 +2780,7 @@ std::vector IrEmitter::GetComputeFunctionParams() { if (num_dynamic_loop_bounds_ > 0) { compute_function_params.push_back(i64_ptr_type); } - if (hlo_to_profile_idx_) { - compute_function_params.push_back(i64_ptr_type); - } + compute_function_params.push_back(i64_ptr_type); return compute_function_params; } @@ -2797,7 +2790,7 @@ llvm::Argument* IrEmitter::GetResultArgument() { llvm::Argument* IrEmitter::GetProfileCountersArgument() { const int64 arg_index = num_dynamic_loop_bounds_ > 0 ? 5 : 4; - return hlo_to_profile_idx_ ? GetArg(compute_function_, arg_index) : nullptr; + return GetArg(compute_function_, arg_index); } llvm::Value* IrEmitter::GetTempBuffersArgument() { diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 83eded5ad8..14f98867a4 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -108,12 +108,12 @@ class IrEmitter : public DfsHloVisitorWithDefault { // external_constant_pool: if non-null, points to an ExternalConstantPool // instance into which the Ir emitter can spill // constants. - IrEmitter(const HloModule& hlo_module, const BufferAssignment& assignment, - llvm::Module* llvm_module, - const std::unordered_map* - hlo_to_profile_idx, - llvm::TargetMachine* target_machine, - ExternalConstantPool* external_constant_pool); + IrEmitter( + const HloModule& hlo_module, const BufferAssignment& assignment, + llvm::Module* llvm_module, + std::unordered_map hlo_to_profile_idx, + llvm::TargetMachine* target_machine, + ExternalConstantPool* external_constant_pool); ~IrEmitter() override; // Emit and return the given HLO computation as an LLVM IR @@ -473,7 +473,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { llvm::IRBuilder<> ir_builder_; // Maps HLOs to their index into the profile counter array. - const std::unordered_map* hlo_to_profile_idx_; + std::unordered_map hlo_to_profile_idx_; // Maps HLOs to Values emitted for them. std::unordered_map emitted_value_; -- GitLab From 23cd49298153458a8dcb3c65232b94427062a511 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 12:00:55 -0800 Subject: [PATCH 0522/1801] Internal Change PiperOrigin-RevId: 175997220 --- tensorflow/contrib/lite/python/lite.py | 99 +++++++++++++++----------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 5e8edbb937..dcae16d5ae 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -32,8 +32,8 @@ from tensorflow.contrib.lite.toco import model_flags_pb2 as _model_flags_pb2 from tensorflow.contrib.lite.toco import toco_flags_pb2 as _toco_flags_pb2 from tensorflow.contrib.lite.toco.python.tensorflow_wrap_toco import TocoConvert as _toco_convert_protos from tensorflow.python.framework import dtypes as _dtypes -# from tensorflow.python.platform import -# resource_loader as _resource_loader +from tensorflow.python.platform import resource_loader as _resource_loader +from tensorflow.python.util.all_util import remove_undocumented # Enum types from the protobuf promoted to the API FLOAT = _toco_flags_pb2.FLOAT @@ -46,16 +46,22 @@ TFLITE = _toco_flags_pb2.TFLITE GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT # Currently the default mode of operation is to shell to another python process -# to protect against crashes. +# to protect against crashes. However, it breaks some dependent targets because +# it forces us to depend on an external py_binary. The experimental API doesn't +# have that drawback. EXPERIMENTAL_USE_TOCO_API_DIRECTLY = True # Find the toco_from_protos binary using the resource loader if using from # bazel, otherwise we are in a pip where console_scripts already has # the toco_from_protos tool. -# toco_from_proto_bin = _resource_loader.get_path_to_datafile( -# "../toco/python/toco_from_protos") -# if not os.path.exists(toco_from_proto_bin): -# toco_from_proto_bin = "toco_from_protos" +if EXPERIMENTAL_USE_TOCO_API_DIRECTLY: + _toco_from_proto_bin = "" +else: + _toco_from_proto_bin = _resource_loader.get_path_to_datafile( + "../toco/python/toco_from_protos") + +if _toco_from_proto_bin and not os.path.exists(_toco_from_proto_bin): + _toco_from_proto_bin = "toco_from_protos" def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): @@ -78,39 +84,39 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): """ # TODO(aselle): When toco does not use fatal errors for failure, we can # switch this on. - if EXPERIMENTAL_USE_TOCO_API_DIRECTLY: + if not _toco_from_proto_bin: return _toco_convert_protos(model_flags_str, toco_flags_str, input_data_str) - # with tempfile.NamedTemporaryFile() as fp_toco, \ - # tempfile.NamedTemporaryFile() as fp_model, \ - # tempfile.NamedTemporaryFile() as fp_input, \ - # tempfile.NamedTemporaryFile() as fp_output: - # fp_model.write(model_flags_str) - # fp_toco.write(toco_flags_str) - # fp_input.write(input_data_str) - # fp_model.flush() - # fp_toco.flush() - # fp_input.flush() - - # cmd = [ - # toco_from_proto_bin, fp_model.name, fp_toco.name, fp_input.name, - # fp_output.name - # ] - # cmdline = " ".join(cmd) - # proc = subprocess.Popen( - # cmdline, - # shell=True, - # stdout=subprocess.PIPE, - # stderr=subprocess.STDOUT, - # close_fds=True) - # stdout, stderr = proc.communicate() - # exitcode = proc.returncode - # if exitcode == 0: - # stuff = fp_output.read() - # return stuff - # else: - # raise RuntimeError("TOCO failed see console for info.\n%s\n%s\n" % - # (stdout, stderr)) + with tempfile.NamedTemporaryFile() as fp_toco, \ + tempfile.NamedTemporaryFile() as fp_model, \ + tempfile.NamedTemporaryFile() as fp_input, \ + tempfile.NamedTemporaryFile() as fp_output: + fp_model.write(model_flags_str) + fp_toco.write(toco_flags_str) + fp_input.write(input_data_str) + fp_model.flush() + fp_toco.flush() + fp_input.flush() + + cmd = [ + _toco_from_proto_bin, fp_model.name, fp_toco.name, fp_input.name, + fp_output.name + ] + cmdline = " ".join(cmd) + proc = subprocess.Popen( + cmdline, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + close_fds=True) + stdout, stderr = proc.communicate() + exitcode = proc.returncode + if exitcode == 0: + stuff = fp_output.read() + return stuff + else: + raise RuntimeError("TOCO failed see console for info.\n%s\n%s\n" % + (stdout, stderr)) def _tensor_name(x): @@ -192,8 +198,15 @@ def toco_convert(input_data, return data -# remove_undocumented(__name__) - -del os -del subprocess -del tempfile +_allowed_symbols = [ + "FLOAT", + "INT32", + "INT64", + "STRING", + "QUANTIZED_UINT8", + "TENSORFLOW_GRAPHDEF", + "TFLITE", + "GRAPHVIZ_DOT", + "EXPERIMENTAL_USE_TOCO_API_DIRECTLY", +] +remove_undocumented(__name__, _allowed_symbols) -- GitLab From 423f8f0bf7e414af1839e9b47bf18f360a23ee81 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 12:13:10 -0800 Subject: [PATCH 0523/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 175999357 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 30b293a28a..d9e3dbdbb7 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -15487,7 +15487,7 @@ op { } } } - summary: "Finds values of the `n`-th order statistic for the last dmension." + summary: "Finds values of the `n`-th order statistic for the last dimension." description: "If the input is a vector (rank-1), finds the entries which is the nth-smallest\nvalue in the vector and outputs their values as scalar tensor.\n\nFor matrices (resp. higher rank input), computes the entries which is the\nnth-smallest value in each row (resp. vector along the last dimension). Thus,\n\n values.shape = input.shape[:-1]" } op { -- GitLab From 0cf37a5b3aa7176775eb0b80768ad2ab6d5ef65c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 12:20:20 -0800 Subject: [PATCH 0524/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 176000517 --- tensorflow/go/op/wrappers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 869213eb17..a910b51fb9 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -24262,7 +24262,7 @@ func NthElementReverse(value bool) NthElementAttr { } } -// Finds values of the `n`-th order statistic for the last dmension. +// Finds values of the `n`-th order statistic for the last dimension. // // If the input is a vector (rank-1), finds the entries which is the nth-smallest // value in the vector and outputs their values as scalar tensor. -- GitLab From bf24cbb36b478fbcb8fb01216cec2052ac5d51aa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 12:25:39 -0800 Subject: [PATCH 0525/1801] Multidimensional sparse float tensors support - changing split criteria, stats accumulator and quantiles op to take the dimension into account. PiperOrigin-RevId: 176001234 --- .../boosted_trees/kernels/quantile_ops.cc | 45 ++- .../kernels/split_handler_ops.cc | 263 +++++++++++++----- .../kernels/stats_accumulator_ops.cc | 59 ++-- .../batch/categorical_split_handler.py | 11 +- .../learner/batch/ordinal_split_handler.py | 31 ++- .../contrib/boosted_trees/ops/quantile_ops.cc | 12 +- .../boosted_trees/ops/split_handler_ops.cc | 23 +- .../ops/stats_accumulator_ops.cc | 84 +++--- .../kernel_tests/prediction_ops_test.py | 111 +++++++- .../python/kernel_tests/quantile_ops_test.py | 98 ++++--- .../kernel_tests/split_handler_ops_test.py | 160 ++++++++++- .../stats_accumulator_ops_test.py | 209 +++++++++----- .../python/training/functions/gbdt_batch.py | 6 +- 13 files changed, 813 insertions(+), 299 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc index b08028eb63..8600c8c53c 100644 --- a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc @@ -50,6 +50,7 @@ const char* const kAreBucketsReadyName = "are_buckets_ready"; const char* const kNumSparseFeaturesName = "num_sparse_features"; const char* const kSparseBucketsName = "sparse_buckets"; const char* const kSparseValuesName = "sparse_values"; +const char* const kSparseIndicesName = "sparse_indices"; const char* const kSparseStreamsStateName = "sparse_streams_state"; const char* const kSparseSummariesName = "sparse_summaries"; const char* const kSparseConfigName = "sparse_config"; @@ -85,9 +86,23 @@ std::vector GetBuckets(const int32 feature, return buckets_vector; } -void QuantizeFeatures(const string& output_name, const OpInputList& values_list, - const OpInputList& buckets_list, - OpKernelContext* const context) { +int32 GetFeatureDimension(const int32 feature_index, const int64 instance, + const OpInputList* const indices_list) { + if (indices_list != nullptr) { + // Sparse multidimensional. + return (*indices_list)[feature_index].matrix()(instance, 1); + } + // No indices, assume one-dimensional tensor. + return 0; +} + +// Allows quantization for each of multiple dimensions of a sparse feature. +void QuantizeFeatures( + const string& output_name, const OpInputList& values_list, + const OpInputList& buckets_list, + const OpInputList* const + indices_list /** Optional, provide for sparse features **/, + OpKernelContext* const context) { if (values_list.size() == 0) { return; } @@ -100,10 +115,13 @@ void QuantizeFeatures(const string& output_name, const OpInputList& values_list, const int64 num_values = values_tensor.dim_size(0); Tensor* output_t = nullptr; + // Output will have bucket id and dimension of the features for that bucket. OP_REQUIRES_OK( - context, output_list.allocate(feature_index, TensorShape({num_values}), - &output_t)); - TTypes::Vec output = output_t->vec(); + context, output_list.allocate(feature_index, + TensorShape({num_values, 2}), &output_t)); + + auto output = output_t->matrix(); + const std::vector& buckets_vector = GetBuckets(feature_index, buckets_list); auto flat_values = values_tensor.flat(); @@ -116,7 +134,11 @@ void QuantizeFeatures(const string& output_name, const OpInputList& values_list, } const int32 bucket = static_cast(bucket_iter - buckets_vector.begin()); - output(instance) = bucket; + // Bucket id. + output(instance, 0) = bucket; + // Dimension. + output(instance, 1) = + GetFeatureDimension(feature_index, instance, indices_list); } } } @@ -851,6 +873,11 @@ class QuantilesOp : public OpKernel { OP_REQUIRES_OK(context, context->input_list(kSparseValuesName, &sparse_float_feature_values_list)); + + OpInputList sparse_float_indices_list; + OP_REQUIRES_OK(context, context->input_list(kSparseIndicesName, + &sparse_float_indices_list)); + OpInputList sparse_buckets_list; OP_REQUIRES_OK( context, context->input_list(kSparseBucketsName, &sparse_buckets_list)); @@ -865,10 +892,10 @@ class QuantilesOp : public OpKernel { // Quantize the feature values QuantizeFeatures(kDenseOutputTensorName, dense_float_features_list, - dense_buckets_list, context); + dense_buckets_list, nullptr, context); QuantizeFeatures(kSparseOutputTensorName, sparse_float_feature_values_list, - sparse_buckets_list, context); + sparse_buckets_list, &sparse_float_indices_list, context); } }; diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc index 29635bb3c4..3bd30d8678 100644 --- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc @@ -39,6 +39,10 @@ using boosted_trees::learner::stochastic::GradientStats; using boosted_trees::learner::stochastic::NodeStats; using boosted_trees::learner::LearnerConfig_MultiClassStrategy; +namespace { +const int32 DUMMY_FEATURE_DIMENSION = -1; +} // namespace + class BaseBuildSplitOp : public OpKernel { public: explicit BaseBuildSplitOp(OpKernelConstruction* const context) @@ -128,7 +132,7 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { const Tensor* bucket_ids_t; OP_REQUIRES_OK(context, context->input("bucket_ids", &bucket_ids_t)); - const auto& bucket_ids = bucket_ids_t->vec(); + const auto& bucket_ids = bucket_ids_t->matrix(); const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); @@ -219,7 +223,7 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { split_info.mutable_split_node()->mutable_dense_float_binary_split(); dense_split->set_feature_column(feature_column_group_id_); dense_split->set_threshold( - bucket_boundaries(bucket_ids(best_bucket_idx))); + bucket_boundaries(bucket_ids(best_bucket_idx, 0))); auto* left_child = split_info.mutable_left_child(); auto* right_child = split_info.mutable_right_child(); @@ -262,7 +266,9 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { const Tensor* bucket_ids_t; OP_REQUIRES_OK(context, context->input("bucket_ids", &bucket_ids_t)); - const auto& bucket_ids = bucket_ids_t->vec(); + const auto& bucket_ids_and_dimensions = bucket_ids_t->matrix(); + + const int32 tensor_elements = partition_ids.size(); const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); @@ -273,24 +279,59 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { int class_id; ReadClassId(context, &class_id); - // Find the number of unique partitions before we allocate the output. - std::vector partition_boundaries; + // For each partition (tree node), store starting index for each dimension. + PartitionAndDimensionBoundaries partition_boundaries; + // Stores indices in partition_boundaries for those partitions that are + // not empty (have at least one dimension and a bucket apart from catch-all + // bucket of -1 bucket id and dimension 0. std::vector non_empty_partitions; - for (int i = 0; i < partition_ids.size() - 1; ++i) { + bool non_empty_partition = false; + + for (int i = 0; i < partition_ids.size(); ++i) { // Make sure the input is sorted by partition_ids; - CHECK_LE(partition_ids(i), partition_ids(i + 1)); - if (i == 0 || partition_ids(i) != partition_ids(i - 1)) { - partition_boundaries.push_back(i); - // Some partitions might only have bias feature. We don't want to split - // those so check that the partition has at least 2 buckets. - if (partition_ids(i) == partition_ids(i + 1)) { - non_empty_partitions.push_back(partition_boundaries.size() - 1); + if (i > 0) { + CHECK_LE(partition_ids(i - 1), partition_ids(i)) + << "Partition ids should be sorted. Not sorted for " << i; + } + const int32 dimension = bucket_ids_and_dimensions(i, 1); + + if (i == 0 || (partition_ids(i) != partition_ids(i - 1))) { + if (i != 0) { + // Not the first entry, so partition has changed. + if (non_empty_partition) { + // Saves the id of a previous partition in a list of non empty + // partitions, since it was non empty (had more than just a bias + // bucket -1. + non_empty_partitions.push_back(partition_boundaries.size() - 1); + } + // Add dummy dimension to signify the end for the previous dimension. + partition_boundaries.back().emplace_back(DUMMY_FEATURE_DIMENSION, i); } + // Allocate for a new partition. + partition_boundaries.emplace_back(); + // Save info about the first dimension for a new partition. + partition_boundaries.back().emplace_back(dimension, i); + + // Each partition has dummy -1 bucket with all gradients and then info + // for all other dimensions -> if we have >1 elements for a partition, + // then it is not empty. + non_empty_partition = (i < partition_ids.size() - 1) && + (partition_ids(i) == partition_ids(i + 1)); + } else if (bucket_ids_and_dimensions(i, 1) != + bucket_ids_and_dimensions(i - 1, 1)) { + // Dimension changed. + partition_boundaries.back().emplace_back(dimension, i); } } - if (partition_ids.size() > 0) { - partition_boundaries.push_back(partition_ids.size()); + if (tensor_elements > 0) { + if (non_empty_partition) { + non_empty_partitions.push_back(partition_boundaries.size() - 1); + } + // Add dummy dimension to signify the end for the previous dimension. + partition_boundaries.back().emplace_back(DUMMY_FEATURE_DIMENSION, + partition_ids.size()); } + int num_elements = non_empty_partitions.size(); Tensor* output_partition_ids_t = nullptr; OP_REQUIRES_OK(context, @@ -314,73 +355,128 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { &output_splits_t)); tensorflow::TTypes::Vec output_splits = output_splits_t->vec(); + // For each tree node that needs to be split. for (int root_idx = 0; root_idx < num_elements; ++root_idx) { + const auto& dimension_boundaries = + partition_boundaries[non_empty_partitions[root_idx]]; + float best_gain = std::numeric_limits::lowest(); - int start_index = partition_boundaries[non_empty_partitions[root_idx]]; - int end_index = partition_boundaries[non_empty_partitions[root_idx] + 1]; - // First bucket ID in each partition should be the bias feature. - OP_REQUIRES(context, bucket_ids(start_index) == bias_feature_id_, - errors::InvalidArgument("Bias feature ID missing.")); + int32 best_dimension_idx = 0; + bool default_right = false; + int32 best_element_idx = 0; + + NodeStats best_right_node_stats(0); + NodeStats best_left_node_stats(0); + + // For each partition, the first bucket is dummy catch all. + int32 bias_start_index = dimension_boundaries[0].start_index; + + OP_REQUIRES( + context, + bucket_ids_and_dimensions(bias_start_index, 0) == bias_feature_id_, + errors::InvalidArgument("Bias feature ID missing.")); + + // Dimension for bias feature is always 0 + OP_REQUIRES( + context, bucket_ids_and_dimensions(bias_start_index, 1) == 0, + errors::InvalidArgument("Bias feature ID must be with dimension 0.")); + // For each root, we do two passes over the quantized feature buckets // accumulating gradients on one side and using the root aggregate // gradients to get the gradients for the other side. // Split gains are evaluated for each pass at every threshold and the best // split is picked. - GradientStats root_gradient_stats(*gradients_t, *hessians_t, start_index); + GradientStats root_gradient_stats(*gradients_t, *hessians_t, + bias_start_index); root_gradient_stats *= normalizer_ratio; NodeStats root_stats = ComputeNodeStats(root_gradient_stats); - GradientStats present_gradient_stats; - for (int64 bucket_idx = start_index + 1; bucket_idx < end_index; - ++bucket_idx) { - present_gradient_stats += - GradientStats(*gradients_t, *hessians_t, bucket_idx); - } - present_gradient_stats *= normalizer_ratio; - int32 best_bucket_idx = 0; - NodeStats best_right_node_stats(0); - NodeStats best_left_node_stats(0); - GradientStats left_gradient_stats; - bool default_right = false; - for (int64 bucket_idx = start_index + 1; bucket_idx < end_index; - ++bucket_idx) { - GradientStats g(*gradients_t, *hessians_t, bucket_idx); - g *= normalizer_ratio; - left_gradient_stats += g; - // We have the sum of all present gradients. Use that to compute the - // backward pass gradients. - GradientStats right_gradient_stats = - present_gradient_stats - left_gradient_stats; - { - NodeStats left_stats_default_left = - ComputeNodeStats(root_gradient_stats - right_gradient_stats); - NodeStats right_stats_default_left = - ComputeNodeStats(right_gradient_stats); - if (left_stats_default_left.gain + right_stats_default_left.gain > - best_gain) { - best_gain = - left_stats_default_left.gain + right_stats_default_left.gain; - best_left_node_stats = left_stats_default_left; - best_right_node_stats = right_stats_default_left; - best_bucket_idx = bucket_idx; - default_right = false; - } + + // Iterate through dimensions. + for (int j = 0; j < dimension_boundaries.size() - 1; ++j) { + const DimensionBoundary& dimension_and_start = dimension_boundaries[j]; + const int32 dimension_id = dimension_and_start.dimension_id; + + int start_index = dimension_and_start.start_index; + // Even for the last dimension, we always have additional dummy + // dimension that we can use to find the end index. + const int end_index = + partition_boundaries[non_empty_partitions[root_idx]][j + 1] + .start_index; + CHECK(bucket_ids_and_dimensions(start_index, 1) == + bucket_ids_and_dimensions(end_index - 1, 1)) + << "For bucket " << bucket_ids_and_dimensions(start_index, 0) + << " the dimension was " + << bucket_ids_and_dimensions(start_index, 1) << " and for " + << bucket_ids_and_dimensions(end_index - 1, 0) << " " + << bucket_ids_and_dimensions(end_index - 1, 1); + if (bucket_ids_and_dimensions(start_index, 0) == bias_feature_id_) { + // 0-dimension case which has a first bucket for catch all feature. + CHECK(bucket_ids_and_dimensions(start_index, 1) == 0) + << "Dimension of bias feature should be 0"; + ++start_index; } - { - NodeStats left_stats_default_right = - ComputeNodeStats(left_gradient_stats); - NodeStats right_stats_default_right = - ComputeNodeStats(root_gradient_stats - left_gradient_stats); - if (left_stats_default_right.gain + right_stats_default_right.gain > - best_gain) { - best_gain = - left_stats_default_right.gain + right_stats_default_right.gain; - best_left_node_stats = left_stats_default_right; - best_right_node_stats = right_stats_default_right; - best_bucket_idx = bucket_idx; - default_right = true; + + GradientStats present_gradient_stats; + for (int64 bucket_idx = start_index; bucket_idx < end_index; + ++bucket_idx) { + present_gradient_stats += + GradientStats(*gradients_t, *hessians_t, bucket_idx); + } + present_gradient_stats *= normalizer_ratio; + + GradientStats left_gradient_stats; + for (int64 element_idx = start_index; element_idx < end_index; + ++element_idx) { + // Check that bucket ids are sorted. + if (element_idx != start_index) { + CHECK(bucket_ids_and_dimensions(element_idx - 1, 0) < + bucket_ids_and_dimensions(element_idx, 0)) + << "Bucket ids must be sorted." + << ", problem on " << element_idx << " and dimension is " << j; + } + + GradientStats g(*gradients_t, *hessians_t, element_idx); + g *= normalizer_ratio; + left_gradient_stats += g; + // We have the sum of all present gradients. Use that to compute the + // backward pass gradients. + GradientStats right_gradient_stats = + present_gradient_stats - left_gradient_stats; + { + NodeStats left_stats_default_left = + ComputeNodeStats(root_gradient_stats - right_gradient_stats); + NodeStats right_stats_default_left = + ComputeNodeStats(right_gradient_stats); + if (left_stats_default_left.gain + right_stats_default_left.gain > + best_gain) { + best_gain = + left_stats_default_left.gain + right_stats_default_left.gain; + best_left_node_stats = left_stats_default_left; + best_right_node_stats = right_stats_default_left; + best_element_idx = element_idx; + default_right = false; + best_dimension_idx = dimension_id; + } + } + { + NodeStats left_stats_default_right = + ComputeNodeStats(left_gradient_stats); + NodeStats right_stats_default_right = + ComputeNodeStats(root_gradient_stats - left_gradient_stats); + if (left_stats_default_right.gain + right_stats_default_right.gain > + best_gain) { + best_gain = left_stats_default_right.gain + + right_stats_default_right.gain; + best_left_node_stats = left_stats_default_right; + best_right_node_stats = right_stats_default_right; + best_element_idx = element_idx; + default_right = true; + best_dimension_idx = dimension_id; + } } } } + SplitInfo split_info; boosted_trees::trees::DenseFloatBinarySplit* dense_split = nullptr; if (default_right) { @@ -393,8 +489,13 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { ->mutable_split(); } dense_split->set_feature_column(feature_column_group_id_); - dense_split->set_threshold( - bucket_boundaries(bucket_ids(best_bucket_idx))); + // Set the feature index for the best feature column. + const int64 best_feature_id = + bucket_ids_and_dimensions(best_element_idx, 1); + const int32 best_bucket_id = + bucket_ids_and_dimensions(best_element_idx, 0); + dense_split->set_feature_id(best_feature_id); + dense_split->set_threshold(bucket_boundaries(best_bucket_id)); auto* left_child = split_info.mutable_left_child(); auto* right_child = split_info.mutable_right_child(); @@ -403,11 +504,23 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { split_info.SerializeToString(&output_splits(root_idx)); gains(root_idx) = best_gain - root_stats.gain - tree_complexity_regularization_; - output_partition_ids(root_idx) = partition_ids(start_index); + output_partition_ids(root_idx) = partition_ids(bias_start_index); } } private: + struct DimensionBoundary { + DimensionBoundary(const int32 dimension_id, const int32 start_index) + : dimension_id(dimension_id), start_index(start_index) {} + + int32 dimension_id; + int32 start_index; + }; + + // For each partition, store start indices of feature column dimensions. + typedef std::vector> + PartitionAndDimensionBoundaries; + int64 bias_feature_id_; }; REGISTER_KERNEL_BUILDER(Name("BuildSparseInequalitySplits").Device(DEVICE_CPU), @@ -434,7 +547,7 @@ class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { const Tensor* feature_ids_t; OP_REQUIRES_OK(context, context->input("feature_ids", &feature_ids_t)); - const auto& feature_ids = feature_ids_t->vec(); + const auto& feature_ids = feature_ids_t->matrix(); const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); @@ -491,7 +604,7 @@ class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { int start_index = partition_boundaries[non_empty_partitions[root_idx]]; int end_index = partition_boundaries[non_empty_partitions[root_idx] + 1]; // First feature ID in each partition should be the bias feature. - OP_REQUIRES(context, feature_ids(start_index) == bias_feature_id_, + OP_REQUIRES(context, feature_ids(start_index, 0) == bias_feature_id_, errors::InvalidArgument("Bias feature ID missing.")); GradientStats root_gradient_stats(*gradients_t, *hessians_t, start_index); root_gradient_stats *= normalizer_ratio; @@ -519,7 +632,7 @@ class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { auto* equality_split = split_info.mutable_split_node() ->mutable_categorical_id_binary_split(); equality_split->set_feature_column(feature_column_group_id_); - equality_split->set_feature_id(feature_ids(best_feature_idx)); + equality_split->set_feature_id(feature_ids(best_feature_idx, 0)); auto* left_child = split_info.mutable_left_child(); auto* right_child = split_info.mutable_right_child(); FillLeaf(class_id, best_left_node_stats, left_child); diff --git a/tensorflow/contrib/boosted_trees/kernels/stats_accumulator_ops.cc b/tensorflow/contrib/boosted_trees/kernels/stats_accumulator_ops.cc index cff75e71d9..a9a229c8ae 100644 --- a/tensorflow/contrib/boosted_trees/kernels/stats_accumulator_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/stats_accumulator_ops.cc @@ -39,13 +39,14 @@ const char* const kStampTokenName = "stamp_token"; const char* const kNextStampTokenName = "next_stamp_token"; struct PartitionKey { - PartitionKey() : partition_id(-1), feature_id(-1) {} + PartitionKey() : partition_id(-1), feature_id(-1), dimension(-1) {} - PartitionKey(int32 p, int64 f) : partition_id(p), feature_id(f) {} + PartitionKey(int32 p, int64 f, int32 d) + : partition_id(p), feature_id(f), dimension(d) {} bool operator==(const PartitionKey& other) const { - return (feature_id == other.feature_id) && - (partition_id == other.partition_id); + return (partition_id == other.partition_id) && + (dimension == other.dimension) && (feature_id == other.feature_id); } // Compare for PartitionKey. @@ -54,7 +55,11 @@ struct PartitionKey { if (a.partition_id < b.partition_id) { return true; } - if ((a.partition_id == b.partition_id) && (a.feature_id < b.feature_id)) { + if ((a.partition_id == b.partition_id) && (a.dimension < b.dimension)) { + return true; + } + if ((a.partition_id == b.partition_id) && (a.dimension == b.dimension) && + (a.feature_id < b.feature_id)) { return true; } return false; @@ -64,8 +69,11 @@ struct PartitionKey { // Tree partition defined by traversing the tree to the leaf. int32 partition_id; - // Feature Id within the feature column. + // Feature column id. int64 feature_id; + + // Dimension within feature column. + int32 dimension; }; template @@ -132,12 +140,12 @@ void SerializeScalarAccumulatorToOutput( &partition_ids_t)); auto partition_ids = partition_ids_t->vec(); + // Feature ids tensor has ids of feature columns and their dimensions. Tensor* feature_ids_t = nullptr; - OP_REQUIRES_OK( - context, - context->allocate_output("output_feature_ids", TensorShape({num_slots}), - &feature_ids_t)); - auto feature_ids = feature_ids_t->vec(); + OP_REQUIRES_OK(context, context->allocate_output("output_feature_ids", + TensorShape({num_slots, 2}), + &feature_ids_t)); + auto feature_ids = feature_ids_t->matrix(); Tensor* gradients_t = nullptr; OP_REQUIRES_OK( @@ -155,7 +163,9 @@ void SerializeScalarAccumulatorToOutput( int i = 0; for (const auto& iter : accumulator_resource.values()) { partition_ids(i) = iter.first.partition_id; - feature_ids(i) = iter.first.feature_id; + feature_ids(i, 0) = iter.first.feature_id; + feature_ids(i, 1) = iter.first.dimension; + gradients(i) = iter.second.first; hessians(i) = iter.second.second; ++i; @@ -174,11 +184,10 @@ void SerializeTensorAccumulatorToOutput( auto partition_ids = partition_ids_t->vec(); Tensor* feature_ids_t = nullptr; - OP_REQUIRES_OK( - context, - context->allocate_output("output_feature_ids", TensorShape({num_slots}), - &feature_ids_t)); - auto feature_ids = feature_ids_t->vec(); + OP_REQUIRES_OK(context, context->allocate_output("output_feature_ids", + TensorShape({num_slots, 2}), + &feature_ids_t)); + auto feature_ids = feature_ids_t->matrix(); TensorShape gradient_shape = accumulator_resource.gradient_shape(); int64 num_gradient_elements = gradient_shape.num_elements(); @@ -201,7 +210,9 @@ void SerializeTensorAccumulatorToOutput( int i = 0; for (const auto& iter : accumulator_resource.values()) { partition_ids(i) = iter.first.partition_id; - feature_ids(i) = iter.first.feature_id; + feature_ids(i, 0) = iter.first.feature_id; + feature_ids(i, 1) = iter.first.dimension; + for (int j = 0; j < num_gradient_elements; ++j) { gradients(i, j) = iter.second.first[j]; } @@ -220,14 +231,16 @@ void AddToScalarAccumulator( 1); const TensorShape& partition_ids_shape = partition_ids_t.shape(); const auto& partition_ids = partition_ids_t.vec(); - const auto& feature_ids = feature_ids_t.vec(); + const auto& feature_ids_and_dimensions = feature_ids_t.matrix(); const auto& gradients = gradients_t.vec(); const auto& hessians = hessians_t.vec(); int64 num_updates = partition_ids_shape.dim_size(0); auto stats_map = accumulator_resource->mutable_values(); for (int64 i = 0; i < num_updates; ++i) { - const auto key = PartitionKey(partition_ids(i), feature_ids(i)); + const auto key = + PartitionKey(partition_ids(i), feature_ids_and_dimensions(i, 0), + feature_ids_and_dimensions(i, 1)); auto itr = stats_map->find(key); if (itr != stats_map->end()) { itr->second.first += gradients(i); @@ -263,7 +276,7 @@ void AddToTensorAccumulator( const TensorShape& partition_ids_shape = partition_ids_t.shape(); const auto& partition_ids = partition_ids_t.vec(); - const auto& feature_ids = feature_ids_t.vec(); + const auto& feature_ids_and_dimensions = feature_ids_t.matrix(); TensorShape gradients_shape = gradients_t.shape(); const auto& gradients = gradients_t.flat_outer_dims(); TensorShape hessians_shape = hessians_t.shape(); @@ -288,7 +301,9 @@ void AddToTensorAccumulator( int64 num_updates = partition_ids_shape.dim_size(0); auto stats_map = accumulator_resource->mutable_values(); for (int64 i = 0; i < num_updates; ++i) { - const auto key = PartitionKey(partition_ids(i), feature_ids(i)); + const auto key = + PartitionKey(partition_ids(i), feature_ids_and_dimensions(i, 0), + feature_ids_and_dimensions(i, 1)); auto itr = stats_map->find(key); if (itr == stats_map->end()) { std::vector new_gradients(gradients_shape.num_elements()); diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py index 83dad7e4b3..9f78ab2024 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py @@ -110,8 +110,8 @@ class EqualitySplitHandler(base_split_handler.BaseSplitHandler): def not_active_inputs(): return (constant_op.constant([], dtype=dtypes.int32), - constant_op.constant([], dtype=dtypes.int64), empty_gradients, - empty_hessians) + constant_op.constant([], dtype=dtypes.int64, shape=[1, 2]), + empty_gradients, empty_hessians) def active_inputs(): """The normal flow when the handler is active.""" @@ -154,7 +154,12 @@ class EqualitySplitHandler(base_split_handler.BaseSplitHandler): [per_partition_hessians, filtered_hessians], 0) feature_ids = array_ops.concat( [bias_feature_ids, self._sparse_int_column.values], 0) - return partition_ids, feature_ids, filtered_gradients, filtered_hessians + # Dimension is always zero for sparse int features. + dimension_ids = array_ops.zeros_like(feature_ids, dtype=dtypes.int64) + feature_ids_and_dimensions = array_ops.stack( + [feature_ids, dimension_ids], axis=1) + return (partition_ids, feature_ids_and_dimensions, filtered_gradients, + filtered_hessians) partition_ids, feature_ids, gradients_out, hessians_out = ( control_flow_ops.cond(is_active[0], active_inputs, not_active_inputs)) diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py index 8c0a3f0d91..72e20aaa12 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py @@ -257,6 +257,7 @@ class DenseSplitHandler(InequalitySplitHandler): # Put quantile and stats accumulator flushing in the dependency path. are_splits_ready = control_flow_ops.with_dependencies( [flush_quantiles, partition_ids], are_splits_ready) + partition_ids, gains, split_infos = ( split_handler_ops.build_dense_inequality_splits( num_minibatches=num_minibatches, @@ -433,14 +434,15 @@ def dense_make_stats_update(is_active, are_buckets_ready, float_column, def ready_inputs_fn(): """Branch to execute when quantiles are ready.""" quantized_feature = quantile_ops.quantiles([float_column], [], - [quantile_buckets], []) + [quantile_buckets], [], []) quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64) - quantized_feature = array_ops.reshape(quantized_feature, [-1]) + quantized_feature = array_ops.squeeze(quantized_feature) return (example_partition_ids, quantized_feature, gradients, hessians) def not_ready_inputs_fn(): - return (constant_op.constant([], dtype=dtypes.int32), constant_op.constant( - [], dtype=dtypes.int64), empty_gradients, empty_hessians) + return (constant_op.constant([], dtype=dtypes.int32), + constant_op.constant([[]], dtype=dtypes.int64, shape=[1, 2]), + empty_gradients, empty_hessians) example_partition_ids, feature_ids, gradients, hessians = ( control_flow_ops.cond( @@ -461,10 +463,13 @@ def sparse_make_stats_update( def quantiles_ready(): """The subgraph for when the quantiles are ready.""" - quantized_feature = quantile_ops.quantiles([sparse_column_values], [], - [quantile_buckets], []) - quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64) - quantized_feature = array_ops.reshape(quantized_feature, [-1]) + quantized_feature = quantile_ops.quantiles([], [sparse_column_values], [], + [quantile_buckets], + [sparse_column_indices]) + + quantized_feature = math_ops.cast(quantized_feature[1], dtypes.int64) + quantized_feature = array_ops.squeeze(quantized_feature) + example_indices, _ = array_ops.split( sparse_column_indices, num_or_size_splits=2, axis=1) example_indices = array_ops.squeeze(example_indices, [1]) @@ -486,19 +491,25 @@ def sparse_make_stats_update( bias_feature_ids = array_ops.fill( array_ops.shape(unique_partitions), _BIAS_FEATURE_ID) bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64) + zeros = array_ops.zeros_like(bias_feature_ids) + bias_feature_ids = array_ops.stack([bias_feature_ids, zeros], axis=1) + partition_ids = array_ops.concat( [unique_partitions, filtered_partition_ids], 0) filtered_gradients = array_ops.concat( [per_partition_gradients, filtered_gradients], 0) filtered_hessians = array_ops.concat( [per_partition_hessians, filtered_hessians], 0) + bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0) + return partition_ids, bucket_ids, filtered_gradients, filtered_hessians def quantiles_not_ready(): """The subgraph for when the quantiles are not ready.""" - return (constant_op.constant([], dtype=dtypes.int32), constant_op.constant( - [], dtype=dtypes.int64), empty_gradients, empty_hessians) + return (constant_op.constant([], dtype=dtypes.int32), + constant_op.constant([], dtype=dtypes.int64, shape=[1, 2]), + empty_gradients, empty_hessians) empty_float = constant_op.constant([], dtype=dtypes.float32) handler_not_active = (constant_op.constant( diff --git a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc index 4ca73ef6e3..1fa70bafdd 100644 --- a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc @@ -268,6 +268,7 @@ REGISTER_OP("Quantiles") .Input("sparse_values: num_sparse_features * float") .Input("dense_buckets: num_dense_features * float") .Input("sparse_buckets: num_sparse_features * float") + .Input("sparse_indices: num_sparse_features * int64") .Output("dense_quantiles: num_dense_features * int32") .Output("sparse_quantiles: num_sparse_features * int32") .Doc(R"doc( @@ -280,10 +281,13 @@ dense_values: List of rank 1 tensors containing the dense values. sparse_values: List of rank 1 tensors containing the sparse feature values. dense_buckets: Quantile summary for each of the dense float tensor. sparse_buckets: Quantile summary for each of the sparse feature float tensor. -dense_quantiles: Rank 1 tensors representing associated quantiles for each of -dense float tensors. -sparse_quantiles: Rank 1 tensors representing associated quantiles for each of -the sparse feature tensors. +sparse_indices: List of rank 2 tensors with indices for sparse float +tensors. +dense_quantiles: Rank 2 tensors representing associated quantiles for each of +dense float tensors and the dimension. +sparse_quantiles: Rank 2 tensors representing associated quantiles for each of +the sparse feature tensors for each of sparse feature dimensions: +[quantile id, dimension id]. )doc"); REGISTER_OP("BucketizeWithInputBoundaries") diff --git a/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc index 07cfd413bb..0d27ddaf3a 100644 --- a/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc @@ -47,9 +47,7 @@ REGISTER_OP("BuildDenseInequalitySplits") ShapeHandle partition_ids_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &partition_ids_shape)); ShapeHandle bucket_ids_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &bucket_ids_shape)); - TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), - c->Dim(bucket_ids_shape, 0), &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 2, &bucket_ids_shape)); ShapeHandle gradients_shape; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(3), 1, &gradients_shape)); TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), @@ -71,7 +69,7 @@ Find the split that has the best gain for the accumulated stats. num_minibatches: A scalar, the number of times per example gradients & hessians were accumulated. The stats are divided by this to get per example stats. partition_ids: A rank 1 tensor of partition IDs. -bucket_ids: A rank 1 tensor of buckets IDs. +bucket_ids: A rank 2 tensor of buckets IDs and dimensions. gradients: A rank 1 tensor of gradients. hessians: A rank 1 tensor of hessians. bucket_boundaries: A rank 1 tensor, thresholds that were used for bucketization. @@ -108,9 +106,7 @@ REGISTER_OP("BuildSparseInequalitySplits") ShapeHandle partition_ids_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &partition_ids_shape)); ShapeHandle bucket_ids_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &bucket_ids_shape)); - TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), - c->Dim(bucket_ids_shape, 0), &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 2, &bucket_ids_shape)); ShapeHandle gradients_shape; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(3), 1, &gradients_shape)); TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), @@ -127,12 +123,13 @@ REGISTER_OP("BuildSparseInequalitySplits") return Status::OK(); }) .Doc(R"doc( -Find the split that has the best gain for the accumulated stats. +Find the split that has the best gain for the accumulated stats for a particular +feature column. num_minibatches: A scalar, the number of times per example gradients & hessians were accumulated. The stats are divided by this to get per example stats. -partition_ids: A rank 1 tensor of partition IDs. -bucket_ids: A rank 1 tensor of buckets IDs. +partition_ids: A rank 2 tensor of partition IDs for each dimension of feature column. +bucket_ids: A rank 2 tensor of buckets IDs and dimensions. gradients: A rank 1 tensor of gradients. hessians: A rank 1 tensor of hessians. bucket_boundaries: A rank 1 tensor, thresholds that were used for bucketization. @@ -168,9 +165,7 @@ REGISTER_OP("BuildCategoricalEqualitySplits") ShapeHandle partition_ids_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &partition_ids_shape)); ShapeHandle bucket_ids_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &bucket_ids_shape)); - TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), - c->Dim(bucket_ids_shape, 0), &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 2, &bucket_ids_shape)); ShapeHandle gradients_shape; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(3), 1, &gradients_shape)); TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), @@ -190,7 +185,7 @@ Find the split that has the best gain for the accumulated stats. num_minibatches: A scalar, the number of times per example gradients & hessians were accumulated. The stats are divided by this to get per example stats. partition_ids: A rank 1 tensor of partition IDs. -feature_ids: A rank 1 tensor of feature IDs. +feature_ids: A rank 2 tensor of feature IDs and dimensions. gradients: A rank 1 tensor of gradients. hessians: A rank 1 tensor of hessians. output_partition_ids: A rank 1 tensor, the partition IDs that we created splits diff --git a/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc b/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc index f988755de0..0354f7853c 100644 --- a/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc @@ -73,9 +73,7 @@ REGISTER_OP("StatsAccumulatorScalarAdd") 1, &partition_ids_shape)); ShapeHandle feature_ids_shape; TF_RETURN_IF_ERROR(c->WithRank( - c->input(num_resource_handles * 2 + i + 1), 1, &feature_ids_shape)); - TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), - c->Dim(feature_ids_shape, 0), &unused_dim)); + c->input(num_resource_handles * 2 + i + 1), 2, &feature_ids_shape)); ShapeHandle gradients_shape; TF_RETURN_IF_ERROR(c->WithRank( c->input(num_resource_handles * 3 + i + 1), 1, &gradients_shape)); @@ -96,11 +94,11 @@ stamp_token: Stamp token for Read/Write operations. Any operation with a mismatching token will be dropped. stats_accumulator_handles: A list of handles to the stats accumulator. partition_ids: A list of vectors of partition_ids. -feature_ids: A list of vectors of feature_ids. +feature_ids: Rank 2 tensor of feature id and feature dimension ids. gradients: A list of vectors of gradients for each slot in - . + . hessians: A list of vectors of hessians for each slot in - . + . )doc"); REGISTER_OP("StatsAccumulatorScalarFlush") @@ -119,7 +117,7 @@ REGISTER_OP("StatsAccumulatorScalarFlush") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused_input)); c->set_output(0, c->Scalar()); c->set_output(1, c->Vector(c->UnknownDim())); - c->set_output(2, c->Vector(c->UnknownDim())); + c->set_output(2, c->UnknownShape()); c->set_output(3, c->Vector(c->UnknownDim())); c->set_output(4, c->Vector(c->UnknownDim())); return Status::OK(); @@ -134,7 +132,7 @@ next_stamp_token: Stamp token for the next iteration. num_updates: Number of times stats were added to this accumulator since last flush. output_partition_ids A vector of partition_ids for the slots. -output_feature_ids: A vector of feature_ids for the slots. +output_feature_ids: Rank 2 tensor of feature id and feature dimension ids. output_gradients: A vector of gradients, with a value for each slot in . output_hessians: A vector of hessians, with a value for each slot @@ -161,9 +159,7 @@ REGISTER_OP("StatsAccumulatorScalarDeserialize") ShapeHandle partition_ids_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &partition_ids_shape)); ShapeHandle feature_ids_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 1, &feature_ids_shape)); - TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), - c->Dim(feature_ids_shape, 0), &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 2, &feature_ids_shape)); ShapeHandle gradients_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 1, &gradients_shape)); TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), @@ -183,9 +179,11 @@ stamp_token: Stamp token for Read/Write operations. num_updates: Number of times stats were added to this accumulator since last flush. partition_ids: A vector of partition_ids. -feature_ids: A vector of feature_ids. -gradients: A vector of gradients for each slot in . -hessians: A vector of hessians for each slot in . +feature_ids: Rank 2 tensor of feature id and feature dimension ids. +gradients: A vector of gradients for each slot in . +hessians: A vector of hessians for each slot in )doc"); REGISTER_OP("StatsAccumulatorScalarSerialize") @@ -204,7 +202,7 @@ REGISTER_OP("StatsAccumulatorScalarSerialize") // num_updates c->set_output(1, c->Scalar()); c->set_output(2, c->Vector(c->UnknownDim())); - c->set_output(3, c->Vector(c->UnknownDim())); + c->set_output(3, c->UnknownShape()); c->set_output(4, c->Vector(c->UnknownDim())); c->set_output(5, c->Vector(c->UnknownDim())); return Status::OK(); @@ -217,7 +215,7 @@ stamp_token: The current stamp token for the resource. num_updates: Number of times stats were added to this accumulator since last flush. output_partition_ids A vector of partition_ids for the slots. -output_feature_ids: A vector of feature_ids for the slots. +output_feature_ids: Rank 2 tensor of feature id and feature dimension ids. output_gradients: A vector of gradients, with a value for each slot in . output_hessians: A vector of hessians, with a value for each slot @@ -293,9 +291,7 @@ REGISTER_OP("StatsAccumulatorTensorAdd") 1, &partition_ids_shape)); ShapeHandle feature_ids_shape; TF_RETURN_IF_ERROR(c->WithRank( - c->input(num_resource_handles * 2 + i + 1), 1, &feature_ids_shape)); - TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), - c->Dim(feature_ids_shape, 0), &unused_dim)); + c->input(num_resource_handles * 2 + i + 1), 2, &feature_ids_shape)); ShapeHandle gradients_shape; TF_RETURN_IF_ERROR(c->WithRankAtLeast( c->input(num_resource_handles * 3 + i + 1), 2, &gradients_shape)); @@ -316,11 +312,11 @@ stats_accumulator_handles: A list of handles to the stats accumulator. stamp_token: Stamp token for Read/Write operations. Any operation with a mismatching token will be dropped. partition_ids: A list of vectors of partition_ids. -feature_ids: A list of vectors of feature_ids. +feature_ids: Rank 2 tensor of feature id and feature dimension ids. gradients: A list of vectors of gradients for each slot in - . + . hessians: A list of vectors of hessians for each slot in - . + . )doc"); REGISTER_OP("StatsAccumulatorTensorFlush") @@ -340,7 +336,7 @@ REGISTER_OP("StatsAccumulatorTensorFlush") // num_updates c->set_output(0, c->Scalar()); c->set_output(1, c->Vector(c->UnknownDim())); - c->set_output(2, c->Vector(c->UnknownDim())); + c->set_output(2, c->UnknownShape()); c->set_output(3, c->UnknownShape()); c->set_output(4, c->UnknownShape()); return Status::OK(); @@ -355,11 +351,11 @@ next_stamp_token: Stamp token to be used for the next iteration. num_updates: Number of times stats were added to this accumulator since last flush. output_partition_ids: A vector of partition_ids for the slots. -output_feature_ids: A vector of feature_ids for the slots. +output_feature_ids: Rank 2 tensor of feature id and feature dimension ids. output_gradients: A tensor of gradients, first dimension matches slots - in . + in . output_hessians: A tensor of hessians, first dimension matches slots - in . + in >. )doc"); REGISTER_OP("StatsAccumulatorTensorDeserialize") @@ -382,9 +378,7 @@ REGISTER_OP("StatsAccumulatorTensorDeserialize") ShapeHandle partition_ids_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &partition_ids_shape)); ShapeHandle feature_ids_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 1, &feature_ids_shape)); - TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), - c->Dim(feature_ids_shape, 0), &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 2, &feature_ids_shape)); ShapeHandle gradients_shape; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(5), 2, &gradients_shape)); TF_RETURN_IF_ERROR(c->Merge(c->Dim(partition_ids_shape, 0), @@ -405,9 +399,11 @@ stamp_token: Stamp token for Read/Write operations. num_updates: Number of times stats were added to this accumulator since last flush. partition_ids: A vector of partition_ids. -feature_ids: A vector of feature_ids. -gradients: A vector of gradients for each slot in . -hessians: A vector of hessians for each slot in . +feature_ids: Rank 2 tensor of feature id and feature dimension ids. +gradients: A vector of gradients for each slot in +hessians: A vector of hessians for each slot in . )doc"); REGISTER_OP("StatsAccumulatorTensorSerialize") @@ -426,7 +422,7 @@ REGISTER_OP("StatsAccumulatorTensorSerialize") // num_updates c->set_output(1, c->Scalar()); c->set_output(2, c->Vector(c->UnknownDim())); - c->set_output(3, c->Vector(c->UnknownDim())); + c->set_output(3, c->UnknownShape()); c->set_output(4, c->UnknownShape()); c->set_output(5, c->UnknownShape()); return Status::OK(); @@ -440,11 +436,11 @@ stamp_token: Stamp token for Read/Write operations. num_updates: Number of times stats were added to this accumulator since last flush. output_partition_ids: A vector of partition_ids for the slots. -output_feature_ids: A vector of feature_ids for the slots. +output_feature_ids: Rank 2 tensor of feature id and feature dimension ids. output_gradients: A tensor of gradients, first dimension matches slots - in . + in . output_hessians: A tensor of hessians, first dimension matches slots - in . + in . )doc"); REGISTER_OP("StatsAccumulatorTensorMakeSummary") @@ -458,18 +454,20 @@ REGISTER_OP("StatsAccumulatorTensorMakeSummary") .Output("output_hessians: float") .Doc(R"doc( Summarizes the stats by summing the that are for the same -. +. partition_ids: A vector of partition_ids. -feature_ids: A vector of feature_ids. -gradients: A vector of gradients for each slot in . -hessians: A vector of hessians for each slot in . +feature_ids: Rank 2 tensor of feature id and feature dimension ids. +gradients: A vector of gradients for each slot in . +hessians: A vector of hessians for each slot in . output_partition_ids: A vector of partition_ids for the slots. -output_feature_ids: A vector of feature_ids for the slots. +output_feature_ids: A rank2 tensor of feature_ids and dimensions for the slots. output_gradients: A tensor of gradients, first dimension matches slots - in . + in . output_hessians: A tensor of hessians, first dimension matches slots - in . + in . )doc"); } // namespace boosted_trees } // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py index 79802922ca..9ada844601 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py @@ -75,7 +75,7 @@ def _append_multi_values_to_dense_leaf(leaf, w): leaf.vector.value.append(x) -def _set_float_split(split, feat_col, thresh, l_id, r_id): +def _set_float_split(split, feat_col, thresh, l_id, r_id, feature_dim_id=None): """Helper method for building tree float splits. Sets split feature column, threshold and children. @@ -86,11 +86,14 @@ def _set_float_split(split, feat_col, thresh, l_id, r_id): thresh: threshold to split on forming rule x <= thresh. l_id: left child Id. r_id: right child Id. + feature_dim_id: dimension of the feature column to be used in the split. """ split.feature_column = feat_col split.threshold = thresh split.left_id = l_id split.right_id = r_id + if feature_dim_id is not None: + split.feature_id = feature_dim_id def _set_categorical_id_split(split, feat_col, feat_id, l_id, r_id): @@ -116,12 +119,12 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): def setUp(self): """Sets up the prediction tests. - Create a batch of two examples having one dense float, two sparse float and - one sparse int features. + Create a batch of two examples having one dense float, two sparse float + single valued, one sparse float multidimensionl and one sparse int features. The data looks like the following: - | Instance | Dense0 | SparseF0 | SparseF1 | SparseI0 | - | 0 | 7 | -3 | | 9,1 | - | 1 | -2 | | 4 | | + | Instance | Dense0 | SparseF0 | SparseF1 | SparseI0 | SparseM + | 0 | 7 | -3 | | 9,1 | __, 5.0 + | 1 | -2 | | 4 | | 3, ___ """ super(PredictionOpsTest, self).setUp() self._dense_float_tensor = np.array([[7.0], [-2.0]]) @@ -131,6 +134,11 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): self._sparse_float_indices2 = np.array([[1, 0]]) self._sparse_float_values2 = np.array([4.0]) self._sparse_float_shape2 = np.array([2, 1]) + # Multi dimensional sparse float + self._sparse_float_indices_m = np.array([[0, 1], [1, 0]]) + self._sparse_float_values_m = np.array([5.0, 3.0]) + self._sparse_float_shape_m = np.array([2, 2]) + self._sparse_int_indices1 = np.array([[0, 0], [0, 1]]) self._sparse_int_values1 = np.array([9, 1]) self._sparse_int_shape1 = np.array([2, 2]) @@ -287,6 +295,94 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): # Empty dropout. self.assertAllEqual([[], []], dropout_info.eval()) + def testFullEnsembleWithMultidimensionalSparseSingleClass(self): + with self.test_session(): + tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() + # Bias tree. + tree1 = tree_ensemble_config.trees.add() + tree_ensemble_config.tree_metadata.add().is_finalized = True + _append_to_leaf(tree1.nodes.add().leaf, 0, -0.4) + + # Depth 3 tree. + tree2 = tree_ensemble_config.trees.add() + tree_ensemble_config.tree_metadata.add().is_finalized = True + # Use feature column 2 (sparse multidimensional), split on first value + # node 0. + _set_float_split( + tree2.nodes.add().sparse_float_binary_split_default_right.split, + 2, + 7.0, + 1, + 2, + feature_dim_id=0) + # Leafs split on second dimension of sparse multidimensional feature. + # Node 1. + _set_float_split( + tree2.nodes.add().sparse_float_binary_split_default_left.split, + 2, + 4.5, + 3, + 4, + feature_dim_id=1) + # Node 2. + _set_float_split( + tree2.nodes.add().sparse_float_binary_split_default_right.split, + 2, + 9, + 5, + 6, + feature_dim_id=1) + + # Node 3. + _append_to_leaf(tree2.nodes.add().leaf, 0, 0.6) + # Node 4. + _append_to_leaf(tree2.nodes.add().leaf, 0, 1.3) + + # Node 5. + _append_to_leaf(tree2.nodes.add().leaf, 0, -0.1) + # Node 6. + _append_to_leaf(tree2.nodes.add().leaf, 0, 0.8) + + tree_ensemble_config.tree_weights.append(1.0) + tree_ensemble_config.tree_weights.append(1.0) + + tree_ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, + tree_ensemble_config=tree_ensemble_config.SerializeToString(), + name="full_ensemble") + resources.initialize_resources(resources.shared_resources()).run() + + # Prepare learner config. + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + + result, dropout_info = prediction_ops.gradient_trees_prediction( + tree_ensemble_handle, + self._seed, [self._dense_float_tensor], [ + self._sparse_float_indices1, self._sparse_float_indices2, + self._sparse_float_indices_m + ], [ + self._sparse_float_values1, self._sparse_float_values2, + self._sparse_float_values_m + ], [ + self._sparse_float_shape1, self._sparse_float_shape2, + self._sparse_float_shape_m + ], [self._sparse_int_indices1], [self._sparse_int_values1], + [self._sparse_int_shape1], + learner_config=learner_config.SerializeToString(), + apply_dropout=False, + apply_averaging=False, + center_bias=False, + reduce_dim=True) + + # The first example will get bias -0.4 from first tree and + # leaf 5 payload of -0.1 hence -0.5, the second example will + # get the same bias -0.4 and leaf 3 payload (0.6) hence 0.2 + self.assertAllClose([[-0.5], [0.2]], result.eval()) + + # Empty dropout. + self.assertAllEqual([[], []], dropout_info.eval()) + def testExcludeNonFinalTree(self): with self.test_session(): tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() @@ -322,7 +418,6 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE - result, dropout_info = self._get_predictions( tree_ensemble_handle, learner_config=learner_config.SerializeToString(), @@ -370,7 +465,6 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER - result, dropout_info = self._get_predictions( tree_ensemble_handle, learner_config=learner_config.SerializeToString(), @@ -420,7 +514,6 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): # Prepare learner config. learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 - result, dropout_info = self._get_predictions( tree_ensemble_handle, learner_config=learner_config.SerializeToString(), diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py index 1513c11c33..2a72961504 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py @@ -349,19 +349,21 @@ class QuantilesOpTest(test_util.TensorFlowTestCase): def setUp(self): """Sets up the quantile op tests. - Create a batch of 4 examples having 2 dense and 3 sparse features. + Create a batch of 4 examples having 2 dense and 4 sparse features. + Forth sparse feature is multivalent (3 dimensional) The data looks like this - | Instance | Dense 0 | Dense 1 | Sparse 0 | Sparse 1 | Sparse 2 - | 0 | -0.1 | -1 | -2 | 0.1 | - | 1 | 0.4 | -15 | 5.5 | | 2 - | 2 | 3.2 | 18 | 16 | 3 | - | 3 | 190 | 1000 | 17.5 | -3 | 4 + | Instance | Dense 0 | Dense 1 | Sparse 0 | Sparse 1 |Sparse 2| SparseM + | 0 | -0.1 | -1 | -2 | 0.1 | |_ ,1,_ + | 1 | 0.4 | -15 | 5.5 | | 2 |2 ,_,_ + | 2 | 3.2 | 18 | 16 | 3 | |__,_,_ + | 3 | 190 | 1000 | 17.5 | -3 | 4 |1 ,8,1 Quantiles are: Dense 0: (-inf,0.4], (0.4,5], (5, 190] Dense 1: (-inf, -9], (-9,15], (15, 1000) Sparse 0: (-inf, 5], (5,16], (16, 100] Sparse 1: (-inf, 2], (2, 5] Sparse 2: (-inf, 100] + SparseM: (-inf, 1], (1,2], (2,1000] """ super(QuantilesOpTest, self).setUp() self._dense_float_tensor_0 = constant_op.constant( @@ -369,18 +371,26 @@ class QuantilesOpTest(test_util.TensorFlowTestCase): self._dense_float_tensor_1 = constant_op.constant( [[-1], [-15], [18], [1000]], dtype=dtypes.float32) # Sparse feature 0 - self._sparse_indices_0 = constant_op.constant([[0, 0], [1, 0], [2, 0], - [3, 0]]) + self._sparse_indices_0 = constant_op.constant( + [[0, 0], [1, 0], [2, 0], [3, 0]], dtype=dtypes.int64) self._sparse_values_0 = constant_op.constant([-2, 5.5, 16, 17.5]) self._sparse_shape_0 = constant_op.constant([4, 1]) # Sprase feature 1 - self._sparse_indices_1 = constant_op.constant([[0, 0], [2, 0], [3, 0]]) + self._sparse_indices_1 = constant_op.constant( + [[0, 0], [2, 0], [3, 0]], dtype=dtypes.int64) self._sparse_values_1 = constant_op.constant([0.1, 3, -3]) self._sparse_shape_1 = constant_op.constant([4, 1]) # Sprase feature 2 - self._sparse_indices_2 = constant_op.constant([[1, 0], [3, 0]]) + self._sparse_indices_2 = constant_op.constant( + [[1, 0], [3, 0]], dtype=dtypes.int64) self._sparse_values_2 = constant_op.constant([2, 4], dtype=dtypes.float32) self._sparse_shape_2 = constant_op.constant([4, 1]) + # Sprase feature M + self._sparse_indices_m = constant_op.constant( + [[0, 1], [1, 0], [3, 0], [3, 1], [3, 2]], dtype=dtypes.int64) + self._sparse_values_m = constant_op.constant( + [1, 2, 1, 8, 1], dtype=dtypes.float32) + self._sparse_shape_m = constant_op.constant([4, 1]) # Quantiles self._dense_thresholds_0 = [0.4, 5, 190] self._dense_thresholds_1 = [-9, 15, 1000] @@ -388,52 +398,76 @@ class QuantilesOpTest(test_util.TensorFlowTestCase): self._sparse_thresholds_0 = [5, 16, 100] self._sparse_thresholds_1 = [2, 5] self._sparse_thresholds_2 = [100] + self._sparse_thresholds_m = [1, 2, 1000] def testDenseFeaturesOnly(self): with self.test_session(): dense_quantiles, _ = quantile_ops.quantiles( [self._dense_float_tensor_0, self._dense_float_tensor_1], [], - [self._dense_thresholds_0, self._dense_thresholds_1], []) + [self._dense_thresholds_0, self._dense_thresholds_1], [], []) # Dense feature 0 - self.assertAllEqual([0, 0, 1, 2], dense_quantiles[0].eval()) + self.assertAllEqual([[0, 0], [0, 0], [1, 0], [2, 0]], + dense_quantiles[0].eval()) # Dense feature 1 - self.assertAllEqual([1, 0, 2, 2], dense_quantiles[1].eval()) + self.assertAllEqual([[1, 0], [0, 0], [2, 0], [2, 0]], + dense_quantiles[1].eval()) def testSparseFeaturesOnly(self): with self.test_session(): - _, sparse_quantiles = quantile_ops.quantiles( - [], - [self._sparse_values_0, self._sparse_values_1, self._sparse_values_2], - [], [self._sparse_thresholds_0, self._sparse_thresholds_1, - self._sparse_thresholds_2]) - + _, sparse_quantiles = quantile_ops.quantiles([], [ + self._sparse_values_0, self._sparse_values_1, self._sparse_values_2, + self._sparse_values_m + ], [], [ + self._sparse_thresholds_0, self._sparse_thresholds_1, + self._sparse_thresholds_2, self._sparse_thresholds_m + ], [ + self._sparse_indices_0, self._sparse_indices_1, + self._sparse_indices_2, self._sparse_indices_m + ]) + + self.assertAllEqual(4, len(sparse_quantiles)) # Sparse feature 0 - self.assertAllEqual([0, 1, 1, 2], sparse_quantiles[0].eval()) + self.assertAllEqual([[0, 0], [1, 0], [1, 0], [2, 0]], + sparse_quantiles[0].eval()) # Sparse feature 1 - self.assertAllEqual([0, 1, 0], sparse_quantiles[1].eval()) + self.assertAllEqual([[0, 0], [1, 0], [0, 0]], sparse_quantiles[1].eval()) # Sparse feature 2 - self.assertAllEqual([0, 0], sparse_quantiles[2].eval()) + self.assertAllEqual([[0, 0], [0, 0]], sparse_quantiles[2].eval()) + # Multidimensional feature. + self.assertAllEqual([[0, 1], [1, 0], [0, 0], [2, 1], [0, 2]], + sparse_quantiles[3].eval()) def testDenseAndSparseFeatures(self): with self.test_session(): dense_quantiles, sparse_quantiles = quantile_ops.quantiles( - [self._dense_float_tensor_0, self._dense_float_tensor_1], - [self._sparse_values_0, self._sparse_values_1, self._sparse_values_2], - [self._dense_thresholds_0, self._dense_thresholds_1], - [self._sparse_thresholds_0, self._sparse_thresholds_1, - self._sparse_thresholds_2]) + [self._dense_float_tensor_0, self._dense_float_tensor_1], [ + self._sparse_values_0, self._sparse_values_1, + self._sparse_values_2, self._sparse_values_m + ], [self._dense_thresholds_0, self._dense_thresholds_1], [ + self._sparse_thresholds_0, self._sparse_thresholds_1, + self._sparse_thresholds_2, self._sparse_thresholds_m + ], [ + self._sparse_indices_0, self._sparse_indices_1, + self._sparse_indices_2, self._sparse_indices_m + ]) # Dense feature 0 - self.assertAllEqual([0, 0, 1, 2], dense_quantiles[0].eval()) + self.assertAllEqual([[0, 0], [0, 0], [1, 0], [2, 0]], + dense_quantiles[0].eval()) # Dense feature 1 - self.assertAllEqual([1, 0, 2, 2], dense_quantiles[1].eval()) + self.assertAllEqual([[1, 0], [0, 0], [2, 0], [2, 0]], + dense_quantiles[1].eval()) # Sparse feature 0 - self.assertAllEqual([0, 1, 1, 2], sparse_quantiles[0].eval()) + self.assertAllEqual([[0, 0], [1, 0], [1, 0], [2, 0]], + sparse_quantiles[0].eval()) # Sparse feature 1 - self.assertAllEqual([0, 1, 0], sparse_quantiles[1].eval()) + self.assertAllEqual([[0, 0], [1, 0], [0, 0]], sparse_quantiles[1].eval()) # Sparse feature 2 - self.assertAllEqual([0, 0], sparse_quantiles[2].eval()) + self.assertAllEqual([[0, 0], [0, 0]], sparse_quantiles[2].eval()) + # Multidimensional feature. + self.assertAllEqual([[0, 1], [1, 0], [0, 0], [2, 1], [0, 2]], + sparse_quantiles[3].eval()) def testBucketizeWithInputBoundaries(self): with self.test_session(): diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py index edf088b5fa..7c2e3a3b20 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py @@ -38,7 +38,8 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): # (-0.3, 0.19) | 0 | 1 | # (4.0, 0.13) | 1 | 1 | partition_ids = array_ops.constant([0, 0, 1], dtype=dtypes.int32) - bucket_ids = array_ops.constant([0, 1, 1], dtype=dtypes.int64) + bucket_ids = array_ops.constant( + [[0, 0], [1, 0], [1, 0]], dtype=dtypes.int64) gradients = array_ops.constant([2.4, -0.6, 8.0]) hessians = array_ops.constant([0.4, 0.38, 0.26]) bucket_boundaries = [0.3, 0.52] @@ -109,7 +110,8 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): """Tests split handler op.""" with self.test_session() as sess: partition_ids = array_ops.constant([0, 0, 1], dtype=dtypes.int32) - bucket_ids = array_ops.constant([0, 1, 1], dtype=dtypes.int64) + bucket_ids = array_ops.constant( + [[0, 0], [1, 0], [1, 0]], dtype=dtypes.int64) gradients = array_ops.constant([[2.4, 3.0], [-0.6, 0.1], [8.0, 1.0]]) hessians = array_ops.constant([[[0.4, 1], [1, 1]], [[0.38, 1], [1, 1]], [[0.26, 1], [1, 1]]]) @@ -149,7 +151,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): """Tests empty inputs op.""" with self.test_session() as sess: partition_ids = array_ops.constant([], dtype=dtypes.int32) - bucket_ids = array_ops.constant([], dtype=dtypes.int64) + bucket_ids = array_ops.constant([[]], dtype=dtypes.int64) gradients = array_ops.constant([]) hessians = array_ops.constant([]) bucket_boundaries = [0.3, 0.52] @@ -185,7 +187,11 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): # (4.0, 0.13) | 1 | -1 | # (4.0, 0.13) | 1 | 1 | partition_ids = array_ops.constant([0, 0, 0, 1, 1], dtype=dtypes.int32) + # We have only 1 dimension in our sparse feature column. bucket_ids = array_ops.constant([-1, 0, 1, -1, 1], dtype=dtypes.int64) + dimension_ids = array_ops.constant([0, 0, 0, 0, 0], dtype=dtypes.int64) + bucket_ids = array_ops.stack([bucket_ids, dimension_ids], axis=1) + gradients = array_ops.constant([1.8, 2.4, 0.4, 8.0, 8.0]) hessians = array_ops.constant([0.78, 0.4, 0.24, 0.26, 0.26]) bucket_boundaries = array_ops.constant([0.3, 0.52]) @@ -207,6 +213,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)) partitions, gains, splits = (sess.run([partitions, gains, splits])) self.assertAllEqual([0, 1], partitions) + self.assertEqual(2, len(splits)) # Check the split on partition 0. # -(0.2 + 1.2) / (0.12 + 0.2 + 2) expected_left_weight = -0.603448275862069 @@ -232,6 +239,8 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertAllClose([expected_right_weight], right_child.value) self.assertEqual(0, split_node.split.feature_column) + # Sparse is one dimensional. + self.assertEqual(0, split_node.split.feature_id) self.assertAllClose(0.52, split_node.split.threshold) @@ -253,14 +262,149 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertAllClose([expected_right_weight], right_child.value) self.assertEqual(0, split_node.split.feature_column) + # Sparse is one dimensional. + self.assertEqual(0, split_node.split.feature_id) self.assertAllClose(0.52, split_node.split.threshold) + def testMakeSparseSplitAllEmptyDimensions(self): + """Tests split handler op when all dimensions have only bias bucket id.""" + with self.test_session() as sess: + # The data looks like the following after dividing by number of steps (2). + # Gradients | Partition | Dimension | bucket ID | + # (0.9, 0.39) | 0 | 0 | -1 | + # (4.0, 0.13) | 1 | 0 | -1 | + partition_ids = array_ops.constant([0, 1], dtype=dtypes.int32) + # We have only 1 dimension in our sparse feature column. + bucket_ids = array_ops.constant([[-1, 0], [-1, 0]], dtype=dtypes.int64) + gradients = array_ops.constant([1.8, 8.0]) + hessians = array_ops.constant([0.78, 0.26]) + bucket_boundaries = array_ops.constant([0.3, 0.52]) + partitions, gains, splits = ( + split_handler_ops.build_sparse_inequality_splits( + num_minibatches=2, + partition_ids=partition_ids, + bucket_ids=bucket_ids, + gradients=gradients, + hessians=hessians, + bucket_boundaries=bucket_boundaries, + l1_regularization=0, + l2_regularization=2, + tree_complexity_regularization=0, + min_node_weight=0, + feature_column_group_id=0, + bias_feature_id=-1, + class_id=-1, + multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)) + partitions, gains, splits = (sess.run([partitions, gains, splits])) + self.assertEqual(0, len(partitions)) + self.assertEqual(0, len(splits)) + + def testMakeSparseMultidimensionalSplit(self): + """Tests split handler op.""" + with self.test_session() as sess: + # Num of steps is 2. + # The feature column is three dimensional. + # First dimension has bias bucket only, the second has bias bucket and + # two valid buckets, the third has just one bias bucket and one valid + # bucket. + # Gradients | Partition | Dimension | bucket ID | + # (0.9, 0.39) | 0 | 0 | -1 | + # (1.2, 0.2) | 0 | 1 | 0 | + # (0.2, 0.12) | 0 | 1 | 2 | + # (0.1, 0.1) | 0 | 2 | 3 | + # Now second node - nothing interesting there, just one dimension. + # Second node has the same bucket ids for all dimensions. + # (4.0, 0.13) | 1 | 0 | -1 | + # (4.0, 0.13) | 1 | 2 | 3 | + + # Tree node ids. + partition_ids = array_ops.constant([0, 0, 0, 0, 1, 1], dtype=dtypes.int32) + + dimension_ids = array_ops.constant([0, 1, 1, 2, 0, 2], dtype=dtypes.int64) + bucket_ids = array_ops.constant([-1, 0, 2, 3, -1, 3], dtype=dtypes.int64) + bucket_ids = array_ops.stack([bucket_ids, dimension_ids], axis=1) + + gradients = array_ops.constant([1.8, 2.4, 0.4, 0.2, 8.0, 8.0]) + hessians = array_ops.constant([0.78, 0.4, 0.24, 0.2, 0.26, 0.26]) + bucket_boundaries = array_ops.constant([0.3, 0.52, 0.58, 0.6]) + partitions, gains, splits = ( + split_handler_ops.build_sparse_inequality_splits( + num_minibatches=2, + partition_ids=partition_ids, + bucket_ids=bucket_ids, + gradients=gradients, + hessians=hessians, + bucket_boundaries=bucket_boundaries, + l1_regularization=0, + l2_regularization=2, + tree_complexity_regularization=0, + min_node_weight=0, + feature_column_group_id=0, + bias_feature_id=-1, + class_id=-1, + multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)) + partitions, gains, splits = (sess.run([partitions, gains, splits])) + self.assertAllEqual([0, 1], partitions) + self.assertEqual(2, len(splits)) + # Check the split on node 0 - it should split on second dimension + # -(0.2 + 1.2) / (0.12 + 0.2 + 2) + expected_left_weight = -0.603448275862069 + # (0.2 + 1.2) ** 2 / (0.12 + 0.2 + 2) + expected_left_gain = 0.8448275862068965 + # 0.5 / (0.07 + 2) + expected_right_weight = 0.24154589371980678 + # 0.5 ** 2 / (0.07 + 2) + expected_right_gain = 0.12077294685990339 + # (0.2 + 1.2 - 0.5) ** 2 / (0.12 + 0.2 + 0.07 + 2) + expected_bias_gain = 0.3389121338912133 + + split_info = split_info_pb2.SplitInfo() + split_info.ParseFromString(splits[0]) + left_child = split_info.left_child.vector + right_child = split_info.right_child.vector + split_node = split_info.split_node.sparse_float_binary_split_default_right + self.assertAllClose( + expected_left_gain + expected_right_gain - expected_bias_gain, gains[0]) + + self.assertAllClose([expected_left_weight], left_child.value) + + self.assertAllClose([expected_right_weight], right_child.value) + + self.assertEqual(0, split_node.split.feature_column) + # Split happened on second dimension. + self.assertEqual(1, split_node.split.feature_id) + + self.assertAllClose(0.58, split_node.split.threshold) + + # Check the split on partition 1. + expected_left_weight = -1.8779342723004695 + expected_right_weight = 0 + + # Verify candidate for partition 1, there's only one active bucket here + # so zero gain is expected. + split_info.ParseFromString(splits[1]) + left_child = split_info.left_child.vector + right_child = split_info.right_child.vector + split_node = split_info.split_node.sparse_float_binary_split_default_left + + self.assertAllClose(0.0, gains[1]) + + self.assertAllClose([expected_left_weight], left_child.value) + + self.assertAllClose([expected_right_weight], right_child.value) + + self.assertEqual(0, split_node.split.feature_column) + self.assertEqual(2, split_node.split.feature_id) + + self.assertAllClose(0.6, split_node.split.threshold) + def testMakeMulticlassSparseSplit(self): """Tests split handler op.""" with self.test_session() as sess: partition_ids = array_ops.constant([0, 0, 0, 1, 1], dtype=dtypes.int32) - bucket_ids = array_ops.constant([-1, 0, 1, -1, 1], dtype=dtypes.int64) + bucket_ids = array_ops.constant( + [[-1, 0], [0, 0], [1, 0], [-1, 0], [1, 0]], dtype=dtypes.int64) gradients = array_ops.constant([[1.8, 3.5], [2.4, 1.0], [0.4, 4.0], [8.0, 3.1], [8.0, 0.8]]) @@ -317,7 +461,8 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): gradients = [1.8, 0.4, 2.8, 8.0, 8.0] hessians = [0.78, 0.24, 0.64, 0.26, 0.26] partition_ids = [0, 0, 0, 1, 1] - feature_ids = array_ops.constant([-1, 1, 2, -1, 1], dtype=dtypes.int64) + feature_ids = array_ops.constant( + [[-1, 0], [1, 0], [2, 0], [-1, 0], [1, 0]], dtype=dtypes.int64) partitions, gains, splits = ( split_handler_ops.build_categorical_equality_splits( num_minibatches=2, @@ -412,7 +557,8 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): hessians = array_ops.constant( [hessian_0, hessian_1, hessian_2, hessian_3, hessian_4]) partition_ids = [0, 0, 0, 1, 1] - feature_ids = array_ops.constant([-1, 1, 2, -1, 1], dtype=dtypes.int64) + feature_ids = array_ops.constant( + [[-1, 0], [1, 0], [2, 0], [-1, 0], [1, 0]], dtype=dtypes.int64) partitions, gains, splits = ( split_handler_ops.build_categorical_equality_splits( num_minibatches=2, @@ -449,7 +595,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): gradients = [] hessians = [] partition_ids = [] - feature_ids = [] + feature_ids = [[]] partitions, gains, splits = ( split_handler_ops.build_categorical_equality_splits( num_minibatches=0, diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/stats_accumulator_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/stats_accumulator_ops_test.py index 0022d4ad52..978bf530cd 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/stats_accumulator_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/stats_accumulator_ops_test.py @@ -38,22 +38,52 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 0], [3, 0]], gradients=[0.1, 0.3], hessians=[0.2, 0.4]) - op2 = accumulator.add(0, [1], [2], [0.1], [0.2]) + op2 = accumulator.add(0, [1], [[2, 0]], [0.1], [0.2]) with ops.control_dependencies([op1, op2]): - num_updates, partition, feature, grads, hessians = accumulator.flush( + num_updates, partition, bucket_ids, grads, hessians = accumulator.flush( stamp_token=0, next_stamp_token=1) - num_updates, partition, feature, grads, hessians = sess.run( - [num_updates, partition, feature, grads, hessians]) + num_updates, partition, bucket_ids, grads, hessians = sess.run( + [num_updates, partition, bucket_ids, grads, hessians]) - result = _AccumulatorResultToDict(partition, feature, grads, hessians) + result = _AccumulatorResultToDict(partition, bucket_ids, grads, hessians) self.assertEqual(num_updates, 2) self.assertEqual(len(result), 2) - self.assertAllClose(result[(1, 2)], [0.2, 0.4]) - self.assertAllClose(result[(2, 3)], [0.3, 0.4]) + # Key is partion, bucket, dimension + self.assertAllClose(result[(1, 2, 0)], [0.2, 0.4]) + self.assertAllClose(result[(2, 3, 0)], [0.3, 0.4]) + + def testMultidimensionalAcculumator(self): + with self.test_session() as sess: + accumulator = stats_accumulator_ops.StatsAccumulator( + stamp_token=0, + gradient_shape=tensor_shape.scalar(), + hessian_shape=tensor_shape.scalar()) + with ops.control_dependencies([accumulator._create_op]): + op1 = accumulator.add( + stamp_token=0, + partition_ids=[1, 2, 1], + feature_ids=[[2, 2], [3, 0], [2, 2]], + gradients=[0.1, 0.3, 0.8], + hessians=[0.2, 0.4, -9]) + op2 = accumulator.add(0, [2, 1], [[3, 1], [2, 2]], [0.1, 1], [0.2, -1]) + + with ops.control_dependencies([op1, op2]): + num_updates, partition, bucket_ids, grads, hessians = accumulator.flush( + stamp_token=0, next_stamp_token=1) + num_updates, partition, bucket_ids, grads, hessians = sess.run( + [num_updates, partition, bucket_ids, grads, hessians]) + + result = _AccumulatorResultToDict(partition, bucket_ids, grads, hessians) + self.assertEqual(num_updates, 2) + self.assertEqual(len(result), 3) + # Key is partion, bucket, dimension. + self.assertAllClose(result[(1, 2, 2)], [1.9, -9.8]) + self.assertAllClose(result[(2, 3, 0)], [0.3, 0.4]) + self.assertAllClose(result[(2, 3, 1)], [0.1, 0.2]) def testDropStaleUpdate(self): with self.test_session() as sess: @@ -65,13 +95,13 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 0], [3, 0]], gradients=[0.1, 0.3], hessians=[0.2, 0.4]) op2 = accumulator.add( stamp_token=-1, partition_ids=[1], - feature_ids=[2], + feature_ids=[[2, 0]], gradients=[0.1], hessians=[0.2]) @@ -84,8 +114,8 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): result = _AccumulatorResultToDict(partition, feature, grads, hessians) self.assertEqual(num_updates, 1) self.assertEqual(len(result), 2) - self.assertAllClose(result[(1, 2)], [0.1, 0.2]) - self.assertAllClose(result[(2, 3)], [0.3, 0.4]) + self.assertAllClose(result[(1, 2, 0)], [0.1, 0.2]) + self.assertAllClose(result[(2, 3, 0)], [0.3, 0.4]) def testSerialize(self): with self.test_session() as sess: @@ -97,7 +127,7 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 0], [3, 0]], gradients=[0.1, 0.3], hessians=[0.2, 0.4]) @@ -123,8 +153,8 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): self.assertEqual(num_updates, 1) self.assertEqual(num_updates_2, 1) self.assertEqual(len(result_1), 2) - self.assertAllClose(result_1[(1, 2)], [0.1, 0.2]) - self.assertAllClose(result_1[(2, 3)], [0.3, 0.4]) + self.assertAllClose(result_1[(1, 2, 0)], [0.1, 0.2]) + self.assertAllClose(result_1[(2, 3, 0)], [0.3, 0.4]) self.assertAllEqual(result_1, result_2) self.assertEqual(0, stamp_token) @@ -139,18 +169,19 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 0], [3, 1]], gradients=[0.1, 0.3], hessians=[0.2, 0.4]) with ops.control_dependencies([op1]): - deserialize = (accumulator.deserialize( - stamp_token=2, - num_updates=3, - partition_ids=[3, 4], - feature_ids=[5, 6], - gradients=[0.4, 0.5], - hessians=[0.6, 0.7])) + deserialize = ( + accumulator.deserialize( + stamp_token=2, + num_updates=3, + partition_ids=[3, 4], + feature_ids=[[5, 0], [6, 2]], + gradients=[0.4, 0.5], + hessians=[0.6, 0.7])) with ops.control_dependencies([deserialize]): num_updates, partition, feature, grads, hessians = accumulator.flush( stamp_token=2, next_stamp_token=3) @@ -161,8 +192,8 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): hessians) self.assertEqual(num_updates, 3) self.assertEqual(len(result), 2) - self.assertAllClose(result[(3, 5)], [0.4, 0.6]) - self.assertAllClose(result[(4, 6)], [0.5, 0.7]) + self.assertAllClose(result[(3, 5, 0)], [0.4, 0.6]) + self.assertAllClose(result[(4, 6, 2)], [0.5, 0.7]) def testMakeSummary(self): with self.test_session() as sess: @@ -172,15 +203,15 @@ class StatsAccumulatorScalarTest(test_util.TensorFlowTestCase): hessian_shape=tensor_shape.scalar()) partition, feature, grads, hessians = accumulator._make_summary( partition_ids=[1, 2, 1], - feature_ids=[2, 3, 2], + feature_ids=[[2, 0], [3, 1], [2, 0]], gradients=[0.1, 0.3, 0.1], hessians=[0.2, 0.4, 0.2]) partition, feature, grads, hessians = sess.run( [partition, feature, grads, hessians]) result = _AccumulatorResultToDict(partition, feature, grads, hessians) self.assertEqual(len(result), 2) - self.assertAllClose(result[(1, 2)], [0.2, 0.4]) - self.assertAllClose(result[(2, 3)], [0.3, 0.4]) + self.assertAllClose(result[(1, 2, 0)], [0.2, 0.4]) + self.assertAllClose(result[(2, 3, 1)], [0.3, 0.4]) class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): @@ -196,16 +227,54 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 0], [3, 0]], + # Two values for gradients, + gradients=[[0.1, 0.1], [0.2, 0.2]], + # A 2x2 matrix for each hessian. + hessians=[[[0.01, 0.02], [0.03, 0.04]], [[0.05, 0.06], [0.07, + 0.08]]]) + op2 = accumulator.add( + stamp_token=0, + partition_ids=[1], + feature_ids=[[2, 0]], + gradients=[[0.10, 0.11]], + hessians=[[[0.011, 0.022], [0.033, 0.044]]]) + + with ops.control_dependencies([op1, op2]): + num_updates, partition, feature, grads, hessians = accumulator.flush( + stamp_token=0, next_stamp_token=1) + num_updates, partition, feature, grads, hessians = sess.run( + [num_updates, partition, feature, grads, hessians]) + + result = _AccumulatorResultToDict(partition, feature, grads, hessians) + self.assertEqual(num_updates, 2) + self.assertEqual(len(result), 2) + self.assertAllClose(result[(1, 2, 0)][0], [0.20, 0.21]) + self.assertAllClose(result[(1, 2, 0)][1], + [[0.021, 0.042], [0.063, 0.084]]) + self.assertAllClose(result[(2, 3, 0)][0], [0.2, 0.2]) + self.assertAllClose(result[(2, 3, 0)][1], [[0.05, 0.06], [0.07, 0.08]]) + + def testMultidimensionalAcculumator(self): + with self.test_session() as sess: + accumulator = stats_accumulator_ops.StatsAccumulator( + stamp_token=0, + gradient_shape=tensor_shape.TensorShape([2]), + hessian_shape=tensor_shape.TensorShape([2, 2])) + with ops.control_dependencies([accumulator._create_op]): + op1 = accumulator.add( + stamp_token=0, + partition_ids=[1, 2], + feature_ids=[[2, 4], [3, 1]], # Two values for gradients, gradients=[[0.1, 0.1], [0.2, 0.2]], # A 2x2 matrix for each hessian. - hessians=[[[0.01, 0.02], [0.03, 0.04]], - [[0.05, 0.06], [0.07, 0.08]]]) + hessians=[[[0.01, 0.02], [0.03, 0.04]], [[0.05, 0.06], [0.07, + 0.08]]]) op2 = accumulator.add( stamp_token=0, partition_ids=[1], - feature_ids=[2], + feature_ids=[[2, 4]], gradients=[[0.10, 0.11]], hessians=[[[0.011, 0.022], [0.033, 0.044]]]) @@ -218,10 +287,11 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): result = _AccumulatorResultToDict(partition, feature, grads, hessians) self.assertEqual(num_updates, 2) self.assertEqual(len(result), 2) - self.assertAllClose(result[(1, 2)][0], [0.20, 0.21]) - self.assertAllClose(result[(1, 2)][1], [[0.021, 0.042], [0.063, 0.084]]) - self.assertAllClose(result[(2, 3)][0], [0.2, 0.2]) - self.assertAllClose(result[(2, 3)][1], [[0.05, 0.06], [0.07, 0.08]]) + self.assertAllClose(result[(1, 2, 4)][0], [0.20, 0.21]) + self.assertAllClose(result[(1, 2, 4)][1], + [[0.021, 0.042], [0.063, 0.084]]) + self.assertAllClose(result[(2, 3, 1)][0], [0.2, 0.2]) + self.assertAllClose(result[(2, 3, 1)][1], [[0.05, 0.06], [0.07, 0.08]]) def testDropStaleUpdate(self): with self.test_session() as sess: @@ -233,16 +303,16 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 5], [3, 0]], # Two values for gradients, gradients=[[0.1, 0.1], [0.2, 0.2]], # A 2x2 matrix for each hessian. - hessians=[[[0.01, 0.02], [0.03, 0.04]], - [[0.05, 0.06], [0.07, 0.08]]]) + hessians=[[[0.01, 0.02], [0.03, 0.04]], [[0.05, 0.06], [0.07, + 0.08]]]) op2 = accumulator.add( stamp_token=-1, partition_ids=[1], - feature_ids=[2], + feature_ids=[[2, 5]], gradients=[[0.10, 0.11]], hessians=[[[0.011, 0.022], [0.033, 0.044]]]) @@ -255,10 +325,10 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): result = _AccumulatorResultToDict(partition, feature, grads, hessians) self.assertEqual(num_updates, 1) self.assertEqual(len(result), 2) - self.assertAllClose(result[(1, 2)][0], [0.1, 0.1]) - self.assertAllClose(result[(1, 2)][1], [[0.01, 0.02], [0.03, 0.04]]) - self.assertAllClose(result[(2, 3)][0], [0.2, 0.2]) - self.assertAllClose(result[(2, 3)][1], [[0.05, 0.06], [0.07, 0.08]]) + self.assertAllClose(result[(1, 2, 5)][0], [0.1, 0.1]) + self.assertAllClose(result[(1, 2, 5)][1], [[0.01, 0.02], [0.03, 0.04]]) + self.assertAllClose(result[(2, 3, 0)][0], [0.2, 0.2]) + self.assertAllClose(result[(2, 3, 0)][1], [[0.05, 0.06], [0.07, 0.08]]) def testSerialize(self): with self.test_session() as sess: @@ -270,12 +340,12 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 0], [3, 0]], # Two values for gradients, gradients=[[0.1, 0.1], [0.2, 0.2]], # A 2x2 matrix for each hessian. - hessians=[[[0.01, 0.02], [0.03, 0.04]], - [[0.05, 0.06], [0.07, 0.08]]]) + hessians=[[[0.01, 0.02], [0.03, 0.04]], [[0.05, 0.06], [0.07, + 0.08]]]) with ops.control_dependencies([op1]): (stamp_token, num_updates_1, partition_1, feature_1, grads_1, @@ -300,15 +370,15 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): self.assertEqual(num_updates_1, 1) self.assertEqual(num_updates_2, 1) self.assertEqual(len(result_1), 2) - self.assertAllClose(result_1[(1, 2)][0], [0.1, 0.1]) - self.assertAllClose(result_1[(1, 2)][1], [[0.01, 0.02], [0.03, 0.04]]) - self.assertAllClose(result_1[(2, 3)][0], [0.2, 0.2]) - self.assertAllClose(result_1[(2, 3)][1], [[0.05, 0.06], [0.07, 0.08]]) + self.assertAllClose(result_1[(1, 2, 0)][0], [0.1, 0.1]) + self.assertAllClose(result_1[(1, 2, 0)][1], [[0.01, 0.02], [0.03, 0.04]]) + self.assertAllClose(result_1[(2, 3, 0)][0], [0.2, 0.2]) + self.assertAllClose(result_1[(2, 3, 0)][1], [[0.05, 0.06], [0.07, 0.08]]) - self.assertAllEqual(result_1[1, 2][0], result_2[1, 2][0]) - self.assertAllEqual(result_1[1, 2][1], result_2[1, 2][1]) - self.assertAllEqual(result_1[2, 3][0], result_2[2, 3][0]) - self.assertAllEqual(result_1[2, 3][1], result_2[2, 3][1]) + self.assertAllEqual(result_1[1, 2, 0][0], result_2[1, 2, 0][0]) + self.assertAllEqual(result_1[1, 2, 0][1], result_2[1, 2, 0][1]) + self.assertAllEqual(result_1[2, 3, 0][0], result_2[2, 3, 0][0]) + self.assertAllEqual(result_1[2, 3, 0][1], result_2[2, 3, 0][1]) def testDeserialize(self): with self.test_session() as sess: @@ -321,19 +391,19 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): op1 = accumulator.add( stamp_token=0, partition_ids=[1, 2], - feature_ids=[2, 3], + feature_ids=[[2, 0], [3, 0]], # Two values for gradients, gradients=[[0.1, 0.1], [0.2, 0.2]], # A 2x2 matrix for each hessian. - hessians=[[[0.01, 0.02], [0.03, 0.04]], - [[0.05, 0.06], [0.07, 0.08]]]) + hessians=[[[0.01, 0.02], [0.03, 0.04]], [[0.05, 0.06], [0.07, + 0.08]]]) with ops.control_dependencies([op1]): deserialize = accumulator.deserialize( stamp_token=2, num_updates=3, partition_ids=[3, 4], - feature_ids=[4, 5], + feature_ids=[[4, 0], [5, 0]], # Two values for gradients, gradients=[[0.3, 0.3], [0.5, 0.5]], # A 2x2 matrix for each hessian. @@ -349,10 +419,10 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): hessians) self.assertEqual(num_updates, 3) self.assertEqual(len(result), 2) - self.assertAllClose(result[(3, 4)][0], [0.3, 0.3]) - self.assertAllClose(result[(3, 4)][1], [[0.03, 0.04], [0.05, 0.06]]) - self.assertAllClose(result[(4, 5)][0], [0.5, 0.5]) - self.assertAllClose(result[(4, 5)][1], [[0.07, 0.08], [0.09, 0.10]]) + self.assertAllClose(result[(3, 4, 0)][0], [0.3, 0.3]) + self.assertAllClose(result[(3, 4, 0)][1], [[0.03, 0.04], [0.05, 0.06]]) + self.assertAllClose(result[(4, 5, 0)][0], [0.5, 0.5]) + self.assertAllClose(result[(4, 5, 0)][1], [[0.07, 0.08], [0.09, 0.10]]) def testMakeSummary(self): with self.test_session() as sess: @@ -362,7 +432,7 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): hessian_shape=tensor_shape.TensorShape([2, 2])) partition, feature, grads, hessians = accumulator._make_summary( partition_ids=[1, 2, 1], - feature_ids=[2, 3, 2], + feature_ids=[[2, 0], [3, 2], [2, 0]], # Two values for gradients, gradients=[[0.1, 0.1], [0.2, 0.2], [0.10, 0.11]], # A 2x2 matrix for each hessian. @@ -373,15 +443,16 @@ class StatsAccumulatorTensorTest(test_util.TensorFlowTestCase): result = _AccumulatorResultToDict(partition, feature, grads, hessians) self.assertEqual(len(result), 2) - self.assertAllClose(result[(1, 2)][0], [0.20, 0.21]) - self.assertAllClose(result[(1, 2)][1], [[0.021, 0.042], [0.063, 0.084]]) - self.assertAllClose(result[(2, 3)][0], [0.2, 0.2]) - self.assertAllClose(result[(2, 3)][1], [[0.05, 0.06], [0.07, 0.08]]) + self.assertAllClose(result[(1, 2, 0)][0], [0.20, 0.21]) + self.assertAllClose(result[(1, 2, 0)][1], + [[0.021, 0.042], [0.063, 0.084]]) + self.assertAllClose(result[(2, 3, 2)][0], [0.2, 0.2]) + self.assertAllClose(result[(2, 3, 2)][1], [[0.05, 0.06], [0.07, 0.08]]) def _AccumulatorResultToDict(partition, feature, grads, hessians): """Converts the inputs to a dictionary since the ordering changes.""" - return {(partition[i], feature[i]): (grads[i], hessians[i]) + return {(partition[i], feature[i, 0], feature[i, 1]): (grads[i], hessians[i]) for i in range(len(partition))} diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index cebe3474ca..6094dae6b5 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -739,7 +739,7 @@ class GradientBoostedDecisionTreeModel(object): # Accumulate a step after updating stats. batch_size = math_ops.cast(array_ops.shape(labels)[0], dtypes.float32) with ops.control_dependencies(stats_update_ops): - add_step_op = steps_accumulator.add(ensemble_stamp, [0], [0], + add_step_op = steps_accumulator.add(ensemble_stamp, [0], [[0, 0]], [batch_size], [1.0]) # Determine learning rate. @@ -892,7 +892,9 @@ class GradientBoostedDecisionTreeModel(object): # Accumulate gradients and hessians. partition_ids = math_ops.range(self._logits_dimension) - feature_ids = array_ops.zeros_like(partition_ids, dtype=dtypes.int64) + feature_ids = array_ops.zeros( + [self._logits_dimension, 2], dtype=dtypes.int64) + add_stats_op = bias_stats_accumulator.add( ensemble_stamp, partition_ids, feature_ids, grads_sum, hess_sum) return control_flow_ops.group(*[add_stats_op], name="update_bias_stats") -- GitLab From cb5ef27106980d4f3e7a3cf51c48c3b3079de6f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 12:29:35 -0800 Subject: [PATCH 0526/1801] Update links to point to tflite PiperOrigin-RevId: 176001760 --- tensorflow/contrib/lite/README.md | 2 +- tensorflow/contrib/lite/g3doc/models.md | 40 ++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index b173936f5b..b5df986686 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -43,7 +43,7 @@ The simplest way to compile the demo app, and try out changes to the project cod ### Install Bazel If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html) -NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/demo/TfLiteCameraDemo.apk) instead. +NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead. ### Install Android NDK and SDK Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system. diff --git a/tensorflow/contrib/lite/g3doc/models.md b/tensorflow/contrib/lite/g3doc/models.md index 0508c160c6..5b393140d6 100644 --- a/tensorflow/contrib/lite/g3doc/models.md +++ b/tensorflow/contrib/lite/g3doc/models.md @@ -1,22 +1,22 @@ #List of Hosted Models -* [Inception V3 2015](https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2015_2017_11_10.zip) -* [Inception V3 Slim 2016](https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_slim_2016_android_2017_11_10.zip) -* [Mobilenet 0.25 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.25_128_float_2017_11_08.zip) -* [Mobilenet 0.25 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.25_160_float_2017_11_08.zip) -* [Mobilenet 0.25 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.25_192_float_2017_11_08.zip) -* [Mobilenet 0.25 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.25_224_float_2017_11_08.zip) -* [Mobilenet 0.50 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_float_2017_11_08.zip) -* [Mobilenet 0.50 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_160_float_2017_11_08.zip) -* [Mobilenet 0.50 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_192_float_2017_11_08.zip) -* [Mobilenet 0.50 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_224_float_2017_11_08.zip) -* [Mobilenet 0.75 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.75_128_float_2017_11_08.zip) -* [Mobilenet 0.75 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.75_160_float_2017_11_08.zip) -* [Mobilenet 0.75 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.75_192_float_2017_11_08.zip) -* [Mobilenet 0.75 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.75_224_float_2017_11_08.zip) -* [Mobilenet 1.0 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_128_float_2017_11_08.zip) -* [Mobilenet 1.0 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_160_float_2017_11_08.zip) -* [Mobilenet 1.0 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_192_float_2017_11_08.zip) -* [Mobilenet 1.0 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_float_2017_11_08.zip) -* [Mobilenet 1.0 224 Quant](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_224_android_quant_2017_11_08.zip) -* [Smart Reply 1.0 Android ](https://storage.googleapis.com/download.tensorflow.org/models/smartreply_1.0_2017_11_01.zip) +* [Inception V3 2015](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_2015_2017_11_10.zip) +* [Inception V3 Slim 2016](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) +* [Mobilenet 0.25 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_128_float_2017_11_08.zip) +* [Mobilenet 0.25 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_160_float_2017_11_08.zip) +* [Mobilenet 0.25 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_192_float_2017_11_08.zip) +* [Mobilenet 0.25 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_224_float_2017_11_08.zip) +* [Mobilenet 0.50 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_128_float_2017_11_08.zip) +* [Mobilenet 0.50 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_160_float_2017_11_08.zip) +* [Mobilenet 0.50 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_192_float_2017_11_08.zip) +* [Mobilenet 0.50 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_224_float_2017_11_08.zip) +* [Mobilenet 0.75 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_128_float_2017_11_08.zip) +* [Mobilenet 0.75 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_160_float_2017_11_08.zip) +* [Mobilenet 0.75 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_192_float_2017_11_08.zip) +* [Mobilenet 0.75 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_224_float_2017_11_08.zip) +* [Mobilenet 1.0 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_128_float_2017_11_08.zip) +* [Mobilenet 1.0 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_160_float_2017_11_08.zip) +* [Mobilenet 1.0 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_192_float_2017_11_08.zip) +* [Mobilenet 1.0 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_224_float_2017_11_08.zip) +* [Mobilenet 1.0 224 Quant](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) +* [Smart Reply 1.0 Android ](https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip) -- GitLab From 664a19dee1d4fd4c58b6413a5b07acb2d3016a86 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 16 Nov 2017 12:38:52 -0800 Subject: [PATCH 0527/1801] Keep the node map up to date to ensure that nodes won't get deleted by mistake PiperOrigin-RevId: 176003007 --- tensorflow/core/grappler/optimizers/constant_folding.cc | 4 +++- tensorflow/core/grappler/optimizers/constant_folding_test.cc | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index b722905032..8ae0d57068 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -490,9 +490,11 @@ Status ConstantFolding::MaterializeReductionIndices( } *reduction_indices = CreateNodeDef(const_name, TensorValue(&value)); reduction_indices->set_device(node->device()); - *reduction_indices->add_input() = + string ctrl_dep = AddControlDependency(node->input(1), &graph_, node_map_.get()); + *reduction_indices->add_input() = ctrl_dep; node_map_->AddNode(const_name, reduction_indices); + node_map_->AddOutput(NodeName(ctrl_dep), const_name); node->set_input(1, reduction_indices->name()); node_map_->UpdateInput(node->name(), indices->name(), diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 428376c02c..b2d9b02c68 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -930,6 +930,7 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch.push_back("reshape"); ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); GraphDef output; @@ -952,9 +953,11 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { } else if (node.name() == "sum") { ++found; EXPECT_EQ("ConstantFolding/sum-reduction_indices", node.input(1)); + } else if (node.name() == "indices") { + ++found; } } - EXPECT_EQ(2, found); + EXPECT_EQ(3, found); } } // namespace -- GitLab From 2eee9288d779371b2f3beb491e4b3d5bbfe5b772 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 16 Nov 2017 12:43:30 -0800 Subject: [PATCH 0528/1801] Use the provided optimizer for computing gradients. Since the user provides an optimizer, gradients should be computed via optimizer.compute_gradients() rather than just the tf.gradients() call. PiperOrigin-RevId: 176003625 --- .../estimator/python/estimator/replicate_model_fn.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index dcc48d1fd9..d9c83aa865 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -37,12 +37,10 @@ from tensorflow.python.framework import ops as ops_lib from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gradients as gradients_lib from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import tf_logging from tensorflow.python.training import training_util @@ -293,10 +291,7 @@ def _minimize_towers(tower_specs, optimizer): grad_lists = {} for tower_spec in tower_specs: with ops_lib.device(tower_spec.loss.device): - variables = variables_lib.trainable_variables() - gradients = gradients_lib.gradients(tower_spec.loss, variables) - - for var, grad in zip(variables, gradients): + for grad, var in optimizer.compute_gradients(tower_spec.loss): if grad is not None: grad_lists.setdefault(var, []).append(grad) -- GitLab From a4ad92579f5f39d2b8a8d9722f3fa1696bce7374 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 13:34:49 -0800 Subject: [PATCH 0529/1801] Tag tensorflow/python:basic_session_run_hooks_test with "notsan". The test gets intermittent TSan warnings. PiperOrigin-RevId: 176011109 --- tensorflow/python/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 970f3ecaff..a20898e40e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3655,7 +3655,10 @@ py_test( size = "small", srcs = ["training/basic_session_run_hooks_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], + tags = [ + "no_windows", + "notsan", # intermittent races on a few percent of runs + ], deps = [ ":client", ":client_testlib", -- GitLab From 30a7d27b1f5427eeb74915255b63d408d60b94a1 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 16 Nov 2017 13:34:50 -0800 Subject: [PATCH 0530/1801] Fix nopip build tag typo in contrib/predictor:test_export_dir. PiperOrigin-RevId: 176011110 --- tensorflow/contrib/predictor/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/predictor/BUILD b/tensorflow/contrib/predictor/BUILD index 1bf40ab6b2..82cd7b4c8a 100644 --- a/tensorflow/contrib/predictor/BUILD +++ b/tensorflow/contrib/predictor/BUILD @@ -165,5 +165,5 @@ py_test( filegroup( name = "test_export_dir", srcs = glob(["test_export_dir/**/*"]), - tags = ["nopip"], + tags = ["no_pip"], ) -- GitLab From 40ac2d38dbee3c5c43a9e8fe2f3a1905e9c538f0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 13:50:40 -0800 Subject: [PATCH 0531/1801] Hlo parser: allow rank 2 convolution dimension lables. PiperOrigin-RevId: 176013570 --- tensorflow/compiler/xla/tools/parser/README.md | 2 +- tensorflow/compiler/xla/tools/parser/hlo_lexer.cc | 8 ++++---- tensorflow/compiler/xla/tools/parser/hlo_parser.cc | 4 ++-- .../compiler/xla/tools/parser/hlo_parser_test.cc | 13 +++++++++++++ tensorflow/compiler/xla/tools/parser/hlo_token.h | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md index b768b94e77..6232967f5f 100644 --- a/tensorflow/compiler/xla/tools/parser/README.md +++ b/tensorflow/compiler/xla/tools/parser/README.md @@ -54,7 +54,7 @@ attribute attribute_value : kInt | kName - | [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} /*dim_labels_pattern*/ + | [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} /*dim_labels_pattern*/ | [0-9]+(x[0-9]+)+ /*dxd_pattern*/ | [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* /*pad_pattern*/ | '{' sub_attributes '}' diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index 0140c121f8..56744440db 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -161,7 +161,7 @@ TokKind HloLexer::LexToken() { // keyword ::= HloModule, ENTRY, ... // opcode ::= add, greater-than, ... // attribute_name ::= condition, body, dimensions, ... -// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} +// dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} TokKind HloLexer::LexIdentifier() { { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); @@ -237,7 +237,7 @@ TokKind HloLexer::LexIdentifier() { { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); static LazyRE2 dim_labels_pattern = { - R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; + R"([0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,})"}; if (RE2::Consume(&consumable, *dim_labels_pattern)) { current_ptr_ = consumable.begin(); str_val_.assign(token_start_, current_ptr_); @@ -269,7 +269,7 @@ TokKind HloLexer::LexPercent() { // // fp with exp ::= [-]?([0-9]+|[0-9]+[.][0-9]*|[0-9]*[.][0-9]+)([eE][+-]?[0-9]+) // fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+) -// dim_labels_pattern ::= [0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,} +// dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} // dxd_pattern ::= [0-9]+(x[0-9]+)+ // pad_pattern ::= [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* // int ::= [-]?[0-9]+ @@ -286,7 +286,7 @@ TokKind HloLexer::LexNumberOrPattern() { } static LazyRE2 dim_labels_pattern = { - R"([0-9bf]{3,}_[0-9io]{3,}->[0-9bf]{3,})"}; + R"([0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,})"}; static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"}; static LazyRE2 pad_pattern = { R"([0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*)"}; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 0159d03b11..2112b3e710 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -1656,8 +1656,8 @@ bool HloParser::ParseConvolutionDimensionNumbers( return TokenError( "convolution lhs, rhs, and output must have the same rank"); } - if (rank < 3) { - return TokenError("convolution rank must >=3"); + if (rank < 2) { + return TokenError("convolution rank must >=2"); } auto is_unique = [](string str) -> bool { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 0ebc0ca44b..cb02ef84a9 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -302,6 +302,19 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f } +)" +}, +// convolution rank 2 +{ +"ConvolutionR2", +R"(HloModule ConvolveR2_module: + +ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] { + %input = f32[1,2]{1,0} parameter(0) + %filter = f32[1,1]{1,0} parameter(1) + ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), window={size=1}, dim_labels=bf_io->bf +} + )" }, // reverse(constant) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 181760bdeb..07e48804d0 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -57,7 +57,7 @@ enum class TokKind { // Typed tokens. kName, // %foo kAttributeName, // dimensions= - kDimLabels, // [0-9bf]+_[0-9io]+->[0-9bf]+ + kDimLabels, // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} kDxD, // [0-9]+(x[0-9]+)+ kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* kString, // "abcd\"\n" -- GitLab From e2a60582bf28fa29c871736d10edad06e660776d Mon Sep 17 00:00:00 2001 From: James Keeling Date: Thu, 16 Nov 2017 14:00:50 -0800 Subject: [PATCH 0532/1801] Correct markdown for code segments in gradients_impl PiperOrigin-RevId: 176015049 --- tensorflow/python/ops/gradients_impl.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 64ad124c3f..8d00a3c6ab 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -425,18 +425,22 @@ def gradients(ys, other things, this allows computation of partial derivatives as opposed to total derivatives. For example: - a = tf.constant(0.) - b = 2 * a - g = tf.gradients(a + b, [a, b], stop_gradients=[a, b]) + ```python + a = tf.constant(0.) + b = 2 * a + g = tf.gradients(a + b, [a, b], stop_gradients=[a, b]) + ``` Here the partial derivatives `g` evaluate to `[1.0, 1.0]`, compared to the total derivatives `tf.gradients(a + b, [a, b])`, which take into account the influence of `a` on `b` and evaluate to `[3.0, 1.0]`. Note that the above is equivalent to: - a = tf.stop_gradient(tf.constant(0.)) - b = tf.stop_gradient(2 * a) - g = tf.gradients(a + b, [a, b]) + ```python + a = tf.stop_gradient(tf.constant(0.)) + b = tf.stop_gradient(2 * a) + g = tf.gradients(a + b, [a, b]) + ``` `stop_gradients` provides a way of stopping gradient after the graph has already been constructed, as compared to `tf.stop_gradient` which is used -- GitLab From 22d948d2739ecaadfb4091302f2050ba9cf0d0c1 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Thu, 16 Nov 2017 14:03:51 -0800 Subject: [PATCH 0533/1801] Add methods on TransferManager which transfer to/from device memory specified by ShapedBuffer rather than DeviceMemoryBase. This is part of a broader replacement of DeviceMemoryBase->ShapedBuffer in several XLA interfaces. With this change TransferManager no longer has to allocate memory to transfer tuples to the device. The existing methods using DeviceMemoryBase will be removed in a followup cl. Various related changes: * Make the transfer_manager_test an xla_test so that it runs on all the platforms. * Make several of the TransferManager methods protected. * Change ScopedShapedBuffer::Allocate to only allocate device memory buffers, and not fill in the tuple index table. The index table is filled in by the transfer manager. This is a cleaner separation of concerns. PiperOrigin-RevId: 176015628 --- .../compiler/xla/client/local_client.cc | 60 ++--- tensorflow/compiler/xla/client/local_client.h | 6 +- tensorflow/compiler/xla/service/BUILD | 20 +- .../xla/service/generic_transfer_manager.cc | 87 ++++++- .../xla/service/generic_transfer_manager.h | 13 +- .../compiler/xla/service/shaped_buffer.cc | 79 +++---- .../compiler/xla/service/shaped_buffer.h | 9 +- .../compiler/xla/service/transfer_manager.cc | 33 +++ .../compiler/xla/service/transfer_manager.h | 76 +++--- .../xla/service/transfer_manager_test.cc | 161 ------------- tensorflow/compiler/xla/tests/BUILD | 20 ++ .../compiler/xla/tests/dynamic_ops_test.cc | 6 +- tensorflow/compiler/xla/tests/fusion_test.cc | 28 +-- .../xla/tests/local_client_execute_test.cc | 9 +- .../xla/tests/local_client_test_base.cc | 23 +- .../xla/tests/local_client_test_base.h | 4 - .../xla/tests/transfer_manager_test.cc | 219 ++++++++++++++++++ 17 files changed, 499 insertions(+), 354 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/transfer_manager_test.cc create mode 100644 tensorflow/compiler/xla/tests/transfer_manager_test.cc diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index b50425a09c..c3c664f76a 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -210,9 +210,9 @@ tensorflow::Status LocalExecutable::RecordArguments( SessionModule* session_module) { session_module->clear_arguments(); for (const ShapedBuffer* argument : arguments) { - Literal literal; - TF_RETURN_IF_ERROR(LiteralFromShapedBuffer(*argument, &literal)); - *session_module->add_arguments() = literal.ToProto(); + TF_ASSIGN_OR_RETURN(std::unique_ptr literal, + LiteralFromShapedBuffer(*argument)); + *session_module->add_arguments() = literal->ToProto(); } return Status::OK(); } @@ -220,21 +220,19 @@ tensorflow::Status LocalExecutable::RecordArguments( tensorflow::Status LocalExecutable::RecordResult( const ShapedBuffer* result, SessionModule* session_module) { session_module->clear_result(); - Literal literal(session_module->result()); - TF_RETURN_IF_ERROR(LiteralFromShapedBuffer(*result, &literal)); - *session_module->mutable_result() = literal.ToProto(); + TF_ASSIGN_OR_RETURN(std::unique_ptr literal, + LiteralFromShapedBuffer(*result)); + *session_module->mutable_result() = literal->ToProto(); return Status::OK(); } -// TODO(dnovillo) Change signature to return StatusOr. -tensorflow::Status LocalExecutable::LiteralFromShapedBuffer( - const ShapedBuffer& shaped_buffer, Literal* literal) { +StatusOr> LocalExecutable::LiteralFromShapedBuffer( + const ShapedBuffer& shaped_buffer) { TF_ASSIGN_OR_RETURN( se::StreamExecutor * executor, backend_->stream_executor(shaped_buffer.device_ordinal())); - return backend_->transfer_manager()->TransferLiteralFromDevice( - executor, shaped_buffer.buffer({}), shaped_buffer.shape(), - shaped_buffer.shape(), literal); + return backend_->transfer_manager()->TransferLiteralFromDevice(executor, + shaped_buffer); } se::Platform* LocalClient::platform() const { @@ -288,20 +286,15 @@ LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal, } TF_ASSIGN_OR_RETURN( auto scoped_buffer, - ScopedShapedBuffer::Allocate(literal.shape(), allocator, device_ordinal)); + ScopedShapedBuffer::Allocate( + literal.shape(), allocator, device_ordinal, + [this](const Shape& shape) { + return backend().transfer_manager()->GetByteSizeRequirement(shape); + })); TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, backend().stream_executor(device_ordinal)); - TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( - literal.shape(), [&](const Shape& subshape, const ShapeIndex& index) { - if (ShapeUtil::IsArray(subshape)) { - // This is a leaf of the shape. Transfer the literal array data to the - // device buffer. - return backend().transfer_manager()->TransferLiteralToDevice( - executor, literal.GetSubliteral(index), - scoped_buffer->mutable_buffer(index)); - } - return Status::OK(); - })); + TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice( + executor, literal, *scoped_buffer)); return std::move(scoped_buffer); } @@ -309,26 +302,11 @@ LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal, // return as a Literal. StatusOr> LocalClient::ShapedBufferToLiteral( const ShapedBuffer& shaped_buffer) { - std::unique_ptr literal = - Literal::CreateFromShape(shaped_buffer.shape()); TF_ASSIGN_OR_RETURN( se::StreamExecutor * executor, backend().stream_executor(shaped_buffer.device_ordinal())); - TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( - literal->shape(), [&](const Shape& subshape, const ShapeIndex& index) { - if (ShapeUtil::IsArray(subshape)) { - // This is a leaf of the shape. Transfer the device buffer into the - // literal. The layout of the literal and the device buffer are - // necessarily the same so we pass 'subshape' for both device and - // literal shapes. - return backend().transfer_manager()->TransferLiteralFromDevice( - executor, shaped_buffer.buffer(index), - /*device_shape=*/subshape, - /*literal_shape*/ subshape, &literal->GetSubliteral(index)); - } - return Status::OK(); - })); - return std::move(literal); + return backend().transfer_manager()->TransferLiteralFromDevice(executor, + shaped_buffer); } } // namespace xla diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index e9eeaa0aa2..32fe0d9f84 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -113,9 +113,9 @@ class LocalExecutable { tensorflow::Status RecordResult(const ShapedBuffer* result, SessionModule* session_module); - // Copies the contents of a ShapedBuffer into a Literal proto. - tensorflow::Status LiteralFromShapedBuffer(const ShapedBuffer& shaped_buffer, - Literal* literal); + // Returns a literal containing the contents of the given ShapedBuffer. + StatusOr> LiteralFromShapedBuffer( + const ShapedBuffer& shaped_buffer); // Compiled computation. std::unique_ptr executable_; diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 4ff8302568..7bb4479ce0 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -568,7 +568,6 @@ cc_library( hdrs = ["shaped_buffer.h"], deps = [ ":device_memory_allocator", - ":transfer_manager", "//tensorflow/compiler/xla:shape_tree", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -645,6 +644,7 @@ cc_library( srcs = ["transfer_manager.cc"], hdrs = ["transfer_manager.h"], deps = [ + ":shaped_buffer", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -1294,24 +1294,6 @@ cc_library( alwayslink = True, # Contains per-platform transfer manager registration ) -tf_cc_test( - name = "transfer_manager_test", - srcs = ["transfer_manager_test.cc"], - deps = [ - ":generic_transfer_manager", - "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/service/cpu:cpu_transfer_manager", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/core:lib", - "//tensorflow/core:stream_executor_no_cuda", - ], -) - cc_library( name = "hlo_cost_analysis", srcs = ["hlo_cost_analysis.cc"], diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index b4fbed1562..74aa77b4f1 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/interpreter/platform_id.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -103,8 +104,7 @@ GenericTransferManager::ShallowCopyTupleFromDevice( // a vector of void* pointers. std::vector element_pointers(ShapeUtil::TupleElementCount(shape), nullptr); - int64 tuple_size = - ShapeUtil::ByteSizeOf(shape, /*pointer_size=*/sizeof(void*)); + int64 tuple_size = ShapeUtil::ByteSizeOf(shape, pointer_size_); auto copy_status = executor->SynchronousMemcpyD2H(source, tuple_size, element_pointers.data()); if (!copy_status.ok()) { @@ -121,9 +121,8 @@ GenericTransferManager::ShallowCopyTupleFromDevice( !ShapeUtil::HasZeroElements(shape.tuple_shapes(i))) { return FailedPrecondition("tuple contains nullptr at element %lu", i); } - int64 buffer_size = ShapeUtil::ByteSizeOf(shape.tuple_shapes(i), - /*pointer_size=*/sizeof(void*)); - destination.emplace_back(element_pointers[i], buffer_size); + destination.emplace_back(element_pointers[i], + GetByteSizeRequirement(shape.tuple_shapes(i))); } return std::move(destination); } @@ -138,11 +137,79 @@ Status GenericTransferManager::WriteTuplePointersToDevice( for (const se::DeviceMemoryBase& element : elements) { element_pointers.push_back(element.opaque()); } - int64 tuple_size = - ShapeUtil::ByteSizeOf(shape, /*pointer_size=*/sizeof(void*)); + return TransferBufferToDevice(executor, GetByteSizeRequirement(shape), + element_pointers.data(), region); +} + +StatusOr> +GenericTransferManager::TransferLiteralFromDevice( + se::StreamExecutor* executor, const ShapedBuffer& device_buffer) { + VLOG(2) << "transferring literal from device ordinal " + << executor->device_ordinal() << "; device shape: " + << ShapeUtil::HumanStringWithLayout(device_buffer.shape()) + << "; opaque: " << device_buffer.buffer(/*index=*/{}).opaque(); + TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal()); + + std::unique_ptr literal = + Literal::CreateFromShape(device_buffer.shape()); + + TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( + device_buffer.shape(), + [&](const Shape& subshape, const ShapeIndex& index) -> Status { + if (!ShapeUtil::IsTuple(subshape)) { + TF_RETURN_IF_ERROR(TransferBufferFromDevice( + executor, + /*source=*/device_buffer.buffer(index), + /*size=*/GetByteSizeRequirement(subshape), + /*destination=*/ + literal->GetSubliteral(index).MutableInternalData())); + } + + return Status::OK(); + })); + return std::move(literal); +} + +Status GenericTransferManager::TransferLiteralToDevice( + se::StreamExecutor* executor, const Literal& literal, + const ShapedBuffer& device_buffer) { + const Shape& shape = literal.shape(); + VLOG(2) << "transferring literal shape to device: " + << ShapeUtil::HumanString(shape) << "; device location: " + << device_buffer.buffer(/*index=*/{}).opaque(); + + TF_RET_CHECK(ShapeUtil::Compatible(literal.shape(), device_buffer.shape())); + TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal()); + + TF_RETURN_IF_ERROR(WriteTupleIndexTables(executor, device_buffer)); - return TransferBufferToDevice(executor, tuple_size, element_pointers.data(), - region); + return ShapeUtil::ForEachSubshapeWithStatus( + device_buffer.shape(), + [&](const Shape& device_subshape, const ShapeIndex& index) -> Status { + se::DeviceMemoryBase device_memory = device_buffer.buffer(index); + if (ShapeUtil::IsArray(device_subshape)) { + TF_RET_CHECK(GetByteSizeRequirement(device_subshape) == + device_memory.size()); + // Element is array-shaped: transfer array data to device buffer. + const Literal& subliteral = literal.GetSubliteral(index); + std::unique_ptr relayed_out_literal; + const void* source; + if (LayoutUtil::Equal(device_subshape.layout(), + subliteral.shape().layout())) { + source = subliteral.InternalData(); + } else { + // Relayout data before transferring. + relayed_out_literal = subliteral.Relayout(device_subshape.layout(), + /*shape_index=*/{}); + source = relayed_out_literal->InternalData(); + } + return TransferBufferToDevice( + executor, + /*size=*/GetByteSizeRequirement(device_subshape), source, + &device_memory); + } + return Status::OK(); + }); } Status GenericTransferManager::TransferLiteralToDevice( @@ -198,7 +265,7 @@ Status GenericTransferManager::ResetDevices( } int64 GenericTransferManager::GetByteSizeRequirement(const Shape& shape) const { - return ShapeUtil::ByteSizeOf(shape, /*pointer_size=*/sizeof(void*)); + return ShapeUtil::ByteSizeOf(shape, pointer_size_); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h index ef9a50676a..50dca6aec5 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.h +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h @@ -52,6 +52,14 @@ class GenericTransferManager : public TransferManager { perftools::gputools::StreamExecutor* executor, const Literal& literal, perftools::gputools::DeviceMemoryBase* destination) override; + StatusOr> TransferLiteralFromDevice( + perftools::gputools::StreamExecutor* executor, + const ShapedBuffer& device_buffer) override; + + Status TransferLiteralToDevice(perftools::gputools::StreamExecutor* executor, + const Literal& literal, + const ShapedBuffer& device_buffer) override; + Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor, const Literal& literal) override; Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor, @@ -71,6 +79,9 @@ class GenericTransferManager : public TransferManager { const perftools::gputools::DeviceMemoryBase& source, const Shape& shape) override; + int64 GetByteSizeRequirement(const Shape& shape) const override; + + protected: Status WriteTuplePointersToDevice( perftools::gputools::StreamExecutor* executor, tensorflow::gtl::ArraySlice @@ -78,8 +89,6 @@ class GenericTransferManager : public TransferManager { const Shape& shape, perftools::gputools::DeviceMemoryBase* region) override; - int64 GetByteSizeRequirement(const Shape& shape) const override; - private: // The platform this transfer manager targets. const perftools::gputools::Platform::Id platform_id_; diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index a57ebf59e7..a7539a1a11 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -21,17 +21,19 @@ limitations under the License. #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" -#include "tensorflow/compiler/xla/service/transfer_manager.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" namespace se = ::perftools::gputools; namespace xla { +using ::tensorflow::strings::Appendf; + /* static */ StatusOr> ShapedBuffer::MakeArrayShapedBuffer(const Shape& shape, const se::Platform* platform, @@ -80,10 +82,33 @@ se::DeviceMemoryBase* ShapedBuffer::mutable_buffer(const ShapeIndex& index) { return &buffers_[shape_index_to_buffer_entry_.element(index)]; } +string ShapedBuffer::ToString() const { + string s = "ShapedBuffer(" + platform_->Name() + "):\n"; + ShapeUtil::ForEachSubshape( + shape(), [this, &s](const Shape& subshape, const ShapeIndex& index) { + string shape_str; + if (ShapeUtil::IsTuple(subshape)) { + shape_str = "tuple"; + } else { + shape_str = ShapeUtil::HumanStringWithLayout(subshape); + } + const se::DeviceMemoryBase& memory = buffer(index); + Appendf(&s, " %s%p (%lld bytes) : %s\n", + string(index.size() * 2, ' ').c_str(), memory.opaque(), + memory.size(), shape_str.c_str()); + }); + return s; +} + +std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer) { + out << buffer.ToString(); + return out; +} + /* static */ StatusOr> -ScopedShapedBuffer::Allocate(const Shape& shape, - DeviceMemoryAllocator* allocator, - int device_ordinal) { +ScopedShapedBuffer::Allocate( + const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal, + const std::function& shape_size_fn) { if (!LayoutUtil::HasLayout(shape)) { return InvalidArgument("Shape must have a layout: %s", ShapeUtil::HumanStringWithLayout(shape).c_str()); @@ -93,51 +118,17 @@ ScopedShapedBuffer::Allocate(const Shape& shape, WrapUnique(new ScopedShapedBuffer(shape, allocator, device_ordinal)); // Allocate an appropriate sized buffer for each element in the shape - // including the tuple pointer arrays. Gather tuple element addresses in - // 'element_addresses'. These will be written in the respective tuple's array - // of pointers on the device. - TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager, - TransferManager::GetForPlatform(allocator->platform())); - ShapeTree> element_addresses(shape); + // including the tuple pointer arrays. for (auto& pair : shaped_buffer->shape_index_to_buffer_entry_) { const ShapeIndex& index = pair.first; size_t& buffer_entry = pair.second; - TF_ASSIGN_OR_RETURN( - se::DeviceMemoryBase memory_base, - shaped_buffer->allocator_->Allocate( - shaped_buffer->device_ordinal(), - transfer_manager->GetByteSizeRequirement( - ShapeUtil::GetSubshape(shaped_buffer->shape(), index)))); + TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase memory_base, + shaped_buffer->allocator_->Allocate( + shaped_buffer->device_ordinal(), + shape_size_fn(ShapeUtil::GetSubshape( + shaped_buffer->shape(), index)))); shaped_buffer->buffers_.push_back(memory_base); buffer_entry = shaped_buffer->buffers_.size() - 1; - - // If this is a tuple element, then push the address on to the - // vector of tuple element addresses. - if (!index.empty()) { - ShapeIndex parent_index = index; - parent_index.pop_back(); - element_addresses.mutable_element(parent_index)->push_back(memory_base); - } - } - - // Fill in the tuple pointer arrays with the addresses of their respective - // elements. - TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, - allocator->platform()->ExecutorForDevice( - shaped_buffer->device_ordinal())); - for (const auto& pair : element_addresses) { - const ShapeIndex& index = pair.first; - const std::vector& addresses = pair.second; - const Shape& subshape = ShapeUtil::GetSubshape(shape, index); - - if (addresses.empty()) { - TF_RET_CHECK(!ShapeUtil::IsTuple(subshape) || - ShapeUtil::TupleElementCount(subshape) == 0); - continue; - } - TF_RET_CHECK(ShapeUtil::IsTuple(subshape)); - TF_RETURN_IF_ERROR(transfer_manager->WriteTuplePointersToDevice( - executor, addresses, subshape, shaped_buffer->mutable_buffer(index))); } return std::move(shaped_buffer); diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index b440948700..fa88caa13f 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -17,6 +17,8 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_SHAPED_BUFFER_H_ #include +#include +#include #include "tensorflow/compiler/xla/service/device_memory_allocator.h" #include "tensorflow/compiler/xla/shape_tree.h" @@ -79,6 +81,8 @@ class ShapedBuffer { void AddBufferAtIndex(const perftools::gputools::DeviceMemoryBase& buffer, const ShapeIndex& shape_index); + string ToString() const; + protected: // The shape of the device buffer with layout. const Shape shape_; @@ -99,6 +103,8 @@ class ShapedBuffer { ShapeTree shape_index_to_buffer_entry_; }; +std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer); + // ShapedBuffer derived class which allocates all internal buffers on // construction and deallocates the memory when the object is // destructed. @@ -109,7 +115,8 @@ class ScopedShapedBuffer : public ShapedBuffer { // buffers (if any) are allocated and initialized to the backend-specific // representation of an array of pointers to the tuple elements. static StatusOr> Allocate( - const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal); + const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal, + const std::function& shape_size_fn); // Takes a ShapedBuffer and returns a ScopedShapedBuffer which manages the // deallocation of the device memory held in the shaped buffer. All device diff --git a/tensorflow/compiler/xla/service/transfer_manager.cc b/tensorflow/compiler/xla/service/transfer_manager.cc index fef131d19f..d5f53ad56f 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.cc +++ b/tensorflow/compiler/xla/service/transfer_manager.cc @@ -72,6 +72,39 @@ TransferManager::GetPlatformTransferManagers() { return it->second.manager.get(); } +Status TransferManager::WriteTupleIndexTables( + perftools::gputools::StreamExecutor* executor, + const ShapedBuffer& device_buffer) { + VLOG(2) << "Writing tuple index tables to ShapedBuffer rooted at " + << device_buffer.buffer(/*index=*/{}).opaque() + << "; shape: " << ShapeUtil::HumanString(device_buffer.shape()); + + TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal()); + + return ShapeUtil::ForEachSubshapeWithStatus( + device_buffer.shape(), + [&](const Shape& device_subshape, const ShapeIndex& index) -> Status { + if (ShapeUtil::IsTuple(device_subshape)) { + se::DeviceMemoryBase device_memory = device_buffer.buffer(index); + TF_RET_CHECK(GetByteSizeRequirement(device_subshape) == + device_memory.size()); + + std::vector elements; + ShapeIndex element_index = index; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(device_subshape); + ++i) { + element_index.push_back(i); + elements.push_back(device_buffer.buffer(element_index)); + element_index.pop_back(); + } + return WriteTuplePointersToDevice(executor, elements, device_subshape, + &device_memory); + } + + return Status::OK(); + }); +} + Status TransferManager::TransferBufferFromDevice( se::StreamExecutor* executor, const se::DeviceMemoryBase& source, int64 size, void* destination) { diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h index d7f85f5765..fdc123e54e 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.h +++ b/tensorflow/compiler/xla/service/transfer_manager.h @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -47,6 +48,8 @@ class TransferManager { // executor. device_shape is the shape, including layout, of the data on the // device, while literal_shape will be the shape for the literal. device_shape // and literal_shape must be compatible, but need not have the same layout. + // TODO(b/66694934): Remove TransferLiteral* methods which accept bare + // DeviceMemoryBase. virtual Status TransferLiteralFromDevice( perftools::gputools::StreamExecutor* executor, const perftools::gputools::DeviceMemoryBase& region, @@ -59,6 +62,20 @@ class TransferManager { perftools::gputools::StreamExecutor* executor, const Literal& literal, perftools::gputools::DeviceMemoryBase* region) = 0; + // Transfers the data held in the given ShapedBuffer into the provided literal + // using the provided executor. literal_shape will be the shape for the + // literal. The shape of the ShapedBuffer and literal_shape must be + // compatible, but need not have the same layout. + virtual StatusOr> TransferLiteralFromDevice( + perftools::gputools::StreamExecutor* executor, + const ShapedBuffer& device_buffer) = 0; + + // Transfers the given literal into the previously allocated device memory + // represented by the given ShapedBuffer using the given executor. + virtual Status TransferLiteralToDevice( + perftools::gputools::StreamExecutor* executor, const Literal& literal, + const ShapedBuffer& device_buffer) = 0; + // Transfers the given literal into the Infeed interface of the device, // using the given executor. virtual Status TransferLiteralToInfeed( @@ -97,15 +114,11 @@ class TransferManager { const perftools::gputools::DeviceMemoryBase& source, const Shape& shape) = 0; - // Writes the given device-memory pointers in 'elements' to the given region - // to construct a tuple in the platform-specific tuple representation. This - // can handle nested tuples as well. In the nested case, the element - // DeviceMemoryBase points to another array of pointers on the device. - virtual Status WriteTuplePointersToDevice( - perftools::gputools::StreamExecutor* executor, - tensorflow::gtl::ArraySlice - elements, - const Shape& shape, perftools::gputools::DeviceMemoryBase* region) = 0; + // Given an allocated ShapedBuffer, constructs the tuple index table(s) in + // each buffer of the given ShapedBuffer corresponding to tuple shapes. If the + // ShapedBuffer is array-shaped this method does nothing. + Status WriteTupleIndexTables(perftools::gputools::StreamExecutor* executor, + const ShapedBuffer& device_buffer); // Returns all buffer pointers that the tuple `source` refers to. Unlike // ShallowCopyTupleFromDevice, this function gather buffer pointers in nested @@ -121,23 +134,6 @@ class TransferManager { // region for a host-to-device transfer. virtual int64 GetByteSizeRequirement(const Shape& shape) const = 0; - // Transfer a memory block of the given size from the device source into the - // 'destination' buffer. - // - // size is the size to transfer to destination in bytes. - virtual Status TransferBufferFromDevice( - perftools::gputools::StreamExecutor* executor, - const perftools::gputools::DeviceMemoryBase& source, int64 size, - void* destination); - - // Transfer a memory block of the given size from 'source' buffer to the given - // destination of the device. - // - // size is the size to transfer from source in bytes. - virtual Status TransferBufferToDevice( - perftools::gputools::StreamExecutor* executor, int64 size, - const void* source, perftools::gputools::DeviceMemoryBase* destination); - typedef std::unique_ptr (*TransferManagerCreationFunction)(); ///// @@ -157,6 +153,34 @@ class TransferManager { static StatusOr GetForPlatform( const perftools::gputools::Platform* platform); + protected: + // Transfer a memory block of the given size from the device source into the + // 'destination' buffer. + // + // size is the size to transfer to destination in bytes. + virtual Status TransferBufferFromDevice( + perftools::gputools::StreamExecutor* executor, + const perftools::gputools::DeviceMemoryBase& source, int64 size, + void* destination); + + // Transfer a memory block of the given size from 'source' buffer to the given + // destination of the device. + // + // size is the size to transfer from source in bytes. + virtual Status TransferBufferToDevice( + perftools::gputools::StreamExecutor* executor, int64 size, + const void* source, perftools::gputools::DeviceMemoryBase* destination); + + // Writes the given device-memory pointers in 'elements' to the given region + // to construct a tuple in the platform-specific tuple representation. This + // can handle nested tuples as well. In the nested case, the element + // DeviceMemoryBase points to another array of pointers on the device. + virtual Status WriteTuplePointersToDevice( + perftools::gputools::StreamExecutor* executor, + tensorflow::gtl::ArraySlice + elements, + const Shape& shape, perftools::gputools::DeviceMemoryBase* region) = 0; + private: // The mutex that guards the platform-to-transfer manager map. static tensorflow::mutex platform_transfer_manager_mutex_; diff --git a/tensorflow/compiler/xla/service/transfer_manager_test.cc b/tensorflow/compiler/xla/service/transfer_manager_test.cc deleted file mode 100644 index c25a0861e9..0000000000 --- a/tensorflow/compiler/xla/service/transfer_manager_test.cc +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include -#include - -#include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/compiler/xla/service/generic_transfer_manager.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/tests/literal_test_util.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/stream_executor_no_cuda.h" - -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/platform/types.h" - -namespace se = ::perftools::gputools; - -namespace xla { - -namespace { - -class CpuTransferManagerTest : public ::testing::Test { - protected: - CpuTransferManagerTest() - : transfer_manager_(se::host::kHostPlatformId, - /*pointer_size=*/sizeof(void*)) { - se::Platform* platform = - se::MultiPlatformManager::PlatformWithId(se::host::kHostPlatformId) - .ValueOrDie(); - stream_exec_ = - platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie(); - } - - ~CpuTransferManagerTest() override {} - - se::StreamExecutor* stream_exec_; - GenericTransferManager transfer_manager_; -}; - -TEST_F(CpuTransferManagerTest, TransferR0U32ToDevice) { - std::vector storage(sizeof(uint32), '\x00'); - se::DeviceMemoryBase memptr(storage.data(), storage.size()); - std::unique_ptr literal = Literal::CreateR0(42); - TF_CHECK_OK(transfer_manager_.TransferLiteralToDevice(stream_exec_, *literal, - &memptr)); - - CHECK_EQ(42, *reinterpret_cast(&storage[0])); -} - -TEST_F(CpuTransferManagerTest, TransferR1F32ToDevice) { - std::vector storage(4 * sizeof(float), '\x00'); - se::DeviceMemoryBase memptr(storage.data(), storage.size()); - std::unique_ptr literal = - Literal::CreateR1({1.25f, 2.5f, -17.0f, -20.125f}); - TF_CHECK_OK(transfer_manager_.TransferLiteralToDevice(stream_exec_, *literal, - &memptr)); - - CHECK_EQ(1.25f, *reinterpret_cast(&storage[0])); - CHECK_EQ(2.5f, *reinterpret_cast(&storage[sizeof(float)])); - CHECK_EQ(-17.0f, *reinterpret_cast(&storage[2 * sizeof(float)])); - CHECK_EQ(-20.125f, *reinterpret_cast(&storage[3 * sizeof(float)])); -} - -TEST_F(CpuTransferManagerTest, TransferR1U8ToDevice) { - std::vector storage(16, '\x00'); - se::DeviceMemoryBase memptr(storage.data(), storage.size()); - const char* str = "0123456789abcdef"; - std::unique_ptr literal = Literal::CreateR1U8(str); - TF_CHECK_OK(transfer_manager_.TransferLiteralToDevice(stream_exec_, *literal, - &memptr)); - - CHECK_EQ('0', storage[0]); - CHECK_EQ('8', storage[8]); - CHECK_EQ('f', storage[15]); -} - -TEST_F(CpuTransferManagerTest, TransferR0U32FromDevice) { - std::vector storage(1, 42); - se::DeviceMemoryBase memptr(storage.data(), - storage.size() * sizeof(storage[0])); - Literal literal; - const Shape shape = ShapeUtil::MakeShape(U32, {}); - TF_CHECK_OK(transfer_manager_.TransferLiteralFromDevice( - stream_exec_, memptr, shape, shape, &literal)); - - LiteralTestUtil::ExpectR0Equal(42, literal); -} - -TEST_F(CpuTransferManagerTest, TransferR1F32FromDevice) { - std::vector storage{1.25f, 2.5f, -17.0f, -20.125f}; - se::DeviceMemoryBase memptr(storage.data(), - storage.size() * sizeof(storage[0])); - Literal literal; - const Shape shape = ShapeUtil::MakeShape(F32, {4}); - TF_CHECK_OK(transfer_manager_.TransferLiteralFromDevice( - stream_exec_, memptr, shape, shape, &literal)); - - LiteralTestUtil::ExpectR1Equal({1.25, 2.5, -17.0, -20.125}, literal); -} - -TEST_F(CpuTransferManagerTest, TransferR1U8FromDevice) { - std::vector storage{'k', 'l', 'm', 'n'}; - se::DeviceMemoryBase memptr(storage.data(), - storage.size() * sizeof(storage[0])); - Literal literal; - const Shape shape = ShapeUtil::MakeShape(U8, {4}); - TF_CHECK_OK(transfer_manager_.TransferLiteralFromDevice( - stream_exec_, memptr, shape, shape, &literal)); - CHECK_EQ("klmn", literal.u8s_string()); -} - -TEST_F(CpuTransferManagerTest, TransferBufferFromDevice) { - std::vector storage{1, 5, 42}; - int64 size = storage.size() * sizeof(storage[0]); - se::DeviceMemoryBase memptr(storage.data(), size); - - std::vector dest(3, 0); - TF_CHECK_OK(transfer_manager_.TransferBufferFromDevice(stream_exec_, memptr, - size, dest.data())); - ASSERT_EQ(1, dest[0]); - ASSERT_EQ(5, dest[1]); - ASSERT_EQ(42, dest[2]); -} - -TEST_F(CpuTransferManagerTest, TransferBufferToDevice) { - int64 size = 3 * sizeof(uint64); - std::vector storage(size, 0); - se::DeviceMemoryBase memptr(storage.data(), size); - - std::vector dest{1, 5, 42}; - TF_CHECK_OK(transfer_manager_.TransferBufferToDevice(stream_exec_, size, - dest.data(), &memptr)); - std::vector* storage64 = - reinterpret_cast*>(&storage); - ASSERT_EQ(1, (*storage64)[0]); - ASSERT_EQ(5, (*storage64)[1]); - ASSERT_EQ(42, (*storage64)[2]); -} - -// TODO(b/24679870): add similar tests for GPUs - -} // namespace - -} // namespace xla diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 63c3541e14..f3885e9021 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1601,6 +1601,26 @@ tf_cc_test( ], ) +xla_test( + name = "transfer_manager_test", + srcs = ["transfer_manager_test.cc"], + deps = [ + ":literal_test_util", + ":local_client_test_base", + ":xla_internal_test_main", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:device_memory_allocator", + "//tensorflow/compiler/xla/service:generic_transfer_manager", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc index ab8047c748..8baaf39e3c 100644 --- a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc +++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc @@ -559,7 +559,11 @@ void BM_DynamicSlice(int num_iters) { auto computation = builder.Build().ConsumeValueOrDie(); // Initialize and transfer parameter buffer. - auto buffer = ScopedShapedBuffer::Allocate(start_indices_shape, &allocator, 0) + auto shape_size_fn = [client](const Shape& shape) { + return client->backend().transfer_manager()->GetByteSizeRequirement(shape); + }; + auto buffer = ScopedShapedBuffer::Allocate(start_indices_shape, &allocator, 0, + shape_size_fn) .ConsumeValueOrDie(); auto start_indices_literal = Literal::CreateR1({0, 1, 2, 3}); diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc index a8f6488996..2686afccc2 100644 --- a/tensorflow/compiler/xla/tests/fusion_test.cc +++ b/tensorflow/compiler/xla/tests/fusion_test.cc @@ -770,8 +770,6 @@ void BM_ParallelFusion(int num_iters) { auto client = ClientLibrary::GetOrCreateLocalClient(client_options).ValueOrDie(); - auto* transfer_manager = - TransferManager::GetForPlatform(platform).ValueOrDie(); int device_ordinal = client->default_device_ordinal(); // Computation shape parameters. @@ -796,29 +794,23 @@ void BM_ParallelFusion(int num_iters) { auto computation = builder.Build().ConsumeValueOrDie(); // Transfer literals to device. - auto buffer0 = - ScopedShapedBuffer::Allocate(shape0, &allocator, /*device_ordinal=*/0) - .ConsumeValueOrDie(); auto param0_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param0_dim0, param0_dim1); - ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice( - executors[device_ordinal], *param0_literal, buffer0->mutable_buffer({}))); - - auto buffer1 = - ScopedShapedBuffer::Allocate(shape1, &allocator, /*device_ordinal=*/0) + std::unique_ptr buffer0 = + client->LiteralToShapedBuffer(*param0_literal, device_ordinal) .ConsumeValueOrDie(); + auto param1_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param1_dim0, param1_dim1); - ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice( - executors[device_ordinal], *param1_literal, buffer1->mutable_buffer({}))); - - auto buffer2 = - ScopedShapedBuffer::Allocate(shape2, &allocator, /*device_ordinal=*/0) + std::unique_ptr buffer1 = + client->LiteralToShapedBuffer(*param1_literal, device_ordinal) .ConsumeValueOrDie(); + auto param2_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param2_dim0, param2_dim1); - ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice( - executors[device_ordinal], *param2_literal, buffer2->mutable_buffer({}))); + std::unique_ptr buffer2 = + client->LiteralToShapedBuffer(*param2_literal, device_ordinal) + .ConsumeValueOrDie(); // Build executable. std::unique_ptr executable = @@ -828,7 +820,7 @@ void BM_ParallelFusion(int num_iters) { ExecutableBuildOptions()) .ConsumeValueOrDie(); - se::Stream stream(executors[client->default_device_ordinal()]); + se::Stream stream(executors[device_ordinal]); stream.Init(); // Initialize thread pool. diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc index a196e250d1..fbf9739dbc 100644 --- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc @@ -904,9 +904,12 @@ void BM_LocalClientOverhead(int num_iters) { builder.Add(x, x); auto computation = builder.Build().ConsumeValueOrDie(); - auto buffer = - ScopedShapedBuffer::Allocate(shape, &allocator, /*device_ordinal=*/0) - .ConsumeValueOrDie(); + auto shape_size_fn = [client](const Shape& shape) { + return client->backend().transfer_manager()->GetByteSizeRequirement(shape); + }; + auto buffer = ScopedShapedBuffer::Allocate( + shape, &allocator, /*device_ordinal=*/0, shape_size_fn) + .ConsumeValueOrDie(); auto literal = Literal::CreateR2({{0, 0, 0}, {0, 0, 0}}); ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice( executors[device_ordinal], *literal, buffer->mutable_buffer({}))); diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index d98875dbc2..062a9246e4 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -135,29 +135,10 @@ std::unique_ptr LocalClientTestBase::LiteralToShapedBuffer( .ConsumeValueOrDie(); } -void LocalClientTestBase::CopyShapedBufferToLiteral( - const ShapedBuffer& shaped_buffer, ShapeIndex* index, Literal* literal) { - const Shape& shape = ShapeUtil::GetSubshape(shaped_buffer.shape(), *index); - if (ShapeUtil::IsTuple(shape)) { - *literal->mutable_shape() = shape; - for (int i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { - Literal* element_literal = literal->add_tuple_literals(); - index->push_back(i); - CopyShapedBufferToLiteral(shaped_buffer, index, element_literal); - index->pop_back(); - } - } else { - ASSERT_IS_OK(transfer_manager_->TransferLiteralFromDevice( - stream_executor_, shaped_buffer.buffer(*index), shape, shape, literal)); - } -} - std::unique_ptr LocalClientTestBase::ShapedBufferToLiteral( const ShapedBuffer& shaped_buffer) { - auto literal = MakeUnique(); - ShapeIndex index; - CopyShapedBufferToLiteral(shaped_buffer, &index, literal.get()); - return literal; + return local_client_->ShapedBufferToLiteral(shaped_buffer) + .ConsumeValueOrDie(); } ExecutableBuildOptions LocalClientTestBase::DefaultExecutableBuildOptions() diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.h b/tensorflow/compiler/xla/tests/local_client_test_base.h index 3edfcb656e..f0c73f04f6 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.h +++ b/tensorflow/compiler/xla/tests/local_client_test_base.h @@ -93,10 +93,6 @@ class LocalClientTestBase : public ::testing::Test { std::unique_ptr ShapedBufferToLiteral( const ShapedBuffer& shaped_buffer); - // Helper for converting a ShapedBuffer into a literal. - void CopyShapedBufferToLiteral(const ShapedBuffer& shaped_buffer, - ShapeIndex* index, Literal* literal); - // Execute the given computation on the local client. With and without // options. StatusOr> ExecuteLocally( diff --git a/tensorflow/compiler/xla/tests/transfer_manager_test.cc b/tensorflow/compiler/xla/tests/transfer_manager_test.cc new file mode 100644 index 0000000000..c30cd1b7b8 --- /dev/null +++ b/tensorflow/compiler/xla/tests/transfer_manager_test.cc @@ -0,0 +1,219 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/device_memory_allocator.h" +#include "tensorflow/compiler/xla/service/generic_transfer_manager.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/local_client_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/types.h" + +namespace se = ::perftools::gputools; + +namespace xla { + +namespace { + +class TransferManagerTest : public LocalClientTestBase { + protected: + TransferManagerTest() { + shape_size_fn_ = [this](const Shape& shape) { + return transfer_manager_->GetByteSizeRequirement(shape); + }; + } + + ~TransferManagerTest() override {} + + std::unique_ptr AllocateDeviceBuffer(const Shape& shape) { + return ScopedShapedBuffer::Allocate( + shape, GetOrCreateAllocator(local_client_->platform()), + /*device_ordinal=*/0, shape_size_fn_) + .ConsumeValueOrDie(); + } + + std::function shape_size_fn_; +}; + +XLA_TEST_F(TransferManagerTest, TransferR0U32) { + std::unique_ptr literal = Literal::CreateR0(42); + const Shape& shape = literal->shape(); + auto device_buffer = AllocateDeviceBuffer(shape); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + LiteralTestUtil::ExpectR0Equal(42, *result); +} + +XLA_TEST_F(TransferManagerTest, TransferR1F32) { + std::unique_ptr literal = + Literal::CreateR1({1.25f, 2.5f, -17.0f, -20.125f}); + const Shape& shape = literal->shape(); + auto device_buffer = AllocateDeviceBuffer(shape); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + LiteralTestUtil::ExpectR1Equal({1.25f, 2.5f, -17.0f, -20.125f}, + *result); +} + +XLA_TEST_F(TransferManagerTest, TransferR1LargeF32) { + std::vector test_vector(1024 * 1024); + std::iota(test_vector.begin(), test_vector.end(), 0); + std::unique_ptr literal = Literal::CreateR1(test_vector); + const Shape& shape = literal->shape(); + auto device_buffer = AllocateDeviceBuffer(shape); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + LiteralTestUtil::ExpectR1Equal(test_vector, *result); +} + +XLA_TEST_F(TransferManagerTest, TransferR1U8) { + const char* test_string = "0123456789abcdef"; + std::unique_ptr literal = Literal::CreateR1U8(test_string); + const Shape& shape = literal->shape(); + auto device_buffer = AllocateDeviceBuffer(shape); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + EXPECT_EQ(result->u8s_string(), test_string); +} + +XLA_TEST_F(TransferManagerTest, TransferR2F32) { + std::unique_ptr literal = + Literal::CreateR2({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}); + const Shape& shape = literal->shape(); + auto device_buffer = AllocateDeviceBuffer(shape); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + LiteralTestUtil::ExpectR2Equal( + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}, *result); +} + +XLA_TEST_F(TransferManagerTest, + TransferR2F32AndChangeLayoutTransferringToDevice) { + std::unique_ptr literal = Literal::CreateR2WithLayout( + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}, LayoutUtil::MakeLayout({0, 1})); + const Shape ondevice_shape = + ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0}); + auto device_buffer = AllocateDeviceBuffer(ondevice_shape); + + // Round trip literal through device. Set the on-device layout to something + // different than the literal layout. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + EXPECT_FALSE( + LayoutUtil::Equal(result->shape().layout(), literal->shape().layout())); + LiteralTestUtil::ExpectR2Equal( + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}, *result); +} + +XLA_TEST_F(TransferManagerTest, TransferTuple) { + std::unique_ptr literal = Literal::MakeTuple( + {Literal::CreateR0(123.0f).get(), + Literal::CreateR2({{1.0f, 2.0f}, {4.0f, 5.0f}}).get(), + Literal::CreateR1({44.0f, -10.0f, 3333333.3f}).get()}); + auto device_buffer = AllocateDeviceBuffer(literal->shape()); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + LiteralTestUtil::ExpectEqual(*literal, *result); +} + +XLA_TEST_F(TransferManagerTest, TransferEmptyTuple) { + std::unique_ptr literal = Literal::MakeTuple({}); + auto device_buffer = AllocateDeviceBuffer(literal->shape()); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + LiteralTestUtil::ExpectEqual(*literal, *result); +} + +XLA_TEST_F(TransferManagerTest, TransferNestedTuple) { + std::unique_ptr literal = Literal::MakeTuple( + {Literal::CreateR0(123.0f).get(), + Literal::MakeTuple( + {Literal::CreateR2({{1.0f, 2.0f}, {4.0f, 5.0f}}).get(), + Literal::CreateR1({44.0f, -10.0f, 3333333.3f}).get()}) + .get(), + Literal::CreateR1({-10.0f, 123.0f}).get()}); + auto device_buffer = AllocateDeviceBuffer(literal->shape()); + + // Round trip literal through device. + ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, *literal, *device_buffer)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, + transfer_manager_->TransferLiteralFromDevice( + stream_executor_, *device_buffer)); + + LiteralTestUtil::ExpectEqual(*literal, *result); +} + +} // namespace + +} // namespace xla -- GitLab From faf36c03c1de9530e45aa05272699fb1811a9503 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 14:24:22 -0800 Subject: [PATCH 0534/1801] [XLA] Make a LOG(FATAL) a bit more useful. PiperOrigin-RevId: 176019239 --- tensorflow/compiler/xla/service/hlo_instruction.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index f7b5b265d9..c35ca1eb99 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1403,7 +1403,7 @@ int64 HloInstruction::operand_index(const HloInstruction* target) const { return i; } } - LOG(FATAL) << "target was not an operand"; + LOG(FATAL) << "target was not an operand: " << target->ToString(); } Status HloInstruction::AddControlDependencyTo(HloInstruction* instruction) { -- GitLab From 9784fd52f2d40ef5b0f0b4f3192501d9f670451a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 14:25:57 -0800 Subject: [PATCH 0535/1801] [tf.data] Iterator save and restore patch for Dataset.from_tensors(..) and Dataset.from_tensor_slices(..) PiperOrigin-RevId: 176019513 --- tensorflow/core/kernels/tensor_dataset_op.cc | 6 ++++-- tensorflow/core/kernels/tensor_slice_dataset_op.cc | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/tensor_dataset_op.cc b/tensorflow/core/kernels/tensor_dataset_op.cc index 1f69082031..fe53434d17 100644 --- a/tensorflow/core/kernels/tensor_dataset_op.cc +++ b/tensorflow/core/kernels/tensor_dataset_op.cc @@ -77,8 +77,10 @@ class TensorDatasetOp : public DatasetOpKernel { TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); components.emplace_back(node); } - TF_RETURN_IF_ERROR( - b->AddDataset(this, {}, {std::make_pair(0, components)}, {}, output)); + AttrValue dtypes; + b->BuildAttrValue(dtypes_, &dtypes); + TF_RETURN_IF_ERROR(b->AddDataset(this, {}, {{0, components}}, + {{"Toutput_types", dtypes}}, output)); return Status::OK(); } diff --git a/tensorflow/core/kernels/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/tensor_slice_dataset_op.cc index 4d0cbdd67c..e85f59b584 100644 --- a/tensorflow/core/kernels/tensor_slice_dataset_op.cc +++ b/tensorflow/core/kernels/tensor_slice_dataset_op.cc @@ -93,8 +93,10 @@ class TensorSliceDatasetOp : public DatasetOpKernel { TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); components.emplace_back(node); } - TF_RETURN_IF_ERROR( - b->AddDataset(this, {}, {std::make_pair(0, components)}, {}, output)); + AttrValue dtypes; + b->BuildAttrValue(dtypes_, &dtypes); + TF_RETURN_IF_ERROR(b->AddDataset(this, {}, {{0, components}}, + {{"Toutput_types", dtypes}}, output)); return Status::OK(); } -- GitLab From 2040491d132fd10e79f3f531a3defa0c2e248ce0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 14:34:48 -0800 Subject: [PATCH 0536/1801] [tf.data] Saveable iterator for TFRecordDatasetOp. PiperOrigin-RevId: 176020932 --- .../contrib/data/python/kernel_tests/BUILD | 7 +- .../kernel_tests/reader_dataset_ops_test.py | 679 +++--------------- tensorflow/core/kernels/reader_dataset_ops.cc | 90 ++- tensorflow/core/lib/io/record_reader.cc | 13 + tensorflow/core/lib/io/record_reader.h | 19 + 5 files changed, 226 insertions(+), 582 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index badabed701..c61f61263f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -121,7 +121,6 @@ py_library( "dataset_serialization_test_base.py", ], srcs_version = "PY2AND3", - visibility = ["//visibility:private"], deps = [ "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/python:client_testlib", @@ -327,21 +326,17 @@ py_test( srcs = ["reader_dataset_ops_test.py"], srcs_version = "PY2AND3", deps = [ - "//tensorflow/contrib/data/python/ops:iterator_ops", + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:readers", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", - "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", - "//tensorflow/python:io_ops", "//tensorflow/python:lib", "//tensorflow/python:parsing_ops", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python/data/ops:iterator_ops", ], diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index 3ae8f71d77..1c42a3d855 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -21,7 +21,7 @@ import gzip import os import zlib -from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import readers from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 @@ -30,18 +30,14 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.ops import io_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test -from tensorflow.python.training import saver as saver_lib from tensorflow.python.util import compat -class TextLineDatasetTest(test.TestCase): +class TextLineDatasetTestBase(test.TestCase): def _lineText(self, f, l): return compat.as_bytes("%d: %d" % (f, l)) @@ -79,6 +75,9 @@ class TextLineDatasetTest(test.TestCase): return filenames + +class TextLineDatasetTest(TextLineDatasetTestBase): + def _testTextLineDataset(self, compression_type=None): test_filenames = self._createFiles( 2, 5, crlf=True, compression_type=compression_type) @@ -165,282 +164,37 @@ class TextLineDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(iterator.get_next()) - def _ckpt_path(self): - return os.path.join(self.get_temp_dir(), "iterator") - - def _latest_ckpt(self): - return saver_lib.latest_checkpoint(self.get_temp_dir()) - - def _save(self, saver, sess): - saver.save(sess, self._ckpt_path()) - - def _restore(self, saver, sess): - saver.restore(sess, self._latest_ckpt()) - def _import_meta_graph(self): - meta_file_path = self._ckpt_path() + ".meta" - return saver_lib.import_meta_graph(meta_file_path) +class TextLineDatasetSerializationTest( + TextLineDatasetTestBase, + dataset_serialization_test_base.DatasetSerializationTestBase): - def _build_graph(self, - test_filenames, - compression_type=None, - build_saveable=True): - ds = readers.TextLineDataset( + def _build_iterator_graph(self, test_filenames, compression_type=None): + return readers.TextLineDataset( test_filenames, compression_type=compression_type, buffer_size=10) - iterator = ds.make_initializable_iterator() - if build_saveable: - saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator) - ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) - init_op = iterator.initializer - get_next = iterator.get_next() - ops.add_to_collection("iterator_ops", init_op) - ops.add_to_collection("iterator_ops", get_next) - saver = saver_lib.Saver(allow_empty=True) - return init_op, get_next, saver - - def _testReadWithBreaks(self, breaks, num_files=5, lines_per_file=5): - """Tests reading from input pipeline with regular breaks. - - At each break point the iterator state gets saved using Saver and reloaded - in a new Graph and session. - - Args: - breaks: List of counts of records after reading which iterator state is - checkpointed. Must to in non-decreasing order. - num_files: Total number of files. - lines_per_file: Total number of lines per file. - """ + + def testTextLineCore(self): compression_types = [None, "GZIP", "ZLIB"] + num_files = 5 + lines_per_file = 5 + num_outputs = num_files * lines_per_file for compression_type in compression_types: test_filenames = self._createFiles( num_files, lines_per_file, crlf=True, compression_type=compression_type) + # pylint: disable=cell-var-from-loop + self.run_core_tests( + lambda: self._build_iterator_graph(test_filenames, compression_type), + lambda: self._build_iterator_graph(test_filenames), num_outputs) + # pylint: enable=cell-var-from-loop - # Collect ground truth. - total_records = num_files * lines_per_file - expected_records = [] - with ops.Graph().as_default() as g: - init_op, get_next, saver = self._build_graph( - test_filenames, compression_type=compression_type) - with self.test_session(graph=g) as sess: - sess.run(init_op) - for _ in range(total_records): - expected_records.append(sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Simulate run with breaks. - actual_records = [] - next_record_index = 0 - load_from_ckpt = False - breaks.append(total_records) - for break_index in breaks: - with ops.Graph().as_default() as g: - if not load_from_ckpt: - init_op, get_next, saver = self._build_graph( - test_filenames, compression_type=compression_type) - else: - saver = self._import_meta_graph() - init_op, get_next = ops.get_collection("iterator_ops") - with self.test_session(graph=g) as sess: - if not load_from_ckpt: - sess.run(init_op) - else: - self._restore(saver, sess) - while next_record_index != break_index: - actual_records.append(sess.run(get_next)) - next_record_index += 1 - if break_index == total_records: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - self._save(saver, sess) - load_from_ckpt = True - self.assertEqual(actual_records, expected_records) - - def testSaveAtFileBoundary(self): - self._testReadWithBreaks([10]) - - def testSaveWithinFile(self): - self._testReadWithBreaks([12]) - - def testSaveUnusedIterator(self): - self._testReadWithBreaks([0]) - - def testSaveRestoreIdempotence(self): - # Attempt to save an iterator immediately after it has been - # restored. - self._testReadWithBreaks([0, 0]) - self._testReadWithBreaks([10, 10]) - self._testReadWithBreaks([12, 12]) - - def testMultipleBreaks(self): - self._testReadWithBreaks([0, 4, 20]) - - def testRestoreExhaustedIterator(self): - num_files = 2 - lines_per_file = 5 - test_filenames = self._createFiles(num_files, lines_per_file, crlf=True) - - with ops.Graph().as_default() as g: - init_op, get_next, saver = self._build_graph(test_filenames) - with self.test_session(graph=g) as sess: - sess.run(init_op) - for _ in range(num_files * lines_per_file): - sess.run(get_next) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - self._save(saver, sess) - - with ops.Graph().as_default() as g: - with self.test_session(graph=g) as sess: - saver = self._import_meta_graph() - self._restore(saver, sess) - _, get_next = ops.get_collection("iterator_ops") - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testInitThenRestore(self): - num_files = 5 - lines_per_file = 5 - total_records = num_files * lines_per_file - break_record = 8 - test_filenames = self._createFiles(num_files, lines_per_file, crlf=True) - - expected_records = [] - with ops.Graph().as_default() as g: - init_op, get_next, saver = self._build_graph(test_filenames) - with self.test_session(graph=g) as sess: - sess.run(init_op) - for _ in range(break_record): - sess.run(get_next) - self._save(saver, sess) - for _ in range(total_records - break_record): - expected_records.append(sess.run(get_next)) - - actual_records = [] - with ops.Graph().as_default() as g: - with self.test_session(graph=g) as sess: - saver = self._import_meta_graph() - init_op, get_next = ops.get_collection("iterator_ops") - sess.run(init_op) - self._restore(saver, sess) - for _ in range(total_records - break_record): - actual_records.append(sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - self.assertEqual(actual_records, expected_records) - - def testRestoreInModifiedGraph(self): - num_files = 5 - lines_per_file = 5 - total_records = num_files * lines_per_file - break_record = 8 - test_filenames = self._createFiles(num_files, lines_per_file, crlf=True) - - expected_records = [] - with ops.Graph().as_default() as g: - init_op, get_next, saver = self._build_graph(test_filenames) - with self.test_session(graph=g) as sess: - sess.run(init_op) - for _ in range(break_record): - sess.run(get_next) - self._save(saver, sess) - for _ in range(total_records - break_record): - expected_records.append(sess.run(get_next)) - - actual_records = [] - with ops.Graph().as_default() as g: - with self.test_session(graph=g) as sess: - init_op, get_next, saver = self._build_graph( - test_filenames, compression_type="GZIP") - self._restore(saver, sess) - for _ in range(total_records - break_record): - actual_records.append(sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - self.assertEqual(actual_records, expected_records) - - def testRestoreInModifiedGraphThenInit(self): - num_files = 5 - lines_per_file = 5 - total_records = num_files * lines_per_file - break_record = 8 - test_filenames = self._createFiles(num_files, lines_per_file, crlf=True) - - expected_records = [] - with ops.Graph().as_default() as g: - init_op, get_next, saver = self._build_graph(test_filenames) - with self.test_session(graph=g) as sess: - sess.run(init_op) - for _ in range(break_record): - expected_records.append(sess.run(get_next)) - self._save(saver, sess) - for _ in range(total_records - break_record): - expected_records.append(sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Test that calling the init_op overrides the restored iterator. The - # iterator for the old graph was build to read uncompressed files and - # would fail when trying to read the new files. - actual_records = [] - with ops.Graph().as_default() as g: - with self.test_session(graph=g) as sess: - test_filenames = self._createFiles( - num_files, lines_per_file, crlf=True, compression_type="GZIP") - init_op, get_next, saver = self._build_graph( - test_filenames, compression_type="GZIP") - self._restore(saver, sess) - sess.run(init_op) - for _ in range(total_records): - actual_records.append(sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - self.assertEqual(actual_records, expected_records) - - def testDoNotRestoreIterator(self): - num_files = 5 - lines_per_file = 5 - total_records = num_files * lines_per_file - break_record = 8 - test_filenames = self._createFiles(num_files, lines_per_file, crlf=True) - - expected_records = [] - with ops.Graph().as_default() as g: - init_op, get_next, saver = self._build_graph(test_filenames) - with self.test_session(graph=g) as sess: - sess.run(init_op) - for _ in range(break_record): - expected_records.append(sess.run(get_next)) - self._save(saver, sess) - for _ in range(total_records - break_record): - expected_records.append(sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - actual_records = [] - with ops.Graph().as_default() as g: - with self.test_session(graph=g) as sess: - init_op, get_next, saver = self._build_graph( - test_filenames, build_saveable=False) - self._restore(saver, sess) - with self.assertRaises(errors.FailedPreconditionError): - sess.run(get_next) - sess.run(init_op) - for _ in range(total_records): - actual_records.append(sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - self.assertEqual(actual_records, expected_records) - - -class FixedLengthRecordReaderTest(test.TestCase): +class FixedLengthRecordReaderTestBase(test.TestCase): def setUp(self): - super(FixedLengthRecordReaderTest, self).setUp() + super(FixedLengthRecordReaderTestBase, self).setUp() self._num_files = 2 self._num_records = 7 self._header_bytes = 5 @@ -462,6 +216,9 @@ class FixedLengthRecordReaderTest(test.TestCase): f.write(b"F" * self._footer_bytes) return filenames + +class FixedLengthRecordReaderTest(FixedLengthRecordReaderTestBase): + def testFixedLengthRecordDataset(self): test_filenames = self._createFiles() filenames = array_ops.placeholder(dtypes.string, shape=[None]) @@ -547,304 +304,29 @@ class FixedLengthRecordReaderTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(iterator.get_next()) - def _iterator_checkpoint_path(self): - return os.path.join(self.get_temp_dir(), "iterator") - - def _save_op(self, iterator_resource): - iterator_state_variant = gen_dataset_ops.serialize_iterator( - iterator_resource) - save_op = io_ops.write_file( - self._iterator_checkpoint_path(), - parsing_ops.serialize_tensor(iterator_state_variant)) - return save_op - - def _restore_op(self, iterator_resource): - iterator_state_variant = parsing_ops.parse_tensor( - io_ops.read_file(self._iterator_checkpoint_path()), dtypes.variant) - restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource, - iterator_state_variant) - return restore_op - - def _build_iterator_graph(self, num_epochs): + +class FixedLengthRecordDatasetSerializationTest( + FixedLengthRecordReaderTestBase, + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_iterator_graph(self, num_epochs, compression_type=None): filenames = self._createFiles() - dataset = (readers.FixedLengthRecordDataset( - filenames, self._record_bytes, self._header_bytes, self._footer_bytes) - .repeat(num_epochs)) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next_op = iterator.get_next() - save_op = self._save_op(iterator._iterator_resource) - restore_op = self._restore_op(iterator._iterator_resource) - return init_op, get_next_op, save_op, restore_op - - def _restore_iterator(self): - output_types = dtypes.string - output_shapes = tensor_shape.scalar() - iterator = iterator_ops.Iterator.from_structure(output_types, output_shapes) - get_next = iterator.get_next() - restore_op = self._restore_op(iterator._iterator_resource) - return restore_op, get_next - - def testSaveRestore(self): - num_epochs = 10 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testInitThenRestore(self): - # Note: Calling init_op before restore_op is redundant. This test just makes - # sure we do not fail if restore is called on an already initialized - # iterator resource. - num_epochs = 10 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreInModifiedGraph(self): - num_epochs = 10 - num_epochs_1 = 20 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs_1) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreWithoutBuildingDatasetGraph(self): - num_epochs = 10 - epoch_break = 5 - file_break = self._num_files // 2 - record_break = self._num_records // 2 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch == epoch_break and f == file_break and - r == record_break): - sess.run(save_op) - break - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - else: - continue - break - else: - continue - break - else: - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - with ops.Graph().as_default() as g: - restore_op, get_next_op = self._restore_iterator() - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for epoch in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - if (epoch < epoch_break or - (epoch == epoch_break and f < file_break) or - (epoch == epoch_break and f == file_break and - r < record_break)): - continue - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreUnusedIterator(self): - num_epochs = 10 - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - # Save unused iterator. - sess.run(save_op) - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - for _ in range(num_epochs * self._num_files * self._num_records): - sess.run(get_next_op) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - def testRestoreExhaustedIterator(self): - num_epochs = 10 - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(init_op) - # Note: There is no checkpoint saved currently so a NotFoundError is - # raised. - with self.assertRaises(errors.NotFoundError): - sess.run(restore_op) - for _ in range(num_epochs): - for f in range(self._num_files): - for r in range(self._num_records): - self.assertEqual(self._record(f, r), sess.run(get_next_op)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - sess.run(save_op) - - with ops.Graph().as_default() as g: - init_op, get_next_op, save_op, restore_op = self._build_iterator_graph( - num_epochs=num_epochs) - with self.test_session(graph=g) as sess: - sess.run(restore_op) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next_op) - - -class TFRecordDatasetTest(test.TestCase): + return readers.FixedLengthRecordDataset( + filenames, self._record_bytes, self._header_bytes, + self._footer_bytes).repeat(num_epochs) + + def testFixedLengthRecordCore(self): + num_epochs = 5 + num_outputs = num_epochs * self._num_files * self._num_records + self.run_core_tests(lambda: self._build_iterator_graph(num_epochs), + lambda: self._build_iterator_graph(num_epochs * 2), + num_outputs) + + +class TFRecordDatasetTestBase(test.TestCase): def setUp(self): - super(TFRecordDatasetTest, self).setUp() + super(TFRecordDatasetTestBase, self).setUp() self._num_files = 2 self._num_records = 7 @@ -880,6 +362,9 @@ class TFRecordDatasetTest(test.TestCase): writer.close() return filenames + +class TFRecordDatasetTest(TFRecordDatasetTestBase): + def testReadOneEpoch(self): with self.test_session() as sess: # Basic test: read from file 0. @@ -1001,6 +486,74 @@ class TFRecordDatasetTest(test.TestCase): sess.run(iterator.get_next()) +class TFRecordDatasetSerializationTest( + TFRecordDatasetTestBase, + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_iterator_graph(self, + num_epochs, + batch_size=1, + compression_type=None, + buffer_size=None): + filenames = self._createFiles() + if compression_type is "ZLIB": + zlib_files = [] + for i, fn in enumerate(filenames): + with open(fn, "rb") as f: + cdata = zlib.compress(f.read()) + zfn = os.path.join(self.get_temp_dir(), "tfrecord_%s.z" % i) + with open(zfn, "wb") as f: + f.write(cdata) + zlib_files.append(zfn) + filenames = zlib_files + + elif compression_type is "GZIP": + gzip_files = [] + for i, fn in enumerate(self.test_filenames): + with open(fn, "rb") as f: + gzfn = os.path.join(self.get_temp_dir(), "tfrecord_%s.gz" % i) + with gzip.GzipFile(gzfn, "wb") as gzf: + gzf.write(f.read()) + gzip_files.append(gzfn) + filenames = gzip_files + + return readers.TFRecordDataset( + filenames, compression_type, + buffer_size=buffer_size).repeat(num_epochs).batch(batch_size) + + def testTFRecordWithoutBufferCore(self): + num_epochs = 5 + batch_size = num_epochs + num_outputs = num_epochs * self._num_files * self._num_records // batch_size + # pylint: disable=g-long-lambda + self.run_core_tests( + lambda: self._build_iterator_graph(num_epochs, batch_size, + buffer_size=0), + lambda: self._build_iterator_graph(num_epochs * 2, batch_size), + num_outputs) + self.run_core_tests( + lambda: self._build_iterator_graph(num_epochs, buffer_size=0), None, + num_outputs * batch_size) + # pylint: enable=g-long-lambda + + def testTFRecordWithBufferCore(self): + num_epochs = 5 + num_outputs = num_epochs * self._num_files * self._num_records + self.run_core_tests(lambda: self._build_iterator_graph(num_epochs), + lambda: self._build_iterator_graph(num_epochs * 2), + num_outputs) + + def testTFRecordWithCompressionCore(self): + num_epochs = 5 + num_outputs = num_epochs * self._num_files * self._num_records + self.run_core_tests( + lambda: self._build_iterator_graph(num_epochs, compression_type="ZLIB"), + lambda: self._build_iterator_graph(num_epochs * 2), num_outputs) + self.run_core_tests( + lambda: self._build_iterator_graph(num_epochs, compression_type="GZIP"), + lambda: self._build_iterator_graph(num_epochs * 2), num_outputs) + + class ReadBatchFeaturesTest(test.TestCase): def setUp(self): diff --git a/tensorflow/core/kernels/reader_dataset_ops.cc b/tensorflow/core/kernels/reader_dataset_ops.cc index c08e42be1d..d942ddc4a7 100644 --- a/tensorflow/core/kernels/reader_dataset_ops.cc +++ b/tensorflow/core/kernels/reader_dataset_ops.cc @@ -511,15 +511,18 @@ class TFRecordDatasetOp : public DatasetOpKernel { errors::InvalidArgument( "`buffer_size` must be >= 0 (0 == no buffering)")); - *output = new Dataset(std::move(filenames), compression_type, buffer_size); + *output = + new Dataset(ctx, std::move(filenames), compression_type, buffer_size); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - explicit Dataset(std::vector filenames, + explicit Dataset(OpKernelContext* ctx, std::vector filenames, const string& compression_type, int64 buffer_size) - : filenames_(std::move(filenames)), + : GraphDatasetBase(ctx), + filenames_(std::move(filenames)), + compression_type_(compression_type), options_(io::RecordReaderOptions::CreateRecordReaderOptions( compression_type)) { if (buffer_size > 0) { @@ -546,6 +549,20 @@ class TFRecordDatasetOp : public DatasetOpKernel { string DebugString() override { return "TFRecordDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* filenames = nullptr; + TF_RETURN_IF_ERROR(b->AddVector(filenames_, &filenames)); + Node* compression_type = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(compression_type_, &compression_type)); + Node* buffer_size = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(options_.buffer_size, &buffer_size)); + TF_RETURN_IF_ERROR(b->AddDataset( + this, {filenames, compression_type, buffer_size}, output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: @@ -571,8 +588,7 @@ class TFRecordDatasetOp : public DatasetOpKernel { // We have reached the end of the current file, so maybe // move on to next file. - reader_.reset(); - file_.reset(); + ResetStreamsLocked(); ++current_file_index_; } @@ -582,17 +598,64 @@ class TFRecordDatasetOp : public DatasetOpKernel { return Status::OK(); } - // Actually move on to next file. - const string& next_filename = - dataset()->filenames_[current_file_index_]; - TF_RETURN_IF_ERROR( - ctx->env()->NewRandomAccessFile(next_filename, &file_)); - reader_.reset( - new io::SequentialRecordReader(file_.get(), dataset()->options_)); + TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); } while (true); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("current_file_index"), + current_file_index_)); + + if (reader_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("offset"), reader_->TellOffset())); + } + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + ResetStreamsLocked(); + int64 current_file_index; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_file_index"), + ¤t_file_index)); + current_file_index_ = size_t(current_file_index); + if (reader->Contains(full_name("offset"))) { + int64 offset; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("offset"), &offset)); + TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); + TF_RETURN_IF_ERROR(reader_->SeekOffset(offset)); + } + return Status::OK(); + } + private: + // Sets up reader streams to read from the file at `current_file_index_`. + Status SetupStreamsLocked(Env* env) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (current_file_index_ >= dataset()->filenames_.size()) { + return errors::InvalidArgument( + "current_file_index_:", current_file_index_, + " >= filenames_.size():", dataset()->filenames_.size()); + } + + // Actually move on to next file. + const string& next_filename = + dataset()->filenames_[current_file_index_]; + TF_RETURN_IF_ERROR(env->NewRandomAccessFile(next_filename, &file_)); + reader_.reset( + new io::SequentialRecordReader(file_.get(), dataset()->options_)); + return Status::OK(); + } + + // Resets all reader streams. + void ResetStreamsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + reader_.reset(); + file_.reset(); + } + mutex mu_; size_t current_file_index_ GUARDED_BY(mu_) = 0; @@ -603,6 +666,7 @@ class TFRecordDatasetOp : public DatasetOpKernel { }; const std::vector filenames_; + const string compression_type_; io::RecordReaderOptions options_; }; }; diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index c3b87ee5bf..403c82818e 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -196,6 +196,19 @@ Status RecordReader::ReadRecord(uint64* offset, string* record) { return Status::OK(); } +Status RecordReader::SkipNBytes(uint64 offset) { +#if !defined(IS_SLIM_BUILD) + if (zlib_input_stream_) { + TF_RETURN_IF_ERROR(zlib_input_stream_->SkipNBytes(offset)); + } else { +#endif + if (options_.buffer_size > 0) { + TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(offset)); + } + } + return Status::OK(); +} + SequentialRecordReader::SequentialRecordReader( RandomAccessFile* file, const RecordReaderOptions& options) : underlying_(file, options), offset_(0) {} diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index e4f6a5b492..62dd2efb79 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -74,6 +74,10 @@ class RecordReader { // sequential. Status ReadRecord(uint64* offset, string* record); + // Skip the records till "offset". Returns OK on success, + // OUT_OF_RANGE for end of file, or something else for an error. + Status SkipNBytes(uint64 offset); + private: Status ReadChecksummed(uint64 offset, size_t n, StringPiece* result, string* storage); @@ -107,6 +111,21 @@ class SequentialRecordReader { return underlying_.ReadRecord(&offset_, record); } + // Returns the current offset in the file. + uint64 TellOffset() { return offset_; } + + // Seek to this offset within the file and set this offset as the current + // offset. Trying to seek backward will throw error. + Status SeekOffset(uint64 offset) { + if (offset < offset_) + return errors::InvalidArgument( + "Trying to seek offset: ", offset, + " which is less than the current offset: ", offset_); + TF_RETURN_IF_ERROR(underlying_.SkipNBytes(offset - offset_)); + offset_ = offset; + return Status::OK(); + } + private: RecordReader underlying_; uint64 offset_ = 0; -- GitLab From a4fccfd808d0fc13881d6c05365205c172d22ef7 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 16 Nov 2017 14:45:42 -0800 Subject: [PATCH 0537/1801] [XLA:CPU] Add an explicit code path for the entry computation cycle count While this does change the profile counter entry for the entry computation during AOT compiles (earlier it would always be some non-null llvm::Value, but now it can be null), it does not change any observable behavior since RecordCompleteComputation is a no-op for an empty hlo_to_profile_idx_ map. PiperOrigin-RevId: 176022629 --- .../compiler/xla/service/cpu/cpu_compiler.cc | 28 +++++--- .../compiler/xla/service/cpu/ir_emitter.cc | 69 +++++++++++-------- .../compiler/xla/service/cpu/ir_emitter.h | 11 ++- 3 files changed, 66 insertions(+), 42 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 2b8927f953..f5b95d3657 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -537,9 +537,11 @@ StatusOr> CpuCompiler::Compile( parallel_computations.emplace(to_apply, instruction); } - IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - std::move(hlo_to_profile_idx), jit->target_machine(), - jit->external_constant_pool()); + size_t entry_computation_profile_idx = hlo_to_profile_idx.size(); + IrEmitter ir_emitter( + *module, *assignment, llvm_module.get(), std::move(hlo_to_profile_idx), + /*entry_computation_profile_idx=*/entry_computation_profile_idx, + jit->target_machine(), jit->external_constant_pool()); std::unique_ptr> function_names( new HloInstructionMap()); @@ -617,9 +619,11 @@ StatusOr> CpuCompiler::Compile( // before the entry computation. The order of computations returned from // GetEmbeddedComputations guarantees that a called computation occurs // before a caller computation. - IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - std::move(hlo_to_profile_idx), jit->target_machine(), - jit->external_constant_pool()); + size_t entry_computation_profile_idx = hlo_to_profile_idx.size(); + IrEmitter ir_emitter( + *module, *assignment, llvm_module.get(), std::move(hlo_to_profile_idx), + /*entry_computation_profile_idx=*/entry_computation_profile_idx, + jit->target_machine(), jit->external_constant_pool()); for (auto embedded_computation : computation->MakeEmbeddedComputationsList()) { @@ -786,11 +790,13 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, proto, xla_dump_hlo_proto_to, module->name())); } - IrEmitter ir_emitter(*module, *assignment, &llvm_module, - /*hlo_to_profile_idx=*/ - std::unordered_map{}, - target_machine.get(), - /*external_constant_pool=*/nullptr); + IrEmitter ir_emitter( + *module, *assignment, &llvm_module, + /*hlo_to_profile_idx=*/ + std::unordered_map{}, + /*entry_computation_profile_idx=*/tensorflow::gtl::nullopt, + target_machine.get(), + /*external_constant_pool=*/nullptr); HloComputation* computation = module->entry_computation(); for (auto embedded_computation : computation->MakeEmbeddedComputationsList()) { diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index ba4cf14d64..c00f1d5c1d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -77,6 +77,7 @@ IrEmitter::IrEmitter( const HloModule& hlo_module, const BufferAssignment& assignment, llvm::Module* llvm_module, std::unordered_map hlo_to_profile_idx, + tensorflow::gtl::optional entry_computation_profile_idx, llvm::TargetMachine* target_machine, ExternalConstantPool* external_constant_pool) : assignment_(assignment), @@ -84,6 +85,7 @@ IrEmitter::IrEmitter( arch_type_(llvm::Triple(llvm_module->getTargetTriple()).getArch()), ir_builder_(llvm_module->getContext()), hlo_to_profile_idx_(std::move(hlo_to_profile_idx)), + entry_computation_profile_idx_(std::move(entry_computation_profile_idx)), alias_analysis_(hlo_module, assignment, &llvm_module->getContext()), hlo_module_config_(hlo_module.config()), parallel_cpu_backend_( @@ -2613,50 +2615,57 @@ Status IrEmitter::FinishVisit(HloInstruction* root) { llvm::Value* root_value = GetEmittedValueFor(root); VLOG(2) << " value: " << llvm_ir::DumpToString(*root_value); - // For the parallel cpu backend, we record the total for each embedded - // computation callee with its caller kCall HLO. - HloInstruction* hlo_to_lookup = nullptr; - if (parallel_cpu_backend_ && is_top_level_computation_) { - auto* computation = root->parent(); - auto* entry_computation = computation->parent()->entry_computation(); - if (computation != entry_computation) { - for (HloInstruction* instruction : entry_computation->instructions()) { - if (instruction->opcode() == HloOpcode::kCall && - instruction->to_apply()->root_instruction() == root) { - hlo_to_lookup = instruction; - break; + llvm::Value* prof_counter = [&]() { + // For the parallel cpu backend, we record the total for each embedded + // computation callee with its caller kCall HLO. + if (parallel_cpu_backend_ && is_top_level_computation_) { + auto* computation = root->parent(); + auto* entry_computation = computation->parent()->entry_computation(); + if (computation != entry_computation) { + for (HloInstruction* instruction : entry_computation->instructions()) { + if (instruction->opcode() == HloOpcode::kCall && + instruction->to_apply()->root_instruction() == root) { + return GetProfileCounterFor(*instruction); + } } } } - } - if (auto* prof_counter = GetProfileCounterFor(hlo_to_lookup)) { + + // Otherwise we record the total computation cycles in a dedicated slot for + // the entry computation. + return GetProfileCounterForEntryComputation(); + }(); + + if (prof_counter) { profiling_state_.RecordCompleteComputation(&ir_builder_, prof_counter); } - ir_builder_.CreateRetVoid(); return Status::OK(); } -llvm::Value* IrEmitter::GetProfileCounterFor(const HloInstruction* hlo) { - string counter_name; - size_t prof_counter_idx; - if (hlo) { - auto it = hlo_to_profile_idx_.find(hlo); - if (it == hlo_to_profile_idx_.end()) { - return nullptr; - } - - prof_counter_idx = it->second; - counter_name = IrName("prof_counter", hlo->name()); - } else { - prof_counter_idx = hlo_to_profile_idx_.size(); - counter_name = "prof_counter.computation"; +llvm::Value* IrEmitter::GetProfileCounterFor(const HloInstruction& hlo) { + auto it = hlo_to_profile_idx_.find(&hlo); + if (it == hlo_to_profile_idx_.end()) { + return nullptr; } + + size_t prof_counter_idx = it->second; + string counter_name = IrName("prof_counter", hlo.name()); return ir_builder_.CreateGEP(GetProfileCountersArgument(), ir_builder_.getInt64(prof_counter_idx), AsStringRef(counter_name)); } +llvm::Value* IrEmitter::GetProfileCounterForEntryComputation() { + if (entry_computation_profile_idx_) { + return ir_builder_.CreateGEP( + GetProfileCountersArgument(), + ir_builder_.getInt64(*entry_computation_profile_idx_), + "prof_counter.computation"); + } + return nullptr; +} + void IrEmitter::ProfilingState::UpdateProfileCounter( llvm::IRBuilder<>* ir_builder, llvm::Value* prof_counter, llvm::Value* cycle_end, llvm::Value* cycle_start) { @@ -2735,7 +2744,7 @@ Status IrEmitter::Preprocess(HloInstruction* hlo) { } Status IrEmitter::Postprocess(HloInstruction* hlo) { - if (auto* prof_counter = GetProfileCounterFor(hlo)) { + if (auto* prof_counter = GetProfileCounterFor(*hlo)) { profiling_state_.RecordCycleDelta(&ir_builder_, hlo, prof_counter); } return Status::OK(); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 14f98867a4..351c95278c 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -105,6 +105,8 @@ class IrEmitter : public DfsHloVisitorWithDefault { // llvm_module: the LLVM module to emit IR into. // hlo_to_profile_idx: the mapping from HLO to its index in the profiling // array. + // entry_computation_profile_idx: the index in the profiling array + // for the entry computation. // external_constant_pool: if non-null, points to an ExternalConstantPool // instance into which the Ir emitter can spill // constants. @@ -112,6 +114,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { const HloModule& hlo_module, const BufferAssignment& assignment, llvm::Module* llvm_module, std::unordered_map hlo_to_profile_idx, + tensorflow::gtl::optional entry_computation_profile_idx, llvm::TargetMachine* target_machine, ExternalConstantPool* external_constant_pool); ~IrEmitter() override; @@ -197,7 +200,12 @@ class IrEmitter : public DfsHloVisitorWithDefault { // Convenience function to generate a GEP into the profile counter parameter // which would correspond to the index for a given HLO. - llvm::Value* GetProfileCounterFor(const HloInstruction* hlo); + llvm::Value* GetProfileCounterFor(const HloInstruction& hlo); + + // Convenience function to generate a GEP into the profile counter parameter + // corresponding to the index for the entry computation. Returns nullptr if + // profiling the entry computation is disabled. + llvm::Value* GetProfileCounterForEntryComputation(); // Gets the IR Value emitted previously for the given hlo. // @@ -474,6 +482,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { // Maps HLOs to their index into the profile counter array. std::unordered_map hlo_to_profile_idx_; + const tensorflow::gtl::optional entry_computation_profile_idx_; // Maps HLOs to Values emitted for them. std::unordered_map emitted_value_; -- GitLab From 9c4bdb865452e418a1d69cd5f5cdccb51d6a0e1d Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 16 Nov 2017 14:51:05 -0800 Subject: [PATCH 0538/1801] Disable for generated_examples_zip_test in open-source (#14635) * Disable for OSS * Buildifier --- tensorflow/contrib/lite/testing/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 5e40a13d3c..ecddb4b807 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -187,6 +187,7 @@ tf_cc_test( srcs = ["generated_examples_zip_test.cc"], data = [":optest"], shard_count = 10, + tags = ["no_oss"], deps = [ ":parse_testdata_lib", "//tensorflow/contrib/lite:builtin_op_data", -- GitLab From f5250528a952a37abb8ffae92e77e8f8961c2499 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 14:54:56 -0800 Subject: [PATCH 0539/1801] Move IODataType to a new types.proto in preparation for using it in ModelFlags PiperOrigin-RevId: 176024307 --- tensorflow/contrib/lite/python/lite.py | 11 +++--- tensorflow/contrib/lite/toco/BUILD | 24 ++++++++++++ .../contrib/lite/toco/model_cmdline_flags.h | 1 + .../contrib/lite/toco/model_flags.proto | 1 + .../lite/toco/python/toco_from_protos_test.py | 5 ++- .../contrib/lite/toco/toco_cmdline_flags.h | 1 + tensorflow/contrib/lite/toco/toco_flags.proto | 23 +----------- tensorflow/contrib/lite/toco/tooling_util.h | 1 + tensorflow/contrib/lite/toco/types.proto | 37 +++++++++++++++++++ 9 files changed, 76 insertions(+), 28 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/types.proto diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index dcae16d5ae..759677121f 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -30,17 +30,18 @@ import tempfile from tensorflow.contrib.lite.toco import model_flags_pb2 as _model_flags_pb2 from tensorflow.contrib.lite.toco import toco_flags_pb2 as _toco_flags_pb2 +from tensorflow.contrib.lite.toco import types_pb2 as _types_pb2 from tensorflow.contrib.lite.toco.python.tensorflow_wrap_toco import TocoConvert as _toco_convert_protos from tensorflow.python.framework import dtypes as _dtypes from tensorflow.python.platform import resource_loader as _resource_loader from tensorflow.python.util.all_util import remove_undocumented # Enum types from the protobuf promoted to the API -FLOAT = _toco_flags_pb2.FLOAT -INT32 = _toco_flags_pb2.INT32 -INT64 = _toco_flags_pb2.INT64 -STRING = _toco_flags_pb2.STRING -QUANTIZED_UINT8 = _toco_flags_pb2.QUANTIZED_UINT8 +FLOAT = _types_pb2.FLOAT +INT32 = _types_pb2.INT32 +INT64 = _types_pb2.INT64 +STRING = _types_pb2.STRING +QUANTIZED_UINT8 = _types_pb2.QUANTIZED_UINT8 TENSORFLOW_GRAPHDEF = _toco_flags_pb2.TENSORFLOW_GRAPHDEF TFLITE = _toco_flags_pb2.TFLITE GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index eb08b5d1e5..0bf8d067a3 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -13,15 +13,31 @@ load( "tf_cc_test", ) +tf_proto_library_cc( + name = "types_proto", + srcs = ["types.proto"], + visibility = ["//visibility:public"], +) + tf_proto_library_cc( name = "toco_flags_proto", srcs = ["toco_flags.proto"], + protodeps = [":types_proto"], visibility = ["//visibility:public"], ) tf_proto_library_cc( name = "model_flags_proto", srcs = ["model_flags.proto"], + protodeps = [":types_proto"], + visibility = ["//visibility:public"], +) + +tf_proto_library_py( + name = "types_proto", + srcs = [ + "types.proto", + ], visibility = ["//visibility:public"], ) @@ -30,6 +46,7 @@ tf_proto_library_py( srcs = [ "toco_flags.proto", ], + protodeps = [":types_proto"], visibility = ["//visibility:public"], ) @@ -38,6 +55,7 @@ tf_proto_library_py( srcs = [ "model_flags.proto", ], + protodeps = [":types_proto"], visibility = ["//visibility:public"], ) @@ -102,6 +120,7 @@ cc_library( ":model_cmdline_flags", ":toco_flags_proto_cc", ":toco_port", + ":types_proto_cc", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "@com_google_absl//absl/strings", @@ -122,6 +141,7 @@ cc_library( ":model_flags_proto_cc", ":toco_graphviz_dump_options", ":toco_port", + ":types_proto_cc", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "@com_google_absl//absl/strings", @@ -215,6 +235,7 @@ cc_library( ":runtime", ":toco_port", ":tooling_util", + ":types_proto_cc", "//tensorflow/core:lib", "@com_google_absl//absl/strings", ], @@ -247,6 +268,7 @@ cc_library( ":graph_transformations", ":model", ":model_flags_proto_cc", + ":types_proto_cc", ":runtime", ":toco_graphviz_dump_options", ":toco_flags_proto_cc", @@ -286,6 +308,7 @@ cc_library( ":toco_flags_proto_cc", ":toco_graphviz_dump_options", ":toco_port", + ":types_proto_cc", "//tensorflow/core:lib", "@com_google_absl//absl/strings", "@protobuf_archive//:protobuf_headers", @@ -316,6 +339,7 @@ tf_cc_binary( ":toco_flags_proto_cc", ":toco_port", ":toco_tooling", + ":types_proto_cc", "//tensorflow/core:lib", "@com_google_absl//absl/strings", ], diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.h b/tensorflow/contrib/lite/toco/model_cmdline_flags.h index dfa3d3c1ef..027d7ae1aa 100644 --- a/tensorflow/contrib/lite/toco/model_cmdline_flags.h +++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/contrib/lite/toco/args.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/types.pb.h" namespace toco { // Parse and remove arguments for models (in toco). Returns true if parsing diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto index 743e08b16f..b016f34621 100644 --- a/tensorflow/contrib/lite/toco/model_flags.proto +++ b/tensorflow/contrib/lite/toco/model_flags.proto @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. syntax = "proto2"; +import "tensorflow/contrib/lite/toco/types.proto"; package toco; diff --git a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py index 2a593beeca..ce19b7efbe 100644 --- a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py +++ b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py @@ -22,6 +22,7 @@ import tempfile import tensorflow as tf from tensorflow.contrib.lite.toco import model_flags_pb2 from tensorflow.contrib.lite.toco import toco_flags_pb2 +from tensorflow.contrib.lite.toco import types_pb2 from tensorflow.python.platform import googletest from tensorflow.python.platform import resource_loader @@ -47,8 +48,8 @@ class TocoFromProtosTest(googletest.TestCase): toco_flags = toco_flags_pb2.TocoFlags() toco_flags.input_format = toco_flags_pb2.TENSORFLOW_GRAPHDEF toco_flags.output_format = toco_flags_pb2.TFLITE - toco_flags.input_types.append(toco_flags_pb2.FLOAT) - toco_flags.inference_type = toco_flags_pb2.FLOAT + toco_flags.input_types.append(types_pb2.FLOAT) + toco_flags.inference_type = types_pb2.FLOAT model_flags = model_flags_pb2.ModelFlags() input_array = model_flags.input_arrays.add() input_array.name = TensorName(in_tensor) diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.h b/tensorflow/contrib/lite/toco/toco_cmdline_flags.h index 155a6fea87..ba35ca8d5d 100644 --- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.h +++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/toco/args.h" #include "tensorflow/contrib/lite/toco/toco_flags.pb.h" +#include "tensorflow/contrib/lite/toco/types.pb.h" namespace toco { // Parse and remove arguments handled from toco. Returns true if parsing diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index fd7c29fdc7..e900e1a25a 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. syntax = "proto2"; +import "tensorflow/contrib/lite/toco/types.proto"; + package toco; // Supported I/O file formats. Some formats may be input-only or output-only. @@ -30,27 +32,6 @@ enum FileFormat { GRAPHVIZ_DOT = 3; } -// IODataType describes the numeric data types to be used by the output format. -// See input_type and inference_type below. -enum IODataType { - IO_DATA_TYPE_UNKNOWN = 0; - - // Float32, not quantized - FLOAT = 1; - - // Uint8, quantized - QUANTIZED_UINT8 = 2; - - // Int32, not quantized - INT32 = 3; - - // Int64, not quantized - INT64 = 4; - - // String, not quantized - STRING = 5; -} - // TocoFlags encodes extra parameters that drive tooling operations, that // are not normally encoded in model files and in general may not be thought // of as properties of models, instead describing how models are to be diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index 093945edb3..e863996d7b 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/contrib/lite/toco/runtime/types.h" #include "tensorflow/contrib/lite/toco/toco_flags.pb.h" #include "tensorflow/contrib/lite/toco/toco_port.h" +#include "tensorflow/contrib/lite/toco/types.pb.h" // TODO(aselle): Replace with using a container specific hash override instead. namespace std { diff --git a/tensorflow/contrib/lite/toco/types.proto b/tensorflow/contrib/lite/toco/types.proto new file mode 100644 index 0000000000..318fd4b7b2 --- /dev/null +++ b/tensorflow/contrib/lite/toco/types.proto @@ -0,0 +1,37 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +syntax = "proto2"; + +package toco; + +// IODataType describes the numeric data types of input and output arrays +// of a model. +enum IODataType { + IO_DATA_TYPE_UNKNOWN = 0; + + // Float32, not quantized + FLOAT = 1; + + // Uint8, quantized + QUANTIZED_UINT8 = 2; + + // Int32, not quantized + INT32 = 3; + + // Int64, not quantized + INT64 = 4; + + // String, not quantized + STRING = 5; +} -- GitLab From 1a63168ff0196f1579a1f6b4cfae2d65f1e7c04e Mon Sep 17 00:00:00 2001 From: Dave MacLachlan Date: Thu, 16 Nov 2017 15:05:58 -0800 Subject: [PATCH 0540/1801] Add LICENSES to gitignore Update gitignore file for ios to cover the license files that get installed following the install instructions. --- tensorflow/examples/ios/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/examples/ios/.gitignore b/tensorflow/examples/ios/.gitignore index e572b3012c..dbabfb33bf 100644 --- a/tensorflow/examples/ios/.gitignore +++ b/tensorflow/examples/ios/.gitignore @@ -2,3 +2,6 @@ project.xcworkspace xcuserdata imagenet_comp_graph_label_strings.txt tensorflow_inception_graph.pb +simple/data/LICENSE +camera/data/LICENSE +benchmark/data/LICENSE -- GitLab From 5dc9d20f9717f7c097188b3ce7ec1ca1d127afd6 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 16 Nov 2017 15:08:35 -0800 Subject: [PATCH 0541/1801] Disable non-hermetic testcase in gcs_dns_cache_test. PiperOrigin-RevId: 176027515 --- tensorflow/core/platform/cloud/gcs_dns_cache_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc index cba6caff22..8d1a108f30 100644 --- a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc +++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc @@ -104,7 +104,8 @@ class GcsDnsCacheTest : public ::testing::Test { } }; -TEST_F(GcsDnsCacheTest, ResolveName) { ResolveNameTest(); } +// This sends a DNS name resolution request, thus it is flaky. +// TEST_F(GcsDnsCacheTest, ResolveName) { ResolveNameTest(); } TEST_F(GcsDnsCacheTest, AnnotateRequest) { AnnotateRequestTest(); } -- GitLab From 86f452fdd0e59febe98455bf17ebb8c44b45e1bb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 15:33:22 -0800 Subject: [PATCH 0542/1801] Small (literally) typo in description. PiperOrigin-RevId: 176031889 --- tensorflow/core/kernels/dataset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index b9b0e5a7c6..df75deacbe 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -207,7 +207,7 @@ class GraphDefBuilderWrapper { // recursively adds all functions it references. If a function with a matching // name has already been added, returns with OK status. If a user-defined with // name `function_name` is not found in the FunctionLibraryDefinition, returns - // and InvalidArgumentError. If the function with name `function_name` or any + // an InvalidArgumentError. If the function with name `function_name` or any // of its dependent functions are stateful, returns an InvalidArgument error. Status AddFunction(OpKernelContext* ctx, const string& function_name) { if (b_->HasFunction(function_name)) { -- GitLab From 0f9a9c854f7dfee904c4e88130cc496ec9f2611e Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Thu, 16 Nov 2017 18:53:42 -0500 Subject: [PATCH 0543/1801] Use get_or_create_global_step --- tensorflow/contrib/training/python/training/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py index 59f02fa38f..8e0139bdd6 100644 --- a/tensorflow/contrib/training/python/training/training.py +++ b/tensorflow/contrib/training/python/training/training.py @@ -410,7 +410,7 @@ def create_train_op(total_loss, loss value. """ if global_step is _USE_GLOBAL_STEP: - global_step = training_util.get_global_step() + global_step = training_util.get_or_create_global_step() # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None. global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) -- GitLab From 55ee41a98d50e200eda314ebf08f092000477f6e Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Thu, 16 Nov 2017 15:54:17 -0800 Subject: [PATCH 0544/1801] When constructing fusion computations from a proto, do not uniquify the names. The names are already unique and uniquifying them again will mutate them resulting in inconsistent names between the proto and the constructed HLO. PiperOrigin-RevId: 176035108 --- .../compiler/xla/service/hlo_computation.cc | 12 ++++---- .../compiler/xla/service/hlo_computation.h | 12 +++++--- .../compiler/xla/service/hlo_instruction.cc | 28 +++++++++++-------- .../compiler/xla/service/hlo_instruction.h | 11 ++++++-- tensorflow/compiler/xla/service/hlo_module.cc | 13 +++++++-- 5 files changed, 49 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 8056bcf0f7..c215cc48d6 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -407,16 +407,18 @@ HloComputationProto HloComputation::ToProto() const { /* static */ StatusOr> HloComputation::CreateFromProto( HloModule* module, const HloComputationProto& proto, - tensorflow::gtl::FlatMap* computation_map, + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation, HloInstruction* fusion_instruction) { std::vector> instructions; tensorflow::gtl::FlatMap instruction_map; int64 parameter_count = 0; for (const HloInstructionProto& instruction_proto : proto.instructions()) { - TF_ASSIGN_OR_RETURN( - std::unique_ptr instruction, - HloInstruction::CreateFromProto(module, instruction_proto, - instruction_map, computation_map)); + TF_ASSIGN_OR_RETURN(std::unique_ptr instruction, + HloInstruction::CreateFromProto( + module, instruction_proto, instruction_map, + computation_map, add_fused_computation)); if (instruction->opcode() == HloOpcode::kParameter) { parameter_count++; } diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 2835dbbb84..353b30bc69 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -152,12 +152,16 @@ class HloComputation { // computation_map: a map from computation name to HloComputation*. This map // must contain all computations which the newly constructed computation // calls. - // fusion_instruction: if non-null then the newly created computation will be - // constructed as a fused computation with this instruction as its fusion - // parent. + // add_fused_computation: A function to call to add a fused + // computation. Used only when the instruction is a fusion instruction. + // fusion_instruction: if non-null then the newly created computation will + // be constructed as a fused computation with this instruction as its + // fusion parent. static StatusOr> CreateFromProto( HloModule* module, const HloComputationProto& proto, - tensorflow::gtl::FlatMap* computation_map, + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation, HloInstruction* fusion_instruction = nullptr); // Gets the instructions in this computation. diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index c35ca1eb99..c046b6d9c8 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -52,7 +52,9 @@ using ::tensorflow::strings::StrCat; StatusOr> HloInstruction::CreateFromProto( HloModule* module, const HloInstructionProto& proto, const tensorflow::gtl::FlatMap& instruction_map, - tensorflow::gtl::FlatMap* computation_map) { + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation) { TF_RET_CHECK(!proto.opcode().empty()); TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode())); TF_RET_CHECK(proto.has_shape()); @@ -78,19 +80,19 @@ StatusOr> HloInstruction::CreateFromProto( TF_RET_CHECK(!proto.fusion_kind().empty()); TF_ASSIGN_OR_RETURN(instruction->fusion_kind_, StringToFusionKind(proto.fusion_kind())); - TF_ASSIGN_OR_RETURN( - std::unique_ptr fused_computation, - HloComputation::CreateFromProto( - module, proto.fused_instructions_computation(), computation_map, - /*fusion_instruction=*/instruction.get())); - instruction->called_computations_.push_back( - module->AddEmbeddedComputation(std::move(fused_computation))); + TF_ASSIGN_OR_RETURN(std::unique_ptr fused_computation, + HloComputation::CreateFromProto( + module, proto.fused_instructions_computation(), + computation_map, add_fused_computation, + /*fusion_instruction=*/instruction.get())); + instruction->called_computations_.push_back(fused_computation.get()); + add_fused_computation(std::move(fused_computation)); } else { for (const string& computation_name : proto.called_computation_names()) { - TF_RET_CHECK(ContainsKey(*computation_map, computation_name)) + TF_RET_CHECK(ContainsKey(computation_map, computation_name)) << "No computation named " << computation_name; instruction->called_computations_.push_back( - computation_map->at(computation_name)); + computation_map.at(computation_name)); } } @@ -2076,8 +2078,10 @@ string HloInstruction::ToCategory() const { bool saw_rank_1 = false; bool saw_higher_rank = false; for (const auto* operand : operands()) { - saw_rank_1 |= ShapeUtil::Rank(operand->shape()) == 1; - saw_higher_rank |= ShapeUtil::Rank(operand->shape()) > 1; + if (!ShapeUtil::IsTuple(operand->shape())) { + saw_rank_1 |= ShapeUtil::Rank(operand->shape()) == 1; + saw_higher_rank |= ShapeUtil::Rank(operand->shape()) > 1; + } } if (saw_rank_1 && saw_higher_rank) { return "rank-1-broadcast binary fusion"; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 6b2762ff14..8c6449d73b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -44,6 +44,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/gtl/iterator_range.h" #include "tensorflow/core/platform/logging.h" @@ -83,12 +84,16 @@ class HloInstruction { // must contain all operands of the newly constructed instruction. // computation_map: a map from computation name to HloComputation*. This map // must contain all computations which the newly constructed instruction - // calls. If the instruction is a fusion instruction, then the fusion - // computation is added to this map and the module. + // calls. + // add_fused_computation: A function to call to add a fused + // computation. Used (clearly) when the instruction is a fusion + // instruction. static StatusOr> CreateFromProto( HloModule* module, const HloInstructionProto& proto, const tensorflow::gtl::FlatMap& instruction_map, - tensorflow::gtl::FlatMap* computation_map); + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation); // Creates a parameter-retrieving instruction. static std::unique_ptr CreateParameter(int64 parameter_number, diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index d9c223fbba..faaf73ea1c 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -290,9 +290,16 @@ StatusOr> HloModule::CreateFromProto( tensorflow::gtl::FlatMap computation_map; for (const HloComputationProto& computation_proto : proto.computations()) { - TF_ASSIGN_OR_RETURN(std::unique_ptr computation, - HloComputation::CreateFromProto( - module.get(), computation_proto, &computation_map)); + TF_ASSIGN_OR_RETURN( + std::unique_ptr computation, + HloComputation::CreateFromProto( + module.get(), computation_proto, computation_map, + /*add_fused_computation=*/ + [&module](std::unique_ptr fused_computation) { + module->AddComputationInternal(std::move(fused_computation), + /*is_entry=*/false, + /*uniquify_names=*/false); + })); CHECK_NE(computation.get(), nullptr); TF_RET_CHECK(!ContainsKey(computation_map, computation->name())); string computation_name = computation->name(); -- GitLab From 9a72855893a7ca2832a08e1c5c4060f8674e0c7d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 16:10:25 -0800 Subject: [PATCH 0545/1801] Update fuse_op to eliminate duplicate nodes being created in the graph when injecting artificial dependency to the fused op. PiperOrigin-RevId: 176037465 --- .../framework/python/framework/graph_util.py | 2 +- .../python/framework/graph_util_test.py | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py index 8ab8711db4..9ba9c77b92 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util.py +++ b/tensorflow/contrib/framework/python/framework/graph_util.py @@ -91,7 +91,7 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes, (n, cur_node)) if cur_node not in input_nodes_set: next_to_visit += name_to_input_name[cur_node] - else: + elif n not in reachable_by_input: nodes_post_output.append(n) # Add all nodes upto the input nodes diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py index 87b992e22e..0c531fb290 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util_test.py +++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py @@ -56,6 +56,30 @@ class GraphUtilTest(test.TestCase): self.assertEqual(fused_graph_def.node[2].name, 'D') self.assertEqual(fused_graph_def.node[3].name, 'E') + def testGraphUtilArtificialDependencyInjection(self): + graph_def = graph_pb2.GraphDef() + node_a = GetNewNode('A', 'Placeholder', []) + node_a1 = GetNewNode('A1', 'Placeholder', []) + node_b = GetNewNode('B', 'Op1', ['A']) + node_c = GetNewNode('C', 'Op1', ['B']) + node_d = GetNewNode('D', 'Op1', ['C']) + node_e = GetNewNode('E', 'Op1', ['D']) + graph_def.node.extend([node_a, node_a1, node_b, node_c, node_d, node_e]) + fused_graph_def = graph_util.fuse_op(graph_def, ['A', 'A1'], ['D'], + [types_pb2.DT_FLOAT], True, 'FusedOp', + 'Op2') + self.assertEqual(len(fused_graph_def.node), 5) + self.assertEqual(fused_graph_def.node[0].name, 'A') + self.assertEqual(fused_graph_def.node[1].name, 'A1') + self.assertEqual(fused_graph_def.node[2].name, 'FusedOp') + self.assertEqual(fused_graph_def.node[2].input[0], 'A') + self.assertEqual(fused_graph_def.node[2].op, 'Op2') + self.assertEqual(fused_graph_def.node[2].attr['_output_quantized'].b, True) + self.assertEqual(fused_graph_def.node[2].attr['_output_types'].list.type, + [types_pb2.DT_FLOAT]) + self.assertEqual(fused_graph_def.node[3].name, 'D') + self.assertEqual(fused_graph_def.node[4].name, 'E') + if __name__ == '__main__': test.main() -- GitLab From 780c64e3e872269e76efa27b5bb7fe2465c26dfe Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Thu, 16 Nov 2017 18:23:32 -0800 Subject: [PATCH 0546/1801] Turn off graph optimization in max pooling test because of the inconsistent behavior on handling NaN and -Inf in different MaxPooling implementations. Split the tests as ConfigProto could interfere with each other. PiperOrigin-RevId: 176054079 --- .../python/kernel_tests/pooling_ops_test.py | 76 ++++++++++++++----- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index c699d50c02..30c777d12f 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -20,6 +20,8 @@ from __future__ import print_function import numpy as np +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl @@ -1172,12 +1174,27 @@ class PoolingTest(test.TestCase): [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) - def _testMaxPoolGradDirect(self, input_data, output_backprop, - expected_input_backprop, input_sizes, output_sizes, - window_rows, window_cols, row_stride, col_stride, - padding, use_gpu, v2): + def _testMaxPoolGradDirect(self, + input_data, + output_backprop, + expected_input_backprop, + input_sizes, + output_sizes, + window_rows, + window_cols, + row_stride, + col_stride, + padding, + use_gpu, + v2, + graph_optimization=False): pool_func = gen_nn_ops._max_pool_v2 if v2 else nn_ops.max_pool - with self.test_session(use_gpu=use_gpu): + + config = config_pb2.ConfigProto() + if graph_optimization: + config.graph_options.rewrite_options.layout_optimizer = ( + rewriter_config_pb2.RewriterConfig.ON) + with self.test_session(use_gpu=use_gpu, config=config): input_tensor = constant_op.constant(input_data, shape=input_sizes) output_tensor = pool_func(input_tensor, [1, window_rows, window_cols, 1], @@ -1314,7 +1331,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu, v2=v2) - def _testMaxPoolGradDirectWithNans2_1(self): + def _testMaxPoolGradDirectWithNans2_1CPU(self): input_data = [float("nan")] * 16 output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0] # Test the CPU implementation, which propagates diffs in case of NaN @@ -1337,11 +1354,23 @@ class PoolingTest(test.TestCase): use_gpu=False, v2=v2) + def _testMaxPoolGradDirectWithNans2_1GPU(self): if not test.is_gpu_available(): return - - # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + input_data = [float("nan")] * 16 + output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0] + # (1) For the NHWC format (used by default below), TensorFlow currently uses + # custom MaxPoolingNoMask for the forward op, cuDNN for the gradient op. + # With NaNs as input, MaxPoolingNoMask outputs -Inf, which is then fed into + # the gradient op. The cuDNN gradient op currently doesn't propagate the + # diff if input is -Inf and as a result outputs zeros. + # (2) For the NCHW format, TensorFlow currently uses + # cuDNN for both the forward and the gradient op. With NaNs as input, cuDNN + # forward op outputs NaNs, which is then fed into cuDNN gradient op. cuDNN + # gradient op is able to propagate NaNs and as a result the output is the + # same as expected_input_backprop_tf_cpu. + # We turn off graph optimization (layout optimizer) as the behavior of the + # above two cases are different. expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 @@ -1359,9 +1388,10 @@ class PoolingTest(test.TestCase): col_stride=1, padding="VALID", use_gpu=True, - v2=v2) + v2=v2, + graph_optimization=False) - def _testMaxPoolGradDirectWithNans2_2(self): + def _testMaxPoolGradDirectWithNans2_2CPU(self): input_data = [float("nan")] * 16 output_backprop = [ float("nan"), 12.0, 13.0, 15.0, float("nan"), 17.0, 19.0, 20.0, @@ -1387,11 +1417,16 @@ class PoolingTest(test.TestCase): use_gpu=False, v2=v2) + def _testMaxPoolGradDirectWithNans2_2GPU(self): if not test.is_gpu_available(): return - - # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + input_data = [float("nan")] * 16 + output_backprop = [ + float("nan"), 12.0, 13.0, 15.0, + float("nan"), 17.0, 19.0, 20.0, + float("nan") + ] + # See the correspoinding comment in _testMaxPoolGradDirectWithNans2_1GPU(). expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 @@ -1409,14 +1444,21 @@ class PoolingTest(test.TestCase): col_stride=1, padding="VALID", use_gpu=True, - v2=v2) + v2=v2, + graph_optimization=False) def testMaxPoolGradDirect(self): self._testMaxPoolGradDirect1_1() self._testMaxPoolGradDirect1_2() self._testMaxPoolGradDirect1_3() - self._testMaxPoolGradDirectWithNans2_1() - self._testMaxPoolGradDirectWithNans2_2() + self._testMaxPoolGradDirectWithNans2_1CPU() + self._testMaxPoolGradDirectWithNans2_2CPU() + + def testMaxPoolGradDirectNans2_1GPU(self): + self._testMaxPoolGradDirectWithNans2_1GPU() + + def testMaxPoolGradDirectNans2_2GPU(self): + self._testMaxPoolGradDirectWithNans2_2GPU() def _testMaxPoolGradGradValidPadding1_1(self, data_format, use_gpu): for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: -- GitLab From 0beff6bd1342f399173fc4e9d0e79afa3c54503b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 18:27:53 -0800 Subject: [PATCH 0547/1801] [tpu:profiler] Add run environment to TfOpStats. PiperOrigin-RevId: 176054460 --- .../contrib/tpu/profiler/tf_op_stats.proto | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index 2d2207a43f..6943ff5f47 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -104,6 +104,8 @@ message HloExtraInfoResult { optional string category = 1; // The long name of the HLO that includes the dimensions. optional string long_name = 2; + // The per-TPU-core batch size inferred from this HLO. + optional int64 per_core_batch_size = 3; } // Result proto for HloExtraInfoMap. @@ -112,6 +114,20 @@ message HloExtraInfoMapResult { map hlo_extrainfo_map = 1; } +// Result proto for RunEnvironment (the run environment of a profiling session). +message RunEnvironmentResult { + // Number of hosts used. + optional int32 host_count = 1; + // The type of TPU used. + optional string tpu_type = 2; + // The number of TPU cores used. + optional int32 tpu_core_count = 3; + // The per-TPU-core batch size. + optional int32 per_core_batch_size = 4; + // Job information including build target and command line. + optional string job_info = 5; +} + // Result proto for TfStatsHelper. message TfOpStats { // The result for the TF-metric database. @@ -126,4 +142,6 @@ message TfOpStats { optional HloExtraInfoMapResult hlo_extrainfo_map = 5; // Overall matrix unit utilization in percentage. optional double matrix_unit_utilization_percent = 6; + // The run environment of this profiling session. + optional RunEnvironmentResult run_environment = 7; } -- GitLab From 466040ca83a29d9842c4f44b56f51e99a16083dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 18:39:43 -0800 Subject: [PATCH 0548/1801] Renaming feature_id to dimension_id in dense float split PiperOrigin-RevId: 176055428 --- .../kernels/split_handler_ops.cc | 4 ++-- .../boosted_trees/lib/trees/decision_tree.cc | 24 +++++++++---------- .../lib/trees/decision_tree_test.cc | 6 ++--- .../boosted_trees/proto/tree_config.proto | 6 ++--- .../kernel_tests/prediction_ops_test.py | 2 +- .../kernel_tests/split_handler_ops_test.py | 8 +++---- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc index 3bd30d8678..5c31980359 100644 --- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc @@ -490,11 +490,11 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { } dense_split->set_feature_column(feature_column_group_id_); // Set the feature index for the best feature column. - const int64 best_feature_id = + const int64 best_dimension_id = bucket_ids_and_dimensions(best_element_idx, 1); const int32 best_bucket_id = bucket_ids_and_dimensions(best_element_idx, 0); - dense_split->set_feature_id(best_feature_id); + dense_split->set_dimension_id(best_dimension_id); dense_split->set_threshold(bucket_boundaries(best_bucket_id)); auto* left_child = split_info.mutable_left_child(); diff --git a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc index f8750e7191..0e5578693a 100644 --- a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc +++ b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc @@ -52,13 +52,13 @@ int DecisionTree::Traverse(const DecisionTreeConfig& config, example.sparse_float_features[split.feature_column()]; // Feature id for the split when multivalent sparse float column, or 0 // by default. - const int32 feature_id = split.feature_id(); + const int32 dimension_id = split.dimension_id(); - node_id = - !sparse_feature[feature_id].has_value() || - sparse_feature[feature_id].get_value() <= split.threshold() - ? split.left_id() - : split.right_id(); + node_id = !sparse_feature[dimension_id].has_value() || + sparse_feature[dimension_id].get_value() <= + split.threshold() + ? split.left_id() + : split.right_id(); break; } case TreeNode::kSparseFloatBinarySplitDefaultRight: { @@ -68,12 +68,12 @@ int DecisionTree::Traverse(const DecisionTreeConfig& config, example.sparse_float_features[split.feature_column()]; // Feature id for the split when multivalent sparse float column, or 0 // by default. - const int32 feature_id = split.feature_id(); - node_id = - sparse_feature[feature_id].has_value() && - sparse_feature[feature_id].get_value() <= split.threshold() - ? split.left_id() - : split.right_id(); + const int32 dimension_id = split.dimension_id(); + node_id = sparse_feature[dimension_id].has_value() && + sparse_feature[dimension_id].get_value() <= + split.threshold() + ? split.left_id() + : split.right_id(); break; } case TreeNode::kCategoricalIdBinarySplit: { diff --git a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc index 93924d429c..58fe8e335a 100644 --- a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc @@ -190,7 +190,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) { tree_config.add_nodes()->mutable_leaf(); // Split on first column - split_node->set_feature_id(0); + split_node->set_dimension_id(0); split_node->set_threshold(2.0f); // Both instances have this feature value. @@ -199,7 +199,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) { EXPECT_EQ(1, DecisionTree::Traverse(tree_config, 0, *++example_it)); // Split on second column - split_node->set_feature_id(1); + split_node->set_dimension_id(1); split_node->set_threshold(5.0f); // First instance does not have it (default right), second does have it. @@ -208,7 +208,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) { EXPECT_EQ(1, DecisionTree::Traverse(tree_config, 0, *++example_it)); // Split on third column - split_node->set_feature_id(2); + split_node->set_dimension_id(2); split_node->set_threshold(3.0f); example_it = example_iterable.begin(); diff --git a/tensorflow/contrib/boosted_trees/proto/tree_config.proto b/tensorflow/contrib/boosted_trees/proto/tree_config.proto index f14abf45a5..fc570c1083 100644 --- a/tensorflow/contrib/boosted_trees/proto/tree_config.proto +++ b/tensorflow/contrib/boosted_trees/proto/tree_config.proto @@ -53,9 +53,9 @@ message DenseFloatBinarySplit { // Float feature column and split threshold describing // the rule feature <= threshold. int32 feature_column = 1; - // If feature column is multivalent, this holds the index of the feature for - // the split. Defaults to 0. - int32 feature_id = 5; + // If feature column is multivalent, this holds the index of the dimensiong + // for the split. Defaults to 0. + int32 dimension_id = 5; float threshold = 2; // Node children indexing into a contiguous diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py index 9ada844601..c1acf35160 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py @@ -93,7 +93,7 @@ def _set_float_split(split, feat_col, thresh, l_id, r_id, feature_dim_id=None): split.left_id = l_id split.right_id = r_id if feature_dim_id is not None: - split.feature_id = feature_dim_id + split.dimension_id = feature_dim_id def _set_categorical_id_split(split, feat_col, feat_id, l_id, r_id): diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py index 7c2e3a3b20..28834ef55b 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py @@ -240,7 +240,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertEqual(0, split_node.split.feature_column) # Sparse is one dimensional. - self.assertEqual(0, split_node.split.feature_id) + self.assertEqual(0, split_node.split.dimension_id) self.assertAllClose(0.52, split_node.split.threshold) @@ -263,7 +263,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertEqual(0, split_node.split.feature_column) # Sparse is one dimensional. - self.assertEqual(0, split_node.split.feature_id) + self.assertEqual(0, split_node.split.dimension_id) self.assertAllClose(0.52, split_node.split.threshold) @@ -373,7 +373,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertEqual(0, split_node.split.feature_column) # Split happened on second dimension. - self.assertEqual(1, split_node.split.feature_id) + self.assertEqual(1, split_node.split.dimension_id) self.assertAllClose(0.58, split_node.split.threshold) @@ -395,7 +395,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertAllClose([expected_right_weight], right_child.value) self.assertEqual(0, split_node.split.feature_column) - self.assertEqual(2, split_node.split.feature_id) + self.assertEqual(2, split_node.split.dimension_id) self.assertAllClose(0.6, split_node.split.threshold) -- GitLab From 0833a3646f90ebaa9d92e90f4ae6326aac13a01c Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 16 Nov 2017 18:46:19 -0800 Subject: [PATCH 0549/1801] Adds sleep before close session in TPU Estimator PiperOrigin-RevId: 176055885 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 97b2d25e0c..fe17664d7f 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -23,6 +23,8 @@ import collections from contextlib import contextmanager import copy import threading +import time + import six from six.moves import queue as Queue # pylint: disable=redefined-builtin @@ -490,11 +492,28 @@ class _InfeedThreadController(_InfeedOutfeedThreadBaseController): count += 1 except Exception: # pylint: disable=broad-except + # Close the session to avoid the main thread from hanging. If input + # pipeline triggers any error, the infeed thread dies but the main thread + # for TPU computation waits for the infeed enqueue forever. Close the + # Session to cancel the main thread Session.run execution. + # + # However, sleep for 2 minutes before explicit closing to give some time + # for the TPU compilation error, if any, propagating, from TPU to CPU + # host. Compilation errors should be reported by the main thread so that + # the program can be interrupted and users can take action. Due to a race + # condition, the infeed thread might see an error first. Closing the + # session here immediately would result in a session cancellation + # exception in the main thread, instead of the expected compile error. + # User code that depends on having the proper exception type will + # therefore be confused. logging.error( 'Failed running infeed, closing session.\n' - 'You may see an exception from your main session after this.', + 'You may see an exception from your main session after this. ' + 'Sleep for 2 minutes before close Session from infeed thread to ' + 'allow the main thread returning an error first, if any.', exc_info=1 ) + time.sleep(120) session.close() def join(self): -- GitLab From 6434efb9a7db19171d7a3f6e4608af0f03882267 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 19:04:39 -0800 Subject: [PATCH 0550/1801] Use idiomatic grpc::Slice API that allows use of different backing buffer PiperOrigin-RevId: 176057178 --- .../rpc/grpc_tensor_coding.cc | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc index 5639691804..e51894b4c7 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc @@ -214,22 +214,13 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val, if (tensor_data_is_large) { // (E) Encode tensor data, but by sharing backing store - - // TODO(vpai): Use the pure C++ ::grpc::Slice constructor that uses - // grpc_slice_new_with_user_data once TensorFlow pins a version of gRPC - // that includes https://github.com/grpc/grpc/pull/12065 - const TensorBuffer* buf = DMAHelper::buffer(&val); buf->Ref(); slices[1] = ::grpc::Slice( - grpc_slice_new_with_user_data( - const_cast(static_cast(tdata.data())), - tdata.size(), - [](void* backing) { - static_cast(backing)->Unref(); - }, - const_cast(buf)), - ::grpc::Slice::STEAL_REF); + const_cast(static_cast(tdata.data())), + tdata.size(), + [](void* backing) { static_cast(backing)->Unref(); }, + const_cast(buf)); num_slices += 1; } size_t total_bytes = 0; -- GitLab From 75775514239bbbf2916c5aa93ef2fbd29b02cb7f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 19:05:35 -0800 Subject: [PATCH 0551/1801] Hlo parser: allow empty convolution window. Window is not required for a convolution on a 2D shape. PiperOrigin-RevId: 176057261 --- tensorflow/compiler/xla/service/hlo_instruction.cc | 2 +- tensorflow/compiler/xla/tools/parser/hlo_parser.cc | 5 ++++- tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index c046b6d9c8..a0795a7b36 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1898,7 +1898,7 @@ std::vector HloInstruction::ExtraAttributesToString() const { if (CanHaveDimensionsField()) { extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); } - if (window_ != nullptr) { + if (window_ != nullptr && window_->dimensions_size() != 0) { extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); } if (padding_config_ != nullptr) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 2112b3e710..1767d712d7 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -564,13 +564,16 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kConvolution: { optional window; optional dnums; - attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window}; attrs["dim_labels"] = {/*required=*/true, AttrTy::kConvolutionDimensionNumbers, &dnums}; if (!ParseOperands(&operands, /*expected_size=*/2) || !ParseAttributes(attrs)) { return false; } + if (!window) { + window.emplace(); + } instruction = builder->AddInstruction(HloInstruction::CreateConvolve( shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums)); break; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index cb02ef84a9..3fbbfbdead 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -312,7 +312,7 @@ R"(HloModule ConvolveR2_module: ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] { %input = f32[1,2]{1,0} parameter(0) %filter = f32[1,1]{1,0} parameter(1) - ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), window={size=1}, dim_labels=bf_io->bf + ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), dim_labels=bf_io->bf } )" -- GitLab From 15907659888a3e36e8de3d5a95de8d3327cb7c46 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 16 Nov 2017 19:10:45 -0800 Subject: [PATCH 0552/1801] [tf.data] Add experimental API for gathering statistics from an Iterator. PiperOrigin-RevId: 176057576 --- .../contrib/data/python/kernel_tests/BUILD | 13 ++ .../kernel_tests/stats_dataset_ops_test.py | 213 ++++++++++++++++++ tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/stats_ops.py | 177 +++++++++++++++ tensorflow/core/kernels/BUILD | 37 +++ tensorflow/core/kernels/dataset.h | 22 +- tensorflow/core/kernels/iterator_ops.cc | 43 ++++ tensorflow/core/kernels/stats_aggregator.h | 84 +++++++ .../core/kernels/stats_aggregator_ops.cc | 108 +++++++++ tensorflow/core/kernels/stats_dataset_ops.cc | 181 +++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 47 ++++ 11 files changed, 924 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py create mode 100644 tensorflow/contrib/data/python/ops/stats_ops.py create mode 100644 tensorflow/core/kernels/stats_aggregator.h create mode 100644 tensorflow/core/kernels/stats_aggregator_ops.cc create mode 100644 tensorflow/core/kernels/stats_dataset_ops.cc diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index c61f61263f..0dac03d7d8 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -424,6 +424,19 @@ py_test( ], ) +py_test( + name = "stats_dataset_ops_test", + size = "small", + srcs = ["stats_dataset_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + ], +) + py_test( name = "zip_dataset_op_test", size = "small", diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py new file mode 100644 index 0000000000..8f24d6b2f6 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py @@ -0,0 +1,213 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline statistics gathering ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.data.python.ops import stats_ops +from tensorflow.core.framework import summary_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class StatsDatasetTest(test.TestCase): + + def _assertSummaryHasCount(self, summary_str, tag, expected_value): + summary_proto = summary_pb2.Summary() + summary_proto.ParseFromString(summary_str) + for value in summary_proto.value: + if tag == value.tag: + self.assertEqual(expected_value, value.histo.num) + return + self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto)) + + def _assertSummaryHasSum(self, summary_str, tag, expected_value): + summary_proto = summary_pb2.Summary() + summary_proto.ParseFromString(summary_str) + for value in summary_proto.value: + if tag == value.tag: + self.assertEqual(expected_value, value.histo.sum) + return + self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto)) + + def testBytesProduced(self): + dataset = dataset_ops.Dataset.range(100).map( + lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply( + stats_ops.bytes_produced_stats("bytes_produced")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + expected_sum = 0.0 + for i in range(100): + self.assertAllEqual( + np.array([i] * i, dtype=np.int64), sess.run(next_element)) + summary_str = sess.run(summary_t) + self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1)) + expected_sum += i * 8.0 + self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + summary_str = sess.run(summary_t) + self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0) + self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum) + + def testLatencyStats(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(i + 1)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0) + + def testReinitialize(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run(stats_aggregator_subscriber) + for j in range(5): + sess.run(iterator.initializer) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float((j * 100) + i + 1)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", (j + 1) * 100.0) + + def testNoAggregatorRegistered(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testMultipleTags(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")).apply( + stats_ops.latency_stats("record_latency_2")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(i + 1)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency_2", float(i + 1)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency_2", 100.0) + + def testRepeatedTags(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(2 * (i + 1))) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0) + + def testMultipleIteratorsSameAggregator(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator_0 = dataset.make_initializable_iterator() + iterator_1 = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscribers = [stats_aggregator.subscribe(iterator_0), + stats_aggregator.subscribe(iterator_1)] + next_element = iterator_0.get_next() + iterator_1.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator_0.initializer, iterator_1.initializer, + stats_aggregator_subscribers]) + for i in range(100): + self.assertEqual(i * 2, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(2 * (i + 1))) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0) + + def testMultipleStatsAggregatorsSameIteratorFail(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator_0 = stats_ops.StatsAggregator() + stats_aggregator_1 = stats_ops.StatsAggregator() + + with self.test_session() as sess: + sess.run(stats_aggregator_0.subscribe(iterator)) + # TODO(mrry): Consider making this allowable (and also allowing + # aggregators to unsubscribe). + with self.assertRaises(errors.FailedPreconditionError): + sess.run(stats_aggregator_1.subscribe(iterator)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index d6aaa12f5b..86035f3a69 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -71,6 +71,7 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", + "stats_ops.py", ], srcs_version = "PY2AND3", deps = [ diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py new file mode 100644 index 0000000000..b8875bd533 --- /dev/null +++ b/tensorflow/contrib/data/python/ops/stats_ops.py @@ -0,0 +1,177 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Experimental API for gathering statistics from `tf.data` pipelines.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.util import nest +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops + + +class StatsAggregator(object): + """A stateful resource that aggregates statistics from one or more iterators. + + To record statistics, use one of the custom transformation functions defined + in this module when defining your @{tf.data.Dataset}. All statistics will be + aggregated by the `StatsAggregator` that is associated with a particular + iterator (see below). For example, to record the total number of bytes + produced by iterating over a dataset: + + ```python + dataset = ... + dataset = dataset.apply(stats_ops.bytes_produced_stats("total_bytes")) + ``` + + To associate a `StatsAggregator` with a @{tf.data.Iterator} object, use + the following pattern: + + ```python + dataset = ... + iterator = dataset.make_one_shot_iterator() + stats_aggregator = stats_ops.StatsAggregator() + set_op = stats_op.set_stats_aggregator_op(iterator, stats_aggregator) + + with tf.Session() as sess: + # Running `set_op` will associate `iterator` with `stats_aggregator`. + sess.run(set_op) + ``` + + To get a protocol buffer summary of the currently aggregated statistics, + use the `StatsAggregator.get_summary()` tensor. The easiest way to do this + is to add the returned tensor to the @{tf.GraphKeys.SUMMARIES} collection, + so that the summaries will be included with any existing summaries. + + ```python + stats_aggregator = stats_ops.StatsAggregator() + stats_summary = stats_aggregator.get_summary() + tf.add_to_collection(tf.GraphKeys.SUMMARIES, stats_summary) + ``` + + Note: This interface is experimental and expected to change. In particular, + we expect to add other implementations of `StatsAggregator` that provide + different ways of exporting statistics, and add more types of statistics. + """ + + def __init__(self): + """Creates a `StatsAggregator`.""" + self._resource = gen_dataset_ops.stats_aggregator_handle() + + def get_summary(self): + """Returns a string @{tf.Tensor} that summarizes the aggregated statistics. + + The returned tensor will contain a serialized @{tf.summary.Summary} protocol + buffer, which can be used with the standard TensorBoard logging facilities. + + Returns: + A scalar string @{tf.Tensor} that summarizes the aggregated statistics. + """ + return gen_dataset_ops.stats_aggregator_summary(self._resource) + + def subscribe(self, iterator): + """Returns a @{tf.Operation} to associate this aggregator with `iterator`. + + Note: Each @{tf.data.Iterator} can be associated with at most one + `StatsAggregator`. After running the operation that this function + returns, all statistics recorded in the iteration of `iterator` + will be stored in `stats_aggregator`. + + Args: + iterator: A @{tf.data.Iterator} object. + + Returns: + A @{tf.Operation} that, when run, associates this aggregator with + `iterator`. + """ + if not isinstance(iterator, iterator_ops.Iterator): + raise TypeError("`iterator` must be a `tf.data.Iterator` object.") + return gen_dataset_ops.iterator_set_stats_aggregator( + iterator._iterator_resource, self._resource) # pylint: disable=protected-access + + +def bytes_produced_stats(tag): + """Records the number of bytes produced by each element of the input dataset. + + To consume the statistics, associate a `StatsAggregator` with an iterator + over the output dataset. + + Args: + tag: String. All statistics recorded by the returned transformation will + be associated with the given `tag`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.contrib.data.Dataset.apply}. + """ + + def _apply_fn(dataset): + return _StatsDataset(dataset, gen_dataset_ops.bytes_produced_stats_dataset, + tag) + + return _apply_fn + + +def latency_stats(tag): + """Records the latency of producing each element of the input dataset. + + To consume the statistics, associate a `StatsAggregator` with an iterator + over the output dataset. + + Args: + tag: String. All statistics recorded by the returned transformation will + be associated with the given `tag`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.contrib.data.Dataset.apply}. + """ + + def _apply_fn(dataset): + return _StatsDataset(dataset, gen_dataset_ops.latency_stats_dataset, tag) + + return _apply_fn + + +class _StatsDataset(dataset_ops.Dataset): + """A `Dataset` that acts as an identity, and also records statistics.""" + + def __init__(self, input_dataset, op_function, tag): + super(_StatsDataset, self).__init__() + self._input_dataset = input_dataset + self._op_function = op_function + self._tag = ops.convert_to_tensor(tag, dtype=dtypes.string) + + def _as_variant_tensor(self): + return self._op_function( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._tag, + output_shapes=nest.flatten(self.output_shapes), + output_types=nest.flatten(self.output_types)) + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_types(self): + return self._input_dataset.output_types + + @property + def output_classes(self): + return self._input_dataset.output_classes diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b7386abdea..00cf3f90e9 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5752,6 +5752,26 @@ tf_mkl_kernel_library( ], ) +cc_library( + name = "stats_aggregator", + hdrs = ["stats_aggregator.h"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +tf_kernel_library( + name = "stats_aggregator_ops", + srcs = ["stats_aggregator_ops.cc"], + deps = [ + ":stats_aggregator", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], +) + cc_library( name = "dataset", srcs = ["dataset.cc"], @@ -5760,6 +5780,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/util/tensor_bundle", ], ) @@ -6032,6 +6053,19 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "stats_dataset_ops", + srcs = ["stats_dataset_ops.cc"], + deps = [ + ":dataset", + ":stats_aggregator", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + tf_kernel_library( name = "range_dataset_op", srcs = ["range_dataset_op.cc"], @@ -6157,6 +6191,7 @@ tf_kernel_library( deps = [ ":dataset", ":ops_util", + ":stats_aggregator", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -6206,6 +6241,8 @@ tf_kernel_library( ":skip_dataset_op", ":sparse_tensor_slice_dataset_op", ":sql_dataset_ops", + ":stats_aggregator_ops", + ":stats_dataset_ops", ":take_dataset_op", ":tensor_dataset_op", ":tensor_slice_dataset_op", diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index df75deacbe..c266bc07c1 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -41,8 +41,6 @@ limitations under the License. namespace tensorflow { -class ResourceMgr; - // Interface for reading values from a key-value store. // Used for restoring iterator state. class IteratorStateReader { @@ -308,6 +306,8 @@ class GraphDefBuilderWrapper { GraphDefBuilder* b_; }; +class StatsAggregator; + // A cut-down version of OpKernelContext for running computations in // iterators. Note that we cannot simply use OpKernelContext here // because we might run computation in an iterator whose lifetime is @@ -331,6 +331,16 @@ class IteratorContext { // Function call support. std::function)> runner = nullptr; + + // A function that returns the current `StatsAggregator` instance to be + // used when recording statistics about the iterator. + // + // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator` + // is a property of the `IteratorResource` (which this class does not know + // about), and (ii) it can change after the `IteratorContext` has been + // created. Better suggestions are welcome! + std::function()> stats_aggregator_getter = + nullptr; }; explicit IteratorContext(Params params) : params_(std::move(params)) {} @@ -341,6 +351,14 @@ class IteratorContext { return ¶ms_.runner; } + std::shared_ptr stats_aggregator() { + if (params_.stats_aggregator_getter) { + return params_.stats_aggregator_getter(); + } else { + return nullptr; + } + } + private: Params params_; }; diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/iterator_ops.cc index b48da5b326..439775157b 100644 --- a/tensorflow/core/kernels/iterator_ops.cc +++ b/tensorflow/core/kernels/iterator_ops.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/kernels/dataset.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/stats_aggregator.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" @@ -168,6 +169,16 @@ class IteratorResource : public ResourceBase { return Status::OK(); } + void set_stats_aggregator(std::shared_ptr stats_aggregator) { + mutex_lock l(mu_); + stats_aggregator_ = std::move(stats_aggregator); + } + + std::shared_ptr stats_aggregator() { + tf_shared_lock l(mu_); + return stats_aggregator_; + } + string DebugString() override { return "Iterator resource"; } const DataTypeVector& output_dtypes() const { return output_dtypes_; } @@ -178,6 +189,8 @@ class IteratorResource : public ResourceBase { private: std::shared_ptr iterator_; + mutex mu_; + std::shared_ptr stats_aggregator_ GUARDED_BY(mu_); const DataTypeVector output_dtypes_; const std::vector output_shapes_; const int graph_def_version_; @@ -684,6 +697,9 @@ class IteratorGetNextOp : public AsyncOpKernel { IteratorContext::Params params; params.env = ctx->env(); + params.stats_aggregator_getter = [iterator]() { + return iterator->stats_aggregator(); + }; params.runner = *(ctx->runner()); IteratorContext iter_ctx(std::move(params)); @@ -835,6 +851,31 @@ class DeserializeIteratorOp : public OpKernel { } }; +class IteratorSetStatsAggregatorOp : public OpKernel { + public: + explicit IteratorSetStatsAggregatorOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + IteratorResource* iterator_resource; + OP_REQUIRES_OK( + ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator_resource)); + core::ScopedUnref unref_iterator(iterator_resource); + + StatsAggregatorResource* stats_aggregator_resource; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 1), + &stats_aggregator_resource)); + core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource); + // TODO(mrry): Consider allowing multiple StatsAggregator ops to + // subscribe to updates, and/or unsubscribing. + OP_REQUIRES(ctx, !iterator_resource->stats_aggregator(), + errors::FailedPrecondition( + "Iterator already associated with a StatsAggregator")); + iterator_resource->set_stats_aggregator( + stats_aggregator_resource->stats_aggregator()); + } +}; + REGISTER_KERNEL_BUILDER(Name("Iterator").Device(DEVICE_CPU), IteratorHandleOp); REGISTER_KERNEL_BUILDER(Name("MakeIterator").Device(DEVICE_CPU), MakeIteratorOp); @@ -852,6 +893,8 @@ REGISTER_KERNEL_BUILDER(Name("SerializeIterator").Device(DEVICE_CPU), SerializeIteratorOp); REGISTER_KERNEL_BUILDER(Name("DeserializeIterator").Device(DEVICE_CPU), DeserializeIteratorOp); +REGISTER_KERNEL_BUILDER(Name("IteratorSetStatsAggregator").Device(DEVICE_CPU), + IteratorSetStatsAggregatorOp); } // namespace diff --git a/tensorflow/core/kernels/stats_aggregator.h b/tensorflow/core/kernels/stats_aggregator.h new file mode 100644 index 0000000000..5f602c5f3b --- /dev/null +++ b/tensorflow/core/kernels/stats_aggregator.h @@ -0,0 +1,84 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_ + +#include +#include + +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + +namespace tensorflow { + +class Summary; + +// A `StatsAggregator` accumulates statistics incrementally. A +// `StatsAggregator` can accumulate multiple different statistics, distinguished +// by a string name. +// +// The class currently supports accumulating `Histogram` objects, and we expect +// to add other methods in future. +// +// NOTE(mrry): `StatsAggregator` is a virtual interface because we anticipate +// that many different implementations will the same interface. For example, the +// current implementation in "stats_aggregator_ops.cc" is a simple in-memory +// implementation that integrates with the pull-based summary API, and we may +// add implementations that work with the push-based `SummaryWriterInterface`, +// as well as custom monitoring services. +class StatsAggregator { + public: + virtual ~StatsAggregator() {} + + // Add the given `values` to the histogram with the given `name`. Each + // element of `values` will be treated as a separate sample in the histogram. + virtual void AddToHistogram(const string& name, + gtl::ArraySlice values) = 0; + + // Stores a protocol buffer representation of the aggregator state in the + // given `out_summary`. + // TODO(mrry): Consider separating this method from the `StatsAggregator` + // interface. It is possible that not all implementations will support + // encoding their state as a protocol buffer. + virtual void EncodeToProto(Summary* out_summary) = 0; +}; + +// A `StatsAggregatorResource` wraps a shareable `StatsAggregator` as a resource +// in the TensorFlow resource manager. +// +// NOTE(mrry): This class is separate from `StatsAggregator` in order to +// simplify the memory management of the shared object. Most users of +// `StatsAggregator` interact with a `std::shared_ptr` whereas +// the `ResourceBase` API requires explicit reference counting. +class StatsAggregatorResource : public ResourceBase { + public: + // Creates a new resource from the given `stats_aggregator`. + StatsAggregatorResource(std::unique_ptr stats_aggregator) + : stats_aggregator_(stats_aggregator.release()) {} + + // Returns the wrapped `StatsAggregator`. + std::shared_ptr stats_aggregator() const { + return stats_aggregator_; + } + + string DebugString() { return "StatsAggregatorResource"; } + + private: + const std::shared_ptr stats_aggregator_; +}; + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_ diff --git a/tensorflow/core/kernels/stats_aggregator_ops.cc b/tensorflow/core/kernels/stats_aggregator_ops.cc new file mode 100644 index 0000000000..037ec64a83 --- /dev/null +++ b/tensorflow/core/kernels/stats_aggregator_ops.cc @@ -0,0 +1,108 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/kernels/stats_aggregator.h" + +#include + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_op_kernel.h" +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/lib/histogram/histogram.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { +namespace { + +class StatsAggregatorImpl : public StatsAggregator { + public: + StatsAggregatorImpl() {} + + void AddToHistogram(const string& name, + gtl::ArraySlice values) override { + mutex_lock l(mu_); + histogram::Histogram& histogram = histograms_[name]; + for (double value : values) { + histogram.Add(value); + } + } + + void EncodeToProto(Summary* out_summary) override { + mutex_lock l(mu_); + for (const auto& pair : histograms_) { + const string& name = pair.first; + const histogram::Histogram& histogram = pair.second; + + Summary::Value* value = out_summary->add_value(); + value->set_tag(name); + histogram.EncodeToProto(value->mutable_histo(), + true /* preserve_zero_buckets */); + } + } + + private: + mutex mu_; + std::unordered_map histograms_ GUARDED_BY(mu_); + TF_DISALLOW_COPY_AND_ASSIGN(StatsAggregatorImpl); +}; + +class StatsAggregatorHandleOp + : public ResourceOpKernel { + public: + explicit StatsAggregatorHandleOp(OpKernelConstruction* ctx) + : ResourceOpKernel(ctx) {} + + private: + Status CreateResource(StatsAggregatorResource** ret) override + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + *ret = new StatsAggregatorResource( + std::unique_ptr(new StatsAggregatorImpl)); + return Status::OK(); + } + + Status VerifyResource(StatsAggregatorResource* resource) override { + return Status::OK(); + } +}; + +class StatsAggregatorSummaryOp : public OpKernel { + public: + explicit StatsAggregatorSummaryOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& resource_handle_t = ctx->input(0); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(resource_handle_t.shape()), + errors::InvalidArgument("resource_handle must be a scalar")); + + StatsAggregatorResource* resource; + OP_REQUIRES_OK(ctx, + LookupResource(ctx, HandleFromInput(ctx, 0), &resource)); + core::ScopedUnref unref_iterator(resource); + + Tensor* summary_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &summary_t)); + Summary summary; + resource->stats_aggregator()->EncodeToProto(&summary); + summary_t->scalar()() = summary.SerializeAsString(); + } +}; + +REGISTER_KERNEL_BUILDER(Name("StatsAggregatorHandle").Device(DEVICE_CPU), + StatsAggregatorHandleOp); +REGISTER_KERNEL_BUILDER(Name("StatsAggregatorSummary").Device(DEVICE_CPU), + StatsAggregatorSummaryOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/kernels/stats_dataset_ops.cc b/tensorflow/core/kernels/stats_dataset_ops.cc new file mode 100644 index 0000000000..7b1853aba6 --- /dev/null +++ b/tensorflow/core/kernels/stats_dataset_ops.cc @@ -0,0 +1,181 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/dataset.h" +#include "tensorflow/core/kernels/stats_aggregator.h" +#include "tensorflow/core/lib/random/random.h" + +namespace tensorflow { +namespace { + +// This op defines a `Dataset` that passes through its input elements and +// records the latency of producing each element in the context's +// `StatsAggregator`. +// +// TODO(mrry): It is likely that many *StatsDatasetOp kernels will have the +// same or similar structure. We should abstract the common boilerplate into +// a base case and/or investigate how to make general-purpose *StatsDatasetOp +// kernels that use TensorFlow functions to represent their logic. For example, +// if the performance were adequate, we might replace this kernel with an +// implementation that executes functions before and after the `GetNext()` call +// on the input, each executing an op that gets the current time and performing +// the subtraction. +class LatencyStatsDatasetOp : public UnaryDatasetOpKernel { + public: + explicit LatencyStatsDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + string tag; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag)); + *output = new Dataset(input, std::move(tag)); + } + + private: + class Dataset : public DatasetBase { + public: + explicit Dataset(const DatasetBase* input, string tag) + : input_(input), tag_(std::move(tag)) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::LatencyStats")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + const std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() override { return "LatencyStatsDatasetOp::Dataset"; } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + uint64 start = ctx->env()->NowMicros(); + Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + uint64 end = ctx->env()->NowMicros(); + auto stats_aggregator = ctx->stats_aggregator(); + if (stats_aggregator && !*end_of_sequence) { + ctx->stats_aggregator()->AddToHistogram( + dataset()->tag_, {static_cast(end - start)}); + } + return s; + } + + private: + const std::unique_ptr input_impl_; + }; + + const DatasetBase* const input_; + const string tag_; + }; +}; + +class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel { + public: + explicit BytesProducedStatsDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + string tag; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag)); + *output = new Dataset(input, std::move(tag)); + } + + private: + class Dataset : public DatasetBase { + public: + explicit Dataset(const DatasetBase* input, string tag) + : input_(input), tag_(std::move(tag)) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr(new Iterator( + {this, strings::StrCat(prefix, "::BytesProducedStats")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + const std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() override { + return "BytesProducedStatsDatasetOp::Dataset"; + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + auto stats_aggregator = ctx->stats_aggregator(); + if (stats_aggregator && s.ok() && !*end_of_sequence) { + size_t total_bytes = 0; + for (const Tensor& t : *out_tensors) { + total_bytes += t.TotalBytes(); + } + ctx->stats_aggregator()->AddToHistogram( + dataset()->tag_, {static_cast(total_bytes)}); + } + return s; + } + + private: + const std::unique_ptr input_impl_; + }; + + const DatasetBase* const input_; + const string tag_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("LatencyStatsDataset").Device(DEVICE_CPU), + LatencyStatsDatasetOp); +REGISTER_KERNEL_BUILDER(Name("BytesProducedStatsDataset").Device(DEVICE_CPU), + BytesProducedStatsDatasetOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index f512213964..6bf226e7a5 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -151,6 +151,28 @@ REGISTER_OP("IgnoreErrorsDataset") Creates a dataset that contains the elements of `input_dataset` ignoring errors. )doc"); +REGISTER_OP("BytesProducedStatsDataset") + .Input("input_dataset: variant") + .Input("tag: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Records the bytes size of each element of `input_dataset` in a StatsAggregator. +)doc"); + +REGISTER_OP("LatencyStatsDataset") + .Input("input_dataset: variant") + .Input("tag: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Records the latency of producing `input_dataset` elements in a StatsAggregator. +)doc"); + REGISTER_OP("MapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -744,4 +766,29 @@ serialized: A variant tensor storing the state of the iterator contained in the resource. )doc"); +REGISTER_OP("StatsAggregatorHandle") + .Output("handle: resource") + .SetShapeFn(shape_inference::ScalarShape) + .Attr("container: string = ''") + .Attr("shared_name: string = ''") + .Doc(R"doc( +Creates a statistics manager resource. +)doc"); + +REGISTER_OP("IteratorSetStatsAggregator") + .Input("iterator_handle: resource") + .Input("stats_aggregator_handle: resource") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Associates the given iterator with the given statistics aggregator. +)doc"); + +REGISTER_OP("StatsAggregatorSummary") + .Input("iterator: resource") + .Output("summary: string") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Produces a summary of any statistics recorded by the given statistics manager. +)doc"); + } // namespace tensorflow -- GitLab From 929178e1046f6387d9245c3d89ba5c3c1f3078d5 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 16 Nov 2017 19:11:55 -0800 Subject: [PATCH 0553/1801] Add documentation for how to get input names for input_fn for keras model converted estimator. PiperOrigin-RevId: 176057647 --- .../docs_src/programmers_guide/estimators.md | 26 ++++++++++++++++--- .../python/keras/_impl/keras/estimator.py | 3 +++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index d465679817..6544a16f2b 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -166,11 +166,29 @@ keras_inception_v3 = tf.keras.applications.inception_v3.InceptionV3(weights=None keras_inception_v3.compile(optimizer=tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metric='accuracy') -# Create an Estimator from the compiled Keras model. +# Create an Estimator from the compiled Keras model. Note the initial model +# state of the keras model is preserved in the created Estimator. est_inception_v3 = tf.keras.estimator.model_to_estimator(keras_model=keras_inception_v3) -# Treat the derived Estimator as you would any other Estimator. For example, -# the following derived Estimator calls the train method: -est_inception_v3.train(input_fn=my_training_set, steps=2000) + +# Treat the derived Estimator as you would with any other Estimator. +# First, recover the input name(s) of Keras model, so we can use them as the +# feature column name(s) of the Estimator input function: +keras_inception_v3.input_names # print out: ['input_1'] +# Once we have the input name(s), we can create the input function, for example, +# for input(s) in the format of numpy ndarray: +train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={"input_1": train_data}, + y=train_labels, + num_epochs=1, + shuffle=False) +# To train, we call Estimator's train function: +est_inception_v3.train(input_fn=train_input_fn, steps=2000) ``` +Note that the names of feature columns and labels of a keras estimator come from +the corresponding compiled keras model. For example, the input key names for +@{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained +from `keras_inception_v3.input_names`, and similarily, the predicted output +names can be obtained from `keras_inception_v3.output_names`. + For more details, please refer to the documentation for @{tf.keras.estimator.model_to_estimator}. diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 125e63e1b8..2e931769c7 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -232,6 +232,9 @@ def model_to_estimator(keras_model=None, config=None): """Constructs an `Estimator` instance from given keras model. + For usage example, please see + @{$programmers_guide/estimators$creating_estimators_from_keras_models}. + Args: keras_model: Keras model in memory. keras_model_path: Directory to a keras model on disk. -- GitLab From 7d17d27940aa915583b0b3e2ba77d9f708af6783 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 16 Nov 2017 19:30:05 -0800 Subject: [PATCH 0554/1801] Add WriteScalar support to SummaryDbWriter PiperOrigin-RevId: 176058700 --- tensorflow/contrib/summary/summary_ops.py | 22 ++++- .../tensorboard/db/summary_db_writer.cc | 81 ++++++++++++++----- .../tensorboard/db/summary_db_writer_test.cc | 27 +++++++ 3 files changed, 109 insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index a72c0c80aa..bf810744a1 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -364,16 +364,34 @@ def generic(name, tensor, metadata=None, family=None, global_step=None): def scalar(name, tensor, family=None, global_step=None): - """Writes a scalar summary if possible.""" + """Writes a scalar summary if possible. + + Unlike @{tf.contrib.summary.generic} this op may change the dtype + depending on the writer, for both practical and efficiency concerns. + + Args: + name: An arbitrary name for this summary. + tensor: A @{tf.Tensor} Must be one of the following types: + `float32`, `float64`, `int32`, `int64`, `uint8`, `int16`, + `int8`, `uint16`, `half`, `uint32`, `uint64`. + family: Optional, the summary's family. + global_step: The `int64` monotonic step variable, which defaults + to @{tf.train.get_global_step}. + + Returns: + The created @{tf.Operation} or a @{tf.no_op} if summary writing has + not been enabled for this context. + """ if global_step is None: global_step = training_util.get_global_step() + else: + global_step = ops.convert_to_tensor(global_step, dtypes.int64) def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_scalar_summary( context.context().summary_writer_resource, global_step, tag, array_ops.identity(tensor), name=scope) - return summary_writer_function(name, tensor, function, family=family) diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index ae063d24ef..857e731ef2 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -81,6 +81,55 @@ Status BindTensor(SqliteStatement* stmt, int parameter, const Tensor& t) { return BindProto(stmt, parameter, p); } +// Tries to fudge shape and dtype to something with smaller storage. +Status CoerceScalar(const Tensor& t, Tensor* out) { + switch (t.dtype()) { + case DT_DOUBLE: + *out = t; + break; + case DT_INT64: + *out = t; + break; + case DT_FLOAT: + *out = {DT_DOUBLE, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_HALF: + *out = {DT_DOUBLE, {}}; + out->scalar()() = static_cast(t.scalar()()); + break; + case DT_INT32: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_INT16: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_INT8: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_UINT32: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_UINT16: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_UINT8: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + default: + return errors::Unimplemented("Scalar summary for dtype ", + DataTypeString(t.dtype()), + " is not supported."); + } + return Status::OK(); +} + class Transactor { public: explicit Transactor(std::shared_ptr db) @@ -280,20 +329,23 @@ class SummaryDbWriter : public SummaryWriterInterface { insert_tensor_.BindInt(1, tag_id); insert_tensor_.BindInt(2, global_step); insert_tensor_.BindDouble(3, GetWallTime(env_)); - switch (t.dtype()) { - case DT_INT64: - insert_tensor_.BindInt(4, t.scalar()()); - break; - case DT_DOUBLE: - insert_tensor_.BindDouble(4, t.scalar()()); - break; - default: - TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t)); - break; + if (t.shape().dims() == 0 && t.dtype() == DT_INT64) { + insert_tensor_.BindInt(4, t.scalar()()); + } else if (t.shape().dims() == 0 && t.dtype() == DT_DOUBLE) { + insert_tensor_.BindDouble(4, t.scalar()()); + } else { + TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t)); } return insert_tensor_.StepAndReset(); } + Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { + Tensor t2; + TF_RETURN_IF_ERROR(CoerceScalar(t, &t2)); + // TODO(jart): Generate scalars plugin metadata on this value. + return WriteTensor(global_step, std::move(t2), tag, ""); + } + Status WriteGraph(int64 global_step, std::unique_ptr g) override { mutex_lock ml(mu_); TF_RETURN_IF_ERROR(InitializeParents()); @@ -325,15 +377,6 @@ class SummaryDbWriter : public SummaryWriterInterface { } } - Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { - // TODO(@jart): Unlike WriteTensor, this method would be granted leniency - // to change the dtype if it saves storage space. For example, - // DT_UINT32 would be stored in the database as an INTEGER - // rather than a serialized BLOB. But when reading it back, - // the dtype would become DT_INT64. - return errors::Unimplemented("WriteScalar"); - } - Status WriteHistogram(int64 global_step, Tensor t, const string& tag) override { return errors::Unimplemented( diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index 3431842ca2..625861fa6b 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -290,5 +290,32 @@ TEST_F(SummaryDbWriterTest, WriteGraph) { EXPECT_EQ(1LL, QueryInt("SELECT is_control FROM NodeInputs WHERE idx = 2")); } +TEST_F(SummaryDbWriterTest, WriteScalarInt32_CoercesToInt64) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + Tensor t(DT_INT32, {}); + t.scalar()() = -17; + TF_ASSERT_OK(writer_->WriteScalar(1, t, "t")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors")); +} + +TEST_F(SummaryDbWriterTest, WriteScalarInt8_CoercesToInt64) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + Tensor t(DT_INT8, {}); + t.scalar()() = static_cast(-17); + TF_ASSERT_OK(writer_->WriteScalar(1, t, "t")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors")); +} + +TEST_F(SummaryDbWriterTest, WriteScalarUint8_CoercesToInt64) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + Tensor t(DT_UINT8, {}); + t.scalar()() = static_cast(254); + TF_ASSERT_OK(writer_->WriteScalar(1, t, "t")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(254LL, QueryInt("SELECT tensor FROM Tensors")); +} + } // namespace } // namespace tensorflow -- GitLab From 2397a7f375ab91b071126948dcae7abd3e775d3f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 19:34:35 -0800 Subject: [PATCH 0555/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 176059000 --- .../core/ops/compat/ops_history.v1.pbtxt | 100 +++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 105 ++++++++++++++++++ 2 files changed, 205 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index ffb608d600..daeb6763c8 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -6059,6 +6059,33 @@ op { type: "list(float)" } } +op { + name: "BytesProducedStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "CTCBeamSearchDecoder" input_arg { @@ -15001,6 +15028,18 @@ op { } is_stateful: true } +op { + name: "IteratorSetStatsAggregator" + input_arg { + name: "iterator_handle" + type: DT_RESOURCE + } + input_arg { + name: "stats_aggregator_handle" + type: DT_RESOURCE + } + is_stateful: true +} op { name: "IteratorToStringHandle" input_arg { @@ -15192,6 +15231,33 @@ op { } } } +op { + name: "LatencyStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "LearnedUnigramCandidateSampler" input_arg { @@ -38418,6 +38484,40 @@ op { } } } +op { + name: "StatsAggregatorHandle" + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "StatsAggregatorSummary" + input_arg { + name: "iterator" + type: DT_RESOURCE + } + output_arg { + name: "summary" + type: DT_STRING + } + is_stateful: true +} op { name: "StopGradient" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index d9e3dbdbb7..55a8fc9032 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -4270,6 +4270,34 @@ op { summary: "Bucketizes \'input\' based on \'boundaries\'." description: "For example, if the inputs are\n boundaries = [0, 10, 100]\n input = [[-5, 10000]\n [150, 10]\n [5, 100]]\n\nthen the output will be\n output = [[0, 3]\n [3, 2]\n [1, 3]]" } +op { + name: "BytesProducedStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Records the bytes size of each element of `input_dataset` in a StatsAggregator." +} op { name: "CTCBeamSearchDecoder" input_arg { @@ -11502,6 +11530,19 @@ op { summary: "Gets the next output from the given iterator." is_stateful: true } +op { + name: "IteratorSetStatsAggregator" + input_arg { + name: "iterator_handle" + type: DT_RESOURCE + } + input_arg { + name: "stats_aggregator_handle" + type: DT_RESOURCE + } + summary: "Associates the given iterator with the given statistics aggregator." + is_stateful: true +} op { name: "IteratorToStringHandle" input_arg { @@ -11698,6 +11739,34 @@ op { } summary: "Gradients for Local Response Normalization." } +op { + name: "LatencyStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Records the latency of producing `input_dataset` elements in a StatsAggregator." +} op { name: "LearnedUnigramCandidateSampler" input_arg { @@ -29980,6 +30049,42 @@ op { summary: "Outputs deterministic pseudorandom values from a truncated normal distribution." description: "The generated values follow a normal distribution with mean 0 and standard\ndeviation 1, except that values whose magnitude is more than 2 standard\ndeviations from the mean are dropped and re-picked.\n\nThe outputs are a deterministic function of `shape` and `seed`." } +op { + name: "StatsAggregatorHandle" + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + summary: "Creates a statistics manager resource." + is_stateful: true +} +op { + name: "StatsAggregatorSummary" + input_arg { + name: "iterator" + type: DT_RESOURCE + } + output_arg { + name: "summary" + type: DT_STRING + } + summary: "Produces a summary of any statistics recorded by the given statistics manager." + is_stateful: true +} op { name: "StopGradient" input_arg { -- GitLab From a764ec152ce8a4ebe6faf42c55a3177182389c9f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 19:45:12 -0800 Subject: [PATCH 0556/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 176059622 --- tensorflow/go/op/wrappers.go | 102 +++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index a910b51fb9..e650d25a32 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -5334,6 +5334,21 @@ func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged return op.Output(0) } +// Produces a summary of any statistics recorded by the given statistics manager. +func StatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "StatsAggregatorSummary", + Input: []tf.Input{ + iterator, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // FIFOQueueV2Attr is an optional argument to FIFOQueueV2. type FIFOQueueV2Attr func(optionalAttr) @@ -5950,6 +5965,23 @@ func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf. return scope.AddOperation(opspec) } +// Records the latency of producing `input_dataset` elements in a StatsAggregator. +func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "LatencyStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Concatenates tensors along one dimension. // // Arguments: @@ -6146,6 +6178,43 @@ func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_ou return op.Output(0) } +// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle. +type StatsAggregatorHandleAttr func(optionalAttr) + +// StatsAggregatorHandleContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a statistics manager resource. +func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatsAggregatorHandle", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes. type CropAndResizeGradBoxesAttr func(optionalAttr) @@ -19067,6 +19136,22 @@ func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value return op.Output(0) } +// Associates the given iterator with the given statistics aggregator. +// +// Returns the created operation. +func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IteratorSetStatsAggregator", + Input: []tf.Input{ + iterator_handle, stats_aggregator_handle, + }, + } + return scope.AddOperation(opspec) +} + // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. type ResourceSparseApplyFtrlV2Attr func(optionalAttr) @@ -24785,6 +24870,23 @@ func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Ou return scope.AddOperation(opspec) } +// Records the bytes size of each element of `input_dataset` in a StatsAggregator. +func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "BytesProducedStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // QrAttr is an optional argument to Qr. type QrAttr func(optionalAttr) -- GitLab From 10581c8afee392f2455acb700ece8217a3a19a4b Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 16 Nov 2017 20:50:28 -0800 Subject: [PATCH 0557/1801] Rename global_step -> step in contrib/summary API Since it's more succinct and the API doesn't actually care if the provided step is the one true global step. PiperOrigin-RevId: 176063779 --- tensorflow/contrib/summary/summary_ops.py | 72 ++++++++++--------- .../contrib/summary/summary_ops_test.py | 4 +- tensorflow/core/kernels/summary_kernels.cc | 40 +++++------ tensorflow/core/ops/summary_ops.cc | 24 +++---- 4 files changed, 73 insertions(+), 67 deletions(-) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index bf810744a1..3e65f83051 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -344,10 +344,9 @@ def summary_writer_function(name, tensor, function, family=None): return op -def generic(name, tensor, metadata=None, family=None, global_step=None): +def generic(name, tensor, metadata=None, family=None, step=None): """Writes a tensor summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): if metadata is None: serialized_metadata = constant_op.constant("") @@ -358,12 +357,15 @@ def generic(name, tensor, metadata=None, family=None, global_step=None): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_summary( context.context().summary_writer_resource, - global_step, array_ops.identity(tensor), - tag, serialized_metadata, name=scope) + _choose_step(step), + array_ops.identity(tensor), + tag, + serialized_metadata, + name=scope) return summary_writer_function(name, tensor, function, family=family) -def scalar(name, tensor, family=None, global_step=None): +def scalar(name, tensor, family=None, step=None): """Writes a scalar summary if possible. Unlike @{tf.contrib.summary.generic} this op may change the dtype @@ -375,68 +377,68 @@ def scalar(name, tensor, family=None, global_step=None): `float32`, `float64`, `int32`, `int64`, `uint8`, `int16`, `int8`, `uint16`, `half`, `uint32`, `uint64`. family: Optional, the summary's family. - global_step: The `int64` monotonic step variable, which defaults + step: The `int64` monotonic step variable, which defaults to @{tf.train.get_global_step}. Returns: The created @{tf.Operation} or a @{tf.no_op} if summary writing has not been enabled for this context. """ - if global_step is None: - global_step = training_util.get_global_step() - else: - global_step = ops.convert_to_tensor(global_step, dtypes.int64) + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_scalar_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + _choose_step(step), + tag, + array_ops.identity(tensor), name=scope) + return summary_writer_function(name, tensor, function, family=family) -def histogram(name, tensor, family=None, global_step=None): +def histogram(name, tensor, family=None, step=None): """Writes a histogram summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_histogram_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + _choose_step(step), + tag, + array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def image(name, tensor, bad_color=None, max_images=3, family=None, - global_step=None): +def image(name, tensor, bad_color=None, max_images=3, family=None, step=None): """Writes an image summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): bad_color_ = (constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8) if bad_color is None else bad_color) # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_image_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + _choose_step(step), + tag, + array_ops.identity(tensor), bad_color_, - max_images, name=scope) + max_images, + name=scope) return summary_writer_function(name, tensor, function, family=family) -def audio(name, tensor, sample_rate, max_outputs, family=None, - global_step=None): +def audio(name, tensor, sample_rate, max_outputs, family=None, step=None): """Writes an audio summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_audio_summary( context.context().summary_writer_resource, - global_step, + _choose_step(step), tag, array_ops.identity(tensor), sample_rate=sample_rate, @@ -483,15 +485,13 @@ def graph(param, step=None, name=None): if writer is None: return control_flow_ops.no_op() with ops.device("cpu:0"): - if step is None: - step = training_util.get_global_step() - else: - step = ops.convert_to_tensor(step, dtypes.int64) if isinstance(param, (ops.Graph, graph_pb2.GraphDef)): tensor = ops.convert_to_tensor(_serialize_graph(param), dtypes.string) else: tensor = array_ops.identity(param) - return gen_summary_ops.write_graph_summary(writer, step, tensor, name=name) + return gen_summary_ops.write_graph_summary( + writer, _choose_step(step), tensor, name=name) + _graph = graph # for functions with a graph parameter @@ -527,3 +527,11 @@ def _serialize_graph(arbitrary_graph): return arbitrary_graph.as_graph_def(add_shapes=True).SerializeToString() else: return arbitrary_graph.SerializeToString() + + +def _choose_step(step): + if step is None: + return training_util.get_global_step() + if not isinstance(step, ops.Tensor): + return ops.convert_to_tensor(step, dtypes.int64) + return step diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index c5ca054f77..ad89c0c36a 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -97,13 +97,13 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(events[1].summary.value[0].tag, 'scalar') def testSummaryGlobalStep(self): - global_step = training_util.get_or_create_global_step() + step = training_util.get_or_create_global_step() logdir = tempfile.mkdtemp() with summary_ops.create_summary_file_writer( logdir, max_queue=0, name='t2').as_default(), summary_ops.always_record_summaries(): - summary_ops.scalar('scalar', 2.0, global_step=global_step) + summary_ops.scalar('scalar', 2.0, step=step) events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index 3706f51cf4..7487e70acc 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -111,8 +111,8 @@ class WriteSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("summary_metadata", &tmp)); @@ -121,8 +121,7 @@ class WriteSummaryOp : public OpKernel { const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); - OP_REQUIRES_OK(ctx, - s->WriteTensor(global_step, *t, tag, serialized_metadata)); + OP_REQUIRES_OK(ctx, s->WriteTensor(step, *t, tag, serialized_metadata)); } }; REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU), @@ -158,15 +157,15 @@ class WriteScalarSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("value", &t)); - OP_REQUIRES_OK(ctx, s->WriteScalar(global_step, *t, tag)); + OP_REQUIRES_OK(ctx, s->WriteScalar(step, *t, tag)); } }; REGISTER_KERNEL_BUILDER(Name("WriteScalarSummary").Device(DEVICE_CPU), @@ -181,15 +180,15 @@ class WriteHistogramSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("values", &t)); - OP_REQUIRES_OK(ctx, s->WriteHistogram(global_step, *t, tag)); + OP_REQUIRES_OK(ctx, s->WriteHistogram(step, *t, tag)); } }; REGISTER_KERNEL_BUILDER(Name("WriteHistogramSummary").Device(DEVICE_CPU), @@ -210,8 +209,8 @@ class WriteImageSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); const Tensor* bad_color; @@ -224,8 +223,7 @@ class WriteImageSummaryOp : public OpKernel { const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); - OP_REQUIRES_OK( - ctx, s->WriteImage(global_step, *t, tag, max_images_, *bad_color)); + OP_REQUIRES_OK(ctx, s->WriteImage(step, *t, tag, max_images_, *bad_color)); } private: @@ -247,8 +245,8 @@ class WriteAudioSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("sample_rate", &tmp)); @@ -257,8 +255,8 @@ class WriteAudioSummaryOp : public OpKernel { const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); - OP_REQUIRES_OK( - ctx, s->WriteAudio(global_step, *t, tag, max_outputs_, sample_rate)); + OP_REQUIRES_OK(ctx, + s->WriteAudio(step, *t, tag, max_outputs_, sample_rate)); } private: @@ -278,8 +276,8 @@ class WriteGraphSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* t; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &t)); - const int64 global_step = t->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &t)); + const int64 step = t->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); std::unique_ptr graph{new GraphDef}; if (!ParseProtoUnlimited(graph.get(), t->scalar()())) { @@ -287,7 +285,7 @@ class WriteGraphSummaryOp : public OpKernel { errors::DataLoss("Bad tf.GraphDef binary proto tensor string")); return; } - OP_REQUIRES_OK(ctx, s->WriteGraph(global_step, std::move(graph))); + OP_REQUIRES_OK(ctx, s->WriteGraph(step, std::move(graph))); } }; REGISTER_KERNEL_BUILDER(Name("WriteGraphSummary").Device(DEVICE_CPU), diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc index 7f6d8b06cd..029ff09906 100644 --- a/tensorflow/core/ops/summary_ops.cc +++ b/tensorflow/core/ops/summary_ops.cc @@ -99,7 +99,7 @@ writer: A handle to the summary writer resource. REGISTER_OP("WriteSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tensor: T") .Input("tag: string") .Input("summary_metadata: string") @@ -109,7 +109,7 @@ REGISTER_OP("WriteSummary") Outputs a `Summary` protocol buffer with a tensor. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tensor: A tensor to serialize. tag: The summary's tag. summary_metadata: Serialized SummaryMetadata protocol buffer containing @@ -132,7 +132,7 @@ event: A string containing a binary-encoded tf.Event proto. REGISTER_OP("WriteScalarSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("value: T") .Attr("T: realnumbertype") @@ -143,14 +143,14 @@ Writes a `Summary` protocol buffer with scalar values. The input `tag` and `value` must have the scalars. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Tag for the summary. value: Value for the summary. )doc"); REGISTER_OP("WriteHistogramSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("values: T") .Attr("T: realnumbertype = DT_FLOAT") @@ -165,14 +165,14 @@ has one summary value containing a histogram for `values`. This op reports an `InvalidArgument` error if any value is not finite. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Scalar. Tag to use for the `Summary.Value`. values: Any shape. Values to use to build the histogram. )doc"); REGISTER_OP("WriteImageSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("tensor: T") .Input("bad_color: uint8") @@ -217,7 +217,7 @@ replaced by this tensor in the output image. The default value is the color red. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Scalar. Used to build the `tag` attribute of the summary values. tensor: 4-D of shape `[batch_size, height, width, channels]` where `channels` is 1, 3, or 4. @@ -227,7 +227,7 @@ bad_color: Color to use for pixels with non-finite values. REGISTER_OP("WriteAudioSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("tensor: float") .Input("sample_rate: float") @@ -249,7 +249,7 @@ build the `tag` of the summary values: generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Scalar. Used to build the `tag` attribute of the summary values. tensor: 2-D of shape `[batch_size, frames]`. sample_rate: The sample rate of the signal in hertz. @@ -258,14 +258,14 @@ max_outputs: Max number of batch elements to generate audio for. REGISTER_OP("WriteGraphSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tensor: string") .SetShapeFn(shape_inference::NoOutputs) .Doc(R"doc( Writes a `GraphDef` protocol buffer to a `SummaryWriter`. writer: Handle of `SummaryWriter`. -global_step: The step to write the summary for. +step: The step to write the summary for. tensor: A scalar string of the serialized tf.GraphDef proto. )doc"); -- GitLab From 7a2a3b40d518baa0c9bc4231df434fa09857cee4 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 16 Nov 2017 21:04:50 -0800 Subject: [PATCH 0558/1801] [XLA] Rematerialization and fusion need to consider nested computations to determine if rematerializablity or Fusabiltiy. PiperOrigin-RevId: 176064783 --- .../compiler/xla/service/hlo_instruction.cc | 18 +++--------------- .../xla/service/hlo_rematerialization.cc | 9 +-------- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a0795a7b36..e3fdc53b7f 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -793,7 +793,7 @@ HloInstruction* HloInstruction::FuseInstructionInternal( HloInstruction* HloInstruction::CloneAndFuseInternal( HloInstruction* instruction_to_fuse, bool add_output) { CHECK_EQ(opcode_, HloOpcode::kFusion); - CHECK(instruction_to_fuse->IsFusable()); + CHECK(instruction_to_fuse->IsFusable()) << instruction_to_fuse->ToString(); VLOG(3) << "CloneAndFuseInternal:\n" << instruction_to_fuse->ToString(); HloInstruction* clone = nullptr; if (called_computations_.empty()) { @@ -2134,25 +2134,13 @@ bool HloInstruction::IsFusable() const { if (tracing()) { return false; } - // Some kinds of instructions don't make sense to fuse. switch (opcode_) { - case HloOpcode::kInfeed: - case HloOpcode::kOutfeed: case HloOpcode::kParameter: - case HloOpcode::kTrace: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: - case HloOpcode::kSend: - case HloOpcode::kSendDone: return false; - // Only fuse Rng if it is used once, otherwise the random numbers generated - // will be different in each fusion. If it is the root (user count = 0) - // then it is the equivalent of having one user. - case HloOpcode::kRng: - return users_.size() <= 1; + // Side effecting instrutions cannot be fused. default: - return true; + return !HasSideEffect(); } } diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 828be8490c..017f996bc4 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -62,18 +62,11 @@ bool IsRematerializable(const HloInstruction* instruction) { case HloOpcode::kConstant: case HloOpcode::kCrossReplicaSum: case HloOpcode::kCustomCall: - case HloOpcode::kOutfeed: - case HloOpcode::kInfeed: case HloOpcode::kParameter: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: - case HloOpcode::kSend: - case HloOpcode::kSendDone: - case HloOpcode::kTrace: case HloOpcode::kWhile: return false; default: - return true; + return !instruction->HasSideEffect(); } } -- GitLab From 389d4001261df5a0f0db1ed869e2c72fefb2297e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 21:13:31 -0800 Subject: [PATCH 0559/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 176065246 --- tensorflow/go/op/wrappers.go | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index e650d25a32..1d1383ec82 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -44,19 +44,19 @@ func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, in // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Tag for the summary. // value: Value for the summary. // // Returns the created operation. -func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) { +func WriteScalarSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteScalarSummary", Input: []tf.Input{ - writer, global_step, tag, value, + writer, step, tag, value, }, } return scope.AddOperation(opspec) @@ -89,21 +89,21 @@ func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operati // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tensor: A tensor to serialize. // tag: The summary's tag. // summary_metadata: Serialized SummaryMetadata protocol buffer containing // plugin-related metadata for this summary. // // Returns the created operation. -func WriteSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) { +func WriteSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteSummary", Input: []tf.Input{ - writer, global_step, tensor, tag, summary_metadata, + writer, step, tensor, tag, summary_metadata, }, } return scope.AddOperation(opspec) @@ -2147,19 +2147,19 @@ func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Scalar. Tag to use for the `Summary.Value`. // values: Any shape. Values to use to build the histogram. // // Returns the created operation. -func WriteHistogramSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) { +func WriteHistogramSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteHistogramSummary", Input: []tf.Input{ - writer, global_step, tag, values, + writer, step, tag, values, }, } return scope.AddOperation(opspec) @@ -11101,13 +11101,13 @@ func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr { // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Scalar. Used to build the `tag` attribute of the summary values. // tensor: 2-D of shape `[batch_size, frames]`. // sample_rate: The sample rate of the signal in hertz. // // Returns the created operation. -func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) { +func WriteAudioSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -11118,7 +11118,7 @@ func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, ta opspec := tf.OpSpec{ Type: "WriteAudioSummary", Input: []tf.Input{ - writer, global_step, tag, tensor, sample_rate, + writer, step, tag, tensor, sample_rate, }, Attrs: attrs, } @@ -18248,14 +18248,14 @@ func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Scalar. Used to build the `tag` attribute of the summary values. // tensor: 4-D of shape `[batch_size, height, width, channels]` where // `channels` is 1, 3, or 4. // bad_color: Color to use for pixels with non-finite values. // // Returns the created operation. -func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { +func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -18266,7 +18266,7 @@ func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, ta opspec := tf.OpSpec{ Type: "WriteImageSummary", Input: []tf.Input{ - writer, global_step, tag, tensor, bad_color, + writer, step, tag, tensor, bad_color, }, Attrs: attrs, } @@ -20657,18 +20657,18 @@ func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { // // Arguments: // writer: Handle of `SummaryWriter`. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tensor: A scalar string of the serialized tf.GraphDef proto. // // Returns the created operation. -func WriteGraphSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output) (o *tf.Operation) { +func WriteGraphSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteGraphSummary", Input: []tf.Input{ - writer, global_step, tensor, + writer, step, tensor, }, } return scope.AddOperation(opspec) -- GitLab From 724ca9f1a5a7428e74b62c8e2e6061244af93ace Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 07:21:52 -0800 Subject: [PATCH 0560/1801] [XLA] Enable operand buffer aliasing for Call instructions where the unique use of operand in the called computation is the root instruction which is elemenentwise on the operand. This eliminates copies of Call instruction result buffers induced by parallel computation outlining on the XLA:CPU backend. PiperOrigin-RevId: 176106140 --- .../compiler/xla/service/liveness_util.cc | 52 ++++++++++++++++++- .../xla/service/liveness_util_test.cc | 39 ++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/liveness_util.cc b/tensorflow/compiler/xla/service/liveness_util.cc index 53d88eda7a..68c99256a2 100644 --- a/tensorflow/compiler/xla/service/liveness_util.cc +++ b/tensorflow/compiler/xla/service/liveness_util.cc @@ -103,7 +103,7 @@ namespace { // Returns all uses of all aliases of 'instruction' at 'index' in 'uses'. // Each use in 'uses' is a pair (HloInstruction* user, int64 operand_index) -// where 'user' is a user of an alias of 'intruction' at 'index', and +// where 'user' is a user of an alias of 'instruction' at 'index', and // 'operand_index' is the operand index at which the alias appears in the // operand list of 'user'. std::vector> GetAllUsesOfInstructionAtIndex( @@ -243,6 +243,31 @@ bool CanShareOperandBufferWithUser( std::vector operand_indices = user->OperandIndices(operand); return operand_indices.size() == 1 && operand_indices[0] == 0; } + if (user->opcode() == HloOpcode::kCall) { + // TODO(b/62548313): Remove when buffer assignment is module scoped and + // does not assign buffers to calls. + // Find called computation parameter associated with 'operand'. + const std::vector operand_indices = user->OperandIndices(operand); + if (operand_indices.size() > 1) { + return false; + } + CHECK_EQ(1, operand_indices.size()); + auto* param = user->to_apply()->parameter_instruction(operand_indices[0]); + // Get all uses of 'operand' at 'index' in called computation. + auto param_uses = GetAllUsesOfInstructionAtIndex(param, operand_index, + points_to_analysis); + + // Return true iff: + // *) There exists exactly one use of 'operand' in called computation. + // *) The unique use is by the root instruction of called computation. + // (Note: we check the root of the called computation, because the + // root result buffer is required to alias with the Call result buffer). + // *) The root instruction of the called computation is element-wise on + // 'operand'. + auto* callee_root = user->to_apply()->root_instruction(); + return param_uses.size() == 1 && param_uses[0].first == callee_root && + callee_root->IsElementwiseOnOperand(param_uses[0].second); + } // Check if 'user' is element-wise. return user->IsElementwise(); } @@ -322,6 +347,31 @@ bool CanShareOperandBufferWithUser(HloInstruction* operand, std::vector operand_indices = user->OperandIndices(operand); return operand_indices.size() == 1 && operand_indices[0] == 0; } + if (user->opcode() == HloOpcode::kCall) { + // Get all uses of value defined by 'operand' at 'operand_index'. + const auto& uses = + dataflow.GetValueDefinedAt(operand, operand_index).uses(); + // Return true iff: + // *) There exists two uses of 'operand'. + // *) One use is by 'user' (caller). + // *) One use is by root instruction of called computation (callee root). + // (Note: we check the root of the called computation, because the + // root result buffer is required to alias with the Call result buffer). + // *) The root instruction of the called computation is element-wise on + // 'operand'. + const bool found_caller_use = + std::find_if(uses.begin(), uses.end(), [user](const HloUse& use) { + return use.instruction == user; + }) != uses.end(); + auto* callee_root = user->to_apply()->root_instruction(); + const bool found_elementwise_callee_use = + std::find_if( + uses.begin(), uses.end(), [callee_root](const HloUse& use) { + return use.instruction == callee_root && + callee_root->IsElementwiseOnOperand(use.operand_number); + }) != uses.end(); + return uses.size() == 2 && found_caller_use && found_elementwise_callee_use; + } // Check if 'user' is element-wise. return user->IsElementwise(); } diff --git a/tensorflow/compiler/xla/service/liveness_util_test.cc b/tensorflow/compiler/xla/service/liveness_util_test.cc index b5e15906d3..476e86fa72 100644 --- a/tensorflow/compiler/xla/service/liveness_util_test.cc +++ b/tensorflow/compiler/xla/service/liveness_util_test.cc @@ -415,5 +415,44 @@ TEST_F(CanShareOperandBufferWithUserTest, WhileCanShare) { CanShareOperandBufferWithUser(data, {}, whil, {}, *dataflow_analysis_)); } +// Tests that Call can alias operand buffer if the only use of the operand +// in the called computation is an elementwise instruction. +TEST_F(CanShareOperandBufferWithUserTest, CallToComputationWithFusionRoot) { + Shape shape = ShapeUtil::MakeShape(F32, {8}); + // Build sub-computation with fusion root. + auto sub_builder = HloComputation::Builder(TestName() + "_sub"); + auto sub_param = sub_builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "sub_param")); + auto one = sub_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto ones = sub_builder.AddInstruction( + HloInstruction::CreateBroadcast(shape, one, {1})); + auto add = sub_builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, sub_param, ones)); + + module_ = CreateNewModule(); + auto sub_computation = module_->AddEmbeddedComputation(sub_builder.Build()); + sub_computation->CreateFusionInstruction({add, ones}, + HloInstruction::FusionKind::kLoop); + + // Build entry-computation with kCall which calls 'sub_computation'. + auto builder = HloComputation::Builder(TestName()); + + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param")); + auto reverse = + builder.AddInstruction(HloInstruction::CreateReverse(shape, param, {0})); + auto call = builder.AddInstruction( + HloInstruction::CreateCall(shape, {reverse}, sub_computation)); + computation_ = module_->AddEntryComputation(builder.Build()); + + RunAnalysis(); + + EXPECT_TRUE(CanShareOperandBufferWithUser(reverse, {}, call, {}, + *points_to_analysis_)); + EXPECT_TRUE(CanShareOperandBufferWithUser(reverse, {}, call, {}, + *dataflow_analysis_)); +} + } // namespace } // namespace xla -- GitLab From 881f84796f2559c0e7fd8081d7449a214a4cf7ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 07:36:46 -0800 Subject: [PATCH 0561/1801] internal change PiperOrigin-RevId: 176107131 --- tensorflow/contrib/lite/toco/format_port.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/format_port.h b/tensorflow/contrib/lite/toco/format_port.h index 3bc3295d04..0e999001e0 100644 --- a/tensorflow/contrib/lite/toco/format_port.h +++ b/tensorflow/contrib/lite/toco/format_port.h @@ -36,7 +36,7 @@ inline const char* IdentityOrConvertStringToRaw(const std::string& foo) { return foo.c_str(); } -#if defined(PLATFORM_GOOGLE) +#if defined(PLATFORM_GOOGLE) && defined(HAS_GLOBAL_STRING) // Overloaded case where we return string. inline const char* IdentityOrConvertStringToRaw(const string& foo) { return foo.c_str(); -- GitLab From 573a652ec5512a35d84d5b4b4400d7430baa854a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 09:26:32 -0800 Subject: [PATCH 0562/1801] Add Speech ASR Language Model test. PiperOrigin-RevId: 176117985 --- .../lite/models/speech_terse_lm_model_test.cc | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc diff --git a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc b/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc new file mode 100644 index 0000000000..04c54ffb22 --- /dev/null +++ b/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc @@ -0,0 +1,122 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for speech ASR LM model using TFLite Ops. + +#include + +#include +#include + +#include "base/logging.h" +#include "file/base/path.h" +#include "testing/base/public/googletest.h" +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/models/test_utils.h" + +namespace tflite { +namespace models { + +constexpr int kModelInput1Tensor = 0; +constexpr int kModelInput2Tensor = 66; +constexpr int kLstmLayer1OutputStateTensor = 21; +constexpr int kLstmLayer1CellStateTensor = 22; +constexpr int kLstmLayer2OutputStateTensor = 42; +constexpr int kLstmLayer2CellStateTensor = 43; +constexpr int kLstmLayer3OutputStateTensor = 63; +constexpr int kLstmLayer3CellStateTensor = 64; +constexpr int kModelOutputTensor = 75; + +static void ClearLstmStates(Interpreter* interpreter) { + memset(interpreter->tensor(kLstmLayer1OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer1CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer2OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer2CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer3OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer3CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3CellStateTensor)->bytes); +} + +TEST(SpeechTerseLm, EndToEndTest) { + // Read the model. + string tflite_file_path = + file::JoinPath(TestDataPath(), "speech_terse_lm_model.tflite"); + auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); + CHECK(model) << "Failed to mmap model " << tflite_file_path; + + // Initialize the interpreter. + ops::builtin::BuiltinOpResolver builtins; + std::unique_ptr interpreter; + InterpreterBuilder(*model, builtins)(&interpreter); + CHECK(interpreter != nullptr); + interpreter->AllocateTensors(); + + // Load the input frames. + Frames input_frames; + const string input_file_path = + file::JoinPath(TestDataPath(), "speech_terse_lm_model_in.csv"); + ReadFrames(input_file_path, &input_frames); + + // Load the golden output results. + Frames output_frames; + const string output_file_path = + file::JoinPath(TestDataPath(), "speech_terse_lm_model_out.csv"); + ReadFrames(output_file_path, &output_frames); + + CHECK_EQ(interpreter->tensor(kModelInput1Tensor)->dims->size, 1); + const int input1_size = + interpreter->tensor(kModelInput1Tensor)->dims->data[0]; + CHECK_EQ(input1_size, 1); + CHECK_EQ(interpreter->tensor(kModelInput2Tensor)->dims->size, 1); + const int output_size = + interpreter->tensor(kModelOutputTensor)->dims->data[0]; + CHECK_EQ(output_size, 1); + + int* input_lookup_ptr = interpreter->tensor(kModelInput1Tensor)->data.i32; + int* output_lookup_ptr = interpreter->tensor(kModelInput2Tensor)->data.i32; + float* output_ptr = interpreter->tensor(kModelOutputTensor)->data.f; + + + for (int i = 0; i < input_frames.size(); i++) { + float output_score = 0.0f; + // Reset LSTM states for each sequence. + ClearLstmStates(interpreter.get()); + // For subsequent inputs feed them sequentially, one-by-one. + for (int k = 1; k < input_frames[i].size(); k++) { + // Feed the inputs to model. + input_lookup_ptr[0] = static_cast(input_frames[i][k - 1]); + output_lookup_ptr[0] = static_cast(input_frames[i][k]); + // Run the model. + interpreter->Invoke(); + // Sum up the outputs. + output_score += output_ptr[0]; + } + // Validate the output. + ASSERT_NEAR(output_score, output_frames[i][0], 1.4e-5); + } +} + +} // namespace models +} // namespace tflite -- GitLab From be4295e796437d18ffb7242942c963a8857e5003 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 17 Nov 2017 10:10:37 -0800 Subject: [PATCH 0563/1801] Created new shared IsFreeOfSideEffect and ModifiedFrameInfo functions. PiperOrigin-RevId: 176124088 --- tensorflow/core/grappler/BUILD | 2 ++ tensorflow/core/grappler/op_types.cc | 28 +++++++++++++++++++ tensorflow/core/grappler/op_types.h | 3 ++ .../optimizers/arithmetic_optimizer.cc | 26 ++++------------- .../optimizers/arithmetic_optimizer.h | 8 ++---- .../optimizers/dependency_optimizer.cc | 9 ++++-- 6 files changed, 48 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD index 7b18e79c8d..c81c6c0f21 100644 --- a/tensorflow/core/grappler/BUILD +++ b/tensorflow/core/grappler/BUILD @@ -21,6 +21,8 @@ cc_library( hdrs = ["op_types.h"], visibility = ["//visibility:public"], deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index acb8498142..69bdef33c6 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/lib/core/status.h" namespace tensorflow { namespace grappler { @@ -120,5 +122,31 @@ bool IsVariable(const NodeDef& node) { op == "VarHandleOp" || op == "ReadVariableOp"; } +bool IsFreeOfSideEffect(const NodeDef& node) { + // Placeholders must be preserved to keep the graph feedable. + if (IsPlaceholder(node)) { + return false; + } + const OpDef* op_def = nullptr; + Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); + if (!status.ok()) { + return false; + } + if (op_def->is_stateful()) { + return false; + } + // Nodes such as Assign or AssignAdd modify one of their inputs. + for (const auto& input : op_def->input_arg()) { + if (input.is_ref()) { + return false; + } + } + return true; +} + +bool ModifiesFrameInfo(const NodeDef& node) { + return IsEnter(node) || IsExit(node) || IsNextIteration(node); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 0de954fcb4..a7c556c1ed 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -42,6 +42,9 @@ bool IsSwitch(const NodeDef& node); bool IsTranspose(const NodeDef& node); bool IsVariable(const NodeDef& node); +bool IsFreeOfSideEffect(const NodeDef& node); +bool ModifiesFrameInfo(const NodeDef& node); + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 0cd0d4351e..2677888fcb 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -512,33 +512,17 @@ bool UniqueNodes::SameNode(const NodeDef& node1, const NodeDef& node2) const { return true; } -// static -bool ArithmeticOptimizer::CanDedup( - const NodeDef& node, const std::unordered_set& nodes_to_preserve) { - if (nodes_to_preserve.find(node.name()) != nodes_to_preserve.end()) { +bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { + if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { return false; } - if (IsEnter(node) || IsExit(node) || IsPlaceholder(node)) { + if (IsEnter(node) || IsExit(node)) { return false; } if (node.device().find("SPU") != string::npos) { return false; } - const OpDef* op_def = nullptr; - Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); - if (!status.ok()) { - return false; - } - if (op_def->is_stateful()) { - return false; - } - // Don't consolidate ops such as AssignAdd - for (const auto& input : op_def->input_arg()) { - if (input.is_ref()) { - return false; - } - } - return true; + return IsFreeOfSideEffect(node); } void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { @@ -553,7 +537,7 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { continue; } NodeDef* node = optimized_graph->mutable_node(i); - if (!CanDedup(*node, nodes_to_preserve_)) { + if (!CanDedup(*node)) { continue; } NodeDef* rep = nodes.FindOrAddRepresentative(node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index c8cc292295..c22e2d5363 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -28,11 +28,6 @@ namespace grappler { // run a model. class ArithmeticOptimizer : public GraphOptimizer { public: - // Returns true if it is safe to dedup node from the graph. - // TODO(rmlarsen): Refactor to op_types.{h,cc}. - static bool CanDedup(const NodeDef& node, - const std::unordered_set& nodes_to_preserve); - ArithmeticOptimizer() : opt_level_(RewriterConfig::ON) {} explicit ArithmeticOptimizer(RewriterConfig::Toggle opt_level) : opt_level_(opt_level) {} @@ -47,6 +42,9 @@ class ArithmeticOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: + // Returns true if it is safe to dedup node from the graph. + bool CanDedup(const NodeDef& node) const; + void DedupComputations(GraphDef* optimized_graph) const; // Runs peep-hole optimizations on `optimized_graph`, e.g., removing inverse // transposes. diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index 49eb29d037..57eee60646 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -93,11 +93,16 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) { if (!has_fetch_ || HasRegularOutputs(node, *node_map_)) { return false; } - + if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { + return false; + } if (IsMerge(node)) { return false; } - if (!ArithmeticOptimizer::CanDedup(node, nodes_to_preserve_)) { + if (ModifiesFrameInfo(node)) { + return false; + } + if (!IsFreeOfSideEffect(node)) { return false; } -- GitLab From 049a34d692095b7e137bca27d2445415314ceaf7 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Fri, 17 Nov 2017 10:30:58 -0800 Subject: [PATCH 0564/1801] iOS Camera example for TensorFlow Lite. (#14591) The code is derived from the TensorFlow mobile camera example in tensorflow/tensorflow/examples/ios/camera/. --- tensorflow/contrib/lite/Makefile | 2 +- .../contrib/lite/download_dependencies.sh | 5 +- .../lite/examples/ios/camera/.gitignore | 2 + .../ios/camera/CameraExampleAppDelegate.h | 21 + .../ios/camera/CameraExampleAppDelegate.m | 44 ++ .../ios/camera/CameraExampleViewController.h | 48 ++ .../ios/camera/CameraExampleViewController.mm | 506 ++++++++++++++++++ .../lite/examples/ios/camera/Info.plist | 44 ++ .../camera/MainStoryboard_iPhone.storyboard | 46 ++ .../contrib/lite/examples/ios/camera/Podfile | 5 + .../contrib/lite/examples/ios/camera/main.mm | 28 + .../project.pbxproj | 419 +++++++++++++++ 12 files changed, 1168 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/lite/examples/ios/camera/.gitignore create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm create mode 100644 tensorflow/contrib/lite/examples/ios/camera/Info.plist create mode 100644 tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard create mode 100644 tensorflow/contrib/lite/examples/ios/camera/Podfile create mode 100644 tensorflow/contrib/lite/examples/ios/camera/main.mm create mode 100644 tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 8c65a0cc34..78402727ab 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -28,7 +28,7 @@ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. CXX := $(CC_PREFIX) gcc -CXXFLAGS := --std=c++11 +CXXFLAGS := --std=c++11 -O3 -DNDEBUG CC := $(CC_PREFIX) gcc CFLAGS := LDOPTS := diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index 0d9842fefa..41480c2007 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -27,6 +27,7 @@ NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz" FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/master.zip" MODELS_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_224_ios_lite_float_2017_11_08.zip" +QUANTIZED_MODELS_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip" # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, # so work around it by patching the source. @@ -59,7 +60,7 @@ download_and_extract() { unzip ${tempdir}/* -d ${tempdir2} # unzip has no strip components, so unzip to a temp dir, and move the files # we want from the tempdir to destination. - cp -R ${tempdir2}/*/* ${dir}/ + echo cp `find ${tempdir2} -type f` ${dir}/ rm -rf ${tempdir2} ${tempdir} fi @@ -75,6 +76,7 @@ download_and_extract "${NEON_2_SSE_URL}" "${DOWNLOADS_DIR}/neon_2_sse" download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash" download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers" download_and_extract "${MODELS_URL}" "${DOWNLOADS_DIR}/models" +download_and_extract "${QUANTIZED_MODELS_URL}" "${DOWNLOADS_DIR}/quantized_models" replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" @@ -84,5 +86,6 @@ replace_by_sed 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DAT "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" cp ${DOWNLOADS_DIR}/models/models/* tensorflow/contrib/lite/examples/ios/simple/data/ +cp ${DOWNLOADS_DIR}/quantized_models/* tensorflow/contrib/lite/examples/ios/camera/data/ echo "download_dependencies.sh completed successfully." >&2 diff --git a/tensorflow/contrib/lite/examples/ios/camera/.gitignore b/tensorflow/contrib/lite/examples/ios/camera/.gitignore new file mode 100644 index 0000000000..9e8962f4c6 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/.gitignore @@ -0,0 +1,2 @@ +/data/*.txt +/data/*.tflite diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h new file mode 100644 index 0000000000..55891c3ee1 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h @@ -0,0 +1,21 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@interface CameraExampleAppDelegate : UIResponder + +@property(strong, nonatomic) UIWindow* window; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m new file mode 100644 index 0000000000..128266d53f --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m @@ -0,0 +1,44 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "CameraExampleAppDelegate.h" + +@implementation CameraExampleAppDelegate + +@synthesize window = _window; + +- (BOOL)application:(UIApplication *)application + didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + [self.window makeKeyAndVisible]; + return YES; +} + +- (void)applicationWillResignActive:(UIApplication *)application { + [[UIApplication sharedApplication] setIdleTimerDisabled:NO]; +} + +- (void)applicationDidEnterBackground:(UIApplication *)application { +} + +- (void)applicationWillEnterForeground:(UIApplication *)application { +} + +- (void)applicationDidBecomeActive:(UIApplication *)application { + [[UIApplication sharedApplication] setIdleTimerDisabled:YES]; +} + +- (void)applicationWillTerminate:(UIApplication *)application { +} + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h new file mode 100644 index 0000000000..fb5800e86d --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h @@ -0,0 +1,48 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import + +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" + +@interface CameraExampleViewController + : UIViewController { + IBOutlet UIView* previewView; + AVCaptureVideoPreviewLayer* previewLayer; + AVCaptureVideoDataOutput* videoDataOutput; + dispatch_queue_t videoDataOutputQueue; + UIView* flashView; + BOOL isUsingFrontFacingCamera; + NSMutableDictionary* oldPredictionValues; + NSMutableArray* labelLayers; + AVCaptureSession* session; + + std::vector labels; + std::unique_ptr model; + tflite::ops::builtin::BuiltinOpResolver resolver; + std::unique_ptr interpreter; + + double total_latency; + int total_count; +} +@property(strong, nonatomic) CATextLayer* predictionTextLayer; + +- (IBAction)takePicture:(id)sender; +- (IBAction)switchCameras:(id)sender; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm new file mode 100644 index 0000000000..ea398ad14e --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm @@ -0,0 +1,506 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "CameraExampleViewController.h" +#import +#import +#import +#import + +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +#define LOG(x) std::cerr + +// If you have your own model, modify this to the file name, and make sure +// you've added the file to your app resources too. +static NSString* model_file_name = @"mobilenet_quant_v1_224"; +static NSString* model_file_type = @"tflite"; + +// If you have your own model, point this to the labels file. +static NSString* labels_file_name = @"labels"; +static NSString* labels_file_type = @"txt"; + +// These dimensions need to match those the model was trained with. +static const int wanted_input_width = 224; +static const int wanted_input_height = 224; +static const int wanted_input_channels = 3; + +static NSString* FilePathForResourceName(NSString* name, NSString* extension) { + NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension]; + if (file_path == NULL) { + LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String] + << "' in bundle."; + } + return file_path; +} + +static void LoadLabels(NSString* file_name, NSString* file_type, + std::vector* label_strings) { + NSString* labels_path = FilePathForResourceName(file_name, file_type); + if (!labels_path) { + LOG(ERROR) << "Failed to find model proto at" << [file_name UTF8String] + << [file_type UTF8String]; + } + std::ifstream t; + t.open([labels_path UTF8String]); + std::string line; + while (t) { + std::getline(t, line); + label_strings->push_back(line); + } + t.close(); +} + +// Returns the top N confidence values over threshold in the provided vector, +// sorted by confidence in descending order. +static void GetTopN(const uint8_t* prediction, const int prediction_size, const int num_results, + const float threshold, std::vector>* top_results) { + // Will contain top N results in ascending order. + std::priority_queue, std::vector>, + std::greater>> + top_result_pq; + + const long count = prediction_size; + for (int i = 0; i < count; ++i) { + const float value = prediction[i] / 255.0; + // Only add it if it beats the threshold and has a chance at being in + // the top N. + if (value < threshold) { + continue; + } + + top_result_pq.push(std::pair(value, i)); + + // If at capacity, kick the smallest value out. + if (top_result_pq.size() > num_results) { + top_result_pq.pop(); + } + } + + // Copy to output vector and reverse into descending order. + while (!top_result_pq.empty()) { + top_results->push_back(top_result_pq.top()); + top_result_pq.pop(); + } + std::reverse(top_results->begin(), top_results->end()); +} + +@interface CameraExampleViewController (InternalMethods) +- (void)setupAVCapture; +- (void)teardownAVCapture; +@end + +@implementation CameraExampleViewController + +- (void)setupAVCapture { + NSError* error = nil; + + session = [AVCaptureSession new]; + if ([[UIDevice currentDevice] userInterfaceIdiom] == UIUserInterfaceIdiomPhone) + [session setSessionPreset:AVCaptureSessionPreset640x480]; + else + [session setSessionPreset:AVCaptureSessionPresetPhoto]; + + AVCaptureDevice* device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + AVCaptureDeviceInput* deviceInput = + [AVCaptureDeviceInput deviceInputWithDevice:device error:&error]; + assert(error == nil); + + if ([session canAddInput:deviceInput]) [session addInput:deviceInput]; + + videoDataOutput = [AVCaptureVideoDataOutput new]; + + NSDictionary* rgbOutputSettings = + [NSDictionary dictionaryWithObject:[NSNumber numberWithInt:kCMPixelFormat_32BGRA] + forKey:(id)kCVPixelBufferPixelFormatTypeKey]; + [videoDataOutput setVideoSettings:rgbOutputSettings]; + [videoDataOutput setAlwaysDiscardsLateVideoFrames:YES]; + videoDataOutputQueue = dispatch_queue_create("VideoDataOutputQueue", DISPATCH_QUEUE_SERIAL); + [videoDataOutput setSampleBufferDelegate:self queue:videoDataOutputQueue]; + + if ([session canAddOutput:videoDataOutput]) [session addOutput:videoDataOutput]; + [[videoDataOutput connectionWithMediaType:AVMediaTypeVideo] setEnabled:YES]; + + previewLayer = [[AVCaptureVideoPreviewLayer alloc] initWithSession:session]; + [previewLayer setBackgroundColor:[[UIColor blackColor] CGColor]]; + [previewLayer setVideoGravity:AVLayerVideoGravityResizeAspect]; + CALayer* rootLayer = [previewView layer]; + [rootLayer setMasksToBounds:YES]; + [previewLayer setFrame:[rootLayer bounds]]; + [rootLayer addSublayer:previewLayer]; + [session startRunning]; + + if (error) { + NSString* title = [NSString stringWithFormat:@"Failed with error %d", (int)[error code]]; + UIAlertController* alertController = + [UIAlertController alertControllerWithTitle:title + message:[error localizedDescription] + preferredStyle:UIAlertControllerStyleAlert]; + UIAlertAction* dismiss = + [UIAlertAction actionWithTitle:@"Dismiss" style:UIAlertActionStyleDefault handler:nil]; + [alertController addAction:dismiss]; + [self presentViewController:alertController animated:YES completion:nil]; + [self teardownAVCapture]; + } +} + +- (void)teardownAVCapture { + [previewLayer removeFromSuperlayer]; +} + +- (AVCaptureVideoOrientation)avOrientationForDeviceOrientation: + (UIDeviceOrientation)deviceOrientation { + AVCaptureVideoOrientation result = (AVCaptureVideoOrientation)(deviceOrientation); + if (deviceOrientation == UIDeviceOrientationLandscapeLeft) + result = AVCaptureVideoOrientationLandscapeRight; + else if (deviceOrientation == UIDeviceOrientationLandscapeRight) + result = AVCaptureVideoOrientationLandscapeLeft; + return result; +} + +- (IBAction)takePicture:(id)sender { + if ([session isRunning]) { + [session stopRunning]; + [sender setTitle:@"Continue" forState:UIControlStateNormal]; + + flashView = [[UIView alloc] initWithFrame:[previewView frame]]; + [flashView setBackgroundColor:[UIColor whiteColor]]; + [flashView setAlpha:0.f]; + [[[self view] window] addSubview:flashView]; + + [UIView animateWithDuration:.2f + animations:^{ + [flashView setAlpha:1.f]; + } + completion:^(BOOL finished) { + [UIView animateWithDuration:.2f + animations:^{ + [flashView setAlpha:0.f]; + } + completion:^(BOOL finished) { + [flashView removeFromSuperview]; + flashView = nil; + }]; + }]; + + } else { + [session startRunning]; + [sender setTitle:@"Freeze Frame" forState:UIControlStateNormal]; + } +} + +- (void)captureOutput:(AVCaptureOutput*)captureOutput + didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer + fromConnection:(AVCaptureConnection*)connection { + CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer); + CFRetain(pixelBuffer); + [self runModelOnFrame:pixelBuffer]; + CFRelease(pixelBuffer); +} + +- (void)runModelOnFrame:(CVPixelBufferRef)pixelBuffer { + assert(pixelBuffer != NULL); + + OSType sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer); + int doReverseChannels; + if (kCVPixelFormatType_32ARGB == sourcePixelFormat) { + doReverseChannels = 1; + } else if (kCVPixelFormatType_32BGRA == sourcePixelFormat) { + doReverseChannels = 0; + } else { + assert(false); // Unknown source format + } + + const int sourceRowBytes = (int)CVPixelBufferGetBytesPerRow(pixelBuffer); + const int image_width = (int)CVPixelBufferGetWidth(pixelBuffer); + const int fullHeight = (int)CVPixelBufferGetHeight(pixelBuffer); + + CVPixelBufferLockFlags unlockFlags = kNilOptions; + CVPixelBufferLockBaseAddress(pixelBuffer, unlockFlags); + + unsigned char* sourceBaseAddr = (unsigned char*)(CVPixelBufferGetBaseAddress(pixelBuffer)); + int image_height; + unsigned char* sourceStartAddr; + if (fullHeight <= image_width) { + image_height = fullHeight; + sourceStartAddr = sourceBaseAddr; + } else { + image_height = image_width; + const int marginY = ((fullHeight - image_width) / 2); + sourceStartAddr = (sourceBaseAddr + (marginY * sourceRowBytes)); + } + const int image_channels = 4; + assert(image_channels >= wanted_input_channels); + uint8_t* in = sourceStartAddr; + + int input = interpreter->inputs()[0]; + + uint8_t* out = interpreter->typed_tensor(input); + for (int y = 0; y < wanted_input_height; ++y) { + uint8_t* out_row = out + (y * wanted_input_width * wanted_input_channels); + for (int x = 0; x < wanted_input_width; ++x) { + const int in_x = (y * image_width) / wanted_input_width; + const int in_y = (x * image_height) / wanted_input_height; + uint8_t* in_pixel = in + (in_y * image_width * image_channels) + (in_x * image_channels); + uint8_t* out_pixel = out_row + (x * wanted_input_channels); + for (int c = 0; c < wanted_input_channels; ++c) { + out_pixel[c] = in_pixel[c]; + } + } + } + + double startTimestamp = [[NSDate new] timeIntervalSince1970]; + if (interpreter->Invoke() != kTfLiteOk) { + LOG(FATAL) << "Failed to invoke!"; + } + double endTimestamp = [[NSDate new] timeIntervalSince1970]; + total_latency += (endTimestamp - startTimestamp); + total_count += 1; + NSLog(@"Time: %.4lf, avg: %.4lf, count: %d", endTimestamp - startTimestamp, + total_latency / total_count, total_count); + + const int output_size = 1000; + const int kNumResults = 5; + const float kThreshold = 0.1f; + + std::vector> top_results; + + uint8_t* output = interpreter->typed_output_tensor(0); + GetTopN(output, output_size, kNumResults, kThreshold, &top_results); + + NSMutableDictionary* newValues = [NSMutableDictionary dictionary]; + for (const auto& result : top_results) { + const float confidence = result.first; + const int index = result.second; + NSString* labelObject = [NSString stringWithUTF8String:labels[index].c_str()]; + NSNumber* valueObject = [NSNumber numberWithFloat:confidence]; + [newValues setObject:valueObject forKey:labelObject]; + } + dispatch_async(dispatch_get_main_queue(), ^(void) { + [self setPredictionValues:newValues]; + }); + + CVPixelBufferUnlockBaseAddress(pixelBuffer, unlockFlags); + + CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); +} + +- (void)dealloc { + [self teardownAVCapture]; +} + +- (void)didReceiveMemoryWarning { + [super didReceiveMemoryWarning]; +} + +- (void)viewDidLoad { + [super viewDidLoad]; + labelLayers = [[NSMutableArray alloc] init]; + oldPredictionValues = [[NSMutableDictionary alloc] init]; + + NSString* graph_path = FilePathForResourceName(model_file_name, @"tflite"); + model = tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]); + if (!model) { + LOG(FATAL) << "Failed to mmap model " << graph_path; + } + LOG(INFO) << "Loaded model " << graph_path; + model->error_reporter(); + LOG(INFO) << "resolved reporter"; + + tflite::ops::builtin::BuiltinOpResolver resolver; + LoadLabels(labels_file_name, labels_file_type, &labels); + + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + LOG(FATAL) << "Failed to construct interpreter"; + } + if (interpreter->AllocateTensors() != kTfLiteOk) { + LOG(FATAL) << "Failed to allocate tensors!"; + } + + [self setupAVCapture]; +} + +- (void)viewDidUnload { + [super viewDidUnload]; +} + +- (void)viewWillAppear:(BOOL)animated { + [super viewWillAppear:animated]; +} + +- (void)viewDidAppear:(BOOL)animated { + [super viewDidAppear:animated]; +} + +- (void)viewWillDisappear:(BOOL)animated { + [super viewWillDisappear:animated]; +} + +- (void)viewDidDisappear:(BOOL)animated { + [super viewDidDisappear:animated]; +} + +- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation { + return (interfaceOrientation == UIInterfaceOrientationPortrait); +} + +- (BOOL)prefersStatusBarHidden { + return YES; +} + +- (void)setPredictionValues:(NSDictionary*)newValues { + const float decayValue = 0.75f; + const float updateValue = 0.25f; + const float minimumThreshold = 0.01f; + + NSMutableDictionary* decayedPredictionValues = [[NSMutableDictionary alloc] init]; + for (NSString* label in oldPredictionValues) { + NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label]; + const float oldPredictionValue = [oldPredictionValueObject floatValue]; + const float decayedPredictionValue = (oldPredictionValue * decayValue); + if (decayedPredictionValue > minimumThreshold) { + NSNumber* decayedPredictionValueObject = [NSNumber numberWithFloat:decayedPredictionValue]; + [decayedPredictionValues setObject:decayedPredictionValueObject forKey:label]; + } + } + oldPredictionValues = decayedPredictionValues; + + for (NSString* label in newValues) { + NSNumber* newPredictionValueObject = [newValues objectForKey:label]; + NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label]; + if (!oldPredictionValueObject) { + oldPredictionValueObject = [NSNumber numberWithFloat:0.0f]; + } + const float newPredictionValue = [newPredictionValueObject floatValue]; + const float oldPredictionValue = [oldPredictionValueObject floatValue]; + const float updatedPredictionValue = (oldPredictionValue + (newPredictionValue * updateValue)); + NSNumber* updatedPredictionValueObject = [NSNumber numberWithFloat:updatedPredictionValue]; + [oldPredictionValues setObject:updatedPredictionValueObject forKey:label]; + } + NSArray* candidateLabels = [NSMutableArray array]; + for (NSString* label in oldPredictionValues) { + NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label]; + const float oldPredictionValue = [oldPredictionValueObject floatValue]; + if (oldPredictionValue > 0.05f) { + NSDictionary* entry = @{@"label" : label, @"value" : oldPredictionValueObject}; + candidateLabels = [candidateLabels arrayByAddingObject:entry]; + } + } + NSSortDescriptor* sort = [NSSortDescriptor sortDescriptorWithKey:@"value" ascending:NO]; + NSArray* sortedLabels = + [candidateLabels sortedArrayUsingDescriptors:[NSArray arrayWithObject:sort]]; + + const float leftMargin = 10.0f; + const float topMargin = 10.0f; + + const float valueWidth = 48.0f; + const float valueHeight = 18.0f; + + const float labelWidth = 246.0f; + const float labelHeight = 18.0f; + + const float labelMarginX = 5.0f; + const float labelMarginY = 5.0f; + + [self removeAllLabelLayers]; + + int labelCount = 0; + for (NSDictionary* entry in sortedLabels) { + NSString* label = [entry objectForKey:@"label"]; + NSNumber* valueObject = [entry objectForKey:@"value"]; + const float value = [valueObject floatValue]; + const float originY = topMargin + ((labelHeight + labelMarginY) * labelCount); + const int valuePercentage = (int)roundf(value * 100.0f); + + const float valueOriginX = leftMargin; + NSString* valueText = [NSString stringWithFormat:@"%d%%", valuePercentage]; + + [self addLabelLayerWithText:valueText + originX:valueOriginX + originY:originY + width:valueWidth + height:valueHeight + alignment:kCAAlignmentRight]; + + const float labelOriginX = (leftMargin + valueWidth + labelMarginX); + + [self addLabelLayerWithText:[label capitalizedString] + originX:labelOriginX + originY:originY + width:labelWidth + height:labelHeight + alignment:kCAAlignmentLeft]; + + labelCount += 1; + if (labelCount > 4) { + break; + } + } +} + +- (void)removeAllLabelLayers { + for (CATextLayer* layer in labelLayers) { + [layer removeFromSuperlayer]; + } + [labelLayers removeAllObjects]; +} + +- (void)addLabelLayerWithText:(NSString*)text + originX:(float)originX + originY:(float)originY + width:(float)width + height:(float)height + alignment:(NSString*)alignment { + CFTypeRef font = (CFTypeRef) @"Menlo-Regular"; + const float fontSize = 12.0; + const float marginSizeX = 5.0f; + const float marginSizeY = 2.0f; + + const CGRect backgroundBounds = CGRectMake(originX, originY, width, height); + const CGRect textBounds = CGRectMake((originX + marginSizeX), (originY + marginSizeY), + (width - (marginSizeX * 2)), (height - (marginSizeY * 2))); + + CATextLayer* background = [CATextLayer layer]; + [background setBackgroundColor:[UIColor blackColor].CGColor]; + [background setOpacity:0.5f]; + [background setFrame:backgroundBounds]; + background.cornerRadius = 5.0f; + + [[self.view layer] addSublayer:background]; + [labelLayers addObject:background]; + + CATextLayer* layer = [CATextLayer layer]; + [layer setForegroundColor:[UIColor whiteColor].CGColor]; + [layer setFrame:textBounds]; + [layer setAlignmentMode:alignment]; + [layer setWrapped:YES]; + [layer setFont:font]; + [layer setFontSize:fontSize]; + layer.contentsScale = [[UIScreen mainScreen] scale]; + [layer setString:text]; + + [[self.view layer] addSublayer:layer]; + [labelLayers addObject:layer]; +} + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/Info.plist b/tensorflow/contrib/lite/examples/ios/camera/Info.plist new file mode 100644 index 0000000000..f3d96bab16 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/Info.plist @@ -0,0 +1,44 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleDisplayName + tflite_camera_example + CFBundleExecutable + ${EXECUTABLE_NAME} + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + ${PRODUCT_NAME} + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1.0 + LSRequiresIPhoneOS + + NSCameraUsageDescription + Capture images to detect object + UIMainStoryboardFile + MainStoryboard_iPhone + UIRequiresFullScreen + + UIStatusBarHidden + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + + + diff --git a/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard b/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard new file mode 100644 index 0000000000..0f10a22e41 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/examples/ios/camera/Podfile b/tensorflow/contrib/lite/examples/ios/camera/Podfile new file mode 100644 index 0000000000..4ae6fb6b94 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/Podfile @@ -0,0 +1,5 @@ +platform :ios, '8.0' +inhibit_all_warnings! + +target 'tflite_camera_example' + pod 'TensorFlow-experimental' diff --git a/tensorflow/contrib/lite/examples/ios/camera/main.mm b/tensorflow/contrib/lite/examples/ios/camera/main.mm new file mode 100644 index 0000000000..1a9e542f7c --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/main.mm @@ -0,0 +1,28 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "CameraExampleAppDelegate.h" + +int main(int argc, char* argv[]) { + int retVal = 0; + + @autoreleasepool { + retVal = + UIApplicationMain(argc, argv, nil, NSStringFromClass([CameraExampleAppDelegate class])); + } + return retVal; +} diff --git a/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj new file mode 100644 index 0000000000..c98183276b --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj @@ -0,0 +1,419 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 1C3C9DCC1ED3AB4200B8B5FA /* main.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1C3C9DCA1ED3AB4200B8B5FA /* main.mm */; }; + 1C99111C1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */; }; + 1CA5EB931ED3ABFB00247A34 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */; }; + 1CB47D491ED3AD1700DF7666 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */; }; + 1CDB2D491ED3A9CD007929E9 /* CameraExampleAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */; }; + 1CDB2D4A1ED3A9CD007929E9 /* CameraExampleViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */; }; + 1CDB2D4E1ED3AA35007929E9 /* Info.plist in Resources */ = {isa = PBXBuildFile; fileRef = 1CDB2D4D1ED3AA35007929E9 /* Info.plist */; }; + 54DC6C3C5F734F3A58069F0C /* libPods-tflite_camera_example.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */; }; + AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */ = {isa = PBXBuildFile; fileRef = AC1F82641FBA3CBD0052BA77 /* labels.txt */; }; + AC1F82691FBA3F930052BA77 /* libtensorflow-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */; }; + ACA1A4CA1FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; }; + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; }; + 1C3C9DCA1ED3AB4200B8B5FA /* main.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = main.mm; sourceTree = ""; }; + 1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tflite_camera_example.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; path = MainStoryboard_iPhone.storyboard; sourceTree = ""; }; + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; }; + 1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreMedia.framework; path = System/Library/Frameworks/CoreMedia.framework; sourceTree = SDKROOT; }; + 1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; }; + 1CDB2D421ED3A9CD007929E9 /* CameraExampleAppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleAppDelegate.h; sourceTree = ""; }; + 1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CameraExampleAppDelegate.m; sourceTree = ""; }; + 1CDB2D441ED3A9CD007929E9 /* CameraExampleViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleViewController.h; sourceTree = ""; }; + 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CameraExampleViewController.mm; sourceTree = ""; }; + 1CDB2D4D1ED3AA35007929E9 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-tflite_camera_example.a"; sourceTree = BUILT_PRODUCTS_DIR; }; + 3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.debug.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.debug.xcconfig"; sourceTree = ""; }; + 55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.release.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.release.xcconfig"; sourceTree = ""; }; + AC1F82641FBA3CBD0052BA77 /* labels.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = labels.txt; sourceTree = ""; }; + AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libtensorflow-lite.a"; path = "../../../gen/lib/libtensorflow-lite.a"; sourceTree = ""; }; + ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_quant_v1_224.tflite; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 1C564C0A1ED3A92E00087306 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + AC1F82691FBA3F930052BA77 /* libtensorflow-lite.a in Frameworks */, + 1CB47D491ED3AD1700DF7666 /* AVFoundation.framework in Frameworks */, + 1CA5EB931ED3ABFB00247A34 /* CoreMedia.framework in Frameworks */, + 54DC6C3C5F734F3A58069F0C /* libPods-tflite_camera_example.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 24D7686C331131624F4454A0 /* Frameworks */ = { + isa = PBXGroup; + children = ( + AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */, + 1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */, + 1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */, + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */, + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */, + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */, + 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */, + ); + name = Frameworks; + sourceTree = ""; + }; + 3E9FC355632FB928EA23BEED /* Pods */ = { + isa = PBXGroup; + children = ( + 3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */, + 55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */, + ); + name = Pods; + sourceTree = ""; + }; + 591157921CF4011C00C31E3A = { + isa = PBXGroup; + children = ( + 1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */, + 1C3C9DCA1ED3AB4200B8B5FA /* main.mm */, + 1CDB2D4D1ED3AA35007929E9 /* Info.plist */, + 1CDB2D421ED3A9CD007929E9 /* CameraExampleAppDelegate.h */, + 1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */, + 1CDB2D441ED3A9CD007929E9 /* CameraExampleViewController.h */, + 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */, + 59A3CFF31CF4E68100C4259F /* data */, + 5911579C1CF4011C00C31E3A /* Products */, + 3E9FC355632FB928EA23BEED /* Pods */, + 24D7686C331131624F4454A0 /* Frameworks */, + ); + sourceTree = ""; + }; + 5911579C1CF4011C00C31E3A /* Products */ = { + isa = PBXGroup; + children = ( + 1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */, + ); + name = Products; + sourceTree = ""; + }; + 59A3CFF31CF4E68100C4259F /* data */ = { + isa = PBXGroup; + children = ( + ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */, + AC1F82641FBA3CBD0052BA77 /* labels.txt */, + ); + path = data; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 1C564C0C1ED3A92E00087306 /* tflite_camera_example */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1C564C351ED3A92E00087306 /* Build configuration list for PBXNativeTarget "tflite_camera_example" */; + buildPhases = ( + 66DAEAAEE9EF6550C3A061E0 /* [CP] Check Pods Manifest.lock */, + 1C564C091ED3A92E00087306 /* Sources */, + 1C564C0A1ED3A92E00087306 /* Frameworks */, + 1C564C0B1ED3A92E00087306 /* Resources */, + 00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */, + 5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = tflite_camera_example; + productName = tflite_camera_example; + productReference = 1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 591157931CF4011C00C31E3A /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 0830; + LastUpgradeCheck = 0830; + ORGANIZATIONNAME = Google; + TargetAttributes = { + 1C564C0C1ED3A92E00087306 = { + CreatedOnToolsVersion = 8.3.2; + DevelopmentTeam = EQHXZ8M8AV; + ProvisioningStyle = Automatic; + }; + }; + }; + buildConfigurationList = 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "tflite_camera_example" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 591157921CF4011C00C31E3A; + productRefGroup = 5911579C1CF4011C00C31E3A /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 1C564C0C1ED3A92E00087306 /* tflite_camera_example */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 1C564C0B1ED3A92E00087306 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ACA1A4CA1FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite in Resources */, + 1C99111C1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard in Resources */, + 1CDB2D4E1ED3AA35007929E9 /* Info.plist in Resources */, + AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXShellScriptBuildPhase section */ + 00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputPaths = ( + ); + name = "[CP] Embed Pods Frameworks"; + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-frameworks.sh\"\n"; + showEnvVarsInLog = 0; + }; + 5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputPaths = ( + ); + name = "[CP] Copy Pods Resources"; + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-resources.sh\"\n"; + showEnvVarsInLog = 0; + }; + 66DAEAAEE9EF6550C3A061E0 /* [CP] Check Pods Manifest.lock */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputPaths = ( + "${PODS_PODFILE_DIR_PATH}/Podfile.lock", + "${PODS_ROOT}/Manifest.lock", + ); + name = "[CP] Check Pods Manifest.lock"; + outputPaths = ( + "$(DERIVED_FILE_DIR)/Pods-tflite_camera_example-checkManifestLockResult.txt", + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; + showEnvVarsInLog = 0; + }; +/* End PBXShellScriptBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 1C564C091ED3A92E00087306 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1CDB2D4A1ED3A9CD007929E9 /* CameraExampleViewController.mm in Sources */, + 1CDB2D491ED3A9CD007929E9 /* CameraExampleAppDelegate.m in Sources */, + 1C3C9DCC1ED3AB4200B8B5FA /* main.mm in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1C564C361ED3A92E00087306 /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + DEVELOPMENT_TEAM = EQHXZ8M8AV; + INFOPLIST_FILE = Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 10.3; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = "com.pf.tf-camera-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 3.0; + }; + name = Debug; + }; + 1C564C371ED3A92E00087306 /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + DEVELOPMENT_TEAM = EQHXZ8M8AV; + INFOPLIST_FILE = Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 10.3; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = "com.pf.tf-camera-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; + SWIFT_VERSION = 3.0; + }; + name = Release; + }; + 591157B01CF4011D00C31E3A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 591157B11CF4011D00C31E3A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1C564C351ED3A92E00087306 /* Build configuration list for PBXNativeTarget "tflite_camera_example" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1C564C361ED3A92E00087306 /* Debug */, + 1C564C371ED3A92E00087306 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "tflite_camera_example" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 591157B01CF4011D00C31E3A /* Debug */, + 591157B11CF4011D00C31E3A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 591157931CF4011C00C31E3A /* Project object */; +} -- GitLab From 35d3750854948ce13812240204c92b6dc86e91fe Mon Sep 17 00:00:00 2001 From: Mats Linander Date: Fri, 17 Nov 2017 13:31:15 -0500 Subject: [PATCH 0565/1801] typo: s/cesnus/census/ (#14588) --- tensorflow/tools/dist_test/python/census_widendeep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/dist_test/python/census_widendeep.py b/tensorflow/tools/dist_test/python/census_widendeep.py index 3a55781496..6f578d6f67 100644 --- a/tensorflow/tools/dist_test/python/census_widendeep.py +++ b/tensorflow/tools/dist_test/python/census_widendeep.py @@ -263,7 +263,7 @@ if __name__ == "__main__": "--data_dir", type=str, default="/tmp/census-data", - help="Directory for storing the cesnsus data" + help="Directory for storing the census data" ) parser.add_argument( "--model_dir", -- GitLab From 5bdd513cd6ebea989400d8c16216804794f3e67b Mon Sep 17 00:00:00 2001 From: brett koonce Date: Fri, 17 Nov 2017 10:31:52 -0800 Subject: [PATCH 0566/1801] minor spelling tweaks (#14564) --- tensorflow/docs_src/mobile/ios_build.md | 2 +- tensorflow/docs_src/mobile/mobile_intro.md | 2 +- tensorflow/docs_src/mobile/optimizing.md | 2 +- tensorflow/docs_src/performance/xla/operation_semantics.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/mobile/ios_build.md b/tensorflow/docs_src/mobile/ios_build.md index 2e6d3bf90e..6943b3c4b8 100644 --- a/tensorflow/docs_src/mobile/ios_build.md +++ b/tensorflow/docs_src/mobile/ios_build.md @@ -98,7 +98,7 @@ There are three demo applications for iOS, all defined in Xcode projects inside ## Building the TensorFlow iOS libraries from source -While Cocapods is the quickest and easiest way of getting started, you sometimes +While Cocoapods is the quickest and easiest way of getting started, you sometimes need more flexibility to determine which parts of TensorFlow your app should be shipped with. For such cases, you can build the iOS libraries from the sources. [This diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md index 948563292a..73b2396e69 100644 --- a/tensorflow/docs_src/mobile/mobile_intro.md +++ b/tensorflow/docs_src/mobile/mobile_intro.md @@ -156,7 +156,7 @@ easy cases on device. Doing on-device computation can also signal when it's time to switch to working on the cloud. A good example of this is hotword detection in speech. Since devices are able to constantly listen out for the keywords, this then triggers a -lot of traffic to cloud-based speech recognition once one is recognised. Without +lot of traffic to cloud-based speech recognition once one is recognized. Without the on-device component, the whole application wouldn’t be feasible, and this pattern exists across several other applications as well. Recognizing that some sensor input is interesting enough for further processing makes a lot of diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index 1da8be5689..5abc68bb61 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -115,7 +115,7 @@ If you look at the resulting file size, you should see that it’s about a quart of the original at 23MB. Another transform is `round_weights`, which doesn't make the file smaller, but it -makes the file compressable to about the same size as when `quantize_weights` is +makes the file compressible to about the same size as when `quantize_weights` is used. This is particularly useful for mobile development, taking advantage of the fact that app bundles are compressed before they’re downloaded by consumers. diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index ccced8792e..d532efea0c 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -674,7 +674,7 @@ The output type is a tuple of three ComputationDataHandles: | `batch_var` | `ComputationDataHandle` | 1 dimensional array (\\(\sigma^2\\)) | The `batch_mean` and `batch_var` are moments calculated across the batch and -spatial dimensions using the formulars above. +spatial dimensions using the formulas above. ## BatchNormInference -- GitLab From b70aa4d55480d58cd9e7f8cf355126ad56b869d3 Mon Sep 17 00:00:00 2001 From: PinkySan Date: Fri, 17 Nov 2017 19:32:00 +0100 Subject: [PATCH 0567/1801] Exporting tensorflow package for cmake (#14554) * Exporting Targets * reverting changes within tf_core_framework.cmake --- tensorflow/contrib/cmake/tf_shared_lib.cmake | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 3e3fe0cdfa..d4099f3279 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -95,10 +95,18 @@ if(WIN32) add_dependencies(tensorflow tensorflow_static) endif(WIN32) -install(TARGETS tensorflow +target_include_directories(tensorflow PUBLIC + $ + $) + +install(TARGETS tensorflow EXPORT tensorflow_export RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + +install(EXPORT tensorflow_export + FILE TensorflowConfig.cmake + DESTINATION lib/cmake) # install necessary headers # tensorflow headers -- GitLab From 34d4986e661b8d45f7cec2a717c401c65f0a242f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 10:28:28 -0800 Subject: [PATCH 0568/1801] Fix the numbering of the LSTM layers in the figure. PiperOrigin-RevId: 176126886 --- tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg index ca96556422..9f841c219b 100644 --- a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg +++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg @@ -1,4 +1,4 @@ - + -- GitLab From e37904e4545d2c14b8d03698e486c9e170c1211a Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 17 Nov 2017 10:34:06 -0800 Subject: [PATCH 0569/1801] Move ptr_util to core/util. (#14647) * Move ptr_util to core/util. Break the dependency of core onto XLA. * Reformat build file. * Fix the include directive. --- tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/ptr_util.h | 47 +++--------- tensorflow/core/BUILD | 6 ++ tensorflow/core/kernels/BUILD | 2 +- tensorflow/core/kernels/summary_interface.cc | 4 +- tensorflow/core/util/ptr_util.h | 80 ++++++++++++++++++++ 6 files changed, 100 insertions(+), 40 deletions(-) create mode 100644 tensorflow/core/util/ptr_util.h diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 515b572b0e..d3f292207f 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -175,6 +175,7 @@ cc_library( ":types", ":xla_data_proto", "//tensorflow/core:lib", + "//tensorflow/core:ptr_util", ], ) diff --git a/tensorflow/compiler/xla/ptr_util.h b/tensorflow/compiler/xla/ptr_util.h index fa67030313..627ddf535f 100644 --- a/tensorflow/compiler/xla/ptr_util.h +++ b/tensorflow/compiler/xla/ptr_util.h @@ -16,7 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_PTR_UTIL_H_ #define TENSORFLOW_COMPILER_XLA_PTR_UTIL_H_ -// Utility functions for pointers. +// As this was moved to tensorflow/core/util, provide indirections here to +// maintain current functionality of the library. #include @@ -24,55 +25,27 @@ limitations under the License. #include #include -namespace xla { - -namespace internal { - -// Trait to select overloads and return types for MakeUnique. -template -struct MakeUniqueResult { - using scalar = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using array = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using invalid = void; -}; +#include "tensorflow/core/util/ptr_util.h" -} // namespace internal +namespace xla { -// Transfers ownership of a raw pointer to a std::unique_ptr of deduced type. -// Example: -// X* NewX(int, int); -// auto x = WrapUnique(NewX(1, 2)); // 'x' is std::unique_ptr. -// -// WrapUnique is useful for capturing the output of a raw pointer factory. -// However, prefer 'MakeUnique(args...) over 'WrapUnique(new T(args...))'. -// auto x = WrapUnique(new X(1, 2)); // works, but nonideal. -// auto x = MakeUnique(1, 2); // safer, standard, avoids raw 'new'. -// -// Note: Cannot wrap pointers to array of unknown bound (i.e. U(*)[]). template std::unique_ptr WrapUnique(T* ptr) { - static_assert(!std::is_array::value || std::extent::value != 0, - "types T[0] or T[] are unsupported"); - return std::unique_ptr(ptr); + return tensorflow::WrapUnique(ptr); } template -typename internal::MakeUniqueResult::scalar MakeUnique(Args&&... args) { - return std::unique_ptr(new T(std::forward(args)...)); +typename tensorflow::helper::MakeUniqueResult::scalar MakeUnique( + Args&&... args) { + return tensorflow::MakeUnique(std::forward(args)...); } // Overload for array of unknown bound. // The allocation of arrays needs to use the array form of new, // and cannot take element constructor arguments. template -typename internal::MakeUniqueResult::array MakeUnique(size_t n) { - return std::unique_ptr(new typename std::remove_extent::type[n]()); +typename tensorflow::helper::MakeUniqueResult::array MakeUnique(size_t n) { + return tensorflow::MakeUnique(n); } } // namespace xla diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 19d3d52ede..206ccc1c72 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -455,6 +455,7 @@ tf_cuda_library( "util/mirror_pad_mode.h", "util/padding.h", "util/port.h", + "util/ptr_util.h", "util/reffed_status_callback.h", "util/saved_tensor_slice_util.h", "util/sparse/group_iterator.h", @@ -493,6 +494,11 @@ cc_library( ], ) +cc_library( + name = "ptr_util", + hdrs = ["util/ptr_util.h"], +) + cc_library( name = "reader_base", srcs = ["framework/reader_base.cc"], diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index cf95c6781a..f491132777 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6241,11 +6241,11 @@ cc_library( srcs = ["summary_interface.cc"], hdrs = ["summary_interface.h"], deps = [ - "//tensorflow/compiler/xla:util", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:ptr_util", ], ) diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc index ad28d77ffd..97c0c2c099 100644 --- a/tensorflow/core/kernels/summary_interface.cc +++ b/tensorflow/core/kernels/summary_interface.cc @@ -16,7 +16,6 @@ limitations under the License. #include -#include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -28,6 +27,7 @@ limitations under the License. #include "tensorflow/core/lib/png/png_io.h" #include "tensorflow/core/lib/wav/wav_io.h" #include "tensorflow/core/util/events_writer.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace { @@ -229,7 +229,7 @@ class SummaryWriterImpl : public SummaryWriterInterface { } mutex_lock ml(mu_); events_writer_ = - xla::MakeUnique(io::JoinPath(logdir, "events")); + tensorflow::MakeUnique(io::JoinPath(logdir, "events")); if (!events_writer_->InitWithSuffix(filename_suffix)) { return errors::Unknown("Could not initialize events writer."); } diff --git a/tensorflow/core/util/ptr_util.h b/tensorflow/core/util/ptr_util.h new file mode 100644 index 0000000000..f902b3ffa1 --- /dev/null +++ b/tensorflow/core/util/ptr_util.h @@ -0,0 +1,80 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_PTR_UTIL_H_ +#define TENSORFLOW_CORE_UTIL_PTR_UTIL_H_ + +// Utility functions for pointers. + +#include + +#include +#include +#include + +namespace tensorflow { + +namespace helper { + +// Trait to select overloads and return types for MakeUnique. +template +struct MakeUniqueResult { + using scalar = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using array = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using invalid = void; +}; + +} // namespace helper + +// Transfers ownership of a raw pointer to a std::unique_ptr of deduced type. +// Example: +// X* NewX(int, int); +// auto x = WrapUnique(NewX(1, 2)); // 'x' is std::unique_ptr. +// +// WrapUnique is useful for capturing the output of a raw pointer factory. +// However, prefer 'MakeUnique(args...) over 'WrapUnique(new T(args...))'. +// auto x = WrapUnique(new X(1, 2)); // works, but nonideal. +// auto x = MakeUnique(1, 2); // safer, standard, avoids raw 'new'. +// +// Note: Cannot wrap pointers to array of unknown bound (i.e. U(*)[]). +template +std::unique_ptr WrapUnique(T* ptr) { + static_assert(!std::is_array::value || std::extent::value != 0, + "types T[0] or T[] are unsupported"); + return std::unique_ptr(ptr); +} + +template +typename helper::MakeUniqueResult::scalar MakeUnique(Args&&... args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + +// Overload for array of unknown bound. +// The allocation of arrays needs to use the array form of new, +// and cannot take element constructor arguments. +template +typename helper::MakeUniqueResult::array MakeUnique(size_t n) { + return std::unique_ptr(new typename std::remove_extent::type[n]()); +} + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_PTR_UTIL_H_ -- GitLab From 3e53570d3bf518ec2b6cfeed4b5fd57d11370289 Mon Sep 17 00:00:00 2001 From: CSJY Date: Sat, 18 Nov 2017 02:34:19 +0800 Subject: [PATCH 0570/1801] fix #14542 a bug of model_to_dot() (#14553) --- tensorflow/python/keras/_impl/keras/utils/vis_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py index ce2faf2d96..d56c4484ce 100644 --- a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py @@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'): layer_id = str(id(layer)) for i, node in enumerate(layer._inbound_nodes): # pylint: disable=protected-access node_key = layer.name + '_ib-' + str(i) - if node_key in model.container_nodes: + if node_key in model._network_nodes: # pylint: disable=protected-access for inbound_layer in node.inbound_layers: inbound_layer_id = str(id(inbound_layer)) layer_id = str(id(layer)) -- GitLab From 8020abcc5ef691e2a19a0249981210d433047c19 Mon Sep 17 00:00:00 2001 From: Samuel He Date: Sat, 18 Nov 2017 02:34:36 +0800 Subject: [PATCH 0571/1801] Fix the wrong ValurError formatting in convolutional.py (#14551) --- tensorflow/python/layers/convolutional.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index ad2988fb89..7213fa1db8 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -1232,9 +1232,7 @@ class Conv2DTranspose(Conv2D): def build(self, input_shape): if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - 'Received input shape:', str(input_shape)) + raise ValueError('Inputs should have rank 4. Received input shape: ' + str(input_shape)) if self.data_format == 'channels_first': channel_axis = 1 else: -- GitLab From 4db40ceec83ddc9499fc10e93ab364a1450ac9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=94=B0=E4=BC=A0=E6=AD=A6?= Date: Sat, 18 Nov 2017 02:34:47 +0800 Subject: [PATCH 0572/1801] Removes non-existent link (#14538) --- tensorflow/docs_src/get_started/input_fn.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 0db5c6143a..f0dcdc47ff 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -211,8 +211,8 @@ def get_input_fn_from_numpy(data_set, num_epochs=None, shuffle=True): ### A Neural Network Model for Boston House Values In the remainder of this tutorial, you'll write an input function for -preprocessing a subset of Boston housing data pulled from the [UCI Housing Data -Set](https://archive.ics.uci.edu/ml/datasets/Housing) and use it to feed data to +preprocessing a subset of Boston housing data pulled from the UCI Housing Data +Set and use it to feed data to a neural network regressor for predicting median house values. The [Boston CSV data sets](#setup) you'll use to train your neural network -- GitLab From 11eefcd21f9f3d92740cb85d9576198507eeb118 Mon Sep 17 00:00:00 2001 From: Winnie Tsang Date: Fri, 17 Nov 2017 10:35:00 -0800 Subject: [PATCH 0573/1801] fix broken section links in the document (#14524) --- tensorflow/docs_src/extend/adding_an_op.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md index 15d6d77f5e..a3a0272059 100644 --- a/tensorflow/docs_src/extend/adding_an_op.md +++ b/tensorflow/docs_src/extend/adding_an_op.md @@ -451,17 +451,17 @@ Now that you know how to build a basic (and somewhat restricted) op and implementation, we'll look at some of the more complicated things you will typically need to build into your op. This includes: -* [Conditional checks and validation](#conditional_checks_and_validation) -* [Op registration](#op_registration) +* [Conditional checks and validation](#conditional-checks-and-validation) +* [Op registration](#op-registration) * [Attrs](#attrs) - * [Attr types](#attr_types) + * [Attr types](#attr-types) * [Polymorphism](#polymorphism) - * [Inputs and outputs](#inputs_and_outputs) - * [Backwards compatibility](#backwards_compatibility) -* [GPU support](#gpu_support) - * [Compiling the kernel for the GPU device](#compiling_the_kernel_for_the_gpu_device) -* [Implement the gradient in Python](#implement_the_gradient_in_python) -* [Shape functions in C++](#shape_functions_in_c) + * [Inputs and outputs](#inputs-and-outputs) + * [Backwards compatibility](#backwards-compatibility) +* [GPU support](#gpu-support) + * [Compiling the kernel for the GPU device](#compiling-the-kernel-for-the-gpu-device) +* [Implement the gradient in Python](#implement-the-gradient-in-python) +* [Shape functions in C++](#shape-functions-in-c) ### Conditional checks and validation -- GitLab From 6fecbc39f37643f30ebd0681240b2c2fdede5b09 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 10:32:07 -0800 Subject: [PATCH 0574/1801] Added tests for tensorflow::StringPiece::Hasher. PiperOrigin-RevId: 176127449 --- tensorflow/core/lib/core/stringpiece_test.cc | 72 ++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc index ad70d41873..11554554e8 100644 --- a/tensorflow/core/lib/core/stringpiece_test.cc +++ b/tensorflow/core/lib/core/stringpiece_test.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/lib/core/stringpiece.h" + +#include #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -63,4 +65,74 @@ TEST(StringPiece, Contains) { EXPECT_TRUE(!a.contains(d)); } +TEST(StringPieceHasher, Equality) { + StringPiece::Hasher hasher; + + StringPiece s1("foo"); + StringPiece s2("bar"); + StringPiece s3("baz"); + StringPiece s4("zot"); + + EXPECT_TRUE(hasher(s1) != hasher(s2)); + EXPECT_TRUE(hasher(s1) != hasher(s3)); + EXPECT_TRUE(hasher(s1) != hasher(s4)); + EXPECT_TRUE(hasher(s2) != hasher(s3)); + EXPECT_TRUE(hasher(s2) != hasher(s4)); + EXPECT_TRUE(hasher(s3) != hasher(s4)); + + EXPECT_TRUE(hasher(s1) == hasher(s1)); + EXPECT_TRUE(hasher(s2) == hasher(s2)); + EXPECT_TRUE(hasher(s3) == hasher(s3)); + EXPECT_TRUE(hasher(s4) == hasher(s4)); +} + +TEST(StringPieceHasher, HashMap) { + string s1("foo"); + string s2("bar"); + string s3("baz"); + + StringPiece p1(s1); + StringPiece p2(s2); + StringPiece p3(s3); + + std::unordered_map map; + + map.insert(std::make_pair(p1, 0)); + map.insert(std::make_pair(p2, 1)); + map.insert(std::make_pair(p3, 2)); + EXPECT_EQ(map.size(), 3); + + bool found[3] = {false, false, false}; + for (auto const& val : map) { + int x = val.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], true); + EXPECT_EQ(found[2], true); + + auto new_iter = map.find("zot"); + EXPECT_TRUE(new_iter == map.end()); + + new_iter = map.find("bar"); + EXPECT_TRUE(new_iter != map.end()); + + map.erase(new_iter); + EXPECT_EQ(map.size(), 2); + + found[0] = false; + found[1] = false; + found[2] = false; + for (const auto& iter : map) { + int x = iter.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], false); + EXPECT_EQ(found[2], true); +} } // namespace tensorflow -- GitLab From fedb844013194539e23cb971df793b4029396c2f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 10:44:57 -0800 Subject: [PATCH 0575/1801] Throw error if context exists but graph is empty when enabling eager mode. PiperOrigin-RevId: 176129497 --- tensorflow/python/framework/ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 6ac3b862c8..7cca260d73 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4703,6 +4703,9 @@ def enable_eager_execution(config=None, device_policy=None): " policy: %s." % (config, context._context._config, device_policy, context._context._device_policy)) + else: + raise ValueError( + "tfe.enable_eager_execution has to be called at program startup.") def eager_run(main=None, argv=None): -- GitLab From de1cd503235a32ec216533d198dd6f6318655ab2 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 17 Nov 2017 10:58:28 -0800 Subject: [PATCH 0576/1801] Minor tf_session.i refactoring. Moves inline C++ helper functions to top of file, and adds CreateWrappedTFOutput and CreateWrappedTFOperation helper functions (this is pulling out existing functionality that will be useful moving forward). PiperOrigin-RevId: 176131555 --- tensorflow/python/client/tf_session.i | 82 +++++++++++++++------------ 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 40731aba7d..41c707ae63 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -24,6 +24,49 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/public/version.h" +// Helper function to convert a Python list of Tensors to a C++ vector of +// TF_Outputs. +// +// Returns true if successful. Otherwise, returns false and sets error_msg. +bool PyTensorListToVector(PyObject* py_tensor_list, + std::vector* vec, + string* error_msg) { + if (!PyList_Check(py_tensor_list)) { + *error_msg = "expected Python list."; + return false; + } + size_t size = PyList_Size(py_tensor_list); + for (int i = 0; i < size; ++i) { + PyObject* item = PyList_GetItem(py_tensor_list, i); + TF_Output* input_ptr; + if (!SWIG_IsOK(SWIG_ConvertPtr(item, reinterpret_cast(&input_ptr), + SWIGTYPE_p_TF_Output, 0))) { + *error_msg = "expected Python list of wrapped TF_Output objects. " + "Found python list of something else."; + return false; + } + vec->push_back(*input_ptr); + } + return true; +} + +// Helper function to convert a TF_Output to a wrapped TF_Output Python object. +PyObject* CreateWrappedTFOutput(TF_Output tf_output) { + // We used heap-allocated pointers in the Python runtime (this is what SWIG + // generates by default for functions returning TF_Output). + TF_Output* tf_output_ptr = new TF_Output(tf_output); + // Use SWIG_POINTER_OWN so the TF_Output* is deleted by Python. + return SWIG_NewPointerObj(tf_output_ptr, SWIGTYPE_p_TF_Output, + SWIG_POINTER_OWN); +} + +// Helper function to convert a TF_Operation to a wrapped TF_Operation Python +// object. +PyObject* CreateWrappedTFOperation(TF_Operation* tf_operation) { + // No flags since operation is owned by TF_Graph. + return SWIG_NewPointerObj(tf_operation, SWIGTYPE_p_TF_Operation, 0); +} + %} %include "tensorflow/python/client/tf_sessionrun_wrapper.i" @@ -98,8 +141,7 @@ tensorflow::ImportNumpy(); } for (size_t i = 0; i < $1.size(); ++i) { - PyList_SET_ITEM($result, i, SWIG_NewPointerObj( - $1[i], SWIGTYPE_p_TF_Operation, 0)); + PyList_SET_ITEM($result, i, CreateWrappedTFOperation($1[i])); } } @@ -118,13 +160,7 @@ tensorflow::ImportNumpy(); // Unwrap the generated SwigValueWrapper> via & std::vector* tf_outputs = &$1; for (size_t i = 0; i < $1.size(); ++i) { - // We used wrapped heap-allocated pointers in the Python runtime (this is - // what SWIG generates by default for functions returning TF_Output). - TF_Output* tf_output_ptr = new TF_Output((*tf_outputs)[i]); - // Use SWIG_POINTER_OWN so the TF_Output* is deleted by Python. - PyList_SET_ITEM($result, i, - SWIG_NewPointerObj(tf_output_ptr, SWIGTYPE_p_TF_Output, - SWIG_POINTER_OWN)); + PyList_SET_ITEM($result, i, CreateWrappedTFOutput((*tf_outputs)[i])); } } @@ -268,34 +304,6 @@ tensorflow::ImportNumpy(); reinterpret_cast($1.data), $1.length); } -%inline %{ -// Helper function to convert a Python list of Tensors to a C++ vector of -// TF_Outputs. -// -// Returns true if successful. Otherwise, returns false and sets error_msg. -bool PyTensorListToVector(PyObject* py_tensor_list, - std::vector* vec, - string* error_msg) { - if (!PyList_Check(py_tensor_list)) { - *error_msg = "expected Python list."; - return false; - } - size_t size = PyList_Size(py_tensor_list); - for (int i = 0; i < size; ++i) { - PyObject* item = PyList_GetItem(py_tensor_list, i); - TF_Output* input_ptr; - if (!SWIG_IsOK(SWIG_ConvertPtr(item, reinterpret_cast(&input_ptr), - SWIGTYPE_p_TF_Output, 0))) { - *error_msg = "expected Python list of wrapped TF_Output objects. " - "Found python list of something else."; - return false; - } - vec->push_back(*input_ptr); - } - return true; -} -%} - // Converts input Python list of wrapped TF_Outputs into a single array %typemap(in) (const TF_Output* inputs, int num_inputs) (std::vector inputs) { -- GitLab From 684c02d91116022bbceea13fc4a0cff9267d8534 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 11:14:12 -0800 Subject: [PATCH 0577/1801] Add g3doc for the ASR LM model. PiperOrigin-RevId: 176134219 --- .../contrib/lite/models/testdata/g3doc/README.md | 13 +++++++++++++ .../contrib/lite/models/testdata/g3doc/asr_lm.svg | 4 ++++ 2 files changed, 17 insertions(+) create mode 100644 tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md index d0c21d2833..da4802b07d 100644 --- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md +++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md @@ -61,6 +61,19 @@ the corresponding parameters as shown in the figure. ![asr_am_model](asr_am.svg "ASR AM model") +### Automatic Speech Recognizer (ASR) Language Model (LM) + +The language model for automatic speech recognition is the neural network model +for predicting the probability of a word given previous words in a sentence. +It generates posterior probabilities of the next word based from a sequence of +words. The words are encoded as indices in a fixed size dictionary. +The model has two inputs both of size one (integer), an output size of one +(float). It consits of three embedding layer, three LSTM layers, followed by a +multiplication, a fully connected layers and an addition. +The corresponding parameters as shown in the figure. + +![asr_lm_model](asr_lm.svg "ASR LM model") + ## Speech models test input/output generation As mentioned above the input to models are generated from a pre-processing diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg new file mode 100644 index 0000000000..84d5f95b6a --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg @@ -0,0 +1,4 @@ + + + + -- GitLab From 90ec1d476073276f7039a832afa18f6bd12b9376 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 17 Nov 2017 11:29:13 -0800 Subject: [PATCH 0578/1801] Portable path parsing in speech_commands/input_data.py (#14519) * Portable path parsing in speech_commands/input_data.py Convert some code using a regular expression with `'/'` characters into code that uses `os.path` methods and handles `'\'` characters in Windows paths. * Move the `.lower()` to the proper spot. --- tensorflow/examples/speech_commands/input_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py index 6d75fbb92b..751652b330 100644 --- a/tensorflow/examples/speech_commands/input_data.py +++ b/tensorflow/examples/speech_commands/input_data.py @@ -240,7 +240,8 @@ class AudioProcessor(object): # Look through all the subfolders to find audio samples search_path = os.path.join(self.data_dir, '*', '*.wav') for wav_path in gfile.Glob(search_path): - word = re.search('.*/([^/]+)/.*.wav', wav_path).group(1).lower() + _, word = os.path.split(os.path.dirname(wav_path)) + word = word.lower() # Treat the '_background_noise_' folder as a special case, since we expect # it to contain long audio samples we mix in to improve training. if word == BACKGROUND_NOISE_DIR_NAME: -- GitLab From 9c55a55cbcc4a8da8f840ff834d2b511a2025ec6 Mon Sep 17 00:00:00 2001 From: Karl Lessard Date: Fri, 17 Nov 2017 14:29:51 -0500 Subject: [PATCH 0579/1801] Remove obsolete Input interface (#14517) --- .../src/main/java/org/tensorflow/Input.java | 48 ------------------- 1 file changed, 48 deletions(-) delete mode 100644 tensorflow/java/src/main/java/org/tensorflow/Input.java diff --git a/tensorflow/java/src/main/java/org/tensorflow/Input.java b/tensorflow/java/src/main/java/org/tensorflow/Input.java deleted file mode 100644 index 13bc463e7d..0000000000 --- a/tensorflow/java/src/main/java/org/tensorflow/Input.java +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -package org.tensorflow; - -/** - * Interface implemented by operands of a TensorFlow operation. - * - *

    Example usage: - * - *

    {@code
    - * // The "decodeJpeg" operation can be used as input to the "cast" operation
    - * Input decodeJpeg = ops.image().decodeJpeg(...);
    - * ops.math().cast(decodeJpeg, DataType.FLOAT);
    - *
    - * // The output "y" of the "unique" operation can be used as input to the "cast" operation
    - * Output y = ops.array().unique(...).y();
    - * ops.math().cast(y, DataType.FLOAT);
    - *
    - * // The "split" operation can be used as input list to the "concat" operation
    - * Iterable split = ops.array().split(...);
    - * ops.array().concat(0, split);
    - * }
    - */ -public interface Input { - - /** - * Returns the symbolic handle of a tensor. - * - *

    Inputs to TensorFlow operations are outputs of another TensorFlow operation. This method is - * used to obtain a symbolic handle that represents the computation of the input. - * - * @see OperationBuilder#addInput(Output) - */ - Output asOutput(); -} -- GitLab From b8bef6e6c89931768ac1f6b28d834d359e761410 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 17 Nov 2017 11:26:54 -0800 Subject: [PATCH 0580/1801] Rename optimal to optimal_seconds in HloCostAnalysis etc. PiperOrigin-RevId: 176136105 --- .../compiler/xla/service/hlo_cost_analysis.cc | 20 +++++++++---------- .../compiler/xla/service/hlo_cost_analysis.h | 6 +++--- .../xla/service/hlo_cost_analysis_test.cc | 2 +- .../xla/service/hlo_execution_profile.cc | 2 +- .../xla/service/hlo_profile_printer.cc | 2 +- .../xla/service/hlo_profile_printer.h | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 1877065f67..a24457edbf 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -28,7 +28,7 @@ namespace xla { constexpr char HloCostAnalysis::kFlopsKey[]; constexpr char HloCostAnalysis::kTranscendentalsKey[]; constexpr char HloCostAnalysis::kBytesAccessedKey[]; -constexpr char HloCostAnalysis::kSecondsKey[]; +constexpr char HloCostAnalysis::kOptimalSecondsKey[]; HloCostAnalysis::HloCostAnalysis(const ShapeSizeFunction& shape_size) : HloCostAnalysis(shape_size, {}) {} @@ -60,16 +60,16 @@ Status HloCostAnalysis::Postprocess(const HloInstruction* hlo) { if (current_should_compute_bottleneck_time_) { // Compute the time as the time of the bottleneck, i.e. the slowest property // given the per-second rate of each property. - float max_seconds = 0.0f; + float optimal_seconds = 0.0f; for (const auto& property : current_properties_) { - if (property.first != kSecondsKey) { - max_seconds = std::max( - max_seconds, + if (property.first != kOptimalSecondsKey) { + optimal_seconds = std::max( + optimal_seconds, property.second / GetProperty(property.first, per_second_rates_, INFINITY)); } } - current_properties_[kSecondsKey] = max_seconds; + current_properties_[kOptimalSecondsKey] = optimal_seconds; } TF_RET_CHECK(hlo_properties_.emplace(hlo, current_properties_).second); @@ -496,8 +496,8 @@ float HloCostAnalysis::bytes_accessed() const { return GetProperty(kBytesAccessedKey, properties_sum_); } -float HloCostAnalysis::seconds() const { - return GetProperty(kSecondsKey, properties_sum_); +float HloCostAnalysis::optimal_seconds() const { + return GetProperty(kOptimalSecondsKey, properties_sum_); } int64 HloCostAnalysis::flop_count(const HloInstruction& hlo) const { @@ -512,8 +512,8 @@ int64 HloCostAnalysis::bytes_accessed(const HloInstruction& hlo) const { return GetPropertyForHlo(hlo, kBytesAccessedKey, hlo_properties_); } -float HloCostAnalysis::seconds(const HloInstruction& hlo) const { - return GetPropertyForHlo(hlo, kSecondsKey, hlo_properties_); +float HloCostAnalysis::optimal_seconds(const HloInstruction& hlo) const { + return GetPropertyForHlo(hlo, kOptimalSecondsKey, hlo_properties_); } StatusOr HloCostAnalysis::ProcessSubcomputation( diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 0f44775378..e785596c8e 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -42,7 +42,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { static constexpr char kFlopsKey[] = "flops"; static constexpr char kTranscendentalsKey[] = "transcendentals"; static constexpr char kBytesAccessedKey[] = "bytes accessed"; - static constexpr char kSecondsKey[] = "seconds"; + static constexpr char kOptimalSecondsKey[] = "optimal_seconds"; // shape_size is a function which returns the size in bytes of the top-level // buffer of a shape. @@ -118,14 +118,14 @@ class HloCostAnalysis : public ConstDfsHloVisitor { float flop_count() const; float transcendental_count() const; float bytes_accessed() const; - float seconds() const; + float optimal_seconds() const; // Returns the respective cost computed for a particular HLO instruction, or 0 // if the HLO was not found to have a cost in the analysis. int64 flop_count(const HloInstruction& hlo) const; int64 transcendental_count(const HloInstruction& hlo) const; int64 bytes_accessed(const HloInstruction& hlo) const; - float seconds(const HloInstruction& hlo) const; + float optimal_seconds(const HloInstruction& hlo) const; const Properties& properties() const { return properties_sum_; } const float property(const string& key) const { diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc index 0eaa21ef25..3b289c240a 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc @@ -389,7 +389,7 @@ TEST_F(FusionCostAnalysis, LoopFusion) { static_assert(bytes_accessed == 64, ""); EXPECT_EQ(fusion_analysis.bytes_accessed(), bytes_accessed); - EXPECT_EQ(fusion_analysis.seconds(), 1 << i); + EXPECT_EQ(fusion_analysis.optimal_seconds(), 1 << i); } } diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index 755374b91d..9e256b9b37 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -83,7 +83,7 @@ static HloProfilePrinter CreateOwnedHloProfilePrinter( instruction_info->transcendental_count = cost_analysis.transcendental_count(*hlo); instruction_info->bytes_accessed = cost_analysis.bytes_accessed(*hlo); - instruction_info->seconds = cost_analysis.seconds(*hlo); + instruction_info->optimal_seconds = cost_analysis.optimal_seconds(*hlo); instruction_info->profile_index = hlo_profile_index_map.GetProfileIndexFor(*hlo); CHECK_LT(instruction_info->profile_index, max_profile_index); diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.cc b/tensorflow/compiler/xla/service/hlo_profile_printer.cc index 071c5a6629..e944ad1513 100644 --- a/tensorflow/compiler/xla/service/hlo_profile_printer.cc +++ b/tensorflow/compiler/xla/service/hlo_profile_printer.cc @@ -50,7 +50,7 @@ string HloProfilePrinter::ToString(const int64* counters, /*short_name=*/instruction->short_name, instruction->category, counters[instruction->profile_index], instruction->flop_count, instruction->transcendental_count, instruction->bytes_accessed, - instruction->seconds); + instruction->optimal_seconds); } result += builder.ToString(); diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.h b/tensorflow/compiler/xla/service/hlo_profile_printer.h index 45921c66f6..316753a82a 100644 --- a/tensorflow/compiler/xla/service/hlo_profile_printer.h +++ b/tensorflow/compiler/xla/service/hlo_profile_printer.h @@ -41,7 +41,7 @@ class HloProfilePrinter { float flop_count; float transcendental_count; float bytes_accessed; - float seconds; + float optimal_seconds; // The index into the profile counters array for the HloInstruction // corresponding to this HloInstructionInfo. -- GitLab From 9dd9246d76aeada08f07d8c9550d7eedb0809713 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 12:08:27 -0800 Subject: [PATCH 0581/1801] Open-sourcing AddSign and PowerSign optimizers, found in Neural Optimizer Search with Reinforcement Learning [Bello et al, ICML2017] PiperOrigin-RevId: 176142062 --- tensorflow/contrib/opt/BUILD | 51 ++++ tensorflow/contrib/opt/__init__.py | 5 +- .../contrib/opt/python/training/addsign.py | 169 +++++++++++ .../opt/python/training/addsign_test.py | 262 +++++++++++++++++ .../contrib/opt/python/training/powersign.py | 173 +++++++++++ .../opt/python/training/powersign_test.py | 268 ++++++++++++++++++ .../contrib/opt/python/training/sign_decay.py | 158 +++++++++++ .../opt/python/training/sign_decay_test.py | 110 +++++++ tensorflow/core/kernels/training_ops.cc | 250 +++++++++++++++- tensorflow/core/kernels/training_ops.h | 23 ++ .../core/kernels/training_ops_gpu.cu.cc | 74 +++++ tensorflow/core/kernels/training_ops_test.cc | 74 +++++ tensorflow/core/ops/training_ops.cc | 137 +++++++++ tensorflow/core/ops/training_ops_test.cc | 34 +++ 14 files changed, 1786 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/opt/python/training/addsign.py create mode 100644 tensorflow/contrib/opt/python/training/addsign_test.py create mode 100644 tensorflow/contrib/opt/python/training/powersign.py create mode 100644 tensorflow/contrib/opt/python/training/powersign_test.py create mode 100644 tensorflow/contrib/opt/python/training/sign_decay.py create mode 100644 tensorflow/contrib/opt/python/training/sign_decay_test.py diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 096d2270e4..8c46becf2c 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -14,11 +14,14 @@ py_library( name = "opt_py", srcs = [ "__init__.py", + "python/training/addsign.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/external_optimizer.py", "python/training/lazy_adam_optimizer.py", "python/training/moving_average_optimizer.py", "python/training/nadam_optimizer.py", + "python/training/powersign.py", + "python/training/sign_decay.py", "python/training/variable_clipping_optimizer.py", ], srcs_version = "PY2AND3", @@ -152,6 +155,54 @@ tf_py_test( ], ) +py_test( + name = "sign_decay_test", + srcs = ["python/training/sign_decay_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:client_testlib", + ], +) + +py_test( + name = "addsign_test", + srcs = ["python/training/addsign_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:session", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + +py_test( + name = "powersign_test", + srcs = ["python/training/powersign_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:session", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index e194fa2d4d..caf22536bb 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -19,12 +19,14 @@ from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import +from tensorflow.contrib.opt.python.training.addsign import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import * -from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * # pylint: enable=wildcard-import @@ -32,6 +34,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ + 'PowerSignOptimizer', 'AddSignOptimizer' 'DelayCompensatedGradientDescentOptimizer', 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', diff --git a/tensorflow/contrib/opt/python/training/addsign.py b/tensorflow/contrib/opt/python/training/addsign.py new file mode 100644 index 0000000000..729e59cb0a --- /dev/null +++ b/tensorflow/contrib/opt/python/training/addsign.py @@ -0,0 +1,169 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of AddSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import optimizer +from tensorflow.python.training import training_ops + + +class AddSignOptimizer(optimizer.Optimizer): + """Optimizer that implements the AddSign update. + + See Neural Optimizer Search with Reinforcement Learning + [Bello et al., ICML2017]. + """ + + def __init__(self, + learning_rate=0.1, + alpha=1.0, + beta=0.9, + sign_decay_fn=None, + use_locking=False, + name='AddSignOptimizer'): + """Constructs a new AddSignOptimizer object. + + Initialization: + + m_0 <- 0 (Initialize initial 1st moment vector) + t <- 0 (Initialize timestep) + ``` + + Update: + + ``` + t <- t + 1 + m_t <- beta1 * m_{t-1} + (1 - beta1) * g + sign_decay <- sign_decay(t) + update <- (alpha + sign_decay * sign(g) *sign(m)) * g + variable <- variable - lr_t * update + ``` + + Example for AddSign-ld (AddSign with linear sign decay) + ``` + decay_steps = 1000 + linear_decay_fn = sign_decays.get_linear_decay_fn(decay_steps) + opt = AddSignOptimizer(learning_rate=0.1, sign_decay_fn=linear_decay_fn) + ``` + + Args: + learning_rate: learning_rate used when taking a step. + alpha: alpha used in optimizer. + beta: decay used for computing the moving average m. + sign_decay_fn: decay function applied to the sign(g*m) quantity. + Takes global_step as an argument and returns the quantity to multiply + the sign(g*m) by. + compute (1.0 + alpha * decay * sign(g) * sign(m)) * m. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "AddSignOptimizer". + """ + super(AddSignOptimizer, self).__init__(use_locking, name) + self._lr = learning_rate + self._alpha = alpha + self._beta = beta + + self._sign_decay_fn = sign_decay_fn + + # Tensor versions of the constructor arguments, created in _prepare(). + self._lr_t = None + self._alpha_t = None + self._beta_t = None + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + if self._sign_decay_fn is not None: + self._sign_decay_t = ops.convert_to_tensor( + self._sign_decay_fn(global_step), name='sign_decay') + return super(AddSignOptimizer, self).apply_gradients( + grads_and_vars, global_step=global_step, name=name) + + def _create_slots(self, var_list): + # Create slots for the first moment. + for v in var_list: + self._zeros_slot(v, 'm', self._name) + + def _prepare(self): + self._lr_t = ops.convert_to_tensor(self._lr, name='learning_rate') + self._beta_t = ops.convert_to_tensor(self._beta, name='beta') + self._alpha_t = ops.convert_to_tensor(self._alpha, name='alpha') + if self._sign_decay_fn is None: + self._sign_decay_t = ops.convert_to_tensor(1.0, name='sign_decay') + + def _apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.apply_add_sign( + var, + m, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._alpha_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.resource_apply_add_sign( + var.handle, + m.handle, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._alpha_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _apply_sparse(self, grad, var): + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype) + beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype) + + m = self.get_slot(var, 'm') + m_t = state_ops.assign( + m, (m * beta_t) + (grad * (1 - beta_t)), use_locking=self._use_locking) + + sign_g = ops.IndexedSlices( + math_ops.sign(grad.values), grad.indices, dense_shape=grad.dense_shape) + sign_gm = ops.IndexedSlices( + array_ops.gather(math_ops.sign(m_t), sign_g.indices) * sign_g.values, + sign_g.indices, + dense_shape=sign_g.dense_shape) + + sign_decayed = math_ops.cast( + self._sign_decay_t, var.dtype.base_dtype) + multiplier_values = alpha_t + sign_decayed * sign_gm.values + multiplier = ops.IndexedSlices( + multiplier_values, sign_gm.indices, dense_shape=sign_gm.dense_shape) + + final_update = ops.IndexedSlices( + lr_t * multiplier.values * grad.values, + multiplier.indices, + dense_shape=multiplier.dense_shape) + + var_update = state_ops.scatter_sub( + var, + final_update.indices, + final_update.values, + use_locking=self._use_locking) + + return control_flow_ops.group(* [var_update, m_t]) diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py new file mode 100644 index 0000000000..bd19ee3e7a --- /dev/null +++ b/tensorflow/contrib/opt/python/training/addsign_test.py @@ -0,0 +1,262 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for AddSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import addsign +from tensorflow.contrib.opt.python.training import sign_decay +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def py_linear_decay_fn(decay_steps): + def linear_decay(step): + step = min(step, decay_steps) + return float(decay_steps - step) / decay_steps + return linear_decay + + +def addsign_update_numpy(params, + g_t, + m, + lr, + alpha=1.0, + beta=0.9, + py_sign_decay_fn=None, + t=None): + m_t = beta * m + (1 - beta) * g_t + if py_sign_decay_fn is None: + sign_decayed = 1.0 + else: + sign_decayed = py_sign_decay_fn(t-1) + multiplier = alpha + sign_decayed * np.sign(g_t) * np.sign(m_t) + params_t = params - lr * multiplier * g_t + return params_t, m_t + + +class AddSignTest(test.TestCase): + + def _testDense(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + alpha=1.0, + beta=0.9): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(use_gpu=True): + # Initialize variables for numpy implementation. + m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = addsign.AddSignOptimizer( + learning_rate=learning_rate, + alpha=alpha, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 7 steps of AddSign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 8): + if t < 5: + if context.in_graph_mode(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + else: + if context.in_graph_mode(): + self.evaluate(neg_update) + elif t > 1: + opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + + var0_np, m0 = addsign_update_numpy( + var0_np, + grads0_np if t < 5 else -grads0_np, + m0, + learning_rate, + alpha=alpha, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + var1_np, m1 = addsign_update_numpy( + var1_np, + grads1_np if t < 5 else -grads1_np, + m1, + learning_rate, + alpha=alpha, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testDense(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testDense(use_resource=False) + self._testDense(use_resource=False, learning_rate=0.01, alpha=0.1, beta=0.8) + self._testDense(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + self._testDense(use_resource=True) + self._testDense(use_resource=True, learning_rate=0.01, alpha=0.1, beta=0.8) + self._testDense(use_resource=True, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + def _testSparse(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + alpha=1.0, + beta=0.9): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(use_gpu=True): + # Initialize variables for numpy implementation. + m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([0, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = addsign.AddSignOptimizer( + learning_rate=learning_rate, + alpha=alpha, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 7 steps of AddSign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 4): + if t < 5: + update.run() + else: + neg_update.run() + + var0_np, m0 = addsign_update_numpy( + var0_np, + grads0_np, + m0, + learning_rate, + alpha=alpha, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + var1_np, m1 = addsign_update_numpy( + var1_np, + grads1_np, + m1, + learning_rate, + alpha=alpha, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testSparse(use_resource=False) + self._testSparse(use_resource=False, + learning_rate=0.01, + alpha=0.1, + beta=0.8) + self._testSparse(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/opt/python/training/powersign.py b/tensorflow/contrib/opt/python/training/powersign.py new file mode 100644 index 0000000000..7f7521581f --- /dev/null +++ b/tensorflow/contrib/opt/python/training/powersign.py @@ -0,0 +1,173 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of PowerSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import optimizer +from tensorflow.python.training import training_ops + + +class PowerSignOptimizer(optimizer.Optimizer): + """Optimizer that implements the PowerSign update. + + See Neural Optimizer Search with Reinforcement Learning + [Bello et al., ICML2017]. + """ + + def __init__(self, + learning_rate=0.1, + base=math.e, + beta=0.9, + sign_decay_fn=None, + use_locking=False, + name='PowerSignOptimizer'): + """Constructs a new PowerSignOptimizer object. + + Initialization: + + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + t <- 0 (Initialize timestep) + ``` + + Update: + + ``` + t <- t + 1 + m_t <- beta1 * m_{t-1} + (1 - beta1) * g + sign_decay <- sign_decay(t) + update <- base ** (sign_decay * sign(g) * sign(m)) * g + variable <- variable - lr_t * update + ``` + + Example usage for PowerSign-cd (PowerSign with cosine sign decay) + ``` + decay_steps = 1000 + linear_decay_fn = sign_decays.get_linear_decay_fn(decay_steps) + opt = PowerSignOptimizer(learning_rate=0.1, sign_decay_fn=linear_decay_fn) + ``` + + Args: + learning_rate: learning_rate used when taking a step. + base: base used in optimizer. + beta: decay used for computing the moving average m. + sign_decay_fn: decay function applied to the sign(g*m) quantity. + Takes global_step as an argument and returns the quantity to multiply + the sign(g*m) by. + use_locking: If True use locks for update operations. + name: Optional name for the operations created iwhen applying gradients. + Defaults to "PowerSignOptimizer". + """ + super(PowerSignOptimizer, self).__init__(use_locking, name) + self._lr = learning_rate + self._beta = beta + self._logbase = math.log(base) + + self._sign_decay_fn = sign_decay_fn + + # Tensor versions of the constructor arguments, created in _prepare(). + self._lr_t = None + self._beta_t = None + self._logbase_t = None + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + if self._sign_decay_fn is not None: + self._sign_decay_t = ops.convert_to_tensor( + self._sign_decay_fn(global_step), name='sign_decay') + return super(PowerSignOptimizer, self).apply_gradients( + grads_and_vars, global_step=global_step, name=name) + + def _create_slots(self, var_list): + # Create slots for the first moment. + for v in var_list: + self._zeros_slot(v, 'm', self._name) + + def _prepare(self): + self._lr_t = ops.convert_to_tensor(self._lr, name='learning_rate') + self._beta_t = ops.convert_to_tensor(self._beta, name='beta') + self._logbase_t = ops.convert_to_tensor(self._logbase, name='logbase') + if self._sign_decay_fn is None: + self._sign_decay_t = ops.convert_to_tensor(1.0, name='sign_decay') + + def _apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.apply_power_sign( + var, + m, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._logbase_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.resource_apply_power_sign( + var.handle, + m.handle, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._logbase_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _apply_sparse(self, grad, var): + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype) + logbase_t = math_ops.cast(self._logbase_t, var.dtype.base_dtype) + e_t = math_ops.cast(math.e, var.dtype.base_dtype) + + m = self.get_slot(var, 'm') + m_t = state_ops.assign( + m, (m * beta_t) + (grad * (1 - beta_t)), use_locking=self._use_locking) + + sign_g = ops.IndexedSlices( + math_ops.sign(grad.values), grad.indices, dense_shape=grad.dense_shape) + sign_gm = ops.IndexedSlices( + array_ops.gather(math_ops.sign(m_t), sign_g.indices) * sign_g.values, + sign_g.indices, + dense_shape=sign_g.dense_shape) + + sign_decayed = math_ops.cast( + self._sign_decay_t, var.dtype.base_dtype) + multiplier_values = math_ops.pow( + e_t, logbase_t * sign_decayed * sign_gm.values) + multiplier = ops.IndexedSlices( + multiplier_values, sign_gm.indices, dense_shape=sign_gm.dense_shape) + + final_update = ops.IndexedSlices( + lr_t * multiplier.values * grad.values, + multiplier.indices, + dense_shape=multiplier.dense_shape) + + var_update = state_ops.scatter_sub( + var, + final_update.indices, + final_update.values, + use_locking=self._use_locking) + + return control_flow_ops.group(* [var_update, m_t]) diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py new file mode 100644 index 0000000000..ff7b1a72d4 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/powersign_test.py @@ -0,0 +1,268 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for PowerSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import numpy as np + +from tensorflow.contrib.opt.python.training import powersign +from tensorflow.contrib.opt.python.training import sign_decay +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def py_linear_decay_fn(decay_steps): + def linear_decay(step): + step = min(step, decay_steps) + return float(decay_steps - step) / decay_steps + return linear_decay + + +def powersign_update_numpy(params, + g_t, + m, + lr, + base=math.e, + beta=0.9, + py_sign_decay_fn=None, + t=None): + m_t = beta * m + (1 - beta) * g_t + if py_sign_decay_fn is None: + sign_decayed = 1.0 + else: + sign_decayed = py_sign_decay_fn(t-1) + multiplier = base ** (sign_decayed * np.sign(g_t) * np.sign(m_t)) + params_t = params - lr * multiplier * g_t + return params_t, m_t + + +class PowerSignTest(test.TestCase): + + def _testDense(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + base=math.e, + beta=0.9): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(use_gpu=True): + # Initialize variables for numpy implementation. + m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = powersign.PowerSignOptimizer( + learning_rate=learning_rate, + base=base, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 7 steps of powersign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 8): + if t < 5: + if context.in_graph_mode(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + else: + if context.in_graph_mode(): + self.evaluate(neg_update) + elif t > 1: + opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + + var0_np, m0 = powersign_update_numpy( + var0_np, + grads0_np if t < 5 else -grads0_np, + m0, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + var1_np, m1 = powersign_update_numpy( + var1_np, + grads1_np if t < 5 else -grads1_np, + m1, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testDense(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testDense(use_resource=False) + self._testDense(use_resource=False, + learning_rate=0.1, + base=10.0, + beta=0.8) + self._testDense(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + self._testDense(use_resource=True) + self._testDense(use_resource=True, learning_rate=0.1, base=10.0, beta=0.8) + self._testDense(use_resource=True, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + def _testSparse(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + base=math.e, + beta=0.9): + with self.test_session(use_gpu=True): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + # Initialize variables for numpy implementation. + m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([0, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = powersign.PowerSignOptimizer( + learning_rate=learning_rate, + base=base, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 3 steps of powersign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 8): + if t < 5: + update.run() + else: + neg_update.run() + + var0_np, m0 = powersign_update_numpy( + var0_np, + grads0_np if t < 5 else -grads0_np, + m0, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + var1_np, m1 = powersign_update_numpy( + var1_np, + grads1_np if t < 5 else -grads1_np, + m1, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testSparse(use_resource=False) + self._testSparse(use_resource=False, + learning_rate=0.01, + base=2.0, + beta=0.8) + self._testSparse(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/opt/python/training/sign_decay.py b/tensorflow/contrib/opt/python/training/sign_decay.py new file mode 100644 index 0000000000..e8870c0721 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/sign_decay.py @@ -0,0 +1,158 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of the sign decay functions used in PowerSign and AddSign. + +See [Bello et al., ICML 2017] Neural Optimizer Search with Reinforcement +Learning for details. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def get_linear_decay_fn(decay_steps): + """Returns a function that computes a linear decay. + + This decay computes linear annealing: + max(0, (decay_steps - global_step) / decay_steps) + + Example usage: + ``` + decay_steps = 1000 + linear_decay_fn = get_linear_decay_fn(decay_steps) + decayed = linear_decay_fn(global_step) + x *= decayed + ``` + Args: + decay_steps: number of steps to decay over. + Returns: + linear_decay_fn: a function that computes the linear decay. + """ + # pylint:disable=missing-docstring + def linear_decay_fn(global_step): + if global_step is None: + raise ValueError("global_step is required for linear_decay.") + global_step = math_ops.minimum(global_step, decay_steps) + remaining_steps = math_ops.to_int32(decay_steps) - math_ops.to_int32( + global_step) + decayed = math_ops.to_float(remaining_steps) / math_ops.to_float( + decay_steps) + return math_ops.maximum(0.0, decayed) + # pylint:enable=missing-docstring + return linear_decay_fn + + +def get_cosine_decay_fn(decay_steps, num_periods=0.5, zero_after=None): + """Returns a function that computes a cosine decay. + + This decay computes cosine annealing: + 0.5 * (1.0 + cos(2.0 * pi * num_periods * global_step / decay_steps)) + + This decay can be used to decay the sign quantity in the AddSign and PowerSign + optimizers discovered in + [Bello et al., ICML 2017] Neural Optimizer Search with RL. + + Example usage: + ``` + decay_steps = 1000 + num_periods = 2 + cosine_decay_fn = get_cosine_decay_fn(decay_steps, num_periods=num_periods) + decayed = cosine_decay_fn(global_step) + x *= decayed + ``` + Args: + decay_steps: number of steps to decay over. + num_periods: number of periods for cosine signal. 0.5 by default, + which maps the last decay step to 0. + zero_after: if not None, number after which the decay function + will just return 0. + Returns: + cosine_decay_fn: a function that computes the cosine decay. + """ + # pylint:disable=missing-docstring + def cosine_decay_fn(global_step): + if global_step is None: + raise ValueError("global_step is required for cosine_decay.") + global_step = math_ops.minimum(global_step, decay_steps) + completed_fraction = math_ops.to_float(global_step) / math_ops.to_float( + decay_steps) + fraction = 2.0 * num_periods * completed_fraction + decayed = 0.5 * ( + 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction)) + if zero_after is not None: + decayed = array_ops.where( + math_ops.greater_equal(fraction, 2 * zero_after), 0.0, decayed) + return decayed + # pylint:enable=missing-docstring + return cosine_decay_fn + + +def get_restart_decay_fn(decay_steps, num_periods=1, zero_after=None): + """Returns a function that computes a restart decay. + + This decay computes + 0.5 * (1.0 + cos(pi * (num_periods * global_step) % num_training_steps)) + + This is a simplified version of the restart decay introduced in + "SGDR: Stochastic Gradient Descent with Warm Restarts" + by Ilya Loshchilov & Frank Hutter, Proceedings of + ICLR'2017, available at https://arxiv.org/pdf/1608.03983.pdf + + This decay can be used to decay the sign quantity in the AddSign and PowerSign + optimizers discovered in + [Bello et al., ICML 2017] Neural Optimizer Search with RL. + + Example usage: + ``` + decay_steps = 1000 + num_periods = 2.0 + restart_decay_fn = get_restart_decay_fn(decay_steps, + num_periods=num_periods) + decayed = restart_decay_fn(global_step) + x *= decayed + ``` + Args: + decay_steps: number of steps to decay over. + num_periods: number of periods for cosine signal. 1 by default, + which maps the last decay step to 0. + zero_after: if not None, number after which the decay function + will return 0. + Returns: + restart_decay_fn: a function that computes the restart decay. + """ + # pylint:disable=missing-docstring + def restart_decay_fn(global_step): + if global_step is None: + raise ValueError("global_step is required for cosine_decay.") + global_step = math_ops.minimum(global_step, decay_steps) + num = math_ops.mod(num_periods * math_ops.to_float(global_step), + decay_steps) + fraction = num / math_ops.to_float(decay_steps) + decayed = 0.5 * ( + 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction)) + if zero_after is not None: + tmp = math_ops.to_float( + num_periods * global_step) / math_ops.to_float(decay_steps) + decayed = array_ops.where( + math_ops.greater_equal(tmp, zero_after), 0.0, decayed) + return decayed + # pylint:enable=missing-docstring + return restart_decay_fn diff --git a/tensorflow/contrib/opt/python/training/sign_decay_test.py b/tensorflow/contrib/opt/python/training/sign_decay_test.py new file mode 100644 index 0000000000..c31cb924ea --- /dev/null +++ b/tensorflow/contrib/opt/python/training/sign_decay_test.py @@ -0,0 +1,110 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sign_decay.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from tensorflow.contrib.opt.python.training import sign_decay +from tensorflow.python.platform import test + + +def py_linear_decay_fn(decay_steps): + + def linear_decay(step): + step = min(step, decay_steps) + return float(decay_steps - step) / decay_steps + + return linear_decay + + +def py_cosine_decay_fn(decay_steps, num_periods=0.5, zero_after=None): + + def cosine_decay(step): + step = min(step, decay_steps) + fraction = 2.0 * num_periods * step / float(decay_steps) + if zero_after is not None and fraction >= 2 * zero_after: + return 0.0 + return 0.5 * (1.0 + math.cos(math.pi * fraction)) + + return cosine_decay + + +def py_restart_decay_fn(decay_steps, num_periods=1, zero_after=None): + + def restart_decay(step): + step = min(step, decay_steps) + tmp = num_periods * step / float(decay_steps) + fraction = ( + num_periods * step % decay_steps) / float(decay_steps) + if zero_after is not None and tmp >= zero_after: + return 0 + return 0.5 * (1.0 + math.cos(math.pi * fraction)) + + return restart_decay + + +class SignDecaysTest(test.TestCase): + + def testLinearDecay(self): + num_training_steps = 1000 + linear_decay_fn = sign_decay.get_linear_decay_fn(num_training_steps) + + for step in range(0, 1000, 100): + with self.test_session(): + tf_decayed = linear_decay_fn(step).eval() + py_decayed = py_linear_decay_fn(num_training_steps)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + def testCosineDecay(self): + num_training_steps = 1000 + cosine_decay_fn = sign_decay.get_cosine_decay_fn(num_training_steps) + cosine_decay_2_fn = sign_decay.get_cosine_decay_fn( + num_training_steps, num_periods=5, zero_after=2) + + for step in range(0, 1000, 100): + with self.test_session(): + tf_decayed = cosine_decay_fn(step).eval() + py_decayed = py_cosine_decay_fn(num_training_steps)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + tf_decayed = cosine_decay_2_fn(step).eval() + py_decayed = py_cosine_decay_fn( + num_training_steps, num_periods=5, zero_after=2)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + def testRestartDecay(self): + num_training_steps = 1000 + restart_decay_fn = sign_decay.get_restart_decay_fn(num_training_steps) + restart_decay_2_fn = sign_decay.get_restart_decay_fn( + num_training_steps, num_periods=5, zero_after=2) + + for step in range(0, 1000, 100): + with self.test_session(): + tf_decayed = restart_decay_fn(step).eval() + py_decayed = py_restart_decay_fn(num_training_steps)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + tf_decayed = restart_decay_2_fn(step).eval() + py_decayed = py_restart_decay_fn( + num_training_steps, num_periods=5, zero_after=2)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 98dfa5a3dd..76c30c5a46 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -15,12 +15,13 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/training_ops.h" #include + #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/training_op_helpers.h" +#include "tensorflow/core/kernels/training_ops.h" #include "tensorflow/core/kernels/variable_ops.h" #ifdef TENSORFLOW_USE_SYCL @@ -361,6 +362,37 @@ struct ApplyCenteredRMSProp { } }; +template +struct ApplyAddSign { + void operator()(const CPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar alpha, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + auto sign_gm = grad.sign() * m.sign(); + var.device(d) -= lr() * (alpha() + sign_decay() * sign_gm) * grad; + } +}; + +template +struct ApplyPowerSign { + void operator()(const CPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar logbase, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + auto sign_gm = grad.sign() * m.sign(); + auto grad_scale = (logbase() * sign_decay() * sign_gm).exp(); + var.device(d) -= lr() * grad_scale * grad; + } +}; + } // namespace functor template @@ -3243,4 +3275,220 @@ REGISTER_KERNELS(double, int64); #undef REGISTER_KERNELS + +template +class ApplyAddSignOp : public OpKernel { + public: + explicit ApplyAddSignOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); + } + + void Compute(OpKernelContext* ctx) override { + auto locks = + MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1}); + + Tensor var; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 0, use_exclusive_lock_, false, &var)); + Tensor m; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 1, use_exclusive_lock_, false, &m)); + OP_REQUIRES( + ctx, var.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(0))); + OP_REQUIRES( + ctx, m.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(1))); + const Tensor& lr = ctx->input(2); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()), + errors::InvalidArgument("lr is not a scalar: ", + lr.shape().DebugString())); + const Tensor& alpha = ctx->input(3); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(alpha.shape()), + errors::InvalidArgument("alpha is not a scalar: ", + alpha.shape().DebugString())); + const Tensor& sign_decay = ctx->input(4); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(alpha.shape()), + errors::InvalidArgument("sign_decay is not a scalar: ", + sign_decay.shape().DebugString())); + const Tensor& beta = ctx->input(5); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta.shape()), + errors::InvalidArgument("beta is not a scalar: ", + beta.shape().DebugString())); + const Tensor& grad = ctx->input(6); + OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()), + errors::InvalidArgument("var and m do not have the same shape", + var.shape().DebugString(), " ", + m.shape().DebugString())); + OP_REQUIRES( + ctx, var.shape().IsSameSize(grad.shape()), + errors::InvalidArgument("var and grad do not have the same shape", + var.shape().DebugString(), " ", + grad.shape().DebugString())); + + const Device& device = ctx->template eigen_device(); + functor::ApplyAddSign()( + device, var.flat(), m.flat(), lr.scalar(), alpha.scalar(), + sign_decay.scalar(), beta.scalar(), grad.flat()); + MaybeForwardRefInputToRefOutput(ctx, 0, 0); + } + + private: + bool use_exclusive_lock_; +}; + +#define REGISTER_KERNELS(D, T) \ + REGISTER_KERNEL_BUILDER( \ + Name("ApplyAddSign").Device(DEVICE_##D).TypeConstraint("T"), \ + ApplyAddSignOp); \ + REGISTER_KERNEL_BUILDER(Name("ResourceApplyAddSign") \ + .Device(DEVICE_##D) \ + .HostMemory("var") \ + .HostMemory("m") \ + .TypeConstraint("T"), \ + ApplyAddSignOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); + +TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); + +#if GOOGLE_CUDA +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ApplyAddSign::operator()( \ + const GPUDevice& d, \ + typename TTypes::Flat var, \ + typename TTypes::Flat m, \ + typename TTypes::ConstScalar lr, \ + typename TTypes::ConstScalar alpha, \ + typename TTypes::ConstScalar sign_decay, \ + typename TTypes::ConstScalar beta, \ + typename TTypes::ConstFlat grad); \ + extern template struct ApplyAddSign; +DECLARE_GPU_SPEC(Eigen::half); +DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(double); +#undef DECLARE_GPU_SPEC +} // namespace functor + +REGISTER_KERNELS(GPU, Eigen::half); +REGISTER_KERNELS(GPU, float); +REGISTER_KERNELS(GPU, double); +#endif +#undef REGISTER_CPU_KERNELS +#undef REGISTER_KERNELS + + +template +class ApplyPowerSignOp : public OpKernel { + public: + explicit ApplyPowerSignOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); + } + + void Compute(OpKernelContext* ctx) override { + auto locks = + MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1}); + + Tensor var; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 0, use_exclusive_lock_, false, &var)); + Tensor m; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 1, use_exclusive_lock_, false, &m)); + OP_REQUIRES( + ctx, var.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(0))); + OP_REQUIRES( + ctx, m.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(1))); + const Tensor& lr = ctx->input(2); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()), + errors::InvalidArgument("lr is not a scalar: ", + lr.shape().DebugString())); + const Tensor& logbase = ctx->input(3); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(logbase.shape()), + errors::InvalidArgument("logbase is not a scalar: ", + logbase.shape().DebugString())); + const Tensor& sign_decay = ctx->input(4); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(logbase.shape()), + errors::InvalidArgument("sign_decay is not a scalar: ", + sign_decay.shape().DebugString())); + const Tensor& beta = ctx->input(5); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta.shape()), + errors::InvalidArgument("beta is not a scalar: ", + beta.shape().DebugString())); + const Tensor& grad = ctx->input(6); + OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()), + errors::InvalidArgument("var and m do not have the same shape", + var.shape().DebugString(), " ", + m.shape().DebugString())); + OP_REQUIRES( + ctx, var.shape().IsSameSize(grad.shape()), + errors::InvalidArgument("var and grad do not have the same shape", + var.shape().DebugString(), " ", + grad.shape().DebugString())); + + const Device& device = ctx->template eigen_device(); + functor::ApplyPowerSign()( + device, var.flat(), m.flat(), lr.scalar(), logbase.scalar(), + sign_decay.scalar(), beta.scalar(), grad.flat()); + MaybeForwardRefInputToRefOutput(ctx, 0, 0); + } + + private: + bool use_exclusive_lock_; +}; + +#define REGISTER_KERNELS(D, T) \ + REGISTER_KERNEL_BUILDER( \ + Name("ApplyPowerSign").Device(DEVICE_##D).TypeConstraint("T"), \ + ApplyPowerSignOp); \ + REGISTER_KERNEL_BUILDER(Name("ResourceApplyPowerSign") \ + .Device(DEVICE_##D) \ + .HostMemory("var") \ + .HostMemory("m") \ + .TypeConstraint("T"), \ + ApplyPowerSignOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); + +TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); + +#if GOOGLE_CUDA +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ApplyPowerSign::operator()( \ + const GPUDevice& d, \ + typename TTypes::Flat var, \ + typename TTypes::Flat m, \ + typename TTypes::ConstScalar lr, \ + typename TTypes::ConstScalar logbase, \ + typename TTypes::ConstScalar sign_decay, \ + typename TTypes::ConstScalar beta, \ + typename TTypes::ConstFlat grad); \ + extern template struct ApplyPowerSign; +DECLARE_GPU_SPEC(Eigen::half); +DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(double); +#undef DECLARE_GPU_SPEC +} // namespace functor + +REGISTER_KERNELS(GPU, Eigen::half); +REGISTER_KERNELS(GPU, float); +REGISTER_KERNELS(GPU, double); +#endif +#undef REGISTER_CPU_KERNELS +#undef REGISTER_KERNELS + } // namespace tensorflow diff --git a/tensorflow/core/kernels/training_ops.h b/tensorflow/core/kernels/training_ops.h index 99a714e0a2..7ee956053a 100644 --- a/tensorflow/core/kernels/training_ops.h +++ b/tensorflow/core/kernels/training_ops.h @@ -161,6 +161,29 @@ struct ApplyCenteredRMSProp { typename TTypes::ConstScalar epsilon, typename TTypes::ConstFlat grad); }; + +template +struct ApplyAddSign { + void operator()(const Device& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar alpha, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad); +}; + +template +struct ApplyPowerSign { + void operator()(const Device& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar logbase, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad); +}; + } // end namespace functor } // end namespace tensorflow diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 3678b96e98..f501161095 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -193,6 +193,71 @@ struct ApplyCenteredRMSProp { } }; +template +struct ApplyAddSign { + void operator()(const GPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar alpha, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + Eigen::array::Tensor::Index, 1> bcast; + bcast[0] = grad.dimension(0); + Eigen::Sizes<1> single; + + // The following is the GPU equivalent of the CPU version: + // m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + const auto one = static_cast(1.0); + auto beta_bcast = beta.reshape(single).broadcast(bcast); + auto one_minus_beta = + (beta.constant(one) - beta).reshape(single).broadcast(bcast); + m.device(d) = m * beta_bcast + grad * one_minus_beta; + + // The following is the GPU equivalent of the CPU version: + // var.device(d) -= lr() * (alpha() + sign_decay() * sign_gm) * grad; + auto sign_gm = grad.sign() * m.sign(); + auto lr_bcast = lr.reshape(single).broadcast(bcast); + auto alpha_bcast = alpha.reshape(single).broadcast(bcast); + auto sign_decay_bcast = sign_decay.reshape(single).broadcast(bcast); + var.device(d) -= + lr_bcast * (alpha_bcast + sign_decay_bcast * sign_gm) * grad; + } +}; + +template +struct ApplyPowerSign { + void operator()(const GPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar logbase, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + Eigen::array::Tensor::Index, 1> bcast; + bcast[0] = grad.dimension(0); + Eigen::Sizes<1> single; + + // The following is the GPU equivalent of the CPU version: + // m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + const auto one = static_cast(1.0); + auto beta_bcast = beta.reshape(single).broadcast(bcast); + auto one_minus_beta = + (beta.constant(one) - beta).reshape(single).broadcast(bcast); + m.device(d) = m * beta_bcast + grad * one_minus_beta; + + // The following is the GPU equivalent of the CPU version: + // auto grad_scale = (logbase() * sign_decay() * sign_gm).exp(); + // var.device(d) -= lr() * grad_scale * grad; + auto sign_gm = grad.sign() * m.sign(); + auto lr_bcast = lr.reshape(single).broadcast(bcast); + auto logbase_bcast = logbase.reshape(single).broadcast(bcast); + auto sign_decay_bcast = sign_decay.reshape(single).broadcast(bcast); + auto grad_scale = (logbase_bcast * sign_decay_bcast * sign_gm).exp(); + var.device(d) -= lr_bcast * grad_scale * grad; + } +}; + } // namespace functor template struct functor::ApplyGradientDescent; @@ -222,6 +287,15 @@ template struct functor::ApplyRMSProp; template struct functor::ApplyCenteredRMSProp; template struct functor::ApplyCenteredRMSProp; template struct functor::ApplyCenteredRMSProp; + +template struct functor::ApplyAddSign; +template struct functor::ApplyAddSign; +template struct functor::ApplyAddSign; + +template struct functor::ApplyPowerSign; +template struct functor::ApplyPowerSign; +template struct functor::ApplyPowerSign; + } // end namespace tensorflow #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/training_ops_test.cc b/tensorflow/core/kernels/training_ops_test.cc index 4b1c9eb8bb..ffa7f87c9e 100644 --- a/tensorflow/core/kernels/training_ops_test.cc +++ b/tensorflow/core/kernels/training_ops_test.cc @@ -233,4 +233,78 @@ static void BM_RMSProp(int iters, int params) { } BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10); +static void AddSign(int32 n, Graph** init_g, Graph** train_g) { + TensorShape shape({n}); + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = Var(g, n); + auto zero = Zeros(g, n); + test::graph::Assign(g, var, zero); + test::graph::Assign(g, m, zero); + *init_g = g; + } + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = Var(g, n); + auto lr = Scalar(g, 0.01); + auto alpha = Scalar(g, 0.1); + auto sign_decay = Scalar(g, 0.9); + auto beta = Scalar(g, 0.8); + auto grad = Random(g, n); + test::graph::Multi(g, "ApplyAddSign", + {var, m, lr, alpha, sign_decay, beta, grad}); + *train_g = g; + } +} + +static void BM_AddSign(int iters, int params) { + const int64 tot = static_cast(iters) * params; + testing::ItemsProcessed(tot); + testing::BytesProcessed(tot * sizeof(float)); + Graph* init; + Graph* train; + AddSign(params, &init, &train); + test::Benchmark("cpu", train, GetOptions(), init).Run(iters); +} +BENCHMARK(BM_AddSign)->Arg(128 << 10)->Arg(256 << 10); + +static void PowerSign(int32 n, Graph** init_g, Graph** train_g) { + TensorShape shape({n}); + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = Var(g, n); + auto zero = Zeros(g, n); + test::graph::Assign(g, var, zero); + test::graph::Assign(g, m, zero); + *init_g = g; + } + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = Var(g, n); + auto lr = Scalar(g, 0.01); + auto logbase = Scalar(g, 2); + auto sign_decay = Scalar(g, 0.9); + auto beta = Scalar(g, 0.8); + auto grad = Random(g, n); + test::graph::Multi(g, "ApplyPowerSign", + {var, m, lr, logbase, sign_decay, beta, grad}); + *train_g = g; + } +} + +static void BM_PowerSign(int iters, int params) { + const int64 tot = static_cast(iters) * params; + testing::ItemsProcessed(tot); + testing::BytesProcessed(tot * sizeof(float)); + Graph* init; + Graph* train; + PowerSign(params, &init, &train); + test::Benchmark("cpu", train, GetOptions(), init).Run(iters); +} +BENCHMARK(BM_PowerSign)->Arg(128 << 10)->Arg(256 << 10); + } // end namespace tensorflow diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc index 6f06b87d58..405318caf2 100644 --- a/tensorflow/core/ops/training_ops.cc +++ b/tensorflow/core/ops/training_ops.cc @@ -22,6 +22,48 @@ using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; +const char kAddSignCommonDocStr[] = R"doc( +Update '*var' according to the AddSign update. + +m_t <- beta1 * m_{t-1} + (1 - beta1) * g +update <- (alpha + sign_decay * sign(g) *sign(m)) * g +variable <- variable - lr_t * update + +var: Should be from a Variable(). +m: Should be from a Variable(). +lr: Scaling factor. Must be a scalar. +sign_decay: Must be a scalar. +alpha: Must be a scalar. +beta: Must be a scalar. +grad: The gradient. +)doc"; + +const char kPowerSignCommonDocStr[] = R"doc( +Update '*var' according to the AddSign update. + +m_t <- beta1 * m_{t-1} + (1 - beta1) * g +update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g +variable <- variable - lr_t * update + +var: Should be from a Variable(). +m: Should be from a Variable(). +lr: Scaling factor. Must be a scalar. +logbase: Must be a scalar. +sign_decay: Must be a scalar. +beta: Must be a scalar. +grad: The gradient. +)doc"; + +const char kOutDocStr[] = R"doc( +out: Same as "var". +)doc"; + +const char kLockDocStr[] = R"doc( +use_locking: If `True`, updating of the var and m tensors is + protected by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +)doc"; + static ShapeHandle ShapeOrHandleShape(InferenceContext* c, int input) { auto* handle_data = c->input_handle_shapes_and_types(input); if (handle_data != nullptr && !handle_data->empty() && @@ -1796,4 +1838,99 @@ use_locking: If `True`, updating of the var, mg, ms, and mom tensors is contention. )doc"); +static Status ApplyAddSignShapeFn(InferenceContext* c, bool sparse) { + ShapeHandle unused; + ShapeHandle s = ShapeOrHandleShape(c, 0); // var + TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // m + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // alpha + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // sign_decay + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); // beta + TF_RETURN_IF_ERROR( + HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s)); + if (c->num_outputs() > 0) { + c->set_output(0, s); + } + return Status::OK(); +} + +REGISTER_OP("ApplyAddSign") + .Input("var: Ref(T)") + .Input("m: Ref(T)") + .Input("lr: T") + .Input("alpha: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Output("out: Ref(T)") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAddSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kAddSignCommonDocStr, kOutDocStr, kLockDocStr)); + +REGISTER_OP("ResourceApplyAddSign") + .Input("var: resource") + .Input("m: resource") + .Input("lr: T") + .Input("alpha: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAddSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kAddSignCommonDocStr, kLockDocStr)); + +static Status ApplyPowerSignShapeFn(InferenceContext* c, bool sparse) { + ShapeHandle unused; + ShapeHandle s = ShapeOrHandleShape(c, 0); // var + TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // m + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // logbase + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // sign_delay + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); // beta + TF_RETURN_IF_ERROR( + HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s)); + if (c->num_outputs() > 0) { + c->set_output(0, s); + } + return Status::OK(); +} + +REGISTER_OP("ApplyPowerSign") + .Input("var: Ref(T)") + .Input("m: Ref(T)") + .Input("lr: T") + .Input("logbase: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Output("out: Ref(T)") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyPowerSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kPowerSignCommonDocStr, kOutDocStr, kLockDocStr)); + +REGISTER_OP("ResourceApplyPowerSign") + .Input("var: resource") + .Input("m: resource") + .Input("lr: T") + .Input("logbase: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyPowerSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kPowerSignCommonDocStr, kLockDocStr)); + + } // namespace tensorflow diff --git a/tensorflow/core/ops/training_ops_test.cc b/tensorflow/core/ops/training_ops_test.cc index 92d5ad9964..de4e3cd9e7 100644 --- a/tensorflow/core/ops/training_ops_test.cc +++ b/tensorflow/core/ops/training_ops_test.cc @@ -332,4 +332,38 @@ TEST(TrainingOpsTest, SparseApplyRMSProp_ShapeFn) { INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;?;[?];?;?"); } +TEST(TrainingOpsTest, ApplyAddSign_ShapeFn) { + ShapeInferenceTestOp op("ApplyAddSign"); + + // Output is a merge of inputs 0, 1, and 6 (var, ms, and grad). + INFER_OK(op, "[1,?,?];[?,2,?];[];[];[];[];[?,?,2]", "[d0_0,d1_1,d6_2]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[2];[];[];[];[];[1]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[1];[];[];[];[];[2]"); + + // lr, alpha, sign_decay, and beta must be scalars. + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;[?];?;?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;[?];?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;[?];?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;[?];?"); +} + +TEST(TrainingOpsTest, ApplyPowerSign_ShapeFn) { + ShapeInferenceTestOp op("ApplyPowerSign"); + + // Output is a merge of inputs 0, 1, and 6 (var, ms, and grad). + INFER_OK(op, "[1,?,?];[?,2,?];[];[];[];[];[?,?,2]", "[d0_0,d1_1,d6_2]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[2];[];[];[];[];[1]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[1];[];[];[];[];[2]"); + + // lr, logbase, sign_decay, and beta must be scalars. + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;[?];?;?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;[?];?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;[?];?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;[?];?"); +} + } // end namespace tensorflow -- GitLab From aab5a41eb139812a50a728a9e888bb0290c4c95e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 12:20:44 -0800 Subject: [PATCH 0582/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 176143870 --- .../core/ops/compat/ops_history.v1.pbtxt | 264 +++++++++++++++ tensorflow/core/ops/ops.pbtxt | 306 ++++++++++++++++++ 2 files changed, 570 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index daeb6763c8..c7a296d938 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -1536,6 +1536,75 @@ op { } } } +op { + name: "ApplyAddSign" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "alpha" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ApplyCenteredRMSProp" input_arg { @@ -2228,6 +2297,75 @@ op { } } } +op { + name: "ApplyPowerSign" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "logbase" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ApplyProximalAdagrad" input_arg { @@ -26847,6 +26985,69 @@ op { } is_stateful: true } +op { + name: "ResourceApplyAddSign" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "alpha" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "ResourceApplyCenteredRMSProp" input_arg { @@ -27473,6 +27674,69 @@ op { } is_stateful: true } +op { + name: "ResourceApplyPowerSign" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "logbase" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "ResourceApplyProximalAdagrad" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 55a8fc9032..d043696a94 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1103,6 +1103,86 @@ op { summary: "Update \'*var\' according to the Adam algorithm." description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)" } +op { + name: "ApplyAddSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "alpha" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." + type_attr: "T" + } + output_arg { + name: "out" + description: "Same as \"var\"." + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update" +} op { name: "ApplyCenteredRMSProp" input_arg { @@ -1506,6 +1586,86 @@ op { summary: "Update \'*var\' according to the momentum scheme. Set use_nesterov = True if you" description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum" } +op { + name: "ApplyPowerSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "logbase" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." + type_attr: "T" + } + output_arg { + name: "out" + description: "Same as \"var\"." + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update" +} op { name: "ApplyProximalAdagrad" input_arg { @@ -21774,6 +21934,79 @@ op { description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)" is_stateful: true } +op { + name: "ResourceApplyAddSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "alpha" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update" + is_stateful: true +} op { name: "ResourceApplyCenteredRMSProp" input_arg { @@ -22139,6 +22372,79 @@ op { description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum" is_stateful: true } +op { + name: "ResourceApplyPowerSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "logbase" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update" + is_stateful: true +} op { name: "ResourceApplyProximalAdagrad" input_arg { -- GitLab From d32150d14f1651e20bafa07e6f1b51a32fd75999 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 12:30:54 -0800 Subject: [PATCH 0583/1801] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 176145113 --- tensorflow/go/op/wrappers.go | 2276 ++++++++++++++++++---------------- 1 file changed, 1187 insertions(+), 1089 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 1d1383ec82..664e37d3a1 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -2681,21 +2681,6 @@ func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) { return scope.AddOperation(opspec) } -// Does nothing. Serves as a control trigger for scheduling. -// -// Only useful as a placeholder for control edges. -// -// Returns the created operation. -func ControlTrigger(scope *Scope) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ControlTrigger", - } - return scope.AddOperation(opspec) -} - // SpaceToDepthAttr is an optional argument to SpaceToDepth. type SpaceToDepthAttr func(optionalAttr) @@ -8123,88 +8108,82 @@ func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. -type QuantizeAndDequantizeV3Attr func(optionalAttr) - -// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// Quantizes then dequantizes a tensor. +// Merges summaries. // -// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a -// tensor, so its value can change during training. -func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { +// This op creates a +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// protocol buffer that contains the union of all the values in the input +// summaries. +// +// When the Op is run, it reports an `InvalidArgument` error if multiple values +// in the summaries to merge use the same tag. +// +// Arguments: +// inputs: Can be of any shape. Each must contain serialized `Summary` protocol +// buffers. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV3", + Type: "MergeSummary", Input: []tf.Input{ - input, input_min, input_max, num_bits, + tf.OutputList(inputs), }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AvgPool3DAttr is an optional argument to AvgPool3D. -type AvgPool3DAttr func(optionalAttr) +// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. +type AudioSummaryV2Attr func(optionalAttr) -// AvgPool3DDataFormat sets the optional data_format attribute to value. +// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DDataFormat(value string) AvgPool3DAttr { +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { return func(m optionalAttr) { - m["data_format"] = value + m["max_outputs"] = value } } -// Performs 3D average pooling on the input. +// Outputs a `Summary` protocol buffer with audio. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. // // Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. // -// Returns The average pooled output tensor. -func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AvgPool3D", + Type: "AudioSummaryV2", Input: []tf.Input{ - input, + tag, tensor, sample_rate, }, Attrs: attrs, } @@ -8212,35 +8191,6 @@ func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa return op.Output(0) } -// Produces the max pool of the input tensor for quantized types. -// -// Arguments: -// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. -// padding: The type of padding algorithm to use. -// -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "QuantizedMaxPool", - Input: []tf.Input{ - input, min_input, max_input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2. type Conv3DBackpropInputV2Attr func(optionalAttr) @@ -8725,32 +8675,6 @@ func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad return op.Output(0) } -// Adds `bias` to `value`. -// -// This is a deprecated version of BiasAdd and will be soon removed. -// -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. -// Broadcasting is supported, so `value` may have any number of dimensions. -// -// Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. -// -// Returns Broadcasted sum of `value` and `bias`. -func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BiasAddV1", - Input: []tf.Input{ - value, bias, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // EncodeJpegAttr is an optional argument to EncodeJpeg. type EncodeJpegAttr func(optionalAttr) @@ -13128,41 +13052,264 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp return op.Output(0) } -// Batch normalization. +// WriteImageSummaryAttr is an optional argument to WriteImageSummary. +type WriteImageSummaryAttr func(optionalAttr) + +// WriteImageSummaryMaxImages sets the optional max_images attribute to value. // -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// value: Max number of batch elements to generate images for. +// If not specified, defaults to 3 // -// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// REQUIRES: value >= 1 +func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { + return func(m optionalAttr) { + m["max_images"] = value + } +} + +// Writes a `Summary` protocol buffer with images. +// +// The summary has up to `max_images` summary values containing images. The +// images are built from `tensor` which must be 4-D with shape `[batch_size, +// height, width, channels]` and where `channels` can be: +// +// * 1: `tensor` is interpreted as Grayscale. +// * 3: `tensor` is interpreted as RGB. +// * 4: `tensor` is interpreted as RGBA. +// +// The images have the same number of channels as the input tensor. For float +// input, the values are normalized one image at a time to fit in the range +// `[0, 255]`. `uint8` values are unchanged. The op uses two different +// normalization algorithms: +// +// * If the input values are all positive, they are rescaled so the largest one +// is 255. +// +// * If any input value is negative, the values are shifted so input value 0.0 +// is at 127. They are then rescaled so that either the smallest value is 0, +// or the largest one is 255. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_images` is 1, the summary value tag is '*tag*/image'. +// * If `max_images` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// +// The `bad_color` argument is the color to use in the generated images for +// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// Each element must be in the range `[0, 255]` (It represents the value of a +// pixel in the output image). Non-finite values in the input tensor are +// replaced by this tensor in the output image. The default value is the color +// red. // // Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { +// writer: A handle to a summary writer. +// step: The step to write the summary for. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 4-D of shape `[batch_size, height, width, channels]` where +// `channels` is 1, 3, or 4. +// bad_color: Color to use for pixels with non-finite values. +// +// Returns the created operation. +func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalization", + Type: "WriteImageSummary", Input: []tf.Input{ - t, m, v, beta, gamma, + writer, step, tag, tensor, bad_color, }, Attrs: attrs, } - op := scope.AddOperation(opspec) + return scope.AddOperation(opspec) +} + +// Pads a tensor with zeros. +// +// This operation pads a `input` with zeros according to the `paddings` you +// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the +// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many zeros to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` +// in that dimension. +// +// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 1], [2, 2]] +// # 'paddings' is [[1, 1], [2, 2]] +// # rank of 't' is 2 +// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] +// [0, 0, 1, 1, 0, 0] +// [0, 0, 2, 2, 0, 0] +// [0, 0, 0, 0, 0, 0]] +// ``` +func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Pad", + Input: []tf.Input{ + input, paddings, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the number of elements in the given queue. +// +// Arguments: +// handle: The handle to a queue. +// +// Returns The number of elements in the given queue. +func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "QueueSizeV2", + Input: []tf.Input{ + handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with a histogram. +// +// The generated +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// has one summary value containing a histogram for `values`. +// +// This op reports an `InvalidArgument` error if any value is not finite. +// +// Arguments: +// tag: Scalar. Tag to use for the `Summary.Value`. +// values: Any shape. Values to use to build the histogram. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "HistogramSummary", + Input: []tf.Input{ + tag, values, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that emits the lines of one or more text files. +// +// Arguments: +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar containing the number of bytes to buffer. +func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TextLineDataset", + Input: []tf.Input{ + filenames, compression_type, buffer_size, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the number of records this Reader has produced. +// +// This is the same as the number of ReaderRead executions that have +// succeeded. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumRecordsProducedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes exponential of x - 1 element-wise. +// +// I.e., \\(y = (\exp x) - 1\\). +func Expm1(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Expm1", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Batch normalization. +// +// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// +// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// +// Arguments: +// t: A 4D input Tensor. +// m: A 1D mean Tensor with size matching the last dimension of t. +// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// v: A 1D variance Tensor with size matching the last dimension of t. +// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// beta: A 1D beta Tensor with size matching the last dimension of t. +// An offset to be added to the normalized tensor. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. +// If "scale_after_normalization" is true, this tensor will be multiplied +// with the normalized tensor. +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. +func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + opspec := tf.OpSpec{ + Type: "BatchNormWithGlobalNormalization", + Input: []tf.Input{ + t, m, v, beta, gamma, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) return op.Output(0) } @@ -13284,35 +13431,6 @@ func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataTyp return key, values } -// Merges summaries. -// -// This op creates a -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// protocol buffer that contains the union of all the values in the input -// summaries. -// -// When the Op is run, it reports an `InvalidArgument` error if multiple values -// in the summaries to merge use the same tag. -// -// Arguments: -// inputs: Can be of any shape. Each must contain serialized `Summary` protocol -// buffers. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MergeSummary", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Read an element from the TensorArray into output `value`. // // Arguments: @@ -14006,56 +14124,120 @@ func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Ou return op.Output(0) } -// Reorders a SparseTensor into the canonical, row-major ordering. -// -// Note that by convention, all sparse ops preserve the canonical ordering along -// increasing dimension number. The only time ordering can be violated is during -// manual manipulation of the indices and values vectors to add entries. -// -// Reordering does not affect the shape of the SparseTensor. -// -// If the tensor has rank `R` and `N` non-empty values, `input_indices` has -// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. +// Does nothing. Serves as a control trigger for scheduling. // -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. +// Only useful as a placeholder for control edges. // -// Returns 2-D. `N x R` matrix with the same indices as input_indices, but -// in canonical row-major ordering.1-D. `N` non-empty values corresponding to `output_indices`. -func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { +// Returns the created operation. +func ControlTrigger(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseReorder", - Input: []tf.Input{ - input_indices, input_values, input_shape, - }, + Type: "ControlTrigger", } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// PackAttr is an optional argument to Pack. -type PackAttr func(optionalAttr) +// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign. +type ResourceApplyAddSignAttr func(optionalAttr) -// PackAxis sets the optional axis attribute to value. +// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value. // -// value: Dimension along which to pack. Negative values wrap around, so the -// valid range is `[-(R+1), R+1)`. -// If not specified, defaults to 0 -func PackAxis(value int64) PackAttr { +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr { return func(m optionalAttr) { - m["axis"] = value + m["use_locking"] = value } } -// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. +// Update '*var' according to the AddSign update. // -// Packs the `N` tensors in `values` into a tensor with rank one higher than each +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- (alpha + sign_decay * sign(g) *sign(m)) * g +// variable <- variable - lr_t * update +// +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// alpha: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyAddSign", + Input: []tf.Input{ + var_, m, lr, alpha, sign_decay, beta, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Reorders a SparseTensor into the canonical, row-major ordering. +// +// Note that by convention, all sparse ops preserve the canonical ordering along +// increasing dimension number. The only time ordering can be violated is during +// manual manipulation of the indices and values vectors to add entries. +// +// Reordering does not affect the shape of the SparseTensor. +// +// If the tensor has rank `R` and `N` non-empty values, `input_indices` has +// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. +// +// Arguments: +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// +// Returns 2-D. `N x R` matrix with the same indices as input_indices, but +// in canonical row-major ordering.1-D. `N` non-empty values corresponding to `output_indices`. +func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseReorder", + Input: []tf.Input{ + input_indices, input_values, input_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// PackAttr is an optional argument to Pack. +type PackAttr func(optionalAttr) + +// PackAxis sets the optional axis attribute to value. +// +// value: Dimension along which to pack. Negative values wrap around, so the +// valid range is `[-(R+1), R+1)`. +// If not specified, defaults to 0 +func PackAxis(value int64) PackAttr { + return func(m optionalAttr) { + m["axis"] = value + } +} + +// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. +// +// Packs the `N` tensors in `values` into a tensor with rank one higher than each // tensor in `values`, by packing them along the `axis` dimension. // Given a list of tensors of shape `(A, B, C)`; // @@ -14151,6 +14333,133 @@ func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max return op.Output(0), op.Output(1), op.Output(2) } +// Records the bytes size of each element of `input_dataset` in a StatsAggregator. +func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "BytesProducedStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// QrAttr is an optional argument to Qr. +type QrAttr func(optionalAttr) + +// QrFullMatrices sets the optional full_matrices attribute to value. +// +// value: If true, compute full-sized `q` and `r`. If false +// (the default), compute only the leading `P` columns of `q`. +// If not specified, defaults to false +func QrFullMatrices(value bool) QrAttr { + return func(m optionalAttr) { + m["full_matrices"] = value + } +} + +// Computes the QR decompositions of one or more matrices. +// +// Computes the QR decomposition of each inner matrix in `tensor` such that +// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` +// +// ```python +// # a is a tensor. +// # q is a tensor of orthonormal matrices. +// # r is a tensor of upper triangular matrices. +// q, r = qr(a) +// q_full, r_full = qr(a, full_matrices=True) +// ``` +// +// Arguments: +// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +// +// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then +// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is +// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is +// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. +func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Qr", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// AudioSummaryAttr is an optional argument to AudioSummary. +type AudioSummaryAttr func(optionalAttr) + +// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. +// +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { + return func(m optionalAttr) { + m["max_outputs"] = value + } +} + +// Outputs a `Summary` protocol buffer with audio. +// +// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// +// Arguments: +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"sample_rate": sample_rate} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AudioSummary", + Input: []tf.Input{ + tag, tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Reverses specific dimensions of a tensor. // // NOTE `tf.reverse` has now changed behavior in preparation for 1.0. @@ -14671,6 +14980,24 @@ func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.O return op.Output(0), op.Output(1) } +// Returns x - y element-wise. +// +// *NOTE*: `Sub` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Sub", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Get the value of the tensor specified by its handle. // // Arguments: @@ -17551,69 +17878,21 @@ func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) return op.Output(0) } -// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. -type AvgPool3DGradAttr func(optionalAttr) - -// AvgPool3DGradDataFormat sets the optional data_format attribute to value. +// Inverse fast Fourier transform. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of average pooling function. +// Computes the inverse 1-dimensional discrete Fourier transform over the +// inner-most dimension of `input`. // // Arguments: -// orig_input_shape: The original input dimensions. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// input: A complex64 tensor. // -// Returns The backprop for input. -func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPool3DGrad", - Input: []tf.Input{ - orig_input_shape, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Inverse fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform over the -// inner-most dimension of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its inverse 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft -// @end_compatibility -func IFFT(scope *Scope, input tf.Output) (output tf.Output) { +// Returns A complex64 tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its inverse 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifft +// @end_compatibility +func IFFT(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } @@ -17850,6 +18129,55 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) return op.Output(0) } +// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. +type ResourceApplyPowerSignAttr func(optionalAttr) + +// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the AddSign update. +// +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g +// variable <- variable - lr_t * update +// +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// logbase: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyPowerSign", + Input: []tf.Input{ + var_, m, lr, logbase, sign_decay, beta, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // DestroyResourceOpAttr is an optional argument to DestroyResourceOp. type DestroyResourceOpAttr func(optionalAttr) @@ -17988,6 +18316,32 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu return op.Output(0), op.Output(1) } +// Adds `bias` to `value`. +// +// This is a deprecated version of BiasAdd and will be soon removed. +// +// This is a special case of `tf.add` where `bias` is restricted to be 1-D. +// Broadcasting is supported, so `value` may have any number of dimensions. +// +// Arguments: +// value: Any number of dimensions. +// bias: 1-D with size the last dimension of `value`. +// +// Returns Broadcasted sum of `value` and `bias`. +func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BiasAddV1", + Input: []tf.Input{ + value, bias, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2. type FixedLengthRecordReaderV2Attr func(optionalAttr) @@ -18195,169 +18549,6 @@ func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) ( return op.Output(0) } -// WriteImageSummaryAttr is an optional argument to WriteImageSummary. -type WriteImageSummaryAttr func(optionalAttr) - -// WriteImageSummaryMaxImages sets the optional max_images attribute to value. -// -// value: Max number of batch elements to generate images for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { - return func(m optionalAttr) { - m["max_images"] = value - } -} - -// Writes a `Summary` protocol buffer with images. -// -// The summary has up to `max_images` summary values containing images. The -// images are built from `tensor` which must be 4-D with shape `[batch_size, -// height, width, channels]` and where `channels` can be: -// -// * 1: `tensor` is interpreted as Grayscale. -// * 3: `tensor` is interpreted as RGB. -// * 4: `tensor` is interpreted as RGBA. -// -// The images have the same number of channels as the input tensor. For float -// input, the values are normalized one image at a time to fit in the range -// `[0, 255]`. `uint8` values are unchanged. The op uses two different -// normalization algorithms: -// -// * If the input values are all positive, they are rescaled so the largest one -// is 255. -// -// * If any input value is negative, the values are shifted so input value 0.0 -// is at 127. They are then rescaled so that either the smallest value is 0, -// or the largest one is 255. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_images` is 1, the summary value tag is '*tag*/image'. -// * If `max_images` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. -// -// The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. -// Each element must be in the range `[0, 255]` (It represents the value of a -// pixel in the output image). Non-finite values in the input tensor are -// replaced by this tensor in the output image. The default value is the color -// red. -// -// Arguments: -// writer: A handle to a summary writer. -// step: The step to write the summary for. -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 4-D of shape `[batch_size, height, width, channels]` where -// `channels` is 1, 3, or 4. -// bad_color: Color to use for pixels with non-finite values. -// -// Returns the created operation. -func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "WriteImageSummary", - Input: []tf.Input{ - writer, step, tag, tensor, bad_color, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Pads a tensor with zeros. -// -// This operation pads a `input` with zeros according to the `paddings` you -// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the -// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many zeros to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` -// in that dimension. -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Pad", - Input: []tf.Input{ - input, paddings, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the number of elements in the given queue. -// -// Arguments: -// handle: The handle to a queue. -// -// Returns The number of elements in the given queue. -func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QueueSizeV2", - Input: []tf.Input{ - handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with a histogram. -// -// The generated -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// has one summary value containing a histogram for `values`. -// -// This op reports an `InvalidArgument` error if any value is not finite. -// -// Arguments: -// tag: Scalar. Tag to use for the `Summary.Value`. -// values: Any shape. Values to use to build the histogram. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "HistogramSummary", - Input: []tf.Input{ - tag, values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // AsStringAttr is an optional argument to AsString. type AsStringAttr func(optionalAttr) @@ -18989,7 +19180,158 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option return op.Output(0) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. +type UniqueWithCountsAttr func(optionalAttr) + +// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { + return func(m optionalAttr) { + m["out_idx"] = value + } +} + +// Finds unique elements in a 1-D tensor. +// +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`. This operation also returns a +// tensor `idx` the same size as `x` that contains the index of each value of `x` +// in the unique output `y`. Finally, it returns a third tensor `count` that +// contains the count of each element of `y` in `x`. In other words: +// +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` +// +// For example: +// +// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx, count = unique_with_counts(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// count ==> [2, 1, 3, 1, 2] +// ``` +// +// Arguments: +// x: 1-D. +// +// Returns 1-D.1-D.1-D. +func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "UniqueWithCounts", + Input: []tf.Input{ + x, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// RestoreSliceAttr is an optional argument to RestoreSlice. +type RestoreSliceAttr func(optionalAttr) + +// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. +// +// value: Index of file to open first if multiple files match +// `file_pattern`. See the documentation for `Restore`. +// If not specified, defaults to -1 +func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. +// +// This is like `Restore` except that restored tensor can be listed as filling +// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the +// larger tensor and the slice that the restored tensor covers. +// +// The `shape_and_slice` input has the same format as the +// elements of the `shapes_and_slices` input of the `SaveSlices` op. +// +// Arguments: +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// shape_and_slice: Scalar. The shapes and slice specifications to use when +// restoring a tensors. +// dt: The type of the tensor to be restored. +// +// Returns The restored tensor. +func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RestoreSlice", + Input: []tf.Input{ + file_pattern, tensor_name, shape_and_slice, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. +type StatelessTruncatedNormalAttr func(optionalAttr) + +// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a truncated normal distribution. +// +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. +func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessTruncatedNormal", + Input: []tf.Input{ + shape, seed, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. // // N is the size of the segment being reduced. // @@ -19891,92 +20233,205 @@ func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { return op.Output(0) } -// Inverse real-valued fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. -// -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. +// Creates a dataset that skips `count` elements from the `input_dataset`. // // Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. // -// @compatibility(numpy) -// Equivalent to np.fft.irfft -// @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "IRFFT", + Type: "SkipDataset", Input: []tf.Input{ - input, fft_length, + input_dataset, count, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Adds a value to the current value of a variable. -// -// Any ReadVariableOp which depends directly or indirectly on this assign is -// guaranteed to see the incremented value or a subsequent newer one. +// ImagAttr is an optional argument to Imag. +type ImagAttr func(optionalAttr) + +// ImagTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ImagTout(value tf.DataType) ImagAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the imaginary part of a complex number. // -// Outputs the incremented value, which can be used to totally order the -// increments to this variable. +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the imaginary part of each element in `input`. All +// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part returned by this operation. // -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. +// For example: // -// Returns the created operation. -func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.imag(input) ==> [4.75, 5.75] +// ``` +func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "AssignAddVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// Computes inverse hyperbolic sine of x element-wise. -func Asinh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } opspec := tf.OpSpec{ - Type: "Asinh", + Type: "Imag", Input: []tf.Input{ - x, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Real-valued fast Fourier transform. +// ComplexAttr is an optional argument to Complex. +type ComplexAttr func(optionalAttr) + +// ComplexTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_COMPLEX64 +func ComplexTout(value tf.DataType) ComplexAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Converts two real numbers to a complex number. // -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. +// Given a tensor `real` representing the real part of a complex number, and a +// tensor `imag` representing the imaginary part of a complex number, this +// operation returns complex numbers elementwise of the form \\(a + bj\\), where +// *a* represents the `real` part and *b* represents the `imag` part. +// +// The input tensors `real` and `imag` must have the same shape. +// +// For example: +// +// ``` +// # tensor 'real' is [2.25, 3.25] +// # tensor `imag` is [4.75, 5.75] +// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] +// ``` +func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Complex", + Input: []tf.Input{ + real, imag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Inverse real-valued fast Fourier transform. +// +// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most dimension of `input`. +// +// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +// `fft_length` is not provided, it is computed from the size of the inner-most +// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +// compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +// than the corresponding dimension of `input`, the dimension is cropped. If it is +// larger, the dimension is padded with zeros. +// +// Arguments: +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A float32 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length` samples of its inverse +// 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.irfft +// @end_compatibility +func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IRFFT", + Input: []tf.Input{ + input, fft_length, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Adds a value to the current value of a variable. +// +// Any ReadVariableOp which depends directly or indirectly on this assign is +// guaranteed to see the incremented value or a subsequent newer one. +// +// Outputs the incremented value, which can be used to totally order the +// increments to this variable. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be incremented. +// +// Returns the created operation. +func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AssignAddVariableOp", + Input: []tf.Input{ + resource, value, + }, + } + return scope.AddOperation(opspec) +} + +// Computes inverse hyperbolic sine of x element-wise. +func Asinh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Asinh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Real-valued fast Fourier transform. +// +// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. // // Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the // `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, @@ -20311,85 +20766,30 @@ func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Outp return op.Output(0) } -// RestoreSliceAttr is an optional argument to RestoreSlice. -type RestoreSliceAttr func(optionalAttr) +// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. +type QuantizeAndDequantizeV3Attr func(optionalAttr) -// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. See the documentation for `Restore`. -// If not specified, defaults to -1 -func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { +// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { return func(m optionalAttr) { - m["preferred_shard"] = value + m["signed_input"] = value } } -// Restores a tensor from checkpoint files. -// -// This is like `Restore` except that restored tensor can be listed as filling -// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the -// larger tensor and the slice that the restored tensor covers. -// -// The `shape_and_slice` input has the same format as the -// elements of the `shapes_and_slices` input of the `SaveSlices` op. -// -// Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// shape_and_slice: Scalar. The shapes and slice specifications to use when -// restoring a tensors. -// dt: The type of the tensor to be restored. -// -// Returns The restored tensor. -func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RestoreSlice", - Input: []tf.Input{ - file_pattern, tensor_name, shape_and_slice, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. -type StatelessTruncatedNormalAttr func(optionalAttr) - -// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { +// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { return func(m optionalAttr) { - m["dtype"] = value + m["range_given"] = value } } -// Outputs deterministic pseudorandom values from a truncated normal distribution. -// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. -// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// Quantizes then dequantizes a tensor. // -// Returns Random values with specified shape. -func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { +// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a +// tensor, so its value can change during training. +func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -20398,9 +20798,9 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessTruncatedNormal", + Type: "QuantizeAndDequantizeV3", Input: []tf.Input{ - shape, seed, + input, input_min, input_max, num_bits, }, Attrs: attrs, } @@ -20408,246 +20808,125 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt return op.Output(0) } -// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. -type UniqueWithCountsAttr func(optionalAttr) +// AvgPool3DAttr is an optional argument to AvgPool3D. +type AvgPool3DAttr func(optionalAttr) -// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { +// AvgPool3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func AvgPool3DDataFormat(value string) AvgPool3DAttr { return func(m optionalAttr) { - m["out_idx"] = value + m["data_format"] = value } } -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. Finally, it returns a third tensor `count` that -// contains the count of each element of `y` in `x`. In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` +// Performs 3D average pooling on the input. // // Arguments: -// x: 1-D. +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -// Returns 1-D.1-D.1-D. -func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { +// Returns The average pooled output tensor. +func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UniqueWithCounts", + Type: "AvgPool3D", Input: []tf.Input{ - x, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. +// Produces the max pool of the input tensor for quantized types. // // Arguments: +// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// ksize: The size of the window for each dimension of the input tensor. +// The length must be 4 to match the number of dimensions of the input. +// strides: The stride of the sliding window for each dimension of the input +// tensor. The length must be 4 to match the number of dimensions of the input. +// padding: The type of padding algorithm to use. // -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. -// -// -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "SkipDataset", + Type: "QuantizedMaxPool", Input: []tf.Input{ - input_dataset, count, + input, min_input, max_input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// ComplexAttr is an optional argument to Complex. -type ComplexAttr func(optionalAttr) - -// ComplexTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func ComplexTout(value tf.DataType) ComplexAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} +// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. +type AvgPool3DGradAttr func(optionalAttr) -// Converts two real numbers to a complex number. -// -// Given a tensor `real` representing the real part of a complex number, and a -// tensor `imag` representing the imaginary part of a complex number, this -// operation returns complex numbers elementwise of the form \\(a + bj\\), where -// *a* represents the `real` part and *b* represents the `imag` part. -// -// The input tensors `real` and `imag` must have the same shape. -// -// For example: +// AvgPool3DGradDataFormat sets the optional data_format attribute to value. // -// ``` -// # tensor 'real' is [2.25, 3.25] -// # tensor `imag` is [4.75, 5.75] -// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] -// ``` -func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Complex", - Input: []tf.Input{ - real, imag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ImagAttr is an optional argument to Imag. -type ImagAttr func(optionalAttr) - -// ImagTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ImagTout(value tf.DataType) ImagAttr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr { return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Returns the imaginary part of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the imaginary part of each element in `input`. All -// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part returned by this operation. -// -// For example: -// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.imag(input) ==> [4.75, 5.75] -// ``` -func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Imag", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits the lines of one or more text files. -// -// Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar containing the number of bytes to buffer. -func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TextLineDataset", - Input: []tf.Input{ - filenames, compression_type, buffer_size, - }, + m["data_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. +// Computes gradients of average pooling function. // // Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes exponential of x - 1 element-wise. -// -// I.e., \\(y = (\exp x) - 1\\). -func Expm1(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Expm1", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x - y element-wise. +// orig_input_shape: The original input dimensions. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -// *NOTE*: `Sub` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns The backprop for input. +func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Sub", + Type: "AvgPool3DGrad", Input: []tf.Input{ - x, y, + orig_input_shape, grad, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -24870,133 +25149,6 @@ func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Ou return scope.AddOperation(opspec) } -// Records the bytes size of each element of `input_dataset` in a StatsAggregator. -func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "BytesProducedStatsDataset", - Input: []tf.Input{ - input_dataset, tag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QrAttr is an optional argument to Qr. -type QrAttr func(optionalAttr) - -// QrFullMatrices sets the optional full_matrices attribute to value. -// -// value: If true, compute full-sized `q` and `r`. If false -// (the default), compute only the leading `P` columns of `q`. -// If not specified, defaults to false -func QrFullMatrices(value bool) QrAttr { - return func(m optionalAttr) { - m["full_matrices"] = value - } -} - -// Computes the QR decompositions of one or more matrices. -// -// Computes the QR decomposition of each inner matrix in `tensor` such that -// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` -// -// ```python -// # a is a tensor. -// # q is a tensor of orthonormal matrices. -// # r is a tensor of upper triangular matrices. -// q, r = qr(a) -// q_full, r_full = qr(a, full_matrices=True) -// ``` -// -// Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. -// -// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then -// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is -// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. -func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Qr", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// AudioSummaryAttr is an optional argument to AudioSummary. -type AudioSummaryAttr func(optionalAttr) - -// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sample_rate": sample_rate} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSummary", - Input: []tf.Input{ - tag, tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // BiasAddAttr is an optional argument to BiasAdd. type BiasAddAttr func(optionalAttr) @@ -25198,74 +25350,195 @@ func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr } } -// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Generates labels for candidate sampling with a log-uniform distribution. +// +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. +// +// Arguments: +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). +// +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LogUniformCandidateSampler", + Input: []tf.Input{ + true_classes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Returns the truth value of (x < y) element-wise. +// +// *NOTE*: `Less` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Less", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient. +type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr) + +// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value. +// +// value: The bitwidth of the quantization; between 2 and 8, inclusive. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr { + return func(m optionalAttr) { + m["num_bits"] = value + } +} + +// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value. +// +// value: Whether to quantize into 2^num_bits - 1 distinct values. +// If not specified, defaults to false +func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr { + return func(m optionalAttr) { + m["narrow_range"] = value + } +} + +// Compute gradients for a FakeQuantWithMinMaxVars operation. +// +// Arguments: +// gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation. +// inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation. +// min, max: Quantization interval, scalar floats. +// +// +// +// Returns Backpropagated gradients w.r.t. inputs: +// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter: +// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter: +// `sum(gradients * (inputs > max))`. +func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FakeQuantWithMinMaxVarsGradient", + Input: []tf.Input{ + gradients, inputs, min, max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. +type MaxPoolGradV2Attr func(optionalAttr) + +// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { return func(m optionalAttr) { - m["seed2"] = value + m["data_format"] = value } } -// Generates labels for candidate sampling with a log-uniform distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. +// Computes gradients of the maxpooling function. // // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients w.r.t. the output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// Returns Gradients w.r.t. the input to `max_pool`. +func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + attrs := map[string]interface{}{"padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LogUniformCandidateSampler", + Type: "MaxPoolGradV2", Input: []tf.Input{ - true_classes, + orig_input, orig_output, grad, ksize, strides, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Returns the truth value of (x < y) element-wise. +// Returns the min of x and y (i.e. x < y ? x : y) element-wise. // -// *NOTE*: `Less` supports broadcasting. More about broadcasting +// *NOTE*: `Minimum` supports broadcasting. More about broadcasting // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Less", + Type: "Minimum", Input: []tf.Input{ x, y, }, @@ -27136,127 +27409,6 @@ func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, val return scope.AddOperation(opspec) } -// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient. -type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value. -// -// value: The bitwidth of the quantization; between 2 and 8, inclusive. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value. -// -// value: Whether to quantize into 2^num_bits - 1 distinct values. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Compute gradients for a FakeQuantWithMinMaxVars operation. -// -// Arguments: -// gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation. -// inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation. -// min, max: Quantization interval, scalar floats. -// -// -// -// Returns Backpropagated gradients w.r.t. inputs: -// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter: -// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter: -// `sum(gradients * (inputs > max))`. -func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVarsGradient", - Input: []tf.Input{ - gradients, inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. -type MaxPoolGradV2Attr func(optionalAttr) - -// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients w.r.t. the input to `max_pool`. -func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradV2", - Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the min of x and y (i.e. x < y ? x : y) element-wise. -// -// *NOTE*: `Minimum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Minimum", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a dataset that asynchronously prefetches elements from `input_dataset`. // // Arguments: @@ -27301,57 +27453,3 @@ func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_s op := scope.AddOperation(opspec) return op.Output(0) } - -// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. -type AudioSummaryV2Attr func(optionalAttr) - -// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSummaryV2", - Input: []tf.Input{ - tag, tensor, sample_rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From 859df2a2a1bdfb02cf370f7b68e3c6802e822b15 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 17 Nov 2017 12:32:58 -0800 Subject: [PATCH 0584/1801] Remove the existence of unused HloProtos. PiperOrigin-RevId: 176145413 --- tensorflow/compiler/xla/service/hlo.proto | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 79493c4112..e984bdb5f7 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -250,7 +250,3 @@ message HloProto { HloOrderingProto hlo_ordering = 2; BufferAssignmentProto buffer_assignment = 3; } - -message HloProtos { - repeated HloProto hlo_protos = 1; -} -- GitLab From a6b577c9e9e266a4bb6dabeb08ccfd63cdb44437 Mon Sep 17 00:00:00 2001 From: Karl Lessard Date: Fri, 17 Nov 2017 15:43:22 -0500 Subject: [PATCH 0585/1801] Allow passing a list of shapes as an attribute to an operation (#14644) --- .../java/org/tensorflow/OperationBuilder.java | 34 ++++++++++++++++-- .../src/main/native/operation_builder_jni.cc | 36 +++++++++++++++++++ .../src/main/native/operation_builder_jni.h | 8 +++++ .../org/tensorflow/OperationBuilderTest.java | 34 ++++++++++++++++++ 4 files changed, 110 insertions(+), 2 deletions(-) diff --git a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java index 9a1b7592b3..beb3635585 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java +++ b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java @@ -265,6 +265,36 @@ public final class OperationBuilder { return this; } + public OperationBuilder setAttr(String name, Shape[] value) { + int[] numDimensions = new int[value.length]; + int totalNumDimensions = 0; + for (int idx = 0; idx < value.length; ++idx) { + int n = value[idx].numDimensions(); + numDimensions[idx] = n; + if (n > 0) { + totalNumDimensions += n; + } + } + // Flatten the shapes into a single array to avoid too much overhead in the + // native part + long[] shapes = new long[totalNumDimensions]; + int shapeIdx = 0; + for (Shape shape : value) { + if (shape.numDimensions() > 0) { + for (long dim : shape.asArray()) { + shapes[shapeIdx++] = dim; + } + } + } + Graph.Reference r = graph.ref(); + try { + setAttrShapeList(unsafeNativeHandle, name, shapes, numDimensions); + } finally { + r.close(); + } + return this; + } + public OperationBuilder setAttr(String name, String[] value) { Charset utf8 = Charset.forName("UTF-8"); Object[] objects = new Object[value.length]; @@ -297,8 +327,6 @@ public final class OperationBuilder { // The names of all the setAttr* family functions below correspond to the C library types, not the // Java library types. Roughly, setAttrFoo calls the TensorFlow C library function: TF_SetAttrFoo. - // TODO(ashankar): - // - setAttrShapeList: Which would take in a long[][] private static native void setAttrString(long handle, String name, byte[] value); @@ -324,5 +352,7 @@ public final class OperationBuilder { private static native void setAttrShape(long handle, String name, long[] shape, int numDims); + private static native void setAttrShapeList(long handle, String name, long[] shapes, int[] numDims); + private static native void setAttrStringList(long handle, String name, Object[] value); } diff --git a/tensorflow/java/src/main/native/operation_builder_jni.cc b/tensorflow/java/src/main/native/operation_builder_jni.cc index e03be7b110..71a451ad13 100644 --- a/tensorflow/java/src/main/native/operation_builder_jni.cc +++ b/tensorflow/java/src/main/native/operation_builder_jni.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/java/src/main/native/operation_builder_jni.h" +#include #include #include "tensorflow/c/c_api.h" #include "tensorflow/java/src/main/native/exception_jni.h" @@ -262,6 +263,41 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShape( env->ReleaseStringUTFChars(name, cname); } +JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShapeList( + JNIEnv* env, jclass clazz, jlong handle, jstring name, jlongArray shapes, + jintArray num_dims) { + TF_OperationDescription* d = requireHandle(env, handle); + if (d == nullptr) return; + std::unique_ptr cshapes; + std::unique_ptr cdims; + std::unique_ptr cnum_dims; + const int num_dims_length = env->GetArrayLength(num_dims); + if (num_dims_length > 0) { + const int shapes_length = env->GetArrayLength(shapes); + cshapes.reset(new int64_t[shapes_length]); + cdims.reset(new int64_t* [num_dims_length]); + cnum_dims.reset(new int[num_dims_length]); + jlong* shapes_elems = + (jlong*) env->GetPrimitiveArrayCritical(shapes, nullptr); + std::memcpy(cshapes.get(), shapes_elems, shapes_length << 3); + env->ReleasePrimitiveArrayCritical(shapes, shapes_elems, JNI_ABORT); + int64_t* cshapes_ptr = cshapes.get(); + jint* num_dims_elems = + (jint*) env->GetPrimitiveArrayCritical(num_dims, nullptr); + for (int i = 0; i < num_dims_length; ++i) { + cnum_dims[i] = static_cast(num_dims_elems[i]); + cdims[i] = cshapes_ptr; + if (cnum_dims[i] > 0) { + cshapes_ptr += cnum_dims[i]; + } + } + env->ReleasePrimitiveArrayCritical(num_dims, num_dims_elems, JNI_ABORT); + } + const char* cname = env->GetStringUTFChars(name, nullptr); + TF_SetAttrShapeList(d, cname, cdims.get(), cnum_dims.get(), num_dims_length); + env->ReleaseStringUTFChars(name, cname); +} + JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrStringList( JNIEnv* env, jclass object, jlong handle, jstring name, jobjectArray values) { diff --git a/tensorflow/java/src/main/native/operation_builder_jni.h b/tensorflow/java/src/main/native/operation_builder_jni.h index 2e72bd68da..cf0abe4829 100644 --- a/tensorflow/java/src/main/native/operation_builder_jni.h +++ b/tensorflow/java/src/main/native/operation_builder_jni.h @@ -169,6 +169,14 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrTensorList( JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShape( JNIEnv *, jclass, jlong, jstring, jlongArray, jint); +/* + * Class: org_tensorflow_OperationBuilder + * Method: setAttrShapeList + * Signature: (JLjava/lang/String;[J[I)V + */ +JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShapeList( + JNIEnv *, jclass, jlong, jstring, jlongArray, jintArray); + /* * Class: org_tensorflow_OperationBuilder * Method: setAttrStringList diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java index 6dc233987b..2430816725 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java @@ -148,6 +148,19 @@ public class OperationBuilderTest { } } + @Test + public void setAttrShapeList() { + // Those shapes match tensors ones, so no exception is thrown + testSetAttrShapeList(new Shape[] { Shape.make(2, 2), Shape.make(2, 2, 2) }); + try { + // Those shapes do not match tensors ones, exception is thrown + testSetAttrShapeList(new Shape[] { Shape.make(2, 2), Shape.make(2, 2, 2, 2) }); + fail("Shapes are incompatible and an exception was expected"); + } catch (IllegalArgumentException e) { + // expected + } + } + @Test public void addControlInput() { try (Graph g = new Graph(); @@ -175,6 +188,27 @@ public class OperationBuilderTest { } } + private static void testSetAttrShapeList(Shape[] shapes) { + try (Graph g = new Graph(); Session s = new Session(g)) { + int[][] matrix = new int[][] { { 0, 0 }, { 0, 0 } }; + Output queue = g.opBuilder("FIFOQueue", "queue") + .setAttr("component_types", new DataType[] { DataType.INT32, DataType.INT32 }) + .setAttr("shapes", shapes) + .build() + .output(0); + assertTrue(hasNode(g, "queue")); + Output c1 = TestUtil.constant(g, "const1", matrix); + Output c2 = TestUtil.constant(g, "const2", new int[][][] { matrix, matrix }); + Operation enqueue = g.opBuilder("QueueEnqueue", "enqueue") + .addInput(queue) + .addInputList(new Output[] { c1, c2 }) + .build(); + assertTrue(hasNode(g, "enqueue")); + + s.runner().addTarget(enqueue).run(); + } + } + private static boolean hasNode(Graph g, String name) { return g.operation(name) != null; } -- GitLab From a715b06555a0c14e95f30569f40a97019af6a6b0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 12:34:28 -0800 Subject: [PATCH 0586/1801] [XLA:CPU/GPU] Revert back to previous buffer aliasing calculation for fused DynamicUpdateSlice in-place updates (fused instructions compared in current calculation are not assigned buffers, so I think the current calculation is always returning false). PiperOrigin-RevId: 176145589 --- tensorflow/compiler/xla/service/llvm_ir/ops.h | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.h b/tensorflow/compiler/xla/service/llvm_ir/ops.h index 11e84d9cb5..f72f482e31 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ops.h +++ b/tensorflow/compiler/xla/service/llvm_ir/ops.h @@ -40,11 +40,24 @@ bool CanUpdateDynamicSliceInPlace(HloInstruction* dynamic_update_slice, inline bool CanEmitFusedDynamicUpdateSliceInPlace( HloInstruction* fusion, const BufferAssignment& assignment) { CHECK_EQ(fusion->opcode(), HloOpcode::kFusion); - return fusion->fusion_kind() == HloInstruction::FusionKind::kLoop && - fusion->fused_expression_root()->opcode() == - HloOpcode::kDynamicUpdateSlice && - CanUpdateDynamicSliceInPlace(fusion->fused_expression_root(), - assignment); + HloInstruction* fused_root = fusion->fused_expression_root(); + if (fused_root->opcode() != HloOpcode::kDynamicUpdateSlice || + fusion->fusion_kind() != HloInstruction::FusionKind::kLoop) { + return false; + } + // Walk DynamicUpdateSlice operand(0) to fused parameter and get its + // associated operand. See if it shares an allocation with this operand. + HloInstruction* fusion_operand; + ShapeIndex index; + std::tie(fusion_operand, index) = + fused_root->mutable_operand(0)->LatestNonGteAncestorAndIndex(); + if (fusion_operand->opcode() != HloOpcode::kParameter) { + return false; + } + auto* operand = fusion->operand(fusion_operand->parameter_number()); + return assignment.HasAllocationAt(operand, index) && + assignment.HasAllocationAt(fusion, {}) && + assignment.SharesSliceAtIndex(fusion, {}, operand, index); } // Emits IR for running the given dynamic-update-slice op in-place -- that is, -- GitLab From 281d10b6cc27ba8c717a7378198a61bb196d05fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 18 Nov 2017 04:46:26 +0800 Subject: [PATCH 0587/1801] PrefetchDatasetOp checks buffer_size in c++ side (#14508) * TST: add test case * BUG: check buffer_size > 0 on c++ side --- .../core/kernels/prefetch_dataset_op.cc | 2 + tensorflow/python/kernel_tests/BUILD | 14 +++++ .../kernel_tests/prefetch_dataset_op_test.py | 58 +++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 tensorflow/python/kernel_tests/prefetch_dataset_op_test.py diff --git a/tensorflow/core/kernels/prefetch_dataset_op.cc b/tensorflow/core/kernels/prefetch_dataset_op.cc index 80592aa353..93ff7cff57 100644 --- a/tensorflow/core/kernels/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/prefetch_dataset_op.cc @@ -36,6 +36,8 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { int64 buffer_size; OP_REQUIRES_OK( ctx, ParseScalarArgument(ctx, "buffer_size", &buffer_size)); + OP_REQUIRES(ctx, buffer_size > 0, + errors::InvalidArgument("buffer_size must be > 0")); *output = new Dataset(input, buffer_size); } diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 9ad5733f34..4fffdfda7d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2943,6 +2943,20 @@ tf_py_test( ], ) +tf_py_test( + name = "prefetch_dataset_op_test", + size = "small", + srcs = ["prefetch_dataset_op_test.py"], + additional_deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + tf_py_test( name = "range_dataset_op_test", size = "small", diff --git a/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py new file mode 100644 index 0000000000..edea9c9027 --- /dev/null +++ b/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py @@ -0,0 +1,58 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Test PrefetchDataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class PrefetchDatasetTest(test.TestCase): + def testBufferSize(self): + buffer_size = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = dataset_ops.Dataset.range(10).prefetch( + buffer_size=buffer_size).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op, feed_dict={buffer_size: 5}) + for m in range(10): + self.assertEqual(m, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testInvalidBufferSize(self): + buffer_size = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = dataset_ops.Dataset.range(10).prefetch( + buffer_size=buffer_size).make_initializable_iterator() + init_op = iterator.initializer + + with self.assertRaisesRegexp(errors.InvalidArgumentError, "buffer_size"): + with self.test_session() as sess: + sess.run(init_op, feed_dict={buffer_size: 0}) + + with self.assertRaisesRegexp(errors.InvalidArgumentError, "buffer_size"): + with self.test_session() as sess: + sess.run(init_op, feed_dict={buffer_size: -5}) + + +if __name__ == "__main__": + test.main() -- GitLab From 6a7cdfa8c973f3ce6a31664233fc8b096f2ba393 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 17 Nov 2017 12:46:18 -0800 Subject: [PATCH 0588/1801] Improved shape inference PiperOrigin-RevId: 176147013 --- .../core/common_runtime/shape_refiner.cc | 31 +- .../core/common_runtime/shape_refiner_test.cc | 15 +- tensorflow/core/framework/shape_inference.cc | 60 +- tensorflow/core/framework/shape_inference.h | 27 +- .../core/framework/shape_inference_test.cc | 13 +- .../core/grappler/costs/graph_properties.cc | 669 ++++++++++++------ .../core/grappler/costs/graph_properties.h | 50 +- .../grappler/costs/graph_properties_test.cc | 23 +- .../while_loop.pbtxt | 20 +- 9 files changed, 591 insertions(+), 317 deletions(-) diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 10901da192..d66865e45b 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -335,10 +335,14 @@ Status ShapeRefiner::UpdateNode(const Node* node, bool relax, bool* refined) { InferenceContext* c = iter->second->get_context(); DCHECK_GE(dst_input, 0); ShapeHandle existing_input = node_context->input(dst_input); - if (!relax && node_context->MergeInput(dst_input, c->output(src_output)) && - !existing_input.SameHandle(node_context->input(dst_input))) { - *refined = true; - } else if (relax) { + if (!relax) { + if (node_context->MergeInput(dst_input, c->output(src_output))) { + if (!SameDefinedShape(node_context, node_context->input(dst_input), + existing_input)) { + *refined = true; + } + } + } else { if (node_context->RelaxInput(dst_input, c->output(src_output))) { if (!SameDefinedShape(node_context, node_context->input(dst_input), existing_input)) { @@ -865,15 +869,22 @@ Status ShapeRefiner::RunShapeFn(const Node* node, bool ShapeRefiner::SameDefinedShape(InferenceContext* c, ShapeHandle s0, ShapeHandle s1) { - if (!c->RankKnown(s0)) { - return !c->RankKnown(s1); - } else if (!c->RankKnown(s1) || c->Rank(s0) != c->Rank(s1)) { + if (s0.SameHandle(s1)) { + return true; + } + if (c->Rank(s0) != c->Rank(s1)) { + return false; + } + if (!c->RankKnown(s0) && !c->RankKnown(s1)) { return false; } - for (int i = 0; i < c->Rank(s0); ++i) { - if (c->Value(c->Dim(s0, i)) != c->Value(c->Dim(s1, i))) { - return false; + if (!c->Dim(s0, i).SameHandle(c->Dim(s1, i))) { + int64 val0 = c->Value(c->Dim(s0, i)); + int64 val1 = c->Value(c->Dim(s1, i)); + if (val0 < 0 || val1 < 0 || val0 != val1) { + return false; + } } } diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc index ff32e855d5..e4eef1dbe2 100644 --- a/tensorflow/core/common_runtime/shape_refiner_test.cc +++ b/tensorflow/core/common_runtime/shape_refiner_test.cc @@ -1161,11 +1161,13 @@ TEST_F(ShapeRefinerTest, SameDefinedShape) { auto s_unknown_2 = ctx->MakeShape({-1, 2}); auto s_unknown_2_b = ctx->MakeShape({-1, 2}); - EXPECT_TRUE(SameDefinedShape(ctx, unknown, unknown_b)); + EXPECT_TRUE(SameDefinedShape(ctx, unknown, unknown)); + EXPECT_FALSE(SameDefinedShape(ctx, unknown, unknown_b)); EXPECT_FALSE(SameDefinedShape(ctx, unknown, s_1_2)); EXPECT_TRUE(SameDefinedShape(ctx, s_1_2, s_1_2_b)); EXPECT_FALSE(SameDefinedShape(ctx, s_1_2, s_2_2)); - EXPECT_TRUE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2_b)); + EXPECT_TRUE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2)); + EXPECT_FALSE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2_b)); } TEST_F(ShapeRefinerTest, IsUpdatedShapesOrTypes) { @@ -1178,14 +1180,15 @@ TEST_F(ShapeRefinerTest, IsUpdatedShapesOrTypes) { TF_ASSERT_OK(m.AddNode(test)); shape_inference::InferenceContext* ctx = m.GetContext(test); + shape_inference::ShapeHandle unknown = ctx->UnknownShape(); std::vector t0{ {ctx->MakeShape({1, 2, 3}), DT_FLOAT}, - {ctx->UnknownShape(), DT_INVALID}, + {unknown, DT_INVALID}, {ctx->MakeShape({4, 3, 2, 1}), DT_INT32}}; std::vector t1{ {ctx->MakeShape({1, 2, 3}), DT_FLOAT}, - {ctx->UnknownShape(), DT_INVALID}, + {unknown, DT_INVALID}, {ctx->MakeShape({4, 3, 2, 1}), DT_INT32}}; std::vector t2{ @@ -1256,10 +1259,10 @@ TEST_F(ShapeRefinerTest, IncrementalUpdates) { 0, std::vector{{shp, DT_FLOAT}}); refined = false; TF_ASSERT_OK(m.UpdateNode(dequeue, true /* relax */, &refined)); - EXPECT_FALSE(refined); + EXPECT_TRUE(refined); ctx = m.GetContext(dequeue); EXPECT_EQ("[?,7]", ctx->DebugString(ctx->output(0))); - EXPECT_FALSE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0))); + EXPECT_TRUE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0))); // Inject a shape of the same handle and expect refined to not change. ctx = m.GetContext(queue); diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index fe0742e1db..f30272e250 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -403,15 +403,28 @@ Status InferenceContext::WithValue(DimensionHandle dim, int64 value, existing); } -void InferenceContext::Relax(DimensionHandle d0, DimensionHandle d1, +void InferenceContext::Relax(DimensionHandle d_old, DimensionHandle d_new, DimensionHandle* out) { - if (d0.SameHandle(d1)) { - *out = d0; - } else if (!ValueKnown(d0) || !ValueKnown(d1)) { - *out = UnknownDim(); - } else if (Value(d0) == Value(d1)) { - *out = d0; + if (d_old.SameHandle(d_new)) { + *out = d_old; + } else if (!ValueKnown(d_old) && !ValueKnown(d_new)) { + // The node will be fed by the dimension d_new instead of d_old: any + // equality assertion between d_old and other input dimension on this node + // may not be true anymore, so forget them all. + ForgetMerges(); + // Return the new shape handle to force the relaxation to propagate to the + // fanout of the context. + *out = d_new; + } else if (!ValueKnown(d_new)) { + ForgetMerges(); + *out = d_new; + } else if (Value(d_old) == Value(d_new)) { + // Return the old shape handle. This will stop the relaxation in the fanout + // of the context. + *out = d_old; } else { + // Return a new handle that encodes a different unknown dim. + ForgetMerges(); *out = UnknownDim(); } } @@ -463,45 +476,48 @@ Status InferenceContext::MergePrefix(ShapeHandle s, ShapeHandle prefix, return Status::OK(); } -void InferenceContext::Relax(ShapeHandle s0, ShapeHandle s1, ShapeHandle* out) { - if (s0.SameHandle(s1)) { - *out = s0; +void InferenceContext::Relax(ShapeHandle s_old, ShapeHandle s_new, + ShapeHandle* out) { + if (s_old.SameHandle(s_new)) { + *out = s_old; return; - } else if (!RankKnown(s0) || !RankKnown(s1)) { - *out = UnknownShape(); + } else if (!RankKnown(s_new) || !s_old.IsSet()) { + ForgetMerges(); + *out = s_new; return; } - const int32 rank = Rank(s0); - if (rank != Rank(s1)) { + const int32 rank = Rank(s_old); + if (rank != Rank(s_new)) { + ForgetMerges(); *out = UnknownShape(); return; } - bool return_s0 = true; + bool return_s_old = true; for (int i = 0; i < rank; ++i) { - auto d0 = Dim(s0, i); - auto d1 = Dim(s1, i); + auto d0 = Dim(s_old, i); + auto d1 = Dim(s_new, i); if (d0.SameHandle(d1)) continue; auto v0 = Value(d0); auto v1 = Value(d1); if (v0 == kUnknownDim || v1 == kUnknownDim || v0 != v1) { - return_s0 = false; + return_s_old = false; break; } } - if (return_s0) { - *out = s0; + if (return_s_old) { + *out = s_old; return; } // Relax dims. std::vector dims(rank); for (int i = 0; i < rank; ++i) { - // Invariant for relax was checked earlier, so CHECK is ok. - Relax(Dim(s0, i), Dim(s1, i), &dims[i]); + Relax(Dim(s_old, i), Dim(s_new, i), &dims[i]); } + ForgetMerges(); *out = MakeShape(dims); } diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index b12d37b4c0..4a4ef12635 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -62,7 +62,7 @@ class DimensionHandle { private: DimensionHandle(const Dimension* dim) { ptr_ = dim; } - const Dimension* operator->() { return ptr_; } + const Dimension* operator->() const { return ptr_; } bool IsSet() const { return ptr_ != nullptr; } const Dimension* ptr_ = nullptr; @@ -104,7 +104,7 @@ class ShapeHandle { private: ShapeHandle(const Shape* shape) { ptr_ = shape; } - const Shape* operator->() { return ptr_; } + const Shape* operator->() const { return ptr_; } bool IsSet() const { return ptr_ != nullptr; } const Shape* ptr_ = nullptr; @@ -678,14 +678,17 @@ class InferenceContext { // Adds additional context to the given status. Status AttachContext(const Status& status); - // Relaxes and and returns the relaxed dimension in <*out>. If - // and have incompatible values, returns an error. + // Relaxes an existing value with a new value and returns the + // relaxed dimension in <*out>. If and have incompatible + // values, returns an error. // - // Note that <*out> may be set to or . - void Relax(DimensionHandle d0, DimensionHandle d1, DimensionHandle* out); - // Relaxes and and returns the relaxed shape in <*out>. See - // 'RelaxInput' function for full details and examples. - void Relax(ShapeHandle s0, ShapeHandle s1, ShapeHandle* out); + // Note that <*out> may be set to or . + void Relax(DimensionHandle d_old, DimensionHandle d_new, + DimensionHandle* out); + // Relaxes an existing shape with a new shape and returns the + // relaxed shape in <*out>. See 'RelaxInput' function for full details and + // examples. + void Relax(ShapeHandle s_old, ShapeHandle s_new, ShapeHandle* out); // Used to implement MergeInputHandleShapesAndTypes and // MergeOutputHandleShapesAndTypes. @@ -698,6 +701,12 @@ class InferenceContext { const std::vector& shapes_and_types, std::vector* to_update) TF_MUST_USE_RESULT; + // Forget all the previous merged shapes and dims. + void ForgetMerges() { + merged_shapes_.clear(); + merged_dims_.clear(); + } + ShapeManager shape_manager_; // inputs_, outputs_, and input_tensors_as_shapes_ refer to values from diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc index d03cc8ce6d..68156e63ca 100644 --- a/tensorflow/core/framework/shape_inference_test.cc +++ b/tensorflow/core/framework/shape_inference_test.cc @@ -544,9 +544,10 @@ TEST_F(ShapeInferenceTest, RelaxDim) { auto d_unknown_b = c.Dim(c.input(0), 4); DimensionHandle out; - // Relaxing anything with unknown returns a new unknown. + // Relaxing anything with unknown returns a new unknown or the existing + // unknown. Relax(&c, d2, d_unknown, &out); - EXPECT_FALSE(SameHandle(d_unknown, out)); + EXPECT_TRUE(SameHandle(d_unknown, out)); EXPECT_FALSE(SameHandle(d_unknown_b, out)); EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out)); Relax(&c, d_unknown, d2, &out); @@ -554,7 +555,7 @@ TEST_F(ShapeInferenceTest, RelaxDim) { EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out)); Relax(&c, d_unknown, d_unknown_b, &out); EXPECT_FALSE(SameHandle(d_unknown, out)); - EXPECT_FALSE(SameHandle(d_unknown_b, out)); + EXPECT_TRUE(SameHandle(d_unknown_b, out)); EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out)); // Relaxing with self returns self. @@ -602,7 +603,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) { EXPECT_EQ("?", c.DebugString(out)); Relax(&c, s_unknown, s_unknown_b, &out); EXPECT_FALSE(SameHandle(s_unknown, out)); - EXPECT_FALSE(SameHandle(s_unknown_b, out)); + EXPECT_TRUE(SameHandle(s_unknown_b, out)); EXPECT_EQ("?", c.DebugString(out)); // Relaxing with self returns self. @@ -623,7 +624,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) { Relax(&c, s_u_2, s_1_u, &out); EXPECT_EQ("[?,?]", c.DebugString(out)); EXPECT_FALSE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0))); - EXPECT_FALSE(SameHandle(c.Dim(s_1_u, 1), c.Dim(out, 1))); + EXPECT_TRUE(SameHandle(c.Dim(s_1_u, 1), c.Dim(out, 1))); auto s_u1 = c.UnknownShapeOfRank(1); auto s_u2 = c.UnknownShapeOfRank(1); Relax(&c, s_u1, s_u2, &out); @@ -637,7 +638,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) { EXPECT_EQ("[?,?]", c.DebugString(out)); out = s_unknown; Relax(&c, s_1_3, s_u_2, &out); - EXPECT_FALSE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0))); + EXPECT_TRUE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0))); EXPECT_EQ("[?,?]", c.DebugString(out)); out = s_unknown; diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index d33d86df3a..46c6841023 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -249,106 +249,252 @@ typename DisjointSet::Rep* DisjointSet::Find(Handle value) { return root; } -// If a Merge node has a NextIteration node as an input then that input will -// try to forward an UnknownShape at graph construction time. However, the -// Merge shape function will always propagate an UnknownShape if any of its -// inputs are UnknownShapes. So we need to ignore the input from NextIteration -// nodes to propagate any known shape from the Merge node. -Status ShapeOfMergeNode(const Node* node, InferenceContext* c) { - ShapeHandle out = c->input(0); - if (!c->RankKnown(out)) { - out = c->UnknownShape(); - } else { - int32 rank = c->Rank(out); - for (const Edge* e : node->in_edges()) { - if (e->src()->IsNextIteration() || e->dst_input() <= 0) { - continue; - } - ShapeHandle input = c->input(e->dst_input()); - if (!c->RankKnown(input) || c->Rank(input) != rank) { - out = c->UnknownShape(); - break; - } +bool IsQueue(const Node& node) { + StringPiece type(node.type_string()); + return type.ends_with("QueueV2"); +} + +// Returns true if the node is an Enter op AND its input is a Queue. +bool IsEnterWithQueue(const Node& node) { + if (node.IsEnter()) { + const Node* in_node; + TF_CHECK_OK(node.input_node(0, &in_node)); + return IsQueue(*in_node); + } + return false; +} + +} // namespace + +// Queue of nodes to process. Nodes can be enqueued in any order, but will be +// dequeued in (roughly) topological order. Propagating shapes following a +// topological ordering isn't required for correctness but helps speed things up +// since it avoids processing the same node multiple times as its inputs +// information is refined. +class TopoQueue { + public: + void push(const Node* n) { queue_.insert(n); } + const Node* pop() { + CHECK(!empty()); + auto it = queue_.begin(); + const Node* n = *it; + queue_.erase(it); + return n; + } + + bool empty() const { return queue_.empty(); } + private: + // Graph nodes are created in (roughly) topological order. Therefore we can + // use their id to ensure they're sorted topologically. + struct CompareNodes { + bool operator()(const Node* lhs, const Node* rhs) const { + return lhs->id() > rhs->id(); + } + }; + std::set queue_; +}; + +// Merge and relax symbolic shapes. +// Each symbolic shape or dimension is represented by a handle. Unlike the TF +// shape refiner which creates new handles every time it processes an unknown +// shape/dimension, the symbolic shape refiner assigns a specific handle to each +// unknown shape/dimension of a given node. +class SymbolicShapeRefiner { + public: + explicit SymbolicShapeRefiner(ShapeRefiner* shape_refiner) + : shape_refiner_(shape_refiner) {} + + InferenceContext* GetContext(const Node* node) { + return shape_refiner_->GetContext(node); + } + Status UpdateNode(const Node* node, bool relax, bool* refined) { + return shape_refiner_->UpdateNode(node, relax, refined); + } + Status SetShape(const Node* node, int output_port, + shape_inference::ShapeHandle shape) { + return shape_refiner_->SetShape(node, output_port, shape); + } + + struct ShapeId { + const Node* node; + int port_id; + bool operator==(const ShapeId& other) const { + return node == other.node && port_id == other.port_id; + } + }; + struct HashShapeId { + std::size_t operator()(const ShapeId& shp) const { + return std::hash{}(shp.node) + shp.port_id; + } + }; + + struct DimId { + const Node* node; + int port_id; + int dim_index; + bool operator==(const DimId& other) const { + return node == other.node && port_id == other.port_id && + dim_index == other.dim_index; + } + }; + + struct HashDimId { + std::size_t operator()(const DimId& dim) const { + return std::hash{}(dim.node) + dim.port_id + dim.dim_index; + } + }; + + // Compute the shape of the tensors outputed by node 'node' at output port + // 'port_index' as the intersection of shape1 and shape2. + ShapeHandle OutputAsIntersection(const Node* node, int port_index, + ShapeHandle shape1, ShapeHandle shape2) { + if (shape1.SameHandle(shape2)) { + return shape1; + } + InferenceContext* ctx = shape_refiner_->GetContext(node); + ShapeHandle merged = shape1; + if (!ctx->RankKnown(shape2) && !ctx->RankKnown(shape1)) { + // Return either one since they're expected to represent the same value. + return shape1; + } else if (!ctx->RankKnown(shape2) && ctx->RankKnown(shape1)) { + return shape1; + } else if (ctx->RankKnown(shape2) && !ctx->RankKnown(shape1)) { + return shape2; + } else { + const int rank = ctx->Rank(shape1); + if (ctx->Rank(shape2) != rank) { + // We detected an inconsistency, return an unknown shape. This can + // happen in the fanout of a merge node since during the initial + // propagation we optimistically assume that all the inputs to the merge + // node have the same shape. + return GetUnknownOutputShape(node, port_index); + } for (int d = 0; d < rank; ++d) { - if (c->Value(c->Dim(input, d)) != c->Value(c->Dim(out, d))) { - TF_RETURN_IF_ERROR(c->ReplaceDim(out, d, c->UnknownDim(), &out)); + if (!ctx->Dim(shape1, d).SameHandle(ctx->Dim(shape2, d))) { + if (ctx->Value(ctx->Dim(shape1, d)) != + ctx->Value(ctx->Dim(shape2, d))) { + DimensionHandle new_dim; + if (ctx->Value(ctx->Dim(shape1, d)) < 0) { + new_dim = ctx->Dim(shape2, d); + } else if (ctx->Value(ctx->Dim(shape2, d)) < 0) { + new_dim = ctx->Dim(shape1, d); + } else { + new_dim = GetUnknownOutputDim(node, port_index, d); + } + TF_CHECK_OK(ctx->ReplaceDim(merged, d, new_dim, &merged)); + } } } } + return merged; } - c->set_output(0, out); - c->set_output(1, c->Scalar()); - return Status::OK(); -} -// Manually propagate the input shape for Enter nodes and update any Merge node -// outputs. -Status UpdateEnter(ShapeRefiner* shape_refiner, const Node* node, bool relax, - std::queue* new_shapes) { - auto enter_ctx = shape_refiner->GetContext(node); - CHECK_NE(enter_ctx, nullptr); - for (int i = 0; i < enter_ctx->num_outputs(); i++) { - TF_RETURN_IF_ERROR(shape_refiner->SetShape(node, i, enter_ctx->input(0))); - } - for (const Edge* e : node->out_edges()) { - Node* dst = e->dst(); - if (dst->IsMerge()) { - bool updated = false; - TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(dst, relax, &updated)); - if (!updated) { - continue; + // Compute the shape of the tensors outputed by node 'node' at output port + // 'port_index' as the union of shape1 and shape2. + ShapeHandle OutputAsUnion(const Node* node, int port_index, + ShapeHandle shape1, ShapeHandle shape2) { + if (shape1.SameHandle(shape2)) { + return shape1; + } + InferenceContext* ctx = shape_refiner_->GetContext(node); + ShapeHandle relaxed = shape1; + const int rank = ctx->Rank(shape1); + if (!ctx->RankKnown(shape2) || ctx->Rank(shape2) != rank) { + relaxed = GetUnknownOutputShape(node, port_index); + } else { + for (int d = 0; d < rank; ++d) { + if (!ctx->Dim(shape1, d).SameHandle(ctx->Dim(shape2, d))) { + int64 val1 = ctx->Value(ctx->Dim(shape1, d)); + int64 val2 = ctx->Value(ctx->Dim(shape2, d)); + if (val1 != val2 || (val1 < 0 && val2 < 0)) { + DimensionHandle new_dim = GetUnknownOutputDim(node, port_index, d); + TF_CHECK_OK(ctx->ReplaceDim(relaxed, d, new_dim, &relaxed)); + } + } } - InferenceContext* merge_ctx = shape_refiner->GetContext(dst); - CHECK_NE(merge_ctx, nullptr); - TF_RETURN_IF_ERROR(ShapeOfMergeNode(dst, merge_ctx)); - new_shapes->push(dst); } + return relaxed; } - return Status::OK(); -} -// Propagates the shapes in the transitive fan-out of . -Status PropagateShapes(ShapeRefiner* shape_refiner, bool relax, - std::queue* new_shapes) { - while (!new_shapes->empty()) { - const Node* n = new_shapes->front(); - new_shapes->pop(); - for (const Node* fanout : n->out_nodes()) { - bool updated = false; - TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(fanout, relax, &updated)); - if (fanout->IsEnter()) { - TF_RETURN_IF_ERROR( - UpdateEnter(shape_refiner, fanout, relax, new_shapes)); - } else if (updated) { - // We want to avoid propagating through loops on the merge pass because - // the shapes are not guaranteed to converge. - if (!relax && fanout->IsNextIteration()) { + bool EquivalentShapes(ShapeHandle s1, ShapeHandle s2) const { + if (s1.SameHandle(s2)) { + return true; + } + if (InferenceContext::Rank(s1) != InferenceContext::Rank(s2)) { + return false; + } + if (!InferenceContext::RankKnown(s1) && !InferenceContext::RankKnown(s2)) { + return true; + } + const int rank = InferenceContext::Rank(s1); + for (int i = 0; i < rank; ++i) { + if (!InferenceContext::DimKnownRank(s1, i).SameHandle( + InferenceContext::DimKnownRank(s2, i))) { + int64 val1 = + InferenceContext::Value(InferenceContext::DimKnownRank(s1, i)); + int64 val2 = + InferenceContext::Value(InferenceContext::DimKnownRank(s2, i)); + if (val1 >= 0 && val2 >= 0 && val1 == val2) { continue; } - new_shapes->push(fanout); + return false; } } + return true; } - return Status::OK(); -} -bool IsQueue(const Node& node) { - StringPiece type(node.type_string()); - return type.ends_with("QueueV2"); -} + bool EquivalentShapesAndTypes(const std::vector& st1, + const std::vector& st2) const { + if (st1.size() != st2.size()) { + return false; + } + for (int i = 0; i < st1.size(); ++i) { + const ShapeAndType& s1 = st1[i]; + const ShapeAndType& s2 = st2[i]; + if (s1.dtype != s2.dtype) { + return false; + } + if (!EquivalentShapes(s1.shape, s2.shape)) { + return false; + } + } + return true; + } -// Returns true if the node is an Enter op AND its input is a Queue. -bool IsEnterWithQueue(const Node& node) { - if (node.IsEnter()) { - const Node* in_node; - TF_CHECK_OK(node.input_node(0, &in_node)); - return IsQueue(*in_node); + private: + // Return the one ShapeHandle used to denote a fully unknown shape for a node + // output. + ShapeHandle GetUnknownOutputShape(const Node* node, int index) { + ShapeId id{node, index}; + auto it = unknown_shapes_.find(id); + if (it != unknown_shapes_.end()) { + return it->second; + } + InferenceContext* c = shape_refiner_->GetContext(node); + ShapeHandle shp = c->UnknownShape(); + unknown_shapes_[id] = shp; + return shp; + } + // Return the one ShapeHandle used to denote a fully unknown dimension for a + // node output. + DimensionHandle GetUnknownOutputDim(const Node* node, int index, int dim_id) { + DimId id{node, index, dim_id}; + auto it = unknown_dims_.find(id); + if (it != unknown_dims_.end()) { + return it->second; + } + InferenceContext* c = shape_refiner_->GetContext(node); + DimensionHandle dim = c->UnknownDim(); + unknown_dims_[id] = dim; + return dim; } - return false; -} -} // namespace + ShapeRefiner* shape_refiner_; + + std::unordered_map unknown_shapes_; + std::unordered_map unknown_dims_; +}; // Keep track of shapes and dimensions in a graph. // In particular, use disjoint sets to track equivalence between shapes and @@ -401,24 +547,9 @@ class SymbolicShapeManager { DisjointSet dims_; }; -void GraphProperties::Relax(InferenceContext* c, ShapeHandle s0, ShapeHandle s1, - ShapeHandle* out) { - c->Relax(s0, s1, out); -} - -bool GraphProperties::SameDefinedShape(InferenceContext* c, ShapeHandle s0, - ShapeHandle s1) { - return ShapeRefiner::SameDefinedShape(c, s0, s1); -} - -bool GraphProperties::IsUpdatedShapesOrTypes( - InferenceContext* c, const std::vector& existing, - const std::vector& updated) { - return ShapeRefiner::IsUpdatedShapesOrTypes(c, existing, updated); -} - Status GraphProperties::MergeEnqueueShapesAndTypes( - const std::vector& shapes_and_types, InferenceContext* qctx, + SymbolicShapeRefiner* shape_refiner, const Node* qnode, + const std::vector& shapes_and_types, std::vector* queue_shapes_and_types) { if (shapes_and_types.size() != queue_shapes_and_types->size()) { return errors::InvalidArgument( @@ -434,13 +565,14 @@ Status GraphProperties::MergeEnqueueShapesAndTypes( DataTypeString(b.dtype)); } - TF_RETURN_IF_ERROR(qctx->Merge(a.shape, b.shape, &b.shape)); + b.shape = shape_refiner->OutputAsIntersection(qnode, i, a.shape, b.shape); } return Status::OK(); } Status GraphProperties::RelaxEnqueueShapesAndMergeTypes( - const std::vector& shapes_and_types, InferenceContext* qctx, + SymbolicShapeRefiner* shape_refiner, const Node* qnode, + const std::vector& shapes_and_types, std::vector* queue_shapes_and_types) { if (shapes_and_types.size() != queue_shapes_and_types->size()) { return errors::InvalidArgument( @@ -456,11 +588,197 @@ Status GraphProperties::RelaxEnqueueShapesAndMergeTypes( DataTypeString(b.dtype)); } - Relax(qctx, a.shape, b.shape, &b.shape); + b.shape = shape_refiner->OutputAsUnion(qnode, i, a.shape, b.shape); } return Status::OK(); } +// If a Merge node has a NextIteration node as an input then that input will +// try to forward an UnknownShape at graph construction time. However, the +// Merge shape function will always propagate an UnknownShape if any of its +// inputs are UnknownShapes. So we need to ignore the input from NextIteration +// nodes to propagate any known shape from the Merge node. +Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes) { + InferenceContext* c = shape_refiner->GetContext(node); + CHECK_NE(c, nullptr); + + ShapeHandle out; + bool out_initialized = false; + for (const Edge* e : node->in_edges()) { + if (e->IsControlEdge()) { + continue; + } + // Skip back edges during the initial propagation phase. This is equivalent + // to assuming that all the inputs to the merge nodes are fed by the same + // shape, and will be corrected as needed in the relaxation phase. + if (!relax && e->src()->IsNextIteration()) { + continue; + } + + InferenceContext* in = shape_refiner->GetContext(e->src()); + ShapeHandle input = in->output(e->src_output()); + if (relax) { + c->RelaxInput(e->dst_input(), input); + } else { + c->MergeInput(e->dst_input(), input); + } + if (!out_initialized) { + out_initialized = true; + out = input; + continue; + } + if (relax) { + out = shape_refiner->OutputAsUnion(node, 0, input, out); + } else { + out = shape_refiner->OutputAsIntersection(node, 0, input, out); + } + } + + if (!shape_refiner->EquivalentShapes(out, c->output(0))) { + c->set_output(0, out); + c->set_output(1, c->Scalar()); + new_shapes->push(node); + } + + return Status::OK(); +} + +// Manually propagate the input shape for Enter nodes and update any Merge node +// outputs. +Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes) { + auto enter_ctx = shape_refiner->GetContext(node); + CHECK_NE(enter_ctx, nullptr); + + for (const Edge* e : node->in_edges()) { + if (e->IsControlEdge()) { + continue; + } + InferenceContext* in = shape_refiner->GetContext(e->src()); + ShapeHandle input = in->output(e->src_output()); + if (!enter_ctx->output(0).SameHandle(input)) { + if (relax) { + enter_ctx->RelaxInput(0, input); + } else { + enter_ctx->MergeInput(0, input); + } + enter_ctx->set_output(0, input); + new_shapes->push(node); + } + } + return Status::OK(); +} + +Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner, + bool relax, const Node* n, + TopoQueue* new_shapes) { + if (n->IsEnter()) { + // The Enter shape function always forwards an UnknownShape, so do the right + // thing here. + TF_RETURN_IF_ERROR(UpdateEnter(shape_refiner, n, relax, new_shapes)); + } else if (n->IsMerge()) { + // Properly handle merge nodes. + TF_RETURN_IF_ERROR(UpdateMergeNode(shape_refiner, n, relax, new_shapes)); + } else { + // Rely on regular TF shape refinement for all the other nodes. + bool updated = false; + TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(n, relax, &updated)); + if (updated) { + // We want to avoid propagating through loops on the merge pass because + // the shapes are not guaranteed to converge. + if (relax || !n->IsNextIteration()) { + new_shapes->push(n); + } + } + } + return Status::OK(); +} + +// Propagates the shapes in the transitive fan-out of . +Status GraphProperties::PropagateShapes( + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, + const std::unordered_map>& + resources) { + do { + while (!new_shapes->empty()) { + const Node* n = new_shapes->pop(); + for (const Node* fanout : n->out_nodes()) { + TF_RETURN_IF_ERROR( + UpdateShapes(shape_refiner, relax, fanout, new_shapes)); + } + } + + for (const auto& resource : resources) { + // Resources need special handling: since the enqueue nodes are in the + // fanout of the queues, we need to manually propagate the shapes from + // enqueue node to the corresponding queue. + TF_RETURN_IF_ERROR(UpdateResource(resource.first, resource.second, + shape_refiner, relax, new_shapes)); + } + } while (!new_shapes->empty()); + + return Status::OK(); +} + +Status GraphProperties::UpdateResource( + const Node* qnode, const std::unordered_set& queue_inputs, + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes) { + // Proceed only if qnode is a queue or an Enter with queue input. + if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) { + return Status::OK(); + } + auto qctx = shape_refiner->GetContext(qnode); + if (!qctx) { + return Status::OK(); + } + auto* queue_handle_data = qctx->output_handle_shapes_and_types(0); + + // Merge all inputs into the enqueue node, regardless of which phase we + // are in. + std::vector queue_shapes_and_types; + if (queue_handle_data) { + queue_shapes_and_types = *queue_handle_data; + } + for (const auto& node : queue_inputs) { + auto ctx = shape_refiner->GetContext(node); + if (!ctx) { + continue; + } + // TODO(bsteiner): handle EnqueueMany as well. + if (node->type_string().find("Enqueue") != std::string::npos && + node->type_string().find("EnqueueMany") == std::string::npos) { + std::vector shapes_and_types; + for (int i = 1; i < ctx->num_inputs(); ++i) { + shapes_and_types.push_back({ctx->input(i), node->input_type(i)}); + } + if (queue_shapes_and_types.empty()) { + queue_shapes_and_types = shapes_and_types; + } else { + if (relax) { + TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes( + shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types)); + } else { + TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes( + shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types)); + } + } + } + } + + if (queue_handle_data == nullptr || + !shape_refiner->EquivalentShapesAndTypes(*queue_handle_data, + queue_shapes_and_types)) { + qctx->set_output_handle_shapes_and_types(0, queue_shapes_and_types); + + new_shapes->push(qnode); + } + + return Status::OK(); +} + Status GraphProperties::InferStatically() { Graph graph(OpRegistry::Global()); FunctionLibraryDefinition function_library(graph.op_registry(), @@ -493,146 +811,35 @@ Status GraphProperties::InferStatically() { } if (node->IsEnter()) { enter_nodes.insert(node); - } else if (node->IsNextIteration()) { - for (const Node* output : node->out_nodes()) { - if (output->IsMerge()) { - merge_nodes.insert(output); - } - } + } else if (node->IsMerge()) { + merge_nodes.insert(node); } } - // Propagate the initial shapes of Enter nodes manually (the Enter shape - // function always forwards an UnknownShape). - std::queue new_shapes; - for (const Node* node : enter_nodes) { - TF_RETURN_IF_ERROR( - UpdateEnter(&shape_refiner, node, false /* relax */, &new_shapes)); - } - TF_RETURN_IF_ERROR( - PropagateShapes(&shape_refiner, false /* relax */, &new_shapes)); + SymbolicShapeRefiner refiner(&shape_refiner); // We propagate shapes through the graph in two phases. In the first phase, we - // exclusively merge shapes but we do not propagate shapes through loops. Then - // on the second phase, we exclusively relax shapes and propagate shapes - // through loops until reaching fixed point. + // exclusively merge shapes but we do not propagate shapes through the + // backedge of loops (i.e. the NextIteration node). Then on the second phase, + // we exclusively relax shapes and propagate shapes through loops until + // reaching fixed point. for (int relax = 0; relax < 2; relax++) { - // We don't update Merge nodes with the input of NextIteration nodes on the - // merge pass. So we do that at the beginning of the relax pass instead. - if (relax) { - bool updated = false; - for (const Node* node : merge_nodes) { - TF_RETURN_IF_ERROR( - shape_refiner.UpdateNode(node, false /* relax */, &updated)); - } + TopoQueue new_shapes; + // Force the propagation of shapes of Enter nodes manually (the Enter shape + // function always forwards an UnknownShape). + for (const Node* node : enter_nodes) { + TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes)); } - - bool done = true; - do { - if (relax) { - // Propagate shapes through any loops in the graph by relaxing. - for (const Node* node : merge_nodes) { - new_shapes.push(node); - } - TF_RETURN_IF_ERROR(PropagateShapes(&shape_refiner, relax, &new_shapes)); - } - - // If we found a resource, try to propagate the shapes through it. - new_shapes = std::queue(); - for (const auto& resource_data : resources) { - const Node* qnode = resource_data.first; - // Proceed only if qnode is a queue or an Enter with queue input. - if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) { - continue; - } - auto qctx = shape_refiner.GetContext(qnode); - if (!qctx) { - continue; - } - - // Check to see if the shape is fully defined. - auto* queue_handle_data = qctx->output_handle_shapes_and_types(0); - if (queue_handle_data != nullptr) { - bool fully_defined = true; - for (const auto& shape_and_type : *queue_handle_data) { - if (!qctx->FullyDefined(shape_and_type.shape) || - shape_and_type.dtype == DT_INVALID) { - fully_defined = false; - } - } - // If we are merging, then we are done. If we are relaxing, then we - // could potentially propagate a less specific shape. - if (fully_defined && !relax) { - continue; - } - } - - // Merge all inputs into the enqueue node, regardless of which phase we - // are in. - std::vector queue_shapes_and_types; - for (const auto& node : resource_data.second) { - auto ctx = shape_refiner.GetContext(node); - if (!ctx) { - continue; - } - // TODO(bsteiner): handle EnqueueMany as well. - if (node->type_string().find("Enqueue") != std::string::npos && - node->type_string().find("EnqueueMany") == std::string::npos) { - std::vector shapes_and_types; - for (int i = 1; i < ctx->num_inputs(); ++i) { - shapes_and_types.push_back({ctx->input(i), node->input_type(i)}); - } - - if (queue_shapes_and_types.empty()) { - queue_shapes_and_types = shapes_and_types; - } else { - TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes( - shapes_and_types, qctx, &queue_shapes_and_types)); - } - } - } - // Combine the input shapes with the existing output shape. We either - // merge or relax depending on which phase we are in. - if (queue_handle_data != nullptr) { - if (relax) { - TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes( - *queue_handle_data, qctx, &queue_shapes_and_types)); - } else { - TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes( - *queue_handle_data, qctx, &queue_shapes_and_types)); - } - } - // Set the output ShapeAndType handles. If we successfully update the - // resource node, add its fan-out to the queue. - const std::vector* outputs = - qctx->output_handle_shapes_and_types(0); - std::vector existing_outputs; - if (outputs) { - existing_outputs = *outputs; - } - if (!queue_shapes_and_types.empty()) { - if (!relax && qctx->MergeOutputHandleShapesAndTypes( - 0, queue_shapes_and_types)) { - new_shapes.push(qnode); - } else if (relax && qctx->RelaxOutputHandleShapesAndMergeTypes( - 0, queue_shapes_and_types)) { - if (IsUpdatedShapesOrTypes( - qctx, existing_outputs, - *qctx->output_handle_shapes_and_types(0))) { - new_shapes.push(qnode); - } - } - } - } - // Propagate the shapes in the transitive fan-out of the queue. - done = new_shapes.empty(); - if (!done) { - TF_RETURN_IF_ERROR(PropagateShapes(&shape_refiner, relax, &new_shapes)); - } - } while (!done); + // Seed the propagation of shapes through merge nodes. + for (const Node* node : merge_nodes) { + TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes)); + } + // Propagate shapes normally. + TF_RETURN_IF_ERROR( + PropagateShapes(&refiner, relax, &new_shapes, resources)); } - // Track shapes globally accross the graph. + // Track shapes globally across the graph. SymbolicShapeManager shape_manager; bool found_error = false; for (const Node* const node : graph.nodes()) { diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index a6aed0bba6..37c8654541 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -26,6 +26,9 @@ limitations under the License. namespace tensorflow { namespace grappler { +class SymbolicShapeRefiner; +class TopoQueue; + // A TensorFlow model to optimize. // Models are represented by the combination of a graph, one of more fetch // nodes, and potentially a set of nodes to feed. @@ -64,31 +67,42 @@ class GraphProperties { // Merges shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. - Status MergeEnqueueShapesAndTypes( + static Status MergeEnqueueShapesAndTypes( + SymbolicShapeRefiner* shape_refiner, const Node* qnode, const std::vector& shapes_and_types, - shape_inference::InferenceContext* qctx, std::vector* queue_shapes_and_types); // Relaxes shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. - Status RelaxEnqueueShapesAndMergeTypes( + static Status RelaxEnqueueShapesAndMergeTypes( + SymbolicShapeRefiner* shape_refiner, const Node* qnode, const std::vector& shapes_and_types, - shape_inference::InferenceContext* qctx, std::vector* queue_shapes_and_types); - // This gives access to private function of InferenceContext. - static void Relax(shape_inference::InferenceContext* c, - shape_inference::ShapeHandle s0, - shape_inference::ShapeHandle s1, - shape_inference::ShapeHandle* out); - - // These give access to private functions of ShapeRefiner. - static bool SameDefinedShape(shape_inference::InferenceContext* c, - shape_inference::ShapeHandle s0, - shape_inference::ShapeHandle s1); - static bool IsUpdatedShapesOrTypes( - shape_inference::InferenceContext* c, - const std::vector& existing, - const std::vector& updated); + // Update the shapes for qnode. If output shapes of qnode have changed, + // enqueue its fanout in 'new_shapes'. + static Status UpdateResource( + const Node* qnode, const std::unordered_set& queue_inputs, + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes); + + // Update the output shapes of a Merge node, and enqueue its fanout in + // new_shapes if needed. + static Status UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes); + // Process the Enter node, and enqueue its fanout in new_shapes if needed. + static Status UpdateEnter(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes); + // Update the shapes for node 'n'. If output shapes for n have changed, + // enqueue its fanout in 'new_shapes'. + static Status UpdateShapes(SymbolicShapeRefiner* shape_refiner, bool relax, + const Node* n, TopoQueue* new_shapes); + // Propagate the shapes for the nodes enqueued in new_shapes and their + // transitive fanout until a fixed point is reached. + static Status PropagateShapes( + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, + const std::unordered_map>& + resources); }; } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index f785f627e1..74d48158a9 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -362,7 +362,7 @@ TEST_F(GraphPropertiesTest, WhileLoop) { /* with tf.Graph().as_default(): i0 = tf.constant(0) - m0 = tf.ones([2, 2]) + m0 = tf.placeholder([-1, 2]) c = lambda i, m: i < 10 b = lambda i, m: [i+1, tf.concat([m, m], axis=0)] r = tf.while_loop( @@ -387,6 +387,14 @@ TEST_F(GraphPropertiesTest, WhileLoop) { EXPECT_EQ(DT_FLOAT, prop.dtype()); EXPECT_EQ("float: [-1,2]", PropToString(prop)); } + + // The loop outputs batch dim should be different from the input batch dim + // since we concatenated along the batch dim. + auto shape_in = properties.GetOutputProperties("ones").at(0).shape(); + auto shape_out = properties.GetOutputProperties("while/Exit_1").at(0).shape(); + EXPECT_GE(-2, shape_in.dim(0).size()); + EXPECT_GE(-2, shape_out.dim(0).size()); + EXPECT_NE(shape_in.dim(0).size(), shape_out.dim(0).size()); } TEST_F(GraphPropertiesTest, NestedLoop) { @@ -750,6 +758,10 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { Output e = ops::Add(s.WithOpName("e"), c, d); Output f = ops::Add(s.WithOpName("f"), a, c); + Output zero = ops::Const(s.WithOpName("zero"), 0.0f, {}); + Output g = ops::Shape(s.WithOpName("g"), c); + Output h = ops::Fill(s.WithOpName("h"), g, zero); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); @@ -773,15 +785,20 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { EXPECT_EQ(shape_b.dim(0).size(), shape_d.dim(0).size()); const auto shape_e = properties.GetOutputProperties("e").at(0).shape(); - EXPECT_EQ(2, shape_e.dim_size()); + ASSERT_EQ(2, shape_e.dim_size()); EXPECT_EQ(shape_e.dim(0).size(), shape_c.dim(0).size()); EXPECT_NE(shape_e.dim(1).size(), shape_c.dim(1).size()); EXPECT_NE(shape_e.dim(0).size(), shape_d.dim(0).size()); const auto shape_f = properties.GetOutputProperties("f").at(0).shape(); - EXPECT_EQ(2, shape_f.dim_size()); + ASSERT_EQ(2, shape_f.dim_size()); EXPECT_EQ(shape_f.dim(0).size(), shape_a.dim(0).size()); EXPECT_EQ(shape_f.dim(1).size(), shape_a.dim(1).size()); + + const auto shape_h = properties.GetOutputProperties("h").at(0).shape(); + ASSERT_EQ(2, shape_f.dim_size()); + EXPECT_EQ(shape_h.dim(0).size(), shape_c.dim(0).size()); + EXPECT_EQ(shape_h.dim(1).size(), shape_c.dim(1).size()); } TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { diff --git a/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt b/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt index c11833bd1a..fbc3659d9a 100644 --- a/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt +++ b/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt @@ -21,7 +21,7 @@ node { } node { name: "ones" - op: "Const" + op: "PlaceholderV2" attr { key: "dtype" value { @@ -29,19 +29,15 @@ node { } } attr { - key: "value" + key: "shape" value { - tensor { - dtype: DT_FLOAT - tensor_shape { - dim { - size: 2 - } - dim { - size: 2 - } + shape { + dim { + size: -1 + } + dim { + size: 2 } - float_val: 1.0 } } } -- GitLab From e01949c0248b80ed25bd3caee5e4db19c5a574f6 Mon Sep 17 00:00:00 2001 From: Taehoon Lee Date: Sat, 18 Nov 2017 05:54:36 +0900 Subject: [PATCH 0589/1801] Add missing conv1d in `tf.contrib.layers` (#14513) --- tensorflow/contrib/layers/__init__.py | 2 ++ tensorflow/contrib/layers/python/layers/layers.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py index d309ba958d..66412b7fe2 100644 --- a/tensorflow/contrib/layers/__init__.py +++ b/tensorflow/contrib/layers/__init__.py @@ -19,6 +19,7 @@ See the @{$python/contrib.layers} guide. @@avg_pool2d @@avg_pool3d @@batch_norm +@@convolution1d @@convolution2d @@convolution3d @@conv2d_in_plane @@ -112,6 +113,7 @@ from tensorflow.contrib.layers.python.layers import * from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['bias_add', + 'conv1d', 'conv2d', 'conv3d', 'elu', diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 3063085218..bad4d4dd48 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -58,12 +58,14 @@ __all__ = ['avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', + 'conv1d', 'conv2d', 'conv3d', 'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose', 'convolution', + 'convolution1d', 'convolution2d', 'convolution2d_in_plane', 'convolution2d_transpose', @@ -1070,6 +1072,7 @@ def convolution(inputs, outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) +convolution1d = convolution convolution2d = convolution convolution3d = convolution @@ -2971,6 +2974,7 @@ relu6 = functools.partial(fully_connected, activation_fn=nn.relu6) linear = functools.partial(fully_connected, activation_fn=None) # Simple alias. +conv1d = convolution1d conv2d = convolution2d conv3d = convolution3d conv2d_transpose = convolution2d_transpose -- GitLab From 98ef53d5541049655c9160130595253fdefd4590 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Fri, 17 Nov 2017 12:49:48 -0800 Subject: [PATCH 0590/1801] Remove `tf.data.SparseType` and instead use `tf.data.Dataset.output_classes` as the means for recording the class type of the dataset elements. PiperOrigin-RevId: 176147440 --- .../contrib/data/python/ops/batching.py | 40 ++- .../contrib/data/python/ops/dataset_ops.py | 4 + .../contrib/data/python/ops/error_ops.py | 9 +- .../contrib/data/python/ops/grouping.py | 36 +- .../contrib/data/python/ops/interleave_ops.py | 22 +- tensorflow/contrib/data/python/ops/readers.py | 4 + .../contrib/data/python/ops/scan_ops.py | 23 +- tensorflow/python/data/__init__.py | 2 - tensorflow/python/data/ops/dataset_ops.py | 254 ++++++++++---- tensorflow/python/data/ops/iterator_ops.py | 93 +++-- tensorflow/python/data/ops/readers.py | 12 + tensorflow/python/data/util/BUILD | 3 + tensorflow/python/data/util/sparse.py | 150 ++++----- tensorflow/python/data/util/sparse_test.py | 318 ++++++++++++++---- tensorflow/python/kernel_tests/BUILD | 17 +- tensorflow/python/ops/sparse_ops.py | 2 +- .../api/golden/tensorflow.data.-dataset.pbtxt | 4 + ...ow.data.-fixed-length-record-dataset.pbtxt | 4 + .../golden/tensorflow.data.-iterator.pbtxt | 10 +- .../golden/tensorflow.data.-sparse-type.pbtxt | 13 - .../tensorflow.data.-t-f-record-dataset.pbtxt | 4 + .../tensorflow.data.-text-line-dataset.pbtxt | 4 + .../tools/api/golden/tensorflow.data.pbtxt | 4 - 23 files changed, 728 insertions(+), 304 deletions(-) delete mode 100644 tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index cc63baed81..1ac059b374 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -112,8 +112,10 @@ def filter_irregular_batches(batch_size): tensor_batch_size = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") - flattened = _RestructuredDataset(dataset, - tuple(nest.flatten(dataset.output_types))) + flattened = _RestructuredDataset( + dataset, + tuple(nest.flatten(dataset.output_types)), + output_classes=tuple(nest.flatten(dataset.output_classes))) def _predicate(*xs): """Return `True` if this element is a full batch.""" @@ -135,7 +137,11 @@ def filter_irregular_batches(batch_size): known_shapes = nest.map_structure(_set_first_dimension, dataset.output_shapes) - return _RestructuredDataset(filtered, dataset.output_types, known_shapes) + return _RestructuredDataset( + filtered, + dataset.output_types, + known_shapes, + output_classes=dataset.output_classes) return _apply_fn @@ -237,6 +243,10 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset): output_shapes=self.output_shapes, output_types=self.output_types) + @property + def output_classes(self): + return (ops.Tensor, ops.Tensor, ops.Tensor) + @property def output_shapes(self): num_elements = tensor_shape.Dimension(None) @@ -252,7 +262,11 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset): class _RestructuredDataset(dataset_ops.Dataset): """An internal helper for changing the structure and shape of a dataset.""" - def __init__(self, dataset, output_types, output_shapes=None): + def __init__(self, + dataset, + output_types, + output_shapes=None, + output_classes=None): """Creates a new dataset with the given output types and shapes. The given `dataset` must have a structure that is convertible: @@ -268,6 +282,8 @@ class _RestructuredDataset(dataset_ops.Dataset): output_types: A nested structure of `tf.DType` objects. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects. If omitted, the shapes will be inherited from `dataset`. + output_classes: (Optional.) A nested structure of class types. + If omitted, the class types will be inherited from `dataset`. Raises: ValueError: If either `output_types` or `output_shapes` is not compatible @@ -307,10 +323,21 @@ class _RestructuredDataset(dataset_ops.Dataset): output_shapes)) self._output_shapes = nest.map_structure_up_to( output_types, tensor_shape.as_shape, output_shapes) + if output_classes is None: + # Inherit class types from the original `dataset`. + self._output_classes = nest.pack_sequence_as(output_types, + nest.flatten( + dataset.output_classes)) + else: + self._output_classes = output_classes def _as_variant_tensor(self): return self._dataset._as_variant_tensor() # pylint: disable=protected-access + @property + def output_classes(self): + return self._output_classes + @property def output_types(self): return self._output_types @@ -345,8 +372,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): batch_size=self._batch_size, num_parallel_batches=self._num_parallel_batches, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) # pylint: enable=protected-access @property diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 45d6dbe743..863c94ef9f 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -48,6 +48,10 @@ class Dataset(dataset_ops.Dataset): def _as_variant_tensor(self): return self._dataset._as_variant_tensor() # pylint: disable=protected-access + @property + def output_classes(self): + return self._dataset.output_classes + @property def output_shapes(self): return self._dataset.output_shapes diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 194b611513..aa629cba47 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -63,9 +63,14 @@ class IgnoreErrorsDataset(dataset_ops.Dataset): def _as_variant_tensor(self): return gen_dataset_ops.ignore_errors_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 86337271bc..ef91c56726 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -88,15 +88,21 @@ def group_by_window(key_func, class _VariantDataset(dataset_ops.Dataset): """A Dataset wrapper for a tf.variant-typed function argument.""" - def __init__(self, dataset_variant, output_types, output_shapes): + def __init__(self, dataset_variant, output_types, output_shapes, + output_classes): super(_VariantDataset, self).__init__() self._dataset_variant = dataset_variant self._output_types = output_types self._output_shapes = output_shapes + self._output_classes = output_classes def _as_variant_tensor(self): return self._dataset_variant + @property + def output_classes(self): + return self._output_classes + @property def output_shapes(self): return self._output_shapes @@ -138,17 +144,21 @@ class GroupByWindowDataset(dataset_ops.Dataset): def _make_key_func(self, key_func, input_dataset): """Make wrapping Defun for key_func.""" - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_key_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) # pylint: disable=protected-access if dataset_ops._should_unpack_args(nested_args): ret = key_func(*nested_args) @@ -170,14 +180,15 @@ class GroupByWindowDataset(dataset_ops.Dataset): def tf_reduce_func(key, window_dataset_variant): """A wrapper for Defun that facilitates shape inference.""" key.set_shape([]) - window_dataset = _VariantDataset(window_dataset_variant, - input_dataset.output_types, - input_dataset.output_shapes) + window_dataset = _VariantDataset( + window_dataset_variant, input_dataset.output_types, + input_dataset.output_shapes, input_dataset.output_classes) if not isinstance(window_dataset, dataset_ops.Dataset): raise TypeError("`window_dataset` must return a `Dataset` object.") output_dataset = reduce_func(key, window_dataset) if not isinstance(output_dataset, dataset_ops.Dataset): raise TypeError("`reduce_func` must return a `Dataset` object.") + self._output_classes = output_dataset.output_classes self._output_types = output_dataset.output_types self._output_shapes = output_dataset.output_shapes return output_dataset._as_variant_tensor() # pylint: disable=protected-access @@ -185,6 +196,10 @@ class GroupByWindowDataset(dataset_ops.Dataset): self._reduce_func = tf_reduce_func self._reduce_func.add_to_graph(ops.get_default_graph()) + @property + def output_classes(self): + return self._output_classes + @property def output_shapes(self): return self._output_shapes @@ -203,5 +218,6 @@ class GroupByWindowDataset(dataset_ops.Dataset): reduce_func=self._reduce_func, window_size_func=self._window_size_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 830642c040..53324e06e7 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -36,17 +36,21 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): super(ParallelInterleaveDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access dataset = map_func(*nested_args) else: @@ -55,6 +59,7 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): if not isinstance(dataset, dataset_ops.Dataset): raise TypeError("`map_func` must return a `Dataset` object.") + self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes @@ -79,8 +84,13 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): self._sloppy, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 632082b5f1..bb47832fe9 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -269,6 +269,10 @@ class _SqlDataset(dataset_ops.Dataset): nest.flatten(self.output_types), nest.flatten(self.output_shapes)) + @property + def output_classes(self): + return nest.map_structure(lambda _: ops.Tensor, self._output_types) + @property def output_shapes(self): return nest.map_structure(lambda _: tensor_shape.TensorShape([]), diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 2cfc0709cd..7c595b1814 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -53,6 +53,7 @@ class _ScanDataset(dataset_ops.Dataset): [t.dtype for t in nest.flatten(self._initial_state)]) # Will be populated by calling `tf_scan_func`. + self._output_classes = None self._output_shapes = None self._output_types = None @@ -68,13 +69,16 @@ class _ScanDataset(dataset_ops.Dataset): flat_new_state_shapes = [] @function.Defun(*(flat_state_types + nest.flatten( - sparse.unwrap_sparse_types(input_dataset.output_types)))) + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes)))) # pylint: disable=protected-access def tf_scan_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the state and input_dataset. - for arg, shape in zip( - args, - flat_state_shapes + nest.flatten(input_dataset.output_shapes)): + # TODO(b/69424092): Check that neither inputs nor outputs are sparse. + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) # pylint: disable=protected-access + for arg, shape in zip(args, + flat_state_shapes + nest.flatten(dense_shapes)): arg.set_shape(shape) pivot = len(flat_state_shapes) @@ -108,6 +112,8 @@ class _ScanDataset(dataset_ops.Dataset): "state. Expected %s; got %s." % (self._state_types, nest.pack_sequence_as( self._state_types, [t.dtype for t in flat_new_state]))) + self._output_classes = nest.pack_sequence_as( + output_value, [ops.Tensor for _ in flat_output_value]) self._output_types = nest.pack_sequence_as( output_value, [t.dtype for t in flat_output_value]) @@ -147,8 +153,13 @@ class _ScanDataset(dataset_ops.Dataset): self._scan_func.captured_inputs, f=self._scan_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py index 504500d245..239f9b0d59 100644 --- a/tensorflow/python/data/__init__.py +++ b/tensorflow/python/data/__init__.py @@ -21,7 +21,6 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@FixedLengthRecordDataset @@TextLineDataset @@TFRecordDataset -@@SparseType """ from __future__ import absolute_import @@ -34,7 +33,6 @@ from tensorflow.python.data.ops.iterator_ops import Iterator from tensorflow.python.data.ops.readers import FixedLengthRecordDataset from tensorflow.python.data.ops.readers import TextLineDataset from tensorflow.python.data.ops.readers import TFRecordDataset -from tensorflow.python.data.util.sparse import SparseType # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 5f981e2670..d434c8e522 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -97,13 +97,15 @@ class Dataset(object): container="", shared_name=shared_name, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) with ops.colocate_with(iterator_resource): initializer = gen_dataset_ops.make_iterator(self._as_variant_tensor(), iterator_resource) return iterator_ops.Iterator(iterator_resource, initializer, - self.output_types, self.output_shapes) + self.output_types, self.output_shapes, + self.output_classes) def make_one_shot_iterator(self): """Creates an `Iterator` for enumerating the elements of this dataset. @@ -144,9 +146,23 @@ class Dataset(object): gen_dataset_ops.one_shot_iterator( dataset_factory=_make_dataset, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)), None, - self.output_types, self.output_shapes) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, + self.output_classes))), None, + self.output_types, self.output_shapes, self.output_classes) + + @abc.abstractproperty + def output_classes(self): + """Returns the class of each component of an element of this dataset. + + The expected values are `tf.Tensor` and `tf.SparseTensor`. + + Returns: + A nested structure of Python `type` objects corresponding to each + component of an element of this dataset. + """ + raise NotImplementedError("Dataset.output_classes") @abc.abstractproperty def output_shapes(self): @@ -163,9 +179,8 @@ class Dataset(object): """Returns the type of each component of an element of this dataset. Returns: - A nested structure of `tf.DType` (or `tf.data.SparseType`) objects - corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an - element of this dataset. + A nested structure of `tf.DType` objects corresponding to each component + of an element of this dataset. """ raise NotImplementedError("Dataset.output_types") @@ -882,7 +897,13 @@ class TensorDataset(Dataset): def _as_variant_tensor(self): return gen_dataset_ops.tensor_dataset( nest.flatten(self._tensors), - output_shapes=nest.flatten(self.output_shapes)) + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return nest.pack_sequence_as( + self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)]) @property def output_shapes(self): @@ -915,7 +936,13 @@ class TensorSliceDataset(Dataset): def _as_variant_tensor(self): return gen_dataset_ops.tensor_slice_dataset( nest.flatten(self._tensors), - output_shapes=nest.flatten(self.output_shapes)) + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return nest.pack_sequence_as( + self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)]) @property def output_shapes(self): @@ -945,6 +972,10 @@ class SparseTensorSliceDataset(Dataset): self._sparse_tensor.indices, self._sparse_tensor.values, self._sparse_tensor.dense_shape) + @property + def output_classes(self): + return (ops.Tensor, ops.Tensor, ops.Tensor) + @property def output_shapes(self): indices_shape = self._sparse_tensor.indices.get_shape() @@ -994,6 +1025,12 @@ class ZipDataset(Dataset): ]) # pylint: enable=protected-access + @property + def output_classes(self): + return nest.pack_sequence_as( + self._datasets, + [ds.output_classes for ds in nest.flatten(self._datasets)]) + @property def output_shapes(self): return nest.pack_sequence_as( @@ -1030,11 +1067,16 @@ class ConcatenateDataset(Dataset): return gen_dataset_ops.concatenate_dataset( self._input_dataset._as_variant_tensor(), self._dataset_to_concatenate._as_variant_tensor(), - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) # pylint: enable=protected-access + @property + def output_classes(self): + return self._input_dataset.output_classes + @property def output_shapes(self): return nest.pack_sequence_as(self._input_dataset.output_shapes, [ @@ -1066,9 +1108,14 @@ class RepeatDataset(Dataset): return gen_dataset_ops.repeat_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1111,9 +1158,14 @@ class RangeDataset(Dataset): start=self._start, stop=self._stop, step=self._step, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return ops.Tensor @property def output_shapes(self): @@ -1138,9 +1190,14 @@ class CacheDataset(Dataset): return gen_dataset_ops.cache_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access filename=self._filename, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1186,9 +1243,14 @@ class ShuffleDataset(Dataset): seed=self._seed, seed2=self._seed2, reshuffle_each_iteration=self._reshuffle_each_iteration, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1212,9 +1274,14 @@ class TakeDataset(Dataset): return gen_dataset_ops.take_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1238,9 +1305,14 @@ class SkipDataset(Dataset): return gen_dataset_ops.skip_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1257,7 +1329,7 @@ class BatchDataset(Dataset): def __init__(self, input_dataset, batch_size): """See `Dataset.batch()` for details.""" super(BatchDataset, self).__init__() - if sparse.any_sparse(input_dataset.output_types): + if sparse.any_sparse(input_dataset.output_classes): # TODO(b/63669786): support batching of sparse tensors raise TypeError("Batching of sparse tensors is not currently supported") self._input_dataset = input_dataset @@ -1268,9 +1340,14 @@ class BatchDataset(Dataset): return gen_dataset_ops.batch_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access batch_size=self._batch_size, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1330,7 +1407,7 @@ class PaddedBatchDataset(Dataset): def __init__(self, input_dataset, batch_size, padded_shapes, padding_values): """See `Dataset.batch()` for details.""" super(PaddedBatchDataset, self).__init__() - if sparse.any_sparse(input_dataset.output_types): + if sparse.any_sparse(input_dataset.output_classes): # TODO(b/63669786): support batching of sparse tensors raise TypeError("Batching of sparse tensors is not currently supported") self._input_dataset = input_dataset @@ -1364,7 +1441,12 @@ class PaddedBatchDataset(Dataset): for s in nest.flatten(self._padded_shapes) ], padding_values=nest.flatten(self._padding_values), - output_shapes=nest.flatten(self.output_shapes)) + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1393,20 +1475,25 @@ class MapDataset(Dataset): super(MapDataset, self).__init__() self._input_dataset = input_dataset + self._output_classes = None self._output_shapes = None self._output_types = None - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): ret = map_func(*nested_args) else: @@ -1425,16 +1512,17 @@ class MapDataset(Dataset): if isinstance(ret, list): ret = tuple(ret) - # Identify components that hold sparse tensor values. - types = sparse.get_sparse_types(ret) + self._output_classes = sparse.get_classes(ret) + self._output_shapes = nest.pack_sequence_as( + ret, [t.get_shape() for t in nest.flatten(ret)]) + self._output_types = nest.pack_sequence_as( + ret, [t.dtype for t in nest.flatten(ret)]) + # Serialize any sparse tensors and convert result to tensors. ret = nest.pack_sequence_as(ret, [ ops.convert_to_tensor(t) for t in nest.flatten(sparse.serialize_sparse_tensors(ret)) ]) - self._output_shapes = nest.pack_sequence_as( - types, [t.get_shape() for t in nest.flatten(ret)]) - self._output_types = sparse.wrap_sparse_types(ret, types) return nest.flatten(ret) self._map_func = tf_map_func @@ -1447,8 +1535,13 @@ class MapDataset(Dataset): self._map_func.captured_inputs, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): @@ -1478,8 +1571,9 @@ class ParallelMapDataset(MapDataset): f=self._map_func, num_parallel_calls=self._num_parallel_calls, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) # pylint: enable=protected-access @@ -1491,17 +1585,21 @@ class FlatMapDataset(Dataset): super(FlatMapDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: @@ -1510,6 +1608,7 @@ class FlatMapDataset(Dataset): if not isinstance(dataset, Dataset): raise TypeError("`map_func` must return a `Dataset` object.") + self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes @@ -1524,8 +1623,13 @@ class FlatMapDataset(Dataset): self._map_func.captured_inputs, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): @@ -1545,17 +1649,21 @@ class InterleaveDataset(Dataset): super(InterleaveDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: @@ -1564,6 +1672,7 @@ class InterleaveDataset(Dataset): if not isinstance(dataset, Dataset): raise TypeError("`map_func` must return a `Dataset` object.") + self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes @@ -1585,8 +1694,13 @@ class InterleaveDataset(Dataset): self._block_length, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): @@ -1605,17 +1719,21 @@ class FilterDataset(Dataset): super(FilterDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_predicate(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): ret = predicate(*nested_args) else: @@ -1637,8 +1755,13 @@ class FilterDataset(Dataset): other_arguments=self._predicate.captured_inputs, predicate=self._predicate, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1663,9 +1786,14 @@ class PrefetchDataset(Dataset): return gen_dataset_ops.prefetch_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access buffer_size=self._buffer_size, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index 987a9b53ad..663bed07b2 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -29,7 +29,7 @@ class Iterator(object): """Represents the state of iterating through a `Dataset`.""" def __init__(self, iterator_resource, initializer, output_types, - output_shapes): + output_shapes, output_classes): """Creates a new iterator from the given iterator resource. Note: Most users will not call this initializer directly, and will @@ -41,21 +41,27 @@ class Iterator(object): iterator. initializer: A `tf.Operation` that should be run to initialize this iterator. - output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) - objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) - component of an element of this dataset. + output_types: A nested structure of `tf.DType` objects corresponding to + each component of an element of this dataset. output_shapes: A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. + output_classes: A nested structure of Python `type` object corresponding + to each + component of an element of this iterator. """ self._iterator_resource = iterator_resource self._initializer = initializer + self._output_classes = output_classes self._output_types = output_types self._output_shapes = output_shapes self._string_handle = gen_dataset_ops.iterator_to_string_handle( self._iterator_resource) @staticmethod - def from_structure(output_types, output_shapes=None, shared_name=None): + def from_structure(output_types, + output_shapes=None, + shared_name=None, + output_classes=None): """Creates a new, uninitialized `Iterator` with the given structure. This iterator-constructing method can be used to create an iterator that @@ -102,15 +108,17 @@ class Iterator(object): ``` Args: - output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) - objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) - component of an element of this dataset. + output_types: A nested structure of `tf.DType` objects corresponding to + each component of an element of this dataset. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. If omitted, each component will have an unconstrainted shape. shared_name: (Optional.) If non-empty, this iterator will be shared under the given name across multiple sessions that share the same devices (e.g. when using a remote server). + output_classes: (Optional.) A nested structure of Python `type` objects + corresponding to each component of an element of this iterator. If + omitted, each component is assumed to be of type `tf.Tensor`. Returns: An `Iterator`. @@ -126,18 +134,24 @@ class Iterator(object): else: output_shapes = nest.map_structure_up_to( output_types, tensor_shape.as_shape, output_shapes) + if output_classes is None: + output_classes = nest.map_structure(lambda _: ops.Tensor, output_types) nest.assert_same_structure(output_types, output_shapes) if shared_name is None: shared_name = "" iterator_resource = gen_dataset_ops.iterator( container="", shared_name=shared_name, - output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)), + output_types=nest.flatten(output_types), output_shapes=nest.flatten(output_shapes)) - return Iterator(iterator_resource, None, output_types, output_shapes) + return Iterator(iterator_resource, None, output_types, output_shapes, + output_classes) @staticmethod - def from_string_handle(string_handle, output_types, output_shapes=None): + def from_string_handle(string_handle, + output_types, + output_shapes=None, + output_classes=None): """Creates a new, uninitialized `Iterator` based on the given handle. This method allows you to define a "feedable" iterator where you can choose @@ -170,12 +184,14 @@ class Iterator(object): Args: string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates to a handle produced by the `Iterator.string_handle()` method. - output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) - objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) - component of an element of this dataset. + output_types: A nested structure of `tf.DType` objects corresponding to + each component of an element of this dataset. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. If omitted, each component will have an unconstrainted shape. + output_classes: (Optional.) A nested structure of Python `type` objects + corresponding to each component of an element of this iterator. If + omitted, each component is assumed to be of type `tf.Tensor`. Returns: An `Iterator`. @@ -187,13 +203,16 @@ class Iterator(object): else: output_shapes = nest.map_structure_up_to( output_types, tensor_shape.as_shape, output_shapes) + if output_classes is None: + output_classes = nest.map_structure(lambda _: ops.Tensor, output_types) nest.assert_same_structure(output_types, output_shapes) string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string) iterator_resource = gen_dataset_ops.iterator_from_string_handle( string_handle, - output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)), + output_types=nest.flatten(output_types), output_shapes=nest.flatten(output_shapes)) - return Iterator(iterator_resource, None, output_types, output_shapes) + return Iterator(iterator_resource, None, output_types, output_shapes, + output_classes) @property def initializer(self): @@ -230,6 +249,13 @@ class Iterator(object): with ops.name_scope(name, "make_initializer") as name: nest.assert_same_structure(self._output_types, dataset.output_types) nest.assert_same_structure(self._output_shapes, dataset.output_shapes) + for iterator_class, dataset_class in zip( + nest.flatten(self._output_classes), + nest.flatten(dataset.output_classes)): + if iterator_class is not dataset_class: + raise TypeError( + "Expected output classes %r but got dataset with output class %r." + % (self._output_classes, dataset.output_classes)) for iterator_dtype, dataset_dtype in zip( nest.flatten(self._output_types), nest.flatten(dataset.output_types)): if iterator_dtype != dataset_dtype: @@ -237,8 +263,8 @@ class Iterator(object): "Expected output types %r but got dataset with output types %r." % (self._output_types, dataset.output_types)) for iterator_shape, dataset_shape in zip( - nest.flatten(self._output_shapes), - nest.flatten(dataset.output_shapes)): + nest.flatten(self._output_shapes), nest.flatten( + dataset.output_shapes)): if not iterator_shape.is_compatible_with(dataset_shape): raise TypeError("Expected output shapes compatible with %r but got " "dataset with output shapes %r." % @@ -261,11 +287,15 @@ class Iterator(object): gen_dataset_ops.iterator_get_next( self._iterator_resource, output_types=nest.flatten( - sparse.unwrap_sparse_types( - self._output_types)), + sparse.as_dense_types( + self._output_types, + self._output_classes)), output_shapes=nest.flatten( - self._output_shapes), - name=name)), self._output_types) + sparse.as_dense_shapes( + self._output_shapes, + self._output_classes)), + name=name)), self._output_types, + self._output_shapes, self._output_classes) def string_handle(self, name=None): """Returns a string-valued `tf.Tensor` that represents this iterator. @@ -282,13 +312,25 @@ class Iterator(object): return gen_dataset_ops.iterator_to_string_handle( self._iterator_resource, name=name) + @property + def output_classes(self): + """Returns the class of each component of an element of this iterator. + + The expected values are `tf.Tensor` and `tf.SparseTensor`. + + Returns: + A nested structure of Python `type` objects corresponding to each + component of an element of this dataset. + """ + return self._output_classes + @property def output_shapes(self): """Returns the shape of each component of an element of this iterator. Returns: A nested structure of `tf.TensorShape` objects corresponding to each - component of an element of this iterator. + component of an element of this dataset. """ return self._output_shapes @@ -297,8 +339,7 @@ class Iterator(object): """Returns the type of each component of an element of this iterator. Returns: - A nested structure of `tf.DType` (or `tf.data.SparseType`) objects - corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an - element of this dataset. + A nested structure of `tf.DType` objects corresponding to each component + of an element of this dataset. """ return self._output_types diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py index 531716581f..c6fb8531ae 100644 --- a/tensorflow/python/data/ops/readers.py +++ b/tensorflow/python/data/ops/readers.py @@ -70,6 +70,10 @@ class TextLineDataset(Dataset): return gen_dataset_ops.text_line_dataset( self._filenames, self._compression_type, self._buffer_size) + @property + def output_classes(self): + return ops.Tensor + @property def output_shapes(self): return tensor_shape.scalar() @@ -110,6 +114,10 @@ class TFRecordDataset(Dataset): return gen_dataset_ops.tf_record_dataset( self._filenames, self._compression_type, self._buffer_size) + @property + def output_classes(self): + return ops.Tensor + @property def output_shapes(self): return tensor_shape.TensorShape([]) @@ -159,6 +167,10 @@ class FixedLengthRecordDataset(Dataset): self._filenames, self._header_bytes, self._record_bytes, self._footer_bytes, self._buffer_size) + @property + def output_classes(self): + return ops.Tensor + @property def output_shapes(self): return tensor_shape.scalar() diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index 41d8513b16..f7d7fe98d3 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -38,8 +38,10 @@ py_library( deps = [ ":nest", "//tensorflow/python:dtypes", + "//tensorflow/python:ops", "//tensorflow/python:sparse_ops", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python:util", "@six_archive//:six", ], @@ -56,6 +58,7 @@ py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", ], ) diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py index 673fac095c..4d25f6a963 100644 --- a/tensorflow/python/data/util/sparse.py +++ b/tensorflow/python/data/util/sparse.py @@ -19,29 +19,70 @@ from __future__ import print_function from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import sparse_ops -def any_sparse(types): - """Checks for sparse tensor types. +def any_sparse(classes): + """Checks for sparse tensor. Args: - types: a structure with tensor types. + classes: a structure of objects that identify the dataset item classes Returns: - `True` if `types` contains a sparse tensor type and `False` otherwise. + `True` if `classes` contains a sparse tensor type and `False` otherwise. """ - return any([isinstance(ty, SparseType) for ty in nest.flatten(types)]) + return any([c is sparse_tensor.SparseTensor for c in nest.flatten(classes)]) -def deserialize_sparse_tensors(tensors, types): +def as_dense_shapes(shapes, classes): + """Converts sparse tensor shapes to their physical shapes. + + Args: + shapes: a structure of shapes to convert. + classes: a structure of objects that identify the dataset item classes + + Returns: + a structure matching the nested structure of `shapes`, containing + `tensor_shape.unknown_shape()` at positions where `classes` contains + `tf.SparseTensor` and matching contents of `shapes` otherwise + """ + ret = nest.pack_sequence_as(shapes, [ + tensor_shape.unknown_shape() if c is sparse_tensor.SparseTensor else shape + for shape, c in zip(nest.flatten(shapes), nest.flatten(classes)) + ]) + return ret + + +def as_dense_types(types, classes): + """Converts sparse tensor types to `dtypes.string`. + + Args: + types: a structure of types to convert. + classes: a structure of objects that identify the dataset item classes + + Returns: + a structure matching the nested structure of `types`, containing + `dtypes.string` at positions where `classes` contains `tf.SparseTensor` and + matching contents of `types` otherwise + """ + ret = nest.pack_sequence_as(types, [ + dtypes.string if c is sparse_tensor.SparseTensor else ty + for ty, c in zip(nest.flatten(types), nest.flatten(classes)) + ]) + return ret + + +def deserialize_sparse_tensors(tensors, types, shapes, classes): """Deserializes sparse tensors. Args: tensors: a structure of tensors to deserialize. - types: a structure object the holds information about which tensors in - `tensors` represent serialized sparse tensors + types: a structure that holds information about types of `tensors` + shapes: a structure that holds information about shapes of `tensors` + classes: a structure of objects that identify the dataset item classes Returns: `tensors` with any serialized sparse tensors replaced by their deserialized @@ -49,27 +90,29 @@ def deserialize_sparse_tensors(tensors, types): """ # TODO(b/63669786): support batching of sparse tensors ret = nest.pack_sequence_as(types, [ - sparse_ops.deserialize_sparse(tensor, ty.dtype) - if isinstance(ty, SparseType) else tensor - for (tensor, ty) in zip(nest.flatten(tensors), nest.flatten(types)) + sparse_ops.deserialize_sparse(tensor, dtype=ty, rank=shape.ndims) + if c is sparse_tensor.SparseTensor else tensor + for (tensor, ty, shape, c) in zip( + nest.flatten(tensors), nest.flatten(types), nest.flatten(shapes), + nest.flatten(classes)) ]) return ret -def get_sparse_types(tensors): - """Gets sparse types for a structure of tensors. +def get_classes(tensors): + """Gets classes for a structure of tensors. Args: - tensors: the tensor structure to get sparse types for. + tensors: the tensor structure to get classes for. Returns: a structure matching the nested structure of `tensors`, containing - `SparseType` at positions where `tensors` contains a sparse tensor and - `None` otherwise + `tf.SparseTensor` at positions where `tensors` contains a sparse tensor and + `tf.Tensor` otherwise """ return nest.pack_sequence_as(tensors, [ - SparseType(tensor.dtype) - if isinstance(tensor, sparse_tensor.SparseTensor) else None + sparse_tensor.SparseTensor + if isinstance(tensor, sparse_tensor.SparseTensor) else ops.Tensor for tensor in nest.flatten(tensors) ]) @@ -90,74 +133,3 @@ def serialize_sparse_tensors(tensors): for tensor in nest.flatten(tensors) ]) return ret - - -def unwrap_sparse_types(types): - """Unwraps sparse tensor types as `dtypes.string`. - - Args: - types: a structure of types to unwrap. - - Returns: - a structure matching the nested structure of `types`, containing - `dtypes.string` at positions where `types` contains a sparse tensor and - matching contents of `types` otherwise - """ - ret = nest.pack_sequence_as(types, [ - dtypes.string if isinstance(ty, SparseType) else ty - for ty in nest.flatten(types) - ]) - return ret - - -def wrap_sparse_types(tensors, types): - """Wraps sparse tensor types in `SparseType`. - - Args: - tensors: a structure of tensors for which to wrap types. - types: a structure that holds information about which tensors in - `tensors` represent serialized sparse tensors - - Returns: - a structure matching the nested structure of `tensors`, containing - `SparseType` at positions where `tensors` contains a sparse tensor and - `DType` otherwise - """ - ret = nest.pack_sequence_as(types, [ - tensor.dtype if ty is None else ty - for tensor, ty in zip(nest.flatten(tensors), nest.flatten(types)) - ]) - return ret - - -class SparseType(object): - """Wrapper class for representing types of sparse tensors in tf.data.""" - - def __init__(self, dtype): - """Creates a new instace of `SparseType`. - - Args: - dtype: the sparse tensor type to wrap. - """ - self._dtype = dtype - - def __repr__(self): - return "SparseType({0!r})".format(self._dtype) - - def __eq__(self, other): - """Returns `True` iff `self == other`.""" - if not isinstance(other, SparseType): - return False - return self._dtype == other.dtype - - def __ne__(self, other): - """Returns `True` iff `self != other`.""" - return not self.__eq__(other) - - def __hash__(self): - return self._dtype.__hash__() - - @property - def dtype(self): - """Returns the wrapped sparse tensor type.""" - return self._dtype diff --git a/tensorflow/python/data/util/sparse_test.py b/tensorflow/python/data/util/sparse_test.py index e30ed639c2..a707570bab 100644 --- a/tensorflow/python/data/util/sparse_test.py +++ b/tensorflow/python/data/util/sparse_test.py @@ -22,7 +22,9 @@ from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import test @@ -30,17 +32,200 @@ class SparseTest(test.TestCase): def testAnySparse(self): test_cases = ( - ((), False), - ((None), False), - ((dtypes.string), False), - ((None, -1, dtypes.string), False), - ((sparse.SparseType(dtypes.string)), True), - ((None, sparse.SparseType(dtypes.string)), True), - ((sparse.SparseType(dtypes.string), dtypes.string), True), - ((((sparse.SparseType(dtypes.string)))), True) + { + "classes": (), + "expected": False + }, + { + "classes": (ops.Tensor), + "expected": False + }, + { + "classes": (((ops.Tensor))), + "expected": False + }, + { + "classes": (ops.Tensor, ops.Tensor), + "expected": False + }, + { + "classes": (ops.Tensor, sparse_tensor.SparseTensor), + "expected": True + }, + { + "classes": (sparse_tensor.SparseTensor, sparse_tensor.SparseTensor), + "expected": + True + }, + { + "classes": (sparse_tensor.SparseTensor, ops.Tensor), + "expected": True + }, + { + "classes": (((sparse_tensor.SparseTensor))), + "expected": True + }, ) for test_case in test_cases: - self.assertEqual(sparse.any_sparse(test_case[0]), test_case[1]) + self.assertEqual( + sparse.any_sparse(test_case["classes"]), test_case["expected"]) + + def assertShapesEqual(self, a, b): + for a, b in zip(nest.flatten(a), nest.flatten(b)): + self.assertEqual(a.ndims, b.ndims) + if a.ndims is None: + continue + for c, d in zip(a.as_list(), b.as_list()): + self.assertEqual(c, d) + + def testAsDenseShapes(self): + test_cases = ( + { + "types": (), + "classes": (), + "expected": () + }, + { + "types": tensor_shape.scalar(), + "classes": ops.Tensor, + "expected": tensor_shape.scalar() + }, + { + "types": tensor_shape.scalar(), + "classes": sparse_tensor.SparseTensor, + "expected": tensor_shape.unknown_shape() + }, + { + "types": (tensor_shape.scalar()), + "classes": (ops.Tensor), + "expected": (tensor_shape.scalar()) + }, + { + "types": (tensor_shape.scalar()), + "classes": (sparse_tensor.SparseTensor), + "expected": (tensor_shape.unknown_shape()) + }, + { + "types": (tensor_shape.scalar(), ()), + "classes": (ops.Tensor, ()), + "expected": (tensor_shape.scalar(), ()) + }, + { + "types": ((), tensor_shape.scalar()), + "classes": ((), ops.Tensor), + "expected": ((), tensor_shape.scalar()) + }, + { + "types": (tensor_shape.scalar(), ()), + "classes": (sparse_tensor.SparseTensor, ()), + "expected": (tensor_shape.unknown_shape(), ()) + }, + { + "types": ((), tensor_shape.scalar()), + "classes": ((), sparse_tensor.SparseTensor), + "expected": ((), tensor_shape.unknown_shape()) + }, + { + "types": (tensor_shape.scalar(), (), tensor_shape.scalar()), + "classes": (ops.Tensor, (), ops.Tensor), + "expected": (tensor_shape.scalar(), (), tensor_shape.scalar()) + }, + { + "types": (tensor_shape.scalar(), (), tensor_shape.scalar()), + "classes": (sparse_tensor.SparseTensor, (), + sparse_tensor.SparseTensor), + "expected": (tensor_shape.unknown_shape(), (), + tensor_shape.unknown_shape()) + }, + { + "types": ((), tensor_shape.scalar(), ()), + "classes": ((), ops.Tensor, ()), + "expected": ((), tensor_shape.scalar(), ()) + }, + { + "types": ((), tensor_shape.scalar(), ()), + "classes": ((), sparse_tensor.SparseTensor, ()), + "expected": ((), tensor_shape.unknown_shape(), ()) + }, + ) + for test_case in test_cases: + self.assertShapesEqual( + sparse.as_dense_shapes(test_case["types"], test_case["classes"]), + test_case["expected"]) + + def testAsDenseTypes(self): + test_cases = ( + { + "types": (), + "classes": (), + "expected": () + }, + { + "types": dtypes.int32, + "classes": ops.Tensor, + "expected": dtypes.int32 + }, + { + "types": dtypes.int32, + "classes": sparse_tensor.SparseTensor, + "expected": dtypes.string + }, + { + "types": (dtypes.int32), + "classes": (ops.Tensor), + "expected": (dtypes.int32) + }, + { + "types": (dtypes.int32), + "classes": (sparse_tensor.SparseTensor), + "expected": (dtypes.string) + }, + { + "types": (dtypes.int32, ()), + "classes": (ops.Tensor, ()), + "expected": (dtypes.int32, ()) + }, + { + "types": ((), dtypes.int32), + "classes": ((), ops.Tensor), + "expected": ((), dtypes.int32) + }, + { + "types": (dtypes.int32, ()), + "classes": (sparse_tensor.SparseTensor, ()), + "expected": (dtypes.string, ()) + }, + { + "types": ((), dtypes.int32), + "classes": ((), sparse_tensor.SparseTensor), + "expected": ((), dtypes.string) + }, + { + "types": (dtypes.int32, (), dtypes.int32), + "classes": (ops.Tensor, (), ops.Tensor), + "expected": (dtypes.int32, (), dtypes.int32) + }, + { + "types": (dtypes.int32, (), dtypes.int32), + "classes": (sparse_tensor.SparseTensor, (), + sparse_tensor.SparseTensor), + "expected": (dtypes.string, (), dtypes.string) + }, + { + "types": ((), dtypes.int32, ()), + "classes": ((), ops.Tensor, ()), + "expected": ((), dtypes.int32, ()) + }, + { + "types": ((), dtypes.int32, ()), + "classes": ((), sparse_tensor.SparseTensor, ()), + "expected": ((), dtypes.string, ()) + }, + ) + for test_case in test_cases: + self.assertEqual( + sparse.as_dense_types(test_case["types"], test_case["classes"]), + test_case["expected"]) def assertSparseValuesEqual(self, a, b): if not isinstance(a, sparse_tensor.SparseTensor): @@ -70,71 +255,74 @@ class SparseTest(test.TestCase): indices=[[0, 0]], values=[1], dense_shape=[1, 1])), ) for expected in test_cases: + classes = sparse.get_classes(expected) + shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None), + classes) + types = nest.map_structure(lambda _: dtypes.int32, classes) actual = sparse.deserialize_sparse_tensors( - sparse.serialize_sparse_tensors(expected), - sparse.get_sparse_types(expected)) + sparse.serialize_sparse_tensors(expected), types, shapes, + sparse.get_classes(expected)) nest.assert_same_structure(expected, actual) for a, e in zip(nest.flatten(actual), nest.flatten(expected)): self.assertSparseValuesEqual(a, e) - def testGetSparseTypes(self): - s = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[1], dense_shape=[1, 1]) - t = sparse.SparseType(dtypes.int32) - test_cases = ( - ((), ()), - (s, t), - ((s), (t)), - ((s, ()), (t, ())), - (((), s), ((), t)), - ) - for test_case in test_cases: - self.assertEqual(sparse.get_sparse_types(test_case[0]), test_case[1]) - - def testWrapSparseTypes(self): - c = constant_op.constant([1]) - d = dtypes.int32 - s = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[1], dense_shape=[1, 1]) - t = sparse.SparseType(dtypes.int32) + def testGetClasses(self): + s = sparse_tensor.SparseTensor(indices=[[0]], values=[1], dense_shape=[1]) + d = ops.Tensor + t = sparse_tensor.SparseTensor test_cases = ( - ((), ()), - (s, t), - (c, d), - ((s), (t)), - ((c), (d)), - ((s, ()), (t, ())), - (((), s), ((), t)), - ((c, ()), (d, ())), - (((), c), ((), d)), - ((s, (), c), (t, (), d)), - (((), s, ()), ((), t, ())), - (((), c, ()), ((), d, ())), + { + "classes": (), + "expected": () + }, + { + "classes": s, + "expected": t + }, + { + "classes": constant_op.constant([1]), + "expected": d + }, + { + "classes": (s), + "expected": (t) + }, + { + "classes": (constant_op.constant([1])), + "expected": (d) + }, + { + "classes": (s, ()), + "expected": (t, ()) + }, + { + "classes": ((), s), + "expected": ((), t) + }, + { + "classes": (constant_op.constant([1]), ()), + "expected": (d, ()) + }, + { + "classes": ((), constant_op.constant([1])), + "expected": ((), d) + }, + { + "classes": (s, (), constant_op.constant([1])), + "expected": (t, (), d) + }, + { + "classes": ((), s, ()), + "expected": ((), t, ()) + }, + { + "classes": ((), constant_op.constant([1]), ()), + "expected": ((), d, ()) + }, ) for test_case in test_cases: self.assertEqual( - sparse.wrap_sparse_types(test_case[0], sparse.get_sparse_types( - test_case[0])), test_case[1]) - - def testUnwrapSparseTypes(self): - d = dtypes.string - t = sparse.SparseType(dtypes.int32) - test_cases = ( - ((), ()), - (t, d), - (d, d), - ((t), (d)), - ((d), (d)), - ((t, ()), (d, ())), - (((), t), ((), d)), - ((d, ()), (d, ())), - (((), d), ((), d)), - ((t, (), d), (d, (), d)), - (((), t, ()), ((), d, ())), - (((), d, ()), ((), d, ())), - ) - for test_case in test_cases: - self.assertEqual(sparse.unwrap_sparse_types(test_case[0]), test_case[1]) + sparse.get_classes(test_case["classes"]), test_case["expected"]) if __name__ == "__main__": diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d3fa5cb778..7643cf2ddc 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2807,11 +2807,14 @@ tf_py_test( "//tensorflow/python:errors", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:session", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", ], tags = [ "manual", @@ -2825,18 +2828,12 @@ tf_py_test( srcs = ["dataset_from_generator_op_test.py"], additional_deps = [ "//third_party/py/numpy", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:session", - "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", ], ) @@ -3082,6 +3079,7 @@ tf_py_test( "//tensorflow/core:protos_all_py", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/util:sparse", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -3096,8 +3094,11 @@ tf_py_test( "//tensorflow/python:io_ops", "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:script_ops", "//tensorflow/python:session", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python:training", ], ) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 2ef6a0015b..3d6f942dca 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -1442,7 +1442,7 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None): Must have 3 columns. dtype: The `dtype` of the serialized `SparseTensor` object. rank: (optional) Python int, the rank of the `SparseTensor` object. - name: A name prefix for the returned tensors (optional) + name: A name prefix for the returned tensors (optional). Returns: A `SparseTensor` representing the deserialized `SparseTensor` object. diff --git a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt index d12514fe77..42de5c0c80 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt @@ -2,6 +2,10 @@ path: "tensorflow.data.Dataset" tf_class { is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt index 002d0c6a9f..e2fc8d6cb1 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -3,6 +3,10 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt index e62f6b247a..1f9aeb6ad6 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt @@ -6,6 +6,10 @@ tf_class { name: "initializer" mtype: "" } + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" @@ -16,15 +20,15 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'iterator_resource\', \'initializer\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'iterator_resource\', \'initializer\', \'output_types\', \'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_string_handle" - argspec: "args=[\'string_handle\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'string_handle\', \'output_types\', \'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "from_structure" - argspec: "args=[\'output_types\', \'output_shapes\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'output_types\', \'output_shapes\', \'shared_name\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "get_next" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt deleted file mode 100644 index b25f9a029f..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt +++ /dev/null @@ -1,13 +0,0 @@ -path: "tensorflow.data.SparseType" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "dtype" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt index 2b476dab66..9770389e5e 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt @@ -3,6 +3,10 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt index c4c5ac0775..7263230c1c 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt @@ -3,6 +3,10 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.pbtxt index b9f54a4d72..56fb270a49 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.pbtxt @@ -12,10 +12,6 @@ tf_module { name: "Iterator" mtype: "" } - member { - name: "SparseType" - mtype: "" - } member { name: "TFRecordDataset" mtype: "" -- GitLab From 62a890504620a10abac12d893c6878213469a648 Mon Sep 17 00:00:00 2001 From: Taehoon Lee Date: Sat, 18 Nov 2017 05:55:56 +0900 Subject: [PATCH 0591/1801] Fix typos (#14516) --- tensorflow/contrib/boosted_trees/lib/utils/example.h | 10 +++++----- tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h | 2 +- tensorflow/contrib/lite/testing/parse_testdata.cc | 2 +- tensorflow/contrib/lite/testing/test_runner.h | 2 +- tensorflow/contrib/lite/toco/model.h | 2 +- tensorflow/core/grappler/costs/graph_properties.cc | 2 +- .../core/grappler/optimizers/arithmetic_optimizer.cc | 2 +- tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc | 2 +- tensorflow/core/kernels/slice_op.h | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/utils/example.h b/tensorflow/contrib/boosted_trees/lib/utils/example.h index e388cf332c..54f60e1dee 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/example.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/example.h @@ -63,7 +63,7 @@ class SparseFloatFeatureColumn { public: void Reserve(const int32 size) { if (!single_dimensional_) { - mutlidimensional_values.Reserve(size); + multidimensional_values.Reserve(size); } } @@ -76,7 +76,7 @@ class SparseFloatFeatureColumn { DCHECK_EQ(0, feature_idx); single_value_ = value; } else { - mutlidimensional_values.Add(feature_idx, value); + multidimensional_values.Add(feature_idx, value); } initialized_ = true; } @@ -84,7 +84,7 @@ class SparseFloatFeatureColumn { void Clear() { single_dimensional_ = false; initialized_ = false; - mutlidimensional_values.Clear(); + multidimensional_values.Clear(); } OptionalValue operator[](int feature_idx) const { @@ -94,7 +94,7 @@ class SparseFloatFeatureColumn { if (single_dimensional_) { return OptionalValue(single_value_); } else { - return mutlidimensional_values[feature_idx]; + return multidimensional_values[feature_idx]; } } @@ -102,7 +102,7 @@ class SparseFloatFeatureColumn { bool single_dimensional_; bool initialized_; T single_value_; - SparseMultidimensionalValues mutlidimensional_values; + SparseMultidimensionalValues multidimensional_values; }; // Holds data for one example and enables lookup by feature column. diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index 5d06165772..b78e958e7f 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -1454,7 +1454,7 @@ inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) { * {@link ANeuralNetworksExecution_setOutputFromMemory} and * {@link ANeuralNetworksExecution_setOperandValue}. * - * To build a model that can accomodate inputs of various sizes, as you may want + * To build a model that can accommodate inputs of various sizes, as you may want * to do for a CNN, set the size of the dimensions that will vary at run time to * 0. If you do so, provide the full dimensions when calling * {@link ANeuralNetworksExecution_setInput} or {@link diff --git a/tensorflow/contrib/lite/testing/parse_testdata.cc b/tensorflow/contrib/lite/testing/parse_testdata.cc index 2b67052cad..d745ed2715 100644 --- a/tensorflow/contrib/lite/testing/parse_testdata.cc +++ b/tensorflow/contrib/lite/testing/parse_testdata.cc @@ -232,7 +232,7 @@ TfLiteStatus CheckOutputs(tflite::Interpreter* interpreter, // invoke { // id: xyz // input: 1,2,1,1,1,2,3,4 -// ouput: 4,5,6 +// output: 4,5,6 // } class Invoke : public Message { public: diff --git a/tensorflow/contrib/lite/testing/test_runner.h b/tensorflow/contrib/lite/testing/test_runner.h index 04ee4d9f7d..f4b26949b5 100644 --- a/tensorflow/contrib/lite/testing/test_runner.h +++ b/tensorflow/contrib/lite/testing/test_runner.h @@ -63,7 +63,7 @@ class TestRunner { // Run the model. virtual void Invoke() = 0; - // Verify that the contents of all ouputs conform to the existing + // Verify that the contents of all outputs conform to the existing // expectations. Return true if there are no expectations or they are all // satisfied. virtual bool CheckResults() = 0; diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index d992f8458f..63953a1e28 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -129,7 +129,7 @@ enum class AxesOrder { // The type of the scalars in an array. // Note that that does not by itself tell whether the values in the array are // real (are literally interpreted as real numbers) or quantized (only acquire -// a meaning as real numbers in conjuction with QuantizationParams). +// a meaning as real numbers in conjunction with QuantizationParams). // // In practice though: // float values are always real diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 548a2c6f70..fc6d02cf15 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -634,7 +634,7 @@ Status GraphProperties::InferStatically() { std::unordered_map dim_ids; - // Track shapes globally accross the graph. + // Track shapes globally across the graph. SymbolicShapeManager shape_manager; bool found_error = false; for (const Node* const node : graph.nodes()) { diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index c014f8898a..2394c07e18 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1146,7 +1146,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( if (simplified_node != nullptr) { nodes_to_simplify.PushBack(simplified_node); } - // When `node` is simplifed to another node rather than in-place, the + // When `node` is simplified to another node rather than in-place, the // consumers of `node` are already redirected to `simplified_tensor`. // Re-push the consumers into `nodes_to_simplify` for further // optimizations. diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc index 7249c8c66c..fc98556440 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc @@ -153,7 +153,7 @@ class DynamicPartitionOpGPU : public AsyncOpKernel { Tensor* partitions_out, Tensor* indices_out, DoneCallback done) { int32 M = std::max(N, num_partitions_); - // indices_in will be made slightly larger to accomodate + // indices_in will be made slightly larger to accommodate // later computations. OP_REQUIRES_OK_ASYNC( c, c->allocate_temp(DT_INT32, TensorShape({M}), indices_in), done); diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h index 55a4be985b..5fd6ce4067 100644 --- a/tensorflow/core/kernels/slice_op.h +++ b/tensorflow/core/kernels/slice_op.h @@ -103,7 +103,7 @@ void SliceUsingEigen(const Device& d, Tensor* out, const Tensor& in, namespace functor { // Template parameter NDIM is not neccesary here. The aim of keeping it -// is to compile struct slice seperately which minimizes the compiling time. +// is to compile struct slice separately which minimizes the compiling time. template struct Slice { void operator()(const Device& d, Tensor* out, const Tensor& in, -- GitLab From bf1683fda6541d20349befbce62af3c0a55d1d9c Mon Sep 17 00:00:00 2001 From: Christoph Boeddeker Date: Fri, 17 Nov 2017 21:56:42 +0100 Subject: [PATCH 0592/1801] fix assert_shallow_structure for dicts (#14499) * fix assert_shallow_structure for dicts * fix assert_shallow_structure for dict in util.nest * fix wrong NestTest (wrong key) --- tensorflow/python/data/util/nest.py | 10 ++++++++++ tensorflow/python/data/util/nest_test.py | 8 ++++++++ tensorflow/python/util/nest.py | 11 +++++++++++ tensorflow/python/util/nest_test.py | 11 ++++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py index 421513cafc..3ee490dbcf 100644 --- a/tensorflow/python/data/util/nest.py +++ b/tensorflow/python/data/util/nest.py @@ -367,6 +367,16 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True): "structure has length %s, while shallow structure has length %s." % (len(input_tree), len(shallow_tree))) + if check_types and isinstance(shallow_tree, dict): + if set(input_tree) != set(shallow_tree): + raise ValueError( + "The two structures don't have the same keys. Input " + "structure has keys %s, while shallow structure has keys %s." + % (list(_six.iterkeys(input_tree)), + list(_six.iterkeys(shallow_tree)))) + input_tree = list(_six.iteritems(input_tree)) + shallow_tree = list(_six.iteritems(shallow_tree)) + for shallow_branch, input_branch in zip(shallow_tree, input_tree): assert_shallow_structure(shallow_branch, input_branch, check_types=check_types) diff --git a/tensorflow/python/data/util/nest_test.py b/tensorflow/python/data/util/nest_test.py index 6416e2850d..47547eb49f 100644 --- a/tensorflow/python/data/util/nest_test.py +++ b/tensorflow/python/data/util/nest_test.py @@ -254,6 +254,14 @@ class NestTest(test.TestCase): nest.assert_shallow_structure(inp_ab2, inp_ab1) nest.assert_shallow_structure(inp_ab2, inp_ab1, check_types=False) + inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}} + inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}} + expected_message = ( + "The two structures don't have the same keys. Input " + "structure has keys \['c'\], while shallow structure has keys \['d'\].") + with self.assertRaisesRegexp(ValueError, expected_message): + nest.assert_shallow_structure(inp_ab2, inp_ab1) + def testFlattenUpTo(self): input_tree = (((2, 2), (3, 3)), ((4, 9), (5, 5))) shallow_tree = ((True, True), (False, True)) diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index dd6acee3c7..25dbc78d7a 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -452,6 +452,17 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True): "structure has length %s, while shallow structure has length %s." % (len(input_tree), len(shallow_tree))) + if check_types and isinstance(shallow_tree, dict): + if set(input_tree) != set(shallow_tree): + raise ValueError( + "The two structures don't have the same keys. Input " + "structure has keys %s, while shallow structure has keys %s." + % (list(_six.iterkeys(input_tree)), + list(_six.iterkeys(shallow_tree)))) + + input_tree = list(_six.iteritems(input_tree)) + shallow_tree = list(_six.iteritems(shallow_tree)) + for shallow_branch, input_branch in zip(shallow_tree, input_tree): assert_shallow_structure(shallow_branch, input_branch, check_types=check_types) diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py index c4020f4f3c..26aeaeec19 100644 --- a/tensorflow/python/util/nest_test.py +++ b/tensorflow/python/util/nest_test.py @@ -385,6 +385,15 @@ class NestTest(test.TestCase): nest.assert_shallow_structure(inp_ab2, inp_ab1) nest.assert_shallow_structure(inp_ab2, inp_ab1, check_types=False) + inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}} + inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}} + expected_message = ( + "The two structures don't have the same keys. Input " + "structure has keys \['c'\], while shallow structure has keys \['d'\].") + + with self.assertRaisesRegexp(ValueError, expected_message): + nest.assert_shallow_structure(inp_ab2, inp_ab1) + def testFlattenUpTo(self): # Shallow tree ends at scalar. input_tree = [[[2, 2], [3, 3]], [[4, 9], [5, 5]]] @@ -430,7 +439,7 @@ class NestTest(test.TestCase): input_tree) self.assertEqual(input_tree_flattened_as_shallow_tree, [0, 1, 2, 3, 4]) shallow_tree = collections.OrderedDict([("a", 0), - ("b", {"d": 3, "e": 1})]) + ("c", {"d": 3, "e": 1})]) input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree, input_tree) self.assertEqual(input_tree_flattened_as_shallow_tree, -- GitLab From 215591a355e8c758c6b1d901a96c5f71101426af Mon Sep 17 00:00:00 2001 From: FirefoxMetzger Date: Fri, 17 Nov 2017 21:56:54 +0100 Subject: [PATCH 0593/1801] use with when calling TFRecordWriter (#14497) TFRecordWriter supports __enter__ and __exit__ . Calling it through with is more pythonic and does cleanup in case something throws an uncaught exception. --- .../reading_data/convert_to_records.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tensorflow/examples/how_tos/reading_data/convert_to_records.py b/tensorflow/examples/how_tos/reading_data/convert_to_records.py index d14c1f7c86..a402eac053 100644 --- a/tensorflow/examples/how_tos/reading_data/convert_to_records.py +++ b/tensorflow/examples/how_tos/reading_data/convert_to_records.py @@ -52,17 +52,16 @@ def convert_to(data_set, name): filename = os.path.join(FLAGS.directory, name + '.tfrecords') print('Writing', filename) - writer = tf.python_io.TFRecordWriter(filename) - for index in range(num_examples): - image_raw = images[index].tostring() - example = tf.train.Example(features=tf.train.Features(feature={ - 'height': _int64_feature(rows), - 'width': _int64_feature(cols), - 'depth': _int64_feature(depth), - 'label': _int64_feature(int(labels[index])), - 'image_raw': _bytes_feature(image_raw)})) - writer.write(example.SerializeToString()) - writer.close() + with tf.python_io.TFRecordWriter(filename) as writer: + for index in range(num_examples): + image_raw = images[index].tostring() + example = tf.train.Example(features=tf.train.Features(feature={ + 'height': _int64_feature(rows), + 'width': _int64_feature(cols), + 'depth': _int64_feature(depth), + 'label': _int64_feature(int(labels[index])), + 'image_raw': _bytes_feature(image_raw)})) + writer.write(example.SerializeToString()) def main(unused_argv): -- GitLab From e4f2018c54f77f10005e664d87d8152bbde35c74 Mon Sep 17 00:00:00 2001 From: Sandeep Dcunha Date: Fri, 17 Nov 2017 15:57:04 -0500 Subject: [PATCH 0594/1801] Fixed incorrect documentation in tf.contrib.nn.deprecated_flipped_sparse_softmax_cross_entropy_with_logits where it referenced labels rather than logits. (#14485) --- tensorflow/contrib/nn/python/ops/cross_entropy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/nn/python/ops/cross_entropy.py b/tensorflow/contrib/nn/python/ops/cross_entropy.py index 61c1d1c6d9..5045f2c957 100644 --- a/tensorflow/contrib/nn/python/ops/cross_entropy.py +++ b/tensorflow/contrib/nn/python/ops/cross_entropy.py @@ -116,7 +116,7 @@ def deprecated_flipped_sparse_softmax_cross_entropy_with_logits(logits, Raises: ValueError: If logits are scalars (need to have rank >= 1) or if the rank - of the labels is not equal to the rank of the labels minus one. + of the labels is not equal to the rank of the logits minus one. """ return nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=logits, name=name) -- GitLab From fa4977f14fa7049f681bd7119e327254acf3c072 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Fri, 17 Nov 2017 13:06:05 -0800 Subject: [PATCH 0595/1801] Revert "Add missing conv1d in `tf.contrib.layers` (#14513)" (#14668) This reverts commit e01949c0248b80ed25bd3caee5e4db19c5a574f6. --- tensorflow/contrib/layers/__init__.py | 2 -- tensorflow/contrib/layers/python/layers/layers.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py index 66412b7fe2..d309ba958d 100644 --- a/tensorflow/contrib/layers/__init__.py +++ b/tensorflow/contrib/layers/__init__.py @@ -19,7 +19,6 @@ See the @{$python/contrib.layers} guide. @@avg_pool2d @@avg_pool3d @@batch_norm -@@convolution1d @@convolution2d @@convolution3d @@conv2d_in_plane @@ -113,7 +112,6 @@ from tensorflow.contrib.layers.python.layers import * from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['bias_add', - 'conv1d', 'conv2d', 'conv3d', 'elu', diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index bad4d4dd48..3063085218 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -58,14 +58,12 @@ __all__ = ['avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', - 'conv1d', 'conv2d', 'conv3d', 'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose', 'convolution', - 'convolution1d', 'convolution2d', 'convolution2d_in_plane', 'convolution2d_transpose', @@ -1072,7 +1070,6 @@ def convolution(inputs, outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) -convolution1d = convolution convolution2d = convolution convolution3d = convolution @@ -2974,7 +2971,6 @@ relu6 = functools.partial(fully_connected, activation_fn=nn.relu6) linear = functools.partial(fully_connected, activation_fn=None) # Simple alias. -conv1d = convolution1d conv2d = convolution2d conv3d = convolution3d conv2d_transpose = convolution2d_transpose -- GitLab From cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Fri, 17 Nov 2017 13:05:02 -0800 Subject: [PATCH 0596/1801] Add field to HLO proto field to enable reversing a convolution filter. PiperOrigin-RevId: 176149369 --- .../xla/client/computation_builder.cc | 1 + .../compiler/xla/service/hlo_evaluator.cc | 4 +- .../xla/service/hlo_evaluator_test.cc | 77 +++++++++++++++++++ tensorflow/compiler/xla/window_util.cc | 3 + tensorflow/compiler/xla/xla_data.proto | 4 + 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 763d94e94c..b1f4ea8ab6 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -153,6 +153,7 @@ bool ComputationBuilder::MakeWindow( } else { dim->set_window_dilation(1); } + dim->set_window_reversal(false); } return true; } diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index a722d1b3d9..2bd9723dbe 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -814,7 +814,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } rhs_index[dnums.kernel_spatial_dimensions(ki)] = - rhs_spatial_index[ki]; + window_dim.window_reversal() + ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]; } result_val += lhs_literal.Get(lhs_index) * diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 85477af6fe..94929dda6a 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -794,6 +794,83 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) { LiteralTestUtil::ExpectEqual(*expected, *result); } +TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) { + HloComputation::Builder b(TestName()); + + // clang-format off + // Input dimensions: [feature=2, height=3, batch=1, width=4] + Array4D input({ + {{{1, 2, 3, 4}}, + {{5, 6, 7, 8}}, + {{9, 10, 11, 12}}}, + {{{13, 14, 15, 16}}, + {{17, 18, 19, 20}}, + {{21, 22, 23, 24}}} + }); + // Weight dimensions: + // [kernel_output_feature=1, width=3, kernel_input_feature=2, height=3] + Array4D weight({{ + {{1, 7, 13}, + {4, 10, 16}}, + {{2, 8, 14}, + {5, 11, 17}}, + {{3, 9, 15}, + {6, 12, 18}} + }}); + // clang-format on + + auto lhs_literal = Literal::CreateR4FromArray4D(input); + HloInstruction* lhs_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_literal))); + + auto rhs_literal = Literal::CreateR4FromArray4D(weight); + HloInstruction* rhs_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal))); + rhs_instruction = b.AddInstruction(HloInstruction::CreateReverse( + rhs_instruction->shape(), rhs_instruction, {3, 1})); + + Window window; + WindowDimension dim; + dim.set_size(3); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(1); + dim.set_base_dilation(1); + dim.set_window_reversal(true); + *window.add_dimensions() = dim; + *window.add_dimensions() = dim; + + ConvolutionDimensionNumbers dnums; + dnums.set_input_batch_dimension(2); + dnums.set_output_batch_dimension(2); + dnums.set_input_feature_dimension(0); + dnums.set_output_feature_dimension(0); + dnums.add_spatial_dimensions(1); + dnums.add_spatial_dimensions(3); + + dnums.set_kernel_output_feature_dimension(0); + dnums.set_kernel_input_feature_dimension(2); + dnums.add_kernel_spatial_dimensions(3); + dnums.add_kernel_spatial_dimensions(1); + + const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + b.AddInstruction(HloInstruction::CreateConvolve( + shape, lhs_instruction, rhs_instruction, window, dnums)); + auto computation = module().AddEntryComputation(b.Build()); + + std::unique_ptr result = + evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie(); + + // clang-format off + // Result dimensions: [feature=1, height=1, batch=1, width=2] + Array4D expected_array({{{{2514, 2685}}}}); + // clang-format on + auto expected = Literal::CreateR4FromArray4D(expected_array); + + LiteralTestUtil::ExpectEqual(*expected, *result); +} + TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) { HloComputation::Builder b(TestName()); diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc index 6f7f1479b9..2e0eba8de0 100644 --- a/tensorflow/compiler/xla/window_util.cc +++ b/tensorflow/compiler/xla/window_util.cc @@ -44,6 +44,9 @@ namespace window_util { if (dim.window_dilation() != 1) { StrAppend(&str, ",window_dilation=", dim.window_dilation()); } + if (dim.window_reversal()) { + StrAppend(&str, ",window_reversal"); + } StrAppend(&str, ")"); return str; } diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index eac8f2ff07..39f5806739 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -357,6 +357,10 @@ message WindowDimension { // means no dilation. base_dilation - 1 no-op entries ("holes") are implicitly // placed between each base area element. See documentation for convolution. int64 base_dilation = 6; + + // Window reversal means that this dimension was logically reversed before the + // operation. + bool window_reversal = 7; } // Describes the windowing in an operation such as convolution. -- GitLab From 704d66d66508d10bd12f39d2f99de4eb8c8ad7b0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 13:11:10 -0800 Subject: [PATCH 0597/1801] Temporarily disable tsan for wals_test. PiperOrigin-RevId: 176150090 --- tensorflow/contrib/factorization/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index fe86a20ab1..29a0a4221a 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -270,6 +270,7 @@ tf_py_test( "manual", "noasan", # times out b/63678675 "nomsan", + "notsan", # b/69374301 ], ) -- GitLab From d79dd4993061670c1ec5ea01db3022f28d72d0a3 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 17 Nov 2017 13:55:17 -0800 Subject: [PATCH 0598/1801] Fix shutdown race in ClusterSpec propagation. Previously, the DeregisterGraph and DeleteWorkerSession RPCs could race against each other, leading to undefined behavior. This change inhibits the unnecessary DeregisterGraph RPCs when DeleteWorkerSession is being used, which both fixes the race and cuts down on unnecessary network traffic on the Session::Close path. PiperOrigin-RevId: 176155626 --- .../core/distributed_runtime/master_session.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 91a1fa7d1e..b3e499be79 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -67,13 +67,14 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { const SessionOptions& session_opts, const StatsPublisherFactory& stats_publisher_factory, GraphExecutionState* execution_state, bool is_partial, - WorkerCacheInterface* worker_cache) + WorkerCacheInterface* worker_cache, bool should_deregister) : session_handle_(handle), client_graph_(std::move(cg)), session_opts_(session_opts), is_partial_(is_partial), debug_opts_(bopts.debug_options), - worker_cache_(worker_cache) { + worker_cache_(worker_cache), + should_deregister_(should_deregister) { VLOG(1) << "Created ReffedClientGraph for node with " << client_graph()->graph.num_node_ids(); @@ -85,7 +86,11 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { } } - ~ReffedClientGraph() override { DeregisterPartitions(); } + ~ReffedClientGraph() override { + if (should_deregister_) { + DeregisterPartitions(); + } + } const ClientGraph* client_graph() { return client_graph_.get(); } @@ -209,6 +214,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { const DebugOptions& debug_opts_; WorkerCacheInterface* const worker_cache_; // Not owned. std::unordered_map name_to_node_; + const bool should_deregister_; // Graph partitioned into per-location subgraphs. struct Part { @@ -1262,7 +1268,7 @@ Status MasterSession::StartStep(const BuildGraphOptions& opts, int64* count, auto entry = new ReffedClientGraph( handle_, opts, std::move(client_graph), session_opts_, stats_publisher_factory_, execution_state_.get(), is_partial, - worker_cache); + worker_cache, !should_delete_worker_sessions_); iter = m->insert({hash, entry}).first; VLOG(1) << "Preparing to execute new graph"; } -- GitLab From 204e88b8387b9bc95a72981548bd4b14c1bb17d7 Mon Sep 17 00:00:00 2001 From: Gregg Helt Date: Fri, 17 Nov 2017 14:06:25 -0800 Subject: [PATCH 0599/1801] Fixed bug in code within programmer's guide markdown docs for Variables. Had a call to {var}.run() method, but Variable instances have no run() method, switched to eval() instead. --- tensorflow/docs_src/programmers_guide/variables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md index f310b89380..bda39cc28e 100644 --- a/tensorflow/docs_src/programmers_guide/variables.md +++ b/tensorflow/docs_src/programmers_guide/variables.md @@ -205,7 +205,7 @@ methods: v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) assignment = v.assign_add(1) tf.global_variables_initializer().run() -assignment.run() +assignment.eval() ``` Most TensorFlow optimizers have specialized ops that efficiently update the -- GitLab From 3f888e1539db5551cfcf9ee837a0555c224e0018 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 14:16:09 -0800 Subject: [PATCH 0600/1801] Add a Compiler::BuildExecutable interface that compiles the given Hlo module without optimizations. PiperOrigin-RevId: 176158846 --- tensorflow/compiler/xla/service/compiler.h | 17 +++++++++--- .../compiler/xla/service/cpu/cpu_compiler.cc | 26 ++++++++++++------- .../compiler/xla/service/cpu/cpu_compiler.h | 6 ++++- .../compiler/xla/service/gpu/gpu_compiler.cc | 22 +++++++++------- .../compiler/xla/service/gpu/gpu_compiler.h | 6 ++++- tensorflow/compiler/xla/service/hlo_runner.cc | 19 +++++++++----- tensorflow/compiler/xla/service/hlo_runner.h | 15 +++++++---- .../xla/service/interpreter/compiler.cc | 12 +++++++-- .../xla/service/interpreter/compiler.h | 8 ++++-- .../compiler/xla/service/llvm_compiler.cc | 4 ++- .../compiler/xla/service/llvm_compiler.h | 12 ++++++--- tensorflow/compiler/xla/service/service.cc | 5 +++- .../compiler/xla/tests/codegen_test_base.cc | 7 +++-- .../compiler/xla/tests/llvm_compiler_test.cc | 4 +-- 14 files changed, 114 insertions(+), 49 deletions(-) diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 5f021900c8..fc67330f5c 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -97,21 +97,32 @@ class Compiler { // Returns the ID of the platform that this compiler targets. virtual perftools::gputools::Platform::Id PlatformId() const = 0; + // Runs Hlo passes to optimize the given Hlo module, returns the optimized + // module. + virtual StatusOr> RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* executor) = 0; + // Compiles the HLO module for execution on a device given by the executor, - // and returns an executable object or an error status. Takes ownership of the - // HLO module and is free to transform it. + // and returns an executable object or an error status. No HLO passes are + // applied to module. Generally a module should be passed through RunHloPasses + // prior to calling this method because the some HLO passes are required for + // correctness. Takes ownership of the HLO module and is free to transform it. // // The compiler may optionally specialize to the individual device // (not just type of device) indicated by the executor. // // Use the overload below to compile computations that run in parallel. - virtual StatusOr> Compile( + virtual StatusOr> RunBackend( std::unique_ptr module, perftools::gputools::StreamExecutor* executor) = 0; // Compiles a set of HLO modules that can run in parallel, potentially // communicating data between the modules, and returns a corresponding // sequence of executable objects. + // + // TODO(b/68666782): Remove this method after adding support for multiple + // modules to RunHloPasses and RunBackends. virtual StatusOr>> Compile( std::vector> modules, std::vector> diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index f5b95d3657..b04a279395 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -426,8 +426,22 @@ Status InitializeModuleHooks( } // namespace -StatusOr> CpuCompiler::Compile( - std::unique_ptr module, se::StreamExecutor* stream_exec) { +StatusOr> CpuCompiler::RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* /*stream_exec*/) { + VLOG(2) << "Before optimization:"; + XLA_VLOG_LINES(2, module->ToString()); + + TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false)); + + VLOG(2) << "After optimization:"; + XLA_VLOG_LINES(2, module->ToString()); + return std::move(module); +} + +StatusOr> CpuCompiler::RunBackend( + std::unique_ptr module, + perftools::gputools::StreamExecutor* stream_exec) { const string timer_message = "Compiling [" + module->name() + "] for CPU using JIT"; ScopedLoggingTimer compiling_timer(timer_message, 1); @@ -458,14 +472,6 @@ StatusOr> CpuCompiler::Compile( llvm_module->setDataLayout(jit->data_layout()); llvm_module->setTargetTriple(jit->target_triple().getTriple()); - VLOG(2) << "Before optimization:"; - XLA_VLOG_LINES(2, module->ToString()); - - TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false)); - - VLOG(2) << "After optimization:"; - XLA_VLOG_LINES(2, module->ToString()); - HloComputation* computation = module->entry_computation(); std::unordered_map hlo_to_profile_idx; if (module->config().hlo_profiling_enabled()) { diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h index 963aced208..ebed7058d8 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h @@ -116,7 +116,11 @@ class CpuCompiler : public LLVMCompiler { // stream_execs) using LLVMCompiler::Compile; - StatusOr> Compile( + StatusOr> RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr> RunBackend( std::unique_ptr module, perftools::gputools::StreamExecutor* stream_exec) override; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 23fb308ec6..937d453a5c 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -126,7 +126,7 @@ string GetLibdeviceDir(const string& config_cuda_data_dir) { // Runs optimization passes on the given HLO module. tensorflow::Status OptimizeHloModule( - HloModule* hlo_module, const se::DeviceDescription& device_desc, + HloModule* hlo_module, const HloCostAnalysis::ShapeSizeFunction& shape_size_function) { { HloPassPipeline pipeline("optimization"); @@ -297,19 +297,23 @@ StatusOr> CompilePtx(const string& ptx, int cc_major, GpuCompiler::GpuCompiler() : pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {} -StatusOr> GpuCompiler::Compile( - std::unique_ptr module, se::StreamExecutor* stream_exec) { - TF_RET_CHECK(stream_exec != nullptr); - +StatusOr> GpuCompiler::RunHloPasses( + std::unique_ptr module, se::StreamExecutor* /*stream_exec*/) { { Tracing::TraceMe annotation("HLO Transforms", module->name(), /*is_expensive=*/true); - TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), - stream_exec->GetDeviceDescription(), - ShapeSizeBytesFunction())); TF_RETURN_IF_ERROR( - PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); + OptimizeHloModule(module.get(), ShapeSizeBytesFunction())); } + return std::move(module); +} + +StatusOr> GpuCompiler::RunBackend( + std::unique_ptr module, se::StreamExecutor* stream_exec) { + TF_RET_CHECK(stream_exec != nullptr); + + TF_RETURN_IF_ERROR( + PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); llvm::LLVMContext llvm_context; std::string buffer; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h index fe5fce615f..18e3434020 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h @@ -49,7 +49,11 @@ class GpuCompiler : public LLVMCompiler { // stream_execs) using LLVMCompiler::Compile; - StatusOr> Compile( + StatusOr> RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr> RunBackend( std::unique_ptr module, perftools::gputools::StreamExecutor* stream_exec) override; diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 63f2b1296e..6b6d48233a 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -114,11 +114,16 @@ HloRunner::~HloRunner() { StatusOr HloRunner::Execute( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments, - Shape* result_shape) { + Shape* result_shape, bool run_hlo_passes) { + if (run_hlo_passes) { + TF_ASSIGN_OR_RETURN( + module, backend().compiler()->RunHloPasses( + std::move(module), backend().default_stream_executor())); + } TF_ASSIGN_OR_RETURN( std::unique_ptr executable, - backend().compiler()->Compile(std::move(module), - backend().default_stream_executor())); + backend().compiler()->RunBackend(std::move(module), + backend().default_stream_executor())); se::Stream stream(backend().default_stream_executor()); stream.Init(); @@ -193,10 +198,12 @@ StatusOr> HloRunner::TransferFromDevice( StatusOr> HloRunner::ExecuteAndTransfer( std::unique_ptr module, - tensorflow::gtl::ArraySlice arguments) { + tensorflow::gtl::ArraySlice arguments, + bool run_hlo_passes) { Shape result_shape; - TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase device_base, - Execute(std::move(module), arguments, &result_shape)); + TF_ASSIGN_OR_RETURN( + se::DeviceMemoryBase device_base, + Execute(std::move(module), arguments, &result_shape, run_hlo_passes)); return TransferFromDevice(result_shape, device_base); } diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index a5732848c6..95cddafc91 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -65,17 +65,20 @@ class HloRunner { // Executes the given module with given literals as input and returns the // result as a Literal. The LiteralPtr type accepts Literal* or // std::unique_ptr. + // If run_hlo_passes is true, the module will be executed without Hlo + // optimization. template StatusOr> Execute( std::unique_ptr module, - const tensorflow::gtl::ArraySlice literals); + const tensorflow::gtl::ArraySlice literals, + bool run_hlo_passes = true); // Executes the given module and returns a global data handle. StatusOr Execute( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments, - Shape* result_shape); + Shape* result_shape, bool run_hlo_passes = true); // Transfers the given literal to the device and returns the data handle. StatusOr TransferToDevice( @@ -90,7 +93,8 @@ class HloRunner { StatusOr> ExecuteAndTransfer( std::unique_ptr module, tensorflow::gtl::ArraySlice - arguments); + arguments, + bool run_hlo_passes = true); // If backend is not created in the constructor, creates and returns the // default backend. If creation fails, crashes the program. @@ -112,14 +116,15 @@ class HloRunner { template StatusOr> HloRunner::Execute( std::unique_ptr module, - const tensorflow::gtl::ArraySlice literals) { + const tensorflow::gtl::ArraySlice literals, + bool run_hlo_passes) { std::vector arguments; for (const auto& literal : literals) { TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument, TransferToDevice(*literal)); arguments.push_back(argument); } - return ExecuteAndTransfer(std::move(module), arguments); + return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index 6d5796a24b..c9a5285a4f 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -69,11 +69,19 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) { return pipeline.Run(hlo_module).status(); } -StatusOr> InterpreterCompiler::Compile( +StatusOr> InterpreterCompiler::RunHloPasses( + std::unique_ptr hlo_module, + se::StreamExecutor* /*stream_exec*/) { + VLOG(1) << "Run hlo passes on graph " << hlo_module->name(); + TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get())); + return std::move(hlo_module); +} + +StatusOr> InterpreterCompiler::RunBackend( std::unique_ptr hlo_module, se::StreamExecutor* stream_exec) { TF_RET_CHECK(stream_exec != nullptr); - VLOG(1) << "Generate graph " << hlo_module->name(); + VLOG(1) << "Run backend " << hlo_module->name(); TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get())); diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h index cfdc9b6256..278cf51842 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.h +++ b/tensorflow/compiler/xla/service/interpreter/compiler.h @@ -43,8 +43,12 @@ class InterpreterCompiler : public Compiler { InterpreterCompiler() {} ~InterpreterCompiler() override {} - StatusOr> Compile( - std::unique_ptr hlo_modules, + StatusOr> RunHloPasses( + std::unique_ptr hlo_module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr> RunBackend( + std::unique_ptr hlo_module, perftools::gputools::StreamExecutor* stream_exec) override; StatusOr>> Compile( diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc index ba0304fb8c..34f3419269 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.cc +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -27,8 +27,10 @@ StatusOr>> LLVMCompiler::Compile( "Model partitioning not implemented for the CPU/GPU compilers!"); } + TF_ASSIGN_OR_RETURN( + modules[i], RunHloPasses(std::move(modules[i]), stream_execs[i][0])); TF_ASSIGN_OR_RETURN(std::unique_ptr executable, - Compile(std::move(modules[i]), stream_execs[i][0])); + RunBackend(std::move(modules[i]), stream_execs[i][0])); result.push_back(std::move(executable)); } diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h index c4f689eabe..c5393cef4f 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.h +++ b/tensorflow/compiler/xla/service/llvm_compiler.h @@ -58,10 +58,14 @@ class LLVMCompiler : public Compiler { void RemovePostOptimizationHook() { user_post_optimization_hook_ = nullptr; } // Bring in - // StatusOr> Compile( - // std::unique_ptr module, - // perftools::gputools::StreamExecutor* executor) - using Compiler::Compile; + // StatusOr> RunBackend( + // std::unique_ptr module, + // perftools::gputools::StreamExecutor* stream_exec) + // StatusOr> RunHloPasses( + // std::unique_ptr module, + // perftools::gputools::StreamExecutor* stream_exec) + using Compiler::RunBackend; + using Compiler::RunHloPasses; StatusOr>> Compile( std::vector> modules, diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index ee9501dd48..0544a1697b 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -430,9 +430,12 @@ StatusOr> Service::BuildExecutable( /*include_unreachable_instructions=*/ true)); + TF_ASSIGN_OR_RETURN( + module, backend->compiler()->RunHloPasses(std::move(module), executor)); + TF_ASSIGN_OR_RETURN( std::unique_ptr executable, - backend->compiler()->Compile(std::move(module), executor)); + backend->compiler()->RunBackend(std::move(module), executor)); if (!other_directory_path.empty()) { executable->set_session_module(std::move(session_module)); diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc index 43ea7f6019..e472408dcf 100644 --- a/tensorflow/compiler/xla/tests/codegen_test_base.cc +++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc @@ -19,8 +19,11 @@ namespace xla { StatusOr> CodegenTestBase::CompileToExecutable( std::unique_ptr hlo_module) { - return backend().compiler()->Compile(std::move(hlo_module), - backend().default_stream_executor()); + TF_ASSIGN_OR_RETURN(hlo_module, backend().compiler()->RunHloPasses( + std::move(hlo_module), + backend().default_stream_executor())); + return backend().compiler()->RunBackend(std::move(hlo_module), + backend().default_stream_executor()); } StatusOr> diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 62fab6a224..b5b95967ff 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -73,8 +73,8 @@ class LLVMCompilerTest : public ::testing::Test { compiler->SetPostOptimizationHook(post_opt_hook); ASSERT_TRUE(compiler - ->Compile(std::move(hlo_module), - backend_->default_stream_executor()) + ->RunBackend(std::move(hlo_module), + backend_->default_stream_executor()) .ok()); // Test that hooks were called. -- GitLab From 6610eb74981ffcbaaba6befc241ad6d34aded81e Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 17 Nov 2017 14:17:22 -0800 Subject: [PATCH 0601/1801] tfdbg: fix missing space in grpc error message PiperOrigin-RevId: 176159019 --- tensorflow/core/kernels/debug_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/debug_ops.h b/tensorflow/core/kernels/debug_ops.h index 2c21053121..381add3fb3 100644 --- a/tensorflow/core/kernels/debug_ops.h +++ b/tensorflow/core/kernels/debug_ops.h @@ -185,7 +185,7 @@ class BaseDebugOp : public OpKernel { if (!status.ok()) { LOG(ERROR) << "Debug node of watch key " << debug_watch_key_->debug_node_name - << "failed to publish debug tensor data to all URLs " + << " failed to publish debug tensor data to all URLs " << str_util::Join(debug_urls_, ", ") << ", due to: " << status.error_message(); } -- GitLab From 61b0ddca2570215a625e22f76348f51ffd661ddf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 14:23:01 -0800 Subject: [PATCH 0602/1801] Modify QuantizeAddContexts so that ops are added deterministically. This is needed when using multiple worker replicas so that the ops can be initialized consistently. PiperOrigin-RevId: 176159819 --- tensorflow/contrib/quantize/python/quantize.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 7db2d863aa..50a2b4c91c 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -164,7 +164,10 @@ class _QuantizeContext(object): def QuantizeAddContexts(self): """Quantizes all add ops in self.add_contexts.""" - for add_context in self.add_contexts: + # Loop through sorted self.add_contexts so that op creation is + # deterministic. This is needed when using multiple worker replicas so that + # the ops can be initialized consistently. + for add_context in sorted(self.add_contexts): add_op = self.GetOperationByNamesDontThrow([ add_context + '/Add', add_context + '/add']) if add_op is not None: -- GitLab From 3094dfcf387c122b678230b6c0df778aad594d1e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 14:36:39 -0800 Subject: [PATCH 0603/1801] [XLA:GPU] Partially enable Winograd convolution algorithm. Diable the algorithm for certain inputs to avoid a known bug in cuDNNv5 and cuDNNv6. PiperOrigin-RevId: 176161830 --- .../xla/service/gpu/convolution_thunk.cc | 37 +++++++++++++++---- .../xla/service/gpu/convolution_thunk.h | 1 + 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index e79d0a4c79..5fe5f55857 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -258,22 +258,19 @@ tensorflow::Status ConvolutionThunk::Convolve( } std::vector ConvolutionThunk::GetAlgorithms( - se::StreamExecutor* stream_exec) const { + bool with_winograd_nonfused, se::StreamExecutor* stream_exec) const { std::vector algorithms; - // TODO(yangzihao): Currently disable the use of winograd nonfused in XLA - // by default. Should send in conv parameters and enable it when - // ShouldIncludeWinogradNonfusedAlgo() returns true. switch (convolution_kind_) { case ConvolutionKind::kBackwardFilter: CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms( - /*with_winograd_nonfused=*/false, &algorithms)); + with_winograd_nonfused, &algorithms)); break; case ConvolutionKind::kBackwardInput: CHECK(stream_exec->GetConvolveBackwardDataAlgorithms( - /*with_winograd_nonfused=*/false, &algorithms)); + with_winograd_nonfused, &algorithms)); break; case ConvolutionKind::kForward: - CHECK(stream_exec->GetConvolveAlgorithms(/*with_winograd_nonfused=*/false, + CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused, &algorithms)); break; } @@ -287,6 +284,26 @@ static string AlgorithmToString(const se::dnn::AlgorithmDesc& algo) { return tensorflow::strings::StrCat(algo.algo_id()); } +// Determines whether we can safely perform a winograd non-fused convolution for +// the given input and output descriptors. This works around b/68264959, an +// integer overflow in cuDNNv5 and cuDNNv6. +static bool ShouldIncludeWinogradNonfusedAlgo( + const BatchDescriptor& input_descriptor, + const BatchDescriptor& output_descriptor) { + int64 batch = input_descriptor.count(); + int64 in_depths = input_descriptor.feature_map_count(); + int64 in_rows = input_descriptor.height(); + int64 in_cols = input_descriptor.width(); + int64 out_depths = output_descriptor.feature_map_count(); + + int64 total_size = 16 * std::ceil(batch / 16.0) * + std::max(in_depths, out_depths) * in_cols * in_rows * + sizeof(float); + int64 threshold = 1L << 31; + + return total_size < threshold; +} + tensorflow::Status ConvolutionThunk::ConvolveWithTune( const BatchDescriptor& input_descriptor, se::DeviceMemory input_data, const FilterDescriptor& filter_descriptor, @@ -303,9 +320,13 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( "ConvolutionThunk: " << this; + bool with_winograd_nonfused = + ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor); + se::dnn::ProfileResult best_result; se::dnn::ProfileResult best_result_without_scratch; - std::vector algorithms = GetAlgorithms(stream->parent()); + std::vector algorithms = + GetAlgorithms(with_winograd_nonfused, stream->parent()); for (auto algorithm : algorithms) { ConvolveScratchAllocator scratch_allocator( buffer_allocations.device_ordinal(), diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h index 13432301b2..5ac5db2f04 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h @@ -116,6 +116,7 @@ class ConvolutionThunk : public Thunk { // Returns the convolve algorithms that can be used for this ConvolutionThunk. std::vector GetAlgorithms( + bool with_winograd_nonfused, perftools::gputools::StreamExecutor* stream_exec) const; // Fastest cuDNN convolution algorithm for this thunk learned from -- GitLab From 9b858b88784b6a9232d23d3a13353cd6ef43cd18 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 17 Nov 2017 14:53:59 -0800 Subject: [PATCH 0604/1801] Avoid reordering in ports in SwitchGrad for CondContext. PiperOrigin-RevId: 176164285 --- tensorflow/python/ops/control_flow_grad.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py index 3c082b19b6..22dc6771ec 100644 --- a/tensorflow/python/ops/control_flow_grad.py +++ b/tensorflow/python/ops/control_flow_grad.py @@ -69,13 +69,12 @@ def _SwitchGrad(op, *grad): # meaning the output is not differentiable. return None, None elif isinstance(op_ctxt, CondContext): - good_grad = grad[op_ctxt.branch] zero_grad = grad[1 - op_ctxt.branch] # At this point, we have created zero_grad guarded by the right switch. # Unfortunately, we may still get None here for not trainable data types. if zero_grad is None: return None, None - return merge([good_grad, zero_grad], name="cond_grad")[0], None + return merge(grad, name="cond_grad")[0], None else: false_grad = switch(grad[0], op.inputs[1])[0] true_grad = switch(grad[1], op.inputs[1])[1] -- GitLab From c86793dd597649fdf64964f87e6f8e896966e490 Mon Sep 17 00:00:00 2001 From: Rui Zhao Date: Fri, 17 Nov 2017 15:11:09 -0800 Subject: [PATCH 0605/1801] Register tile_ops GPU kernel for bool types. PiperOrigin-RevId: 176166731 --- tensorflow/core/kernels/tile_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/tile_ops.cc | 14 ++++++++++++-- tensorflow/core/util/cuda_kernel_helper.h | 10 ++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/tile_functor_gpu.cu.cc b/tensorflow/core/kernels/tile_functor_gpu.cu.cc index 5a36e7567b..84a5060fc3 100644 --- a/tensorflow/core/kernels/tile_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/tile_functor_gpu.cu.cc @@ -90,6 +90,7 @@ typedef Eigen::GpuDevice GPUDevice; template struct Tile; \ template struct Tile; +TF_CALL_bool(DEFINE_TYPE); TF_CALL_int16(DEFINE_TYPE); TF_CALL_int32(DEFINE_TYPE); TF_CALL_int64(DEFINE_TYPE); diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc index fa5afe6a31..68cdae3249 100644 --- a/tensorflow/core/kernels/tile_ops.cc +++ b/tensorflow/core/kernels/tile_ops.cc @@ -222,6 +222,7 @@ TF_CALL_complex128(HANDLE_TYPE_NAME_CPU); TF_CALL_string(HANDLE_TYPE_NAME_CPU); #if GOOGLE_CUDA +TF_CALL_bool(HANDLE_TYPE_NAME_GPU); TF_CALL_float(HANDLE_TYPE_NAME_GPU); TF_CALL_double(HANDLE_TYPE_NAME_GPU); TF_CALL_int16(HANDLE_TYPE_NAME_GPU); @@ -534,7 +535,7 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad") TileGradientOp); #if GOOGLE_CUDA -#define REGISTER_GPU(type) \ +#define REGISTER_GPU_TILE(type) \ REGISTER_KERNEL_BUILDER(Name("Tile") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ @@ -546,7 +547,9 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad") .TypeConstraint("T") \ .TypeConstraint("Tmultiples") \ .HostMemory("multiples"), \ - TileOp); \ + TileOp); + +#define REGISTER_GPU_TILE_GRAD(type) \ REGISTER_KERNEL_BUILDER(Name("TileGrad") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ @@ -560,6 +563,11 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad") .HostMemory("multiples"), \ TileGradientOp); +#define REGISTER_GPU(type) \ + REGISTER_GPU_TILE(type); \ + REGISTER_GPU_TILE_GRAD(type); + +TF_CALL_bool(REGISTER_GPU_TILE); TF_CALL_float(REGISTER_GPU); TF_CALL_double(REGISTER_GPU); TF_CALL_half(REGISTER_GPU); @@ -568,6 +576,8 @@ TF_CALL_int32(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU) +#undef REGISTER_GPU_TILE +#undef REGISTER_GPU_TILE_GRAD #undef REGISTER_GPU #endif // GOOGLE_CUDA diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index 8315f208e7..8fa0dfbed9 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -374,6 +374,16 @@ __device__ __host__ inline Eigen::half ldg(const Eigen::half* address) { #endif } +template <> +__device__ __host__ inline bool ldg(const bool* address) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 + return *reinterpret_cast( + __ldg(reinterpret_cast(address))); +#else + return *address; +#endif +} + // CUDA provides atomic ops, but not for all types. We provide wrappers // for some ops and provide implementation for all reasonable types. #define CUDA_ATOMIC_WRAPPER(op, T) \ -- GitLab From 3cc43816cda27c497399bf94429b174db5ed6d6b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 15:19:17 -0800 Subject: [PATCH 0606/1801] Adds validation for categorical_columns in shared_embedding_columns. PiperOrigin-RevId: 176167775 --- .../python/feature_column/feature_column.py | 28 +++++++++++++--- .../feature_column/feature_column_test.py | 32 +++++++++++++++++++ 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 5ee93be7c3..a19636474b 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -689,12 +689,30 @@ def _shared_embedding_columns( raise ValueError('initializer must be callable if specified.') if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=1 / math.sqrt(dimension)) - # TODO(b/67952670): Validate categorical_columns. + mean=0.0, stddev=1. / math.sqrt(dimension)) + + # Sort the columns so the default collection name is deterministic even if the + # user passes columns from an unsorted collection, such as dict.values(). + sorted_columns = sorted(categorical_columns, key=lambda x: x.name) + + c0 = sorted_columns[0] + if not isinstance(c0, _CategoricalColumn): + raise ValueError( + 'All categorical_columns must be subclasses of _CategoricalColumn. ' + 'Given: {}, of type: {}'.format(c0, type(c0))) + if isinstance(c0, _WeightedCategoricalColumn): + c0 = c0.categorical_column + for c in sorted_columns[1:]: + if isinstance(c, _WeightedCategoricalColumn): + c = c.categorical_column + if not isinstance(c, type(c0)): + raise ValueError( + 'To use shared_embedding_column, all categorical_columns must have ' + 'the same type, or be weighted_categorical_column of the same type. ' + 'Given column: {} of type: {} does not match given column: {} of ' + 'type: {}'.format(c0, type(c0), c, type(c))) + if not shared_embedding_collection_name: - # Sort the columns so the name is deterministic even if the user passes - # columns from an unsorted collection, such as dict.values(). - sorted_columns = sorted(categorical_columns, key=lambda x: x.name) shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns) shared_embedding_collection_name += '_shared_embedding' diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 9981f358b1..6ac5ce8757 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -4162,6 +4162,38 @@ class SharedEmbeddingColumnTest(test.TestCase): [categorical_column_a, categorical_column_b], dimension=2, initializer='not_fn') + def test_incompatible_column_type(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + categorical_column_c = fc.categorical_column_with_hash_bucket( + key='ccc', hash_bucket_size=3) + with self.assertRaisesRegexp( + ValueError, + 'all categorical_columns must have the same type.*' + '_IdentityCategoricalColumn.*_HashedCategoricalColumn'): + fc_lib._shared_embedding_columns( + [categorical_column_a, categorical_column_b, categorical_column_c], + dimension=2) + + def test_weighted_categorical_column_ok(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + weighted_categorical_column_a = fc.weighted_categorical_column( + categorical_column_a, weight_feature_key='aaa_weights') + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + weighted_categorical_column_b = fc.weighted_categorical_column( + categorical_column_b, weight_feature_key='bbb_weights') + fc_lib._shared_embedding_columns( + [weighted_categorical_column_a, categorical_column_b], dimension=2) + fc_lib._shared_embedding_columns( + [categorical_column_a, weighted_categorical_column_b], dimension=2) + fc_lib._shared_embedding_columns( + [weighted_categorical_column_a, weighted_categorical_column_b], + dimension=2) + def test_parse_example(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) -- GitLab From cb12ebe044ad8fb8515bc9d95d27c0ab19ec314b Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 17 Nov 2017 15:20:49 -0800 Subject: [PATCH 0607/1801] Provide an option to use ApiDef instead of OpGenOverrides when generating C++ API. Also, updating UpdateDocs method to ApiDef to replace names in docs. PiperOrigin-RevId: 176167953 --- tensorflow/cc/BUILD | 27 +++ tensorflow/cc/framework/cc_op_gen.cc | 218 ++++++++++-------- tensorflow/cc/framework/cc_op_gen.h | 6 +- tensorflow/cc/framework/cc_op_gen_main.cc | 38 ++- tensorflow/cc/framework/cc_op_gen_test.cc | 195 ++++++++++++++++ tensorflow/contrib/cmake/tf_cc_ops.cmake | 2 +- tensorflow/core/BUILD | 6 +- tensorflow/core/api_def/api_test.cc | 9 + .../base_api/api_def_ApplyAddSign.pbtxt | 65 ++++++ .../base_api/api_def_ApplyPowerSign.pbtxt | 65 ++++++ .../api_def_BytesProducedStatsDataset.pbtxt | 4 + .../base_api/api_def_DeserializeSparse.pbtxt | 19 ++ .../api_def_GenerateVocabRemapping.pbtxt | 13 +- .../api_def_IteratorSetStatsAggregator.pbtxt | 4 + .../api_def_LatencyStatsDataset.pbtxt | 4 + .../base_api/api_def_MatrixExponential.pbtxt | 32 +++ .../api_def/base_api/api_def_NthElement.pbtxt | 2 +- .../api_def_ResourceApplyAddSign.pbtxt | 59 +++++ .../api_def_ResourceApplyPowerSign.pbtxt | 59 +++++ .../api_def_StatsAggregatorHandle.pbtxt | 4 + .../api_def_StatsAggregatorSummary.pbtxt | 4 + .../base_api/api_def_TensorArrayV3.pbtxt | 11 + .../api_def_DeserializeSparse.pbtxt | 4 + .../api_def_MatrixExponential.pbtxt | 4 + tensorflow/core/framework/op_gen_lib.cc | 60 +++++ tensorflow/core/framework/op_gen_lib.h | 6 + tensorflow/core/framework/op_gen_lib_test.cc | 57 +++++ tensorflow/tensorflow.bzl | 31 ++- 28 files changed, 894 insertions(+), 114 deletions(-) create mode 100644 tensorflow/cc/framework/cc_op_gen_test.cc create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BytesProducedStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_IteratorSetStatsAggregator.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LatencyStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_StatsAggregatorHandle.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_StatsAggregatorSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DeserializeSparse.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MatrixExponential.pbtxt diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index 80112f9b44..e354831d7d 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -421,6 +421,7 @@ tf_cc_test( tf_gen_op_wrappers_cc( name = "cc_ops", + api_def_srcs = ["//tensorflow/core:base_api_def"], op_lib_names = [ "array_ops", "audio_ops", @@ -525,6 +526,30 @@ cc_library_with_android_deps( "//tensorflow/core:android_tensorflow_lib", ], copts = tf_copts(), + data = [ + "//tensorflow/core:base_api_def", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:op_gen_lib", + "//tensorflow/core:op_gen_overrides_proto_cc", + "//tensorflow/core:proto_text", + "//tensorflow/core:protos_all_cc", + ], +) + +tf_cc_test( + name = "cc_op_gen_test", + srcs = [ + "framework/cc_op_gen.cc", + "framework/cc_op_gen.h", + "framework/cc_op_gen_test.cc", + ], + data = [ + "//tensorflow/cc:ops/op_gen_overrides.pbtxt", + ], deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -533,6 +558,8 @@ cc_library_with_android_deps( "//tensorflow/core:op_gen_overrides_proto_cc", "//tensorflow/core:proto_text", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", ], ) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index 38a17598b8..6f2b7acb82 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -18,8 +18,10 @@ limitations under the License. #include #include "tensorflow/cc/framework/cc_op_gen.h" +#include "tensorflow/core/framework/api_def.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/op_def_util.h" #include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/framework/op_gen_overrides.pb.h" #include "tensorflow/core/framework/tensor.pb.h" @@ -385,10 +387,10 @@ bool ArgIsList(const OpDef::ArgDef& arg) { } bool HasOptionalAttrs( - const OpDef& op_def, + const ApiDef& api_def, const std::unordered_map& inferred_input_attrs) { - for (int i = 0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < api_def.attr_size(); ++i) { + const auto& attr(api_def.attr(i)); if ((inferred_input_attrs.find(attr.name()) == inferred_input_attrs.end()) && attr.has_default_value()) { @@ -398,12 +400,21 @@ bool HasOptionalAttrs( return false; } +const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) { + for (int i = 0; i < api_def.in_arg_size(); ++i) { + if (api_def.in_arg(i).name() == name) { + return &api_def.in_arg(i); + } + } + return nullptr; +} + struct OpInfo { // graph_op_def: The OpDef used by the runtime, has the names that // must be used when calling NodeBuilder. // interface_op_def: The OpDef used in the interface in the generated // code, with possibly overridden names and defaults. - explicit OpInfo(const OpDef& graph_op_def, const OpDef& inteface_op_def, + explicit OpInfo(const OpDef& graph_op_def, const ApiDef& api_def, const std::vector& aliases); string GetOpAttrStruct() const; string GetConstructorDecl(StringPiece op_name_prefix, @@ -423,74 +434,81 @@ struct OpInfo { string comment; const OpDef& graph_op_def; - const OpDef& op_def; + const ApiDef& api_def; const std::vector& aliases; + // Map from type attribute to corresponding original argument name. std::unordered_map inferred_input_attrs; }; -OpInfo::OpInfo(const OpDef& g_op_def, const OpDef& i_op_def, - const std::vector& a) - : graph_op_def(g_op_def), op_def(i_op_def), aliases(a) { - op_name = op_def.name(); - InferOpAttributes(op_def, &inferred_input_attrs); - has_optional_attrs = HasOptionalAttrs(op_def, inferred_input_attrs); +OpInfo::OpInfo(const OpDef& graph_op_def, const ApiDef& api_def, + const std::vector& aliases) + : graph_op_def(graph_op_def), api_def(api_def), aliases(aliases) { + op_name = api_def.endpoint(0).name(); + InferOpAttributes(graph_op_def, &inferred_input_attrs); + has_optional_attrs = HasOptionalAttrs(api_def, inferred_input_attrs); arg_types.push_back("const ::tensorflow::Scope&"); arg_names.push_back("scope"); - if (op_def.has_deprecation()) { - if (!op_def.summary().empty()) { - comment = strings::StrCat(op_def.summary(), "\n"); + if (graph_op_def.has_deprecation()) { + if (!api_def.summary().empty()) { + comment = strings::StrCat(api_def.summary(), "\n"); } strings::StrAppend(&comment, "DEPRECATED at GraphDef version ", - op_def.deprecation().version(), ":\n", - op_def.deprecation().explanation(), ".\n"); - } else if (op_def.summary().empty()) { + graph_op_def.deprecation().version(), ":\n", + graph_op_def.deprecation().explanation(), ".\n"); + } else if (api_def.summary().empty()) { comment = "TODO: add doc.\n"; } else { - comment = strings::StrCat(op_def.summary(), "\n"); + comment = strings::StrCat(api_def.summary(), "\n"); } - if (!op_def.description().empty()) { - strings::StrAppend(&comment, "\n", op_def.description(), "\n"); + if (!api_def.description().empty()) { + strings::StrAppend(&comment, "\n", api_def.description(), "\n"); } strings::StrAppend(&comment, "\nArguments:\n* scope: A Scope object\n"); // Process inputs - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const auto& arg(op_def.input_arg(i)); + for (int i = 0; i < api_def.arg_order_size(); ++i) { + const auto& arg = *FindInputArg(api_def.arg_order(i), graph_op_def); + const auto& api_def_arg = *FindInputArg(api_def.arg_order(i), api_def); arg_types.push_back(strings::StrCat( "::tensorflow::", ArgIsList(arg) ? "InputList" : "Input")); - arg_names.push_back(AvoidCPPKeywords(arg.name())); + arg_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to())); // TODO(keveman): Include input type information. - StringPiece description = arg.description(); + StringPiece description = api_def_arg.description(); if (!description.empty()) { ConsumeEquals(&description); - strings::StrAppend(&comment, "* ", AvoidCPPKeywords(arg.name()), ": ", - arg.description(), "\n"); + strings::StrAppend(&comment, "* ", + AvoidCPPKeywords(api_def_arg.rename_to()), ": ", + api_def_arg.description(), "\n"); } } // Process attrs string required_attrs_comment; string optional_attrs_comment; - for (int i = 0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < graph_op_def.attr_size(); ++i) { + // ApiDef attributes must be in the same order as in OpDef since + // we initialize ApiDef based on OpDef. + const auto& attr(graph_op_def.attr(i)); + const auto& api_def_attr(api_def.attr(i)); + CHECK_EQ(attr.name(), api_def_attr.name()); // Skip inferred arguments if (inferred_input_attrs.count(attr.name()) > 0) continue; const auto entry = AttrTypeName(attr.type()); const auto attr_type_name = entry.first; const bool use_const = entry.second; - string attr_name = AvoidCPPKeywords(attr.name()); + string attr_name = AvoidCPPKeywords(api_def_attr.rename_to()); string attr_comment; - if (!attr.description().empty()) { + if (!api_def_attr.description().empty()) { // TODO(keveman): Word wrap and indent this, to handle multi-line // descriptions. strings::StrAppend(&attr_comment, "* ", attr_name, ": ", - attr.description(), "\n"); + api_def_attr.description(), "\n"); } - if (attr.has_default_value()) { + if (api_def_attr.has_default_value()) { strings::StrAppend(&optional_attrs_comment, attr_comment); } else { strings::StrAppend(&required_attrs_comment, attr_comment); @@ -508,44 +526,49 @@ OpInfo::OpInfo(const OpDef& g_op_def, const OpDef& i_op_def, } // Process outputs - for (int i = 0; i < op_def.output_arg_size(); ++i) { - const auto& arg = op_def.output_arg(i); + for (int i = 0; i < graph_op_def.output_arg_size(); ++i) { + // ApiDef arguments must be in the same order as in OpDef since + // we initialize ApiDef based on OpDef. + const auto& arg = graph_op_def.output_arg(i); + const auto& api_def_arg(api_def.out_arg(i)); + CHECK_EQ(arg.name(), api_def_arg.name()); + bool is_list = ArgIsList(arg); output_types.push_back( strings::StrCat("::tensorflow::", is_list ? "OutputList" : "Output")); - output_names.push_back(AvoidCPPKeywords(arg.name())); + output_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to())); is_list_output.push_back(is_list); } strings::StrAppend(&comment, "\nReturns:\n"); - if (op_def.output_arg_size() == 0) { // No outputs. + if (graph_op_def.output_arg_size() == 0) { // No outputs. strings::StrAppend(&comment, "* the created `Operation`\n"); - } else if (op_def.output_arg_size() == 1) { // One output + } else if (graph_op_def.output_arg_size() == 1) { // One output if (is_list_output[0]) { strings::StrAppend(&comment, "* `OutputList`: "); } else { strings::StrAppend(&comment, "* `Output`: "); } - if (op_def.output_arg(0).description().empty()) { - strings::StrAppend(&comment, "The ", op_def.output_arg(0).name(), + if (api_def.out_arg(0).description().empty()) { + strings::StrAppend(&comment, "The ", api_def.out_arg(0).name(), " tensor.\n"); } else { // TODO(josh11b): Word wrap this. - strings::StrAppend(&comment, op_def.output_arg(0).description(), "\n"); + strings::StrAppend(&comment, api_def.out_arg(0).description(), "\n"); } } else { // Multiple outputs. - for (int i = 0; i < op_def.output_arg_size(); ++i) { + for (int i = 0; i < graph_op_def.output_arg_size(); ++i) { if (is_list_output[i]) { strings::StrAppend(&comment, "* `OutputList`"); } else { strings::StrAppend(&comment, "* `Output`"); } strings::StrAppend(&comment, " ", output_names[i]); - if (op_def.output_arg(i).description().empty()) { + if (api_def.out_arg(i).description().empty()) { strings::StrAppend(&comment, "\n"); } else { // TODO(josh11b): Word wrap this. - strings::StrAppend(&comment, ": ", op_def.output_arg(i).description(), + strings::StrAppend(&comment, ": ", api_def.out_arg(i).description(), "\n"); } } @@ -564,19 +587,20 @@ string OpInfo::GetOpAttrStruct() const { string struct_fields; string setters; - for (int i = 0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < graph_op_def.attr_size(); ++i) { + const auto& attr(graph_op_def.attr(i)); + const auto& api_def_attr(api_def.attr(i)); // If attr will be inferred or it doesn't have a default value, don't // add it to the struct. if ((inferred_input_attrs.find(attr.name()) != inferred_input_attrs.end()) || - !attr.has_default_value()) { + !api_def_attr.has_default_value()) { continue; } const auto entry = AttrTypeName(attr.type()); const auto attr_type_name = entry.first; const bool use_const = entry.second; - const string camel_case_name = ToCamelCase(attr.name()); + const string camel_case_name = ToCamelCase(api_def_attr.rename_to()); const string suffix = (camel_case_name == op_name || camel_case_name == "Attrs") ? "_" : ""; const string attr_func_def = @@ -584,22 +608,25 @@ string OpInfo::GetOpAttrStruct() const { attr_type_name, use_const ? "&" : ""); string attr_comment; - if (!attr.description().empty()) { - strings::StrAppend(&attr_comment, attr.description(), "\n\n"); + if (!api_def_attr.description().empty()) { + strings::StrAppend(&attr_comment, api_def_attr.description(), "\n\n"); } strings::StrAppend(&attr_comment, "Defaults to ", - SummarizeAttrValue(attr.default_value()), "\n"); + SummarizeAttrValue(api_def_attr.default_value()), "\n"); attr_comment = MakeComment(attr_comment, " "); strings::StrAppend(&setters, attr_comment); strings::StrAppend(&setters, " Attrs ", attr_func_def, " x) {\n"); strings::StrAppend(&setters, " Attrs ret = *this;\n"); - strings::StrAppend(&setters, " ret.", attr.name(), "_ = x;\n"); + strings::StrAppend(&setters, " ret.", api_def_attr.rename_to(), + "_ = x;\n"); strings::StrAppend(&setters, " return ret;\n }\n\n"); strings::StrAppend( - &struct_fields, " ", attr_type_name, " ", attr.name(), "_ = ", - PrintAttrValue(op_def.name(), attr.default_value()), ";\n"); + &struct_fields, " ", attr_type_name, " ", api_def_attr.rename_to(), + "_ = ", + PrintAttrValue(graph_op_def.name(), api_def_attr.default_value()), + ";\n"); } if (struct_fields.empty()) { @@ -676,17 +703,18 @@ void OpInfo::WriteClassDecl(WritableFile* h) const { // Add the static functions to set optional attrs if (has_optional_attrs) { strings::StrAppend(&class_decl, "\n"); - for (int i = 0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < graph_op_def.attr_size(); ++i) { + const auto& attr(graph_op_def.attr(i)); + const auto& api_def_attr(api_def.attr(i)); if ((inferred_input_attrs.find(attr.name()) != inferred_input_attrs.end()) || - !attr.has_default_value()) { + !api_def_attr.has_default_value()) { continue; } const auto entry = AttrTypeName(attr.type()); const auto attr_type_name = entry.first; const bool use_const = entry.second; - const string camel_case_name = ToCamelCase(attr.name()); + const string camel_case_name = ToCamelCase(api_def_attr.rename_to()); const string suffix = (camel_case_name == op_name || camel_case_name == "Attrs") ? "_" : ""; const string attr_func_def = strings::StrCat( @@ -726,11 +754,11 @@ void OpInfo::GetOutput(string* out) const { strings::StrCat("if (!", scope_str, ".ok()) return;"); // No outputs. - if (op_def.output_arg_size() == 0) { + if (graph_op_def.output_arg_size() == 0) { strings::StrAppend(out, " this->operation = Operation(ret);\n return;\n"); return; } - if (op_def.output_arg_size() == 1) { + if (graph_op_def.output_arg_size() == 1) { // One output, no need for NameRangeMap if (is_list_output[0]) { strings::StrAppend(out, @@ -752,7 +780,7 @@ void OpInfo::GetOutput(string* out) const { ".UpdateStatus(_status_);\n", " return;\n"); strings::StrAppend(out, " }\n\n"); - for (int i = 0; i < op_def.output_arg_size(); ++i) { + for (int i = 0; i < graph_op_def.output_arg_size(); ++i) { const string arg_range = strings::StrCat( "_outputs_range[\"", graph_op_def.output_arg(i).name(), "\"]"); if (is_list_output[i]) { @@ -776,11 +804,13 @@ string OpInfo::GetConstructorBody() const { strings::StrAppend(&body, " ", return_on_error, "\n"); - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const auto& arg(op_def.input_arg(i)); - strings::StrAppend(&body, " auto _", arg.name(), " = ::tensorflow::ops::", - ArgIsList(arg) ? "AsNodeOutList" : "AsNodeOut", "(", - scope_str, ", ", AvoidCPPKeywords(arg.name()), ");\n"); + for (int i = 0; i < graph_op_def.input_arg_size(); ++i) { + const auto& arg(graph_op_def.input_arg(i)); + const auto& api_def_arg(api_def.in_arg(i)); + strings::StrAppend( + &body, " auto _", api_def_arg.rename_to(), " = ::tensorflow::ops::", + ArgIsList(arg) ? "AsNodeOutList" : "AsNodeOut", "(", scope_str, ", ", + AvoidCPPKeywords(api_def_arg.rename_to()), ");\n"); strings::StrAppend(&body, " ", return_on_error, "\n"); } @@ -791,19 +821,21 @@ string OpInfo::GetConstructorBody() const { &body, " auto builder = ::tensorflow::NodeBuilder(unique_name, \"", graph_op_def.name(), "\")\n"); const string spaces = " "; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const auto& arg(op_def.input_arg(i)); - strings::StrAppend(&body, spaces, ".Input(_", arg.name(), ")\n"); + for (int i = 0; i < api_def.in_arg_size(); ++i) { + const auto& arg(api_def.in_arg(i)); + strings::StrAppend(&body, spaces, ".Input(_", arg.rename_to(), ")\n"); } - for (int i = 0; i < op_def.attr_size(); ++i) { + for (int i = 0; i < api_def.attr_size(); ++i) { const auto& graph_attr(graph_op_def.attr(i)); - const auto& attr(op_def.attr(i)); - if (inferred_input_attrs.find(attr.name()) != inferred_input_attrs.end()) { + const auto& api_def_attr(api_def.attr(i)); + if (inferred_input_attrs.find(api_def_attr.name()) != + inferred_input_attrs.end()) { continue; } - const string attr_name = attr.has_default_value() - ? strings::StrCat("attrs.", attr.name(), "_") - : AvoidCPPKeywords(attr.name()); + const string attr_name = + api_def_attr.has_default_value() + ? strings::StrCat("attrs.", api_def_attr.rename_to(), "_") + : AvoidCPPKeywords(api_def_attr.rename_to()); strings::StrAppend(&body, spaces, ".Attr(\"", graph_attr.name(), "\", ", attr_name, ")\n"); } @@ -845,10 +877,10 @@ void OpInfo::WriteClassDef(WritableFile* cc) const { TF_CHECK_OK(cc->Append(class_def)); } -void WriteCCOp(const OpDef& graph_op_def, const OpDef& interface_op_def, +void WriteCCOp(const OpDef& graph_op_def, const ApiDef& api_def, const std::vector& aliases, WritableFile* h, WritableFile* cc) { - OpInfo op_info(graph_op_def, interface_op_def, aliases); + OpInfo op_info(graph_op_def, api_def, aliases); op_info.WriteClassDecl(h); op_info.WriteClassDef(cc); @@ -943,8 +975,9 @@ string MakeInternal(const string& fname) { } // namespace -void WriteCCOps(const OpList& ops, const string& dot_h_fname, - const string& dot_cc_fname, const string& overrides_fnames) { +void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map, + const string& dot_h_fname, const string& dot_cc_fname, + const string& overrides_fnames) { Env* env = Env::Default(); // Load the override map. @@ -984,24 +1017,23 @@ void WriteCCOps(const OpList& ops, const string& dot_h_fname, // code depends on it. if (graph_op_def.name() == "Const") continue; - // Incorporate overrides from override_map. - OpDef interface_op_def = graph_op_def; - const OpGenOverride* op_override = - override_map.ApplyOverride(&interface_op_def); + const auto* api_def = api_def_map.GetApiDef(graph_op_def.name()); + std::vector aliases; - if (op_override) { - if (op_override->skip()) continue; - aliases.assign(op_override->alias().begin(), op_override->alias().end()); - if (op_override->hide()) { - // Write hidden ops to _internal.h and _internal.cc. - WriteCCOp(graph_op_def, interface_op_def, aliases, internal_h.get(), - internal_cc.get()); - continue; - } + if (api_def->visibility() == ApiDef::SKIP) continue; + // First endpoint is canonical, the rest are aliases. + for (int endpoint_i = 1; endpoint_i < api_def->endpoint_size(); + ++endpoint_i) { + aliases.push_back(api_def->endpoint(endpoint_i).name()); + } + if (api_def->visibility() == ApiDef::HIDDEN) { + // Write hidden ops to _internal.h and _internal.cc. + WriteCCOp(graph_op_def, *api_def, aliases, internal_h.get(), + internal_cc.get()); + continue; } - // This isn't a hidden op, write it to the main files. - WriteCCOp(graph_op_def, interface_op_def, aliases, h.get(), cc.get()); + WriteCCOp(graph_op_def, *api_def, aliases, h.get(), cc.get()); } FinishFiles(false, h.get(), cc.get(), op_header_guard); diff --git a/tensorflow/cc/framework/cc_op_gen.h b/tensorflow/cc/framework/cc_op_gen.h index fa5e004f03..cea2899014 100644 --- a/tensorflow/cc/framework/cc_op_gen.h +++ b/tensorflow/cc/framework/cc_op_gen.h @@ -17,13 +17,15 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_CC_FRAMEWORK_CC_OP_GEN_H_ #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { /// Result is written to files dot_h and dot_cc. -void WriteCCOps(const OpList& ops, const string& dot_h_fname, - const string& dot_cc_fname, const string& overrides_fnames); +void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map, + const string& dot_h_fname, const string& dot_cc_fname, + const string& overrides_fnames); } // namespace tensorflow diff --git a/tensorflow/cc/framework/cc_op_gen_main.cc b/tensorflow/cc/framework/cc_op_gen_main.cc index 3b80cf993e..326d5668b8 100644 --- a/tensorflow/cc/framework/cc_op_gen_main.cc +++ b/tensorflow/cc/framework/cc_op_gen_main.cc @@ -16,7 +16,11 @@ limitations under the License. #include "tensorflow/cc/framework/cc_op_gen.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/types.h" @@ -24,10 +28,28 @@ namespace tensorflow { namespace { void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc, - const std::string& overrides_fnames, bool include_internal) { + const std::string& overrides_fnames, bool include_internal, + const std::vector& api_def_dirs) { OpList ops; OpRegistry::Global()->Export(include_internal, &ops); - WriteCCOps(ops, dot_h, dot_cc, overrides_fnames); + ApiDefMap api_def_map(ops); + if (!api_def_dirs.empty()) { + Env* env = Env::Default(); + // Only load files that correspond to "ops". + for (const auto& op : ops.op()) { + for (const auto& api_def_dir : api_def_dirs) { + const std::string api_def_file_pattern = + io::JoinPath(api_def_dir, "api_def_" + op.name() + ".pbtxt"); + if (env->FileExists(api_def_file_pattern).ok()) { + TF_CHECK_OK(api_def_map.LoadFile(env, api_def_file_pattern)); + } + } + } + } + + api_def_map.UpdateDocs(); + + WriteCCOps(ops, api_def_map, dot_h, dot_cc, overrides_fnames); } } // namespace @@ -35,18 +57,24 @@ void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc, int main(int argc, char* argv[]) { tensorflow::port::InitMain(argv[0], &argc, &argv); - if (argc != 5) { + // TODO(annarev): Update this file to no longer take op_gen_overrides.pbtxt + // as an argument. + if (argc != 6) { for (int i = 1; i < argc; ++i) { fprintf(stderr, "Arg %d = %s\n", i, argv[i]); } fprintf(stderr, - "Usage: %s out.h out.cc overrides1.pbtxt,2.pbtxt include_internal\n" + "Usage: %s out.h out.cc overrides1.pbtxt,2.pbtxt include_internal " + "api_def_dirs1,api_def_dir2 ...\n" " include_internal: 1 means include internal ops\n", argv[0]); exit(1); } bool include_internal = tensorflow::StringPiece("1") == argv[4]; - tensorflow::PrintAllCCOps(argv[1], argv[2], argv[3], include_internal); + std::vector api_def_dirs = tensorflow::str_util::Split( + argv[5], ",", tensorflow::str_util::SkipEmpty()); + tensorflow::PrintAllCCOps(argv[1], argv[2], argv[3], include_internal, + api_def_dirs); return 0; } diff --git a/tensorflow/cc/framework/cc_op_gen_test.cc b/tensorflow/cc/framework/cc_op_gen_test.cc new file mode 100644 index 0000000000..0b7e720a5c --- /dev/null +++ b/tensorflow/cc/framework/cc_op_gen_test.cc @@ -0,0 +1,195 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/framework/cc_op_gen.h" + +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +// TODO(annarev): Remove this op_gen_overrides.pbtxt reference. +// It is needed only because WriteCCOps takes it as an argument. +constexpr char kOverridesFnames[] = + "tensorflow/cc/ops/op_gen_overrides.pbtxt"; +constexpr char kBaseOpDef[] = R"( +op { + name: "Foo" + input_arg { + name: "images" + description: "Images to process." + } + input_arg { + name: "dim" + description: "Description for dim." + type: DT_FLOAT + } + output_arg { + name: "output" + description: "Description for output." + type: DT_FLOAT + } + attr { + name: "T" + type: "type" + description: "Type for images" + allowed_values { + list { + type: DT_UINT8 + type: DT_INT8 + } + } + default_value { + i: 1 + } + } + summary: "Summary for op Foo." + description: "Description for op Foo." +} +)"; + +void ExpectHasSubstr(StringPiece s, StringPiece expected) { + EXPECT_TRUE(s.contains(expected)) + << "'" << s << "' does not contain '" << expected << "'"; +} + +void ExpectDoesNotHaveSubstr(StringPiece s, StringPiece expected) { + EXPECT_FALSE(s.contains(expected)) + << "'" << s << "' contains '" << expected << "'"; +} + +void ExpectSubstrOrder(const string& s, const string& before, + const string& after) { + int before_pos = s.find(before); + int after_pos = s.find(after); + ASSERT_NE(std::string::npos, before_pos); + ASSERT_NE(std::string::npos, after_pos); + EXPECT_LT(before_pos, after_pos) + << before << " is not before " << after << " in " << s; +} + +// Runs WriteCCOps and stores output in (internal_)cc_file_path and +// (internal_)h_file_path. +void GenerateCcOpFiles(Env* env, const OpList& ops, + const ApiDefMap& api_def_map, string* h_file_text, + string* internal_h_file_text) { + const string& tmpdir = testing::TmpDir(); + + const auto h_file_path = io::JoinPath(tmpdir, "test.h"); + const auto cc_file_path = io::JoinPath(tmpdir, "test.cc"); + const auto internal_h_file_path = io::JoinPath(tmpdir, "test_internal.h"); + const auto internal_cc_file_path = io::JoinPath(tmpdir, "test_internal.cc"); + + WriteCCOps(ops, api_def_map, h_file_path, cc_file_path, kOverridesFnames); + + TF_ASSERT_OK(ReadFileToString(env, h_file_path, h_file_text)); + TF_ASSERT_OK( + ReadFileToString(env, internal_h_file_path, internal_h_file_text)); +} + +TEST(CcOpGenTest, TestVisibilityChangedToHidden) { + const string api_def = R"( +op { + graph_op_name: "Foo" + visibility: HIDDEN +} +)"; + Env* env = Env::Default(); + OpList op_defs; + protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs); // NOLINT + ApiDefMap api_def_map(op_defs); + + string h_file_text, internal_h_file_text; + // Without ApiDef + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(h_file_text, "class Foo"); + ExpectDoesNotHaveSubstr(internal_h_file_text, "class Foo"); + + // With ApiDef + TF_ASSERT_OK(api_def_map.LoadApiDef(api_def)); + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(internal_h_file_text, "class Foo"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo"); +} + +TEST(CcOpGenTest, TestArgNameChanges) { + const string api_def = R"( +op { + graph_op_name: "Foo" + arg_order: "dim" + arg_order: "images" +} +)"; + Env* env = Env::Default(); + OpList op_defs; + protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs); // NOLINT + + ApiDefMap api_def_map(op_defs); + string cc_file_text, h_file_text; + string internal_cc_file_text, internal_h_file_text; + // Without ApiDef + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectSubstrOrder(h_file_text, "Input images", "Input dim"); + + // With ApiDef + TF_ASSERT_OK(api_def_map.LoadApiDef(api_def)); + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectSubstrOrder(h_file_text, "Input dim", "Input images"); +} + +TEST(CcOpGenTest, TestEndpoints) { + const string api_def = R"( +op { + graph_op_name: "Foo" + endpoint { + name: "Foo1" + } + endpoint { + name: "Foo2" + } +} +)"; + Env* env = Env::Default(); + OpList op_defs; + protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs); // NOLINT + + ApiDefMap api_def_map(op_defs); + string cc_file_text, h_file_text; + string internal_cc_file_text, internal_h_file_text; + // Without ApiDef + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(h_file_text, "class Foo {"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo1"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo2"); + + // With ApiDef + TF_ASSERT_OK(api_def_map.LoadApiDef(api_def)); + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(h_file_text, "class Foo1"); + ExpectHasSubstr(h_file_text, "typedef Foo1 Foo2"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo {"); +} +} // namespace +} // namespace tensorflow diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index a5f5ae5478..45eeb11062 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -83,7 +83,7 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names}) ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.cc - COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${tensorflow_source_dir}/tensorflow/cc/ops/op_gen_overrides.pbtxt ${cc_ops_include_internal} + COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${tensorflow_source_dir}/tensorflow/cc/ops/op_gen_overrides.pbtxt ${cc_ops_include_internal} ${tensorflow_source_dir}/tensorflow/core/api_def/base_api DEPENDS ${tf_cc_op_lib_name}_gen_cc create_cc_ops_header_dir ) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 01ddbfc2d4..ee14078496 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3371,7 +3371,7 @@ tf_cc_test( filegroup( name = "base_api_def", - data = glob(["api_def/base_api/*"]), + srcs = glob(["api_def/base_api/*"]), ) filegroup( @@ -3386,10 +3386,6 @@ tf_cc_test( ":base_api_def", "//tensorflow/cc:ops/op_gen_overrides.pbtxt", ], - tags = [ - "manual", - "notap", - ], deps = [ ":framework", ":framework_internal", diff --git a/tensorflow/core/api_def/api_test.cc b/tensorflow/core/api_def/api_test.cc index f222d345ab..2cdc14843f 100644 --- a/tensorflow/core/api_def/api_test.cc +++ b/tensorflow/core/api_def/api_test.cc @@ -221,9 +221,18 @@ std::unordered_map GenerateApiDef( std::unordered_map api_defs_map; + // These ops are included in OpList only if TF_NEED_GCP + // is set to true. So, we skip them for now so that this test passes + // whether TF_NEED_GCP is set or not. + const std::unordered_set ops_to_exclude = { + "BigQueryReader", "GenerateBigQueryReaderPartitions"}; for (const auto& op : ops.op()) { CHECK(!op.name().empty()) << "Encountered empty op name: %s" << op.DebugString(); + if (ops_to_exclude.find(op.name()) != ops_to_exclude.end()) { + LOG(INFO) << "Skipping " << op.name(); + continue; + } string file_path = io::JoinPath(api_def_dir, kApiDefFileFormat); file_path = strings::Printf(file_path.c_str(), op.name().c_str()); ApiDef* api_def = api_defs_map[file_path].add_op(); diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt new file mode 100644 index 0000000000..dd46095252 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt @@ -0,0 +1,65 @@ +op { + graph_op_name: "ApplyAddSign" + in_arg { + name: "var" + description: <

    ::Type T; - DCHECK_NE(parent->dim_size(0), 0); - DCHECK_GE(index, 0); - if (element.NumElements() != (parent->NumElements() / parent->dim_size(0))) { - TensorShape chip_shape = parent->shape(); - chip_shape.RemoveDim(0); - return errors::Internal( - "HandleElementToSlice Cannot copy slice: number of elements does not " - "match. Shapes are: [element]: ", - element.shape().DebugString(), ", [parent slice]: ", - chip_shape.DebugString()); - } - auto parent_as_matrix = parent->flat_outer_dims(); - parent_as_matrix.chip(index, 0) = element.flat(); - return Status::OK(); -} - } // namespace QueueBase::QueueBase(int32 capacity, const DataTypeVector& component_dtypes, @@ -382,35 +364,10 @@ Status QueueBase::CopySliceToElement(const Tensor& parent, Tensor* element, parent.dtype()); } -// Static method +/* static */ Status QueueBase::CopyElementToSlice(const Tensor& element, Tensor* parent, int64 index) { -#define HANDLE_TYPE(DT) \ - if (element.dtype() == DT) { \ - TF_RETURN_IF_ERROR(HandleElementToSlice
    (element, parent, index)); \ - return Status::OK(); \ - } - HANDLE_TYPE(DT_FLOAT); - HANDLE_TYPE(DT_HALF); - HANDLE_TYPE(DT_DOUBLE); - HANDLE_TYPE(DT_INT32); - HANDLE_TYPE(DT_UINT8); - HANDLE_TYPE(DT_INT16); - HANDLE_TYPE(DT_INT8); - HANDLE_TYPE(DT_STRING); - HANDLE_TYPE(DT_COMPLEX64); - HANDLE_TYPE(DT_COMPLEX128); - HANDLE_TYPE(DT_INT64); - HANDLE_TYPE(DT_BOOL); - HANDLE_TYPE(DT_QINT8); - HANDLE_TYPE(DT_QUINT8); - HANDLE_TYPE(DT_QINT32); - HANDLE_TYPE(DT_QINT16); - HANDLE_TYPE(DT_QUINT16); - HANDLE_TYPE(DT_UINT16); -#undef HANDLE_TYPE - return errors::Unimplemented("CopyElementToSlice Unhandled data type: ", - element.dtype()); + return batch_util::CopyElementToSlice(element, parent, index); } } // namespace tensorflow diff --git a/tensorflow/core/kernels/queue_base.h b/tensorflow/core/kernels/queue_base.h index c101fb3579..5fb1c92f94 100644 --- a/tensorflow/core/kernels/queue_base.h +++ b/tensorflow/core/kernels/queue_base.h @@ -79,6 +79,9 @@ class QueueBase : public QueueInterface { int64 index); // Copies element into the index^th slice (in the first dimension) of parent. + // NOTE(mrry): This method is deprecated. Use + // `tensorflow::batch_util::CopySliceToElement()` defined in + // "./batch_util.h" instead. static Status CopyElementToSlice(const Tensor& element, Tensor* parent, int64 index); diff --git a/tensorflow/core/kernels/random_shuffle_queue_op.cc b/tensorflow/core/kernels/random_shuffle_queue_op.cc index 30bbbd4aed..7a40e9ddf2 100644 --- a/tensorflow/core/kernels/random_shuffle_queue_op.cc +++ b/tensorflow/core/kernels/random_shuffle_queue_op.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/batch_util.h" #include "tensorflow/core/kernels/queue_op.h" #include "tensorflow/core/kernels/typed_queue.h" #include "tensorflow/core/lib/core/errors.h" @@ -407,8 +408,8 @@ void RandomShuffleQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx, const int index = attempt->tuple[0].dim_size(0) - attempt->elements_requested; for (int i = 0; i < num_components(); ++i) { - attempt->context->SetStatus(CopyElementToSlice( - tuple[i], &attempt->tuple[i], index)); + attempt->context->SetStatus(batch_util::CopyElementToSlice( + std::move(tuple[i]), &attempt->tuple[i], index)); if (!attempt->context->status().ok()) return kComplete; } tuple.clear(); -- GitLab From 46b383781f731ff3dab757e53278874780729167 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 16:16:55 -0800 Subject: [PATCH 0678/1801] Hlo parser: make the window attribute optional for other ops. We printed the window attribute only when it's not empty, regardless of the opcode. Before this change, window is only optional for the convolution op, but empty window could happen as well to the other ops (reduce-window, select-and-scatter). PiperOrigin-RevId: 176433808 --- .../compiler/xla/tools/parser/hlo_parser.cc | 10 ++++- .../xla/tools/parser/hlo_parser_test.cc | 45 +++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 6f5c7b8d0f..a102bdc3aa 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -549,13 +549,16 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kReduceWindow: { optional reduce_computation; optional window; - attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window}; attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, &reduce_computation}; if (!ParseOperands(&operands, /*expected_size=*/2) || !ParseAttributes(attrs)) { return false; } + if (!window) { + window.emplace(); + } instruction = builder->AddInstruction(HloInstruction::CreateReduceWindow( shape, /*operand=*/operands[0], /*init_value=*/operands[1], *window, *reduce_computation)); @@ -647,11 +650,14 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, optional scatter; attrs["scatter"] = {/*required=*/true, AttrTy::kHloComputation, &scatter}; optional window; - attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window}; if (!ParseOperands(&operands, /*expected_size=*/3) || !ParseAttributes(attrs)) { return false; } + if (!window) { + window.emplace(); + } instruction = builder->AddInstruction(HloInstruction::CreateSelectAndScatter( shape, /*operand=*/operands[0], *select, *window, diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index b67b4b816d..e56f120def 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -310,6 +310,25 @@ ENTRY %R4UnitWindow.v3 (operand: f32[13,12,8,15]) -> f32[13,3,8,15] { ROOT %reduce-window = f32[13,3,8,15]{0,3,2,1} reduce-window(f32[13,12,8,15]{0,3,2,1} %operand, f32[] %constant), window={size=1x1x7x1 stride=1x4x1x1 pad=0_0x0_0x3_3x0_0}, to_apply=%add_F32.v3 } +)" +}, +// reduce window on scalar +{ +"ReduceWindowScalar", +R"(HloModule reduce_window_scalar: + +%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs) +} + +ENTRY %R4UnitWindowScalar () -> f32[] { + %constant = f32[] constant(42) + %constant.1 = f32[] constant(1) + ROOT %reduce-window = f32[] reduce-window(f32[] %constant, f32[] %constant.1), to_apply=%add_F32.v3 +} + )" }, // convolution @@ -426,6 +445,32 @@ ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] { ROOT %select-and-scatter = f32[4,5,1,1]{3,2,1,0} select-and-scatter(f32[4,5,1,1]{3,2,1,0} %constant, f32[2,2,1,1]{3,2,1,0} %constant.1, f32[] %constant.2), window={size=2x3x1x1 stride=2x2x1x1}, select=%ge_F32.v3, scatter=%add_F32.v3 } +)" +}, +// select and scatter on scalar +{ +"SelectAndScatterScalar", +R"(HloModule select_and_scatter_scalar: + +%ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %greater-than-or-equal-to = pred[] greater-than-or-equal-to(f32[] %lhs, f32[] %rhs) +} + +%add_F32.v3 (lhs.1: f32[], rhs.1: f32[]) -> f32[] { + %lhs.1 = f32[] parameter(0) + %rhs.1 = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %lhs.1, f32[] %rhs.1) +} + +ENTRY %SelectAndScatterScalar () -> f32[] { + %constant = f32[] constant(42) + %constant.1 = f32[] constant(1) + %constant.2 = f32[] constant(2) + ROOT %select-and-scatter = f32[] select-and-scatter(f32[] %constant, f32[] %constant.1, f32[] %constant.2), select=%ge_F32.v3, scatter=%add_F32.v3 +} + )" }, // slice -- GitLab From 138b00934a436b9207afde330731a49f2187ea9d Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Mon, 20 Nov 2017 16:18:31 -0800 Subject: [PATCH 0679/1801] Rename GPUTracer to DeviceTracer. PiperOrigin-RevId: 176434090 --- tensorflow/core/BUILD | 31 ++++----- .../core/common_runtime/direct_session.cc | 29 ++++----- tensorflow/core/debug/BUILD | 2 +- .../core/platform/default/build_config.bzl | 8 +-- .../{gpu_tracer.cc => device_tracer.cc} | 64 +++++++++---------- .../{gpu_tracer.h => device_tracer.h} | 32 +++++----- ...u_tracer_test.cc => device_tracer_test.cc} | 44 ++++++------- .../ci_build/windows/bazel/bazel_test_lib.sh | 2 +- 8 files changed, 104 insertions(+), 108 deletions(-) rename tensorflow/core/platform/default/{gpu_tracer.cc => device_tracer.cc} (93%) rename tensorflow/core/platform/{gpu_tracer.h => device_tracer.h} (69%) rename tensorflow/core/platform/{gpu_tracer_test.cc => device_tracer_test.cc} (84%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ee14078496..d71f314e11 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -127,9 +127,9 @@ load( "tf_additional_verbs_lib_defines", "tf_additional_mpi_lib_defines", "tf_additional_gdr_lib_defines", - "tf_additional_gpu_tracer_srcs", - "tf_additional_gpu_tracer_deps", - "tf_additional_gpu_tracer_cuda_deps", + "tf_additional_device_tracer_srcs", + "tf_additional_device_tracer_deps", + "tf_additional_device_tracer_cuda_deps", "tf_pyclif_proto_library", "tf_jspb_proto_library", "tf_nano_proto_library", @@ -1461,7 +1461,7 @@ cc_library( "lib/jpeg/**/*", "platform/**/env_time.cc", "platform/**/cuda_libdevice_path.cc", - "platform/**/gpu_tracer.cc", + "platform/**/device_tracer.cc", "platform/variant_coding.cc", "platform/**/variant_cord_coding.cc", ], @@ -1472,7 +1472,7 @@ cc_library( "platform/**/cuda_libdevice_path.cc", "platform/**/stream_executor.h", "platform/**/env_time.cc", - "platform/**/gpu_tracer.cc", + "platform/**/device_tracer.cc", "platform/variant_coding.cc", "platform/**/variant_cord_coding.cc", ] + @@ -2085,12 +2085,9 @@ tf_cuda_library( "util/env_var.h", ], copts = tf_copts(), - cuda_deps = [ - ":gpu_tracer", - ], - linkstatic = 1, deps = [ ":core_cpu_internal", + ":device_tracer", ":framework", ":lib", ":lib_internal", @@ -2122,18 +2119,18 @@ cc_library( ) tf_cuda_library( - name = "gpu_tracer", - srcs = tf_additional_gpu_tracer_srcs(), + name = "device_tracer", + srcs = tf_additional_device_tracer_srcs(), hdrs = [ - "platform/gpu_tracer.h", + "platform/device_tracer.h", ], copts = tf_copts(), - cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_gpu_tracer_cuda_deps(), + cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(), deps = [ ":core_cpu_internal", ":lib", ":protos_all_cc", - ] + tf_additional_gpu_tracer_deps(), + ] + tf_additional_device_tracer_deps(), ) GPU_RUNTIME_HEADERS = [ @@ -3401,9 +3398,9 @@ tf_cc_test( ) tf_cc_test_gpu( - name = "gpu_tracer_test", + name = "device_tracer_test", size = "small", - srcs = ["platform/gpu_tracer_test.cc"], + srcs = ["platform/device_tracer_test.cc"], args = ["--heap_check=local"], linkstatic = tf_kernel_tests_linkstatic(), tags = tf_cuda_tests_tags() + ["nomac"], @@ -3411,12 +3408,12 @@ tf_cc_test_gpu( ":all_kernels", ":core_cpu", ":core_cpu_internal", + ":device_tracer", ":direct_session", ":direct_session_internal", ":framework", ":framework_internal", ":gpu_runtime", - ":gpu_tracer", ":lib", ":lib_internal", ":protos_all_cc", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 2f57164dcd..6dfe17405c 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -54,15 +54,13 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/device_tracer.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/env_var.h" -#if GOOGLE_CUDA -#include "tensorflow/core/platform/gpu_tracer.h" -#endif // GOOGLE_CUDA namespace tensorflow { @@ -555,15 +553,19 @@ Status DirectSession::Run(const RunOptions& run_options, args.stats_collector = run_state.collector.get(); } -#if GOOGLE_CUDA - std::unique_ptr tracer; + std::unique_ptr tracer; if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) { - tracer = CreateGPUTracer(); - // tracer will be NULL on non-GPU platforms. - // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object! - if (tracer) tracer->Start().IgnoreError(); + tracer = CreateDeviceTracer(); + // tracer may be NULL on platforms without accelerators. + if (tracer) { + Status s = tracer->Start(); + if (!s.ok()) { + run_state.executors_done.Notify(); + delete barrier; + return s; + } + } } -#endif // GOOGLE_CUDA // Register this step with session's cancellation manager, so that // `Session::Close()` will cancel the step. @@ -598,13 +600,10 @@ Status DirectSession::Run(const RunOptions& run_options, run_state.status.Update(errors::Cancelled("Run call was cancelled")); } -#if GOOGLE_CUDA if (tracer) { - // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object! - tracer->Stop().IgnoreError(); - tracer->Collect(args.stats_collector).IgnoreError(); + TF_RETURN_IF_ERROR(tracer->Stop()); + TF_RETURN_IF_ERROR(tracer->Collect(args.stats_collector)); } -#endif // GOOGLE_CUDA { mutex_lock l(run_state.mu_); diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index 525f96a3de..6d796768de 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -89,9 +89,9 @@ tf_cuda_library( deps = [ ":debug", "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:device_tracer", "//tensorflow/core:direct_session_internal", "//tensorflow/core:framework", - "//tensorflow/core:gpu_tracer", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:proto_text", diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 5eeb861bdd..0f8cf8f122 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -399,13 +399,13 @@ def tf_env_time_srcs(): def tf_additional_cupti_wrapper_deps(): return ["//tensorflow/core/platform/default/gpu:cupti_wrapper"] -def tf_additional_gpu_tracer_srcs(): - return ["platform/default/gpu_tracer.cc"] +def tf_additional_device_tracer_srcs(): + return ["platform/default/device_tracer.cc"] -def tf_additional_gpu_tracer_cuda_deps(): +def tf_additional_device_tracer_cuda_deps(): return [] -def tf_additional_gpu_tracer_deps(): +def tf_additional_device_tracer_deps(): return [] def tf_additional_libdevice_data(): diff --git a/tensorflow/core/platform/default/gpu_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc similarity index 93% rename from tensorflow/core/platform/default/gpu_tracer.cc rename to tensorflow/core/platform/default/device_tracer.cc index d6489f2f00..f4b0f16393 100644 --- a/tensorflow/core/platform/default/gpu_tracer.cc +++ b/tensorflow/core/platform/default/device_tracer.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/gpu_tracer.h" +#include "tensorflow/core/platform/device_tracer.h" #if GOOGLE_CUDA @@ -101,7 +101,7 @@ const char *getActivityOverheadKindString(CUpti_ActivityOverheadKind kind) { } // namespace namespace tensorflow { -namespace gputracer { +namespace devicetracer { // Forward declaration. class CUPTIManager; @@ -286,14 +286,14 @@ CUPTIManager *GetCUPTIManager() { // for the duration of the CUPTI API callback. TF_STATIC_THREAD_LOCAL_POD(const char *, tls_current_annotation); -class GPUTracerImpl : public GPUTracer, - public CUPTIClient, - public port::Tracing::Engine { +class DeviceTracerImpl : public DeviceTracer, + public CUPTIClient, + public port::Tracing::Engine { public: - GPUTracerImpl(); - ~GPUTracerImpl() override; + DeviceTracerImpl(); + ~DeviceTracerImpl() override; - // GPUTracer interface: + // DeviceTracer interface: Status Start() override; Status Stop() override; Status Collect(StepStatsCollector *collector) override; @@ -348,7 +348,7 @@ class GPUTracerImpl : public GPUTracer, }; // This is the subscriber callback which is invoked directly by CUPTI. - // The 'userdata' argument will be a pointer to the active 'GPUTracerImpl'. + // The 'userdata' argument will be a pointer to the active 'DeviceTracerImpl'. static void CUPTIAPI ApiCallback(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata); @@ -375,28 +375,28 @@ class GPUTracerImpl : public GPUTracer, uint64_t start_timestamp_ GUARDED_BY(mu_); uint64_t end_timestamp_ GUARDED_BY(mu_); - TF_DISALLOW_COPY_AND_ASSIGN(GPUTracerImpl); + TF_DISALLOW_COPY_AND_ASSIGN(DeviceTracerImpl); }; -GPUTracerImpl::GPUTracerImpl() { - VLOG(1) << "GPUTracer created."; +DeviceTracerImpl::DeviceTracerImpl() { + VLOG(1) << "DeviceTracer created."; cupti_manager_ = GetCUPTIManager(); CHECK(cupti_manager_); cupti_wrapper_.reset(new perftools::gputools::profiler::CuptiWrapper()); enabled_ = false; } -GPUTracerImpl::~GPUTracerImpl() { +DeviceTracerImpl::~DeviceTracerImpl() { // Unregister the CUPTI callbacks if needed to prevent them from accessing // freed memory. Stop().IgnoreError(); } -Status GPUTracerImpl::Start() { - VLOG(1) << "GPUTracer::Start"; +Status DeviceTracerImpl::Start() { + VLOG(1) << "DeviceTracer::Start"; mutex_lock l(mu_); if (enabled_) { - return errors::FailedPrecondition("GPUTracer is already enabled."); + return errors::FailedPrecondition("DeviceTracer is already enabled."); } // There can only be one CUPTI subscriber. If we can't create one then // there is another trace in progress (possibly by external code). @@ -451,8 +451,8 @@ Status GPUTracerImpl::Start() { return Status::OK(); } -Status GPUTracerImpl::Stop() { - VLOG(1) << "GPUTracer::Stop"; +Status DeviceTracerImpl::Stop() { + VLOG(1) << "DeviceTracer::Stop"; mutex_lock l(mu_); if (!enabled_) { return Status::OK(); @@ -466,20 +466,20 @@ Status GPUTracerImpl::Stop() { return Status::OK(); } -void GPUTracerImpl::AddCorrelationId(uint32 correlation_id, - const string &name) { +void DeviceTracerImpl::AddCorrelationId(uint32 correlation_id, + const string &name) { VLOG(2) << correlation_id << " : " << name; mutex_lock l(trace_mu_); if (correlations_.size() >= kMaxRecords) return; correlations_.emplace(correlation_id, name); } -/*static*/ void GPUTracerImpl::ApiCallback(void *userdata, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - const void *cbdata) { +/*static*/ void DeviceTracerImpl::ApiCallback(void *userdata, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid, + const void *cbdata) { auto *cbInfo = reinterpret_cast(cbdata); - GPUTracerImpl *tracer = reinterpret_cast(userdata); + DeviceTracerImpl *tracer = reinterpret_cast(userdata); VLOG(2) << "ApiCallback " << domain << ":" << cbid << " func: " << cbInfo->functionName; @@ -533,7 +533,7 @@ void GPUTracerImpl::AddCorrelationId(uint32 correlation_id, } } -void GPUTracerImpl::ActivityCallback(const CUpti_Activity &record) { +void DeviceTracerImpl::ActivityCallback(const CUpti_Activity &record) { VLOG(2) << "ActivityCallback " << record.kind; mutex_lock l(trace_mu_); switch (record.kind) { @@ -570,10 +570,10 @@ void GPUTracerImpl::ActivityCallback(const CUpti_Activity &record) { } } -Status GPUTracerImpl::Collect(StepStatsCollector *collector) { +Status DeviceTracerImpl::Collect(StepStatsCollector *collector) { mutex_lock l(mu_); if (enabled_) { - return errors::FailedPrecondition("GPUTracer is still enabled."); + return errors::FailedPrecondition("DeviceTracer is still enabled."); } // TODO(pbar) Handle device IDs and prefix properly. @@ -630,10 +630,10 @@ Status GPUTracerImpl::Collect(StepStatsCollector *collector) { return Status::OK(); } -} // namespace gputracer +} // namespace devicetracer -std::unique_ptr CreateGPUTracer() { - std::unique_ptr tracer(new gputracer::GPUTracerImpl()); +std::unique_ptr CreateDeviceTracer() { + std::unique_ptr tracer(new devicetracer::DeviceTracerImpl()); return tracer; } @@ -643,7 +643,7 @@ std::unique_ptr CreateGPUTracer() { namespace tensorflow { -std::unique_ptr CreateGPUTracer() { return nullptr; } +std::unique_ptr CreateDeviceTracer() { return nullptr; } } // namespace tensorflow diff --git a/tensorflow/core/platform/gpu_tracer.h b/tensorflow/core/platform/device_tracer.h similarity index 69% rename from tensorflow/core/platform/gpu_tracer.h rename to tensorflow/core/platform/device_tracer.h index 3373d974e3..d0f86a5103 100644 --- a/tensorflow/core/platform/gpu_tracer.h +++ b/tensorflow/core/platform/device_tracer.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_ -#define TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_ +#ifndef TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_ +#define TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_ #include @@ -24,16 +24,16 @@ namespace tensorflow { class StepStatsCollector; -// 'GPUTracer' is an interface for collecting low-level execution timings -// of GPU computation and DMA transfers. +// 'DeviceTracer' is an interface for collecting low-level execution timings +// of hardware accelerator (e.g. GPU) computation and DMA transfers. // // Typical usage pattern is as follows: // -// GPUTracer* tracer = CreateGPUTracer(); +// DeviceTracer* tracer = CreateDeviceTracer(); // if (tracer) { // tracer->Start(); // -// ... perform some GPU computations. +// ... perform some computations on a hardware accelerator. // // tracer->Stop(); // @@ -44,23 +44,23 @@ class StepStatsCollector; // // Notes: // Tracing is not supported on all plaforms. On platforms -// with no GPU tracing support, 'CreateGPUTracer' will return 'nullptr'. -// On most plaforms, GPU tracing will be a system-wide activity and -// a single 'GPUTracer' will collect activity from all GPUs. +// with no tracing support, 'CreateDeviceTracer' will return 'nullptr'. +// On most plaforms, hardware tracing will be a system-wide activity and +// a single 'DeviceTracer' will collect activity from all devices. // It is also common that only a single tracer may be active at any // given time. The 'Start' method will return an error if tracing is // already in progress elsewhere. // -class GPUTracer { +class DeviceTracer { public: - virtual ~GPUTracer() {} + virtual ~DeviceTracer() {} - // Start GPU tracing. + // Start device tracing. // Note that only a single trace can be active, in which case this // methods will return an 'Unavailable' error. virtual Status Start() = 0; - // Stop GPU tracing. + // Stop device tracing. // It is safe to call 'Stop' on a tracer which is not enabled. virtual Status Stop() = 0; @@ -70,10 +70,10 @@ class GPUTracer { virtual Status Collect(StepStatsCollector* collector) = 0; }; -// Creates a platform-specific GPUTracer. +// Creates a platform-specific DeviceTracer. // Returns 'nullptr' on platforms where tracing is not supported. -std::unique_ptr CreateGPUTracer(); +std::unique_ptr CreateDeviceTracer(); } // namespace tensorflow -#endif // TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_ +#endif // TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_ diff --git a/tensorflow/core/platform/gpu_tracer_test.cc b/tensorflow/core/platform/device_tracer_test.cc similarity index 84% rename from tensorflow/core/platform/gpu_tracer_test.cc rename to tensorflow/core/platform/device_tracer_test.cc index ce2985fd47..c0c08dabac 100644 --- a/tensorflow/core/platform/gpu_tracer_test.cc +++ b/tensorflow/core/platform/device_tracer_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/gpu_tracer.h" +#include "tensorflow/core/platform/device_tracer.h" #include #include @@ -50,7 +50,7 @@ std::unique_ptr CreateSession() { return std::unique_ptr(NewSession(options)); } -class GPUTracerTest : public ::testing::Test { +class DeviceTracerTest : public ::testing::Test { public: void Initialize(std::initializer_list a_values) { Graph graph(OpRegistry::Global()); @@ -84,10 +84,10 @@ class GPUTracerTest : public ::testing::Test { protected: void ExpectFailure(const Status& status, error::Code code) { - EXPECT_FALSE(status.ok()); + EXPECT_FALSE(status.ok()) << status.ToString(); if (!status.ok()) { LOG(INFO) << "Status message: " << status.error_message(); - EXPECT_EQ(code, status.code()); + EXPECT_EQ(code, status.code()) << status.ToString(); } } @@ -97,22 +97,22 @@ class GPUTracerTest : public ::testing::Test { GraphDef def_; }; -TEST_F(GPUTracerTest, StartStop) { - std::unique_ptr tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, StartStop) { + std::unique_ptr tracer(CreateDeviceTracer()); if (!tracer) return; TF_EXPECT_OK(tracer->Start()); TF_EXPECT_OK(tracer->Stop()); } -TEST_F(GPUTracerTest, StopBeforeStart) { - std::unique_ptr tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, StopBeforeStart) { + std::unique_ptr tracer(CreateDeviceTracer()); if (!tracer) return; TF_EXPECT_OK(tracer->Stop()); TF_EXPECT_OK(tracer->Stop()); } -TEST_F(GPUTracerTest, CollectBeforeStart) { - std::unique_ptr tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, CollectBeforeStart) { + std::unique_ptr tracer(CreateDeviceTracer()); if (!tracer) return; StepStats stats; StepStatsCollector collector(&stats); @@ -120,8 +120,8 @@ TEST_F(GPUTracerTest, CollectBeforeStart) { EXPECT_EQ(stats.dev_stats_size(), 0); } -TEST_F(GPUTracerTest, CollectBeforeStop) { - std::unique_ptr tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, CollectBeforeStop) { + std::unique_ptr tracer(CreateDeviceTracer()); if (!tracer) return; TF_EXPECT_OK(tracer->Start()); StepStats stats; @@ -131,9 +131,9 @@ TEST_F(GPUTracerTest, CollectBeforeStop) { TF_EXPECT_OK(tracer->Stop()); } -TEST_F(GPUTracerTest, StartTwoTracers) { - std::unique_ptr tracer1(CreateGPUTracer()); - std::unique_ptr tracer2(CreateGPUTracer()); +TEST_F(DeviceTracerTest, StartTwoTracers) { + std::unique_ptr tracer1(CreateDeviceTracer()); + std::unique_ptr tracer2(CreateDeviceTracer()); if (!tracer1 || !tracer2) return; TF_EXPECT_OK(tracer1->Start()); @@ -144,9 +144,9 @@ TEST_F(GPUTracerTest, StartTwoTracers) { TF_EXPECT_OK(tracer2->Stop()); } -TEST_F(GPUTracerTest, RunWithTracer) { - // On non-GPU platforms, we may not support GPUTracer. - std::unique_ptr tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, RunWithTracer) { + // On non-GPU platforms, we may not support DeviceTracer. + std::unique_ptr tracer(CreateDeviceTracer()); if (!tracer) return; Initialize({3, 2, -1, 0}); @@ -172,8 +172,8 @@ TEST_F(GPUTracerTest, RunWithTracer) { EXPECT_FLOAT_EQ(5.0, mat(0, 0)); } -TEST_F(GPUTracerTest, TraceToStepStatsCollector) { - std::unique_ptr tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, TraceToStepStatsCollector) { + std::unique_ptr tracer(CreateDeviceTracer()); if (!tracer) return; Initialize({3, 2, -1, 0}); @@ -198,10 +198,10 @@ TEST_F(GPUTracerTest, TraceToStepStatsCollector) { collector.Finalize(); // Depending on whether this runs on CPU or GPU, we will have a // different number of devices. - EXPECT_GE(stats.dev_stats_size(), 1); + EXPECT_GE(stats.dev_stats_size(), 1) << "Saw stats: " << stats.DebugString(); } -TEST_F(GPUTracerTest, RunWithTraceOption) { +TEST_F(DeviceTracerTest, RunWithTraceOption) { Initialize({3, 2, -1, 0}); auto session = CreateSession(); ASSERT_TRUE(session != nullptr); diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 6a8b6417d6..924ab1a4ae 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -88,7 +88,7 @@ extra_failing_gpu_cc_tests="\ //tensorflow/core:cuda_libdevice_path_test + \ //tensorflow/core:common_runtime_direct_session_test + \ //tensorflow/core:common_runtime_direct_session_with_tracking_alloc_test + \ - //tensorflow/core:gpu_tracer_test + \ + //tensorflow/core:device_tracer_test + \ //tensorflow/core:ops_math_grad_test \ " -- GitLab From 8fc41bbabcc041460cf3a123f0595ea0df6bc30f Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 20 Nov 2017 16:20:46 -0800 Subject: [PATCH 0680/1801] Fix potential use-after-move in `RecvOp`. Note that has not been a problem in practice because the callback would only be used-after-move in an error condition (`Rendezvous::ParseKey()` failing) that would only occur if a malformed graph was fed to a TensorFlow server. PiperOrigin-RevId: 176434460 --- tensorflow/core/kernels/sendrecv_ops.cc | 33 +++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc index 542382872c..206fd40fa6 100644 --- a/tensorflow/core/kernels/sendrecv_ops.cc +++ b/tensorflow/core/kernels/sendrecv_ops.cc @@ -142,17 +142,12 @@ RecvOp::RecvOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) { } } -void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { - OP_REQUIRES( - ctx, ctx->rendezvous() != nullptr, - errors::Internal("Op kernel context needs to provide a rendezvous.")); - - Rendezvous::Args args; - args.device_context = ctx->op_device_context(); - args.alloc_attrs = ctx->output_alloc_attr(0); +namespace { +Rendezvous::DoneCallback make_recv_callback(OpKernelContext* ctx, + AsyncOpKernel::DoneCallback done) { using namespace std::placeholders; - Rendezvous::DoneCallback done_cb = std::bind( - [ctx](DoneCallback done, + return std::bind( + [ctx](AsyncOpKernel::DoneCallback done, // Begin unbound arguments. const Status& s, const Rendezvous::Args& send_args, const Rendezvous::Args& recv_args, const Tensor& val, @@ -170,19 +165,31 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { done(); }, std::move(done), _1, _2, _3, _4, _5); +} +} // namespace + +void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { + OP_REQUIRES( + ctx, ctx->rendezvous() != nullptr, + errors::Internal("Op kernel context needs to provide a rendezvous.")); + + Rendezvous::Args args; + args.device_context = ctx->op_device_context(); + args.alloc_attrs = ctx->output_alloc_attr(0); FrameAndIter frame_iter = GetFrameAndIter(ctx, hostmem_sendrecv_); if (frame_iter == FrameAndIter(0, 0)) { VLOG(2) << "Recv " << parsed_key_.buf_; - ctx->rendezvous()->RecvAsync(parsed_key_, args, std::move(done_cb)); + ctx->rendezvous()->RecvAsync(parsed_key_, args, + make_recv_callback(ctx, std::move(done))); } else { Rendezvous::ParsedKey in_loop_parsed; GetRendezvousKey(key_prefix_, frame_iter, &in_loop_parsed.buf_); VLOG(2) << "Recv " << in_loop_parsed.buf_; OP_REQUIRES_OK_ASYNC( ctx, Rendezvous::ParseKey(in_loop_parsed.buf_, &in_loop_parsed), done); - - ctx->rendezvous()->RecvAsync(in_loop_parsed, args, std::move(done_cb)); + ctx->rendezvous()->RecvAsync(in_loop_parsed, args, + make_recv_callback(ctx, std::move(done))); } } -- GitLab From dad3670237d9943c7780c5daa3a171e6ca1bf959 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 20 Nov 2017 16:34:19 -0800 Subject: [PATCH 0681/1801] [XLA:Doc] Correct parameter ordering for Clamp operation: operand should be in the middle of min and max to be consistent with the actual implementation. PiperOrigin-RevId: 176436283 --- tensorflow/docs_src/performance/xla/operation_semantics.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index ccced8792e..dfd9c12c89 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -75,14 +75,14 @@ Clamps an operand to within the range between a minimum and maximum value. | `computation` | `Computation` | computation of type `T_0, T_1, | : : : ..., T_N -> S` with N parameters : : : : of arbitrary type : -| `operand` | `ComputationDataHandle` | array of type T | | `min` | `ComputationDataHandle` | array of type T | +| `operand` | `ComputationDataHandle` | array of type T | | `max` | `ComputationDataHandle` | array of type T | Given an operand and minimum and maximum values, returns the operand if it is in the range between the minimum and maximum, else returns the minimum value if the operand is below this range or the maximum value if the operand is above this -range. That is, `clamp(x, a, b) = max(min(x, a), b)`. +range. That is, `clamp(a, x, b) = max(min(a, x), b)`. All three arrays must be the same shape. Alternately, as a restricted form of [broadcasting](broadcasting.md), `min` and/or `max` can be a scalar of type `T`. @@ -94,7 +94,7 @@ let operand: s32[3] = {-1, 5, 9}; let min: s32 = 0; let max: s32 = 6; ==> -Clamp(operand, min, max) = s32[3]{0, 5, 6}; +Clamp(min, operand, max) = s32[3]{0, 5, 6}; ``` ## Collapse -- GitLab From 3b7ff59bf10680a1520272bd3a738bd7c741f61f Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 20 Nov 2017 16:38:28 -0800 Subject: [PATCH 0682/1801] Improving documentation of `tf.data.Dataset.repeat`. PiperOrigin-RevId: 176436826 --- tensorflow/python/data/ops/dataset_ops.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index d434c8e522..d6efb7fa9a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -549,11 +549,14 @@ class Dataset(object): def repeat(self, count=None): """Repeats this dataset `count` times. + NOTE: If this dataset is a function of global state (e.g. a random number + generator), then different repetitions may produce different elements. + Args: count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the - number of times the elements of this dataset should be repeated. The - default behavior (if `count` is `None` or `-1`) is for the elements to - be repeated indefinitely. + number of times the dataset should be repeated. The default behavior + (if `count` is `None` or `-1`) is for the dataset be repeated + indefinitely. Returns: A `Dataset`. -- GitLab From 76195b6a84d89e5648a6911a5c3577a9e2bf0ce5 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Mon, 20 Nov 2017 16:56:12 -0800 Subject: [PATCH 0683/1801] Allow test_util to eval callables in Eager mode. PiperOrigin-RevId: 176438865 --- tensorflow/python/framework/test_util.py | 2 ++ tensorflow/python/framework/test_util_test.py | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 6e3a35af3c..cfa5fe5e3e 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -730,6 +730,8 @@ class TensorFlowTestCase(googletest.TestCase): return dict() elif tensors is None: return None + elif callable(tensors): + return self._eval_helper(tensors()) else: raise ValueError("Unsupported type %s." % type(tensors)) diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index 3ea28e6334..9aed3457a6 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -330,6 +330,15 @@ class TestUtilTest(test_util.TensorFlowTestCase): self.assertEqual(a_np_rand, b_np_rand) self.assertEqual(a_rand, b_rand) + @test_util.run_in_graph_and_eager_modes() + def test_callable_evaluate(self): + def model(): + return resource_variable_ops.ResourceVariable( + name="same_name", + initial_value=1) + 1 + with context.eager_mode(): + self.assertEqual(2, self.evaluate(model)) + class GarbageCollectionTest(test_util.TensorFlowTestCase): @@ -421,6 +430,5 @@ class IsolationTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): first_container_variable.read_value() - if __name__ == "__main__": googletest.main() -- GitLab From 60a0666a40767baea6b21e57714a553c217cad8f Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 20 Nov 2017 16:59:00 -0800 Subject: [PATCH 0684/1801] [tf.data] Add two simple microbenchmarks for `Dataset.map()` performance. PiperOrigin-RevId: 176439139 --- .../kernel_tests/map_dataset_op_test.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tensorflow/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/kernel_tests/map_dataset_op_test.py index c6c36d133c..51f43bfd89 100644 --- a/tensorflow/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/map_dataset_op_test.py @@ -19,13 +19,16 @@ from __future__ import print_function from collections import namedtuple import threading +import time import numpy as np +from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops @@ -595,5 +598,64 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + +class MapDatasetBenchmark(test.Benchmark): + + def benchmarkChainOfMaps(self): + chain_lengths = [0, 1, 2, 5, 10, 20, 50] + for chain_length in chain_lengths: + with ops.Graph().as_default(): + dataset = dataset_ops.Dataset.from_tensors(0).repeat(None) + for _ in range(chain_length): + dataset = dataset.map(lambda x: x) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with session.Session() as sess: + for _ in range(5): + sess.run(next_element.op) + deltas = [] + for _ in range(100): + start = time.time() + for _ in range(100): + sess.run(next_element.op) + end = time.time() + deltas.append(end - start) + + median_wall_time = np.median(deltas) / 100 + print("Map dataset chain length: %d Median wall time: %f" + % (chain_length, median_wall_time)) + self.report_benchmark( + iters=1000, wall_time=median_wall_time, + name="benchmark_map_dataset_chain_latency_%d" % chain_length) + + def benchmarkMapFanOut(self): + fan_outs = [1, 2, 5, 10, 20, 50, 100] + for fan_out in fan_outs: + with ops.Graph().as_default(): + dataset = dataset_ops.Dataset.from_tensors( + tuple(0 for _ in range(fan_out))).repeat(None).map(lambda *xs: xs) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with session.Session() as sess: + for _ in range(5): + sess.run(next_element[0].op) + deltas = [] + for _ in range(100): + start = time.time() + for _ in range(100): + sess.run(next_element[0].op) + end = time.time() + deltas.append(end - start) + + median_wall_time = np.median(deltas) / 100 + print("Map dataset fan out: %d Median wall time: %f" + % (fan_out, median_wall_time)) + self.report_benchmark( + iters=1000, wall_time=median_wall_time, + name="benchmark_map_dataset_fan_out_%d" % fan_out) + + if __name__ == "__main__": test.main() -- GitLab From 385da92355a1b24d081843d4ffb509fd0a7a983e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 17:02:25 -0800 Subject: [PATCH 0685/1801] [TF:XLA] Make two test cases to run only with NDEBUG defined. PiperOrigin-RevId: 176439578 --- tensorflow/compiler/xla/tests/BUILD | 1 - tensorflow/compiler/xla/tests/params_test.cc | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index f3885e9021..c64d5aca4f 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -382,7 +382,6 @@ xla_test( name = "params_test", srcs = ["params_test.cc"], shard_count = 30, - tags = ["optonly"], deps = [ "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:literal_util", diff --git a/tensorflow/compiler/xla/tests/params_test.cc b/tensorflow/compiler/xla/tests/params_test.cc index fda4389f47..b7f62b8aa1 100644 --- a/tensorflow/compiler/xla/tests/params_test.cc +++ b/tensorflow/compiler/xla/tests/params_test.cc @@ -252,8 +252,8 @@ XLA_TEST_F(ParamsTest, HundredLargeR1Parameters) { } // Only run the 3,000-parameter tests in opt mode to avoid test timeouts. -// Timeout last observed on 2017-09-12. -#ifndef NDEBUG +// Timeout last observed on 2017-11-20. +#ifdef NDEBUG // TODO(b/65525254) Fails on GPU on 2017-09-10 because we try to reserve too // much space in parameter memory for the kernel. -- GitLab From fd92829df41984de014fd5f6807ad061fa45090a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 17:27:35 -0800 Subject: [PATCH 0686/1801] [tf.data] Remove redundant else statement. PiperOrigin-RevId: 176442381 --- tensorflow/core/kernels/zip_dataset_op.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index 96080863ea..9381915ae9 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -128,8 +128,6 @@ class ZipDatasetOp : public DatasetOpKernel { if (*end_of_sequence) { out_tensors->clear(); input_impls_.clear(); - } else { - *end_of_sequence = false; } return Status::OK(); } -- GitLab From 55672b52559c73b5bf12c4827277959bda765e59 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 20 Nov 2017 17:34:19 -0800 Subject: [PATCH 0687/1801] TFE_ContextAddFunction to interface with the TFE_Function* API PiperOrigin-RevId: 176443014 --- tensorflow/c/eager/c_api.cc | 6 ++++ tensorflow/c/eager/c_api.h | 7 ++++ tensorflow/c/eager/c_api_test.cc | 60 ++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 8359de62b7..706c89536d 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -571,6 +571,12 @@ void TFE_ContextAddFunctionDef(TFE_Context* ctx, status->status = ctx->func_lib_def.AddFunctionDef(function_def); } +void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function, + TF_Status* status) { + tensorflow::mutex_lock l(ctx->functions_mu); + status->status = ctx->func_lib_def.AddFunctionDef(function->fdef); +} + } // extern "C" TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) { diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 865580c5f3..ca105962df 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -200,6 +200,13 @@ TF_CAPI_EXPORT extern void TFE_ContextAddFunctionDef(TFE_Context* ctx, const char* serialized_function_def, size_t size, TF_Status* status); +// Adds a function (created from TF_GraphToFunction or +// TF_FunctionImportFunctionDef) to the context, allowing it to be executed with +// TFE_Execute by creating an op with the same name as the function. +TF_CAPI_EXPORT extern void TFE_ContextAddFunction(TFE_Context* ctx, + TF_Function* function, + TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 4af91b8853..03843fa913 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -295,6 +295,66 @@ TEST(CAPI, Execute) { TF_DeleteStatus(status); } +TEST(CAPI, Function) { + // First create a simple identity function. + TF_Graph* function_graph = TF_NewGraph(); + TF_OperationDescription* arg_descr = + TF_NewOperation(function_graph, "Placeholder", "arg"); + TF_SetAttrType(arg_descr, "dtype", TF_INT32); + TF_Status* status = TF_NewStatus(); + TF_Operation* arg = TF_FinishOperation(arg_descr, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_OperationDescription* id_descr = + TF_NewOperation(function_graph, "Identity", "id"); + TF_SetAttrType(id_descr, "T", TF_INT32); + TF_AddInput(id_descr, {arg, 0}); + TF_Operation* id = TF_FinishOperation(id_descr, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_Output input{arg, 0}; + TF_Output output{id, 0}; + TF_Function* fn = + TF_GraphToFunction(function_graph, "ident", 0, 1, &id, 1, &input, 1, + &output, nullptr, nullptr, "test", status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_DeleteGraph(function_graph); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_Context* ctx = TFE_NewContext(opts, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TFE_DeleteContextOptions(opts); + TFE_ContextAddFunction(ctx, fn, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_DeleteFunction(fn); + + TF_Tensor* t = TF_AllocateTensor(TF_INT32, nullptr, 0, 1); + *reinterpret_cast(TF_TensorData(t)) = 42; + TFE_TensorHandle* h = TFE_NewTensorHandle(t, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_DeleteTensor(t); + + TFE_Op* op = TFE_NewOp(ctx, "ident", status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TFE_OpAddInput(op, h, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + + std::vector result; + result.push_back(nullptr); + int num_retvals = 1; + TFE_Execute(op, result.data(), &num_retvals, status); + TFE_DeleteOp(op); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + ASSERT_EQ(num_retvals, 1); + + TF_Tensor* r = TFE_TensorHandleResolve(result[0], status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + EXPECT_EQ(*reinterpret_cast(TF_TensorData(r)), 42); + TFE_DeleteTensorHandle(h); + TF_DeleteTensor(r); + TFE_DeleteTensorHandle(result[0]); + TFE_DeleteContext(ctx, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_DeleteStatus(status); +} + string MatMulFunction() { tensorflow::FunctionDef def; CHECK(tensorflow::protobuf::TextFormat::ParseFromString( -- GitLab From c1885d49cb83801b6e8ca60c7ed9289f9f2ae13d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 17:40:21 -0800 Subject: [PATCH 0688/1801] removing unused code PiperOrigin-RevId: 176443600 --- tensorflow/contrib/boosted_trees/BUILD | 6 +- .../kernels/split_handler_ops.cc | 2 +- tensorflow/contrib/boosted_trees/lib/BUILD | 42 ----- .../handlers/bias-feature-column-handler.cc | 59 ------ .../handlers/bias-feature-column-handler.h | 57 ------ .../bias-feature-column-handler_test.cc | 135 -------------- .../categorical-feature-column-handler.cc | 140 -------------- .../categorical-feature-column-handler.h | 64 ------- ...categorical-feature-column-handler_test.cc | 165 ----------------- .../dense-quantized-feature-column-handler.cc | 116 ------------ .../dense-quantized-feature-column-handler.h | 62 ------- ...e-quantized-feature-column-handler_test.cc | 155 ---------------- .../handlers/feature-column-handler.h | 83 --------- ...sparse-quantized-feature-column-handler.cc | 172 ------------------ .../sparse-quantized-feature-column-handler.h | 67 ------- ...e-quantized-feature-column-handler_test.cc | 162 ----------------- 16 files changed, 5 insertions(+), 1482 deletions(-) delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc diff --git a/tensorflow/contrib/boosted_trees/BUILD b/tensorflow/contrib/boosted_trees/BUILD index 66a04d42e9..7072f56420 100644 --- a/tensorflow/contrib/boosted_trees/BUILD +++ b/tensorflow/contrib/boosted_trees/BUILD @@ -359,8 +359,8 @@ tf_custom_op_library( ], deps = [ "//tensorflow/contrib/boosted_trees/lib:example_partitioner", - "//tensorflow/contrib/boosted_trees/lib:feature-column-handlers", "//tensorflow/contrib/boosted_trees/lib:models", + "//tensorflow/contrib/boosted_trees/lib:node-stats", "//tensorflow/contrib/boosted_trees/lib:utils", "//tensorflow/contrib/boosted_trees/lib:weighted_quantiles", "//tensorflow/contrib/boosted_trees/proto:learner_proto_cc", @@ -404,10 +404,12 @@ tf_kernel_library( name = "split_handler_ops_kernels", srcs = ["kernels/split_handler_ops.cc"], deps = [ - "//tensorflow/contrib/boosted_trees/lib:feature-column-handlers", + "//tensorflow/contrib/boosted_trees/lib:node-stats", "//tensorflow/contrib/boosted_trees/proto:split_info_proto_cc", "//tensorflow/contrib/boosted_trees/proto:tree_config_proto_cc", "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:protos_all_cc", + "//third_party/eigen3", ], alwayslink = 1, ) diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc index 5c31980359..a5de1340b9 100644 --- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc @@ -16,7 +16,7 @@ #include #include -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h" +#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h" #include "tensorflow/contrib/boosted_trees/proto/split_info.pb.h" #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h" #include "tensorflow/core/framework/device_base.h" diff --git a/tensorflow/contrib/boosted_trees/lib/BUILD b/tensorflow/contrib/boosted_trees/lib/BUILD index 107ff0d295..af389849b4 100644 --- a/tensorflow/contrib/boosted_trees/lib/BUILD +++ b/tensorflow/contrib/boosted_trees/lib/BUILD @@ -406,48 +406,6 @@ tf_cc_test( ) # Learner/stochastic - -cc_library( - name = "feature-column-handlers", - srcs = [ - "learner/stochastic/handlers/bias-feature-column-handler.cc", - "learner/stochastic/handlers/categorical-feature-column-handler.cc", - "learner/stochastic/handlers/dense-quantized-feature-column-handler.cc", - "learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc", - ], - hdrs = [ - "learner/stochastic/handlers/bias-feature-column-handler.h", - "learner/stochastic/handlers/categorical-feature-column-handler.h", - "learner/stochastic/handlers/dense-quantized-feature-column-handler.h", - "learner/stochastic/handlers/feature-column-handler.h", - "learner/stochastic/handlers/sparse-quantized-feature-column-handler.h", - ], - deps = [ - ":feature-split-candidate", - ":feature-stats-accumulator", - "//tensorflow/contrib/boosted_trees/proto:learner_proto_cc", - "//tensorflow/core:framework_headers_lib", - "//tensorflow/core:protos_all_cc", - ], -) - -tf_cc_test( - name = "feature-column-handlers_test", - size = "small", - srcs = [ - "learner/stochastic/handlers/bias-feature-column-handler_test.cc", - "learner/stochastic/handlers/categorical-feature-column-handler_test.cc", - "learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc", - "learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc", - ], - deps = [ - ":feature-column-handlers", - "//tensorflow/core:tensor_testutil", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) - cc_library( name = "gradient-stats", hdrs = ["learner/stochastic/stats/gradient-stats.h"], diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc deleted file mode 100644 index b880cf2c47..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -void BiasFeatureColumnHandler::AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const { - // Pass over all examples and aggregate gradient stats for each sub-root. - for (int64 example_idx = 0; example_idx < batch_size_; ++example_idx) { - auto partition_id = example_partition_ids[example_idx]; - gradient_stats_accumulator->AddStats( - slot_id_, class_id_, partition_id, kBiasFeatureId, - GradientStats(example_first_order_gradients, - example_second_order_gradients, example_idx)); - } -} - -void BiasFeatureColumnHandler::GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const { - split_candidates->clear(); - split_candidates->reserve(roots.size()); - boosted_trees::trees::TreeNode tree_node; - for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) { - const NodeStats& root_node_stats = root_stats[root_idx]; - tree_node.Clear(); - root_node_stats.FillLeaf(class_id_, tree_node.mutable_leaf()); - split_candidates->emplace_back(slot_id_, tree_node, - SplitStats(learner_config, root_node_stats)); - } -} - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h deleted file mode 100644 index 5c0f99185a..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_H_ // NOLINT -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_H_ // NOLINT - -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -// Handler for a bias feature column in the single class case. -// This handler is useful even if we don't introduce a bias feature because -// it allows us to aggregate stats per partition which in turn allows us -// to compute node stats for each root to split. -class BiasFeatureColumnHandler : public FeatureColumnHandler { - public: - BiasFeatureColumnHandler(const uint32 class_id, const uint32 slot_id, - const int64 batch_size) - : FeatureColumnHandler(class_id, slot_id, batch_size) {} - - void AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const override; - - void GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const override; - - static constexpr auto kBiasFeatureId = 0; -}; - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_H_ // NOLINT diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc deleted file mode 100644 index f4c7df7fab..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h" - -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { -namespace { - -using boosted_trees::learner::LearnerConfig; - -const auto kClassId = 7; -const auto kSlotId = 0; -const auto kBatchSize = 4; - -using FeatureStatsAccumulator = - FeatureStatsAccumulator; - -class BiasFeatureColumnHandlerTest : public ::testing::Test { - protected: - BiasFeatureColumnHandlerTest() - : example_first_order_gradients_( - test::AsTensor({0.2f, -0.5f, 1.2f, 4.0f}, {4})), - example_second_order_gradients_( - test::AsTensor({0.12f, 0.07f, 0.2f, 0.13f}, {4})), - example_partitions_({0, 0, 1, 3}) { - // Set L2 regularization. - learner_config_.mutable_regularization()->set_l2(2.0f); - learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); - - // Create handler. - handler_.reset(new BiasFeatureColumnHandler(kClassId, kSlotId, kBatchSize)); - } - - LearnerConfig learner_config_; - const Tensor example_first_order_gradients_; - const Tensor example_second_order_gradients_; - const std::vector example_partitions_; - std::unique_ptr handler_; -}; - -TEST_F(BiasFeatureColumnHandlerTest, AggregateGradientStats) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Check stats for each partition. - // Partition 0. - EXPECT_GRADIENT_STATS_EQ( - GradientStats(-0.3f, 0.19f), - accumulator.GetStats(kSlotId, kClassId, 0, - BiasFeatureColumnHandler::kBiasFeatureId)); - // Partition 1. - EXPECT_GRADIENT_STATS_EQ( - GradientStats(1.2f, 0.2f), - accumulator.GetStats(kSlotId, kClassId, 1, - BiasFeatureColumnHandler::kBiasFeatureId)); - // Partition 2. - EXPECT_GRADIENT_STATS_EQ( - GradientStats(0.0f, 0.0f), - accumulator.GetStats(kSlotId, kClassId, 2, - BiasFeatureColumnHandler::kBiasFeatureId)); - // Partition 3. - EXPECT_GRADIENT_STATS_EQ( - GradientStats(4.0f, 0.13f), - accumulator.GetStats(kSlotId, kClassId, 3, - BiasFeatureColumnHandler::kBiasFeatureId)); -} - -TEST_F(BiasFeatureColumnHandlerTest, GenerateFeatureSplitCandidates) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Get feature split candidates for two roots 0 and 3. - // Root 0 has zero gain and root 3 has the same gain as the leaf. - const std::vector roots = {0, 3}; - const std::vector& root_stats = { - NodeStats(1), NodeStats(learner_config_, GradientStats(4.0f, 0.13f))}; - std::vector split_candidates; - handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats, - accumulator, &split_candidates); - // Expect two candidate splits (one per root). - EXPECT_EQ(2, split_candidates.size()); - - // Verify first candidate for root 0, gain is expected to be the same as - // the left child since the root node gain is zero. - const SplitStats expected_split_stats0(learner_config_, root_stats[0]); - EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats); - const auto& tree_node0 = split_candidates[0].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::kLeaf, tree_node0.node_case()); - EXPECT_EQ(1, tree_node0.leaf().sparse_vector().index_size()); - EXPECT_EQ(kClassId, tree_node0.leaf().sparse_vector().index(0)); - EXPECT_EQ(1, tree_node0.leaf().sparse_vector().value_size()); - EXPECT_EQ(root_stats[0].weight_contribution[0], - tree_node0.leaf().sparse_vector().value(0)); - - // Verify second candidate for root 3, gain is expected to be zero as - // the left child gain is equal to the parent gain. - const SplitStats expected_split_stats1(learner_config_, root_stats[1]); - EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats); - const auto& tree_node1 = split_candidates[1].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::kLeaf, tree_node1.node_case()); - EXPECT_EQ(1, tree_node1.leaf().sparse_vector().index_size()); - EXPECT_EQ(kClassId, tree_node1.leaf().sparse_vector().index(0)); - EXPECT_EQ(1, tree_node1.leaf().sparse_vector().value_size()); - EXPECT_EQ(root_stats[1].weight_contribution[0], - tree_node1.leaf().sparse_vector().value(0)); -} - -} // namespace -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc deleted file mode 100644 index 3a6c409f84..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h" - -#include "tensorflow/core/platform/macros.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -namespace { - -// Creates a categorical Id split node without assigning children. -boosted_trees::trees::TreeNode CreateCategoricalIdNode( - const int32 feature_column, const int32 id) { - boosted_trees::trees::TreeNode split_node; - auto* split = split_node.mutable_categorical_id_binary_split(); - split->set_feature_column(feature_column); - split->set_feature_id(id); - return split_node; -} - -} // namespace - -void CategoricalFeatureColumnHandler::AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const { - // Pass over all rows and aggregate gradient stats for each feature id. - const int64 num_rows = indices_.dimension(0); - for (int64 row_idx = 0; row_idx < num_rows; ++row_idx) { - auto example_idx = indices_(row_idx, 0); - auto feature_id = values_(row_idx); - const GradientStats norm_gradient_stats(example_first_order_gradients, - example_second_order_gradients, - example_idx); - auto partition_id = example_partition_ids[example_idx]; - gradient_stats_accumulator->AddStats(slot_id_, class_id_, partition_id, - feature_id, norm_gradient_stats); - } -} - -void CategoricalFeatureColumnHandler::GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const { - // Build a reverse lookup of partition id to root idx. - std::unordered_map partition_id_to_root_idx; - partition_id_to_root_idx.reserve(roots.size()); - for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) { - partition_id_to_root_idx[roots[root_idx]] = root_idx; - } - - // Initialize split candidates. - split_candidates->clear(); - if (!roots.empty()) { - FeatureSplitCandidate empty_candidate( - root_stats[0].weight_contribution.size()); - split_candidates->resize(roots.size(), empty_candidate); - } - for (auto& split_candidate : *split_candidates) { - split_candidate.split_stats.gain = std::numeric_limits::lowest(); - } - - // Evaluate split candidates for every root as each is a separate - // logical partition over the examples. - // Then for each root, we evaluate every feature id as an equality split - // and pick the highest split gain. - for (const auto& entry : - gradient_stats_accumulator.GetFeatureStats(slot_id_)) { - DCHECK_EQ(entry.first.class_id, class_id_); - - // Get partition id and root node stats. - const int32 partition_id = entry.first.partition_id; - auto root_idx_it = partition_id_to_root_idx.find(partition_id); - if (root_idx_it == partition_id_to_root_idx.end()) { - // Inactive partition. - continue; - } - size_t root_idx = root_idx_it->second; - const NodeStats& root_node_stats = root_stats[root_idx]; - - // Get gradient stats. - const auto& left_gradient_stats = entry.second; - auto right_gradient_stats = - root_node_stats.gradient_stats - left_gradient_stats; - - // Get node stats. - NodeStats left_node_stats(learner_config, left_gradient_stats); - NodeStats right_node_stats(learner_config, right_gradient_stats); - - // Generate split candidate and update best split candidate for the - // current root if needed. - FeatureSplitCandidate split_candidate( - slot_id_, - CreateCategoricalIdNode(feature_column_, entry.first.feature_id), - SplitStats(learner_config, root_node_stats, left_node_stats, - right_node_stats)); - FeatureSplitCandidate& best_split_candidate = (*split_candidates)[root_idx]; - if (TF_PREDICT_FALSE(best_split_candidate.tree_node.node_case() == - boosted_trees::trees::TreeNode::NODE_NOT_SET)) { - // Always replace candidates with no node set. - best_split_candidate = std::move(split_candidate); - } else if (TF_PREDICT_FALSE(split_candidate.split_stats.gain == - best_split_candidate.split_stats.gain)) { - // Tie break on feature id. - auto best_split_feature_id = - best_split_candidate.tree_node.categorical_id_binary_split() - .feature_id(); - if (entry.first.feature_id < best_split_feature_id) { - best_split_candidate = std::move(split_candidate); - } - } else if (split_candidate.split_stats.gain > - best_split_candidate.split_stats.gain) { - best_split_candidate = std::move(split_candidate); - } - } -} - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h deleted file mode 100644 index ef964ba716..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= - -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_CATEGORICAL_FEATURE_COLUMN_HANDLER_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_CATEGORICAL_FEATURE_COLUMN_HANDLER_H_ - -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -// Handler for a categorical feature column in the single class case. -class CategoricalFeatureColumnHandler : public FeatureColumnHandler { - public: - CategoricalFeatureColumnHandler(const int32 class_id, const int32 slot_id, - const int64 batch_size, - const int32 feature_column, - TTypes::ConstMatrix indices, - TTypes::ConstVec values) - : FeatureColumnHandler(class_id, slot_id, batch_size), - feature_column_(feature_column), - indices_(indices), - values_(values) {} - - void AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const override; - - void GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const override; - - protected: - const int32 feature_column_; - TTypes::ConstMatrix indices_; - TTypes::ConstVec values_; -}; - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_CATEGORICAL_FEATURE_COLUMN_HANDLER_H_ diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc deleted file mode 100644 index ea82b3f086..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h" - -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { -namespace { - -using boosted_trees::learner::LearnerConfig; - -const auto kClassId = 7; -const auto kSlotId = 0; -const auto kBatchSize = 4; -const auto kFeatureColumn = 3; - -using FeatureStatsAccumulator = - FeatureStatsAccumulator; - -class CategoricalFeatureColumnHandlerTest : public ::testing::Test { - protected: - // The data looks like the following: - // Example | Gradients | Partition | Feature Id | - // i0 | (0.2, 0.12) | 0 | 1,2 | - // i1 | (-0.5, 0.07) | 0 | | - // i2 | (1.2, 0.2) | 0 | 2 | - // i3 | (4.0, 0.13) | 1 | 0 | - CategoricalFeatureColumnHandlerTest() - : example_first_order_gradients_( - test::AsTensor({0.2f, -0.5f, 1.2f, 4.0f}, {4})), - example_second_order_gradients_( - test::AsTensor({0.12f, 0.07f, 0.2f, 0.13f}, {4})), - example_partitions_({0, 0, 0, 1}), - indices_(test::AsTensor({0, 0, 0, 1, 2, 0, 3, 0}, {4, 2})), - values_(test::AsTensor({1, 2, 2, 0}, {4})) { - // Set L2 regularization. - learner_config_.mutable_regularization()->set_l2(2.0f); - learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); - // Create handler. - handler_.reset(new CategoricalFeatureColumnHandler( - kClassId, kSlotId, kBatchSize, kFeatureColumn, indices_.matrix(), - values_.vec())); - } - - LearnerConfig learner_config_; - const Tensor example_first_order_gradients_; - const Tensor example_second_order_gradients_; - const std::vector example_partitions_; - const Tensor indices_; - const Tensor values_; - std::unique_ptr handler_; -}; - -TEST_F(CategoricalFeatureColumnHandlerTest, AggregateGradientStats) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Check stats for each partition and feature. - // Partition 0, Feature 0. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f), - accumulator.GetStats(kSlotId, kClassId, 0, 0)); - // Partition 0, Feature 1. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.2f, 0.12f), - accumulator.GetStats(kSlotId, kClassId, 0, 1)); - // Partition 0, Feature 2. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.2f + 1.2f, 0.12f + 0.2f), - accumulator.GetStats(kSlotId, kClassId, 0, 2)); - - // Partition 1, Feature 0. - EXPECT_GRADIENT_STATS_EQ(GradientStats(4.0f, 0.13f), - accumulator.GetStats(kSlotId, kClassId, 1, 0)); - // Partition 1, Feature 1. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f), - accumulator.GetStats(kSlotId, kClassId, 1, 1)); - // Partition 1, Feature 2. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f), - accumulator.GetStats(kSlotId, kClassId, 1, 2)); -} - -TEST_F(CategoricalFeatureColumnHandlerTest, GenerateFeatureSplitCandidates) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Get feature split candidates for two roots 0 and 1. - // The root stats are derived from the per-partition total gradient stats. - const std::vector roots = {0, 1, 5}; - const std::vector& root_stats = { - NodeStats(learner_config_, GradientStats(0.9f, 0.39f)), - NodeStats(learner_config_, GradientStats(4.0f, 0.13f)), NodeStats(1)}; - std::vector split_candidates; - handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats, - accumulator, &split_candidates); - // Expect three candidate splits (one per root). - EXPECT_EQ(3, split_candidates.size()); - - // Verify candidate for root 0, the best split occurs when we route - // example i0, i2 left and i1 right. - const NodeStats expected_left_node0(learner_config_, - GradientStats(0.2f + 1.2f, 0.12f + 0.2f)); - const NodeStats expected_right_node0( - learner_config_, - root_stats[0].gradient_stats - expected_left_node0.gradient_stats); - const SplitStats expected_split_stats0(learner_config_, root_stats[0], - expected_left_node0, - expected_right_node0); - EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats); - - const auto& tree_node0 = split_candidates[0].tree_node; - EXPECT_EQ( - boosted_trees::trees::TreeNode::kCategoricalIdBinarySplitFieldNumber, - tree_node0.node_case()); - const auto& split0 = tree_node0.categorical_id_binary_split(); - EXPECT_EQ(2, split0.feature_id()); - EXPECT_EQ(kFeatureColumn, split0.feature_column()); - - // Verify candidate for root 1, there's only one active feature here - // so zero gain is expected. - const NodeStats expected_left_node1(learner_config_, - root_stats[1].gradient_stats); - const NodeStats expected_right_node1(learner_config_, GradientStats(0, 0)); - const SplitStats expected_split_stats1(learner_config_, root_stats[1], - expected_left_node1, - expected_right_node1); - EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats); - const auto& tree_node1 = split_candidates[1].tree_node; - EXPECT_EQ( - boosted_trees::trees::TreeNode::kCategoricalIdBinarySplitFieldNumber, - tree_node1.node_case()); - const auto& split1 = tree_node1.categorical_id_binary_split(); - EXPECT_EQ(0, split1.feature_id()); - EXPECT_EQ(kFeatureColumn, split1.feature_column()); - - // Verify there are no candidate splits for root 5. - const auto& tree_node2 = split_candidates[2].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::NODE_NOT_SET, - tree_node2.node_case()); -} - -} // namespace -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc deleted file mode 100644 index ca7bb71e7d..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -namespace { - -// Creates a dense split node without assigning children. -boosted_trees::trees::TreeNode CreateDenseSplitNode(const int32 feature_column, - const float threshold) { - boosted_trees::trees::TreeNode split_node; - auto* split = split_node.mutable_dense_float_binary_split(); - split->set_feature_column(feature_column); - split->set_threshold(threshold); - return split_node; -} - -} // namespace - -void DenseQuantizedFeatureColumnHandler::AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const { - // Pass over all examples and aggregate gradient stats for each partition - // and quantized feature bucket. - for (int64 example_idx = 0; example_idx < batch_size_; ++example_idx) { - auto partition_id = example_partition_ids[example_idx]; - auto feature_id = dense_quantized_values_(example_idx); - gradient_stats_accumulator->AddStats( - slot_id_, class_id_, partition_id, feature_id, - GradientStats(example_first_order_gradients, - example_second_order_gradients, example_idx)); - } -} - -void DenseQuantizedFeatureColumnHandler::GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const { - // Evaluate split candidates for every root as each is a separate - // logical partition over the examples. - // Then for each root, we do a forward-only pass over the quantized - // feature buckets accumulating gradients from left to right. - // Split gains are evaluated at every threshold and the best split is picked. - split_candidates->clear(); - split_candidates->reserve(roots.size()); - for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) { - // Get partition Id and root node stats. - const int32 partition_id = roots[root_idx]; - const NodeStats& root_node_stats = root_stats[root_idx]; - - // Forward left to right pass over quantiles. - GradientStats left_gradient_stats; - GradientStats right_gradient_stats(root_node_stats.gradient_stats); - FeatureSplitCandidate best_split_candidate( - root_node_stats.weight_contribution.size()); - best_split_candidate.split_stats.gain = - std::numeric_limits::lowest(); - for (int bucket_id = 0; bucket_id < dense_quantiles_.size(); ++bucket_id) { - // Get gradient stats. - auto gradient_stats = gradient_stats_accumulator.GetStats( - slot_id_, class_id_, partition_id, bucket_id); - if (gradient_stats.IsZero()) { - continue; - } - - // Update gradient stats. - left_gradient_stats += gradient_stats; - right_gradient_stats = - root_node_stats.gradient_stats - left_gradient_stats; - - // Get node stats - NodeStats left_node_stats(learner_config, left_gradient_stats); - NodeStats right_node_stats(learner_config, right_gradient_stats); - - // Generate split candidate. - const float threshold = dense_quantiles_(bucket_id); - FeatureSplitCandidate split_candidate( - slot_id_, CreateDenseSplitNode(dense_feature_column_, threshold), - SplitStats(learner_config, root_node_stats, left_node_stats, - right_node_stats)); - if (split_candidate.split_stats.gain > - best_split_candidate.split_stats.gain) { - best_split_candidate = std::move(split_candidate); - } - } - - // Add best candidate for partition. - split_candidates->push_back(std::move(best_split_candidate)); - } -} - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h deleted file mode 100644 index 0f3858e4d8..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_DENSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_DENSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_ - -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -// Handler for a dense quantized feature column in the single class case. -class DenseQuantizedFeatureColumnHandler : public FeatureColumnHandler { - public: - DenseQuantizedFeatureColumnHandler( - const int32 class_id, const int32 slot_id, const int64 batch_size, - const int32 dense_feature_column, TTypes::ConstVec dense_quantiles, - TTypes::ConstVec dense_quantized_values) - : FeatureColumnHandler(class_id, slot_id, batch_size), - dense_feature_column_(dense_feature_column), - dense_quantiles_(dense_quantiles), - dense_quantized_values_(dense_quantized_values) {} - - void AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const override; - - void GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const override; - - protected: - const int32 dense_feature_column_; - TTypes::ConstVec dense_quantiles_; - TTypes::ConstVec dense_quantized_values_; -}; - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_DENSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_ diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc deleted file mode 100644 index 1bc9d733ad..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h" - -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { -namespace { - -using boosted_trees::learner::LearnerConfig; - -const auto kClassId = 1; -const auto kSlotId = 0; -const auto kBatchSize = 4; -const auto kFeatureColumn = 2; - -using FeatureStatsAccumulator = - FeatureStatsAccumulator; - -class DenseQuantizedFeatureColumnHandlerTest : public ::testing::Test { - protected: - // The data looks like the following: - // Example | Gradients | Partition | Dense Quantile | - // i0 | (0.2, 0.12) | 0 | 1 | - // i1 | (-0.5, 0.07) | 0 | 1 | - // i2 | (1.2, 0.2) | 0 | 0 | - // i3 | (4.0, 0.13) | 1 | 1 | - DenseQuantizedFeatureColumnHandlerTest() - : example_first_order_gradients_( - test::AsTensor({0.2f, -0.5f, 1.2f, 4.0f}, {4})), - example_second_order_gradients_( - test::AsTensor({0.12f, 0.07f, 0.2f, 0.13f}, {4})), - example_partitions_({0, 0, 0, 1}), - dense_quantiles_(test::AsTensor({0.3f, 0.52f}, {2})), - dense_quantized_values_(test::AsTensor({1, 1, 0, 1}, {4})) { - // Set L2 regularization. - learner_config_.mutable_regularization()->set_l2(2.0f); - learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); - // Create handler. - handler_.reset(new DenseQuantizedFeatureColumnHandler( - kClassId, kSlotId, kBatchSize, kFeatureColumn, - dense_quantiles_.vec(), dense_quantized_values_.vec())); - } - - LearnerConfig learner_config_; - const Tensor example_first_order_gradients_; - const Tensor example_second_order_gradients_; - const std::vector example_partitions_; - const Tensor dense_quantiles_; - const Tensor dense_quantized_values_; - std::unique_ptr handler_; -}; - -TEST_F(DenseQuantizedFeatureColumnHandlerTest, AggregateGradientStats) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Check stats for each partition and feature. - // Partition 0, Feature 0. - EXPECT_GRADIENT_STATS_EQ(GradientStats(1.2f, 0.2f), - accumulator.GetStats(kSlotId, kClassId, 0, 0)); - // Partition 0, Feature 1. - EXPECT_GRADIENT_STATS_EQ(GradientStats(-0.3f, 0.19f), - accumulator.GetStats(kSlotId, kClassId, 0, 1)); - // Partition 1, Feature 0. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f), - accumulator.GetStats(kSlotId, kClassId, 1, 0)); - // Partition 1, Feature 1. - EXPECT_GRADIENT_STATS_EQ(GradientStats(4.0f, 0.13f), - accumulator.GetStats(kSlotId, kClassId, 1, 1)); -} - -TEST_F(DenseQuantizedFeatureColumnHandlerTest, GenerateFeatureSplitCandidates) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Get feature split candidates for two roots 0 and 1. - // The root stats are derived from the per-partition total gradient stats. - const std::vector roots = {0, 1, 5}; - const std::vector& root_stats = { - NodeStats(learner_config_, GradientStats(0.9f, 0.39f)), - NodeStats(learner_config_, GradientStats(4.0f, 0.13f)), NodeStats(1)}; - std::vector split_candidates; - handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats, - accumulator, &split_candidates); - // Expect three candidate splits (one per root). - EXPECT_EQ(3, split_candidates.size()); - - // Verify candidate for root 0, the best split occurs when we route - // example i2 left and i0, i1 right. - const NodeStats expected_left_node0(learner_config_, - GradientStats(1.2f, 0.2f)); - const NodeStats expected_right_node0( - learner_config_, - root_stats[0].gradient_stats - expected_left_node0.gradient_stats); - const SplitStats expected_split_stats0(learner_config_, root_stats[0], - expected_left_node0, - expected_right_node0); - EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats); - const auto& tree_node0 = split_candidates[0].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::kDenseFloatBinarySplit, - tree_node0.node_case()); - const auto& split0 = tree_node0.dense_float_binary_split(); - EXPECT_FLOAT_EQ(dense_quantiles_.vec()(0), split0.threshold()); - EXPECT_EQ(kFeatureColumn, split0.feature_column()); - - // Verify candidate for root 1, there's only one active bucket here - // so zero gain is expected. - const NodeStats expected_left_node1(learner_config_, - root_stats[1].gradient_stats); - const NodeStats expected_right_node1(learner_config_, GradientStats(0, 0)); - const SplitStats expected_split_stats1(learner_config_, root_stats[1], - expected_left_node1, - expected_right_node1); - EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats); - const auto& tree_node1 = split_candidates[1].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::kDenseFloatBinarySplit, - tree_node1.node_case()); - const auto& split1 = tree_node1.dense_float_binary_split(); - EXPECT_FLOAT_EQ(dense_quantiles_.vec()(1), split1.threshold()); - EXPECT_EQ(kFeatureColumn, split1.feature_column()); - - // Verify there are no candidate splits for root 5. - const auto& tree_node2 = split_candidates[2].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::NODE_NOT_SET, - tree_node2.node_case()); -} - -} // namespace -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h deleted file mode 100644 index 8bd2092f96..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= - -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_FEATURE_COLUMN_HANDLER_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_FEATURE_COLUMN_HANDLER_H_ - -#include -#include "tensorflow/contrib/boosted_trees/lib/learner/common/accumulators/feature-stats-accumulator.h" -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h" -#include "tensorflow/contrib/boosted_trees/proto/learner.pb.h" -#include "tensorflow/core/framework/attr_value.pb.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_types.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -// Handler interface for feature columns. Each feature column type may -// have its own handler which encapsulates the logic of aggregating gradient -// stats as well as generating split candidates for each partition. -// Handlers can be stateful and must be thread compatible. -class FeatureColumnHandler { - public: - FeatureColumnHandler(const int32 class_id, const int32 slot_id, - const int64 batch_size) - : class_id_(class_id), slot_id_(slot_id), batch_size_(batch_size) {} - - virtual ~FeatureColumnHandler() {} - FeatureColumnHandler(const FeatureColumnHandler& other) = delete; - FeatureColumnHandler& operator=(const FeatureColumnHandler& other) = delete; - - // Aggregates example gradient stats for the feature column. - virtual void AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const = 0; - - // Generates feature column split candidates for the specified roots. - virtual void GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const = 0; - - // Accessors. - int32 class_id() const { return class_id_; } - int32 slot_id() const { return slot_id_; } - int64 batch_size() const { return batch_size_; } - - protected: - // The class Id. - const int32 class_id_; - - // The slod Id for use as a unique Id across all feature columns. - const int32 slot_id_; - - // Size of the batch of examples. - const int64 batch_size_; -}; - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_FEATURE_COLUMN_HANDLER_H_ diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc deleted file mode 100644 index a0e9efbbc5..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -namespace { - -// Creates a sparse default right split node without assigning children. -boosted_trees::trees::TreeNode CreateSparseSplitNodeDefaultRight( - int32 feature_column, float threshold) { - boosted_trees::trees::TreeNode split_node; - auto* split = split_node.mutable_sparse_float_binary_split_default_right() - ->mutable_split(); - split->set_feature_column(feature_column); - split->set_threshold(threshold); - return split_node; -} - -// Creates a sparse default left split node without assigning children. -boosted_trees::trees::TreeNode CreateSparseSplitNodeDefaultLeft( - int32 feature_column, float threshold) { - boosted_trees::trees::TreeNode split_node; - auto* split = split_node.mutable_sparse_float_binary_split_default_left() - ->mutable_split(); - split->set_feature_column(feature_column); - split->set_threshold(threshold); - return split_node; -} - -} // namespace - -void SparseQuantizedFeatureColumnHandler::AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const { - // Pass over all rows and aggregate gradient stats for each partition - // and quantized feature bucket. - const int64 num_rows = sparse_indices_.dimension(0); - for (int64 row_idx = 0; row_idx < num_rows; ++row_idx) { - auto example_idx = sparse_indices_(row_idx, 0); - auto partition_id = example_partition_ids[example_idx]; - auto feature_id = sparse_quantized_values_(row_idx); - gradient_stats_accumulator->AddStats( - slot_id_, class_id_, partition_id, feature_id, - GradientStats(example_first_order_gradients, - example_second_order_gradients, example_idx)); - } -} - -void SparseQuantizedFeatureColumnHandler::GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const { - // Evaluate split candidates for every root as each is a separate - // logical partition over the examples. - // Then for each root, we do both a forward left to right pass and a backward - // right to left pass over the quantized feature buckets accumulating - // gradients on one side and using the root aggregate gradients to get the - // gradients for the other side. Split gains are evaluated for each pass at - // every threshold and the best split is picked. - split_candidates->clear(); - split_candidates->reserve(roots.size()); - for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) { - // Get partition Id and root node stats. - const int32 partition_id = roots[root_idx]; - const NodeStats& root_node_stats = root_stats[root_idx]; - - // Forward pass with right default direction. - GradientStats left_gradient_stats; - GradientStats right_gradient_stats(root_node_stats.gradient_stats); - FeatureSplitCandidate best_split_candidate( - root_node_stats.weight_contribution.size()); - best_split_candidate.split_stats.gain = - std::numeric_limits::lowest(); - for (int bucket_id = 0; bucket_id < sparse_quantiles_.size(); ++bucket_id) { - // Get gradient stats. - auto gradient_stats = gradient_stats_accumulator.GetStats( - slot_id_, class_id_, partition_id, bucket_id); - if (gradient_stats.IsZero()) { - continue; - } - - // Update gradient stats. - left_gradient_stats += gradient_stats; - right_gradient_stats = - root_node_stats.gradient_stats - left_gradient_stats; - - // Get node stats - NodeStats left_node_stats(learner_config, left_gradient_stats); - NodeStats right_node_stats(learner_config, right_gradient_stats); - - // Generate split candidate. - const float threshold = sparse_quantiles_(bucket_id); - FeatureSplitCandidate split_candidate( - slot_id_, - CreateSparseSplitNodeDefaultRight(sparse_feature_column_, threshold), - SplitStats(learner_config, root_node_stats, left_node_stats, - right_node_stats)); - if (split_candidate.split_stats.gain > - best_split_candidate.split_stats.gain) { - best_split_candidate = std::move(split_candidate); - } - } - - // Determine if we need a backward pass by checking if the residual gradient - // after forward aggregation is almost the same as the aggregated gradient. - // for the current root. This helps avoid unnecessary computation as well - // as consistency due to floating point precision. - if (!right_gradient_stats.IsAlmostZero()) { - // Backward pass with left default direction. - right_gradient_stats = GradientStats(); - left_gradient_stats = root_node_stats.gradient_stats; - for (int bucket_id = sparse_quantiles_.size() - 1; bucket_id > 0; - --bucket_id) { - // Get gradient stats. - auto gradient_stats = gradient_stats_accumulator.GetStats( - slot_id_, class_id_, partition_id, bucket_id); - if (gradient_stats.IsZero()) { - continue; - } - - // Update gradient stats. - right_gradient_stats += gradient_stats; - left_gradient_stats = root_node_stats.gradient_stats - gradient_stats; - - // Get node stats - NodeStats left_node_stats(learner_config, left_gradient_stats); - NodeStats right_node_stats(learner_config, right_gradient_stats); - - // Generate split candidate. - const float threshold = sparse_quantiles_(bucket_id - 1); - FeatureSplitCandidate split_candidate( - slot_id_, - CreateSparseSplitNodeDefaultLeft(sparse_feature_column_, threshold), - SplitStats(learner_config, root_node_stats, left_node_stats, - right_node_stats)); - if (split_candidate.split_stats.gain > - best_split_candidate.split_stats.gain) { - best_split_candidate = std::move(split_candidate); - } - } - } - - // Add best candidate for partition. - split_candidates->push_back(std::move(best_split_candidate)); - } -} - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h deleted file mode 100644 index eb63e70547..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= - -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_SPARSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_SPARSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_ - -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { - -// Handler for a sparse quantized feature column in the single class case. -class SparseQuantizedFeatureColumnHandler : public FeatureColumnHandler { - public: - SparseQuantizedFeatureColumnHandler( - const int32 class_id, const int32 slot_id, const int64 batch_size, - const int32 sparse_feature_column, - TTypes::ConstVec sparse_quantiles, - TTypes::ConstMatrix sparse_indices, - TTypes::ConstVec sparse_quantized_values) - : FeatureColumnHandler(class_id, slot_id, batch_size), - sparse_feature_column_(sparse_feature_column), - sparse_quantiles_(sparse_quantiles), - sparse_indices_(sparse_indices), - sparse_quantized_values_(sparse_quantized_values) {} - - void AggregateGradientStats( - const std::vector& example_partition_ids, - const Tensor& example_first_order_gradients, - const Tensor& example_second_order_gradients, - FeatureStatsAccumulator* - gradient_stats_accumulator) const override; - - void GenerateFeatureSplitCandidates( - const LearnerConfig& learner_config, const std::vector& roots, - const std::vector& root_stats, - const FeatureStatsAccumulator& - gradient_stats_accumulator, - std::vector* split_candidates) const override; - - protected: - const int32 sparse_feature_column_; - TTypes::ConstVec sparse_quantiles_; - TTypes::ConstMatrix sparse_indices_; - TTypes::ConstVec sparse_quantized_values_; -}; - -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_SPARSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_ diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc deleted file mode 100644 index 643d936ad2..0000000000 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ============================================================================= - -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h" - -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace boosted_trees { -namespace learner { -namespace stochastic { -namespace { - -using boosted_trees::learner::LearnerConfig; - -const auto kClassId = 3; -const auto kSlotId = 0; -const auto kBatchSize = 4; -const auto kFeatureColumn = 4; - -using FeatureStatsAccumulator = - FeatureStatsAccumulator; - -class SparseQuantizedFeatureColumnHandlerTest : public ::testing::Test { - protected: - // The data looks like the following: - // Example | Gradients | Partition | Sparse Quantile | - // i0 | (0.2, 0.12) | 0 | 1 | - // i1 | (-0.5, 0.07) | 0 | N/A | - // i2 | (1.2, 0.2) | 0 | 0 | - // i3 | (4.0, 0.13) | 1 | 1 | - SparseQuantizedFeatureColumnHandlerTest() - : example_first_order_gradients_( - test::AsTensor({0.2f, -0.5f, 1.2f, 4.0f}, {4})), - example_second_order_gradients_( - test::AsTensor({0.12f, 0.07f, 0.2f, 0.13f}, {4})), - example_partitions_({0, 0, 0, 1}), - sparse_quantiles_(test::AsTensor({0.3f, 0.52f}, {2})), - sparse_indices_(test::AsTensor({0, 0, 2, 0, 3, 0}, {3, 2})), - sparse_quantized_values_(test::AsTensor({1, 0, 1}, {3})) { - // Set L2 regularization. - learner_config_.mutable_regularization()->set_l2(2.0f); - learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); - // Create handler. - handler_.reset(new SparseQuantizedFeatureColumnHandler( - kClassId, kSlotId, kBatchSize, kFeatureColumn, - sparse_quantiles_.vec(), sparse_indices_.matrix(), - sparse_quantized_values_.vec())); - } - - LearnerConfig learner_config_; - const Tensor example_first_order_gradients_; - const Tensor example_second_order_gradients_; - const std::vector example_partitions_; - const Tensor sparse_quantiles_; - const Tensor sparse_indices_; - const Tensor sparse_quantized_values_; - std::unique_ptr handler_; -}; - -TEST_F(SparseQuantizedFeatureColumnHandlerTest, AggregateGradientStats) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Check stats for each partition and feature. - // Partition 0, Feature 0. - EXPECT_GRADIENT_STATS_EQ(GradientStats(1.2f, 0.2f), - accumulator.GetStats(kSlotId, kClassId, 0, 0)); - // Partition 0, Feature 1. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.2f, 0.12f), - accumulator.GetStats(kSlotId, kClassId, 0, 1)); - // Partition 1, Feature 0. - EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f), - accumulator.GetStats(kSlotId, kClassId, 1, 0)); - // Partition 1, Feature 1. - EXPECT_GRADIENT_STATS_EQ(GradientStats(4.0f, 0.13f), - accumulator.GetStats(kSlotId, kClassId, 1, 1)); -} - -TEST_F(SparseQuantizedFeatureColumnHandlerTest, - GenerateFeatureSplitCandidates) { - // Create handler. - FeatureStatsAccumulator accumulator(1); - handler_->AggregateGradientStats( - example_partitions_, example_first_order_gradients_, - example_second_order_gradients_, &accumulator); - - // Get feature split candidates for two roots 0 and 1. - // The root stats are derived from the per-partition total gradient stats. - const std::vector roots = {0, 1, 9}; - const std::vector& root_stats = { - NodeStats(learner_config_, GradientStats(0.9f, 0.39f)), - NodeStats(learner_config_, GradientStats(4.0f, 0.13f)), NodeStats(1)}; - std::vector split_candidates; - handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats, - accumulator, &split_candidates); - // Expect three candidate splits (one per root). - EXPECT_EQ(3, split_candidates.size()); - - // Verify candidate for root 0, the best split occurs when we route - // example i0 and i2 to the left and i1 to the right (by default direction). - const NodeStats expected_left_node0(learner_config_, - GradientStats(0.2f + 1.2f, 0.12f + 0.2f)); - const NodeStats expected_right_node0( - learner_config_, - root_stats[0].gradient_stats - expected_left_node0.gradient_stats); - const SplitStats expected_split_stats0(learner_config_, root_stats[0], - expected_left_node0, - expected_right_node0); - EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats); - const auto& tree_node0 = split_candidates[0].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::kSparseFloatBinarySplitDefaultRight, - tree_node0.node_case()); - const auto& split0 = - tree_node0.sparse_float_binary_split_default_right().split(); - EXPECT_FLOAT_EQ(sparse_quantiles_.vec()(1), split0.threshold()); - EXPECT_EQ(kFeatureColumn, split0.feature_column()); - - // Verify candidate for root 1, there's only one active bucket here - // so zero gain is expected. - const NodeStats expected_left_node1(learner_config_, - root_stats[1].gradient_stats); - const NodeStats expected_right_node1(learner_config_, GradientStats(0, 0)); - const SplitStats expected_split_stats1(learner_config_, root_stats[1], - expected_left_node1, - expected_right_node1); - EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats); - const auto& tree_node1 = split_candidates[1].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::kSparseFloatBinarySplitDefaultRight, - tree_node1.node_case()); - const auto& split1 = - tree_node1.sparse_float_binary_split_default_right().split(); - EXPECT_FLOAT_EQ(sparse_quantiles_.vec()(1), split1.threshold()); - EXPECT_EQ(kFeatureColumn, split1.feature_column()); - - // Verify there are no candidate splits for root 9. - const auto& tree_node2 = split_candidates[2].tree_node; - EXPECT_EQ(boosted_trees::trees::TreeNode::NODE_NOT_SET, - tree_node2.node_case()); -} - -} // namespace -} // namespace stochastic -} // namespace learner -} // namespace boosted_trees -} // namespace tensorflow -- GitLab From 7c7ccb0ba476d12814b5be2a0b87f30784977a7e Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 20 Nov 2017 17:53:18 -0800 Subject: [PATCH 0689/1801] Adding support for (nested) batching of sparse tensor for tf.data. PiperOrigin-RevId: 176444931 --- .../kernel_tests/batch_dataset_op_test.py | 122 +++++++-- .../contrib/data/python/ops/batching.py | 7 - tensorflow/contrib/makefile/tf_op_files.txt | 1 + .../base_api/api_def_DeserializeSparse.pbtxt | 10 +- tensorflow/core/kernels/BUILD | 22 +- tensorflow/core/kernels/reshape_util.cc | 149 +++++++++++ tensorflow/core/kernels/reshape_util.h | 31 +++ .../core/kernels/serialize_sparse_op.cc | 238 +++++++++++++----- tensorflow/core/kernels/sparse_reshape_op.cc | 123 +-------- tensorflow/core/ops/sparse_ops.cc | 16 +- tensorflow/python/data/ops/dataset_ops.py | 6 +- tensorflow/python/data/util/sparse.py | 1 - .../kernel_tests/batch_dataset_op_test.py | 55 +++- .../kernel_tests/sparse_reshape_op_test.py | 2 +- .../sparse_serialization_ops_test.py | 81 ++++++ tensorflow/python/ops/sparse_ops.py | 17 +- 16 files changed, 638 insertions(+), 243 deletions(-) create mode 100644 tensorflow/core/kernels/reshape_util.cc create mode 100644 tensorflow/core/kernels/reshape_util.h diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 09416f8302..d7437cba73 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -104,14 +104,58 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) - def testBatchSparseError(self): + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) - def _map_fn(i): + def testBatchSparse(self): + + def _sparse(i): return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + indices=[[0]], values=(i * [1]), dense_shape=[1]) - with self.assertRaises(TypeError): - _ = dataset_ops.Dataset.range(10).map(_map_fn).batch(10) + iterator = dataset_ops.Dataset.range(10).map(_sparse).batch( + 5).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(2): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], + values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], + dense_shape=[5, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testNestedBatchSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch( + 2).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0], + [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]], + values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + dense_shape=[2, 5, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) def testPaddedBatchDataset(self): seq_lens = array_ops.placeholder(dtypes.int32, shape=[None]) @@ -438,6 +482,30 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testBatchAndDropRemainderSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(12).map(_sparse).apply( + batching.batch_and_drop_remainder(5)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(2): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], + values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], + dense_shape=[5, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + def testPaddedBatchAndDropRemainder(self): els = [] for length in [3, 6, 9, 4, 12, 10, 2]: @@ -474,6 +542,16 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testPaddedBatchAndDropRemainderSparseError(self): + + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + with self.assertRaises(TypeError): + _ = dataset_ops.Dataset.range(10).map(_map_fn).apply( + batching.padded_batch_and_drop_remainder(5)) + def testBatchAndDropRemainderShapeInference(self): components = (array_ops.placeholder(dtypes.int32), (array_ops.placeholder(dtypes.int32, shape=[None]), @@ -499,16 +577,6 @@ class BatchDatasetTest(test.TestCase): self.assertEqual([None], dataset.output_shapes[1][0].as_list()) self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list()) - def testBatchAndDropRemainderSparseError(self): - - def _map_fn(i): - return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i - - with self.assertRaises(TypeError): - _ = dataset_ops.Dataset.range(10).map(_map_fn).apply( - batching.batch_and_drop_remainder(10)) - def testBatchAndMapDataset(self): """Test a dataset that maps a TF function across its input elements.""" # The pipeline is TensorSliceDataset -> @@ -572,6 +640,30 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) + def testMapAndBatchSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(10).apply( + batching.map_and_batch(_sparse, 5)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(2): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], + values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], + dense_shape=[5, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + def testBatchAndMapDatasetFails(self): """Test a dataset that maps a TF function across its input elements.""" dataset = dataset_ops.Dataset.from_tensors( diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index 1ac059b374..63782d229e 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -353,10 +353,6 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): def __init__(self, input_dataset, map_func, batch_size, num_parallel_batches): """See `Dataset.map()` for details.""" super(_MapAndBatchDataset, self).__init__(input_dataset, map_func) - if sparse.any_sparse(self._output_types): - # TODO(b/63669786): support batching of sparse tensors - raise TypeError("Batching of sparse tensors is not currently supported") - self._batch_size = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") self._num_parallel_batches = ops.convert_to_tensor( @@ -422,9 +418,6 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1): """ def _apply_fn(dataset): - if sparse.any_sparse(dataset.output_types): - # TODO(b/63669786): support batching of sparse tensors - raise TypeError("Batching of sparse tensors is not currently supported") return _MapAndBatchDataset(dataset, map_func, batch_size, num_parallel_batches) diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 97351b2c51..ff612f1fdf 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -78,6 +78,7 @@ tensorflow/core/kernels/reverse_op.cc tensorflow/core/kernels/restore_op.cc tensorflow/core/kernels/resize_nearest_neighbor_op.cc tensorflow/core/kernels/resize_bilinear_op.cc +tensorflow/core/kernels/reshape_util.cc tensorflow/core/kernels/reshape_op.cc tensorflow/core/kernels/relu_op.cc tensorflow/core/kernels/reduction_ops_sum.cc diff --git a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt index c86f059eb3..00e96c8a15 100644 --- a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt @@ -3,17 +3,15 @@ op { in_arg { name: "serialized_sparse" description: < +#include +#include +#include +#include + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_util.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/util/sparse/sparse_tensor.h" + +namespace tensorflow { + +void Reshape(OpKernelContext *context, const Tensor &input_indices_in, + const Tensor &input_shape_in, const Tensor &target_shape_in, + int output_indices_idx, int output_shape_idx) { + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_indices_in.shape()), + errors::InvalidArgument( + "Input indices should be a matrix but received shape ", + input_indices_in.shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()), + errors::InvalidArgument( + "Input shape should be a vector but received shape ", + input_shape_in.shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsVector(target_shape_in.shape()), + errors::InvalidArgument( + "Target shape should be a vector but received shape ", + target_shape_in.shape().DebugString())); + + const int64 input_rank = input_shape_in.NumElements(); + const int64 output_rank = target_shape_in.NumElements(); + const TensorShape input_shape(input_shape_in.vec()); + const int64 dense_size = input_shape.num_elements(); + const int64 nnz = input_indices_in.shape().dim_size(0); + + // Compute the output shape. Determine product of specified dimensions, and + // find the index of the unspecified one. + TensorShape output_shape; + int64 product = 1; + int unknown_index = -1; + auto target_shape = target_shape_in.vec(); + for (int d = 0; d < output_rank; ++d) { + const int64 size = target_shape(d); + if (size == -1) { + OP_REQUIRES( + context, unknown_index == -1, + errors::InvalidArgument("only one output dimension may be -1, " + "not both ", + unknown_index, " and ", d)); + unknown_index = d; + output_shape.AddDim(1); + } else { + OP_REQUIRES(context, size >= 0, + errors::InvalidArgument("size ", d, + " must be non-negative, not ", size)); + product *= size; + output_shape.AddDim(size); + } + } + if (unknown_index != -1) { + OP_REQUIRES( + context, product > 0, + errors::InvalidArgument("reshape cannot infer the missing " + "input size for an empty tensor unless all " + "specified input sizes are non-zero")); + const int64 missing = dense_size / product; + OP_REQUIRES( + context, product * missing == dense_size, + errors::InvalidArgument( + "Input to reshape is a SparseTensor with ", dense_size, + " dense values, but the requested shape requires a multiple of ", + product)); + output_shape.set_dim(unknown_index, missing); + } + + OP_REQUIRES( + context, output_shape.num_elements() == dense_size, + errors::InvalidArgument("Input to reshape is a tensor with ", dense_size, + " dense values, but the requested shape has ", + output_shape.num_elements())); + + // Optimize for reshaping to the same shape. + if (input_shape == output_shape) { + context->set_output(output_indices_idx, input_indices_in); + context->set_output(output_shape_idx, input_shape_in); + return; + } + + gtl::InlinedVector input_strides(input_rank); + input_strides[input_rank - 1] = 1; + for (int d = input_rank - 2; d >= 0; --d) { + input_strides[d] = input_strides[d + 1] * input_shape.dim_size(d + 1); + } + + gtl::InlinedVector output_strides(output_rank); + output_strides[output_rank - 1] = 1; + for (int d = output_rank - 2; d >= 0; --d) { + output_strides[d] = output_strides[d + 1] * output_shape.dim_size(d + 1); + } + + Tensor *result_indices = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(output_indices_idx, + TensorShape({nnz, output_rank}), + &result_indices)); + auto input_ind = input_indices_in.matrix(); + auto output_ind = result_indices->matrix(); + for (int i = 0; i < nnz; ++i) { + int64 id = 0; + for (int j = 0; j < input_rank; ++j) { + id += input_ind(i, j) * input_strides[j]; + } + for (int j = 0; j < output_rank; ++j) { + output_ind(i, j) = id / output_strides[j]; + id %= output_strides[j]; + } + } + + Tensor *result_shape = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(output_shape_idx, + TensorShape({output_rank}), + &result_shape)); + auto output_shape_vec = result_shape->vec(); + for (int j = 0; j < output_shape.dims(); ++j) { + output_shape_vec(j) = output_shape.dim_size(j); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/reshape_util.h b/tensorflow/core/kernels/reshape_util.h new file mode 100644 index 0000000000..ed583afd13 --- /dev/null +++ b/tensorflow/core/kernels/reshape_util.h @@ -0,0 +1,31 @@ + +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_ + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +// Reshapes the input indices and input shape to the target shape. +void Reshape(OpKernelContext *context, const Tensor &input_indices_in, + const Tensor &input_shape_in, const Tensor &target_shape_in, + int output_indices_idx, int output_shape_idx); + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_ diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc index ac58c3d1ea..161c505e84 100644 --- a/tensorflow/core/kernels/serialize_sparse_op.cc +++ b/tensorflow/core/kernels/serialize_sparse_op.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/reshape_util.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/util/sparse/sparse_tensor.h" @@ -215,84 +216,185 @@ class DeserializeSparseOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& serialized_sparse = context->input(0); - OP_REQUIRES(context, TensorShapeUtils::IsVector(serialized_sparse.shape()), + const int ndims = serialized_sparse.shape().dims(); + + OP_REQUIRES( + context, ndims > 0, + errors::InvalidArgument("Serialized sparse should have non-zero rank ", + serialized_sparse.shape().DebugString())); + + OP_REQUIRES(context, serialized_sparse.shape().dim_size(ndims - 1) == 3, errors::InvalidArgument( - "Serialized sparse should be a vector but received shape ", + "Serialized sparse should have 3 as the last dimension ", serialized_sparse.shape().DebugString())); + + int num_sparse_tensors = 1; + for (int i = 0; i < ndims - 1; ++i) { + num_sparse_tensors *= serialized_sparse.shape().dim_size(i); + } + OP_REQUIRES( - context, serialized_sparse.shape().dim_size(0) == 3, + context, num_sparse_tensors > 0, errors::InvalidArgument( - "Serialize sparse should have 3 columns but received shape ", + "Serialized sparse should have at least 1 serialized tensor, " + "but has a zero dimension ", serialized_sparse.shape().DebugString())); - Tensor output_indices(DT_INT64); - Tensor output_values(DataTypeToEnum::value); - Tensor output_shape(DT_INT64); - TensorProto proto_indices; - TensorProto proto_values; - TensorProto proto_shape; + std::vector indices; + std::vector values; + TensorShape shape; + indices.reserve(num_sparse_tensors); + values.reserve(num_sparse_tensors); - const auto& serialized_sparse_t = serialized_sparse.vec(); + const auto& serialized_sparse_t = + serialized_sparse.flat_inner_dims(); - OP_REQUIRES( - context, ParseProtoUnlimited(&proto_indices, serialized_sparse_t(0)), - errors::InvalidArgument("Could not parse serialized_sparse[0]")); - OP_REQUIRES( - context, ParseProtoUnlimited(&proto_values, serialized_sparse_t(1)), - errors::InvalidArgument("Could not parse serialized_sparse[1]")); - OP_REQUIRES( - context, ParseProtoUnlimited(&proto_shape, serialized_sparse_t(2)), - errors::InvalidArgument("Could not parse serialized_sparse[2]")); + for (int i = 0; i < num_sparse_tensors; ++i) { + Tensor output_indices(DT_INT64); + Tensor output_values(DataTypeToEnum::value); + Tensor output_shape(DT_INT64); + TensorProto proto_indices; + TensorProto proto_values; + TensorProto proto_shape; - OP_REQUIRES( - context, output_indices.FromProto(proto_indices), - errors::InvalidArgument( - "Could not construct Tensor serialized_sparse[0] (indices)")); - OP_REQUIRES( - context, TensorShapeUtils::IsMatrix(output_indices.shape()), - errors::InvalidArgument("Expected serialized_sparse[0] to represent an " - "index matrix but received shape ", - output_indices.shape().DebugString())); - OP_REQUIRES( - context, output_values.FromProto(proto_values), - errors::InvalidArgument( - "Could not construct Tensor serialized_sparse[1] (values)")); - OP_REQUIRES( - context, TensorShapeUtils::IsVector(output_values.shape()), - errors::InvalidArgument("Expected serialized_sparse[1] to represent a " - "values vector but received shape ", - output_values.shape().DebugString())); - OP_REQUIRES(context, output_shape.FromProto(proto_shape), - errors::InvalidArgument( - "Could not construct Tensor serialized_sparse[2] (shape)")); - OP_REQUIRES(context, TensorShapeUtils::IsVector(output_shape.shape()), - errors::InvalidArgument("Expected serialized_sparse[2] to be a " - "shape vector but its shape is ", - output_shape.shape().DebugString())); + OP_REQUIRES( + context, + ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)), + errors::InvalidArgument("Could not parse serialized_sparse[", i, + ", 0]")); + OP_REQUIRES(context, + ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)), + errors::InvalidArgument("Could not parse serialized_sparse[", + i, ", 1]")); + OP_REQUIRES(context, + ParseProtoUnlimited(&proto_shape, serialized_sparse_t(i, 2)), + errors::InvalidArgument("Could not parse serialized_sparse[", + i, ", 2]")); - OP_REQUIRES( - context, DataTypeToEnum::value == output_values.dtype(), - errors::InvalidArgument("Requested SparseTensor of type ", - DataTypeString(DataTypeToEnum::value), - " but SparseTensor.values.dtype() == ", - DataTypeString(output_values.dtype()))); - - int64 num_entries = output_indices.dim_size(0); - OP_REQUIRES(context, num_entries == output_values.dim_size(0), - errors::InvalidArgument( - "Expected row counts of SparseTensor.indices and " - "SparseTensor.values to match but they do not: ", - num_entries, " vs. ", output_values.dim_size(0))); - int rank = output_indices.dim_size(1); - OP_REQUIRES(context, rank == output_shape.dim_size(0), - errors::InvalidArgument( - "Expected column counts of SparseTensor.indices to match " - "size of SparseTensor.shape but they do not: ", - rank, " vs. ", output_shape.dim_size(0))); + OP_REQUIRES(context, output_indices.FromProto(proto_indices), + errors::InvalidArgument( + "Could not construct Tensor serialized_sparse[", i, + ", 0] (indices)")); + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()), + errors::InvalidArgument( + "Expected serialized_sparse[", i, + ", 0] to represent an index matrix but received shape ", + output_indices.shape().DebugString())); + OP_REQUIRES(context, output_values.FromProto(proto_values), + errors::InvalidArgument( + "Could not construct Tensor serialized_sparse[", i, + ", 1] (values)")); + OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()), + errors::InvalidArgument( + "Expected serialized_sparse[", i, + ", 1] to represent a values vector but received shape ", + output_values.shape().DebugString())); + OP_REQUIRES(context, output_shape.FromProto(proto_shape), + errors::InvalidArgument( + "Could not construct Tensor serialized_sparse[", i, + ", 2] (shape)")); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(output_shape.shape()), + errors::InvalidArgument("Expected serialized_sparse[", i, + ", 1] to be a shape vector but its shape is ", + output_shape.shape().DebugString())); + + OP_REQUIRES( + context, DataTypeToEnum::value == output_values.dtype(), + errors::InvalidArgument( + "Requested SparseTensor of type ", + DataTypeString(DataTypeToEnum::value), " but SparseTensor[", i, + "].values.dtype() == ", DataTypeString(output_values.dtype()))); + + int64 num_entries = output_indices.dim_size(0); + OP_REQUIRES(context, num_entries == output_values.dim_size(0), + errors::InvalidArgument( + "Expected row counts of SparseTensor[", i, + "].indices and SparseTensor[", i, + "].values to match but they do not: ", num_entries, + " vs. ", output_values.dim_size(0))); + int rank = output_indices.dim_size(1); + OP_REQUIRES( + context, rank == output_shape.dim_size(0), + errors::InvalidArgument("Expected column counts of SparseTensor[", i, + "].indices to match size of SparseTensor[", i, + "].shape but they do not: ", rank, " vs. ", + output_shape.dim_size(0))); + + // Now we expand each SparseTensors' indices and shape by + // prefixing a dimension + Tensor expanded_indices(DT_INT64, TensorShape({num_entries, 1 + rank})); + const auto& output_indices_t = output_indices.matrix(); + auto expanded_indices_t = expanded_indices.matrix(); + expanded_indices_t.chip<1>(0).setZero(); + Eigen::DSizes indices_start(0, 1); + Eigen::DSizes indices_sizes(num_entries, rank); + expanded_indices_t.slice(indices_start, indices_sizes) = output_indices_t; + + Tensor expanded_shape(DT_INT64, TensorShape({1 + rank})); + const auto& output_shape_t = output_shape.vec(); + auto expanded_shape_t = expanded_shape.vec(); + expanded_shape_t(0) = 1; + std::copy_n(&output_shape_t(0), rank, &expanded_shape_t(1)); + + TensorShape expanded_tensor_shape(expanded_shape.vec()); - context->set_output(0, output_indices); - context->set_output(1, output_values); - context->set_output(2, output_shape); + indices.push_back(expanded_indices); + values.push_back(output_values); + if (i == 0) { + shape = expanded_tensor_shape; + } else { + OP_REQUIRES( + context, shape.dims() == expanded_tensor_shape.dims(), + errors::InvalidArgument( + "Inconsistent shape across SparseTensors: rank prior to " + "SparseTensor[", + i, "] was: ", shape.dims() - 1, " but rank of SparseTensor[", i, + "] is: ", expanded_tensor_shape.dims() - 1)); + for (int j = 1; j < shape.dims(); ++j) { + OP_REQUIRES( + context, shape.dim_size(j) == expanded_tensor_shape.dim_size(j), + errors::InvalidArgument( + "Inconsistent shape across SparseTensors: dimension ", j - 1, + " prior to SparseTensor[", i, "] was: ", shape.dim_size(j), + " but rank of SparseTensor[", i, + "] is: ", expanded_tensor_shape.dim_size(j))); + } + } + } + + // Dimension 0 is the primary dimension. + int rank = shape.dims(); + gtl::InlinedVector std_order(rank); + std::iota(std_order.begin(), std_order.end(), 0); + + std::vector tensors; + tensors.reserve(num_sparse_tensors); + for (int i = 0; i < num_sparse_tensors; ++i) { + tensors.emplace_back(indices[i], values[i], shape, std_order); + } + + SparseTensor output = SparseTensor::Concat(tensors); + + // Compute the input shape for the reshape operation. + Tensor input_shape(DT_INT64, TensorShape({output.dims()})); + std::copy_n(output.shape().data(), output.dims(), + input_shape.vec().data()); + + // Compute the target shape for the reshape operation. + Tensor target_shape(DT_INT64, TensorShape({ndims + output.dims() - 2})); + for (int i = 0; i < ndims - 1; ++i) { + target_shape.vec()(i) = serialized_sparse.shape().dim_size(i); + } + for (int i = 0; i < output.dims() - 1; ++i) { + target_shape.vec()(i + ndims - 1) = output.shape().data()[i + 1]; + } + + Tensor output_indices; + Tensor output_shape; + Reshape(context, output.indices(), input_shape, target_shape, + 0 /* output indices index */, 2 /* output shape index */); + context->set_output(1, output.values()); } }; @@ -320,7 +422,7 @@ class DeserializeManySparseOp : public OpKernel { OP_REQUIRES( context, serialized_sparse.shape().dim_size(1) == 3, errors::InvalidArgument( - "Serialize sparse should have 3 columns but received shape ", + "Serialized sparse should have 3 columns but received shape ", serialized_sparse.shape().DebugString())); int num_sparse_tensors = serialized_sparse.shape().dim_size(0); diff --git a/tensorflow/core/kernels/sparse_reshape_op.cc b/tensorflow/core/kernels/sparse_reshape_op.cc index f0f353871d..939d404aa4 100644 --- a/tensorflow/core/kernels/sparse_reshape_op.cc +++ b/tensorflow/core/kernels/sparse_reshape_op.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/reshape_util.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" namespace tensorflow { @@ -33,124 +34,10 @@ class SparseReshapeOp : public OpKernel { explicit SparseReshapeOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { - const Tensor& input_ind_in = context->input(0); - OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_ind_in.shape()), - errors::InvalidArgument( - "Input indices should be a matrix but received shape ", - input_ind_in.shape().DebugString())); - - const Tensor& input_shape_in = context->input(1); - OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()), - errors::InvalidArgument( - "Input shape should be a vector but received shape ", - input_shape_in.shape().DebugString())); - - const Tensor& new_shape_in = context->input(2); - OP_REQUIRES(context, TensorShapeUtils::IsVector(new_shape_in.shape()), - errors::InvalidArgument( - "New shape should be a vector but received shape ", - new_shape_in.shape().DebugString())); - - const int64 input_rank = input_shape_in.NumElements(); - const int64 output_rank = new_shape_in.NumElements(); - - const TensorShape input_shape(input_shape_in.vec()); - const int64 dense_size = input_shape.num_elements(); - - const int64 nnz = input_ind_in.shape().dim_size(0); - - // Compute the output shape. Determine product of specified - // dimensions, and find the index of the unspecified one. Largely the - // same calculation as reshape_op - TensorShape output_shape; - int64 product = 1; - int unknown_index = -1; - auto new_shape = new_shape_in.vec(); - for (int d = 0; d < output_rank; ++d) { - const int64 size = new_shape(d); - if (size == -1) { - OP_REQUIRES( - context, unknown_index == -1, - errors::InvalidArgument("only one output shape size may be -1, " - "not both ", - unknown_index, " and ", d)); - unknown_index = d; - output_shape.AddDim(1); - } else { - OP_REQUIRES(context, size >= 0, - errors::InvalidArgument( - "size ", d, " must be non-negative, not ", size)); - output_shape.AddDim(size); - product *= size; - } - } - if (unknown_index != -1) { - OP_REQUIRES( - context, product > 0, - errors::InvalidArgument("SparseReshape cannot infer the missing " - "input size for an empty tensor unless all " - "specified input sizes are non-zero")); - const int64 missing = dense_size / product; - OP_REQUIRES( - context, product * missing == dense_size, - errors::InvalidArgument( - "Input to reshape is a SparseTensor with ", dense_size, - " dense values, but the requested shape requires a multiple of ", - product)); - output_shape.set_dim(unknown_index, missing); - } - - OP_REQUIRES(context, output_shape.num_elements() == dense_size, - errors::InvalidArgument("Input to reshape is a tensor with ", - dense_size, - " dense values, but the " - "requested shape has ", - output_shape.num_elements())); - - // Optimize for reshaping to the same shape. - if (input_shape == output_shape) { - context->set_output(0, input_ind_in); - context->set_output(1, input_shape_in); - return; - } - - gtl::InlinedVector input_strides(input_rank); - input_strides[input_rank - 1] = 1; - for (int d = input_rank - 2; d >= 0; --d) { - input_strides[d] = input_strides[d + 1] * input_shape.dim_size(d + 1); - } - - gtl::InlinedVector output_strides(output_rank); - output_strides[output_rank - 1] = 1; - for (int d = output_rank - 2; d >= 0; --d) { - output_strides[d] = output_strides[d + 1] * output_shape.dim_size(d + 1); - } - - Tensor* output_ind_out = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, TensorShape({nnz, output_rank}), - &output_ind_out)); - auto input_ind = input_ind_in.matrix(); - auto output_ind = output_ind_out->matrix(); - for (int i = 0; i < nnz; ++i) { - int64 id = 0; - for (int j = 0; j < input_rank; ++j) { - id += input_ind(i, j) * input_strides[j]; - } - for (int j = 0; j < output_rank; ++j) { - output_ind(i, j) = id / output_strides[j]; - id %= output_strides[j]; - } - } - - Tensor* output_shape_out = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(1, TensorShape({output_rank}), - &output_shape_out)); - auto output_shape_vec = output_shape_out->vec(); - for (int j = 0; j < output_shape.dims(); ++j) { - output_shape_vec(j) = output_shape.dim_size(j); - } + Tensor output_indices; + Tensor output_shape; + Reshape(context, context->input(0), context->input(1), context->input(2), + 0 /* output indices index */, 1 /* output shape index */); } }; diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index 8b6106f2a4..8414519f0b 100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -244,13 +244,9 @@ REGISTER_OP("DeserializeSparse") .Output("sparse_values: dtype") .Output("sparse_shape: int64") .SetShapeFn([](InferenceContext* c) { - // serialized sparse is [3] vector. - ShapeHandle serialized_sparse; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &serialized_sparse)); + // serialized sparse is [?, ..., ?, 3] vector. DimensionHandle unused; - TF_RETURN_IF_ERROR( - c->WithValue(c->Dim(serialized_sparse, 0), 3, &unused)); - + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), -1), 3, &unused)); c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, InferenceContext::kUnknownDim)); c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); @@ -258,11 +254,11 @@ REGISTER_OP("DeserializeSparse") return Status::OK(); }) .Doc(R"doc( -Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`) -object. +Deserialize `SparseTensor` objects. -serialized_sparse: 1-D, The serialized `SparseTensor` object. Must have 3 columns. -dtype: The `dtype` of the serialized `SparseTensor` object. +serialized_sparse: The serialized `SparseTensor` objects. The last dimension + must have 3 columns. +dtype: The `dtype` of the serialized `SparseTensor` objects. )doc"); REGISTER_OP("DeserializeManySparse") diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index d6efb7fa9a..00ac3334b0 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1332,9 +1332,6 @@ class BatchDataset(Dataset): def __init__(self, input_dataset, batch_size): """See `Dataset.batch()` for details.""" super(BatchDataset, self).__init__() - if sparse.any_sparse(input_dataset.output_classes): - # TODO(b/63669786): support batching of sparse tensors - raise TypeError("Batching of sparse tensors is not currently supported") self._input_dataset = input_dataset self._batch_size = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") @@ -1412,7 +1409,8 @@ class PaddedBatchDataset(Dataset): super(PaddedBatchDataset, self).__init__() if sparse.any_sparse(input_dataset.output_classes): # TODO(b/63669786): support batching of sparse tensors - raise TypeError("Batching of sparse tensors is not currently supported") + raise TypeError( + "Batching of padded sparse tensors is not currently supported") self._input_dataset = input_dataset self._batch_size = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py index 4d25f6a963..b4219198d3 100644 --- a/tensorflow/python/data/util/sparse.py +++ b/tensorflow/python/data/util/sparse.py @@ -88,7 +88,6 @@ def deserialize_sparse_tensors(tensors, types, shapes, classes): `tensors` with any serialized sparse tensors replaced by their deserialized version. """ - # TODO(b/63669786): support batching of sparse tensors ret = nest.pack_sequence_as(types, [ sparse_ops.deserialize_sparse(tensor, dtype=ty, rank=shape.ndims) if c is sparse_tensor.SparseTensor else tensor diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py index 236c5bc4ff..513dfb1ec3 100644 --- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py @@ -101,13 +101,58 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) - def testBatchSparseError(self): - def _map_fn(i): + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testBatchSparse(self): + + def _sparse(i): return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + indices=[[0]], values=(i * [1]), dense_shape=[1]) - with self.assertRaises(TypeError): - _ = dataset_ops.Dataset.range(10).map(_map_fn).batch(10) + iterator = dataset_ops.Dataset.range(10).map(_sparse).batch( + 5).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(2): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], + values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], + dense_shape=[5, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testNestedBatchSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch( + 2).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensor( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0], + [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]], + values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + dense_shape=[2, 5, 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) def testPaddedBatchDataset(self): seq_lens = array_ops.placeholder(dtypes.int32, shape=[None]) diff --git a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py index e87fa0c94c..0d2887f3ce 100644 --- a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py @@ -196,7 +196,7 @@ class SparseReshapeTest(test.TestCase): sp_input = self._SparseTensorPlaceholder() input_val = self._SparseTensorValue_5x6() sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1, -1]) - with self.assertRaisesOpError("only one output shape size may be -1"): + with self.assertRaisesOpError("only one output dimension may be -1"): sess.run(sp_output, {sp_input: input_val}) def testProvideStaticallyMismatchedSizes(self): diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py index af395b31bf..d0d6cc4c0f 100644 --- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py @@ -64,6 +64,87 @@ class SerializeSparseTest(test.TestCase): shape = np.array([3, 4, 5]).astype(np.int64) return sparse_tensor_lib.SparseTensorValue(ind, val, shape) + def testSerializeDeserialize(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensorValue_5x6(np.arange(6)) + serialized = sparse_ops.serialize_sparse(sp_input) + sp_deserialized = sparse_ops.deserialize_sparse( + serialized, dtype=dtypes.int32) + + indices, values, shape = sess.run(sp_deserialized) + + self.assertAllEqual(indices, sp_input[0]) + self.assertAllEqual(values, sp_input[1]) + self.assertAllEqual(shape, sp_input[2]) + + def testSerializeDeserializeBatch(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensorValue_5x6(np.arange(6)) + serialized = sparse_ops.serialize_sparse(sp_input) + serialized = array_ops.stack([serialized, serialized]) + + sp_deserialized = sparse_ops.deserialize_sparse( + serialized, dtype=dtypes.int32) + + combined_indices, combined_values, combined_shape = sess.run( + sp_deserialized) + + self.assertAllEqual(combined_indices[:6, 0], [0] * 6) # minibatch 0 + self.assertAllEqual(combined_indices[:6, 1:], sp_input[0]) + self.assertAllEqual(combined_indices[6:, 0], [1] * 6) # minibatch 1 + self.assertAllEqual(combined_indices[6:, 1:], sp_input[0]) + self.assertAllEqual(combined_values[:6], sp_input[1]) + self.assertAllEqual(combined_values[6:], sp_input[1]) + self.assertAllEqual(combined_shape, [2, 5, 6]) + + def testSerializeDeserializeBatchInconsistentShape(self): + with self.test_session(use_gpu=False) as sess: + sp_input0 = self._SparseTensorValue_5x6(np.arange(6)) + sp_input1 = self._SparseTensorValue_3x4(np.arange(6)) + serialized0 = sparse_ops.serialize_sparse(sp_input0) + serialized1 = sparse_ops.serialize_sparse(sp_input1) + serialized = array_ops.stack([serialized0, serialized1]) + + sp_deserialized = sparse_ops.deserialize_sparse( + serialized, dtype=dtypes.int32) + + with self.assertRaisesOpError( + r"Inconsistent shape across SparseTensors: dimension 0 prior to " + r"SparseTensor\[1\] was: 5 but rank of SparseTensor\[1\] is: 3"): + sess.run(sp_deserialized) + + def testSerializeDeserializeNestedBatch(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensorValue_5x6(np.arange(6)) + serialized = sparse_ops.serialize_sparse(sp_input) + serialized = array_ops.stack([serialized, serialized]) + serialized = array_ops.stack([serialized, serialized]) + + sp_deserialized = sparse_ops.deserialize_sparse( + serialized, dtype=dtypes.int32) + + combined_indices, combined_values, combined_shape = sess.run( + sp_deserialized) + + # minibatch 0 + self.assertAllEqual(combined_indices[:6, :2], [[0, 0]] * 6) + self.assertAllEqual(combined_indices[:6, 2:], sp_input[0]) + self.assertAllEqual(combined_values[:6], sp_input[1]) + # minibatch 1 + self.assertAllEqual(combined_indices[6:12, :2], [[0, 1]] * 6) + self.assertAllEqual(combined_indices[6:12, 2:], sp_input[0]) + self.assertAllEqual(combined_values[6:12], sp_input[1]) + # minibatch 2 + self.assertAllEqual(combined_indices[12:18, :2], [[1, 0]] * 6) + self.assertAllEqual(combined_indices[12:18, 2:], sp_input[0]) + self.assertAllEqual(combined_values[12:18], sp_input[1]) + # minibatch 3 + self.assertAllEqual(combined_indices[18:, :2], [[1, 1]] * 6) + self.assertAllEqual(combined_indices[18:, 2:], sp_input[0]) + self.assertAllEqual(combined_values[18:], sp_input[1]) + + self.assertAllEqual(combined_shape, [2, 2, 5, 6]) + def testSerializeDeserializeMany(self): with self.test_session(use_gpu=False) as sess: sp_input0 = self._SparseTensorValue_5x6(np.arange(6)) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 3d6f942dca..cdfe9e1c1e 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -1435,17 +1435,22 @@ def serialize_many_sparse(sp_input, name=None): def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None): - """Deserialize `SparseTensor` from a string 3-vector (1-D `Tensor`) object. + """Deserialize `SparseTensor` objects. + + The input is expected to have shape [d_1, ..., d_m, 3], where the last + dimension stores a serialized `SparseTensor`. The method deserializes + all input `SparseTensor`s, concatenates them into a single tensor, and + reshapes the sparse tensor to preserve the structure of the input. Args: - serialized_sparse: 1-D, The serialized `SparseTensor` object. - Must have 3 columns. - dtype: The `dtype` of the serialized `SparseTensor` object. - rank: (optional) Python int, the rank of the `SparseTensor` object. + serialized_sparse: The serialized `SparseTensor` objects. + The last dimension must have 3 columns. + dtype: The `dtype` of the serialized `SparseTensor` objects. + rank: (optional) Python int, the rank of the `SparseTensor` objects. name: A name prefix for the returned tensors (optional). Returns: - A `SparseTensor` representing the deserialized `SparseTensor` object. + A `SparseTensor` representing the deserialized `SparseTensor` objects. """ output_indices, output_values, output_shape = ( -- GitLab From 20e2fdc2f95f213eef5a736a140d8591ef7a5b6e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 17:54:51 -0800 Subject: [PATCH 0690/1801] Update the Boston example to include the feature importance and a custom export format for the model. PiperOrigin-RevId: 176445066 --- .../contrib/boosted_trees/examples/boston.py | 38 +++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/examples/boston.py b/tensorflow/contrib/boosted_trees/examples/boston.py index 2c0a3c4912..e9dbdb0fd7 100644 --- a/tensorflow/contrib/boosted_trees/examples/boston.py +++ b/tensorflow/contrib/boosted_trees/examples/boston.py @@ -22,7 +22,7 @@ r"""Demonstrates a regression on Boston housing data. python tensorflow/contrib/boosted_trees/examples/boston.py \ --batch_size=404 --output_dir="/tmp/boston" --depth=4 --learning_rate=0.1 \ - --num_eval_steps=1 --num_trees=500 --l2=4 \ + --num_eval_steps=1 --num_trees=500 --l2=0.001 \ --vmodule=training_ops=1 When training is done, mean squared error on eval data is reported. @@ -37,8 +37,10 @@ from __future__ import division from __future__ import print_function import argparse +import os import sys import tensorflow as tf +from tensorflow.contrib.boosted_trees.estimator_batch import custom_export_strategy from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeRegressor from tensorflow.contrib.boosted_trees.proto import learner_pb2 from tensorflow.contrib.layers.python.layers import feature_column @@ -51,22 +53,18 @@ _BOSTON_NUM_FEATURES = 13 def _get_tfbt(output_dir, feature_cols): """Configures TF Boosted Trees estimator based on flags.""" learner_config = learner_pb2.LearnerConfig() - learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate learner_config.regularization.l1 = 0.0 - # Set the regularization per instance in such a way that - # regularization for the full training data is equal to l2 flag. - learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size + learner_config.regularization.l2 = FLAGS.l2 learner_config.constraints.max_tree_depth = FLAGS.depth - learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300) # Create a TF Boosted trees regression estimator. estimator = GradientBoostedDecisionTreeRegressor( learner_config=learner_config, - # For the WHOLE_TREE strategy, set the examples_per_layer to be equal to - # batch size. + # This should be the number of examples. For large datasets it can be + # larger than the batch_size. examples_per_layer=FLAGS.batch_size, feature_columns=feature_cols, label_dimension=1, @@ -77,6 +75,14 @@ def _get_tfbt(output_dir, feature_cols): return estimator +def _convert_fn(dtec, sorted_feature_names, num_dense, num_sparse_float, + num_sparse_int, export_dir, unused_eval_result): + universal_format = custom_export_strategy.convert_to_universal_format( + dtec, sorted_feature_names, num_dense, num_sparse_float, num_sparse_int) + with tf.gfile.GFile(os.path.join(export_dir, "tree_proto"), "w") as f: + f.write(str(universal_format)) + + def _make_experiment_fn(output_dir): """Creates experiment for gradient boosted decision trees.""" (x_train, y_train), (x_test, @@ -88,21 +94,31 @@ def _make_experiment_fn(output_dir): batch_size=FLAGS.batch_size, num_epochs=None, shuffle=True) - eval_input_fn = tf.estimator.inputs.numpy_input_fn( x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False) feature_columns = [ feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES) ] - + feature_spec = tf.contrib.layers.create_feature_spec_for_parsing( + feature_columns) + serving_input_fn = tf.contrib.learn.utils.build_parsing_serving_input_fn( + feature_spec) + # An export strategy that outputs the feature importance and also exports + # the internal tree representation in another format. + export_strategy = custom_export_strategy.make_custom_export_strategy( + "exports", + convert_fn=_convert_fn, + feature_columns=feature_columns, + export_input_fn=serving_input_fn) return tf.contrib.learn.Experiment( estimator=_get_tfbt(output_dir, feature_columns), train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, train_steps=None, eval_steps=FLAGS.num_eval_steps, - eval_metrics=None) + eval_metrics=None, + export_strategies=[export_strategy]) def main(unused_argv): -- GitLab From cf57817f554fa7bfe7c134453ef9cf4374aef23d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 17:56:28 -0800 Subject: [PATCH 0691/1801] Automated g4 rollback of changelist 175593063 PiperOrigin-RevId: 176445215 --- tensorflow/cc/framework/cc_op_gen.cc | 119 +++++++++++++++--- tensorflow/core/common_runtime/device_mgr.h | 2 +- .../core/common_runtime/direct_session.cc | 2 +- .../core/common_runtime/direct_session.h | 3 +- tensorflow/core/common_runtime/placer.cc | 4 +- .../common_runtime/step_stats_collector.cc | 4 +- .../distributed_runtime/master_session.cc | 4 +- .../core/framework/variant_op_registry.h | 9 +- tensorflow/core/graph/costmodel.h | 2 +- tensorflow/core/graph/graph_constructor.cc | 6 +- tensorflow/core/graph/quantize_training.cc | 4 +- tensorflow/core/graph/subgraph.h | 2 +- .../graph_transforms/fold_constants_lib.cc | 4 +- 13 files changed, 123 insertions(+), 42 deletions(-) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index c0b8cc2e41..d889c518f9 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -297,7 +297,7 @@ string ToCamelCase(const string& str) { // argument to a function. std::pair AttrTypeName(StringPiece attr_type) { static const std::unordered_map, - StringPiece::Hasher> + StringPieceHasher> attr_type_map{ {"string", {"StringPiece", false}}, {"list(string)", {"gtl::ArraySlice", true}}, @@ -325,29 +325,112 @@ std::pair AttrTypeName(StringPiece attr_type) { } bool IsCPPKeyword(StringPiece name) { - static const std::unordered_set + static const std::unordered_set // Keywords obtained from http://en.cppreference.com/w/cpp/keyword kCPPReserved{ - "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", - "atomic_commit", "atomic_noexcept", "auto", "bitand", "bitor", "bool", - "break", "case", "catch", "char", "char16_t", "char32_t", "class", - "compl", "concept", "const", "const_cast", "constexpr", "continue", - "decltype", "default", "delete", "do", "double", "dynamic_cast", - "else", "enum", "explicit", "export", "extern", "false", "final", - "float", "for", "friend", "goto", "if", "import", "inline", "int", - "long", "module", "mutable", "namespace", "new", "noexcept", "not", - "not_eq", "nullptr", "operator", "or", "or_eq", "override", "private", - "protected", "public", "register", "reinterpret_cast", "requires", - "return", "short", "signed", "sizeof", "static", "static_assert", - "static_cast", "struct", "switch", "synchronized", "template", "this", - "thread_local", "throw", "true", "try", "typedef", "typeid", - "typename", "union", "unsigned", "using", "virtual", "void", - "volatile", "wchar_t", "while", "xor", "xor_eq", + "alignas", + "alignof", + "and", + "and_eq", + "asm", + "atomic_cancel", + "atomic_commit", + "atomic_noexcept", + "auto", + "bitand", + "bitor", + "bool", + "break", + "case", + "catch", + "char", + "char16_t", + "char32_t", + "class", + "compl", + "concept", + "const", + "const_cast", + "constexpr", + "continue", + "decltype", + "default", + "delete", + "do", + "double", + "dynamic_cast", + "else", + "enum", + "explicit", + "export", + "extern", + "false", + "final", + "float", + "for", + "friend", + "goto", + "if", + "import", + "inline", + "int", + "long", + "module", + "mutable", + "namespace", + "new", + "noexcept", + "not", + "not_eq", + "nullptr", + "operator", + "or", + "or_eq", + "override", + "private", + "protected", + "public", + "register", + "reinterpret_cast", + "requires", + "return", + "short", + "signed", + "sizeof", + "static", + "static_assert", + "static_cast", + "struct", + "switch", + "synchronized", + "template", + "this", + "thread_local", + "throw", + "true", + "try", + "typedef", + "typeid", + "typename", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "wchar_t", + "while", + "xor", + "xor_eq", // The following are not C++ keywords, but names of local variables // and parameters used in the op constructor. Treating them as // keywords, so that other parameter names don't conflict with these. - "builder", "node", "ret", "scope", "unique_name", + "builder", + "node", + "ret", + "scope", + "unique_name", }; return kCPPReserved.count(name) > 0; } diff --git a/tensorflow/core/common_runtime/device_mgr.h b/tensorflow/core/common_runtime/device_mgr.h index d16681ac59..cd93f76324 100644 --- a/tensorflow/core/common_runtime/device_mgr.h +++ b/tensorflow/core/common_runtime/device_mgr.h @@ -68,7 +68,7 @@ class DeviceMgr { StringPiece CopyToBackingStore(StringPiece s); - std::unordered_map device_map_; + std::unordered_map device_map_; core::Arena name_backing_store_; // Storage for keys in device_map_ std::unordered_map device_type_counts_; diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 6dfe17405c..2d4f2a2d90 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1135,7 +1135,7 @@ Status DirectSession::GetOrCreateExecutors( if (run_state_args->is_partial_run) { ek->graph = std::move(run_state_args->graph); - std::unordered_set names; + std::unordered_set names; for (const string& input : inputs) { TensorId id(ParseTensorName(input)); names.emplace(id.first); diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 7fbabf6d81..780d0b46a8 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -64,8 +64,7 @@ class DirectSession : public Session { ~DirectSession() override; typedef std::vector> NamedTensorList; - typedef std::unordered_map - NameNodeMap; + typedef std::unordered_map NameNodeMap; ::tensorflow::Status Create(const GraphDef& graph) override; ::tensorflow::Status Extend(const GraphDef& graph) override; diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc index 73fdf60fd5..54f082e823 100644 --- a/tensorflow/core/common_runtime/placer.cc +++ b/tensorflow/core/common_runtime/placer.cc @@ -129,7 +129,7 @@ class ColocationGraph { // 'string' values stored in NodeDef attribute lists, as well as StringPiece // values that refer to 'string' values from NodeDef::name(), without // performing any string allocations. - std::unordered_map + std::unordered_map colocation_group_root; for (Node* node : graph_->nodes()) { @@ -171,7 +171,7 @@ class ColocationGraph { } Status ColocateNodeToGroup( - std::unordered_map* + std::unordered_map* colocation_group_root, Node* node, StringPiece colocation_group) { const Node*& root_node = (*colocation_group_root)[colocation_group]; diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index bfe7a32b1b..d7e01144c9 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -150,7 +150,7 @@ void StepStatsCollector::BuildCostModel( const DeviceStepStats* hardware_stats; }; - std::unordered_map + std::unordered_map per_device_stats; std::unordered_map gpu_hardware_stats; @@ -190,7 +190,7 @@ void StepStatsCollector::BuildCostModel( CostModel* cm = cost_model_manager->FindOrCreateCostModel(graph); cm->IncrementUpdateTimes(); - std::unordered_map name_to_node; + std::unordered_map name_to_node; for (Node* n : graph->nodes()) { name_to_node.emplace(n->name(), n); } diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index b3e499be79..3379302b9b 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -213,7 +213,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { const bool is_partial_; const DebugOptions& debug_opts_; WorkerCacheInterface* const worker_cache_; // Not owned. - std::unordered_map name_to_node_; + std::unordered_map name_to_node_; const bool should_deregister_; // Graph partitioned into per-location subgraphs. @@ -492,7 +492,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions( VLOG(2) << "RunPartitions step_id " << step_id << " execution_count " << execution_count; // Maps the names of fed tensors to their index in `req`. - std::unordered_map feeds(3); + std::unordered_map feeds(3); for (size_t i = 0; i < req.num_feeds(); ++i) { if (!feeds.insert({req.feed_name(i), i}).second) { diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index 831dbd3dff..13f6908cae 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -145,9 +145,8 @@ class UnaryVariantOpRegistry { static std::unordered_set* PersistentStringStorage(); private: - std::unordered_map - shape_fns; - std::unordered_map + std::unordered_map shape_fns; + std::unordered_map decode_fns; // Map std::pair to function. @@ -159,7 +158,7 @@ class UnaryVariantOpRegistry { ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x))); return ret; } - StringPiece::Hasher sp_hasher_; + StringPieceHasher sp_hasher_; }; std::unordered_map, @@ -177,7 +176,7 @@ class UnaryVariantOpRegistry { ret = Hash64Combine(ret, sp_hasher_(std::get<2>(x))); return ret; } - StringPiece::Hasher sp_hasher_; + StringPieceHasher sp_hasher_; }; std::unordered_map, VariantUnaryOpFn, TupleHash> diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h index a908a4843c..8afa4971ad 100644 --- a/tensorflow/core/graph/costmodel.h +++ b/tensorflow/core/graph/costmodel.h @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" namespace tensorflow { -typedef std::unordered_map +typedef std::unordered_map NodeNameToCostIdMap; class StepStats; diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index e45828b7ba..8890a9fb0f 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -241,13 +241,13 @@ class GraphConstructor { }; // TODO(vrv): Profile this data structure to see if we should use an // alternative implementation of std::unordered_map. - std::unordered_map gdef_nodes_; + std::unordered_map gdef_nodes_; // Mapping from node name to the existing node in g_. - std::unordered_map existing_nodes_; + std::unordered_map existing_nodes_; // Prefixes already used in the graph. - std::unordered_set existing_prefixes_; + std::unordered_set existing_prefixes_; // Imported node names that have been uniquified. The key is the original // name, the value is the new unique name. diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc index d9cb55f448..cb0fc8a154 100644 --- a/tensorflow/core/graph/quantize_training.cc +++ b/tensorflow/core/graph/quantize_training.cc @@ -42,7 +42,7 @@ const float kEMADecay = 0.999; // Node types to rewrite. Insert quantize_and_dequantize op for their inputs. const auto* nodes_to_rewrite = - new std::unordered_set{"MatMul", "Conv2D"}; + new std::unordered_set{"MatMul", "Conv2D"}; // Contains necessary parameters to convert an edge. struct EdgeToConvert { @@ -563,7 +563,7 @@ Status ProcessTargetEdges(Graph* graph, const string& quant_op_type, const std::vector& target_edges) { // Remember previously converted ops to avoid duplicated conversion on the // same input. - std::unordered_map name_index; + std::unordered_map name_index; std::vector added_variables; for (const EdgeToConvert edge : target_edges) { Node* convert_node; diff --git a/tensorflow/core/graph/subgraph.h b/tensorflow/core/graph/subgraph.h index 8ccc27914b..3c1f8870f5 100644 --- a/tensorflow/core/graph/subgraph.h +++ b/tensorflow/core/graph/subgraph.h @@ -71,7 +71,7 @@ Status RewriteGraphForExecution( const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata); -typedef std::unordered_map NameIndex; +typedef std::unordered_map NameIndex; // Augment "*g" by adding special "fetch" nodes that connect to the // tensor outputs specified in "fetch_outputs" to retrieve the output diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc index f2934a79bd..250f54e20f 100644 --- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc +++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc @@ -39,9 +39,9 @@ limitations under the License. namespace tensorflow { namespace graph_transforms { namespace { -using StringPieceSet = std::unordered_set; +using StringPieceSet = std::unordered_set; template -using StringPieceMap = std::unordered_map; +using StringPieceMap = std::unordered_map; } // namespace Status ReplaceSendRecvs(const GraphDef& original_graph_def, -- GitLab From 1c7661be3337d5ab6c44300aee6a2d4001c81b27 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 20 Nov 2017 18:04:51 -0800 Subject: [PATCH 0692/1801] [TF2XLA] Flow down across switch edges separately. * Change the way that the clustering was done by flowing down along the branches of the switch node separately; - It was previously wrong to assume that the operands of an op are in the same control scope if they are not a switch or a merge node, as a zero-input op (such as a const) could be referenced by both "branches" of a switch without this op not being exclusively in either branch. * Change from matching a switch for a merge cluster, to matching a merge for a switch cluster: - The new matching considers switch-merge subgraphs where all nodes within the subgraph are dominated by the switch nodes, so reversing the matching makes it easier to perform the dominance checking. - This allows for cases where there is a cluster with a control dependency on a switch node and used by a branch of the switch. PiperOrigin-RevId: 176446211 --- .../tf2xla/functionalize_control_flow.cc | 329 ++++++++++++------ 1 file changed, 220 insertions(+), 109 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 40a484da09..5726d8294a 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -623,11 +623,12 @@ class FunctionalizeCond { FunctionalizeCond(Graph* graph, FunctionLibraryDefinition* library) : clusters_(graph->num_node_ids()), library_(library), graph_(graph) {} - // Returns a vector of Merge nodes from the clustered graph where the nodes + // Returns a vector of Switch nodes from the clustered graph where the nodes // are sorted by the number of switch nodes minus number of merge nodes // from a root of the clustered graph to the given Merge node, with ties - // broken by the representative of the Cluster. - std::vector> SortedMergeNodes(); + // broken by the representative of the Cluster. This corresponds to sorting by + // nesting depth, from deepest nested to outermost. + std::vector> SortedSwitchNodes(); // Returns whether the graph has no conditionals. bool NoConditionals() const { return merge_nodes_.empty(); } @@ -654,15 +655,17 @@ class FunctionalizeCond { // extracting the bodies needed for the then and else branch, creates a XlaIf // node, removing the nodes of the branches from the graph and replacing the // merge node with a XlaIf. - Status ConvertMergeToXlaIf(Cluster* merge_cluster); + Status ConvertCorrespondingMergeToXlaIf(Cluster* switch_cluster); // Removes a Switch cluster feeding directly into a Merge cluster by removing // the Switch and Merge nodes and collapsing into a single cluster. - Status RemoveTrivialMerge(Cluster* merge_cluster); + Status RemoveTrivialSwitch(Cluster* switch_cluster); - // Returns the switch cluster corresponding to the merge node. This function - // only returns the switch cluster in the simple case where we have a switch - // node is the entry of a diamond corresponding to a conditional: + // Returns the merge cluster corresponding to the switch node. This function + // only returns the merge cluster in the case where we have a switch node that + // is the single entry point for all paths to a common merge cluster, this + // merge cluster may be created by combining multiple merge clusters, that + // share the switch cluster as common ancestor, together. // // Switch // / \ @@ -671,8 +674,9 @@ class FunctionalizeCond { // merge_cluster // // Note: either of the branches may be empty. The case where both branches are - // empty is handled by RemoveTrivialMerge. - gtl::optional GetSwitchCluster(const Cluster& merge_cluster); + // empty is handled by RemoveTrivialSwitch. + gtl::optional CreateCorrespondingMergeCluster( + Cluster* switch_cluster); // Determines the arguments needed as input to the Merge cluster originating // from the Switch cluster. @@ -793,6 +797,10 @@ bool IsDeadSwitch(const Node* node) { } void FunctionalizeCond::CreateClusters() { + ClusterHandle source_cluster = ClusterHandle(Graph::kSourceId); + auto& source = clusters_.at(source_cluster); + std::deque>> workqueue; + workqueue.push_back({source_cluster, {}}); for (Node* node : graph_->nodes()) { if (IsSwitch(node)) { switch_nodes_.insert(node); @@ -801,6 +809,12 @@ void FunctionalizeCond::CreateClusters() { } ClusterHandle& cluster = clusters_.at(node).Get(); cluster = ClusterHandle(node->id()); + // Group all source clusters together. + if (node->IsSource() || node->in_edges().empty()) { + clusters_.at(node).Merge(&source); + source.Merge(&clusters_.at(node)); + workqueue.front().second.push_back(node); + } } // If there are no Merge nodes, then terminate. @@ -815,20 +829,118 @@ void FunctionalizeCond::CreateClusters() { // conservatively assuming all merge nodes become XlaIf nodes. clusters_.resize(clusters_.size() + merge_nodes_.size()); - // Merge a cluster with its input, unless the input is a Switch node or - // the node is a Merge node. - for (const Node* node : graph_->nodes()) { - if (IsMerge(node) || IsSwitch(node) || !node->IsOp()) { - continue; + std::unordered_set marked; + while (!workqueue.empty()) { + auto cluster_queue = workqueue.front(); + VLOG(4) << "Cluster: " << cluster_queue.first << " Queue: {" + << str_util::Join(cluster_queue.second, ",", + [](string* output, const Node* node) { + strings::StrAppend(output, node->id()); + }) + << "}"; + + UnionFind& repr = clusters_.at(cluster_queue.first); + workqueue.pop_front(); + std::deque switch_nodes; + std::deque merge_nodes; + std::unordered_set cluster_member; + while (!cluster_queue.second.empty()) { + // Iterate node workqueue and flow forward merging all nodes reachable + // that are neither a Switch or a Merge and whose inputs are all part of + // the same cluster. + Node* cur = cluster_queue.second.front(); + cluster_queue.second.pop_front(); + if (marked.find(cur) != marked.end()) { + continue; + } + if (IsMerge(cur)) { + merge_nodes.push_back(cur); + marked.insert(cur); + continue; + } + if (IsSwitch(cur)) { + switch_nodes.push_back(cur); + marked.insert(cur); + continue; + } + clusters_.at(cur).Merge(&repr); + cluster_member.insert(cur); + for (Node* out : cur->out_nodes()) { + bool all_ancestors_in_cluster = true; + for (Node* in : out->in_nodes()) { + if (IsMerge(out)) { + merge_nodes.push_back(out); + } + if (IsSwitch(out)) { + switch_nodes.push_back(out); + } + if (cluster_member.find(in) == cluster_member.end()) { + all_ancestors_in_cluster = false; + break; + } + } + if (all_ancestors_in_cluster && out->IsOp()) { + cluster_queue.second.push_back(out); + marked.insert(cur); + } + } } - for (const Node* in : node->in_nodes()) { - if (in->IsOp() && !IsSwitch(in) && !IsMerge(in)) { - clusters_.at(node).Merge(&clusters_.at(in)); + + VLOG(4) << "Switches: {" + << str_util::Join(switch_nodes, ",", + [](string* output, const Node* node) { + strings::StrAppend(output, node->id()); + }) + << "}"; + + // Merge Switch nodes with common predicate. + std::unordered_map> predicate_to_switch; + for (Node* node : switch_nodes) { + Node* tmp; + TF_CHECK_OK(node->input_node(1, &tmp)); + predicate_to_switch[tmp].push_back(node); + } + for (auto kv : predicate_to_switch) { + Node* first = kv.second.front(); + for (Node* switch_node : kv.second) { + clusters_.at(first).Merge(&clusters_.at(switch_node)); } } - // Group all source clusters together. - if (node->IsSource() || node->in_edges().empty()) { - clusters_.at(node).Merge(&clusters_.at(ClusterHandle(Graph::kSourceId))); + + // Enqueue each edge of the switch node separately. That is, group all the + // nodes that are due to the true/false edge of the switch together and + // consider all nodes that only have a control dependency on the switch node + // separately. We want to group together all nodes that are part of the same + // branch, as these will be extracted into the `then` and `else` functions + // of the functional if. The ops due to control edges are different as they + // could be involved with either branch and merging them here could result + // in invalid graphs. + for (auto kv : predicate_to_switch) { + ClusterHandle none = ClusterHandle(-1); + ClusterHandle first[2] = {none, none}; + std::deque* queue[2]; + for (auto switch_node : kv.second) { + for (const auto e : switch_node->out_edges()) { + if (IsSwitch(e->dst()) || IsMerge(e->dst())) { + continue; + } + // Control edges are enqueued on their own. + if (e->IsControlEdge()) { + workqueue.push_back({Representative(e->dst()), {e->dst()}}); + continue; + } + // Combine all outputs of the same output port of a switch cluster + // into the same workqueue entry. + if (first[e->src_output()] == none) { + ClusterHandle repr = Representative(e->dst()); + first[e->src_output()] = repr; + workqueue.push_back({repr, {}}); + queue[e->src_output()] = &workqueue.back().second; + } + clusters_.at(first[e->src_output()]).Merge(&clusters_.at(e->dst())); + queue[e->src_output()]->push_back(e->dst()); + } + } } } } @@ -910,74 +1022,60 @@ void FunctionalizeCond::CreateClusteredGraph() { update_cluster_for_node(node).merge_nodes.insert(node); } - // Merge Switch nodes with common predicate. - std::unordered_map> predicate_to_switch; - for (Node* node : switch_nodes_) { - Node* tmp; - TF_CHECK_OK(node->input_node(1, &tmp)); - predicate_to_switch[tmp].push_back(node); - } - for (auto kv : predicate_to_switch) { - Cluster& first = clustered_graph_.at(Representative(kv.second.front())); - for (Node* switch_node : kv.second) { - ClusterHandle handle = Representative(switch_node); - Cluster& cluster = clustered_graph_.at(handle); - ContractEdge(&cluster, &first, /*remove_from_graph=*/true); - } - } - - // Merge Merge nodes with common input together. - for (Node* node : merge_nodes_) { - Cluster& cluster = clustered_graph_.at(Representative(node)); - for (const Node* in : node->in_nodes()) { - if (!in->IsOp()) { - continue; - } - Cluster& cluster_node_in = clustered_graph_.at(Representative(in)); - // ContractEdge can modify out_nodes of cluster_node_in, so traverse - // over out_nodes assuming it does. - for (auto it = cluster_node_in.out_nodes.begin(); - it != cluster_node_in.out_nodes.end();) { - if (!(*it)->merge_nodes.empty()) { - ContractEdge(*it++, &cluster, /*remove_from_graph=*/true); - } else { - ++it; - } - } - } - } - VLOG(3) << "Graph with clusters: " << DebugString(*graph_, &clusters_); VLOG(3) << "ClusteredGraph: " << DebugString(clustered_graph_); } -gtl::optional FunctionalizeCond::GetSwitchCluster( - const Cluster& merge_cluster) { - VLOG(3) << "GetSwitchCluster for " << merge_cluster.representative; - gtl::optional switch_cluster; - if (merge_cluster.in_nodes.size() > 2) { - return gtl::nullopt; +gtl::optional +FunctionalizeCond::CreateCorrespondingMergeCluster(Cluster* switch_cluster) { + VLOG(3) << "CreateCorrespondingMergeCluster for " + << switch_cluster->representative; + std::unordered_set merges; + std::unordered_set dominated; + dominated.insert(switch_cluster); + std::deque queue; + auto enqueue_or_update_merge = [this, &queue, &merges](Cluster* c) { + if (c->merge_nodes.empty()) { + queue.push_back(c); + } else { + merges.insert(c); + } + }; + // Enqueue all the outputs of the switch cluster in the workqueue. + for (auto* out : switch_cluster->out_nodes) { + enqueue_or_update_merge(out); } - for (Cluster* in : merge_cluster.in_nodes) { - Cluster* cluster = in; - if (in->switch_nodes.empty()) { - if (in->in_nodes.size() != 1 || in->out_nodes.size() != 1) { + std::unordered_set visited; + while (!queue.empty()) { + Cluster* cur = queue.front(); + queue.pop_front(); + if (visited.find(cur) != visited.end()) { + continue; + } + visited.insert(cur); + // Ensure all inputs to the current node are in the dominated set. + for (Cluster* in : cur->in_nodes) { + if (dominated.find(in) == dominated.end()) { return gtl::nullopt; } - // There is only a single `in` cluster. - cluster = *in->in_nodes.begin(); - } - if (cluster->switch_nodes.empty()) { - return gtl::nullopt; } - - if (switch_cluster.has_value() && *switch_cluster != cluster) { - return gtl::nullopt; - } else { - switch_cluster = cluster; + for (Cluster* out : cur->out_nodes) { + // No switch nodes beyond the entry one is expected. + if (!out->switch_nodes.empty()) { + return gtl::nullopt; + } + enqueue_or_update_merge(out); } } - return switch_cluster; + auto it = merges.begin(); + Cluster* merge_cluster = *it; + for (++it; it != merges.end(); ++it) { + ContractEdge(*it, merge_cluster); + } + + // TODO(jpienaar): Clean up graph, merging nodes. + + return merge_cluster; } xla::StatusOr FunctionalizeCond::DetermineCondArgs( @@ -1221,11 +1319,11 @@ void FunctionalizeCond::RemoveMergeNodes(Cluster* merge_cluster) { } } -Status FunctionalizeCond::RemoveTrivialMerge(Cluster* merge_cluster) { - Cluster* switch_cluster = *merge_cluster->in_nodes.begin(); - if (switch_cluster->switch_nodes.empty()) { +Status FunctionalizeCond::RemoveTrivialSwitch(Cluster* switch_cluster) { + Cluster* merge_cluster = *switch_cluster->out_nodes.begin(); + if (merge_cluster->merge_nodes.empty()) { return errors::FailedPrecondition( - "Not a trivial merge: no Switch node feeding into Merge node"); + "Not a trivial switch: no Merge node feeding into Switch node"); } for (auto it = merge_cluster->merge_nodes.begin(); @@ -1252,17 +1350,25 @@ Status FunctionalizeCond::RemoveTrivialMerge(Cluster* merge_cluster) { return Status::OK(); } -Status FunctionalizeCond::ConvertMergeToXlaIf(Cluster* merge_cluster) { - VLOG(1) << "ConvertMergeToXlaIf for " << merge_cluster->representative; - gtl::optional switch_cluster = GetSwitchCluster(*merge_cluster); - if (!switch_cluster.has_value()) { +Status FunctionalizeCond::ConvertCorrespondingMergeToXlaIf( + Cluster* switch_cluster) { + VLOG(1) << "ConvertMergeToXlaIf for " << switch_cluster->representative; + gtl::optional maybe_merge = + CreateCorrespondingMergeCluster(switch_cluster); + if (!maybe_merge.has_value()) { return errors::FailedPrecondition( - "Merge cluster was not part of a simple conditional in the clustered " - "graph. Graph nodes in merge cluster ", - NodesToString(merge_cluster->merge_nodes)); + "Switch cluster was not part of a simple conditional in the clustered " + "graph. Graph nodes in switch cluster ", + NodesToString(switch_cluster->switch_nodes)); + } + Cluster* merge_cluster = *maybe_merge; + if (merge_cluster->merge_nodes.empty()) { + return errors::Internal( + "Merge node in clustered graph contains no merge nodes: ", + merge_cluster->representative.ToString()); } TF_ASSIGN_OR_RETURN(auto cond_args, - DetermineCondArgs(*merge_cluster, **switch_cluster)); + DetermineCondArgs(*merge_cluster, *switch_cluster)); // Sort the outputs by ID to produce more stable output. std::vector outputs(merge_cluster->merge_nodes.begin(), @@ -1278,7 +1384,7 @@ Status FunctionalizeCond::ConvertMergeToXlaIf(Cluster* merge_cluster) { // Remove the old nodes from the graph_ and contract the edges of the // clustered graph. for (auto in : merge_cluster->in_nodes) { - if (in != *switch_cluster) { + if (in != switch_cluster) { RemoveClusterNodes(in); } } @@ -1286,20 +1392,20 @@ Status FunctionalizeCond::ConvertMergeToXlaIf(Cluster* merge_cluster) { RemoveUnusedArgs(cond_args.args); auto in_nodes = merge_cluster->in_nodes; for (auto it = in_nodes.begin(); it != in_nodes.end();) { - ContractEdge(*it++, merge_cluster); + ContractEdge(*it++, switch_cluster); } - ContractEdge(*switch_cluster, merge_cluster); - clusters_[if_node].Get() = ClusterHandle(merge_cluster->representative); + ContractEdge(merge_cluster, switch_cluster); + clusters_[if_node].Get() = ClusterHandle(switch_cluster->representative); return Status::OK(); } std::vector> -FunctionalizeCond::SortedMergeNodes() { +FunctionalizeCond::SortedSwitchNodes() { VLOG(2) << "ProcessClusteredGraph"; std::stack> stack; // Initialize with the source node. - stack.push({0, &clustered_graph_[ClusterHandle(Graph::kSourceId)]}); + stack.push({0, &clustered_graph_[Representative(graph_->source_node())]}); // Perform a depth-first traversal of the clustered graph computing the // switch-merge depth. @@ -1317,10 +1423,10 @@ FunctionalizeCond::SortedMergeNodes() { size_t new_depth = depth; if (!n->merge_nodes.empty()) { - queue.emplace_back(depth, n); --new_depth; } if (!n->switch_nodes.empty()) { + queue.emplace_back(depth, n); ++new_depth; } for (Cluster* e : n->out_nodes) { @@ -1350,25 +1456,30 @@ Status FunctionalizeCond::Functionalize(Graph* graph, } fc.CreateClusteredGraph(); - auto queue = fc.SortedMergeNodes(); + auto queue = fc.SortedSwitchNodes(); for (auto it = queue.begin(); it != queue.end();) { - Cluster* merge_cluster = (*it).second; + Cluster* switch_cluster = (*it).second; ++it; - if (merge_cluster->in_nodes.size() == 1) { - TF_RETURN_IF_ERROR(fc.RemoveTrivialMerge(merge_cluster)); + if (switch_cluster->out_nodes.size() == 1) { + TF_RETURN_IF_ERROR(fc.RemoveTrivialSwitch(switch_cluster)); } else { - TF_RETURN_IF_ERROR(fc.ConvertMergeToXlaIf(merge_cluster)); + TF_RETURN_IF_ERROR(fc.ConvertCorrespondingMergeToXlaIf(switch_cluster)); } - // Contract newly Merge free merge_cluster with incoming nodes without + // Contract newly Switch free switch_cluster with outgoing nodes without // Switch or Merge nodes. - std::vector in_nodes(merge_cluster->in_nodes.begin(), - merge_cluster->in_nodes.end()); - for (auto in : in_nodes) { - if (in->merge_nodes.empty() && in->switch_nodes.empty()) { - fc.ContractEdge(in, merge_cluster); + for (auto& nodes : {switch_cluster->out_nodes, switch_cluster->in_nodes}) { + std::vector copy_nodes(nodes.begin(), nodes.end()); + for (auto* node : copy_nodes) { + if (node->merge_nodes.empty() && node->switch_nodes.empty()) { + fc.ContractEdge(node, switch_cluster); + } } } + + VLOG(3) << "Graph with clusters: " + << DebugString(*fc.graph_, &fc.clusters_); + VLOG(3) << "ClusteredGraph: " << DebugString(fc.clustered_graph_); } if (!fc.switch_nodes_.empty()) { -- GitLab From 3a164021037b005452d07d325bdd4f5e8ce8465e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 18:18:25 -0800 Subject: [PATCH 0693/1801] Update ops-related pbtxt files. PiperOrigin-RevId: 176447787 --- tensorflow/core/ops/ops.pbtxt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index d043696a94..6ce0b70c9d 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -7323,7 +7323,7 @@ op { name: "DeserializeSparse" input_arg { name: "serialized_sparse" - description: "1-D, The serialized `SparseTensor` object. Must have 3 columns." + description: "The serialized `SparseTensor` objects. The last dimension\nmust have 3 columns." type: DT_STRING } output_arg { @@ -7341,10 +7341,9 @@ op { attr { name: "dtype" type: "type" - description: "The `dtype` of the serialized `SparseTensor` object." + description: "The `dtype` of the serialized `SparseTensor` objects." } - summary: "Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)" - description: "object." + summary: "Deserialize `SparseTensor` objects." } op { name: "DestroyResourceOp" -- GitLab From bb96a309730b9ae409ca5107535493ae40bc58e1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 20 Nov 2017 18:48:29 -0800 Subject: [PATCH 0694/1801] Added the ability to report peak memory usage PiperOrigin-RevId: 176450440 --- tensorflow/python/BUILD | 6 ++- tensorflow/python/grappler/cluster.py | 4 ++ tensorflow/python/grappler/cost_analyzer.i | 17 ++----- tensorflow/python/grappler/cost_analyzer.py | 51 +++++++++++++++++-- .../python/grappler/cost_analyzer_test.py | 32 ++++++++++-- 5 files changed, 88 insertions(+), 22 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a20898e40e..590dbcd462 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4422,7 +4422,11 @@ py_library( "grappler/cost_analyzer.py", ], srcs_version = "PY2AND3", - deps = [":pywrap_tensorflow_internal"], + deps = [ + ":pywrap_tensorflow_internal", + ":tf_cluster", + ":tf_item", + ], ) py_binary( diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py index c6ddb803f4..58c7bbbac1 100644 --- a/tensorflow/python/grappler/cluster.py +++ b/tensorflow/python/grappler/cluster.py @@ -52,6 +52,10 @@ class Cluster(object): if self._tf_cluster is not None: tf_cluster.TF_DeleteCluster(self._tf_cluster) + @property + def tf_cluster(self): + return self._tf_cluster + def ListDevices(self): """Returns the list of available hardware devices.""" devices = [] diff --git a/tensorflow/python/grappler/cost_analyzer.i b/tensorflow/python/grappler/cost_analyzer.i index 1f024e439d..0318ff762c 100644 --- a/tensorflow/python/grappler/cost_analyzer.i +++ b/tensorflow/python/grappler/cost_analyzer.i @@ -43,7 +43,7 @@ limitations under the License. %{ string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool -per_node_report) { +per_node_report, tensorflow::grappler::Cluster* cluster) { tensorflow::grappler::ItemConfig cfg; cfg.apply_optimizations = false; std::unique_ptr item = @@ -51,20 +51,9 @@ per_node_report) { if (!item) { return "Error: failed to preprocess metagraph: check your log file for errors"; } - - // TODO(bsteiner): we should wrap the tf session instead to properly handle the case of a - // distributed setup. - const int timeout_s = 3600; - int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); - int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); - tensorflow::grappler::SingleMachine cluster(timeout_s, num_cpu_cores, num_gpus); - cluster.SetNumWarmupSteps(10); - cluster.AllowSoftPlacement(true); - cluster.DisableDetailedStats(false); - TF_CHECK_OK(cluster.Provision()); string suffix; - tensorflow::grappler::CostAnalyzer analyzer(*item, &cluster, suffix); + tensorflow::grappler::CostAnalyzer analyzer(*item, cluster, suffix); std::stringstream os; analyzer.GenerateReport(os, per_node_report); @@ -74,4 +63,4 @@ per_node_report) { %} string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool -per_node_report); + per_node_report, tensorflow::grappler::Cluster* cluster); diff --git a/tensorflow/python/grappler/cost_analyzer.py b/tensorflow/python/grappler/cost_analyzer.py index 75c21e5727..a1ff915c61 100644 --- a/tensorflow/python/grappler/cost_analyzer.py +++ b/tensorflow/python/grappler/cost_analyzer.py @@ -20,21 +20,64 @@ from __future__ import print_function from tensorflow.python import pywrap_tensorflow as tf_wrap from tensorflow.python.framework import errors +from tensorflow.python.grappler import cluster as gcluster +from tensorflow.python.grappler import item as gitem -def GenerateCostReport(metagraph, per_node_report=False): +def GenerateCostReport(metagraph, per_node_report=False, cluster=None): """Analyze the cost of each TensorFlow op and node in the provided metagraph. Args: - metagraph: An TensorFlow MetaGraphDef. + metagraph: A TensorFlow MetaGraphDef. per_node_report: by default the report contains stats aggregated on a per op type basis, setting per_node_report to True adds results for each individual node to the report. + cluster: Analyze the costs using the specified cluster, or the local machine + if no cluster was specified. Returns: A string of cost report. """ + if cluster is None: + cluster = gcluster.Cluster(disable_detailed_stats=False) + with errors.raise_exception_on_not_ok_status(): - ret_from_swig = tf_wrap.GenerateCostReport(metagraph.SerializeToString(), - per_node_report) + ret_from_swig = tf_wrap.GenerateCostReport( + metagraph.SerializeToString(), per_node_report, cluster.tf_cluster) return ret_from_swig + + +def GenerateMemoryReport(metagraph, detailed_report=True, cluster=None): + """Analyze the peak memory usage for the provided metagraph. + + Args: + metagraph: A TensorFlow MetaGraphDef. + detailed_report: print the live tensors in addition to the peak memory + usage. + cluster: Analyze the memory using the specified cluster, or the local + machine if no cluster was specified. + + Returns: + A string with the formatted memory usage. + """ + if cluster is None: + cluster = gcluster.Cluster( + disable_detailed_stats=True, disable_timeline=True) + + item = gitem.Item(metagraph) + peak_usage = cluster.DeterminePeakMemoryUsage(item) + report = "" + for device, snapshot in peak_usage.items(): + peak_usage = snapshot[0] + report += "Peak usage for device " + device + ": " + str( + peak_usage) + " bytes\n" + if detailed_report: + live_tensors = snapshot[1] + for tensor in live_tensors: + op_name = tensor[0] + output_id = tensor[1] + mem_used = tensor[2] + report += " " + str(op_name) + ":" + str(output_id) + " uses " + str( + mem_used) + " bytes\n" + + return report diff --git a/tensorflow/python/grappler/cost_analyzer_test.py b/tensorflow/python/grappler/cost_analyzer_test.py index d59f1d04f6..f4933a4514 100644 --- a/tensorflow/python/grappler/cost_analyzer_test.py +++ b/tensorflow/python/grappler/cost_analyzer_test.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.grappler import cost_analyzer from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -35,9 +36,9 @@ from tensorflow.python.platform import test from tensorflow.python.training import adam -class PyWrapOptimizeGraphTest(test.TestCase): +class CostAnalysisTest(test.TestCase): - def testBasic(self): + def testBasicCost(self): """Make sure arguments can be passed correctly.""" a = constant_op.constant(10, name="a") b = constant_op.constant(20, name="b") @@ -60,7 +61,7 @@ class PyWrapOptimizeGraphTest(test.TestCase): # Also print the report to make it easier to debug print("{}".format(report)) - def testSmallNetwork(self): + def testSmallNetworkCost(self): image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1]) label = array_ops.placeholder(dtypes.float32, shape=[1, 10]) w = variables.Variable( @@ -111,6 +112,31 @@ class PyWrapOptimizeGraphTest(test.TestCase): # self.assertTrue(0 < upper) # self.assertTrue(lower <= upper) + def testBasicMemory(self): + """Make sure arguments can be passed correctly.""" + with test_util.device(use_gpu=False): + a = constant_op.constant(10, name="a") + b = constant_op.constant(20, name="b") + c = math_ops.add_n([a, b], name="c") + d = math_ops.add_n([b, c], name="d") + train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + train_op.append(d) + mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph()) + + report = cost_analyzer.GenerateMemoryReport(mg) + + # Print the report to make it easier to debug + print("{}".format(report)) + + # Check the report + self.assertTrue( + "Peak usage for device /job:localhost/replica:0/task:0/cpu:0: 16 bytes" + in report) + self.assertTrue(" a:0 uses 4 bytes" in report) + self.assertTrue(" b:0 uses 4 bytes" in report) + self.assertTrue(" c:0 uses 4 bytes" in report) + self.assertTrue(" d:0 uses 4 bytes" in report) + if __name__ == "__main__": test.main() -- GitLab From b525ea6798175f4c95996a3666c70de5c00a9a0c Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 20 Nov 2017 20:10:19 -0800 Subject: [PATCH 0695/1801] [XLA] Rework ScopedLoggingTimer into macros, and add some tracing to gpu_compiler. PiperOrigin-RevId: 176455799 --- .../compiler/xla/service/cpu/cpu_compiler.cc | 2 +- .../compiler/xla/service/gpu/gpu_compiler.cc | 29 +++++++++------ .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 5 ++- tensorflow/compiler/xla/util.cc | 8 ++--- tensorflow/compiler/xla/util.h | 36 +++++++++++++++++-- 5 files changed, 58 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 592751e118..88f7e7a93f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -444,7 +444,7 @@ StatusOr> CpuCompiler::RunBackend( perftools::gputools::StreamExecutor* stream_exec) { const string timer_message = "Compiling [" + module->name() + "] for CPU using JIT"; - ScopedLoggingTimer compiling_timer(timer_message, 1); + XLA_SCOPED_LOGGING_TIMER(timer_message); VLOG(1) << "Compiling: " << module->name(); TF_RET_CHECK(stream_exec != nullptr); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 937d453a5c..e84c390745 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -299,17 +299,17 @@ GpuCompiler::GpuCompiler() StatusOr> GpuCompiler::RunHloPasses( std::unique_ptr module, se::StreamExecutor* /*stream_exec*/) { - { - Tracing::TraceMe annotation("HLO Transforms", module->name(), - /*is_expensive=*/true); - TF_RETURN_IF_ERROR( - OptimizeHloModule(module.get(), ShapeSizeBytesFunction())); - } + XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunHloPasses"); + Tracing::TraceMe annotation("HLO Transforms", module->name(), + /*is_expensive=*/true); + TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), ShapeSizeBytesFunction())); return std::move(module); } StatusOr> GpuCompiler::RunBackend( std::unique_ptr module, se::StreamExecutor* stream_exec) { + XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend"); + TF_RET_CHECK(stream_exec != nullptr); TF_RETURN_IF_ERROR( @@ -366,8 +366,11 @@ StatusOr> GpuCompiler::RunBackend( HloComputation* entry_computation = module->entry_computation(); IrEmitterUnnested ir_emitter(module->config(), entry_computation, &ir_emitter_context); - TF_RETURN_IF_ERROR( - entry_computation->root_instruction()->Accept(&ir_emitter)); + { + XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - IR emission"); + TF_RETURN_IF_ERROR( + entry_computation->root_instruction()->Accept(&ir_emitter)); + } if (user_pre_optimization_hook_) { TF_CHECK_OK(user_pre_optimization_hook_(llvm_module)); @@ -416,9 +419,12 @@ StatusOr> GpuCompiler::RunBackend( cc_minor = 0; } - TF_ASSIGN_OR_RETURN(string ptx, - CompileToPtx(&llvm_module, {cc_major, cc_minor}, - module->config(), libdevice_dir)); + string ptx; + { + XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - CompileToPtx"); + TF_ASSIGN_OR_RETURN(ptx, CompileToPtx(&llvm_module, {cc_major, cc_minor}, + module->config(), libdevice_dir)); + } if (!ir_dump_directory.empty()) { TF_RETURN_IF_ERROR(llvm_ir::DumpIRToDirectory( @@ -474,6 +480,7 @@ StatusOr> GpuCompiler::RunBackend( std::vector GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx, int cc_major, int cc_minor) { + XLA_SCOPED_LOGGING_TIMER("GpuCompiler::CompilePtxOrGetCachedResult"); Tracing::TraceMe annotation("PTX->CUBIN", /*is_expensive=*/true); bool inserted; decltype(compilation_cache_.begin()) iter; diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 1cb963be61..a574123d6b 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -492,9 +492,8 @@ StatusOr CompileToPtx(llvm::Module* module, tensorflow::port::Tracing::TraceMe annotation( "Compiling IR", llvm_ir::AsString(module->getName()), /*is_expensive=*/true); - ScopedLoggingTimer compilation_timer( - "Compile module " + llvm_ir::AsString(module->getName()), - /*vlog_level=*/2); + XLA_SCOPED_LOGGING_TIMER("Compile module " + + llvm_ir::AsString(module->getName())); TF_ASSIGN_OR_RETURN( ptx, CompileModuleToPtx(module, compute_capability, hlo_module_config, libdevice_dir_path)); diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc index 2624ef0252..e595df3052 100644 --- a/tensorflow/compiler/xla/util.cc +++ b/tensorflow/compiler/xla/util.cc @@ -42,15 +42,15 @@ Status WithLogBacktrace(const Status& status) { } // namespace -ScopedLoggingTimer::ScopedLoggingTimer(const string& label, int32 vlog_level) - : label(label), vlog_level(vlog_level) { - if (VLOG_IS_ON(vlog_level)) { +ScopedLoggingTimer::ScopedLoggingTimer(const string& label, bool enabled) + : enabled(enabled), label(label) { + if (enabled) { start_micros = tensorflow::Env::Default()->NowMicros(); } } ScopedLoggingTimer::~ScopedLoggingTimer() { - if (VLOG_IS_ON(vlog_level)) { + if (enabled) { uint64 end_micros = tensorflow::Env::Default()->NowMicros(); double secs = (end_micros - start_micros) / 1000000.0; diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index f58f57b443..b722095d1f 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -50,13 +50,43 @@ using DimensionVector = tensorflow::gtl::InlinedVector; // RAII timer that logs with a given label the wall clock time duration in human // readable form. This differs from base's ElapsedTimer primarily in that it // spits out the human-readable duration form. +// +// By default, the timing traces are only printed at VLOG(1) and above: +// +// XLA_SCOPED_LOGGING_TIMER("fooing bar"); // nop if !VLOG_IS_ON(1). +// +// but you can control this via: +// +// XLA_SCOPED_LOGGING_TIMER_LEVEL("fooing bar", 2); // nop if !VLOG_IS_ON(2) +// +#define XLA_SCOPED_LOGGING_TIMER(label) \ + XLA_SCOPED_LOGGING_TIMER_HELPER(label, 1, __COUNTER__) +#define XLA_SCOPED_LOGGING_TIMER_LEVEL(label, level) \ + XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, __COUNTER__) + +// Helper for implementing macros above. Do not use directly. +// +// Forces the evaluation of "counter", which we expect is equal to __COUNTER__. +#define XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, counter) \ + XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter) + +// Helper for macros above. Don't use directly. +#define XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter) \ + ::xla::ScopedLoggingTimer XLA_ScopedLoggingTimerInstance##counter( \ + label, VLOG_IS_ON(level)) + +// RAII timer for XLA_SCOPED_LOGGING_TIMER and XLA_SCOPED_LOGGING_TIMER_LEVEL +// macros above. Recommended usage is via the macros so you don't have to give +// the timer a name or worry about calling VLOG_IS_ON yourself. struct ScopedLoggingTimer { - explicit ScopedLoggingTimer(const string& label, int32 vlog_level = 1); + // The timer does nothing if enabled is false. This lets you pass in your + // file's VLOG_IS_ON value. + ScopedLoggingTimer(const string& label, bool enabled); ~ScopedLoggingTimer(); - uint64 start_micros; + bool enabled; string label; - int32 vlog_level; + uint64 start_micros; }; // Given a vector, returns a MutableArraySlice that points at its -- GitLab From 9a267fe324044578ec997510d9b2859b5e67842e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Nov 2017 20:27:40 -0800 Subject: [PATCH 0696/1801] [XLA] Handle reduce window on a scalar value. PiperOrigin-RevId: 176457058 --- .../compiler/xla/service/algebraic_simplifier.cc | 9 +++++++++ tensorflow/compiler/xla/tests/reduce_window_test.cc | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index bc9a3ac43d..1764f7f3dc 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1398,6 +1398,15 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( auto operand = reduce_window->mutable_operand(0); const Window& window = reduce_window->window(); auto function = reduce_window->to_apply(); + if (ShapeUtil::IsScalar(operand->shape())) { + TF_RET_CHECK(ShapeUtil::IsScalar(reduce_window->shape())); + return ReplaceWithNewInstruction( + reduce_window, + HloInstruction::CreateMap(reduce_window->shape(), + {operand, reduce_window->mutable_operand(1)}, + function)); + } + VLOG(10) << "Considering folding Pad: " << operand->ToString() << "\ninto reduce-window: " << reduce_window->ToString(); diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 6c9b62b48d..0601a1466b 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -90,6 +90,16 @@ TEST_F(ReduceWindowTest, MismatchedRanksGivesErrorStatus) { ::testing::HasSubstr("Want input dimensions size")); } +// Regression test for b/68964348. +TEST_F(ReduceWindowTest, R0ReduceWindow) { + auto input = builder_.ConstantR0(42); + auto init = builder_.ConstantR0(1.0); + builder_.ReduceWindow(input, init, CreateScalarAddComputation(F32, &builder_), + /*window_dimensions=*/{}, + /*window_strides=*/{}, Padding::kSame); + ComputeAndCompareR0(&builder_, 43, {}, ErrorSpec(0.00001)); +} + TEST_F(ReduceWindowTest, Min3In5Stride2) { const auto input = builder_.ConstantR1({10000, 1000, 100, 10, 1}); ReduceWindowMin(input, {3}, {2}, Padding::kValid); -- GitLab From 044cb401046401b7956234b31ecdafe4d86cc6d9 Mon Sep 17 00:00:00 2001 From: scott Date: Tue, 21 Nov 2017 21:52:48 +0800 Subject: [PATCH 0697/1801] fix: import error --- tensorflow/contrib/slim/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index f7a85557ca..dc92ae0c85 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -441,7 +441,8 @@ module. Consider the simple case where we want to train the VGG network: ```python import tensorflow as tf -vgg = tf.contrib.slim.nets.vgg +import tensorflow.contrib.slim.nets as nets +vgg = nets.vgg # Load the images and labels. images, labels = ... @@ -559,9 +560,10 @@ examine the following sample of training the VGG network: ```python import tensorflow as tf +import tensorflow.contrib.slim.nets as nets slim = tf.contrib.slim -vgg = tf.contrib.slim.nets.vgg +vgg = nets.vgg ... @@ -809,9 +811,10 @@ Putting it all together: ```python import tensorflow as tf +import tensorflow.contrib.slim.nets as nets slim = tf.contrib.slim -vgg = tf.contrib.slim.nets.vgg +vgg = nets.vgg # Load the data -- GitLab From 3def704f255c3af59fd3225dba862834e35b3493 Mon Sep 17 00:00:00 2001 From: Christopher Shallue Date: Tue, 21 Nov 2017 09:40:32 -0800 Subject: [PATCH 0698/1801] Add method HParams.get(key, default=None) PiperOrigin-RevId: 176520519 --- .../training/python/training/hparam.py | 27 ++++++++++++ .../training/python/training/hparam_test.py | 43 +++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 7db625cdd5..8d5f47ca4d 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -582,6 +582,33 @@ class HParams(object): """ return {n: getattr(self, n) for n in self._hparam_types.keys()} + def get(self, key, default=None): + """Returns the value of `key` if it exists, else `default`.""" + if key in self._hparam_types: + # Ensure that default is compatible with the parameter type. + if default is not None: + param_type, is_param_list = self._hparam_types[key] + type_str = 'list<%s>' % param_type if is_param_list else str(param_type) + fail_msg = ("Hparam '%s' of type '%s' is incompatible with " + 'default=%s' % (key, type_str, default)) + + is_default_list = isinstance(default, list) + if is_param_list != is_default_list: + raise ValueError(fail_msg) + + try: + if is_default_list: + for value in default: + _cast_to_type_if_compatible(key, param_type, value) + else: + _cast_to_type_if_compatible(key, param_type, default) + except ValueError as e: + raise ValueError('%s. %s' % (fail_msg, e)) + + return getattr(self, key) + + return default + def __contains__(self, key): return key in self._hparam_types diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py index 949c262f5b..643905d3a6 100644 --- a/tensorflow/contrib/training/python/training/hparam_test.py +++ b/tensorflow/contrib/training/python/training/hparam_test.py @@ -364,6 +364,49 @@ class HParamsTest(test.TestCase): with self.assertRaisesRegexp(AssertionError, ''): hparam.HParams(hparam_def=[1, 2, 3]) + def testGet(self): + hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6', d=True, e=[5.0, 6.0]) + + # Existing parameters with default=None. + self.assertEqual(1, hparams.get('aaa')) + self.assertEqual(2.0, hparams.get('b')) + self.assertEqual('relu6', hparams.get('c_c')) + self.assertEqual(True, hparams.get('d')) + self.assertEqual([5.0, 6.0], hparams.get('e', None)) + + # Existing parameters with compatible defaults. + self.assertEqual(1, hparams.get('aaa', 2)) + self.assertEqual(2.0, hparams.get('b', 3.0)) + self.assertEqual(2.0, hparams.get('b', 3)) + self.assertEqual('relu6', hparams.get('c_c', 'default')) + self.assertEqual(True, hparams.get('d', True)) + self.assertEqual([5.0, 6.0], hparams.get('e', [1.0, 2.0, 3.0])) + self.assertEqual([5.0, 6.0], hparams.get('e', [1, 2, 3])) + + # Existing parameters with incompatible defaults. + with self.assertRaises(ValueError): + hparams.get('aaa', 2.0) + + with self.assertRaises(ValueError): + hparams.get('b', False) + + with self.assertRaises(ValueError): + hparams.get('c_c', [1, 2, 3]) + + with self.assertRaises(ValueError): + hparams.get('d', 'relu') + + with self.assertRaises(ValueError): + hparams.get('e', 123.0) + + with self.assertRaises(ValueError): + hparams.get('e', ['a', 'b', 'c']) + + # Nonexistent parameters. + self.assertEqual(None, hparams.get('unknown')) + self.assertEqual(123, hparams.get('unknown', 123)) + self.assertEqual([1, 2, 3], hparams.get('unknown', [1, 2, 3])) + if __name__ == '__main__': test.main() -- GitLab From 745eb9242d3b3b8e860abce018c74444a02e0926 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 10:01:57 -0800 Subject: [PATCH 0699/1801] Fix tests that prevent enabling the dependency optimizer in grappler. - Most of these use constant inputs that cause the new optimizer to turn various backprop nodes into NoOps because their outputs are not consumed. Improve or fix a few issues in the dependency optimizer: - Prune duplicate control inputs. Don't add new control inputs if the target already has the source as a regular input. - Don't turn Merge, Switch, Enter, Exit, NextIteration, _TPUExecute or _TPUCompile nodes into NoOps. - Don't call ConstantFolding::AddControlDependency() when re-routing, since the nodes in questions already have control inputs, so cannot be Switch nodes. - Don't clear inputs from NoOps unless we know the fetch nodes. PiperOrigin-RevId: 176523299 --- tensorflow/core/debug/debug_gateway_test.cc | 13 +- tensorflow/core/grappler/optimizers/BUILD | 2 - .../optimizers/dependency_optimizer.cc | 150 ++++++++++++------ .../optimizers/dependency_optimizer.h | 2 +- .../optimizers/dependency_optimizer_test.cc | 1 + .../lib/debug_graph_reconstruction_test.py | 9 +- .../python/debug/lib/session_debug_testlib.py | 5 +- 7 files changed, 122 insertions(+), 60 deletions(-) diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc index 3903040e4d..5758334906 100644 --- a/tensorflow/core/debug/debug_gateway_test.cc +++ b/tensorflow/core/debug/debug_gateway_test.cc @@ -40,6 +40,9 @@ std::unique_ptr CreateSession() { options.config.mutable_graph_options() ->mutable_rewrite_options() ->set_constant_folding(RewriterConfig::OFF); + options.config.mutable_graph_options() + ->mutable_rewrite_options() + ->set_dependency_optimization(RewriterConfig::OFF); return std::unique_ptr( dynamic_cast(NewSession(options))); @@ -55,7 +58,7 @@ class SessionDebugMinusAXTest : public ::testing::Test { #elif defined(TENSORFLOW_USE_SYCL) const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0"; #else - const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0"; + const string kDeviceName = "/job:localhost/replica:0/task:0/device:CPU:0"; #endif Tensor a_tensor(DT_FLOAT, TensorShape({2, 2})); @@ -503,7 +506,7 @@ TEST_F(SessionDebugMinusAXTest, } #endif -class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test { +class SessionDebugOutputSlotWithoutOutgoingEdgeTest : public ::testing::Test { public: void Initialize() { Graph graph(OpRegistry::Global()); @@ -513,7 +516,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test { #elif defined(TENSORFLOW_USE_SYCL) const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0"; #else - const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0"; + const string kDeviceName = "/job:localhost/replica:0/task:0/device:CPU:0"; #endif Tensor a_tensor(DT_FLOAT, TensorShape({1, 1})); @@ -540,7 +543,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test { GraphDef def_; }; -TEST_F(SessionDebugOutputSlotWithoutOngoingEdgeTest, +TEST_F(SessionDebugOutputSlotWithoutOutgoingEdgeTest, WatchSlotWithoutOutgoingEdge) { Initialize(); auto session = CreateSession(); @@ -615,7 +618,7 @@ class SessionDebugVariableTest : public ::testing::Test { #elif defined(TENSORFLOW_USE_SYCL) const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0"; #else - const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0"; + const string kDeviceName = "/job:localhost/replica:0/task:0/device:CPU:0"; #endif // Define variable node. diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index dbfa8ae503..08344b0ada 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -202,7 +202,6 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":arithmetic_optimizer", ":constant_folding", ":graph_optimizer", "//tensorflow/core:framework", @@ -213,7 +212,6 @@ cc_library( "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/costs:graph_properties", - "//tensorflow/core/grappler/utils:frame", ], ) diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index 57eee60646..0cc4585ba4 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -22,9 +22,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" -#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" -#include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -70,7 +68,7 @@ class SetVector { bool HasRegularOutputs(const NodeDef& node, const NodeMap& node_map) { for (const NodeDef* output : node_map.GetOutputs(node.name())) { for (const string& input : output->input()) { - if (input == node.name()) { + if (!IsControlInput(input) && NodeName(input) == node.name()) { return true; } } @@ -78,25 +76,53 @@ bool HasRegularOutputs(const NodeDef& node, const NodeMap& node_map) { return false; } -int FindInputSlot(const NodeDef& node, const string& input) { - for (int i = 0; i < node.input_size(); ++i) { - if (node.input(i) == input) { - return i; +int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) { + int num_removed = 0; + int pos = 0; + while (pos < node->input_size()) { + if (node->input(pos) == input) { + node->mutable_input()->SwapElements(pos, node->input_size() - 1); + node->mutable_input()->RemoveLast(); + node_map->RemoveOutput(node->name(), NodeName(input)); + } else { + ++pos; + } + ++num_removed; + } + return num_removed; +} + +// Remove dulicate control inputs. +void PruneControlInputs(NodeDef* node) { + std::unordered_set inputs; + int pos = 0; + while (pos < node->input_size()) { + const string& input = node->input(pos); + // TODO(rmlarsen): Remove control inputs that also appears as a regular + // inputs. Currently, doing so breaks testControlFlowStrictness in + // python/framework/function_test. + // if (!inputs.insert(NodeName(input)).second && IsControlInput(input)) { + if (IsControlInput(input) && !inputs.insert(input).second) { + VLOG(1) << "**** Removing duplicate control input: " << input + << " from node " << node->DebugString(); + node->mutable_input()->SwapElements(pos, node->input_size() - 1); + node->mutable_input()->RemoveLast(); + } else { + ++pos; } } - return -1; } } // namespace bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) { - if (!has_fetch_ || HasRegularOutputs(node, *node_map_)) { + if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { return false; } - if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { + if (!fetch_nodes_known_ || HasRegularOutputs(node, *node_map_)) { return false; } - if (IsMerge(node)) { + if (IsMerge(node) || IsSwitch(node)) { return false; } if (ModifiesFrameInfo(node)) { @@ -105,21 +131,21 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) { if (!IsFreeOfSideEffect(node)) { return false; } - + if (node.op().rfind("Submodel", 0) == 0) { + return false; + } const OpDef* op_def = nullptr; Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); if (!status.ok() || op_def->output_arg_size() == 0) { return false; } - // TODO(rmlarsen): We have to skip Const nodes to make - // core/debug/debug_gateway_test pass. See if we can fix that test. // TODO(rmlarsen): We have to skip Identity nodes to make an obsolete test in // python/training/session_manager_test.py pass. See if we can fix or get rid // of that test. - const std::unordered_set do_not_rewrite_ops = { - "Assert", "CheckNumerics", "Const", "Identity", "_Retval", - "_Arg", "_ParallelConcatUpdate", "_TPUExecute"}; + const std::unordered_set do_not_rewrite_ops{ + "Assert", "CheckNumerics", "Identity", "_Retval", + "_Arg", "_ParallelConcatUpdate", "_TPUExecute", "_TPUCompile"}; return do_not_rewrite_ops.find(node.op()) == do_not_rewrite_ops.end(); } @@ -127,20 +153,33 @@ string DependencyOptimizer::TryOptimizeDependencies( NodeDef* node, GraphDef* graph, std::vector* new_nodes) { // Change ops that only have control dependencies as outputs to NoOps. if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) { - VLOG(2) << "***** Replacing " << node->name() << " (" << node->op() + VLOG(1) << "***** Replacing " << node->name() << " (" << node->op() << ") with NoOp."; // The outputs of this node are not consumed. Replace its inputs with // control dependencies and replace the op itself with the NoOp op. - for (int i = 0; i < node->input_size(); ++i) { - const string& old_input = node->input(i); + std::unordered_set ctrl_inputs; + int pos = 0; + while (pos < node->input_size()) { + const string& old_input = node->input(pos); if (IsControlInput(old_input)) { + if (!ctrl_inputs.insert(old_input).second) { + // We found a duplicate control input. Remove it. + node->mutable_input()->SwapElements(pos, node->input_size() - 1); + node->mutable_input()->RemoveLast(); + } else { + ++pos; + } continue; } const string ctrl_input = ConstantFolding::AddControlDependency( old_input, graph, node_map_.get()); - node->set_input(i, ctrl_input); - node_map_->UpdateInput(node->name(), old_input, ctrl_input); - new_nodes->push_back(node_map_->GetNode(old_input)); + if (ctrl_inputs.insert(ctrl_input).second) { + node->set_input(pos, ctrl_input); + node_map_->UpdateInput(node->name(), old_input, ctrl_input); + auto old_input_node = node_map_->GetNode(old_input); + new_nodes->push_back(old_input_node); + } + ++pos; } node->set_op("NoOp"); node->clear_attr(); @@ -164,40 +203,50 @@ string DependencyOptimizer::TryOptimizeDependencies( // +------+ --^> c +---+ --^> c if (node->op() == "NoOp" && nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) { - auto outputs = node_map_->GetOutputs(node->name()); - const int num_outputs = outputs.size(); + const auto output_nodes = node_map_->GetOutputs(node->name()); + const int num_outputs = output_nodes.size(); const int num_inputs = node->input_size(); if (num_inputs > 1 && num_outputs > 1) { return ""; } - - for (auto consumer : outputs) { + VLOG(1) << "***** Rerouting input around " << node->name(); + std::vector input_nodes; + for (int i = 0; i < num_inputs; ++i) { + NodeDef* tmp = node_map_->GetNode(node->input(i)); + if (tmp != nullptr) { + input_nodes.push_back(tmp); + } + } + for (auto consumer : output_nodes) { + bool updated_consumer = false; + VLOG(1) << "***** Considering consumer " << consumer->name() << "\n" + << consumer->DebugString(); for (int i = 0; i < num_inputs; ++i) { const string& input = node->input(i); - // Forward dependencies from inputs to consumer if it doesn't already + // Forward dependency from input to consumer if it doesn't already // depend on it. - if (node_map_->GetOutputs(input).count(consumer) == 0) { - consumer->add_input(ConstantFolding::AddControlDependency( - input, graph, node_map_.get())); + if (node_map_->GetOutputs(NodeName(input)).count(consumer) == 0) { + consumer->add_input(input); + updated_consumer = true; node_map_->AddOutput(NodeName(input), consumer->name()); + new_nodes->push_back(input_nodes[i]); } - new_nodes->push_back(node_map_->GetNode(input)); } // Remove dependency on node from consumer. - int pos = FindInputSlot(*consumer, AsControlDependency(node->name())); - if (pos >= 0) { - consumer->mutable_input()->SwapElements(pos, - consumer->input_size() - 1); - consumer->mutable_input()->RemoveLast(); - node_map_->RemoveOutput(node->name(), consumer->name()); + updated_consumer |= RemoveInput( + consumer, AsControlDependency(node->name()), node_map_.get()); + if (updated_consumer) { + VLOG(1) << "***** Updated consumer " << consumer->name() << " (" + << consumer->op() << ")"; new_nodes->push_back(consumer); } } // Clear all control inputs to node. - node_map_->RemoveInputs(node->name()); - node->clear_input(); - return ""; + if (fetch_nodes_known_) { + node_map_->RemoveInputs(node->name()); + node->clear_input(); + } } return ""; @@ -208,9 +257,10 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { // in the ArithmeticOptimizer. Dedup this. SetVector nodes_to_simplify; for (int i = 0; i < optimized_graph->node_size(); ++i) { - const NodeDef& node = optimized_graph->node(i); - if (node.op() == "NoOp" || SafeToConvertToNoOp(node)) { - nodes_to_simplify.PushBack(optimized_graph->mutable_node()->Mutable(i)); + NodeDef* node = optimized_graph->mutable_node(i); + if (node->op() == "NoOp" || SafeToConvertToNoOp(*node)) { + PruneControlInputs(node); + nodes_to_simplify.PushBack(node); } } while (!nodes_to_simplify.Empty()) { @@ -244,8 +294,6 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { ? AsControlDependency(NodeName(simplified_tensor)) : simplified_tensor); } - VLOG(2) << "Update input " << consumer->input(i) << " of " - << consumer->name() << " to " << simplified_tensor; } node_map_->UpdateInput(consumer->name(), node->name(), simplified_tensor); @@ -256,6 +304,10 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { nodes_to_simplify.PushBack(new_node); } } + for (int i = 0; i < optimized_graph->node_size(); ++i) { + NodeDef* node = optimized_graph->mutable_node(i); + PruneControlInputs(node); + } return Status::OK(); } @@ -264,10 +316,10 @@ Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = item.graph; nodes_to_preserve_ = item.NodesToPreserve(); node_map_.reset(new NodeMap(optimized_graph)); - has_fetch_ = !item.fetch.empty(); - VLOG(2) << "Graph before optimization:\n" << optimized_graph->DebugString(); + fetch_nodes_known_ = !item.fetch.empty(); + VLOG(1) << "Graph before optimization:\n" << optimized_graph->DebugString(); TF_RETURN_IF_ERROR(OptimizeDependencies(optimized_graph)); - VLOG(2) << "Graph after optimization:\n" << optimized_graph->DebugString(); + VLOG(1) << "Graph after optimization:\n" << optimized_graph->DebugString(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h index 13ece87aff..cab9383b94 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h @@ -56,7 +56,7 @@ class DependencyOptimizer : public GraphOptimizer { bool HasOnlyControlOutputs(const NodeDef* node); - bool has_fetch_; + bool fetch_nodes_known_; RewriterConfig::Toggle opt_level_; std::unordered_set nodes_to_preserve_; std::unique_ptr node_map_; diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc index d54d7b2093..90f5ec8c3f 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc @@ -104,6 +104,7 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop) { } } +// TODO(rmlarsen): Add test to make sure we skip Switch and Merge. TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); diff --git a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py index 442dfb7b3f..cc1a380538 100644 --- a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py +++ b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py @@ -22,6 +22,7 @@ import tempfile from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.client import session from tensorflow.python.debug.lib import debug_data from tensorflow.python.debug.lib import debug_graphs @@ -41,6 +42,12 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase): _OP_TYPE_BLACKLIST = ( "_Send", "_Recv", "_HostSend", "_HostRecv", "_Retval") + def _no_rewrite_session_config(self): + rewriter_config = rewriter_config_pb2.RewriterConfig( + dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF) + graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config) + return config_pb2.ConfigProto(graph_options=graph_options) + def setUp(self): super(ReconstructNonDebugGraphTest, self).setUp() self._dump_dir = tempfile.mkdtemp() @@ -136,7 +143,7 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase): sess, c, expected_output=400.0) def testReonstructGraphWithCond(self): - with session.Session() as sess: + with session.Session(config=self._no_rewrite_session_config()) as sess: x = variables.Variable(10.0, name="x") y = variables.Variable(20.0, name="y") cond = control_flow_ops.cond( diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py index ed31a8c8cd..20a40018bf 100644 --- a/tensorflow/python/debug/lib/session_debug_testlib.py +++ b/tensorflow/python/debug/lib/session_debug_testlib.py @@ -58,7 +58,8 @@ from tensorflow.python.training import gradient_descent def no_rewrite_session_config(): rewriter_config = rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, - arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF) + arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, + dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF) graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config) return config_pb2.ConfigProto(graph_options=graph_options) @@ -963,7 +964,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testOutputSlotWithoutOutgoingEdgeCanBeWatched(self): """Test watching output slots not attached to any outgoing edges.""" - with session.Session() as sess: + with session.Session(config=no_rewrite_session_config()) as sess: u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]]) u = constant_op.constant(u_init_val, shape=[2, 2], name="u") -- GitLab From c33fc377309eb72e94077eb091dc51d198cb1afb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 10:07:57 -0800 Subject: [PATCH 0700/1801] Remove deleted files from CMake PiperOrigin-RevId: 176524446 --- tensorflow/contrib/cmake/tf_core_kernels.cmake | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index f978c8ccd5..d6b8990664 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -55,10 +55,6 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/utils/tensor_utils.cc" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/common/partitioners/example_partitioner.cc" - "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc" - "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc" - "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc" - "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/model_ops.cc" -- GitLab From ff47768027a66d550667015e1f238541169414c5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 21 Nov 2017 10:18:13 -0800 Subject: [PATCH 0701/1801] Limit the number of iterations to avoid creating infinite loops if a shape function isn't implemented correctly. PiperOrigin-RevId: 176526135 --- .../core/grappler/costs/graph_properties.cc | 23 ++++++++++++++++--- .../core/grappler/costs/graph_properties.h | 4 ++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 46c6841023..c28498ef6f 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -283,6 +283,7 @@ class TopoQueue { } bool empty() const { return queue_.empty(); } + std::size_t size() const { return queue_.size(); } private: // Graph nodes are created in (roughly) topological order. Therefore we can @@ -701,9 +702,24 @@ Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner, Status GraphProperties::PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, const std::unordered_map>& - resources) { + resources) const { + // Limit the number of iterations to prevent infinite loops in the presence of + // incorrect shape functions. The algoritm should converge in at most + // num_nested_loops^2 * max_rank. We approximate max_rank with the constant 4. + // The same applies to resources. + const int num_loops = new_shapes->size(); + const int max_loop_length = item_.graph.node_size(); + const int max_rank = 4; + const int max_loop_iterations = + max_rank * max_loop_length * std::max(1, num_loops * num_loops); + const int num_queues = resources.size(); + const int max_resource_iterations = num_queues * num_queues * max_rank; + + int num_resource_iterations = 0; do { - while (!new_shapes->empty()) { + int num_loop_iterations = 0; + while (!new_shapes->empty() && + num_loop_iterations++ < max_loop_iterations) { const Node* n = new_shapes->pop(); for (const Node* fanout : n->out_nodes()) { TF_RETURN_IF_ERROR( @@ -718,7 +734,8 @@ Status GraphProperties::PropagateShapes( TF_RETURN_IF_ERROR(UpdateResource(resource.first, resource.second, shape_refiner, relax, new_shapes)); } - } while (!new_shapes->empty()); + } while (!new_shapes->empty() && + num_resource_iterations++ < max_resource_iterations); return Status::OK(); } diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 37c8654541..ee279b7e0a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -99,10 +99,10 @@ class GraphProperties { const Node* n, TopoQueue* new_shapes); // Propagate the shapes for the nodes enqueued in new_shapes and their // transitive fanout until a fixed point is reached. - static Status PropagateShapes( + Status PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, const std::unordered_map>& - resources); + resources) const; }; } // end namespace grappler -- GitLab From 893fb86ab81503f2b608e700874c76f83a4b07a7 Mon Sep 17 00:00:00 2001 From: Kiril Gorovoy Date: Tue, 21 Nov 2017 10:28:38 -0800 Subject: [PATCH 0702/1801] Fix flatbuffers workspace.bzl definition to work when TF is imported as a submodule. PiperOrigin-RevId: 176527761 --- tensorflow/workspace.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 9bbc0cb1c4..dd5dc37a87 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -819,7 +819,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.new_http_archive( name = "flatbuffers", - build_file = "third_party/flatbuffers/flatbuffers.BUILD", + build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")), strip_prefix = "flatbuffers-971a68110e4fc1bace10fcb6deeb189e7e1a34ce", sha256 = "874088d2ee0d9f8524191f77209556415f03dd44e156276edf19e5b90ceb5f55", urls = [ -- GitLab From f282ad32cf4bd2cba873ab2ebbc98cc1a7329a7f Mon Sep 17 00:00:00 2001 From: Kyle Mills Date: Tue, 21 Nov 2017 14:24:32 -0500 Subject: [PATCH 0703/1801] Fixed typo in usage docstring Changed tf.SyncReplicasOptimizer to tf.train.SyncReplicasOptimizer. --- tensorflow/python/training/sync_replicas_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py index 2a97d45daa..b52d101a21 100644 --- a/tensorflow/python/training/sync_replicas_optimizer.py +++ b/tensorflow/python/training/sync_replicas_optimizer.py @@ -99,7 +99,7 @@ class SyncReplicasOptimizer(optimizer.Optimizer): # Note that if you want to have 2 backup replicas, you can change # total_num_replicas=52 and make sure this number matches how many physical # replicas you started in your job. - opt = tf.SyncReplicasOptimizer(opt, replicas_to_aggregate=50, + opt = tf.train.SyncReplicasOptimizer(opt, replicas_to_aggregate=50, total_num_replicas=50) # Some models have startup_delays to help stabilize the model but when using -- GitLab From 0d4b94b7eddfff07f3a722ec2747568894256428 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 21 Nov 2017 11:56:56 -0800 Subject: [PATCH 0704/1801] Fix bug in WhileLoopContext. PiperOrigin-RevId: 176540820 --- tensorflow/python/ops/control_flow_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 194df5957c..38c959df8d 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -2734,7 +2734,7 @@ class WhileContext(ControlFlowContext): if shape is not None: xs.append(shape) for x in xs: - inp_op = x.op.inputs[0] + inp_op = x.op.inputs[0].op control_inputs = graph._control_dependencies_for_inputs([inp_op]) outer_control_inputs = [op for op in control_inputs if self._IsInOuterContext(op)] -- GitLab From 01fec325b3b4b26bf5338930eb37a252a16786df Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Tue, 21 Nov 2017 12:06:36 -0800 Subject: [PATCH 0705/1801] Allow creating contrib.framework.local_variable that use_resource. Allow creating contrib.framework.global_variable that use_resource. PiperOrigin-RevId: 176542393 --- tensorflow/contrib/framework/BUILD | 1 + tensorflow/contrib/framework/__init__.py | 1 + .../contrib/framework/python/ops/variables.py | 35 ++++++++- .../framework/python/ops/variables_test.py | 77 +++++++++++++++++++ tensorflow/python/ops/variable_scope.py | 6 +- 5 files changed, 115 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index e8dad886a1..5b659ddaa1 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -276,6 +276,7 @@ py_test( "//tensorflow/python:nn_ops", "//tensorflow/python:partitioned_variables", "//tensorflow/python:platform", + "//tensorflow/python:resource_variable_ops", "//tensorflow/python:session", "//tensorflow/python:training", "//tensorflow/python:variable_scope", diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 3f59261183..4edc77f86b 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -65,6 +65,7 @@ See the @{$python/contrib.framework} guide. @@get_variable_full_name @@get_variables_to_restore @@get_variables +@@global_variable @@local_variable @@model_variable @@variable diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py index b766837968..07b7857e7b 100644 --- a/tensorflow/contrib/framework/python/ops/variables.py +++ b/tensorflow/contrib/framework/python/ops/variables.py @@ -60,6 +60,7 @@ __all__ = ['add_model_variable', 'get_variable_full_name', 'get_variables_to_restore', 'get_variables', + 'global_variable', 'local_variable', 'model_variable', 'variable', @@ -147,20 +148,48 @@ def get_or_create_global_step(graph=None): return training_util.get_or_create_global_step(graph) -def local_variable(initial_value, validate_shape=True, name=None): - """Create variable and add it to `GraphKeys.LOCAL_VARIABLES` collection. +def local_variable(initial_value, + validate_shape=True, + name=None, + use_resource=None): + """Create a variable with a value and add it to `GraphKeys.LOCAL_VARIABLES`. Args: initial_value: See variables.Variable.__init__. validate_shape: See variables.Variable.__init__. name: See variables.Variable.__init__. + use_resource: If `True` use a ResourceVariable instead of a Variable. Returns: New variable. """ return variable_scope.variable( initial_value, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], - validate_shape=validate_shape, name=name) + validate_shape=validate_shape, + use_resource=use_resource, + name=name) + + +def global_variable(initial_value, + validate_shape=True, + name=None, + use_resource=None): + """Create a variable with a value and add it to `GraphKeys.GLOBAL_VARIABLES`. + + Args: + initial_value: See variables.Variable.__init__. + validate_shape: See variables.Variable.__init__. + name: See variables.Variable.__init__. + use_resource: If `True` use a ResourceVariable instead of a Variable. + Returns: + New variable. + """ + return variable_scope.variable( + initial_value, trainable=False, + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + validate_shape=validate_shape, + use_resource=use_resource, + name=name) @contrib_add_arg_scope diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py index 6a74e4e866..2f06df93ac 100644 --- a/tensorflow/contrib/framework/python/ops/variables_test.py +++ b/tensorflow/contrib/framework/python/ops/variables_test.py @@ -33,6 +33,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import gfile @@ -102,6 +103,82 @@ class LocalVariableTest(test.TestCase): sess.run(variables_lib.local_variables_initializer()) self.assertAllEqual(a.eval(), [0] * 5) + def testResourceVariable(self): + a = variables_lib2.local_variable(0) + b = variables_lib2.local_variable(0, use_resource=True) + self.assertEqual(type(a), variables_lib.Variable) + self.assertEqual(type(b), resource_variable_ops.ResourceVariable) + + +class GlobalVariableTest(test.TestCase): + + def test_global_variable(self): + with self.test_session() as sess: + self.assertEquals([], variables_lib.global_variables()) + value0 = 42 + variables_lib2.global_variable(value0) + value1 = 43 + variables_lib2.global_variable(value1) + variables = variables_lib.global_variables() + self.assertEquals(2, len(variables)) + with self.assertRaisesOpError( + 'Attempting to use uninitialized value Variable'): + sess.run(variables) + variables_lib.variables_initializer(variables).run() + self.assertAllEqual(set([value0, value1]), set(sess.run(variables))) + + def testVariableNameAndShape(self): + with self.test_session(): + with variable_scope.variable_scope('A'): + a = variables_lib2.global_variable([1, 1, 1, 1, 1], name='a') + self.assertEquals(a.op.name, 'A/a') + self.assertListEqual(a.get_shape().as_list(), [5]) + self.assertListEqual([a], variables_lib.global_variables()) + + def testGlobalVariableNotInLocalVariables(self): + with self.test_session(): + with variable_scope.variable_scope('A'): + a = variables_lib2.global_variable(0) + self.assertFalse(a in variables_lib.local_variables()) + self.assertTrue(a in variables_lib.global_variables()) + + def testGlobalVariableInVariablesToRestore(self): + with self.test_session(): + with variable_scope.variable_scope('A'): + a = variables_lib2.global_variable(0) + self.assertFalse(a in variables_lib.local_variables()) + self.assertTrue(a in variables_lib2.get_variables_to_restore()) + + def testGetVariablesReturnsThem(self): + with self.test_session(): + with variable_scope.variable_scope('A'): + a = variables_lib2.global_variable(0) + with variable_scope.variable_scope('B'): + b = variables_lib2.global_variable(0) + self.assertEquals([a], variables_lib2.get_variables('A')) + self.assertEquals([b], variables_lib2.get_variables('B')) + + def testGetLocalVariablesDontReturnsThem(self): + with self.test_session(): + with variable_scope.variable_scope('A'): + variables_lib2.global_variable(0) + with variable_scope.variable_scope('B'): + variables_lib2.global_variable(0) + self.assertEquals([], variables_lib2.get_local_variables('A')) + self.assertEquals([], variables_lib2.get_local_variables('B')) + + def testInitializedVariableValue(self): + with self.test_session() as sess: + a = variables_lib2.global_variable([0, 0, 0, 0, 0], name='a') + sess.run(variables_lib.global_variables_initializer()) + self.assertAllEqual(a.eval(), [0] * 5) + + def testResourceVariable(self): + a = variables_lib2.global_variable(0) + b = variables_lib2.global_variable(0, use_resource=True) + self.assertEqual(type(a), variables_lib.Variable) + self.assertEqual(type(b), resource_variable_ops.ResourceVariable) + class GlobalStepTest(test.TestCase): diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 91dea12da2..dd435249f4 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1985,8 +1985,10 @@ def variable(initial_value=None, validate_shape=True, caching_device=None, name=None, - dtype=None): - use_resource = get_variable_scope().use_resource + dtype=None, + use_resource=None): + if use_resource is None: + use_resource = get_variable_scope().use_resource if use_resource or (use_resource is None and context.in_eager_mode()): return resource_variable_ops.ResourceVariable( initial_value=initial_value, trainable=trainable, -- GitLab From 89449ee332974650f39b5cee468ee20a53f8ef33 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 12:11:14 -0800 Subject: [PATCH 0706/1801] Fix error messages in ops.register_dense_tensor_like_type(). PiperOrigin-RevId: 176543190 --- tensorflow/python/framework/ops.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 132571dd05..2785aed13e 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -157,14 +157,18 @@ def register_dense_tensor_like_type(tensor_type): """ try: if not isinstance(tensor_type.name, property): - raise TypeError("Type %s does not define a `name` property") + raise TypeError("Type %s does not define a `name` property" % + tensor_type.__name__) except AttributeError: - raise TypeError("Type %s does not define a `name` property") + raise TypeError("Type %s does not define a `name` property" % + tensor_type.__name__) try: if not isinstance(tensor_type.dtype, property): - raise TypeError("Type %s does not define a `dtype` property") + raise TypeError("Type %s does not define a `dtype` property" % + tensor_type.__name__) except AttributeError: - raise TypeError("Type %s does not define a `dtype` property") + raise TypeError("Type %s does not define a `dtype` property" % + tensor_type.__name__) # We expect this list to be small, so choose quadratic complexity # for registration, so that we have a tuple that can be used for # more efficient `isinstance` checks later. -- GitLab From 9ad26eb766ac6e742503c1533efa324815ee4653 Mon Sep 17 00:00:00 2001 From: Christopher Shallue Date: Tue, 21 Nov 2017 12:16:33 -0800 Subject: [PATCH 0707/1801] Add **kwargs to HParams.to_json() to be passed to json.dumps() PiperOrigin-RevId: 176543810 --- .../contrib/training/python/training/hparam.py | 17 +++++++++++++++-- .../training/python/training/hparam_test.py | 10 ++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 8d5f47ca4d..80de0f6eb7 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -550,13 +550,26 @@ class HParams(object): def get_model_structure(self): return self._model_structure - def to_json(self): + def to_json(self, indent=None, separators=None, sort_keys=False): """Serializes the hyperparameters into JSON. + Args: + indent: If a non-negative integer, JSON array elements and object members + will be pretty-printed with that indent level. An indent level of 0, or + negative, will only insert newlines. `None` (the default) selects the + most compact representation. + separators: Optional `(item_separator, key_separator)` tuple. Default is + `(', ', ': ')`. + sort_keys: If `True`, the output dictionaries will be sorted by key. + Returns: A JSON string. """ - return json.dumps(self.values()) + return json.dumps( + self.values(), + indent=indent, + separators=separators, + sort_keys=sort_keys) def parse_json(self, values_json): """Override hyperparameter values, parsing new values from a json object. diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py index 643905d3a6..28e4b4d01e 100644 --- a/tensorflow/contrib/training/python/training/hparam_test.py +++ b/tensorflow/contrib/training/python/training/hparam_test.py @@ -292,6 +292,16 @@ class HParamsTest(test.TestCase): self.assertEqual('relu4', hparams2.c_c) self.assertEqual(False, hparams2.d) + hparams3 = hparam.HParams(aaa=123) + self.assertEqual('{"aaa": 123}', hparams3.to_json()) + self.assertEqual('{\n "aaa": 123\n}', hparams3.to_json(indent=2)) + self.assertEqual('{"aaa"=123}', hparams3.to_json(separators=(';', '='))) + + hparams4 = hparam.HParams(aaa=123, b='hello', c_c=False) + self.assertEqual( + '{"aaa": 123, "b": "hello", "c_c": false}', + hparams4.to_json(sort_keys=True)) + def testSetHParam(self): hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6', d=True) self.assertDictEqual({ -- GitLab From b5dcb0161942c467be6cba19aa0ee05aef742d2e Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Tue, 21 Nov 2017 12:18:17 -0800 Subject: [PATCH 0708/1801] Make all assert_* ops work in eager mode PiperOrigin-RevId: 176544038 --- .../python/kernel_tests/check_ops_test.py | 701 ++++++++++-------- tensorflow/python/ops/check_ops.py | 102 ++- 2 files changed, 456 insertions(+), 347 deletions(-) diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index 43785adcee..7ce0f1e7b8 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -34,38 +34,45 @@ from tensorflow.python.platform import test class AssertProperIterableTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_single_tensor_raises(self): tensor = constant_op.constant(1) with self.assertRaisesRegexp(TypeError, "proper"): check_ops.assert_proper_iterable(tensor) + @test_util.run_in_graph_and_eager_modes() def test_single_sparse_tensor_raises(self): ten = sparse_tensor.SparseTensor( indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]) with self.assertRaisesRegexp(TypeError, "proper"): check_ops.assert_proper_iterable(ten) + @test_util.run_in_graph_and_eager_modes() def test_single_ndarray_raises(self): array = np.array([1, 2, 3]) with self.assertRaisesRegexp(TypeError, "proper"): check_ops.assert_proper_iterable(array) + @test_util.run_in_graph_and_eager_modes() def test_single_string_raises(self): mystr = "hello" with self.assertRaisesRegexp(TypeError, "proper"): check_ops.assert_proper_iterable(mystr) + @test_util.run_in_graph_and_eager_modes() def test_non_iterable_object_raises(self): non_iterable = 1234 with self.assertRaisesRegexp(TypeError, "to be iterable"): check_ops.assert_proper_iterable(non_iterable) + @test_util.run_in_graph_and_eager_modes() def test_list_does_not_raise(self): list_of_stuff = [ constant_op.constant([11, 22]), constant_op.constant([1, 2]) ] check_ops.assert_proper_iterable(list_of_stuff) + @test_util.run_in_graph_and_eager_modes() def test_generator_does_not_raise(self): generator_of_stuff = (constant_op.constant([11, 22]), constant_op.constant( [1, 2])) @@ -333,265 +340,283 @@ class AssertLessTest(test.TestCase): class AssertLessEqualTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - with ops.control_dependencies( - [check_ops.assert_less_equal(small, small)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 2], name="small") + with ops.control_dependencies( + [check_ops.assert_less_equal(small, small)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 4], name="big") + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 4], name="big") + with self.assertRaisesOpError("fail"): with ops.control_dependencies( [check_ops.assert_less_equal( big, small, message="fail")]): out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*big.*small"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 2], name="big") - with ops.control_dependencies([check_ops.assert_less_equal(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 2], name="big") + with ops.control_dependencies([check_ops.assert_less_equal(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_less_equal_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1], name="small") - big = constant_op.constant([3, 1], name="big") - with ops.control_dependencies([check_ops.assert_less_equal(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1], name="small") + big = constant_op.constant([3, 1], name="big") + with ops.control_dependencies([check_ops.assert_less_equal(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_less_equal_but_non_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 1, 1], name="small") - big = constant_op.constant([3, 1], name="big") - with self.assertRaisesRegexp(ValueError, "must be"): - with ops.control_dependencies( - [check_ops.assert_less_equal(small, big)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([3, 1], name="small") + big = constant_op.constant([1, 1, 1], name="big") + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (errors.InvalidArgumentError, ValueError), + (r"Incompatible shapes: \[2\] vs. \[3\]|" + r"Dimensions must be equal, but are 2 and 3")): + with ops.control_dependencies( + [check_ops.assert_less_equal(small, big)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - with self.test_session(): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies( - [check_ops.assert_less_equal(larry, curly)]): - out = array_ops.identity(larry) - out.eval() + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies( + [check_ops.assert_less_equal(larry, curly)]): + out = array_ops.identity(larry) + self.evaluate(out) class AssertGreaterTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_raises_when_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") + small = constant_op.constant([1, 2], name="small") + with self.assertRaisesOpError("fail"): with ops.control_dependencies( [check_ops.assert_greater( small, small, message="fail")]): out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*small.*small"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_less(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 4], name="big") + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 4], name="big") + with self.assertRaisesOpError("x > y did not hold"): with ops.control_dependencies([check_ops.assert_greater(small, big)]): out = array_ops.identity(big) - with self.assertRaisesOpError("small.*big"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_greater(self): - with self.test_session(): - small = constant_op.constant([3, 1], name="small") - big = constant_op.constant([4, 2], name="big") - with ops.control_dependencies([check_ops.assert_greater(big, small)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([3, 1], name="small") + big = constant_op.constant([4, 2], name="big") + with ops.control_dependencies([check_ops.assert_greater(big, small)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_greater_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1], name="small") - big = constant_op.constant([3, 2], name="big") - with ops.control_dependencies([check_ops.assert_greater(big, small)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1], name="small") + big = constant_op.constant([3, 2], name="big") + with ops.control_dependencies([check_ops.assert_greater(big, small)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_greater_but_non_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 1, 1], name="small") - big = constant_op.constant([3, 2], name="big") - with self.assertRaisesRegexp(ValueError, "must be"): - with ops.control_dependencies([check_ops.assert_greater(big, small)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 1, 1], name="small") + big = constant_op.constant([3, 2], name="big") + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (errors.InvalidArgumentError, ValueError), + (r"Incompatible shapes: \[2\] vs. \[3\]|" + r"Dimensions must be equal, but are 2 and 3")): + with ops.control_dependencies([check_ops.assert_greater(big, small)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - with self.test_session(): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies([check_ops.assert_greater(larry, curly)]): - out = array_ops.identity(larry) - out.eval() + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies([check_ops.assert_greater(larry, curly)]): + out = array_ops.identity(larry) + self.evaluate(out) class AssertGreaterEqualTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - with ops.control_dependencies( - [check_ops.assert_greater_equal(small, small)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 2], name="small") + with ops.control_dependencies( + [check_ops.assert_greater_equal(small, small)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_less(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 4], name="big") + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 4], name="big") + with self.assertRaisesOpError("fail"): with ops.control_dependencies( [check_ops.assert_greater_equal( small, big, message="fail")]): out = array_ops.identity(small) - with self.assertRaisesOpError("fail.*small.*big"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_greater_equal(self): - with self.test_session(): - small = constant_op.constant([1, 2], name="small") - big = constant_op.constant([3, 2], name="big") - with ops.control_dependencies( - [check_ops.assert_greater_equal(big, small)]): - out = array_ops.identity(small) - out.eval() + small = constant_op.constant([1, 2], name="small") + big = constant_op.constant([3, 2], name="big") + with ops.control_dependencies( + [check_ops.assert_greater_equal(big, small)]): + out = array_ops.identity(small) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_greater_equal_and_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1], name="small") - big = constant_op.constant([3, 1], name="big") + small = constant_op.constant([1], name="small") + big = constant_op.constant([3, 1], name="big") + with ops.control_dependencies( + [check_ops.assert_greater_equal(big, small)]): + out = array_ops.identity(small) + self.evaluate(out) + + @test_util.run_in_graph_and_eager_modes() + def test_raises_when_less_equal_but_non_broadcastable_shapes(self): + small = constant_op.constant([1, 1, 1], name="big") + big = constant_op.constant([3, 1], name="small") + # The exception in eager and non-eager mode is different because + # eager mode relies on shape check done as part of the C++ op, while + # graph mode does shape checks when creating the `Operation` instance. + with self.assertRaisesRegexp( + (errors.InvalidArgumentError, ValueError), + (r"Incompatible shapes: \[2\] vs. \[3\]|" + r"Dimensions must be equal, but are 2 and 3")): with ops.control_dependencies( [check_ops.assert_greater_equal(big, small)]): out = array_ops.identity(small) - out.eval() - - def test_raises_when_less_equal_but_non_broadcastable_shapes(self): - with self.test_session(): - small = constant_op.constant([1, 1, 1], name="big") - big = constant_op.constant([3, 1], name="small") - with self.assertRaisesRegexp(ValueError, "Dimensions must be equal"): - with ops.control_dependencies( - [check_ops.assert_greater_equal(big, small)]): - out = array_ops.identity(small) - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): - with self.test_session(): - larry = constant_op.constant([]) - curly = constant_op.constant([]) - with ops.control_dependencies( - [check_ops.assert_greater_equal(larry, curly)]): - out = array_ops.identity(larry) - out.eval() + larry = constant_op.constant([]) + curly = constant_op.constant([]) + with ops.control_dependencies( + [check_ops.assert_greater_equal(larry, curly)]): + out = array_ops.identity(larry) + self.evaluate(out) class AssertNegativeTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_negative(self): - with self.test_session(): - frank = constant_op.constant([-1, -2], name="frank") - with ops.control_dependencies([check_ops.assert_negative(frank)]): - out = array_ops.identity(frank) - out.eval() + frank = constant_op.constant([-1, -2], name="frank") + with ops.control_dependencies([check_ops.assert_negative(frank)]): + out = array_ops.identity(frank) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_positive(self): - with self.test_session(): - doug = constant_op.constant([1, 2], name="doug") + doug = constant_op.constant([1, 2], name="doug") + with self.assertRaisesOpError("fail"): with ops.control_dependencies( [check_ops.assert_negative( doug, message="fail")]): out = array_ops.identity(doug) - with self.assertRaisesOpError("fail.*doug"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_zero(self): - with self.test_session(): - claire = constant_op.constant([0], name="claire") + claire = constant_op.constant([0], name="claire") + with self.assertRaisesOpError("x < 0 did not hold"): with ops.control_dependencies([check_ops.assert_negative(claire)]): out = array_ops.identity(claire) - with self.assertRaisesOpError("claire"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_empty_tensor_doesnt_raise(self): # A tensor is negative when it satisfies: # For every element x_i in x, x_i < 0 # and an empty tensor has no elements, so this is trivially satisfied. # This is standard set theory. - with self.test_session(): - empty = constant_op.constant([], name="empty") - with ops.control_dependencies([check_ops.assert_negative(empty)]): - out = array_ops.identity(empty) - out.eval() + empty = constant_op.constant([], name="empty") + with ops.control_dependencies([check_ops.assert_negative(empty)]): + out = array_ops.identity(empty) + self.evaluate(out) class AssertPositiveTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_raises_when_negative(self): - with self.test_session(): - freddie = constant_op.constant([-1, -2], name="freddie") + freddie = constant_op.constant([-1, -2], name="freddie") + with self.assertRaisesOpError("fail"): with ops.control_dependencies( [check_ops.assert_positive( freddie, message="fail")]): out = array_ops.identity(freddie) - with self.assertRaisesOpError("fail.*freddie"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_positive(self): - with self.test_session(): - remmy = constant_op.constant([1, 2], name="remmy") - with ops.control_dependencies([check_ops.assert_positive(remmy)]): - out = array_ops.identity(remmy) - out.eval() + remmy = constant_op.constant([1, 2], name="remmy") + with ops.control_dependencies([check_ops.assert_positive(remmy)]): + out = array_ops.identity(remmy) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_zero(self): - with self.test_session(): - meechum = constant_op.constant([0], name="meechum") + meechum = constant_op.constant([0], name="meechum") + with self.assertRaisesOpError("x > 0 did not hold"): with ops.control_dependencies([check_ops.assert_positive(meechum)]): out = array_ops.identity(meechum) - with self.assertRaisesOpError("meechum"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_empty_tensor_doesnt_raise(self): # A tensor is positive when it satisfies: # For every element x_i in x, x_i > 0 # and an empty tensor has no elements, so this is trivially satisfied. # This is standard set theory. - with self.test_session(): - empty = constant_op.constant([], name="empty") - with ops.control_dependencies([check_ops.assert_positive(empty)]): - out = array_ops.identity(empty) - out.eval() + empty = constant_op.constant([], name="empty") + with ops.control_dependencies([check_ops.assert_positive(empty)]): + out = array_ops.identity(empty) + self.evaluate(out) class AssertRankTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_rank_zero_tensor_raises_if_rank_too_small_static_rank(self): - with self.test_session(): - tensor = constant_op.constant(1, name="my_tensor") - desired_rank = 1 - with self.assertRaisesRegexp(ValueError, - "fail.*my_tensor.*must have rank 1"): - with ops.control_dependencies( - [check_ops.assert_rank( - tensor, desired_rank, message="fail")]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant(1, name="my_tensor") + desired_rank = 1 + with self.assertRaisesRegexp(ValueError, + "fail.*must have rank 1"): + with ops.control_dependencies( + [check_ops.assert_rank( + tensor, desired_rank, message="fail")]): + self.evaluate(array_ops.identity(tensor)) def test_rank_zero_tensor_raises_if_rank_too_small_dynamic_rank(self): with self.test_session(): @@ -603,13 +628,13 @@ class AssertRankTest(test.TestCase): with self.assertRaisesOpError("fail.*my_tensor.*rank"): array_ops.identity(tensor).eval(feed_dict={tensor: 0}) + @test_util.run_in_graph_and_eager_modes() def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_static_rank(self): - with self.test_session(): - tensor = constant_op.constant(1, name="my_tensor") - desired_rank = 0 - with ops.control_dependencies( - [check_ops.assert_rank(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant(1, name="my_tensor") + desired_rank = 0 + with ops.control_dependencies( + [check_ops.assert_rank(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self): with self.test_session(): @@ -619,14 +644,14 @@ class AssertRankTest(test.TestCase): [check_ops.assert_rank(tensor, desired_rank)]): array_ops.identity(tensor).eval(feed_dict={tensor: 0}) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_tensor_raises_if_rank_too_large_static_rank(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - desired_rank = 0 - with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"): - with ops.control_dependencies( - [check_ops.assert_rank(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant([1, 2], name="my_tensor") + desired_rank = 0 + with self.assertRaisesRegexp(ValueError, "rank"): + with ops.control_dependencies( + [check_ops.assert_rank(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_one_tensor_raises_if_rank_too_large_dynamic_rank(self): with self.test_session(): @@ -637,13 +662,13 @@ class AssertRankTest(test.TestCase): with self.assertRaisesOpError("my_tensor.*rank"): array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_tensor_doesnt_raise_if_rank_just_right_static_rank(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - desired_rank = 1 - with ops.control_dependencies( - [check_ops.assert_rank(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant([1, 2], name="my_tensor") + desired_rank = 1 + with ops.control_dependencies( + [check_ops.assert_rank(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_one_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self): with self.test_session(): @@ -653,14 +678,14 @@ class AssertRankTest(test.TestCase): [check_ops.assert_rank(tensor, desired_rank)]): array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_tensor_raises_if_rank_too_small_static_rank(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - desired_rank = 2 - with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"): - with ops.control_dependencies( - [check_ops.assert_rank(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant([1, 2], name="my_tensor") + desired_rank = 2 + with self.assertRaisesRegexp(ValueError, "rank"): + with ops.control_dependencies( + [check_ops.assert_rank(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_one_tensor_raises_if_rank_too_small_dynamic_rank(self): with self.test_session(): @@ -671,11 +696,11 @@ class AssertRankTest(test.TestCase): with self.assertRaisesOpError("my_tensor.*rank"): array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_raises_if_rank_is_not_scalar_static(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"): - check_ops.assert_rank(tensor, np.array([], dtype=np.int32)) + tensor = constant_op.constant([1, 2], name="my_tensor") + with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"): + check_ops.assert_rank(tensor, np.array([], dtype=np.int32)) def test_raises_if_rank_is_not_scalar_dynamic(self): with self.test_session(): @@ -687,12 +712,12 @@ class AssertRankTest(test.TestCase): [check_ops.assert_rank(tensor, rank_tensor)]): array_ops.identity(tensor).eval(feed_dict={rank_tensor: [1, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_raises_if_rank_is_not_integer_static(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - with self.assertRaisesRegexp(TypeError, - "must be of type "): - check_ops.assert_rank(tensor, .5) + tensor = constant_op.constant([1, 2], name="my_tensor") + with self.assertRaisesRegexp(TypeError, + "must be of type "): + check_ops.assert_rank(tensor, .5) def test_raises_if_rank_is_not_integer_dynamic(self): with self.test_session(): @@ -708,14 +733,14 @@ class AssertRankTest(test.TestCase): class AssertRankInTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_rank_zero_tensor_raises_if_rank_mismatch_static_rank(self): - with self.test_session(): - tensor_rank0 = constant_op.constant(42, name="my_tensor") - with self.assertRaisesRegexp( - ValueError, "fail.*my_tensor.*must have rank.*in.*1.*2"): - with ops.control_dependencies([ - check_ops.assert_rank_in(tensor_rank0, (1, 2), message="fail")]): - array_ops.identity(tensor_rank0).eval() + tensor_rank0 = constant_op.constant(42, name="my_tensor") + with self.assertRaisesRegexp( + ValueError, "fail.*must have rank.*in.*1.*2"): + with ops.control_dependencies([ + check_ops.assert_rank_in(tensor_rank0, (1, 2), message="fail")]): + self.evaluate(array_ops.identity(tensor_rank0)) def test_rank_zero_tensor_raises_if_rank_mismatch_dynamic_rank(self): with self.test_session(): @@ -725,13 +750,13 @@ class AssertRankInTest(test.TestCase): with self.assertRaisesOpError("fail.*my_tensor.*rank"): array_ops.identity(tensor_rank0).eval(feed_dict={tensor_rank0: 42.0}) + @test_util.run_in_graph_and_eager_modes() def test_rank_zero_tensor_doesnt_raise_if_rank_matches_static_rank(self): - with self.test_session(): - tensor_rank0 = constant_op.constant(42, name="my_tensor") - for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)): - with ops.control_dependencies([ - check_ops.assert_rank_in(tensor_rank0, desired_ranks)]): - array_ops.identity(tensor_rank0).eval() + tensor_rank0 = constant_op.constant(42, name="my_tensor") + for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)): + with ops.control_dependencies([ + check_ops.assert_rank_in(tensor_rank0, desired_ranks)]): + self.evaluate(array_ops.identity(tensor_rank0)) def test_rank_zero_tensor_doesnt_raise_if_rank_matches_dynamic_rank(self): with self.test_session(): @@ -741,13 +766,13 @@ class AssertRankInTest(test.TestCase): check_ops.assert_rank_in(tensor_rank0, desired_ranks)]): array_ops.identity(tensor_rank0).eval(feed_dict={tensor_rank0: 42.0}) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_tensor_doesnt_raise_if_rank_matches_static_rank(self): - with self.test_session(): - tensor_rank1 = constant_op.constant([42, 43], name="my_tensor") - for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)): - with ops.control_dependencies([ - check_ops.assert_rank_in(tensor_rank1, desired_ranks)]): - array_ops.identity(tensor_rank1).eval() + tensor_rank1 = constant_op.constant([42, 43], name="my_tensor") + for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)): + with ops.control_dependencies([ + check_ops.assert_rank_in(tensor_rank1, desired_ranks)]): + self.evaluate(array_ops.identity(tensor_rank1)) def test_rank_one_tensor_doesnt_raise_if_rank_matches_dynamic_rank(self): with self.test_session(): @@ -759,13 +784,13 @@ class AssertRankInTest(test.TestCase): tensor_rank1: (42.0, 43.0) }) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_tensor_raises_if_rank_mismatches_static_rank(self): - with self.test_session(): - tensor_rank1 = constant_op.constant((42, 43), name="my_tensor") - with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"): - with ops.control_dependencies([ - check_ops.assert_rank_in(tensor_rank1, (0, 2))]): - array_ops.identity(tensor_rank1).eval() + tensor_rank1 = constant_op.constant((42, 43), name="my_tensor") + with self.assertRaisesRegexp(ValueError, "rank"): + with ops.control_dependencies([ + check_ops.assert_rank_in(tensor_rank1, (0, 2))]): + self.evaluate(array_ops.identity(tensor_rank1)) def test_rank_one_tensor_raises_if_rank_mismatches_dynamic_rank(self): with self.test_session(): @@ -777,14 +802,14 @@ class AssertRankInTest(test.TestCase): tensor_rank1: (42.0, 43.0) }) + @test_util.run_in_graph_and_eager_modes() def test_raises_if_rank_is_not_scalar_static(self): - with self.test_session(): - tensor = constant_op.constant((42, 43), name="my_tensor") - desired_ranks = ( - np.array(1, dtype=np.int32), - np.array((2, 1), dtype=np.int32)) - with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"): - check_ops.assert_rank_in(tensor, desired_ranks) + tensor = constant_op.constant((42, 43), name="my_tensor") + desired_ranks = ( + np.array(1, dtype=np.int32), + np.array((2, 1), dtype=np.int32)) + with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"): + check_ops.assert_rank_in(tensor, desired_ranks) def test_raises_if_rank_is_not_scalar_dynamic(self): with self.test_session(): @@ -801,12 +826,12 @@ class AssertRankInTest(test.TestCase): desired_ranks[1]: [2, 1], }) + @test_util.run_in_graph_and_eager_modes() def test_raises_if_rank_is_not_integer_static(self): - with self.test_session(): - tensor = constant_op.constant((42, 43), name="my_tensor") - with self.assertRaisesRegexp(TypeError, - "must be of type "): - check_ops.assert_rank_in(tensor, (1, .5,)) + tensor = constant_op.constant((42, 43), name="my_tensor") + with self.assertRaisesRegexp(TypeError, + "must be of type "): + check_ops.assert_rank_in(tensor, (1, .5,)) def test_raises_if_rank_is_not_integer_dynamic(self): with self.test_session(): @@ -822,14 +847,14 @@ class AssertRankInTest(test.TestCase): class AssertRankAtLeastTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_rank_zero_tensor_raises_if_rank_too_small_static_rank(self): - with self.test_session(): - tensor = constant_op.constant(1, name="my_tensor") - desired_rank = 1 - with self.assertRaisesRegexp(ValueError, "my_tensor.*rank at least 1"): - with ops.control_dependencies( - [check_ops.assert_rank_at_least(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant(1, name="my_tensor") + desired_rank = 1 + with self.assertRaisesRegexp(ValueError, "rank at least 1"): + with ops.control_dependencies( + [check_ops.assert_rank_at_least(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_zero_tensor_raises_if_rank_too_small_dynamic_rank(self): with self.test_session(): @@ -840,13 +865,13 @@ class AssertRankAtLeastTest(test.TestCase): with self.assertRaisesOpError("my_tensor.*rank"): array_ops.identity(tensor).eval(feed_dict={tensor: 0}) + @test_util.run_in_graph_and_eager_modes() def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_static_rank(self): - with self.test_session(): - tensor = constant_op.constant(1, name="my_tensor") - desired_rank = 0 - with ops.control_dependencies( - [check_ops.assert_rank_at_least(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant(1, name="my_tensor") + desired_rank = 0 + with ops.control_dependencies( + [check_ops.assert_rank_at_least(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self): with self.test_session(): @@ -856,13 +881,13 @@ class AssertRankAtLeastTest(test.TestCase): [check_ops.assert_rank_at_least(tensor, desired_rank)]): array_ops.identity(tensor).eval(feed_dict={tensor: 0}) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_ten_doesnt_raise_raise_if_rank_too_large_static_rank(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - desired_rank = 0 - with ops.control_dependencies( - [check_ops.assert_rank_at_least(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant([1, 2], name="my_tensor") + desired_rank = 0 + with ops.control_dependencies( + [check_ops.assert_rank_at_least(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_one_ten_doesnt_raise_if_rank_too_large_dynamic_rank(self): with self.test_session(): @@ -872,13 +897,13 @@ class AssertRankAtLeastTest(test.TestCase): [check_ops.assert_rank_at_least(tensor, desired_rank)]): array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_tensor_doesnt_raise_if_rank_just_right_static_rank(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - desired_rank = 1 - with ops.control_dependencies( - [check_ops.assert_rank_at_least(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant([1, 2], name="my_tensor") + desired_rank = 1 + with ops.control_dependencies( + [check_ops.assert_rank_at_least(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_one_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self): with self.test_session(): @@ -888,14 +913,14 @@ class AssertRankAtLeastTest(test.TestCase): [check_ops.assert_rank_at_least(tensor, desired_rank)]): array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]}) + @test_util.run_in_graph_and_eager_modes() def test_rank_one_tensor_raises_if_rank_too_small_static_rank(self): - with self.test_session(): - tensor = constant_op.constant([1, 2], name="my_tensor") - desired_rank = 2 - with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"): - with ops.control_dependencies( - [check_ops.assert_rank_at_least(tensor, desired_rank)]): - array_ops.identity(tensor).eval() + tensor = constant_op.constant([1, 2], name="my_tensor") + desired_rank = 2 + with self.assertRaisesRegexp(ValueError, "rank at least 2"): + with ops.control_dependencies( + [check_ops.assert_rank_at_least(tensor, desired_rank)]): + self.evaluate(array_ops.identity(tensor)) def test_rank_one_tensor_raises_if_rank_too_small_dynamic_rank(self): with self.test_session(): @@ -909,144 +934,165 @@ class AssertRankAtLeastTest(test.TestCase): class AssertNonNegativeTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_raises_when_negative(self): - with self.test_session(): - zoe = constant_op.constant([-1, -2], name="zoe") + zoe = constant_op.constant([-1, -2], name="zoe") + with self.assertRaisesOpError("x >= 0 did not hold"): with ops.control_dependencies([check_ops.assert_non_negative(zoe)]): out = array_ops.identity(zoe) - with self.assertRaisesOpError("zoe"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_zero_and_positive(self): - with self.test_session(): - lucas = constant_op.constant([0, 2], name="lucas") - with ops.control_dependencies([check_ops.assert_non_negative(lucas)]): - out = array_ops.identity(lucas) - out.eval() + lucas = constant_op.constant([0, 2], name="lucas") + with ops.control_dependencies([check_ops.assert_non_negative(lucas)]): + out = array_ops.identity(lucas) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_empty_tensor_doesnt_raise(self): # A tensor is non-negative when it satisfies: # For every element x_i in x, x_i >= 0 # and an empty tensor has no elements, so this is trivially satisfied. # This is standard set theory. - with self.test_session(): - empty = constant_op.constant([], name="empty") - with ops.control_dependencies([check_ops.assert_non_negative(empty)]): - out = array_ops.identity(empty) - out.eval() + empty = constant_op.constant([], name="empty") + with ops.control_dependencies([check_ops.assert_non_negative(empty)]): + out = array_ops.identity(empty) + self.evaluate(out) class AssertNonPositiveTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_zero_and_negative(self): - with self.test_session(): - tom = constant_op.constant([0, -2], name="tom") - with ops.control_dependencies([check_ops.assert_non_positive(tom)]): - out = array_ops.identity(tom) - out.eval() + tom = constant_op.constant([0, -2], name="tom") + with ops.control_dependencies([check_ops.assert_non_positive(tom)]): + out = array_ops.identity(tom) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_positive(self): - with self.test_session(): - rachel = constant_op.constant([0, 2], name="rachel") + rachel = constant_op.constant([0, 2], name="rachel") + with self.assertRaisesOpError("x <= 0 did not hold"): with ops.control_dependencies([check_ops.assert_non_positive(rachel)]): out = array_ops.identity(rachel) - with self.assertRaisesOpError("rachel"): - out.eval() + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_empty_tensor_doesnt_raise(self): # A tensor is non-positive when it satisfies: # For every element x_i in x, x_i <= 0 # and an empty tensor has no elements, so this is trivially satisfied. # This is standard set theory. - with self.test_session(): - empty = constant_op.constant([], name="empty") - with ops.control_dependencies([check_ops.assert_non_positive(empty)]): - out = array_ops.identity(empty) - out.eval() + empty = constant_op.constant([], name="empty") + with ops.control_dependencies([check_ops.assert_non_positive(empty)]): + out = array_ops.identity(empty) + self.evaluate(out) class AssertIntegerTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_integer(self): - with self.test_session(): - integers = constant_op.constant([1, 2], name="integers") - with ops.control_dependencies([check_ops.assert_integer(integers)]): - out = array_ops.identity(integers) - out.eval() + integers = constant_op.constant([1, 2], name="integers") + with ops.control_dependencies([check_ops.assert_integer(integers)]): + out = array_ops.identity(integers) + self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() def test_raises_when_float(self): - with self.test_session(): - floats = constant_op.constant([1.0, 2.0], name="floats") - with self.assertRaisesRegexp(TypeError, "Expected.*integer"): - check_ops.assert_integer(floats) + floats = constant_op.constant([1.0, 2.0], name="floats") + with self.assertRaisesRegexp(TypeError, "Expected.*integer"): + check_ops.assert_integer(floats) + + +class AssertTypeTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def test_doesnt_raise_when_correct_type(self): + integers = constant_op.constant([1, 2], dtype=dtypes.int64) + with ops.control_dependencies([ + check_ops.assert_type(integers, dtypes.int64)]): + out = array_ops.identity(integers) + self.evaluate(out) + + @test_util.run_in_graph_and_eager_modes() + def test_raises_when_wrong_type(self): + floats = constant_op.constant([1.0, 2.0], dtype=dtypes.float16) + with self.assertRaisesRegexp(TypeError, "must be of type.*float32"): + check_ops.assert_type(floats, dtypes.float32) class IsStrictlyIncreasingTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_constant_tensor_is_not_strictly_increasing(self): - with self.test_session(): - self.assertFalse(check_ops.is_strictly_increasing([1, 1, 1]).eval()) + self.assertFalse(self.evaluate(check_ops.is_strictly_increasing([1, 1, 1]))) + @test_util.run_in_graph_and_eager_modes() def test_decreasing_tensor_is_not_strictly_increasing(self): - with self.test_session(): - self.assertFalse(check_ops.is_strictly_increasing([1, 0, -1]).eval()) + self.assertFalse(self.evaluate( + check_ops.is_strictly_increasing([1, 0, -1]))) + @test_util.run_in_graph_and_eager_modes() def test_2d_decreasing_tensor_is_not_strictly_increasing(self): - with self.test_session(): - self.assertFalse( - check_ops.is_strictly_increasing([[1, 3], [2, 4]]).eval()) + self.assertFalse( + self.evaluate(check_ops.is_strictly_increasing([[1, 3], [2, 4]]))) + @test_util.run_in_graph_and_eager_modes() def test_increasing_tensor_is_increasing(self): - with self.test_session(): - self.assertTrue(check_ops.is_strictly_increasing([1, 2, 3]).eval()) + self.assertTrue(self.evaluate(check_ops.is_strictly_increasing([1, 2, 3]))) + @test_util.run_in_graph_and_eager_modes() def test_increasing_rank_two_tensor(self): - with self.test_session(): - self.assertTrue( - check_ops.is_strictly_increasing([[-1, 2], [3, 4]]).eval()) + self.assertTrue( + self.evaluate(check_ops.is_strictly_increasing([[-1, 2], [3, 4]]))) + @test_util.run_in_graph_and_eager_modes() def test_tensor_with_one_element_is_strictly_increasing(self): - with self.test_session(): - self.assertTrue(check_ops.is_strictly_increasing([1]).eval()) + self.assertTrue(self.evaluate(check_ops.is_strictly_increasing([1]))) + @test_util.run_in_graph_and_eager_modes() def test_empty_tensor_is_strictly_increasing(self): - with self.test_session(): - self.assertTrue(check_ops.is_strictly_increasing([]).eval()) + self.assertTrue(self.evaluate(check_ops.is_strictly_increasing([]))) class IsNonDecreasingTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_constant_tensor_is_non_decreasing(self): - with self.test_session(): - self.assertTrue(check_ops.is_non_decreasing([1, 1, 1]).eval()) + self.assertTrue(self.evaluate(check_ops.is_non_decreasing([1, 1, 1]))) + @test_util.run_in_graph_and_eager_modes() def test_decreasing_tensor_is_not_non_decreasing(self): - with self.test_session(): - self.assertFalse(check_ops.is_non_decreasing([3, 2, 1]).eval()) + self.assertFalse(self.evaluate(check_ops.is_non_decreasing([3, 2, 1]))) + @test_util.run_in_graph_and_eager_modes() def test_2d_decreasing_tensor_is_not_non_decreasing(self): - with self.test_session(): - self.assertFalse(check_ops.is_non_decreasing([[1, 3], [2, 4]]).eval()) + self.assertFalse(self.evaluate( + check_ops.is_non_decreasing([[1, 3], [2, 4]]))) + @test_util.run_in_graph_and_eager_modes() def test_increasing_rank_one_tensor_is_non_decreasing(self): - with self.test_session(): - self.assertTrue(check_ops.is_non_decreasing([1, 2, 3]).eval()) + self.assertTrue(self.evaluate(check_ops.is_non_decreasing([1, 2, 3]))) + @test_util.run_in_graph_and_eager_modes() def test_increasing_rank_two_tensor(self): - with self.test_session(): - self.assertTrue(check_ops.is_non_decreasing([[-1, 2], [3, 3]]).eval()) + self.assertTrue(self.evaluate( + check_ops.is_non_decreasing([[-1, 2], [3, 3]]))) + @test_util.run_in_graph_and_eager_modes() def test_tensor_with_one_element_is_non_decreasing(self): - with self.test_session(): - self.assertTrue(check_ops.is_non_decreasing([1]).eval()) + self.assertTrue(self.evaluate(check_ops.is_non_decreasing([1]))) + @test_util.run_in_graph_and_eager_modes() def test_empty_tensor_is_non_decreasing(self): - with self.test_session(): - self.assertTrue(check_ops.is_non_decreasing([]).eval()) + self.assertTrue(self.evaluate(check_ops.is_non_decreasing([]))) class FloatDTypeTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_assert_same_float_dtype(self): self.assertIs(dtypes.float32, check_ops.assert_same_float_dtype(None, None)) @@ -1100,6 +1146,7 @@ class FloatDTypeTest(test.TestCase): class AssertScalarTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_assert_scalar(self): check_ops.assert_scalar(constant_op.constant(3)) check_ops.assert_scalar(constant_op.constant("foo")) diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index 7e509f72c1..1377af3eac 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -104,6 +104,11 @@ def _assert_static(condition, data): message='\n'.join(data_static)) +def _shape_and_dtype_str(tensor): + """Returns a string containing tensor's shape and dtype.""" + return 'shape=%s dtype=%s' % (tensor.shape, tensor.dtype.name) + + def assert_proper_iterable(values): """Static assert that values is a "proper" iterable. @@ -159,10 +164,14 @@ def assert_negative(x, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_negative', [x, data]): x = ops.convert_to_tensor(x, name='x') if data is None: + if context.in_eager_mode(): + name = _shape_and_dtype_str(x) + else: + name = x.name data = [ message, 'Condition x < 0 did not hold element-wise:', - 'x (%s) = ' % x.name, x] + 'x (%s) = ' % name, x] zero = ops.convert_to_tensor(0, dtype=x.dtype) return assert_less(x, zero, data=data, summarize=summarize) @@ -195,9 +204,13 @@ def assert_positive(x, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_positive', [x, data]): x = ops.convert_to_tensor(x, name='x') if data is None: + if context.in_eager_mode(): + name = _shape_and_dtype_str(x) + else: + name = x.name data = [ message, 'Condition x > 0 did not hold element-wise:', - 'x (%s) = ' % x.name, x] + 'x (%s) = ' % name, x] zero = ops.convert_to_tensor(0, dtype=x.dtype) return assert_less(zero, x, data=data, summarize=summarize) @@ -232,7 +245,7 @@ def assert_non_negative(x, data=None, summarize=None, message=None, name=None): x = ops.convert_to_tensor(x, name='x') if data is None: if context.in_eager_mode(): - name = str(x) + name = _shape_and_dtype_str(x) else: name = x.name data = [ @@ -272,10 +285,14 @@ def assert_non_positive(x, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_non_positive', [x, data]): x = ops.convert_to_tensor(x, name='x') if data is None: + if context.in_eager_mode(): + name = _shape_and_dtype_str(x) + else: + name = x.name data = [ message, 'Condition x <= 0 did not hold element-wise:' - 'x (%s) = ' % x.name, x] + 'x (%s) = ' % name, x] zero = ops.convert_to_tensor(0, dtype=x.dtype) return assert_less_equal(x, zero, data=data, summarize=summarize) @@ -408,8 +425,8 @@ def assert_none_equal( x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') if context.in_eager_mode(): - x_name = 'x' - y_name = 'y' + x_name = _shape_and_dtype_str(x) + y_name = _shape_and_dtype_str(y) else: x_name = x.name y_name = y.name @@ -456,8 +473,8 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') if context.in_eager_mode(): - x_name = 'x' - y_name = 'y' + x_name = _shape_and_dtype_str(x) + y_name = _shape_and_dtype_str(y) else: x_name = x.name y_name = y.name @@ -502,11 +519,18 @@ def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_less_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + if context.in_eager_mode(): + x_name = _shape_and_dtype_str(x) + y_name = _shape_and_dtype_str(y) + else: + x_name = x.name + y_name = y.name + if data is None: data = [ message, 'Condition x <= y did not hold element-wise:' - 'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y + 'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.less_equal(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) @@ -542,11 +566,18 @@ def assert_greater(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_greater', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + if context.in_eager_mode(): + x_name = _shape_and_dtype_str(x) + y_name = _shape_and_dtype_str(y) + else: + x_name = x.name + y_name = y.name + if data is None: data = [ message, 'Condition x > y did not hold element-wise:' - 'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y + 'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.greater(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) @@ -584,11 +615,18 @@ def assert_greater_equal(x, y, data=None, summarize=None, message=None, with ops.name_scope(name, 'assert_greater_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') + if context.in_eager_mode(): + x_name = _shape_and_dtype_str(x) + y_name = _shape_and_dtype_str(y) + else: + x_name = x.name + y_name = y.name + if data is None: data = [ message, 'Condition x >= y did not hold element-wise:' - 'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y + 'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.greater_equal(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize) @@ -676,10 +714,15 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None): static_condition = lambda actual_rank, given_rank: actual_rank == given_rank dynamic_condition = math_ops.equal + if context.in_eager_mode(): + name = '' + else: + name = x.name + if data is None: data = [ message, - 'Tensor %s must have rank' % x.name, rank, 'Received shape: ', + 'Tensor %s must have rank' % name, rank, 'Received shape: ', array_ops.shape(x) ] @@ -691,7 +734,7 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None): if e.args[0] == 'Static rank condition failed': raise ValueError( '%s. Tensor %s must have rank %d. Received rank %d, shape %s' % - (message, x.name, e.args[2], e.args[1], x.get_shape())) + (message, name, e.args[2], e.args[1], x.get_shape())) else: raise @@ -734,10 +777,16 @@ def assert_rank_at_least( static_condition = lambda actual_rank, given_rank: actual_rank >= given_rank dynamic_condition = math_ops.greater_equal + + if context.in_eager_mode(): + name = '' + else: + name = x.name + if data is None: data = [ message, - 'Tensor %s must have rank at least' % x.name, rank, + 'Tensor %s must have rank at least' % name, rank, 'Received shape: ', array_ops.shape(x) ] @@ -749,7 +798,7 @@ def assert_rank_at_least( if e.args[0] == 'Static rank condition failed': raise ValueError( '%s. Tensor %s must have rank at least %d. Received rank %d, ' - 'shape %s' % (message, x.name, e.args[2], e.args[1], x.get_shape())) + 'shape %s' % (message, name, e.args[2], e.args[1], x.get_shape())) else: raise @@ -856,9 +905,14 @@ def assert_rank_in( ranks = tuple([ops.convert_to_tensor(rank, name='rank') for rank in ranks]) message = message or '' + if context.in_eager_mode(): + name = '' + else: + name = x.name + if data is None: data = [ - message, 'Tensor %s must have rank in' % x.name + message, 'Tensor %s must have rank in' % name ] + list(ranks) + [ 'Received shape: ', array_ops.shape(x) ] @@ -871,7 +925,7 @@ def assert_rank_in( if e.args[0] == 'Static rank condition failed': raise ValueError( '%s. Tensor %s must have rank in %s. Received rank %d, ' - 'shape %s' % (message, x.name, e.args[2], e.args[1], x.get_shape())) + 'shape %s' % (message, name, e.args[2], e.args[1], x.get_shape())) else: raise @@ -903,9 +957,13 @@ def assert_integer(x, message=None, name=None): with ops.name_scope(name, 'assert_integer', [x]): x = ops.convert_to_tensor(x, name='x') if not x.dtype.is_integer: + if context.in_eager_mode(): + name = 'tensor' + else: + name = x.name err_msg = ( '%s Expected "x" to be integer type. Found: %s of dtype %s' - % (message, x.name, x.dtype)) + % (message, name, x.dtype)) raise TypeError(err_msg) return control_flow_ops.no_op('statically_determined_was_integer') @@ -1079,6 +1137,10 @@ def assert_scalar(tensor, name=None): tensor = ops.convert_to_tensor(tensor, name=name_scope) shape = tensor.get_shape() if shape.ndims != 0: - raise ValueError('Expected scalar shape for %s, saw shape: %s.' - % (tensor.name, shape)) + if context.in_eager_mode(): + raise ValueError('Expected scalar shape, saw shape: %s.' + % (shape,)) + else: + raise ValueError('Expected scalar shape for %s, saw shape: %s.' + % (tensor.name, shape)) return tensor -- GitLab From 6c7bd707ce26cc89d542bbb326882026a613748c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 12:24:12 -0800 Subject: [PATCH 0709/1801] Add tpu saved model tags. No cpu tag is added because cpu is assumed to be the implicit device. PiperOrigin-RevId: 176544698 --- tensorflow/cc/saved_model/tag_constants.h | 3 +++ .../saved_model/python/saved_model/reader_test.py | 10 +++++++++- tensorflow/python/saved_model/saved_model_test.py | 14 ++++++++++++++ tensorflow/python/saved_model/tag_constants.py | 6 +++++- .../tensorflow.saved_model.tag_constants.pbtxt | 4 ++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tensorflow/cc/saved_model/tag_constants.h b/tensorflow/cc/saved_model/tag_constants.h index 2b0b2d5c7f..b71cb263ca 100644 --- a/tensorflow/cc/saved_model/tag_constants.h +++ b/tensorflow/cc/saved_model/tag_constants.h @@ -21,6 +21,9 @@ namespace tensorflow { /// Tag for the `gpu` graph. constexpr char kSavedModelTagGpu[] = "gpu"; +/// Tag for the `tpu` graph. +constexpr char kSavedModelTagTpu[] = "tpu"; + /// Tag for the `serving` graph. constexpr char kSavedModelTagServe[] = "serve"; diff --git a/tensorflow/contrib/saved_model/python/saved_model/reader_test.py b/tensorflow/contrib/saved_model/python/saved_model/reader_test.py index a8331cbc8f..d10ec9cf0c 100644 --- a/tensorflow/contrib/saved_model/python/saved_model/reader_test.py +++ b/tensorflow/contrib/saved_model/python/saved_model/reader_test.py @@ -86,6 +86,13 @@ class ReaderTest(test.TestCase): self._init_and_validate_variable(sess, "v", 44) builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU]) + # Graph that updates the single variable. SavedModel is invoked: + # - to add the model (weights are not updated). + # - multiple predefined tags for serving on TPU. + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 44) + builder.add_meta_graph([tag_constants.SERVING, tag_constants.TPU]) + # Graph that updates the single variable. SavedModel is invoked: # - to add the model (weights are not updated). # - multiple custom tags. @@ -97,7 +104,8 @@ class ReaderTest(test.TestCase): builder.save() actual_tags = reader.get_saved_model_tag_sets(saved_model_dir) - expected_tags = [["train"], ["serve"], ["serve", "gpu"], ["foo", "bar"]] + expected_tags = [["train"], ["serve"], ["serve", "gpu"], ["serve", "tpu"], + ["foo", "bar"]] self.assertEqual(expected_tags, actual_tags) diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index c6d2c32293..92ca7dec6f 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -214,6 +214,13 @@ class SavedModelTest(test.TestCase): self._init_and_validate_variable(sess, "v", 45) builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU]) + # Graph that updates the single variable. SavedModel invoked to: + # - simply add the model (weights are not updated). + # - multiple tags (from predefined constants for serving on TPU). + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 45) + builder.add_meta_graph([tag_constants.SERVING, tag_constants.TPU]) + # Graph that updates the single variable. SavedModel is invoked: # - to add the model (weights are not updated). # - multiple custom tags. @@ -244,6 +251,13 @@ class SavedModelTest(test.TestCase): self.assertEqual( 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with multiple predefined tags (for serving on TPU) + # whose variables were not saved. + with self.test_session(graph=ops.Graph()) as sess: + loader.load(sess, [tag_constants.SERVING, tag_constants.TPU], export_dir) + self.assertEqual( + 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with multiple tags. Provide duplicate tags to test set # semantics. with self.test_session(graph=ops.Graph()) as sess: diff --git a/tensorflow/python/saved_model/tag_constants.py b/tensorflow/python/saved_model/tag_constants.py index 52868bdf99..e2facafda5 100644 --- a/tensorflow/python/saved_model/tag_constants.py +++ b/tensorflow/python/saved_model/tag_constants.py @@ -31,9 +31,13 @@ TRAINING = "train" # Tag for the `gpu` graph. GPU = "gpu" +# Tag for the `tpu` graph. +TPU = "tpu" + _allowed_symbols = [ "SERVING", "TRAINING", - "GPU" + "GPU", + "TPU" ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt b/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt index 35e49ee9f4..6af72498d7 100644 --- a/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt @@ -8,6 +8,10 @@ tf_module { name: "SERVING" mtype: "" } + member { + name: "TPU" + mtype: "" + } member { name: "TRAINING" mtype: "" -- GitLab From e8366158a2ed2c971315e1e4b9b4f235ea97f8da Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Tue, 21 Nov 2017 13:01:38 -0800 Subject: [PATCH 0710/1801] Replace tf.contrib.framework.get_or_create_global_step with tf.train.get_or_create_global_step in tf.contrib.training.create_train_op. PiperOrigin-RevId: 176549279 --- tensorflow/contrib/training/python/training/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py index eee2b88812..f72e0a3f83 100644 --- a/tensorflow/contrib/training/python/training/training.py +++ b/tensorflow/contrib/training/python/training/training.py @@ -244,7 +244,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.framework.python.ops import variables from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -255,6 +254,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary from tensorflow.python.training import monitored_session from tensorflow.python.training import optimizer as tf_optimizer +from tensorflow.python.training import training_util # TODO(nsilberman): move add_gradients_summaries, clip_gradient_norms and # multiply_gradients into contrib/summaries and contrib/optimizers.py @@ -409,7 +409,7 @@ def create_train_op(total_loss, loss value. """ if global_step is _USE_GLOBAL_STEP: - global_step = variables.get_or_create_global_step() + global_step = training_util.get_or_create_global_step() # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None. global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) -- GitLab From 1a9212a7eda7c347e6320991ac219165d0eb9788 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Tue, 21 Nov 2017 13:16:32 -0800 Subject: [PATCH 0711/1801] Delete duplicate mpi.bzl PiperOrigin-RevId: 176551183 --- tensorflow/BUILD | 1 + tensorflow/third_party/mpi/mpi.bzl | 17 ----------------- 2 files changed, 1 insertion(+), 17 deletions(-) delete mode 100644 tensorflow/third_party/mpi/mpi.bzl diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 6a66d1d44b..49828cd4d6 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -628,6 +628,7 @@ filegroup( "//tensorflow/tools/test:all_files", "//tensorflow/user_ops:all_files", "//third_party/hadoop:all_files", + "//third_party/mpi:all_files", "//third_party/sycl:all_files", "//third_party/sycl/sycl:all_files", ], diff --git a/tensorflow/third_party/mpi/mpi.bzl b/tensorflow/third_party/mpi/mpi.bzl deleted file mode 100644 index 38ce91c4d0..0000000000 --- a/tensorflow/third_party/mpi/mpi.bzl +++ /dev/null @@ -1,17 +0,0 @@ -#OpenMPI and Mvapich/mpich require different headers -#based on the configuration options return one or the other - -def mpi_hdr(): - MPI_LIB_IS_OPENMPI=True - hdrs = [] - if MPI_LIB_IS_OPENMPI: - hdrs = ["mpi.h", "mpi_portable_platform.h"] #When using OpenMPI - else: - hdrs = ["mpi.h", "mpio.h", "mpicxx.h"] #When using MVAPICH - return hdrs - -def if_mpi(if_true, if_false = []): - return select({ - "//tensorflow:with_mpi_support": if_true, - "//conditions:default": if_false - }) -- GitLab From a784520596cdf1e62702ff656dcd7cc0137d6d67 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 21 Nov 2017 13:25:30 -0800 Subject: [PATCH 0712/1801] Update the tf_item whenever the underlying metagraph is updated PiperOrigin-RevId: 176552317 --- tensorflow/python/grappler/item.py | 23 ++++++++++++++++------ tensorflow/python/grappler/item_test.py | 26 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/grappler/item.py b/tensorflow/python/grappler/item.py index f53fc7f337..cfbe014de5 100644 --- a/tensorflow/python/grappler/item.py +++ b/tensorflow/python/grappler/item.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.core.grappler.costs import op_performance_data_pb2 +from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.python import pywrap_tensorflow as tf_item from tensorflow.python.framework import errors @@ -42,21 +43,22 @@ class Item(object): ValueError: the metagraph is incomplete or invalid. """ self._metagraph = metagraph + self._item_graph = meta_graph_pb2.MetaGraphDef() + self._item_graph.CopyFrom(metagraph) + self._ignore_colocation = ignore_colocation + self._ignore_user_placement = ignore_user_placement self._tf_item = None - with errors.raise_exception_on_not_ok_status() as status: - self._tf_item = tf_item.TF_NewItem(metagraph.SerializeToString(), - ignore_colocation, - ignore_user_placement, status) + self._BuildTFItem() def __del__(self): if self._tf_item: tf_item.TF_DeleteItem(self._tf_item) def IdentifyImportantOps(self): - return tf_item.TF_IdentifyImportantOps(self._tf_item) + return tf_item.TF_IdentifyImportantOps(self.tf_item) def GetOpProperties(self): - ret_from_swig = tf_item.TF_GetOpProperties(self._tf_item) + ret_from_swig = tf_item.TF_GetOpProperties(self.tf_item) properties = {} for key, values in ret_from_swig.items(): prop = [] @@ -72,4 +74,13 @@ class Item(object): @property def tf_item(self): + if self._item_graph != self._metagraph: + self._BuildTFItem() + self._item_graph.CopyFrom(self._metagraph) return self._tf_item + + def _BuildTFItem(self): + with errors.raise_exception_on_not_ok_status() as status: + self._tf_item = tf_item.TF_NewItem(self._metagraph.SerializeToString(), + self._ignore_colocation, + self._ignore_user_placement, status) diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py index 0739a7a0e4..69835761bc 100644 --- a/tensorflow/python/grappler/item_test.py +++ b/tensorflow/python/grappler/item_test.py @@ -73,6 +73,32 @@ class ItemTest(test.TestCase): self.assertEqual(dtypes.int32, node_prop[0].dtype) self.assertEqual(tensor_shape.scalar(), node_prop[0].shape) + def testUpdates(self): + with ops.Graph().as_default() as g: + a = constant_op.constant(10) + b = constant_op.constant(20) + c = a + b + train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + train_op.append(c) + mg = meta_graph.create_meta_graph_def(graph=g) + grappler_item = item.Item(mg) + + initial_tf_item = grappler_item.tf_item + no_change_tf_item = grappler_item.tf_item + self.assertEqual(initial_tf_item, no_change_tf_item) + + # Modify the placement. + for node in grappler_item.metagraph.graph_def.node: + node.device = '/cpu:0' + new_tf_item = grappler_item.tf_item + self.assertNotEqual(initial_tf_item, new_tf_item) + + # Assign the same placement. + for node in grappler_item.metagraph.graph_def.node: + node.device = '/cpu:0' + newest_tf_item = grappler_item.tf_item + self.assertEqual(new_tf_item, newest_tf_item) + if __name__ == '__main__': test.main() -- GitLab From b5198a3c9b093f0d574c21b5496f045e18c74bae Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 21 Nov 2017 13:28:16 -0800 Subject: [PATCH 0713/1801] Make parameter names in declarations consistent with names in definitions PiperOrigin-RevId: 176552613 --- tensorflow/compiler/xla/service/user_computation.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h index ac879ce55a..473a8b8f73 100644 --- a/tensorflow/compiler/xla/service/user_computation.h +++ b/tensorflow/compiler/xla/service/user_computation.h @@ -70,7 +70,7 @@ class UserComputation { // Enqueues a pad instruction onto this user computation. StatusOr AddPadInstruction( - const PadRequest& parameter_request); + const PadRequest& pad_request); // Enqueues a tracing instruction onto this user computation. // Returns an error status if the operand cannot be resolved. @@ -105,7 +105,7 @@ class UserComputation { // Enqueues a ternary instruction onto this user computation. // Returns an error status if the operand indices are out of bounds. StatusOr AddTernaryInstruction( - const TernaryOpRequest& request); + const TernaryOpRequest& ternary_request); // Enqueues a variadic instruction onto this user computation. // Returns an error status if the operand indices are out of bounds. @@ -179,7 +179,7 @@ class UserComputation { // Enqueues a concatenate instruction onto this user computation. StatusOr AddConcatenateInstruction( - const ConcatenateRequest& slice_request); + const ConcatenateRequest& concatenate_request); // Enqueues a convert instruction onto this user computation. StatusOr AddConvertInstruction( @@ -188,17 +188,17 @@ class UserComputation { // Enqueues a reduce instruction onto this user computation. StatusOr AddReduceInstruction( const ReduceRequest& reduce_request, - const UserComputation& reduction_computation); + const UserComputation& to_apply_computation); // Enqueues a windowed reduce instruction onto this user computation. StatusOr AddReduceWindowInstruction( const ReduceWindowRequest& reduce_window_request, - const UserComputation& reduction_computation); + const UserComputation& to_apply_computation); // Enqueues a select-and-scatter instruction onto this user // computation. StatusOr AddSelectAndScatterInstruction( - const SelectAndScatterRequest& scatter_to_selected_window_element_request, + const SelectAndScatterRequest& select_and_scatter_request, const UserComputation& select_computation, const UserComputation& scatter_computation); -- GitLab From fe69b11b578b40bcf6127e54ef307d822d13e123 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Tue, 21 Nov 2017 14:34:09 -0800 Subject: [PATCH 0714/1801] Run xla FunctionTest with (and without) C API PiperOrigin-RevId: 176562355 --- tensorflow/compiler/tests/function_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/compiler/tests/function_test.py b/tensorflow/compiler/tests/function_test.py index cbe2888696..11d8a99ffe 100644 --- a/tensorflow/compiler/tests/function_test.py +++ b/tensorflow/compiler/tests/function_test.py @@ -24,10 +24,12 @@ from tensorflow.compiler.tests.xla_test import XLATestCase from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import googletest +@test_util.with_c_api class FunctionTest(XLATestCase): def testFunction(self): -- GitLab From c3ea6ed46123a719b478c508ecf1c1b5628d3b10 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 14:36:54 -0800 Subject: [PATCH 0715/1801] Add support for saving_listeners to Experiment. PiperOrigin-RevId: 176562782 --- .../contrib/learn/python/learn/experiment.py | 47 +++++++++++++------ .../learn/python/learn/experiment_test.py | 43 ++++++++++++----- 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 307db76afe..fc4bd1f461 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -140,7 +140,8 @@ class Experiment(object): delay_workers_by_global_step=False, export_strategies=None, train_steps_per_iteration=None, - checkpoint_and_export=False): + checkpoint_and_export=False, + saving_listeners=None): """Constructor for `Experiment`. Creates an Experiment instance. None of the functions passed to this @@ -200,6 +201,9 @@ class Experiment(object): `save_checkpoints_steps`. Also, this parameter leads to the creation of a default `CheckpointSaverHook` instead of a `ValidationMonitor`, so the provided `train_monitors` will need to be adjusted accordingly. + saving_listeners: list of `CheckpointSaverListener` objects. Used by + tf.estimator.Estimator for callbacks that run immediately before or + after checkpoint savings. Raises: ValueError: if `estimator` does not implement Estimator interface, @@ -221,6 +225,9 @@ class Experiment(object): raise ValueError( "`estimator` must implement `tf.contrib.learn.Trainable`" "or `tf.estimator.`Estimator`.") + if saving_listeners is not None: + raise ValueError("`saving_listeners` must be `None` with " + "`tf.contrib.learn.Estimator`.") if isinstance(estimator, tpu_estimator.TPUEstimator): logging.warn( @@ -242,6 +249,7 @@ class Experiment(object): self._eval_delay_secs = eval_delay_secs self._continuous_eval_throttle_secs = continuous_eval_throttle_secs self._checkpoint_and_export = checkpoint_and_export + self._saving_listeners = saving_listeners # Using 1 on a non-cached file system requires a lot of overhead to # read the checkpoint state file. This is particular bad on GCS, so # we use a different default. This is a temporary band-aid, to be @@ -362,9 +370,11 @@ class Experiment(object): logging.info("Waiting %d secs before starting training.", remaining) time.sleep(delay_secs) - return self._call_train(input_fn=self._train_input_fn, - max_steps=self._train_steps, - hooks=self._train_monitors + extra_hooks) + return self._call_train( + input_fn=self._train_input_fn, + max_steps=self._train_steps, + hooks=self._train_monitors + extra_hooks, + saving_listeners=self._saving_listeners) def evaluate(self, delay_secs=None, name=None): """Evaluate on the evaluation data. @@ -712,9 +722,11 @@ class Experiment(object): break logging.info("Training model for %s steps", train_steps_per_iteration) - self._call_train(input_fn=self._train_input_fn, - steps=train_steps_per_iteration, - hooks=self._train_monitors) + self._call_train( + input_fn=self._train_input_fn, + steps=train_steps_per_iteration, + hooks=self._train_monitors, + saving_listeners=self._saving_listeners) logging.info("Evaluating model now.") eval_result = self._call_evaluate(input_fn=self._eval_input_fn, @@ -762,9 +774,11 @@ class Experiment(object): Returns: The result of the `evaluate` call to the `Estimator`. """ - self._call_train(input_fn=self._train_input_fn, - steps=1, - hooks=self._train_monitors) + self._call_train( + input_fn=self._train_input_fn, + steps=1, + hooks=self._train_monitors, + saving_listeners=self._saving_listeners) eval_result = self._call_evaluate(input_fn=self._eval_input_fn, steps=1, @@ -792,7 +806,8 @@ class Experiment(object): return server def _call_train(self, _sentinel=None, # pylint: disable=invalid-name, - input_fn=None, steps=None, hooks=None, max_steps=None): + input_fn=None, steps=None, hooks=None, max_steps=None, + saving_listeners=None): if _sentinel is not None: raise ValueError("_call_train should be called with keyword args only") @@ -801,10 +816,12 @@ class Experiment(object): # safe to convert for both cases. hooks = monitors.replace_monitors_with_hooks(hooks, self._estimator) if self._core_estimator_used: - return self._estimator.train(input_fn=input_fn, - steps=steps, - max_steps=max_steps, - hooks=hooks) + return self._estimator.train( + input_fn=input_fn, + steps=steps, + max_steps=max_steps, + hooks=hooks, + saving_listeners=saving_listeners) else: return self._estimator.fit(input_fn=input_fn, steps=steps, diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py index fe40d27c44..c29c198d09 100644 --- a/tensorflow/contrib/learn/python/learn/experiment_test.py +++ b/tensorflow/contrib/learn/python/learn/experiment_test.py @@ -232,14 +232,19 @@ class ExperimentTest(test.TestCase): def test_train(self): for est in self._estimators_for_tests(): - eval_metrics = 'eval_metrics' if not isinstance( - est, core_estimator.Estimator) else None + if isinstance(est, core_estimator.Estimator): + eval_metrics = None + saving_listeners = 'saving_listeners' + else: + eval_metrics = 'eval_metrics' + saving_listeners = None ex = experiment.Experiment( est, train_input_fn='train_input', train_steps='train_steps', eval_input_fn='eval_input', - eval_metrics=eval_metrics) + eval_metrics=eval_metrics, + saving_listeners=saving_listeners) fit_args = ex.train(delay_secs=0) self.assertEqual(1, est.fit_count) self.assertIn(('max_steps', 'train_steps'), fit_args) @@ -675,8 +680,12 @@ class ExperimentTest(test.TestCase): def test_continuous_train_and_eval(self): for est in self._estimators_for_tests(eval_dict={'global_step': 100}): - eval_metrics = 'eval_metrics' if not isinstance( - est, core_estimator.Estimator) else None + if isinstance(est, core_estimator.Estimator): + eval_metrics = None + saving_listeners = 'saving_listeners' + else: + eval_metrics = 'eval_metrics' + saving_listeners = None noop_hook = _NoopHook() export_strategy = saved_model_export_utils.make_export_strategy( est, @@ -690,7 +699,8 @@ class ExperimentTest(test.TestCase): eval_hooks=[noop_hook], train_steps=100, eval_steps=100, - export_strategies=export_strategy) + export_strategies=export_strategy, + saving_listeners=saving_listeners) ex.continuous_train_and_eval() self.assertEqual(1, est.fit_count) self.assertEqual(1, est.eval_count) @@ -742,9 +752,10 @@ class ExperimentTest(test.TestCase): ex.continuous_train_and_eval(continuous_eval_predicate_fn=predicate_fn) mock_estimator.train.assert_called_once_with( input_fn='train_input', - steps=int(total_steps/10), + steps=int(total_steps / 10), max_steps=test.mock.ANY, - hooks=test.mock.ANY) + hooks=test.mock.ANY, + saving_listeners=test.mock.ANY) def test_continuous_train_and_eval_with_steps_per_iteration_from_user(self): mock_estimator = test.mock.Mock(core_estimator.Estimator) @@ -768,7 +779,8 @@ class ExperimentTest(test.TestCase): input_fn='train_input', steps=1234, max_steps=test.mock.ANY, - hooks=test.mock.ANY) + hooks=test.mock.ANY, + saving_listeners=test.mock.ANY) def test_continuous_train_and_eval_with_default_steps_per_iteration(self): mock_estimator = test.mock.Mock(core_estimator.Estimator) @@ -791,7 +803,8 @@ class ExperimentTest(test.TestCase): input_fn='train_input', steps=1000, max_steps=test.mock.ANY, - hooks=test.mock.ANY) + hooks=test.mock.ANY, + saving_listeners=test.mock.ANY) def test_continuous_train_and_eval_with_invalid_predicate_fn(self): for est in self._estimators_for_tests(): @@ -857,11 +870,19 @@ class ExperimentTest(test.TestCase): est, None if isinstance(est, core_estimator.Estimator) else 'export_input', exports_to_keep=None) + if isinstance(est, core_estimator.Estimator): + eval_metrics = None + saving_listeners = 'saving_listeners' + else: + eval_metrics = 'eval_metrics' + saving_listeners = None ex = experiment.Experiment( est, train_input_fn='train_input', eval_input_fn='eval_input', - export_strategies=(exp_strategy,)) + export_strategies=(exp_strategy,), + eval_metrics=eval_metrics, + saving_listeners=saving_listeners) ex.test() self.assertEqual(1, est.fit_count) self.assertEqual(1, est.eval_count) -- GitLab From c6117cacf6ca91e442aea9bacb88a9444be60b33 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 14:42:24 -0800 Subject: [PATCH 0716/1801] Adds name_scope for each column in shared_embedding_columns. PiperOrigin-RevId: 176563698 --- .../python/feature_column/feature_column.py | 84 +++++++++++++++---- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 55969c4b75..452f84192c 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -236,20 +236,29 @@ def input_layer(features, ordered_columns.append(column) with variable_scope.variable_scope( None, default_name=column._var_scope_name): # pylint: disable=protected-access - tensor = column._get_dense_tensor( # pylint: disable=protected-access - builder, - weight_collections=weight_collections, - trainable=trainable) + if column._var_scope_name == column.name: # pylint: disable=protected-access + tensor = _get_dense_tensor( + column=column, + builder=builder, + weight_collections=weight_collections, + trainable=trainable) + else: + # This is typically the case for shared_embedding_columns. The + # embedding weights variable will be under the common variable_scope, + # but the ops for each column will be under a separate name_scope. + with ops.name_scope(column.name): + tensor = _get_dense_tensor( + column=column, + builder=builder, + weight_collections=weight_collections, + trainable=trainable) + output_tensors.append(tensor) if cols_to_vars is not None: # Retrieve any variables created (some _DenseColumn's don't create # variables, in which case an empty list is returned). cols_to_vars[column] = ops.get_collection( ops.GraphKeys.GLOBAL_VARIABLES, scope=variable_scope.get_variable_scope().name) - num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access - batch_size = array_ops.shape(tensor)[0] - tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements)) - output_tensors.append(tensor) _verify_static_batch_size_equality(output_tensors, ordered_columns) return array_ops.concat(output_tensors, 1) @@ -345,13 +354,26 @@ def linear_model(features, with variable_scope.variable_scope( None, default_name=column._var_scope_name): # pylint: disable=protected-access ordered_columns.append(column) - if isinstance(column, _CategoricalColumn): - weighted_sum = _create_categorical_column_weighted_sum( - column, builder, units, sparse_combiner, weight_collections, - trainable) + if column._var_scope_name == column.name: # pylint: disable=protected-access + weighted_sum = _create_weighted_sum( + column=column, + builder=builder, + units=units, + sparse_combiner=sparse_combiner, + weight_collections=weight_collections, + trainable=trainable) else: - weighted_sum = _create_dense_column_weighted_sum( - column, builder, units, weight_collections, trainable) + # This is typically the case for shared_embedding_columns. The + # embedding weights variable will be under the common variable_scope, + # but the ops for each column will be under a separate name_scope. + with ops.name_scope(column.name): + weighted_sum = _create_weighted_sum( + column=column, + builder=builder, + units=units, + sparse_combiner=sparse_combiner, + weight_collections=weight_collections, + trainable=trainable) weighted_sums.append(weighted_sum) if cols_to_vars is not None: # Retrieve the variables created. @@ -1488,7 +1510,7 @@ class _FeatureColumn(object): @abc.abstractproperty def name(self): - """Returns string. Used for naming.""" + """Returns string. Used for naming and for name_scope.""" pass @property @@ -1586,6 +1608,38 @@ class _DenseColumn(_FeatureColumn): pass +def _get_dense_tensor( + column, + builder, + weight_collections, + trainable): + """Creates a dense Tensor for a _DenseColumn for input_layer.""" + tensor = column._get_dense_tensor( # pylint: disable=protected-access + builder, + weight_collections=weight_collections, + trainable=trainable) + num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access + batch_size = array_ops.shape(tensor)[0] + return array_ops.reshape(tensor, shape=(batch_size, num_elements)) + + +def _create_weighted_sum( + column, + builder, + units, + sparse_combiner, + weight_collections, + trainable): + """Creates a weighted sum for a dense or sparse column for linear_model.""" + if isinstance(column, _CategoricalColumn): + return _create_categorical_column_weighted_sum( + column, builder, units, sparse_combiner, weight_collections, + trainable) + else: + return _create_dense_column_weighted_sum( + column, builder, units, weight_collections, trainable) + + def _create_dense_column_weighted_sum( column, builder, units, weight_collections, trainable): """Create a weighted sum of a dense column for linear_model.""" -- GitLab From f0d1abbf2389aa2a29fe6fd090ba68ab6b8fd76f Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Tue, 21 Nov 2017 14:47:00 -0800 Subject: [PATCH 0717/1801] Pass VirtualCluster to tf_optimizer to enable cross-optimization of models. PiperOrigin-RevId: 176564391 --- .../grappler/costs/op_level_cost_estimator.cc | 3 +- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/layout_optimizer.cc | 23 ++++- .../grappler/optimizers/layout_optimizer.h | 4 - .../optimizers/layout_optimizer_test.cc | 36 +++---- tensorflow/python/BUILD | 3 + tensorflow/python/grappler/cluster.i | 87 ++++++++++++++--- tensorflow/python/grappler/cluster.py | 30 +++--- tensorflow/python/grappler/cluster_test.py | 21 +++++ .../python/grappler/layout_optimizer_test.py | 93 +++++++++++-------- tensorflow/python/grappler/tf_optimizer.i | 16 +++- tensorflow/python/grappler/tf_optimizer.py | 7 +- 12 files changed, 226 insertions(+), 98 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index bd84331b67..f7905d7798 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -324,7 +324,8 @@ OpLevelCostEstimator::DeviceInfo OpLevelCostEstimator::GetDeviceInfo( // Maxwell cores_per_multiprocessor = 128; } else { - // Pascal + // Pascal (compute capability version 6) and Volta (compute capability + // version 7) cores_per_multiprocessor = 64; } gflops = device.num_cores() * device.frequency() * 1e-3 * diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 08344b0ada..e127556054 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -336,6 +336,7 @@ tf_cc_test( "//tensorflow/core:testlib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:virtual_cluster", ], ) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index f186fdb895..aaa1b7a316 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -1404,6 +1404,24 @@ int GetNumTranspose(const GraphDef& graph) { LOG(INFO) << "Number of Transpose nodes: " << number; return number; } + +int GetNumGPUs(const Cluster& cluster) { + auto devices = cluster.GetDevices(); + int num_gpus = 0; + for (const auto& device : devices) { + if (device.second.type() == "GPU") { + if (device.second.environment().find("architecture") != + device.second.environment().end()) { + const string arch = device.second.environment().at("architecture"); + // TODO(yaozhang): Enable for Volta GPUs (compute capability version 7). + if (arch < "7") { + num_gpus++; + } + } + } + } + return num_gpus; +} } // namespace Status LayoutOptimizer::Tune(const GrapplerItem& item, @@ -1424,10 +1442,7 @@ Status LayoutOptimizer::Tune(const GrapplerItem& item, Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { - if (num_gpus_ == 0) { - num_gpus_ = GetNumAvailableGPUs(); - } - if (num_gpus_ < 1) { + if (GetNumGPUs(*cluster) < 1) { // LayoutOptimizer is currently only tuned for GPU. *output = item.graph; return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.h b/tensorflow/core/grappler/optimizers/layout_optimizer.h index 621c286976..a22fadd9e7 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.h +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.h @@ -29,9 +29,6 @@ class LayoutOptimizer : public GraphOptimizer { string name() const override { return "layout"; }; - // This is for testing only. - void set_num_gpus(int num_gpus) { num_gpus_ = num_gpus; }; - struct TuningConfig { // If true, do not use the NHWC GEMM implementation. When filter size is // one or filter size is equal to input image size, @@ -50,7 +47,6 @@ class LayoutOptimizer : public GraphOptimizer { const GraphDef& optimize_output, double result) override; private: - int num_gpus_ = 0; Status Tune(const GrapplerItem& item, const GraphProperties& graph_properties, const string& default_device, const TuningConfig& config, GraphDef* output); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index b760cf2ff2..156e6710a6 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -17,10 +17,12 @@ limitations under the License. #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/clusters/virtual_cluster.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/device_properties.pb.h" namespace tensorflow { namespace grappler { @@ -28,6 +30,13 @@ namespace { class LayoutOptimizerTest : public ::testing::Test { protected: + void SetUp() override { + DeviceProperties device_properties; + device_properties.set_type("GPU"); + device_properties.mutable_environment()->insert({"architecture", "6"}); + virtual_cluster_.reset(new VirtualCluster({{"/GPU:0", device_properties}})); + } + Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size, const string& padding) { int batch_size = 128; @@ -99,6 +108,8 @@ class LayoutOptimizerTest : public ::testing::Test { CHECK(tensor.FromProto(node.attr().at({"value"}).tensor())); return tensor; } + + std::unique_ptr virtual_cluster_; }; TEST_F(LayoutOptimizerTest, Conv2DBackpropInput) { @@ -108,9 +119,9 @@ TEST_F(LayoutOptimizerTest, Conv2DBackpropInput) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map(&output); string input_name = AddPrefixToNodeName("Conv2DBackpropInput-InputSizes", "LayoutOptimizer", "-"); @@ -132,9 +143,8 @@ TEST_F(LayoutOptimizerTest, FilterSizeIsOne) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map(&output); EXPECT_FALSE( node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input")); @@ -147,9 +157,8 @@ TEST_F(LayoutOptimizerTest, FilterSizeNotOne) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map(&output); EXPECT_FALSE( node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input")); @@ -162,9 +171,8 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithValidPadding) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map(&output); EXPECT_FALSE( node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input")); @@ -177,9 +185,8 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithSamePadding) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map(&output); EXPECT_TRUE( node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0")); @@ -192,9 +199,8 @@ TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map(&output); EXPECT_TRUE( node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0")); @@ -209,9 +215,8 @@ TEST_F(LayoutOptimizerTest, Pad) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map(&output); auto pad = node_map.GetNode("p"); @@ -246,9 +251,8 @@ TEST_F(LayoutOptimizerTest, Connectivity) { auto node_i2 = node_map_original.GetNode("i2"); node_i2->Swap(node_i1); LayoutOptimizer optimizer; - optimizer.set_num_gpus(1); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); NodeMap node_map_output(&output); auto node_i2_output = node_map_output.GetNode("i2"); // Layout optimizer should process i2, as it detects i2 is connected with the diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 590dbcd462..5ae4aace16 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3042,7 +3042,9 @@ tf_py_wrap_cc( "//tensorflow/core/distributed_runtime/rpc:grpc_session", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:grappler_item_builder", + "//tensorflow/core/grappler/clusters:cluster", "//tensorflow/core/grappler/clusters:single_machine", + "//tensorflow/core/grappler/clusters:virtual_cluster", "//tensorflow/core/grappler/costs:graph_memory", "//tensorflow/core/grappler/optimizers:meta_optimizer", "//tensorflow/core:lib", @@ -4408,6 +4410,7 @@ cuda_py_test( ":nn", ":ops", ":random_ops", + ":tf_cluster", ":tf_optimizer", ":training", "//third_party/py/numpy", diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i index 1e06074188..1838c40e46 100644 --- a/tensorflow/python/grappler/cluster.i +++ b/tensorflow/python/grappler/cluster.i @@ -15,6 +15,37 @@ limitations under the License. %include "tensorflow/python/platform/base.i" +%{ +#include "tensorflow/core/protobuf/device_properties.pb.h" + +template <> +bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) { + char* c_string; + Py_ssize_t py_size; + if (PyBytes_AsStringAndSize(input, &c_string, &py_size) == -1) { + // Python has raised an error (likely TypeError or UnicodeEncodeError). + return false; + } + + tensorflow::NamedDevice named_device; + if (!named_device.ParseFromString(string(c_string, py_size))) { + PyErr_SetString( + PyExc_TypeError, + "The NamedDevice could not be parsed as a valid protocol buffer"); + return false; + } + if (out) *out = named_device; + return true; +} +%} + +%typemap(in) const std::vector& (std::vector temp) { + if (!tf_vector_input_helper($input, &temp, &_PyObjAs)) { + SWIG_fail; + } + $1 = &temp; +} + %typemap(in) const tensorflow::RunMetadata& (tensorflow::RunMetadata temp) { char* c_string; Py_ssize_t py_size; @@ -26,7 +57,7 @@ limitations under the License. if (!temp.ParseFromString(string(c_string, py_size))) { PyErr_SetString( PyExc_TypeError, - "The MetaGraphDef could not be parsed as a valid protocol buffer"); + "The RunMetadata could not be parsed as a valid protocol buffer"); SWIG_fail; } $1 = &temp; @@ -44,6 +75,7 @@ limitations under the License. #include #include "tensorflow/core/grappler/devices.h" #include "tensorflow/core/grappler/clusters/single_machine.h" +#include "tensorflow/core/grappler/clusters/virtual_cluster.h" #include "tensorflow/core/grappler/costs/graph_memory.h" #include "tensorflow/core/grappler/costs/op_performance_data.pb.h" #include "tensorflow/core/grappler/costs/measuring_cost_estimator.h" @@ -51,12 +83,14 @@ limitations under the License. #include "tensorflow/core/protobuf/device_properties.pb.h" static tensorflow::grappler::Cluster* TF_NewCluster( - bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status) { + bool allow_soft_placement, + bool disable_detailed_stats, TF_Status* out_status) { int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();; int timeout_s = 60 * 10; - tensorflow::grappler::Cluster* cluster = new tensorflow::grappler::SingleMachine( - timeout_s, num_cpu_cores, num_gpus); + tensorflow::grappler::Cluster* cluster = + new tensorflow::grappler::SingleMachine( + timeout_s, num_cpu_cores, num_gpus); cluster->DisableDetailedStats(disable_detailed_stats); cluster->AllowSoftPlacement(allow_soft_placement); tensorflow::Status status = cluster->Provision(); @@ -64,15 +98,30 @@ static tensorflow::grappler::Cluster* TF_NewCluster( return cluster; } +static tensorflow::grappler::Cluster* TF_NewVirtualCluster( + const std::vector& named_devices, + TF_Status* out_status) { + std::unordered_map devices; + for (const auto& named_device : named_devices) { + devices[named_device.name()]= named_device.properties(); + } + tensorflow::grappler::Cluster* cluster = + new tensorflow::grappler::VirtualCluster(devices); + tensorflow::Status status = cluster->Provision(); + tensorflow::Set_TF_Status_from_Status(out_status, status); + return cluster; +} + static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster) { cluster->Shutdown(); delete cluster; } -tensorflow::Status _GetOpPerformanceDataAndRunTime(const tensorflow::grappler::GrapplerItem& item, - tensorflow::grappler::CostEstimator* cost_measure, - tensorflow::OpPerformanceList* op_performance_data, - tensorflow::grappler::Costs* costs) { +tensorflow::Status _GetOpPerformanceDataAndRunTime( + const tensorflow::grappler::GrapplerItem& item, + tensorflow::grappler::CostEstimator* cost_measure, + tensorflow::OpPerformanceList* op_performance_data, + tensorflow::grappler::Costs* costs) { tensorflow::Status status = cost_measure->Initialize(item); if (!status.ok()) return status; @@ -105,7 +154,8 @@ static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) { } static PyObject* TF_MeasureCosts( - const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem* item, + tensorflow::grappler::Cluster* cluster, bool generate_timeline, TF_Status* out_status) { tensorflow::OpPerformanceList op_performance_data; tensorflow::StepStats step_stats; @@ -113,15 +163,16 @@ static PyObject* TF_MeasureCosts( tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster, 10, 0); tensorflow::grappler::Costs costs; - tensorflow::Status status = _GetOpPerformanceDataAndRunTime(*item, &cost_measure, - &op_performance_data, &costs); + tensorflow::Status status = _GetOpPerformanceDataAndRunTime( + *item, &cost_measure, &op_performance_data, &costs); double run_time = FLT_MAX; if (status.ok()) { run_time = static_cast(costs.execution_time.count()) / 1e9; } if (generate_timeline) { tensorflow::RunMetadata metadata; - tensorflow::Status s = cluster->Run(item->graph, item->feed, item->fetch, &metadata); + tensorflow::Status s = cluster->Run( + item->graph, item->feed, item->fetch, &metadata); if (s.ok()) { step_stats = metadata.step_stats(); } else { @@ -133,9 +184,11 @@ static PyObject* TF_MeasureCosts( if (!status.ok()) { Py_RETURN_NONE; } - PyObject* op_perf_objs = PyList_New(op_performance_data.op_performance_size()); + PyObject* op_perf_objs = PyList_New( + op_performance_data.op_performance_size()); for (int i = 0; i < op_performance_data.op_performance_size(); i++) { - string op_perf_str = op_performance_data.op_performance(i).SerializeAsString(); + string op_perf_str = + op_performance_data.op_performance(i).SerializeAsString(); PyObject* op_perf_obj = PyBytes_FromStringAndSize(op_perf_str.data(), op_perf_str.size()); PyList_SetItem(op_perf_objs, i, op_perf_obj); @@ -165,7 +218,8 @@ static PyObject* TF_MeasureCosts( static PyObject* TF_DeterminePeakMemoryUsage( - const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem* item, + tensorflow::grappler::Cluster* cluster, TF_Status* out_status) { if (!item || !cluster) { tensorflow::Status status(tensorflow::error::Code::INTERNAL, @@ -216,6 +270,9 @@ static PyObject* TF_DeterminePeakMemoryUsage( static tensorflow::grappler::Cluster* TF_NewCluster( bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status); +static tensorflow::grappler::Cluster* TF_NewVirtualCluster( + const std::vector& named_devices, + TF_Status* out_status); static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster); static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster); static PyObject* TF_MeasureCosts( diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py index 58c7bbbac1..9864e86811 100644 --- a/tensorflow/python/grappler/cluster.py +++ b/tensorflow/python/grappler/cluster.py @@ -31,21 +31,29 @@ class Cluster(object): def __init__(self, allow_soft_placement=True, disable_detailed_stats=True, - disable_timeline=True): + disable_timeline=True, + devices=None): """Creates a Cluster. Args: - allow_soft_placement: if True, TF will automatically fix illegal + allow_soft_placement: If True, TF will automatically fix illegal placements instead of erroring out if the placement isn't legal. - disable_detailed_stats: if True, detailed statistics will not be + disable_detailed_stats: If True, detailed statistics will not be available. - disable_timeline: if True, the timeline information will not be - reported. + disable_timeline: If True, the timeline information will not be reported. + devices: A list of devices of type device_properties_pb2.NamedDevice. + If None, a device list will be created based on the spec of + the local machine. """ self._tf_cluster = None with errors.raise_exception_on_not_ok_status() as status: - self._tf_cluster = tf_cluster.TF_NewCluster( - allow_soft_placement, disable_detailed_stats, status) + if devices is None: + self._tf_cluster = tf_cluster.TF_NewCluster( + allow_soft_placement, disable_detailed_stats, status) + else: + devices_serialized = [device.SerializeToString() for device in devices] + self._tf_cluster = tf_cluster.TF_NewVirtualCluster( + devices_serialized, status) self._generate_timeline = not disable_timeline def __del__(self): @@ -71,8 +79,8 @@ class Cluster(object): """Returns the cost of running the specified item. Args: - item: the item for which to measure the costs. - Returns: the triplet op_perfs, runtime, step_stats. + item: The item for which to measure the costs. + Returns: The triplet op_perfs, runtime, step_stats. """ with errors.raise_exception_on_not_ok_status() as status: ret_from_swig = tf_cluster.TF_MeasureCosts( @@ -93,8 +101,8 @@ class Cluster(object): """Returns a snapshot of the peak memory usage. Args: - item: the item for which to measure the costs. - Returns: a hashtable indexed by device name. + item: The item for which to measure the costs. + Returns: A hashtable indexed by device name. """ with errors.raise_exception_on_not_ok_status() as status: ret_from_swig = tf_cluster.TF_DeterminePeakMemoryUsage( diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py index de4ded571f..a71a860a59 100644 --- a/tensorflow/python/grappler/cluster_test.py +++ b/tensorflow/python/grappler/cluster_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.core.protobuf import device_properties_pb2 from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops from tensorflow.python.grappler import cluster @@ -82,6 +83,26 @@ class ClusterTest(test.TestCase): live_tensors = snapshot[1] self.assertEqual(15, len(live_tensors)) + def testVirtualCluster(self): + with ops.Graph().as_default() as g: + a = random_ops.random_uniform(shape=()) + b = random_ops.random_uniform(shape=()) + c = a + b + train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + train_op.append(c) + mg = meta_graph.create_meta_graph_def(graph=g) + grappler_item = item.Item(mg) + device_properties = device_properties_pb2.DeviceProperties( + type='GPU', environment={ + 'architecture': '7' + }) + named_device = device_properties_pb2.NamedDevice( + properties=device_properties, name='/GPU:0') + grappler_cluster = cluster.Cluster(devices=[named_device]) + op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item) + self.assertGreater(run_time, 0) + self.assertEqual(len(op_perfs), 15) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 99a4d23b6a..350c8434ce 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import device_properties_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.core.protobuf import saver_pb2 from tensorflow.python.client import session @@ -28,6 +29,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed +from tensorflow.python.grappler import cluster as gcluster from tensorflow.python.grappler import tf_optimizer from tensorflow.python.layers import convolutional as conv_layers from tensorflow.python.ops import array_ops @@ -41,53 +43,53 @@ from tensorflow.python.training import gradient_descent from tensorflow.python.training import saver as saver_lib -def weight(shape): - """weights generates a weight of a given shape.""" +def _weight(shape): + """Generates a weight of a given shape.""" return random_ops.truncated_normal(shape, seed=0, stddev=0.1) -def bias(shape): - """bias generates a bias of a given shape.""" +def _bias(shape): + """Generates a bias of a given shape.""" return constant_op.constant(0.1, shape=shape) -def conv2d(x, w): - """conv2d returns a 2d convolution layer with full stride.""" +def _conv2d(x, w): + """Returns a 2d convolution layer with full stride.""" return nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME') -def max_pool_2x2(x): - """max_pool_2x2 downsamples a feature map by 2X.""" +def _max_pool_2x2(x): + """Downsamples a feature map by 2X.""" return nn.max_pool( x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py -def two_layer_model(x): +def _two_layer_model(x): x_image = array_ops.reshape(x, [-1, 28, 28, 1]) - w_conv1 = weight([5, 5, 1, 32]) - b_conv1 = bias([32]) - h_conv1 = nn.relu(conv2d(x_image, w_conv1) + b_conv1) - h_pool1 = max_pool_2x2(h_conv1) - w_conv2 = weight([5, 5, 32, 64]) - b_conv2 = bias([64]) - h_conv2 = nn.relu(conv2d(h_pool1, w_conv2) + b_conv2) - h_pool2 = max_pool_2x2(h_conv2) + w_conv1 = _weight([5, 5, 1, 32]) + b_conv1 = _bias([32]) + h_conv1 = nn.relu(_conv2d(x_image, w_conv1) + b_conv1) + h_pool1 = _max_pool_2x2(h_conv1) + w_conv2 = _weight([5, 5, 32, 64]) + b_conv2 = _bias([64]) + h_conv2 = nn.relu(_conv2d(h_pool1, w_conv2) + b_conv2) + h_pool2 = _max_pool_2x2(h_conv2) return h_pool2 -def loop(): +def _loop(): random_seed.set_random_seed(0) x1 = random_ops.truncated_normal([1, 784], seed=0) x2 = random_ops.truncated_normal([1, 784], seed=0) x3 = random_ops.truncated_normal([1, 784], seed=0) x4 = random_ops.truncated_normal([1, 784], seed=0) elems = (x1, x2, x3, x4) - outputs = functional_ops.map_fn(two_layer_model, elems, dtype=dtypes.float32) + outputs = functional_ops.map_fn(_two_layer_model, elems, dtype=dtypes.float32) return outputs -def get_config(layout_optimizer=True): +def _get_config(layout_optimizer=True): if layout_optimizer: rewrite_options = rewriter_config_pb2.RewriterConfig( layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) @@ -100,6 +102,20 @@ def get_config(layout_optimizer=True): return config +def _simple_metagraph(): + random_seed.set_random_seed(0) + x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0)) + y = conv_layers.conv2d(x, 32, [3, 3]) + z = conv_layers.conv2d(y, 32, [3, 3]) + optimizer = gradient_descent.GradientDescentOptimizer(1e-4) + loss = math_ops.reduce_mean(z) + train_op = optimizer.minimize(loss) + graph = ops.get_default_graph() + graph.add_to_collection('train_op', train_op) + meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def()) + return meta_graph + + class LayoutOptimizerTest(test.TestCase): """Tests the Grappler layout optimizer.""" @@ -107,7 +123,7 @@ class LayoutOptimizerTest(test.TestCase): ops.reset_default_graph() graph = ops.get_default_graph() with session.Session( - config=get_config(layout_optimizer), graph=graph) as sess: + config=_get_config(layout_optimizer), graph=graph) as sess: batch = 2 height = 6 width = 7 @@ -142,12 +158,12 @@ class LayoutOptimizerTest(test.TestCase): if test.is_gpu_available(cuda_only=True): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) - output = two_layer_model(x) + output = _two_layer_model(x) with session.Session() as sess: output_val_ref = sess.run(output) - with session.Session(config=get_config()) as sess: + with session.Session(config=_get_config()) as sess: metadata = config_pb2.RunMetadata() output_val = sess.run(output, run_metadata=metadata) @@ -171,36 +187,28 @@ class LayoutOptimizerTest(test.TestCase): def testLoop(self): if test.is_gpu_available(cuda_only=True): - output = loop() + output = _loop() with session.Session() as sess: output_val_ref = sess.run(output) - with session.Session(config=get_config()) as sess: + with session.Session(config=_get_config()) as sess: metadata = config_pb2.RunMetadata() output_val = sess.run(output, run_metadata=metadata) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testGradient(self): - if not test.is_gpu_available(cuda_only=True): - self.skipTest('GPU required') - - random_seed.set_random_seed(0) - x = variables.Variable( - random_ops.truncated_normal([1, 200, 200, 3], seed=0)) - y = conv_layers.conv2d(x, 32, [3, 3]) - z = conv_layers.conv2d(y, 32, [3, 3]) - optimizer = gradient_descent.GradientDescentOptimizer(1e-4) - loss = math_ops.reduce_mean(z) - train_op = optimizer.minimize(loss) - graph = ops.get_default_graph() - graph.add_to_collection('train_op', train_op) - meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def()) - + meta_graph = _simple_metagraph() rewrite_options = rewriter_config_pb2.RewriterConfig( layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) - optimized_graph = tf_optimizer.OptimizeGraph(rewrite_options, meta_graph) + named_device = device_properties_pb2.NamedDevice() + named_device.name = '/GPU:0' + named_device.properties.type = 'GPU' + named_device.properties.environment['architecture'] = '4' + cluster = gcluster.Cluster(devices=[named_device]) + optimized_graph = tf_optimizer.OptimizeGraph( + rewrite_options, meta_graph, cluster=cluster) found = 0 for node in optimized_graph.node: @@ -210,6 +218,9 @@ class LayoutOptimizerTest(test.TestCase): self.assertEqual(found, 5) def testCheckpointCompatibility(self): + if not test.is_gpu_available(cuda_only=True): + self.skipTest('GPU required') + checkpoint_path = self.get_temp_dir() self._train(checkpoint_path) vars_expected = self._train(checkpoint_path, restore=True) diff --git a/tensorflow/python/grappler/tf_optimizer.i b/tensorflow/python/grappler/tf_optimizer.i index 719ddaae21..3965c65bb9 100644 --- a/tensorflow/python/grappler/tf_optimizer.i +++ b/tensorflow/python/grappler/tf_optimizer.i @@ -62,6 +62,7 @@ limitations under the License. #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/grappler_item_builder.h" + #include "tensorflow/core/grappler/clusters/cluster.h" #include "tensorflow/core/grappler/clusters/utils.h" #include "tensorflow/core/grappler/clusters/virtual_cluster.h" #include "tensorflow/core/grappler/optimizers/meta_optimizer.h" @@ -91,6 +92,7 @@ void DetectDevices(std::unordered_map* dev } PyObject* TF_OptimizeGraph( + tensorflow::grappler::Cluster* cluster, const tensorflow::RewriterConfig& rewriter_config, const tensorflow::MetaGraphDef& metagraph, bool verbose, const string& graph_id, TF_Status* out_status) { @@ -99,13 +101,18 @@ PyObject* TF_OptimizeGraph( item_config.apply_optimizations = false; std::unique_ptr grappler_item = tensorflow::grappler::GrapplerItemFromMetaGraphDef(graph_id, metagraph, item_config); - std::unordered_map device_map; - DetectDevices(&device_map); + + std::unique_ptr virtual_cluster; + if (cluster == nullptr) { + std::unordered_map device_map; + DetectDevices(&device_map); + virtual_cluster.reset(new tensorflow::grappler::VirtualCluster(device_map)); + cluster = virtual_cluster.get(); + } tensorflow::DeviceBase* cpu_device = nullptr; - tensorflow::grappler::VirtualCluster cluster(device_map); tensorflow::GraphDef out_graph; tensorflow::grappler::MetaOptimizer optimizer(cpu_device, rewriter_config); - tensorflow::Status status = optimizer.Optimize(&cluster, *grappler_item, &out_graph); + tensorflow::Status status = optimizer.Optimize(cluster, *grappler_item, &out_graph); if (verbose) { optimizer.PrintResult(); } @@ -120,6 +127,7 @@ PyObject* TF_OptimizeGraph( // Wrap this function PyObject* TF_OptimizeGraph( + tensorflow::grappler::Cluster* cluster, const tensorflow::RewriterConfig& rewriter_config, const tensorflow::MetaGraphDef& metagraph, bool verbose, const string& graph_id, TF_Status* out_status); diff --git a/tensorflow/python/grappler/tf_optimizer.py b/tensorflow/python/grappler/tf_optimizer.py index 1c608ce319..d430dd9e2f 100644 --- a/tensorflow/python/grappler/tf_optimizer.py +++ b/tensorflow/python/grappler/tf_optimizer.py @@ -26,10 +26,13 @@ from tensorflow.python.framework import errors def OptimizeGraph(rewriter_config, metagraph, verbose=True, - graph_id=b'graph_to_optimize'): + graph_id=b'graph_to_optimize', + cluster=None): """Optimize the provided metagraph.""" with errors.raise_exception_on_not_ok_status() as status: - ret_from_swig = tf_opt.TF_OptimizeGraph(rewriter_config.SerializeToString(), + ret_from_swig = tf_opt.TF_OptimizeGraph(None if cluster is None else + cluster.tf_cluster, + rewriter_config.SerializeToString(), metagraph.SerializeToString(), verbose, graph_id, status) if ret_from_swig is None: -- GitLab From db8447528c1f7d6055d9a0145aa35bbea7bfd810 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 14:47:49 -0800 Subject: [PATCH 0718/1801] Removed forward declarations of tensorflow::StringPiece so that it may more easily be replaced with absl::string_view. PiperOrigin-RevId: 176564520 --- tensorflow/core/lib/io/path.h | 20 ++++++++++---------- tensorflow/core/lib/io/proto_encode_helper.h | 2 +- tensorflow/core/lib/strings/ordered_code.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h index 955098f5b5..8d02baa5bb 100644 --- a/tensorflow/core/lib/io/path.h +++ b/tensorflow/core/lib/io/path.h @@ -20,10 +20,9 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" namespace tensorflow { -class StringPiece; namespace io { namespace internal { -string JoinPathImpl(std::initializer_list paths); +string JoinPathImpl(std::initializer_list paths); } // Utility routines for processing filenames @@ -50,20 +49,20 @@ string JoinPath(const T&... args) { #endif /* SWIG */ // Return true if path is absolute. -bool IsAbsolutePath(StringPiece path); +bool IsAbsolutePath(tensorflow::StringPiece path); // Returns the part of the path before the final "/". If there is a single // leading "/" in the path, the result will be the leading "/". If there is // no "/" in the path, the result is the empty prefix of the input. -StringPiece Dirname(StringPiece path); +tensorflow::StringPiece Dirname(tensorflow::StringPiece path); // Returns the part of the path after the final "/". If there is no // "/" in the path, the result is the same as the input. -StringPiece Basename(StringPiece path); +tensorflow::StringPiece Basename(tensorflow::StringPiece path); // Returns the part of the basename of path after the final ".". If // there is no "." in the basename, the result is empty. -StringPiece Extension(StringPiece path); +tensorflow::StringPiece Extension(tensorflow::StringPiece path); // Collapse duplicate "/"s, resolve ".." and "." path elements, remove // trailing "/". @@ -72,7 +71,7 @@ StringPiece Extension(StringPiece path); // invoke any system calls (getcwd(2)) in order to resolve relative // paths with respect to the actual working directory. That is, this is purely // string manipulation, completely independent of process state. -string CleanPath(StringPiece path); +string CleanPath(tensorflow::StringPiece path); // Populates the scheme, host, and path from a URI. scheme, host, and path are // guaranteed by this function to point into the contents of uri, even if @@ -82,12 +81,13 @@ string CleanPath(StringPiece path); // - If the URI is invalid, scheme and host are set to empty strings and the // passed string is assumed to be a path // - If the URI omits the path (e.g. file://host), then the path is left empty. -void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host, - StringPiece* path); +void ParseURI(tensorflow::StringPiece uri, tensorflow::StringPiece* scheme, + tensorflow::StringPiece* host, tensorflow::StringPiece* path); // Creates a URI from a scheme, host, and path. If the scheme is empty, we just // return the path. -string CreateURI(StringPiece scheme, StringPiece host, StringPiece path); +string CreateURI(tensorflow::StringPiece scheme, tensorflow::StringPiece host, + tensorflow::StringPiece path); } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/lib/io/proto_encode_helper.h b/tensorflow/core/lib/io/proto_encode_helper.h index 5d30dda901..f70e1cbaab 100644 --- a/tensorflow/core/lib/io/proto_encode_helper.h +++ b/tensorflow/core/lib/io/proto_encode_helper.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_LIB_IO_PROTO_ENCODE_HELPER_H_ #include "tensorflow/core/lib/core/coding.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/protobuf.h" // A helper class for appending various kinds of values in protocol @@ -24,7 +25,6 @@ limitations under the License. // a buffer and a maximum size guarantee for the number of bytes they // will add to this buffer. namespace tensorflow { -class StringPiece; namespace io { class ProtoEncodeHelper { diff --git a/tensorflow/core/lib/strings/ordered_code.h b/tensorflow/core/lib/strings/ordered_code.h index ce823c3f87..91870cfec6 100644 --- a/tensorflow/core/lib/strings/ordered_code.h +++ b/tensorflow/core/lib/strings/ordered_code.h @@ -39,11 +39,11 @@ limitations under the License. #define TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__ #include +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { -class StringPiece; namespace strings { -- GitLab From 54dec6e7f5a790460d54ae68568fa5546942b1fe Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Mon, 20 Nov 2017 19:38:01 -0800 Subject: [PATCH 0719/1801] Have tf-nightly depend on tb-nightly TensorBoard now has an automated nightly release process! --- tensorflow/tools/pip_package/setup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index a493c6f2aa..c18f20910a 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -37,7 +37,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.12.1', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorflow-tensorboard >= 0.4.0rc1, < 0.5.0', + 'tensorflow-tensorboard', ] project_name = 'tensorflow' @@ -55,11 +55,11 @@ else: # mock comes with unittest.mock for python3, need to install for python2 REQUIRED_PACKAGES.append('mock >= 2.0.0') -# remove tensorboard from tf-nightly packages +# tf-nightly should depend on tb-nightly if 'tf_nightly' in project_name: - for package in REQUIRED_PACKAGES: - if 'tensorflow-tensorboard' in package: - REQUIRED_PACKAGES.remove(package) + for i, pkg in enumerate(REQUIRED_PACKAGES): + if 'tensorboard' in pkg: + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0' break # weakref.finalize was introduced in Python 3.4 @@ -76,13 +76,13 @@ CONSOLE_SCRIPTS = [ # is now declared by the tensorboard pip package. If we remove the # TensorBoard command, pip will inappropriately remove it during install, # even though the command is not removed, just moved to a different wheel. - 'tensorboard = tensorboard.main:main', + 'tensorboard = tensorboard.main:run_main', ] # pylint: enable=line-too-long # remove the tensorboard console script if building tf_nightly if 'tf_nightly' in project_name: - CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:main') + CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main') TEST_PACKAGES = [ 'scipy >= 0.15.1', -- GitLab From 34a96722c9d3ee53ed3be9db5522307637877d29 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Tue, 21 Nov 2017 15:10:21 -0800 Subject: [PATCH 0720/1801] Add the first e2e scalar test with bfloat16. This test doesn't pass yet, but it's good to use it to drive future development work. PiperOrigin-RevId: 176568226 --- tensorflow/compiler/xla/tests/BUILD | 32 ++++++++ .../compiler/xla/tests/bfloat16_test.cc | 75 +++++++++++++++++++ .../xla/tests/client_library_test_base.h | 1 + .../compiler/xla/tests/literal_test_util.cc | 10 +++ 4 files changed, 118 insertions(+) create mode 100644 tensorflow/compiler/xla/tests/bfloat16_test.cc diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index c64d5aca4f..2e220e7293 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -769,6 +769,38 @@ xla_test( ], ) +xla_test( + name = "bfloat16_test", + srcs = ["bfloat16_test.cc"], + shard_count = 40, + deps = [ + ":test_utils", + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + xla_test( name = "slice_test", srcs = ["slice_test.cc"], diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc new file mode 100644 index 0000000000..26e2b1a95b --- /dev/null +++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc @@ -0,0 +1,75 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class Bfloat16Test : public ClientLibraryTestBase { + protected: + const ErrorSpec error_spec_{0.001, 0.001}; +}; + +XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL( + DISABLED_ON_CPU(ScalarOperation)))) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR0(static_cast(2.0f)); + auto y = builder.ConstantR0(static_cast(1.0f)); + builder.Add(x, y); + + ComputeAndCompareR0(&builder, static_cast(3.0f), {}, + error_spec_); +} + +XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL( + DISABLED_ON_CPU(NegateScalarF16)))) { + ComputationBuilder builder(client_, TestName()); + builder.Neg(builder.ConstantR0(static_cast(2.1f))); + + ComputeAndCompareR0(&builder, static_cast(-2.1f), {}, + error_spec_); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 1dc274c591..af22c12684 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -333,6 +333,7 @@ void ClientLibraryTestBase::ComputeAndCompareR0( tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value, "Float or complex type required when specifying an ErrorSpec"); std::unique_ptr expected_literal = diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 75c9a0d3fb..9ae5c7b6f0 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -340,6 +340,9 @@ class NearComparator { multi_index_.resize(expected.shape().dimensions_size(), 0); switch (expected.shape().element_type()) { + case BF16: + ExpectLiteralsNear(expected, actual, 0); + break; case F32: ExpectLiteralsNear(expected, actual, 0); break; @@ -525,6 +528,13 @@ void NearComparator::ExpectNear(complex64 expected, complex64 actual, << message; } +template <> +bool NearComparator::ExpectValuesNear(bfloat16 expected, + bfloat16 actual) { + return ExpectValuesNear(static_cast(expected), + static_cast(actual)); +} + } // namespace /* static */ ::testing::AssertionResult LiteralTestUtil::Near( -- GitLab From cc003b7315b30a66567f749b35c120f5af768615 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 15:27:22 -0800 Subject: [PATCH 0721/1801] Remove vestigial utils.setdefault function. PiperOrigin-RevId: 176570863 --- .../contrib/kfac/python/ops/layer_collection.py | 12 ++++++------ tensorflow/contrib/kfac/python/ops/utils.py | 7 ------- tensorflow/contrib/kfac/python/ops/utils_lib.py | 1 - 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 04f5a869bd..d8781231ed 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -620,9 +620,9 @@ class LayerCollection(object): "LayerCollection.fisher_factors. The pair cannot be hashed.").format( cls, args)) - kwargs = { - "colocate_cov_ops_with_inputs": self._colocate_cov_ops_with_inputs - } - with variable_scope.variable_scope(self._var_scope): - return utils.setdefault(self.fisher_factors, (cls, args), - lambda: cls(*args, **kwargs)) + key = cls, args + if key not in self.fisher_factors: + colo = self._colocate_cov_ops_with_inputs + with variable_scope.variable_scope(self._var_scope): + self.fisher_factors[key] = cls(*args, colocate_cov_ops_with_inputs=colo) + return self.fisher_factors[key] diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index 0fd7f51477..ca6fb655b4 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -64,13 +64,6 @@ class SequenceDict(object): return list(self._dict.items()) -def setdefault(dct, key, thunk): - """Like dict.setdefault but delays evaluation of the value to be set.""" - if key not in dct: - dct[key] = thunk() - return dct[key] - - def tensors_to_column(tensors): """Converts a tensor or list of tensors to a column vector. diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py index ddbb4485ce..9df07d69aa 100644 --- a/tensorflow/contrib/kfac/python/ops/utils_lib.py +++ b/tensorflow/contrib/kfac/python/ops/utils_lib.py @@ -25,7 +25,6 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ "SequenceDict", - "setdefault", "tensors_to_column", "column_to_tensors", "kronecker_product", -- GitLab From 9305349a4a6b6c2d265ac81091f855e5560041c4 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 21 Nov 2017 15:35:13 -0800 Subject: [PATCH 0722/1801] Improve caching behavior of GCS filesystem On a number of Cloud TPU-related workloads, these changes improve the time to restore from a checkpoint by ~20x. PiperOrigin-RevId: 176571963 --- .../core/platform/cloud/expiring_lru_cache.h | 79 ++++++--- .../platform/cloud/expiring_lru_cache_test.cc | 64 +++++++ .../core/platform/cloud/file_block_cache.cc | 158 +++++++++++------ .../core/platform/cloud/file_block_cache.h | 50 +++++- .../platform/cloud/file_block_cache_test.cc | 35 ++++ .../core/platform/cloud/gcs_file_system.cc | 164 ++++++++++-------- .../platform/cloud/gcs_file_system_test.cc | 11 +- 7 files changed, 396 insertions(+), 165 deletions(-) diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache.h b/tensorflow/core/platform/cloud/expiring_lru_cache.h index 4fe4234e22..3fc23a4306 100644 --- a/tensorflow/core/platform/cloud/expiring_lru_cache.h +++ b/tensorflow/core/platform/cloud/expiring_lru_cache.h @@ -28,7 +28,7 @@ limitations under the License. namespace tensorflow { /// \brief An LRU cache of string keys and arbitrary values, with configurable -/// max item age and max entries. +/// max item age (in seconds) and max entries. /// /// This class is thread safe. template @@ -48,16 +48,7 @@ class ExpiringLRUCache { return; } mutex_lock lock(mu_); - lru_list_.push_front(key); - Entry entry{env_->NowSeconds(), value, lru_list_.begin()}; - auto insert = cache_.insert(std::make_pair(key, entry)); - if (!insert.second) { - lru_list_.erase(insert.first->second.lru_iterator); - insert.first->second = entry; - } else if (max_entries_ > 0 && cache_.size() > max_entries_) { - cache_.erase(lru_list_.back()); - lru_list_.pop_back(); - } + InsertLocked(key, value); } /// Look up the entry with key `key` and copy it to `value` if found. Returns @@ -68,19 +59,33 @@ class ExpiringLRUCache { return false; } mutex_lock lock(mu_); - auto it = cache_.find(key); - if (it == cache_.end()) { - return false; + return LookupLocked(key, value); + } + + typedef std::function ComputeFunc; + + /// Look up the entry with key `key` and copy it to `value` if found. If not + /// found, call `compute_func`. If `compute_func` returns successfully, store + /// a copy of the output parameter in the cache, and another copy in `value`. + Status LookupOrCompute(const string& key, T* value, + const ComputeFunc& compute_func) { + if (max_age_ == 0) { + return compute_func(key, value); } - lru_list_.erase(it->second.lru_iterator); - if (env_->NowSeconds() - it->second.timestamp > max_age_) { - cache_.erase(it); - return false; + + // Note: we hold onto mu_ for the rest of this function. In practice, this + // is okay, as stat requests are typically fast, and concurrent requests are + // often for the same file. Future work can split this up into one lock per + // key if this proves to be a significant performance bottleneck. + mutex_lock lock(mu_); + if (LookupLocked(key, value)) { + return Status::OK(); } - *value = it->second.value; - lru_list_.push_front(it->first); - it->second.lru_iterator = lru_list_.begin(); - return true; + Status s = compute_func(key, value); + if (s.ok()) { + InsertLocked(key, *value); + } + return s; } /// Accessors for cache parameters. @@ -99,6 +104,36 @@ class ExpiringLRUCache { std::list::iterator lru_iterator; }; + bool LookupLocked(const string& key, T* value) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + auto it = cache_.find(key); + if (it == cache_.end()) { + return false; + } + lru_list_.erase(it->second.lru_iterator); + if (env_->NowSeconds() - it->second.timestamp > max_age_) { + cache_.erase(it); + return false; + } + *value = it->second.value; + lru_list_.push_front(it->first); + it->second.lru_iterator = lru_list_.begin(); + return true; + } + + void InsertLocked(const string& key, const T& value) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + lru_list_.push_front(key); + Entry entry{env_->NowSeconds(), value, lru_list_.begin()}; + auto insert = cache_.insert(std::make_pair(key, entry)); + if (!insert.second) { + lru_list_.erase(insert.first->second.lru_iterator); + insert.first->second = entry; + } else if (max_entries_ > 0 && cache_.size() > max_entries_) { + cache_.erase(lru_list_.back()); + lru_list_.pop_back(); + } + } + /// The maximum age of entries in the cache, in seconds. A value of 0 means /// that no entry is ever placed in the cache. const uint64 max_age_; diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc index bf9bfcd67e..8f8d5744a4 100644 --- a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc +++ b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc @@ -88,5 +88,69 @@ TEST(ExpiringLRUCacheTest, MaxEntries) { EXPECT_EQ(value, 5); } +TEST(ExpiringLRUCacheTest, LookupOrCompute) { + // max_age of 0 means we should always compute. + uint64 num_compute_calls = 0; + ExpiringLRUCache::ComputeFunc compute_func = + [&num_compute_calls](const string& key, int* value) { + *value = num_compute_calls; + num_compute_calls++; + return Status::OK(); + }; + ExpiringLRUCache cache1(0, 4); + + int value = -1; + TF_EXPECT_OK(cache1.LookupOrCompute("a", &value, compute_func)); + EXPECT_EQ(value, 0); + EXPECT_EQ(num_compute_calls, 1); + // re-read the same value, expect another lookup + TF_EXPECT_OK(cache1.LookupOrCompute("a", &value, compute_func)); + EXPECT_EQ(value, 1); + EXPECT_EQ(num_compute_calls, 2); + + // Define a new cache with max_age > 0 and verify correct behavior. + ExpiringLRUCache cache2(2, 4); + num_compute_calls = 0; + value = -1; + + // Read our first value + TF_EXPECT_OK(cache2.LookupOrCompute("a", &value, compute_func)); + EXPECT_EQ(value, 0); + EXPECT_EQ(num_compute_calls, 1); + // Re-read, exepct no additional function compute_func calls. + TF_EXPECT_OK(cache2.LookupOrCompute("a", &value, compute_func)); + EXPECT_EQ(value, 0); + EXPECT_EQ(num_compute_calls, 1); + + // Read a sequence of additional values, eventually evicting "a". + TF_EXPECT_OK(cache2.LookupOrCompute("b", &value, compute_func)); + EXPECT_EQ(value, 1); + EXPECT_EQ(num_compute_calls, 2); + TF_EXPECT_OK(cache2.LookupOrCompute("c", &value, compute_func)); + EXPECT_EQ(value, 2); + EXPECT_EQ(num_compute_calls, 3); + TF_EXPECT_OK(cache2.LookupOrCompute("d", &value, compute_func)); + EXPECT_EQ(value, 3); + EXPECT_EQ(num_compute_calls, 4); + TF_EXPECT_OK(cache2.LookupOrCompute("e", &value, compute_func)); + EXPECT_EQ(value, 4); + EXPECT_EQ(num_compute_calls, 5); + // Verify the other values remain in the cache. + TF_EXPECT_OK(cache2.LookupOrCompute("b", &value, compute_func)); + EXPECT_EQ(value, 1); + EXPECT_EQ(num_compute_calls, 5); + TF_EXPECT_OK(cache2.LookupOrCompute("c", &value, compute_func)); + EXPECT_EQ(value, 2); + EXPECT_EQ(num_compute_calls, 5); + TF_EXPECT_OK(cache2.LookupOrCompute("d", &value, compute_func)); + EXPECT_EQ(value, 3); + EXPECT_EQ(num_compute_calls, 5); + + // Re-read "a", ensure it is re-computed. + TF_EXPECT_OK(cache2.LookupOrCompute("a", &value, compute_func)); + EXPECT_EQ(value, 5); + EXPECT_EQ(num_compute_calls, 6); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/file_block_cache.cc b/tensorflow/core/platform/cloud/file_block_cache.cc index a05c18c069..a472ae52fc 100644 --- a/tensorflow/core/platform/cloud/file_block_cache.cc +++ b/tensorflow/core/platform/cloud/file_block_cache.cc @@ -16,79 +16,137 @@ limitations under the License. #include "tensorflow/core/platform/cloud/file_block_cache.h" #include #include +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/env.h" namespace tensorflow { -std::shared_ptr FileBlockCache::Lookup(const Key& key) { - mutex_lock lock(mu_); - auto entry = block_map_.find(key); - if (entry == block_map_.end()) { - return std::shared_ptr(); - } - // If we're enforcing max staleness and the block is stale, remove all of the - // file's cached blocks so we reload them. - if (max_staleness_ > 0 && - env_->NowSeconds() - entry->second->timestamp > max_staleness_) { - RemoveFile_Locked(key.first); - return std::shared_ptr(); +bool FileBlockCache::BlockNotStale(const std::shared_ptr& block) { + mutex_lock l(block->mu); + if (block->state != FetchState::FINISHED) { + return true; // No need to check for staleness. } - return entry->second; + if (max_staleness_ == 0) return true; // Not enforcing staleness. + return env_->NowSeconds() - block->timestamp <= max_staleness_; } -std::shared_ptr FileBlockCache::Insert( - const Key& key, std::shared_ptr block) { +std::shared_ptr FileBlockCache::Lookup(const Key& key) { mutex_lock lock(mu_); auto entry = block_map_.find(key); if (entry != block_map_.end()) { - // Use the block that's already in the cache. - return entry->second; - } - // Sanity check to detect interrupted reads leading to partial blocks: a - // partial block must have a higher key than the highest existing key in the - // block map for the file. Note that since this check relies on the existence - // of a cached block with a higher key, some incomplete reads may still go - // undetected (if their key happens to be higher than anything in the cache). - if (block->data.size() < block_size_ && !block_map_.empty()) { - Key fmax = std::make_pair(key.first, std::numeric_limits::max()); - auto fcmp = block_map_.upper_bound(fmax); - if (fcmp != block_map_.begin() && key < (--fcmp)->first) { - // We expected to read a full block at this position. - return std::shared_ptr(); + if (BlockNotStale(entry->second)) { + return entry->second; + } else { + // Remove the stale block and continue. + RemoveFile_Locked(key.first); } } - // Add the block to the cache (with necessary bookkeeping). + + // Insert a new empty block, setting the bookkeeping to sentinel values + // in order to update them as appropriate. + auto new_entry = std::make_shared(); lru_list_.push_front(key); lra_list_.push_front(key); - block->lru_iterator = lru_list_.begin(); - block->lra_iterator = lra_list_.begin(); - block->timestamp = env_->NowSeconds(); - cache_size_ += block->data.size(); - block_map_.emplace(std::make_pair(key, block)); - return block; + new_entry->lru_iterator = lru_list_.begin(); + new_entry->lra_iterator = lra_list_.begin(); + new_entry->timestamp = env_->NowSeconds(); + block_map_.emplace(std::make_pair(key, new_entry)); + return new_entry; } -// Remove blocks from the cache until there is space for a full sized block. +// Remove blocks from the cache until we do not exceed our maximum size. void FileBlockCache::Trim() { - mutex_lock lock(mu_); - while (!lru_list_.empty() && cache_size_ + block_size_ > max_bytes_) { + while (!lru_list_.empty() && cache_size_ > max_bytes_) { RemoveBlock(block_map_.find(lru_list_.back())); } } /// Move the block to the front of the LRU list if it isn't already there. -void FileBlockCache::UpdateLRU(const Key& key, - const std::shared_ptr& block) { +Status FileBlockCache::UpdateLRU(const Key& key, + const std::shared_ptr& block) { mutex_lock lock(mu_); if (block->timestamp == 0) { // The block was evicted from another thread. Allow it to remain evicted. - return; + return Status::OK(); } if (block->lru_iterator != lru_list_.begin()) { lru_list_.erase(block->lru_iterator); lru_list_.push_front(key); block->lru_iterator = lru_list_.begin(); } + + // Check for inconsistent state. If there is a block later in the same file + // in the cache, and our current block is not block size, this likely means + // we have inconsistent state within the cache. Note: it's possible some + // incomplete reads may still go undetected. + if (block->data.size() < block_size_) { + Key fmax = std::make_pair(key.first, std::numeric_limits::max()); + auto fcmp = block_map_.upper_bound(fmax); + if (fcmp != block_map_.begin() && key < (--fcmp)->first) { + return errors::Internal("Block cache contents are inconsistent."); + } + } + + Trim(); + + return Status::OK(); +} + +Status FileBlockCache::MaybeFetch(const Key& key, + const std::shared_ptr& block) { + bool downloaded_block = false; + auto reconcile_state = + gtl::MakeCleanup([this, &downloaded_block, &key, &block] { + // Perform this action in a cleanup callback to avoid locking mu_ after + // locking block->mu. + if (downloaded_block) { + mutex_lock l(mu_); + // Do not update state if the block is already to be evicted. + if (block->timestamp != 0) { + cache_size_ += block->data.size(); + // Put to beginning of LRA list. + lra_list_.erase(block->lra_iterator); + lra_list_.push_front(key); + block->lra_iterator = lra_list_.begin(); + block->timestamp = env_->NowSeconds(); + } + } + }); + // Loop until either block content is successfully fetched, or our request + // encounters an error. + mutex_lock l(block->mu); + Status status = Status::OK(); + while (true) { + switch (block->state) { + case FetchState::ERROR: + TF_FALLTHROUGH_INTENDED; + case FetchState::CREATED: + block->state = FetchState::FETCHING; + block->mu.unlock(); // Release the lock while making the API call. + status.Update( + block_fetcher_(key.first, key.second, block_size_, &block->data)); + block->mu.lock(); // Reacquire the lock immediately afterwards + if (status.ok()) { + downloaded_block = true; + block->state = FetchState::FINISHED; + } else { + block->state = FetchState::ERROR; + } + block->cond_var.notify_all(); + return status; + case FetchState::FETCHING: + block->cond_var.wait_for(l, std::chrono::seconds(60)); + if (block->state == FetchState::FINISHED) { + return Status::OK(); + } + // Re-loop in case of errors. + break; + case FetchState::FINISHED: + return Status::OK(); + } + } + return errors::Internal( + "Control flow should never reach the end of FileBlockCache::Fetch."); } Status FileBlockCache::Read(const string& filename, size_t offset, size_t n, @@ -114,15 +172,9 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n, // Look up the block, fetching and inserting it if necessary, and update the // LRU iterator for the key and block. std::shared_ptr block = Lookup(key); - if (!block) { - Trim(); - auto fetch = std::make_shared(); - auto status = block_fetcher_(filename, pos, block_size_, &fetch->data); - if (!(block = Insert(key, fetch))) { - return errors::Internal("File contents are inconsistent"); - } - } - UpdateLRU(key, block); + DCHECK(block) << "No block for key " << key.first << "@" << key.second; + TF_RETURN_IF_ERROR(MaybeFetch(key, block)); + TF_RETURN_IF_ERROR(UpdateLRU(key, block)); // Copy the relevant portion of the block into the result buffer. const auto& data = block->data; if (offset >= pos + data.size()) { @@ -190,11 +242,11 @@ void FileBlockCache::RemoveFile_Locked(const string& filename) { } void FileBlockCache::RemoveBlock(BlockMap::iterator entry) { - lru_list_.erase(entry->second->lru_iterator); - lra_list_.erase(entry->second->lra_iterator); // This signals that the block is removed, and should not be inadvertently // reinserted into the cache in UpdateLRU. entry->second->timestamp = 0; + lru_list_.erase(entry->second->lru_iterator); + lra_list_.erase(entry->second->lra_iterator); cache_size_ -= entry->second->data.size(); block_map_.erase(entry); } diff --git a/tensorflow/core/platform/cloud/file_block_cache.h b/tensorflow/core/platform/cloud/file_block_cache.h index b45d226095..36dbf9db83 100644 --- a/tensorflow/core/platform/cloud/file_block_cache.h +++ b/tensorflow/core/platform/cloud/file_block_cache.h @@ -115,11 +115,35 @@ class FileBlockCache { /// The file block cache key is a {filename, offset} pair. typedef std::pair Key; + /// \brief The state of a block. + /// + /// A block begins in the CREATED stage. The first thread will attempt to read + /// the block from the filesystem, transitioning the state of the block to + /// FETCHING. After completing, if the read was successful the state should + /// be FINISHED. Otherwise the state should be ERROR. A subsequent read can + /// re-fetch the block if the state is ERROR. + enum class FetchState { + CREATED, + FETCHING, + FINISHED, + ERROR, + }; + /// \brief A block of a file. /// /// A file block consists of the block data, the block's current position in - /// the LRU cache, and the timestamp (seconds since epoch) at which the block - /// was cached. + /// the LRU cache, the timestamp (seconds since epoch) at which the block + /// was cached, a coordination lock, and state & condition variables. + /// + /// Thread safety: + /// The iterator and timestamp fields should only be accessed while holding + /// the block-cache-wide mu_ instance variable. The state variable should only + /// be accessed while holding the Block's mu lock. The data vector should only + /// be accessed after state == FINISHED, and it should never be modified. + /// + /// In order to prevent deadlocks, never grab the block-cache-wide mu_ lock + /// AFTER grabbing any block's mu lock. It is safe to grab mu without locking + /// mu_. struct Block { /// The block data. std::vector data; @@ -129,6 +153,12 @@ class FileBlockCache { std::list::iterator lra_iterator; /// The timestamp (seconds since epoch) at which the block was cached. uint64 timestamp; + /// Mutex to guard state variable + mutex mu; + /// The state of the block. + FetchState state GUARDED_BY(mu) = FetchState::CREATED; + /// Wait on cond_var if state is FETCHING. + condition_variable cond_var; }; /// \brief The block map type for the file block cache. @@ -139,19 +169,20 @@ class FileBlockCache { /// Prune the cache by removing files with expired blocks. void Prune() LOCKS_EXCLUDED(mu_); + bool BlockNotStale(const std::shared_ptr& block) + EXCLUSIVE_LOCKS_REQUIRED(mu_); + /// Look up a Key in the block cache. std::shared_ptr Lookup(const Key& key) LOCKS_EXCLUDED(mu_); - /// Insert a block in the block cache with the given key. - std::shared_ptr Insert(const Key& key, - std::shared_ptr block) + Status MaybeFetch(const Key& key, const std::shared_ptr& block) LOCKS_EXCLUDED(mu_); /// Trim the block cache to make room for another entry. - void Trim() LOCKS_EXCLUDED(mu_); + void Trim() EXCLUSIVE_LOCKS_REQUIRED(mu_); - /// Update LRU and LRA iterators for the block at `key`. - void UpdateLRU(const Key& key, const std::shared_ptr& block) + /// Update the LRU iterator for the block at `key`. + Status UpdateLRU(const Key& key, const std::shared_ptr& block) LOCKS_EXCLUDED(mu_); /// Remove all blocks of a file, with mu_ already held. @@ -179,6 +210,9 @@ class FileBlockCache { /// The LRA (least recently added) list of block keys. The front of the list /// identifies the most recently added block. + /// + /// Note: blocks are added to lra_list_ only after they have successfully been + /// fetched from the underlying block store. std::list lra_list_ GUARDED_BY(mu_); /// The combined number of bytes in all of the cached blocks. diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/file_block_cache_test.cc index 5fa738b452..2a9eb7d524 100644 --- a/tensorflow/core/platform/cloud/file_block_cache_test.cc +++ b/tensorflow/core/platform/cloud/file_block_cache_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/cloud/now_seconds_env.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/notification.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -435,5 +436,39 @@ TEST(FileBlockCacheTest, ParallelReads) { // executed, or 10 seconds have passed). } +TEST(FileBlockCacheTest, CoalesceConcurrentReads) { + // Concurrent reads to the same file blocks should be de-duplicated. + const size_t block_size = 16; + int num_requests = 0; + Notification notification; + auto fetcher = [&num_requests, ¬ification, block_size]( + const string& filename, size_t offset, size_t n, + std::vector* out) { + EXPECT_EQ(n, block_size); + EXPECT_EQ(offset, 0); + num_requests++; + out->resize(n, 'x'); + notification.Notify(); + // Wait for other thread to issue read. + Env::Default()->SleepForMicroseconds(100000); // 0.1 secs + return Status::OK(); + }; + FileBlockCache cache(block_size, block_size, 0, fetcher); + // Fork off thread for parallel read. + std::unique_ptr concurrent( + Env::Default()->StartThread({}, "concurrent", [&cache] { + std::vector out; + TF_EXPECT_OK(cache.Read("", 0, block_size / 2, &out)); + EXPECT_EQ(out.size(), block_size / 2); + })); + EXPECT_TRUE(WaitForNotificationWithTimeout(¬ification, 1000)) + << "Timeout waiting for concurrent thread to start."; + std::vector out; + TF_EXPECT_OK(cache.Read("", block_size / 2, block_size / 2, &out)); + EXPECT_EQ(out.size(), block_size / 2); + + EXPECT_EQ(1, num_requests); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 9287de7237..d5e2a518e9 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -695,6 +695,7 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://", bucket, "/", object); + return Status::OK(); } @@ -814,53 +815,55 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket, if (!stat) { return errors::Internal("'stat' cannot be nullptr."); } - if (stat_cache_->Lookup(fname, stat)) { - if (stat->is_directory) { - return errors::NotFound(fname, " is a directory."); - } else { - return Status::OK(); - } - } if (object.empty()) { return errors::InvalidArgument("'object' must be a non-empty string."); } - string auth_token; - TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token)); + StatCache::ComputeFunc compute_func = + [this, &bucket, &object](const string& fname, FileStatistics* stat) { + string auth_token; + TF_RETURN_IF_ERROR( + AuthProvider::GetToken(auth_provider_.get(), &auth_token)); - std::vector output_buffer; - std::unique_ptr request(http_request_factory_->Create()); - TF_RETURN_IF_ERROR(request->Init()); - TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat( - kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object), - "?fields=size%2Cupdated"))); - TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token)); - TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer)); + std::vector output_buffer; + std::unique_ptr request(http_request_factory_->Create()); + TF_RETURN_IF_ERROR(request->Init()); + TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat( + kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object), + "?fields=size%2Cupdated"))); + TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token)); + TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer)); - if (dns_cache_) { - TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get())); + if (dns_cache_) { + TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get())); + } + TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), + " when reading metadata of gs://", + bucket, "/", object); + + StringPiece response_piece = + StringPiece(output_buffer.data(), output_buffer.size()); + Json::Value root; + TF_RETURN_IF_ERROR(ParseJson(response_piece, &root)); + + // Parse file size. + TF_RETURN_IF_ERROR(GetInt64Value(root, "size", &(stat->length))); + + // Parse file modification time. + string updated; + TF_RETURN_IF_ERROR(GetStringValue(root, "updated", &updated)); + TF_RETURN_IF_ERROR(ParseRfc3339Time(updated, &(stat->mtime_nsec))); + + stat->is_directory = false; + return Status::OK(); + }; + + TF_RETURN_IF_ERROR(stat_cache_->LookupOrCompute(fname, stat, compute_func)); + if (stat->is_directory) { + return errors::NotFound(fname, " is a directory."); + } else { + return Status::OK(); } - - TF_RETURN_WITH_CONTEXT_IF_ERROR( - request->Send(), " when reading metadata of gs://", bucket, "/", object); - - StringPiece response_piece = - StringPiece(output_buffer.data(), output_buffer.size()); - Json::Value root; - TF_RETURN_IF_ERROR(ParseJson(response_piece, &root)); - - // Parse file size. - TF_RETURN_IF_ERROR(GetInt64Value(root, "size", &(stat->length))); - - // Parse file modification time. - string updated; - TF_RETURN_IF_ERROR(GetStringValue(root, "updated", &updated)); - TF_RETURN_IF_ERROR(ParseRfc3339Time(updated, &(stat->mtime_nsec))); - - stat->is_directory = false; - stat_cache_->Insert(fname, *stat); - - return Status::OK(); } Status GcsFileSystem::BucketExists(const string& bucket, bool* result) { @@ -892,19 +895,30 @@ Status GcsFileSystem::FolderExists(const string& dirname, bool* result) { if (!result) { return errors::Internal("'result' cannot be nullptr."); } + StatCache::ComputeFunc compute_func = [this](const string& dirname, + FileStatistics* stat) { + std::vector children; + TF_RETURN_IF_ERROR( + GetChildrenBounded(dirname, 1, &children, true /* recursively */, + true /* include_self_directory_marker */)); + if (!children.empty()) { + *stat = DIRECTORY_STAT; + return Status::OK(); + } else { + return errors::InvalidArgument("Not a directory!"); + } + }; FileStatistics stat; - if (stat_cache_->Lookup(dirname, &stat)) { + Status s = stat_cache_->LookupOrCompute(dirname, &stat, compute_func); + if (s.ok()) { *result = stat.is_directory; return Status::OK(); } - std::vector children; - TF_RETURN_IF_ERROR( - GetChildrenBounded(dirname, 1, &children, true /* recursively */, - true /* include_self_directory_marker */)); - if ((*result = !children.empty())) { - stat_cache_->Insert(dirname, DIRECTORY_STAT); + if (errors::IsInvalidArgument(s)) { + *result = false; + return Status::OK(); } - return Status::OK(); + return s; } Status GcsFileSystem::GetChildren(const string& dirname, @@ -916,33 +930,35 @@ Status GcsFileSystem::GetChildren(const string& dirname, Status GcsFileSystem::GetMatchingPaths(const string& pattern, std::vector* results) { - if (matching_paths_cache_->Lookup(pattern, results)) { - return Status::OK(); - } - results->clear(); - // Find the fixed prefix by looking for the first wildcard. - const string& fixed_prefix = - pattern.substr(0, pattern.find_first_of("*?[\\")); - const string& dir = io::Dirname(fixed_prefix).ToString(); - if (dir.empty()) { - return errors::InvalidArgument("A GCS pattern doesn't have a bucket name: ", - pattern); - } - std::vector all_files; + MatchingPathsCache::ComputeFunc compute_func = + [this](const string& pattern, std::vector* results) { + results->clear(); + // Find the fixed prefix by looking for the first wildcard. + const string& fixed_prefix = + pattern.substr(0, pattern.find_first_of("*?[\\")); + const string& dir = io::Dirname(fixed_prefix).ToString(); + if (dir.empty()) { + return errors::InvalidArgument( + "A GCS pattern doesn't have a bucket name: ", pattern); + } + std::vector all_files; + TF_RETURN_IF_ERROR(GetChildrenBounded( + dir, UINT64_MAX, &all_files, true /* recursively */, + false /* include_self_directory_marker */)); + + const auto& files_and_folders = AddAllSubpaths(all_files); + + // Match all obtained paths to the input pattern. + for (const auto& path : files_and_folders) { + const string& full_path = io::JoinPath(dir, path); + if (Env::Default()->MatchPath(full_path, pattern)) { + results->push_back(full_path); + } + } + return Status::OK(); + }; TF_RETURN_IF_ERROR( - GetChildrenBounded(dir, UINT64_MAX, &all_files, true /* recursively */, - false /* include_self_directory_marker */)); - - const auto& files_and_folders = AddAllSubpaths(all_files); - - // Match all obtained paths to the input pattern. - for (const auto& path : files_and_folders) { - const string& full_path = io::JoinPath(dir, path); - if (Env::Default()->MatchPath(full_path, pattern)) { - results->push_back(full_path); - } - } - matching_paths_cache_->Insert(pattern, *results); + matching_paths_cache_->LookupOrCompute(pattern, results, compute_func)); return Status::OK(); } diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index 911176365f..7614ec4d7f 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -127,12 +127,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) { "Uri: https://storage.googleapis.com/bucket/random_access.txt\n" "Auth Token: fake_token\n" "Range: 18-26\n", - ""), - new FakeHttpRequest( - "Uri: https://storage.googleapis.com/bucket/random_access.txt\n" - "Auth Token: fake_token\n" - "Range: 0-8\n", - "012345678")}); + "")}); GcsFileSystem fs( std::unique_ptr(new FakeAuthProvider), std::unique_ptr( @@ -182,8 +177,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) { file->Read(20, 10, &result, scratch).code()); EXPECT_TRUE(result.empty()); - // The beginning of the file has been evicted from the LRU cache. This will - // result in another request. The buffer size is still 15. + // The beginning of the file should still be in the LRU cache. There should + // not be another request. The buffer size is still 15. TF_EXPECT_OK(file->Read(0, 4, &result, scratch)); } -- GitLab From c4ec569953069f689fd42bae92e15f6ccf40e364 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 16:21:50 -0800 Subject: [PATCH 0723/1801] Get rid of some code duplication in Grappler optimizers by refactoring some utilities to a shared location. Generalize the GetTailOfXXXChain to a more generic graph walker that takes a predicate functor that controls when to stop. PiperOrigin-RevId: 176577743 --- tensorflow/core/grappler/BUILD | 2 + tensorflow/core/grappler/op_types.cc | 41 +++++- tensorflow/core/grappler/op_types.h | 18 +++ .../optimizers/arithmetic_optimizer.cc | 139 ++---------------- .../optimizers/dependency_optimizer.cc | 45 +----- .../core/grappler/optimizers/model_pruner.cc | 10 -- tensorflow/core/grappler/utils.cc | 52 +++++++ tensorflow/core/grappler/utils.h | 65 +++++++- tensorflow/core/grappler/utils_test.cc | 62 +++++++- 9 files changed, 251 insertions(+), 183 deletions(-) diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD index c81c6c0f21..99f1318072 100644 --- a/tensorflow/core/grappler/BUILD +++ b/tensorflow/core/grappler/BUILD @@ -21,6 +21,7 @@ cc_library( hdrs = ["op_types.h"], visibility = ["//visibility:public"], deps = [ + ":utils", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -45,6 +46,7 @@ tf_cc_test( srcs = ["utils_test.cc"], deps = [ ":utils", + "//tensorflow/cc:cc_ops", "//tensorflow/core:all_kernels", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 48b17fd20f..3a39045a4a 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -13,8 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/grappler/op_types.h" +#include + #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/status.h" namespace tensorflow { @@ -233,5 +237,38 @@ bool ModifiesFrameInfo(const NodeDef& node) { return IsEnter(node) || IsExit(node) || IsNextIteration(node); } -} // end namespace grappler +#define OPDEF_PROPERTY_HELPER(PROPERTY_CAP, PROPERTY) \ + bool Is##PROPERTY_CAP(const NodeDef& node) { \ + if (node.op() == "Add") { \ + /* Workaround for "Add" not being marked is_commutative and */ \ + /* is_aggregate. (See cl/173915048). */ \ + const auto type = GetDataTypeFromAttr(node, "T"); \ + return type != DT_INVALID && type != DT_STRING; \ + } \ + const OpDef* op_def = nullptr; \ + Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); \ + return status.ok() && op_def->is_##PROPERTY(); \ + } + +OPDEF_PROPERTY_HELPER(Aggregate, aggregate) +OPDEF_PROPERTY_HELPER(Commutative, commutative) + +bool IsInvolution(const NodeDef& node) { + const std::unordered_set involution_ops{ + "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"}; + return involution_ops.count(node.op()) > 0; +} + +bool IsValuePreserving(const NodeDef& node) { + if (NumNonControlInputs(node) == 1 && IsAggregate(node)) { + return true; + } + const std::unordered_set value_preserving_ops{ + "Transpose", "Reshape", "Identity", "InvertPermutation", + "Reverse", "StopGradient", "PreventGradient", "CheckNumerics", + "ExpandDims", "Squeeze"}; + return value_preserving_ops.count(node.op()) > 0; +} + +} // namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 17ba3603c5..b7a55f3f21 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_GRAPPLER_OP_TYPES_H_ #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/lib/core/status.h" namespace tensorflow { namespace grappler { @@ -59,9 +60,26 @@ bool IsSwitch(const NodeDef& node); bool IsTranspose(const NodeDef& node); bool IsVariable(const NodeDef& node); +// Return true if the op is an aggregation (e.g. Add, AddN). +// Returns false if it could not be determined to be so. +bool IsAggregate(const NodeDef& node); + +// Return true if the op is commutative (e.g. Mul, Add). +// Returns false if it could not be determined to be so. +bool IsCommutative(const NodeDef& node); + bool IsFreeOfSideEffect(const NodeDef& node); bool ModifiesFrameInfo(const NodeDef& node); +// Returns true if the op is an element-wise involution, i.e. if it is its +// own inverse such that f(f(x)) == x. +bool IsInvolution(const NodeDef& node); + +// Returns true if the op in node only rearranges the order of elements in its +// first input tensor and possible changes its shape. More precisely, this +// function returns true if the op commutes with all element-wise operations. +bool IsValuePreserving(const NodeDef& node); + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 2677888fcb..33eac79c01 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" @@ -80,22 +81,6 @@ Status SetTensorValue(DataType dtype, int value, Tensor* tensor) { return Status::OK(); } -bool IsInvolution(const NodeDef& node) { - const std::unordered_set involution_ops = { - "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"}; - return involution_ops.count(node.op()) > 0; -} - -// Returns true if the op in node only rearranges the order of elements in an -// input tensor, or more specifically, if it commutes with all element-wise -// operations on the values. -bool IsValuePreserving(const NodeDef& node) { - const std::unordered_set value_preserving_ops = { - "Transpose", "Reshape", "Identity", "InvertPermutation", - "Reverse", "StopGradient", "PreventGradient", "CheckNumerics", - "ExpandDims", "Squeeze"}; - return value_preserving_ops.count(node.op()) > 0; -} template bool AreInversePermutations(const std::vector& a, const std::vector& b) { @@ -185,39 +170,6 @@ bool IsInnerMatrixTransposeNode(const NodeDef& transpose_node, return false; } -// Follow a chain (through input(0)) of ops starting at `source->input(0)` as -// long as they -// 1. preserve the values of their first input, -// 2. have a single (non-control) output, -// 3. are not in nodes_to_preserve. -// Returns the last node in the chain satisfying these properties or source -// itself if a chain of length zero was found. -// -// source <- vp <- vp <- vp <- non_vp -// ^^ -// return value -NodeDef* GetTailOfValuePreservingChain( - const NodeDef* source, const NodeMap* node_map, - const std::unordered_set& nodes_to_preserve) { - const NodeDef* source_parent = source; - if (!IsControlInput(source->input(0))) { - source = node_map->GetNode(source->input(0)); - while (IsValuePreserving(*source) && - node_map->GetOutputs(source->name()).size() == 1 && - // Do not skip over preserved nodes, because folding will change - // the results of these skipped data-reordering nodes. - // TODO(jingyue): A more elegant way is to copy this chain of - // data-reordering nodes and modify only the copy. - !nodes_to_preserve.count(source->name())) { - source_parent = source; - if (IsControlInput(source->input(0))) { - break; - } - source = node_map->GetNode(source->input(0)); - } - } - return const_cast(source_parent); -} bool MaybeAddControlInput(const string& new_input, NodeDef* node, GraphDef* graph, NodeMap* node_map) { @@ -249,43 +201,6 @@ int CopyControlInputs(const NodeDef& from, NodeDef* to, GraphDef* graph, return num_copied; } -// Returns the data type in attribute `attr_name` of `node`. If that attribute -// doesn't exist, returns DT_INVALID. -DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name) { - if (!node.attr().count(attr_name)) { - return DT_INVALID; - } - const auto& attr = node.attr().at(attr_name); - if (attr.value_case() != AttrValue::kType) { - return DT_INVALID; - } - return attr.type(); -} - -bool IsCommutative(const NodeDef& node) { - if (node.op() == "Add" && node.input_size() > 0) { - // Workaround for "Add" not being marked is_commutative and is_aggregate. - // (See cl/173915048). - const auto type = GetDataTypeFromAttr(node, "T"); - return type != DT_INVALID && type != DT_STRING; - } - const OpDef* op_def = nullptr; - const Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); - return status.ok() && op_def->is_commutative(); -} - -bool IsAggregate(const NodeDef& node) { - if (node.op() == "Add" && node.input_size() > 0) { - // Workaround for "Add" not being marked is_commutative and is_aggregate. - // (See cl/173915048). - const auto type = GetDataTypeFromAttr(node, "T"); - return type != DT_INVALID && type != DT_STRING; - } - const OpDef* op_def = nullptr; - const Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); - return status.ok() && op_def->is_aggregate(); -} - void SetDataTypeToAttr(DataType dtype, const string& attr_name, NodeDef* node) { (*node->mutable_attr())[attr_name].set_type(dtype); } @@ -407,6 +322,18 @@ void AddFrameControlDeps(const NodeDef* old_node, } } +NodeDef* GetTailOfValuePreservingChain( + const NodeDef& node, const NodeMap& node_map, + const std::unordered_set& nodes_to_preserve) { + auto is_value_preserving_non_branching = [&](const NodeDef& node) { + return IsValuePreserving(node) && + NumNonControlOutputs(node, node_map) == 1 && + nodes_to_preserve.count(node.name()) == 0; + }; + return GetTailOfChain(node, node_map, /*follow_control_input=*/false, + is_value_preserving_non_branching); +} + } // namespace class UniqueNodes { @@ -591,7 +518,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // the two instances of the involution from the graph, since they cancel // each other. NodeDef* tail = - GetTailOfValuePreservingChain(node, node_map, nodes_to_preserve_); + GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_); NodeDef* involution = node_map->GetNode(tail->input(0)); if (involution->op() == node->op()) { // Skip both *node and *involution since they cancel each other. @@ -609,7 +536,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // Remove inverse transposes. if (node->op() == "Transpose" || node->op() == "ConjugateTranspose") { - const NodeDef* input = node_map->GetNode(node->input(0)); + NodeDef* input = node_map->GetNode(node->input(0)); if (input->op() == node->op()) { const NodeDef* node_perm = node_map->GetNode(node->input(1)); const NodeDef* input_perm = node_map->GetNode(input->input(1)); @@ -798,7 +725,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // since the weights tend to be smaller than the activations. if (weights->op() == "Const") { const NodeDef* source = node_map->GetNode( - GetTailOfValuePreservingChain(node, node_map, nodes_to_preserve_) + GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_) ->input(0)); if (source->op() == "Mul" && node_map->GetOutputs(source->name()).size() == 1) { @@ -1066,40 +993,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( return ""; } -namespace { -// A vector with a set. The set stores the same elements as the vector, and -// quickly answers whether a value is in the vector. Duplicated elements are not -// allowed for now. -template -class SetVector { - public: - // Returns false if value already existed in the set, true otherwise. - bool PushBack(const T& value) { - if (!set_.insert(value).second) { - VLOG(2) << "Value " << value << " is already in the set."; - return false; - } - vector_.push_back(value); - return true; - } - - T PopBack() { - T back = vector_.back(); - set_.erase(back); - vector_.pop_back(); - return back; - } - - bool Exists(const T& value) const { return set_.count(value); } - - bool Empty() const { return vector_.empty(); } - - private: - std::unordered_set set_; - std::vector vector_; -}; -} // namespace - Status ArithmeticOptimizer::SimplifyArithmeticOps( GraphDef* optimized_graph) const { NodeMap node_map(optimized_graph); diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index 0cc4585ba4..7a9db9bebb 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -32,49 +32,6 @@ namespace tensorflow { namespace grappler { namespace { -// A vector with a set. The set stores the same elements as the vector, and -// quickly answers whether a value is in the vector. Duplicated elements are not -// allowed for now. -template -class SetVector { - public: - // Returns false if value already existed in the set, true otherwise. - bool PushBack(const T& value) { - if (!set_.insert(value).second) { - return false; - } - vector_.push_back(value); - return true; - } - - T PopBack() { - T back = vector_.back(); - set_.erase(back); - vector_.pop_back(); - return back; - } - - bool Exists(const T& value) const { return set_.count(value); } - - bool Empty() const { return vector_.empty(); } - - void Reserve(int64 size) { vector_.reserve(size); } - - private: - std::unordered_set set_; - std::vector vector_; -}; - -bool HasRegularOutputs(const NodeDef& node, const NodeMap& node_map) { - for (const NodeDef* output : node_map.GetOutputs(node.name())) { - for (const string& input : output->input()) { - if (!IsControlInput(input) && NodeName(input) == node.name()) { - return true; - } - } - } - return false; -} int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) { int num_removed = 0; @@ -119,7 +76,7 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) { if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { return false; } - if (!fetch_nodes_known_ || HasRegularOutputs(node, *node_map_)) { + if (!fetch_nodes_known_ || NumNonControlOutputs(node, *node_map_) > 0) { return false; } if (IsMerge(node) || IsSwitch(node)) { diff --git a/tensorflow/core/grappler/optimizers/model_pruner.cc b/tensorflow/core/grappler/optimizers/model_pruner.cc index b9df196f83..c9bec7890e 100644 --- a/tensorflow/core/grappler/optimizers/model_pruner.cc +++ b/tensorflow/core/grappler/optimizers/model_pruner.cc @@ -26,16 +26,6 @@ limitations under the License. namespace tensorflow { namespace grappler { -int NumNonControlInputs(const NodeDef& node) { - int num_inputs = node.input_size(); - for (int i = 0; i < node.input_size(); ++i) { - if (!node.input(i).empty() && node.input(i)[0] == '^') { - num_inputs--; - } - } - return num_inputs; -} - bool IsTrivialOp(const NodeDef& node) { // Remove the stop gradient nodes since they serve no purpose once the graph // is built. Also remove Identity ops. diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 66f8c537ed..7fd1876371 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/scanner.h" @@ -247,5 +248,56 @@ int NumOutputs(const NodeDef& node) { return num_outputs; } +int NumNonControlInputs(const NodeDef& node) { + int num_inputs = node.input_size(); + for (int i = 0; i < node.input_size(); ++i) { + if (IsControlInput(node.input(i))) { + --num_inputs; + } + } + return num_inputs; +} + +int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map) { + int num_outputs = 0; + for (const NodeDef* output : node_map.GetOutputs(node.name())) { + for (const string& input : output->input()) { + if (input == node.name()) { + ++num_outputs; + } + } + } + return num_outputs; +} + +// Returns the data type in attribute `attr_name` of `node`. If that attribute +// doesn't exist, returns DT_INVALID. +DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name) { + if (!node.attr().count(attr_name)) { + return DT_INVALID; + } + const auto& attr = node.attr().at(attr_name); + if (attr.value_case() != AttrValue::kType) { + return DT_INVALID; + } + return attr.type(); +} + +NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map, + bool follow_control_input, + const std::function& pred_fn) { + const NodeDef* current = &source; + const NodeDef* next = current; + while (next == &source || pred_fn(*next)) { + current = next; + if (current->input_size() == 0 || + (!follow_control_input && IsControlInput(current->input(0)))) { + break; + } + next = node_map.GetNode(current->input(0)); + } + return const_cast(current); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index f9fb418140..b98b8656e2 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -17,12 +17,15 @@ limitations under the License. #define TENSORFLOW_GRAPPLER_UTILS_H_ #include +#include +#include +#include #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace grappler { @@ -68,6 +71,39 @@ class OutputMap { std::unordered_map> outputs_; }; +// A vector with a set. The set stores the same elements as the vector, and +// quickly answers whether a value is in the vector. Duplicated elements are not +// allowed for now. +template +class SetVector { + public: + // Returns false if value already existed in the set, true otherwise. + bool PushBack(const T& value) { + if (!set_.insert(value).second) { + return false; + } + vector_.push_back(value); + return true; + } + + T PopBack() { + T back = vector_.back(); + set_.erase(back); + vector_.pop_back(); + return back; + } + + bool Exists(const T& value) const { return set_.find(value) != set_.end(); } + + bool Empty() const { return vector_.empty(); } + + void Reserve(int64 size) { vector_.reserve(size); } + + private: + std::unordered_set set_; + std::vector vector_; +}; + // True iff 'name' refers to a control inputs, i.e. a node name prefixed with // the ^ character. bool IsControlInput(const string& name); @@ -109,10 +145,33 @@ string AsControlDependency(const NodeDef& node); // for control dependency, given a node name string AsControlDependency(const string& node); -// Returns the number of outputs of a node. Note that some of the outputs may be -// unconnected. +// Returns the number of outputs of a node according to its OpDef. Note that +// some of the outputs may be unconnected. int NumOutputs(const NodeDef& node); +// Number of connected non-control inputs. +int NumNonControlInputs(const NodeDef& node); + +// Number of connected non-control outputs. +int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map); + +// Returns the data type in attribute `attr_name` of `node`. If that attribute +// doesn't exist, returns DT_INVALID. +DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name); + +// Returns the last node in the simple chain starting at source and traversing +// through the input(0) edge from each node as long as the next node satisfies +// the predicate given in pred_fn. If no nodes satisfy the predicate, &source +// will be returned. Example: For the chain +// source <- a <- b <- ... <- y <- z +// where +// pred_fn(a) = pred_fn(b) = ... = pred_fn(y) = true, +// pred_fn(z) = false, +// the return value will be a pointer to y. +NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map, + bool follow_control_input, + const std::function& pred_fn); + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index 9d747fe7dc..77371c399e 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/threadpool.h" @@ -181,7 +182,7 @@ TEST_F(UtilsTest, NumOutputs) { EXPECT_EQ(1, NumOutputs(CreateDequeueNode())); } -TEST(AsControlDependency, BasicTest) { +TEST_F(UtilsTest, AsControlDependency) { NodeDef node; node.set_name("foo"); EXPECT_EQ("^foo", AsControlDependency(node)); @@ -189,6 +190,65 @@ TEST(AsControlDependency, BasicTest) { EXPECT_EQ("^foo", AsControlDependency("^foo")); } +TEST_F(UtilsTest, GetTailOfChain) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output c0 = ops::Const(s.WithOpName("c0"), {1.0f, 2.0f}, {1, 2}); + Output c1 = ops::Const(s.WithOpName("c1"), {3.0f, 4.0f}, {1, 2}); + // Add a node with only connected by control output. + Output neg0 = ops::Neg(s.WithOpName("neg0"), c1); + // Add a node with two outputs. + Output neg1 = + ops::Neg(s.WithControlDependencies(neg0).WithOpName("neg1"), c0); + Output neg2 = ops::Neg(s.WithOpName("neg2"), neg1); + Output id1 = ops::Identity(s.WithOpName("id1"), neg2); + Output id2 = ops::Identity(s.WithOpName("id2"), neg1); + auto noop = ops::NoOp(s.WithControlDependencies(neg0).WithOpName("noop")); + GraphDef graph; + TF_CHECK_OK(s.ToGraphDef(&graph)); + LOG(INFO) << graph.DebugString(); + + ASSERT_EQ("c0", graph.node(0).name()); + ASSERT_EQ("c1", graph.node(1).name()); + ASSERT_EQ("neg0", graph.node(2).name()); + ASSERT_EQ("neg1", graph.node(3).name()); + ASSERT_EQ("neg2", graph.node(4).name()); + ASSERT_EQ("id1", graph.node(5).name()); + ASSERT_EQ("id2", graph.node(6).name()); + ASSERT_EQ("noop", graph.node(7).name()); + + NodeMap node_map(&graph); + auto is_neg = [&](const NodeDef& node) { return node.op() == "Neg"; }; + // We walk backwards, starting as "id1", so tail should be "neg1". + NodeDef* tail = GetTailOfChain(graph.node(5), node_map, + /*follow_control_input=*/false, is_neg); + EXPECT_NE(tail, nullptr); + EXPECT_EQ("neg1", tail->name()); + + // We stop at branching nodes, so tail should be "neg2". + auto is_neg_and_non_branching = [&](const NodeDef& node) { + return node.op() == "Neg" && NumNonControlOutputs(node, node_map) == 1; + }; + tail = + GetTailOfChain(graph.node(5), node_map, + /*follow_control_input=*/false, is_neg_and_non_branching); + EXPECT_NE(tail, nullptr); + EXPECT_EQ("neg2", tail->name()); + + // We walk backwards, starting from "noop", also following control inputs, + // so tail should be "neg0". + tail = GetTailOfChain(graph.node(7), node_map, + /*follow_control_input=*/true, is_neg); + EXPECT_NE(tail, nullptr); + EXPECT_EQ("neg0", tail->name()); + + // We walk backwards, starting from "noop", not following control inputs, + // so tail should be "noop" itself. + tail = GetTailOfChain(graph.node(7), node_map, + /*follow_control_input=*/false, is_neg); + EXPECT_NE(tail, nullptr); + EXPECT_EQ("noop", tail->name()); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 94c122b25f2d5c0695cb9e73c0f8eee9992286ed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 16:27:36 -0800 Subject: [PATCH 0724/1801] Adds LinearEstimator and DNNLinearCombinedEstimator. PiperOrigin-RevId: 176578390 --- tensorflow/contrib/estimator/BUILD | 77 ++++++ tensorflow/contrib/estimator/__init__.py | 4 + .../python/estimator/dnn_linear_combined.py | 164 +++++++++++++ .../estimator/dnn_linear_combined_test.py | 220 ++++++++++++++++++ .../estimator/python/estimator/linear.py | 118 ++++++++++ .../estimator/python/estimator/linear_test.py | 153 ++++++++++++ 6 files changed, 736 insertions(+) create mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py create mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py create mode 100644 tensorflow/contrib/estimator/python/estimator/linear.py create mode 100644 tensorflow/contrib/estimator/python/estimator/linear_test.py diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index fe643659d8..197cf7e56f 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -27,8 +27,10 @@ py_library( srcs_version = "PY2AND3", deps = [ ":dnn", + ":dnn_linear_combined", ":extenders", ":head", + ":linear", ":logit_fns", ":multi_head", ":replicate_model_fn", @@ -73,6 +75,45 @@ py_test( ], ) +py_library( + name = "dnn_linear_combined", + srcs = ["python/estimator/dnn_linear_combined.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:nn", + "//tensorflow/python/estimator", + "//tensorflow/python/estimator:dnn_linear_combined", + ], +) + +py_test( + name = "dnn_linear_combined_test", + size = "small", + srcs = ["python/estimator/dnn_linear_combined_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + "notsan", + ], + deps = [ + ":dnn_linear_combined", + ":head", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:nn", + "//tensorflow/python:platform", + "//tensorflow/python:summary", + "//tensorflow/python/estimator:dnn_testing_utils", + "//tensorflow/python/estimator:export_export", + "//tensorflow/python/estimator:linear_testing_utils", + "//tensorflow/python/estimator:numpy_io", + "//tensorflow/python/estimator:prediction_keys", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) + py_library( name = "extenders", srcs = [ @@ -169,6 +210,42 @@ py_test( ], ) +py_library( + name = "linear", + srcs = ["python/estimator/linear.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python/estimator", + "//tensorflow/python/estimator:linear", + ], +) + +py_test( + name = "linear_test", + size = "small", + srcs = ["python/estimator/linear_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + "notsan", + ], + deps = [ + ":head", + ":linear", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:platform", + "//tensorflow/python:summary", + "//tensorflow/python/estimator:export_export", + "//tensorflow/python/estimator:linear_testing_utils", + "//tensorflow/python/estimator:numpy_io", + "//tensorflow/python/estimator:prediction_keys", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) + py_library( name = "logit_fns", srcs = [ diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py index cf727264cd..8191e06fae 100644 --- a/tensorflow/contrib/estimator/__init__.py +++ b/tensorflow/contrib/estimator/__init__.py @@ -20,8 +20,10 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import from tensorflow.contrib.estimator.python.estimator.dnn import * +from tensorflow.contrib.estimator.python.estimator.dnn_linear_combined import * from tensorflow.contrib.estimator.python.estimator.extenders import * from tensorflow.contrib.estimator.python.estimator.head import * +from tensorflow.contrib.estimator.python.estimator.linear import * from tensorflow.contrib.estimator.python.estimator.logit_fns import * from tensorflow.contrib.estimator.python.estimator.multi_head import * @@ -38,6 +40,8 @@ _allowed_symbols = [ 'multi_label_head', 'regression_head', 'DNNEstimator', + 'DNNLinearCombinedEstimator', + 'LinearEstimator', 'call_logit_fn', 'dnn_logit_fn_builder', 'linear_logit_fn_builder', diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py new file mode 100644 index 0000000000..ccaf1128bf --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py @@ -0,0 +1,164 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow estimator for Linear and DNN joined training models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator.canned import dnn_linear_combined as dnn_linear_combined_lib +from tensorflow.python.ops import nn + + +class DNNLinearCombinedEstimator(estimator.Estimator): + """An estimator for TensorFlow Linear and DNN joined models with custom head. + + Note: This estimator is also known as wide-n-deep. + + Example: + + ```python + numeric_feature = numeric_column(...) + categorical_column_a = categorical_column_with_hash_bucket(...) + categorical_column_b = categorical_column_with_hash_bucket(...) + + categorical_feature_a_x_categorical_feature_b = crossed_column(...) + categorical_feature_a_emb = embedding_column( + categorical_column=categorical_feature_a, ...) + categorical_feature_b_emb = embedding_column( + categorical_column=categorical_feature_b, ...) + + estimator = DNNLinearCombinedEstimator( + head=tf.contrib.estimator.multi_label_head(n_classes=3), + # wide settings + linear_feature_columns=[categorical_feature_a_x_categorical_feature_b], + linear_optimizer=tf.train.FtrlOptimizer(...), + # deep settings + dnn_feature_columns=[ + categorical_feature_a_emb, categorical_feature_b_emb, + numeric_feature], + dnn_hidden_units=[1000, 500, 100], + dnn_optimizer=tf.train.ProximalAdagradOptimizer(...)) + + # To apply L1 and L2 regularization, you can set optimizers as follows: + tf.train.ProximalAdagradOptimizer( + learning_rate=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=0.001) + # It is same for FtrlOptimizer. + + # Input builders + def input_fn_train: # returns x, y + pass + estimator.train(input_fn=input_fn_train, steps=100) + + def input_fn_eval: # returns x, y + pass + metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) + def input_fn_predict: # returns x, None + pass + predictions = estimator.predict(input_fn=input_fn_predict) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * for each `column` in `dnn_feature_columns` + `linear_feature_columns`: + - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `_WeightedCategoricalColumn`, two features: the first + with `key` the id column name, the second with `key` the weight column + name. Both features' `value` must be a `SparseTensor`. + - if `column` is a `_DenseColumn`, a feature with `key=column.name` + whose `value` is a `Tensor`. + + Loss is calculated by using mean squared error. + + @compatibility(eager) + Estimators are not compatible with eager execution. + @end_compatibility + """ + + def __init__(self, + head, + model_dir=None, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + input_layer_partitioner=None, + config=None): + """Initializes a DNNLinearCombinedEstimator instance. + + Args: + head: A `_Head` instance constructed with a method such as + `tf.contrib.estimator.multi_label_head`. + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator + to continue training a previously saved model. + linear_feature_columns: An iterable containing all the feature columns + used by linear part of the model. All items in the set must be + instances of classes derived from `FeatureColumn`. + linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the linear part of the model. Defaults to FTRL optimizer. + dnn_feature_columns: An iterable containing all the feature columns used + by deep part of the model. All items in the set must be instances of + classes derived from `FeatureColumn`. + dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the deep part of the model. Defaults to Adagrad optimizer. + dnn_hidden_units: List of hidden units per layer. All layers are fully + connected. + dnn_activation_fn: Activation function applied to each layer. If None, + will use `tf.nn.relu`. + dnn_dropout: When not None, the probability we will drop out + a given coordinate. + input_layer_partitioner: Partitioner for input layer. Defaults to + `min_max_variable_partitioner` with `min_slice_size` 64 << 20. + config: RunConfig object to configure the runtime settings. + + Raises: + ValueError: If both linear_feature_columns and dnn_features_columns are + empty at the same time. + """ + linear_feature_columns = linear_feature_columns or [] + dnn_feature_columns = dnn_feature_columns or [] + self._feature_columns = ( + list(linear_feature_columns) + list(dnn_feature_columns)) + if not self._feature_columns: + raise ValueError('Either linear_feature_columns or dnn_feature_columns ' + 'must be defined.') + + def _model_fn(features, labels, mode, config): + return dnn_linear_combined_lib._dnn_linear_combined_model_fn( # pylint: disable=protected-access + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + input_layer_partitioner=input_layer_partitioner, + config=config) + + super(DNNLinearCombinedEstimator, self).__init__( + model_fn=_model_fn, model_dir=model_dir, config=config) diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py new file mode 100644 index 0000000000..b5e4d34dc7 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py @@ -0,0 +1,220 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for dnn_linear_combined.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import shutil +import tempfile + +import numpy as np +import six + +from tensorflow.contrib.estimator.python.estimator import dnn_linear_combined +from tensorflow.contrib.estimator.python.estimator import head as head_lib +from tensorflow.python.estimator.canned import dnn_testing_utils +from tensorflow.python.estimator.canned import linear_testing_utils +from tensorflow.python.estimator.canned import prediction_keys +from tensorflow.python.estimator.export import export +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column +from tensorflow.python.framework import ops +from tensorflow.python.ops import nn +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from tensorflow.python.summary.writer import writer_cache + + +def _dnn_only_estimator_fn( + hidden_units, + feature_columns, + model_dir=None, + label_dimension=1, + weight_column=None, + optimizer='Adagrad', + activation_fn=nn.relu, + dropout=None, + input_layer_partitioner=None, + config=None): + return dnn_linear_combined.DNNLinearCombinedEstimator( + head=head_lib.regression_head( + weight_column=weight_column, label_dimension=label_dimension), + model_dir=model_dir, + dnn_feature_columns=feature_columns, + dnn_optimizer=optimizer, + dnn_hidden_units=hidden_units, + dnn_activation_fn=activation_fn, + dnn_dropout=dropout, + input_layer_partitioner=input_layer_partitioner, + config=config) + + +class DNNOnlyEstimatorEvaluateTest( + dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__( + self, _dnn_only_estimator_fn) + + +class DNNOnlyEstimatorPredictTest( + dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + dnn_testing_utils.BaseDNNRegressorPredictTest.__init__( + self, _dnn_only_estimator_fn) + + +class DNNOnlyEstimatorTrainTest( + dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + dnn_testing_utils.BaseDNNRegressorTrainTest.__init__( + self, _dnn_only_estimator_fn) + + +def _linear_only_estimator_fn( + feature_columns, + model_dir=None, + label_dimension=1, + weight_column=None, + optimizer='Ftrl', + config=None, + partitioner=None): + return dnn_linear_combined.DNNLinearCombinedEstimator( + head=head_lib.regression_head( + weight_column=weight_column, label_dimension=label_dimension), + model_dir=model_dir, + linear_feature_columns=feature_columns, + linear_optimizer=optimizer, + input_layer_partitioner=partitioner, + config=config) + + +class LinearOnlyEstimatorEvaluateTest( + linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__( + self, _linear_only_estimator_fn) + + +class LinearOnlyEstimatorPredictTest( + linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + linear_testing_utils.BaseLinearRegressorPredictTest.__init__( + self, _linear_only_estimator_fn) + + +class LinearOnlyEstimatorTrainTest( + linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + linear_testing_utils.BaseLinearRegressorTrainingTest.__init__( + self, _linear_only_estimator_fn) + + +class DNNLinearCombinedEstimatorIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _test_complete_flow( + self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, + label_dimension, batch_size): + linear_feature_columns = [ + feature_column.numeric_column('x', shape=(input_dimension,))] + dnn_feature_columns = [ + feature_column.numeric_column('x', shape=(input_dimension,))] + feature_columns = linear_feature_columns + dnn_feature_columns + est = dnn_linear_combined.DNNLinearCombinedEstimator( + head=head_lib.regression_head(label_dimension=label_dimension), + linear_feature_columns=linear_feature_columns, + dnn_feature_columns=dnn_feature_columns, + dnn_hidden_units=(2, 2), + model_dir=self._model_dir) + + # TRAIN + num_steps = 10 + est.train(train_input_fn, steps=num_steps) + + # EVALUTE + scores = est.evaluate(eval_input_fn) + self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn('loss', six.iterkeys(scores)) + + # PREDICT + predictions = np.array([ + x[prediction_keys.PredictionKeys.PREDICTIONS] + for x in est.predict(predict_input_fn) + ]) + self.assertAllEqual((batch_size, label_dimension), predictions.shape) + + # EXPORT + feature_spec = feature_column.make_parse_example_spec(feature_columns) + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def test_numpy_input_fn(self): + """Tests complete flow with numpy_input_fn.""" + label_dimension = 2 + batch_size = 10 + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + # learn y = x + train_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + batch_size=batch_size, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=label_dimension, + label_dimension=label_dimension, + batch_size=batch_size) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/estimator/python/estimator/linear.py b/tensorflow/contrib/estimator/python/estimator/linear.py new file mode 100644 index 0000000000..3bf4abe83d --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/linear.py @@ -0,0 +1,118 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Linear estimator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator.canned import linear as linear_lib + + +class LinearEstimator(estimator.Estimator): + """An estimator for TensorFlow linear models with user-specified head. + + Example: + + ```python + categorical_column_a = categorical_column_with_hash_bucket(...) + categorical_column_b = categorical_column_with_hash_bucket(...) + + categorical_feature_a_x_categorical_feature_b = crossed_column(...) + + # Estimator using the default optimizer. + estimator = LinearEstimator( + head=tf.contrib.estimator.multi_label_head(n_classes=3), + feature_columns=[categorical_column_a, + categorical_feature_a_x_categorical_feature_b]) + + # Or estimator using the FTRL optimizer with regularization. + estimator = LinearEstimator( + head=tf.contrib.estimator.multi_label_head(n_classes=3), + feature_columns=[categorical_column_a, + categorical_feature_a_x_categorical_feature_b]) + optimizer=tf.train.FtrlOptimizer( + learning_rate=0.1, + l1_regularization_strength=0.001 + )) + + def input_fn_train: # returns x, y (where y represents label's class index). + ... + estimator.train(input_fn=input_fn_train, steps=100) + def input_fn_eval: # returns x, y (where y represents label's class index). + ... + metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) + def input_fn_predict: # returns x, None + ... + predictions = estimator.predict(input_fn=input_fn_predict) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * if `weight_column` is not `None`, a feature with + `key=weight_column` whose value is a `Tensor`. + * for each `column` in `feature_columns`: + - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `_WeightedCategoricalColumn`, two features: the first + with `key` the id column name, the second with `key` the weight column + name. Both features' `value` must be a `SparseTensor`. + - if `column` is a `_DenseColumn`, a feature with `key=column.name` + whose `value` is a `Tensor`. + + Loss and predicted output are determined by the specified head. + + @compatibility(eager) + Estimators are not compatible with eager execution. + @end_compatibility + """ + + def __init__(self, + head, + feature_columns, + model_dir=None, + optimizer='Ftrl', + config=None, + partitioner=None): + """Initializes a `LinearEstimator` instance. + + Args: + head: A `_Head` instance constructed with a method such as + `tf.contrib.estimator.multi_label_head`. + feature_columns: An iterable containing all the feature columns used by + the model. All items in the set should be instances of classes derived + from `FeatureColumn`. + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator + to continue training a previously saved model. + optimizer: An instance of `tf.Optimizer` used to train the model. Defaults + to FTRL optimizer. + config: `RunConfig` object to configure the runtime settings. + partitioner: Optional. Partitioner for input layer. + """ + def _model_fn(features, labels, mode, config): + return linear_lib._linear_model_fn( # pylint: disable=protected-access + features=features, + labels=labels, + mode=mode, + head=head, + feature_columns=tuple(feature_columns or []), + optimizer=optimizer, + partitioner=partitioner, + config=config) + super(LinearEstimator, self).__init__( + model_fn=_model_fn, model_dir=model_dir, config=config) diff --git a/tensorflow/contrib/estimator/python/estimator/linear_test.py b/tensorflow/contrib/estimator/python/estimator/linear_test.py new file mode 100644 index 0000000000..c63514eb68 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/linear_test.py @@ -0,0 +1,153 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for linear.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import shutil +import tempfile + +import numpy as np +import six + +from tensorflow.contrib.estimator.python.estimator import head as head_lib +from tensorflow.contrib.estimator.python.estimator import linear +from tensorflow.python.estimator.canned import linear_testing_utils +from tensorflow.python.estimator.canned import prediction_keys +from tensorflow.python.estimator.export import export +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column +from tensorflow.python.framework import ops +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from tensorflow.python.summary.writer import writer_cache + + +def _linear_estimator_fn( + weight_column=None, label_dimension=1, *args, **kwargs): + """Returns a LinearEstimator that uses regression_head.""" + return linear.LinearEstimator( + head=head_lib.regression_head( + weight_column=weight_column, label_dimension=label_dimension), + *args, **kwargs) + + +class LinearEstimatorEvaluateTest( + linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__( + self, _linear_estimator_fn) + + +class LinearEstimatorPredictTest( + linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + linear_testing_utils.BaseLinearRegressorPredictTest.__init__( + self, _linear_estimator_fn) + + +class LinearEstimatorTrainTest( + linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + linear_testing_utils.BaseLinearRegressorTrainingTest.__init__( + self, _linear_estimator_fn) + + +class LinearEstimatorIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + def _test_complete_flow( + self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, + label_dimension, batch_size): + feature_columns = [ + feature_column.numeric_column('x', shape=(input_dimension,))] + est = linear.LinearEstimator( + head=head_lib.regression_head(label_dimension=label_dimension), + feature_columns=feature_columns, + model_dir=self._model_dir) + + # TRAIN + num_steps = 10 + est.train(train_input_fn, steps=num_steps) + + # EVALUTE + scores = est.evaluate(eval_input_fn) + self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn('loss', six.iterkeys(scores)) + + # PREDICT + predictions = np.array([ + x[prediction_keys.PredictionKeys.PREDICTIONS] + for x in est.predict(predict_input_fn) + ]) + self.assertAllEqual((batch_size, label_dimension), predictions.shape) + + # EXPORT + feature_spec = feature_column.make_parse_example_spec(feature_columns) + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def test_numpy_input_fn(self): + """Tests complete flow with numpy_input_fn.""" + label_dimension = 2 + batch_size = 10 + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + # learn y = x + train_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + y=data, + batch_size=batch_size, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, + batch_size=batch_size, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=label_dimension, + label_dimension=label_dimension, + batch_size=batch_size) + + +if __name__ == '__main__': + test.main() -- GitLab From 6c0dd242e76f494dbab42b5b7621d2513168c5bb Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Tue, 21 Nov 2017 16:58:23 -0800 Subject: [PATCH 0725/1801] Fix a non-critical problem in TFLite README Currently it uses `bazel build` to build non-optimized binary, then `bazel run` to build and run optimized binary. It doubles the required build time to try Toco converter. PiperOrigin-RevId: 176581770 --- tensorflow/contrib/lite/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index b5df986686..385ccf4680 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -154,7 +154,7 @@ Here is a sample command line to convert the frozen Graphdef to '.lite' format f ``` bazel build tensorflow/contrib/lite/toco:toco -bazel run --config=opt tensorflow/contrib/lite/toco:toco -- \ +bazel-bin/tensorflow/contrib/lite/toco/toco -- \ --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \ -- GitLab From 88fe67d1432124cb74df8318603e4a82e3763fb3 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Tue, 21 Nov 2017 17:01:33 -0800 Subject: [PATCH 0726/1801] Make FlapMapDataset saveable. Add test to verify restoring iterator in an empty graph works to the testing base class. Fix a bug in GraphDefBuilderWrapper::AddFunction where we were not adding functions referenced by the attrs of the NodeDefs in the FunctionDef. FlatMapDatasetSerializationTest.testMapThenFlatMap makes sure this works now. PiperOrigin-RevId: 176582131 --- .../contrib/data/python/kernel_tests/BUILD | 6 + .../kernel_tests/flat_map_dataset_op_test.py | 79 ++++++++++ tensorflow/core/kernels/dataset.h | 42 +++-- .../core/kernels/flat_map_dataset_op.cc | 147 ++++++++++++++++-- 4 files changed, 254 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 3b5f2db322..1923c0586a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -159,13 +159,19 @@ py_test( srcs = ["flat_map_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python:function", + "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:session", "//tensorflow/python:training", + "//tensorflow/python:variable_scope", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py index c950e4857e..ddb4bc34f3 100644 --- a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py @@ -21,11 +21,18 @@ import random import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import function from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -147,5 +154,77 @@ class FlatMapDatasetTest(test.TestCase): sess.run(get_next) +class FlatMapDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def testCore(self): + # Complicated way of saying range(start, start+25). + def build_ds(start): + + def map_fn(x): + return dataset_ops.Dataset.range(x, x + 5) + + return dataset_ops.Dataset.range(start, start + 5 * 5, 5).flat_map(map_fn) + + self.run_core_tests(lambda: build_ds(0), lambda: build_ds(10), 25) + + def testMapThenFlatMap(self): + + def build_ds(): + + def flat_map_fn(_): + + def map_fn(y): + return 10 * math_ops.to_int32(y) + + return dataset_ops.Dataset.range(100).map(map_fn) + + return dataset_ops.Dataset.range(5).flat_map(flat_map_fn) + + self.run_core_tests(build_ds, None, 500) + + def testCaptureDefunInMapFn(self): + + def build_ds(): + + def map_fn(x): + + @function.Defun(dtypes.int64) + def defun_fn(x): + return constant_op.constant(1000) + math_ops.to_int32(x) + + return dataset_ops.Dataset.from_tensor_slices([defun_fn(x)]) + + return dataset_ops.Dataset.range(100).flat_map(map_fn) + + self.run_core_tests(build_ds, None, 100) + + def testDisallowVariableCapture(self): + + def build_ds(): + test_var = variable_scope.get_variable( + name="test_var", shape=(), use_resource=True) + return dataset_ops.Dataset.range(5).flat_map( + lambda _: dataset_ops.Dataset.from_tensor_slices([test_var])) + + self.verify_error_on_save(build_ds, 5, errors.InvalidArgumentError) + + def testDisallowCapturingStatefulOps(self): + + def build_ds(): + + def flat_map_fn(_): + + def map_fn(x): + return random_ops.random_uniform( + (), 0, 10, dtype=dtypes.int32) * math_ops.to_int32(x) + + return dataset_ops.Dataset.range(100).map(map_fn) + + return dataset_ops.Dataset.range(5).flat_map(flat_map_fn) + + self.verify_error_on_save(build_ds, 500, errors.InvalidArgumentError) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index c266bc07c1..18b57ec97a 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -238,19 +238,16 @@ class GraphDefBuilderWrapper { if (op_reg_data->is_function_op) { TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name())); } + // Recursively add functions in attrs of this NodeDef. + for (const auto& pair : node_def.attr()) { + TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx)); + } } // Recursively add functions in attrs of function_name. for (auto iter = f_def->attr().begin(); iter != f_def->attr().end(); iter++) { - const AttrValue& attr_value = iter->second; - if (attr_value.has_func()) { - TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name())); - } else if (attr_value.has_list()) { - for (const NameAttrList& name_attr_list : attr_value.list().func()) { - TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name())); - } - } + TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx)); } return Status::OK(); } @@ -279,6 +276,13 @@ class GraphDefBuilderWrapper { for (const NodeDef& node_def : function_def->node_def()) { const OpDef* op_def; TF_RETURN_IF_ERROR(lib_def->LookUpOpDef(node_def.op(), &op_def)); + // TODO(b/65524810): Hack to allow functions to capture Dataset op + // nodes needed for FlatMap. Currently, source datasets nodes have been + // marked stateful to avoid constant folding since we do not have a + // good way of serializing them. + if (IsOpWhitelisted(op_def)) { + continue; + } if (op_def->is_stateful()) { return errors::InvalidArgument( "Op[name: ", node_def.name(), ", type: ", node_def.op(), "] ", @@ -289,12 +293,21 @@ class GraphDefBuilderWrapper { return Status::OK(); } - bool HasAttr(const string& op_type_name, const string& attr_name) { + bool IsOpWhitelisted(const OpDef* op_def) const { + return StringPiece(op_def->name()).ends_with("Dataset") && + HasAttr(op_def, "output_shapes"); + } + + bool HasAttr(const string& op_type_name, const string& attr_name) const { const OpDef* op_def = nullptr; Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def); if (!s.ok() || op_def == nullptr) { return false; } + return HasAttr(op_def, attr_name); + } + + bool HasAttr(const OpDef* op_def, const string& attr_name) const { for (auto attr : op_def->attr()) { if (attr.name() == attr_name) { return true; @@ -303,6 +316,17 @@ class GraphDefBuilderWrapper { return false; } + Status AddAttrFunctions(const AttrValue& attr_value, OpKernelContext* ctx) { + if (attr_value.has_func()) { + TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name())); + } else if (attr_value.has_list()) { + for (const NameAttrList& name_attr_list : attr_value.list().func()) { + TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name())); + } + } + return Status::OK(); + } + GraphDefBuilder* b_; }; diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/flat_map_dataset_op.cc index e62a43e94c..ac1689e5bf 100644 --- a/tensorflow/core/kernels/flat_map_dataset_op.cc +++ b/tensorflow/core/kernels/flat_map_dataset_op.cc @@ -54,18 +54,21 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { std::move(other_arguments), &captured_func)); - *output = new Dataset(input, std::move(captured_func), output_types_, - output_shapes_); + *output = new Dataset(ctx, input, func_, std::move(captured_func), + output_types_, output_shapes_); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(const DatasetBase* input, + Dataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, std::unique_ptr captured_func, const DataTypeVector& output_types, const std::vector& output_shapes) - : input_(input), + : GraphDatasetBase(ctx), + input_(input), + func_(func), captured_func_(std::move(captured_func)), output_types_(output_types), output_shapes_(output_shapes) { @@ -90,6 +93,37 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "FlatMapDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + + DataTypeVector other_arguments_types; + other_arguments_types.reserve(captured_func_->captured_inputs().size()); + std::vector other_arguments; + other_arguments.reserve(captured_func_->captured_inputs().size()); + for (const Tensor& t : captured_func_->captured_inputs()) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + other_arguments.emplace_back(node); + other_arguments_types.emplace_back(t.dtype()); + } + AttrValue f; + b->BuildAttrValue(func_, &f); + AttrValue other_arguments_types_attr; + b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); + + TF_RETURN_IF_ERROR(b->AddDataset( + this, {std::make_pair(0, input_graph_node)}, // Single tensor inputs. + {std::make_pair(1, other_arguments)}, // Tensor list inputs. + {std::make_pair("f", f), + std::make_pair("Targuments", other_arguments_types_attr)}, // Attrs + output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: @@ -102,6 +136,10 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { bool* end_of_sequence) override { mutex_lock l(mu_); do { + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } if (current_element_iterator_) { // We are currently precessing a mapped element, so try to get the // next subelement. @@ -120,26 +158,113 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { } // Get the next element from the input dataset. - std::vector args; - TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &args, end_of_sequence)); + captured_func_inputs_.clear(); + TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &captured_func_inputs_, + end_of_sequence)); if (*end_of_sequence) { + input_impl_.reset(); return Status::OK(); } - TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement( - ctx, args, element_index_++, dataset()->captured_func_.get(), - prefix(), ¤t_element_iterator_)); + TF_RETURN_IF_ERROR(BuildCurrentElementIteratorLocked(ctx)); } while (true); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (input_impl_) { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("element_index"), element_index_)); + if (current_element_iterator_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("captured_func_inputs_size"), + captured_func_inputs_.size())); + for (int i = 0; i < captured_func_inputs_.size(); i++) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + full_name(strings::StrCat("captured_func_inputs[", i, "]")), + captured_func_inputs_[i])); + } + TF_RETURN_IF_ERROR(SaveParent(writer, current_element_iterator_)); + } else { + TF_RETURN_IF_ERROR(writer->WriteScalar( + full_name("current_element_iterator_uninitialized"), "")); + } + } else { + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("exhausted"), "")); + } + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + input_impl_.reset(); + element_index_ = 0; + current_element_iterator_.reset(); + captured_func_inputs_.clear(); + if (!reader->Contains(full_name("exhausted"))) { + input_impl_ = dataset()->input_->MakeIterator(prefix()); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + { + int64 temp; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("element_index"), &temp)); + element_index_ = temp; + } + if (!reader->Contains( + full_name("current_element_iterator_uninitialized"))) { + size_t captured_func_inputs_size; + { + int64 temp; + TF_RETURN_IF_ERROR(reader->ReadScalar( + full_name("captured_func_inputs_size"), &temp)); + captured_func_inputs_size = static_cast(temp); + } + captured_func_inputs_.reserve(captured_func_inputs_size); + for (int i = 0; i < captured_func_inputs_size; i++) { + captured_func_inputs_.emplace_back(); + TF_RETURN_IF_ERROR(reader->ReadTensor( + full_name(strings::StrCat("captured_func_inputs[", i, "]")), + &captured_func_inputs_.back())); + } + element_index_--; + TF_RETURN_IF_ERROR(BuildCurrentElementIteratorLocked(ctx)); + TF_RETURN_IF_ERROR( + RestoreParent(ctx, reader, current_element_iterator_)); + } + } + return Status::OK(); + } + private: + Status BuildCurrentElementIteratorLocked(IteratorContext* ctx) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return dataset::MakeIteratorFromInputElement( + ctx, captured_func_inputs_, element_index_++, + dataset()->captured_func_.get(), prefix(), + ¤t_element_iterator_); + } + + Status BuildCurrentElementIteratorLocked(OpKernelContext* ctx) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + IteratorContext::Params params; + params.env = ctx->env(); + params.runner = *(ctx->runner()); + IteratorContext iter_ctx(std::move(params)); + return BuildCurrentElementIteratorLocked(&iter_ctx); + } + mutex mu_; size_t element_index_ GUARDED_BY(mu_) = 0; - const std::unique_ptr input_impl_ GUARDED_BY(mu_); + std::unique_ptr input_impl_ GUARDED_BY(mu_); std::unique_ptr current_element_iterator_ GUARDED_BY(mu_); + std::vector captured_func_inputs_ GUARDED_BY(mu_); }; const DatasetBase* const input_; + const NameAttrList func_; const std::unique_ptr captured_func_; const DataTypeVector output_types_; const std::vector output_shapes_; -- GitLab From c518d35b9077bd193321f8b66dfb958ce9ab61cd Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Tue, 21 Nov 2017 17:02:29 -0800 Subject: [PATCH 0727/1801] [XLA] Enable explicit broadcast for ternary operations. Also explicitly broadcast constant 1 in algsimp for pow(x, -1) => 1/x transformation, so that: - we can avoid implicit broadcast which we are trying to eliminate at HLO level. - interpreter, which does not support implicit broadcast, now passes the PowSpecialF32 test case in array_elementwise_ops_test which generates a divide(1.F32[], param.F[4]) instruction that requires implicit broadcast. PiperOrigin-RevId: 176582286 --- .../xla/service/algebraic_simplifier.cc | 8 +++++++- .../xla/service/algebraic_simplifier_test.cc | 6 ++++-- .../compiler/xla/service/user_computation.cc | 19 +++++++++++++++++++ tensorflow/compiler/xla/tests/BUILD | 1 + 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 1764f7f3dc..5dcc1318c9 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1108,9 +1108,15 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) { if (IsAll(rhs, -1)) { auto* one = computation_->AddInstruction(HloInstruction::CreateConstant( Literal::One(rhs->shape().element_type()).CloneToUnique())); + + // Explicitly broadcast scalar 1 to the output shape, to avoid implicit + // broadcast in divide HLO as we are trying to eliminate implicit + // broadcasting at HLO level. + auto* broadcast_one = computation_->AddInstruction( + HloInstruction::CreateBroadcast(power->shape(), one, {})); return ReplaceWithNewInstruction( power, HloInstruction::CreateBinary(power->shape(), HloOpcode::kDivide, - one, lhs)); + broadcast_one, lhs)); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 620f0a54fa..097f30be32 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -761,8 +761,10 @@ TEST_F(AlgebraicSimplifierTest, PowNegative1) { ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); HloInstruction* root = computation->root_instruction(); - EXPECT_THAT(root, op::Divide(op::Constant(), param0)); - EXPECT_EQ(root->operand(0)->literal().GetFirstElement(), 1); + EXPECT_THAT(root, op::Divide(op::Broadcast(), param0)); + EXPECT_EQ(root->operand(0)->opcode(), HloOpcode::kBroadcast); + EXPECT_EQ(root->operand(0)->operand(0)->literal().GetFirstElement(), + 1); } TEST_F(AlgebraicSimplifierTest, ReshapeBroadcast) { diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 8f63c92e5b..b449b4f288 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -2978,6 +2978,25 @@ void ComputationLowerer::Visit( HloInstruction* rhs = lookup_instruction(ternary_op_request.rhs()); HloInstruction* ehs = lookup_instruction(ternary_op_request.ehs()); auto hlo_opcode = TernaryOperationToHloOpcode(ternary_op_request.triop()); + + if (debug_options_.xla_eliminate_hlo_implicit_broadcast()) { + if (!ShapeUtil::SameDimensions(request.output_shape(), lhs->shape())) { + // lhs side is being implicitly broadcast. Change to explicit. + lhs = + ImplicitBroadcastToExplicitBroadcast(lhs, request.output_shape()); + } + + if (!ShapeUtil::SameDimensions(request.output_shape(), rhs->shape())) { + rhs = + ImplicitBroadcastToExplicitBroadcast(rhs, request.output_shape()); + } + + if (!ShapeUtil::SameDimensions(request.output_shape(), ehs->shape())) { + ehs = + ImplicitBroadcastToExplicitBroadcast(ehs, request.output_shape()); + } + } + hlo_instruction = add_instruction(HloInstruction::CreateTernary( request.output_shape(), hlo_opcode, lhs, rhs, ehs)); break; diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 2e220e7293..aa1804cc21 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -511,6 +511,7 @@ xla_test( name = "array_elementwise_ops_test", srcs = ["array_elementwise_ops_test.cc"], shard_count = 25, + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:array3d", -- GitLab From ecfd154cacf9d886b02b91bc7f518e75e5f9c6b9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 17:22:18 -0800 Subject: [PATCH 0728/1801] Changed StringPiece::Hasher to StringPieceHasher in various places. This will allow the Hasher alias to be removed from StringPiece. PiperOrigin-RevId: 176584316 --- tensorflow/contrib/tensorboard/db/summary_db_writer.cc | 2 +- tensorflow/core/lib/core/stringpiece_test.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index 18f0f1e97a..37a32acb1e 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -278,7 +278,7 @@ class GraphSaver { GraphDef* graph_; int64 graph_id_; std::vector name_copies_; - std::unordered_map name_to_node_id_; + std::unordered_map name_to_node_id_; }; class SummaryDbWriter : public SummaryWriterInterface { diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc index 11554554e8..a1d335c4e1 100644 --- a/tensorflow/core/lib/core/stringpiece_test.cc +++ b/tensorflow/core/lib/core/stringpiece_test.cc @@ -95,7 +95,7 @@ TEST(StringPieceHasher, HashMap) { StringPiece p2(s2); StringPiece p3(s3); - std::unordered_map map; + std::unordered_map map; map.insert(std::make_pair(p1, 0)); map.insert(std::make_pair(p2, 1)); -- GitLab From 6ad9e45abf68f62caf7a9fe7adb7ef30da5ea6ca Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Tue, 21 Nov 2017 17:22:49 -0800 Subject: [PATCH 0729/1801] Support depthwise conv ops. PiperOrigin-RevId: 176584368 --- tensorflow/core/grappler/op_types.cc | 15 +++++++ tensorflow/core/grappler/op_types.h | 3 ++ .../grappler/optimizers/layout_optimizer.cc | 35 ++++++++++------ .../python/grappler/layout_optimizer_test.py | 40 ++++++++++++++----- 4 files changed, 72 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 3a39045a4a..1b23a4caba 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -69,6 +69,21 @@ bool IsConv2DBackpropInput(const NodeDef& node) { return op == "Conv2DBackpropInput"; } +bool IsDepthwiseConv2dNative(const NodeDef& node) { + const auto op = node.op(); + return op == "DepthwiseConv2dNative"; +} + +bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node) { + const auto op = node.op(); + return op == "DepthwiseConv2dNativeBackpropFilter"; +} + +bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node) { + const auto op = node.op(); + return op == "DepthwiseConv2dNativeBackpropInput"; +} + bool IsDequeueOp(const NodeDef& node) { const auto& op = node.op(); return op == "QueueDequeueManyV2" || op == "QueueDequeueMany" || diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index b7a55f3f21..85260efa93 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -31,6 +31,9 @@ bool IsConstant(const NodeDef& node); bool IsConv2D(const NodeDef& node); bool IsConv2DBackpropFilter(const NodeDef& node); bool IsConv2DBackpropInput(const NodeDef& node); +bool IsDepthwiseConv2dNative(const NodeDef& node); +bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node); +bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node); bool IsDequeueOp(const NodeDef& node); bool IsEnter(const NodeDef& node); bool IsExit(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index aaa1b7a316..b00e1cb011 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -45,18 +45,22 @@ const char kReshapeConst[] = "LayoutOptimizerReshapeConst"; const char kReductionConst[] = "LayoutOptimizerReductionConst"; std::set GetOpsFormatSupported() { - std::set ops_format_supported = {"AvgPool", - "AvgPoolGrad", - "Conv2D", - "Conv2DBackpropFilter", - "Conv2DBackpropInput", - "BiasAdd", - "BiasAddGrad", - "FusedBatchNorm", - "FusedBatchNormGrad", - "FusedConv2DBiasActivation", - "MaxPool", - "MaxPoolGrad"}; + std::set ops_format_supported = { + "AvgPool", + "AvgPoolGrad", + "Conv2D", + "Conv2DBackpropFilter", + "Conv2DBackpropInput", + "BiasAdd", + "BiasAddGrad", + "DepthwiseConv2dNative", + "DepthwiseConv2dNativeBackpropInput", + "DepthwiseConv2dNativeBackpropFilter", + "FusedBatchNorm", + "FusedBatchNormGrad", + "FusedConv2DBiasActivation", + "MaxPool", + "MaxPoolGrad"}; return ops_format_supported; } @@ -1289,6 +1293,13 @@ class DataLayoutOptimizer : GraphProcessor { } else if (IsConv2DBackpropInput(*node)) { node_processor.reset( new Conv2DBackpropInputProcessor(opt_cxt, config_.no_gemm)); + } else if (IsDepthwiseConv2dNative(*node)) { + node_processor.reset(new Conv2DProcessor(opt_cxt, true)); + } else if (IsDepthwiseConv2dNativeBackpropFilter(*node)) { + node_processor.reset( + new Conv2DBackpropFilterProcessor(opt_cxt, true)); + } else if (IsDepthwiseConv2dNativeBackpropInput(*node)) { + node_processor.reset(new Conv2DBackpropInputProcessor(opt_cxt, true)); } else if (IsFusedBatchNormGradV1(*node)) { node_processor.reset(new FusedBatchNormGradProcessor(opt_cxt)); } else if (IsMaxPoolGradV1(*node)) { diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 350c8434ce..626e0502cb 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -102,11 +102,12 @@ def _get_config(layout_optimizer=True): return config -def _simple_metagraph(): +def _simple_metagraph(depthwise=False): random_seed.set_random_seed(0) x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0)) - y = conv_layers.conv2d(x, 32, [3, 3]) - z = conv_layers.conv2d(y, 32, [3, 3]) + conv = conv_layers.separable_conv2d if depthwise else conv_layers.conv2d + y = conv(x, 32, [3, 3]) + z = conv(y, 32, [3, 3]) optimizer = gradient_descent.GradientDescentOptimizer(1e-4) loss = math_ops.reduce_mean(z) train_op = optimizer.minimize(loss) @@ -116,6 +117,15 @@ def _simple_metagraph(): return meta_graph +def _get_cluster(): + named_device = device_properties_pb2.NamedDevice() + named_device.name = '/GPU:0' + named_device.properties.type = 'GPU' + named_device.properties.environment['architecture'] = '4' + cluster = gcluster.Cluster(devices=[named_device]) + return cluster + + class LayoutOptimizerTest(test.TestCase): """Tests the Grappler layout optimizer.""" @@ -202,13 +212,8 @@ class LayoutOptimizerTest(test.TestCase): meta_graph = _simple_metagraph() rewrite_options = rewriter_config_pb2.RewriterConfig( layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) - named_device = device_properties_pb2.NamedDevice() - named_device.name = '/GPU:0' - named_device.properties.type = 'GPU' - named_device.properties.environment['architecture'] = '4' - cluster = gcluster.Cluster(devices=[named_device]) optimized_graph = tf_optimizer.OptimizeGraph( - rewrite_options, meta_graph, cluster=cluster) + rewrite_options, meta_graph, cluster=_get_cluster()) found = 0 for node in optimized_graph.node: @@ -217,6 +222,23 @@ class LayoutOptimizerTest(test.TestCase): self.assertEqual(node.attr['data_format'].s, 'NCHW') self.assertEqual(found, 5) + def testDepthwise(self): + meta_graph = _simple_metagraph(depthwise=True) + rewrite_options = rewriter_config_pb2.RewriterConfig( + layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) + optimized_graph = tf_optimizer.OptimizeGraph( + rewrite_options, meta_graph, cluster=_get_cluster()) + + found = 0 + for node in optimized_graph.node: + if node.op in [ + 'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter', + 'DepthwiseConv2dNativeBackpropInput' + ]: + found += 1 + self.assertEqual(node.attr['data_format'].s, 'NCHW') + self.assertEqual(found, 6) + def testCheckpointCompatibility(self): if not test.is_gpu_available(cuda_only=True): self.skipTest('GPU required') -- GitLab From f1c2ed214dc470ef22ecd7a7c977f783c533e977 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Tue, 21 Nov 2017 18:16:29 -0800 Subject: [PATCH 0730/1801] Test combining tensors from different graphs PiperOrigin-RevId: 176589756 --- tensorflow/python/framework/ops_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 7ecdea8275..a4780fdc05 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -577,6 +577,16 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual(len(z.op.op_def.input_arg), 2) self.assertEqual(len(z.op.op_def.output_arg), 1) + def testInputFromDifferentGraphError(self): + g_0 = ops.Graph() + g_1 = ops.Graph() + with g_0.as_default(): + x = constant_op.constant(1) + with g_1.as_default(): + y = constant_op.constant(2) + with self.assertRaisesRegexp(ValueError, "must be from the same graph"): + y * x # pylint: disable=pointless-statement + @test_util.with_c_api class CreateOpTest(test_util.TensorFlowTestCase): -- GitLab From 59998d9150271e6e051a2ca638fb6041f8018224 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Tue, 21 Nov 2017 18:53:53 -0800 Subject: [PATCH 0731/1801] Preserve fetch nodes. PiperOrigin-RevId: 176594442 --- tensorflow/core/grappler/grappler_item.cc | 2 + .../grappler/optimizers/layout_optimizer.cc | 73 ++++++++++++------- .../grappler/optimizers/layout_optimizer.h | 1 + .../optimizers/layout_optimizer_test.cc | 15 ++++ 4 files changed, 64 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/grappler/grappler_item.cc b/tensorflow/core/grappler/grappler_item.cc index 844a1fa328..149f6fc735 100644 --- a/tensorflow/core/grappler/grappler_item.cc +++ b/tensorflow/core/grappler/grappler_item.cc @@ -72,9 +72,11 @@ std::vector GrapplerItem::MainVariables() const { std::unordered_set GrapplerItem::NodesToPreserve() const { std::unordered_set result; for (const string& f : fetch) { + VLOG(1) << "Add fetch " << f; result.insert(NodeName(f)); } for (const auto& f : feed) { + VLOG(1) << "Add feed " << f.first; result.insert(NodeName(f.first)); } for (const auto& node : init_ops) { diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index b00e1cb011..89ebd8e98f 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -109,8 +109,11 @@ bool IsMaxPoolGradV1(const NodeDef& node) { class GraphProcessor { public: - GraphProcessor(GraphDef* graph, NodeMap* node_map) - : graph_(graph), node_map_(node_map) {} + GraphProcessor(GraphDef* graph, NodeMap* node_map, + const std::unordered_set& nodes_to_preserve) + : graph_(graph), + node_map_(node_map), + nodes_to_preserve_(nodes_to_preserve) {} protected: NodeDef* AddNodePermConst(const string& name, const string& device, @@ -174,27 +177,30 @@ class GraphProcessor { GraphDef* graph_; NodeMap* node_map_; - - private: + const std::unordered_set& nodes_to_preserve_; }; struct OptimizeContext { OptimizeContext(GraphDef* graph, NodeDef* node, NodeMap* node_map, + const std::unordered_set& nodes_to_preserve, bool is_in_frame) : graph(graph), node(node), node_map(node_map), + nodes_to_preserve(nodes_to_preserve), is_in_frame(is_in_frame) {} GraphDef* graph; NodeDef* node; NodeMap* node_map; + const std::unordered_set& nodes_to_preserve; bool is_in_frame; }; class NodeProcessor : public GraphProcessor { public: explicit NodeProcessor(const OptimizeContext& opt_cxt) - : GraphProcessor(opt_cxt.graph, opt_cxt.node_map), + : GraphProcessor(opt_cxt.graph, opt_cxt.node_map, + opt_cxt.nodes_to_preserve), node_(opt_cxt.node), is_in_frame_(opt_cxt.is_in_frame) {} virtual ~NodeProcessor() {} @@ -246,8 +252,12 @@ class NodeProcessor : public GraphProcessor { return Status::OK(); } + bool MustPreserve() const { + return nodes_to_preserve_.find(node_->name()) != nodes_to_preserve_.end(); + } + virtual bool ShouldProcess() const { - return IsNHWC() && IsDimsFour(*node_) && HasOutputs(); + return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs(); } void UpdateAttrDataFormat() { @@ -523,6 +533,9 @@ class BiasAddGradProcessor : public NodeProcessor { protected: bool ShouldProcess() const override { + if (MustPreserve()) { + return false; + } auto input = node_map_->GetNode(node_->input(0)); if (input) { if ((IsNHWC() && IsDimsFour(*input)) || IsNodeNCHWToNHWC(input->name())) { @@ -542,7 +555,7 @@ class Conv2DProcessor : public NodeProcessor { protected: bool ShouldProcess() const override { - return IsNHWC() && IsDimsFour(*node_) && HasOutputs() && + return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() && (!IsGemmUsed() || no_gemm_); } @@ -679,7 +692,8 @@ class AgnosticNodeProcessor : public NodeProcessor { protected: bool ShouldProcess() const override { - return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC(); + return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && + IsNodeAfterNCHWToNHWC(); } bool IsNodeAfterNCHWToNHWC() const { @@ -729,7 +743,8 @@ class BinaryOpProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { - return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() && + return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && + IsNodeAfterNCHWToNHWC() && (Is4DOperateWithND(4) || Is4DOperateWithScalar() || Is4DOperateWithVector()); } @@ -839,8 +854,8 @@ class ConcatProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { - return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() && - IsAlongDimC(); + return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && + IsNodeAfterNCHWToNHWC() && IsAlongDimC(); } std::vector GetInputPos() const override { @@ -904,8 +919,8 @@ class PadProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { - return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() && - PaddingSupported(); + return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && + IsNodeAfterNCHWToNHWC() && PaddingSupported(); } Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); } @@ -1116,8 +1131,8 @@ class SqueezeProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { - return IsDimsN(*node_, 2) && HasOutputs() && IsNodeAfterNCHWToNHWC() && - IsInputConvertible() && IsAlongDimHW(); + return !MustPreserve() && IsDimsN(*node_, 2) && HasOutputs() && + IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW(); } Status AddLayoutTransposeToOutputs() override { return Status::OK(); } @@ -1166,7 +1181,7 @@ class SumProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { auto input0 = node_map_->GetNode(node_->input(0)); - return HasOutputs() && IsNodeAfterNCHWToNHWC() && + return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() && (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) && IsAlongDimNHW(); } @@ -1227,12 +1242,13 @@ class SumProcessor : public AgnosticNodeProcessor { class DataLayoutOptimizer : GraphProcessor { public: - explicit DataLayoutOptimizer(const string& default_device, GraphDef* graph, - NodeMap* node_map, - LayoutOptimizer::TuningConfig config) - : GraphProcessor(graph, node_map), - default_device_(default_device), - config_(config) {} + explicit DataLayoutOptimizer( + LayoutOptimizer::TuningConfig config, + const std::unordered_set& nodes_to_preserve, + const string& default_device, GraphDef* graph, NodeMap* node_map) + : GraphProcessor(graph, node_map, nodes_to_preserve), + config_(config), + default_device_(default_device) {} Status Optimize() { LOG(INFO) << "Number of nodes for original graph: " << graph_->node_size(); @@ -1279,7 +1295,8 @@ class DataLayoutOptimizer : GraphProcessor { ops_format_supported.end()) { auto node = graph_->mutable_node(i); bool is_in_frame = !frames[node].empty(); - OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame); + OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_, + is_in_frame); std::unique_ptr node_processor; if (IsAvgPoolGrad(*node)) { node_processor.reset(new AvgPoolGradProcessor(opt_cxt)); @@ -1326,7 +1343,8 @@ class DataLayoutOptimizer : GraphProcessor { ops_format_agnostic.end()) { auto node = graph_->mutable_node(i); bool is_in_frame = !frames[node].empty(); - OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame); + OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_, + is_in_frame); std::unique_ptr node_processor; if (IsAddN(*node)) { node_processor.reset(new AddNProcessor(opt_cxt)); @@ -1401,8 +1419,8 @@ class DataLayoutOptimizer : GraphProcessor { return Status::OK(); } - string default_device_; LayoutOptimizer::TuningConfig config_; + string default_device_; }; int GetNumTranspose(const GraphDef& graph) { @@ -1445,8 +1463,8 @@ Status LayoutOptimizer::Tune(const GrapplerItem& item, return status; } NodeMap node_map(output); - DataLayoutOptimizer layout_optimizer(default_device, output, &node_map, - config); + DataLayoutOptimizer layout_optimizer(config, nodes_to_preserve_, + default_device, output, &node_map); status = layout_optimizer.Optimize(); return status; } @@ -1459,6 +1477,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, return Status::OK(); } + nodes_to_preserve_ = item.NodesToPreserve(); GraphProperties graph_properties(item); auto status = graph_properties.InferStatically(); if (!status.ok()) { diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.h b/tensorflow/core/grappler/optimizers/layout_optimizer.h index a22fadd9e7..f5dd70356a 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.h +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.h @@ -47,6 +47,7 @@ class LayoutOptimizer : public GraphOptimizer { const GraphDef& optimize_output, double result) override; private: + std::unordered_set nodes_to_preserve_; Status Tune(const GrapplerItem& item, const GraphProperties& graph_properties, const string& default_device, const TuningConfig& config, GraphDef* output); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index 156e6710a6..5d2d90b193 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -263,6 +263,21 @@ TEST_F(LayoutOptimizerTest, Connectivity) { EXPECT_EQ(node_i2_output->input(0), "i1"); } +TEST_F(LayoutOptimizerTest, PreserveFetch) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto i = ops::Identity(s.WithOpName("i"), conv); + GrapplerItem item; + item.fetch.push_back("Conv2D"); + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto conv_node = node_map.GetNode("Conv2D"); + EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC"); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 8b4b63b9710a3d242dad7640bc3fd7c6515b41fa Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 21 Nov 2017 19:34:35 -0800 Subject: [PATCH 0732/1801] Add tf.contrib.data.Counter. PiperOrigin-RevId: 176597546 --- tensorflow/contrib/data/__init__.py | 4 ++ .../kernel_tests/range_dataset_op_test.py | 22 ++++++++ tensorflow/contrib/data/python/ops/BUILD | 1 + tensorflow/contrib/data/python/ops/counter.py | 52 +++++++++++++++++++ .../contrib/data/python/ops/scan_ops.py | 4 +- 5 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/data/python/ops/counter.py diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 6e43ae0e63..7c6244f22b 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -17,6 +17,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@Dataset +@@Counter @@Iterator @@TFRecordDataset @@FixedLengthRecordDataset @@ -33,6 +34,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@unbatch @@parallel_interleave @@rejection_resample +@@scan @@sloppy_interleave @@get_single_element @@ -48,6 +50,7 @@ from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch from tensorflow.contrib.data.python.ops.batching import padded_batch_and_drop_remainder from tensorflow.contrib.data.python.ops.batching import unbatch +from tensorflow.contrib.data.python.ops.counter import Counter from tensorflow.contrib.data.python.ops.dataset_ops import Dataset from tensorflow.contrib.data.python.ops.dataset_ops import get_single_element from tensorflow.contrib.data.python.ops.enumerate_ops import enumerate_dataset @@ -62,6 +65,7 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.readers import TextLineDataset from tensorflow.contrib.data.python.ops.readers import TFRecordDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample +from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.python.data.ops.iterator_ops import Iterator # pylint: enable=unused-import diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py index f59ac760dc..8e6ad061a1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import os +from tensorflow.contrib.data.python.ops import counter from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import enumerate_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops @@ -194,6 +195,27 @@ class RangeDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testCounter(self): + """Test dataset construction using `count`.""" + iterator = (counter.Counter(start=3, step=4) + .make_one_shot_iterator()) + get_next = iterator.get_next() + self.assertEqual([], get_next.shape.as_list()) + self.assertEqual(dtypes.int64, get_next.dtype) + + negative_iterator = (counter.Counter(start=0, step=-1) + .make_one_shot_iterator()) + negative_get_next = negative_iterator.get_next() + + with self.test_session() as sess: + self.assertEqual(3, sess.run(get_next)) + self.assertEqual(3 + 4, sess.run(get_next)) + self.assertEqual(3 + 2 * 4, sess.run(get_next)) + + self.assertEqual(0, sess.run(negative_get_next)) + self.assertEqual(-1, sess.run(negative_get_next)) + self.assertEqual(-2, sess.run(negative_get_next)) + def _iterator_checkpoint_prefix(self): return os.path.join(self.get_temp_dir(), "iterator") diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 86035f3a69..25ed58cdf5 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -14,6 +14,7 @@ load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") py_library( name = "dataset_ops", srcs = [ + "counter.py", "dataset_ops.py", ], srcs_version = "PY2AND3", diff --git a/tensorflow/contrib/data/python/ops/counter.py b/tensorflow/contrib/data/python/ops/counter.py new file mode 100644 index 0000000000..63226fe781 --- /dev/null +++ b/tensorflow/contrib/data/python/ops/counter.py @@ -0,0 +1,52 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""The Counter Dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.ops import scan_ops + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops + + +def Counter(start=0, step=1, dtype=dtypes.int64): + """Creates a `Dataset` of a `step`-separated count startin from `start`. + + For example: + + ```python + Dataset.count() == [0, 1, 2, ...) + Dataset.count(2) == [2, 3, ...) + Dataset.count(2, 5) == [2, 7, 12, ...) + Dataset.count(0, -1) == [0, -1, -2, ...) + Dataset.count(10, -1) == [10, 9, ...) + ``` + + Args: + start: starting value for count. + step: step size. + dtype: counter data type. + + Returns: + A `Dataset` of scalar elements. + """ + with ops.name_scope("counter"): + start = ops.convert_to_tensor(start, dtype=dtype, name="start") + step = ops.convert_to_tensor(step, dtype=dtype, name="step") + return dataset_ops.Dataset.from_tensors(0).repeat(None).apply( + scan_ops.scan(start, lambda state, _: (state + step, state))) diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 7c595b1814..2744786e9e 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -70,13 +70,13 @@ class _ScanDataset(dataset_ops.Dataset): @function.Defun(*(flat_state_types + nest.flatten( sparse.as_dense_types(input_dataset.output_types, - input_dataset.output_classes)))) # pylint: disable=protected-access + input_dataset.output_classes)))) def tf_scan_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the state and input_dataset. # TODO(b/69424092): Check that neither inputs nor outputs are sparse. dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, - input_dataset.output_classes) # pylint: disable=protected-access + input_dataset.output_classes) for arg, shape in zip(args, flat_state_shapes + nest.flatten(dense_shapes)): arg.set_shape(shape) -- GitLab From 7f88363810e77a39db919fb4000583ad0138e53c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 20:19:18 -0800 Subject: [PATCH 0733/1801] Fix an integer overflow problem in PropagateShapes. PiperOrigin-RevId: 176601510 --- .../core/grappler/costs/graph_properties.cc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index c28498ef6f..bf49d78a1a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -707,17 +707,17 @@ Status GraphProperties::PropagateShapes( // incorrect shape functions. The algoritm should converge in at most // num_nested_loops^2 * max_rank. We approximate max_rank with the constant 4. // The same applies to resources. - const int num_loops = new_shapes->size(); - const int max_loop_length = item_.graph.node_size(); - const int max_rank = 4; - const int max_loop_iterations = - max_rank * max_loop_length * std::max(1, num_loops * num_loops); - const int num_queues = resources.size(); - const int max_resource_iterations = num_queues * num_queues * max_rank; - - int num_resource_iterations = 0; + const int64 num_loops = new_shapes->size(); + const int64 max_loop_length = item_.graph.node_size(); + const int64 max_rank = 4; + const int64 max_loop_iterations = + max_rank * max_loop_length * std::max(1, num_loops * num_loops); + const int64 num_queues = resources.size(); + const int64 max_resource_iterations = num_queues * num_queues * max_rank; + + int64 num_resource_iterations = 0; do { - int num_loop_iterations = 0; + int64 num_loop_iterations = 0; while (!new_shapes->empty() && num_loop_iterations++ < max_loop_iterations) { const Node* n = new_shapes->pop(); -- GitLab From b06b072e787740c15ef49b0f2285bb170abea020 Mon Sep 17 00:00:00 2001 From: Seungil You <31752931+si-you@users.noreply.github.com> Date: Wed, 22 Nov 2017 13:52:11 +0900 Subject: [PATCH 0734/1801] Add str(Label(...)) to bazel macros (#14737) * Add Label to bazel configurations to make bazel macros work in supermodule. * Roll back unnecessary auto formatting --- .../platform/default/build_config_root.bzl | 24 ++++++++++--------- third_party/mkl/build_defs.bzl | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl index caeed0aa4a..6e98f12114 100644 --- a/tensorflow/core/platform/default/build_config_root.bzl +++ b/tensorflow/core/platform/default/build_config_root.bzl @@ -10,7 +10,9 @@ def tf_sycl_tests_tags(): def tf_additional_plugin_deps(): return select({ - "//tensorflow:with_xla_support": ["//tensorflow/compiler/jit"], + str(Label("//tensorflow:with_xla_support")): [ + str(Label("//tensorflow/compiler/jit")) + ], "//conditions:default": [], }) @@ -19,37 +21,37 @@ def tf_additional_xla_deps_py(): def tf_additional_license_deps(): return select({ - "//tensorflow:with_xla_support": ["@llvm//:LICENSE.TXT"], + str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"], "//conditions:default": [], }) def tf_additional_verbs_deps(): return select({ - "//tensorflow:with_verbs_support": [ - "//tensorflow/contrib/verbs:verbs_server_lib", - "//tensorflow/contrib/verbs:grpc_verbs_client", - ], + str(Label("//tensorflow:with_verbs_support")): [ + str(Label("//tensorflow/contrib/verbs:verbs_server_lib")), + str(Label("//tensorflow/contrib/verbs:grpc_verbs_client")), + ], "//conditions:default": [], }) def tf_additional_mpi_deps(): return select({ - "//tensorflow:with_mpi_support": [ - "//tensorflow/contrib/mpi:mpi_server_lib", + str(Label("//tensorflow:with_mpi_support")): [ + str(Label("//tensorflow/contrib/mpi:mpi_server_lib")), ], "//conditions:default": [], }) def tf_additional_gdr_deps(): return select({ - "//tensorflow:with_gdr_support": [ - "//tensorflow/contrib/gdr:gdr_server_lib", + str(Label("//tensorflow:with_gdr_support")): [ + str(Label("//tensorflow/contrib/gdr:gdr_server_lib")), ], "//conditions:default": [], }) def if_static(extra_deps, otherwise=[]): return select({ - "//tensorflow:framework_shared_object": otherwise, + str(Label("//tensorflow:framework_shared_object")): otherwise, "//conditions:default": extra_deps, }) diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index 533c0766c7..6574f25092 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -20,7 +20,7 @@ def if_mkl(if_true, if_false = []): """ return select({ - "//third_party/mkl:using_mkl": if_true, + str(Label("//third_party/mkl:using_mkl")): if_true, "//conditions:default": if_false }) -- GitLab From 4816a8c641e55253ec95767e08440dffe0c65bb9 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Tue, 21 Nov 2017 20:53:59 -0800 Subject: [PATCH 0735/1801] Fixing download_dependencies.sh bugs for generating TFLite iOS exmaples (#14734) * Fix: Can't build TFLite after running download_dependencies.sh. Root cause: The script downloads files for building TFLite for iOS example. It writes to `downloads/` directory and conflicts with the visibility rule "**/*" in BUILD * Retain lite/examples/ios/camera/data directory in git. * Fix some bugs in download_dependencies.sh * Handle both the cases that the zip file has nested directories or not. * Always use `curl` since `wget` sometimes has certificate problem in some Mac machines. --- tensorflow/contrib/lite/BUILD | 3 +++ .../contrib/lite/download_dependencies.sh | 18 +++++++++++++----- .../lite/examples/ios/camera/data/.gitignore | 0 3 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/lite/examples/ios/camera/data/.gitignore diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 96a9e281ad..52460123cc 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -191,6 +191,9 @@ filegroup( exclude = [ "**/METADATA", "**/OWNERS", + "downloads", + "examples", + "gen", ], ), visibility = ["//tensorflow:__subpackages__"], diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index 41480c2007..e13df2fa1c 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -56,11 +56,19 @@ download_and_extract() { elif [[ "${url}" == *zip ]]; then tempdir=$(mktemp -d) tempdir2=$(mktemp -d) - wget -P ${tempdir} ${url} - unzip ${tempdir}/* -d ${tempdir2} - # unzip has no strip components, so unzip to a temp dir, and move the files - # we want from the tempdir to destination. - echo cp `find ${tempdir2} -type f` ${dir}/ + + curl -L ${url} > ${tempdir}/zipped.zip + unzip ${tempdir}/zipped.zip -d ${tempdir2} + + # If the zip file contains nested directories, extract the files from the + # inner directory. + if ls ${tempdir2}/*/* 1> /dev/null 2>&1; then + # unzip has no strip components, so unzip to a temp dir, and move the + # files we want from the tempdir to destination. + cp -R ${tempdir2}/*/* ${dir}/ + else + cp -R ${tempdir2}/* ${dir}/ + fi rm -rf ${tempdir2} ${tempdir} fi diff --git a/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore b/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore new file mode 100644 index 0000000000..e69de29bb2 -- GitLab From 6a35171131331a31d70e67e6d244422f3d15aafb Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 21 Nov 2017 20:55:22 -0800 Subject: [PATCH 0736/1801] [CMake] Don't build tests for RE2 (#14696) * [CMake] Don't build tests for RE2 Issue #14691 shows a build error on Windows in the RE2 tests. Since we do not run these tests, and they seem to be causing problems on some platforms, do not build them as part of the TensorFlow build. * replace tab with spaces --- tensorflow/contrib/cmake/external/re2.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake index b56f4b0898..d10f5959f7 100644 --- a/tensorflow/contrib/cmake/external/re2.cmake +++ b/tensorflow/contrib/cmake/external/re2.cmake @@ -45,4 +45,5 @@ ExternalProject_Add(re2 endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL} + -DRE2_BUILD_TESTING:BOOL=OFF ) -- GitLab From 6e3e1d317a5d157da7773d35cb18c23892d931e4 Mon Sep 17 00:00:00 2001 From: Chris Hoyean Song Date: Wed, 22 Nov 2017 13:56:37 +0900 Subject: [PATCH 0737/1801] fix misspellings (#14702) --- .../contrib/distributions/python/ops/poisson_lognormal.py | 2 +- tensorflow/contrib/eager/python/metrics_impl.py | 2 +- tensorflow/contrib/factorization/python/ops/wals.py | 2 +- tensorflow/contrib/gan/python/train.py | 2 +- tensorflow/contrib/lite/schema/upgrade_schema_test.py | 2 +- tensorflow/contrib/nn/python/ops/sampling_ops.py | 2 +- tensorflow/contrib/slim/python/slim/evaluation.py | 4 ++-- .../hybrid/python/models/decisions_to_data_then_nn_test.py | 6 +++--- tensorflow/python/estimator/export/export.py | 2 +- tensorflow/python/estimator/training_test.py | 4 ++-- tensorflow/python/keras/_impl/keras/backend.py | 2 +- tensorflow/python/ops/math_ops_test.py | 2 +- tensorflow/python/ops/variables.py | 4 ++-- tensorflow/python/profiler/model_analyzer_test.py | 2 +- tensorflow/python/util/nest.py | 4 ++-- 15 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py index 8a95038a3c..e1118ed431 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py +++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py @@ -292,7 +292,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution): # where, # # Z|v ~ interpolate_affine[v](distribution) - # V ~ mixture_distrubution + # V ~ mixture_distribution # # thus, # diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index aa359b7a0d..2f8016ede3 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -73,7 +73,7 @@ class Metric(object): * `result()`: Computes and returns a final value for the metric from the variables in `self`. - Decendants may override `aggregate()`, but usually won't need to. It + Descendants may override `aggregate()`, but usually won't need to. It adds in the state from a list of metrics of the same type as `self`. (Default is to sum all the variables.) Note that users should not call `aggregate()`, it is for use by TensorFlow infrastructure. diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py index b2f22eb2fc..0c5661e4a1 100644 --- a/tensorflow/contrib/factorization/python/ops/wals.py +++ b/tensorflow/contrib/factorization/python/ops/wals.py @@ -166,7 +166,7 @@ def _wals_factorization_model_function(features, labels, mode, params): # TRAIN mode: if mode == model_fn.ModeKeys.TRAIN: - # Training consists of the folowing ops (controlled using a SweepHook). + # Training consists of the following ops (controlled using a SweepHook). # Before a row sweep: # row_update_prep_gramian_op # initialize_row_update_op diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py index ad2d5eb86c..e9443f766b 100644 --- a/tensorflow/contrib/gan/python/train.py +++ b/tensorflow/contrib/gan/python/train.py @@ -422,7 +422,7 @@ def gan_loss( ac_disc_loss = tfgan_losses.acgan_discriminator_loss( model, add_summaries=add_summaries) dis_loss += aux_cond_discriminator_weight * ac_disc_loss - # Gathers auxilliary losses. + # Gathers auxiliary losses. if model.generator_scope: gen_reg_loss = losses.get_regularization_loss(model.generator_scope.name) else: diff --git a/tensorflow/contrib/lite/schema/upgrade_schema_test.py b/tensorflow/contrib/lite/schema/upgrade_schema_test.py index 754400e888..b5002e6f75 100644 --- a/tensorflow/contrib/lite/schema/upgrade_schema_test.py +++ b/tensorflow/contrib/lite/schema/upgrade_schema_test.py @@ -252,7 +252,7 @@ def JsonDumpAndFlush(data, fp): class TestSchemaUpgrade(test_util.TensorFlowTestCase): - def testNonExistantFile(self): + def testNonExistentFile(self): converter = upgrade_schema_lib.Converter() non_existent = tempfile.mktemp(suffix=".json") with self.assertRaisesRegexp(IOError, "No such file or directory"): diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 2ae529e015..98749cff7e 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -34,7 +34,7 @@ def _rank_resample(weights, biases, inputs, sampled_values, num_resampled, log(sum_j exp((w_i * x_j + b_i) / resampling_temperature)) - where w_i, b_i are the weight and bias of the i-th class, repsectively, + where w_i, b_i are the weight and bias of the i-th class, respectively, and j ranges over the rows of `inputs`. For efficiency, we rearrange the computation to diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index cdb720b36b..3caf4e02da 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -34,7 +34,7 @@ the metrics and finally call the `evaluation` method: "mse": slim.metrics.mean_squared_error(predictions, labels), }) - inital_op = tf.group( + initial_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer()) @@ -42,7 +42,7 @@ the metrics and finally call the `evaluation` method: metric_values = slim.evaluation( sess, num_evals=1, - inital_op=initial_op, + initial_op=initial_op, eval_op=names_to_updates.values(), final_op=name_to_values.values()) diff --git a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py index cccf444db8..a56beeeb2c 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py +++ b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py @@ -80,7 +80,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase): isinstance(self.params.num_trees, tensor_forest.ForestHParams)) with variable_scope.variable_scope( - "DecisionsToDataThenNNTest_testContructionPollution"): + "DecisionsToDataThenNNTest_testConstructionPollution"): graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN( self.params) @@ -95,7 +95,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase): for _ in range(100)]) with variable_scope.variable_scope( - "DecisionsToDataThenNNTest_testInferenceContruction"): + "DecisionsToDataThenNNTest_testInferenceConstruction"): graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN( self.params) graph = graph_builder.inference_graph(data, None) @@ -111,7 +111,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase): labels = [1 for _ in range(100)] with variable_scope.variable_scope( - "DecisionsToDataThenNNTest_testTrainingContruction"): + "DecisionsToDataThenNNTest_testTrainingConstruction"): graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN( self.params) graph = graph_builder.training_graph(data, labels, None) diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index 31e9933c6f..3b295a7e35 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -57,7 +57,7 @@ class ServingInputReceiver(collections.namedtuple( groups of receiver tensors, each of which may be a `Tensor` or a dict of string to `Tensor`. These named receiver tensor alternatives generate additional serving signatures, which may be used to feed inputs at - different points within the input reciever subgraph. A typical usage is + different points within the input receiver subgraph. A typical usage is to allow feeding raw feature `Tensor`s *downstream* of the tf.parse_example() op. Defaults to None. """ diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index 1862e325e2..17d018aa88 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -1016,7 +1016,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): is_the_final_export): del export_path, checkpoint_path, eval_result estimator.times_export_was_called += 1 - # final_export is happend at the end. + # final_export is happened at the end. self.assertEqual(0, estimator.times_final_export_was_called) if is_the_final_export: estimator.times_final_export_was_called += 1 @@ -1361,7 +1361,7 @@ class TrainingExecutorRunLocalTest(test.TestCase): is_the_final_export): del export_path, checkpoint_path, eval_result estimator.times_export_was_called += 1 - # final_export is happend at the end. + # final_export is happened at the end. self.assertEqual(0, estimator.times_final_export_was_called) if is_the_final_export: estimator.times_final_export_was_called += 1 diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index b029e5161f..ec7a5dcffd 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -2487,7 +2487,7 @@ class Function(object): """Runs a computation graph. It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`. - In particular additonal operations via `fetches` argument and additional + In particular additional operations via `fetches` argument and additional tensor substitutions via `feed_dict` arguments. Note that given substitutions are merged with substitutions from `inputs`. Even though `feed_dict` is passed once in the constructor (called in `model.compile()`) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 4642f4c580..39be804eee 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -62,7 +62,7 @@ class ReduceTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testReduceInvalidAxis(self): if context.in_eager_mode(): - # The shape check is in run a graph contruction time. In eager mode, + # The shape check is in run a graph construction time. In eager mode, # it misses the check, magically return result given wrong shape. return x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index eab7c3828f..36c03ec2a1 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -200,7 +200,7 @@ class Variable(object): @compatibility(eager) `tf.Variable` is not compatible with eager execution. Use - `tfe.Variable` instead which is compatable with both eager execution + `tfe.Variable` instead which is compatible with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. @@ -1064,7 +1064,7 @@ class PartitionedVariable(object): """A container for partitioned `Variable` objects. @compatibility(eager) `tf.PartitionedVariable` is not compatible with - eager execution. Use `tfe.Variable` instead which is compatable + eager execution. Use `tfe.Variable` instead which is compatible with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index 698f8906d4..26fb99efe6 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -62,7 +62,7 @@ class PrintModelAnalysisTest(test.TestCase): ' ScalarW (1, 1/1 params)\n', f.read()) - def testSelectEverthingDetail(self): + def testSelectEverythingDetail(self): ops.reset_default_graph() dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0' outfile = os.path.join(test.get_temp_dir(), 'dump') diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index 25dbc78d7a..cdd53fb995 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -116,7 +116,7 @@ def flatten(nest): used instead. The same convention is followed in `pack_sequence_as`. This correctly repacks dicts and `OrderedDict`s after they have been flattened, and also allows flattening an `OrderedDict` and then repacking it back using - a correponding plain dict, or vice-versa. + a corresponding plain dict, or vice-versa. Dictionaries with non-sortable keys cannot be flattened. Users must not modify any collections used in `nest` while this function is @@ -296,7 +296,7 @@ def pack_sequence_as(structure, flat_sequence): keys is used instead. The same convention is followed in `pack_sequence_as`. This correctly repacks dicts and `OrderedDict`s after they have been flattened, and also allows flattening an `OrderedDict` and then repacking it - back using a correponding plain dict, or vice-versa. + back using a corresponding plain dict, or vice-versa. Dictionaries with non-sortable keys cannot be flattened. Args: -- GitLab From fc34cf3a4a7fb946ea0e15d33dfe05f042db7f9f Mon Sep 17 00:00:00 2001 From: CSJY Date: Wed, 22 Nov 2017 12:57:23 +0800 Subject: [PATCH 0738/1801] Fix docstring of variable_scope() (#14707) --- tensorflow/python/ops/variable_scope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 91dea12da2..0ebebc7131 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1691,7 +1691,7 @@ class variable_scope(object): # pylint: disable=invalid-name v1 = foo() # Creates v. v2 = foo() # Gets the same, existing v. assert v1 == v2 - + ``` Basic example of sharing a variable with reuse=True: -- GitLab From 4ddc2866e2ae1aa4ac4b345fccd97990b6ccca01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20Thom=C3=A9?= Date: Wed, 22 Nov 2017 06:44:27 +0100 Subject: [PATCH 0739/1801] Add back whitespace (#14721) --- tensorflow/compiler/aot/tfcompile.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index 1e22b760b8..e6ca7a2750 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -152,7 +152,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - flags), + " " + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, -- GitLab From ef3ee202659a2a49afcd9898451bf9b1256a2757 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 22:27:21 -0800 Subject: [PATCH 0740/1801] [XLA] Add BitcastConvert HLO op to enable bitwise operations on floating point types. PiperOrigin-RevId: 176610007 --- .../xla/client/computation_builder.cc | 28 ++++ .../compiler/xla/client/computation_builder.h | 15 +- .../compiler/xla/service/dfs_hlo_visitor.h | 3 + .../xla/service/elemental_ir_emitter.cc | 49 +++++- .../compiler/xla/service/hlo_graph_dumper.cc | 35 ++--- .../compiler/xla/service/hlo_instruction.cc | 17 +++ .../compiler/xla/service/hlo_instruction.h | 5 + tensorflow/compiler/xla/service/hlo_opcode.h | 1 + .../compiler/xla/service/hlo_verifier.cc | 12 +- .../xla/service/instruction_fusion.cc | 19 +-- tensorflow/compiler/xla/service/service.cc | 4 + .../compiler/xla/service/shape_inference.cc | 38 +++++ .../compiler/xla/service/shape_inference.h | 7 + .../compiler/xla/service/user_computation.cc | 44 +++++- .../compiler/xla/service/user_computation.h | 4 + tensorflow/compiler/xla/tests/BUILD | 17 +++ .../xla/tests/bitcast_convert_test.cc | 141 ++++++++++++++++++ .../compiler/xla/tools/parser/hlo_parser.cc | 9 ++ tensorflow/compiler/xla/xla_data.proto | 3 +- .../performance/xla/operation_semantics.md | 42 ++++-- 20 files changed, 439 insertions(+), 54 deletions(-) create mode 100644 tensorflow/compiler/xla/tests/bitcast_convert_test.cc diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index b1f4ea8ab6..b17d221ef5 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -1164,6 +1164,34 @@ ComputationDataHandle ComputationBuilder::ConvertElementType( return ParseOpResponse(s, &response); } +ComputationDataHandle ComputationBuilder::BitcastConvertType( + const ComputationDataHandle& operand, PrimitiveType new_element_type) { + if (!first_error_.ok() || !PrepareComputation().ok()) { + return ComputationDataHandle(); + } + + StatusOr> shape_status = GetShape(operand); + if (!shape_status.ok()) { + first_error_ = shape_status.status(); + return ComputationDataHandle(); + } + std::unique_ptr original = shape_status.ConsumeValueOrDie(); + + ConvertRequest request; + *request.mutable_operand() = operand; + request.set_new_element_type(new_element_type); + OpRequest op_request; + *op_request.mutable_computation() = computation_.handle(); + *op_request.mutable_bitcast_convert_request() = request; + AddCommonFieldsToOpRequest(&op_request); + OpResponse response; + + VLOG(2) << "making bitcast convert request"; + Status s = client_->stub()->Op(&op_request, &response); + + return ParseOpResponse(s, &response); +} + ComputationDataHandle ComputationBuilder::SquareF32( const ComputationDataHandle& operand) { return BinaryOp(BINOP_POW, operand, ConstantR0(2.0), diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 4c6e320557..3a34010e6a 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -121,14 +121,10 @@ class ComputationBuilder { // result, OpMetadata is set on the Computation Builder. All subsequent // instructions generated via this Computation Builder will have the same // OpMetadata attached until a call to ClearOpMetdata. - void SetOpMetadata(const OpMetadata& metadata) { - metadata_ = metadata; - } + void SetOpMetadata(const OpMetadata& metadata) { metadata_ = metadata; } // Clears the HloMetadata state. - void ClearOpMetadata() { - metadata_.Clear(); - } + void ClearOpMetadata() { metadata_.Clear(); } // Sets an OpSharding that will be attached to all instructions until cleared. void SetSharding(const OpSharding& sharding) { sharding_ = sharding; } @@ -673,6 +669,13 @@ class ComputationBuilder { ComputationDataHandle ConvertElementType(const ComputationDataHandle& operand, PrimitiveType new_element_type); + // Enqueues a no-op instruction onto the computation that changes + // the element type of the operand array to primitive_type. The + // bit-widths of the source and destination element types must be + // identical. + ComputationDataHandle BitcastConvertType(const ComputationDataHandle& operand, + PrimitiveType new_element_type); + // Enqueues a float32 reciprocal instruction onto the computation. // (float32 is specified as there is an implicit float32 -1.0f constant // exponent). diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index 7b95325601..91086fd4a5 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -86,6 +86,9 @@ class DfsHloVisitorBase { virtual Status HandleConvert(HloInstructionPtr hlo) { return HandleElementwiseUnary(hlo); } + virtual Status HandleBitcastConvert(HloInstructionPtr hlo) { + return HandleElementwiseUnary(hlo); + } virtual Status HandleCopy(HloInstructionPtr hlo) { return HandleElementwiseUnary(hlo); } diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 606868034a..97ced5dfdc 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -110,6 +110,26 @@ StatusOr ElementalIrEmitter::EmitIntegerUnaryOp( PrimitiveType_Name(from_type).c_str(), PrimitiveType_Name(to_type).c_str()); } + case HloOpcode::kBitcastConvert: { + PrimitiveType from_type = op->operand(0)->shape().element_type(); + PrimitiveType to_type = op->shape().element_type(); + CHECK(primitive_util::IsIntegralType(from_type)); + if (from_type == to_type) { + return operand_value; + } + if (primitive_util::BitWidth(from_type) == + primitive_util::BitWidth(to_type)) { + return ir_builder_->CreateBitCast( + operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_)); + } + return InvalidArgument( + "bitcast conversion from primitive type %s to %s with unequal " + "bit-widths (%u versus %u) ", + PrimitiveType_Name(from_type).c_str(), + PrimitiveType_Name(to_type).c_str(), + primitive_util::BitWidth(from_type), + primitive_util::BitWidth(to_type)); + } case HloOpcode::kAbs: { bool is_signed = primitive_util::IsSignedIntegralType(op->shape().element_type()); @@ -203,6 +223,26 @@ StatusOr ElementalIrEmitter::EmitFloatUnaryOp( PrimitiveType_Name(from_type).c_str(), PrimitiveType_Name(to_type).c_str()); } + case HloOpcode::kBitcastConvert: { + PrimitiveType from_type = op->operand(0)->shape().element_type(); + PrimitiveType to_type = op->shape().element_type(); + CHECK(primitive_util::IsFloatingPointType(from_type)); + if (from_type == to_type) { + return operand_value; + } + if (primitive_util::BitWidth(from_type) == + primitive_util::BitWidth(to_type)) { + return ir_builder_->CreateBitCast( + operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_)); + } + return InvalidArgument( + "bitcast conversion from primitive type %s to %s with unequal " + "bit-widths (%u versus %u) ", + PrimitiveType_Name(from_type).c_str(), + PrimitiveType_Name(to_type).c_str(), + primitive_util::BitWidth(from_type), + primitive_util::BitWidth(to_type)); + } case HloOpcode::kExp: return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {operand_value}, {operand_value->getType()}, @@ -1073,6 +1113,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kRoundNearestAfz: case HloOpcode::kCeil: case HloOpcode::kConvert: + case HloOpcode::kBitcastConvert: case HloOpcode::kCopy: case HloOpcode::kCos: case HloOpcode::kExp: @@ -1081,11 +1122,11 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kIsFinite: case HloOpcode::kLog: case HloOpcode::kNegate: + case HloOpcode::kNot: case HloOpcode::kReal: case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kTanh: - case HloOpcode::kNot: return [this, hlo, &operand_to_generator]( const IrArray::Index& index) -> StatusOr { TF_ASSIGN_OR_RETURN(llvm::Value * operand_value, @@ -1094,6 +1135,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( return EmitUnaryOp(hlo, operand_value); }; case HloOpcode::kAdd: + case HloOpcode::kAnd: case HloOpcode::kAtan2: case HloOpcode::kComplex: case HloOpcode::kDivide: @@ -1106,14 +1148,13 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kMinimum: case HloOpcode::kMultiply: case HloOpcode::kNe: + case HloOpcode::kOr: case HloOpcode::kPower: case HloOpcode::kRemainder: - case HloOpcode::kSubtract: - case HloOpcode::kAnd: - case HloOpcode::kOr: case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: + case HloOpcode::kSubtract: return [this, hlo, &operand_to_generator]( const IrArray::Index& index) -> StatusOr { const HloInstruction* lhs = hlo->operand(0); diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index d71a4b42c7..84187d5783 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -864,9 +864,10 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { // (eg, parameter). switch (instr->opcode()) { case HloOpcode::kAbs: - case HloOpcode::kRoundNearestAfz: case HloOpcode::kAdd: + case HloOpcode::kAnd: case HloOpcode::kAtan2: + case HloOpcode::kBitcastConvert: case HloOpcode::kCeil: case HloOpcode::kClamp: case HloOpcode::kComplex: @@ -882,18 +883,19 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kIsFinite: case HloOpcode::kLe: case HloOpcode::kLog: - case HloOpcode::kAnd: - case HloOpcode::kNot: - case HloOpcode::kOr: case HloOpcode::kLt: case HloOpcode::kMaximum: case HloOpcode::kMinimum: case HloOpcode::kMultiply: case HloOpcode::kNe: case HloOpcode::kNegate: + case HloOpcode::kNot: + case HloOpcode::kOr: case HloOpcode::kPower: case HloOpcode::kReal: case HloOpcode::kRemainder: + case HloOpcode::kRng: + case HloOpcode::kRoundNearestAfz: case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: @@ -903,7 +905,6 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kSort: case HloOpcode::kSubtract: case HloOpcode::kTanh: - case HloOpcode::kRng: // De-emphasize scalar-shaped elementwise ops -- they're generally // uninteresting. if (ShapeUtil::IsEffectiveScalar(instr->shape())) { @@ -911,9 +912,9 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { } return kYellow; case HloOpcode::kBitcast: - case HloOpcode::kTuple: - case HloOpcode::kTrace: case HloOpcode::kGetTupleElement: + case HloOpcode::kTrace: + case HloOpcode::kTuple: return kWhite; case HloOpcode::kBroadcast: // De-emphasize nodes which broadcast a scalar within a fusion node -- @@ -952,28 +953,28 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { return kRed; case HloOpcode::kParameter: return kParameterColor; - case HloOpcode::kBatchNormTraining: - case HloOpcode::kBatchNormInference: case HloOpcode::kBatchNormGrad: + case HloOpcode::kBatchNormInference: + case HloOpcode::kBatchNormTraining: case HloOpcode::kReduce: - case HloOpcode::kSelectAndScatter: case HloOpcode::kReduceWindow: + case HloOpcode::kSelectAndScatter: return kPurple; - case HloOpcode::kMap: case HloOpcode::kFusion: + case HloOpcode::kMap: return kGray; - case HloOpcode::kSend: - case HloOpcode::kSendDone: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: + case HloOpcode::kCrossReplicaSum: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: - case HloOpcode::kCrossReplicaSum: + case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + case HloOpcode::kSend: + case HloOpcode::kSendDone: return kBrown; + case HloOpcode::kCall: case HloOpcode::kConditional: case HloOpcode::kCustomCall: case HloOpcode::kWhile: - case HloOpcode::kCall: return kDarkGreen; case HloOpcode::kConstant: LOG(FATAL) << "Constants don't get their own nodes in the graph."; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 464af7c554..854185af56 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -518,6 +518,15 @@ HloInstruction::CreateDynamicUpdateSlice(const Shape& shape, return instruction; } +/* static */ std::unique_ptr +HloInstruction::CreateBitcastConvert(const Shape& shape, + HloInstruction* operand) { + auto instruction = + WrapUnique(new HloInstruction(HloOpcode::kBitcastConvert, shape)); + instruction->AppendOperand(operand); + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateReduce( const Shape& shape, HloInstruction* arg, HloInstruction* init_value, tensorflow::gtl::ArraySlice dimensions_to_reduce, @@ -1115,6 +1124,10 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( CHECK_EQ(new_operands.size(), 1); clone = CreateConvert(shape, new_operands[0]); break; + case HloOpcode::kBitcastConvert: + CHECK_EQ(new_operands.size(), 1); + clone = CreateBitcastConvert(shape, new_operands[0]); + break; case HloOpcode::kReducePrecision: CHECK_EQ(new_operands.size(), 1); clone = CreateReducePrecision(shape, new_operands[0], exponent_bits_, @@ -1555,6 +1568,7 @@ bool HloInstruction::IdenticalSlowPath( // A convert result is determined by the primitive type that the operand is // converted into. case HloOpcode::kConvert: + case HloOpcode::kBitcastConvert: return shape().element_type() == other.shape().element_type(); // A reduce-precision operation is determined by the bit sizes. @@ -2295,6 +2309,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleConcatenate(this); case HloOpcode::kConvert: return visitor->HandleConvert(this); + case HloOpcode::kBitcastConvert: + return visitor->HandleBitcastConvert(this); case HloOpcode::kCopy: return visitor->HandleCopy(this); case HloOpcode::kMultiply: @@ -2667,6 +2683,7 @@ bool HloInstruction::IsElementwise() const { case HloOpcode::kRoundNearestAfz: case HloOpcode::kCeil: case HloOpcode::kConvert: + case HloOpcode::kBitcastConvert: case HloOpcode::kCopy: case HloOpcode::kCos: case HloOpcode::kExp: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index f3dbe9e33f..1bd0cca945 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -176,6 +176,11 @@ class HloInstruction { static std::unique_ptr CreateConvert(const Shape& shape, HloInstruction* operand); + // Creates a bitcast conversion instruction, where operand is the data to + // convert and shape is the target shape for the conversion. + static std::unique_ptr CreateBitcastConvert( + const Shape& shape, HloInstruction* operand); + // Creates an infeed instruction, which reads data of the given shape from the // Infeed interface of the device. static std::unique_ptr CreateInfeed(const Shape& shape, diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index 7b07027441..f3f7935758 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -52,6 +52,7 @@ namespace xla { V(kBatchNormInference, "batch-norm-inference") \ V(kBatchNormTraining, "batch-norm-training") \ V(kBitcast, "bitcast") \ + V(kBitcastConvert, "bitcast-convert") \ V(kBroadcast, "broadcast") \ V(kCall, "call", kHloOpcodeIsVariadic) \ V(kCeil, "ceil") \ diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index e353a75cab..f2a739c1e2 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -59,15 +59,17 @@ class ShapeVerifier : public DfsHloVisitor { } Status HandleConvert(HloInstruction* convert) override { - if (ShapeUtil::ElementIsComplex(convert->operand(0)->shape())) { - TF_RET_CHECK(ShapeUtil::ElementIsComplex(convert->shape())) - << "Unsupported complex->real kConvert"; - } return CheckShape(convert, ShapeInference::InferConvertShape( convert->operand(0)->shape(), convert->shape().element_type())); } + Status HandleBitcastConvert(HloInstruction* convert) override { + return CheckShape(convert, ShapeInference::InferBitcastConvertShape( + convert->operand(0)->shape(), + convert->shape().element_type())); + } + Status HandleCopy(HloInstruction* copy) override { return CheckUnaryShape(copy); } @@ -580,7 +582,7 @@ StatusOr HloVerifier::Run(HloModule* module) { // or ComputationLowerer::Visit() TF_RET_CHECK(instruction->dimensions().size() == ShapeUtil::Rank(instruction->operand(0)->shape())) - << "Broadcast HLO has invalid number of dimensions."; + << "Broadcast HLO has invalid number of dimensions."; } else if (instruction->opcode() == HloOpcode::kWhile) { auto* while_cond = instruction->while_condition(); auto* while_body = instruction->while_body(); diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index de4804996f..ba901b99e4 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -33,7 +33,9 @@ namespace xla { switch (instruction.opcode()) { // Cheap instructions. case HloOpcode::kAdd: + case HloOpcode::kAnd: case HloOpcode::kBitcast: + case HloOpcode::kBitcastConvert: case HloOpcode::kBroadcast: case HloOpcode::kCeil: case HloOpcode::kClamp: @@ -53,15 +55,14 @@ namespace xla { case HloOpcode::kInfeed: case HloOpcode::kIsFinite: case HloOpcode::kLe: - case HloOpcode::kAnd: - case HloOpcode::kNot: - case HloOpcode::kOr: case HloOpcode::kLt: case HloOpcode::kMaximum: case HloOpcode::kMinimum: case HloOpcode::kMultiply: case HloOpcode::kNe: case HloOpcode::kNegate: + case HloOpcode::kNot: + case HloOpcode::kOr: case HloOpcode::kOutfeed: case HloOpcode::kPad: case HloOpcode::kReal: @@ -88,9 +89,9 @@ namespace xla { // Expensive instructions. case HloOpcode::kAtan2: - case HloOpcode::kBatchNormTraining: - case HloOpcode::kBatchNormInference: case HloOpcode::kBatchNormGrad: + case HloOpcode::kBatchNormInference: + case HloOpcode::kBatchNormTraining: case HloOpcode::kCall: case HloOpcode::kConditional: case HloOpcode::kConvolution: @@ -104,19 +105,19 @@ namespace xla { case HloOpcode::kMap: case HloOpcode::kParameter: case HloOpcode::kPower: + case HloOpcode::kRecv: + case HloOpcode::kRecvDone: case HloOpcode::kReduce: case HloOpcode::kReduceWindow: case HloOpcode::kRemainder: case HloOpcode::kRng: case HloOpcode::kSelectAndScatter: + case HloOpcode::kSend: + case HloOpcode::kSendDone: case HloOpcode::kSort: case HloOpcode::kTanh: case HloOpcode::kTrace: case HloOpcode::kWhile: - case HloOpcode::kSend: - case HloOpcode::kSendDone: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: return true; } diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 0544a1697b..902a1afb45 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -1364,6 +1364,10 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) { handle_status = computation->AddConvertInstruction(arg->convert_request()); break; + case OpRequest::kBitcastConvertRequest: + handle_status = computation->AddBitcastConvertInstruction( + arg->bitcast_convert_request()); + break; case OpRequest::kConvolveRequest: handle_status = computation->AddConvolveInstruction(arg->convolve_request()); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index dcd726f22c..0a2bf939c1 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -441,6 +441,14 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, /* static */ StatusOr ShapeInference::InferConvertShape( const Shape& operand_shape, PrimitiveType new_element_type) { + auto old_element_type = operand_shape.element_type(); + if (primitive_util::IsComplexType(old_element_type) && + !primitive_util::IsComplexType(new_element_type)) { + return Unimplemented( + "Unsupported conversion from complex to real type: %s => %s", + ShapeUtil::HumanString(operand_shape).c_str(), + PrimitiveType_Name(new_element_type).c_str()); + } if (ShapeUtil::IsTuple(operand_shape) || new_element_type == TUPLE) { // Note: we may want to support tuple conversions via this operation in the // future, by recursing into the tuple elements to check all sub-conversions @@ -454,6 +462,36 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, return ShapeUtil::ChangeElementType(operand_shape, new_element_type); } +/* static */ StatusOr ShapeInference::InferBitcastConvertShape( + const Shape& operand_shape, PrimitiveType new_element_type) { + auto old_element_type = operand_shape.element_type(); + if (primitive_util::IsComplexType(old_element_type) != + primitive_util::IsComplexType(new_element_type)) { + return Unimplemented( + "Unsupported conversion between real and complex types: %s => %s", + ShapeUtil::HumanString(operand_shape).c_str(), + PrimitiveType_Name(new_element_type).c_str()); + } + if (ShapeUtil::IsTuple(operand_shape) || new_element_type == TUPLE) { + // Note: we may want to support tuple conversions via this operation in the + // future, by recursing into the tuple elements to check all sub-conversions + // are valid. For now we just reject them, though. + return InvalidArgument( + "cannot convert from or to tuple type; requested conversion: %s => %s", + ShapeUtil::HumanString(operand_shape).c_str(), + PrimitiveType_Name(new_element_type).c_str()); + } + if (primitive_util::BitWidth(old_element_type) != + primitive_util::BitWidth(new_element_type)) { + return InvalidArgument( + "cannot bitcast types with different bit-widths: %s => %s", + PrimitiveType_Name(old_element_type).c_str(), + PrimitiveType_Name(new_element_type).c_str()); + } + + return ShapeUtil::ChangeElementType(operand_shape, new_element_type); +} + /* static */ StatusOr ShapeInference::InferReducePrecisionShape( const Shape& operand_shape, const int exponent_bits, const int mantissa_bits) { diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index d5d497176d..0aadb98a40 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -204,6 +204,13 @@ class ShapeInference { static StatusOr InferConvertShape(const Shape& operand_shape, PrimitiveType new_element_type); + // Helper that validates the given operand shape can be bitcast converted to + // the target output_shape via a bitcast convert instruction -- the + // requirement is that the shape is identical except for the element type and + // the element types have identical bit-widths. + static StatusOr InferBitcastConvertShape( + const Shape& operand_shape, PrimitiveType new_element_type); + // Helper that validates the input data type for a reduce-precision operation, // and returns the result shape. static StatusOr InferReducePrecisionShape(const Shape& operand_shape, diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index b449b4f288..b0b15bb571 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -994,6 +994,32 @@ StatusOr UserComputation::AddConvertInstruction( return handle; } +StatusOr UserComputation::AddBitcastConvertInstruction( + const ConvertRequest& convert_request) { + tensorflow::mutex_lock lock(mutex_); + + TF_ASSIGN_OR_RETURN(const OperationRequest* operand, + LookUpRequest(convert_request.operand())); + + TF_ASSIGN_OR_RETURN(Shape new_shape, ShapeInference::InferConvertShape( + operand->output_shape(), + convert_request.new_element_type())); + + ComputationDataHandle handle = CreateComputationDataHandle(); + + OperationRequest& request = + (*session_computation_.mutable_requests())[handle.handle()]; + *request.mutable_output_handle() = handle; + *request.mutable_output_shape() = new_shape; + *request.mutable_request()->mutable_bitcast_convert_request() = + convert_request; + + VLOG(1) << "AddBitcastConvertInstruction (" << GetVersionedHandleInternal() + << "), data handle " << handle.handle() << ": " + << convert_request.ShortDebugString(); + return handle; +} + StatusOr UserComputation::AddReducePrecisionInstruction( const ReducePrecisionRequest& reduce_precision_request) { tensorflow::mutex_lock lock(mutex_); @@ -2370,6 +2396,13 @@ static void ForEachOperand( break; } + case OpRequest::kBitcastConvertRequest: { + const ConvertRequest& convert_request = + request.request().bitcast_convert_request(); + apply(convert_request.operand()); + break; + } + case OpRequest::kWhileRequest: { const WhileRequest& while_request = request.request().while_request(); apply(while_request.init()); @@ -2954,6 +2987,15 @@ void ComputationLowerer::Visit( break; } + case OpRequest::kBitcastConvertRequest: { + const ConvertRequest& convert_request = + request.request().bitcast_convert_request(); + HloInstruction* operand = lookup_instruction(convert_request.operand()); + hlo_instruction = add_instruction(HloInstruction::CreateBitcastConvert( + request.output_shape(), operand)); + break; + } + case OpRequest::kWhileRequest: { const WhileRequest& while_request = request.request().while_request(); CHECK_EQ(2, request.embedded_computation_versions_size()); @@ -3156,7 +3198,7 @@ void ComputationLowerer::Visit( LOG(FATAL) << "Unexpected request type: " << request.request().op_case(); } (*instructions)[handle.handle()] = hlo_instruction; -} +} // NOLINT(readability/fn_size) } // namespace diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h index 473a8b8f73..317c631dca 100644 --- a/tensorflow/compiler/xla/service/user_computation.h +++ b/tensorflow/compiler/xla/service/user_computation.h @@ -185,6 +185,10 @@ class UserComputation { StatusOr AddConvertInstruction( const ConvertRequest& convert_request); + // Enqueues a bitcast element instruction onto this user computation. + StatusOr AddBitcastConvertInstruction( + const ConvertRequest& convert_request); + // Enqueues a reduce instruction onto this user computation. StatusOr AddReduceInstruction( const ReduceRequest& reduce_request, diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index aa1804cc21..6811dbb39f 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1262,6 +1262,23 @@ xla_test( ], ) +xla_test( + name = "bitcast_convert_test", + srcs = ["bitcast_convert_test.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core:test", + ], +) + xla_test( name = "compilation_cache_test", srcs = ["compilation_cache_test.cc"], diff --git a/tensorflow/compiler/xla/tests/bitcast_convert_test.cc b/tensorflow/compiler/xla/tests/bitcast_convert_test.cc new file mode 100644 index 0000000000..0d94d65c10 --- /dev/null +++ b/tensorflow/compiler/xla/tests/bitcast_convert_test.cc @@ -0,0 +1,141 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class BitcastConvertTest : public ClientLibraryTestBase { + public: + explicit BitcastConvertTest(perftools::gputools::Platform* platform = nullptr) + : ClientLibraryTestBase(platform) { + mutable_debug_options()->add_xla_disable_hlo_passes("algsimp"); + mutable_debug_options()->add_xla_disable_hlo_passes("inline"); + } +}; + +TEST_F(BitcastConvertTest, ConvertR1S32ToR1S32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({42, 64}); + builder.BitcastConvertType(a, S32); + + std::vector expected = {42, 64}; + ComputeAndCompareR1(&builder, expected, {}); +} + +TEST_F(BitcastConvertTest, ConvertR1F32ToR1F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({42.0f, 64.0f}); + builder.BitcastConvertType(a, F32); + + std::vector expected = {42.0f, 64.0f}; + ComputeAndCompareR1(&builder, expected, {}); +} + +TEST_F(BitcastConvertTest, BitcastR1S32ToR1F32) { + ComputationBuilder builder(client_, TestName()); + auto a = + builder.ConstantR1({0, static_cast(0x80000000), 0x3F800000, + static_cast(0xBF800000), 0x3F000000, + static_cast(0xBF000000)}); + builder.BitcastConvertType(a, F32); + + std::vector expected = {0.0f, -0.0f, 1.0f, -1.0f, 0.5f, -0.5f}; + ComputeAndCompareR1(&builder, expected, {}); +} + +XLA_TEST_F(BitcastConvertTest, ConvertR1S0S32ToR1S0F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({}); + builder.BitcastConvertType(a, F32); + + std::vector expected = {}; + ComputeAndCompareR1(&builder, expected, {}); +} + +TEST_F(BitcastConvertTest, ConvertR1F32ToR1S32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({42.6, 64.4}); + builder.BitcastConvertType(a, S32); + + std::vector expected = {0x422a6666, 0x4280cccd}; + ComputeAndCompareR1(&builder, expected, {}); +} + +TEST_F(BitcastConvertTest, ConvertS32Extremes) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1( + {std::numeric_limits::min(), std::numeric_limits::max()}); + builder.BitcastConvertType(a, F32); + + std::vector expected = {-0.0f, NAN}; + ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0, 0)); +} + +TEST_F(BitcastConvertTest, ConvertMapToS32) { + ComputationBuilder builder(client_, TestName()); + auto b = builder.CreateSubBuilder("convert"); + auto param = b->Parameter(0, ShapeUtil::MakeShape(F32, {}), "in"); + b->BitcastConvertType(param, S32); + auto a = builder.ConstantR1({42.0f, 64.0f}); + builder.Map({a}, b->BuildAndNoteError(), {0}); + + std::vector expected = {0x42280000, 0x42800000}; + ComputeAndCompareR1(&builder, expected, {}); +} + +TEST_F(BitcastConvertTest, ConvertMapToF32) { + ComputationBuilder builder(client_, TestName()); + auto b = builder.CreateSubBuilder("convert"); + auto param = b->Parameter(0, ShapeUtil::MakeShape(S32, {}), "in"); + b->BitcastConvertType(param, F32); + auto a = builder.ConstantR1({0x42280000, 0x42800000}); + builder.Map({a}, b->BuildAndNoteError(), {0}); + + std::vector expected = {42.0f, 64.0f}; + ComputeAndCompareR1(&builder, expected, {}); +} + +// Regression test for b/31758660. When ReshapeMover transforms +// input -> reshape -> convert +// to +// input -> convert -> reshape +// the new convert should have the same element type as the old convert. +TEST_F(BitcastConvertTest, ConvertReshape) { + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR1({0x42280000}); + auto reshape = builder.Reshape(input, /*dimensions=*/{0}, /*new_sizes=*/{}); + builder.BitcastConvertType(reshape, F32); + + ComputeAndCompareR0(&builder, 42.0f, {}); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index a102bdc3aa..a10497665a 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -434,6 +434,15 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateConvert(shape, operands[0])); break; } + case HloOpcode::kBitcastConvert: { + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateBitcastConvert(shape, operands[0])); + break; + } case HloOpcode::kCrossReplicaSum: { if (!ParseOperands(&operands, /*expected_size=*/1) || !ParseAttributes(attrs)) { diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 39f5806739..d3c5a88807 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -907,7 +907,8 @@ message OpRequest { BatchNormGradRequest batch_norm_grad_request = 37; BatchNormInferenceRequest batch_norm_inference_request = 38; FftRequest fft_request = 41; - // Next: 42 + ConvertRequest bitcast_convert_request = 42; + // Next: 43 } } diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index dfd9c12c89..8831b3d0fd 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -13,6 +13,28 @@ arbitrary-dimensional array. For convenience, special cases have more specific and familiar names; for example a *vector* is a 1-dimensional array and a *matrix* is a 2-dimensional array. +## BitcastConvertType + +See also +[`ComputationBuilder::BitcastConvertType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). + +Similar to a `tf.bitcast` in TensorFlow, performs an element-wise bitcast +operation from a data shape to a target shape. The dimensions must match, and +the conversion is an element-wise one; e.g. `s32` elements become `f32` elements +via bitcast routine. Bitcast is implemented as a low-level cast, so machines +with different floating point representations will give different results. + + `BitcastConvertType(operand, new_element_type)` + +Arguments | Type | Semantics +------------------ | ----------------------- | --------------------------- +`operand` | `ComputationDataHandle` | array of type T with dims D +`new_element_type` | `PrimitiveType` | type U + +The dimensions of the operand and the target shape must match. The bit-width of +the source and destination element types must be equal. The source +and destination element types must not be tuples. + ## Broadcast See also @@ -234,9 +256,8 @@ Arguments | Type | Semantics `operand` | `ComputationDataHandle` | array of type T with dims D `new_element_type` | `PrimitiveType` | type U -If the dimensions of the operand and the target shape do not match, or an -invalid conversion is requested (e.g. to/from a tuple) an error will be -produced. +The dimensions of the operand and the target shape must match. The source and +destination element types must not be tuples. A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float conversion routine such as round-to-nearest-even. @@ -646,8 +667,8 @@ Normalizes an array across batch and spatial dimensions. For each feature in the feature dimension (`feature_index` is the index for the feature dimension in `operand`), the operation calculates the mean and variance across all the other dimensions and use the mean and variance to normalize each -element in `operand`. If an invalid `feature_index` is passed, an error is -produced. +element in `operand`. The `feature_index` must be a valid index for the feature +dimension in `operand`. The algorithm goes as follows for each batch in `operand` \\(x\\) that contains `m` elements with `w` and `h` as the size of spatial dimensions ( @@ -702,8 +723,8 @@ Normalizes an array across batch and spatial dimensions. For each feature in the feature dimension (`feature_index` is the index for the feature dimension in `operand`), the operation calculates the mean and variance across all the other dimensions and use the mean and variance to normalize each -element in `operand`. If an invalid `feature_index` is passed, an error is -produced. +element in `operand`. The `feature_index` must be a valid index for the feature +dimension in `operand`. `BatchNormInference` is equivalent to calling `BatchNormTraining` without computing `mean` and `variance` for each batch. It uses the input `mean` and @@ -742,8 +763,8 @@ Calculates gradients of batch norm. For each feature in the feature dimension (`feature_index` is the index for the feature dimension in `operand`), the operation calculates the gradients with -respect to `operand`, `offset` and `scale` across all the other dimensions. If -an invalid `feature_index` is passed, an error is produced. +respect to `operand`, `offset` and `scale` across all the other dimensions. The +`feature_index` must be a valid index for the feature dimension in `operand`. The three gradients are defined by the following formulas: @@ -808,8 +829,7 @@ device, interpreting the data as the given shape and its layout, and returns a `ComputationDataHandle` of the data. Multiple Infeed operations are allowed in a computation, but there must be a total order among the Infeed operations. For example, two Infeeds in the code below have a total order since there is a -dependency between the while loops. The compiler issues an error if there isn't -a total order. +dependency between the while loops. ``` result1 = while (condition, init = init_value) { -- GitLab From f93c48dc061d23495a4425fcad17d55159cb02b1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 23:05:17 -0800 Subject: [PATCH 0741/1801] Use LINKER_INITIALIZED for mutexes with static storage class. This was causing exit-time races as some threads were accessing the mutex as it was being destructed. --------------- It is illegal to use any static type with a constructor/destructor with static storage class in a multithreaded C++ programme that can exit(), even if the constructor is protected by C++11's function-scope static initialization rules, because exit-time destruction is unsafe in the presence of multiple threads. For things that are not function-scope, the construction is also unsafe, because global contruction ordering is undefined in general. The LINKER_INITIALIZED variant constructor for TensorFlow's mutex avoids these problems, at the cost of relying on the linker to zero-initialize the BSS region. PiperOrigin-RevId: 176612772 --- tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc | 2 +- tensorflow/contrib/nccl/kernels/nccl_manager.cc | 2 +- tensorflow/core/common_runtime/device_factory.cc | 2 +- tensorflow/core/common_runtime/session_factory.cc | 2 +- tensorflow/core/debug/debug_io_utils.cc | 2 +- tensorflow/core/distributed_runtime/local_master.cc | 2 +- tensorflow/core/distributed_runtime/server_lib.cc | 2 +- tensorflow/core/framework/load_library.cc | 2 +- tensorflow/core/framework/op_def_util.cc | 2 +- tensorflow/core/kernels/meta_support.cc | 2 +- tensorflow/core/lib/random/random.cc | 4 ++-- tensorflow/core/platform/s3/s3_file_system.cc | 2 +- tensorflow/python/lib/core/py_func.cc | 2 +- 13 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc index 2871c14628..85b61b2616 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc @@ -39,7 +39,7 @@ const char kTestMp3Filename[] = // Set to true via a command line flag iff the test is expected to have FFmpeg // installed. -mutex mu; +mutex mu(LINKER_INITIALIZED); bool should_ffmpeg_be_installed GUARDED_BY(mu) = false; string ParseTestFlags(int* argc, char** argv) { diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.cc b/tensorflow/contrib/nccl/kernels/nccl_manager.cc index 1eb1481675..31a35b0d53 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.cc @@ -370,7 +370,7 @@ void NcclManager::AddParticipant(int num_devices, const string& key, } void NcclManager::RunCollective(const string& key, Collective* collective) { - static mutex collective_mu; + static mutex collective_mu(LINKER_INITIALIZED); auto* communicator = GetCommunicator(collective); collective->communicator = communicator; diff --git a/tensorflow/core/common_runtime/device_factory.cc b/tensorflow/core/common_runtime/device_factory.cc index fa12c48fb9..b43c718817 100644 --- a/tensorflow/core/common_runtime/device_factory.cc +++ b/tensorflow/core/common_runtime/device_factory.cc @@ -32,7 +32,7 @@ namespace tensorflow { namespace { static mutex* get_device_factory_lock() { - static mutex device_factory_lock; + static mutex device_factory_lock(LINKER_INITIALIZED); return &device_factory_lock; } diff --git a/tensorflow/core/common_runtime/session_factory.cc b/tensorflow/core/common_runtime/session_factory.cc index dba7a9253e..0234d4c372 100644 --- a/tensorflow/core/common_runtime/session_factory.cc +++ b/tensorflow/core/common_runtime/session_factory.cc @@ -29,7 +29,7 @@ namespace tensorflow { namespace { static mutex* get_session_factory_lock() { - static mutex session_factory_lock; + static mutex session_factory_lock(LINKER_INITIALIZED); return &session_factory_lock; } diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc index 85d04daa65..f81445c20b 100644 --- a/tensorflow/core/debug/debug_io_utils.cc +++ b/tensorflow/core/debug/debug_io_utils.cc @@ -736,7 +736,7 @@ Status DebugGrpcChannel::ReceiveServerRepliesAndClose() { } } -mutex DebugGrpcIO::streams_mu; +mutex DebugGrpcIO::streams_mu(LINKER_INITIALIZED); int64 DebugGrpcIO::channel_connection_timeout_micros = 900 * 1000 * 1000; // TODO(cais): Make this configurable? diff --git a/tensorflow/core/distributed_runtime/local_master.cc b/tensorflow/core/distributed_runtime/local_master.cc index c7ba7abeaf..aaa4cfa734 100644 --- a/tensorflow/core/distributed_runtime/local_master.cc +++ b/tensorflow/core/distributed_runtime/local_master.cc @@ -159,7 +159,7 @@ Status LocalMaster::Reset(CallOptions* call_options, namespace { mutex* get_local_master_registry_lock() { - static mutex local_master_registry_lock; + static mutex local_master_registry_lock(LINKER_INITIALIZED); return &local_master_registry_lock; } diff --git a/tensorflow/core/distributed_runtime/server_lib.cc b/tensorflow/core/distributed_runtime/server_lib.cc index 0b7fed79cd..7d308bb723 100644 --- a/tensorflow/core/distributed_runtime/server_lib.cc +++ b/tensorflow/core/distributed_runtime/server_lib.cc @@ -24,7 +24,7 @@ namespace tensorflow { namespace { mutex* get_server_factory_lock() { - static mutex server_factory_lock; + static mutex server_factory_lock(LINKER_INITIALIZED); return &server_factory_lock; } diff --git a/tensorflow/core/framework/load_library.cc b/tensorflow/core/framework/load_library.cc index f825335300..b9e33b148f 100644 --- a/tensorflow/core/framework/load_library.cc +++ b/tensorflow/core/framework/load_library.cc @@ -45,7 +45,7 @@ struct Library { // perform initialization again, so the OpList would be empty. Status LoadLibrary(const char* library_filename, void** result, const void** buf, size_t* len) { - static mutex mu; + static mutex mu(LINKER_INITIALIZED); static std::unordered_map loaded_libs; Env* env = Env::Default(); Library library; diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc index f7d4166f97..29feda499f 100644 --- a/tensorflow/core/framework/op_def_util.cc +++ b/tensorflow/core/framework/op_def_util.cc @@ -332,7 +332,7 @@ Status CheckOpDeprecation(const OpDef& op_def, int graph_def_version) { ". ", dep.explanation(), "."); } else { // Warn only once for each op name, and do it in a threadsafe manner. - static mutex mu; + static mutex mu(LINKER_INITIALIZED); static std::unordered_set warned; bool warn; { diff --git a/tensorflow/core/kernels/meta_support.cc b/tensorflow/core/kernels/meta_support.cc index b29feb0032..9fed01189f 100644 --- a/tensorflow/core/kernels/meta_support.cc +++ b/tensorflow/core/kernels/meta_support.cc @@ -82,7 +82,7 @@ gemmlowp::WorkersPool* GetWorkersPool() { } mutex& GetMutex() { - static mutex mu; + static mutex mu(LINKER_INITIALIZED); return mu; } diff --git a/tensorflow/core/lib/random/random.cc b/tensorflow/core/lib/random/random.cc index 723c1100f8..82dc829507 100644 --- a/tensorflow/core/lib/random/random.cc +++ b/tensorflow/core/lib/random/random.cc @@ -33,14 +33,14 @@ std::mt19937_64 InitRngWithDefaultSeed() { return std::mt19937_64(); } uint64 New64() { static std::mt19937_64* rng = InitRngWithRandomSeed(); - static mutex mu; + static mutex mu(LINKER_INITIALIZED); mutex_lock l(mu); return (*rng)(); } uint64 New64DefaultSeed() { static std::mt19937_64 rng = InitRngWithDefaultSeed(); - static mutex mu; + static mutex mu(LINKER_INITIALIZED); mutex_lock l(mu); return rng(); } diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 51c85592bf..234f3c3aed 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -38,7 +38,7 @@ static const size_t kS3ReadAppendableFileBufferSize = 1024 * 1024; static const int kS3GetChildrenMaxKeys = 100; Aws::Client::ClientConfiguration& GetDefaultClientConfig() { - static mutex cfg_lock; + static mutex cfg_lock(LINKER_INITIALIZED); static bool init(false); static Aws::Client::ClientConfiguration cfg; diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index b30125761f..8bf831f8ba 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -32,7 +32,7 @@ limitations under the License. namespace tensorflow { namespace { -static mutex mu; +static mutex mu(LINKER_INITIALIZED); static PyObject* py_trampoline GUARDED_BY(mu) = nullptr; // Returns the py_trampoline that is used to pass the control to the -- GitLab From 5fbda9d8da7b98f62e83a392f047adf307b48b02 Mon Sep 17 00:00:00 2001 From: zxcqwe4906 Date: Wed, 22 Nov 2017 15:13:18 +0800 Subject: [PATCH 0742/1801] Change ndimage.imread to imageio.imread. (#14710) Scipy will not support imread from 1.0.0 as its document says: https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.misc.imread.html Change to imageio.imread and add its correspond exception. --- tensorflow/examples/udacity/1_notmnist.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/examples/udacity/1_notmnist.ipynb b/tensorflow/examples/udacity/1_notmnist.ipynb index 39674e1aa4..dffe5d37c6 100644 --- a/tensorflow/examples/udacity/1_notmnist.ipynb +++ b/tensorflow/examples/udacity/1_notmnist.ipynb @@ -46,13 +46,13 @@ "# These are all the modules we'll be using later. Make sure you can import them\n", "# before proceeding further.\n", "from __future__ import print_function\n", + "import imageio\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import os\n", "import sys\n", "import tarfile\n", "from IPython.display import display, Image\n", - "from scipy import ndimage\n", "from sklearn.linear_model import LogisticRegression\n", "from six.moves.urllib.request import urlretrieve\n", "from six.moves import cPickle as pickle\n", @@ -325,13 +325,13 @@ " for image in image_files:\n", " image_file = os.path.join(folder, image)\n", " try:\n", - " image_data = (ndimage.imread(image_file).astype(float) - \n", + " image_data = (imageio.imread(image_file).astype(float) - \n", " pixel_depth / 2) / pixel_depth\n", " if image_data.shape != (image_size, image_size):\n", " raise Exception('Unexpected image shape: %s' % str(image_data.shape))\n", " dataset[num_images, :, :] = image_data\n", " num_images = num_images + 1\n", - " except IOError as e:\n", + " except (IOError, ValueError) as e:\n", " print('Could not read:', image_file, ':', e, '- it\\'s ok, skipping.')\n", " \n", " dataset = dataset[0:num_images, :, :]\n", -- GitLab From c6d603f02e1a98f871912cda6716cdcbed6b439e Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Tue, 21 Nov 2017 23:32:43 -0800 Subject: [PATCH 0743/1801] Merge changes from github. PiperOrigin-RevId: 176615107 --- CODE_OF_CONDUCT.md | 6 +- README.md | 4 +- configure.py | 38 +- tensorflow/BUILD | 16 + tensorflow/compiler/aot/tfcompile.bzl | 15 +- tensorflow/compiler/tests/BUILD | 2 +- .../compiler/tests/fused_batchnorm_test.py | 25 +- tensorflow/compiler/xla/service/BUILD | 2 - .../compiler/xla/service/hlo_instruction.h | 2 +- .../xla/service/hlo_instruction_test.cc | 4 +- tensorflow/contrib/batching/BUILD | 1 + .../contrib/batching/kernels/batch_kernels.cc | 2 +- .../kernel_tests/csiszar_divergence_test.py | 2 +- tensorflow/contrib/cmake/CMakeLists.txt | 147 +++- .../contrib/cmake/external/boringssl.cmake | 6 +- .../contrib/cmake/external/jsoncpp.cmake | 6 +- tensorflow/contrib/cmake/external/lmdb.cmake | 6 +- tensorflow/contrib/cmake/external/png.cmake | 6 +- .../contrib/cmake/external/protobuf.cmake | 6 +- tensorflow/contrib/cmake/external/re2.cmake | 8 +- .../contrib/cmake/external/snappy.cmake | 8 +- .../contrib/cmake/external/sqlite.cmake | 6 +- tensorflow/contrib/cmake/external/zlib.cmake | 6 +- tensorflow/contrib/cmake/tf_cc_ops.cmake | 36 +- .../contrib/cmake/tf_core_kernels.cmake | 23 +- .../cmake/tf_label_image_example.cmake | 5 + tensorflow/contrib/cmake/tf_python.cmake | 38 +- tensorflow/contrib/cmake/tf_shared_lib.cmake | 45 +- .../contrib/cmake/tf_stream_executor.cmake | 3 + tensorflow/contrib/cmake/tf_tools.cmake | 13 +- tensorflow/contrib/cmake/tf_tutorials.cmake | 5 + tensorflow/contrib/crf/python/ops/crf.py | 19 +- .../contrib/data/python/kernel_tests/BUILD | 8 +- tensorflow/contrib/distributions/BUILD | 17 + tensorflow/contrib/distributions/__init__.py | 2 + .../python/kernel_tests/cauchy_test.py | 437 +++++++++++ .../distributions/python/ops/cauchy.py | 223 ++++++ .../python/examples/notebooks/1_basics.ipynb | 4 +- .../examples/notebooks/2_gradients.ipynb | 6 +- .../examples/notebooks/3_datasets.ipynb | 10 +- .../contrib/layers/python/layers/layers.py | 18 +- .../layers/python/layers/layers_test.py | 73 +- .../learn/python/learn/estimators/head.py | 2 +- .../learn/python/learn/estimators/model_fn.py | 6 +- .../python/learn/learn_io/data_feeder.py | 12 +- .../linear_optimizer/python/ops/sdca_ops.py | 11 +- tensorflow/contrib/lite/python/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 17 +- tensorflow/contrib/lite/toco/python/BUILD | 1 + tensorflow/contrib/makefile/Makefile | 3 +- tensorflow/contrib/makefile/README.md | 41 +- tensorflow/contrib/makefile/build_all_ios.sh | 54 +- .../contrib/makefile/compile_ios_protobuf.sh | 369 ++++++---- .../makefile/compile_ios_tensorflow.sh | 155 ++-- tensorflow/contrib/makefile/compile_nsync.sh | 5 +- tensorflow/contrib/nn/__init__.py | 2 + tensorflow/contrib/opt/BUILD | 18 + tensorflow/contrib/opt/__init__.py | 5 +- .../training/multitask_optimizer_wrapper.py | 138 ++++ .../multitask_optimizer_wrapper_test.py | 119 +++ .../python/kernel_tests/core_rnn_cell_test.py | 42 ++ .../rnn/python/kernel_tests/rnn_cell_test.py | 44 ++ tensorflow/contrib/rnn/python/ops/rnn_cell.py | 344 ++++++++- .../seq2seq/python/ops/attention_wrapper.py | 51 +- tensorflow/contrib/slim/README.md | 2 +- .../slim/python/slim/nets/resnet_v1_test.py | 2 +- tensorflow/contrib/verbs/README.md | 14 +- tensorflow/contrib/verbs/rdma.cc | 413 +++++++++-- tensorflow/contrib/verbs/rdma.h | 40 +- tensorflow/core/BUILD | 1 + .../core/common_runtime/mkl_cpu_allocator.h | 2 +- .../core/common_runtime/sycl/sycl_device.h | 22 +- tensorflow/core/graph/graph.cc | 15 + tensorflow/core/graph/graph.h | 5 + tensorflow/core/graph/graph_partition.cc | 4 +- tensorflow/core/graph/graph_test.cc | 64 +- tensorflow/core/graph/mkl_graph_util.h | 179 ++--- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- .../core/graph/mkl_tfconversion_pass.cc | 4 +- .../core/grappler/costs/graph_properties.h | 6 + tensorflow/core/grappler/utils.cc | 2 +- tensorflow/core/kernels/BUILD | 31 +- tensorflow/core/kernels/avgpooling_op.cc | 7 +- tensorflow/core/kernels/bincount_op.cc | 115 +-- tensorflow/core/kernels/bincount_op.h | 41 ++ tensorflow/core/kernels/bincount_op_gpu.cu.cc | 114 +++ tensorflow/core/kernels/bincount_op_test.cc | 75 ++ tensorflow/core/kernels/bucketize_op.cc | 66 +- tensorflow/core/kernels/bucketize_op.h | 41 ++ .../core/kernels/bucketize_op_gpu.cu.cc | 101 +++ tensorflow/core/kernels/conv_grad_ops_3d.cc | 42 +- tensorflow/core/kernels/conv_ops_3d.cc | 5 + tensorflow/core/kernels/cwise_op_acosh.cc | 12 +- tensorflow/core/kernels/cwise_op_asinh.cc | 14 +- tensorflow/core/kernels/cwise_op_atanh.cc | 14 +- tensorflow/core/kernels/cwise_ops.h | 12 + .../core/kernels/depthwise_conv_grad_op.cc | 10 +- tensorflow/core/kernels/depthwise_conv_op.cc | 10 +- tensorflow/core/kernels/depthwise_conv_op.h | 4 +- .../core/kernels/depthwise_conv_op_gpu.cu.cc | 19 +- .../kernels/dynamic_partition_op_gpu.cu.cc | 376 ++++++++++ .../core/kernels/dynamic_partition_op_test.cc | 58 ++ .../core/kernels/fused_batch_norm_op.cc | 70 +- tensorflow/core/kernels/fused_batch_norm_op.h | 22 +- tensorflow/core/kernels/lmdb_reader_op.cc | 7 +- tensorflow/core/kernels/maxpooling_op.cc | 47 +- .../core/kernels/maxpooling_op_gpu.cu.cc | 40 +- tensorflow/core/kernels/maxpooling_op_gpu.h | 2 +- .../core/kernels/mkl_conv_grad_filter_ops.cc | 78 +- .../core/kernels/mkl_conv_grad_input_ops.cc | 86 +-- tensorflow/core/kernels/mkl_conv_ops.cc | 82 ++- tensorflow/core/kernels/mkl_conv_ops.h | 140 ++-- tensorflow/core/kernels/mkl_tfconv_op.h | 80 +- tensorflow/core/kernels/pooling_ops_common.cc | 10 +- .../core/kernels/pooling_ops_common_gpu.h | 4 +- tensorflow/core/kernels/quantized_add_op.cc | 2 +- tensorflow/core/kernels/random_op.cc | 4 +- .../core/kernels/segment_reduction_ops.cc | 3 + .../core/kernels/segment_reduction_ops.h | 36 +- tensorflow/core/kernels/shape_ops.cc | 43 +- tensorflow/core/kernels/shape_ops.h | 13 +- tensorflow/core/kernels/slice_op.cc | 116 ++- tensorflow/core/kernels/slice_op.h | 109 ++- tensorflow/core/kernels/slice_op_gpu.cu.cc | 56 ++ tensorflow/core/kernels/strided_slice_op.cc | 1 - .../core/kernels/strided_slice_op_impl.h | 25 +- .../core/kernels/strided_slice_op_test.cc | 49 ++ tensorflow/core/kernels/transpose_op.cc | 35 +- tensorflow/core/kernels/unique_op.cc | 113 ++- tensorflow/core/ops/array_ops.cc | 44 +- tensorflow/core/ops/math_ops.cc | 2 + tensorflow/core/ops/nn_ops.cc | 12 +- tensorflow/core/ops/ops.pbtxt | 5 + .../core/platform/default/build_config/BUILD | 20 +- .../core/platform/default/notification.h | 2 +- tensorflow/core/platform/posix/error.cc | 11 +- tensorflow/core/platform/posix/port.cc | 6 +- tensorflow/core/public/version.h | 2 +- tensorflow/core/util/mkl_util.h | 691 ++++++++++++++++-- tensorflow/core/util/mkl_util_test.cc | 92 +++ .../api_guides/python/threading_and_queues.md | 2 +- .../docs_src/get_started/get_started.md | 6 +- tensorflow/docs_src/get_started/input_fn.md | 6 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 18 +- tensorflow/docs_src/install/install_linux.md | 22 +- tensorflow/docs_src/install/install_mac.md | 10 +- .../docs_src/install/install_sources.md | 19 +- tensorflow/docs_src/mobile/prepare_models.md | 2 +- .../docs_src/programmers_guide/debugger.md | 19 +- .../docs_src/programmers_guide/tensors.md | 12 +- tensorflow/examples/speech_commands/models.py | 2 +- tensorflow/go/android.go | 20 + tensorflow/go/operation_test.go | 8 + tensorflow/go/tensor.go | 9 +- tensorflow/go/tensor_test.go | 9 +- .../src/main/java/org/tensorflow/Shape.java | 32 + .../test/java/org/tensorflow/ShapeTest.java | 26 + tensorflow/python/BUILD | 4 + tensorflow/python/estimator/canned/head.py | 2 +- .../python/estimator/inputs/numpy_io.py | 83 ++- .../python/estimator/inputs/numpy_io_test.py | 87 +++ tensorflow/python/framework/ops.py | 4 + tensorflow/python/framework/tensor_util.py | 1 + tensorflow/python/framework/test_util.py | 3 +- .../python/kernel_tests/array_ops_test.py | 52 +- .../python/kernel_tests/bincount_op_test.py | 25 +- .../python/kernel_tests/bucketize_op_test.py | 8 +- .../python/kernel_tests/constant_op_test.py | 14 +- tensorflow/python/kernel_tests/conv1d_test.py | 43 ++ .../python/kernel_tests/conv_ops_3d_test.py | 267 +++---- .../kernel_tests/depthwise_conv_op_test.py | 20 +- .../python/kernel_tests/distributions/BUILD | 1 + .../distributions/multinomial_test.py | 12 +- .../kernel_tests/dynamic_partition_op_test.py | 106 ++- .../python/kernel_tests/pooling_ops_test.py | 60 +- .../python/kernel_tests/reader_ops_test.py | 41 ++ .../segment_reduction_ops_test.py | 29 +- .../python/kernel_tests/shape_ops_test.py | 10 + .../python/kernel_tests/slice_op_test.py | 25 +- .../python/kernel_tests/unique_op_test.py | 26 + tensorflow/python/layers/base.py | 8 +- tensorflow/python/layers/convolutional.py | 2 + tensorflow/python/layers/normalization.py | 22 +- .../python/layers/normalization_test.py | 98 ++- tensorflow/python/ops/array_ops.py | 38 +- .../python/ops/distributions/dirichlet.py | 2 +- .../python/ops/distributions/multinomial.py | 49 +- tensorflow/python/ops/image_ops_impl.py | 23 +- tensorflow/python/ops/linalg_ops.py | 31 +- tensorflow/python/ops/math_grad_test.py | 17 + tensorflow/python/ops/math_ops.py | 258 ++++--- tensorflow/python/ops/metrics_impl.py | 2 +- .../python/ops/nn_fused_batchnorm_test.py | 119 +-- tensorflow/python/ops/nn_impl.py | 16 +- tensorflow/python/ops/nn_ops.py | 125 +++- tensorflow/python/ops/variables.py | 4 +- .../python/tools/import_pb_to_tensorboard.py | 0 tensorflow/stream_executor/cuda/cuda_dnn.cc | 9 +- tensorflow/stream_executor/dnn.cc | 16 +- tensorflow/stream_executor/dnn.h | 6 + .../tools/api/golden/tensorflow.linalg.pbtxt | 2 +- .../tools/api/golden/tensorflow.nn.pbtxt | 6 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 22 +- .../tools/ci_build/ci_parameterized_build.sh | 2 +- .../tools/ci_build/install/install_golang.sh | 2 +- .../ci_build/linux/libtensorflow_docker.sh | 2 +- .../tools/ci_build/osx/libtensorflow_cpu.sh | 2 +- .../tools/ci_build/osx/libtensorflow_gpu.sh | 2 +- .../tools/ci_build/pi/build_raspberry_pi.sh | 6 + .../ci_build/windows/bazel/bazel_test_lib.sh | 4 +- .../docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 7 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- tensorflow/tools/docker/README.md | 14 + tensorflow/tools/graph_transforms/BUILD | 2 + .../tools/graph_transforms/quantize_nodes.cc | 2 + tensorflow/tools/pip_package/setup.py | 2 +- third_party/aws.BUILD | 3 + third_party/curl.BUILD | 1 - third_party/sycl/crosstool/CROSSTOOL.tpl | 8 +- third_party/sycl/crosstool/trisycl.tpl | 73 ++ third_party/sycl/sycl/BUILD.tpl | 17 +- third_party/sycl/sycl/build_defs.bzl.tpl | 17 +- third_party/sycl/sycl_configure.bzl | 86 ++- third_party/zlib.BUILD | 2 +- tools/bazel.rc | 7 +- util/python/BUILD | 2 +- 228 files changed, 7332 insertions(+), 1810 deletions(-) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/cauchy.py create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py create mode 100644 tensorflow/core/kernels/bincount_op.h create mode 100644 tensorflow/core/kernels/bincount_op_gpu.cu.cc create mode 100644 tensorflow/core/kernels/bincount_op_test.cc create mode 100644 tensorflow/core/kernels/bucketize_op.h create mode 100644 tensorflow/core/kernels/bucketize_op_gpu.cu.cc create mode 100644 tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc create mode 100644 tensorflow/core/util/mkl_util_test.cc create mode 100644 tensorflow/go/android.go mode change 100644 => 100755 tensorflow/python/tools/import_pb_to_tensorboard.py create mode 100644 third_party/sycl/crosstool/trisycl.tpl diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 10fd595fec..ff11d13140 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -42,7 +42,7 @@ The Code of Conduct also applies within project spaces and in public spaces when Conflicts in an open source project can take many forms, from someone having a bad day and using harsh and hurtful language in the issue queue, to more serious instances such as sexist/racist statements or threats of violence, and everything in between. -If the behaviour is threatening or harassing, or for other reasons requires immediate escalation, please see below. +If the behavior is threatening or harassing, or for other reasons requires immediate escalation, please see below. However, for the vast majority of issues, we aim to empower individuals to first resolve conflicts themselves, asking for help when needed, and only after that fails to escalate further. This approach gives people more control over the outcome of their dispute. @@ -55,14 +55,14 @@ If you are experiencing or witnessing conflict, we ask you to use the following ## Reporting Violations -Violations of the Code of Conduct can be reported to TensorFlow’s Project Steward at conduct@tensorflow.org. The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. +Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report. ## Enforcement -If the Project Steward receives a report alleging a violation of the Code of Conduct, the Project Steward will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Steward will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Steward may issue sanctions without notice. +If the Project Stewards receive a report alleging a violation of the Code of Conduct, the Project Stewards will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Stewards will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Stewards may issue sanctions without notice. ## Attribution diff --git a/README.md b/README.md index 24bbb6cec1..aff3427bdd 100644 --- a/README.md +++ b/README.md @@ -73,11 +73,11 @@ $ python ## For more information -* [TensorFlow website](https://www.tensorflow.org) +* [TensorFlow Website](https://www.tensorflow.org) * [TensorFlow White Papers](https://www.tensorflow.org/about/bib) * [TensorFlow Model Zoo](https://github.com/tensorflow/models) * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730) -* [TensorFlow course at Stanford](https://web.stanford.edu/class/cs20si) +* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si) Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate. diff --git a/configure.py b/configure.py index 0d1afbfe15..26da09bd94 100644 --- a/configure.py +++ b/configure.py @@ -43,6 +43,7 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing ' 'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION) _TF_OPENCL_VERSION = '1.2' _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp' +_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include' def is_windows(): @@ -636,7 +637,7 @@ def set_tf_cuda_version(environ_cp): write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) -def set_tf_cunn_version(environ_cp): +def set_tf_cudnn_version(environ_cp): """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" ask_cudnn_version = ( 'Please specify the cuDNN version you want to use. ' @@ -882,6 +883,27 @@ def set_computecpp_toolkit_path(environ_cp): write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', computecpp_toolkit_path) +def set_trisycl_include_dir(environ_cp): + """Set TRISYCL_INCLUDE_DIR""" + ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' + 'include directory. (Use --config=sycl_trisycl ' + 'when building with Bazel) ' + '[Default is %s]: ' + ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) + while True: + trisycl_include_dir = get_from_env_or_user_or_default( + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) + if os.path.exists(trisycl_include_dir): + break + + print('Invalid triSYCL include directory, %s cannot be found' + % (trisycl_include_dir)) + + # Set TRISYCL_INCLUDE_DIR + environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', + trisycl_include_dir) def set_mpi_home(environ_cp): """Set MPI_HOME.""" @@ -997,6 +1019,8 @@ def main(): environ_cp['TF_NEED_GCP'] = '0' environ_cp['TF_NEED_HDFS'] = '0' environ_cp['TF_NEED_JEMALLOC'] = '0' + environ_cp['TF_NEED_OPENCL_SYCL'] = '0' + environ_cp['TF_NEED_COMPUTECPP'] = '0' environ_cp['TF_NEED_OPENCL'] = '0' environ_cp['TF_CUDA_CLANG'] = '0' @@ -1018,17 +1042,21 @@ def main(): set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', False, 'verbs') - set_action_env_var(environ_cp, 'TF_NEED_OPENCL', 'OpenCL', False) - if environ_cp.get('TF_NEED_OPENCL') == '1': + set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': set_host_cxx_compiler(environ_cp) set_host_c_compiler(environ_cp) - set_computecpp_toolkit_path(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True) + if environ_cp.get('TF_NEED_COMPUTECPP') == '1': + set_computecpp_toolkit_path(environ_cp) + else: + set_trisycl_include_dir(environ_cp) set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): set_tf_cuda_version(environ_cp) - set_tf_cunn_version(environ_cp) + set_tf_cudnn_version(environ_cp) set_tf_cuda_compute_capabilities(environ_cp) set_tf_cuda_clang(environ_cp) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 49828cd4d6..c8f0b6b061 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -54,6 +54,15 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "raspberry_pi_armeabi", + values = { + "crosstool_top": "@local_config_arm_compiler//:toolchain", + "cpu": "armeabi", + }, + visibility = ["//visibility:public"], +) + config_setting( name = "android_arm", values = { @@ -760,6 +769,13 @@ tf_cc_shared_object( ], ) +exports_files( + [ + "tf_version_script.lds", + "tf_exported_symbols.lds", + ], +) + py_library( name = "tensorflow_py", srcs = ["__init__.py"], diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index ee291c12d0..1e22b760b8 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -119,7 +119,7 @@ def tf_library(name, graph, config, out_nodes_file, ] + freeze_saver_srcs, outs=[freeze_file], - cmd=("$(location //tensorflow/python/tools:freeze_graph)" + + cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" + freeze_args), tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"], tags=tags, @@ -130,6 +130,10 @@ def tf_library(name, graph, config, header_file = name + ".h" object_file = name + ".o" ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_") + if type(tfcompile_flags) == type(""): + flags = tfcompile_flags + else: + flags = " ".join(["'" + arg.replace("'", "'\\''") + "'" for arg in (tfcompile_flags or [])]) native.genrule( name=("gen_" + name), srcs=[ @@ -148,7 +152,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - " " + (tfcompile_flags or "")), + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -185,7 +189,7 @@ def tf_library(name, graph, config, " --cpp_class=" + cpp_class + " --target_triple=" + target_llvm_triple() + " --out_session_module=$(@D)/" + session_module_pb + - " " + (tfcompile_flags or "")), + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -195,8 +199,7 @@ def tf_library(name, graph, config, # The cc_library rule packaging up the header and object file, and needed # kernel implementations. - need_xla_data_proto = (tfcompile_flags and - tfcompile_flags.find("--gen_program_shape") != -1) + need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1) native.cc_library( name=name, srcs=[object_file], @@ -253,7 +256,7 @@ def tf_library(name, graph, config, ], outs=[test_file], cmd=("sed " + sed_replace + - " $(location //tensorflow/compiler/aot:test.cc) " + + " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " + "> $(OUTS)"), tags=tags, ) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index c372e05474..79c4befd36 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -672,7 +672,7 @@ tf_library( cpp_class = "LSTMLayerInference", graph = "lstm_layer_inference.pbtxt", tags = ["manual"], - tfcompile_flags = "--xla_cpu_multi_thread_eigen=false", + tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"], ) # ----------------------------------------------------------------------------- diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py index 936fcf8b6b..a773b5a947 100644 --- a/tensorflow/compiler/tests/fused_batchnorm_test.py +++ b/tensorflow/compiler/tests/fused_batchnorm_test.py @@ -36,7 +36,7 @@ class FusedBatchNormTest(XLATestCase): x_square = x * x x_square_sum = np.sum(x_square, (0, 1, 2)) x_sum = np.sum(x, axis=(0, 1, 2)) - element_count = np.size(x) / int(np.shape(x)[0]) + element_count = np.size(x) / int(np.shape(x)[-1]) mean = x_sum / element_count var = x_square_sum / element_count - mean * mean normalized = (x - mean) / np.sqrt(var + epsilon) @@ -64,8 +64,9 @@ class FusedBatchNormTest(XLATestCase): return grad_x, grad_scale, grad_offset def testInference(self): - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -74,8 +75,8 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=[2], name="scale") - offset = array_ops.placeholder(np.float32, shape=[2], name="offset") + scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") + offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y_ref, mean_ref, var_ref = self._reference_training( x_val, scale_val, offset_val, epsilon, data_format) @@ -97,8 +98,9 @@ class FusedBatchNormTest(XLATestCase): self.assertAllClose(y_val, y_ref, atol=1e-3) def _testLearning(self, use_gradient_checker): - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -109,8 +111,8 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=[2], name="scale") - offset = array_ops.placeholder(np.float32, shape=[2], name="offset") + scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") + offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y, mean, var = nn.fused_batch_norm( t_val, @@ -154,8 +156,9 @@ class FusedBatchNormTest(XLATestCase): def testGradient(self): # TODO(b/64270657): Use gradient_checker here in addition to comparing with # this reference implementation. - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] grad_val = np.random.random_sample(x_shape).astype(np.float32) x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index db265510f2..fb980e7056 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -90,8 +90,6 @@ cc_library( ":shape_inference", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:status", - "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 1bd0cca945..cda8b07c61 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -222,7 +222,7 @@ class HloInstruction { tensorflow::gtl::ArraySlice strides); // Creates a slice instruction, where the first operand is sliced by - // start indices specified in the second operand, and by size specfied in + // start indices specified in the second operand, and by size specified in // 'slice_sizes'. static std::unique_ptr CreateDynamicSlice( const Shape& shape, HloInstruction* operand, diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 070bb4bc42..76b12fc8d3 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -792,8 +792,8 @@ TEST_F(HloInstructionTest, ComplexFusionOp) { // sub = Sub(mul, clamp) // tuple = Tuple({sub, sub, mul, C1}) // - // Notable complexities are repeated operands in a same instruction, different - // shapes, use of value in different expressions. + // Notable complexities are repeated operands in the same instruction, + // different shapes, use of value in different expressions. auto c1 = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(1.1f))); auto c2 = builder.AddInstruction( diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index 8b7df4a84c..a111cfecb3 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -82,6 +82,7 @@ cc_library( tf_cc_test( name = "adaptive_shared_batch_scheduler_test", srcs = ["adaptive_shared_batch_scheduler_test.cc"], + tags = ["manual"], # b/69013768 deps = [ ":adaptive_shared_batch_scheduler", "//tensorflow/contrib/batching/test_util:fake_clock_env", diff --git a/tensorflow/contrib/batching/kernels/batch_kernels.cc b/tensorflow/contrib/batching/kernels/batch_kernels.cc index 3b7c538fcc..6041d8c9b2 100644 --- a/tensorflow/contrib/batching/kernels/batch_kernels.cc +++ b/tensorflow/contrib/batching/kernels/batch_kernels.cc @@ -461,7 +461,7 @@ class BatchResource : public ResourceBase { return Status::OK(); } - // Looks up the batcher queue for 'queue_name'. If it did't previously exist, + // Looks up the batcher queue for 'queue_name'. If it didn't previously exist, // creates it. Status LookupOrCreateBatcherQueue(const string& queue_name, BatcherQueue** queue) { diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py index 8c6a614beb..2e94b7206d 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py @@ -759,7 +759,7 @@ class CsiszarVIMCOTest(test.TestCase): def _csiszar_vimco_helper_grad(self, logu, delta): """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`.""" - # This code actually estimates the sum of the Jacobiab because thats what + # This code actually estimates the sum of the Jacobiab because that's what # TF's `gradients` does. np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper( logu[..., None] + np.diag([delta]*len(logu))) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 8744fc492f..77a3fc0c83 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -34,13 +34,41 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) +if(HAIKU) + option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF) +else() + option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" ON) +endif() + if (NOT WIN32) # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option # for targets that link ${CMAKE_THREAD_LIBS_INIT}. find_package (Threads) + + option(tensorflow_PATH_STATIC_LIB "Additional library search path for libcudnn_static.a, libnccl_static.a, libculibos.a" /usr/local/cuda/lib64/) + option(tensorflow_CUDNN_INCLUDE "cudnn.h header install path" /usr/include/) + if (NOT tensorflow_CUDNN_INCLUDE) + # option's default value is OFF. Fill it with real default values + set(tensorflow_CUDNN_INCLUDE /usr/include) + endif (NOT tensorflow_CUDNN_INCLUDE) + option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB}) + option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB}) + option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64) + if (NOT tensorflow_CUDA_LIBRARY_PATH) + # option's default value is OFF. Fill it with real default values + set(tensorflow_CUDA_LIBRARY_PATH /usr/local/cuda/lib64) + endif (NOT tensorflow_CUDA_LIBRARY_PATH) endif() +if (WIN32) + set(BOOL_WIN32 ON) +else (WIN32) + set(BOOL_WIN32 OFF) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") +endif (WIN32) + # [CLEANUP] Remove when done # For debugging function(SHOW_VARIABLES) @@ -58,7 +86,12 @@ set (DOWNLOAD_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE PATH "Location where external projects will be downloaded.") mark_as_advanced(DOWNLOAD_LOCATION) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) +if (tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +else() + set(CMAKE_POSITION_INDEPENDENT_CODE OFF) +endif() + add_definitions(-DEIGEN_AVOID_STL_ARRAY) if(WIN32) add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC) @@ -217,20 +250,35 @@ endif() if(UNIX) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) endif() +if(HAIKU) + list(APPEND tensorflow_EXTERNAL_LIBRARIES network) +endif() if (tensorflow_ENABLE_GPU) + if (NOT WIN32) + # Default install paths for cuda libraries in Linux + # In some Linux distros, find_package(CUDA) seems to require CMAKE_LIBRARY_PATH to include cuda-lib paths + list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}") + list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs") + endif (NOT WIN32) + + find_package(CUDA 8.0 REQUIRED) + + # by default we assume compute cabability 3.5 and 5.2. If you change this change it in + # CUDA_NVCC_FLAGS and cuda_config.h below + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\") + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero + set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) + include_directories(${CUDA_INCLUDE}) if (WIN32) - find_package(CUDA 8.0 REQUIRED) - - # by default we assume compute cabability 3.5 and 5.2. If you change this change it in - # CUDA_NVCC_FLAGS and cuda_config.h below - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\") - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero - set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) - include_directories(${CUDA_INCLUDE}) add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2) + else (WIN32) + # Without these double quotes, cmake in Linux makes it "-DTF_EXTRA_CUDA_CAPABILITIES=3.0, -D3.5, -D5.2" for cc, which incurs build breaks + add_definitions(-DGOOGLE_CUDA=1 -D"TF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2") + endif (WIN32) + if (WIN32) # add cudnn if(NOT CUDNN_HOME) set(CUDNN_HOME ${CUDA_TOOLKIT_TARGET_DIR}) @@ -238,18 +286,48 @@ if (tensorflow_ENABLE_GPU) include_directories(${CUDNN_HOME}) set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${CUDNN_HOME}/lib/x64/cudnn.lib) + else (WIN32) + set(CUDNN_INCLUDE "${tensorflow_CUDNN_INCLUDE}") - # create cuda_config.h - FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h - "#ifndef CUDA_CUDA_CONFIG_H_\n" - "#define CUDA_CUDA_CONFIG_H_\n" - "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n" - "#define TF_CUDA_VERSION \"64_80\"\n" - "#define TF_CUDNN_VERSION \"64_6\"\n" - "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n" - "#endif // CUDA_CUDA_CONFIG_H_\n" - ) + find_library(nccl_STATIC_LIBRARY NAMES libnccl_static.a PATHS ${tensorflow_PATH_NCCL_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) + if (NOT nccl_STATIC_LIBRARY) + message(FATAL_ERROR "NCCL is required for GPU-build") + else (NOT nccl_STATIC_LIBRARY) + message("nccl-static: ${nccl_STATIC_LIBRARY}") + # something like /usr/lib64/libnccl_static.a + endif (NOT nccl_STATIC_LIBRARY) + + find_library(cudnn_STATIC_LIBRARY NAMES libcudnn_static.a PATHS ${tensorflow_PATH_CUDNN_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) + if (NOT cudnn_STATIC_LIBRARY) + message(FATAL_ERROR "CUDNN is required for GPU-build") + else (NOT cudnn_STATIC_LIBRARY) + message("cudnn-static: ${cudnn_STATIC_LIBRARY}") + endif (NOT cudnn_STATIC_LIBRARY) + + find_library(culibos_STATIC_LIBRARY NAMES libculibos.a PATHS ${tensorflow_PATH_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) + if (NOT culibos_STATIC_LIBRARY) + message(FATAL_ERROR "CULIBOS is required for GPU-build") + else (NOT culibos_STATIC_LIBRARY) + message("culibos-static: ${culibos_STATIC_LIBRARY}") + endif (NOT culibos_STATIC_LIBRARY) + + include_directories(${CUDNN_INCLUDE}) + set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} + ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY}) + endif (WIN32) + + # create cuda_config.h + FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h + "#ifndef CUDA_CUDA_CONFIG_H_\n" + "#define CUDA_CUDA_CONFIG_H_\n" + "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n" + "#define TF_CUDA_VERSION \"64_80\"\n" + "#define TF_CUDNN_VERSION \"64_6\"\n" + "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n" + "#endif // CUDA_CUDA_CONFIG_H_\n" + ) + if (WIN32) # tf assumes in various places header files to be in cuda/include. On windows the cuda sdk # installs them under cuda/version/include and to avoid that we need to change tf we copy a # few files to cuda/include @@ -261,12 +339,25 @@ if (tensorflow_ENABLE_GPU) ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include ) - include_directories(${tensorflow_source_dir}/third_party/gpus) - # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES}) + else(WIN32) + # Linux has slightly differnt install paths than Windows + FILE(COPY + ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_INCLUDE}/cudnn.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h + DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include + ) + endif(WIN32) - # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used - # in the default build is upgraded. + include_directories(${tensorflow_source_dir}/third_party/gpus) + # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES}) + + # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used + # in the default build is upgraded. + if(WIN32) set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value msvcp_dll_name=msvcp140.dll cudart_dll_name=cudart64_80.dll @@ -275,7 +366,9 @@ if (tensorflow_ENABLE_GPU) cudnn_dll_name=cudnn64_6.dll cudnn_version_number=6) else(WIN32) - message(FATAL_ERROR "CMake GPU build is currently only supported on Windows.") + set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value + cuda_version_number=8.0 + cudnn_version_number=6) endif(WIN32) else(tensorflow_ENABLE_GPU) set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value @@ -293,9 +386,7 @@ include(tf_core_framework.cmake) # NOTE: Disabled until issue #3996 is fixed. # include(tf_stream_executor.cmake) if (tensorflow_ENABLE_GPU) - if (WIN32) include(tf_stream_executor.cmake) - endif() endif() include(tf_core_cpu.cmake) diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake index dc27eadaca..cca8444e2a 100644 --- a/tensorflow/contrib/cmake/external/boringssl.cmake +++ b/tensorflow/contrib/cmake/external/boringssl.cmake @@ -39,8 +39,12 @@ ExternalProject_Add(boringssl # BUILD_IN_SOURCE 1 INSTALL_COMMAND "" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake index 5127d7e8f7..d2ae4c76e8 100644 --- a/tensorflow/contrib/cmake/external/jsoncpp.cmake +++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake @@ -42,8 +42,12 @@ ExternalProject_Add(jsoncpp BUILD_IN_SOURCE 1 INSTALL_COMMAND "" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) diff --git a/tensorflow/contrib/cmake/external/lmdb.cmake b/tensorflow/contrib/cmake/external/lmdb.cmake index 79971b7cfc..e41384f023 100644 --- a/tensorflow/contrib/cmake/external/lmdb.cmake +++ b/tensorflow/contrib/cmake/external/lmdb.cmake @@ -29,10 +29,14 @@ ExternalProject_Add(lmdb INSTALL_DIR ${lmdb_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_INSTALL_PREFIX:STRING=${lmdb_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) if(WIN32) diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index 2b2bd47d1c..aad6618f52 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -41,10 +41,14 @@ ExternalProject_Add(png INSTALL_DIR ${png_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DZLIB_ROOT:STRING=${ZLIB_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index 1e300e21df..b53857a47b 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -44,8 +44,12 @@ ExternalProject_Add(protobuf ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS} INSTALL_COMMAND "" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DZLIB_ROOT:STRING=${ZLIB_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake index cb4ec9c2de..b56f4b0898 100644 --- a/tensorflow/contrib/cmake/external/re2.cmake +++ b/tensorflow/contrib/cmake/external/re2.cmake @@ -38,7 +38,11 @@ ExternalProject_Add(re2 BUILD_IN_SOURCE 1 DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -) \ No newline at end of file +) diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake index 2d2451521c..926c271fd9 100644 --- a/tensorflow/contrib/cmake/external/snappy.cmake +++ b/tensorflow/contrib/cmake/external/snappy.cmake @@ -40,11 +40,15 @@ ExternalProject_Add(snappy LOG_CONFIGURE ON LOG_BUILD ON CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DSNAPPY_BUILD_TESTS:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) # actually enables snappy in the source code -add_definitions(-DTF_USE_SNAPPY) +add_definitions(-DTF_USE_SNAPPY) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake index 1770dcb1fd..785039a469 100644 --- a/tensorflow/contrib/cmake/external/sqlite.cmake +++ b/tensorflow/contrib/cmake/external/sqlite.cmake @@ -53,9 +53,13 @@ else() INSTALL_DIR ${sqlite_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake index c8af611e1e..f10f84336e 100644 --- a/tensorflow/contrib/cmake/external/zlib.cmake +++ b/tensorflow/contrib/cmake/external/zlib.cmake @@ -42,9 +42,13 @@ ExternalProject_Add(zlib BUILD_IN_SOURCE 1 DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${ZLIB_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) # put zlib includes in the directory where they are expected diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index 45eeb11062..6e2ac203f9 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -148,7 +148,11 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs}) add_library(tf_cc OBJECT ${tf_cc_srcs}) add_dependencies(tf_cc tf_cc_framework tf_cc_ops) -set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") +if (WIN32) + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") +else (WIN32) + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") +endif (WIN32) add_custom_target(tf_extension_ops) function(AddUserOps) @@ -164,15 +168,13 @@ function(AddUserOps) # create shared library from source and cuda obj add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib}) target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib}) - if(WIN32) - if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES) - # some ops call out to cuda directly; need to link libs for the cuda dlls - target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES}) - endif() - if (_AT_DISTCOPY) - add_custom_command(TARGET ${_AT_TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy $ ${_AT_DISTCOPY}/) - endif() + if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES) + # some ops call out to cuda directly; need to link libs for the cuda dlls + target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES}) + endif() + if (_AT_DISTCOPY) + add_custom_command(TARGET ${_AT_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ ${_AT_DISTCOPY}/) endif() if (_AT_DEPENDS) add_dependencies(${_AT_TARGET} ${_AT_DEPENDS}) @@ -180,9 +182,19 @@ function(AddUserOps) # make sure TF_COMPILE_LIBRARY is not defined for this target get_target_property(target_compile_flags ${_AT_TARGET} COMPILE_FLAGS) if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND") - set(target_compile_flags "/UTF_COMPILE_LIBRARY") + if (WIN32) + set(target_compile_flags "/UTF_COMPILE_LIBRARY") + else (WIN32) + # gcc uses UTF as default + set(target_compile_flags "-finput-charset=UTF-8") + endif (WIN32) else() - set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") + if (WIN32) + set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") + else (WIN32) + # gcc uses UTF as default + set(target_compile_flags "${target_compile_flags} -finput-charset=UTF-8") + endif (WIN32) endif() set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags}) add_dependencies(tf_extension_ops ${_AT_TARGET}) diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index d6b8990664..2d015908a8 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -179,6 +179,7 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc" + "${tensorflow_source_dir}/tensorflow/contrib/resampler/kernels/*.cu.cc" ) if(WIN32 AND tensorflow_ENABLE_GPU) @@ -202,16 +203,16 @@ endif(WIN32 AND tensorflow_ENABLE_GPU) add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs}) add_dependencies(tf_core_kernels tf_core_cpu) -if(WIN32) +if (WIN32) target_compile_options(tf_core_kernels PRIVATE /MP) - if (tensorflow_ENABLE_GPU) - set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) - set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) - cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) - set_target_properties(${tf_core_gpu_kernels_lib} - PROPERTIES DEBUG_POSTFIX "" - COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" - ) - add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) - endif() +endif (WIN32) +if (tensorflow_ENABLE_GPU) + set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) + set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) + cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) + set_target_properties(${tf_core_gpu_kernels_lib} + PROPERTIES DEBUG_POSTFIX "" + COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" + ) + add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) endif() diff --git a/tensorflow/contrib/cmake/tf_label_image_example.cmake b/tensorflow/contrib/cmake/tf_label_image_example.cmake index 0d3a4699eb..7f2f60b089 100644 --- a/tensorflow/contrib/cmake/tf_label_image_example.cmake +++ b/tensorflow/contrib/cmake/tf_label_image_example.cmake @@ -34,3 +34,8 @@ target_link_libraries(tf_label_image_example PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS tf_label_image_example + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 9b863f7bc6..61b3fd715d 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -715,6 +715,9 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) set(require_shape_fn 1) endif() + get_filename_component(GENERATE_PYTHON_OP_LIB_MKDIRPATH ${GENERATE_PYTHON_OP_LIB_DESTINATION} PATH) + file(MAKE_DIRECTORY ${GENERATE_PYTHON_OP_LIB_MKDIRPATH}) + # Create a C++ executable that links in the appropriate op # registrations and generates Python wrapper code based on the # registered ops. @@ -743,6 +746,7 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE) endfunction() +GENERATE_PYTHON_OP_LIB("audio_ops") GENERATE_PYTHON_OP_LIB("array_ops") GENERATE_PYTHON_OP_LIB("bitwise_ops") GENERATE_PYTHON_OP_LIB("math_ops") @@ -987,7 +991,7 @@ add_library(pywrap_tensorflow_internal SHARED $ $<$:$> $ - $<$:$> + $<$:$<$:$>> $<$:$> ${pywrap_tensorflow_deffile} ) @@ -1063,25 +1067,23 @@ if(WIN32) DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/) endif(WIN32) -if(WIN32) - # include contrib/seq2seq as .so - # - set(tf_beam_search_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" - ) +# include contrib/seq2seq as .so +# +set(tf_beam_search_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" +) - set(tf_beam_search_gpu_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" - ) +set(tf_beam_search_gpu_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" +) - AddUserOps(TARGET _beam_search_ops - SOURCES "${tf_beam_search_srcs}" - GPUSOURCES ${tf_beam_search_gpu_srcs} - DEPENDS pywrap_tensorflow_internal tf_python_ops - DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) -endif(WIN32) +AddUserOps(TARGET _beam_search_ops + SOURCES "${tf_beam_search_srcs}" + GPUSOURCES ${tf_beam_search_gpu_srcs} + DEPENDS pywrap_tensorflow_internal tf_python_ops + DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) ############################################################ # Build a PIP package containing the TensorFlow runtime. diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 9bf45bab30..3e3fe0cdfa 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -73,7 +73,7 @@ add_library(tensorflow SHARED $ $<$:$> $ - $<$:$> + $<$:$<$:$>> $<$:$> ${tensorflow_deffile} ) @@ -94,3 +94,46 @@ endif() if(WIN32) add_dependencies(tensorflow tensorflow_static) endif(WIN32) + +install(TARGETS tensorflow + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) + +# install necessary headers +# tensorflow headers +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/cc/ + DESTINATION include/tensorflow/cc + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/cc/ + DESTINATION include/tensorflow/cc + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/core/ + DESTINATION include/tensorflow/core + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/core/ + DESTINATION include/tensorflow/core + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/ + DESTINATION include/tensorflow/stream_executor + FILES_MATCHING PATTERN "*.h") +# google protobuf headers +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/ + DESTINATION include/google + FILES_MATCHING PATTERN "*.h") +# nsync headers +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/ + DESTINATION include/external/nsync + FILES_MATCHING PATTERN "*.h") +# Eigen directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/ + DESTINATION include/Eigen) +# external directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/ + DESTINATION include/external/eigen_archive) +# third_party eigen directory +install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/ + DESTINATION include/third_party/eigen3) +# unsupported Eigen directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/ + DESTINATION include/unsupported/Eigen) diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 3d84f1ebb9..8d95f0d3e8 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -74,6 +74,9 @@ endif() #) #list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) +if (NOT WIN32) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp") +endif (NOT WIN32) add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs}) add_dependencies(tf_stream_executor diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake index 6ef9598963..cb58a2e7df 100644 --- a/tensorflow/contrib/cmake/tf_tools.cmake +++ b/tensorflow/contrib/cmake/tf_tools.cmake @@ -73,7 +73,7 @@ add_executable(${transform_graph} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -95,7 +95,7 @@ add_executable(${summarize_graph} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -117,7 +117,7 @@ add_executable(${compare_graphs} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -138,7 +138,7 @@ add_executable(${benchmark_model} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -147,3 +147,8 @@ target_link_libraries(${benchmark_model} PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS ${transform_graph} ${summarize_graph} ${compare_graphs} ${benchmark_model} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake index 858e7dda92..e63fccc181 100644 --- a/tensorflow/contrib/cmake/tf_tutorials.cmake +++ b/tensorflow/contrib/cmake/tf_tutorials.cmake @@ -34,3 +34,8 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS tf_tutorials_example_trainer + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 4282be5ec8..1612c75179 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -363,8 +363,8 @@ class CrfDecodeForwardRnnCell(rnn_cell.RNNCell): scope: Unused variable scope of this cell. Returns: - backpointers: [batch_size, num_tags], containing backpointers. - new_state: [batch_size, num_tags], containing new score values. + backpointers: A [batch_size, num_tags] matrix of backpointers. + new_state: A [batch_size, num_tags] matrix of new score values. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). @@ -404,8 +404,9 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell): """Build the CrfDecodeBackwardRnnCell. Args: - inputs: [batch_size, num_tags], backpointer of next step (in time order). - state: [batch_size, 1], next position's tag index. + inputs: A [batch_size, num_tags] matrix of + backpointer of next step (in time order). + state: A [batch_size, 1] matrix of tag index of next step. scope: Unused variable scope of this cell. Returns: @@ -429,16 +430,16 @@ def crf_decode(potentials, transition_params, sequence_length): This is a function for tensor. Args: - potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of + potentials: A [batch_size, max_seq_len, num_tags] tensor of unary potentials. - transition_params: A [num_tags, num_tags] tensor, matrix of + transition_params: A [num_tags, num_tags] matrix of binary potentials. - sequence_length: A [batch_size] tensor, containing sequence lengths. + sequence_length: A [batch_size] vector of true sequence lengths. Returns: - decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32. + decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`. Contains the highest scoring tag indices. - best_score: A [batch_size] tensor, containing the score of decode_tags. + best_score: A [batch_size] vector, containing the score of `decode_tags`. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 1923c0586a..dd0457d54b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -11,6 +11,7 @@ py_test( size = "small", srcs = ["batch_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -372,6 +373,7 @@ py_test( size = "small", srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -449,6 +451,7 @@ py_test( size = "small", srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -465,7 +468,10 @@ py_test( size = "small", srcs = ["prefetching_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_oss"], # b/68785503 + tags = [ + "manual", + "no_oss", # b/68785503 + ], deps = [ "//tensorflow/contrib/data/python/ops:prefetching_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 2dc8ad9483..145b9495ff 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -140,6 +140,23 @@ cuda_py_test( ], ) +cuda_py_test( + name = "cauchy_test", + size = "medium", + srcs = ["python/kernel_tests/cauchy_test.py"], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + cuda_py_test( name = "chi2_test", srcs = ["python/kernel_tests/chi2_test.py"], diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 16f6533e57..0d12d83893 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -24,6 +24,7 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.contrib.distributions.python.ops.binomial import * +from tensorflow.contrib.distributions.python.ops.cauchy import * from tensorflow.contrib.distributions.python.ops.chi2 import * from tensorflow.contrib.distributions.python.ops.conditional_distribution import * from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * @@ -83,6 +84,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ 'bijectors', + 'Cauchy', 'ConditionalDistribution', 'ConditionalTransformedDistribution', 'FULLY_REPARAMETERIZED', diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py new file mode 100644 index 0000000000..7f7697357c --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py @@ -0,0 +1,437 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Cauchy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import importlib +import numpy as np + +from tensorflow.contrib.distributions.python.ops import cauchy as cauchy_lib +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + + +def try_import(name): # pylint: disable=invalid-name + module = None + try: + module = importlib.import_module(name) + except ImportError as e: + tf_logging.warning("Could not import %s: %s" % (name, str(e))) + return module + +stats = try_import("scipy.stats") + + +class CauchyTest(test.TestCase): + + def setUp(self): + self._rng = np.random.RandomState(123) + + def assertAllFinite(self, tensor): + is_finite = np.isfinite(tensor.eval()) + all_true = np.ones_like(is_finite, dtype=np.bool) + self.assertAllEqual(all_true, is_finite) + + def _testParamShapes(self, sample_shape, expected): + with self.test_session(): + param_shapes = cauchy_lib.Cauchy.param_shapes(sample_shape) + loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] + self.assertAllEqual(expected, loc_shape.eval()) + self.assertAllEqual(expected, scale_shape.eval()) + loc = array_ops.zeros(loc_shape) + scale = array_ops.ones(scale_shape) + self.assertAllEqual( + expected, + array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval()) + + def _testParamStaticShapes(self, sample_shape, expected): + param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape) + loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] + self.assertEqual(expected, loc_shape) + self.assertEqual(expected, scale_shape) + + def testParamShapes(self): + sample_shape = [10, 3, 4] + self._testParamShapes(sample_shape, sample_shape) + self._testParamShapes(constant_op.constant(sample_shape), sample_shape) + + def testParamStaticShapes(self): + sample_shape = [10, 3, 4] + self._testParamStaticShapes(sample_shape, sample_shape) + self._testParamStaticShapes( + tensor_shape.TensorShape(sample_shape), sample_shape) + + def testCauchyLogPDF(self): + with self.test_session(): + batch_size = 6 + loc = constant_op.constant([3.0] * batch_size) + scale = constant_op.constant([np.sqrt(10.0)] * batch_size) + x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + log_pdf = cauchy.log_prob(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) + + pdf = cauchy.prob(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, pdf.shape) + self.assertAllEqual(cauchy.batch_shape, pdf.eval().shape) + + if not stats: + return + expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf.eval()) + self.assertAllClose(np.exp(expected_log_pdf), pdf.eval()) + + def testCauchyLogPDFMultidimensional(self): + with self.test_session(): + batch_size = 6 + loc = constant_op.constant([[3.0, -3.0]] * batch_size) + scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] * + batch_size) + x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + log_pdf = cauchy.log_prob(x) + log_pdf_values = log_pdf.eval() + self.assertEqual(log_pdf.shape, (6, 2)) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) + + pdf = cauchy.prob(x) + pdf_values = pdf.eval() + self.assertEqual(pdf.shape, (6, 2)) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf_values.shape) + self.assertAllEqual(cauchy.batch_shape, pdf.shape) + self.assertAllEqual(cauchy.batch_shape, pdf_values.shape) + + if not stats: + return + expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf_values) + self.assertAllClose(np.exp(expected_log_pdf), pdf_values) + + def testCauchyCDF(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + cdf = cauchy.cdf(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, cdf.shape) + self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) + if not stats: + return + expected_cdf = stats.cauchy(loc, scale).cdf(x) + self.assertAllClose(expected_cdf, cdf.eval(), atol=0) + + def testCauchySurvivalFunction(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + sf = cauchy.survival_function(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, sf.shape) + self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) + if not stats: + return + expected_sf = stats.cauchy(loc, scale).sf(x) + self.assertAllClose(expected_sf, sf.eval(), atol=0) + + def testCauchyLogCDF(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + cdf = cauchy.log_cdf(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, cdf.shape) + self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) + + if not stats: + return + expected_cdf = stats.cauchy(loc, scale).logcdf(x) + self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5) + + def testFiniteGradientAtDifficultPoints(self): + for dtype in [np.float32, np.float64]: + g = ops.Graph() + with g.as_default(): + loc = variables.Variable(dtype(0.0)) + scale = variables.Variable(dtype(1.0)) + dist = cauchy_lib.Cauchy(loc=loc, scale=scale) + x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype) + for func in [ + dist.cdf, dist.log_cdf, dist.survival_function, + dist.log_survival_function, dist.log_prob, dist.prob + ]: + value = func(x) + grads = gradients_impl.gradients(value, [loc, scale]) + with self.test_session(graph=g): + variables.global_variables_initializer().run() + self.assertAllFinite(value) + self.assertAllFinite(grads[0]) + self.assertAllFinite(grads[1]) + + def testCauchyLogSurvivalFunction(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + sf = cauchy.log_survival_function(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, sf.shape) + self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) + + if not stats: + return + expected_sf = stats.cauchy(loc, scale).logsf(x) + self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5) + + def testCauchyEntropy(self): + with self.test_session(): + loc = np.array([1.0, 1.0, 1.0]) + scale = np.array([[1.0, 2.0, 3.0]]) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + entropy = cauchy.entropy() + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + entropy.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + entropy.eval().shape) + self.assertAllEqual(cauchy.batch_shape, entropy.shape) + self.assertAllEqual(cauchy.batch_shape, entropy.eval().shape) + + if not stats: + return + expected_entropy = stats.cauchy(loc, scale).entropy() + self.assertAllClose(expected_entropy, entropy.eval()) + + def testCauchyMode(self): + with self.test_session(): + # Mu will be broadcast to [7, 7, 7]. + loc = [7.] + scale = [11., 12., 13.] + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.mode().shape) + self.assertAllEqual([7., 7, 7], cauchy.mode().eval()) + + def testCauchyMean(self): + with self.test_session(): + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.mean().shape) + self.assertAllEqual([np.nan] * 3, cauchy.mean().eval()) + + def testCauchyNanMean(self): + with self.test_session(): + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.mean().eval() + + def testCauchyQuantile(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + p = np.linspace(0.000001, 0.999999, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + x = cauchy.quantile(p) + + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.eval().shape) + self.assertAllEqual(cauchy.batch_shape, x.shape) + self.assertAllEqual(cauchy.batch_shape, x.eval().shape) + + if not stats: + return + expected_x = stats.cauchy(loc, scale).ppf(p) + self.assertAllClose(expected_x, x.eval(), atol=0.) + + def testCauchyVariance(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.variance().shape) + self.assertAllEqual([np.nan] * 3, cauchy.variance().eval()) + + def testCauchyNanVariance(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.variance().eval() + + def testCauchyStandardDeviation(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.stddev().shape) + self.assertAllEqual([np.nan] * 3, cauchy.stddev().eval()) + + def testCauchyNanStandardDeviation(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.stddev().eval() + + def testCauchySample(self): + with self.test_session(): + loc = constant_op.constant(3.0) + scale = constant_op.constant(1.0) + loc_v = 3.0 + n = constant_op.constant(100000) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + samples = cauchy.sample(n) + sample_values = samples.eval() + + self.assertEqual(sample_values.shape, (100000,)) + self.assertAllClose(np.median(sample_values), loc_v, atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) + + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + expected_shape = (tensor_shape.TensorShape( + [n.eval()]).concatenate(cauchy.batch_shape)) + + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + def testCauchySampleMultiDimensional(self): + with self.test_session(): + batch_size = 2 + loc = constant_op.constant([[3.0, -3.0]] * batch_size) + scale = constant_op.constant([[0.5, 1.0]] * batch_size) + loc_v = [3.0, -3.0] + n = constant_op.constant(100000) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + samples = cauchy.sample(n) + sample_values = samples.eval() + self.assertEqual(samples.shape, (100000, batch_size, 2)) + self.assertAllClose(np.median(sample_values[:, 0, 0]), + loc_v[0], atol=1e-1) + self.assertAllClose(np.median(sample_values[:, 0, 1]), + loc_v[1], atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + expected_shape = (tensor_shape.TensorShape( + [n.eval()]).concatenate(cauchy.batch_shape)) + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + def testCauchyNegativeLocFails(self): + with self.test_session(): + cauchy = cauchy_lib.Cauchy(loc=[1.], scale=[-5.], validate_args=True) + with self.assertRaisesOpError("Condition x > 0 did not hold"): + cauchy.mode().eval() + + def testCauchyShape(self): + with self.test_session(): + loc = constant_op.constant([-3.0] * 5) + scale = constant_op.constant(11.0) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertEqual(cauchy.batch_shape_tensor().eval(), [5]) + self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape([5])) + self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) + self.assertEqual(cauchy.event_shape, tensor_shape.TensorShape([])) + + def testCauchyShapeWithPlaceholders(self): + loc = array_ops.placeholder(dtype=dtypes.float32) + scale = array_ops.placeholder(dtype=dtypes.float32) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + with self.test_session() as sess: + # get_batch_shape should return an "" tensor. + self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape(None)) + self.assertEqual(cauchy.event_shape, ()) + self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) + self.assertAllEqual( + sess.run(cauchy.batch_shape_tensor(), + feed_dict={loc: 5.0, + scale: [1.0, 2.0]}), [2]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py new file mode 100644 index 0000000000..a17bb091f6 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -0,0 +1,223 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""The Cauchy distribution class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops.distributions import distribution + + +__all__ = [ + "Cauchy", +] + + +class Cauchy(distribution.Distribution): + """The Cauchy distribution with location `loc` and scale `scale`. + + #### Mathematical details + + The probability density function (pdf) is, + + ```none + pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2)) + ``` + where `loc` is the location, and `scale` is the scale. + + The Cauchy distribution is a member of the [location-scale family]( + https://en.wikipedia.org/wiki/Location-scale_family), i.e. + + ```none + X ~ Cauchy(loc=0, scale=1) + Y ~ Cauchy(loc=loc, scale=scale) + Y = loc + scale * X + ``` + + #### Examples + + Examples of initialization of one or a batch of distributions. + + ```python + # Define a single scalar Cauchy distribution. + dist = Cauchy(loc=0., scale=3.) + + # Evaluate the cdf at 1, returning a scalar. + dist.cdf(1.) + + # Define a batch of two scalar valued Cauchy distributions. + dist = Cauchy(loc=[1, 2.], scale=[11, 22.]) + + # Evaluate the pdf of the first distribution on 0, and the second on 1.5, + # returning a length two tensor. + dist.prob([0, 1.5]) + + # Get 3 samples, returning a 3 x 2 tensor. + dist.sample([3]) + ``` + + Arguments are broadcast when possible. + + ```python + # Define a batch of two scalar valued Cauchy distributions. + # Both have median 1, but different scales. + dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.]) + # Evaluate the pdf of both distributions on the same point, 3.0, + # returning a length 2 tensor. + dist.prob(3.0) + ``` + """ + + def __init__(self, + loc, + scale, + validate_args=False, + allow_nan_stats=True, + name="Cauchy"): + """Construct Cauchy distributions with loc and and scale `loc` and `scale`. + + The parameters `loc` and `scale` must be shaped in a way that supports + broadcasting (e.g. `loc + scale` is a valid operation). + + Args: + loc: Floating point tensor; the modes of the distribution(s). + scale: Floating point tensor; the locations of the distribution(s). + Must contain only positive values. + validate_args: Python `bool`, default `False`. When `True` distribution + parameters are checked for validity despite possibly degrading runtime + performance. When `False` invalid inputs may silently render incorrect + outputs. + allow_nan_stats: Python `bool`, default `True`. When `True`, + statistics (e.g., mean, mode, variance) use the value "`NaN`" to + indicate the result is undefined. When `False`, an exception is raised + if one or more of the statistic's batch members are undefined. + name: Python `str` name prefixed to Ops created by this class. + + Raises: + TypeError: if `loc` and `scale` have different `dtype`. + """ + parameters = locals() + with ops.name_scope(name, values=[loc, scale]): + with ops.control_dependencies([check_ops.assert_positive(scale)] if + validate_args else []): + self._loc = array_ops.identity(loc, name="loc") + self._scale = array_ops.identity(scale, name="scale") + check_ops.assert_same_float_dtype([self._loc, self._scale]) + super(Cauchy, self).__init__( + dtype=self._scale.dtype, + reparameterization_type=distribution.FULLY_REPARAMETERIZED, + validate_args=validate_args, + allow_nan_stats=allow_nan_stats, + parameters=parameters, + graph_parents=[self._loc, self._scale], + name=name) + + @staticmethod + def _param_shapes(sample_shape): + return dict( + zip(("loc", "scale"), ([ops.convert_to_tensor( + sample_shape, dtype=dtypes.int32)] * 2))) + + @property + def loc(self): + """Distribution parameter for the mean.""" + return self._loc + + @property + def scale(self): + """Distribution parameter for standard deviation.""" + return self._scale + + def _batch_shape_tensor(self): + return array_ops.broadcast_dynamic_shape( + array_ops.shape(self.loc), + array_ops.shape(self.scale)) + + def _batch_shape(self): + return array_ops.broadcast_static_shape( + self.loc.shape, + self.scale.shape) + + def _event_shape_tensor(self): + return constant_op.constant([], dtype=dtypes.int32) + + def _event_shape(self): + return tensor_shape.scalar() + + def _sample_n(self, n, seed=None): + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) + probs = random_ops.random_uniform( + shape=shape, minval=0., maxval=1., dtype=self.dtype, seed=seed) + return self._quantile(probs) + + def _log_prob(self, x): + return self._log_unnormalized_prob(x) - self._log_normalization() + + def _cdf(self, x): + return math_ops.atan(self._z(x)) / np.pi + 0.5 + + def _log_cdf(self, x): + return math_ops.log1p(2 / np.pi * math_ops.atan(self._z(x))) - np.log(2) + + def _log_unnormalized_prob(self, x): + return -math_ops.log1p(math_ops.square(self._z(x))) + + def _log_normalization(self): + return np.log(np.pi) + math_ops.log(self.scale) + + def _entropy(self): + h = np.log(4 * np.pi) + math_ops.log(self.scale) + return h * array_ops.ones_like(self.loc) + + def _quantile(self, p): + return self.loc + self.scale * math_ops.tan(np.pi * (p - 0.5)) + + def _mode(self): + return self.loc * array_ops.ones_like(self.scale) + + def _z(self, x): + """Standardize input `x`.""" + with ops.name_scope("standardize", values=[x]): + return (x - self.loc) / self.scale + + def _inv_z(self, z): + """Reconstruct input `x` from a its normalized version.""" + with ops.name_scope("reconstruct", values=[z]): + return z * self.scale + self.loc + + def _mean(self): + if self.allow_nan_stats: + return array_ops.fill(self.batch_shape_tensor(), + self.dtype.as_numpy_dtype(np.nan)) + else: + raise ValueError("`mean` is undefined for Cauchy distribution.") + + def _stddev(self): + if self.allow_nan_stats: + return array_ops.fill(self.batch_shape_tensor(), + self.dtype.as_numpy_dtype(np.nan)) + else: + raise ValueError("`stddev` is undefined for Cauchy distribution.") diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb index 01616f2e7d..459f2f4a7d 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb @@ -429,7 +429,9 @@ "cpu_tensor = tf.random_normal([SIZE, SIZE])\n", "\n", "if is_gpu_available:\n", - " gpu_tensor = cpu_tensor.gpu()" + " gpu_tensor = cpu_tensor.gpu()\n", + "else:\n", + " print(\"GPU not available.\")" ] }, { diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb index 3b7e2cd435..e6c7c11733 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb @@ -383,7 +383,7 @@ "\n", "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n", "\n", - "1. the value returned by the function passed in (in this case, the loss calculated by `calculate_linear_model_loss()`), and\n", + "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n", "1. a list of tuples consisting of:\n", " 1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n", " 1. The corresponding variable (`tf.Variable`)\n", @@ -698,7 +698,7 @@ "source": [ "## Other Ways to Compute Gradients\n", "\n", - "Using our loss function as an example (`calculate_linear_model_loss()`), there are several other ways we could compute gradients:\n", + "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n", "\n", "1. `tfe.implicit_gradients()`\n", "1. `tfe.gradients_function()`\n", @@ -841,7 +841,7 @@ "# tfe.implicit_value_and_gradients() demo\n", "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n", "\n", - "# Returns only gradients:\n", + "# Returns the value returned by the function passed in, gradients, and variables:\n", "value_gradients_fn(inputs, labels, wb)" ] } diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb index ebcc7027c1..0088da5c4b 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb @@ -9,7 +9,7 @@ "source": [ "# Eager Execution Tutorial: Importing Data\n", "\n", - "This notebook demonstrates the use of the [`tf.contrib.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", + "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", "\n", "* Creating a `Dataset`.\n", "* Iteration over a `Dataset` with eager execution enabled.\n", @@ -64,7 +64,7 @@ "source": [ "# Step 1: Create a source `Dataset`\n", "\n", - "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." + "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." ] }, { @@ -83,7 +83,7 @@ }, "outputs": [], "source": [ - "ds_tensors = tf.contrib.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", + "ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", "\n", "# Create a CSV file\n", "import tempfile\n", @@ -93,7 +93,7 @@ "Line 2\n", "Line 3\n", " \"\"\")\n", - "ds_file = tf.contrib.data.TextLineDataset(filename)\n" + "ds_file = tf.data.TextLineDataset(filename)\n" ] }, { @@ -105,7 +105,7 @@ "source": [ "# Step 2: Apply transformations\n", "\n", - "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.contrib.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset) for details." + "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details." ] }, { diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 46b3eeae91..9378fe8799 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -286,7 +286,6 @@ def _fused_batch_norm(inputs, ValueError: If the rank of `inputs` is neither 2 or 4. ValueError: If rank or `C` dimension of `inputs` is undefined. """ - # TODO(reedwm): Add support for fp16 inputs. if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): raise ValueError('data_format has to be either NCHW or NHWC.') with variable_scope.variable_scope( @@ -320,9 +319,10 @@ def _fused_batch_norm(inputs, (inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. - trainable_beta = trainable and center beta_collections = utils.get_variable_collections(variables_collections, 'beta') + # Float32 required to avoid precision-loss when using fp16 input/output + variable_dtype = dtypes.float32 if not param_initializers: param_initializers = {} if not param_regularizers: @@ -336,13 +336,13 @@ def _fused_batch_norm(inputs, beta = variables.model_variable( 'beta', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=beta_initializer, regularizer=beta_regularizer, collections=beta_collections, - trainable=trainable_beta) + trainable=trainable) else: - beta = array_ops.constant(0.0, shape=params_shape) + beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape) if scale: gamma_collections = utils.get_variable_collections( @@ -352,13 +352,13 @@ def _fused_batch_norm(inputs, gamma = variables.model_variable( 'gamma', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=gamma_initializer, regularizer=gamma_regularizer, collections=gamma_collections, trainable=trainable) else: - gamma = array_ops.constant(1.0, shape=params_shape) + gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. We disable variable partitioning while creating @@ -375,7 +375,7 @@ def _fused_batch_norm(inputs, moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=moving_mean_initializer, trainable=False, collections=moving_mean_collections) @@ -386,7 +386,7 @@ def _fused_batch_norm(inputs, moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=moving_variance_initializer, trainable=False, collections=moving_variance_collections) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index ff7f0e4462..5aa2253516 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1774,10 +1774,12 @@ class BatchNormTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'undefined'): _layers.batch_norm(inputs, data_format='NCHW') - def _testCreateOp(self, fused): + def _testCreateOp(self, fused, dtype=None): + if dtype is None: + dtype = dtypes.float32 height, width = 3, 3 with self.test_session(): - images = np.random.uniform(size=(5, height, width, 3)).astype('f') + images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype) output = _layers.batch_norm(images, fused=fused) expected_name = ('BatchNorm/FusedBatchNorm' if fused else 'BatchNorm/batchnorm') @@ -1792,6 +1794,9 @@ class BatchNormTest(test.TestCase): def testCreateOpFused(self): self._testCreateOp(True) + def testCreateOpFusedFloat16(self): + self._testCreateOp(True, dtypes.float16) + def _testCreateOpBetaRegularizer(self, fused=True): height, width = 3, 3 with self.test_session(): @@ -2659,10 +2664,68 @@ class BatchNormTest(test.TestCase): def testBatchNormBeta(self): # Test case for 11673 with self.test_session() as sess: - a = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) - b = _layers.batch_norm(a, center=False, data_format='NCHW', - zero_debias_moving_mean=True) + a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) + b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW', + zero_debias_moving_mean=True) + a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10)) + b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW', + zero_debias_moving_mean=True) + sess.run(variables_lib.global_variables_initializer()) + + def testVariablesAreFloat32(self): + height, width = 3, 3 + with self.test_session(): + images = random_ops.random_uniform((5, height, width, 3), + seed=1, dtype=dtypes.float16) + _layers.batch_norm(images, scale=True) + beta = variables.get_variables_by_name('beta')[0] + gamma = variables.get_variables_by_name('gamma')[0] + self.assertEqual(beta.dtype, dtypes.float32_ref) + self.assertEqual(gamma.dtype, dtypes.float32_ref) + moving_mean = variables.get_variables_by_name('moving_mean')[0] + moving_variance = variables.get_variables_by_name('moving_variance')[0] + self.assertEqual(moving_mean.dtype, dtypes.float32_ref) + self.assertEqual(moving_variance.dtype, dtypes.float32_ref) + + def _runFusedBatchNorm(self, shape, dtype): + channels = shape[1] + images = np.arange(np.product(shape), dtype=dtype).reshape(shape) + beta = init_ops.constant_initializer( + np.arange( + 2, channels + 2, dtype=np.float32)) + gamma = init_ops.constant_initializer( + np.arange( + 10, channels + 10, dtype=np.float32) * 2.0) + mean = init_ops.constant_initializer( + np.arange( + 3, channels + 3, dtype=np.float32) * 5.0) + variance = init_ops.constant_initializer( + np.arange( + 1, channels + 1, dtype=np.float32) * 4.0) + output = _layers.batch_norm( + images, + fused=True, + is_training=True, + scale=True, + epsilon=0.5, + param_initializers={ + 'beta': beta, + 'gamma': gamma, + 'moving_mean': mean, + 'moving_variance': variance, + }, + data_format='NCHW') + with self.test_session(use_gpu=True) as sess: sess.run(variables_lib.global_variables_initializer()) + return sess.run(output) + + def testFusedBatchNormFloat16MatchesFloat32(self): + if test.is_gpu_available(cuda_only=True): + shape = [5, 4, 2, 3] + res_32 = self._runFusedBatchNorm(shape, np.float32) + res_16 = self._runFusedBatchNorm(shape, np.float16) + self.assertAllClose(res_32, res_16, rtol=1e-3) + def testAdjustmentCreated(self): # Tests that the adjustment is appropriately passed to and used by the core diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 468d792a0d..bc0e6fc009 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -119,7 +119,7 @@ class Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=model_fn_ops.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... upate train_op and hooks in ModelFnOps and return + ... update train_op and hooks in ModelFnOps and return ``` """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 8be9c72adf..44e6c7c52d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -23,7 +23,6 @@ import collections import six -from tensorflow.contrib import framework as contrib_framework from tensorflow.contrib.framework import get_graph_from_inputs from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import metric_key @@ -32,6 +31,7 @@ from tensorflow.python.estimator import model_fn as core_model_fn_lib from tensorflow.python.estimator.export import export_output as core_export_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -156,11 +156,11 @@ class ModelFnOps( else: if isinstance(predictions, dict): predictions = { - k: contrib_framework.convert_to_tensor_or_sparse_tensor(v) + k: sparse_tensor.convert_to_tensor_or_sparse_tensor(v) for k, v in six.iteritems(predictions) } else: - predictions = contrib_framework.convert_to_tensor_or_sparse_tensor( + predictions = sparse_tensor.convert_to_tensor_or_sparse_tensor( predictions) # Validate eval_metric_ops diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 4c50d40aaa..db18ebf05d 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,13 +28,14 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels - # pylint: enable=g-multiple-import,g-bad-import-order @@ -365,8 +366,13 @@ class DataFeeder(object): self.random_state = np.random.RandomState( 42) if random_state is None else random_state - num_samples = list(self._x.values())[0].shape[ - 0] if x_is_dict else self._x.shape[0] + if x_is_dict: + num_samples = list(self._x.values())[0].shape[0] + elif tensor_util.is_tensor(self._x): + num_samples = self._x.shape[0].value # shape will be a Dimension, extract an int + else: + num_samples = self._x.shape[0] + if self._shuffle: self.indices = self.random_state.permutation(num_samples) else: diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 13f2f0f502..86d8484391 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -238,10 +238,10 @@ class SdcaModel(object): with name_scope('sdca/prediction'): sparse_variables = self._convert_n_to_tensor(self._variables[ 'sparse_features_weights']) - result = 0.0 + result_sparse = 0.0 for sfc, sv in zip(examples['sparse_features'], sparse_variables): # TODO(sibyl-Aix6ihai): following does not take care of missing features. - result += math_ops.segment_sum( + result_sparse += math_ops.segment_sum( math_ops.multiply( array_ops.gather(sv, sfc.feature_indices), sfc.feature_values), sfc.example_indices) @@ -249,12 +249,13 @@ class SdcaModel(object): dense_variables = self._convert_n_to_tensor(self._variables[ 'dense_features_weights']) + result_dense = 0.0 for i in range(len(dense_variables)): - result += math_ops.matmul(dense_features[i], - array_ops.expand_dims(dense_variables[i], -1)) + result_dense += math_ops.matmul( + dense_features[i], array_ops.expand_dims(dense_variables[i], -1)) # Reshaping to allow shape inference at graph construction time. - return array_ops.reshape(result, [-1]) + return array_ops.reshape(result_dense, [-1]) + result_sparse def predictions(self, examples): """Add operations to compute predictions by the model. diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index b4aa032ff8..89e8693490 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -23,6 +23,7 @@ py_library( py_test( name = "lite_test", srcs = ["lite_test.py"], + srcs_version = "PY2AND3", deps = [ ":lite", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 86540d58a6..b122818221 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -36,6 +36,10 @@ import traceback import zipfile import numpy as np from six import StringIO + +# TODO(aselle): Disable GPU for now +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + import tensorflow as tf from google.protobuf import text_format # TODO(aselle): switch to TensorFlow's resource_loader @@ -379,12 +383,13 @@ def make_zip_of_tests(zip_path, report["toco_log"] = "" tf.reset_default_graph() - try: - inputs, outputs = make_graph(param_dict_real) - except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, - ValueError): - report["tf_log"] += traceback.format_exc() - return None, report + with tf.device('/cpu:0'): + try: + inputs, outputs = make_graph(param_dict_real) + except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, + ValueError): + report["tf_log"] += traceback.format_exc() + return None, report sess = tf.Session() try: diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 92246a8aed..17115047d2 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -61,6 +61,7 @@ tf_py_test( data = [ ":toco_from_protos", ], + tags = ["no_pip"], ) filegroup( diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index dba1464653..e2e6c05591 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -314,7 +314,8 @@ ifeq ($(TARGET),ANDROID) -Wno-narrowing \ -fomit-frame-pointer \ $(MARCH_OPTION) \ --fPIE +-fPIE \ +-fPIC INCLUDES = \ -I$(NDK_ROOT)/sources/android/support/include \ -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \ diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 715eb51577..65bd60c12a 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -174,10 +174,26 @@ tensorflow/contrib/makefile/build_all_ios.sh This process will take around twenty minutes on a modern MacBook Pro. -When it completes, you will have a library for a single architecture and the -benchmark program. Although successfully compiling the benchmark program is a +When it completes, you will have a unified library for all architectures +(i386sim, x86_64sim, armv7, armv7s and arm64) and the benchmark program. +Although successfully compiling the benchmark program is a sign of success, the program is not a complete iOS app. +If you would only like to build only one architecture to save time: +(iOS 11+ only supports 64bit so you can get away with arm64) + +```bash +tensorflow/contrib/makefile/build_all_ios.sh -a arm64 +``` + +After the first build if you would like to just build the tensorflow +library you can pass the -T flag to avoid a clean & rebuild. This should +take you just a few seconds to generate the library if you modified one file. + +```bash +tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -T +``` + To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -193,19 +209,18 @@ If you have not already, you will need to download dependencies: tensorflow/contrib/makefile/download_dependencies.sh ``` -Next, you will need to compile protobufs for iOS: +Next, you will need to compile protobufs for iOS (optionally takes the -a $ARCH flag): ```bash -tensorflow/contrib/makefile/compile_ios_protobuf.sh +tensorflow/contrib/makefile/compile_ios_protobuf.sh ``` -Then, you will need to compile the nsync library for iOS: +Then, you will need to compile the nsync library for iOS (optionally takes -a $ARCH flag): ```bash export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` ``` - Then, you can run the makefile specifying iOS as the target, along with the architecture you want to build for: @@ -219,10 +234,6 @@ This creates a library in `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any xcode project against. -At this point, you will have a library for a single architecture and the -benchmark program. Although successfully compiling the benchmark program is a -sign of success, the program is not a complete iOS app. - To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -237,6 +248,14 @@ time follow it with: compile_ios_tensorflow.sh ``` +`compile_ios_tensorflow.sh` takes the -a flag to build only for one architecture. +In case you run into issues with unresolved symbols with nsync you can also pass +-h ${HOST_NSYNC_LIB} and -n {TARGET_NSYNC_LIB} so it would look like: + +```bash +tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/contrib/makefile/downloads/nsync/builds/default.macos.c++11/nsync.a -n tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11/nsync.a -a arm64 +``` + In XCode, you will need to use -force_load in the linker flags section of the build settings to pull in the global constructors that are used to register ops and kernels. @@ -249,7 +268,7 @@ debug mode. If you are concerned about performance or are working on a release build, you would likely want a higher optimization setting, like so: ```bash -compile_ios_tensorflow.sh "-Os" +compile_ios_tensorflow.sh -f "-Os" ``` For other variations of valid optimization flags, see [clang optimization levels](http://stackoverflow.com/questions/15548023/clang-optimization-levels). diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index a49bbe4565..988e12b482 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -23,14 +23,29 @@ if [[ $(uname) != "Darwin" ]]; then exit 1 fi +usage() { + echo "Usage: $(basename "$0") [-a:T]" + echo "-a [build_arch] build only for specified arch x86_64 [default=all]" + echo "-T only build tensorflow (dont download other deps etc)" + exit 1 +} + +while getopts "a:T" opt_name; do + case "$opt_name" in + a) BUILD_ARCH="${OPTARG}";; + T) ONLY_MAKE_TENSORFLOW="true";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + + # Make sure we're in the correct directory, at the root of the source tree. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd ${SCRIPT_DIR}/../../../ - -# Remove any old files first. -make -f tensorflow/contrib/makefile/Makefile clean -rm -rf tensorflow/contrib/makefile/downloads +source "${SCRIPT_DIR}/build_helper.subr" +JOB_COUNT="${JOB_COUNT:-$(get_job_count)}" # Setting a deployment target is required for building with bitcode, # otherwise linking will fail with: @@ -41,20 +56,37 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -# Pull down the required versions of the frameworks we need. -tensorflow/contrib/makefile/download_dependencies.sh +if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then + # Remove any old files first. + make -f tensorflow/contrib/makefile/Makefile clean + rm -rf tensorflow/contrib/makefile/downloads -# Compile protobuf for the target iOS device architectures. -tensorflow/contrib/makefile/compile_ios_protobuf.sh + # Pull down the required versions of the frameworks we need. + tensorflow/contrib/makefile/download_dependencies.sh + + # Compile protobuf for the target iOS device architectures. + tensorflow/contrib/makefile/compile_ios_protobuf.sh +fi # Compile nsync for the target iOS device architectures. # Don't use export var=`something` syntax; it swallows the exit status. HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` -TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +if [[ -z "${BUILD_ARCH}" ]]; then + # No arch specified so build all architectures + TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +else + # arch specified so build just that + TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}` +fi export HOST_NSYNC_LIB TARGET_NSYNC_LIB -# Build the iOS TensorFlow libraries. -tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3" +if [[ -z "${BUILD_ARCH}" ]]; then + # build the ios tensorflow libraries. + tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB +else + # arch specified so build just that + tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB +fi # Creates a static universal library in # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh index 4056db18a7..43e5809dd2 100755 --- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh +++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh @@ -21,10 +21,28 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -SCRIPT_DIR=$(dirname $0) +usage() { + echo "Usage: $(basename "$0") [-a]" + echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" + echo "default arch i386, x86_64, armv7, armv7s, arm64" + exit 1 +} + +BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" +while getopts "a:" opt_name; do + case "$opt_name" in + a) BUILD_TARGET="${OPTARG}";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + +IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" + +SCRIPT_DIR=$(cd `dirname $0` && pwd) source "${SCRIPT_DIR}/build_helper.subr" -cd tensorflow/contrib/makefile +cd ${SCRIPT_DIR} HOST_GENDIR="$(pwd)/gen/protobuf-host" mkdir -p "${HOST_GENDIR}" @@ -64,6 +82,10 @@ else echo "protoc found. Skip building host tools." fi +# Remove old libs +rm -f ${LIBDIR}/libprotobuf.a +rm -f ${LIBDIR}/libprotobuf-lite.a + ./autogen.sh if [ $? -ne 0 ] then @@ -71,157 +93,192 @@ then exit 1 fi -make distclean -./configure \ ---host=i386-apple-${OSX_VERSION} \ ---disable-shared \ ---enable-cross-compile \ ---with-protoc="${PROTOC_PATH}" \ ---prefix=${LIBDIR}/iossim_386 \ ---exec-prefix=${LIBDIR}/iossim_386 \ -"CFLAGS=${CFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch i386 \ --fembed-bitcode \ --isysroot ${IPHONESIMULATOR_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch i386 \ --fembed-bitcode \ --isysroot \ -${IPHONESIMULATOR_SYSROOT}" \ -LDFLAGS="-arch i386 \ --fembed-bitcode \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS} \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=x86_64-apple-${OSX_VERSION} \ ---disable-shared \ ---enable-cross-compile \ ---with-protoc="${PROTOC_PATH}" \ ---prefix=${LIBDIR}/iossim_x86_64 \ ---exec-prefix=${LIBDIR}/iossim_x86_64 \ -"CFLAGS=${CFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch x86_64 \ --fembed-bitcode \ --isysroot ${IPHONESIMULATOR_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch x86_64 \ --fembed-bitcode \ --isysroot \ -${IPHONESIMULATOR_SYSROOT}" \ -LDFLAGS="-arch x86_64 \ --fembed-bitcode \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS} \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=armv7-apple-${OSX_VERSION} \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm7 \ ---exec-prefix=${LIBDIR}/ios_arm7 \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch armv7 \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=armv7s-apple-${OSX_VERSION} \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm7s \ ---exec-prefix=${LIBDIR}/ios_arm7s \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7s \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7s \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch armv7s \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=arm \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm64 \ ---exec-prefix=${LIBDIR}/ios_arm64 \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch arm64 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch arm64 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch arm64 \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -lipo \ -${LIBDIR}/iossim_386/lib/libprotobuf.a \ -${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \ -${LIBDIR}/ios_arm7/lib/libprotobuf.a \ -${LIBDIR}/ios_arm7s/lib/libprotobuf.a \ -${LIBDIR}/ios_arm64/lib/libprotobuf.a \ --create \ --output ${LIBDIR}/libprotobuf.a - -lipo \ -${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \ -${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \ --create \ --output ${LIBDIR}/libprotobuf-lite.a +package_pb_library() { + pb_libs="${LIBDIR}/${1}/lib/libprotobuf.a" + if [ -f "${LIBDIR}/libprotobuf.a" ]; then + pb_libs="$pb_libs ${LIBDIR}/libprotobuf.a" + fi + lipo \ + $pb_libs \ + -create \ + -output ${LIBDIR}/libprotobuf.a + + pblite_libs="${LIBDIR}/${1}/lib/libprotobuf-lite.a" + if [ -f "${LIBDIR}/libprotobuf-lite.a" ]; then + pblite_libs="$pblite_libs ${LIBDIR}/libprotobuf-lite.a" + fi + lipo \ + $pblite_libs \ + -create \ + -output ${LIBDIR}/libprotobuf-lite.a +} + +build_target() { +case "$1" in + i386) make distclean + ./configure \ + --host=i386-apple-${OSX_VERSION} \ + --disable-shared \ + --enable-cross-compile \ + --with-protoc="${PROTOC_PATH}" \ + --prefix=${LIBDIR}/iossim_386 \ + --exec-prefix=${LIBDIR}/iossim_386 \ + "CFLAGS=${CFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch i386 \ + -fembed-bitcode \ + -isysroot ${IPHONESIMULATOR_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch i386 \ + -fembed-bitcode \ + -isysroot \ + ${IPHONESIMULATOR_SYSROOT}" \ + LDFLAGS="-arch i386 \ + -fembed-bitcode \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS} \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "iossim_386" + ;; + + x86_64) make distclean + ./configure \ + --host=x86_64-apple-${OSX_VERSION} \ + --disable-shared \ + --enable-cross-compile \ + --with-protoc="${PROTOC_PATH}" \ + --prefix=${LIBDIR}/iossim_x86_64 \ + --exec-prefix=${LIBDIR}/iossim_x86_64 \ + "CFLAGS=${CFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch x86_64 \ + -fembed-bitcode \ + -isysroot ${IPHONESIMULATOR_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch x86_64 \ + -fembed-bitcode \ + -isysroot \ + ${IPHONESIMULATOR_SYSROOT}" \ + LDFLAGS="-arch x86_64 \ + -fembed-bitcode \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS} \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "iossim_x86_64" + ;; + + armv7) make distclean + ./configure \ + --host=armv7-apple-${OSX_VERSION} \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm7 \ + --exec-prefix=${LIBDIR}/ios_arm7 \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch armv7 \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm7" + ;; + + armv7s) make distclean + ./configure \ + --host=armv7s-apple-${OSX_VERSION} \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm7s \ + --exec-prefix=${LIBDIR}/ios_arm7s \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7s \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7s \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch armv7s \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm7s" + ;; + + arm64) make distclean + ./configure \ + --host=arm \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm64 \ + --exec-prefix=${LIBDIR}/ios_arm64 \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch arm64 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch arm64 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch arm64 \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm64" + ;; + *) + echo "Unknown ARCH" + exit 1 + ;; +esac +} + +for build_element in "${build_targets[@]}" +do + echo "$build_element" + build_target "$build_element" +done + +file ${LIBDIR}/libprotobuf.a +file ${LIBDIR}/libprotobuf-lite.a +echo "Done building and packaging the libraries" diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh index 5d1cc8b375..ae82163e11 100755 --- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh +++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh @@ -43,55 +43,124 @@ then exit 1 fi +usage() { + echo "Usage: $(basename "$0") [-a]" + echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" + echo "default is [i386, x86_64, armv7, armv7s, arm64]" + exit 1 +} + +BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" +while getopts "a:f:h:n:" opt_name; do + case "$opt_name" in + a) BUILD_TARGET="${OPTARG}";; + f) BUILD_OPT="${OPTARG}";; + h) NSYNC_HOST="${OPTARG}";; + n) NSYNC_TARGET="${OPTARG}";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + +IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" + +SCRIPT_DIR=$(cd `dirname $0` && pwd) +source "${SCRIPT_DIR}/build_helper.subr" + + GENDIR=tensorflow/contrib/makefile/gen/ LIBDIR=${GENDIR}lib LIB_PREFIX=libtensorflow-core -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "armv7 compilation failed." - exit 1 -fi +#remove any old artifacts +rm -rf ${LIBDIR}/${LIB_PREFIX}.a -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "arm7vs compilation failed." - exit 1 -fi +package_tf_library() { + CAP_DIR=`echo $1 | tr 'a-z' 'A-Z'` + tf_libs="${LIBDIR}/ios_${CAP_DIR}/${LIB_PREFIX}-${1}.a" + if [ -f "${LIBDIR}/${LIB_PREFIX}.a" ]; then + tf_libs="$tf_libs ${LIBDIR}/${LIB_PREFIX}.a" + fi + lipo \ + $tf_libs \ + -create \ + -output ${LIBDIR}/${LIB_PREFIX}.a +} -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "arm64 compilation failed." - exit 1 -fi +build_tf_target() { +case "$1" in + armv7) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "armv7 compilation failed." + exit 1 + fi + package_tf_library "armv7" + ;; + armv7s) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "i386 compilation failed." - exit 1 -fi + if [ $? -ne 0 ] + then + echo "arm7vs compilation failed." + exit 1 + fi + package_tf_library "armv7s" + ;; + arm64) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "arm64 compilation failed." + exit 1 + fi + package_tf_library "arm64" + ;; + i386) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "i386 compilation failed." + exit 1 + fi + package_tf_library "i386" + ;; + x86_64) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "x86_64 compilation failed." + exit 1 + fi + package_tf_library "x86_64" + ;; + *) + echo "Unknown ARCH" + exit 1 +esac +} -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "x86_64 compilation failed." - exit 1 -fi +for build_tf_element in "${build_targets[@]}" +do + echo "$build_tf_element" + build_tf_target "$build_tf_element" +done -lipo \ -${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \ -${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \ -${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \ -${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \ -${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \ --create \ --output ${LIBDIR}/${LIB_PREFIX}.a +echo "Done building and packaging TF" +file ${LIBDIR}/${LIB_PREFIX}.a diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index ecbd9bb825..930e6b8dea 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -265,7 +265,7 @@ for arch in $archs; do -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \ -I../../platform/c++11 -I../../platform/gcc \ -I../../platform/posix -pthread - PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE + PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE -fPIC PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ @@ -301,6 +301,9 @@ done case "$target_platform" in ios) nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11" + if [ -d "$nsync_platform_dir" ]; then + rm -rf "$nsync_platform_dir" + fi mkdir "$nsync_platform_dir" eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a' echo "$nsync_platform_dir/nsync.a" diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py index 3bf795d19a..0bc133a00e 100644 --- a/tensorflow/contrib/nn/__init__.py +++ b/tensorflow/contrib/nn/__init__.py @@ -15,6 +15,7 @@ """Module for variants of ops in tf.nn. @@alpha_dropout +@@conv1d_transpose @@deprecated_flipped_softmax_cross_entropy_with_logits @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits @@deprecated_flipped_sigmoid_cross_entropy_with_logits @@ -32,6 +33,7 @@ from tensorflow.contrib.nn.python.ops.alpha_dropout import * from tensorflow.contrib.nn.python.ops.cross_entropy import * from tensorflow.contrib.nn.python.ops.sampling_ops import * from tensorflow.contrib.nn.python.ops.scaled_softplus import * +from tensorflow.python.ops.nn_ops import conv1d_transpose from tensorflow.python.ops.nn_ops import nth_element # pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 8c46becf2c..a9a63cbce0 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -19,6 +19,7 @@ py_library( "python/training/external_optimizer.py", "python/training/lazy_adam_optimizer.py", "python/training/moving_average_optimizer.py", + "python/training/multitask_optimizer_wrapper.py", "python/training/nadam_optimizer.py", "python/training/powersign.py", "python/training/sign_decay.py", @@ -98,6 +99,23 @@ py_test( ], ) +py_test( + name = "multitask_optimizer_wrapper_test", + srcs = ["python/training/multitask_optimizer_wrapper_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:training", + "//tensorflow/python:variables", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) + py_test( name = "lazy_adam_optimizer_test", srcs = ["python/training/lazy_adam_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index caf22536bb..4c60c99342 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -24,7 +24,7 @@ from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import from tensorflow.contrib.opt.python.training.external_optimizer import * from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * -from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * @@ -38,7 +38,8 @@ _allowed_symbols = [ 'DelayCompensatedGradientDescentOptimizer', 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', - 'ScipyOptimizerInterface', 'VariableClippingOptimizer' + 'ScipyOptimizerInterface', 'VariableClippingOptimizer', + 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py new file mode 100644 index 0000000000..c26037935d --- /dev/null +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py @@ -0,0 +1,138 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""An optimizer wrapper that ensures correct behaviour +of stateful optimizers with multitask loss.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import types +import six + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.training import optimizer + +__all__ = ["MultitaskOptimizerWrapper", + "clip_gradients_by_global_norm"] + +def _is_all_zeros(grad): + all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0) + return all_zeros + +def _get_wrapper(fn, opt): + def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument + all_zeros = _is_all_zeros(grad) + return control_flow_ops.cond( + all_zeros, + control_flow_ops.no_op, + lambda: fn(grad, *args, **kwargs)) + wrapper = types.MethodType(wrapper, opt) + return wrapper + +class MultitaskOptimizerWrapper(object): + """Optimizer wrapper that ensures that + all-zero gradients don't affect the optimizer state. + + This might be useful when a multi-task loss is used, + and some components of the loss might be + not present (e.g. masked out) in some training batches. + Technically their gradient would be zero, + which would normally affect the optimizer state + (e.g. push running average to zero). + However this is not the desired behaviour, + since the missing loss component + should be treated as unknown rather than zero. + + This wrapper filters out all-zero gradient tensors, + therefore preserving the optimizer state. + + If gradient clipping by global norm is used, + the provided function clip_gradients_by_global_norm + should be used (and specified explicitly by the user). + Otherwise the global norm would be underestimated + because of all-zero tensors that should be ignored. + + The gradient calculation and application + are delegated to an underlying optimizer. + The gradient application is altered only for all-zero tensors. + + Example: + ```python + momentum_optimizer = tf.train.MomentumOptimizer( + learning_rate, momentum=0.9) + multitask_momentum_optimizer = tf.contrib.opt.MultitaskOptimizerWrapper( + momentum_optimizer) + gradvars = multitask_momentum_optimizer.compute_gradients( + loss) + gradvars_clipped, _ = tf.contrib.opt.clip_gradients_by_global_norm( + gradvars, 15.0) + train_op = multitask_momentum_optimizer.apply_gradients( + gradvars_clipped, global_step=batch) + ``` + """ + def __init__(self, opt): + """ + Args: + opt: an instance of a class that implements tf.train.Optimizer. + """ + if not isinstance(opt, optimizer.Optimizer): + raise TypeError( + "Supplied optimizer must be an instance of tf.train.Optimizer") + self._opt = opt + overriden_methods = ('_apply_dense', + '_resource_apply_dense', + '_apply_sparse', + '_resource_apply_sparse') + for name in overriden_methods: + fn = getattr(self._opt, name) + wrapper = _get_wrapper(fn, self._opt) + setattr(self._opt, name, wrapper) + + def __getattr__(self, name): + return getattr(self._opt, name) + + +def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.): + """Clips gradients of a multitask loss by their global norm. + Ignores all-zero tensors when computing the global norm. + + Args: + gradients_variables: a list of pairs (gradient, variable). + clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. + + Returns: + list: A list of pairs of the same type as gradients_variables,. + fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. + """ + gradients, variables = six.moves.zip(*gradients_variables) + def _replace_nonexisting_grad(grad): + if grad is None: + return grad + all_zeros = _is_all_zeros(grad) + return control_flow_ops.cond(all_zeros, + lambda: array_ops.zeros( + [], dtype=dtypes.as_dtype(grad.dtype)), + lambda: grad) + nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients] + fixed_global_norm = clip_ops.global_norm(nonzero_gradients) + gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm, + use_norm=fixed_global_norm) + return list(six.moves.zip(gradients, variables)), fixed_global_norm diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py new file mode 100644 index 0000000000..b06213f715 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py @@ -0,0 +1,119 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for MultitaskOptimizerWrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import momentum + +import numpy as np +import six + +class MultitaskOptimizerWrapperTest(test.TestCase): + """ + Tests for the multitask optimizer wrapper. + """ + def testWrapper(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) + var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32) + grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) + mom_opt_impl = momentum.MomentumOptimizer( + learning_rate=2.0, momentum=0.9) + mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper( + mom_opt_impl) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + mom_update_partial = mom_opt.apply_gradients( + zip([grads_allzero, grads1], [var0, var1])) + mom_update_no_action = mom_opt.apply_gradients( + zip([grads_allzero, grads_allzero], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + + # Step 1: normal momentum update. + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), + self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + + # Step 2: momentum update that changes only slot1 but not slot0. + self.evaluate(mom_update_partial) + # Check that only the relevant momentum accumulator has been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + + # Step 3: momentum update that does not change anything. + self.evaluate(mom_update_no_action) + # Check that the momentum accumulators have *NOT* been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + + def testGradientClipping(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) + var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + var2 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + var3 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + grads0 = constant_op.constant([10.0, 15.0], dtype=dtypes.float32) + grads1 = constant_op.constant([0.0, 5.0], dtype=dtypes.float32) + grads2 = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) + grads3 = None + varlist = [var0, var1, var2, var3] + gradients = [grads0, grads1, grads2, grads3] + clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm( + six.moves.zip(gradients, varlist), clip_norm=1.0) + clipped_grads = list(six.moves.zip(*clipped_gradvars))[0] + reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0]))) + self.assertAllCloseAccordingToType( + self.evaluate(global_norm), reference_global_norm) + self.assertAllCloseAccordingToType( + self.evaluate(clipped_grads[2]), np.array([0., 0.])) + self.assertEqual(clipped_grads[3], None) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 909c6aba2b..16b6d145e3 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -38,6 +38,9 @@ from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test +from tensorflow.python.framework import test_util +from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell + # pylint: enable=protected-access @@ -358,6 +361,45 @@ class RNNCellTest(test.TestCase): self.assertEquals(variables[2].op.name, "root/lstm_cell/projection/kernel") + def testLSTMCellLayerNorm(self): + with self.test_session() as sess: + num_units = 2 + num_proj = 3 + batch_size = 1 + input_size = 4 + with variable_scope.variable_scope( + "root", initializer=init_ops.constant_initializer(0.5)): + x = array_ops.zeros([batch_size, input_size]) + c = array_ops.zeros([batch_size, num_units]) + h = array_ops.zeros([batch_size, num_proj]) + state = rnn_cell_impl.LSTMStateTuple(c, h) + cell = contrib_rnn_cell.LayerNormLSTMCell( + num_units=num_units, + num_proj=num_proj, + forget_bias=1.0, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) + g, out_m = cell(x, state) + sess.run([variables_lib.global_variables_initializer()]) + res = sess.run([g, out_m], { + x.name: np.ones((batch_size, input_size)), + c.name: 0.1 * np.ones((batch_size, num_units)), + h.name: 0.1 * np.ones((batch_size, num_proj)) + }) + self.assertEqual(len(res), 2) + # The numbers in results were not calculated, this is mostly just a + # smoke test. + self.assertEqual(res[0].shape, (batch_size, num_proj)) + self.assertEqual(res[1][0].shape, (batch_size, num_units)) + self.assertEqual(res[1][1].shape, (batch_size, num_proj)) + # Different inputs so different outputs and states + for i in range(1, batch_size): + self.assertTrue( + float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) + self.assertTrue( + float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) + def testOutputProjectionWrapper(self): with self.test_session() as sess: with variable_scope.variable_scope( diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index ebd4564f12..b4a5f2d7eb 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -37,6 +37,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell +from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -1275,6 +1276,49 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5) + + def testBasicLSTMCellWithStateTupleLayerNorm(self): + """The results of LSTMCell and LayerNormBasicLSTMCell + should be same. """ + with self.test_session() as sess: + with variable_scope.variable_scope( + "root", initializer=init_ops.constant_initializer(0.5)): + x = array_ops.zeros([1, 2]) + c0 = array_ops.zeros([1, 2]) + h0 = array_ops.zeros([1, 2]) + state0 = rnn_cell_impl.LSTMStateTuple(c0, h0) + c1 = array_ops.zeros([1, 2]) + h1 = array_ops.zeros([1, 2]) + state1 = rnn_cell_impl.LSTMStateTuple(c1, h1) + cell = rnn_cell_impl.MultiRNNCell( + [contrib_rnn_cell.LayerNormLSTMCell( + 2, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) for _ in range(2)]) + h, (s0, s1) = cell(x, (state0, state1)) + sess.run([variables.global_variables_initializer()]) + res = sess.run([h, s0, s1], { + x.name: np.array([[1., 1.]]), + c0.name: 0.1 * np.asarray([[0, 1]]), + h0.name: 0.1 * np.asarray([[2, 3]]), + c1.name: 0.1 * np.asarray([[4, 5]]), + h1.name: 0.1 * np.asarray([[6, 7]]), + }) + + expected_h = np.array([[-0.38079708, 0.38079708]]) + expected_h0 = np.array([[-0.38079708, 0.38079708]]) + expected_c0 = np.array([[-1.0, 1.0]]) + expected_h1 = np.array([[-0.38079708, 0.38079708]]) + expected_c1 = np.array([[-1.0, 1.0]]) + + self.assertEqual(len(res), 3) + self.assertAllClose(res[0], expected_h, 1e-5) + self.assertAllClose(res[1].c, expected_c0, 1e-5) + self.assertAllClose(res[1].h, expected_h0, 1e-5) + self.assertAllClose(res[2].c, expected_c1, 1e-5) + self.assertAllClose(res[2].h, expected_h1, 1e-5) + def testBasicLSTMCellWithDropout(self): def _is_close(x, y, digits=4): diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index d4691f2c27..5e85c125df 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest @@ -76,6 +77,18 @@ def _get_sharded_variable(name, shape, dtype, num_shards): return shards +def _norm(g, b, inp, scope): + shape = inp.get_shape()[-1:] + gamma_init = init_ops.constant_initializer(g) + beta_init = init_ops.constant_initializer(b) + with vs.variable_scope(scope): + # Initialize beta and gamma for use by layer_norm. + vs.get_variable("gamma", shape=shape, initializer=gamma_init) + vs.get_variable("beta", shape=shape, initializer=beta_init) + normalized = layers.layer_norm(inp, reuse=True, scope=scope) + return normalized + + class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): """Long short-term memory unit (LSTM) recurrent network cell. @@ -102,13 +115,24 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. + + Layer normalization implementation is based on: + + https://arxiv.org/abs/1607.06450. + + "Layer Normalization" + Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton + + and is applied before the internal nonlinearities. + """ def __init__(self, num_units, use_peepholes=False, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=1, num_proj_shards=1, forget_bias=1.0, state_is_tuple=True, - activation=math_ops.tanh, reuse=None): + activation=math_ops.tanh, reuse=None, + layer_norm=False, norm_gain=1.0, norm_shift=0.0): """Initialize the parameters for an LSTM cell. Args: @@ -135,6 +159,13 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. + layer_norm: If `True`, layer normalization will be applied. + norm_gain: float, The layer normalization gain initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + norm_shift: float, The layer normalization shift initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + + """ super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: @@ -152,6 +183,9 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): self._state_is_tuple = state_is_tuple self._activation = activation self._reuse = reuse + self._layer_norm = layer_norm + self._norm_gain = norm_gain + self._norm_shift = norm_shift if num_proj: self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj) @@ -220,9 +254,20 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): # j = new_input, f = forget_gate, o = output_gate cell_inputs = array_ops.concat([inputs, m_prev], 1) - lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b) + lstm_matrix = math_ops.matmul(cell_inputs, concat_w) + + # If layer nomalization is applied, do not add bias + if not self._layer_norm: + lstm_matrix = nn_ops.bias_add(lstm_matrix, b) + j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=3, axis=1) + # Apply layer normalization + if self._layer_norm: + j = _norm(self._norm_gain, self._norm_shift, j, "transform") + f = _norm(self._norm_gain, self._norm_shift, f, "forget") + o = _norm(self._norm_gain, self._norm_shift, o, "output") + # Diagonal connections if self._use_peepholes: w_f_diag = vs.get_variable( @@ -236,6 +281,10 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): f_act = sigmoid(f + self._forget_bias) c = (f_act * c_prev + (1 - f_act) * self._activation(j)) + # Apply layer normalization + if self._layer_norm: + c = _norm(self._norm_gain, self._norm_shift, c, "state") + if self._use_peepholes: m = sigmoid(o + w_o_diag * c) * self._activation(c) else: @@ -1301,8 +1350,8 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): self._keep_prob = dropout_keep_prob self._seed = dropout_prob_seed self._layer_norm = layer_norm - self._g = norm_gain - self._b = norm_shift + self._norm_gain = norm_gain + self._norm_shift = norm_shift self._reuse = reuse @property @@ -1313,24 +1362,25 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): def output_size(self): return self._num_units - def _norm(self, inp, scope): + def _norm(self, inp, scope, dtype=dtypes.float32): shape = inp.get_shape()[-1:] - gamma_init = init_ops.constant_initializer(self._g) - beta_init = init_ops.constant_initializer(self._b) + gamma_init = init_ops.constant_initializer(self._norm_gain) + beta_init = init_ops.constant_initializer(self._norm_shift) with vs.variable_scope(scope): # Initialize beta and gamma for use by layer_norm. - vs.get_variable("gamma", shape=shape, initializer=gamma_init) - vs.get_variable("beta", shape=shape, initializer=beta_init) + vs.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype) + vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype) normalized = layers.layer_norm(inp, reuse=True, scope=scope) return normalized def _linear(self, args): out_size = 4 * self._num_units proj_size = args.get_shape()[-1] - weights = vs.get_variable("kernel", [proj_size, out_size]) + dtype = args.dtype + weights = vs.get_variable("kernel", [proj_size, out_size], dtype=dtype) out = math_ops.matmul(args, weights) if not self._layer_norm: - bias = vs.get_variable("bias", [out_size]) + bias = vs.get_variable("bias", [out_size], dtype=dtype) out = nn_ops.bias_add(out, bias) return out @@ -1339,13 +1389,14 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): c, h = state args = array_ops.concat([inputs, h], 1) concat = self._linear(args) + dtype = args.dtype i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1) if self._layer_norm: - i = self._norm(i, "input") - j = self._norm(j, "transform") - f = self._norm(f, "forget") - o = self._norm(o, "output") + i = self._norm(i, "input", dtype=dtype) + j = self._norm(j, "transform", dtype=dtype) + f = self._norm(f, "forget", dtype=dtype) + o = self._norm(o, "output", dtype=dtype) g = self._activation(j) if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1: @@ -1354,7 +1405,7 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): new_c = (c * math_ops.sigmoid(f + self._forget_bias) + math_ops.sigmoid(i) * g) if self._layer_norm: - new_c = self._norm(new_c, "state") + new_c = self._norm(new_c, "state", dtype=dtype) new_h = self._activation(new_c) * math_ops.sigmoid(o) new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h) @@ -2306,3 +2357,264 @@ class GLSTMCell(rnn_cell_impl.RNNCell): new_state = rnn_cell_impl.LSTMStateTuple(c, m) return m, new_state + + +class LayerNormLSTMCell(rnn_cell_impl.RNNCell): + """Long short-term memory unit (LSTM) recurrent network cell. + + The default non-peephole implementation is based on: + + http://www.bioinf.jku.at/publications/older/2604.pdf + + S. Hochreiter and J. Schmidhuber. + "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997. + + The peephole implementation is based on: + + https://research.google.com/pubs/archive/43905.pdf + + Hasim Sak, Andrew Senior, and Francoise Beaufays. + "Long short-term memory recurrent neural network architectures for + large scale acoustic modeling." INTERSPEECH, 2014. + + The class uses optional peep-hole connections, optional cell clipping, and + an optional projection layer. + + Layer normalization implementation is based on: + + https://arxiv.org/abs/1607.06450. + + "Layer Normalization" + Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton + + and is applied before the internal nonlinearities. + + """ + + def __init__(self, num_units, + use_peepholes=False, cell_clip=None, + initializer=None, num_proj=None, proj_clip=None, + forget_bias=1.0, + activation=None, layer_norm=False, + norm_gain=1.0, norm_shift=0.0, reuse=None): + """Initialize the parameters for an LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell + use_peepholes: bool, set True to enable diagonal/peephole connections. + cell_clip: (optional) A float value, if provided the cell state is clipped + by this value prior to the cell output activation. + initializer: (optional) The initializer to use for the weight and + projection matrices. + num_proj: (optional) int, The output dimensionality for the projection + matrices. If None, no projection is performed. + proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is + provided, then the projected values are clipped elementwise to within + `[-proj_clip, proj_clip]`. + forget_bias: Biases of the forget gate are initialized by default to 1 + in order to reduce the scale of forgetting at the beginning of + the training. Must set it manually to `0.0` when restoring from + CudnnLSTM trained checkpoints. + activation: Activation function of the inner states. Default: `tanh`. + layer_norm: If `True`, layer normalization will be applied. + norm_gain: float, The layer normalization gain initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + norm_shift: float, The layer normalization shift initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + + When restoring from CudnnLSTM-trained checkpoints, must use + CudnnCompatibleLSTMCell instead. + """ + super(LayerNormLSTMCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + self._use_peepholes = use_peepholes + self._cell_clip = cell_clip + self._initializer = initializer + self._num_proj = num_proj + self._proj_clip = proj_clip + self._forget_bias = forget_bias + self._activation = activation or math_ops.tanh + self._layer_norm = layer_norm + self._norm_gain = norm_gain + self._norm_shift = norm_shift + + if num_proj: + self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)) + self._output_size = num_proj + else: + self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units)) + self._output_size = num_units + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + + def _linear(self, + args, + output_size, + bias, + bias_initializer=None, + kernel_initializer=None, + layer_norm=False): + """Linear map: sum_i(args[i] * W[i]), where W[i] is a Variable. + + Args: + args: a 2D Tensor or a list of 2D, batch x n, Tensors. + output_size: int, second dimension of W[i]. + bias: boolean, whether to add a bias term or not. + bias_initializer: starting value to initialize the bias + (default is all zeros). + kernel_initializer: starting value to initialize the weight. + layer_norm: boolean, whether to apply layer normalization. + + + Returns: + A 2D Tensor with shape [batch x output_size] taking value + sum_i(args[i] * W[i]), where each W[i] is a newly created Variable. + + Raises: + ValueError: if some of the arguments has unspecified or wrong shape. + """ + if args is None or (nest.is_sequence(args) and not args): + raise ValueError("`args` must be specified") + if not nest.is_sequence(args): + args = [args] + + # Calculate the total size of arguments on dimension 1. + total_arg_size = 0 + shapes = [a.get_shape() for a in args] + for shape in shapes: + if shape.ndims != 2: + raise ValueError("linear is expecting 2D arguments: %s" % shapes) + if shape[1].value is None: + raise ValueError("linear expects shape[1] to be provided for shape %s, " + "but saw %s" % (shape, shape[1])) + else: + total_arg_size += shape[1].value + + dtype = [a.dtype for a in args][0] + + # Now the computation. + scope = vs.get_variable_scope() + with vs.variable_scope(scope) as outer_scope: + weights = vs.get_variable( + "kernel", [total_arg_size, output_size], + dtype=dtype, + initializer=kernel_initializer) + if len(args) == 1: + res = math_ops.matmul(args[0], weights) + else: + res = math_ops.matmul(array_ops.concat(args, 1), weights) + if not bias: + return res + with vs.variable_scope(outer_scope) as inner_scope: + inner_scope.set_partitioner(None) + if bias_initializer is None: + bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) + biases = vs.get_variable( + "bias", [output_size], + dtype=dtype, + initializer=bias_initializer) + + if not layer_norm: + res = nn_ops.bias_add(res, biases) + + return res + + def call(self, inputs, state): + """Run one step of LSTM. + + Args: + inputs: input Tensor, 2D, batch x num_units. + state: this must be a tuple of state Tensors, + both `2-D`, with column sizes `c_state` and + `m_state`. + + Returns: + A tuple containing: + + - A `2-D, [batch x output_dim]`, Tensor representing the output of the + LSTM after reading `inputs` when previous state was `state`. + Here output_dim is: + num_proj if num_proj was set, + num_units otherwise. + - Tensor(s) representing the new state of LSTM after reading `inputs` when + the previous state was `state`. Same type and shape(s) as `state`. + + Raises: + ValueError: If input size cannot be inferred from inputs via + static shape inference. + """ + num_proj = self._num_units if self._num_proj is None else self._num_proj + sigmoid = math_ops.sigmoid + + (c_prev, m_prev) = state + + dtype = inputs.dtype + input_size = inputs.get_shape().with_rank(2)[1] + if input_size.value is None: + raise ValueError("Could not infer input size from inputs.get_shape()[-1]") + scope = vs.get_variable_scope() + with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: + + # i = input_gate, j = new_input, f = forget_gate, o = output_gate + lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True, + bias_initializer=None, layer_norm=self._layer_norm) + i, j, f, o = array_ops.split( + value=lstm_matrix, num_or_size_splits=4, axis=1) + + if self._layer_norm: + i = _norm(self._norm_gain, self._norm_shift, i, "input") + j = _norm(self._norm_gain, self._norm_shift, j, "transform") + f = _norm(self._norm_gain, self._norm_shift, f, "forget") + o = _norm(self._norm_gain, self._norm_shift, o, "output") + + # Diagonal connections + if self._use_peepholes: + with vs.variable_scope(unit_scope) as projection_scope: + w_f_diag = vs.get_variable( + "w_f_diag", shape=[self._num_units], dtype=dtype) + w_i_diag = vs.get_variable( + "w_i_diag", shape=[self._num_units], dtype=dtype) + w_o_diag = vs.get_variable( + "w_o_diag", shape=[self._num_units], dtype=dtype) + + if self._use_peepholes: + c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) + else: + c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * + self._activation(j)) + + if self._layer_norm: + c = _norm(self._norm_gain, self._norm_shift, c, "state") + + if self._cell_clip is not None: + # pylint: disable=invalid-unary-operand-type + c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) + # pylint: enable=invalid-unary-operand-type + if self._use_peepholes: + m = sigmoid(o + w_o_diag * c) * self._activation(c) + else: + m = sigmoid(o) * self._activation(c) + + if self._num_proj is not None: + with vs.variable_scope("projection") as proj_scope: + m = self._linear(m, self._num_proj, bias=False) + + if self._proj_clip is not None: + # pylint: disable=invalid-unary-operand-type + m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) + # pylint: enable=invalid-unary-operand-type + + new_state = (rnn_cell_impl.LSTMStateTuple(c, m)) + return m, new_state diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 87230e3355..c3b180d9f4 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism): memory_sequence_length=None, memory_layer=None, check_inner_dims_defined=True, - score_mask_value=float("-inf"), + score_mask_value=None, name=None): """Construct base AttentionMechanism class. @@ -187,9 +187,12 @@ class _BaseAttentionMechanism(AttentionMechanism): "memory_layer is not a Layer: %s" % type(memory_layer).__name__) self._query_layer = query_layer self._memory_layer = memory_layer + self.dtype = memory_layer.dtype if not callable(probability_fn): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) + if score_mask_value is None: + score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -334,7 +337,8 @@ class LuongAttention(_BaseAttentionMechanism): memory_sequence_length=None, scale=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="LuongAttention"): """Construct the AttentionMechanism mechanism. @@ -353,17 +357,20 @@ class LuongAttention(_BaseAttentionMechanism): score_mask_value: (optional) The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the memory layer of the attention mechanism. name: Name to use when creating ops. """ # For LuongAttention, we only transform the memory layer; thus # num_units **must** match expected the query depth. if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(LuongAttention, self).__init__( query_layer=None, memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -475,7 +482,8 @@ class BahdanauAttention(_BaseAttentionMechanism): memory_sequence_length=None, normalize=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="BahdanauAttention"): """Construct the Attention mechanism. @@ -494,16 +502,20 @@ class BahdanauAttention(_BaseAttentionMechanism): score_mask_value: (optional): The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(BahdanauAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -738,11 +750,12 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, normalize=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="BahdanauMonotonicAttention"): """Construct the Attention mechanism. @@ -766,17 +779,21 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(BahdanauMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -834,11 +851,12 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, scale=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="LuongMonotonicAttention"): """Construct the Attention mechanism. @@ -862,17 +880,21 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(LuongMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -1123,8 +1145,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, name="attention_layer", use_bias=False) - for attention_layer_size in attention_layer_sizes) + attention_layer_size, name="attention_layer", use_bias=False, + dtype=attention_mechanisms[i].dtype) + for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) else: self._attention_layers = None diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index 0bfd0801d5..f7a85557ca 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -237,7 +237,7 @@ One way to reduce this code duplication would be via a `for` loop: ```python net = ... for i in range(3): - net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1)) + net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1)) net = slim.max_pool2d(net, [2, 2], scope='pool2') ``` diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py index b4fd2580c2..576444214d 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py @@ -386,7 +386,7 @@ class ResnetCompleteNetworkTest(test.TestCase): inputs, None, is_training=False, global_pool=False) sess.run(variables.global_variables_initializer()) self.assertAllClose( - output.eval(), expected.eval(), atol=1e-4, rtol=1e-4) + output.eval(), expected.eval(), atol=2e-4, rtol=1e-4) def testUnknownBatchSize(self): batch = 2 diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index da5f2b0223..dcb390b0a5 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -1,4 +1,4 @@ -## How to compile and use RDMA-enabled TensorFlow +## How to compile, use and configure RDMA-enabled TensorFlow 1. Follow the regular TF compilation instructions. During configure step, if you want ibverbs based RDMA support, answer yes to this question: ```Do you wish to build TensorFlow with VERBS-RDMA support [y/N]``` @@ -7,6 +7,18 @@ ```server = tf.train.Server(cluster, job_name="local", task_index=0, protocol='grpc+verbs') # default protocol is 'grpc'``` +3. RDMA configuration is done by setting the following environment variables: + * **RDMA_DEVICE**: The RDMA device name to be used. If not defined by user, a default device with an active port will be set if exists. + * **RDMA_DEVICE_PORT**: The port within the selected device. Not relevant if RDMA_DEVICE is not defined. If not defined by user, a default active port will be set if exists. + * **RDMA_GID_INDEX**: The GID index of the port. If not defined by user, a default suitable GID index will be set (RoCEV2 is favourable as default). + * **RDMA_QP_PKEY_INDEX**: The Pkey for the QP. If not defined by user, the default value is 0. + * **RDMA_QP_QUEUE_DEPTH**: TX/RX queue size for the QP. If not defined by user, the default value is 1024. + * **RDMA_QP_TIMEOUT**: The retransmission timeout for QPs. If not defined by user, the default value is 14. + * **RDMA_QP_RETRY_COUNT**: Number of retransmission for QPs. If not defined by user, the default value is 7. + * **RDMA_QP_SL**: Service level configuration for QOS and ECN, valid values are 0-7. If not defined by user, the default value is 0. + * **RDMA_QP_MTU**: MTU configuration for the QPs. If not defined by user, the default value is active MTU from query_port. + * **RDMA_TRAFFIC_CLASS**: Traffic class configuration for QP, in case of DSCP trust level QoS configuration. If not defined by user, the default value is 0. For more info see [HowTo Configure Trust state on Mellanox Adapters](https://community.mellanox.com/docs/DOC-2866). + ## Overview The design is based on TensorFlow r1.0. An RDMA path is added between servers for tensor transfer (weights, gradients, etc). The existing GRPC path remains and is responsible for "administrative" tasks, such as setting up the RDMA path, exchanging computation graphs, etc. diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 26e18b28aa..331943a3ef 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/contrib/verbs/rdma.h" #include +#include #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -33,6 +34,8 @@ limitations under the License. namespace tensorflow { +#define RoCE_V2 "RoCE v2" + namespace { // hash name to 32-bit integer uint32_t NameHash(const string& name) { @@ -66,16 +69,337 @@ string MessageTypeToString(RdmaMessageType rmt) { } } // namespace -ibv_context* open_default_device() { +// Function to get environment variable +// Args: +// var_name - the name of the environmental variable +// Returns: +// string with it's value or empty string if not set +string get_env_var(char const* var_name) { + char const* var_temp = getenv(var_name); + + return (var_temp == NULL) ? string() : string(var_temp); +} + +// Function to open device +// Args: +// ibv_dev device to open +// Returns: +// context of the opened device +ibv_context* open_device(ibv_device* ibv_dev) { + ibv_context* context = ibv_open_device(ibv_dev); + + CHECK(context) << "Open context failed for " << ibv_get_device_name(ibv_dev); + return context; +} + +// Function to count the number of active ports for device +// Args: +// device - to check active ports +// Returns: +// number of active ports of the given device +int get_dev_active_port_count(ibv_device* device) { + ibv_device_attr device_att; + ibv_port_attr port_attr; + ibv_context* context = NULL; + int rc, port_index, active_ports = 0; + + context = ibv_open_device(device); + CHECK(context) << "Open context failed for " << ibv_get_device_name(device); + rc = ibv_query_device(context, &device_att); + CHECK(!rc) << "Failed to query the device"; + + for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { + rc = ibv_query_port(context, port_index, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_index; + if (port_attr.state == IBV_PORT_ACTIVE) { + active_ports++; + } + } + ibv_close_device(context); + return active_ports; +} + +// Function to set device. If RDMA_DEVICE not set, search for device with active +// port. +// Fails if more than one device with active port was found. +// Returns: +// device to use +ibv_device* set_device() { ibv_device** dev_list; - ibv_device* ib_dev; - dev_list = ibv_get_device_list(NULL); + int dev_num, device_index, device_to_open = 0; + int num_devs_with_active_port = 0; + string env_p_rdma_device, str_port_num; + + dev_list = ibv_get_device_list(&dev_num); CHECK(dev_list) << "No InfiniBand device found"; - ib_dev = dev_list[0]; - CHECK(ib_dev) << "No InfiniBand device found"; - ibv_context* context = ibv_open_device(ib_dev); - CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev); - return context; + + env_p_rdma_device = get_env_var("RDMA_DEVICE"); + if (!env_p_rdma_device.empty()) { + for (device_index = 0; device_index < dev_num; device_index++) { + if (!env_p_rdma_device.compare( + ibv_get_device_name(dev_list[device_index]))) { + CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) + << "Device " << ibv_get_device_name(dev_list[device_index]) + << " has no active ports"; + return dev_list[device_index]; + } + } + // check validity of input device + CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; + } else { + // set default device + str_port_num = get_env_var("RDMA_DEVICE_PORT"); + CHECK(str_port_num.empty()) + << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; + for (device_index = 0; device_index < dev_num; device_index++) { + // get port_num + if (get_dev_active_port_count(dev_list[device_index]) > 0) { + num_devs_with_active_port++; + CHECK(num_devs_with_active_port <= 1) << ". More than one device with " + "active port in the system. " + "Please enter RDMA_DEVICE"; + // found device with at least 1 active port + device_to_open = device_index; + } + } + CHECK(num_devs_with_active_port > 0) + << "There is no active port in the system"; + return dev_list[device_to_open]; + } + CHECK(false) << "No device was set!"; + return NULL; // never happens +} + +// Function to set port for device. +// If RDMA_DEVICE_PORT not set, first active port of the device will be set. +// Args: +// context of the device +// Returns: +// port to use +uint8_t set_port(ibv_context* context) { + uint8_t port_num = 0; //0 is illegal port number + string str_port_num; + ibv_device_attr device_att; + ibv_port_attr port_attr; + int rc, port_index; + + rc = ibv_query_device(context, &device_att); + CHECK(!rc) << "Failed to query the device\n"; + + str_port_num = get_env_var("RDMA_DEVICE_PORT"); + // user defined port + if (!str_port_num.empty()) { + port_num = stoi(str_port_num); + CHECK(port_num > 0) << "RDMA_DEVICE_PORT should be positive"; + CHECK(port_num <= device_att.phys_port_cnt) << "RDMA_DEVICE_PORT should be " + "less or equal to amount of " + "available ports"; + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + // check if port id active + CHECK(port_attr.state == IBV_PORT_ACTIVE) + << "Selected RDMA_DEVICE_PORT is not active"; + } + // set default port + else { + for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { + rc = ibv_query_port(context, port_index, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_index; + if (port_attr.state == IBV_PORT_ACTIVE) { + port_num = port_index; + break; + } + } + CHECK_GT(port_num, 0) << "No active ports"; + } + return port_num; +} + +// Function read from sysfs file +// Args: +// dir - directory +// file - file +// buff - buffer for the result +// size - buffer size +// Returns: +// number of bytes were read or -1 if failed +int read_sysfs_file(const char* dir, const char* file, char* buf, size_t size) { + char* path; + int fd; + int len; + + if (asprintf(&path, "%s/%s", dir, file) < 0) return -1; + + fd = open(path, O_RDONLY); + if (fd < 0) { + free(path); + return -1; + } + + len = read(fd, buf, size); + + close(fd); + free(path); + + if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0'; + + return len; +} + +// Function to check if GID index support RoCE V2 +// Args: +// context - device context +// port_num - port number +// index - GID index +// Returns: +// if GID supports RoCE V2 - true, otherwise - false. +bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, + uint8_t index) { + char name[32]; + char buff[41]; + + snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index); + if (read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <= + 0) { + return false; + } + return !strcmp(buff, RoCE_V2); +} + +// Function to set GID index. +// If the port link is IB, no GID index should be selected. +// If Ethernet but RDMA_GID_INDEX not set gid index that supports +// RoCE V2 will be chosen(fails if more then one IP is configured) +// Args: +// context - device context +// port_num - port number +// Returns: +// GID index to use +uint8_t set_gid(uint8_t port_num, ibv_context* context) { + ibv_port_attr port_attr; + string gid_str; + int rc, i, gids_num = 0, v2_ip_num = 0; + union ibv_gid gid; + uint8_t gid_index = 0; + + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + + for (i = 0; i < port_attr.gid_tbl_len; i++) { + rc = ibv_query_gid(context, port_num, i, &gid); + CHECK(!rc) << "Failed to query gid to port " << (int)port_num << " index " + << i; + if (gid.global.interface_id) { + gids_num++; + if (gid.global.subnet_prefix == 0 && + is_gid_type_roce_v2(context, port_num, i)) { + if (v2_ip_num == 0) { + // can be overwritten by RDMA_GID_INDEX later + gid_index = i; + } + v2_ip_num++; + } + } + } + switch (port_attr.link_layer) { + case(IBV_LINK_LAYER_ETHERNET) : + gid_str = get_env_var("RDMA_GID_INDEX"); + if (!gid_str.empty()) { + gid_index = stoi(gid_str); + CHECK(gid_index < gids_num) + << "RDMA_GID_INDEX should be less than GIDs amount" << gids_num; + } else { + CHECK(v2_ip_num <= 1) + << "More than one IP is available, please specify GID_INDEX"; + } + break; + case(IBV_LINK_LAYER_INFINIBAND) : // no need in GID index + break; + default: + LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " + "InfiniBand only. "; + } + if (!is_gid_type_roce_v2(context, port_num, gid_index)) { + LOG(INFO) << "RoCE v2 is not configured for GID_INDEX " << (int)gid_index; + } + return gid_index; +} + +// set the default or environment value to the configuration parameter. +// Args: +// default_val- the default value for this parameter +// env_param- the environment parameter's name +// Returns: +// 32-bit value +uint32_t set_param(uint32_t default_val, const char* env_param) { + uint32_t val = default_val; + string val_s; + + val_s = get_env_var(env_param); + + if (!val_s.empty()) { + val = stoi(val_s); + } + return val; +} + +enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { + ibv_port_attr port_attr; + enum ibv_mtu mtu; + string mtu_s; + int rc, mtu_i; + + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + + mtu_s = get_env_var("RDMA_MTU"); + + if (!mtu_s.empty()) { + mtu_i = stoi(mtu_s); + switch (mtu_i) { + case 256: + mtu = IBV_MTU_256; + break; + case 512: + mtu = IBV_MTU_512; + break; + case 1024: + mtu = IBV_MTU_1024; + break; + case 2048: + mtu = IBV_MTU_2048; + break; + case 4096: + mtu = IBV_MTU_4096; + break; + default: + CHECK(0) << "Error: MTU input value must be one of the following: 256, " + "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n"; + break; + } + CHECK(mtu < port_attr.active_mtu) + << "MTU configuration for the QPs is larger than active MTU"; + } else { + mtu = port_attr.active_mtu; + } + return mtu; +} + +RdmaParams params_init(ibv_context* context) { + RdmaParams params; + + params.port_num = set_port(context); + params.sgid_index = set_gid(params.port_num, context); + params.pkey_index = (uint8_t)set_param(PKEY_DEFAULT, "RDMA_PKEY"); + params.queue_depth = set_param(QUEUE_DEPTH_DEFAULT, "RDMA_QUEUE_DEPTH"); + params.timeout = (uint8_t)set_param(TIMEOUT_DEFAULT, "RDMA_TIMEOUT"); + params.retry_cnt = (uint8_t)set_param(RETRY_CNT_DEFAULT, "RDMA_RETRY_CNT"); + params.sl = (uint8_t)set_param(SL_DEFAULT, "RDMA_SL"); + CHECK(params.sl <= 7) << "SL value is " << (int)params.sl + << ". Valid values are 0-7."; + params.mtu = set_mtu(params.port_num, context); + params.traffic_class = set_param(TRAFFIC_CLASS, "RDMA_TRAFFIC_CLASS"); + return params; } ibv_pd* alloc_protection_domain(ibv_context* context) { @@ -85,7 +409,8 @@ ibv_pd* alloc_protection_domain(ibv_context* context) { } RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env) - : context_(open_default_device()), + : context_(open_device(set_device())), + params_(params_init(context_)), pd_(alloc_protection_domain(context_)), worker_env_(worker_env) { event_channel_ = ibv_create_comp_channel(context_); @@ -128,9 +453,9 @@ void RdmaAdapter::Process_CQ() { CHECK_GE(ne, 0); for (int i = 0; i < ne; ++i) { CHECK(wc_[i].status == IBV_WC_SUCCESS) - << "Failed status \n" - << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " " - << static_cast(wc_[i].wr_id) << " " << wc_[i].vendor_err; + << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " " + << wc_[i].status << " " << static_cast(wc_[i].wr_id) << " " + << wc_[i].vendor_err; if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) { RdmaChannel* rc = reinterpret_cast(wc_[i].wr_id); // put back a recv wr. @@ -242,8 +567,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, memset(&attr, 0, sizeof(ibv_qp_init_attr)); attr.send_cq = adapter_->cq_; attr.recv_cq = adapter_->cq_; - attr.cap.max_send_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; - attr.cap.max_recv_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; + attr.cap.max_send_wr = adapter_->params_.queue_depth; + attr.cap.max_recv_wr = adapter_->params_.queue_depth; attr.cap.max_send_sge = 1; attr.cap.max_recv_sge = 1; attr.qp_type = IBV_QPT_RC; @@ -257,8 +582,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_INIT; - attr.pkey_index = 0; - attr.port_num = 1; + attr.pkey_index = adapter_->params_.pkey_index; + attr.port_num = adapter_->params_.port_num; attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; int mask = @@ -269,13 +594,15 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // Local address { struct ibv_port_attr attr; - CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &attr)) + CHECK( + !ibv_query_port(adapter_->context_, adapter_->params_.port_num, &attr)) << "Query port"; self_.lid = attr.lid; self_.qpn = qp_->qp_num; self_.psn = static_cast(random::New64()) & 0xffffff; union ibv_gid gid; - CHECK(!ibv_query_gid(adapter_->context_, (uint8_t)1, 0, &gid)) + CHECK(!ibv_query_gid(adapter_->context_, adapter_->params_.port_num, + adapter_->params_.sgid_index, &gid)) << "Query gid"; self_.snp = gid.global.subnet_prefix; self_.iid = gid.global.interface_id; @@ -284,7 +611,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // create message and ack buffers, then initialize the tables. { const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer", - "tx_ack_buffer", "rx_ack_buffer"}; + "tx_ack_buffer", "rx_ack_buffer"}; tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]); rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]); tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]); @@ -345,7 +672,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) { void RdmaChannel::Recv() { struct ibv_recv_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; struct ibv_recv_wr* bad_wr; CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv"; } @@ -479,11 +806,9 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTR; - struct ibv_port_attr port_attr; - CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &port_attr)) - << "Query port failed"; + // This assumes both QP's ports are configured with the same MTU - attr.path_mtu = port_attr.active_mtu; + attr.path_mtu = adapter_->params_.mtu; attr.dest_qp_num = remoteAddr.qpn; attr.rq_psn = remoteAddr.psn; attr.max_dest_rd_atomic = 1; @@ -494,30 +819,32 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.flow_label = 0; attr.ah_attr.grh.hop_limit = 255; attr.ah_attr.dlid = remoteAddr.lid; - attr.ah_attr.sl = 0; + attr.ah_attr.sl = adapter_->params_.sl; attr.ah_attr.src_path_bits = 0; - attr.ah_attr.port_num = 1; + attr.ah_attr.port_num = adapter_->params_.port_num; + attr.ah_attr.grh.sgid_index = adapter_->params_.sgid_index; + attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; int r; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTS; attr.sq_psn = self_.psn; - attr.timeout = 14; - attr.retry_cnt = 7; + attr.timeout = adapter_->params_.timeout; + attr.retry_cnt = adapter_->params_.retry_cnt; attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; connected_ = true; @@ -604,7 +931,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) { struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -699,9 +1026,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { - CHECK(send_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); + CHECK(send_args.device_context) << "send dev name: " << src_dev->name() + << " gpu_info: " + << src_dev->tensorflow_gpu_device_info(); if (can_memcpy) { AllocatorAttributes host_alloc_attrs; @@ -727,8 +1054,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( // aync instead GPUUtil::SetProtoFromGPU( in, src_dev, send_args.device_context, &proto, is_dead, - [this, proto, buffer_size, key, in, step_id, key_with_step_id, - is_dead, send_args, recv_args](const Status& s) mutable { + [this, proto, buffer_size, key, in, step_id, key_with_step_id, + is_dead, send_args, recv_args](const Status& s) mutable { CHECK(s.ok()) << "copy proto from gpu sync"; auto tensor_bytes = proto.ByteSize(); buffer_size += tensor_bytes; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index e1e07db776..52d92a7c5b 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -36,7 +36,24 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" namespace tensorflow { - +#define PKEY_DEFAULT 0 +#define QUEUE_DEPTH_DEFAULT 1024 +#define TIMEOUT_DEFAULT 14 +#define RETRY_CNT_DEFAULT 7 +#define SL_DEFAULT 0 +#define TRAFFIC_CLASS 0 + +struct RdmaParams { + uint8_t port_num; + uint8_t sgid_index; + uint8_t pkey_index; + uint32_t queue_depth; + uint8_t timeout; + uint8_t retry_cnt; + uint8_t sl; + enum ibv_mtu mtu; + uint8_t traffic_class; +}; // structure to save the address of remote channels. struct RdmaAddress { uint32_t lid; @@ -50,9 +67,20 @@ struct RemoteMR { uint64_t remote_addr; uint32_t rkey; }; -enum BufferStatus { none, idle, busy }; -enum Location { local, remote }; -enum BufferType { ACK, MESSAGE, TENSOR }; +enum BufferStatus { + none, + idle, + busy +}; +enum Location { + local, + remote +}; +enum BufferType { + ACK, + MESSAGE, + TENSOR +}; enum RdmaMessageType { RDMA_MESSAGE_ACK, RDMA_MESSAGE_BUFFER_IDLE, @@ -84,6 +112,8 @@ class RdmaAdapter { protected: static const int MAX_CONCURRENT_WRITES = 1000; ibv_context* context_; + // RDMA configuration parameters + RdmaParams params_; // ibverbs protection domain ibv_pd* pd_; // Completion event channel, to wait for work completions @@ -183,7 +213,7 @@ class RdmaBuffer { } void FreeBuffer(); void EnqueueItem(string Item); - virtual void SendNextItem(){}; + virtual void SendNextItem() {}; void CreateCPUBuffer(size_t size, bool lock = true); void SetRemoteMR(RemoteMR rmi, bool override); uint32_t LookupBufferIndex(const string& buffer_name) { diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d71f314e11..30ff4ef358 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2710,6 +2710,7 @@ tf_cc_test_mkl( srcs = [ "graph/mkl_layout_pass_test.cc", "graph/mkl_tfconversion_pass_test.cc", + "util/mkl_util_test.cc", ], linkstatic = 1, deps = [ diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 53e80b1ee3..63b74e8dbf 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -81,7 +81,7 @@ class MklCPUAllocator : public Allocator { } #if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) if (user_val > max_mem_bytes) { - LOG(WARNING) << "The user specifed a memory limit " << kMaxLimitStr + LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr << "=" << user_val << " greater than available physical memory: " << max_mem_bytes diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h index 9caa076c72..cc272d156e 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.h +++ b/tensorflow/core/common_runtime/sycl/sycl_device.h @@ -46,8 +46,8 @@ class GSYCLInterface { if (!found_device) { // Currently Intel GPU is not supported - LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, " - "trying OpenCL CPU"; + LOG(WARNING) << "No OpenCL GPU found that is supported by " + << "ComputeCpp/triSYCL, trying OpenCL CPU"; } for (const auto& device : device_list) { @@ -58,10 +58,24 @@ class GSYCLInterface { } } + if (!found_device) { + LOG(WARNING) << "No OpenCL CPU found that is supported by " + << "ComputeCpp/triSYCL, checking for host sycl device"; + } + + for (const auto& device : device_list) { + // triSYCL only supports the host device for now + if (device.is_host()) { + LOG(WARNING) << "Found SYCL host device"; + AddDevice(device); + found_device = true; + } + } + if (!found_device) { // Currently Intel GPU is not supported - LOG(FATAL) - << "No OpenCL GPU nor CPU found that is supported by ComputeCpp"; + LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU" + << " supported by ComputeCPP/triSYCL was found"; } else { LOG(INFO) << "Found following OpenCL devices:"; for (int i = 0; i < device_list.size(); i++) { diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 87c41186d5..fd1b5d33b9 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -453,6 +453,21 @@ const Edge* Graph::AddControlEdge(Node* source, Node* dest, return AddEdge(source, kControlSlot, dest, kControlSlot); } +void Graph::RemoveControlEdge(const Edge* e) { + if (!e->src_->IsSource() && !e->dst_->IsSink()) { + e->dst_->MaybeCopyOnWrite(); + std::string e_src_name = strings::StrCat("^", e->src_->name()); + auto* inputs = e->dst_->props_->node_def.mutable_input(); + for (auto it = inputs->begin(); it != inputs->end(); ++it) { + if (*it == e_src_name) { + inputs->erase(it); + break; + } + } + } + RemoveEdge(e); +} + Status Graph::UpdateEdge(Node* new_src, int new_src_index, Node* dst, int dst_index) { TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index)); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index c5dde722fa..d0dba6e1f0 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -451,6 +451,11 @@ class Graph { // REQUIRES: The edge must exist. void RemoveEdge(const Edge* edge); + // Removes control edge `edge` from the graph. Note that this also updates + // the corresponding NodeDef to reflect the change. + // REQUIRES: The control edge must exist. + void RemoveControlEdge(const Edge* e); + // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index b9e3cba035..1924c05d3d 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -117,7 +117,7 @@ DataType EdgeType(const Edge* e) { } } -// Return true iff we need to add a same device send/recv for 'edge'. +// Return true iff we need to add the same device send/recv for 'edge'. bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { if (edge->IsControlEdge()) { return false; @@ -1116,7 +1116,7 @@ Status Partition(const PartitionOptions& opts, Graph* g, // before the data is available. AddInput(real_recv, send->name(), Graph::kControlSlot); } else if (control_flow_edge != nullptr) { - // Redirect control edge to the real recv since this is not a same + // Redirect control edge to the real recv since this is not the same // device send/recv. --num_control_flow_edges; AddInput(real_recv, control_flow_edge->src()->name(), diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index 7686cef219..2aa1b31e15 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -118,6 +118,25 @@ class GraphTest : public ::testing::Test { LOG(FATAL) << name; } + bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, + const Node* dst) { + for (const Edge *e : dst->in_edges()) { + if (e->IsControlEdge() && + e->src() == src && + e->src_output() == Graph::kControlSlot && + e->dst_input() == Graph::kControlSlot) { + return true; + } + } + std::string control_edge_name = strings::StrCat("^", src->name()); + for (int i = 0; i < dst->def().input_size(); ++i) { + if (dst->def().input(i) == control_edge_name) { + return true; + } + } + return false; + } + Graph graph_; private: @@ -458,8 +477,8 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_TRUE(edge == nullptr); EXPECT_EQ(b->def().input_size(), 2); - // Can add redundant control edge with create_duplicate. - edge = graph_.AddControlEdge(a, b, /*create_duplicate=*/true); + // Can add redundant control edge with allow_duplicates. + edge = graph_.AddControlEdge(a, b, /*allow_duplicates=*/true); EXPECT_TRUE(edge != nullptr); // create_duplicate causes the NodeDef not to be updated. ASSERT_EQ(b->def().input_size(), 2); @@ -477,6 +496,47 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_EQ(b->def().input_size(), 2); } +TEST_F(GraphTest, RemoveControlEdge) { + FromGraphDef( + "node { name: 'A' op: 'OneOutput' }" + "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }" + "node { name: 'C' op: 'NoOp' } "); + Node* a = FindNode("A"); + Node* b = FindNode("B"); + Node* c = FindNode("C"); + + // Add a control edge. + const Edge* edge_1 = graph_.AddControlEdge(c, a); + const Edge* edge_2 = graph_.AddControlEdge(a, b); + ASSERT_TRUE(edge_1 != nullptr); + ASSERT_TRUE(edge_2 != nullptr); + + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); + + graph_.RemoveControlEdge(edge_1); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); + + graph_.RemoveControlEdge(edge_2); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(a, b)); + + // Test removing a duplicate control edge. + // Note that unless allow_duplicates is true, the duplicate edge + // will not be added. That's why we expect edge_4 to be a null + // pointer. We are not testing with allow_duplicates set to true, + // as that is a highly unlikely use case that does not make much + // sense. + const Edge* edge_3 = graph_.AddControlEdge(c, a); + const Edge* edge_4 = graph_.AddControlEdge(c, a); + ASSERT_TRUE(edge_3 != nullptr); + ASSERT_TRUE(edge_4 == nullptr); + + graph_.RemoveControlEdge(edge_3); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); +} + TEST_F(GraphTest, UpdateEdge) { // Build a little graph Node* a = FromNodeDef("A", "OneOutput", 0); diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index cb32d64334..880e4e712e 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -21,107 +21,108 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { -// Since our ops are going to produce and also consume N addition tensors -// (Mkl) for N Tensorflow tensors, we can have following different -// orderings among these 2N tensors. -// -// E.g., for Tensorflow tensors A, B, and C, our ops will produce and -// consume A_m, B_m, and C_m additionally. -// -// INTERLEAVED: in this case 2N tensors are interleaved. So for above -// example, the ordering looks like: A, A_m, B, B_m, C, C_m. -// -// CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed -// by N Mkl tensors. So for above example, the ordering looks -// like: A, B, C, A_m, B_m, C_m -// -// Following APIs map index of original Tensorflow tensors to their -// appropriate position based on selected ordering. For contiguous ordering, -// we need to know the total number of tensors (parameter total). -// -typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; -// NOTE: Currently, we use contiguous ordering. If you change this, then you -// would need to change Mkl op definitions in nn_ops.cc. -static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; + // Since our ops are going to produce and also consume N addition tensors + // (Mkl) for N Tensorflow tensors, we can have following different + // orderings among these 2N tensors. + // + // E.g., for Tensorflow tensors A, B, and C, our ops will produce and + // consume A_m, B_m, and C_m additionally. + // + // INTERLEAVED: in this case 2N tensors are interleaved. So for above + // example, the ordering looks like: A, A_m, B, B_m, C, C_m. + // + // CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed + // by N Mkl tensors. So for above example, the ordering looks + // like: A, B, C, A_m, B_m, C_m + // + // Following APIs map index of original Tensorflow tensors to their + // appropriate position based on selected ordering. For contiguous ordering, + // we need to know the total number of tensors (parameter total). + // + typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; + // NOTE: Currently, we use contiguous ordering. If you change this, then you + // would need to change Mkl op definitions in nn_ops.cc. + static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; -// Get index of MetaData tensor from index 'n' of Data tensor. -inline int DataIndexToMetaDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - // For interleaved ordering, Mkl tensor follows immediately after - // Tensorflow tensor. - return n + 1; - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. - return n + total_tensors / 2; + // Get index of MetaData tensor from index 'n' of Data tensor. + inline int DataIndexToMetaDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + // For interleaved ordering, Mkl tensor follows immediately after + // Tensorflow tensor. + return n + 1; + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. + return n + total_tensors / 2; + } } -} -int inline GetTensorDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - return 2 * n; // index corresponding to nth input/output tensor - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - return n; - } -} + int inline GetTensorDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + return 2 * n; // index corresponding to nth input/output tensor + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + return n; + } + } -int inline GetTensorMetaDataIndex(int n, int total_tensors) { - // Get index for TensorData first and then use mapping function - // to get TensorMetaData index from TensorData index. - int tidx = GetTensorDataIndex(n, total_tensors); - return DataIndexToMetaDataIndex(tidx, total_tensors); -} + int inline GetTensorMetaDataIndex(int n, int total_tensors) { + // Get index for TensorData first and then use mapping function + // to get TensorMetaData index from TensorData index. + int tidx = GetTensorDataIndex(n, total_tensors); + return DataIndexToMetaDataIndex(tidx, total_tensors); + } namespace mkl_op_registry { -static const char* kMklOpLabel = "MklOp"; -static const char* kMklOpLabelPattern = "label='MklOp'"; - -// Get the name of Mkl op from original TensorFlow op -// We prefix 'Mkl' to the original op to get Mkl op. -inline string GetMklOpName(const string& name) { - // Prefix that we add to Tensorflow op name to construct Mkl op name. - const char* const kMklOpPrefix = "_Mkl"; - return string(kMklOpPrefix) + name; -} + static const char* kMklOpLabel = "MklOp"; + static const char* kMklOpLabelPattern = "label='MklOp'"; -// Check whether opname with type T is registered as MKL-compliant. -// -// @input: name of the op -// @input: T datatype to be used for checking op -// @return: true if opname is registered as Mkl op; false otherwise -static inline bool IsMklOp(const std::string& op_name, DataType T) { - string kernel = KernelsRegisteredForOp(op_name); - bool result = - kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); - if (result) { - VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; + // Get the name of Mkl op from original TensorFlow op + // We prefix 'Mkl' to the original op to get Mkl op. + inline string GetMklOpName(const string& name) { + // Prefix that we add to Tensorflow op name to construct Mkl op name. + const char* const kMklOpPrefix = "_Mkl"; + return string(kMklOpPrefix) + name; } - return result; -} -// Check whether opname with type T is registered as MKL-compliant and -// is element-wise. -// -// @input: name of the op -// @input: T datatype to be used for checking op -// @return: true if opname is registered as element-wise Mkl op; -// false otherwise -static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) { - if (!IsMklOp(op_name, T)) { - return false; + // Check whether opname with type T is registered as MKL-compliant. + // + // @input: name of the op + // @input: T datatype to be used for checking op + // @return: true if opname is registered as Mkl op; false otherwise + static inline bool IsMklOp(const std::string& op_name, DataType T) { + string kernel = KernelsRegisteredForOp(op_name); + bool result = + kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); + if (result) { + VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; + } + return result; } - bool result = (0 == op_name.compare(GetMklOpName("Add")) || - 0 == op_name.compare(GetMklOpName("Sub")) || - 0 == op_name.compare(GetMklOpName("Mul")) || - 0 == op_name.compare(GetMklOpName("Maximum")) || - 0 == op_name.compare(GetMklOpName("SquaredDifference"))); + // Check whether opname with type T is registered as MKL-compliant and + // is element-wise. + // + // @input: name of the op + // @input: T datatype to be used for checking op + // @return: true if opname is registered as element-wise Mkl op; + // false otherwise + static inline bool IsMklElementWiseOp(const std::string& op_name, + DataType T) { + if (!IsMklOp(op_name, T)) { + return false; + } - VLOG(1) << "mkl_op_registry::" << op_name - << " is elementwise MKL op: " << result; - return result; -} + bool result = (0 == op_name.compare(GetMklOpName("Add")) || + 0 == op_name.compare(GetMklOpName("Sub")) || + 0 == op_name.compare(GetMklOpName("Mul")) || + 0 == op_name.compare(GetMklOpName("Maximum")) || + 0 == op_name.compare(GetMklOpName("SquaredDifference"))); + + VLOG(1) << "mkl_op_registry::" << op_name + << " is elementwise MKL op: " << result; + return result; + } } // namespace mkl_op_registry } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index f4c9073dee..912075aa28 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -37,8 +37,8 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/tensor_format.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/graph/mkl_layout_pass.h" +#include "tensorflow/core/graph/mkl_graph_util.h" namespace tensorflow { diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index fe4588389e..599bb88f01 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -33,8 +33,8 @@ limitations under the License. #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/graph/mkl_tfconversion_pass.h" +#include "tensorflow/core/graph/mkl_graph_util.h" namespace tensorflow { @@ -68,7 +68,7 @@ namespace tensorflow { // take place before we hit the op. For this, we add a new op before each // element-wise MKL op to deal with the inputs, called _MklInputConversion. // This pass has been enhanced to add this capability. -// +// // The _MklInputConversion op will check the inputs to the elementwise op and // make sure that either both are in MKL format or both are in TF format, // depending on their initial state and whether broadcast is needed or not. diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index ee279b7e0a..239b5ac244 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -58,6 +58,12 @@ class GraphProperties { const std::vector& GetOutputProperties( const string& node_name) const; + static void FillTensorPropertiesFromContext( + const shape_inference::ShapeHandle&, const DataType&, + shape_inference::InferenceContext*, + std::unordered_map* dim_ids, + OpInfo::TensorProperties*); + private: // Inputs GrapplerItem item_; diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 7fd1876371..9ab889beb5 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -62,7 +62,7 @@ const std::set& NodeMap::GetOutputs(const string& node_name) const { void NodeMap::AddNode(const string& name, NodeDef* node) { auto ret = nodes_.insert(std::make_pair(name, node)); CHECK(ret.second) << "Pair (" << name << "," << node - << ") is not inserted because a same key already exists."; + << ") is not inserted because the same key already exists."; } void NodeMap::AddOutput(const string& node_name, const string& output_name) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index d7b457eab7..f1cb9a1860 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -929,6 +929,25 @@ tf_cc_test( ], ) +tf_cuda_cc_test( + name = "bincount_op_test", + size = "small", + srcs = ["bincount_op_test.cc"], + deps = [ + ":bincount_op", + ":ops_testutil", + ":ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cuda_cc_test( name = "constant_op_test", size = "small", @@ -1617,7 +1636,10 @@ DYNAMIC_DEPS = [ tf_kernel_library( name = "dynamic_partition_op", prefix = "dynamic_partition_op", - deps = DYNAMIC_DEPS, + deps = DYNAMIC_DEPS + [ + ":fill_functor", + ":gather_functor", + ] + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( @@ -1687,7 +1709,7 @@ tf_kernel_library( ], ) -tf_cc_tests( +tf_cuda_cc_tests( name = "dynamic_op_test", size = "small", srcs = [ @@ -2572,8 +2594,9 @@ tf_kernel_library( tf_kernel_library( name = "bucketize_op", + gpu_srcs = ["cuda_device_array.h"], prefix = "bucketize_op", - deps = MATH_DEPS, + deps = ARRAY_DEPS, ) tf_kernel_library( @@ -3174,7 +3197,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/eigen3", - ], + ] + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index af629d0de8..f918023693 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -153,7 +153,8 @@ class AvgPoolingOp : public UnaryOp { if (data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, tensor_in, output_shape); + stride_, padding_, data_format_, tensor_in, output_shape, + /*propagate_nans=*/false); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, @@ -408,7 +409,7 @@ class AvgPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape); + output_shape, /*propagate_nans=*/false); } private: @@ -532,7 +533,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape); + output_shape, /*propagate_nans=*/false); } } diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 1cd5943ef3..766d63e3be 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -17,6 +17,7 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" @@ -27,46 +28,37 @@ namespace tensorflow { using thread::ThreadPool; -template -class BincountOp : public OpKernel { - public: - explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; - void Compute(OpKernelContext* ctx) override { - const Tensor& arr_t = ctx->input(0); - const Tensor& size_tensor = ctx->input(1); - const Tensor& weights_t = ctx->input(2); - int32 size = size_tensor.scalar()(); - OP_REQUIRES( - ctx, size >= 0, - errors::InvalidArgument("size (", size, ") must be non-negative")); - const bool has_weights = weights_t.NumElements() > 0; - OP_REQUIRES(ctx, !(has_weights && arr_t.shape() != weights_t.shape()), - errors::InvalidArgument( - "If weights are passed, they must have the same shape (" + - weights_t.shape().DebugString() + ") as arr (" + - arr_t.shape().DebugString() + ")")); - const auto arr = arr_t.flat(); - const auto weights = weights_t.flat(); +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output) { + int size = output.size(); Tensor all_nonneg_t; - OP_REQUIRES_OK(ctx, - ctx->allocate_temp(DT_BOOL, TensorShape({}), &all_nonneg_t, - AllocatorAttributes())); - all_nonneg_t.scalar().device(ctx->eigen_cpu_device()) = + TF_RETURN_IF_ERROR(context->allocate_temp( + DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes())); + all_nonneg_t.scalar().device(context->eigen_cpu_device()) = (arr >= 0).all(); - OP_REQUIRES(ctx, all_nonneg_t.scalar()(), - errors::InvalidArgument("Input arr must be non-negative!")); + if (!all_nonneg_t.scalar()()) { + return errors::InvalidArgument("Input arr must be non-negative!"); + } // Allocate partial output bin sums for each worker thread. Worker ids in // ParallelForWithWorkerId range from 0 to NumThreads() inclusive. ThreadPool* thread_pool = - ctx->device()->tensorflow_cpu_worker_threads()->workers; + context->device()->tensorflow_cpu_worker_threads()->workers; const int64 num_threads = thread_pool->NumThreads() + 1; Tensor partial_bins_t; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(weights_t.dtype(), - TensorShape({num_threads, size}), - &partial_bins_t)); + TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum::value, + TensorShape({num_threads, size}), + &partial_bins_t)); auto partial_bins = partial_bins_t.matrix(); partial_bins.setZero(); thread_pool->ParallelForWithWorkerId( @@ -75,7 +67,7 @@ class BincountOp : public OpKernel { for (int64 i = start_ind; i < limit_ind; i++) { int32 value = arr(i); if (value < size) { - if (has_weights) { + if (weights.size()) { partial_bins(worker_id, value) += weights(i); } else { // Complex numbers don't support "++". @@ -84,25 +76,62 @@ class BincountOp : public OpKernel { } } }); - TensorShape output_shape({size}); - Tensor* output_t; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t)); + // Sum the partial bins along the 0th axis. Eigen::array reduce_dims({0}); - output_t->flat().device(ctx->eigen_cpu_device()) = - partial_bins.sum(reduce_dims); + output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims); + return Status::OK(); + } +}; + +} // namespace functor + +template +class BincountOp : public OpKernel { + public: + explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& arr_t = ctx->input(0); + const Tensor& size_tensor = ctx->input(1); + const Tensor& weights_t = ctx->input(2); + + int32 size = size_tensor.scalar()(); + OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument( + "size (", size, ") must be non-negative")); + + const auto arr = arr_t.flat(); + const auto weights = weights_t.flat(); + Tensor* output_t; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, TensorShape({size}), &output_t)); + auto output = output_t->flat(); + OP_REQUIRES_OK(ctx, functor::BincountFunctor::Compute( + ctx, arr, weights, output)); } }; -#define REGISTER(TYPE) \ +#define REGISTER_KERNELS(type) \ REGISTER_KERNEL_BUILDER( \ - Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ - BincountOp) + Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ + BincountOp) + +TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); +#undef REGISTER_KERNELS + +#if GOOGLE_CUDA + +#define REGISTER_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("Bincount") \ + .Device(DEVICE_GPU) \ + .HostMemory("size") \ + .TypeConstraint("T"), \ + BincountOp) -TF_CALL_NUMBER_TYPES(REGISTER); +TF_CALL_int32(REGISTER_KERNELS); +TF_CALL_float(REGISTER_KERNELS); +#undef REGISTER_KERNELS -// TODO(ringwalt): Add a GPU implementation. We probably want to take a -// different approach, e.g. threads in a warp each taking a pass over the same -// data, and each thread summing a single bin. +#endif // GOOGLE_CUDA } // end namespace tensorflow diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h new file mode 100644 index 0000000000..0f8dd2b82a --- /dev/null +++ b/tensorflow/core/kernels/bincount_op.h @@ -0,0 +1,41 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_BINCOUNT_OP_H_ +#define TENSORFLOW_BINCOUNT_OP_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output); +}; + +} // end namespace functor + +} // end namespace tensorflow + +#endif // TENSORFLOW_BINCOUNT_OP_H_ diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc new file mode 100644 index 0000000000..ae9e26ffdf --- /dev/null +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -0,0 +1,114 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "tensorflow/core/kernels/bincount_op.h" +#include "external/cub_archive/cub/device/device_histogram.cuh" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output) { + if (weights.size() != 0) { + return errors::InvalidArgument( + "Weights should not be passed as it should be " + "handled by unsorted_segment_sum"); + } + if (output.size() == 0) { + return Status::OK(); + } + // In case weight.size() == 0, use CUB + size_t temp_storage_bytes = 0; + const int32* d_samples = arr.data(); + T* d_histogram = output.data(); + int num_levels = output.size() + 1; + int32 lower_level = 0; + int32 upper_level = output.size(); + int num_samples = arr.size(); + const cudaStream_t& stream = GetCudaStream(context); + + // The first HistogramEven is to obtain the temp storage size required + // with d_temp_storage = NULL passed to the call. + auto err = cub::DeviceHistogram::HistogramEven( + /* d_temp_storage */ NULL, + /* temp_storage_bytes */ temp_storage_bytes, + /* d_samples */ d_samples, + /* d_histogram */ d_histogram, + /* num_levels */ num_levels, + /* lower_level */ lower_level, + /* upper_level */ upper_level, + /* num_samples */ num_samples, + /* stream */ stream); + if (err != cudaSuccess) { + return errors::Internal( + "Could not launch HistogramEven to get temp storage: ", + cudaGetErrorString(err), "."); + } + Tensor temp_storage; + TF_RETURN_IF_ERROR(context->allocate_temp( + DataTypeToEnum::value, + TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); + + void* d_temp_storage = temp_storage.flat().data(); + // The second HistogramEven is to actual run with d_temp_storage + // allocated with temp_storage_bytes. + err = cub::DeviceHistogram::HistogramEven( + /* d_temp_storage */ d_temp_storage, + /* temp_storage_bytes */ temp_storage_bytes, + /* d_samples */ d_samples, + /* d_histogram */ d_histogram, + /* num_levels */ num_levels, + /* lower_level */ lower_level, + /* upper_level */ upper_level, + /* num_samples */ num_samples, + /* stream */ stream); + if (err != cudaSuccess) { + return errors::Internal("Could not launch HistogramEven: ", + cudaGetErrorString(err), "."); + } + return Status::OK(); + } +}; + +} // end namespace functor + +#define REGISTER_GPU_SPEC(type) \ + template struct functor::BincountFunctor; + +TF_CALL_int32(REGISTER_GPU_SPEC); +TF_CALL_float(REGISTER_GPU_SPEC); +#undef REGISTER_GPU_SPEC + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc new file mode 100644 index 0000000000..14becc87a7 --- /dev/null +++ b/tensorflow/core/kernels/bincount_op_test.cc @@ -0,0 +1,75 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace tensorflow { + +static Graph* Bincount(int arr_size, int nbins) { + Graph* g = new Graph(OpRegistry::Global()); + + Tensor arr(DT_INT32, TensorShape({arr_size})); + arr.flat() = arr.flat().setRandom().abs(); + + Tensor size(DT_INT32, TensorShape({(int32)1})); + size.flat()(0) = (int32)nbins; + + Tensor weights(DT_INT32, TensorShape({0})); + + Node* node; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Bincount") + .Input(test::graph::Constant(g, arr)) + .Input(test::graph::Constant(g, size)) + .Input(test::graph::Constant(g, weights)) + .Attr("T", DT_INT32) + .Finalize(g, &node)); + return g; +} + +#define BM_BincountDev(K, NBINS, type) \ + static void BM_Bincount##_##type##_##K##_##NBINS(int iters) { \ + testing::ItemsProcessed(static_cast(iters) * K * 1024); \ + test::Benchmark(#type, Bincount(K * 1024, NBINS)).Run(iters); \ + } \ + BENCHMARK(BM_Bincount##_##type##_##K##_##NBINS); + +BM_BincountDev(32, 1000, cpu); +BM_BincountDev(32, 2000, cpu); +BM_BincountDev(32, 5000, cpu); +BM_BincountDev(64, 1000, cpu); +BM_BincountDev(64, 2000, cpu); +BM_BincountDev(64, 5000, cpu); +BM_BincountDev(128, 1000, cpu); +BM_BincountDev(128, 2000, cpu); +BM_BincountDev(128, 5000, cpu); + +BM_BincountDev(32, 1000, gpu); +BM_BincountDev(32, 2000, gpu); +BM_BincountDev(32, 5000, gpu); +BM_BincountDev(64, 1000, gpu); +BM_BincountDev(64, 2000, gpu); +BM_BincountDev(64, 5000, gpu); +BM_BincountDev(128, 1000, gpu); +BM_BincountDev(128, 2000, gpu); +BM_BincountDev(128, 5000, gpu); + +} // end namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc index 93c2d01221..c1693de538 100644 --- a/tensorflow/core/kernels/bucketize_op.cc +++ b/tensorflow/core/kernels/bucketize_op.cc @@ -15,15 +15,43 @@ limitations under the License. // See docs in ../ops/math_ops.cc. -#include -#include - +#include "tensorflow/core/kernels/bucketize_op.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { +using thread::ThreadPool; + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + template +struct BucketizeFunctor { + // PRECONDITION: boundaries_vector must be sorted. + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output) { + const int N = input.size(); + for (int i = 0; i < N; i++) { + auto first_bigger_it = std::upper_bound( + boundaries_vector.begin(), boundaries_vector.end(), input(i)); + output(i) = first_bigger_it - boundaries_vector.begin(); + } + + return Status::OK(); + } +}; +} // namespace functor + +template class BucketizeOp : public OpKernel { public: explicit BucketizeOp(OpKernelConstruction* context) : OpKernel(context) { @@ -34,36 +62,42 @@ class BucketizeOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input_tensor = context->input(0); - auto input = input_tensor.flat(); + const auto input = input_tensor.flat(); + Tensor* output_tensor = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); auto output = output_tensor->template flat(); - - const int N = input.size(); - for (int i = 0; i < N; i++) { - output(i) = CalculateBucketIndex(input(i)); - } + OP_REQUIRES_OK(context, functor::BucketizeFunctor::Compute( + context, input, boundaries_, output)); } private: - int32 CalculateBucketIndex(const T value) { - auto first_bigger_it = - std::upper_bound(boundaries_.begin(), boundaries_.end(), value); - return first_bigger_it - boundaries_.begin(); - } std::vector boundaries_; }; #define REGISTER_KERNEL(T) \ REGISTER_KERNEL_BUILDER( \ Name("Bucketize").Device(DEVICE_CPU).TypeConstraint("T"), \ - BucketizeOp); + BucketizeOp); + +REGISTER_KERNEL(int32); +REGISTER_KERNEL(int64); +REGISTER_KERNEL(float); +REGISTER_KERNEL(double); +#undef REGISTER_KERNEL + +#if GOOGLE_CUDA +#define REGISTER_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Bucketize").Device(DEVICE_GPU).TypeConstraint("T"), \ + BucketizeOp); REGISTER_KERNEL(int32); REGISTER_KERNEL(int64); REGISTER_KERNEL(float); REGISTER_KERNEL(double); #undef REGISTER_KERNEL +#endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.h b/tensorflow/core/kernels/bucketize_op.h new file mode 100644 index 0000000000..c8e461beb9 --- /dev/null +++ b/tensorflow/core/kernels/bucketize_op.h @@ -0,0 +1,41 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_BUCKETIZE_OP_H_ +#define TENSORFLOW_BUCKETIZE_OP_H_ + +#include +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { +namespace functor { + +template +struct BucketizeFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output); +}; + +} // namespace functor +} // namespace tensorflow + +#endif // TENSORFLOW_BUCKETIZE_OP_H_ diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc new file mode 100644 index 0000000000..aafbbe41b4 --- /dev/null +++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc @@ -0,0 +1,101 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/bucketize_op.h" +#include "tensorflow/core/kernels/cuda_device_array.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +template +__global__ void BucketizeCustomKernel( + const int32 size_in, const T* in, const int32 size_boundaries, + CudaDeviceArrayStruct boundaries_array, int32* out) { + const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array); + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in[i]; + int32 bucket = 0; + int32 count = size_boundaries; + while (count > 0) { + int32 l = bucket; + int32 step = count / 2; + l += step; + if (!(value < static_cast(boundaries[l]))) { + bucket = ++l; + count -= step + 1; + } else { + count = step; + } + } + out[i] = bucket; + } +} + +namespace functor { + +template +struct BucketizeFunctor { + // PRECONDITION: boundaries_vector must be sorted. + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output) { + const GPUDevice& d = context->eigen_device(); + + CudaDeviceArrayOnHost boundaries_array(context, + boundaries_vector.size()); + TF_RETURN_IF_ERROR(boundaries_array.Init()); + for (int i = 0; i < boundaries_vector.size(); ++i) { + boundaries_array.Set(i, boundaries_vector[i]); + } + TF_RETURN_IF_ERROR(boundaries_array.Finalize()); + + CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); + BucketizeCustomKernel< + T><<>>( + input.size(), input.data(), boundaries_vector.size(), + boundaries_array.data(), output.data()); + + return Status::OK(); + } +}; +} // namespace functor + +#define REGISTER_GPU_SPEC(type) \ + template struct functor::BucketizeFunctor; + +REGISTER_GPU_SPEC(int32); +REGISTER_GPU_SPEC(int64); +REGISTER_GPU_SPEC(float); +REGISTER_GPU_SPEC(double); +#undef REGISTER_GPU_SPEC + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 21f5cb1716..f819fccbfb 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -236,6 +236,7 @@ class Conv3DBackpropInputOp : public OpKernel { REGISTER_KERNEL_BUILDER( \ Name("Conv3DBackpropInputV2").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DBackpropInputOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -383,6 +384,7 @@ class Conv3DBackpropFilterOp : public OpKernel { .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ Conv3DBackpropFilterOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -409,6 +411,7 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); +DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor @@ -1098,22 +1101,29 @@ class Conv3DBackpropFilterOp : public OpKernel { bool cudnn_use_autotune_; }; -REGISTER_KERNEL_BUILDER( - Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), - Conv3DBackpropInputOp); -REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .HostMemory("input_sizes"), - Conv3DBackpropInputOp); -REGISTER_KERNEL_BUILDER( - Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), - Conv3DBackpropFilterOp); -REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .HostMemory("filter_sizes"), - Conv3DBackpropFilterOp); + + +#define REGISTER_GPU_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropInputOp); \ + REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("input_sizes"), \ + Conv3DBackpropInputOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropFilterOp); \ + REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("filter_sizes"), \ + Conv3DBackpropFilterOp); +TF_CALL_half(REGISTER_GPU_KERNEL); +TF_CALL_float(REGISTER_GPU_KERNEL); +#undef REGISTER_GPU_KERNEL + #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 8a89d564de..37cb67bc51 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -145,6 +145,7 @@ class Conv3DOp : public BinaryOp { REGISTER_KERNEL_BUILDER( \ Name("Conv3D").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -482,12 +483,16 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); +DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor // Registration of the GPU implementations. +REGISTER_KERNEL_BUILDER( + Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), + Conv3DOp); REGISTER_KERNEL_BUILDER( Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), Conv3DOp); diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc index 7bdd8d22a3..39c8814073 100644 --- a/tensorflow/core/kernels/cwise_op_acosh.cc +++ b/tensorflow/core/kernels/cwise_op_acosh.cc @@ -20,16 +20,8 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Acosh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double); #endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index e0644323c0..8d44208aa7 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -20,17 +20,9 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Asinh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYC +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index 058f5140c5..bbc69e45aa 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -20,17 +20,9 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Atanh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYC +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 6c22b124de..d32185b6bf 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -49,7 +49,11 @@ template struct scalar_asinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::asinh(a); +#else return std::asinh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -61,7 +65,11 @@ template struct scalar_acosh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::acosh(a); +#else return std::acosh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -73,7 +81,11 @@ template struct scalar_atanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::atanh(a); +#else return std::atanh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 9804d7d38e..53d65a22d1 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -231,7 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args, } // Pad to vector-register width (if needed). for (int64 d = 0; d < pad_size; ++d) { - buffer[buf_base + vectorized_size + scalar_size + d] = 0; + buffer[buf_base + vectorized_size + scalar_size + d] = static_cast(0); } } } @@ -297,7 +297,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -318,7 +318,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -346,7 +346,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, if (depth_multiplier > 1) { for (int64 d = 0; d < in_depth; ++d) { const int64 index = d * args.depth_multiplier; - T accum = 0; + T accum = static_cast(0); for (int64 dm = 0; dm < dm_vectorized_size; dm += kPacketSize) { const auto v = Eigen::internal::ploadu(out_buffer + index + dm); accum += Eigen::internal::predux(v); @@ -510,6 +510,7 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA +extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; @@ -884,6 +885,7 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA +extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index bbeeaf7895..2759ecb2f1 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -94,7 +94,7 @@ struct DepthwiseConv2DKernel { for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -115,7 +115,7 @@ struct DepthwiseConv2DKernel { } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -246,6 +246,7 @@ extern template class LaunchConv2DOp; #if GOOGLE_CUDA // Extern template instantiated in depthwise_conv_op_gpu.cc. +extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; @@ -419,12 +420,17 @@ class DepthwiseConv2dNativeOp : public BinaryOp { Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint("T"), \ DepthwiseConv2dNativeOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); #if !defined(PLATFORM_WINDOWS) || !defined(_DEBUG) TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER( + Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), + DepthwiseConv2dNativeOp); + REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), DepthwiseConv2dNativeOp); diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index aa5b5c76f6..11aed5b415 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -158,7 +158,7 @@ struct DepthwiseFilterPadOp { } // Pad the remainder of output to vector-register boundary. for (int64 j = 0; j < pad_size; ++j) { - padded_filter[output_base + vectorized_size + scalar_size + j] = 0; + padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast(0); } } } @@ -266,7 +266,7 @@ struct DepthwiseInputCopyOp { // Pad the remainder of the output to vector register boundary. for (int64 d = 0; d < output_pad_size; ++d) { - in_buf[d] = 0; + in_buf[d] = static_cast(0); } in_buf += output_pad_size; diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index ecfe51d599..903aac5d68 100644 --- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -105,7 +105,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = 0; + T sum = static_cast(0); const int input_offset_temp = in_rows * OB; if (input_row_start >= 0 && input_col_start >= 0 && @@ -258,8 +258,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( __syncthreads(); if (depth_in_range) { - T sum1 = 0; - T sum2 = 0; + T sum1 = static_cast(0); + T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -369,7 +369,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = 0; + T sum = static_cast(0); if (input_row_start >= 0 && input_col_start >= 0 && input_row_end < in_rows && input_col_end < in_cols) { // Loop that doesn't need to check for boundary conditions. @@ -529,8 +529,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( __syncthreads(); if (slice_in_range) { - T sum1 = 0; - T sum2 = 0; + T sum1 = static_cast(0); + T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -710,6 +710,7 @@ void LaunchDepthwiseConvOp::operator()(OpKernelContext* ctx, "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed")); } +template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; @@ -744,7 +745,7 @@ __global__ void __launch_bounds__(640, 2) const int in_r = (thread_id / in_depth / in_cols) % in_rows; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = 0; + T sum = static_cast(0); const int out_r_start = tf_max(0, (in_r - filter_rows + pad_rows + stride) / stride); @@ -810,7 +811,7 @@ __global__ void __launch_bounds__(640, 2) const int in_d = (thread_id / in_cols / in_rows) % in_depth; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = 0; + T sum = static_cast(0); const int out_d_start = in_d * depth_multiplier; const int out_d_end = out_d_start + depth_multiplier; @@ -919,6 +920,7 @@ void LaunchDepthwiseConvBackpropInputOp::operator()( "utGPULaunch failed")); } +template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; @@ -1631,6 +1633,7 @@ void LaunchDepthwiseConvBackpropFilterOp::operator()( "terGPULaunch failed")); } +template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; } // namespace tensorflow diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc new file mode 100644 index 0000000000..7249c8c66c --- /dev/null +++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc @@ -0,0 +1,376 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// The algorithm for dynamic partition has the following steps: +// 1. Let N be the size of partitions. We initialize a new vector indices_in +// with the values 0, 1, 2, ..., N-1. +// 2. We apply cub::DeviceRadixSort::SortPairs to the key - value pairs given +// by partitions and indices_in. This will result in two new vectors +// partitions_out and indices_out, with partitions_out sorted. +// 3. The first dimension of outputs[i] is equal to the length of the interval +// of i-values in partitions_out. We determine it in two steps: +// - compute the starting and ending point of each interval, +// - subtract the starting and ending points to find the length. +// The result is placed in partition_count. +// 4. Because partition_count is on the GPU, we bring it asynchronously to +// the CPU. Then we can allocate the output tensors. +// 5. Finally, we use indices_out and the gather functor to collect the output. +// This works, because for each interval of i-values, indices_out points +// to the slices which should form output[i]. + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "external/cub_archive/cub/device/device_radix_sort.cuh" +#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/gather_functor_gpu.cu.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +namespace { + +template +__global__ void RangeInitKernel(const T start, const T delta, const int32 size, + T* out) { + CUDA_1D_KERNEL_LOOP(i, size) { out[i] = start + i * delta; } +} + +__global__ void FindEndpointsKernel(const int32* partitions, int32 size, + int32 nump, int32* start, int32* end) { + CUDA_1D_KERNEL_LOOP(i, size) { + int32 current = ldg(partitions + i); + if (FastBoundsCheck(current, nump)) { + if (i == 0) + start[current] = i; + else { + int32 before = ldg(partitions + i - 1); + if (before != current) start[current] = i; + } + if (i == size - 1) + end[current] = i + 1; + else { + int32 after = ldg(partitions + i + 1); + if (after != current) end[current] = i + 1; + } + } + } +} + +// We create a local version of subtract, because the tf.subtract kernel +// is not defined for int32. We use it to compute the length of an interval +// by subtracting the endpoints. +__global__ void IntervalLengthKernel(int32* start, int32 size, int32* end) { + CUDA_1D_KERNEL_LOOP(i, size) { + int32 start_point = ldg(start + i); + end[i] = end[i] - start_point; + } +} + +// Initialize out with range start, start + delta, start + 2 * delta, ... +// This is needed because tf.range has no GPU implementation. +template +void RangeInit(const GPUDevice& d, const T start, const T delta, + const int32 size, typename TTypes::Flat out) { + CudaLaunchConfig config = GetCudaLaunchConfig(size, d); + RangeInitKernel< + T><<>>( + start, delta, size, out.data()); +} + +// Partitions is a sorted vector of N non-negative integer numbers. +// This function computes the starting and ending points of each interval +// of values. +void ComputeIntervals(const GPUDevice& d, Tensor* partitions, int32 N, + int32 nump, int32* start_ptr, int32* end_ptr) { + CudaLaunchConfig config = GetCudaLaunchConfig(N, d); + FindEndpointsKernel<<>>(partitions->flat().data(), N, nump, + start_ptr, end_ptr); +} + +// Subtract the ending points of each interval to obtain the interval length. +void ComputeItvLength(const GPUDevice& d, int32 num, int32* start_ptr, + int32* end_ptr) { + CudaLaunchConfig config = GetCudaLaunchConfig(num, d); + IntervalLengthKernel<<>>(start_ptr, num, end_ptr); +} + +template +void CallGatherKernel(const GPUDevice& d, const T* params, const int32* indices, + T* out, int64 gather_dim_size, int64 indices_size, + int64 slice_size, int64 out_size) { + CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d); + GatherOpKernel< + T, int32, + true><<>>( + params, indices, out, gather_dim_size, indices_size, slice_size, + out_size); +} + +} // namespace + +// The current implementation has memory cost on GPU +// I + P + max(3N + R, O + N), where: +// I - the size of the input +// N - the size of the partitions tensor +// R - the temporary storage used by cub::RadixSort, about 2N +// P - the number of partitions +// O - the size of the output +// So roughly the cost is I + P + max(5N, O + N). +template +class DynamicPartitionOpGPU : public AsyncOpKernel { + public: + explicit DynamicPartitionOpGPU(OpKernelConstruction* c) : AsyncOpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("num_partitions", &num_partitions_)); + OP_REQUIRES(c, num_partitions_ >= 1, + errors::InvalidArgument("num_partitions must be at least 1")); + } + + void AllocateTempSpace(OpKernelContext* c, int32 N, Tensor* indices_in, + Tensor* partitions_out, Tensor* indices_out, + DoneCallback done) { + int32 M = std::max(N, num_partitions_); + // indices_in will be made slightly larger to accomodate + // later computations. + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({M}), indices_in), done); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({N}), partitions_out), done); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({N}), indices_out), done); + } + + void AllocateOutputs(OpKernelContext* c, const Tensor* data, + const Tensor* partitions, const Tensor* partition_count, + OpOutputList* Tout, DoneCallback done) { + auto e_part_count = partition_count->flat(); + // Allocate output tensors of the right size + OP_REQUIRES_OK_ASYNC(c, c->output_list("outputs", Tout), done); + for (int p = 0; p < num_partitions_; p++) { + TensorShape shape; + shape.AddDim(e_part_count(p)); + for (int i = partitions->dims(); i < data->dims(); i++) { + shape.AddDim(data->dim_size(i)); + } + Tensor* out; + OP_REQUIRES_OK_ASYNC(c, Tout->allocate(p, shape, &out), done); + } + } + + void ComputeAsync(OpKernelContext* c, DoneCallback done) { + const Tensor& data = c->input(0); + const Tensor& partitions = c->input(1); + + OP_REQUIRES_ASYNC( + c, TensorShapeUtils::StartsWith(data.shape(), partitions.shape()), + errors::InvalidArgument("data.shape must start with partitions.shape, ", + "got data.shape = ", data.shape().DebugString(), + ", partitions.shape = ", + partitions.shape().DebugString()), + done); + + Tensor partition_count; + + // We must handle the case of empty partitions separately, + // because kernels don't work with 0-sized tensors. + if (partitions.NumElements() == 0) { + AllocatorAttributes alloc_attr; + alloc_attr.set_on_host(true); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}), + &partition_count, alloc_attr), + done); + auto e_part_count = partition_count.flat(); + for (int i = 0; i < num_partitions_; i++) e_part_count(i) = 0; + OpOutputList outputs; + this->AllocateOutputs(c, &data, &partitions, &partition_count, &outputs, + done); + if (c->status().ok()) done(); + return; + } + + // Prepare for counting. + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}), + &partition_count), + done); + Tensor indices_out; + // Count how many times each partition index occurs. + // Also sort the info in partitions and output it in indices_out, + // in preparation for the next step. + this->CountAndSortParts(c, &partitions, &partition_count, &indices_out, + done); + if (!c->status().ok()) return; + + // In order to allocate the output tensor we have to move partition_count + // to CPU. + auto* stream = c->op_device_context()->stream(); + OP_REQUIRES_ASYNC(c, stream, errors::Internal("No GPU stream available."), + done); + Tensor cpu_tensor; + AllocatorAttributes alloc_attr; + alloc_attr.set_on_host(true); + alloc_attr.set_gpu_compatible(true); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp(partition_count.dtype(), partition_count.shape(), + &cpu_tensor, alloc_attr), + done); + perftools::gputools::DeviceMemoryBase wrapped( + partition_count.flat().data(), num_partitions_ * sizeof(int32)); + const bool status = + stream + ->ThenMemcpy(cpu_tensor.flat().data(), wrapped, + num_partitions_ * sizeof(int32)) + .ok(); + OP_REQUIRES_ASYNC( + c, status, + errors::Internal("Failed to launch copy from device to host."), done); + + // Keep a reference to partition_count so that the buffer + // is not deallocated at the end of the function, before + // memcpy is completed. + TensorReference partition_ref(partition_count); + auto wrapped_callback = [this, c, &data, &partitions, indices_out, + partition_ref, cpu_tensor, done]() { + OpOutputList outputs; + this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done); + if (!c->status().ok()) { + partition_ref.Unref(); + return; + } + int32 N = partitions.NumElements(); + int64 slice_size = data.NumElements() / N; + this->GatherSlices(c, &data, &indices_out, N, slice_size, outputs); + partition_ref.Unref(); + done(); + }; + + c->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute( + stream, wrapped_callback); + } + + protected: + void RadixSort(OpKernelContext* c, const Tensor* partitions, + Tensor* indices_in, Tensor* partitions_out, + Tensor* indices_out, DoneCallback done) { + int32 N = partitions->NumElements(); + const GPUDevice& device = c->eigen_device(); + const cudaStream_t& cu_stream = GetCudaStream(c); + + // Initialize the indices_in tensor using the Range GPU kernel. + RangeInit(device, 0, 1, N, indices_in->flat()); + // Obtain the pointers to inner buffers. + const int32* partitions_ptr = partitions->flat().data(); + int32* partitions_out_ptr = partitions_out->flat().data(); + int32* indices_in_ptr = indices_in->flat().data(); + int32* indices_out_ptr = indices_out->flat().data(); + // Determine temporary device storage requirements. + Tensor cub_temp_storage; + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs( + NULL, temp_storage_bytes, partitions_ptr, partitions_out_ptr, + indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); + // Allocate temporary storage. + OP_REQUIRES_OK_ASYNC( + c, c->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &cub_temp_storage), + done); + // Radix-sort the partition information. + cub::DeviceRadixSort::SortPairs( + cub_temp_storage.flat().data(), temp_storage_bytes, + partitions_ptr, partitions_out_ptr, indices_in_ptr, indices_out_ptr, N, + 0, sizeof(int32) * 8, cu_stream); + } // At this point cub_temp_storage will be marked for deallocation. + + void CountAndSortParts(OpKernelContext* c, const Tensor* partitions, + Tensor* partition_count, Tensor* indices_out, + DoneCallback done) { + const GPUDevice& device = c->eigen_device(); + int32 N = partitions->NumElements(); + Tensor indices_in; + Tensor partitions_out; + + // Allocate memory for Radix-Sort. + this->AllocateTempSpace(c, N, &indices_in, &partitions_out, indices_out, + done); + if (!c->status().ok()) return; + this->RadixSort(c, partitions, &indices_in, &partitions_out, indices_out, + done); + if (!c->status().ok()) return; + // We still need a little bit of additional memory. However, + // we can reuse the indices_in tensor. We could also use atomic + // operations and no additional memory, but this approach seems faster. + + // Zero-out the allocated memory. + functor::SetZeroFunctor zero_functor; + zero_functor(device, partition_count->flat()); + zero_functor(device, indices_in.flat()); + // Obtain the pointers to inner buffers. + int32* start_ptr = indices_in.flat().data(); + int32* end_ptr = partition_count->flat().data(); + // Obtain the starting and ending points of each interval. + ComputeIntervals(device, &partitions_out, N, num_partitions_, start_ptr, + end_ptr); + // Subtract to compute the number of appearances of each id. + ComputeItvLength(device, num_partitions_, start_ptr, end_ptr); + } // At this point indices_in and partitions_out will be marked + // for deallocation. + + void GatherSlices(OpKernelContext* c, const Tensor* data, + const Tensor* indices, int32 N, int64 slice_size, + OpOutputList& outs) { + const GPUDevice& device = c->eigen_device(); + const int32* ind_base = indices->flat().data(); + const T* data_base = data->flat().data(); + + for (int p = 0; p < num_partitions_; p++) { + int32 indices_size = outs[p]->dim_size(0); + int64 out_size = outs[p]->NumElements(); + T* out_base = outs[p]->flat().data(); + if (out_size > 0) + CallGatherKernel(device, data_base, ind_base, out_base, N, + indices_size, slice_size, out_size); + ind_base += indices_size; + } + } + + int num_partitions_; +}; + +#define REGISTER_DYNAMIC_PARTITION_GPU(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("DynamicPartition").Device(DEVICE_GPU).TypeConstraint("T"), \ + DynamicPartitionOpGPU) + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_PARTITION_GPU); +TF_CALL_complex64(REGISTER_DYNAMIC_PARTITION_GPU); +TF_CALL_complex128(REGISTER_DYNAMIC_PARTITION_GPU); +#undef REGISTER_DYNAMIC_PARTITION_GPU + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/dynamic_partition_op_test.cc b/tensorflow/core/kernels/dynamic_partition_op_test.cc index 0e8fbc0a67..9a7ed0af21 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_test.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -23,10 +24,14 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/testlib.h" #include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/random/simple_philox.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" namespace tensorflow { namespace { @@ -153,5 +158,58 @@ TEST_F(DynamicPartitionOpTest, Error_IndexOutOfRange) { << s; } +Node* DynamicPartitionNode(Graph* g, Node* in0, Node* in1, int num_partitions) { + Node* ret; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "DynamicPartition") + .Input(in0) + .Input(in1) + .Attr("num_partitions", num_partitions) + .Finalize(g, &ret)); + return ret; +} + +template +static Graph* DynamicPartition(int num_partitions, int dim) { + Graph* g = new Graph(OpRegistry::Global()); + // Always use a 128MB buffer. + const int kRows = ((128 << 20) / sizeof(T)) / dim; + Tensor data(DataTypeToEnum::value, TensorShape({kRows, dim})); + data.flat().setRandom(); + + random::PhiloxRandom philox(301, 17); + random::SimplePhilox rnd(&philox); + Tensor partitions(DT_INT32, TensorShape({kRows})); + for (int i = 0; i < kRows; i++) { + partitions.flat()(i) = rnd.Uniform(num_partitions); + } + DynamicPartitionNode(g, test::graph::Constant(g, data), + test::graph::Constant(g, partitions), num_partitions); + return g; +} + +#define BM_DYNAMIC_PARTITION(DEVICE, T, num) \ + static void BM_##DEVICE##_dynpart_##T##_##num(int iters, int dim) { \ + const int64 items = ((128 << 20) / sizeof(T)); \ + const int64 tot = static_cast(iters) * items; \ + testing::ItemsProcessed(tot); \ + testing::UseRealTime(); \ + test::Benchmark(#DEVICE, DynamicPartition(num, dim)).Run(iters); \ + } \ + BENCHMARK(BM_##DEVICE##_dynpart_##T##_##num)->Arg(1)->Arg(256) + +BM_DYNAMIC_PARTITION(cpu, float, 2); +BM_DYNAMIC_PARTITION(cpu, float, 100); +BM_DYNAMIC_PARTITION(cpu, double, 2); +BM_DYNAMIC_PARTITION(cpu, double, 100); +BM_DYNAMIC_PARTITION(cpu, complex64, 2); +BM_DYNAMIC_PARTITION(cpu, complex64, 100); + +BM_DYNAMIC_PARTITION(gpu, float, 2); +BM_DYNAMIC_PARTITION(gpu, float, 100); +BM_DYNAMIC_PARTITION(gpu, double, 2); +BM_DYNAMIC_PARTITION(gpu, double, 100); +BM_DYNAMIC_PARTITION(gpu, complex64, 2); +BM_DYNAMIC_PARTITION(gpu, complex64, 100); + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index 0ecb829f34..1688674eb7 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -54,25 +54,20 @@ struct FusedBatchNorm { Tensor* batch_var_output, Tensor* saved_mean_output, Tensor* saved_var_output, TensorFormat tensor_format, bool is_training) { - // Currently U is ignored, since we only support the case where T and U are - // both float32. - // TODO(reedwm): Add float16 support, use U, and remove these asserts. - static_assert(std::is_same::value, "T currently must be float."); - static_assert(std::is_same::value, "U currently must be float."); OP_REQUIRES(context, tensor_format == FORMAT_NHWC, errors::Internal("The CPU implementation of FusedBatchNorm " "only supports NHWC tensor format for now.")); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec offset(offset_input.vec()); - typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); - typename TTypes::ConstVec estimated_variance( - estimated_variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec offset(offset_input.vec()); + typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); + typename TTypes::ConstVec estimated_variance( + estimated_variance_input.vec()); typename TTypes::Tensor y(y_output->tensor()); - typename TTypes::Vec batch_mean(batch_mean_output->vec()); - typename TTypes::Vec batch_var(batch_var_output->vec()); - typename TTypes::Vec saved_mean(saved_mean_output->vec()); - typename TTypes::Vec saved_var(saved_var_output->vec()); + typename TTypes::Vec batch_mean(batch_mean_output->vec()); + typename TTypes::Vec batch_var(batch_var_output->vec()); + typename TTypes::Vec saved_mean(saved_mean_output->vec()); + typename TTypes::Vec saved_var(saved_var_output->vec()); const CPUDevice& d = context->eigen_device(); @@ -93,15 +88,15 @@ struct FusedBatchNorm { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth); + auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1; - T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); // This adjustment is for Bessel's correction - T rest_size_adjust = - static_cast(rest_size) / static_cast(rest_size_minus_one); + U rest_size_adjust = + static_cast(rest_size) / static_cast(rest_size_minus_one); - Eigen::Tensor mean(depth); - Eigen::Tensor variance(depth); + Eigen::Tensor mean(depth); + Eigen::Tensor variance(depth); if (is_training) { mean.device(d) = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv); batch_mean.device(d) = mean; @@ -129,7 +124,7 @@ struct FusedBatchNorm { auto x_shifted = x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec); - y.reshape(rest_by_depth).device(d) = x_shifted; + y.reshape(rest_by_depth).device(d) = x_shifted.template cast(); } }; @@ -138,7 +133,7 @@ struct FusedBatchNormGrad { void operator()(OpKernelContext* context, const Tensor& y_backprop_input, const Tensor& x_input, const Tensor& scale_input, const Tensor& mean_input, const Tensor& variance_input, - T epsilon, Tensor* x_backprop_output, + U epsilon, Tensor* x_backprop_output, Tensor* scale_backprop_output, Tensor* offset_backprop_output, TensorFormat tensor_format) { OP_REQUIRES(context, tensor_format == FORMAT_NHWC, @@ -147,12 +142,12 @@ struct FusedBatchNormGrad { typename TTypes::ConstTensor y_backprop( y_backprop_input.tensor()); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec mean(mean_input.vec()); - typename TTypes::ConstVec variance(variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec mean(mean_input.vec()); + typename TTypes::ConstVec variance(variance_input.vec()); typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); - typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); - typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); + typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); + typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); // Note: the following formulas are used to compute the gradients for // back propagation. @@ -181,8 +176,8 @@ struct FusedBatchNormGrad { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth); - T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); + U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); auto x_mean_rest_by_depth = mean.reshape(one_by_depth).broadcast(bcast_spec); @@ -192,7 +187,8 @@ struct FusedBatchNormGrad { coef0.eval().reshape(one_by_depth).broadcast(bcast_spec); auto x_scaled = x_centered * coef0_rest_by_depth; - auto y_backprop_rest_by_depth = y_backprop.eval().reshape(rest_by_depth); + auto y_backprop_rest_by_depth = + y_backprop.eval().reshape(rest_by_depth).template cast(); scale_backprop.device(d) = (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims); auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims); @@ -214,7 +210,7 @@ struct FusedBatchNormGrad { .reshape(one_by_depth) .broadcast(bcast_spec); x_backprop.reshape(rest_by_depth).device(d) = - coef1 * (y_backprop_centered - x_centered * coef2); + (coef1 * (y_backprop_centered - x_centered * coef2)).template cast(); } }; @@ -689,6 +685,18 @@ REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") .TypeConstraint("U"), FusedBatchNormGradOp); +REGISTER_KERNEL_BUILDER(Name("FusedBatchNormV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("U"), + FusedBatchNormOp); + +REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("U"), + FusedBatchNormGradOp); + #if GOOGLE_CUDA REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h index 38b24d7011..3af104bf95 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.h +++ b/tensorflow/core/kernels/fused_batch_norm_op.h @@ -92,26 +92,28 @@ struct FusedBatchNormFreezeGrad { // offset_backprop = sum(y_backprop) // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon)) // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) - offset_backprop.device(d) = y_backprop.reshape(rest_by_depth) - .template cast() - .sum(reduction_axis); + + auto y_backprop_rest_by_depth = + y_backprop.reshape(rest_by_depth).template cast(); + auto input_rest_by_depth = input.reshape(rest_by_depth).template cast(); + + offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis); // scratch1 = rsqrt(pop_var + epsilon) scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt(); // scratch2 = sum(y_backprop * (x - mean)) scratch2.device(d) = - (y_backprop.reshape(rest_by_depth).template cast() * - (input.reshape(rest_by_depth).template cast() - + (y_backprop_rest_by_depth * + (input_rest_by_depth - pop_mean.reshape(one_by_depth).broadcast(rest_by_one))) .sum(reduction_axis); x_backprop.reshape(rest_by_depth).device(d) = - (y_backprop.reshape(rest_by_depth).template cast() * - ((scratch1 * scale) - .eval() - .reshape(one_by_depth) - .broadcast(rest_by_one))) + (y_backprop_rest_by_depth * ((scratch1 * scale) + .eval() + .reshape(one_by_depth) + .broadcast(rest_by_one))) .template cast(); scale_backprop.device(d) = scratch2 * scratch1; } diff --git a/tensorflow/core/kernels/lmdb_reader_op.cc b/tensorflow/core/kernels/lmdb_reader_op.cc index 3bb07301b5..31a427f2c9 100755 --- a/tensorflow/core/kernels/lmdb_reader_op.cc +++ b/tensorflow/core/kernels/lmdb_reader_op.cc @@ -36,7 +36,7 @@ class LMDBReader : public ReaderBase { Status OnWorkStartedLocked() override { MDB_CHECK(mdb_env_create(&mdb_env_)); - int flags = MDB_RDONLY | MDB_NOTLS; + int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK; // Check if the LMDB filename is actually a file instead of a directory. // If so, set appropriate flags so we can open it. @@ -57,10 +57,13 @@ class LMDBReader : public ReaderBase { if (mdb_env_ != nullptr) { if (mdb_cursor_) { mdb_cursor_close(mdb_cursor_); + mdb_cursor_ = nullptr; } - mdb_txn_abort(mdb_txn_); mdb_dbi_close(mdb_env_, mdb_dbi_); + mdb_txn_abort(mdb_txn_); mdb_env_close(mdb_env_); + mdb_txn_ = nullptr; + mdb_dbi_ = 0; mdb_env_ = nullptr; } return Status::OK(); diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index e2cf605811..157ce106ce 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -34,9 +33,11 @@ limitations under the License. #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/maxpooling_op_gpu.h" @@ -358,6 +359,7 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -405,7 +407,7 @@ class MaxPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop, - output_shape); + output_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPoolGrad only supports NHWC format"; @@ -420,6 +422,7 @@ class MaxPoolingGradOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; #endif // GOOGLE_CUDA @@ -884,6 +887,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); + + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -902,14 +907,15 @@ class MaxPoolingWithArgmaxOp : public OpKernel { Tensor* argmax = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax)); - LaunchMaxPoolingWithArgmax::launch(context, params, tensor_in, - output, argmax); + LaunchMaxPoolingWithArgmax::launch( + context, params, tensor_in, output, argmax, propagate_nans_); } private: std::vector ksize_; std::vector stride_; Padding padding_; + bool propagate_nans_; }; template @@ -1045,6 +1051,8 @@ class MaxPoolingNoMaskOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); + + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1068,9 +1076,10 @@ class MaxPoolingNoMaskOp : public OpKernel { // These is_int8x4 checks avoid linker errors for missing qint8 kernels. if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_, - stride_, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize_, stride_, padding_, data_format_, + tensor_in, out_shape, propagate_nans_); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); @@ -1079,7 +1088,7 @@ class MaxPoolingNoMaskOp : public OpKernel { tensor_in, output); } else if (data_format_ == FORMAT_NHWC) { LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output); + output, propagate_nans_); } else { LOG(FATAL) << "MaxPool currently only supports the following (layout, " "type) combinations: (NHWC, non-qint8), " @@ -1098,6 +1107,7 @@ class MaxPoolingNoMaskOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template @@ -1127,6 +1137,7 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1168,16 +1179,17 @@ class MaxPoolingNoMaskV2Op : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); if (use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, - stride, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize, stride, padding_, data_format_, tensor_in, + out_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPool only supports NHWC format"; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output); + output, propagate_nans_); } } @@ -1187,18 +1199,20 @@ class MaxPoolingNoMaskV2Op : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template struct LaunchMaxPoolingNoMask { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output) { + const Tensor& input, Tensor* output, bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, - output->flat().data(), nullptr, context->eigen_gpu_device()); + output->flat().data(), nullptr, context->eigen_gpu_device(), + propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardNoMask")); @@ -1209,7 +1223,8 @@ struct LaunchMaxPoolingNoMask { template struct LaunchMaxPoolingWithArgmax { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output, Tensor* argmax) { + const Tensor& input, Tensor* output, Tensor* argmax, + bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, @@ -1217,7 +1232,7 @@ struct LaunchMaxPoolingWithArgmax { params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, output->flat().data(), reinterpret_cast(argmax->flat().data()), - context->eigen_gpu_device()); + context->eigen_gpu_device(), propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardWithArgmax")); diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index 26f5274804..d96b844383 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -29,6 +29,15 @@ limitations under the License. namespace tensorflow { namespace { +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) { + if (propagate_nans) { + return !(a <= b); + } else { + return a > b; + } +} + // This is Yangqing's custom kernel for the maxpooling operation. There are // three functions: MaxPoolForwardNCHW and MaxPoolForwardNHWC are the two // forward functions, dealing with the forward case. MaxPoolBackward is the @@ -51,7 +60,7 @@ namespace { // const int output_size = batch * channels * pooled_height * pooled_width; // MaxPoolForwardNCHW<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, // kThreadsPerBlock, 0, cuda_stream>>>(...); -template +template __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, @@ -77,7 +86,7 @@ __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = c * height * width + h * width + w; - if (bottom_data_n[idx] > maxval) { + if (IsGreaterThan(bottom_data_n[idx], maxval)) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -126,7 +135,7 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C( } } -template +template __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, const int height, const int width, const int channels, const int pooled_height, @@ -153,7 +162,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = (h * width + w) * channels + c; - if (bottom_data_n[idx] > maxval) { + if (IsGreaterThan(bottom_data_n[idx], maxval)) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -390,15 +399,24 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( const int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, - int64* mask, const Eigen::GpuDevice& d) { + int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) { const int kThreadsPerBlock = 1024; const int output_size = batch * channels * pooled_height * pooled_width; - - MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>>( - output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + if (propagate_nans) { + MaxPoolForwardNHWC + <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, + kThreadsPerBlock, 0, d.stream()>>> + (output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); + } else { + MaxPoolForwardNHWC + <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, + kThreadsPerBlock, 0, d.stream()>>> + (output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); + } return d.ok(); } diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h index 34203797cf..38ebb34248 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.h +++ b/tensorflow/core/kernels/maxpooling_op_gpu.h @@ -39,7 +39,7 @@ struct MaxPoolForwardWithOptionalArgmax { const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, int64* mask, - const Eigen::GpuDevice& d); + const Eigen::GpuDevice& d, bool propagate_nans); }; struct MaxPoolForwardNoMask_NCHW_VECT_C { diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index 9080bf7be8..f291281108 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -45,12 +45,12 @@ limitations under the License. #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::prop_kind; using mkldnn::stream; +using mkldnn::prop_kind; +using mkldnn::convolution_forward; using mkldnn::convolution_backward_weights; using mkldnn::convolution_direct; -using mkldnn::convolution_forward; #endif @@ -463,13 +463,12 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Generate input shapes. TensorShape filter_shape; - OP_REQUIRES( - context, TensorShapeUtils::IsVector(filter_tensor.shape()), - errors::InvalidArgument( + OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()), + errors::InvalidArgument( "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ", filter_tensor.dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - filter_tensor.vec(), &filter_shape)); + filter_tensor.vec(), &filter_shape)); TensorShape input_shape = input_tensor.shape(); TensorShape obp_shape = obp_tensor.shape(); @@ -481,26 +480,27 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Get forward convolution parameters. MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder( - input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, - &padding_r); + conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape, + &fwd_input_dims, &fwd_filter_dims, + &strides, + &fwd_output_dims_tf_order, + &fwd_output_dims, + &padding_l, &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward // API needs it. For that, we first need to create input, filter // and output memory descriptors. auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); - auto fwd_src_md = - memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); - auto fwd_filter_md = - memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); - auto fwd_out_md = - memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); - auto fwd_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, - fwd_out_md, strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType(), + mkl_data_format); + auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType(), + memory::format::hwio); + auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), + mkl_data_format); + auto fwd_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md, + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Allocate output tensor and shape @@ -537,22 +537,23 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { output.SetOpMemDesc(bwd_output_dims, memory::format::any); // Create convolution backward weights primitive. - auto bwd_desc = convolution_backward_weights::desc( - convolution_direct, input.GetOpMemDesc(), output.GetOpMemDesc(), - outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto bwd_desc = convolution_backward_weights::desc(convolution_direct, + input.GetOpMemDesc(), output.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); - auto bwd_pd = convolution_backward_weights::primitive_desc( - bwd_desc, cpu_engine, fwd_pd); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + fwd_pd); PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:", + error_msg)); } } @@ -563,8 +564,9 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecutePrimitive( - const convolution_backward_weights::primitive_desc& conv_pd, - MklDnnData* input, MklDnnData* obp, MklDnnData* output) { + const convolution_backward_weights::primitive_desc& conv_pd, + MklDnnData* input, MklDnnData* obp, + MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -575,10 +577,10 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // output side, we will prepare reorder primitive in case output // reorder to user memory is required. bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_pd.diff_weights_primitive_desc()); + conv_pd.diff_weights_primitive_desc()); - net.push_back(convolution_backward_weights( - conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem())); + net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(), + obp->GetOpMem(), output->GetOpMem())); // Insert reorder primitive in the net for output reorder if reorder is // required. diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index 4b6bf92e42..4a47d0463e 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -23,8 +23,6 @@ limitations under the License. #define EIGEN_USE_THREADS #include #include -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -43,16 +41,18 @@ limitations under the License. #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" #include "tensorflow/core/util/work_sharder.h" +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::prop_kind; using mkldnn::stream; +using mkldnn::prop_kind; -using mkldnn::convolution_backward_data; -using mkldnn::convolution_direct; using mkldnn::convolution_forward; +using mkldnn::convolution_direct; +using mkldnn::convolution_backward_data; #endif namespace tensorflow { @@ -397,13 +397,12 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Generate input shape. TensorShape input_shape; - OP_REQUIRES( - context, TensorShapeUtils::IsVector(input_tensor.shape()), - errors::InvalidArgument( + OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()), + errors::InvalidArgument( "Conv2DBackpropInput: input_sizes input must be 1-dim, not ", input_tensor.dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - input_tensor.vec(), &input_shape)); + input_tensor.vec(), &input_shape)); TensorShape filter_shape = filter_tensor.shape(); TensorShape obp_shape = obp_tensor.shape(); @@ -415,26 +414,27 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Get forward convolution parameters. MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder( - input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, - &padding_r); + conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape, + &fwd_input_dims, &fwd_filter_dims, + &strides, + &fwd_output_dims_tf_order, + &fwd_output_dims, + &padding_l, &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward // API needs it. For that, we first need to create input, filter // and output memory descriptors. auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); - auto fwd_src_md = - memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); - auto fwd_filter_md = - memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); - auto fwd_out_md = - memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); - auto fwd_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, - fwd_out_md, strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType(), + mkl_data_format); + auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType(), + memory::format::hwio); + auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), + mkl_data_format); + auto fwd_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md, + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Allocate output tensor and shape @@ -475,22 +475,23 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { output.SetOpMemDesc(bwd_output_dims, memory::format::any); // Create convolution backward data primitive. - auto bwd_desc = convolution_backward_data::desc( - convolution_direct, output.GetOpMemDesc(), filter.GetOpMemDesc(), - outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto bwd_desc = convolution_backward_data::desc(convolution_direct, + output.GetOpMemDesc(), filter.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); - auto bwd_pd = convolution_backward_data::primitive_desc( - bwd_desc, cpu_engine, fwd_pd); + auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc, + cpu_engine, + fwd_pd); PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:", + error_msg)); } } @@ -501,8 +502,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecutePrimitive( - const convolution_backward_data::primitive_desc& conv_pd, - MklDnnData* filter, MklDnnData* obp, MklDnnData* output) { + const convolution_backward_data::primitive_desc& conv_pd, + MklDnnData* filter, MklDnnData* obp, + MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -512,11 +514,11 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Memory for output of convolution. Since we may need reorder on the // output side, we will prepare reorder primitive in case output // reorder to user memory is required. - bool output_reorder_required = - output->PrepareReorderToUserMemIfReq(conv_pd.diff_src_primitive_desc()); + bool output_reorder_required = output->PrepareReorderToUserMemIfReq( + conv_pd.diff_src_primitive_desc()); - net.push_back(convolution_backward_data( - conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem())); + net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(), + filter->GetOpMem(), output->GetOpMem())); // Insert reorder primitive in the net for output reorder if reorder is // required. diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 369f632fb4..a9872b8d6d 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -18,8 +18,8 @@ limitations under the License. #include #include -#include #include +#include #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -46,11 +46,11 @@ limitations under the License. #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::prop_kind; using mkldnn::stream; +using mkldnn::prop_kind; -using mkldnn::convolution_direct; using mkldnn::convolution_forward; +using mkldnn::convolution_direct; #endif namespace tensorflow { @@ -523,16 +523,19 @@ class MklConv2DOp : public OpKernel { // Get shapes of input tensors in MKL-DNN order MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder( - src_tensor.shape(), filter_tensor.shape(), &src_dims, &filter_dims, - &strides, &output_dims_tf_order, &output_dims_mkl_order, &padding_l, - &padding_r); + conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(), + filter_tensor.shape(), + &src_dims, &filter_dims, &strides, + &output_dims_tf_order, + &output_dims_mkl_order, &padding_l, + &padding_r); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. - TensorShape tf_output_shape( - {output_dims_tf_order[0], output_dims_tf_order[1], - output_dims_tf_order[2], output_dims_tf_order[3]}); + TensorShape tf_output_shape({output_dims_tf_order[0], + output_dims_tf_order[1], + output_dims_tf_order[2], + output_dims_tf_order[3]}); Tensor* output_tensor = nullptr; MklShape mkl_output_mkl_shape; mkl_output_mkl_shape.SetMklTensor(false); @@ -569,13 +572,13 @@ class MklConv2DOp : public OpKernel { // the layout is Tensorflow's layout (NHWC or NCHW depending on data // format). src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_), - const_cast( - static_cast(src_tensor.flat().data()))); + const_cast(static_cast( + src_tensor.flat().data()))); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). filter.SetUsrMem(filter_dims, memory::format::hwio, const_cast(static_cast( - filter_tensor.flat().data()))); + filter_tensor.flat().data()))); // Although output shape (output_dims) required is in MKL-DNN order, // layout is Tensorflow's layout (NHWC or NCHW depending on data format). output.SetUsrMem(output_dims_mkl_order, @@ -595,36 +598,36 @@ class MklConv2DOp : public OpKernel { const Tensor& bias_tensor = MklGetInput(context, 2); bias.SetUsrMem(bias_size, memory::format::x, const_cast(static_cast( - bias_tensor.flat().data()))); + bias_tensor.flat().data()))); bias.SetOpMemDesc(bias_size, memory::format::any); // Create convolution primitive with Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), - strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + auto conv_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(), + bias.GetOpMemDesc(), output.GetOpMemDesc(), strides, + padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output); } else { // Create convolution primitive without Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_)); + auto conv_desc = convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(), + output.GetOpMemDesc(), strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output); } - } catch (mkldnn::error& e) { + } catch (mkldnn::error &e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + std::string(e.message) + ", in file " + - std::string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + std::string(e.message) + + ", in file " + std::string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", error_msg)); } } @@ -635,9 +638,9 @@ class MklConv2DOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecuteNet( - const convolution_forward::primitive_desc& conv_prim_desc, - MklDnnData* src, MklDnnData* filter, MklDnnData* bias, - MklDnnData* output) { + const convolution_forward::primitive_desc& conv_prim_desc, + MklDnnData* src, MklDnnData* filter, + MklDnnData* bias, MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -648,19 +651,18 @@ class MklConv2DOp : public OpKernel { // output side, we will prepare reorder primitive in case output // reorder to user memory is required. bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_prim_desc.dst_primitive_desc()); + conv_prim_desc.dst_primitive_desc()); // Create convolution primitive and add it to net. if (bias) { CHECK_EQ(biasEnabled, true); net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), - filter->GetOpMem(), bias->GetOpMem(), - output->GetOpMem())); + filter->GetOpMem(), bias->GetOpMem(), + output->GetOpMem())); } else { CHECK_EQ(biasEnabled, false); net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), - filter->GetOpMem(), - output->GetOpMem())); + filter->GetOpMem(), output->GetOpMem())); } // Insert reorder primitive in the net for output reorder if reorder is diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index e29af19ca9..f0cb37f8a4 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ #define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ -#include #include +#include #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -26,8 +26,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/bounds_check.h" -#include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/strings/numbers.h" @@ -49,15 +49,15 @@ namespace tensorflow { class MklDnnConvUtil { protected: - OpKernelContext *context_; // We don't own this. + OpKernelContext* context_; // We don't own this. std::vector strides_; Padding padding_; TensorFormat data_format_; public: - MklDnnConvUtil(OpKernelContext *context, const std::vector &strides, - Padding pad, TensorFormat fm) - : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} + MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, + Padding pad, TensorFormat fm) : context_(context), + strides_(strides), padding_(pad), data_format_(fm) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -75,14 +75,14 @@ class MklDnnConvUtil { // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's // status. - virtual inline void GetInputSizeInMklOrder(const TensorShape &input_shape, - memory::dims *input_dims) { -#define CHECK_BOUNDS(val, err_msg) \ - do { \ - OP_REQUIRES(context_, \ - FastBoundsCheck(val, std::numeric_limits::max()), \ - errors::InvalidArgument(err_msg)); \ - } while (0) + virtual inline void + GetInputSizeInMklOrder(const TensorShape& input_shape, + memory::dims *input_dims) { + #define CHECK_BOUNDS(val, err_msg) do { \ + OP_REQUIRES(context_, FastBoundsCheck(val, \ + std::numeric_limits::max()), \ + errors::InvalidArgument(err_msg)); \ + }while(0) CHECK_NOTNULL(input_dims); @@ -105,7 +105,7 @@ class MklDnnConvUtil { CHECK_BOUNDS(input_batch_raw, "Input batch too large"); int input_batch = static_cast(input_batch_raw); -#undef CHECK_BOUNDS + #undef CHECK_BOUNDS // MKL-DNN always requires input in NCHW format. *input_dims = {input_batch, input_depth, input_rows, input_cols}; @@ -125,9 +125,10 @@ class MklDnnConvUtil { // forward gets actual tensor as input). // // TODO(nhasabni): Add similar function for input and filter in MklShape. - virtual inline void GetFilterSizeInMklOrder(const TensorShape &input_shape, - const TensorShape &filter_shape, - memory::dims *filter_dims) { + virtual inline void + GetFilterSizeInMklOrder(const TensorShape& input_shape, + const TensorShape& filter_shape, + memory::dims *filter_dims) { CHECK_NOTNULL(filter_dims); OP_REQUIRES(context_, filter_shape.dims() == 4, @@ -135,18 +136,17 @@ class MklDnnConvUtil { filter_shape.DebugString())); for (int i = 0; i < 3; i++) { - OP_REQUIRES(context_, - FastBoundsCheck(filter_shape.dim_size(i), - std::numeric_limits::max()), - errors::InvalidArgument("filter too large")); + OP_REQUIRES(context_, FastBoundsCheck(filter_shape.dim_size(i), + std::numeric_limits::max()), + errors::InvalidArgument("filter too large")); } int input_depth = GetTensorDim(input_shape, data_format_, 'C'); - OP_REQUIRES(context_, input_depth == filter_shape.dim_size(2), - errors::InvalidArgument( - "input and filter must have the same depth: ", input_depth, - " vs ", filter_shape.dim_size(2))); + OP_REQUIRES( + context_, input_depth == filter_shape.dim_size(2), + errors::InvalidArgument("input and filter must have the same depth: ", + input_depth, " vs ", filter_shape.dim_size(2))); // TF filter is always in (rows, cols, in_depth, out_depth) order. int filter_rows = static_cast(filter_shape.dim_size(0)); @@ -163,25 +163,25 @@ class MklDnnConvUtil { // requires filter in OIHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's // status. - virtual inline void GetFilterSizeInMklOrder(size_t src_index, - size_t filter_index, - memory::dims *filter_dims) { + virtual inline void + GetFilterSizeInMklOrder(size_t src_index, size_t filter_index, + memory::dims *filter_dims) { CHECK_NOTNULL(filter_dims); - const Tensor &input = MklGetInput(context_, src_index); - const Tensor &filter = MklGetInput(context_, filter_index); + const Tensor& input = MklGetInput(context_, src_index); + const Tensor& filter = MklGetInput(context_, filter_index); GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims); } // Calculate Bias size for 2D Convolution. Function does not return // anything, but sets error in context status. - virtual inline void GetBiasSizeInMklOrder(size_t bias_index, - memory::dims *bias_dims) { - const Tensor &bias = MklGetInput(context_, bias_index); + virtual inline void + GetBiasSizeInMklOrder(size_t bias_index, memory::dims *bias_dims) { + const Tensor& bias = MklGetInput(context_, bias_index); OP_REQUIRES(context_, bias.dims() == 1, errors::InvalidArgument("bias must be 1-dimensional: ", bias.shape().DebugString())); - *bias_dims = {static_cast(bias.dim_size(0))}; + *bias_dims = { static_cast(bias.dim_size(0)) }; } // Function to calculate output and padding size for 2D convolution. @@ -193,11 +193,13 @@ class MklDnnConvUtil { // status is returned via context status. // // TODO(nhasabni): Add similar function for input and filter in MklShape. - virtual inline void GetOutputAndPadSizeInMklOrder( - const TensorShape &input_shape, const TensorShape &filter_shape, - const memory::dims &strides, memory::dims *output_dims_tf_order, - memory::dims *output_dims_mkl_order, memory::dims *pad_l, - memory::dims *pad_r) { + virtual inline void + GetOutputAndPadSizeInMklOrder(const TensorShape& input_shape, + const TensorShape& filter_shape, + const memory::dims& strides, + memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, + memory::dims *pad_l, memory::dims *pad_r) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -223,21 +225,21 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_rows, filter_rows, stride_rows, padding_, - &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_cols, filter_cols, stride_cols, padding_, - &out_cols, &pad_left, &pad_right)); + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerbose(input_rows, filter_rows, stride_rows, + padding_, &out_rows, &pad_top, &pad_bottom)); + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerbose(input_cols, filter_cols, stride_cols, + padding_, &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) - TensorShape out_shape = - ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth); + TensorShape out_shape = ShapeFromFormat(data_format_, out_batch, + out_rows, out_cols, out_depth); *output_dims_tf_order = TFShapeToMklDnnDims(out_shape); // MKL-DNN always needs output in NCHW format. *output_dims_mkl_order = {out_batch, out_depth, static_cast(out_rows), - static_cast(out_cols)}; + static_cast(out_cols)}; // Now handle padding. MKL-DNN uses asymetric padding. *pad_l = {static_cast(pad_top), static_cast(pad_left)}; @@ -248,25 +250,27 @@ class MklDnnConvUtil { // See comment on GetConvOutputAndPadSizeInMklOrder for parameters. // // Function does not return anything, but sets error in context status. - inline void GetOutputAndPadSizeInMklOrder( - size_t src_index, size_t filter_index, const memory::dims &strides, - memory::dims *output_dims_tf_order, memory::dims *output_dims_mkl_order, - memory::dims *pad_l, memory::dims *pad_r) { + inline void + GetOutputAndPadSizeInMklOrder(size_t src_index, size_t filter_index, + const memory::dims& strides, + memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, + memory::dims *pad_l, memory::dims *pad_r) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); CHECK_NOTNULL(pad_r); - const Tensor &input = MklGetInput(context_, src_index); - const Tensor &filter = MklGetInput(context_, filter_index); + const Tensor& input = MklGetInput(context_, src_index); + const Tensor& filter = MklGetInput(context_, filter_index); OP_REQUIRES(context_, input.dims() == 4, errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); + input.shape().DebugString())); - GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), strides, - output_dims_tf_order, output_dims_mkl_order, - pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), + strides, output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -275,12 +279,15 @@ class MklDnnConvUtil { // also calculates strides and paddings for 2D Convolution. // // Function does not return anything, but sets error in context status. - inline void GetConvFwdSizesInMklOrder( - const TensorShape &input_shape, const TensorShape &filter_shape, - memory::dims *input_dims, memory::dims *filter_dims, - memory::dims *strides, memory::dims *output_dims_tf_order, - memory::dims *output_dims_mkl_order, memory::dims *pad_l, - memory::dims *pad_r) { + inline void GetConvFwdSizesInMklOrder(const TensorShape& input_shape, + const TensorShape& filter_shape, + memory::dims *input_dims, + memory::dims *filter_dims, + memory::dims *strides, + memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, + memory::dims *pad_l, + memory::dims *pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); @@ -295,7 +302,8 @@ class MklDnnConvUtil { if (!context_->status().ok()) return; GetStridesInMklOrder(strides); GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, - output_dims_tf_order, output_dims_mkl_order, + output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; } diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index a240ee44fb..0a5be4fec9 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifdef INTEL_MKL - #ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ #define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ +#ifdef INTEL_MKL + #include #include #include "tensorflow/core/framework/numeric_op.h" @@ -35,6 +35,10 @@ limitations under the License. #include "mkl_dnn_types.h" #include "tensorflow/core/util/mkl_util.h" +#ifdef INTEL_MKL_DNN +using mkldnn::stream; +#endif + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -57,6 +61,71 @@ class MklToTfOp : public OpKernel { VLOG(1) << "MKLToTFConversion complete successfully."; } +#ifdef INTEL_MKL_DNN + static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, + string data_format_str, DataType op_data_type, + bool has_avx512f, uint input_number) { + try { + // Check that input tensor is in MKL format. + const Tensor& input_tensor = MklGetInput(context, input_number); + MklDnnShape input_shape; + GetMklShape(context, input_number, &input_shape); + + // if input is already in Tf format, then copy input tensor to output. + if (!input_shape.IsMklTensor()) { + context->set_output(input_number, input_tensor); + VLOG(1) << "MKLToTFConversion: No conversion needed, " + << "copying input to output"; + return; + } + + // Check that input data type is same as operator data type and that it + // is same as output data type. + DataType input_data_type = op_kernel->input_type(input_number); + DataType output_data_type = op_kernel->output_type(input_number); + CHECK_EQ(op_data_type, input_data_type); + CHECK_EQ(op_data_type, output_data_type); + + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData input(&cpu_engine); + + // Get Mkl layout of input tensor. + auto input_mkl_md = input_shape.GetMklLayout(); + // Get TensorFlow layout of input tensor. Expected output of conversion + // has same layout as Tensorflow layout of input tensor. + auto output_tf_md = input_shape.GetTfLayout(); + auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); + // Set input Mkl layout as the user layout. + input.SetUsrMem(input_mkl_md, &input_tensor); + + // Allocate output tensor. + TensorShape output_shape = input_shape.GetTfShape(); + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(input_number, + output_shape, &output_tensor)); + CHECK_NOTNULL(output_tensor); + + // Do we need to reorder Mkl layout into TensorFlow layout? + if (input.IsReorderNeeded(output_tf_pd)) { + // Insert reorder between Mkl layout and TensorFlow layout. + std::vector net; + CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, output_tensor, &net), + true); + stream(stream::kind::eager).submit(net).wait(); + } else { + // If not, just forward input tensor to output tensor. + CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); + } + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + std::string(e.message) + + ", in file " + std::string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } +#else static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, string data_format_str, DataType op_data_type, bool has_avx512f, uint input_number) { @@ -91,8 +160,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, - context->allocate_output(input_number, output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(input_number, + output_shape, &output_tensor)); dnnLayout_t output_layout = static_cast(input_shape.GetTfLayout()); @@ -106,6 +175,7 @@ class MklToTfOp : public OpKernel { output_buffer); VLOG(1) << "MKLToTFConversion complete successfully."; } +#endif private: /// Data format of the operation @@ -132,5 +202,5 @@ class MklToTfOp : public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_CPU); #undef REGISTER_CPU } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ #endif // INTEL_MKL +#endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index 7dee751c4f..ac90f67ce0 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -143,7 +143,7 @@ void DnnPoolingOp::Compute( perftools::gputools::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape) { + const TensorShape& tensor_out_shape, bool propagate_nans) { Tensor* tensor_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, tensor_out_shape, &tensor_out)); @@ -188,7 +188,8 @@ void DnnPoolingOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols); + .set_horizontal_padding(params.pad_cols) + .set_propagate_nans(propagate_nans); perftools::gputools::dnn::BatchDescriptor input_desc; input_desc.set_count(params.tensor_in_batch) @@ -237,7 +238,7 @@ void DnnPoolingGradOp::Compute( const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape) { + const TensorShape& tensor_in_shape, bool propagate_nans) { CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) || (tensor_in && tensor_out)) << "For MaxPoolGrad, both tensor_in and tensor_out needs to be " @@ -327,7 +328,8 @@ void DnnPoolingGradOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols); + .set_horizontal_padding(params.pad_cols) + .set_propagate_nans(propagate_nans); perftools::gputools::dnn::BatchDescriptor orig_output_desc; orig_output_desc.set_count(params.tensor_in_batch) diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index b594f39fad..1458456585 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -44,7 +44,7 @@ class DnnPoolingOp { const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape); + const TensorShape& tensor_out_shape, bool propagate_nans); }; // A helper class that launch the cudnn pooling backward operations. @@ -60,7 +60,7 @@ class DnnPoolingGradOp { const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape); + const TensorShape& tensor_in_shape, bool propagate_nans); }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 8be0c56798..337c8e5c17 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -489,7 +489,7 @@ class QuantizedAddOp : public OpKernel { // adding zero leaves the result unchanged, and to contain the largest of // the two input values with some room to spare. const float smallest_min = std::min(min_x, min_y); - const float largest_max = std::min(max_x, max_y); + const float largest_max = std::max(max_x, max_y); const float biggest_range = std::max(std::abs(smallest_min), std::abs(largest_max)); const float output_range = (biggest_range * (1 << 14)); diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index a37c757865..55a8b9c9b6 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -577,7 +577,7 @@ struct FillPhiloxRandomKernel { const size_t kGroupSize = Distribution::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(0); + const size_t total_item_count = item.get_global_range(); size_t offset = item_id * kGroupSize; gen_.Skip(item_id); @@ -633,7 +633,7 @@ struct FillPhiloxRandomKernel { PhiloxRandom::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(0); + const size_t total_item_count = item.get_global_range(); size_t group_index = item_id; size_t offset = group_index * kGroupSize; diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 4302a68a18..2334e50f1d 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -376,6 +376,9 @@ struct UnsortedSegmentSumFunctor auto data_flat = typename TTypes::ConstTensor(data, N, data_size / N); for (int64 i = 0; i < N; ++i) { Index j = internal::SubtleMustCopy(segment_ids(i)); + if (j < 0) { + continue; + } OP_REQUIRES(ctx, FastBoundsCheck(j, output_rows), errors::InvalidArgument( "segment_ids", SliceDebugString(segment_ids_shape, i), diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 412c1d601d..b10bea72ba 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -30,14 +30,14 @@ namespace functor { #ifdef GOOGLE_CUDA typedef Eigen::GpuDevice GPUDevice; // Functor for SegmentSumGPUOp. -// 'output_rows': the number of output segments (unique segment ids in +// output_rows: the number of output segments (unique segment ids in // 'segment_ids'). -// 'segment_ids_shape': shape of 'segment_ids' tensor. -// 'segment_ids': unsorted map from input to output segment ids at which to +// segment_ids_shape: shape of 'segment_ids' tensor. +// segment_ids: unsorted map from input to output segment ids at which to // perform segment sum operation. -// 'data_size': size of input data tensor. -// 'data': input data tensor. -// 'output': output reshaped to {output_rows, output.size/output_rows} +// data_size: size of input data tensor. +// data: input data tensor. +// output: output reshaped to {output_rows, output.size/output_rows} template struct SegmentSumFunctor { void operator()(OpKernelContext* ctx, const GPUDevice& d, @@ -61,14 +61,14 @@ struct UnsortedSegmentBaseFunctor{ }; // Functor for UnsortedSegmentSumOp. -// 'output_rows': the number of output segments (unique segment ids in +// output_rows: the number of output segments (unique segment ids in // 'segment_ids'). -// 'segment_ids_shape': shape of 'segment_ids' tensor. -// 'segment_ids': unsorted map from input to output segment ids at which to +// segment_ids_shape: shape of 'segment_ids' tensor. +// segment_ids: unsorted map from input to output segment ids at which to // perform segment sum operation. -// 'data_size': size of input data tensor. -// 'data': input data tensor. -// 'output': output reshaped to {output_rows, output.size/output_rows} +// data_size: size of input data tensor. +// data: input data tensor. +// output: output reshaped to {output_rows, output.size/output_rows} template struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, @@ -79,14 +79,14 @@ struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor struct UnsortedSegmentMaxFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc index 721f9b949b..28a39bae3f 100644 --- a/tensorflow/core/kernels/shape_ops.cc +++ b/tensorflow/core/kernels/shape_ops.cc @@ -341,7 +341,12 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .Device(DEVICE_CPU) .HostMemory("dim") .TypeConstraint("Tdim"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_CPU) + .HostMemory("dim") + .TypeConstraint("Tdim"), + ExpandDimsOp); #if GOOGLE_CUDA #define REGISTER_GPU_KERNEL(type) \ @@ -350,7 +355,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); \ + REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tdim") \ + .HostMemory("dim"), \ + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); TF_CALL_bool(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL @@ -362,7 +373,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .TypeConstraint("Tdim") + .HostMemory("input") + .HostMemory("dim") + .HostMemory("output"), + ExpandDimsOp); #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL @@ -372,7 +391,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); \ + REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T") \ + .TypeConstraint("Tdim") \ + .HostMemory("dim"), \ + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); TF_CALL_bool(REGISTER_SYCL_KERNEL); #undef REGISTER_SYCL_KERNEL @@ -384,7 +409,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_SYCL) + .TypeConstraint("T") + .TypeConstraint("Tdim") + .HostMemory("input") + .HostMemory("dim") + .HostMemory("output"), + ExpandDimsOp); #endif // TENSORFLOW_USE_SYCL // Squeeze --------------------------------------- diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h index ac607f4e8b..8d9d0ea846 100644 --- a/tensorflow/core/kernels/shape_ops.h +++ b/tensorflow/core/kernels/shape_ops.h @@ -145,6 +145,7 @@ class SizeOp : public OpKernel { bool IsExpensive() override { return false; } }; +template class ExpandDimsOp : public OpKernel { public: explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -153,7 +154,7 @@ class ExpandDimsOp : public OpKernel { OP_REQUIRES(ctx, ctx->input(0).dtype() != DT_VARIANT, errors::InvalidArgument("ExpandDims on Variant not supported")); - int32 dim = ctx->input(1).flat()(0); + Tdim dim = ctx->input(1).flat()(0); OP_REQUIRES( ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()), errors::InvalidArgument("Tried to expand dim index ", dim, @@ -175,7 +176,7 @@ class ExpandDimsOp : public OpKernel { } // Clamp to the end if needed. - dim = std::min(dim, existing_dims_size); + dim = std::min(dim, existing_dims_size); new_shape.emplace(new_shape.begin() + dim, 1); const TensorShape output_shape(new_shape); @@ -234,10 +235,10 @@ class SqueezeOp : public OpKernel { if (!wrapped_squeeze_dims.empty()) { if (wrapped_squeeze_dims.count(i) > 0) { OP_REQUIRES(ctx, existing_dim == 1, - errors::InvalidArgument( - "Tried to explicitly squeeze " - "dimension ", - i, " but dimension was not 1: ", existing_dim)); + errors::InvalidArgument("Tried to explicitly squeeze " + "dimension ", + i, " but dimension was not 1: ", + existing_dim)); } else { // This dimension is not being squeezed. new_shape.push_back(existing_dim); diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index d46701749b..28a379774b 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -190,41 +190,25 @@ class SliceOp : public OpKernel { } return; } -#define HANDLE_DIM(NDIM) \ - if (input_dims == NDIM) { \ - HandleCase(context, begin, size, result); \ - return; \ +#define HANDLE_DIM(NDIM) \ + if (input_dims == NDIM) { \ + functor::Slice()( \ + context->eigen_device(), result, input, begin, size); \ + return; \ } - HANDLE_DIM(1); HANDLE_DIM(2); HANDLE_DIM(3); HANDLE_DIM(4); HANDLE_DIM(5); HANDLE_DIM(6); - HANDLE_DIM(7); #undef HANDLE_DIM - OP_REQUIRES(context, false, errors::Unimplemented( - "SliceOp : Unhandled input dimensions")); - } - } - - private: - template - void HandleCase(OpKernelContext* context, const gtl::ArraySlice& begin, - const gtl::ArraySlice& size, Tensor* result) { - Eigen::DSizes indices; - Eigen::DSizes sizes; - for (int i = 0; i < NDIM; ++i) { - indices[i] = begin[i]; - sizes[i] = size[i]; + // handle cases which dim >= 7 + functor::Slice()( + context->eigen_device(), result, input, begin, size); } - - functor::Slice()( - context->eigen_device(), result->tensor(), - context->input(0).tensor(), indices, sizes); } }; @@ -264,11 +248,16 @@ class MklSliceOp : public OpKernel { } return; } -#define HANDLE_DIM(NDIM) \ - if (input_dims == NDIM) { \ - HandleCase(context, begin, size, result); \ - return; \ - } + // Special case for handling 4-D tensor slice. + if (input_dims == 4) { + HandleCase4D(context, begin, size, result); + } else { +#define HANDLE_DIM(NDIM) \ + if (input_dims == NDIM) { \ + functor::Slice()( \ + context->eigen_device(), result, input, begin, size); \ + return; \ + } HANDLE_DIM(1); HANDLE_DIM(2); @@ -276,12 +265,13 @@ class MklSliceOp : public OpKernel { HANDLE_DIM(4); HANDLE_DIM(5); HANDLE_DIM(6); - HANDLE_DIM(7); #undef HANDLE_DIM - OP_REQUIRES(context, false, errors::Unimplemented( - "SliceOp : Unhandled input dimensions")); + // handle cases which dim >= 7 + functor::Slice()( + context->eigen_device(), result, input, begin, size); + } } } @@ -328,8 +318,7 @@ class MklSliceOp : public OpKernel { return false; } - template - void HandleCase(OpKernelContext* context, + void HandleCase4D(OpKernelContext* context, const gtl::ArraySlice& begin, const gtl::ArraySlice& size, Tensor* result) { int slice_dim = -1; @@ -338,8 +327,7 @@ class MklSliceOp : public OpKernel { // differs from the input tensor in only 1 out of 4 dimensions. // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW // format over channel dimension. - if (NDIM == 4 && - DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) { + if (DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) { size_t in_strides[4] = { (size_t) in_shape.dim_size(1) * in_shape.dim_size(2) * in_shape.dim_size(3), @@ -403,16 +391,8 @@ class MklSliceOp : public OpKernel { // slice_dim is not 1 or 3, then we fallback to Eigen implementation. } - Eigen::DSizes indices; - Eigen::DSizes sizes; - for (int i = 0; i < NDIM; ++i) { - indices[i] = begin[i]; - sizes[i] = size[i]; - } - - functor::Slice()( - context->eigen_device(), result->tensor(), - context->input(0).tensor(), indices, sizes); + functor::Slice()( + context->eigen_device(), result, context->input(0), begin, size); } }; #endif @@ -420,13 +400,13 @@ class MklSliceOp : public OpKernel { // Forward declarations of the functor specializations for declared in the // sharded source files. namespace functor { -#define DECLARE_CPU_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const CPUDevice& d, typename TTypes::Tensor output, \ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ +#define DECLARE_CPU_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const CPUDevice& d, Tensor* output, \ + const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ @@ -476,13 +456,14 @@ REGISTER_SLICE(bfloat16); #if GOOGLE_CUDA // Forward declarations of the functor specializations for GPU. namespace functor { -#define DECLARE_GPU_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const GPUDevice& d, typename TTypes::Tensor output, \ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ +#define DECLARE_GPU_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const GPUDevice& d, \ + Tensor* output, \ + const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ @@ -536,13 +517,14 @@ REGISTER_KERNEL_BUILDER(Name("Slice") #ifdef TENSORFLOW_USE_SYCL // Forward declarations of the functor specializations for SYCL. namespace functor { -#define DECLARE_SYCL_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const SYCLDevice& d, typename TTypes::Tensor output,\ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ +#define DECLARE_SYCL_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const SYCLDevice& d, \ + Tensor* output, \ + const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h index db7eded745..55a4be985b 100644 --- a/tensorflow/core/kernels/slice_op.h +++ b/tensorflow/core/kernels/slice_op.h @@ -19,31 +19,104 @@ limitations under the License. // Functor definition for SliceOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/ops_util.h" namespace tensorflow { -namespace functor { + +namespace internal { + +template +void SliceSimple(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices); +template +void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices); + +template +void SliceSimple(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices) { + const int ndims = in.dims(); + const int64 nelem = out->NumElements(); + const gtl::InlinedVector in_strides = ComputeStride(in.shape()); + const gtl::InlinedVector out_strides = ComputeStride(out->shape()); + const T* p = in.flat().data(); + T* q = out->flat().data(); + + std::vector i_idx(nelem, 0); + std::vector t(nelem, 0); + + for (int64 o_idx = 0; o_idx < nelem; ++o_idx) { + t[o_idx] = o_idx; + } + for (int i = 0; i < ndims; ++i) { + int64 n = (nelem + 7) / 8; + int64 o_idx = 0; + switch (nelem % 8) { +#define CALC_INPUT_IDX \ + i_idx[o_idx] += (t[o_idx] / out_strides[i] + slice_indices[i]) * in_strides[i]; \ + t[o_idx] %= out_strides[i]; \ + ++o_idx; + case 0: do { CALC_INPUT_IDX; + case 7: CALC_INPUT_IDX; + case 6: CALC_INPUT_IDX; + case 5: CALC_INPUT_IDX; + case 4: CALC_INPUT_IDX; + case 3: CALC_INPUT_IDX; + case 2: CALC_INPUT_IDX; + case 1: CALC_INPUT_IDX; +#undef CALC_INPUT_IDX + } while (--n > 0); + } + } + for (int64 o_idx = 0; o_idx < nelem; ++o_idx) { + q[o_idx] = p[i_idx[o_idx]]; + } +} template +void SliceUsingEigen(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices, + const gtl::ArraySlice& slice_sizes) { + auto input = in.tensor(); + auto output = out->tensor(); + Eigen::DSizes indices; + for (int i = 0; i < NDIMS; ++i) { + indices[i] = slice_indices[i]; + } + Eigen::DSizes sizes; + for (int i = 0; i < NDIMS; ++i) { + sizes[i] = slice_sizes[i]; + } + const bool use_64bit = input.size() > Eigen::NumTraits::highest(); + if (!use_64bit && + Eigen::internal::is_same::value) { + To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes); + } else { + output.device(d) = input.slice(indices, sizes); + } +} + +} // namespace internal + +namespace functor { + +// Template parameter NDIM is not neccesary here. The aim of keeping it +// is to compile struct slice seperately which minimizes the compiling time. +template struct Slice { - void operator()(const Device& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { - bool use_64bit = (input.size() > Eigen::NumTraits::highest()); - if (!use_64bit && - Eigen::internal::is_same::value) { - Eigen::DSizes indices; - for (int i = 0; i < NDIMS; ++i) { - indices[i] = slice_indices[i]; - } - Eigen::DSizes sizes; - for (int i = 0; i < NDIMS; ++i) { - sizes[i] = slice_sizes[i]; - } - To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes); + void operator()(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices, + const gtl::ArraySlice& slice_sizes) { + if (in.dims() == NDIM) { + internal::SliceUsingEigen(d, out, in, slice_indices, slice_sizes); } else { - output.device(d) = input.slice(slice_indices, slice_sizes); + if (Eigen::internal::is_same::value) { + internal::SliceSimpleGpu(d, out, in, slice_indices); + } else { + internal::SliceSimple(d, out, in, slice_indices); + } } } }; diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc index a301986f2f..3039b3d777 100644 --- a/tensorflow/core/kernels/slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc @@ -21,9 +21,65 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" namespace tensorflow { +namespace internal { + +template +__global__ void SliceKernel(int nthreads, const T* src, const int32* buf, + const int32 ndims, T* dst) { + const int32* in_strides = buf; + const int32* out_strides = buf + ndims; + const int32* slice_indices = buf + ndims * 2; + CUDA_1D_KERNEL_LOOP(o_idx, nthreads) { + int32 i_idx = 0; + int32 t = o_idx; + for (int i = 0; i < ndims; ++i) { + i_idx += (t / out_strides[i] + slice_indices[i]) * in_strides[i]; + t %= out_strides[i]; + } + dst[o_idx] = ldg(src + i_idx); + } +} + +template +void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in, + const gtl::ArraySlice& slice_indices) { + // Ensures we can use 32-bit index. + const int64 in_nelem = in.NumElements(); + CHECK_LT(in_nelem, kint32max) << "Tensor too large to transpose on GPU"; + const int64 out_nelem = out->NumElements(); + CHECK_LT(out_nelem, kint32max) << "Tensor too large to transpose on GPU"; + // Pack strides and slice indices sizes into one buffer. + const int32 ndims = in.dims(); + gtl::InlinedVector host_buf(ndims * 3); + gtl::InlinedVector in_strides = ComputeStride(in.shape()); + gtl::InlinedVector out_strides = ComputeStride(out->shape()); + for (int i = 0; i < ndims; ++i) { + host_buf[i] = in_strides[i]; + host_buf[ndims + i] = out_strides[i]; + host_buf[ndims * 2 + i] = slice_indices[i]; + } + auto num_bytes = sizeof(int64) * host_buf.size(); + auto dev_buf = d.allocate(num_bytes); + // NOTE: host_buf is not allocated by CudaHostAllocator, and + // therefore we are doing a sync copy effectively. + d.memcpyHostToDevice(dev_buf, host_buf.data(), num_bytes); + // Launch kernel to q[...] = p[...]. + const T* p = in.flat().data(); + T* q = out->flat().data(); + CudaLaunchConfig cfg = GetCudaLaunchConfig(out_nelem, d); + SliceKernel<<>>( + cfg.virtual_thread_count, p, reinterpret_cast(dev_buf), + ndims, q); + // Safe to deallocate immediately after the kernel launch. + d.deallocate(dev_buf); +} + +} // namespace internal typedef Eigen::GpuDevice GPUDevice; diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 73b6d4cf6a..8fc40db3cc 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index afe3a051e6..7d42887426 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -84,16 +84,16 @@ void HandleStridedSliceCase(OpKernelContext* context, gtl::InlinedVector processing_dims = processing_shape.dim_sizes(); if (is_simple_slice) { - Eigen::DSizes begin_di; - Eigen::DSizes sizes_di; + gtl::InlinedVector sizes(begin.size()); for (int i = 0; i < NDIM; ++i) { - begin_di[i] = begin[i]; - sizes_di[i] = end[i] - begin[i]; + sizes[i] = end[i] - begin[i]; } - functor::Slice()( - context->eigen_device(), - result->bit_casted_shaped(processing_dims), - context->input(0).bit_casted_tensor(), begin_di, sizes_di); + const TensorShape final_shape = result->shape(); + CHECK(result->CopyFrom(*result, processing_shape)); + const Tensor input = context->input(0); + functor::Slice()( + context->eigen_device(), result, input, begin, sizes); + CHECK(result->CopyFrom(*result, final_shape)); } else { Eigen::DSizes begin_di; Eigen::DSizes end_di; @@ -196,10 +196,9 @@ class HandleStridedSliceAssignCase { extern template struct StridedSlice; \ template <> \ void Slice::operator()( \ - const GPUDevice& d, typename TTypes::Tensor output, \ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ + const GPUDevice& d, Tensor* output, const Tensor& input, \ + const gtl::ArraySlice& slice_indices, \ + const gtl::ArraySlice& slice_sizes); \ extern template struct Slice; \ template <> \ void StridedSliceGrad::operator()( \ @@ -284,7 +283,6 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU); TF_CALL_complex64(DECLARE_FOR_N_GPU); TF_CALL_complex128(DECLARE_FOR_N_GPU); DECLARE_FOR_N_GPU(int32); -DECLARE_FOR_N_GPU(int64); #endif // END GOOGLE_CUDA TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); @@ -300,7 +298,6 @@ DECLARE_FOR_N_CPU(bfloat16); TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL); DECLARE_FOR_N_SYCL(int32); -DECLARE_FOR_N_SYCL(int64); #undef DECLARE_FOR_N_SYCL #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/strided_slice_op_test.cc b/tensorflow/core/kernels/strided_slice_op_test.cc index 281ca0f58f..78bb15463c 100644 --- a/tensorflow/core/kernels/strided_slice_op_test.cc +++ b/tensorflow/core/kernels/strided_slice_op_test.cc @@ -76,20 +76,69 @@ static void SliceHelper(int iters, int size) { testing::UseRealTime(); } +template +static void Dim8SliceHelper(int iters, int size) { + testing::StopTiming(); + Graph* g = new Graph(OpRegistry::Global()); + DataType dt = DataTypeToEnum::v(); + int kDim = 100; + int kMaxSize = 15000; + CHECK_LT(size, kMaxSize); + + Tensor begin(DT_INT32, TensorShape({8})); + begin.flat()(10) = 10; + for (int i = 1; i < 7; ++i) { + begin.flat()(i) = 0; + } + begin.flat()(7) = 10; + + Tensor end(DT_INT32, TensorShape({8})); + end.flat()(0) = 10 + kDim; + for (int i = 1; i < 7; ++i) { + end.flat()(i) = 1; + } + end.flat()(7) = 10 + size; + + Tensor strides(DT_INT32, TensorShape({8})); + for (int i = 0; i < 8; ++i) { + strides.flat()(i) = 1; + } + + Tensor input(dt, TensorShape({2*kDim, 1, 1, 1, 1, 1, 1, kMaxSize})); + input.flat().setRandom(); + + Node* node; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "StridedSlice") + .Input(test::graph::Constant(g, input)) + .Input(test::graph::Constant(g, begin)) + .Input(test::graph::Constant(g, end)) + .Input(test::graph::Constant(g, strides)) + .Attr("T", dt) + .Finalize(g, &node)); + + testing::BytesProcessed(static_cast(iters) * kDim * size * sizeof(T)); + testing::StartTiming(); + test::Benchmark("cpu", g).Run(iters); + testing::UseRealTime(); +} + static void BM_SliceFloat(int iters, int dim2) { SliceHelper(iters, dim2); + Dim8SliceHelper(iters, dim2); } BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000); static void BM_SliceComplex64(int iters, int dim2) { SliceHelper>(iters, dim2); + Dim8SliceHelper>(iters, dim2); } BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000); static void BM_SliceBFloat16(int iters, int dim2) { SliceHelper(iters, dim2); + Dim8SliceHelper(iters, dim2); } BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000); diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc index 20f0edf309..96c051c636 100644 --- a/tensorflow/core/kernels/transpose_op.cc +++ b/tensorflow/core/kernels/transpose_op.cc @@ -31,13 +31,14 @@ limitations under the License. namespace tensorflow { -// inv = InvertPermutationOp(T p) takes a permutation of +// inv = InvertPermutationOp(T p) takes a permutation of // integers 0, 1, ..., n - 1 and returns the inverted // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n). // -// REQUIRES: input is a vector of int32. +// REQUIRES: input is a vector of int32 or int64. // REQUIRES: input is a permutation of 0, 1, ..., n-1. +template class InvertPermutationOp : public OpKernel { public: explicit InvertPermutationOp(OpKernelConstruction* context) @@ -48,20 +49,19 @@ class InvertPermutationOp : public OpKernel { OP_REQUIRES( context, TensorShapeUtils::IsVector(input.shape()), errors::InvalidArgument("invert_permutation expects a 1D vector.")); - auto Tin = input.vec(); + auto Tin = input.vec(); OP_REQUIRES(context, FastBoundsCheck(Tin.size(), std::numeric_limits::max()), errors::InvalidArgument("permutation of nonnegative int32s " "must have <= int32 max elements")); - const int32 N = - static_cast(Tin.size()); // Safe: bounds-checked above. + const T N = static_cast(Tin.size()); // Safe: bounds-checked above. Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); - auto Tout = output->vec(); + auto Tout = output->vec(); std::fill_n(Tout.data(), N, -1); for (int i = 0; i < N; ++i) { - const int32 d = internal::SubtleMustCopy(Tin(i)); + const T d = internal::SubtleMustCopy(Tin(i)); OP_REQUIRES(context, FastBoundsCheck(d, N), errors::InvalidArgument(d, " is not between 0 and ", N)); OP_REQUIRES(context, Tout(d) == -1, @@ -73,14 +73,23 @@ class InvertPermutationOp : public OpKernel { REGISTER_KERNEL_BUILDER( Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER( + Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), + InvertPermutationOp); REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER(Name("InvertPermutation") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + InvertPermutationOp); #ifdef TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("InvertPermutation") @@ -88,7 +97,13 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER(Name("InvertPermutation") + .Device(DEVICE_SYCL) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + InvertPermutationOp); #endif // TENSORFLOW_USE_SYCL namespace { diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index 701c5f6d2b..d087784c8a 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include @@ -21,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { @@ -33,8 +35,6 @@ class UniqueOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), - errors::InvalidArgument("unique expects a 1D vector.")); // TODO(dga): Make unique polymorphic for returning int32 and int64 // vectors to support large tensors. OP_REQUIRES(context, @@ -42,31 +42,102 @@ class UniqueOp : public OpKernel { errors::InvalidArgument( "unique does not support input tensors larger than ", std::numeric_limits::max(), " elements")); - auto Tin = input.vec(); - const int64 N = static_cast(Tin.size()); + + int64 axis = 0; + std::vector new_sizes{1, input.NumElements(), 1}; + if (context->num_inputs() == 1) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + // In case of UniqueV2, the axis is a 1D vector. The purpose is + // to allow specifying either "no axis" or "axis". The `[]` means + // "no axis", while `[x]` means `axis = x`. + const Tensor& axis_tensor = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()), + errors::InvalidArgument("axis expects a 1D vector.")); + OP_REQUIRES( + context, axis_tensor.NumElements() <= 1, + errors::InvalidArgument( + "axis does not support input tensors larger than 1 elements")); + if (axis_tensor.NumElements() == 0) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + auto axis_vec = axis_tensor.vec(); + axis = axis_vec(0); + axis = axis < 0 ? axis + input.dims() : axis; + OP_REQUIRES(context, 0 <= axis && axis < input.dims(), + errors::InvalidArgument("axis has to be between [0, ", + input.dims(), ")")); + if (axis > 0) { + for (int64 i = 0; i < axis; i++) { + new_sizes[0] *= input.dim_size(i); + } + } + new_sizes[1] = input.dim_size(axis); + if (axis + 1 < input.dims()) { + for (int64 i = axis + 1; i < input.dims(); i++) { + new_sizes[2] *= input.dim_size(i); + } + } + } + } + + auto Tin = input.shaped(new_sizes); Tensor* idx = nullptr; - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 1, input.shape(), &idx)); + OP_REQUIRES_OK(context, context->allocate_output( + 1, TensorShape({Tin.dimension(1)}), &idx)); auto idx_vec = idx->template vec(); - std::unordered_map uniq; - uniq.reserve(2 * N); - for (int64 i = 0, j = 0; i < N; ++i) { - auto it = uniq.insert(std::make_pair(Tin(i), j)); + auto hash_fn = [&Tin](const int64& key) -> unsigned long { + size_t h = 0; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + h = Hash64Combine(h, hash{}(Tin(i, key, j))); + } + } + return h; + }; + + auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) { + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + if (Tin(i, lhs, j) != Tin(i, rhs, j)) { + return false; + } + } + } + return true; + }; + + std::unordered_map + uniq(0, hash_fn, equal_to_fn); + + uniq.reserve(2 * Tin.dimension(1)); + + for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) { + auto it = uniq.insert(std::make_pair(i, j)); idx_vec(i) = it.first->second; if (it.second) { ++j; } } + int64 uniq_size = static_cast(uniq.size()); + new_sizes[1] = uniq_size; + TensorShape output_shape(input.shape()); + output_shape.set_dim(axis, uniq_size); Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({uniq_size}), &output)); - auto output_vec = output->template vec(); + OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); + auto Tout = output->shaped(new_sizes); for (auto it : uniq) { - output_vec(it.second) = it.first; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + Tout(i, it.second, j) = Tin(i, it.first, j); + } + } } if (num_outputs() > 2) { @@ -74,7 +145,7 @@ class UniqueOp : public OpKernel { 2, TensorShape({uniq_size}), &output)); auto count_output_vec = output->template vec(); count_output_vec.setZero(); - for (int64 i = 0; i < N; ++i) { + for (int64 i = 0; i < Tin.dimension(1); ++i) { count_output_vec(idx_vec(i))++; } } @@ -92,6 +163,16 @@ class UniqueOp : public OpKernel { .TypeConstraint("T") \ .TypeConstraint("out_idx"), \ UniqueOp); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("out_idx"), \ + UniqueOp); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("out_idx"), \ + UniqueOp); \ REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ @@ -176,5 +257,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique") .HostMemory("y") .HostMemory("idx"), UniqueOp); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index be2916f154..9fa6423d59 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -723,7 +723,9 @@ y: a tensor of the same shape and type as x but filled with zeros. REGISTER_OP("OnesLike") .Input("x: T") .Output("y: T") - .Attr("T: {float, double, int32, int64, complex64, complex128}") + .Attr( + "T: {float, double, int8, uint8, int16, uint16, int32, int64, " + "complex64, complex128, bool}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns a tensor of ones with the same shape and type as x. @@ -2031,6 +2033,46 @@ y: 1-D. idx: 1-D. )doc"); +REGISTER_OP("UniqueV2") + .Input("x: T") + .Input("axis: int64") + .Output("y: T") + .Output("idx: out_idx") + .Attr("T: type") + .Attr("out_idx: {int32, int64} = DT_INT32") + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); + c->set_output(1, c->input(0)); + return Status::OK(); + }) + .Doc(R"doc( +Finds unique elements in a 1-D tensor. + +This operation returns a tensor `y` containing all of the unique elements of `x` +sorted in the same order that they occur in `x`. This operation also returns a +tensor `idx` the same size as `x` that contains the index of each value of `x` +in the unique output `y`. In other words: + +`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` + +For example: + +``` +# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +y, idx = unique(x) +y ==> [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` + + +x: A `Tensor`. +axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to + find the unique elements. +y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. +idx: A 1-D Tensor. Has the same type as x that contains the index of each + value of x in the output y. +)doc"); + // -------------------------------------------------------------------------- REGISTER_OP("UniqueWithCounts") .Input("x: T") diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 7b10af9f44..d30b847696 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1829,6 +1829,8 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e245c8ba91..a242a13878 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -819,7 +819,7 @@ REGISTER_OP("DepthwiseConv2dNative") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -945,7 +945,7 @@ REGISTER_OP("Conv3D") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -977,7 +977,7 @@ REGISTER_OP("Conv3DBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropInputV2") @@ -1003,7 +1003,7 @@ REGISTER_OP("Conv3DBackpropFilter") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropFilterV2") @@ -1032,7 +1032,7 @@ REGISTER_OP("Conv3DBackpropInputV2") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -1069,7 +1069,7 @@ REGISTER_OP("Conv3DBackpropFilterV2") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 6ce0b70c9d..9c41957ae6 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5449,6 +5449,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5515,6 +5516,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5570,6 +5572,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5635,6 +5638,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5690,6 +5694,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index f746b15fee..f2fadb4558 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -12,6 +12,7 @@ load("//tensorflow:tensorflow.bzl", "tf_copts") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path") +load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp") cc_library( name = "gtest", @@ -194,17 +195,16 @@ cc_library( cc_library( name = "sycl", - data = [ + data = if_ccpp([ "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")), - ], - linkopts = select({ - "//conditions:default": [ - "-Wl,-rpath,../local_config_sycl/sycl/lib", - ], - }), - deps = [ - "@local_config_sycl//sycl:syclrt", - ], + ]), + linkopts = if_ccpp([ + "-Wl,-rpath,../local_config_sycl/sycl/lib", + ]), + deps = if_ccpp( + ["@local_config_sycl//sycl:syclrt"], + ["@local_config_sycl//sycl:sycl_headers"], + ), ) filegroup( diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h index 6a214dbd0a..5c401b7477 100644 --- a/tensorflow/core/platform/default/notification.h +++ b/tensorflow/core/platform/default/notification.h @@ -73,7 +73,7 @@ class Notification { } mutex mu_; // protects mutations of notified_ - condition_variable cv_; // signalled when notified_ becomes non-zero + condition_variable cv_; // signaled when notified_ becomes non-zero std::atomic notified_; // mutations under mu_ }; diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc index e9baad5422..f8b0285c50 100644 --- a/tensorflow/core/platform/posix/error.cc +++ b/tensorflow/core/platform/posix/error.cc @@ -72,7 +72,7 @@ error::Code ErrnoToCode(int err_number) { case EBUSY: // Device or resource busy case ECHILD: // No child processes case EISCONN: // Socket is connected -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case ENOTBLK: // Block device required #endif case ENOTCONN: // The socket is not connected @@ -94,7 +94,7 @@ error::Code ErrnoToCode(int err_number) { case ENODATA: // No message is available on the STREAM read queue case ENOMEM: // Not enough space case ENOSR: // No STREAM resources -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case EUSERS: // Too many users #endif code = error::RESOURCE_EXHAUSTED; @@ -111,7 +111,7 @@ error::Code ErrnoToCode(int err_number) { case EPFNOSUPPORT: // Protocol family not supported #endif case EPROTONOSUPPORT: // Protocol not supported -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case ESOCKTNOSUPPORT: // Socket type not supported #endif case EXDEV: // Improper link @@ -131,7 +131,8 @@ error::Code ErrnoToCode(int err_number) { case ENETUNREACH: // Network unreachable case ENOLCK: // No locks available case ENOLINK: // Link has been severed -#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32)) +#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \ + || defined(__HAIKU__)) case ENONET: // Machine is not on the network #endif code = error::UNAVAILABLE; @@ -156,7 +157,7 @@ error::Code ErrnoToCode(int err_number) { case ENOEXEC: // Exec format error case ENOMSG: // No message of the desired type case EPROTO: // Protocol error -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case EREMOTE: // Object is remote #endif code = error::UNKNOWN; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 6cba40ccfc..09f69a95c1 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,7 +37,8 @@ limitations under the License. #ifdef TF_USE_SNAPPY #include "snappy.h" #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ + || defined(__HAIKU__) #include #endif @@ -61,7 +62,8 @@ int NumSchedulableCPUs() { } perror("sched_getaffinity"); #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ + || defined(__HAIKU__) unsigned int count = std::thread::hardware_concurrency(); if (count > 0) return static_cast(count); #endif diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1bf9c93101..ec077c4283 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 1bfa4f83a3..118ff0d0d6 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -26,18 +26,23 @@ limitations under the License. #include "mkl_trans.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" - #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" + +using mkldnn::memory; +using mkldnn::reorder; +using mkldnn::primitive; +using mkldnn::padding_kind; +using mkldnn::engine; #endif // The file contains a number of utility classes and functions used by MKL @@ -51,6 +56,8 @@ namespace tensorflow { // Tensorflow tensor. typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims; +typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3, + Dim_O = 0, Dim_I = 1 } MklDnnDims; class MklShape { public: @@ -143,7 +150,9 @@ class MklShape { size_t GetDimension() const { return dimension_; } const size_t* GetSizes() const { return sizes_; } int64 dim_size(int index) const { return sizes_[index]; } - int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; } + int64 tf_dim_size(int index) const { + return sizes_[tf_to_mkl_dim_map_[index]]; + } const size_t* GetStrides() const { return strides_; } const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; } size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; } @@ -227,7 +236,8 @@ class MklShape { (IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_ // Location of sizes. Note dim is not used here, left here // to make macros consistent. -#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t)) +#define SIZES_OFFSET(dims) \ + (DIMS_OFFSET + sizeof(size_t)) #define STRIDES_OFFSET(dims) \ (SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides #define MKL_LAYOUT_OFFSET(dims) \ @@ -309,6 +319,266 @@ class MklShape { nullptr; // TF dimension corresponding to this MKL dimension }; +#ifdef INTEL_MKL_DNN + +// Forward decl +TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format); + +class MklDnnShape { + private: + typedef struct { + /// Flag to indicate if the tensor is an MKL tensor or not + bool is_mkl_tensor_ = false; + /// Number of dimensions in Tensorflow format + size_t dimension_ = 0; + /// Required by MKLDNN for conversions + mkldnn_dims_t sizes_; // Required by MKL for conversions + memory::format tf_data_format_ = memory::format::format_undef; + memory::data_type T_ = memory::data_type::data_undef; + // MKL layout + mkldnn_memory_desc_t mkl_md_; + /// TF dimension corresponding to this MKL dimension + mkldnn_dims_t map_; + } MklShapeData; + MklShapeData data_; + + typedef std::remove_extent::type mkldnn_dim_t; +#define INVALID_DIM_SIZE -1 + + + public: + MklDnnShape() { + for (size_t i = 0; i < sizeof(data_.sizes_) / + sizeof(data_.sizes_[0]); ++i) { + data_.sizes_[i] = -1; + } + for (size_t i = 0; i < sizeof(data_.map_) / + sizeof(data_.map_[0]); ++i) { + data_.map_[i] = -1; + } + } + + ~MklDnnShape() {} + TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape); // Cannot copy + + inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; } + inline void SetMklTensor(bool is_mkl_tensor) { + data_.is_mkl_tensor_ = is_mkl_tensor; + } + + inline void SetDimensions(const size_t dimension) { + data_.dimension_ = dimension; + } + inline size_t GetDimension(char dimension)const { + int index = GetMklDnnTensorDimIndex(dimension); + CHECK(index >= 0 && index < this->GetDimension()) + << "Invalid index from the dimension: " << index << ", " << dimension; + return this->DimSize(index); + } + + inline int32 GetMklDnnTensorDimIndex(char dimension)const { + switch (dimension) { + case 'N': + return MklDnnDims::Dim_N; + case 'C': + return MklDnnDims::Dim_C; + case 'H': + return MklDnnDims::Dim_H; + case 'W': + return MklDnnDims::Dim_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value + } + } + + inline size_t GetDimension() const { return data_.dimension_; } + inline const int* GetSizes() const { + return reinterpret_cast(&data_.sizes_[0]); + } + + // Returns an mkldnn::memory::dims object that contains the sizes of this + // MklDnnShape object. + inline memory::dims GetSizesAsMklDnnDims() const { + memory::dims retVal; + if (data_.is_mkl_tensor_) { + int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); + for (size_t i = 0 ; i < dimensions; i++) { + if (data_.sizes_[i] != INVALID_DIM_SIZE) + retVal.push_back(data_.sizes_[i]); + } + } else { + CHECK_EQ(data_.is_mkl_tensor_, true); + } + return retVal; + } + + inline int64 DimSize(int index) const { + CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0])); + return data_.sizes_[index]; + } + + /// Return TensorShape that describes the Tensorflow shape of the tensor + /// represented by this MklShape. + inline TensorShape GetTfShape() { + CHECK_EQ(data_.is_mkl_tensor_, true); + + std::vector shape(data_.dimension_, -1); + for (size_t idx = 0; idx < data_.dimension_; ++idx) { + shape[idx] = data_.sizes_[TfDimIdx(idx)]; + } + + TensorShape ts; + bool ret = TensorShapeUtils::MakeShape(shape, &ts).ok(); + CHECK_EQ(ret, true); + return ts; + } + + inline void SetElemType(memory::data_type dt) { data_.T_ = dt; } + inline const memory::data_type GetElemType() { return data_.T_; } + + inline void SetMklLayout(memory::primitive_desc* pd) { + CHECK_NOTNULL(pd); + data_.mkl_md_ = pd->desc().data; + } + inline const memory::desc GetMklLayout() const { + return memory::desc(data_.mkl_md_); + } + + inline memory::format GetTfDataFormat() const { + return data_.tf_data_format_; + } + /// We don't create primitive_descriptor for TensorFlow layout now. + /// We use lazy evaluation and create it only when needed. + inline void SetTfLayout(size_t dims, const memory::dims& sizes, + memory::format format) { + CHECK_EQ(dims, sizes.size()); + data_.dimension_ = dims; + for (size_t ii = 0; ii < dims; ii++) { + data_.sizes_[ii] = sizes[ii]; + } + data_.tf_data_format_ = format; + SetTfDimOrder(dims, format); + } + inline const memory::desc GetTfLayout() const { + memory::dims dims; + for (size_t ii = 0; ii < data_.dimension_; ii++) { + dims.push_back(data_.sizes_[ii]); + } + return memory::desc(dims, data_.T_, data_.tf_data_format_); + } + inline const memory::desc GetCurLayout() const { + return IsMklTensor() ? GetMklLayout() : GetTfLayout(); + } + + // nhasabni - I've removed SetTfDimOrder that was setting default order in + // case of MKL-ML. We don't need a case of default dimension order because + // when an operator that does not get data_format attribute gets all inputs + // in Tensorflow format, it will produce output in Tensorflow format. + inline void SetTfDimOrder(const size_t dimension, const mkldnn_dims_t map) { + CHECK(dimension == data_.dimension_); + for (size_t ii = 0; ii < dimension; ii++) { + data_.map_[ii] = map[ii]; + } + } + + inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) { + // TODO(nhasabni): Why do we restrict this to 4D? + CHECK_EQ(dimension, 4); + CHECK(dimension == data_.dimension_); + data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; + data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; + data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C; + data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; + } + + inline void SetTfDimOrder(const size_t dimension, memory::format format) { + TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format); + SetTfDimOrder(dimension, data_format); + } + + inline const mkldnn_dim_t* GetTfToMklDimMap() const { + return &data_.map_[0]; + } + inline size_t TfDimIdx(int index) const { return data_.map_[index]; } + inline int64 TfDimSize(int index) const { + return data_.sizes_[TfDimIdx(index)]; + } + + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Channel dimension. + inline bool IsMklChannelDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_C; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Batch dimension. + inline bool IsMklBatchDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_N; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Width dimension. + inline bool IsMklWidthDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_W; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Height dimension. + inline bool IsMklHeightDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_H; + } + + /// Check if the TF-Mkl dimension ordering map specifies if the input + /// tensor is in NCHW format. + inline bool IsTensorInNCHWFormat() const { + TensorFormat data_format = FORMAT_NCHW; + return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && + IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && + IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && + IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); + } + + /// Check if the TF-Mkl dimension ordering map specifies if the input + /// tensor is in NHWC format. + inline bool IsTensorInNHWCFormat() const { + TensorFormat data_format = FORMAT_NHWC; + return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && + IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && + IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && + IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); + } + + /// The following methods are used for serializing and de-serializing the + /// contents of the mklshape object. + /// The data is serialized in this order + /// is_mkl_tensor_ : dimension_ : sizes_ : map_: format_ : T_ : mkl_pd_; + + /// Size of buffer to hold the serialized object, the size is computed by + /// following above mentioned order + inline size_t GetSerializeBufferSize() const { + return sizeof(MklShapeData); + } + + void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const { + CHECK(buf_size >= GetSerializeBufferSize()) + << "Buffer size is too small to SerializeMklDnnShape"; + *reinterpret_cast(buf) = data_; + } + + void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) { + // Make sure buffer holds at least is_mkl_tensor_. + CHECK(buf_size >= sizeof(data_.is_mkl_tensor_)) + << "Buffer size is too small in DeSerializeMklDnnShape"; + + const bool is_mkl_tensor = *reinterpret_cast(buf); + if (is_mkl_tensor) { // If it is an MKL Tensor then read the rest + CHECK(buf_size >= GetSerializeBufferSize()) + << "Buffer size is too small in DeSerializeMklDnnShape"; + data_ = *reinterpret_cast(buf); + } + } +}; + +#endif + // List of MklShape objects. Used in Concat/Split layers. typedef std::vector MklShapeList; @@ -347,6 +617,36 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } +#ifdef INTEL_MKL_DNN +template +inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, + const MklDnnShape& mkl_shape) { + Tensor output_tensor; + TensorShape output_shape; + +#if 0 + // TODO(nhasabni): need to implement + for (size_t j = 0; j < mkl_shape.GetDimension(); j++) { + // Outermost to innermost dimension + output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]); + } + + // Allocate output tensor. + context->allocate_temp(DataTypeToEnum::v(), output_shape, &output_tensor); + + dnnLayout_t output_layout = static_cast(mkl_shape.GetTfLayout()); + void* input_buffer = const_cast(mkl_tensor.flat().data()); + void* output_buffer = const_cast(output_tensor.flat().data()); + + if (mkl_tensor.NumElements() != 0) { + mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer); + } +#endif + + return output_tensor; +} +#endif + // Get the MKL shape from the second string tensor inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { mklshape->DeSerializeMklShape( @@ -359,6 +659,20 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +inline void GetMklShape(OpKernelContext* ctext, int n, + MklDnnShape* mklshape) { + mklshape->DeSerializeMklDnnShape( + ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) + .flat() + .data(), + ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) + .flat() + .size() * + sizeof(uint8)); +} +#endif + // Gets the actual input inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) { return ctext->input(GetTensorDataIndex(n, ctext->num_inputs())); @@ -382,6 +696,27 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, } } +#ifdef INTEL_MKL_DNN +/// Get shape of input tensor pointed by 'input_idx' in TensorShape format. +/// If the input tensor is in MKL layout, then obtains TensorShape from +/// MklShape. +inline TensorShape GetTfShape(OpKernelContext* context, + size_t input_idx) { + // Sanity check. + CHECK_NOTNULL(context); + CHECK_LT(input_idx, context->num_inputs()); + + MklDnnShape input_mkl_shape; + GetMklShape(context, input_idx, &input_mkl_shape); + if (input_mkl_shape.IsMklTensor()) { + return input_mkl_shape.GetTfShape(); + } else { + const Tensor& t = MklGetInput(context, input_idx); + return t.shape(); + } +} +#endif + // Allocate the second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -397,6 +732,23 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +// Allocate the second output tensor that will contain +// the MKL shape serialized +inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, + const MklDnnShape& mkl_shape) { + Tensor* second_tensor = nullptr; + TensorShape second_shape; + second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); + OP_REQUIRES_OK(ctext, ctext->allocate_output( + GetTensorMetaDataIndex(n, ctext->num_outputs()), + second_shape, &second_tensor)); + mkl_shape.SerializeMklDnnShape( + second_tensor->flat().data(), + second_tensor->flat().size() * sizeof(uint8)); +} +#endif + // Allocate the output tensor, create a second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -417,9 +769,43 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +// Allocate the output tensor, create a second output tensor that will contain +// the MKL shape serialized +inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, + Tensor** output, + const TensorShape& tf_shape, + const MklDnnShape& mkl_shape) { + Tensor* second_tensor = nullptr; + TensorShape second_shape; + second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); + OP_REQUIRES_OK( + ctext, ctext->allocate_output(GetTensorDataIndex(n, ctext->num_outputs()), + tf_shape, output)); + OP_REQUIRES_OK(ctext, ctext->allocate_output( + GetTensorMetaDataIndex(n, ctext->num_outputs()), + second_shape, &second_tensor)); + mkl_shape.SerializeMklDnnShape( + second_tensor->flat().data(), + second_tensor->flat().size() * sizeof(uint8)); +} +#endif + // Allocates a temp tensor and returns the data buffer for temporary storage. // Currently -// we only support F32, will need to templatize if other types are added +#ifdef INTEL_MKL_DNN +template +inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, + const memory::primitive_desc& pd, void** buf_out) { + TensorShape tf_shape; + + tf_shape.AddDim(pd.get_size() / sizeof(T) + 1); + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), + tf_shape, tensor_out)); + *buf_out = static_cast(tensor_out->flat().data()); +} +#endif + inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, dnnLayout_t lt_buff, void** buf_out) { TensorShape tf_shape; @@ -435,7 +821,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, template inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, - TensorShape tf_shape) { + TensorShape tf_shape) { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), tf_shape, tensor_out)); } @@ -669,6 +1055,8 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0, return true; } +// These functions do not compile with MKL-DNN since mkl.h is missing. +// We may need to remove them later. // TODO(intel_tf): Remove this routine when faster MKL layout conversion is // out. inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) { @@ -707,18 +1095,11 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { #ifdef INTEL_MKL_DNN -using mkldnn::engine; -using mkldnn::memory; -using mkldnn::padding_kind; -using mkldnn::primitive; -using mkldnn::reorder; - /// Return MKL-DNN data type (memory::data_type) for input type T /// /// @input None /// @return memory::data_type corresponding to type T -template -static memory::data_type MklDnnType(); +template static memory::data_type MklDnnType(); /// Instantiation for float type. Add similar instantiations for other /// type if needed. @@ -733,15 +1114,26 @@ memory::data_type MklDnnType() { /// @return: memory::format corresponding to TensorFlow data format; /// Fails with an error if invalid data format. inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { - if (format == FORMAT_NHWC) - return memory::format::nhwc; - else if (format == FORMAT_NCHW) - return memory::format::nchw; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); + if (format == FORMAT_NHWC) return memory::format::nhwc; + else if (format == FORMAT_NCHW) return memory::format::nchw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, + "Unsupported data format")); // Return to get rid of compiler warning return memory::format::format_undef; } +/// Map MKL-DNN data format to TensorFlow's data format +/// +/// @input: memory::format +/// @return: Tensorflow data format corresponding to memory::format +/// Fails with an error if invalid data format. +inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { + if (format == memory::format::nhwc) return FORMAT_NHWC; + else if (format == memory::format::nchw) return FORMAT_NCHW; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, + "Unsupported data format")); +} + /// Map TensorShape object into memory::dims required by MKL-DNN /// /// This function will simply map input TensorShape into MKL-DNN dims @@ -753,7 +1145,7 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return memory::dims corresponding to TensorShape inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { memory::dims dims(shape.dims()); - for (unsigned int d = 0; d < shape.dims(); ++d) { + for (int d = 0; d < shape.dims(); ++d) { dims[d] = shape.dim_size(d); } return dims; @@ -769,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { /// @input TensorShape object in shape /// @return memory::dims in MKL-DNN required NCHW format inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, - TensorFormat format) { + TensorFormat format) { // Check validity of format. CHECK_NE(TFDataFormatToMklDnnDataFormat(format), memory::format::format_undef); @@ -783,6 +1175,43 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, return memory::dims({n, c, h, w}); } +/// Map MklDnn memory::dims object into TensorShape object. +/// +/// This function will simply map input shape in MKL-DNN memory::dims format +/// in Tensorflow's TensorShape object by perserving dimension order. +/// +/// @input MKL-DNN memory::dims object +/// @output TensorShape corresponding to memory::dims +inline TensorShape MklDnnDimsToTFShape(const memory::dims& dims) { + std::vector shape(dims.size(), -1); + for (int d = 0; d < dims.size(); d++) { + shape[d] = dims[d]; + } + + TensorShape ret; + CHECK_EQ(TensorShapeUtils::MakeShape(shape, &ret).ok(), true); + return ret; +} + +/// Function to calculate strides given tensor shape in Tensorflow order +/// E.g., if dims_tf_order is {1, 2, 3, 4}, then as per Tensorflow convention, +/// dimesion with size 1 is outermost dimension; while dimension with size 4 is +/// innermost dimension. So strides for this tensor would be {4 * 3 * 2, +/// 4 * 3, 4, 1}, i.e., {24, 12, 4, 1}. +/// +/// @input Tensorflow shape in memory::dims type +/// @return memory::dims containing strides for the tensor. +inline memory::dims CalculateTFStrides(const memory::dims& dims_tf_order) { + CHECK_GT(dims_tf_order.size(), 0); + memory::dims strides(dims_tf_order.size()); + int last_dim_idx = dims_tf_order.size() - 1; + strides[last_dim_idx] = 1; + for (int d = last_dim_idx - 1; d >= 0; d--) { + strides[d] = strides[d + 1] * dims_tf_order[d + 1]; + } + return strides; +} + inline padding_kind TFPaddingToMklDnnPadding(Padding pad) { // MKL-DNN only supports zero padding. return padding_kind::zero; @@ -808,23 +1237,21 @@ class MklDnnData { const engine* cpu_engine_; public: - explicit MklDnnData(const engine* e) - : user_memory_(nullptr), - reorder_memory_(nullptr), - op_md_(nullptr), - cpu_engine_(e) {} + explicit MklDnnData(const engine* e) : user_memory_(nullptr), + reorder_memory_(nullptr), + op_md_(nullptr), cpu_engine_(e) {} ~MklDnnData() { cpu_engine_ = nullptr; // We don't own this. - delete (user_memory_); - delete (reorder_memory_); - delete (op_md_); + delete(user_memory_); + delete(reorder_memory_); + delete(op_md_); } - void* GetTensorBuffer(const Tensor* tensor) { + inline void* GetTensorBuffer(const Tensor* tensor) const { CHECK_NOTNULL(tensor); - return const_cast( - static_cast(tensor->flat().data())); + return const_cast(static_cast( + tensor->flat().data())); } /// Set user memory primitive using specified dimensions, memory format and @@ -835,35 +1262,83 @@ class MklDnnData { /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and /// memory format HWIO, and the buffer that contains actual values is /// pointed by data_buffer. - void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) { - CHECK_NOTNULL(data_buffer); - CHECK_NOTNULL(cpu_engine_); - // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = - new memory(memory::primitive_desc( - memory::desc(dim, MklDnnType(), fm), *cpu_engine_), - data_buffer); + inline void SetUsrMem(const memory::dims& dim, memory::format fm, + void* data_buffer = nullptr) { + auto md = memory::desc(dim, MklDnnType(), fm); + SetUsrMem(md, data_buffer); } - void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) { + inline void SetUsrMem(const memory::dims& dim, memory::format fm, + const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(dim, fm, GetTensorBuffer(tensor)); } + /// Helper function to create memory descriptor in Blocked format + /// + /// @input: Tensor dimensions + /// @input: strides corresponding to dimensions. One can use utility + /// function such as CalculateTFStrides to compute strides + /// for given dimensions. + /// @return: memory::desc object corresponding to blocked memory format + /// for given dimensions and strides. + static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim, + const memory::dims& strides) { + CHECK_EQ(dim.size(), strides.size()); + + // We have to construct memory descriptor in a C style. This is not at all + // ideal but MKLDNN does not offer any API to construct descriptor in + // blocked format except a copy constructor that accepts + // mkldnn_memory_desc_t. + mkldnn_memory_desc_t md; + md.primitive_kind = mkldnn_memory; + md.ndims = dim.size(); + md.format = mkldnn_blocked; + md.data_type = memory::convert_to_c(MklDnnType()); + + for (size_t i = 0; i < dim.size(); i++) { + md.layout_desc.blocking.block_dims[i] = 1; + md.layout_desc.blocking.strides[1][i] = 1; + md.layout_desc.blocking.strides[0][i] = strides[i]; + md.layout_desc.blocking.padding_dims[i] = dim[i]; + md.layout_desc.blocking.offset_padding_to_data[i] = 0; + md.dims[i] = dim[i]; + } + md.layout_desc.blocking.offset_padding = 0; + + return memory::desc(md); + } + + /// A version of SetUsrMem call that allows user to create memory in blocked + /// format. So in addition to accepting dimensions, it also accepts strides. + /// This allows user to create memory for tensor in a format that is not + /// supported by MKLDNN. E.g., MKLDNN does not support tensor format for 6 + /// dimensional tensor as a native format. But by using blocked format, a user + /// can create memory for 6D tensor. + inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, + void* data_buffer = nullptr) { + CHECK_EQ(dim.size(), strides.size()); + auto blocked_md = MklDnnData::CreateBlockedMemDesc(dim, strides); + SetUsrMem(blocked_md, data_buffer); + } + + inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, + const Tensor* tensor) { + CHECK_NOTNULL(tensor); + SetUsrMem(dim, strides, GetTensorBuffer(tensor)); + } + /// A version of function to set user memory primitive that accepts memory /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - void SetUsrMem(memory::desc md, void* data_buffer) { - CHECK_NOTNULL(data_buffer); - CHECK_NOTNULL(cpu_engine_); - // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = - new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer); + inline void SetUsrMem(const memory::desc& md, void* data_buffer = nullptr) { + auto pd = memory::primitive_desc(md, *cpu_engine_); + SetUsrMem(pd, data_buffer); } /// A version of SetUsrMem with memory descriptor and tensor - void SetUsrMem(memory::desc md, const Tensor* tensor) { + inline void SetUsrMem(const memory::desc& md, const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(md, GetTensorBuffer(tensor)); } @@ -872,41 +1347,60 @@ class MklDnnData { /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - void SetUsrMem(memory::primitive_desc pd, void* data_buffer) { - CHECK_NOTNULL(data_buffer); + inline void SetUsrMem(const memory::primitive_desc& pd, + void* data_buffer = nullptr) { CHECK_NOTNULL(cpu_engine_); // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = new memory(pd, data_buffer); + if (data_buffer) { + user_memory_ = new memory(pd, data_buffer); + } else { + user_memory_ = new memory(pd); + } } /// A version of SetUsrMem with primitive descriptor and tensor - void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) { + inline void SetUsrMem(const memory::primitive_desc& pd, + const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(pd, GetTensorBuffer(tensor)); } /// Get function for user memory primitive. - const memory* GetUsrMem() const { return user_memory_; } + inline const memory* GetUsrMem() const { return user_memory_; } /// Get function for primitive descriptor of user memory primitive. - const memory::primitive_desc GetUsrMemPrimDesc() const { + inline const memory::primitive_desc GetUsrMemPrimDesc() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_primitive_desc(); } /// Get function for descriptor of user memory. - memory::desc GetUsrMemDesc() { + inline memory::desc GetUsrMemDesc() { // This is ugly. Why MKL-DNN does not provide desc() method of const type?? const memory::primitive_desc pd = GetUsrMemPrimDesc(); return const_cast(&pd)->desc(); } /// Get function for data buffer of user memory primitive. - void* GetUsrMemDataHandle() const { + inline void* GetUsrMemDataHandle() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_data_handle(); } + /// Set function for data buffer of user memory primitive. + inline void* SetUsrMemDataHandle(void* data_buffer) { + CHECK_NOTNULL(user_memory_); + CHECK_NOTNULL(data_buffer); + return user_memory_->set_data_handle(data_buffer); + } + + /// Set function for data buffer of user memory primitive. + inline void SetUsrMemDataHandle(const Tensor* tensor) { + CHECK_NOTNULL(user_memory_); + CHECK_NOTNULL(tensor); + user_memory_->set_data_handle(GetTensorBuffer(tensor)); + } + /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -915,7 +1409,7 @@ class MklDnnData { /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is /// required for I and F (say I_r is reorder primitive for I; F_r is reorder /// primitive for F), then we need I_r and F_r to perform Conv2D. - const memory& GetOpMem() const { + inline const memory& GetOpMem() const { return reorder_memory_ ? *reorder_memory_ : *user_memory_; } @@ -923,13 +1417,32 @@ class MklDnnData { /// format. E.g., For Conv2D, the dimensions would be same as user dimensions /// but memory::format would be mkldnn::any because we want MKL-DNN to choose /// best layout/format for given input dimensions. - void SetOpMemDesc(const memory::dims& dim, memory::format fm) { + inline void SetOpMemDesc(const memory::dims& dim, memory::format fm) { // TODO(nhasabni): can we remove dynamic memory allocation? op_md_ = new memory::desc(dim, MklDnnType(), fm); } /// Get function for memory descriptor for an operation - const memory::desc& GetOpMemDesc() const { return *op_md_; } + inline const memory::desc& GetOpMemDesc() const { return *op_md_; } + + /// Predicate that checks if we need to reorder user's memory into memory + /// pointed by op_pd. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool IsReorderNeeded(const memory::primitive_desc& op_pd) const { + CHECK_NOTNULL(user_memory_); + return op_pd != user_memory_->get_primitive_desc(); + } + + /// Function to create a reorder from memory pointed by from to memory pointed + /// by to. Returns created primitive. + inline primitive CreateReorder(const memory* from, const memory* to) const { + CHECK_NOTNULL(from); + CHECK_NOTNULL(to); + return reorder(*from, *to); + } /// Function to handle input reordering /// @@ -945,19 +1458,62 @@ class MklDnnData { /// operation /// @input: net - net to which to add reorder primitive in case it is needed. /// @return: true in case reorder of input is needed; false, otherwise. - bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, - std::vector* net) { + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); - if (op_pd != user_memory_->get_primitive_desc()) { + if (IsReorderNeeded(op_pd)) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); - net->push_back(reorder(*user_memory_, *reorder_memory_)); + net->push_back(CreateReorder(user_memory_, reorder_memory_)); + return true; + } + return false; + } + + /// Overloaded version of above function that accepts memory buffer + /// where output of reorder needs to be stored. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @reorder_data_handle - memory buffer where output of reorder needs to be + /// stored. Primitive does not check if buffer is + /// enough size to write. + /// @input: net - net to which to add reorder primitive in case it is needed. + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + void* reorder_data_handle, + std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(reorder_data_handle); + CHECK_NOTNULL(user_memory_); + if (IsReorderNeeded(op_pd)) { + // TODO(nhasabni): can we remove dynamic memory allocation? + reorder_memory_ = new memory(op_pd, reorder_data_handle); + net->push_back(CreateReorder(user_memory_, reorder_memory_)); return true; } return false; } + /// Another overloaded version of CheckReorderToOpMem that accepts Tensor + /// where output of reorder needs to be stored. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @reorder_tensor - Tensor whose buffer is to be used to store output of + /// reorder. Primitive does not check if buffer is + /// enough size to write. + /// @input: net - net to which to add reorder primitive in case it is needed. + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + Tensor* reorder_tensor, + std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(reorder_tensor); + return CheckReorderToOpMem(op_pd, GetTensorBuffer(reorder_tensor), net); + } + /// Function to handle output reorder /// /// This function performs very similar functionality as input reordering @@ -970,9 +1526,10 @@ class MklDnnData { /// /// @input memory primitive descriptor for the given output of an operation /// @return: true in case reorder of output is needed; false, otherwise. - bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) { + inline bool PrepareReorderToUserMemIfReq( + const memory::primitive_desc& op_pd) { CHECK_NOTNULL(user_memory_); - if (op_pd != user_memory_->get_primitive_desc()) { + if (IsReorderNeeded(op_pd)) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); return true; @@ -987,11 +1544,11 @@ class MklDnnData { /// to the user-specified output buffer. /// /// @input: net - net to which to add reorder primitive - void InsertReorderToUserMem(std::vector* net) { + inline void InsertReorderToUserMem(std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); CHECK_NOTNULL(reorder_memory_); - net->push_back(reorder(*reorder_memory_, *user_memory_)); + net->push_back(CreateReorder(reorder_memory_, user_memory_)); } }; diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc new file mode 100644 index 0000000000..6aef3d86e9 --- /dev/null +++ b/tensorflow/core/util/mkl_util_test.cc @@ -0,0 +1,92 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/util/mkl_util.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +#ifdef INTEL_MKL_DNN + +TEST(MklUtilTest, MklDnnTfShape) { + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData a(&cpu_engine); + + const int N = 1, C = 2, H = 3, W = 4; + memory::dims a_dims = {N, C, H, W}; + MklDnnShape a_mkldnn_shape; + a_mkldnn_shape.SetMklTensor(true); + // Create TF layout in NCHW. + a_mkldnn_shape.SetTfLayout(a_dims.size(), a_dims, memory::format::nchw); + TensorShape a_tf_shape_nchw({N, C, H, W}); + TensorShape a_tf_shape_nhwc({N, H, W, C}); + TensorShape a_mkldnn_tf_shape = a_mkldnn_shape.GetTfShape(); + // Check that returned shape is in NCHW format. + EXPECT_EQ(a_tf_shape_nchw, a_mkldnn_tf_shape); + EXPECT_NE(a_tf_shape_nhwc, a_mkldnn_tf_shape); + + memory::dims b_dims = {N, C, H, W}; + MklDnnShape b_mkldnn_shape; + b_mkldnn_shape.SetMklTensor(true); + // Create TF layout in NHWC. + b_mkldnn_shape.SetTfLayout(b_dims.size(), b_dims, memory::format::nhwc); + TensorShape b_tf_shape_nhwc({N, H, W, C}); + TensorShape b_tf_shape_nchw({N, C, H, W}); + TensorShape b_mkldnn_tf_shape = b_mkldnn_shape.GetTfShape(); + // Check that returned shape is in NHWC format. + EXPECT_EQ(b_tf_shape_nhwc, b_mkldnn_tf_shape); + EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape); +} + + +TEST(MklUtilTest, MklDnnBlockedFormatTest) { + // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension + // first (case 1) and then it being outermost dimension (case 2). + auto cpu_engine = engine(engine::cpu, 0); + + // Setting for case 1 + MklDnnData a(&cpu_engine); + memory::dims dim1 = {3, 4}; + memory::dims strides1 = {1, 3}; + a.SetUsrMem(dim1, strides1); + + memory::desc a_md1 = a.GetUsrMemDesc(); + EXPECT_EQ(a_md1.data.ndims, 2); + EXPECT_EQ(a_md1.data.dims[0], 3); + EXPECT_EQ(a_md1.data.dims[1], 4); + EXPECT_EQ(a_md1.data.format, mkldnn_blocked); + + // Setting for case 2 + MklDnnData b(&cpu_engine); + memory::dims dim2 = {3, 4}; + memory::dims strides2 = {4, 1}; + b.SetUsrMem(dim2, strides2); + + memory::desc b_md2 = b.GetUsrMemDesc(); + EXPECT_EQ(b_md2.data.ndims, 2); + EXPECT_EQ(b_md2.data.dims[0], 3); + EXPECT_EQ(b_md2.data.dims[1], 4); + EXPECT_EQ(b_md2.data.format, mkldnn_blocked); +} + +#endif // INTEL_MKL_DNN +} // namespace +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md index ab95ce0af9..8ad4c4c075 100644 --- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md +++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md @@ -3,7 +3,7 @@ Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded, queue-based input pipelines for performance. Beginning with TensorFlow 1.4, however, we recommend using the `tf.data` module instead. (See -[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was +@{$datasets$Datasets} for details. In TensorFlow 1.2 and 1.3, the module was called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use interface for constructing efficient input pipelines. Furthermore, we've stopped developing the old multi-threaded, queue-based input pipelines. We've retained diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md index 8409962744..be14ab4026 100644 --- a/tensorflow/docs_src/get_started/get_started.md +++ b/tensorflow/docs_src/get_started/get_started.md @@ -272,7 +272,7 @@ train = optimizer.minimize(loss) ``` ```python -sess.run(init) # reset values to incorrect defaults. +sess.run(init) # reset variables to incorrect defaults. for i in range(1000): sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}) @@ -317,7 +317,7 @@ y_train = [0, -1, -2, -3] # training loop init = tf.global_variables_initializer() sess = tf.Session() -sess.run(init) # reset values to wrong +sess.run(init) # initialize variables with incorrect defaults. for i in range(1000): sess.run(train, {x: x_train, y: y_train}) @@ -383,7 +383,7 @@ train_input_fn = tf.estimator.inputs.numpy_input_fn( eval_input_fn = tf.estimator.inputs.numpy_input_fn( {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False) -# We can invoke 1000 training steps by invoking the method and passing the +# We can invoke 1000 training steps by invoking the method and passing the # training data set. estimator.train(input_fn=input_fn, steps=1000) diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 9d3af5d96a..0db5c6143a 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -191,7 +191,7 @@ import pandas as pd def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True): return tf.estimator.inputs.pandas_input_fn( - x=pdDataFrame(...), + x=pd.DataFrame(...), y=pd.Series(...), num_epochs=num_epochs, shuffle=shuffle) @@ -267,8 +267,8 @@ tf.logging.set_verbosity(tf.logging.INFO) Define the column names for the data set in `COLUMNS`. To distinguish features from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs -(@{tf.train}, -@{tf.test}, and +([train](http://download.tensorflow.org/data/boston_train.csv), +[test](http://download.tensorflow.org/data/boston_test.csv), and [predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_ `DataFrame`s: diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 3a153e8114..df622c6ac5 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index df43255896..8b3da49a0d 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index f7f2c3cdc7..6eb8158249 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.4.0-rc1 + 1.4.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.4.0-rc1 + 1.4.0 @@ -124,7 +124,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -143,7 +143,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -151,10 +151,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip). 3. Extract this .zip file. @@ -202,7 +202,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
    javac -cp libtensorflow-1.4.0-rc1.jar HelloTF.java
    +
    javac -cp libtensorflow-1.4.0.jar HelloTF.java
    ### Running @@ -216,11 +216,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
    java -cp libtensorflow-1.4.0-rc1.jar:. -Djava.library.path=./jni HelloTF
    +
    java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF
    And the following command line executes the `HelloTF` program on Windows: -
    java -cp libtensorflow-1.4.0-rc1.jar;. -Djava.library.path=jni HelloTF
    +
    java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF
    If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 414ab7b1f7..f7380bac8a 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
    (tensorflow)$ pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    + https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
          $ sudo pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    +     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
          
    If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
          (tensorflow)$ pip install --ignore-installed --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    + https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp27-none-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp27-none-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
     
    Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
     
    Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
     
    @@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
     
    diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 9a95710bfa..79b383817b 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -114,7 +114,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
     $ pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -235,7 +235,7 @@ take the following steps: issue the following command:
     $ sudo pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl 
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -344,7 +344,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
     (tensorflow)$ pip install --ignore-installed --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl @@ -517,7 +517,7 @@ This section documents the relevant values for Mac OS installations.
    -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
    +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
     
    @@ -525,7 +525,7 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-a
    -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py3-none-any.whl
    +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
     
    diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 6d0dcdcd4a..aa4ae6c876 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -355,10 +355,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.4.0rc1 on Linux: +for TensorFlow 1.4.0 on Linux:
    -$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0rc1-py2-none-any.whl
    +$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl
     
    ## Validate your installation @@ -447,8 +447,10 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - - + + + + @@ -460,7 +462,8 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.4.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
    tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
    tensorflow_gpu-1.4.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.468
    tensorflow-1.3.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.3.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
    tensorflow-1.2.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.2.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.55.18
    tensorflow-1.1.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.2N/AN/A
    - + + @@ -471,8 +474,10 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
    tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.2.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.1.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.2N/AN/A
    tensorflow_gpu-1.1.0GPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.25.18
    - - + + + + diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index c5a560e074..8fc65be35a 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -296,6 +296,6 @@ complains about missing header files, add the .h’s that are needed into the [`android_extended_ops`](https://www.tensorflow.org/code/tensorflow/core/kernels/BUILD#L3525) target. -If you’re using a makefile targetting iOS, Raspberry Pi, etc, go to +If you’re using a makefile targeting iOS, Raspberry Pi, etc, go to [`tensorflow/contrib/makefile/tf_op_files.txt`](https://www.tensorflow.org/code/tensorflow/contrib/makefile/tf_op_files.txt) and add the right implementation files there. diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 1f856bbf3f..25cb72008d 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -9,11 +9,19 @@ lets you view the internal structure and states of running TensorFlow graphs during training and inference, which is difficult to debug with general-purpose debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm. -> NOTE: The system requirements of tfdbg on supported external platforms include -> the following. On Mac OS X, the `ncurses` library is required. It can be -> installed with `brew install homebrew/dupes/ncurses`. On Windows, `pyreadline` -> is required. If you use Anaconda3, you can install it with a command +> NOTE: TensorFlow debugger uses a +> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based +> text user interface. On Mac OS X, the `ncurses` library is required and can +> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses +> isn't as well supported, so a +> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can +> be used with tfdbg by installing `pyreadline` with pip. +> If you use Anaconda3, you can install it with a command > such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. +> Unofficial Windows curses packages can be downloaded +> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently +> installed using `pip install .whl`, however curses on Windows +> may not work as reliably as curses on Linux or Mac. This tutorial demonstrates how to use the **tfdbg** command-line interface (CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN) @@ -149,6 +157,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `pt [slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` | | | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` | | | `-r ` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` | +| | `-n ` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` | | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` | | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` | | **`/regex`** | | [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` | @@ -166,10 +175,12 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` | | | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` | +| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` | | **`lo`** | | **List output recipients of node** | | | | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` | | | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` | +| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` | | **`ls`** | | **List Python source files involved in node creation.** | | | | `-p ` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` | | | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` | diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md index d6f80430cd..88eb277e35 100644 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ b/tensorflow/docs_src/programmers_guide/tensors.md @@ -29,8 +29,8 @@ Some types of tensors are special, and these will be covered in other units of the Programmer's guide. The main ones are: * `tf.Variable` - * `tf.Constant` - * `tf.Placeholder` + * `tf.constant` + * `tf.placeholder` * `tf.SparseTensor` With the exception of `tf.Variable`, the value of a tensor is immutable, which @@ -64,7 +64,7 @@ The following snippet demonstrates creating a few rank 0 variables: mammal = tf.Variable("Elephant", tf.string) ignition = tf.Variable(451, tf.int16) floating = tf.Variable(3.14159265359, tf.float64) -its_complicated = tf.Variable((12.3, -4.85), tf.complex64) +its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64) ``` Note: A string is treated as a single item in TensorFlow, not as a sequence of @@ -79,7 +79,7 @@ initial value. For example: mystr = tf.Variable(["Hello"], tf.string) cool_numbers = tf.Variable([3.14159, 2.71828], tf.float32) first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32) -its_very_complicated = tf.Variable([(12.3, -4.85), (7.5, -6.23)], tf.complex64) +its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64) ``` @@ -275,8 +275,8 @@ Graphs and Sessions for more information). Sometimes it is not possible to evaluate a `tf.Tensor` with no context because its value might depend on dynamic information that is not available. For -example, tensors that depend on `Placeholder`s can't be evaluated without -providing a value for the `Placeholder`. +example, tensors that depend on `placeholder`s can't be evaluated without +providing a value for the `placeholder`. ``` python p = tf.placeholder(tf.float32) diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py index 82d6a94ea1..ab611f414a 100644 --- a/tensorflow/examples/speech_commands/models.py +++ b/tensorflow/examples/speech_commands/models.py @@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, first_filter_height = input_time_size first_filter_count = 186 first_filter_stride_x = 1 - first_filter_stride_y = 4 + first_filter_stride_y = 1 first_weights = tf.Variable( tf.truncated_normal( [first_filter_height, first_filter_width, 1, first_filter_count], diff --git a/tensorflow/go/android.go b/tensorflow/go/android.go new file mode 100644 index 0000000000..3db3ddfec5 --- /dev/null +++ b/tensorflow/go/android.go @@ -0,0 +1,20 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build android + +package tensorflow + +// #cgo LDFLAGS: -landroid -llog -lm -lz -ldl +import "C" diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go index 7cba043af2..40c951ab8c 100644 --- a/tensorflow/go/operation_test.go +++ b/tensorflow/go/operation_test.go @@ -123,6 +123,14 @@ func TestOutputDataTypeAndShape(t *testing.T) { []int64{2, 3}, Double, }, + { // Matrix of Uint64 + [][]uint64{ + {1, 2, 3}, + {4, 5, 6}, + }, + []int64{2, 3}, + Uint64, + }, } for idx, test := range testdata { t.Run(fmt.Sprintf("#%d Value %T", idx, test.Value), func(t *testing.T) { diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 36a74c0081..1326a95278 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -101,7 +101,7 @@ func NewTensor(value interface{}) (*Tensor, error) { return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len()) } } else { - e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()} + e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()} if err := e.encode(reflect.ValueOf(value), shape); err != nil { return nil, err } @@ -207,6 +207,9 @@ func (t *Tensor) WriteContentsTo(w io.Writer) (int64, error) { func tensorData(c *C.TF_Tensor) []byte { // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices cbytes := C.TF_TensorData(c) + if cbytes == nil { + return nil + } length := int(C.TF_TensorByteSize(c)) slice := (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length] return slice @@ -310,7 +313,7 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { if err := w.WriteByte(b); err != nil { return err } - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Write(w, nativeEndian, v.Interface()); err != nil { return err } @@ -349,7 +352,7 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect. return err } ptr.Elem().SetBool(b == 1) - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil { return err } diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 35bd2fd9a5..674a8ce86f 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -34,11 +34,15 @@ func TestNewTensor(t *testing.T) { {nil, int64(5)}, {nil, uint8(5)}, {nil, uint16(5)}, + {nil, uint32(5)}, + {nil, uint64(5)}, {nil, float32(5)}, {nil, float64(5)}, {nil, complex(float32(5), float32(6))}, {nil, complex(float64(5), float64(6))}, {nil, "a string"}, + {[]int64{1}, []uint32{1}}, + {[]int64{1}, []uint64{1}}, {[]int64{2}, []bool{true, false}}, {[]int64{1}, []float64{1}}, {[]int64{1}, [1]float64{1}}, @@ -71,11 +75,6 @@ func TestNewTensor(t *testing.T) { // native ints not supported int(5), []int{5}, - // uint32 and uint64 are not supported in TensorFlow - uint32(5), - []uint32{5}, - uint64(5), - []uint64{5}, // Mismatched dimensions [][]float32{{1, 2, 3}, {4}}, // Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG" diff --git a/tensorflow/java/src/main/java/org/tensorflow/Shape.java b/tensorflow/java/src/main/java/org/tensorflow/Shape.java index 9aa92be111..d533c3d480 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Shape.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Shape.java @@ -77,6 +77,24 @@ public final class Shape { return shape[i]; } + @Override + public int hashCode() { + return Arrays.hashCode(shape); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (obj instanceof Shape && Arrays.equals(this.shape, ((Shape) obj).shape)) { + return !hasUnknownDimension(); + } + + return super.equals(obj); + } + /** Succinct description of the shape meant for debugging. */ @Override public String toString() { @@ -98,4 +116,18 @@ public final class Shape { } private long[] shape; + + private boolean hasUnknownDimension() { + if (shape == null) { + return true; + } + + for (long dimension : shape) { + if (dimension == -1) { + return true; + } + } + + return false; + } } diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index 3b027700c5..92cc3bd60e 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -16,6 +16,7 @@ limitations under the License. package org.tensorflow; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import org.junit.Test; import org.junit.runner.RunWith; @@ -77,4 +78,29 @@ public class ShapeTest { assertEquals(5, n.shape().size(1)); } } + + @Test + public void equalsWorksCorrectly() { + assertEquals(Shape.scalar(), Shape.scalar()); + assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3)); + + assertNotEquals(Shape.make(1,2), null); + assertNotEquals(Shape.make(1,2), new Object()); + assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4)); + + + assertNotEquals(Shape.unknown(), Shape.unknown()); + assertNotEquals(Shape.make(-1), Shape.make(-1)); + assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3)); + } + + @Test + public void hashCodeIsAsExpected() { + assertEquals(Shape.make(1, 2, 3, 4).hashCode(), Shape.make(1, 2, 3, 4).hashCode()); + assertEquals(Shape.scalar().hashCode(), Shape.scalar().hashCode()); + assertEquals(Shape.unknown().hashCode(), Shape.unknown().hashCode()); + + assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode()); + } } + diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 5ae4aace16..54c43c1337 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5,7 +5,10 @@ package( default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", + "//tensorflow/contrib/lite/toco/python:__pkg__", "//tensorflow_models:__subpackages__", + # TODO(aselle): to pass open source test. + "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__", ], ) @@ -45,6 +48,7 @@ py_library( "//tensorflow/compiler/aot/tests:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn/python/learn/datasets:__pkg__", # TODO(b/34059704): remove when fixed + "//tensorflow/contrib/lite/toco/python:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/debug:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/tools:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/tools/api/generator:__pkg__", diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index 62fea05867..fa5d02c476 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -117,7 +117,7 @@ class _Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=estimator_spec.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... upate train_op and hooks in EstimatorSpec and return + ... update train_op and hooks in EstimatorSpec and return ``` """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index c9f37f06e8..3512f66284 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import collections +from six import string_types from tensorflow.python.estimator.inputs.queues import feeding_functions # Key name to pack the target into dict of `features`. See @@ -51,8 +52,9 @@ def numpy_input_fn(x, num_threads=1): """Returns input function that would feed dict of numpy arrays into the model. - This returns a function outputting `features` and `target` based on the dict - of numpy arrays. The dict `features` has the same keys as the `x`. + This returns a function outputting `features` and `targets` based on the dict + of numpy arrays. The dict `features` has the same keys as the `x`. The dict + `targets` has the same keys as the `y` if `y` is a dict. Example: @@ -69,7 +71,7 @@ def numpy_input_fn(x, Args: x: dict of numpy array object. - y: numpy array object. `None` if absent. + y: numpy array object or dict of numpy array object. `None` if absent. batch_size: Integer, size of batches to return. num_epochs: Integer, number of epochs to iterate over data. If `None` will run forever. @@ -81,11 +83,13 @@ def numpy_input_fn(x, such as in prediction and evaluation mode, `num_threads` should be 1. Returns: - Function, that has signature of ()->(dict of `features`, `target`) + Function, that has signature of ()->(dict of `features`, `targets`) Raises: ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e., values in `x` have same shape). + ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict. + ValueError: if x or y is an empty dict. TypeError: `x` is not a dict or `shuffle` is not bool. """ @@ -97,43 +101,76 @@ def numpy_input_fn(x, """Numpy input function.""" if not isinstance(x, dict): raise TypeError('x must be dict; got {}'.format(type(x).__name__)) + if not x: + raise ValueError('x cannot be empty') # Make a shadow copy and also ensure the order of iteration is consistent. - ordered_dict_x = collections.OrderedDict( + ordered_dict_data = collections.OrderedDict( sorted(x.items(), key=lambda t: t[0])) + # Deep copy keys which is a view in python 3 + feature_keys = list(ordered_dict_data.keys()) + + if y is None: + target_keys = None + elif isinstance(y, dict): + if not y: + raise ValueError('y cannot be empty dict, use None instead.') + + ordered_dict_y = collections.OrderedDict( + sorted(y.items(), key=lambda t: t[0])) + target_keys = list(ordered_dict_y.keys()) + + duplicate_keys = set(feature_keys).intersection(set(target_keys)) + if len(duplicate_keys): + raise ValueError('{} duplicate keys are found in both x and y: ' + '{}'.format(len(duplicate_keys), duplicate_keys)) + + ordered_dict_data.update(ordered_dict_y) + else: + target_keys = _get_unique_target_key(ordered_dict_data) + ordered_dict_data[target_keys] = y + + if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1: + shape_dict_of_x = {k: ordered_dict_data[k].shape + for k in feature_keys} + + if target_keys is None: + shape_of_y = None + elif isinstance(target_keys, string_types): + shape_of_y = y.shape + else: + shape_of_y = {k: ordered_dict_data[k].shape + for k in target_keys} - unique_target_key = _get_unique_target_key(ordered_dict_x) - if y is not None: - ordered_dict_x[unique_target_key] = y - - if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1: - shape_dict_of_x = {k: ordered_dict_x[k].shape - for k in ordered_dict_x.keys()} - shape_of_y = None if y is None else y.shape raise ValueError('Length of tensors in x and y is mismatched. All ' 'elements in x and y must have the same length.\n' 'Shapes in x: {}\n' - 'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y)) + 'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y)) queue = feeding_functions._enqueue_data( # pylint: disable=protected-access - ordered_dict_x, + ordered_dict_data, queue_capacity, shuffle=shuffle, num_threads=num_threads, enqueue_size=batch_size, num_epochs=num_epochs) - features = (queue.dequeue_many(batch_size) if num_epochs is None + batch = (queue.dequeue_many(batch_size) if num_epochs is None else queue.dequeue_up_to(batch_size)) - # Remove the first `Tensor` in `features`, which is the row number. - if len(features) > 0: - features.pop(0) + # Remove the first `Tensor` in `batch`, which is the row number. + if len(batch) > 0: + batch.pop(0) - features = dict(zip(ordered_dict_x.keys(), features)) - if y is not None: - target = features.pop(unique_target_key) + features = dict(zip(feature_keys, batch[:len(feature_keys)])) + if target_keys is None: + # TODO(martinwicke), return consistent result + return features + elif isinstance(target_keys, string_types): + target = batch[-1] + return features, target + else: + target = dict(zip(target_keys, batch[-len(target_keys):])) return features, target - return features return input_fn diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 02df22b632..65eae7a7dc 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -239,6 +239,40 @@ class NumpyIoTest(test.TestCase): x, y, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() + def testNumpyInputFnWithXIsEmptyDict(self): + x = {} + y = np.arange(4) + with self.test_session(): + with self.assertRaisesRegexp(ValueError, 'x cannot be empty'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + + def testNumpyInputFnWithYIsNone(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = None + + with self.test_session() as session: + input_fn = numpy_io.numpy_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + features_tensor = input_fn() + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(session, coord=coord) + + feature = session.run(features_tensor) + self.assertEqual(len(feature), 2) + self.assertAllEqual(feature['a'], [0, 1]) + self.assertAllEqual(feature['b'], [32, 33]) + + session.run([features_tensor]) + with self.assertRaises(errors.OutOfRangeError): + session.run([features_tensor]) + + coord.request_stop() + coord.join(threads) + def testNumpyInputFnWithNonBoolShuffle(self): x = np.arange(32, 36) y = np.arange(4) @@ -285,6 +319,59 @@ class NumpyIoTest(test.TestCase): num_epochs=1) failing_input_fn() + def testNumpyInputFnWithYAsDict(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)} + + with self.test_session() as session: + input_fn = numpy_io.numpy_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + features_tensor, targets_tensor = input_fn() + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(session, coord=coord) + + features, targets = session.run([features_tensor, targets_tensor]) + self.assertEqual(len(features), 2) + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + self.assertEqual(len(targets), 2) + self.assertAllEqual(targets['y1'], [-32, -31]) + self.assertAllEqual(targets['y2'], [32, 31]) + + session.run([features_tensor, targets_tensor]) + with self.assertRaises(errors.OutOfRangeError): + session.run([features_tensor, targets_tensor]) + + coord.request_stop() + coord.join(threads) + + def testNumpyInputFnWithYIsEmptyDict(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {} + with self.test_session(): + with self.assertRaisesRegexp(ValueError, 'y cannot be empty'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + + def testNumpyInputFnWithDuplicateKeysInXAndY(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {'y1': np.arange(-32, -28), + 'a': a, + 'y2': np.arange(32, 28, -1), + 'b': b} + with self.test_session(): + with self.assertRaisesRegexp( + ValueError, '2 duplicate keys are found in both x and y'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 2785aed13e..dc4ffb1747 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -860,6 +860,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None): inputs, which allows those ops to accept numpy arrays, Python lists, and scalars in addition to `Tensor` objects. + Note: This function diverges from default Numpy behavior for `float` and + `string` types when `None` is present in a Python list or scalar. Rather + than silently converting `None` values, an error will be thrown. + Args: value: An object whose type has a registered `Tensor` conversion function. dtype: Optional element type for the returned tensor. If missing, the diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 7e74c19124..e283542172 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -286,6 +286,7 @@ _TF_TO_IS_OK = { dtypes.bool: [_FilterBool], dtypes.complex128: [_FilterComplex], dtypes.complex64: [_FilterComplex], + dtypes.float16: [_FilterFloat], dtypes.float32: [_FilterFloat], dtypes.float64: [_FilterFloat], dtypes.int16: [_FilterInt], diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index cfa5fe5e3e..1610214d54 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -986,8 +986,9 @@ class TensorFlowTestCase(googletest.TestCase): err: A float value. msg: An optional string message to append to the failure message. """ + # f1 == f2 is needed here as we might have: f1, f2 = inf, inf self.assertTrue( - math.fabs(f1 - f2) <= err, + f1 == f2 or math.fabs(f1 - f2) <= err, "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg if msg is not None else "")) diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 6eb9c66d06..76b80e60ea 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -107,22 +107,41 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): def setUp(self): self.rng = np.random.RandomState(42) - def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None): + def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None): """Check equivalence between boolean_mask and numpy masking.""" if make_mask is None: make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool) arr = np.random.rand(*arr_shape) mask = make_mask(arr_shape[:ndims_mask]) - masked_arr = arr[mask] - with self.test_session(): - masked_tensor = array_ops.boolean_mask(arr, mask) + if axis is not None: + mask = make_mask(arr_shape[axis:ndims_mask+axis]) + if axis is None or axis == 0: + masked_arr = arr[mask] + elif axis == 1: + masked_arr = arr[:,mask] + elif axis == 2: + masked_arr = arr[:,:,mask] + with self.test_session() as sess: + masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) # Leading dimension size of masked_tensor is always unknown until runtime # since we don't how many elements will be kept. - self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:]) + leading = 1 if axis is None else axis + 1 + self.assertAllEqual(masked_tensor.get_shape()[leading:], + masked_arr.shape[leading:]) self.assertAllClose(masked_arr, masked_tensor.eval()) + def testMaskDim1ArrDim2Axis1(self): + ndims_mask = 1 + for arr_shape in [(1, 1), (2, 2), (2, 5)]: + self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) + + def testMaskDim2ArrDim2Axis1(self): + ndims_mask = 2 + for arr_shape in [(1, 1), (2, 2), (2, 5)]: + self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) + def testMaskDim1ArrDim1(self): ndims_mask = 1 for arr_shape in [(1,), (2,), (3,), (10,)]: @@ -486,7 +505,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): _ = checker2[...] _ = checker2[tuple()] - def testFloatSlicedArrayAndInt64IndicesGPU(self): + def testInt64GPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") with self.test_session(use_gpu=True, force_gpu=True): @@ -497,17 +516,6 @@ class StridedSliceTest(test_util.TensorFlowTestCase): s = array_ops.strided_slice(x, begin, end, strides) self.assertAllEqual([3.], self.evaluate(s)) - def testInt64SlicedArrayAndIndicesGPU(self): - if not test_util.is_gpu_available(): - self.skipTest("No GPU available") - with self.test_session(use_gpu=True, force_gpu=True): - x = constant_op.constant([1, 2, 3], dtype=dtypes.int64) - begin = constant_op.constant([2], dtype=dtypes.int64) - end = constant_op.constant([3], dtype=dtypes.int64) - strides = constant_op.constant([1], dtype=dtypes.int64) - s = array_ops.strided_slice(x, begin, end, strides) - self.assertAllEqual([3], self.evaluate(s)) - def testDegenerateSlices(self): with self.test_session(use_gpu=True): checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR) @@ -1070,6 +1078,16 @@ class PadTest(test_util.TensorFlowTestCase): [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) +class InvertPermutationTest(test_util.TensorFlowTestCase): + + def testInvertPermutation(self): + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype) + y = array_ops.invert_permutation(x) + self.assertAllEqual(y.get_shape(), [5]) + self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1]) + if __name__ == "__main__": test_lib.main() diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 7a610debd1..79285476b4 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -25,11 +25,10 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest - class BincountTest(test_util.TensorFlowTestCase): def test_empty(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0]) self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0]) @@ -42,7 +41,7 @@ class BincountTest(test_util.TensorFlowTestCase): np.float64) def test_values(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1]) arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5] @@ -57,14 +56,14 @@ class BincountTest(test_util.TensorFlowTestCase): math_ops.bincount(np.arange(10000)).eval(), np.ones(10000)) def test_maxlength(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0]) self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1]) self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), []) def test_random_with_weights(self): num_samples = 10000 - with self.test_session(): + with self.test_session(use_gpu=True): np.random.seed(42) for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]: arr = np.random.randint(0, 1000, num_samples) @@ -72,17 +71,29 @@ class BincountTest(test_util.TensorFlowTestCase): weights = np.random.randint(-100, 100, num_samples) else: weights = np.random.random(num_samples) - self.assertAllEqual( + self.assertAllClose( math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights)) + def test_random_without_weights(self): + num_samples = 10000 + with self.test_session(use_gpu=True): + np.random.seed(42) + for dtype in [np.int32, np.float32]: + arr = np.random.randint(0, 1000, num_samples) + weights = np.ones(num_samples).astype(dtype) + self.assertAllClose( + math_ops.bincount(arr, None).eval(), + np.bincount(arr, weights)) + def test_zero_weights(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(), np.zeros(1000)) def test_negative(self): + # unsorted_segment_sum will only report InvalidArgumentError on CPU with self.test_session(): with self.assertRaises(errors.InvalidArgumentError): math_ops.bincount([1, 2, 3, -1, 6, 8]).eval() diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py index 6db3592055..e612b1c134 100644 --- a/tensorflow/python/kernel_tests/bucketize_op_test.py +++ b/tensorflow/python/kernel_tests/bucketize_op_test.py @@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]), boundaries=[0, 3, 8, 11]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def testFloat(self): @@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]), boundaries=[0., 3., 8., 11.]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def test2DInput(self): @@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]), boundaries=[0, 3, 8, 11]) expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def testInvalidBoundariesOrder(self): op = math_ops._bucketize( constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11]) - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: with self.assertRaisesRegexp( errors_impl.InvalidArgumentError, "Expected sorted boundaries"): sess.run(op) diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 6167cb9999..6cbdd4cbb3 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -439,9 +439,10 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeCPU(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, - dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64, + dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, + dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, + dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.string ]: self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False) @@ -573,9 +574,10 @@ class OnesLikeTest(test.TestCase): def testOnesLike(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, - dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64 + dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, + dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, + dtypes_lib.complex64, dtypes_lib.complex128 ]: numpy_dtype = dtype.as_numpy_dtype with self.test_session(): diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index b67a4e3f89..a7e23ead1c 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -17,6 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -50,5 +53,45 @@ class Conv1DTest(test.TestCase): self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) + def testConv1DTranspose(self): + with self.test_session(): + stride = 2 + + # Input, output: [batch, width, depth] + x_shape = [2, 4, 3] + y_shape = [2, 9, 2] + + # Filter: [kernel_width, output_depth, input_depth] + f_shape = [3, 2, 3] + + x = constant_op.constant( + 1.0, shape=x_shape, name="x", dtype=dtypes.float32) + f = constant_op.constant( + 1.0, shape=f_shape, name="filter", dtype=dtypes.float32) + output = nn_ops.conv1d_transpose( + x, f, y_shape, stride=stride, padding="VALID") + value = output.eval() + + cache_values = np.zeros(y_shape, dtype=np.float32) + + # The amount of padding added + pad = 1 + + for n in xrange(x_shape[0]): + for k in xrange(f_shape[1]): + for w in xrange(pad, y_shape[1] - pad): + target = 3.0 + # We add a case for locations divisible by the stride. + w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad + if w_in: + target += 3.0 + cache_values[n, w, k] = target + + # copy values in the border + cache_values[n, 0, k] = cache_values[n, 1, k] + cache_values[n, -1, k] = cache_values[n, -2, k] + + self.assertAllClose(cache_values, value) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index 14622ab467..116681fc4c 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -21,6 +21,8 @@ from __future__ import print_function import collections import math +import numpy as np + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util @@ -45,8 +47,19 @@ def GetTestConfigs(): class Conv3DTest(test.TestCase): + def _DtypesToTest(self, use_gpu): + if use_gpu: + if not test_util.CudaSupportsHalfMatMulAndConv(): + return [dtypes.float32] + else: + # It is important that float32 comes before float16 here, + # as we will be using its gradients as reference for fp16 gradients. + return [dtypes.float32, dtypes.float16] + else: + return [dtypes.float64, dtypes.float32, dtypes.float16] + def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride, - padding, data_format, use_gpu): + padding, data_format, dtype, use_gpu): total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: @@ -54,13 +67,14 @@ class Conv3DTest(test.TestCase): for s in filter_in_sizes: total_size_2 *= s - # Initializes the input tensor with array containing incrementing - # numbers from 1. - x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] - x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] + # Initializes the input tensor with array containing numbers from 0 to 1. + # We keep the input tensor values fairly small to avoid overflowing a float16 + # tensor during the conv3d + x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] + x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu): - t1 = constant_op.constant(x1, shape=tensor_in_sizes) - t2 = constant_op.constant(x2, shape=filter_in_sizes) + t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) + t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) if isinstance(stride, collections.Iterable): strides = [1] + list(stride) + [1] @@ -81,27 +95,35 @@ class Conv3DTest(test.TestCase): expected): results = [] for data_format, use_gpu in GetTestConfigs(): - result = self._SetupValuesForDevice( - tensor_in_sizes, - filter_in_sizes, - stride, - padding, - data_format, - use_gpu=use_gpu) - results.append(result) - tolerance = 1e-2 if use_gpu else 1e-5 + for dtype in self._DtypesToTest(use_gpu): + result = self._SetupValuesForDevice( + tensor_in_sizes, + filter_in_sizes, + stride, + padding, + data_format, + dtype, + use_gpu=use_gpu) + results.append(result) + with self.test_session() as sess: values = sess.run(results) for value in values: print("expected = ", expected) print("actual = ", value) - self.assertAllClose(expected, value.flatten(), atol=tolerance, - rtol=1e-6) + tol = 1e-6 + if value.dtype == np.float16: + tol = 1e-3 + + self.assertAllClose(expected, value.flatten(), atol=tol, + rtol=tol) def testConv3D1x1x1Filter(self): expected_output = [ - 30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0, - 204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0 + 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5 , + 0.59259259, 0.62962963, 0.77777778, 0.92592593, 0.85185185, + 1.05555556, 1.25925926, 1.07407407, 1.33333333, 1.59259259, + 1.2962963 , 1.61111111, 1.92592593 ] # These are equivalent to the Conv2D1x1 case. @@ -127,8 +149,10 @@ class Conv3DTest(test.TestCase): # Expected values computed using scipy's correlate function. def testConv3D2x2x2Filter(self): expected_output = [ - 19554., 19962., 20370., 22110., 22590., 23070., 34890., 35730., 36570., - 37446., 38358., 39270., 50226., 51498., 52770., 52782., 54126., 55470. + 3.77199074, 3.85069444, 3.92939815, 4.2650463 , 4.35763889, + 4.45023148, 6.73032407, 6.89236111, 7.05439815, 7.22337963, + 7.39930556, 7.57523148, 9.68865741, 9.93402778, 10.17939815, + 10.18171296, 10.44097222, 10.70023148 ] # expected_shape = [1, 3, 1, 2, 5] self._VerifyValues( @@ -140,69 +164,19 @@ class Conv3DTest(test.TestCase): def testConv3DStrides(self): expected_output = [ - 102., - 151., - 172., - 193., - 214., - 235., - 142., - 438., - 592., - 613., - 634., - 655., - 676., - 394., - 774., - 1033., - 1054., - 1075., - 1096., - 1117., - 646., - 1894., - 2503., - 2524., - 2545., - 2566., - 2587., - 1486., - 2230., - 2944., - 2965., - 2986., - 3007., - 3028., - 1738., - 2566., - 3385., - 3406., - 3427., - 3448., - 3469., - 1990., - 3686., - 4855., - 4876., - 4897., - 4918., - 4939., - 2830., - 4022., - 5296., - 5317., - 5338., - 5359., - 5380., - 3082., - 4358., - 5737., - 5758., - 5779., - 5800., - 5821., - 3334., + 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, + 0.13988095, 0.08452381, 0.26071429, 0.35238095, 0.36488095, + 0.37738095, 0.38988095, 0.40238095, 0.23452381, 0.46071429, + 0.61488095, 0.62738095, 0.63988095, 0.65238095, 0.66488095, + 0.38452381, 1.12738095, 1.48988095, 1.50238095, 1.51488095, + 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, + 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, + 1.52738095, 2.01488095, 2.02738095, 2.03988095, 2.05238095, + 2.06488095, 1.18452381, 2.19404762, 2.88988095, 2.90238095, + 2.91488095, 2.92738095, 2.93988095, 1.68452381, 2.39404762, + 3.15238095, 3.16488095, 3.17738095, 3.18988095, 3.20238095, + 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, + 3.45238095, 3.46488095, 1.98452381 ] self._VerifyValues( tensor_in_sizes=[1, 5, 8, 7, 1], @@ -212,7 +186,10 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testConv3D2x2x2FilterStride2(self): - expected_output = [19554., 19962., 20370., 50226., 51498., 52770.] + expected_output = [ + 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, + 10.17939815 + ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], filter_in_sizes=[2, 2, 2, 3, 3], @@ -222,11 +199,14 @@ class Conv3DTest(test.TestCase): def testConv3DStride3(self): expected_output = [ - 36564., 38022., 39480., 37824., 39354., 40884., 39084., 40686., 42288., - 46644., 48678., 50712., 47904., 50010., 52116., 49164., 51342., 53520., - 107124., 112614., 118104., 108384., 113946., 119508., 109644., 115278., - 120912., 117204., 123270., 129336., 118464., 124602., 130740., 119724., - 125934., 132144. + 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, + 1.68998016, 1.6155754 , 1.68179563, 1.74801587, 1.9280754 , + 2.01215278, 2.09623016, 1.98015873, 2.0672123 , 2.15426587, + 2.03224206, 2.12227183, 2.21230159, 4.4280754 , 4.65500992, + 4.88194444, 4.48015873, 4.71006944, 4.93998016, 4.53224206, + 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, + 4.8968254 , 5.15054563, 5.40426587, 4.94890873, 5.20560516, + 5.46230159 ] self._VerifyValues( tensor_in_sizes=[1, 6, 7, 8, 2], @@ -237,8 +217,9 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2Same(self): expected_output = [ - 19554., 19962., 20370., 10452., 10710., 10968., 50226., 51498., 52770., - 23844., 24534., 25224. + 3.77199074, 3.85069444, 3.92939815, 2.0162037 , 2.06597222, + 2.11574074, 9.68865741, 9.93402778, 10.17939815, 4.59953704, + 4.73263889, 4.86574074 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -248,7 +229,10 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testKernelSmallerThanStride(self): - expected_output = [1., 3., 7., 9., 19., 21., 25., 27.] + expected_output = [ + 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037 , + 0.77777778, 0.92592593, 1. + ] self._VerifyValues( tensor_in_sizes=[1, 3, 3, 3, 1], filter_in_sizes=[1, 1, 1, 1, 1], @@ -263,9 +247,12 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 1484., 1592., 770., 2240., 2348., 1106., 1149., 1191., 539., 6776., - 6884., 3122., 7532., 7640., 3458., 3207., 3249., 1421., 3005., 3035., - 1225., 3215., 3245., 1309., 1013., 1022., 343. + 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, + 0.40306122, 0.41873178, 0.4340379 , 0.19642857, 2.46938776, + 2.50874636, 1.1377551 , 2.74489796, 2.78425656, 1.26020408, + 1.16873178, 1.1840379 , 0.51785714, 1.09511662, 1.10604956, + 0.44642857, 1.17164723, 1.18258017, 0.47704082, 0.3691691 , + 0.37244898, 0.125 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -274,7 +261,10 @@ class Conv3DTest(test.TestCase): padding="SAME", expected=expected_output) - expected_output = [1484., 1592., 2240., 2348., 6776., 6884., 7532., 7640.] + expected_output = [ + 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, + 2.744898, 2.784257 + ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], filter_in_sizes=[2, 2, 2, 1, 1], @@ -288,7 +278,7 @@ class Conv3DTest(test.TestCase): filter_in_sizes=[2, 1, 2, 1, 2], stride=1, padding="VALID", - expected=[50, 60]) + expected=[1.5625, 1.875]) def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, @@ -328,50 +318,63 @@ class Conv3DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] - if test.is_gpu_available() and use_gpu: - data_type = dtypes.float32 + + for data_type in self._DtypesToTest(use_gpu=use_gpu): # TODO(mjanusz): Modify gradient_checker to also provide max relative # error and synchronize the tolerance levels between the tests for forward # and backward computations. - if test.is_gpu_available(): + if data_type == dtypes.float64: + tolerance = 1e-8 + elif data_type == dtypes.float32: tolerance = 5e-3 - else: - # As of Aug 2016, higher tolerance is needed for some CPU architectures. - # Runs on a single machine can also generate slightly different errors - # because of multithreading. - tolerance = 8e-3 - else: - data_type = dtypes.float64 - tolerance = 1e-8 - with self.test_session(use_gpu=use_gpu): - orig_input_tensor = constant_op.constant( + elif data_type == dtypes.float16: + tolerance = 1e-3 + + + with self.test_session(use_gpu=use_gpu): + orig_input_tensor = constant_op.constant( input_data, shape=input_shape, dtype=data_type, name="input") - filter_tensor = constant_op.constant( + filter_tensor = constant_op.constant( filter_data, shape=filter_shape, dtype=data_type, name="filter") - if data_format == "NCDHW": - input_tensor = test_util.NHWCToNCHW(orig_input_tensor) - strides = test_util.NHWCToNCHW(strides) - else: - input_tensor = orig_input_tensor + if data_format == "NCDHW": + input_tensor = test_util.NHWCToNCHW(orig_input_tensor) + new_strides = test_util.NHWCToNCHW(strides) + else: + input_tensor = orig_input_tensor + new_strides = strides - conv = nn_ops.conv3d( - input_tensor, filter_tensor, strides, padding, + conv = nn_ops.conv3d( + input_tensor, filter_tensor, new_strides, padding, data_format=data_format, name="conv") - if data_format == "NCDHW": - conv = test_util.NCHWToNHWC(conv) + if data_format == "NCDHW": + conv = test_util.NCHWToNHWC(conv) + + + if test_input: + jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor, + input_shape, + conv, + output_shape) + else: + jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor, + filter_shape, + conv, + output_shape) + + + if data_type != dtypes.float16: + reference_jacob_t = jacob_t + err = np.fabs(jacob_t - jacob_n).max() + else: + # Compare fp16 theoretical gradients to fp32 theoretical gradients, + # since fp16 numerical gradients are too imprecise. + err = np.fabs(jacob_t - reference_jacob_t).max() + + print("conv3d gradient error = ", err) + self.assertLess(err, tolerance) - if test_input: - err = gradient_checker.compute_gradient_error(orig_input_tensor, - input_shape, - conv, output_shape) - else: - err = gradient_checker.compute_gradient_error(filter_tensor, - filter_shape, conv, - output_shape) - print("conv3d gradient error = ", err) - self.assertLess(err, tolerance) def ConstructAndTestGradient(self, **kwargs): for data_format, use_gpu in GetTestConfigs(): diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py index 3298092fbe..f7ae1a0f37 100644 --- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py +++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py @@ -122,7 +122,9 @@ class DepthwiseConv2DTest(test.TestCase): x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: - if data_type == dtypes.float32: + if data_type == dtypes.float16: + tolerance = 1e-5 + elif data_type == dtypes.float32: tolerance = 1e-5 else: self.assertEqual(data_type, dtypes.float64) @@ -169,7 +171,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2D,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, stride, padding, data_type, use_gpu=True) @@ -181,7 +183,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2DFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, @@ -318,7 +320,9 @@ class DepthwiseConv2DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): - if data_type == dtypes.float32: + if data_type == dtypes.float16: + tolerance = 0.002 + elif data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) @@ -369,6 +373,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DInputGrad is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -389,6 +395,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DInputGradFormat is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -407,6 +415,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DFilterGrad is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -427,6 +437,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DFilterGradFormat is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD index e21446c2ef..e220d05692 100644 --- a/tensorflow/python/kernel_tests/distributions/BUILD +++ b/tensorflow/python/kernel_tests/distributions/BUILD @@ -193,6 +193,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], + tags = ["manual"], # b/69001419 ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py index ebc89f15c5..e24e8ade73 100644 --- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py +++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py @@ -250,13 +250,11 @@ class MultinomialTest(test.TestCase): theta = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32) theta /= np.sum(theta, 1)[..., array_ops.newaxis] - # Ideally we'd be able to test broadcasting but, the multinomial sampler - # doesn't support different total counts. - n = np.float32(5) + n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32) with self.test_session() as sess: - # batch_shape=[2], event_shape=[3] + # batch_shape=[3, 2], event_shape=[3] dist = multinomial.Multinomial(n, theta) - x = dist.sample(int(250e3), seed=1) + x = dist.sample(int(1000e3), seed=1) sample_mean = math_ops.reduce_mean(x, 0) x_centered = x - sample_mean[array_ops.newaxis, ...] sample_cov = math_ops.reduce_mean(math_ops.matmul( @@ -291,9 +289,9 @@ class MultinomialTest(test.TestCase): def testSampleUnbiasedNonScalarBatch(self): with self.test_session() as sess: dist = multinomial.Multinomial( - total_count=5., + total_count=[7., 6., 5.], logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32))) - n = int(3e3) + n = int(3e4) x = dist.sample(n, seed=0) sample_mean = math_ops.reduce_mean(x, 0) # Cyclically rotate event dims left. diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py index 4883095707..2460950aa9 100644 --- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py +++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py @@ -33,8 +33,8 @@ from tensorflow.python.platform import test class DynamicPartitionTest(test.TestCase): def testSimpleOneDimensional(self): - with self.test_session() as sess: - data = constant_op.constant([0, 13, 2, 39, 4, 17]) + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant([0, 13, 2, 39, 4, 17], dtype=dtypes.float32) indices = constant_op.constant([0, 0, 2, 3, 2, 1]) partitions = data_flow_ops.dynamic_partition( data, indices, num_partitions=4) @@ -52,9 +52,10 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual([None], partitions[3].get_shape().as_list()) def testSimpleTwoDimensional(self): - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], - [12, 13, 14], [15, 16, 17]]) + [12, 13, 14], [15, 16, 17]], + dtype=dtypes.float32) indices = constant_op.constant([0, 0, 2, 3, 2, 1]) partitions = data_flow_ops.dynamic_partition( data, indices, num_partitions=4) @@ -71,9 +72,61 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual([None, 3], partitions[2].get_shape().as_list()) self.assertEqual([None, 3], partitions[3].get_shape().as_list()) + def testLargeOneDimensional(self): + num = 100000 + data_list = [x for x in range(num)] + indices_list = [x % 2 for x in range(num)] + part1 = [x for x in range(num) if x % 2 == 0] + part2 = [x for x in range(num) if x % 2 == 1] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=2) + partition_vals = sess.run(partitions) + + self.assertAllEqual(part1, partition_vals[0]) + self.assertAllEqual(part2, partition_vals[1]) + + def testLargeTwoDimensional(self): + rows = 100000 + cols = 100 + data_list = [None] * rows + for i in range(rows): + data_list[i] = [i for _ in range(cols)] + num_partitions = 97 + indices_list = [(i ** 2) % num_partitions for i in range(rows)] + parts = [[] for _ in range(num_partitions)] + for i in range(rows): + parts[(i ** 2) % num_partitions].append(data_list[i]) + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=num_partitions) + partition_vals = sess.run(partitions) + + for i in range(num_partitions): + # reshape because of empty parts + parts_np = np.array(parts[i], dtype=np.float).reshape(-1, cols) + self.assertAllEqual(parts_np, partition_vals[i]) + + def testSimpleComplex(self): + data_list = [1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j] + indices_list = [1, 0, 1, 0] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.complex64) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=2) + partition_vals = sess.run(partitions) + + self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0]) + self.assertAllEqual([1 + 2j, 5 + 6j], partition_vals[1]) + def testHigherRank(self): np.random.seed(7) - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: for n in 2, 3: for shape in (4,), (4, 5), (4, 5, 2): partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape) @@ -95,6 +148,49 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual(grads[1], None) # Partitions has no gradients self.assertAllEqual(7 * data, sess.run(grads[0])) + def testEmptyParts(self): + data_list = [1, 2, 3, 4] + indices_list = [1, 3, 1, 3] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=4) + partition_vals = sess.run(partitions) + + self.assertAllEqual([], partition_vals[0]) + self.assertAllEqual([1, 3], partition_vals[1]) + self.assertAllEqual([], partition_vals[2]) + self.assertAllEqual([2, 4], partition_vals[3]) + + def testEmptyDataTwoDimensional(self): + data_list = [[], []] + indices_list = [0, 1] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=3) + partition_vals = sess.run(partitions) + + self.assertAllEqual([[]], partition_vals[0]) + self.assertAllEqual([[]], partition_vals[1]) + self.assertAllEqual(np.array([], dtype=np.float).reshape(0, 0), + partition_vals[2]) + + def testEmptyPartitions(self): + data_list = [] + indices_list = [] + with self.test_session(use_gpu=True) as sess: + data = constant_op.constant(data_list, dtype=dtypes.float32) + indices = constant_op.constant(indices_list, dtype=dtypes.int32) + partitions = data_flow_ops.dynamic_partition( + data, indices, num_partitions=2) + partition_vals = sess.run(partitions) + + self.assertAllEqual([], partition_vals[0]) + self.assertAllEqual([], partition_vals[1]) + def testErrorIndexOutOfRange(self): with self.test_session() as sess: data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index a126180414..150e2ff7f2 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import numpy as np +import os from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -1341,11 +1342,33 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") + # Do not propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] + + for v2 in [True, False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop_cudnn, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding="VALID", + use_gpu=True, + v2=v2) + + # Propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" + expected_input_backprop_cudnn = expected_input_backprop_tf_cpu + for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1361,6 +1384,11 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) + if saved_nanprop: + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop + else: + del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] + def _testMaxPoolGradDirectWithNans2_2(self): input_data = [float("nan")] * 16 output_backprop = [ @@ -1391,11 +1419,14 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") + # Do not propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] + for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1411,6 +1442,31 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) + + # Propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" + expected_input_backprop_cudnn = expected_input_backprop_tf_cpu + + for v2 in [True, False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop_cudnn, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding="VALID", + use_gpu=True, + v2=v2) + + if saved_nanprop: + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop + else: + del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] + def testMaxPoolGradDirect(self): self._testMaxPoolGradDirect1_1() self._testMaxPoolGradDirect1_2() diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 5630259b7b..8e54d10f32 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -35,6 +35,9 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test +from tensorflow.python.training import coordinator +from tensorflow.python.training import input as input_lib +from tensorflow.python.training import queue_runner_impl from tensorflow.python.util import compat prefix_path = "tensorflow/core/lib" @@ -1011,6 +1014,25 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) + def testReadFromSameFile(self): + with self.test_session() as sess: + reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") + reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") + filename_queue = input_lib.string_input_producer([self.db_path], + num_epochs=None) + key1, value1 = reader1.read(filename_queue) + key2, value2 = reader2.read(filename_queue) + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + for i in range(3): + for j in range(10): + k1, v1, k2, v2 = sess.run([key1, value1, key2, value2]) + self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2)) + self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2)) + coord.request_stop() + coord.join(threads) + def testReadFromFolder(self): with self.test_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_folder") @@ -1029,6 +1051,25 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) + def testReadFromFileRepeatedly(self): + with self.test_session() as sess: + reader = io_ops.LMDBReader(name="test_read_from_file_repeated") + filename_queue = input_lib.string_input_producer([self.db_path], + num_epochs=None) + key, value = reader.read(filename_queue) + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + # Iterate over the lmdb 3 times. + for i in range(3): + # Go over all 10 records each time. + for j in range(10): + k, v = sess.run([key, value]) + self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j))) + self.assertAllEqual( + compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j)))) + coord.request_stop() + coord.join(threads) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 516a9d000e..3a02f24902 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -323,8 +323,9 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): def testBadIndices(self): # Note: GPU kernel does not return the out-of-range error needed for this # test, so this test is marked as cpu-only. + # Note: With PR #13055 a negative index will be ignored silently. with self.test_session(use_gpu=False): - for bad in [[-1]], [[7]]: + for bad in [[2]], [[7]]: unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2) with self.assertRaisesOpError( r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]): @@ -360,6 +361,32 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): x_init_value=np_x.astype(np.double), delta=1) self.assertAllClose(jacob_t, jacob_n) + def testDropNegatives(self): + # Note: the test is done by replacing segment_ids with 8 to -1 + # for index and replace values generated by numpy with 0. + dtypes = [ + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64, + dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128 + ] + indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) + num_segments = 12 + for indices in indices_flat, indices_flat.reshape(5, 2): + shape = indices.shape + (2,) + for dtype in dtypes: + with self.test_session(use_gpu=True): + tf_x, np_x = self._input(shape, dtype=dtype) + np_ans = self._segmentReduce( + indices, np_x, np.add, op2=None, num_out_rows=num_segments) + # Replace np_ans[8] with 0 for the value + np_ans[8:] = 0 + # Replace 8 with -1 in indices + np.place(indices, indices==8, [-1]) + s = math_ops.unsorted_segment_sum( + data=tf_x, segment_ids=indices, num_segments=num_segments) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) + self.assertShapeEqual(np_ans, s) + class SparseSegmentReductionHelper(SegmentReductionHelper): diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index a9fc699b21..7368251ab6 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -258,6 +258,16 @@ class ShapeOpsTest(test.TestCase): self.assertAllEqual([True], array_ops.expand_dims(inp, 0).eval()) self.assertAllEqual([True], array_ops.expand_dims(inp, -1).eval()) + def testExpandDimsDimType(self): + for dtype in [dtypes.int32, dtypes.int64]: + x = np.zeros([2]) + np_ans = np.expand_dims(x, axis=0) + with self.test_session(use_gpu=True): + tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype)) + tf_ans = tensor.eval() + self.assertShapeEqual(np_ans, tensor) + self.assertAllEqual(np_ans, tf_ans) + def _compareSqueeze(self, x, squeeze_dims, use_gpu): with self.test_session(use_gpu=use_gpu): if squeeze_dims: diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py index 051a25080b..6cdc7872f9 100644 --- a/tensorflow/python/kernel_tests/slice_op_test.py +++ b/tensorflow/python/kernel_tests/slice_op_test.py @@ -217,6 +217,30 @@ class SliceTest(test.TestCase): self.assertEqual(expected_val.shape, slice_t.get_shape()) self.assertEqual(expected_val.shape, slice2_t.get_shape()) + def testRandomHighRank(self): + # Random dims of rank 8 + input_shape = np.random.randint(0, 20, size=8) + inp = np.random.rand(*input_shape).astype("f") + with self.test_session(use_gpu=True) as sess: + a = constant_op.constant( + [float(x) for x in inp.ravel(order="C")], + shape=input_shape, + dtype=dtypes.float32) + indices = [0 if x == 0 else np.random.randint(x) for x in input_shape] + sizes = [ + np.random.randint(0, input_shape[i] - indices[i] + 1) + for i in range(8) + ] + slice_t = array_ops.slice(a, indices, sizes) + slice_val = sess.run(slice_t) + + expected_val = inp[indices[0]:indices[0] + sizes[0], indices[1]:indices[1] + sizes[ + 1], indices[2]:indices[2] + sizes[2], indices[3]:indices[3] + sizes[3], indices[ + 4]:indices[4] + sizes[4], indices[5]:indices[5] + sizes[5], indices[6]:indices[ + 6] + sizes[6], indices[7]:indices[7] + sizes[7]] + self.assertAllEqual(slice_val, expected_val) + self.assertEqual(expected_val.shape, slice_t.get_shape()) + def testPartialShapeInference(self): z = array_ops.zeros((1, 2, 3)) self.assertAllEqual(z.get_shape().as_list(), [1, 2, 3]) @@ -227,7 +251,6 @@ class SliceTest(test.TestCase): m2 = array_ops.slice(z, [0, 0, 0], [constant_op.constant(1) + 0, 2, -1]) self.assertAllEqual(m2.get_shape().as_list(), [None, 2, None]) - def _testGradientSlice(self, input_shape, slice_begin, slice_size): with self.test_session(use_gpu=True): num_inputs = np.prod(input_shape) diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index a50f53b3cd..04758ce45a 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.platform import test @@ -61,6 +62,31 @@ class UniqueTest(test.TestCase): for i in range(len(x)): self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii')) + def testInt32Axis(self): + x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) + with self.test_session() as sess: + y0, idx0 = gen_array_ops.unique_v2(x, axis=[0]) + tf_y0, tf_idx0 = sess.run([y0, idx0]) + y1, idx1 = gen_array_ops.unique_v2(x, axis=[1]) + tf_y1, tf_idx1 = sess.run([y1, idx1]) + self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) + self.assertAllEqual(tf_idx0, np.array([0, 0, 1])) + self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]])) + self.assertAllEqual(tf_idx1, np.array([0, 1, 1])) + + def testInt32V2(self): + # This test is only temporary, once V2 is used + # by default, the axis will be wrapped to allow `axis=None`. + x = np.random.randint(2, high=10, size=7000) + with self.test_session() as sess: + y, idx = gen_array_ops.unique_v2(x, axis=[]) + tf_y, tf_idx = sess.run([y, idx]) + + self.assertEqual(len(x), len(tf_idx)) + self.assertEqual(len(tf_y), len(np.unique(x))) + for i in range(len(x)): + self.assertEqual(x[i], tf_y[tf_idx[i]]) + class UniqueWithCountsTest(test.TestCase): def testInt32(self): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 74b85da845..6be2bc3e76 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -221,7 +221,7 @@ class Layer(object): Weight updates (for instance, the updates of the moving mean and variance in a BatchNormalization layer) may be dependent on the inputs passed - when calling a layer. Hence, when reusing a same layer on + when calling a layer. Hence, when reusing the same layer on different inputs `a` and `b`, some entries in `layer.updates` may be dependent on `a` and some on `b`. This method automatically keeps track of dependencies. @@ -295,9 +295,9 @@ class Layer(object): """Add loss tensor(s), potentially dependent on layer inputs. Some losses (for instance, activity regularization losses) may be dependent - on the inputs passed when calling a layer. Hence, when reusing a same layer - on different inputs `a` and `b`, some entries in `layer.losses` may be - dependent on `a` and some on `b`. This method automatically keeps track + on the inputs passed when calling a layer. Hence, when reusing the same + layer on different inputs `a` and `b`, some entries in `layer.losses` may + be dependent on `a` and some on `b`. This method automatically keeps track of dependencies. The `get_losses_for` method allows to retrieve the losses relevant to a diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 0c7ce02835..8c327d7e27 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -813,6 +813,7 @@ def conv3d(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) @@ -1746,6 +1747,7 @@ def conv3d_transpose(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 9d9b2b3941..4d5fb97845 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -26,6 +26,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.layers import base @@ -239,6 +240,12 @@ class BatchNormalization(base.Layer): raise ValueError('Unsupported axis, fused batch norm only supports ' 'axis == [1] or axis == [3]') + # Raise parameters of fp16 batch norm to fp32 + if self.dtype == dtypes.float16: + param_dtype = dtypes.float32 + else: + param_dtype = self.dtype or dtypes.float32 + axis_to_dim = {x: input_shape[x].value for x in self.axis} for x in axis_to_dim: if axis_to_dim[x] is None: @@ -262,6 +269,7 @@ class BatchNormalization(base.Layer): if self.scale: self.gamma = self.add_variable(name='gamma', shape=param_shape, + dtype=param_dtype, initializer=self.gamma_initializer, regularizer=self.gamma_regularizer, constraint=self.gamma_constraint, @@ -269,11 +277,14 @@ class BatchNormalization(base.Layer): else: self.gamma = None if self.fused: - self._gamma_const = array_ops.constant(1.0, shape=param_shape) + self._gamma_const = array_ops.constant(1.0, + dtype=param_dtype, + shape=param_shape) if self.center: self.beta = self.add_variable(name='beta', shape=param_shape, + dtype=param_dtype, initializer=self.beta_initializer, regularizer=self.beta_regularizer, constraint=self.beta_constraint, @@ -281,7 +292,9 @@ class BatchNormalization(base.Layer): else: self.beta = None if self.fused: - self._beta_const = array_ops.constant(0.0, shape=param_shape) + self._beta_const = array_ops.constant(0.0, + dtype=param_dtype, + shape=param_shape) # Disable variable partitioning when creating the moving mean and variance try: @@ -293,12 +306,14 @@ class BatchNormalization(base.Layer): self.moving_mean = self.add_variable( name='moving_mean', shape=param_shape, + dtype=param_dtype, initializer=self.moving_mean_initializer, trainable=False) self.moving_variance = self.add_variable( name='moving_variance', shape=param_shape, + dtype=param_dtype, initializer=self.moving_variance_initializer, trainable=False) @@ -314,6 +329,7 @@ class BatchNormalization(base.Layer): def _renorm_variable(name, shape): var = self.add_variable(name=name, shape=shape, + dtype=param_dtype, initializer=init_ops.zeros_initializer(), trainable=False) return var @@ -356,7 +372,6 @@ class BatchNormalization(base.Layer): def _fused_batch_norm(self, inputs, training): """Returns the output of fused batch norm.""" - # TODO(reedwm): Add support for fp16 inputs. beta = self.beta if self.center else self._beta_const gamma = self.gamma if self.scale else self._gamma_const @@ -752,6 +767,7 @@ def batch_normalization(inputs, virtual_batch_size=virtual_batch_size, adjustment=adjustment, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs, training=training) diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index 90ebdc8c86..b2876c58c2 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -68,11 +68,12 @@ class BNTest(test.TestCase): use_gpu, is_fused, restore=False, - freeze_mode=False): + freeze_mode=False, + dtype=dtypes.float32): ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype='float32', shape=shape) + image = array_ops.placeholder(dtype=dtype, shape=shape) loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode) if restore: saver.restore(sess, checkpoint_path) @@ -80,7 +81,7 @@ class BNTest(test.TestCase): sess.run(variables.global_variables_initializer()) np.random.seed(0) for _ in range(2): - image_val = np.random.rand(*shape).astype(np.float32) + image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype) sess.run([loss, train_op], feed_dict={image: image_val}) if restore: all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) @@ -90,15 +91,74 @@ class BNTest(test.TestCase): saver.save(sess, checkpoint_path) def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused): + dtype = image_val.dtype ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype='float32', shape=shape) + image = array_ops.placeholder(dtype=dtype, shape=shape) loss, _, saver = self._simple_model(image, is_fused, True) saver.restore(sess, checkpoint_path) loss_val = sess.run(loss, feed_dict={image: image_val}) return loss_val + def _trainEvalSequence(self, + dtype, + train1_use_gpu, + train2_use_gpu, + infer_use_gpu): + batch, height, width, input_channels = 2, 4, 5, 3 + shape = [batch, height, width, input_channels] + checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' % + (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu)) + + self._train( + checkpoint, + shape, + use_gpu=train1_use_gpu, + is_fused=True, + restore=False, + freeze_mode=False, + dtype=dtype) + + train_vars = self._train( + checkpoint, + shape, + use_gpu=train2_use_gpu, + is_fused=True, + restore=True, + freeze_mode=False, + dtype=dtype) + + np.random.seed(0) + image_val = np.random.rand(batch, + height, + width, + input_channels).astype(dtype.as_numpy_dtype) + loss_val = self._infer(checkpoint, image_val, shape, + use_gpu=infer_use_gpu, is_fused=True) + + return train_vars, loss_val + + def testHalfPrecision(self): + ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32, + train1_use_gpu=True, + train2_use_gpu=True, + infer_use_gpu=True) + + self.assertEqual(len(ref_vars), 5) + + for train1_use_gpu in [True, False]: + for train2_use_gpu in [True, False]: + for infer_use_gpu in [True, False]: + test_vars, test_loss = self._trainEvalSequence(dtypes.float16, + train1_use_gpu, + train2_use_gpu, + infer_use_gpu) + self.assertEqual(len(test_vars), 5) + for test_var, ref_var in zip(test_vars, ref_vars): + self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) + self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3) + def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b, use_gpu_checkpoint_a, use_gpu_checkpoint_b, use_gpu_test_a, use_gpu_test_b, freeze_mode): @@ -218,6 +278,36 @@ class BNTest(test.TestCase): ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), bn.trainable_variables) + def testCreateFusedBNFloat16(self): + # Call layer. + bn = normalization_layers.BatchNormalization(axis=1, fused=True) + inputs = random_ops.random_uniform((5, 4, 3, 3), + seed=1, + dtype=dtypes.float16) + training = array_ops.placeholder(dtype='bool') + outputs = bn.apply(inputs, training=training) + + # Verify shape. + self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3]) + + # Verify layer attributes. + self.assertEqual(len(bn.updates), 2) + self.assertEqual(len(bn.variables), 4) + self.assertEqual(len(bn.trainable_variables), 2) + self.assertEqual(len(bn.non_trainable_variables), 2) + for var in bn.variables: + self.assertEqual(var.dtype, dtypes.float32_ref) + + # Test that updates were created and added to UPDATE_OPS. + self.assertEqual(len(bn.updates), 2) + self.assertListEqual( + ops.get_collection(ops.GraphKeys.UPDATE_OPS), bn.updates) + + # Test that weights were created and added to TRAINABLE_VARIABLES. + self.assertListEqual( + ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), + bn.trainable_variables) + def test3DInputAxis1(self): epsilon = 1e-3 bn = normalization_layers.BatchNormalization( diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index c3c7ecd080..43238757c7 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1132,7 +1132,7 @@ def concat(values, axis, name="concat"): return gen_array_ops._concat_v2(values=values, axis=axis, name=name) -def boolean_mask(tensor, mask, name="boolean_mask"): +def boolean_mask(tensor, mask, name="boolean_mask", axis=None): """Apply boolean mask to tensor. Numpy equivalent is `tensor[mask]`. ```python @@ -1146,11 +1146,17 @@ def boolean_mask(tensor, mask, name="boolean_mask"): the first K dimensions of `tensor`'s shape. We then have: `boolean_mask(tensor, mask)[i, j1,...,jd] = tensor[i1,...,iK,j1,...,jd]` where `(i1,...,iK)` is the ith `True` entry of `mask` (row-major order). + The `axis` could be used with `mask` to indicate the axis to mask from. + In that case, `axis + dim(mask) <= dim(tensor)` and `mask`'s shape must match + the first `axis + dim(mask)` dimensions of `tensor`'s shape. Args: tensor: N-D tensor. mask: K-D boolean tensor, K <= N and K must be known statically. name: A name for this operation (optional). + axis: A 0-D int Tensor representing the axis in `tensor` to mask from. + By default, axis is 0 which will mask from the first dimension. Otherwise + K + axis <= N. Returns: (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding @@ -1169,10 +1175,10 @@ def boolean_mask(tensor, mask, name="boolean_mask"): ``` """ - def _apply_mask_1d(reshaped_tensor, mask): + def _apply_mask_1d(reshaped_tensor, mask, axis=None): """Mask tensor along dimension 0 with a 1-D mask.""" indices = squeeze(where(mask), squeeze_dims=[1]) - return gather(reshaped_tensor, indices) + return gather(reshaped_tensor, indices, axis=axis) with ops.name_scope(name, values=[tensor, mask]): tensor = ops.convert_to_tensor(tensor, name="tensor") @@ -1187,19 +1193,22 @@ def boolean_mask(tensor, mask, name="boolean_mask"): raise ValueError( "Number of mask dimensions must be specified, even if some dimensions" " are None. E.g. shape=[None] is ok, but shape=None is not.") - shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask) + axis = 0 if axis is None else axis + shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod(shape(tensor)[:ndims_mask], [0]) + leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0]) tensor = reshape(tensor, - concat([[leading_size], - shape(tensor)[ndims_mask:]], 0)) - first_dim = shape_tensor[:ndims_mask].num_elements() + concat([shape(tensor)[:axis], + [leading_size], + shape(tensor)[axis+ndims_mask:]], 0)) + first_dim = shape_tensor[axis:axis+ndims_mask].num_elements() tensor.set_shape( - tensor_shape.as_shape([first_dim]) - .concatenate(shape_tensor[ndims_mask:])) + tensor_shape.as_shape(shape_tensor[:axis]) + .concatenate([first_dim]) + .concatenate(shape_tensor[axis+ndims_mask:])) mask = reshape(mask, [-1]) - return _apply_mask_1d(tensor, mask) + return _apply_mask_1d(tensor, mask, axis) def sparse_mask(a, mask_indices, name=None): @@ -1521,7 +1530,8 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`. + `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, + `complex64`, `complex128` or `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. @@ -1572,8 +1582,8 @@ def ones_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, `complex128` or - `bool`. + `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, + `complex64`, `complex128` or `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 923696a553..2accedf1b9 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -196,7 +196,7 @@ class Dirichlet(distribution.Distribution): alpha=self.concentration, dtype=self.dtype, seed=seed) - return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keep_dims=True) + return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keepdims=True) @distribution_util.AppendDocstring(_dirichlet_sample_note) def _log_prob(self, x): diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 00b5697c83..d49fac59ca 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -26,6 +26,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import util as distribution_util @@ -140,6 +141,8 @@ class Multinomial(distribution.Distribution): counts = [[2., 1, 1], [3, 1, 1]] dist.prob(counts) # Shape [2] + + dist.sample(5) # Shape [5, 2, 3] ``` """ @@ -231,29 +234,35 @@ class Multinomial(distribution.Distribution): def _sample_n(self, n, seed=None): n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32) - if self.total_count.get_shape().ndims is not None: - if self.total_count.get_shape().ndims != 0: - raise NotImplementedError( - "Sample only supported for scalar number of draws.") - elif self.validate_args: - is_scalar = check_ops.assert_rank( - n_draws, 0, - message="Sample only supported for scalar number of draws.") - n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws) k = self.event_shape_tensor()[0] - # Flatten batch dims so logits has shape [B, k], - # where B = reduce_prod(self.batch_shape_tensor()). - x = random_ops.multinomial( - logits=array_ops.reshape(self.logits, [-1, k]), - num_samples=n * n_draws, - seed=seed) - x = array_ops.reshape(x, shape=[-1, n, n_draws]) - x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), - axis=-2) # shape: [B, n, k] + + # boardcast the total_count and logits to same shape + n_draws = array_ops.ones_like( + self.logits[..., 0], dtype=n_draws.dtype) * n_draws + logits = array_ops.ones_like( + n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits + + # flatten the total_count and logits + flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] + flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] + + # computes each total_count and logits situation by map_fn + def _sample_single(args): + logits, n_draw = args[0], args[1] # [K], [] + x = random_ops.multinomial(logits[array_ops.newaxis, ...], + n_draw, seed) # [1, n*n_draw] + x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] + x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] + return x + x = functional_ops.map_fn(_sample_single, + [flat_logits, flat_ndraws], + dtype=self.dtype) # [B1B2...Bm, n, k] + + # reshape the results to proper shape x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) - x = array_ops.reshape(x, final_shape) - return math_ops.cast(x, self.dtype) + x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] + return x @distribution_util.AppendDocstring(_multinomial_sample_note) def _log_prob(self, counts): diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 2946dbe81e..7c23321ca5 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1121,7 +1121,7 @@ def rgb_to_grayscale(images, name=None): rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) gray_float = math_ops.reduce_sum(flt_image * rgb_weights, rank_1, - keep_dims=True) + keepdims=True) gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) return convert_image_dtype(gray_float, orig_dtype, name=name) @@ -1212,26 +1212,7 @@ def adjust_hue(image, delta, name=None): orig_dtype = image.dtype flt_image = convert_image_dtype(image, dtypes.float32) - # TODO(zhengxq): we will switch to the fused version after we add a GPU - # kernel for that. - fused = os.environ.get('TF_ADJUST_HUE_FUSED', '') - fused = fused.lower() in ('true', 't', '1') - - if not fused: - hsv = gen_image_ops.rgb_to_hsv(flt_image) - - hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1]) - saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1]) - value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1]) - - # Note that we add 2*pi to guarantee that the resulting hue is a positive - # floating point number since delta is [-0.5, 0.5]. - hue = math_ops.mod(hue + (delta + 1.), 1.) - - hsv_altered = array_ops.concat([hue, saturation, value], 2) - rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered) - else: - rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) + rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) return convert_image_dtype(rgb_altered, orig_dtype) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 2cb467c891..14a039ffd0 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated_args # Names below are lower_case. # pylint: disable=invalid-name @@ -438,7 +439,10 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): # pylint: disable=redefined-builtin -def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") +def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, + keep_dims=None): r"""Computes the norm of vectors, matrices, and tensors. This function can compute several different vector norms (the 1-norm, the @@ -471,13 +475,13 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): can be either a matrix or a batch of matrices at runtime, pass `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are computed. - keep_dims: If True, the axis indicated in `axis` are kept with size 1. + keepdims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. Returns: output: A `Tensor` of the same type as tensor, containing the vector or - matrix norms. If `keep_dims` is True then the rank of output is equal to + matrix norms. If `keepdims` is True then the rank of output is equal to the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar, if `axis` is an integer, the rank of `output` is one less than the rank of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less @@ -497,6 +501,13 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): @end_compatibility """ + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims + if keepdims is None: + keepdims = False + is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and len(axis) == 2) if is_matrix_norm: @@ -528,25 +539,25 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): # matrices. result = math_ops.sqrt( math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keep_dims=True)) + tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: sum_axis = None if axis is None else axis[0] - result = math_ops.reduce_sum(result, sum_axis, keep_dims=True) + result = math_ops.reduce_sum(result, sum_axis, keepdims=True) if is_matrix_norm: - result = math_ops.reduce_max(result, axis[-1], keep_dims=True) + result = math_ops.reduce_max(result, axis[-1], keepdims=True) elif ord == np.inf: if is_matrix_norm: - result = math_ops.reduce_sum(result, axis[1], keep_dims=True) + result = math_ops.reduce_sum(result, axis[1], keepdims=True) max_axis = None if axis is None else axis[0] - result = math_ops.reduce_max(result, max_axis, keep_dims=True) + result = math_ops.reduce_max(result, max_axis, keepdims=True) else: # General p-norms (positive p only) result = math_ops.pow( math_ops.reduce_sum( - math_ops.pow(result, ord), axis, keep_dims=True), 1.0 / ord) - if not keep_dims: + math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord) + if not keepdims: result = array_ops.squeeze(result, axis) return result diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py index 5732c756ce..04eeb00518 100644 --- a/tensorflow/python/ops/math_grad_test.py +++ b/tensorflow/python/ops/math_grad_test.py @@ -113,6 +113,23 @@ class MinOrMaxGradientTest(test.TestCase): self.assertLess(error, 1e-4) +class MaximumOrMinimumGradientTest(test.TestCase): + + def testMaximumGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs = math_ops.maximum(inputs, 3.0) + with self.test_session(): + error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) + self.assertLess(error, 1e-4) + + def testMinimumGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs = math_ops.minimum(inputs, 2.0) + with self.test_session(): + error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) + self.assertLess(error, 1e-4) + + class ProdGradientTest(test.TestCase): def testProdGradient(self): diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 4c400423b6..e2e23dccef 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -170,14 +170,13 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops.gen_math_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat -from tensorflow.python.util.deprecation import deprecated -from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util import deprecation # Aliases for some automatically-generated names. linspace = gen_math_ops.lin_space -arg_max = deprecated(None, "Use `argmax` instead")(arg_max) # pylint: disable=used-before-assignment -arg_min = deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment +arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max) # pylint: disable=used-before-assignment +arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment def _set_doc(doc): @@ -190,7 +189,8 @@ def _set_doc(doc): # pylint: disable=redefined-builtin -@deprecated_args(None, "Use the `axis` argument instead", "dimension") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", + "dimension") @_set_doc( gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace( "dimension", "axis")) @@ -208,7 +208,8 @@ def argmax(input, return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type) -@deprecated_args(None, "Use the `axis` argument instead", "dimension") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", + "dimension") @_set_doc( gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace( "dimension", "axis")) @@ -324,7 +325,7 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`") # TODO(aselle): put deprecation in after another round of global code changes -@deprecated( +@deprecation.deprecated( "2016-12-30", "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`") def _mul(x, y, name=None): @@ -343,7 +344,7 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`") # TODO(aselle): put deprecation in after another round of global code changes -@deprecated( +@deprecation.deprecated( "2016-12-30", "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`") def _sub(x, y, name=None): @@ -381,8 +382,9 @@ def negative(x, name=None): # pylint: disable=g-docstring-has-escape -@deprecated("2016-12-30", - "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`") +@deprecation.deprecated( + "2016-12-30", + "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`") def _neg(x, name=None): """Computes numerical negative value element-wise. @@ -1269,24 +1271,27 @@ def _ReductionDims(x, axis, reduction_indices): return range(0, array_ops.rank(x)) -def _may_reduce_to_scalar(keep_dims, axis, reduction_indices, output): +def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output): """Set a reduction's output's shape to be a scalar if we are certain.""" - if (not output.shape.is_fully_defined()) and (not keep_dims) and ( + if (not output.shape.is_fully_defined()) and (not keepdims) and ( axis is None) and (reduction_indices is None): output.set_shape(()) return output +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_sum(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the sum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1299,7 +1304,7 @@ def reduce_sum(input_tensor, tf.reduce_sum(x) # 6 tf.reduce_sum(x, 0) # [2, 2, 2] tf.reduce_sum(x, 1) # [3, 3] - tf.reduce_sum(x, 1, keep_dims=True) # [[3], [3]] + tf.reduce_sum(x, 1, keepdims=True) # [[3], [3]] tf.reduce_sum(x, [0, 1]) # 6 ``` @@ -1308,9 +1313,10 @@ def reduce_sum(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1319,26 +1325,34 @@ def reduce_sum(input_tensor, Equivalent to np.sum @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._sum( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def count_nonzero(input_tensor, axis=None, - keep_dims=False, + keepdims=None, dtype=dtypes.int64, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes number of nonzero elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1355,7 +1369,7 @@ def count_nonzero(input_tensor, tf.count_nonzero(x) # 3 tf.count_nonzero(x, 0) # [1, 2, 0] tf.count_nonzero(x, 1) # [1, 2] - tf.count_nonzero(x, 1, keep_dims=True) # [[1], [2]] + tf.count_nonzero(x, 1, keepdims=True) # [[1], [2]] tf.count_nonzero(x, [0, 1]) # 3 ``` @@ -1364,14 +1378,20 @@ def count_nonzero(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. dtype: The output dtype; defaults to `tf.int64`. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor (number of nonzero values). """ + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + with ops.name_scope(name, "count_nonzero", [input_tensor]): input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor") zero = input_tensor.dtype.as_numpy_dtype() @@ -1380,21 +1400,24 @@ def count_nonzero(input_tensor, # int64 reduction happens on GPU to_int64(gen_math_ops.not_equal(input_tensor, zero)), axis=axis, - keep_dims=keep_dims, + keepdims=keepdims, reduction_indices=reduction_indices), dtype=dtype) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_mean(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the mean of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1414,36 +1437,58 @@ def reduce_mean(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @compatibility(numpy) Equivalent to np.mean + + Please note that `np.mean` has a `dtype` parameter that could be used to + specify the output type. By default this is `dtype=float64`. On the other + hand, `tf.reduce_mean` has an aggressive type inference from `input_tensor`, + for example: + + ```python + x = tf.constant([1, 0, 1, 0]) + tf.reduce_mean(x) # 0 + y = tf.constant([1., 0., 1., 0.]) + tf.reduce_mean(y) # 0.5 + ``` + @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._mean( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_prod(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the product of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1454,9 +1499,10 @@ def reduce_prod(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1465,25 +1511,33 @@ def reduce_prod(input_tensor, Equivalent to np.prod @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._prod( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_min(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the minimum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1494,9 +1548,10 @@ def reduce_min(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1505,25 +1560,32 @@ def reduce_min(input_tensor, Equivalent to np.min @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._min( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_max(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the maximum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1534,9 +1596,10 @@ def reduce_max(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1545,25 +1608,32 @@ def reduce_max(input_tensor, Equivalent to np.max @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._max( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_all(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the "logical and" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1583,9 +1653,10 @@ def reduce_all(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1594,25 +1665,32 @@ def reduce_all(input_tensor, Equivalent to np.all @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._all( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_any(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the "logical or" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1632,9 +1710,10 @@ def reduce_any(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1643,25 +1722,32 @@ def reduce_any(input_tensor, Equivalent to np.any @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._any( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_logsumexp(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes log(sum(exp(elements across dimensions of a tensor))). Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1678,7 +1764,7 @@ def reduce_logsumexp(input_tensor, tf.reduce_logsumexp(x) # log(6) tf.reduce_logsumexp(x, 0) # [log(2), log(2), log(2)] tf.reduce_logsumexp(x, 1) # [log(3), log(3)] - tf.reduce_logsumexp(x, 1, keep_dims=True) # [[log(3)], [log(3)]] + tf.reduce_logsumexp(x, 1, keepdims=True) # [[log(3)], [log(3)]] tf.reduce_logsumexp(x, [0, 1]) # log(6) ``` @@ -1687,19 +1773,24 @@ def reduce_logsumexp(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. """ + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name: raw_max = reduce_max( input_tensor, axis=axis, reduction_indices=reduction_indices, - keep_dims=True) + keepdims=True) my_max = array_ops.stop_gradient( array_ops.where( gen_math_ops.is_finite(raw_max), raw_max, @@ -1708,13 +1799,13 @@ def reduce_logsumexp(input_tensor, reduce_sum( gen_math_ops.exp(input_tensor - my_max), axis, - keep_dims=True, + keepdims=True, reduction_indices=reduction_indices)) + my_max - if not keep_dims: + if not keepdims: if isinstance(axis, int): axis = [axis] result = array_ops.squeeze(result, axis) - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, result) + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result) def trace(x, name=None): @@ -2216,9 +2307,10 @@ def bincount(arr, maxlength = ops.convert_to_tensor( maxlength, name="maxlength", dtype=dtypes.int32) output_size = gen_math_ops.minimum(maxlength, output_size) - weights = ( - ops.convert_to_tensor(weights, name="weights") - if weights is not None else constant_op.constant([], dtype)) + if weights is not None: + weights = ops.convert_to_tensor(weights, name="weights") + return gen_math_ops.unsorted_segment_sum(weights, arr, output_size) + weights = constant_op.constant([], dtype) return gen_math_ops.bincount(arr, output_size, weights) @@ -2381,7 +2473,7 @@ def reduced_shape(input_shape, axes): input_shape: 1-D Tensor, the shape of the Tensor being reduced. axes: 1-D Tensor, the reduction axes. Returns: - A 1-D Tensor, the output shape as if keep_dims were set to True. + A 1-D Tensor, the output shape as if keepdims were set to True. """ # Example: # cast needed for SparseTensor reductions diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 717ee1254f..d30f6b92ad 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -794,7 +794,7 @@ def mean_cosine_distance(labels, predictions, dim, weights=None, radial_diffs = math_ops.multiply(predictions, labels) radial_diffs = math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,], - keep_dims=True) + keepdims=True) mean_distance, update_op = mean(radial_diffs, weights, None, None, diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py index 1fcd0384da..e72d34d1f7 100644 --- a/tensorflow/python/ops/nn_fused_batchnorm_test.py +++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py @@ -335,22 +335,22 @@ class BatchNormalizationTest(test.TestCase): def testInference(self): x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_inference( - x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] if test.is_gpu_available(cuda_only=True): for dtype in [np.float16, np.float32]: self._test_inference( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -359,33 +359,33 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_inference( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_inference( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') def testTraining(self): x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_training( - x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -394,20 +394,20 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_training( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') def testBatchNormGrad(self): for is_training in [True, False]: x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [1], @@ -422,17 +422,17 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NCHW', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [1], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [1], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 1, 6, 2] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [2], @@ -440,13 +440,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [2], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [2], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -460,8 +460,8 @@ class BatchNormalizationTest(test.TestCase): is_training=is_training) x_shape = [5, 7, 11, 4] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [7], @@ -476,13 +476,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [4], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [4], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) def _testBatchNormGradGrad(self, config): shape = config['shape'] @@ -506,15 +506,14 @@ class BatchNormalizationTest(test.TestCase): data_format='NCHW', is_training=is_training, err_tolerance=err_tolerance) - if dtype != np.float16: - self._test_grad_grad( - shape, - np.float32, [shape[3]], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training, - err_tolerance=err_tolerance) + self._test_grad_grad( + shape, + dtype, [shape[3]], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training, + err_tolerance=err_tolerance) def testBatchNormGradGrad(self): configs = [{ @@ -525,6 +524,10 @@ class BatchNormalizationTest(test.TestCase): 'shape': [2, 3, 2, 2], 'err_tolerance': 1e-3, 'dtype': np.float32, + }, { + 'shape': [2, 3, 4, 5], + 'err_tolerance': 1e-2, + 'dtype': np.float16, }, { 'shape': [2, 3, 2, 2], 'err_tolerance': 2e-3, diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 431ea1186a..da037a7983 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -32,6 +32,8 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables +from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None): @@ -313,19 +315,20 @@ def swish(features): return features * math_ops.sigmoid(features) -def l2_normalize(x, dim, epsilon=1e-12, name=None): - """Normalizes along dimension `dim` using an L2 norm. +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): + """Normalizes along dimension `axis` using an L2 norm. - For a 1-D tensor with `dim = 0`, computes + For a 1-D tensor with `axis = 0`, computes output = x / sqrt(max(sum(x**2), epsilon)) For `x` with more dimensions, independently normalizes each 1-D slice along - dimension `dim`. + dimension `axis`. Args: x: A `Tensor`. - dim: Dimension along which to normalize. A scalar or a vector of + axis: Dimension along which to normalize. A scalar or a vector of integers. epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`. @@ -335,8 +338,9 @@ def l2_normalize(x, dim, epsilon=1e-12, name=None): A `Tensor` with the same shape as `x`. """ with ops.name_scope(name, "l2_normalize", [x]) as name: + axis = deprecated_argument_lookup("axis", axis, "dim", dim) x = ops.convert_to_tensor(x, name="x") - square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True) + square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True) x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon)) return math_ops.multiply(x, x_inv_norm, name=name) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index bdaac65904..61fa462988 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -23,6 +23,7 @@ import numbers import numpy as np from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util from tensorflow.python.framework import ops @@ -37,6 +38,8 @@ from tensorflow.python.ops import random_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import +from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup from tensorflow.python.util import deprecation @@ -1645,17 +1648,18 @@ def _softmax(logits, compute_op, dim=-1, name=None): return output -def softmax(logits, dim=-1, name=None): +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def softmax(logits, axis=None, name=None, dim=None): """Computes softmax activations. This function performs the equivalent of - softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), dim) + softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - dim: The dimension softmax would be performed on. The default is -1 which + axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1663,23 +1667,27 @@ def softmax(logits, dim=-1, name=None): A `Tensor`. Has the same type and shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `dim` is beyond the last + InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - return _softmax(logits, gen_nn_ops._softmax, dim, name) + axis = deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: + axis = -1 + return _softmax(logits, gen_nn_ops._softmax, axis, name) -def log_softmax(logits, dim=-1, name=None): +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def log_softmax(logits, axis=None, name=None, dim=None): """Computes log softmax activations. For each batch `i` and class `j` we have - logsoftmax = logits - log(reduce_sum(exp(logits), dim)) + logsoftmax = logits - log(reduce_sum(exp(logits), axis)) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - dim: The dimension softmax would be performed on. The default is -1 which + axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1687,10 +1695,13 @@ def log_softmax(logits, dim=-1, name=None): A `Tensor`. Has the same type as `logits`. Same shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `dim` is beyond the last + InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - return _softmax(logits, gen_nn_ops._log_softmax, dim, name) + axis = deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: + axis = -1 + return _softmax(logits, gen_nn_ops._log_softmax, axis, name) def _ensure_xent_args(name, sentinel, labels, logits): @@ -2305,6 +2316,100 @@ def conv1d(value, filters, stride, padding, return array_ops.squeeze(result, [spatial_start_dim]) +def conv1d_transpose(value, + filter, + output_shape, + stride, + padding="SAME", + data_format="NWC", + name=None): + """The transpose of `conv1d`. + + This operation is sometimes called "deconvolution" after [Deconvolutional + Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is + actually the transpose (gradient) of `conv1d` rather than an actual + deconvolution. + + Args: + value: A 3-D `Tensor` of type `float` and shape + `[batch, in_width, in_channels]` for `NWC` data format or + `[batch, in_channels, in_width]` for `NCW` data format. + filter: A 3-D `Tensor` with the same type as `value` and shape + `[filter_width, output_channels, in_channels]`. `filter`'s + `in_channels` dimension must match that of `value`. + output_shape: A 1-D `Tensor` representing the output shape of the + deconvolution op. + stride: An `integer`. The number of entries by which + the filter is moved right at each step. + padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. + See the @{tf.nn.convolution$comment here} + data_format: A string. 'NHWC' and 'NCHW' are supported. + name: Optional name for the returned tensor. + + Returns: + A `Tensor` with the same type as `value`. + + Raises: + ValueError: If input/output depth does not match `filter`'s shape, or if + padding is other than `'VALID'` or `'SAME'`. + """ + with ops.name_scope(name, "conv1d_transpose", + [value, filter, output_shape]) as name: + output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") + if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): + raise ValueError("output_shape must have shape (3,), got {}" + .format(output_shape_.get_shape())) + + # The format could be either NWC or NCW, map to NHWC or NCHW + if data_format is None or data_format == "NWC": + data_format_2d = "NHWC" + axis = 2 + elif data_format == "NCW": + data_format_2d = "NCHW" + axis = 1 + else: + raise ValueError("data_format must be \"NWC\" or \"NCW\".") + + if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]): + raise ValueError("input channels does not match filter's input channels, " + "{} != {}".format(value.get_shape()[axis], + filter.get_shape()[2])) + + if isinstance(output_shape, (list, np.ndarray)): + # output_shape's shape should be == [3] if reached this point. + if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): + raise ValueError( + "output_shape does not match filter's output channels, " + "{} != {}".format(output_shape[axis], filter.get_shape()[1])) + + if padding != "VALID" and padding != "SAME": + raise ValueError("padding must be either VALID or SAME:" + " {}".format(padding)) + + # Reshape the input tensor to [batch, 1, in_width, in_channels] + if data_format_2d == "NHWC": + output_shape_ = array_ops.concat([output_shape_[:1], [1], + output_shape_[1:]], axis=0) + spatial_start_dim = 1 + strides = [1, 1, stride, 1] + else: + output_shape_ = array_ops.concat([output_shape_[:2], [1], + output_shape_[2:]], axis=0) + spatial_start_dim = 2 + strides = [1, 1, 1, stride] + value = array_ops.expand_dims(value, spatial_start_dim) + filter = array_ops.expand_dims(filter, 0) + + result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + data_format=data_format_2d, + name=name) + return array_ops.squeeze(result, [spatial_start_dim]) + + @ops.RegisterStatistics("Dilation2D", "flops") def _calc_dilation2d_flops(graph, node): """Calculates the compute resources needed for Dilation2D.""" diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index e9b1c67d16..a1e4305de1 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -1063,13 +1063,13 @@ class Variable(object): class PartitionedVariable(object): """A container for partitioned `Variable` objects. - @compatiblity(eager) `tf.PartitionedVariable` is not compatible with + @compatibility(eager) `tf.PartitionedVariable` is not compatible with eager execution. Use `tfe.Variable` instead which is compatable with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. - @end_compatiblity + @end_compatibility """ class PartitionedVariableIterator(object): diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py old mode 100644 new mode 100755 diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 99bed86a17..d78362d4fb 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -232,7 +232,6 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) __macro(cudnnRNNBackwardData) \ __macro(cudnnRNNBackwardWeights) \ __macro(cudnnSetRNNDescriptor) \ - __macro(cudnnSetRNNDescriptor_v6) \ __macro(cudnnGetFilterNdDescriptor) // clang-format on @@ -245,7 +244,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 6000 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro) \ - __macro(cudnnConvolutionBiasActivationForward) + __macro(cudnnConvolutionBiasActivationForward) \ + __macro(cudnnSetRNNDescriptor_v6) // clang-format on CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -665,7 +665,6 @@ class ScopedPoolingDescriptor { LOG(FATAL) << "could not create cudnn pooling descriptor: " << ToString(status); } - const std::vector strides64 = pooling_descriptor.strides(); const std::vector padding64 = pooling_descriptor.padding(); const std::vector shape64 = pooling_descriptor.window(); @@ -680,14 +679,14 @@ class ScopedPoolingDescriptor { &CheckedNarrowing); std::transform(shape64.cbegin(), shape64.cend(), shape.begin(), &CheckedNarrowing); + bool propagate_nans = pooling_descriptor.propagate_nans(); status = wrap::cudnnSetPoolingNdDescriptor( parent_, handle_, (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum ? CUDNN_POOLING_MAX : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING), #if CUDNN_VERSION >= 5000 - // Always propagate nans. - CUDNN_PROPAGATE_NAN, + propagate_nans ? CUDNN_PROPAGATE_NAN : CUDNN_NOT_PROPAGATE_NAN, #endif nd, shape.data(), padding.data(), strides.data()); if (status != CUDNN_STATUS_SUCCESS) { diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 07fe8a85f4..29fd6d0e87 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -472,7 +472,8 @@ PoolingDescriptor::PoolingDescriptor(int ndims) ndims_(ndims), window_(ndims, 0), padding_(ndims, 0), - strides_(ndims, 1) {} + strides_(ndims, 1), + propagate_nans_(false) {} PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {} @@ -482,6 +483,7 @@ void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) { window_ = other.window_; padding_ = other.padding_; strides_ = other.strides_; + propagate_nans_ = other.propagate_nans_; } string PoolingDescriptor::ToString() const { @@ -495,9 +497,12 @@ string PoolingDescriptor::ToString() const { port::Appendf(&padding, "%lld", padding_[i]); } - return port::Printf("{mode: %s window: %s strides: %s padding: %s}", - mode_string, window.c_str(), strides.c_str(), - padding.c_str()); + const char* propagate_string = propagate_nans_ ? "Yes" : "No"; + + return port::Printf( + "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}", + mode_string, window.c_str(), strides.c_str(), padding.c_str(), + propagate_string); } string PoolingDescriptor::ToShortString() const { @@ -508,7 +513,8 @@ string PoolingDescriptor::ToShortString() const { port::Appendf(&padding, "_p%d:%lld", i, padding_[i]); } return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg", - window, strides, padding); + window, strides, padding, + propagate_nans_ ? "propagate_nans" : "ignore_nans"); } // -- NormalizeDescriptor diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 49235167ab..0d2cd4a9f2 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -661,6 +661,10 @@ class PoolingDescriptor { SetDim(&strides_, dim, value); return *this; } + PoolingDescriptor& set_propagate_nans(bool value) { + propagate_nans_ = value; + return *this; + } int ndims() const { return ndims_; } void CloneFrom(const PoolingDescriptor& other); @@ -681,10 +685,12 @@ class PoolingDescriptor { std::vector window() const { return window_; } std::vector padding() const { return padding_; } std::vector strides() const { return strides_; } + bool propagate_nans() const { return propagate_nans_; } private: PoolingMode mode_; int ndims_; + bool propagate_nans_; // Stored as: ..., y, x. std::vector window_; diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt index 9fd38a29b7..62e634afb8 100644 --- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt @@ -94,7 +94,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "qr" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index 24c0448dea..ebd9c079b5 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -170,7 +170,7 @@ tf_module { } member_method { name: "l2_normalize" - argspec: "args=[\'x\', \'dim\', \'epsilon\', \'name\'], varargs=None, keywords=None, defaults=[\'1e-12\', \'None\'], " + argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], " } member_method { name: "leaky_relu" @@ -190,7 +190,7 @@ tf_module { } member_method { name: "log_softmax" - argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " + argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "log_uniform_candidate_sampler" @@ -282,7 +282,7 @@ tf_module { } member_method { name: "softmax" - argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " + argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "softmax_cross_entropy_with_logits" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index bf7bc6a7c1..0edd4153d7 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -750,7 +750,7 @@ tf_module { } member_method { name: "boolean_mask" - argspec: "args=[\'tensor\', \'mask\', \'name\'], varargs=None, keywords=None, defaults=[\'boolean_mask\'], " + argspec: "args=[\'tensor\', \'mask\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'boolean_mask\', \'None\'], " } member_method { name: "broadcast_dynamic_shape" @@ -858,7 +858,7 @@ tf_module { } member_method { name: "count_nonzero" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'dtype\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \"\", \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\', \'None\', \'None\'], " } member_method { name: "count_up_to" @@ -1414,7 +1414,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "not_equal" @@ -1546,11 +1546,11 @@ tf_module { } member_method { name: "reduce_all" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_any" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_join" @@ -1558,27 +1558,27 @@ tf_module { } member_method { name: "reduce_logsumexp" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_max" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_mean" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_min" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_prod" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_sum" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "register_tensor_conversion_function" diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 5f791d7bc7..c27f4953e3 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -165,7 +165,7 @@ else BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test" - BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" + # BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test" diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh index 55c1674495..e1edd62cc5 100755 --- a/tensorflow/tools/ci_build/install/install_golang.sh +++ b/tensorflow/tools/ci_build/install/install_golang.sh @@ -16,7 +16,7 @@ set -ex -GOLANG_URL="https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz" +GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz" sudo mkdir -p /usr/local wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index dcda8228bc..e5d8303c6e 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -48,6 +48,6 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_GCP=0" \ -e "TF_NEED_HDFS=0" \ -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \ - -e "TF_NEED_OPENCL=0" \ + -e "TF_NEED_OPENCL_SYCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index d90a1b905d..e1b56b9a25 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -27,7 +27,7 @@ export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 -export TF_NEED_OPENCL=0 +export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index 79973647c1..5a901af3e5 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -28,7 +28,7 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 -export TF_NEED_OPENCL=0 +export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 5244898c40..88116d9f24 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -75,17 +75,23 @@ if [[ $1 == "PI_ONE" ]]; then PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/ + --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 + --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' echo "Building for the Pi Two/Three, with NEON acceleration" fi +# We need to pass down the environment variable with a possible alternate Python +# include path for Python 3.x builds to work. +export CROSSTOOL_PYTHON_INCLUDE_PATH + cd ${WORKSPACE_PATH} bazel build -c opt ${PI_COPTS} \ --config=monolithic \ diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 924ab1a4ae..44b6d52952 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -117,7 +117,7 @@ function run_configure_for_cpu_build { export TF_NEED_VERBS=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL=0 + export TF_NEED_OPENCL_SYCL=0 echo "" | ./configure } @@ -141,7 +141,7 @@ function run_configure_for_gpu_build { export TF_NEED_MKL=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL=0 + export TF_NEED_OPENCL_SYCL=0 # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL # for GPU build on Windows diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 index 64ebc4607a..9bcc3925a8 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 @@ -101,12 +101,11 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib --jobs=${TF_AVAILABLE_CPUS} \ tensorflow/tools/pip_package:build_pip_package && \ mkdir /pip_pkg && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg - -# Clean up pip wheel and Bazel cache when done. -RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \ + pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ rm -rf /pip_pkg && \ rm -rf /root/.cache +# Clean up pip wheel and Bazel cache when done. WORKDIR /root diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index 0571dd7391..e212d10290 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04 +FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04 LABEL maintainer="Craig Citro " diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index 2e5a0038ed..e35c58ff80 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -60,6 +60,20 @@ Building TensorFlow Docker containers should be done through the script. The raw Dockerfiles should not be used directly as they contain strings to be replaced by the script during the build. +Attempting to run [parameterized_docker_build.sh](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/parameterized_docker_build.sh) +from a binary docker image such as for example `tensorflow/tensorflow:latest` will +not work. One needs to execute the script from a developer docker image since by +contrast with a binary docker image it contains not only the compiled solution but +also the tensorflow source code. Please select the appropriate developer docker +image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`. + +The smallest command line to generate a docker image will then be: +```docker run -it tensorflow/tensorflow:"right_tag"``` + +If you would like to start a jupyter notebook on your docker container, make sure +to map the port 8888 of your docker container by adding -p 8888:8888 to the above +command. + To use the script, specify the container type (`CPU` vs. `GPU`), the desired Python version (`PYTHON2` vs. `PYTHON3`) and whether the developer Docker image is to be built (`NO` vs. `YES`). In addition, you need to specify the central diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index 1bf7113c9e..9216008600 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -131,6 +131,8 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", + "//tensorflow/contrib/rnn:gru_ops_op_lib", + "//tensorflow/contrib/rnn:lstm_ops_op_lib", ] + if_not_windows([ "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform", diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc index 2b85e7e83c..97e8f77616 100644 --- a/tensorflow/tools/graph_transforms/quantize_nodes.cc +++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc @@ -759,6 +759,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reshape_dims; reshape_dims.set_op("Const"); reshape_dims.set_name(unique_input_name + "/reshape_dims"); + AddNodeInput("^" + input_name, &reshape_dims); SetNodeAttr("dtype", DT_INT32, &reshape_dims); Tensor reshape_dims_tensor(DT_INT32, {1}); reshape_dims_tensor.flat()(0) = -1; @@ -768,6 +769,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reduction_dims; reduction_dims.set_op("Const"); reduction_dims.set_name(unique_input_name + "/reduction_dims"); + AddNodeInput("^" + input_name, &reduction_dims); SetNodeAttr("dtype", DT_INT32, &reduction_dims); Tensor reduction_dims_tensor(DT_INT32, {1}); reduction_dims_tensor.flat()(0) = 0; diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 60282f6aa3..a493c6f2aa 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.4.0-rc1' +_VERSION = '1.4.0' REQUIRED_PACKAGES = [ 'absl-py', diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD index bc6a2fd8cc..bc9e37ffb3 100644 --- a/third_party/aws.BUILD +++ b/third_party/aws.BUILD @@ -21,6 +21,9 @@ cc_library( "@%ws%//tensorflow:linux_ppc64le": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), + "@%ws%//tensorflow:raspberry_pi_armeabi": glob([ + "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", + ]), "//conditions:default": [], }) + glob([ "aws-cpp-sdk-core/include/**/*.h", diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 882967df1c..805a30d262 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -477,7 +477,6 @@ genrule( "# define HAVE_RAND_EGD 1", "# define HAVE_RAND_STATUS 1", "# define HAVE_SSL_GET_SHUTDOWN 1", - "# define HAVE_STROPTS_H 1", "# define HAVE_TERMIOS_H 1", "# define OS \"x86_64-pc-linux-gnu\"", "# define RANDOM_FILE \"/dev/urandom\"", diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl index 32884d71e7..f8e50efcc6 100755 --- a/third_party/sycl/crosstool/CROSSTOOL.tpl +++ b/third_party/sycl/crosstool/CROSSTOOL.tpl @@ -35,10 +35,10 @@ toolchain { tool_path { name: "compat-ld" path: "/usr/bin/ld" } tool_path { name: "cpp" path: "/usr/bin/cpp" } tool_path { name: "dwp" path: "/usr/bin/dwp" } - tool_path { name: "gcc" path: "computecpp" } + tool_path { name: "gcc" path: "%{sycl_impl}" } # Use "-std=c++11" for nvcc. For consistency, force both the host compiler # and the device compiler to use "-std=c++11". - cxx_flag: "-std=c++11" + cxx_flag: "%{c++_std}" linker_flag: "-Wl,-no-as-needed" linker_flag: "-lstdc++" linker_flag: "-B/usr/bin/" @@ -53,7 +53,7 @@ toolchain { cxx_builtin_include_directory: "/usr/local/include" cxx_builtin_include_directory: "/usr/include" - cxx_builtin_include_directory: "%{computecpp_toolkit_path}" + cxx_builtin_include_directory: "%{sycl_include_dir}" cxx_builtin_include_directory: "%{python_lib_path}" tool_path { name: "gcov" path: "/usr/bin/gcov" } @@ -214,4 +214,4 @@ toolchain { compiler_flag: "-O2" compiler_flag: "-DNDEBUG" } -} +} \ No newline at end of file diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl new file mode 100644 index 0000000000..b470772fbf --- /dev/null +++ b/third_party/sycl/crosstool/trisycl.tpl @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +import os +import sys +import tempfile +from subprocess import call + +CPU_CXX_COMPILER = ('%{host_cxx_compiler}') +CPU_C_COMPILER = ('%{host_c_compiler}') + +CURRENT_DIR = os.path.dirname(sys.argv[0]) +TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' + +def main(): + compiler_flags = [] + + remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions') + # remove -fsamotoze-coverage from string with g++ + if 'g++' in CPU_CXX_COMPILER: + remove_flags += ('-fsanitize-coverage',) + compiler_flags += ['-fopenmp'] + else: + compiler_flags += ['-fopenmp=libomp'] + + compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)] + + + output_file_index = compiler_flags.index('-o') + 1 + output_file_name = compiler_flags[output_file_index] + + if(output_file_index == 1): + # we are linking + return call([CPU_CXX_COMPILER] + compiler_flags + + ['-Wl,--no-undefined']) + + # find what we compile + compiling_cpp = 0 + if('-c' in compiler_flags): + compiled_file_index = compiler_flags.index('-c') + 1 + compiled_file_name = compiler_flags[compiled_file_index] + if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', + '.C', '.cxx'))): + compiling_cpp = 1; + + debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic'] + + opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] + + compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', + '-DEIGEN_HAS_C99_MATH', + '-DEIGEN_MAX_ALIGN_BYTES=16', + '-DTENSORFLOW_USE_SYCL'] + opt_flags + + if(compiling_cpp == 1): + # create a blacklist of folders that will be skipped when compiling + # with triSYCL + skip_extensions = [".cu.cc"] + skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"] + skip_folders = [(folder + '/') for folder in skip_folders] + # if compiling external project skip triSYCL + if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders): + return call([CPU_CXX_COMPILER] + compiler_flags) + + host_compiler_flags = ['-xc++', '-Wno-unused-variable', + '-I', TRISYCL_INCLUDE_DIR] + compiler_flags + x = call([CPU_CXX_COMPILER] + host_compiler_flags) + return x + else: + # compile for C + return call([CPU_C_COMPILER] + compiler_flags) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl index 6cad190630..b6ceaadda7 100755 --- a/third_party/sycl/sycl/BUILD.tpl +++ b/third_party/sycl/sycl/BUILD.tpl @@ -10,16 +10,27 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE.text"]) config_setting( - name = "using_sycl", - values = { - "define": "using_sycl=true", + name = "using_sycl_ccpp", + define_values = { + "using_sycl": "true", + "using_trisycl": "false", }, ) +config_setting( + name = "using_sycl_trisycl", + define_values = { + "using_sycl": "true", + "using_trisycl": "false", + }, +) + + cc_library( name = "sycl_headers", hdrs = glob([ "**/*.h", + "**/*.hpp", ]), includes = [".", "include"], ) diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl index 09bef0a661..33386f8957 100755 --- a/third_party/sycl/sycl/build_defs.bzl.tpl +++ b/third_party/sycl/sycl/build_defs.bzl.tpl @@ -5,9 +5,24 @@ def if_sycl(if_true, if_false = []): Returns a select statement which evaluates to if_true if we're building with SYCL enabled. Otherwise, the select statement evaluates to if_false. + If we are building with triSYCL instead of ComputeCPP, a list with + the first element of if_true is returned. + """ + return select({ + "@local_config_sycl//sycl:using_sycl_ccpp": if_true, + "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1], + "//conditions:default": if_false + }) + +def if_ccpp(if_true, if_false = []): + """Shorthand for select()'ing if we are building with ComputeCPP. + Returns a select statement which evaluates to if_true if we're building + with ComputeCPP enabled. Otherwise, the select statement evaluates + to if_false. """ return select({ - "@local_config_sycl//sycl:using_sycl": if_true, + "@local_config_sycl//sycl:using_sycl_ccpp": if_true, + "@local_config_sycl//sycl:using_sycl_trisycl": if_false, "//conditions:default": if_false }) diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl index 7af063178e..a0c9e4e43a 100644 --- a/third_party/sycl/sycl_configure.bzl +++ b/third_party/sycl/sycl_configure.bzl @@ -5,20 +5,26 @@ * HOST_CXX_COMPILER: The host C++ compiler * HOST_C_COMPILER: The host C compiler * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit. + * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL. + (if using triSYCL instead of ComputeCPP) * PYTHON_LIB_PATH: The path to the python lib """ _HOST_CXX_COMPILER = "HOST_CXX_COMPILER" _HOST_C_COMPILER= "HOST_C_COMPILER" _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH" +_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" def _enable_sycl(repository_ctx): - if "TF_NEED_OPENCL" in repository_ctx.os.environ: - enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL"].strip() + if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ: + enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip() return enable_sycl == "1" return False +def _enable_compute_cpp(repository_ctx): + return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ + def auto_configure_fail(msg): """Output failure message when auto configuration fails.""" red = "\033[0;31m" @@ -59,6 +65,15 @@ def find_computecpp_root(repository_ctx): return sycl_name fail("Cannot find SYCL compiler, please correct your path") +def find_trisycl_include_dir(repository_ctx): + """Find triSYCL include directory. """ + sycl_name = "" + if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ: + sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip() + if sycl_name.startswith("/"): + return sycl_name + fail( "Cannot find triSYCL include directory, please correct your path") + def find_python_lib(repository_ctx): """Returns python path.""" if _PYTHON_LIB_PATH in repository_ctx.os.environ: @@ -171,26 +186,53 @@ def _sycl_autoconf_imp(repository_ctx): _tpl(repository_ctx, "sycl:platform.bzl") _tpl(repository_ctx, "crosstool:BUILD") _file(repository_ctx, "sycl:LICENSE.text") - _tpl(repository_ctx, "crosstool:computecpp", - { - "%{host_cxx_compiler}" : find_cc(repository_ctx), - "%{host_c_compiler}" : find_c(repository_ctx), - }) - - computecpp_root = find_computecpp_root(repository_ctx) - _check_dir(repository_ctx, computecpp_root) - - _tpl(repository_ctx, "crosstool:CROSSTOOL", - { - "%{computecpp_toolkit_path}" : computecpp_root, - "%{python_lib_path}" : find_python_lib(repository_ctx), - }) - - # symlink libraries - _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) - _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") - _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") - _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") + + if _enable_compute_cpp(repository_ctx): + _tpl(repository_ctx, "crosstool:computecpp", + { + "%{host_cxx_compiler}" : find_cc(repository_ctx), + "%{host_c_compiler}" : find_c(repository_ctx) + }) + + computecpp_root = find_computecpp_root(repository_ctx); + _check_dir(repository_ctx, computecpp_root) + + _tpl(repository_ctx, "crosstool:CROSSTOOL", + { + "%{sycl_include_dir}" : computecpp_root, + "%{sycl_impl}" : "computecpp", + "%{c++_std}" : "-std=c++11", + "%{python_lib_path}" : find_python_lib(repository_ctx), + }) + + # symlink libraries + _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) + _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") + _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") + _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") + else: + + trisycl_include_dir = find_trisycl_include_dir(repository_ctx); + _check_dir(repository_ctx, trisycl_include_dir) + + _tpl(repository_ctx, "crosstool:trisycl", + { + "%{host_cxx_compiler}" : find_cc(repository_ctx), + "%{host_c_compiler}" : find_c(repository_ctx), + "%{trisycl_include_dir}" : trisycl_include_dir + }) + + + _tpl(repository_ctx, "crosstool:CROSSTOOL", + { + "%{sycl_include_dir}" : trisycl_include_dir, + "%{sycl_impl}" : "trisycl", + "%{c++_std}" : "-std=c++1y", + "%{python_lib_path}" : find_python_lib(repository_ctx), + }) + + _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include") + sycl_configure = repository_rule( implementation = _sycl_autoconf_imp, diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD index 8509668891..d164ee719c 100644 --- a/third_party/zlib.BUILD +++ b/third_party/zlib.BUILD @@ -49,7 +49,7 @@ cc_library( ":windows_msvc": [], "//conditions:default": [ "-Wno-shift-negative-value", - "-Wno-implicit-function-declaration", + "-DZ_HAVE_UNISTD_H", ], }), includes = ["."], diff --git a/tools/bazel.rc b/tools/bazel.rc index 2d7201ae57..04c24d7511 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -9,13 +9,16 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true build:mkl --define=using_mkl=true build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl --define=using_sycl=true +build:sycl --define=using_sycl=true --define=using_trisycl=false build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address +build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address + +build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true build --define=use_fast_cpp_protos=true build --define=allow_oversize_protos=true diff --git a/util/python/BUILD b/util/python/BUILD index 96daf9947a..f5fa0c6d29 100644 --- a/util/python/BUILD +++ b/util/python/BUILD @@ -1,4 +1,4 @@ -licenses(["restricted"]) +licenses(["notice"]) # New BSD, Python Software Foundation package(default_visibility = ["//visibility:public"]) -- GitLab From d0a3b2d3983b970b750329088013dc5cb67d96f9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Nov 2017 23:55:59 -0800 Subject: [PATCH 0744/1801] Merged commit includes the following changes: 176617057 by yifeif: Internal change. -- 176615737 by yifeif: Fix internal tests. -- PiperOrigin-RevId: 176617057 --- configure.py | 19 +-- tensorflow/compiler/aot/tfcompile.bzl | 4 +- .../compiler/tests/fused_batchnorm_test.py | 6 +- .../contrib/android/cmake/CMakeLists.txt | 2 +- .../python/kernel_tests/cauchy_test.py | 47 ++++--- .../distributions/python/ops/cauchy.py | 18 +-- .../contrib/layers/python/layers/layers.py | 1 - .../layers/python/layers/layers_test.py | 28 ++-- .../python/learn/learn_io/data_feeder.py | 6 +- .../linear_optimizer/python/ops/sdca_ops.py | 5 +- .../contrib/lite/testing/generate_examples.py | 3 +- tensorflow/contrib/opt/__init__.py | 16 ++- .../training/multitask_optimizer_wrapper.py | 60 ++++---- .../multitask_optimizer_wrapper_test.py | 40 +++--- .../python/kernel_tests/core_rnn_cell_test.py | 31 ++-- .../rnn/python/kernel_tests/rnn_cell_test.py | 63 ++++----- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 98 +++++++------ .../seq2seq/python/ops/attention_wrapper.py | 7 +- tensorflow/contrib/verbs/rdma.cc | 61 ++++---- .../api_def/base_api/api_def_UniqueV2.pbtxt | 47 +++++++ .../base_api/api_def_UnsortedSegmentSum.pbtxt | 2 + tensorflow/core/graph/graph.h | 2 +- tensorflow/core/graph/graph_test.cc | 8 +- tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/bincount_op.cc | 5 +- tensorflow/core/kernels/bincount_op.h | 2 +- tensorflow/core/kernels/bincount_op_gpu.cu.cc | 6 +- tensorflow/core/kernels/bincount_op_test.cc | 4 +- .../core/kernels/bucketize_op_gpu.cu.cc | 8 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 24 ++-- tensorflow/core/kernels/cwise_op_asinh.cc | 2 +- tensorflow/core/kernels/cwise_op_atanh.cc | 2 +- .../core/kernels/depthwise_conv_grad_op.cc | 9 +- tensorflow/core/kernels/depthwise_conv_op.cc | 5 + tensorflow/core/kernels/depthwise_conv_op.h | 3 +- tensorflow/core/kernels/maxpooling_op.cc | 14 +- .../core/kernels/maxpooling_op_gpu.cu.cc | 16 +-- tensorflow/core/kernels/mkl_tfconv_op.h | 20 +-- tensorflow/core/kernels/ops_util.h | 13 ++ tensorflow/core/platform/posix/error.cc | 4 +- tensorflow/core/platform/posix/port.cc | 8 +- tensorflow/core/util/cuda_kernel_helper.h | 12 ++ tensorflow/core/util/mkl_util.h | 132 +++++++++--------- tensorflow/core/util/mkl_util_test.cc | 1 - .../test/java/org/tensorflow/ShapeTest.java | 6 +- .../python/estimator/inputs/numpy_io.py | 17 ++- .../python/estimator/inputs/numpy_io_test.py | 11 +- tensorflow/python/framework/test_util.py | 7 +- .../python/kernel_tests/array_ops_test.py | 11 +- .../python/kernel_tests/bincount_op_test.py | 7 +- .../python/kernel_tests/constant_op_test.py | 17 ++- tensorflow/python/kernel_tests/conv1d_test.py | 2 +- .../python/kernel_tests/conv_ops_3d_test.py | 120 +++++++--------- .../python/kernel_tests/pooling_ops_test.py | 3 +- .../python/kernel_tests/reader_ops_test.py | 15 +- .../segment_reduction_ops_test.py | 2 +- .../python/kernel_tests/unique_op_test.py | 1 + tensorflow/python/layers/normalization.py | 51 +++---- .../python/layers/normalization_test.py | 40 +++--- tensorflow/python/ops/array_ops.py | 19 +-- .../python/ops/distributions/multinomial.py | 25 ++-- tensorflow/python/ops/image_ops_impl.py | 5 +- tensorflow/python/ops/linalg_ops.py | 24 ++-- tensorflow/python/ops/metrics_impl.py | 7 +- tensorflow/python/ops/nn_impl.py | 1 + tensorflow/python/ops/nn_ops.py | 59 ++++---- tensorflow/stream_executor/dnn.cc | 1 + third_party/sycl/crosstool/trisycl.tpl | 60 ++++---- third_party/sycl/sycl_configure.bzl | 1 - 69 files changed, 733 insertions(+), 644 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt diff --git a/configure.py b/configure.py index 26da09bd94..1f205861f1 100644 --- a/configure.py +++ b/configure.py @@ -883,27 +883,28 @@ def set_computecpp_toolkit_path(environ_cp): write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', computecpp_toolkit_path) + def set_trisycl_include_dir(environ_cp): - """Set TRISYCL_INCLUDE_DIR""" + """Set TRISYCL_INCLUDE_DIR.""" ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' 'include directory. (Use --config=sycl_trisycl ' 'when building with Bazel) ' - '[Default is %s]: ' - ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) + '[Default is %s]: ') % ( + _DEFAULT_TRISYCL_INCLUDE_DIR) while True: trisycl_include_dir = get_from_env_or_user_or_default( - environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, - _DEFAULT_TRISYCL_INCLUDE_DIR) + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) if os.path.exists(trisycl_include_dir): break - print('Invalid triSYCL include directory, %s cannot be found' - % (trisycl_include_dir)) + print('Invalid triSYCL include directory, %s cannot be found' % + (trisycl_include_dir)) # Set TRISYCL_INCLUDE_DIR environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir - write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', - trisycl_include_dir) + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) + def set_mpi_home(environ_cp): """Set MPI_HOME.""" diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index 1e22b760b8..6c385af3b3 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -152,7 +152,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - flags), + " " + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -189,7 +189,7 @@ def tf_library(name, graph, config, " --cpp_class=" + cpp_class + " --target_triple=" + target_llvm_triple() + " --out_session_module=$(@D)/" + session_module_pb + - flags), + " " + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py index a773b5a947..00a9c9a65b 100644 --- a/tensorflow/compiler/tests/fused_batchnorm_test.py +++ b/tensorflow/compiler/tests/fused_batchnorm_test.py @@ -76,7 +76,8 @@ class FusedBatchNormTest(XLATestCase): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") + offset = array_ops.placeholder( + np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y_ref, mean_ref, var_ref = self._reference_training( x_val, scale_val, offset_val, epsilon, data_format) @@ -112,7 +113,8 @@ class FusedBatchNormTest(XLATestCase): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") + offset = array_ops.placeholder( + np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y, mean, var = nn.fused_batch_norm( t_val, diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt index 25ada5ba27..aba356d616 100644 --- a/tensorflow/contrib/android/cmake/CMakeLists.txt +++ b/tensorflow/contrib/android/cmake/CMakeLists.txt @@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \ -std=c++11 -fno-rtti -fno-exceptions \ -O2 -Wno-narrowing -fomit-frame-pointer \ - -mfpu=neon -mfloat-abi=softfp -fPIE \ + -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \ -ftemplate-depth=900 \ -DGOOGLE_PROTOBUF_NO_RTTI \ -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER") diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py index 7f7697357c..73747db31c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py @@ -41,6 +41,7 @@ def try_import(name): # pylint: disable=invalid-name tf_logging.warning("Could not import %s: %s" % (name, str(e))) return module + stats = try_import("scipy.stats") @@ -62,9 +63,9 @@ class CauchyTest(test.TestCase): self.assertAllEqual(expected, scale_shape.eval()) loc = array_ops.zeros(loc_shape) scale = array_ops.ones(scale_shape) - self.assertAllEqual( - expected, - array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval()) + self.assertAllEqual(expected, + array_ops.shape( + cauchy_lib.Cauchy(loc, scale).sample()).eval()) def _testParamStaticShapes(self, sample_shape, expected): param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape) @@ -92,8 +93,7 @@ class CauchyTest(test.TestCase): cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) log_pdf = cauchy.log_prob(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.eval().shape) self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) @@ -115,16 +115,15 @@ class CauchyTest(test.TestCase): with self.test_session(): batch_size = 6 loc = constant_op.constant([[3.0, -3.0]] * batch_size) - scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] * - batch_size) + scale = constant_op.constant( + [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size) x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) log_pdf = cauchy.log_prob(x) log_pdf_values = log_pdf.eval() self.assertEqual(log_pdf.shape, (6, 2)) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.eval().shape) self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) @@ -248,8 +247,7 @@ class CauchyTest(test.TestCase): cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) entropy = cauchy.entropy() - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - entropy.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.eval().shape) self.assertAllEqual(cauchy.batch_shape, entropy.shape) @@ -257,7 +255,7 @@ class CauchyTest(test.TestCase): if not stats: return - expected_entropy = stats.cauchy(loc, scale).entropy() + expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3)) self.assertAllClose(expected_entropy, entropy.eval()) def testCauchyMode(self): @@ -368,8 +366,8 @@ class CauchyTest(test.TestCase): self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) - expected_shape = (tensor_shape.TensorShape( - [n.eval()]).concatenate(cauchy.batch_shape)) + expected_shape = ( + tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) @@ -385,18 +383,18 @@ class CauchyTest(test.TestCase): samples = cauchy.sample(n) sample_values = samples.eval() self.assertEqual(samples.shape, (100000, batch_size, 2)) - self.assertAllClose(np.median(sample_values[:, 0, 0]), - loc_v[0], atol=1e-1) - self.assertAllClose(np.median(sample_values[:, 0, 1]), - loc_v[1], atol=1e-1) + self.assertAllClose( + np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1) + self.assertAllClose( + np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1) expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) - expected_shape = (tensor_shape.TensorShape( - [n.eval()]).concatenate(cauchy.batch_shape)) + expected_shape = ( + tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) @@ -428,9 +426,12 @@ class CauchyTest(test.TestCase): self.assertEqual(cauchy.event_shape, ()) self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) self.assertAllEqual( - sess.run(cauchy.batch_shape_tensor(), - feed_dict={loc: 5.0, - scale: [1.0, 2.0]}), [2]) + sess.run( + cauchy.batch_shape_tensor(), + feed_dict={ + loc: 5.0, + scale: [1.0, 2.0] + }), [2]) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py index a17bb091f6..8d59c1abfb 100644 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -30,7 +30,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution - __all__ = [ "Cauchy", ] @@ -97,7 +96,7 @@ class Cauchy(distribution.Distribution): validate_args=False, allow_nan_stats=True, name="Cauchy"): - """Construct Cauchy distributions with loc and and scale `loc` and `scale`. + """Construct Cauchy distributions. The parameters `loc` and `scale` must be shaped in a way that supports broadcasting (e.g. `loc + scale` is a valid operation). @@ -121,8 +120,8 @@ class Cauchy(distribution.Distribution): """ parameters = locals() with ops.name_scope(name, values=[loc, scale]): - with ops.control_dependencies([check_ops.assert_positive(scale)] if - validate_args else []): + with ops.control_dependencies([check_ops.assert_positive(scale)] + if validate_args else []): self._loc = array_ops.identity(loc, name="loc") self._scale = array_ops.identity(scale, name="scale") check_ops.assert_same_float_dtype([self._loc, self._scale]) @@ -138,8 +137,8 @@ class Cauchy(distribution.Distribution): @staticmethod def _param_shapes(sample_shape): return dict( - zip(("loc", "scale"), ([ops.convert_to_tensor( - sample_shape, dtype=dtypes.int32)] * 2))) + zip(("loc", "scale"), + ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2))) @property def loc(self): @@ -153,13 +152,10 @@ class Cauchy(distribution.Distribution): def _batch_shape_tensor(self): return array_ops.broadcast_dynamic_shape( - array_ops.shape(self.loc), - array_ops.shape(self.scale)) + array_ops.shape(self.loc), array_ops.shape(self.scale)) def _batch_shape(self): - return array_ops.broadcast_static_shape( - self.loc.shape, - self.scale.shape) + return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape) def _event_shape_tensor(self): return constant_op.constant([], dtype=dtypes.int32) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 9378fe8799..f1debc8590 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -309,7 +309,6 @@ def _fused_batch_norm(inputs, new_shape = [-1, channels, 1, 1] inputs = array_ops.reshape(inputs, new_shape) inputs_shape = inputs.get_shape() - dtype = inputs.dtype.base_dtype if data_format == DATA_FORMAT_NHWC: params_shape = inputs_shape[-1:] else: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 5aa2253516..27bd3172d6 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1779,7 +1779,8 @@ class BatchNormTest(test.TestCase): dtype = dtypes.float32 height, width = 3, 3 with self.test_session(): - images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype) + images = np.random.uniform(size=(5, height, width, 3)).astype( + dtype.as_numpy_dtype) output = _layers.batch_norm(images, fused=fused) expected_name = ('BatchNorm/FusedBatchNorm' if fused else 'BatchNorm/batchnorm') @@ -2665,18 +2666,18 @@ class BatchNormTest(test.TestCase): # Test case for 11673 with self.test_session() as sess: a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) - b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW', - zero_debias_moving_mean=True) + _layers.batch_norm( + a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True) a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10)) - b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW', - zero_debias_moving_mean=True) + _layers.batch_norm( + a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True) sess.run(variables_lib.global_variables_initializer()) def testVariablesAreFloat32(self): height, width = 3, 3 with self.test_session(): - images = random_ops.random_uniform((5, height, width, 3), - seed=1, dtype=dtypes.float16) + images = random_ops.random_uniform( + (5, height, width, 3), seed=1, dtype=dtypes.float16) _layers.batch_norm(images, scale=True) beta = variables.get_variables_by_name('beta')[0] gamma = variables.get_variables_by_name('gamma')[0] @@ -2691,17 +2692,13 @@ class BatchNormTest(test.TestCase): channels = shape[1] images = np.arange(np.product(shape), dtype=dtype).reshape(shape) beta = init_ops.constant_initializer( - np.arange( - 2, channels + 2, dtype=np.float32)) + np.arange(2, channels + 2, dtype=np.float32)) gamma = init_ops.constant_initializer( - np.arange( - 10, channels + 10, dtype=np.float32) * 2.0) + np.arange(10, channels + 10, dtype=np.float32) * 2.0) mean = init_ops.constant_initializer( - np.arange( - 3, channels + 3, dtype=np.float32) * 5.0) + np.arange(3, channels + 3, dtype=np.float32) * 5.0) variance = init_ops.constant_initializer( - np.arange( - 1, channels + 1, dtype=np.float32) * 4.0) + np.arange(1, channels + 1, dtype=np.float32) * 4.0) output = _layers.batch_norm( images, fused=True, @@ -2726,7 +2723,6 @@ class BatchNormTest(test.TestCase): res_16 = self._runFusedBatchNorm(shape, np.float16) self.assertAllClose(res_32, res_16, rtol=1e-3) - def testAdjustmentCreated(self): # Tests that the adjustment is appropriately passed to and used by the core # BN layer. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index db18ebf05d..86fad4c553 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,7 +28,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -369,10 +368,11 @@ class DataFeeder(object): if x_is_dict: num_samples = list(self._x.values())[0].shape[0] elif tensor_util.is_tensor(self._x): - num_samples = self._x.shape[0].value # shape will be a Dimension, extract an int + num_samples = self._x.shape[ + 0].value # shape will be a Dimension, extract an int else: num_samples = self._x.shape[0] - + if self._shuffle: self.indices = self.random_state.permutation(num_samples) else: diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 86d8484391..7526f3ae0d 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -251,8 +251,9 @@ class SdcaModel(object): result_dense = 0.0 for i in range(len(dense_variables)): - result_dense += math_ops.matmul( - dense_features[i], array_ops.expand_dims(dense_variables[i], -1)) + result_dense += math_ops.matmul(dense_features[i], + array_ops.expand_dims( + dense_variables[i], -1)) # Reshaping to allow shape inference at graph construction time. return array_ops.reshape(result_dense, [-1]) + result_sparse diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index b122818221..5bca82ded0 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -40,6 +40,7 @@ from six import StringIO # TODO(aselle): Disable GPU for now os.environ["CUDA_VISIBLE_DEVICES"] = "-1" +# pylint: disable=g-import-not-at-top import tensorflow as tf from google.protobuf import text_format # TODO(aselle): switch to TensorFlow's resource_loader @@ -383,7 +384,7 @@ def make_zip_of_tests(zip_path, report["toco_log"] = "" tf.reset_default_graph() - with tf.device('/cpu:0'): + with tf.device("/cpu:0"): try: inputs, outputs = make_graph(param_dict_real) except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 4c60c99342..04643a6058 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -34,12 +34,18 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'PowerSignOptimizer', 'AddSignOptimizer' + 'PowerSignOptimizer', + 'AddSignOptimizer' 'DelayCompensatedGradientDescentOptimizer', - 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', - 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', - 'ScipyOptimizerInterface', 'VariableClippingOptimizer', - 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', + 'DropStaleGradientOptimizer', + 'ExternalOptimizerInterface', + 'LazyAdamOptimizer', + 'NadamOptimizer', + 'MovingAverageOptimizer', + 'ScipyOptimizerInterface', + 'VariableClippingOptimizer', + 'MultitaskOptimizerWrapper', + 'clip_gradients_by_global_norm', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py index c26037935d..cb6c77a86f 100644 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - -"""An optimizer wrapper that ensures correct behaviour -of stateful optimizers with multitask loss.""" +"""An optimizer wrapper for stateful optimizers with multitask loss.""" from __future__ import absolute_import from __future__ import division @@ -30,26 +28,27 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.training import optimizer -__all__ = ["MultitaskOptimizerWrapper", - "clip_gradients_by_global_norm"] +__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm'] + def _is_all_zeros(grad): all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0) return all_zeros + def _get_wrapper(fn, opt): + def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond( - all_zeros, - control_flow_ops.no_op, - lambda: fn(grad, *args, **kwargs)) + return control_flow_ops.cond(all_zeros, control_flow_ops.no_op, + lambda: fn(grad, *args, **kwargs)) + wrapper = types.MethodType(wrapper, opt) return wrapper + class MultitaskOptimizerWrapper(object): - """Optimizer wrapper that ensures that - all-zero gradients don't affect the optimizer state. + """Optimizer wrapper making all-zero gradients harmless. This might be useful when a multi-task loss is used, and some components of the loss might be @@ -88,20 +87,20 @@ class MultitaskOptimizerWrapper(object): gradvars_clipped, global_step=batch) ``` """ + def __init__(self, opt): - """ + """Constructor. + Args: - opt: an instance of a class that implements tf.train.Optimizer. + opt: an instance of a class that implements tf.train.Optimizer. """ if not isinstance(opt, optimizer.Optimizer): raise TypeError( - "Supplied optimizer must be an instance of tf.train.Optimizer") + 'Supplied optimizer must be an instance of tf.train.Optimizer') self._opt = opt - overriden_methods = ('_apply_dense', - '_resource_apply_dense', - '_apply_sparse', - '_resource_apply_sparse') - for name in overriden_methods: + overridden_methods = ('_apply_dense', '_resource_apply_dense', + '_apply_sparse', '_resource_apply_sparse') + for name in overridden_methods: fn = getattr(self._opt, name) wrapper = _get_wrapper(fn, self._opt) setattr(self._opt, name, wrapper) @@ -112,27 +111,30 @@ class MultitaskOptimizerWrapper(object): def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.): """Clips gradients of a multitask loss by their global norm. + Ignores all-zero tensors when computing the global norm. Args: - gradients_variables: a list of pairs (gradient, variable). - clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. + gradients_variables: a list of pairs (gradient, variable). + clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. Returns: - list: A list of pairs of the same type as gradients_variables,. - fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. + list: A list of pairs of the same type as gradients_variables,. + fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. """ gradients, variables = six.moves.zip(*gradients_variables) + def _replace_nonexisting_grad(grad): if grad is None: return grad all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond(all_zeros, - lambda: array_ops.zeros( - [], dtype=dtypes.as_dtype(grad.dtype)), - lambda: grad) + return control_flow_ops.cond( + all_zeros, + lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)), + lambda: grad) + nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients] fixed_global_norm = clip_ops.global_norm(nonzero_gradients) - gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm, - use_norm=fixed_global_norm) + gradients, _ = clip_ops.clip_by_global_norm( + gradients, clip_norm, use_norm=fixed_global_norm) return list(six.moves.zip(gradients, variables)), fixed_global_norm diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py index b06213f715..618d8eb18d 100644 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py @@ -18,6 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np +import six + from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -25,13 +28,11 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import momentum -import numpy as np -import six class MultitaskOptimizerWrapperTest(test.TestCase): + """Tests for the multitask optimizer wrapper. """ - Tests for the multitask optimizer wrapper. - """ + def testWrapper(self): with self.test_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) @@ -39,12 +40,10 @@ class MultitaskOptimizerWrapperTest(test.TestCase): grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32) grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32) grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) - mom_opt_impl = momentum.MomentumOptimizer( - learning_rate=2.0, momentum=0.9) + mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9) mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper( mom_opt_impl) - mom_update = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) + mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) mom_update_partial = mom_opt.apply_gradients( zip([grads_allzero, grads1], [var0, var1])) mom_update_no_action = mom_opt.apply_gradients( @@ -63,14 +62,13 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 1: normal momentum update. self.evaluate(mom_update) # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), - self.evaluate(slot1)) + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([0.01, 0.01]), self.evaluate(slot1)) # Check that the parameters have been updated. self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0)) self.assertAllCloseAccordingToType( np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), self.evaluate(var1)) @@ -78,8 +76,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 2: momentum update that changes only slot1 but not slot0. self.evaluate(mom_update_partial) # Check that only the relevant momentum accumulator has been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), self.evaluate(slot1)) @@ -87,8 +85,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 3: momentum update that does not change anything. self.evaluate(mom_update_no_action) # Check that the momentum accumulators have *NOT* been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), self.evaluate(slot1)) @@ -105,8 +103,9 @@ class MultitaskOptimizerWrapperTest(test.TestCase): grads3 = None varlist = [var0, var1, var2, var3] gradients = [grads0, grads1, grads2, grads3] - clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm( - six.moves.zip(gradients, varlist), clip_norm=1.0) + clipped_gradvars, global_norm = ( + multitask_optimizer_wrapper.clip_gradients_by_global_norm( + six.moves.zip(gradients, varlist), clip_norm=1.0)) clipped_grads = list(six.moves.zip(*clipped_gradvars))[0] reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0]))) self.assertAllCloseAccordingToType( @@ -115,5 +114,6 @@ class MultitaskOptimizerWrapperTest(test.TestCase): self.evaluate(clipped_grads[2]), np.array([0., 0.])) self.assertEqual(clipped_grads[3], None) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 16b6d145e3..f130a2187c 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.contrib import rnn as contrib_rnn from tensorflow.contrib.rnn.python.ops import core_rnn_cell +from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell from tensorflow.core.protobuf import config_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -38,9 +39,6 @@ from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test -from tensorflow.python.framework import test_util -from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell - # pylint: enable=protected-access @@ -374,19 +372,20 @@ class RNNCellTest(test.TestCase): h = array_ops.zeros([batch_size, num_proj]) state = rnn_cell_impl.LSTMStateTuple(c, h) cell = contrib_rnn_cell.LayerNormLSTMCell( - num_units=num_units, - num_proj=num_proj, - forget_bias=1.0, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0) + num_units=num_units, + num_proj=num_proj, + forget_bias=1.0, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) g, out_m = cell(x, state) sess.run([variables_lib.global_variables_initializer()]) - res = sess.run([g, out_m], { - x.name: np.ones((batch_size, input_size)), - c.name: 0.1 * np.ones((batch_size, num_units)), - h.name: 0.1 * np.ones((batch_size, num_proj)) - }) + res = sess.run( + [g, out_m], { + x.name: np.ones((batch_size, input_size)), + c.name: 0.1 * np.ones((batch_size, num_units)), + h.name: 0.1 * np.ones((batch_size, num_proj)) + }) self.assertEqual(len(res), 2) # The numbers in results were not calculated, this is mostly just a # smoke test. @@ -396,9 +395,9 @@ class RNNCellTest(test.TestCase): # Different inputs so different outputs and states for i in range(1, batch_size): self.assertTrue( - float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) + float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) self.assertTrue( - float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) + float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) def testOutputProjectionWrapper(self): with self.test_session() as sess: diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index b4a5f2d7eb..46823fa364 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -996,26 +996,19 @@ class RNNCellTest(test.TestCase): output, state = cell(x, hidden) sess.run([variables.global_variables_initializer()]) - res = sess.run([output, state], { - hidden[0].name: - np.array([[[[[1.],[1.]], - [[1.],[1.]]], - [[[1.],[1.]], - [[1.],[1.]]]], - [[[[2.],[2.]], - [[2.],[2.]]], - [[[2.],[2.]], - [[2.],[2.]]]]]), - x.name: - np.array([[[[[1.],[1.]], - [[1.],[1.]]], - [[[1.],[1.]], - [[1.],[1.]]]], - [[[[2.],[2.]], - [[2.],[2.]]], - [[[2.],[2.]], - [[2.],[2.]]]]]) - }) + res = sess.run( + [output, state], { + hidden[0].name: + np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ + 1. + ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], + [[[2.], [2.]], [[2.], [2.]]]]]), + x.name: + np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ + 1. + ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]], + [[2.], [2.]]]]]) + }) # This is a smoke test, making sure expected values are unchanged. self.assertEqual(len(res), 2) self.assertAllClose(res[0], res[1].h) @@ -1276,10 +1269,8 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5) - def testBasicLSTMCellWithStateTupleLayerNorm(self): - """The results of LSTMCell and LayerNormBasicLSTMCell - should be same. """ + """The results of LSTMCell and LayerNormBasicLSTMCell should be the same.""" with self.test_session() as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): @@ -1290,21 +1281,21 @@ class LayerNormBasicLSTMCellTest(test.TestCase): c1 = array_ops.zeros([1, 2]) h1 = array_ops.zeros([1, 2]) state1 = rnn_cell_impl.LSTMStateTuple(c1, h1) - cell = rnn_cell_impl.MultiRNNCell( - [contrib_rnn_cell.LayerNormLSTMCell( - 2, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0) for _ in range(2)]) + cell = rnn_cell_impl.MultiRNNCell([ + contrib_rnn_cell.LayerNormLSTMCell( + 2, layer_norm=True, norm_gain=1.0, norm_shift=0.0) + for _ in range(2) + ]) h, (s0, s1) = cell(x, (state0, state1)) sess.run([variables.global_variables_initializer()]) - res = sess.run([h, s0, s1], { - x.name: np.array([[1., 1.]]), - c0.name: 0.1 * np.asarray([[0, 1]]), - h0.name: 0.1 * np.asarray([[2, 3]]), - c1.name: 0.1 * np.asarray([[4, 5]]), - h1.name: 0.1 * np.asarray([[6, 7]]), - }) + res = sess.run( + [h, s0, s1], { + x.name: np.array([[1., 1.]]), + c0.name: 0.1 * np.asarray([[0, 1]]), + h0.name: 0.1 * np.asarray([[2, 3]]), + c1.name: 0.1 * np.asarray([[4, 5]]), + h1.name: 0.1 * np.asarray([[6, 7]]), + }) expected_h = np.array([[-0.38079708, 0.38079708]]) expected_h0 = np.array([[-0.38079708, 0.38079708]]) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 5e85c125df..0698d40438 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -36,7 +36,6 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope as vs -from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest @@ -115,7 +114,7 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. - + Layer normalization implementation is based on: https://arxiv.org/abs/1607.06450. @@ -124,15 +123,24 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton and is applied before the internal nonlinearities. - + """ - def __init__(self, num_units, use_peepholes=False, - initializer=None, num_proj=None, proj_clip=None, - num_unit_shards=1, num_proj_shards=1, - forget_bias=1.0, state_is_tuple=True, - activation=math_ops.tanh, reuse=None, - layer_norm=False, norm_gain=1.0, norm_shift=0.0): + def __init__(self, + num_units, + use_peepholes=False, + initializer=None, + num_proj=None, + proj_clip=None, + num_unit_shards=1, + num_proj_shards=1, + forget_bias=1.0, + state_is_tuple=True, + activation=math_ops.tanh, + reuse=None, + layer_norm=False, + norm_gain=1.0, + norm_shift=0.0): """Initialize the parameters for an LSTM cell. Args: @@ -164,8 +172,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): `layer_norm` has been set to `False`, this argument will be ignored. norm_shift: float, The layer normalization shift initial value. If `layer_norm` has been set to `False`, this argument will be ignored. - - """ super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: @@ -2049,8 +2055,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell): if self._skip_connection: self._total_output_channels += self._input_shape[-1] - state_size = tensor_shape.TensorShape(self._input_shape[:-1] - + [self._output_channels]) + state_size = tensor_shape.TensorShape( + self._input_shape[:-1] + [self._output_channels]) self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size) self._output_size = tensor_shape.TensorShape(self._input_shape[:-1] + [self._total_output_channels]) @@ -2110,11 +2116,8 @@ class Conv3DLSTMCell(ConvLSTMCell): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) -def _conv(args, - filter_size, - num_features, - bias, - bias_start=0.0): + +def _conv(args, filter_size, num_features, bias, bias_start=0.0): """convolution: Args: args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, @@ -2391,12 +2394,19 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): """ - def __init__(self, num_units, - use_peepholes=False, cell_clip=None, - initializer=None, num_proj=None, proj_clip=None, + def __init__(self, + num_units, + use_peepholes=False, + cell_clip=None, + initializer=None, + num_proj=None, + proj_clip=None, forget_bias=1.0, - activation=None, layer_norm=False, - norm_gain=1.0, norm_shift=0.0, reuse=None): + activation=None, + layer_norm=False, + norm_gain=1.0, + norm_shift=0.0, + reuse=None): """Initialize the parameters for an LSTM cell. Args: @@ -2457,7 +2467,6 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): def output_size(self): return self._output_size - def _linear(self, args, output_size, @@ -2507,9 +2516,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): scope = vs.get_variable_scope() with vs.variable_scope(scope) as outer_scope: weights = vs.get_variable( - "kernel", [total_arg_size, output_size], - dtype=dtype, - initializer=kernel_initializer) + "kernel", [total_arg_size, output_size], + dtype=dtype, + initializer=kernel_initializer) if len(args) == 1: res = math_ops.matmul(args[0], weights) else: @@ -2521,9 +2530,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): if bias_initializer is None: bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) biases = vs.get_variable( - "bias", [output_size], - dtype=dtype, - initializer=bias_initializer) + "bias", [output_size], dtype=dtype, initializer=bias_initializer) if not layer_norm: res = nn_ops.bias_add(res, biases) @@ -2554,7 +2561,6 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): ValueError: If input size cannot be inferred from inputs via static shape inference. """ - num_proj = self._num_units if self._num_proj is None else self._num_proj sigmoid = math_ops.sigmoid (c_prev, m_prev) = state @@ -2567,10 +2573,14 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: # i = input_gate, j = new_input, f = forget_gate, o = output_gate - lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True, - bias_initializer=None, layer_norm=self._layer_norm) + lstm_matrix = self._linear( + [inputs, m_prev], + 4 * self._num_units, + bias=True, + bias_initializer=None, + layer_norm=self._layer_norm) i, j, f, o = array_ops.split( - value=lstm_matrix, num_or_size_splits=4, axis=1) + value=lstm_matrix, num_or_size_splits=4, axis=1) if self._layer_norm: i = _norm(self._norm_gain, self._norm_shift, i, "input") @@ -2580,20 +2590,22 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): # Diagonal connections if self._use_peepholes: - with vs.variable_scope(unit_scope) as projection_scope: + with vs.variable_scope(unit_scope): w_f_diag = vs.get_variable( - "w_f_diag", shape=[self._num_units], dtype=dtype) + "w_f_diag", shape=[self._num_units], dtype=dtype) w_i_diag = vs.get_variable( - "w_i_diag", shape=[self._num_units], dtype=dtype) + "w_i_diag", shape=[self._num_units], dtype=dtype) w_o_diag = vs.get_variable( - "w_o_diag", shape=[self._num_units], dtype=dtype) + "w_o_diag", shape=[self._num_units], dtype=dtype) if self._use_peepholes: - c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + - sigmoid(i + w_i_diag * c_prev) * self._activation(j)) + c = ( + sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) else: - c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * - self._activation(j)) + c = ( + sigmoid(f + self._forget_bias) * c_prev + + sigmoid(i) * self._activation(j)) if self._layer_norm: c = _norm(self._norm_gain, self._norm_shift, c, "state") @@ -2608,7 +2620,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): m = sigmoid(o) * self._activation(c) if self._num_proj is not None: - with vs.variable_scope("projection") as proj_scope: + with vs.variable_scope("projection"): m = self._linear(m, self._num_proj, bias=False) if self._proj_clip is not None: diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index c3b180d9f4..e87ef41388 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -192,7 +192,8 @@ class _BaseAttentionMechanism(AttentionMechanism): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) if score_mask_value is None: - score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf) + score_mask_value = dtypes.as_dtype( + self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -1145,7 +1146,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, name="attention_layer", use_bias=False, + attention_layer_size, + name="attention_layer", + use_bias=False, dtype=attention_mechanisms[i].dtype) for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 331943a3ef..ac8d994502 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -16,8 +16,8 @@ limitations under the License. #ifdef TENSORFLOW_USE_VERBS #include "tensorflow/contrib/verbs/rdma.h" -#include #include +#include #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -137,7 +137,7 @@ ibv_device* set_device() { if (!env_p_rdma_device.empty()) { for (device_index = 0; device_index < dev_num; device_index++) { if (!env_p_rdma_device.compare( - ibv_get_device_name(dev_list[device_index]))) { + ibv_get_device_name(dev_list[device_index]))) { CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) << "Device " << ibv_get_device_name(dev_list[device_index]) << " has no active ports"; @@ -147,7 +147,7 @@ ibv_device* set_device() { // check validity of input device CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; } else { - // set default device + // set default device str_port_num = get_env_var("RDMA_DEVICE_PORT"); CHECK(str_port_num.empty()) << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; @@ -177,7 +177,7 @@ ibv_device* set_device() { // Returns: // port to use uint8_t set_port(ibv_context* context) { - uint8_t port_num = 0; //0 is illegal port number + uint8_t port_num = 0; // 0 is illegal port number string str_port_num; ibv_device_attr device_att; ibv_port_attr port_attr; @@ -199,9 +199,7 @@ uint8_t set_port(ibv_context* context) { // check if port id active CHECK(port_attr.state == IBV_PORT_ACTIVE) << "Selected RDMA_DEVICE_PORT is not active"; - } - // set default port - else { + } else { // set default port for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { rc = ibv_query_port(context, port_index, &port_attr); CHECK(!rc) << "Failed to query the port" << port_index; @@ -269,7 +267,7 @@ bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, // Function to set GID index. // If the port link is IB, no GID index should be selected. // If Ethernet but RDMA_GID_INDEX not set gid index that supports -// RoCE V2 will be chosen(fails if more then one IP is configured) +// RoCE V2 will be chosen(fails if more than one IP is configured) // Args: // context - device context // port_num - port number @@ -302,7 +300,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { } } switch (port_attr.link_layer) { - case(IBV_LINK_LAYER_ETHERNET) : + case (IBV_LINK_LAYER_ETHERNET): gid_str = get_env_var("RDMA_GID_INDEX"); if (!gid_str.empty()) { gid_index = stoi(gid_str); @@ -313,7 +311,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { << "More than one IP is available, please specify GID_INDEX"; } break; - case(IBV_LINK_LAYER_INFINIBAND) : // no need in GID index + case (IBV_LINK_LAYER_INFINIBAND): // no need in GID index break; default: LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " @@ -374,7 +372,8 @@ enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { break; default: CHECK(0) << "Error: MTU input value must be one of the following: 256, " - "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n"; + "512, 1024, 2048, 4096. MTU " + << mtu << " is invalid\n"; break; } CHECK(mtu < port_attr.active_mtu) @@ -453,9 +452,9 @@ void RdmaAdapter::Process_CQ() { CHECK_GE(ne, 0); for (int i = 0; i < ne; ++i) { CHECK(wc_[i].status == IBV_WC_SUCCESS) - << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " " - << wc_[i].status << " " << static_cast(wc_[i].wr_id) << " " - << wc_[i].vendor_err; + << "Failed status \n" + << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " " + << static_cast(wc_[i].wr_id) << " " << wc_[i].vendor_err; if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) { RdmaChannel* rc = reinterpret_cast(wc_[i].wr_id); // put back a recv wr. @@ -611,7 +610,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // create message and ack buffers, then initialize the tables. { const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer", - "tx_ack_buffer", "rx_ack_buffer"}; + "tx_ack_buffer", "rx_ack_buffer"}; tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]); rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]); tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]); @@ -672,7 +671,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) { void RdmaChannel::Recv() { struct ibv_recv_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t) this; + wr.wr_id = (uint64_t)this; struct ibv_recv_wr* bad_wr; CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv"; } @@ -826,11 +825,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; int r; - CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | - IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; memset(&attr, 0, sizeof(ibv_qp_attr)); @@ -841,10 +840,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; - CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | - IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; connected_ = true; @@ -931,7 +930,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) { struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t) this; + wr.wr_id = (uint64_t)this; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -1026,9 +1025,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { - CHECK(send_args.device_context) << "send dev name: " << src_dev->name() - << " gpu_info: " - << src_dev->tensorflow_gpu_device_info(); + CHECK(send_args.device_context) + << "send dev name: " << src_dev->name() + << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); if (can_memcpy) { AllocatorAttributes host_alloc_attrs; @@ -1054,8 +1053,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( // aync instead GPUUtil::SetProtoFromGPU( in, src_dev, send_args.device_context, &proto, is_dead, - [this, proto, buffer_size, key, in, step_id, key_with_step_id, - is_dead, send_args, recv_args](const Status& s) mutable { + [this, proto, buffer_size, key, in, step_id, key_with_step_id, + is_dead, send_args, recv_args](const Status& s) mutable { CHECK(s.ok()) << "copy proto from gpu sync"; auto tensor_bytes = proto.ByteSize(); buffer_size += tensor_bytes; diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt new file mode 100644 index 0000000000..cd7ec6e551 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt @@ -0,0 +1,47 @@ +op { + graph_op_name: "UniqueV2" + in_arg { + name: "x" + description: < [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index 0a3355cdbc..77a96d1e03 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -26,6 +26,8 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index d0dba6e1f0..223dd12f8f 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -455,7 +455,7 @@ class Graph { // the corresponding NodeDef to reflect the change. // REQUIRES: The control edge must exist. void RemoveControlEdge(const Edge* e); - + // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index 2aa1b31e15..e2ce0ba046 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -118,11 +118,9 @@ class GraphTest : public ::testing::Test { LOG(FATAL) << name; } - bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, - const Node* dst) { - for (const Edge *e : dst->in_edges()) { - if (e->IsControlEdge() && - e->src() == src && + bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) { + for (const Edge* e : dst->in_edges()) { + if (e->IsControlEdge() && e->src() == src && e->src_output() == Graph::kControlSlot && e->dst_input() == Graph::kControlSlot) { return true; diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index f1cb9a1860..b4a5a3c796 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1720,6 +1720,7 @@ tf_cuda_cc_tests( ":data_flow", ":ops_testutil", ":ops_util", + "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 766d63e3be..890fa3121b 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -97,8 +97,9 @@ class BincountOp : public OpKernel { const Tensor& weights_t = ctx->input(2); int32 size = size_tensor.scalar()(); - OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument( - "size (", size, ") must be non-negative")); + OP_REQUIRES( + ctx, size >= 0, + errors::InvalidArgument("size (", size, ") must be non-negative")); const auto arr = arr_t.flat(); const auto weights = weights_t.flat(); diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h index 0f8dd2b82a..cd3d560cd1 100644 --- a/tensorflow/core/kernels/bincount_op.h +++ b/tensorflow/core/kernels/bincount_op.h @@ -16,11 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_BINCOUNT_OP_H_ #define TENSORFLOW_BINCOUNT_OP_H_ +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc index ae9e26ffdf..6074b3e1f6 100644 --- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -17,12 +17,12 @@ limitations under the License. #define EIGEN_USE_GPU -#include "tensorflow/core/kernels/bincount_op.h" #include "external/cub_archive/cub/device/device_histogram.cuh" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -93,8 +93,8 @@ struct BincountFunctor { /* num_samples */ num_samples, /* stream */ stream); if (err != cudaSuccess) { - return errors::Internal("Could not launch HistogramEven: ", - cudaGetErrorString(err), "."); + return errors::Internal( + "Could not launch HistogramEven: ", cudaGetErrorString(err), "."); } return Status::OK(); } diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc index 14becc87a7..cb04b40637 100644 --- a/tensorflow/core/kernels/bincount_op_test.cc +++ b/tensorflow/core/kernels/bincount_op_test.cc @@ -30,8 +30,8 @@ static Graph* Bincount(int arr_size, int nbins) { Tensor arr(DT_INT32, TensorShape({arr_size})); arr.flat() = arr.flat().setRandom().abs(); - Tensor size(DT_INT32, TensorShape({(int32)1})); - size.flat()(0) = (int32)nbins; + Tensor size(DT_INT32, TensorShape({static_cast(1)})); + size.flat()(0) = static_cast(nbins); Tensor weights(DT_INT32, TensorShape({0})); diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc index aafbbe41b4..325dee793b 100644 --- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc @@ -77,10 +77,10 @@ struct BucketizeFunctor { TF_RETURN_IF_ERROR(boundaries_array.Finalize()); CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); - BucketizeCustomKernel< - T><<>>( - input.size(), input.data(), boundaries_vector.size(), - boundaries_array.data(), output.data()); + BucketizeCustomKernel + <<>>( + input.size(), input.data(), boundaries_vector.size(), + boundaries_array.data(), output.data()); return Status::OK(); } diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index f819fccbfb..c2d24d1f12 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -1101,29 +1101,27 @@ class Conv3DBackpropFilterOp : public OpKernel { bool cudnn_use_autotune_; }; - - #define REGISTER_GPU_KERNEL(T) \ REGISTER_KERNEL_BUILDER( \ Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), \ Conv3DBackpropInputOp); \ REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("input_sizes"), \ - Conv3DBackpropInputOp); \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("input_sizes"), \ + Conv3DBackpropInputOp); \ REGISTER_KERNEL_BUILDER( \ - Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ - Conv3DBackpropFilterOp); \ + Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropFilterOp); \ REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("filter_sizes"), \ - Conv3DBackpropFilterOp); + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("filter_sizes"), \ + Conv3DBackpropFilterOp); TF_CALL_half(REGISTER_GPU_KERNEL); TF_CALL_float(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL - + #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index 8d44208aa7..a7673afd0b 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, #ifdef TENSORFLOW_USE_SYCL REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index bbc69e45aa..7b688db4c5 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, #ifdef TENSORFLOW_USE_SYCL REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 53d65a22d1..9347978d51 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -231,7 +231,8 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args, } // Pad to vector-register width (if needed). for (int64 d = 0; d < pad_size; ++d) { - buffer[buf_base + vectorized_size + scalar_size + d] = static_cast(0); + buffer[buf_base + vectorized_size + scalar_size + d] = + static_cast(0); } } } @@ -510,7 +511,8 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropInputOp; +extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; @@ -885,7 +887,8 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropFilterOp; +extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index 2759ecb2f1..30ecd0c2ba 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -427,6 +427,11 @@ TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative") + .Device(DEVICE_GPU) + .TypeConstraint("T"), + DepthwiseConv2dNativeOp); + REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), DepthwiseConv2dNativeOp); diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index 11aed5b415..097a9f5bfa 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -158,7 +158,8 @@ struct DepthwiseFilterPadOp { } // Pad the remainder of output to vector-register boundary. for (int64 j = 0; j < pad_size; ++j) { - padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast(0); + padded_filter[output_base + vectorized_size + scalar_size + j] = + static_cast(0); } } } diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index 157ce106ce..d8bdb700e6 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -37,7 +38,6 @@ limitations under the License. #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/maxpooling_op_gpu.h" @@ -359,7 +359,8 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -888,7 +889,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -1052,7 +1054,8 @@ class MaxPoolingNoMaskOp : public OpKernel { "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -1137,7 +1140,8 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index d96b844383..f8daaca4c9 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -405,17 +405,17 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( if (propagate_nans) { MaxPoolForwardNHWC <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>> - (output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + kThreadsPerBlock, 0, d.stream()>>>( + output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); } else { MaxPoolForwardNHWC <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>> - (output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + kThreadsPerBlock, 0, d.stream()>>>( + output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); } return d.ok(); } diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index 0a5be4fec9..c4d5a45d3c 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -101,8 +101,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. TensorShape output_shape = input_shape.GetTfShape(); Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(input_number, - output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output( + input_number, output_shape, &output_tensor)); CHECK_NOTNULL(output_tensor); // Do we need to reorder Mkl layout into TensorFlow layout? @@ -116,13 +116,13 @@ class MklToTfOp : public OpKernel { // If not, just forward input tensor to output tensor. CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); } - } catch (mkldnn::error &e) { + } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + std::string(e.message) + - ", in file " + std::string(__FILE__) + ":" + - std::to_string(__LINE__); - OP_REQUIRES_OK(context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); } } #else @@ -160,8 +160,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(input_number, - output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape, + &output_tensor)); dnnLayout_t output_layout = static_cast(input_shape.GetTfLayout()); diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h index d3d1b56c9d..93ef512778 100644 --- a/tensorflow/core/kernels/ops_util.h +++ b/tensorflow/core/kernels/ops_util.h @@ -98,6 +98,19 @@ gtl::InlinedVector ComputeStride(const TensorShape& shape) { return strides; } +// Helper to compute 'strides' given an Eigen TensorDimensions +template +gtl::InlinedVector ComputeEigenStrides(const EigenDimensions& shape) { + const int ndims = shape.rank(); + gtl::InlinedVector strides(ndims); + T stride = 1; + for (int i = ndims - 1; i >= 0; --i) { + strides[i] = stride; + stride *= static_cast(shape[i]); + } + return strides; +} + } // namespace tensorflow #endif // TENSORFLOW_KERNELS_OPS_UTIL_H_ diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc index f8b0285c50..cda6d7d8f9 100644 --- a/tensorflow/core/platform/posix/error.cc +++ b/tensorflow/core/platform/posix/error.cc @@ -131,8 +131,8 @@ error::Code ErrnoToCode(int err_number) { case ENETUNREACH: // Network unreachable case ENOLCK: // No locks available case ENOLINK: // Link has been severed -#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \ - || defined(__HAIKU__)) +#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \ + defined(__HAIKU__)) case ENONET: // Machine is not on the network #endif code = error::UNAVAILABLE; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 09f69a95c1..614ee00b01 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,8 +37,8 @@ limitations under the License. #ifdef TF_USE_SNAPPY #include "snappy.h" #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ - || defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ + defined(__HAIKU__) #include #endif @@ -62,8 +62,8 @@ int NumSchedulableCPUs() { } perror("sched_getaffinity"); #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ - || defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ + defined(__HAIKU__) unsigned int count = std::thread::hardware_concurrency(); if (count > 0) return static_cast(count); #endif diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index 8fa0dfbed9..cf11f419a4 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -752,6 +752,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value, return __shfl_down_sync(mask, value, delta, width); } +__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown( + unsigned mask, Eigen::half value, int delta, int width = warpSize) { + return Eigen::half( + __shfl_down_sync(mask, static_cast(value), delta, width)); +} + // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. @@ -774,6 +780,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value, return __shfl_xor_sync(mask, value, laneMask, width); } +__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor( + unsigned mask, Eigen::half value, int laneMask, int width = warpSize) { + return Eigen::half( + __shfl_xor_sync(mask, static_cast(value), laneMask, width)); +} + // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 118ff0d0d6..148c7851bd 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -24,25 +24,25 @@ limitations under the License. #include "mkl_dnn_types.h" #include "mkl_service.h" #include "mkl_trans.h" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" +using mkldnn::engine; using mkldnn::memory; -using mkldnn::reorder; -using mkldnn::primitive; using mkldnn::padding_kind; -using mkldnn::engine; +using mkldnn::primitive; +using mkldnn::reorder; #endif // The file contains a number of utility classes and functions used by MKL @@ -56,8 +56,14 @@ namespace tensorflow { // Tensorflow tensor. typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims; -typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3, - Dim_O = 0, Dim_I = 1 } MklDnnDims; +typedef enum { + Dim_N = 0, + Dim_C = 1, + Dim_H = 2, + Dim_W = 3, + Dim_O = 0, + Dim_I = 1 +} MklDnnDims; class MklShape { public: @@ -236,8 +242,7 @@ class MklShape { (IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_ // Location of sizes. Note dim is not used here, left here // to make macros consistent. -#define SIZES_OFFSET(dims) \ - (DIMS_OFFSET + sizeof(size_t)) +#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t)) #define STRIDES_OFFSET(dims) \ (SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides #define MKL_LAYOUT_OFFSET(dims) \ @@ -332,7 +337,7 @@ class MklDnnShape { /// Number of dimensions in Tensorflow format size_t dimension_ = 0; /// Required by MKLDNN for conversions - mkldnn_dims_t sizes_; // Required by MKL for conversions + mkldnn_dims_t sizes_; // Required by MKL for conversions memory::format tf_data_format_ = memory::format::format_undef; memory::data_type T_ = memory::data_type::data_undef; // MKL layout @@ -345,15 +350,13 @@ class MklDnnShape { typedef std::remove_extent::type mkldnn_dim_t; #define INVALID_DIM_SIZE -1 - public: MklDnnShape() { - for (size_t i = 0; i < sizeof(data_.sizes_) / - sizeof(data_.sizes_[0]); ++i) { + for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); + ++i) { data_.sizes_[i] = -1; } - for (size_t i = 0; i < sizeof(data_.map_) / - sizeof(data_.map_[0]); ++i) { + for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) { data_.map_[i] = -1; } } @@ -369,26 +372,26 @@ class MklDnnShape { inline void SetDimensions(const size_t dimension) { data_.dimension_ = dimension; } - inline size_t GetDimension(char dimension)const { + inline size_t GetDimension(char dimension) const { int index = GetMklDnnTensorDimIndex(dimension); CHECK(index >= 0 && index < this->GetDimension()) << "Invalid index from the dimension: " << index << ", " << dimension; return this->DimSize(index); } - inline int32 GetMklDnnTensorDimIndex(char dimension)const { + inline int32 GetMklDnnTensorDimIndex(char dimension) const { switch (dimension) { - case 'N': - return MklDnnDims::Dim_N; - case 'C': - return MklDnnDims::Dim_C; - case 'H': - return MklDnnDims::Dim_H; - case 'W': - return MklDnnDims::Dim_W; - default: - LOG(FATAL) << "Invalid dimension: " << dimension; - return -1; // Avoid compiler warning about missing return value + case 'N': + return MklDnnDims::Dim_N; + case 'C': + return MklDnnDims::Dim_C; + case 'H': + return MklDnnDims::Dim_H; + case 'W': + return MklDnnDims::Dim_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value } } @@ -403,9 +406,9 @@ class MklDnnShape { memory::dims retVal; if (data_.is_mkl_tensor_) { int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); - for (size_t i = 0 ; i < dimensions; i++) { + for (size_t i = 0; i < dimensions; i++) { if (data_.sizes_[i] != INVALID_DIM_SIZE) - retVal.push_back(data_.sizes_[i]); + retVal.push_back(data_.sizes_[i]); } } else { CHECK_EQ(data_.is_mkl_tensor_, true); @@ -414,7 +417,7 @@ class MklDnnShape { } inline int64 DimSize(int index) const { - CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0])); + CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0])); return data_.sizes_[index]; } @@ -451,7 +454,7 @@ class MklDnnShape { /// We don't create primitive_descriptor for TensorFlow layout now. /// We use lazy evaluation and create it only when needed. inline void SetTfLayout(size_t dims, const memory::dims& sizes, - memory::format format) { + memory::format format) { CHECK_EQ(dims, sizes.size()); data_.dimension_ = dims; for (size_t ii = 0; ii < dims; ii++) { @@ -497,9 +500,7 @@ class MklDnnShape { SetTfDimOrder(dimension, data_format); } - inline const mkldnn_dim_t* GetTfToMklDimMap() const { - return &data_.map_[0]; - } + inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; } inline size_t TfDimIdx(int index) const { return data_.map_[index]; } inline int64 TfDimSize(int index) const { return data_.sizes_[TfDimIdx(index)]; @@ -553,9 +554,7 @@ class MklDnnShape { /// Size of buffer to hold the serialized object, the size is computed by /// following above mentioned order - inline size_t GetSerializeBufferSize() const { - return sizeof(MklShapeData); - } + inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); } void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const { CHECK(buf_size >= GetSerializeBufferSize()) @@ -566,12 +565,12 @@ class MklDnnShape { void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) { // Make sure buffer holds at least is_mkl_tensor_. CHECK(buf_size >= sizeof(data_.is_mkl_tensor_)) - << "Buffer size is too small in DeSerializeMklDnnShape"; + << "Buffer size is too small in DeSerializeMklDnnShape"; const bool is_mkl_tensor = *reinterpret_cast(buf); if (is_mkl_tensor) { // If it is an MKL Tensor then read the rest CHECK(buf_size >= GetSerializeBufferSize()) - << "Buffer size is too small in DeSerializeMklDnnShape"; + << "Buffer size is too small in DeSerializeMklDnnShape"; data_ = *reinterpret_cast(buf); } } @@ -660,8 +659,7 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { } #ifdef INTEL_MKL_DNN -inline void GetMklShape(OpKernelContext* ctext, int n, - MklDnnShape* mklshape) { +inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) { mklshape->DeSerializeMklDnnShape( ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) .flat() @@ -700,8 +698,7 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, /// Get shape of input tensor pointed by 'input_idx' in TensorShape format. /// If the input tensor is in MKL layout, then obtains TensorShape from /// MklShape. -inline TensorShape GetTfShape(OpKernelContext* context, - size_t input_idx) { +inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) { // Sanity check. CHECK_NOTNULL(context); CHECK_LT(input_idx, context->num_inputs()); @@ -821,7 +818,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, template inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, - TensorShape tf_shape) { + TensorShape tf_shape) { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), tf_shape, tensor_out)); } @@ -1099,7 +1096,8 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { /// /// @input None /// @return memory::data_type corresponding to type T -template static memory::data_type MklDnnType(); +template +static memory::data_type MklDnnType(); /// Instantiation for float type. Add similar instantiations for other /// type if needed. @@ -1114,10 +1112,11 @@ memory::data_type MklDnnType() { /// @return: memory::format corresponding to TensorFlow data format; /// Fails with an error if invalid data format. inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { - if (format == FORMAT_NHWC) return memory::format::nhwc; - else if (format == FORMAT_NCHW) return memory::format::nchw; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, - "Unsupported data format")); + if (format == FORMAT_NHWC) + return memory::format::nhwc; + else if (format == FORMAT_NCHW) + return memory::format::nchw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); // Return to get rid of compiler warning return memory::format::format_undef; } @@ -1128,10 +1127,11 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return: Tensorflow data format corresponding to memory::format /// Fails with an error if invalid data format. inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { - if (format == memory::format::nhwc) return FORMAT_NHWC; - else if (format == memory::format::nchw) return FORMAT_NCHW; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, - "Unsupported data format")); + if (format == memory::format::nhwc) + return FORMAT_NHWC; + else if (format == memory::format::nchw) + return FORMAT_NCHW; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); } /// Map TensorShape object into memory::dims required by MKL-DNN @@ -1161,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { /// @input TensorShape object in shape /// @return memory::dims in MKL-DNN required NCHW format inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, - TensorFormat format) { + TensorFormat format) { // Check validity of format. CHECK_NE(TFDataFormatToMklDnnDataFormat(format), memory::format::format_undef); @@ -1237,21 +1237,23 @@ class MklDnnData { const engine* cpu_engine_; public: - explicit MklDnnData(const engine* e) : user_memory_(nullptr), - reorder_memory_(nullptr), - op_md_(nullptr), cpu_engine_(e) {} + explicit MklDnnData(const engine* e) + : user_memory_(nullptr), + reorder_memory_(nullptr), + op_md_(nullptr), + cpu_engine_(e) {} ~MklDnnData() { cpu_engine_ = nullptr; // We don't own this. - delete(user_memory_); - delete(reorder_memory_); - delete(op_md_); + delete (user_memory_); + delete (reorder_memory_); + delete (op_md_); } inline void* GetTensorBuffer(const Tensor* tensor) const { CHECK_NOTNULL(tensor); - return const_cast(static_cast( - tensor->flat().data())); + return const_cast( + static_cast(tensor->flat().data())); } /// Set user memory primitive using specified dimensions, memory format and @@ -1283,7 +1285,7 @@ class MklDnnData { /// @return: memory::desc object corresponding to blocked memory format /// for given dimensions and strides. static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim, - const memory::dims& strides) { + const memory::dims& strides) { CHECK_EQ(dim.size(), strides.size()); // We have to construct memory descriptor in a C style. This is not at all @@ -1352,7 +1354,7 @@ class MklDnnData { CHECK_NOTNULL(cpu_engine_); // TODO(nhasabni): can we remove dynamic memory allocation? if (data_buffer) { - user_memory_ = new memory(pd, data_buffer); + user_memory_ = new memory(pd, data_buffer); } else { user_memory_ = new memory(pd); } diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc index 6aef3d86e9..8b73eadb40 100644 --- a/tensorflow/core/util/mkl_util_test.cc +++ b/tensorflow/core/util/mkl_util_test.cc @@ -54,7 +54,6 @@ TEST(MklUtilTest, MklDnnTfShape) { EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape); } - TEST(MklUtilTest, MklDnnBlockedFormatTest) { // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension // first (case 1) and then it being outermost dimension (case 2). diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index 92cc3bd60e..313c09e1e4 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -84,11 +84,10 @@ public class ShapeTest { assertEquals(Shape.scalar(), Shape.scalar()); assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3)); - assertNotEquals(Shape.make(1,2), null); - assertNotEquals(Shape.make(1,2), new Object()); + assertNotEquals(Shape.make(1, 2), null); + assertNotEquals(Shape.make(1, 2), new Object()); assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4)); - assertNotEquals(Shape.unknown(), Shape.unknown()); assertNotEquals(Shape.make(-1), Shape.make(-1)); assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3)); @@ -103,4 +102,3 @@ public class ShapeTest { assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode()); } } - diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index 3512f66284..750af20e8a 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -117,11 +117,11 @@ def numpy_input_fn(x, raise ValueError('y cannot be empty dict, use None instead.') ordered_dict_y = collections.OrderedDict( - sorted(y.items(), key=lambda t: t[0])) + sorted(y.items(), key=lambda t: t[0])) target_keys = list(ordered_dict_y.keys()) duplicate_keys = set(feature_keys).intersection(set(target_keys)) - if len(duplicate_keys): + if duplicate_keys: raise ValueError('{} duplicate keys are found in both x and y: ' '{}'.format(len(duplicate_keys), duplicate_keys)) @@ -131,16 +131,14 @@ def numpy_input_fn(x, ordered_dict_data[target_keys] = y if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1: - shape_dict_of_x = {k: ordered_dict_data[k].shape - for k in feature_keys} + shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys} if target_keys is None: shape_of_y = None elif isinstance(target_keys, string_types): shape_of_y = y.shape else: - shape_of_y = {k: ordered_dict_data[k].shape - for k in target_keys} + shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys} raise ValueError('Length of tensors in x and y is mismatched. All ' 'elements in x and y must have the same length.\n' @@ -155,11 +153,12 @@ def numpy_input_fn(x, enqueue_size=batch_size, num_epochs=num_epochs) - batch = (queue.dequeue_many(batch_size) if num_epochs is None - else queue.dequeue_up_to(batch_size)) + batch = ( + queue.dequeue_many(batch_size) + if num_epochs is None else queue.dequeue_up_to(batch_size)) # Remove the first `Tensor` in `batch`, which is the row number. - if len(batch) > 0: + if batch: batch.pop(0) features = dict(zip(feature_keys, batch[:len(feature_keys)])) diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 65eae7a7dc..1374e3f7e1 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -255,7 +255,7 @@ class NumpyIoTest(test.TestCase): with self.test_session() as session: input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) + x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor = input_fn() coord = coordinator.Coordinator() @@ -327,7 +327,7 @@ class NumpyIoTest(test.TestCase): with self.test_session() as session: input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) + x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor, targets_tensor = input_fn() coord = coordinator.Coordinator() @@ -362,13 +362,10 @@ class NumpyIoTest(test.TestCase): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} - y = {'y1': np.arange(-32, -28), - 'a': a, - 'y2': np.arange(32, 28, -1), - 'b': b} + y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b} with self.test_session(): with self.assertRaisesRegexp( - ValueError, '2 duplicate keys are found in both x and y'): + ValueError, '2 duplicate keys are found in both x and y'): failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) failing_input_fn() diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 1610214d54..4c026590c2 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -987,10 +987,9 @@ class TensorFlowTestCase(googletest.TestCase): msg: An optional string message to append to the failure message. """ # f1 == f2 is needed here as we might have: f1, f2 = inf, inf - self.assertTrue( - f1 == f2 or math.fabs(f1 - f2) <= err, - "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg - if msg is not None else "")) + self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err, + "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg + if msg is not None else "")) def assertArrayNear(self, farray1, farray2, err): """Asserts that two float arrays are near each other. diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 76b80e60ea..1bf2b70c1b 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -114,21 +114,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): arr = np.random.rand(*arr_shape) mask = make_mask(arr_shape[:ndims_mask]) if axis is not None: - mask = make_mask(arr_shape[axis:ndims_mask+axis]) + mask = make_mask(arr_shape[axis:ndims_mask + axis]) if axis is None or axis == 0: masked_arr = arr[mask] elif axis == 1: - masked_arr = arr[:,mask] + masked_arr = arr[:, mask] elif axis == 2: - masked_arr = arr[:,:,mask] - with self.test_session() as sess: + masked_arr = arr[:, :, mask] + with self.test_session(): masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) # Leading dimension size of masked_tensor is always unknown until runtime # since we don't how many elements will be kept. leading = 1 if axis is None else axis + 1 self.assertAllEqual(masked_tensor.get_shape()[leading:], - masked_arr.shape[leading:]) + masked_arr.shape[leading:]) self.assertAllClose(masked_arr, masked_tensor.eval()) @@ -1078,6 +1078,7 @@ class PadTest(test_util.TensorFlowTestCase): [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) + class InvertPermutationTest(test_util.TensorFlowTestCase): def testInvertPermutation(self): diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 79285476b4..2767df127e 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -25,6 +25,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest + class BincountTest(test_util.TensorFlowTestCase): def test_empty(self): @@ -72,8 +73,7 @@ class BincountTest(test_util.TensorFlowTestCase): else: weights = np.random.random(num_samples) self.assertAllClose( - math_ops.bincount(arr, weights).eval(), - np.bincount(arr, weights)) + math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights)) def test_random_without_weights(self): num_samples = 10000 @@ -83,8 +83,7 @@ class BincountTest(test_util.TensorFlowTestCase): arr = np.random.randint(0, 1000, num_samples) weights = np.ones(num_samples).astype(dtype) self.assertAllClose( - math_ops.bincount(arr, None).eval(), - np.bincount(arr, weights)) + math_ops.bincount(arr, None).eval(), np.bincount(arr, weights)) def test_zero_weights(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 6cbdd4cbb3..68817cc256 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -439,11 +439,10 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeCPU(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, - dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, - dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, - dtypes_lib.complex64, dtypes_lib.complex128, - dtypes_lib.string + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, + dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, + dtypes_lib.complex128, dtypes_lib.string ]: self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False) self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False) @@ -574,10 +573,10 @@ class OnesLikeTest(test.TestCase): def testOnesLike(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, - dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, - dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, - dtypes_lib.complex64, dtypes_lib.complex128 + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, + dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, + dtypes_lib.complex128 ]: numpy_dtype = dtype.as_numpy_dtype with self.test_session(): diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index a7e23ead1c..d92797a7d3 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -52,7 +52,6 @@ class Conv1DTest(test.TestCase): self.assertEqual(len(output), 2) self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) - def testConv1DTranspose(self): with self.test_session(): stride = 2 @@ -93,5 +92,6 @@ class Conv1DTest(test.TestCase): self.assertAllClose(cache_values, value) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index 116681fc4c..ec8ac74163 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -68,8 +68,8 @@ class Conv3DTest(test.TestCase): total_size_2 *= s # Initializes the input tensor with array containing numbers from 0 to 1. - # We keep the input tensor values fairly small to avoid overflowing a float16 - # tensor during the conv3d + # We keep the input tensor values fairly small to avoid overflowing float16 + # during the conv3d. x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu): @@ -115,15 +115,13 @@ class Conv3DTest(test.TestCase): if value.dtype == np.float16: tol = 1e-3 - self.assertAllClose(expected, value.flatten(), atol=tol, - rtol=tol) + self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol) def testConv3D1x1x1Filter(self): expected_output = [ - 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5 , - 0.59259259, 0.62962963, 0.77777778, 0.92592593, 0.85185185, - 1.05555556, 1.25925926, 1.07407407, 1.33333333, 1.59259259, - 1.2962963 , 1.61111111, 1.92592593 + 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259, + 0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926, + 1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593 ] # These are equivalent to the Conv2D1x1 case. @@ -149,10 +147,10 @@ class Conv3DTest(test.TestCase): # Expected values computed using scipy's correlate function. def testConv3D2x2x2Filter(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 4.2650463 , 4.35763889, - 4.45023148, 6.73032407, 6.89236111, 7.05439815, 7.22337963, - 7.39930556, 7.57523148, 9.68865741, 9.93402778, 10.17939815, - 10.18171296, 10.44097222, 10.70023148 + 3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148, + 6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148, + 9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222, + 10.70023148 ] # expected_shape = [1, 3, 1, 2, 5] self._VerifyValues( @@ -164,19 +162,17 @@ class Conv3DTest(test.TestCase): def testConv3DStrides(self): expected_output = [ - 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, - 0.13988095, 0.08452381, 0.26071429, 0.35238095, 0.36488095, - 0.37738095, 0.38988095, 0.40238095, 0.23452381, 0.46071429, - 0.61488095, 0.62738095, 0.63988095, 0.65238095, 0.66488095, - 0.38452381, 1.12738095, 1.48988095, 1.50238095, 1.51488095, - 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, - 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, - 1.52738095, 2.01488095, 2.02738095, 2.03988095, 2.05238095, - 2.06488095, 1.18452381, 2.19404762, 2.88988095, 2.90238095, - 2.91488095, 2.92738095, 2.93988095, 1.68452381, 2.39404762, - 3.15238095, 3.16488095, 3.17738095, 3.18988095, 3.20238095, - 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, - 3.45238095, 3.46488095, 1.98452381 + 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095, + 0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095, + 0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095, + 0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095, + 1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, + 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095, + 2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381, + 2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095, + 1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095, + 3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, + 3.45238095, 3.46488095, 1.98452381 ] self._VerifyValues( tensor_in_sizes=[1, 5, 8, 7, 1], @@ -187,8 +183,7 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, - 10.17939815 + 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -199,14 +194,12 @@ class Conv3DTest(test.TestCase): def testConv3DStride3(self): expected_output = [ - 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, - 1.68998016, 1.6155754 , 1.68179563, 1.74801587, 1.9280754 , - 2.01215278, 2.09623016, 1.98015873, 2.0672123 , 2.15426587, - 2.03224206, 2.12227183, 2.21230159, 4.4280754 , 4.65500992, - 4.88194444, 4.48015873, 4.71006944, 4.93998016, 4.53224206, - 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, - 4.8968254 , 5.15054563, 5.40426587, 4.94890873, 5.20560516, - 5.46230159 + 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016, + 1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016, + 1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159, + 4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016, + 4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, + 4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159 ] self._VerifyValues( tensor_in_sizes=[1, 6, 7, 8, 2], @@ -217,9 +210,8 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2Same(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 2.0162037 , 2.06597222, - 2.11574074, 9.68865741, 9.93402778, 10.17939815, 4.59953704, - 4.73263889, 4.86574074 + 3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074, + 9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -230,8 +222,8 @@ class Conv3DTest(test.TestCase): def testKernelSmallerThanStride(self): expected_output = [ - 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037 , - 0.77777778, 0.92592593, 1. + 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778, + 0.92592593, 1. ] self._VerifyValues( tensor_in_sizes=[1, 3, 3, 3, 1], @@ -247,12 +239,11 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, - 0.40306122, 0.41873178, 0.4340379 , 0.19642857, 2.46938776, - 2.50874636, 1.1377551 , 2.74489796, 2.78425656, 1.26020408, - 1.16873178, 1.1840379 , 0.51785714, 1.09511662, 1.10604956, - 0.44642857, 1.17164723, 1.18258017, 0.47704082, 0.3691691 , - 0.37244898, 0.125 + 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122, + 0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551, + 2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714, + 1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082, + 0.3691691, 0.37244898, 0.125 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -262,8 +253,8 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, - 2.744898, 2.784257 + 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898, + 2.784257 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -278,7 +269,7 @@ class Conv3DTest(test.TestCase): filter_in_sizes=[2, 1, 2, 1, 2], stride=1, padding="VALID", - expected=[1.5625, 1.875]) + expected=[1.5625, 1.875]) def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, @@ -318,7 +309,6 @@ class Conv3DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] - for data_type in self._DtypesToTest(use_gpu=use_gpu): # TODO(mjanusz): Modify gradient_checker to also provide max relative # error and synchronize the tolerance levels between the tests for forward @@ -330,12 +320,11 @@ class Conv3DTest(test.TestCase): elif data_type == dtypes.float16: tolerance = 1e-3 - with self.test_session(use_gpu=use_gpu): orig_input_tensor = constant_op.constant( - input_data, shape=input_shape, dtype=data_type, name="input") + input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant( - filter_data, shape=filter_shape, dtype=data_type, name="filter") + filter_data, shape=filter_shape, dtype=data_type, name="filter") if data_format == "NCDHW": input_tensor = test_util.NHWCToNCHW(orig_input_tensor) @@ -345,25 +334,23 @@ class Conv3DTest(test.TestCase): new_strides = strides conv = nn_ops.conv3d( - input_tensor, filter_tensor, new_strides, padding, - data_format=data_format, name="conv") + input_tensor, + filter_tensor, + new_strides, + padding, + data_format=data_format, + name="conv") if data_format == "NCDHW": conv = test_util.NCHWToNHWC(conv) - if test_input: - jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor, - input_shape, - conv, - output_shape) + jacob_t, jacob_n = gradient_checker.compute_gradient( + orig_input_tensor, input_shape, conv, output_shape) else: - jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor, - filter_shape, - conv, - output_shape) - - + jacob_t, jacob_n = gradient_checker.compute_gradient( + filter_tensor, filter_shape, conv, output_shape) + if data_type != dtypes.float16: reference_jacob_t = jacob_t err = np.fabs(jacob_t - jacob_n).max() @@ -375,7 +362,6 @@ class Conv3DTest(test.TestCase): print("conv3d gradient error = ", err) self.assertLess(err, tolerance) - def ConstructAndTestGradient(self, **kwargs): for data_format, use_gpu in GetTestConfigs(): self._ConstructAndTestGradientForConfig(data_format=data_format, diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 150e2ff7f2..6be8997cab 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np import os +import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -1442,7 +1442,6 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) - # Propagate the diff in cases of NaNs os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" expected_input_backprop_cudnn = expected_input_backprop_tf_cpu diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 8e54d10f32..223a4b2c87 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -1018,15 +1018,15 @@ class LMDBReaderTest(test.TestCase): with self.test_session() as sess: reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") - filename_queue = input_lib.string_input_producer([self.db_path], - num_epochs=None) + filename_queue = input_lib.string_input_producer( + [self.db_path], num_epochs=None) key1, value1 = reader1.read(filename_queue) key2, value2 = reader2.read(filename_queue) coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - for i in range(3): - for j in range(10): + for _ in range(3): + for _ in range(10): k1, v1, k2, v2 = sess.run([key1, value1, key2, value2]) self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2)) self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2)) @@ -1054,14 +1054,14 @@ class LMDBReaderTest(test.TestCase): def testReadFromFileRepeatedly(self): with self.test_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_file_repeated") - filename_queue = input_lib.string_input_producer([self.db_path], - num_epochs=None) + filename_queue = input_lib.string_input_producer( + [self.db_path], num_epochs=None) key, value = reader.read(filename_queue) coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) # Iterate over the lmdb 3 times. - for i in range(3): + for _ in range(3): # Go over all 10 records each time. for j in range(10): k, v = sess.run([key, value]) @@ -1071,5 +1071,6 @@ class LMDBReaderTest(test.TestCase): coord.request_stop() coord.join(threads) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 3a02f24902..99f9f09690 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -380,7 +380,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): # Replace np_ans[8] with 0 for the value np_ans[8:] = 0 # Replace 8 with -1 in indices - np.place(indices, indices==8, [-1]) + np.place(indices, indices == 8, [-1]) s = math_ops.unsorted_segment_sum( data=tf_x, segment_ids=indices, num_segments=num_segments) tf_ans = s.eval() diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 04758ce45a..6390b7c518 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -87,6 +87,7 @@ class UniqueTest(test.TestCase): for i in range(len(x)): self.assertEqual(x[i], tf_y[tf_idx[i]]) + class UniqueWithCountsTest(test.TestCase): def testInt32(self): diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 4d5fb97845..83237b8733 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -267,34 +267,34 @@ class BatchNormalization(base.Layer): self.axis[idx] = x + 1 # Account for added dimension if self.scale: - self.gamma = self.add_variable(name='gamma', - shape=param_shape, - dtype=param_dtype, - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint, - trainable=True) + self.gamma = self.add_variable( + name='gamma', + shape=param_shape, + dtype=param_dtype, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + trainable=True) else: self.gamma = None if self.fused: - self._gamma_const = array_ops.constant(1.0, - dtype=param_dtype, - shape=param_shape) + self._gamma_const = array_ops.constant( + 1.0, dtype=param_dtype, shape=param_shape) if self.center: - self.beta = self.add_variable(name='beta', - shape=param_shape, - dtype=param_dtype, - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True) + self.beta = self.add_variable( + name='beta', + shape=param_shape, + dtype=param_dtype, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True) else: self.beta = None if self.fused: - self._beta_const = array_ops.constant(0.0, - dtype=param_dtype, - shape=param_shape) + self._beta_const = array_ops.constant( + 0.0, dtype=param_dtype, shape=param_shape) # Disable variable partitioning when creating the moving mean and variance try: @@ -327,11 +327,12 @@ class BatchNormalization(base.Layer): # stack to be cleared. The nested ones use a `lambda` to set the desired # device and ignore any devices that may be set by the custom getter. def _renorm_variable(name, shape): - var = self.add_variable(name=name, - shape=shape, - dtype=param_dtype, - initializer=init_ops.zeros_initializer(), - trainable=False) + var = self.add_variable( + name=name, + shape=shape, + dtype=param_dtype, + initializer=init_ops.zeros_initializer(), + trainable=False) return var with ops.device(None): diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index b2876c58c2..7c91c3284e 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -101,15 +101,13 @@ class BNTest(test.TestCase): loss_val = sess.run(loss, feed_dict={image: image_val}) return loss_val - def _trainEvalSequence(self, - dtype, - train1_use_gpu, - train2_use_gpu, + def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu): batch, height, width, input_channels = 2, 4, 5, 3 shape = [batch, height, width, input_channels] checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' % - (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu)) + (dtype, train1_use_gpu, train2_use_gpu, + infer_use_gpu)) self._train( checkpoint, @@ -130,30 +128,27 @@ class BNTest(test.TestCase): dtype=dtype) np.random.seed(0) - image_val = np.random.rand(batch, - height, - width, - input_channels).astype(dtype.as_numpy_dtype) - loss_val = self._infer(checkpoint, image_val, shape, - use_gpu=infer_use_gpu, is_fused=True) + image_val = np.random.rand(batch, height, width, input_channels).astype( + dtype.as_numpy_dtype) + loss_val = self._infer( + checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True) return train_vars, loss_val def testHalfPrecision(self): - ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32, - train1_use_gpu=True, - train2_use_gpu=True, - infer_use_gpu=True) - + ref_vars, ref_loss = self._trainEvalSequence( + dtype=dtypes.float32, + train1_use_gpu=True, + train2_use_gpu=True, + infer_use_gpu=True) + self.assertEqual(len(ref_vars), 5) for train1_use_gpu in [True, False]: for train2_use_gpu in [True, False]: for infer_use_gpu in [True, False]: - test_vars, test_loss = self._trainEvalSequence(dtypes.float16, - train1_use_gpu, - train2_use_gpu, - infer_use_gpu) + test_vars, test_loss = self._trainEvalSequence( + dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu) self.assertEqual(len(test_vars), 5) for test_var, ref_var in zip(test_vars, ref_vars): self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) @@ -281,9 +276,8 @@ class BNTest(test.TestCase): def testCreateFusedBNFloat16(self): # Call layer. bn = normalization_layers.BatchNormalization(axis=1, fused=True) - inputs = random_ops.random_uniform((5, 4, 3, 3), - seed=1, - dtype=dtypes.float16) + inputs = random_ops.random_uniform( + (5, 4, 3, 3), seed=1, dtype=dtypes.float16) training = array_ops.placeholder(dtype='bool') outputs = bn.apply(inputs, training=training) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 43238757c7..38eff54c69 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1194,18 +1194,19 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): "Number of mask dimensions must be specified, even if some dimensions" " are None. E.g. shape=[None] is ok, but shape=None is not.") axis = 0 if axis is None else axis - shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask) + shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0]) + leading_size = gen_math_ops._prod( + shape(tensor)[axis:axis + ndims_mask], [0]) tensor = reshape(tensor, - concat([shape(tensor)[:axis], - [leading_size], - shape(tensor)[axis+ndims_mask:]], 0)) - first_dim = shape_tensor[axis:axis+ndims_mask].num_elements() + concat([ + shape(tensor)[:axis], [leading_size], + shape(tensor)[axis + ndims_mask:] + ], 0)) + first_dim = shape_tensor[axis:axis + ndims_mask].num_elements() tensor.set_shape( - tensor_shape.as_shape(shape_tensor[:axis]) - .concatenate([first_dim]) - .concatenate(shape_tensor[axis+ndims_mask:])) + tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim]) + .concatenate(shape_tensor[axis + ndims_mask:])) mask = reshape(mask, [-1]) return _apply_mask_1d(tensor, mask, axis) diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index d49fac59ca..04762565c2 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -23,10 +23,10 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import functional_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import util as distribution_util @@ -243,25 +243,26 @@ class Multinomial(distribution.Distribution): n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits # flatten the total_count and logits - flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] - flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] + flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] + flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] # computes each total_count and logits situation by map_fn def _sample_single(args): - logits, n_draw = args[0], args[1] # [K], [] - x = random_ops.multinomial(logits[array_ops.newaxis, ...], - n_draw, seed) # [1, n*n_draw] - x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] - x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] + logits, n_draw = args[0], args[1] # [K], [] + x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw, + seed) # [1, n*n_draw] + x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] + x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] return x - x = functional_ops.map_fn(_sample_single, - [flat_logits, flat_ndraws], - dtype=self.dtype) # [B1B2...Bm, n, k] + + x = functional_ops.map_fn( + _sample_single, [flat_logits, flat_ndraws], + dtype=self.dtype) # [B1B2...Bm, n, k] # reshape the results to proper shape x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) - x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] + x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] return x @distribution_util.AppendDocstring(_multinomial_sample_note) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 7c23321ca5..b9c89d62d5 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1119,9 +1119,8 @@ def rgb_to_grayscale(images, name=None): # https://en.wikipedia.org/wiki/Luma_%28video%29 rgb_weights = [0.2989, 0.5870, 0.1140] rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) - gray_float = math_ops.reduce_sum(flt_image * rgb_weights, - rank_1, - keepdims=True) + gray_float = math_ops.reduce_sum( + flt_image * rgb_weights, rank_1, keepdims=True) gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) return convert_image_dtype(gray_float, orig_dtype, name=name) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 14a039ffd0..be9beee633 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -30,7 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat -from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util import deprecation # Names below are lower_case. # pylint: disable=invalid-name @@ -439,9 +439,13 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): # pylint: disable=redefined-builtin -@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", - "keep_dims") -def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, +@deprecation.deprecated_args( + None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims') +def norm(tensor, + ord='euclidean', + axis=None, + keepdims=None, + name=None, keep_dims=None): r"""Computes the norm of vectors, matrices, and tensors. @@ -478,6 +482,7 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, keepdims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. + keep_dims: Deprecated alias for `keepdims`. Returns: output: A `Tensor` of the same type as tensor, containing the vector or @@ -500,11 +505,8 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, higher order tensors. @end_compatibility """ - - if keep_dims is not None: - if keepdims is not None: - raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") - keepdims = keep_dims + keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims, + 'keep_dims', keep_dims) if keepdims is None: keepdims = False @@ -555,8 +557,8 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, else: # General p-norms (positive p only) result = math_ops.pow( - math_ops.reduce_sum( - math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord) + math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True), + 1.0 / ord) if not keepdims: result = array_ops.squeeze(result, axis) return result diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index d30f6b92ad..e04121ee31 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -792,9 +792,10 @@ def mean_cosine_distance(labels, predictions, dim, weights=None, predictions, labels, weights = _remove_squeezable_dimensions( predictions=predictions, labels=labels, weights=weights) radial_diffs = math_ops.multiply(predictions, labels) - radial_diffs = math_ops.reduce_sum(radial_diffs, - reduction_indices=[dim,], - keepdims=True) + radial_diffs = math_ops.reduce_sum( + radial_diffs, reduction_indices=[ + dim, + ], keepdims=True) mean_distance, update_op = mean(radial_diffs, weights, None, None, diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index da037a7983..654eb1c118 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -333,6 +333,7 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`. name: A name for this operation (optional). + dim: Deprecated alias for axis. Returns: A `Tensor` with the same shape as `x`. diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 61fa462988..ec7b9372ca 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -23,7 +23,6 @@ import numbers import numpy as np from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util from tensorflow.python.framework import ops @@ -38,11 +37,10 @@ from tensorflow.python.ops import random_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import -from tensorflow.python.util.deprecation import deprecated_args -from tensorflow.python.util.deprecation import deprecated_argument_lookup from tensorflow.python.util import deprecation + # Aliases for some automatically-generated names. local_response_normalization = gen_nn_ops.lrn @@ -1648,7 +1646,7 @@ def _softmax(logits, compute_op, dim=-1, name=None): return output -@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") def softmax(logits, axis=None, name=None, dim=None): """Computes softmax activations. @@ -1662,6 +1660,7 @@ def softmax(logits, axis=None, name=None, dim=None): axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). + dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type and shape as `logits`. @@ -1670,13 +1669,13 @@ def softmax(logits, axis=None, name=None, dim=None): InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - axis = deprecated_argument_lookup("axis", axis, "dim", dim) + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 return _softmax(logits, gen_nn_ops._softmax, axis, name) -@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") def log_softmax(logits, axis=None, name=None, dim=None): """Computes log softmax activations. @@ -1690,6 +1689,7 @@ def log_softmax(logits, axis=None, name=None, dim=None): axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). + dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type as `logits`. Same shape as `logits`. @@ -1698,7 +1698,7 @@ def log_softmax(logits, axis=None, name=None, dim=None): InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - axis = deprecated_argument_lookup("axis", axis, "dim", dim) + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 return _softmax(logits, gen_nn_ops._log_softmax, axis, name) @@ -2316,13 +2316,14 @@ def conv1d(value, filters, stride, padding, return array_ops.squeeze(result, [spatial_start_dim]) -def conv1d_transpose(value, - filter, - output_shape, - stride, - padding="SAME", - data_format="NWC", - name=None): +def conv1d_transpose( + value, + filter, # pylint: disable=redefined-builtin + output_shape, + stride, + padding="SAME", + data_format="NWC", + name=None): """The transpose of `conv1d`. This operation is sometimes called "deconvolution" after [Deconvolutional @@ -2357,8 +2358,8 @@ def conv1d_transpose(value, [value, filter, output_shape]) as name: output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): - raise ValueError("output_shape must have shape (3,), got {}" - .format(output_shape_.get_shape())) + raise ValueError("output_shape must have shape (3,), got {}".format( + output_shape_.get_shape())) # The format could be either NWC or NCW, map to NHWC or NCHW if data_format is None or data_format == "NWC": @@ -2380,7 +2381,8 @@ def conv1d_transpose(value, if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): raise ValueError( "output_shape does not match filter's output channels, " - "{} != {}".format(output_shape[axis], filter.get_shape()[1])) + "{} != {}".format(output_shape[axis], + filter.get_shape()[1])) if padding != "VALID" and padding != "SAME": raise ValueError("padding must be either VALID or SAME:" @@ -2388,25 +2390,26 @@ def conv1d_transpose(value, # Reshape the input tensor to [batch, 1, in_width, in_channels] if data_format_2d == "NHWC": - output_shape_ = array_ops.concat([output_shape_[:1], [1], - output_shape_[1:]], axis=0) + output_shape_ = array_ops.concat( + [output_shape_[:1], [1], output_shape_[1:]], axis=0) spatial_start_dim = 1 strides = [1, 1, stride, 1] else: - output_shape_ = array_ops.concat([output_shape_[:2], [1], - output_shape_[2:]], axis=0) + output_shape_ = array_ops.concat( + [output_shape_[:2], [1], output_shape_[2:]], axis=0) spatial_start_dim = 2 strides = [1, 1, 1, stride] value = array_ops.expand_dims(value, spatial_start_dim) filter = array_ops.expand_dims(filter, 0) - result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, - filter=filter, - out_backprop=value, - strides=strides, - padding=padding, - data_format=data_format_2d, - name=name) + result = gen_nn_ops.conv2d_backprop_input( + input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + data_format=data_format_2d, + name=name) return array_ops.squeeze(result, [spatial_start_dim]) diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 29fd6d0e87..6fd0e69905 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -470,6 +470,7 @@ string ConvolutionDescriptor::ToShortString() const { PoolingDescriptor::PoolingDescriptor(int ndims) : mode_(dnn::PoolingMode::kMaximum), ndims_(ndims), + propagate_nans_(false), window_(ndims, 0), padding_(ndims, 0), strides_(ndims, 1), diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl index b470772fbf..87a70d8f95 100644 --- a/third_party/sycl/crosstool/trisycl.tpl +++ b/third_party/sycl/crosstool/trisycl.tpl @@ -11,10 +11,12 @@ CPU_C_COMPILER = ('%{host_c_compiler}') CURRENT_DIR = os.path.dirname(sys.argv[0]) TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' + def main(): compiler_flags = [] - remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions') + remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', + '-Wignored-attributes', '-fno-exceptions') # remove -fsamotoze-coverage from string with g++ if 'g++' in CPU_CXX_COMPILER: remove_flags += ('-fsanitize-coverage',) @@ -22,52 +24,62 @@ def main(): else: compiler_flags += ['-fopenmp=libomp'] - compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)] - + compiler_flags += [ + flag for flag in sys.argv[1:] if not flag.startswith(remove_flags) + ] output_file_index = compiler_flags.index('-o') + 1 output_file_name = compiler_flags[output_file_index] - if(output_file_index == 1): + if (output_file_index == 1): # we are linking - return call([CPU_CXX_COMPILER] + compiler_flags + - ['-Wl,--no-undefined']) + return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined']) # find what we compile compiling_cpp = 0 - if('-c' in compiler_flags): - compiled_file_index = compiler_flags.index('-c') + 1 - compiled_file_name = compiler_flags[compiled_file_index] - if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', - '.C', '.cxx'))): - compiling_cpp = 1; - - debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic'] + if ('-c' in compiler_flags): + compiled_file_index = compiler_flags.index('-c') + 1 + compiled_file_name = compiler_flags[compiled_file_index] + if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', + '.cxx'))): + compiling_cpp = 1 + + debug_flags = [ + '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', + '-lpthread', '-lboost_log', '-g', '-rdynamic' + ] opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] - compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', - '-DEIGEN_HAS_C99_MATH', - '-DEIGEN_MAX_ALIGN_BYTES=16', - '-DTENSORFLOW_USE_SYCL'] + opt_flags + compiler_flags = compiler_flags + [ + '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH', + '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL' + ] + opt_flags - if(compiling_cpp == 1): + if (compiling_cpp == 1): # create a blacklist of folders that will be skipped when compiling # with triSYCL - skip_extensions = [".cu.cc"] - skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"] + skip_extensions = ['.cu.cc'] + skip_folders = [ + 'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard', + 'third_party', 'external', 'hexagon' + ] skip_folders = [(folder + '/') for folder in skip_folders] # if compiling external project skip triSYCL - if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders): + if any( + compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any( + _folder in output_file_name for _folder in skip_folders): return call([CPU_CXX_COMPILER] + compiler_flags) - host_compiler_flags = ['-xc++', '-Wno-unused-variable', - '-I', TRISYCL_INCLUDE_DIR] + compiler_flags + host_compiler_flags = [ + '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR + ] + compiler_flags x = call([CPU_CXX_COMPILER] + host_compiler_flags) return x else: # compile for C return call([CPU_C_COMPILER] + compiler_flags) + if __name__ == '__main__': sys.exit(main()) diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl index a0c9e4e43a..5b9d0eb383 100644 --- a/third_party/sycl/sycl_configure.bzl +++ b/third_party/sycl/sycl_configure.bzl @@ -67,7 +67,6 @@ def find_computecpp_root(repository_ctx): def find_trisycl_include_dir(repository_ctx): """Find triSYCL include directory. """ - sycl_name = "" if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ: sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip() if sycl_name.startswith("/"): -- GitLab From 0c98a7ecf88da45469cf00edc5cc4c0c82c7d49f Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 00:15:55 -0800 Subject: [PATCH 0745/1801] Remove duplicate propagate_nans_(false). PiperOrigin-RevId: 176619303 --- tensorflow/stream_executor/dnn.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 6fd0e69905..44144a0613 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -473,8 +473,7 @@ PoolingDescriptor::PoolingDescriptor(int ndims) propagate_nans_(false), window_(ndims, 0), padding_(ndims, 0), - strides_(ndims, 1), - propagate_nans_(false) {} + strides_(ndims, 1) {} PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {} -- GitLab From ad7eeec1cc06d7fdba6ee404f03a35fab9cd3e6a Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 00:33:29 -0800 Subject: [PATCH 0746/1801] Automated g4 rollback of changelist 176615737 PiperOrigin-RevId: 176621645 --- configure.py | 19 ++- tensorflow/compiler/aot/tfcompile.bzl | 8 +- .../compiler/tests/fused_batchnorm_test.py | 6 +- .../contrib/android/cmake/CMakeLists.txt | 2 +- .../python/kernel_tests/cauchy_test.py | 47 +++---- .../distributions/python/ops/cauchy.py | 18 ++- .../contrib/layers/python/layers/layers.py | 1 + .../layers/python/layers/layers_test.py | 28 ++-- .../python/learn/learn_io/data_feeder.py | 6 +- .../linear_optimizer/python/ops/sdca_ops.py | 5 +- .../contrib/lite/testing/generate_examples.py | 3 +- tensorflow/contrib/opt/__init__.py | 16 +-- .../training/multitask_optimizer_wrapper.py | 60 ++++---- .../multitask_optimizer_wrapper_test.py | 40 +++--- .../python/kernel_tests/core_rnn_cell_test.py | 31 ++-- .../rnn/python/kernel_tests/rnn_cell_test.py | 63 +++++---- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 98 ++++++------- .../seq2seq/python/ops/attention_wrapper.py | 7 +- tensorflow/contrib/verbs/rdma.cc | 61 ++++---- .../api_def/base_api/api_def_UniqueV2.pbtxt | 47 ------- .../base_api/api_def_UnsortedSegmentSum.pbtxt | 2 - tensorflow/core/graph/graph.h | 2 +- tensorflow/core/graph/graph_test.cc | 8 +- tensorflow/core/kernels/BUILD | 1 - tensorflow/core/kernels/bincount_op.cc | 5 +- tensorflow/core/kernels/bincount_op.h | 2 +- tensorflow/core/kernels/bincount_op_gpu.cu.cc | 6 +- tensorflow/core/kernels/bincount_op_test.cc | 4 +- .../core/kernels/bucketize_op_gpu.cu.cc | 8 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 24 ++-- tensorflow/core/kernels/cwise_op_asinh.cc | 2 +- tensorflow/core/kernels/cwise_op_atanh.cc | 2 +- .../core/kernels/depthwise_conv_grad_op.cc | 9 +- tensorflow/core/kernels/depthwise_conv_op.cc | 5 - tensorflow/core/kernels/depthwise_conv_op.h | 3 +- tensorflow/core/kernels/maxpooling_op.cc | 14 +- .../core/kernels/maxpooling_op_gpu.cu.cc | 16 +-- tensorflow/core/kernels/mkl_tfconv_op.h | 20 +-- tensorflow/core/kernels/ops_util.h | 13 -- tensorflow/core/platform/posix/error.cc | 4 +- tensorflow/core/platform/posix/port.cc | 8 +- tensorflow/core/util/cuda_kernel_helper.h | 12 -- tensorflow/core/util/mkl_util.h | 132 +++++++++--------- tensorflow/core/util/mkl_util_test.cc | 1 + .../test/java/org/tensorflow/ShapeTest.java | 6 +- .../python/estimator/inputs/numpy_io.py | 17 +-- .../python/estimator/inputs/numpy_io_test.py | 11 +- tensorflow/python/framework/test_util.py | 7 +- .../python/kernel_tests/array_ops_test.py | 11 +- .../python/kernel_tests/bincount_op_test.py | 7 +- .../python/kernel_tests/constant_op_test.py | 17 +-- tensorflow/python/kernel_tests/conv1d_test.py | 2 +- .../python/kernel_tests/conv_ops_3d_test.py | 120 +++++++++------- .../python/kernel_tests/pooling_ops_test.py | 3 +- .../python/kernel_tests/reader_ops_test.py | 15 +- .../segment_reduction_ops_test.py | 2 +- .../python/kernel_tests/unique_op_test.py | 1 - tensorflow/python/layers/normalization.py | 51 ++++--- .../python/layers/normalization_test.py | 40 +++--- tensorflow/python/ops/array_ops.py | 19 ++- .../python/ops/distributions/multinomial.py | 25 ++-- tensorflow/python/ops/image_ops_impl.py | 5 +- tensorflow/python/ops/linalg_ops.py | 24 ++-- tensorflow/python/ops/metrics_impl.py | 7 +- tensorflow/python/ops/nn_impl.py | 1 - tensorflow/python/ops/nn_ops.py | 59 ++++---- tensorflow/stream_executor/dnn.cc | 1 - third_party/sycl/crosstool/trisycl.tpl | 60 ++++---- third_party/sycl/sycl_configure.bzl | 1 + 69 files changed, 646 insertions(+), 735 deletions(-) delete mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt diff --git a/configure.py b/configure.py index 1f205861f1..26da09bd94 100644 --- a/configure.py +++ b/configure.py @@ -883,28 +883,27 @@ def set_computecpp_toolkit_path(environ_cp): write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', computecpp_toolkit_path) - def set_trisycl_include_dir(environ_cp): - """Set TRISYCL_INCLUDE_DIR.""" + """Set TRISYCL_INCLUDE_DIR""" ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' 'include directory. (Use --config=sycl_trisycl ' 'when building with Bazel) ' - '[Default is %s]: ') % ( - _DEFAULT_TRISYCL_INCLUDE_DIR) + '[Default is %s]: ' + ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) while True: trisycl_include_dir = get_from_env_or_user_or_default( - environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, - _DEFAULT_TRISYCL_INCLUDE_DIR) + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) if os.path.exists(trisycl_include_dir): break - print('Invalid triSYCL include directory, %s cannot be found' % - (trisycl_include_dir)) + print('Invalid triSYCL include directory, %s cannot be found' + % (trisycl_include_dir)) # Set TRISYCL_INCLUDE_DIR environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir - write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) - + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', + trisycl_include_dir) def set_mpi_home(environ_cp): """Set MPI_HOME.""" diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index 6c385af3b3..b795afd5b8 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -119,7 +119,7 @@ def tf_library(name, graph, config, out_nodes_file, ] + freeze_saver_srcs, outs=[freeze_file], - cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" + + cmd=("$(location //tensorflow/python/tools:freeze_graph)" + freeze_args), tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"], tags=tags, @@ -152,7 +152,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - " " + flags), + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -189,7 +189,7 @@ def tf_library(name, graph, config, " --cpp_class=" + cpp_class + " --target_triple=" + target_llvm_triple() + " --out_session_module=$(@D)/" + session_module_pb + - " " + flags), + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -256,7 +256,7 @@ def tf_library(name, graph, config, ], outs=[test_file], cmd=("sed " + sed_replace + - " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " + + " $(location //tensorflow/compiler/aot:test.cc) " + "> $(OUTS)"), tags=tags, ) diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py index 00a9c9a65b..a773b5a947 100644 --- a/tensorflow/compiler/tests/fused_batchnorm_test.py +++ b/tensorflow/compiler/tests/fused_batchnorm_test.py @@ -76,8 +76,7 @@ class FusedBatchNormTest(XLATestCase): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder( - np.float32, shape=scale_shape, name="offset") + offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y_ref, mean_ref, var_ref = self._reference_training( x_val, scale_val, offset_val, epsilon, data_format) @@ -113,8 +112,7 @@ class FusedBatchNormTest(XLATestCase): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder( - np.float32, shape=scale_shape, name="offset") + offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y, mean, var = nn.fused_batch_norm( t_val, diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt index aba356d616..25ada5ba27 100644 --- a/tensorflow/contrib/android/cmake/CMakeLists.txt +++ b/tensorflow/contrib/android/cmake/CMakeLists.txt @@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \ -std=c++11 -fno-rtti -fno-exceptions \ -O2 -Wno-narrowing -fomit-frame-pointer \ - -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \ + -mfpu=neon -mfloat-abi=softfp -fPIE \ -ftemplate-depth=900 \ -DGOOGLE_PROTOBUF_NO_RTTI \ -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER") diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py index 73747db31c..7f7697357c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py @@ -41,7 +41,6 @@ def try_import(name): # pylint: disable=invalid-name tf_logging.warning("Could not import %s: %s" % (name, str(e))) return module - stats = try_import("scipy.stats") @@ -63,9 +62,9 @@ class CauchyTest(test.TestCase): self.assertAllEqual(expected, scale_shape.eval()) loc = array_ops.zeros(loc_shape) scale = array_ops.ones(scale_shape) - self.assertAllEqual(expected, - array_ops.shape( - cauchy_lib.Cauchy(loc, scale).sample()).eval()) + self.assertAllEqual( + expected, + array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval()) def _testParamStaticShapes(self, sample_shape, expected): param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape) @@ -93,7 +92,8 @@ class CauchyTest(test.TestCase): cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) log_pdf = cauchy.log_prob(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.eval().shape) self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) @@ -115,15 +115,16 @@ class CauchyTest(test.TestCase): with self.test_session(): batch_size = 6 loc = constant_op.constant([[3.0, -3.0]] * batch_size) - scale = constant_op.constant( - [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size) + scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] * + batch_size) x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) log_pdf = cauchy.log_prob(x) log_pdf_values = log_pdf.eval() self.assertEqual(log_pdf.shape, (6, 2)) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.eval().shape) self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) @@ -247,7 +248,8 @@ class CauchyTest(test.TestCase): cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) entropy = cauchy.entropy() - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + entropy.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.eval().shape) self.assertAllEqual(cauchy.batch_shape, entropy.shape) @@ -255,7 +257,7 @@ class CauchyTest(test.TestCase): if not stats: return - expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3)) + expected_entropy = stats.cauchy(loc, scale).entropy() self.assertAllClose(expected_entropy, entropy.eval()) def testCauchyMode(self): @@ -366,8 +368,8 @@ class CauchyTest(test.TestCase): self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) - expected_shape = ( - tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) + expected_shape = (tensor_shape.TensorShape( + [n.eval()]).concatenate(cauchy.batch_shape)) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) @@ -383,18 +385,18 @@ class CauchyTest(test.TestCase): samples = cauchy.sample(n) sample_values = samples.eval() self.assertEqual(samples.shape, (100000, batch_size, 2)) - self.assertAllClose( - np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1) - self.assertAllClose( - np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1) + self.assertAllClose(np.median(sample_values[:, 0, 0]), + loc_v[0], atol=1e-1) + self.assertAllClose(np.median(sample_values[:, 0, 1]), + loc_v[1], atol=1e-1) expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) - expected_shape = ( - tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) + expected_shape = (tensor_shape.TensorShape( + [n.eval()]).concatenate(cauchy.batch_shape)) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) @@ -426,12 +428,9 @@ class CauchyTest(test.TestCase): self.assertEqual(cauchy.event_shape, ()) self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) self.assertAllEqual( - sess.run( - cauchy.batch_shape_tensor(), - feed_dict={ - loc: 5.0, - scale: [1.0, 2.0] - }), [2]) + sess.run(cauchy.batch_shape_tensor(), + feed_dict={loc: 5.0, + scale: [1.0, 2.0]}), [2]) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py index 8d59c1abfb..a17bb091f6 100644 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution + __all__ = [ "Cauchy", ] @@ -96,7 +97,7 @@ class Cauchy(distribution.Distribution): validate_args=False, allow_nan_stats=True, name="Cauchy"): - """Construct Cauchy distributions. + """Construct Cauchy distributions with loc and and scale `loc` and `scale`. The parameters `loc` and `scale` must be shaped in a way that supports broadcasting (e.g. `loc + scale` is a valid operation). @@ -120,8 +121,8 @@ class Cauchy(distribution.Distribution): """ parameters = locals() with ops.name_scope(name, values=[loc, scale]): - with ops.control_dependencies([check_ops.assert_positive(scale)] - if validate_args else []): + with ops.control_dependencies([check_ops.assert_positive(scale)] if + validate_args else []): self._loc = array_ops.identity(loc, name="loc") self._scale = array_ops.identity(scale, name="scale") check_ops.assert_same_float_dtype([self._loc, self._scale]) @@ -137,8 +138,8 @@ class Cauchy(distribution.Distribution): @staticmethod def _param_shapes(sample_shape): return dict( - zip(("loc", "scale"), - ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2))) + zip(("loc", "scale"), ([ops.convert_to_tensor( + sample_shape, dtype=dtypes.int32)] * 2))) @property def loc(self): @@ -152,10 +153,13 @@ class Cauchy(distribution.Distribution): def _batch_shape_tensor(self): return array_ops.broadcast_dynamic_shape( - array_ops.shape(self.loc), array_ops.shape(self.scale)) + array_ops.shape(self.loc), + array_ops.shape(self.scale)) def _batch_shape(self): - return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape) + return array_ops.broadcast_static_shape( + self.loc.shape, + self.scale.shape) def _event_shape_tensor(self): return constant_op.constant([], dtype=dtypes.int32) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index f1debc8590..9378fe8799 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -309,6 +309,7 @@ def _fused_batch_norm(inputs, new_shape = [-1, channels, 1, 1] inputs = array_ops.reshape(inputs, new_shape) inputs_shape = inputs.get_shape() + dtype = inputs.dtype.base_dtype if data_format == DATA_FORMAT_NHWC: params_shape = inputs_shape[-1:] else: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 27bd3172d6..5aa2253516 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1779,8 +1779,7 @@ class BatchNormTest(test.TestCase): dtype = dtypes.float32 height, width = 3, 3 with self.test_session(): - images = np.random.uniform(size=(5, height, width, 3)).astype( - dtype.as_numpy_dtype) + images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype) output = _layers.batch_norm(images, fused=fused) expected_name = ('BatchNorm/FusedBatchNorm' if fused else 'BatchNorm/batchnorm') @@ -2666,18 +2665,18 @@ class BatchNormTest(test.TestCase): # Test case for 11673 with self.test_session() as sess: a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) - _layers.batch_norm( - a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True) + b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW', + zero_debias_moving_mean=True) a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10)) - _layers.batch_norm( - a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True) + b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW', + zero_debias_moving_mean=True) sess.run(variables_lib.global_variables_initializer()) def testVariablesAreFloat32(self): height, width = 3, 3 with self.test_session(): - images = random_ops.random_uniform( - (5, height, width, 3), seed=1, dtype=dtypes.float16) + images = random_ops.random_uniform((5, height, width, 3), + seed=1, dtype=dtypes.float16) _layers.batch_norm(images, scale=True) beta = variables.get_variables_by_name('beta')[0] gamma = variables.get_variables_by_name('gamma')[0] @@ -2692,13 +2691,17 @@ class BatchNormTest(test.TestCase): channels = shape[1] images = np.arange(np.product(shape), dtype=dtype).reshape(shape) beta = init_ops.constant_initializer( - np.arange(2, channels + 2, dtype=np.float32)) + np.arange( + 2, channels + 2, dtype=np.float32)) gamma = init_ops.constant_initializer( - np.arange(10, channels + 10, dtype=np.float32) * 2.0) + np.arange( + 10, channels + 10, dtype=np.float32) * 2.0) mean = init_ops.constant_initializer( - np.arange(3, channels + 3, dtype=np.float32) * 5.0) + np.arange( + 3, channels + 3, dtype=np.float32) * 5.0) variance = init_ops.constant_initializer( - np.arange(1, channels + 1, dtype=np.float32) * 4.0) + np.arange( + 1, channels + 1, dtype=np.float32) * 4.0) output = _layers.batch_norm( images, fused=True, @@ -2723,6 +2726,7 @@ class BatchNormTest(test.TestCase): res_16 = self._runFusedBatchNorm(shape, np.float16) self.assertAllClose(res_32, res_16, rtol=1e-3) + def testAdjustmentCreated(self): # Tests that the adjustment is appropriately passed to and used by the core # BN layer. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 86fad4c553..db18ebf05d 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,6 +28,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -368,11 +369,10 @@ class DataFeeder(object): if x_is_dict: num_samples = list(self._x.values())[0].shape[0] elif tensor_util.is_tensor(self._x): - num_samples = self._x.shape[ - 0].value # shape will be a Dimension, extract an int + num_samples = self._x.shape[0].value # shape will be a Dimension, extract an int else: num_samples = self._x.shape[0] - + if self._shuffle: self.indices = self.random_state.permutation(num_samples) else: diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 7526f3ae0d..86d8484391 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -251,9 +251,8 @@ class SdcaModel(object): result_dense = 0.0 for i in range(len(dense_variables)): - result_dense += math_ops.matmul(dense_features[i], - array_ops.expand_dims( - dense_variables[i], -1)) + result_dense += math_ops.matmul( + dense_features[i], array_ops.expand_dims(dense_variables[i], -1)) # Reshaping to allow shape inference at graph construction time. return array_ops.reshape(result_dense, [-1]) + result_sparse diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 5bca82ded0..b122818221 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -40,7 +40,6 @@ from six import StringIO # TODO(aselle): Disable GPU for now os.environ["CUDA_VISIBLE_DEVICES"] = "-1" -# pylint: disable=g-import-not-at-top import tensorflow as tf from google.protobuf import text_format # TODO(aselle): switch to TensorFlow's resource_loader @@ -384,7 +383,7 @@ def make_zip_of_tests(zip_path, report["toco_log"] = "" tf.reset_default_graph() - with tf.device("/cpu:0"): + with tf.device('/cpu:0'): try: inputs, outputs = make_graph(param_dict_real) except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 04643a6058..4c60c99342 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -34,18 +34,12 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'PowerSignOptimizer', - 'AddSignOptimizer' + 'PowerSignOptimizer', 'AddSignOptimizer' 'DelayCompensatedGradientDescentOptimizer', - 'DropStaleGradientOptimizer', - 'ExternalOptimizerInterface', - 'LazyAdamOptimizer', - 'NadamOptimizer', - 'MovingAverageOptimizer', - 'ScipyOptimizerInterface', - 'VariableClippingOptimizer', - 'MultitaskOptimizerWrapper', - 'clip_gradients_by_global_norm', + 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', + 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', + 'ScipyOptimizerInterface', 'VariableClippingOptimizer', + 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py index cb6c77a86f..c26037935d 100644 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""An optimizer wrapper for stateful optimizers with multitask loss.""" + +"""An optimizer wrapper that ensures correct behaviour +of stateful optimizers with multitask loss.""" from __future__ import absolute_import from __future__ import division @@ -28,27 +30,26 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.training import optimizer -__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm'] - +__all__ = ["MultitaskOptimizerWrapper", + "clip_gradients_by_global_norm"] def _is_all_zeros(grad): all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0) return all_zeros - def _get_wrapper(fn, opt): - def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond(all_zeros, control_flow_ops.no_op, - lambda: fn(grad, *args, **kwargs)) - + return control_flow_ops.cond( + all_zeros, + control_flow_ops.no_op, + lambda: fn(grad, *args, **kwargs)) wrapper = types.MethodType(wrapper, opt) return wrapper - class MultitaskOptimizerWrapper(object): - """Optimizer wrapper making all-zero gradients harmless. + """Optimizer wrapper that ensures that + all-zero gradients don't affect the optimizer state. This might be useful when a multi-task loss is used, and some components of the loss might be @@ -87,20 +88,20 @@ class MultitaskOptimizerWrapper(object): gradvars_clipped, global_step=batch) ``` """ - def __init__(self, opt): - """Constructor. - + """ Args: - opt: an instance of a class that implements tf.train.Optimizer. + opt: an instance of a class that implements tf.train.Optimizer. """ if not isinstance(opt, optimizer.Optimizer): raise TypeError( - 'Supplied optimizer must be an instance of tf.train.Optimizer') + "Supplied optimizer must be an instance of tf.train.Optimizer") self._opt = opt - overridden_methods = ('_apply_dense', '_resource_apply_dense', - '_apply_sparse', '_resource_apply_sparse') - for name in overridden_methods: + overriden_methods = ('_apply_dense', + '_resource_apply_dense', + '_apply_sparse', + '_resource_apply_sparse') + for name in overriden_methods: fn = getattr(self._opt, name) wrapper = _get_wrapper(fn, self._opt) setattr(self._opt, name, wrapper) @@ -111,30 +112,27 @@ class MultitaskOptimizerWrapper(object): def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.): """Clips gradients of a multitask loss by their global norm. - Ignores all-zero tensors when computing the global norm. Args: - gradients_variables: a list of pairs (gradient, variable). - clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. + gradients_variables: a list of pairs (gradient, variable). + clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. Returns: - list: A list of pairs of the same type as gradients_variables,. - fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. + list: A list of pairs of the same type as gradients_variables,. + fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. """ gradients, variables = six.moves.zip(*gradients_variables) - def _replace_nonexisting_grad(grad): if grad is None: return grad all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond( - all_zeros, - lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)), - lambda: grad) - + return control_flow_ops.cond(all_zeros, + lambda: array_ops.zeros( + [], dtype=dtypes.as_dtype(grad.dtype)), + lambda: grad) nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients] fixed_global_norm = clip_ops.global_norm(nonzero_gradients) - gradients, _ = clip_ops.clip_by_global_norm( - gradients, clip_norm, use_norm=fixed_global_norm) + gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm, + use_norm=fixed_global_norm) return list(six.moves.zip(gradients, variables)), fixed_global_norm diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py index 618d8eb18d..b06213f715 100644 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py @@ -18,9 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np -import six - from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -28,11 +25,13 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import momentum +import numpy as np +import six class MultitaskOptimizerWrapperTest(test.TestCase): - """Tests for the multitask optimizer wrapper. """ - + Tests for the multitask optimizer wrapper. + """ def testWrapper(self): with self.test_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) @@ -40,10 +39,12 @@ class MultitaskOptimizerWrapperTest(test.TestCase): grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32) grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32) grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) - mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9) + mom_opt_impl = momentum.MomentumOptimizer( + learning_rate=2.0, momentum=0.9) mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper( mom_opt_impl) - mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) mom_update_partial = mom_opt.apply_gradients( zip([grads_allzero, grads1], [var0, var1])) mom_update_no_action = mom_opt.apply_gradients( @@ -62,13 +63,14 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 1: normal momentum update. self.evaluate(mom_update) # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([0.1, 0.1]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([0.01, 0.01]), self.evaluate(slot1)) + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), + self.evaluate(slot1)) # Check that the parameters have been updated. self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0)) + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) self.assertAllCloseAccordingToType( np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), self.evaluate(var1)) @@ -76,8 +78,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 2: momentum update that changes only slot1 but not slot0. self.evaluate(mom_update_partial) # Check that only the relevant momentum accumulator has been updated. - self.assertAllCloseAccordingToType( - np.array([0.1, 0.1]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), self.evaluate(slot1)) @@ -85,8 +87,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 3: momentum update that does not change anything. self.evaluate(mom_update_no_action) # Check that the momentum accumulators have *NOT* been updated. - self.assertAllCloseAccordingToType( - np.array([0.1, 0.1]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), self.evaluate(slot1)) @@ -103,9 +105,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase): grads3 = None varlist = [var0, var1, var2, var3] gradients = [grads0, grads1, grads2, grads3] - clipped_gradvars, global_norm = ( - multitask_optimizer_wrapper.clip_gradients_by_global_norm( - six.moves.zip(gradients, varlist), clip_norm=1.0)) + clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm( + six.moves.zip(gradients, varlist), clip_norm=1.0) clipped_grads = list(six.moves.zip(*clipped_gradvars))[0] reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0]))) self.assertAllCloseAccordingToType( @@ -114,6 +115,5 @@ class MultitaskOptimizerWrapperTest(test.TestCase): self.evaluate(clipped_grads[2]), np.array([0., 0.])) self.assertEqual(clipped_grads[3], None) - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index f130a2187c..16b6d145e3 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -24,7 +24,6 @@ import numpy as np from tensorflow.contrib import rnn as contrib_rnn from tensorflow.contrib.rnn.python.ops import core_rnn_cell -from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell from tensorflow.core.protobuf import config_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -39,6 +38,9 @@ from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test +from tensorflow.python.framework import test_util +from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell + # pylint: enable=protected-access @@ -372,20 +374,19 @@ class RNNCellTest(test.TestCase): h = array_ops.zeros([batch_size, num_proj]) state = rnn_cell_impl.LSTMStateTuple(c, h) cell = contrib_rnn_cell.LayerNormLSTMCell( - num_units=num_units, - num_proj=num_proj, - forget_bias=1.0, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0) + num_units=num_units, + num_proj=num_proj, + forget_bias=1.0, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) g, out_m = cell(x, state) sess.run([variables_lib.global_variables_initializer()]) - res = sess.run( - [g, out_m], { - x.name: np.ones((batch_size, input_size)), - c.name: 0.1 * np.ones((batch_size, num_units)), - h.name: 0.1 * np.ones((batch_size, num_proj)) - }) + res = sess.run([g, out_m], { + x.name: np.ones((batch_size, input_size)), + c.name: 0.1 * np.ones((batch_size, num_units)), + h.name: 0.1 * np.ones((batch_size, num_proj)) + }) self.assertEqual(len(res), 2) # The numbers in results were not calculated, this is mostly just a # smoke test. @@ -395,9 +396,9 @@ class RNNCellTest(test.TestCase): # Different inputs so different outputs and states for i in range(1, batch_size): self.assertTrue( - float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) + float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) self.assertTrue( - float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) + float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) def testOutputProjectionWrapper(self): with self.test_session() as sess: diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index 46823fa364..b4a5f2d7eb 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -996,19 +996,26 @@ class RNNCellTest(test.TestCase): output, state = cell(x, hidden) sess.run([variables.global_variables_initializer()]) - res = sess.run( - [output, state], { - hidden[0].name: - np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ - 1. - ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], - [[[2.], [2.]], [[2.], [2.]]]]]), - x.name: - np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ - 1. - ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]], - [[2.], [2.]]]]]) - }) + res = sess.run([output, state], { + hidden[0].name: + np.array([[[[[1.],[1.]], + [[1.],[1.]]], + [[[1.],[1.]], + [[1.],[1.]]]], + [[[[2.],[2.]], + [[2.],[2.]]], + [[[2.],[2.]], + [[2.],[2.]]]]]), + x.name: + np.array([[[[[1.],[1.]], + [[1.],[1.]]], + [[[1.],[1.]], + [[1.],[1.]]]], + [[[[2.],[2.]], + [[2.],[2.]]], + [[[2.],[2.]], + [[2.],[2.]]]]]) + }) # This is a smoke test, making sure expected values are unchanged. self.assertEqual(len(res), 2) self.assertAllClose(res[0], res[1].h) @@ -1269,8 +1276,10 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5) + def testBasicLSTMCellWithStateTupleLayerNorm(self): - """The results of LSTMCell and LayerNormBasicLSTMCell should be the same.""" + """The results of LSTMCell and LayerNormBasicLSTMCell + should be same. """ with self.test_session() as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): @@ -1281,21 +1290,21 @@ class LayerNormBasicLSTMCellTest(test.TestCase): c1 = array_ops.zeros([1, 2]) h1 = array_ops.zeros([1, 2]) state1 = rnn_cell_impl.LSTMStateTuple(c1, h1) - cell = rnn_cell_impl.MultiRNNCell([ - contrib_rnn_cell.LayerNormLSTMCell( - 2, layer_norm=True, norm_gain=1.0, norm_shift=0.0) - for _ in range(2) - ]) + cell = rnn_cell_impl.MultiRNNCell( + [contrib_rnn_cell.LayerNormLSTMCell( + 2, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) for _ in range(2)]) h, (s0, s1) = cell(x, (state0, state1)) sess.run([variables.global_variables_initializer()]) - res = sess.run( - [h, s0, s1], { - x.name: np.array([[1., 1.]]), - c0.name: 0.1 * np.asarray([[0, 1]]), - h0.name: 0.1 * np.asarray([[2, 3]]), - c1.name: 0.1 * np.asarray([[4, 5]]), - h1.name: 0.1 * np.asarray([[6, 7]]), - }) + res = sess.run([h, s0, s1], { + x.name: np.array([[1., 1.]]), + c0.name: 0.1 * np.asarray([[0, 1]]), + h0.name: 0.1 * np.asarray([[2, 3]]), + c1.name: 0.1 * np.asarray([[4, 5]]), + h1.name: 0.1 * np.asarray([[6, 7]]), + }) expected_h = np.array([[-0.38079708, 0.38079708]]) expected_h0 = np.array([[-0.38079708, 0.38079708]]) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 0698d40438..5e85c125df 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest @@ -114,7 +115,7 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. - + Layer normalization implementation is based on: https://arxiv.org/abs/1607.06450. @@ -123,24 +124,15 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton and is applied before the internal nonlinearities. - + """ - def __init__(self, - num_units, - use_peepholes=False, - initializer=None, - num_proj=None, - proj_clip=None, - num_unit_shards=1, - num_proj_shards=1, - forget_bias=1.0, - state_is_tuple=True, - activation=math_ops.tanh, - reuse=None, - layer_norm=False, - norm_gain=1.0, - norm_shift=0.0): + def __init__(self, num_units, use_peepholes=False, + initializer=None, num_proj=None, proj_clip=None, + num_unit_shards=1, num_proj_shards=1, + forget_bias=1.0, state_is_tuple=True, + activation=math_ops.tanh, reuse=None, + layer_norm=False, norm_gain=1.0, norm_shift=0.0): """Initialize the parameters for an LSTM cell. Args: @@ -172,6 +164,8 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): `layer_norm` has been set to `False`, this argument will be ignored. norm_shift: float, The layer normalization shift initial value. If `layer_norm` has been set to `False`, this argument will be ignored. + + """ super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: @@ -2055,8 +2049,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell): if self._skip_connection: self._total_output_channels += self._input_shape[-1] - state_size = tensor_shape.TensorShape( - self._input_shape[:-1] + [self._output_channels]) + state_size = tensor_shape.TensorShape(self._input_shape[:-1] + + [self._output_channels]) self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size) self._output_size = tensor_shape.TensorShape(self._input_shape[:-1] + [self._total_output_channels]) @@ -2116,8 +2110,11 @@ class Conv3DLSTMCell(ConvLSTMCell): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) - -def _conv(args, filter_size, num_features, bias, bias_start=0.0): +def _conv(args, + filter_size, + num_features, + bias, + bias_start=0.0): """convolution: Args: args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, @@ -2394,19 +2391,12 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): """ - def __init__(self, - num_units, - use_peepholes=False, - cell_clip=None, - initializer=None, - num_proj=None, - proj_clip=None, + def __init__(self, num_units, + use_peepholes=False, cell_clip=None, + initializer=None, num_proj=None, proj_clip=None, forget_bias=1.0, - activation=None, - layer_norm=False, - norm_gain=1.0, - norm_shift=0.0, - reuse=None): + activation=None, layer_norm=False, + norm_gain=1.0, norm_shift=0.0, reuse=None): """Initialize the parameters for an LSTM cell. Args: @@ -2467,6 +2457,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): def output_size(self): return self._output_size + def _linear(self, args, output_size, @@ -2516,9 +2507,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): scope = vs.get_variable_scope() with vs.variable_scope(scope) as outer_scope: weights = vs.get_variable( - "kernel", [total_arg_size, output_size], - dtype=dtype, - initializer=kernel_initializer) + "kernel", [total_arg_size, output_size], + dtype=dtype, + initializer=kernel_initializer) if len(args) == 1: res = math_ops.matmul(args[0], weights) else: @@ -2530,7 +2521,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): if bias_initializer is None: bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) biases = vs.get_variable( - "bias", [output_size], dtype=dtype, initializer=bias_initializer) + "bias", [output_size], + dtype=dtype, + initializer=bias_initializer) if not layer_norm: res = nn_ops.bias_add(res, biases) @@ -2561,6 +2554,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): ValueError: If input size cannot be inferred from inputs via static shape inference. """ + num_proj = self._num_units if self._num_proj is None else self._num_proj sigmoid = math_ops.sigmoid (c_prev, m_prev) = state @@ -2573,14 +2567,10 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: # i = input_gate, j = new_input, f = forget_gate, o = output_gate - lstm_matrix = self._linear( - [inputs, m_prev], - 4 * self._num_units, - bias=True, - bias_initializer=None, - layer_norm=self._layer_norm) + lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True, + bias_initializer=None, layer_norm=self._layer_norm) i, j, f, o = array_ops.split( - value=lstm_matrix, num_or_size_splits=4, axis=1) + value=lstm_matrix, num_or_size_splits=4, axis=1) if self._layer_norm: i = _norm(self._norm_gain, self._norm_shift, i, "input") @@ -2590,22 +2580,20 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): # Diagonal connections if self._use_peepholes: - with vs.variable_scope(unit_scope): + with vs.variable_scope(unit_scope) as projection_scope: w_f_diag = vs.get_variable( - "w_f_diag", shape=[self._num_units], dtype=dtype) + "w_f_diag", shape=[self._num_units], dtype=dtype) w_i_diag = vs.get_variable( - "w_i_diag", shape=[self._num_units], dtype=dtype) + "w_i_diag", shape=[self._num_units], dtype=dtype) w_o_diag = vs.get_variable( - "w_o_diag", shape=[self._num_units], dtype=dtype) + "w_o_diag", shape=[self._num_units], dtype=dtype) if self._use_peepholes: - c = ( - sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + - sigmoid(i + w_i_diag * c_prev) * self._activation(j)) + c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) else: - c = ( - sigmoid(f + self._forget_bias) * c_prev + - sigmoid(i) * self._activation(j)) + c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * + self._activation(j)) if self._layer_norm: c = _norm(self._norm_gain, self._norm_shift, c, "state") @@ -2620,7 +2608,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): m = sigmoid(o) * self._activation(c) if self._num_proj is not None: - with vs.variable_scope("projection"): + with vs.variable_scope("projection") as proj_scope: m = self._linear(m, self._num_proj, bias=False) if self._proj_clip is not None: diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index e87ef41388..c3b180d9f4 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -192,8 +192,7 @@ class _BaseAttentionMechanism(AttentionMechanism): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) if score_mask_value is None: - score_mask_value = dtypes.as_dtype( - self._memory_layer.dtype).as_numpy_dtype(-np.inf) + score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -1146,9 +1145,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, - name="attention_layer", - use_bias=False, + attention_layer_size, name="attention_layer", use_bias=False, dtype=attention_mechanisms[i].dtype) for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index ac8d994502..331943a3ef 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -16,8 +16,8 @@ limitations under the License. #ifdef TENSORFLOW_USE_VERBS #include "tensorflow/contrib/verbs/rdma.h" -#include #include +#include #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -137,7 +137,7 @@ ibv_device* set_device() { if (!env_p_rdma_device.empty()) { for (device_index = 0; device_index < dev_num; device_index++) { if (!env_p_rdma_device.compare( - ibv_get_device_name(dev_list[device_index]))) { + ibv_get_device_name(dev_list[device_index]))) { CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) << "Device " << ibv_get_device_name(dev_list[device_index]) << " has no active ports"; @@ -147,7 +147,7 @@ ibv_device* set_device() { // check validity of input device CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; } else { - // set default device + // set default device str_port_num = get_env_var("RDMA_DEVICE_PORT"); CHECK(str_port_num.empty()) << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; @@ -177,7 +177,7 @@ ibv_device* set_device() { // Returns: // port to use uint8_t set_port(ibv_context* context) { - uint8_t port_num = 0; // 0 is illegal port number + uint8_t port_num = 0; //0 is illegal port number string str_port_num; ibv_device_attr device_att; ibv_port_attr port_attr; @@ -199,7 +199,9 @@ uint8_t set_port(ibv_context* context) { // check if port id active CHECK(port_attr.state == IBV_PORT_ACTIVE) << "Selected RDMA_DEVICE_PORT is not active"; - } else { // set default port + } + // set default port + else { for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { rc = ibv_query_port(context, port_index, &port_attr); CHECK(!rc) << "Failed to query the port" << port_index; @@ -267,7 +269,7 @@ bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, // Function to set GID index. // If the port link is IB, no GID index should be selected. // If Ethernet but RDMA_GID_INDEX not set gid index that supports -// RoCE V2 will be chosen(fails if more than one IP is configured) +// RoCE V2 will be chosen(fails if more then one IP is configured) // Args: // context - device context // port_num - port number @@ -300,7 +302,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { } } switch (port_attr.link_layer) { - case (IBV_LINK_LAYER_ETHERNET): + case(IBV_LINK_LAYER_ETHERNET) : gid_str = get_env_var("RDMA_GID_INDEX"); if (!gid_str.empty()) { gid_index = stoi(gid_str); @@ -311,7 +313,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { << "More than one IP is available, please specify GID_INDEX"; } break; - case (IBV_LINK_LAYER_INFINIBAND): // no need in GID index + case(IBV_LINK_LAYER_INFINIBAND) : // no need in GID index break; default: LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " @@ -372,8 +374,7 @@ enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { break; default: CHECK(0) << "Error: MTU input value must be one of the following: 256, " - "512, 1024, 2048, 4096. MTU " - << mtu << " is invalid\n"; + "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n"; break; } CHECK(mtu < port_attr.active_mtu) @@ -452,9 +453,9 @@ void RdmaAdapter::Process_CQ() { CHECK_GE(ne, 0); for (int i = 0; i < ne; ++i) { CHECK(wc_[i].status == IBV_WC_SUCCESS) - << "Failed status \n" - << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " " - << static_cast(wc_[i].wr_id) << " " << wc_[i].vendor_err; + << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " " + << wc_[i].status << " " << static_cast(wc_[i].wr_id) << " " + << wc_[i].vendor_err; if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) { RdmaChannel* rc = reinterpret_cast(wc_[i].wr_id); // put back a recv wr. @@ -610,7 +611,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // create message and ack buffers, then initialize the tables. { const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer", - "tx_ack_buffer", "rx_ack_buffer"}; + "tx_ack_buffer", "rx_ack_buffer"}; tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]); rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]); tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]); @@ -671,7 +672,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) { void RdmaChannel::Recv() { struct ibv_recv_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; struct ibv_recv_wr* bad_wr; CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv"; } @@ -825,11 +826,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; int r; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; memset(&attr, 0, sizeof(ibv_qp_attr)); @@ -840,10 +841,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; - CHECK(!(r = ibv_modify_qp(qp_, &attr, - IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; connected_ = true; @@ -930,7 +931,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) { struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -1025,9 +1026,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { - CHECK(send_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); + CHECK(send_args.device_context) << "send dev name: " << src_dev->name() + << " gpu_info: " + << src_dev->tensorflow_gpu_device_info(); if (can_memcpy) { AllocatorAttributes host_alloc_attrs; @@ -1053,8 +1054,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( // aync instead GPUUtil::SetProtoFromGPU( in, src_dev, send_args.device_context, &proto, is_dead, - [this, proto, buffer_size, key, in, step_id, key_with_step_id, - is_dead, send_args, recv_args](const Status& s) mutable { + [this, proto, buffer_size, key, in, step_id, key_with_step_id, + is_dead, send_args, recv_args](const Status& s) mutable { CHECK(s.ok()) << "copy proto from gpu sync"; auto tensor_bytes = proto.ByteSize(); buffer_size += tensor_bytes; diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt deleted file mode 100644 index cd7ec6e551..0000000000 --- a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt +++ /dev/null @@ -1,47 +0,0 @@ -op { - graph_op_name: "UniqueV2" - in_arg { - name: "x" - description: < [1, 2, 4, 7, 8] -idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -``` -END -} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index 77a96d1e03..0a3355cdbc 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -26,8 +26,6 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. -If the given segment ID `i` is negative, the value is dropped and will not be -added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 223dd12f8f..d0dba6e1f0 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -455,7 +455,7 @@ class Graph { // the corresponding NodeDef to reflect the change. // REQUIRES: The control edge must exist. void RemoveControlEdge(const Edge* e); - + // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index e2ce0ba046..2aa1b31e15 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -118,9 +118,11 @@ class GraphTest : public ::testing::Test { LOG(FATAL) << name; } - bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) { - for (const Edge* e : dst->in_edges()) { - if (e->IsControlEdge() && e->src() == src && + bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, + const Node* dst) { + for (const Edge *e : dst->in_edges()) { + if (e->IsControlEdge() && + e->src() == src && e->src_output() == Graph::kControlSlot && e->dst_input() == Graph::kControlSlot) { return true; diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b4a5a3c796..f1cb9a1860 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1720,7 +1720,6 @@ tf_cuda_cc_tests( ":data_flow", ":ops_testutil", ":ops_util", - "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 890fa3121b..766d63e3be 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -97,9 +97,8 @@ class BincountOp : public OpKernel { const Tensor& weights_t = ctx->input(2); int32 size = size_tensor.scalar()(); - OP_REQUIRES( - ctx, size >= 0, - errors::InvalidArgument("size (", size, ") must be non-negative")); + OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument( + "size (", size, ") must be non-negative")); const auto arr = arr_t.flat(); const auto weights = weights_t.flat(); diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h index cd3d560cd1..0f8dd2b82a 100644 --- a/tensorflow/core/kernels/bincount_op.h +++ b/tensorflow/core/kernels/bincount_op.h @@ -16,11 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_BINCOUNT_OP_H_ #define TENSORFLOW_BINCOUNT_OP_H_ -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc index 6074b3e1f6..ae9e26ffdf 100644 --- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -17,12 +17,12 @@ limitations under the License. #define EIGEN_USE_GPU +#include "tensorflow/core/kernels/bincount_op.h" #include "external/cub_archive/cub/device/device_histogram.cuh" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -93,8 +93,8 @@ struct BincountFunctor { /* num_samples */ num_samples, /* stream */ stream); if (err != cudaSuccess) { - return errors::Internal( - "Could not launch HistogramEven: ", cudaGetErrorString(err), "."); + return errors::Internal("Could not launch HistogramEven: ", + cudaGetErrorString(err), "."); } return Status::OK(); } diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc index cb04b40637..14becc87a7 100644 --- a/tensorflow/core/kernels/bincount_op_test.cc +++ b/tensorflow/core/kernels/bincount_op_test.cc @@ -30,8 +30,8 @@ static Graph* Bincount(int arr_size, int nbins) { Tensor arr(DT_INT32, TensorShape({arr_size})); arr.flat() = arr.flat().setRandom().abs(); - Tensor size(DT_INT32, TensorShape({static_cast(1)})); - size.flat()(0) = static_cast(nbins); + Tensor size(DT_INT32, TensorShape({(int32)1})); + size.flat()(0) = (int32)nbins; Tensor weights(DT_INT32, TensorShape({0})); diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc index 325dee793b..aafbbe41b4 100644 --- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc @@ -77,10 +77,10 @@ struct BucketizeFunctor { TF_RETURN_IF_ERROR(boundaries_array.Finalize()); CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); - BucketizeCustomKernel - <<>>( - input.size(), input.data(), boundaries_vector.size(), - boundaries_array.data(), output.data()); + BucketizeCustomKernel< + T><<>>( + input.size(), input.data(), boundaries_vector.size(), + boundaries_array.data(), output.data()); return Status::OK(); } diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index c2d24d1f12..f819fccbfb 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -1101,27 +1101,29 @@ class Conv3DBackpropFilterOp : public OpKernel { bool cudnn_use_autotune_; }; + + #define REGISTER_GPU_KERNEL(T) \ REGISTER_KERNEL_BUILDER( \ Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), \ Conv3DBackpropInputOp); \ REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("input_sizes"), \ - Conv3DBackpropInputOp); \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("input_sizes"), \ + Conv3DBackpropInputOp); \ REGISTER_KERNEL_BUILDER( \ - Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ - Conv3DBackpropFilterOp); \ + Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropFilterOp); \ REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("filter_sizes"), \ - Conv3DBackpropFilterOp); + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("filter_sizes"), \ + Conv3DBackpropFilterOp); TF_CALL_half(REGISTER_GPU_KERNEL); TF_CALL_float(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL - + #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index a7673afd0b..8d44208aa7 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, #ifdef TENSORFLOW_USE_SYCL REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index 7b688db4c5..bbc69e45aa 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, #ifdef TENSORFLOW_USE_SYCL REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 9347978d51..53d65a22d1 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -231,8 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args, } // Pad to vector-register width (if needed). for (int64 d = 0; d < pad_size; ++d) { - buffer[buf_base + vectorized_size + scalar_size + d] = - static_cast(0); + buffer[buf_base + vectorized_size + scalar_size + d] = static_cast(0); } } } @@ -511,8 +510,7 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropInputOp; +extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; @@ -887,8 +885,7 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropFilterOp; +extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index 30ecd0c2ba..2759ecb2f1 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -427,11 +427,6 @@ TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA -REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative") - .Device(DEVICE_GPU) - .TypeConstraint("T"), - DepthwiseConv2dNativeOp); - REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), DepthwiseConv2dNativeOp); diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index 097a9f5bfa..11aed5b415 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -158,8 +158,7 @@ struct DepthwiseFilterPadOp { } // Pad the remainder of output to vector-register boundary. for (int64 j = 0; j < pad_size; ++j) { - padded_filter[output_base + vectorized_size + scalar_size + j] = - static_cast(0); + padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast(0); } } } diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index d8bdb700e6..157ce106ce 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -38,6 +37,7 @@ limitations under the License. #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/maxpooling_op_gpu.h" @@ -359,8 +359,7 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, - &propagate_nans_)); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -889,8 +888,7 @@ class MaxPoolingWithArgmaxOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, - &propagate_nans_)); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1054,8 +1052,7 @@ class MaxPoolingNoMaskOp : public OpKernel { "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); - TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, - &propagate_nans_)); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1140,8 +1137,7 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, - &propagate_nans_)); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index f8daaca4c9..d96b844383 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -405,17 +405,17 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( if (propagate_nans) { MaxPoolForwardNHWC <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>>( - output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + kThreadsPerBlock, 0, d.stream()>>> + (output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); } else { MaxPoolForwardNHWC <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>>( - output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + kThreadsPerBlock, 0, d.stream()>>> + (output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); } return d.ok(); } diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index c4d5a45d3c..0a5be4fec9 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -101,8 +101,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. TensorShape output_shape = input_shape.GetTfShape(); Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output( - input_number, output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(input_number, + output_shape, &output_tensor)); CHECK_NOTNULL(output_tensor); // Do we need to reorder Mkl layout into TensorFlow layout? @@ -116,13 +116,13 @@ class MklToTfOp : public OpKernel { // If not, just forward input tensor to output tensor. CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); } - } catch (mkldnn::error& e) { + } catch (mkldnn::error &e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + std::string(e.message) + ", in file " + - std::string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + std::string(e.message) + + ", in file " + std::string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", error_msg)); } } #else @@ -160,8 +160,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape, - &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(input_number, + output_shape, &output_tensor)); dnnLayout_t output_layout = static_cast(input_shape.GetTfLayout()); diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h index 93ef512778..d3d1b56c9d 100644 --- a/tensorflow/core/kernels/ops_util.h +++ b/tensorflow/core/kernels/ops_util.h @@ -98,19 +98,6 @@ gtl::InlinedVector ComputeStride(const TensorShape& shape) { return strides; } -// Helper to compute 'strides' given an Eigen TensorDimensions -template -gtl::InlinedVector ComputeEigenStrides(const EigenDimensions& shape) { - const int ndims = shape.rank(); - gtl::InlinedVector strides(ndims); - T stride = 1; - for (int i = ndims - 1; i >= 0; --i) { - strides[i] = stride; - stride *= static_cast(shape[i]); - } - return strides; -} - } // namespace tensorflow #endif // TENSORFLOW_KERNELS_OPS_UTIL_H_ diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc index cda6d7d8f9..f8b0285c50 100644 --- a/tensorflow/core/platform/posix/error.cc +++ b/tensorflow/core/platform/posix/error.cc @@ -131,8 +131,8 @@ error::Code ErrnoToCode(int err_number) { case ENETUNREACH: // Network unreachable case ENOLCK: // No locks available case ENOLINK: // Link has been severed -#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \ - defined(__HAIKU__)) +#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \ + || defined(__HAIKU__)) case ENONET: // Machine is not on the network #endif code = error::UNAVAILABLE; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 614ee00b01..09f69a95c1 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,8 +37,8 @@ limitations under the License. #ifdef TF_USE_SNAPPY #include "snappy.h" #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ - defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ + || defined(__HAIKU__) #include #endif @@ -62,8 +62,8 @@ int NumSchedulableCPUs() { } perror("sched_getaffinity"); #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ - defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ + || defined(__HAIKU__) unsigned int count = std::thread::hardware_concurrency(); if (count > 0) return static_cast(count); #endif diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index cf11f419a4..8fa0dfbed9 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -752,12 +752,6 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value, return __shfl_down_sync(mask, value, delta, width); } -__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown( - unsigned mask, Eigen::half value, int delta, int width = warpSize) { - return Eigen::half( - __shfl_down_sync(mask, static_cast(value), delta, width)); -} - // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. @@ -780,12 +774,6 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value, return __shfl_xor_sync(mask, value, laneMask, width); } -__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor( - unsigned mask, Eigen::half value, int laneMask, int width = warpSize) { - return Eigen::half( - __shfl_xor_sync(mask, static_cast(value), laneMask, width)); -} - // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 148c7851bd..118ff0d0d6 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -24,25 +24,25 @@ limitations under the License. #include "mkl_dnn_types.h" #include "mkl_service.h" #include "mkl_trans.h" -#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/graph/mkl_graph_util.h" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::engine; using mkldnn::memory; -using mkldnn::padding_kind; -using mkldnn::primitive; using mkldnn::reorder; +using mkldnn::primitive; +using mkldnn::padding_kind; +using mkldnn::engine; #endif // The file contains a number of utility classes and functions used by MKL @@ -56,14 +56,8 @@ namespace tensorflow { // Tensorflow tensor. typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims; -typedef enum { - Dim_N = 0, - Dim_C = 1, - Dim_H = 2, - Dim_W = 3, - Dim_O = 0, - Dim_I = 1 -} MklDnnDims; +typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3, + Dim_O = 0, Dim_I = 1 } MklDnnDims; class MklShape { public: @@ -242,7 +236,8 @@ class MklShape { (IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_ // Location of sizes. Note dim is not used here, left here // to make macros consistent. -#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t)) +#define SIZES_OFFSET(dims) \ + (DIMS_OFFSET + sizeof(size_t)) #define STRIDES_OFFSET(dims) \ (SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides #define MKL_LAYOUT_OFFSET(dims) \ @@ -337,7 +332,7 @@ class MklDnnShape { /// Number of dimensions in Tensorflow format size_t dimension_ = 0; /// Required by MKLDNN for conversions - mkldnn_dims_t sizes_; // Required by MKL for conversions + mkldnn_dims_t sizes_; // Required by MKL for conversions memory::format tf_data_format_ = memory::format::format_undef; memory::data_type T_ = memory::data_type::data_undef; // MKL layout @@ -350,13 +345,15 @@ class MklDnnShape { typedef std::remove_extent::type mkldnn_dim_t; #define INVALID_DIM_SIZE -1 + public: MklDnnShape() { - for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); - ++i) { + for (size_t i = 0; i < sizeof(data_.sizes_) / + sizeof(data_.sizes_[0]); ++i) { data_.sizes_[i] = -1; } - for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) { + for (size_t i = 0; i < sizeof(data_.map_) / + sizeof(data_.map_[0]); ++i) { data_.map_[i] = -1; } } @@ -372,26 +369,26 @@ class MklDnnShape { inline void SetDimensions(const size_t dimension) { data_.dimension_ = dimension; } - inline size_t GetDimension(char dimension) const { + inline size_t GetDimension(char dimension)const { int index = GetMklDnnTensorDimIndex(dimension); CHECK(index >= 0 && index < this->GetDimension()) << "Invalid index from the dimension: " << index << ", " << dimension; return this->DimSize(index); } - inline int32 GetMklDnnTensorDimIndex(char dimension) const { + inline int32 GetMklDnnTensorDimIndex(char dimension)const { switch (dimension) { - case 'N': - return MklDnnDims::Dim_N; - case 'C': - return MklDnnDims::Dim_C; - case 'H': - return MklDnnDims::Dim_H; - case 'W': - return MklDnnDims::Dim_W; - default: - LOG(FATAL) << "Invalid dimension: " << dimension; - return -1; // Avoid compiler warning about missing return value + case 'N': + return MklDnnDims::Dim_N; + case 'C': + return MklDnnDims::Dim_C; + case 'H': + return MklDnnDims::Dim_H; + case 'W': + return MklDnnDims::Dim_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value } } @@ -406,9 +403,9 @@ class MklDnnShape { memory::dims retVal; if (data_.is_mkl_tensor_) { int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); - for (size_t i = 0; i < dimensions; i++) { + for (size_t i = 0 ; i < dimensions; i++) { if (data_.sizes_[i] != INVALID_DIM_SIZE) - retVal.push_back(data_.sizes_[i]); + retVal.push_back(data_.sizes_[i]); } } else { CHECK_EQ(data_.is_mkl_tensor_, true); @@ -417,7 +414,7 @@ class MklDnnShape { } inline int64 DimSize(int index) const { - CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0])); + CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0])); return data_.sizes_[index]; } @@ -454,7 +451,7 @@ class MklDnnShape { /// We don't create primitive_descriptor for TensorFlow layout now. /// We use lazy evaluation and create it only when needed. inline void SetTfLayout(size_t dims, const memory::dims& sizes, - memory::format format) { + memory::format format) { CHECK_EQ(dims, sizes.size()); data_.dimension_ = dims; for (size_t ii = 0; ii < dims; ii++) { @@ -500,7 +497,9 @@ class MklDnnShape { SetTfDimOrder(dimension, data_format); } - inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; } + inline const mkldnn_dim_t* GetTfToMklDimMap() const { + return &data_.map_[0]; + } inline size_t TfDimIdx(int index) const { return data_.map_[index]; } inline int64 TfDimSize(int index) const { return data_.sizes_[TfDimIdx(index)]; @@ -554,7 +553,9 @@ class MklDnnShape { /// Size of buffer to hold the serialized object, the size is computed by /// following above mentioned order - inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); } + inline size_t GetSerializeBufferSize() const { + return sizeof(MklShapeData); + } void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const { CHECK(buf_size >= GetSerializeBufferSize()) @@ -565,12 +566,12 @@ class MklDnnShape { void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) { // Make sure buffer holds at least is_mkl_tensor_. CHECK(buf_size >= sizeof(data_.is_mkl_tensor_)) - << "Buffer size is too small in DeSerializeMklDnnShape"; + << "Buffer size is too small in DeSerializeMklDnnShape"; const bool is_mkl_tensor = *reinterpret_cast(buf); if (is_mkl_tensor) { // If it is an MKL Tensor then read the rest CHECK(buf_size >= GetSerializeBufferSize()) - << "Buffer size is too small in DeSerializeMklDnnShape"; + << "Buffer size is too small in DeSerializeMklDnnShape"; data_ = *reinterpret_cast(buf); } } @@ -659,7 +660,8 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { } #ifdef INTEL_MKL_DNN -inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) { +inline void GetMklShape(OpKernelContext* ctext, int n, + MklDnnShape* mklshape) { mklshape->DeSerializeMklDnnShape( ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) .flat() @@ -698,7 +700,8 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, /// Get shape of input tensor pointed by 'input_idx' in TensorShape format. /// If the input tensor is in MKL layout, then obtains TensorShape from /// MklShape. -inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) { +inline TensorShape GetTfShape(OpKernelContext* context, + size_t input_idx) { // Sanity check. CHECK_NOTNULL(context); CHECK_LT(input_idx, context->num_inputs()); @@ -818,7 +821,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, template inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, - TensorShape tf_shape) { + TensorShape tf_shape) { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), tf_shape, tensor_out)); } @@ -1096,8 +1099,7 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { /// /// @input None /// @return memory::data_type corresponding to type T -template -static memory::data_type MklDnnType(); +template static memory::data_type MklDnnType(); /// Instantiation for float type. Add similar instantiations for other /// type if needed. @@ -1112,11 +1114,10 @@ memory::data_type MklDnnType() { /// @return: memory::format corresponding to TensorFlow data format; /// Fails with an error if invalid data format. inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { - if (format == FORMAT_NHWC) - return memory::format::nhwc; - else if (format == FORMAT_NCHW) - return memory::format::nchw; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); + if (format == FORMAT_NHWC) return memory::format::nhwc; + else if (format == FORMAT_NCHW) return memory::format::nchw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, + "Unsupported data format")); // Return to get rid of compiler warning return memory::format::format_undef; } @@ -1127,11 +1128,10 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return: Tensorflow data format corresponding to memory::format /// Fails with an error if invalid data format. inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { - if (format == memory::format::nhwc) - return FORMAT_NHWC; - else if (format == memory::format::nchw) - return FORMAT_NCHW; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); + if (format == memory::format::nhwc) return FORMAT_NHWC; + else if (format == memory::format::nchw) return FORMAT_NCHW; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, + "Unsupported data format")); } /// Map TensorShape object into memory::dims required by MKL-DNN @@ -1161,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { /// @input TensorShape object in shape /// @return memory::dims in MKL-DNN required NCHW format inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, - TensorFormat format) { + TensorFormat format) { // Check validity of format. CHECK_NE(TFDataFormatToMklDnnDataFormat(format), memory::format::format_undef); @@ -1237,23 +1237,21 @@ class MklDnnData { const engine* cpu_engine_; public: - explicit MklDnnData(const engine* e) - : user_memory_(nullptr), - reorder_memory_(nullptr), - op_md_(nullptr), - cpu_engine_(e) {} + explicit MklDnnData(const engine* e) : user_memory_(nullptr), + reorder_memory_(nullptr), + op_md_(nullptr), cpu_engine_(e) {} ~MklDnnData() { cpu_engine_ = nullptr; // We don't own this. - delete (user_memory_); - delete (reorder_memory_); - delete (op_md_); + delete(user_memory_); + delete(reorder_memory_); + delete(op_md_); } inline void* GetTensorBuffer(const Tensor* tensor) const { CHECK_NOTNULL(tensor); - return const_cast( - static_cast(tensor->flat().data())); + return const_cast(static_cast( + tensor->flat().data())); } /// Set user memory primitive using specified dimensions, memory format and @@ -1285,7 +1283,7 @@ class MklDnnData { /// @return: memory::desc object corresponding to blocked memory format /// for given dimensions and strides. static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim, - const memory::dims& strides) { + const memory::dims& strides) { CHECK_EQ(dim.size(), strides.size()); // We have to construct memory descriptor in a C style. This is not at all @@ -1354,7 +1352,7 @@ class MklDnnData { CHECK_NOTNULL(cpu_engine_); // TODO(nhasabni): can we remove dynamic memory allocation? if (data_buffer) { - user_memory_ = new memory(pd, data_buffer); + user_memory_ = new memory(pd, data_buffer); } else { user_memory_ = new memory(pd); } diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc index 8b73eadb40..6aef3d86e9 100644 --- a/tensorflow/core/util/mkl_util_test.cc +++ b/tensorflow/core/util/mkl_util_test.cc @@ -54,6 +54,7 @@ TEST(MklUtilTest, MklDnnTfShape) { EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape); } + TEST(MklUtilTest, MklDnnBlockedFormatTest) { // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension // first (case 1) and then it being outermost dimension (case 2). diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index 313c09e1e4..92cc3bd60e 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -84,10 +84,11 @@ public class ShapeTest { assertEquals(Shape.scalar(), Shape.scalar()); assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3)); - assertNotEquals(Shape.make(1, 2), null); - assertNotEquals(Shape.make(1, 2), new Object()); + assertNotEquals(Shape.make(1,2), null); + assertNotEquals(Shape.make(1,2), new Object()); assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4)); + assertNotEquals(Shape.unknown(), Shape.unknown()); assertNotEquals(Shape.make(-1), Shape.make(-1)); assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3)); @@ -102,3 +103,4 @@ public class ShapeTest { assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode()); } } + diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index 750af20e8a..3512f66284 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -117,11 +117,11 @@ def numpy_input_fn(x, raise ValueError('y cannot be empty dict, use None instead.') ordered_dict_y = collections.OrderedDict( - sorted(y.items(), key=lambda t: t[0])) + sorted(y.items(), key=lambda t: t[0])) target_keys = list(ordered_dict_y.keys()) duplicate_keys = set(feature_keys).intersection(set(target_keys)) - if duplicate_keys: + if len(duplicate_keys): raise ValueError('{} duplicate keys are found in both x and y: ' '{}'.format(len(duplicate_keys), duplicate_keys)) @@ -131,14 +131,16 @@ def numpy_input_fn(x, ordered_dict_data[target_keys] = y if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1: - shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys} + shape_dict_of_x = {k: ordered_dict_data[k].shape + for k in feature_keys} if target_keys is None: shape_of_y = None elif isinstance(target_keys, string_types): shape_of_y = y.shape else: - shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys} + shape_of_y = {k: ordered_dict_data[k].shape + for k in target_keys} raise ValueError('Length of tensors in x and y is mismatched. All ' 'elements in x and y must have the same length.\n' @@ -153,12 +155,11 @@ def numpy_input_fn(x, enqueue_size=batch_size, num_epochs=num_epochs) - batch = ( - queue.dequeue_many(batch_size) - if num_epochs is None else queue.dequeue_up_to(batch_size)) + batch = (queue.dequeue_many(batch_size) if num_epochs is None + else queue.dequeue_up_to(batch_size)) # Remove the first `Tensor` in `batch`, which is the row number. - if batch: + if len(batch) > 0: batch.pop(0) features = dict(zip(feature_keys, batch[:len(feature_keys)])) diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 1374e3f7e1..65eae7a7dc 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -255,7 +255,7 @@ class NumpyIoTest(test.TestCase): with self.test_session() as session: input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) + x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor = input_fn() coord = coordinator.Coordinator() @@ -327,7 +327,7 @@ class NumpyIoTest(test.TestCase): with self.test_session() as session: input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) + x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor, targets_tensor = input_fn() coord = coordinator.Coordinator() @@ -362,10 +362,13 @@ class NumpyIoTest(test.TestCase): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} - y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b} + y = {'y1': np.arange(-32, -28), + 'a': a, + 'y2': np.arange(32, 28, -1), + 'b': b} with self.test_session(): with self.assertRaisesRegexp( - ValueError, '2 duplicate keys are found in both x and y'): + ValueError, '2 duplicate keys are found in both x and y'): failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) failing_input_fn() diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 4c026590c2..1610214d54 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -987,9 +987,10 @@ class TensorFlowTestCase(googletest.TestCase): msg: An optional string message to append to the failure message. """ # f1 == f2 is needed here as we might have: f1, f2 = inf, inf - self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err, - "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg - if msg is not None else "")) + self.assertTrue( + f1 == f2 or math.fabs(f1 - f2) <= err, + "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg + if msg is not None else "")) def assertArrayNear(self, farray1, farray2, err): """Asserts that two float arrays are near each other. diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 1bf2b70c1b..76b80e60ea 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -114,21 +114,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): arr = np.random.rand(*arr_shape) mask = make_mask(arr_shape[:ndims_mask]) if axis is not None: - mask = make_mask(arr_shape[axis:ndims_mask + axis]) + mask = make_mask(arr_shape[axis:ndims_mask+axis]) if axis is None or axis == 0: masked_arr = arr[mask] elif axis == 1: - masked_arr = arr[:, mask] + masked_arr = arr[:,mask] elif axis == 2: - masked_arr = arr[:, :, mask] - with self.test_session(): + masked_arr = arr[:,:,mask] + with self.test_session() as sess: masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) # Leading dimension size of masked_tensor is always unknown until runtime # since we don't how many elements will be kept. leading = 1 if axis is None else axis + 1 self.assertAllEqual(masked_tensor.get_shape()[leading:], - masked_arr.shape[leading:]) + masked_arr.shape[leading:]) self.assertAllClose(masked_arr, masked_tensor.eval()) @@ -1078,7 +1078,6 @@ class PadTest(test_util.TensorFlowTestCase): [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) - class InvertPermutationTest(test_util.TensorFlowTestCase): def testInvertPermutation(self): diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 2767df127e..79285476b4 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -25,7 +25,6 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest - class BincountTest(test_util.TensorFlowTestCase): def test_empty(self): @@ -73,7 +72,8 @@ class BincountTest(test_util.TensorFlowTestCase): else: weights = np.random.random(num_samples) self.assertAllClose( - math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights)) + math_ops.bincount(arr, weights).eval(), + np.bincount(arr, weights)) def test_random_without_weights(self): num_samples = 10000 @@ -83,7 +83,8 @@ class BincountTest(test_util.TensorFlowTestCase): arr = np.random.randint(0, 1000, num_samples) weights = np.ones(num_samples).astype(dtype) self.assertAllClose( - math_ops.bincount(arr, None).eval(), np.bincount(arr, weights)) + math_ops.bincount(arr, None).eval(), + np.bincount(arr, weights)) def test_zero_weights(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 68817cc256..6cbdd4cbb3 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -439,10 +439,11 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeCPU(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, - dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, - dtypes_lib.complex128, dtypes_lib.string + dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, + dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, + dtypes_lib.complex64, dtypes_lib.complex128, + dtypes_lib.string ]: self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False) self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False) @@ -573,10 +574,10 @@ class OnesLikeTest(test.TestCase): def testOnesLike(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, - dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, - dtypes_lib.complex128 + dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, + dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, + dtypes_lib.complex64, dtypes_lib.complex128 ]: numpy_dtype = dtype.as_numpy_dtype with self.test_session(): diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index d92797a7d3..a7e23ead1c 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -52,6 +52,7 @@ class Conv1DTest(test.TestCase): self.assertEqual(len(output), 2) self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) + def testConv1DTranspose(self): with self.test_session(): stride = 2 @@ -92,6 +93,5 @@ class Conv1DTest(test.TestCase): self.assertAllClose(cache_values, value) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index ec8ac74163..116681fc4c 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -68,8 +68,8 @@ class Conv3DTest(test.TestCase): total_size_2 *= s # Initializes the input tensor with array containing numbers from 0 to 1. - # We keep the input tensor values fairly small to avoid overflowing float16 - # during the conv3d. + # We keep the input tensor values fairly small to avoid overflowing a float16 + # tensor during the conv3d x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu): @@ -115,13 +115,15 @@ class Conv3DTest(test.TestCase): if value.dtype == np.float16: tol = 1e-3 - self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol) + self.assertAllClose(expected, value.flatten(), atol=tol, + rtol=tol) def testConv3D1x1x1Filter(self): expected_output = [ - 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259, - 0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926, - 1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593 + 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5 , + 0.59259259, 0.62962963, 0.77777778, 0.92592593, 0.85185185, + 1.05555556, 1.25925926, 1.07407407, 1.33333333, 1.59259259, + 1.2962963 , 1.61111111, 1.92592593 ] # These are equivalent to the Conv2D1x1 case. @@ -147,10 +149,10 @@ class Conv3DTest(test.TestCase): # Expected values computed using scipy's correlate function. def testConv3D2x2x2Filter(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148, - 6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148, - 9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222, - 10.70023148 + 3.77199074, 3.85069444, 3.92939815, 4.2650463 , 4.35763889, + 4.45023148, 6.73032407, 6.89236111, 7.05439815, 7.22337963, + 7.39930556, 7.57523148, 9.68865741, 9.93402778, 10.17939815, + 10.18171296, 10.44097222, 10.70023148 ] # expected_shape = [1, 3, 1, 2, 5] self._VerifyValues( @@ -162,17 +164,19 @@ class Conv3DTest(test.TestCase): def testConv3DStrides(self): expected_output = [ - 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095, - 0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095, - 0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095, - 0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095, - 1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, - 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095, - 2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381, - 2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095, - 1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095, - 3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, - 3.45238095, 3.46488095, 1.98452381 + 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, + 0.13988095, 0.08452381, 0.26071429, 0.35238095, 0.36488095, + 0.37738095, 0.38988095, 0.40238095, 0.23452381, 0.46071429, + 0.61488095, 0.62738095, 0.63988095, 0.65238095, 0.66488095, + 0.38452381, 1.12738095, 1.48988095, 1.50238095, 1.51488095, + 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, + 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, + 1.52738095, 2.01488095, 2.02738095, 2.03988095, 2.05238095, + 2.06488095, 1.18452381, 2.19404762, 2.88988095, 2.90238095, + 2.91488095, 2.92738095, 2.93988095, 1.68452381, 2.39404762, + 3.15238095, 3.16488095, 3.17738095, 3.18988095, 3.20238095, + 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, + 3.45238095, 3.46488095, 1.98452381 ] self._VerifyValues( tensor_in_sizes=[1, 5, 8, 7, 1], @@ -183,7 +187,8 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815 + 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, + 10.17939815 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -194,12 +199,14 @@ class Conv3DTest(test.TestCase): def testConv3DStride3(self): expected_output = [ - 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016, - 1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016, - 1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159, - 4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016, - 4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, - 4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159 + 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, + 1.68998016, 1.6155754 , 1.68179563, 1.74801587, 1.9280754 , + 2.01215278, 2.09623016, 1.98015873, 2.0672123 , 2.15426587, + 2.03224206, 2.12227183, 2.21230159, 4.4280754 , 4.65500992, + 4.88194444, 4.48015873, 4.71006944, 4.93998016, 4.53224206, + 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, + 4.8968254 , 5.15054563, 5.40426587, 4.94890873, 5.20560516, + 5.46230159 ] self._VerifyValues( tensor_in_sizes=[1, 6, 7, 8, 2], @@ -210,8 +217,9 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2Same(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074, - 9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074 + 3.77199074, 3.85069444, 3.92939815, 2.0162037 , 2.06597222, + 2.11574074, 9.68865741, 9.93402778, 10.17939815, 4.59953704, + 4.73263889, 4.86574074 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -222,8 +230,8 @@ class Conv3DTest(test.TestCase): def testKernelSmallerThanStride(self): expected_output = [ - 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778, - 0.92592593, 1. + 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037 , + 0.77777778, 0.92592593, 1. ] self._VerifyValues( tensor_in_sizes=[1, 3, 3, 3, 1], @@ -239,11 +247,12 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122, - 0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551, - 2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714, - 1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082, - 0.3691691, 0.37244898, 0.125 + 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, + 0.40306122, 0.41873178, 0.4340379 , 0.19642857, 2.46938776, + 2.50874636, 1.1377551 , 2.74489796, 2.78425656, 1.26020408, + 1.16873178, 1.1840379 , 0.51785714, 1.09511662, 1.10604956, + 0.44642857, 1.17164723, 1.18258017, 0.47704082, 0.3691691 , + 0.37244898, 0.125 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -253,8 +262,8 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898, - 2.784257 + 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, + 2.744898, 2.784257 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -269,7 +278,7 @@ class Conv3DTest(test.TestCase): filter_in_sizes=[2, 1, 2, 1, 2], stride=1, padding="VALID", - expected=[1.5625, 1.875]) + expected=[1.5625, 1.875]) def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, @@ -309,6 +318,7 @@ class Conv3DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] + for data_type in self._DtypesToTest(use_gpu=use_gpu): # TODO(mjanusz): Modify gradient_checker to also provide max relative # error and synchronize the tolerance levels between the tests for forward @@ -320,11 +330,12 @@ class Conv3DTest(test.TestCase): elif data_type == dtypes.float16: tolerance = 1e-3 + with self.test_session(use_gpu=use_gpu): orig_input_tensor = constant_op.constant( - input_data, shape=input_shape, dtype=data_type, name="input") + input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant( - filter_data, shape=filter_shape, dtype=data_type, name="filter") + filter_data, shape=filter_shape, dtype=data_type, name="filter") if data_format == "NCDHW": input_tensor = test_util.NHWCToNCHW(orig_input_tensor) @@ -334,23 +345,25 @@ class Conv3DTest(test.TestCase): new_strides = strides conv = nn_ops.conv3d( - input_tensor, - filter_tensor, - new_strides, - padding, - data_format=data_format, - name="conv") + input_tensor, filter_tensor, new_strides, padding, + data_format=data_format, name="conv") if data_format == "NCDHW": conv = test_util.NCHWToNHWC(conv) + if test_input: - jacob_t, jacob_n = gradient_checker.compute_gradient( - orig_input_tensor, input_shape, conv, output_shape) + jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor, + input_shape, + conv, + output_shape) else: - jacob_t, jacob_n = gradient_checker.compute_gradient( - filter_tensor, filter_shape, conv, output_shape) - + jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor, + filter_shape, + conv, + output_shape) + + if data_type != dtypes.float16: reference_jacob_t = jacob_t err = np.fabs(jacob_t - jacob_n).max() @@ -362,6 +375,7 @@ class Conv3DTest(test.TestCase): print("conv3d gradient error = ", err) self.assertLess(err, tolerance) + def ConstructAndTestGradient(self, **kwargs): for data_format, use_gpu in GetTestConfigs(): self._ConstructAndTestGradientForConfig(data_format=data_format, diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 6be8997cab..150e2ff7f2 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import numpy as np +import os from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -1442,6 +1442,7 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) + # Propagate the diff in cases of NaNs os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" expected_input_backprop_cudnn = expected_input_backprop_tf_cpu diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 223a4b2c87..8e54d10f32 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -1018,15 +1018,15 @@ class LMDBReaderTest(test.TestCase): with self.test_session() as sess: reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") - filename_queue = input_lib.string_input_producer( - [self.db_path], num_epochs=None) + filename_queue = input_lib.string_input_producer([self.db_path], + num_epochs=None) key1, value1 = reader1.read(filename_queue) key2, value2 = reader2.read(filename_queue) coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - for _ in range(3): - for _ in range(10): + for i in range(3): + for j in range(10): k1, v1, k2, v2 = sess.run([key1, value1, key2, value2]) self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2)) self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2)) @@ -1054,14 +1054,14 @@ class LMDBReaderTest(test.TestCase): def testReadFromFileRepeatedly(self): with self.test_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_file_repeated") - filename_queue = input_lib.string_input_producer( - [self.db_path], num_epochs=None) + filename_queue = input_lib.string_input_producer([self.db_path], + num_epochs=None) key, value = reader.read(filename_queue) coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) # Iterate over the lmdb 3 times. - for _ in range(3): + for i in range(3): # Go over all 10 records each time. for j in range(10): k, v = sess.run([key, value]) @@ -1071,6 +1071,5 @@ class LMDBReaderTest(test.TestCase): coord.request_stop() coord.join(threads) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 99f9f09690..3a02f24902 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -380,7 +380,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): # Replace np_ans[8] with 0 for the value np_ans[8:] = 0 # Replace 8 with -1 in indices - np.place(indices, indices == 8, [-1]) + np.place(indices, indices==8, [-1]) s = math_ops.unsorted_segment_sum( data=tf_x, segment_ids=indices, num_segments=num_segments) tf_ans = s.eval() diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 6390b7c518..04758ce45a 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -87,7 +87,6 @@ class UniqueTest(test.TestCase): for i in range(len(x)): self.assertEqual(x[i], tf_y[tf_idx[i]]) - class UniqueWithCountsTest(test.TestCase): def testInt32(self): diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 83237b8733..4d5fb97845 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -267,34 +267,34 @@ class BatchNormalization(base.Layer): self.axis[idx] = x + 1 # Account for added dimension if self.scale: - self.gamma = self.add_variable( - name='gamma', - shape=param_shape, - dtype=param_dtype, - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint, - trainable=True) + self.gamma = self.add_variable(name='gamma', + shape=param_shape, + dtype=param_dtype, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + trainable=True) else: self.gamma = None if self.fused: - self._gamma_const = array_ops.constant( - 1.0, dtype=param_dtype, shape=param_shape) + self._gamma_const = array_ops.constant(1.0, + dtype=param_dtype, + shape=param_shape) if self.center: - self.beta = self.add_variable( - name='beta', - shape=param_shape, - dtype=param_dtype, - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True) + self.beta = self.add_variable(name='beta', + shape=param_shape, + dtype=param_dtype, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True) else: self.beta = None if self.fused: - self._beta_const = array_ops.constant( - 0.0, dtype=param_dtype, shape=param_shape) + self._beta_const = array_ops.constant(0.0, + dtype=param_dtype, + shape=param_shape) # Disable variable partitioning when creating the moving mean and variance try: @@ -327,12 +327,11 @@ class BatchNormalization(base.Layer): # stack to be cleared. The nested ones use a `lambda` to set the desired # device and ignore any devices that may be set by the custom getter. def _renorm_variable(name, shape): - var = self.add_variable( - name=name, - shape=shape, - dtype=param_dtype, - initializer=init_ops.zeros_initializer(), - trainable=False) + var = self.add_variable(name=name, + shape=shape, + dtype=param_dtype, + initializer=init_ops.zeros_initializer(), + trainable=False) return var with ops.device(None): diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index 7c91c3284e..b2876c58c2 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -101,13 +101,15 @@ class BNTest(test.TestCase): loss_val = sess.run(loss, feed_dict={image: image_val}) return loss_val - def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu, + def _trainEvalSequence(self, + dtype, + train1_use_gpu, + train2_use_gpu, infer_use_gpu): batch, height, width, input_channels = 2, 4, 5, 3 shape = [batch, height, width, input_channels] checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' % - (dtype, train1_use_gpu, train2_use_gpu, - infer_use_gpu)) + (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu)) self._train( checkpoint, @@ -128,27 +130,30 @@ class BNTest(test.TestCase): dtype=dtype) np.random.seed(0) - image_val = np.random.rand(batch, height, width, input_channels).astype( - dtype.as_numpy_dtype) - loss_val = self._infer( - checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True) + image_val = np.random.rand(batch, + height, + width, + input_channels).astype(dtype.as_numpy_dtype) + loss_val = self._infer(checkpoint, image_val, shape, + use_gpu=infer_use_gpu, is_fused=True) return train_vars, loss_val def testHalfPrecision(self): - ref_vars, ref_loss = self._trainEvalSequence( - dtype=dtypes.float32, - train1_use_gpu=True, - train2_use_gpu=True, - infer_use_gpu=True) - + ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32, + train1_use_gpu=True, + train2_use_gpu=True, + infer_use_gpu=True) + self.assertEqual(len(ref_vars), 5) for train1_use_gpu in [True, False]: for train2_use_gpu in [True, False]: for infer_use_gpu in [True, False]: - test_vars, test_loss = self._trainEvalSequence( - dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu) + test_vars, test_loss = self._trainEvalSequence(dtypes.float16, + train1_use_gpu, + train2_use_gpu, + infer_use_gpu) self.assertEqual(len(test_vars), 5) for test_var, ref_var in zip(test_vars, ref_vars): self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) @@ -276,8 +281,9 @@ class BNTest(test.TestCase): def testCreateFusedBNFloat16(self): # Call layer. bn = normalization_layers.BatchNormalization(axis=1, fused=True) - inputs = random_ops.random_uniform( - (5, 4, 3, 3), seed=1, dtype=dtypes.float16) + inputs = random_ops.random_uniform((5, 4, 3, 3), + seed=1, + dtype=dtypes.float16) training = array_ops.placeholder(dtype='bool') outputs = bn.apply(inputs, training=training) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 38eff54c69..43238757c7 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1194,19 +1194,18 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): "Number of mask dimensions must be specified, even if some dimensions" " are None. E.g. shape=[None] is ok, but shape=None is not.") axis = 0 if axis is None else axis - shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask) + shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod( - shape(tensor)[axis:axis + ndims_mask], [0]) + leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0]) tensor = reshape(tensor, - concat([ - shape(tensor)[:axis], [leading_size], - shape(tensor)[axis + ndims_mask:] - ], 0)) - first_dim = shape_tensor[axis:axis + ndims_mask].num_elements() + concat([shape(tensor)[:axis], + [leading_size], + shape(tensor)[axis+ndims_mask:]], 0)) + first_dim = shape_tensor[axis:axis+ndims_mask].num_elements() tensor.set_shape( - tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim]) - .concatenate(shape_tensor[axis + ndims_mask:])) + tensor_shape.as_shape(shape_tensor[:axis]) + .concatenate([first_dim]) + .concatenate(shape_tensor[axis+ndims_mask:])) mask = reshape(mask, [-1]) return _apply_mask_1d(tensor, mask, axis) diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 04762565c2..d49fac59ca 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -23,10 +23,10 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import util as distribution_util @@ -243,26 +243,25 @@ class Multinomial(distribution.Distribution): n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits # flatten the total_count and logits - flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] - flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] + flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] + flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] # computes each total_count and logits situation by map_fn def _sample_single(args): - logits, n_draw = args[0], args[1] # [K], [] - x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw, - seed) # [1, n*n_draw] - x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] - x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] + logits, n_draw = args[0], args[1] # [K], [] + x = random_ops.multinomial(logits[array_ops.newaxis, ...], + n_draw, seed) # [1, n*n_draw] + x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] + x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] return x - - x = functional_ops.map_fn( - _sample_single, [flat_logits, flat_ndraws], - dtype=self.dtype) # [B1B2...Bm, n, k] + x = functional_ops.map_fn(_sample_single, + [flat_logits, flat_ndraws], + dtype=self.dtype) # [B1B2...Bm, n, k] # reshape the results to proper shape x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) - x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] + x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] return x @distribution_util.AppendDocstring(_multinomial_sample_note) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index b9c89d62d5..7c23321ca5 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1119,8 +1119,9 @@ def rgb_to_grayscale(images, name=None): # https://en.wikipedia.org/wiki/Luma_%28video%29 rgb_weights = [0.2989, 0.5870, 0.1140] rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) - gray_float = math_ops.reduce_sum( - flt_image * rgb_weights, rank_1, keepdims=True) + gray_float = math_ops.reduce_sum(flt_image * rgb_weights, + rank_1, + keepdims=True) gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) return convert_image_dtype(gray_float, orig_dtype, name=name) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index be9beee633..14a039ffd0 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -30,7 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat -from tensorflow.python.util import deprecation +from tensorflow.python.util.deprecation import deprecated_args # Names below are lower_case. # pylint: disable=invalid-name @@ -439,13 +439,9 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): # pylint: disable=redefined-builtin -@deprecation.deprecated_args( - None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims') -def norm(tensor, - ord='euclidean', - axis=None, - keepdims=None, - name=None, +@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", + "keep_dims") +def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, keep_dims=None): r"""Computes the norm of vectors, matrices, and tensors. @@ -482,7 +478,6 @@ def norm(tensor, keepdims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. - keep_dims: Deprecated alias for `keepdims`. Returns: output: A `Tensor` of the same type as tensor, containing the vector or @@ -505,8 +500,11 @@ def norm(tensor, higher order tensors. @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims, - 'keep_dims', keep_dims) + + if keep_dims is not None: + if keepdims is not None: + raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") + keepdims = keep_dims if keepdims is None: keepdims = False @@ -557,8 +555,8 @@ def norm(tensor, else: # General p-norms (positive p only) result = math_ops.pow( - math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True), - 1.0 / ord) + math_ops.reduce_sum( + math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord) if not keepdims: result = array_ops.squeeze(result, axis) return result diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index e04121ee31..d30f6b92ad 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -792,10 +792,9 @@ def mean_cosine_distance(labels, predictions, dim, weights=None, predictions, labels, weights = _remove_squeezable_dimensions( predictions=predictions, labels=labels, weights=weights) radial_diffs = math_ops.multiply(predictions, labels) - radial_diffs = math_ops.reduce_sum( - radial_diffs, reduction_indices=[ - dim, - ], keepdims=True) + radial_diffs = math_ops.reduce_sum(radial_diffs, + reduction_indices=[dim,], + keepdims=True) mean_distance, update_op = mean(radial_diffs, weights, None, None, diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 654eb1c118..da037a7983 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -333,7 +333,6 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`. name: A name for this operation (optional). - dim: Deprecated alias for axis. Returns: A `Tensor` with the same shape as `x`. diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index ec7b9372ca..61fa462988 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -23,6 +23,7 @@ import numbers import numpy as np from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util from tensorflow.python.framework import ops @@ -37,10 +38,11 @@ from tensorflow.python.ops import random_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import +from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup from tensorflow.python.util import deprecation - # Aliases for some automatically-generated names. local_response_normalization = gen_nn_ops.lrn @@ -1646,7 +1648,7 @@ def _softmax(logits, compute_op, dim=-1, name=None): return output -@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") def softmax(logits, axis=None, name=None, dim=None): """Computes softmax activations. @@ -1660,7 +1662,6 @@ def softmax(logits, axis=None, name=None, dim=None): axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). - dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type and shape as `logits`. @@ -1669,13 +1670,13 @@ def softmax(logits, axis=None, name=None, dim=None): InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) + axis = deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 return _softmax(logits, gen_nn_ops._softmax, axis, name) -@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") def log_softmax(logits, axis=None, name=None, dim=None): """Computes log softmax activations. @@ -1689,7 +1690,6 @@ def log_softmax(logits, axis=None, name=None, dim=None): axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). - dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type as `logits`. Same shape as `logits`. @@ -1698,7 +1698,7 @@ def log_softmax(logits, axis=None, name=None, dim=None): InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) + axis = deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 return _softmax(logits, gen_nn_ops._log_softmax, axis, name) @@ -2316,14 +2316,13 @@ def conv1d(value, filters, stride, padding, return array_ops.squeeze(result, [spatial_start_dim]) -def conv1d_transpose( - value, - filter, # pylint: disable=redefined-builtin - output_shape, - stride, - padding="SAME", - data_format="NWC", - name=None): +def conv1d_transpose(value, + filter, + output_shape, + stride, + padding="SAME", + data_format="NWC", + name=None): """The transpose of `conv1d`. This operation is sometimes called "deconvolution" after [Deconvolutional @@ -2358,8 +2357,8 @@ def conv1d_transpose( [value, filter, output_shape]) as name: output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): - raise ValueError("output_shape must have shape (3,), got {}".format( - output_shape_.get_shape())) + raise ValueError("output_shape must have shape (3,), got {}" + .format(output_shape_.get_shape())) # The format could be either NWC or NCW, map to NHWC or NCHW if data_format is None or data_format == "NWC": @@ -2381,8 +2380,7 @@ def conv1d_transpose( if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): raise ValueError( "output_shape does not match filter's output channels, " - "{} != {}".format(output_shape[axis], - filter.get_shape()[1])) + "{} != {}".format(output_shape[axis], filter.get_shape()[1])) if padding != "VALID" and padding != "SAME": raise ValueError("padding must be either VALID or SAME:" @@ -2390,26 +2388,25 @@ def conv1d_transpose( # Reshape the input tensor to [batch, 1, in_width, in_channels] if data_format_2d == "NHWC": - output_shape_ = array_ops.concat( - [output_shape_[:1], [1], output_shape_[1:]], axis=0) + output_shape_ = array_ops.concat([output_shape_[:1], [1], + output_shape_[1:]], axis=0) spatial_start_dim = 1 strides = [1, 1, stride, 1] else: - output_shape_ = array_ops.concat( - [output_shape_[:2], [1], output_shape_[2:]], axis=0) + output_shape_ = array_ops.concat([output_shape_[:2], [1], + output_shape_[2:]], axis=0) spatial_start_dim = 2 strides = [1, 1, 1, stride] value = array_ops.expand_dims(value, spatial_start_dim) filter = array_ops.expand_dims(filter, 0) - result = gen_nn_ops.conv2d_backprop_input( - input_sizes=output_shape_, - filter=filter, - out_backprop=value, - strides=strides, - padding=padding, - data_format=data_format_2d, - name=name) + result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + data_format=data_format_2d, + name=name) return array_ops.squeeze(result, [spatial_start_dim]) diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 44144a0613..43d2d3cd48 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -470,7 +470,6 @@ string ConvolutionDescriptor::ToShortString() const { PoolingDescriptor::PoolingDescriptor(int ndims) : mode_(dnn::PoolingMode::kMaximum), ndims_(ndims), - propagate_nans_(false), window_(ndims, 0), padding_(ndims, 0), strides_(ndims, 1) {} diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl index 87a70d8f95..b470772fbf 100644 --- a/third_party/sycl/crosstool/trisycl.tpl +++ b/third_party/sycl/crosstool/trisycl.tpl @@ -11,12 +11,10 @@ CPU_C_COMPILER = ('%{host_c_compiler}') CURRENT_DIR = os.path.dirname(sys.argv[0]) TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' - def main(): compiler_flags = [] - remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', - '-Wignored-attributes', '-fno-exceptions') + remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions') # remove -fsamotoze-coverage from string with g++ if 'g++' in CPU_CXX_COMPILER: remove_flags += ('-fsanitize-coverage',) @@ -24,62 +22,52 @@ def main(): else: compiler_flags += ['-fopenmp=libomp'] - compiler_flags += [ - flag for flag in sys.argv[1:] if not flag.startswith(remove_flags) - ] + compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)] + output_file_index = compiler_flags.index('-o') + 1 output_file_name = compiler_flags[output_file_index] - if (output_file_index == 1): + if(output_file_index == 1): # we are linking - return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined']) + return call([CPU_CXX_COMPILER] + compiler_flags + + ['-Wl,--no-undefined']) # find what we compile compiling_cpp = 0 - if ('-c' in compiler_flags): - compiled_file_index = compiler_flags.index('-c') + 1 - compiled_file_name = compiler_flags[compiled_file_index] - if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', - '.cxx'))): - compiling_cpp = 1 - - debug_flags = [ - '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', - '-lpthread', '-lboost_log', '-g', '-rdynamic' - ] + if('-c' in compiler_flags): + compiled_file_index = compiler_flags.index('-c') + 1 + compiled_file_name = compiler_flags[compiled_file_index] + if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', + '.C', '.cxx'))): + compiling_cpp = 1; + + debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic'] opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] - compiler_flags = compiler_flags + [ - '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH', - '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL' - ] + opt_flags + compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', + '-DEIGEN_HAS_C99_MATH', + '-DEIGEN_MAX_ALIGN_BYTES=16', + '-DTENSORFLOW_USE_SYCL'] + opt_flags - if (compiling_cpp == 1): + if(compiling_cpp == 1): # create a blacklist of folders that will be skipped when compiling # with triSYCL - skip_extensions = ['.cu.cc'] - skip_folders = [ - 'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard', - 'third_party', 'external', 'hexagon' - ] + skip_extensions = [".cu.cc"] + skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"] skip_folders = [(folder + '/') for folder in skip_folders] # if compiling external project skip triSYCL - if any( - compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any( - _folder in output_file_name for _folder in skip_folders): + if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders): return call([CPU_CXX_COMPILER] + compiler_flags) - host_compiler_flags = [ - '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR - ] + compiler_flags + host_compiler_flags = ['-xc++', '-Wno-unused-variable', + '-I', TRISYCL_INCLUDE_DIR] + compiler_flags x = call([CPU_CXX_COMPILER] + host_compiler_flags) return x else: # compile for C return call([CPU_C_COMPILER] + compiler_flags) - if __name__ == '__main__': sys.exit(main()) diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl index 5b9d0eb383..a0c9e4e43a 100644 --- a/third_party/sycl/sycl_configure.bzl +++ b/third_party/sycl/sycl_configure.bzl @@ -67,6 +67,7 @@ def find_computecpp_root(repository_ctx): def find_trisycl_include_dir(repository_ctx): """Find triSYCL include directory. """ + sycl_name = "" if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ: sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip() if sycl_name.startswith("/"): -- GitLab From e70c00950d295c519fd9c7f8b12e13a3c5aaf710 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 00:39:22 -0800 Subject: [PATCH 0747/1801] Automated g4 rollback of changelist 176615107 PiperOrigin-RevId: 176622438 --- CODE_OF_CONDUCT.md | 6 +- README.md | 4 +- configure.py | 38 +- tensorflow/BUILD | 16 - tensorflow/compiler/aot/tfcompile.bzl | 11 +- tensorflow/compiler/tests/BUILD | 2 +- .../compiler/tests/fused_batchnorm_test.py | 25 +- tensorflow/compiler/xla/service/BUILD | 2 + .../compiler/xla/service/hlo_instruction.h | 2 +- .../xla/service/hlo_instruction_test.cc | 4 +- tensorflow/contrib/batching/BUILD | 1 - .../contrib/batching/kernels/batch_kernels.cc | 2 +- .../kernel_tests/csiszar_divergence_test.py | 2 +- tensorflow/contrib/cmake/CMakeLists.txt | 147 +--- .../contrib/cmake/external/boringssl.cmake | 6 +- .../contrib/cmake/external/jsoncpp.cmake | 6 +- tensorflow/contrib/cmake/external/lmdb.cmake | 6 +- tensorflow/contrib/cmake/external/png.cmake | 6 +- .../contrib/cmake/external/protobuf.cmake | 6 +- tensorflow/contrib/cmake/external/re2.cmake | 8 +- .../contrib/cmake/external/snappy.cmake | 8 +- .../contrib/cmake/external/sqlite.cmake | 6 +- tensorflow/contrib/cmake/external/zlib.cmake | 6 +- tensorflow/contrib/cmake/tf_cc_ops.cmake | 36 +- .../contrib/cmake/tf_core_kernels.cmake | 23 +- .../cmake/tf_label_image_example.cmake | 5 - tensorflow/contrib/cmake/tf_python.cmake | 38 +- tensorflow/contrib/cmake/tf_shared_lib.cmake | 45 +- .../contrib/cmake/tf_stream_executor.cmake | 3 - tensorflow/contrib/cmake/tf_tools.cmake | 13 +- tensorflow/contrib/cmake/tf_tutorials.cmake | 5 - tensorflow/contrib/crf/python/ops/crf.py | 19 +- .../contrib/data/python/kernel_tests/BUILD | 8 +- tensorflow/contrib/distributions/BUILD | 17 - tensorflow/contrib/distributions/__init__.py | 2 - .../python/kernel_tests/cauchy_test.py | 437 ----------- .../distributions/python/ops/cauchy.py | 223 ------ .../python/examples/notebooks/1_basics.ipynb | 4 +- .../examples/notebooks/2_gradients.ipynb | 6 +- .../examples/notebooks/3_datasets.ipynb | 10 +- .../contrib/layers/python/layers/layers.py | 18 +- .../layers/python/layers/layers_test.py | 73 +- .../learn/python/learn/estimators/head.py | 2 +- .../learn/python/learn/estimators/model_fn.py | 6 +- .../python/learn/learn_io/data_feeder.py | 12 +- .../linear_optimizer/python/ops/sdca_ops.py | 11 +- tensorflow/contrib/lite/python/BUILD | 1 - .../contrib/lite/testing/generate_examples.py | 17 +- tensorflow/contrib/lite/toco/python/BUILD | 1 - tensorflow/contrib/makefile/Makefile | 3 +- tensorflow/contrib/makefile/README.md | 41 +- tensorflow/contrib/makefile/build_all_ios.sh | 54 +- .../contrib/makefile/compile_ios_protobuf.sh | 369 ++++------ .../makefile/compile_ios_tensorflow.sh | 155 ++-- tensorflow/contrib/makefile/compile_nsync.sh | 5 +- tensorflow/contrib/nn/__init__.py | 2 - tensorflow/contrib/opt/BUILD | 18 - tensorflow/contrib/opt/__init__.py | 5 +- .../training/multitask_optimizer_wrapper.py | 138 ---- .../multitask_optimizer_wrapper_test.py | 119 --- .../python/kernel_tests/core_rnn_cell_test.py | 42 -- .../rnn/python/kernel_tests/rnn_cell_test.py | 44 -- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 344 +-------- .../seq2seq/python/ops/attention_wrapper.py | 51 +- tensorflow/contrib/slim/README.md | 2 +- .../slim/python/slim/nets/resnet_v1_test.py | 2 +- tensorflow/contrib/verbs/README.md | 14 +- tensorflow/contrib/verbs/rdma.cc | 413 ++--------- tensorflow/contrib/verbs/rdma.h | 40 +- tensorflow/core/BUILD | 1 - .../core/common_runtime/mkl_cpu_allocator.h | 2 +- .../core/common_runtime/sycl/sycl_device.h | 22 +- tensorflow/core/graph/graph.cc | 15 - tensorflow/core/graph/graph.h | 5 - tensorflow/core/graph/graph_partition.cc | 4 +- tensorflow/core/graph/graph_test.cc | 64 +- tensorflow/core/graph/mkl_graph_util.h | 179 +++-- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- .../core/graph/mkl_tfconversion_pass.cc | 4 +- .../core/grappler/costs/graph_properties.h | 6 - tensorflow/core/grappler/utils.cc | 2 +- tensorflow/core/kernels/BUILD | 31 +- tensorflow/core/kernels/avgpooling_op.cc | 7 +- tensorflow/core/kernels/bincount_op.cc | 115 ++- tensorflow/core/kernels/bincount_op.h | 41 -- tensorflow/core/kernels/bincount_op_gpu.cu.cc | 114 --- tensorflow/core/kernels/bincount_op_test.cc | 75 -- tensorflow/core/kernels/bucketize_op.cc | 66 +- tensorflow/core/kernels/bucketize_op.h | 41 -- .../core/kernels/bucketize_op_gpu.cu.cc | 101 --- tensorflow/core/kernels/conv_grad_ops_3d.cc | 42 +- tensorflow/core/kernels/conv_ops_3d.cc | 5 - tensorflow/core/kernels/cwise_op_acosh.cc | 12 +- tensorflow/core/kernels/cwise_op_asinh.cc | 14 +- tensorflow/core/kernels/cwise_op_atanh.cc | 14 +- tensorflow/core/kernels/cwise_ops.h | 12 - .../core/kernels/depthwise_conv_grad_op.cc | 10 +- tensorflow/core/kernels/depthwise_conv_op.cc | 10 +- tensorflow/core/kernels/depthwise_conv_op.h | 4 +- .../core/kernels/depthwise_conv_op_gpu.cu.cc | 19 +- .../kernels/dynamic_partition_op_gpu.cu.cc | 376 ---------- .../core/kernels/dynamic_partition_op_test.cc | 58 -- .../core/kernels/fused_batch_norm_op.cc | 70 +- tensorflow/core/kernels/fused_batch_norm_op.h | 22 +- tensorflow/core/kernels/lmdb_reader_op.cc | 7 +- tensorflow/core/kernels/maxpooling_op.cc | 47 +- .../core/kernels/maxpooling_op_gpu.cu.cc | 40 +- tensorflow/core/kernels/maxpooling_op_gpu.h | 2 +- .../core/kernels/mkl_conv_grad_filter_ops.cc | 78 +- .../core/kernels/mkl_conv_grad_input_ops.cc | 86 ++- tensorflow/core/kernels/mkl_conv_ops.cc | 82 +-- tensorflow/core/kernels/mkl_conv_ops.h | 140 ++-- tensorflow/core/kernels/mkl_tfconv_op.h | 80 +- tensorflow/core/kernels/pooling_ops_common.cc | 10 +- .../core/kernels/pooling_ops_common_gpu.h | 4 +- tensorflow/core/kernels/quantized_add_op.cc | 2 +- tensorflow/core/kernels/random_op.cc | 4 +- .../core/kernels/segment_reduction_ops.cc | 3 - .../core/kernels/segment_reduction_ops.h | 36 +- tensorflow/core/kernels/shape_ops.cc | 43 +- tensorflow/core/kernels/shape_ops.h | 13 +- tensorflow/core/kernels/slice_op.cc | 116 +-- tensorflow/core/kernels/slice_op.h | 109 +-- tensorflow/core/kernels/slice_op_gpu.cu.cc | 56 -- tensorflow/core/kernels/strided_slice_op.cc | 1 + .../core/kernels/strided_slice_op_impl.h | 25 +- .../core/kernels/strided_slice_op_test.cc | 49 -- tensorflow/core/kernels/transpose_op.cc | 35 +- tensorflow/core/kernels/unique_op.cc | 113 +-- tensorflow/core/ops/array_ops.cc | 44 +- tensorflow/core/ops/math_ops.cc | 2 - tensorflow/core/ops/nn_ops.cc | 12 +- tensorflow/core/ops/ops.pbtxt | 5 - .../core/platform/default/build_config/BUILD | 20 +- .../core/platform/default/notification.h | 2 +- tensorflow/core/platform/posix/error.cc | 11 +- tensorflow/core/platform/posix/port.cc | 6 +- tensorflow/core/public/version.h | 2 +- tensorflow/core/util/mkl_util.h | 691 ++---------------- tensorflow/core/util/mkl_util_test.cc | 92 --- .../api_guides/python/threading_and_queues.md | 2 +- .../docs_src/get_started/get_started.md | 6 +- tensorflow/docs_src/get_started/input_fn.md | 6 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 18 +- tensorflow/docs_src/install/install_linux.md | 22 +- tensorflow/docs_src/install/install_mac.md | 10 +- .../docs_src/install/install_sources.md | 19 +- tensorflow/docs_src/mobile/prepare_models.md | 2 +- .../docs_src/programmers_guide/debugger.md | 19 +- .../docs_src/programmers_guide/tensors.md | 12 +- tensorflow/examples/speech_commands/models.py | 2 +- tensorflow/go/android.go | 20 - tensorflow/go/operation_test.go | 8 - tensorflow/go/tensor.go | 9 +- tensorflow/go/tensor_test.go | 9 +- .../src/main/java/org/tensorflow/Shape.java | 32 - .../test/java/org/tensorflow/ShapeTest.java | 26 - tensorflow/python/BUILD | 4 - tensorflow/python/estimator/canned/head.py | 2 +- .../python/estimator/inputs/numpy_io.py | 83 +-- .../python/estimator/inputs/numpy_io_test.py | 87 --- tensorflow/python/framework/ops.py | 4 - tensorflow/python/framework/tensor_util.py | 1 - tensorflow/python/framework/test_util.py | 3 +- .../python/kernel_tests/array_ops_test.py | 52 +- .../python/kernel_tests/bincount_op_test.py | 25 +- .../python/kernel_tests/bucketize_op_test.py | 8 +- .../python/kernel_tests/constant_op_test.py | 14 +- tensorflow/python/kernel_tests/conv1d_test.py | 43 -- .../python/kernel_tests/conv_ops_3d_test.py | 267 ++++--- .../kernel_tests/depthwise_conv_op_test.py | 20 +- .../python/kernel_tests/distributions/BUILD | 1 - .../distributions/multinomial_test.py | 12 +- .../kernel_tests/dynamic_partition_op_test.py | 106 +-- .../python/kernel_tests/pooling_ops_test.py | 60 +- .../python/kernel_tests/reader_ops_test.py | 41 -- .../segment_reduction_ops_test.py | 29 +- .../python/kernel_tests/shape_ops_test.py | 10 - .../python/kernel_tests/slice_op_test.py | 25 +- .../python/kernel_tests/unique_op_test.py | 26 - tensorflow/python/layers/base.py | 8 +- tensorflow/python/layers/convolutional.py | 2 - tensorflow/python/layers/normalization.py | 22 +- .../python/layers/normalization_test.py | 98 +-- tensorflow/python/ops/array_ops.py | 38 +- .../python/ops/distributions/dirichlet.py | 2 +- .../python/ops/distributions/multinomial.py | 49 +- tensorflow/python/ops/image_ops_impl.py | 23 +- tensorflow/python/ops/linalg_ops.py | 31 +- tensorflow/python/ops/math_grad_test.py | 17 - tensorflow/python/ops/math_ops.py | 258 +++---- tensorflow/python/ops/metrics_impl.py | 2 +- .../python/ops/nn_fused_batchnorm_test.py | 119 ++- tensorflow/python/ops/nn_impl.py | 16 +- tensorflow/python/ops/nn_ops.py | 125 +--- tensorflow/python/ops/variables.py | 4 +- .../python/tools/import_pb_to_tensorboard.py | 0 tensorflow/stream_executor/cuda/cuda_dnn.cc | 9 +- tensorflow/stream_executor/dnn.cc | 13 +- tensorflow/stream_executor/dnn.h | 6 - .../tools/api/golden/tensorflow.linalg.pbtxt | 2 +- .../tools/api/golden/tensorflow.nn.pbtxt | 6 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 22 +- .../tools/ci_build/ci_parameterized_build.sh | 2 +- .../tools/ci_build/install/install_golang.sh | 2 +- .../ci_build/linux/libtensorflow_docker.sh | 2 +- .../tools/ci_build/osx/libtensorflow_cpu.sh | 2 +- .../tools/ci_build/osx/libtensorflow_gpu.sh | 2 +- .../tools/ci_build/pi/build_raspberry_pi.sh | 6 - .../ci_build/windows/bazel/bazel_test_lib.sh | 4 +- .../docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 7 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- tensorflow/tools/docker/README.md | 14 - tensorflow/tools/graph_transforms/BUILD | 2 - .../tools/graph_transforms/quantize_nodes.cc | 2 - tensorflow/tools/pip_package/setup.py | 2 +- third_party/aws.BUILD | 3 - third_party/curl.BUILD | 1 + third_party/sycl/crosstool/CROSSTOOL.tpl | 8 +- third_party/sycl/crosstool/trisycl.tpl | 73 -- third_party/sycl/sycl/BUILD.tpl | 17 +- third_party/sycl/sycl/build_defs.bzl.tpl | 17 +- third_party/sycl/sycl_configure.bzl | 86 +-- third_party/zlib.BUILD | 2 +- tools/bazel.rc | 7 +- util/python/BUILD | 2 +- 228 files changed, 1807 insertions(+), 7328 deletions(-) delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py delete mode 100644 tensorflow/contrib/distributions/python/ops/cauchy.py delete mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py delete mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py delete mode 100644 tensorflow/core/kernels/bincount_op.h delete mode 100644 tensorflow/core/kernels/bincount_op_gpu.cu.cc delete mode 100644 tensorflow/core/kernels/bincount_op_test.cc delete mode 100644 tensorflow/core/kernels/bucketize_op.h delete mode 100644 tensorflow/core/kernels/bucketize_op_gpu.cu.cc delete mode 100644 tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc delete mode 100644 tensorflow/core/util/mkl_util_test.cc delete mode 100644 tensorflow/go/android.go mode change 100755 => 100644 tensorflow/python/tools/import_pb_to_tensorboard.py delete mode 100644 third_party/sycl/crosstool/trisycl.tpl diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index ff11d13140..10fd595fec 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -42,7 +42,7 @@ The Code of Conduct also applies within project spaces and in public spaces when Conflicts in an open source project can take many forms, from someone having a bad day and using harsh and hurtful language in the issue queue, to more serious instances such as sexist/racist statements or threats of violence, and everything in between. -If the behavior is threatening or harassing, or for other reasons requires immediate escalation, please see below. +If the behaviour is threatening or harassing, or for other reasons requires immediate escalation, please see below. However, for the vast majority of issues, we aim to empower individuals to first resolve conflicts themselves, asking for help when needed, and only after that fails to escalate further. This approach gives people more control over the outcome of their dispute. @@ -55,14 +55,14 @@ If you are experiencing or witnessing conflict, we ask you to use the following ## Reporting Violations -Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. +Violations of the Code of Conduct can be reported to TensorFlow’s Project Steward at conduct@tensorflow.org. The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report. ## Enforcement -If the Project Stewards receive a report alleging a violation of the Code of Conduct, the Project Stewards will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Stewards will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Stewards may issue sanctions without notice. +If the Project Steward receives a report alleging a violation of the Code of Conduct, the Project Steward will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Steward will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Steward may issue sanctions without notice. ## Attribution diff --git a/README.md b/README.md index aff3427bdd..24bbb6cec1 100644 --- a/README.md +++ b/README.md @@ -73,11 +73,11 @@ $ python ## For more information -* [TensorFlow Website](https://www.tensorflow.org) +* [TensorFlow website](https://www.tensorflow.org) * [TensorFlow White Papers](https://www.tensorflow.org/about/bib) * [TensorFlow Model Zoo](https://github.com/tensorflow/models) * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730) -* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si) +* [TensorFlow course at Stanford](https://web.stanford.edu/class/cs20si) Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate. diff --git a/configure.py b/configure.py index 26da09bd94..0d1afbfe15 100644 --- a/configure.py +++ b/configure.py @@ -43,7 +43,6 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing ' 'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION) _TF_OPENCL_VERSION = '1.2' _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp' -_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include' def is_windows(): @@ -637,7 +636,7 @@ def set_tf_cuda_version(environ_cp): write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) -def set_tf_cudnn_version(environ_cp): +def set_tf_cunn_version(environ_cp): """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" ask_cudnn_version = ( 'Please specify the cuDNN version you want to use. ' @@ -883,27 +882,6 @@ def set_computecpp_toolkit_path(environ_cp): write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', computecpp_toolkit_path) -def set_trisycl_include_dir(environ_cp): - """Set TRISYCL_INCLUDE_DIR""" - ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' - 'include directory. (Use --config=sycl_trisycl ' - 'when building with Bazel) ' - '[Default is %s]: ' - ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) - while True: - trisycl_include_dir = get_from_env_or_user_or_default( - environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, - _DEFAULT_TRISYCL_INCLUDE_DIR) - if os.path.exists(trisycl_include_dir): - break - - print('Invalid triSYCL include directory, %s cannot be found' - % (trisycl_include_dir)) - - # Set TRISYCL_INCLUDE_DIR - environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir - write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', - trisycl_include_dir) def set_mpi_home(environ_cp): """Set MPI_HOME.""" @@ -1019,8 +997,6 @@ def main(): environ_cp['TF_NEED_GCP'] = '0' environ_cp['TF_NEED_HDFS'] = '0' environ_cp['TF_NEED_JEMALLOC'] = '0' - environ_cp['TF_NEED_OPENCL_SYCL'] = '0' - environ_cp['TF_NEED_COMPUTECPP'] = '0' environ_cp['TF_NEED_OPENCL'] = '0' environ_cp['TF_CUDA_CLANG'] = '0' @@ -1042,21 +1018,17 @@ def main(): set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', False, 'verbs') - set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) - if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': + set_action_env_var(environ_cp, 'TF_NEED_OPENCL', 'OpenCL', False) + if environ_cp.get('TF_NEED_OPENCL') == '1': set_host_cxx_compiler(environ_cp) set_host_c_compiler(environ_cp) - set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True) - if environ_cp.get('TF_NEED_COMPUTECPP') == '1': - set_computecpp_toolkit_path(environ_cp) - else: - set_trisycl_include_dir(environ_cp) + set_computecpp_toolkit_path(environ_cp) set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): set_tf_cuda_version(environ_cp) - set_tf_cudnn_version(environ_cp) + set_tf_cunn_version(environ_cp) set_tf_cuda_compute_capabilities(environ_cp) set_tf_cuda_clang(environ_cp) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index c8f0b6b061..49828cd4d6 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -54,15 +54,6 @@ config_setting( visibility = ["//visibility:public"], ) -config_setting( - name = "raspberry_pi_armeabi", - values = { - "crosstool_top": "@local_config_arm_compiler//:toolchain", - "cpu": "armeabi", - }, - visibility = ["//visibility:public"], -) - config_setting( name = "android_arm", values = { @@ -769,13 +760,6 @@ tf_cc_shared_object( ], ) -exports_files( - [ - "tf_version_script.lds", - "tf_exported_symbols.lds", - ], -) - py_library( name = "tensorflow_py", srcs = ["__init__.py"], diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index b795afd5b8..ee291c12d0 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -130,10 +130,6 @@ def tf_library(name, graph, config, header_file = name + ".h" object_file = name + ".o" ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_") - if type(tfcompile_flags) == type(""): - flags = tfcompile_flags - else: - flags = " ".join(["'" + arg.replace("'", "'\\''") + "'" for arg in (tfcompile_flags or [])]) native.genrule( name=("gen_" + name), srcs=[ @@ -152,7 +148,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - flags), + " " + (tfcompile_flags or "")), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -189,7 +185,7 @@ def tf_library(name, graph, config, " --cpp_class=" + cpp_class + " --target_triple=" + target_llvm_triple() + " --out_session_module=$(@D)/" + session_module_pb + - flags), + " " + (tfcompile_flags or "")), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -199,7 +195,8 @@ def tf_library(name, graph, config, # The cc_library rule packaging up the header and object file, and needed # kernel implementations. - need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1) + need_xla_data_proto = (tfcompile_flags and + tfcompile_flags.find("--gen_program_shape") != -1) native.cc_library( name=name, srcs=[object_file], diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 79c4befd36..c372e05474 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -672,7 +672,7 @@ tf_library( cpp_class = "LSTMLayerInference", graph = "lstm_layer_inference.pbtxt", tags = ["manual"], - tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"], + tfcompile_flags = "--xla_cpu_multi_thread_eigen=false", ) # ----------------------------------------------------------------------------- diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py index a773b5a947..936fcf8b6b 100644 --- a/tensorflow/compiler/tests/fused_batchnorm_test.py +++ b/tensorflow/compiler/tests/fused_batchnorm_test.py @@ -36,7 +36,7 @@ class FusedBatchNormTest(XLATestCase): x_square = x * x x_square_sum = np.sum(x_square, (0, 1, 2)) x_sum = np.sum(x, axis=(0, 1, 2)) - element_count = np.size(x) / int(np.shape(x)[-1]) + element_count = np.size(x) / int(np.shape(x)[0]) mean = x_sum / element_count var = x_square_sum / element_count - mean * mean normalized = (x - mean) / np.sqrt(var + epsilon) @@ -64,9 +64,8 @@ class FusedBatchNormTest(XLATestCase): return grad_x, grad_scale, grad_offset def testInference(self): - channel = 3 - x_shape = [2, 2, 6, channel] - scale_shape = [channel] + x_shape = [2, 2, 6, 2] + scale_shape = [2] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -75,8 +74,8 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") + scale = array_ops.placeholder(np.float32, shape=[2], name="scale") + offset = array_ops.placeholder(np.float32, shape=[2], name="offset") epsilon = 0.001 y_ref, mean_ref, var_ref = self._reference_training( x_val, scale_val, offset_val, epsilon, data_format) @@ -98,9 +97,8 @@ class FusedBatchNormTest(XLATestCase): self.assertAllClose(y_val, y_ref, atol=1e-3) def _testLearning(self, use_gradient_checker): - channel = 3 - x_shape = [2, 2, 6, channel] - scale_shape = [channel] + x_shape = [2, 2, 6, 2] + scale_shape = [2] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -111,8 +109,8 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") + scale = array_ops.placeholder(np.float32, shape=[2], name="scale") + offset = array_ops.placeholder(np.float32, shape=[2], name="offset") epsilon = 0.001 y, mean, var = nn.fused_batch_norm( t_val, @@ -156,9 +154,8 @@ class FusedBatchNormTest(XLATestCase): def testGradient(self): # TODO(b/64270657): Use gradient_checker here in addition to comparing with # this reference implementation. - channel = 3 - x_shape = [2, 2, 6, channel] - scale_shape = [channel] + x_shape = [2, 2, 6, 2] + scale_shape = [2] grad_val = np.random.random_sample(x_shape).astype(np.float32) x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index fb980e7056..db265510f2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -90,6 +90,8 @@ cc_library( ":shape_inference", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index cda8b07c61..1bd0cca945 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -222,7 +222,7 @@ class HloInstruction { tensorflow::gtl::ArraySlice strides); // Creates a slice instruction, where the first operand is sliced by - // start indices specified in the second operand, and by size specified in + // start indices specified in the second operand, and by size specfied in // 'slice_sizes'. static std::unique_ptr CreateDynamicSlice( const Shape& shape, HloInstruction* operand, diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 76b12fc8d3..070bb4bc42 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -792,8 +792,8 @@ TEST_F(HloInstructionTest, ComplexFusionOp) { // sub = Sub(mul, clamp) // tuple = Tuple({sub, sub, mul, C1}) // - // Notable complexities are repeated operands in the same instruction, - // different shapes, use of value in different expressions. + // Notable complexities are repeated operands in a same instruction, different + // shapes, use of value in different expressions. auto c1 = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(1.1f))); auto c2 = builder.AddInstruction( diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index a111cfecb3..8b7df4a84c 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -82,7 +82,6 @@ cc_library( tf_cc_test( name = "adaptive_shared_batch_scheduler_test", srcs = ["adaptive_shared_batch_scheduler_test.cc"], - tags = ["manual"], # b/69013768 deps = [ ":adaptive_shared_batch_scheduler", "//tensorflow/contrib/batching/test_util:fake_clock_env", diff --git a/tensorflow/contrib/batching/kernels/batch_kernels.cc b/tensorflow/contrib/batching/kernels/batch_kernels.cc index 6041d8c9b2..3b7c538fcc 100644 --- a/tensorflow/contrib/batching/kernels/batch_kernels.cc +++ b/tensorflow/contrib/batching/kernels/batch_kernels.cc @@ -461,7 +461,7 @@ class BatchResource : public ResourceBase { return Status::OK(); } - // Looks up the batcher queue for 'queue_name'. If it didn't previously exist, + // Looks up the batcher queue for 'queue_name'. If it did't previously exist, // creates it. Status LookupOrCreateBatcherQueue(const string& queue_name, BatcherQueue** queue) { diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py index 2e94b7206d..8c6a614beb 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py @@ -759,7 +759,7 @@ class CsiszarVIMCOTest(test.TestCase): def _csiszar_vimco_helper_grad(self, logu, delta): """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`.""" - # This code actually estimates the sum of the Jacobiab because that's what + # This code actually estimates the sum of the Jacobiab because thats what # TF's `gradients` does. np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper( logu[..., None] + np.diag([delta]*len(logu))) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 77a3fc0c83..8744fc492f 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -34,41 +34,13 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) -if(HAIKU) - option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF) -else() - option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" ON) -endif() - if (NOT WIN32) # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option # for targets that link ${CMAKE_THREAD_LIBS_INIT}. find_package (Threads) - - option(tensorflow_PATH_STATIC_LIB "Additional library search path for libcudnn_static.a, libnccl_static.a, libculibos.a" /usr/local/cuda/lib64/) - option(tensorflow_CUDNN_INCLUDE "cudnn.h header install path" /usr/include/) - if (NOT tensorflow_CUDNN_INCLUDE) - # option's default value is OFF. Fill it with real default values - set(tensorflow_CUDNN_INCLUDE /usr/include) - endif (NOT tensorflow_CUDNN_INCLUDE) - option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB}) - option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB}) - option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64) - if (NOT tensorflow_CUDA_LIBRARY_PATH) - # option's default value is OFF. Fill it with real default values - set(tensorflow_CUDA_LIBRARY_PATH /usr/local/cuda/lib64) - endif (NOT tensorflow_CUDA_LIBRARY_PATH) endif() -if (WIN32) - set(BOOL_WIN32 ON) -else (WIN32) - set(BOOL_WIN32 OFF) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") -endif (WIN32) - # [CLEANUP] Remove when done # For debugging function(SHOW_VARIABLES) @@ -86,12 +58,7 @@ set (DOWNLOAD_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE PATH "Location where external projects will be downloaded.") mark_as_advanced(DOWNLOAD_LOCATION) -if (tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) -else() - set(CMAKE_POSITION_INDEPENDENT_CODE OFF) -endif() - +set(CMAKE_POSITION_INDEPENDENT_CODE ON) add_definitions(-DEIGEN_AVOID_STL_ARRAY) if(WIN32) add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC) @@ -250,35 +217,20 @@ endif() if(UNIX) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) endif() -if(HAIKU) - list(APPEND tensorflow_EXTERNAL_LIBRARIES network) -endif() if (tensorflow_ENABLE_GPU) - if (NOT WIN32) - # Default install paths for cuda libraries in Linux - # In some Linux distros, find_package(CUDA) seems to require CMAKE_LIBRARY_PATH to include cuda-lib paths - list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}") - list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs") - endif (NOT WIN32) - - find_package(CUDA 8.0 REQUIRED) - - # by default we assume compute cabability 3.5 and 5.2. If you change this change it in - # CUDA_NVCC_FLAGS and cuda_config.h below - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\") - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero - set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) - include_directories(${CUDA_INCLUDE}) if (WIN32) + find_package(CUDA 8.0 REQUIRED) + + # by default we assume compute cabability 3.5 and 5.2. If you change this change it in + # CUDA_NVCC_FLAGS and cuda_config.h below + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\") + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero + set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) + include_directories(${CUDA_INCLUDE}) add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2) - else (WIN32) - # Without these double quotes, cmake in Linux makes it "-DTF_EXTRA_CUDA_CAPABILITIES=3.0, -D3.5, -D5.2" for cc, which incurs build breaks - add_definitions(-DGOOGLE_CUDA=1 -D"TF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2") - endif (WIN32) - if (WIN32) # add cudnn if(NOT CUDNN_HOME) set(CUDNN_HOME ${CUDA_TOOLKIT_TARGET_DIR}) @@ -286,48 +238,18 @@ if (tensorflow_ENABLE_GPU) include_directories(${CUDNN_HOME}) set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${CUDNN_HOME}/lib/x64/cudnn.lib) - else (WIN32) - set(CUDNN_INCLUDE "${tensorflow_CUDNN_INCLUDE}") - - find_library(nccl_STATIC_LIBRARY NAMES libnccl_static.a PATHS ${tensorflow_PATH_NCCL_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) - if (NOT nccl_STATIC_LIBRARY) - message(FATAL_ERROR "NCCL is required for GPU-build") - else (NOT nccl_STATIC_LIBRARY) - message("nccl-static: ${nccl_STATIC_LIBRARY}") - # something like /usr/lib64/libnccl_static.a - endif (NOT nccl_STATIC_LIBRARY) - - find_library(cudnn_STATIC_LIBRARY NAMES libcudnn_static.a PATHS ${tensorflow_PATH_CUDNN_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) - if (NOT cudnn_STATIC_LIBRARY) - message(FATAL_ERROR "CUDNN is required for GPU-build") - else (NOT cudnn_STATIC_LIBRARY) - message("cudnn-static: ${cudnn_STATIC_LIBRARY}") - endif (NOT cudnn_STATIC_LIBRARY) - find_library(culibos_STATIC_LIBRARY NAMES libculibos.a PATHS ${tensorflow_PATH_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) - if (NOT culibos_STATIC_LIBRARY) - message(FATAL_ERROR "CULIBOS is required for GPU-build") - else (NOT culibos_STATIC_LIBRARY) - message("culibos-static: ${culibos_STATIC_LIBRARY}") - endif (NOT culibos_STATIC_LIBRARY) - - include_directories(${CUDNN_INCLUDE}) - set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} - ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY}) - endif (WIN32) - - # create cuda_config.h - FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h - "#ifndef CUDA_CUDA_CONFIG_H_\n" - "#define CUDA_CUDA_CONFIG_H_\n" - "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n" - "#define TF_CUDA_VERSION \"64_80\"\n" - "#define TF_CUDNN_VERSION \"64_6\"\n" - "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n" - "#endif // CUDA_CUDA_CONFIG_H_\n" - ) + # create cuda_config.h + FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h + "#ifndef CUDA_CUDA_CONFIG_H_\n" + "#define CUDA_CUDA_CONFIG_H_\n" + "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n" + "#define TF_CUDA_VERSION \"64_80\"\n" + "#define TF_CUDNN_VERSION \"64_6\"\n" + "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n" + "#endif // CUDA_CUDA_CONFIG_H_\n" + ) - if (WIN32) # tf assumes in various places header files to be in cuda/include. On windows the cuda sdk # installs them under cuda/version/include and to avoid that we need to change tf we copy a # few files to cuda/include @@ -339,25 +261,12 @@ if (tensorflow_ENABLE_GPU) ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include ) - else(WIN32) - # Linux has slightly differnt install paths than Windows - FILE(COPY - ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h - ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_INCLUDE}/cudnn.h - ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h - ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h - ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h - DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include - ) - endif(WIN32) + include_directories(${tensorflow_source_dir}/third_party/gpus) + # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES}) - include_directories(${tensorflow_source_dir}/third_party/gpus) - # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES}) - - # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used - # in the default build is upgraded. - if(WIN32) + # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used + # in the default build is upgraded. set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value msvcp_dll_name=msvcp140.dll cudart_dll_name=cudart64_80.dll @@ -366,9 +275,7 @@ if (tensorflow_ENABLE_GPU) cudnn_dll_name=cudnn64_6.dll cudnn_version_number=6) else(WIN32) - set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value - cuda_version_number=8.0 - cudnn_version_number=6) + message(FATAL_ERROR "CMake GPU build is currently only supported on Windows.") endif(WIN32) else(tensorflow_ENABLE_GPU) set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value @@ -386,7 +293,9 @@ include(tf_core_framework.cmake) # NOTE: Disabled until issue #3996 is fixed. # include(tf_stream_executor.cmake) if (tensorflow_ENABLE_GPU) + if (WIN32) include(tf_stream_executor.cmake) + endif() endif() include(tf_core_cpu.cmake) diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake index cca8444e2a..dc27eadaca 100644 --- a/tensorflow/contrib/cmake/external/boringssl.cmake +++ b/tensorflow/contrib/cmake/external/boringssl.cmake @@ -39,12 +39,8 @@ ExternalProject_Add(boringssl # BUILD_IN_SOURCE 1 INSTALL_COMMAND "" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake index d2ae4c76e8..5127d7e8f7 100644 --- a/tensorflow/contrib/cmake/external/jsoncpp.cmake +++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake @@ -42,12 +42,8 @@ ExternalProject_Add(jsoncpp BUILD_IN_SOURCE 1 INSTALL_COMMAND "" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) diff --git a/tensorflow/contrib/cmake/external/lmdb.cmake b/tensorflow/contrib/cmake/external/lmdb.cmake index e41384f023..79971b7cfc 100644 --- a/tensorflow/contrib/cmake/external/lmdb.cmake +++ b/tensorflow/contrib/cmake/external/lmdb.cmake @@ -29,14 +29,10 @@ ExternalProject_Add(lmdb INSTALL_DIR ${lmdb_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_INSTALL_PREFIX:STRING=${lmdb_INSTALL} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) if(WIN32) diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index aad6618f52..2b2bd47d1c 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -41,14 +41,10 @@ ExternalProject_Add(png INSTALL_DIR ${png_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DZLIB_ROOT:STRING=${ZLIB_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index b53857a47b..1e300e21df 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -44,12 +44,8 @@ ExternalProject_Add(protobuf ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS} INSTALL_COMMAND "" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DZLIB_ROOT:STRING=${ZLIB_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake index b56f4b0898..cb4ec9c2de 100644 --- a/tensorflow/contrib/cmake/external/re2.cmake +++ b/tensorflow/contrib/cmake/external/re2.cmake @@ -38,11 +38,7 @@ ExternalProject_Add(re2 BUILD_IN_SOURCE 1 DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL} -) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON +) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake index 926c271fd9..2d2451521c 100644 --- a/tensorflow/contrib/cmake/external/snappy.cmake +++ b/tensorflow/contrib/cmake/external/snappy.cmake @@ -40,15 +40,11 @@ ExternalProject_Add(snappy LOG_CONFIGURE ON LOG_BUILD ON CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DSNAPPY_BUILD_TESTS:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) # actually enables snappy in the source code -add_definitions(-DTF_USE_SNAPPY) \ No newline at end of file +add_definitions(-DTF_USE_SNAPPY) diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake index 785039a469..1770dcb1fd 100644 --- a/tensorflow/contrib/cmake/external/sqlite.cmake +++ b/tensorflow/contrib/cmake/external/sqlite.cmake @@ -53,13 +53,9 @@ else() INSTALL_DIR ${sqlite_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake index f10f84336e..c8af611e1e 100644 --- a/tensorflow/contrib/cmake/external/zlib.cmake +++ b/tensorflow/contrib/cmake/external/zlib.cmake @@ -42,13 +42,9 @@ ExternalProject_Add(zlib BUILD_IN_SOURCE 1 DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS - if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - else() - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF - endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${ZLIB_INSTALL} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) # put zlib includes in the directory where they are expected diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index 6e2ac203f9..45eeb11062 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -148,11 +148,7 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs}) add_library(tf_cc OBJECT ${tf_cc_srcs}) add_dependencies(tf_cc tf_cc_framework tf_cc_ops) -if (WIN32) - set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") -else (WIN32) - set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") -endif (WIN32) +set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") add_custom_target(tf_extension_ops) function(AddUserOps) @@ -168,13 +164,15 @@ function(AddUserOps) # create shared library from source and cuda obj add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib}) target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib}) - if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES) - # some ops call out to cuda directly; need to link libs for the cuda dlls - target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES}) - endif() - if (_AT_DISTCOPY) - add_custom_command(TARGET ${_AT_TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy $ ${_AT_DISTCOPY}/) + if(WIN32) + if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES) + # some ops call out to cuda directly; need to link libs for the cuda dlls + target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES}) + endif() + if (_AT_DISTCOPY) + add_custom_command(TARGET ${_AT_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ ${_AT_DISTCOPY}/) + endif() endif() if (_AT_DEPENDS) add_dependencies(${_AT_TARGET} ${_AT_DEPENDS}) @@ -182,19 +180,9 @@ function(AddUserOps) # make sure TF_COMPILE_LIBRARY is not defined for this target get_target_property(target_compile_flags ${_AT_TARGET} COMPILE_FLAGS) if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND") - if (WIN32) - set(target_compile_flags "/UTF_COMPILE_LIBRARY") - else (WIN32) - # gcc uses UTF as default - set(target_compile_flags "-finput-charset=UTF-8") - endif (WIN32) + set(target_compile_flags "/UTF_COMPILE_LIBRARY") else() - if (WIN32) - set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") - else (WIN32) - # gcc uses UTF as default - set(target_compile_flags "${target_compile_flags} -finput-charset=UTF-8") - endif (WIN32) + set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") endif() set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags}) add_dependencies(tf_extension_ops ${_AT_TARGET}) diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 2d015908a8..d6b8990664 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -179,7 +179,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc" - "${tensorflow_source_dir}/tensorflow/contrib/resampler/kernels/*.cu.cc" ) if(WIN32 AND tensorflow_ENABLE_GPU) @@ -203,16 +202,16 @@ endif(WIN32 AND tensorflow_ENABLE_GPU) add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs}) add_dependencies(tf_core_kernels tf_core_cpu) -if (WIN32) +if(WIN32) target_compile_options(tf_core_kernels PRIVATE /MP) -endif (WIN32) -if (tensorflow_ENABLE_GPU) - set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) - set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) - cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) - set_target_properties(${tf_core_gpu_kernels_lib} - PROPERTIES DEBUG_POSTFIX "" - COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" - ) - add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) + if (tensorflow_ENABLE_GPU) + set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) + set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) + cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) + set_target_properties(${tf_core_gpu_kernels_lib} + PROPERTIES DEBUG_POSTFIX "" + COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" + ) + add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) + endif() endif() diff --git a/tensorflow/contrib/cmake/tf_label_image_example.cmake b/tensorflow/contrib/cmake/tf_label_image_example.cmake index 7f2f60b089..0d3a4699eb 100644 --- a/tensorflow/contrib/cmake/tf_label_image_example.cmake +++ b/tensorflow/contrib/cmake/tf_label_image_example.cmake @@ -34,8 +34,3 @@ target_link_libraries(tf_label_image_example PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) - -install(TARGETS tf_label_image_example - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 61b3fd715d..9b863f7bc6 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -715,9 +715,6 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) set(require_shape_fn 1) endif() - get_filename_component(GENERATE_PYTHON_OP_LIB_MKDIRPATH ${GENERATE_PYTHON_OP_LIB_DESTINATION} PATH) - file(MAKE_DIRECTORY ${GENERATE_PYTHON_OP_LIB_MKDIRPATH}) - # Create a C++ executable that links in the appropriate op # registrations and generates Python wrapper code based on the # registered ops. @@ -746,7 +743,6 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE) endfunction() -GENERATE_PYTHON_OP_LIB("audio_ops") GENERATE_PYTHON_OP_LIB("array_ops") GENERATE_PYTHON_OP_LIB("bitwise_ops") GENERATE_PYTHON_OP_LIB("math_ops") @@ -991,7 +987,7 @@ add_library(pywrap_tensorflow_internal SHARED $ $<$:$> $ - $<$:$<$:$>> + $<$:$> $<$:$> ${pywrap_tensorflow_deffile} ) @@ -1067,23 +1063,25 @@ if(WIN32) DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/) endif(WIN32) -# include contrib/seq2seq as .so -# -set(tf_beam_search_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" -) +if(WIN32) + # include contrib/seq2seq as .so + # + set(tf_beam_search_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" + ) -set(tf_beam_search_gpu_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" -) + set(tf_beam_search_gpu_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" + ) -AddUserOps(TARGET _beam_search_ops - SOURCES "${tf_beam_search_srcs}" - GPUSOURCES ${tf_beam_search_gpu_srcs} - DEPENDS pywrap_tensorflow_internal tf_python_ops - DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) + AddUserOps(TARGET _beam_search_ops + SOURCES "${tf_beam_search_srcs}" + GPUSOURCES ${tf_beam_search_gpu_srcs} + DEPENDS pywrap_tensorflow_internal tf_python_ops + DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) +endif(WIN32) ############################################################ # Build a PIP package containing the TensorFlow runtime. diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 3e3fe0cdfa..9bf45bab30 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -73,7 +73,7 @@ add_library(tensorflow SHARED $ $<$:$> $ - $<$:$<$:$>> + $<$:$> $<$:$> ${tensorflow_deffile} ) @@ -94,46 +94,3 @@ endif() if(WIN32) add_dependencies(tensorflow tensorflow_static) endif(WIN32) - -install(TARGETS tensorflow - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) - -# install necessary headers -# tensorflow headers -install(DIRECTORY ${tensorflow_source_dir}/tensorflow/cc/ - DESTINATION include/tensorflow/cc - FILES_MATCHING PATTERN "*.h") -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/cc/ - DESTINATION include/tensorflow/cc - FILES_MATCHING PATTERN "*.h") -install(DIRECTORY ${tensorflow_source_dir}/tensorflow/core/ - DESTINATION include/tensorflow/core - FILES_MATCHING PATTERN "*.h") -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/core/ - DESTINATION include/tensorflow/core - FILES_MATCHING PATTERN "*.h") -install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/ - DESTINATION include/tensorflow/stream_executor - FILES_MATCHING PATTERN "*.h") -# google protobuf headers -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/ - DESTINATION include/google - FILES_MATCHING PATTERN "*.h") -# nsync headers -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/ - DESTINATION include/external/nsync - FILES_MATCHING PATTERN "*.h") -# Eigen directory -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/ - DESTINATION include/Eigen) -# external directory -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/ - DESTINATION include/external/eigen_archive) -# third_party eigen directory -install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/ - DESTINATION include/third_party/eigen3) -# unsupported Eigen directory -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/ - DESTINATION include/unsupported/Eigen) diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 8d95f0d3e8..3d84f1ebb9 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -74,9 +74,6 @@ endif() #) #list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) -if (NOT WIN32) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp") -endif (NOT WIN32) add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs}) add_dependencies(tf_stream_executor diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake index cb58a2e7df..6ef9598963 100644 --- a/tensorflow/contrib/cmake/tf_tools.cmake +++ b/tensorflow/contrib/cmake/tf_tools.cmake @@ -73,7 +73,7 @@ add_executable(${transform_graph} $ $ $ - $<$:$<$:$>> + $<$:$> $<$:$> ) @@ -95,7 +95,7 @@ add_executable(${summarize_graph} $ $ $ - $<$:$<$:$>> + $<$:$> $<$:$> ) @@ -117,7 +117,7 @@ add_executable(${compare_graphs} $ $ $ - $<$:$<$:$>> + $<$:$> $<$:$> ) @@ -138,7 +138,7 @@ add_executable(${benchmark_model} $ $ $ - $<$:$<$:$>> + $<$:$> $<$:$> ) @@ -147,8 +147,3 @@ target_link_libraries(${benchmark_model} PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) - -install(TARGETS ${transform_graph} ${summarize_graph} ${compare_graphs} ${benchmark_model} - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake index e63fccc181..858e7dda92 100644 --- a/tensorflow/contrib/cmake/tf_tutorials.cmake +++ b/tensorflow/contrib/cmake/tf_tutorials.cmake @@ -34,8 +34,3 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) - -install(TARGETS tf_tutorials_example_trainer - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 1612c75179..4282be5ec8 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -363,8 +363,8 @@ class CrfDecodeForwardRnnCell(rnn_cell.RNNCell): scope: Unused variable scope of this cell. Returns: - backpointers: A [batch_size, num_tags] matrix of backpointers. - new_state: A [batch_size, num_tags] matrix of new score values. + backpointers: [batch_size, num_tags], containing backpointers. + new_state: [batch_size, num_tags], containing new score values. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). @@ -404,9 +404,8 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell): """Build the CrfDecodeBackwardRnnCell. Args: - inputs: A [batch_size, num_tags] matrix of - backpointer of next step (in time order). - state: A [batch_size, 1] matrix of tag index of next step. + inputs: [batch_size, num_tags], backpointer of next step (in time order). + state: [batch_size, 1], next position's tag index. scope: Unused variable scope of this cell. Returns: @@ -430,16 +429,16 @@ def crf_decode(potentials, transition_params, sequence_length): This is a function for tensor. Args: - potentials: A [batch_size, max_seq_len, num_tags] tensor of + potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of unary potentials. - transition_params: A [num_tags, num_tags] matrix of + transition_params: A [num_tags, num_tags] tensor, matrix of binary potentials. - sequence_length: A [batch_size] vector of true sequence lengths. + sequence_length: A [batch_size] tensor, containing sequence lengths. Returns: - decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`. + decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32. Contains the highest scoring tag indices. - best_score: A [batch_size] vector, containing the score of `decode_tags`. + best_score: A [batch_size] tensor, containing the score of decode_tags. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index dd0457d54b..1923c0586a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -11,7 +11,6 @@ py_test( size = "small", srcs = ["batch_dataset_op_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -373,7 +372,6 @@ py_test( size = "small", srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -451,7 +449,6 @@ py_test( size = "small", srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -468,10 +465,7 @@ py_test( size = "small", srcs = ["prefetching_ops_test.py"], srcs_version = "PY2AND3", - tags = [ - "manual", - "no_oss", # b/68785503 - ], + tags = ["no_oss"], # b/68785503 deps = [ "//tensorflow/contrib/data/python/ops:prefetching_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 145b9495ff..2dc8ad9483 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -140,23 +140,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "cauchy_test", - size = "medium", - srcs = ["python/kernel_tests/cauchy_test.py"], - additional_deps = [ - ":distributions_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:platform_test", - "//tensorflow/python:variables", - ], -) - cuda_py_test( name = "chi2_test", srcs = ["python/kernel_tests/chi2_test.py"], diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 0d12d83893..16f6533e57 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -24,7 +24,6 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.contrib.distributions.python.ops.binomial import * -from tensorflow.contrib.distributions.python.ops.cauchy import * from tensorflow.contrib.distributions.python.ops.chi2 import * from tensorflow.contrib.distributions.python.ops.conditional_distribution import * from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * @@ -84,7 +83,6 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ 'bijectors', - 'Cauchy', 'ConditionalDistribution', 'ConditionalTransformedDistribution', 'FULLY_REPARAMETERIZED', diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py deleted file mode 100644 index 7f7697357c..0000000000 --- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py +++ /dev/null @@ -1,437 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Cauchy.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import importlib -import numpy as np - -from tensorflow.contrib.distributions.python.ops import cauchy as cauchy_lib -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import variables -from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging - - -def try_import(name): # pylint: disable=invalid-name - module = None - try: - module = importlib.import_module(name) - except ImportError as e: - tf_logging.warning("Could not import %s: %s" % (name, str(e))) - return module - -stats = try_import("scipy.stats") - - -class CauchyTest(test.TestCase): - - def setUp(self): - self._rng = np.random.RandomState(123) - - def assertAllFinite(self, tensor): - is_finite = np.isfinite(tensor.eval()) - all_true = np.ones_like(is_finite, dtype=np.bool) - self.assertAllEqual(all_true, is_finite) - - def _testParamShapes(self, sample_shape, expected): - with self.test_session(): - param_shapes = cauchy_lib.Cauchy.param_shapes(sample_shape) - loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] - self.assertAllEqual(expected, loc_shape.eval()) - self.assertAllEqual(expected, scale_shape.eval()) - loc = array_ops.zeros(loc_shape) - scale = array_ops.ones(scale_shape) - self.assertAllEqual( - expected, - array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval()) - - def _testParamStaticShapes(self, sample_shape, expected): - param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape) - loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] - self.assertEqual(expected, loc_shape) - self.assertEqual(expected, scale_shape) - - def testParamShapes(self): - sample_shape = [10, 3, 4] - self._testParamShapes(sample_shape, sample_shape) - self._testParamShapes(constant_op.constant(sample_shape), sample_shape) - - def testParamStaticShapes(self): - sample_shape = [10, 3, 4] - self._testParamStaticShapes(sample_shape, sample_shape) - self._testParamStaticShapes( - tensor_shape.TensorShape(sample_shape), sample_shape) - - def testCauchyLogPDF(self): - with self.test_session(): - batch_size = 6 - loc = constant_op.constant([3.0] * batch_size) - scale = constant_op.constant([np.sqrt(10.0)] * batch_size) - x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32) - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - log_pdf = cauchy.log_prob(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.eval().shape) - self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) - self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) - - pdf = cauchy.prob(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.eval().shape) - self.assertAllEqual(cauchy.batch_shape, pdf.shape) - self.assertAllEqual(cauchy.batch_shape, pdf.eval().shape) - - if not stats: - return - expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) - self.assertAllClose(expected_log_pdf, log_pdf.eval()) - self.assertAllClose(np.exp(expected_log_pdf), pdf.eval()) - - def testCauchyLogPDFMultidimensional(self): - with self.test_session(): - batch_size = 6 - loc = constant_op.constant([[3.0, -3.0]] * batch_size) - scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] * - batch_size) - x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - log_pdf = cauchy.log_prob(x) - log_pdf_values = log_pdf.eval() - self.assertEqual(log_pdf.shape, (6, 2)) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.eval().shape) - self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) - self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) - - pdf = cauchy.prob(x) - pdf_values = pdf.eval() - self.assertEqual(pdf.shape, (6, 2)) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf_values.shape) - self.assertAllEqual(cauchy.batch_shape, pdf.shape) - self.assertAllEqual(cauchy.batch_shape, pdf_values.shape) - - if not stats: - return - expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) - self.assertAllClose(expected_log_pdf, log_pdf_values) - self.assertAllClose(np.exp(expected_log_pdf), pdf_values) - - def testCauchyCDF(self): - with self.test_session(): - batch_size = 50 - loc = self._rng.randn(batch_size) - scale = self._rng.rand(batch_size) + 1.0 - x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) - - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - cdf = cauchy.cdf(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) - self.assertAllEqual(cauchy.batch_shape, cdf.shape) - self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) - if not stats: - return - expected_cdf = stats.cauchy(loc, scale).cdf(x) - self.assertAllClose(expected_cdf, cdf.eval(), atol=0) - - def testCauchySurvivalFunction(self): - with self.test_session(): - batch_size = 50 - loc = self._rng.randn(batch_size) - scale = self._rng.rand(batch_size) + 1.0 - x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) - - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - sf = cauchy.survival_function(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) - self.assertAllEqual(cauchy.batch_shape, sf.shape) - self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) - if not stats: - return - expected_sf = stats.cauchy(loc, scale).sf(x) - self.assertAllClose(expected_sf, sf.eval(), atol=0) - - def testCauchyLogCDF(self): - with self.test_session(): - batch_size = 50 - loc = self._rng.randn(batch_size) - scale = self._rng.rand(batch_size) + 1.0 - x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64) - - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - cdf = cauchy.log_cdf(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) - self.assertAllEqual(cauchy.batch_shape, cdf.shape) - self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) - - if not stats: - return - expected_cdf = stats.cauchy(loc, scale).logcdf(x) - self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5) - - def testFiniteGradientAtDifficultPoints(self): - for dtype in [np.float32, np.float64]: - g = ops.Graph() - with g.as_default(): - loc = variables.Variable(dtype(0.0)) - scale = variables.Variable(dtype(1.0)) - dist = cauchy_lib.Cauchy(loc=loc, scale=scale) - x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype) - for func in [ - dist.cdf, dist.log_cdf, dist.survival_function, - dist.log_survival_function, dist.log_prob, dist.prob - ]: - value = func(x) - grads = gradients_impl.gradients(value, [loc, scale]) - with self.test_session(graph=g): - variables.global_variables_initializer().run() - self.assertAllFinite(value) - self.assertAllFinite(grads[0]) - self.assertAllFinite(grads[1]) - - def testCauchyLogSurvivalFunction(self): - with self.test_session(): - batch_size = 50 - loc = self._rng.randn(batch_size) - scale = self._rng.rand(batch_size) + 1.0 - x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64) - - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - sf = cauchy.log_survival_function(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) - self.assertAllEqual(cauchy.batch_shape, sf.shape) - self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) - - if not stats: - return - expected_sf = stats.cauchy(loc, scale).logsf(x) - self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5) - - def testCauchyEntropy(self): - with self.test_session(): - loc = np.array([1.0, 1.0, 1.0]) - scale = np.array([[1.0, 2.0, 3.0]]) - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - entropy = cauchy.entropy() - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - entropy.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - entropy.eval().shape) - self.assertAllEqual(cauchy.batch_shape, entropy.shape) - self.assertAllEqual(cauchy.batch_shape, entropy.eval().shape) - - if not stats: - return - expected_entropy = stats.cauchy(loc, scale).entropy() - self.assertAllClose(expected_entropy, entropy.eval()) - - def testCauchyMode(self): - with self.test_session(): - # Mu will be broadcast to [7, 7, 7]. - loc = [7.] - scale = [11., 12., 13.] - - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - self.assertAllEqual((3,), cauchy.mode().shape) - self.assertAllEqual([7., 7, 7], cauchy.mode().eval()) - - def testCauchyMean(self): - with self.test_session(): - loc = [1., 2., 3.] - scale = [7.] - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - self.assertAllEqual((3,), cauchy.mean().shape) - self.assertAllEqual([np.nan] * 3, cauchy.mean().eval()) - - def testCauchyNanMean(self): - with self.test_session(): - loc = [1., 2., 3.] - scale = [7.] - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) - - with self.assertRaises(ValueError): - cauchy.mean().eval() - - def testCauchyQuantile(self): - with self.test_session(): - batch_size = 50 - loc = self._rng.randn(batch_size) - scale = self._rng.rand(batch_size) + 1.0 - p = np.linspace(0.000001, 0.999999, batch_size).astype(np.float64) - - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - x = cauchy.quantile(p) - - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.shape) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.eval().shape) - self.assertAllEqual(cauchy.batch_shape, x.shape) - self.assertAllEqual(cauchy.batch_shape, x.eval().shape) - - if not stats: - return - expected_x = stats.cauchy(loc, scale).ppf(p) - self.assertAllClose(expected_x, x.eval(), atol=0.) - - def testCauchyVariance(self): - with self.test_session(): - # scale will be broadcast to [7, 7, 7] - loc = [1., 2., 3.] - scale = [7.] - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - self.assertAllEqual((3,), cauchy.variance().shape) - self.assertAllEqual([np.nan] * 3, cauchy.variance().eval()) - - def testCauchyNanVariance(self): - with self.test_session(): - # scale will be broadcast to [7, 7, 7] - loc = [1., 2., 3.] - scale = [7.] - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) - - with self.assertRaises(ValueError): - cauchy.variance().eval() - - def testCauchyStandardDeviation(self): - with self.test_session(): - # scale will be broadcast to [7, 7, 7] - loc = [1., 2., 3.] - scale = [7.] - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - self.assertAllEqual((3,), cauchy.stddev().shape) - self.assertAllEqual([np.nan] * 3, cauchy.stddev().eval()) - - def testCauchyNanStandardDeviation(self): - with self.test_session(): - # scale will be broadcast to [7, 7, 7] - loc = [1., 2., 3.] - scale = [7.] - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) - - with self.assertRaises(ValueError): - cauchy.stddev().eval() - - def testCauchySample(self): - with self.test_session(): - loc = constant_op.constant(3.0) - scale = constant_op.constant(1.0) - loc_v = 3.0 - n = constant_op.constant(100000) - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - samples = cauchy.sample(n) - sample_values = samples.eval() - - self.assertEqual(sample_values.shape, (100000,)) - self.assertAllClose(np.median(sample_values), loc_v, atol=1e-1) - - expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( - tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) - - self.assertAllEqual(expected_shape, samples.shape) - self.assertAllEqual(expected_shape, sample_values.shape) - - expected_shape = (tensor_shape.TensorShape( - [n.eval()]).concatenate(cauchy.batch_shape)) - - self.assertAllEqual(expected_shape, samples.shape) - self.assertAllEqual(expected_shape, sample_values.shape) - - def testCauchySampleMultiDimensional(self): - with self.test_session(): - batch_size = 2 - loc = constant_op.constant([[3.0, -3.0]] * batch_size) - scale = constant_op.constant([[0.5, 1.0]] * batch_size) - loc_v = [3.0, -3.0] - n = constant_op.constant(100000) - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - samples = cauchy.sample(n) - sample_values = samples.eval() - self.assertEqual(samples.shape, (100000, batch_size, 2)) - self.assertAllClose(np.median(sample_values[:, 0, 0]), - loc_v[0], atol=1e-1) - self.assertAllClose(np.median(sample_values[:, 0, 1]), - loc_v[1], atol=1e-1) - - expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( - tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) - self.assertAllEqual(expected_shape, samples.shape) - self.assertAllEqual(expected_shape, sample_values.shape) - - expected_shape = (tensor_shape.TensorShape( - [n.eval()]).concatenate(cauchy.batch_shape)) - self.assertAllEqual(expected_shape, samples.shape) - self.assertAllEqual(expected_shape, sample_values.shape) - - def testCauchyNegativeLocFails(self): - with self.test_session(): - cauchy = cauchy_lib.Cauchy(loc=[1.], scale=[-5.], validate_args=True) - with self.assertRaisesOpError("Condition x > 0 did not hold"): - cauchy.mode().eval() - - def testCauchyShape(self): - with self.test_session(): - loc = constant_op.constant([-3.0] * 5) - scale = constant_op.constant(11.0) - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - self.assertEqual(cauchy.batch_shape_tensor().eval(), [5]) - self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape([5])) - self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) - self.assertEqual(cauchy.event_shape, tensor_shape.TensorShape([])) - - def testCauchyShapeWithPlaceholders(self): - loc = array_ops.placeholder(dtype=dtypes.float32) - scale = array_ops.placeholder(dtype=dtypes.float32) - cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) - - with self.test_session() as sess: - # get_batch_shape should return an "" tensor. - self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape(None)) - self.assertEqual(cauchy.event_shape, ()) - self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) - self.assertAllEqual( - sess.run(cauchy.batch_shape_tensor(), - feed_dict={loc: 5.0, - scale: [1.0, 2.0]}), [2]) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py deleted file mode 100644 index a17bb091f6..0000000000 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""The Cauchy distribution class.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.distributions import distribution - - -__all__ = [ - "Cauchy", -] - - -class Cauchy(distribution.Distribution): - """The Cauchy distribution with location `loc` and scale `scale`. - - #### Mathematical details - - The probability density function (pdf) is, - - ```none - pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2)) - ``` - where `loc` is the location, and `scale` is the scale. - - The Cauchy distribution is a member of the [location-scale family]( - https://en.wikipedia.org/wiki/Location-scale_family), i.e. - - ```none - X ~ Cauchy(loc=0, scale=1) - Y ~ Cauchy(loc=loc, scale=scale) - Y = loc + scale * X - ``` - - #### Examples - - Examples of initialization of one or a batch of distributions. - - ```python - # Define a single scalar Cauchy distribution. - dist = Cauchy(loc=0., scale=3.) - - # Evaluate the cdf at 1, returning a scalar. - dist.cdf(1.) - - # Define a batch of two scalar valued Cauchy distributions. - dist = Cauchy(loc=[1, 2.], scale=[11, 22.]) - - # Evaluate the pdf of the first distribution on 0, and the second on 1.5, - # returning a length two tensor. - dist.prob([0, 1.5]) - - # Get 3 samples, returning a 3 x 2 tensor. - dist.sample([3]) - ``` - - Arguments are broadcast when possible. - - ```python - # Define a batch of two scalar valued Cauchy distributions. - # Both have median 1, but different scales. - dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.]) - # Evaluate the pdf of both distributions on the same point, 3.0, - # returning a length 2 tensor. - dist.prob(3.0) - ``` - """ - - def __init__(self, - loc, - scale, - validate_args=False, - allow_nan_stats=True, - name="Cauchy"): - """Construct Cauchy distributions with loc and and scale `loc` and `scale`. - - The parameters `loc` and `scale` must be shaped in a way that supports - broadcasting (e.g. `loc + scale` is a valid operation). - - Args: - loc: Floating point tensor; the modes of the distribution(s). - scale: Floating point tensor; the locations of the distribution(s). - Must contain only positive values. - validate_args: Python `bool`, default `False`. When `True` distribution - parameters are checked for validity despite possibly degrading runtime - performance. When `False` invalid inputs may silently render incorrect - outputs. - allow_nan_stats: Python `bool`, default `True`. When `True`, - statistics (e.g., mean, mode, variance) use the value "`NaN`" to - indicate the result is undefined. When `False`, an exception is raised - if one or more of the statistic's batch members are undefined. - name: Python `str` name prefixed to Ops created by this class. - - Raises: - TypeError: if `loc` and `scale` have different `dtype`. - """ - parameters = locals() - with ops.name_scope(name, values=[loc, scale]): - with ops.control_dependencies([check_ops.assert_positive(scale)] if - validate_args else []): - self._loc = array_ops.identity(loc, name="loc") - self._scale = array_ops.identity(scale, name="scale") - check_ops.assert_same_float_dtype([self._loc, self._scale]) - super(Cauchy, self).__init__( - dtype=self._scale.dtype, - reparameterization_type=distribution.FULLY_REPARAMETERIZED, - validate_args=validate_args, - allow_nan_stats=allow_nan_stats, - parameters=parameters, - graph_parents=[self._loc, self._scale], - name=name) - - @staticmethod - def _param_shapes(sample_shape): - return dict( - zip(("loc", "scale"), ([ops.convert_to_tensor( - sample_shape, dtype=dtypes.int32)] * 2))) - - @property - def loc(self): - """Distribution parameter for the mean.""" - return self._loc - - @property - def scale(self): - """Distribution parameter for standard deviation.""" - return self._scale - - def _batch_shape_tensor(self): - return array_ops.broadcast_dynamic_shape( - array_ops.shape(self.loc), - array_ops.shape(self.scale)) - - def _batch_shape(self): - return array_ops.broadcast_static_shape( - self.loc.shape, - self.scale.shape) - - def _event_shape_tensor(self): - return constant_op.constant([], dtype=dtypes.int32) - - def _event_shape(self): - return tensor_shape.scalar() - - def _sample_n(self, n, seed=None): - shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) - probs = random_ops.random_uniform( - shape=shape, minval=0., maxval=1., dtype=self.dtype, seed=seed) - return self._quantile(probs) - - def _log_prob(self, x): - return self._log_unnormalized_prob(x) - self._log_normalization() - - def _cdf(self, x): - return math_ops.atan(self._z(x)) / np.pi + 0.5 - - def _log_cdf(self, x): - return math_ops.log1p(2 / np.pi * math_ops.atan(self._z(x))) - np.log(2) - - def _log_unnormalized_prob(self, x): - return -math_ops.log1p(math_ops.square(self._z(x))) - - def _log_normalization(self): - return np.log(np.pi) + math_ops.log(self.scale) - - def _entropy(self): - h = np.log(4 * np.pi) + math_ops.log(self.scale) - return h * array_ops.ones_like(self.loc) - - def _quantile(self, p): - return self.loc + self.scale * math_ops.tan(np.pi * (p - 0.5)) - - def _mode(self): - return self.loc * array_ops.ones_like(self.scale) - - def _z(self, x): - """Standardize input `x`.""" - with ops.name_scope("standardize", values=[x]): - return (x - self.loc) / self.scale - - def _inv_z(self, z): - """Reconstruct input `x` from a its normalized version.""" - with ops.name_scope("reconstruct", values=[z]): - return z * self.scale + self.loc - - def _mean(self): - if self.allow_nan_stats: - return array_ops.fill(self.batch_shape_tensor(), - self.dtype.as_numpy_dtype(np.nan)) - else: - raise ValueError("`mean` is undefined for Cauchy distribution.") - - def _stddev(self): - if self.allow_nan_stats: - return array_ops.fill(self.batch_shape_tensor(), - self.dtype.as_numpy_dtype(np.nan)) - else: - raise ValueError("`stddev` is undefined for Cauchy distribution.") diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb index 459f2f4a7d..01616f2e7d 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb @@ -429,9 +429,7 @@ "cpu_tensor = tf.random_normal([SIZE, SIZE])\n", "\n", "if is_gpu_available:\n", - " gpu_tensor = cpu_tensor.gpu()\n", - "else:\n", - " print(\"GPU not available.\")" + " gpu_tensor = cpu_tensor.gpu()" ] }, { diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb index e6c7c11733..3b7e2cd435 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb @@ -383,7 +383,7 @@ "\n", "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n", "\n", - "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n", + "1. the value returned by the function passed in (in this case, the loss calculated by `calculate_linear_model_loss()`), and\n", "1. a list of tuples consisting of:\n", " 1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n", " 1. The corresponding variable (`tf.Variable`)\n", @@ -698,7 +698,7 @@ "source": [ "## Other Ways to Compute Gradients\n", "\n", - "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n", + "Using our loss function as an example (`calculate_linear_model_loss()`), there are several other ways we could compute gradients:\n", "\n", "1. `tfe.implicit_gradients()`\n", "1. `tfe.gradients_function()`\n", @@ -841,7 +841,7 @@ "# tfe.implicit_value_and_gradients() demo\n", "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n", "\n", - "# Returns the value returned by the function passed in, gradients, and variables:\n", + "# Returns only gradients:\n", "value_gradients_fn(inputs, labels, wb)" ] } diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb index 0088da5c4b..ebcc7027c1 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb @@ -9,7 +9,7 @@ "source": [ "# Eager Execution Tutorial: Importing Data\n", "\n", - "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", + "This notebook demonstrates the use of the [`tf.contrib.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", "\n", "* Creating a `Dataset`.\n", "* Iteration over a `Dataset` with eager execution enabled.\n", @@ -64,7 +64,7 @@ "source": [ "# Step 1: Create a source `Dataset`\n", "\n", - "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." + "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." ] }, { @@ -83,7 +83,7 @@ }, "outputs": [], "source": [ - "ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", + "ds_tensors = tf.contrib.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", "\n", "# Create a CSV file\n", "import tempfile\n", @@ -93,7 +93,7 @@ "Line 2\n", "Line 3\n", " \"\"\")\n", - "ds_file = tf.data.TextLineDataset(filename)\n" + "ds_file = tf.contrib.data.TextLineDataset(filename)\n" ] }, { @@ -105,7 +105,7 @@ "source": [ "# Step 2: Apply transformations\n", "\n", - "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details." + "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.contrib.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset) for details." ] }, { diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 9378fe8799..46b3eeae91 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -286,6 +286,7 @@ def _fused_batch_norm(inputs, ValueError: If the rank of `inputs` is neither 2 or 4. ValueError: If rank or `C` dimension of `inputs` is undefined. """ + # TODO(reedwm): Add support for fp16 inputs. if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): raise ValueError('data_format has to be either NCHW or NHWC.') with variable_scope.variable_scope( @@ -319,10 +320,9 @@ def _fused_batch_norm(inputs, (inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. + trainable_beta = trainable and center beta_collections = utils.get_variable_collections(variables_collections, 'beta') - # Float32 required to avoid precision-loss when using fp16 input/output - variable_dtype = dtypes.float32 if not param_initializers: param_initializers = {} if not param_regularizers: @@ -336,13 +336,13 @@ def _fused_batch_norm(inputs, beta = variables.model_variable( 'beta', shape=params_shape, - dtype=variable_dtype, + dtype=dtype, initializer=beta_initializer, regularizer=beta_regularizer, collections=beta_collections, - trainable=trainable) + trainable=trainable_beta) else: - beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape) + beta = array_ops.constant(0.0, shape=params_shape) if scale: gamma_collections = utils.get_variable_collections( @@ -352,13 +352,13 @@ def _fused_batch_norm(inputs, gamma = variables.model_variable( 'gamma', shape=params_shape, - dtype=variable_dtype, + dtype=dtype, initializer=gamma_initializer, regularizer=gamma_regularizer, collections=gamma_collections, trainable=trainable) else: - gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape) + gamma = array_ops.constant(1.0, shape=params_shape) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. We disable variable partitioning while creating @@ -375,7 +375,7 @@ def _fused_batch_norm(inputs, moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, - dtype=variable_dtype, + dtype=dtype, initializer=moving_mean_initializer, trainable=False, collections=moving_mean_collections) @@ -386,7 +386,7 @@ def _fused_batch_norm(inputs, moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, - dtype=variable_dtype, + dtype=dtype, initializer=moving_variance_initializer, trainable=False, collections=moving_variance_collections) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 5aa2253516..ff7f0e4462 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1774,12 +1774,10 @@ class BatchNormTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'undefined'): _layers.batch_norm(inputs, data_format='NCHW') - def _testCreateOp(self, fused, dtype=None): - if dtype is None: - dtype = dtypes.float32 + def _testCreateOp(self, fused): height, width = 3, 3 with self.test_session(): - images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype) + images = np.random.uniform(size=(5, height, width, 3)).astype('f') output = _layers.batch_norm(images, fused=fused) expected_name = ('BatchNorm/FusedBatchNorm' if fused else 'BatchNorm/batchnorm') @@ -1794,9 +1792,6 @@ class BatchNormTest(test.TestCase): def testCreateOpFused(self): self._testCreateOp(True) - def testCreateOpFusedFloat16(self): - self._testCreateOp(True, dtypes.float16) - def _testCreateOpBetaRegularizer(self, fused=True): height, width = 3, 3 with self.test_session(): @@ -2664,68 +2659,10 @@ class BatchNormTest(test.TestCase): def testBatchNormBeta(self): # Test case for 11673 with self.test_session() as sess: - a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) - b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW', - zero_debias_moving_mean=True) - a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10)) - b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW', - zero_debias_moving_mean=True) - sess.run(variables_lib.global_variables_initializer()) - - def testVariablesAreFloat32(self): - height, width = 3, 3 - with self.test_session(): - images = random_ops.random_uniform((5, height, width, 3), - seed=1, dtype=dtypes.float16) - _layers.batch_norm(images, scale=True) - beta = variables.get_variables_by_name('beta')[0] - gamma = variables.get_variables_by_name('gamma')[0] - self.assertEqual(beta.dtype, dtypes.float32_ref) - self.assertEqual(gamma.dtype, dtypes.float32_ref) - moving_mean = variables.get_variables_by_name('moving_mean')[0] - moving_variance = variables.get_variables_by_name('moving_variance')[0] - self.assertEqual(moving_mean.dtype, dtypes.float32_ref) - self.assertEqual(moving_variance.dtype, dtypes.float32_ref) - - def _runFusedBatchNorm(self, shape, dtype): - channels = shape[1] - images = np.arange(np.product(shape), dtype=dtype).reshape(shape) - beta = init_ops.constant_initializer( - np.arange( - 2, channels + 2, dtype=np.float32)) - gamma = init_ops.constant_initializer( - np.arange( - 10, channels + 10, dtype=np.float32) * 2.0) - mean = init_ops.constant_initializer( - np.arange( - 3, channels + 3, dtype=np.float32) * 5.0) - variance = init_ops.constant_initializer( - np.arange( - 1, channels + 1, dtype=np.float32) * 4.0) - output = _layers.batch_norm( - images, - fused=True, - is_training=True, - scale=True, - epsilon=0.5, - param_initializers={ - 'beta': beta, - 'gamma': gamma, - 'moving_mean': mean, - 'moving_variance': variance, - }, - data_format='NCHW') - with self.test_session(use_gpu=True) as sess: + a = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) + b = _layers.batch_norm(a, center=False, data_format='NCHW', + zero_debias_moving_mean=True) sess.run(variables_lib.global_variables_initializer()) - return sess.run(output) - - def testFusedBatchNormFloat16MatchesFloat32(self): - if test.is_gpu_available(cuda_only=True): - shape = [5, 4, 2, 3] - res_32 = self._runFusedBatchNorm(shape, np.float32) - res_16 = self._runFusedBatchNorm(shape, np.float16) - self.assertAllClose(res_32, res_16, rtol=1e-3) - def testAdjustmentCreated(self): # Tests that the adjustment is appropriately passed to and used by the core diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index bc0e6fc009..468d792a0d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -119,7 +119,7 @@ class Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=model_fn_ops.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... update train_op and hooks in ModelFnOps and return + ... upate train_op and hooks in ModelFnOps and return ``` """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 44e6c7c52d..8be9c72adf 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -23,6 +23,7 @@ import collections import six +from tensorflow.contrib import framework as contrib_framework from tensorflow.contrib.framework import get_graph_from_inputs from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import metric_key @@ -31,7 +32,6 @@ from tensorflow.python.estimator import model_fn as core_model_fn_lib from tensorflow.python.estimator.export import export_output as core_export_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -156,11 +156,11 @@ class ModelFnOps( else: if isinstance(predictions, dict): predictions = { - k: sparse_tensor.convert_to_tensor_or_sparse_tensor(v) + k: contrib_framework.convert_to_tensor_or_sparse_tensor(v) for k, v in six.iteritems(predictions) } else: - predictions = sparse_tensor.convert_to_tensor_or_sparse_tensor( + predictions = contrib_framework.convert_to_tensor_or_sparse_tensor( predictions) # Validate eval_metric_ops diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index db18ebf05d..4c50d40aaa 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,14 +28,13 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels + # pylint: enable=g-multiple-import,g-bad-import-order @@ -366,13 +365,8 @@ class DataFeeder(object): self.random_state = np.random.RandomState( 42) if random_state is None else random_state - if x_is_dict: - num_samples = list(self._x.values())[0].shape[0] - elif tensor_util.is_tensor(self._x): - num_samples = self._x.shape[0].value # shape will be a Dimension, extract an int - else: - num_samples = self._x.shape[0] - + num_samples = list(self._x.values())[0].shape[ + 0] if x_is_dict else self._x.shape[0] if self._shuffle: self.indices = self.random_state.permutation(num_samples) else: diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 86d8484391..13f2f0f502 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -238,10 +238,10 @@ class SdcaModel(object): with name_scope('sdca/prediction'): sparse_variables = self._convert_n_to_tensor(self._variables[ 'sparse_features_weights']) - result_sparse = 0.0 + result = 0.0 for sfc, sv in zip(examples['sparse_features'], sparse_variables): # TODO(sibyl-Aix6ihai): following does not take care of missing features. - result_sparse += math_ops.segment_sum( + result += math_ops.segment_sum( math_ops.multiply( array_ops.gather(sv, sfc.feature_indices), sfc.feature_values), sfc.example_indices) @@ -249,13 +249,12 @@ class SdcaModel(object): dense_variables = self._convert_n_to_tensor(self._variables[ 'dense_features_weights']) - result_dense = 0.0 for i in range(len(dense_variables)): - result_dense += math_ops.matmul( - dense_features[i], array_ops.expand_dims(dense_variables[i], -1)) + result += math_ops.matmul(dense_features[i], + array_ops.expand_dims(dense_variables[i], -1)) # Reshaping to allow shape inference at graph construction time. - return array_ops.reshape(result_dense, [-1]) + result_sparse + return array_ops.reshape(result, [-1]) def predictions(self, examples): """Add operations to compute predictions by the model. diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 89e8693490..b4aa032ff8 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -23,7 +23,6 @@ py_library( py_test( name = "lite_test", srcs = ["lite_test.py"], - srcs_version = "PY2AND3", deps = [ ":lite", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index b122818221..86540d58a6 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -36,10 +36,6 @@ import traceback import zipfile import numpy as np from six import StringIO - -# TODO(aselle): Disable GPU for now -os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - import tensorflow as tf from google.protobuf import text_format # TODO(aselle): switch to TensorFlow's resource_loader @@ -383,13 +379,12 @@ def make_zip_of_tests(zip_path, report["toco_log"] = "" tf.reset_default_graph() - with tf.device('/cpu:0'): - try: - inputs, outputs = make_graph(param_dict_real) - except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, - ValueError): - report["tf_log"] += traceback.format_exc() - return None, report + try: + inputs, outputs = make_graph(param_dict_real) + except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, + ValueError): + report["tf_log"] += traceback.format_exc() + return None, report sess = tf.Session() try: diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 17115047d2..92246a8aed 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -61,7 +61,6 @@ tf_py_test( data = [ ":toco_from_protos", ], - tags = ["no_pip"], ) filegroup( diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index e2e6c05591..dba1464653 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -314,8 +314,7 @@ ifeq ($(TARGET),ANDROID) -Wno-narrowing \ -fomit-frame-pointer \ $(MARCH_OPTION) \ --fPIE \ --fPIC +-fPIE INCLUDES = \ -I$(NDK_ROOT)/sources/android/support/include \ -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \ diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 65bd60c12a..715eb51577 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -174,26 +174,10 @@ tensorflow/contrib/makefile/build_all_ios.sh This process will take around twenty minutes on a modern MacBook Pro. -When it completes, you will have a unified library for all architectures -(i386sim, x86_64sim, armv7, armv7s and arm64) and the benchmark program. -Although successfully compiling the benchmark program is a +When it completes, you will have a library for a single architecture and the +benchmark program. Although successfully compiling the benchmark program is a sign of success, the program is not a complete iOS app. -If you would only like to build only one architecture to save time: -(iOS 11+ only supports 64bit so you can get away with arm64) - -```bash -tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -``` - -After the first build if you would like to just build the tensorflow -library you can pass the -T flag to avoid a clean & rebuild. This should -take you just a few seconds to generate the library if you modified one file. - -```bash -tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -T -``` - To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -209,18 +193,19 @@ If you have not already, you will need to download dependencies: tensorflow/contrib/makefile/download_dependencies.sh ``` -Next, you will need to compile protobufs for iOS (optionally takes the -a $ARCH flag): +Next, you will need to compile protobufs for iOS: ```bash -tensorflow/contrib/makefile/compile_ios_protobuf.sh +tensorflow/contrib/makefile/compile_ios_protobuf.sh ``` -Then, you will need to compile the nsync library for iOS (optionally takes -a $ARCH flag): +Then, you will need to compile the nsync library for iOS: ```bash export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` ``` + Then, you can run the makefile specifying iOS as the target, along with the architecture you want to build for: @@ -234,6 +219,10 @@ This creates a library in `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any xcode project against. +At this point, you will have a library for a single architecture and the +benchmark program. Although successfully compiling the benchmark program is a +sign of success, the program is not a complete iOS app. + To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -248,14 +237,6 @@ time follow it with: compile_ios_tensorflow.sh ``` -`compile_ios_tensorflow.sh` takes the -a flag to build only for one architecture. -In case you run into issues with unresolved symbols with nsync you can also pass --h ${HOST_NSYNC_LIB} and -n {TARGET_NSYNC_LIB} so it would look like: - -```bash -tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/contrib/makefile/downloads/nsync/builds/default.macos.c++11/nsync.a -n tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11/nsync.a -a arm64 -``` - In XCode, you will need to use -force_load in the linker flags section of the build settings to pull in the global constructors that are used to register ops and kernels. @@ -268,7 +249,7 @@ debug mode. If you are concerned about performance or are working on a release build, you would likely want a higher optimization setting, like so: ```bash -compile_ios_tensorflow.sh -f "-Os" +compile_ios_tensorflow.sh "-Os" ``` For other variations of valid optimization flags, see [clang optimization levels](http://stackoverflow.com/questions/15548023/clang-optimization-levels). diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index 988e12b482..a49bbe4565 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -23,29 +23,14 @@ if [[ $(uname) != "Darwin" ]]; then exit 1 fi -usage() { - echo "Usage: $(basename "$0") [-a:T]" - echo "-a [build_arch] build only for specified arch x86_64 [default=all]" - echo "-T only build tensorflow (dont download other deps etc)" - exit 1 -} - -while getopts "a:T" opt_name; do - case "$opt_name" in - a) BUILD_ARCH="${OPTARG}";; - T) ONLY_MAKE_TENSORFLOW="true";; - *) usage;; - esac -done -shift $((OPTIND - 1)) - - # Make sure we're in the correct directory, at the root of the source tree. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd ${SCRIPT_DIR}/../../../ -source "${SCRIPT_DIR}/build_helper.subr" -JOB_COUNT="${JOB_COUNT:-$(get_job_count)}" + +# Remove any old files first. +make -f tensorflow/contrib/makefile/Makefile clean +rm -rf tensorflow/contrib/makefile/downloads # Setting a deployment target is required for building with bitcode, # otherwise linking will fail with: @@ -56,37 +41,20 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then - # Remove any old files first. - make -f tensorflow/contrib/makefile/Makefile clean - rm -rf tensorflow/contrib/makefile/downloads +# Pull down the required versions of the frameworks we need. +tensorflow/contrib/makefile/download_dependencies.sh - # Pull down the required versions of the frameworks we need. - tensorflow/contrib/makefile/download_dependencies.sh - - # Compile protobuf for the target iOS device architectures. - tensorflow/contrib/makefile/compile_ios_protobuf.sh -fi +# Compile protobuf for the target iOS device architectures. +tensorflow/contrib/makefile/compile_ios_protobuf.sh # Compile nsync for the target iOS device architectures. # Don't use export var=`something` syntax; it swallows the exit status. HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` -if [[ -z "${BUILD_ARCH}" ]]; then - # No arch specified so build all architectures - TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` -else - # arch specified so build just that - TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}` -fi +TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` export HOST_NSYNC_LIB TARGET_NSYNC_LIB -if [[ -z "${BUILD_ARCH}" ]]; then - # build the ios tensorflow libraries. - tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB -else - # arch specified so build just that - tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB -fi +# Build the iOS TensorFlow libraries. +tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3" # Creates a static universal library in # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh index 43e5809dd2..4056db18a7 100755 --- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh +++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh @@ -21,28 +21,10 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -usage() { - echo "Usage: $(basename "$0") [-a]" - echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" - echo "default arch i386, x86_64, armv7, armv7s, arm64" - exit 1 -} - -BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" -while getopts "a:" opt_name; do - case "$opt_name" in - a) BUILD_TARGET="${OPTARG}";; - *) usage;; - esac -done -shift $((OPTIND - 1)) - -IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" - -SCRIPT_DIR=$(cd `dirname $0` && pwd) +SCRIPT_DIR=$(dirname $0) source "${SCRIPT_DIR}/build_helper.subr" -cd ${SCRIPT_DIR} +cd tensorflow/contrib/makefile HOST_GENDIR="$(pwd)/gen/protobuf-host" mkdir -p "${HOST_GENDIR}" @@ -82,10 +64,6 @@ else echo "protoc found. Skip building host tools." fi -# Remove old libs -rm -f ${LIBDIR}/libprotobuf.a -rm -f ${LIBDIR}/libprotobuf-lite.a - ./autogen.sh if [ $? -ne 0 ] then @@ -93,192 +71,157 @@ then exit 1 fi -package_pb_library() { - pb_libs="${LIBDIR}/${1}/lib/libprotobuf.a" - if [ -f "${LIBDIR}/libprotobuf.a" ]; then - pb_libs="$pb_libs ${LIBDIR}/libprotobuf.a" - fi - lipo \ - $pb_libs \ - -create \ - -output ${LIBDIR}/libprotobuf.a - - pblite_libs="${LIBDIR}/${1}/lib/libprotobuf-lite.a" - if [ -f "${LIBDIR}/libprotobuf-lite.a" ]; then - pblite_libs="$pblite_libs ${LIBDIR}/libprotobuf-lite.a" - fi - lipo \ - $pblite_libs \ - -create \ - -output ${LIBDIR}/libprotobuf-lite.a -} - -build_target() { -case "$1" in - i386) make distclean - ./configure \ - --host=i386-apple-${OSX_VERSION} \ - --disable-shared \ - --enable-cross-compile \ - --with-protoc="${PROTOC_PATH}" \ - --prefix=${LIBDIR}/iossim_386 \ - --exec-prefix=${LIBDIR}/iossim_386 \ - "CFLAGS=${CFLAGS} \ - -mios-simulator-version-min=${MIN_SDK_VERSION} \ - -arch i386 \ - -fembed-bitcode \ - -isysroot ${IPHONESIMULATOR_SYSROOT}" \ - "CXX=${CXX}" \ - "CXXFLAGS=${CXXFLAGS} \ - -mios-simulator-version-min=${MIN_SDK_VERSION} \ - -arch i386 \ - -fembed-bitcode \ - -isysroot \ - ${IPHONESIMULATOR_SYSROOT}" \ - LDFLAGS="-arch i386 \ - -fembed-bitcode \ - -mios-simulator-version-min=${MIN_SDK_VERSION} \ - ${LDFLAGS} \ - -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ - -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ - "LIBS=${LIBS}" - make -j"${JOB_COUNT}" - make install - - package_pb_library "iossim_386" - ;; - - x86_64) make distclean - ./configure \ - --host=x86_64-apple-${OSX_VERSION} \ - --disable-shared \ - --enable-cross-compile \ - --with-protoc="${PROTOC_PATH}" \ - --prefix=${LIBDIR}/iossim_x86_64 \ - --exec-prefix=${LIBDIR}/iossim_x86_64 \ - "CFLAGS=${CFLAGS} \ - -mios-simulator-version-min=${MIN_SDK_VERSION} \ - -arch x86_64 \ - -fembed-bitcode \ - -isysroot ${IPHONESIMULATOR_SYSROOT}" \ - "CXX=${CXX}" \ - "CXXFLAGS=${CXXFLAGS} \ - -mios-simulator-version-min=${MIN_SDK_VERSION} \ - -arch x86_64 \ - -fembed-bitcode \ - -isysroot \ - ${IPHONESIMULATOR_SYSROOT}" \ - LDFLAGS="-arch x86_64 \ - -fembed-bitcode \ - -mios-simulator-version-min=${MIN_SDK_VERSION} \ - ${LDFLAGS} \ - -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ - -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ - "LIBS=${LIBS}" - make -j"${JOB_COUNT}" - make install - - package_pb_library "iossim_x86_64" - ;; - - armv7) make distclean - ./configure \ - --host=armv7-apple-${OSX_VERSION} \ - --with-protoc="${PROTOC_PATH}" \ - --disable-shared \ - --prefix=${LIBDIR}/ios_arm7 \ - --exec-prefix=${LIBDIR}/ios_arm7 \ - "CFLAGS=${CFLAGS} \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - -arch armv7 \ - -fembed-bitcode \ - -isysroot ${IPHONEOS_SYSROOT}" \ - "CXX=${CXX}" \ - "CXXFLAGS=${CXXFLAGS} \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - -arch armv7 \ - -fembed-bitcode \ - -isysroot ${IPHONEOS_SYSROOT}" \ - LDFLAGS="-arch armv7 \ - -fembed-bitcode \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - ${LDFLAGS}" \ - "LIBS=${LIBS}" - make -j"${JOB_COUNT}" - make install - - package_pb_library "ios_arm7" - ;; - - armv7s) make distclean - ./configure \ - --host=armv7s-apple-${OSX_VERSION} \ - --with-protoc="${PROTOC_PATH}" \ - --disable-shared \ - --prefix=${LIBDIR}/ios_arm7s \ - --exec-prefix=${LIBDIR}/ios_arm7s \ - "CFLAGS=${CFLAGS} \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - -arch armv7s \ - -fembed-bitcode \ - -isysroot ${IPHONEOS_SYSROOT}" \ - "CXX=${CXX}" \ - "CXXFLAGS=${CXXFLAGS} \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - -arch armv7s \ - -fembed-bitcode \ - -isysroot ${IPHONEOS_SYSROOT}" \ - LDFLAGS="-arch armv7s \ - -fembed-bitcode \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - ${LDFLAGS}" \ - "LIBS=${LIBS}" - make -j"${JOB_COUNT}" - make install - - package_pb_library "ios_arm7s" - ;; - - arm64) make distclean - ./configure \ - --host=arm \ - --with-protoc="${PROTOC_PATH}" \ - --disable-shared \ - --prefix=${LIBDIR}/ios_arm64 \ - --exec-prefix=${LIBDIR}/ios_arm64 \ - "CFLAGS=${CFLAGS} \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - -arch arm64 \ - -fembed-bitcode \ - -isysroot ${IPHONEOS_SYSROOT}" \ - "CXXFLAGS=${CXXFLAGS} \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - -arch arm64 \ - -fembed-bitcode \ - -isysroot ${IPHONEOS_SYSROOT}" \ - LDFLAGS="-arch arm64 \ - -fembed-bitcode \ - -miphoneos-version-min=${MIN_SDK_VERSION} \ - ${LDFLAGS}" \ - "LIBS=${LIBS}" - make -j"${JOB_COUNT}" - make install - - package_pb_library "ios_arm64" - ;; - *) - echo "Unknown ARCH" - exit 1 - ;; -esac -} - -for build_element in "${build_targets[@]}" -do - echo "$build_element" - build_target "$build_element" -done - -file ${LIBDIR}/libprotobuf.a -file ${LIBDIR}/libprotobuf-lite.a -echo "Done building and packaging the libraries" +make distclean +./configure \ +--host=i386-apple-${OSX_VERSION} \ +--disable-shared \ +--enable-cross-compile \ +--with-protoc="${PROTOC_PATH}" \ +--prefix=${LIBDIR}/iossim_386 \ +--exec-prefix=${LIBDIR}/iossim_386 \ +"CFLAGS=${CFLAGS} \ +-mios-simulator-version-min=${MIN_SDK_VERSION} \ +-arch i386 \ +-fembed-bitcode \ +-isysroot ${IPHONESIMULATOR_SYSROOT}" \ +"CXX=${CXX}" \ +"CXXFLAGS=${CXXFLAGS} \ +-mios-simulator-version-min=${MIN_SDK_VERSION} \ +-arch i386 \ +-fembed-bitcode \ +-isysroot \ +${IPHONESIMULATOR_SYSROOT}" \ +LDFLAGS="-arch i386 \ +-fembed-bitcode \ +-mios-simulator-version-min=${MIN_SDK_VERSION} \ +${LDFLAGS} \ +-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ +-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ +"LIBS=${LIBS}" +make -j"${JOB_COUNT}" +make install + +make distclean +./configure \ +--host=x86_64-apple-${OSX_VERSION} \ +--disable-shared \ +--enable-cross-compile \ +--with-protoc="${PROTOC_PATH}" \ +--prefix=${LIBDIR}/iossim_x86_64 \ +--exec-prefix=${LIBDIR}/iossim_x86_64 \ +"CFLAGS=${CFLAGS} \ +-mios-simulator-version-min=${MIN_SDK_VERSION} \ +-arch x86_64 \ +-fembed-bitcode \ +-isysroot ${IPHONESIMULATOR_SYSROOT}" \ +"CXX=${CXX}" \ +"CXXFLAGS=${CXXFLAGS} \ +-mios-simulator-version-min=${MIN_SDK_VERSION} \ +-arch x86_64 \ +-fembed-bitcode \ +-isysroot \ +${IPHONESIMULATOR_SYSROOT}" \ +LDFLAGS="-arch x86_64 \ +-fembed-bitcode \ +-mios-simulator-version-min=${MIN_SDK_VERSION} \ +${LDFLAGS} \ +-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ +-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ +"LIBS=${LIBS}" +make -j"${JOB_COUNT}" +make install + +make distclean +./configure \ +--host=armv7-apple-${OSX_VERSION} \ +--with-protoc="${PROTOC_PATH}" \ +--disable-shared \ +--prefix=${LIBDIR}/ios_arm7 \ +--exec-prefix=${LIBDIR}/ios_arm7 \ +"CFLAGS=${CFLAGS} \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +-arch armv7 \ +-fembed-bitcode \ +-isysroot ${IPHONEOS_SYSROOT}" \ +"CXX=${CXX}" \ +"CXXFLAGS=${CXXFLAGS} \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +-arch armv7 \ +-fembed-bitcode \ +-isysroot ${IPHONEOS_SYSROOT}" \ +LDFLAGS="-arch armv7 \ +-fembed-bitcode \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +${LDFLAGS}" \ +"LIBS=${LIBS}" +make -j"${JOB_COUNT}" +make install + +make distclean +./configure \ +--host=armv7s-apple-${OSX_VERSION} \ +--with-protoc="${PROTOC_PATH}" \ +--disable-shared \ +--prefix=${LIBDIR}/ios_arm7s \ +--exec-prefix=${LIBDIR}/ios_arm7s \ +"CFLAGS=${CFLAGS} \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +-arch armv7s \ +-fembed-bitcode \ +-isysroot ${IPHONEOS_SYSROOT}" \ +"CXX=${CXX}" \ +"CXXFLAGS=${CXXFLAGS} \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +-arch armv7s \ +-fembed-bitcode \ +-isysroot ${IPHONEOS_SYSROOT}" \ +LDFLAGS="-arch armv7s \ +-fembed-bitcode \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +${LDFLAGS}" \ +"LIBS=${LIBS}" +make -j"${JOB_COUNT}" +make install + +make distclean +./configure \ +--host=arm \ +--with-protoc="${PROTOC_PATH}" \ +--disable-shared \ +--prefix=${LIBDIR}/ios_arm64 \ +--exec-prefix=${LIBDIR}/ios_arm64 \ +"CFLAGS=${CFLAGS} \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +-arch arm64 \ +-fembed-bitcode \ +-isysroot ${IPHONEOS_SYSROOT}" \ +"CXXFLAGS=${CXXFLAGS} \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +-arch arm64 \ +-fembed-bitcode \ +-isysroot ${IPHONEOS_SYSROOT}" \ +LDFLAGS="-arch arm64 \ +-fembed-bitcode \ +-miphoneos-version-min=${MIN_SDK_VERSION} \ +${LDFLAGS}" \ +"LIBS=${LIBS}" +make -j"${JOB_COUNT}" +make install + +lipo \ +${LIBDIR}/iossim_386/lib/libprotobuf.a \ +${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \ +${LIBDIR}/ios_arm7/lib/libprotobuf.a \ +${LIBDIR}/ios_arm7s/lib/libprotobuf.a \ +${LIBDIR}/ios_arm64/lib/libprotobuf.a \ +-create \ +-output ${LIBDIR}/libprotobuf.a + +lipo \ +${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \ +${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \ +${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \ +${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \ +${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \ +-create \ +-output ${LIBDIR}/libprotobuf-lite.a diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh index ae82163e11..5d1cc8b375 100755 --- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh +++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh @@ -43,124 +43,55 @@ then exit 1 fi -usage() { - echo "Usage: $(basename "$0") [-a]" - echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" - echo "default is [i386, x86_64, armv7, armv7s, arm64]" - exit 1 -} - -BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" -while getopts "a:f:h:n:" opt_name; do - case "$opt_name" in - a) BUILD_TARGET="${OPTARG}";; - f) BUILD_OPT="${OPTARG}";; - h) NSYNC_HOST="${OPTARG}";; - n) NSYNC_TARGET="${OPTARG}";; - *) usage;; - esac -done -shift $((OPTIND - 1)) - -IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" - -SCRIPT_DIR=$(cd `dirname $0` && pwd) -source "${SCRIPT_DIR}/build_helper.subr" - - GENDIR=tensorflow/contrib/makefile/gen/ LIBDIR=${GENDIR}lib LIB_PREFIX=libtensorflow-core -#remove any old artifacts -rm -rf ${LIBDIR}/${LIB_PREFIX}.a +make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ +TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1" +if [ $? -ne 0 ] +then + echo "armv7 compilation failed." + exit 1 +fi -package_tf_library() { - CAP_DIR=`echo $1 | tr 'a-z' 'A-Z'` - tf_libs="${LIBDIR}/ios_${CAP_DIR}/${LIB_PREFIX}-${1}.a" - if [ -f "${LIBDIR}/${LIB_PREFIX}.a" ]; then - tf_libs="$tf_libs ${LIBDIR}/${LIB_PREFIX}.a" - fi - lipo \ - $tf_libs \ - -create \ - -output ${LIBDIR}/${LIB_PREFIX}.a -} +make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ +TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1" +if [ $? -ne 0 ] +then + echo "arm7vs compilation failed." + exit 1 +fi -build_tf_target() { -case "$1" in - armv7) - make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ - TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a \ - OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ - TARGET_NSYNC_LIB="${NSYNC_TARGET}" - if [ $? -ne 0 ] - then - echo "armv7 compilation failed." - exit 1 - fi - package_tf_library "armv7" - ;; - armv7s) - make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ - TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a \ - OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ - TARGET_NSYNC_LIB="${NSYNC_TARGET}" +make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ +TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1" +if [ $? -ne 0 ] +then + echo "arm64 compilation failed." + exit 1 +fi - if [ $? -ne 0 ] - then - echo "arm7vs compilation failed." - exit 1 - fi - package_tf_library "armv7s" - ;; - arm64) - make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ - TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a \ - OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ - TARGET_NSYNC_LIB="${NSYNC_TARGET}" - if [ $? -ne 0 ] - then - echo "arm64 compilation failed." - exit 1 - fi - package_tf_library "arm64" - ;; - i386) - make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ - TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a \ - OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ - TARGET_NSYNC_LIB="${NSYNC_TARGET}" - if [ $? -ne 0 ] - then - echo "i386 compilation failed." - exit 1 - fi - package_tf_library "i386" - ;; - x86_64) - make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ - TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a \ - OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ - TARGET_NSYNC_LIB="${NSYNC_TARGET}" - if [ $? -ne 0 ] - then - echo "x86_64 compilation failed." - exit 1 - fi - package_tf_library "x86_64" - ;; - *) - echo "Unknown ARCH" - exit 1 -esac -} +make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ +TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1" +if [ $? -ne 0 ] +then + echo "i386 compilation failed." + exit 1 +fi -for build_tf_element in "${build_targets[@]}" -do - echo "$build_tf_element" - build_tf_target "$build_tf_element" -done +make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ +TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1" +if [ $? -ne 0 ] +then + echo "x86_64 compilation failed." + exit 1 +fi -echo "Done building and packaging TF" -file ${LIBDIR}/${LIB_PREFIX}.a +lipo \ +${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \ +${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \ +${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \ +${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \ +${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \ +-create \ +-output ${LIBDIR}/${LIB_PREFIX}.a diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index 930e6b8dea..ecbd9bb825 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -265,7 +265,7 @@ for arch in $archs; do -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \ -I../../platform/c++11 -I../../platform/gcc \ -I../../platform/posix -pthread - PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE -fPIC + PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ @@ -301,9 +301,6 @@ done case "$target_platform" in ios) nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11" - if [ -d "$nsync_platform_dir" ]; then - rm -rf "$nsync_platform_dir" - fi mkdir "$nsync_platform_dir" eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a' echo "$nsync_platform_dir/nsync.a" diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py index 0bc133a00e..3bf795d19a 100644 --- a/tensorflow/contrib/nn/__init__.py +++ b/tensorflow/contrib/nn/__init__.py @@ -15,7 +15,6 @@ """Module for variants of ops in tf.nn. @@alpha_dropout -@@conv1d_transpose @@deprecated_flipped_softmax_cross_entropy_with_logits @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits @@deprecated_flipped_sigmoid_cross_entropy_with_logits @@ -33,7 +32,6 @@ from tensorflow.contrib.nn.python.ops.alpha_dropout import * from tensorflow.contrib.nn.python.ops.cross_entropy import * from tensorflow.contrib.nn.python.ops.sampling_ops import * from tensorflow.contrib.nn.python.ops.scaled_softplus import * -from tensorflow.python.ops.nn_ops import conv1d_transpose from tensorflow.python.ops.nn_ops import nth_element # pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index a9a63cbce0..8c46becf2c 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -19,7 +19,6 @@ py_library( "python/training/external_optimizer.py", "python/training/lazy_adam_optimizer.py", "python/training/moving_average_optimizer.py", - "python/training/multitask_optimizer_wrapper.py", "python/training/nadam_optimizer.py", "python/training/powersign.py", "python/training/sign_decay.py", @@ -99,23 +98,6 @@ py_test( ], ) -py_test( - name = "multitask_optimizer_wrapper_test", - srcs = ["python/training/multitask_optimizer_wrapper_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":opt_py", - "//tensorflow/python:client", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:training", - "//tensorflow/python:variables", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - py_test( name = "lazy_adam_optimizer_test", srcs = ["python/training/lazy_adam_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 4c60c99342..caf22536bb 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -24,7 +24,7 @@ from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import from tensorflow.contrib.opt.python.training.external_optimizer import * from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * -from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * +from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * @@ -38,8 +38,7 @@ _allowed_symbols = [ 'DelayCompensatedGradientDescentOptimizer', 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', - 'ScipyOptimizerInterface', 'VariableClippingOptimizer', - 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', + 'ScipyOptimizerInterface', 'VariableClippingOptimizer' ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py deleted file mode 100644 index c26037935d..0000000000 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""An optimizer wrapper that ensures correct behaviour -of stateful optimizers with multitask loss.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import types -import six - -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import clip_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.training import optimizer - -__all__ = ["MultitaskOptimizerWrapper", - "clip_gradients_by_global_norm"] - -def _is_all_zeros(grad): - all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0) - return all_zeros - -def _get_wrapper(fn, opt): - def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument - all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond( - all_zeros, - control_flow_ops.no_op, - lambda: fn(grad, *args, **kwargs)) - wrapper = types.MethodType(wrapper, opt) - return wrapper - -class MultitaskOptimizerWrapper(object): - """Optimizer wrapper that ensures that - all-zero gradients don't affect the optimizer state. - - This might be useful when a multi-task loss is used, - and some components of the loss might be - not present (e.g. masked out) in some training batches. - Technically their gradient would be zero, - which would normally affect the optimizer state - (e.g. push running average to zero). - However this is not the desired behaviour, - since the missing loss component - should be treated as unknown rather than zero. - - This wrapper filters out all-zero gradient tensors, - therefore preserving the optimizer state. - - If gradient clipping by global norm is used, - the provided function clip_gradients_by_global_norm - should be used (and specified explicitly by the user). - Otherwise the global norm would be underestimated - because of all-zero tensors that should be ignored. - - The gradient calculation and application - are delegated to an underlying optimizer. - The gradient application is altered only for all-zero tensors. - - Example: - ```python - momentum_optimizer = tf.train.MomentumOptimizer( - learning_rate, momentum=0.9) - multitask_momentum_optimizer = tf.contrib.opt.MultitaskOptimizerWrapper( - momentum_optimizer) - gradvars = multitask_momentum_optimizer.compute_gradients( - loss) - gradvars_clipped, _ = tf.contrib.opt.clip_gradients_by_global_norm( - gradvars, 15.0) - train_op = multitask_momentum_optimizer.apply_gradients( - gradvars_clipped, global_step=batch) - ``` - """ - def __init__(self, opt): - """ - Args: - opt: an instance of a class that implements tf.train.Optimizer. - """ - if not isinstance(opt, optimizer.Optimizer): - raise TypeError( - "Supplied optimizer must be an instance of tf.train.Optimizer") - self._opt = opt - overriden_methods = ('_apply_dense', - '_resource_apply_dense', - '_apply_sparse', - '_resource_apply_sparse') - for name in overriden_methods: - fn = getattr(self._opt, name) - wrapper = _get_wrapper(fn, self._opt) - setattr(self._opt, name, wrapper) - - def __getattr__(self, name): - return getattr(self._opt, name) - - -def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.): - """Clips gradients of a multitask loss by their global norm. - Ignores all-zero tensors when computing the global norm. - - Args: - gradients_variables: a list of pairs (gradient, variable). - clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. - - Returns: - list: A list of pairs of the same type as gradients_variables,. - fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. - """ - gradients, variables = six.moves.zip(*gradients_variables) - def _replace_nonexisting_grad(grad): - if grad is None: - return grad - all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond(all_zeros, - lambda: array_ops.zeros( - [], dtype=dtypes.as_dtype(grad.dtype)), - lambda: grad) - nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients] - fixed_global_norm = clip_ops.global_norm(nonzero_gradients) - gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm, - use_norm=fixed_global_norm) - return list(six.moves.zip(gradients, variables)), fixed_global_norm diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py deleted file mode 100644 index b06213f715..0000000000 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for MultitaskOptimizerWrapper.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import variables -from tensorflow.python.platform import test -from tensorflow.python.training import momentum - -import numpy as np -import six - -class MultitaskOptimizerWrapperTest(test.TestCase): - """ - Tests for the multitask optimizer wrapper. - """ - def testWrapper(self): - with self.test_session(): - var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) - var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32) - grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) - mom_opt_impl = momentum.MomentumOptimizer( - learning_rate=2.0, momentum=0.9) - mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper( - mom_opt_impl) - mom_update = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - mom_update_partial = mom_opt.apply_gradients( - zip([grads_allzero, grads1], [var0, var1])) - mom_update_no_action = mom_opt.apply_gradients( - zip([grads_allzero, grads_allzero], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - - self.assertEqual(["momentum"], mom_opt.get_slot_names()) - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEquals(slot0.get_shape(), var0.get_shape()) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEquals(slot1.get_shape(), var1.get_shape()) - - # Step 1: normal momentum update. - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), - self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - - # Step 2: momentum update that changes only slot1 but not slot0. - self.evaluate(mom_update_partial) - # Check that only the relevant momentum accumulator has been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), - self.evaluate(slot1)) - - # Step 3: momentum update that does not change anything. - self.evaluate(mom_update_no_action) - # Check that the momentum accumulators have *NOT* been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), - self.evaluate(slot1)) - - def testGradientClipping(self): - with self.test_session(): - var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) - var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) - var2 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) - var3 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) - grads0 = constant_op.constant([10.0, 15.0], dtype=dtypes.float32) - grads1 = constant_op.constant([0.0, 5.0], dtype=dtypes.float32) - grads2 = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) - grads3 = None - varlist = [var0, var1, var2, var3] - gradients = [grads0, grads1, grads2, grads3] - clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm( - six.moves.zip(gradients, varlist), clip_norm=1.0) - clipped_grads = list(six.moves.zip(*clipped_gradvars))[0] - reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0]))) - self.assertAllCloseAccordingToType( - self.evaluate(global_norm), reference_global_norm) - self.assertAllCloseAccordingToType( - self.evaluate(clipped_grads[2]), np.array([0., 0.])) - self.assertEqual(clipped_grads[3], None) - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 16b6d145e3..909c6aba2b 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -38,9 +38,6 @@ from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test -from tensorflow.python.framework import test_util -from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell - # pylint: enable=protected-access @@ -361,45 +358,6 @@ class RNNCellTest(test.TestCase): self.assertEquals(variables[2].op.name, "root/lstm_cell/projection/kernel") - def testLSTMCellLayerNorm(self): - with self.test_session() as sess: - num_units = 2 - num_proj = 3 - batch_size = 1 - input_size = 4 - with variable_scope.variable_scope( - "root", initializer=init_ops.constant_initializer(0.5)): - x = array_ops.zeros([batch_size, input_size]) - c = array_ops.zeros([batch_size, num_units]) - h = array_ops.zeros([batch_size, num_proj]) - state = rnn_cell_impl.LSTMStateTuple(c, h) - cell = contrib_rnn_cell.LayerNormLSTMCell( - num_units=num_units, - num_proj=num_proj, - forget_bias=1.0, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0) - g, out_m = cell(x, state) - sess.run([variables_lib.global_variables_initializer()]) - res = sess.run([g, out_m], { - x.name: np.ones((batch_size, input_size)), - c.name: 0.1 * np.ones((batch_size, num_units)), - h.name: 0.1 * np.ones((batch_size, num_proj)) - }) - self.assertEqual(len(res), 2) - # The numbers in results were not calculated, this is mostly just a - # smoke test. - self.assertEqual(res[0].shape, (batch_size, num_proj)) - self.assertEqual(res[1][0].shape, (batch_size, num_units)) - self.assertEqual(res[1][1].shape, (batch_size, num_proj)) - # Different inputs so different outputs and states - for i in range(1, batch_size): - self.assertTrue( - float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) - self.assertTrue( - float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) - def testOutputProjectionWrapper(self): with self.test_session() as sess: with variable_scope.variable_scope( diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index b4a5f2d7eb..ebd4564f12 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -37,7 +37,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell -from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -1276,49 +1275,6 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5) - - def testBasicLSTMCellWithStateTupleLayerNorm(self): - """The results of LSTMCell and LayerNormBasicLSTMCell - should be same. """ - with self.test_session() as sess: - with variable_scope.variable_scope( - "root", initializer=init_ops.constant_initializer(0.5)): - x = array_ops.zeros([1, 2]) - c0 = array_ops.zeros([1, 2]) - h0 = array_ops.zeros([1, 2]) - state0 = rnn_cell_impl.LSTMStateTuple(c0, h0) - c1 = array_ops.zeros([1, 2]) - h1 = array_ops.zeros([1, 2]) - state1 = rnn_cell_impl.LSTMStateTuple(c1, h1) - cell = rnn_cell_impl.MultiRNNCell( - [contrib_rnn_cell.LayerNormLSTMCell( - 2, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0) for _ in range(2)]) - h, (s0, s1) = cell(x, (state0, state1)) - sess.run([variables.global_variables_initializer()]) - res = sess.run([h, s0, s1], { - x.name: np.array([[1., 1.]]), - c0.name: 0.1 * np.asarray([[0, 1]]), - h0.name: 0.1 * np.asarray([[2, 3]]), - c1.name: 0.1 * np.asarray([[4, 5]]), - h1.name: 0.1 * np.asarray([[6, 7]]), - }) - - expected_h = np.array([[-0.38079708, 0.38079708]]) - expected_h0 = np.array([[-0.38079708, 0.38079708]]) - expected_c0 = np.array([[-1.0, 1.0]]) - expected_h1 = np.array([[-0.38079708, 0.38079708]]) - expected_c1 = np.array([[-1.0, 1.0]]) - - self.assertEqual(len(res), 3) - self.assertAllClose(res[0], expected_h, 1e-5) - self.assertAllClose(res[1].c, expected_c0, 1e-5) - self.assertAllClose(res[1].h, expected_h0, 1e-5) - self.assertAllClose(res[2].c, expected_c1, 1e-5) - self.assertAllClose(res[2].h, expected_h1, 1e-5) - def testBasicLSTMCellWithDropout(self): def _is_close(x, y, digits=4): diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 5e85c125df..d4691f2c27 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -36,7 +36,6 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope as vs -from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest @@ -77,18 +76,6 @@ def _get_sharded_variable(name, shape, dtype, num_shards): return shards -def _norm(g, b, inp, scope): - shape = inp.get_shape()[-1:] - gamma_init = init_ops.constant_initializer(g) - beta_init = init_ops.constant_initializer(b) - with vs.variable_scope(scope): - # Initialize beta and gamma for use by layer_norm. - vs.get_variable("gamma", shape=shape, initializer=gamma_init) - vs.get_variable("beta", shape=shape, initializer=beta_init) - normalized = layers.layer_norm(inp, reuse=True, scope=scope) - return normalized - - class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): """Long short-term memory unit (LSTM) recurrent network cell. @@ -115,24 +102,13 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. - - Layer normalization implementation is based on: - - https://arxiv.org/abs/1607.06450. - - "Layer Normalization" - Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton - - and is applied before the internal nonlinearities. - """ def __init__(self, num_units, use_peepholes=False, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=1, num_proj_shards=1, forget_bias=1.0, state_is_tuple=True, - activation=math_ops.tanh, reuse=None, - layer_norm=False, norm_gain=1.0, norm_shift=0.0): + activation=math_ops.tanh, reuse=None): """Initialize the parameters for an LSTM cell. Args: @@ -159,13 +135,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. - layer_norm: If `True`, layer normalization will be applied. - norm_gain: float, The layer normalization gain initial value. If - `layer_norm` has been set to `False`, this argument will be ignored. - norm_shift: float, The layer normalization shift initial value. If - `layer_norm` has been set to `False`, this argument will be ignored. - - """ super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: @@ -183,9 +152,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): self._state_is_tuple = state_is_tuple self._activation = activation self._reuse = reuse - self._layer_norm = layer_norm - self._norm_gain = norm_gain - self._norm_shift = norm_shift if num_proj: self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj) @@ -254,20 +220,9 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): # j = new_input, f = forget_gate, o = output_gate cell_inputs = array_ops.concat([inputs, m_prev], 1) - lstm_matrix = math_ops.matmul(cell_inputs, concat_w) - - # If layer nomalization is applied, do not add bias - if not self._layer_norm: - lstm_matrix = nn_ops.bias_add(lstm_matrix, b) - + lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b) j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=3, axis=1) - # Apply layer normalization - if self._layer_norm: - j = _norm(self._norm_gain, self._norm_shift, j, "transform") - f = _norm(self._norm_gain, self._norm_shift, f, "forget") - o = _norm(self._norm_gain, self._norm_shift, o, "output") - # Diagonal connections if self._use_peepholes: w_f_diag = vs.get_variable( @@ -281,10 +236,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): f_act = sigmoid(f + self._forget_bias) c = (f_act * c_prev + (1 - f_act) * self._activation(j)) - # Apply layer normalization - if self._layer_norm: - c = _norm(self._norm_gain, self._norm_shift, c, "state") - if self._use_peepholes: m = sigmoid(o + w_o_diag * c) * self._activation(c) else: @@ -1350,8 +1301,8 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): self._keep_prob = dropout_keep_prob self._seed = dropout_prob_seed self._layer_norm = layer_norm - self._norm_gain = norm_gain - self._norm_shift = norm_shift + self._g = norm_gain + self._b = norm_shift self._reuse = reuse @property @@ -1362,25 +1313,24 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): def output_size(self): return self._num_units - def _norm(self, inp, scope, dtype=dtypes.float32): + def _norm(self, inp, scope): shape = inp.get_shape()[-1:] - gamma_init = init_ops.constant_initializer(self._norm_gain) - beta_init = init_ops.constant_initializer(self._norm_shift) + gamma_init = init_ops.constant_initializer(self._g) + beta_init = init_ops.constant_initializer(self._b) with vs.variable_scope(scope): # Initialize beta and gamma for use by layer_norm. - vs.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype) - vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype) + vs.get_variable("gamma", shape=shape, initializer=gamma_init) + vs.get_variable("beta", shape=shape, initializer=beta_init) normalized = layers.layer_norm(inp, reuse=True, scope=scope) return normalized def _linear(self, args): out_size = 4 * self._num_units proj_size = args.get_shape()[-1] - dtype = args.dtype - weights = vs.get_variable("kernel", [proj_size, out_size], dtype=dtype) + weights = vs.get_variable("kernel", [proj_size, out_size]) out = math_ops.matmul(args, weights) if not self._layer_norm: - bias = vs.get_variable("bias", [out_size], dtype=dtype) + bias = vs.get_variable("bias", [out_size]) out = nn_ops.bias_add(out, bias) return out @@ -1389,14 +1339,13 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): c, h = state args = array_ops.concat([inputs, h], 1) concat = self._linear(args) - dtype = args.dtype i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1) if self._layer_norm: - i = self._norm(i, "input", dtype=dtype) - j = self._norm(j, "transform", dtype=dtype) - f = self._norm(f, "forget", dtype=dtype) - o = self._norm(o, "output", dtype=dtype) + i = self._norm(i, "input") + j = self._norm(j, "transform") + f = self._norm(f, "forget") + o = self._norm(o, "output") g = self._activation(j) if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1: @@ -1405,7 +1354,7 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): new_c = (c * math_ops.sigmoid(f + self._forget_bias) + math_ops.sigmoid(i) * g) if self._layer_norm: - new_c = self._norm(new_c, "state", dtype=dtype) + new_c = self._norm(new_c, "state") new_h = self._activation(new_c) * math_ops.sigmoid(o) new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h) @@ -2357,264 +2306,3 @@ class GLSTMCell(rnn_cell_impl.RNNCell): new_state = rnn_cell_impl.LSTMStateTuple(c, m) return m, new_state - - -class LayerNormLSTMCell(rnn_cell_impl.RNNCell): - """Long short-term memory unit (LSTM) recurrent network cell. - - The default non-peephole implementation is based on: - - http://www.bioinf.jku.at/publications/older/2604.pdf - - S. Hochreiter and J. Schmidhuber. - "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997. - - The peephole implementation is based on: - - https://research.google.com/pubs/archive/43905.pdf - - Hasim Sak, Andrew Senior, and Francoise Beaufays. - "Long short-term memory recurrent neural network architectures for - large scale acoustic modeling." INTERSPEECH, 2014. - - The class uses optional peep-hole connections, optional cell clipping, and - an optional projection layer. - - Layer normalization implementation is based on: - - https://arxiv.org/abs/1607.06450. - - "Layer Normalization" - Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton - - and is applied before the internal nonlinearities. - - """ - - def __init__(self, num_units, - use_peepholes=False, cell_clip=None, - initializer=None, num_proj=None, proj_clip=None, - forget_bias=1.0, - activation=None, layer_norm=False, - norm_gain=1.0, norm_shift=0.0, reuse=None): - """Initialize the parameters for an LSTM cell. - - Args: - num_units: int, The number of units in the LSTM cell - use_peepholes: bool, set True to enable diagonal/peephole connections. - cell_clip: (optional) A float value, if provided the cell state is clipped - by this value prior to the cell output activation. - initializer: (optional) The initializer to use for the weight and - projection matrices. - num_proj: (optional) int, The output dimensionality for the projection - matrices. If None, no projection is performed. - proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is - provided, then the projected values are clipped elementwise to within - `[-proj_clip, proj_clip]`. - forget_bias: Biases of the forget gate are initialized by default to 1 - in order to reduce the scale of forgetting at the beginning of - the training. Must set it manually to `0.0` when restoring from - CudnnLSTM trained checkpoints. - activation: Activation function of the inner states. Default: `tanh`. - layer_norm: If `True`, layer normalization will be applied. - norm_gain: float, The layer normalization gain initial value. If - `layer_norm` has been set to `False`, this argument will be ignored. - norm_shift: float, The layer normalization shift initial value. If - `layer_norm` has been set to `False`, this argument will be ignored. - reuse: (optional) Python boolean describing whether to reuse variables - in an existing scope. If not `True`, and the existing scope already has - the given variables, an error is raised. - - When restoring from CudnnLSTM-trained checkpoints, must use - CudnnCompatibleLSTMCell instead. - """ - super(LayerNormLSTMCell, self).__init__(_reuse=reuse) - - self._num_units = num_units - self._use_peepholes = use_peepholes - self._cell_clip = cell_clip - self._initializer = initializer - self._num_proj = num_proj - self._proj_clip = proj_clip - self._forget_bias = forget_bias - self._activation = activation or math_ops.tanh - self._layer_norm = layer_norm - self._norm_gain = norm_gain - self._norm_shift = norm_shift - - if num_proj: - self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)) - self._output_size = num_proj - else: - self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units)) - self._output_size = num_units - - @property - def state_size(self): - return self._state_size - - @property - def output_size(self): - return self._output_size - - - def _linear(self, - args, - output_size, - bias, - bias_initializer=None, - kernel_initializer=None, - layer_norm=False): - """Linear map: sum_i(args[i] * W[i]), where W[i] is a Variable. - - Args: - args: a 2D Tensor or a list of 2D, batch x n, Tensors. - output_size: int, second dimension of W[i]. - bias: boolean, whether to add a bias term or not. - bias_initializer: starting value to initialize the bias - (default is all zeros). - kernel_initializer: starting value to initialize the weight. - layer_norm: boolean, whether to apply layer normalization. - - - Returns: - A 2D Tensor with shape [batch x output_size] taking value - sum_i(args[i] * W[i]), where each W[i] is a newly created Variable. - - Raises: - ValueError: if some of the arguments has unspecified or wrong shape. - """ - if args is None or (nest.is_sequence(args) and not args): - raise ValueError("`args` must be specified") - if not nest.is_sequence(args): - args = [args] - - # Calculate the total size of arguments on dimension 1. - total_arg_size = 0 - shapes = [a.get_shape() for a in args] - for shape in shapes: - if shape.ndims != 2: - raise ValueError("linear is expecting 2D arguments: %s" % shapes) - if shape[1].value is None: - raise ValueError("linear expects shape[1] to be provided for shape %s, " - "but saw %s" % (shape, shape[1])) - else: - total_arg_size += shape[1].value - - dtype = [a.dtype for a in args][0] - - # Now the computation. - scope = vs.get_variable_scope() - with vs.variable_scope(scope) as outer_scope: - weights = vs.get_variable( - "kernel", [total_arg_size, output_size], - dtype=dtype, - initializer=kernel_initializer) - if len(args) == 1: - res = math_ops.matmul(args[0], weights) - else: - res = math_ops.matmul(array_ops.concat(args, 1), weights) - if not bias: - return res - with vs.variable_scope(outer_scope) as inner_scope: - inner_scope.set_partitioner(None) - if bias_initializer is None: - bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) - biases = vs.get_variable( - "bias", [output_size], - dtype=dtype, - initializer=bias_initializer) - - if not layer_norm: - res = nn_ops.bias_add(res, biases) - - return res - - def call(self, inputs, state): - """Run one step of LSTM. - - Args: - inputs: input Tensor, 2D, batch x num_units. - state: this must be a tuple of state Tensors, - both `2-D`, with column sizes `c_state` and - `m_state`. - - Returns: - A tuple containing: - - - A `2-D, [batch x output_dim]`, Tensor representing the output of the - LSTM after reading `inputs` when previous state was `state`. - Here output_dim is: - num_proj if num_proj was set, - num_units otherwise. - - Tensor(s) representing the new state of LSTM after reading `inputs` when - the previous state was `state`. Same type and shape(s) as `state`. - - Raises: - ValueError: If input size cannot be inferred from inputs via - static shape inference. - """ - num_proj = self._num_units if self._num_proj is None else self._num_proj - sigmoid = math_ops.sigmoid - - (c_prev, m_prev) = state - - dtype = inputs.dtype - input_size = inputs.get_shape().with_rank(2)[1] - if input_size.value is None: - raise ValueError("Could not infer input size from inputs.get_shape()[-1]") - scope = vs.get_variable_scope() - with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: - - # i = input_gate, j = new_input, f = forget_gate, o = output_gate - lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True, - bias_initializer=None, layer_norm=self._layer_norm) - i, j, f, o = array_ops.split( - value=lstm_matrix, num_or_size_splits=4, axis=1) - - if self._layer_norm: - i = _norm(self._norm_gain, self._norm_shift, i, "input") - j = _norm(self._norm_gain, self._norm_shift, j, "transform") - f = _norm(self._norm_gain, self._norm_shift, f, "forget") - o = _norm(self._norm_gain, self._norm_shift, o, "output") - - # Diagonal connections - if self._use_peepholes: - with vs.variable_scope(unit_scope) as projection_scope: - w_f_diag = vs.get_variable( - "w_f_diag", shape=[self._num_units], dtype=dtype) - w_i_diag = vs.get_variable( - "w_i_diag", shape=[self._num_units], dtype=dtype) - w_o_diag = vs.get_variable( - "w_o_diag", shape=[self._num_units], dtype=dtype) - - if self._use_peepholes: - c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + - sigmoid(i + w_i_diag * c_prev) * self._activation(j)) - else: - c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * - self._activation(j)) - - if self._layer_norm: - c = _norm(self._norm_gain, self._norm_shift, c, "state") - - if self._cell_clip is not None: - # pylint: disable=invalid-unary-operand-type - c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) - # pylint: enable=invalid-unary-operand-type - if self._use_peepholes: - m = sigmoid(o + w_o_diag * c) * self._activation(c) - else: - m = sigmoid(o) * self._activation(c) - - if self._num_proj is not None: - with vs.variable_scope("projection") as proj_scope: - m = self._linear(m, self._num_proj, bias=False) - - if self._proj_clip is not None: - # pylint: disable=invalid-unary-operand-type - m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) - # pylint: enable=invalid-unary-operand-type - - new_state = (rnn_cell_impl.LSTMStateTuple(c, m)) - return m, new_state diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index c3b180d9f4..87230e3355 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism): memory_sequence_length=None, memory_layer=None, check_inner_dims_defined=True, - score_mask_value=None, + score_mask_value=float("-inf"), name=None): """Construct base AttentionMechanism class. @@ -187,12 +187,9 @@ class _BaseAttentionMechanism(AttentionMechanism): "memory_layer is not a Layer: %s" % type(memory_layer).__name__) self._query_layer = query_layer self._memory_layer = memory_layer - self.dtype = memory_layer.dtype if not callable(probability_fn): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) - if score_mask_value is None: - score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -337,8 +334,7 @@ class LuongAttention(_BaseAttentionMechanism): memory_sequence_length=None, scale=False, probability_fn=None, - score_mask_value=None, - dtype=None, + score_mask_value=float("-inf"), name="LuongAttention"): """Construct the AttentionMechanism mechanism. @@ -357,20 +353,17 @@ class LuongAttention(_BaseAttentionMechanism): score_mask_value: (optional) The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. - dtype: The data type for the memory layer of the attention mechanism. name: Name to use when creating ops. """ # For LuongAttention, we only transform the memory layer; thus # num_units **must** match expected the query depth. if probability_fn is None: probability_fn = nn_ops.softmax - if dtype is None: - dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(LuongAttention, self).__init__( query_layer=None, memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False, dtype=dtype), + num_units, name="memory_layer", use_bias=False), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -482,8 +475,7 @@ class BahdanauAttention(_BaseAttentionMechanism): memory_sequence_length=None, normalize=False, probability_fn=None, - score_mask_value=None, - dtype=None, + score_mask_value=float("-inf"), name="BahdanauAttention"): """Construct the Attention mechanism. @@ -502,20 +494,16 @@ class BahdanauAttention(_BaseAttentionMechanism): score_mask_value: (optional): The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. - dtype: The data type for the query and memory layers of the attention - mechanism. name: Name to use when creating ops. """ if probability_fn is None: probability_fn = nn_ops.softmax - if dtype is None: - dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(BahdanauAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False, dtype=dtype), + num_units, name="query_layer", use_bias=False), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False, dtype=dtype), + num_units, name="memory_layer", use_bias=False), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -750,12 +738,11 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, normalize=False, - score_mask_value=None, + score_mask_value=float("-inf"), sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", - dtype=None, name="BahdanauMonotonicAttention"): """Construct the Attention mechanism. @@ -779,21 +766,17 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. - dtype: The data type for the query and memory layers of the attention - mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters - if dtype is None: - dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(BahdanauMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False, dtype=dtype), + num_units, name="query_layer", use_bias=False), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False, dtype=dtype), + num_units, name="memory_layer", use_bias=False), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -851,12 +834,11 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, scale=False, - score_mask_value=None, + score_mask_value=float("-inf"), sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", - dtype=None, name="LuongMonotonicAttention"): """Construct the Attention mechanism. @@ -880,21 +862,17 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. - dtype: The data type for the query and memory layers of the attention - mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters - if dtype is None: - dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(LuongMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False, dtype=dtype), + num_units, name="query_layer", use_bias=False), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False, dtype=dtype), + num_units, name="memory_layer", use_bias=False), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -1145,9 +1123,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, name="attention_layer", use_bias=False, - dtype=attention_mechanisms[i].dtype) - for i, attention_layer_size in enumerate(attention_layer_sizes)) + attention_layer_size, name="attention_layer", use_bias=False) + for attention_layer_size in attention_layer_sizes) self._attention_layer_size = sum(attention_layer_sizes) else: self._attention_layers = None diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index f7a85557ca..0bfd0801d5 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -237,7 +237,7 @@ One way to reduce this code duplication would be via a `for` loop: ```python net = ... for i in range(3): - net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1)) + net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1)) net = slim.max_pool2d(net, [2, 2], scope='pool2') ``` diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py index 576444214d..b4fd2580c2 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py @@ -386,7 +386,7 @@ class ResnetCompleteNetworkTest(test.TestCase): inputs, None, is_training=False, global_pool=False) sess.run(variables.global_variables_initializer()) self.assertAllClose( - output.eval(), expected.eval(), atol=2e-4, rtol=1e-4) + output.eval(), expected.eval(), atol=1e-4, rtol=1e-4) def testUnknownBatchSize(self): batch = 2 diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index dcb390b0a5..da5f2b0223 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -1,4 +1,4 @@ -## How to compile, use and configure RDMA-enabled TensorFlow +## How to compile and use RDMA-enabled TensorFlow 1. Follow the regular TF compilation instructions. During configure step, if you want ibverbs based RDMA support, answer yes to this question: ```Do you wish to build TensorFlow with VERBS-RDMA support [y/N]``` @@ -7,18 +7,6 @@ ```server = tf.train.Server(cluster, job_name="local", task_index=0, protocol='grpc+verbs') # default protocol is 'grpc'``` -3. RDMA configuration is done by setting the following environment variables: - * **RDMA_DEVICE**: The RDMA device name to be used. If not defined by user, a default device with an active port will be set if exists. - * **RDMA_DEVICE_PORT**: The port within the selected device. Not relevant if RDMA_DEVICE is not defined. If not defined by user, a default active port will be set if exists. - * **RDMA_GID_INDEX**: The GID index of the port. If not defined by user, a default suitable GID index will be set (RoCEV2 is favourable as default). - * **RDMA_QP_PKEY_INDEX**: The Pkey for the QP. If not defined by user, the default value is 0. - * **RDMA_QP_QUEUE_DEPTH**: TX/RX queue size for the QP. If not defined by user, the default value is 1024. - * **RDMA_QP_TIMEOUT**: The retransmission timeout for QPs. If not defined by user, the default value is 14. - * **RDMA_QP_RETRY_COUNT**: Number of retransmission for QPs. If not defined by user, the default value is 7. - * **RDMA_QP_SL**: Service level configuration for QOS and ECN, valid values are 0-7. If not defined by user, the default value is 0. - * **RDMA_QP_MTU**: MTU configuration for the QPs. If not defined by user, the default value is active MTU from query_port. - * **RDMA_TRAFFIC_CLASS**: Traffic class configuration for QP, in case of DSCP trust level QoS configuration. If not defined by user, the default value is 0. For more info see [HowTo Configure Trust state on Mellanox Adapters](https://community.mellanox.com/docs/DOC-2866). - ## Overview The design is based on TensorFlow r1.0. An RDMA path is added between servers for tensor transfer (weights, gradients, etc). The existing GRPC path remains and is responsible for "administrative" tasks, such as setting up the RDMA path, exchanging computation graphs, etc. diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 331943a3ef..26e18b28aa 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -17,7 +17,6 @@ limitations under the License. #include "tensorflow/contrib/verbs/rdma.h" #include -#include #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -34,8 +33,6 @@ limitations under the License. namespace tensorflow { -#define RoCE_V2 "RoCE v2" - namespace { // hash name to 32-bit integer uint32_t NameHash(const string& name) { @@ -69,337 +66,16 @@ string MessageTypeToString(RdmaMessageType rmt) { } } // namespace -// Function to get environment variable -// Args: -// var_name - the name of the environmental variable -// Returns: -// string with it's value or empty string if not set -string get_env_var(char const* var_name) { - char const* var_temp = getenv(var_name); - - return (var_temp == NULL) ? string() : string(var_temp); -} - -// Function to open device -// Args: -// ibv_dev device to open -// Returns: -// context of the opened device -ibv_context* open_device(ibv_device* ibv_dev) { - ibv_context* context = ibv_open_device(ibv_dev); - - CHECK(context) << "Open context failed for " << ibv_get_device_name(ibv_dev); - return context; -} - -// Function to count the number of active ports for device -// Args: -// device - to check active ports -// Returns: -// number of active ports of the given device -int get_dev_active_port_count(ibv_device* device) { - ibv_device_attr device_att; - ibv_port_attr port_attr; - ibv_context* context = NULL; - int rc, port_index, active_ports = 0; - - context = ibv_open_device(device); - CHECK(context) << "Open context failed for " << ibv_get_device_name(device); - rc = ibv_query_device(context, &device_att); - CHECK(!rc) << "Failed to query the device"; - - for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { - rc = ibv_query_port(context, port_index, &port_attr); - CHECK(!rc) << "Failed to query the port" << port_index; - if (port_attr.state == IBV_PORT_ACTIVE) { - active_ports++; - } - } - ibv_close_device(context); - return active_ports; -} - -// Function to set device. If RDMA_DEVICE not set, search for device with active -// port. -// Fails if more than one device with active port was found. -// Returns: -// device to use -ibv_device* set_device() { +ibv_context* open_default_device() { ibv_device** dev_list; - int dev_num, device_index, device_to_open = 0; - int num_devs_with_active_port = 0; - string env_p_rdma_device, str_port_num; - - dev_list = ibv_get_device_list(&dev_num); + ibv_device* ib_dev; + dev_list = ibv_get_device_list(NULL); CHECK(dev_list) << "No InfiniBand device found"; - - env_p_rdma_device = get_env_var("RDMA_DEVICE"); - if (!env_p_rdma_device.empty()) { - for (device_index = 0; device_index < dev_num; device_index++) { - if (!env_p_rdma_device.compare( - ibv_get_device_name(dev_list[device_index]))) { - CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) - << "Device " << ibv_get_device_name(dev_list[device_index]) - << " has no active ports"; - return dev_list[device_index]; - } - } - // check validity of input device - CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; - } else { - // set default device - str_port_num = get_env_var("RDMA_DEVICE_PORT"); - CHECK(str_port_num.empty()) - << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; - for (device_index = 0; device_index < dev_num; device_index++) { - // get port_num - if (get_dev_active_port_count(dev_list[device_index]) > 0) { - num_devs_with_active_port++; - CHECK(num_devs_with_active_port <= 1) << ". More than one device with " - "active port in the system. " - "Please enter RDMA_DEVICE"; - // found device with at least 1 active port - device_to_open = device_index; - } - } - CHECK(num_devs_with_active_port > 0) - << "There is no active port in the system"; - return dev_list[device_to_open]; - } - CHECK(false) << "No device was set!"; - return NULL; // never happens -} - -// Function to set port for device. -// If RDMA_DEVICE_PORT not set, first active port of the device will be set. -// Args: -// context of the device -// Returns: -// port to use -uint8_t set_port(ibv_context* context) { - uint8_t port_num = 0; //0 is illegal port number - string str_port_num; - ibv_device_attr device_att; - ibv_port_attr port_attr; - int rc, port_index; - - rc = ibv_query_device(context, &device_att); - CHECK(!rc) << "Failed to query the device\n"; - - str_port_num = get_env_var("RDMA_DEVICE_PORT"); - // user defined port - if (!str_port_num.empty()) { - port_num = stoi(str_port_num); - CHECK(port_num > 0) << "RDMA_DEVICE_PORT should be positive"; - CHECK(port_num <= device_att.phys_port_cnt) << "RDMA_DEVICE_PORT should be " - "less or equal to amount of " - "available ports"; - rc = ibv_query_port(context, port_num, &port_attr); - CHECK(!rc) << "Failed to query the port" << port_num; - // check if port id active - CHECK(port_attr.state == IBV_PORT_ACTIVE) - << "Selected RDMA_DEVICE_PORT is not active"; - } - // set default port - else { - for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { - rc = ibv_query_port(context, port_index, &port_attr); - CHECK(!rc) << "Failed to query the port" << port_index; - if (port_attr.state == IBV_PORT_ACTIVE) { - port_num = port_index; - break; - } - } - CHECK_GT(port_num, 0) << "No active ports"; - } - return port_num; -} - -// Function read from sysfs file -// Args: -// dir - directory -// file - file -// buff - buffer for the result -// size - buffer size -// Returns: -// number of bytes were read or -1 if failed -int read_sysfs_file(const char* dir, const char* file, char* buf, size_t size) { - char* path; - int fd; - int len; - - if (asprintf(&path, "%s/%s", dir, file) < 0) return -1; - - fd = open(path, O_RDONLY); - if (fd < 0) { - free(path); - return -1; - } - - len = read(fd, buf, size); - - close(fd); - free(path); - - if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0'; - - return len; -} - -// Function to check if GID index support RoCE V2 -// Args: -// context - device context -// port_num - port number -// index - GID index -// Returns: -// if GID supports RoCE V2 - true, otherwise - false. -bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, - uint8_t index) { - char name[32]; - char buff[41]; - - snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index); - if (read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <= - 0) { - return false; - } - return !strcmp(buff, RoCE_V2); -} - -// Function to set GID index. -// If the port link is IB, no GID index should be selected. -// If Ethernet but RDMA_GID_INDEX not set gid index that supports -// RoCE V2 will be chosen(fails if more then one IP is configured) -// Args: -// context - device context -// port_num - port number -// Returns: -// GID index to use -uint8_t set_gid(uint8_t port_num, ibv_context* context) { - ibv_port_attr port_attr; - string gid_str; - int rc, i, gids_num = 0, v2_ip_num = 0; - union ibv_gid gid; - uint8_t gid_index = 0; - - rc = ibv_query_port(context, port_num, &port_attr); - CHECK(!rc) << "Failed to query the port" << port_num; - - for (i = 0; i < port_attr.gid_tbl_len; i++) { - rc = ibv_query_gid(context, port_num, i, &gid); - CHECK(!rc) << "Failed to query gid to port " << (int)port_num << " index " - << i; - if (gid.global.interface_id) { - gids_num++; - if (gid.global.subnet_prefix == 0 && - is_gid_type_roce_v2(context, port_num, i)) { - if (v2_ip_num == 0) { - // can be overwritten by RDMA_GID_INDEX later - gid_index = i; - } - v2_ip_num++; - } - } - } - switch (port_attr.link_layer) { - case(IBV_LINK_LAYER_ETHERNET) : - gid_str = get_env_var("RDMA_GID_INDEX"); - if (!gid_str.empty()) { - gid_index = stoi(gid_str); - CHECK(gid_index < gids_num) - << "RDMA_GID_INDEX should be less than GIDs amount" << gids_num; - } else { - CHECK(v2_ip_num <= 1) - << "More than one IP is available, please specify GID_INDEX"; - } - break; - case(IBV_LINK_LAYER_INFINIBAND) : // no need in GID index - break; - default: - LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " - "InfiniBand only. "; - } - if (!is_gid_type_roce_v2(context, port_num, gid_index)) { - LOG(INFO) << "RoCE v2 is not configured for GID_INDEX " << (int)gid_index; - } - return gid_index; -} - -// set the default or environment value to the configuration parameter. -// Args: -// default_val- the default value for this parameter -// env_param- the environment parameter's name -// Returns: -// 32-bit value -uint32_t set_param(uint32_t default_val, const char* env_param) { - uint32_t val = default_val; - string val_s; - - val_s = get_env_var(env_param); - - if (!val_s.empty()) { - val = stoi(val_s); - } - return val; -} - -enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { - ibv_port_attr port_attr; - enum ibv_mtu mtu; - string mtu_s; - int rc, mtu_i; - - rc = ibv_query_port(context, port_num, &port_attr); - CHECK(!rc) << "Failed to query the port" << port_num; - - mtu_s = get_env_var("RDMA_MTU"); - - if (!mtu_s.empty()) { - mtu_i = stoi(mtu_s); - switch (mtu_i) { - case 256: - mtu = IBV_MTU_256; - break; - case 512: - mtu = IBV_MTU_512; - break; - case 1024: - mtu = IBV_MTU_1024; - break; - case 2048: - mtu = IBV_MTU_2048; - break; - case 4096: - mtu = IBV_MTU_4096; - break; - default: - CHECK(0) << "Error: MTU input value must be one of the following: 256, " - "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n"; - break; - } - CHECK(mtu < port_attr.active_mtu) - << "MTU configuration for the QPs is larger than active MTU"; - } else { - mtu = port_attr.active_mtu; - } - return mtu; -} - -RdmaParams params_init(ibv_context* context) { - RdmaParams params; - - params.port_num = set_port(context); - params.sgid_index = set_gid(params.port_num, context); - params.pkey_index = (uint8_t)set_param(PKEY_DEFAULT, "RDMA_PKEY"); - params.queue_depth = set_param(QUEUE_DEPTH_DEFAULT, "RDMA_QUEUE_DEPTH"); - params.timeout = (uint8_t)set_param(TIMEOUT_DEFAULT, "RDMA_TIMEOUT"); - params.retry_cnt = (uint8_t)set_param(RETRY_CNT_DEFAULT, "RDMA_RETRY_CNT"); - params.sl = (uint8_t)set_param(SL_DEFAULT, "RDMA_SL"); - CHECK(params.sl <= 7) << "SL value is " << (int)params.sl - << ". Valid values are 0-7."; - params.mtu = set_mtu(params.port_num, context); - params.traffic_class = set_param(TRAFFIC_CLASS, "RDMA_TRAFFIC_CLASS"); - return params; + ib_dev = dev_list[0]; + CHECK(ib_dev) << "No InfiniBand device found"; + ibv_context* context = ibv_open_device(ib_dev); + CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev); + return context; } ibv_pd* alloc_protection_domain(ibv_context* context) { @@ -409,8 +85,7 @@ ibv_pd* alloc_protection_domain(ibv_context* context) { } RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env) - : context_(open_device(set_device())), - params_(params_init(context_)), + : context_(open_default_device()), pd_(alloc_protection_domain(context_)), worker_env_(worker_env) { event_channel_ = ibv_create_comp_channel(context_); @@ -453,9 +128,9 @@ void RdmaAdapter::Process_CQ() { CHECK_GE(ne, 0); for (int i = 0; i < ne; ++i) { CHECK(wc_[i].status == IBV_WC_SUCCESS) - << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " " - << wc_[i].status << " " << static_cast(wc_[i].wr_id) << " " - << wc_[i].vendor_err; + << "Failed status \n" + << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " " + << static_cast(wc_[i].wr_id) << " " << wc_[i].vendor_err; if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) { RdmaChannel* rc = reinterpret_cast(wc_[i].wr_id); // put back a recv wr. @@ -567,8 +242,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, memset(&attr, 0, sizeof(ibv_qp_init_attr)); attr.send_cq = adapter_->cq_; attr.recv_cq = adapter_->cq_; - attr.cap.max_send_wr = adapter_->params_.queue_depth; - attr.cap.max_recv_wr = adapter_->params_.queue_depth; + attr.cap.max_send_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; + attr.cap.max_recv_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; attr.cap.max_send_sge = 1; attr.cap.max_recv_sge = 1; attr.qp_type = IBV_QPT_RC; @@ -582,8 +257,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_INIT; - attr.pkey_index = adapter_->params_.pkey_index; - attr.port_num = adapter_->params_.port_num; + attr.pkey_index = 0; + attr.port_num = 1; attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; int mask = @@ -594,15 +269,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // Local address { struct ibv_port_attr attr; - CHECK( - !ibv_query_port(adapter_->context_, adapter_->params_.port_num, &attr)) + CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &attr)) << "Query port"; self_.lid = attr.lid; self_.qpn = qp_->qp_num; self_.psn = static_cast(random::New64()) & 0xffffff; union ibv_gid gid; - CHECK(!ibv_query_gid(adapter_->context_, adapter_->params_.port_num, - adapter_->params_.sgid_index, &gid)) + CHECK(!ibv_query_gid(adapter_->context_, (uint8_t)1, 0, &gid)) << "Query gid"; self_.snp = gid.global.subnet_prefix; self_.iid = gid.global.interface_id; @@ -611,7 +284,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // create message and ack buffers, then initialize the tables. { const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer", - "tx_ack_buffer", "rx_ack_buffer"}; + "tx_ack_buffer", "rx_ack_buffer"}; tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]); rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]); tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]); @@ -672,7 +345,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) { void RdmaChannel::Recv() { struct ibv_recv_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t) this; + wr.wr_id = (uint64_t)this; struct ibv_recv_wr* bad_wr; CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv"; } @@ -806,9 +479,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTR; - + struct ibv_port_attr port_attr; + CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &port_attr)) + << "Query port failed"; // This assumes both QP's ports are configured with the same MTU - attr.path_mtu = adapter_->params_.mtu; + attr.path_mtu = port_attr.active_mtu; attr.dest_qp_num = remoteAddr.qpn; attr.rq_psn = remoteAddr.psn; attr.max_dest_rd_atomic = 1; @@ -819,32 +494,30 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.flow_label = 0; attr.ah_attr.grh.hop_limit = 255; attr.ah_attr.dlid = remoteAddr.lid; - attr.ah_attr.sl = adapter_->params_.sl; + attr.ah_attr.sl = 0; attr.ah_attr.src_path_bits = 0; - attr.ah_attr.port_num = adapter_->params_.port_num; - attr.ah_attr.grh.sgid_index = adapter_->params_.sgid_index; - attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; + attr.ah_attr.port_num = 1; int r; - CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | - IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTS; attr.sq_psn = self_.psn; - attr.timeout = adapter_->params_.timeout; - attr.retry_cnt = adapter_->params_.retry_cnt; + attr.timeout = 14; + attr.retry_cnt = 7; attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; - CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | - IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; connected_ = true; @@ -931,7 +604,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) { struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t) this; + wr.wr_id = (uint64_t)this; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -1026,9 +699,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { - CHECK(send_args.device_context) << "send dev name: " << src_dev->name() - << " gpu_info: " - << src_dev->tensorflow_gpu_device_info(); + CHECK(send_args.device_context) + << "send dev name: " << src_dev->name() + << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); if (can_memcpy) { AllocatorAttributes host_alloc_attrs; @@ -1054,8 +727,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( // aync instead GPUUtil::SetProtoFromGPU( in, src_dev, send_args.device_context, &proto, is_dead, - [this, proto, buffer_size, key, in, step_id, key_with_step_id, - is_dead, send_args, recv_args](const Status& s) mutable { + [this, proto, buffer_size, key, in, step_id, key_with_step_id, + is_dead, send_args, recv_args](const Status& s) mutable { CHECK(s.ok()) << "copy proto from gpu sync"; auto tensor_bytes = proto.ByteSize(); buffer_size += tensor_bytes; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 52d92a7c5b..e1e07db776 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -36,24 +36,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" namespace tensorflow { -#define PKEY_DEFAULT 0 -#define QUEUE_DEPTH_DEFAULT 1024 -#define TIMEOUT_DEFAULT 14 -#define RETRY_CNT_DEFAULT 7 -#define SL_DEFAULT 0 -#define TRAFFIC_CLASS 0 - -struct RdmaParams { - uint8_t port_num; - uint8_t sgid_index; - uint8_t pkey_index; - uint32_t queue_depth; - uint8_t timeout; - uint8_t retry_cnt; - uint8_t sl; - enum ibv_mtu mtu; - uint8_t traffic_class; -}; + // structure to save the address of remote channels. struct RdmaAddress { uint32_t lid; @@ -67,20 +50,9 @@ struct RemoteMR { uint64_t remote_addr; uint32_t rkey; }; -enum BufferStatus { - none, - idle, - busy -}; -enum Location { - local, - remote -}; -enum BufferType { - ACK, - MESSAGE, - TENSOR -}; +enum BufferStatus { none, idle, busy }; +enum Location { local, remote }; +enum BufferType { ACK, MESSAGE, TENSOR }; enum RdmaMessageType { RDMA_MESSAGE_ACK, RDMA_MESSAGE_BUFFER_IDLE, @@ -112,8 +84,6 @@ class RdmaAdapter { protected: static const int MAX_CONCURRENT_WRITES = 1000; ibv_context* context_; - // RDMA configuration parameters - RdmaParams params_; // ibverbs protection domain ibv_pd* pd_; // Completion event channel, to wait for work completions @@ -213,7 +183,7 @@ class RdmaBuffer { } void FreeBuffer(); void EnqueueItem(string Item); - virtual void SendNextItem() {}; + virtual void SendNextItem(){}; void CreateCPUBuffer(size_t size, bool lock = true); void SetRemoteMR(RemoteMR rmi, bool override); uint32_t LookupBufferIndex(const string& buffer_name) { diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 30ff4ef358..d71f314e11 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2710,7 +2710,6 @@ tf_cc_test_mkl( srcs = [ "graph/mkl_layout_pass_test.cc", "graph/mkl_tfconversion_pass_test.cc", - "util/mkl_util_test.cc", ], linkstatic = 1, deps = [ diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 63b74e8dbf..53e80b1ee3 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -81,7 +81,7 @@ class MklCPUAllocator : public Allocator { } #if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) if (user_val > max_mem_bytes) { - LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr + LOG(WARNING) << "The user specifed a memory limit " << kMaxLimitStr << "=" << user_val << " greater than available physical memory: " << max_mem_bytes diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h index cc272d156e..9caa076c72 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.h +++ b/tensorflow/core/common_runtime/sycl/sycl_device.h @@ -46,8 +46,8 @@ class GSYCLInterface { if (!found_device) { // Currently Intel GPU is not supported - LOG(WARNING) << "No OpenCL GPU found that is supported by " - << "ComputeCpp/triSYCL, trying OpenCL CPU"; + LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, " + "trying OpenCL CPU"; } for (const auto& device : device_list) { @@ -58,24 +58,10 @@ class GSYCLInterface { } } - if (!found_device) { - LOG(WARNING) << "No OpenCL CPU found that is supported by " - << "ComputeCpp/triSYCL, checking for host sycl device"; - } - - for (const auto& device : device_list) { - // triSYCL only supports the host device for now - if (device.is_host()) { - LOG(WARNING) << "Found SYCL host device"; - AddDevice(device); - found_device = true; - } - } - if (!found_device) { // Currently Intel GPU is not supported - LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU" - << " supported by ComputeCPP/triSYCL was found"; + LOG(FATAL) + << "No OpenCL GPU nor CPU found that is supported by ComputeCpp"; } else { LOG(INFO) << "Found following OpenCL devices:"; for (int i = 0; i < device_list.size(); i++) { diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index fd1b5d33b9..87c41186d5 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -453,21 +453,6 @@ const Edge* Graph::AddControlEdge(Node* source, Node* dest, return AddEdge(source, kControlSlot, dest, kControlSlot); } -void Graph::RemoveControlEdge(const Edge* e) { - if (!e->src_->IsSource() && !e->dst_->IsSink()) { - e->dst_->MaybeCopyOnWrite(); - std::string e_src_name = strings::StrCat("^", e->src_->name()); - auto* inputs = e->dst_->props_->node_def.mutable_input(); - for (auto it = inputs->begin(); it != inputs->end(); ++it) { - if (*it == e_src_name) { - inputs->erase(it); - break; - } - } - } - RemoveEdge(e); -} - Status Graph::UpdateEdge(Node* new_src, int new_src_index, Node* dst, int dst_index) { TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index)); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index d0dba6e1f0..c5dde722fa 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -451,11 +451,6 @@ class Graph { // REQUIRES: The edge must exist. void RemoveEdge(const Edge* edge); - // Removes control edge `edge` from the graph. Note that this also updates - // the corresponding NodeDef to reflect the change. - // REQUIRES: The control edge must exist. - void RemoveControlEdge(const Edge* e); - // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index 1924c05d3d..b9e3cba035 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -117,7 +117,7 @@ DataType EdgeType(const Edge* e) { } } -// Return true iff we need to add the same device send/recv for 'edge'. +// Return true iff we need to add a same device send/recv for 'edge'. bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { if (edge->IsControlEdge()) { return false; @@ -1116,7 +1116,7 @@ Status Partition(const PartitionOptions& opts, Graph* g, // before the data is available. AddInput(real_recv, send->name(), Graph::kControlSlot); } else if (control_flow_edge != nullptr) { - // Redirect control edge to the real recv since this is not the same + // Redirect control edge to the real recv since this is not a same // device send/recv. --num_control_flow_edges; AddInput(real_recv, control_flow_edge->src()->name(), diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index 2aa1b31e15..7686cef219 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -118,25 +118,6 @@ class GraphTest : public ::testing::Test { LOG(FATAL) << name; } - bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, - const Node* dst) { - for (const Edge *e : dst->in_edges()) { - if (e->IsControlEdge() && - e->src() == src && - e->src_output() == Graph::kControlSlot && - e->dst_input() == Graph::kControlSlot) { - return true; - } - } - std::string control_edge_name = strings::StrCat("^", src->name()); - for (int i = 0; i < dst->def().input_size(); ++i) { - if (dst->def().input(i) == control_edge_name) { - return true; - } - } - return false; - } - Graph graph_; private: @@ -477,8 +458,8 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_TRUE(edge == nullptr); EXPECT_EQ(b->def().input_size(), 2); - // Can add redundant control edge with allow_duplicates. - edge = graph_.AddControlEdge(a, b, /*allow_duplicates=*/true); + // Can add redundant control edge with create_duplicate. + edge = graph_.AddControlEdge(a, b, /*create_duplicate=*/true); EXPECT_TRUE(edge != nullptr); // create_duplicate causes the NodeDef not to be updated. ASSERT_EQ(b->def().input_size(), 2); @@ -496,47 +477,6 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_EQ(b->def().input_size(), 2); } -TEST_F(GraphTest, RemoveControlEdge) { - FromGraphDef( - "node { name: 'A' op: 'OneOutput' }" - "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }" - "node { name: 'C' op: 'NoOp' } "); - Node* a = FindNode("A"); - Node* b = FindNode("B"); - Node* c = FindNode("C"); - - // Add a control edge. - const Edge* edge_1 = graph_.AddControlEdge(c, a); - const Edge* edge_2 = graph_.AddControlEdge(a, b); - ASSERT_TRUE(edge_1 != nullptr); - ASSERT_TRUE(edge_2 != nullptr); - - ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(c, a)); - ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); - - graph_.RemoveControlEdge(edge_1); - ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); - ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); - - graph_.RemoveControlEdge(edge_2); - ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); - ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(a, b)); - - // Test removing a duplicate control edge. - // Note that unless allow_duplicates is true, the duplicate edge - // will not be added. That's why we expect edge_4 to be a null - // pointer. We are not testing with allow_duplicates set to true, - // as that is a highly unlikely use case that does not make much - // sense. - const Edge* edge_3 = graph_.AddControlEdge(c, a); - const Edge* edge_4 = graph_.AddControlEdge(c, a); - ASSERT_TRUE(edge_3 != nullptr); - ASSERT_TRUE(edge_4 == nullptr); - - graph_.RemoveControlEdge(edge_3); - ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); -} - TEST_F(GraphTest, UpdateEdge) { // Build a little graph Node* a = FromNodeDef("A", "OneOutput", 0); diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index 880e4e712e..cb32d64334 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -21,108 +21,107 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { - // Since our ops are going to produce and also consume N addition tensors - // (Mkl) for N Tensorflow tensors, we can have following different - // orderings among these 2N tensors. - // - // E.g., for Tensorflow tensors A, B, and C, our ops will produce and - // consume A_m, B_m, and C_m additionally. - // - // INTERLEAVED: in this case 2N tensors are interleaved. So for above - // example, the ordering looks like: A, A_m, B, B_m, C, C_m. - // - // CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed - // by N Mkl tensors. So for above example, the ordering looks - // like: A, B, C, A_m, B_m, C_m - // - // Following APIs map index of original Tensorflow tensors to their - // appropriate position based on selected ordering. For contiguous ordering, - // we need to know the total number of tensors (parameter total). - // - typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; - // NOTE: Currently, we use contiguous ordering. If you change this, then you - // would need to change Mkl op definitions in nn_ops.cc. - static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; +// Since our ops are going to produce and also consume N addition tensors +// (Mkl) for N Tensorflow tensors, we can have following different +// orderings among these 2N tensors. +// +// E.g., for Tensorflow tensors A, B, and C, our ops will produce and +// consume A_m, B_m, and C_m additionally. +// +// INTERLEAVED: in this case 2N tensors are interleaved. So for above +// example, the ordering looks like: A, A_m, B, B_m, C, C_m. +// +// CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed +// by N Mkl tensors. So for above example, the ordering looks +// like: A, B, C, A_m, B_m, C_m +// +// Following APIs map index of original Tensorflow tensors to their +// appropriate position based on selected ordering. For contiguous ordering, +// we need to know the total number of tensors (parameter total). +// +typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; +// NOTE: Currently, we use contiguous ordering. If you change this, then you +// would need to change Mkl op definitions in nn_ops.cc. +static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; - // Get index of MetaData tensor from index 'n' of Data tensor. - inline int DataIndexToMetaDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - // For interleaved ordering, Mkl tensor follows immediately after - // Tensorflow tensor. - return n + 1; - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. - return n + total_tensors / 2; - } +// Get index of MetaData tensor from index 'n' of Data tensor. +inline int DataIndexToMetaDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + // For interleaved ordering, Mkl tensor follows immediately after + // Tensorflow tensor. + return n + 1; + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. + return n + total_tensors / 2; } +} - int inline GetTensorDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - return 2 * n; // index corresponding to nth input/output tensor - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - return n; - } - } +int inline GetTensorDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + return 2 * n; // index corresponding to nth input/output tensor + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + return n; + } +} - int inline GetTensorMetaDataIndex(int n, int total_tensors) { - // Get index for TensorData first and then use mapping function - // to get TensorMetaData index from TensorData index. - int tidx = GetTensorDataIndex(n, total_tensors); - return DataIndexToMetaDataIndex(tidx, total_tensors); - } +int inline GetTensorMetaDataIndex(int n, int total_tensors) { + // Get index for TensorData first and then use mapping function + // to get TensorMetaData index from TensorData index. + int tidx = GetTensorDataIndex(n, total_tensors); + return DataIndexToMetaDataIndex(tidx, total_tensors); +} namespace mkl_op_registry { - static const char* kMklOpLabel = "MklOp"; - static const char* kMklOpLabelPattern = "label='MklOp'"; +static const char* kMklOpLabel = "MklOp"; +static const char* kMklOpLabelPattern = "label='MklOp'"; - // Get the name of Mkl op from original TensorFlow op - // We prefix 'Mkl' to the original op to get Mkl op. - inline string GetMklOpName(const string& name) { - // Prefix that we add to Tensorflow op name to construct Mkl op name. - const char* const kMklOpPrefix = "_Mkl"; - return string(kMklOpPrefix) + name; - } +// Get the name of Mkl op from original TensorFlow op +// We prefix 'Mkl' to the original op to get Mkl op. +inline string GetMklOpName(const string& name) { + // Prefix that we add to Tensorflow op name to construct Mkl op name. + const char* const kMklOpPrefix = "_Mkl"; + return string(kMklOpPrefix) + name; +} - // Check whether opname with type T is registered as MKL-compliant. - // - // @input: name of the op - // @input: T datatype to be used for checking op - // @return: true if opname is registered as Mkl op; false otherwise - static inline bool IsMklOp(const std::string& op_name, DataType T) { - string kernel = KernelsRegisteredForOp(op_name); - bool result = - kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); - if (result) { - VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; - } - return result; +// Check whether opname with type T is registered as MKL-compliant. +// +// @input: name of the op +// @input: T datatype to be used for checking op +// @return: true if opname is registered as Mkl op; false otherwise +static inline bool IsMklOp(const std::string& op_name, DataType T) { + string kernel = KernelsRegisteredForOp(op_name); + bool result = + kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); + if (result) { + VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; } + return result; +} - // Check whether opname with type T is registered as MKL-compliant and - // is element-wise. - // - // @input: name of the op - // @input: T datatype to be used for checking op - // @return: true if opname is registered as element-wise Mkl op; - // false otherwise - static inline bool IsMklElementWiseOp(const std::string& op_name, - DataType T) { - if (!IsMklOp(op_name, T)) { - return false; - } +// Check whether opname with type T is registered as MKL-compliant and +// is element-wise. +// +// @input: name of the op +// @input: T datatype to be used for checking op +// @return: true if opname is registered as element-wise Mkl op; +// false otherwise +static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) { + if (!IsMklOp(op_name, T)) { + return false; + } - bool result = (0 == op_name.compare(GetMklOpName("Add")) || - 0 == op_name.compare(GetMklOpName("Sub")) || - 0 == op_name.compare(GetMklOpName("Mul")) || - 0 == op_name.compare(GetMklOpName("Maximum")) || - 0 == op_name.compare(GetMklOpName("SquaredDifference"))); + bool result = (0 == op_name.compare(GetMklOpName("Add")) || + 0 == op_name.compare(GetMklOpName("Sub")) || + 0 == op_name.compare(GetMklOpName("Mul")) || + 0 == op_name.compare(GetMklOpName("Maximum")) || + 0 == op_name.compare(GetMklOpName("SquaredDifference"))); - VLOG(1) << "mkl_op_registry::" << op_name - << " is elementwise MKL op: " << result; - return result; - } + VLOG(1) << "mkl_op_registry::" << op_name + << " is elementwise MKL op: " << result; + return result; +} } // namespace mkl_op_registry } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 912075aa28..f4c9073dee 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -37,8 +37,8 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/tensor_format.h" -#include "tensorflow/core/graph/mkl_layout_pass.h" #include "tensorflow/core/graph/mkl_graph_util.h" +#include "tensorflow/core/graph/mkl_layout_pass.h" namespace tensorflow { diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index 599bb88f01..fe4588389e 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -33,8 +33,8 @@ limitations under the License. #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/graph/mkl_tfconversion_pass.h" #include "tensorflow/core/graph/mkl_graph_util.h" +#include "tensorflow/core/graph/mkl_tfconversion_pass.h" namespace tensorflow { @@ -68,7 +68,7 @@ namespace tensorflow { // take place before we hit the op. For this, we add a new op before each // element-wise MKL op to deal with the inputs, called _MklInputConversion. // This pass has been enhanced to add this capability. -// +// // The _MklInputConversion op will check the inputs to the elementwise op and // make sure that either both are in MKL format or both are in TF format, // depending on their initial state and whether broadcast is needed or not. diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 239b5ac244..ee279b7e0a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -58,12 +58,6 @@ class GraphProperties { const std::vector& GetOutputProperties( const string& node_name) const; - static void FillTensorPropertiesFromContext( - const shape_inference::ShapeHandle&, const DataType&, - shape_inference::InferenceContext*, - std::unordered_map* dim_ids, - OpInfo::TensorProperties*); - private: // Inputs GrapplerItem item_; diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 9ab889beb5..7fd1876371 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -62,7 +62,7 @@ const std::set& NodeMap::GetOutputs(const string& node_name) const { void NodeMap::AddNode(const string& name, NodeDef* node) { auto ret = nodes_.insert(std::make_pair(name, node)); CHECK(ret.second) << "Pair (" << name << "," << node - << ") is not inserted because the same key already exists."; + << ") is not inserted because a same key already exists."; } void NodeMap::AddOutput(const string& node_name, const string& output_name) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index f1cb9a1860..d7b457eab7 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -929,25 +929,6 @@ tf_cc_test( ], ) -tf_cuda_cc_test( - name = "bincount_op_test", - size = "small", - srcs = ["bincount_op_test.cc"], - deps = [ - ":bincount_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cuda_cc_test( name = "constant_op_test", size = "small", @@ -1636,10 +1617,7 @@ DYNAMIC_DEPS = [ tf_kernel_library( name = "dynamic_partition_op", prefix = "dynamic_partition_op", - deps = DYNAMIC_DEPS + [ - ":fill_functor", - ":gather_functor", - ] + if_cuda(["@cub_archive//:cub"]), + deps = DYNAMIC_DEPS, ) tf_kernel_library( @@ -1709,7 +1687,7 @@ tf_kernel_library( ], ) -tf_cuda_cc_tests( +tf_cc_tests( name = "dynamic_op_test", size = "small", srcs = [ @@ -2594,9 +2572,8 @@ tf_kernel_library( tf_kernel_library( name = "bucketize_op", - gpu_srcs = ["cuda_device_array.h"], prefix = "bucketize_op", - deps = ARRAY_DEPS, + deps = MATH_DEPS, ) tf_kernel_library( @@ -3197,7 +3174,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/eigen3", - ] + if_cuda(["@cub_archive//:cub"]), + ], ) tf_kernel_library( diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index f918023693..af629d0de8 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -153,8 +153,7 @@ class AvgPoolingOp : public UnaryOp { if (data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, tensor_in, output_shape, - /*propagate_nans=*/false); + stride_, padding_, data_format_, tensor_in, output_shape); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, @@ -409,7 +408,7 @@ class AvgPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape, /*propagate_nans=*/false); + output_shape); } private: @@ -533,7 +532,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape, /*propagate_nans=*/false); + output_shape); } } diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 766d63e3be..1cd5943ef3 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -17,7 +17,6 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" @@ -28,37 +27,46 @@ namespace tensorflow { using thread::ThreadPool; -typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; - -namespace functor { - template -struct BincountFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& arr, - const typename TTypes::ConstTensor& weights, - typename TTypes::Tensor& output) { - int size = output.size(); +class BincountOp : public OpKernel { + public: + explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& arr_t = ctx->input(0); + const Tensor& size_tensor = ctx->input(1); + const Tensor& weights_t = ctx->input(2); + int32 size = size_tensor.scalar()(); + OP_REQUIRES( + ctx, size >= 0, + errors::InvalidArgument("size (", size, ") must be non-negative")); + const bool has_weights = weights_t.NumElements() > 0; + OP_REQUIRES(ctx, !(has_weights && arr_t.shape() != weights_t.shape()), + errors::InvalidArgument( + "If weights are passed, they must have the same shape (" + + weights_t.shape().DebugString() + ") as arr (" + + arr_t.shape().DebugString() + ")")); + const auto arr = arr_t.flat(); + const auto weights = weights_t.flat(); Tensor all_nonneg_t; - TF_RETURN_IF_ERROR(context->allocate_temp( - DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes())); - all_nonneg_t.scalar().device(context->eigen_cpu_device()) = + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DT_BOOL, TensorShape({}), &all_nonneg_t, + AllocatorAttributes())); + all_nonneg_t.scalar().device(ctx->eigen_cpu_device()) = (arr >= 0).all(); - if (!all_nonneg_t.scalar()()) { - return errors::InvalidArgument("Input arr must be non-negative!"); - } + OP_REQUIRES(ctx, all_nonneg_t.scalar()(), + errors::InvalidArgument("Input arr must be non-negative!")); // Allocate partial output bin sums for each worker thread. Worker ids in // ParallelForWithWorkerId range from 0 to NumThreads() inclusive. ThreadPool* thread_pool = - context->device()->tensorflow_cpu_worker_threads()->workers; + ctx->device()->tensorflow_cpu_worker_threads()->workers; const int64 num_threads = thread_pool->NumThreads() + 1; Tensor partial_bins_t; - TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum::value, - TensorShape({num_threads, size}), - &partial_bins_t)); + OP_REQUIRES_OK(ctx, ctx->allocate_temp(weights_t.dtype(), + TensorShape({num_threads, size}), + &partial_bins_t)); auto partial_bins = partial_bins_t.matrix(); partial_bins.setZero(); thread_pool->ParallelForWithWorkerId( @@ -67,7 +75,7 @@ struct BincountFunctor { for (int64 i = start_ind; i < limit_ind; i++) { int32 value = arr(i); if (value < size) { - if (weights.size()) { + if (has_weights) { partial_bins(worker_id, value) += weights(i); } else { // Complex numbers don't support "++". @@ -76,62 +84,25 @@ struct BincountFunctor { } } }); - + TensorShape output_shape({size}); + Tensor* output_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t)); // Sum the partial bins along the 0th axis. Eigen::array reduce_dims({0}); - output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims); - return Status::OK(); - } -}; - -} // namespace functor - -template -class BincountOp : public OpKernel { - public: - explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - const Tensor& arr_t = ctx->input(0); - const Tensor& size_tensor = ctx->input(1); - const Tensor& weights_t = ctx->input(2); - - int32 size = size_tensor.scalar()(); - OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument( - "size (", size, ") must be non-negative")); - - const auto arr = arr_t.flat(); - const auto weights = weights_t.flat(); - Tensor* output_t; - OP_REQUIRES_OK(ctx, - ctx->allocate_output(0, TensorShape({size}), &output_t)); - auto output = output_t->flat(); - OP_REQUIRES_OK(ctx, functor::BincountFunctor::Compute( - ctx, arr, weights, output)); + output_t->flat().device(ctx->eigen_cpu_device()) = + partial_bins.sum(reduce_dims); } }; -#define REGISTER_KERNELS(type) \ +#define REGISTER(TYPE) \ REGISTER_KERNEL_BUILDER( \ - Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ - BincountOp) - -TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); -#undef REGISTER_KERNELS - -#if GOOGLE_CUDA - -#define REGISTER_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("Bincount") \ - .Device(DEVICE_GPU) \ - .HostMemory("size") \ - .TypeConstraint("T"), \ - BincountOp) + Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ + BincountOp) -TF_CALL_int32(REGISTER_KERNELS); -TF_CALL_float(REGISTER_KERNELS); -#undef REGISTER_KERNELS +TF_CALL_NUMBER_TYPES(REGISTER); -#endif // GOOGLE_CUDA +// TODO(ringwalt): Add a GPU implementation. We probably want to take a +// different approach, e.g. threads in a warp each taking a pass over the same +// data, and each thread summing a single bin. } // end namespace tensorflow diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h deleted file mode 100644 index 0f8dd2b82a..0000000000 --- a/tensorflow/core/kernels/bincount_op.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_BINCOUNT_OP_H_ -#define TENSORFLOW_BINCOUNT_OP_H_ - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - -namespace tensorflow { - -namespace functor { - -template -struct BincountFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& arr, - const typename TTypes::ConstTensor& weights, - typename TTypes::Tensor& output); -}; - -} // end namespace functor - -} // end namespace tensorflow - -#endif // TENSORFLOW_BINCOUNT_OP_H_ diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc deleted file mode 100644 index ae9e26ffdf..0000000000 --- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#if GOOGLE_CUDA - -#define EIGEN_USE_GPU - -#include "tensorflow/core/kernels/bincount_op.h" -#include "external/cub_archive/cub/device/device_histogram.cuh" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_kernel_helper.h" - -namespace tensorflow { - -typedef Eigen::GpuDevice GPUDevice; - -namespace functor { - -template -struct BincountFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& arr, - const typename TTypes::ConstTensor& weights, - typename TTypes::Tensor& output) { - if (weights.size() != 0) { - return errors::InvalidArgument( - "Weights should not be passed as it should be " - "handled by unsorted_segment_sum"); - } - if (output.size() == 0) { - return Status::OK(); - } - // In case weight.size() == 0, use CUB - size_t temp_storage_bytes = 0; - const int32* d_samples = arr.data(); - T* d_histogram = output.data(); - int num_levels = output.size() + 1; - int32 lower_level = 0; - int32 upper_level = output.size(); - int num_samples = arr.size(); - const cudaStream_t& stream = GetCudaStream(context); - - // The first HistogramEven is to obtain the temp storage size required - // with d_temp_storage = NULL passed to the call. - auto err = cub::DeviceHistogram::HistogramEven( - /* d_temp_storage */ NULL, - /* temp_storage_bytes */ temp_storage_bytes, - /* d_samples */ d_samples, - /* d_histogram */ d_histogram, - /* num_levels */ num_levels, - /* lower_level */ lower_level, - /* upper_level */ upper_level, - /* num_samples */ num_samples, - /* stream */ stream); - if (err != cudaSuccess) { - return errors::Internal( - "Could not launch HistogramEven to get temp storage: ", - cudaGetErrorString(err), "."); - } - Tensor temp_storage; - TF_RETURN_IF_ERROR(context->allocate_temp( - DataTypeToEnum::value, - TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); - - void* d_temp_storage = temp_storage.flat().data(); - // The second HistogramEven is to actual run with d_temp_storage - // allocated with temp_storage_bytes. - err = cub::DeviceHistogram::HistogramEven( - /* d_temp_storage */ d_temp_storage, - /* temp_storage_bytes */ temp_storage_bytes, - /* d_samples */ d_samples, - /* d_histogram */ d_histogram, - /* num_levels */ num_levels, - /* lower_level */ lower_level, - /* upper_level */ upper_level, - /* num_samples */ num_samples, - /* stream */ stream); - if (err != cudaSuccess) { - return errors::Internal("Could not launch HistogramEven: ", - cudaGetErrorString(err), "."); - } - return Status::OK(); - } -}; - -} // end namespace functor - -#define REGISTER_GPU_SPEC(type) \ - template struct functor::BincountFunctor; - -TF_CALL_int32(REGISTER_GPU_SPEC); -TF_CALL_float(REGISTER_GPU_SPEC); -#undef REGISTER_GPU_SPEC - -} // namespace tensorflow - -#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc deleted file mode 100644 index 14becc87a7..0000000000 --- a/tensorflow/core/kernels/bincount_op_test.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/graph/node_builder.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/platform/test_benchmark.h" - -namespace tensorflow { - -static Graph* Bincount(int arr_size, int nbins) { - Graph* g = new Graph(OpRegistry::Global()); - - Tensor arr(DT_INT32, TensorShape({arr_size})); - arr.flat() = arr.flat().setRandom().abs(); - - Tensor size(DT_INT32, TensorShape({(int32)1})); - size.flat()(0) = (int32)nbins; - - Tensor weights(DT_INT32, TensorShape({0})); - - Node* node; - TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Bincount") - .Input(test::graph::Constant(g, arr)) - .Input(test::graph::Constant(g, size)) - .Input(test::graph::Constant(g, weights)) - .Attr("T", DT_INT32) - .Finalize(g, &node)); - return g; -} - -#define BM_BincountDev(K, NBINS, type) \ - static void BM_Bincount##_##type##_##K##_##NBINS(int iters) { \ - testing::ItemsProcessed(static_cast(iters) * K * 1024); \ - test::Benchmark(#type, Bincount(K * 1024, NBINS)).Run(iters); \ - } \ - BENCHMARK(BM_Bincount##_##type##_##K##_##NBINS); - -BM_BincountDev(32, 1000, cpu); -BM_BincountDev(32, 2000, cpu); -BM_BincountDev(32, 5000, cpu); -BM_BincountDev(64, 1000, cpu); -BM_BincountDev(64, 2000, cpu); -BM_BincountDev(64, 5000, cpu); -BM_BincountDev(128, 1000, cpu); -BM_BincountDev(128, 2000, cpu); -BM_BincountDev(128, 5000, cpu); - -BM_BincountDev(32, 1000, gpu); -BM_BincountDev(32, 2000, gpu); -BM_BincountDev(32, 5000, gpu); -BM_BincountDev(64, 1000, gpu); -BM_BincountDev(64, 2000, gpu); -BM_BincountDev(64, 5000, gpu); -BM_BincountDev(128, 1000, gpu); -BM_BincountDev(128, 2000, gpu); -BM_BincountDev(128, 5000, gpu); - -} // end namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc index c1693de538..93c2d01221 100644 --- a/tensorflow/core/kernels/bucketize_op.cc +++ b/tensorflow/core/kernels/bucketize_op.cc @@ -15,43 +15,15 @@ limitations under the License. // See docs in ../ops/math_ops.cc. -#include "tensorflow/core/kernels/bucketize_op.h" +#include +#include + #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/lib/core/errors.h" namespace tensorflow { -using thread::ThreadPool; - -typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; - -namespace functor { - template -struct BucketizeFunctor { - // PRECONDITION: boundaries_vector must be sorted. - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& input, - const std::vector& boundaries_vector, - typename TTypes::Tensor& output) { - const int N = input.size(); - for (int i = 0; i < N; i++) { - auto first_bigger_it = std::upper_bound( - boundaries_vector.begin(), boundaries_vector.end(), input(i)); - output(i) = first_bigger_it - boundaries_vector.begin(); - } - - return Status::OK(); - } -}; -} // namespace functor - -template class BucketizeOp : public OpKernel { public: explicit BucketizeOp(OpKernelConstruction* context) : OpKernel(context) { @@ -62,42 +34,36 @@ class BucketizeOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input_tensor = context->input(0); - const auto input = input_tensor.flat(); - + auto input = input_tensor.flat(); Tensor* output_tensor = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); auto output = output_tensor->template flat(); - OP_REQUIRES_OK(context, functor::BucketizeFunctor::Compute( - context, input, boundaries_, output)); + + const int N = input.size(); + for (int i = 0; i < N; i++) { + output(i) = CalculateBucketIndex(input(i)); + } } private: + int32 CalculateBucketIndex(const T value) { + auto first_bigger_it = + std::upper_bound(boundaries_.begin(), boundaries_.end(), value); + return first_bigger_it - boundaries_.begin(); + } std::vector boundaries_; }; #define REGISTER_KERNEL(T) \ REGISTER_KERNEL_BUILDER( \ Name("Bucketize").Device(DEVICE_CPU).TypeConstraint("T"), \ - BucketizeOp); - -REGISTER_KERNEL(int32); -REGISTER_KERNEL(int64); -REGISTER_KERNEL(float); -REGISTER_KERNEL(double); -#undef REGISTER_KERNEL - -#if GOOGLE_CUDA -#define REGISTER_KERNEL(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("Bucketize").Device(DEVICE_GPU).TypeConstraint("T"), \ - BucketizeOp); + BucketizeOp); REGISTER_KERNEL(int32); REGISTER_KERNEL(int64); REGISTER_KERNEL(float); REGISTER_KERNEL(double); #undef REGISTER_KERNEL -#endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.h b/tensorflow/core/kernels/bucketize_op.h deleted file mode 100644 index c8e461beb9..0000000000 --- a/tensorflow/core/kernels/bucketize_op.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_BUCKETIZE_OP_H_ -#define TENSORFLOW_BUCKETIZE_OP_H_ - -#include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace tensorflow { -namespace functor { - -template -struct BucketizeFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& input, - const std::vector& boundaries_vector, - typename TTypes::Tensor& output); -}; - -} // namespace functor -} // namespace tensorflow - -#endif // TENSORFLOW_BUCKETIZE_OP_H_ diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc deleted file mode 100644 index aafbbe41b4..0000000000 --- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#if GOOGLE_CUDA - -#define EIGEN_USE_GPU - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/bucketize_op.h" -#include "tensorflow/core/kernels/cuda_device_array.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_kernel_helper.h" - -namespace tensorflow { - -typedef Eigen::GpuDevice GPUDevice; - -template -__global__ void BucketizeCustomKernel( - const int32 size_in, const T* in, const int32 size_boundaries, - CudaDeviceArrayStruct boundaries_array, int32* out) { - const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array); - CUDA_1D_KERNEL_LOOP(i, size_in) { - T value = in[i]; - int32 bucket = 0; - int32 count = size_boundaries; - while (count > 0) { - int32 l = bucket; - int32 step = count / 2; - l += step; - if (!(value < static_cast(boundaries[l]))) { - bucket = ++l; - count -= step + 1; - } else { - count = step; - } - } - out[i] = bucket; - } -} - -namespace functor { - -template -struct BucketizeFunctor { - // PRECONDITION: boundaries_vector must be sorted. - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& input, - const std::vector& boundaries_vector, - typename TTypes::Tensor& output) { - const GPUDevice& d = context->eigen_device(); - - CudaDeviceArrayOnHost boundaries_array(context, - boundaries_vector.size()); - TF_RETURN_IF_ERROR(boundaries_array.Init()); - for (int i = 0; i < boundaries_vector.size(); ++i) { - boundaries_array.Set(i, boundaries_vector[i]); - } - TF_RETURN_IF_ERROR(boundaries_array.Finalize()); - - CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); - BucketizeCustomKernel< - T><<>>( - input.size(), input.data(), boundaries_vector.size(), - boundaries_array.data(), output.data()); - - return Status::OK(); - } -}; -} // namespace functor - -#define REGISTER_GPU_SPEC(type) \ - template struct functor::BucketizeFunctor; - -REGISTER_GPU_SPEC(int32); -REGISTER_GPU_SPEC(int64); -REGISTER_GPU_SPEC(float); -REGISTER_GPU_SPEC(double); -#undef REGISTER_GPU_SPEC - -} // namespace tensorflow - -#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index f819fccbfb..21f5cb1716 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -236,7 +236,6 @@ class Conv3DBackpropInputOp : public OpKernel { REGISTER_KERNEL_BUILDER( \ Name("Conv3DBackpropInputV2").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DBackpropInputOp); -TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -384,7 +383,6 @@ class Conv3DBackpropFilterOp : public OpKernel { .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ Conv3DBackpropFilterOp); -TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -411,7 +409,6 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); -DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor @@ -1101,29 +1098,22 @@ class Conv3DBackpropFilterOp : public OpKernel { bool cudnn_use_autotune_; }; - - -#define REGISTER_GPU_KERNEL(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), \ - Conv3DBackpropInputOp); \ - REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("input_sizes"), \ - Conv3DBackpropInputOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ - Conv3DBackpropFilterOp); \ - REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("filter_sizes"), \ - Conv3DBackpropFilterOp); -TF_CALL_half(REGISTER_GPU_KERNEL); -TF_CALL_float(REGISTER_GPU_KERNEL); -#undef REGISTER_GPU_KERNEL - +REGISTER_KERNEL_BUILDER( + Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), + Conv3DBackpropInputOp); +REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("input_sizes"), + Conv3DBackpropInputOp); +REGISTER_KERNEL_BUILDER( + Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), + Conv3DBackpropFilterOp); +REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("filter_sizes"), + Conv3DBackpropFilterOp); #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 37cb67bc51..8a89d564de 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -145,7 +145,6 @@ class Conv3DOp : public BinaryOp { REGISTER_KERNEL_BUILDER( \ Name("Conv3D").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DOp); -TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -483,16 +482,12 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); -DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor // Registration of the GPU implementations. -REGISTER_KERNEL_BUILDER( - Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), - Conv3DOp); REGISTER_KERNEL_BUILDER( Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), Conv3DOp); diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc index 39c8814073..7bdd8d22a3 100644 --- a/tensorflow/core/kernels/cwise_op_acosh.cc +++ b/tensorflow/core/kernels/cwise_op_acosh.cc @@ -20,8 +20,16 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double, complex64, complex128); -#ifdef TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Acosh") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T"), \ + UnaryOp>); +REGISTER_SYCL_KERNEL(float); +REGISTER_SYCL_KERNEL(double); +#undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index 8d44208aa7..e0644323c0 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -20,9 +20,17 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, complex64, complex128); -#ifdef TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); -#endif // TENSORFLOW_USE_SYCL +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Asinh") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T"), \ + UnaryOp>); +REGISTER_SYCL_KERNEL(float); +REGISTER_SYCL_KERNEL(double); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYC #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index bbc69e45aa..058f5140c5 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -20,9 +20,17 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, complex64, complex128); -#ifdef TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); -#endif // TENSORFLOW_USE_SYCL +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Atanh") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T"), \ + UnaryOp>); +REGISTER_SYCL_KERNEL(float); +REGISTER_SYCL_KERNEL(double); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYC #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index d32185b6bf..6c22b124de 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -49,11 +49,7 @@ template struct scalar_asinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { -#if EIGEN_HAS_CXX11_MATH - return numext::asinh(a); -#else return std::asinh(a); -#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -65,11 +61,7 @@ template struct scalar_acosh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { -#if EIGEN_HAS_CXX11_MATH - return numext::acosh(a); -#else return std::acosh(a); -#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -81,11 +73,7 @@ template struct scalar_atanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { -#if EIGEN_HAS_CXX11_MATH - return numext::atanh(a); -#else return std::atanh(a); -#endif // EIGEN_HAS_CXX11_MATH } }; template diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 53d65a22d1..9804d7d38e 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -231,7 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args, } // Pad to vector-register width (if needed). for (int64 d = 0; d < pad_size; ++d) { - buffer[buf_base + vectorized_size + scalar_size + d] = static_cast(0); + buffer[buf_base + vectorized_size + scalar_size + d] = 0; } } } @@ -297,7 +297,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(static_cast(0)); + auto vaccum = Eigen::internal::pset1(0); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -318,7 +318,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(static_cast(0)); + auto vaccum = Eigen::internal::pset1(0); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -346,7 +346,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, if (depth_multiplier > 1) { for (int64 d = 0; d < in_depth; ++d) { const int64 index = d * args.depth_multiplier; - T accum = static_cast(0); + T accum = 0; for (int64 dm = 0; dm < dm_vectorized_size; dm += kPacketSize) { const auto v = Eigen::internal::ploadu(out_buffer + index + dm); accum += Eigen::internal::predux(v); @@ -510,7 +510,6 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; @@ -885,7 +884,6 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index 2759ecb2f1..bbeeaf7895 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -94,7 +94,7 @@ struct DepthwiseConv2DKernel { for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(static_cast(0)); + auto vaccum = Eigen::internal::pset1(0); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -115,7 +115,7 @@ struct DepthwiseConv2DKernel { } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(static_cast(0)); + auto vaccum = Eigen::internal::pset1(0); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -246,7 +246,6 @@ extern template class LaunchConv2DOp; #if GOOGLE_CUDA // Extern template instantiated in depthwise_conv_op_gpu.cc. -extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; @@ -420,17 +419,12 @@ class DepthwiseConv2dNativeOp : public BinaryOp { Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint("T"), \ DepthwiseConv2dNativeOp); -TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); #if !defined(PLATFORM_WINDOWS) || !defined(_DEBUG) TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA -REGISTER_KERNEL_BUILDER( - Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), - DepthwiseConv2dNativeOp); - REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), DepthwiseConv2dNativeOp); diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index 11aed5b415..aa5b5c76f6 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -158,7 +158,7 @@ struct DepthwiseFilterPadOp { } // Pad the remainder of output to vector-register boundary. for (int64 j = 0; j < pad_size; ++j) { - padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast(0); + padded_filter[output_base + vectorized_size + scalar_size + j] = 0; } } } @@ -266,7 +266,7 @@ struct DepthwiseInputCopyOp { // Pad the remainder of the output to vector register boundary. for (int64 d = 0; d < output_pad_size; ++d) { - in_buf[d] = static_cast(0); + in_buf[d] = 0; } in_buf += output_pad_size; diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index 903aac5d68..ecfe51d599 100644 --- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -105,7 +105,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = static_cast(0); + T sum = 0; const int input_offset_temp = in_rows * OB; if (input_row_start >= 0 && input_col_start >= 0 && @@ -258,8 +258,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( __syncthreads(); if (depth_in_range) { - T sum1 = static_cast(0); - T sum2 = static_cast(0); + T sum1 = 0; + T sum2 = 0; int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -369,7 +369,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = static_cast(0); + T sum = 0; if (input_row_start >= 0 && input_col_start >= 0 && input_row_end < in_rows && input_col_end < in_cols) { // Loop that doesn't need to check for boundary conditions. @@ -529,8 +529,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( __syncthreads(); if (slice_in_range) { - T sum1 = static_cast(0); - T sum2 = static_cast(0); + T sum1 = 0; + T sum2 = 0; int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -710,7 +710,6 @@ void LaunchDepthwiseConvOp::operator()(OpKernelContext* ctx, "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed")); } -template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; @@ -745,7 +744,7 @@ __global__ void __launch_bounds__(640, 2) const int in_r = (thread_id / in_depth / in_cols) % in_rows; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = static_cast(0); + T sum = 0; const int out_r_start = tf_max(0, (in_r - filter_rows + pad_rows + stride) / stride); @@ -811,7 +810,7 @@ __global__ void __launch_bounds__(640, 2) const int in_d = (thread_id / in_cols / in_rows) % in_depth; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = static_cast(0); + T sum = 0; const int out_d_start = in_d * depth_multiplier; const int out_d_end = out_d_start + depth_multiplier; @@ -920,7 +919,6 @@ void LaunchDepthwiseConvBackpropInputOp::operator()( "utGPULaunch failed")); } -template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; @@ -1633,7 +1631,6 @@ void LaunchDepthwiseConvBackpropFilterOp::operator()( "terGPULaunch failed")); } -template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; } // namespace tensorflow diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc deleted file mode 100644 index 7249c8c66c..0000000000 --- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc +++ /dev/null @@ -1,376 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// The algorithm for dynamic partition has the following steps: -// 1. Let N be the size of partitions. We initialize a new vector indices_in -// with the values 0, 1, 2, ..., N-1. -// 2. We apply cub::DeviceRadixSort::SortPairs to the key - value pairs given -// by partitions and indices_in. This will result in two new vectors -// partitions_out and indices_out, with partitions_out sorted. -// 3. The first dimension of outputs[i] is equal to the length of the interval -// of i-values in partitions_out. We determine it in two steps: -// - compute the starting and ending point of each interval, -// - subtract the starting and ending points to find the length. -// The result is placed in partition_count. -// 4. Because partition_count is on the GPU, we bring it asynchronously to -// the CPU. Then we can allocate the output tensors. -// 5. Finally, we use indices_out and the gather functor to collect the output. -// This works, because for each interval of i-values, indices_out points -// to the slices which should form output[i]. - -#if GOOGLE_CUDA - -#define EIGEN_USE_GPU - -#include "external/cub_archive/cub/device/device_radix_sort.cuh" -#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/bounds_check.h" -#include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/gather_functor_gpu.cu.h" -#include "tensorflow/core/util/cuda_kernel_helper.h" - -namespace tensorflow { - -typedef Eigen::GpuDevice GPUDevice; - -namespace { - -template -__global__ void RangeInitKernel(const T start, const T delta, const int32 size, - T* out) { - CUDA_1D_KERNEL_LOOP(i, size) { out[i] = start + i * delta; } -} - -__global__ void FindEndpointsKernel(const int32* partitions, int32 size, - int32 nump, int32* start, int32* end) { - CUDA_1D_KERNEL_LOOP(i, size) { - int32 current = ldg(partitions + i); - if (FastBoundsCheck(current, nump)) { - if (i == 0) - start[current] = i; - else { - int32 before = ldg(partitions + i - 1); - if (before != current) start[current] = i; - } - if (i == size - 1) - end[current] = i + 1; - else { - int32 after = ldg(partitions + i + 1); - if (after != current) end[current] = i + 1; - } - } - } -} - -// We create a local version of subtract, because the tf.subtract kernel -// is not defined for int32. We use it to compute the length of an interval -// by subtracting the endpoints. -__global__ void IntervalLengthKernel(int32* start, int32 size, int32* end) { - CUDA_1D_KERNEL_LOOP(i, size) { - int32 start_point = ldg(start + i); - end[i] = end[i] - start_point; - } -} - -// Initialize out with range start, start + delta, start + 2 * delta, ... -// This is needed because tf.range has no GPU implementation. -template -void RangeInit(const GPUDevice& d, const T start, const T delta, - const int32 size, typename TTypes::Flat out) { - CudaLaunchConfig config = GetCudaLaunchConfig(size, d); - RangeInitKernel< - T><<>>( - start, delta, size, out.data()); -} - -// Partitions is a sorted vector of N non-negative integer numbers. -// This function computes the starting and ending points of each interval -// of values. -void ComputeIntervals(const GPUDevice& d, Tensor* partitions, int32 N, - int32 nump, int32* start_ptr, int32* end_ptr) { - CudaLaunchConfig config = GetCudaLaunchConfig(N, d); - FindEndpointsKernel<<>>(partitions->flat().data(), N, nump, - start_ptr, end_ptr); -} - -// Subtract the ending points of each interval to obtain the interval length. -void ComputeItvLength(const GPUDevice& d, int32 num, int32* start_ptr, - int32* end_ptr) { - CudaLaunchConfig config = GetCudaLaunchConfig(num, d); - IntervalLengthKernel<<>>(start_ptr, num, end_ptr); -} - -template -void CallGatherKernel(const GPUDevice& d, const T* params, const int32* indices, - T* out, int64 gather_dim_size, int64 indices_size, - int64 slice_size, int64 out_size) { - CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d); - GatherOpKernel< - T, int32, - true><<>>( - params, indices, out, gather_dim_size, indices_size, slice_size, - out_size); -} - -} // namespace - -// The current implementation has memory cost on GPU -// I + P + max(3N + R, O + N), where: -// I - the size of the input -// N - the size of the partitions tensor -// R - the temporary storage used by cub::RadixSort, about 2N -// P - the number of partitions -// O - the size of the output -// So roughly the cost is I + P + max(5N, O + N). -template -class DynamicPartitionOpGPU : public AsyncOpKernel { - public: - explicit DynamicPartitionOpGPU(OpKernelConstruction* c) : AsyncOpKernel(c) { - OP_REQUIRES_OK(c, c->GetAttr("num_partitions", &num_partitions_)); - OP_REQUIRES(c, num_partitions_ >= 1, - errors::InvalidArgument("num_partitions must be at least 1")); - } - - void AllocateTempSpace(OpKernelContext* c, int32 N, Tensor* indices_in, - Tensor* partitions_out, Tensor* indices_out, - DoneCallback done) { - int32 M = std::max(N, num_partitions_); - // indices_in will be made slightly larger to accomodate - // later computations. - OP_REQUIRES_OK_ASYNC( - c, c->allocate_temp(DT_INT32, TensorShape({M}), indices_in), done); - OP_REQUIRES_OK_ASYNC( - c, c->allocate_temp(DT_INT32, TensorShape({N}), partitions_out), done); - OP_REQUIRES_OK_ASYNC( - c, c->allocate_temp(DT_INT32, TensorShape({N}), indices_out), done); - } - - void AllocateOutputs(OpKernelContext* c, const Tensor* data, - const Tensor* partitions, const Tensor* partition_count, - OpOutputList* Tout, DoneCallback done) { - auto e_part_count = partition_count->flat(); - // Allocate output tensors of the right size - OP_REQUIRES_OK_ASYNC(c, c->output_list("outputs", Tout), done); - for (int p = 0; p < num_partitions_; p++) { - TensorShape shape; - shape.AddDim(e_part_count(p)); - for (int i = partitions->dims(); i < data->dims(); i++) { - shape.AddDim(data->dim_size(i)); - } - Tensor* out; - OP_REQUIRES_OK_ASYNC(c, Tout->allocate(p, shape, &out), done); - } - } - - void ComputeAsync(OpKernelContext* c, DoneCallback done) { - const Tensor& data = c->input(0); - const Tensor& partitions = c->input(1); - - OP_REQUIRES_ASYNC( - c, TensorShapeUtils::StartsWith(data.shape(), partitions.shape()), - errors::InvalidArgument("data.shape must start with partitions.shape, ", - "got data.shape = ", data.shape().DebugString(), - ", partitions.shape = ", - partitions.shape().DebugString()), - done); - - Tensor partition_count; - - // We must handle the case of empty partitions separately, - // because kernels don't work with 0-sized tensors. - if (partitions.NumElements() == 0) { - AllocatorAttributes alloc_attr; - alloc_attr.set_on_host(true); - OP_REQUIRES_OK_ASYNC( - c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}), - &partition_count, alloc_attr), - done); - auto e_part_count = partition_count.flat(); - for (int i = 0; i < num_partitions_; i++) e_part_count(i) = 0; - OpOutputList outputs; - this->AllocateOutputs(c, &data, &partitions, &partition_count, &outputs, - done); - if (c->status().ok()) done(); - return; - } - - // Prepare for counting. - OP_REQUIRES_OK_ASYNC( - c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}), - &partition_count), - done); - Tensor indices_out; - // Count how many times each partition index occurs. - // Also sort the info in partitions and output it in indices_out, - // in preparation for the next step. - this->CountAndSortParts(c, &partitions, &partition_count, &indices_out, - done); - if (!c->status().ok()) return; - - // In order to allocate the output tensor we have to move partition_count - // to CPU. - auto* stream = c->op_device_context()->stream(); - OP_REQUIRES_ASYNC(c, stream, errors::Internal("No GPU stream available."), - done); - Tensor cpu_tensor; - AllocatorAttributes alloc_attr; - alloc_attr.set_on_host(true); - alloc_attr.set_gpu_compatible(true); - OP_REQUIRES_OK_ASYNC( - c, c->allocate_temp(partition_count.dtype(), partition_count.shape(), - &cpu_tensor, alloc_attr), - done); - perftools::gputools::DeviceMemoryBase wrapped( - partition_count.flat().data(), num_partitions_ * sizeof(int32)); - const bool status = - stream - ->ThenMemcpy(cpu_tensor.flat().data(), wrapped, - num_partitions_ * sizeof(int32)) - .ok(); - OP_REQUIRES_ASYNC( - c, status, - errors::Internal("Failed to launch copy from device to host."), done); - - // Keep a reference to partition_count so that the buffer - // is not deallocated at the end of the function, before - // memcpy is completed. - TensorReference partition_ref(partition_count); - auto wrapped_callback = [this, c, &data, &partitions, indices_out, - partition_ref, cpu_tensor, done]() { - OpOutputList outputs; - this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done); - if (!c->status().ok()) { - partition_ref.Unref(); - return; - } - int32 N = partitions.NumElements(); - int64 slice_size = data.NumElements() / N; - this->GatherSlices(c, &data, &indices_out, N, slice_size, outputs); - partition_ref.Unref(); - done(); - }; - - c->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute( - stream, wrapped_callback); - } - - protected: - void RadixSort(OpKernelContext* c, const Tensor* partitions, - Tensor* indices_in, Tensor* partitions_out, - Tensor* indices_out, DoneCallback done) { - int32 N = partitions->NumElements(); - const GPUDevice& device = c->eigen_device(); - const cudaStream_t& cu_stream = GetCudaStream(c); - - // Initialize the indices_in tensor using the Range GPU kernel. - RangeInit(device, 0, 1, N, indices_in->flat()); - // Obtain the pointers to inner buffers. - const int32* partitions_ptr = partitions->flat().data(); - int32* partitions_out_ptr = partitions_out->flat().data(); - int32* indices_in_ptr = indices_in->flat().data(); - int32* indices_out_ptr = indices_out->flat().data(); - // Determine temporary device storage requirements. - Tensor cub_temp_storage; - size_t temp_storage_bytes = 0; - cub::DeviceRadixSort::SortPairs( - NULL, temp_storage_bytes, partitions_ptr, partitions_out_ptr, - indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); - // Allocate temporary storage. - OP_REQUIRES_OK_ASYNC( - c, c->allocate_temp( - DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), - &cub_temp_storage), - done); - // Radix-sort the partition information. - cub::DeviceRadixSort::SortPairs( - cub_temp_storage.flat().data(), temp_storage_bytes, - partitions_ptr, partitions_out_ptr, indices_in_ptr, indices_out_ptr, N, - 0, sizeof(int32) * 8, cu_stream); - } // At this point cub_temp_storage will be marked for deallocation. - - void CountAndSortParts(OpKernelContext* c, const Tensor* partitions, - Tensor* partition_count, Tensor* indices_out, - DoneCallback done) { - const GPUDevice& device = c->eigen_device(); - int32 N = partitions->NumElements(); - Tensor indices_in; - Tensor partitions_out; - - // Allocate memory for Radix-Sort. - this->AllocateTempSpace(c, N, &indices_in, &partitions_out, indices_out, - done); - if (!c->status().ok()) return; - this->RadixSort(c, partitions, &indices_in, &partitions_out, indices_out, - done); - if (!c->status().ok()) return; - // We still need a little bit of additional memory. However, - // we can reuse the indices_in tensor. We could also use atomic - // operations and no additional memory, but this approach seems faster. - - // Zero-out the allocated memory. - functor::SetZeroFunctor zero_functor; - zero_functor(device, partition_count->flat()); - zero_functor(device, indices_in.flat()); - // Obtain the pointers to inner buffers. - int32* start_ptr = indices_in.flat().data(); - int32* end_ptr = partition_count->flat().data(); - // Obtain the starting and ending points of each interval. - ComputeIntervals(device, &partitions_out, N, num_partitions_, start_ptr, - end_ptr); - // Subtract to compute the number of appearances of each id. - ComputeItvLength(device, num_partitions_, start_ptr, end_ptr); - } // At this point indices_in and partitions_out will be marked - // for deallocation. - - void GatherSlices(OpKernelContext* c, const Tensor* data, - const Tensor* indices, int32 N, int64 slice_size, - OpOutputList& outs) { - const GPUDevice& device = c->eigen_device(); - const int32* ind_base = indices->flat().data(); - const T* data_base = data->flat().data(); - - for (int p = 0; p < num_partitions_; p++) { - int32 indices_size = outs[p]->dim_size(0); - int64 out_size = outs[p]->NumElements(); - T* out_base = outs[p]->flat().data(); - if (out_size > 0) - CallGatherKernel(device, data_base, ind_base, out_base, N, - indices_size, slice_size, out_size); - ind_base += indices_size; - } - } - - int num_partitions_; -}; - -#define REGISTER_DYNAMIC_PARTITION_GPU(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("DynamicPartition").Device(DEVICE_GPU).TypeConstraint("T"), \ - DynamicPartitionOpGPU) - -TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_PARTITION_GPU); -TF_CALL_complex64(REGISTER_DYNAMIC_PARTITION_GPU); -TF_CALL_complex128(REGISTER_DYNAMIC_PARTITION_GPU); -#undef REGISTER_DYNAMIC_PARTITION_GPU - -} // namespace tensorflow - -#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/dynamic_partition_op_test.cc b/tensorflow/core/kernels/dynamic_partition_op_test.cc index 9a7ed0af21..0e8fbc0a67 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_test.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_test.cc @@ -16,7 +16,6 @@ limitations under the License. #include #include -#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -24,14 +23,10 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/graph/node_builder.h" -#include "tensorflow/core/graph/testlib.h" #include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/random/simple_philox.h" #include "tensorflow/core/platform/test.h" -#include "tensorflow/core/platform/test_benchmark.h" namespace tensorflow { namespace { @@ -158,58 +153,5 @@ TEST_F(DynamicPartitionOpTest, Error_IndexOutOfRange) { << s; } -Node* DynamicPartitionNode(Graph* g, Node* in0, Node* in1, int num_partitions) { - Node* ret; - TF_CHECK_OK(NodeBuilder(g->NewName("n"), "DynamicPartition") - .Input(in0) - .Input(in1) - .Attr("num_partitions", num_partitions) - .Finalize(g, &ret)); - return ret; -} - -template -static Graph* DynamicPartition(int num_partitions, int dim) { - Graph* g = new Graph(OpRegistry::Global()); - // Always use a 128MB buffer. - const int kRows = ((128 << 20) / sizeof(T)) / dim; - Tensor data(DataTypeToEnum::value, TensorShape({kRows, dim})); - data.flat().setRandom(); - - random::PhiloxRandom philox(301, 17); - random::SimplePhilox rnd(&philox); - Tensor partitions(DT_INT32, TensorShape({kRows})); - for (int i = 0; i < kRows; i++) { - partitions.flat()(i) = rnd.Uniform(num_partitions); - } - DynamicPartitionNode(g, test::graph::Constant(g, data), - test::graph::Constant(g, partitions), num_partitions); - return g; -} - -#define BM_DYNAMIC_PARTITION(DEVICE, T, num) \ - static void BM_##DEVICE##_dynpart_##T##_##num(int iters, int dim) { \ - const int64 items = ((128 << 20) / sizeof(T)); \ - const int64 tot = static_cast(iters) * items; \ - testing::ItemsProcessed(tot); \ - testing::UseRealTime(); \ - test::Benchmark(#DEVICE, DynamicPartition(num, dim)).Run(iters); \ - } \ - BENCHMARK(BM_##DEVICE##_dynpart_##T##_##num)->Arg(1)->Arg(256) - -BM_DYNAMIC_PARTITION(cpu, float, 2); -BM_DYNAMIC_PARTITION(cpu, float, 100); -BM_DYNAMIC_PARTITION(cpu, double, 2); -BM_DYNAMIC_PARTITION(cpu, double, 100); -BM_DYNAMIC_PARTITION(cpu, complex64, 2); -BM_DYNAMIC_PARTITION(cpu, complex64, 100); - -BM_DYNAMIC_PARTITION(gpu, float, 2); -BM_DYNAMIC_PARTITION(gpu, float, 100); -BM_DYNAMIC_PARTITION(gpu, double, 2); -BM_DYNAMIC_PARTITION(gpu, double, 100); -BM_DYNAMIC_PARTITION(gpu, complex64, 2); -BM_DYNAMIC_PARTITION(gpu, complex64, 100); - } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index 1688674eb7..0ecb829f34 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -54,20 +54,25 @@ struct FusedBatchNorm { Tensor* batch_var_output, Tensor* saved_mean_output, Tensor* saved_var_output, TensorFormat tensor_format, bool is_training) { + // Currently U is ignored, since we only support the case where T and U are + // both float32. + // TODO(reedwm): Add float16 support, use U, and remove these asserts. + static_assert(std::is_same::value, "T currently must be float."); + static_assert(std::is_same::value, "U currently must be float."); OP_REQUIRES(context, tensor_format == FORMAT_NHWC, errors::Internal("The CPU implementation of FusedBatchNorm " "only supports NHWC tensor format for now.")); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec offset(offset_input.vec()); - typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); - typename TTypes::ConstVec estimated_variance( - estimated_variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec offset(offset_input.vec()); + typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); + typename TTypes::ConstVec estimated_variance( + estimated_variance_input.vec()); typename TTypes::Tensor y(y_output->tensor()); - typename TTypes::Vec batch_mean(batch_mean_output->vec()); - typename TTypes::Vec batch_var(batch_var_output->vec()); - typename TTypes::Vec saved_mean(saved_mean_output->vec()); - typename TTypes::Vec saved_var(saved_var_output->vec()); + typename TTypes::Vec batch_mean(batch_mean_output->vec()); + typename TTypes::Vec batch_var(batch_var_output->vec()); + typename TTypes::Vec saved_mean(saved_mean_output->vec()); + typename TTypes::Vec saved_var(saved_var_output->vec()); const CPUDevice& d = context->eigen_device(); @@ -88,15 +93,15 @@ struct FusedBatchNorm { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); + auto x_rest_by_depth = x.reshape(rest_by_depth); const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1; - U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); // This adjustment is for Bessel's correction - U rest_size_adjust = - static_cast(rest_size) / static_cast(rest_size_minus_one); + T rest_size_adjust = + static_cast(rest_size) / static_cast(rest_size_minus_one); - Eigen::Tensor mean(depth); - Eigen::Tensor variance(depth); + Eigen::Tensor mean(depth); + Eigen::Tensor variance(depth); if (is_training) { mean.device(d) = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv); batch_mean.device(d) = mean; @@ -124,7 +129,7 @@ struct FusedBatchNorm { auto x_shifted = x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec); - y.reshape(rest_by_depth).device(d) = x_shifted.template cast(); + y.reshape(rest_by_depth).device(d) = x_shifted; } }; @@ -133,7 +138,7 @@ struct FusedBatchNormGrad { void operator()(OpKernelContext* context, const Tensor& y_backprop_input, const Tensor& x_input, const Tensor& scale_input, const Tensor& mean_input, const Tensor& variance_input, - U epsilon, Tensor* x_backprop_output, + T epsilon, Tensor* x_backprop_output, Tensor* scale_backprop_output, Tensor* offset_backprop_output, TensorFormat tensor_format) { OP_REQUIRES(context, tensor_format == FORMAT_NHWC, @@ -142,12 +147,12 @@ struct FusedBatchNormGrad { typename TTypes::ConstTensor y_backprop( y_backprop_input.tensor()); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec mean(mean_input.vec()); - typename TTypes::ConstVec variance(variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec mean(mean_input.vec()); + typename TTypes::ConstVec variance(variance_input.vec()); typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); - typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); - typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); + typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); + typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); // Note: the following formulas are used to compute the gradients for // back propagation. @@ -176,8 +181,8 @@ struct FusedBatchNormGrad { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); - U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + auto x_rest_by_depth = x.reshape(rest_by_depth); + T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); auto x_mean_rest_by_depth = mean.reshape(one_by_depth).broadcast(bcast_spec); @@ -187,8 +192,7 @@ struct FusedBatchNormGrad { coef0.eval().reshape(one_by_depth).broadcast(bcast_spec); auto x_scaled = x_centered * coef0_rest_by_depth; - auto y_backprop_rest_by_depth = - y_backprop.eval().reshape(rest_by_depth).template cast(); + auto y_backprop_rest_by_depth = y_backprop.eval().reshape(rest_by_depth); scale_backprop.device(d) = (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims); auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims); @@ -210,7 +214,7 @@ struct FusedBatchNormGrad { .reshape(one_by_depth) .broadcast(bcast_spec); x_backprop.reshape(rest_by_depth).device(d) = - (coef1 * (y_backprop_centered - x_centered * coef2)).template cast(); + coef1 * (y_backprop_centered - x_centered * coef2); } }; @@ -685,18 +689,6 @@ REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") .TypeConstraint("U"), FusedBatchNormGradOp); -REGISTER_KERNEL_BUILDER(Name("FusedBatchNormV2") - .Device(DEVICE_CPU) - .TypeConstraint("T") - .TypeConstraint("U"), - FusedBatchNormOp); - -REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") - .Device(DEVICE_CPU) - .TypeConstraint("T") - .TypeConstraint("U"), - FusedBatchNormGradOp); - #if GOOGLE_CUDA REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h index 3af104bf95..38b24d7011 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.h +++ b/tensorflow/core/kernels/fused_batch_norm_op.h @@ -92,28 +92,26 @@ struct FusedBatchNormFreezeGrad { // offset_backprop = sum(y_backprop) // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon)) // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) - - auto y_backprop_rest_by_depth = - y_backprop.reshape(rest_by_depth).template cast(); - auto input_rest_by_depth = input.reshape(rest_by_depth).template cast(); - - offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis); + offset_backprop.device(d) = y_backprop.reshape(rest_by_depth) + .template cast() + .sum(reduction_axis); // scratch1 = rsqrt(pop_var + epsilon) scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt(); // scratch2 = sum(y_backprop * (x - mean)) scratch2.device(d) = - (y_backprop_rest_by_depth * - (input_rest_by_depth - + (y_backprop.reshape(rest_by_depth).template cast() * + (input.reshape(rest_by_depth).template cast() - pop_mean.reshape(one_by_depth).broadcast(rest_by_one))) .sum(reduction_axis); x_backprop.reshape(rest_by_depth).device(d) = - (y_backprop_rest_by_depth * ((scratch1 * scale) - .eval() - .reshape(one_by_depth) - .broadcast(rest_by_one))) + (y_backprop.reshape(rest_by_depth).template cast() * + ((scratch1 * scale) + .eval() + .reshape(one_by_depth) + .broadcast(rest_by_one))) .template cast(); scale_backprop.device(d) = scratch2 * scratch1; } diff --git a/tensorflow/core/kernels/lmdb_reader_op.cc b/tensorflow/core/kernels/lmdb_reader_op.cc index 31a427f2c9..3bb07301b5 100755 --- a/tensorflow/core/kernels/lmdb_reader_op.cc +++ b/tensorflow/core/kernels/lmdb_reader_op.cc @@ -36,7 +36,7 @@ class LMDBReader : public ReaderBase { Status OnWorkStartedLocked() override { MDB_CHECK(mdb_env_create(&mdb_env_)); - int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK; + int flags = MDB_RDONLY | MDB_NOTLS; // Check if the LMDB filename is actually a file instead of a directory. // If so, set appropriate flags so we can open it. @@ -57,13 +57,10 @@ class LMDBReader : public ReaderBase { if (mdb_env_ != nullptr) { if (mdb_cursor_) { mdb_cursor_close(mdb_cursor_); - mdb_cursor_ = nullptr; } - mdb_dbi_close(mdb_env_, mdb_dbi_); mdb_txn_abort(mdb_txn_); + mdb_dbi_close(mdb_env_, mdb_dbi_); mdb_env_close(mdb_env_); - mdb_txn_ = nullptr; - mdb_dbi_ = 0; mdb_env_ = nullptr; } return Status::OK(); diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index 157ce106ce..e2cf605811 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -33,11 +34,9 @@ limitations under the License. #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" -#include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/maxpooling_op_gpu.h" @@ -359,7 +358,6 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -407,7 +405,7 @@ class MaxPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop, - output_shape, propagate_nans_); + output_shape); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPoolGrad only supports NHWC format"; @@ -422,7 +420,6 @@ class MaxPoolingGradOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; - bool propagate_nans_; }; #endif // GOOGLE_CUDA @@ -887,8 +884,6 @@ class MaxPoolingWithArgmaxOp : public OpKernel { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -907,15 +902,14 @@ class MaxPoolingWithArgmaxOp : public OpKernel { Tensor* argmax = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax)); - LaunchMaxPoolingWithArgmax::launch( - context, params, tensor_in, output, argmax, propagate_nans_); + LaunchMaxPoolingWithArgmax::launch(context, params, tensor_in, + output, argmax); } private: std::vector ksize_; std::vector stride_; Padding padding_; - bool propagate_nans_; }; template @@ -1051,8 +1045,6 @@ class MaxPoolingNoMaskOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); - - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1076,10 +1068,9 @@ class MaxPoolingNoMaskOp : public OpKernel { // These is_int8x4 checks avoid linker errors for missing qint8 kernels. if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute(context, - perftools::gputools::dnn::PoolingMode::kMaximum, - ksize_, stride_, padding_, data_format_, - tensor_in, out_shape, propagate_nans_); + DnnPoolingOp::Compute( + context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_, + stride_, padding_, data_format_, tensor_in, out_shape); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); @@ -1088,7 +1079,7 @@ class MaxPoolingNoMaskOp : public OpKernel { tensor_in, output); } else if (data_format_ == FORMAT_NHWC) { LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output, propagate_nans_); + output); } else { LOG(FATAL) << "MaxPool currently only supports the following (layout, " "type) combinations: (NHWC, non-qint8), " @@ -1107,7 +1098,6 @@ class MaxPoolingNoMaskOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; - bool propagate_nans_; }; template @@ -1137,7 +1127,6 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1179,17 +1168,16 @@ class MaxPoolingNoMaskV2Op : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); if (use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute(context, - perftools::gputools::dnn::PoolingMode::kMaximum, - ksize, stride, padding_, data_format_, tensor_in, - out_shape, propagate_nans_); + DnnPoolingOp::Compute( + context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, + stride, padding_, data_format_, tensor_in, out_shape); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPool only supports NHWC format"; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output, propagate_nans_); + output); } } @@ -1199,20 +1187,18 @@ class MaxPoolingNoMaskV2Op : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; - bool propagate_nans_; }; template struct LaunchMaxPoolingNoMask { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output, bool propagate_nans) { + const Tensor& input, Tensor* output) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, - output->flat().data(), nullptr, context->eigen_gpu_device(), - propagate_nans); + output->flat().data(), nullptr, context->eigen_gpu_device()); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardNoMask")); @@ -1223,8 +1209,7 @@ struct LaunchMaxPoolingNoMask { template struct LaunchMaxPoolingWithArgmax { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output, Tensor* argmax, - bool propagate_nans) { + const Tensor& input, Tensor* output, Tensor* argmax) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, @@ -1232,7 +1217,7 @@ struct LaunchMaxPoolingWithArgmax { params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, output->flat().data(), reinterpret_cast(argmax->flat().data()), - context->eigen_gpu_device(), propagate_nans); + context->eigen_gpu_device()); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardWithArgmax")); diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index d96b844383..26f5274804 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -29,15 +29,6 @@ limitations under the License. namespace tensorflow { namespace { -template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) { - if (propagate_nans) { - return !(a <= b); - } else { - return a > b; - } -} - // This is Yangqing's custom kernel for the maxpooling operation. There are // three functions: MaxPoolForwardNCHW and MaxPoolForwardNHWC are the two // forward functions, dealing with the forward case. MaxPoolBackward is the @@ -60,7 +51,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) { // const int output_size = batch * channels * pooled_height * pooled_width; // MaxPoolForwardNCHW<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, // kThreadsPerBlock, 0, cuda_stream>>>(...); -template +template __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, @@ -86,7 +77,7 @@ __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = c * height * width + h * width + w; - if (IsGreaterThan(bottom_data_n[idx], maxval)) { + if (bottom_data_n[idx] > maxval) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -135,7 +126,7 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C( } } -template +template __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, const int height, const int width, const int channels, const int pooled_height, @@ -162,7 +153,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = (h * width + w) * channels + c; - if (IsGreaterThan(bottom_data_n[idx], maxval)) { + if (bottom_data_n[idx] > maxval) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -399,24 +390,15 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( const int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, - int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) { + int64* mask, const Eigen::GpuDevice& d) { const int kThreadsPerBlock = 1024; const int output_size = batch * channels * pooled_height * pooled_width; - if (propagate_nans) { - MaxPoolForwardNHWC - <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>> - (output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); - } else { - MaxPoolForwardNHWC - <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>> - (output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); - } + + MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, + kThreadsPerBlock, 0, d.stream()>>>( + output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); return d.ok(); } diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h index 38ebb34248..34203797cf 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.h +++ b/tensorflow/core/kernels/maxpooling_op_gpu.h @@ -39,7 +39,7 @@ struct MaxPoolForwardWithOptionalArgmax { const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, int64* mask, - const Eigen::GpuDevice& d, bool propagate_nans); + const Eigen::GpuDevice& d); }; struct MaxPoolForwardNoMask_NCHW_VECT_C { diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index f291281108..9080bf7be8 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -45,12 +45,12 @@ limitations under the License. #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::stream; using mkldnn::prop_kind; +using mkldnn::stream; -using mkldnn::convolution_forward; using mkldnn::convolution_backward_weights; using mkldnn::convolution_direct; +using mkldnn::convolution_forward; #endif @@ -463,12 +463,13 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Generate input shapes. TensorShape filter_shape; - OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()), - errors::InvalidArgument( + OP_REQUIRES( + context, TensorShapeUtils::IsVector(filter_tensor.shape()), + errors::InvalidArgument( "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ", filter_tensor.dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - filter_tensor.vec(), &filter_shape)); + filter_tensor.vec(), &filter_shape)); TensorShape input_shape = input_tensor.shape(); TensorShape obp_shape = obp_tensor.shape(); @@ -480,27 +481,26 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Get forward convolution parameters. MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape, - &fwd_input_dims, &fwd_filter_dims, - &strides, - &fwd_output_dims_tf_order, - &fwd_output_dims, - &padding_l, &padding_r); + conv_utl.GetConvFwdSizesInMklOrder( + input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, + &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, + &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward // API needs it. For that, we first need to create input, filter // and output memory descriptors. auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); - auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType(), - mkl_data_format); - auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType(), - memory::format::hwio); - auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), - mkl_data_format); - auto fwd_desc = convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md, - strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + auto fwd_src_md = + memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); + auto fwd_filter_md = + memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); + auto fwd_out_md = + memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); + auto fwd_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, + fwd_out_md, strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Allocate output tensor and shape @@ -537,23 +537,22 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { output.SetOpMemDesc(bwd_output_dims, memory::format::any); // Create convolution backward weights primitive. - auto bwd_desc = convolution_backward_weights::desc(convolution_direct, - input.GetOpMemDesc(), output.GetOpMemDesc(), - outbackprop.GetOpMemDesc(), strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_)); + auto bwd_desc = convolution_backward_weights::desc( + convolution_direct, input.GetOpMemDesc(), output.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); - auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, - cpu_engine, - fwd_pd); + auto bwd_pd = convolution_backward_weights::primitive_desc( + bwd_desc, cpu_engine, fwd_pd); PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output); - } catch (mkldnn::error &e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + - ", in file " + string(__FILE__) + ":" + - std::to_string(__LINE__); - OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:", - error_msg)); + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); } } @@ -564,9 +563,8 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecutePrimitive( - const convolution_backward_weights::primitive_desc& conv_pd, - MklDnnData* input, MklDnnData* obp, - MklDnnData* output) { + const convolution_backward_weights::primitive_desc& conv_pd, + MklDnnData* input, MklDnnData* obp, MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -577,10 +575,10 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { // output side, we will prepare reorder primitive in case output // reorder to user memory is required. bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_pd.diff_weights_primitive_desc()); + conv_pd.diff_weights_primitive_desc()); - net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(), - obp->GetOpMem(), output->GetOpMem())); + net.push_back(convolution_backward_weights( + conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem())); // Insert reorder primitive in the net for output reorder if reorder is // required. diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index 4a47d0463e..4b6bf92e42 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -23,6 +23,8 @@ limitations under the License. #define EIGEN_USE_THREADS #include #include +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -41,18 +43,16 @@ limitations under the License. #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" #include "tensorflow/core/util/work_sharder.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::stream; using mkldnn::prop_kind; +using mkldnn::stream; -using mkldnn::convolution_forward; -using mkldnn::convolution_direct; using mkldnn::convolution_backward_data; +using mkldnn::convolution_direct; +using mkldnn::convolution_forward; #endif namespace tensorflow { @@ -397,12 +397,13 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Generate input shape. TensorShape input_shape; - OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()), - errors::InvalidArgument( + OP_REQUIRES( + context, TensorShapeUtils::IsVector(input_tensor.shape()), + errors::InvalidArgument( "Conv2DBackpropInput: input_sizes input must be 1-dim, not ", input_tensor.dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - input_tensor.vec(), &input_shape)); + input_tensor.vec(), &input_shape)); TensorShape filter_shape = filter_tensor.shape(); TensorShape obp_shape = obp_tensor.shape(); @@ -414,27 +415,26 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Get forward convolution parameters. MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape, - &fwd_input_dims, &fwd_filter_dims, - &strides, - &fwd_output_dims_tf_order, - &fwd_output_dims, - &padding_l, &padding_r); + conv_utl.GetConvFwdSizesInMklOrder( + input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, + &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, + &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward // API needs it. For that, we first need to create input, filter // and output memory descriptors. auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); - auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType(), - mkl_data_format); - auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType(), - memory::format::hwio); - auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), - mkl_data_format); - auto fwd_desc = convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md, - strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + auto fwd_src_md = + memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); + auto fwd_filter_md = + memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); + auto fwd_out_md = + memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); + auto fwd_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, + fwd_out_md, strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Allocate output tensor and shape @@ -475,23 +475,22 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { output.SetOpMemDesc(bwd_output_dims, memory::format::any); // Create convolution backward data primitive. - auto bwd_desc = convolution_backward_data::desc(convolution_direct, - output.GetOpMemDesc(), filter.GetOpMemDesc(), - outbackprop.GetOpMemDesc(), strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_)); + auto bwd_desc = convolution_backward_data::desc( + convolution_direct, output.GetOpMemDesc(), filter.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); - auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc, - cpu_engine, - fwd_pd); + auto bwd_pd = convolution_backward_data::primitive_desc( + bwd_desc, cpu_engine, fwd_pd); PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output); - } catch (mkldnn::error &e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + - ", in file " + string(__FILE__) + ":" + - std::to_string(__LINE__); - OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:", - error_msg)); + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); } } @@ -502,9 +501,8 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecutePrimitive( - const convolution_backward_data::primitive_desc& conv_pd, - MklDnnData* filter, MklDnnData* obp, - MklDnnData* output) { + const convolution_backward_data::primitive_desc& conv_pd, + MklDnnData* filter, MklDnnData* obp, MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -514,11 +512,11 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { // Memory for output of convolution. Since we may need reorder on the // output side, we will prepare reorder primitive in case output // reorder to user memory is required. - bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_pd.diff_src_primitive_desc()); + bool output_reorder_required = + output->PrepareReorderToUserMemIfReq(conv_pd.diff_src_primitive_desc()); - net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(), - filter->GetOpMem(), output->GetOpMem())); + net.push_back(convolution_backward_data( + conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem())); // Insert reorder primitive in the net for output reorder if reorder is // required. diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index a9872b8d6d..369f632fb4 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -18,8 +18,8 @@ limitations under the License. #include #include -#include #include +#include #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -46,11 +46,11 @@ limitations under the License. #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" -using mkldnn::stream; using mkldnn::prop_kind; +using mkldnn::stream; -using mkldnn::convolution_forward; using mkldnn::convolution_direct; +using mkldnn::convolution_forward; #endif namespace tensorflow { @@ -523,19 +523,16 @@ class MklConv2DOp : public OpKernel { // Get shapes of input tensors in MKL-DNN order MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); - conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(), - filter_tensor.shape(), - &src_dims, &filter_dims, &strides, - &output_dims_tf_order, - &output_dims_mkl_order, &padding_l, - &padding_r); + conv_utl.GetConvFwdSizesInMklOrder( + src_tensor.shape(), filter_tensor.shape(), &src_dims, &filter_dims, + &strides, &output_dims_tf_order, &output_dims_mkl_order, &padding_l, + &padding_r); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. - TensorShape tf_output_shape({output_dims_tf_order[0], - output_dims_tf_order[1], - output_dims_tf_order[2], - output_dims_tf_order[3]}); + TensorShape tf_output_shape( + {output_dims_tf_order[0], output_dims_tf_order[1], + output_dims_tf_order[2], output_dims_tf_order[3]}); Tensor* output_tensor = nullptr; MklShape mkl_output_mkl_shape; mkl_output_mkl_shape.SetMklTensor(false); @@ -572,13 +569,13 @@ class MklConv2DOp : public OpKernel { // the layout is Tensorflow's layout (NHWC or NCHW depending on data // format). src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_), - const_cast(static_cast( - src_tensor.flat().data()))); + const_cast( + static_cast(src_tensor.flat().data()))); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). filter.SetUsrMem(filter_dims, memory::format::hwio, const_cast(static_cast( - filter_tensor.flat().data()))); + filter_tensor.flat().data()))); // Although output shape (output_dims) required is in MKL-DNN order, // layout is Tensorflow's layout (NHWC or NCHW depending on data format). output.SetUsrMem(output_dims_mkl_order, @@ -598,36 +595,36 @@ class MklConv2DOp : public OpKernel { const Tensor& bias_tensor = MklGetInput(context, 2); bias.SetUsrMem(bias_size, memory::format::x, const_cast(static_cast( - bias_tensor.flat().data()))); + bias_tensor.flat().data()))); bias.SetOpMemDesc(bias_size, memory::format::any); // Create convolution primitive with Bias. - auto conv_desc = convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(), - bias.GetOpMemDesc(), output.GetOpMemDesc(), strides, - padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); - auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, - cpu_engine); + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output); } else { // Create convolution primitive without Bias. - auto conv_desc = convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(), - output.GetOpMemDesc(), strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); - auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, - cpu_engine); + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output); } - } catch (mkldnn::error &e) { + } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + std::string(e.message) + - ", in file " + std::string(__FILE__) + ":" + - std::to_string(__LINE__); - OP_REQUIRES_OK(context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); } } @@ -638,9 +635,9 @@ class MklConv2DOp : public OpKernel { // Prepare and execute net - checks for input and output reorders. void PrepareAndExecuteNet( - const convolution_forward::primitive_desc& conv_prim_desc, - MklDnnData* src, MklDnnData* filter, - MklDnnData* bias, MklDnnData* output) { + const convolution_forward::primitive_desc& conv_prim_desc, + MklDnnData* src, MklDnnData* filter, MklDnnData* bias, + MklDnnData* output) { // Create reorders between user layout and MKL layout if it is needed and // add it to the net before convolution. std::vector net; @@ -651,18 +648,19 @@ class MklConv2DOp : public OpKernel { // output side, we will prepare reorder primitive in case output // reorder to user memory is required. bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_prim_desc.dst_primitive_desc()); + conv_prim_desc.dst_primitive_desc()); // Create convolution primitive and add it to net. if (bias) { CHECK_EQ(biasEnabled, true); net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), - filter->GetOpMem(), bias->GetOpMem(), - output->GetOpMem())); + filter->GetOpMem(), bias->GetOpMem(), + output->GetOpMem())); } else { CHECK_EQ(biasEnabled, false); net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), - filter->GetOpMem(), output->GetOpMem())); + filter->GetOpMem(), + output->GetOpMem())); } // Insert reorder primitive in the net for output reorder if reorder is diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index f0cb37f8a4..e29af19ca9 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ #define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ -#include #include +#include #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -26,8 +26,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/bounds_check.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/conv_grad_ops.h" +#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/strings/numbers.h" @@ -49,15 +49,15 @@ namespace tensorflow { class MklDnnConvUtil { protected: - OpKernelContext* context_; // We don't own this. + OpKernelContext *context_; // We don't own this. std::vector strides_; Padding padding_; TensorFormat data_format_; public: - MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, - Padding pad, TensorFormat fm) : context_(context), - strides_(strides), padding_(pad), data_format_(fm) {} + MklDnnConvUtil(OpKernelContext *context, const std::vector &strides, + Padding pad, TensorFormat fm) + : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -75,14 +75,14 @@ class MklDnnConvUtil { // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's // status. - virtual inline void - GetInputSizeInMklOrder(const TensorShape& input_shape, - memory::dims *input_dims) { - #define CHECK_BOUNDS(val, err_msg) do { \ - OP_REQUIRES(context_, FastBoundsCheck(val, \ - std::numeric_limits::max()), \ - errors::InvalidArgument(err_msg)); \ - }while(0) + virtual inline void GetInputSizeInMklOrder(const TensorShape &input_shape, + memory::dims *input_dims) { +#define CHECK_BOUNDS(val, err_msg) \ + do { \ + OP_REQUIRES(context_, \ + FastBoundsCheck(val, std::numeric_limits::max()), \ + errors::InvalidArgument(err_msg)); \ + } while (0) CHECK_NOTNULL(input_dims); @@ -105,7 +105,7 @@ class MklDnnConvUtil { CHECK_BOUNDS(input_batch_raw, "Input batch too large"); int input_batch = static_cast(input_batch_raw); - #undef CHECK_BOUNDS +#undef CHECK_BOUNDS // MKL-DNN always requires input in NCHW format. *input_dims = {input_batch, input_depth, input_rows, input_cols}; @@ -125,10 +125,9 @@ class MklDnnConvUtil { // forward gets actual tensor as input). // // TODO(nhasabni): Add similar function for input and filter in MklShape. - virtual inline void - GetFilterSizeInMklOrder(const TensorShape& input_shape, - const TensorShape& filter_shape, - memory::dims *filter_dims) { + virtual inline void GetFilterSizeInMklOrder(const TensorShape &input_shape, + const TensorShape &filter_shape, + memory::dims *filter_dims) { CHECK_NOTNULL(filter_dims); OP_REQUIRES(context_, filter_shape.dims() == 4, @@ -136,17 +135,18 @@ class MklDnnConvUtil { filter_shape.DebugString())); for (int i = 0; i < 3; i++) { - OP_REQUIRES(context_, FastBoundsCheck(filter_shape.dim_size(i), - std::numeric_limits::max()), - errors::InvalidArgument("filter too large")); + OP_REQUIRES(context_, + FastBoundsCheck(filter_shape.dim_size(i), + std::numeric_limits::max()), + errors::InvalidArgument("filter too large")); } int input_depth = GetTensorDim(input_shape, data_format_, 'C'); - OP_REQUIRES( - context_, input_depth == filter_shape.dim_size(2), - errors::InvalidArgument("input and filter must have the same depth: ", - input_depth, " vs ", filter_shape.dim_size(2))); + OP_REQUIRES(context_, input_depth == filter_shape.dim_size(2), + errors::InvalidArgument( + "input and filter must have the same depth: ", input_depth, + " vs ", filter_shape.dim_size(2))); // TF filter is always in (rows, cols, in_depth, out_depth) order. int filter_rows = static_cast(filter_shape.dim_size(0)); @@ -163,25 +163,25 @@ class MklDnnConvUtil { // requires filter in OIHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's // status. - virtual inline void - GetFilterSizeInMklOrder(size_t src_index, size_t filter_index, - memory::dims *filter_dims) { + virtual inline void GetFilterSizeInMklOrder(size_t src_index, + size_t filter_index, + memory::dims *filter_dims) { CHECK_NOTNULL(filter_dims); - const Tensor& input = MklGetInput(context_, src_index); - const Tensor& filter = MklGetInput(context_, filter_index); + const Tensor &input = MklGetInput(context_, src_index); + const Tensor &filter = MklGetInput(context_, filter_index); GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims); } // Calculate Bias size for 2D Convolution. Function does not return // anything, but sets error in context status. - virtual inline void - GetBiasSizeInMklOrder(size_t bias_index, memory::dims *bias_dims) { - const Tensor& bias = MklGetInput(context_, bias_index); + virtual inline void GetBiasSizeInMklOrder(size_t bias_index, + memory::dims *bias_dims) { + const Tensor &bias = MklGetInput(context_, bias_index); OP_REQUIRES(context_, bias.dims() == 1, errors::InvalidArgument("bias must be 1-dimensional: ", bias.shape().DebugString())); - *bias_dims = { static_cast(bias.dim_size(0)) }; + *bias_dims = {static_cast(bias.dim_size(0))}; } // Function to calculate output and padding size for 2D convolution. @@ -193,13 +193,11 @@ class MklDnnConvUtil { // status is returned via context status. // // TODO(nhasabni): Add similar function for input and filter in MklShape. - virtual inline void - GetOutputAndPadSizeInMklOrder(const TensorShape& input_shape, - const TensorShape& filter_shape, - const memory::dims& strides, - memory::dims *output_dims_tf_order, - memory::dims *output_dims_mkl_order, - memory::dims *pad_l, memory::dims *pad_r) { + virtual inline void GetOutputAndPadSizeInMklOrder( + const TensorShape &input_shape, const TensorShape &filter_shape, + const memory::dims &strides, memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, memory::dims *pad_l, + memory::dims *pad_r) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -225,21 +223,21 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, - GetWindowedOutputSizeVerbose(input_rows, filter_rows, stride_rows, - padding_, &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, - GetWindowedOutputSizeVerbose(input_cols, filter_cols, stride_cols, - padding_, &out_cols, &pad_left, &pad_right)); + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_rows, filter_rows, stride_rows, padding_, + &out_rows, &pad_top, &pad_bottom)); + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_cols, filter_cols, stride_cols, padding_, + &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) - TensorShape out_shape = ShapeFromFormat(data_format_, out_batch, - out_rows, out_cols, out_depth); + TensorShape out_shape = + ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth); *output_dims_tf_order = TFShapeToMklDnnDims(out_shape); // MKL-DNN always needs output in NCHW format. *output_dims_mkl_order = {out_batch, out_depth, static_cast(out_rows), - static_cast(out_cols)}; + static_cast(out_cols)}; // Now handle padding. MKL-DNN uses asymetric padding. *pad_l = {static_cast(pad_top), static_cast(pad_left)}; @@ -250,27 +248,25 @@ class MklDnnConvUtil { // See comment on GetConvOutputAndPadSizeInMklOrder for parameters. // // Function does not return anything, but sets error in context status. - inline void - GetOutputAndPadSizeInMklOrder(size_t src_index, size_t filter_index, - const memory::dims& strides, - memory::dims *output_dims_tf_order, - memory::dims *output_dims_mkl_order, - memory::dims *pad_l, memory::dims *pad_r) { + inline void GetOutputAndPadSizeInMklOrder( + size_t src_index, size_t filter_index, const memory::dims &strides, + memory::dims *output_dims_tf_order, memory::dims *output_dims_mkl_order, + memory::dims *pad_l, memory::dims *pad_r) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); CHECK_NOTNULL(pad_r); - const Tensor& input = MklGetInput(context_, src_index); - const Tensor& filter = MklGetInput(context_, filter_index); + const Tensor &input = MklGetInput(context_, src_index); + const Tensor &filter = MklGetInput(context_, filter_index); OP_REQUIRES(context_, input.dims() == 4, errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); + input.shape().DebugString())); - GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), - strides, output_dims_tf_order, - output_dims_mkl_order, pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), strides, + output_dims_tf_order, output_dims_mkl_order, + pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -279,15 +275,12 @@ class MklDnnConvUtil { // also calculates strides and paddings for 2D Convolution. // // Function does not return anything, but sets error in context status. - inline void GetConvFwdSizesInMklOrder(const TensorShape& input_shape, - const TensorShape& filter_shape, - memory::dims *input_dims, - memory::dims *filter_dims, - memory::dims *strides, - memory::dims *output_dims_tf_order, - memory::dims *output_dims_mkl_order, - memory::dims *pad_l, - memory::dims *pad_r) { + inline void GetConvFwdSizesInMklOrder( + const TensorShape &input_shape, const TensorShape &filter_shape, + memory::dims *input_dims, memory::dims *filter_dims, + memory::dims *strides, memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, memory::dims *pad_l, + memory::dims *pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); @@ -302,8 +295,7 @@ class MklDnnConvUtil { if (!context_->status().ok()) return; GetStridesInMklOrder(strides); GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, - output_dims_tf_order, - output_dims_mkl_order, + output_dims_tf_order, output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; } diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index 0a5be4fec9..a240ee44fb 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifdef INTEL_MKL + #ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ #define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ -#ifdef INTEL_MKL - #include #include #include "tensorflow/core/framework/numeric_op.h" @@ -35,10 +35,6 @@ limitations under the License. #include "mkl_dnn_types.h" #include "tensorflow/core/util/mkl_util.h" -#ifdef INTEL_MKL_DNN -using mkldnn::stream; -#endif - namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -61,71 +57,6 @@ class MklToTfOp : public OpKernel { VLOG(1) << "MKLToTFConversion complete successfully."; } -#ifdef INTEL_MKL_DNN - static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, - string data_format_str, DataType op_data_type, - bool has_avx512f, uint input_number) { - try { - // Check that input tensor is in MKL format. - const Tensor& input_tensor = MklGetInput(context, input_number); - MklDnnShape input_shape; - GetMklShape(context, input_number, &input_shape); - - // if input is already in Tf format, then copy input tensor to output. - if (!input_shape.IsMklTensor()) { - context->set_output(input_number, input_tensor); - VLOG(1) << "MKLToTFConversion: No conversion needed, " - << "copying input to output"; - return; - } - - // Check that input data type is same as operator data type and that it - // is same as output data type. - DataType input_data_type = op_kernel->input_type(input_number); - DataType output_data_type = op_kernel->output_type(input_number); - CHECK_EQ(op_data_type, input_data_type); - CHECK_EQ(op_data_type, output_data_type); - - auto cpu_engine = engine(engine::cpu, 0); - MklDnnData input(&cpu_engine); - - // Get Mkl layout of input tensor. - auto input_mkl_md = input_shape.GetMklLayout(); - // Get TensorFlow layout of input tensor. Expected output of conversion - // has same layout as Tensorflow layout of input tensor. - auto output_tf_md = input_shape.GetTfLayout(); - auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); - // Set input Mkl layout as the user layout. - input.SetUsrMem(input_mkl_md, &input_tensor); - - // Allocate output tensor. - TensorShape output_shape = input_shape.GetTfShape(); - Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(input_number, - output_shape, &output_tensor)); - CHECK_NOTNULL(output_tensor); - - // Do we need to reorder Mkl layout into TensorFlow layout? - if (input.IsReorderNeeded(output_tf_pd)) { - // Insert reorder between Mkl layout and TensorFlow layout. - std::vector net; - CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, output_tensor, &net), - true); - stream(stream::kind::eager).submit(net).wait(); - } else { - // If not, just forward input tensor to output tensor. - CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); - } - } catch (mkldnn::error &e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + std::string(e.message) + - ", in file " + std::string(__FILE__) + ":" + - std::to_string(__LINE__); - OP_REQUIRES_OK(context, - errors::Aborted("Operation received an exception:", error_msg)); - } - } -#else static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, string data_format_str, DataType op_data_type, bool has_avx512f, uint input_number) { @@ -160,8 +91,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(input_number, - output_shape, &output_tensor)); + OP_REQUIRES_OK(context, + context->allocate_output(input_number, output_shape, &output_tensor)); dnnLayout_t output_layout = static_cast(input_shape.GetTfLayout()); @@ -175,7 +106,6 @@ class MklToTfOp : public OpKernel { output_buffer); VLOG(1) << "MKLToTFConversion complete successfully."; } -#endif private: /// Data format of the operation @@ -202,5 +132,5 @@ class MklToTfOp : public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_CPU); #undef REGISTER_CPU } // namespace tensorflow -#endif // INTEL_MKL #endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ +#endif // INTEL_MKL diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index ac90f67ce0..7dee751c4f 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -143,7 +143,7 @@ void DnnPoolingOp::Compute( perftools::gputools::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape, bool propagate_nans) { + const TensorShape& tensor_out_shape) { Tensor* tensor_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, tensor_out_shape, &tensor_out)); @@ -188,8 +188,7 @@ void DnnPoolingOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols) - .set_propagate_nans(propagate_nans); + .set_horizontal_padding(params.pad_cols); perftools::gputools::dnn::BatchDescriptor input_desc; input_desc.set_count(params.tensor_in_batch) @@ -238,7 +237,7 @@ void DnnPoolingGradOp::Compute( const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape, bool propagate_nans) { + const TensorShape& tensor_in_shape) { CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) || (tensor_in && tensor_out)) << "For MaxPoolGrad, both tensor_in and tensor_out needs to be " @@ -328,8 +327,7 @@ void DnnPoolingGradOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols) - .set_propagate_nans(propagate_nans); + .set_horizontal_padding(params.pad_cols); perftools::gputools::dnn::BatchDescriptor orig_output_desc; orig_output_desc.set_count(params.tensor_in_batch) diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index 1458456585..b594f39fad 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -44,7 +44,7 @@ class DnnPoolingOp { const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape, bool propagate_nans); + const TensorShape& tensor_out_shape); }; // A helper class that launch the cudnn pooling backward operations. @@ -60,7 +60,7 @@ class DnnPoolingGradOp { const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape, bool propagate_nans); + const TensorShape& tensor_in_shape); }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 337c8e5c17..8be0c56798 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -489,7 +489,7 @@ class QuantizedAddOp : public OpKernel { // adding zero leaves the result unchanged, and to contain the largest of // the two input values with some room to spare. const float smallest_min = std::min(min_x, min_y); - const float largest_max = std::max(max_x, max_y); + const float largest_max = std::min(max_x, max_y); const float biggest_range = std::max(std::abs(smallest_min), std::abs(largest_max)); const float output_range = (biggest_range * (1 << 14)); diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 55a8b9c9b6..a37c757865 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -577,7 +577,7 @@ struct FillPhiloxRandomKernel { const size_t kGroupSize = Distribution::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(); + const size_t total_item_count = item.get_global_range(0); size_t offset = item_id * kGroupSize; gen_.Skip(item_id); @@ -633,7 +633,7 @@ struct FillPhiloxRandomKernel { PhiloxRandom::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(); + const size_t total_item_count = item.get_global_range(0); size_t group_index = item_id; size_t offset = group_index * kGroupSize; diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 2334e50f1d..4302a68a18 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -376,9 +376,6 @@ struct UnsortedSegmentSumFunctor auto data_flat = typename TTypes::ConstTensor(data, N, data_size / N); for (int64 i = 0; i < N; ++i) { Index j = internal::SubtleMustCopy(segment_ids(i)); - if (j < 0) { - continue; - } OP_REQUIRES(ctx, FastBoundsCheck(j, output_rows), errors::InvalidArgument( "segment_ids", SliceDebugString(segment_ids_shape, i), diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index b10bea72ba..412c1d601d 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -30,14 +30,14 @@ namespace functor { #ifdef GOOGLE_CUDA typedef Eigen::GpuDevice GPUDevice; // Functor for SegmentSumGPUOp. -// output_rows: the number of output segments (unique segment ids in +// 'output_rows': the number of output segments (unique segment ids in // 'segment_ids'). -// segment_ids_shape: shape of 'segment_ids' tensor. -// segment_ids: unsorted map from input to output segment ids at which to +// 'segment_ids_shape': shape of 'segment_ids' tensor. +// 'segment_ids': unsorted map from input to output segment ids at which to // perform segment sum operation. -// data_size: size of input data tensor. -// data: input data tensor. -// output: output reshaped to {output_rows, output.size/output_rows} +// 'data_size': size of input data tensor. +// 'data': input data tensor. +// 'output': output reshaped to {output_rows, output.size/output_rows} template struct SegmentSumFunctor { void operator()(OpKernelContext* ctx, const GPUDevice& d, @@ -61,14 +61,14 @@ struct UnsortedSegmentBaseFunctor{ }; // Functor for UnsortedSegmentSumOp. -// output_rows: the number of output segments (unique segment ids in +// 'output_rows': the number of output segments (unique segment ids in // 'segment_ids'). -// segment_ids_shape: shape of 'segment_ids' tensor. -// segment_ids: unsorted map from input to output segment ids at which to +// 'segment_ids_shape': shape of 'segment_ids' tensor. +// 'segment_ids': unsorted map from input to output segment ids at which to // perform segment sum operation. -// data_size: size of input data tensor. -// data: input data tensor. -// output: output reshaped to {output_rows, output.size/output_rows} +// 'data_size': size of input data tensor. +// 'data': input data tensor. +// 'output': output reshaped to {output_rows, output.size/output_rows} template struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, @@ -79,14 +79,14 @@ struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor struct UnsortedSegmentMaxFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc index 28a39bae3f..721f9b949b 100644 --- a/tensorflow/core/kernels/shape_ops.cc +++ b/tensorflow/core/kernels/shape_ops.cc @@ -341,12 +341,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .Device(DEVICE_CPU) .HostMemory("dim") .TypeConstraint("Tdim"), - ExpandDimsOp); -REGISTER_KERNEL_BUILDER(Name("ExpandDims") - .Device(DEVICE_CPU) - .HostMemory("dim") - .TypeConstraint("Tdim"), - ExpandDimsOp); + ExpandDimsOp); #if GOOGLE_CUDA #define REGISTER_GPU_KERNEL(type) \ @@ -355,13 +350,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); \ - REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .TypeConstraint("Tdim") \ - .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); TF_CALL_bool(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL @@ -373,15 +362,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); -REGISTER_KERNEL_BUILDER(Name("ExpandDims") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .TypeConstraint("Tdim") - .HostMemory("input") - .HostMemory("dim") - .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL @@ -391,13 +372,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); \ - REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tdim") \ - .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); TF_CALL_bool(REGISTER_SYCL_KERNEL); #undef REGISTER_SYCL_KERNEL @@ -409,15 +384,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); -REGISTER_KERNEL_BUILDER(Name("ExpandDims") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tdim") - .HostMemory("input") - .HostMemory("dim") - .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); #endif // TENSORFLOW_USE_SYCL // Squeeze --------------------------------------- diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h index 8d9d0ea846..ac607f4e8b 100644 --- a/tensorflow/core/kernels/shape_ops.h +++ b/tensorflow/core/kernels/shape_ops.h @@ -145,7 +145,6 @@ class SizeOp : public OpKernel { bool IsExpensive() override { return false; } }; -template class ExpandDimsOp : public OpKernel { public: explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -154,7 +153,7 @@ class ExpandDimsOp : public OpKernel { OP_REQUIRES(ctx, ctx->input(0).dtype() != DT_VARIANT, errors::InvalidArgument("ExpandDims on Variant not supported")); - Tdim dim = ctx->input(1).flat()(0); + int32 dim = ctx->input(1).flat()(0); OP_REQUIRES( ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()), errors::InvalidArgument("Tried to expand dim index ", dim, @@ -176,7 +175,7 @@ class ExpandDimsOp : public OpKernel { } // Clamp to the end if needed. - dim = std::min(dim, existing_dims_size); + dim = std::min(dim, existing_dims_size); new_shape.emplace(new_shape.begin() + dim, 1); const TensorShape output_shape(new_shape); @@ -235,10 +234,10 @@ class SqueezeOp : public OpKernel { if (!wrapped_squeeze_dims.empty()) { if (wrapped_squeeze_dims.count(i) > 0) { OP_REQUIRES(ctx, existing_dim == 1, - errors::InvalidArgument("Tried to explicitly squeeze " - "dimension ", - i, " but dimension was not 1: ", - existing_dim)); + errors::InvalidArgument( + "Tried to explicitly squeeze " + "dimension ", + i, " but dimension was not 1: ", existing_dim)); } else { // This dimension is not being squeezed. new_shape.push_back(existing_dim); diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 28a379774b..d46701749b 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -190,25 +190,41 @@ class SliceOp : public OpKernel { } return; } -#define HANDLE_DIM(NDIM) \ - if (input_dims == NDIM) { \ - functor::Slice()( \ - context->eigen_device(), result, input, begin, size); \ - return; \ +#define HANDLE_DIM(NDIM) \ + if (input_dims == NDIM) { \ + HandleCase(context, begin, size, result); \ + return; \ } + HANDLE_DIM(1); HANDLE_DIM(2); HANDLE_DIM(3); HANDLE_DIM(4); HANDLE_DIM(5); HANDLE_DIM(6); + HANDLE_DIM(7); #undef HANDLE_DIM - // handle cases which dim >= 7 - functor::Slice()( - context->eigen_device(), result, input, begin, size); + OP_REQUIRES(context, false, errors::Unimplemented( + "SliceOp : Unhandled input dimensions")); + } + } + + private: + template + void HandleCase(OpKernelContext* context, const gtl::ArraySlice& begin, + const gtl::ArraySlice& size, Tensor* result) { + Eigen::DSizes indices; + Eigen::DSizes sizes; + for (int i = 0; i < NDIM; ++i) { + indices[i] = begin[i]; + sizes[i] = size[i]; } + + functor::Slice()( + context->eigen_device(), result->tensor(), + context->input(0).tensor(), indices, sizes); } }; @@ -248,16 +264,11 @@ class MklSliceOp : public OpKernel { } return; } - // Special case for handling 4-D tensor slice. - if (input_dims == 4) { - HandleCase4D(context, begin, size, result); - } else { -#define HANDLE_DIM(NDIM) \ - if (input_dims == NDIM) { \ - functor::Slice()( \ - context->eigen_device(), result, input, begin, size); \ - return; \ - } +#define HANDLE_DIM(NDIM) \ + if (input_dims == NDIM) { \ + HandleCase(context, begin, size, result); \ + return; \ + } HANDLE_DIM(1); HANDLE_DIM(2); @@ -265,13 +276,12 @@ class MklSliceOp : public OpKernel { HANDLE_DIM(4); HANDLE_DIM(5); HANDLE_DIM(6); + HANDLE_DIM(7); #undef HANDLE_DIM - // handle cases which dim >= 7 - functor::Slice()( - context->eigen_device(), result, input, begin, size); - } + OP_REQUIRES(context, false, errors::Unimplemented( + "SliceOp : Unhandled input dimensions")); } } @@ -318,7 +328,8 @@ class MklSliceOp : public OpKernel { return false; } - void HandleCase4D(OpKernelContext* context, + template + void HandleCase(OpKernelContext* context, const gtl::ArraySlice& begin, const gtl::ArraySlice& size, Tensor* result) { int slice_dim = -1; @@ -327,7 +338,8 @@ class MklSliceOp : public OpKernel { // differs from the input tensor in only 1 out of 4 dimensions. // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW // format over channel dimension. - if (DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) { + if (NDIM == 4 && + DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) { size_t in_strides[4] = { (size_t) in_shape.dim_size(1) * in_shape.dim_size(2) * in_shape.dim_size(3), @@ -391,8 +403,16 @@ class MklSliceOp : public OpKernel { // slice_dim is not 1 or 3, then we fallback to Eigen implementation. } - functor::Slice()( - context->eigen_device(), result, context->input(0), begin, size); + Eigen::DSizes indices; + Eigen::DSizes sizes; + for (int i = 0; i < NDIM; ++i) { + indices[i] = begin[i]; + sizes[i] = size[i]; + } + + functor::Slice()( + context->eigen_device(), result->tensor(), + context->input(0).tensor(), indices, sizes); } }; #endif @@ -400,13 +420,13 @@ class MklSliceOp : public OpKernel { // Forward declarations of the functor specializations for declared in the // sharded source files. namespace functor { -#define DECLARE_CPU_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const CPUDevice& d, Tensor* output, \ - const Tensor& input, \ - const gtl::ArraySlice& slice_indices, \ - const gtl::ArraySlice& slice_sizes); \ +#define DECLARE_CPU_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const CPUDevice& d, typename TTypes::Tensor output, \ + typename TTypes::ConstTensor input, \ + const Eigen::DSizes& indices, \ + const Eigen::DSizes& sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ @@ -456,14 +476,13 @@ REGISTER_SLICE(bfloat16); #if GOOGLE_CUDA // Forward declarations of the functor specializations for GPU. namespace functor { -#define DECLARE_GPU_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const GPUDevice& d, \ - Tensor* output, \ - const Tensor& input, \ - const gtl::ArraySlice& slice_indices, \ - const gtl::ArraySlice& slice_sizes); \ +#define DECLARE_GPU_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const GPUDevice& d, typename TTypes::Tensor output, \ + typename TTypes::ConstTensor input, \ + const Eigen::DSizes& indices, \ + const Eigen::DSizes& sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ @@ -517,14 +536,13 @@ REGISTER_KERNEL_BUILDER(Name("Slice") #ifdef TENSORFLOW_USE_SYCL // Forward declarations of the functor specializations for SYCL. namespace functor { -#define DECLARE_SYCL_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const SYCLDevice& d, \ - Tensor* output, \ - const Tensor& input, \ - const gtl::ArraySlice& slice_indices, \ - const gtl::ArraySlice& slice_sizes); \ +#define DECLARE_SYCL_SPEC(T, NDIM) \ + template <> \ + void Slice::operator()( \ + const SYCLDevice& d, typename TTypes::Tensor output,\ + typename TTypes::ConstTensor input, \ + const Eigen::DSizes& indices, \ + const Eigen::DSizes& sizes); \ extern template struct Slice; #define DECLARE_FOR_N(T) \ diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h index 55a4be985b..db7eded745 100644 --- a/tensorflow/core/kernels/slice_op.h +++ b/tensorflow/core/kernels/slice_op.h @@ -19,104 +19,31 @@ limitations under the License. // Functor definition for SliceOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/ops_util.h" namespace tensorflow { - -namespace internal { - -template -void SliceSimple(const Device& d, Tensor* out, const Tensor& in, - const gtl::ArraySlice& slice_indices); -template -void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in, - const gtl::ArraySlice& slice_indices); - -template -void SliceSimple(const Device& d, Tensor* out, const Tensor& in, - const gtl::ArraySlice& slice_indices) { - const int ndims = in.dims(); - const int64 nelem = out->NumElements(); - const gtl::InlinedVector in_strides = ComputeStride(in.shape()); - const gtl::InlinedVector out_strides = ComputeStride(out->shape()); - const T* p = in.flat().data(); - T* q = out->flat().data(); - - std::vector i_idx(nelem, 0); - std::vector t(nelem, 0); - - for (int64 o_idx = 0; o_idx < nelem; ++o_idx) { - t[o_idx] = o_idx; - } - for (int i = 0; i < ndims; ++i) { - int64 n = (nelem + 7) / 8; - int64 o_idx = 0; - switch (nelem % 8) { -#define CALC_INPUT_IDX \ - i_idx[o_idx] += (t[o_idx] / out_strides[i] + slice_indices[i]) * in_strides[i]; \ - t[o_idx] %= out_strides[i]; \ - ++o_idx; - case 0: do { CALC_INPUT_IDX; - case 7: CALC_INPUT_IDX; - case 6: CALC_INPUT_IDX; - case 5: CALC_INPUT_IDX; - case 4: CALC_INPUT_IDX; - case 3: CALC_INPUT_IDX; - case 2: CALC_INPUT_IDX; - case 1: CALC_INPUT_IDX; -#undef CALC_INPUT_IDX - } while (--n > 0); - } - } - for (int64 o_idx = 0; o_idx < nelem; ++o_idx) { - q[o_idx] = p[i_idx[o_idx]]; - } -} - -template -void SliceUsingEigen(const Device& d, Tensor* out, const Tensor& in, - const gtl::ArraySlice& slice_indices, - const gtl::ArraySlice& slice_sizes) { - auto input = in.tensor(); - auto output = out->tensor(); - Eigen::DSizes indices; - for (int i = 0; i < NDIMS; ++i) { - indices[i] = slice_indices[i]; - } - Eigen::DSizes sizes; - for (int i = 0; i < NDIMS; ++i) { - sizes[i] = slice_sizes[i]; - } - const bool use_64bit = input.size() > Eigen::NumTraits::highest(); - if (!use_64bit && - Eigen::internal::is_same::value) { - To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes); - } else { - output.device(d) = input.slice(indices, sizes); - } -} - -} // namespace internal - namespace functor { -// Template parameter NDIM is not neccesary here. The aim of keeping it -// is to compile struct slice seperately which minimizes the compiling time. -template +template struct Slice { - void operator()(const Device& d, Tensor* out, const Tensor& in, - const gtl::ArraySlice& slice_indices, - const gtl::ArraySlice& slice_sizes) { - if (in.dims() == NDIM) { - internal::SliceUsingEigen(d, out, in, slice_indices, slice_sizes); + void operator()(const Device& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes) { + bool use_64bit = (input.size() > Eigen::NumTraits::highest()); + if (!use_64bit && + Eigen::internal::is_same::value) { + Eigen::DSizes indices; + for (int i = 0; i < NDIMS; ++i) { + indices[i] = slice_indices[i]; + } + Eigen::DSizes sizes; + for (int i = 0; i < NDIMS; ++i) { + sizes[i] = slice_sizes[i]; + } + To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes); } else { - if (Eigen::internal::is_same::value) { - internal::SliceSimpleGpu(d, out, in, slice_indices); - } else { - internal::SliceSimple(d, out, in, slice_indices); - } + output.device(d) = input.slice(slice_indices, slice_sizes); } } }; diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc index 3039b3d777..a301986f2f 100644 --- a/tensorflow/core/kernels/slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc @@ -21,65 +21,9 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_kernel_helper.h" namespace tensorflow { -namespace internal { - -template -__global__ void SliceKernel(int nthreads, const T* src, const int32* buf, - const int32 ndims, T* dst) { - const int32* in_strides = buf; - const int32* out_strides = buf + ndims; - const int32* slice_indices = buf + ndims * 2; - CUDA_1D_KERNEL_LOOP(o_idx, nthreads) { - int32 i_idx = 0; - int32 t = o_idx; - for (int i = 0; i < ndims; ++i) { - i_idx += (t / out_strides[i] + slice_indices[i]) * in_strides[i]; - t %= out_strides[i]; - } - dst[o_idx] = ldg(src + i_idx); - } -} - -template -void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in, - const gtl::ArraySlice& slice_indices) { - // Ensures we can use 32-bit index. - const int64 in_nelem = in.NumElements(); - CHECK_LT(in_nelem, kint32max) << "Tensor too large to transpose on GPU"; - const int64 out_nelem = out->NumElements(); - CHECK_LT(out_nelem, kint32max) << "Tensor too large to transpose on GPU"; - // Pack strides and slice indices sizes into one buffer. - const int32 ndims = in.dims(); - gtl::InlinedVector host_buf(ndims * 3); - gtl::InlinedVector in_strides = ComputeStride(in.shape()); - gtl::InlinedVector out_strides = ComputeStride(out->shape()); - for (int i = 0; i < ndims; ++i) { - host_buf[i] = in_strides[i]; - host_buf[ndims + i] = out_strides[i]; - host_buf[ndims * 2 + i] = slice_indices[i]; - } - auto num_bytes = sizeof(int64) * host_buf.size(); - auto dev_buf = d.allocate(num_bytes); - // NOTE: host_buf is not allocated by CudaHostAllocator, and - // therefore we are doing a sync copy effectively. - d.memcpyHostToDevice(dev_buf, host_buf.data(), num_bytes); - // Launch kernel to q[...] = p[...]. - const T* p = in.flat().data(); - T* q = out->flat().data(); - CudaLaunchConfig cfg = GetCudaLaunchConfig(out_nelem, d); - SliceKernel<<>>( - cfg.virtual_thread_count, p, reinterpret_cast(dev_buf), - ndims, q); - // Safe to deallocate immediately after the kernel launch. - d.deallocate(dev_buf); -} - -} // namespace internal typedef Eigen::GpuDevice GPUDevice; diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 8fc40db3cc..73b6d4cf6a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index 7d42887426..afe3a051e6 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -84,16 +84,16 @@ void HandleStridedSliceCase(OpKernelContext* context, gtl::InlinedVector processing_dims = processing_shape.dim_sizes(); if (is_simple_slice) { - gtl::InlinedVector sizes(begin.size()); + Eigen::DSizes begin_di; + Eigen::DSizes sizes_di; for (int i = 0; i < NDIM; ++i) { - sizes[i] = end[i] - begin[i]; + begin_di[i] = begin[i]; + sizes_di[i] = end[i] - begin[i]; } - const TensorShape final_shape = result->shape(); - CHECK(result->CopyFrom(*result, processing_shape)); - const Tensor input = context->input(0); - functor::Slice()( - context->eigen_device(), result, input, begin, sizes); - CHECK(result->CopyFrom(*result, final_shape)); + functor::Slice()( + context->eigen_device(), + result->bit_casted_shaped(processing_dims), + context->input(0).bit_casted_tensor(), begin_di, sizes_di); } else { Eigen::DSizes begin_di; Eigen::DSizes end_di; @@ -196,9 +196,10 @@ class HandleStridedSliceAssignCase { extern template struct StridedSlice; \ template <> \ void Slice::operator()( \ - const GPUDevice& d, Tensor* output, const Tensor& input, \ - const gtl::ArraySlice& slice_indices, \ - const gtl::ArraySlice& slice_sizes); \ + const GPUDevice& d, typename TTypes::Tensor output, \ + typename TTypes::ConstTensor input, \ + const Eigen::DSizes& indices, \ + const Eigen::DSizes& sizes); \ extern template struct Slice; \ template <> \ void StridedSliceGrad::operator()( \ @@ -283,6 +284,7 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU); TF_CALL_complex64(DECLARE_FOR_N_GPU); TF_CALL_complex128(DECLARE_FOR_N_GPU); DECLARE_FOR_N_GPU(int32); +DECLARE_FOR_N_GPU(int64); #endif // END GOOGLE_CUDA TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); @@ -298,6 +300,7 @@ DECLARE_FOR_N_CPU(bfloat16); TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL); DECLARE_FOR_N_SYCL(int32); +DECLARE_FOR_N_SYCL(int64); #undef DECLARE_FOR_N_SYCL #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/strided_slice_op_test.cc b/tensorflow/core/kernels/strided_slice_op_test.cc index 78bb15463c..281ca0f58f 100644 --- a/tensorflow/core/kernels/strided_slice_op_test.cc +++ b/tensorflow/core/kernels/strided_slice_op_test.cc @@ -76,69 +76,20 @@ static void SliceHelper(int iters, int size) { testing::UseRealTime(); } -template -static void Dim8SliceHelper(int iters, int size) { - testing::StopTiming(); - Graph* g = new Graph(OpRegistry::Global()); - DataType dt = DataTypeToEnum::v(); - int kDim = 100; - int kMaxSize = 15000; - CHECK_LT(size, kMaxSize); - - Tensor begin(DT_INT32, TensorShape({8})); - begin.flat()(10) = 10; - for (int i = 1; i < 7; ++i) { - begin.flat()(i) = 0; - } - begin.flat()(7) = 10; - - Tensor end(DT_INT32, TensorShape({8})); - end.flat()(0) = 10 + kDim; - for (int i = 1; i < 7; ++i) { - end.flat()(i) = 1; - } - end.flat()(7) = 10 + size; - - Tensor strides(DT_INT32, TensorShape({8})); - for (int i = 0; i < 8; ++i) { - strides.flat()(i) = 1; - } - - Tensor input(dt, TensorShape({2*kDim, 1, 1, 1, 1, 1, 1, kMaxSize})); - input.flat().setRandom(); - - Node* node; - TF_CHECK_OK(NodeBuilder(g->NewName("n"), "StridedSlice") - .Input(test::graph::Constant(g, input)) - .Input(test::graph::Constant(g, begin)) - .Input(test::graph::Constant(g, end)) - .Input(test::graph::Constant(g, strides)) - .Attr("T", dt) - .Finalize(g, &node)); - - testing::BytesProcessed(static_cast(iters) * kDim * size * sizeof(T)); - testing::StartTiming(); - test::Benchmark("cpu", g).Run(iters); - testing::UseRealTime(); -} - static void BM_SliceFloat(int iters, int dim2) { SliceHelper(iters, dim2); - Dim8SliceHelper(iters, dim2); } BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000); static void BM_SliceComplex64(int iters, int dim2) { SliceHelper>(iters, dim2); - Dim8SliceHelper>(iters, dim2); } BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000); static void BM_SliceBFloat16(int iters, int dim2) { SliceHelper(iters, dim2); - Dim8SliceHelper(iters, dim2); } BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000); diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc index 96c051c636..20f0edf309 100644 --- a/tensorflow/core/kernels/transpose_op.cc +++ b/tensorflow/core/kernels/transpose_op.cc @@ -31,14 +31,13 @@ limitations under the License. namespace tensorflow { -// inv = InvertPermutationOp(T p) takes a permutation of +// inv = InvertPermutationOp(T p) takes a permutation of // integers 0, 1, ..., n - 1 and returns the inverted // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n). // -// REQUIRES: input is a vector of int32 or int64. +// REQUIRES: input is a vector of int32. // REQUIRES: input is a permutation of 0, 1, ..., n-1. -template class InvertPermutationOp : public OpKernel { public: explicit InvertPermutationOp(OpKernelConstruction* context) @@ -49,19 +48,20 @@ class InvertPermutationOp : public OpKernel { OP_REQUIRES( context, TensorShapeUtils::IsVector(input.shape()), errors::InvalidArgument("invert_permutation expects a 1D vector.")); - auto Tin = input.vec(); + auto Tin = input.vec(); OP_REQUIRES(context, FastBoundsCheck(Tin.size(), std::numeric_limits::max()), errors::InvalidArgument("permutation of nonnegative int32s " "must have <= int32 max elements")); - const T N = static_cast(Tin.size()); // Safe: bounds-checked above. + const int32 N = + static_cast(Tin.size()); // Safe: bounds-checked above. Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); - auto Tout = output->vec(); + auto Tout = output->vec(); std::fill_n(Tout.data(), N, -1); for (int i = 0; i < N; ++i) { - const T d = internal::SubtleMustCopy(Tin(i)); + const int32 d = internal::SubtleMustCopy(Tin(i)); OP_REQUIRES(context, FastBoundsCheck(d, N), errors::InvalidArgument(d, " is not between 0 and ", N)); OP_REQUIRES(context, Tout(d) == -1, @@ -73,23 +73,14 @@ class InvertPermutationOp : public OpKernel { REGISTER_KERNEL_BUILDER( Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), - InvertPermutationOp); -REGISTER_KERNEL_BUILDER( - Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), - InvertPermutationOp); + InvertPermutationOp); REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); -REGISTER_KERNEL_BUILDER(Name("InvertPermutation") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .HostMemory("x") - .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); #ifdef TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("InvertPermutation") @@ -97,13 +88,7 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); -REGISTER_KERNEL_BUILDER(Name("InvertPermutation") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("x") - .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); #endif // TENSORFLOW_USE_SYCL namespace { diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index d087784c8a..701c5f6d2b 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include #include #include @@ -22,7 +21,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { @@ -35,6 +33,8 @@ class UniqueOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); // TODO(dga): Make unique polymorphic for returning int32 and int64 // vectors to support large tensors. OP_REQUIRES(context, @@ -42,102 +42,31 @@ class UniqueOp : public OpKernel { errors::InvalidArgument( "unique does not support input tensors larger than ", std::numeric_limits::max(), " elements")); - - int64 axis = 0; - std::vector new_sizes{1, input.NumElements(), 1}; - if (context->num_inputs() == 1) { - OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), - errors::InvalidArgument("unique expects a 1D vector.")); - } else { - // In case of UniqueV2, the axis is a 1D vector. The purpose is - // to allow specifying either "no axis" or "axis". The `[]` means - // "no axis", while `[x]` means `axis = x`. - const Tensor& axis_tensor = context->input(1); - OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()), - errors::InvalidArgument("axis expects a 1D vector.")); - OP_REQUIRES( - context, axis_tensor.NumElements() <= 1, - errors::InvalidArgument( - "axis does not support input tensors larger than 1 elements")); - if (axis_tensor.NumElements() == 0) { - OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), - errors::InvalidArgument("unique expects a 1D vector.")); - } else { - auto axis_vec = axis_tensor.vec(); - axis = axis_vec(0); - axis = axis < 0 ? axis + input.dims() : axis; - OP_REQUIRES(context, 0 <= axis && axis < input.dims(), - errors::InvalidArgument("axis has to be between [0, ", - input.dims(), ")")); - if (axis > 0) { - for (int64 i = 0; i < axis; i++) { - new_sizes[0] *= input.dim_size(i); - } - } - new_sizes[1] = input.dim_size(axis); - if (axis + 1 < input.dims()) { - for (int64 i = axis + 1; i < input.dims(); i++) { - new_sizes[2] *= input.dim_size(i); - } - } - } - } - - auto Tin = input.shaped(new_sizes); + auto Tin = input.vec(); + const int64 N = static_cast(Tin.size()); Tensor* idx = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 1, TensorShape({Tin.dimension(1)}), &idx)); + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 1, input.shape(), &idx)); auto idx_vec = idx->template vec(); - auto hash_fn = [&Tin](const int64& key) -> unsigned long { - size_t h = 0; - for (int64 i = 0; i < Tin.dimension(0); i++) { - for (int64 j = 0; j < Tin.dimension(2); j++) { - h = Hash64Combine(h, hash{}(Tin(i, key, j))); - } - } - return h; - }; - - auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) { - for (int64 i = 0; i < Tin.dimension(0); i++) { - for (int64 j = 0; j < Tin.dimension(2); j++) { - if (Tin(i, lhs, j) != Tin(i, rhs, j)) { - return false; - } - } - } - return true; - }; - - std::unordered_map - uniq(0, hash_fn, equal_to_fn); - - uniq.reserve(2 * Tin.dimension(1)); - - for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) { - auto it = uniq.insert(std::make_pair(i, j)); + std::unordered_map uniq; + uniq.reserve(2 * N); + for (int64 i = 0, j = 0; i < N; ++i) { + auto it = uniq.insert(std::make_pair(Tin(i), j)); idx_vec(i) = it.first->second; if (it.second) { ++j; } } - int64 uniq_size = static_cast(uniq.size()); - new_sizes[1] = uniq_size; - TensorShape output_shape(input.shape()); - output_shape.set_dim(axis, uniq_size); Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); - auto Tout = output->shaped(new_sizes); + OP_REQUIRES_OK(context, context->allocate_output( + 0, TensorShape({uniq_size}), &output)); + auto output_vec = output->template vec(); for (auto it : uniq) { - for (int64 i = 0; i < Tin.dimension(0); i++) { - for (int64 j = 0; j < Tin.dimension(2); j++) { - Tout(i, it.second, j) = Tin(i, it.first, j); - } - } + output_vec(it.second) = it.first; } if (num_outputs() > 2) { @@ -145,7 +74,7 @@ class UniqueOp : public OpKernel { 2, TensorShape({uniq_size}), &output)); auto count_output_vec = output->template vec(); count_output_vec.setZero(); - for (int64 i = 0; i < Tin.dimension(1); ++i) { + for (int64 i = 0; i < N; ++i) { count_output_vec(idx_vec(i))++; } } @@ -163,16 +92,6 @@ class UniqueOp : public OpKernel { .TypeConstraint("T") \ .TypeConstraint("out_idx"), \ UniqueOp); \ - REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("out_idx"), \ - UniqueOp); \ - REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("out_idx"), \ - UniqueOp); \ REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ @@ -257,5 +176,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique") .HostMemory("y") .HostMemory("idx"), UniqueOp); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 9fa6423d59..be2916f154 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -723,9 +723,7 @@ y: a tensor of the same shape and type as x but filled with zeros. REGISTER_OP("OnesLike") .Input("x: T") .Output("y: T") - .Attr( - "T: {float, double, int8, uint8, int16, uint16, int32, int64, " - "complex64, complex128, bool}") + .Attr("T: {float, double, int32, int64, complex64, complex128}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns a tensor of ones with the same shape and type as x. @@ -2033,46 +2031,6 @@ y: 1-D. idx: 1-D. )doc"); -REGISTER_OP("UniqueV2") - .Input("x: T") - .Input("axis: int64") - .Output("y: T") - .Output("idx: out_idx") - .Attr("T: type") - .Attr("out_idx: {int32, int64} = DT_INT32") - .SetShapeFn([](InferenceContext* c) { - c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(1, c->input(0)); - return Status::OK(); - }) - .Doc(R"doc( -Finds unique elements in a 1-D tensor. - -This operation returns a tensor `y` containing all of the unique elements of `x` -sorted in the same order that they occur in `x`. This operation also returns a -tensor `idx` the same size as `x` that contains the index of each value of `x` -in the unique output `y`. In other words: - -`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` - -For example: - -``` -# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -y, idx = unique(x) -y ==> [1, 2, 4, 7, 8] -idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -``` - - -x: A `Tensor`. -axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to - find the unique elements. -y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. -idx: A 1-D Tensor. Has the same type as x that contains the index of each - value of x in the output y. -)doc"); - // -------------------------------------------------------------------------- REGISTER_OP("UniqueWithCounts") .Input("x: T") diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index d30b847696..7b10af9f44 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1829,8 +1829,6 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. -If the given segment ID `i` is negative, the value is dropped and will not be -added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index a242a13878..e245c8ba91 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -819,7 +819,7 @@ REGISTER_OP("DepthwiseConv2dNative") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -945,7 +945,7 @@ REGISTER_OP("Conv3D") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -977,7 +977,7 @@ REGISTER_OP("Conv3DBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropInputV2") @@ -1003,7 +1003,7 @@ REGISTER_OP("Conv3DBackpropFilter") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropFilterV2") @@ -1032,7 +1032,7 @@ REGISTER_OP("Conv3DBackpropInputV2") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -1069,7 +1069,7 @@ REGISTER_OP("Conv3DBackpropFilterV2") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 9c41957ae6..6ce0b70c9d 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5449,7 +5449,6 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5516,7 +5515,6 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5572,7 +5570,6 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5638,7 +5635,6 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5694,7 +5690,6 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index f2fadb4558..f746b15fee 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -12,7 +12,6 @@ load("//tensorflow:tensorflow.bzl", "tf_copts") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path") -load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp") cc_library( name = "gtest", @@ -195,16 +194,17 @@ cc_library( cc_library( name = "sycl", - data = if_ccpp([ + data = [ "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")), - ]), - linkopts = if_ccpp([ - "-Wl,-rpath,../local_config_sycl/sycl/lib", - ]), - deps = if_ccpp( - ["@local_config_sycl//sycl:syclrt"], - ["@local_config_sycl//sycl:sycl_headers"], - ), + ], + linkopts = select({ + "//conditions:default": [ + "-Wl,-rpath,../local_config_sycl/sycl/lib", + ], + }), + deps = [ + "@local_config_sycl//sycl:syclrt", + ], ) filegroup( diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h index 5c401b7477..6a214dbd0a 100644 --- a/tensorflow/core/platform/default/notification.h +++ b/tensorflow/core/platform/default/notification.h @@ -73,7 +73,7 @@ class Notification { } mutex mu_; // protects mutations of notified_ - condition_variable cv_; // signaled when notified_ becomes non-zero + condition_variable cv_; // signalled when notified_ becomes non-zero std::atomic notified_; // mutations under mu_ }; diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc index f8b0285c50..e9baad5422 100644 --- a/tensorflow/core/platform/posix/error.cc +++ b/tensorflow/core/platform/posix/error.cc @@ -72,7 +72,7 @@ error::Code ErrnoToCode(int err_number) { case EBUSY: // Device or resource busy case ECHILD: // No child processes case EISCONN: // Socket is connected -#if !defined(_WIN32) && !defined(__HAIKU__) +#if !defined(_WIN32) case ENOTBLK: // Block device required #endif case ENOTCONN: // The socket is not connected @@ -94,7 +94,7 @@ error::Code ErrnoToCode(int err_number) { case ENODATA: // No message is available on the STREAM read queue case ENOMEM: // Not enough space case ENOSR: // No STREAM resources -#if !defined(_WIN32) && !defined(__HAIKU__) +#if !defined(_WIN32) case EUSERS: // Too many users #endif code = error::RESOURCE_EXHAUSTED; @@ -111,7 +111,7 @@ error::Code ErrnoToCode(int err_number) { case EPFNOSUPPORT: // Protocol family not supported #endif case EPROTONOSUPPORT: // Protocol not supported -#if !defined(_WIN32) && !defined(__HAIKU__) +#if !defined(_WIN32) case ESOCKTNOSUPPORT: // Socket type not supported #endif case EXDEV: // Improper link @@ -131,8 +131,7 @@ error::Code ErrnoToCode(int err_number) { case ENETUNREACH: // Network unreachable case ENOLCK: // No locks available case ENOLINK: // Link has been severed -#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \ - || defined(__HAIKU__)) +#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32)) case ENONET: // Machine is not on the network #endif code = error::UNAVAILABLE; @@ -157,7 +156,7 @@ error::Code ErrnoToCode(int err_number) { case ENOEXEC: // Exec format error case ENOMSG: // No message of the desired type case EPROTO: // Protocol error -#if !defined(_WIN32) && !defined(__HAIKU__) +#if !defined(_WIN32) case EREMOTE: // Object is remote #endif code = error::UNKNOWN; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 09f69a95c1..6cba40ccfc 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,8 +37,7 @@ limitations under the License. #ifdef TF_USE_SNAPPY #include "snappy.h" #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ - || defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) #include #endif @@ -62,8 +61,7 @@ int NumSchedulableCPUs() { } perror("sched_getaffinity"); #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ - || defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) unsigned int count = std::thread::hardware_concurrency(); if (count > 0) return static_cast(count); #endif diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index ec077c4283..1bf9c93101 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 118ff0d0d6..1bfa4f83a3 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -26,23 +26,18 @@ limitations under the License. #include "mkl_trans.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" + #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" - -using mkldnn::memory; -using mkldnn::reorder; -using mkldnn::primitive; -using mkldnn::padding_kind; -using mkldnn::engine; #endif // The file contains a number of utility classes and functions used by MKL @@ -56,8 +51,6 @@ namespace tensorflow { // Tensorflow tensor. typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims; -typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3, - Dim_O = 0, Dim_I = 1 } MklDnnDims; class MklShape { public: @@ -150,9 +143,7 @@ class MklShape { size_t GetDimension() const { return dimension_; } const size_t* GetSizes() const { return sizes_; } int64 dim_size(int index) const { return sizes_[index]; } - int64 tf_dim_size(int index) const { - return sizes_[tf_to_mkl_dim_map_[index]]; - } + int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; } const size_t* GetStrides() const { return strides_; } const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; } size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; } @@ -236,8 +227,7 @@ class MklShape { (IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_ // Location of sizes. Note dim is not used here, left here // to make macros consistent. -#define SIZES_OFFSET(dims) \ - (DIMS_OFFSET + sizeof(size_t)) +#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t)) #define STRIDES_OFFSET(dims) \ (SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides #define MKL_LAYOUT_OFFSET(dims) \ @@ -319,266 +309,6 @@ class MklShape { nullptr; // TF dimension corresponding to this MKL dimension }; -#ifdef INTEL_MKL_DNN - -// Forward decl -TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format); - -class MklDnnShape { - private: - typedef struct { - /// Flag to indicate if the tensor is an MKL tensor or not - bool is_mkl_tensor_ = false; - /// Number of dimensions in Tensorflow format - size_t dimension_ = 0; - /// Required by MKLDNN for conversions - mkldnn_dims_t sizes_; // Required by MKL for conversions - memory::format tf_data_format_ = memory::format::format_undef; - memory::data_type T_ = memory::data_type::data_undef; - // MKL layout - mkldnn_memory_desc_t mkl_md_; - /// TF dimension corresponding to this MKL dimension - mkldnn_dims_t map_; - } MklShapeData; - MklShapeData data_; - - typedef std::remove_extent::type mkldnn_dim_t; -#define INVALID_DIM_SIZE -1 - - - public: - MklDnnShape() { - for (size_t i = 0; i < sizeof(data_.sizes_) / - sizeof(data_.sizes_[0]); ++i) { - data_.sizes_[i] = -1; - } - for (size_t i = 0; i < sizeof(data_.map_) / - sizeof(data_.map_[0]); ++i) { - data_.map_[i] = -1; - } - } - - ~MklDnnShape() {} - TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape); // Cannot copy - - inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; } - inline void SetMklTensor(bool is_mkl_tensor) { - data_.is_mkl_tensor_ = is_mkl_tensor; - } - - inline void SetDimensions(const size_t dimension) { - data_.dimension_ = dimension; - } - inline size_t GetDimension(char dimension)const { - int index = GetMklDnnTensorDimIndex(dimension); - CHECK(index >= 0 && index < this->GetDimension()) - << "Invalid index from the dimension: " << index << ", " << dimension; - return this->DimSize(index); - } - - inline int32 GetMklDnnTensorDimIndex(char dimension)const { - switch (dimension) { - case 'N': - return MklDnnDims::Dim_N; - case 'C': - return MklDnnDims::Dim_C; - case 'H': - return MklDnnDims::Dim_H; - case 'W': - return MklDnnDims::Dim_W; - default: - LOG(FATAL) << "Invalid dimension: " << dimension; - return -1; // Avoid compiler warning about missing return value - } - } - - inline size_t GetDimension() const { return data_.dimension_; } - inline const int* GetSizes() const { - return reinterpret_cast(&data_.sizes_[0]); - } - - // Returns an mkldnn::memory::dims object that contains the sizes of this - // MklDnnShape object. - inline memory::dims GetSizesAsMklDnnDims() const { - memory::dims retVal; - if (data_.is_mkl_tensor_) { - int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); - for (size_t i = 0 ; i < dimensions; i++) { - if (data_.sizes_[i] != INVALID_DIM_SIZE) - retVal.push_back(data_.sizes_[i]); - } - } else { - CHECK_EQ(data_.is_mkl_tensor_, true); - } - return retVal; - } - - inline int64 DimSize(int index) const { - CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0])); - return data_.sizes_[index]; - } - - /// Return TensorShape that describes the Tensorflow shape of the tensor - /// represented by this MklShape. - inline TensorShape GetTfShape() { - CHECK_EQ(data_.is_mkl_tensor_, true); - - std::vector shape(data_.dimension_, -1); - for (size_t idx = 0; idx < data_.dimension_; ++idx) { - shape[idx] = data_.sizes_[TfDimIdx(idx)]; - } - - TensorShape ts; - bool ret = TensorShapeUtils::MakeShape(shape, &ts).ok(); - CHECK_EQ(ret, true); - return ts; - } - - inline void SetElemType(memory::data_type dt) { data_.T_ = dt; } - inline const memory::data_type GetElemType() { return data_.T_; } - - inline void SetMklLayout(memory::primitive_desc* pd) { - CHECK_NOTNULL(pd); - data_.mkl_md_ = pd->desc().data; - } - inline const memory::desc GetMklLayout() const { - return memory::desc(data_.mkl_md_); - } - - inline memory::format GetTfDataFormat() const { - return data_.tf_data_format_; - } - /// We don't create primitive_descriptor for TensorFlow layout now. - /// We use lazy evaluation and create it only when needed. - inline void SetTfLayout(size_t dims, const memory::dims& sizes, - memory::format format) { - CHECK_EQ(dims, sizes.size()); - data_.dimension_ = dims; - for (size_t ii = 0; ii < dims; ii++) { - data_.sizes_[ii] = sizes[ii]; - } - data_.tf_data_format_ = format; - SetTfDimOrder(dims, format); - } - inline const memory::desc GetTfLayout() const { - memory::dims dims; - for (size_t ii = 0; ii < data_.dimension_; ii++) { - dims.push_back(data_.sizes_[ii]); - } - return memory::desc(dims, data_.T_, data_.tf_data_format_); - } - inline const memory::desc GetCurLayout() const { - return IsMklTensor() ? GetMklLayout() : GetTfLayout(); - } - - // nhasabni - I've removed SetTfDimOrder that was setting default order in - // case of MKL-ML. We don't need a case of default dimension order because - // when an operator that does not get data_format attribute gets all inputs - // in Tensorflow format, it will produce output in Tensorflow format. - inline void SetTfDimOrder(const size_t dimension, const mkldnn_dims_t map) { - CHECK(dimension == data_.dimension_); - for (size_t ii = 0; ii < dimension; ii++) { - data_.map_[ii] = map[ii]; - } - } - - inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) { - // TODO(nhasabni): Why do we restrict this to 4D? - CHECK_EQ(dimension, 4); - CHECK(dimension == data_.dimension_); - data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; - data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; - data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C; - data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; - } - - inline void SetTfDimOrder(const size_t dimension, memory::format format) { - TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format); - SetTfDimOrder(dimension, data_format); - } - - inline const mkldnn_dim_t* GetTfToMklDimMap() const { - return &data_.map_[0]; - } - inline size_t TfDimIdx(int index) const { return data_.map_[index]; } - inline int64 TfDimSize(int index) const { - return data_.sizes_[TfDimIdx(index)]; - } - - /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' - /// corresponds to MKL's Channel dimension. - inline bool IsMklChannelDim(int d) const { - return TfDimIdx(d) == MklDnnDims::Dim_C; - } - /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' - /// corresponds to MKL's Batch dimension. - inline bool IsMklBatchDim(int d) const { - return TfDimIdx(d) == MklDnnDims::Dim_N; - } - /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' - /// corresponds to MKL's Width dimension. - inline bool IsMklWidthDim(int d) const { - return TfDimIdx(d) == MklDnnDims::Dim_W; - } - /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' - /// corresponds to MKL's Height dimension. - inline bool IsMklHeightDim(int d) const { - return TfDimIdx(d) == MklDnnDims::Dim_H; - } - - /// Check if the TF-Mkl dimension ordering map specifies if the input - /// tensor is in NCHW format. - inline bool IsTensorInNCHWFormat() const { - TensorFormat data_format = FORMAT_NCHW; - return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && - IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && - IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && - IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); - } - - /// Check if the TF-Mkl dimension ordering map specifies if the input - /// tensor is in NHWC format. - inline bool IsTensorInNHWCFormat() const { - TensorFormat data_format = FORMAT_NHWC; - return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && - IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && - IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && - IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); - } - - /// The following methods are used for serializing and de-serializing the - /// contents of the mklshape object. - /// The data is serialized in this order - /// is_mkl_tensor_ : dimension_ : sizes_ : map_: format_ : T_ : mkl_pd_; - - /// Size of buffer to hold the serialized object, the size is computed by - /// following above mentioned order - inline size_t GetSerializeBufferSize() const { - return sizeof(MklShapeData); - } - - void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const { - CHECK(buf_size >= GetSerializeBufferSize()) - << "Buffer size is too small to SerializeMklDnnShape"; - *reinterpret_cast(buf) = data_; - } - - void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) { - // Make sure buffer holds at least is_mkl_tensor_. - CHECK(buf_size >= sizeof(data_.is_mkl_tensor_)) - << "Buffer size is too small in DeSerializeMklDnnShape"; - - const bool is_mkl_tensor = *reinterpret_cast(buf); - if (is_mkl_tensor) { // If it is an MKL Tensor then read the rest - CHECK(buf_size >= GetSerializeBufferSize()) - << "Buffer size is too small in DeSerializeMklDnnShape"; - data_ = *reinterpret_cast(buf); - } - } -}; - -#endif - // List of MklShape objects. Used in Concat/Split layers. typedef std::vector MklShapeList; @@ -617,36 +347,6 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } -#ifdef INTEL_MKL_DNN -template -inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, - const MklDnnShape& mkl_shape) { - Tensor output_tensor; - TensorShape output_shape; - -#if 0 - // TODO(nhasabni): need to implement - for (size_t j = 0; j < mkl_shape.GetDimension(); j++) { - // Outermost to innermost dimension - output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]); - } - - // Allocate output tensor. - context->allocate_temp(DataTypeToEnum::v(), output_shape, &output_tensor); - - dnnLayout_t output_layout = static_cast(mkl_shape.GetTfLayout()); - void* input_buffer = const_cast(mkl_tensor.flat().data()); - void* output_buffer = const_cast(output_tensor.flat().data()); - - if (mkl_tensor.NumElements() != 0) { - mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer); - } -#endif - - return output_tensor; -} -#endif - // Get the MKL shape from the second string tensor inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { mklshape->DeSerializeMklShape( @@ -659,20 +359,6 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { sizeof(uint8)); } -#ifdef INTEL_MKL_DNN -inline void GetMklShape(OpKernelContext* ctext, int n, - MklDnnShape* mklshape) { - mklshape->DeSerializeMklDnnShape( - ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) - .flat() - .data(), - ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) - .flat() - .size() * - sizeof(uint8)); -} -#endif - // Gets the actual input inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) { return ctext->input(GetTensorDataIndex(n, ctext->num_inputs())); @@ -696,27 +382,6 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, } } -#ifdef INTEL_MKL_DNN -/// Get shape of input tensor pointed by 'input_idx' in TensorShape format. -/// If the input tensor is in MKL layout, then obtains TensorShape from -/// MklShape. -inline TensorShape GetTfShape(OpKernelContext* context, - size_t input_idx) { - // Sanity check. - CHECK_NOTNULL(context); - CHECK_LT(input_idx, context->num_inputs()); - - MklDnnShape input_mkl_shape; - GetMklShape(context, input_idx, &input_mkl_shape); - if (input_mkl_shape.IsMklTensor()) { - return input_mkl_shape.GetTfShape(); - } else { - const Tensor& t = MklGetInput(context, input_idx); - return t.shape(); - } -} -#endif - // Allocate the second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -732,23 +397,6 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } -#ifdef INTEL_MKL_DNN -// Allocate the second output tensor that will contain -// the MKL shape serialized -inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, - const MklDnnShape& mkl_shape) { - Tensor* second_tensor = nullptr; - TensorShape second_shape; - second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); - OP_REQUIRES_OK(ctext, ctext->allocate_output( - GetTensorMetaDataIndex(n, ctext->num_outputs()), - second_shape, &second_tensor)); - mkl_shape.SerializeMklDnnShape( - second_tensor->flat().data(), - second_tensor->flat().size() * sizeof(uint8)); -} -#endif - // Allocate the output tensor, create a second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -769,43 +417,9 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } -#ifdef INTEL_MKL_DNN -// Allocate the output tensor, create a second output tensor that will contain -// the MKL shape serialized -inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, - Tensor** output, - const TensorShape& tf_shape, - const MklDnnShape& mkl_shape) { - Tensor* second_tensor = nullptr; - TensorShape second_shape; - second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); - OP_REQUIRES_OK( - ctext, ctext->allocate_output(GetTensorDataIndex(n, ctext->num_outputs()), - tf_shape, output)); - OP_REQUIRES_OK(ctext, ctext->allocate_output( - GetTensorMetaDataIndex(n, ctext->num_outputs()), - second_shape, &second_tensor)); - mkl_shape.SerializeMklDnnShape( - second_tensor->flat().data(), - second_tensor->flat().size() * sizeof(uint8)); -} -#endif - // Allocates a temp tensor and returns the data buffer for temporary storage. // Currently -#ifdef INTEL_MKL_DNN -template -inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, - const memory::primitive_desc& pd, void** buf_out) { - TensorShape tf_shape; - - tf_shape.AddDim(pd.get_size() / sizeof(T) + 1); - OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), - tf_shape, tensor_out)); - *buf_out = static_cast(tensor_out->flat().data()); -} -#endif - +// we only support F32, will need to templatize if other types are added inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, dnnLayout_t lt_buff, void** buf_out) { TensorShape tf_shape; @@ -821,7 +435,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, template inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, - TensorShape tf_shape) { + TensorShape tf_shape) { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), tf_shape, tensor_out)); } @@ -1055,8 +669,6 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0, return true; } -// These functions do not compile with MKL-DNN since mkl.h is missing. -// We may need to remove them later. // TODO(intel_tf): Remove this routine when faster MKL layout conversion is // out. inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) { @@ -1095,11 +707,18 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { #ifdef INTEL_MKL_DNN +using mkldnn::engine; +using mkldnn::memory; +using mkldnn::padding_kind; +using mkldnn::primitive; +using mkldnn::reorder; + /// Return MKL-DNN data type (memory::data_type) for input type T /// /// @input None /// @return memory::data_type corresponding to type T -template static memory::data_type MklDnnType(); +template +static memory::data_type MklDnnType(); /// Instantiation for float type. Add similar instantiations for other /// type if needed. @@ -1114,26 +733,15 @@ memory::data_type MklDnnType() { /// @return: memory::format corresponding to TensorFlow data format; /// Fails with an error if invalid data format. inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { - if (format == FORMAT_NHWC) return memory::format::nhwc; - else if (format == FORMAT_NCHW) return memory::format::nchw; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, - "Unsupported data format")); + if (format == FORMAT_NHWC) + return memory::format::nhwc; + else if (format == FORMAT_NCHW) + return memory::format::nchw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); // Return to get rid of compiler warning return memory::format::format_undef; } -/// Map MKL-DNN data format to TensorFlow's data format -/// -/// @input: memory::format -/// @return: Tensorflow data format corresponding to memory::format -/// Fails with an error if invalid data format. -inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { - if (format == memory::format::nhwc) return FORMAT_NHWC; - else if (format == memory::format::nchw) return FORMAT_NCHW; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, - "Unsupported data format")); -} - /// Map TensorShape object into memory::dims required by MKL-DNN /// /// This function will simply map input TensorShape into MKL-DNN dims @@ -1145,7 +753,7 @@ inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { /// @return memory::dims corresponding to TensorShape inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { memory::dims dims(shape.dims()); - for (int d = 0; d < shape.dims(); ++d) { + for (unsigned int d = 0; d < shape.dims(); ++d) { dims[d] = shape.dim_size(d); } return dims; @@ -1161,7 +769,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { /// @input TensorShape object in shape /// @return memory::dims in MKL-DNN required NCHW format inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, - TensorFormat format) { + TensorFormat format) { // Check validity of format. CHECK_NE(TFDataFormatToMklDnnDataFormat(format), memory::format::format_undef); @@ -1175,43 +783,6 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, return memory::dims({n, c, h, w}); } -/// Map MklDnn memory::dims object into TensorShape object. -/// -/// This function will simply map input shape in MKL-DNN memory::dims format -/// in Tensorflow's TensorShape object by perserving dimension order. -/// -/// @input MKL-DNN memory::dims object -/// @output TensorShape corresponding to memory::dims -inline TensorShape MklDnnDimsToTFShape(const memory::dims& dims) { - std::vector shape(dims.size(), -1); - for (int d = 0; d < dims.size(); d++) { - shape[d] = dims[d]; - } - - TensorShape ret; - CHECK_EQ(TensorShapeUtils::MakeShape(shape, &ret).ok(), true); - return ret; -} - -/// Function to calculate strides given tensor shape in Tensorflow order -/// E.g., if dims_tf_order is {1, 2, 3, 4}, then as per Tensorflow convention, -/// dimesion with size 1 is outermost dimension; while dimension with size 4 is -/// innermost dimension. So strides for this tensor would be {4 * 3 * 2, -/// 4 * 3, 4, 1}, i.e., {24, 12, 4, 1}. -/// -/// @input Tensorflow shape in memory::dims type -/// @return memory::dims containing strides for the tensor. -inline memory::dims CalculateTFStrides(const memory::dims& dims_tf_order) { - CHECK_GT(dims_tf_order.size(), 0); - memory::dims strides(dims_tf_order.size()); - int last_dim_idx = dims_tf_order.size() - 1; - strides[last_dim_idx] = 1; - for (int d = last_dim_idx - 1; d >= 0; d--) { - strides[d] = strides[d + 1] * dims_tf_order[d + 1]; - } - return strides; -} - inline padding_kind TFPaddingToMklDnnPadding(Padding pad) { // MKL-DNN only supports zero padding. return padding_kind::zero; @@ -1237,21 +808,23 @@ class MklDnnData { const engine* cpu_engine_; public: - explicit MklDnnData(const engine* e) : user_memory_(nullptr), - reorder_memory_(nullptr), - op_md_(nullptr), cpu_engine_(e) {} + explicit MklDnnData(const engine* e) + : user_memory_(nullptr), + reorder_memory_(nullptr), + op_md_(nullptr), + cpu_engine_(e) {} ~MklDnnData() { cpu_engine_ = nullptr; // We don't own this. - delete(user_memory_); - delete(reorder_memory_); - delete(op_md_); + delete (user_memory_); + delete (reorder_memory_); + delete (op_md_); } - inline void* GetTensorBuffer(const Tensor* tensor) const { + void* GetTensorBuffer(const Tensor* tensor) { CHECK_NOTNULL(tensor); - return const_cast(static_cast( - tensor->flat().data())); + return const_cast( + static_cast(tensor->flat().data())); } /// Set user memory primitive using specified dimensions, memory format and @@ -1262,83 +835,35 @@ class MklDnnData { /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and /// memory format HWIO, and the buffer that contains actual values is /// pointed by data_buffer. - inline void SetUsrMem(const memory::dims& dim, memory::format fm, - void* data_buffer = nullptr) { - auto md = memory::desc(dim, MklDnnType(), fm); - SetUsrMem(md, data_buffer); + void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) { + CHECK_NOTNULL(data_buffer); + CHECK_NOTNULL(cpu_engine_); + // TODO(nhasabni): can we remove dynamic memory allocation? + user_memory_ = + new memory(memory::primitive_desc( + memory::desc(dim, MklDnnType(), fm), *cpu_engine_), + data_buffer); } - inline void SetUsrMem(const memory::dims& dim, memory::format fm, - const Tensor* tensor) { + void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(dim, fm, GetTensorBuffer(tensor)); } - /// Helper function to create memory descriptor in Blocked format - /// - /// @input: Tensor dimensions - /// @input: strides corresponding to dimensions. One can use utility - /// function such as CalculateTFStrides to compute strides - /// for given dimensions. - /// @return: memory::desc object corresponding to blocked memory format - /// for given dimensions and strides. - static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim, - const memory::dims& strides) { - CHECK_EQ(dim.size(), strides.size()); - - // We have to construct memory descriptor in a C style. This is not at all - // ideal but MKLDNN does not offer any API to construct descriptor in - // blocked format except a copy constructor that accepts - // mkldnn_memory_desc_t. - mkldnn_memory_desc_t md; - md.primitive_kind = mkldnn_memory; - md.ndims = dim.size(); - md.format = mkldnn_blocked; - md.data_type = memory::convert_to_c(MklDnnType()); - - for (size_t i = 0; i < dim.size(); i++) { - md.layout_desc.blocking.block_dims[i] = 1; - md.layout_desc.blocking.strides[1][i] = 1; - md.layout_desc.blocking.strides[0][i] = strides[i]; - md.layout_desc.blocking.padding_dims[i] = dim[i]; - md.layout_desc.blocking.offset_padding_to_data[i] = 0; - md.dims[i] = dim[i]; - } - md.layout_desc.blocking.offset_padding = 0; - - return memory::desc(md); - } - - /// A version of SetUsrMem call that allows user to create memory in blocked - /// format. So in addition to accepting dimensions, it also accepts strides. - /// This allows user to create memory for tensor in a format that is not - /// supported by MKLDNN. E.g., MKLDNN does not support tensor format for 6 - /// dimensional tensor as a native format. But by using blocked format, a user - /// can create memory for 6D tensor. - inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, - void* data_buffer = nullptr) { - CHECK_EQ(dim.size(), strides.size()); - auto blocked_md = MklDnnData::CreateBlockedMemDesc(dim, strides); - SetUsrMem(blocked_md, data_buffer); - } - - inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, - const Tensor* tensor) { - CHECK_NOTNULL(tensor); - SetUsrMem(dim, strides, GetTensorBuffer(tensor)); - } - /// A version of function to set user memory primitive that accepts memory /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - inline void SetUsrMem(const memory::desc& md, void* data_buffer = nullptr) { - auto pd = memory::primitive_desc(md, *cpu_engine_); - SetUsrMem(pd, data_buffer); + void SetUsrMem(memory::desc md, void* data_buffer) { + CHECK_NOTNULL(data_buffer); + CHECK_NOTNULL(cpu_engine_); + // TODO(nhasabni): can we remove dynamic memory allocation? + user_memory_ = + new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer); } /// A version of SetUsrMem with memory descriptor and tensor - inline void SetUsrMem(const memory::desc& md, const Tensor* tensor) { + void SetUsrMem(memory::desc md, const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(md, GetTensorBuffer(tensor)); } @@ -1347,60 +872,41 @@ class MklDnnData { /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - inline void SetUsrMem(const memory::primitive_desc& pd, - void* data_buffer = nullptr) { + void SetUsrMem(memory::primitive_desc pd, void* data_buffer) { + CHECK_NOTNULL(data_buffer); CHECK_NOTNULL(cpu_engine_); // TODO(nhasabni): can we remove dynamic memory allocation? - if (data_buffer) { - user_memory_ = new memory(pd, data_buffer); - } else { - user_memory_ = new memory(pd); - } + user_memory_ = new memory(pd, data_buffer); } /// A version of SetUsrMem with primitive descriptor and tensor - inline void SetUsrMem(const memory::primitive_desc& pd, - const Tensor* tensor) { + void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(pd, GetTensorBuffer(tensor)); } /// Get function for user memory primitive. - inline const memory* GetUsrMem() const { return user_memory_; } + const memory* GetUsrMem() const { return user_memory_; } /// Get function for primitive descriptor of user memory primitive. - inline const memory::primitive_desc GetUsrMemPrimDesc() const { + const memory::primitive_desc GetUsrMemPrimDesc() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_primitive_desc(); } /// Get function for descriptor of user memory. - inline memory::desc GetUsrMemDesc() { + memory::desc GetUsrMemDesc() { // This is ugly. Why MKL-DNN does not provide desc() method of const type?? const memory::primitive_desc pd = GetUsrMemPrimDesc(); return const_cast(&pd)->desc(); } /// Get function for data buffer of user memory primitive. - inline void* GetUsrMemDataHandle() const { + void* GetUsrMemDataHandle() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_data_handle(); } - /// Set function for data buffer of user memory primitive. - inline void* SetUsrMemDataHandle(void* data_buffer) { - CHECK_NOTNULL(user_memory_); - CHECK_NOTNULL(data_buffer); - return user_memory_->set_data_handle(data_buffer); - } - - /// Set function for data buffer of user memory primitive. - inline void SetUsrMemDataHandle(const Tensor* tensor) { - CHECK_NOTNULL(user_memory_); - CHECK_NOTNULL(tensor); - user_memory_->set_data_handle(GetTensorBuffer(tensor)); - } - /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -1409,7 +915,7 @@ class MklDnnData { /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is /// required for I and F (say I_r is reorder primitive for I; F_r is reorder /// primitive for F), then we need I_r and F_r to perform Conv2D. - inline const memory& GetOpMem() const { + const memory& GetOpMem() const { return reorder_memory_ ? *reorder_memory_ : *user_memory_; } @@ -1417,32 +923,13 @@ class MklDnnData { /// format. E.g., For Conv2D, the dimensions would be same as user dimensions /// but memory::format would be mkldnn::any because we want MKL-DNN to choose /// best layout/format for given input dimensions. - inline void SetOpMemDesc(const memory::dims& dim, memory::format fm) { + void SetOpMemDesc(const memory::dims& dim, memory::format fm) { // TODO(nhasabni): can we remove dynamic memory allocation? op_md_ = new memory::desc(dim, MklDnnType(), fm); } /// Get function for memory descriptor for an operation - inline const memory::desc& GetOpMemDesc() const { return *op_md_; } - - /// Predicate that checks if we need to reorder user's memory into memory - /// pointed by op_pd. - /// - /// @input: op_pd - memory primitive descriptor of the given input of an - /// operation - /// @return: true in case reorder of input is needed; false, otherwise. - inline bool IsReorderNeeded(const memory::primitive_desc& op_pd) const { - CHECK_NOTNULL(user_memory_); - return op_pd != user_memory_->get_primitive_desc(); - } - - /// Function to create a reorder from memory pointed by from to memory pointed - /// by to. Returns created primitive. - inline primitive CreateReorder(const memory* from, const memory* to) const { - CHECK_NOTNULL(from); - CHECK_NOTNULL(to); - return reorder(*from, *to); - } + const memory::desc& GetOpMemDesc() const { return *op_md_; } /// Function to handle input reordering /// @@ -1458,62 +945,19 @@ class MklDnnData { /// operation /// @input: net - net to which to add reorder primitive in case it is needed. /// @return: true in case reorder of input is needed; false, otherwise. - inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, - std::vector* net) { + bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); - if (IsReorderNeeded(op_pd)) { + if (op_pd != user_memory_->get_primitive_desc()) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); - net->push_back(CreateReorder(user_memory_, reorder_memory_)); - return true; - } - return false; - } - - /// Overloaded version of above function that accepts memory buffer - /// where output of reorder needs to be stored. - /// - /// @input: op_pd - memory primitive descriptor of the given input of an - /// operation - /// @reorder_data_handle - memory buffer where output of reorder needs to be - /// stored. Primitive does not check if buffer is - /// enough size to write. - /// @input: net - net to which to add reorder primitive in case it is needed. - /// @return: true in case reorder of input is needed; false, otherwise. - inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, - void* reorder_data_handle, - std::vector* net) { - CHECK_NOTNULL(net); - CHECK_NOTNULL(reorder_data_handle); - CHECK_NOTNULL(user_memory_); - if (IsReorderNeeded(op_pd)) { - // TODO(nhasabni): can we remove dynamic memory allocation? - reorder_memory_ = new memory(op_pd, reorder_data_handle); - net->push_back(CreateReorder(user_memory_, reorder_memory_)); + net->push_back(reorder(*user_memory_, *reorder_memory_)); return true; } return false; } - /// Another overloaded version of CheckReorderToOpMem that accepts Tensor - /// where output of reorder needs to be stored. - /// - /// @input: op_pd - memory primitive descriptor of the given input of an - /// operation - /// @reorder_tensor - Tensor whose buffer is to be used to store output of - /// reorder. Primitive does not check if buffer is - /// enough size to write. - /// @input: net - net to which to add reorder primitive in case it is needed. - /// @return: true in case reorder of input is needed; false, otherwise. - inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, - Tensor* reorder_tensor, - std::vector* net) { - CHECK_NOTNULL(net); - CHECK_NOTNULL(reorder_tensor); - return CheckReorderToOpMem(op_pd, GetTensorBuffer(reorder_tensor), net); - } - /// Function to handle output reorder /// /// This function performs very similar functionality as input reordering @@ -1526,10 +970,9 @@ class MklDnnData { /// /// @input memory primitive descriptor for the given output of an operation /// @return: true in case reorder of output is needed; false, otherwise. - inline bool PrepareReorderToUserMemIfReq( - const memory::primitive_desc& op_pd) { + bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) { CHECK_NOTNULL(user_memory_); - if (IsReorderNeeded(op_pd)) { + if (op_pd != user_memory_->get_primitive_desc()) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); return true; @@ -1544,11 +987,11 @@ class MklDnnData { /// to the user-specified output buffer. /// /// @input: net - net to which to add reorder primitive - inline void InsertReorderToUserMem(std::vector* net) { + void InsertReorderToUserMem(std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); CHECK_NOTNULL(reorder_memory_); - net->push_back(CreateReorder(reorder_memory_, user_memory_)); + net->push_back(reorder(*reorder_memory_, *user_memory_)); } }; diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc deleted file mode 100644 index 6aef3d86e9..0000000000 --- a/tensorflow/core/util/mkl_util_test.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifdef INTEL_MKL - -#include "tensorflow/core/util/mkl_util.h" - -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace { - -#ifdef INTEL_MKL_DNN - -TEST(MklUtilTest, MklDnnTfShape) { - auto cpu_engine = engine(engine::cpu, 0); - MklDnnData a(&cpu_engine); - - const int N = 1, C = 2, H = 3, W = 4; - memory::dims a_dims = {N, C, H, W}; - MklDnnShape a_mkldnn_shape; - a_mkldnn_shape.SetMklTensor(true); - // Create TF layout in NCHW. - a_mkldnn_shape.SetTfLayout(a_dims.size(), a_dims, memory::format::nchw); - TensorShape a_tf_shape_nchw({N, C, H, W}); - TensorShape a_tf_shape_nhwc({N, H, W, C}); - TensorShape a_mkldnn_tf_shape = a_mkldnn_shape.GetTfShape(); - // Check that returned shape is in NCHW format. - EXPECT_EQ(a_tf_shape_nchw, a_mkldnn_tf_shape); - EXPECT_NE(a_tf_shape_nhwc, a_mkldnn_tf_shape); - - memory::dims b_dims = {N, C, H, W}; - MklDnnShape b_mkldnn_shape; - b_mkldnn_shape.SetMklTensor(true); - // Create TF layout in NHWC. - b_mkldnn_shape.SetTfLayout(b_dims.size(), b_dims, memory::format::nhwc); - TensorShape b_tf_shape_nhwc({N, H, W, C}); - TensorShape b_tf_shape_nchw({N, C, H, W}); - TensorShape b_mkldnn_tf_shape = b_mkldnn_shape.GetTfShape(); - // Check that returned shape is in NHWC format. - EXPECT_EQ(b_tf_shape_nhwc, b_mkldnn_tf_shape); - EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape); -} - - -TEST(MklUtilTest, MklDnnBlockedFormatTest) { - // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension - // first (case 1) and then it being outermost dimension (case 2). - auto cpu_engine = engine(engine::cpu, 0); - - // Setting for case 1 - MklDnnData a(&cpu_engine); - memory::dims dim1 = {3, 4}; - memory::dims strides1 = {1, 3}; - a.SetUsrMem(dim1, strides1); - - memory::desc a_md1 = a.GetUsrMemDesc(); - EXPECT_EQ(a_md1.data.ndims, 2); - EXPECT_EQ(a_md1.data.dims[0], 3); - EXPECT_EQ(a_md1.data.dims[1], 4); - EXPECT_EQ(a_md1.data.format, mkldnn_blocked); - - // Setting for case 2 - MklDnnData b(&cpu_engine); - memory::dims dim2 = {3, 4}; - memory::dims strides2 = {4, 1}; - b.SetUsrMem(dim2, strides2); - - memory::desc b_md2 = b.GetUsrMemDesc(); - EXPECT_EQ(b_md2.data.ndims, 2); - EXPECT_EQ(b_md2.data.dims[0], 3); - EXPECT_EQ(b_md2.data.dims[1], 4); - EXPECT_EQ(b_md2.data.format, mkldnn_blocked); -} - -#endif // INTEL_MKL_DNN -} // namespace -} // namespace tensorflow - -#endif // INTEL_MKL diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md index 8ad4c4c075..ab95ce0af9 100644 --- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md +++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md @@ -3,7 +3,7 @@ Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded, queue-based input pipelines for performance. Beginning with TensorFlow 1.4, however, we recommend using the `tf.data` module instead. (See -@{$datasets$Datasets} for details. In TensorFlow 1.2 and 1.3, the module was +[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use interface for constructing efficient input pipelines. Furthermore, we've stopped developing the old multi-threaded, queue-based input pipelines. We've retained diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md index be14ab4026..8409962744 100644 --- a/tensorflow/docs_src/get_started/get_started.md +++ b/tensorflow/docs_src/get_started/get_started.md @@ -272,7 +272,7 @@ train = optimizer.minimize(loss) ``` ```python -sess.run(init) # reset variables to incorrect defaults. +sess.run(init) # reset values to incorrect defaults. for i in range(1000): sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}) @@ -317,7 +317,7 @@ y_train = [0, -1, -2, -3] # training loop init = tf.global_variables_initializer() sess = tf.Session() -sess.run(init) # initialize variables with incorrect defaults. +sess.run(init) # reset values to wrong for i in range(1000): sess.run(train, {x: x_train, y: y_train}) @@ -383,7 +383,7 @@ train_input_fn = tf.estimator.inputs.numpy_input_fn( eval_input_fn = tf.estimator.inputs.numpy_input_fn( {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False) -# We can invoke 1000 training steps by invoking the method and passing the +# We can invoke 1000 training steps by invoking the method and passing the # training data set. estimator.train(input_fn=input_fn, steps=1000) diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 0db5c6143a..9d3af5d96a 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -191,7 +191,7 @@ import pandas as pd def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True): return tf.estimator.inputs.pandas_input_fn( - x=pd.DataFrame(...), + x=pdDataFrame(...), y=pd.Series(...), num_epochs=num_epochs, shuffle=shuffle) @@ -267,8 +267,8 @@ tf.logging.set_verbosity(tf.logging.INFO) Define the column names for the data set in `COLUMNS`. To distinguish features from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs -([train](http://download.tensorflow.org/data/boston_train.csv), -[test](http://download.tensorflow.org/data/boston_test.csv), and +(@{tf.train}, +@{tf.test}, and [predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_ `DataFrame`s: diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index df622c6ac5..3a153e8114 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 8b3da49a0d..df43255896 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 6eb8158249..f7f2c3cdc7 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.4.0 + 1.4.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.4.0 + 1.4.0-rc1 @@ -124,7 +124,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -143,7 +143,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -151,10 +151,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0-rc1.zip). 3. Extract this .zip file. @@ -202,7 +202,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
    javac -cp libtensorflow-1.4.0.jar HelloTF.java
    +
    javac -cp libtensorflow-1.4.0-rc1.jar HelloTF.java
    ### Running @@ -216,11 +216,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
    java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF
    +
    java -cp libtensorflow-1.4.0-rc1.jar:. -Djava.library.path=./jni HelloTF
    And the following command line executes the `HelloTF` program on Windows: -
    java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF
    +
    java -cp libtensorflow-1.4.0-rc1.jar;. -Djava.library.path=jni HelloTF
    If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index f7380bac8a..414ab7b1f7 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
    (tensorflow)$ pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
    + https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
          $ sudo pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
    +     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
          
    If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
          (tensorflow)$ pip install --ignore-installed --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
    + https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp27-none-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp27-none-linux_x86_64.whl
     
    Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
     
    Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
     
    @@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
     
    diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 79b383817b..9a95710bfa 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -114,7 +114,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
     $ pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -235,7 +235,7 @@ take the following steps: issue the following command:
     $ sudo pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl 
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -344,7 +344,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
     (tensorflow)$ pip install --ignore-installed --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl @@ -517,7 +517,7 @@ This section documents the relevant values for Mac OS installations.
    -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
    +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
     
    @@ -525,7 +525,7 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.
    -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
    +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py3-none-any.whl
     
    diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index aa4ae6c876..6d0dcdcd4a 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -355,10 +355,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.4.0 on Linux: +for TensorFlow 1.4.0rc1 on Linux:
    -$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl
    +$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0rc1-py2-none-any.whl
     
    ## Validate your installation @@ -447,10 +447,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.4.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.4.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.3.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.3.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.2.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.2.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.35.18
    tensorflow-1.1.0CPU3.5MSVC 2015 update 3Cmake v3.6.3N/AN/A
    - - - - + + @@ -462,8 +460,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
    tensorflow_gpu-1.4.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.468
    tensorflow-1.3.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.3.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
    tensorflow-1.4.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.4.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
    tensorflow-1.2.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.2.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.55.18
    tensorflow-1.1.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.2N/AN/A
    - - + @@ -474,10 +471,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
    tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.4.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.2.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.1.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.2N/AN/A
    tensorflow_gpu-1.1.0GPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.25.18
    - - - - + + diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 8fc65be35a..c5a560e074 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -296,6 +296,6 @@ complains about missing header files, add the .h’s that are needed into the [`android_extended_ops`](https://www.tensorflow.org/code/tensorflow/core/kernels/BUILD#L3525) target. -If you’re using a makefile targeting iOS, Raspberry Pi, etc, go to +If you’re using a makefile targetting iOS, Raspberry Pi, etc, go to [`tensorflow/contrib/makefile/tf_op_files.txt`](https://www.tensorflow.org/code/tensorflow/contrib/makefile/tf_op_files.txt) and add the right implementation files there. diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 25cb72008d..1f856bbf3f 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -9,19 +9,11 @@ lets you view the internal structure and states of running TensorFlow graphs during training and inference, which is difficult to debug with general-purpose debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm. -> NOTE: TensorFlow debugger uses a -> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based -> text user interface. On Mac OS X, the `ncurses` library is required and can -> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses -> isn't as well supported, so a -> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can -> be used with tfdbg by installing `pyreadline` with pip. -> If you use Anaconda3, you can install it with a command +> NOTE: The system requirements of tfdbg on supported external platforms include +> the following. On Mac OS X, the `ncurses` library is required. It can be +> installed with `brew install homebrew/dupes/ncurses`. On Windows, `pyreadline` +> is required. If you use Anaconda3, you can install it with a command > such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. -> Unofficial Windows curses packages can be downloaded -> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently -> installed using `pip install .whl`, however curses on Windows -> may not work as reliably as curses on Linux or Mac. This tutorial demonstrates how to use the **tfdbg** command-line interface (CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN) @@ -157,7 +149,6 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `pt [slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` | | | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` | | | `-r ` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` | -| | `-n ` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` | | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` | | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` | | **`/regex`** | | [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` | @@ -175,12 +166,10 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` | | | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` | -| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` | | **`lo`** | | **List output recipients of node** | | | | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` | | | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` | -| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` | | **`ls`** | | **List Python source files involved in node creation.** | | | | `-p ` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` | | | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` | diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md index 88eb277e35..d6f80430cd 100644 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ b/tensorflow/docs_src/programmers_guide/tensors.md @@ -29,8 +29,8 @@ Some types of tensors are special, and these will be covered in other units of the Programmer's guide. The main ones are: * `tf.Variable` - * `tf.constant` - * `tf.placeholder` + * `tf.Constant` + * `tf.Placeholder` * `tf.SparseTensor` With the exception of `tf.Variable`, the value of a tensor is immutable, which @@ -64,7 +64,7 @@ The following snippet demonstrates creating a few rank 0 variables: mammal = tf.Variable("Elephant", tf.string) ignition = tf.Variable(451, tf.int16) floating = tf.Variable(3.14159265359, tf.float64) -its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64) +its_complicated = tf.Variable((12.3, -4.85), tf.complex64) ``` Note: A string is treated as a single item in TensorFlow, not as a sequence of @@ -79,7 +79,7 @@ initial value. For example: mystr = tf.Variable(["Hello"], tf.string) cool_numbers = tf.Variable([3.14159, 2.71828], tf.float32) first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32) -its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64) +its_very_complicated = tf.Variable([(12.3, -4.85), (7.5, -6.23)], tf.complex64) ``` @@ -275,8 +275,8 @@ Graphs and Sessions for more information). Sometimes it is not possible to evaluate a `tf.Tensor` with no context because its value might depend on dynamic information that is not available. For -example, tensors that depend on `placeholder`s can't be evaluated without -providing a value for the `placeholder`. +example, tensors that depend on `Placeholder`s can't be evaluated without +providing a value for the `Placeholder`. ``` python p = tf.placeholder(tf.float32) diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py index ab611f414a..82d6a94ea1 100644 --- a/tensorflow/examples/speech_commands/models.py +++ b/tensorflow/examples/speech_commands/models.py @@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, first_filter_height = input_time_size first_filter_count = 186 first_filter_stride_x = 1 - first_filter_stride_y = 1 + first_filter_stride_y = 4 first_weights = tf.Variable( tf.truncated_normal( [first_filter_height, first_filter_width, 1, first_filter_count], diff --git a/tensorflow/go/android.go b/tensorflow/go/android.go deleted file mode 100644 index 3db3ddfec5..0000000000 --- a/tensorflow/go/android.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2016 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build android - -package tensorflow - -// #cgo LDFLAGS: -landroid -llog -lm -lz -ldl -import "C" diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go index 40c951ab8c..7cba043af2 100644 --- a/tensorflow/go/operation_test.go +++ b/tensorflow/go/operation_test.go @@ -123,14 +123,6 @@ func TestOutputDataTypeAndShape(t *testing.T) { []int64{2, 3}, Double, }, - { // Matrix of Uint64 - [][]uint64{ - {1, 2, 3}, - {4, 5, 6}, - }, - []int64{2, 3}, - Uint64, - }, } for idx, test := range testdata { t.Run(fmt.Sprintf("#%d Value %T", idx, test.Value), func(t *testing.T) { diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 1326a95278..36a74c0081 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -101,7 +101,7 @@ func NewTensor(value interface{}) (*Tensor, error) { return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len()) } } else { - e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()} + e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()} if err := e.encode(reflect.ValueOf(value), shape); err != nil { return nil, err } @@ -207,9 +207,6 @@ func (t *Tensor) WriteContentsTo(w io.Writer) (int64, error) { func tensorData(c *C.TF_Tensor) []byte { // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices cbytes := C.TF_TensorData(c) - if cbytes == nil { - return nil - } length := int(C.TF_TensorByteSize(c)) slice := (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length] return slice @@ -313,7 +310,7 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { if err := w.WriteByte(b); err != nil { return err } - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Write(w, nativeEndian, v.Interface()); err != nil { return err } @@ -352,7 +349,7 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect. return err } ptr.Elem().SetBool(b == 1) - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil { return err } diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 674a8ce86f..35bd2fd9a5 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -34,15 +34,11 @@ func TestNewTensor(t *testing.T) { {nil, int64(5)}, {nil, uint8(5)}, {nil, uint16(5)}, - {nil, uint32(5)}, - {nil, uint64(5)}, {nil, float32(5)}, {nil, float64(5)}, {nil, complex(float32(5), float32(6))}, {nil, complex(float64(5), float64(6))}, {nil, "a string"}, - {[]int64{1}, []uint32{1}}, - {[]int64{1}, []uint64{1}}, {[]int64{2}, []bool{true, false}}, {[]int64{1}, []float64{1}}, {[]int64{1}, [1]float64{1}}, @@ -75,6 +71,11 @@ func TestNewTensor(t *testing.T) { // native ints not supported int(5), []int{5}, + // uint32 and uint64 are not supported in TensorFlow + uint32(5), + []uint32{5}, + uint64(5), + []uint64{5}, // Mismatched dimensions [][]float32{{1, 2, 3}, {4}}, // Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG" diff --git a/tensorflow/java/src/main/java/org/tensorflow/Shape.java b/tensorflow/java/src/main/java/org/tensorflow/Shape.java index d533c3d480..9aa92be111 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Shape.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Shape.java @@ -77,24 +77,6 @@ public final class Shape { return shape[i]; } - @Override - public int hashCode() { - return Arrays.hashCode(shape); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - - if (obj instanceof Shape && Arrays.equals(this.shape, ((Shape) obj).shape)) { - return !hasUnknownDimension(); - } - - return super.equals(obj); - } - /** Succinct description of the shape meant for debugging. */ @Override public String toString() { @@ -116,18 +98,4 @@ public final class Shape { } private long[] shape; - - private boolean hasUnknownDimension() { - if (shape == null) { - return true; - } - - for (long dimension : shape) { - if (dimension == -1) { - return true; - } - } - - return false; - } } diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index 92cc3bd60e..3b027700c5 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -16,7 +16,6 @@ limitations under the License. package org.tensorflow; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; import org.junit.Test; import org.junit.runner.RunWith; @@ -78,29 +77,4 @@ public class ShapeTest { assertEquals(5, n.shape().size(1)); } } - - @Test - public void equalsWorksCorrectly() { - assertEquals(Shape.scalar(), Shape.scalar()); - assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3)); - - assertNotEquals(Shape.make(1,2), null); - assertNotEquals(Shape.make(1,2), new Object()); - assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4)); - - - assertNotEquals(Shape.unknown(), Shape.unknown()); - assertNotEquals(Shape.make(-1), Shape.make(-1)); - assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3)); - } - - @Test - public void hashCodeIsAsExpected() { - assertEquals(Shape.make(1, 2, 3, 4).hashCode(), Shape.make(1, 2, 3, 4).hashCode()); - assertEquals(Shape.scalar().hashCode(), Shape.scalar().hashCode()); - assertEquals(Shape.unknown().hashCode(), Shape.unknown().hashCode()); - - assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode()); - } } - diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 54c43c1337..5ae4aace16 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5,10 +5,7 @@ package( default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", - "//tensorflow/contrib/lite/toco/python:__pkg__", "//tensorflow_models:__subpackages__", - # TODO(aselle): to pass open source test. - "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__", ], ) @@ -48,7 +45,6 @@ py_library( "//tensorflow/compiler/aot/tests:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn/python/learn/datasets:__pkg__", # TODO(b/34059704): remove when fixed - "//tensorflow/contrib/lite/toco/python:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/debug:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/tools:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/tools/api/generator:__pkg__", diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index fa5d02c476..62fea05867 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -117,7 +117,7 @@ class _Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=estimator_spec.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... update train_op and hooks in EstimatorSpec and return + ... upate train_op and hooks in EstimatorSpec and return ``` """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index 3512f66284..c9f37f06e8 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import collections -from six import string_types from tensorflow.python.estimator.inputs.queues import feeding_functions # Key name to pack the target into dict of `features`. See @@ -52,9 +51,8 @@ def numpy_input_fn(x, num_threads=1): """Returns input function that would feed dict of numpy arrays into the model. - This returns a function outputting `features` and `targets` based on the dict - of numpy arrays. The dict `features` has the same keys as the `x`. The dict - `targets` has the same keys as the `y` if `y` is a dict. + This returns a function outputting `features` and `target` based on the dict + of numpy arrays. The dict `features` has the same keys as the `x`. Example: @@ -71,7 +69,7 @@ def numpy_input_fn(x, Args: x: dict of numpy array object. - y: numpy array object or dict of numpy array object. `None` if absent. + y: numpy array object. `None` if absent. batch_size: Integer, size of batches to return. num_epochs: Integer, number of epochs to iterate over data. If `None` will run forever. @@ -83,13 +81,11 @@ def numpy_input_fn(x, such as in prediction and evaluation mode, `num_threads` should be 1. Returns: - Function, that has signature of ()->(dict of `features`, `targets`) + Function, that has signature of ()->(dict of `features`, `target`) Raises: ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e., values in `x` have same shape). - ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict. - ValueError: if x or y is an empty dict. TypeError: `x` is not a dict or `shuffle` is not bool. """ @@ -101,76 +97,43 @@ def numpy_input_fn(x, """Numpy input function.""" if not isinstance(x, dict): raise TypeError('x must be dict; got {}'.format(type(x).__name__)) - if not x: - raise ValueError('x cannot be empty') # Make a shadow copy and also ensure the order of iteration is consistent. - ordered_dict_data = collections.OrderedDict( + ordered_dict_x = collections.OrderedDict( sorted(x.items(), key=lambda t: t[0])) - # Deep copy keys which is a view in python 3 - feature_keys = list(ordered_dict_data.keys()) - - if y is None: - target_keys = None - elif isinstance(y, dict): - if not y: - raise ValueError('y cannot be empty dict, use None instead.') - - ordered_dict_y = collections.OrderedDict( - sorted(y.items(), key=lambda t: t[0])) - target_keys = list(ordered_dict_y.keys()) - - duplicate_keys = set(feature_keys).intersection(set(target_keys)) - if len(duplicate_keys): - raise ValueError('{} duplicate keys are found in both x and y: ' - '{}'.format(len(duplicate_keys), duplicate_keys)) - - ordered_dict_data.update(ordered_dict_y) - else: - target_keys = _get_unique_target_key(ordered_dict_data) - ordered_dict_data[target_keys] = y - - if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1: - shape_dict_of_x = {k: ordered_dict_data[k].shape - for k in feature_keys} - - if target_keys is None: - shape_of_y = None - elif isinstance(target_keys, string_types): - shape_of_y = y.shape - else: - shape_of_y = {k: ordered_dict_data[k].shape - for k in target_keys} + unique_target_key = _get_unique_target_key(ordered_dict_x) + if y is not None: + ordered_dict_x[unique_target_key] = y + + if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1: + shape_dict_of_x = {k: ordered_dict_x[k].shape + for k in ordered_dict_x.keys()} + shape_of_y = None if y is None else y.shape raise ValueError('Length of tensors in x and y is mismatched. All ' 'elements in x and y must have the same length.\n' 'Shapes in x: {}\n' - 'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y)) + 'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y)) queue = feeding_functions._enqueue_data( # pylint: disable=protected-access - ordered_dict_data, + ordered_dict_x, queue_capacity, shuffle=shuffle, num_threads=num_threads, enqueue_size=batch_size, num_epochs=num_epochs) - batch = (queue.dequeue_many(batch_size) if num_epochs is None + features = (queue.dequeue_many(batch_size) if num_epochs is None else queue.dequeue_up_to(batch_size)) - # Remove the first `Tensor` in `batch`, which is the row number. - if len(batch) > 0: - batch.pop(0) + # Remove the first `Tensor` in `features`, which is the row number. + if len(features) > 0: + features.pop(0) - features = dict(zip(feature_keys, batch[:len(feature_keys)])) - if target_keys is None: - # TODO(martinwicke), return consistent result - return features - elif isinstance(target_keys, string_types): - target = batch[-1] - return features, target - else: - target = dict(zip(target_keys, batch[-len(target_keys):])) + features = dict(zip(ordered_dict_x.keys(), features)) + if y is not None: + target = features.pop(unique_target_key) return features, target + return features return input_fn diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 65eae7a7dc..02df22b632 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -239,40 +239,6 @@ class NumpyIoTest(test.TestCase): x, y, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() - def testNumpyInputFnWithXIsEmptyDict(self): - x = {} - y = np.arange(4) - with self.test_session(): - with self.assertRaisesRegexp(ValueError, 'x cannot be empty'): - failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) - failing_input_fn() - - def testNumpyInputFnWithYIsNone(self): - a = np.arange(4) * 1.0 - b = np.arange(32, 36) - x = {'a': a, 'b': b} - y = None - - with self.test_session() as session: - input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) - features_tensor = input_fn() - - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(session, coord=coord) - - feature = session.run(features_tensor) - self.assertEqual(len(feature), 2) - self.assertAllEqual(feature['a'], [0, 1]) - self.assertAllEqual(feature['b'], [32, 33]) - - session.run([features_tensor]) - with self.assertRaises(errors.OutOfRangeError): - session.run([features_tensor]) - - coord.request_stop() - coord.join(threads) - def testNumpyInputFnWithNonBoolShuffle(self): x = np.arange(32, 36) y = np.arange(4) @@ -319,59 +285,6 @@ class NumpyIoTest(test.TestCase): num_epochs=1) failing_input_fn() - def testNumpyInputFnWithYAsDict(self): - a = np.arange(4) * 1.0 - b = np.arange(32, 36) - x = {'a': a, 'b': b} - y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)} - - with self.test_session() as session: - input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) - features_tensor, targets_tensor = input_fn() - - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(session, coord=coord) - - features, targets = session.run([features_tensor, targets_tensor]) - self.assertEqual(len(features), 2) - self.assertAllEqual(features['a'], [0, 1]) - self.assertAllEqual(features['b'], [32, 33]) - self.assertEqual(len(targets), 2) - self.assertAllEqual(targets['y1'], [-32, -31]) - self.assertAllEqual(targets['y2'], [32, 31]) - - session.run([features_tensor, targets_tensor]) - with self.assertRaises(errors.OutOfRangeError): - session.run([features_tensor, targets_tensor]) - - coord.request_stop() - coord.join(threads) - - def testNumpyInputFnWithYIsEmptyDict(self): - a = np.arange(4) * 1.0 - b = np.arange(32, 36) - x = {'a': a, 'b': b} - y = {} - with self.test_session(): - with self.assertRaisesRegexp(ValueError, 'y cannot be empty'): - failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) - failing_input_fn() - - def testNumpyInputFnWithDuplicateKeysInXAndY(self): - a = np.arange(4) * 1.0 - b = np.arange(32, 36) - x = {'a': a, 'b': b} - y = {'y1': np.arange(-32, -28), - 'a': a, - 'y2': np.arange(32, 28, -1), - 'b': b} - with self.test_session(): - with self.assertRaisesRegexp( - ValueError, '2 duplicate keys are found in both x and y'): - failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) - failing_input_fn() - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index dc4ffb1747..2785aed13e 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -860,10 +860,6 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None): inputs, which allows those ops to accept numpy arrays, Python lists, and scalars in addition to `Tensor` objects. - Note: This function diverges from default Numpy behavior for `float` and - `string` types when `None` is present in a Python list or scalar. Rather - than silently converting `None` values, an error will be thrown. - Args: value: An object whose type has a registered `Tensor` conversion function. dtype: Optional element type for the returned tensor. If missing, the diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index e283542172..7e74c19124 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -286,7 +286,6 @@ _TF_TO_IS_OK = { dtypes.bool: [_FilterBool], dtypes.complex128: [_FilterComplex], dtypes.complex64: [_FilterComplex], - dtypes.float16: [_FilterFloat], dtypes.float32: [_FilterFloat], dtypes.float64: [_FilterFloat], dtypes.int16: [_FilterInt], diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 1610214d54..cfa5fe5e3e 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -986,9 +986,8 @@ class TensorFlowTestCase(googletest.TestCase): err: A float value. msg: An optional string message to append to the failure message. """ - # f1 == f2 is needed here as we might have: f1, f2 = inf, inf self.assertTrue( - f1 == f2 or math.fabs(f1 - f2) <= err, + math.fabs(f1 - f2) <= err, "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg if msg is not None else "")) diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 76b80e60ea..6eb9c66d06 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -107,41 +107,22 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): def setUp(self): self.rng = np.random.RandomState(42) - def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None): + def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None): """Check equivalence between boolean_mask and numpy masking.""" if make_mask is None: make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool) arr = np.random.rand(*arr_shape) mask = make_mask(arr_shape[:ndims_mask]) - if axis is not None: - mask = make_mask(arr_shape[axis:ndims_mask+axis]) - if axis is None or axis == 0: - masked_arr = arr[mask] - elif axis == 1: - masked_arr = arr[:,mask] - elif axis == 2: - masked_arr = arr[:,:,mask] - with self.test_session() as sess: - masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) + masked_arr = arr[mask] + with self.test_session(): + masked_tensor = array_ops.boolean_mask(arr, mask) # Leading dimension size of masked_tensor is always unknown until runtime # since we don't how many elements will be kept. - leading = 1 if axis is None else axis + 1 - self.assertAllEqual(masked_tensor.get_shape()[leading:], - masked_arr.shape[leading:]) + self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:]) self.assertAllClose(masked_arr, masked_tensor.eval()) - def testMaskDim1ArrDim2Axis1(self): - ndims_mask = 1 - for arr_shape in [(1, 1), (2, 2), (2, 5)]: - self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) - - def testMaskDim2ArrDim2Axis1(self): - ndims_mask = 2 - for arr_shape in [(1, 1), (2, 2), (2, 5)]: - self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) - def testMaskDim1ArrDim1(self): ndims_mask = 1 for arr_shape in [(1,), (2,), (3,), (10,)]: @@ -505,7 +486,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): _ = checker2[...] _ = checker2[tuple()] - def testInt64GPU(self): + def testFloatSlicedArrayAndInt64IndicesGPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") with self.test_session(use_gpu=True, force_gpu=True): @@ -516,6 +497,17 @@ class StridedSliceTest(test_util.TensorFlowTestCase): s = array_ops.strided_slice(x, begin, end, strides) self.assertAllEqual([3.], self.evaluate(s)) + def testInt64SlicedArrayAndIndicesGPU(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + with self.test_session(use_gpu=True, force_gpu=True): + x = constant_op.constant([1, 2, 3], dtype=dtypes.int64) + begin = constant_op.constant([2], dtype=dtypes.int64) + end = constant_op.constant([3], dtype=dtypes.int64) + strides = constant_op.constant([1], dtype=dtypes.int64) + s = array_ops.strided_slice(x, begin, end, strides) + self.assertAllEqual([3], self.evaluate(s)) + def testDegenerateSlices(self): with self.test_session(use_gpu=True): checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR) @@ -1078,16 +1070,6 @@ class PadTest(test_util.TensorFlowTestCase): [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) -class InvertPermutationTest(test_util.TensorFlowTestCase): - - def testInvertPermutation(self): - for dtype in [dtypes.int32, dtypes.int64]: - with self.test_session(use_gpu=True): - x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype) - y = array_ops.invert_permutation(x) - self.assertAllEqual(y.get_shape(), [5]) - self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1]) - if __name__ == "__main__": test_lib.main() diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 79285476b4..7a610debd1 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -25,10 +25,11 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest + class BincountTest(test_util.TensorFlowTestCase): def test_empty(self): - with self.test_session(use_gpu=True): + with self.test_session(): self.assertAllEqual( math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0]) self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0]) @@ -41,7 +42,7 @@ class BincountTest(test_util.TensorFlowTestCase): np.float64) def test_values(self): - with self.test_session(use_gpu=True): + with self.test_session(): self.assertAllEqual( math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1]) arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5] @@ -56,14 +57,14 @@ class BincountTest(test_util.TensorFlowTestCase): math_ops.bincount(np.arange(10000)).eval(), np.ones(10000)) def test_maxlength(self): - with self.test_session(use_gpu=True): + with self.test_session(): self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0]) self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1]) self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), []) def test_random_with_weights(self): num_samples = 10000 - with self.test_session(use_gpu=True): + with self.test_session(): np.random.seed(42) for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]: arr = np.random.randint(0, 1000, num_samples) @@ -71,29 +72,17 @@ class BincountTest(test_util.TensorFlowTestCase): weights = np.random.randint(-100, 100, num_samples) else: weights = np.random.random(num_samples) - self.assertAllClose( + self.assertAllEqual( math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights)) - def test_random_without_weights(self): - num_samples = 10000 - with self.test_session(use_gpu=True): - np.random.seed(42) - for dtype in [np.int32, np.float32]: - arr = np.random.randint(0, 1000, num_samples) - weights = np.ones(num_samples).astype(dtype) - self.assertAllClose( - math_ops.bincount(arr, None).eval(), - np.bincount(arr, weights)) - def test_zero_weights(self): - with self.test_session(use_gpu=True): + with self.test_session(): self.assertAllEqual( math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(), np.zeros(1000)) def test_negative(self): - # unsorted_segment_sum will only report InvalidArgumentError on CPU with self.test_session(): with self.assertRaises(errors.InvalidArgumentError): math_ops.bincount([1, 2, 3, -1, 6, 8]).eval() diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py index e612b1c134..6db3592055 100644 --- a/tensorflow/python/kernel_tests/bucketize_op_test.py +++ b/tensorflow/python/kernel_tests/bucketize_op_test.py @@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]), boundaries=[0, 3, 8, 11]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session(use_gpu=True) as sess: + with self.test_session() as sess: self.assertAllEqual(expected_out, sess.run(op)) def testFloat(self): @@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]), boundaries=[0., 3., 8., 11.]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session(use_gpu=True) as sess: + with self.test_session() as sess: self.assertAllEqual(expected_out, sess.run(op)) def test2DInput(self): @@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]), boundaries=[0, 3, 8, 11]) expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]] - with self.test_session(use_gpu=True) as sess: + with self.test_session() as sess: self.assertAllEqual(expected_out, sess.run(op)) def testInvalidBoundariesOrder(self): op = math_ops._bucketize( constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11]) - with self.test_session(use_gpu=True) as sess: + with self.test_session() as sess: with self.assertRaisesRegexp( errors_impl.InvalidArgumentError, "Expected sorted boundaries"): sess.run(op) diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 6cbdd4cbb3..6167cb9999 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -439,10 +439,9 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeCPU(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, - dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, - dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, - dtypes_lib.complex64, dtypes_lib.complex128, + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, + dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64, dtypes_lib.string ]: self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False) @@ -574,10 +573,9 @@ class OnesLikeTest(test.TestCase): def testOnesLike(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, - dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, - dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, - dtypes_lib.complex64, dtypes_lib.complex128 + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, + dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64 ]: numpy_dtype = dtype.as_numpy_dtype with self.test_session(): diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index a7e23ead1c..b67a4e3f89 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -17,9 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin - from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -53,45 +50,5 @@ class Conv1DTest(test.TestCase): self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) - def testConv1DTranspose(self): - with self.test_session(): - stride = 2 - - # Input, output: [batch, width, depth] - x_shape = [2, 4, 3] - y_shape = [2, 9, 2] - - # Filter: [kernel_width, output_depth, input_depth] - f_shape = [3, 2, 3] - - x = constant_op.constant( - 1.0, shape=x_shape, name="x", dtype=dtypes.float32) - f = constant_op.constant( - 1.0, shape=f_shape, name="filter", dtype=dtypes.float32) - output = nn_ops.conv1d_transpose( - x, f, y_shape, stride=stride, padding="VALID") - value = output.eval() - - cache_values = np.zeros(y_shape, dtype=np.float32) - - # The amount of padding added - pad = 1 - - for n in xrange(x_shape[0]): - for k in xrange(f_shape[1]): - for w in xrange(pad, y_shape[1] - pad): - target = 3.0 - # We add a case for locations divisible by the stride. - w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad - if w_in: - target += 3.0 - cache_values[n, w, k] = target - - # copy values in the border - cache_values[n, 0, k] = cache_values[n, 1, k] - cache_values[n, -1, k] = cache_values[n, -2, k] - - self.assertAllClose(cache_values, value) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index 116681fc4c..14622ab467 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -21,8 +21,6 @@ from __future__ import print_function import collections import math -import numpy as np - from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util @@ -47,19 +45,8 @@ def GetTestConfigs(): class Conv3DTest(test.TestCase): - def _DtypesToTest(self, use_gpu): - if use_gpu: - if not test_util.CudaSupportsHalfMatMulAndConv(): - return [dtypes.float32] - else: - # It is important that float32 comes before float16 here, - # as we will be using its gradients as reference for fp16 gradients. - return [dtypes.float32, dtypes.float16] - else: - return [dtypes.float64, dtypes.float32, dtypes.float16] - def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride, - padding, data_format, dtype, use_gpu): + padding, data_format, use_gpu): total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: @@ -67,14 +54,13 @@ class Conv3DTest(test.TestCase): for s in filter_in_sizes: total_size_2 *= s - # Initializes the input tensor with array containing numbers from 0 to 1. - # We keep the input tensor values fairly small to avoid overflowing a float16 - # tensor during the conv3d - x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] - x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] + x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu): - t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) - t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) + t1 = constant_op.constant(x1, shape=tensor_in_sizes) + t2 = constant_op.constant(x2, shape=filter_in_sizes) if isinstance(stride, collections.Iterable): strides = [1] + list(stride) + [1] @@ -95,35 +81,27 @@ class Conv3DTest(test.TestCase): expected): results = [] for data_format, use_gpu in GetTestConfigs(): - for dtype in self._DtypesToTest(use_gpu): - result = self._SetupValuesForDevice( - tensor_in_sizes, - filter_in_sizes, - stride, - padding, - data_format, - dtype, - use_gpu=use_gpu) - results.append(result) - + result = self._SetupValuesForDevice( + tensor_in_sizes, + filter_in_sizes, + stride, + padding, + data_format, + use_gpu=use_gpu) + results.append(result) + tolerance = 1e-2 if use_gpu else 1e-5 with self.test_session() as sess: values = sess.run(results) for value in values: print("expected = ", expected) print("actual = ", value) - tol = 1e-6 - if value.dtype == np.float16: - tol = 1e-3 - - self.assertAllClose(expected, value.flatten(), atol=tol, - rtol=tol) + self.assertAllClose(expected, value.flatten(), atol=tolerance, + rtol=1e-6) def testConv3D1x1x1Filter(self): expected_output = [ - 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5 , - 0.59259259, 0.62962963, 0.77777778, 0.92592593, 0.85185185, - 1.05555556, 1.25925926, 1.07407407, 1.33333333, 1.59259259, - 1.2962963 , 1.61111111, 1.92592593 + 30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0, + 204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0 ] # These are equivalent to the Conv2D1x1 case. @@ -149,10 +127,8 @@ class Conv3DTest(test.TestCase): # Expected values computed using scipy's correlate function. def testConv3D2x2x2Filter(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 4.2650463 , 4.35763889, - 4.45023148, 6.73032407, 6.89236111, 7.05439815, 7.22337963, - 7.39930556, 7.57523148, 9.68865741, 9.93402778, 10.17939815, - 10.18171296, 10.44097222, 10.70023148 + 19554., 19962., 20370., 22110., 22590., 23070., 34890., 35730., 36570., + 37446., 38358., 39270., 50226., 51498., 52770., 52782., 54126., 55470. ] # expected_shape = [1, 3, 1, 2, 5] self._VerifyValues( @@ -164,19 +140,69 @@ class Conv3DTest(test.TestCase): def testConv3DStrides(self): expected_output = [ - 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, - 0.13988095, 0.08452381, 0.26071429, 0.35238095, 0.36488095, - 0.37738095, 0.38988095, 0.40238095, 0.23452381, 0.46071429, - 0.61488095, 0.62738095, 0.63988095, 0.65238095, 0.66488095, - 0.38452381, 1.12738095, 1.48988095, 1.50238095, 1.51488095, - 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, - 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, - 1.52738095, 2.01488095, 2.02738095, 2.03988095, 2.05238095, - 2.06488095, 1.18452381, 2.19404762, 2.88988095, 2.90238095, - 2.91488095, 2.92738095, 2.93988095, 1.68452381, 2.39404762, - 3.15238095, 3.16488095, 3.17738095, 3.18988095, 3.20238095, - 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, - 3.45238095, 3.46488095, 1.98452381 + 102., + 151., + 172., + 193., + 214., + 235., + 142., + 438., + 592., + 613., + 634., + 655., + 676., + 394., + 774., + 1033., + 1054., + 1075., + 1096., + 1117., + 646., + 1894., + 2503., + 2524., + 2545., + 2566., + 2587., + 1486., + 2230., + 2944., + 2965., + 2986., + 3007., + 3028., + 1738., + 2566., + 3385., + 3406., + 3427., + 3448., + 3469., + 1990., + 3686., + 4855., + 4876., + 4897., + 4918., + 4939., + 2830., + 4022., + 5296., + 5317., + 5338., + 5359., + 5380., + 3082., + 4358., + 5737., + 5758., + 5779., + 5800., + 5821., + 3334., ] self._VerifyValues( tensor_in_sizes=[1, 5, 8, 7, 1], @@ -186,10 +212,7 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testConv3D2x2x2FilterStride2(self): - expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, - 10.17939815 - ] + expected_output = [19554., 19962., 20370., 50226., 51498., 52770.] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], filter_in_sizes=[2, 2, 2, 3, 3], @@ -199,14 +222,11 @@ class Conv3DTest(test.TestCase): def testConv3DStride3(self): expected_output = [ - 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, - 1.68998016, 1.6155754 , 1.68179563, 1.74801587, 1.9280754 , - 2.01215278, 2.09623016, 1.98015873, 2.0672123 , 2.15426587, - 2.03224206, 2.12227183, 2.21230159, 4.4280754 , 4.65500992, - 4.88194444, 4.48015873, 4.71006944, 4.93998016, 4.53224206, - 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, - 4.8968254 , 5.15054563, 5.40426587, 4.94890873, 5.20560516, - 5.46230159 + 36564., 38022., 39480., 37824., 39354., 40884., 39084., 40686., 42288., + 46644., 48678., 50712., 47904., 50010., 52116., 49164., 51342., 53520., + 107124., 112614., 118104., 108384., 113946., 119508., 109644., 115278., + 120912., 117204., 123270., 129336., 118464., 124602., 130740., 119724., + 125934., 132144. ] self._VerifyValues( tensor_in_sizes=[1, 6, 7, 8, 2], @@ -217,9 +237,8 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2Same(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 2.0162037 , 2.06597222, - 2.11574074, 9.68865741, 9.93402778, 10.17939815, 4.59953704, - 4.73263889, 4.86574074 + 19554., 19962., 20370., 10452., 10710., 10968., 50226., 51498., 52770., + 23844., 24534., 25224. ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -229,10 +248,7 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testKernelSmallerThanStride(self): - expected_output = [ - 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037 , - 0.77777778, 0.92592593, 1. - ] + expected_output = [1., 3., 7., 9., 19., 21., 25., 27.] self._VerifyValues( tensor_in_sizes=[1, 3, 3, 3, 1], filter_in_sizes=[1, 1, 1, 1, 1], @@ -247,12 +263,9 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, - 0.40306122, 0.41873178, 0.4340379 , 0.19642857, 2.46938776, - 2.50874636, 1.1377551 , 2.74489796, 2.78425656, 1.26020408, - 1.16873178, 1.1840379 , 0.51785714, 1.09511662, 1.10604956, - 0.44642857, 1.17164723, 1.18258017, 0.47704082, 0.3691691 , - 0.37244898, 0.125 + 1484., 1592., 770., 2240., 2348., 1106., 1149., 1191., 539., 6776., + 6884., 3122., 7532., 7640., 3458., 3207., 3249., 1421., 3005., 3035., + 1225., 3215., 3245., 1309., 1013., 1022., 343. ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -261,10 +274,7 @@ class Conv3DTest(test.TestCase): padding="SAME", expected=expected_output) - expected_output = [ - 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, - 2.744898, 2.784257 - ] + expected_output = [1484., 1592., 2240., 2348., 6776., 6884., 7532., 7640.] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], filter_in_sizes=[2, 2, 2, 1, 1], @@ -278,7 +288,7 @@ class Conv3DTest(test.TestCase): filter_in_sizes=[2, 1, 2, 1, 2], stride=1, padding="VALID", - expected=[1.5625, 1.875]) + expected=[50, 60]) def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, @@ -318,63 +328,50 @@ class Conv3DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] - - for data_type in self._DtypesToTest(use_gpu=use_gpu): + if test.is_gpu_available() and use_gpu: + data_type = dtypes.float32 # TODO(mjanusz): Modify gradient_checker to also provide max relative # error and synchronize the tolerance levels between the tests for forward # and backward computations. - if data_type == dtypes.float64: - tolerance = 1e-8 - elif data_type == dtypes.float32: + if test.is_gpu_available(): tolerance = 5e-3 - elif data_type == dtypes.float16: - tolerance = 1e-3 - - - with self.test_session(use_gpu=use_gpu): - orig_input_tensor = constant_op.constant( + else: + # As of Aug 2016, higher tolerance is needed for some CPU architectures. + # Runs on a single machine can also generate slightly different errors + # because of multithreading. + tolerance = 8e-3 + else: + data_type = dtypes.float64 + tolerance = 1e-8 + with self.test_session(use_gpu=use_gpu): + orig_input_tensor = constant_op.constant( input_data, shape=input_shape, dtype=data_type, name="input") - filter_tensor = constant_op.constant( + filter_tensor = constant_op.constant( filter_data, shape=filter_shape, dtype=data_type, name="filter") - if data_format == "NCDHW": - input_tensor = test_util.NHWCToNCHW(orig_input_tensor) - new_strides = test_util.NHWCToNCHW(strides) - else: - input_tensor = orig_input_tensor - new_strides = strides + if data_format == "NCDHW": + input_tensor = test_util.NHWCToNCHW(orig_input_tensor) + strides = test_util.NHWCToNCHW(strides) + else: + input_tensor = orig_input_tensor - conv = nn_ops.conv3d( - input_tensor, filter_tensor, new_strides, padding, + conv = nn_ops.conv3d( + input_tensor, filter_tensor, strides, padding, data_format=data_format, name="conv") - if data_format == "NCDHW": - conv = test_util.NCHWToNHWC(conv) - - - if test_input: - jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor, - input_shape, - conv, - output_shape) - else: - jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor, - filter_shape, - conv, - output_shape) - - - if data_type != dtypes.float16: - reference_jacob_t = jacob_t - err = np.fabs(jacob_t - jacob_n).max() - else: - # Compare fp16 theoretical gradients to fp32 theoretical gradients, - # since fp16 numerical gradients are too imprecise. - err = np.fabs(jacob_t - reference_jacob_t).max() - - print("conv3d gradient error = ", err) - self.assertLess(err, tolerance) + if data_format == "NCDHW": + conv = test_util.NCHWToNHWC(conv) + if test_input: + err = gradient_checker.compute_gradient_error(orig_input_tensor, + input_shape, + conv, output_shape) + else: + err = gradient_checker.compute_gradient_error(filter_tensor, + filter_shape, conv, + output_shape) + print("conv3d gradient error = ", err) + self.assertLess(err, tolerance) def ConstructAndTestGradient(self, **kwargs): for data_format, use_gpu in GetTestConfigs(): diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py index f7ae1a0f37..3298092fbe 100644 --- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py +++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py @@ -122,9 +122,7 @@ class DepthwiseConv2DTest(test.TestCase): x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: - if data_type == dtypes.float16: - tolerance = 1e-5 - elif data_type == dtypes.float32: + if data_type == dtypes.float32: tolerance = 1e-5 else: self.assertEqual(data_type, dtypes.float64) @@ -171,7 +169,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2D,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, stride, padding, data_type, use_gpu=True) @@ -183,7 +181,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2DFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, @@ -320,9 +318,7 @@ class DepthwiseConv2DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): - if data_type == dtypes.float16: - tolerance = 0.002 - elif data_type == dtypes.float32: + if data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) @@ -373,8 +369,6 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - # Note: float16 test for DepthwiseConv2DInputGrad is not enabled, - # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -395,8 +389,6 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - # Note: float16 test for DepthwiseConv2DInputGradFormat is not enabled, - # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -415,8 +407,6 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - # Note: float16 test for DepthwiseConv2DFilterGrad is not enabled, - # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -437,8 +427,6 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - # Note: float16 test for DepthwiseConv2DFilterGradFormat is not enabled, - # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD index e220d05692..e21446c2ef 100644 --- a/tensorflow/python/kernel_tests/distributions/BUILD +++ b/tensorflow/python/kernel_tests/distributions/BUILD @@ -193,7 +193,6 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], - tags = ["manual"], # b/69001419 ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py index e24e8ade73..ebc89f15c5 100644 --- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py +++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py @@ -250,11 +250,13 @@ class MultinomialTest(test.TestCase): theta = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32) theta /= np.sum(theta, 1)[..., array_ops.newaxis] - n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32) + # Ideally we'd be able to test broadcasting but, the multinomial sampler + # doesn't support different total counts. + n = np.float32(5) with self.test_session() as sess: - # batch_shape=[3, 2], event_shape=[3] + # batch_shape=[2], event_shape=[3] dist = multinomial.Multinomial(n, theta) - x = dist.sample(int(1000e3), seed=1) + x = dist.sample(int(250e3), seed=1) sample_mean = math_ops.reduce_mean(x, 0) x_centered = x - sample_mean[array_ops.newaxis, ...] sample_cov = math_ops.reduce_mean(math_ops.matmul( @@ -289,9 +291,9 @@ class MultinomialTest(test.TestCase): def testSampleUnbiasedNonScalarBatch(self): with self.test_session() as sess: dist = multinomial.Multinomial( - total_count=[7., 6., 5.], + total_count=5., logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32))) - n = int(3e4) + n = int(3e3) x = dist.sample(n, seed=0) sample_mean = math_ops.reduce_mean(x, 0) # Cyclically rotate event dims left. diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py index 2460950aa9..4883095707 100644 --- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py +++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py @@ -33,8 +33,8 @@ from tensorflow.python.platform import test class DynamicPartitionTest(test.TestCase): def testSimpleOneDimensional(self): - with self.test_session(use_gpu=True) as sess: - data = constant_op.constant([0, 13, 2, 39, 4, 17], dtype=dtypes.float32) + with self.test_session() as sess: + data = constant_op.constant([0, 13, 2, 39, 4, 17]) indices = constant_op.constant([0, 0, 2, 3, 2, 1]) partitions = data_flow_ops.dynamic_partition( data, indices, num_partitions=4) @@ -52,10 +52,9 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual([None], partitions[3].get_shape().as_list()) def testSimpleTwoDimensional(self): - with self.test_session(use_gpu=True) as sess: + with self.test_session() as sess: data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], - [12, 13, 14], [15, 16, 17]], - dtype=dtypes.float32) + [12, 13, 14], [15, 16, 17]]) indices = constant_op.constant([0, 0, 2, 3, 2, 1]) partitions = data_flow_ops.dynamic_partition( data, indices, num_partitions=4) @@ -72,61 +71,9 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual([None, 3], partitions[2].get_shape().as_list()) self.assertEqual([None, 3], partitions[3].get_shape().as_list()) - def testLargeOneDimensional(self): - num = 100000 - data_list = [x for x in range(num)] - indices_list = [x % 2 for x in range(num)] - part1 = [x for x in range(num) if x % 2 == 0] - part2 = [x for x in range(num) if x % 2 == 1] - with self.test_session(use_gpu=True) as sess: - data = constant_op.constant(data_list, dtype=dtypes.float32) - indices = constant_op.constant(indices_list, dtype=dtypes.int32) - partitions = data_flow_ops.dynamic_partition( - data, indices, num_partitions=2) - partition_vals = sess.run(partitions) - - self.assertAllEqual(part1, partition_vals[0]) - self.assertAllEqual(part2, partition_vals[1]) - - def testLargeTwoDimensional(self): - rows = 100000 - cols = 100 - data_list = [None] * rows - for i in range(rows): - data_list[i] = [i for _ in range(cols)] - num_partitions = 97 - indices_list = [(i ** 2) % num_partitions for i in range(rows)] - parts = [[] for _ in range(num_partitions)] - for i in range(rows): - parts[(i ** 2) % num_partitions].append(data_list[i]) - with self.test_session(use_gpu=True) as sess: - data = constant_op.constant(data_list, dtype=dtypes.float32) - indices = constant_op.constant(indices_list, dtype=dtypes.int32) - partitions = data_flow_ops.dynamic_partition( - data, indices, num_partitions=num_partitions) - partition_vals = sess.run(partitions) - - for i in range(num_partitions): - # reshape because of empty parts - parts_np = np.array(parts[i], dtype=np.float).reshape(-1, cols) - self.assertAllEqual(parts_np, partition_vals[i]) - - def testSimpleComplex(self): - data_list = [1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j] - indices_list = [1, 0, 1, 0] - with self.test_session(use_gpu=True) as sess: - data = constant_op.constant(data_list, dtype=dtypes.complex64) - indices = constant_op.constant(indices_list, dtype=dtypes.int32) - partitions = data_flow_ops.dynamic_partition( - data, indices, num_partitions=2) - partition_vals = sess.run(partitions) - - self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0]) - self.assertAllEqual([1 + 2j, 5 + 6j], partition_vals[1]) - def testHigherRank(self): np.random.seed(7) - with self.test_session(use_gpu=True) as sess: + with self.test_session() as sess: for n in 2, 3: for shape in (4,), (4, 5), (4, 5, 2): partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape) @@ -148,49 +95,6 @@ class DynamicPartitionTest(test.TestCase): self.assertEqual(grads[1], None) # Partitions has no gradients self.assertAllEqual(7 * data, sess.run(grads[0])) - def testEmptyParts(self): - data_list = [1, 2, 3, 4] - indices_list = [1, 3, 1, 3] - with self.test_session(use_gpu=True) as sess: - data = constant_op.constant(data_list, dtype=dtypes.float32) - indices = constant_op.constant(indices_list, dtype=dtypes.int32) - partitions = data_flow_ops.dynamic_partition( - data, indices, num_partitions=4) - partition_vals = sess.run(partitions) - - self.assertAllEqual([], partition_vals[0]) - self.assertAllEqual([1, 3], partition_vals[1]) - self.assertAllEqual([], partition_vals[2]) - self.assertAllEqual([2, 4], partition_vals[3]) - - def testEmptyDataTwoDimensional(self): - data_list = [[], []] - indices_list = [0, 1] - with self.test_session(use_gpu=True) as sess: - data = constant_op.constant(data_list, dtype=dtypes.float32) - indices = constant_op.constant(indices_list, dtype=dtypes.int32) - partitions = data_flow_ops.dynamic_partition( - data, indices, num_partitions=3) - partition_vals = sess.run(partitions) - - self.assertAllEqual([[]], partition_vals[0]) - self.assertAllEqual([[]], partition_vals[1]) - self.assertAllEqual(np.array([], dtype=np.float).reshape(0, 0), - partition_vals[2]) - - def testEmptyPartitions(self): - data_list = [] - indices_list = [] - with self.test_session(use_gpu=True) as sess: - data = constant_op.constant(data_list, dtype=dtypes.float32) - indices = constant_op.constant(indices_list, dtype=dtypes.int32) - partitions = data_flow_ops.dynamic_partition( - data, indices, num_partitions=2) - partition_vals = sess.run(partitions) - - self.assertAllEqual([], partition_vals[0]) - self.assertAllEqual([], partition_vals[1]) - def testErrorIndexOutOfRange(self): with self.test_session() as sess: data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 150e2ff7f2..a126180414 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import numpy as np -import os from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -1342,33 +1341,11 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") - # Do not propagate the diff in cases of NaNs - os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" + # It does not propagate the diff in cases of NaNs expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] - - for v2 in [True, False]: - self._testMaxPoolGradDirect( - input_data, - output_backprop, - expected_input_backprop_cudnn, - input_sizes=[1, 4, 4, 1], - output_sizes=[1, 3, 3, 1], - window_rows=2, - window_cols=2, - row_stride=1, - col_stride=1, - padding="VALID", - use_gpu=True, - v2=v2) - - # Propagate the diff in cases of NaNs - os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" - expected_input_backprop_cudnn = expected_input_backprop_tf_cpu - for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1384,11 +1361,6 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) - if saved_nanprop: - os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop - else: - del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] - def _testMaxPoolGradDirectWithNans2_2(self): input_data = [float("nan")] * 16 output_backprop = [ @@ -1419,14 +1391,11 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") - # Do not propagate the diff in cases of NaNs - os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" + # It does not propagate the diff in cases of NaNs expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] - for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1442,31 +1411,6 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) - - # Propagate the diff in cases of NaNs - os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" - expected_input_backprop_cudnn = expected_input_backprop_tf_cpu - - for v2 in [True, False]: - self._testMaxPoolGradDirect( - input_data, - output_backprop, - expected_input_backprop_cudnn, - input_sizes=[1, 4, 4, 1], - output_sizes=[1, 3, 3, 1], - window_rows=2, - window_cols=2, - row_stride=1, - col_stride=1, - padding="VALID", - use_gpu=True, - v2=v2) - - if saved_nanprop: - os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop - else: - del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] - def testMaxPoolGradDirect(self): self._testMaxPoolGradDirect1_1() self._testMaxPoolGradDirect1_2() diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 8e54d10f32..5630259b7b 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -35,9 +35,6 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test -from tensorflow.python.training import coordinator -from tensorflow.python.training import input as input_lib -from tensorflow.python.training import queue_runner_impl from tensorflow.python.util import compat prefix_path = "tensorflow/core/lib" @@ -1014,25 +1011,6 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) - def testReadFromSameFile(self): - with self.test_session() as sess: - reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") - reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") - filename_queue = input_lib.string_input_producer([self.db_path], - num_epochs=None) - key1, value1 = reader1.read(filename_queue) - key2, value2 = reader2.read(filename_queue) - - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - for i in range(3): - for j in range(10): - k1, v1, k2, v2 = sess.run([key1, value1, key2, value2]) - self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2)) - self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2)) - coord.request_stop() - coord.join(threads) - def testReadFromFolder(self): with self.test_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_folder") @@ -1051,25 +1029,6 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) - def testReadFromFileRepeatedly(self): - with self.test_session() as sess: - reader = io_ops.LMDBReader(name="test_read_from_file_repeated") - filename_queue = input_lib.string_input_producer([self.db_path], - num_epochs=None) - key, value = reader.read(filename_queue) - - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - # Iterate over the lmdb 3 times. - for i in range(3): - # Go over all 10 records each time. - for j in range(10): - k, v = sess.run([key, value]) - self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j))) - self.assertAllEqual( - compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j)))) - coord.request_stop() - coord.join(threads) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 3a02f24902..516a9d000e 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -323,9 +323,8 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): def testBadIndices(self): # Note: GPU kernel does not return the out-of-range error needed for this # test, so this test is marked as cpu-only. - # Note: With PR #13055 a negative index will be ignored silently. with self.test_session(use_gpu=False): - for bad in [[2]], [[7]]: + for bad in [[-1]], [[7]]: unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2) with self.assertRaisesOpError( r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]): @@ -361,32 +360,6 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): x_init_value=np_x.astype(np.double), delta=1) self.assertAllClose(jacob_t, jacob_n) - def testDropNegatives(self): - # Note: the test is done by replacing segment_ids with 8 to -1 - # for index and replace values generated by numpy with 0. - dtypes = [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64, - dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128 - ] - indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) - num_segments = 12 - for indices in indices_flat, indices_flat.reshape(5, 2): - shape = indices.shape + (2,) - for dtype in dtypes: - with self.test_session(use_gpu=True): - tf_x, np_x = self._input(shape, dtype=dtype) - np_ans = self._segmentReduce( - indices, np_x, np.add, op2=None, num_out_rows=num_segments) - # Replace np_ans[8] with 0 for the value - np_ans[8:] = 0 - # Replace 8 with -1 in indices - np.place(indices, indices==8, [-1]) - s = math_ops.unsorted_segment_sum( - data=tf_x, segment_ids=indices, num_segments=num_segments) - tf_ans = s.eval() - self.assertAllClose(np_ans, tf_ans) - self.assertShapeEqual(np_ans, s) - class SparseSegmentReductionHelper(SegmentReductionHelper): diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index 7368251ab6..a9fc699b21 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -258,16 +258,6 @@ class ShapeOpsTest(test.TestCase): self.assertAllEqual([True], array_ops.expand_dims(inp, 0).eval()) self.assertAllEqual([True], array_ops.expand_dims(inp, -1).eval()) - def testExpandDimsDimType(self): - for dtype in [dtypes.int32, dtypes.int64]: - x = np.zeros([2]) - np_ans = np.expand_dims(x, axis=0) - with self.test_session(use_gpu=True): - tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype)) - tf_ans = tensor.eval() - self.assertShapeEqual(np_ans, tensor) - self.assertAllEqual(np_ans, tf_ans) - def _compareSqueeze(self, x, squeeze_dims, use_gpu): with self.test_session(use_gpu=use_gpu): if squeeze_dims: diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py index 6cdc7872f9..051a25080b 100644 --- a/tensorflow/python/kernel_tests/slice_op_test.py +++ b/tensorflow/python/kernel_tests/slice_op_test.py @@ -217,30 +217,6 @@ class SliceTest(test.TestCase): self.assertEqual(expected_val.shape, slice_t.get_shape()) self.assertEqual(expected_val.shape, slice2_t.get_shape()) - def testRandomHighRank(self): - # Random dims of rank 8 - input_shape = np.random.randint(0, 20, size=8) - inp = np.random.rand(*input_shape).astype("f") - with self.test_session(use_gpu=True) as sess: - a = constant_op.constant( - [float(x) for x in inp.ravel(order="C")], - shape=input_shape, - dtype=dtypes.float32) - indices = [0 if x == 0 else np.random.randint(x) for x in input_shape] - sizes = [ - np.random.randint(0, input_shape[i] - indices[i] + 1) - for i in range(8) - ] - slice_t = array_ops.slice(a, indices, sizes) - slice_val = sess.run(slice_t) - - expected_val = inp[indices[0]:indices[0] + sizes[0], indices[1]:indices[1] + sizes[ - 1], indices[2]:indices[2] + sizes[2], indices[3]:indices[3] + sizes[3], indices[ - 4]:indices[4] + sizes[4], indices[5]:indices[5] + sizes[5], indices[6]:indices[ - 6] + sizes[6], indices[7]:indices[7] + sizes[7]] - self.assertAllEqual(slice_val, expected_val) - self.assertEqual(expected_val.shape, slice_t.get_shape()) - def testPartialShapeInference(self): z = array_ops.zeros((1, 2, 3)) self.assertAllEqual(z.get_shape().as_list(), [1, 2, 3]) @@ -251,6 +227,7 @@ class SliceTest(test.TestCase): m2 = array_ops.slice(z, [0, 0, 0], [constant_op.constant(1) + 0, 2, -1]) self.assertAllEqual(m2.get_shape().as_list(), [None, 2, None]) + def _testGradientSlice(self, input_shape, slice_begin, slice_size): with self.test_session(use_gpu=True): num_inputs = np.prod(input_shape) diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 04758ce45a..a50f53b3cd 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -22,7 +22,6 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_array_ops from tensorflow.python.platform import test @@ -62,31 +61,6 @@ class UniqueTest(test.TestCase): for i in range(len(x)): self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii')) - def testInt32Axis(self): - x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) - with self.test_session() as sess: - y0, idx0 = gen_array_ops.unique_v2(x, axis=[0]) - tf_y0, tf_idx0 = sess.run([y0, idx0]) - y1, idx1 = gen_array_ops.unique_v2(x, axis=[1]) - tf_y1, tf_idx1 = sess.run([y1, idx1]) - self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) - self.assertAllEqual(tf_idx0, np.array([0, 0, 1])) - self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]])) - self.assertAllEqual(tf_idx1, np.array([0, 1, 1])) - - def testInt32V2(self): - # This test is only temporary, once V2 is used - # by default, the axis will be wrapped to allow `axis=None`. - x = np.random.randint(2, high=10, size=7000) - with self.test_session() as sess: - y, idx = gen_array_ops.unique_v2(x, axis=[]) - tf_y, tf_idx = sess.run([y, idx]) - - self.assertEqual(len(x), len(tf_idx)) - self.assertEqual(len(tf_y), len(np.unique(x))) - for i in range(len(x)): - self.assertEqual(x[i], tf_y[tf_idx[i]]) - class UniqueWithCountsTest(test.TestCase): def testInt32(self): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 6be2bc3e76..74b85da845 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -221,7 +221,7 @@ class Layer(object): Weight updates (for instance, the updates of the moving mean and variance in a BatchNormalization layer) may be dependent on the inputs passed - when calling a layer. Hence, when reusing the same layer on + when calling a layer. Hence, when reusing a same layer on different inputs `a` and `b`, some entries in `layer.updates` may be dependent on `a` and some on `b`. This method automatically keeps track of dependencies. @@ -295,9 +295,9 @@ class Layer(object): """Add loss tensor(s), potentially dependent on layer inputs. Some losses (for instance, activity regularization losses) may be dependent - on the inputs passed when calling a layer. Hence, when reusing the same - layer on different inputs `a` and `b`, some entries in `layer.losses` may - be dependent on `a` and some on `b`. This method automatically keeps track + on the inputs passed when calling a layer. Hence, when reusing a same layer + on different inputs `a` and `b`, some entries in `layer.losses` may be + dependent on `a` and some on `b`. This method automatically keeps track of dependencies. The `get_losses_for` method allows to retrieve the losses relevant to a diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 8c327d7e27..0c7ce02835 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -813,7 +813,6 @@ def conv3d(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, - dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) @@ -1747,7 +1746,6 @@ def conv3d_transpose(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, - dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 4d5fb97845..9d9b2b3941 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -26,7 +26,6 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.layers import base @@ -240,12 +239,6 @@ class BatchNormalization(base.Layer): raise ValueError('Unsupported axis, fused batch norm only supports ' 'axis == [1] or axis == [3]') - # Raise parameters of fp16 batch norm to fp32 - if self.dtype == dtypes.float16: - param_dtype = dtypes.float32 - else: - param_dtype = self.dtype or dtypes.float32 - axis_to_dim = {x: input_shape[x].value for x in self.axis} for x in axis_to_dim: if axis_to_dim[x] is None: @@ -269,7 +262,6 @@ class BatchNormalization(base.Layer): if self.scale: self.gamma = self.add_variable(name='gamma', shape=param_shape, - dtype=param_dtype, initializer=self.gamma_initializer, regularizer=self.gamma_regularizer, constraint=self.gamma_constraint, @@ -277,14 +269,11 @@ class BatchNormalization(base.Layer): else: self.gamma = None if self.fused: - self._gamma_const = array_ops.constant(1.0, - dtype=param_dtype, - shape=param_shape) + self._gamma_const = array_ops.constant(1.0, shape=param_shape) if self.center: self.beta = self.add_variable(name='beta', shape=param_shape, - dtype=param_dtype, initializer=self.beta_initializer, regularizer=self.beta_regularizer, constraint=self.beta_constraint, @@ -292,9 +281,7 @@ class BatchNormalization(base.Layer): else: self.beta = None if self.fused: - self._beta_const = array_ops.constant(0.0, - dtype=param_dtype, - shape=param_shape) + self._beta_const = array_ops.constant(0.0, shape=param_shape) # Disable variable partitioning when creating the moving mean and variance try: @@ -306,14 +293,12 @@ class BatchNormalization(base.Layer): self.moving_mean = self.add_variable( name='moving_mean', shape=param_shape, - dtype=param_dtype, initializer=self.moving_mean_initializer, trainable=False) self.moving_variance = self.add_variable( name='moving_variance', shape=param_shape, - dtype=param_dtype, initializer=self.moving_variance_initializer, trainable=False) @@ -329,7 +314,6 @@ class BatchNormalization(base.Layer): def _renorm_variable(name, shape): var = self.add_variable(name=name, shape=shape, - dtype=param_dtype, initializer=init_ops.zeros_initializer(), trainable=False) return var @@ -372,6 +356,7 @@ class BatchNormalization(base.Layer): def _fused_batch_norm(self, inputs, training): """Returns the output of fused batch norm.""" + # TODO(reedwm): Add support for fp16 inputs. beta = self.beta if self.center else self._beta_const gamma = self.gamma if self.scale else self._gamma_const @@ -767,7 +752,6 @@ def batch_normalization(inputs, virtual_batch_size=virtual_batch_size, adjustment=adjustment, name=name, - dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs, training=training) diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index b2876c58c2..90ebdc8c86 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -68,12 +68,11 @@ class BNTest(test.TestCase): use_gpu, is_fused, restore=False, - freeze_mode=False, - dtype=dtypes.float32): + freeze_mode=False): ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype=dtype, shape=shape) + image = array_ops.placeholder(dtype='float32', shape=shape) loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode) if restore: saver.restore(sess, checkpoint_path) @@ -81,7 +80,7 @@ class BNTest(test.TestCase): sess.run(variables.global_variables_initializer()) np.random.seed(0) for _ in range(2): - image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype) + image_val = np.random.rand(*shape).astype(np.float32) sess.run([loss, train_op], feed_dict={image: image_val}) if restore: all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) @@ -91,74 +90,15 @@ class BNTest(test.TestCase): saver.save(sess, checkpoint_path) def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused): - dtype = image_val.dtype ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype=dtype, shape=shape) + image = array_ops.placeholder(dtype='float32', shape=shape) loss, _, saver = self._simple_model(image, is_fused, True) saver.restore(sess, checkpoint_path) loss_val = sess.run(loss, feed_dict={image: image_val}) return loss_val - def _trainEvalSequence(self, - dtype, - train1_use_gpu, - train2_use_gpu, - infer_use_gpu): - batch, height, width, input_channels = 2, 4, 5, 3 - shape = [batch, height, width, input_channels] - checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' % - (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu)) - - self._train( - checkpoint, - shape, - use_gpu=train1_use_gpu, - is_fused=True, - restore=False, - freeze_mode=False, - dtype=dtype) - - train_vars = self._train( - checkpoint, - shape, - use_gpu=train2_use_gpu, - is_fused=True, - restore=True, - freeze_mode=False, - dtype=dtype) - - np.random.seed(0) - image_val = np.random.rand(batch, - height, - width, - input_channels).astype(dtype.as_numpy_dtype) - loss_val = self._infer(checkpoint, image_val, shape, - use_gpu=infer_use_gpu, is_fused=True) - - return train_vars, loss_val - - def testHalfPrecision(self): - ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32, - train1_use_gpu=True, - train2_use_gpu=True, - infer_use_gpu=True) - - self.assertEqual(len(ref_vars), 5) - - for train1_use_gpu in [True, False]: - for train2_use_gpu in [True, False]: - for infer_use_gpu in [True, False]: - test_vars, test_loss = self._trainEvalSequence(dtypes.float16, - train1_use_gpu, - train2_use_gpu, - infer_use_gpu) - self.assertEqual(len(test_vars), 5) - for test_var, ref_var in zip(test_vars, ref_vars): - self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) - self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3) - def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b, use_gpu_checkpoint_a, use_gpu_checkpoint_b, use_gpu_test_a, use_gpu_test_b, freeze_mode): @@ -278,36 +218,6 @@ class BNTest(test.TestCase): ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), bn.trainable_variables) - def testCreateFusedBNFloat16(self): - # Call layer. - bn = normalization_layers.BatchNormalization(axis=1, fused=True) - inputs = random_ops.random_uniform((5, 4, 3, 3), - seed=1, - dtype=dtypes.float16) - training = array_ops.placeholder(dtype='bool') - outputs = bn.apply(inputs, training=training) - - # Verify shape. - self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3]) - - # Verify layer attributes. - self.assertEqual(len(bn.updates), 2) - self.assertEqual(len(bn.variables), 4) - self.assertEqual(len(bn.trainable_variables), 2) - self.assertEqual(len(bn.non_trainable_variables), 2) - for var in bn.variables: - self.assertEqual(var.dtype, dtypes.float32_ref) - - # Test that updates were created and added to UPDATE_OPS. - self.assertEqual(len(bn.updates), 2) - self.assertListEqual( - ops.get_collection(ops.GraphKeys.UPDATE_OPS), bn.updates) - - # Test that weights were created and added to TRAINABLE_VARIABLES. - self.assertListEqual( - ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), - bn.trainable_variables) - def test3DInputAxis1(self): epsilon = 1e-3 bn = normalization_layers.BatchNormalization( diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 43238757c7..c3c7ecd080 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1132,7 +1132,7 @@ def concat(values, axis, name="concat"): return gen_array_ops._concat_v2(values=values, axis=axis, name=name) -def boolean_mask(tensor, mask, name="boolean_mask", axis=None): +def boolean_mask(tensor, mask, name="boolean_mask"): """Apply boolean mask to tensor. Numpy equivalent is `tensor[mask]`. ```python @@ -1146,17 +1146,11 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): the first K dimensions of `tensor`'s shape. We then have: `boolean_mask(tensor, mask)[i, j1,...,jd] = tensor[i1,...,iK,j1,...,jd]` where `(i1,...,iK)` is the ith `True` entry of `mask` (row-major order). - The `axis` could be used with `mask` to indicate the axis to mask from. - In that case, `axis + dim(mask) <= dim(tensor)` and `mask`'s shape must match - the first `axis + dim(mask)` dimensions of `tensor`'s shape. Args: tensor: N-D tensor. mask: K-D boolean tensor, K <= N and K must be known statically. name: A name for this operation (optional). - axis: A 0-D int Tensor representing the axis in `tensor` to mask from. - By default, axis is 0 which will mask from the first dimension. Otherwise - K + axis <= N. Returns: (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding @@ -1175,10 +1169,10 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): ``` """ - def _apply_mask_1d(reshaped_tensor, mask, axis=None): + def _apply_mask_1d(reshaped_tensor, mask): """Mask tensor along dimension 0 with a 1-D mask.""" indices = squeeze(where(mask), squeeze_dims=[1]) - return gather(reshaped_tensor, indices, axis=axis) + return gather(reshaped_tensor, indices) with ops.name_scope(name, values=[tensor, mask]): tensor = ops.convert_to_tensor(tensor, name="tensor") @@ -1193,22 +1187,19 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): raise ValueError( "Number of mask dimensions must be specified, even if some dimensions" " are None. E.g. shape=[None] is ok, but shape=None is not.") - axis = 0 if axis is None else axis - shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask) + shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0]) + leading_size = gen_math_ops._prod(shape(tensor)[:ndims_mask], [0]) tensor = reshape(tensor, - concat([shape(tensor)[:axis], - [leading_size], - shape(tensor)[axis+ndims_mask:]], 0)) - first_dim = shape_tensor[axis:axis+ndims_mask].num_elements() + concat([[leading_size], + shape(tensor)[ndims_mask:]], 0)) + first_dim = shape_tensor[:ndims_mask].num_elements() tensor.set_shape( - tensor_shape.as_shape(shape_tensor[:axis]) - .concatenate([first_dim]) - .concatenate(shape_tensor[axis+ndims_mask:])) + tensor_shape.as_shape([first_dim]) + .concatenate(shape_tensor[ndims_mask:])) mask = reshape(mask, [-1]) - return _apply_mask_1d(tensor, mask, axis) + return _apply_mask_1d(tensor, mask) def sparse_mask(a, mask_indices, name=None): @@ -1530,8 +1521,7 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, - `complex64`, `complex128` or `bool`. + `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. @@ -1582,8 +1572,8 @@ def ones_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, - `complex64`, `complex128` or `bool`. + `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, `complex128` or + `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 2accedf1b9..923696a553 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -196,7 +196,7 @@ class Dirichlet(distribution.Distribution): alpha=self.concentration, dtype=self.dtype, seed=seed) - return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keepdims=True) + return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keep_dims=True) @distribution_util.AppendDocstring(_dirichlet_sample_note) def _log_prob(self, x): diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index d49fac59ca..00b5697c83 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -26,7 +26,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import functional_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import util as distribution_util @@ -141,8 +140,6 @@ class Multinomial(distribution.Distribution): counts = [[2., 1, 1], [3, 1, 1]] dist.prob(counts) # Shape [2] - - dist.sample(5) # Shape [5, 2, 3] ``` """ @@ -234,35 +231,29 @@ class Multinomial(distribution.Distribution): def _sample_n(self, n, seed=None): n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32) + if self.total_count.get_shape().ndims is not None: + if self.total_count.get_shape().ndims != 0: + raise NotImplementedError( + "Sample only supported for scalar number of draws.") + elif self.validate_args: + is_scalar = check_ops.assert_rank( + n_draws, 0, + message="Sample only supported for scalar number of draws.") + n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws) k = self.event_shape_tensor()[0] - - # boardcast the total_count and logits to same shape - n_draws = array_ops.ones_like( - self.logits[..., 0], dtype=n_draws.dtype) * n_draws - logits = array_ops.ones_like( - n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits - - # flatten the total_count and logits - flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] - flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] - - # computes each total_count and logits situation by map_fn - def _sample_single(args): - logits, n_draw = args[0], args[1] # [K], [] - x = random_ops.multinomial(logits[array_ops.newaxis, ...], - n_draw, seed) # [1, n*n_draw] - x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] - x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] - return x - x = functional_ops.map_fn(_sample_single, - [flat_logits, flat_ndraws], - dtype=self.dtype) # [B1B2...Bm, n, k] - - # reshape the results to proper shape + # Flatten batch dims so logits has shape [B, k], + # where B = reduce_prod(self.batch_shape_tensor()). + x = random_ops.multinomial( + logits=array_ops.reshape(self.logits, [-1, k]), + num_samples=n * n_draws, + seed=seed) + x = array_ops.reshape(x, shape=[-1, n, n_draws]) + x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), + axis=-2) # shape: [B, n, k] x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) - x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] - return x + x = array_ops.reshape(x, final_shape) + return math_ops.cast(x, self.dtype) @distribution_util.AppendDocstring(_multinomial_sample_note) def _log_prob(self, counts): diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 7c23321ca5..2946dbe81e 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1121,7 +1121,7 @@ def rgb_to_grayscale(images, name=None): rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) gray_float = math_ops.reduce_sum(flt_image * rgb_weights, rank_1, - keepdims=True) + keep_dims=True) gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) return convert_image_dtype(gray_float, orig_dtype, name=name) @@ -1212,7 +1212,26 @@ def adjust_hue(image, delta, name=None): orig_dtype = image.dtype flt_image = convert_image_dtype(image, dtypes.float32) - rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) + # TODO(zhengxq): we will switch to the fused version after we add a GPU + # kernel for that. + fused = os.environ.get('TF_ADJUST_HUE_FUSED', '') + fused = fused.lower() in ('true', 't', '1') + + if not fused: + hsv = gen_image_ops.rgb_to_hsv(flt_image) + + hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1]) + saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1]) + value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1]) + + # Note that we add 2*pi to guarantee that the resulting hue is a positive + # floating point number since delta is [-0.5, 0.5]. + hue = math_ops.mod(hue + (delta + 1.), 1.) + + hsv_altered = array_ops.concat([hue, saturation, value], 2) + rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered) + else: + rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) return convert_image_dtype(rgb_altered, orig_dtype) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 14a039ffd0..2cb467c891 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -30,7 +30,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat -from tensorflow.python.util.deprecation import deprecated_args # Names below are lower_case. # pylint: disable=invalid-name @@ -439,10 +438,7 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): # pylint: disable=redefined-builtin -@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", - "keep_dims") -def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, - keep_dims=None): +def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): r"""Computes the norm of vectors, matrices, and tensors. This function can compute several different vector norms (the 1-norm, the @@ -475,13 +471,13 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, can be either a matrix or a batch of matrices at runtime, pass `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are computed. - keepdims: If True, the axis indicated in `axis` are kept with size 1. + keep_dims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. Returns: output: A `Tensor` of the same type as tensor, containing the vector or - matrix norms. If `keepdims` is True then the rank of output is equal to + matrix norms. If `keep_dims` is True then the rank of output is equal to the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar, if `axis` is an integer, the rank of `output` is one less than the rank of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less @@ -501,13 +497,6 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, @end_compatibility """ - if keep_dims is not None: - if keepdims is not None: - raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") - keepdims = keep_dims - if keepdims is None: - keepdims = False - is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and len(axis) == 2) if is_matrix_norm: @@ -539,25 +528,25 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, # matrices. result = math_ops.sqrt( math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keepdims=True)) + tensor * math_ops.conj(tensor), axis, keep_dims=True)) else: result = math_ops.abs(tensor) if ord == 1: sum_axis = None if axis is None else axis[0] - result = math_ops.reduce_sum(result, sum_axis, keepdims=True) + result = math_ops.reduce_sum(result, sum_axis, keep_dims=True) if is_matrix_norm: - result = math_ops.reduce_max(result, axis[-1], keepdims=True) + result = math_ops.reduce_max(result, axis[-1], keep_dims=True) elif ord == np.inf: if is_matrix_norm: - result = math_ops.reduce_sum(result, axis[1], keepdims=True) + result = math_ops.reduce_sum(result, axis[1], keep_dims=True) max_axis = None if axis is None else axis[0] - result = math_ops.reduce_max(result, max_axis, keepdims=True) + result = math_ops.reduce_max(result, max_axis, keep_dims=True) else: # General p-norms (positive p only) result = math_ops.pow( math_ops.reduce_sum( - math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord) - if not keepdims: + math_ops.pow(result, ord), axis, keep_dims=True), 1.0 / ord) + if not keep_dims: result = array_ops.squeeze(result, axis) return result diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py index 04eeb00518..5732c756ce 100644 --- a/tensorflow/python/ops/math_grad_test.py +++ b/tensorflow/python/ops/math_grad_test.py @@ -113,23 +113,6 @@ class MinOrMaxGradientTest(test.TestCase): self.assertLess(error, 1e-4) -class MaximumOrMinimumGradientTest(test.TestCase): - - def testMaximumGradient(self): - inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) - outputs = math_ops.maximum(inputs, 3.0) - with self.test_session(): - error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) - self.assertLess(error, 1e-4) - - def testMinimumGradient(self): - inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) - outputs = math_ops.minimum(inputs, 2.0) - with self.test_session(): - error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) - self.assertLess(error, 1e-4) - - class ProdGradientTest(test.TestCase): def testProdGradient(self): diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e2e23dccef..4c400423b6 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -170,13 +170,14 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops.gen_math_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat -from tensorflow.python.util import deprecation +from tensorflow.python.util.deprecation import deprecated +from tensorflow.python.util.deprecation import deprecated_args # Aliases for some automatically-generated names. linspace = gen_math_ops.lin_space -arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max) # pylint: disable=used-before-assignment -arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment +arg_max = deprecated(None, "Use `argmax` instead")(arg_max) # pylint: disable=used-before-assignment +arg_min = deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment def _set_doc(doc): @@ -189,8 +190,7 @@ def _set_doc(doc): # pylint: disable=redefined-builtin -@deprecation.deprecated_args(None, "Use the `axis` argument instead", - "dimension") +@deprecated_args(None, "Use the `axis` argument instead", "dimension") @_set_doc( gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace( "dimension", "axis")) @@ -208,8 +208,7 @@ def argmax(input, return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type) -@deprecation.deprecated_args(None, "Use the `axis` argument instead", - "dimension") +@deprecated_args(None, "Use the `axis` argument instead", "dimension") @_set_doc( gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace( "dimension", "axis")) @@ -325,7 +324,7 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`") # TODO(aselle): put deprecation in after another round of global code changes -@deprecation.deprecated( +@deprecated( "2016-12-30", "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`") def _mul(x, y, name=None): @@ -344,7 +343,7 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`") # TODO(aselle): put deprecation in after another round of global code changes -@deprecation.deprecated( +@deprecated( "2016-12-30", "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`") def _sub(x, y, name=None): @@ -382,9 +381,8 @@ def negative(x, name=None): # pylint: disable=g-docstring-has-escape -@deprecation.deprecated( - "2016-12-30", - "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`") +@deprecated("2016-12-30", + "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`") def _neg(x, name=None): """Computes numerical negative value element-wise. @@ -1271,27 +1269,24 @@ def _ReductionDims(x, axis, reduction_indices): return range(0, array_ops.rank(x)) -def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output): +def _may_reduce_to_scalar(keep_dims, axis, reduction_indices, output): """Set a reduction's output's shape to be a scalar if we are certain.""" - if (not output.shape.is_fully_defined()) and (not keepdims) and ( + if (not output.shape.is_fully_defined()) and (not keep_dims) and ( axis is None) and (reduction_indices is None): output.set_shape(()) return output -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_sum(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes the sum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1304,7 +1299,7 @@ def reduce_sum(input_tensor, tf.reduce_sum(x) # 6 tf.reduce_sum(x, 0) # [2, 2, 2] tf.reduce_sum(x, 1) # [3, 3] - tf.reduce_sum(x, 1, keepdims=True) # [[3], [3]] + tf.reduce_sum(x, 1, keep_dims=True) # [[3], [3]] tf.reduce_sum(x, [0, 1]) # 6 ``` @@ -1313,10 +1308,9 @@ def reduce_sum(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1325,34 +1319,26 @@ def reduce_sum(input_tensor, Equivalent to np.sum @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - if keepdims is None: - keepdims = False - - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, gen_math_ops._sum( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keepdims, + keep_dims, name=name)) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def count_nonzero(input_tensor, axis=None, - keepdims=None, + keep_dims=False, dtype=dtypes.int64, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes number of nonzero elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1369,7 +1355,7 @@ def count_nonzero(input_tensor, tf.count_nonzero(x) # 3 tf.count_nonzero(x, 0) # [1, 2, 0] tf.count_nonzero(x, 1) # [1, 2] - tf.count_nonzero(x, 1, keepdims=True) # [[1], [2]] + tf.count_nonzero(x, 1, keep_dims=True) # [[1], [2]] tf.count_nonzero(x, [0, 1]) # 3 ``` @@ -1378,20 +1364,14 @@ def count_nonzero(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. dtype: The output dtype; defaults to `tf.int64`. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor (number of nonzero values). """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - if keepdims is None: - keepdims = False - with ops.name_scope(name, "count_nonzero", [input_tensor]): input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor") zero = input_tensor.dtype.as_numpy_dtype() @@ -1400,24 +1380,21 @@ def count_nonzero(input_tensor, # int64 reduction happens on GPU to_int64(gen_math_ops.not_equal(input_tensor, zero)), axis=axis, - keepdims=keepdims, + keep_dims=keep_dims, reduction_indices=reduction_indices), dtype=dtype) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_mean(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes the mean of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1437,58 +1414,36 @@ def reduce_mean(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @compatibility(numpy) Equivalent to np.mean - - Please note that `np.mean` has a `dtype` parameter that could be used to - specify the output type. By default this is `dtype=float64`. On the other - hand, `tf.reduce_mean` has an aggressive type inference from `input_tensor`, - for example: - - ```python - x = tf.constant([1, 0, 1, 0]) - tf.reduce_mean(x) # 0 - y = tf.constant([1., 0., 1., 0.]) - tf.reduce_mean(y) # 0.5 - ``` - @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - - if keepdims is None: - keepdims = False - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, gen_math_ops._mean( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keepdims, + keep_dims, name=name)) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_prod(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes the product of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1499,10 +1454,9 @@ def reduce_prod(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1511,33 +1465,25 @@ def reduce_prod(input_tensor, Equivalent to np.prod @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - - if keepdims is None: - keepdims = False - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, gen_math_ops._prod( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keepdims, + keep_dims, name=name)) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_min(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes the minimum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1548,10 +1494,9 @@ def reduce_min(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1560,32 +1505,25 @@ def reduce_min(input_tensor, Equivalent to np.min @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - if keepdims is None: - keepdims = False - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, gen_math_ops._min( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keepdims, + keep_dims, name=name)) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_max(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes the maximum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1596,10 +1534,9 @@ def reduce_max(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1608,32 +1545,25 @@ def reduce_max(input_tensor, Equivalent to np.max @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - if keepdims is None: - keepdims = False - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, gen_math_ops._max( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keepdims, + keep_dims, name=name)) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_all(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes the "logical and" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1653,10 +1583,9 @@ def reduce_all(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1665,32 +1594,25 @@ def reduce_all(input_tensor, Equivalent to np.all @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - if keepdims is None: - keepdims = False - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, gen_math_ops._all( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keepdims, + keep_dims, name=name)) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_any(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes the "logical or" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1710,10 +1632,9 @@ def reduce_any(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1722,32 +1643,25 @@ def reduce_any(input_tensor, Equivalent to np.any @end_compatibility """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - if keepdims is None: - keepdims = False - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, gen_math_ops._any( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keepdims, + keep_dims, name=name)) -@deprecation.deprecated_args( - None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_logsumexp(input_tensor, axis=None, - keepdims=None, + keep_dims=False, name=None, - reduction_indices=None, - keep_dims=None): + reduction_indices=None): """Computes log(sum(exp(elements across dimensions of a tensor))). Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keepdims` is true, the reduced dimensions + Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keep_dims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1764,7 +1678,7 @@ def reduce_logsumexp(input_tensor, tf.reduce_logsumexp(x) # log(6) tf.reduce_logsumexp(x, 0) # [log(2), log(2), log(2)] tf.reduce_logsumexp(x, 1) # [log(3), log(3)] - tf.reduce_logsumexp(x, 1, keepdims=True) # [[log(3)], [log(3)]] + tf.reduce_logsumexp(x, 1, keep_dims=True) # [[log(3)], [log(3)]] tf.reduce_logsumexp(x, [0, 1]) # log(6) ``` @@ -1773,24 +1687,19 @@ def reduce_logsumexp(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keepdims: If true, retains reduced dimensions with length 1. + keep_dims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. - keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. """ - keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, - "keep_dims", keep_dims) - if keepdims is None: - keepdims = False with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name: raw_max = reduce_max( input_tensor, axis=axis, reduction_indices=reduction_indices, - keepdims=True) + keep_dims=True) my_max = array_ops.stop_gradient( array_ops.where( gen_math_ops.is_finite(raw_max), raw_max, @@ -1799,13 +1708,13 @@ def reduce_logsumexp(input_tensor, reduce_sum( gen_math_ops.exp(input_tensor - my_max), axis, - keepdims=True, + keep_dims=True, reduction_indices=reduction_indices)) + my_max - if not keepdims: + if not keep_dims: if isinstance(axis, int): axis = [axis] result = array_ops.squeeze(result, axis) - return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result) + return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, result) def trace(x, name=None): @@ -2307,10 +2216,9 @@ def bincount(arr, maxlength = ops.convert_to_tensor( maxlength, name="maxlength", dtype=dtypes.int32) output_size = gen_math_ops.minimum(maxlength, output_size) - if weights is not None: - weights = ops.convert_to_tensor(weights, name="weights") - return gen_math_ops.unsorted_segment_sum(weights, arr, output_size) - weights = constant_op.constant([], dtype) + weights = ( + ops.convert_to_tensor(weights, name="weights") + if weights is not None else constant_op.constant([], dtype)) return gen_math_ops.bincount(arr, output_size, weights) @@ -2473,7 +2381,7 @@ def reduced_shape(input_shape, axes): input_shape: 1-D Tensor, the shape of the Tensor being reduced. axes: 1-D Tensor, the reduction axes. Returns: - A 1-D Tensor, the output shape as if keepdims were set to True. + A 1-D Tensor, the output shape as if keep_dims were set to True. """ # Example: # cast needed for SparseTensor reductions diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index d30f6b92ad..717ee1254f 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -794,7 +794,7 @@ def mean_cosine_distance(labels, predictions, dim, weights=None, radial_diffs = math_ops.multiply(predictions, labels) radial_diffs = math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,], - keepdims=True) + keep_dims=True) mean_distance, update_op = mean(radial_diffs, weights, None, None, diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py index e72d34d1f7..1fcd0384da 100644 --- a/tensorflow/python/ops/nn_fused_batchnorm_test.py +++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py @@ -335,22 +335,22 @@ class BatchNormalizationTest(test.TestCase): def testInference(self): x_shape = [1, 1, 6, 1] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_inference( - x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] if test.is_gpu_available(cuda_only=True): for dtype in [np.float16, np.float32]: self._test_inference( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -359,33 +359,33 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_inference( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_inference( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') def testTraining(self): x_shape = [1, 1, 6, 1] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_training( - x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_training( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -394,20 +394,20 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_training( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_training( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') def testBatchNormGrad(self): for is_training in [True, False]: x_shape = [1, 1, 6, 1] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_gradient( x_shape, dtype, [1], @@ -422,17 +422,17 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NCHW', is_training=is_training) - self._test_gradient( - x_shape, - dtype, [1], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + np.float32, [1], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 1, 6, 2] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_gradient( x_shape, dtype, [2], @@ -440,13 +440,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - dtype, [2], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + np.float32, [2], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -460,8 +460,8 @@ class BatchNormalizationTest(test.TestCase): is_training=is_training) x_shape = [5, 7, 11, 4] - for dtype in [np.float16, np.float32]: - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True): + for dtype in [np.float16, np.float32]: self._test_gradient( x_shape, dtype, [7], @@ -476,13 +476,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - dtype, [4], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + np.float32, [4], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) def _testBatchNormGradGrad(self, config): shape = config['shape'] @@ -506,14 +506,15 @@ class BatchNormalizationTest(test.TestCase): data_format='NCHW', is_training=is_training, err_tolerance=err_tolerance) - self._test_grad_grad( - shape, - dtype, [shape[3]], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training, - err_tolerance=err_tolerance) + if dtype != np.float16: + self._test_grad_grad( + shape, + np.float32, [shape[3]], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training, + err_tolerance=err_tolerance) def testBatchNormGradGrad(self): configs = [{ @@ -524,10 +525,6 @@ class BatchNormalizationTest(test.TestCase): 'shape': [2, 3, 2, 2], 'err_tolerance': 1e-3, 'dtype': np.float32, - }, { - 'shape': [2, 3, 4, 5], - 'err_tolerance': 1e-2, - 'dtype': np.float16, }, { 'shape': [2, 3, 2, 2], 'err_tolerance': 2e-3, diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index da037a7983..431ea1186a 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -32,8 +32,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables -from tensorflow.python.util.deprecation import deprecated_args -from tensorflow.python.util.deprecation import deprecated_argument_lookup def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None): @@ -315,20 +313,19 @@ def swish(features): return features * math_ops.sigmoid(features) -@deprecated_args(None, "dim is deprecated, use axis instead", "dim") -def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): - """Normalizes along dimension `axis` using an L2 norm. +def l2_normalize(x, dim, epsilon=1e-12, name=None): + """Normalizes along dimension `dim` using an L2 norm. - For a 1-D tensor with `axis = 0`, computes + For a 1-D tensor with `dim = 0`, computes output = x / sqrt(max(sum(x**2), epsilon)) For `x` with more dimensions, independently normalizes each 1-D slice along - dimension `axis`. + dimension `dim`. Args: x: A `Tensor`. - axis: Dimension along which to normalize. A scalar or a vector of + dim: Dimension along which to normalize. A scalar or a vector of integers. epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`. @@ -338,9 +335,8 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): A `Tensor` with the same shape as `x`. """ with ops.name_scope(name, "l2_normalize", [x]) as name: - axis = deprecated_argument_lookup("axis", axis, "dim", dim) x = ops.convert_to_tensor(x, name="x") - square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True) + square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True) x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon)) return math_ops.multiply(x, x_inv_norm, name=name) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 61fa462988..bdaac65904 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -23,7 +23,6 @@ import numbers import numpy as np from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util from tensorflow.python.framework import ops @@ -38,8 +37,6 @@ from tensorflow.python.ops import random_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import -from tensorflow.python.util.deprecation import deprecated_args -from tensorflow.python.util.deprecation import deprecated_argument_lookup from tensorflow.python.util import deprecation @@ -1648,18 +1645,17 @@ def _softmax(logits, compute_op, dim=-1, name=None): return output -@deprecated_args(None, "dim is deprecated, use axis instead", "dim") -def softmax(logits, axis=None, name=None, dim=None): +def softmax(logits, dim=-1, name=None): """Computes softmax activations. This function performs the equivalent of - softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) + softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), dim) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - axis: The dimension softmax would be performed on. The default is -1 which + dim: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1667,27 +1663,23 @@ def softmax(logits, axis=None, name=None, dim=None): A `Tensor`. Has the same type and shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `axis` is beyond the last + InvalidArgumentError: if `logits` is empty or `dim` is beyond the last dimension of `logits`. """ - axis = deprecated_argument_lookup("axis", axis, "dim", dim) - if axis is None: - axis = -1 - return _softmax(logits, gen_nn_ops._softmax, axis, name) + return _softmax(logits, gen_nn_ops._softmax, dim, name) -@deprecated_args(None, "dim is deprecated, use axis instead", "dim") -def log_softmax(logits, axis=None, name=None, dim=None): +def log_softmax(logits, dim=-1, name=None): """Computes log softmax activations. For each batch `i` and class `j` we have - logsoftmax = logits - log(reduce_sum(exp(logits), axis)) + logsoftmax = logits - log(reduce_sum(exp(logits), dim)) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - axis: The dimension softmax would be performed on. The default is -1 which + dim: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1695,13 +1687,10 @@ def log_softmax(logits, axis=None, name=None, dim=None): A `Tensor`. Has the same type as `logits`. Same shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `axis` is beyond the last + InvalidArgumentError: if `logits` is empty or `dim` is beyond the last dimension of `logits`. """ - axis = deprecated_argument_lookup("axis", axis, "dim", dim) - if axis is None: - axis = -1 - return _softmax(logits, gen_nn_ops._log_softmax, axis, name) + return _softmax(logits, gen_nn_ops._log_softmax, dim, name) def _ensure_xent_args(name, sentinel, labels, logits): @@ -2316,100 +2305,6 @@ def conv1d(value, filters, stride, padding, return array_ops.squeeze(result, [spatial_start_dim]) -def conv1d_transpose(value, - filter, - output_shape, - stride, - padding="SAME", - data_format="NWC", - name=None): - """The transpose of `conv1d`. - - This operation is sometimes called "deconvolution" after [Deconvolutional - Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is - actually the transpose (gradient) of `conv1d` rather than an actual - deconvolution. - - Args: - value: A 3-D `Tensor` of type `float` and shape - `[batch, in_width, in_channels]` for `NWC` data format or - `[batch, in_channels, in_width]` for `NCW` data format. - filter: A 3-D `Tensor` with the same type as `value` and shape - `[filter_width, output_channels, in_channels]`. `filter`'s - `in_channels` dimension must match that of `value`. - output_shape: A 1-D `Tensor` representing the output shape of the - deconvolution op. - stride: An `integer`. The number of entries by which - the filter is moved right at each step. - padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. - See the @{tf.nn.convolution$comment here} - data_format: A string. 'NHWC' and 'NCHW' are supported. - name: Optional name for the returned tensor. - - Returns: - A `Tensor` with the same type as `value`. - - Raises: - ValueError: If input/output depth does not match `filter`'s shape, or if - padding is other than `'VALID'` or `'SAME'`. - """ - with ops.name_scope(name, "conv1d_transpose", - [value, filter, output_shape]) as name: - output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") - if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): - raise ValueError("output_shape must have shape (3,), got {}" - .format(output_shape_.get_shape())) - - # The format could be either NWC or NCW, map to NHWC or NCHW - if data_format is None or data_format == "NWC": - data_format_2d = "NHWC" - axis = 2 - elif data_format == "NCW": - data_format_2d = "NCHW" - axis = 1 - else: - raise ValueError("data_format must be \"NWC\" or \"NCW\".") - - if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]): - raise ValueError("input channels does not match filter's input channels, " - "{} != {}".format(value.get_shape()[axis], - filter.get_shape()[2])) - - if isinstance(output_shape, (list, np.ndarray)): - # output_shape's shape should be == [3] if reached this point. - if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): - raise ValueError( - "output_shape does not match filter's output channels, " - "{} != {}".format(output_shape[axis], filter.get_shape()[1])) - - if padding != "VALID" and padding != "SAME": - raise ValueError("padding must be either VALID or SAME:" - " {}".format(padding)) - - # Reshape the input tensor to [batch, 1, in_width, in_channels] - if data_format_2d == "NHWC": - output_shape_ = array_ops.concat([output_shape_[:1], [1], - output_shape_[1:]], axis=0) - spatial_start_dim = 1 - strides = [1, 1, stride, 1] - else: - output_shape_ = array_ops.concat([output_shape_[:2], [1], - output_shape_[2:]], axis=0) - spatial_start_dim = 2 - strides = [1, 1, 1, stride] - value = array_ops.expand_dims(value, spatial_start_dim) - filter = array_ops.expand_dims(filter, 0) - - result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, - filter=filter, - out_backprop=value, - strides=strides, - padding=padding, - data_format=data_format_2d, - name=name) - return array_ops.squeeze(result, [spatial_start_dim]) - - @ops.RegisterStatistics("Dilation2D", "flops") def _calc_dilation2d_flops(graph, node): """Calculates the compute resources needed for Dilation2D.""" diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index a1e4305de1..e9b1c67d16 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -1063,13 +1063,13 @@ class Variable(object): class PartitionedVariable(object): """A container for partitioned `Variable` objects. - @compatibility(eager) `tf.PartitionedVariable` is not compatible with + @compatiblity(eager) `tf.PartitionedVariable` is not compatible with eager execution. Use `tfe.Variable` instead which is compatable with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. - @end_compatibility + @end_compatiblity """ class PartitionedVariableIterator(object): diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py old mode 100755 new mode 100644 diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index d78362d4fb..99bed86a17 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -232,6 +232,7 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) __macro(cudnnRNNBackwardData) \ __macro(cudnnRNNBackwardWeights) \ __macro(cudnnSetRNNDescriptor) \ + __macro(cudnnSetRNNDescriptor_v6) \ __macro(cudnnGetFilterNdDescriptor) // clang-format on @@ -244,8 +245,7 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 6000 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro) \ - __macro(cudnnConvolutionBiasActivationForward) \ - __macro(cudnnSetRNNDescriptor_v6) + __macro(cudnnConvolutionBiasActivationForward) // clang-format on CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -665,6 +665,7 @@ class ScopedPoolingDescriptor { LOG(FATAL) << "could not create cudnn pooling descriptor: " << ToString(status); } + const std::vector strides64 = pooling_descriptor.strides(); const std::vector padding64 = pooling_descriptor.padding(); const std::vector shape64 = pooling_descriptor.window(); @@ -679,14 +680,14 @@ class ScopedPoolingDescriptor { &CheckedNarrowing); std::transform(shape64.cbegin(), shape64.cend(), shape.begin(), &CheckedNarrowing); - bool propagate_nans = pooling_descriptor.propagate_nans(); status = wrap::cudnnSetPoolingNdDescriptor( parent_, handle_, (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum ? CUDNN_POOLING_MAX : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING), #if CUDNN_VERSION >= 5000 - propagate_nans ? CUDNN_PROPAGATE_NAN : CUDNN_NOT_PROPAGATE_NAN, + // Always propagate nans. + CUDNN_PROPAGATE_NAN, #endif nd, shape.data(), padding.data(), strides.data()); if (status != CUDNN_STATUS_SUCCESS) { diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 43d2d3cd48..07fe8a85f4 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -482,7 +482,6 @@ void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) { window_ = other.window_; padding_ = other.padding_; strides_ = other.strides_; - propagate_nans_ = other.propagate_nans_; } string PoolingDescriptor::ToString() const { @@ -496,12 +495,9 @@ string PoolingDescriptor::ToString() const { port::Appendf(&padding, "%lld", padding_[i]); } - const char* propagate_string = propagate_nans_ ? "Yes" : "No"; - - return port::Printf( - "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}", - mode_string, window.c_str(), strides.c_str(), padding.c_str(), - propagate_string); + return port::Printf("{mode: %s window: %s strides: %s padding: %s}", + mode_string, window.c_str(), strides.c_str(), + padding.c_str()); } string PoolingDescriptor::ToShortString() const { @@ -512,8 +508,7 @@ string PoolingDescriptor::ToShortString() const { port::Appendf(&padding, "_p%d:%lld", i, padding_[i]); } return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg", - window, strides, padding, - propagate_nans_ ? "propagate_nans" : "ignore_nans"); + window, strides, padding); } // -- NormalizeDescriptor diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 0d2cd4a9f2..49235167ab 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -661,10 +661,6 @@ class PoolingDescriptor { SetDim(&strides_, dim, value); return *this; } - PoolingDescriptor& set_propagate_nans(bool value) { - propagate_nans_ = value; - return *this; - } int ndims() const { return ndims_; } void CloneFrom(const PoolingDescriptor& other); @@ -685,12 +681,10 @@ class PoolingDescriptor { std::vector window() const { return window_; } std::vector padding() const { return padding_; } std::vector strides() const { return strides_; } - bool propagate_nans() const { return propagate_nans_; } private: PoolingMode mode_; int ndims_; - bool propagate_nans_; // Stored as: ..., y, x. std::vector window_; diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt index 62e634afb8..9fd38a29b7 100644 --- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt @@ -94,7 +94,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " } member_method { name: "qr" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index ebd9c079b5..24c0448dea 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -170,7 +170,7 @@ tf_module { } member_method { name: "l2_normalize" - argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], " + argspec: "args=[\'x\', \'dim\', \'epsilon\', \'name\'], varargs=None, keywords=None, defaults=[\'1e-12\', \'None\'], " } member_method { name: "leaky_relu" @@ -190,7 +190,7 @@ tf_module { } member_method { name: "log_softmax" - argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " } member_method { name: "log_uniform_candidate_sampler" @@ -282,7 +282,7 @@ tf_module { } member_method { name: "softmax" - argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " } member_method { name: "softmax_cross_entropy_with_logits" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 0edd4153d7..bf7bc6a7c1 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -750,7 +750,7 @@ tf_module { } member_method { name: "boolean_mask" - argspec: "args=[\'tensor\', \'mask\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'boolean_mask\', \'None\'], " + argspec: "args=[\'tensor\', \'mask\', \'name\'], varargs=None, keywords=None, defaults=[\'boolean_mask\'], " } member_method { name: "broadcast_dynamic_shape" @@ -858,7 +858,7 @@ tf_module { } member_method { name: "count_nonzero" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'dtype\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \"\", \'None\', \'None\'], " } member_method { name: "count_up_to" @@ -1414,7 +1414,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " } member_method { name: "not_equal" @@ -1546,11 +1546,11 @@ tf_module { } member_method { name: "reduce_all" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "reduce_any" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "reduce_join" @@ -1558,27 +1558,27 @@ tf_module { } member_method { name: "reduce_logsumexp" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "reduce_max" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "reduce_mean" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "reduce_min" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "reduce_prod" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "reduce_sum" - argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " } member_method { name: "register_tensor_conversion_function" diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index c27f4953e3..5f791d7bc7 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -165,7 +165,7 @@ else BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test" - # BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" + BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test" diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh index e1edd62cc5..55c1674495 100755 --- a/tensorflow/tools/ci_build/install/install_golang.sh +++ b/tensorflow/tools/ci_build/install/install_golang.sh @@ -16,7 +16,7 @@ set -ex -GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz" +GOLANG_URL="https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz" sudo mkdir -p /usr/local wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index e5d8303c6e..dcda8228bc 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -48,6 +48,6 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_GCP=0" \ -e "TF_NEED_HDFS=0" \ -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \ - -e "TF_NEED_OPENCL_SYCL=0" \ + -e "TF_NEED_OPENCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index e1b56b9a25..d90a1b905d 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -27,7 +27,7 @@ export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 -export TF_NEED_OPENCL_SYCL=0 +export TF_NEED_OPENCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index 5a901af3e5..79973647c1 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -28,7 +28,7 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 -export TF_NEED_OPENCL_SYCL=0 +export TF_NEED_OPENCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 88116d9f24..5244898c40 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -75,23 +75,17 @@ if [[ $1 == "PI_ONE" ]]; then PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/ - --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 - --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' echo "Building for the Pi Two/Three, with NEON acceleration" fi -# We need to pass down the environment variable with a possible alternate Python -# include path for Python 3.x builds to work. -export CROSSTOOL_PYTHON_INCLUDE_PATH - cd ${WORKSPACE_PATH} bazel build -c opt ${PI_COPTS} \ --config=monolithic \ diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 44b6d52952..924ab1a4ae 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -117,7 +117,7 @@ function run_configure_for_cpu_build { export TF_NEED_VERBS=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL_SYCL=0 + export TF_NEED_OPENCL=0 echo "" | ./configure } @@ -141,7 +141,7 @@ function run_configure_for_gpu_build { export TF_NEED_MKL=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL_SYCL=0 + export TF_NEED_OPENCL=0 # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL # for GPU build on Windows diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 index 9bcc3925a8..64ebc4607a 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 @@ -101,11 +101,12 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib --jobs=${TF_AVAILABLE_CPUS} \ tensorflow/tools/pip_package:build_pip_package && \ mkdir /pip_pkg && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \ - pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg + +# Clean up pip wheel and Bazel cache when done. +RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ rm -rf /pip_pkg && \ rm -rf /root/.cache -# Clean up pip wheel and Bazel cache when done. WORKDIR /root diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index e212d10290..0571dd7391 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04 +FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04 LABEL maintainer="Craig Citro " diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index e35c58ff80..2e5a0038ed 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -60,20 +60,6 @@ Building TensorFlow Docker containers should be done through the script. The raw Dockerfiles should not be used directly as they contain strings to be replaced by the script during the build. -Attempting to run [parameterized_docker_build.sh](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/parameterized_docker_build.sh) -from a binary docker image such as for example `tensorflow/tensorflow:latest` will -not work. One needs to execute the script from a developer docker image since by -contrast with a binary docker image it contains not only the compiled solution but -also the tensorflow source code. Please select the appropriate developer docker -image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`. - -The smallest command line to generate a docker image will then be: -```docker run -it tensorflow/tensorflow:"right_tag"``` - -If you would like to start a jupyter notebook on your docker container, make sure -to map the port 8888 of your docker container by adding -p 8888:8888 to the above -command. - To use the script, specify the container type (`CPU` vs. `GPU`), the desired Python version (`PYTHON2` vs. `PYTHON3`) and whether the developer Docker image is to be built (`NO` vs. `YES`). In addition, you need to specify the central diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index 9216008600..1bf7113c9e 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -131,8 +131,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", - "//tensorflow/contrib/rnn:gru_ops_op_lib", - "//tensorflow/contrib/rnn:lstm_ops_op_lib", ] + if_not_windows([ "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform", diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc index 97e8f77616..2b85e7e83c 100644 --- a/tensorflow/tools/graph_transforms/quantize_nodes.cc +++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc @@ -759,7 +759,6 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reshape_dims; reshape_dims.set_op("Const"); reshape_dims.set_name(unique_input_name + "/reshape_dims"); - AddNodeInput("^" + input_name, &reshape_dims); SetNodeAttr("dtype", DT_INT32, &reshape_dims); Tensor reshape_dims_tensor(DT_INT32, {1}); reshape_dims_tensor.flat()(0) = -1; @@ -769,7 +768,6 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reduction_dims; reduction_dims.set_op("Const"); reduction_dims.set_name(unique_input_name + "/reduction_dims"); - AddNodeInput("^" + input_name, &reduction_dims); SetNodeAttr("dtype", DT_INT32, &reduction_dims); Tensor reduction_dims_tensor(DT_INT32, {1}); reduction_dims_tensor.flat()(0) = 0; diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index a493c6f2aa..60282f6aa3 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.4.0' +_VERSION = '1.4.0-rc1' REQUIRED_PACKAGES = [ 'absl-py', diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD index bc9e37ffb3..bc6a2fd8cc 100644 --- a/third_party/aws.BUILD +++ b/third_party/aws.BUILD @@ -21,9 +21,6 @@ cc_library( "@%ws%//tensorflow:linux_ppc64le": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), - "@%ws%//tensorflow:raspberry_pi_armeabi": glob([ - "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", - ]), "//conditions:default": [], }) + glob([ "aws-cpp-sdk-core/include/**/*.h", diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 805a30d262..882967df1c 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -477,6 +477,7 @@ genrule( "# define HAVE_RAND_EGD 1", "# define HAVE_RAND_STATUS 1", "# define HAVE_SSL_GET_SHUTDOWN 1", + "# define HAVE_STROPTS_H 1", "# define HAVE_TERMIOS_H 1", "# define OS \"x86_64-pc-linux-gnu\"", "# define RANDOM_FILE \"/dev/urandom\"", diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl index f8e50efcc6..32884d71e7 100755 --- a/third_party/sycl/crosstool/CROSSTOOL.tpl +++ b/third_party/sycl/crosstool/CROSSTOOL.tpl @@ -35,10 +35,10 @@ toolchain { tool_path { name: "compat-ld" path: "/usr/bin/ld" } tool_path { name: "cpp" path: "/usr/bin/cpp" } tool_path { name: "dwp" path: "/usr/bin/dwp" } - tool_path { name: "gcc" path: "%{sycl_impl}" } + tool_path { name: "gcc" path: "computecpp" } # Use "-std=c++11" for nvcc. For consistency, force both the host compiler # and the device compiler to use "-std=c++11". - cxx_flag: "%{c++_std}" + cxx_flag: "-std=c++11" linker_flag: "-Wl,-no-as-needed" linker_flag: "-lstdc++" linker_flag: "-B/usr/bin/" @@ -53,7 +53,7 @@ toolchain { cxx_builtin_include_directory: "/usr/local/include" cxx_builtin_include_directory: "/usr/include" - cxx_builtin_include_directory: "%{sycl_include_dir}" + cxx_builtin_include_directory: "%{computecpp_toolkit_path}" cxx_builtin_include_directory: "%{python_lib_path}" tool_path { name: "gcov" path: "/usr/bin/gcov" } @@ -214,4 +214,4 @@ toolchain { compiler_flag: "-O2" compiler_flag: "-DNDEBUG" } -} \ No newline at end of file +} diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl deleted file mode 100644 index b470772fbf..0000000000 --- a/third_party/sycl/crosstool/trisycl.tpl +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import tempfile -from subprocess import call - -CPU_CXX_COMPILER = ('%{host_cxx_compiler}') -CPU_C_COMPILER = ('%{host_c_compiler}') - -CURRENT_DIR = os.path.dirname(sys.argv[0]) -TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' - -def main(): - compiler_flags = [] - - remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions') - # remove -fsamotoze-coverage from string with g++ - if 'g++' in CPU_CXX_COMPILER: - remove_flags += ('-fsanitize-coverage',) - compiler_flags += ['-fopenmp'] - else: - compiler_flags += ['-fopenmp=libomp'] - - compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)] - - - output_file_index = compiler_flags.index('-o') + 1 - output_file_name = compiler_flags[output_file_index] - - if(output_file_index == 1): - # we are linking - return call([CPU_CXX_COMPILER] + compiler_flags + - ['-Wl,--no-undefined']) - - # find what we compile - compiling_cpp = 0 - if('-c' in compiler_flags): - compiled_file_index = compiler_flags.index('-c') + 1 - compiled_file_name = compiler_flags[compiled_file_index] - if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', - '.C', '.cxx'))): - compiling_cpp = 1; - - debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic'] - - opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] - - compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', - '-DEIGEN_HAS_C99_MATH', - '-DEIGEN_MAX_ALIGN_BYTES=16', - '-DTENSORFLOW_USE_SYCL'] + opt_flags - - if(compiling_cpp == 1): - # create a blacklist of folders that will be skipped when compiling - # with triSYCL - skip_extensions = [".cu.cc"] - skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"] - skip_folders = [(folder + '/') for folder in skip_folders] - # if compiling external project skip triSYCL - if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders): - return call([CPU_CXX_COMPILER] + compiler_flags) - - host_compiler_flags = ['-xc++', '-Wno-unused-variable', - '-I', TRISYCL_INCLUDE_DIR] + compiler_flags - x = call([CPU_CXX_COMPILER] + host_compiler_flags) - return x - else: - # compile for C - return call([CPU_C_COMPILER] + compiler_flags) - -if __name__ == '__main__': - sys.exit(main()) diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl index b6ceaadda7..6cad190630 100755 --- a/third_party/sycl/sycl/BUILD.tpl +++ b/third_party/sycl/sycl/BUILD.tpl @@ -10,27 +10,16 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE.text"]) config_setting( - name = "using_sycl_ccpp", - define_values = { - "using_sycl": "true", - "using_trisycl": "false", + name = "using_sycl", + values = { + "define": "using_sycl=true", }, ) -config_setting( - name = "using_sycl_trisycl", - define_values = { - "using_sycl": "true", - "using_trisycl": "false", - }, -) - - cc_library( name = "sycl_headers", hdrs = glob([ "**/*.h", - "**/*.hpp", ]), includes = [".", "include"], ) diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl index 33386f8957..09bef0a661 100755 --- a/third_party/sycl/sycl/build_defs.bzl.tpl +++ b/third_party/sycl/sycl/build_defs.bzl.tpl @@ -5,24 +5,9 @@ def if_sycl(if_true, if_false = []): Returns a select statement which evaluates to if_true if we're building with SYCL enabled. Otherwise, the select statement evaluates to if_false. - If we are building with triSYCL instead of ComputeCPP, a list with - the first element of if_true is returned. - """ - return select({ - "@local_config_sycl//sycl:using_sycl_ccpp": if_true, - "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1], - "//conditions:default": if_false - }) - -def if_ccpp(if_true, if_false = []): - """Shorthand for select()'ing if we are building with ComputeCPP. - Returns a select statement which evaluates to if_true if we're building - with ComputeCPP enabled. Otherwise, the select statement evaluates - to if_false. """ return select({ - "@local_config_sycl//sycl:using_sycl_ccpp": if_true, - "@local_config_sycl//sycl:using_sycl_trisycl": if_false, + "@local_config_sycl//sycl:using_sycl": if_true, "//conditions:default": if_false }) diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl index a0c9e4e43a..7af063178e 100644 --- a/third_party/sycl/sycl_configure.bzl +++ b/third_party/sycl/sycl_configure.bzl @@ -5,26 +5,20 @@ * HOST_CXX_COMPILER: The host C++ compiler * HOST_C_COMPILER: The host C compiler * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit. - * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL. - (if using triSYCL instead of ComputeCPP) * PYTHON_LIB_PATH: The path to the python lib """ _HOST_CXX_COMPILER = "HOST_CXX_COMPILER" _HOST_C_COMPILER= "HOST_C_COMPILER" _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH" -_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" def _enable_sycl(repository_ctx): - if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ: - enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip() + if "TF_NEED_OPENCL" in repository_ctx.os.environ: + enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL"].strip() return enable_sycl == "1" return False -def _enable_compute_cpp(repository_ctx): - return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ - def auto_configure_fail(msg): """Output failure message when auto configuration fails.""" red = "\033[0;31m" @@ -65,15 +59,6 @@ def find_computecpp_root(repository_ctx): return sycl_name fail("Cannot find SYCL compiler, please correct your path") -def find_trisycl_include_dir(repository_ctx): - """Find triSYCL include directory. """ - sycl_name = "" - if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ: - sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip() - if sycl_name.startswith("/"): - return sycl_name - fail( "Cannot find triSYCL include directory, please correct your path") - def find_python_lib(repository_ctx): """Returns python path.""" if _PYTHON_LIB_PATH in repository_ctx.os.environ: @@ -186,53 +171,26 @@ def _sycl_autoconf_imp(repository_ctx): _tpl(repository_ctx, "sycl:platform.bzl") _tpl(repository_ctx, "crosstool:BUILD") _file(repository_ctx, "sycl:LICENSE.text") - - if _enable_compute_cpp(repository_ctx): - _tpl(repository_ctx, "crosstool:computecpp", - { - "%{host_cxx_compiler}" : find_cc(repository_ctx), - "%{host_c_compiler}" : find_c(repository_ctx) - }) - - computecpp_root = find_computecpp_root(repository_ctx); - _check_dir(repository_ctx, computecpp_root) - - _tpl(repository_ctx, "crosstool:CROSSTOOL", - { - "%{sycl_include_dir}" : computecpp_root, - "%{sycl_impl}" : "computecpp", - "%{c++_std}" : "-std=c++11", - "%{python_lib_path}" : find_python_lib(repository_ctx), - }) - - # symlink libraries - _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) - _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") - _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") - _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") - else: - - trisycl_include_dir = find_trisycl_include_dir(repository_ctx); - _check_dir(repository_ctx, trisycl_include_dir) - - _tpl(repository_ctx, "crosstool:trisycl", - { - "%{host_cxx_compiler}" : find_cc(repository_ctx), - "%{host_c_compiler}" : find_c(repository_ctx), - "%{trisycl_include_dir}" : trisycl_include_dir - }) - - - _tpl(repository_ctx, "crosstool:CROSSTOOL", - { - "%{sycl_include_dir}" : trisycl_include_dir, - "%{sycl_impl}" : "trisycl", - "%{c++_std}" : "-std=c++1y", - "%{python_lib_path}" : find_python_lib(repository_ctx), - }) - - _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include") - + _tpl(repository_ctx, "crosstool:computecpp", + { + "%{host_cxx_compiler}" : find_cc(repository_ctx), + "%{host_c_compiler}" : find_c(repository_ctx), + }) + + computecpp_root = find_computecpp_root(repository_ctx) + _check_dir(repository_ctx, computecpp_root) + + _tpl(repository_ctx, "crosstool:CROSSTOOL", + { + "%{computecpp_toolkit_path}" : computecpp_root, + "%{python_lib_path}" : find_python_lib(repository_ctx), + }) + + # symlink libraries + _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) + _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") + _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") + _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") sycl_configure = repository_rule( implementation = _sycl_autoconf_imp, diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD index d164ee719c..8509668891 100644 --- a/third_party/zlib.BUILD +++ b/third_party/zlib.BUILD @@ -49,7 +49,7 @@ cc_library( ":windows_msvc": [], "//conditions:default": [ "-Wno-shift-negative-value", - "-DZ_HAVE_UNISTD_H", + "-Wno-implicit-function-declaration", ], }), includes = ["."], diff --git a/tools/bazel.rc b/tools/bazel.rc index 04c24d7511..2d7201ae57 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -9,16 +9,13 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true build:mkl --define=using_mkl=true build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl --define=using_sycl=true --define=using_trisycl=false +build:sycl --define=using_sycl=true build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address - -build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true +build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address build --define=use_fast_cpp_protos=true build --define=allow_oversize_protos=true diff --git a/util/python/BUILD b/util/python/BUILD index f5fa0c6d29..96daf9947a 100644 --- a/util/python/BUILD +++ b/util/python/BUILD @@ -1,4 +1,4 @@ -licenses(["notice"]) # New BSD, Python Software Foundation +licenses(["restricted"]) package(default_visibility = ["//visibility:public"]) -- GitLab From 62c9c2065acce261f1c3ede8f54047b7af684178 Mon Sep 17 00:00:00 2001 From: Jay Young Date: Wed, 22 Nov 2017 16:56:52 +0800 Subject: [PATCH 0748/1801] Add dependenctyy to tensorflow/python/keras/BUILD --- tensorflow/python/keras/BUILD | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 tensorflow/python/keras/BUILD diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD old mode 100644 new mode 100755 index 4db48b45ed..ff5d7defa2 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -147,6 +147,7 @@ py_library( "//tensorflow/python:variables", "//tensorflow/python/estimator", "//tensorflow/python/estimator:model_fn", + "//tensorflow/python/saved_model", "@six_archive//:six", ], ) -- GitLab From 9f05fc47fc13c4f0b8dfb227e9687eb647dc740f Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 22 Nov 2017 06:46:36 -0800 Subject: [PATCH 0749/1801] Fix object_detection and skip_thoughts links PiperOrigin-RevId: 176650384 --- tensorflow/docs_src/mobile/mobile_intro.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md index 948563292a..3a002c4da2 100644 --- a/tensorflow/docs_src/mobile/mobile_intro.md +++ b/tensorflow/docs_src/mobile/mobile_intro.md @@ -82,7 +82,7 @@ new object enters or leaves the scene. We have some sample code for this available for Android [on Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android), and also a [more general object detection -model](https://github.com/tensorflow/models/tree/master/object_detection/README.md) +model](https://github.com/tensorflow/models/tree/master/research/object_detection/README.md) available as well. ### Gesture Recognition @@ -134,7 +134,7 @@ that covers everything from sentiment analysis to topic discovery. You’re like to have your own categories or labels that you want to apply, so the best place to start is with an example like -[Skip-Thoughts](https://github.com/tensorflow/models/tree/master/skip_thoughts/), +[Skip-Thoughts](https://github.com/tensorflow/models/tree/master/research/skip_thoughts/), and then train on your own examples. ### Voice Synthesis -- GitLab From a235f23d5babcffa05b6d190c3e1a8909afb5273 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Wed, 22 Nov 2017 08:10:09 -0800 Subject: [PATCH 0750/1801] Roll forward new copy insertion pass. Original cl: cl/174423881, rollback cl: cl/174505237. This roll forward includes the following changes from the original to address various issues uncovered with the rollback: (1) A fix for a problem with fusion instruction serialization was broken out and submitted separately (cl/176035108). (2) A dataflow analysis fix was broken out and submitted separately (cl/176035108) (3) Adding RunBenchmarks to our unit test main was broken out. Fix for uncovered segv in while_test_cpu benchmark in pending cl/176068232. (4) Moved a cpu-specific copy-insertion pass into it's own file, and added tests. (5) Renamed gpu/copy_insertion.* to gpu/gpu_copy_insertion.* to match cpu side. PiperOrigin-RevId: 176658339 --- tensorflow/compiler/xla/service/BUILD | 10 +- .../compiler/xla/service/buffer_assignment.cc | 1 - .../xla/service/buffer_assignment_test.cc | 78 +- .../compiler/xla/service/copy_insertion.cc | 1581 +++++++++++------ .../compiler/xla/service/copy_insertion.h | 43 +- .../xla/service/copy_insertion_test.cc | 947 ++++++++-- tensorflow/compiler/xla/service/cpu/BUILD | 34 +- .../compiler/xla/service/cpu/cpu_compiler.cc | 7 +- .../xla/service/cpu/cpu_copy_insertion.cc | 43 + .../xla/service/cpu/cpu_copy_insertion.h | 42 + .../service/cpu/cpu_copy_insertion_test.cc | 139 ++ tensorflow/compiler/xla/service/gpu/BUILD | 16 +- .../xla/service/gpu/copy_insertion.cc | 71 - .../compiler/xla/service/gpu/gpu_compiler.cc | 5 +- .../xla/service/gpu/gpu_copy_insertion.cc | 112 ++ ...{copy_insertion.h => gpu_copy_insertion.h} | 21 +- .../xla/service/gpu/while_transformer_test.cc | 61 +- .../xla/service/hlo_alias_analysis.cc | 12 +- tensorflow/compiler/xla/service/hlo_dce.cc | 8 + tensorflow/compiler/xla/tests/tuple_test.cc | 3 +- 20 files changed, 2370 insertions(+), 864 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc create mode 100644 tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h create mode 100644 tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc delete mode 100644 tensorflow/compiler/xla/service/gpu/copy_insertion.cc create mode 100644 tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc rename tensorflow/compiler/xla/service/gpu/{copy_insertion.h => gpu_copy_insertion.h} (56%) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index db265510f2..1023d3e5dc 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1641,10 +1641,14 @@ cc_library( deps = [ ":buffer_liveness", ":hlo", + ":hlo_alias_analysis", + ":hlo_dce", + ":hlo_graph_dumper", + ":hlo_ordering", ":hlo_pass", ":liveness_util", ":logical_buffer", - ":tuple_points_to_analysis", + ":tuple_simplifier", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", @@ -1659,15 +1663,17 @@ tf_cc_test( deps = [ ":copy_insertion", ":hlo", + ":hlo_graph_dumper", ":hlo_matchers", - ":tuple_points_to_analysis", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:debug_options_flags", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", ], ) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 033034b421..19a9ff04de 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -1265,7 +1265,6 @@ const LogicalBuffer* AddBufferToColocatedSet( // CopyInsertion ensures root points-to set is unambiguous and distinct. const auto& points_to = points_to_analysis.GetPointsToSet(instruction); DCHECK(!points_to.IsAmbiguous()); - DCHECK(points_to.IsDistinct()); colocated_set->push_back(points_to.element(index)[0]); return colocated_set->back(); } diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index 89410f42bd..4d4c5b953e 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -1538,8 +1538,6 @@ TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) { HloInstruction::CreateConstant(Literal::CreateR0(0.0))); auto output0 = builder.AddInstruction( HloInstruction::CreateBroadcast(data_shape_, zero, {1})); - auto output1 = builder.AddInstruction( - HloInstruction::CreateBroadcast(data_shape_, zero, {1})); auto cond0 = module->AddEmbeddedComputation(BuildWhileConditionComputation("cond")); @@ -1556,10 +1554,8 @@ TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) { auto body1 = module->AddEmbeddedComputation(BuildWhileBodyComputation("body")); - auto tuple1 = builder.AddInstruction( - HloInstruction::CreateTuple({input0, weights0, output1})); auto while1 = builder.AddInstruction( - HloInstruction::CreateWhile(loop_state_shape_, cond1, body1, tuple1)); + HloInstruction::CreateWhile(loop_state_shape_, cond1, body1, while0)); module->AddEntryComputation(builder.Build()); RunCopyInsertion(module.get()); @@ -1676,11 +1672,14 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) { auto while1 = builder.AddInstruction( HloInstruction::CreateWhile(loop_state_shape_, cond, body, tuple1)); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, while0, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, while1, 1)); auto root_add = builder.AddInstruction(HloInstruction::CreateBinary( - while0->shape(), HloOpcode::kAdd, while0, while1)); - module->AddEntryComputation(builder.Build()); + while0->shape(), HloOpcode::kAdd, gte0, gte1)); - RunCopyInsertion(module.get()); + module->AddEntryComputation(builder.Build()); { FlattenCallGraph flatten; @@ -1688,22 +1687,22 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) { EXPECT_TRUE(result); } + RunCopyInsertion(module.get()); + auto sequence = CreateMemoryMinimizingSequence(*module, ByteSizeOf).ConsumeValueOrDie(); // To trigger b/38494731, we want a specific Hlo sequence for the // root computation, so we overwrite that entry with a manually // crafted sequence. - std::vector sequence_for_buffer_assigment = { - input1, weights1, one, output1, tuple1, while1, input0, - weights0, zero, output0, tuple0, while0, root_add}; + sequence[module->entry_computation()] = { + input1, weights1, one, output1, while1->operand(0), while1, + input0, weights0, zero, output0, while0->operand(0), while0, + gte0, gte1, root_add}; // If this ASSERT_TRUE fails, we constructed a bogus sequence above // and this test itself is buggy. - ASSERT_TRUE(IsPostOrderTraversal(sequence_for_buffer_assigment)); - - sequence[module->entry_computation()] = - std::move(sequence_for_buffer_assigment); + ASSERT_TRUE(IsPostOrderTraversal(sequence[module->entry_computation()])); auto assignment = BufferAssigner::Run( @@ -1715,55 +1714,6 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) { EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment)); } -// Test buffer assignment for while nodes with multiple uses. -// TODO(b/37245345): Fix buffer assignment for this case. -TEST_F(WhileBufferAssignmentTest, DISABLED_TwoWhiles) { - auto module = MakeUnique(TestName()); - auto builder = HloComputation::Builder(TestName()); - - auto input0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, data_shape_, "input0")); - auto weights0 = builder.AddInstruction( - HloInstruction::CreateParameter(1, data_shape_, "weights0")); - - auto zero = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0.0))); - auto output0 = builder.AddInstruction( - HloInstruction::CreateBroadcast(data_shape_, zero, {1})); - - auto cond0 = - module->AddEmbeddedComputation(BuildWhileConditionComputation("cond")); - auto body0 = - module->AddEmbeddedComputation(BuildWhileBodyComputation("body")); - - auto tuple0 = builder.AddInstruction( - HloInstruction::CreateTuple({input0, weights0, output0})); - auto while0 = builder.AddInstruction( - HloInstruction::CreateWhile(loop_state_shape_, cond0, body0, tuple0)); - auto while1 = builder.AddInstruction( - HloInstruction::CreateWhile(loop_state_shape_, cond0, body0, while0)); - - auto get0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, while0, 2)); - auto get1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, while1, 2)); - builder.AddInstruction( - HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, get0, get1)); - module->AddEntryComputation(builder.Build()); - - RunCopyInsertion(module.get()); - - { - FlattenCallGraph flatten; - TF_ASSERT_OK_AND_ASSIGN(bool result, flatten.Run(module.get())); - EXPECT_TRUE(result); - } - - auto assignment = RunBufferAssignment(module.get()); - - EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment)); -} - TEST_F(WhileBufferAssignmentTest, WhilesDontShareEntryParamIfLiveOut) { auto module = MakeUnique(TestName()); auto builder = HloComputation::Builder("entry"); diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index 0453a698a0..cd983bc03e 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -15,15 +15,17 @@ limitations under the License. #include "tensorflow/compiler/xla/service/copy_insertion.h" -#include - +#include "tensorflow/compiler/xla/service/hlo_alias_analysis.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" +#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_ordering.h" #include "tensorflow/compiler/xla/service/liveness_util.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" -#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" +#include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" @@ -31,597 +33,1174 @@ limitations under the License. #include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" namespace xla { +using ::tensorflow::str_util::Join; +using ::tensorflow::strings::StrAppend; +using ::tensorflow::strings::StrCat; + namespace { -using tensorflow::gtl::FlatMap; -using tensorflow::gtl::FlatSet; +bool IsEntryParameterValue(const HloValue& value) { + const HloComputation* computation = value.defining_instruction()->parent(); + return value.defining_instruction()->opcode() == HloOpcode::kParameter && + computation == computation->parent()->entry_computation(); +} + +bool IsConstantValue(const HloValue& value) { + return value.defining_instruction()->opcode() == HloOpcode::kConstant; +} + +bool ValueIsReadOnly(const HloValue& value) { + return IsConstantValue(value) || IsEntryParameterValue(value); +} -// InstructionCopier encapsulates indices at which to copy 'instruction'. -// All 'instruction' users in 'copy_users' are updated to use the copy. +// Deep copy the given instructions 'from' and 'to' at the ShapeIndexes given in +// 'indices_to_copy'. Add control edges from the respective kCopy instructions +// in deep copy of 'from' to the respective kCopy instruction in the deep copy +// of 'to'. // -// Instruction copies are generated in two phases: -// 1) Recording buffer indices at which 'instruction' requires copies (i.e. -// setting 'indices_to_copy_[index]'=true). -// 2) Inserting kCopy instructions based on indices recorded in phase 1). -// *) Array instructions are copied by inserting a single kCopy instruction. -// *) Tuple-shaped instructions are copied by recursively expanding tuples -// (and tuple-shaped elements), and inserting kCopy instructions for any -// tuple elements which require a copy. As the recursion unwinds, new tuple -// instructions are added to gather the copied (and uncopied) references -// into the output tuple (i.e. the copy of the tuple-shaped instruction). +// Requirements: 'from' and 'to' must have compatible shapes. // -// Example two-element tuple with one element that needs a copy: +// For example, suppose 'from' and 'to' are two-element tuples where index 0 is +// the only index to copy. Prior to deep-copying we have: // -// original-instruction -// / \ -// GTE(0) GTE(1) -// | | -// Copy | -// \ / -// Tuple // copied-instruction // -// As an optimization, if the original instruction is itself a Tuple -// instruction, we elide the unnecessary extra GTE and Tuple instructions, -// and just insert the copy into a new Tuple instruction, with control -// dependencies to ensure the copy occurs after any possible interference. -class InstructionCopier { - public: - InstructionCopier(HloInstruction* instruction, - const std::vector& copy_users) - : instruction_(instruction), - copy_users_(copy_users), - indices_to_copy_(instruction->shape()), - control_predecessors_(instruction->shape()) {} - - // Sets indices that are read-only, and thus do not need to be copied. - void SetReadOnlyIndices(const ShapeTree& read_only_indices) { - read_only_indices_ = read_only_indices; - } +// 'from' +// | +// ... +// | +// 'to' +// +// DeepCopyAndAddControlEdges produces: +// +// 'from' +// / \ +// GTE GTE +// | | +// Copy | +// / \ / +// | Tuple +// | | +// ctrl ... +// edge | +// | | +// | 'to' +// | / \ +// | GTE GTE +// \ | | +// Copy | +// \ / +// Tuple +// +StatusOr> +DeepCopyAndAddControlEdges(HloInstruction* from, HloInstruction* to, + const ShapeTree& indices_to_copy) { + DCHECK(ShapeUtil::Compatible(from->shape(), to->shape())); + // to/from_copy_tree hold the kCopy instruction produces by the deep + // copies. Elements which are not copied (indices_to_copy.element(index) == + // false) have nullptr at that index. + ShapeTree from_copy_tree(from->shape(), + /*init_value=*/nullptr); + TF_ASSIGN_OR_RETURN(HloInstruction * from_deep_copy, + from->parent()->DeepCopyInstruction( + from, &indices_to_copy, &from_copy_tree)); - // Sets copy overrides, which are copy instructions to use at each index. This - // is used to share a single copy of read-only entry parameters and constants - // between multiple While loops. - void SetCopyOverrides(const ShapeTree& copy_overrides) { - copy_overrides_ = copy_overrides; + ShapeTree to_copy_tree(to->shape(), /*init_value=*/nullptr); + TF_ASSIGN_OR_RETURN( + HloInstruction * to_deep_copy, + to->parent()->DeepCopyInstruction(to, &indices_to_copy, &to_copy_tree)); + + // Add control edges between the respective kCopy instructions. + for (const auto& pair : from_copy_tree) { + const ShapeIndex& index = pair.first; + HloInstruction* from_copy = pair.second; + HloInstruction* to_copy = to_copy_tree.element(index); + if (from_copy == nullptr) { + TF_RET_CHECK(to_copy == nullptr); + continue; + } + TF_RET_CHECK(to_copy != nullptr); + TF_RETURN_IF_ERROR(from_copy->AddControlDependencyTo(to_copy)); } - // Returns true if all recorded indices are false (returns true otherwise). - bool HasAllIndicesFalse() const; + return std::make_pair(from_deep_copy, to_deep_copy); +} - // Records instruction buffer indices which point-to a Parameter or Constant. - Status RecordIndicesWhichPointToParamOrConstant( - const TuplePointsToAnalysis& points_to_analysis); +// Compute the indices of the loop state which need copies in order to avoid +// live range interference. Generally, an element in the loop state does not +// need to be copied if the element is passed through transparently through the +// body. +// +// Returns whether any indices need to be copied. +bool IndicesToCopyForWhile(const HloDataflowAnalysis& dataflow, + const HloInstruction* xla_while, + ShapeTree* indices_to_copy) { + DCHECK(ShapeUtil::Compatible(indices_to_copy->shape(), xla_while->shape())); - // Records instruction buffer indices to copy which are necessary to ensure: - // *) PointsToSet of 'instruction_' is unambiguous and distinct. - // *) No liveness interference between 'instruction_' and 'other_instruction'. - // - // If 'read_only_indices_out' is non-null, read-only indices are set to true. - Status RecordIndicesToCopyForColocatingBuffers( - const BufferLiveness& liveness, const HloInstruction* other_instruction, - ShapeTree* read_only_indices_out); + bool any_copies = false; + const HloInstruction* init = xla_while->operand(0); + for (auto& pair : *indices_to_copy) { + const ShapeIndex& index = pair.first; + bool& should_copy = pair.second; + // If there is any ambiguity, then loop state must be copied. + if (dataflow.GetValueSet(init, index).values().size() > 1 || + dataflow.GetValueSet(xla_while, index).values().size() > 1) { + should_copy = true; + } else { + // If the output of the while instruction is not the same as the init + // value of the while, then this element is not passed through the body + // transparently and must be copied. + should_copy = dataflow.GetUniqueValueAt(xla_while, index) != + dataflow.GetUniqueValueAt(init, index); + } + any_copies |= should_copy; + } + return any_copies; +} - // Records control predecessors to add for inserted copy instructions. - // 'parameter' must have the same shape as the instruction that will be - // copied, and must define all buffers in the shape. Control predecessors are - // only recorded for indices that have already been marked for copying. - Status RecordControlPredecessors( - const TuplePointsToAnalysis& points_to_analysis, - HloInstruction* parameter); +// Add kCopy instructions around the given kWhile instruction to eliminate any +// possible live range interference of HLO values assuming a dependency-based +// ordering (HloDependencyOrdering). Copies are added conservatively. There +// likely are copies which are not strictly necessary, but there are removed +// later in the pass via CopyRemover. +// +// +// Elements (each ShapeIndex) in the loop state are considered independently. A +// copy is added to each element of the loop state which is modified in the +// while body. For each such element, a total of three kCopy instructions are +// added at following locations: +// +// (1) The init value is copied before the kWhile instruction. Before: +// +// (Init) +// | +// kWhile +// | +// ... +// +// After: +// +// (Init) +// | +// kCopy +// | +// kWhile +// | +// ... +// +// This copy is necessary in case the init value is simultaneously live +// with the kWhile. +// +// (2) Copies are added to the parameter and root of the while body +// computation. Before: +// +// kParameter +// | +// ... +// | +// (body root) +// +// After: +// +// kParameter +// | +// kCopy ----------+ +// | | +// ... ctrl +// | edge +// (body root) | +// | | +// kCopy <---------+ +// +// The root kCopy becomes the new root of the computation. Both copies are +// necessary to any potential interference between the parameter value and +// the root value. The control edge prevents potential interference +// between the copies themselves. +// +// If the loop state is a tuple then the above kCopy instructions are a deep +// copy constructed of kCopy, KGetTupleElement, and kTuple instruction as +// constructed by HloInstruction::DeepCopyInstruction. +Status AddCopiesForWhile(const HloAliasAnalysis& alias_analysis, + HloInstruction* xla_while) { + VLOG(2) << "Adding copies for kWhile instruction " << xla_while->name(); + TF_RET_CHECK(xla_while->opcode() == HloOpcode::kWhile); - // Inserts copies of 'instruction' buffers at indices in 'indices_to_copy', - // and replaces all uses for instructions in 'copy_users_' with copy. - // Returns the instruction which is a copy 'instruction'. - HloInstruction* Copy(); + ShapeTree indices_to_copy(xla_while->shape()); + if (!IndicesToCopyForWhile(alias_analysis.dataflow_analysis(), xla_while, + &indices_to_copy)) { + VLOG(2) << "No copies necessary for kWhile instruction " + << xla_while->name(); + return Status::OK(); + } - HloInstruction* instruction() { return instruction_; } + VLOG(2) << "Adding copies for " << xla_while->name() << " at indices:"; + for (auto& pair : indices_to_copy) { + if (pair.second) { + VLOG(2) << " " << pair.first; + } + } - const std::vector& copy_users() const { return copy_users_; } + // Deep copy init. + HloInstruction* while_init = xla_while->mutable_operand(0); + TF_ASSIGN_OR_RETURN( + HloInstruction * while_init_copy, + xla_while->parent()->DeepCopyInstruction(while_init, &indices_to_copy)); + TF_RETURN_IF_ERROR(while_init->ReplaceUseWith(xla_while, while_init_copy)); - private: - // Does the given index represent a read-only buffer? - bool IsReadOnlyIndex(const ShapeIndex& index) const { - return !ShapeUtil::IsNil(read_only_indices_.shape()) && - read_only_indices_.element(index); - } + // Deep copy the parameter and the root. Extend a control edge from the copy + // of the parameter value to the corresponding copy value of the root. + HloComputation* body = xla_while->while_body(); + HloInstruction* param = body->parameter_instruction(0); + HloInstruction* root = body->root_instruction(); - // Returns the copy override at the given index, or nullptr. - HloInstruction* GetCopyOverride(const ShapeIndex& index) const { - return ShapeUtil::IsNil(copy_overrides_.shape()) - ? nullptr - : copy_overrides_.element(index); - } + // If param is the root then all indices should have been passed through the + // while body and we should have returned early above. + TF_RET_CHECK(param != root); - // Records instruction buffer indices which have ambiguous or non-distinct - // points-to sets. - Status RecordAmbiguousOrNonDistinctIndices( - const TuplePointsToAnalysis& points_to_analysis); + // Copy users before making a deep copy of the parameter as the deep copy + // will create new users of the parameter (eg, the GTE instructions of the + // deep copy). + std::vector param_users = param->users(); - // Records instruction buffer indices which have interfering live ranges - // with 'other_instruction' buffers at same index. - Status RecordIndicesWhichInterfereWithOtherInstruction( - const BufferLiveness& liveness, const HloInstruction* other_instruction, - ShapeTree* read_only_indices_out); + ShapeIndex current_index; + TF_ASSIGN_OR_RETURN(auto pair, + DeepCopyAndAddControlEdges(param, root, indices_to_copy)); - // Recursively inserts copies of 'instruction' tuple elements at indices - // specified in 'indices_to_copy', and returns the copy of 'instruction'. - HloInstruction* CopyTuple(HloInstruction* instruction, ShapeIndex* index); + HloInstruction* param_copy = pair.first; + HloInstruction* root_copy = pair.second; - void RecordIndex(const ShapeIndex& index) { - *indices_to_copy_.mutable_element(index) = true; + for (HloInstruction* user : param_users) { + TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, param_copy)); } - HloInstruction* instruction_; - const std::vector copy_users_; - ShapeTree indices_to_copy_; - ShapeTree> control_predecessors_; - ShapeTree read_only_indices_; - ShapeTree copy_overrides_; -}; + body->set_root_instruction(root_copy); -bool InstructionCopier::HasAllIndicesFalse() const { - bool all_indices_false = true; - indices_to_copy_.ForEachElement( - [&all_indices_false](const ShapeIndex& /*index*/, bool data) { - if (data) { - all_indices_false = false; - } - }); - return all_indices_false; + return Status::OK(); } -Status InstructionCopier::RecordIndicesWhichPointToParamOrConstant( - const TuplePointsToAnalysis& points_to_analysis) { - const PointsToSet& points_to = - points_to_analysis.GetPointsToSet(instruction_); - // Shallow copy the instruction if the points-to set of the top-level - // buffer is ambiguous. This is necessary because the backends must know - // statically what the top-level buffer of the result is. - if (points_to.element(/*index=*/{}).size() > 1) { - RecordIndex({}); +// Removes any control dependencies to or from the given instruction. +Status StripControlDependenciesFrom(HloInstruction* instruction) { + while (!instruction->control_successors().empty()) { + TF_RETURN_IF_ERROR(instruction->RemoveControlDependencyTo( + instruction->control_successors().front())); + } + + while (!instruction->control_predecessors().empty()) { + TF_RETURN_IF_ERROR( + instruction->control_predecessors().front()->RemoveControlDependencyTo( + instruction)); } - // Multiple buffers within a parameter/constant may be live out, so collect - // a set of indices at which to copy first. - points_to.ForEachElement([this](const ShapeIndex& index, - const PointsToSet::BufferList& buffers) { - if (IsReadOnlyIndex(index)) { - return; - } - for (const LogicalBuffer* buffer : buffers) { - // pointee is the HloInstruction producing the buffer which may be - // liveout. - HloInstruction* pointee = buffer->instruction(); - if (pointee->opcode() == HloOpcode::kParameter || - pointee->opcode() == HloOpcode::kConstant) { - VLOG(2) << "Parameter or constant buffer " << buffer->ToString() - << " index: " << tensorflow::str_util::Join(index, ",") - << " may be live out of computation: " << pointee->ToString(); - RecordIndex(index); - break; - } - } - }); return Status::OK(); } -Status InstructionCopier::RecordIndicesToCopyForColocatingBuffers( - const BufferLiveness& liveness, const HloInstruction* other_instruction, - ShapeTree* read_only_indices_out) { - TF_RETURN_IF_ERROR( - RecordAmbiguousOrNonDistinctIndices(liveness.points_to_analysis())); - TF_RETURN_IF_ERROR(RecordIndicesWhichInterfereWithOtherInstruction( - liveness, other_instruction, read_only_indices_out)); +// Add kCopy instructions to the given module to guarantee there is no +// live-range interference. Generally interference can only occur around kWhile +// instructions which have update-in-place semantics. +Status AddCopiesToResolveInterference(HloModule* module) { + TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, + HloAliasAnalysis::Run(module)); + + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kWhile) { + TF_RETURN_IF_ERROR(AddCopiesForWhile(*alias_analysis, instruction)); + } + } + } return Status::OK(); } -Status InstructionCopier::RecordAmbiguousOrNonDistinctIndices( - const TuplePointsToAnalysis& points_to_analysis) { - const PointsToSet& points_to = - points_to_analysis.GetPointsToSet(instruction_); - // Mapping from LogicalBuffer to index (used to detect non-distinct indices). - FlatMap> - buffer_to_source_indices; - points_to.ForEachElement( - [this, &buffer_to_source_indices]( - const ShapeIndex& index, const PointsToSet::BufferList& buffers) { - if (buffers.size() > 1) { - // Record ambiguous points-to set at 'index'. - if (!indices_to_copy_.element(index)) { - VLOG(2) << "Adding copy of buffer for instruction: " - << instruction_->name() - << " at index: " << tensorflow::str_util::Join(index, ",") - << " with ambiguous points-to set."; - RecordIndex(index); +// Class for removing unnecessary copies from the module. +// +// kCopy instructions are added conservatively to guarantee no live range +// interference between HLO values. This class uses a more fine-grained analysis +// to remove some of these added copies which are not strictly necessary. +class CopyRemover { + public: + CopyRemover(const HloAliasAnalysis& alias_analysis, + const HloOrdering& ordering, HloModule* module) + : module_(module), + alias_analysis_(alias_analysis), + ordering_(ordering), + buffer_value_tracker_(*module, alias_analysis, ordering) {} + + // Try to elide the given copy. The copy is elided if the instruction is not + // necessary to prevent live-range interference of HLO values. Returns true if + // copy was elided. + // + // The copy instruction is not actually removed here. Instead it is left for + // dead in the graph. Later calls to DCE will remove the instruction. + StatusOr TryElideCopy(HloInstruction* copy) { + if (buffer_value_tracker_.TryElideCopy(copy)) { + TF_RETURN_IF_ERROR(StripControlDependenciesFrom(copy)); + TF_RETURN_IF_ERROR(copy->ReplaceAllUsesWith(copy->mutable_operand(0))); + return true; + } + return false; + } + + string ToString() const { + string out = StrCat("CopyRemover, module ", module_->name(), "\n"); + StrAppend(&out, " Buffer values, in dependency order:\n"); + for (const HloBuffer& buffer : alias_analysis_.buffers()) { + StrAppend(&out, " HloBuffer ", buffer.id(), ":\n"); + } + return out; + } + + private: + // Class which tracks the HLO values within each HLO buffer in the module + // during copy removal. + // + // The values are held in a linked list where there is one list for each + // buffer. Removing a copy instruction merges together the values in the + // source buffer of the copy to the destination buffer of the copy. This class + // tracks these value lists as copies are removed from the graph (and value + // lists are merged). + // + // The BufferValueTracker object is initialized to match the state of + // HloAliasAnalysis. However, as copies are removed this state diverges. The + // values-to-buffer mapping is maintained outside of HloAliasAnalysis because + // a fully updatable alias analysis is very slow. + class BufferValueTracker { + public: + // The values held in a single HLO buffer are represented using a linked + // list. An element type in this list is ValueNode. + // + // This linked list is hand-rolled to enable efficient splicing of lists + // using only references to list elements without knowing which lists are + // being spliced. std::list requires a reference to the list object to + // splice. + struct ValueNode { + explicit ValueNode(const HloValue* v) : value(v) {} + + const HloValue* value; + + // The uses are maintained outside of HloValue::uses() because + // HloValue::uses() is not updatable (a fully updatable dataflow analysis + // is slow). + std::vector uses; + + // next/prev elements in the linked list. The list is circularly linked so + // these values are never null for elements in the list. + ValueNode* prev = nullptr; + ValueNode* next = nullptr; + }; + + BufferValueTracker(const HloModule& module, + const HloAliasAnalysis& alias_analysis, + const HloOrdering& ordering) + : dataflow_(alias_analysis.dataflow_analysis()), ordering_(ordering) { + // Construct a list for each HLO buffer in the alias analysis. Maintain a + // map from HloValue to the respective list element representing that + // value. The map is used to construct the copy info map below. + tensorflow::gtl::FlatMap value_to_node; + for (const HloBuffer& buffer : alias_analysis.buffers()) { + // Verify values contained in the buffer are strictly ordered. This + // should always be the case after adding copies to eliminate + // interference. Specifically, the addition of the control flow edges + // between copies added around aliased operations (kWhile) guarantees + // this strict order. + for (const HloValue* value_a : buffer.values()) { + for (const HloValue* value_b : buffer.values()) { + if (value_a != value_b) { + DCHECK(ordering_.LiveRangeStrictlyBefore(*value_a, *value_b, + dataflow_) || + ordering_.LiveRangeStrictlyBefore(*value_b, *value_a, + dataflow_)) + << value_a->ToShortString() << " and " + << value_b->ToShortString() << " are not ordered"; + } } } - // For each 'buffer': record a mapping from 'buffer' to 'index'. - for (const LogicalBuffer* buffer : buffers) { - buffer_to_source_indices[buffer].push_back(index); - } - }); - // Record all non-distinct indices detected in 'buffer_to_source_indices'. - for (const auto& buff_to_src : buffer_to_source_indices) { - if (buff_to_src.second.size() == 1) { - continue; + std::vector values = buffer.values(); + std::sort(values.begin(), values.end(), + [this](const HloValue* a, const HloValue* b) { + return ordering_.IsDefinedBefore(*a, *b); + }); + + // Create a list containing all of the values in the buffer. + AddValueList(values, &value_to_node); + } + + // Create copy_map_ which contains the source and destination values + // of all copies. + CreateCopyMap(module, value_to_node); + + XLA_VLOG_LINES(3, ToString()); + TF_DCHECK_OK(Verify()); } - for (const ShapeIndex& src_index : buff_to_src.second) { - // Record non-distinct points-to set at 'src_index'. - if (!indices_to_copy_.element(src_index)) { - VLOG(2) << "Adding copy of buffer for instruction: " - << instruction_->name() - << " at index: " << tensorflow::str_util::Join(src_index, ",") - << " because of non-distinct points-to set."; - RecordIndex(src_index); + + // Add a list containing the given values to BufferValueTracker. This + // represents the values contained in a single buffer. For each value in + // 'values' an entry is created in value_to_node which indicates the + // respective ValueNode representing that value. + void AddValueList( + tensorflow::gtl::ArraySlice values, + tensorflow::gtl::FlatMap* value_to_node) { + ValueNode* tail = nullptr; + ValueNode* head = nullptr; + for (const HloValue* value : values) { + auto new_node = new ValueNode(value); + (*value_to_node)[value] = new_node; + + // Copy the HLO values's uses into the ValueNode for the value. These + // uses in ValueNode are updated as copies are removed. + new_node->uses.reserve(value->uses().size()); + for (const HloUse& use : value->uses()) { + new_node->uses.push_back(&use); + } + + // Connect the new node into the linked list. + if (tail == nullptr) { + head = new_node; + } else { + tail->next = new_node; + new_node->prev = tail; + } + tail = new_node; } + + // The linked list is circular so connect the head and tail. + tail->next = head; + head->prev = tail; + value_lists_.insert(head); } - } - return Status::OK(); -} -Status InstructionCopier::RecordIndicesWhichInterfereWithOtherInstruction( - const BufferLiveness& liveness, const HloInstruction* other_instruction, - ShapeTree* read_only_indices_out) { - // Record all buffer indices for 'instruction_', which interfere with - // 'other_instruction' at the same index. - ShapeUtil::ForEachSubshape( - instruction_->shape(), - [this, &liveness, other_instruction, read_only_indices_out]( - const Shape& /*subshape*/, const ShapeIndex& index) { - if (IsReadOnlyIndex(index)) { - return; + // This method also fills in copy_map_ which indicates which nodes + // in the value lists corresponding to the source and destination values of + // kCopy instructions. value_to_node should map each HloValue to its + // respective ValueNode. + void CreateCopyMap( + const HloModule& module, + const tensorflow::gtl::FlatMap& + value_to_node) { + for (HloComputation* computation : module.computations()) { + for (HloInstruction* instruction : computation->instructions()) { + // Add copies with unambiguous source values to the map. Copies with + // ambiguous sources are not removable. + if (instruction->opcode() == HloOpcode::kCopy) { + const HloValueSet& src_value_set = + dataflow_.GetValueSet(instruction->operand(0)); + if (src_value_set.values().size() == 1) { + CopyNodes& copy_node = copy_map_[instruction]; + copy_node.dest = + value_to_node.at(&dataflow_.GetUniqueValueAt(instruction)); + copy_node.src = value_to_node.at(&src_value_set.GetUniqueValue()); + } + } } - if (indices_to_copy_.element(index)) { - // Return if previous pass already set index. - return; + } + } + + ~BufferValueTracker() { + for (const ValueNode* head : value_lists_) { + const ValueNode* p = head; + do { + const ValueNode* tmp = p->next; + delete p; + p = tmp; + } while (p != head); + } + } + + // Verify invariants within the linked lists. + Status Verify() const { + for (const ValueNode* head : value_lists_) { + const ValueNode* p = head; + do { + // Verify links between elements are consistent. + TF_RET_CHECK(p->prev->next == p); + TF_RET_CHECK(p->next->prev == p); + + const HloInstruction* def = p->value->defining_instruction(); + if (def->opcode() == HloOpcode::kCopy && + ContainsKey(copy_map_, def)) { + TF_RET_CHECK(copy_map_.at(def).dest == p); + } + for (const HloUse* use : p->uses) { + if (use->instruction->opcode() == HloOpcode::kCopy && + ContainsKey(copy_map_, use->instruction)) { + TF_RET_CHECK(copy_map_.at(use->instruction).src == p); + } + } + + p = p->next; + } while (p != head); + } + return Status::OK(); + } + + // Try to elide the given copy. Elision of a copy is possible only if no + // live range interference is introduced by the copy's elimination. If + // elision is possible, then the internal state (value lists) are updated, + // and true is returned. Returns false otherwise. + bool TryElideCopy(const HloInstruction* copy) { + VLOG(2) << "Trying to remove " << copy->name(); + + if (!ContainsKey(copy_map_, copy)) { + VLOG(2) << copy->name() << " is not removable"; + return false; + } + + const CopyNodes& copy_node = copy_map_.at(copy); + ValueNode* src = copy_node.src; + ValueNode* dest = copy_node.dest; + DCHECK(src != nullptr); + DCHECK(dest != nullptr); + + auto is_live_range_before = [this](const ValueNode& a, + const ValueNode& b) { + if (LiveRangeBefore(a, b)) { + VLOG(2) << " Live range of " << a.value->ToShortString() + << " is before " << b.value->ToShortString(); + return true; + } else { + VLOG(2) << " Live range of " << a.value->ToShortString() + << " is not before " << b.value->ToShortString(); + return false; } - const auto& points_to_analysis = liveness.points_to_analysis(); - // Lookup buffers for 'instruction_' and 'other_instruction'. - const auto instruction_buffers = - points_to_analysis.GetPointsToSet(instruction_).element(index); - // If 'instruction_' has ambiguous points-to-set at 'index', it would - // have been recorded in a previous pass (and we would have returned - // early at the entry to this function). As a result, here we know that - // 'instruction_' has just one buffer in its points-to-set. - CHECK_EQ(1, instruction_buffers.size()); - const LogicalBuffer* instruction_buffer = instruction_buffers[0]; - - const auto other_instruction_buffers = - points_to_analysis.GetPointsToSet(other_instruction).element(index); - // Do not insert a copy if both instructions point at the same buffer. - // This eliminates unnecessary copies of read-only tuple elements. - // If 'instruction_' and 'other_instruction' point to the same buffer, - // then that buffer is not updated on the path between the two - // instructions. Therefore, any other (possibly interference-causing) - // users of that buffer from 'other_instruction' will see the same data, - // irrespective of whether we insert a copy of this buffer at - // 'instruction_' or not. - if (other_instruction_buffers.size() == 1 && - other_instruction_buffers[0]->id() == instruction_buffer->id()) { - if (read_only_indices_out != nullptr) { - *read_only_indices_out->mutable_element(index) = true; + }; + + VLOG(3) << copy->name() << " copies value " + << src->value->ToShortString(); + VLOG(3) << "Source buffer values: " << ValueListToString(src); + VLOG(3) << "Dest buffer values: " << ValueListToString(src); + + // A kCopy instruction copies an HLO value from a source buffer and + // defines an HLO value in a destination buffer. Most generally, the + // source and destination buffers may each hold more than one value at + // different points in the computation so we define the following: + // + // Values in source buffer: {s_0, ..., s_n} + // Values in destination buffer: {d_0, ..., d_m} + // + // A kCopy instruction between these buffers copies a value s_x in the + // source buffer and defines a value d_y in the destination buffer. The + // elision of a copy merges the source and destination buffers together, + // so the list of values for the source and destination buffers are + // merged. + // + // We handle two different cases for copy elision: + // + // (1) the kCopy defines the first value in the destination buffer (d_0). + // + // (2) the kCopy copies the last value in the source buffer (s_n). + // + // For the remaining case where the kCopy copies a not-last value from the + // source buffer to a not-first value of the destination buffer, the kCopy + // instruction cannot be removed. This case is generated, for example, if + // the kCopy copies a while body parameter of the loop state at one tuple + // index to a different tuple index in the while body root. Removal of the + // copy necessarily results in live range interference of values in the + // loop state at the two different tuple indices. + // + // We can only perform copy elision if the resulting merged values have + // totally ordered live ranges; otherwise the merged buffer would have + // live range interference. + if (IsHead(*dest)) { + // The copy copies an arbitrary value in the source buffer (call it s_x) + // and defines d_0, the first value in the destination buffer. After + // merging, the values in the combined buffer must be strictly ordered + // as follows** to elide the copy: + // + // {s_0, ..., s_x, d_1, ..., d_m, s_{x+1}, ..., s_n} + // + // Removing the copy eliminates d_0, and uses of d_0 become uses of + // s_x. In the above ordering, the live range of d_m must be ordered + // before the live range of s_{x+1} and the definition and all uses of + // s_x must be ordered before the definition of d_1. These conditions + // are checked below prior to elision. + // + // ** Technically it might be possible to have a non-interfering + // non-trivial interleaving of the values of the source and + // destination buffers in the resulting order. However, this case is + // slow and complicated to check and likely not worth it. So instead + // we simply check for the case where *all* values of the destination + // buffer (d_1 through d_m) are spliced into the point where the copy + // used to be. + VLOG(2) << copy->name() << " defines the first value in its buffer"; + ValueNode* next_dest = Next(*dest); + if (next_dest != nullptr) { + // Live range of 'from' value (s_x) must be before 'next_dest' (d_1); + if (!is_live_range_before(*src, *next_dest)) { + return false; } - return; } - // We can't say anything about the ambiguity of 'other_instruction' at - // this point, so we need to check interference between the single - // buffer in the points-to set of 'instruction_' and all buffers in - // 'other_instruction_buffers'. - for (const LogicalBuffer* other_buffer : other_instruction_buffers) { - if (liveness.MayInterfere(*instruction_buffer, *other_buffer)) { - VLOG(2) << "Adding copy of buffer for instruction: " - << instruction_->name() - << " instruction_buffer: " << instruction_buffer->ToString() - << " at index: " << tensorflow::str_util::Join(index, ",") - << " because of interference with buffer: " - << other_buffer->ToString(); - RecordIndex(index); - break; + ValueNode* next_src = Next(*src); + + if (next_src != nullptr) { + // Live range of 'last_dest' (d_m) must be before 'next_src' s_{x+1}. + ValueNode* last_dest = dest->prev; + DCHECK(IsTail(*last_dest)); + if (!is_live_range_before(*last_dest, *next_src)) { + return false; } } - }); - return Status::OK(); -} -// This is called when 'instruction_' is a while body root, and 'parameter' is -// the while body parameter. We record all users of all aliases of 'parameter' -// as control predecessors, so that when we add a copy of 'instruction_', we can -// mark the control dependencies. This is necessary because points-to and -// liveness analysis doesn't know about the aliasing between the while body root -// and param. Without these control dependencies, the copy might get scheduled -// to run at a point that interferes with users of the buffer. -Status InstructionCopier::RecordControlPredecessors( - const TuplePointsToAnalysis& points_to_analysis, - HloInstruction* parameter) { - return indices_to_copy_.ForEachElementWithStatus( - [this, &points_to_analysis, parameter](const ShapeIndex& index, - bool will_copy) { - if (will_copy) { - TF_ASSIGN_OR_RETURN( - const LogicalBuffer* buffer, - points_to_analysis.GetBufferDefinedAt(parameter, index)); - for (const BufferAlias& alias : - points_to_analysis.GetBufferAliases(*buffer)) { - for (HloInstruction* user : alias.instruction()->users()) { - if (DoesNotUseOperandBuffer(alias.instruction(), alias.index(), - user, points_to_analysis)) { - continue; - } - - if (user != instruction_) { - control_predecessors_.mutable_element(index)->push_back(user); - } - } + // Splice in destination buffer values list right after 'src'. + SpliceAfter(dest, src); + } else if (IsTail(*src)) { + // The copy copies the last value in the source buffer, s_n, and defines + // an arbitrary value in the destination buffer, d_y. After + // merging, the values in the combined buffer must be strictly ordered + // as follows** to elide the copy: + // + // {d_0, ..., d_{y-1}, s_0, ..., s_n, d_{y+1}, ..., d_m} + // + // Removing the copy eliminates d_y, and uses of d_y become uses of + // s_n. To enforce the above order, the live range of d_{y-1} must be + // before the live range of s_0, and the live range of s_n must be + // before the live range of d_{y+1}. + // + // ** See comment above in the code handling Case (1). + VLOG(2) << copy->name() << " copies the last value (" + << src->value->ToShortString() << ") in its buffer"; + + ValueNode* prev_dest = Prev(*dest); + // nullptr condition handled above in the first 'if' case. + DCHECK(prev_dest != nullptr); + ValueNode* first_src = src->next; + DCHECK(IsHead(*first_src)); + if (!is_live_range_before(*prev_dest, *first_src)) { + // Live range of value d_{y-1} is not before s_0. + return false; + } + ValueNode* next_dest = Next(*dest); + if (next_dest != nullptr) { + if (!is_live_range_before(*src, *next_dest)) { + // Live range of value s_n is not before d_{y+1}. + return false; } } - return Status::OK(); - }); -} -// Recursively inserts copies of 'instruction' tuple element buffers at -// indices in 'indices_to_copy_', expanding tuples as needed. -HloInstruction* InstructionCopier::CopyTuple(HloInstruction* instruction, - ShapeIndex* index) { - const int64 num_tuple_elements = - ShapeUtil::TupleElementCount(instruction->shape()); - std::vector elem_copies(num_tuple_elements); - for (int64 i = 0; i < num_tuple_elements; ++i) { - HloInstruction* elem; - if (instruction->opcode() == HloOpcode::kTuple) { - // If the instruction is already a Tuple instruction, we know that the - // element buffers are aliased, so we can just grab the operand directly. - elem = instruction->mutable_operand(i); - } else { - // Otherwise we need to add a GTE to unpack the element out of the tuple. - elem = instruction->parent()->AddInstruction( - HloInstruction::CreateGetTupleElement( - ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, - i)); - } - index->push_back(i); - if (ShapeUtil::IsTuple(elem->shape())) { - elem_copies[i] = CopyTuple(elem, index); - } else if (!indices_to_copy_.element(*index)) { - elem_copies[i] = elem; - } else if (HloInstruction* copy_override = GetCopyOverride(*index)) { - elem_copies[i] = copy_override; - } else { - HloInstruction* elem_copy = elem->parent()->AddInstruction( - HloInstruction::CreateUnary(elem->shape(), HloOpcode::kCopy, elem)); - for (HloInstruction* control_predecessor : - control_predecessors_.element(*index)) { - VLOG(2) << "Adding control dependency from " - << control_predecessor->ToString() << " to " - << elem_copy->ToString(); - TF_CHECK_OK(control_predecessor->AddControlDependencyTo(elem_copy)); + // Splice source buffer values list right after 'prev_dest'. + SpliceAfter(first_src, prev_dest); + } else { + VLOG(2) + << copy->name() + << " copies value in middle of source buffer to value in middle " + "of destination buffer"; + return false; } - elem_copies[i] = elem_copy; + + RemoveCopyValue(dest); + + XLA_VLOG_LINES(4, ToString()); + TF_DCHECK_OK(Verify()); + + return true; } - index->pop_back(); - } - return instruction->parent()->AddInstruction( - HloInstruction::CreateTuple(elem_copies)); -} -// Inserts copies of 'instruction_' buffers at indices in 'indices_to_copy_'. -HloInstruction* InstructionCopier::Copy() { - ShapeIndex index; - HloInstruction* copy; - if (ShapeUtil::IsTuple(instruction_->shape())) { - copy = CopyTuple(instruction_, &index); - } else { - copy = instruction_->parent()->AddInstruction(HloInstruction::CreateUnary( - instruction_->shape(), HloOpcode::kCopy, instruction_)); - } - for (HloInstruction* user : copy_users_) { - VLOG(2) << "Adding copy between instruction: " << instruction_->name() - << " and user: " << user->name(); - TF_CHECK_OK(instruction_->ReplaceUseWith(user, copy)); + // Delete the given ValueNode associated with a elided kCopy + // instruction. This should be called after splicing the value lists of the + // source and destination buffers together. + void RemoveCopyValue(ValueNode* copy_value_node) { + CHECK_EQ(copy_value_node->value->defining_instruction()->opcode(), + HloOpcode::kCopy); + ValueNode* operand_node = copy_value_node->prev; + CHECK(operand_node != copy_value_node); + + VLOG(2) << "Removing copy " << operand_node->value->ToShortString() + << " => " << copy_value_node->value->ToShortString(); + + // Splice out the copy value node. + operand_node->next = copy_value_node->next; + copy_value_node->next->prev = operand_node; + + // Patch up uses. Remove use of copy from operand_node uses. + auto it = + std::find_if(operand_node->uses.begin(), operand_node->uses.end(), + [copy_value_node](const HloUse* use) { + return use->instruction == + copy_value_node->value->defining_instruction(); + }); + CHECK(it != operand_node->uses.end()); + operand_node->uses.erase(it); + + // If the elided copy has any uses which are themselves kCopy instructions + // then patch up the copy info to reflect the that this kCopy instruction + // has a different operand (the operand of the elided copy). + for (const HloUse* copy_use : copy_value_node->uses) { + operand_node->uses.push_back(copy_use); + if (copy_use->instruction->opcode() == HloOpcode::kCopy) { + copy_map_.at(copy_use->instruction).src = operand_node; + } + } + + // Delete the copy info and the value node. + copy_map_.erase(copy_value_node->value->defining_instruction()); + delete copy_value_node; + } + + // Returns true if the live range of given value 'a' is before the live + // range of 'b'. + // + // We cannot use LiveRangeStrictlyBefore because HloValue::uses() is not + // updated as copies are removed. + bool LiveRangeBefore(const ValueNode& a, const ValueNode& b) { + if (a.uses.empty()) { + VLOG(2) << "Empty uses"; + return ordering_.IsDefinedBefore(*a.value, *b.value); + } + for (const HloUse* use : a.uses) { + VLOG(2) << "use: " << *use; + VLOG(2) << "is before:" << *b.value; + if (!ordering_.UseIsBeforeValueDefinition(*use, *b.value, dataflow_)) { + VLOG(2) << "Not before"; + return false; + } + } + return true; + } + + // Returns whether 'node' is the last node in its list. + bool IsTail(const ValueNode& node) const { + return ContainsKey(value_lists_, node.next); + } + + // Returns whether 'node' is the first node in its list. + bool IsHead(const ValueNode& node) const { + return ContainsKey(value_lists_, &node); + } + + // Returns the next node in the list after 'node'. If 'node' is the + // tail, then nullptr is returned. + ValueNode* Next(const ValueNode& node) const { + if (IsTail(node)) { + return nullptr; + } else { + return node.next; + } + } + + // Returns the previous node in the list before 'node'. If 'node' + // is the head, then nullptr is returned. + ValueNode* Prev(const ValueNode& node) const { + if (IsHead(node)) { + return nullptr; + } else { + return node.prev; + } + } + + // Splices the entire linked list with 'head' as its head right after the + // node 'insert_after' in another linked list. + void SpliceAfter(ValueNode* head, ValueNode* insert_after) { + DCHECK(IsHead(*head)); + value_lists_.erase(head); + + ValueNode* tail = head->prev; + tail->next = insert_after->next; + insert_after->next->prev = tail; + + insert_after->next = head; + head->prev = insert_after; + } + + string ValueListToString(const ValueNode* element) { + const ValueNode* head = element; + while (!IsHead(*head)) { + head = Prev(*head); + } + std::vector values; + for (const ValueNode* p = head; p != nullptr; p = Next(*p)) { + values.push_back(p->value); + } + return StrCat("{", + Join(values, ", ", + [](string* s, const HloValue* value) { + StrAppend(s, value->ToShortString()); + }), + "}"); + } + + string ToString() const { + string out = StrCat("BufferValueTracker:\n"); + StrAppend(&out, " Def-use chains in each buffer:\n"); + for (const ValueNode* head : value_lists_) { + StrAppend(&out, " Buffer defined by ", head->value->ToShortString(), + ":\n"); + const ValueNode* p = head; + do { + StrAppend(&out, " ", p->value->ToShortString(), ", uses: ", + Join(p->uses, "; ", + [](string* s, const HloUse* use) { + StrAppend(s, use->ToString()); + }), + "\n"); + + p = p->next; + } while (p != head); + } + StrAppend(&out, " Potentially removable copies:\n"); + for (const auto& pair : copy_map_) { + const HloInstruction* copy = pair.first; + const CopyNodes& copy_info = pair.second; + + StrAppend(&out, " ", copy->name(), " : ", + copy_info.src->value->ToShortString(), " => ", + copy_info.dest->value->ToShortString(), "\n"); + } + return out; + } + + private: + const HloDataflowAnalysis& dataflow_; + const HloOrdering& ordering_; + + // The heads of all the value lists. Each value list represents the HLO + // values contained in a particular HLO buffer. The values in the list are + // in dependency order. + tensorflow::gtl::FlatSet value_lists_; + + // Copy removal requires fast access to the value list elements + // corresponding to the source and destination values of the kCopy + // instruction. This data structure holds pointers to these elements for + // each kCopy instruction in the graph. + struct CopyNodes { + // The source and destinations values of the kCopy instruction. + ValueNode* src = nullptr; + ValueNode* dest = nullptr; + }; + tensorflow::gtl::FlatMap copy_map_; + }; + + HloModule* module_; + const HloAliasAnalysis& alias_analysis_; + const HloOrdering& ordering_; + + // Object tracking the HLO values contained in each HLO buffer. + BufferValueTracker buffer_value_tracker_; +}; + +// Try to remove as many copies from the module as possible without introducing +// live range interference. Copy instructions (identified by their unique id) in +// the set copies_to_exclude are not considered for removal. +Status RemoveUnnecessaryCopies( + const HloOrdering& ordering, + const tensorflow::gtl::FlatSet& copies_to_exclude, HloModule* module) { + TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, + HloAliasAnalysis::Run(module)); + CopyRemover copy_remover(*alias_analysis, ordering, module); + XLA_VLOG_LINES(3, copy_remover.ToString()); + + tensorflow::gtl::FlatSet existing_copies; + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kCopy && + !ContainsKey(copies_to_exclude, instruction->unique_id())) { + TF_RETURN_IF_ERROR(copy_remover.TryElideCopy(instruction).status()); + } + } } - return copy; + + return Status::OK(); } -// The 'read_only_indices' are initialized based on points-to analysis on the -// while body corresponding to 'while_hlo'. If the init buffer corresponding to -// a read-only index aliases with a constant, it cannot be considered read-only, -// and must be copied. This is necessary because BufferAssignment does not -// currently assign an allocation for constants (b/32248867). -// This function performs this fix-up of 'read_only_indices'. +// Add copies to address special constraints on the roots of computations not +// related to live range interference: // -// Returns a ShapeTree of copy_overrides, which implements an optimization to -// allow multiple while loops that share the same read-only constants to -// share a single copy. -StatusOr> RevertReadOnlyIndicesForConstants( - const HloInstruction* while_hlo, - const TuplePointsToAnalysis& points_to_analysis, - ShapeTree* read_only_indices, - FlatMap* shared_copies) { - const HloInstruction* init_hlo = while_hlo->operand(0); - const PointsToSet& points_to = points_to_analysis.GetPointsToSet(init_hlo); - - // Mapping from LogicalBuffer to index (used to detect non-distinct indices). - FlatSet buffer_set; - - ShapeTree copy_overrides(init_hlo->shape()); - points_to.ForEachElement([init_hlo, read_only_indices, shared_copies, - &buffer_set, ©_overrides]( - const ShapeIndex& index, - const PointsToSet::BufferList& buffers) { - // Look for read-only entry parameters. - if (!read_only_indices->element(index)) { - return; - } - for (const LogicalBuffer* buffer : buffers) { - HloInstruction* pointee = buffer->instruction(); - const bool is_constant = pointee->opcode() == HloOpcode::kConstant; - if (!is_constant) { - continue; - } +// (1) Entry computation root must be unambiguous and distinct. +// +// (2) Any computation called by a kCall instruction must have an +// unambiguous root. +// +// (3) Constants and parameters cannot be live out of the entry computation +// +Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { + TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, + HloAliasAnalysis::Run(module)); + + // Identify which shape indices of which instructions need to be copied. Store + // these results in 'instructions_to_copy'. + std::unordered_map> instructions_to_copy; + auto add_index_to_copy = [&instructions_to_copy](HloInstruction* instruction, + const ShapeIndex& index) { + auto it = instructions_to_copy.find(instruction); + if (it == instructions_to_copy.end()) { + auto it_added = instructions_to_copy.emplace( + std::piecewise_construct, std::forward_as_tuple(instruction), + std::forward_as_tuple(instruction->shape(), /*init_value=*/false)); + it = it_added.first; + } + *it->second.mutable_element(index) = true; + }; - // We have found an constant that is read-only in - // the while body. These buffers are managed by the caller, and cannot - // be aliased with HLO buffers. Revert this read-only index, - // to allow it to be copied. - *read_only_indices->mutable_element(index) = false; - - // Optimization to allow multiple while loops that share the same - // read-only entry constants to share a single copy. - // Only unambiguous and distinct array-shaped buffers are allowed, to - // reduce code complexity. The shape of the entry parameter must be - // identical to the shape of the init_hlo at this index, to ensure - // there were no intervening bitcast or GTE instructions, which are - // also hard to handle. - const Shape& pointee_shape = pointee->shape(); - const Shape& init_shape = - ShapeUtil::GetSubshape(init_hlo->shape(), index); - if (buffers.size() == 1 && ShapeUtil::IsArray(pointee_shape) && - ShapeUtil::Equal(pointee_shape, init_shape) && - buffer_set.count(buffer) < 1) { - HloInstruction** copy = &(*shared_copies)[pointee]; - if (*copy == nullptr) { - *copy = pointee->parent()->AddInstruction(HloInstruction::CreateUnary( - pointee_shape, HloOpcode::kCopy, pointee)); + // Iterate through values of all constants and entry parameters. These values + // are special because they are held in read-only buffers. If any of these + // values share a buffer with other values (for example, the init value of a + // while is a constant) then copy the value at its definition and replace all + // its uses with the copy. + for (const HloValue* value : alias_analysis->dataflow_analysis().values()) { + if (ValueIsReadOnly(*value) && + alias_analysis->GetBufferContainingValue(*value).values().size() > 1) { + VLOG(2) << "Value " << value->ToShortString() + << " is read only, but its buffer contains more than one value. " + "Copying."; + add_index_to_copy(value->defining_instruction(), value->defining_index()); + } + } + + // Identify copies which must be added at root instructions + for (HloComputation* computation : module->computations()) { + const CallGraphNode& node = call_graph.GetNode(computation); + if (node.context() == CallContext::kParallel) { + continue; + } + TF_RET_CHECK(node.context() == CallContext::kSequential); + + const bool is_entry = computation == module->entry_computation(); + HloInstruction* root = computation->root_instruction(); + + // Mark nondistinct/ambiguous indices. + tensorflow::gtl::FlatSet seen; + ShapeUtil::ForEachSubshape( + root->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) { + std::vector buffers_at_index = + alias_analysis->ComputeBuffersAt(root, index); + bool buffer_seen_before = false; + for (const HloBuffer* buffer : buffers_at_index) { + buffer_seen_before |= !seen.insert(buffer).second; + } + if (buffers_at_index.size() > 1 || (buffer_seen_before && is_entry)) { + VLOG(2) << "Index " << index << " of root of computation " + << computation->name() << " (" << root->name() + << ") has ambiguous or non-distinct buffer. Copying."; + add_index_to_copy(root, index); + } + }); + + // For entry instructions, mark any parameter or constant values. + if (is_entry) { + for (const auto& pair : + alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (ValueIsReadOnly(*value)) { + VLOG(2) << "Root of entry computation (" << root->name() + << ") has constant or entry parameter value at index " + << index << ". Copying."; + add_index_to_copy(root, index); + } } - // Add the copy as an override. - *copy_overrides.mutable_element(index) = *copy; } + } + } - // Tracks whether this current buffer is distinct. - buffer_set.insert(buffer); + // Add copy instructions indicated in 'instructions_to_copy' to the module. + for (const auto& pair : instructions_to_copy) { + HloInstruction* instruction = pair.first; + const ShapeTree& indices_to_copy = pair.second; - // We've already reverted the read-only index and handled the - // single-copy optimization above, so there's nothing more to do. - break; + std::vector users = instruction->users(); + TF_ASSIGN_OR_RETURN(HloInstruction * deep_copy, + instruction->parent()->DeepCopyInstruction( + instruction, &indices_to_copy)); + for (HloInstruction* user : users) { + TF_RETURN_IF_ERROR(instruction->ReplaceUseWith(user, deep_copy)); + } + if (instruction == instruction->parent()->root_instruction()) { + instruction->parent()->set_root_instruction(deep_copy); } - }); - return copy_overrides; + } + + return Status::OK(); +} + +Status VerifyNoLiveRangeInterference(HloModule* module) { + TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, + HloAliasAnalysis::Run(module)); + DependencyHloOrdering ordering(module); + TF_RET_CHECK(!alias_analysis->HasLiveRangeInterference(ordering)); + return Status::OK(); } -} // anonymous namespace - -// NOTE: This is only called by gpu::CopyInsertion. It's not called here in the -// base class, since the regular CopyInsertion logic above selectively copies -// tuple elements, while this method assumes all buffers need to be deep copied. -StatusOr CopyInsertion::FindOrInsertCopy(HloInstruction* hlo) { - auto copy_it = inserted_copies_.find(hlo); - if (copy_it == inserted_copies_.end()) { - HloInstruction* copy = hlo->parent()->DeepCopyInstruction(hlo).ValueOrDie(); - inserted_copies_.insert({hlo, copy}); - return copy; - } else { - return copy_it->second; +void MaybeDumpModule(const string& message, const HloModule& module) { + if (VLOG_IS_ON(3)) { + VLOG(3) << message; + XLA_VLOG_LINES(3, module.ToString()); + hlo_graph_dumper::MaybeDumpHloModule(module, message); } } +} // namespace + StatusOr CopyInsertion::Run(HloModule* module) { - bool changed = false; - VLOG(2) << "CopyInsertion for module " << module->name(); + // Copy insertion is performed in three steps: + // + // (1) Add copies conservatively to guarantee that there is no live-range + // interference. This is done simplistically and usually results in more + // copies than is strictly necessary. + // + // (2) Using a more fine-grained analysis, remove as many copies that were + // added in (1) as possible while ensuring no live-range interference. + // + // (3) Add copies to resolve issues not related to live range interference + // such as parameters and constants live out of the entry computation. + // + // We add copies then remove them (step (1) then (2)) rather than simply + // adding only the copies that are necessary because, in general, it is + // difficult to figure out the minimal set of copies to add once there is + // interference. On the other hand, it is easy to determine if removing a copy + // will introduce interference. + // + // The final copy insertion in (3) is done separately to simplify the + // implementation of copy removal in (2) which is the most complicated part of + // the pass. As is, copy removal only has to reason about live range + // interference. If all copies were added in step (1) then copy removal would + // also have to reason about things like constants and parameters live out of + // the computation. + MaybeDumpModule("before copy insertion", *module); - TF_ASSIGN_OR_RETURN( - std::unique_ptr liveness, - BufferLiveness::Run(module, MakeUnique(module))); - const auto& points_to_analysis = liveness->points_to_analysis(); - XLA_VLOG_LINES(2, points_to_analysis.ToString()); - XLA_VLOG_LINES(2, module->ToString()); - - // Gather all while body computations and while instructions. - FlatSet while_body_computations; - std::vector while_instructions; - for (auto* computation : module->computations()) { + std::unique_ptr call_graph = CallGraph::Build(module); + if (!call_graph->IsFlattened()) { + return FailedPrecondition( + "Call graph must be flattened before copy insertion."); + } + + // Gather Ids of existing kCopy instructions in the module. We avoid removing + // these copies (except via DCE in TupleSimplifier) because they may have been + // added for reasons not considered by copy insertion (eg, layout assignment). + // Instruction id is used instead of HloInstruction* because the pointer + // values may be recycled. + tensorflow::gtl::FlatSet existing_copies; + for (HloComputation* computation : module->computations()) { for (HloInstruction* instruction : computation->instructions()) { - if (instruction->opcode() == HloOpcode::kWhile) { - while_body_computations.insert(instruction->while_body()); - while_instructions.push_back(instruction); + if (instruction->opcode() == HloOpcode::kCopy) { + existing_copies.insert(instruction->unique_id()); } } } - // Collect instruction buffer indices to copy in 'instructions_to_copy'. - std::vector instructions_to_copy; - - // Add copies of computation root instructions, if needed. - FlatMap> while_body_read_only_indices; - for (auto* computation : module->MakeNonfusionComputations()) { - VLOG(2) << "computation " << computation->name(); - InstructionCopier root_copier(computation->root_instruction(), - /*copy_users=*/{}); - if (while_body_computations.count(computation) > 0) { - // Record root indices to copy for while body sub-computations. We do not - // need to call RecordIndicesWhichPointToParamOrConstant for the while - // body root instruction here, because any necessary copies needed to - // avoid constants or parameters in the output are handled by while.init - // operand copy insertion below (which will share an allocation). - HloInstruction* while_body_param = computation->parameter_instruction(0); - ShapeTree read_only_indices(while_body_param->shape()); - TF_RETURN_IF_ERROR(root_copier.RecordIndicesToCopyForColocatingBuffers( - *liveness, while_body_param, &read_only_indices)); - while_body_read_only_indices[computation] = read_only_indices; - - // Mark control predecessors, based on the body param, for any copies - // we'll be inserting. This ensures the copy doesn't run too early. - TF_RETURN_IF_ERROR(root_copier.RecordControlPredecessors( - points_to_analysis, while_body_param)); - } else { - // Record root indices to copy for general computations. - TF_RETURN_IF_ERROR(root_copier.RecordIndicesWhichPointToParamOrConstant( - points_to_analysis)); + TF_RETURN_IF_ERROR(AddCopiesToResolveInterference(module)); + + // Simplify the tuple structures introduced by the deep copies. This should be + // done before removing copies (RemoveUnnecessaryCopies) because tuple + // simplification changes dependencies in the graph which changes live range + // interference in the graph. Also run DCE to remove the dead Tuple/GTE + // instructions introduced by tuple simplification. + TupleSimplifier tuple_simplifier; + HloDCE dce; + TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + TF_RETURN_IF_ERROR(dce.Run(module).status()); + + TF_DCHECK_OK(VerifyNoLiveRangeInterference(module)); + + MaybeDumpModule("after adding copies to resolve interference", *module); + + DependencyHloOrdering ordering(module); + TF_RETURN_IF_ERROR( + RemoveUnnecessaryCopies(ordering, existing_copies, module)); + + MaybeDumpModule("after removing unnecessary copies", *module); + + TF_RETURN_IF_ERROR(AddSpecialCaseCopies(*call_graph, module)); + + MaybeDumpModule("after adding special-case copies", *module); + + TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + TF_RETURN_IF_ERROR(dce.Run(module).status()); + TF_DCHECK_OK(VerifyNoLiveRangeInterference(module)); + + MaybeDumpModule("after copy insertion", *module); + + if (VLOG_IS_ON(1)) { + int64 num_total_copies = 0; + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kCopy) { + num_total_copies++; + } + } } - instructions_to_copy.push_back(root_copier); + VLOG(1) << "Num copies before copy-insertion: " << existing_copies.size(); + VLOG(1) << "Num copies after copy-insertion: " << num_total_copies; } - // Add copies of while 'init' operand instructions, if needed. 'shared_copies' - // is used to ensure that multiple while loops can share a single copy of the - // same entry parameter or constant, if all loops use it read-only. - // - // TODO(b/33301720) Remove redundant while instruction copies. - FlatMap shared_copies; - for (HloInstruction* while_hlo : while_instructions) { - // Fix read_only_indices to account for entry constants. Also - // initialize copy_overrides, which ensures a single copy for each read-only - // constant that is used in multiple while loops. - ShapeTree* read_only_indices = - &while_body_read_only_indices[while_hlo->while_body()]; - TF_ASSIGN_OR_RETURN( - const ShapeTree copy_overrides, - RevertReadOnlyIndicesForConstants(while_hlo, points_to_analysis, - read_only_indices, &shared_copies)); - // Create InstructionCopier for init operand of while instruction. - HloInstruction* init_hlo = while_hlo->mutable_operand(0); - InstructionCopier init_copier(init_hlo, {while_hlo}); - init_copier.SetReadOnlyIndices(*read_only_indices); - init_copier.SetCopyOverrides(copy_overrides); - // Record 'init' buffer indices which point-to a Constant or Parameter. - TF_RETURN_IF_ERROR(init_copier.RecordIndicesWhichPointToParamOrConstant( - points_to_analysis)); - // Record indices necessary to colocate while and init operand buffers. - TF_RETURN_IF_ERROR(init_copier.RecordIndicesToCopyForColocatingBuffers( - *liveness, while_hlo, /*read_only_indices_out=*/nullptr)); - instructions_to_copy.push_back(init_copier); + return true; +} + +namespace { + +bool IsWhileBody(const HloComputation* computation, + const CallGraph& call_graph) { + const CallGraphNode& node = call_graph.GetNode(computation); + + if (node.context() == CallContext::kSequential && + !node.caller_callsites().empty()) { + // Callgraph should be flattened so sequential context computations can + // have at most one caller. + CHECK_EQ(node.caller_callsites().size(), 1); + const HloInstruction* calling_instruction = + node.caller_callsites()[0].instruction(); + if (calling_instruction->opcode() == HloOpcode::kWhile && + calling_instruction->while_body() == node.computation()) { + return true; + } } + return false; +} - for (InstructionCopier& to_copy : instructions_to_copy) { - if (to_copy.HasAllIndicesFalse()) { +} // namespace + +/* static */ StatusOr CopyInsertion::AddCopiesForBufferAssignment( + HloModule* module) { + std::unique_ptr call_graph = CallGraph::Build(module); + TF_ASSIGN_OR_RETURN(std::unique_ptr dataflow, + HloDataflowAnalysis::Run(module)); + + bool changed = false; + + // If a buffer live out of a computation is a constant, a parameter, or not + // defined in the computation, then copy it to account for the limited + // computation-scoped analysis in buffer assignment. An exception to this rule + // is the while body which is handled properly without copies. + for (HloComputation* computation : module->computations()) { + if (computation == module->entry_computation() || + IsWhileBody(computation, *call_graph)) { continue; } - changed = true; - // Copy instruction at recorded buffer indices. - HloComputation* computation = to_copy.instruction()->parent(); - HloInstruction* copy = to_copy.Copy(); - if (to_copy.instruction() == computation->root_instruction()) { - computation->set_root_instruction(copy); + HloInstruction* root = computation->root_instruction(); + ShapeTree indices_to_copy(root->shape(), /*init_value=*/false); + bool copy_root = false; + for (const auto& pair : dataflow->GetInstructionValueSet(root)) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + HloInstruction* def = value->defining_instruction(); + if (def->parent() != computation || + def->opcode() == HloOpcode::kConstant || + def->opcode() == HloOpcode::kParameter) { + *indices_to_copy.mutable_element(index) = true; + copy_root = true; + } + } + } + if (copy_root) { + TF_ASSIGN_OR_RETURN( + HloInstruction * root_copy, + computation->DeepCopyInstruction(root, &indices_to_copy)); + computation->set_root_instruction(root_copy); + changed = true; } } - VLOG(3) << "After copy insertion for module " << module->name(); - XLA_VLOG_LINES(3, module->ToString()); + TupleSimplifier tuple_simplifier; + HloDCE dce; + TF_ASSIGN_OR_RETURN(bool tuple_simplifier_changed, + tuple_simplifier.Run(module)); + TF_ASSIGN_OR_RETURN(bool dce_changed, dce.Run(module)); - return changed; + return changed || tuple_simplifier_changed || dce_changed; } } // namespace xla diff --git a/tensorflow/compiler/xla/service/copy_insertion.h b/tensorflow/compiler/xla/service/copy_insertion.h index 28bb62e40c..65e3d31e34 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.h +++ b/tensorflow/compiler/xla/service/copy_insertion.h @@ -25,12 +25,25 @@ limitations under the License. namespace xla { -// HLO pass which inserts a copy of the root instruction (creating a new root) -// if the root is or points-to any constant or parameter instruction. -// If the root instruction is a Tuple, only tuple elements which point to -// constant or parameter instructions will be copied. -// Copy insertion is necessary because constant and parameter arrays have -// different lifetimes than computation results. +// Copy insertion is a legalization HLO pass which inserts copies (kCopy +// instructions) to eliminate several kinds of problems in the HLO module. +// +// (1) Entry parameter or a constant live out of the entry computation. Entry +// computation arguments and constants have different lifetimes than the +// computation result and cannot share the same allocation. Parameters and +// constants live out of non-entry computations do not need copies. +// +// (2) Different values which are simultaneously live and which must be held +// in the same buffer. This can occur in while bodies. Specifically, the +// while loop state (the arguments to the while instruction) is updated +// in-place and the update may clobber the value from the previous +// iteration before the previous value is dead. Computations called from +// kCall instructions do not need such copies because kCall has no update +// in-place semantics. +// +// (3) The buffer set of the root instruction of the entry computation must be +// unambiguous and distinct. That is, InstructionAliasSet::IsAmbiguous and +// InstructionAliasSet::IsDistinct return true. class CopyInsertion : public HloPassInterface { public: tensorflow::StringPiece name() const override { return "copy-insertion"; } @@ -39,14 +52,16 @@ class CopyInsertion : public HloPassInterface { // (copies were inserted). StatusOr Run(HloModule* module) override; - protected: - // Returns a copy of `hlo`. Looks in inserted_copies_ first to avoid making - // duplicate copies. - StatusOr FindOrInsertCopy(HloInstruction* hlo); - - // A map containing all copies inserted during the copy insertion pass. The - // key is the copied instruction and the value is the copy. - tensorflow::gtl::FlatMap inserted_copies_; + // The CPU and GPU backend need additional copies added due to deficiencies in + // buffer assignment. Specifically, copies are needed for constants live-out + // of computations, and for values which are live-in and live-out of the same + // computation. These copies are needed because buffer-assignment uses a + // computation-scoped analyis (TuplePointsToAnalysis) and has limited + // visibility across computation boundaries. This method adds these necessary + // copies. Returns whether the module was modified. + // + // TODO(b/62548313): Remove this when buffer assignment is module-scoped. + static StatusOr AddCopiesForBufferAssignment(HloModule* module); }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index a2eacc5c7d..3278fd5f06 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -17,18 +17,19 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" -#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test_benchmark.h" namespace op = xla::testing::opcode_matchers; @@ -37,35 +38,53 @@ namespace { using ::testing::UnorderedElementsAre; +int64 CountCopies(const HloComputation& computation) { + int64 count = 0; + for (const auto& instruction : computation.instructions()) { + if (instruction->opcode() == HloOpcode::kCopy) { + count++; + } + } + return count; +} + +int64 CountCopies(const HloModule& module) { + int64 count = 0; + for (const auto& computation : module.computations()) { + count += CountCopies(*computation); + } + return count; +} + +int64 CountControlEdges(const HloComputation& computation) { + int64 count = 0; + for (const auto& instruction : computation.instructions()) { + count += instruction->control_successors().size(); + } + return count; +} + +int64 CountControlEdges(const HloModule& module) { + int64 count = 0; + for (const auto& computation : module.computations()) { + count += CountControlEdges(*computation); + } + return count; +} + class CopyInsertionTest : public HloTestBase { protected: void InsertCopies(HloModule* module) { CopyInsertion copy_insertion; - EXPECT_IS_OK(copy_insertion.Run(module).status()); - - // Verify the points to set of the root of the computation after copy - // insertion contains no constants or parameters, and is distinct and - // non-ambiguous. - auto points_to_analysis = - TuplePointsToAnalysis::Run(module).ConsumeValueOrDie(); - const auto& points_to = points_to_analysis->GetPointsToSet( - module->entry_computation()->root_instruction()); - EXPECT_TRUE(points_to.IsDistinct()); - EXPECT_TRUE(!points_to.IsAmbiguous()); - - auto maybe_live_out_buffers = - points_to_analysis - ->GetPointsToSet(module->entry_computation()->root_instruction()) - .CreateFlattenedSet(); - - for (const LogicalBuffer* buffer : maybe_live_out_buffers) { - EXPECT_NE(buffer->instruction()->opcode(), HloOpcode::kConstant); - EXPECT_NE(buffer->instruction()->opcode(), HloOpcode::kParameter); - } + ASSERT_IS_OK(copy_insertion.Run(module).status()); } + + const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {}); }; TEST_F(CopyInsertionTest, SingleParameter) { + // Computation is a single parameter passed into a tuple. The parameter should + // be copied before entering the tuple. auto builder = HloComputation::Builder(TestName()); HloInstruction* x = builder.AddInstruction( HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {}), "x")); @@ -77,14 +96,15 @@ TEST_F(CopyInsertionTest, SingleParameter) { auto module = CreateNewModule(); module->AddEntryComputation(builder.Build()); - HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); EXPECT_THAT(module->entry_computation()->root_instruction(), - op::Tuple(op::Copy(old_root->operand(0)))); + op::Tuple(op::Copy(x))); } TEST_F(CopyInsertionTest, SingleConstant) { + // Computation is a single constant passed into a tuple. The parameter should + // be copied before entering the tuple. auto builder = HloComputation::Builder(TestName()); HloInstruction* constant = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(1.0))); @@ -96,11 +116,42 @@ TEST_F(CopyInsertionTest, SingleConstant) { auto module = CreateNewModule(); module->AddEntryComputation(builder.Build()); - HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 1); EXPECT_THAT(module->entry_computation()->root_instruction(), - op::Tuple(op::Copy(old_root->operand(0)))); + op::Tuple(op::Copy(constant))); +} + +TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) { + // Verify that an kCopy instructions which exist in the pass before + // copy-insertion remain in the graph after copy-insertion. + auto module = CreateNewModule(); + + auto builder = HloComputation::Builder(TestName()); + HloInstruction* constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + HloInstruction* copy_1 = builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kCopy, constant)); + HloInstruction* copy_2 = builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kCopy, constant)); + HloInstruction* add = builder.AddInstruction(HloInstruction::CreateBinary( + constant->shape(), HloOpcode::kAdd, copy_1, copy_2)); + HloInstruction* add_copy = builder.AddInstruction( + HloInstruction::CreateUnary(constant->shape(), HloOpcode::kCopy, add)); + + module->AddEntryComputation(builder.Build()); + + EXPECT_EQ(CountCopies(*module), 3); + + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 3); + + EXPECT_EQ(module->entry_computation()->root_instruction(), add_copy); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::Copy(op::Add(op::Copy(op::Constant()), op::Copy(op::Constant())))); } TEST_F(CopyInsertionTest, MultipleConstantsAndParameters) { @@ -127,12 +178,12 @@ TEST_F(CopyInsertionTest, MultipleConstantsAndParameters) { auto module = CreateNewModule(); module->AddEntryComputation(builder.Build()); - HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 2); - EXPECT_THAT(module->entry_computation()->root_instruction(), - op::Tuple(op::Copy(old_root->operand(0)), - op::Copy(old_root->operand(1)), old_root->operand(2))); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(constant2), op::Copy(x), op::Add(constant1, y))); } TEST_F(CopyInsertionTest, AmbiguousPointsToSet) { @@ -165,6 +216,7 @@ TEST_F(CopyInsertionTest, AmbiguousPointsToSet) { HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 2); EXPECT_THAT(module->entry_computation()->root_instruction(), op::Tuple(op::Copy(op::GetTupleElement(old_root)), @@ -187,6 +239,7 @@ TEST_F(CopyInsertionTest, BitcastParameter) { HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 1); EXPECT_THAT(module->entry_computation()->root_instruction(), op::Copy(old_root)); @@ -208,6 +261,7 @@ TEST_F(CopyInsertionTest, BitcastConstant) { HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 1); EXPECT_THAT(module->entry_computation()->root_instruction(), op::Copy(old_root)); @@ -227,11 +281,11 @@ TEST_F(CopyInsertionTest, BitcastTupleElementParameter) { EXPECT_THAT(x->users(), UnorderedElementsAre(bitcast)); - HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 1); EXPECT_THAT(module->entry_computation()->root_instruction(), - op::Tuple(op::Copy(old_root->operand(0)))); + op::Tuple(op::Copy(bitcast))); } TEST_F(CopyInsertionTest, NestedTupleParameter) { @@ -257,6 +311,8 @@ TEST_F(CopyInsertionTest, NestedTupleParameter) { HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 3); + HloInstruction* new_root = module->entry_computation()->root_instruction(); EXPECT_NE(old_root, new_root); @@ -293,12 +349,13 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) { EXPECT_EQ(gte, module->entry_computation()->root_instruction()); - HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 2); - EXPECT_THAT(module->entry_computation()->root_instruction(), - op::Tuple(op::Copy(op::GetTupleElement(old_root)), - op::Copy(op::GetTupleElement(old_root)))); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(op::GetTupleElement(op::GetTupleElement(param))), + op::Copy(op::GetTupleElement(op::GetTupleElement(param))))); } TEST_F(CopyInsertionTest, AmbiguousTopLevelRoot) { @@ -331,6 +388,7 @@ TEST_F(CopyInsertionTest, AmbiguousTopLevelRoot) { HloInstruction* old_root = module->entry_computation()->root_instruction(); InsertCopies(module.get()); + EXPECT_EQ(CountCopies(*module), 1); EXPECT_THAT(module->entry_computation()->root_instruction(), op::Copy(old_root)); @@ -346,12 +404,10 @@ class WhileCopyInsertionTest : public CopyInsertionTest { // The parameter 'nested' specifies the loop state shape from which to // read the induction variable. std::unique_ptr BuildConditionComputation( - bool nested = false) { + const Shape& loop_state_shape) { auto builder = HloComputation::Builder(TestName() + ".Condition"); auto limit_const = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(10))); - const Shape& loop_state_shape = - nested ? nested_loop_state_shape_ : loop_state_shape_; auto loop_state = builder.AddInstruction( HloInstruction::CreateParameter(0, loop_state_shape, "loop_state")); auto induction_variable = @@ -582,7 +638,7 @@ class WhileCopyInsertionTest : public CopyInsertionTest { auto loop_state_init = builder.AddInstruction( HloInstruction::CreateTuple({induction_var_init, inner_init})); auto while_hlo = builder.AddInstruction(HloInstruction::CreateWhile( - loop_state_shape_, condition, body, loop_state_init)); + loop_state_init->shape(), condition, body, loop_state_init)); module_->AddEntryComputation(builder.Build()); return while_hlo; } @@ -658,11 +714,28 @@ class WhileCopyInsertionTest : public CopyInsertionTest { auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant( Literal::CreateR1({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}))); // Take a reference to 'data_init' to make it interfere with while result. - builder.AddInstruction(HloInstruction::CreateBinary( + auto add = builder.AddInstruction(HloInstruction::CreateBinary( data_shape_, HloOpcode::kAdd, data_init, one_vec)); - return BuildWhileInstructionWithCustomInit(loop_state_shape_, data_init, - &builder); + auto xla_while = BuildWhileInstructionWithCustomInit(loop_state_shape_, + data_init, &builder); + + // Add an additional binary operation operating on the while and the + // interfering add so that neither operation is dead. + auto gte = xla_while->parent()->AddInstruction( + HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(xla_while->shape(), {1}), xla_while, 1)); + auto sub = xla_while->parent()->AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kSubtract, add, gte)); + auto gte0 = xla_while->parent()->AddInstruction( + HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(xla_while->shape(), {0}), xla_while, 0)); + auto tuple = xla_while->parent()->AddInstruction( + HloInstruction::CreateTuple({gte0, sub})); + + xla_while->parent()->set_root_instruction(tuple); + + return xla_while; } HloInstruction* BuildWhileInstructionWithCustomInit( @@ -672,8 +745,8 @@ class WhileCopyInsertionTest : public CopyInsertionTest { ShapeUtil::Equal(loop_state_shape, nested_loop_state_shape_); auto induction_var_init = builder->AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(0))); - auto condition = - module_->AddEmbeddedComputation(BuildConditionComputation(nested)); + auto condition = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape)); auto body = module_->AddEmbeddedComputation( BuildIndependentBodyComputation(nested)); auto loop_state_init = builder->AddInstruction( @@ -706,23 +779,21 @@ class WhileCopyInsertionTest : public CopyInsertionTest { // CopyInsertion pass should not generate any copies. // TEST_F(WhileCopyInsertionTest, IndependentTupleElements) { - auto condition = module_->AddEmbeddedComputation(BuildConditionComputation()); + auto condition = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape_)); auto body = module_->AddEmbeddedComputation(BuildIndependentBodyComputation()); auto while_hlo = BuildWhileInstruction(condition, body); - const HloInstruction* old_init = while_hlo->operand(0); - HloInstruction* old_root = body->root_instruction(); InsertCopies(module_.get()); - HloInstruction* new_root = body->root_instruction(); - const HloInstruction* new_init = while_hlo->operand(0); - // No copies should be inserted so root should not be updated. - EXPECT_EQ(old_root, new_root); + // Body should have no copies as the adds can be done inplace. + EXPECT_EQ(CountCopies(*body), 0); + EXPECT_EQ(CountControlEdges(*module_), 0); - // Both init indices need copies. - EXPECT_THAT(new_init, op::Tuple(op::Copy(old_init->operand(0)), - op::Copy(old_init->operand(1)))); + // Both init indices need copies as they are constants. + EXPECT_THAT(while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), op::Copy(op::Constant()))); } // Tests while body computation with dependent tuple elements: @@ -737,20 +808,33 @@ TEST_F(WhileCopyInsertionTest, IndependentTupleElements) { // Tuple(Copy(out0), out1) // TEST_F(WhileCopyInsertionTest, DependentTupleElements) { - auto condition = module_->AddEmbeddedComputation(BuildConditionComputation()); + auto condition = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape_)); auto body = module_->AddEmbeddedComputation(BuildDependentBodyComputation()); auto while_hlo = BuildWhileInstruction(condition, body); - const HloInstruction* old_init = while_hlo->operand(0); - HloInstruction* old_root = body->root_instruction(); InsertCopies(module_.get()); - HloInstruction* new_root = body->root_instruction(); - const HloInstruction* new_init = while_hlo->operand(0); - EXPECT_THAT(new_root, - op::Tuple(op::Copy(old_root->operand(0)), old_root->operand(1))); - EXPECT_THAT(new_init, op::Tuple(op::Copy(old_init->operand(0)), - op::Copy(old_init->operand(1)))); + EXPECT_EQ(CountCopies(*body), 1); + EXPECT_EQ(CountControlEdges(*body), 0); + + EXPECT_THAT( + body->root_instruction(), + op::Tuple(op::Add(), op::Add(op::GetTupleElement(), op::Broadcast()))); + + auto add = body->root_instruction()->operand(0); + auto bcast = body->root_instruction()->operand(1)->operand(1); + ASSERT_EQ(add->opcode(), HloOpcode::kAdd); + ASSERT_EQ(bcast->opcode(), HloOpcode::kBroadcast); + + EXPECT_THAT( + while_hlo->while_body()->root_instruction(), + op::Tuple(op::Add(op::Copy(), op::Constant()), + op::Add(op::GetTupleElement(), op::Broadcast(op::Copy())))); + + // Both init indices need copies as they are constants. + EXPECT_THAT(while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), op::Copy(op::Constant()))); } // Tests while body computation with read-only tuple element 0: @@ -768,33 +852,26 @@ TEST_F(WhileCopyInsertionTest, DependentTupleElements) { // // CopyInsertion pass should not generate any copies for the while body. TEST_F(WhileCopyInsertionTest, DependentTupleElements_OneReadOnly) { - auto condition = module_->AddEmbeddedComputation(BuildConditionComputation()); + auto condition = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape_)); auto body = module_->AddEmbeddedComputation( BuildDependentBodyOneReadOnlyComputation()); - auto while_hlo = BuildWhileInstruction(condition, body); + BuildWhileInstruction(condition, body); - const HloInstruction* old_init = while_hlo->operand(0); - HloInstruction* old_root = body->root_instruction(); InsertCopies(module_.get()); - HloInstruction* new_root = body->root_instruction(); - const HloInstruction* new_init = while_hlo->operand(0); - - // No copies should be inserted in the body, so root should not be updated. - EXPECT_EQ(old_root, new_root); - // Both indices need copies, even though Index 0 is read-only, since both are - // constants, which must be copied. - EXPECT_THAT(new_init, op::Tuple(op::Copy(old_init->operand(0)), - op::Copy(old_init->operand(1)))); + // No copies or control edges should be inserted. The body is legal as is. + EXPECT_EQ(CountCopies(*body), 0); + EXPECT_EQ(CountControlEdges(*body), 0); } // Same as above, but with two while loops, sharing entry parameters. TEST_F(WhileCopyInsertionTest, DependentTupleElements_OneReadOnly_TwoLoops_EntryParams) { - auto condition1 = - module_->AddEmbeddedComputation(BuildConditionComputation()); - auto condition2 = - module_->AddEmbeddedComputation(BuildConditionComputation()); + auto condition1 = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape_)); + auto condition2 = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape_)); auto body1 = module_->AddEmbeddedComputation( BuildDependentBodyOneReadOnlyComputation()); auto body2 = module_->AddEmbeddedComputation( @@ -812,30 +889,46 @@ TEST_F(WhileCopyInsertionTest, loop_state_shape_, condition1, body1, loop_init)); auto while_hlo2 = builder.AddInstruction(HloInstruction::CreateWhile( loop_state_shape_, condition2, body2, loop_init)); - module_->AddEntryComputation(builder.Build()); + + // Add a couple elements from each of the while so both whiles are live. + auto gte1 = builder.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo1, 0)); + auto gte2 = builder.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(while_hlo2->shape(), {0}), while_hlo2, 0)); + builder.AddInstruction( + HloInstruction::CreateBinary(gte1->shape(), HloOpcode::kAdd, gte1, gte2)); + + auto entry = module_->AddEntryComputation(builder.Build()); InsertCopies(module_.get()); - // Both while loops alias iter_param, since index 0 is read-only in the body. - EXPECT_EQ(while_hlo1->operand(0)->operand(0), - while_hlo2->operand(0)->operand(0)); - EXPECT_EQ(while_hlo1->operand(0)->operand(0), iter_param); + // Neither body should have any copies or control edges in them. + EXPECT_EQ(CountCopies(*body1), 0); + EXPECT_EQ(CountCopies(*body2), 0); + EXPECT_EQ(CountControlEdges(*body1), 0); + EXPECT_EQ(CountControlEdges(*body2), 0); - // Each while loop gets its own copy of data_param, since index 1 is not - // read-only in the body. + // Only two copies should be necessary. Each of the whiles should have + // a copy of tuple element 1 (init value is a parameter, and the element is + // not non-read-only) so each of the while bodies gets its own buffer to write + // element 1 into. + EXPECT_EQ(CountCopies(*entry), 2); + + EXPECT_EQ(while_hlo1->operand(0)->operand(1)->opcode(), HloOpcode::kCopy); + EXPECT_EQ(while_hlo2->operand(0)->operand(1)->opcode(), HloOpcode::kCopy); + + // The two copies of element 1 should be different. EXPECT_NE(while_hlo1->operand(0)->operand(1), while_hlo2->operand(0)->operand(1)); - EXPECT_THAT(while_hlo1->operand(0)->operand(1), op::Copy(data_param)); - EXPECT_THAT(while_hlo2->operand(0)->operand(1), op::Copy(data_param)); } // Same as above, but with two while loops, sharing non-parameters. TEST_F(WhileCopyInsertionTest, DependentTupleElements_OneReadOnly_TwoLoops_NonParams) { - auto condition1 = - module_->AddEmbeddedComputation(BuildConditionComputation()); - auto condition2 = - module_->AddEmbeddedComputation(BuildConditionComputation()); + auto condition1 = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape_)); + auto condition2 = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape_)); auto body1 = module_->AddEmbeddedComputation( BuildDependentBodyOneReadOnlyComputation()); auto body2 = module_->AddEmbeddedComputation( @@ -858,21 +951,28 @@ TEST_F(WhileCopyInsertionTest, loop_state_shape_, condition1, body1, loop_init)); auto while_hlo2 = builder.AddInstruction(HloInstruction::CreateWhile( loop_state_shape_, condition2, body2, loop_init)); - module_->AddEntryComputation(builder.Build()); + + // Add a couple elements from each of the while so both whiles are not dead. + auto gte1 = builder.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo1, 0)); + auto gte2 = builder.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(while_hlo2->shape(), {0}), while_hlo2, 0)); + builder.AddInstruction( + HloInstruction::CreateBinary(gte1->shape(), HloOpcode::kAdd, gte1, gte2)); + auto entry = module_->AddEntryComputation(builder.Build()); InsertCopies(module_.get()); - // No copies of iter_value are necessary, since index 0 is read-only in both - // while bodies. - EXPECT_EQ(while_hlo1->operand(0)->operand(0), iter_value); - EXPECT_EQ(while_hlo2->operand(0)->operand(0), iter_value); + // Ideally only one copy should be necessary. One of the whiles should + // have a copy of tuple element 1 (the non-read-only element) so each of the + // while bodies gets its own buffer to write element 1 into. However, the + // analysis isn't perfect and adds an additional copy of element 0. + EXPECT_EQ(CountCopies(*entry), 2); - // Each while loop gets its own copy of data_value, since index 1 is not - // read-only in the body. - EXPECT_NE(while_hlo1->operand(0)->operand(1), - while_hlo2->operand(0)->operand(1)); - EXPECT_THAT(while_hlo1->operand(0)->operand(1), op::Copy(data_value)); - EXPECT_THAT(while_hlo2->operand(0)->operand(1), op::Copy(data_value)); + EXPECT_THAT(while_hlo1->operand(0), + op::Tuple(op::Exp(), op::Copy(op::Exp()))); + EXPECT_THAT(while_hlo2->operand(0), + op::Tuple(op::Exp(), op::Copy(op::Exp()))); } // Tests while body computation with nested tuple elements: @@ -905,18 +1005,34 @@ TEST_F(WhileCopyInsertionTest, // Tuple // new root // TEST_F(WhileCopyInsertionTest, NestedTupleElements) { - auto condition = - module_->AddEmbeddedComputation(BuildConditionComputation(true)); + auto condition = module_->AddEmbeddedComputation( + BuildConditionComputation(nested_loop_state_shape_)); auto body = module_->AddEmbeddedComputation(BuildNestedBodyComputation()); BuildWhileInstruction(condition, body, true); - HloInstruction* old_root = body->root_instruction(); + // HloInstruction* old_root = body->root_instruction(); InsertCopies(module_.get()); - EXPECT_THAT(body->root_instruction(), - op::Tuple(old_root->operand(0), - op::Tuple(old_root->operand(1)->operand(0), - op::Copy(old_root->operand(1)->operand(1))))); + // The only copy necessary is for the kReverse as it cannot be done + // in-place (instruction can share buffer with operand). The other elements of + // the loop state are kAdd instructions which can be done in-place. + EXPECT_EQ(CountCopies(*body), 1); + + // Each element of the init needs a copy as all are constants. + EXPECT_EQ(CountCopies(*module_), 4); + + // Either the kReverse itself must be copied or the operand of the kReverse + // must be copied. + if (body->root_instruction()->operand(1)->operand(1)->opcode() == + HloOpcode::kCopy) { + EXPECT_THAT( + body->root_instruction(), + op::Tuple(op::Add(), op::Tuple(op::Add(), op::Copy(op::Reverse())))); + } else { + EXPECT_THAT( + body->root_instruction(), + op::Tuple(op::Add(), op::Tuple(op::Add(), op::Reverse(op::Copy())))); + } } // Tests while init instruction which points-to a constant. @@ -927,11 +1043,13 @@ TEST_F(WhileCopyInsertionTest, NestedTupleElements) { // TEST_F(WhileCopyInsertionTest, InitPointsToConstant) { auto while_hlo = BuildWhileInstruction_InitPointsToConstant(); - auto old_init = while_hlo->operand(0); + InsertCopies(module_.get()); + EXPECT_EQ(CountCopies(*while_hlo->while_body()), 0); + EXPECT_EQ(CountCopies(*module_), 2); - EXPECT_THAT(while_hlo->operand(0), op::Tuple(op::Copy(old_init->operand(0)), - op::Copy(old_init->operand(1)))); + EXPECT_THAT(while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), op::Copy(op::Constant()))); } // Tests while init instruction which points-to a parameter. @@ -942,11 +1060,13 @@ TEST_F(WhileCopyInsertionTest, InitPointsToConstant) { // TEST_F(WhileCopyInsertionTest, InitPointsToParameter) { auto while_hlo = BuildWhileInstruction_InitPointsToParameter(); - auto old_init = while_hlo->operand(0); + InsertCopies(module_.get()); + EXPECT_EQ(CountCopies(*while_hlo->while_body()), 0); + EXPECT_EQ(CountCopies(*module_), 2); - EXPECT_THAT(while_hlo->operand(0), op::Tuple(op::Copy(old_init->operand(0)), - op::Copy(old_init->operand(1)))); + EXPECT_THAT(while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), op::Copy(op::Parameter()))); } // Tests while init instruction which has an ambiguous points-to set. @@ -975,15 +1095,34 @@ TEST_F(WhileCopyInsertionTest, InitPointsToParameter) { // TEST_F(WhileCopyInsertionTest, InitPointsToAmbiguous) { auto while_hlo = BuildWhileInstruction_InitPointsToAmbiguous(); - auto old_init = while_hlo->operand(0); - InsertCopies(module_.get()); - EXPECT_THAT( - while_hlo->operand(0), - op::Tuple( - op::Copy(old_init->operand(0)), - op::Tuple(op::Copy(op::GetTupleElement(old_init->operand(1))), - op::Copy(op::GetTupleElement(old_init->operand(1)))))); + InsertCopies(module_.get()); + EXPECT_EQ(CountCopies(*module_), 4); + // The entry computation requires three copies to resolve the ambiguity of two + // init elements and the constant passed in as one of the init elements. + EXPECT_EQ(CountCopies(*module_->entry_computation()), 3); + EXPECT_THAT(while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), + op::Tuple(op::Copy(op::GetTupleElement()), + op::Copy(op::GetTupleElement())))); + + // The body requires one copy because the buffer set is not distinct: the + // result of one of the adds is written into two elements of the output of the + // loop body. Either element might be copied. + EXPECT_EQ(CountCopies(*while_hlo->while_body()), 1); + if (while_hlo->while_body() + ->root_instruction() + ->operand(1) + ->operand(0) + ->opcode() == HloOpcode::kCopy) { + EXPECT_THAT( + while_hlo->while_body()->root_instruction(), + op::Tuple(op::Add(), op::Tuple(op::Copy(op::Add()), op::Add()))); + } else { + EXPECT_THAT( + while_hlo->while_body()->root_instruction(), + op::Tuple(op::Add(), op::Tuple(op::Add(), op::Copy(op::Add())))); + } } // Tests while init instruction which has a non-distinct points-to set. @@ -1011,13 +1150,43 @@ TEST_F(WhileCopyInsertionTest, InitPointsToAmbiguous) { // TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) { auto while_hlo = BuildWhileInstruction_InitPointsToNonDistinct(); - auto old_init = while_hlo->operand(0); + InsertCopies(module_.get()); - EXPECT_THAT(while_hlo->operand(0), - op::Tuple(op::Copy(old_init->operand(0)), - op::Tuple(op::Copy(old_init->operand(1)->operand(0)), - op::Copy(old_init->operand(1)->operand(0))))); + // The entry computation requires two copies to resolve the non-disinctness of + // two init elements and the constant passed in as one of the init + // elements. Either element can be copied for the distinctness issue. + EXPECT_EQ(CountCopies(*module_->entry_computation()), 2); + if (while_hlo->operand(0)->operand(1)->operand(0)->opcode() == + HloOpcode::kCopy) { + EXPECT_THAT( + while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), + op::Tuple(op::Copy(op::Broadcast()), op::Broadcast()))); + } else { + EXPECT_THAT( + while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), + op::Tuple(op::Broadcast(), op::Copy(op::Broadcast())))); + } + + // The body requires one copy because the buffer set is not distinct: the + // result of one of the adds is written into two elements of the output of the + // loop body. Either element might be copied. + EXPECT_EQ(CountCopies(*while_hlo->while_body()), 1); + if (while_hlo->while_body() + ->root_instruction() + ->operand(1) + ->operand(0) + ->opcode() == HloOpcode::kCopy) { + EXPECT_THAT( + while_hlo->while_body()->root_instruction(), + op::Tuple(op::Add(), op::Tuple(op::Copy(op::Add()), op::Add()))); + } else { + EXPECT_THAT( + while_hlo->while_body()->root_instruction(), + op::Tuple(op::Add(), op::Tuple(op::Add(), op::Copy(op::Add())))); + } } // Tests while init instruction buffer which interferes with while result @@ -1031,11 +1200,13 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) { // TEST_F(WhileCopyInsertionTest, InitPointsToInterfering) { auto while_hlo = BuildWhileInstruction_InitPointsToInterfering(); - auto old_init = while_hlo->operand(0); + InsertCopies(module_.get()); + EXPECT_EQ(CountCopies(*module_), 2); + EXPECT_EQ(CountCopies(*while_hlo->while_body()), 0); - EXPECT_THAT(while_hlo->operand(0), op::Tuple(op::Copy(old_init->operand(0)), - op::Copy(old_init->operand(1)))); + EXPECT_THAT(while_hlo->operand(0), + op::Tuple(op::Copy(op::Constant()), op::Copy(op::Broadcast()))); } // Tests while init instruction buffer which has a non-distinct points-to set: @@ -1044,18 +1215,21 @@ TEST_F(WhileCopyInsertionTest, InitPointsToInterfering) { // Parameter(F32, {8}))) // // where the second and third parameters are identical *and* the tuple shared -// by another while instruction.. +// by another while instruction. // // Verifies that the resulting point-to set is distinct in the resulting Tuple // (non-identical Copys). In other words, verifies that copy sharing does not // insert identical copies to the resulting tuple. TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinctUsedByTwoWhileLoops) { - auto condition1 = - module_->AddEmbeddedComputation(BuildConditionComputation()); - auto condition2 = - module_->AddEmbeddedComputation(BuildConditionComputation()); // Loop body that outputs tuple comprises two elements dependent on the init // tuple. + const Shape& loop_state_shape = ShapeUtil::MakeTupleShape( + {induction_variable_shape_, data_shape_, data_shape_}); + + auto condition1 = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape)); + auto condition2 = module_->AddEmbeddedComputation( + BuildConditionComputation(loop_state_shape)); auto body1 = module_->AddEmbeddedComputation(BuildDependentBodyComputation2()); auto body2 = @@ -1072,8 +1246,6 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinctUsedByTwoWhileLoops) { auto loop_init = builder.AddInstruction( HloInstruction::CreateTuple({iter_param, data_param, data_param})); - const Shape& loop_state_shape = ShapeUtil::MakeTupleShape( - {induction_variable_shape_, data_shape_, data_shape_}); // Two while loops shares the same loop init tuple. auto while_hlo1 = builder.AddInstruction(HloInstruction::CreateWhile( @@ -1081,43 +1253,478 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinctUsedByTwoWhileLoops) { auto while_hlo2 = builder.AddInstruction(HloInstruction::CreateWhile( loop_state_shape, condition2, body2, loop_init)); - module_->AddEntryComputation(builder.Build()); + // Add add instruction so neither while is dead. + auto gte1 = builder.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo1, 0)); + auto gte2 = builder.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo2, 0)); + builder.AddInstruction( + HloInstruction::CreateBinary(gte1->shape(), HloOpcode::kAdd, gte1, gte2)); - auto points_to_analysis = - TuplePointsToAnalysis::Run(module_.get()).ConsumeValueOrDie(); + module_->AddEntryComputation(builder.Build()); - // Asserts that the init tuples before copy insertion is non-distinct. - ASSERT_FALSE( - points_to_analysis->GetPointsToSet(while_hlo1->operand(0)).IsDistinct()); - ASSERT_FALSE( - points_to_analysis->GetPointsToSet(while_hlo2->operand(0)).IsDistinct()); + InsertCopies(module_.get()); - auto old_init1 = while_hlo1->operand(0); - auto old_init2 = while_hlo2->operand(0); + // None of the bodies should have copies or control flow edges. + EXPECT_EQ(CountCopies(*body1), 0); + EXPECT_EQ(CountCopies(*body2), 0); - InsertCopies(module_.get()); + // The loop bodies pass through elements 1 and 2 in the init tuple, so ideally + // these should not need to be copied before either while. However, copy + // insertion is not able to reason about the transparency of elements through + // while bodies in all circumstances so extra copies are added (b/xxx). + EXPECT_EQ(CountCopies(*module_->entry_computation()), 2); EXPECT_THAT(while_hlo1->operand(0), - op::Tuple(op::Copy(old_init1->operand(0)), - op::Copy(old_init1->operand(1)), - op::Copy(old_init1->operand(2)))); - + op::Tuple(op::Copy(), op::Parameter(), op::Parameter())); EXPECT_THAT(while_hlo2->operand(0), - op::Tuple(op::Copy(old_init2->operand(0)), - op::Copy(old_init2->operand(1)), - op::Copy(old_init2->operand(2)))); - - // Verifies the init tuples after copy insertion is distinct. - points_to_analysis = - TuplePointsToAnalysis::Run(module_.get()).ConsumeValueOrDie(); - const auto& points_to1 = - points_to_analysis->GetPointsToSet(while_hlo1->operand(0)); - EXPECT_TRUE(points_to1.IsDistinct()); - - const auto& points_to2 = - points_to_analysis->GetPointsToSet(while_hlo2->operand(0)); - EXPECT_TRUE(points_to2.IsDistinct()); + op::Tuple(op::Copy(), op::Parameter(), op::Parameter())); } +TEST_F(CopyInsertionTest, SwizzlingWhile) { + // Test a while instruction with a body which permutes its tuple parameter + // elements. + auto module = CreateNewModule(); + const Shape loop_state_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + // Body simply interchanges the two tuple elements in the loop state. + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto body_element_0 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0)); + auto body_element_1 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); + body_builder.AddInstruction( + HloInstruction::CreateTuple({body_element_1, body_element_0})); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto cond_constant = cond_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + cond_builder.AddInstruction(HloInstruction::CreateUnary( + cond_constant->shape(), HloOpcode::kNot, cond_constant)); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto constant2 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(2.0))); + auto tuple = builder.AddInstruction( + HloInstruction::CreateTuple({constant1, constant2})); + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(loop_state_shape, condition, body, tuple)); + module->AddEntryComputation(builder.Build()); + + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 6); + + // The loop state elements should be copied at the parameter and at the root + // with a control edge in between (see DeepCopyAndAddControlEdges). This is + // technically one more copy than is strictly necessary, but in order to have + // only three copies the copies of different loop state elements must be + // ordered with a control edge. + EXPECT_EQ(CountCopies(*body), 4); + EXPECT_EQ(CountControlEdges(*body), 2); + + EXPECT_THAT(body->root_instruction(), + op::Tuple(op::Copy(op::Copy()), op::Copy(op::Copy()))); + + EXPECT_EQ(CountCopies(*module->entry_computation()), 2); + EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy())); +} + +TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) { + // Test a while instruction with a body which permutes its tuple parameter + // elements and applies one operation to one of the elements. The addition of + // the operation (instruction) on the element makes the live range of the + // respective input and output elements different than if the instruction were + // not there (as in the SwizzlingWhile test above). + auto module = CreateNewModule(); + const Shape loop_state_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + // Body interchanges the two tuple elements in the loop state and negates one + // of them. + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto body_element_0 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0)); + auto body_element_1 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); + auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape_, HloOpcode::kNegate, body_element_1)); + body_builder.AddInstruction( + HloInstruction::CreateTuple({negate, body_element_0})); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto cond_constant = cond_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + cond_builder.AddInstruction(HloInstruction::CreateUnary( + cond_constant->shape(), HloOpcode::kNot, cond_constant)); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto constant1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto constant2 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(2.0))); + auto tuple = builder.AddInstruction( + HloInstruction::CreateTuple({constant1, constant2})); + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(loop_state_shape, condition, body, tuple)); + module->AddEntryComputation(builder.Build()); + + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 6); + + // The loop state elements should be copied at the parameter and at the root + // with a control edge in between (see DeepCopyAndAddControlEdges). + EXPECT_EQ(CountCopies(*body), 4); + EXPECT_EQ(CountControlEdges(*body), 2); + + EXPECT_THAT( + body->root_instruction(), + op::Tuple(op::Copy(op::Negate(op::Copy())), op::Copy(op::Copy()))); + + EXPECT_EQ(CountCopies(*module->entry_computation()), 2); + EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy())); +} + +TEST_F(CopyInsertionTest, SwizzlingWhileSharedInput) { + // Test a while instruction with a body which permutes it's tuple parameter + // elements similar to SwizzlinWhile above. However, in this test the input to + // the while body is a single constant (both loop state elements are the same + // constant). This means no copies are necessary because both loop state + // elements are the same so interchanging them is a no-op. + auto module = CreateNewModule(); + const Shape loop_state_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + // Body simply interchanges the two tuple elements in the loop state. + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto body_element_0 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0)); + auto body_element_1 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); + body_builder.AddInstruction( + HloInstruction::CreateTuple({body_element_1, body_element_0})); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto cond_constant = cond_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + cond_builder.AddInstruction(HloInstruction::CreateUnary( + cond_constant->shape(), HloOpcode::kNot, cond_constant)); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({constant, constant})); + builder.AddInstruction( + HloInstruction::CreateWhile(loop_state_shape, condition, body, tuple)); + module->AddEntryComputation(builder.Build()); + + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 2); + EXPECT_EQ(CountCopies(*body), 0); + + EXPECT_EQ(CountCopies(*module->entry_computation()), 2); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(), op::Copy())); +} + +TEST_F(CopyInsertionTest, SequentialWhiles) { + // Construct a computation with a series of sequential while instructions + // containing four loop state elements: + // + // element 0 is passed to each while directly from an entry parameter. + // + // element 1 is passed transparently in series through all the while bodies. + // + // element 2 is negated in each while body. (in-place possible) + // + // element 3 is reversed in each while body. (in-place not possible) + // + const Shape element_shape = ShapeUtil::MakeShape(F32, {42}); + const Shape loop_state_shape = ShapeUtil::MakeTupleShape( + {element_shape, element_shape, element_shape, element_shape}); + + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + auto param_0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, element_shape, "param_0")); + auto param_1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, element_shape, "param_1")); + auto param_2 = builder.AddInstruction( + HloInstruction::CreateParameter(2, element_shape, "param_2")); + auto param_3 = builder.AddInstruction( + HloInstruction::CreateParameter(3, element_shape, "param_3")); + + // The number of sequential kWhile instructions. + const int kNumWhiles = 3; + + HloInstruction* prev_element_1 = param_1; + HloInstruction* prev_element_2 = param_2; + HloInstruction* prev_element_3 = param_3; + + // Vector containing all of the while instructions. + std::vector whiles; + for (int i = 0; i < kNumWhiles; ++i) { + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto body_element_0 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, body_param, 0)); + auto body_element_1 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, body_param, 1)); + auto body_element_2 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, body_param, 2)); + auto body_element_3 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, body_param, 3)); + auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( + element_shape, HloOpcode::kNegate, body_element_2)); + auto reverse = body_builder.AddInstruction( + HloInstruction::CreateReverse(element_shape, body_element_3, {0})); + body_builder.AddInstruction(HloInstruction::CreateTuple( + {body_element_0, body_element_1, negate, reverse})); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "param")); + auto cond_constant = cond_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + cond_builder.AddInstruction(HloInstruction::CreateUnary( + cond_constant->shape(), HloOpcode::kNot, cond_constant)); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto while_init = builder.AddInstruction(HloInstruction::CreateTuple( + {param_0, prev_element_1, prev_element_2, prev_element_3})); + + auto xla_while = builder.AddInstruction(HloInstruction::CreateWhile( + loop_state_shape, condition, body, while_init)); + whiles.push_back(xla_while); + if (i != kNumWhiles - 1) { + prev_element_1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, xla_while, 1)); + prev_element_2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, xla_while, 2)); + prev_element_3 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, xla_while, 3)); + } + } + + module->AddEntryComputation(builder.Build()); + + InsertCopies(module.get()); + + // Each while body has one copy. And each loop state element is copied once in + // the entry computation. + EXPECT_EQ(CountCopies(*module), 4 + kNumWhiles); + + // Each while body should have exactly one copy for element three which is an + // op (kReverse) which cannot be done in place. + for (const HloInstruction* xla_while : whiles) { + EXPECT_EQ(CountCopies(*xla_while->while_body()), 1); + } + + EXPECT_THAT(whiles[0]->operand(0), op::Tuple(op::Parameter(), op::Parameter(), + op::Copy(), op::Copy())); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(), op::Copy(), op::GetTupleElement(), + op::GetTupleElement())); +} + +TEST_F(CopyInsertionTest, WhileBodyWithConstantRoot) { + // Test a while body and condition which are each simply a constant (root of + // computation is a constant). The body constant should be copied. + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + auto param_0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param_0")); + + auto body_builder = HloComputation::Builder("body"); + body_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param")); + body_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(123.0))); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param")); + cond_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(scalar_shape_, condition, body, param_0)); + + module->AddEntryComputation(builder.Build()); + + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 2); + + EXPECT_THAT(xla_while->operand(0), op::Copy(op::Parameter())); + EXPECT_THAT(body->root_instruction(), op::Copy(op::Constant())); + EXPECT_THAT(condition->root_instruction(), op::Constant()); +} + +std::unique_ptr MakeTrivialCondition(const Shape& shape) { + auto builder = HloComputation::Builder("trivial_condition"); + builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "loop_state")); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kNot, constant)); + return builder.Build(); +} + +std::unique_ptr MakeBenchmarkWhileBody() { + auto builder = HloComputation::Builder("benchmark_loop_body"); + const Shape element_shape = ShapeUtil::MakeShape(F32, {42}); + const Shape loop_state_shape = + ShapeUtil::MakeTupleShape({element_shape, element_shape, element_shape}); + HloInstruction* param = builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "loop_state")); + HloInstruction* element_0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, param, 0)); + HloInstruction* element_1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, param, 1)); + HloInstruction* element_2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, param, 2)); + + HloInstruction* rev_1 = builder.AddInstruction( + HloInstruction::CreateReverse(element_shape, element_1, {0})); + HloInstruction* add_1_2 = builder.AddInstruction(HloInstruction::CreateBinary( + element_shape, HloOpcode::kAdd, element_1, element_2)); + + builder.AddInstruction( + HloInstruction::CreateTuple({element_0, rev_1, add_1_2})); + return builder.Build(); +} + +void BM_SequentialWhiles(int num_iters, int num_whiles) { + // This benchmark constructs a chain of sequential while instructions. + tensorflow::testing::StopTiming(); + for (int i = 0; i < num_iters; ++i) { + HloModuleConfig config; + config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); + HloModule module("BM_SequentialWhiles", VersionedComputationHandle(), + config); + + auto builder = HloComputation::Builder("BM_SequentialWhiles"); + HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(F32, {42}), "x")); + HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter( + 1, ShapeUtil::MakeShape(F32, {42}), "y")); + HloInstruction* z = builder.AddInstruction(HloInstruction::CreateParameter( + 2, ShapeUtil::MakeShape(F32, {42}), "z")); + HloInstruction* init = + builder.AddInstruction(HloInstruction::CreateTuple({x, y, z})); + + HloInstruction* prev_loop_state = init; + for (int w = 0; w < num_whiles; ++w) { + HloComputation* condition = + module.AddEmbeddedComputation(MakeTrivialCondition(init->shape())); + HloComputation* body = + module.AddEmbeddedComputation(MakeBenchmarkWhileBody()); + prev_loop_state = builder.AddInstruction(HloInstruction::CreateWhile( + init->shape(), condition, body, prev_loop_state)); + } + module.AddEntryComputation(builder.Build()); + + CopyInsertion copy_insertion; + + tensorflow::testing::StartTiming(); + ASSERT_IS_OK(copy_insertion.Run(&module).status()); + tensorflow::testing::StopTiming(); + + // The entry computation should have three copies, and each body has one. + ASSERT_EQ(CountCopies(module), 3 + num_whiles); + } +} + +void BM_ParallelWhiles(int num_iters, int num_whiles) { + // This benchmark constructs a fan-out of parallel while instructions. + tensorflow::testing::StopTiming(); + for (int i = 0; i < num_iters; ++i) { + HloModuleConfig config; + config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); + HloModule module("BM_SequentialWhiles", VersionedComputationHandle(), + config); + + auto builder = HloComputation::Builder("BM_ParallelWhiles"); + HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(F32, {42}), "x")); + HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter( + 1, ShapeUtil::MakeShape(F32, {42}), "y")); + HloInstruction* z = builder.AddInstruction(HloInstruction::CreateParameter( + 2, ShapeUtil::MakeShape(F32, {42}), "z")); + HloInstruction* init = + builder.AddInstruction(HloInstruction::CreateTuple({x, y, z})); + + HloInstruction* sum = nullptr; + for (int w = 0; w < num_whiles; ++w) { + HloComputation* condition = + module.AddEmbeddedComputation(MakeTrivialCondition(init->shape())); + HloComputation* body = + module.AddEmbeddedComputation(MakeBenchmarkWhileBody()); + + HloInstruction* xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(init->shape(), condition, body, init)); + + if (sum == nullptr) { + sum = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(x->shape(), xla_while, 0)); + } else { + HloInstruction* element_0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(x->shape(), xla_while, 0)); + sum = builder.AddInstruction(HloInstruction::CreateBinary( + x->shape(), HloOpcode::kAdd, sum, element_0)); + } + } + module.AddEntryComputation(builder.Build()); + + CopyInsertion copy_insertion; + + tensorflow::testing::StartTiming(); + ASSERT_IS_OK(copy_insertion.Run(&module).status()); + tensorflow::testing::StopTiming(); + + // Each body receives of copy of two of the parameters (the corresponding + // elements in the body are modifed), and there is one copy in each body. + ASSERT_EQ(CountCopies(module), 3 * num_whiles); + } +} + +BENCHMARK(BM_SequentialWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096); +BENCHMARK(BM_ParallelWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096); + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 8005cfac8c..e1eed498f6 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -79,6 +79,7 @@ cc_library( deps = [ ":compiler_functor", ":conv_canonicalization", + ":cpu_copy_insertion", ":cpu_executable", ":cpu_instruction_fusion", ":cpu_options", @@ -103,7 +104,6 @@ cc_library( "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:buffer_liveness", "//tensorflow/compiler/xla/service:call_inliner", - "//tensorflow/compiler/xla/service:copy_insertion", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:flatten_call_graph", "//tensorflow/compiler/xla/service:hlo", @@ -751,6 +751,38 @@ cc_library( ], ) +cc_library( + name = "cpu_copy_insertion", + srcs = ["cpu_copy_insertion.cc"], + hdrs = ["cpu_copy_insertion.h"], + deps = [ + "//tensorflow/compiler/xla/service:copy_insertion", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "cpu_copy_insertion_test", + srcs = ["cpu_copy_insertion_test.cc"], + deps = [ + ":cpu_copy_insertion", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:debug_options_flags", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_graph_dumper", + "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 88f7e7a93f..56940b8d63 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -46,9 +46,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_inliner.h" -#include "tensorflow/compiler/xla/service/copy_insertion.h" #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h" #include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h" #include "tensorflow/compiler/xla/service/cpu/cpu_executable.h" #include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h" #include "tensorflow/compiler/xla/service/cpu/cpu_options.h" @@ -332,15 +332,16 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { // (and sometime after) copy insertion, to avoid dead code from interfering // with the rewrites. pipeline.AddPass(); - pipeline.AddPass(); + pipeline.AddPass(); + pipeline.AddPass(); if (options::CpuParallelBackendRequested(module->config())) { // Re-run the outlining, in case any copies were inserted into the entry // computation. pipeline.AddPass(max_parallelism, ShapeSizeBytesFunction()); + pipeline.AddPass(); } pipeline.AddPass(); - pipeline.AddPass(); return pipeline.Run(module).status(); } diff --git a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc new file mode 100644 index 0000000000..baaacd2ecc --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc @@ -0,0 +1,43 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h" + +#include +#include +#include + +#include "tensorflow/compiler/xla/service/copy_insertion.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/logging.h" + +namespace xla { + +StatusOr CpuCopyInsertion::Run(HloModule* module) { + CopyInsertion generic_copy_insertion; + + TF_ASSIGN_OR_RETURN(bool generic_changed, generic_copy_insertion.Run(module)); + + // The CPU backend needs additional copies added due to deficiencies in + // buffer assignment. + TF_ASSIGN_OR_RETURN(bool buffer_assignment_changed, + CopyInsertion::AddCopiesForBufferAssignment(module)); + + return generic_changed || buffer_assignment_changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h new file mode 100644 index 0000000000..3313d1e6eb --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h @@ -0,0 +1,42 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_COPY_INSERTION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_COPY_INSERTION_H_ + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// Besides the modifications made by the generic xla::CopyInsertion, this +// CPU-specific copy insertion pass also adds copies to values live out of +// computations satisfying certain conditions (defined by constant or parameter, +// etc). This is necessary because of deficiencies of buffer +// assignment. Specifically, buffer assignment is computation-scoped and does +// not recognized aliasing between arguments and outputs of computations. +// +// TODO(b/62548313): Remove this when buffer assignment is smarter +// (module-scoped). +class CpuCopyInsertion : public HloPassInterface { + public: + tensorflow::StringPiece name() const override { return "copy-insertion"; } + + StatusOr Run(HloModule* module) override; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_COPY_INSERTION_H_ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc new file mode 100644 index 0000000000..a05a269417 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc @@ -0,0 +1,139 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h" + +#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace xla { +namespace { + +namespace op = xla::testing::opcode_matchers; + +int64 CountCopies(const HloComputation& computation) { + int64 count = 0; + for (const auto& instruction : computation.instructions()) { + if (instruction->opcode() == HloOpcode::kCopy) { + count++; + } + } + return count; +} + +int64 CountCopies(const HloModule& module) { + int64 count = 0; + for (const auto& computation : module.computations()) { + count += CountCopies(*computation); + } + return count; +} + +class CpuCopyInsertionTest : public HloTestBase { + protected: + void InsertCopies(HloModule* module) { + CpuCopyInsertion copy_insertion; + ASSERT_IS_OK(copy_insertion.Run(module).status()); + } + + const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {}); +}; + +TEST_F(CpuCopyInsertionTest, WhileBodyWithConstantRoot) { + // Test a while body and condition which are each simply a constant (root of + // computation is a constant). Each constant should be copied. + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + auto param_0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param_0")); + + auto body_builder = HloComputation::Builder("body"); + body_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param")); + body_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(123.0))); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param")); + cond_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(scalar_shape_, condition, body, param_0)); + + module->AddEntryComputation(builder.Build()); + + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 3); + + EXPECT_THAT(xla_while->operand(0), op::Copy(op::Parameter())); + EXPECT_THAT(body->root_instruction(), op::Copy(op::Constant())); + EXPECT_THAT(condition->root_instruction(), op::Copy(op::Constant())); +} + +TEST_F(CpuCopyInsertionTest, TupleCall) { + // Test a kCall instruction which calls a computation which produces a three + // element tuple: one is a constant, one is a parameter, and one is produced + // in the computation. The constant and parameter should be copied. + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param_0")); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_, scalar_shape_}); + + auto sub_builder = HloComputation::Builder("subcomputation"); + auto sub_param = sub_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape_, "param")); + auto constant = sub_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(123.0))); + auto add = sub_builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape_, HloOpcode::kAdd, sub_param, constant)); + sub_builder.AddInstruction( + HloInstruction::CreateTuple({sub_param, constant, add})); + HloComputation* subcomputation = + module->AddEmbeddedComputation(sub_builder.Build()); + + builder.AddInstruction( + HloInstruction::CreateCall(tuple_shape, {param}, subcomputation)); + + module->AddEntryComputation(builder.Build()); + + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*subcomputation), 2); + EXPECT_THAT(subcomputation->root_instruction(), + op::Tuple(op::Copy(op::Parameter()), op::Copy(op::Constant()), + op::Add())); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 364b76b93c..e57558b578 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -343,15 +343,16 @@ tf_cc_test( ) cc_library( - name = "copy_insertion", - srcs = ["copy_insertion.cc"], - hdrs = ["copy_insertion.h"], + name = "gpu_copy_insertion", + srcs = ["gpu_copy_insertion.cc"], + hdrs = ["gpu_copy_insertion.h"], deps = [ ":ir_emission_utils", + "//tensorflow/compiler/xla/service:call_graph", "//tensorflow/compiler/xla/service:copy_insertion", "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service:logical_buffer", - "//tensorflow/compiler/xla/service:tuple_points_to_analysis", + "//tensorflow/compiler/xla/service:hlo_dataflow_analysis", + "//tensorflow/compiler/xla/service:hlo_pass", "//tensorflow/core:lib", ], ) @@ -427,8 +428,8 @@ cc_library( hdrs = ["gpu_compiler.h"], deps = [ ":convolution_folding", - ":copy_insertion", ":fusion_merger", + ":gpu_copy_insertion", ":gpu_executable", ":hlo_schedule", ":instruction_fusion", @@ -574,11 +575,14 @@ tf_cc_test( deps = [ ":instruction_fusion", ":while_transformer", + "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla/service:copy_insertion", + "//tensorflow/compiler/xla/service:hlo_verifier", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/copy_insertion.cc deleted file mode 100644 index 3dc8555201..0000000000 --- a/tensorflow/compiler/xla/service/gpu/copy_insertion.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/gpu/copy_insertion.h" - -#include -#include -#include - -#include "tensorflow/compiler/xla/service/copy_insertion.h" -#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_opcode.h" -#include "tensorflow/compiler/xla/service/logical_buffer.h" -#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/logging.h" - -namespace xla { -namespace gpu { - -StatusOr GpuCopyInsertion::Run(HloModule* module) { - TF_ASSIGN_OR_RETURN(bool changed, CopyInsertion::Run(module)); - - TF_ASSIGN_OR_RETURN(auto points_to_analysis, - TuplePointsToAnalysis::Run(module)); - - // Make sure all operands of a library call are in memory instead of constants - // in IR. The top-level (index {}) of the points-to set of each operand - // indicates the source(s) of the array buffer. If any of these are constant, - // then add a copy to materialize the array. - HloComputation* computation = module->entry_computation(); - for (HloInstruction* hlo : computation->MakeInstructionPostOrder()) { - if (ImplementedAsLibraryCall(*hlo)) { - for (int64 i = 0; i < hlo->operand_count(); ++i) { - HloInstruction* operand = hlo->mutable_operand(i); - const PointsToSet& points_to = - points_to_analysis->GetPointsToSet(operand); - const auto& element = points_to.element(/*index=*/{}); - if (std::any_of(element.begin(), element.end(), - [](const LogicalBuffer* buffer_source) { - return buffer_source->instruction()->opcode() == - HloOpcode::kConstant; - })) { - TF_ASSIGN_OR_RETURN(HloInstruction * copy, - CopyInsertion::FindOrInsertCopy(operand)); - TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(i, copy)); - changed = true; - } - } - } - } - - return changed; -} - -} // namespace gpu -} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index e84c390745..92c53265d0 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -33,8 +33,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/gpu/convolution_folding.h" -#include "tensorflow/compiler/xla/service/gpu/copy_insertion.h" #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" +#include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" #include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" @@ -224,9 +224,8 @@ tensorflow::Status PrepareHloModuleForIrEmitting( // (and sometime after) copy insertion, to avoid dead code from interfering // with the rewrites. pipeline.AddPass(); - pipeline.AddPass(); - pipeline.AddPass(); pipeline.AddPass(); + pipeline.AddPass(); return pipeline.Run(hlo_module).status(); } diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc new file mode 100644 index 0000000000..33d739b79d --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc @@ -0,0 +1,112 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h" + +#include +#include +#include + +#include "tensorflow/compiler/xla/service/call_graph.h" +#include "tensorflow/compiler/xla/service/copy_insertion.h" +#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatset.h" +#include "tensorflow/core/platform/logging.h" + +namespace xla { + +namespace gpu { + +StatusOr GpuCopyInsertion::FindOrInsertCopy( + HloInstruction* hlo) { + HloInstruction*& copy = inserted_copies_[hlo]; + if (copy == nullptr) { + TF_ASSIGN_OR_RETURN(copy, hlo->parent()->DeepCopyInstruction(hlo)); + } + return copy; +} + +StatusOr GpuCopyInsertion::Run(HloModule* module) { + CopyInsertion generic_copy_insertion; + + TF_ASSIGN_OR_RETURN(bool changed, generic_copy_insertion.Run(module)); + + TF_ASSIGN_OR_RETURN(std::unique_ptr dataflow, + HloDataflowAnalysis::Run(module)); + + // Make sure all operands of a library call are in memory instead of constants + // in IR. + for (HloInstruction* hlo : + module->entry_computation()->MakeInstructionPostOrder()) { + if (ImplementedAsLibraryCall(*hlo)) { + for (int64 i = 0; i < hlo->operand_count(); ++i) { + HloInstruction* operand = hlo->mutable_operand(i); + TF_RET_CHECK(ShapeUtil::IsArray(operand->shape())); + const auto& values = dataflow->GetValueSet(operand).values(); + if (std::any_of(values.begin(), values.end(), + [](const HloValue* value) { + return value->defining_instruction()->opcode() == + HloOpcode::kConstant; + })) { + TF_ASSIGN_OR_RETURN(HloInstruction * copy, FindOrInsertCopy(operand)); + TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(i, copy)); + changed = true; + } + } + } + } + + // Init values of a while node cannot be constants. Insert copies for any + // constants found at the operand of a while. + tensorflow::gtl::FlatSet copied_constants; + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() != HloOpcode::kWhile) { + continue; + } + for (auto& pair : + dataflow->GetInstructionValueSet(instruction->operand(0))) { + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (value->defining_instruction()->opcode() == + HloOpcode::kConstant && + !ContainsKey(copied_constants, value->defining_instruction())) { + HloInstruction* constant = value->defining_instruction(); + TF_ASSIGN_OR_RETURN(HloInstruction * copy, + FindOrInsertCopy(constant)); + TF_RETURN_IF_ERROR(constant->ReplaceAllUsesWith(copy)); + copied_constants.insert(constant); + changed = true; + } + } + } + } + } + + // The GPU backend needs additional copies added due to deficiencies in + // buffer assignment. + TF_ASSIGN_OR_RETURN(bool buffer_assignment_changed, + CopyInsertion::AddCopiesForBufferAssignment(module)); + + return changed || buffer_assignment_changed; +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/copy_insertion.h b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h similarity index 56% rename from tensorflow/compiler/xla/service/gpu/copy_insertion.h rename to tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h index 11077dad2e..4d77f337e6 100644 --- a/tensorflow/compiler/xla/service/gpu/copy_insertion.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_COPY_INSERTION_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_COPY_INSERTION_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COPY_INSERTION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COPY_INSERTION_H_ -#include "tensorflow/compiler/xla/service/copy_insertion.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" namespace xla { namespace gpu { @@ -25,12 +25,23 @@ namespace gpu { // Besides the modifications made by the generic xla::CopyInsertion, this // GPU-specific copy insertion also materializes operands of library calls by // inserting kCopy instructions. -class GpuCopyInsertion : public CopyInsertion { +class GpuCopyInsertion : public HloPassInterface { public: + tensorflow::StringPiece name() const override { return "copy-insertion"; } + StatusOr Run(HloModule* module) override; + + protected: + // Returns a copy of `hlo`. Looks in inserted_copies_ first to avoid making + // duplicate copies. + StatusOr FindOrInsertCopy(HloInstruction* hlo); + + // A map containing all copies inserted to materialize operands of library + // calls. The key is the copied instruction and the value is the copy. + tensorflow::gtl::FlatMap inserted_copies_; }; } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_COPY_INSERTION_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COPY_INSERTION_H_ diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc index 44188473d3..f16daa0b54 100644 --- a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc +++ b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc @@ -17,9 +17,12 @@ limitations under the License. #include "tensorflow/compiler/xla/service/copy_insertion.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" +#include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" namespace xla { namespace { @@ -33,8 +36,6 @@ class WhileTransformerTest : public HloTestBase { : module_(CreateNewModule()), induction_variable_shape_(ShapeUtil::MakeShape(S32, {})), data_shape_(ShapeUtil::MakeShape(F32, {8})), - loop_state_shape_(ShapeUtil::MakeTupleShape( - {induction_variable_shape_, data_shape_})), condition_result_shape_(ShapeUtil::MakeShape(PRED, {})) {} std::unique_ptr BuildConditionComputation( @@ -42,8 +43,8 @@ class WhileTransformerTest : public HloTestBase { auto builder = HloComputation::Builder(TestName() + ".Condition"); auto limit_const = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(limit))); - auto loop_state = builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_state_shape_, "loop_state")); + auto loop_state = builder.AddInstruction(HloInstruction::CreateParameter( + 0, GetLoopStateShape(tuple_index), "loop_state")); auto induction_variable = builder.AddInstruction(HloInstruction::CreateGetTupleElement( limit_const->shape(), loop_state, tuple_index)); @@ -58,8 +59,8 @@ class WhileTransformerTest : public HloTestBase { const int64 increment) { auto builder = HloComputation::Builder(TestName() + ".Body"); // Create param instruction to access loop state. - auto loop_state = builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_state_shape_, "loop_state")); + auto loop_state = builder.AddInstruction(HloInstruction::CreateParameter( + 0, GetLoopStateShape(ind_var_tuple_index), "loop_state")); // Update the induction variable GTE(ind_var_tuple_index). auto induction_variable = builder.AddInstruction(HloInstruction::CreateGetTupleElement( @@ -73,7 +74,7 @@ class WhileTransformerTest : public HloTestBase { data_shape_, loop_state, data_tuple_index)); // Use 'induction_variable' in computation with no path to output tuple. auto update = builder.AddInstruction( - HloInstruction::CreateBroadcast(data_shape_, induction_variable, {8})); + HloInstruction::CreateBroadcast(data_shape_, induction_variable, {})); auto add1 = builder.AddInstruction(HloInstruction::CreateBinary( data_shape_, HloOpcode::kAdd, data, update)); // Create output Tuple. @@ -98,8 +99,9 @@ class WhileTransformerTest : public HloTestBase { HloInstruction::CreateTuple({induction_var_init, data_init})) : builder.AddInstruction( HloInstruction::CreateTuple({data_init, induction_var_init})); - auto while_hlo = builder.AddInstruction(HloInstruction::CreateWhile( - loop_state_shape_, condition, body, loop_state_init)); + auto while_hlo = builder.AddInstruction( + HloInstruction::CreateWhile(GetLoopStateShape(ind_var_tuple_index), + condition, body, loop_state_init)); module_->AddEntryComputation(builder.Build()); return while_hlo; } @@ -115,18 +117,34 @@ class WhileTransformerTest : public HloTestBase { } void RunCopyInsertionPass() { + HloVerifier verifier([](const Shape& shape) { + return ShapeUtil::ByteSizeOf(shape, /*pointer_size=*/sizeof(void*)); + }); + TF_ASSERT_OK(verifier.Run(module_.get()).status()); CopyInsertion copy_insertion; - EXPECT_IS_OK(copy_insertion.Run(module_.get()).status()); + TF_ASSERT_OK(copy_insertion.Run(module_.get()).status()); + } + + Shape GetLoopStateShape(const int64 ind_var_tuple_index) { + if (ind_var_tuple_index == 0) { + return ShapeUtil::MakeTupleShape( + {induction_variable_shape_, data_shape_}); + } else { + return ShapeUtil::MakeTupleShape( + {data_shape_, induction_variable_shape_}); + } } std::unique_ptr module_; Shape induction_variable_shape_; Shape data_shape_; - Shape loop_state_shape_; Shape condition_result_shape_; }; -TEST_F(WhileTransformerTest, InductionVariableAtTupleElement0) { +// TODO(b/68830972): The while transformer is far too fragile. It patterns +// matches the exact expressions of opcodes. Re-enable when transformation is +// more general +TEST_F(WhileTransformerTest, DISABLED_InductionVariableAtTupleElement0) { // Build computation with induction variable at tuple element 0. auto condition = module_->AddEmbeddedComputation(BuildConditionComputation(0, 10)); @@ -137,13 +155,16 @@ TEST_F(WhileTransformerTest, InductionVariableAtTupleElement0) { RunCopyInsertionPass(); // Run WhileTransformer. auto result = gpu::CanTransformWhileToFor(while_hlo); - ASSERT_TRUE(result.ok()); + TF_ASSERT_OK(result.status()); // Check results. EXPECT_THAT(result.ConsumeValueOrDie(), Eq(std::tuple(0, 10, 1))); } -TEST_F(WhileTransformerTest, InductionVariableAtTupleElement1) { +// TODO(b/68830972): The while transformer is far too fragile. It patterns +// matches the exact expressions of opcodes. Re-enable when transformation is +// more general +TEST_F(WhileTransformerTest, DISABLED_InductionVariableAtTupleElement1) { // Build computation with induction variable at tuple element 1. auto condition = module_->AddEmbeddedComputation(BuildConditionComputation(1, 10)); @@ -154,13 +175,16 @@ TEST_F(WhileTransformerTest, InductionVariableAtTupleElement1) { RunCopyInsertionPass(); // Run WhileTransformer. auto result = gpu::CanTransformWhileToFor(while_hlo); - ASSERT_TRUE(result.ok()); + TF_ASSERT_OK(result.status()); // Check results. EXPECT_THAT(result.ConsumeValueOrDie(), Eq(std::tuple(0, 10, 1))); } -TEST_F(WhileTransformerTest, InvalidLoopLimit) { +// TODO(b/68830972): The while transformer is far too fragile. It patterns +// matches the exact expressions of opcodes. Re-enable when transformation is +// more general +TEST_F(WhileTransformerTest, DISABLED_InvalidLoopLimit) { // Build computation with invalid loop limit. auto condition = module_->AddEmbeddedComputation(BuildConditionComputation(0, 5)); @@ -176,7 +200,10 @@ TEST_F(WhileTransformerTest, InvalidLoopLimit) { HasSubstr("Loop start must be less than loop limit.")); } -TEST_F(WhileTransformerTest, InvalidLoopIncrement) { +// TODO(b/68830972): The while transformer is far too fragile. It patterns +// matches the exact expressions of opcodes. Re-enable when transformation is +// more general +TEST_F(WhileTransformerTest, DISABLED_InvalidLoopIncrement) { // Build computation with invalid loop increment. auto condition = module_->AddEmbeddedComputation(BuildConditionComputation(0, 10)); diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index 6f80994751..6d2a3aa5b5 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -144,8 +144,10 @@ class BufferValueMap { // Move the given value into the given buffer. void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) { BufferNumber old_buffer_number = value_to_buffer_number_.at(&value); - buffers_.at(old_buffer_number).erase(&value); - if (buffers_.at(old_buffer_number).empty()) { + tensorflow::gtl::FlatSet& old_value_set = + buffers_.at(old_buffer_number); + old_value_set.erase(&value); + if (old_value_set.empty()) { buffers_.erase(old_buffer_number); } @@ -175,7 +177,7 @@ class BufferValueMap { // Value is init of a while (use is while). std::vector aliased_buffers; for (const HloUse& use : value.uses()) { - VLOG(1) << "use of value " << value.ToShortString() << ": " << use; + VLOG(2) << "use of value " << value.ToShortString() << ": " << use; if (use.instruction->opcode() == HloOpcode::kWhile) { // Determine the while value that this shares a buffer with. const HloValue& while_value = @@ -411,7 +413,7 @@ string HloAliasAnalysis::ToString() const { /* static */ StatusOr> HloAliasAnalysis::Run( HloModule* module) { - VLOG(1) << "HloAliasAnalysis::Run on module " << module->name(); + VLOG(2) << "HloAliasAnalysis::Run on module " << module->name(); XLA_VLOG_LINES(2, module->ToString()); auto alias_analysis = WrapUnique(new HloAliasAnalysis(module)); @@ -444,7 +446,7 @@ StatusOr> HloAliasAnalysis::Run( TF_DCHECK_OK(alias_analysis->Verify()); - XLA_VLOG_LINES(1, alias_analysis->ToString()); + XLA_VLOG_LINES(2, alias_analysis->ToString()); return std::move(alias_analysis); } diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc index a4921232f5..40e67c8780 100644 --- a/tensorflow/compiler/xla/service/hlo_dce.cc +++ b/tensorflow/compiler/xla/service/hlo_dce.cc @@ -37,6 +37,9 @@ namespace xla { StatusOr HloDCE::Run(HloModule* module) { bool changed = false; + VLOG(2) << "Before dce:"; + XLA_VLOG_LINES(2, module->ToString()); + for (auto* computation : module->MakeNonfusionComputations()) { std::unordered_set live_instructions; TF_RETURN_IF_ERROR(computation->root_instruction()->Accept( @@ -58,6 +61,8 @@ StatusOr HloDCE::Run(HloModule* module) { } for (HloInstruction* dead_root : dead_roots) { + VLOG(1) << "Removing dead root " << dead_root->ToString() + << " and it's unused operands"; TF_RETURN_IF_ERROR( computation->RemoveInstructionAndUnusedOperands(dead_root)); changed = true; @@ -87,6 +92,9 @@ StatusOr HloDCE::Run(HloModule* module) { } } + VLOG(2) << "After dce:"; + XLA_VLOG_LINES(2, module->ToString()); + return changed; } diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index 4920f17a7e..5a012c93d6 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -180,7 +180,8 @@ XLA_TEST_F(TupleTest, TupleGTEToTuple) { ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); } -XLA_TEST_F(TupleTest, SelectBetweenPredTuples) { +// TODO(b/68395210): GPU does not tolerate ambiguous top-level buffers. +XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenPredTuples)) { ComputationBuilder b(client_, TestName()); ComputationDataHandle v1, v2; -- GitLab From 1a376869bdb5b18fcfb50ee2c392380d60f1896e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 09:14:17 -0800 Subject: [PATCH 0751/1801] dynamic_rnn now supports TensorArray in the cell state. This enables using seq2seq.AttentionWrapper with alignment_history=True in dynamic_rnn. Also fixed some minor documentation issues. PiperOrigin-RevId: 176664362 --- tensorflow/python/kernel_tests/BUILD | 1 + tensorflow/python/kernel_tests/rnn_test.py | 56 +++++++++++++++++++++- tensorflow/python/ops/rnn.py | 16 ++++--- tensorflow/python/util/nest.py | 2 +- 4 files changed, 66 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 7643cf2ddc..4522520ee4 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2338,6 +2338,7 @@ cuda_py_test( "//tensorflow/python:rnn_cell", "//tensorflow/python:sparse_grad", "//tensorflow/python:tensor_array_grad", + "//tensorflow/python:tensor_array_ops", "//tensorflow/python:variables", "//tensorflow/python/eager:context", ], diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py index d8f4b439e3..0c77d1db92 100644 --- a/tensorflow/python/kernel_tests/rnn_test.py +++ b/tensorflow/python/kernel_tests/rnn_test.py @@ -38,6 +38,7 @@ from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import init_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variables as variables_lib import tensorflow.python.ops.data_flow_grad # pylint: disable=unused-import import tensorflow.python.ops.nn_grad # pylint: disable=unused-import @@ -57,7 +58,7 @@ class Plus1RNNCell(rnn_cell_impl.RNNCell): def state_size(self): return 5 - def __call__(self, input_, state, scope=None): + def call(self, input_, state, scope=None): return (input_ + 1, state + 1) @@ -75,10 +76,31 @@ class ScalarStateRNNCell(rnn_cell_impl.RNNCell): def zero_state(self, batch_size, dtype): return array_ops.zeros([], dtype=dtypes.int32) - def __call__(self, input_, state, scope=None): + def call(self, input_, state, scope=None): return (input_, state + 1) +class TensorArrayStateRNNCell(rnn_cell_impl.RNNCell): + """RNN Cell its state as a TensorArray.""" + + @property + def output_size(self): + return 1 + + @property + def state_size(self): + return (tensor_shape.TensorShape([]), ()) + + def zero_state(self, batch_size, dtype): + return (array_ops.zeros([], dtype=dtypes.int32), + tensor_array_ops.TensorArray( + dtype=dtype, size=0, dynamic_size=True)) + + def call(self, input_, state, scope=None): + new_array = state[1].write(state[0], input_) + return (input_, (state[0] + 1, new_array)) + + class RNNTest(test.TestCase): def setUp(self): @@ -171,6 +193,36 @@ class RNNTest(test.TestCase): self.assertAllEqual(outputs.numpy(), np.array([[[1], [2], [3], [4]]])) self.assertEqual(state.numpy(), 4) + @test_util.run_in_graph_and_eager_modes() + def testTensorArrayStateIsAccepted(self): + cell = TensorArrayStateRNNCell() + in_graph_mode = context.in_graph_mode() + + if in_graph_mode: + inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1)) + else: + inputs = np.array([[[1], [2], [3], [4]]], dtype=np.float32) + + with self.test_session() as sess: + outputs, state = rnn.dynamic_rnn( + cell, inputs, dtype=dtypes.float32, sequence_length=[4]) + state = (state[0], state[1].stack()) + if in_graph_mode: + outputs, state = sess.run( + [outputs, state], feed_dict={ + inputs: [[[1], [2], [3], [4]]] + }) + + if in_graph_mode: + self.assertAllEqual(outputs, np.array([[[1], [2], [3], [4]]])) + self.assertEqual(state[0], 4) + self.assertAllEqual(state[1], np.array([[[1]], [[2]], [[3]], [[4]]])) + else: + self.assertAllEqual(outputs.numpy(), np.array([[[1], [2], [3], [4]]])) + self.assertEqual(state[0].numpy(), 4) + self.assertAllEqual(state[1].numpy(), + np.array([[[1]], [[2]], [[3]], [[4]]])) + ######### Benchmarking RNN code diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index df66302402..436872f044 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -148,7 +148,7 @@ def _rnn_step( zero_output, state, call_cell, state_size, skip_conditionals=False): """Calculate one step of a dynamic RNN minibatch. - Returns an (output, state) pair conditioned on the sequence_lengths. + Returns an (output, state) pair conditioned on `sequence_length`. When skip_conditionals=False, the pseudocode is something like: if t >= max_sequence_length: @@ -157,14 +157,14 @@ def _rnn_step( return call_cell() # Selectively output zeros or output, old state or new state depending - # on if we've finished calculating each row. + # on whether we've finished calculating each row. new_output, new_state = call_cell() final_output = np.vstack([ - zero_output if time >= sequence_lengths[r] else new_output_r + zero_output if time >= sequence_length[r] else new_output_r for r, new_output_r in enumerate(new_output) ]) final_state = np.vstack([ - state[r] if time >= sequence_lengths[r] else new_state_r + state[r] if time >= sequence_length[r] else new_state_r for r, new_state_r in enumerate(new_state) ]) return (final_output, final_state) @@ -202,9 +202,12 @@ def _rnn_step( flat_zero_output = nest.flatten(zero_output) def _copy_one_through(output, new_output): - # If the state contains a scalar value we simply pass it through. + # TensorArray and scalar get passed through. + if isinstance(output, tensor_array_ops.TensorArray): + return new_output if output.shape.ndims == 0: return new_output + # Otherwise propagate the old or the new value. copy_cond = (time >= sequence_length) with ops.colocate_with(new_output): return array_ops.where(copy_cond, output, new_output) @@ -264,7 +267,8 @@ def _rnn_step( for output, flat_output in zip(final_output, flat_zero_output): output.set_shape(flat_output.get_shape()) for substate, flat_substate in zip(final_state, flat_state): - substate.set_shape(flat_substate.get_shape()) + if not isinstance(substate, tensor_array_ops.TensorArray): + substate.set_shape(flat_substate.get_shape()) final_output = nest.pack_sequence_as( structure=zero_output, flat_sequence=final_output) diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index dd6acee3c7..75f482e5a8 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -293,7 +293,7 @@ def pack_sequence_as(structure, flat_sequence): If `structure` is or contains a dict instance, the keys will be sorted to pack the flat sequence in deterministic order. This is true also for `OrderedDict` instances: their sequence order is ignored, the sorting order of - keys is used instead. The same convention is followed in `pack_sequence_as`. + keys is used instead. The same convention is followed in `flatten`. This correctly repacks dicts and `OrderedDict`s after they have been flattened, and also allows flattening an `OrderedDict` and then repacking it back using a correponding plain dict, or vice-versa. -- GitLab From 93023ee2f88cfdc019b76f6d06c679354820d150 Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Wed, 22 Nov 2017 18:46:50 +0100 Subject: [PATCH 0752/1801] Fix nccl.BUILD on Windows (#14790) Bazel doesn't allow a random file name in `linkopts` attribute, so use `-DEFAULTLIB:` option to specify `ws2_32.lib` --- third_party/nccl.BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD index 06b9b8ff68..8c7b9bdbe9 100644 --- a/third_party/nccl.BUILD +++ b/third_party/nccl.BUILD @@ -55,7 +55,7 @@ cc_library( ], "@%ws%//tensorflow:ios": [], "@%ws%//tensorflow:windows": [ - "ws2_32.lib", + "-DEFAULTLIB:ws2_32.lib", ], "//conditions:default": [ "-lrt", -- GitLab From 47cdf5b85df658da5a57d5eb6dd29145051ddcb4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 09:50:51 -0800 Subject: [PATCH 0753/1801] Internal change. PiperOrigin-RevId: 176668209 --- .../contrib/lite/toco/export_tensorflow.cc | 4 + .../fuse_activation_functions.cc | 3 +- .../propagate_fixed_sizes.cc | 74 +++++++++---------- .../contrib/lite/toco/import_tensorflow.cc | 22 ++++++ tensorflow/contrib/lite/toco/model.h | 4 + 5 files changed, 66 insertions(+), 41 deletions(-) diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 16b9fa2260..625a4dd83c 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -1283,6 +1283,10 @@ void ConvertMeanOperator(const Model& model, const MeanOperator& src_op, const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); (*new_op->mutable_attr())["T"].set_type(params_type); + if (src_op.keep_dims) { + (*new_op->mutable_attr())["keep_dims"].set_b(true); + } + // Create the params tensor. auto* params_op = tensorflow_graph->add_node(); params_op->set_op("Const"); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc index 7a86510025..d129b5ecf2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc @@ -71,7 +71,8 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { // TODO(dkalenichenko): Great many ops don't support activation function // fusing. Switch to the whilelist approach instead. if (op->type == OperatorType::kConcatenation || - op->type == OperatorType::kSlice) { + op->type == OperatorType::kSlice || + op->type == OperatorType::kTensorFlowSplit) { AddMessageF( "Not fusing activation function because the %s op doesn't support it", LogName(*op)); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 82a43bc2ce..f6daad9020 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -48,10 +48,10 @@ void ComputeConvSizes(const Shape& input_shape, int output_depth, int kwidth, LOG(FATAL) << "Only supporting SAME or VALID padding"; } - fixed_padding->height = - ((output_height - 1) * stride_height + kheight - input_height) / 2; - fixed_padding->width = - ((output_width - 1) * stride_width + kwidth - input_width) / 2; + fixed_padding->height = std::max( + 0, ((output_height - 1) * stride_height + kheight - input_height) / 2); + fixed_padding->width = std::max( + 0, ((output_width - 1) * stride_width + kwidth - input_width) / 2); // Actually had to debug a situation where those were negative due to bad // propagation of placeholder -1 sizes in TensorFlowReshape. @@ -367,23 +367,40 @@ void ProcessSimpleBinaryOperator(Model* model, Operator* op) { &output_array); } +bool KeepDims(const Operator& op) { + switch (op.type) { + case OperatorType::kTensorFlowMin: + return static_cast(op).keep_dims; + case OperatorType::kTensorFlowMax: + return static_cast(op).keep_dims; + case OperatorType::kTensorFlowSum: + return static_cast(op).keep_dims; + case OperatorType::kMean: + return static_cast(op).keep_dims; + default: + LOG(FATAL) << "Not a reduction operator!"; + return false; + } +} + void ProcessTensorFlowReductionOperator(Model* model, Operator* op) { CHECK_LE(op->inputs.size(), 2); auto& output_array = *model->arrays[op->outputs[0]]; if (output_array.has_shape()) { return; } + const auto& input_array = *model->arrays[op->inputs[0]]; + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + const bool keep_dims = KeepDims(*op); if (op->inputs.size() == 2) { // There is a reduction_indices input. - const auto& input_array = *model->arrays[op->inputs[0]]; const auto& reduction_array = *model->arrays[op->inputs[1]]; if (!reduction_array.buffer) { return; } - if (!input_array.has_shape()) { - return; - } - auto& input_shape = input_array.shape(); CHECK(reduction_array.buffer->type == ArrayDataType::kInt32); const auto& reduction_array_vals = reduction_array.GetBuffer().data; @@ -398,11 +415,17 @@ void ProcessTensorFlowReductionOperator(Model* model, Operator* op) { } if (!is_reduction_dim) { output_dims.push_back(input_shape.dims(i)); + } else if (keep_dims) { + output_dims.push_back(1); } } } else { // No reduction_indices means complete reduction to a single scalar. - output_array.copy_shape(Shape({})); + if (keep_dims) { + output_array.copy_shape(input_shape); + } else { + output_array.copy_shape(Shape({})); + } } } @@ -827,33 +850,6 @@ void ProcessPadOperator(Model* model, PadOperator* op) { output_array.copy_shape(output_shape); } -void ProcessMeanOperator(Model* model, MeanOperator* op) { - CHECK_EQ(op->inputs.size(), 2); - CHECK_EQ(op->outputs.size(), 1); - - const auto& input_array = *model->arrays[op->inputs[0]]; - - // Yield until input dims have been resolved. - if (!input_array.has_shape()) return; - const std::vector& indices = op->reduction_indices; - if (indices.empty()) return; - - auto& output_array = *model->arrays[op->outputs[0]]; - if (output_array.has_shape()) return; - - const std::vector& input_dims = input_array.shape().dims(); - std::vector output_dims; - for (int i = 0; i < input_dims.size(); ++i) { - if (std::find(indices.begin(), indices.end(), i) == indices.end()) { - output_dims.push_back(input_dims[i]); - } - } - CHECK(!output_dims.empty()); - CHECK_EQ(output_dims.size(), 2); - - *output_array.mutable_shape()->mutable_dims() = output_dims; -} - void ProcessStridedSliceOperator(Model* model, StridedSliceOperator* op) { CHECK_EQ(op->inputs.size(), 4); CHECK_EQ(op->outputs.size(), 1); @@ -1024,6 +1020,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kTensorFlowMin: case OperatorType::kTensorFlowMax: case OperatorType::kTensorFlowSum: + case OperatorType::kMean: ProcessTensorFlowReductionOperator(model, op); break; @@ -1098,9 +1095,6 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kPad: ProcessPadOperator(model, static_cast(op)); break; - case OperatorType::kMean: - ProcessMeanOperator(model, static_cast(op)); - break; case OperatorType::kStridedSlice: ProcessStridedSliceOperator(model, static_cast(op)); diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index b00365d5de..f2dc526a36 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -588,6 +588,9 @@ void ConvertSumOperator(const NodeDef& node, Model* model) { op->inputs.push_back(node.input(1)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); + if (HasAttr(node, "keep_dims")) { + op->keep_dims = GetBoolAttr(node, "keep_dims"); + } } void ConvertTileOperator(const NodeDef& node, Model* model) { @@ -697,6 +700,11 @@ void ConvertMaxPoolOperator(const NodeDef& node, Model* model) { CHECK_EQ(node.op(), "MaxPool"); CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); const auto& input_name = node.input(0); + // We only support NHWC, which is the default data_format. + // So if data_format is not defined, we're all good. + if (node.attr().count("data_format")) { + CHECK_EQ(GetStringAttr(node, "data_format"), "NHWC"); + } if (HasAttr(node, "T")) { CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); } else { @@ -732,6 +740,11 @@ void ConvertAvgPoolOperator(const NodeDef& node, Model* model) { CHECK_EQ(node.op(), "AvgPool"); CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); const auto& input_name = node.input(0); + // We only support NHWC, which is the default data_format. + // So if data_format is not defined, we're all good. + if (node.attr().count("data_format")) { + CHECK_EQ(GetStringAttr(node, "data_format"), "NHWC"); + } CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); auto* avgpool = new AveragePoolOperator; avgpool->inputs.push_back(input_name); @@ -902,6 +915,9 @@ void ConvertMaxOperator(const NodeDef& node, Model* model) { op->inputs.push_back(node.input(1)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); + if (HasAttr(node, "keep_dims")) { + op->keep_dims = GetBoolAttr(node, "keep_dims"); + } } void ConvertMinOperator(const NodeDef& node, Model* model) { @@ -912,6 +928,9 @@ void ConvertMinOperator(const NodeDef& node, Model* model) { op->inputs.push_back(node.input(1)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); + if (HasAttr(node, "keep_dims")) { + op->keep_dims = GetBoolAttr(node, "keep_dims"); + } } void ConvertMaximumOperator(const NodeDef& node, Model* model) { @@ -1222,6 +1241,9 @@ void ConvertMeanOperator(const NodeDef& node, Model* model) { op->inputs.push_back(node.input(1)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); + if (HasAttr(node, "keep_dims")) { + op->keep_dims = GetBoolAttr(node, "keep_dims"); + } } void ConvertSvdfOperator(const NodeDef& node, Model* model) { diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index d992f8458f..f2fce2b249 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -819,6 +819,7 @@ struct SubOperator : Operator { // of global reduction across all dimensions. struct TensorFlowSumOperator : Operator { TensorFlowSumOperator() : Operator(OperatorType::kTensorFlowSum) {} + bool keep_dims = false; }; // TensorFlow Tile equivalent. Refer to TensorFlow documentation for details. @@ -971,6 +972,7 @@ struct TensorFlowGreaterEqualOperator : Operator { // of global reduction across all dimensions. struct TensorFlowMaxOperator : Operator { TensorFlowMaxOperator() : Operator(OperatorType::kTensorFlowMax) {} + bool keep_dims = false; }; // Global min reduction: computes the min of all of entries in the input array. @@ -983,6 +985,7 @@ struct TensorFlowMaxOperator : Operator { // of global reduction across all dimensions. struct TensorFlowMinOperator : Operator { TensorFlowMinOperator() : Operator(OperatorType::kTensorFlowMin) {} + bool keep_dims = false; }; // Element-wise maximum operator. Currently it only supports scalar as @@ -1121,6 +1124,7 @@ struct MeanOperator : Operator { MeanOperator() : Operator(OperatorType::kMean) {} std::vector reduction_indices; + bool keep_dims = false; }; // Svdf operator: -- GitLab From e6840e82c3082dd367f56aee4043ccfd342abce5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Nov 2017 09:54:19 -0800 Subject: [PATCH 0754/1801] Let python handle the deletion of the item objects instead of trying to take care of this ourselves Removed a debug printf PiperOrigin-RevId: 176668566 --- tensorflow/python/grappler/cluster.py | 1 - tensorflow/python/grappler/item.i | 7 ++----- tensorflow/python/grappler/item.py | 4 ---- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py index 9864e86811..496f5255b9 100644 --- a/tensorflow/python/grappler/cluster.py +++ b/tensorflow/python/grappler/cluster.py @@ -72,7 +72,6 @@ class Cluster(object): devices = [] for raw_dev in ret_from_swig: devices.append(device_properties_pb2.NamedDevice.FromString(raw_dev)) - print(str(devices)) return devices def MeasureCosts(self, item): diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i index 632f614558..8c346b4438 100644 --- a/tensorflow/python/grappler/item.i +++ b/tensorflow/python/grappler/item.i @@ -30,6 +30,8 @@ limitations under the License. $1 = &temp; } +%newobject TF_NewItem; + %{ #include #include @@ -66,10 +68,6 @@ static tensorflow::grappler::GrapplerItem* TF_NewItem( return item.release(); } -static void TF_DeleteItem(tensorflow::grappler::GrapplerItem* item) { - delete item; -} - static std::vector TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item) { if (!item) { return {}; @@ -129,6 +127,5 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it static tensorflow::grappler::GrapplerItem* TF_NewItem( const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation, bool ignore_user_placement, TF_Status* out_status); -static void TF_DeleteItem(tensorflow::grappler::GrapplerItem* item); static std::vector TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item); static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* item); diff --git a/tensorflow/python/grappler/item.py b/tensorflow/python/grappler/item.py index cfbe014de5..4fc94ec968 100644 --- a/tensorflow/python/grappler/item.py +++ b/tensorflow/python/grappler/item.py @@ -50,10 +50,6 @@ class Item(object): self._tf_item = None self._BuildTFItem() - def __del__(self): - if self._tf_item: - tf_item.TF_DeleteItem(self._tf_item) - def IdentifyImportantOps(self): return tf_item.TF_IdentifyImportantOps(self.tf_item) -- GitLab From b579996aed210f415767a7ffaed55c6828ddf07b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Nov 2017 10:06:28 -0800 Subject: [PATCH 0755/1801] Don't infer graph properties unless they'll be used. PiperOrigin-RevId: 176670211 --- .../core/grappler/optimizers/arithmetic_optimizer.cc | 10 +++++++--- .../core/grappler/optimizers/constant_folding.cc | 9 +++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 33eac79c01..6c1770f0b0 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1055,9 +1055,13 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, GraphDef* optimized_graph) { *optimized_graph = item.graph; nodes_to_preserve_ = item.NodesToPreserve(); - GraphProperties graph_properties(item); - TF_RETURN_IF_ERROR(graph_properties.InferStatically()); - TF_RETURN_IF_ERROR(graph_properties.AnnotateOutputShapes(optimized_graph)); + + if (opt_level_ == RewriterConfig::AGGRESSIVE) { + // Shapes are only needed in aggressive mode. + GraphProperties graph_properties(item); + TF_RETURN_IF_ERROR(graph_properties.InferStatically()); + TF_RETURN_IF_ERROR(graph_properties.AnnotateOutputShapes(optimized_graph)); + } DedupComputations(optimized_graph); TF_RETURN_IF_ERROR(SimplifyArithmeticOps(optimized_graph)); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 8ae0d57068..c77b2badf4 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1158,8 +1158,13 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, } GraphProperties properties(item); - Status s = properties.InferStatically(); - bool has_feed = !item.feed.empty(); + const bool has_feed = !item.feed.empty(); + bool needs_shapes = !has_feed || opt_level_ == RewriterConfig::AGGRESSIVE; + Status s = errors::Unknown( + "The graph properties are needed but were not initialized"); + if (needs_shapes) { + s = properties.InferStatically(); + } if (!has_feed && s.ok()) { // Only use static shape information when there is no feed in the -- GitLab From 3322d51add117acbf6df872c51a086ab1a5feb1d Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Wed, 22 Nov 2017 10:29:01 -0800 Subject: [PATCH 0756/1801] Add local client methods for infeed and outfeed. PiperOrigin-RevId: 176673166 --- .../compiler/xla/client/local_client.cc | 23 +++++++++++++++---- tensorflow/compiler/xla/client/local_client.h | 14 +++++++++++ .../xla/tests/local_client_execute_test.cc | 8 ++++--- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index c3c664f76a..b051955f0f 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -275,9 +275,6 @@ StatusOr> LocalClient::Compile( device_ordinal, options)); } -// Copy the literal data to the device with the given ordinal and return as a -// ScopedShapedBuffer. The given memory allocator is used for device memory -// allocation. StatusOr> LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal, DeviceMemoryAllocator* allocator) { @@ -298,8 +295,6 @@ LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal, return std::move(scoped_buffer); } -// Copy the data from the device contained in the given ShapedBuffer and -// return as a Literal. StatusOr> LocalClient::ShapedBufferToLiteral( const ShapedBuffer& shaped_buffer) { TF_ASSIGN_OR_RETURN( @@ -309,4 +304,22 @@ StatusOr> LocalClient::ShapedBufferToLiteral( shaped_buffer); } +Status LocalClient::TransferToInfeedLocal(const Literal& literal, + int device_ordinal) { + TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, + backend().stream_executor(device_ordinal)); + return backend().transfer_manager()->TransferLiteralToInfeed(executor, + literal); +} + +StatusOr> LocalClient::TransferFromOutfeedLocal( + const Shape& shape, int device_ordinal) { + TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, + backend().stream_executor(device_ordinal)); + auto literal = MakeUnique(); + TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralFromOutfeed( + executor, shape, literal.get())); + return std::move(literal); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index 32fe0d9f84..3ca0d2ef55 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -162,6 +162,20 @@ class LocalClient : public Client { StatusOr> ShapedBufferToLiteral( const ShapedBuffer& shaped_buffer); + // Transfer the given literal to the infeed queue of the given device. + // TODO(b/69670845): Remove the 'Local' from the name when LocalClient does + // not inherit from Client and there is no possibility of confusion with + // Client::TransferToInfeed. + Status TransferToInfeedLocal(const Literal& literal, int device_ordinal); + + // Transfer and return a value of the given shape from the outfeed of the + // given device. + // TODO(b/69670845): Remove the 'Local' from the name when LocalClient does + // not inherit from Client and there is no possibility of confusion with + // Client::TransferFromOutfeed. + StatusOr> TransferFromOutfeedLocal( + const Shape& shape, int device_ordinal); + // Returns the platform that the underlying service targets. perftools::gputools::Platform* platform() const; diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc index fbf9739dbc..ad71d40197 100644 --- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc +++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc @@ -874,11 +874,13 @@ XLA_TEST_F(LocalClientExecuteTest, tensorflow::ThreadOptions(), "execute_thread", [&] { ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {}); })); - ASSERT_IS_OK(local_client_->TransferToInfeed( - *Literal::CreateR1({-5.0, 123.0, 42.0}))); + ASSERT_IS_OK(local_client_->TransferToInfeedLocal( + *Literal::CreateR1({-5.0, 123.0, 42.0}), + local_client_->default_device_ordinal())); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result, - local_client_->TransferFromOutfeed(&shape)); + local_client_->TransferFromOutfeedLocal( + shape, local_client_->default_device_ordinal())); LiteralTestUtil::ExpectR1Equal({-4.0, 125.0, 45.0}, *result); } -- GitLab From b8f4d5b410676659da25355e1e76ec6f70522302 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 22 Nov 2017 10:29:08 -0800 Subject: [PATCH 0757/1801] [tf.data] Remove unused headers from dataset.h. This change is part of a clean-up that will reduce the dependencies of the C++ `tensorflow::Dataset` framework, and move towards the possibility of building custom datasets as external plugins. PiperOrigin-RevId: 176673196 --- tensorflow/core/kernels/dataset.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index 18b57ec97a..39c10163cf 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -17,22 +17,17 @@ limitations under the License. #include -#include "tensorflow/core/common_runtime/graph_runner.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include "tensorflow/core/framework/variant_tensor_data.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/tracing.h" -#include "tensorflow/core/util/tensor_bundle/naming.h" -#include "tensorflow/core/util/tensor_bundle/tensor_bundle.h" // Polymorphic datasets should support all primitive TensorFlow // types. Use this macro to expand `m(T)` once for each primitive type -- GitLab From 86f150908d9f8411159044f964f21faf30244183 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 10:51:01 -0800 Subject: [PATCH 0758/1801] Changed StringPiece::Hasher to StringPieceHasher in stringpiece_test. This will allow the Hasher alias to be removed from StringPiece. PiperOrigin-RevId: 176676125 --- tensorflow/core/lib/core/stringpiece_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc index a1d335c4e1..8f17b85b6d 100644 --- a/tensorflow/core/lib/core/stringpiece_test.cc +++ b/tensorflow/core/lib/core/stringpiece_test.cc @@ -66,7 +66,7 @@ TEST(StringPiece, Contains) { } TEST(StringPieceHasher, Equality) { - StringPiece::Hasher hasher; + StringPieceHasher hasher; StringPiece s1("foo"); StringPiece s2("bar"); -- GitLab From cd8ced7a2d48574908d2c9b7127960078cf41690 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 11:23:50 -0800 Subject: [PATCH 0759/1801] Enable deduping of Assert nodes. Add unit test for deduping Assert and CheckNumerics. PiperOrigin-RevId: 176680534 --- tensorflow/core/grappler/op_types.cc | 125 +++++------------- tensorflow/core/grappler/op_types.h | 1 + .../optimizers/arithmetic_optimizer.cc | 4 + .../optimizers/arithmetic_optimizer_test.cc | 32 +++++ 4 files changed, 67 insertions(+), 95 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 1b23a4caba..1f18b56238 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -24,64 +24,40 @@ limitations under the License. namespace tensorflow { namespace grappler { -bool IsAdd(const NodeDef& node) { - const auto op = node.op(); - return op == "Add"; -} +bool IsAdd(const NodeDef& node) { return node.op() == "Add"; } -bool IsAddN(const NodeDef& node) { - const auto op = node.op(); - return op == "AddN"; -} +bool IsAddN(const NodeDef& node) { return node.op() == "AddN"; } -bool IsAvgPoolGrad(const NodeDef& node) { - const auto op = node.op(); - return op == "AvgPoolGrad"; -} +bool IsAvgPoolGrad(const NodeDef& node) { return node.op() == "AvgPoolGrad"; } -bool IsBiasAddGrad(const NodeDef& node) { - const auto op = node.op(); - return op == "BiasAddGrad"; -} +bool IsAssert(const NodeDef& node) { return node.op() == "Assert"; } -bool IsConcatOffset(const NodeDef& node) { - const auto op = node.op(); - return op == "ConcatOffset"; -} +bool IsBiasAddGrad(const NodeDef& node) { return node.op() == "BiasAddGrad"; } -bool IsConstant(const NodeDef& node) { - const auto op = node.op(); - return op == "Const"; -} +bool IsConcatOffset(const NodeDef& node) { return node.op() == "ConcatOffset"; } -bool IsConv2D(const NodeDef& node) { - const auto op = node.op(); - return op == "Conv2D"; -} +bool IsConstant(const NodeDef& node) { return node.op() == "Const"; } + +bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; } bool IsConv2DBackpropFilter(const NodeDef& node) { - const auto op = node.op(); - return op == "Conv2DBackpropFilter"; + return node.op() == "Conv2DBackpropFilter"; } bool IsConv2DBackpropInput(const NodeDef& node) { - const auto op = node.op(); - return op == "Conv2DBackpropInput"; + return node.op() == "Conv2DBackpropInput"; } bool IsDepthwiseConv2dNative(const NodeDef& node) { - const auto op = node.op(); - return op == "DepthwiseConv2dNative"; + return node.op() == "DepthwiseConv2dNative"; } bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node) { - const auto op = node.op(); - return op == "DepthwiseConv2dNativeBackpropFilter"; + return node.op() == "DepthwiseConv2dNativeBackpropFilter"; } bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node) { - const auto op = node.op(); - return op == "DepthwiseConv2dNativeBackpropInput"; + return node.op() == "DepthwiseConv2dNativeBackpropInput"; } bool IsDequeueOp(const NodeDef& node) { @@ -101,14 +77,10 @@ bool IsExit(const NodeDef& node) { return op == "Exit" || op == "RefExit"; } -bool IsFloorMod(const NodeDef& node) { - const auto& op = node.op(); - return op == "FloorMod"; -} +bool IsFloorMod(const NodeDef& node) { return node.op() == "FloorMod"; } bool IsFusedBatchNormGradV1(const NodeDef& node) { - const auto& op = node.op(); - return op == "FusedBatchNormGrad"; + return node.op() == "FusedBatchNormGrad"; } bool IsIdentity(const NodeDef& node) { @@ -121,25 +93,16 @@ bool IsMerge(const NodeDef& node) { return op == "Merge" || op == "RefMerge"; } -bool IsMul(const NodeDef& node) { - const auto op = node.op(); - return op == "Mul"; -} +bool IsMul(const NodeDef& node) { return node.op() == "Mul"; } -bool IsNoOp(const NodeDef& node) { - const auto op = node.op(); - return op == "NoOp"; -} +bool IsNoOp(const NodeDef& node) { return node.op() == "NoOp"; } bool IsNextIteration(const NodeDef& node) { const auto& op = node.op(); return op == "NextIteration" || op == "RefNextIteration"; } -bool IsPad(const NodeDef& node) { - const auto op = node.op(); - return op == "Pad"; -} +bool IsPad(const NodeDef& node) { return node.op() == "Pad"; } bool IsPlaceholder(const NodeDef& node) { const auto op = node.op(); @@ -147,20 +110,11 @@ bool IsPlaceholder(const NodeDef& node) { op == "PlaceholderWithDefault"; } -bool IsRealDiv(const NodeDef& node) { - const auto op = node.op(); - return op == "RealDiv"; -} +bool IsRealDiv(const NodeDef& node) { return node.op() == "RealDiv"; } -bool IsReluGrad(const NodeDef& node) { - const auto op = node.op(); - return op == "ReluGrad"; -} +bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; } -bool IsRecv(const NodeDef& node) { - const auto op = node.op(); - return op == "_Recv"; -} +bool IsRecv(const NodeDef& node) { return node.op() == "_Recv"; } bool IsReduction(const NodeDef& node) { const auto& op = node.op(); @@ -175,53 +129,34 @@ bool IsRestore(const NodeDef& node) { node.op() == "RestoreSlice"); } -bool IsSend(const NodeDef& node) { - const auto op = node.op(); - return op == "_Send"; -} +bool IsSend(const NodeDef& node) { return node.op() == "_Send"; } -bool IsSlice(const NodeDef& node) { - const auto op = node.op(); - return op == "Slice"; -} +bool IsSlice(const NodeDef& node) { return node.op() == "Slice"; } bool IsSquaredDifference(const NodeDef& node) { - const auto op = node.op(); - return op == "SquaredDifference"; + return node.op() == "SquaredDifference"; } -bool IsSqueeze(const NodeDef& node) { - const auto op = node.op(); - return op == "Squeeze"; -} +bool IsSqueeze(const NodeDef& node) { return node.op() == "Squeeze"; } bool IsStopGradient(const NodeDef& node) { const auto& op = node.op(); return op == "StopGradient" || op == "PreventGradient"; } -bool IsSub(const NodeDef& node) { - const auto op = node.op(); - return op == "Sub"; -} +bool IsSub(const NodeDef& node) { return node.op() == "Sub"; } -bool IsSum(const NodeDef& node) { - const auto op = node.op(); - return op == "Sum"; -} +bool IsSum(const NodeDef& node) { return node.op() == "Sum"; } bool IsSwitch(const NodeDef& node) { const auto& op = node.op(); return op == "Switch" || op == "RefSwitch"; } -bool IsTranspose(const NodeDef& node) { - const auto op = node.op(); - return op == "Transpose"; -} +bool IsTranspose(const NodeDef& node) { return node.op() == "Transpose"; } bool IsVariable(const NodeDef& node) { - const auto op = node.op(); + const auto& op = node.op(); return op == "Variable" || op == "VariableV2" || op == "AutoReloadVariable" || op == "VarHandleOp" || op == "ReadVariableOp"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 85260efa93..66ff7a88c5 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -25,6 +25,7 @@ namespace grappler { bool IsAdd(const NodeDef& node); bool IsAddN(const NodeDef& node); bool IsAvgPoolGrad(const NodeDef& node); +bool IsAssert(const NodeDef& node); bool IsBiasAddGrad(const NodeDef& node); bool IsConcatOffset(const NodeDef& node); bool IsConstant(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 6c1770f0b0..d9a544d21f 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -449,6 +449,10 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { if (node.device().find("SPU") != string::npos) { return false; } + // Workaround for Assert mistakenly being labeled as stateful. + if (IsAssert(node)) { + return true; + } return IsFreeOfSideEffect(node); } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 354a306905..8cec4e4255 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -81,6 +81,38 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { EXPECT_EQ("c1", new_mul.input(1)); } +TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output p = ops::Placeholder(s, DT_BOOL, ops::Placeholder::Shape({})); + Output c = ops::Const(s.WithOpName("c"), {3.14, 2.7}, {1, 2}); + auto check1 = ops::CheckNumerics(s.WithOpName("check1"), c, "foo"); + auto check2 = ops::CheckNumerics(s.WithOpName("check2"), c, "foo"); + auto assert1 = ops::Assert(s.WithOpName("assert1"), p, {c}); + auto assert2 = ops::Assert(s.WithOpName("assert2"), p, {c}); + Output mul = ops::Multiply(s.WithOpName("mul").WithControlDependencies( + {assert1.operation, assert2.operation}), + check1, check2); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(5, output.node_size()); + const NodeDef& new_mul = output.node(3); + EXPECT_EQ(4, new_mul.input_size()); + EXPECT_EQ("check1", new_mul.input(0)); + EXPECT_EQ("check1", new_mul.input(1)); + EXPECT_EQ("^assert1", new_mul.input(2)); + EXPECT_EQ("^assert1", new_mul.input(3)); +} + TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output c1 = ops::Const(s.WithOpName("c1"), {1.0f, 2.0f}, {1, 2}); -- GitLab From b8406da50df94dc17114c10d472a2058ff75b2d2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 11:25:32 -0800 Subject: [PATCH 0760/1801] Make drop_control_dependency a TocoFlag, not a ModelFlag. PiperOrigin-RevId: 176680726 --- tensorflow/contrib/lite/python/lite.py | 2 +- tensorflow/contrib/lite/toco/args.h | 2 +- .../contrib/lite/toco/import_tensorflow.cc | 508 +++++++++++------- .../contrib/lite/toco/import_tensorflow.h | 12 +- .../contrib/lite/toco/model_cmdline_flags.cc | 8 - .../contrib/lite/toco/model_flags.proto | 4 - .../contrib/lite/toco/toco_cmdline_flags.cc | 8 + tensorflow/contrib/lite/toco/toco_flags.proto | 10 +- tensorflow/contrib/lite/toco/toco_tooling.cc | 11 +- tensorflow/contrib/lite/toco/tooling_util.cc | 1 - 10 files changed, 350 insertions(+), 216 deletions(-) diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 3cfee443e5..0fd70f842b 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -164,8 +164,8 @@ def toco_convert(input_data, toco = _toco_flags_pb2.TocoFlags() toco.input_format = input_format toco.output_format = output_format + toco.drop_control_dependency = drop_control_dependency model = _model_flags_pb2.ModelFlags() - model.drop_control_dependency = drop_control_dependency toco.inference_type = inference_type for idx, input_tensor in enumerate(input_tensors): if input_tensor.dtype == _dtypes.float32: diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h index 88e0a29350..5268902346 100644 --- a/tensorflow/contrib/lite/toco/args.h +++ b/tensorflow/contrib/lite/toco/args.h @@ -194,7 +194,6 @@ struct ParsedModelFlags { Arg input_data_type; Arg input_data_types; Arg variable_batch = Arg(false); - Arg drop_control_dependency = Arg(false); Arg input_shape; Arg rnn_states; Arg model_checks; @@ -224,6 +223,7 @@ struct ParsedTocoFlags { // Deprecated flags Arg input_type; Arg input_types; + Arg drop_control_dependency = Arg(false); }; } // namespace toco diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index f2dc526a36..1f959600f3 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/contrib/lite/toco/import_tensorflow.h" + #include #include #include @@ -23,6 +25,7 @@ limitations under the License. #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" +//#include "absl/strings/string_view_utils.h" #include "absl/strings/strip.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" @@ -234,14 +237,14 @@ void ImportInt64Array(const TensorProto& input_tensor, Array* output_array) { } } -// Count the number of inputs of a given node. If `drop_control_dependency` is -// true, count the number of non-control-dependency inputs. -size_t GetInputsCount(const NodeDef& node, bool drop_control_dependency) { - if (drop_control_dependency) { +// Count the number of inputs of a given node. If +// `tf_import_flags.drop_control_dependency` is true, count the number of +// non-control-dependency inputs. +int GetInputsCount(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags) { + if (tf_import_flags.drop_control_dependency) { for (size_t i = 0; i < node.input_size(); ++i) { if (node.input(i)[0] == '^') { - LOG(INFO) << "Reached first control dependency input: " - << node.input(i); return i; } } @@ -251,7 +254,9 @@ size_t GetInputsCount(const NodeDef& node, bool drop_control_dependency) { } } -void ConvertConstOperator(const NodeDef& node, Model* model) { +void ConvertConstOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Const"); const auto& tensor = GetTensorAttr(node, "value"); const auto dtype = GetDataTypeAttr(node, "dtype"); @@ -277,9 +282,11 @@ void ConvertConstOperator(const NodeDef& node, Model* model) { } } -void ConvertConvOperator(const NodeDef& node, Model* model) { +void ConvertConvOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Conv2D"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); // We only support NHWC, which is the default data_format. // So if data_format is not defined, we're all good. @@ -328,9 +335,11 @@ void ConvertConvOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(conv); } -void ConvertDepthwiseConvOperator(const NodeDef& node, Model* model) { +void ConvertDepthwiseConvOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "DepthwiseConv2dNative"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); // We only support NHWC, which is the default data_format. // So if data_format is not defined, we're all good. @@ -379,9 +388,11 @@ void ConvertDepthwiseConvOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(conv); } -void ConvertDepthToSpaceOperator(const NodeDef& node, Model* model) { +void ConvertDepthToSpaceOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "DepthToSpace"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); auto* op = new DepthToSpaceOperator; op->inputs.push_back(node.input(0)); @@ -391,9 +402,11 @@ void ConvertDepthToSpaceOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertSpaceToDepthOperator(const NodeDef& node, Model* model) { +void ConvertSpaceToDepthOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "SpaceToDepth"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); auto* op = new SpaceToDepthOperator; op->inputs.push_back(node.input(0)); @@ -403,9 +416,11 @@ void ConvertSpaceToDepthOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertBiasAddOperator(const NodeDef& node, Model* model) { +void ConvertBiasAddOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "BiasAdd"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); const auto& input_name = node.input(0); const auto& bias_name = node.input(1); CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); @@ -416,9 +431,11 @@ void ConvertBiasAddOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(biasadd); } -void ConvertReluOperator(const NodeDef& node, Model* model) { +void ConvertReluOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Relu"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); auto* relu = new ReluOperator; relu->inputs.push_back(input_name); @@ -426,9 +443,11 @@ void ConvertReluOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(relu); } -void ConvertRelu6Operator(const NodeDef& node, Model* model) { +void ConvertRelu6Operator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Relu6"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); auto* op = new Relu6Operator; op->inputs.push_back(input_name); @@ -436,9 +455,11 @@ void ConvertRelu6Operator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertLogisticOperator(const NodeDef& node, Model* model) { +void ConvertLogisticOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Sigmoid"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); auto* op = new LogisticOperator; op->inputs.push_back(input_name); @@ -446,9 +467,11 @@ void ConvertLogisticOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertTanhOperator(const NodeDef& node, Model* model) { +void ConvertTanhOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Tanh"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); auto* op = new TanhOperator; op->inputs.push_back(input_name); @@ -456,9 +479,11 @@ void ConvertTanhOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertDivOperator(const NodeDef& node, Model* model) { +void ConvertDivOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK(node.op() == "Div" || node.op() == "RealDiv"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new DivOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -466,7 +491,9 @@ void ConvertDivOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertIdentityOperator(const NodeDef& node, Model* model) { +void ConvertIdentityOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK(node.op() == "Identity" || node.op() == "CheckNumerics" || node.op() == "PlaceholderWithDefault"); auto* op = new TensorFlowIdentityOperator; @@ -482,9 +509,11 @@ void ConvertIdentityOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertFakeQuantWithMinMaxArgs(const NodeDef& node, Model* model) { +void ConvertFakeQuantWithMinMaxArgs( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "FakeQuantWithMinMaxArgs"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); auto* op = new FakeQuantOperator; op->inputs.push_back(node.input(0)); op->minmax.reset(new MinMax); @@ -495,10 +524,11 @@ void ConvertFakeQuantWithMinMaxArgs(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertFakeQuantWithMinMaxVars(const NodeDef& node, Model* model) { +void ConvertFakeQuantWithMinMaxVars( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "FakeQuantWithMinMaxVars"); - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); CHECK(num_inputs == 3 || num_inputs == 4); auto* op = new FakeQuantOperator; for (int i = 0; i < 3; i++) { @@ -508,27 +538,33 @@ void ConvertFakeQuantWithMinMaxVars(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertRsqrtOperator(const NodeDef& node, Model* model) { +void ConvertRsqrtOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Rsqrt"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); auto* op = new TensorFlowRsqrtOperator; op->inputs.push_back(node.input(0)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); } -void ConvertSqrtOperator(const NodeDef& node, Model* model) { +void ConvertSqrtOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Sqrt"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); auto* op = new TensorFlowSqrtOperator; op->inputs.push_back(node.input(0)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); } -void ConvertSqueezeOperator(const NodeDef& node, Model* model) { +void ConvertSqueezeOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Squeeze"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); auto* op = new SqueezeOperator; op->inputs.push_back(node.input(0)); op->outputs.push_back(node.name()); @@ -541,18 +577,22 @@ void ConvertSqueezeOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertSquareOperator(const NodeDef& node, Model* model) { +void ConvertSquareOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Square"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); auto* op = new TensorFlowSquareOperator; op->inputs.push_back(node.input(0)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); } -void ConvertAddOperator(const NodeDef& node, Model* model) { +void ConvertAddOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Add"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new AddOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -560,9 +600,11 @@ void ConvertAddOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertMulOperator(const NodeDef& node, Model* model) { +void ConvertMulOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Mul"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new MulOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -570,9 +612,11 @@ void ConvertMulOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertSubOperator(const NodeDef& node, Model* model) { +void ConvertSubOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Sub"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new SubOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -580,9 +624,11 @@ void ConvertSubOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertSumOperator(const NodeDef& node, Model* model) { +void ConvertSumOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Sum"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowSumOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -593,9 +639,11 @@ void ConvertSumOperator(const NodeDef& node, Model* model) { } } -void ConvertTileOperator(const NodeDef& node, Model* model) { +void ConvertTileOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Tile"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowTileOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -603,9 +651,11 @@ void ConvertTileOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertSliceOperator(const NodeDef& node, Model* model) { +void ConvertSliceOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Slice"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 3); auto* op = new SliceOperator; for (int i = 0; i < 3; ++i) { op->inputs.push_back(node.input(i)); @@ -614,9 +664,11 @@ void ConvertSliceOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertPadOperator(const NodeDef& node, Model* model) { +void ConvertPadOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Pad"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new PadOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -624,18 +676,22 @@ void ConvertPadOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertShapeOperator(const NodeDef& node, Model* model) { +void ConvertShapeOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Shape"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); auto* op = new TensorFlowShapeOperator; op->inputs.push_back(node.input(0)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); } -void ConvertSplitOperator(const NodeDef& node, Model* model) { +void ConvertSplitOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Split"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowSplitOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -648,9 +704,11 @@ void ConvertSplitOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertMergeOperator(const NodeDef& node, Model* model) { +void ConvertMergeOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Merge"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowMergeOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -658,9 +716,11 @@ void ConvertMergeOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertSwitchOperator(const NodeDef& node, Model* model) { +void ConvertSwitchOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Switch"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowSwitchOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -669,9 +729,11 @@ void ConvertSwitchOperator(const NodeDef& node, Model* model) { op->outputs.push_back(node.name() + ":1"); model->operators.emplace_back(op); } -void ConvertSoftmaxOperator(const NodeDef& node, Model* model) { +void ConvertSoftmaxOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Softmax"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); auto* softmax = new SoftmaxOperator; softmax->inputs.push_back(input_name); @@ -682,9 +744,11 @@ void ConvertSoftmaxOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(softmax); } -void ConvertLRNOperator(const NodeDef& node, Model* model) { +void ConvertLRNOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "LRN"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); auto* lrn = new LocalResponseNormalizationOperator; lrn->inputs.push_back(input_name); @@ -696,9 +760,11 @@ void ConvertLRNOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(lrn); } -void ConvertMaxPoolOperator(const NodeDef& node, Model* model) { +void ConvertMaxPoolOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "MaxPool"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); // We only support NHWC, which is the default data_format. // So if data_format is not defined, we're all good. @@ -736,9 +802,11 @@ void ConvertMaxPoolOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(maxpool); } -void ConvertAvgPoolOperator(const NodeDef& node, Model* model) { +void ConvertAvgPoolOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "AvgPool"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto& input_name = node.input(0); // We only support NHWC, which is the default data_format. // So if data_format is not defined, we're all good. @@ -772,9 +840,11 @@ void ConvertAvgPoolOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(avgpool); } -void ConvertReshapeOperator(const NodeDef& node, Model* model) { +void ConvertReshapeOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Reshape"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowReshapeOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -782,9 +852,11 @@ void ConvertReshapeOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertMatMulOperator(const NodeDef& node, Model* model) { +void ConvertMatMulOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "MatMul"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); // Transpose flags should be easy to support, but we don't have a // GraphDef with them to test on at the moment. CHECK_EQ(GetBoolAttr(node, "transpose_a"), false); @@ -815,7 +887,9 @@ void ConvertMatMulOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(matmul); } -void ConvertConcatOperator(const NodeDef& node, Model* model) { +void ConvertConcatOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { Operator* op = nullptr; if (node.op() == "Concat") { op = new TensorFlowConcatOperator; @@ -824,8 +898,7 @@ void ConvertConcatOperator(const NodeDef& node, Model* model) { } else { LOG(FATAL) << "Expected Concat or ConcatV2"; } - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); CHECK_GE(num_inputs, 2); CHECK_EQ(num_inputs, 1 + GetIntAttr(node, "N")); for (int i = 0; i < num_inputs; ++i) { @@ -835,11 +908,12 @@ void ConvertConcatOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertAllOperator(const NodeDef& node, Model* model) { +void ConvertAllOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "All"); auto* op = new TensorFlowAllOperator; - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } @@ -847,11 +921,12 @@ void ConvertAllOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertAssertOperator(const NodeDef& node, Model* model) { +void ConvertAssertOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Assert"); auto* op = new TensorFlowAssertOperator; - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } @@ -859,11 +934,12 @@ void ConvertAssertOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertLessOperator(const NodeDef& node, Model* model) { +void ConvertLessOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Less"); auto* op = new TensorFlowLessOperator; - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } @@ -871,11 +947,12 @@ void ConvertLessOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertLessEqualOperator(const NodeDef& node, Model* model) { +void ConvertLessEqualOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "LessEqual"); auto* op = new TensorFlowLessEqualOperator; - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } @@ -883,11 +960,12 @@ void ConvertLessEqualOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertGreaterOperator(const NodeDef& node, Model* model) { +void ConvertGreaterOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Greater"); auto* op = new TensorFlowGreaterOperator; - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } @@ -895,11 +973,12 @@ void ConvertGreaterOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertGreaterEqualOperator(const NodeDef& node, Model* model) { +void ConvertGreaterEqualOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "GreaterEqual"); auto* op = new TensorFlowGreaterEqualOperator; - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } @@ -907,9 +986,11 @@ void ConvertGreaterEqualOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertMaxOperator(const NodeDef& node, Model* model) { +void ConvertMaxOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Max"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowMaxOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -920,9 +1001,11 @@ void ConvertMaxOperator(const NodeDef& node, Model* model) { } } -void ConvertMinOperator(const NodeDef& node, Model* model) { +void ConvertMinOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Min"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowMinOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -933,9 +1016,11 @@ void ConvertMinOperator(const NodeDef& node, Model* model) { } } -void ConvertMaximumOperator(const NodeDef& node, Model* model) { +void ConvertMaximumOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Maximum"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowMaximumOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -943,9 +1028,11 @@ void ConvertMaximumOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertMinimumOperator(const NodeDef& node, Model* model) { +void ConvertMinimumOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Minimum"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new TensorFlowMinimumOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -953,11 +1040,12 @@ void ConvertMinimumOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertUnsupportedOperator(const NodeDef& node, Model* model) { +void ConvertUnsupportedOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { LOG(INFO) << "Converting unsupported operation: " << node.op(); auto* op = new TensorFlowUnsupportedOperator; - const int num_inputs = - GetInputsCount(node, model->flags.drop_control_dependency()); + const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } @@ -976,7 +1064,9 @@ void ConvertUnsupportedOperator(const NodeDef& node, Model* model) { } } -void ConvertStridedSliceOperator(const NodeDef& node, Model* model) { +void ConvertStridedSliceOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "StridedSlice"); CHECK_EQ(node.input_size(), 4); @@ -991,7 +1081,7 @@ void ConvertStridedSliceOperator(const NodeDef& node, Model* model) { // Only 4D tensors are supported. GetIntAttr(node, "begin_mask") > 15 || GetIntAttr(node, "end_mask") > 15) { - ConvertUnsupportedOperator(node, model); + ConvertUnsupportedOperator(node, tf_import_flags, model); return; } @@ -1009,10 +1099,12 @@ void ConvertStridedSliceOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertPlaceholderOperator(const NodeDef& node, Model* model) { +void ConvertPlaceholderOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK(node.op() == "Placeholder" || node.op() == "LegacyFedInput"); if (node.op() == "Placeholder") { - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 0); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 0); } auto& array = model->GetOrCreateArray(node.name()); if (node.attr().count("dtype")) { @@ -1039,7 +1131,9 @@ void ConvertPlaceholderOperator(const NodeDef& node, Model* model) { } } -void ConvertNoOpOperator(const NodeDef& node, Model* model) {} +void ConvertNoOpOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) {} ArrayDataType GetArrayDataType(tensorflow::DataType tf_data_type) { if (tf_data_type == DT_UINT8) { @@ -1053,9 +1147,11 @@ ArrayDataType GetArrayDataType(tensorflow::DataType tf_data_type) { } } -void ConvertCastOperator(const NodeDef& node, Model* model) { +void ConvertCastOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Cast"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto tf_src_dtype = GetDataTypeAttr(node, "SrcT"); const auto tf_dst_dtype = GetDataTypeAttr(node, "DstT"); CHECK(tf_src_dtype == DT_UINT8 || tf_src_dtype == DT_INT32 || @@ -1072,9 +1168,11 @@ void ConvertCastOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertFloorOperator(const NodeDef& node, Model* model) { +void ConvertFloorOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Floor"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 1); const auto data_type = GetDataTypeAttr(node, "T"); CHECK(data_type == DT_FLOAT); auto* op = new FloorOperator; @@ -1083,9 +1181,11 @@ void ConvertFloorOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertGatherOperator(const NodeDef& node, Model* model) { +void ConvertGatherOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Gather"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); const auto indices_data_type = GetDataTypeAttr(node, "Tindices"); CHECK(indices_data_type == DT_INT32); auto* op = new GatherOperator; @@ -1095,9 +1195,11 @@ void ConvertGatherOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertResizeBilinearOperator(const NodeDef& node, Model* model) { +void ConvertResizeBilinearOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "ResizeBilinear"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 2); auto* op = new ResizeBilinearOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); @@ -1105,10 +1207,11 @@ void ConvertResizeBilinearOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertBatchNormWithGlobalNormalizationOperator(const NodeDef& node, - Model* model) { +void ConvertBatchNormWithGlobalNormalizationOperator( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "BatchNormWithGlobalNormalization"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 5); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 5); // TODO(ahentz): to really match tensorflow we need to add variance_epsilon // to the input, before feeding it into TensorFlowRsqrtOperator. @@ -1153,7 +1256,9 @@ void ConvertBatchNormWithGlobalNormalizationOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertFusedBatchNormOperator(const NodeDef& node, Model* model) { +void ConvertFusedBatchNormOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "FusedBatchNorm"); CHECK_EQ(node.input_size(), 5); @@ -1207,9 +1312,11 @@ void ConvertFusedBatchNormOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertSpaceToBatchNDOperator(const NodeDef& node, Model* model) { +void ConvertSpaceToBatchNDOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "SpaceToBatchND"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 3); CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32); CHECK_EQ(GetDataTypeAttr(node, "Tpaddings"), DT_INT32); auto* op = new SpaceToBatchNDOperator; @@ -1220,9 +1327,11 @@ void ConvertSpaceToBatchNDOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertBatchToSpaceNDOperator(const NodeDef& node, Model* model) { +void ConvertBatchToSpaceNDOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "BatchToSpaceND"); - CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3); + CHECK_EQ(GetInputsCount(node, tf_import_flags), 3); CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32); CHECK_EQ(GetDataTypeAttr(node, "Tcrops"), DT_INT32); auto* op = new BatchToSpaceNDOperator; @@ -1233,7 +1342,9 @@ void ConvertBatchToSpaceNDOperator(const NodeDef& node, Model* model) { model->operators.emplace_back(op); } -void ConvertMeanOperator(const NodeDef& node, Model* model) { +void ConvertMeanOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Mean"); CHECK_EQ(node.input_size(), 2); auto* op = new MeanOperator; @@ -1246,7 +1357,9 @@ void ConvertMeanOperator(const NodeDef& node, Model* model) { } } -void ConvertSvdfOperator(const NodeDef& node, Model* model) { +void ConvertSvdfOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Svdf"); bool has_bias = (node.input_size() == 4); auto* op = new SvdfOperator; @@ -1367,8 +1480,9 @@ bool InlineAllFunctions(GraphDef* graphdef) { } } // namespace -std::unique_ptr ImportTensorFlowGraphDef(const ModelFlags& model_flags, - const GraphDef& tf_graph) { +std::unique_ptr ImportTensorFlowGraphDef( + const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags, + const GraphDef& tf_graph) { LogDumpGraphDef(kLogLevelModelChanged, "AT IMPORT", tf_graph); GraphDef inlined_graph(tf_graph); @@ -1393,129 +1507,130 @@ std::unique_ptr ImportTensorFlowGraphDef(const ModelFlags& model_flags, for (auto node : inlined_graph.node()) { StripZeroOutputIndexFromInputs(&node); if (node.op() == "Const") { - ConvertConstOperator(node, model); + ConvertConstOperator(node, tf_import_flags, model); } else if (node.op() == "Conv2D") { - ConvertConvOperator(node, model); + ConvertConvOperator(node, tf_import_flags, model); } else if (node.op() == "DepthwiseConv2dNative") { - ConvertDepthwiseConvOperator(node, model); + ConvertDepthwiseConvOperator(node, tf_import_flags, model); } else if (node.op() == "DepthToSpace") { - ConvertDepthToSpaceOperator(node, model); + ConvertDepthToSpaceOperator(node, tf_import_flags, model); } else if (node.op() == "SpaceToDepth") { - ConvertSpaceToDepthOperator(node, model); + ConvertSpaceToDepthOperator(node, tf_import_flags, model); } else if (node.op() == "BiasAdd") { - ConvertBiasAddOperator(node, model); + ConvertBiasAddOperator(node, tf_import_flags, model); } else if (node.op() == "Relu") { - ConvertReluOperator(node, model); + ConvertReluOperator(node, tf_import_flags, model); } else if (node.op() == "Relu6") { - ConvertRelu6Operator(node, model); + ConvertRelu6Operator(node, tf_import_flags, model); } else if (node.op() == "Sigmoid") { - ConvertLogisticOperator(node, model); + ConvertLogisticOperator(node, tf_import_flags, model); } else if (node.op() == "Tanh") { - ConvertTanhOperator(node, model); + ConvertTanhOperator(node, tf_import_flags, model); } else if (node.op() == "MaxPool") { - ConvertMaxPoolOperator(node, model); + ConvertMaxPoolOperator(node, tf_import_flags, model); } else if (node.op() == "AvgPool") { - ConvertAvgPoolOperator(node, model); + ConvertAvgPoolOperator(node, tf_import_flags, model); } else if (node.op() == "Reshape") { - ConvertReshapeOperator(node, model); + ConvertReshapeOperator(node, tf_import_flags, model); } else if (node.op() == "MatMul") { - ConvertMatMulOperator(node, model); + ConvertMatMulOperator(node, tf_import_flags, model); } else if (node.op() == "Div" || node.op() == "RealDiv") { - ConvertDivOperator(node, model); + ConvertDivOperator(node, tf_import_flags, model); } else if (node.op() == "Identity" || node.op() == "CheckNumerics") { - ConvertIdentityOperator(node, model); + ConvertIdentityOperator(node, tf_import_flags, model); } else if (node.op() == "FakeQuantWithMinMaxVars") { - ConvertFakeQuantWithMinMaxVars(node, model); + ConvertFakeQuantWithMinMaxVars(node, tf_import_flags, model); } else if (node.op() == "FakeQuantWithMinMaxArgs") { - ConvertFakeQuantWithMinMaxArgs(node, model); + ConvertFakeQuantWithMinMaxArgs(node, tf_import_flags, model); } else if (node.op() == "Rsqrt") { - ConvertRsqrtOperator(node, model); + ConvertRsqrtOperator(node, tf_import_flags, model); } else if (node.op() == "Squeeze") { - ConvertSqueezeOperator(node, model); + ConvertSqueezeOperator(node, tf_import_flags, model); } else if (node.op() == "Sqrt") { - ConvertSqrtOperator(node, model); + ConvertSqrtOperator(node, tf_import_flags, model); } else if (node.op() == "Square") { - ConvertSquareOperator(node, model); + ConvertSquareOperator(node, tf_import_flags, model); } else if (node.op() == "Add") { - ConvertAddOperator(node, model); + ConvertAddOperator(node, tf_import_flags, model); } else if (node.op() == "Mul") { - ConvertMulOperator(node, model); + ConvertMulOperator(node, tf_import_flags, model); } else if (node.op() == "Sub") { - ConvertSubOperator(node, model); + ConvertSubOperator(node, tf_import_flags, model); } else if (node.op() == "Sum") { - ConvertSumOperator(node, model); + ConvertSumOperator(node, tf_import_flags, model); } else if (node.op() == "Tile") { - ConvertTileOperator(node, model); + ConvertTileOperator(node, tf_import_flags, model); } else if (node.op() == "Concat" || node.op() == "ConcatV2") { - ConvertConcatOperator(node, model); + ConvertConcatOperator(node, tf_import_flags, model); } else if (node.op() == "LRN") { - ConvertLRNOperator(node, model); + ConvertLRNOperator(node, tf_import_flags, model); } else if (node.op() == "Softmax") { - ConvertSoftmaxOperator(node, model); + ConvertSoftmaxOperator(node, tf_import_flags, model); } else if (node.op() == "All") { - ConvertAllOperator(node, model); + ConvertAllOperator(node, tf_import_flags, model); } else if (node.op() == "Assert") { - ConvertAssertOperator(node, model); + ConvertAssertOperator(node, tf_import_flags, model); } else if (node.op() == "Less") { - ConvertLessOperator(node, model); + ConvertLessOperator(node, tf_import_flags, model); } else if (node.op() == "LessEqual") { - ConvertLessEqualOperator(node, model); + ConvertLessEqualOperator(node, tf_import_flags, model); } else if (node.op() == "Greater") { - ConvertGreaterOperator(node, model); + ConvertGreaterOperator(node, tf_import_flags, model); } else if (node.op() == "GreaterEqual") { - ConvertGreaterEqualOperator(node, model); + ConvertGreaterEqualOperator(node, tf_import_flags, model); } else if (node.op() == "Max") { - ConvertMaxOperator(node, model); + ConvertMaxOperator(node, tf_import_flags, model); } else if (node.op() == "Min") { - ConvertMinOperator(node, model); + ConvertMinOperator(node, tf_import_flags, model); } else if (node.op() == "Maximum") { - ConvertMaximumOperator(node, model); + ConvertMaximumOperator(node, tf_import_flags, model); } else if (node.op() == "Minimum") { - ConvertMinimumOperator(node, model); + ConvertMinimumOperator(node, tf_import_flags, model); } else if (node.op() == "Merge") { - ConvertMergeOperator(node, model); + ConvertMergeOperator(node, tf_import_flags, model); } else if (node.op() == "Pad") { - ConvertPadOperator(node, model); + ConvertPadOperator(node, tf_import_flags, model); } else if (node.op() == "StridedSlice") { - ConvertStridedSliceOperator(node, model); + ConvertStridedSliceOperator(node, tf_import_flags, model); } else if (node.op() == "Shape") { - ConvertShapeOperator(node, model); + ConvertShapeOperator(node, tf_import_flags, model); } else if (node.op() == "Slice") { - ConvertSliceOperator(node, model); + ConvertSliceOperator(node, tf_import_flags, model); } else if (node.op() == "Split") { - ConvertSplitOperator(node, model); + ConvertSplitOperator(node, tf_import_flags, model); } else if (node.op() == "Switch") { - ConvertSwitchOperator(node, model); + ConvertSwitchOperator(node, tf_import_flags, model); } else if (node.op() == "Placeholder") { - ConvertPlaceholderOperator(node, model); + ConvertPlaceholderOperator(node, tf_import_flags, model); } else if (node.op() == "PlaceholderWithDefault") { - ConvertIdentityOperator(node, model); + ConvertIdentityOperator(node, tf_import_flags, model); } else if (node.op() == "LegacyFedInput") { - ConvertPlaceholderOperator(node, model); + ConvertPlaceholderOperator(node, tf_import_flags, model); } else if (node.op() == "NoOp") { - ConvertNoOpOperator(node, model); + ConvertNoOpOperator(node, tf_import_flags, model); } else if (node.op() == "Cast") { - ConvertCastOperator(node, model); + ConvertCastOperator(node, tf_import_flags, model); } else if (node.op() == "Floor") { - ConvertFloorOperator(node, model); + ConvertFloorOperator(node, tf_import_flags, model); } else if (node.op() == "Gather") { - ConvertGatherOperator(node, model); + ConvertGatherOperator(node, tf_import_flags, model); } else if (node.op() == "ResizeBilinear") { - ConvertResizeBilinearOperator(node, model); + ConvertResizeBilinearOperator(node, tf_import_flags, model); } else if (node.op() == "BatchNormWithGlobalNormalization") { - ConvertBatchNormWithGlobalNormalizationOperator(node, model); + ConvertBatchNormWithGlobalNormalizationOperator(node, tf_import_flags, + model); } else if (node.op() == "FusedBatchNorm") { - ConvertFusedBatchNormOperator(node, model); + ConvertFusedBatchNormOperator(node, tf_import_flags, model); } else if (node.op() == "SpaceToBatchND") { - ConvertSpaceToBatchNDOperator(node, model); + ConvertSpaceToBatchNDOperator(node, tf_import_flags, model); } else if (node.op() == "BatchToSpaceND") { - ConvertBatchToSpaceNDOperator(node, model); + ConvertBatchToSpaceNDOperator(node, tf_import_flags, model); } else if (node.op() == "Mean") { - ConvertMeanOperator(node, model); + ConvertMeanOperator(node, tf_import_flags, model); } else if (node.op() == "Svdf") { - ConvertSvdfOperator(node, model); + ConvertSvdfOperator(node, tf_import_flags, model); } else { - ConvertUnsupportedOperator(node, model); + ConvertUnsupportedOperator(node, tf_import_flags, model); } } @@ -1535,7 +1650,8 @@ std::unique_ptr ImportTensorFlowGraphDef(const ModelFlags& model_flags, } std::unique_ptr ImportTensorFlowGraphDef( - const ModelFlags& model_flags, const string& input_file_contents) { + const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags, + const string& input_file_contents) { std::unique_ptr tf_graph(new GraphDef); CHECK(ParseFromStringEitherTextOrBinary(input_file_contents, tf_graph.get())); @@ -1544,6 +1660,6 @@ std::unique_ptr ImportTensorFlowGraphDef( if (pruned_graph) { tf_graph = std::move(pruned_graph); } - return ImportTensorFlowGraphDef(model_flags, *tf_graph); + return ImportTensorFlowGraphDef(model_flags, tf_import_flags, *tf_graph); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.h b/tensorflow/contrib/lite/toco/import_tensorflow.h index d2eb423ca4..312e3b8f17 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.h +++ b/tensorflow/contrib/lite/toco/import_tensorflow.h @@ -23,11 +23,19 @@ limitations under the License. namespace toco { +struct TensorFlowImportFlags { + // If true, control dependencies will be dropped immediately + // during the import of the TensorFlow GraphDef. + bool drop_control_dependency = false; +}; + std::unique_ptr ImportTensorFlowGraphDef( - const ModelFlags& model_flags, const tensorflow::GraphDef& graph_def); + const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags, + const tensorflow::GraphDef& graph_def); std::unique_ptr ImportTensorFlowGraphDef( - const ModelFlags& model_flags, const string& input_file_contents); + const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags, + const string& input_file_contents); } // namespace toco diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc index 287a5d563d..dde602e186 100644 --- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc @@ -112,13 +112,6 @@ bool ParseModelFlagsFromCommandLineFlags( "exclusive " "with the 'batch' field: at most one of these two fields can be " "set."), - Flag( - "drop_control_dependency", - parsed_flags.drop_control_dependency.bind(), - parsed_flags.drop_control_dependency.default_value(), - "If true, ignore control dependency requirements in input TensorFlow " - "GraphDef. Otherwise an error will be raised upon control dependency " - "inputs."), Flag("rnn_states", parsed_flags.rnn_states.bind(), parsed_flags.rnn_states.default_value(), ""), Flag("model_checks", parsed_flags.model_checks.bind(), @@ -316,7 +309,6 @@ void ReadModelFlagsFromCommandLineFlags( } while (false) READ_MODEL_FLAG(variable_batch); - READ_MODEL_FLAG(drop_control_dependency); #undef READ_MODEL_FLAG diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto index bd6e374e8c..5b30904696 100644 --- a/tensorflow/contrib/lite/toco/model_flags.proto +++ b/tensorflow/contrib/lite/toco/model_flags.proto @@ -138,8 +138,4 @@ message ModelFlags { optional int32 count_max = 3 [default = -1]; } repeated ModelCheck model_checks = 14; - - // If true, ignore control dependency requirements in input TensorFlow - // GraphDef. Otherwise an error will be raised upon control dependency inputs. - optional bool drop_control_dependency = 15; } diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc index e97f59eb3f..83947d6b28 100644 --- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc @@ -103,6 +103,13 @@ bool ParseTocoFlagsFromCommandLineFlags( parsed_flags.allow_custom_ops.default_value(), "If true, allow TOCO to create TF Lite Custom operators for all the" "unsupported Tensorflow ops."), + Flag( + "drop_control_dependency", + parsed_flags.drop_control_dependency.bind(), + parsed_flags.drop_control_dependency.default_value(), + "If true, ignore control dependency requirements in input TensorFlow " + "GraphDef. Otherwise an error will be raised upon control dependency " + "inputs."), }; bool asked_for_help = *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help")); @@ -163,6 +170,7 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags, READ_TOCO_FLAG(drop_fake_quant, FlagRequirement::kNone); READ_TOCO_FLAG(reorder_across_fake_quant, FlagRequirement::kNone); READ_TOCO_FLAG(allow_custom_ops, FlagRequirement::kNone); + READ_TOCO_FLAG(drop_control_dependency, FlagRequirement::kNone); // Deprecated flag handling. if (parsed_toco_flags.input_type.specified()) { diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index 7bddce5b03..3b9d7e2257 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -36,7 +36,7 @@ enum FileFormat { // are not normally encoded in model files and in general may not be thought // of as properties of models, instead describing how models are to be // processed in the context of the present tooling job. -// Next Id: 12 +// Next Id: 13 message TocoFlags { // Input file format optional FileFormat input_format = 1; @@ -128,4 +128,12 @@ message TocoFlags { // If true, allow TOCO to create TF Lite Custom operators for all the // unsupported Tensorflow ops. optional bool allow_custom_ops = 10; + + // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF. + // If true, then control dependencies will be immediately dropped during + // import. + // If not set, the default behavior is as follows: + // - Default to false if the output format is TENSORFLOW_GRAPHDEF. + // - Default to true in all other cases. + optional bool drop_control_dependency = 12; } diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 1408f7cd7b..eabc145ad4 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -127,9 +127,16 @@ std::unique_ptr Import(const TocoFlags& toco_flags, const string& input_file_contents) { std::unique_ptr model; switch (toco_flags.input_format()) { - case TENSORFLOW_GRAPHDEF: - model = ImportTensorFlowGraphDef(model_flags, input_file_contents); + case TENSORFLOW_GRAPHDEF: { + TensorFlowImportFlags tf_import_flags; + tf_import_flags.drop_control_dependency = + toco_flags.has_drop_control_dependency() + ? toco_flags.drop_control_dependency() + : (toco_flags.output_format() != TENSORFLOW_GRAPHDEF); + model = ImportTensorFlowGraphDef(model_flags, tf_import_flags, + input_file_contents); break; + } case TFLITE: model = toco::tflite::Import(model_flags, input_file_contents); ResolveModelFlags(model_flags, model.get()); diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index ec24f76dc8..3ee060f9b9 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1016,7 +1016,6 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { } RESOLVE_MODEL_FLAG(variable_batch) - RESOLVE_MODEL_FLAG(drop_control_dependency) #undef RESOLVE_MODEL_FLAG -- GitLab From cf245240ca90e6b552415f720342ae1acd326590 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 22 Nov 2017 11:26:09 -0800 Subject: [PATCH 0761/1801] [XLA:CPU] Add a basic implementation for ExecuteAsyncOnStream PiperOrigin-RevId: 176680801 --- tensorflow/compiler/xla/client/client.h | 9 ++ .../xla/service/cpu/cpu_executable.cc | 96 ++++++++++++++----- tensorflow/compiler/xla/tests/BUILD | 1 + tensorflow/compiler/xla/tests/client_test.cc | 67 ++++++++++--- 4 files changed, 135 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index a716159f9e..cf6878dd8e 100644 --- a/tensorflow/compiler/xla/client/client.h +++ b/tensorflow/compiler/xla/client/client.h @@ -67,6 +67,15 @@ class Client { std::vector arguments; ExecutionOptions execution_options; ExecutionProfile* execution_profile; + + ComputationInstance(const Computation& computation, + std::vector arguments, + ExecutionOptions execution_options, + ExecutionProfile* execution_profile) + : computation(computation), + arguments(std::move(arguments)), + execution_options(execution_options), + execution_profile(execution_profile) {} }; // Executes a list ComputationInstances and returns global data produced from diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index ddbe7ab341..e6ef9d6314 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/stream_executor/host/host_stream.h" namespace se = ::perftools::gputools; @@ -241,6 +242,37 @@ Status CpuExecutable::ExecuteComputeFunction( return Status::OK(); } +static void LogLiveAddresses( + const std::unordered_set& marked_addresses) { + VLOG(3) << "Live addresses in output marking found " + << marked_addresses.size() << " addresses:\n" + << tensorflow::str_util::Join( + marked_addresses, ", ", [](string* out, const void* address) { + tensorflow::strings::StrAppend( + out, tensorflow::strings::Printf("%p", address)); + }); +} + +static Status DeallocateTempBuffers( + DeviceMemoryAllocator* allocator, se::Stream* stream, + tensorflow::gtl::ArraySlice buffers, + const std::unordered_set& marked_addresses) { + // Keep those marked live because they are referenced by the output of the + // computation and are needed by the service. They will be deallocated by the + // service. + for (size_t i = 0; i < buffers.size(); ++i) { + se::DeviceMemoryBase alloc = buffers[i]; + if (marked_addresses.count(alloc.opaque()) == 0 && !alloc.is_null()) { + VLOG(3) << "CpuExecutable deallocating buffer #" << i << " [" + << alloc.opaque() << "]"; + TF_RETURN_IF_ERROR( + allocator->Deallocate(stream->parent()->device_ordinal(), &alloc)); + } + } + + return Status::OK(); +} + StatusOr CpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, @@ -263,26 +295,9 @@ StatusOr CpuExecutable::ExecuteOnStream( MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(), &marked_addresses); - VLOG(3) << "Live addresses in output marking found " - << marked_addresses.size() << " addresses:\n" - << tensorflow::str_util::Join( - marked_addresses, ", ", [](string* out, const void* address) { - tensorflow::strings::StrAppend( - out, tensorflow::strings::Printf("%p", address)); - }); - - // Computation is done - deallocate temp buffers. Keep those marked live - // because they are referenced by the output of the computation and are needed - // by the service. They will be deallocated by the service. - for (size_t i = 0; i < buffers.size(); ++i) { - se::DeviceMemoryBase alloc = buffers[i]; - if (marked_addresses.count(alloc.opaque()) == 0 && !alloc.is_null()) { - VLOG(3) << "CpuExecutable deallocating buffer #" << i << " [" - << alloc.opaque() << "]"; - TF_RETURN_IF_ERROR(memory_allocator->Deallocate( - stream->parent()->device_ordinal(), &alloc)); - } - } + LogLiveAddresses(marked_addresses); + TF_RETURN_IF_ERROR(DeallocateTempBuffers(memory_allocator, stream, buffers, + marked_addresses)); return top_level_output; } @@ -360,9 +375,44 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { - // TODO(b/30671675): Implement asynchronous execution mode. - return Unimplemented( - "Asynchronous execution on stream is not yet supported on CPU."); + if (hlo_profiling_enabled()) { + return Unimplemented( + "Asynchronous execution on stream with hlo profiling is not yet " + "supported on CPU."); + } + + auto* host_stream = dynamic_cast( + run_options->stream()->implementation()); + se::Stream* stream = run_options->stream(); + DeviceMemoryAllocator* memory_allocator = run_options->allocator(); + std::vector buffers(assignment_->Allocations().size()); + + TF_RETURN_IF_ERROR(AllocateBuffers( + memory_allocator, stream->parent()->device_ordinal(), &buffers)); + + // Mark the buffers that are actually live (used in the output) when the + // computation finishes executing. + std::unordered_set marked_addresses; + TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice, + assignment_->GetUniqueTopLevelOutputSlice()); + se::DeviceMemoryBase top_level_output = buffers[result_slice.index()]; + MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(), + &marked_addresses); + + LogLiveAddresses(marked_addresses); + + host_stream->EnqueueTask([this, run_options, arguments, buffers, + marked_addresses, memory_allocator, stream]() { + // Failing a CHECK here is not great, but I don't see an obvious way to + // return a failed Status asynchronously. + TF_CHECK_OK(ExecuteComputeFunction(&run_options->run_options(), arguments, + buffers, + /*hlo_execution_profile=*/nullptr)); + TF_CHECK_OK(DeallocateTempBuffers(memory_allocator, stream, buffers, + marked_addresses)); + }); + + return top_level_output; } /*static*/ int64 CpuExecutable::ShapeSizeBytes(const Shape& shape) { diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 6811dbb39f..13d651ea6f 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1343,6 +1343,7 @@ xla_test( srcs = ["client_test.cc"], deps = [ "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc index 183bcf1dd3..8853ed9e57 100644 --- a/tensorflow/compiler/xla/tests/client_test.cc +++ b/tensorflow/compiler/xla/tests/client_test.cc @@ -20,10 +20,12 @@ limitations under the License. #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/test.h" @@ -42,26 +44,26 @@ TEST_F(ClientTest, ExecuteWithLayout) { for (const std::vector& transfer_layout : layouts) { b.Add(b.ConstantR2({{1, 2}, {3, 4}}), b.ConstantR2({{10, 20}, {30, 40}})); - auto computation = b.Build(); - ASSERT_TRUE(computation.ok()) << computation.status(); + TF_ASSERT_OK_AND_ASSIGN(auto computation, b.Build()); ExecutionOptions execution_options = execution_options_; *execution_options.mutable_shape_with_output_layout() = ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2}, execute_layout); - std::unique_ptr data = - client_->Execute(computation.ValueOrDie(), {}, &execution_options) - .ConsumeValueOrDie(); + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr data, + client_->Execute(computation, {}, &execution_options)); std::unique_ptr expected_literal = Literal::CreateR2WithLayout( {{11, 22}, {33, 44}}, LayoutUtil::MakeLayout(transfer_layout)); - auto computed = client_->Transfer(*data, &expected_literal->shape()); + TF_ASSERT_OK_AND_ASSIGN( + auto computed, client_->Transfer(*data, &expected_literal->shape())); - LiteralTestUtil::AssertEqualShapesAndLayouts( - expected_literal->shape(), computed.ValueOrDie()->shape()); - LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); + LiteralTestUtil::AssertEqualShapesAndLayouts(expected_literal->shape(), + computed->shape()); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed); } } } @@ -72,8 +74,7 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) { b.Tuple({b.ConstantR2({{1, 2}, {3, 4}}), b.ConstantR2({{10, 20}, {30, 40}})}); - auto computation = b.Build(); - ASSERT_TRUE(computation.ok()) << computation.status(); + TF_ASSERT_OK_AND_ASSIGN(auto computation, b.Build()); ExecutionOptions execution_options = execution_options_; // Create a result shape with one element column major and the other row @@ -85,10 +86,9 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) { ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2}, /*minor_to_major=*/{1, 0})}); - auto result = - client_ - ->ExecuteAndTransfer(computation.ValueOrDie(), {}, &execution_options) - .ConsumeValueOrDie(); + TF_ASSERT_OK_AND_ASSIGN( + auto result, + client_->ExecuteAndTransfer(computation, {}, &execution_options)); LiteralTestUtil::ExpectR2Equal({{1, 2}, {3, 4}}, result->tuple_literals(0)); LiteralTestUtil::ExpectR2Equal({{10, 20}, {30, 40}}, @@ -107,5 +107,42 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) { /*minor_to_major=*/{1, 0}))); } +TEST_F(ClientTest, DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(ExecuteParallel))) { + Computation add_with_one_arg, mul_with_two_args, dot_with_one_arg; + Shape shape = ShapeUtil::MakeShape(S32, {2, 2}); + + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr const_arg, + client_->TransferToServer(*Literal::CreateR2({{5, 6}, {7, 8}}))); + + ComputationBuilder b(client_, TestName() + ".add"); + b.Add(b.Parameter(0, shape, "param_0"), + b.ConstantR2({{1, 2}, {3, 4}})); + TF_ASSERT_OK_AND_ASSIGN(add_with_one_arg, b.Build()); + + // We can't really test parallel execution on CPU since all of the cores in a + // CPU are presented as a single device. So for now we test "parallel" + // execution on a single device. + std::vector computation_instances; + TF_ASSERT_OK_AND_ASSIGN(std::vector devices, + client_->GetDeviceHandles(1)); + ASSERT_EQ(devices.size(), 1); + + ExecutionOptions options = execution_options_; + *options.add_device_handles() = devices[0]; + computation_instances.push_back(Client::ComputationInstance( + add_with_one_arg, {const_arg.get()}, options, nullptr)); + + TF_ASSERT_OK_AND_ASSIGN(auto results, + client_->ExecuteParallel(computation_instances)); + auto expected_result = Literal::CreateR2({{6, 8}, {10, 12}}); + + TF_ASSERT_OK_AND_ASSIGN( + auto result_literal, + client_->Transfer(*results[0], &expected_result->shape())); + + LiteralTestUtil::ExpectEqual(*expected_result, *result_literal); +} + } // namespace } // namespace xla -- GitLab From b0dbca111c80bdd8f1a2c28afc24d597cb1eac89 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Nov 2017 11:36:29 -0800 Subject: [PATCH 0762/1801] Fixes to windows builds. (#14803) * Fixes to windows builds. -Disable failing data_utils_test in cmake and bazel builds. -Disable session_partial_run_test in bazel build. It is already not running under cmake build. -Increase cmake build log verbosity, as we still canot see the root cause of failures. * Use unix style path dividers in cmake files. --- tensorflow/contrib/cmake/tf_tests.cmake | 1 + tensorflow/python/BUILD | 1 + tensorflow/python/keras/BUILD | 1 + tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat | 2 +- tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 2 +- 5 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index ba9e307835..18b71d1f9a 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -224,6 +224,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) # Numerical issues, calculations off. "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py" "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/wals_test.py" + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py" # Float division by zero "${tensorflow_source_dir}/tensorflow/python/kernel_tests/benchmark_test.py" # Flaky, for unknown reasons. Cannot reproduce in terminal. Revisit once we can get stack traces. diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a438768809..5eb9b79ee6 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3372,6 +3372,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_gpu", + "no_windows", ], deps = [ ":array_ops", diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index e4992afbca..d9391dd6c5 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -556,6 +556,7 @@ py_test( srcs = ["_impl/keras/utils/data_utils_test.py"], srcs_version = "PY2AND3", tags = [ + "no_windows", "noasan", # times out "notsan", ], diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat index 6e600e2dcf..56bff07774 100644 --- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat @@ -37,4 +37,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% :: Run msbuild in the resulting VS project files to build a pip package. -%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj \ No newline at end of file +%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index 44d8252a7a..832943ad6c 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -38,4 +38,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% :: Run msbuild in the resulting VS project files to build a pip package. -%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj +%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From 791ef8383d165c116f4c5fc3fda12ebc7eb07edf Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Wed, 22 Nov 2017 11:40:46 -0800 Subject: [PATCH 0763/1801] python testing for is_feedable and is_fetchable nodes in the graph PiperOrigin-RevId: 176682768 --- tensorflow/python/framework/ops_test.py | 16 +++++++++++++++- .../kernel_tests/control_flow_ops_py_test.py | 14 ++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index a4780fdc05..7ae7b5cb7f 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -691,7 +691,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase): []) ops.get_default_graph()._create_op_from_tf_operation(c_op) else: - # Test pure-Python version to make sure C API has same behavior. + # Test pure-Python version to make sure C API has same behavior. test_ops.int_input(x, name="myop") return x @@ -1741,6 +1741,20 @@ class GraphTest(test_util.TensorFlowTestCase): self._AssertDefault(g0) self._AssertDefault(orig) + def testPreventFeeding(self): + g = ops.Graph() + a = constant_op.constant(2.0) + self.assertTrue(g.is_feedable(a)) + g.prevent_feeding(a) + self.assertFalse(g.is_feedable(a)) + + def testPreventFetching(self): + g = ops.Graph() + a = constant_op.constant(2.0) + self.assertTrue(g.is_fetchable(a)) + g.prevent_fetching(a.op) + self.assertFalse(g.is_fetchable(a)) + def testAsGraphElementConversions(self): class ConvertibleObj(object): diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index fc125daf38..1b7f9b110c 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -352,14 +352,20 @@ class ControlFlowTest(test.TestCase): grad = gradients_impl.gradients(y, [v]) self.assertAllEqual([None], grad) - def testFetchables(self): + def testFetchable(self): with self.test_session() as sess: x = array_ops.placeholder(dtypes.float32) control_flow_ops.cond( constant_op.constant(True), lambda: x + 2, lambda: x + 0) - tensor_names = all_fetchables() - for name in tensor_names: - sess.run(name, feed_dict={x: 3}) + graph = ops.get_default_graph() + for op in graph.get_operations(): + for t in op.inputs: + if graph.is_fetchable(t.op): + sess.run(t, feed_dict={x: 3}) + else: + with self.assertRaisesRegexp(ValueError, + "has been marked as not fetchable"): + sess.run(t, feed_dict={x: 3}) def testFeedable(self): with self.test_session() as sess: -- GitLab From e2c652ea018b9d2a4cc8453ea92b10d208ba4265 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 22 Nov 2017 11:59:46 -0800 Subject: [PATCH 0764/1801] Make PaddedBatchDataset saveable. PiperOrigin-RevId: 176685098 --- .../kernel_tests/batch_dataset_op_test.py | 36 +++++++ .../core/kernels/padded_batch_dataset_op.cc | 99 ++++++++++++++++--- 2 files changed, 122 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index d7437cba73..b0064f8ae7 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -723,5 +723,41 @@ class BatchDatasetSerializationTest( num_outputs) +class PaddedBatchDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def testPaddedBatch(self): + + def build_dataset(seq_lens): + return dataset_ops.Dataset.from_tensor_slices(seq_lens).map( + lambda x: array_ops.fill([x], x)).padded_batch( + 4, padded_shapes=[-1]) + + seq_lens1 = np.random.randint(1, 20, size=(32,)).astype(np.int32) + seq_lens2 = np.random.randint(21, 40, size=(32,)).astype(np.int32) + self.run_core_tests(lambda: build_dataset(seq_lens1), + lambda: build_dataset(seq_lens2), 8) + + def testPaddedBatchNonDefaultPadding(self): + + def build_dataset(seq_lens): + + def fill_tuple(x): + filled = array_ops.fill([x], x) + return (filled, string_ops.as_string(filled)) + + padded_shape = [-1] + return dataset_ops.Dataset.from_tensor_slices(seq_lens).map( + fill_tuple).padded_batch( + 4, + padded_shapes=(padded_shape, padded_shape), + padding_values=(-1, "")) + + seq_lens1 = np.random.randint(1, 20, size=(32,)).astype(np.int32) + seq_lens2 = np.random.randint(21, 40, size=(32,)).astype(np.int32) + self.run_core_tests(lambda: build_dataset(seq_lens1), + lambda: build_dataset(seq_lens2), 8) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/kernels/padded_batch_dataset_op.cc b/tensorflow/core/kernels/padded_batch_dataset_op.cc index cfc77690b5..7c28d955e1 100644 --- a/tensorflow/core/kernels/padded_batch_dataset_op.cc +++ b/tensorflow/core/kernels/padded_batch_dataset_op.cc @@ -181,16 +181,18 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel { padding_values.push_back(tensor::DeepCopy(padding_value_t)); } - *output = new Dataset(batch_size, std::move(padded_shapes), + *output = new Dataset(ctx, batch_size, std::move(padded_shapes), std::move(padding_values), input); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(int64 batch_size, std::vector padded_shapes, + Dataset(OpKernelContext* ctx, int64 batch_size, + std::vector padded_shapes, std::vector padding_values, const DatasetBase* input) - : batch_size_(batch_size), + : GraphDatasetBase(ctx), + batch_size_(batch_size), padded_shapes_(std::move(padded_shapes)), padding_values_(std::move(padding_values)), input_(input) { @@ -232,6 +234,47 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel { ")::Dataset"); } + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + Node* batch_size = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size)); + + std::vector padded_shapes; + padded_shapes.reserve(padded_shapes_.size()); + for (int i = 0; i < padded_shapes_.size(); i++) { + Node* node; + Tensor t(DT_INT64, TensorShape({padded_shapes_[i].dims()})); + for (int j = 0; j < padded_shapes_[i].dims(); j++) { + t.vec()(j) = padded_shapes_[i].dim_size(j); + } + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + padded_shapes.emplace_back(node); + } + + std::vector padding_values; + padding_values.reserve(padding_values_.size()); + for (const Tensor& t : padding_values_) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + padding_values.emplace_back(node); + } + + AttrValue output_types; + b->BuildAttrValue(output_dtypes(), &output_types); + + AttrValue N; + b->BuildAttrValue(padded_shapes_.size(), &N); + + TF_RETURN_IF_ERROR( + b->AddDataset(this, {{0, input_graph_node}, {1, batch_size}}, + {{2, padded_shapes}, {3, padding_values}}, + {{"Toutput_types", output_types}, {"N", N}}, output)); + return Status::OK(); + } + private: // Copies element into the index^th slice of parent (in the 0th dimension). // @@ -248,17 +291,25 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel { // Each row of `batch_elements` is a tuple of tensors from the // input iterator. std::vector> batch_elements; - batch_elements.reserve(dataset()->batch_size_); { mutex_lock l(mu_); - *end_of_sequence = false; - for (int i = 0; i < dataset()->batch_size_ && !*end_of_sequence; - ++i) { - std::vector batch_element_tuple; - TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple, - end_of_sequence)); - if (!*end_of_sequence) { - batch_elements.push_back(std::move(batch_element_tuple)); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } else { + *end_of_sequence = false; + batch_elements.reserve(dataset()->batch_size_); + for (int i = 0; i < dataset()->batch_size_ && !*end_of_sequence; + ++i) { + std::vector batch_element_tuple; + TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple, + end_of_sequence)); + if (!*end_of_sequence) { + batch_elements.push_back(std::move(batch_element_tuple)); + } + } + if (*end_of_sequence) { + input_impl_.reset(); } } } @@ -347,6 +398,28 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (input_impl_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + else + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("exhausted"), "")); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (reader->Contains(full_name("exhausted"))) { + input_impl_.reset(); + } else { + input_impl_ = dataset()->input_->MakeIterator(prefix()); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } + return Status::OK(); + } + private: mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); -- GitLab From 8200bee9e1433a311d7b9e820c09110a88eb6c58 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Wed, 22 Nov 2017 12:03:10 -0800 Subject: [PATCH 0765/1801] [tf.data] Saveable iteartor for InterleaveDataset. PiperOrigin-RevId: 176685601 --- .../contrib/data/python/kernel_tests/BUILD | 1 + .../interleave_dataset_op_test.py | 42 +++++ .../core/kernels/interleave_dataset_op.cc | 151 ++++++++++++++++-- 3 files changed, 183 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 1923c0586a..0697fbdec1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -185,6 +185,7 @@ py_test( "manual", # b/67958761 ], deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:transformation_ops", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py index 0299e3a1b7..c6e8ed5bdc 100644 --- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py @@ -22,8 +22,10 @@ import math import threading import time +import numpy as np from six.moves import zip_longest +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.python.framework import dtypes @@ -209,6 +211,46 @@ class InterleaveDatasetTest(test.TestCase): sess.run(get_next) +class InterleaveDatasetSeriazationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_iterator_graph(self, input_values, cycle_length, block_length): + repeat_count = 2 + return dataset_ops.Dataset.from_tensor_slices(input_values).repeat( + repeat_count).interleave( + lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x), + cycle_length, block_length) + + def testSerializationCore(self): + input_values = np.array([4, 5, 6], dtype=np.int64) + num_outputs = np.sum(input_values) * 2 + # cycle_length > 1, block_length > 1 + cycle_length = 2 + block_length = 3 + # pylint: disable=g-long-lambda + self.run_core_tests( + lambda: self._build_iterator_graph( + input_values, cycle_length, block_length), + lambda: self._build_iterator_graph( + input_values, cycle_length * 2, block_length * 1), + num_outputs) + # cycle_length = 1 + cycle_length = 1 + block_length = 3 + self.run_core_tests( + lambda: self._build_iterator_graph( + input_values, cycle_length, block_length), + None, num_outputs) + # block_length = 1 + cycle_length = 2 + block_length = 1 + self.run_core_tests( + lambda: self._build_iterator_graph( + input_values, cycle_length, block_length), + None, num_outputs) + # pylint: enable=g-long-lambda + + class ParallelInterleaveDatasetTest(test.TestCase): def setUp(self): diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/interleave_dataset_op.cc index c01d1c7cbb..cbee68b2db 100644 --- a/tensorflow/core/kernels/interleave_dataset_op.cc +++ b/tensorflow/core/kernels/interleave_dataset_op.cc @@ -73,18 +73,22 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { std::move(other_arguments), &captured_func)); - *output = new Dataset(input, std::move(captured_func), cycle_length, - block_length, output_types_, output_shapes_); + *output = + new Dataset(ctx, input, func_, std::move(captured_func), cycle_length, + block_length, output_types_, output_shapes_); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(const DatasetBase* input, + Dataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, std::unique_ptr captured_func, int64 cycle_length, int64 block_length, const DataTypeVector& output_types, const std::vector& output_shapes) - : input_(input), + : GraphDatasetBase(ctx), + input_(input), + func_(func), captured_func_(std::move(captured_func)), cycle_length_(cycle_length), block_length_(block_length), @@ -110,13 +114,47 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "InterleaveDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); + Node* input_node; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node)); + Node* cycle_length_node; + TF_RETURN_IF_ERROR(b->AddScalar(cycle_length_, &cycle_length_node)); + Node* block_length_node; + TF_RETURN_IF_ERROR(b->AddScalar(block_length_, &block_length_node)); + DataTypeVector other_arguments_types; + other_arguments_types.reserve(captured_func_->captured_inputs().size()); + std::vector other_arguments; + other_arguments.reserve(captured_func_->captured_inputs().size()); + for (const Tensor& t : captured_func_->captured_inputs()) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + other_arguments.emplace_back(node); + other_arguments_types.emplace_back(t.dtype()); + } + AttrValue f; + b->BuildAttrValue(func_, &f); + AttrValue other_arguments_types_attr; + b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); + + TF_RETURN_IF_ERROR(b->AddDataset( + this, + {{0, input_node}, {2, cycle_length_node}, {3, block_length_node}}, + {{1, other_arguments}}, + {{"f", f}, {"Targuments", other_arguments_types_attr}}, output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: explicit Iterator(const Params& params) : DatasetIterator(params), input_impl_(params.dataset->input_->MakeIterator(params.prefix)), - current_elements_(params.dataset->cycle_length_) {} + current_elements_(params.dataset->cycle_length_), + args_list_(params.dataset->cycle_length_) {} void AdvanceToNextInCycle() EXCLUSIVE_LOCKS_REQUIRED(mu_) { block_index_ = 0; @@ -150,18 +188,19 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { // We have reached the end of the current element, so move // on to the next element in the cycle. current_elements_[cycle_index_].reset(); + args_list_[cycle_index_].clear(); --num_open_; AdvanceToNextInCycle(); } else if (!end_of_input_) { // Get the next element from the input dataset, and create // an iterator from it. - std::vector args; - TF_RETURN_IF_ERROR( - input_impl_->GetNext(ctx, &args, &end_of_input_)); + TF_RETURN_IF_ERROR(input_impl_->GetNext( + ctx, &args_list_[cycle_index_], &end_of_input_)); if (!end_of_input_) { TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement( - ctx, args, cycle_index_, dataset()->captured_func_.get(), - prefix(), ¤t_elements_[cycle_index_])); + ctx, args_list_[cycle_index_], cycle_index_, + dataset()->captured_func_.get(), prefix(), + ¤t_elements_[cycle_index_])); ++num_open_; } } else { @@ -173,11 +212,100 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("cycle_index"), cycle_index_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("block_index"), block_index_)); + if (end_of_input_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("end_of_input"), "")); + } + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("num_open"), num_open_)); + TF_RETURN_IF_ERROR(SaveCurrentElements(writer)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + int64 cycle_index; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("cycle_index"), &cycle_index)); + cycle_index_ = size_t(cycle_index); + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("block_index"), &block_index_)); + if (reader->Contains(full_name("end_of_input"))) end_of_input_ = true; + int64 num_open; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("num_open"), &num_open)); + num_open_ = size_t(num_open); + TF_RETURN_IF_ERROR(RestoreCurrentElements(ctx, reader)); + return Status::OK(); + } + private: + Status SaveCurrentElements(IteratorStateWriter* writer) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + for (int idx = 0; idx < current_elements_.size(); idx++) { + if (current_elements_[idx]) { + TF_RETURN_IF_ERROR(SaveParent(writer, current_elements_[idx])); + TF_RETURN_IF_ERROR(writer->WriteScalar( + full_name(strings::StrCat("args_size[", idx, "]")), + args_list_[idx].size())); + for (int i = 0; i < args_list_[idx].size(); i++) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + full_name(strings::StrCat("args_list_[", idx, "][", i, "]")), + args_list_[idx][i])); + } + } + } + return Status::OK(); + } + + Status RestoreCurrentElements(OpKernelContext* ctx, + IteratorStateReader* reader) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + IteratorContext::Params params; + params.env = ctx->env(); + params.runner = *(ctx->runner()); + IteratorContext iter_ctx(std::move(params)); + for (int idx = 0; idx < current_elements_.size(); idx++) { + if (reader->Contains( + full_name(strings::StrCat("args_size[", idx, "]")))) { + int64 args_size; + TF_RETURN_IF_ERROR(reader->ReadScalar( + full_name(strings::StrCat("args_size[", idx, "]")), + &args_size)); + args_list_[idx].resize(args_size); + for (int i = 0; i < args_size; i++) { + TF_RETURN_IF_ERROR(reader->ReadTensor( + full_name(strings::StrCat("args_list_[", idx, "][", i, "]")), + &args_list_[idx][i])); + } + TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement( + &iter_ctx, args_list_[idx], idx, + dataset()->captured_func_.get(), prefix(), + ¤t_elements_[idx])); + TF_RETURN_IF_ERROR( + RestoreParent(ctx, reader, current_elements_[idx])); + } else { + current_elements_[idx].reset(); + } + } + return Status::OK(); + } + mutex mu_; const std::unique_ptr input_impl_ GUARDED_BY(mu_); std::vector> current_elements_ GUARDED_BY(mu_); + std::vector> args_list_ GUARDED_BY(mu_); size_t cycle_index_ GUARDED_BY(mu_) = 0; int64 block_index_ GUARDED_BY(mu_) = 0; bool end_of_input_ GUARDED_BY(mu_) = false; @@ -185,6 +313,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { }; const DatasetBase* const input_; + const NameAttrList func_; const std::unique_ptr captured_func_; const int64 cycle_length_; const int64 block_length_; -- GitLab From c133aff68cf9020b1eea41b6f0432d309d6b0955 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Nov 2017 12:13:39 -0800 Subject: [PATCH 0766/1801] Count the number of loops instead of approximating it. Also added a few minor cleanups PiperOrigin-RevId: 176686819 --- tensorflow/core/framework/shape_inference.cc | 4 ++-- tensorflow/core/grappler/costs/graph_properties.cc | 13 ++++++++++--- tensorflow/core/grappler/costs/graph_properties.h | 3 ++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index f30272e250..ee9192d4a1 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -921,7 +921,7 @@ Status InferenceContext::Add(DimensionHandle first, DimensionOrConstant second, if (first_value == 0) { *out = MakeDim(second); } else if (second_value == 0) { - *out = MakeDim(first); + *out = first; } else if (first_value == kUnknownDim || second_value == kUnknownDim) { *out = UnknownDim(); } else { @@ -946,7 +946,7 @@ Status InferenceContext::Subtract(DimensionHandle first, const int64 second_value = Value(second); // Special cases. if (second_value == 0) { - *out = MakeDim(first); + *out = first; } else if (first_value == kUnknownDim || second_value == kUnknownDim) { *out = UnknownDim(); } else { diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index bf49d78a1a..abcd83a01e 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -702,12 +702,16 @@ Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner, Status GraphProperties::PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, const std::unordered_map>& - resources) const { + resources, + int num_loops) const { // Limit the number of iterations to prevent infinite loops in the presence of // incorrect shape functions. The algoritm should converge in at most // num_nested_loops^2 * max_rank. We approximate max_rank with the constant 4. // The same applies to resources. - const int64 num_loops = new_shapes->size(); + VLOG(1) << "Propagating (relax=" << relax << ") " << new_shapes->size() + << " new shapes through " << num_loops << " loops and " + << resources.size() << " resources" << std::endl; + const int64 max_loop_length = item_.graph.node_size(); const int64 max_rank = 4; const int64 max_loop_iterations = @@ -818,6 +822,7 @@ Status GraphProperties::InferStatically() { std::unordered_map> resources; std::unordered_set enter_nodes; std::unordered_set merge_nodes; + int num_loops = 0; for (const Node* const node : graph.nodes()) { for (int i = 0; i < node->num_inputs(); ++i) { if (node->input_type(i) == DataType::DT_RESOURCE) { @@ -830,6 +835,8 @@ Status GraphProperties::InferStatically() { enter_nodes.insert(node); } else if (node->IsMerge()) { merge_nodes.insert(node); + } else if (node->IsNextIteration()) { + ++num_loops; } } @@ -853,7 +860,7 @@ Status GraphProperties::InferStatically() { } // Propagate shapes normally. TF_RETURN_IF_ERROR( - PropagateShapes(&refiner, relax, &new_shapes, resources)); + PropagateShapes(&refiner, relax, &new_shapes, resources, num_loops)); } // Track shapes globally across the graph. diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index ee279b7e0a..5df190ba01 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -102,7 +102,8 @@ class GraphProperties { Status PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, const std::unordered_map>& - resources) const; + resources, + int num_loops) const; }; } // end namespace grappler -- GitLab From c5c642e051f1a7876d099bfcd9f8a2ecaf7227b8 Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Wed, 22 Nov 2017 12:22:45 -0800 Subject: [PATCH 0767/1801] Remove useless statements in Dockerfiles (#14808) 'CMD ["/bin/bash"]' is not useful since it's already provided by the base ubuntu image. 'RUN ["/bin/bash"]' looks like a typo and just creates an extra empty layer. Signed-off-by: Felix Abecassis --- tensorflow/tools/docker/Dockerfile.devel | 1 - tensorflow/tools/docker/Dockerfile.devel-gpu | 2 -- tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 2 -- 3 files changed, 5 deletions(-) diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 1a0145b078..3525c7524f 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -101,4 +101,3 @@ EXPOSE 6006 EXPOSE 8888 WORKDIR /root -CMD ["/bin/bash"] diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 21a44ee404..041f45971b 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -102,5 +102,3 @@ WORKDIR /root EXPOSE 6006 # IPython EXPOSE 8888 - -RUN ["/bin/bash"] diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 index 9bcc3925a8..3bedc8cf34 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 @@ -113,5 +113,3 @@ WORKDIR /root EXPOSE 6006 # IPython EXPOSE 8888 - -RUN ["/bin/bash"] -- GitLab From d9b3ed25816f98e8ad11d3ecb20c1fc0ed0f4166 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 22 Nov 2017 12:28:47 -0800 Subject: [PATCH 0768/1801] BF16 tests for Batchnorm. This is a good end to end test which touches many parts of the system. PiperOrigin-RevId: 176688376 --- .../xla/service/batchnorm_rewriter.cc | 63 +++++++++----- tensorflow/compiler/xla/tests/BUILD | 5 ++ .../compiler/xla/tests/bfloat16_test.cc | 84 ++++++++++++++++++- tensorflow/compiler/xla/tests/build_defs.bzl | 12 ++- 4 files changed, 139 insertions(+), 25 deletions(-) diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc index abe881cd1a..c6193b3fbb 100644 --- a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc +++ b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc @@ -85,9 +85,9 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault { HloOpcode opcode) { HloComputation::Builder b("scalar_computation"); auto scalar_lhs = b.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {}), "scalar_lhs")); + 0, ShapeUtil::MakeShape(primitive_type, {}), "scalar_lhs")); auto scalar_rhs = b.AddInstruction(HloInstruction::CreateParameter( - 1, ShapeUtil::MakeShape(F32, {}), "scalar_rhs")); + 1, ShapeUtil::MakeShape(primitive_type, {}), "scalar_rhs")); auto scalar_op = b.AddInstruction( HloInstruction::CreateBinary(ShapeUtil::MakeShape(primitive_type, {}), opcode, scalar_lhs, scalar_rhs)); @@ -152,22 +152,30 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining( // Expand batch norm training into smaller HLO ops. HloInstruction* operand = batch_norm->mutable_operand(0); const Shape operand_shape = operand->shape(); + PrimitiveType ptype = operand_shape.element_type(); int64 feature_index = batch_norm->feature_index(); const int64 feature_count = operand_shape.dimensions(feature_index); const int64 size_in_elements = ShapeUtil::ElementsIn(operand_shape); - auto elements_per_feature = - computation_->AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR0(size_in_elements / feature_count))); + auto elements_per_feature_literal = + Literal::CreateR0(size_in_elements / feature_count); + TF_ASSIGN_OR_RETURN(elements_per_feature_literal, + elements_per_feature_literal->Convert(ptype)); + auto elements_per_feature = computation_->AddInstruction( + HloInstruction::CreateConstant(std::move(elements_per_feature_literal))); HloInstruction* scale = batch_norm->mutable_operand(1); HloInstruction* offset = batch_norm->mutable_operand(2); const Shape feature_shape = scale->shape(); + auto zero_literal = Literal::CreateR0(0.0f); + TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype)); auto zero = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0.0f))); + HloInstruction::CreateConstant(std::move(zero_literal))); + auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon()); + TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype)); auto epsilon = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon()))); + HloInstruction::CreateConstant(std::move(epsilon_literal))); std::vector dimensions_without_feature; @@ -184,7 +192,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining( HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index})); HloComputation* add_reduce_computation = - GetScalarBinaryComputation(F32, HloOpcode::kAdd); + GetScalarBinaryComputation(ptype, HloOpcode::kAdd); // X^2. auto operand_squared = @@ -243,8 +251,10 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining( computation_->AddInstruction(HloInstruction::CreateBinary( operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon)); + auto neg_half_literal = Literal::CreateR0(-0.5f); + TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype)); auto neg_half = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(-0.5f))); + HloInstruction::CreateConstant(std::move(neg_half_literal))); // 1 / Sqrt[Var[X] + epsilon]. auto rsqrt_var_add_epsilon = @@ -286,6 +296,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference( HloInstruction* operand = batch_norm->mutable_operand(0); const Shape operand_shape = operand->shape(); int64 feature_index = batch_norm->feature_index(); + PrimitiveType ptype = operand_shape.element_type(); HloInstruction* scale = batch_norm->mutable_operand(1); HloInstruction* offset = batch_norm->mutable_operand(2); @@ -293,8 +304,10 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference( HloInstruction* var = batch_norm->mutable_operand(4); const Shape feature_shape = scale->shape(); + auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon()); + TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype)); auto epsilon = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon()))); + HloInstruction::CreateConstant(std::move(epsilon_literal))); std::vector dimensions_without_feature; @@ -321,8 +334,10 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference( computation_->AddInstruction(HloInstruction::CreateBinary( operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon)); + auto neg_half_literal = Literal::CreateR0(-0.5f); + TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype)); auto neg_half = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(-0.5f))); + HloInstruction::CreateConstant(std::move(neg_half_literal))); // 1 / Sqrt[Var[X] + epsilon]. auto rsqrt_var_add_epsilon = @@ -373,6 +388,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad( HloInstruction* activation = batch_norm->mutable_operand(0); const Shape activation_shape = activation->shape(); + PrimitiveType ptype = activation_shape.element_type(); HloInstruction* scale = batch_norm->mutable_operand(1); const Shape feature_shape = scale->shape(); HloInstruction* mean = batch_norm->mutable_operand(2); @@ -383,18 +399,27 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad( const int64 size_in_elements = ShapeUtil::ElementsIn(activation_shape); const int64 feature_count = activation_shape.dimensions(feature_index); - auto elements_per_feature = - computation_->AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR0(size_in_elements / feature_count))); - + auto elements_per_feature_literal = + Literal::CreateR0(size_in_elements / feature_count); + TF_ASSIGN_OR_RETURN(elements_per_feature_literal, + elements_per_feature_literal->Convert(ptype)); + auto elements_per_feature = computation_->AddInstruction( + HloInstruction::CreateConstant(std::move(elements_per_feature_literal))); + + auto zero_literal = Literal::CreateR0(0.0f); + TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype)); auto zero = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0.0f))); + HloInstruction::CreateConstant(std::move(zero_literal))); + auto neg_half_literal = Literal::CreateR0(-0.5f); + TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype)); auto neg_half = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(-0.5f))); + HloInstruction::CreateConstant(std::move(neg_half_literal))); + auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon()); + TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype)); auto epsilon = computation_->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon()))); + HloInstruction::CreateConstant(std::move(epsilon_literal))); std::vector dimensions_without_feature; @@ -442,7 +467,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad( grad_output, activation_minus_mean)); HloComputation* add_reduce_computation = - GetScalarBinaryComputation(F32, HloOpcode::kAdd); + GetScalarBinaryComputation(ptype, HloOpcode::kAdd); // sum(Grad[Y] * (X - E[X])). auto sum_grad_output_times_activiation_minus_mean = diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 13d651ea6f..addce9019b 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -773,6 +773,11 @@ xla_test( xla_test( name = "bfloat16_test", srcs = ["bfloat16_test.cc"], + blacklisted_backends = [ + "cpu", + "cpu_parallel", + "gpu", + ], shard_count = 40, deps = [ ":test_utils", diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc index 26e2b1a95b..a1c53ef2aa 100644 --- a/tensorflow/compiler/xla/tests/bfloat16_test.cc +++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc @@ -51,8 +51,7 @@ class Bfloat16Test : public ClientLibraryTestBase { const ErrorSpec error_spec_{0.001, 0.001}; }; -XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL( - DISABLED_ON_CPU(ScalarOperation)))) { +XLA_TEST_F(Bfloat16Test, ScalarOperation) { ComputationBuilder builder(client_, TestName()); auto x = builder.ConstantR0(static_cast(2.0f)); auto y = builder.ConstantR0(static_cast(1.0f)); @@ -62,8 +61,7 @@ XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL( error_spec_); } -XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL( - DISABLED_ON_CPU(NegateScalarF16)))) { +XLA_TEST_F(Bfloat16Test, NegateScalarF16) { ComputationBuilder builder(client_, TestName()); builder.Neg(builder.ConstantR0(static_cast(2.1f))); @@ -71,5 +69,83 @@ XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL( error_spec_); } +XLA_TEST_F(Bfloat16Test, BatchNormTraining) { + const int kFeatureIndex = 2; + ComputationBuilder builder(client_, TestName()); + + auto operand = builder.ConstantR4FromArray4D( + {{{{static_cast(1.f)}, {static_cast(2.f)}}, + {{static_cast(3.f)}, {static_cast(4.f)}}}, + {{{static_cast(5.f)}, {static_cast(6.f)}}, + {{static_cast(7.f)}, {static_cast(8.f)}}}}); + + auto scale = builder.ConstantR1( + {static_cast(2.0f), static_cast(3.0f)}); + + auto offset = builder.ConstantR1( + {static_cast(1.0f), static_cast(2.0f)}); + + auto tuple = builder.BatchNormTraining(operand, scale, offset, + /*epsilon=*/0.001, kFeatureIndex); + + auto expected = *Literal::MakeTuple( + {Literal::CreateR4( + {{{{static_cast(-1.7f)}, {static_cast(-2.04f)}}, + {{static_cast(0.105f)}, {static_cast(0.65f)}}}, + {{{static_cast(1.89f)}, {static_cast(3.35f)}}, + {{static_cast(3.7f)}, {static_cast(6.04f)}}}}) + .get(), + Literal::CreateR1( + {static_cast(4), static_cast(5)}) + .get(), + Literal::CreateR1( + {static_cast(5), static_cast(5)}) + .get()}); + + ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01)); +} + +XLA_TEST_F(Bfloat16Test, BatchNormGrad) { + const int kFeatureIndex = 2; + ComputationBuilder builder(client_, TestName()); + + auto operand = builder.ConstantR4FromArray4D( + Array4D(2, 2, 2, 1, static_cast(0.0f))); + + auto scale = builder.ConstantR1( + {static_cast(1.0f), static_cast(1.0f)}); + + auto mean = builder.ConstantR1( + {static_cast(0.0f), static_cast(0.0f)}); + + auto var = builder.ConstantR1( + {static_cast(1.0f), static_cast(1.0f)}); + + auto grad_output = builder.ConstantR4FromArray4D( + {{{{static_cast(1.f)}, {static_cast(2.f)}}, + {{static_cast(3.f)}, {static_cast(4.f)}}}, + {{{static_cast(5.f)}, {static_cast(6.f)}}, + {{static_cast(7.f)}, {static_cast(8.f)}}}}); + + builder.BatchNormGrad(operand, scale, mean, var, grad_output, + /*epsilon=*/0.0, kFeatureIndex); + + auto expected = *Literal::MakeTuple( + {Literal::CreateR4( + {{{{static_cast(-3.f)}, {static_cast(-3.f)}}, + {{static_cast(-1.f)}, {static_cast(-1.f)}}}, + {{{static_cast(1.f)}, {static_cast(1.f)}}, + {{static_cast(3.f)}, {static_cast(3.f)}}}}) + .get(), + Literal::CreateR1( + {static_cast(0), static_cast(0)}) + .get(), + Literal::CreateR1( + {static_cast(16), static_cast(20)}) + .get()}); + + ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01)); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl index f594c609db..610302ac12 100644 --- a/tensorflow/compiler/xla/tests/build_defs.bzl +++ b/tensorflow/compiler/xla/tests/build_defs.bzl @@ -29,6 +29,7 @@ def xla_test(name, deps, xla_test_library_deps=[], backends=[], + blacklisted_backends=[], args=[], tags=[], copts=[], @@ -92,17 +93,24 @@ def xla_test(name, backends: A list of backends to generate tests for. Supported values: "cpu", "cpu_parallel", "gpu". If this list is empty, the test will be generated for all supported backends. + blacklisted_backends: A list of backends to NOT generate tests for. args: Test arguments for the target. tags: Tags for the target. - backend_args: A dict mapping backend name to list of additional args to - use for that target. + copts: Additional copts to pass to the build. + data: Additional data to pass to the build. backend_tags: A dict mapping backend name to list of additional tags to use for that target. + backend_args: A dict mapping backend name to list of additional args to + use for that target. + **kwargs: Additional keyword arguments to pass to native.cc_test. """ test_names = [] if not backends: backends = all_backends + backends = [backend for backend in backends + if backend not in blacklisted_backends] + native.cc_library( name="%s_lib" % name, srcs=srcs, -- GitLab From 8752c973150df64374f96d516aafa664de410dce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 12:30:20 -0800 Subject: [PATCH 0769/1801] Fix functionality in crf_sequence_score(), crf_log_norm(), and crf_decode() for when input has max_seq_len = 1. This can happen in single-example inference. PiperOrigin-RevId: 176688502 --- .../crf/python/kernel_tests/crf_test.py | 224 +++++++++++------- tensorflow/contrib/crf/python/ops/crf.py | 163 ++++++++----- 2 files changed, 242 insertions(+), 145 deletions(-) diff --git a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py index 964ec75441..b47fb426a1 100644 --- a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py +++ b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py @@ -32,27 +32,41 @@ from tensorflow.python.platform import test class CrfTest(test.TestCase): def testCrfSequenceScore(self): - inputs = np.array( - [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32) - tag_indices = np.array([1, 2, 1, 0], dtype=np.int32) transition_params = np.array( [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32) - sequence_lengths = np.array(3, dtype=np.int32) - with self.test_session() as sess: - sequence_score = crf.crf_sequence_score( - inputs=array_ops.expand_dims(inputs, 0), - tag_indices=array_ops.expand_dims(tag_indices, 0), - sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), - transition_params=constant_op.constant(transition_params)) - sequence_score = array_ops.squeeze(sequence_score, [0]) - tf_sequence_score = sess.run(sequence_score) - expected_unary_score = sum(inputs[i][tag_indices[i]] - for i in range(sequence_lengths)) - expected_binary_score = sum( - transition_params[tag_indices[i], tag_indices[i + 1]] - for i in range(sequence_lengths - 1)) - expected_sequence_score = expected_unary_score + expected_binary_score - self.assertAllClose(tf_sequence_score, expected_sequence_score) + # Test both the length-1 and regular cases. + sequence_lengths_list = [ + np.array(3, dtype=np.int32), + np.array(1, dtype=np.int32) + ] + inputs_list = [ + np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], + dtype=np.float32), + np.array([[4, 5, -3]], + dtype=np.float32), + ] + tag_indices_list = [ + np.array([1, 2, 1, 0], dtype=np.int32), + np.array([1], dtype=np.int32) + ] + for sequence_lengths, inputs, tag_indices in zip(sequence_lengths_list, + inputs_list, + tag_indices_list): + with self.test_session() as sess: + sequence_score = crf.crf_sequence_score( + inputs=array_ops.expand_dims(inputs, 0), + tag_indices=array_ops.expand_dims(tag_indices, 0), + sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), + transition_params=constant_op.constant(transition_params)) + sequence_score = array_ops.squeeze(sequence_score, [0]) + tf_sequence_score = sess.run(sequence_score) + expected_unary_score = sum(inputs[i][tag_indices[i]] + for i in range(sequence_lengths)) + expected_binary_score = sum( + transition_params[tag_indices[i], tag_indices[i + 1]] + for i in range(sequence_lengths - 1)) + expected_sequence_score = expected_unary_score + expected_binary_score + self.assertAllClose(tf_sequence_score, expected_sequence_score) def testCrfUnaryScore(self): inputs = np.array( @@ -89,38 +103,54 @@ class CrfTest(test.TestCase): self.assertAllClose(tf_binary_score, expected_binary_score) def testCrfLogNorm(self): - inputs = np.array( - [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32) transition_params = np.array( [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32) - num_words = inputs.shape[0] - num_tags = inputs.shape[1] - sequence_lengths = np.array(3, dtype=np.int32) - with self.test_session() as sess: - all_sequence_scores = [] - - # Compare the dynamic program with brute force computation. - for tag_indices in itertools.product( - range(num_tags), repeat=sequence_lengths): - tag_indices = list(tag_indices) - tag_indices.extend([0] * (num_words - sequence_lengths)) - all_sequence_scores.append( - crf.crf_sequence_score( - inputs=array_ops.expand_dims(inputs, 0), - tag_indices=array_ops.expand_dims(tag_indices, 0), - sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), - transition_params=constant_op.constant(transition_params))) - - brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores) - log_norm = crf.crf_log_norm( - inputs=array_ops.expand_dims(inputs, 0), - sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), - transition_params=constant_op.constant(transition_params)) - log_norm = array_ops.squeeze(log_norm, [0]) - tf_brute_force_log_norm, tf_log_norm = sess.run( - [brute_force_log_norm, log_norm]) + # Test both the length-1 and regular cases. + sequence_lengths_list = [ + np.array(3, dtype=np.int32), + np.array(1, dtype=np.int32) + ] + inputs_list = [ + np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], + dtype=np.float32), + np.array([[3, -1, 3]], + dtype=np.float32), + ] + tag_indices_list = [ + np.array([1, 2, 1, 0], dtype=np.int32), + np.array([2], dtype=np.int32) + ] + + for sequence_lengths, inputs, tag_indices in zip(sequence_lengths_list, + inputs_list, + tag_indices_list): + num_words = inputs.shape[0] + num_tags = inputs.shape[1] + with self.test_session() as sess: + all_sequence_scores = [] + + # Compare the dynamic program with brute force computation. + for tag_indices in itertools.product( + range(num_tags), repeat=sequence_lengths): + tag_indices = list(tag_indices) + tag_indices.extend([0] * (num_words - sequence_lengths)) + all_sequence_scores.append( + crf.crf_sequence_score( + inputs=array_ops.expand_dims(inputs, 0), + tag_indices=array_ops.expand_dims(tag_indices, 0), + sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), + transition_params=constant_op.constant(transition_params))) + + brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores) + log_norm = crf.crf_log_norm( + inputs=array_ops.expand_dims(inputs, 0), + sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), + transition_params=constant_op.constant(transition_params)) + log_norm = array_ops.squeeze(log_norm, [0]) + tf_brute_force_log_norm, tf_log_norm = sess.run( + [brute_force_log_norm, log_norm]) - self.assertAllClose(tf_log_norm, tf_brute_force_log_norm) + self.assertAllClose(tf_log_norm, tf_brute_force_log_norm) def testCrfLogLikelihood(self): inputs = np.array( @@ -201,50 +231,66 @@ class CrfTest(test.TestCase): expected_max_sequence[:sequence_lengths]) def testCrfDecode(self): - inputs = np.array( - [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32) transition_params = np.array( [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32) - sequence_lengths = np.array(3, dtype=np.int32) - num_words = inputs.shape[0] - num_tags = inputs.shape[1] + # Test both the length-1 and regular cases. + sequence_lengths_list = [ + np.array(3, dtype=np.int32), + np.array(1, dtype=np.int32) + ] + inputs_list = [ + np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], + dtype=np.float32), + np.array([[-1, 2, 1]], + dtype=np.float32), + ] + tag_indices_list = [ + np.array([1, 2, 1, 0], dtype=np.int32), + np.array([2], dtype=np.int32) + ] + + for sequence_lengths, inputs, tag_indices in zip(sequence_lengths_list, + inputs_list, + tag_indices_list): + num_words = inputs.shape[0] + num_tags = inputs.shape[1] - with self.test_session() as sess: - all_sequence_scores = [] - all_sequences = [] - - # Compare the dynamic program with brute force computation. - for tag_indices in itertools.product( - range(num_tags), repeat=sequence_lengths): - tag_indices = list(tag_indices) - tag_indices.extend([0] * (num_words - sequence_lengths)) - all_sequences.append(tag_indices) - sequence_score = crf.crf_sequence_score( - inputs=array_ops.expand_dims(inputs, 0), - tag_indices=array_ops.expand_dims(tag_indices, 0), - sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), - transition_params=constant_op.constant(transition_params)) - sequence_score = array_ops.squeeze(sequence_score, [0]) - all_sequence_scores.append(sequence_score) - - tf_all_sequence_scores = sess.run(all_sequence_scores) - - expected_max_sequence_index = np.argmax(tf_all_sequence_scores) - expected_max_sequence = all_sequences[expected_max_sequence_index] - expected_max_score = tf_all_sequence_scores[expected_max_sequence_index] - - actual_max_sequence, actual_max_score = crf.crf_decode( - array_ops.expand_dims(inputs, 0), - constant_op.constant(transition_params), - array_ops.expand_dims(sequence_lengths, 0)) - actual_max_sequence = array_ops.squeeze(actual_max_sequence, [0]) - actual_max_score = array_ops.squeeze(actual_max_score, [0]) - tf_actual_max_sequence, tf_actual_max_score = sess.run( - [actual_max_sequence, actual_max_score]) - - self.assertAllClose(tf_actual_max_score, expected_max_score) - self.assertEqual(list(tf_actual_max_sequence[:sequence_lengths]), - expected_max_sequence[:sequence_lengths]) + with self.test_session() as sess: + all_sequence_scores = [] + all_sequences = [] + + # Compare the dynamic program with brute force computation. + for tag_indices in itertools.product( + range(num_tags), repeat=sequence_lengths): + tag_indices = list(tag_indices) + tag_indices.extend([0] * (num_words - sequence_lengths)) + all_sequences.append(tag_indices) + sequence_score = crf.crf_sequence_score( + inputs=array_ops.expand_dims(inputs, 0), + tag_indices=array_ops.expand_dims(tag_indices, 0), + sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), + transition_params=constant_op.constant(transition_params)) + sequence_score = array_ops.squeeze(sequence_score, [0]) + all_sequence_scores.append(sequence_score) + + tf_all_sequence_scores = sess.run(all_sequence_scores) + + expected_max_sequence_index = np.argmax(tf_all_sequence_scores) + expected_max_sequence = all_sequences[expected_max_sequence_index] + expected_max_score = tf_all_sequence_scores[expected_max_sequence_index] + + actual_max_sequence, actual_max_score = crf.crf_decode( + array_ops.expand_dims(inputs, 0), + constant_op.constant(transition_params), + array_ops.expand_dims(sequence_lengths, 0)) + actual_max_sequence = array_ops.squeeze(actual_max_sequence, [0]) + actual_max_score = array_ops.squeeze(actual_max_score, [0]) + tf_actual_max_sequence, tf_actual_max_score = sess.run( + [actual_max_sequence, actual_max_score]) + + self.assertAllClose(tf_actual_max_score, expected_max_score) + self.assertEqual(list(tf_actual_max_sequence[:sequence_lengths]), + expected_max_sequence[:sequence_lengths]) if __name__ == "__main__": diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 4282be5ec8..ca384226d4 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -53,7 +53,9 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import dtypes +from tensorflow.python.layers import utils from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn @@ -101,12 +103,29 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths, Returns: sequence_scores: A [batch_size] vector of unnormalized sequence scores. """ - # Compute the scores of the given tag sequence. - unary_scores = crf_unary_score(tag_indices, sequence_lengths, inputs) - binary_scores = crf_binary_score(tag_indices, sequence_lengths, - transition_params) - sequence_scores = unary_scores + binary_scores - return sequence_scores + # If max_seq_len is 1, we skip the score calculation and simply gather the + # unary potentials of the single tag. + def _single_seq_fn(): + batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0] + example_inds = array_ops.reshape( + math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1]) + return array_ops.gather_nd( + array_ops.squeeze(inputs, [1]), + array_ops.concat([example_inds, tag_indices], axis=1)) + + def _multi_seq_fn(): + # Compute the scores of the given tag sequence. + unary_scores = crf_unary_score(tag_indices, sequence_lengths, inputs) + binary_scores = crf_binary_score(tag_indices, sequence_lengths, + transition_params) + sequence_scores = unary_scores + binary_scores + return sequence_scores + + return utils.smart_cond( + pred=math_ops.equal(inputs.shape[1].value or array_ops.shape(inputs)[1], + 1), + fn1=_single_seq_fn, + fn2=_multi_seq_fn) def crf_log_norm(inputs, sequence_lengths, transition_params): @@ -124,19 +143,32 @@ def crf_log_norm(inputs, sequence_lengths, transition_params): # algorithm. first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1]) first_input = array_ops.squeeze(first_input, [1]) - rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1]) - # Compute the alpha values in the forward algorithm in order to get the - # partition function. - forward_cell = CrfForwardRnnCell(transition_params) - _, alphas = rnn.dynamic_rnn( - cell=forward_cell, - inputs=rest_of_input, - sequence_length=sequence_lengths - 1, - initial_state=first_input, - dtype=dtypes.float32) - log_norm = math_ops.reduce_logsumexp(alphas, [1]) - return log_norm + # If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over + # the "initial state" (the unary potentials). + def _single_seq_fn(): + return math_ops.reduce_logsumexp(first_input, [1]) + + def _multi_seq_fn(): + """Forward computation of alpha values.""" + rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1]) + + # Compute the alpha values in the forward algorithm in order to get the + # partition function. + forward_cell = CrfForwardRnnCell(transition_params) + _, alphas = rnn.dynamic_rnn( + cell=forward_cell, + inputs=rest_of_input, + sequence_length=sequence_lengths - 1, + initial_state=first_input, + dtype=dtypes.float32) + log_norm = math_ops.reduce_logsumexp(alphas, [1]) + return log_norm + + max_seq_len = array_ops.shape(inputs)[1] + return control_flow_ops.cond(pred=math_ops.equal(max_seq_len, 1), + true_fn=_single_seq_fn, + false_fn=_multi_seq_fn) def crf_log_likelihood(inputs, @@ -440,41 +472,60 @@ def crf_decode(potentials, transition_params, sequence_length): Contains the highest scoring tag indices. best_score: A [batch_size] tensor, containing the score of decode_tags. """ - # For simplicity, in shape comments, denote: - # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). - num_tags = potentials.get_shape()[2].value - - # Computes forward decoding. Get last score and backpointers. - crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params) - initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1]) - initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O] - inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O] - backpointers, last_score = rnn.dynamic_rnn( - crf_fwd_cell, - inputs=inputs, - sequence_length=sequence_length - 1, - initial_state=initial_state, - time_major=False, - dtype=dtypes.int32) # [B, T - 1, O], [B, O] - backpointers = gen_array_ops.reverse_sequence( - backpointers, sequence_length - 1, seq_dim=1) # [B, T-1, O] - - # Computes backward decoding. Extract tag indices from backpointers. - crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags) - initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1), - dtype=dtypes.int32) # [B] - initial_state = array_ops.expand_dims(initial_state, axis=-1) # [B, 1] - decode_tags, _ = rnn.dynamic_rnn( - crf_bwd_cell, - inputs=backpointers, - sequence_length=sequence_length - 1, - initial_state=initial_state, - time_major=False, - dtype=dtypes.int32) # [B, T - 1, 1] - decode_tags = array_ops.squeeze(decode_tags, axis=[2]) # [B, T - 1] - decode_tags = array_ops.concat([initial_state, decode_tags], axis=1) # [B, T] - decode_tags = gen_array_ops.reverse_sequence( - decode_tags, sequence_length, seq_dim=1) # [B, T] - - best_score = math_ops.reduce_max(last_score, axis=1) # [B] - return decode_tags, best_score + # If max_seq_len is 1, we skip the algorithm and simply return the argmax tag + # and the max activation. + def _single_seq_fn(): + squeezed_potentials = array_ops.squeeze(potentials, [1]) + decode_tags = array_ops.expand_dims( + math_ops.argmax(squeezed_potentials, axis=1), 1) + best_score = math_ops.reduce_max(squeezed_potentials, axis=1) + return math_ops.cast(decode_tags, dtype=dtypes.int32), best_score + + def _multi_seq_fn(): + """Decoding of highest scoring sequence.""" + + # For simplicity, in shape comments, denote: + # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). + num_tags = potentials.get_shape()[2].value + + # Computes forward decoding. Get last score and backpointers. + crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params) + initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1]) + initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O] + inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O] + backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O] + crf_fwd_cell, + inputs=inputs, + sequence_length=sequence_length - 1, + initial_state=initial_state, + time_major=False, + dtype=dtypes.int32) + backpointers = gen_array_ops.reverse_sequence( # [B, T - 1, O] + backpointers, sequence_length - 1, seq_dim=1) + + # Computes backward decoding. Extract tag indices from backpointers. + crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags) + initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1), # [B] + dtype=dtypes.int32) + initial_state = array_ops.expand_dims(initial_state, axis=-1) # [B, 1] + decode_tags, _ = rnn.dynamic_rnn( # [B, T - 1, 1] + crf_bwd_cell, + inputs=backpointers, + sequence_length=sequence_length - 1, + initial_state=initial_state, + time_major=False, + dtype=dtypes.int32) + decode_tags = array_ops.squeeze(decode_tags, axis=[2]) # [B, T - 1] + decode_tags = array_ops.concat([initial_state, decode_tags], # [B, T] + axis=1) + decode_tags = gen_array_ops.reverse_sequence( # [B, T] + decode_tags, sequence_length, seq_dim=1) + + best_score = math_ops.reduce_max(last_score, axis=1) # [B] + return decode_tags, best_score + + return utils.smart_cond( + pred=math_ops.equal( + potentials.shape[1].value or array_ops.shape(potentials)[1], 1), + fn1=_single_seq_fn, + fn2=_multi_seq_fn) -- GitLab From 9f63f6f4613f6fc556c245bd8b69052778f28dc2 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 12:38:18 -0800 Subject: [PATCH 0770/1801] Remove non-existing reference. --- tensorflow/contrib/summary/BUILD | 1 - tensorflow/contrib/summary/summary_ops_graph_test.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index 237339e81e..f34291c203 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -45,7 +45,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":summary_ops", - ":summary_test_internal", ":summary_test_util", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py index 8fa361de84..703adb7b46 100644 --- a/tensorflow/contrib/summary/summary_ops_graph_test.py +++ b/tensorflow/contrib/summary/summary_ops_graph_test.py @@ -21,7 +21,6 @@ import tempfile import six from tensorflow.contrib.summary import summary_ops -from tensorflow.contrib.summary import summary_test_internal from tensorflow.contrib.summary import summary_test_util from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 -- GitLab From d0324067625d56e75984ef235a1b8fe6f6e15a6f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 12:33:13 -0800 Subject: [PATCH 0771/1801] Minor refactor: move stats files from stochastic to common, remove stochastic PiperOrigin-RevId: 176688846 --- .../contrib/boosted_trees/kernels/split_handler_ops.cc | 2 +- tensorflow/contrib/boosted_trees/lib/BUILD | 10 +++++----- .../stats/feature-split-candidate.h | 8 ++++---- .../{stochastic => common}/stats/gradient-stats.h | 6 +++--- .../learner/{stochastic => common}/stats/node-stats.h | 8 ++++---- .../{stochastic => common}/stats/node-stats_test.cc | 2 +- .../learner/{stochastic => common}/stats/split-stats.h | 8 ++++---- 7 files changed, 22 insertions(+), 22 deletions(-) rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/feature-split-candidate.h (90%) rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/gradient-stats.h (98%) rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/node-stats.h (98%) rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/node-stats_test.cc (99%) rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/split-stats.h (94%) diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc index a5de1340b9..18b4abd654 100644 --- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc @@ -16,7 +16,7 @@ #include #include -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h" +#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h" #include "tensorflow/contrib/boosted_trees/proto/split_info.pb.h" #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h" #include "tensorflow/core/framework/device_base.h" diff --git a/tensorflow/contrib/boosted_trees/lib/BUILD b/tensorflow/contrib/boosted_trees/lib/BUILD index af389849b4..131bd48562 100644 --- a/tensorflow/contrib/boosted_trees/lib/BUILD +++ b/tensorflow/contrib/boosted_trees/lib/BUILD @@ -408,7 +408,7 @@ tf_cc_test( # Learner/stochastic cc_library( name = "gradient-stats", - hdrs = ["learner/stochastic/stats/gradient-stats.h"], + hdrs = ["learner/common/stats/gradient-stats.h"], deps = [ "//tensorflow/core:framework_headers_lib", "//third_party/eigen3", @@ -417,7 +417,7 @@ cc_library( cc_library( name = "node-stats", - hdrs = ["learner/stochastic/stats/node-stats.h"], + hdrs = ["learner/common/stats/node-stats.h"], deps = [ ":gradient-stats", "//tensorflow/contrib/boosted_trees/proto:learner_proto_cc", @@ -429,7 +429,7 @@ cc_library( cc_library( name = "split-stats", - hdrs = ["learner/stochastic/stats/split-stats.h"], + hdrs = ["learner/common/stats/split-stats.h"], deps = [ ":node-stats", ], @@ -437,7 +437,7 @@ cc_library( cc_library( name = "feature-split-candidate", - hdrs = ["learner/stochastic/stats/feature-split-candidate.h"], + hdrs = ["learner/common/stats/feature-split-candidate.h"], deps = [ ":split-stats", "//tensorflow/contrib/boosted_trees/proto:tree_config_proto_cc", @@ -447,7 +447,7 @@ cc_library( tf_cc_test( name = "node-stats_test", size = "small", - srcs = ["learner/stochastic/stats/node-stats_test.cc"], + srcs = ["learner/common/stats/node-stats_test.cc"], deps = [ ":node-stats", "//tensorflow/core:tensor_testutil", diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/feature-split-candidate.h similarity index 90% rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/feature-split-candidate.h index fe22691178..339c2e0fde 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h +++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/feature-split-candidate.h @@ -13,10 +13,10 @@ // limitations under the License. // // ============================================================================= -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_FEATURE_SPLIT_CANDIDATE_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_FEATURE_SPLIT_CANDIDATE_H_ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_FEATURE_SPLIT_CANDIDATE_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_FEATURE_SPLIT_CANDIDATE_H_ -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h" +#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h" #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h" namespace tensorflow { @@ -58,4 +58,4 @@ struct FeatureSplitCandidate { } // namespace boosted_trees } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_FEATURE_SPLIT_CANDIDATE_H_ +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_FEATURE_SPLIT_CANDIDATE_H_ diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h similarity index 98% rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h index dad64bf165..34e3ddb777 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h +++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // ============================================================================= -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_GRADIENT_STATS_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_GRADIENT_STATS_H_ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_GRADIENT_STATS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_GRADIENT_STATS_H_ #include @@ -190,4 +190,4 @@ inline GradientStats operator-(const GradientStats& a, const GradientStats& b) { } // namespace boosted_trees } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_GRADIENT_STATS_H_ +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_GRADIENT_STATS_H_ diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h similarity index 98% rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h index 4e5f53874d..642a183aec 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h +++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. // ============================================================================= -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_NODE_STATS_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_NODE_STATS_H_ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_NODE_STATS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_NODE_STATS_H_ #include "third_party/eigen3/Eigen/Core" #include "third_party/eigen3/Eigen/Eigenvalues" -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h" +#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h" #include "tensorflow/contrib/boosted_trees/proto/learner.pb.h" #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h" #include "tensorflow/core/framework/shape_inference.h" @@ -298,4 +298,4 @@ struct NodeStats { } // namespace boosted_trees } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_NODE_STATS_H_ +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_NODE_STATS_H_ diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats_test.cc similarity index 99% rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats_test.cc index ecb7a04efb..f867e77d3e 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // ============================================================================= -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h" +#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h similarity index 94% rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h index f700cbced8..054ccd9a8c 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h +++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. // ============================================================================= -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_SPLIT_STATS_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_SPLIT_STATS_H_ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_SPLIT_STATS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_SPLIT_STATS_H_ #include -#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h" +#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h" namespace tensorflow { namespace boosted_trees { @@ -81,4 +81,4 @@ struct SplitStats { } // namespace boosted_trees } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_SPLIT_STATS_H_ +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_SPLIT_STATS_H_ -- GitLab From f7f8de28504e13a82385d49c0a75baaf82f190bf Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 12:41:58 -0800 Subject: [PATCH 0772/1801] Remove non-existing reference. --- tensorflow/contrib/summary/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index 237339e81e..f34291c203 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -45,7 +45,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":summary_ops", - ":summary_test_internal", ":summary_test_util", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", -- GitLab From 9998d927ab9a2c915326130e9cdf773276ce9db0 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 12:42:23 -0800 Subject: [PATCH 0773/1801] Update summary_ops_graph_test.py --- tensorflow/contrib/summary/summary_ops_graph_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py index 8fa361de84..703adb7b46 100644 --- a/tensorflow/contrib/summary/summary_ops_graph_test.py +++ b/tensorflow/contrib/summary/summary_ops_graph_test.py @@ -21,7 +21,6 @@ import tempfile import six from tensorflow.contrib.summary import summary_ops -from tensorflow.contrib.summary import summary_test_internal from tensorflow.contrib.summary import summary_test_util from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 -- GitLab From 467131ff039bb37af36b5fc907960896a20c6f65 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 22 Nov 2017 12:33:18 -0800 Subject: [PATCH 0774/1801] Make a parameter name in a declaration consistent with a name in the definition. PiperOrigin-RevId: 176688856 --- tensorflow/compiler/xla/client/client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index cf6878dd8e..c28380b689 100644 --- a/tensorflow/compiler/xla/client/client.h +++ b/tensorflow/compiler/xla/client/client.h @@ -142,7 +142,7 @@ class Client { // Returns a vector of global data handles that point to the tuple elements. StatusOr>> DeconstructTuple( - const GlobalData& computation); + const GlobalData& data); // Retrieves the statistics of the given computation. StatusOr GetComputationStats( -- GitLab From 02877b2ff172415845f5305a7a534ef4f7174cf4 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Nov 2017 12:45:38 -0800 Subject: [PATCH 0775/1801] Also ignore no_oss tags in windows builds. (#14810) --- tensorflow/python/BUILD | 1 + tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 4 ++-- tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 5eb9b79ee6..4583b69bbf 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3326,6 +3326,7 @@ py_test( tags = [ "no_gpu", "no_oss", + "no_pip", "no_pip_gpu", "notap", ], diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index f6e3d2e6c7..8520ca898f 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -64,7 +64,7 @@ reinstall_tensorflow_pip ${PIP_NAME} # https://github.com/tensorflow/tensorflow/issues/12844 is fixed. bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ - --test_tag_filters=-no_pip,-no_windows \ - --build_tag_filters=-no_pip,-no_windows --build_tests_only \ + --test_tag_filters=-no_pip,-no_windows,-no_oss \ + --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ --test_env=TF_SAVER_LENIENT_NAMES=True \ //${PY_TEST_DIR}/tensorflow/python/... diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 25d327c818..47ca42d642 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -65,7 +65,7 @@ reinstall_tensorflow_pip ${PIP_NAME} # https://github.com/tensorflow/tensorflow/issues/12844 is fixed. bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ - --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \ - --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \ + --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \ + --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \ --test_env=TF_SAVER_LENIENT_NAMES=True \ --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/... -- GitLab From 780d35918354a5a06e0499c457ae7c9c0e45d172 Mon Sep 17 00:00:00 2001 From: Keven Wang Date: Wed, 22 Nov 2017 12:46:40 -0800 Subject: [PATCH 0776/1801] execute command properly in bash.exe on windows (#14745) --- tensorflow/workspace.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 8e62228c1b..9562f7e922 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -107,7 +107,7 @@ def _apply_patch(repo_ctx, patch_file): bazel_sh = _get_env_var(repo_ctx, "BAZEL_SH") if not bazel_sh: fail("BAZEL_SH environment variable is not set") - cmd = [bazel_sh, "-c", " ".join(cmd)] + cmd = [bazel_sh, "-l", "-c", " ".join(cmd)] _execute_and_check_ret_code(repo_ctx, cmd) # Download the repository and apply a patch to its root -- GitLab From 8af1600d49ff4cc16063ab1aafbde52be9347c62 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 12:35:30 -0800 Subject: [PATCH 0777/1801] Allow to continue when function-inlining fails. This is useful if either the function-inlining fails erroneously, or if one would like to continue at least long enough to get a graph-visualization. Also, in case of multiple inlining passes, only log once. PiperOrigin-RevId: 176689106 --- tensorflow/contrib/lite/toco/import_tensorflow.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 1f959600f3..691b4ff2a9 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1462,18 +1462,23 @@ bool InlineAllFunctions(GraphDef* graphdef) { tensorflow::Graph graph(fld); tensorflow::GraphConstructorOptions gc_opts; - TF_CHECK_OK( - tensorflow::ConvertGraphDefToGraph(gc_opts, graphdef_copy, &graph)); + const auto& tf_convert_status = + tensorflow::ConvertGraphDefToGraph(gc_opts, graphdef_copy, &graph); + if (!tf_convert_status.ok()) { + LOG(ERROR) << "tensorflow::ConvertGraphDefToGraph failed with status: " + << tf_convert_status.ToString(); + return false; + } // Iterate over the graph until there are no more nodes to be inlined. bool graph_modified = false; while (tensorflow::ExpandInlineFunctions(flr, &graph)) { graph_modified = true; - LOG(INFO) << "Found functions that were inlined."; } // Output inlined graph if (graph_modified) { + LOG(INFO) << "Found and inlined TensorFlow functions."; graph.ToGraphDef(graphdef); } return graph_modified; -- GitLab From 4b636957604faa3361a799dd9d8749a6b85afff7 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 22 Nov 2017 12:39:54 -0800 Subject: [PATCH 0778/1801] Place HloProfilePrinter and HloProfileIndexMap in Executable This refactoring will later allow XlaCompiledCpuFunction to pull out the HloProfilePrinter from Executable and use that to display the hlo execution profile. A de/serialized HloProfilePrinter will let AOT compiled binaries display their Hlo execution profile. PiperOrigin-RevId: 176689528 --- .../compiler/xla/service/cpu/cpu_compiler.cc | 75 +++++++++++++++---- .../xla/service/cpu/cpu_executable.cc | 51 +++++++------ .../compiler/xla/service/cpu/cpu_executable.h | 19 ++--- .../service/cpu/parallel_cpu_executable.cc | 46 ++++-------- .../xla/service/cpu/parallel_cpu_executable.h | 10 +-- tensorflow/compiler/xla/service/executable.h | 35 ++++++--- .../compiler/xla/service/gpu/gpu_compiler.cc | 18 ++++- .../xla/service/gpu/gpu_executable.cc | 13 ++-- .../compiler/xla/service/gpu/gpu_executable.h | 8 +- .../xla/service/hlo_execution_profile.cc | 22 ++---- .../xla/service/hlo_execution_profile.h | 24 +++--- .../xla/service/hlo_execution_profile_test.cc | 6 +- .../xla/service/interpreter/executable.cc | 8 +- .../xla/service/interpreter/executable.h | 2 - tensorflow/compiler/xla/service/service.cc | 11 +-- 15 files changed, 193 insertions(+), 155 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 56940b8d63..ff6042ae19 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -197,28 +197,35 @@ void InitializeLLVMCommandLineOptions(const HloModuleConfig& config) { class CollectProfileCandidates : public DfsHloVisitorWithDefault { public: static StatusOr> - GetCandidatesForComputation(HloComputation* computation) { + GetCandidatesForComputation( + HloComputation* computation, + const std::unordered_map& + assigned_indices) { std::unordered_map hlo_to_profile_idx; CollectProfileCandidates profile_candidates_for_computation( - &hlo_to_profile_idx); + &hlo_to_profile_idx, assigned_indices); TF_RETURN_IF_ERROR( computation->Accept(&profile_candidates_for_computation)); return hlo_to_profile_idx; } private: - explicit CollectProfileCandidates( - std::unordered_map* hlo_to_profile_idx) - : hlo_to_profile_idx_(hlo_to_profile_idx) {} + CollectProfileCandidates( + std::unordered_map* hlo_to_profile_idx, + const std::unordered_map& assigned_indices) + : hlo_to_profile_idx_(hlo_to_profile_idx), + assigned_indices_(assigned_indices) {} Status DefaultAction(HloInstruction* hlo_instruction) override { - hlo_to_profile_idx_->insert({hlo_instruction, hlo_to_profile_idx_->size()}); + hlo_to_profile_idx_->insert( + {hlo_instruction, FindOrDie(assigned_indices_, hlo_instruction)}); return Status::OK(); } Status HandleCall(HloInstruction* call) override { TF_RETURN_IF_ERROR(DefaultAction(call)); - CollectProfileCandidates candidates_for_call(hlo_to_profile_idx_); + CollectProfileCandidates candidates_for_call(hlo_to_profile_idx_, + assigned_indices_); TF_RETURN_IF_ERROR(call->to_apply()->Accept(&candidates_for_call)); return Status::OK(); } @@ -232,17 +239,20 @@ class CollectProfileCandidates : public DfsHloVisitorWithDefault { Status HandleWhile(HloInstruction* xla_while) override { TF_RETURN_IF_ERROR(DefaultAction(xla_while)); - CollectProfileCandidates candidates_for_condition(hlo_to_profile_idx_); + CollectProfileCandidates candidates_for_condition(hlo_to_profile_idx_, + assigned_indices_); TF_RETURN_IF_ERROR( xla_while->while_condition()->Accept(&candidates_for_condition)); - CollectProfileCandidates candidates_for_body(hlo_to_profile_idx_); + CollectProfileCandidates candidates_for_body(hlo_to_profile_idx_, + assigned_indices_); TF_RETURN_IF_ERROR(xla_while->while_body()->Accept(&candidates_for_body)); return Status::OK(); } std::unordered_map* hlo_to_profile_idx_; + const std::unordered_map& assigned_indices_; }; } // namespace @@ -475,10 +485,27 @@ StatusOr> CpuCompiler::RunBackend( HloComputation* computation = module->entry_computation(); std::unordered_map hlo_to_profile_idx; + std::unique_ptr hlo_profile_index_map; + std::unique_ptr hlo_profile_printer; if (module->config().hlo_profiling_enabled()) { + hlo_profile_index_map = MakeUnique(*module); + TF_ASSIGN_OR_RETURN( hlo_to_profile_idx, - CollectProfileCandidates::GetCandidatesForComputation(computation)); + CollectProfileCandidates::GetCandidatesForComputation( + computation, hlo_profile_index_map->instruction_to_profile_idx())); + + auto shape_size_bytes = [](const Shape& shape) { + // On the cpu, opaques are pointers. + if (ShapeUtil::IsOpaque(shape)) { + return static_cast(sizeof(void*)); + } + return ShapeUtil::ByteSizeOf(shape, sizeof(void*)); + }; + + HloCostAnalysis cost_analysis(shape_size_bytes); + hlo_profile_printer = + CreateHloProfilePrinter(*hlo_profile_index_map, cost_analysis); } std::unique_ptr cpu_executable; @@ -544,8 +571,16 @@ StatusOr> CpuCompiler::RunBackend( parallel_computations.emplace(to_apply, instruction); } + // We always profile the entire computation as a whole, even if hlo + // profiling is disabled. When hlo profiling is diabled, we pass in a + // profile counter array of just one element, which corresponds to the whole + // computation. + size_t entry_computation_profile_idx = + hlo_profile_index_map ? hlo_profile_index_map->GetProfileIndexFor( + *module->entry_computation()) + : 0; IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - hlo_to_profile_idx, hlo_to_profile_idx.size(), + hlo_to_profile_idx, entry_computation_profile_idx, jit->target_machine(), jit->external_constant_pool()); std::unique_ptr> function_names( @@ -586,8 +621,8 @@ StatusOr> CpuCompiler::RunBackend( jit->AddModule(std::move(llvm_module)); cpu_executable.reset(new ParallelCpuExecutable( std::move(jit), std::move(assignment), std::move(module), - std::move(function_names), std::move(hlo_to_profile_idx), - std::move(aligned_constants))); + std::move(function_names), std::move(aligned_constants), + std::move(hlo_profile_printer), std::move(hlo_profile_index_map))); if (embed_ir_in_executable) { static_cast(*cpu_executable) @@ -620,12 +655,22 @@ StatusOr> CpuCompiler::RunBackend( TF_RETURN_IF_ERROR(protobuf_util::DumpProtoToDirectory( proto, xla_dump_hlo_proto_to, module->name())); } + // We always profile the entire computation as a whole, even if hlo + // profiling is disabled. When hlo profiling is diabled, we pass in a + // profile counter array of just one element, which corresponds to the whole + // computation. + size_t entry_computation_profile_idx = + hlo_profile_index_map ? hlo_profile_index_map->GetProfileIndexFor( + *module->entry_computation()) + : 0; + // Each computation is a single function. Emit all embedded computations // before the entry computation. The order of computations returned from // GetEmbeddedComputations guarantees that a called computation occurs // before a caller computation. + IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - hlo_to_profile_idx, hlo_to_profile_idx.size(), + hlo_to_profile_idx, entry_computation_profile_idx, jit->target_machine(), jit->external_constant_pool()); for (auto embedded_computation : @@ -659,7 +704,7 @@ StatusOr> CpuCompiler::RunBackend( jit->AddModule(std::move(llvm_module)); cpu_executable.reset(new CpuExecutable( std::move(jit), std::move(assignment), std::move(module), function_name, - std::move(hlo_to_profile_idx))); + std::move(hlo_profile_printer), std::move(hlo_profile_index_map))); if (embed_ir_in_executable) { static_cast(*cpu_executable) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index e6ef9d6314..e956f478b8 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -55,11 +55,12 @@ CpuExecutable::CpuExecutable( std::unique_ptr assignment, std::unique_ptr hlo_module, const string& entry_function_name, - std::unordered_map hlo_to_profile_idx) - : Executable(std::move(hlo_module)), + std::unique_ptr hlo_profile_printer, + std::unique_ptr hlo_profile_index_map) + : Executable(std::move(hlo_module), std::move(hlo_profile_printer), + std::move(hlo_profile_index_map)), jit_(std::move(jit)), - assignment_(std::move(assignment)), - hlo_to_profile_idx_(std::move(hlo_to_profile_idx)) { + assignment_(std::move(assignment)) { // Resolve symbols in the constructor rather than at execution time to avoid // races because FindSymbol is not thread safe. llvm::JITSymbol sym = jit_->FindSymbol(entry_function_name); @@ -183,9 +184,16 @@ Status CpuExecutable::ExecuteComputeFunction( uint64 start_micros = tensorflow::Env::Default()->NowMicros(); // Allocate profiling counters for each hlo instruction that we would like to - // profile. Allocate an additional profile counter for the entire - // computation. - std::vector profile_counters(hlo_to_profile_idx_.size() + 1); + // profile. Even when not Hlo profiling, we allocate a counter for the entire + // computation, which we use to update ExecutionProfile below. + std::vector* profile_counters = nullptr; + std::vector profile_counter_for_entry_computation; + if (hlo_execution_profile) { + profile_counters = hlo_execution_profile->mutable_profile_counters(); + } else { + profile_counters = &profile_counter_for_entry_computation; + profile_counter_for_entry_computation.push_back(0); + } // Call the computation function following the calling convention. std::vector buffer_pointers; @@ -200,7 +208,7 @@ Status CpuExecutable::ExecuteComputeFunction( VLOG(3) << tensorflow::strings::Printf( " func(void* result, void* params[%zu], void* temps[%zu], " "uint64 profile_counters[%zu])", - args_array.size(), buffer_pointers.size(), profile_counters.size()); + args_array.size(), buffer_pointers.size(), profile_counters->size()); VLOG(3) << tensorflow::strings::Printf(" result = %p", result_buffer); auto ptr_printer = [](string* out, const void* p) { tensorflow::strings::StrAppend(out, tensorflow::strings::Printf("%p", p)); @@ -212,11 +220,11 @@ Status CpuExecutable::ExecuteComputeFunction( " temps = [%s]", tensorflow::str_util::Join(buffer_pointers, ", ", ptr_printer).c_str()); VLOG(3) << tensorflow::strings::Printf(" profile_counters = %p", - profile_counters.data()); + profile_counters->data()); } compute_function_(result_buffer, run_options, args_array.data(), - buffer_pointers.data(), profile_counters.data()); + buffer_pointers.data(), profile_counters->data()); uint64 end_micros = tensorflow::Env::Default()->NowMicros(); @@ -225,20 +233,15 @@ Status CpuExecutable::ExecuteComputeFunction( const double nanoseconds = (end_micros - start_micros) * 1000.0; execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0)); - // The last profile counter is used for the computation as a whole. - execution_profile_.set_compute_cycle_count(profile_counters.back()); - } - - if (hlo_execution_profile != nullptr) { - hlo_execution_profile->set_total_cycles_executed( - *module().entry_computation(), profile_counters.back()); - - for (auto hlo_prof_idx : hlo_to_profile_idx_) { - const HloInstruction* hlo = hlo_prof_idx.first; - uint64 cycles_taken = profile_counters[hlo_prof_idx.second]; - hlo_execution_profile->SetCyclesTakenBy(hlo, cycles_taken); + if (hlo_execution_profile) { + execution_profile_.set_compute_cycle_count( + hlo_execution_profile->total_cycles_executed( + *module().entry_computation())); + } else { + execution_profile_.set_compute_cycle_count(profile_counters->back()); } } + return Status::OK(); } @@ -428,9 +431,5 @@ const PointsToSet& CpuExecutable::GetRootPointsToSet() const { module().entry_computation()->root_instruction()); } -std::unique_ptr CpuExecutable::CreateCostAnalysis() const { - return MakeUnique(ShapeSizeBytes); -} - } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 238bc9b46a..17ee2d673e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -47,12 +47,12 @@ namespace cpu { // architecture, so JIT-ed code and host code share the same ABI. class CpuExecutable : public Executable { public: - CpuExecutable( - std::unique_ptr jit, - std::unique_ptr assignment, - std::unique_ptr hlo_module, - const string& entry_function_name, - std::unordered_map hlo_to_profile_idx); + CpuExecutable(std::unique_ptr jit, + std::unique_ptr assignment, + std::unique_ptr hlo_module, + const string& entry_function_name, + std::unique_ptr hlo_profile_printer, + std::unique_ptr hlo_profile_index_map); ~CpuExecutable() override {} StatusOr ExecuteOnStream( @@ -85,12 +85,10 @@ class CpuExecutable : public Executable { static int64 ShapeSizeBytes(const Shape& shape); - std::unique_ptr CreateCostAnalysis() const override; - // Type of the computation function we expect in the JIT. using ComputeFunctionType = void (*)( void* /*result*/, const ExecutableRunOptions* /*run_options*/, - const void** /*args*/, void** /*temps*/, uint64* /*profile_counters*/); + const void** /*args*/, void** /*temps*/, int64* /*profile_counters*/); const ComputeFunctionType& compute_function() const { return compute_function_; @@ -145,9 +143,6 @@ class CpuExecutable : public Executable { // Entry function name for the computation. const string entry_function_name_; - // Maps HLOs to their index into the profile counter array. - const std::unordered_map hlo_to_profile_idx_; - TF_DISALLOW_COPY_AND_ASSIGN(CpuExecutable); }; diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc index aff61296ce..0077e344e2 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc @@ -59,19 +59,20 @@ ParallelCpuExecutable::ParallelCpuExecutable( std::unique_ptr assignment, std::unique_ptr hlo_module, std::unique_ptr> function_names, - std::unordered_map hlo_to_profile_idx, std::unordered_map> - aligned_constants) - : Executable(std::move(hlo_module)), + aligned_constants, + std::unique_ptr hlo_profile_printer, + std::unique_ptr hlo_profile_index_map) + : Executable(std::move(hlo_module), std::move(hlo_profile_printer), + std::move(hlo_profile_index_map)), jit_(std::move(jit)), assignment_(std::move(assignment)), function_names_(std::move(function_names)), - hlo_to_profile_idx_(std::move(hlo_to_profile_idx)), aligned_constants_(std::move(aligned_constants)) {} // Type of the computation function we expect in the JIT. using ComputeFunctionType = void (*)(void*, const void*, const void**, void**, - int64*, uint64*); + int64*, int64*); // Given a pointer to an output buffer (following the CPU JIT calling // conventions), mark addresses that are "live". The initial pointer itself is @@ -106,7 +107,7 @@ class Executor { const ServiceExecutableRunOptions* run_options, std::list* pending, HloInstructionMap* results, void** temps_array, - uint64* profile_counters_array, const BufferAssignment* assignment) + int64* profile_counters_array, const BufferAssignment* assignment) : functions_(functions), run_options_(run_options), pending_(pending), @@ -147,7 +148,7 @@ class Executor { std::list* pending_; HloInstructionMap* results_; void** temps_array_; - uint64* profile_counters_array_; + int64* profile_counters_array_; tensorflow::thread::ThreadPool* thread_pool_; const BufferAssignment* assignment_; @@ -389,9 +390,11 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions( tensorflow::gtl::ArraySlice buffers, HloExecutionProfile* hlo_execution_profile) { // Allocate profiling counters for each hlo instruction that we would like to - // profile. Allocate an additional profile counter for the entire - // computation. - std::vector profile_counters(hlo_to_profile_idx_.size() + 1); + // profile. + std::vector* profile_counters = nullptr; + if (hlo_execution_profile) { + profile_counters = hlo_execution_profile->mutable_profile_counters(); + } std::vector buffer_pointers; buffer_pointers.reserve(buffers.size()); @@ -441,9 +444,9 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions( // For example, if we expect a library conv/matmul call to run at max // concurrency, we should not dispatch runnable instructions until the // library call is finished (to avoid expensive cache invalidation). - Executor executor(functions, run_options, &pending, &results, - buffer_pointers.data(), profile_counters.data(), - assignment_.get()); + Executor executor( + functions, run_options, &pending, &results, buffer_pointers.data(), + profile_counters ? profile_counters->data() : nullptr, assignment_.get()); TF_RETURN_IF_ERROR(executor.Run()); @@ -453,18 +456,6 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions( tensorflow::mutex_lock lock(mutex_); double nanoseconds = (end_micros - start_micros) * 1000.0; execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0)); - // The last profile counter is used for the computation as a whole. - execution_profile_.set_compute_cycle_count(profile_counters.back()); - } - if (hlo_execution_profile != nullptr) { - hlo_execution_profile->set_total_cycles_executed(entry_computation, - profile_counters.back()); - - for (auto hlo_prof_idx : hlo_to_profile_idx_) { - const HloInstruction* hlo = hlo_prof_idx.first; - uint64 cycles_taken = profile_counters[hlo_prof_idx.second]; - hlo_execution_profile->SetCyclesTakenBy(hlo, cycles_taken); - } } return Status::OK(); @@ -618,10 +609,5 @@ const PointsToSet& ParallelCpuExecutable::GetRootPointsToSet() const { module().entry_computation()->root_instruction()); } -std::unique_ptr ParallelCpuExecutable::CreateCostAnalysis() - const { - return MakeUnique(ShapeSizeBytes); -} - } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h index db16aaf48b..d65e3f42f3 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h @@ -52,10 +52,11 @@ class ParallelCpuExecutable : public Executable { std::unique_ptr assignment, std::unique_ptr hlo_module, std::unique_ptr> function_names, - std::unordered_map hlo_to_profile_idx, std::unordered_map> - aligned_constants); + aligned_constants, + std::unique_ptr hlo_profile_printer, + std::unique_ptr hlo_profile_index_map); ~ParallelCpuExecutable() override {} StatusOr ExecuteOnStream( @@ -95,8 +96,6 @@ class ParallelCpuExecutable : public Executable { "Equality test on CPU parallel executable is not implemented."); } - std::unique_ptr CreateCostAnalysis() const override; - private: // Allocate buffers required for execution and assign them to the elements of // "buffers". "buffers" should be sized to the number of buffers in buffer @@ -143,9 +142,6 @@ class ParallelCpuExecutable : public Executable { // Map containing the JITted function names for each HLO instruction. const std::unique_ptr> function_names_; - // Maps HLOs to their index into the profile counter array. - const std::unordered_map hlo_to_profile_idx_; - // Map from HLO Constant instructions to a pointer to their literal data. // The data stored in the protocol buffer might be insufficiently aligned, // we create a sufficiently aligned copy and store it in this map. diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 2135707371..08862308c9 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -44,8 +44,15 @@ namespace xla { // interface that is used for launching compiled programs across platforms. class Executable { public: - explicit Executable(std::unique_ptr hlo_module) - : hlo_module_(std::move(hlo_module)) {} + explicit Executable(std::unique_ptr hlo_module, + std::unique_ptr hlo_profile_printer, + std::unique_ptr hlo_profile_index_map) + : hlo_module_(std::move(hlo_module)), + hlo_profile_printer_(std::move(hlo_profile_printer)), + hlo_profile_index_map_(std::move(hlo_profile_index_map)) { + CHECK_EQ(hlo_profile_printer_.get() == nullptr, + hlo_profile_index_map_.get() == nullptr); + } virtual ~Executable() {} // Enqueues the compilation result on the provided stream, passing the given @@ -123,12 +130,20 @@ class Executable { "Equality test on this executable is not implemented."); } + const HloProfilePrinter& hlo_profile_printer() const { + CHECK(hlo_profiling_enabled()); + return *hlo_profile_printer_; + } + + const HloProfileIndexMap& hlo_profile_index_map() const { + CHECK(hlo_profiling_enabled()); + return *hlo_profile_index_map_; + } + // Returns whether this executable was compiled with HLO profilings support // enabled. If not, the caller should not expect an hlo_execution_profile // passed to ExecuteOnStream above to be populated during execution. - bool hlo_profiling_enabled() const { - return hlo_module_->config().hlo_profiling_enabled(); - } + bool hlo_profiling_enabled() const { return hlo_profile_printer_ != nullptr; } const HloModule& module() const { return *hlo_module_; } @@ -160,10 +175,6 @@ class Executable { static Status DumpToDirectory(const string& directory_path, string filename, const SessionModule& session_module); - // Returns a cost analysis object appropriate for the platform on which this - // executable can run. - virtual std::unique_ptr CreateCostAnalysis() const = 0; - protected: mutable tensorflow::mutex mutex_; @@ -181,6 +192,9 @@ class Executable { // Execution count, used to generate a unique filename for each dumped // execution. int64 execution_count_ = 0; + + std::unique_ptr hlo_profile_printer_; + std::unique_ptr hlo_profile_index_map_; }; template @@ -200,7 +214,8 @@ StatusOr Executable::ExecuteOnStreamWrapper( std::unique_ptr profile_ptr = module_config().debug_options().xla_hlo_profile() && hlo_profiling_enabled() - ? MakeUnique(module(), *CreateCostAnalysis()) + ? MakeUnique(&hlo_profile_printer(), + &hlo_profile_index_map()) : nullptr; auto return_value = diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 92c53265d0..fcd73fd37a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -465,10 +465,20 @@ StatusOr> GpuCompiler::RunBackend( VLOG(2) << "Printing the thunk schedule..."; XLA_VLOG_LINES(2, thunk_schedule->ToString()); - auto* gpu_executable = - new GpuExecutable(ptx, cubin, {cc_major, cc_minor}, - std::move(thunk_schedule), std::move(module), - std::move(buffer_assignment), ShapeSizeBytesFunction()); + std::unique_ptr profile_index_map; + std::unique_ptr profile_printer; + + if (module->config().hlo_profiling_enabled()) { + HloCostAnalysis cost_analysis(ShapeSizeBytesFunction()); + profile_index_map = MakeUnique(*module); + profile_printer = + CreateHloProfilePrinter(*profile_index_map, cost_analysis); + } + + auto* gpu_executable = new GpuExecutable( + ptx, cubin, {cc_major, cc_minor}, std::move(thunk_schedule), + std::move(module), std::move(buffer_assignment), + std::move(profile_printer), std::move(profile_index_map)); if (embed_ir_in_executable) { DCHECK_NE("", ir_module_string_before_opt); gpu_executable->set_ir_module_string(ir_module_string_before_opt); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index c6f23f9b05..0fd85e4fb0 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -113,14 +113,15 @@ GpuExecutable::GpuExecutable( std::unique_ptr thunk_schedule, std::unique_ptr hlo_module, std::unique_ptr assignment, - HloCostAnalysis::ShapeSizeFunction shape_size_function) - : Executable(std::move(hlo_module)), + std::unique_ptr hlo_profile_printer, + std::unique_ptr hlo_profile_index_map) + : Executable(std::move(hlo_module), std::move(hlo_profile_printer), + std::move(hlo_profile_index_map)), ptx_(ptx), cubin_(cubin), compute_capability_(compute_capability), thunk_schedule_(std::move(thunk_schedule)), - assignment_(std::move(assignment)), - shape_size_function_(std::move(shape_size_function)) {} + assignment_(std::move(assignment)) {} Status GpuExecutable::ExecuteThunks( const ServiceExecutableRunOptions* run_options, @@ -358,9 +359,5 @@ const PointsToSet& GpuExecutable::GetRootPointsToSet() const { module().entry_computation()->root_instruction()); } -std::unique_ptr GpuExecutable::CreateCostAnalysis() const { - return MakeUnique(shape_size_function_); -} - } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index a3815370c1..e7307e07c0 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -54,7 +54,8 @@ class GpuExecutable : public Executable { std::unique_ptr thunk_schedule, std::unique_ptr hlo_module, std::unique_ptr assignment, - HloCostAnalysis::ShapeSizeFunction shape_size_function); + std::unique_ptr hlo_profile_printer, + std::unique_ptr hlo_profile_index_map); // This should be called after set_ir_module_string. const string& ir_module_string() const { return ir_module_string_; } @@ -95,8 +96,6 @@ class GpuExecutable : public Executable { return Unimplemented("Equality test on GPU executable is not implemented."); } - std::unique_ptr CreateCostAnalysis() const override; - private: // If `block_host_until_done` is false, execution will not block the host // until the kernels have completed. This is used as an optimization for @@ -140,9 +139,6 @@ class GpuExecutable : public Executable { // memory for every output/temp buffers. const std::unique_ptr assignment_; - // Function to compute the size of a given Shape, in bytes. - const HloCostAnalysis::ShapeSizeFunction shape_size_function_; - TF_DISALLOW_COPY_AND_ASSIGN(GpuExecutable); }; diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index 9e256b9b37..ba75e2ef1b 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -40,7 +40,7 @@ HloProfileIndexMap::HloProfileIndexMap(const HloModule& module) { } } -static HloProfilePrinter CreateOwnedHloProfilePrinter( +std::unique_ptr CreateHloProfilePrinter( const HloProfileIndexMap& hlo_profile_index_map, const HloCostAnalysis& cost_analysis) { using HloComputationInfo = HloProfilePrinter::HloComputationInfo; @@ -108,15 +108,15 @@ static HloProfilePrinter CreateOwnedHloProfilePrinter( delete[] computation_infos; }; - return HloProfilePrinter(computation_infos, - hlo_profile_index_map.computation_count(), deleter); + return MakeUnique( + computation_infos, hlo_profile_index_map.computation_count(), deleter); } -HloExecutionProfile::HloExecutionProfile(const HloModule& module, - const HloCostAnalysis& cost_analysis) - : hlo_profile_index_map_(module), - hlo_profile_printer_( - CreateOwnedHloProfilePrinter(hlo_profile_index_map_, cost_analysis)), +HloExecutionProfile::HloExecutionProfile( + const HloProfilePrinter* hlo_profile_printer, + const HloProfileIndexMap* hlo_profile_index_map) + : hlo_profile_printer_(*hlo_profile_printer), + hlo_profile_index_map_(*hlo_profile_index_map), profile_counters_( /*count*/ hlo_profile_index_map_.total_count(), /*value*/ 0) {} @@ -131,10 +131,4 @@ uint64 HloExecutionProfile::GetCyclesTakenBy(const HloInstruction& hlo) const { return profile_counters_[hlo_profile_index_map_.GetProfileIndexFor(hlo)]; } -string HloExecutionProfile::ToString( - const DeviceDescription& device_description) const { - return hlo_profile_printer_.ToString(profile_counters_.data(), - device_description.clock_rate_ghz()); -} - } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.h b/tensorflow/compiler/xla/service/hlo_execution_profile.h index 84702680c0..470fd4ce3c 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.h +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.h @@ -77,6 +77,11 @@ class HloProfileIndexMap { std::unordered_map computation_to_profile_idx_; }; +// Create an instance of `HloProfilePrinter` that owns its memory. +std::unique_ptr CreateHloProfilePrinter( + const HloProfileIndexMap& hlo_profile_index_map, + const HloCostAnalysis& cost_analysis); + // Describes how much time each HLO operation took. // // Each HloComputation takes a certain number of cycles. This class helps break @@ -85,8 +90,8 @@ class HloExecutionProfile { public: using DeviceDescription = perftools::gputools::DeviceDescription; - HloExecutionProfile(const HloModule& module, - const HloCostAnalysis& cost_analysis); + HloExecutionProfile(const HloProfilePrinter* hlo_profile_printer, + const HloProfileIndexMap* hlo_profile_index_map); // Record how many cycles this HLO took to execute. void SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken); @@ -114,15 +119,16 @@ class HloExecutionProfile { // for the operations in a given computation. Returns an empty string if it // wasn't possible to generate a printable version. cost_analysis should be a // clean analysis that can be used to visit the computation. - string ToString(const DeviceDescription& device_description) const; + string ToString(const DeviceDescription& device_description) const { + return hlo_profile_printer_.ToString(profile_counters_.data(), + device_description.clock_rate_ghz()); + } - private: - // hlo_profile_index_map_ maps an Hlo entity (computation or instruction) to - // an index in profile_counters_. - HloProfileIndexMap hlo_profile_index_map_; + std::vector* mutable_profile_counters() { return &profile_counters_; } - // Used to print profile_counters_ in a human readable form. - HloProfilePrinter hlo_profile_printer_; + private: + const HloProfilePrinter& hlo_profile_printer_; + const HloProfileIndexMap& hlo_profile_index_map_; // Stores per-Hlo profile counters. This is the only thing that changes when // we execute an XLA computation. diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc index 5ba31296ea..b1e6729e2b 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc @@ -72,7 +72,11 @@ TEST_F(HloExecutionProfileTest, Basic) { }; HloCostAnalysis cost_analysis(shape_size_function); - HloExecutionProfile execution_profile(*hlo_module, cost_analysis); + HloProfileIndexMap profile_index_map(*hlo_module); + std::unique_ptr profile_printer = + CreateHloProfilePrinter(profile_index_map, cost_analysis); + HloExecutionProfile execution_profile(profile_printer.get(), + &profile_index_map); const int64 add_cycles = 1000; const int64 dot_cycles = 4000; diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 96f937caf9..9183a1d1bf 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -42,7 +42,8 @@ namespace sep = ::perftools::gputools::interpreter; InterpreterExecutable::InterpreterExecutable( std::unique_ptr hlo_module) - : Executable(std::move(hlo_module)) {} + : Executable(std::move(hlo_module), /*hlo_profile_printer=*/nullptr, + /*hlo_profile_index_map=*/nullptr) {} InterpreterExecutable::~InterpreterExecutable() {} @@ -156,10 +157,5 @@ StatusOr InterpreterExecutable::ExecuteAsyncOnStream( return ShapeUtil::ByteSizeOf(shape, sizeof(void*)); } -std::unique_ptr InterpreterExecutable::CreateCostAnalysis() - const { - return MakeUnique(ShapeSizeBytes); -} - } // namespace interpreter } // namespace xla diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h index c69b0d036d..0e87eb90bf 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.h +++ b/tensorflow/compiler/xla/service/interpreter/executable.h @@ -61,8 +61,6 @@ class InterpreterExecutable : public Executable { static int64 ShapeSizeBytes(const Shape& shape); - std::unique_ptr CreateCostAnalysis() const override; - private: TF_DISALLOW_COPY_AND_ASSIGN(InterpreterExecutable); }; diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 902a1afb45..d997cab83f 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -575,12 +575,13 @@ Service::ExecuteParallelAndRegisterResult( // profile. for (auto& index_to_profiled_stream : index_to_profiled_streams) { int64 device = index_to_profiled_stream.first; - auto& module = executables[device]->module(); se::Stream* stream = index_to_profiled_stream.second; - HloExecutionProfile hlo_profile(module, - *executables[device]->CreateCostAnalysis()); - TF_RETURN_IF_ERROR(executables[device]->PopulateExecutionProfile( - &hlo_profile, stream->parent())); + Executable* executable = executables[device]; + const HloModule& module = executable->module(); + HloExecutionProfile hlo_profile(&executable->hlo_profile_printer(), + &executable->hlo_profile_index_map()); + TF_RETURN_IF_ERROR( + executable->PopulateExecutionProfile(&hlo_profile, stream->parent())); XLA_LOG_LINES( tensorflow::INFO, hlo_profile.ToString(streams[0]->parent()->GetDeviceDescription())); -- GitLab From bb287e33f725ed65a0aeb198cb55e6d5d470145b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 12:47:09 -0800 Subject: [PATCH 0779/1801] Small changes to optimizer and utils. Adds a `var_list` arg to optimizer. Removes call to `convert_to_tensor` from _momentum to support non-default dtypes. Implements `posdef_inv_eig` for matrix inversion. PiperOrigin-RevId: 176690230 --- .../contrib/kfac/python/ops/optimizer.py | 31 ++++++++++++------- tensorflow/contrib/kfac/python/ops/utils.py | 17 ++++++++-- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index a0e2fedc5c..98f8e7b230 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -40,6 +40,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): cov_ema_decay, damping, layer_collection, + var_list=None, momentum=0., momentum_type="regular", norm_constraint=None, @@ -66,6 +67,9 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): blocks, kronecker factors, and losses associated with the graph. The layer_collection cannot be modified after KfacOptimizer's initialization. + var_list: Optional list or tuple of variables to train. Defaults to the + list of variables collected in the graph under the key + `GraphKeys.TRAINABLE_VARIABLES`. momentum: The momentum value for this optimizer. Only applies when momentum_type is 'regular' or 'adam'. (Default: 0) momentum_type: The type of momentum to use in this optimizer, one of @@ -96,9 +100,9 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): or 'adam'. """ - # We may consider determining the set of variables some other way, but for - # now it's just all the trainable variables. - variables = tf_variables.trainable_variables() + variables = var_list + if variables is None: + variables = tf_variables.trainable_variables() self._fisher_est = est.FisherEstimator( variables, @@ -123,7 +127,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): raise ValueError("Momentum must be unspecified if using a momentum_type " "other than 'regular' or 'adam'.") - self._momentum = ops.convert_to_tensor(momentum, name="momentum") + self._momentum = momentum self._momentum_type = momentum_type self._norm_constraint = norm_constraint @@ -313,14 +317,17 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): self._batch_size, dtype=fft_precon_grads[0].dtype) # compute the entries of the 2x2 matrix - m_11 = (_inner_product_list(fft_precon_grads, fft_precon_grads) / batch_size - + self.damping * _inner_product_list(precon_grads, precon_grads)) + m_11 = ( + _inner_product_list(fft_precon_grads, fft_precon_grads) / batch_size + + self.damping * _inner_product_list(precon_grads, precon_grads)) - m_21 = (_inner_product_list(fft_prev_updates, fft_precon_grads) / batch_size - + self.damping * _inner_product_list(prev_updates, precon_grads)) + m_21 = ( + _inner_product_list(fft_prev_updates, fft_precon_grads) / batch_size + + self.damping * _inner_product_list(prev_updates, precon_grads)) - m_22 = (_inner_product_list(fft_prev_updates, fft_prev_updates) / batch_size - + self.damping * _inner_product_list(prev_updates, prev_updates)) + m_22 = ( + _inner_product_list(fft_prev_updates, fft_prev_updates) / batch_size + + self.damping * _inner_product_list(prev_updates, prev_updates)) def non_zero_prevupd_case(): r"""Computes optimal (alpha, mu) given non-zero previous update. @@ -406,8 +413,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): grads = list(grad for (grad, _) in grads_and_vars) variables = list(var for (_, var) in grads_and_vars) # previous updates are the negative velocities (up to scaling by LR) - prev_updates = list(-self._zeros_slot(var, "velocity", self._name) - for var in variables) + prev_updates = list( + -self._zeros_slot(var, "velocity", self._name) for var in variables) # Compute optimal velocity update parameters according to quadratic model alpha, mu, _ = self._compute_qmodel_hyperparams( diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index ca6fb655b4..d5461c9f2e 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -28,7 +28,6 @@ from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops - # Method used for inverting matrices. POSDEF_INV_METHOD = "cholesky" @@ -202,9 +201,18 @@ def posdef_inv_cholesky(tensor, identity, damping): return linalg_ops.cholesky_solve(chol, identity) +def posdef_inv_eig(tensor, identity, damping): + """Computes inverse(tensor + damping * identity) with eigendecomposition.""" + eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig( + tensor + damping * identity) + return math_ops.matmul( + eigenvectors / eigenvalues, eigenvectors, transpose_b=True) + + posdef_inv_funcs = { "matrix_inverse": posdef_inv_matrix_inverse, "cholesky": posdef_inv_cholesky, + "eig": posdef_inv_eig, } @@ -261,8 +269,8 @@ def fwd_gradients(ys, xs, grad_xs=None, stop_gradients=None): # generated by the first gradients_impl.gradients call. us = [array_ops.zeros_like(y) + float("nan") for y in ys] - dydxs = gradients_impl.gradients(ys, xs, grad_ys=us, - stop_gradients=stop_gradients) + dydxs = gradients_impl.gradients( + ys, xs, grad_ys=us, stop_gradients=stop_gradients) # Deal with strange types that gradients_impl.gradients returns but can't # deal with. @@ -278,3 +286,6 @@ def fwd_gradients(ys, xs, grad_xs=None, stop_gradients=None): dysdx = gradients_impl.gradients(dydxs, us, grad_ys=grad_xs) return dysdx + +# TODO(b/69623235): Add a function for finding tensors that share gradients +# to eliminate redundant fisher factor computations. -- GitLab From 21040b72a38f9a7ff0c3fd6cafebbacc5286b596 Mon Sep 17 00:00:00 2001 From: Alex Sergeev Date: Wed, 22 Nov 2017 12:58:13 -0800 Subject: [PATCH 0780/1801] Update Custom Op instructions to use tf.sysconfg flags (#14307) * Update test_user_ops.sh to use new tf.sysconfig APIs * Fix the test * Modify the test to not use 'eval' * Update docs on the website * Update log message --- tensorflow/docs_src/extend/adding_an_op.md | 10 +++---- .../tools/ci_build/builds/test_user_ops.sh | 28 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md index a3a0272059..c52279b212 100644 --- a/tensorflow/docs_src/extend/adding_an_op.md +++ b/tensorflow/docs_src/extend/adding_an_op.md @@ -341,9 +341,9 @@ Assuming you have `g++` installed, here is the sequence of commands you can use to compile your op into a dynamic library. ```bash -TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') -TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') -g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2 +TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) +TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) +g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 ``` On Mac OS X, the additional flag "-undefined dynamic_lookup" is required when @@ -1228,10 +1228,10 @@ into a single dynamically loadable library: ```bash nvcc -std=c++11 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \ --I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC + ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC g++ -std=c++11 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \ -cuda_op_kernel.cu.o -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB -ltensorflow_framework + cuda_op_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]} ``` `cuda_op_kernel.so` produced above can be loaded as usual in Python, using the diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index 4f1c61b8e9..358f82ac5d 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -76,17 +76,17 @@ echo "PYTHON_BIN_PATH: ${PYTHON_BIN_PATH}" pushd "${TMP_DIR}" -# Obtain paths include and lib paths to the TensorFlow installation -TF_INC=$("${PYTHON_BIN_PATH}" \ - -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') -TF_LIB=$("${PYTHON_BIN_PATH}" \ - -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') - -if [[ -z "${TF_INC}" ]]; then - die "FAILED to determine TensorFlow include path" +# Obtain compilation and linking flags +TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \ + -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) +TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \ + -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) + +if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then + die "FAILED to determine TensorFlow compilation or linking flags" else - echo "TensorFlow include path: ${TF_INC}" - TF_INCLUDE_PATH="-I${TF_INC} -I${TF_INC}/external/nsync/public" + echo "TensorFlow compile flags: ${TF_CFLAGS[@]}" + echo "TensorFlow link flags: ${TF_LFLAGS[@]}" fi # Check g++ availability @@ -145,7 +145,7 @@ if [[ ${IS_GPU} == "0" ]]; then "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \ -shared "${SRC_FILE}" -o "${USER_OP_SO}" \ - -fPIC ${TF_INCLUDE_PATH} -L "${TF_LIB}" -ltensorflow_framework || \ + -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} || \ die "g++ compilation of ${SRC_FILE} FAILED" else @@ -184,7 +184,7 @@ else OP_KERNEL_O=$(echo "${OP_KERNEL_CC}" | sed -e 's/\.cc/\.o/') "${NVCC_BIN}" -std=c++11 \ -c -o "${OP_KERNEL_O}" "${OP_KERNEL_CU}" \ - ${TF_INCLUDE_PATH} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \ + ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \ die "nvcc compilation of ${OP_KERNEL_CC} FAILED" CUDA_LIB_DIR="/usr/local/cuda/lib64" @@ -203,8 +203,8 @@ else USER_OP_SO="add_one.so" "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \ -shared -o "${USER_OP_SO}" "${OP_KERNEL_CC}" \ - "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" -L "${TF_LIB}" \ - -fPIC -lcudart -ltensorflow_framework || \ + "${OP_KERNEL_O}" ${TF_CFLAGS[@]} -L "${CUDA_LIB_DIR}" ${TF_LFLAGS[@]} \ + -fPIC -lcudart || \ die "g++ compilation of ${OP_KERNEL_CC}" FAILED fi -- GitLab From 1913d96f20e6d9650f4841a3a17daf83fa3a45bb Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 12:59:58 -0800 Subject: [PATCH 0781/1801] Fix math_ops.py --- tensorflow/python/ops/math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 939d45a022..03e9e92e31 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1277,7 +1277,7 @@ def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output): return output -@deprecation.deprecated_args( +@deprecated_args( None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_sum(input_tensor, axis=None, -- GitLab From 8067aa0862b7cd708f3a31accc2d232bafaf9442 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 22 Nov 2017 13:26:25 -0800 Subject: [PATCH 0782/1801] Create Python Operations for the TF_Operations created by import_graph_def. This change also introduces Python functionality for iterating through every TF_Operation in the graph and every newly-added TF_Operation via TF_GraphNextOperation. PiperOrigin-RevId: 176694180 --- tensorflow/python/client/tf_session.i | 18 ++++++ tensorflow/python/framework/c_api_util.py | 38 ++++++++++++ tensorflow/python/framework/importer.py | 14 ++++- tensorflow/python/framework/importer_test.py | 61 ++++++++++++++++---- 4 files changed, 118 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 41c707ae63..ef6f28ce07 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -315,6 +315,24 @@ tensorflow::ImportNumpy(); $2 = inputs.size(); } +// Typemaps for TF_GraphNextOperation(). +%typemap(in) size_t* pos (size_t pos) { + pos = PyLong_AsUnsignedLong($input); + $1 = &pos; +} + +// Returns a (TF_Operation*, int pos) tuple. +%typemap(argout) size_t* pos { + PyObject* new_result = PyTuple_New(2); + if (!new_result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create tuple"); + } + // Steals $result reference + PyTuple_SET_ITEM(new_result, 0, $result); + PyTuple_SET_ITEM(new_result, 1, PyLong_FromSize_t(*$1)); + $result = new_result; +} + // TODO(skyewm): SWIG emits a warning for the const char* in TF_WhileParams, // skip for now %ignore TF_WhileParams; diff --git a/tensorflow/python/framework/c_api_util.py b/tensorflow/python/framework/c_api_util.py index 814436fc7a..6c522de452 100644 --- a/tensorflow/python/framework/c_api_util.py +++ b/tensorflow/python/framework/c_api_util.py @@ -110,3 +110,41 @@ def tf_output(c_op, index): ret.oper = c_op ret.index = index return ret + + +def tf_operations(graph): + """Generator that yields every TF_Operation in `graph`. + + Args: + graph: Graph + + Yields: + wrapped TF_Operation + """ + # pylint: disable=protected-access + pos = 0 + c_op, pos = c_api.TF_GraphNextOperation(graph._c_graph, pos) + while c_op is not None: + yield c_op + c_op, pos = c_api.TF_GraphNextOperation(graph._c_graph, pos) + # pylint: enable=protected-access + + +def new_tf_operations(graph): + """Generator that yields newly-added TF_Operations in `graph`. + + Specifically, yields TF_Operations that don't have associated Operations in + `graph`. This is useful for processing nodes added by the C API. + + Args: + graph: Graph + + Yields: + wrapped TF_Operation + """ + # TODO(b/69679162): do this more efficiently + for c_op in tf_operations(graph): + try: + graph._get_operation_by_tf_operation(c_op) # pylint: disable=protected-access + except KeyError: + yield c_op diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index e4b94e1a34..c00b9da0df 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -194,6 +194,14 @@ def _FindAttrInOpDef(attr_name, op_def): return None +def _ProcessNewOps(graph): + """Processes the newly-added TF_Operations in `graph`.""" + for c_op in c_api_util.new_tf_operations(graph): + graph._create_op_from_tf_operation(c_op) # pylint: disable=protected-access + + # TODO(skyewm): colocation logic + + @deprecated_args(None, 'Please file an issue at ' 'https://github.com/tensorflow/tensorflow/issues if you depend' ' on this feature.', @@ -257,11 +265,13 @@ def import_graph_def(graph_def, input_map=None, return_elements=None, if graph._c_graph: # pylint: disable=protected-access scoped_options = c_api_util.ScopedTFImportGraphDefOptions() - with errors.raise_exception_on_not_ok_status() as status: - with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized: + with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized: + with errors.raise_exception_on_not_ok_status() as status: c_api.TF_GraphImportGraphDefWithResults( graph._c_graph, serialized, scoped_options.options, status) # pylint: disable=protected-access + _ProcessNewOps(graph) + if return_elements is not None: raise ValueError('return_elements not yet implemented with C API') return None diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index d27ec1e30c..8984282c68 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -65,19 +65,58 @@ class ImportGraphDefTest(test.TestCase): importer.import_graph_def( self._MakeGraphDef(""" node { name: 'A' op: 'IntOutputFloatOutput' } - node { name: 'B' op: 'ListOutput' - attr { key: 'T' - value { list { type: DT_INT32 type: DT_FLOAT } } } } - node { name: 'C' op: 'ListInput' - attr { key: 'N' value { i: 2 } } - attr { key: 'T' value { type: DT_INT32 } } - input: 'A:0' input: 'B:0' } - node { name: 'D' op: 'ListInput' - attr { key: 'N' value { i: 2 } } - attr { key: 'T' value { type: DT_FLOAT } } - input: 'A:1' input: 'B:1' } + node { name: 'B' op: 'ListOutput' + attr { key: 'T' + value { list { type: DT_INT32 type: DT_FLOAT } } } } + node { name: 'C' op: 'ListInput' + attr { key: 'N' value { i: 2 } } + attr { key: 'T' value { type: DT_INT32 } } + input: 'A:0' input: 'B:0' } + node { name: 'D' op: 'ListInput' + attr { key: 'N' value { i: 2 } } + attr { key: 'T' value { type: DT_FLOAT } } + input: 'A:1' input: 'B:1' } """)) + graph = ops.get_default_graph() + a = graph.get_operation_by_name("A") + b = graph.get_operation_by_name("B") + c = graph.get_operation_by_name("C") + d = graph.get_operation_by_name("D") + + # Assert that the import process creates distinct tensors. + self.assertNotEqual(a.outputs[0].name, a.outputs[1].name) + self.assertNotEqual(b.outputs[0].name, b.outputs[1].name) + self.assertNotEqual(a.outputs[0].name, b.outputs[0].name) + self.assertNotEqual(a.outputs[0].name, b.outputs[1].name) + self.assertNotEqual(a.outputs[1].name, b.outputs[0].name) + self.assertNotEqual(a.outputs[1].name, b.outputs[1].name) + + # Assert that the ops are connected according to the GraphDef topology. + self.assertEqual(c.inputs[0], a.outputs[0]) + self.assertEqual(c.inputs[1], b.outputs[0]) + self.assertEqual(d.inputs[0], a.outputs[1]) + self.assertEqual(d.inputs[1], b.outputs[1]) + + # Check the types of the returned ops and tensors. + self.assertEqual(a.type, "IntOutputFloatOutput") + self.assertEqual(b.type, "ListOutput") + self.assertEqual(c.type, "ListInput") + self.assertEqual(d.type, "ListInput") + self.assertEqual(a.outputs[0].dtype, dtypes.int32) + self.assertEqual(a.outputs[1].dtype, dtypes.float32) + self.assertEqual(b.outputs[0].dtype, dtypes.int32) + self.assertEqual(b.outputs[1].dtype, dtypes.float32) + + # Check the names of the returned ops. + self.assertEqual(a.name, "A") + self.assertEqual(b.name, "B") + self.assertEqual(c.name, "C") + self.assertEqual(d.name, "D") + + # Check that the op_def is still available. + self.assertNotEqual(None, a.op_def) + def testBasic(self): with ops.Graph().as_default(): a, b, c, d = importer.import_graph_def( -- GitLab From 07c81f4f3699d255faa88ddbe50b731223090ba1 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 13:33:01 -0800 Subject: [PATCH 0783/1801] Fix core:api test --- .../api_def/base_api/api_def_UniqueV2.pbtxt | 47 +++++++++++++++++++ .../base_api/api_def_UnsortedSegmentSum.pbtxt | 2 + 2 files changed, 49 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt new file mode 100644 index 0000000000..cd7ec6e551 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt @@ -0,0 +1,47 @@ +op { + graph_op_name: "UniqueV2" + in_arg { + name: "x" + description: < [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index 0a3355cdbc..77a96d1e03 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -26,6 +26,8 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. -- GitLab From c5b8a5ed86e133ccee62b630108ea53213df2c86 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 13:29:47 -0800 Subject: [PATCH 0784/1801] Minor toco changes to support new features in tfmini. PiperOrigin-RevId: 176694498 --- .../contrib/lite/toco/export_tensorflow.cc | 30 ++++++- tensorflow/contrib/lite/toco/toco_tooling.cc | 82 +++++++++++-------- tensorflow/contrib/lite/toco/tooling_util.cc | 25 +++++- 3 files changed, 100 insertions(+), 37 deletions(-) diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 625a4dd83c..e18cf46c69 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -35,8 +35,11 @@ limitations under the License. #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/logging.h" +using tensorflow::DT_BOOL; using tensorflow::DT_FLOAT; using tensorflow::DT_INT32; +using tensorflow::DT_INT64; +using tensorflow::DT_UINT8; using tensorflow::GraphDef; using tensorflow::TensorProto; @@ -1500,10 +1503,29 @@ void ConvertOperator(const Model& model, const Operator& src_op, } } -void AddPlaceholder(const string& name, GraphDef* tensorflow_graph) { +void AddPlaceholder(const string& name, ArrayDataType type, + GraphDef* tensorflow_graph) { auto* placeholder = tensorflow_graph->add_node(); placeholder->set_op("Placeholder"); - (*placeholder->mutable_attr())["dtype"].set_type(DT_FLOAT); + switch (type) { + case ArrayDataType::kBool: + (*placeholder->mutable_attr())["dtype"].set_type(DT_BOOL); + break; + case ArrayDataType::kFloat: + (*placeholder->mutable_attr())["dtype"].set_type(DT_FLOAT); + break; + case ArrayDataType::kUint8: + (*placeholder->mutable_attr())["dtype"].set_type(DT_UINT8); + break; + case ArrayDataType::kInt32: + (*placeholder->mutable_attr())["dtype"].set_type(DT_INT32); + break; + case ArrayDataType::kInt64: + (*placeholder->mutable_attr())["dtype"].set_type(DT_INT64); + break; + default: + LOG(FATAL) << "Unexpected data type in array \"" << name << "\""; + } placeholder->set_name(name); } @@ -1531,7 +1553,9 @@ void AddPlaceholderForRNNState(const Model& model, const string& name, int size, void ExportTensorFlowGraphDefImplementation(const Model& model, GraphDef* tensorflow_graph) { for (const auto& input_array : model.flags.input_arrays()) { - AddPlaceholder(input_array.name(), tensorflow_graph); + AddPlaceholder(input_array.name(), + model.arrays.at(input_array.name())->data_type, + tensorflow_graph); } for (const auto& rnn_state : model.flags.rnn_states()) { AddPlaceholderForRNNState(model, rnn_state.state_array(), rnn_state.size(), diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index eabc145ad4..ca092b2d72 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -85,38 +85,57 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new MakeInitialDequantizeOperator); } -void SetArrayFinalDataTypes(const TocoFlags& toco_flags, Model* model) { - const bool output_supports_only_float = - toco_flags.output_format() == TENSORFLOW_GRAPHDEF; +bool SupportsQuantization(FileFormat format) { + return (format == GRAPHVIZ_DOT || format == TFLITE); + ; +} + +bool SupportsFusedActivationFunction(FileFormat format) { + return (format == GRAPHVIZ_DOT || format == TFLITE); +} + +bool SupportsLstmCell(FileFormat format) { + return (format == TENSORFLOW_GRAPHDEF || format == GRAPHVIZ_DOT); +} + +bool SupportsPreallocatedWorkspace(FileFormat format) { + return (format == GRAPHVIZ_DOT || format == TFLITE); +} - ArrayDataType specified_final_data_type = ArrayDataType::kNone; +bool IsRealValued(toco::ArrayDataType type) { + return static_cast(type == toco::ArrayDataType::kFloat || + type == toco::ArrayDataType::kUint8); +} + +void SetFinalDataTypeOnInputs(const TocoFlags& toco_flags, Model* model) { + const FileFormat output_format = toco_flags.output_format(); + ArrayDataType type; if (toco_flags.has_inference_input_type()) { - specified_final_data_type = - ConvertIODataTypeToArrayDataType(toco_flags.inference_input_type()); + type = ConvertIODataTypeToArrayDataType(toco_flags.inference_input_type()); } else if (toco_flags.has_inference_type()) { - specified_final_data_type = - ConvertIODataTypeToArrayDataType(toco_flags.inference_type()); - } - ArrayDataType final_data_type = ArrayDataType::kNone; - if (output_supports_only_float) { - QCHECK(specified_final_data_type == ArrayDataType::kNone || - specified_final_data_type == ArrayDataType::kFloat); - final_data_type = ArrayDataType::kFloat; + type = ConvertIODataTypeToArrayDataType(toco_flags.inference_type()); + } else if (!SupportsQuantization(output_format)) { + // Data type is implicitly float for non-quantized formats + type = ArrayDataType::kFloat; } else { - final_data_type = specified_final_data_type; + // Nothing to do. Data types stay as-is. + return; } + for (int i = 0; i < model->flags.input_arrays_size(); i++) { - auto* array = model->arrays[model->flags.input_arrays(i).name()].get(); + string const& array_name = model->flags.input_arrays(i).name(); + auto* array = model->arrays[array_name].get(); // Note that the notion of changing data types only applies to real-numbers // arrays (see the documentation for inference_input_type). // TODO(benoitjacob) this is assuming that uint8 arrays are quantized, // i.e. represent real numbers by means of quantization parameters, // and not plain integer uint8 input arrays. - const bool is_real_numbers = array->data_type == ArrayDataType::kFloat || - array->data_type == ArrayDataType::kUint8; - if (is_real_numbers) { - array->final_data_type = final_data_type; + if (!IsRealValued(array->data_type)) { + // Ignore non-real data types. + continue; } + + array->final_data_type = type; } } @@ -155,23 +174,21 @@ void Transform(const TocoFlags& toco_flags, Model* model) { const FileFormat output_format = toco_flags.output_format(); const IODataType inference_type = toco_flags.inference_type(); - const bool output_is_tflite = output_format == TFLITE; - - const bool output_is_tflite_quantized = - output_is_tflite && inference_type == QUANTIZED_UINT8; + const bool quantize_output = + SupportsQuantization(output_format) && inference_type == QUANTIZED_UINT8; - if (output_is_tflite_quantized) { + if (quantize_output) { QCHECK_NE(toco_flags.inference_input_type(), FLOAT) << "Quantized inference is not allowed with float inputs."; } - SetArrayFinalDataTypes(toco_flags, model); + SetFinalDataTypeOnInputs(toco_flags, model); GraphTransformationsSet transformations; MakeGeneralGraphTransformationsSet(&transformations); auto* remove_trivial_reshape = new RemoveTrivialReshape; transformations.Add(remove_trivial_reshape); - if (output_format == TFLITE) { + if (SupportsFusedActivationFunction(output_format)) { transformations.Add(new FuseActivationFunctions); } else { transformations.Add(new UnfuseActivationFunctions); @@ -190,25 +207,24 @@ void Transform(const TocoFlags& toco_flags, Model* model) { // easy to pass a new toco flag. Once that is resolved on the DarwiNN // tests side, the special-casing of DarwiNN here can go away. // TODO(benoitjacob): so drop it when we can. - if ((output_is_tflite_quantized && - toco_flags.reorder_across_fake_quant())) { + if ((quantize_output && toco_flags.reorder_across_fake_quant())) { transformations.Add(new DropFakeQuant); } } transformations.Add(new ConvertPureConvToDepthwise); // TFLite export does not yet support fused LSTM cell. - if (output_format == TENSORFLOW_GRAPHDEF) { + if (SupportsLstmCell(output_format)) { transformations.Add(new IdentifyLstmCell); } transformations.Add(new ResolveConstantConcatenation); RunGraphTransformations(model, "general graph transformations", transformations); - if (output_is_tflite_quantized) { + if (quantize_output) { RunGraphTransformations(model, "pre-quantization graph transformations", {new HardcodeMinMax, new DropFakeQuant}); } - if (output_is_tflite_quantized) { + if (quantize_output) { if (toco_flags.has_default_ranges_min() && toco_flags.has_default_ranges_max()) { UseDefaultMinMaxRangeValues(model, toco_flags.default_ranges_min(), @@ -239,7 +255,7 @@ void Transform(const TocoFlags& toco_flags, Model* model) { CheckUnsupportedOperations(*model); } - if (output_is_tflite) { + if (SupportsPreallocatedWorkspace(output_format)) { AllocateTransientArrays(model, kDefaultTransientDataAlignment); LogDump(kLogLevelModelChanged, "AFTER ALLOCATION", *model); } diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 3ee060f9b9..639b5f193c 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -294,6 +294,7 @@ void LogArray(int log_level, const Model& model, const string& name) { VLOG(log_level) << "Array: " << name; switch (array.data_type) { case ArrayDataType::kNone: + VLOG(log_level) << " Data type:"; break; case ArrayDataType::kFloat: VLOG(log_level) << " Data type: kFloat"; @@ -309,6 +310,24 @@ void LogArray(int log_level, const Model& model, const string& name) { << static_cast(array.data_type) << ")"; break; } + switch (array.final_data_type) { + case ArrayDataType::kNone: + VLOG(log_level) << " Final type:"; + break; + case ArrayDataType::kFloat: + VLOG(log_level) << " Final type: kFloat"; + break; + case ArrayDataType::kInt32: + VLOG(log_level) << " Final type: kInt32"; + break; + case ArrayDataType::kUint8: + VLOG(log_level) << " Final type: kUint8"; + break; + default: + VLOG(log_level) << " Final type: other (numerical value: " + << static_cast(array.data_type) << ")"; + break; + } if (array.buffer) { VLOG(log_level) << " Constant Buffer"; } @@ -1562,7 +1581,11 @@ void CheckFinalDataTypesSatisfied(const Model& model) { for (const auto& array_entry : model.arrays) { const auto& array = *array_entry.second; if (array.final_data_type != ArrayDataType::kNone) { - CHECK(array.final_data_type == array.data_type); + CHECK(array.final_data_type == array.data_type) + << "Array \"" << array_entry.first + << "\" has mis-matching actual and final data types (" + << static_cast(array.data_type) << "," + << static_cast(array.final_data_type) << ")."; } } } -- GitLab From e219aeb542779d90a582ffe16f8602cd1b275b22 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 22 Nov 2017 13:41:58 -0800 Subject: [PATCH 0785/1801] GCS: Perform additional file integrity checks In order to guard against interrupted reads or other network problems, we perform additional sanity checks to ensure we correctly load file blocks from GCS. PiperOrigin-RevId: 176695887 --- .../core/platform/cloud/gcs_file_system.cc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index d5e2a518e9..54d38fe962 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/cloud/curl_http_request.h" #include "tensorflow/core/platform/cloud/file_block_cache.h" #include "tensorflow/core/platform/cloud/google_auth_provider.h" @@ -696,6 +697,18 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://", bucket, "/", object); + if (out->size() < block_size()) { + // Check stat cache to see if we encountered an interrupted read. + FileStatistics stat; + if (stat_cache_->Lookup(filename, &stat)) { + if (offset + out->size() < stat.length) { + return errors::Internal(strings::Printf( + "File contents are inconsistent for file: %s @ %lu.", + filename.c_str(), offset)); + } + } + } + return Status::OK(); } @@ -816,7 +829,8 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket, return errors::Internal("'stat' cannot be nullptr."); } if (object.empty()) { - return errors::InvalidArgument("'object' must be a non-empty string."); + return errors::InvalidArgument(strings::Printf( + "'object' must be a non-empty string. (File: %s)", fname.c_str())); } StatCache::ComputeFunc compute_func = -- GitLab From b1d8c59e9b014b527fb2fbef9ce9afc14dbc4938 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 13:42:21 -0800 Subject: [PATCH 0786/1801] Merge changes from github. PiperOrigin-RevId: 176695926 --- CODE_OF_CONDUCT.md | 6 +- README.md | 4 +- configure.py | 39 +- tensorflow/BUILD | 16 + tensorflow/compiler/aot/tfcompile.bzl | 15 +- tensorflow/compiler/tests/BUILD | 2 +- .../compiler/tests/fused_batchnorm_test.py | 27 +- .../compiler/xla/service/hlo_instruction.h | 2 +- .../xla/service/hlo_instruction_test.cc | 4 +- .../contrib/android/cmake/CMakeLists.txt | 2 +- tensorflow/contrib/batching/BUILD | 1 + .../contrib/batching/kernels/batch_kernels.cc | 2 +- .../kernel_tests/csiszar_divergence_test.py | 2 +- tensorflow/contrib/cmake/CMakeLists.txt | 147 +++- .../contrib/cmake/external/boringssl.cmake | 6 +- .../contrib/cmake/external/jsoncpp.cmake | 6 +- tensorflow/contrib/cmake/external/lmdb.cmake | 6 +- tensorflow/contrib/cmake/external/png.cmake | 6 +- .../contrib/cmake/external/protobuf.cmake | 6 +- tensorflow/contrib/cmake/external/re2.cmake | 8 +- .../contrib/cmake/external/snappy.cmake | 8 +- .../contrib/cmake/external/sqlite.cmake | 6 +- tensorflow/contrib/cmake/external/zlib.cmake | 6 +- tensorflow/contrib/cmake/tf_cc_ops.cmake | 36 +- .../contrib/cmake/tf_core_kernels.cmake | 23 +- .../cmake/tf_label_image_example.cmake | 5 + tensorflow/contrib/cmake/tf_python.cmake | 38 +- tensorflow/contrib/cmake/tf_shared_lib.cmake | 45 +- .../contrib/cmake/tf_stream_executor.cmake | 3 + tensorflow/contrib/cmake/tf_tools.cmake | 13 +- tensorflow/contrib/cmake/tf_tutorials.cmake | 5 + tensorflow/contrib/crf/python/ops/crf.py | 19 +- .../contrib/data/python/kernel_tests/BUILD | 8 +- tensorflow/contrib/distributions/BUILD | 17 + tensorflow/contrib/distributions/__init__.py | 2 + .../python/kernel_tests/cauchy_test.py | 438 ++++++++++++ .../distributions/python/ops/cauchy.py | 219 ++++++ .../python/examples/notebooks/1_basics.ipynb | 4 +- .../examples/notebooks/2_gradients.ipynb | 6 +- .../examples/notebooks/3_datasets.ipynb | 10 +- .../contrib/layers/python/layers/layers.py | 19 +- .../layers/python/layers/layers_test.py | 69 +- .../learn/python/learn/estimators/head.py | 2 +- .../learn/python/learn/estimators/model_fn.py | 6 +- .../python/learn/learn_io/data_feeder.py | 12 +- .../linear_optimizer/python/ops/sdca_ops.py | 12 +- tensorflow/contrib/lite/python/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 18 +- tensorflow/contrib/lite/toco/python/BUILD | 1 + tensorflow/contrib/makefile/Makefile | 3 +- tensorflow/contrib/makefile/README.md | 41 +- tensorflow/contrib/makefile/build_all_ios.sh | 54 +- .../contrib/makefile/compile_ios_protobuf.sh | 369 +++++----- .../makefile/compile_ios_tensorflow.sh | 155 +++-- tensorflow/contrib/makefile/compile_nsync.sh | 5 +- tensorflow/contrib/nn/__init__.py | 2 + tensorflow/contrib/opt/BUILD | 18 + tensorflow/contrib/opt/__init__.py | 17 +- .../training/multitask_optimizer_wrapper.py | 140 ++++ .../multitask_optimizer_wrapper_test.py | 119 ++++ .../python/kernel_tests/core_rnn_cell_test.py | 41 ++ .../rnn/python/kernel_tests/rnn_cell_test.py | 75 +- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 378 +++++++++- .../seq2seq/python/ops/attention_wrapper.py | 54 +- tensorflow/contrib/slim/README.md | 2 +- .../slim/python/slim/nets/resnet_v1_test.py | 2 +- tensorflow/contrib/verbs/README.md | 14 +- tensorflow/contrib/verbs/rdma.cc | 372 +++++++++- tensorflow/contrib/verbs/rdma.h | 21 +- tensorflow/core/BUILD | 1 + .../api_def/base_api/api_def_UniqueV2.pbtxt | 47 ++ .../base_api/api_def_UnsortedSegmentSum.pbtxt | 2 + .../core/common_runtime/mkl_cpu_allocator.h | 2 +- .../core/common_runtime/sycl/sycl_device.h | 22 +- tensorflow/core/graph/graph.cc | 15 + tensorflow/core/graph/graph.h | 5 + tensorflow/core/graph/graph_partition.cc | 4 +- tensorflow/core/graph/graph_test.cc | 62 +- .../core/graph/mkl_tfconversion_pass.cc | 2 +- .../core/grappler/costs/graph_properties.h | 6 + tensorflow/core/grappler/utils.cc | 2 +- tensorflow/core/kernels/BUILD | 32 +- tensorflow/core/kernels/avgpooling_op.cc | 7 +- tensorflow/core/kernels/bincount_op.cc | 116 ++-- tensorflow/core/kernels/bincount_op.h | 41 ++ tensorflow/core/kernels/bincount_op_gpu.cu.cc | 114 +++ tensorflow/core/kernels/bincount_op_test.cc | 75 ++ tensorflow/core/kernels/bucketize_op.cc | 66 +- tensorflow/core/kernels/bucketize_op.h | 41 ++ .../core/kernels/bucketize_op_gpu.cu.cc | 101 +++ tensorflow/core/kernels/conv_grad_ops_3d.cc | 40 +- tensorflow/core/kernels/conv_ops_3d.cc | 5 + tensorflow/core/kernels/cwise_op_acosh.cc | 12 +- tensorflow/core/kernels/cwise_op_asinh.cc | 14 +- tensorflow/core/kernels/cwise_op_atanh.cc | 14 +- tensorflow/core/kernels/cwise_ops.h | 12 + .../core/kernels/depthwise_conv_grad_op.cc | 13 +- tensorflow/core/kernels/depthwise_conv_op.cc | 11 +- tensorflow/core/kernels/depthwise_conv_op.h | 5 +- .../core/kernels/depthwise_conv_op_gpu.cu.cc | 19 +- .../core/kernels/fused_batch_norm_op.cc | 70 +- tensorflow/core/kernels/fused_batch_norm_op.h | 22 +- tensorflow/core/kernels/lmdb_reader_op.cc | 7 +- tensorflow/core/kernels/maxpooling_op.cc | 49 +- .../core/kernels/maxpooling_op_gpu.cu.cc | 40 +- tensorflow/core/kernels/maxpooling_op_gpu.h | 2 +- tensorflow/core/kernels/mkl_tfconv_op.h | 80 ++- tensorflow/core/kernels/ops_util.h | 13 + tensorflow/core/kernels/pooling_ops_common.cc | 10 +- .../core/kernels/pooling_ops_common_gpu.h | 4 +- tensorflow/core/kernels/quantized_add_op.cc | 2 +- tensorflow/core/kernels/random_op.cc | 4 +- .../core/kernels/segment_reduction_ops.cc | 3 + .../core/kernels/segment_reduction_ops.h | 36 +- tensorflow/core/kernels/shape_ops.cc | 43 +- tensorflow/core/kernels/shape_ops.h | 5 +- tensorflow/core/kernels/strided_slice_op.cc | 1 - tensorflow/core/kernels/transpose_op.cc | 35 +- tensorflow/core/kernels/unique_op.cc | 113 ++- tensorflow/core/ops/array_ops.cc | 44 +- tensorflow/core/ops/math_ops.cc | 2 + tensorflow/core/ops/nn_ops.cc | 12 +- tensorflow/core/ops/ops.pbtxt | 5 + .../core/platform/default/build_config/BUILD | 20 +- .../core/platform/default/notification.h | 2 +- tensorflow/core/platform/posix/error.cc | 11 +- tensorflow/core/platform/posix/port.cc | 6 +- tensorflow/core/public/version.h | 2 +- tensorflow/core/util/cuda_kernel_helper.h | 12 + tensorflow/core/util/mkl_util.h | 653 ++++++++++++++++-- tensorflow/core/util/mkl_util_test.cc | 91 +++ .../api_guides/python/threading_and_queues.md | 2 +- .../docs_src/get_started/get_started.md | 6 +- tensorflow/docs_src/get_started/input_fn.md | 6 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 18 +- tensorflow/docs_src/install/install_linux.md | 22 +- tensorflow/docs_src/install/install_mac.md | 10 +- .../docs_src/install/install_sources.md | 19 +- tensorflow/docs_src/mobile/prepare_models.md | 2 +- .../docs_src/programmers_guide/debugger.md | 19 +- .../docs_src/programmers_guide/tensors.md | 12 +- tensorflow/examples/speech_commands/models.py | 2 +- tensorflow/go/android.go | 20 + tensorflow/go/operation_test.go | 8 + tensorflow/go/tensor.go | 9 +- tensorflow/go/tensor_test.go | 9 +- .../src/main/java/org/tensorflow/Shape.java | 32 + .../test/java/org/tensorflow/ShapeTest.java | 24 + tensorflow/python/BUILD | 4 + tensorflow/python/estimator/canned/head.py | 2 +- .../python/estimator/inputs/numpy_io.py | 84 ++- .../python/estimator/inputs/numpy_io_test.py | 84 +++ tensorflow/python/framework/ops.py | 4 + tensorflow/python/framework/tensor_util.py | 1 + tensorflow/python/framework/test_util.py | 8 +- .../python/kernel_tests/array_ops_test.py | 51 +- .../python/kernel_tests/bincount_op_test.py | 26 +- .../python/kernel_tests/bucketize_op_test.py | 8 +- .../python/kernel_tests/constant_op_test.py | 15 +- tensorflow/python/kernel_tests/conv1d_test.py | 43 ++ .../python/kernel_tests/conv_ops_3d_test.py | 267 ++++--- .../kernel_tests/depthwise_conv_op_test.py | 20 +- .../python/kernel_tests/distributions/BUILD | 1 + .../distributions/multinomial_test.py | 12 +- .../python/kernel_tests/pooling_ops_test.py | 59 +- .../python/kernel_tests/reader_ops_test.py | 42 ++ .../segment_reduction_ops_test.py | 29 +- .../python/kernel_tests/shape_ops_test.py | 10 + .../python/kernel_tests/unique_op_test.py | 27 + tensorflow/python/layers/base.py | 8 +- tensorflow/python/layers/convolutional.py | 2 + tensorflow/python/layers/normalization.py | 55 +- .../python/layers/normalization_test.py | 92 ++- tensorflow/python/ops/array_ops.py | 39 +- .../python/ops/distributions/dirichlet.py | 2 +- .../python/ops/distributions/multinomial.py | 50 +- tensorflow/python/ops/image_ops_impl.py | 26 +- tensorflow/python/ops/linalg_ops.py | 35 +- tensorflow/python/ops/math_grad_test.py | 17 + tensorflow/python/ops/math_ops.py | 258 ++++--- tensorflow/python/ops/metrics_impl.py | 7 +- .../python/ops/nn_fused_batchnorm_test.py | 119 ++-- tensorflow/python/ops/nn_impl.py | 17 +- tensorflow/python/ops/nn_ops.py | 128 +++- tensorflow/python/ops/variables.py | 4 +- .../python/tools/import_pb_to_tensorboard.py | 0 tensorflow/stream_executor/cuda/cuda_dnn.cc | 9 +- tensorflow/stream_executor/dnn.cc | 14 +- tensorflow/stream_executor/dnn.h | 6 + .../tools/api/golden/tensorflow.linalg.pbtxt | 2 +- .../tools/api/golden/tensorflow.nn.pbtxt | 6 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 22 +- .../tools/ci_build/ci_parameterized_build.sh | 2 +- .../tools/ci_build/install/install_golang.sh | 2 +- .../ci_build/linux/libtensorflow_docker.sh | 2 +- .../tools/ci_build/osx/libtensorflow_cpu.sh | 2 +- .../tools/ci_build/osx/libtensorflow_gpu.sh | 2 +- .../tools/ci_build/pi/build_raspberry_pi.sh | 6 + .../ci_build/windows/bazel/bazel_test_lib.sh | 4 +- .../docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 7 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- tensorflow/tools/docker/README.md | 14 + tensorflow/tools/graph_transforms/BUILD | 2 + .../tools/graph_transforms/quantize_nodes.cc | 2 + tensorflow/tools/pip_package/setup.py | 2 +- third_party/aws.BUILD | 3 + third_party/curl.BUILD | 1 - third_party/sycl/crosstool/CROSSTOOL.tpl | 8 +- third_party/sycl/crosstool/trisycl.tpl | 85 +++ third_party/sycl/sycl/BUILD.tpl | 17 +- third_party/sycl/sycl/build_defs.bzl.tpl | 17 +- third_party/sycl/sycl_configure.bzl | 85 ++- third_party/zlib.BUILD | 2 +- tools/bazel.rc | 7 +- util/python/BUILD | 2 +- 217 files changed, 6317 insertions(+), 1445 deletions(-) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/cauchy.py create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py create mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt create mode 100644 tensorflow/core/kernels/bincount_op.h create mode 100644 tensorflow/core/kernels/bincount_op_gpu.cu.cc create mode 100644 tensorflow/core/kernels/bincount_op_test.cc create mode 100644 tensorflow/core/kernels/bucketize_op.h create mode 100644 tensorflow/core/kernels/bucketize_op_gpu.cu.cc create mode 100644 tensorflow/core/util/mkl_util_test.cc create mode 100644 tensorflow/go/android.go mode change 100644 => 100755 tensorflow/python/tools/import_pb_to_tensorboard.py create mode 100644 third_party/sycl/crosstool/trisycl.tpl diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 10fd595fec..ff11d13140 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -42,7 +42,7 @@ The Code of Conduct also applies within project spaces and in public spaces when Conflicts in an open source project can take many forms, from someone having a bad day and using harsh and hurtful language in the issue queue, to more serious instances such as sexist/racist statements or threats of violence, and everything in between. -If the behaviour is threatening or harassing, or for other reasons requires immediate escalation, please see below. +If the behavior is threatening or harassing, or for other reasons requires immediate escalation, please see below. However, for the vast majority of issues, we aim to empower individuals to first resolve conflicts themselves, asking for help when needed, and only after that fails to escalate further. This approach gives people more control over the outcome of their dispute. @@ -55,14 +55,14 @@ If you are experiencing or witnessing conflict, we ask you to use the following ## Reporting Violations -Violations of the Code of Conduct can be reported to TensorFlow’s Project Steward at conduct@tensorflow.org. The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. +Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report. ## Enforcement -If the Project Steward receives a report alleging a violation of the Code of Conduct, the Project Steward will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Steward will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Steward may issue sanctions without notice. +If the Project Stewards receive a report alleging a violation of the Code of Conduct, the Project Stewards will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Stewards will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Stewards may issue sanctions without notice. ## Attribution diff --git a/README.md b/README.md index 24bbb6cec1..aff3427bdd 100644 --- a/README.md +++ b/README.md @@ -73,11 +73,11 @@ $ python ## For more information -* [TensorFlow website](https://www.tensorflow.org) +* [TensorFlow Website](https://www.tensorflow.org) * [TensorFlow White Papers](https://www.tensorflow.org/about/bib) * [TensorFlow Model Zoo](https://github.com/tensorflow/models) * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730) -* [TensorFlow course at Stanford](https://web.stanford.edu/class/cs20si) +* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si) Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate. diff --git a/configure.py b/configure.py index 0d1afbfe15..1f205861f1 100644 --- a/configure.py +++ b/configure.py @@ -43,6 +43,7 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing ' 'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION) _TF_OPENCL_VERSION = '1.2' _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp' +_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include' def is_windows(): @@ -636,7 +637,7 @@ def set_tf_cuda_version(environ_cp): write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) -def set_tf_cunn_version(environ_cp): +def set_tf_cudnn_version(environ_cp): """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" ask_cudnn_version = ( 'Please specify the cuDNN version you want to use. ' @@ -883,6 +884,28 @@ def set_computecpp_toolkit_path(environ_cp): computecpp_toolkit_path) +def set_trisycl_include_dir(environ_cp): + """Set TRISYCL_INCLUDE_DIR.""" + ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' + 'include directory. (Use --config=sycl_trisycl ' + 'when building with Bazel) ' + '[Default is %s]: ') % ( + _DEFAULT_TRISYCL_INCLUDE_DIR) + while True: + trisycl_include_dir = get_from_env_or_user_or_default( + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) + if os.path.exists(trisycl_include_dir): + break + + print('Invalid triSYCL include directory, %s cannot be found' % + (trisycl_include_dir)) + + # Set TRISYCL_INCLUDE_DIR + environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) + + def set_mpi_home(environ_cp): """Set MPI_HOME.""" default_mpi_home = which('mpirun') or which('mpiexec') or '' @@ -997,6 +1020,8 @@ def main(): environ_cp['TF_NEED_GCP'] = '0' environ_cp['TF_NEED_HDFS'] = '0' environ_cp['TF_NEED_JEMALLOC'] = '0' + environ_cp['TF_NEED_OPENCL_SYCL'] = '0' + environ_cp['TF_NEED_COMPUTECPP'] = '0' environ_cp['TF_NEED_OPENCL'] = '0' environ_cp['TF_CUDA_CLANG'] = '0' @@ -1018,17 +1043,21 @@ def main(): set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', False, 'verbs') - set_action_env_var(environ_cp, 'TF_NEED_OPENCL', 'OpenCL', False) - if environ_cp.get('TF_NEED_OPENCL') == '1': + set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': set_host_cxx_compiler(environ_cp) set_host_c_compiler(environ_cp) - set_computecpp_toolkit_path(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True) + if environ_cp.get('TF_NEED_COMPUTECPP') == '1': + set_computecpp_toolkit_path(environ_cp) + else: + set_trisycl_include_dir(environ_cp) set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): set_tf_cuda_version(environ_cp) - set_tf_cunn_version(environ_cp) + set_tf_cudnn_version(environ_cp) set_tf_cuda_compute_capabilities(environ_cp) set_tf_cuda_clang(environ_cp) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 49828cd4d6..c8f0b6b061 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -54,6 +54,15 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "raspberry_pi_armeabi", + values = { + "crosstool_top": "@local_config_arm_compiler//:toolchain", + "cpu": "armeabi", + }, + visibility = ["//visibility:public"], +) + config_setting( name = "android_arm", values = { @@ -760,6 +769,13 @@ tf_cc_shared_object( ], ) +exports_files( + [ + "tf_version_script.lds", + "tf_exported_symbols.lds", + ], +) + py_library( name = "tensorflow_py", srcs = ["__init__.py"], diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index ee291c12d0..6c385af3b3 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -119,7 +119,7 @@ def tf_library(name, graph, config, out_nodes_file, ] + freeze_saver_srcs, outs=[freeze_file], - cmd=("$(location //tensorflow/python/tools:freeze_graph)" + + cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" + freeze_args), tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"], tags=tags, @@ -130,6 +130,10 @@ def tf_library(name, graph, config, header_file = name + ".h" object_file = name + ".o" ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_") + if type(tfcompile_flags) == type(""): + flags = tfcompile_flags + else: + flags = " ".join(["'" + arg.replace("'", "'\\''") + "'" for arg in (tfcompile_flags or [])]) native.genrule( name=("gen_" + name), srcs=[ @@ -148,7 +152,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - " " + (tfcompile_flags or "")), + " " + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -185,7 +189,7 @@ def tf_library(name, graph, config, " --cpp_class=" + cpp_class + " --target_triple=" + target_llvm_triple() + " --out_session_module=$(@D)/" + session_module_pb + - " " + (tfcompile_flags or "")), + " " + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -195,8 +199,7 @@ def tf_library(name, graph, config, # The cc_library rule packaging up the header and object file, and needed # kernel implementations. - need_xla_data_proto = (tfcompile_flags and - tfcompile_flags.find("--gen_program_shape") != -1) + need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1) native.cc_library( name=name, srcs=[object_file], @@ -253,7 +256,7 @@ def tf_library(name, graph, config, ], outs=[test_file], cmd=("sed " + sed_replace + - " $(location //tensorflow/compiler/aot:test.cc) " + + " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " + "> $(OUTS)"), tags=tags, ) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index c372e05474..79c4befd36 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -672,7 +672,7 @@ tf_library( cpp_class = "LSTMLayerInference", graph = "lstm_layer_inference.pbtxt", tags = ["manual"], - tfcompile_flags = "--xla_cpu_multi_thread_eigen=false", + tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"], ) # ----------------------------------------------------------------------------- diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py index 936fcf8b6b..00a9c9a65b 100644 --- a/tensorflow/compiler/tests/fused_batchnorm_test.py +++ b/tensorflow/compiler/tests/fused_batchnorm_test.py @@ -36,7 +36,7 @@ class FusedBatchNormTest(XLATestCase): x_square = x * x x_square_sum = np.sum(x_square, (0, 1, 2)) x_sum = np.sum(x, axis=(0, 1, 2)) - element_count = np.size(x) / int(np.shape(x)[0]) + element_count = np.size(x) / int(np.shape(x)[-1]) mean = x_sum / element_count var = x_square_sum / element_count - mean * mean normalized = (x - mean) / np.sqrt(var + epsilon) @@ -64,8 +64,9 @@ class FusedBatchNormTest(XLATestCase): return grad_x, grad_scale, grad_offset def testInference(self): - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -74,8 +75,9 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=[2], name="scale") - offset = array_ops.placeholder(np.float32, shape=[2], name="offset") + scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") + offset = array_ops.placeholder( + np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y_ref, mean_ref, var_ref = self._reference_training( x_val, scale_val, offset_val, epsilon, data_format) @@ -97,8 +99,9 @@ class FusedBatchNormTest(XLATestCase): self.assertAllClose(y_val, y_ref, atol=1e-3) def _testLearning(self, use_gradient_checker): - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -109,8 +112,9 @@ class FusedBatchNormTest(XLATestCase): with self.test_session() as sess, self.test_scope(): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") - scale = array_ops.placeholder(np.float32, shape=[2], name="scale") - offset = array_ops.placeholder(np.float32, shape=[2], name="offset") + scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") + offset = array_ops.placeholder( + np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y, mean, var = nn.fused_batch_norm( t_val, @@ -154,8 +158,9 @@ class FusedBatchNormTest(XLATestCase): def testGradient(self): # TODO(b/64270657): Use gradient_checker here in addition to comparing with # this reference implementation. - x_shape = [2, 2, 6, 2] - scale_shape = [2] + channel = 3 + x_shape = [2, 2, 6, channel] + scale_shape = [channel] grad_val = np.random.random_sample(x_shape).astype(np.float32) x_val = np.random.random_sample(x_shape).astype(np.float32) scale_val = np.random.random_sample(scale_shape).astype(np.float32) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 1bd0cca945..cda8b07c61 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -222,7 +222,7 @@ class HloInstruction { tensorflow::gtl::ArraySlice strides); // Creates a slice instruction, where the first operand is sliced by - // start indices specified in the second operand, and by size specfied in + // start indices specified in the second operand, and by size specified in // 'slice_sizes'. static std::unique_ptr CreateDynamicSlice( const Shape& shape, HloInstruction* operand, diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 070bb4bc42..76b12fc8d3 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -792,8 +792,8 @@ TEST_F(HloInstructionTest, ComplexFusionOp) { // sub = Sub(mul, clamp) // tuple = Tuple({sub, sub, mul, C1}) // - // Notable complexities are repeated operands in a same instruction, different - // shapes, use of value in different expressions. + // Notable complexities are repeated operands in the same instruction, + // different shapes, use of value in different expressions. auto c1 = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(1.1f))); auto c2 = builder.AddInstruction( diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt index 25ada5ba27..aba356d616 100644 --- a/tensorflow/contrib/android/cmake/CMakeLists.txt +++ b/tensorflow/contrib/android/cmake/CMakeLists.txt @@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \ -std=c++11 -fno-rtti -fno-exceptions \ -O2 -Wno-narrowing -fomit-frame-pointer \ - -mfpu=neon -mfloat-abi=softfp -fPIE \ + -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \ -ftemplate-depth=900 \ -DGOOGLE_PROTOBUF_NO_RTTI \ -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER") diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index 8b7df4a84c..a111cfecb3 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -82,6 +82,7 @@ cc_library( tf_cc_test( name = "adaptive_shared_batch_scheduler_test", srcs = ["adaptive_shared_batch_scheduler_test.cc"], + tags = ["manual"], # b/69013768 deps = [ ":adaptive_shared_batch_scheduler", "//tensorflow/contrib/batching/test_util:fake_clock_env", diff --git a/tensorflow/contrib/batching/kernels/batch_kernels.cc b/tensorflow/contrib/batching/kernels/batch_kernels.cc index 3b7c538fcc..6041d8c9b2 100644 --- a/tensorflow/contrib/batching/kernels/batch_kernels.cc +++ b/tensorflow/contrib/batching/kernels/batch_kernels.cc @@ -461,7 +461,7 @@ class BatchResource : public ResourceBase { return Status::OK(); } - // Looks up the batcher queue for 'queue_name'. If it did't previously exist, + // Looks up the batcher queue for 'queue_name'. If it didn't previously exist, // creates it. Status LookupOrCreateBatcherQueue(const string& queue_name, BatcherQueue** queue) { diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py index 8c6a614beb..2e94b7206d 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py @@ -759,7 +759,7 @@ class CsiszarVIMCOTest(test.TestCase): def _csiszar_vimco_helper_grad(self, logu, delta): """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`.""" - # This code actually estimates the sum of the Jacobiab because thats what + # This code actually estimates the sum of the Jacobiab because that's what # TF's `gradients` does. np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper( logu[..., None] + np.diag([delta]*len(logu))) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 8744fc492f..77a3fc0c83 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -34,13 +34,41 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) +if(HAIKU) + option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF) +else() + option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" ON) +endif() + if (NOT WIN32) # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option # for targets that link ${CMAKE_THREAD_LIBS_INIT}. find_package (Threads) + + option(tensorflow_PATH_STATIC_LIB "Additional library search path for libcudnn_static.a, libnccl_static.a, libculibos.a" /usr/local/cuda/lib64/) + option(tensorflow_CUDNN_INCLUDE "cudnn.h header install path" /usr/include/) + if (NOT tensorflow_CUDNN_INCLUDE) + # option's default value is OFF. Fill it with real default values + set(tensorflow_CUDNN_INCLUDE /usr/include) + endif (NOT tensorflow_CUDNN_INCLUDE) + option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB}) + option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB}) + option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64) + if (NOT tensorflow_CUDA_LIBRARY_PATH) + # option's default value is OFF. Fill it with real default values + set(tensorflow_CUDA_LIBRARY_PATH /usr/local/cuda/lib64) + endif (NOT tensorflow_CUDA_LIBRARY_PATH) endif() +if (WIN32) + set(BOOL_WIN32 ON) +else (WIN32) + set(BOOL_WIN32 OFF) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") +endif (WIN32) + # [CLEANUP] Remove when done # For debugging function(SHOW_VARIABLES) @@ -58,7 +86,12 @@ set (DOWNLOAD_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE PATH "Location where external projects will be downloaded.") mark_as_advanced(DOWNLOAD_LOCATION) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) +if (tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +else() + set(CMAKE_POSITION_INDEPENDENT_CODE OFF) +endif() + add_definitions(-DEIGEN_AVOID_STL_ARRAY) if(WIN32) add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC) @@ -217,20 +250,35 @@ endif() if(UNIX) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) endif() +if(HAIKU) + list(APPEND tensorflow_EXTERNAL_LIBRARIES network) +endif() if (tensorflow_ENABLE_GPU) + if (NOT WIN32) + # Default install paths for cuda libraries in Linux + # In some Linux distros, find_package(CUDA) seems to require CMAKE_LIBRARY_PATH to include cuda-lib paths + list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}") + list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs") + endif (NOT WIN32) + + find_package(CUDA 8.0 REQUIRED) + + # by default we assume compute cabability 3.5 and 5.2. If you change this change it in + # CUDA_NVCC_FLAGS and cuda_config.h below + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\") + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero + set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) + include_directories(${CUDA_INCLUDE}) if (WIN32) - find_package(CUDA 8.0 REQUIRED) - - # by default we assume compute cabability 3.5 and 5.2. If you change this change it in - # CUDA_NVCC_FLAGS and cuda_config.h below - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\") - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero - set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) - include_directories(${CUDA_INCLUDE}) add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2) + else (WIN32) + # Without these double quotes, cmake in Linux makes it "-DTF_EXTRA_CUDA_CAPABILITIES=3.0, -D3.5, -D5.2" for cc, which incurs build breaks + add_definitions(-DGOOGLE_CUDA=1 -D"TF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2") + endif (WIN32) + if (WIN32) # add cudnn if(NOT CUDNN_HOME) set(CUDNN_HOME ${CUDA_TOOLKIT_TARGET_DIR}) @@ -238,18 +286,48 @@ if (tensorflow_ENABLE_GPU) include_directories(${CUDNN_HOME}) set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${CUDNN_HOME}/lib/x64/cudnn.lib) + else (WIN32) + set(CUDNN_INCLUDE "${tensorflow_CUDNN_INCLUDE}") - # create cuda_config.h - FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h - "#ifndef CUDA_CUDA_CONFIG_H_\n" - "#define CUDA_CUDA_CONFIG_H_\n" - "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n" - "#define TF_CUDA_VERSION \"64_80\"\n" - "#define TF_CUDNN_VERSION \"64_6\"\n" - "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n" - "#endif // CUDA_CUDA_CONFIG_H_\n" - ) + find_library(nccl_STATIC_LIBRARY NAMES libnccl_static.a PATHS ${tensorflow_PATH_NCCL_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) + if (NOT nccl_STATIC_LIBRARY) + message(FATAL_ERROR "NCCL is required for GPU-build") + else (NOT nccl_STATIC_LIBRARY) + message("nccl-static: ${nccl_STATIC_LIBRARY}") + # something like /usr/lib64/libnccl_static.a + endif (NOT nccl_STATIC_LIBRARY) + + find_library(cudnn_STATIC_LIBRARY NAMES libcudnn_static.a PATHS ${tensorflow_PATH_CUDNN_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) + if (NOT cudnn_STATIC_LIBRARY) + message(FATAL_ERROR "CUDNN is required for GPU-build") + else (NOT cudnn_STATIC_LIBRARY) + message("cudnn-static: ${cudnn_STATIC_LIBRARY}") + endif (NOT cudnn_STATIC_LIBRARY) + + find_library(culibos_STATIC_LIBRARY NAMES libculibos.a PATHS ${tensorflow_PATH_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR}) + if (NOT culibos_STATIC_LIBRARY) + message(FATAL_ERROR "CULIBOS is required for GPU-build") + else (NOT culibos_STATIC_LIBRARY) + message("culibos-static: ${culibos_STATIC_LIBRARY}") + endif (NOT culibos_STATIC_LIBRARY) + + include_directories(${CUDNN_INCLUDE}) + set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} + ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY}) + endif (WIN32) + + # create cuda_config.h + FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h + "#ifndef CUDA_CUDA_CONFIG_H_\n" + "#define CUDA_CUDA_CONFIG_H_\n" + "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n" + "#define TF_CUDA_VERSION \"64_80\"\n" + "#define TF_CUDNN_VERSION \"64_6\"\n" + "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n" + "#endif // CUDA_CUDA_CONFIG_H_\n" + ) + if (WIN32) # tf assumes in various places header files to be in cuda/include. On windows the cuda sdk # installs them under cuda/version/include and to avoid that we need to change tf we copy a # few files to cuda/include @@ -261,12 +339,25 @@ if (tensorflow_ENABLE_GPU) ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include ) - include_directories(${tensorflow_source_dir}/third_party/gpus) - # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES}) + else(WIN32) + # Linux has slightly differnt install paths than Windows + FILE(COPY + ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_INCLUDE}/cudnn.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h + ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h + DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include + ) + endif(WIN32) - # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used - # in the default build is upgraded. + include_directories(${tensorflow_source_dir}/third_party/gpus) + # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES}) + + # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used + # in the default build is upgraded. + if(WIN32) set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value msvcp_dll_name=msvcp140.dll cudart_dll_name=cudart64_80.dll @@ -275,7 +366,9 @@ if (tensorflow_ENABLE_GPU) cudnn_dll_name=cudnn64_6.dll cudnn_version_number=6) else(WIN32) - message(FATAL_ERROR "CMake GPU build is currently only supported on Windows.") + set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value + cuda_version_number=8.0 + cudnn_version_number=6) endif(WIN32) else(tensorflow_ENABLE_GPU) set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value @@ -293,9 +386,7 @@ include(tf_core_framework.cmake) # NOTE: Disabled until issue #3996 is fixed. # include(tf_stream_executor.cmake) if (tensorflow_ENABLE_GPU) - if (WIN32) include(tf_stream_executor.cmake) - endif() endif() include(tf_core_cpu.cmake) diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake index dc27eadaca..cca8444e2a 100644 --- a/tensorflow/contrib/cmake/external/boringssl.cmake +++ b/tensorflow/contrib/cmake/external/boringssl.cmake @@ -39,8 +39,12 @@ ExternalProject_Add(boringssl # BUILD_IN_SOURCE 1 INSTALL_COMMAND "" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake index 5127d7e8f7..d2ae4c76e8 100644 --- a/tensorflow/contrib/cmake/external/jsoncpp.cmake +++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake @@ -42,8 +42,12 @@ ExternalProject_Add(jsoncpp BUILD_IN_SOURCE 1 INSTALL_COMMAND "" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) diff --git a/tensorflow/contrib/cmake/external/lmdb.cmake b/tensorflow/contrib/cmake/external/lmdb.cmake index 79971b7cfc..e41384f023 100644 --- a/tensorflow/contrib/cmake/external/lmdb.cmake +++ b/tensorflow/contrib/cmake/external/lmdb.cmake @@ -29,10 +29,14 @@ ExternalProject_Add(lmdb INSTALL_DIR ${lmdb_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_INSTALL_PREFIX:STRING=${lmdb_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) if(WIN32) diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index 2b2bd47d1c..aad6618f52 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -41,10 +41,14 @@ ExternalProject_Add(png INSTALL_DIR ${png_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DZLIB_ROOT:STRING=${ZLIB_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index 1e300e21df..b53857a47b 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -44,8 +44,12 @@ ExternalProject_Add(protobuf ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS} INSTALL_COMMAND "" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DZLIB_ROOT:STRING=${ZLIB_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake index cb4ec9c2de..b56f4b0898 100644 --- a/tensorflow/contrib/cmake/external/re2.cmake +++ b/tensorflow/contrib/cmake/external/re2.cmake @@ -38,7 +38,11 @@ ExternalProject_Add(re2 BUILD_IN_SOURCE 1 DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -) \ No newline at end of file +) diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake index 2d2451521c..926c271fd9 100644 --- a/tensorflow/contrib/cmake/external/snappy.cmake +++ b/tensorflow/contrib/cmake/external/snappy.cmake @@ -40,11 +40,15 @@ ExternalProject_Add(snappy LOG_CONFIGURE ON LOG_BUILD ON CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DSNAPPY_BUILD_TESTS:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) # actually enables snappy in the source code -add_definitions(-DTF_USE_SNAPPY) +add_definitions(-DTF_USE_SNAPPY) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake index 1770dcb1fd..785039a469 100644 --- a/tensorflow/contrib/cmake/external/sqlite.cmake +++ b/tensorflow/contrib/cmake/external/sqlite.cmake @@ -53,9 +53,13 @@ else() INSTALL_DIR ${sqlite_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL} ) diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake index c8af611e1e..f10f84336e 100644 --- a/tensorflow/contrib/cmake/external/zlib.cmake +++ b/tensorflow/contrib/cmake/external/zlib.cmake @@ -42,9 +42,13 @@ ExternalProject_Add(zlib BUILD_IN_SOURCE 1 DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS + if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE) + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + else() + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF + endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${ZLIB_INSTALL} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) # put zlib includes in the directory where they are expected diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index 45eeb11062..6e2ac203f9 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -148,7 +148,11 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs}) add_library(tf_cc OBJECT ${tf_cc_srcs}) add_dependencies(tf_cc tf_cc_framework tf_cc_ops) -set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") +if (WIN32) + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") +else (WIN32) + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") +endif (WIN32) add_custom_target(tf_extension_ops) function(AddUserOps) @@ -164,15 +168,13 @@ function(AddUserOps) # create shared library from source and cuda obj add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib}) target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib}) - if(WIN32) - if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES) - # some ops call out to cuda directly; need to link libs for the cuda dlls - target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES}) - endif() - if (_AT_DISTCOPY) - add_custom_command(TARGET ${_AT_TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy $ ${_AT_DISTCOPY}/) - endif() + if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES) + # some ops call out to cuda directly; need to link libs for the cuda dlls + target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES}) + endif() + if (_AT_DISTCOPY) + add_custom_command(TARGET ${_AT_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ ${_AT_DISTCOPY}/) endif() if (_AT_DEPENDS) add_dependencies(${_AT_TARGET} ${_AT_DEPENDS}) @@ -180,9 +182,19 @@ function(AddUserOps) # make sure TF_COMPILE_LIBRARY is not defined for this target get_target_property(target_compile_flags ${_AT_TARGET} COMPILE_FLAGS) if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND") - set(target_compile_flags "/UTF_COMPILE_LIBRARY") + if (WIN32) + set(target_compile_flags "/UTF_COMPILE_LIBRARY") + else (WIN32) + # gcc uses UTF as default + set(target_compile_flags "-finput-charset=UTF-8") + endif (WIN32) else() - set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") + if (WIN32) + set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY") + else (WIN32) + # gcc uses UTF as default + set(target_compile_flags "${target_compile_flags} -finput-charset=UTF-8") + endif (WIN32) endif() set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags}) add_dependencies(tf_extension_ops ${_AT_TARGET}) diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index d6b8990664..2d015908a8 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -179,6 +179,7 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc" + "${tensorflow_source_dir}/tensorflow/contrib/resampler/kernels/*.cu.cc" ) if(WIN32 AND tensorflow_ENABLE_GPU) @@ -202,16 +203,16 @@ endif(WIN32 AND tensorflow_ENABLE_GPU) add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs}) add_dependencies(tf_core_kernels tf_core_cpu) -if(WIN32) +if (WIN32) target_compile_options(tf_core_kernels PRIVATE /MP) - if (tensorflow_ENABLE_GPU) - set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) - set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) - cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) - set_target_properties(${tf_core_gpu_kernels_lib} - PROPERTIES DEBUG_POSTFIX "" - COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" - ) - add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) - endif() +endif (WIN32) +if (tensorflow_ENABLE_GPU) + set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) + set(tf_core_gpu_kernels_lib tf_core_gpu_kernels) + cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs}) + set_target_properties(${tf_core_gpu_kernels_lib} + PROPERTIES DEBUG_POSTFIX "" + COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}" + ) + add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu) endif() diff --git a/tensorflow/contrib/cmake/tf_label_image_example.cmake b/tensorflow/contrib/cmake/tf_label_image_example.cmake index 0d3a4699eb..7f2f60b089 100644 --- a/tensorflow/contrib/cmake/tf_label_image_example.cmake +++ b/tensorflow/contrib/cmake/tf_label_image_example.cmake @@ -34,3 +34,8 @@ target_link_libraries(tf_label_image_example PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS tf_label_image_example + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 9b863f7bc6..61b3fd715d 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -715,6 +715,9 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) set(require_shape_fn 1) endif() + get_filename_component(GENERATE_PYTHON_OP_LIB_MKDIRPATH ${GENERATE_PYTHON_OP_LIB_DESTINATION} PATH) + file(MAKE_DIRECTORY ${GENERATE_PYTHON_OP_LIB_MKDIRPATH}) + # Create a C++ executable that links in the appropriate op # registrations and generates Python wrapper code based on the # registered ops. @@ -743,6 +746,7 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE) endfunction() +GENERATE_PYTHON_OP_LIB("audio_ops") GENERATE_PYTHON_OP_LIB("array_ops") GENERATE_PYTHON_OP_LIB("bitwise_ops") GENERATE_PYTHON_OP_LIB("math_ops") @@ -987,7 +991,7 @@ add_library(pywrap_tensorflow_internal SHARED $ $<$:$> $ - $<$:$> + $<$:$<$:$>> $<$:$> ${pywrap_tensorflow_deffile} ) @@ -1063,25 +1067,23 @@ if(WIN32) DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/) endif(WIN32) -if(WIN32) - # include contrib/seq2seq as .so - # - set(tf_beam_search_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" - ) +# include contrib/seq2seq as .so +# +set(tf_beam_search_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h" + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc" +) - set(tf_beam_search_gpu_srcs - "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" - ) +set(tf_beam_search_gpu_srcs + "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc" +) - AddUserOps(TARGET _beam_search_ops - SOURCES "${tf_beam_search_srcs}" - GPUSOURCES ${tf_beam_search_gpu_srcs} - DEPENDS pywrap_tensorflow_internal tf_python_ops - DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) -endif(WIN32) +AddUserOps(TARGET _beam_search_ops + SOURCES "${tf_beam_search_srcs}" + GPUSOURCES ${tf_beam_search_gpu_srcs} + DEPENDS pywrap_tensorflow_internal tf_python_ops + DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/) ############################################################ # Build a PIP package containing the TensorFlow runtime. diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 9bf45bab30..3e3fe0cdfa 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -73,7 +73,7 @@ add_library(tensorflow SHARED $ $<$:$> $ - $<$:$> + $<$:$<$:$>> $<$:$> ${tensorflow_deffile} ) @@ -94,3 +94,46 @@ endif() if(WIN32) add_dependencies(tensorflow tensorflow_static) endif(WIN32) + +install(TARGETS tensorflow + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) + +# install necessary headers +# tensorflow headers +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/cc/ + DESTINATION include/tensorflow/cc + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/cc/ + DESTINATION include/tensorflow/cc + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/core/ + DESTINATION include/tensorflow/core + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/core/ + DESTINATION include/tensorflow/core + FILES_MATCHING PATTERN "*.h") +install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/ + DESTINATION include/tensorflow/stream_executor + FILES_MATCHING PATTERN "*.h") +# google protobuf headers +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/ + DESTINATION include/google + FILES_MATCHING PATTERN "*.h") +# nsync headers +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/ + DESTINATION include/external/nsync + FILES_MATCHING PATTERN "*.h") +# Eigen directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/ + DESTINATION include/Eigen) +# external directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/ + DESTINATION include/external/eigen_archive) +# third_party eigen directory +install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/ + DESTINATION include/third_party/eigen3) +# unsupported Eigen directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/ + DESTINATION include/unsupported/Eigen) diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 3d84f1ebb9..8d95f0d3e8 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -74,6 +74,9 @@ endif() #) #list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) +if (NOT WIN32) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp") +endif (NOT WIN32) add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs}) add_dependencies(tf_stream_executor diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake index 6ef9598963..cb58a2e7df 100644 --- a/tensorflow/contrib/cmake/tf_tools.cmake +++ b/tensorflow/contrib/cmake/tf_tools.cmake @@ -73,7 +73,7 @@ add_executable(${transform_graph} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -95,7 +95,7 @@ add_executable(${summarize_graph} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -117,7 +117,7 @@ add_executable(${compare_graphs} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -138,7 +138,7 @@ add_executable(${benchmark_model} $ $ $ - $<$:$> + $<$:$<$:$>> $<$:$> ) @@ -147,3 +147,8 @@ target_link_libraries(${benchmark_model} PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS ${transform_graph} ${summarize_graph} ${compare_graphs} ${benchmark_model} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake index 858e7dda92..e63fccc181 100644 --- a/tensorflow/contrib/cmake/tf_tutorials.cmake +++ b/tensorflow/contrib/cmake/tf_tutorials.cmake @@ -34,3 +34,8 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} ) + +install(TARGETS tf_tutorials_example_trainer + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index ca384226d4..ec395e41d0 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -395,8 +395,8 @@ class CrfDecodeForwardRnnCell(rnn_cell.RNNCell): scope: Unused variable scope of this cell. Returns: - backpointers: [batch_size, num_tags], containing backpointers. - new_state: [batch_size, num_tags], containing new score values. + backpointers: A [batch_size, num_tags] matrix of backpointers. + new_state: A [batch_size, num_tags] matrix of new score values. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). @@ -436,8 +436,9 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell): """Build the CrfDecodeBackwardRnnCell. Args: - inputs: [batch_size, num_tags], backpointer of next step (in time order). - state: [batch_size, 1], next position's tag index. + inputs: A [batch_size, num_tags] matrix of + backpointer of next step (in time order). + state: A [batch_size, 1] matrix of tag index of next step. scope: Unused variable scope of this cell. Returns: @@ -461,16 +462,16 @@ def crf_decode(potentials, transition_params, sequence_length): This is a function for tensor. Args: - potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of + potentials: A [batch_size, max_seq_len, num_tags] tensor of unary potentials. - transition_params: A [num_tags, num_tags] tensor, matrix of + transition_params: A [num_tags, num_tags] matrix of binary potentials. - sequence_length: A [batch_size] tensor, containing sequence lengths. + sequence_length: A [batch_size] vector of true sequence lengths. Returns: - decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32. + decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`. Contains the highest scoring tag indices. - best_score: A [batch_size] tensor, containing the score of decode_tags. + best_score: A [batch_size] vector, containing the score of `decode_tags`. """ # If max_seq_len is 1, we skip the algorithm and simply return the argmax tag # and the max activation. diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 0697fbdec1..995ce6d654 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -11,6 +11,7 @@ py_test( size = "small", srcs = ["batch_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -373,6 +374,7 @@ py_test( size = "small", srcs = ["sequence_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -450,6 +452,7 @@ py_test( size = "small", srcs = ["zip_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -466,7 +469,10 @@ py_test( size = "small", srcs = ["prefetching_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_oss"], # b/68785503 + tags = [ + "manual", + "no_oss", # b/68785503 + ], deps = [ "//tensorflow/contrib/data/python/ops:prefetching_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 2dc8ad9483..145b9495ff 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -140,6 +140,23 @@ cuda_py_test( ], ) +cuda_py_test( + name = "cauchy_test", + size = "medium", + srcs = ["python/kernel_tests/cauchy_test.py"], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + cuda_py_test( name = "chi2_test", srcs = ["python/kernel_tests/chi2_test.py"], diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 16f6533e57..0d12d83893 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -24,6 +24,7 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.contrib.distributions.python.ops.binomial import * +from tensorflow.contrib.distributions.python.ops.cauchy import * from tensorflow.contrib.distributions.python.ops.chi2 import * from tensorflow.contrib.distributions.python.ops.conditional_distribution import * from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * @@ -83,6 +84,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ 'bijectors', + 'Cauchy', 'ConditionalDistribution', 'ConditionalTransformedDistribution', 'FULLY_REPARAMETERIZED', diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py new file mode 100644 index 0000000000..73747db31c --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py @@ -0,0 +1,438 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Cauchy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import importlib +import numpy as np + +from tensorflow.contrib.distributions.python.ops import cauchy as cauchy_lib +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + + +def try_import(name): # pylint: disable=invalid-name + module = None + try: + module = importlib.import_module(name) + except ImportError as e: + tf_logging.warning("Could not import %s: %s" % (name, str(e))) + return module + + +stats = try_import("scipy.stats") + + +class CauchyTest(test.TestCase): + + def setUp(self): + self._rng = np.random.RandomState(123) + + def assertAllFinite(self, tensor): + is_finite = np.isfinite(tensor.eval()) + all_true = np.ones_like(is_finite, dtype=np.bool) + self.assertAllEqual(all_true, is_finite) + + def _testParamShapes(self, sample_shape, expected): + with self.test_session(): + param_shapes = cauchy_lib.Cauchy.param_shapes(sample_shape) + loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] + self.assertAllEqual(expected, loc_shape.eval()) + self.assertAllEqual(expected, scale_shape.eval()) + loc = array_ops.zeros(loc_shape) + scale = array_ops.ones(scale_shape) + self.assertAllEqual(expected, + array_ops.shape( + cauchy_lib.Cauchy(loc, scale).sample()).eval()) + + def _testParamStaticShapes(self, sample_shape, expected): + param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape) + loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"] + self.assertEqual(expected, loc_shape) + self.assertEqual(expected, scale_shape) + + def testParamShapes(self): + sample_shape = [10, 3, 4] + self._testParamShapes(sample_shape, sample_shape) + self._testParamShapes(constant_op.constant(sample_shape), sample_shape) + + def testParamStaticShapes(self): + sample_shape = [10, 3, 4] + self._testParamStaticShapes(sample_shape, sample_shape) + self._testParamStaticShapes( + tensor_shape.TensorShape(sample_shape), sample_shape) + + def testCauchyLogPDF(self): + with self.test_session(): + batch_size = 6 + loc = constant_op.constant([3.0] * batch_size) + scale = constant_op.constant([np.sqrt(10.0)] * batch_size) + x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + log_pdf = cauchy.log_prob(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) + + pdf = cauchy.prob(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, pdf.shape) + self.assertAllEqual(cauchy.batch_shape, pdf.eval().shape) + + if not stats: + return + expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf.eval()) + self.assertAllClose(np.exp(expected_log_pdf), pdf.eval()) + + def testCauchyLogPDFMultidimensional(self): + with self.test_session(): + batch_size = 6 + loc = constant_op.constant([[3.0, -3.0]] * batch_size) + scale = constant_op.constant( + [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size) + x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + log_pdf = cauchy.log_prob(x) + log_pdf_values = log_pdf.eval() + self.assertEqual(log_pdf.shape, (6, 2)) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + log_pdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape) + + pdf = cauchy.prob(x) + pdf_values = pdf.eval() + self.assertEqual(pdf.shape, (6, 2)) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf_values.shape) + self.assertAllEqual(cauchy.batch_shape, pdf.shape) + self.assertAllEqual(cauchy.batch_shape, pdf_values.shape) + + if not stats: + return + expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf_values) + self.assertAllClose(np.exp(expected_log_pdf), pdf_values) + + def testCauchyCDF(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + cdf = cauchy.cdf(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, cdf.shape) + self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) + if not stats: + return + expected_cdf = stats.cauchy(loc, scale).cdf(x) + self.assertAllClose(expected_cdf, cdf.eval(), atol=0) + + def testCauchySurvivalFunction(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + sf = cauchy.survival_function(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, sf.shape) + self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) + if not stats: + return + expected_sf = stats.cauchy(loc, scale).sf(x) + self.assertAllClose(expected_sf, sf.eval(), atol=0) + + def testCauchyLogCDF(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + cdf = cauchy.log_cdf(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, cdf.shape) + self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape) + + if not stats: + return + expected_cdf = stats.cauchy(loc, scale).logcdf(x) + self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5) + + def testFiniteGradientAtDifficultPoints(self): + for dtype in [np.float32, np.float64]: + g = ops.Graph() + with g.as_default(): + loc = variables.Variable(dtype(0.0)) + scale = variables.Variable(dtype(1.0)) + dist = cauchy_lib.Cauchy(loc=loc, scale=scale) + x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype) + for func in [ + dist.cdf, dist.log_cdf, dist.survival_function, + dist.log_survival_function, dist.log_prob, dist.prob + ]: + value = func(x) + grads = gradients_impl.gradients(value, [loc, scale]) + with self.test_session(graph=g): + variables.global_variables_initializer().run() + self.assertAllFinite(value) + self.assertAllFinite(grads[0]) + self.assertAllFinite(grads[1]) + + def testCauchyLogSurvivalFunction(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + sf = cauchy.log_survival_function(x) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape) + self.assertAllEqual(cauchy.batch_shape, sf.shape) + self.assertAllEqual(cauchy.batch_shape, sf.eval().shape) + + if not stats: + return + expected_sf = stats.cauchy(loc, scale).logsf(x) + self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5) + + def testCauchyEntropy(self): + with self.test_session(): + loc = np.array([1.0, 1.0, 1.0]) + scale = np.array([[1.0, 2.0, 3.0]]) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + entropy = cauchy.entropy() + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), + entropy.eval().shape) + self.assertAllEqual(cauchy.batch_shape, entropy.shape) + self.assertAllEqual(cauchy.batch_shape, entropy.eval().shape) + + if not stats: + return + expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3)) + self.assertAllClose(expected_entropy, entropy.eval()) + + def testCauchyMode(self): + with self.test_session(): + # Mu will be broadcast to [7, 7, 7]. + loc = [7.] + scale = [11., 12., 13.] + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.mode().shape) + self.assertAllEqual([7., 7, 7], cauchy.mode().eval()) + + def testCauchyMean(self): + with self.test_session(): + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.mean().shape) + self.assertAllEqual([np.nan] * 3, cauchy.mean().eval()) + + def testCauchyNanMean(self): + with self.test_session(): + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.mean().eval() + + def testCauchyQuantile(self): + with self.test_session(): + batch_size = 50 + loc = self._rng.randn(batch_size) + scale = self._rng.rand(batch_size) + 1.0 + p = np.linspace(0.000001, 0.999999, batch_size).astype(np.float64) + + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + x = cauchy.quantile(p) + + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.eval().shape) + self.assertAllEqual(cauchy.batch_shape, x.shape) + self.assertAllEqual(cauchy.batch_shape, x.eval().shape) + + if not stats: + return + expected_x = stats.cauchy(loc, scale).ppf(p) + self.assertAllClose(expected_x, x.eval(), atol=0.) + + def testCauchyVariance(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.variance().shape) + self.assertAllEqual([np.nan] * 3, cauchy.variance().eval()) + + def testCauchyNanVariance(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.variance().eval() + + def testCauchyStandardDeviation(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertAllEqual((3,), cauchy.stddev().shape) + self.assertAllEqual([np.nan] * 3, cauchy.stddev().eval()) + + def testCauchyNanStandardDeviation(self): + with self.test_session(): + # scale will be broadcast to [7, 7, 7] + loc = [1., 2., 3.] + scale = [7.] + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False) + + with self.assertRaises(ValueError): + cauchy.stddev().eval() + + def testCauchySample(self): + with self.test_session(): + loc = constant_op.constant(3.0) + scale = constant_op.constant(1.0) + loc_v = 3.0 + n = constant_op.constant(100000) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + samples = cauchy.sample(n) + sample_values = samples.eval() + + self.assertEqual(sample_values.shape, (100000,)) + self.assertAllClose(np.median(sample_values), loc_v, atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) + + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + expected_shape = ( + tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) + + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + def testCauchySampleMultiDimensional(self): + with self.test_session(): + batch_size = 2 + loc = constant_op.constant([[3.0, -3.0]] * batch_size) + scale = constant_op.constant([[0.5, 1.0]] * batch_size) + loc_v = [3.0, -3.0] + n = constant_op.constant(100000) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + samples = cauchy.sample(n) + sample_values = samples.eval() + self.assertEqual(samples.shape, (100000, batch_size, 2)) + self.assertAllClose( + np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1) + self.assertAllClose( + np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + expected_shape = ( + tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) + self.assertAllEqual(expected_shape, samples.shape) + self.assertAllEqual(expected_shape, sample_values.shape) + + def testCauchyNegativeLocFails(self): + with self.test_session(): + cauchy = cauchy_lib.Cauchy(loc=[1.], scale=[-5.], validate_args=True) + with self.assertRaisesOpError("Condition x > 0 did not hold"): + cauchy.mode().eval() + + def testCauchyShape(self): + with self.test_session(): + loc = constant_op.constant([-3.0] * 5) + scale = constant_op.constant(11.0) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + self.assertEqual(cauchy.batch_shape_tensor().eval(), [5]) + self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape([5])) + self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) + self.assertEqual(cauchy.event_shape, tensor_shape.TensorShape([])) + + def testCauchyShapeWithPlaceholders(self): + loc = array_ops.placeholder(dtype=dtypes.float32) + scale = array_ops.placeholder(dtype=dtypes.float32) + cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) + + with self.test_session() as sess: + # get_batch_shape should return an "" tensor. + self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape(None)) + self.assertEqual(cauchy.event_shape, ()) + self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) + self.assertAllEqual( + sess.run( + cauchy.batch_shape_tensor(), + feed_dict={ + loc: 5.0, + scale: [1.0, 2.0] + }), [2]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py new file mode 100644 index 0000000000..8d59c1abfb --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -0,0 +1,219 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""The Cauchy distribution class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops.distributions import distribution + +__all__ = [ + "Cauchy", +] + + +class Cauchy(distribution.Distribution): + """The Cauchy distribution with location `loc` and scale `scale`. + + #### Mathematical details + + The probability density function (pdf) is, + + ```none + pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2)) + ``` + where `loc` is the location, and `scale` is the scale. + + The Cauchy distribution is a member of the [location-scale family]( + https://en.wikipedia.org/wiki/Location-scale_family), i.e. + + ```none + X ~ Cauchy(loc=0, scale=1) + Y ~ Cauchy(loc=loc, scale=scale) + Y = loc + scale * X + ``` + + #### Examples + + Examples of initialization of one or a batch of distributions. + + ```python + # Define a single scalar Cauchy distribution. + dist = Cauchy(loc=0., scale=3.) + + # Evaluate the cdf at 1, returning a scalar. + dist.cdf(1.) + + # Define a batch of two scalar valued Cauchy distributions. + dist = Cauchy(loc=[1, 2.], scale=[11, 22.]) + + # Evaluate the pdf of the first distribution on 0, and the second on 1.5, + # returning a length two tensor. + dist.prob([0, 1.5]) + + # Get 3 samples, returning a 3 x 2 tensor. + dist.sample([3]) + ``` + + Arguments are broadcast when possible. + + ```python + # Define a batch of two scalar valued Cauchy distributions. + # Both have median 1, but different scales. + dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.]) + # Evaluate the pdf of both distributions on the same point, 3.0, + # returning a length 2 tensor. + dist.prob(3.0) + ``` + """ + + def __init__(self, + loc, + scale, + validate_args=False, + allow_nan_stats=True, + name="Cauchy"): + """Construct Cauchy distributions. + + The parameters `loc` and `scale` must be shaped in a way that supports + broadcasting (e.g. `loc + scale` is a valid operation). + + Args: + loc: Floating point tensor; the modes of the distribution(s). + scale: Floating point tensor; the locations of the distribution(s). + Must contain only positive values. + validate_args: Python `bool`, default `False`. When `True` distribution + parameters are checked for validity despite possibly degrading runtime + performance. When `False` invalid inputs may silently render incorrect + outputs. + allow_nan_stats: Python `bool`, default `True`. When `True`, + statistics (e.g., mean, mode, variance) use the value "`NaN`" to + indicate the result is undefined. When `False`, an exception is raised + if one or more of the statistic's batch members are undefined. + name: Python `str` name prefixed to Ops created by this class. + + Raises: + TypeError: if `loc` and `scale` have different `dtype`. + """ + parameters = locals() + with ops.name_scope(name, values=[loc, scale]): + with ops.control_dependencies([check_ops.assert_positive(scale)] + if validate_args else []): + self._loc = array_ops.identity(loc, name="loc") + self._scale = array_ops.identity(scale, name="scale") + check_ops.assert_same_float_dtype([self._loc, self._scale]) + super(Cauchy, self).__init__( + dtype=self._scale.dtype, + reparameterization_type=distribution.FULLY_REPARAMETERIZED, + validate_args=validate_args, + allow_nan_stats=allow_nan_stats, + parameters=parameters, + graph_parents=[self._loc, self._scale], + name=name) + + @staticmethod + def _param_shapes(sample_shape): + return dict( + zip(("loc", "scale"), + ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2))) + + @property + def loc(self): + """Distribution parameter for the mean.""" + return self._loc + + @property + def scale(self): + """Distribution parameter for standard deviation.""" + return self._scale + + def _batch_shape_tensor(self): + return array_ops.broadcast_dynamic_shape( + array_ops.shape(self.loc), array_ops.shape(self.scale)) + + def _batch_shape(self): + return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape) + + def _event_shape_tensor(self): + return constant_op.constant([], dtype=dtypes.int32) + + def _event_shape(self): + return tensor_shape.scalar() + + def _sample_n(self, n, seed=None): + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) + probs = random_ops.random_uniform( + shape=shape, minval=0., maxval=1., dtype=self.dtype, seed=seed) + return self._quantile(probs) + + def _log_prob(self, x): + return self._log_unnormalized_prob(x) - self._log_normalization() + + def _cdf(self, x): + return math_ops.atan(self._z(x)) / np.pi + 0.5 + + def _log_cdf(self, x): + return math_ops.log1p(2 / np.pi * math_ops.atan(self._z(x))) - np.log(2) + + def _log_unnormalized_prob(self, x): + return -math_ops.log1p(math_ops.square(self._z(x))) + + def _log_normalization(self): + return np.log(np.pi) + math_ops.log(self.scale) + + def _entropy(self): + h = np.log(4 * np.pi) + math_ops.log(self.scale) + return h * array_ops.ones_like(self.loc) + + def _quantile(self, p): + return self.loc + self.scale * math_ops.tan(np.pi * (p - 0.5)) + + def _mode(self): + return self.loc * array_ops.ones_like(self.scale) + + def _z(self, x): + """Standardize input `x`.""" + with ops.name_scope("standardize", values=[x]): + return (x - self.loc) / self.scale + + def _inv_z(self, z): + """Reconstruct input `x` from a its normalized version.""" + with ops.name_scope("reconstruct", values=[z]): + return z * self.scale + self.loc + + def _mean(self): + if self.allow_nan_stats: + return array_ops.fill(self.batch_shape_tensor(), + self.dtype.as_numpy_dtype(np.nan)) + else: + raise ValueError("`mean` is undefined for Cauchy distribution.") + + def _stddev(self): + if self.allow_nan_stats: + return array_ops.fill(self.batch_shape_tensor(), + self.dtype.as_numpy_dtype(np.nan)) + else: + raise ValueError("`stddev` is undefined for Cauchy distribution.") diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb index 01616f2e7d..459f2f4a7d 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb @@ -429,7 +429,9 @@ "cpu_tensor = tf.random_normal([SIZE, SIZE])\n", "\n", "if is_gpu_available:\n", - " gpu_tensor = cpu_tensor.gpu()" + " gpu_tensor = cpu_tensor.gpu()\n", + "else:\n", + " print(\"GPU not available.\")" ] }, { diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb index 3b7e2cd435..e6c7c11733 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb @@ -383,7 +383,7 @@ "\n", "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n", "\n", - "1. the value returned by the function passed in (in this case, the loss calculated by `calculate_linear_model_loss()`), and\n", + "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n", "1. a list of tuples consisting of:\n", " 1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n", " 1. The corresponding variable (`tf.Variable`)\n", @@ -698,7 +698,7 @@ "source": [ "## Other Ways to Compute Gradients\n", "\n", - "Using our loss function as an example (`calculate_linear_model_loss()`), there are several other ways we could compute gradients:\n", + "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n", "\n", "1. `tfe.implicit_gradients()`\n", "1. `tfe.gradients_function()`\n", @@ -841,7 +841,7 @@ "# tfe.implicit_value_and_gradients() demo\n", "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n", "\n", - "# Returns only gradients:\n", + "# Returns the value returned by the function passed in, gradients, and variables:\n", "value_gradients_fn(inputs, labels, wb)" ] } diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb index ebcc7027c1..0088da5c4b 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb @@ -9,7 +9,7 @@ "source": [ "# Eager Execution Tutorial: Importing Data\n", "\n", - "This notebook demonstrates the use of the [`tf.contrib.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", + "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n", "\n", "* Creating a `Dataset`.\n", "* Iteration over a `Dataset` with eager execution enabled.\n", @@ -64,7 +64,7 @@ "source": [ "# Step 1: Create a source `Dataset`\n", "\n", - "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." + "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information." ] }, { @@ -83,7 +83,7 @@ }, "outputs": [], "source": [ - "ds_tensors = tf.contrib.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", + "ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n", "\n", "# Create a CSV file\n", "import tempfile\n", @@ -93,7 +93,7 @@ "Line 2\n", "Line 3\n", " \"\"\")\n", - "ds_file = tf.contrib.data.TextLineDataset(filename)\n" + "ds_file = tf.data.TextLineDataset(filename)\n" ] }, { @@ -105,7 +105,7 @@ "source": [ "# Step 2: Apply transformations\n", "\n", - "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.contrib.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset) for details." + "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details." ] }, { diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 46b3eeae91..f1debc8590 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -286,7 +286,6 @@ def _fused_batch_norm(inputs, ValueError: If the rank of `inputs` is neither 2 or 4. ValueError: If rank or `C` dimension of `inputs` is undefined. """ - # TODO(reedwm): Add support for fp16 inputs. if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): raise ValueError('data_format has to be either NCHW or NHWC.') with variable_scope.variable_scope( @@ -310,7 +309,6 @@ def _fused_batch_norm(inputs, new_shape = [-1, channels, 1, 1] inputs = array_ops.reshape(inputs, new_shape) inputs_shape = inputs.get_shape() - dtype = inputs.dtype.base_dtype if data_format == DATA_FORMAT_NHWC: params_shape = inputs_shape[-1:] else: @@ -320,9 +318,10 @@ def _fused_batch_norm(inputs, (inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. - trainable_beta = trainable and center beta_collections = utils.get_variable_collections(variables_collections, 'beta') + # Float32 required to avoid precision-loss when using fp16 input/output + variable_dtype = dtypes.float32 if not param_initializers: param_initializers = {} if not param_regularizers: @@ -336,13 +335,13 @@ def _fused_batch_norm(inputs, beta = variables.model_variable( 'beta', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=beta_initializer, regularizer=beta_regularizer, collections=beta_collections, - trainable=trainable_beta) + trainable=trainable) else: - beta = array_ops.constant(0.0, shape=params_shape) + beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape) if scale: gamma_collections = utils.get_variable_collections( @@ -352,13 +351,13 @@ def _fused_batch_norm(inputs, gamma = variables.model_variable( 'gamma', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=gamma_initializer, regularizer=gamma_regularizer, collections=gamma_collections, trainable=trainable) else: - gamma = array_ops.constant(1.0, shape=params_shape) + gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. We disable variable partitioning while creating @@ -375,7 +374,7 @@ def _fused_batch_norm(inputs, moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=moving_mean_initializer, trainable=False, collections=moving_mean_collections) @@ -386,7 +385,7 @@ def _fused_batch_norm(inputs, moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, - dtype=dtype, + dtype=variable_dtype, initializer=moving_variance_initializer, trainable=False, collections=moving_variance_collections) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index ff7f0e4462..27bd3172d6 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1774,10 +1774,13 @@ class BatchNormTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'undefined'): _layers.batch_norm(inputs, data_format='NCHW') - def _testCreateOp(self, fused): + def _testCreateOp(self, fused, dtype=None): + if dtype is None: + dtype = dtypes.float32 height, width = 3, 3 with self.test_session(): - images = np.random.uniform(size=(5, height, width, 3)).astype('f') + images = np.random.uniform(size=(5, height, width, 3)).astype( + dtype.as_numpy_dtype) output = _layers.batch_norm(images, fused=fused) expected_name = ('BatchNorm/FusedBatchNorm' if fused else 'BatchNorm/batchnorm') @@ -1792,6 +1795,9 @@ class BatchNormTest(test.TestCase): def testCreateOpFused(self): self._testCreateOp(True) + def testCreateOpFusedFloat16(self): + self._testCreateOp(True, dtypes.float16) + def _testCreateOpBetaRegularizer(self, fused=True): height, width = 3, 3 with self.test_session(): @@ -2659,10 +2665,63 @@ class BatchNormTest(test.TestCase): def testBatchNormBeta(self): # Test case for 11673 with self.test_session() as sess: - a = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) - b = _layers.batch_norm(a, center=False, data_format='NCHW', - zero_debias_moving_mean=True) + a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) + _layers.batch_norm( + a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True) + a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10)) + _layers.batch_norm( + a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True) + sess.run(variables_lib.global_variables_initializer()) + + def testVariablesAreFloat32(self): + height, width = 3, 3 + with self.test_session(): + images = random_ops.random_uniform( + (5, height, width, 3), seed=1, dtype=dtypes.float16) + _layers.batch_norm(images, scale=True) + beta = variables.get_variables_by_name('beta')[0] + gamma = variables.get_variables_by_name('gamma')[0] + self.assertEqual(beta.dtype, dtypes.float32_ref) + self.assertEqual(gamma.dtype, dtypes.float32_ref) + moving_mean = variables.get_variables_by_name('moving_mean')[0] + moving_variance = variables.get_variables_by_name('moving_variance')[0] + self.assertEqual(moving_mean.dtype, dtypes.float32_ref) + self.assertEqual(moving_variance.dtype, dtypes.float32_ref) + + def _runFusedBatchNorm(self, shape, dtype): + channels = shape[1] + images = np.arange(np.product(shape), dtype=dtype).reshape(shape) + beta = init_ops.constant_initializer( + np.arange(2, channels + 2, dtype=np.float32)) + gamma = init_ops.constant_initializer( + np.arange(10, channels + 10, dtype=np.float32) * 2.0) + mean = init_ops.constant_initializer( + np.arange(3, channels + 3, dtype=np.float32) * 5.0) + variance = init_ops.constant_initializer( + np.arange(1, channels + 1, dtype=np.float32) * 4.0) + output = _layers.batch_norm( + images, + fused=True, + is_training=True, + scale=True, + epsilon=0.5, + param_initializers={ + 'beta': beta, + 'gamma': gamma, + 'moving_mean': mean, + 'moving_variance': variance, + }, + data_format='NCHW') + with self.test_session(use_gpu=True) as sess: sess.run(variables_lib.global_variables_initializer()) + return sess.run(output) + + def testFusedBatchNormFloat16MatchesFloat32(self): + if test.is_gpu_available(cuda_only=True): + shape = [5, 4, 2, 3] + res_32 = self._runFusedBatchNorm(shape, np.float32) + res_16 = self._runFusedBatchNorm(shape, np.float16) + self.assertAllClose(res_32, res_16, rtol=1e-3) def testAdjustmentCreated(self): # Tests that the adjustment is appropriately passed to and used by the core diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 468d792a0d..bc0e6fc009 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -119,7 +119,7 @@ class Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=model_fn_ops.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... upate train_op and hooks in ModelFnOps and return + ... update train_op and hooks in ModelFnOps and return ``` """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 8be9c72adf..44e6c7c52d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -23,7 +23,6 @@ import collections import six -from tensorflow.contrib import framework as contrib_framework from tensorflow.contrib.framework import get_graph_from_inputs from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import metric_key @@ -32,6 +31,7 @@ from tensorflow.python.estimator import model_fn as core_model_fn_lib from tensorflow.python.estimator.export import export_output as core_export_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -156,11 +156,11 @@ class ModelFnOps( else: if isinstance(predictions, dict): predictions = { - k: contrib_framework.convert_to_tensor_or_sparse_tensor(v) + k: sparse_tensor.convert_to_tensor_or_sparse_tensor(v) for k, v in six.iteritems(predictions) } else: - predictions = contrib_framework.convert_to_tensor_or_sparse_tensor( + predictions = sparse_tensor.convert_to_tensor_or_sparse_tensor( predictions) # Validate eval_metric_ops diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 4c50d40aaa..86fad4c553 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,13 +28,13 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels - # pylint: enable=g-multiple-import,g-bad-import-order @@ -365,8 +365,14 @@ class DataFeeder(object): self.random_state = np.random.RandomState( 42) if random_state is None else random_state - num_samples = list(self._x.values())[0].shape[ - 0] if x_is_dict else self._x.shape[0] + if x_is_dict: + num_samples = list(self._x.values())[0].shape[0] + elif tensor_util.is_tensor(self._x): + num_samples = self._x.shape[ + 0].value # shape will be a Dimension, extract an int + else: + num_samples = self._x.shape[0] + if self._shuffle: self.indices = self.random_state.permutation(num_samples) else: diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 13f2f0f502..7526f3ae0d 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -238,10 +238,10 @@ class SdcaModel(object): with name_scope('sdca/prediction'): sparse_variables = self._convert_n_to_tensor(self._variables[ 'sparse_features_weights']) - result = 0.0 + result_sparse = 0.0 for sfc, sv in zip(examples['sparse_features'], sparse_variables): # TODO(sibyl-Aix6ihai): following does not take care of missing features. - result += math_ops.segment_sum( + result_sparse += math_ops.segment_sum( math_ops.multiply( array_ops.gather(sv, sfc.feature_indices), sfc.feature_values), sfc.example_indices) @@ -249,12 +249,14 @@ class SdcaModel(object): dense_variables = self._convert_n_to_tensor(self._variables[ 'dense_features_weights']) + result_dense = 0.0 for i in range(len(dense_variables)): - result += math_ops.matmul(dense_features[i], - array_ops.expand_dims(dense_variables[i], -1)) + result_dense += math_ops.matmul(dense_features[i], + array_ops.expand_dims( + dense_variables[i], -1)) # Reshaping to allow shape inference at graph construction time. - return array_ops.reshape(result, [-1]) + return array_ops.reshape(result_dense, [-1]) + result_sparse def predictions(self, examples): """Add operations to compute predictions by the model. diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index b4aa032ff8..89e8693490 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -23,6 +23,7 @@ py_library( py_test( name = "lite_test", srcs = ["lite_test.py"], + srcs_version = "PY2AND3", deps = [ ":lite", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 86540d58a6..5bca82ded0 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -36,6 +36,11 @@ import traceback import zipfile import numpy as np from six import StringIO + +# TODO(aselle): Disable GPU for now +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + +# pylint: disable=g-import-not-at-top import tensorflow as tf from google.protobuf import text_format # TODO(aselle): switch to TensorFlow's resource_loader @@ -379,12 +384,13 @@ def make_zip_of_tests(zip_path, report["toco_log"] = "" tf.reset_default_graph() - try: - inputs, outputs = make_graph(param_dict_real) - except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, - ValueError): - report["tf_log"] += traceback.format_exc() - return None, report + with tf.device("/cpu:0"): + try: + inputs, outputs = make_graph(param_dict_real) + except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, + ValueError): + report["tf_log"] += traceback.format_exc() + return None, report sess = tf.Session() try: diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 92246a8aed..17115047d2 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -61,6 +61,7 @@ tf_py_test( data = [ ":toco_from_protos", ], + tags = ["no_pip"], ) filegroup( diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index dba1464653..e2e6c05591 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -314,7 +314,8 @@ ifeq ($(TARGET),ANDROID) -Wno-narrowing \ -fomit-frame-pointer \ $(MARCH_OPTION) \ --fPIE +-fPIE \ +-fPIC INCLUDES = \ -I$(NDK_ROOT)/sources/android/support/include \ -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \ diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 715eb51577..65bd60c12a 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -174,10 +174,26 @@ tensorflow/contrib/makefile/build_all_ios.sh This process will take around twenty minutes on a modern MacBook Pro. -When it completes, you will have a library for a single architecture and the -benchmark program. Although successfully compiling the benchmark program is a +When it completes, you will have a unified library for all architectures +(i386sim, x86_64sim, armv7, armv7s and arm64) and the benchmark program. +Although successfully compiling the benchmark program is a sign of success, the program is not a complete iOS app. +If you would only like to build only one architecture to save time: +(iOS 11+ only supports 64bit so you can get away with arm64) + +```bash +tensorflow/contrib/makefile/build_all_ios.sh -a arm64 +``` + +After the first build if you would like to just build the tensorflow +library you can pass the -T flag to avoid a clean & rebuild. This should +take you just a few seconds to generate the library if you modified one file. + +```bash +tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -T +``` + To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -193,19 +209,18 @@ If you have not already, you will need to download dependencies: tensorflow/contrib/makefile/download_dependencies.sh ``` -Next, you will need to compile protobufs for iOS: +Next, you will need to compile protobufs for iOS (optionally takes the -a $ARCH flag): ```bash -tensorflow/contrib/makefile/compile_ios_protobuf.sh +tensorflow/contrib/makefile/compile_ios_protobuf.sh ``` -Then, you will need to compile the nsync library for iOS: +Then, you will need to compile the nsync library for iOS (optionally takes -a $ARCH flag): ```bash export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` ``` - Then, you can run the makefile specifying iOS as the target, along with the architecture you want to build for: @@ -219,10 +234,6 @@ This creates a library in `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any xcode project against. -At this point, you will have a library for a single architecture and the -benchmark program. Although successfully compiling the benchmark program is a -sign of success, the program is not a complete iOS app. - To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static library in a simple app. @@ -237,6 +248,14 @@ time follow it with: compile_ios_tensorflow.sh ``` +`compile_ios_tensorflow.sh` takes the -a flag to build only for one architecture. +In case you run into issues with unresolved symbols with nsync you can also pass +-h ${HOST_NSYNC_LIB} and -n {TARGET_NSYNC_LIB} so it would look like: + +```bash +tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/contrib/makefile/downloads/nsync/builds/default.macos.c++11/nsync.a -n tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11/nsync.a -a arm64 +``` + In XCode, you will need to use -force_load in the linker flags section of the build settings to pull in the global constructors that are used to register ops and kernels. @@ -249,7 +268,7 @@ debug mode. If you are concerned about performance or are working on a release build, you would likely want a higher optimization setting, like so: ```bash -compile_ios_tensorflow.sh "-Os" +compile_ios_tensorflow.sh -f "-Os" ``` For other variations of valid optimization flags, see [clang optimization levels](http://stackoverflow.com/questions/15548023/clang-optimization-levels). diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index a49bbe4565..988e12b482 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -23,14 +23,29 @@ if [[ $(uname) != "Darwin" ]]; then exit 1 fi +usage() { + echo "Usage: $(basename "$0") [-a:T]" + echo "-a [build_arch] build only for specified arch x86_64 [default=all]" + echo "-T only build tensorflow (dont download other deps etc)" + exit 1 +} + +while getopts "a:T" opt_name; do + case "$opt_name" in + a) BUILD_ARCH="${OPTARG}";; + T) ONLY_MAKE_TENSORFLOW="true";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + + # Make sure we're in the correct directory, at the root of the source tree. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd ${SCRIPT_DIR}/../../../ - -# Remove any old files first. -make -f tensorflow/contrib/makefile/Makefile clean -rm -rf tensorflow/contrib/makefile/downloads +source "${SCRIPT_DIR}/build_helper.subr" +JOB_COUNT="${JOB_COUNT:-$(get_job_count)}" # Setting a deployment target is required for building with bitcode, # otherwise linking will fail with: @@ -41,20 +56,37 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -# Pull down the required versions of the frameworks we need. -tensorflow/contrib/makefile/download_dependencies.sh +if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then + # Remove any old files first. + make -f tensorflow/contrib/makefile/Makefile clean + rm -rf tensorflow/contrib/makefile/downloads -# Compile protobuf for the target iOS device architectures. -tensorflow/contrib/makefile/compile_ios_protobuf.sh + # Pull down the required versions of the frameworks we need. + tensorflow/contrib/makefile/download_dependencies.sh + + # Compile protobuf for the target iOS device architectures. + tensorflow/contrib/makefile/compile_ios_protobuf.sh +fi # Compile nsync for the target iOS device architectures. # Don't use export var=`something` syntax; it swallows the exit status. HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` -TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +if [[ -z "${BUILD_ARCH}" ]]; then + # No arch specified so build all architectures + TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +else + # arch specified so build just that + TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}` +fi export HOST_NSYNC_LIB TARGET_NSYNC_LIB -# Build the iOS TensorFlow libraries. -tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3" +if [[ -z "${BUILD_ARCH}" ]]; then + # build the ios tensorflow libraries. + tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB +else + # arch specified so build just that + tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB +fi # Creates a static universal library in # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh index 4056db18a7..43e5809dd2 100755 --- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh +++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh @@ -21,10 +21,28 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion) fi -SCRIPT_DIR=$(dirname $0) +usage() { + echo "Usage: $(basename "$0") [-a]" + echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" + echo "default arch i386, x86_64, armv7, armv7s, arm64" + exit 1 +} + +BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" +while getopts "a:" opt_name; do + case "$opt_name" in + a) BUILD_TARGET="${OPTARG}";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + +IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" + +SCRIPT_DIR=$(cd `dirname $0` && pwd) source "${SCRIPT_DIR}/build_helper.subr" -cd tensorflow/contrib/makefile +cd ${SCRIPT_DIR} HOST_GENDIR="$(pwd)/gen/protobuf-host" mkdir -p "${HOST_GENDIR}" @@ -64,6 +82,10 @@ else echo "protoc found. Skip building host tools." fi +# Remove old libs +rm -f ${LIBDIR}/libprotobuf.a +rm -f ${LIBDIR}/libprotobuf-lite.a + ./autogen.sh if [ $? -ne 0 ] then @@ -71,157 +93,192 @@ then exit 1 fi -make distclean -./configure \ ---host=i386-apple-${OSX_VERSION} \ ---disable-shared \ ---enable-cross-compile \ ---with-protoc="${PROTOC_PATH}" \ ---prefix=${LIBDIR}/iossim_386 \ ---exec-prefix=${LIBDIR}/iossim_386 \ -"CFLAGS=${CFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch i386 \ --fembed-bitcode \ --isysroot ${IPHONESIMULATOR_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch i386 \ --fembed-bitcode \ --isysroot \ -${IPHONESIMULATOR_SYSROOT}" \ -LDFLAGS="-arch i386 \ --fembed-bitcode \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS} \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=x86_64-apple-${OSX_VERSION} \ ---disable-shared \ ---enable-cross-compile \ ---with-protoc="${PROTOC_PATH}" \ ---prefix=${LIBDIR}/iossim_x86_64 \ ---exec-prefix=${LIBDIR}/iossim_x86_64 \ -"CFLAGS=${CFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch x86_64 \ --fembed-bitcode \ --isysroot ${IPHONESIMULATOR_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ --arch x86_64 \ --fembed-bitcode \ --isysroot \ -${IPHONESIMULATOR_SYSROOT}" \ -LDFLAGS="-arch x86_64 \ --fembed-bitcode \ --mios-simulator-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS} \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ --L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=armv7-apple-${OSX_VERSION} \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm7 \ ---exec-prefix=${LIBDIR}/ios_arm7 \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch armv7 \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=armv7s-apple-${OSX_VERSION} \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm7s \ ---exec-prefix=${LIBDIR}/ios_arm7s \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7s \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXX=${CXX}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch armv7s \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch armv7s \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -make distclean -./configure \ ---host=arm \ ---with-protoc="${PROTOC_PATH}" \ ---disable-shared \ ---prefix=${LIBDIR}/ios_arm64 \ ---exec-prefix=${LIBDIR}/ios_arm64 \ -"CFLAGS=${CFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch arm64 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -"CXXFLAGS=${CXXFLAGS} \ --miphoneos-version-min=${MIN_SDK_VERSION} \ --arch arm64 \ --fembed-bitcode \ --isysroot ${IPHONEOS_SYSROOT}" \ -LDFLAGS="-arch arm64 \ --fembed-bitcode \ --miphoneos-version-min=${MIN_SDK_VERSION} \ -${LDFLAGS}" \ -"LIBS=${LIBS}" -make -j"${JOB_COUNT}" -make install - -lipo \ -${LIBDIR}/iossim_386/lib/libprotobuf.a \ -${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \ -${LIBDIR}/ios_arm7/lib/libprotobuf.a \ -${LIBDIR}/ios_arm7s/lib/libprotobuf.a \ -${LIBDIR}/ios_arm64/lib/libprotobuf.a \ --create \ --output ${LIBDIR}/libprotobuf.a - -lipo \ -${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \ -${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \ -${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \ --create \ --output ${LIBDIR}/libprotobuf-lite.a +package_pb_library() { + pb_libs="${LIBDIR}/${1}/lib/libprotobuf.a" + if [ -f "${LIBDIR}/libprotobuf.a" ]; then + pb_libs="$pb_libs ${LIBDIR}/libprotobuf.a" + fi + lipo \ + $pb_libs \ + -create \ + -output ${LIBDIR}/libprotobuf.a + + pblite_libs="${LIBDIR}/${1}/lib/libprotobuf-lite.a" + if [ -f "${LIBDIR}/libprotobuf-lite.a" ]; then + pblite_libs="$pblite_libs ${LIBDIR}/libprotobuf-lite.a" + fi + lipo \ + $pblite_libs \ + -create \ + -output ${LIBDIR}/libprotobuf-lite.a +} + +build_target() { +case "$1" in + i386) make distclean + ./configure \ + --host=i386-apple-${OSX_VERSION} \ + --disable-shared \ + --enable-cross-compile \ + --with-protoc="${PROTOC_PATH}" \ + --prefix=${LIBDIR}/iossim_386 \ + --exec-prefix=${LIBDIR}/iossim_386 \ + "CFLAGS=${CFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch i386 \ + -fembed-bitcode \ + -isysroot ${IPHONESIMULATOR_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch i386 \ + -fembed-bitcode \ + -isysroot \ + ${IPHONESIMULATOR_SYSROOT}" \ + LDFLAGS="-arch i386 \ + -fembed-bitcode \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS} \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "iossim_386" + ;; + + x86_64) make distclean + ./configure \ + --host=x86_64-apple-${OSX_VERSION} \ + --disable-shared \ + --enable-cross-compile \ + --with-protoc="${PROTOC_PATH}" \ + --prefix=${LIBDIR}/iossim_x86_64 \ + --exec-prefix=${LIBDIR}/iossim_x86_64 \ + "CFLAGS=${CFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch x86_64 \ + -fembed-bitcode \ + -isysroot ${IPHONESIMULATOR_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + -arch x86_64 \ + -fembed-bitcode \ + -isysroot \ + ${IPHONESIMULATOR_SYSROOT}" \ + LDFLAGS="-arch x86_64 \ + -fembed-bitcode \ + -mios-simulator-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS} \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \ + -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "iossim_x86_64" + ;; + + armv7) make distclean + ./configure \ + --host=armv7-apple-${OSX_VERSION} \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm7 \ + --exec-prefix=${LIBDIR}/ios_arm7 \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch armv7 \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm7" + ;; + + armv7s) make distclean + ./configure \ + --host=armv7s-apple-${OSX_VERSION} \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm7s \ + --exec-prefix=${LIBDIR}/ios_arm7s \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7s \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXX=${CXX}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch armv7s \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch armv7s \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm7s" + ;; + + arm64) make distclean + ./configure \ + --host=arm \ + --with-protoc="${PROTOC_PATH}" \ + --disable-shared \ + --prefix=${LIBDIR}/ios_arm64 \ + --exec-prefix=${LIBDIR}/ios_arm64 \ + "CFLAGS=${CFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch arm64 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + "CXXFLAGS=${CXXFLAGS} \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + -arch arm64 \ + -fembed-bitcode \ + -isysroot ${IPHONEOS_SYSROOT}" \ + LDFLAGS="-arch arm64 \ + -fembed-bitcode \ + -miphoneos-version-min=${MIN_SDK_VERSION} \ + ${LDFLAGS}" \ + "LIBS=${LIBS}" + make -j"${JOB_COUNT}" + make install + + package_pb_library "ios_arm64" + ;; + *) + echo "Unknown ARCH" + exit 1 + ;; +esac +} + +for build_element in "${build_targets[@]}" +do + echo "$build_element" + build_target "$build_element" +done + +file ${LIBDIR}/libprotobuf.a +file ${LIBDIR}/libprotobuf-lite.a +echo "Done building and packaging the libraries" diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh index 5d1cc8b375..ae82163e11 100755 --- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh +++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh @@ -43,55 +43,124 @@ then exit 1 fi +usage() { + echo "Usage: $(basename "$0") [-a]" + echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)" + echo "default is [i386, x86_64, armv7, armv7s, arm64]" + exit 1 +} + +BUILD_TARGET="i386 x86_64 armv7 armv7s arm64" +while getopts "a:f:h:n:" opt_name; do + case "$opt_name" in + a) BUILD_TARGET="${OPTARG}";; + f) BUILD_OPT="${OPTARG}";; + h) NSYNC_HOST="${OPTARG}";; + n) NSYNC_TARGET="${OPTARG}";; + *) usage;; + esac +done +shift $((OPTIND - 1)) + +IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}" + +SCRIPT_DIR=$(cd `dirname $0` && pwd) +source "${SCRIPT_DIR}/build_helper.subr" + + GENDIR=tensorflow/contrib/makefile/gen/ LIBDIR=${GENDIR}lib LIB_PREFIX=libtensorflow-core -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "armv7 compilation failed." - exit 1 -fi +#remove any old artifacts +rm -rf ${LIBDIR}/${LIB_PREFIX}.a -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "arm7vs compilation failed." - exit 1 -fi +package_tf_library() { + CAP_DIR=`echo $1 | tr 'a-z' 'A-Z'` + tf_libs="${LIBDIR}/ios_${CAP_DIR}/${LIB_PREFIX}-${1}.a" + if [ -f "${LIBDIR}/${LIB_PREFIX}.a" ]; then + tf_libs="$tf_libs ${LIBDIR}/${LIB_PREFIX}.a" + fi + lipo \ + $tf_libs \ + -create \ + -output ${LIBDIR}/${LIB_PREFIX}.a +} -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "arm64 compilation failed." - exit 1 -fi +build_tf_target() { +case "$1" in + armv7) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "armv7 compilation failed." + exit 1 + fi + package_tf_library "armv7" + ;; + armv7s) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "i386 compilation failed." - exit 1 -fi + if [ $? -ne 0 ] + then + echo "arm7vs compilation failed." + exit 1 + fi + package_tf_library "armv7s" + ;; + arm64) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "arm64 compilation failed." + exit 1 + fi + package_tf_library "arm64" + ;; + i386) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "i386 compilation failed." + exit 1 + fi + package_tf_library "i386" + ;; + x86_64) + make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ + TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a \ + OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \ + TARGET_NSYNC_LIB="${NSYNC_TARGET}" + if [ $? -ne 0 ] + then + echo "x86_64 compilation failed." + exit 1 + fi + package_tf_library "x86_64" + ;; + *) + echo "Unknown ARCH" + exit 1 +esac +} -make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ -TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1" -if [ $? -ne 0 ] -then - echo "x86_64 compilation failed." - exit 1 -fi +for build_tf_element in "${build_targets[@]}" +do + echo "$build_tf_element" + build_tf_target "$build_tf_element" +done -lipo \ -${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \ -${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \ -${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \ -${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \ -${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \ --create \ --output ${LIBDIR}/${LIB_PREFIX}.a +echo "Done building and packaging TF" +file ${LIBDIR}/${LIB_PREFIX}.a diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index ecbd9bb825..930e6b8dea 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -265,7 +265,7 @@ for arch in $archs; do -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \ -I../../platform/c++11 -I../../platform/gcc \ -I../../platform/posix -pthread - PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE + PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE -fPIC PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ @@ -301,6 +301,9 @@ done case "$target_platform" in ios) nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11" + if [ -d "$nsync_platform_dir" ]; then + rm -rf "$nsync_platform_dir" + fi mkdir "$nsync_platform_dir" eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a' echo "$nsync_platform_dir/nsync.a" diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py index 3bf795d19a..0bc133a00e 100644 --- a/tensorflow/contrib/nn/__init__.py +++ b/tensorflow/contrib/nn/__init__.py @@ -15,6 +15,7 @@ """Module for variants of ops in tf.nn. @@alpha_dropout +@@conv1d_transpose @@deprecated_flipped_softmax_cross_entropy_with_logits @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits @@deprecated_flipped_sigmoid_cross_entropy_with_logits @@ -32,6 +33,7 @@ from tensorflow.contrib.nn.python.ops.alpha_dropout import * from tensorflow.contrib.nn.python.ops.cross_entropy import * from tensorflow.contrib.nn.python.ops.sampling_ops import * from tensorflow.contrib.nn.python.ops.scaled_softplus import * +from tensorflow.python.ops.nn_ops import conv1d_transpose from tensorflow.python.ops.nn_ops import nth_element # pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 8c46becf2c..a9a63cbce0 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -19,6 +19,7 @@ py_library( "python/training/external_optimizer.py", "python/training/lazy_adam_optimizer.py", "python/training/moving_average_optimizer.py", + "python/training/multitask_optimizer_wrapper.py", "python/training/nadam_optimizer.py", "python/training/powersign.py", "python/training/sign_decay.py", @@ -98,6 +99,23 @@ py_test( ], ) +py_test( + name = "multitask_optimizer_wrapper_test", + srcs = ["python/training/multitask_optimizer_wrapper_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:training", + "//tensorflow/python:variables", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) + py_test( name = "lazy_adam_optimizer_test", srcs = ["python/training/lazy_adam_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index caf22536bb..04643a6058 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -24,7 +24,7 @@ from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import from tensorflow.contrib.opt.python.training.external_optimizer import * from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * -from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * @@ -34,11 +34,18 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'PowerSignOptimizer', 'AddSignOptimizer' + 'PowerSignOptimizer', + 'AddSignOptimizer' 'DelayCompensatedGradientDescentOptimizer', - 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', - 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', - 'ScipyOptimizerInterface', 'VariableClippingOptimizer' + 'DropStaleGradientOptimizer', + 'ExternalOptimizerInterface', + 'LazyAdamOptimizer', + 'NadamOptimizer', + 'MovingAverageOptimizer', + 'ScipyOptimizerInterface', + 'VariableClippingOptimizer', + 'MultitaskOptimizerWrapper', + 'clip_gradients_by_global_norm', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py new file mode 100644 index 0000000000..cb6c77a86f --- /dev/null +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py @@ -0,0 +1,140 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""An optimizer wrapper for stateful optimizers with multitask loss.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import types +import six + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.training import optimizer + +__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm'] + + +def _is_all_zeros(grad): + all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0) + return all_zeros + + +def _get_wrapper(fn, opt): + + def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument + all_zeros = _is_all_zeros(grad) + return control_flow_ops.cond(all_zeros, control_flow_ops.no_op, + lambda: fn(grad, *args, **kwargs)) + + wrapper = types.MethodType(wrapper, opt) + return wrapper + + +class MultitaskOptimizerWrapper(object): + """Optimizer wrapper making all-zero gradients harmless. + + This might be useful when a multi-task loss is used, + and some components of the loss might be + not present (e.g. masked out) in some training batches. + Technically their gradient would be zero, + which would normally affect the optimizer state + (e.g. push running average to zero). + However this is not the desired behaviour, + since the missing loss component + should be treated as unknown rather than zero. + + This wrapper filters out all-zero gradient tensors, + therefore preserving the optimizer state. + + If gradient clipping by global norm is used, + the provided function clip_gradients_by_global_norm + should be used (and specified explicitly by the user). + Otherwise the global norm would be underestimated + because of all-zero tensors that should be ignored. + + The gradient calculation and application + are delegated to an underlying optimizer. + The gradient application is altered only for all-zero tensors. + + Example: + ```python + momentum_optimizer = tf.train.MomentumOptimizer( + learning_rate, momentum=0.9) + multitask_momentum_optimizer = tf.contrib.opt.MultitaskOptimizerWrapper( + momentum_optimizer) + gradvars = multitask_momentum_optimizer.compute_gradients( + loss) + gradvars_clipped, _ = tf.contrib.opt.clip_gradients_by_global_norm( + gradvars, 15.0) + train_op = multitask_momentum_optimizer.apply_gradients( + gradvars_clipped, global_step=batch) + ``` + """ + + def __init__(self, opt): + """Constructor. + + Args: + opt: an instance of a class that implements tf.train.Optimizer. + """ + if not isinstance(opt, optimizer.Optimizer): + raise TypeError( + 'Supplied optimizer must be an instance of tf.train.Optimizer') + self._opt = opt + overridden_methods = ('_apply_dense', '_resource_apply_dense', + '_apply_sparse', '_resource_apply_sparse') + for name in overridden_methods: + fn = getattr(self._opt, name) + wrapper = _get_wrapper(fn, self._opt) + setattr(self._opt, name, wrapper) + + def __getattr__(self, name): + return getattr(self._opt, name) + + +def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.): + """Clips gradients of a multitask loss by their global norm. + + Ignores all-zero tensors when computing the global norm. + + Args: + gradients_variables: a list of pairs (gradient, variable). + clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. + + Returns: + list: A list of pairs of the same type as gradients_variables,. + fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. + """ + gradients, variables = six.moves.zip(*gradients_variables) + + def _replace_nonexisting_grad(grad): + if grad is None: + return grad + all_zeros = _is_all_zeros(grad) + return control_flow_ops.cond( + all_zeros, + lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)), + lambda: grad) + + nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients] + fixed_global_norm = clip_ops.global_norm(nonzero_gradients) + gradients, _ = clip_ops.clip_by_global_norm( + gradients, clip_norm, use_norm=fixed_global_norm) + return list(six.moves.zip(gradients, variables)), fixed_global_norm diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py new file mode 100644 index 0000000000..618d8eb18d --- /dev/null +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py @@ -0,0 +1,119 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for MultitaskOptimizerWrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import six + +from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import momentum + + +class MultitaskOptimizerWrapperTest(test.TestCase): + """Tests for the multitask optimizer wrapper. + """ + + def testWrapper(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) + var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32) + grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) + mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9) + mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper( + mom_opt_impl) + mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + mom_update_partial = mom_opt.apply_gradients( + zip([grads_allzero, grads1], [var0, var1])) + mom_update_no_action = mom_opt.apply_gradients( + zip([grads_allzero, grads_allzero], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + + # Step 1: normal momentum update. + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([0.01, 0.01]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + + # Step 2: momentum update that changes only slot1 but not slot0. + self.evaluate(mom_update_partial) + # Check that only the relevant momentum accumulator has been updated. + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + + # Step 3: momentum update that does not change anything. + self.evaluate(mom_update_no_action) + # Check that the momentum accumulators have *NOT* been updated. + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + + def testGradientClipping(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) + var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + var2 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + var3 = variables.Variable([3.0, 4.0], dtype=dtypes.float32) + grads0 = constant_op.constant([10.0, 15.0], dtype=dtypes.float32) + grads1 = constant_op.constant([0.0, 5.0], dtype=dtypes.float32) + grads2 = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) + grads3 = None + varlist = [var0, var1, var2, var3] + gradients = [grads0, grads1, grads2, grads3] + clipped_gradvars, global_norm = ( + multitask_optimizer_wrapper.clip_gradients_by_global_norm( + six.moves.zip(gradients, varlist), clip_norm=1.0)) + clipped_grads = list(six.moves.zip(*clipped_gradvars))[0] + reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0]))) + self.assertAllCloseAccordingToType( + self.evaluate(global_norm), reference_global_norm) + self.assertAllCloseAccordingToType( + self.evaluate(clipped_grads[2]), np.array([0., 0.])) + self.assertEqual(clipped_grads[3], None) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 909c6aba2b..f130a2187c 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.contrib import rnn as contrib_rnn from tensorflow.contrib.rnn.python.ops import core_rnn_cell +from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell from tensorflow.core.protobuf import config_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -358,6 +359,46 @@ class RNNCellTest(test.TestCase): self.assertEquals(variables[2].op.name, "root/lstm_cell/projection/kernel") + def testLSTMCellLayerNorm(self): + with self.test_session() as sess: + num_units = 2 + num_proj = 3 + batch_size = 1 + input_size = 4 + with variable_scope.variable_scope( + "root", initializer=init_ops.constant_initializer(0.5)): + x = array_ops.zeros([batch_size, input_size]) + c = array_ops.zeros([batch_size, num_units]) + h = array_ops.zeros([batch_size, num_proj]) + state = rnn_cell_impl.LSTMStateTuple(c, h) + cell = contrib_rnn_cell.LayerNormLSTMCell( + num_units=num_units, + num_proj=num_proj, + forget_bias=1.0, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) + g, out_m = cell(x, state) + sess.run([variables_lib.global_variables_initializer()]) + res = sess.run( + [g, out_m], { + x.name: np.ones((batch_size, input_size)), + c.name: 0.1 * np.ones((batch_size, num_units)), + h.name: 0.1 * np.ones((batch_size, num_proj)) + }) + self.assertEqual(len(res), 2) + # The numbers in results were not calculated, this is mostly just a + # smoke test. + self.assertEqual(res[0].shape, (batch_size, num_proj)) + self.assertEqual(res[1][0].shape, (batch_size, num_units)) + self.assertEqual(res[1][1].shape, (batch_size, num_proj)) + # Different inputs so different outputs and states + for i in range(1, batch_size): + self.assertTrue( + float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) + self.assertTrue( + float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) + def testOutputProjectionWrapper(self): with self.test_session() as sess: with variable_scope.variable_scope( diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index ebd4564f12..46823fa364 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -37,6 +37,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell +from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -995,26 +996,19 @@ class RNNCellTest(test.TestCase): output, state = cell(x, hidden) sess.run([variables.global_variables_initializer()]) - res = sess.run([output, state], { - hidden[0].name: - np.array([[[[[1.],[1.]], - [[1.],[1.]]], - [[[1.],[1.]], - [[1.],[1.]]]], - [[[[2.],[2.]], - [[2.],[2.]]], - [[[2.],[2.]], - [[2.],[2.]]]]]), - x.name: - np.array([[[[[1.],[1.]], - [[1.],[1.]]], - [[[1.],[1.]], - [[1.],[1.]]]], - [[[[2.],[2.]], - [[2.],[2.]]], - [[[2.],[2.]], - [[2.],[2.]]]]]) - }) + res = sess.run( + [output, state], { + hidden[0].name: + np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ + 1. + ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], + [[[2.], [2.]], [[2.], [2.]]]]]), + x.name: + np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ + 1. + ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]], + [[2.], [2.]]]]]) + }) # This is a smoke test, making sure expected values are unchanged. self.assertEqual(len(res), 2) self.assertAllClose(res[0], res[1].h) @@ -1275,6 +1269,47 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5) + def testBasicLSTMCellWithStateTupleLayerNorm(self): + """The results of LSTMCell and LayerNormBasicLSTMCell should be the same.""" + with self.test_session() as sess: + with variable_scope.variable_scope( + "root", initializer=init_ops.constant_initializer(0.5)): + x = array_ops.zeros([1, 2]) + c0 = array_ops.zeros([1, 2]) + h0 = array_ops.zeros([1, 2]) + state0 = rnn_cell_impl.LSTMStateTuple(c0, h0) + c1 = array_ops.zeros([1, 2]) + h1 = array_ops.zeros([1, 2]) + state1 = rnn_cell_impl.LSTMStateTuple(c1, h1) + cell = rnn_cell_impl.MultiRNNCell([ + contrib_rnn_cell.LayerNormLSTMCell( + 2, layer_norm=True, norm_gain=1.0, norm_shift=0.0) + for _ in range(2) + ]) + h, (s0, s1) = cell(x, (state0, state1)) + sess.run([variables.global_variables_initializer()]) + res = sess.run( + [h, s0, s1], { + x.name: np.array([[1., 1.]]), + c0.name: 0.1 * np.asarray([[0, 1]]), + h0.name: 0.1 * np.asarray([[2, 3]]), + c1.name: 0.1 * np.asarray([[4, 5]]), + h1.name: 0.1 * np.asarray([[6, 7]]), + }) + + expected_h = np.array([[-0.38079708, 0.38079708]]) + expected_h0 = np.array([[-0.38079708, 0.38079708]]) + expected_c0 = np.array([[-1.0, 1.0]]) + expected_h1 = np.array([[-0.38079708, 0.38079708]]) + expected_c1 = np.array([[-1.0, 1.0]]) + + self.assertEqual(len(res), 3) + self.assertAllClose(res[0], expected_h, 1e-5) + self.assertAllClose(res[1].c, expected_c0, 1e-5) + self.assertAllClose(res[1].h, expected_h0, 1e-5) + self.assertAllClose(res[2].c, expected_c1, 1e-5) + self.assertAllClose(res[2].h, expected_h1, 1e-5) + def testBasicLSTMCellWithDropout(self): def _is_close(x, y, digits=4): diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index d4691f2c27..0698d40438 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -76,6 +76,18 @@ def _get_sharded_variable(name, shape, dtype, num_shards): return shards +def _norm(g, b, inp, scope): + shape = inp.get_shape()[-1:] + gamma_init = init_ops.constant_initializer(g) + beta_init = init_ops.constant_initializer(b) + with vs.variable_scope(scope): + # Initialize beta and gamma for use by layer_norm. + vs.get_variable("gamma", shape=shape, initializer=gamma_init) + vs.get_variable("beta", shape=shape, initializer=beta_init) + normalized = layers.layer_norm(inp, reuse=True, scope=scope) + return normalized + + class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): """Long short-term memory unit (LSTM) recurrent network cell. @@ -102,13 +114,33 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. + + Layer normalization implementation is based on: + + https://arxiv.org/abs/1607.06450. + + "Layer Normalization" + Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton + + and is applied before the internal nonlinearities. + """ - def __init__(self, num_units, use_peepholes=False, - initializer=None, num_proj=None, proj_clip=None, - num_unit_shards=1, num_proj_shards=1, - forget_bias=1.0, state_is_tuple=True, - activation=math_ops.tanh, reuse=None): + def __init__(self, + num_units, + use_peepholes=False, + initializer=None, + num_proj=None, + proj_clip=None, + num_unit_shards=1, + num_proj_shards=1, + forget_bias=1.0, + state_is_tuple=True, + activation=math_ops.tanh, + reuse=None, + layer_norm=False, + norm_gain=1.0, + norm_shift=0.0): """Initialize the parameters for an LSTM cell. Args: @@ -135,6 +167,11 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. + layer_norm: If `True`, layer normalization will be applied. + norm_gain: float, The layer normalization gain initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + norm_shift: float, The layer normalization shift initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. """ super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: @@ -152,6 +189,9 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): self._state_is_tuple = state_is_tuple self._activation = activation self._reuse = reuse + self._layer_norm = layer_norm + self._norm_gain = norm_gain + self._norm_shift = norm_shift if num_proj: self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj) @@ -220,9 +260,20 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): # j = new_input, f = forget_gate, o = output_gate cell_inputs = array_ops.concat([inputs, m_prev], 1) - lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b) + lstm_matrix = math_ops.matmul(cell_inputs, concat_w) + + # If layer nomalization is applied, do not add bias + if not self._layer_norm: + lstm_matrix = nn_ops.bias_add(lstm_matrix, b) + j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=3, axis=1) + # Apply layer normalization + if self._layer_norm: + j = _norm(self._norm_gain, self._norm_shift, j, "transform") + f = _norm(self._norm_gain, self._norm_shift, f, "forget") + o = _norm(self._norm_gain, self._norm_shift, o, "output") + # Diagonal connections if self._use_peepholes: w_f_diag = vs.get_variable( @@ -236,6 +287,10 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): f_act = sigmoid(f + self._forget_bias) c = (f_act * c_prev + (1 - f_act) * self._activation(j)) + # Apply layer normalization + if self._layer_norm: + c = _norm(self._norm_gain, self._norm_shift, c, "state") + if self._use_peepholes: m = sigmoid(o + w_o_diag * c) * self._activation(c) else: @@ -1301,8 +1356,8 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): self._keep_prob = dropout_keep_prob self._seed = dropout_prob_seed self._layer_norm = layer_norm - self._g = norm_gain - self._b = norm_shift + self._norm_gain = norm_gain + self._norm_shift = norm_shift self._reuse = reuse @property @@ -1313,24 +1368,25 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): def output_size(self): return self._num_units - def _norm(self, inp, scope): + def _norm(self, inp, scope, dtype=dtypes.float32): shape = inp.get_shape()[-1:] - gamma_init = init_ops.constant_initializer(self._g) - beta_init = init_ops.constant_initializer(self._b) + gamma_init = init_ops.constant_initializer(self._norm_gain) + beta_init = init_ops.constant_initializer(self._norm_shift) with vs.variable_scope(scope): # Initialize beta and gamma for use by layer_norm. - vs.get_variable("gamma", shape=shape, initializer=gamma_init) - vs.get_variable("beta", shape=shape, initializer=beta_init) + vs.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype) + vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype) normalized = layers.layer_norm(inp, reuse=True, scope=scope) return normalized def _linear(self, args): out_size = 4 * self._num_units proj_size = args.get_shape()[-1] - weights = vs.get_variable("kernel", [proj_size, out_size]) + dtype = args.dtype + weights = vs.get_variable("kernel", [proj_size, out_size], dtype=dtype) out = math_ops.matmul(args, weights) if not self._layer_norm: - bias = vs.get_variable("bias", [out_size]) + bias = vs.get_variable("bias", [out_size], dtype=dtype) out = nn_ops.bias_add(out, bias) return out @@ -1339,13 +1395,14 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): c, h = state args = array_ops.concat([inputs, h], 1) concat = self._linear(args) + dtype = args.dtype i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1) if self._layer_norm: - i = self._norm(i, "input") - j = self._norm(j, "transform") - f = self._norm(f, "forget") - o = self._norm(o, "output") + i = self._norm(i, "input", dtype=dtype) + j = self._norm(j, "transform", dtype=dtype) + f = self._norm(f, "forget", dtype=dtype) + o = self._norm(o, "output", dtype=dtype) g = self._activation(j) if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1: @@ -1354,7 +1411,7 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell): new_c = (c * math_ops.sigmoid(f + self._forget_bias) + math_ops.sigmoid(i) * g) if self._layer_norm: - new_c = self._norm(new_c, "state") + new_c = self._norm(new_c, "state", dtype=dtype) new_h = self._activation(new_c) * math_ops.sigmoid(o) new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h) @@ -1998,8 +2055,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell): if self._skip_connection: self._total_output_channels += self._input_shape[-1] - state_size = tensor_shape.TensorShape(self._input_shape[:-1] - + [self._output_channels]) + state_size = tensor_shape.TensorShape( + self._input_shape[:-1] + [self._output_channels]) self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size) self._output_size = tensor_shape.TensorShape(self._input_shape[:-1] + [self._total_output_channels]) @@ -2059,11 +2116,8 @@ class Conv3DLSTMCell(ConvLSTMCell): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) -def _conv(args, - filter_size, - num_features, - bias, - bias_start=0.0): + +def _conv(args, filter_size, num_features, bias, bias_start=0.0): """convolution: Args: args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, @@ -2306,3 +2360,273 @@ class GLSTMCell(rnn_cell_impl.RNNCell): new_state = rnn_cell_impl.LSTMStateTuple(c, m) return m, new_state + + +class LayerNormLSTMCell(rnn_cell_impl.RNNCell): + """Long short-term memory unit (LSTM) recurrent network cell. + + The default non-peephole implementation is based on: + + http://www.bioinf.jku.at/publications/older/2604.pdf + + S. Hochreiter and J. Schmidhuber. + "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997. + + The peephole implementation is based on: + + https://research.google.com/pubs/archive/43905.pdf + + Hasim Sak, Andrew Senior, and Francoise Beaufays. + "Long short-term memory recurrent neural network architectures for + large scale acoustic modeling." INTERSPEECH, 2014. + + The class uses optional peep-hole connections, optional cell clipping, and + an optional projection layer. + + Layer normalization implementation is based on: + + https://arxiv.org/abs/1607.06450. + + "Layer Normalization" + Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton + + and is applied before the internal nonlinearities. + + """ + + def __init__(self, + num_units, + use_peepholes=False, + cell_clip=None, + initializer=None, + num_proj=None, + proj_clip=None, + forget_bias=1.0, + activation=None, + layer_norm=False, + norm_gain=1.0, + norm_shift=0.0, + reuse=None): + """Initialize the parameters for an LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell + use_peepholes: bool, set True to enable diagonal/peephole connections. + cell_clip: (optional) A float value, if provided the cell state is clipped + by this value prior to the cell output activation. + initializer: (optional) The initializer to use for the weight and + projection matrices. + num_proj: (optional) int, The output dimensionality for the projection + matrices. If None, no projection is performed. + proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is + provided, then the projected values are clipped elementwise to within + `[-proj_clip, proj_clip]`. + forget_bias: Biases of the forget gate are initialized by default to 1 + in order to reduce the scale of forgetting at the beginning of + the training. Must set it manually to `0.0` when restoring from + CudnnLSTM trained checkpoints. + activation: Activation function of the inner states. Default: `tanh`. + layer_norm: If `True`, layer normalization will be applied. + norm_gain: float, The layer normalization gain initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + norm_shift: float, The layer normalization shift initial value. If + `layer_norm` has been set to `False`, this argument will be ignored. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + + When restoring from CudnnLSTM-trained checkpoints, must use + CudnnCompatibleLSTMCell instead. + """ + super(LayerNormLSTMCell, self).__init__(_reuse=reuse) + + self._num_units = num_units + self._use_peepholes = use_peepholes + self._cell_clip = cell_clip + self._initializer = initializer + self._num_proj = num_proj + self._proj_clip = proj_clip + self._forget_bias = forget_bias + self._activation = activation or math_ops.tanh + self._layer_norm = layer_norm + self._norm_gain = norm_gain + self._norm_shift = norm_shift + + if num_proj: + self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)) + self._output_size = num_proj + else: + self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units)) + self._output_size = num_units + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + def _linear(self, + args, + output_size, + bias, + bias_initializer=None, + kernel_initializer=None, + layer_norm=False): + """Linear map: sum_i(args[i] * W[i]), where W[i] is a Variable. + + Args: + args: a 2D Tensor or a list of 2D, batch x n, Tensors. + output_size: int, second dimension of W[i]. + bias: boolean, whether to add a bias term or not. + bias_initializer: starting value to initialize the bias + (default is all zeros). + kernel_initializer: starting value to initialize the weight. + layer_norm: boolean, whether to apply layer normalization. + + + Returns: + A 2D Tensor with shape [batch x output_size] taking value + sum_i(args[i] * W[i]), where each W[i] is a newly created Variable. + + Raises: + ValueError: if some of the arguments has unspecified or wrong shape. + """ + if args is None or (nest.is_sequence(args) and not args): + raise ValueError("`args` must be specified") + if not nest.is_sequence(args): + args = [args] + + # Calculate the total size of arguments on dimension 1. + total_arg_size = 0 + shapes = [a.get_shape() for a in args] + for shape in shapes: + if shape.ndims != 2: + raise ValueError("linear is expecting 2D arguments: %s" % shapes) + if shape[1].value is None: + raise ValueError("linear expects shape[1] to be provided for shape %s, " + "but saw %s" % (shape, shape[1])) + else: + total_arg_size += shape[1].value + + dtype = [a.dtype for a in args][0] + + # Now the computation. + scope = vs.get_variable_scope() + with vs.variable_scope(scope) as outer_scope: + weights = vs.get_variable( + "kernel", [total_arg_size, output_size], + dtype=dtype, + initializer=kernel_initializer) + if len(args) == 1: + res = math_ops.matmul(args[0], weights) + else: + res = math_ops.matmul(array_ops.concat(args, 1), weights) + if not bias: + return res + with vs.variable_scope(outer_scope) as inner_scope: + inner_scope.set_partitioner(None) + if bias_initializer is None: + bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) + biases = vs.get_variable( + "bias", [output_size], dtype=dtype, initializer=bias_initializer) + + if not layer_norm: + res = nn_ops.bias_add(res, biases) + + return res + + def call(self, inputs, state): + """Run one step of LSTM. + + Args: + inputs: input Tensor, 2D, batch x num_units. + state: this must be a tuple of state Tensors, + both `2-D`, with column sizes `c_state` and + `m_state`. + + Returns: + A tuple containing: + + - A `2-D, [batch x output_dim]`, Tensor representing the output of the + LSTM after reading `inputs` when previous state was `state`. + Here output_dim is: + num_proj if num_proj was set, + num_units otherwise. + - Tensor(s) representing the new state of LSTM after reading `inputs` when + the previous state was `state`. Same type and shape(s) as `state`. + + Raises: + ValueError: If input size cannot be inferred from inputs via + static shape inference. + """ + sigmoid = math_ops.sigmoid + + (c_prev, m_prev) = state + + dtype = inputs.dtype + input_size = inputs.get_shape().with_rank(2)[1] + if input_size.value is None: + raise ValueError("Could not infer input size from inputs.get_shape()[-1]") + scope = vs.get_variable_scope() + with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: + + # i = input_gate, j = new_input, f = forget_gate, o = output_gate + lstm_matrix = self._linear( + [inputs, m_prev], + 4 * self._num_units, + bias=True, + bias_initializer=None, + layer_norm=self._layer_norm) + i, j, f, o = array_ops.split( + value=lstm_matrix, num_or_size_splits=4, axis=1) + + if self._layer_norm: + i = _norm(self._norm_gain, self._norm_shift, i, "input") + j = _norm(self._norm_gain, self._norm_shift, j, "transform") + f = _norm(self._norm_gain, self._norm_shift, f, "forget") + o = _norm(self._norm_gain, self._norm_shift, o, "output") + + # Diagonal connections + if self._use_peepholes: + with vs.variable_scope(unit_scope): + w_f_diag = vs.get_variable( + "w_f_diag", shape=[self._num_units], dtype=dtype) + w_i_diag = vs.get_variable( + "w_i_diag", shape=[self._num_units], dtype=dtype) + w_o_diag = vs.get_variable( + "w_o_diag", shape=[self._num_units], dtype=dtype) + + if self._use_peepholes: + c = ( + sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) + else: + c = ( + sigmoid(f + self._forget_bias) * c_prev + + sigmoid(i) * self._activation(j)) + + if self._layer_norm: + c = _norm(self._norm_gain, self._norm_shift, c, "state") + + if self._cell_clip is not None: + # pylint: disable=invalid-unary-operand-type + c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) + # pylint: enable=invalid-unary-operand-type + if self._use_peepholes: + m = sigmoid(o + w_o_diag * c) * self._activation(c) + else: + m = sigmoid(o) * self._activation(c) + + if self._num_proj is not None: + with vs.variable_scope("projection"): + m = self._linear(m, self._num_proj, bias=False) + + if self._proj_clip is not None: + # pylint: disable=invalid-unary-operand-type + m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) + # pylint: enable=invalid-unary-operand-type + + new_state = (rnn_cell_impl.LSTMStateTuple(c, m)) + return m, new_state diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 87230e3355..e87ef41388 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism): memory_sequence_length=None, memory_layer=None, check_inner_dims_defined=True, - score_mask_value=float("-inf"), + score_mask_value=None, name=None): """Construct base AttentionMechanism class. @@ -187,9 +187,13 @@ class _BaseAttentionMechanism(AttentionMechanism): "memory_layer is not a Layer: %s" % type(memory_layer).__name__) self._query_layer = query_layer self._memory_layer = memory_layer + self.dtype = memory_layer.dtype if not callable(probability_fn): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) + if score_mask_value is None: + score_mask_value = dtypes.as_dtype( + self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -334,7 +338,8 @@ class LuongAttention(_BaseAttentionMechanism): memory_sequence_length=None, scale=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="LuongAttention"): """Construct the AttentionMechanism mechanism. @@ -353,17 +358,20 @@ class LuongAttention(_BaseAttentionMechanism): score_mask_value: (optional) The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the memory layer of the attention mechanism. name: Name to use when creating ops. """ # For LuongAttention, we only transform the memory layer; thus # num_units **must** match expected the query depth. if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(LuongAttention, self).__init__( query_layer=None, memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -475,7 +483,8 @@ class BahdanauAttention(_BaseAttentionMechanism): memory_sequence_length=None, normalize=False, probability_fn=None, - score_mask_value=float("-inf"), + score_mask_value=None, + dtype=None, name="BahdanauAttention"): """Construct the Attention mechanism. @@ -494,16 +503,20 @@ class BahdanauAttention(_BaseAttentionMechanism): score_mask_value: (optional): The mask value for score before passing into `probability_fn`. The default is -inf. Only used if `memory_sequence_length` is not None. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ if probability_fn is None: probability_fn = nn_ops.softmax + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = lambda score, _: probability_fn(score) super(BahdanauAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -738,11 +751,12 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, normalize=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="BahdanauMonotonicAttention"): """Construct the Attention mechanism. @@ -766,17 +780,21 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(BahdanauMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -834,11 +852,12 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): memory, memory_sequence_length=None, scale=False, - score_mask_value=float("-inf"), + score_mask_value=None, sigmoid_noise=0., sigmoid_noise_seed=None, score_bias_init=0., mode="parallel", + dtype=None, name="LuongMonotonicAttention"): """Construct the Attention mechanism. @@ -862,17 +881,21 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. See the docstring for `tf.contrib.seq2seq.monotonic_attention` for more information. + dtype: The data type for the query and memory layers of the attention + mechanism. name: Name to use when creating ops. """ # Set up the monotonic probability fn with supplied parameters + if dtype is None: + dtype = dtypes.float32 wrapped_probability_fn = functools.partial( _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(LuongMonotonicAttention, self).__init__( query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False), + num_units, name="query_layer", use_bias=False, dtype=dtype), memory_layer=layers_core.Dense( - num_units, name="memory_layer", use_bias=False), + num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, probability_fn=wrapped_probability_fn, memory_sequence_length=memory_sequence_length, @@ -1123,8 +1146,11 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, name="attention_layer", use_bias=False) - for attention_layer_size in attention_layer_sizes) + attention_layer_size, + name="attention_layer", + use_bias=False, + dtype=attention_mechanisms[i].dtype) + for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) else: self._attention_layers = None diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index 0bfd0801d5..f7a85557ca 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -237,7 +237,7 @@ One way to reduce this code duplication would be via a `for` loop: ```python net = ... for i in range(3): - net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1)) + net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1)) net = slim.max_pool2d(net, [2, 2], scope='pool2') ``` diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py index b4fd2580c2..576444214d 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py @@ -386,7 +386,7 @@ class ResnetCompleteNetworkTest(test.TestCase): inputs, None, is_training=False, global_pool=False) sess.run(variables.global_variables_initializer()) self.assertAllClose( - output.eval(), expected.eval(), atol=1e-4, rtol=1e-4) + output.eval(), expected.eval(), atol=2e-4, rtol=1e-4) def testUnknownBatchSize(self): batch = 2 diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index da5f2b0223..dcb390b0a5 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -1,4 +1,4 @@ -## How to compile and use RDMA-enabled TensorFlow +## How to compile, use and configure RDMA-enabled TensorFlow 1. Follow the regular TF compilation instructions. During configure step, if you want ibverbs based RDMA support, answer yes to this question: ```Do you wish to build TensorFlow with VERBS-RDMA support [y/N]``` @@ -7,6 +7,18 @@ ```server = tf.train.Server(cluster, job_name="local", task_index=0, protocol='grpc+verbs') # default protocol is 'grpc'``` +3. RDMA configuration is done by setting the following environment variables: + * **RDMA_DEVICE**: The RDMA device name to be used. If not defined by user, a default device with an active port will be set if exists. + * **RDMA_DEVICE_PORT**: The port within the selected device. Not relevant if RDMA_DEVICE is not defined. If not defined by user, a default active port will be set if exists. + * **RDMA_GID_INDEX**: The GID index of the port. If not defined by user, a default suitable GID index will be set (RoCEV2 is favourable as default). + * **RDMA_QP_PKEY_INDEX**: The Pkey for the QP. If not defined by user, the default value is 0. + * **RDMA_QP_QUEUE_DEPTH**: TX/RX queue size for the QP. If not defined by user, the default value is 1024. + * **RDMA_QP_TIMEOUT**: The retransmission timeout for QPs. If not defined by user, the default value is 14. + * **RDMA_QP_RETRY_COUNT**: Number of retransmission for QPs. If not defined by user, the default value is 7. + * **RDMA_QP_SL**: Service level configuration for QOS and ECN, valid values are 0-7. If not defined by user, the default value is 0. + * **RDMA_QP_MTU**: MTU configuration for the QPs. If not defined by user, the default value is active MTU from query_port. + * **RDMA_TRAFFIC_CLASS**: Traffic class configuration for QP, in case of DSCP trust level QoS configuration. If not defined by user, the default value is 0. For more info see [HowTo Configure Trust state on Mellanox Adapters](https://community.mellanox.com/docs/DOC-2866). + ## Overview The design is based on TensorFlow r1.0. An RDMA path is added between servers for tensor transfer (weights, gradients, etc). The existing GRPC path remains and is responsible for "administrative" tasks, such as setting up the RDMA path, exchanging computation graphs, etc. diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 26e18b28aa..ac8d994502 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -16,6 +16,7 @@ limitations under the License. #ifdef TENSORFLOW_USE_VERBS #include "tensorflow/contrib/verbs/rdma.h" +#include #include #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" @@ -33,6 +34,8 @@ limitations under the License. namespace tensorflow { +#define RoCE_V2 "RoCE v2" + namespace { // hash name to 32-bit integer uint32_t NameHash(const string& name) { @@ -66,16 +69,336 @@ string MessageTypeToString(RdmaMessageType rmt) { } } // namespace -ibv_context* open_default_device() { +// Function to get environment variable +// Args: +// var_name - the name of the environmental variable +// Returns: +// string with it's value or empty string if not set +string get_env_var(char const* var_name) { + char const* var_temp = getenv(var_name); + + return (var_temp == NULL) ? string() : string(var_temp); +} + +// Function to open device +// Args: +// ibv_dev device to open +// Returns: +// context of the opened device +ibv_context* open_device(ibv_device* ibv_dev) { + ibv_context* context = ibv_open_device(ibv_dev); + + CHECK(context) << "Open context failed for " << ibv_get_device_name(ibv_dev); + return context; +} + +// Function to count the number of active ports for device +// Args: +// device - to check active ports +// Returns: +// number of active ports of the given device +int get_dev_active_port_count(ibv_device* device) { + ibv_device_attr device_att; + ibv_port_attr port_attr; + ibv_context* context = NULL; + int rc, port_index, active_ports = 0; + + context = ibv_open_device(device); + CHECK(context) << "Open context failed for " << ibv_get_device_name(device); + rc = ibv_query_device(context, &device_att); + CHECK(!rc) << "Failed to query the device"; + + for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { + rc = ibv_query_port(context, port_index, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_index; + if (port_attr.state == IBV_PORT_ACTIVE) { + active_ports++; + } + } + ibv_close_device(context); + return active_ports; +} + +// Function to set device. If RDMA_DEVICE not set, search for device with active +// port. +// Fails if more than one device with active port was found. +// Returns: +// device to use +ibv_device* set_device() { ibv_device** dev_list; - ibv_device* ib_dev; - dev_list = ibv_get_device_list(NULL); + int dev_num, device_index, device_to_open = 0; + int num_devs_with_active_port = 0; + string env_p_rdma_device, str_port_num; + + dev_list = ibv_get_device_list(&dev_num); CHECK(dev_list) << "No InfiniBand device found"; - ib_dev = dev_list[0]; - CHECK(ib_dev) << "No InfiniBand device found"; - ibv_context* context = ibv_open_device(ib_dev); - CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev); - return context; + + env_p_rdma_device = get_env_var("RDMA_DEVICE"); + if (!env_p_rdma_device.empty()) { + for (device_index = 0; device_index < dev_num; device_index++) { + if (!env_p_rdma_device.compare( + ibv_get_device_name(dev_list[device_index]))) { + CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) + << "Device " << ibv_get_device_name(dev_list[device_index]) + << " has no active ports"; + return dev_list[device_index]; + } + } + // check validity of input device + CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; + } else { + // set default device + str_port_num = get_env_var("RDMA_DEVICE_PORT"); + CHECK(str_port_num.empty()) + << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; + for (device_index = 0; device_index < dev_num; device_index++) { + // get port_num + if (get_dev_active_port_count(dev_list[device_index]) > 0) { + num_devs_with_active_port++; + CHECK(num_devs_with_active_port <= 1) << ". More than one device with " + "active port in the system. " + "Please enter RDMA_DEVICE"; + // found device with at least 1 active port + device_to_open = device_index; + } + } + CHECK(num_devs_with_active_port > 0) + << "There is no active port in the system"; + return dev_list[device_to_open]; + } + CHECK(false) << "No device was set!"; + return NULL; // never happens +} + +// Function to set port for device. +// If RDMA_DEVICE_PORT not set, first active port of the device will be set. +// Args: +// context of the device +// Returns: +// port to use +uint8_t set_port(ibv_context* context) { + uint8_t port_num = 0; // 0 is illegal port number + string str_port_num; + ibv_device_attr device_att; + ibv_port_attr port_attr; + int rc, port_index; + + rc = ibv_query_device(context, &device_att); + CHECK(!rc) << "Failed to query the device\n"; + + str_port_num = get_env_var("RDMA_DEVICE_PORT"); + // user defined port + if (!str_port_num.empty()) { + port_num = stoi(str_port_num); + CHECK(port_num > 0) << "RDMA_DEVICE_PORT should be positive"; + CHECK(port_num <= device_att.phys_port_cnt) << "RDMA_DEVICE_PORT should be " + "less or equal to amount of " + "available ports"; + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + // check if port id active + CHECK(port_attr.state == IBV_PORT_ACTIVE) + << "Selected RDMA_DEVICE_PORT is not active"; + } else { // set default port + for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { + rc = ibv_query_port(context, port_index, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_index; + if (port_attr.state == IBV_PORT_ACTIVE) { + port_num = port_index; + break; + } + } + CHECK_GT(port_num, 0) << "No active ports"; + } + return port_num; +} + +// Function read from sysfs file +// Args: +// dir - directory +// file - file +// buff - buffer for the result +// size - buffer size +// Returns: +// number of bytes were read or -1 if failed +int read_sysfs_file(const char* dir, const char* file, char* buf, size_t size) { + char* path; + int fd; + int len; + + if (asprintf(&path, "%s/%s", dir, file) < 0) return -1; + + fd = open(path, O_RDONLY); + if (fd < 0) { + free(path); + return -1; + } + + len = read(fd, buf, size); + + close(fd); + free(path); + + if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0'; + + return len; +} + +// Function to check if GID index support RoCE V2 +// Args: +// context - device context +// port_num - port number +// index - GID index +// Returns: +// if GID supports RoCE V2 - true, otherwise - false. +bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, + uint8_t index) { + char name[32]; + char buff[41]; + + snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index); + if (read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <= + 0) { + return false; + } + return !strcmp(buff, RoCE_V2); +} + +// Function to set GID index. +// If the port link is IB, no GID index should be selected. +// If Ethernet but RDMA_GID_INDEX not set gid index that supports +// RoCE V2 will be chosen(fails if more than one IP is configured) +// Args: +// context - device context +// port_num - port number +// Returns: +// GID index to use +uint8_t set_gid(uint8_t port_num, ibv_context* context) { + ibv_port_attr port_attr; + string gid_str; + int rc, i, gids_num = 0, v2_ip_num = 0; + union ibv_gid gid; + uint8_t gid_index = 0; + + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + + for (i = 0; i < port_attr.gid_tbl_len; i++) { + rc = ibv_query_gid(context, port_num, i, &gid); + CHECK(!rc) << "Failed to query gid to port " << (int)port_num << " index " + << i; + if (gid.global.interface_id) { + gids_num++; + if (gid.global.subnet_prefix == 0 && + is_gid_type_roce_v2(context, port_num, i)) { + if (v2_ip_num == 0) { + // can be overwritten by RDMA_GID_INDEX later + gid_index = i; + } + v2_ip_num++; + } + } + } + switch (port_attr.link_layer) { + case (IBV_LINK_LAYER_ETHERNET): + gid_str = get_env_var("RDMA_GID_INDEX"); + if (!gid_str.empty()) { + gid_index = stoi(gid_str); + CHECK(gid_index < gids_num) + << "RDMA_GID_INDEX should be less than GIDs amount" << gids_num; + } else { + CHECK(v2_ip_num <= 1) + << "More than one IP is available, please specify GID_INDEX"; + } + break; + case (IBV_LINK_LAYER_INFINIBAND): // no need in GID index + break; + default: + LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " + "InfiniBand only. "; + } + if (!is_gid_type_roce_v2(context, port_num, gid_index)) { + LOG(INFO) << "RoCE v2 is not configured for GID_INDEX " << (int)gid_index; + } + return gid_index; +} + +// set the default or environment value to the configuration parameter. +// Args: +// default_val- the default value for this parameter +// env_param- the environment parameter's name +// Returns: +// 32-bit value +uint32_t set_param(uint32_t default_val, const char* env_param) { + uint32_t val = default_val; + string val_s; + + val_s = get_env_var(env_param); + + if (!val_s.empty()) { + val = stoi(val_s); + } + return val; +} + +enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { + ibv_port_attr port_attr; + enum ibv_mtu mtu; + string mtu_s; + int rc, mtu_i; + + rc = ibv_query_port(context, port_num, &port_attr); + CHECK(!rc) << "Failed to query the port" << port_num; + + mtu_s = get_env_var("RDMA_MTU"); + + if (!mtu_s.empty()) { + mtu_i = stoi(mtu_s); + switch (mtu_i) { + case 256: + mtu = IBV_MTU_256; + break; + case 512: + mtu = IBV_MTU_512; + break; + case 1024: + mtu = IBV_MTU_1024; + break; + case 2048: + mtu = IBV_MTU_2048; + break; + case 4096: + mtu = IBV_MTU_4096; + break; + default: + CHECK(0) << "Error: MTU input value must be one of the following: 256, " + "512, 1024, 2048, 4096. MTU " + << mtu << " is invalid\n"; + break; + } + CHECK(mtu < port_attr.active_mtu) + << "MTU configuration for the QPs is larger than active MTU"; + } else { + mtu = port_attr.active_mtu; + } + return mtu; +} + +RdmaParams params_init(ibv_context* context) { + RdmaParams params; + + params.port_num = set_port(context); + params.sgid_index = set_gid(params.port_num, context); + params.pkey_index = (uint8_t)set_param(PKEY_DEFAULT, "RDMA_PKEY"); + params.queue_depth = set_param(QUEUE_DEPTH_DEFAULT, "RDMA_QUEUE_DEPTH"); + params.timeout = (uint8_t)set_param(TIMEOUT_DEFAULT, "RDMA_TIMEOUT"); + params.retry_cnt = (uint8_t)set_param(RETRY_CNT_DEFAULT, "RDMA_RETRY_CNT"); + params.sl = (uint8_t)set_param(SL_DEFAULT, "RDMA_SL"); + CHECK(params.sl <= 7) << "SL value is " << (int)params.sl + << ". Valid values are 0-7."; + params.mtu = set_mtu(params.port_num, context); + params.traffic_class = set_param(TRAFFIC_CLASS, "RDMA_TRAFFIC_CLASS"); + return params; } ibv_pd* alloc_protection_domain(ibv_context* context) { @@ -85,7 +408,8 @@ ibv_pd* alloc_protection_domain(ibv_context* context) { } RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env) - : context_(open_default_device()), + : context_(open_device(set_device())), + params_(params_init(context_)), pd_(alloc_protection_domain(context_)), worker_env_(worker_env) { event_channel_ = ibv_create_comp_channel(context_); @@ -242,8 +566,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, memset(&attr, 0, sizeof(ibv_qp_init_attr)); attr.send_cq = adapter_->cq_; attr.recv_cq = adapter_->cq_; - attr.cap.max_send_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; - attr.cap.max_recv_wr = RdmaAdapter::MAX_CONCURRENT_WRITES; + attr.cap.max_send_wr = adapter_->params_.queue_depth; + attr.cap.max_recv_wr = adapter_->params_.queue_depth; attr.cap.max_send_sge = 1; attr.cap.max_recv_sge = 1; attr.qp_type = IBV_QPT_RC; @@ -257,8 +581,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_INIT; - attr.pkey_index = 0; - attr.port_num = 1; + attr.pkey_index = adapter_->params_.pkey_index; + attr.port_num = adapter_->params_.port_num; attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; int mask = @@ -269,13 +593,15 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // Local address { struct ibv_port_attr attr; - CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &attr)) + CHECK( + !ibv_query_port(adapter_->context_, adapter_->params_.port_num, &attr)) << "Query port"; self_.lid = attr.lid; self_.qpn = qp_->qp_num; self_.psn = static_cast(random::New64()) & 0xffffff; union ibv_gid gid; - CHECK(!ibv_query_gid(adapter_->context_, (uint8_t)1, 0, &gid)) + CHECK(!ibv_query_gid(adapter_->context_, adapter_->params_.port_num, + adapter_->params_.sgid_index, &gid)) << "Query gid"; self_.snp = gid.global.subnet_prefix; self_.iid = gid.global.interface_id; @@ -479,11 +805,9 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTR; - struct ibv_port_attr port_attr; - CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &port_attr)) - << "Query port failed"; + // This assumes both QP's ports are configured with the same MTU - attr.path_mtu = port_attr.active_mtu; + attr.path_mtu = adapter_->params_.mtu; attr.dest_qp_num = remoteAddr.qpn; attr.rq_psn = remoteAddr.psn; attr.max_dest_rd_atomic = 1; @@ -494,9 +818,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.flow_label = 0; attr.ah_attr.grh.hop_limit = 255; attr.ah_attr.dlid = remoteAddr.lid; - attr.ah_attr.sl = 0; + attr.ah_attr.sl = adapter_->params_.sl; attr.ah_attr.src_path_bits = 0; - attr.ah_attr.port_num = 1; + attr.ah_attr.port_num = adapter_->params_.port_num; + attr.ah_attr.grh.sgid_index = adapter_->params_.sgid_index; + attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; int r; CHECK(!(r = ibv_modify_qp(qp_, &attr, @@ -509,8 +835,8 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { memset(&attr, 0, sizeof(ibv_qp_attr)); attr.qp_state = IBV_QPS_RTS; attr.sq_psn = self_.psn; - attr.timeout = 14; - attr.retry_cnt = 7; + attr.timeout = adapter_->params_.timeout; + attr.retry_cnt = adapter_->params_.retry_cnt; attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index e1e07db776..00217c81d4 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -36,7 +36,24 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" namespace tensorflow { - +#define PKEY_DEFAULT 0 +#define QUEUE_DEPTH_DEFAULT 1024 +#define TIMEOUT_DEFAULT 14 +#define RETRY_CNT_DEFAULT 7 +#define SL_DEFAULT 0 +#define TRAFFIC_CLASS 0 + +struct RdmaParams { + uint8_t port_num; + uint8_t sgid_index; + uint8_t pkey_index; + uint32_t queue_depth; + uint8_t timeout; + uint8_t retry_cnt; + uint8_t sl; + enum ibv_mtu mtu; + uint8_t traffic_class; +}; // structure to save the address of remote channels. struct RdmaAddress { uint32_t lid; @@ -84,6 +101,8 @@ class RdmaAdapter { protected: static const int MAX_CONCURRENT_WRITES = 1000; ibv_context* context_; + // RDMA configuration parameters + RdmaParams params_; // ibverbs protection domain ibv_pd* pd_; // Completion event channel, to wait for work completions diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d71f314e11..30ff4ef358 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2710,6 +2710,7 @@ tf_cc_test_mkl( srcs = [ "graph/mkl_layout_pass_test.cc", "graph/mkl_tfconversion_pass_test.cc", + "util/mkl_util_test.cc", ], linkstatic = 1, deps = [ diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt new file mode 100644 index 0000000000..cd7ec6e551 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt @@ -0,0 +1,47 @@ +op { + graph_op_name: "UniqueV2" + in_arg { + name: "x" + description: < [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index 0a3355cdbc..77a96d1e03 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -26,6 +26,8 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 53e80b1ee3..63b74e8dbf 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -81,7 +81,7 @@ class MklCPUAllocator : public Allocator { } #if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) if (user_val > max_mem_bytes) { - LOG(WARNING) << "The user specifed a memory limit " << kMaxLimitStr + LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr << "=" << user_val << " greater than available physical memory: " << max_mem_bytes diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h index 9caa076c72..cc272d156e 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.h +++ b/tensorflow/core/common_runtime/sycl/sycl_device.h @@ -46,8 +46,8 @@ class GSYCLInterface { if (!found_device) { // Currently Intel GPU is not supported - LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, " - "trying OpenCL CPU"; + LOG(WARNING) << "No OpenCL GPU found that is supported by " + << "ComputeCpp/triSYCL, trying OpenCL CPU"; } for (const auto& device : device_list) { @@ -58,10 +58,24 @@ class GSYCLInterface { } } + if (!found_device) { + LOG(WARNING) << "No OpenCL CPU found that is supported by " + << "ComputeCpp/triSYCL, checking for host sycl device"; + } + + for (const auto& device : device_list) { + // triSYCL only supports the host device for now + if (device.is_host()) { + LOG(WARNING) << "Found SYCL host device"; + AddDevice(device); + found_device = true; + } + } + if (!found_device) { // Currently Intel GPU is not supported - LOG(FATAL) - << "No OpenCL GPU nor CPU found that is supported by ComputeCpp"; + LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU" + << " supported by ComputeCPP/triSYCL was found"; } else { LOG(INFO) << "Found following OpenCL devices:"; for (int i = 0; i < device_list.size(); i++) { diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 87c41186d5..fd1b5d33b9 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -453,6 +453,21 @@ const Edge* Graph::AddControlEdge(Node* source, Node* dest, return AddEdge(source, kControlSlot, dest, kControlSlot); } +void Graph::RemoveControlEdge(const Edge* e) { + if (!e->src_->IsSource() && !e->dst_->IsSink()) { + e->dst_->MaybeCopyOnWrite(); + std::string e_src_name = strings::StrCat("^", e->src_->name()); + auto* inputs = e->dst_->props_->node_def.mutable_input(); + for (auto it = inputs->begin(); it != inputs->end(); ++it) { + if (*it == e_src_name) { + inputs->erase(it); + break; + } + } + } + RemoveEdge(e); +} + Status Graph::UpdateEdge(Node* new_src, int new_src_index, Node* dst, int dst_index) { TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index)); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index c5dde722fa..223dd12f8f 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -451,6 +451,11 @@ class Graph { // REQUIRES: The edge must exist. void RemoveEdge(const Edge* edge); + // Removes control edge `edge` from the graph. Note that this also updates + // the corresponding NodeDef to reflect the change. + // REQUIRES: The control edge must exist. + void RemoveControlEdge(const Edge* e); + // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index b9e3cba035..1924c05d3d 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -117,7 +117,7 @@ DataType EdgeType(const Edge* e) { } } -// Return true iff we need to add a same device send/recv for 'edge'. +// Return true iff we need to add the same device send/recv for 'edge'. bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { if (edge->IsControlEdge()) { return false; @@ -1116,7 +1116,7 @@ Status Partition(const PartitionOptions& opts, Graph* g, // before the data is available. AddInput(real_recv, send->name(), Graph::kControlSlot); } else if (control_flow_edge != nullptr) { - // Redirect control edge to the real recv since this is not a same + // Redirect control edge to the real recv since this is not the same // device send/recv. --num_control_flow_edges; AddInput(real_recv, control_flow_edge->src()->name(), diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index 7686cef219..e2ce0ba046 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -118,6 +118,23 @@ class GraphTest : public ::testing::Test { LOG(FATAL) << name; } + bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) { + for (const Edge* e : dst->in_edges()) { + if (e->IsControlEdge() && e->src() == src && + e->src_output() == Graph::kControlSlot && + e->dst_input() == Graph::kControlSlot) { + return true; + } + } + std::string control_edge_name = strings::StrCat("^", src->name()); + for (int i = 0; i < dst->def().input_size(); ++i) { + if (dst->def().input(i) == control_edge_name) { + return true; + } + } + return false; + } + Graph graph_; private: @@ -458,8 +475,8 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_TRUE(edge == nullptr); EXPECT_EQ(b->def().input_size(), 2); - // Can add redundant control edge with create_duplicate. - edge = graph_.AddControlEdge(a, b, /*create_duplicate=*/true); + // Can add redundant control edge with allow_duplicates. + edge = graph_.AddControlEdge(a, b, /*allow_duplicates=*/true); EXPECT_TRUE(edge != nullptr); // create_duplicate causes the NodeDef not to be updated. ASSERT_EQ(b->def().input_size(), 2); @@ -477,6 +494,47 @@ TEST_F(GraphTest, AddControlEdge) { EXPECT_EQ(b->def().input_size(), 2); } +TEST_F(GraphTest, RemoveControlEdge) { + FromGraphDef( + "node { name: 'A' op: 'OneOutput' }" + "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }" + "node { name: 'C' op: 'NoOp' } "); + Node* a = FindNode("A"); + Node* b = FindNode("B"); + Node* c = FindNode("C"); + + // Add a control edge. + const Edge* edge_1 = graph_.AddControlEdge(c, a); + const Edge* edge_2 = graph_.AddControlEdge(a, b); + ASSERT_TRUE(edge_1 != nullptr); + ASSERT_TRUE(edge_2 != nullptr); + + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); + + graph_.RemoveControlEdge(edge_1); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b)); + + graph_.RemoveControlEdge(edge_2); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(a, b)); + + // Test removing a duplicate control edge. + // Note that unless allow_duplicates is true, the duplicate edge + // will not be added. That's why we expect edge_4 to be a null + // pointer. We are not testing with allow_duplicates set to true, + // as that is a highly unlikely use case that does not make much + // sense. + const Edge* edge_3 = graph_.AddControlEdge(c, a); + const Edge* edge_4 = graph_.AddControlEdge(c, a); + ASSERT_TRUE(edge_3 != nullptr); + ASSERT_TRUE(edge_4 == nullptr); + + graph_.RemoveControlEdge(edge_3); + ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a)); +} + TEST_F(GraphTest, UpdateEdge) { // Build a little graph Node* a = FromNodeDef("A", "OneOutput", 0); diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index fe4588389e..3fd89e2b66 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -68,7 +68,7 @@ namespace tensorflow { // take place before we hit the op. For this, we add a new op before each // element-wise MKL op to deal with the inputs, called _MklInputConversion. // This pass has been enhanced to add this capability. -// +// // The _MklInputConversion op will check the inputs to the elementwise op and // make sure that either both are in MKL format or both are in TF format, // depending on their initial state and whether broadcast is needed or not. diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 5df190ba01..95bc5044d0 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -58,6 +58,12 @@ class GraphProperties { const std::vector& GetOutputProperties( const string& node_name) const; + static void FillTensorPropertiesFromContext( + const shape_inference::ShapeHandle&, const DataType&, + shape_inference::InferenceContext*, + std::unordered_map* dim_ids, + OpInfo::TensorProperties*); + private: // Inputs GrapplerItem item_; diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 7fd1876371..9ab889beb5 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -62,7 +62,7 @@ const std::set& NodeMap::GetOutputs(const string& node_name) const { void NodeMap::AddNode(const string& name, NodeDef* node) { auto ret = nodes_.insert(std::make_pair(name, node)); CHECK(ret.second) << "Pair (" << name << "," << node - << ") is not inserted because a same key already exists."; + << ") is not inserted because the same key already exists."; } void NodeMap::AddOutput(const string& node_name, const string& output_name) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index d7b457eab7..b4a5a3c796 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -929,6 +929,25 @@ tf_cc_test( ], ) +tf_cuda_cc_test( + name = "bincount_op_test", + size = "small", + srcs = ["bincount_op_test.cc"], + deps = [ + ":bincount_op", + ":ops_testutil", + ":ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cuda_cc_test( name = "constant_op_test", size = "small", @@ -1617,7 +1636,10 @@ DYNAMIC_DEPS = [ tf_kernel_library( name = "dynamic_partition_op", prefix = "dynamic_partition_op", - deps = DYNAMIC_DEPS, + deps = DYNAMIC_DEPS + [ + ":fill_functor", + ":gather_functor", + ] + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( @@ -1687,7 +1709,7 @@ tf_kernel_library( ], ) -tf_cc_tests( +tf_cuda_cc_tests( name = "dynamic_op_test", size = "small", srcs = [ @@ -1698,6 +1720,7 @@ tf_cc_tests( ":data_flow", ":ops_testutil", ":ops_util", + "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -2572,8 +2595,9 @@ tf_kernel_library( tf_kernel_library( name = "bucketize_op", + gpu_srcs = ["cuda_device_array.h"], prefix = "bucketize_op", - deps = MATH_DEPS, + deps = ARRAY_DEPS, ) tf_kernel_library( @@ -3174,7 +3198,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/eigen3", - ], + ] + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index af629d0de8..f918023693 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -153,7 +153,8 @@ class AvgPoolingOp : public UnaryOp { if (data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, tensor_in, output_shape); + stride_, padding_, data_format_, tensor_in, output_shape, + /*propagate_nans=*/false); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, @@ -408,7 +409,7 @@ class AvgPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape); + output_shape, /*propagate_nans=*/false); } private: @@ -532,7 +533,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape); + output_shape, /*propagate_nans=*/false); } } diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 1cd5943ef3..890fa3121b 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -17,6 +17,7 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" @@ -27,46 +28,37 @@ namespace tensorflow { using thread::ThreadPool; -template -class BincountOp : public OpKernel { - public: - explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; - void Compute(OpKernelContext* ctx) override { - const Tensor& arr_t = ctx->input(0); - const Tensor& size_tensor = ctx->input(1); - const Tensor& weights_t = ctx->input(2); - int32 size = size_tensor.scalar()(); - OP_REQUIRES( - ctx, size >= 0, - errors::InvalidArgument("size (", size, ") must be non-negative")); - const bool has_weights = weights_t.NumElements() > 0; - OP_REQUIRES(ctx, !(has_weights && arr_t.shape() != weights_t.shape()), - errors::InvalidArgument( - "If weights are passed, they must have the same shape (" + - weights_t.shape().DebugString() + ") as arr (" + - arr_t.shape().DebugString() + ")")); - const auto arr = arr_t.flat(); - const auto weights = weights_t.flat(); +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output) { + int size = output.size(); Tensor all_nonneg_t; - OP_REQUIRES_OK(ctx, - ctx->allocate_temp(DT_BOOL, TensorShape({}), &all_nonneg_t, - AllocatorAttributes())); - all_nonneg_t.scalar().device(ctx->eigen_cpu_device()) = + TF_RETURN_IF_ERROR(context->allocate_temp( + DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes())); + all_nonneg_t.scalar().device(context->eigen_cpu_device()) = (arr >= 0).all(); - OP_REQUIRES(ctx, all_nonneg_t.scalar()(), - errors::InvalidArgument("Input arr must be non-negative!")); + if (!all_nonneg_t.scalar()()) { + return errors::InvalidArgument("Input arr must be non-negative!"); + } // Allocate partial output bin sums for each worker thread. Worker ids in // ParallelForWithWorkerId range from 0 to NumThreads() inclusive. ThreadPool* thread_pool = - ctx->device()->tensorflow_cpu_worker_threads()->workers; + context->device()->tensorflow_cpu_worker_threads()->workers; const int64 num_threads = thread_pool->NumThreads() + 1; Tensor partial_bins_t; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(weights_t.dtype(), - TensorShape({num_threads, size}), - &partial_bins_t)); + TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum::value, + TensorShape({num_threads, size}), + &partial_bins_t)); auto partial_bins = partial_bins_t.matrix(); partial_bins.setZero(); thread_pool->ParallelForWithWorkerId( @@ -75,7 +67,7 @@ class BincountOp : public OpKernel { for (int64 i = start_ind; i < limit_ind; i++) { int32 value = arr(i); if (value < size) { - if (has_weights) { + if (weights.size()) { partial_bins(worker_id, value) += weights(i); } else { // Complex numbers don't support "++". @@ -84,25 +76,63 @@ class BincountOp : public OpKernel { } } }); - TensorShape output_shape({size}); - Tensor* output_t; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t)); + // Sum the partial bins along the 0th axis. Eigen::array reduce_dims({0}); - output_t->flat().device(ctx->eigen_cpu_device()) = - partial_bins.sum(reduce_dims); + output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims); + return Status::OK(); + } +}; + +} // namespace functor + +template +class BincountOp : public OpKernel { + public: + explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& arr_t = ctx->input(0); + const Tensor& size_tensor = ctx->input(1); + const Tensor& weights_t = ctx->input(2); + + int32 size = size_tensor.scalar()(); + OP_REQUIRES( + ctx, size >= 0, + errors::InvalidArgument("size (", size, ") must be non-negative")); + + const auto arr = arr_t.flat(); + const auto weights = weights_t.flat(); + Tensor* output_t; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, TensorShape({size}), &output_t)); + auto output = output_t->flat(); + OP_REQUIRES_OK(ctx, functor::BincountFunctor::Compute( + ctx, arr, weights, output)); } }; -#define REGISTER(TYPE) \ +#define REGISTER_KERNELS(type) \ REGISTER_KERNEL_BUILDER( \ - Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ - BincountOp) + Name("Bincount").Device(DEVICE_CPU).TypeConstraint("T"), \ + BincountOp) + +TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); +#undef REGISTER_KERNELS + +#if GOOGLE_CUDA + +#define REGISTER_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("Bincount") \ + .Device(DEVICE_GPU) \ + .HostMemory("size") \ + .TypeConstraint("T"), \ + BincountOp) -TF_CALL_NUMBER_TYPES(REGISTER); +TF_CALL_int32(REGISTER_KERNELS); +TF_CALL_float(REGISTER_KERNELS); +#undef REGISTER_KERNELS -// TODO(ringwalt): Add a GPU implementation. We probably want to take a -// different approach, e.g. threads in a warp each taking a pass over the same -// data, and each thread summing a single bin. +#endif // GOOGLE_CUDA } // end namespace tensorflow diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h new file mode 100644 index 0000000000..cd3d560cd1 --- /dev/null +++ b/tensorflow/core/kernels/bincount_op.h @@ -0,0 +1,41 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_BINCOUNT_OP_H_ +#define TENSORFLOW_BINCOUNT_OP_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output); +}; + +} // end namespace functor + +} // end namespace tensorflow + +#endif // TENSORFLOW_BINCOUNT_OP_H_ diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc new file mode 100644 index 0000000000..6074b3e1f6 --- /dev/null +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -0,0 +1,114 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "external/cub_archive/cub/device/device_histogram.cuh" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/bincount_op.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + +template +struct BincountFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output) { + if (weights.size() != 0) { + return errors::InvalidArgument( + "Weights should not be passed as it should be " + "handled by unsorted_segment_sum"); + } + if (output.size() == 0) { + return Status::OK(); + } + // In case weight.size() == 0, use CUB + size_t temp_storage_bytes = 0; + const int32* d_samples = arr.data(); + T* d_histogram = output.data(); + int num_levels = output.size() + 1; + int32 lower_level = 0; + int32 upper_level = output.size(); + int num_samples = arr.size(); + const cudaStream_t& stream = GetCudaStream(context); + + // The first HistogramEven is to obtain the temp storage size required + // with d_temp_storage = NULL passed to the call. + auto err = cub::DeviceHistogram::HistogramEven( + /* d_temp_storage */ NULL, + /* temp_storage_bytes */ temp_storage_bytes, + /* d_samples */ d_samples, + /* d_histogram */ d_histogram, + /* num_levels */ num_levels, + /* lower_level */ lower_level, + /* upper_level */ upper_level, + /* num_samples */ num_samples, + /* stream */ stream); + if (err != cudaSuccess) { + return errors::Internal( + "Could not launch HistogramEven to get temp storage: ", + cudaGetErrorString(err), "."); + } + Tensor temp_storage; + TF_RETURN_IF_ERROR(context->allocate_temp( + DataTypeToEnum::value, + TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); + + void* d_temp_storage = temp_storage.flat().data(); + // The second HistogramEven is to actual run with d_temp_storage + // allocated with temp_storage_bytes. + err = cub::DeviceHistogram::HistogramEven( + /* d_temp_storage */ d_temp_storage, + /* temp_storage_bytes */ temp_storage_bytes, + /* d_samples */ d_samples, + /* d_histogram */ d_histogram, + /* num_levels */ num_levels, + /* lower_level */ lower_level, + /* upper_level */ upper_level, + /* num_samples */ num_samples, + /* stream */ stream); + if (err != cudaSuccess) { + return errors::Internal( + "Could not launch HistogramEven: ", cudaGetErrorString(err), "."); + } + return Status::OK(); + } +}; + +} // end namespace functor + +#define REGISTER_GPU_SPEC(type) \ + template struct functor::BincountFunctor; + +TF_CALL_int32(REGISTER_GPU_SPEC); +TF_CALL_float(REGISTER_GPU_SPEC); +#undef REGISTER_GPU_SPEC + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc new file mode 100644 index 0000000000..cb04b40637 --- /dev/null +++ b/tensorflow/core/kernels/bincount_op_test.cc @@ -0,0 +1,75 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace tensorflow { + +static Graph* Bincount(int arr_size, int nbins) { + Graph* g = new Graph(OpRegistry::Global()); + + Tensor arr(DT_INT32, TensorShape({arr_size})); + arr.flat() = arr.flat().setRandom().abs(); + + Tensor size(DT_INT32, TensorShape({static_cast(1)})); + size.flat()(0) = static_cast(nbins); + + Tensor weights(DT_INT32, TensorShape({0})); + + Node* node; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Bincount") + .Input(test::graph::Constant(g, arr)) + .Input(test::graph::Constant(g, size)) + .Input(test::graph::Constant(g, weights)) + .Attr("T", DT_INT32) + .Finalize(g, &node)); + return g; +} + +#define BM_BincountDev(K, NBINS, type) \ + static void BM_Bincount##_##type##_##K##_##NBINS(int iters) { \ + testing::ItemsProcessed(static_cast(iters) * K * 1024); \ + test::Benchmark(#type, Bincount(K * 1024, NBINS)).Run(iters); \ + } \ + BENCHMARK(BM_Bincount##_##type##_##K##_##NBINS); + +BM_BincountDev(32, 1000, cpu); +BM_BincountDev(32, 2000, cpu); +BM_BincountDev(32, 5000, cpu); +BM_BincountDev(64, 1000, cpu); +BM_BincountDev(64, 2000, cpu); +BM_BincountDev(64, 5000, cpu); +BM_BincountDev(128, 1000, cpu); +BM_BincountDev(128, 2000, cpu); +BM_BincountDev(128, 5000, cpu); + +BM_BincountDev(32, 1000, gpu); +BM_BincountDev(32, 2000, gpu); +BM_BincountDev(32, 5000, gpu); +BM_BincountDev(64, 1000, gpu); +BM_BincountDev(64, 2000, gpu); +BM_BincountDev(64, 5000, gpu); +BM_BincountDev(128, 1000, gpu); +BM_BincountDev(128, 2000, gpu); +BM_BincountDev(128, 5000, gpu); + +} // end namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc index 93c2d01221..c1693de538 100644 --- a/tensorflow/core/kernels/bucketize_op.cc +++ b/tensorflow/core/kernels/bucketize_op.cc @@ -15,15 +15,43 @@ limitations under the License. // See docs in ../ops/math_ops.cc. -#include -#include - +#include "tensorflow/core/kernels/bucketize_op.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { +using thread::ThreadPool; + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + template +struct BucketizeFunctor { + // PRECONDITION: boundaries_vector must be sorted. + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output) { + const int N = input.size(); + for (int i = 0; i < N; i++) { + auto first_bigger_it = std::upper_bound( + boundaries_vector.begin(), boundaries_vector.end(), input(i)); + output(i) = first_bigger_it - boundaries_vector.begin(); + } + + return Status::OK(); + } +}; +} // namespace functor + +template class BucketizeOp : public OpKernel { public: explicit BucketizeOp(OpKernelConstruction* context) : OpKernel(context) { @@ -34,36 +62,42 @@ class BucketizeOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input_tensor = context->input(0); - auto input = input_tensor.flat(); + const auto input = input_tensor.flat(); + Tensor* output_tensor = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); auto output = output_tensor->template flat(); - - const int N = input.size(); - for (int i = 0; i < N; i++) { - output(i) = CalculateBucketIndex(input(i)); - } + OP_REQUIRES_OK(context, functor::BucketizeFunctor::Compute( + context, input, boundaries_, output)); } private: - int32 CalculateBucketIndex(const T value) { - auto first_bigger_it = - std::upper_bound(boundaries_.begin(), boundaries_.end(), value); - return first_bigger_it - boundaries_.begin(); - } std::vector boundaries_; }; #define REGISTER_KERNEL(T) \ REGISTER_KERNEL_BUILDER( \ Name("Bucketize").Device(DEVICE_CPU).TypeConstraint("T"), \ - BucketizeOp); + BucketizeOp); + +REGISTER_KERNEL(int32); +REGISTER_KERNEL(int64); +REGISTER_KERNEL(float); +REGISTER_KERNEL(double); +#undef REGISTER_KERNEL + +#if GOOGLE_CUDA +#define REGISTER_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Bucketize").Device(DEVICE_GPU).TypeConstraint("T"), \ + BucketizeOp); REGISTER_KERNEL(int32); REGISTER_KERNEL(int64); REGISTER_KERNEL(float); REGISTER_KERNEL(double); #undef REGISTER_KERNEL +#endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/bucketize_op.h b/tensorflow/core/kernels/bucketize_op.h new file mode 100644 index 0000000000..c8e461beb9 --- /dev/null +++ b/tensorflow/core/kernels/bucketize_op.h @@ -0,0 +1,41 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_BUCKETIZE_OP_H_ +#define TENSORFLOW_BUCKETIZE_OP_H_ + +#include +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { +namespace functor { + +template +struct BucketizeFunctor { + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output); +}; + +} // namespace functor +} // namespace tensorflow + +#endif // TENSORFLOW_BUCKETIZE_OP_H_ diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc new file mode 100644 index 0000000000..325dee793b --- /dev/null +++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc @@ -0,0 +1,101 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/bucketize_op.h" +#include "tensorflow/core/kernels/cuda_device_array.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +template +__global__ void BucketizeCustomKernel( + const int32 size_in, const T* in, const int32 size_boundaries, + CudaDeviceArrayStruct boundaries_array, int32* out) { + const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array); + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in[i]; + int32 bucket = 0; + int32 count = size_boundaries; + while (count > 0) { + int32 l = bucket; + int32 step = count / 2; + l += step; + if (!(value < static_cast(boundaries[l]))) { + bucket = ++l; + count -= step + 1; + } else { + count = step; + } + } + out[i] = bucket; + } +} + +namespace functor { + +template +struct BucketizeFunctor { + // PRECONDITION: boundaries_vector must be sorted. + static Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output) { + const GPUDevice& d = context->eigen_device(); + + CudaDeviceArrayOnHost boundaries_array(context, + boundaries_vector.size()); + TF_RETURN_IF_ERROR(boundaries_array.Init()); + for (int i = 0; i < boundaries_vector.size(); ++i) { + boundaries_array.Set(i, boundaries_vector[i]); + } + TF_RETURN_IF_ERROR(boundaries_array.Finalize()); + + CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); + BucketizeCustomKernel + <<>>( + input.size(), input.data(), boundaries_vector.size(), + boundaries_array.data(), output.data()); + + return Status::OK(); + } +}; +} // namespace functor + +#define REGISTER_GPU_SPEC(type) \ + template struct functor::BucketizeFunctor; + +REGISTER_GPU_SPEC(int32); +REGISTER_GPU_SPEC(int64); +REGISTER_GPU_SPEC(float); +REGISTER_GPU_SPEC(double); +#undef REGISTER_GPU_SPEC + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 21f5cb1716..c2d24d1f12 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -236,6 +236,7 @@ class Conv3DBackpropInputOp : public OpKernel { REGISTER_KERNEL_BUILDER( \ Name("Conv3DBackpropInputV2").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DBackpropInputOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -383,6 +384,7 @@ class Conv3DBackpropFilterOp : public OpKernel { .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ Conv3DBackpropFilterOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -409,6 +411,7 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); +DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor @@ -1098,22 +1101,27 @@ class Conv3DBackpropFilterOp : public OpKernel { bool cudnn_use_autotune_; }; -REGISTER_KERNEL_BUILDER( - Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), - Conv3DBackpropInputOp); -REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .HostMemory("input_sizes"), - Conv3DBackpropInputOp); -REGISTER_KERNEL_BUILDER( - Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), - Conv3DBackpropFilterOp); -REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") - .Device(DEVICE_GPU) - .TypeConstraint("T") - .HostMemory("filter_sizes"), - Conv3DBackpropFilterOp); +#define REGISTER_GPU_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropInputOp); \ + REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("input_sizes"), \ + Conv3DBackpropInputOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint("T"), \ + Conv3DBackpropFilterOp); \ + REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("filter_sizes"), \ + Conv3DBackpropFilterOp); +TF_CALL_half(REGISTER_GPU_KERNEL); +TF_CALL_float(REGISTER_GPU_KERNEL); +#undef REGISTER_GPU_KERNEL + #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 8a89d564de..37cb67bc51 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -145,6 +145,7 @@ class Conv3DOp : public BinaryOp { REGISTER_KERNEL_BUILDER( \ Name("Conv3D").Device(DEVICE_CPU).TypeConstraint("T"), \ Conv3DOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -482,12 +483,16 @@ namespace functor { const std::array& padding_right, \ typename TTypes::Tensor out, TensorFormat format); +DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); #undef DECLARE_GPU_SPEC } // namespace functor // Registration of the GPU implementations. +REGISTER_KERNEL_BUILDER( + Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), + Conv3DOp); REGISTER_KERNEL_BUILDER( Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), Conv3DOp); diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc index 7bdd8d22a3..39c8814073 100644 --- a/tensorflow/core/kernels/cwise_op_acosh.cc +++ b/tensorflow/core/kernels/cwise_op_acosh.cc @@ -20,16 +20,8 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Acosh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double); #endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index e0644323c0..a7673afd0b 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -20,17 +20,9 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Asinh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYC +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index 058f5140c5..7b688db4c5 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -20,17 +20,9 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, complex64, complex128); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Atanh") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - UnaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYC +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 6c22b124de..d32185b6bf 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -49,7 +49,11 @@ template struct scalar_asinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::asinh(a); +#else return std::asinh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -61,7 +65,11 @@ template struct scalar_acosh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::acosh(a); +#else return std::acosh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template @@ -73,7 +81,11 @@ template struct scalar_atanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const { +#if EIGEN_HAS_CXX11_MATH + return numext::atanh(a); +#else return std::atanh(a); +#endif // EIGEN_HAS_CXX11_MATH } }; template diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 9804d7d38e..9347978d51 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -231,7 +231,8 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args, } // Pad to vector-register width (if needed). for (int64 d = 0; d < pad_size; ++d) { - buffer[buf_base + vectorized_size + scalar_size + d] = 0; + buffer[buf_base + vectorized_size + scalar_size + d] = + static_cast(0); } } } @@ -297,7 +298,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -318,7 +319,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -346,7 +347,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, if (depth_multiplier > 1) { for (int64 d = 0; d < in_depth; ++d) { const int64 index = d * args.depth_multiplier; - T accum = 0; + T accum = static_cast(0); for (int64 dm = 0; dm < dm_vectorized_size; dm += kPacketSize) { const auto v = Eigen::internal::ploadu(out_buffer + index + dm); accum += Eigen::internal::predux(v); @@ -510,6 +511,8 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA +extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; extern template struct LaunchDepthwiseConvBackpropInputOp; @@ -884,6 +887,8 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA +extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; extern template struct LaunchDepthwiseConvBackpropFilterOp; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index bbeeaf7895..7c43dcb670 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -94,7 +94,7 @@ struct DepthwiseConv2DKernel { for (int i = 0; i < output_vectorized_size; i += kPacketSize) { // Reset accumulator. - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { // Calculate index. const int64 index = i + j * padded_filter_inner_dim_size; @@ -115,7 +115,7 @@ struct DepthwiseConv2DKernel { } if (output_scalar_size > 0) { - auto vaccum = Eigen::internal::pset1(0); + auto vaccum = Eigen::internal::pset1(static_cast(0)); for (int j = 0; j < filter_spatial_size; ++j) { const int64 index = output_vectorized_size + j * padded_filter_inner_dim_size; @@ -246,6 +246,7 @@ extern template class LaunchConv2DOp; #if GOOGLE_CUDA // Extern template instantiated in depthwise_conv_op_gpu.cc. +extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; @@ -419,12 +420,18 @@ class DepthwiseConv2dNativeOp : public BinaryOp { Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint("T"), \ DepthwiseConv2dNativeOp); +TF_CALL_half(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); #if !defined(PLATFORM_WINDOWS) || !defined(_DEBUG) TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative") + .Device(DEVICE_GPU) + .TypeConstraint("T"), + DepthwiseConv2dNativeOp); + REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), DepthwiseConv2dNativeOp); diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index aa5b5c76f6..097a9f5bfa 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -158,7 +158,8 @@ struct DepthwiseFilterPadOp { } // Pad the remainder of output to vector-register boundary. for (int64 j = 0; j < pad_size; ++j) { - padded_filter[output_base + vectorized_size + scalar_size + j] = 0; + padded_filter[output_base + vectorized_size + scalar_size + j] = + static_cast(0); } } } @@ -266,7 +267,7 @@ struct DepthwiseInputCopyOp { // Pad the remainder of the output to vector register boundary. for (int64 d = 0; d < output_pad_size; ++d) { - in_buf[d] = 0; + in_buf[d] = static_cast(0); } in_buf += output_pad_size; diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index ecfe51d599..903aac5d68 100644 --- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -105,7 +105,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = 0; + T sum = static_cast(0); const int input_offset_temp = in_rows * OB; if (input_row_start >= 0 && input_col_start >= 0 && @@ -258,8 +258,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( __syncthreads(); if (depth_in_range) { - T sum1 = 0; - T sum2 = 0; + T sum1 = static_cast(0); + T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -369,7 +369,7 @@ __global__ void __launch_bounds__(1024, 2) const int input_row_end = input_row_start + filter_rows; const int input_col_end = input_col_start + filter_cols; - T sum = 0; + T sum = static_cast(0); if (input_row_start >= 0 && input_col_start >= 0 && input_row_end < in_rows && input_col_end < in_cols) { // Loop that doesn't need to check for boundary conditions. @@ -529,8 +529,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( __syncthreads(); if (slice_in_range) { - T sum1 = 0; - T sum2 = 0; + T sum1 = static_cast(0); + T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; UNROLL for (int r = 0; r < filter_rows; ++r) { @@ -710,6 +710,7 @@ void LaunchDepthwiseConvOp::operator()(OpKernelContext* ctx, "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed")); } +template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; template struct LaunchDepthwiseConvOp; @@ -744,7 +745,7 @@ __global__ void __launch_bounds__(640, 2) const int in_r = (thread_id / in_depth / in_cols) % in_rows; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = 0; + T sum = static_cast(0); const int out_r_start = tf_max(0, (in_r - filter_rows + pad_rows + stride) / stride); @@ -810,7 +811,7 @@ __global__ void __launch_bounds__(640, 2) const int in_d = (thread_id / in_cols / in_rows) % in_depth; const int b = thread_id / in_depth / in_cols / in_rows; - T sum = 0; + T sum = static_cast(0); const int out_d_start = in_d * depth_multiplier; const int out_d_end = out_d_start + depth_multiplier; @@ -919,6 +920,7 @@ void LaunchDepthwiseConvBackpropInputOp::operator()( "utGPULaunch failed")); } +template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; template struct LaunchDepthwiseConvBackpropInputOp; @@ -1631,6 +1633,7 @@ void LaunchDepthwiseConvBackpropFilterOp::operator()( "terGPULaunch failed")); } +template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; template struct LaunchDepthwiseConvBackpropFilterOp; } // namespace tensorflow diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index 0ecb829f34..1688674eb7 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -54,25 +54,20 @@ struct FusedBatchNorm { Tensor* batch_var_output, Tensor* saved_mean_output, Tensor* saved_var_output, TensorFormat tensor_format, bool is_training) { - // Currently U is ignored, since we only support the case where T and U are - // both float32. - // TODO(reedwm): Add float16 support, use U, and remove these asserts. - static_assert(std::is_same::value, "T currently must be float."); - static_assert(std::is_same::value, "U currently must be float."); OP_REQUIRES(context, tensor_format == FORMAT_NHWC, errors::Internal("The CPU implementation of FusedBatchNorm " "only supports NHWC tensor format for now.")); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec offset(offset_input.vec()); - typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); - typename TTypes::ConstVec estimated_variance( - estimated_variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec offset(offset_input.vec()); + typename TTypes::ConstVec estimated_mean(estimated_mean_input.vec()); + typename TTypes::ConstVec estimated_variance( + estimated_variance_input.vec()); typename TTypes::Tensor y(y_output->tensor()); - typename TTypes::Vec batch_mean(batch_mean_output->vec()); - typename TTypes::Vec batch_var(batch_var_output->vec()); - typename TTypes::Vec saved_mean(saved_mean_output->vec()); - typename TTypes::Vec saved_var(saved_var_output->vec()); + typename TTypes::Vec batch_mean(batch_mean_output->vec()); + typename TTypes::Vec batch_var(batch_var_output->vec()); + typename TTypes::Vec saved_mean(saved_mean_output->vec()); + typename TTypes::Vec saved_var(saved_var_output->vec()); const CPUDevice& d = context->eigen_device(); @@ -93,15 +88,15 @@ struct FusedBatchNorm { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth); + auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1; - T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); // This adjustment is for Bessel's correction - T rest_size_adjust = - static_cast(rest_size) / static_cast(rest_size_minus_one); + U rest_size_adjust = + static_cast(rest_size) / static_cast(rest_size_minus_one); - Eigen::Tensor mean(depth); - Eigen::Tensor variance(depth); + Eigen::Tensor mean(depth); + Eigen::Tensor variance(depth); if (is_training) { mean.device(d) = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv); batch_mean.device(d) = mean; @@ -129,7 +124,7 @@ struct FusedBatchNorm { auto x_shifted = x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec); - y.reshape(rest_by_depth).device(d) = x_shifted; + y.reshape(rest_by_depth).device(d) = x_shifted.template cast(); } }; @@ -138,7 +133,7 @@ struct FusedBatchNormGrad { void operator()(OpKernelContext* context, const Tensor& y_backprop_input, const Tensor& x_input, const Tensor& scale_input, const Tensor& mean_input, const Tensor& variance_input, - T epsilon, Tensor* x_backprop_output, + U epsilon, Tensor* x_backprop_output, Tensor* scale_backprop_output, Tensor* offset_backprop_output, TensorFormat tensor_format) { OP_REQUIRES(context, tensor_format == FORMAT_NHWC, @@ -147,12 +142,12 @@ struct FusedBatchNormGrad { typename TTypes::ConstTensor y_backprop( y_backprop_input.tensor()); typename TTypes::ConstTensor x(x_input.tensor()); - typename TTypes::ConstVec scale(scale_input.vec()); - typename TTypes::ConstVec mean(mean_input.vec()); - typename TTypes::ConstVec variance(variance_input.vec()); + typename TTypes::ConstVec scale(scale_input.vec()); + typename TTypes::ConstVec mean(mean_input.vec()); + typename TTypes::ConstVec variance(variance_input.vec()); typename TTypes::Tensor x_backprop(x_backprop_output->tensor()); - typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); - typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); + typename TTypes::Vec scale_backprop(scale_backprop_output->vec()); + typename TTypes::Vec offset_backprop(offset_backprop_output->vec()); // Note: the following formulas are used to compute the gradients for // back propagation. @@ -181,8 +176,8 @@ struct FusedBatchNormGrad { bcast_spec.set(0, rest_size); #endif - auto x_rest_by_depth = x.reshape(rest_by_depth); - T rest_size_inv = static_cast(1.0f / static_cast(rest_size)); + auto x_rest_by_depth = x.reshape(rest_by_depth).template cast(); + U rest_size_inv = static_cast(1.0f / static_cast(rest_size)); auto x_mean_rest_by_depth = mean.reshape(one_by_depth).broadcast(bcast_spec); @@ -192,7 +187,8 @@ struct FusedBatchNormGrad { coef0.eval().reshape(one_by_depth).broadcast(bcast_spec); auto x_scaled = x_centered * coef0_rest_by_depth; - auto y_backprop_rest_by_depth = y_backprop.eval().reshape(rest_by_depth); + auto y_backprop_rest_by_depth = + y_backprop.eval().reshape(rest_by_depth).template cast(); scale_backprop.device(d) = (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims); auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims); @@ -214,7 +210,7 @@ struct FusedBatchNormGrad { .reshape(one_by_depth) .broadcast(bcast_spec); x_backprop.reshape(rest_by_depth).device(d) = - coef1 * (y_backprop_centered - x_centered * coef2); + (coef1 * (y_backprop_centered - x_centered * coef2)).template cast(); } }; @@ -689,6 +685,18 @@ REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") .TypeConstraint("U"), FusedBatchNormGradOp); +REGISTER_KERNEL_BUILDER(Name("FusedBatchNormV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("U"), + FusedBatchNormOp); + +REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("U"), + FusedBatchNormGradOp); + #if GOOGLE_CUDA REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h index 38b24d7011..3af104bf95 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.h +++ b/tensorflow/core/kernels/fused_batch_norm_op.h @@ -92,26 +92,28 @@ struct FusedBatchNormFreezeGrad { // offset_backprop = sum(y_backprop) // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon)) // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) - offset_backprop.device(d) = y_backprop.reshape(rest_by_depth) - .template cast() - .sum(reduction_axis); + + auto y_backprop_rest_by_depth = + y_backprop.reshape(rest_by_depth).template cast(); + auto input_rest_by_depth = input.reshape(rest_by_depth).template cast(); + + offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis); // scratch1 = rsqrt(pop_var + epsilon) scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt(); // scratch2 = sum(y_backprop * (x - mean)) scratch2.device(d) = - (y_backprop.reshape(rest_by_depth).template cast() * - (input.reshape(rest_by_depth).template cast() - + (y_backprop_rest_by_depth * + (input_rest_by_depth - pop_mean.reshape(one_by_depth).broadcast(rest_by_one))) .sum(reduction_axis); x_backprop.reshape(rest_by_depth).device(d) = - (y_backprop.reshape(rest_by_depth).template cast() * - ((scratch1 * scale) - .eval() - .reshape(one_by_depth) - .broadcast(rest_by_one))) + (y_backprop_rest_by_depth * ((scratch1 * scale) + .eval() + .reshape(one_by_depth) + .broadcast(rest_by_one))) .template cast(); scale_backprop.device(d) = scratch2 * scratch1; } diff --git a/tensorflow/core/kernels/lmdb_reader_op.cc b/tensorflow/core/kernels/lmdb_reader_op.cc index 3bb07301b5..31a427f2c9 100755 --- a/tensorflow/core/kernels/lmdb_reader_op.cc +++ b/tensorflow/core/kernels/lmdb_reader_op.cc @@ -36,7 +36,7 @@ class LMDBReader : public ReaderBase { Status OnWorkStartedLocked() override { MDB_CHECK(mdb_env_create(&mdb_env_)); - int flags = MDB_RDONLY | MDB_NOTLS; + int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK; // Check if the LMDB filename is actually a file instead of a directory. // If so, set appropriate flags so we can open it. @@ -57,10 +57,13 @@ class LMDBReader : public ReaderBase { if (mdb_env_ != nullptr) { if (mdb_cursor_) { mdb_cursor_close(mdb_cursor_); + mdb_cursor_ = nullptr; } - mdb_txn_abort(mdb_txn_); mdb_dbi_close(mdb_env_, mdb_dbi_); + mdb_txn_abort(mdb_txn_); mdb_env_close(mdb_env_); + mdb_txn_ = nullptr; + mdb_dbi_ = 0; mdb_env_ = nullptr; } return Status::OK(); diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index e2cf605811..d8bdb700e6 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -34,6 +34,7 @@ limitations under the License. #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" @@ -358,6 +359,8 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -405,7 +408,7 @@ class MaxPoolingGradOp : public OpKernel { DnnPoolingGradOp::Compute( context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop, - output_shape); + output_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPoolGrad only supports NHWC format"; @@ -420,6 +423,7 @@ class MaxPoolingGradOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; #endif // GOOGLE_CUDA @@ -884,6 +888,9 @@ class MaxPoolingWithArgmaxOp : public OpKernel { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); + + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -902,14 +909,15 @@ class MaxPoolingWithArgmaxOp : public OpKernel { Tensor* argmax = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax)); - LaunchMaxPoolingWithArgmax::launch(context, params, tensor_in, - output, argmax); + LaunchMaxPoolingWithArgmax::launch( + context, params, tensor_in, output, argmax, propagate_nans_); } private: std::vector ksize_; std::vector stride_; Padding padding_; + bool propagate_nans_; }; template @@ -1045,6 +1053,9 @@ class MaxPoolingNoMaskOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); + + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -1068,9 +1079,10 @@ class MaxPoolingNoMaskOp : public OpKernel { // These is_int8x4 checks avoid linker errors for missing qint8 kernels. if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_, - stride_, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize_, stride_, padding_, data_format_, + tensor_in, out_shape, propagate_nans_); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); @@ -1079,7 +1091,7 @@ class MaxPoolingNoMaskOp : public OpKernel { tensor_in, output); } else if (data_format_ == FORMAT_NHWC) { LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output); + output, propagate_nans_); } else { LOG(FATAL) << "MaxPool currently only supports the following (layout, " "type) combinations: (NHWC, non-qint8), " @@ -1098,6 +1110,7 @@ class MaxPoolingNoMaskOp : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template @@ -1127,6 +1140,8 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -1168,16 +1183,17 @@ class MaxPoolingNoMaskV2Op : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); if (use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, - stride, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize, stride, padding_, data_format_, tensor_in, + out_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPool only supports NHWC format"; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); LaunchMaxPoolingNoMask::launch(context, params, tensor_in, - output); + output, propagate_nans_); } } @@ -1187,18 +1203,20 @@ class MaxPoolingNoMaskV2Op : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template struct LaunchMaxPoolingNoMask { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output) { + const Tensor& input, Tensor* output, bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, - output->flat().data(), nullptr, context->eigen_gpu_device()); + output->flat().data(), nullptr, context->eigen_gpu_device(), + propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardNoMask")); @@ -1209,7 +1227,8 @@ struct LaunchMaxPoolingNoMask { template struct LaunchMaxPoolingWithArgmax { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output, Tensor* argmax) { + const Tensor& input, Tensor* output, Tensor* argmax, + bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax()( input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, @@ -1217,7 +1236,7 @@ struct LaunchMaxPoolingWithArgmax { params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, output->flat().data(), reinterpret_cast(argmax->flat().data()), - context->eigen_gpu_device()); + context->eigen_gpu_device(), propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardWithArgmax")); diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index 26f5274804..f8daaca4c9 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -29,6 +29,15 @@ limitations under the License. namespace tensorflow { namespace { +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) { + if (propagate_nans) { + return !(a <= b); + } else { + return a > b; + } +} + // This is Yangqing's custom kernel for the maxpooling operation. There are // three functions: MaxPoolForwardNCHW and MaxPoolForwardNHWC are the two // forward functions, dealing with the forward case. MaxPoolBackward is the @@ -51,7 +60,7 @@ namespace { // const int output_size = batch * channels * pooled_height * pooled_width; // MaxPoolForwardNCHW<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, // kThreadsPerBlock, 0, cuda_stream>>>(...); -template +template __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, @@ -77,7 +86,7 @@ __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = c * height * width + h * width + w; - if (bottom_data_n[idx] > maxval) { + if (IsGreaterThan(bottom_data_n[idx], maxval)) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -126,7 +135,7 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C( } } -template +template __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, const int height, const int width, const int channels, const int pooled_height, @@ -153,7 +162,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = (h * width + w) * channels + c; - if (bottom_data_n[idx] > maxval) { + if (IsGreaterThan(bottom_data_n[idx], maxval)) { maxidx = idx; maxval = bottom_data_n[idx]; } @@ -390,15 +399,24 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( const int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, - int64* mask, const Eigen::GpuDevice& d) { + int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) { const int kThreadsPerBlock = 1024; const int output_size = batch * channels * pooled_height * pooled_width; - - MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>>( - output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + if (propagate_nans) { + MaxPoolForwardNHWC + <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, + kThreadsPerBlock, 0, d.stream()>>>( + output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); + } else { + MaxPoolForwardNHWC + <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, + kThreadsPerBlock, 0, d.stream()>>>( + output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); + } return d.ok(); } diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h index 34203797cf..38ebb34248 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.h +++ b/tensorflow/core/kernels/maxpooling_op_gpu.h @@ -39,7 +39,7 @@ struct MaxPoolForwardWithOptionalArgmax { const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, int64* mask, - const Eigen::GpuDevice& d); + const Eigen::GpuDevice& d, bool propagate_nans); }; struct MaxPoolForwardNoMask_NCHW_VECT_C { diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index a240ee44fb..c4d5a45d3c 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifdef INTEL_MKL - #ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ #define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ +#ifdef INTEL_MKL + #include #include #include "tensorflow/core/framework/numeric_op.h" @@ -35,6 +35,10 @@ limitations under the License. #include "mkl_dnn_types.h" #include "tensorflow/core/util/mkl_util.h" +#ifdef INTEL_MKL_DNN +using mkldnn::stream; +#endif + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -57,6 +61,71 @@ class MklToTfOp : public OpKernel { VLOG(1) << "MKLToTFConversion complete successfully."; } +#ifdef INTEL_MKL_DNN + static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, + string data_format_str, DataType op_data_type, + bool has_avx512f, uint input_number) { + try { + // Check that input tensor is in MKL format. + const Tensor& input_tensor = MklGetInput(context, input_number); + MklDnnShape input_shape; + GetMklShape(context, input_number, &input_shape); + + // if input is already in Tf format, then copy input tensor to output. + if (!input_shape.IsMklTensor()) { + context->set_output(input_number, input_tensor); + VLOG(1) << "MKLToTFConversion: No conversion needed, " + << "copying input to output"; + return; + } + + // Check that input data type is same as operator data type and that it + // is same as output data type. + DataType input_data_type = op_kernel->input_type(input_number); + DataType output_data_type = op_kernel->output_type(input_number); + CHECK_EQ(op_data_type, input_data_type); + CHECK_EQ(op_data_type, output_data_type); + + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData input(&cpu_engine); + + // Get Mkl layout of input tensor. + auto input_mkl_md = input_shape.GetMklLayout(); + // Get TensorFlow layout of input tensor. Expected output of conversion + // has same layout as Tensorflow layout of input tensor. + auto output_tf_md = input_shape.GetTfLayout(); + auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); + // Set input Mkl layout as the user layout. + input.SetUsrMem(input_mkl_md, &input_tensor); + + // Allocate output tensor. + TensorShape output_shape = input_shape.GetTfShape(); + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output( + input_number, output_shape, &output_tensor)); + CHECK_NOTNULL(output_tensor); + + // Do we need to reorder Mkl layout into TensorFlow layout? + if (input.IsReorderNeeded(output_tf_pd)) { + // Insert reorder between Mkl layout and TensorFlow layout. + std::vector net; + CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, output_tensor, &net), + true); + stream(stream::kind::eager).submit(net).wait(); + } else { + // If not, just forward input tensor to output tensor. + CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); + } + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } +#else static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, string data_format_str, DataType op_data_type, bool has_avx512f, uint input_number) { @@ -91,8 +160,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, - context->allocate_output(input_number, output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape, + &output_tensor)); dnnLayout_t output_layout = static_cast(input_shape.GetTfLayout()); @@ -106,6 +175,7 @@ class MklToTfOp : public OpKernel { output_buffer); VLOG(1) << "MKLToTFConversion complete successfully."; } +#endif private: /// Data format of the operation @@ -132,5 +202,5 @@ class MklToTfOp : public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_CPU); #undef REGISTER_CPU } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ #endif // INTEL_MKL +#endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h index d3d1b56c9d..93ef512778 100644 --- a/tensorflow/core/kernels/ops_util.h +++ b/tensorflow/core/kernels/ops_util.h @@ -98,6 +98,19 @@ gtl::InlinedVector ComputeStride(const TensorShape& shape) { return strides; } +// Helper to compute 'strides' given an Eigen TensorDimensions +template +gtl::InlinedVector ComputeEigenStrides(const EigenDimensions& shape) { + const int ndims = shape.rank(); + gtl::InlinedVector strides(ndims); + T stride = 1; + for (int i = ndims - 1; i >= 0; --i) { + strides[i] = stride; + stride *= static_cast(shape[i]); + } + return strides; +} + } // namespace tensorflow #endif // TENSORFLOW_KERNELS_OPS_UTIL_H_ diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index 7dee751c4f..ac90f67ce0 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -143,7 +143,7 @@ void DnnPoolingOp::Compute( perftools::gputools::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape) { + const TensorShape& tensor_out_shape, bool propagate_nans) { Tensor* tensor_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, tensor_out_shape, &tensor_out)); @@ -188,7 +188,8 @@ void DnnPoolingOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols); + .set_horizontal_padding(params.pad_cols) + .set_propagate_nans(propagate_nans); perftools::gputools::dnn::BatchDescriptor input_desc; input_desc.set_count(params.tensor_in_batch) @@ -237,7 +238,7 @@ void DnnPoolingGradOp::Compute( const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape) { + const TensorShape& tensor_in_shape, bool propagate_nans) { CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) || (tensor_in && tensor_out)) << "For MaxPoolGrad, both tensor_in and tensor_out needs to be " @@ -327,7 +328,8 @@ void DnnPoolingGradOp::Compute( .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols); + .set_horizontal_padding(params.pad_cols) + .set_propagate_nans(propagate_nans); perftools::gputools::dnn::BatchDescriptor orig_output_desc; orig_output_desc.set_count(params.tensor_in_batch) diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index b594f39fad..1458456585 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -44,7 +44,7 @@ class DnnPoolingOp { const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape); + const TensorShape& tensor_out_shape, bool propagate_nans); }; // A helper class that launch the cudnn pooling backward operations. @@ -60,7 +60,7 @@ class DnnPoolingGradOp { const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape); + const TensorShape& tensor_in_shape, bool propagate_nans); }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 8be0c56798..337c8e5c17 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -489,7 +489,7 @@ class QuantizedAddOp : public OpKernel { // adding zero leaves the result unchanged, and to contain the largest of // the two input values with some room to spare. const float smallest_min = std::min(min_x, min_y); - const float largest_max = std::min(max_x, max_y); + const float largest_max = std::max(max_x, max_y); const float biggest_range = std::max(std::abs(smallest_min), std::abs(largest_max)); const float output_range = (biggest_range * (1 << 14)); diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index a37c757865..55a8b9c9b6 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -577,7 +577,7 @@ struct FillPhiloxRandomKernel { const size_t kGroupSize = Distribution::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(0); + const size_t total_item_count = item.get_global_range(); size_t offset = item_id * kGroupSize; gen_.Skip(item_id); @@ -633,7 +633,7 @@ struct FillPhiloxRandomKernel { PhiloxRandom::kResultElementCount; const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(0); + const size_t total_item_count = item.get_global_range(); size_t group_index = item_id; size_t offset = group_index * kGroupSize; diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 4302a68a18..2334e50f1d 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -376,6 +376,9 @@ struct UnsortedSegmentSumFunctor auto data_flat = typename TTypes::ConstTensor(data, N, data_size / N); for (int64 i = 0; i < N; ++i) { Index j = internal::SubtleMustCopy(segment_ids(i)); + if (j < 0) { + continue; + } OP_REQUIRES(ctx, FastBoundsCheck(j, output_rows), errors::InvalidArgument( "segment_ids", SliceDebugString(segment_ids_shape, i), diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 412c1d601d..b10bea72ba 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -30,14 +30,14 @@ namespace functor { #ifdef GOOGLE_CUDA typedef Eigen::GpuDevice GPUDevice; // Functor for SegmentSumGPUOp. -// 'output_rows': the number of output segments (unique segment ids in +// output_rows: the number of output segments (unique segment ids in // 'segment_ids'). -// 'segment_ids_shape': shape of 'segment_ids' tensor. -// 'segment_ids': unsorted map from input to output segment ids at which to +// segment_ids_shape: shape of 'segment_ids' tensor. +// segment_ids: unsorted map from input to output segment ids at which to // perform segment sum operation. -// 'data_size': size of input data tensor. -// 'data': input data tensor. -// 'output': output reshaped to {output_rows, output.size/output_rows} +// data_size: size of input data tensor. +// data: input data tensor. +// output: output reshaped to {output_rows, output.size/output_rows} template struct SegmentSumFunctor { void operator()(OpKernelContext* ctx, const GPUDevice& d, @@ -61,14 +61,14 @@ struct UnsortedSegmentBaseFunctor{ }; // Functor for UnsortedSegmentSumOp. -// 'output_rows': the number of output segments (unique segment ids in +// output_rows: the number of output segments (unique segment ids in // 'segment_ids'). -// 'segment_ids_shape': shape of 'segment_ids' tensor. -// 'segment_ids': unsorted map from input to output segment ids at which to +// segment_ids_shape: shape of 'segment_ids' tensor. +// segment_ids: unsorted map from input to output segment ids at which to // perform segment sum operation. -// 'data_size': size of input data tensor. -// 'data': input data tensor. -// 'output': output reshaped to {output_rows, output.size/output_rows} +// data_size: size of input data tensor. +// data: input data tensor. +// output: output reshaped to {output_rows, output.size/output_rows} template struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, @@ -79,14 +79,14 @@ struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor struct UnsortedSegmentMaxFunctor: public UnsortedSegmentBaseFunctor { void operator()(OpKernelContext* ctx, const Device& d, diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc index 721f9b949b..28a39bae3f 100644 --- a/tensorflow/core/kernels/shape_ops.cc +++ b/tensorflow/core/kernels/shape_ops.cc @@ -341,7 +341,12 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .Device(DEVICE_CPU) .HostMemory("dim") .TypeConstraint("Tdim"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_CPU) + .HostMemory("dim") + .TypeConstraint("Tdim"), + ExpandDimsOp); #if GOOGLE_CUDA #define REGISTER_GPU_KERNEL(type) \ @@ -350,7 +355,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); \ + REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tdim") \ + .HostMemory("dim"), \ + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); TF_CALL_bool(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL @@ -362,7 +373,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .TypeConstraint("Tdim") + .HostMemory("input") + .HostMemory("dim") + .HostMemory("output"), + ExpandDimsOp); #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL @@ -372,7 +391,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .TypeConstraint("T") \ .TypeConstraint("Tdim") \ .HostMemory("dim"), \ - ExpandDimsOp); + ExpandDimsOp); \ + REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T") \ + .TypeConstraint("Tdim") \ + .HostMemory("dim"), \ + ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); TF_CALL_bool(REGISTER_SYCL_KERNEL); #undef REGISTER_SYCL_KERNEL @@ -384,7 +409,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), - ExpandDimsOp); + ExpandDimsOp); +REGISTER_KERNEL_BUILDER(Name("ExpandDims") + .Device(DEVICE_SYCL) + .TypeConstraint("T") + .TypeConstraint("Tdim") + .HostMemory("input") + .HostMemory("dim") + .HostMemory("output"), + ExpandDimsOp); #endif // TENSORFLOW_USE_SYCL // Squeeze --------------------------------------- diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h index ac607f4e8b..55be308901 100644 --- a/tensorflow/core/kernels/shape_ops.h +++ b/tensorflow/core/kernels/shape_ops.h @@ -145,6 +145,7 @@ class SizeOp : public OpKernel { bool IsExpensive() override { return false; } }; +template class ExpandDimsOp : public OpKernel { public: explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -153,7 +154,7 @@ class ExpandDimsOp : public OpKernel { OP_REQUIRES(ctx, ctx->input(0).dtype() != DT_VARIANT, errors::InvalidArgument("ExpandDims on Variant not supported")); - int32 dim = ctx->input(1).flat()(0); + Tdim dim = ctx->input(1).flat()(0); OP_REQUIRES( ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()), errors::InvalidArgument("Tried to expand dim index ", dim, @@ -175,7 +176,7 @@ class ExpandDimsOp : public OpKernel { } // Clamp to the end if needed. - dim = std::min(dim, existing_dims_size); + dim = std::min(dim, existing_dims_size); new_shape.emplace(new_shape.begin() + dim, 1); const TensorShape output_shape(new_shape); diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 73b6d4cf6a..8fc40db3cc 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc index 20f0edf309..96c051c636 100644 --- a/tensorflow/core/kernels/transpose_op.cc +++ b/tensorflow/core/kernels/transpose_op.cc @@ -31,13 +31,14 @@ limitations under the License. namespace tensorflow { -// inv = InvertPermutationOp(T p) takes a permutation of +// inv = InvertPermutationOp(T p) takes a permutation of // integers 0, 1, ..., n - 1 and returns the inverted // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n). // -// REQUIRES: input is a vector of int32. +// REQUIRES: input is a vector of int32 or int64. // REQUIRES: input is a permutation of 0, 1, ..., n-1. +template class InvertPermutationOp : public OpKernel { public: explicit InvertPermutationOp(OpKernelConstruction* context) @@ -48,20 +49,19 @@ class InvertPermutationOp : public OpKernel { OP_REQUIRES( context, TensorShapeUtils::IsVector(input.shape()), errors::InvalidArgument("invert_permutation expects a 1D vector.")); - auto Tin = input.vec(); + auto Tin = input.vec(); OP_REQUIRES(context, FastBoundsCheck(Tin.size(), std::numeric_limits::max()), errors::InvalidArgument("permutation of nonnegative int32s " "must have <= int32 max elements")); - const int32 N = - static_cast(Tin.size()); // Safe: bounds-checked above. + const T N = static_cast(Tin.size()); // Safe: bounds-checked above. Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); - auto Tout = output->vec(); + auto Tout = output->vec(); std::fill_n(Tout.data(), N, -1); for (int i = 0; i < N; ++i) { - const int32 d = internal::SubtleMustCopy(Tin(i)); + const T d = internal::SubtleMustCopy(Tin(i)); OP_REQUIRES(context, FastBoundsCheck(d, N), errors::InvalidArgument(d, " is not between 0 and ", N)); OP_REQUIRES(context, Tout(d) == -1, @@ -73,14 +73,23 @@ class InvertPermutationOp : public OpKernel { REGISTER_KERNEL_BUILDER( Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER( + Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint("T"), + InvertPermutationOp); REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER(Name("InvertPermutation") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + InvertPermutationOp); #ifdef TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("InvertPermutation") @@ -88,7 +97,13 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation") .TypeConstraint("T") .HostMemory("x") .HostMemory("y"), - InvertPermutationOp); + InvertPermutationOp); +REGISTER_KERNEL_BUILDER(Name("InvertPermutation") + .Device(DEVICE_SYCL) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + InvertPermutationOp); #endif // TENSORFLOW_USE_SYCL namespace { diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index 701c5f6d2b..d087784c8a 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include @@ -21,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { @@ -33,8 +35,6 @@ class UniqueOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), - errors::InvalidArgument("unique expects a 1D vector.")); // TODO(dga): Make unique polymorphic for returning int32 and int64 // vectors to support large tensors. OP_REQUIRES(context, @@ -42,31 +42,102 @@ class UniqueOp : public OpKernel { errors::InvalidArgument( "unique does not support input tensors larger than ", std::numeric_limits::max(), " elements")); - auto Tin = input.vec(); - const int64 N = static_cast(Tin.size()); + + int64 axis = 0; + std::vector new_sizes{1, input.NumElements(), 1}; + if (context->num_inputs() == 1) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + // In case of UniqueV2, the axis is a 1D vector. The purpose is + // to allow specifying either "no axis" or "axis". The `[]` means + // "no axis", while `[x]` means `axis = x`. + const Tensor& axis_tensor = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()), + errors::InvalidArgument("axis expects a 1D vector.")); + OP_REQUIRES( + context, axis_tensor.NumElements() <= 1, + errors::InvalidArgument( + "axis does not support input tensors larger than 1 elements")); + if (axis_tensor.NumElements() == 0) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + auto axis_vec = axis_tensor.vec(); + axis = axis_vec(0); + axis = axis < 0 ? axis + input.dims() : axis; + OP_REQUIRES(context, 0 <= axis && axis < input.dims(), + errors::InvalidArgument("axis has to be between [0, ", + input.dims(), ")")); + if (axis > 0) { + for (int64 i = 0; i < axis; i++) { + new_sizes[0] *= input.dim_size(i); + } + } + new_sizes[1] = input.dim_size(axis); + if (axis + 1 < input.dims()) { + for (int64 i = axis + 1; i < input.dims(); i++) { + new_sizes[2] *= input.dim_size(i); + } + } + } + } + + auto Tin = input.shaped(new_sizes); Tensor* idx = nullptr; - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 1, input.shape(), &idx)); + OP_REQUIRES_OK(context, context->allocate_output( + 1, TensorShape({Tin.dimension(1)}), &idx)); auto idx_vec = idx->template vec(); - std::unordered_map uniq; - uniq.reserve(2 * N); - for (int64 i = 0, j = 0; i < N; ++i) { - auto it = uniq.insert(std::make_pair(Tin(i), j)); + auto hash_fn = [&Tin](const int64& key) -> unsigned long { + size_t h = 0; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + h = Hash64Combine(h, hash{}(Tin(i, key, j))); + } + } + return h; + }; + + auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) { + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + if (Tin(i, lhs, j) != Tin(i, rhs, j)) { + return false; + } + } + } + return true; + }; + + std::unordered_map + uniq(0, hash_fn, equal_to_fn); + + uniq.reserve(2 * Tin.dimension(1)); + + for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) { + auto it = uniq.insert(std::make_pair(i, j)); idx_vec(i) = it.first->second; if (it.second) { ++j; } } + int64 uniq_size = static_cast(uniq.size()); + new_sizes[1] = uniq_size; + TensorShape output_shape(input.shape()); + output_shape.set_dim(axis, uniq_size); Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({uniq_size}), &output)); - auto output_vec = output->template vec(); + OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); + auto Tout = output->shaped(new_sizes); for (auto it : uniq) { - output_vec(it.second) = it.first; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + Tout(i, it.second, j) = Tin(i, it.first, j); + } + } } if (num_outputs() > 2) { @@ -74,7 +145,7 @@ class UniqueOp : public OpKernel { 2, TensorShape({uniq_size}), &output)); auto count_output_vec = output->template vec(); count_output_vec.setZero(); - for (int64 i = 0; i < N; ++i) { + for (int64 i = 0; i < Tin.dimension(1); ++i) { count_output_vec(idx_vec(i))++; } } @@ -92,6 +163,16 @@ class UniqueOp : public OpKernel { .TypeConstraint("T") \ .TypeConstraint("out_idx"), \ UniqueOp); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("out_idx"), \ + UniqueOp); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("out_idx"), \ + UniqueOp); \ REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ @@ -176,5 +257,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique") .HostMemory("y") .HostMemory("idx"), UniqueOp); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index be2916f154..9fa6423d59 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -723,7 +723,9 @@ y: a tensor of the same shape and type as x but filled with zeros. REGISTER_OP("OnesLike") .Input("x: T") .Output("y: T") - .Attr("T: {float, double, int32, int64, complex64, complex128}") + .Attr( + "T: {float, double, int8, uint8, int16, uint16, int32, int64, " + "complex64, complex128, bool}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns a tensor of ones with the same shape and type as x. @@ -2031,6 +2033,46 @@ y: 1-D. idx: 1-D. )doc"); +REGISTER_OP("UniqueV2") + .Input("x: T") + .Input("axis: int64") + .Output("y: T") + .Output("idx: out_idx") + .Attr("T: type") + .Attr("out_idx: {int32, int64} = DT_INT32") + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); + c->set_output(1, c->input(0)); + return Status::OK(); + }) + .Doc(R"doc( +Finds unique elements in a 1-D tensor. + +This operation returns a tensor `y` containing all of the unique elements of `x` +sorted in the same order that they occur in `x`. This operation also returns a +tensor `idx` the same size as `x` that contains the index of each value of `x` +in the unique output `y`. In other words: + +`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` + +For example: + +``` +# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +y, idx = unique(x) +y ==> [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` + + +x: A `Tensor`. +axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to + find the unique elements. +y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. +idx: A 1-D Tensor. Has the same type as x that contains the index of each + value of x in the output y. +)doc"); + // -------------------------------------------------------------------------- REGISTER_OP("UniqueWithCounts") .Input("x: T") diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 7b10af9f44..d30b847696 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1829,6 +1829,8 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e245c8ba91..a242a13878 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -819,7 +819,7 @@ REGISTER_OP("DepthwiseConv2dNative") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -945,7 +945,7 @@ REGISTER_OP("Conv3D") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -977,7 +977,7 @@ REGISTER_OP("Conv3DBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropInputV2") @@ -1003,7 +1003,7 @@ REGISTER_OP("Conv3DBackpropFilter") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropFilterV2") @@ -1032,7 +1032,7 @@ REGISTER_OP("Conv3DBackpropInputV2") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -1069,7 +1069,7 @@ REGISTER_OP("Conv3DBackpropFilterV2") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {half, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 6ce0b70c9d..9c41957ae6 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5449,6 +5449,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5515,6 +5516,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5570,6 +5572,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5635,6 +5638,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } @@ -5690,6 +5694,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index f746b15fee..f2fadb4558 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -12,6 +12,7 @@ load("//tensorflow:tensorflow.bzl", "tf_copts") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path") +load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp") cc_library( name = "gtest", @@ -194,17 +195,16 @@ cc_library( cc_library( name = "sycl", - data = [ + data = if_ccpp([ "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")), - ], - linkopts = select({ - "//conditions:default": [ - "-Wl,-rpath,../local_config_sycl/sycl/lib", - ], - }), - deps = [ - "@local_config_sycl//sycl:syclrt", - ], + ]), + linkopts = if_ccpp([ + "-Wl,-rpath,../local_config_sycl/sycl/lib", + ]), + deps = if_ccpp( + ["@local_config_sycl//sycl:syclrt"], + ["@local_config_sycl//sycl:sycl_headers"], + ), ) filegroup( diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h index 6a214dbd0a..5c401b7477 100644 --- a/tensorflow/core/platform/default/notification.h +++ b/tensorflow/core/platform/default/notification.h @@ -73,7 +73,7 @@ class Notification { } mutex mu_; // protects mutations of notified_ - condition_variable cv_; // signalled when notified_ becomes non-zero + condition_variable cv_; // signaled when notified_ becomes non-zero std::atomic notified_; // mutations under mu_ }; diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc index e9baad5422..cda6d7d8f9 100644 --- a/tensorflow/core/platform/posix/error.cc +++ b/tensorflow/core/platform/posix/error.cc @@ -72,7 +72,7 @@ error::Code ErrnoToCode(int err_number) { case EBUSY: // Device or resource busy case ECHILD: // No child processes case EISCONN: // Socket is connected -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case ENOTBLK: // Block device required #endif case ENOTCONN: // The socket is not connected @@ -94,7 +94,7 @@ error::Code ErrnoToCode(int err_number) { case ENODATA: // No message is available on the STREAM read queue case ENOMEM: // Not enough space case ENOSR: // No STREAM resources -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case EUSERS: // Too many users #endif code = error::RESOURCE_EXHAUSTED; @@ -111,7 +111,7 @@ error::Code ErrnoToCode(int err_number) { case EPFNOSUPPORT: // Protocol family not supported #endif case EPROTONOSUPPORT: // Protocol not supported -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case ESOCKTNOSUPPORT: // Socket type not supported #endif case EXDEV: // Improper link @@ -131,7 +131,8 @@ error::Code ErrnoToCode(int err_number) { case ENETUNREACH: // Network unreachable case ENOLCK: // No locks available case ENOLINK: // Link has been severed -#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32)) +#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \ + defined(__HAIKU__)) case ENONET: // Machine is not on the network #endif code = error::UNAVAILABLE; @@ -156,7 +157,7 @@ error::Code ErrnoToCode(int err_number) { case ENOEXEC: // Exec format error case ENOMSG: // No message of the desired type case EPROTO: // Protocol error -#if !defined(_WIN32) +#if !defined(_WIN32) && !defined(__HAIKU__) case EREMOTE: // Object is remote #endif code = error::UNKNOWN; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 6cba40ccfc..614ee00b01 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,7 +37,8 @@ limitations under the License. #ifdef TF_USE_SNAPPY #include "snappy.h" #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ + defined(__HAIKU__) #include #endif @@ -61,7 +62,8 @@ int NumSchedulableCPUs() { } perror("sched_getaffinity"); #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ + defined(__HAIKU__) unsigned int count = std::thread::hardware_concurrency(); if (count > 0) return static_cast(count); #endif diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1bf9c93101..ec077c4283 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index 8fa0dfbed9..cf11f419a4 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -752,6 +752,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value, return __shfl_down_sync(mask, value, delta, width); } +__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown( + unsigned mask, Eigen::half value, int delta, int width = warpSize) { + return Eigen::half( + __shfl_down_sync(mask, static_cast(value), delta, width)); +} + // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. @@ -774,6 +780,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value, return __shfl_xor_sync(mask, value, laneMask, width); } +__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor( + unsigned mask, Eigen::half value, int laneMask, int width = warpSize) { + return Eigen::half( + __shfl_xor_sync(mask, static_cast(value), laneMask, width)); +} + // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 1bfa4f83a3..148c7851bd 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -24,10 +24,9 @@ limitations under the License. #include "mkl_dnn_types.h" #include "mkl_service.h" #include "mkl_trans.h" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" - -#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -38,6 +37,12 @@ limitations under the License. #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" + +using mkldnn::engine; +using mkldnn::memory; +using mkldnn::padding_kind; +using mkldnn::primitive; +using mkldnn::reorder; #endif // The file contains a number of utility classes and functions used by MKL @@ -51,6 +56,14 @@ namespace tensorflow { // Tensorflow tensor. typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims; +typedef enum { + Dim_N = 0, + Dim_C = 1, + Dim_H = 2, + Dim_W = 3, + Dim_O = 0, + Dim_I = 1 +} MklDnnDims; class MklShape { public: @@ -143,7 +156,9 @@ class MklShape { size_t GetDimension() const { return dimension_; } const size_t* GetSizes() const { return sizes_; } int64 dim_size(int index) const { return sizes_[index]; } - int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; } + int64 tf_dim_size(int index) const { + return sizes_[tf_to_mkl_dim_map_[index]]; + } const size_t* GetStrides() const { return strides_; } const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; } size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; } @@ -309,6 +324,260 @@ class MklShape { nullptr; // TF dimension corresponding to this MKL dimension }; +#ifdef INTEL_MKL_DNN + +// Forward decl +TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format); + +class MklDnnShape { + private: + typedef struct { + /// Flag to indicate if the tensor is an MKL tensor or not + bool is_mkl_tensor_ = false; + /// Number of dimensions in Tensorflow format + size_t dimension_ = 0; + /// Required by MKLDNN for conversions + mkldnn_dims_t sizes_; // Required by MKL for conversions + memory::format tf_data_format_ = memory::format::format_undef; + memory::data_type T_ = memory::data_type::data_undef; + // MKL layout + mkldnn_memory_desc_t mkl_md_; + /// TF dimension corresponding to this MKL dimension + mkldnn_dims_t map_; + } MklShapeData; + MklShapeData data_; + + typedef std::remove_extent::type mkldnn_dim_t; +#define INVALID_DIM_SIZE -1 + + public: + MklDnnShape() { + for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); + ++i) { + data_.sizes_[i] = -1; + } + for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) { + data_.map_[i] = -1; + } + } + + ~MklDnnShape() {} + TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape); // Cannot copy + + inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; } + inline void SetMklTensor(bool is_mkl_tensor) { + data_.is_mkl_tensor_ = is_mkl_tensor; + } + + inline void SetDimensions(const size_t dimension) { + data_.dimension_ = dimension; + } + inline size_t GetDimension(char dimension) const { + int index = GetMklDnnTensorDimIndex(dimension); + CHECK(index >= 0 && index < this->GetDimension()) + << "Invalid index from the dimension: " << index << ", " << dimension; + return this->DimSize(index); + } + + inline int32 GetMklDnnTensorDimIndex(char dimension) const { + switch (dimension) { + case 'N': + return MklDnnDims::Dim_N; + case 'C': + return MklDnnDims::Dim_C; + case 'H': + return MklDnnDims::Dim_H; + case 'W': + return MklDnnDims::Dim_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value + } + } + + inline size_t GetDimension() const { return data_.dimension_; } + inline const int* GetSizes() const { + return reinterpret_cast(&data_.sizes_[0]); + } + + // Returns an mkldnn::memory::dims object that contains the sizes of this + // MklDnnShape object. + inline memory::dims GetSizesAsMklDnnDims() const { + memory::dims retVal; + if (data_.is_mkl_tensor_) { + int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); + for (size_t i = 0; i < dimensions; i++) { + if (data_.sizes_[i] != INVALID_DIM_SIZE) + retVal.push_back(data_.sizes_[i]); + } + } else { + CHECK_EQ(data_.is_mkl_tensor_, true); + } + return retVal; + } + + inline int64 DimSize(int index) const { + CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0])); + return data_.sizes_[index]; + } + + /// Return TensorShape that describes the Tensorflow shape of the tensor + /// represented by this MklShape. + inline TensorShape GetTfShape() { + CHECK_EQ(data_.is_mkl_tensor_, true); + + std::vector shape(data_.dimension_, -1); + for (size_t idx = 0; idx < data_.dimension_; ++idx) { + shape[idx] = data_.sizes_[TfDimIdx(idx)]; + } + + TensorShape ts; + bool ret = TensorShapeUtils::MakeShape(shape, &ts).ok(); + CHECK_EQ(ret, true); + return ts; + } + + inline void SetElemType(memory::data_type dt) { data_.T_ = dt; } + inline const memory::data_type GetElemType() { return data_.T_; } + + inline void SetMklLayout(memory::primitive_desc* pd) { + CHECK_NOTNULL(pd); + data_.mkl_md_ = pd->desc().data; + } + inline const memory::desc GetMklLayout() const { + return memory::desc(data_.mkl_md_); + } + + inline memory::format GetTfDataFormat() const { + return data_.tf_data_format_; + } + /// We don't create primitive_descriptor for TensorFlow layout now. + /// We use lazy evaluation and create it only when needed. + inline void SetTfLayout(size_t dims, const memory::dims& sizes, + memory::format format) { + CHECK_EQ(dims, sizes.size()); + data_.dimension_ = dims; + for (size_t ii = 0; ii < dims; ii++) { + data_.sizes_[ii] = sizes[ii]; + } + data_.tf_data_format_ = format; + SetTfDimOrder(dims, format); + } + inline const memory::desc GetTfLayout() const { + memory::dims dims; + for (size_t ii = 0; ii < data_.dimension_; ii++) { + dims.push_back(data_.sizes_[ii]); + } + return memory::desc(dims, data_.T_, data_.tf_data_format_); + } + inline const memory::desc GetCurLayout() const { + return IsMklTensor() ? GetMklLayout() : GetTfLayout(); + } + + // nhasabni - I've removed SetTfDimOrder that was setting default order in + // case of MKL-ML. We don't need a case of default dimension order because + // when an operator that does not get data_format attribute gets all inputs + // in Tensorflow format, it will produce output in Tensorflow format. + inline void SetTfDimOrder(const size_t dimension, const mkldnn_dims_t map) { + CHECK(dimension == data_.dimension_); + for (size_t ii = 0; ii < dimension; ii++) { + data_.map_[ii] = map[ii]; + } + } + + inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) { + // TODO(nhasabni): Why do we restrict this to 4D? + CHECK_EQ(dimension, 4); + CHECK(dimension == data_.dimension_); + data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; + data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; + data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C; + data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; + } + + inline void SetTfDimOrder(const size_t dimension, memory::format format) { + TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format); + SetTfDimOrder(dimension, data_format); + } + + inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; } + inline size_t TfDimIdx(int index) const { return data_.map_[index]; } + inline int64 TfDimSize(int index) const { + return data_.sizes_[TfDimIdx(index)]; + } + + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Channel dimension. + inline bool IsMklChannelDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_C; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Batch dimension. + inline bool IsMklBatchDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_N; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Width dimension. + inline bool IsMklWidthDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_W; + } + /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd' + /// corresponds to MKL's Height dimension. + inline bool IsMklHeightDim(int d) const { + return TfDimIdx(d) == MklDnnDims::Dim_H; + } + + /// Check if the TF-Mkl dimension ordering map specifies if the input + /// tensor is in NCHW format. + inline bool IsTensorInNCHWFormat() const { + TensorFormat data_format = FORMAT_NCHW; + return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && + IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && + IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && + IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); + } + + /// Check if the TF-Mkl dimension ordering map specifies if the input + /// tensor is in NHWC format. + inline bool IsTensorInNHWCFormat() const { + TensorFormat data_format = FORMAT_NHWC; + return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) && + IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) && + IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) && + IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W'))); + } + + /// The following methods are used for serializing and de-serializing the + /// contents of the mklshape object. + /// The data is serialized in this order + /// is_mkl_tensor_ : dimension_ : sizes_ : map_: format_ : T_ : mkl_pd_; + + /// Size of buffer to hold the serialized object, the size is computed by + /// following above mentioned order + inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); } + + void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const { + CHECK(buf_size >= GetSerializeBufferSize()) + << "Buffer size is too small to SerializeMklDnnShape"; + *reinterpret_cast(buf) = data_; + } + + void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) { + // Make sure buffer holds at least is_mkl_tensor_. + CHECK(buf_size >= sizeof(data_.is_mkl_tensor_)) + << "Buffer size is too small in DeSerializeMklDnnShape"; + + const bool is_mkl_tensor = *reinterpret_cast(buf); + if (is_mkl_tensor) { // If it is an MKL Tensor then read the rest + CHECK(buf_size >= GetSerializeBufferSize()) + << "Buffer size is too small in DeSerializeMklDnnShape"; + data_ = *reinterpret_cast(buf); + } + } +}; + +#endif + // List of MklShape objects. Used in Concat/Split layers. typedef std::vector MklShapeList; @@ -347,6 +616,36 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } +#ifdef INTEL_MKL_DNN +template +inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, + const MklDnnShape& mkl_shape) { + Tensor output_tensor; + TensorShape output_shape; + +#if 0 + // TODO(nhasabni): need to implement + for (size_t j = 0; j < mkl_shape.GetDimension(); j++) { + // Outermost to innermost dimension + output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]); + } + + // Allocate output tensor. + context->allocate_temp(DataTypeToEnum::v(), output_shape, &output_tensor); + + dnnLayout_t output_layout = static_cast(mkl_shape.GetTfLayout()); + void* input_buffer = const_cast(mkl_tensor.flat().data()); + void* output_buffer = const_cast(output_tensor.flat().data()); + + if (mkl_tensor.NumElements() != 0) { + mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer); + } +#endif + + return output_tensor; +} +#endif + // Get the MKL shape from the second string tensor inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { mklshape->DeSerializeMklShape( @@ -359,6 +658,19 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) { + mklshape->DeSerializeMklDnnShape( + ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) + .flat() + .data(), + ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) + .flat() + .size() * + sizeof(uint8)); +} +#endif + // Gets the actual input inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) { return ctext->input(GetTensorDataIndex(n, ctext->num_inputs())); @@ -382,6 +694,26 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, } } +#ifdef INTEL_MKL_DNN +/// Get shape of input tensor pointed by 'input_idx' in TensorShape format. +/// If the input tensor is in MKL layout, then obtains TensorShape from +/// MklShape. +inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) { + // Sanity check. + CHECK_NOTNULL(context); + CHECK_LT(input_idx, context->num_inputs()); + + MklDnnShape input_mkl_shape; + GetMklShape(context, input_idx, &input_mkl_shape); + if (input_mkl_shape.IsMklTensor()) { + return input_mkl_shape.GetTfShape(); + } else { + const Tensor& t = MklGetInput(context, input_idx); + return t.shape(); + } +} +#endif + // Allocate the second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -397,6 +729,23 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +// Allocate the second output tensor that will contain +// the MKL shape serialized +inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, + const MklDnnShape& mkl_shape) { + Tensor* second_tensor = nullptr; + TensorShape second_shape; + second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); + OP_REQUIRES_OK(ctext, ctext->allocate_output( + GetTensorMetaDataIndex(n, ctext->num_outputs()), + second_shape, &second_tensor)); + mkl_shape.SerializeMklDnnShape( + second_tensor->flat().data(), + second_tensor->flat().size() * sizeof(uint8)); +} +#endif + // Allocate the output tensor, create a second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -417,9 +766,43 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } +#ifdef INTEL_MKL_DNN +// Allocate the output tensor, create a second output tensor that will contain +// the MKL shape serialized +inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, + Tensor** output, + const TensorShape& tf_shape, + const MklDnnShape& mkl_shape) { + Tensor* second_tensor = nullptr; + TensorShape second_shape; + second_shape.AddDim(mkl_shape.GetSerializeBufferSize()); + OP_REQUIRES_OK( + ctext, ctext->allocate_output(GetTensorDataIndex(n, ctext->num_outputs()), + tf_shape, output)); + OP_REQUIRES_OK(ctext, ctext->allocate_output( + GetTensorMetaDataIndex(n, ctext->num_outputs()), + second_shape, &second_tensor)); + mkl_shape.SerializeMklDnnShape( + second_tensor->flat().data(), + second_tensor->flat().size() * sizeof(uint8)); +} +#endif + // Allocates a temp tensor and returns the data buffer for temporary storage. // Currently -// we only support F32, will need to templatize if other types are added +#ifdef INTEL_MKL_DNN +template +inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, + const memory::primitive_desc& pd, void** buf_out) { + TensorShape tf_shape; + + tf_shape.AddDim(pd.get_size() / sizeof(T) + 1); + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), + tf_shape, tensor_out)); + *buf_out = static_cast(tensor_out->flat().data()); +} +#endif + inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, dnnLayout_t lt_buff, void** buf_out) { TensorShape tf_shape; @@ -669,6 +1052,8 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0, return true; } +// These functions do not compile with MKL-DNN since mkl.h is missing. +// We may need to remove them later. // TODO(intel_tf): Remove this routine when faster MKL layout conversion is // out. inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) { @@ -707,12 +1092,6 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { #ifdef INTEL_MKL_DNN -using mkldnn::engine; -using mkldnn::memory; -using mkldnn::padding_kind; -using mkldnn::primitive; -using mkldnn::reorder; - /// Return MKL-DNN data type (memory::data_type) for input type T /// /// @input None @@ -742,6 +1121,19 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { return memory::format::format_undef; } +/// Map MKL-DNN data format to TensorFlow's data format +/// +/// @input: memory::format +/// @return: Tensorflow data format corresponding to memory::format +/// Fails with an error if invalid data format. +inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { + if (format == memory::format::nhwc) + return FORMAT_NHWC; + else if (format == memory::format::nchw) + return FORMAT_NCHW; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); +} + /// Map TensorShape object into memory::dims required by MKL-DNN /// /// This function will simply map input TensorShape into MKL-DNN dims @@ -753,7 +1145,7 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return memory::dims corresponding to TensorShape inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { memory::dims dims(shape.dims()); - for (unsigned int d = 0; d < shape.dims(); ++d) { + for (int d = 0; d < shape.dims(); ++d) { dims[d] = shape.dim_size(d); } return dims; @@ -783,6 +1175,43 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, return memory::dims({n, c, h, w}); } +/// Map MklDnn memory::dims object into TensorShape object. +/// +/// This function will simply map input shape in MKL-DNN memory::dims format +/// in Tensorflow's TensorShape object by perserving dimension order. +/// +/// @input MKL-DNN memory::dims object +/// @output TensorShape corresponding to memory::dims +inline TensorShape MklDnnDimsToTFShape(const memory::dims& dims) { + std::vector shape(dims.size(), -1); + for (int d = 0; d < dims.size(); d++) { + shape[d] = dims[d]; + } + + TensorShape ret; + CHECK_EQ(TensorShapeUtils::MakeShape(shape, &ret).ok(), true); + return ret; +} + +/// Function to calculate strides given tensor shape in Tensorflow order +/// E.g., if dims_tf_order is {1, 2, 3, 4}, then as per Tensorflow convention, +/// dimesion with size 1 is outermost dimension; while dimension with size 4 is +/// innermost dimension. So strides for this tensor would be {4 * 3 * 2, +/// 4 * 3, 4, 1}, i.e., {24, 12, 4, 1}. +/// +/// @input Tensorflow shape in memory::dims type +/// @return memory::dims containing strides for the tensor. +inline memory::dims CalculateTFStrides(const memory::dims& dims_tf_order) { + CHECK_GT(dims_tf_order.size(), 0); + memory::dims strides(dims_tf_order.size()); + int last_dim_idx = dims_tf_order.size() - 1; + strides[last_dim_idx] = 1; + for (int d = last_dim_idx - 1; d >= 0; d--) { + strides[d] = strides[d + 1] * dims_tf_order[d + 1]; + } + return strides; +} + inline padding_kind TFPaddingToMklDnnPadding(Padding pad) { // MKL-DNN only supports zero padding. return padding_kind::zero; @@ -821,7 +1250,7 @@ class MklDnnData { delete (op_md_); } - void* GetTensorBuffer(const Tensor* tensor) { + inline void* GetTensorBuffer(const Tensor* tensor) const { CHECK_NOTNULL(tensor); return const_cast( static_cast(tensor->flat().data())); @@ -835,35 +1264,83 @@ class MklDnnData { /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and /// memory format HWIO, and the buffer that contains actual values is /// pointed by data_buffer. - void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) { - CHECK_NOTNULL(data_buffer); - CHECK_NOTNULL(cpu_engine_); - // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = - new memory(memory::primitive_desc( - memory::desc(dim, MklDnnType(), fm), *cpu_engine_), - data_buffer); + inline void SetUsrMem(const memory::dims& dim, memory::format fm, + void* data_buffer = nullptr) { + auto md = memory::desc(dim, MklDnnType(), fm); + SetUsrMem(md, data_buffer); } - void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) { + inline void SetUsrMem(const memory::dims& dim, memory::format fm, + const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(dim, fm, GetTensorBuffer(tensor)); } + /// Helper function to create memory descriptor in Blocked format + /// + /// @input: Tensor dimensions + /// @input: strides corresponding to dimensions. One can use utility + /// function such as CalculateTFStrides to compute strides + /// for given dimensions. + /// @return: memory::desc object corresponding to blocked memory format + /// for given dimensions and strides. + static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim, + const memory::dims& strides) { + CHECK_EQ(dim.size(), strides.size()); + + // We have to construct memory descriptor in a C style. This is not at all + // ideal but MKLDNN does not offer any API to construct descriptor in + // blocked format except a copy constructor that accepts + // mkldnn_memory_desc_t. + mkldnn_memory_desc_t md; + md.primitive_kind = mkldnn_memory; + md.ndims = dim.size(); + md.format = mkldnn_blocked; + md.data_type = memory::convert_to_c(MklDnnType()); + + for (size_t i = 0; i < dim.size(); i++) { + md.layout_desc.blocking.block_dims[i] = 1; + md.layout_desc.blocking.strides[1][i] = 1; + md.layout_desc.blocking.strides[0][i] = strides[i]; + md.layout_desc.blocking.padding_dims[i] = dim[i]; + md.layout_desc.blocking.offset_padding_to_data[i] = 0; + md.dims[i] = dim[i]; + } + md.layout_desc.blocking.offset_padding = 0; + + return memory::desc(md); + } + + /// A version of SetUsrMem call that allows user to create memory in blocked + /// format. So in addition to accepting dimensions, it also accepts strides. + /// This allows user to create memory for tensor in a format that is not + /// supported by MKLDNN. E.g., MKLDNN does not support tensor format for 6 + /// dimensional tensor as a native format. But by using blocked format, a user + /// can create memory for 6D tensor. + inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, + void* data_buffer = nullptr) { + CHECK_EQ(dim.size(), strides.size()); + auto blocked_md = MklDnnData::CreateBlockedMemDesc(dim, strides); + SetUsrMem(blocked_md, data_buffer); + } + + inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides, + const Tensor* tensor) { + CHECK_NOTNULL(tensor); + SetUsrMem(dim, strides, GetTensorBuffer(tensor)); + } + /// A version of function to set user memory primitive that accepts memory /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - void SetUsrMem(memory::desc md, void* data_buffer) { - CHECK_NOTNULL(data_buffer); - CHECK_NOTNULL(cpu_engine_); - // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = - new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer); + inline void SetUsrMem(const memory::desc& md, void* data_buffer = nullptr) { + auto pd = memory::primitive_desc(md, *cpu_engine_); + SetUsrMem(pd, data_buffer); } /// A version of SetUsrMem with memory descriptor and tensor - void SetUsrMem(memory::desc md, const Tensor* tensor) { + inline void SetUsrMem(const memory::desc& md, const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(md, GetTensorBuffer(tensor)); } @@ -872,41 +1349,60 @@ class MklDnnData { /// descriptor directly, instead of accepting dimensions and format. This /// function is more generic that the one above, but the function above is /// sufficient in most cases. - void SetUsrMem(memory::primitive_desc pd, void* data_buffer) { - CHECK_NOTNULL(data_buffer); + inline void SetUsrMem(const memory::primitive_desc& pd, + void* data_buffer = nullptr) { CHECK_NOTNULL(cpu_engine_); // TODO(nhasabni): can we remove dynamic memory allocation? - user_memory_ = new memory(pd, data_buffer); + if (data_buffer) { + user_memory_ = new memory(pd, data_buffer); + } else { + user_memory_ = new memory(pd); + } } /// A version of SetUsrMem with primitive descriptor and tensor - void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) { + inline void SetUsrMem(const memory::primitive_desc& pd, + const Tensor* tensor) { CHECK_NOTNULL(tensor); SetUsrMem(pd, GetTensorBuffer(tensor)); } /// Get function for user memory primitive. - const memory* GetUsrMem() const { return user_memory_; } + inline const memory* GetUsrMem() const { return user_memory_; } /// Get function for primitive descriptor of user memory primitive. - const memory::primitive_desc GetUsrMemPrimDesc() const { + inline const memory::primitive_desc GetUsrMemPrimDesc() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_primitive_desc(); } /// Get function for descriptor of user memory. - memory::desc GetUsrMemDesc() { + inline memory::desc GetUsrMemDesc() { // This is ugly. Why MKL-DNN does not provide desc() method of const type?? const memory::primitive_desc pd = GetUsrMemPrimDesc(); return const_cast(&pd)->desc(); } /// Get function for data buffer of user memory primitive. - void* GetUsrMemDataHandle() const { + inline void* GetUsrMemDataHandle() const { CHECK_NOTNULL(user_memory_); return user_memory_->get_data_handle(); } + /// Set function for data buffer of user memory primitive. + inline void* SetUsrMemDataHandle(void* data_buffer) { + CHECK_NOTNULL(user_memory_); + CHECK_NOTNULL(data_buffer); + return user_memory_->set_data_handle(data_buffer); + } + + /// Set function for data buffer of user memory primitive. + inline void SetUsrMemDataHandle(const Tensor* tensor) { + CHECK_NOTNULL(user_memory_); + CHECK_NOTNULL(tensor); + user_memory_->set_data_handle(GetTensorBuffer(tensor)); + } + /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -915,7 +1411,7 @@ class MklDnnData { /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is /// required for I and F (say I_r is reorder primitive for I; F_r is reorder /// primitive for F), then we need I_r and F_r to perform Conv2D. - const memory& GetOpMem() const { + inline const memory& GetOpMem() const { return reorder_memory_ ? *reorder_memory_ : *user_memory_; } @@ -923,13 +1419,32 @@ class MklDnnData { /// format. E.g., For Conv2D, the dimensions would be same as user dimensions /// but memory::format would be mkldnn::any because we want MKL-DNN to choose /// best layout/format for given input dimensions. - void SetOpMemDesc(const memory::dims& dim, memory::format fm) { + inline void SetOpMemDesc(const memory::dims& dim, memory::format fm) { // TODO(nhasabni): can we remove dynamic memory allocation? op_md_ = new memory::desc(dim, MklDnnType(), fm); } /// Get function for memory descriptor for an operation - const memory::desc& GetOpMemDesc() const { return *op_md_; } + inline const memory::desc& GetOpMemDesc() const { return *op_md_; } + + /// Predicate that checks if we need to reorder user's memory into memory + /// pointed by op_pd. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool IsReorderNeeded(const memory::primitive_desc& op_pd) const { + CHECK_NOTNULL(user_memory_); + return op_pd != user_memory_->get_primitive_desc(); + } + + /// Function to create a reorder from memory pointed by from to memory pointed + /// by to. Returns created primitive. + inline primitive CreateReorder(const memory* from, const memory* to) const { + CHECK_NOTNULL(from); + CHECK_NOTNULL(to); + return reorder(*from, *to); + } /// Function to handle input reordering /// @@ -945,19 +1460,62 @@ class MklDnnData { /// operation /// @input: net - net to which to add reorder primitive in case it is needed. /// @return: true in case reorder of input is needed; false, otherwise. - bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, - std::vector* net) { + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); - if (op_pd != user_memory_->get_primitive_desc()) { + if (IsReorderNeeded(op_pd)) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); - net->push_back(reorder(*user_memory_, *reorder_memory_)); + net->push_back(CreateReorder(user_memory_, reorder_memory_)); + return true; + } + return false; + } + + /// Overloaded version of above function that accepts memory buffer + /// where output of reorder needs to be stored. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @reorder_data_handle - memory buffer where output of reorder needs to be + /// stored. Primitive does not check if buffer is + /// enough size to write. + /// @input: net - net to which to add reorder primitive in case it is needed. + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + void* reorder_data_handle, + std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(reorder_data_handle); + CHECK_NOTNULL(user_memory_); + if (IsReorderNeeded(op_pd)) { + // TODO(nhasabni): can we remove dynamic memory allocation? + reorder_memory_ = new memory(op_pd, reorder_data_handle); + net->push_back(CreateReorder(user_memory_, reorder_memory_)); return true; } return false; } + /// Another overloaded version of CheckReorderToOpMem that accepts Tensor + /// where output of reorder needs to be stored. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @reorder_tensor - Tensor whose buffer is to be used to store output of + /// reorder. Primitive does not check if buffer is + /// enough size to write. + /// @input: net - net to which to add reorder primitive in case it is needed. + /// @return: true in case reorder of input is needed; false, otherwise. + inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + Tensor* reorder_tensor, + std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(reorder_tensor); + return CheckReorderToOpMem(op_pd, GetTensorBuffer(reorder_tensor), net); + } + /// Function to handle output reorder /// /// This function performs very similar functionality as input reordering @@ -970,9 +1528,10 @@ class MklDnnData { /// /// @input memory primitive descriptor for the given output of an operation /// @return: true in case reorder of output is needed; false, otherwise. - bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) { + inline bool PrepareReorderToUserMemIfReq( + const memory::primitive_desc& op_pd) { CHECK_NOTNULL(user_memory_); - if (op_pd != user_memory_->get_primitive_desc()) { + if (IsReorderNeeded(op_pd)) { // TODO(nhasabni): can we remove dynamic memory allocation? reorder_memory_ = new memory(op_pd); return true; @@ -987,11 +1546,11 @@ class MklDnnData { /// to the user-specified output buffer. /// /// @input: net - net to which to add reorder primitive - void InsertReorderToUserMem(std::vector* net) { + inline void InsertReorderToUserMem(std::vector* net) { CHECK_NOTNULL(net); CHECK_NOTNULL(user_memory_); CHECK_NOTNULL(reorder_memory_); - net->push_back(reorder(*reorder_memory_, *user_memory_)); + net->push_back(CreateReorder(reorder_memory_, user_memory_)); } }; diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc new file mode 100644 index 0000000000..8b73eadb40 --- /dev/null +++ b/tensorflow/core/util/mkl_util_test.cc @@ -0,0 +1,91 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/util/mkl_util.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +#ifdef INTEL_MKL_DNN + +TEST(MklUtilTest, MklDnnTfShape) { + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData a(&cpu_engine); + + const int N = 1, C = 2, H = 3, W = 4; + memory::dims a_dims = {N, C, H, W}; + MklDnnShape a_mkldnn_shape; + a_mkldnn_shape.SetMklTensor(true); + // Create TF layout in NCHW. + a_mkldnn_shape.SetTfLayout(a_dims.size(), a_dims, memory::format::nchw); + TensorShape a_tf_shape_nchw({N, C, H, W}); + TensorShape a_tf_shape_nhwc({N, H, W, C}); + TensorShape a_mkldnn_tf_shape = a_mkldnn_shape.GetTfShape(); + // Check that returned shape is in NCHW format. + EXPECT_EQ(a_tf_shape_nchw, a_mkldnn_tf_shape); + EXPECT_NE(a_tf_shape_nhwc, a_mkldnn_tf_shape); + + memory::dims b_dims = {N, C, H, W}; + MklDnnShape b_mkldnn_shape; + b_mkldnn_shape.SetMklTensor(true); + // Create TF layout in NHWC. + b_mkldnn_shape.SetTfLayout(b_dims.size(), b_dims, memory::format::nhwc); + TensorShape b_tf_shape_nhwc({N, H, W, C}); + TensorShape b_tf_shape_nchw({N, C, H, W}); + TensorShape b_mkldnn_tf_shape = b_mkldnn_shape.GetTfShape(); + // Check that returned shape is in NHWC format. + EXPECT_EQ(b_tf_shape_nhwc, b_mkldnn_tf_shape); + EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape); +} + +TEST(MklUtilTest, MklDnnBlockedFormatTest) { + // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension + // first (case 1) and then it being outermost dimension (case 2). + auto cpu_engine = engine(engine::cpu, 0); + + // Setting for case 1 + MklDnnData a(&cpu_engine); + memory::dims dim1 = {3, 4}; + memory::dims strides1 = {1, 3}; + a.SetUsrMem(dim1, strides1); + + memory::desc a_md1 = a.GetUsrMemDesc(); + EXPECT_EQ(a_md1.data.ndims, 2); + EXPECT_EQ(a_md1.data.dims[0], 3); + EXPECT_EQ(a_md1.data.dims[1], 4); + EXPECT_EQ(a_md1.data.format, mkldnn_blocked); + + // Setting for case 2 + MklDnnData b(&cpu_engine); + memory::dims dim2 = {3, 4}; + memory::dims strides2 = {4, 1}; + b.SetUsrMem(dim2, strides2); + + memory::desc b_md2 = b.GetUsrMemDesc(); + EXPECT_EQ(b_md2.data.ndims, 2); + EXPECT_EQ(b_md2.data.dims[0], 3); + EXPECT_EQ(b_md2.data.dims[1], 4); + EXPECT_EQ(b_md2.data.format, mkldnn_blocked); +} + +#endif // INTEL_MKL_DNN +} // namespace +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md index ab95ce0af9..8ad4c4c075 100644 --- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md +++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md @@ -3,7 +3,7 @@ Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded, queue-based input pipelines for performance. Beginning with TensorFlow 1.4, however, we recommend using the `tf.data` module instead. (See -[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was +@{$datasets$Datasets} for details. In TensorFlow 1.2 and 1.3, the module was called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use interface for constructing efficient input pipelines. Furthermore, we've stopped developing the old multi-threaded, queue-based input pipelines. We've retained diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md index 8409962744..be14ab4026 100644 --- a/tensorflow/docs_src/get_started/get_started.md +++ b/tensorflow/docs_src/get_started/get_started.md @@ -272,7 +272,7 @@ train = optimizer.minimize(loss) ``` ```python -sess.run(init) # reset values to incorrect defaults. +sess.run(init) # reset variables to incorrect defaults. for i in range(1000): sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}) @@ -317,7 +317,7 @@ y_train = [0, -1, -2, -3] # training loop init = tf.global_variables_initializer() sess = tf.Session() -sess.run(init) # reset values to wrong +sess.run(init) # initialize variables with incorrect defaults. for i in range(1000): sess.run(train, {x: x_train, y: y_train}) @@ -383,7 +383,7 @@ train_input_fn = tf.estimator.inputs.numpy_input_fn( eval_input_fn = tf.estimator.inputs.numpy_input_fn( {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False) -# We can invoke 1000 training steps by invoking the method and passing the +# We can invoke 1000 training steps by invoking the method and passing the # training data set. estimator.train(input_fn=input_fn, steps=1000) diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 9d3af5d96a..0db5c6143a 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -191,7 +191,7 @@ import pandas as pd def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True): return tf.estimator.inputs.pandas_input_fn( - x=pdDataFrame(...), + x=pd.DataFrame(...), y=pd.Series(...), num_epochs=num_epochs, shuffle=shuffle) @@ -267,8 +267,8 @@ tf.logging.set_verbosity(tf.logging.INFO) Define the column names for the data set in `COLUMNS`. To distinguish features from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs -(@{tf.train}, -@{tf.test}, and +([train](http://download.tensorflow.org/data/boston_train.csv), +[test](http://download.tensorflow.org/data/boston_test.csv), and [predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_ `DataFrame`s: diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 3a153e8114..df622c6ac5 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index df43255896..8b3da49a0d 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index f7f2c3cdc7..6eb8158249 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.4.0-rc1 + 1.4.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.4.0-rc1 + 1.4.0 @@ -124,7 +124,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -143,7 +143,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -151,10 +151,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip). 3. Extract this .zip file. @@ -202,7 +202,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
    javac -cp libtensorflow-1.4.0-rc1.jar HelloTF.java
    +
    javac -cp libtensorflow-1.4.0.jar HelloTF.java
    ### Running @@ -216,11 +216,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
    java -cp libtensorflow-1.4.0-rc1.jar:. -Djava.library.path=./jni HelloTF
    +
    java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF
    And the following command line executes the `HelloTF` program on Windows: -
    java -cp libtensorflow-1.4.0-rc1.jar;. -Djava.library.path=jni HelloTF
    +
    java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF
    If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 414ab7b1f7..f7380bac8a 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
    (tensorflow)$ pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    + https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
          $ sudo pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    +     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
          
    If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
          (tensorflow)$ pip install --ignore-installed --upgrade \
    -     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    + https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp27-none-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp27-none-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
     
    Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
     
    Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
     
    @@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
    -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
     
    GPU support:
    -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
    +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
     
    diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 9a95710bfa..79b383817b 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -114,7 +114,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
     $ pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -235,7 +235,7 @@ take the following steps: issue the following command:
     $ sudo pip3 install --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl 
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -344,7 +344,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
     (tensorflow)$ pip install --ignore-installed --upgrade \
    -     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
    + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl @@ -517,7 +517,7 @@ This section documents the relevant values for Mac OS installations.
    -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
    +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
     
    @@ -525,7 +525,7 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-a
    -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py3-none-any.whl
    +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
     
    diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 6d0dcdcd4a..aa4ae6c876 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -355,10 +355,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.4.0rc1 on Linux: +for TensorFlow 1.4.0 on Linux:
    -$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0rc1-py2-none-any.whl
    +$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl
     
    ## Validate your installation @@ -447,8 +447,10 @@ Stack Overflow and specify the `tensorflow` tag. **Linux**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.4.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.3.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.3.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.4.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.4.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.2.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.2.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.35.18
    tensorflow-1.1.0CPU3.5MSVC 2015 update 3Cmake v3.6.3N/AN/A
    - - + + + + @@ -460,7 +462,8 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.4.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
    tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
    tensorflow_gpu-1.4.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.468
    tensorflow-1.3.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.3.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
    tensorflow-1.2.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
    tensorflow_gpu-1.2.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.55.18
    tensorflow-1.1.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.2N/AN/A
    - + + @@ -471,8 +474,10 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
    tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.2.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
    tensorflow-1.1.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.2N/AN/A
    tensorflow_gpu-1.1.0GPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.25.18
    - - + + + + diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index c5a560e074..8fc65be35a 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -296,6 +296,6 @@ complains about missing header files, add the .h’s that are needed into the [`android_extended_ops`](https://www.tensorflow.org/code/tensorflow/core/kernels/BUILD#L3525) target. -If you’re using a makefile targetting iOS, Raspberry Pi, etc, go to +If you’re using a makefile targeting iOS, Raspberry Pi, etc, go to [`tensorflow/contrib/makefile/tf_op_files.txt`](https://www.tensorflow.org/code/tensorflow/contrib/makefile/tf_op_files.txt) and add the right implementation files there. diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 1f856bbf3f..25cb72008d 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -9,11 +9,19 @@ lets you view the internal structure and states of running TensorFlow graphs during training and inference, which is difficult to debug with general-purpose debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm. -> NOTE: The system requirements of tfdbg on supported external platforms include -> the following. On Mac OS X, the `ncurses` library is required. It can be -> installed with `brew install homebrew/dupes/ncurses`. On Windows, `pyreadline` -> is required. If you use Anaconda3, you can install it with a command +> NOTE: TensorFlow debugger uses a +> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based +> text user interface. On Mac OS X, the `ncurses` library is required and can +> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses +> isn't as well supported, so a +> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can +> be used with tfdbg by installing `pyreadline` with pip. +> If you use Anaconda3, you can install it with a command > such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`. +> Unofficial Windows curses packages can be downloaded +> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently +> installed using `pip install .whl`, however curses on Windows +> may not work as reliably as curses on Linux or Mac. This tutorial demonstrates how to use the **tfdbg** command-line interface (CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN) @@ -149,6 +157,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `pt [slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` | | | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` | | | `-r ` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` | +| | `-n ` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` | | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` | | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` | | **`/regex`** | | [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` | @@ -166,10 +175,12 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` | | | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` | +| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` | | **`lo`** | | **List output recipients of node** | | | | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` | | | `-d ` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` | | | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` | +| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` | | **`ls`** | | **List Python source files involved in node creation.** | | | | `-p ` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` | | | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` | diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md index d6f80430cd..88eb277e35 100644 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ b/tensorflow/docs_src/programmers_guide/tensors.md @@ -29,8 +29,8 @@ Some types of tensors are special, and these will be covered in other units of the Programmer's guide. The main ones are: * `tf.Variable` - * `tf.Constant` - * `tf.Placeholder` + * `tf.constant` + * `tf.placeholder` * `tf.SparseTensor` With the exception of `tf.Variable`, the value of a tensor is immutable, which @@ -64,7 +64,7 @@ The following snippet demonstrates creating a few rank 0 variables: mammal = tf.Variable("Elephant", tf.string) ignition = tf.Variable(451, tf.int16) floating = tf.Variable(3.14159265359, tf.float64) -its_complicated = tf.Variable((12.3, -4.85), tf.complex64) +its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64) ``` Note: A string is treated as a single item in TensorFlow, not as a sequence of @@ -79,7 +79,7 @@ initial value. For example: mystr = tf.Variable(["Hello"], tf.string) cool_numbers = tf.Variable([3.14159, 2.71828], tf.float32) first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32) -its_very_complicated = tf.Variable([(12.3, -4.85), (7.5, -6.23)], tf.complex64) +its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64) ``` @@ -275,8 +275,8 @@ Graphs and Sessions for more information). Sometimes it is not possible to evaluate a `tf.Tensor` with no context because its value might depend on dynamic information that is not available. For -example, tensors that depend on `Placeholder`s can't be evaluated without -providing a value for the `Placeholder`. +example, tensors that depend on `placeholder`s can't be evaluated without +providing a value for the `placeholder`. ``` python p = tf.placeholder(tf.float32) diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py index 82d6a94ea1..ab611f414a 100644 --- a/tensorflow/examples/speech_commands/models.py +++ b/tensorflow/examples/speech_commands/models.py @@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings, first_filter_height = input_time_size first_filter_count = 186 first_filter_stride_x = 1 - first_filter_stride_y = 4 + first_filter_stride_y = 1 first_weights = tf.Variable( tf.truncated_normal( [first_filter_height, first_filter_width, 1, first_filter_count], diff --git a/tensorflow/go/android.go b/tensorflow/go/android.go new file mode 100644 index 0000000000..3db3ddfec5 --- /dev/null +++ b/tensorflow/go/android.go @@ -0,0 +1,20 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build android + +package tensorflow + +// #cgo LDFLAGS: -landroid -llog -lm -lz -ldl +import "C" diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go index 7cba043af2..40c951ab8c 100644 --- a/tensorflow/go/operation_test.go +++ b/tensorflow/go/operation_test.go @@ -123,6 +123,14 @@ func TestOutputDataTypeAndShape(t *testing.T) { []int64{2, 3}, Double, }, + { // Matrix of Uint64 + [][]uint64{ + {1, 2, 3}, + {4, 5, 6}, + }, + []int64{2, 3}, + Uint64, + }, } for idx, test := range testdata { t.Run(fmt.Sprintf("#%d Value %T", idx, test.Value), func(t *testing.T) { diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 36a74c0081..1326a95278 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -101,7 +101,7 @@ func NewTensor(value interface{}) (*Tensor, error) { return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len()) } } else { - e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()} + e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()} if err := e.encode(reflect.ValueOf(value), shape); err != nil { return nil, err } @@ -207,6 +207,9 @@ func (t *Tensor) WriteContentsTo(w io.Writer) (int64, error) { func tensorData(c *C.TF_Tensor) []byte { // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices cbytes := C.TF_TensorData(c) + if cbytes == nil { + return nil + } length := int(C.TF_TensorByteSize(c)) slice := (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length] return slice @@ -310,7 +313,7 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { if err := w.WriteByte(b); err != nil { return err } - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Write(w, nativeEndian, v.Interface()); err != nil { return err } @@ -349,7 +352,7 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect. return err } ptr.Elem().SetBool(b == 1) - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil { return err } diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 35bd2fd9a5..674a8ce86f 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -34,11 +34,15 @@ func TestNewTensor(t *testing.T) { {nil, int64(5)}, {nil, uint8(5)}, {nil, uint16(5)}, + {nil, uint32(5)}, + {nil, uint64(5)}, {nil, float32(5)}, {nil, float64(5)}, {nil, complex(float32(5), float32(6))}, {nil, complex(float64(5), float64(6))}, {nil, "a string"}, + {[]int64{1}, []uint32{1}}, + {[]int64{1}, []uint64{1}}, {[]int64{2}, []bool{true, false}}, {[]int64{1}, []float64{1}}, {[]int64{1}, [1]float64{1}}, @@ -71,11 +75,6 @@ func TestNewTensor(t *testing.T) { // native ints not supported int(5), []int{5}, - // uint32 and uint64 are not supported in TensorFlow - uint32(5), - []uint32{5}, - uint64(5), - []uint64{5}, // Mismatched dimensions [][]float32{{1, 2, 3}, {4}}, // Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG" diff --git a/tensorflow/java/src/main/java/org/tensorflow/Shape.java b/tensorflow/java/src/main/java/org/tensorflow/Shape.java index 9aa92be111..d533c3d480 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Shape.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Shape.java @@ -77,6 +77,24 @@ public final class Shape { return shape[i]; } + @Override + public int hashCode() { + return Arrays.hashCode(shape); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (obj instanceof Shape && Arrays.equals(this.shape, ((Shape) obj).shape)) { + return !hasUnknownDimension(); + } + + return super.equals(obj); + } + /** Succinct description of the shape meant for debugging. */ @Override public String toString() { @@ -98,4 +116,18 @@ public final class Shape { } private long[] shape; + + private boolean hasUnknownDimension() { + if (shape == null) { + return true; + } + + for (long dimension : shape) { + if (dimension == -1) { + return true; + } + } + + return false; + } } diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index 3b027700c5..313c09e1e4 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -16,6 +16,7 @@ limitations under the License. package org.tensorflow; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import org.junit.Test; import org.junit.runner.RunWith; @@ -77,4 +78,27 @@ public class ShapeTest { assertEquals(5, n.shape().size(1)); } } + + @Test + public void equalsWorksCorrectly() { + assertEquals(Shape.scalar(), Shape.scalar()); + assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3)); + + assertNotEquals(Shape.make(1, 2), null); + assertNotEquals(Shape.make(1, 2), new Object()); + assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4)); + + assertNotEquals(Shape.unknown(), Shape.unknown()); + assertNotEquals(Shape.make(-1), Shape.make(-1)); + assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3)); + } + + @Test + public void hashCodeIsAsExpected() { + assertEquals(Shape.make(1, 2, 3, 4).hashCode(), Shape.make(1, 2, 3, 4).hashCode()); + assertEquals(Shape.scalar().hashCode(), Shape.scalar().hashCode()); + assertEquals(Shape.unknown().hashCode(), Shape.unknown().hashCode()); + + assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode()); + } } diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 5ae4aace16..54c43c1337 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5,7 +5,10 @@ package( default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", + "//tensorflow/contrib/lite/toco/python:__pkg__", "//tensorflow_models:__subpackages__", + # TODO(aselle): to pass open source test. + "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__", ], ) @@ -45,6 +48,7 @@ py_library( "//tensorflow/compiler/aot/tests:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/contrib/learn/python/learn/datasets:__pkg__", # TODO(b/34059704): remove when fixed + "//tensorflow/contrib/lite/toco/python:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/debug:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/python/tools:__pkg__", # TODO(b/34059704): remove when fixed "//tensorflow/tools/api/generator:__pkg__", diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index 62fea05867..fa5d02c476 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -117,7 +117,7 @@ class _Head(object): update_op = tf.contrib.layers.optimize_loss(optimizer=sync, loss=estimator_spec.loss, ...) hooks = [sync.make_session_run_hook(is_chief)] - ... upate train_op and hooks in EstimatorSpec and return + ... update train_op and hooks in EstimatorSpec and return ``` """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index c9f37f06e8..750af20e8a 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import collections +from six import string_types from tensorflow.python.estimator.inputs.queues import feeding_functions # Key name to pack the target into dict of `features`. See @@ -51,8 +52,9 @@ def numpy_input_fn(x, num_threads=1): """Returns input function that would feed dict of numpy arrays into the model. - This returns a function outputting `features` and `target` based on the dict - of numpy arrays. The dict `features` has the same keys as the `x`. + This returns a function outputting `features` and `targets` based on the dict + of numpy arrays. The dict `features` has the same keys as the `x`. The dict + `targets` has the same keys as the `y` if `y` is a dict. Example: @@ -69,7 +71,7 @@ def numpy_input_fn(x, Args: x: dict of numpy array object. - y: numpy array object. `None` if absent. + y: numpy array object or dict of numpy array object. `None` if absent. batch_size: Integer, size of batches to return. num_epochs: Integer, number of epochs to iterate over data. If `None` will run forever. @@ -81,11 +83,13 @@ def numpy_input_fn(x, such as in prediction and evaluation mode, `num_threads` should be 1. Returns: - Function, that has signature of ()->(dict of `features`, `target`) + Function, that has signature of ()->(dict of `features`, `targets`) Raises: ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e., values in `x` have same shape). + ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict. + ValueError: if x or y is an empty dict. TypeError: `x` is not a dict or `shuffle` is not bool. """ @@ -97,43 +101,75 @@ def numpy_input_fn(x, """Numpy input function.""" if not isinstance(x, dict): raise TypeError('x must be dict; got {}'.format(type(x).__name__)) + if not x: + raise ValueError('x cannot be empty') # Make a shadow copy and also ensure the order of iteration is consistent. - ordered_dict_x = collections.OrderedDict( + ordered_dict_data = collections.OrderedDict( sorted(x.items(), key=lambda t: t[0])) + # Deep copy keys which is a view in python 3 + feature_keys = list(ordered_dict_data.keys()) + + if y is None: + target_keys = None + elif isinstance(y, dict): + if not y: + raise ValueError('y cannot be empty dict, use None instead.') + + ordered_dict_y = collections.OrderedDict( + sorted(y.items(), key=lambda t: t[0])) + target_keys = list(ordered_dict_y.keys()) + + duplicate_keys = set(feature_keys).intersection(set(target_keys)) + if duplicate_keys: + raise ValueError('{} duplicate keys are found in both x and y: ' + '{}'.format(len(duplicate_keys), duplicate_keys)) + + ordered_dict_data.update(ordered_dict_y) + else: + target_keys = _get_unique_target_key(ordered_dict_data) + ordered_dict_data[target_keys] = y + + if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1: + shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys} + + if target_keys is None: + shape_of_y = None + elif isinstance(target_keys, string_types): + shape_of_y = y.shape + else: + shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys} - unique_target_key = _get_unique_target_key(ordered_dict_x) - if y is not None: - ordered_dict_x[unique_target_key] = y - - if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1: - shape_dict_of_x = {k: ordered_dict_x[k].shape - for k in ordered_dict_x.keys()} - shape_of_y = None if y is None else y.shape raise ValueError('Length of tensors in x and y is mismatched. All ' 'elements in x and y must have the same length.\n' 'Shapes in x: {}\n' - 'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y)) + 'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y)) queue = feeding_functions._enqueue_data( # pylint: disable=protected-access - ordered_dict_x, + ordered_dict_data, queue_capacity, shuffle=shuffle, num_threads=num_threads, enqueue_size=batch_size, num_epochs=num_epochs) - features = (queue.dequeue_many(batch_size) if num_epochs is None - else queue.dequeue_up_to(batch_size)) + batch = ( + queue.dequeue_many(batch_size) + if num_epochs is None else queue.dequeue_up_to(batch_size)) - # Remove the first `Tensor` in `features`, which is the row number. - if len(features) > 0: - features.pop(0) + # Remove the first `Tensor` in `batch`, which is the row number. + if batch: + batch.pop(0) - features = dict(zip(ordered_dict_x.keys(), features)) - if y is not None: - target = features.pop(unique_target_key) + features = dict(zip(feature_keys, batch[:len(feature_keys)])) + if target_keys is None: + # TODO(martinwicke), return consistent result + return features + elif isinstance(target_keys, string_types): + target = batch[-1] + return features, target + else: + target = dict(zip(target_keys, batch[-len(target_keys):])) return features, target - return features return input_fn diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 02df22b632..1374e3f7e1 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -239,6 +239,40 @@ class NumpyIoTest(test.TestCase): x, y, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() + def testNumpyInputFnWithXIsEmptyDict(self): + x = {} + y = np.arange(4) + with self.test_session(): + with self.assertRaisesRegexp(ValueError, 'x cannot be empty'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + + def testNumpyInputFnWithYIsNone(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = None + + with self.test_session() as session: + input_fn = numpy_io.numpy_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + features_tensor = input_fn() + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(session, coord=coord) + + feature = session.run(features_tensor) + self.assertEqual(len(feature), 2) + self.assertAllEqual(feature['a'], [0, 1]) + self.assertAllEqual(feature['b'], [32, 33]) + + session.run([features_tensor]) + with self.assertRaises(errors.OutOfRangeError): + session.run([features_tensor]) + + coord.request_stop() + coord.join(threads) + def testNumpyInputFnWithNonBoolShuffle(self): x = np.arange(32, 36) y = np.arange(4) @@ -285,6 +319,56 @@ class NumpyIoTest(test.TestCase): num_epochs=1) failing_input_fn() + def testNumpyInputFnWithYAsDict(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)} + + with self.test_session() as session: + input_fn = numpy_io.numpy_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + features_tensor, targets_tensor = input_fn() + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(session, coord=coord) + + features, targets = session.run([features_tensor, targets_tensor]) + self.assertEqual(len(features), 2) + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + self.assertEqual(len(targets), 2) + self.assertAllEqual(targets['y1'], [-32, -31]) + self.assertAllEqual(targets['y2'], [32, 31]) + + session.run([features_tensor, targets_tensor]) + with self.assertRaises(errors.OutOfRangeError): + session.run([features_tensor, targets_tensor]) + + coord.request_stop() + coord.join(threads) + + def testNumpyInputFnWithYIsEmptyDict(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {} + with self.test_session(): + with self.assertRaisesRegexp(ValueError, 'y cannot be empty'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + + def testNumpyInputFnWithDuplicateKeysInXAndY(self): + a = np.arange(4) * 1.0 + b = np.arange(32, 36) + x = {'a': a, 'b': b} + y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b} + with self.test_session(): + with self.assertRaisesRegexp( + ValueError, '2 duplicate keys are found in both x and y'): + failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) + failing_input_fn() + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 2785aed13e..dc4ffb1747 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -860,6 +860,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None): inputs, which allows those ops to accept numpy arrays, Python lists, and scalars in addition to `Tensor` objects. + Note: This function diverges from default Numpy behavior for `float` and + `string` types when `None` is present in a Python list or scalar. Rather + than silently converting `None` values, an error will be thrown. + Args: value: An object whose type has a registered `Tensor` conversion function. dtype: Optional element type for the returned tensor. If missing, the diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 7e74c19124..e283542172 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -286,6 +286,7 @@ _TF_TO_IS_OK = { dtypes.bool: [_FilterBool], dtypes.complex128: [_FilterComplex], dtypes.complex64: [_FilterComplex], + dtypes.float16: [_FilterFloat], dtypes.float32: [_FilterFloat], dtypes.float64: [_FilterFloat], dtypes.int16: [_FilterInt], diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index cfa5fe5e3e..4c026590c2 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -986,10 +986,10 @@ class TensorFlowTestCase(googletest.TestCase): err: A float value. msg: An optional string message to append to the failure message. """ - self.assertTrue( - math.fabs(f1 - f2) <= err, - "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg - if msg is not None else "")) + # f1 == f2 is needed here as we might have: f1, f2 = inf, inf + self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err, + "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg + if msg is not None else "")) def assertArrayNear(self, farray1, farray2, err): """Asserts that two float arrays are near each other. diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 6eb9c66d06..1bf2b70c1b 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -107,22 +107,41 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): def setUp(self): self.rng = np.random.RandomState(42) - def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None): + def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None): """Check equivalence between boolean_mask and numpy masking.""" if make_mask is None: make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool) arr = np.random.rand(*arr_shape) mask = make_mask(arr_shape[:ndims_mask]) - masked_arr = arr[mask] + if axis is not None: + mask = make_mask(arr_shape[axis:ndims_mask + axis]) + if axis is None or axis == 0: + masked_arr = arr[mask] + elif axis == 1: + masked_arr = arr[:, mask] + elif axis == 2: + masked_arr = arr[:, :, mask] with self.test_session(): - masked_tensor = array_ops.boolean_mask(arr, mask) + masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) # Leading dimension size of masked_tensor is always unknown until runtime # since we don't how many elements will be kept. - self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:]) + leading = 1 if axis is None else axis + 1 + self.assertAllEqual(masked_tensor.get_shape()[leading:], + masked_arr.shape[leading:]) self.assertAllClose(masked_arr, masked_tensor.eval()) + def testMaskDim1ArrDim2Axis1(self): + ndims_mask = 1 + for arr_shape in [(1, 1), (2, 2), (2, 5)]: + self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) + + def testMaskDim2ArrDim2Axis1(self): + ndims_mask = 2 + for arr_shape in [(1, 1), (2, 2), (2, 5)]: + self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1) + def testMaskDim1ArrDim1(self): ndims_mask = 1 for arr_shape in [(1,), (2,), (3,), (10,)]: @@ -486,7 +505,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): _ = checker2[...] _ = checker2[tuple()] - def testFloatSlicedArrayAndInt64IndicesGPU(self): + def testInt64GPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") with self.test_session(use_gpu=True, force_gpu=True): @@ -497,17 +516,6 @@ class StridedSliceTest(test_util.TensorFlowTestCase): s = array_ops.strided_slice(x, begin, end, strides) self.assertAllEqual([3.], self.evaluate(s)) - def testInt64SlicedArrayAndIndicesGPU(self): - if not test_util.is_gpu_available(): - self.skipTest("No GPU available") - with self.test_session(use_gpu=True, force_gpu=True): - x = constant_op.constant([1, 2, 3], dtype=dtypes.int64) - begin = constant_op.constant([2], dtype=dtypes.int64) - end = constant_op.constant([3], dtype=dtypes.int64) - strides = constant_op.constant([1], dtype=dtypes.int64) - s = array_ops.strided_slice(x, begin, end, strides) - self.assertAllEqual([3], self.evaluate(s)) - def testDegenerateSlices(self): with self.test_session(use_gpu=True): checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR) @@ -1071,5 +1079,16 @@ class PadTest(test_util.TensorFlowTestCase): [0, 0, 0, 0, 0, 0, 0]]) +class InvertPermutationTest(test_util.TensorFlowTestCase): + + def testInvertPermutation(self): + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype) + y = array_ops.invert_permutation(x) + self.assertAllEqual(y.get_shape(), [5]) + self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1]) + + if __name__ == "__main__": test_lib.main() diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 7a610debd1..2767df127e 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -29,7 +29,7 @@ from tensorflow.python.platform import googletest class BincountTest(test_util.TensorFlowTestCase): def test_empty(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0]) self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0]) @@ -42,7 +42,7 @@ class BincountTest(test_util.TensorFlowTestCase): np.float64) def test_values(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1]) arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5] @@ -57,14 +57,14 @@ class BincountTest(test_util.TensorFlowTestCase): math_ops.bincount(np.arange(10000)).eval(), np.ones(10000)) def test_maxlength(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0]) self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1]) self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), []) def test_random_with_weights(self): num_samples = 10000 - with self.test_session(): + with self.test_session(use_gpu=True): np.random.seed(42) for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]: arr = np.random.randint(0, 1000, num_samples) @@ -72,17 +72,27 @@ class BincountTest(test_util.TensorFlowTestCase): weights = np.random.randint(-100, 100, num_samples) else: weights = np.random.random(num_samples) - self.assertAllEqual( - math_ops.bincount(arr, weights).eval(), - np.bincount(arr, weights)) + self.assertAllClose( + math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights)) + + def test_random_without_weights(self): + num_samples = 10000 + with self.test_session(use_gpu=True): + np.random.seed(42) + for dtype in [np.int32, np.float32]: + arr = np.random.randint(0, 1000, num_samples) + weights = np.ones(num_samples).astype(dtype) + self.assertAllClose( + math_ops.bincount(arr, None).eval(), np.bincount(arr, weights)) def test_zero_weights(self): - with self.test_session(): + with self.test_session(use_gpu=True): self.assertAllEqual( math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(), np.zeros(1000)) def test_negative(self): + # unsorted_segment_sum will only report InvalidArgumentError on CPU with self.test_session(): with self.assertRaises(errors.InvalidArgumentError): math_ops.bincount([1, 2, 3, -1, 6, 8]).eval() diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py index 6db3592055..e612b1c134 100644 --- a/tensorflow/python/kernel_tests/bucketize_op_test.py +++ b/tensorflow/python/kernel_tests/bucketize_op_test.py @@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]), boundaries=[0, 3, 8, 11]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def testFloat(self): @@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]), boundaries=[0., 3., 8., 11.]) expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def test2DInput(self): @@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase): constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]), boundaries=[0, 3, 8, 11]) expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]] - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: self.assertAllEqual(expected_out, sess.run(op)) def testInvalidBoundariesOrder(self): op = math_ops._bucketize( constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11]) - with self.test_session() as sess: + with self.test_session(use_gpu=True) as sess: with self.assertRaisesRegexp( errors_impl.InvalidArgumentError, "Expected sorted boundaries"): sess.run(op) diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 6167cb9999..68817cc256 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -439,10 +439,10 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeCPU(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, - dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64, - dtypes_lib.string + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, + dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, + dtypes_lib.complex128, dtypes_lib.string ]: self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False) self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False) @@ -573,9 +573,10 @@ class OnesLikeTest(test.TestCase): def testOnesLike(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, - dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, - dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64 + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, + dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, + dtypes_lib.complex128 ]: numpy_dtype = dtype.as_numpy_dtype with self.test_session(): diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index b67a4e3f89..d92797a7d3 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -17,6 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -49,6 +52,46 @@ class Conv1DTest(test.TestCase): self.assertEqual(len(output), 2) self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) + def testConv1DTranspose(self): + with self.test_session(): + stride = 2 + + # Input, output: [batch, width, depth] + x_shape = [2, 4, 3] + y_shape = [2, 9, 2] + + # Filter: [kernel_width, output_depth, input_depth] + f_shape = [3, 2, 3] + + x = constant_op.constant( + 1.0, shape=x_shape, name="x", dtype=dtypes.float32) + f = constant_op.constant( + 1.0, shape=f_shape, name="filter", dtype=dtypes.float32) + output = nn_ops.conv1d_transpose( + x, f, y_shape, stride=stride, padding="VALID") + value = output.eval() + + cache_values = np.zeros(y_shape, dtype=np.float32) + + # The amount of padding added + pad = 1 + + for n in xrange(x_shape[0]): + for k in xrange(f_shape[1]): + for w in xrange(pad, y_shape[1] - pad): + target = 3.0 + # We add a case for locations divisible by the stride. + w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad + if w_in: + target += 3.0 + cache_values[n, w, k] = target + + # copy values in the border + cache_values[n, 0, k] = cache_values[n, 1, k] + cache_values[n, -1, k] = cache_values[n, -2, k] + + self.assertAllClose(cache_values, value) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index 14622ab467..ec8ac74163 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -21,6 +21,8 @@ from __future__ import print_function import collections import math +import numpy as np + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util @@ -45,8 +47,19 @@ def GetTestConfigs(): class Conv3DTest(test.TestCase): + def _DtypesToTest(self, use_gpu): + if use_gpu: + if not test_util.CudaSupportsHalfMatMulAndConv(): + return [dtypes.float32] + else: + # It is important that float32 comes before float16 here, + # as we will be using its gradients as reference for fp16 gradients. + return [dtypes.float32, dtypes.float16] + else: + return [dtypes.float64, dtypes.float32, dtypes.float16] + def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride, - padding, data_format, use_gpu): + padding, data_format, dtype, use_gpu): total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: @@ -54,13 +67,14 @@ class Conv3DTest(test.TestCase): for s in filter_in_sizes: total_size_2 *= s - # Initializes the input tensor with array containing incrementing - # numbers from 1. - x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] - x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] + # Initializes the input tensor with array containing numbers from 0 to 1. + # We keep the input tensor values fairly small to avoid overflowing float16 + # during the conv3d. + x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] + x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu): - t1 = constant_op.constant(x1, shape=tensor_in_sizes) - t2 = constant_op.constant(x2, shape=filter_in_sizes) + t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) + t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) if isinstance(stride, collections.Iterable): strides = [1] + list(stride) + [1] @@ -81,27 +95,33 @@ class Conv3DTest(test.TestCase): expected): results = [] for data_format, use_gpu in GetTestConfigs(): - result = self._SetupValuesForDevice( - tensor_in_sizes, - filter_in_sizes, - stride, - padding, - data_format, - use_gpu=use_gpu) - results.append(result) - tolerance = 1e-2 if use_gpu else 1e-5 + for dtype in self._DtypesToTest(use_gpu): + result = self._SetupValuesForDevice( + tensor_in_sizes, + filter_in_sizes, + stride, + padding, + data_format, + dtype, + use_gpu=use_gpu) + results.append(result) + with self.test_session() as sess: values = sess.run(results) for value in values: print("expected = ", expected) print("actual = ", value) - self.assertAllClose(expected, value.flatten(), atol=tolerance, - rtol=1e-6) + tol = 1e-6 + if value.dtype == np.float16: + tol = 1e-3 + + self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol) def testConv3D1x1x1Filter(self): expected_output = [ - 30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0, - 204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0 + 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259, + 0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926, + 1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593 ] # These are equivalent to the Conv2D1x1 case. @@ -127,8 +147,10 @@ class Conv3DTest(test.TestCase): # Expected values computed using scipy's correlate function. def testConv3D2x2x2Filter(self): expected_output = [ - 19554., 19962., 20370., 22110., 22590., 23070., 34890., 35730., 36570., - 37446., 38358., 39270., 50226., 51498., 52770., 52782., 54126., 55470. + 3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148, + 6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148, + 9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222, + 10.70023148 ] # expected_shape = [1, 3, 1, 2, 5] self._VerifyValues( @@ -140,69 +162,17 @@ class Conv3DTest(test.TestCase): def testConv3DStrides(self): expected_output = [ - 102., - 151., - 172., - 193., - 214., - 235., - 142., - 438., - 592., - 613., - 634., - 655., - 676., - 394., - 774., - 1033., - 1054., - 1075., - 1096., - 1117., - 646., - 1894., - 2503., - 2524., - 2545., - 2566., - 2587., - 1486., - 2230., - 2944., - 2965., - 2986., - 3007., - 3028., - 1738., - 2566., - 3385., - 3406., - 3427., - 3448., - 3469., - 1990., - 3686., - 4855., - 4876., - 4897., - 4918., - 4939., - 2830., - 4022., - 5296., - 5317., - 5338., - 5359., - 5380., - 3082., - 4358., - 5737., - 5758., - 5779., - 5800., - 5821., - 3334., + 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095, + 0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095, + 0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095, + 0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095, + 1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, + 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095, + 2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381, + 2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095, + 1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095, + 3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, + 3.45238095, 3.46488095, 1.98452381 ] self._VerifyValues( tensor_in_sizes=[1, 5, 8, 7, 1], @@ -212,7 +182,9 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testConv3D2x2x2FilterStride2(self): - expected_output = [19554., 19962., 20370., 50226., 51498., 52770.] + expected_output = [ + 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815 + ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], filter_in_sizes=[2, 2, 2, 3, 3], @@ -222,11 +194,12 @@ class Conv3DTest(test.TestCase): def testConv3DStride3(self): expected_output = [ - 36564., 38022., 39480., 37824., 39354., 40884., 39084., 40686., 42288., - 46644., 48678., 50712., 47904., 50010., 52116., 49164., 51342., 53520., - 107124., 112614., 118104., 108384., 113946., 119508., 109644., 115278., - 120912., 117204., 123270., 129336., 118464., 124602., 130740., 119724., - 125934., 132144. + 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016, + 1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016, + 1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159, + 4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016, + 4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, + 4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159 ] self._VerifyValues( tensor_in_sizes=[1, 6, 7, 8, 2], @@ -237,8 +210,8 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2Same(self): expected_output = [ - 19554., 19962., 20370., 10452., 10710., 10968., 50226., 51498., 52770., - 23844., 24534., 25224. + 3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074, + 9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -248,7 +221,10 @@ class Conv3DTest(test.TestCase): expected=expected_output) def testKernelSmallerThanStride(self): - expected_output = [1., 3., 7., 9., 19., 21., 25., 27.] + expected_output = [ + 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778, + 0.92592593, 1. + ] self._VerifyValues( tensor_in_sizes=[1, 3, 3, 3, 1], filter_in_sizes=[1, 1, 1, 1, 1], @@ -263,9 +239,11 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 1484., 1592., 770., 2240., 2348., 1106., 1149., 1191., 539., 6776., - 6884., 3122., 7532., 7640., 3458., 3207., 3249., 1421., 3005., 3035., - 1225., 3215., 3245., 1309., 1013., 1022., 343. + 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122, + 0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551, + 2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714, + 1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082, + 0.3691691, 0.37244898, 0.125 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -274,7 +252,10 @@ class Conv3DTest(test.TestCase): padding="SAME", expected=expected_output) - expected_output = [1484., 1592., 2240., 2348., 6776., 6884., 7532., 7640.] + expected_output = [ + 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898, + 2.784257 + ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], filter_in_sizes=[2, 2, 2, 1, 1], @@ -288,7 +269,7 @@ class Conv3DTest(test.TestCase): filter_in_sizes=[2, 1, 2, 1, 2], stride=1, padding="VALID", - expected=[50, 60]) + expected=[1.5625, 1.875]) def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, @@ -328,50 +309,58 @@ class Conv3DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] - if test.is_gpu_available() and use_gpu: - data_type = dtypes.float32 + for data_type in self._DtypesToTest(use_gpu=use_gpu): # TODO(mjanusz): Modify gradient_checker to also provide max relative # error and synchronize the tolerance levels between the tests for forward # and backward computations. - if test.is_gpu_available(): + if data_type == dtypes.float64: + tolerance = 1e-8 + elif data_type == dtypes.float32: tolerance = 5e-3 - else: - # As of Aug 2016, higher tolerance is needed for some CPU architectures. - # Runs on a single machine can also generate slightly different errors - # because of multithreading. - tolerance = 8e-3 - else: - data_type = dtypes.float64 - tolerance = 1e-8 - with self.test_session(use_gpu=use_gpu): - orig_input_tensor = constant_op.constant( - input_data, shape=input_shape, dtype=data_type, name="input") - filter_tensor = constant_op.constant( - filter_data, shape=filter_shape, dtype=data_type, name="filter") - - if data_format == "NCDHW": - input_tensor = test_util.NHWCToNCHW(orig_input_tensor) - strides = test_util.NHWCToNCHW(strides) - else: - input_tensor = orig_input_tensor - - conv = nn_ops.conv3d( - input_tensor, filter_tensor, strides, padding, - data_format=data_format, name="conv") - - if data_format == "NCDHW": - conv = test_util.NCHWToNHWC(conv) - - if test_input: - err = gradient_checker.compute_gradient_error(orig_input_tensor, - input_shape, - conv, output_shape) - else: - err = gradient_checker.compute_gradient_error(filter_tensor, - filter_shape, conv, - output_shape) - print("conv3d gradient error = ", err) - self.assertLess(err, tolerance) + elif data_type == dtypes.float16: + tolerance = 1e-3 + + with self.test_session(use_gpu=use_gpu): + orig_input_tensor = constant_op.constant( + input_data, shape=input_shape, dtype=data_type, name="input") + filter_tensor = constant_op.constant( + filter_data, shape=filter_shape, dtype=data_type, name="filter") + + if data_format == "NCDHW": + input_tensor = test_util.NHWCToNCHW(orig_input_tensor) + new_strides = test_util.NHWCToNCHW(strides) + else: + input_tensor = orig_input_tensor + new_strides = strides + + conv = nn_ops.conv3d( + input_tensor, + filter_tensor, + new_strides, + padding, + data_format=data_format, + name="conv") + + if data_format == "NCDHW": + conv = test_util.NCHWToNHWC(conv) + + if test_input: + jacob_t, jacob_n = gradient_checker.compute_gradient( + orig_input_tensor, input_shape, conv, output_shape) + else: + jacob_t, jacob_n = gradient_checker.compute_gradient( + filter_tensor, filter_shape, conv, output_shape) + + if data_type != dtypes.float16: + reference_jacob_t = jacob_t + err = np.fabs(jacob_t - jacob_n).max() + else: + # Compare fp16 theoretical gradients to fp32 theoretical gradients, + # since fp16 numerical gradients are too imprecise. + err = np.fabs(jacob_t - reference_jacob_t).max() + + print("conv3d gradient error = ", err) + self.assertLess(err, tolerance) def ConstructAndTestGradient(self, **kwargs): for data_format, use_gpu in GetTestConfigs(): diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py index 3298092fbe..f7ae1a0f37 100644 --- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py +++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py @@ -122,7 +122,9 @@ class DepthwiseConv2DTest(test.TestCase): x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: - if data_type == dtypes.float32: + if data_type == dtypes.float16: + tolerance = 1e-5 + elif data_type == dtypes.float32: tolerance = 1e-5 else: self.assertEqual(data_type, dtypes.float64) @@ -169,7 +171,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2D,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, stride, padding, data_type, use_gpu=True) @@ -181,7 +183,7 @@ class DepthwiseConv2DTest(test.TestCase): padding) in enumerate(ConfigsToTest()): print("Testing DepthwiseConv2DFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) - for data_type in [dtypes.float32, dtypes.float64]: + for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: self._VerifyValues( input_size, filter_size, @@ -318,7 +320,9 @@ class DepthwiseConv2DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): - if data_type == dtypes.float32: + if data_type == dtypes.float16: + tolerance = 0.002 + elif data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) @@ -369,6 +373,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DInputGrad is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -389,6 +395,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DInputGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DInputGradFormat is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -407,6 +415,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGrad,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DFilterGrad is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, @@ -427,6 +437,8 @@ class DepthwiseConv2DTest(test.TestCase): print("Testing DepthwiseConv2DFilterGradFormat,", index, "th config:", input_size, "*", filter_size, "stride:", stride, "padding:", padding) + # Note: float16 test for DepthwiseConv2DFilterGradFormat is not enabled, + # calculations are not very precise. for data_type in [dtypes.float32, dtypes.float64]: self._ConstructAndTestGradient( input_size, diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD index e21446c2ef..e220d05692 100644 --- a/tensorflow/python/kernel_tests/distributions/BUILD +++ b/tensorflow/python/kernel_tests/distributions/BUILD @@ -193,6 +193,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], + tags = ["manual"], # b/69001419 ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py index ebc89f15c5..e24e8ade73 100644 --- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py +++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py @@ -250,13 +250,11 @@ class MultinomialTest(test.TestCase): theta = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32) theta /= np.sum(theta, 1)[..., array_ops.newaxis] - # Ideally we'd be able to test broadcasting but, the multinomial sampler - # doesn't support different total counts. - n = np.float32(5) + n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32) with self.test_session() as sess: - # batch_shape=[2], event_shape=[3] + # batch_shape=[3, 2], event_shape=[3] dist = multinomial.Multinomial(n, theta) - x = dist.sample(int(250e3), seed=1) + x = dist.sample(int(1000e3), seed=1) sample_mean = math_ops.reduce_mean(x, 0) x_centered = x - sample_mean[array_ops.newaxis, ...] sample_cov = math_ops.reduce_mean(math_ops.matmul( @@ -291,9 +289,9 @@ class MultinomialTest(test.TestCase): def testSampleUnbiasedNonScalarBatch(self): with self.test_session() as sess: dist = multinomial.Multinomial( - total_count=5., + total_count=[7., 6., 5.], logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32))) - n = int(3e3) + n = int(3e4) x = dist.sample(n, seed=0) sample_mean = math_ops.reduce_mean(x, 0) # Cyclically rotate event dims left. diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index a126180414..6be8997cab 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import numpy as np from tensorflow.python.framework import constant_op @@ -1341,11 +1342,14 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") + # Do not propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] + for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1361,6 +1365,30 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) + # Propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" + expected_input_backprop_cudnn = expected_input_backprop_tf_cpu + + for v2 in [True, False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop_cudnn, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding="VALID", + use_gpu=True, + v2=v2) + + if saved_nanprop: + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop + else: + del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] + def _testMaxPoolGradDirectWithNans2_2(self): input_data = [float("nan")] * 16 output_backprop = [ @@ -1391,11 +1419,14 @@ class PoolingTest(test.TestCase): return # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP") + # Do not propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0" expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] + for v2 in [True, False]: self._testMaxPoolGradDirect( input_data, @@ -1411,6 +1442,30 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) + # Propagate the diff in cases of NaNs + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" + expected_input_backprop_cudnn = expected_input_backprop_tf_cpu + + for v2 in [True, False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop_cudnn, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding="VALID", + use_gpu=True, + v2=v2) + + if saved_nanprop: + os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop + else: + del os.environ["TF_ENABLE_MAXPOOL_NANPROP"] + def testMaxPoolGradDirect(self): self._testMaxPoolGradDirect1_1() self._testMaxPoolGradDirect1_2() diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 5630259b7b..223a4b2c87 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -35,6 +35,9 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import io_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test +from tensorflow.python.training import coordinator +from tensorflow.python.training import input as input_lib +from tensorflow.python.training import queue_runner_impl from tensorflow.python.util import compat prefix_path = "tensorflow/core/lib" @@ -1011,6 +1014,25 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) + def testReadFromSameFile(self): + with self.test_session() as sess: + reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") + reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") + filename_queue = input_lib.string_input_producer( + [self.db_path], num_epochs=None) + key1, value1 = reader1.read(filename_queue) + key2, value2 = reader2.read(filename_queue) + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + for _ in range(3): + for _ in range(10): + k1, v1, k2, v2 = sess.run([key1, value1, key2, value2]) + self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2)) + self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2)) + coord.request_stop() + coord.join(threads) + def testReadFromFolder(self): with self.test_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_folder") @@ -1029,6 +1051,26 @@ class LMDBReaderTest(test.TestCase): "\\(requested 1, current size 0\\)"): k, v = sess.run([key, value]) + def testReadFromFileRepeatedly(self): + with self.test_session() as sess: + reader = io_ops.LMDBReader(name="test_read_from_file_repeated") + filename_queue = input_lib.string_input_producer( + [self.db_path], num_epochs=None) + key, value = reader.read(filename_queue) + + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess, coord=coord) + # Iterate over the lmdb 3 times. + for _ in range(3): + # Go over all 10 records each time. + for j in range(10): + k, v = sess.run([key, value]) + self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j))) + self.assertAllEqual( + compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j)))) + coord.request_stop() + coord.join(threads) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 516a9d000e..99f9f09690 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -323,8 +323,9 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): def testBadIndices(self): # Note: GPU kernel does not return the out-of-range error needed for this # test, so this test is marked as cpu-only. + # Note: With PR #13055 a negative index will be ignored silently. with self.test_session(use_gpu=False): - for bad in [[-1]], [[7]]: + for bad in [[2]], [[7]]: unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2) with self.assertRaisesOpError( r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]): @@ -360,6 +361,32 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): x_init_value=np_x.astype(np.double), delta=1) self.assertAllClose(jacob_t, jacob_n) + def testDropNegatives(self): + # Note: the test is done by replacing segment_ids with 8 to -1 + # for index and replace values generated by numpy with 0. + dtypes = [ + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64, + dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128 + ] + indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) + num_segments = 12 + for indices in indices_flat, indices_flat.reshape(5, 2): + shape = indices.shape + (2,) + for dtype in dtypes: + with self.test_session(use_gpu=True): + tf_x, np_x = self._input(shape, dtype=dtype) + np_ans = self._segmentReduce( + indices, np_x, np.add, op2=None, num_out_rows=num_segments) + # Replace np_ans[8] with 0 for the value + np_ans[8:] = 0 + # Replace 8 with -1 in indices + np.place(indices, indices == 8, [-1]) + s = math_ops.unsorted_segment_sum( + data=tf_x, segment_ids=indices, num_segments=num_segments) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) + self.assertShapeEqual(np_ans, s) + class SparseSegmentReductionHelper(SegmentReductionHelper): diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index a9fc699b21..7368251ab6 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -258,6 +258,16 @@ class ShapeOpsTest(test.TestCase): self.assertAllEqual([True], array_ops.expand_dims(inp, 0).eval()) self.assertAllEqual([True], array_ops.expand_dims(inp, -1).eval()) + def testExpandDimsDimType(self): + for dtype in [dtypes.int32, dtypes.int64]: + x = np.zeros([2]) + np_ans = np.expand_dims(x, axis=0) + with self.test_session(use_gpu=True): + tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype)) + tf_ans = tensor.eval() + self.assertShapeEqual(np_ans, tensor) + self.assertAllEqual(np_ans, tf_ans) + def _compareSqueeze(self, x, squeeze_dims, use_gpu): with self.test_session(use_gpu=use_gpu): if squeeze_dims: diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index a50f53b3cd..6390b7c518 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.platform import test @@ -61,6 +62,32 @@ class UniqueTest(test.TestCase): for i in range(len(x)): self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii')) + def testInt32Axis(self): + x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) + with self.test_session() as sess: + y0, idx0 = gen_array_ops.unique_v2(x, axis=[0]) + tf_y0, tf_idx0 = sess.run([y0, idx0]) + y1, idx1 = gen_array_ops.unique_v2(x, axis=[1]) + tf_y1, tf_idx1 = sess.run([y1, idx1]) + self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) + self.assertAllEqual(tf_idx0, np.array([0, 0, 1])) + self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]])) + self.assertAllEqual(tf_idx1, np.array([0, 1, 1])) + + def testInt32V2(self): + # This test is only temporary, once V2 is used + # by default, the axis will be wrapped to allow `axis=None`. + x = np.random.randint(2, high=10, size=7000) + with self.test_session() as sess: + y, idx = gen_array_ops.unique_v2(x, axis=[]) + tf_y, tf_idx = sess.run([y, idx]) + + self.assertEqual(len(x), len(tf_idx)) + self.assertEqual(len(tf_y), len(np.unique(x))) + for i in range(len(x)): + self.assertEqual(x[i], tf_y[tf_idx[i]]) + + class UniqueWithCountsTest(test.TestCase): def testInt32(self): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 74b85da845..6be2bc3e76 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -221,7 +221,7 @@ class Layer(object): Weight updates (for instance, the updates of the moving mean and variance in a BatchNormalization layer) may be dependent on the inputs passed - when calling a layer. Hence, when reusing a same layer on + when calling a layer. Hence, when reusing the same layer on different inputs `a` and `b`, some entries in `layer.updates` may be dependent on `a` and some on `b`. This method automatically keeps track of dependencies. @@ -295,9 +295,9 @@ class Layer(object): """Add loss tensor(s), potentially dependent on layer inputs. Some losses (for instance, activity regularization losses) may be dependent - on the inputs passed when calling a layer. Hence, when reusing a same layer - on different inputs `a` and `b`, some entries in `layer.losses` may be - dependent on `a` and some on `b`. This method automatically keeps track + on the inputs passed when calling a layer. Hence, when reusing the same + layer on different inputs `a` and `b`, some entries in `layer.losses` may + be dependent on `a` and some on `b`. This method automatically keeps track of dependencies. The `get_losses_for` method allows to retrieve the losses relevant to a diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 0c7ce02835..8c327d7e27 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -813,6 +813,7 @@ def conv3d(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) @@ -1746,6 +1747,7 @@ def conv3d_transpose(inputs, bias_constraint=bias_constraint, trainable=trainable, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 9d9b2b3941..83237b8733 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -26,6 +26,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.layers import base @@ -239,6 +240,12 @@ class BatchNormalization(base.Layer): raise ValueError('Unsupported axis, fused batch norm only supports ' 'axis == [1] or axis == [3]') + # Raise parameters of fp16 batch norm to fp32 + if self.dtype == dtypes.float16: + param_dtype = dtypes.float32 + else: + param_dtype = self.dtype or dtypes.float32 + axis_to_dim = {x: input_shape[x].value for x in self.axis} for x in axis_to_dim: if axis_to_dim[x] is None: @@ -260,28 +267,34 @@ class BatchNormalization(base.Layer): self.axis[idx] = x + 1 # Account for added dimension if self.scale: - self.gamma = self.add_variable(name='gamma', - shape=param_shape, - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint, - trainable=True) + self.gamma = self.add_variable( + name='gamma', + shape=param_shape, + dtype=param_dtype, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + trainable=True) else: self.gamma = None if self.fused: - self._gamma_const = array_ops.constant(1.0, shape=param_shape) + self._gamma_const = array_ops.constant( + 1.0, dtype=param_dtype, shape=param_shape) if self.center: - self.beta = self.add_variable(name='beta', - shape=param_shape, - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True) + self.beta = self.add_variable( + name='beta', + shape=param_shape, + dtype=param_dtype, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True) else: self.beta = None if self.fused: - self._beta_const = array_ops.constant(0.0, shape=param_shape) + self._beta_const = array_ops.constant( + 0.0, dtype=param_dtype, shape=param_shape) # Disable variable partitioning when creating the moving mean and variance try: @@ -293,12 +306,14 @@ class BatchNormalization(base.Layer): self.moving_mean = self.add_variable( name='moving_mean', shape=param_shape, + dtype=param_dtype, initializer=self.moving_mean_initializer, trainable=False) self.moving_variance = self.add_variable( name='moving_variance', shape=param_shape, + dtype=param_dtype, initializer=self.moving_variance_initializer, trainable=False) @@ -312,10 +327,12 @@ class BatchNormalization(base.Layer): # stack to be cleared. The nested ones use a `lambda` to set the desired # device and ignore any devices that may be set by the custom getter. def _renorm_variable(name, shape): - var = self.add_variable(name=name, - shape=shape, - initializer=init_ops.zeros_initializer(), - trainable=False) + var = self.add_variable( + name=name, + shape=shape, + dtype=param_dtype, + initializer=init_ops.zeros_initializer(), + trainable=False) return var with ops.device(None): @@ -356,7 +373,6 @@ class BatchNormalization(base.Layer): def _fused_batch_norm(self, inputs, training): """Returns the output of fused batch norm.""" - # TODO(reedwm): Add support for fp16 inputs. beta = self.beta if self.center else self._beta_const gamma = self.gamma if self.scale else self._gamma_const @@ -752,6 +768,7 @@ def batch_normalization(inputs, virtual_batch_size=virtual_batch_size, adjustment=adjustment, name=name, + dtype=inputs.dtype.base_dtype, _reuse=reuse, _scope=name) return layer.apply(inputs, training=training) diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index 90ebdc8c86..7c91c3284e 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -68,11 +68,12 @@ class BNTest(test.TestCase): use_gpu, is_fused, restore=False, - freeze_mode=False): + freeze_mode=False, + dtype=dtypes.float32): ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype='float32', shape=shape) + image = array_ops.placeholder(dtype=dtype, shape=shape) loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode) if restore: saver.restore(sess, checkpoint_path) @@ -80,7 +81,7 @@ class BNTest(test.TestCase): sess.run(variables.global_variables_initializer()) np.random.seed(0) for _ in range(2): - image_val = np.random.rand(*shape).astype(np.float32) + image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype) sess.run([loss, train_op], feed_dict={image: image_val}) if restore: all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) @@ -90,15 +91,69 @@ class BNTest(test.TestCase): saver.save(sess, checkpoint_path) def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused): + dtype = image_val.dtype ops.reset_default_graph() graph = ops.get_default_graph() with self.test_session(graph=graph, use_gpu=use_gpu) as sess: - image = array_ops.placeholder(dtype='float32', shape=shape) + image = array_ops.placeholder(dtype=dtype, shape=shape) loss, _, saver = self._simple_model(image, is_fused, True) saver.restore(sess, checkpoint_path) loss_val = sess.run(loss, feed_dict={image: image_val}) return loss_val + def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu, + infer_use_gpu): + batch, height, width, input_channels = 2, 4, 5, 3 + shape = [batch, height, width, input_channels] + checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' % + (dtype, train1_use_gpu, train2_use_gpu, + infer_use_gpu)) + + self._train( + checkpoint, + shape, + use_gpu=train1_use_gpu, + is_fused=True, + restore=False, + freeze_mode=False, + dtype=dtype) + + train_vars = self._train( + checkpoint, + shape, + use_gpu=train2_use_gpu, + is_fused=True, + restore=True, + freeze_mode=False, + dtype=dtype) + + np.random.seed(0) + image_val = np.random.rand(batch, height, width, input_channels).astype( + dtype.as_numpy_dtype) + loss_val = self._infer( + checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True) + + return train_vars, loss_val + + def testHalfPrecision(self): + ref_vars, ref_loss = self._trainEvalSequence( + dtype=dtypes.float32, + train1_use_gpu=True, + train2_use_gpu=True, + infer_use_gpu=True) + + self.assertEqual(len(ref_vars), 5) + + for train1_use_gpu in [True, False]: + for train2_use_gpu in [True, False]: + for infer_use_gpu in [True, False]: + test_vars, test_loss = self._trainEvalSequence( + dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu) + self.assertEqual(len(test_vars), 5) + for test_var, ref_var in zip(test_vars, ref_vars): + self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) + self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3) + def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b, use_gpu_checkpoint_a, use_gpu_checkpoint_b, use_gpu_test_a, use_gpu_test_b, freeze_mode): @@ -218,6 +273,35 @@ class BNTest(test.TestCase): ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), bn.trainable_variables) + def testCreateFusedBNFloat16(self): + # Call layer. + bn = normalization_layers.BatchNormalization(axis=1, fused=True) + inputs = random_ops.random_uniform( + (5, 4, 3, 3), seed=1, dtype=dtypes.float16) + training = array_ops.placeholder(dtype='bool') + outputs = bn.apply(inputs, training=training) + + # Verify shape. + self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3]) + + # Verify layer attributes. + self.assertEqual(len(bn.updates), 2) + self.assertEqual(len(bn.variables), 4) + self.assertEqual(len(bn.trainable_variables), 2) + self.assertEqual(len(bn.non_trainable_variables), 2) + for var in bn.variables: + self.assertEqual(var.dtype, dtypes.float32_ref) + + # Test that updates were created and added to UPDATE_OPS. + self.assertEqual(len(bn.updates), 2) + self.assertListEqual( + ops.get_collection(ops.GraphKeys.UPDATE_OPS), bn.updates) + + # Test that weights were created and added to TRAINABLE_VARIABLES. + self.assertListEqual( + ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), + bn.trainable_variables) + def test3DInputAxis1(self): epsilon = 1e-3 bn = normalization_layers.BatchNormalization( diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index c3c7ecd080..38eff54c69 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1132,7 +1132,7 @@ def concat(values, axis, name="concat"): return gen_array_ops._concat_v2(values=values, axis=axis, name=name) -def boolean_mask(tensor, mask, name="boolean_mask"): +def boolean_mask(tensor, mask, name="boolean_mask", axis=None): """Apply boolean mask to tensor. Numpy equivalent is `tensor[mask]`. ```python @@ -1146,11 +1146,17 @@ def boolean_mask(tensor, mask, name="boolean_mask"): the first K dimensions of `tensor`'s shape. We then have: `boolean_mask(tensor, mask)[i, j1,...,jd] = tensor[i1,...,iK,j1,...,jd]` where `(i1,...,iK)` is the ith `True` entry of `mask` (row-major order). + The `axis` could be used with `mask` to indicate the axis to mask from. + In that case, `axis + dim(mask) <= dim(tensor)` and `mask`'s shape must match + the first `axis + dim(mask)` dimensions of `tensor`'s shape. Args: tensor: N-D tensor. mask: K-D boolean tensor, K <= N and K must be known statically. name: A name for this operation (optional). + axis: A 0-D int Tensor representing the axis in `tensor` to mask from. + By default, axis is 0 which will mask from the first dimension. Otherwise + K + axis <= N. Returns: (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding @@ -1169,10 +1175,10 @@ def boolean_mask(tensor, mask, name="boolean_mask"): ``` """ - def _apply_mask_1d(reshaped_tensor, mask): + def _apply_mask_1d(reshaped_tensor, mask, axis=None): """Mask tensor along dimension 0 with a 1-D mask.""" indices = squeeze(where(mask), squeeze_dims=[1]) - return gather(reshaped_tensor, indices) + return gather(reshaped_tensor, indices, axis=axis) with ops.name_scope(name, values=[tensor, mask]): tensor = ops.convert_to_tensor(tensor, name="tensor") @@ -1187,19 +1193,23 @@ def boolean_mask(tensor, mask, name="boolean_mask"): raise ValueError( "Number of mask dimensions must be specified, even if some dimensions" " are None. E.g. shape=[None] is ok, but shape=None is not.") - shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask) + axis = 0 if axis is None else axis + shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod(shape(tensor)[:ndims_mask], [0]) + leading_size = gen_math_ops._prod( + shape(tensor)[axis:axis + ndims_mask], [0]) tensor = reshape(tensor, - concat([[leading_size], - shape(tensor)[ndims_mask:]], 0)) - first_dim = shape_tensor[:ndims_mask].num_elements() + concat([ + shape(tensor)[:axis], [leading_size], + shape(tensor)[axis + ndims_mask:] + ], 0)) + first_dim = shape_tensor[axis:axis + ndims_mask].num_elements() tensor.set_shape( - tensor_shape.as_shape([first_dim]) - .concatenate(shape_tensor[ndims_mask:])) + tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim]) + .concatenate(shape_tensor[axis + ndims_mask:])) mask = reshape(mask, [-1]) - return _apply_mask_1d(tensor, mask) + return _apply_mask_1d(tensor, mask, axis) def sparse_mask(a, mask_indices, name=None): @@ -1521,7 +1531,8 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`. + `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, + `complex64`, `complex128` or `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. @@ -1572,8 +1583,8 @@ def ones_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, `complex128` or - `bool`. + `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, + `complex64`, `complex128` or `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 923696a553..2accedf1b9 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -196,7 +196,7 @@ class Dirichlet(distribution.Distribution): alpha=self.concentration, dtype=self.dtype, seed=seed) - return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keep_dims=True) + return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keepdims=True) @distribution_util.AppendDocstring(_dirichlet_sample_note) def _log_prob(self, x): diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 00b5697c83..04762565c2 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops @@ -140,6 +141,8 @@ class Multinomial(distribution.Distribution): counts = [[2., 1, 1], [3, 1, 1]] dist.prob(counts) # Shape [2] + + dist.sample(5) # Shape [5, 2, 3] ``` """ @@ -231,29 +234,36 @@ class Multinomial(distribution.Distribution): def _sample_n(self, n, seed=None): n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32) - if self.total_count.get_shape().ndims is not None: - if self.total_count.get_shape().ndims != 0: - raise NotImplementedError( - "Sample only supported for scalar number of draws.") - elif self.validate_args: - is_scalar = check_ops.assert_rank( - n_draws, 0, - message="Sample only supported for scalar number of draws.") - n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws) k = self.event_shape_tensor()[0] - # Flatten batch dims so logits has shape [B, k], - # where B = reduce_prod(self.batch_shape_tensor()). - x = random_ops.multinomial( - logits=array_ops.reshape(self.logits, [-1, k]), - num_samples=n * n_draws, - seed=seed) - x = array_ops.reshape(x, shape=[-1, n, n_draws]) - x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), - axis=-2) # shape: [B, n, k] + + # boardcast the total_count and logits to same shape + n_draws = array_ops.ones_like( + self.logits[..., 0], dtype=n_draws.dtype) * n_draws + logits = array_ops.ones_like( + n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits + + # flatten the total_count and logits + flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] + flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] + + # computes each total_count and logits situation by map_fn + def _sample_single(args): + logits, n_draw = args[0], args[1] # [K], [] + x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw, + seed) # [1, n*n_draw] + x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] + x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] + return x + + x = functional_ops.map_fn( + _sample_single, [flat_logits, flat_ndraws], + dtype=self.dtype) # [B1B2...Bm, n, k] + + # reshape the results to proper shape x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) - x = array_ops.reshape(x, final_shape) - return math_ops.cast(x, self.dtype) + x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] + return x @distribution_util.AppendDocstring(_multinomial_sample_note) def _log_prob(self, counts): diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 2946dbe81e..b9c89d62d5 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1119,9 +1119,8 @@ def rgb_to_grayscale(images, name=None): # https://en.wikipedia.org/wiki/Luma_%28video%29 rgb_weights = [0.2989, 0.5870, 0.1140] rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) - gray_float = math_ops.reduce_sum(flt_image * rgb_weights, - rank_1, - keep_dims=True) + gray_float = math_ops.reduce_sum( + flt_image * rgb_weights, rank_1, keepdims=True) gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) return convert_image_dtype(gray_float, orig_dtype, name=name) @@ -1212,26 +1211,7 @@ def adjust_hue(image, delta, name=None): orig_dtype = image.dtype flt_image = convert_image_dtype(image, dtypes.float32) - # TODO(zhengxq): we will switch to the fused version after we add a GPU - # kernel for that. - fused = os.environ.get('TF_ADJUST_HUE_FUSED', '') - fused = fused.lower() in ('true', 't', '1') - - if not fused: - hsv = gen_image_ops.rgb_to_hsv(flt_image) - - hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1]) - saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1]) - value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1]) - - # Note that we add 2*pi to guarantee that the resulting hue is a positive - # floating point number since delta is [-0.5, 0.5]. - hue = math_ops.mod(hue + (delta + 1.), 1.) - - hsv_altered = array_ops.concat([hue, saturation, value], 2) - rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered) - else: - rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) + rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) return convert_image_dtype(rgb_altered, orig_dtype) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 2cb467c891..be9beee633 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat +from tensorflow.python.util import deprecation # Names below are lower_case. # pylint: disable=invalid-name @@ -438,7 +439,14 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): # pylint: disable=redefined-builtin -def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): +@deprecation.deprecated_args( + None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims') +def norm(tensor, + ord='euclidean', + axis=None, + keepdims=None, + name=None, + keep_dims=None): r"""Computes the norm of vectors, matrices, and tensors. This function can compute several different vector norms (the 1-norm, the @@ -471,13 +479,14 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): can be either a matrix or a batch of matrices at runtime, pass `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are computed. - keep_dims: If True, the axis indicated in `axis` are kept with size 1. + keepdims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. + keep_dims: Deprecated alias for `keepdims`. Returns: output: A `Tensor` of the same type as tensor, containing the vector or - matrix norms. If `keep_dims` is True then the rank of output is equal to + matrix norms. If `keepdims` is True then the rank of output is equal to the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar, if `axis` is an integer, the rank of `output` is one less than the rank of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less @@ -496,6 +505,10 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): higher order tensors. @end_compatibility """ + keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims, + 'keep_dims', keep_dims) + if keepdims is None: + keepdims = False is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and len(axis) == 2) @@ -528,25 +541,25 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None): # matrices. result = math_ops.sqrt( math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keep_dims=True)) + tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: sum_axis = None if axis is None else axis[0] - result = math_ops.reduce_sum(result, sum_axis, keep_dims=True) + result = math_ops.reduce_sum(result, sum_axis, keepdims=True) if is_matrix_norm: - result = math_ops.reduce_max(result, axis[-1], keep_dims=True) + result = math_ops.reduce_max(result, axis[-1], keepdims=True) elif ord == np.inf: if is_matrix_norm: - result = math_ops.reduce_sum(result, axis[1], keep_dims=True) + result = math_ops.reduce_sum(result, axis[1], keepdims=True) max_axis = None if axis is None else axis[0] - result = math_ops.reduce_max(result, max_axis, keep_dims=True) + result = math_ops.reduce_max(result, max_axis, keepdims=True) else: # General p-norms (positive p only) result = math_ops.pow( - math_ops.reduce_sum( - math_ops.pow(result, ord), axis, keep_dims=True), 1.0 / ord) - if not keep_dims: + math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True), + 1.0 / ord) + if not keepdims: result = array_ops.squeeze(result, axis) return result diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py index 5732c756ce..04eeb00518 100644 --- a/tensorflow/python/ops/math_grad_test.py +++ b/tensorflow/python/ops/math_grad_test.py @@ -113,6 +113,23 @@ class MinOrMaxGradientTest(test.TestCase): self.assertLess(error, 1e-4) +class MaximumOrMinimumGradientTest(test.TestCase): + + def testMaximumGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs = math_ops.maximum(inputs, 3.0) + with self.test_session(): + error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) + self.assertLess(error, 1e-4) + + def testMinimumGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs = math_ops.minimum(inputs, 2.0) + with self.test_session(): + error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4]) + self.assertLess(error, 1e-4) + + class ProdGradientTest(test.TestCase): def testProdGradient(self): diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 4c400423b6..e2e23dccef 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -170,14 +170,13 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops.gen_math_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat -from tensorflow.python.util.deprecation import deprecated -from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util import deprecation # Aliases for some automatically-generated names. linspace = gen_math_ops.lin_space -arg_max = deprecated(None, "Use `argmax` instead")(arg_max) # pylint: disable=used-before-assignment -arg_min = deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment +arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max) # pylint: disable=used-before-assignment +arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment def _set_doc(doc): @@ -190,7 +189,8 @@ def _set_doc(doc): # pylint: disable=redefined-builtin -@deprecated_args(None, "Use the `axis` argument instead", "dimension") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", + "dimension") @_set_doc( gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace( "dimension", "axis")) @@ -208,7 +208,8 @@ def argmax(input, return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type) -@deprecated_args(None, "Use the `axis` argument instead", "dimension") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", + "dimension") @_set_doc( gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace( "dimension", "axis")) @@ -324,7 +325,7 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`") # TODO(aselle): put deprecation in after another round of global code changes -@deprecated( +@deprecation.deprecated( "2016-12-30", "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`") def _mul(x, y, name=None): @@ -343,7 +344,7 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`") # TODO(aselle): put deprecation in after another round of global code changes -@deprecated( +@deprecation.deprecated( "2016-12-30", "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`") def _sub(x, y, name=None): @@ -381,8 +382,9 @@ def negative(x, name=None): # pylint: disable=g-docstring-has-escape -@deprecated("2016-12-30", - "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`") +@deprecation.deprecated( + "2016-12-30", + "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`") def _neg(x, name=None): """Computes numerical negative value element-wise. @@ -1269,24 +1271,27 @@ def _ReductionDims(x, axis, reduction_indices): return range(0, array_ops.rank(x)) -def _may_reduce_to_scalar(keep_dims, axis, reduction_indices, output): +def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output): """Set a reduction's output's shape to be a scalar if we are certain.""" - if (not output.shape.is_fully_defined()) and (not keep_dims) and ( + if (not output.shape.is_fully_defined()) and (not keepdims) and ( axis is None) and (reduction_indices is None): output.set_shape(()) return output +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_sum(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the sum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1299,7 +1304,7 @@ def reduce_sum(input_tensor, tf.reduce_sum(x) # 6 tf.reduce_sum(x, 0) # [2, 2, 2] tf.reduce_sum(x, 1) # [3, 3] - tf.reduce_sum(x, 1, keep_dims=True) # [[3], [3]] + tf.reduce_sum(x, 1, keepdims=True) # [[3], [3]] tf.reduce_sum(x, [0, 1]) # 6 ``` @@ -1308,9 +1313,10 @@ def reduce_sum(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1319,26 +1325,34 @@ def reduce_sum(input_tensor, Equivalent to np.sum @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._sum( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def count_nonzero(input_tensor, axis=None, - keep_dims=False, + keepdims=None, dtype=dtypes.int64, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes number of nonzero elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1355,7 +1369,7 @@ def count_nonzero(input_tensor, tf.count_nonzero(x) # 3 tf.count_nonzero(x, 0) # [1, 2, 0] tf.count_nonzero(x, 1) # [1, 2] - tf.count_nonzero(x, 1, keep_dims=True) # [[1], [2]] + tf.count_nonzero(x, 1, keepdims=True) # [[1], [2]] tf.count_nonzero(x, [0, 1]) # 3 ``` @@ -1364,14 +1378,20 @@ def count_nonzero(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. dtype: The output dtype; defaults to `tf.int64`. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor (number of nonzero values). """ + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + with ops.name_scope(name, "count_nonzero", [input_tensor]): input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor") zero = input_tensor.dtype.as_numpy_dtype() @@ -1380,21 +1400,24 @@ def count_nonzero(input_tensor, # int64 reduction happens on GPU to_int64(gen_math_ops.not_equal(input_tensor, zero)), axis=axis, - keep_dims=keep_dims, + keepdims=keepdims, reduction_indices=reduction_indices), dtype=dtype) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_mean(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the mean of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1414,36 +1437,58 @@ def reduce_mean(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @compatibility(numpy) Equivalent to np.mean + + Please note that `np.mean` has a `dtype` parameter that could be used to + specify the output type. By default this is `dtype=float64`. On the other + hand, `tf.reduce_mean` has an aggressive type inference from `input_tensor`, + for example: + + ```python + x = tf.constant([1, 0, 1, 0]) + tf.reduce_mean(x) # 0 + y = tf.constant([1., 0., 1., 0.]) + tf.reduce_mean(y) # 0.5 + ``` + @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._mean( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_prod(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the product of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1454,9 +1499,10 @@ def reduce_prod(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1465,25 +1511,33 @@ def reduce_prod(input_tensor, Equivalent to np.prod @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._prod( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_min(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the minimum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1494,9 +1548,10 @@ def reduce_min(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1505,25 +1560,32 @@ def reduce_min(input_tensor, Equivalent to np.min @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._min( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_max(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the maximum of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1534,9 +1596,10 @@ def reduce_max(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1545,25 +1608,32 @@ def reduce_max(input_tensor, Equivalent to np.max @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._max( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_all(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the "logical and" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1583,9 +1653,10 @@ def reduce_all(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1594,25 +1665,32 @@ def reduce_all(input_tensor, Equivalent to np.all @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._all( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_any(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes the "logical or" of elements across dimensions of a tensor. Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1632,9 +1710,10 @@ def reduce_any(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. @@ -1643,25 +1722,32 @@ def reduce_any(input_tensor, Equivalent to np.any @end_compatibility """ - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, gen_math_ops._any( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), - keep_dims, + keepdims, name=name)) +@deprecation.deprecated_args( + None, "keep_dims is deprecated, use keepdims instead", "keep_dims") def reduce_logsumexp(input_tensor, axis=None, - keep_dims=False, + keepdims=None, name=None, - reduction_indices=None): + reduction_indices=None, + keep_dims=None): """Computes log(sum(exp(elements across dimensions of a tensor))). Reduces `input_tensor` along the dimensions given in `axis`. - Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each - entry in `axis`. If `keep_dims` is true, the reduced dimensions + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. If `axis` has no entries, all dimensions are reduced, and a @@ -1678,7 +1764,7 @@ def reduce_logsumexp(input_tensor, tf.reduce_logsumexp(x) # log(6) tf.reduce_logsumexp(x, 0) # [log(2), log(2), log(2)] tf.reduce_logsumexp(x, 1) # [log(3), log(3)] - tf.reduce_logsumexp(x, 1, keep_dims=True) # [[log(3)], [log(3)]] + tf.reduce_logsumexp(x, 1, keepdims=True) # [[log(3)], [log(3)]] tf.reduce_logsumexp(x, [0, 1]) # log(6) ``` @@ -1687,19 +1773,24 @@ def reduce_logsumexp(input_tensor, axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. - keep_dims: If true, retains reduced dimensions with length 1. + keepdims: If true, retains reduced dimensions with length 1. name: A name for the operation (optional). reduction_indices: The old (deprecated) name for axis. + keep_dims: Deprecated alias for `keepdims`. Returns: The reduced tensor. """ + keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims, + "keep_dims", keep_dims) + if keepdims is None: + keepdims = False with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name: raw_max = reduce_max( input_tensor, axis=axis, reduction_indices=reduction_indices, - keep_dims=True) + keepdims=True) my_max = array_ops.stop_gradient( array_ops.where( gen_math_ops.is_finite(raw_max), raw_max, @@ -1708,13 +1799,13 @@ def reduce_logsumexp(input_tensor, reduce_sum( gen_math_ops.exp(input_tensor - my_max), axis, - keep_dims=True, + keepdims=True, reduction_indices=reduction_indices)) + my_max - if not keep_dims: + if not keepdims: if isinstance(axis, int): axis = [axis] result = array_ops.squeeze(result, axis) - return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, result) + return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result) def trace(x, name=None): @@ -2216,9 +2307,10 @@ def bincount(arr, maxlength = ops.convert_to_tensor( maxlength, name="maxlength", dtype=dtypes.int32) output_size = gen_math_ops.minimum(maxlength, output_size) - weights = ( - ops.convert_to_tensor(weights, name="weights") - if weights is not None else constant_op.constant([], dtype)) + if weights is not None: + weights = ops.convert_to_tensor(weights, name="weights") + return gen_math_ops.unsorted_segment_sum(weights, arr, output_size) + weights = constant_op.constant([], dtype) return gen_math_ops.bincount(arr, output_size, weights) @@ -2381,7 +2473,7 @@ def reduced_shape(input_shape, axes): input_shape: 1-D Tensor, the shape of the Tensor being reduced. axes: 1-D Tensor, the reduction axes. Returns: - A 1-D Tensor, the output shape as if keep_dims were set to True. + A 1-D Tensor, the output shape as if keepdims were set to True. """ # Example: # cast needed for SparseTensor reductions diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 717ee1254f..e04121ee31 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -792,9 +792,10 @@ def mean_cosine_distance(labels, predictions, dim, weights=None, predictions, labels, weights = _remove_squeezable_dimensions( predictions=predictions, labels=labels, weights=weights) radial_diffs = math_ops.multiply(predictions, labels) - radial_diffs = math_ops.reduce_sum(radial_diffs, - reduction_indices=[dim,], - keep_dims=True) + radial_diffs = math_ops.reduce_sum( + radial_diffs, reduction_indices=[ + dim, + ], keepdims=True) mean_distance, update_op = mean(radial_diffs, weights, None, None, diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py index 1fcd0384da..e72d34d1f7 100644 --- a/tensorflow/python/ops/nn_fused_batchnorm_test.py +++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py @@ -335,22 +335,22 @@ class BatchNormalizationTest(test.TestCase): def testInference(self): x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_inference( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_inference( - x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] if test.is_gpu_available(cuda_only=True): for dtype in [np.float16, np.float32]: self._test_inference( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -359,33 +359,33 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_inference( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_inference( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_inference( - x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_inference( + x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') def testTraining(self): x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC') self._test_training( x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW') - self._test_training( - x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 1, 6, 2] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC') x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -394,20 +394,20 @@ class BatchNormalizationTest(test.TestCase): x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW') x_shape = [27, 131, 127, 6] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_training( x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW') self._test_training( x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC') - self._test_training( - x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC') + self._test_training( + x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC') def testBatchNormGrad(self): for is_training in [True, False]: x_shape = [1, 1, 6, 1] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [1], @@ -422,17 +422,17 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NCHW', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [1], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [1], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 1, 6, 2] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [2], @@ -440,13 +440,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [2], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [2], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) x_shape = [1, 2, 1, 6] if test.is_gpu_available(cuda_only=True): @@ -460,8 +460,8 @@ class BatchNormalizationTest(test.TestCase): is_training=is_training) x_shape = [5, 7, 11, 4] - if test.is_gpu_available(cuda_only=True): - for dtype in [np.float16, np.float32]: + for dtype in [np.float16, np.float32]: + if test.is_gpu_available(cuda_only=True): self._test_gradient( x_shape, dtype, [7], @@ -476,13 +476,13 @@ class BatchNormalizationTest(test.TestCase): use_gpu=True, data_format='NHWC', is_training=is_training) - self._test_gradient( - x_shape, - np.float32, [4], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training) + self._test_gradient( + x_shape, + dtype, [4], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training) def _testBatchNormGradGrad(self, config): shape = config['shape'] @@ -506,15 +506,14 @@ class BatchNormalizationTest(test.TestCase): data_format='NCHW', is_training=is_training, err_tolerance=err_tolerance) - if dtype != np.float16: - self._test_grad_grad( - shape, - np.float32, [shape[3]], - np.float32, - use_gpu=False, - data_format='NHWC', - is_training=is_training, - err_tolerance=err_tolerance) + self._test_grad_grad( + shape, + dtype, [shape[3]], + np.float32, + use_gpu=False, + data_format='NHWC', + is_training=is_training, + err_tolerance=err_tolerance) def testBatchNormGradGrad(self): configs = [{ @@ -525,6 +524,10 @@ class BatchNormalizationTest(test.TestCase): 'shape': [2, 3, 2, 2], 'err_tolerance': 1e-3, 'dtype': np.float32, + }, { + 'shape': [2, 3, 4, 5], + 'err_tolerance': 1e-2, + 'dtype': np.float16, }, { 'shape': [2, 3, 2, 2], 'err_tolerance': 2e-3, diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 431ea1186a..654eb1c118 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -32,6 +32,8 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables +from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None): @@ -313,30 +315,33 @@ def swish(features): return features * math_ops.sigmoid(features) -def l2_normalize(x, dim, epsilon=1e-12, name=None): - """Normalizes along dimension `dim` using an L2 norm. +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): + """Normalizes along dimension `axis` using an L2 norm. - For a 1-D tensor with `dim = 0`, computes + For a 1-D tensor with `axis = 0`, computes output = x / sqrt(max(sum(x**2), epsilon)) For `x` with more dimensions, independently normalizes each 1-D slice along - dimension `dim`. + dimension `axis`. Args: x: A `Tensor`. - dim: Dimension along which to normalize. A scalar or a vector of + axis: Dimension along which to normalize. A scalar or a vector of integers. epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`. name: A name for this operation (optional). + dim: Deprecated alias for axis. Returns: A `Tensor` with the same shape as `x`. """ with ops.name_scope(name, "l2_normalize", [x]) as name: + axis = deprecated_argument_lookup("axis", axis, "dim", dim) x = ops.convert_to_tensor(x, name="x") - square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True) + square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True) x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon)) return math_ops.multiply(x, x_inv_norm, name=name) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index bdaac65904..ec7b9372ca 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -40,6 +40,7 @@ from tensorflow.python.ops.gen_nn_ops import * from tensorflow.python.util import deprecation + # Aliases for some automatically-generated names. local_response_normalization = gen_nn_ops.lrn @@ -1645,52 +1646,62 @@ def _softmax(logits, compute_op, dim=-1, name=None): return output -def softmax(logits, dim=-1, name=None): +@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def softmax(logits, axis=None, name=None, dim=None): """Computes softmax activations. This function performs the equivalent of - softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), dim) + softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - dim: The dimension softmax would be performed on. The default is -1 which + axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). + dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type and shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `dim` is beyond the last + InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - return _softmax(logits, gen_nn_ops._softmax, dim, name) + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: + axis = -1 + return _softmax(logits, gen_nn_ops._softmax, axis, name) -def log_softmax(logits, dim=-1, name=None): +@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") +def log_softmax(logits, axis=None, name=None, dim=None): """Computes log softmax activations. For each batch `i` and class `j` we have - logsoftmax = logits - log(reduce_sum(exp(logits), dim)) + logsoftmax = logits - log(reduce_sum(exp(logits), axis)) Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - dim: The dimension softmax would be performed on. The default is -1 which + axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). + dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type as `logits`. Same shape as `logits`. Raises: - InvalidArgumentError: if `logits` is empty or `dim` is beyond the last + InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - return _softmax(logits, gen_nn_ops._log_softmax, dim, name) + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: + axis = -1 + return _softmax(logits, gen_nn_ops._log_softmax, axis, name) def _ensure_xent_args(name, sentinel, labels, logits): @@ -2305,6 +2316,103 @@ def conv1d(value, filters, stride, padding, return array_ops.squeeze(result, [spatial_start_dim]) +def conv1d_transpose( + value, + filter, # pylint: disable=redefined-builtin + output_shape, + stride, + padding="SAME", + data_format="NWC", + name=None): + """The transpose of `conv1d`. + + This operation is sometimes called "deconvolution" after [Deconvolutional + Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is + actually the transpose (gradient) of `conv1d` rather than an actual + deconvolution. + + Args: + value: A 3-D `Tensor` of type `float` and shape + `[batch, in_width, in_channels]` for `NWC` data format or + `[batch, in_channels, in_width]` for `NCW` data format. + filter: A 3-D `Tensor` with the same type as `value` and shape + `[filter_width, output_channels, in_channels]`. `filter`'s + `in_channels` dimension must match that of `value`. + output_shape: A 1-D `Tensor` representing the output shape of the + deconvolution op. + stride: An `integer`. The number of entries by which + the filter is moved right at each step. + padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. + See the @{tf.nn.convolution$comment here} + data_format: A string. 'NHWC' and 'NCHW' are supported. + name: Optional name for the returned tensor. + + Returns: + A `Tensor` with the same type as `value`. + + Raises: + ValueError: If input/output depth does not match `filter`'s shape, or if + padding is other than `'VALID'` or `'SAME'`. + """ + with ops.name_scope(name, "conv1d_transpose", + [value, filter, output_shape]) as name: + output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") + if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): + raise ValueError("output_shape must have shape (3,), got {}".format( + output_shape_.get_shape())) + + # The format could be either NWC or NCW, map to NHWC or NCHW + if data_format is None or data_format == "NWC": + data_format_2d = "NHWC" + axis = 2 + elif data_format == "NCW": + data_format_2d = "NCHW" + axis = 1 + else: + raise ValueError("data_format must be \"NWC\" or \"NCW\".") + + if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]): + raise ValueError("input channels does not match filter's input channels, " + "{} != {}".format(value.get_shape()[axis], + filter.get_shape()[2])) + + if isinstance(output_shape, (list, np.ndarray)): + # output_shape's shape should be == [3] if reached this point. + if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): + raise ValueError( + "output_shape does not match filter's output channels, " + "{} != {}".format(output_shape[axis], + filter.get_shape()[1])) + + if padding != "VALID" and padding != "SAME": + raise ValueError("padding must be either VALID or SAME:" + " {}".format(padding)) + + # Reshape the input tensor to [batch, 1, in_width, in_channels] + if data_format_2d == "NHWC": + output_shape_ = array_ops.concat( + [output_shape_[:1], [1], output_shape_[1:]], axis=0) + spatial_start_dim = 1 + strides = [1, 1, stride, 1] + else: + output_shape_ = array_ops.concat( + [output_shape_[:2], [1], output_shape_[2:]], axis=0) + spatial_start_dim = 2 + strides = [1, 1, 1, stride] + value = array_ops.expand_dims(value, spatial_start_dim) + filter = array_ops.expand_dims(filter, 0) + + result = gen_nn_ops.conv2d_backprop_input( + input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + data_format=data_format_2d, + name=name) + return array_ops.squeeze(result, [spatial_start_dim]) + + @ops.RegisterStatistics("Dilation2D", "flops") def _calc_dilation2d_flops(graph, node): """Calculates the compute resources needed for Dilation2D.""" diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index e9b1c67d16..a1e4305de1 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -1063,13 +1063,13 @@ class Variable(object): class PartitionedVariable(object): """A container for partitioned `Variable` objects. - @compatiblity(eager) `tf.PartitionedVariable` is not compatible with + @compatibility(eager) `tf.PartitionedVariable` is not compatible with eager execution. Use `tfe.Variable` instead which is compatable with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. - @end_compatiblity + @end_compatibility """ class PartitionedVariableIterator(object): diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py old mode 100644 new mode 100755 diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 99bed86a17..d78362d4fb 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -232,7 +232,6 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) __macro(cudnnRNNBackwardData) \ __macro(cudnnRNNBackwardWeights) \ __macro(cudnnSetRNNDescriptor) \ - __macro(cudnnSetRNNDescriptor_v6) \ __macro(cudnnGetFilterNdDescriptor) // clang-format on @@ -245,7 +244,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 6000 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro) \ - __macro(cudnnConvolutionBiasActivationForward) + __macro(cudnnConvolutionBiasActivationForward) \ + __macro(cudnnSetRNNDescriptor_v6) // clang-format on CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -665,7 +665,6 @@ class ScopedPoolingDescriptor { LOG(FATAL) << "could not create cudnn pooling descriptor: " << ToString(status); } - const std::vector strides64 = pooling_descriptor.strides(); const std::vector padding64 = pooling_descriptor.padding(); const std::vector shape64 = pooling_descriptor.window(); @@ -680,14 +679,14 @@ class ScopedPoolingDescriptor { &CheckedNarrowing); std::transform(shape64.cbegin(), shape64.cend(), shape.begin(), &CheckedNarrowing); + bool propagate_nans = pooling_descriptor.propagate_nans(); status = wrap::cudnnSetPoolingNdDescriptor( parent_, handle_, (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum ? CUDNN_POOLING_MAX : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING), #if CUDNN_VERSION >= 5000 - // Always propagate nans. - CUDNN_PROPAGATE_NAN, + propagate_nans ? CUDNN_PROPAGATE_NAN : CUDNN_NOT_PROPAGATE_NAN, #endif nd, shape.data(), padding.data(), strides.data()); if (status != CUDNN_STATUS_SUCCESS) { diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 07fe8a85f4..44144a0613 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -470,6 +470,7 @@ string ConvolutionDescriptor::ToShortString() const { PoolingDescriptor::PoolingDescriptor(int ndims) : mode_(dnn::PoolingMode::kMaximum), ndims_(ndims), + propagate_nans_(false), window_(ndims, 0), padding_(ndims, 0), strides_(ndims, 1) {} @@ -482,6 +483,7 @@ void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) { window_ = other.window_; padding_ = other.padding_; strides_ = other.strides_; + propagate_nans_ = other.propagate_nans_; } string PoolingDescriptor::ToString() const { @@ -495,9 +497,12 @@ string PoolingDescriptor::ToString() const { port::Appendf(&padding, "%lld", padding_[i]); } - return port::Printf("{mode: %s window: %s strides: %s padding: %s}", - mode_string, window.c_str(), strides.c_str(), - padding.c_str()); + const char* propagate_string = propagate_nans_ ? "Yes" : "No"; + + return port::Printf( + "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}", + mode_string, window.c_str(), strides.c_str(), padding.c_str(), + propagate_string); } string PoolingDescriptor::ToShortString() const { @@ -508,7 +513,8 @@ string PoolingDescriptor::ToShortString() const { port::Appendf(&padding, "_p%d:%lld", i, padding_[i]); } return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg", - window, strides, padding); + window, strides, padding, + propagate_nans_ ? "propagate_nans" : "ignore_nans"); } // -- NormalizeDescriptor diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 49235167ab..0d2cd4a9f2 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -661,6 +661,10 @@ class PoolingDescriptor { SetDim(&strides_, dim, value); return *this; } + PoolingDescriptor& set_propagate_nans(bool value) { + propagate_nans_ = value; + return *this; + } int ndims() const { return ndims_; } void CloneFrom(const PoolingDescriptor& other); @@ -681,10 +685,12 @@ class PoolingDescriptor { std::vector window() const { return window_; } std::vector padding() const { return padding_; } std::vector strides() const { return strides_; } + bool propagate_nans() const { return propagate_nans_; } private: PoolingMode mode_; int ndims_; + bool propagate_nans_; // Stored as: ..., y, x. std::vector window_; diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt index 9fd38a29b7..62e634afb8 100644 --- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt @@ -94,7 +94,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "qr" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index 24c0448dea..ebd9c079b5 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -170,7 +170,7 @@ tf_module { } member_method { name: "l2_normalize" - argspec: "args=[\'x\', \'dim\', \'epsilon\', \'name\'], varargs=None, keywords=None, defaults=[\'1e-12\', \'None\'], " + argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], " } member_method { name: "leaky_relu" @@ -190,7 +190,7 @@ tf_module { } member_method { name: "log_softmax" - argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " + argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "log_uniform_candidate_sampler" @@ -282,7 +282,7 @@ tf_module { } member_method { name: "softmax" - argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], " + argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "softmax_cross_entropy_with_logits" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index bf7bc6a7c1..0edd4153d7 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -750,7 +750,7 @@ tf_module { } member_method { name: "boolean_mask" - argspec: "args=[\'tensor\', \'mask\', \'name\'], varargs=None, keywords=None, defaults=[\'boolean_mask\'], " + argspec: "args=[\'tensor\', \'mask\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'boolean_mask\', \'None\'], " } member_method { name: "broadcast_dynamic_shape" @@ -858,7 +858,7 @@ tf_module { } member_method { name: "count_nonzero" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'dtype\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \"\", \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\', \'None\', \'None\'], " } member_method { name: "count_up_to" @@ -1414,7 +1414,7 @@ tf_module { } member_method { name: "norm" - argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "not_equal" @@ -1546,11 +1546,11 @@ tf_module { } member_method { name: "reduce_all" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_any" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_join" @@ -1558,27 +1558,27 @@ tf_module { } member_method { name: "reduce_logsumexp" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_max" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_mean" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_min" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_prod" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "reduce_sum" - argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], " + argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "register_tensor_conversion_function" diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 5f791d7bc7..c27f4953e3 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -165,7 +165,7 @@ else BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test" - BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" + # BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test" BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test" diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh index 55c1674495..e1edd62cc5 100755 --- a/tensorflow/tools/ci_build/install/install_golang.sh +++ b/tensorflow/tools/ci_build/install/install_golang.sh @@ -16,7 +16,7 @@ set -ex -GOLANG_URL="https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz" +GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz" sudo mkdir -p /usr/local wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index dcda8228bc..e5d8303c6e 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -48,6 +48,6 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_GCP=0" \ -e "TF_NEED_HDFS=0" \ -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \ - -e "TF_NEED_OPENCL=0" \ + -e "TF_NEED_OPENCL_SYCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index d90a1b905d..e1b56b9a25 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -27,7 +27,7 @@ export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 -export TF_NEED_OPENCL=0 +export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index 79973647c1..5a901af3e5 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -28,7 +28,7 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 -export TF_NEED_OPENCL=0 +export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 5244898c40..88116d9f24 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -75,17 +75,23 @@ if [[ $1 == "PI_ONE" ]]; then PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/ + --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 + --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' echo "Building for the Pi Two/Three, with NEON acceleration" fi +# We need to pass down the environment variable with a possible alternate Python +# include path for Python 3.x builds to work. +export CROSSTOOL_PYTHON_INCLUDE_PATH + cd ${WORKSPACE_PATH} bazel build -c opt ${PI_COPTS} \ --config=monolithic \ diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 924ab1a4ae..44b6d52952 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -117,7 +117,7 @@ function run_configure_for_cpu_build { export TF_NEED_VERBS=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL=0 + export TF_NEED_OPENCL_SYCL=0 echo "" | ./configure } @@ -141,7 +141,7 @@ function run_configure_for_gpu_build { export TF_NEED_MKL=0 export TF_NEED_GCP=0 export TF_NEED_HDFS=0 - export TF_NEED_OPENCL=0 + export TF_NEED_OPENCL_SYCL=0 # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL # for GPU build on Windows diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 index 64ebc4607a..9bcc3925a8 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 @@ -101,12 +101,11 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib --jobs=${TF_AVAILABLE_CPUS} \ tensorflow/tools/pip_package:build_pip_package && \ mkdir /pip_pkg && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg - -# Clean up pip wheel and Bazel cache when done. -RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \ + pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ rm -rf /pip_pkg && \ rm -rf /root/.cache +# Clean up pip wheel and Bazel cache when done. WORKDIR /root diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index 0571dd7391..e212d10290 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04 +FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04 LABEL maintainer="Craig Citro " diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index 2e5a0038ed..e35c58ff80 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -60,6 +60,20 @@ Building TensorFlow Docker containers should be done through the script. The raw Dockerfiles should not be used directly as they contain strings to be replaced by the script during the build. +Attempting to run [parameterized_docker_build.sh](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/parameterized_docker_build.sh) +from a binary docker image such as for example `tensorflow/tensorflow:latest` will +not work. One needs to execute the script from a developer docker image since by +contrast with a binary docker image it contains not only the compiled solution but +also the tensorflow source code. Please select the appropriate developer docker +image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`. + +The smallest command line to generate a docker image will then be: +```docker run -it tensorflow/tensorflow:"right_tag"``` + +If you would like to start a jupyter notebook on your docker container, make sure +to map the port 8888 of your docker container by adding -p 8888:8888 to the above +command. + To use the script, specify the container type (`CPU` vs. `GPU`), the desired Python version (`PYTHON2` vs. `PYTHON3`) and whether the developer Docker image is to be built (`NO` vs. `YES`). In addition, you need to specify the central diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index 1bf7113c9e..9216008600 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -131,6 +131,8 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", + "//tensorflow/contrib/rnn:gru_ops_op_lib", + "//tensorflow/contrib/rnn:lstm_ops_op_lib", ] + if_not_windows([ "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform", diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc index 2b85e7e83c..97e8f77616 100644 --- a/tensorflow/tools/graph_transforms/quantize_nodes.cc +++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc @@ -759,6 +759,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reshape_dims; reshape_dims.set_op("Const"); reshape_dims.set_name(unique_input_name + "/reshape_dims"); + AddNodeInput("^" + input_name, &reshape_dims); SetNodeAttr("dtype", DT_INT32, &reshape_dims); Tensor reshape_dims_tensor(DT_INT32, {1}); reshape_dims_tensor.flat()(0) = -1; @@ -768,6 +769,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef reduction_dims; reduction_dims.set_op("Const"); reduction_dims.set_name(unique_input_name + "/reduction_dims"); + AddNodeInput("^" + input_name, &reduction_dims); SetNodeAttr("dtype", DT_INT32, &reduction_dims); Tensor reduction_dims_tensor(DT_INT32, {1}); reduction_dims_tensor.flat()(0) = 0; diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 60282f6aa3..a493c6f2aa 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.4.0-rc1' +_VERSION = '1.4.0' REQUIRED_PACKAGES = [ 'absl-py', diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD index bc6a2fd8cc..bc9e37ffb3 100644 --- a/third_party/aws.BUILD +++ b/third_party/aws.BUILD @@ -21,6 +21,9 @@ cc_library( "@%ws%//tensorflow:linux_ppc64le": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), + "@%ws%//tensorflow:raspberry_pi_armeabi": glob([ + "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", + ]), "//conditions:default": [], }) + glob([ "aws-cpp-sdk-core/include/**/*.h", diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 882967df1c..805a30d262 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -477,7 +477,6 @@ genrule( "# define HAVE_RAND_EGD 1", "# define HAVE_RAND_STATUS 1", "# define HAVE_SSL_GET_SHUTDOWN 1", - "# define HAVE_STROPTS_H 1", "# define HAVE_TERMIOS_H 1", "# define OS \"x86_64-pc-linux-gnu\"", "# define RANDOM_FILE \"/dev/urandom\"", diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl index 32884d71e7..f8e50efcc6 100755 --- a/third_party/sycl/crosstool/CROSSTOOL.tpl +++ b/third_party/sycl/crosstool/CROSSTOOL.tpl @@ -35,10 +35,10 @@ toolchain { tool_path { name: "compat-ld" path: "/usr/bin/ld" } tool_path { name: "cpp" path: "/usr/bin/cpp" } tool_path { name: "dwp" path: "/usr/bin/dwp" } - tool_path { name: "gcc" path: "computecpp" } + tool_path { name: "gcc" path: "%{sycl_impl}" } # Use "-std=c++11" for nvcc. For consistency, force both the host compiler # and the device compiler to use "-std=c++11". - cxx_flag: "-std=c++11" + cxx_flag: "%{c++_std}" linker_flag: "-Wl,-no-as-needed" linker_flag: "-lstdc++" linker_flag: "-B/usr/bin/" @@ -53,7 +53,7 @@ toolchain { cxx_builtin_include_directory: "/usr/local/include" cxx_builtin_include_directory: "/usr/include" - cxx_builtin_include_directory: "%{computecpp_toolkit_path}" + cxx_builtin_include_directory: "%{sycl_include_dir}" cxx_builtin_include_directory: "%{python_lib_path}" tool_path { name: "gcov" path: "/usr/bin/gcov" } @@ -214,4 +214,4 @@ toolchain { compiler_flag: "-O2" compiler_flag: "-DNDEBUG" } -} +} \ No newline at end of file diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl new file mode 100644 index 0000000000..87a70d8f95 --- /dev/null +++ b/third_party/sycl/crosstool/trisycl.tpl @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +import os +import sys +import tempfile +from subprocess import call + +CPU_CXX_COMPILER = ('%{host_cxx_compiler}') +CPU_C_COMPILER = ('%{host_c_compiler}') + +CURRENT_DIR = os.path.dirname(sys.argv[0]) +TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' + + +def main(): + compiler_flags = [] + + remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', + '-Wignored-attributes', '-fno-exceptions') + # remove -fsamotoze-coverage from string with g++ + if 'g++' in CPU_CXX_COMPILER: + remove_flags += ('-fsanitize-coverage',) + compiler_flags += ['-fopenmp'] + else: + compiler_flags += ['-fopenmp=libomp'] + + compiler_flags += [ + flag for flag in sys.argv[1:] if not flag.startswith(remove_flags) + ] + + output_file_index = compiler_flags.index('-o') + 1 + output_file_name = compiler_flags[output_file_index] + + if (output_file_index == 1): + # we are linking + return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined']) + + # find what we compile + compiling_cpp = 0 + if ('-c' in compiler_flags): + compiled_file_index = compiler_flags.index('-c') + 1 + compiled_file_name = compiler_flags[compiled_file_index] + if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', + '.cxx'))): + compiling_cpp = 1 + + debug_flags = [ + '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', + '-lpthread', '-lboost_log', '-g', '-rdynamic' + ] + + opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] + + compiler_flags = compiler_flags + [ + '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH', + '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL' + ] + opt_flags + + if (compiling_cpp == 1): + # create a blacklist of folders that will be skipped when compiling + # with triSYCL + skip_extensions = ['.cu.cc'] + skip_folders = [ + 'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard', + 'third_party', 'external', 'hexagon' + ] + skip_folders = [(folder + '/') for folder in skip_folders] + # if compiling external project skip triSYCL + if any( + compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any( + _folder in output_file_name for _folder in skip_folders): + return call([CPU_CXX_COMPILER] + compiler_flags) + + host_compiler_flags = [ + '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR + ] + compiler_flags + x = call([CPU_CXX_COMPILER] + host_compiler_flags) + return x + else: + # compile for C + return call([CPU_C_COMPILER] + compiler_flags) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl index 6cad190630..b6ceaadda7 100755 --- a/third_party/sycl/sycl/BUILD.tpl +++ b/third_party/sycl/sycl/BUILD.tpl @@ -10,16 +10,27 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE.text"]) config_setting( - name = "using_sycl", - values = { - "define": "using_sycl=true", + name = "using_sycl_ccpp", + define_values = { + "using_sycl": "true", + "using_trisycl": "false", }, ) +config_setting( + name = "using_sycl_trisycl", + define_values = { + "using_sycl": "true", + "using_trisycl": "false", + }, +) + + cc_library( name = "sycl_headers", hdrs = glob([ "**/*.h", + "**/*.hpp", ]), includes = [".", "include"], ) diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl index 09bef0a661..33386f8957 100755 --- a/third_party/sycl/sycl/build_defs.bzl.tpl +++ b/third_party/sycl/sycl/build_defs.bzl.tpl @@ -5,9 +5,24 @@ def if_sycl(if_true, if_false = []): Returns a select statement which evaluates to if_true if we're building with SYCL enabled. Otherwise, the select statement evaluates to if_false. + If we are building with triSYCL instead of ComputeCPP, a list with + the first element of if_true is returned. + """ + return select({ + "@local_config_sycl//sycl:using_sycl_ccpp": if_true, + "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1], + "//conditions:default": if_false + }) + +def if_ccpp(if_true, if_false = []): + """Shorthand for select()'ing if we are building with ComputeCPP. + Returns a select statement which evaluates to if_true if we're building + with ComputeCPP enabled. Otherwise, the select statement evaluates + to if_false. """ return select({ - "@local_config_sycl//sycl:using_sycl": if_true, + "@local_config_sycl//sycl:using_sycl_ccpp": if_true, + "@local_config_sycl//sycl:using_sycl_trisycl": if_false, "//conditions:default": if_false }) diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl index 7af063178e..5b9d0eb383 100644 --- a/third_party/sycl/sycl_configure.bzl +++ b/third_party/sycl/sycl_configure.bzl @@ -5,20 +5,26 @@ * HOST_CXX_COMPILER: The host C++ compiler * HOST_C_COMPILER: The host C compiler * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit. + * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL. + (if using triSYCL instead of ComputeCPP) * PYTHON_LIB_PATH: The path to the python lib """ _HOST_CXX_COMPILER = "HOST_CXX_COMPILER" _HOST_C_COMPILER= "HOST_C_COMPILER" _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH" +_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" def _enable_sycl(repository_ctx): - if "TF_NEED_OPENCL" in repository_ctx.os.environ: - enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL"].strip() + if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ: + enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip() return enable_sycl == "1" return False +def _enable_compute_cpp(repository_ctx): + return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ + def auto_configure_fail(msg): """Output failure message when auto configuration fails.""" red = "\033[0;31m" @@ -59,6 +65,14 @@ def find_computecpp_root(repository_ctx): return sycl_name fail("Cannot find SYCL compiler, please correct your path") +def find_trisycl_include_dir(repository_ctx): + """Find triSYCL include directory. """ + if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ: + sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip() + if sycl_name.startswith("/"): + return sycl_name + fail( "Cannot find triSYCL include directory, please correct your path") + def find_python_lib(repository_ctx): """Returns python path.""" if _PYTHON_LIB_PATH in repository_ctx.os.environ: @@ -171,26 +185,53 @@ def _sycl_autoconf_imp(repository_ctx): _tpl(repository_ctx, "sycl:platform.bzl") _tpl(repository_ctx, "crosstool:BUILD") _file(repository_ctx, "sycl:LICENSE.text") - _tpl(repository_ctx, "crosstool:computecpp", - { - "%{host_cxx_compiler}" : find_cc(repository_ctx), - "%{host_c_compiler}" : find_c(repository_ctx), - }) - - computecpp_root = find_computecpp_root(repository_ctx) - _check_dir(repository_ctx, computecpp_root) - - _tpl(repository_ctx, "crosstool:CROSSTOOL", - { - "%{computecpp_toolkit_path}" : computecpp_root, - "%{python_lib_path}" : find_python_lib(repository_ctx), - }) - - # symlink libraries - _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) - _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") - _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") - _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") + + if _enable_compute_cpp(repository_ctx): + _tpl(repository_ctx, "crosstool:computecpp", + { + "%{host_cxx_compiler}" : find_cc(repository_ctx), + "%{host_c_compiler}" : find_c(repository_ctx) + }) + + computecpp_root = find_computecpp_root(repository_ctx); + _check_dir(repository_ctx, computecpp_root) + + _tpl(repository_ctx, "crosstool:CROSSTOOL", + { + "%{sycl_include_dir}" : computecpp_root, + "%{sycl_impl}" : "computecpp", + "%{c++_std}" : "-std=c++11", + "%{python_lib_path}" : find_python_lib(repository_ctx), + }) + + # symlink libraries + _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" ) + _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") + _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") + _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") + else: + + trisycl_include_dir = find_trisycl_include_dir(repository_ctx); + _check_dir(repository_ctx, trisycl_include_dir) + + _tpl(repository_ctx, "crosstool:trisycl", + { + "%{host_cxx_compiler}" : find_cc(repository_ctx), + "%{host_c_compiler}" : find_c(repository_ctx), + "%{trisycl_include_dir}" : trisycl_include_dir + }) + + + _tpl(repository_ctx, "crosstool:CROSSTOOL", + { + "%{sycl_include_dir}" : trisycl_include_dir, + "%{sycl_impl}" : "trisycl", + "%{c++_std}" : "-std=c++1y", + "%{python_lib_path}" : find_python_lib(repository_ctx), + }) + + _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include") + sycl_configure = repository_rule( implementation = _sycl_autoconf_imp, diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD index 8509668891..d164ee719c 100644 --- a/third_party/zlib.BUILD +++ b/third_party/zlib.BUILD @@ -49,7 +49,7 @@ cc_library( ":windows_msvc": [], "//conditions:default": [ "-Wno-shift-negative-value", - "-Wno-implicit-function-declaration", + "-DZ_HAVE_UNISTD_H", ], }), includes = ["."], diff --git a/tools/bazel.rc b/tools/bazel.rc index 2d7201ae57..04c24d7511 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -9,13 +9,16 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true build:mkl --define=using_mkl=true build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl --define=using_sycl=true +build:sycl --define=using_sycl=true --define=using_trisycl=false build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain -build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address +build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address + +build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true build --define=use_fast_cpp_protos=true build --define=allow_oversize_protos=true diff --git a/util/python/BUILD b/util/python/BUILD index 96daf9947a..f5fa0c6d29 100644 --- a/util/python/BUILD +++ b/util/python/BUILD @@ -1,4 +1,4 @@ -licenses(["restricted"]) +licenses(["notice"]) # New BSD, Python Software Foundation package(default_visibility = ["//visibility:public"]) -- GitLab From 0927a5da0ab74b7ba23e3d575e5570666c3ef5ef Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 13:53:18 -0800 Subject: [PATCH 0787/1801] Add no_pip tests to new contrib/data/python/kernel_tests targets that uses test only dep. --- tensorflow/contrib/data/python/kernel_tests/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index c443c7f61a..9d49750c80 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -159,6 +159,7 @@ py_test( size = "small", srcs = ["flat_map_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -275,6 +276,7 @@ py_test( size = "small", srcs = ["map_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", -- GitLab From 9d56475bac690b7a5746e2443a3c157779342d03 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 13:58:11 -0800 Subject: [PATCH 0788/1801] Add tensor pool feature to tf.contrib.gan PiperOrigin-RevId: 176697680 --- tensorflow/contrib/gan/BUILD | 32 +++++ .../gan/python/features/python/tensor_pool.py | 35 ++++++ .../features/python/tensor_pool_impl.py | 118 ++++++++++++++++++ .../features/python/tensor_pool_test.py | 94 ++++++++++++++ 4 files changed, 279 insertions(+) create mode 100644 tensorflow/contrib/gan/python/features/python/tensor_pool.py create mode 100644 tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py create mode 100644 tensorflow/contrib/gan/python/features/python/tensor_pool_test.py diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index 1418c87023..abe4665caa 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -116,6 +116,7 @@ py_library( deps = [ ":clip_weights", ":conditioning_utils", + ":tensor_pool", ":virtual_batchnorm", "//tensorflow/python:util", ], @@ -219,6 +220,37 @@ py_test( ], ) +py_library( + name = "tensor_pool", + srcs = [ + "python/features/python/tensor_pool.py", + "python/features/python/tensor_pool_impl.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:data_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:util", + ], +) + +py_test( + name = "tensor_pool_test", + srcs = ["python/features/python/tensor_pool_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":tensor_pool", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//third_party/py/numpy", + ], +) + py_library( name = "virtual_batchnorm", srcs = [ diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool.py b/tensorflow/contrib/gan/python/features/python/tensor_pool.py new file mode 100644 index 0000000000..0bd2fa3db9 --- /dev/null +++ b/tensorflow/contrib/gan/python/features/python/tensor_pool.py @@ -0,0 +1,35 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A tensor pool stores values from an input tensor and returns a stored one. + +See the following papers for more details. +1) `Learning from simulated and unsupervised images through adversarial + training` (https://arxiv.org/abs/1612.07828). +2) `Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial + Networks` (https://arxiv.org/abs/1703.10593). +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.gan.python.features.python import tensor_pool_impl +# pylint: disable=wildcard-import +from tensorflow.contrib.gan.python.features.python.tensor_pool_impl import * +# pylint: enable=wildcard-import +from tensorflow.python.util.all_util import remove_undocumented + +__all__ = tensor_pool_impl.__all__ +remove_undocumented(__name__, __all__) diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py b/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py new file mode 100644 index 0000000000..79318a69d2 --- /dev/null +++ b/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py @@ -0,0 +1,118 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A tensor pool stores values from an input tensor and returns a stored one. + +We use this to keep a history of values created by a generator, such that +a discriminator can randomly be trained on some older samples, not just the +current one. This can help to not let the discriminator get too far ahead of the +generator and also to keep the system from oscilating, if the discriminator +forgets too fast what past samples from the generator looked like. + +See the following papers for more details. +1) `Learning from simulated and unsupervised images through adversarial + training` (https://arxiv.org/abs/1612.07828). +2) `Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial + Networks` (https://arxiv.org/abs/1703.10593). +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import random_ops + +__all__ = [ + 'tensor_pool', +] + + +def tensor_pool(input_value, + pool_size, + pooling_probability=0.5, + name='tensor_pool'): + """Queue storing input values and returning random previously stored ones. + + Every time the returned `output_value` is evaluated, `input_value` is + evaluated and its value either directly returned (with + `1-pooling_probability`) or stored in the pool and a random one of the samples + currently in the pool is popped and returned. As long as the pool in not fully + filled, the input_value is always directly returned, as well as stored in the + pool. Note during inference / testing, it may be appropriate to set + `pool_size` = 0 or `pooling_probability` = 0. + + Args: + input_value: A `Tensor` from which to read values to be pooled. + pool_size: An integer specifying the maximum size of the pool. + pooling_probability: A float `Tensor` specifying the probability of getting + a value from the pool, as opposed to just the current input. + name: A string prefix for the name scope for all tensorflow ops. + + Returns: + A `Tensor` which is with given probability either the `input_value` or a + randomly chosen sample that was previously inserted in the pool. + + Raises: + ValueError: If `pool_size` is negative. + """ + pool_size = int(pool_size) + if pool_size < 0: + raise ValueError('`pool_size` is negative.') + elif pool_size == 0: + return input_value + + with ops.name_scope('{}_pool_queue'.format(name), + values=[input_value, pooling_probability]): + pool_queue = data_flow_ops.RandomShuffleQueue( + capacity=pool_size, + min_after_dequeue=0, + dtypes=[input_value.dtype], + shapes=None) + + # In pseudeo code this code does the following: + # if not pool_full: + # enqueue(input_value) + # return input_value + # else + # dequeue_value = dequeue_random_sample() + # enqueue(input_value) + # if rand() < pooling_probability: + # return dequeue_value + # else + # return input_value + + def _get_input_value_pooled(): + enqueue_op = pool_queue.enqueue(input_value) + with ops.control_dependencies([enqueue_op]): + return array_ops.identity(input_value) + + def _get_random_pool_value_and_enqueue_input(): + dequeue_value = pool_queue.dequeue() + with ops.control_dependencies([dequeue_value]): + enqueue_op = pool_queue.enqueue(input_value) + with ops.control_dependencies([enqueue_op]): + prob = random_ops.random_uniform( + (), dtype=dtypes.float32) < pooling_probability + return control_flow_ops.cond(prob, lambda: dequeue_value, + lambda: input_value) + + output_value = control_flow_ops.cond( + pool_queue.size() < pool_size, _get_input_value_pooled, + _get_random_pool_value_and_enqueue_input) + + return output_value diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py b/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py new file mode 100644 index 0000000000..49b77bb3fc --- /dev/null +++ b/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py @@ -0,0 +1,94 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tf.contrib.gan.python.features.tensor_pool.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.gan.python.features.python import tensor_pool_impl as tensor_pool +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class TensorPoolTest(test.TestCase): + + def test_pool_unknown_input_shape(self): + """Checks that `input_value` can have unknown shape.""" + input_value = array_ops.placeholder( + dtype=dtypes.int32, shape=[None, None, 3]) + output_value = tensor_pool.tensor_pool(input_value, pool_size=10) + + with self.test_session(use_gpu=True) as session: + for i in range(10): + session.run(output_value, {input_value: [[[i] * 3]]}) + session.run(output_value, {input_value: [[[i] * 3] * 2]}) + session.run(output_value, {input_value: [[[i] * 3] * 5] * 2}) + + def test_pool_sequence(self): + """Checks that values are pooled and returned maximally twice.""" + input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[]) + output_value = tensor_pool.tensor_pool(input_value, pool_size=10) + + with self.test_session(use_gpu=True) as session: + outs = [] + for i in range(50): + out = session.run(output_value, {input_value: i}) + outs.append(out) + self.assertLessEqual(out, i) + + _, counts = np.unique(outs, return_counts=True) + # Check that each value is returned maximally twice. + self.assertTrue((counts <= 2).all()) + + def test_never_pool(self): + """Checks that setting `pooling_probability` to zero works.""" + input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[]) + output_value = tensor_pool.tensor_pool( + input_value, pool_size=10, pooling_probability=0.0) + + with self.test_session(use_gpu=True) as session: + for i in range(50): + out = session.run(output_value, {input_value: i}) + self.assertEqual(out, i) + + def test_pooling_probability(self): + """Checks that `pooling_probability` works.""" + input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[]) + pool_size = 10 + pooling_probability = 0.2 + output_value = tensor_pool.tensor_pool( + input_value, + pool_size=pool_size, + pooling_probability=pooling_probability) + + with self.test_session(use_gpu=True) as session: + not_pooled = 0 + total = 1000 + for i in range(total): + out = session.run(output_value, {input_value: i}) + if out == i: + not_pooled += 1 + self.assertAllClose( + (not_pooled - pool_size) / (total - pool_size), + 1 - pooling_probability, + atol=0.03) + + +if __name__ == '__main__': + test.main() -- GitLab From 87f7aa9b1da2614b225e3e457aed10485f05297a Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 22 Nov 2017 13:58:31 -0800 Subject: [PATCH 0789/1801] [tf.data] Allow the DeserializeSparse op to accept inconsistent dense shapes. This changes DeserializeSparse to match the behavior of DeserializeSparseMany and TakeManySparseFromTensorsMap, and thus makes `Dataset.batch()` on sparse tensors match the existing behavior of `tf.train.batch()` and family. The rationale for this change is that the source of many `tf.SparseTensor` objects is `tf.parse[_single]_example()`, and that operation does not try to ensure that consecutive `SparseTensor` objects parsed from the same feature specification have the same `dense_shape`. As a result, the behavior of existing ops that batch `SparseTensor` objects has been to silently pad those objects to the bounding dense_shape, by taking the maximum over each dimension size. While this does reduce our ability to make consistency checks in the `SparseTensor`-handling code, pragmatically we never get consistently shaped `SparseTensor`s in real programs, so this seems like a reasonable path for usability. PiperOrigin-RevId: 176697720 --- .../core/kernels/serialize_sparse_op.cc | 16 +++++---- .../kernel_tests/batch_dataset_op_test.py | 33 +++++++++++++++++++ .../sparse_serialization_ops_test.py | 14 +++++--- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc index 161c505e84..cfb86904d5 100644 --- a/tensorflow/core/kernels/serialize_sparse_op.cc +++ b/tensorflow/core/kernels/serialize_sparse_op.cc @@ -352,13 +352,15 @@ class DeserializeSparseOp : public OpKernel { i, "] was: ", shape.dims() - 1, " but rank of SparseTensor[", i, "] is: ", expanded_tensor_shape.dims() - 1)); for (int j = 1; j < shape.dims(); ++j) { - OP_REQUIRES( - context, shape.dim_size(j) == expanded_tensor_shape.dim_size(j), - errors::InvalidArgument( - "Inconsistent shape across SparseTensors: dimension ", j - 1, - " prior to SparseTensor[", i, "] was: ", shape.dim_size(j), - " but rank of SparseTensor[", i, - "] is: ", expanded_tensor_shape.dim_size(j))); + // NOTE(mrry): For compatibility with the implementations of + // DeserializeManySparse, and many ops that generate + // SparseTensors to batch that do not have a fixed + // dense_shape (e.g. `tf.parse_single_example()`), we + // compute the maximum in each dimension to find the + // smallest dense_shape that bounds all of the input + // SparseTensors. + shape.set_dim(j, std::max(shape.dim_size(j), + expanded_tensor_shape.dim_size(j))); } } } diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py index 513dfb1ec3..660cbef302 100644 --- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py @@ -130,6 +130,39 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testBatchSparseWithDifferentDenseShapes(self): + + def _sparse(i): + return sparse_tensor.SparseTensor( + indices=array_ops.expand_dims( + math_ops.range(i, dtype=dtypes.int64), 1), + values=array_ops.fill([math_ops.to_int32(i)], i), + dense_shape=[i]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).batch( + 5).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(2): + actual = sess.run(get_next) + expected_indices = [] + expected_values = [] + for j in range(5): + for k in range(i * 5 + j): + expected_indices.append([j, k]) + expected_values.append(i * 5 + j) + expected = sparse_tensor.SparseTensor( + indices=expected_indices, + values=expected_values, + dense_shape=[5, (i + 1) * 5 - 1]) + self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) + self.assertSparseValuesEqual(actual, expected.eval()) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + def testNestedBatchSparse(self): def _sparse(i): diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py index d0d6cc4c0f..78c113f514 100644 --- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py @@ -108,10 +108,16 @@ class SerializeSparseTest(test.TestCase): sp_deserialized = sparse_ops.deserialize_sparse( serialized, dtype=dtypes.int32) - with self.assertRaisesOpError( - r"Inconsistent shape across SparseTensors: dimension 0 prior to " - r"SparseTensor\[1\] was: 5 but rank of SparseTensor\[1\] is: 3"): - sess.run(sp_deserialized) + combined_indices, combined_values, combined_shape = sess.run( + sp_deserialized) + + self.assertAllEqual(combined_indices[:6, 0], [0] * 6) # minibatch 0 + self.assertAllEqual(combined_indices[:6, 1:], sp_input0[0]) + self.assertAllEqual(combined_indices[6:, 0], [1] * 6) # minibatch 1 + self.assertAllEqual(combined_indices[6:, 1:], sp_input1[0]) + self.assertAllEqual(combined_values[:6], sp_input0[1]) + self.assertAllEqual(combined_values[6:], sp_input1[1]) + self.assertAllEqual(combined_shape, [2, 5, 6]) def testSerializeDeserializeNestedBatch(self): with self.test_session(use_gpu=False) as sess: -- GitLab From a4a3816a867ecd25600b30d3ebc9b79d6c8a9e4e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 14:08:18 -0800 Subject: [PATCH 0790/1801] Allow proceeding without --input_shape flags when the model already has that information (e.g. when the inputs are Placeholders with shape attribs). This required moving the call to ResolveModelFlags back until after operators are imported (where it was until it was moved recently for drop_control_dependency handling). PiperOrigin-RevId: 176699182 --- tensorflow/contrib/lite/toco/import_tensorflow.cc | 3 ++- tensorflow/contrib/lite/toco/tooling_util.cc | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 691b4ff2a9..cde5a936af 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1507,7 +1507,6 @@ std::unique_ptr ImportTensorFlowGraphDef( } Model* model = new Model; - ResolveModelFlags(model_flags, model); for (auto node : inlined_graph.node()) { StripZeroOutputIndexFromInputs(&node); @@ -1639,6 +1638,8 @@ std::unique_ptr ImportTensorFlowGraphDef( } } + ResolveModelFlags(model_flags, model); + StripCaretFromArrayNames(model); AddExtraOutputsFedIntoOtherOps(model); FixNoMissingArray(model); diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 639b5f193c..e8fa7a3423 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1062,12 +1062,6 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { "--output_arrays flag must be given on the command-line."; for (const auto& input_array_proto : model->flags.input_arrays()) { - QCHECK(!input_array_proto.shape().empty()) - << "This model does not have shape defined for input array " - << input_array_proto.name() - << ", so one must be specified by a non-empty --input_shape " - "command-line flag."; - auto& input_array = model->GetOrCreateArray(input_array_proto.name()); if (input_array_proto.has_data_type()) { const ArrayDataType specified_type = @@ -1090,6 +1084,14 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { input_array.data_type = ArrayDataType::kFloat; } + if (!input_array.has_shape()) { + QCHECK(!input_array_proto.shape().empty()) + << "This model does not have shape defined for input array " + << input_array_proto.name() + << ", so one must be specified by a non-empty --input_shape " + "command-line flag."; + } + // Compare/merge the model->flags describing the input_shape with // the actual input array's shape. auto& input_array_dims = *input_array.mutable_shape()->mutable_dims(); -- GitLab From d4e5b6e8a8985e6648fec1939adab8f51eed0ffe Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 22 Nov 2017 14:16:41 -0800 Subject: [PATCH 0791/1801] Proper casting in resource scatter_update PiperOrigin-RevId: 176700288 --- .../python/kernel_tests/resource_variable_ops_test.py | 6 ++++++ tensorflow/python/ops/state_ops.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 8f328cea63..4c7a9cb0f9 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -498,6 +498,12 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): state_ops.scatter_update(v, [1], [3.0]) self.assertAllEqual([1.0, 3.0], v.numpy()) + def testScatterUpdateCast(self): + with context.eager_mode(): + v = resource_variable_ops.ResourceVariable([1.0, 2.0], name="update") + state_ops.scatter_update(v, [1], [3]) + self.assertAllEqual([1.0, 3.0], v.numpy()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index dbab07da42..dfc657893c 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -348,4 +348,4 @@ def scatter_update(ref, indices, updates, use_locking=True, name=None): return gen_state_ops.scatter_update(ref, indices, updates, use_locking=use_locking, name=name) return gen_resource_variable_ops.resource_scatter_update( - ref.handle, indices, updates, name=name) + ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), name=name) -- GitLab From 01a266a59783349b4d118eeb0ab1eb669aeef1db Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Nov 2017 14:45:36 -0800 Subject: [PATCH 0792/1801] Skip control edges since they propagate neither shapes nor values. PiperOrigin-RevId: 176703800 --- tensorflow/core/grappler/costs/graph_properties.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index abcd83a01e..c254fbef7a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -725,9 +725,12 @@ Status GraphProperties::PropagateShapes( while (!new_shapes->empty() && num_loop_iterations++ < max_loop_iterations) { const Node* n = new_shapes->pop(); - for (const Node* fanout : n->out_nodes()) { - TF_RETURN_IF_ERROR( - UpdateShapes(shape_refiner, relax, fanout, new_shapes)); + for (const Edge* e : n->out_edges()) { + if (!e->IsControlEdge()) { + const Node* fanout = e->dst(); + TF_RETURN_IF_ERROR( + UpdateShapes(shape_refiner, relax, fanout, new_shapes)); + } } } @@ -913,6 +916,9 @@ Status GraphProperties::InferStatically() { &input_properties[i]); } for (const auto& edge : node->in_edges()) { + if (edge->IsControlEdge()) { + continue; + } if (!edge->src()->IsConstant()) { continue; } -- GitLab From d390d2774fa3b480e723ca0d1539356c8f7b37d3 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Wed, 22 Nov 2017 14:53:42 -0800 Subject: [PATCH 0793/1801] Turn off layout optimizer (as with all other optimizers) in the memory optimizer test, because the test assumes no modification of the graph. PiperOrigin-RevId: 176704808 --- tensorflow/python/grappler/memory_optimizer_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 09cf5f2270..9fbadeceb3 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -128,6 +128,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase): rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, constant_folding=rewriter_config_pb2.RewriterConfig.OFF, + layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS), original_metagraph) @@ -151,6 +152,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase): rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, constant_folding=rewriter_config_pb2.RewriterConfig.OFF, + layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS, -- GitLab From feda969d974d02016736faab328419929428ab10 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 22 Nov 2017 15:08:37 -0800 Subject: [PATCH 0794/1801] Make 'name' and 'return_elements' arguments to import_graph_def work with C API. This enables some of the tests in import_test.py to be run using the C API. PiperOrigin-RevId: 176706698 --- tensorflow/python/client/tf_session.i | 38 ++++ tensorflow/python/framework/importer.py | 62 ++++++- tensorflow/python/framework/importer_test.py | 182 +++++++++++-------- 3 files changed, 205 insertions(+), 77 deletions(-) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index ef6f28ce07..099a35202c 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -315,6 +315,44 @@ tensorflow::ImportNumpy(); $2 = inputs.size(); } +// Typemaps for TF_ImportGraphDefResultsReturnOutputs +%typemap(in, numinputs=0) (int* num_outputs, TF_Output** outputs) + (int num_outputs, TF_Output* outputs) { + $1 = &num_outputs; + $2 = &outputs; +} + +%typemap(argout) (int* num_outputs, TF_Output** outputs) { + $result = PyList_New(*$1); + if (!$result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list"); + } + int num_outputs = *$1; + TF_Output* outputs = *$2; + for (int i = 0; i < num_outputs; ++i) { + PyList_SET_ITEM($result, i, CreateWrappedTFOutput(outputs[i])); + } +} + +// Typemaps for TF_ImportGraphDefResultsReturnOperations +%typemap(in, numinputs=0) (int* num_opers, TF_Operation*** opers) + (int num_opers, TF_Operation** opers) { + $1 = &num_opers; + $2 = &opers; +} + +%typemap(argout) (int* num_opers, TF_Operation*** opers) { + $result = PyList_New(*$1); + if (!$result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list"); + } + int num_opers = *$1; + TF_Operation** opers = *$2; + for (int i = 0; i < num_opers; ++i) { + PyList_SET_ITEM($result, i, CreateWrappedTFOperation(opers[i])); + } +} + // Typemaps for TF_GraphNextOperation(). %typemap(in) size_t* pos (size_t pos) { pos = PyLong_AsUnsignedLong($input); diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index c00b9da0df..434cbda7ad 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -194,6 +194,18 @@ def _FindAttrInOpDef(attr_name, op_def): return None +def _PopulateTFImportGraphDefOptions(options, prefix, return_elements): + """Populates the TF_ImportGraphDefOptions `options`.""" + c_api.TF_ImportGraphDefOptionsSetPrefix(options, prefix) + + for name in return_elements or []: + if ':' in name: + op_name, index = _ParseTensorName(name) + c_api.TF_ImportGraphDefOptionsAddReturnOutput(options, op_name, index) + else: + c_api.TF_ImportGraphDefOptionsAddReturnOperation(options, name) + + def _ProcessNewOps(graph): """Processes the newly-added TF_Operations in `graph`.""" for c_op in c_api_util.new_tf_operations(graph): @@ -202,6 +214,35 @@ def _ProcessNewOps(graph): # TODO(skyewm): colocation logic +def _GatherReturnElements(requested_return_elements, graph, results): + """Returns the requested return elements from results. + + Args: + requested_return_elements: list of strings of operation and tensor names + graph: Graph + results: wrapped TF_ImportGraphDefResults + + Returns: + list of `Operation` and/or `Tensor` objects + """ + return_outputs = c_api.TF_ImportGraphDefResultsReturnOutputs(results) + return_opers = c_api.TF_ImportGraphDefResultsReturnOperations(results) + + combined_return_elements = [] + outputs_idx = 0 + opers_idx = 0 + for name in requested_return_elements: + if ':' in name: + combined_return_elements.append( + graph._get_tensor_by_tf_output(return_outputs[outputs_idx])) # pylint: disable=protected-access + outputs_idx += 1 + else: + combined_return_elements.append( + graph._get_operation_by_tf_operation(return_opers[opers_idx])) # pylint: disable=protected-access + opers_idx += 1 + return combined_return_elements + + @deprecated_args(None, 'Please file an issue at ' 'https://github.com/tensorflow/tensorflow/issues if you depend' ' on this feature.', @@ -263,18 +304,29 @@ def import_graph_def(graph_def, input_map=None, return_elements=None, graph = ops.get_default_graph() if graph._c_graph: # pylint: disable=protected-access + with ops.name_scope(name, 'import', input_map.values()) as scope: + # Save unique prefix generated by name_scope + if scope: + assert scope.endswith('/') + prefix = scope[:-1] + else: + prefix = '' + scoped_options = c_api_util.ScopedTFImportGraphDefOptions() + options = scoped_options.options + _PopulateTFImportGraphDefOptions(options, prefix, return_elements) with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized: with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_GraphImportGraphDefWithResults( - graph._c_graph, serialized, scoped_options.options, status) # pylint: disable=protected-access + results = c_api.TF_GraphImportGraphDefWithResults( + graph._c_graph, serialized, options, status) # pylint: disable=protected-access _ProcessNewOps(graph) - if return_elements is not None: - raise ValueError('return_elements not yet implemented with C API') - return None + if return_elements is None: + return None + else: + return _GatherReturnElements(return_elements, graph, results) else: g = graph diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 8984282c68..5a6187c8a6 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -43,6 +43,7 @@ import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test +@test_util.with_c_api class ImportGraphDefTest(test.TestCase): def _MakeGraphDef(self, @@ -56,67 +57,6 @@ class ImportGraphDefTest(test.TestCase): text_format.Merge(text, ret) return ret - # The C API doesn't currently support return elements (or anything else beyond - # the most basic import). This test only checks that the import can run - # without error, and will be removed once more functionality is implemented - # and we can get coverage from the other tests. - @test_util.enable_c_api - def testCApi(self): - importer.import_graph_def( - self._MakeGraphDef(""" - node { name: 'A' op: 'IntOutputFloatOutput' } - node { name: 'B' op: 'ListOutput' - attr { key: 'T' - value { list { type: DT_INT32 type: DT_FLOAT } } } } - node { name: 'C' op: 'ListInput' - attr { key: 'N' value { i: 2 } } - attr { key: 'T' value { type: DT_INT32 } } - input: 'A:0' input: 'B:0' } - node { name: 'D' op: 'ListInput' - attr { key: 'N' value { i: 2 } } - attr { key: 'T' value { type: DT_FLOAT } } - input: 'A:1' input: 'B:1' } - """)) - - graph = ops.get_default_graph() - a = graph.get_operation_by_name("A") - b = graph.get_operation_by_name("B") - c = graph.get_operation_by_name("C") - d = graph.get_operation_by_name("D") - - # Assert that the import process creates distinct tensors. - self.assertNotEqual(a.outputs[0].name, a.outputs[1].name) - self.assertNotEqual(b.outputs[0].name, b.outputs[1].name) - self.assertNotEqual(a.outputs[0].name, b.outputs[0].name) - self.assertNotEqual(a.outputs[0].name, b.outputs[1].name) - self.assertNotEqual(a.outputs[1].name, b.outputs[0].name) - self.assertNotEqual(a.outputs[1].name, b.outputs[1].name) - - # Assert that the ops are connected according to the GraphDef topology. - self.assertEqual(c.inputs[0], a.outputs[0]) - self.assertEqual(c.inputs[1], b.outputs[0]) - self.assertEqual(d.inputs[0], a.outputs[1]) - self.assertEqual(d.inputs[1], b.outputs[1]) - - # Check the types of the returned ops and tensors. - self.assertEqual(a.type, "IntOutputFloatOutput") - self.assertEqual(b.type, "ListOutput") - self.assertEqual(c.type, "ListInput") - self.assertEqual(d.type, "ListInput") - self.assertEqual(a.outputs[0].dtype, dtypes.int32) - self.assertEqual(a.outputs[1].dtype, dtypes.float32) - self.assertEqual(b.outputs[0].dtype, dtypes.int32) - self.assertEqual(b.outputs[1].dtype, dtypes.float32) - - # Check the names of the returned ops. - self.assertEqual(a.name, "A") - self.assertEqual(b.name, "B") - self.assertEqual(c.name, "C") - self.assertEqual(d.name, "D") - - # Check that the op_def is still available. - self.assertNotEqual(None, a.op_def) - def testBasic(self): with ops.Graph().as_default(): a, b, c, d = importer.import_graph_def( @@ -171,6 +111,8 @@ class ImportGraphDefTest(test.TestCase): self.assertNotEqual(None, a.op_def) def testMultipleImport(self): + if ops._USE_C_API: return # TODO(skyewm): set uniquify_names + graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'IntOutput' } node { name: 'B' op: 'IntInput' input: 'A:0' } @@ -259,6 +201,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(outer_inner_c.name, "outer/inner/c_1") def testInputMap(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) feed_b_1 = constant_op.constant(1, dtype=dtypes.int32) @@ -286,6 +230,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d.inputs[1], feed_b_1) def testInputMapBytes(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) feed_b_1 = constant_op.constant(1, dtype=dtypes.int32) @@ -313,6 +259,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d.inputs[1], feed_b_1) def testInputMapUnicode(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) feed_b_1 = constant_op.constant(1, dtype=dtypes.int32) @@ -351,6 +299,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(b.inputs[0], a.outputs[0]) def testInputMapImplicitZerothOutput(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) b, = importer.import_graph_def( @@ -397,6 +347,11 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d.outputs, []) def testCyclic(self): + # Importing cycles not supported with C API enabled (this test will + # eventually be deleted). + # TODO(skyewm): write while loop test + if ops._USE_C_API: return + with ops.Graph().as_default(): a, b = importer.import_graph_def( self._MakeGraphDef(""" @@ -411,6 +366,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(b.inputs[0], a.outputs[0]) def testTypeMismatchInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -423,6 +380,8 @@ class ImportGraphDefTest(test.TestCase): str(e.exception)) def testShapeWhitelist(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + # Barrier's shape is an output vector of 2, but the # graph says it's a scalar. This is currently whitelisted. with ops.Graph().as_default(): @@ -436,6 +395,8 @@ class ImportGraphDefTest(test.TestCase): name="import") def testShapeWhitelistViolation(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + # L2 loss produces a scalar shape, but the graph # has the wrong shape, so raise an error. with ops.Graph().as_default(): @@ -455,6 +416,8 @@ class ImportGraphDefTest(test.TestCase): "Shapes () and (43,) are not compatible" in str(e.exception)) def testInvalidSignatureTooManyInputsInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -466,6 +429,8 @@ class ImportGraphDefTest(test.TestCase): str(e.exception)) def testInvalidSignatureNotEnoughInputsInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -477,6 +442,8 @@ class ImportGraphDefTest(test.TestCase): "got 'int32')" in str(e.exception)) def testMissingInputOpInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -486,6 +453,8 @@ class ImportGraphDefTest(test.TestCase): self.assertTrue("Input tensor 'A:0' not found" in str(e.exception)) def testMissingInputOpInGraphDefButAppearsInInputMap(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): feed_a_0 = constant_op.constant(5.0) b, = importer.import_graph_def( @@ -497,6 +466,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(b.inputs[0], feed_a_0) def testMissingInputTensorInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -507,6 +478,8 @@ class ImportGraphDefTest(test.TestCase): self.assertTrue("Input tensor 'A:1' not found" in str(e.exception)) def testMissingControlInputInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -516,6 +489,8 @@ class ImportGraphDefTest(test.TestCase): self.assertTrue("Control input '^A' not found" in str(e.exception)) def testInvalidTensorNameOutputIndexInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -526,6 +501,8 @@ class ImportGraphDefTest(test.TestCase): str(e.exception)) def testInvalidTensorNameInGraphDef(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -536,6 +513,8 @@ class ImportGraphDefTest(test.TestCase): str(e.exception)) def testMissingReturnOperation(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -547,6 +526,8 @@ class ImportGraphDefTest(test.TestCase): "return_element 'B' not found in graph_def." in str(e.exception)) def testMissingReturnTensor(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -576,6 +557,8 @@ class ImportGraphDefTest(test.TestCase): "return_element 'A:B:0' not found in graph_def." in str(e.exception)) def testMissingInputMap(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -586,6 +569,8 @@ class ImportGraphDefTest(test.TestCase): self.assertTrue("not found in graph_def: [B:0]" in str(e.exception)) def testInputMapUnusedAsInput(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): # Mapping an unused node output should succeed. importer.import_graph_def( @@ -604,6 +589,8 @@ class ImportGraphDefTest(test.TestCase): self.assertTrue("not found in graph_def: [A:2]" in str(e.exception)) def testInputMapTypeMismatch(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -637,6 +624,16 @@ class ImportGraphDefTest(test.TestCase): name="imported_graph") self.assertEqual(a.name, "imported_graph/A") + def testDefaultNamePrefix(self): + with ops.Graph().as_default(): + a, = importer.import_graph_def( + self._MakeGraphDef(""" + node { name: 'A' op: 'None' } + """), + return_elements=["A"], + name=None) + self.assertEqual(a.name, "import/A") + def testNamePrefixColocationAttrs(self): original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None' } @@ -648,14 +645,14 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): b, = importer.import_graph_def( original_graph_def, return_elements=["B"], name="imported_graph") - self.assertProtoEqualsVersion(""" - node { name: 'imported_graph/A' op: 'None' } - node { name: 'imported_graph/B' op: 'None' attr { - key: '_class' - value { list { s: 'loc:@imported_graph/A' } } - } }""", b.graph.as_graph_def()) + self.assertTrue("_class" in b.node_def.attr) + self.assertProtoEquals( + "list { s: 'loc:@imported_graph/A' }", + b.node_def.attr["_class"]) def testColocationWithDeviceFn(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None' attr { key: '_class' @@ -738,6 +735,8 @@ class ImportGraphDefTest(test.TestCase): } }""", b.graph.as_graph_def()) def testMultipleColocationWithDeviceFn(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None'} node { name: 'B' op: 'None'} @@ -774,6 +773,8 @@ class ImportGraphDefTest(test.TestCase): }""", c.graph.as_graph_def()) def testNamePrefixColocationAttrsMultipleImport(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None' } node { name: 'B' op: 'None' attr { @@ -799,6 +800,8 @@ class ImportGraphDefTest(test.TestCase): } }""", b.graph.as_graph_def()) def testNamePrefixColocationAttrsNotFound(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + original_graph_def = self._MakeGraphDef(""" node { name: 'B' op: 'None' attr { key: '_class' @@ -822,6 +825,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual("graph_def must be a GraphDef proto.", str(e.exception)) def testInvalidInputForInputMap(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(TypeError) as e: importer.import_graph_def( @@ -858,7 +863,17 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual("return_elements must be a list of strings.", str(e.exception)) + if ops._USE_C_API: + error_msg = "Cannot convert 'a:b:c' to a tensor name." + else: + error_msg = "Requested return_element 'a:b:c' not found in graph_def." + with self.assertRaisesRegexp(ValueError, error_msg): + importer.import_graph_def(self._MakeGraphDef(""), + return_elements=["a:b:c"]) + def testDuplicateOperationNames(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): with self.assertRaises(ValueError) as e: importer.import_graph_def( @@ -880,6 +895,8 @@ class ImportGraphDefTest(test.TestCase): self.assertAllEqual(pack.outputs[0].eval(), [5.0, 5.0]) def testWithDevice(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default() as g: # No device. a = constant_op.constant(3.0, name="a") @@ -923,6 +940,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(c.device + "/device:GPU:0", c5.device) def testWithDeviceFunctionDependingOnInputs(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default() as g: with ops.device("/job:ps"): v1 = constant_op.constant(1.0) @@ -948,6 +967,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(2, len(ops_with_two_inputs)) def testGradient(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default() as g: inputs = array_ops.placeholder( dtypes.float32, shape=[None, 100], name="input") @@ -1012,14 +1033,21 @@ class ImportGraphDefTest(test.TestCase): pat = (r"GraphDef producer version -1 below min producer %d supported " r"by TensorFlow \S+\. Please regenerate your graph.$" % versions.GRAPH_DEF_VERSION_MIN_PRODUCER) - importer.import_graph_def(self._MakeGraphDef("", producer=-1)) - x = constant_op.constant( - 7) # Need at least one op to get a C++ graph generated - with self.test_session(graph=g) as sess: + # C API throws error during import, Python-only throws error during run + if ops._USE_C_API: with self.assertRaisesRegexp(Exception, pat): - sess.run(x) + importer.import_graph_def(self._MakeGraphDef("", producer=-1)) + else: + importer.import_graph_def(self._MakeGraphDef("", producer=-1)) + x = constant_op.constant( + 7) # Need at least one op to get a C++ graph generated + with self.test_session(graph=g) as sess: + with self.assertRaisesRegexp(Exception, pat): + sess.run(x) def testVersionHigh(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default() as g: pat = (r"GraphDef min consumer version %d above current version %d " r"for TensorFlow \S+\. Please upgrade TensorFlow\.$" % @@ -1033,6 +1061,8 @@ class ImportGraphDefTest(test.TestCase): def testVersionAppliesToOpConstruction(self): """These tests rely on shape fns in test_ops.cc.""" + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + with ops.Graph().as_default(): importer.import_graph_def( self._MakeGraphDef( @@ -1059,6 +1089,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(123.0, a[0].get_attr("default_float")) def testDefaultAttrsRemoved(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + producer_op_list = op_def_pb2.OpList() text_format.Merge(""" op { @@ -1090,6 +1122,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(987, a[0].get_attr("default_int")) def testFunctions(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + dtype = dtypes.float32 @function.Defun(dtype, dtype, dtype, dtype) def Grad(x, y, dout1, dout2): # pylint: disable=unused-argument @@ -1167,6 +1201,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(sess.run("outer:0"), 21) def testImportInsideDefun(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + g = ops.Graph() with g.as_default(): @function.Defun() @@ -1190,6 +1226,8 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(z_val, -2.0) def testImportGraphWithFunctionTwice(self): + if ops._USE_C_API: return # TODO(skyewm): make this work with C API + g = ops.Graph() with g.as_default(): @function.Defun() -- GitLab From c0b8a07776a7f64fc45113159230a0a10273ec4a Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 22 Nov 2017 15:38:46 -0800 Subject: [PATCH 0795/1801] [tf.contrib.data.map_and_batch()] Parallelize the destruction of old invocation results. For map functions with a large number of return values, resetting each InvocationResult to an initialized state led to running `Tensor::~Tensor()` a large number of times in series. This change moves that destruction to each invocation's callback, which effectively parallelizes it. A future optimization could involve moving the tensor (and its content) into the batch. PiperOrigin-RevId: 176709725 --- .../core/kernels/map_and_batch_dataset_op.cc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/map_and_batch_dataset_op.cc index 620efdb778..ad1e356dbd 100644 --- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/map_and_batch_dataset_op.cc @@ -258,7 +258,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { EnsureOutputAllocated(batch_result, result->return_values); const size_t num_components = result->return_values.size(); for (size_t i = 0; i < num_components; ++i) { - Tensor tensor = result->return_values[i]; + const Tensor& tensor = result->return_values[i]; Tensor* batch = &(batch_result->output)[i]; if (tensor.NumElements() != (batch->NumElements() / batch->dim_size(0))) { @@ -271,6 +271,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { ", [batch]: ", batch_shape.DebugString())); break; } + // TODO(mrry): Add a version of DoParallelConcat that allows + // us to move `tensor` where possible, to speed up string + // tensor batching. Status copy_status = ::tensorflow::functor::DoParallelConcat( *dataset()->device_, tensor, offset, batch); if (!copy_status.ok()) { @@ -279,6 +282,11 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } } } + // NOTE(mrry): We clear the return values here to release any + // memory associated with them and to paralellize the destruction + // of the tensors (which can be surprisingly expensive for + // map functions with large numbers of return values). + result->return_values.clear(); batch_result->counter->DecrementCount(); }); } @@ -297,7 +305,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { for (size_t i = 0; i < dataset()->batch_size_; ++i) { size_t index = ComputeInvocationIndex(batch_index, i); InvocationResult* result = &invocation_results_[index]; - *result = InvocationResult(); + // Reset the state of `result`. + // NOTE(mrry): `result->return_values` were cleared when the previous + // invocation completed. + result->status = Status::OK(); } // Start individual invocations. for (size_t i = 0; i < dataset()->batch_size_; ++i) { -- GitLab From 8d9eda26be345ace2e110feb0cf9a2500990eb82 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 22 Nov 2017 15:51:33 -0800 Subject: [PATCH 0796/1801] Remove hardcoded discovery document now that the TPU alpha API definitions are public (https://www.googleapis.com/discovery/v1/apis/tpu/v1alpha1/rest). PiperOrigin-RevId: 176710985 --- .../python/training/tpu_cluster_resolver.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index f0144e9faa..c74da9cabd 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -80,13 +80,9 @@ class TPUClusterResolver(ClusterResolver): raise ImportError('googleapiclient must be installed before using the ' 'TPU cluster resolver') - # TODO(b/67375680): Remove custom URL once TPU APIs are finalized self._service = discovery.build( - 'tpu', - 'v1', - credentials=self._credentials, - discoveryServiceUrl='https://storage.googleapis.com' - '/tpu-api-definition/v1alpha1.json') + 'tpu', 'v1alpha1', + credentials=self._credentials) else: self._service = service -- GitLab From 0aa09c2de25ddb321405656ae33031773690bd5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 15:54:27 -0800 Subject: [PATCH 0797/1801] dynamic_rnn: put all ops in the same scope This clarifies the graph visualization a bit. PiperOrigin-RevId: 176711260 --- tensorflow/python/ops/rnn.py | 47 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 436872f044..e30b19842f 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -565,33 +565,34 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, if not _like_rnncell(cell): raise TypeError("cell must be an instance of RNNCell") - # By default, time_major==False and inputs are batch-major: shaped - # [batch, time, depth] - # For internal calculations, we transpose to [time, batch, depth] - flat_input = nest.flatten(inputs) - - if not time_major: - # (B,T,D) => (T,B,D) - flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] - flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) - - parallel_iterations = parallel_iterations or 32 - if sequence_length is not None: - sequence_length = math_ops.to_int32(sequence_length) - if sequence_length.get_shape().ndims not in (None, 1): - raise ValueError( - "sequence_length must be a vector of length batch_size, " - "but saw shape: %s" % sequence_length.get_shape()) - sequence_length = array_ops.identity( # Just to find it in the graph. - sequence_length, name="sequence_length") - - # Create a new scope in which the caching device is either - # determined by the parent scope, or is set to place the cached - # Variable using the same placement as for the rest of the RNN. with vs.variable_scope(scope or "rnn") as varscope: + # Create a new scope in which the caching device is either + # determined by the parent scope, or is set to place the cached + # Variable using the same placement as for the rest of the RNN. if context.in_graph_mode(): if varscope.caching_device is None: varscope.set_caching_device(lambda op: op.device) + + # By default, time_major==False and inputs are batch-major: shaped + # [batch, time, depth] + # For internal calculations, we transpose to [time, batch, depth] + flat_input = nest.flatten(inputs) + + if not time_major: + # (B,T,D) => (T,B,D) + flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] + flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) + + parallel_iterations = parallel_iterations or 32 + if sequence_length is not None: + sequence_length = math_ops.to_int32(sequence_length) + if sequence_length.get_shape().ndims not in (None, 1): + raise ValueError( + "sequence_length must be a vector of length batch_size, " + "but saw shape: %s" % sequence_length.get_shape()) + sequence_length = array_ops.identity( # Just to find it in the graph. + sequence_length, name="sequence_length") + batch_size = _best_effort_input_batch_size(flat_input) if initial_state is not None: -- GitLab From 806754888188e40430bc96ad33c5f51282c2d338 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Nov 2017 16:30:44 -0800 Subject: [PATCH 0798/1801] Acquire the GIL before working with PyLists and PyDict. PiperOrigin-RevId: 176714705 --- tensorflow/python/grappler/cluster.i | 9 ++++++++- tensorflow/python/grappler/item.i | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i index 1838c40e46..5a7cdf26f8 100644 --- a/tensorflow/python/grappler/cluster.i +++ b/tensorflow/python/grappler/cluster.i @@ -138,6 +138,7 @@ tensorflow::Status _GetOpPerformanceDataAndRunTime( static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) { const std::unordered_map& devices = cluster->GetDevices(); + PyGILState_STATE gstate = PyGILState_Ensure(); PyObject* result = PyList_New(devices.size()); int i = 0; for (auto& dev : devices) { @@ -150,6 +151,7 @@ static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) { PyList_SetItem(result, i, dev_obj); ++i; } + PyGILState_Release(gstate); return result; } @@ -184,6 +186,7 @@ static PyObject* TF_MeasureCosts( if (!status.ok()) { Py_RETURN_NONE; } + PyGILState_STATE gstate = PyGILState_Ensure(); PyObject* op_perf_objs = PyList_New( op_performance_data.op_performance_size()); for (int i = 0; i < op_performance_data.op_performance_size(); i++) { @@ -211,8 +214,10 @@ static PyObject* TF_MeasureCosts( status = tensorflow::Status(tensorflow::error::Code::INTERNAL, "Error setting return tuples."); tensorflow::Set_TF_Status_from_Status(out_status, status); - Py_RETURN_NONE; + Py_INCREF(Py_None); + ret = Py_None; } + PyGILState_Release(gstate); return ret; } @@ -240,6 +245,7 @@ static PyObject* TF_DeterminePeakMemoryUsage( Py_RETURN_NONE; } + PyGILState_STATE gstate = PyGILState_Ensure(); PyObject* result = PyDict_New(); for (const auto& device : cluster->GetDevices()) { const tensorflow::grappler::GraphMemory::MemoryUsage& usage = @@ -261,6 +267,7 @@ static PyObject* TF_DeterminePeakMemoryUsage( PyTuple_SetItem(ret, 1, per_device); PyDict_SetItem(result, PyString_FromString(device.first.c_str()), ret); } + PyGILState_Release(gstate); return result; } diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i index 8c346b4438..2fa502b81d 100644 --- a/tensorflow/python/grappler/item.i +++ b/tensorflow/python/grappler/item.i @@ -101,6 +101,7 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it Py_RETURN_NONE; } + PyGILState_STATE gstate = PyGILState_Ensure(); PyObject* props = PyDict_New(); for (const auto& node : item->graph.node()) { const string& node_name = node.name(); @@ -115,8 +116,8 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it PyList_SetItem(prop, i, output_prop); } CHECK_EQ(0, PyDict_SetItem(props, PyString_FromString(node_name.c_str()), prop)); - } - + } + PyGILState_Release(gstate); return props; } -- GitLab From 85fa6bdfe40f24259b3cec19637567ed3cff7370 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Wed, 22 Nov 2017 16:35:13 -0800 Subject: [PATCH 0799/1801] [tf.data] Patch for thread safe IgnoreErrorDataset. PiperOrigin-RevId: 176715082 --- .../core/kernels/ignore_errors_dataset_op.cc | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/ignore_errors_dataset_op.cc b/tensorflow/core/kernels/ignore_errors_dataset_op.cc index 43ba5ab7dd..8cf263d87f 100644 --- a/tensorflow/core/kernels/ignore_errors_dataset_op.cc +++ b/tensorflow/core/kernels/ignore_errors_dataset_op.cc @@ -79,16 +79,20 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel { Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { - if (!input_impl_) { - *end_of_sequence = true; - return Status::OK(); - } - Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); - while (!s.ok()) { - out_tensors->clear(); - s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + { + tf_shared_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + while (!s.ok()) { + out_tensors->clear(); + s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + } } if (*end_of_sequence) { + mutex_lock l(mu_); input_impl_.reset(); } return Status::OK(); @@ -96,6 +100,7 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel { protected: Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); if (input_impl_) TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); else @@ -106,6 +111,7 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel { Status RestoreInternal(OpKernelContext* ctx, IteratorStateReader* reader) override { + mutex_lock l(mu_); if (reader->Contains(full_name("input_impls_empty"))) input_impl_.reset(); else @@ -114,7 +120,8 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel { } private: - std::unique_ptr input_impl_; + mutex mu_; + std::unique_ptr input_impl_ GUARDED_BY(mu_); }; const DatasetBase* const input_; -- GitLab From 8d84926f525dfd0728325e43cd39dbcb28fd3601 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 16:42:07 -0800 Subject: [PATCH 0800/1801] Remove duplicated testConv1DTranspose --- tensorflow/python/kernel_tests/conv1d_test.py | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index a86e411e2f..d92797a7d3 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -93,45 +93,5 @@ class Conv1DTest(test.TestCase): self.assertAllClose(cache_values, value) - def testConv1DTranspose(self): - with self.test_session(): - stride = 2 - - # Input, output: [batch, width, depth] - x_shape = [2, 4, 3] - y_shape = [2, 9, 2] - - # Filter: [kernel_width, output_depth, input_depth] - f_shape = [3, 2, 3] - - x = constant_op.constant( - 1.0, shape=x_shape, name="x", dtype=dtypes.float32) - f = constant_op.constant( - 1.0, shape=f_shape, name="filter", dtype=dtypes.float32) - output = nn_ops.conv1d_transpose( - x, f, y_shape, stride=stride, padding="VALID") - value = output.eval() - - cache_values = np.zeros(y_shape, dtype=np.float32) - - # The amount of padding added - pad = 1 - - for n in xrange(x_shape[0]): - for k in xrange(f_shape[1]): - for w in xrange(pad, y_shape[1] - pad): - target = 3.0 - # We add a case for locations divisible by the stride. - w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad - if w_in: - target += 3.0 - cache_values[n, w, k] = target - - # copy values in the border - cache_values[n, 0, k] = cache_values[n, 1, k] - cache_values[n, -1, k] = cache_values[n, -2, k] - - self.assertAllClose(cache_values, value) - if __name__ == "__main__": test.main() -- GitLab From 1885db7ffa6cea7bacfb7ef1507f3103cd1829f0 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Wed, 22 Nov 2017 16:36:48 -0800 Subject: [PATCH 0801/1801] Only convert the layout if the node is placed on GPU. PiperOrigin-RevId: 176715219 --- tensorflow/core/grappler/optimizers/BUILD | 2 + .../grappler/optimizers/layout_optimizer.cc | 146 +++++++++++------- .../grappler/optimizers/layout_optimizer.h | 5 +- .../optimizers/layout_optimizer_test.cc | 67 +++++++- 4 files changed, 163 insertions(+), 57 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index e127556054..5d9eb8e0b1 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -312,6 +312,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":graph_optimizer", + "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:devices", @@ -320,6 +321,7 @@ cc_library( "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:cluster", "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/costs:virtual_placer", "//tensorflow/core/grappler/utils:frame", ], ) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index 89ebd8e98f..31c3ba6863 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -27,7 +27,9 @@ limitations under the License. #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/lib/strings/numbers.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { namespace grappler { @@ -109,11 +111,13 @@ bool IsMaxPoolGradV1(const NodeDef& node) { class GraphProcessor { public: - GraphProcessor(GraphDef* graph, NodeMap* node_map, - const std::unordered_set& nodes_to_preserve) - : graph_(graph), - node_map_(node_map), - nodes_to_preserve_(nodes_to_preserve) {} + GraphProcessor(const VirtualPlacer& virtual_placer, + const std::unordered_set& nodes_to_preserve, + GraphDef* graph, NodeMap* node_map) + : virtual_placer_(virtual_placer), + nodes_to_preserve_(nodes_to_preserve), + graph_(graph), + node_map_(node_map) {} protected: NodeDef* AddNodePermConst(const string& name, const string& device, @@ -122,7 +126,6 @@ class GraphProcessor { node_map_->AddNode(name, node); node->set_name(name); node->set_op("Const"); - node->set_device(device); AttrValue attr_data_type; attr_data_type.set_type(DT_INT32); node->mutable_attr()->insert({"dtype", attr_data_type}); @@ -133,6 +136,13 @@ class GraphProcessor { } tensor.AsProtoTensorContent(attr_tensor.mutable_tensor()); node->mutable_attr()->insert({"value", attr_tensor}); + string device_name; + if (device.empty()) { + device_name = virtual_placer_.get_canonical_device_name(*node); + } else { + device_name = device; + } + node->set_device(device_name); return node; } @@ -142,7 +152,6 @@ class GraphProcessor { node_map_->AddNode(name, node); node->set_name(name); node->set_op("Const"); - node->set_device(device); AttrValue attr_data_type; attr_data_type.set_type(dtype); node->mutable_attr()->insert({"dtype", attr_data_type}); @@ -151,6 +160,13 @@ class GraphProcessor { tensor.scalar()() = value; tensor.AsProtoTensorContent(attr_tensor.mutable_tensor()); node->mutable_attr()->insert({"value", attr_tensor}); + string device_name; + if (device.empty()) { + device_name = virtual_placer_.get_canonical_device_name(*node); + } else { + device_name = device; + } + node->set_device(device_name); return node; } @@ -159,7 +175,6 @@ class GraphProcessor { node_map_->AddNode(name, node); node->set_name(name); node->set_op("Const"); - node->set_device(device); AttrValue attr_data_type; attr_data_type.set_type(DT_INT32); node->mutable_attr()->insert({"dtype", attr_data_type}); @@ -172,26 +187,37 @@ class GraphProcessor { } tensor.AsProtoTensorContent(attr_tensor.mutable_tensor()); node->mutable_attr()->insert({"value", attr_tensor}); + string device_name; + if (device.empty()) { + device_name = virtual_placer_.get_canonical_device_name(*node); + } else { + device_name = device; + } + node->set_device(device_name); return node; } + const VirtualPlacer& virtual_placer_; + const std::unordered_set& nodes_to_preserve_; GraphDef* graph_; NodeMap* node_map_; - const std::unordered_set& nodes_to_preserve_; }; struct OptimizeContext { OptimizeContext(GraphDef* graph, NodeDef* node, NodeMap* node_map, + const VirtualPlacer& virtual_placer, const std::unordered_set& nodes_to_preserve, bool is_in_frame) : graph(graph), node(node), node_map(node_map), + virtual_placer(virtual_placer), nodes_to_preserve(nodes_to_preserve), is_in_frame(is_in_frame) {} GraphDef* graph; NodeDef* node; NodeMap* node_map; + const VirtualPlacer& virtual_placer; const std::unordered_set& nodes_to_preserve; bool is_in_frame; }; @@ -199,8 +225,8 @@ struct OptimizeContext { class NodeProcessor : public GraphProcessor { public: explicit NodeProcessor(const OptimizeContext& opt_cxt) - : GraphProcessor(opt_cxt.graph, opt_cxt.node_map, - opt_cxt.nodes_to_preserve), + : GraphProcessor(opt_cxt.virtual_placer, opt_cxt.nodes_to_preserve, + opt_cxt.graph, opt_cxt.node_map), node_(opt_cxt.node), is_in_frame_(opt_cxt.is_in_frame) {} virtual ~NodeProcessor() {} @@ -257,7 +283,25 @@ class NodeProcessor : public GraphProcessor { } virtual bool ShouldProcess() const { - return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs(); + return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() && + IsOnGPU(); + } + + virtual bool IsOnGPU() const { + string device_name; + if (node_->device().empty()) { + device_name = virtual_placer_.get_canonical_device_name(*node_); + } else { + device_name = node_->device(); + } + string device; + string not_used; + if (DeviceNameUtils::SplitDeviceName(device_name, ¬_used, &device) && + (StringPiece(str_util::Lowercase(device))) + .contains(str_util::Lowercase(DEVICE_GPU))) { + return true; + } + return false; } void UpdateAttrDataFormat() { @@ -536,6 +580,9 @@ class BiasAddGradProcessor : public NodeProcessor { if (MustPreserve()) { return false; } + if (!IsOnGPU()) { + return false; + } auto input = node_map_->GetNode(node_->input(0)); if (input) { if ((IsNHWC() && IsDimsFour(*input)) || IsNodeNCHWToNHWC(input->name())) { @@ -556,7 +603,7 @@ class Conv2DProcessor : public NodeProcessor { protected: bool ShouldProcess() const override { return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() && - (!IsGemmUsed() || no_gemm_); + (!IsGemmUsed() || no_gemm_) && IsOnGPU(); } TensorShapeProto GetShape(const string& input_name) const { @@ -693,7 +740,7 @@ class AgnosticNodeProcessor : public NodeProcessor { protected: bool ShouldProcess() const override { return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && - IsNodeAfterNCHWToNHWC(); + IsNodeAfterNCHWToNHWC() && IsOnGPU(); } bool IsNodeAfterNCHWToNHWC() const { @@ -746,7 +793,8 @@ class BinaryOpProcessor : public AgnosticNodeProcessor { return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() && (Is4DOperateWithND(4) || Is4DOperateWithScalar() || - Is4DOperateWithVector()); + Is4DOperateWithVector()) && + IsOnGPU(); } std::vector GetInputPos() const override { @@ -855,7 +903,7 @@ class ConcatProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && - IsNodeAfterNCHWToNHWC() && IsAlongDimC(); + IsNodeAfterNCHWToNHWC() && IsAlongDimC() && IsOnGPU(); } std::vector GetInputPos() const override { @@ -920,7 +968,7 @@ class PadProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && - IsNodeAfterNCHWToNHWC() && PaddingSupported(); + IsNodeAfterNCHWToNHWC() && PaddingSupported() && IsOnGPU(); } Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); } @@ -1132,7 +1180,8 @@ class SqueezeProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { return !MustPreserve() && IsDimsN(*node_, 2) && HasOutputs() && - IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW(); + IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW() && + IsOnGPU(); } Status AddLayoutTransposeToOutputs() override { return Status::OK(); } @@ -1183,7 +1232,7 @@ class SumProcessor : public AgnosticNodeProcessor { auto input0 = node_map_->GetNode(node_->input(0)); return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() && (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) && - IsAlongDimNHW(); + IsAlongDimNHW() && IsOnGPU(); } Status AddLayoutTransposeToOutputs() override { return Status::OK(); } @@ -1243,42 +1292,41 @@ class SumProcessor : public AgnosticNodeProcessor { class DataLayoutOptimizer : GraphProcessor { public: explicit DataLayoutOptimizer( - LayoutOptimizer::TuningConfig config, - const std::unordered_set& nodes_to_preserve, - const string& default_device, GraphDef* graph, NodeMap* node_map) - : GraphProcessor(graph, node_map, nodes_to_preserve), - config_(config), - default_device_(default_device) {} + const VirtualPlacer& virtual_placer, + const LayoutOptimizer::TuningConfig& config, + const std::unordered_set& nodes_to_preserve, GraphDef* graph, + NodeMap* node_map) + : GraphProcessor(virtual_placer, nodes_to_preserve, graph, node_map), + config_(config) {} Status Optimize() { - LOG(INFO) << "Number of nodes for original graph: " << graph_->node_size(); + VLOG(1) << "Number of nodes for original graph: " << graph_->node_size(); TF_RETURN_IF_ERROR(Expand()); - LOG(INFO) << "Number of nodes after Expand: " << graph_->node_size(); + VLOG(1) << "Number of nodes after Expand: " << graph_->node_size(); TF_RETURN_IF_ERROR(Collapse()); - LOG(INFO) << "Number of nodes after Collapse: " << graph_->node_size(); + VLOG(1) << "Number of nodes after Collapse: " << graph_->node_size(); return Status::OK(); } private: NodeDef* AddNodePermNHWCToNCHW() { - return AddNodePermConst(kPermNHWCToNCHW, default_device_, {0, 3, 1, 2}); + return AddNodePermConst(kPermNHWCToNCHW, "", {0, 3, 1, 2}); } NodeDef* AddNodePermNCHWToNHWC() { - return AddNodePermConst(kPermNCHWToNHWC, default_device_, {0, 2, 3, 1}); + return AddNodePermConst(kPermNCHWToNHWC, "", {0, 2, 3, 1}); } NodeDef* AddNodeConcatConst() { - return AddNodeConstScalar(kConcatConst, default_device_, DT_INT32, 1); + return AddNodeConstScalar(kConcatConst, "", DT_INT32, 1); } NodeDef* AddNodeGatherAxisConst() { - return AddNodeConstScalar(kGatherAxisConst, default_device_, DT_INT32, 0); + return AddNodeConstScalar(kGatherAxisConst, "", DT_INT32, 0); } NodeDef* AddNodeReductionConst() { - return GraphProcessor::AddNodeReductionConst(kReductionConst, - default_device_); + return GraphProcessor::AddNodeReductionConst(kReductionConst, ""); } // Expand all nodes which is in NHWC, but supports NCHW or is layout agnostic. @@ -1295,8 +1343,8 @@ class DataLayoutOptimizer : GraphProcessor { ops_format_supported.end()) { auto node = graph_->mutable_node(i); bool is_in_frame = !frames[node].empty(); - OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_, - is_in_frame); + OptimizeContext opt_cxt(graph_, node, node_map_, virtual_placer_, + nodes_to_preserve_, is_in_frame); std::unique_ptr node_processor; if (IsAvgPoolGrad(*node)) { node_processor.reset(new AvgPoolGradProcessor(opt_cxt)); @@ -1343,8 +1391,8 @@ class DataLayoutOptimizer : GraphProcessor { ops_format_agnostic.end()) { auto node = graph_->mutable_node(i); bool is_in_frame = !frames[node].empty(); - OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_, - is_in_frame); + OptimizeContext opt_cxt(graph_, node, node_map_, virtual_placer_, + nodes_to_preserve_, is_in_frame); std::unique_ptr node_processor; if (IsAddN(*node)) { node_processor.reset(new AddNProcessor(opt_cxt)); @@ -1419,8 +1467,7 @@ class DataLayoutOptimizer : GraphProcessor { return Status::OK(); } - LayoutOptimizer::TuningConfig config_; - string default_device_; + const LayoutOptimizer::TuningConfig& config_; }; int GetNumTranspose(const GraphDef& graph) { @@ -1430,7 +1477,7 @@ int GetNumTranspose(const GraphDef& graph) { number++; } } - LOG(INFO) << "Number of Transpose nodes: " << number; + VLOG(1) << "Number of Transpose nodes: " << number; return number; } @@ -1455,7 +1502,6 @@ int GetNumGPUs(const Cluster& cluster) { Status LayoutOptimizer::Tune(const GrapplerItem& item, const GraphProperties& graph_properties, - const string& default_device, const TuningConfig& config, GraphDef* output) { auto status = graph_properties.AnnotateOutputShapes(output); if (!status.ok()) { @@ -1463,8 +1509,8 @@ Status LayoutOptimizer::Tune(const GrapplerItem& item, return status; } NodeMap node_map(output); - DataLayoutOptimizer layout_optimizer(config, nodes_to_preserve_, - default_device, output, &node_map); + DataLayoutOptimizer layout_optimizer(*virtual_placer_, config, + nodes_to_preserve_, output, &node_map); status = layout_optimizer.Optimize(); return status; } @@ -1477,6 +1523,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, return Status::OK(); } + virtual_placer_.reset(new VirtualPlacer(cluster)); nodes_to_preserve_ = item.NodesToPreserve(); GraphProperties graph_properties(item); auto status = graph_properties.InferStatically(); @@ -1487,20 +1534,13 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, TuningConfig config; config.no_gemm = false; - string default_device = "/job:localhost/replica:0/task:0/cpu:0"; - if (cluster) { - if (!cluster->GetDevices().empty()) { - default_device = cluster->GetDevices().begin()->first; - } - } - - status = Tune(item, graph_properties, default_device, config, output); + status = Tune(item, graph_properties, config, output); // This is based on an empirical observation that if the introduced Transpose // nodes is more than 30, not using GEMM implementation would result in better // performance. if (status.ok() && GetNumTranspose(*output) > 30) { config.no_gemm = true; - status = Tune(item, graph_properties, default_device, config, output); + status = Tune(item, graph_properties, config, output); } if (!status.ok()) { diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.h b/tensorflow/core/grappler/optimizers/layout_optimizer.h index f5dd70356a..357205828d 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.h +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_GRAPPLER_OPTIMIZERS_LAYOUT_OPTIMIZER_H_ #include "tensorflow/core/grappler/costs/graph_properties.h" +#include "tensorflow/core/grappler/costs/virtual_placer.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" namespace tensorflow { @@ -47,10 +48,10 @@ class LayoutOptimizer : public GraphOptimizer { const GraphDef& optimize_output, double result) override; private: + std::unique_ptr virtual_placer_; std::unordered_set nodes_to_preserve_; Status Tune(const GrapplerItem& item, const GraphProperties& graph_properties, - const string& default_device, const TuningConfig& config, - GraphDef* output); + const TuningConfig& config, GraphDef* output); }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index 5d2d90b193..d4ab42ad60 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -39,6 +39,11 @@ class LayoutOptimizerTest : public ::testing::Test { Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size, const string& padding) { + return SimpleConv2D(s, input_size, filter_size, padding, ""); + } + + Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size, + const string& padding, const string& device) { int batch_size = 128; int input_height = input_size; int input_width = input_size; @@ -59,8 +64,8 @@ class LayoutOptimizerTest : public ::testing::Test { Output filter = ops::Const(s->WithOpName("Filter"), Input::Initializer(filter_data)); - Output conv = ops::Conv2D(s->WithOpName("Conv2D"), input, filter, - {1, stride, stride, 1}, padding); + Output conv = ops::Conv2D(s->WithOpName("Conv2D").WithDevice(device), input, + filter, {1, stride, stride, 1}, padding); return conv; } @@ -278,6 +283,64 @@ TEST_F(LayoutOptimizerTest, PreserveFetch) { EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC"); } +TEST_F(LayoutOptimizerTest, EmptyDevice) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto conv_node = node_map.GetNode("Conv2D"); + EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NCHW"); +} + +TEST_F(LayoutOptimizerTest, GPUDevice) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = + SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:gpu:0"); + Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto conv_node = node_map.GetNode("Conv2D"); + EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NCHW"); +} + +TEST_F(LayoutOptimizerTest, CPUDeviceLowercase) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = + SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:cpu:0"); + Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto conv_node = node_map.GetNode("Conv2D"); + EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC"); +} + +TEST_F(LayoutOptimizerTest, CPUDeviceUppercase) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID", "/CPU:0"); + Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto conv_node = node_map.GetNode("Conv2D"); + EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC"); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 51f5eb77f089f17f17653b5655f8207d3ff5d36b Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 16:47:31 -0800 Subject: [PATCH 0802/1801] Remove extra propagate_nans from tensorflow/stream_executor/dnn.cc --- tensorflow/stream_executor/dnn.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 6fd0e69905..44144a0613 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -473,8 +473,7 @@ PoolingDescriptor::PoolingDescriptor(int ndims) propagate_nans_(false), window_(ndims, 0), padding_(ndims, 0), - strides_(ndims, 1), - propagate_nans_(false) {} + strides_(ndims, 1) {} PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {} -- GitLab From 5548cfc2eda3614a318e04cd922512be99aefefe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 16:45:54 -0800 Subject: [PATCH 0803/1801] [XLA] Add a convenience function that returns a platform with the given name. PiperOrigin-RevId: 176715886 --- .../compiler/xla/service/platform_util.cc | 22 +++++++++++++++++++ .../compiler/xla/service/platform_util.h | 8 +++++++ 2 files changed, 30 insertions(+) diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc index 3a1818de82..63f3bfb36c 100644 --- a/tensorflow/compiler/xla/service/platform_util.cc +++ b/tensorflow/compiler/xla/service/platform_util.cc @@ -94,6 +94,28 @@ PlatformUtil::GetSupportedPlatforms() { platforms_string.c_str()); } +/*static*/ StatusOr PlatformUtil::GetPlatform( + const string& platform_name) { + using tensorflow::str_util::Lowercase; + string platform_str = Lowercase(platform_name); + // "cpu" and "host" mean the same thing. + if (platform_str == "cpu") { + platform_str = "host"; + } + // "gpu" and "cuda" mean the same thing. + if (platform_str == "gpu") { + platform_str = "cuda"; + } + + TF_ASSIGN_OR_RETURN(auto platforms, PlatformUtil::GetSupportedPlatforms()); + for (se::Platform* platform : platforms) { + if (Lowercase(platform->Name()) == platform_str) { + return platform; + } + } + return InvalidArgument("platform %s not found", platform_name.c_str()); +} + // Returns whether the device underlying the given StreamExecutor is supported // by XLA. static bool IsDeviceSupported(se::StreamExecutor* executor) { diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h index eac5737030..a59d4ffe87 100644 --- a/tensorflow/compiler/xla/service/platform_util.h +++ b/tensorflow/compiler/xla/service/platform_util.h @@ -16,11 +16,14 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_PLATFORM_UTIL_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_PLATFORM_UTIL_H_ +#include #include #include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/types.h" namespace xla { @@ -39,6 +42,11 @@ class PlatformUtil { // default platform. Otherwise returns an error. static StatusOr GetDefaultPlatform(); + // Returns the platform according to the given name. Returns error if there is + // no such platform. + static StatusOr GetPlatform( + const string& platform_name); + // Returns a vector of StreamExecutors for the given platform. The vector is // indexed by device ordinal (device numbering used by StreamExecutor). If an // element is nullptr, then the device is present by not supported by XLA. -- GitLab From 58d0e5b6c3cf48acb68c87d2deca0e304b075b1a Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 22 Nov 2017 17:18:52 -0800 Subject: [PATCH 0804/1801] Add persistent GradientTape support Added two simple tests for persistent tapes and did a manual test that calling "del" on gradient tape releases all tensors. Also: - Add missing Py_DECREF to error case in MakeTensorIDList - Make a couple error messages more descriptive PiperOrigin-RevId: 176718477 --- tensorflow/c/eager/tape.h | 66 +++++++++++++++++------ tensorflow/python/eager/backprop.py | 38 +++++++++++-- tensorflow/python/eager/backprop_test.py | 31 +++++++++++ tensorflow/python/eager/pywrap_tfe.h | 3 +- tensorflow/python/eager/pywrap_tfe_src.cc | 13 +++-- tensorflow/python/eager/tape.py | 4 +- 6 files changed, 128 insertions(+), 27 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 84b40a1819..f52248e7d5 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -106,6 +106,12 @@ class VSpace { // Deletes the input tensor. virtual void DeleteGradient(Gradient* gradient) const = 0; + + // Lets this VSpace know that it can release resources held by the + // `backward_function`, It will not be called again. + // `backward_function` must not be null. + virtual void ReleaseBackwardFunction( + BackwardFunction* backward_function) const = 0; }; // Traces the execution of operations, doing eager garbage collection, and @@ -113,7 +119,11 @@ class VSpace { template class GradientTape { public: - GradientTape() {} + // If `persistent` is true, GradientTape will not eagerly delete backward + // functions (and hence the tensors they keep alive). Instead, everything + // is deleted in ~GradientTape. Persistent GradientTapes are useful when + // users want to compute multiple gradients over the same tape. + GradientTape(bool persistent) : persistent_(persistent) {} ~GradientTape() { for (const auto& pair : op_tape_) { pair.second.backward_function_deleter(); @@ -150,6 +160,10 @@ class GradientTape { // Map from tensor id to number of remaining usages (i.e. how many entries in // the tape refer to it); to aid in tape garbage collection. std::unordered_map tensor_usage_; + + // If true, all activations are deleted in the first call to ComputeGradient. + // Else, only when this is destructed. + bool persistent_; }; // Template instantiations here @@ -279,11 +293,16 @@ struct BackpropInitialState { std::unordered_map op_missing_tensor; }; +// If `persistent_tape` is true, op_tape is not changed and none of the +// backwards functions are deleted. +// If `persistent_tape` is false, op_tape is cleared and backwards functions +// not needed for gradient computation are deleted. Backwards functions that +// are needed, are copied and returned in BackpropInitialState. template BackpropInitialState PrepareBackprop( gtl::ArraySlice target, const TensorTape& tensor_tape, - OpTape op_tape, - const std::unordered_set& sources_set) { + OpTape* op_tape, + const std::unordered_set& sources_set, bool persistent_tape) { std::vector tensor_stack; tensor_stack.reserve(target.size()); for (auto t : target) { @@ -298,9 +317,9 @@ BackpropInitialState PrepareBackprop( continue; } int64 op_id = op_id_it->second; - auto op_it = op_tape.find(op_id); + auto op_it = op_tape->find(op_id); auto result_op_it = result.op_tape.find(op_id); - if (op_id == -1 || op_it == op_tape.end() || + if (op_id == -1 || op_it == op_tape->end() || result_op_it != result.op_tape.end()) { continue; } @@ -317,7 +336,9 @@ BackpropInitialState PrepareBackprop( } } } - op_tape.erase(op_it); + if (!persistent_tape) { + op_tape->erase(op_it); + } } for (auto& pair : result.tensor_usage_counts) { auto it = tensor_tape.find(pair.first); @@ -325,9 +346,15 @@ BackpropInitialState PrepareBackprop( result.op_missing_tensor[it->second] += 1; } } - // Call destructors for all unneeded gradient functions. - for (const auto& op_pair : op_tape) { - op_pair.second.backward_function_deleter(); + if (!persistent_tape) { + // Call destructors for all unneeded gradient functions and + // clear the op_tape. We can clear the tape because ownership of + // backward functions that will be used for gradient computation + // has been transfered to `result`. + for (const auto& op_pair : *op_tape) { + op_pair.second.backward_function_deleter(); + } + op_tape->clear(); } return result; } @@ -369,7 +396,8 @@ Status InitialGradients( auto op_it = op_tape.find(tensor_it->second); if (op_it == op_tape.end()) { return errors::Internal( - "Internal state of the gradient tape is invalid."); + "Internal state of the gradient tape is invalid: " + "failed to find operation producing a tensor"); } bool found = false; for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) { @@ -383,7 +411,8 @@ Status InitialGradients( } if (!found) { return errors::Internal( - "Internal state of the gradient tape is invalid."); + "Internal state of the gradient tape is invalid: " + "none of operations outputs match expected tensor"); } } else { // No record of the target tensor found on the tape, so no gradient @@ -415,17 +444,19 @@ Status GradientTape::ComputeGradient( std::unordered_set sources_set(source_tensor_ids.begin(), source_tensor_ids.end()); BackpropInitialState state = PrepareBackprop( - target_tensor_ids, tensor_tape_, std::move(op_tape_), sources_set); + target_tensor_ids, tensor_tape_, &op_tape_, sources_set, persistent_); std::vector op_stack = InitialStack(state.op_tape, state.op_missing_tensor); std::unordered_map> gradients; Status s = InitialGradients(vspace, target_tensor_ids, output_gradients, tensor_tape_, state.op_tape, state.tensor_usage_counts, &gradients); - auto cleanup = [&state]() { - // Release all backprop functions - for (const auto& pair : state.op_tape) { - pair.second.backward_function_deleter(); + auto cleanup = [this, &state]() { + if (!persistent_) { + // Release all backprop functions + for (const auto& pair : state.op_tape) { + pair.second.backward_function_deleter(); + } } }; if (!s.ok()) { @@ -484,6 +515,9 @@ Status GradientTape::ComputeGradient( std::vector in_gradients; Status s = vspace.CallBackwardFunction(trace.backward_function, out_gradients, &in_gradients); + if (!persistent_) { + vspace.ReleaseBackwardFunction(trace.backward_function); + } if (!s.ok()) { cleanup(); return s; diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 25f7ae785e..0144f3b1e5 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -798,13 +798,41 @@ class GradientTape(object): grad = g.gradient(y, [x])[0] assert grad.numpy() == 6.0 ``` + + By default, the resources held by a GradientTape are released as soon as + GradientTape.gradient() method is called. However, if one need to compute + multiple gradients over the same computation, she can create a persistent + GradientTape. Persistent tapes allow multiple calls to the gradient() method + and release resources when the tape object is destructed. + + Example usage: + + ```python + with tfe.GradientTape(persistent=True) as g: + x = tf.constant(3.0) + g.watch(x) + y = x * x + z = y * y + dz_dx = g.gradient(z, [x])[0] + assert dz_dx.numpy() == 108.0 # 4*x^3 at x = 3 + dy_dx = g.gradient(y, [x])[0] + assert dy_dx.numpy() == 6.0 + del g # Drop the reference to the tape """ - def __init__(self): + def __init__(self, persistent=False): + """Creates a new GradientTape. + + Args: + persistent: Boolean controlling whether a persistent gradient tape + is created. Must be True or False. + + """ self._tape = None + self._persistent = persistent def __enter__(self): - tape.push_new_tape() + tape.push_new_tape(persistent=self._persistent) return self def __exit__(self, typ, value, traceback): @@ -838,12 +866,14 @@ class GradientTape(object): than once. """ if self._tape is None: - raise RuntimeError("GradientTape.gradient can only be called once, and " + raise RuntimeError("GradientTape.gradient can only be called once " + "on non-persistent tapes, and " "only when the context manager has exited.") sources = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable) else x for x in sources] grad = imperative_grad.imperative_grad( _default_vspace, self._tape, [target], sources) - self._tape = None + if not self._persistent: + self._tape = None return grad diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index e18ebba785..9816dd022e 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -314,6 +314,37 @@ class BackpropTest(test.TestCase): RuntimeError, 'GradientTape.gradient can only be called once'): g.gradient(y, [x]) + def testPersistentTape(self): + with backprop.GradientTape(persistent=True) as g: + x = constant_op.constant(3.0) + g.watch(x) + y = x * x + z = y * y + dz_dx = g.gradient(z, [x])[0] + self.assertEqual(dz_dx.numpy(), 4*3*3*3) + dy_dx = g.gradient(y, [x])[0] + self.assertEqual(dy_dx.numpy(), 2*3) + del g + + def testPersistentNestedTape(self): + with backprop.GradientTape(persistent=True) as g: + x = constant_op.constant(3.0) + g.watch(x) + y = x * x + with backprop.GradientTape(persistent=True) as gg: + gg.watch(y) + z = 2 * y + for _ in range(2): + inner_grad = gg.gradient(z, [y])[0] + self.assertEqual(inner_grad.numpy(), 2.0) + y += inner_grad + del gg + grad = g.gradient(y, [x])[0] + self.assertEqual(grad.numpy(), 6.0) + grad = g.gradient(z, [x])[0] + self.assertEqual(grad.numpy(), 12.0) + del g + def testGradientTapeVariable(self): v = resource_variable_ops.ResourceVariable(1.0, name='v') with backprop.GradientTape() as g: diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index f96245f7a5..a33b17ada6 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -88,7 +88,8 @@ TFE_TensorHandle* EagerTensor_Handle(const PyObject* o); PyObject* TFE_Py_InitEagerTensor(PyObject* base_class); // Pushes a new tape into the thread-local stack. -void TFE_Py_TapeStackPushNew(); +// `persistent` must be a PyBool_Type, i.e either Py_True or Py_False +void TFE_Py_TapeStackPushNew(PyObject* persistent); // Pops the tape from the top of the stack and returns it. PyObject* TFE_Py_TapeStackPop(); diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 0a0749fd4b..ce823cb567 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -469,7 +469,8 @@ static tensorflow::int64 FastTensorId(PyObject* tensor) { class GradientTape : public tensorflow::eager::GradientTape { public: - GradientTape() {} + explicit GradientTape(bool persistent) + : tensorflow::eager::GradientTape(persistent) {} void WatchVariable(PyObject* v) { watched_variables_.insert(v); @@ -557,11 +558,11 @@ std::vector* GetTapeStack() { } #endif -void TFE_Py_TapeStackPushNew() { +void TFE_Py_TapeStackPushNew(PyObject* persistent) { TFE_Py_Tape_Type.tp_new = PyType_GenericNew; if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return; TFE_Py_Tape* tape = PyObject_NEW(TFE_Py_Tape, &TFE_Py_Tape_Type); - tape->tape = new GradientTape(); + tape->tape = new GradientTape(persistent == Py_True); GetTapeStack()->push_back(tape); } @@ -704,6 +705,7 @@ std::vector MakeTensorIDList(PyObject* tensors) { PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i); list.push_back(FastTensorId(tensor)); if (PyErr_Occurred()) { + Py_DECREF(seq); return list; } } @@ -889,7 +891,6 @@ class PyVSpace : public tensorflow::eager::VSpace { PyObject* py_result = PyEval_CallObject( reinterpret_cast(backward_function), grads); Py_DECREF(grads); - Py_DECREF(backward_function); if (py_result == nullptr) { return tensorflow::errors::Internal("gradient function threw exceptions"); } @@ -917,6 +918,10 @@ class PyVSpace : public tensorflow::eager::VSpace { return tensorflow::Status::OK(); } + void ReleaseBackwardFunction(PyObject* backward_function) const final { + Py_DECREF(backward_function); + } + void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); } private: diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index 440c84b7ea..14b5238f74 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -33,9 +33,9 @@ class Tape(object): return pywrap_tensorflow.TFE_Py_TapeWatchedVariables(self._tape) -def push_new_tape(): +def push_new_tape(persistent=False): """Pushes a new tape onto the tape stack.""" - pywrap_tensorflow.TFE_Py_TapeStackPushNew() + pywrap_tensorflow.TFE_Py_TapeStackPushNew(persistent) def watch(tensor): -- GitLab From f25abbfb25441bec198ca7517485fbab63f07be1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 17:40:42 -0800 Subject: [PATCH 0805/1801] Minor cleanup: remove unnecessary GetCudaContext. Note that there is no protection against a caller of CUDAExecutor::Launch from accidentally passing a Stream associated with the wrong CUDAExecutor. This is no different from any other CUDAExecutor methods that take a Stream argument, where we similarly have no such protection. The main caller is Stream::ThenLaunch, which necessarily calls Launch on the correct corresponding CUDAExecutor. Other callers use a similar pattern. PiperOrigin-RevId: 176719918 --- .../stream_executor/cuda/cuda_gpu_executor.cc | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 6c522264e1..64d14f29df 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -108,11 +108,6 @@ static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase *gpu_mem) { return AsCudaDevicePtr(*gpu_mem); } -static CudaContext* GetCudaContext(Stream *stream) { - return static_cast(stream->parent()->implementation()) - ->cuda_context(); -} - CudaContext* ExtractCudaContext(CUDAExecutor *cuda_exec) { CHECK(cuda_exec != nullptr); return cuda_exec->cuda_context(); @@ -380,11 +375,11 @@ bool CUDAExecutor::Launch(Stream *stream, const ThreadDim &thread_dims, void **kernel_params = const_cast(args.argument_addresses().data()); - if (!CUDADriver::LaunchKernel(GetCudaContext(stream), cufunc, block_dims.x, - block_dims.y, block_dims.z, thread_dims.x, - thread_dims.y, thread_dims.z, - args.number_of_shared_bytes(), custream, - kernel_params, nullptr /* = extra */)) { + if (!CUDADriver::LaunchKernel(context_, cufunc, block_dims.x, block_dims.y, + block_dims.z, thread_dims.x, thread_dims.y, + thread_dims.z, args.number_of_shared_bytes(), + custream, kernel_params, + nullptr /* = extra */)) { LOG(ERROR) << "failed to launch CUDA kernel with args: " << args.number_of_arguments() << "; thread dim: " << thread_dims.ToString() -- GitLab From 4e5534d3d35a72b87902212e2847ca2871cc7b75 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Nov 2017 17:53:40 -0800 Subject: [PATCH 0806/1801] Prevented a couple of memory leaks in the code generated by swig PiperOrigin-RevId: 176720721 --- tensorflow/python/BUILD | 5 +- tensorflow/python/grappler/cluster.i | 87 ++++++++++++++-------- tensorflow/python/grappler/cluster.py | 4 +- tensorflow/python/grappler/cost_analyzer.i | 11 +-- tensorflow/python/grappler/item.i | 44 ++++++++--- tensorflow/python/grappler/tf_optimizer.i | 14 +--- tensorflow/python/grappler/tf_optimizer.py | 6 +- 7 files changed, 110 insertions(+), 61 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 54c43c1337..9d3974b98e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4350,7 +4350,10 @@ py_library( ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], - deps = [":pywrap_tensorflow_internal"], + deps = [ + ":pywrap_tensorflow_internal", + ":tf_cluster", + ], ) py_test( diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i index 5a7cdf26f8..18fda345e6 100644 --- a/tensorflow/python/grappler/cluster.i +++ b/tensorflow/python/grappler/cluster.i @@ -14,6 +14,14 @@ limitations under the License. ==============================================================================*/ %include "tensorflow/python/platform/base.i" +%include +%include "item.i" + +// Wrap the cluster into an object that swig can manipulate. This ensures it will call the object +// destructor upon garbage collection instead of leaking memory. +struct GCluster { + std::shared_ptr cluster_; +}; %{ #include "tensorflow/core/protobuf/device_properties.pb.h" @@ -72,6 +80,7 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) { } %{ +#include #include #include "tensorflow/core/grappler/devices.h" #include "tensorflow/core/grappler/clusters/single_machine.h" @@ -82,39 +91,56 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) { #include "tensorflow/core/grappler/costs/utils.h" #include "tensorflow/core/protobuf/device_properties.pb.h" -static tensorflow::grappler::Cluster* TF_NewCluster( - bool allow_soft_placement, - bool disable_detailed_stats, TF_Status* out_status) { - int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); - int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();; +// Provide the implementation of the GCluster struct here. +struct GCluster { + GCluster() {} + GCluster(tensorflow::grappler::Cluster* cluster) : cluster_(cluster) {} + + tensorflow::grappler::Cluster* operator->() const { + return cluster_.get(); + } + tensorflow::grappler::Cluster* get() const { + return cluster_.get(); + } + bool is_none() const { + return cluster_.get() == nullptr; + } + + std::shared_ptr cluster_; +}; + + +static GCluster TF_NewCluster(bool allow_soft_placement, + bool disable_detailed_stats, TF_Status* out_status) { + int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); + int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); int timeout_s = 60 * 10; - tensorflow::grappler::Cluster* cluster = + tensorflow::grappler::Cluster* cluster_ = new tensorflow::grappler::SingleMachine( timeout_s, num_cpu_cores, num_gpus); - cluster->DisableDetailedStats(disable_detailed_stats); - cluster->AllowSoftPlacement(allow_soft_placement); - tensorflow::Status status = cluster->Provision(); + cluster_->DisableDetailedStats(disable_detailed_stats); + cluster_->AllowSoftPlacement(allow_soft_placement); + tensorflow::Status status = cluster_->Provision(); tensorflow::Set_TF_Status_from_Status(out_status, status); - return cluster; + return GCluster(cluster_); } -static tensorflow::grappler::Cluster* TF_NewVirtualCluster( +static GCluster TF_NewVirtualCluster( const std::vector& named_devices, TF_Status* out_status) { std::unordered_map devices; for (const auto& named_device : named_devices) { devices[named_device.name()]= named_device.properties(); } - tensorflow::grappler::Cluster* cluster = + tensorflow::grappler::Cluster*cluster_ = new tensorflow::grappler::VirtualCluster(devices); - tensorflow::Status status = cluster->Provision(); + tensorflow::Status status = cluster_->Provision(); tensorflow::Set_TF_Status_from_Status(out_status, status); - return cluster; + return GCluster(cluster_); } -static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster) { +static void TF_ShutdownCluster(GCluster cluster) { cluster->Shutdown(); - delete cluster; } tensorflow::Status _GetOpPerformanceDataAndRunTime( @@ -136,7 +162,7 @@ tensorflow::Status _GetOpPerformanceDataAndRunTime( return tensorflow::Status::OK(); } -static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) { +static PyObject* TF_ListDevices(GCluster cluster) { const std::unordered_map& devices = cluster->GetDevices(); PyGILState_STATE gstate = PyGILState_Ensure(); PyObject* result = PyList_New(devices.size()); @@ -156,13 +182,13 @@ static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) { } static PyObject* TF_MeasureCosts( - const tensorflow::grappler::GrapplerItem* item, - tensorflow::grappler::Cluster* cluster, + GItem item, + GCluster cluster, bool generate_timeline, TF_Status* out_status) { tensorflow::OpPerformanceList op_performance_data; tensorflow::StepStats step_stats; - tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster, 10, 0); + tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster.get(), 10, 0); tensorflow::grappler::Costs costs; tensorflow::Status status = _GetOpPerformanceDataAndRunTime( @@ -223,10 +249,10 @@ static PyObject* TF_MeasureCosts( static PyObject* TF_DeterminePeakMemoryUsage( - const tensorflow::grappler::GrapplerItem* item, - tensorflow::grappler::Cluster* cluster, + GItem item, + GCluster cluster, TF_Status* out_status) { - if (!item || !cluster) { + if (item.is_none() || cluster.is_none()) { tensorflow::Status status(tensorflow::error::Code::INTERNAL, "You need both a cluster and an item to determine peak memory usage"); tensorflow::Set_TF_Status_from_Status(out_status, status); @@ -236,7 +262,7 @@ static PyObject* TF_DeterminePeakMemoryUsage( tensorflow::Status status; if (cluster->DetailedStatsEnabled()) { - status = memory.InferDynamically(cluster); + status = memory.InferDynamically(cluster.get()); } else { status = memory.InferStatically(cluster->GetDevices()); } @@ -274,18 +300,17 @@ static PyObject* TF_DeterminePeakMemoryUsage( %} // Wrap these functions. - -static tensorflow::grappler::Cluster* TF_NewCluster( +static GCluster TF_NewCluster( bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status); -static tensorflow::grappler::Cluster* TF_NewVirtualCluster( +static GCluster TF_NewVirtualCluster( const std::vector& named_devices, TF_Status* out_status); -static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster); -static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster); +static void TF_ShutdownCluster(GCluster cluster); +static PyObject* TF_ListDevices(GCluster cluster); static PyObject* TF_MeasureCosts( - const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster, + GItem item, GCluster cluster, bool generate_timeline, TF_Status* out_status); static PyObject* TF_DeterminePeakMemoryUsage( - const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster, + GItem item, GCluster cluster, TF_Status* out_status); diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py index 496f5255b9..cf795fddb7 100644 --- a/tensorflow/python/grappler/cluster.py +++ b/tensorflow/python/grappler/cluster.py @@ -46,6 +46,7 @@ class Cluster(object): the local machine. """ self._tf_cluster = None + self._generate_timeline = not disable_timeline with errors.raise_exception_on_not_ok_status() as status: if devices is None: self._tf_cluster = tf_cluster.TF_NewCluster( @@ -54,11 +55,10 @@ class Cluster(object): devices_serialized = [device.SerializeToString() for device in devices] self._tf_cluster = tf_cluster.TF_NewVirtualCluster( devices_serialized, status) - self._generate_timeline = not disable_timeline def __del__(self): if self._tf_cluster is not None: - tf_cluster.TF_DeleteCluster(self._tf_cluster) + tf_cluster.TF_ShutdownCluster(self._tf_cluster) @property def tf_cluster(self): diff --git a/tensorflow/python/grappler/cost_analyzer.i b/tensorflow/python/grappler/cost_analyzer.i index 0318ff762c..4c0953435b 100644 --- a/tensorflow/python/grappler/cost_analyzer.i +++ b/tensorflow/python/grappler/cost_analyzer.i @@ -15,6 +15,7 @@ limitations under the License. %include "tensorflow/python/lib/core/strings.i" %include "tensorflow/python/platform/base.i" +%include "cluster.i" %typemap(in) const tensorflow::MetaGraphDef& (tensorflow::MetaGraphDef temp) { char* c_string; @@ -42,8 +43,8 @@ limitations under the License. %} %{ -string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool -per_node_report, tensorflow::grappler::Cluster* cluster) { +string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool per_node_report, + GCluster cluster) { tensorflow::grappler::ItemConfig cfg; cfg.apply_optimizations = false; std::unique_ptr item = @@ -53,7 +54,7 @@ per_node_report, tensorflow::grappler::Cluster* cluster) { } string suffix; - tensorflow::grappler::CostAnalyzer analyzer(*item, cluster, suffix); + tensorflow::grappler::CostAnalyzer analyzer(*item, cluster.get(), suffix); std::stringstream os; analyzer.GenerateReport(os, per_node_report); @@ -62,5 +63,5 @@ per_node_report, tensorflow::grappler::Cluster* cluster) { %} -string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool - per_node_report, tensorflow::grappler::Cluster* cluster); +string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool per_node_report, + GCluster cluster); diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i index 2fa502b81d..7dd79f7c82 100644 --- a/tensorflow/python/grappler/item.i +++ b/tensorflow/python/grappler/item.i @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +%include %typemap(in) const tensorflow::MetaGraphDef& (tensorflow::MetaGraphDef temp) { char* c_string; Py_ssize_t py_size; @@ -30,7 +31,12 @@ limitations under the License. $1 = &temp; } -%newobject TF_NewItem; +// Wrap the item into an object that swig can manipulate. This ensures it will call the object +// destructor upon garbage collection instead of leaking memory. +struct GItem { + std::shared_ptr item_; +}; + %{ #include @@ -42,8 +48,26 @@ limitations under the License. #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/protobuf/meta_graph.pb.h" +#include "tensorflow/core/lib/strings/strcat.h" + +// Provide the implementation fo the GItem struct here. +struct GItem { + GItem() {} + GItem(tensorflow::grappler::GrapplerItem* item) : item_(item) {} + + tensorflow::grappler::GrapplerItem* operator->() const { + return item_.get(); + } + const tensorflow::grappler::GrapplerItem& operator*() const { + return *item_.get(); + } + bool is_none() const { + return item_.get() == nullptr; + } + std::shared_ptr item_; +}; -static tensorflow::grappler::GrapplerItem* TF_NewItem( +static GItem TF_NewItem( const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation, bool ignore_user_placement, TF_Status* out_status) { if (meta_graph.collection_def().count("train_op") == 0) { @@ -65,11 +89,11 @@ static tensorflow::grappler::GrapplerItem* TF_NewItem( return nullptr; } tensorflow::Set_TF_Status_from_Status(out_status, tensorflow::Status::OK()); - return item.release(); + return GItem(item.release()); } -static std::vector TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item) { - if (!item) { +static std::vector TF_IdentifyImportantOps(GItem item) { + if (item.is_none()) { return {}; } @@ -91,8 +115,8 @@ static std::vector TF_IdentifyImportantOps(const tensorflow::grappler::G return ops; } -static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* item) { - if (!item) { +static PyObject* TF_GetOpProperties(GItem item) { + if (item.is_none()) { Py_RETURN_NONE; } tensorflow::grappler::GraphProperties properties(*item); @@ -125,8 +149,8 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it // Wrap these functions. -static tensorflow::grappler::GrapplerItem* TF_NewItem( +static GItem TF_NewItem( const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation, bool ignore_user_placement, TF_Status* out_status); -static std::vector TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item); -static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* item); +static std::vector TF_IdentifyImportantOps(GItem item); +static PyObject* TF_GetOpProperties(GItem item); diff --git a/tensorflow/python/grappler/tf_optimizer.i b/tensorflow/python/grappler/tf_optimizer.i index 3965c65bb9..f0dd4483a6 100644 --- a/tensorflow/python/grappler/tf_optimizer.i +++ b/tensorflow/python/grappler/tf_optimizer.i @@ -15,6 +15,7 @@ limitations under the License. %include "tensorflow/python/platform/base.i" +%include "cluster.i" %typemap(in) const tensorflow::MetaGraphDef& (tensorflow::MetaGraphDef temp) { char* c_string; @@ -92,7 +93,7 @@ void DetectDevices(std::unordered_map* dev } PyObject* TF_OptimizeGraph( - tensorflow::grappler::Cluster* cluster, + GCluster cluster, const tensorflow::RewriterConfig& rewriter_config, const tensorflow::MetaGraphDef& metagraph, bool verbose, const string& graph_id, TF_Status* out_status) { @@ -102,17 +103,10 @@ PyObject* TF_OptimizeGraph( std::unique_ptr grappler_item = tensorflow::grappler::GrapplerItemFromMetaGraphDef(graph_id, metagraph, item_config); - std::unique_ptr virtual_cluster; - if (cluster == nullptr) { - std::unordered_map device_map; - DetectDevices(&device_map); - virtual_cluster.reset(new tensorflow::grappler::VirtualCluster(device_map)); - cluster = virtual_cluster.get(); - } tensorflow::DeviceBase* cpu_device = nullptr; tensorflow::GraphDef out_graph; tensorflow::grappler::MetaOptimizer optimizer(cpu_device, rewriter_config); - tensorflow::Status status = optimizer.Optimize(cluster, *grappler_item, &out_graph); + tensorflow::Status status = optimizer.Optimize(cluster.get(), *grappler_item, &out_graph); if (verbose) { optimizer.PrintResult(); } @@ -127,7 +121,7 @@ PyObject* TF_OptimizeGraph( // Wrap this function PyObject* TF_OptimizeGraph( - tensorflow::grappler::Cluster* cluster, + GCluster cluster, const tensorflow::RewriterConfig& rewriter_config, const tensorflow::MetaGraphDef& metagraph, bool verbose, const string& graph_id, TF_Status* out_status); diff --git a/tensorflow/python/grappler/tf_optimizer.py b/tensorflow/python/grappler/tf_optimizer.py index d430dd9e2f..a73a4a98fc 100644 --- a/tensorflow/python/grappler/tf_optimizer.py +++ b/tensorflow/python/grappler/tf_optimizer.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.core.framework import graph_pb2 from tensorflow.python import pywrap_tensorflow as tf_opt from tensorflow.python.framework import errors +from tensorflow.python.grappler import cluster as gcluster def OptimizeGraph(rewriter_config, @@ -30,8 +31,9 @@ def OptimizeGraph(rewriter_config, cluster=None): """Optimize the provided metagraph.""" with errors.raise_exception_on_not_ok_status() as status: - ret_from_swig = tf_opt.TF_OptimizeGraph(None if cluster is None else - cluster.tf_cluster, + if cluster is None: + cluster = gcluster.Cluster() + ret_from_swig = tf_opt.TF_OptimizeGraph(cluster.tf_cluster, rewriter_config.SerializeToString(), metagraph.SerializeToString(), verbose, graph_id, status) -- GitLab From b76620aed0c02d01a823df57e06a67bc4c1424c0 Mon Sep 17 00:00:00 2001 From: James Keeling Date: Wed, 22 Nov 2017 17:58:34 -0800 Subject: [PATCH 0807/1801] Default to previously specified variables when minimizing with KfacOptimizer If no variables are specified to minimize or compute_gradients, the default was previously to use all trainable variables. However, KfacOptimizer has a list of variables it is able to train, so we should use that instead. PiperOrigin-RevId: 176720954 --- .../contrib/kfac/python/ops/optimizer.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index 98f8e7b230..ecf7f3e4e5 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -151,16 +151,24 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): return self._fisher_est.damping def minimize(self, *args, **kwargs): - - if "var_list" not in kwargs: - kwargs["var_list"] = tf_variables.trainable_variables() - + kwargs["var_list"] = kwargs.get("var_list") or self.variables if set(kwargs["var_list"]) != set(self.variables): raise ValueError("var_list doesn't match with set of Fisher-estimating " "variables.") - return super(KfacOptimizer, self).minimize(*args, **kwargs) + def compute_gradients(self, *args, **kwargs): + # args[1] could be our var_list + if len(args) > 1: + var_list = args[1] + else: + kwargs["var_list"] = kwargs.get("var_list") or self.variables + var_list = kwargs["var_list"] + if set(var_list) != set(self.variables): + raise ValueError("var_list doesn't match with set of Fisher-estimating " + "variables.") + return super(KfacOptimizer, self).compute_gradients(*args, **kwargs) + def apply_gradients(self, grads_and_vars, *args, **kwargs): """Applies gradients to variables. -- GitLab From c714df4c87466e632be4c78f8f55a2ffe47fef62 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 22 Nov 2017 18:03:05 -0800 Subject: [PATCH 0808/1801] Remove duplicate op registration. --- tensorflow/core/kernels/depthwise_conv_op.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index 30ecd0c2ba..2759ecb2f1 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -427,11 +427,6 @@ TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA -REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative") - .Device(DEVICE_GPU) - .TypeConstraint("T"), - DepthwiseConv2dNativeOp); - REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint("T"), DepthwiseConv2dNativeOp); -- GitLab From ebd26397ab708242d22880f789b168eb16897691 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Wed, 22 Nov 2017 18:22:07 -0800 Subject: [PATCH 0809/1801] Do not convert layout for FusedBatchNormGrad if is_training is false (freeze mode), since NCHW is not supported on GPU in this case. PiperOrigin-RevId: 176722850 --- .../grappler/optimizers/layout_optimizer.cc | 14 +++++ .../optimizers/layout_optimizer_test.cc | 58 +++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index 31c3ba6863..d25d9d99c5 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -714,10 +714,24 @@ class FusedBatchNormGradProcessor : public NodeProcessor { : NodeProcessor(opt_cxt) {} protected: + bool ShouldProcess() const override { + return NodeProcessor::ShouldProcess() && IsTraining(); + } + std::vector GetInputPos() const override { std::vector input_pos = {0, 1}; return input_pos; } + + private: + bool IsTraining() const { + if (node_->attr().find("is_training") != node_->attr().end()) { + if (node_->attr().at("is_training").b()) { + return true; + } + } + return false; + } }; class MaxPoolGradProcessor : public NodeProcessor { diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index d4ab42ad60..20a971629c 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -114,6 +114,36 @@ class LayoutOptimizerTest : public ::testing::Test { return tensor; } + Output SimpleFusedBatchNormGrad(tensorflow::Scope* s, bool is_training) { + int batch_size = 16; + int input_height = 8; + int input_width = 8; + int input_channels = 3; + TensorShape shape({batch_size, input_height, input_width, input_channels}); + Tensor data(DT_FLOAT, shape); + test::FillIota(&data, 1.0f); + Output x = ops::Const(s->WithOpName("Input"), Input::Initializer(data)); + Output y_backprop = + ops::Const(s->WithOpName("YBackprop"), Input::Initializer(data)); + + TensorShape shape_vector({input_channels}); + Tensor data_vector(DT_FLOAT, shape_vector); + test::FillIota(&data_vector, 2.0f); + Output scale = + ops::Const(s->WithOpName("Scale"), Input::Initializer(data_vector)); + Output reserve1 = + ops::Const(s->WithOpName("Reserve1"), Input::Initializer(data_vector)); + Output reserve2 = + ops::Const(s->WithOpName("Reserve2"), Input::Initializer(data_vector)); + + ops::FusedBatchNormGrad::Attrs attrs; + attrs.is_training_ = is_training; + auto output = + ops::FusedBatchNormGrad(s->WithOpName("FusedBatchNormGrad"), y_backprop, + x, scale, reserve1, reserve2, attrs); + return output.x_backprop; + } + std::unique_ptr virtual_cluster_; }; @@ -341,6 +371,34 @@ TEST_F(LayoutOptimizerTest, CPUDeviceUppercase) { EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC"); } +TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingTrue) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto x_backprop = SimpleFusedBatchNormGrad(&s, true); + Output fetch = ops::Identity(s.WithOpName("Fetch"), {x_backprop}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto conv_node = node_map.GetNode("FusedBatchNormGrad"); + EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NCHW"); +} + +TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingFalse) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto x_backprop = SimpleFusedBatchNormGrad(&s, false); + Output fetch = ops::Identity(s.WithOpName("Fetch"), {x_backprop}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto conv_node = node_map.GetNode("FusedBatchNormGrad"); + EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC"); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 622a6ec6dc79c458aac03dafffe0f0fef48e9c01 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 22 Nov 2017 18:30:37 -0800 Subject: [PATCH 0810/1801] Replace assertAlmostEqual with assertAllClose in boosted_trees losses_test.py Calling assertAlmostEqual() with the places kwarg on numpy.ndarray leads to calling __round__ on numpy.ndarray, which is no consistently defined for all relevant platforms and numpy versions. PiperOrigin-RevId: 176723366 --- .../boosted_trees/python/utils/losses_test.py | 35 +++++++------------ 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py index dde1642686..ccb8509c03 100644 --- a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py +++ b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - import numpy as np from tensorflow.contrib.boosted_trees.python.utils import losses @@ -60,35 +58,27 @@ class LossesTest(test_util.TensorFlowTestCase): neg_loss = loss_for_negatives.eval() # For positive labels, points <= 0.3 get max loss of e. # For negative labels, these points have minimum loss of 1/e. - for i in range(2): - self.assertAlmostEqual(math.exp(1), pos_loss[i], places=4) - self.assertAlmostEqual(math.exp(-1), neg_loss[i], places=4) + self.assertAllClose(np.exp(np.ones([2, 1])), pos_loss[:2], atol=1e-4) + self.assertAllClose(np.exp(-np.ones([2, 1])), neg_loss[:2], atol=1e-4) # For positive lables, p oints with predictions 0.7 and larger get minimum # loss value of 1/e. For negative labels, these points are wrongly # classified and get loss e. - for i in range(6, 10): - self.assertAlmostEqual(math.exp(-1), pos_loss[i], places=4) - self.assertAlmostEqual(math.exp(1), neg_loss[i], places=4) + self.assertAllClose(np.exp(-np.ones([4, 1])), pos_loss[6:10], atol=1e-4) + self.assertAllClose(np.exp(np.ones([4, 1])), neg_loss[6:10], atol=1e-4) # Points in between 0.5-eps, 0..5+eps get loss exp(-label_m*y), where # y = 1/eps *x -1/(2eps), where x is the probability and label_m is either # 1 or -1 (for label of 0). - for i in range(2, 6): - self.assertAlmostEqual( - math.exp(-1.0 * (predictions_probs[i] * 1.0 / eps - 0.5 / eps)), - pos_loss[i], - places=4) - self.assertAlmostEqual( - math.exp(1.0 * (predictions_probs[i] * 1.0 / eps - 0.5 / eps)), - neg_loss[i], - places=4) + self.assertAllClose( + np.exp(-(predictions_probs[2:6] * 1.0 / eps - 0.5 / eps)), + pos_loss[2:6], atol=1e-4) + self.assertAllClose( + np.exp(predictions_probs[2:6] * 1.0 / eps - 0.5 / eps), + neg_loss[2:6], atol=1e-4) def test_per_example_squared_loss(self): - def _squared_loss(p, y): - return np.mean(1.0 * (p - y) * (p - y)) - labels = np.array([[0.123], [224.2], [-3], [2], [.3]], dtype=np.float32) weights = array_ops.ones([5, 1], dtypes.float32) predictions = np.array( @@ -99,9 +89,8 @@ class LossesTest(test_util.TensorFlowTestCase): predictions) loss = loss_tensor.eval() - for i in range(5): - self.assertAlmostEqual( - _squared_loss(labels[i], predictions[i]), loss[i], places=4) + self.assertAllClose( + np.square(labels[:5] - predictions[:5]), loss[:5], atol=1e-4) if __name__ == "__main__": -- GitLab From 34a69568752ef8badbe6aab5d1f568821c19e19c Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 22 Nov 2017 19:13:54 -0800 Subject: [PATCH 0811/1801] Fix flaky test. PiperOrigin-RevId: 176725659 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 995ce6d654..c017cd9c77 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -331,7 +331,7 @@ py_test( py_test( name = "reader_dataset_ops_test", - size = "small", + size = "medium", srcs = ["reader_dataset_ops_test.py"], srcs_version = "PY2AND3", deps = [ -- GitLab From 059e35acc985e99e522ffe89df12cd357871309b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 19:28:47 -0800 Subject: [PATCH 0812/1801] Minor cleanup - replace users()[0] with users->front(). PiperOrigin-RevId: 176726299 --- tensorflow/compiler/xla/service/hlo_verifier.cc | 4 ++-- tensorflow/compiler/xla/service/while_loop_simplifier.cc | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index f2a739c1e2..15188c4057 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -283,7 +283,7 @@ class ShapeVerifier : public DfsHloVisitor { Status HandleSend(HloInstruction* send) override { TF_RET_CHECK(send->users().size() == 1); - const HloInstruction* send_done = send->users()[0]; + const HloInstruction* send_done = send->users().front(); TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone); TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done)); return CheckShape( @@ -301,7 +301,7 @@ class ShapeVerifier : public DfsHloVisitor { Status HandleRecv(HloInstruction* recv) override { TF_RET_CHECK(recv->users().size() == 1); - const HloInstruction* recv_done = recv->users()[0]; + const HloInstruction* recv_done = recv->users().front(); TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone); TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done)); return CheckShape(recv, diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 8f335be794..b38ee907d7 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -342,7 +342,7 @@ static StatusOr TryRemoveDeadWhileParams(HloInstruction* while_op) { // // Careful: HloInstruction::operand_index returns the first index the // operand appears in, but it may appear more than once! - if (user->user_count() == 1 && user->users()[0] == while_body_root && + if (user->user_count() == 1 && user->users().front() == while_body_root && while_body_root->operand_index(user) == user->tuple_index() && std::count(while_body_root->operands().begin(), while_body_root->operands().end(), user) == 1) { @@ -444,7 +444,8 @@ static StatusOr TryRemoveDeadWhileParams(HloInstruction* while_op) { // This is a GTE of an index that we've removed. Remove it from the // cloned computation. CHECK(user->user_count() == 0 || - user->user_count() == 1 && user->users()[0] == while_body_root) + user->user_count() == 1 && + user->users().front() == while_body_root) << "Instruction " << user->ToStringNoMetadata() << " should be unused (except by root of while body), but has " "users: {" -- GitLab From d73e8b36d1332723f5819d07f8c44e88c49c7cec Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 22 Nov 2017 21:20:41 -0800 Subject: [PATCH 0813/1801] Make PrefetchDataset saveable. PiperOrigin-RevId: 176732156 --- .../contrib/data/python/kernel_tests/BUILD | 12 ++ .../kernel_tests/prefetch_dataset_op_test.py | 39 +++++ tensorflow/core/kernels/BUILD | 1 + .../core/kernels/prefetch_dataset_op.cc | 141 +++++++++++++++++- 4 files changed, 186 insertions(+), 7 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index c017cd9c77..3280f1fc35 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -303,6 +303,18 @@ py_test( ], ) +py_test( + name = "prefetch_dataset_op_test", + size = "small", + srcs = ["prefetch_dataset_op_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_serialization_test", + "//tensorflow/python:platform", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + py_test( name = "range_dataset_op_test", size = "small", diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py new file mode 100644 index 0000000000..3d120a3071 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py @@ -0,0 +1,39 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.platform import test + + +class PrefetchDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def build_dataset(self, seed): + return dataset_ops.Dataset.range(100).prefetch(10).shuffle( + buffer_size=10, seed=seed, reshuffle_each_iteration=False) + + def testCore(self): + num_outputs = 100 + self.run_core_tests(lambda: self.build_dataset(10), + lambda: self.build_dataset(20), num_outputs) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b4a5a3c796..b86739eea7 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6054,6 +6054,7 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/core/kernels/prefetch_dataset_op.cc b/tensorflow/core/kernels/prefetch_dataset_op.cc index 80592aa353..1a6b7e078e 100644 --- a/tensorflow/core/kernels/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/prefetch_dataset_op.cc @@ -14,9 +14,10 @@ limitations under the License. ==============================================================================*/ #include -#include "tensorflow/core/kernels/dataset.h" #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/dataset.h" +#include "tensorflow/core/lib/core/error_codes.pb.h" namespace tensorflow { @@ -37,14 +38,14 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { OP_REQUIRES_OK( ctx, ParseScalarArgument(ctx, "buffer_size", &buffer_size)); - *output = new Dataset(input, buffer_size); + *output = new Dataset(ctx, input, buffer_size); } private: - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(const DatasetBase* input, int64 buffer_size) - : input_(input), buffer_size_(buffer_size) { + Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 buffer_size) + : GraphDatasetBase(ctx), input_(input), buffer_size_(buffer_size) { input_->Ref(); } @@ -65,6 +66,18 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "PrefetchDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + Node* buffer_size = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(buffer_size_, &buffer_size)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, buffer_size}, output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: @@ -119,7 +132,10 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { // Wake the prefetch thread, in case it has been waiting // for space in the buffer. - cond_var_.notify_one(); + // Also wake up threads from other calls to GetNext. + // TODO(mrry): Consider using different condition variables + // for GetNext and Prefetch. + cond_var_.notify_all(); return s; } else if (prefetch_thread_finished_) { *end_of_sequence = true; @@ -128,6 +144,69 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { } } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + // Acquire both locks to ensure that the prefetch thread and + // all GetNext threads are blocked. + mutex_lock parent_l(parent_mu_); + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("buffer_size"), buffer_.size())); + for (size_t i = 0; i < buffer_.size(); i++) { + auto& buffer_element = buffer_[i]; + TF_RETURN_IF_ERROR(WriteStatus(writer, i, buffer_element.status)); + if (buffer_element.status.ok()) { + TF_RETURN_IF_ERROR(writer->WriteScalar( + full_name(strings::StrCat("buffer[", i, "].size")), + buffer_element.value.size())); + for (size_t j = 0; j < buffer_element.value.size(); j++) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + strings::StrCat("buffer[", i, "][", j, "]"), + buffer_element.value[j])); + } + } + } + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock parent_l(parent_mu_); + mutex_lock l(mu_); + buffer_.clear(); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + size_t buffer_size; + { + int64 temp; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("buffer_size"), &temp)); + buffer_size = static_cast(temp); + } + for (size_t i = 0; i < buffer_size; i++) { + buffer_.emplace_back(); + auto& buffer_element = buffer_.back(); + TF_RETURN_IF_ERROR(ReadStatus(reader, i, &buffer_element.status)); + if (buffer_element.status.ok()) { + size_t value_size; + { + int64 temp; + TF_RETURN_IF_ERROR(reader->ReadScalar( + full_name(strings::StrCat("buffer[", i, "].size")), &temp)); + value_size = static_cast(temp); + } + buffer_element.value.reserve(value_size); + for (size_t j = 0; j < value_size; j++) { + buffer_element.value.emplace_back(); + TF_RETURN_IF_ERROR(reader->ReadTensor( + strings::StrCat("buffer[", i, "][", j, "]"), + &buffer_element.value.back())); + } + } + } + return Status::OK(); + } + private: // A buffer element comprises a status and (if that status is // OK) a vector of tensors, representing an element of the input dataset. @@ -171,6 +250,12 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { } // 2. Read the next element. + // Acquire the parent lock since we will be reading an element + // from the input iterator. Note that we do not wish to release + // this lock till we have added the fetched element to the + // `buffer_` else there will be local state that may be missed + // by SaveInternal. + mutex_lock parent_l(parent_mu_); bool end_of_sequence; BufferElement buffer_element; buffer_element.status = input_impl_->GetNext( @@ -191,8 +276,50 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { } } + Status WriteStatus(IteratorStateWriter* writer, size_t index, + const Status& status) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + TF_RETURN_IF_ERROR(writer->WriteScalar( + CodeKey(index), static_cast(status.code()))); + if (!status.ok()) { + TF_RETURN_IF_ERROR(writer->WriteScalar(ErrorMessageKey(index), + status.error_message())); + } + return Status::OK(); + } + + Status ReadStatus(IteratorStateReader* reader, size_t index, + Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + int64 code_int; + TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int)); + error::Code code = static_cast(code_int); + + if (code != error::Code::OK) { + string error_message; + TF_RETURN_IF_ERROR( + reader->ReadScalar(ErrorMessageKey(index), &error_message)); + *status = Status(code, error_message); + } else { + *status = Status::OK(); + } + return Status::OK(); + } + + string CodeKey(size_t index) { + return full_name(strings::StrCat("status[", index, "].code")); + } + + string ErrorMessageKey(size_t index) { + return full_name(strings::StrCat("status[", index, "].error_message")); + } + + // This mutex is used to ensure exclusivity between multiple threads + // reading/writing this iterator's local state. mutex mu_; - const std::unique_ptr input_impl_; + // This mutex is used to ensure exclusivity between multiple threads + // accessing the parent iterator. We keep this separate from `mu_` to + // allow prefetching to run in parallel with GetNext calls. + mutex parent_mu_ ACQUIRED_BEFORE(mu_); + const std::unique_ptr input_impl_ GUARDED_BY(parent_mu_); condition_variable cond_var_; std::deque buffer_ GUARDED_BY(mu_); std::unique_ptr prefetch_thread_ GUARDED_BY(mu_); -- GitLab From 3dbcc9485e11d5a0643baf1cfc9ccd2e6d407c06 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Nov 2017 23:19:07 -0800 Subject: [PATCH 0814/1801] Registers StridedSlice for int64 tensor on GPUs PiperOrigin-RevId: 176737730 --- tensorflow/core/kernels/strided_slice_op.cc | 1 + tensorflow/core/kernels/strided_slice_op_gpu.cu.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 8fc40db3cc..73b6d4cf6a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc index a8487f49f4..8ca27e3b92 100644 --- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc @@ -53,6 +53,7 @@ typedef Eigen::GpuDevice GPUDevice; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); TF_CALL_complex64(DEFINE_GPU_KERNELS); TF_CALL_complex128(DEFINE_GPU_KERNELS); +TF_CALL_int64(DEFINE_GPU_KERNELS); DEFINE_GPU_KERNELS(int32); #undef DEFINE_GPU_KERNELS -- GitLab From 689bfee5f781d9645f1a415c4c2341119b865a66 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Nov 2017 09:51:34 -0800 Subject: [PATCH 0815/1801] Update TFGAN Readme to link to examples. PiperOrigin-RevId: 176779769 --- tensorflow/contrib/gan/README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md index 3ab8478070..4bca0a1d62 100644 --- a/tensorflow/contrib/gan/README.md +++ b/tensorflow/contrib/gan/README.md @@ -8,7 +8,8 @@ explicitly model the distribution and without writing an explicit loss. For example, the generator could learn to draw samples from the distribution of natural images. For more details on this technique, see ['Generative Adversarial Networks'](https://arxiv.org/abs/1406.2661) by -Goodfellow et al. +Goodfellow et al. See [tensorflow/models](https://github.com/tensorflow/models/tree/master/research/gan/) for examples, and [this tutorial](https://github.com/tensorflow/models/tree/master/research/gan/tutorial.ipynb) for an +introduction. #### Usage ```python @@ -23,8 +24,8 @@ mix TFGAN, native TF, and other custom frameworks * Use already implemented [GAN losses and penalties](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/losses/python/losses_impl.py) (ex Wasserstein loss, gradient penalty, mutual information penalty, etc) * [Monitor and visualize](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/eval/python/summaries_impl.py) GAN progress during training, and [evaluate](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py) them * Use already-implemented [tricks](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/features/python/) to stabilize and improve training -* Develop based on examples of common GAN setups -* Use the TFGAN-backed tf.Learn Estimator to easily train a GAN model +* Develop based on examples of [common GAN setups](https://github.com/tensorflow/models/tree/master/research/gan/) +* Use the TFGAN-backed [GANEstimator](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py) to easily train a GAN model * Improvements in TFGAN infrastructure will automatically benefit your TFGAN project * Stay up-to-date with research as we add more algorithms @@ -51,7 +52,7 @@ network to evaluate your unconditional generative model. You can also use your own pretrained classifier for more specific performance numbers, or use other methods for evaluating conditional generative models. -* examples (coming soon): +* [examples](https://github.com/tensorflow/models/tree/master/research/gan/) and [tutorial](https://github.com/tensorflow/models/tree/master/research/gan/tutorial.ipynb): See examples of how to use TFGAN to make GAN training easier, or use the more complicated examples to jumpstart your own project. These include unconditional and conditional GANs, InfoGANs, adversarial losses on existing networks, and image-to-image translation. -- GitLab From fc0b63edc0116f2df9847e3083247a4613bc0f26 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Nov 2017 11:20:22 -0800 Subject: [PATCH 0816/1801] Clean up RemoveTrivialPassthroughOp and fix an issue in an edge case where we were not erasing the correct arrays. PiperOrigin-RevId: 176784020 --- .../remove_trivial_passthrough.cc | 38 ++++++++++++++++--- .../remove_trivial_passthrough.h | 10 +++-- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc index d998dcd9f3..047389f69a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc @@ -63,19 +63,28 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, main_input_array_index = i; } } - CHECK_LE(count_nonconstant_input_arrays, 1); const string main_input_name = passthru_op->inputs[main_input_array_index]; const string output_name = passthru_op->outputs[0]; + + // Build the list of all input and output arrays of the passthrough node + // that we are considering removing. Any of these arrays is a candidate + // for being removed as well, if nothing else references it. Doing that + // arrays-removal together with the passthrough-node-removal proved too + // error-prone. + std::vector removal_candidates; + for (const string& input : passthru_op->inputs) { + removal_candidates.push_back(input); + } + removal_candidates.push_back(output_name); + if (IsDiscardableArray(*model, output_name)) { transformation->AddMessageF( "Removing %s, keeping its non-constant input array", LogName(*passthru_op)); - model->arrays.erase(output_name); for (const string& input : passthru_op->inputs) { if (IsDiscardableArray(*model, input) && input != main_input_name && CountOpsWithInput(*model, input) == 1) { - model->arrays.erase(input); } } RerouteEdges(output_name, main_input_name, model); @@ -85,13 +94,12 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, for (const string& input : passthru_op->inputs) { if (IsDiscardableArray(*model, input) && (input == main_input_name || CountOpsWithInput(*model, input) == 1)) { - model->arrays.erase(input); } } RerouteEdges(main_input_name, output_name, model); } else { transformation->AddMessageF( - "Cannot remove %s, neither its nonconstant input nor its output may be " + "Cannot remove %s, neither its main input nor its output may be " "discarded", LogName(*passthru_op)); return false; @@ -100,6 +108,26 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, // Remove the pass-through node. model->operators.erase(passthru_it); + // Remove any array that is no longer used. + for (const string& removal_candidate : removal_candidates) { + bool is_referenced = false; + for (const auto& op : model->operators) { + for (const string& input : op->inputs) { + if (input == removal_candidate) { + is_referenced = true; + } + } + for (const string& output : op->outputs) { + if (output == removal_candidate) { + is_referenced = true; + } + } + } + if (!is_referenced) { + model->arrays.erase(removal_candidate); + } + } + return true; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h index b72c85c0e5..a06181ca0b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h @@ -21,10 +21,12 @@ limitations under the License. namespace toco { // A "passthrough op" is an op that satisfies the following conditions: -// 1. It has at most one non-constant input (it may have other constant -// inputs). +// 1. One of its inputs is (per the semantics of that op) its "main input" +// for some notion of "main input" that is operator-specific; for example, +// for a Reshape op, the main input is the array being reshaped, not the +// other input which gives the new shape. // 2. It has exactly one output. -// 3. It forwards exactly its single non-constant input to its single output. +// 3. It forwards exactly its main input to its single output. // // Examples include: // 1. TensorFlow Identity ops. (Have one input). @@ -34,7 +36,7 @@ namespace toco { // where one of its inputs is a constant array filled with zeros. // // A passthrough op is "trivial" and can be removed when it is possible to -// discard either its single non-constant input or output array, rerouting any +// discard either its main input or output array, rerouting any // edge involving it to the other of these two arrays. // // It is only possible to discard such an array if it is not explicitly -- GitLab From 1d12f5d1d5aa282e503b58b41de253d1aa50fa25 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Nov 2017 11:22:10 -0800 Subject: [PATCH 0817/1801] An input that is used only by a RNN state should be counted as used. PiperOrigin-RevId: 176784134 --- tensorflow/contrib/lite/toco/tooling_util.cc | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index e8fa7a3423..3f289817e0 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -792,15 +792,19 @@ void FixOperatorOrdering(Model* model) { } // Checks that the --input_arrays of the Model are actually used by at least -// one of the --output_arrays i.e. that the graph contains a path from each one -// of the inputs to at least one of the outputs. This catches cases where the -// user passed the wrong --input_arrays or --output_arrays, which otherwise may -// result in cryptic error messages. -void CheckInputUsedByOutputs(const Model& model) { +// one of the --output_arrays or --rnn_states i.e. that the graph contains a +// path from each one of the inputs to at least one of the outputs or RNN +// states. This catches cases where the user passed the wrong --input_arrays or +// --output_arrays or --rnn_states, which otherwise may result in cryptic error +// messages. +void CheckInputsActuallyUsed(const Model& model) { std::set used_arrays; for (const string& output : model.flags.output_arrays()) { used_arrays.insert(output); } + for (const auto& rnn_state : model.flags.rnn_states()) { + used_arrays.insert(rnn_state.back_edge_source_array()); + } for (int i = model.operators.size() - 1; i >= 0; i--) { bool is_op_used = false; for (const string& op_output : model.operators[i]->outputs) { @@ -832,7 +836,7 @@ void CheckInvariants(const Model& model) { CheckNoOrphanedArray(model); CheckArrayFieldsConsistent(model); CheckOperatorOrdering(model); - CheckInputUsedByOutputs(model); + CheckInputsActuallyUsed(model); } void CheckCountInRange(const ::toco::ModelFlags::ModelCheck& model_check, @@ -1087,9 +1091,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { if (!input_array.has_shape()) { QCHECK(!input_array_proto.shape().empty()) << "This model does not have shape defined for input array " - << input_array_proto.name() - << ", so one must be specified by a non-empty --input_shape " - "command-line flag."; + << input_array_proto.name(); } // Compare/merge the model->flags describing the input_shape with -- GitLab From 22b011885def468471d5f9e1d544a187e672a76b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Nov 2017 11:24:05 -0800 Subject: [PATCH 0818/1801] Correctly use IsDiscardableArray in RemoveUnusedOp (fixes edge cases where an op is only affecting e.g. a RNN state and should not be treated as unused) PiperOrigin-RevId: 176784216 --- .../lite/toco/graph_transformations/remove_unused_op.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc index 1f1f1f6948..0ab301552f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc @@ -88,7 +88,8 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { // Remove any input array that is not used by anything else, // and that is not the output of some other operator. for (const auto& input : op->inputs) { - if (CountOpsWithInput(*model, input) == 1 && + if (IsDiscardableArray(*model, input) && + CountOpsWithInput(*model, input) == 1 && !GetOpWithOutput(*model, input)) { model->arrays.erase(input); } @@ -98,7 +99,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { for (const auto& output : op->outputs) { // If the output array is the model's input array, don't remove that. // That's the case when cropping a model at a given --input_array. - if (IsInputArray(*model, output)) { + if (!IsDiscardableArray(*model, output)) { continue; } // Likewise, if the output array is a RNN state array, don't remove that. -- GitLab From b20ec5c461031f9375274cf026a7dfff0f903acc Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 23 Nov 2017 11:40:22 -0800 Subject: [PATCH 0819/1801] Revert "Only install enum34 on Python <3.4 versions" --- tensorflow/tools/pip_package/setup.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 3852b251d9..c18f20910a 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -33,12 +33,7 @@ _VERSION = '1.4.0' REQUIRED_PACKAGES = [ 'absl-py', - # weakref.finalize introduced in Python 3.4 - 'backports.weakref >= 1.0rc1; python_version < "3.4"', - # enum module introduced in Python 3.4 - 'enum34 >= 1.1.6; python_version < "3.4"', - # Needed for unittest.mock in Python 2 - 'mock >= 2.0.0; python_version < "3.0"', + 'enum34 >= 1.1.6', 'numpy >= 1.12.1', 'six >= 1.10.0', 'protobuf >= 3.4.0', @@ -57,6 +52,8 @@ if sys.version_info.major == 3: REQUIRED_PACKAGES.append('wheel >= 0.26') else: REQUIRED_PACKAGES.append('wheel') + # mock comes with unittest.mock for python3, need to install for python2 + REQUIRED_PACKAGES.append('mock >= 2.0.0') # tf-nightly should depend on tb-nightly if 'tf_nightly' in project_name: @@ -65,6 +62,10 @@ if 'tf_nightly' in project_name: REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0' break +# weakref.finalize was introduced in Python 3.4 +if sys.version_info < (3, 4): + REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1') + # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ 'freeze_graph = tensorflow.python.tools.freeze_graph:main', -- GitLab From 742ca716334886b78b7ccc4cbecb33ad13c27cd5 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Thu, 23 Nov 2017 14:38:46 -0800 Subject: [PATCH 0820/1801] [tf.data] Saveable iterator for FilterDataset. PiperOrigin-RevId: 176791620 --- .../contrib/data/python/kernel_tests/BUILD | 1 + .../kernel_tests/filter_dataset_op_test.py | 38 +++++++++ tensorflow/core/graph/graph_def_builder.h | 4 + tensorflow/core/kernels/dataset.h | 5 +- tensorflow/core/kernels/filter_dataset_op.cc | 79 +++++++++++++++++-- 5 files changed, 118 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 3280f1fc35..3efe5274f4 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -143,6 +143,7 @@ py_test( srcs = ["filter_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py index 67c49d77e2..95724241ef 100644 --- a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -156,5 +157,42 @@ class FilterDatasetTest(test.TestCase): sess.run(get_next) +class FilterDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_filter_range_graph(self, div): + return dataset_ops.Dataset.range(100).filter( + lambda x: math_ops.not_equal(math_ops.mod(x, div), 2)) + + def testFilterCore(self): + div = 3 + num_outputs = np.sum([x % 3 is not 2 for x in range(100)]) + self.run_core_tests(lambda: self._build_filter_range_graph(div), + lambda: self._build_filter_range_graph(div * 2), + num_outputs) + + def _build_filter_dict_graph(self): + return dataset_ops.Dataset.range(10).map( + lambda x: {"foo": x * 2, "bar": x ** 2}).filter( + lambda d: math_ops.equal(d["bar"] % 2, 0)).map( + lambda d: d["foo"] + d["bar"]) + + def testFilterDictCore(self): + num_outputs = np.sum([(x**2) % 2 == 0 for x in range(10)]) + self.run_core_tests(self._build_filter_dict_graph, None, num_outputs) + + def _build_sparse_filter(self): + + def _map_fn(i): + return sparse_tensor.SparseTensor( + indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + + def _filter_fn(_, i): + return math_ops.equal(i % 2, 0) + + return dataset_ops.Dataset.range(10).map(_map_fn).filter(_filter_fn).map( + lambda x, i: x) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/core/graph/graph_def_builder.h b/tensorflow/core/graph/graph_def_builder.h index b389cd8053..a2c0c4d553 100644 --- a/tensorflow/core/graph/graph_def_builder.h +++ b/tensorflow/core/graph/graph_def_builder.h @@ -99,6 +99,10 @@ class GraphDefBuilder { // Use this to skip processing that may depend on prior results. bool HaveError() const { return status_ != nullptr && !status_->ok(); } + // Returns a string representation of the status associated with *this. + // Returns the string `"OK"` if the status doesn't have any error. + string StatusToString() const { return status_->ToString(); } + // Given the Op type name, return a name for a node of that type. // Uses the value set in WithName() if that has been called. Otherwise, // returns a name built out of the Op type name. diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index 39c10163cf..afbebb0692 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -167,7 +167,8 @@ class GraphDefBuilderWrapper { opts->WithAttr(attr.first, attr.second))); } if (opts->HaveError()) { - return errors::Internal("AddDataset: Error building Options."); + return errors::Internal("AddDataset: Failed to build Options with error ", + opts->StatusToString()); } NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name, opts->op_registry()); @@ -191,7 +192,7 @@ class GraphDefBuilderWrapper { *output = opts->FinalizeBuilder(&node_builder); if (*output == nullptr) { return errors::Internal("AddDataset: Failed to build ", op_type_name, - " op."); + " op with error ", opts->StatusToString()); } return Status::OK(); } diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc index a69040b3bb..e4d80e4ce3 100644 --- a/tensorflow/core/kernels/filter_dataset_op.cc +++ b/tensorflow/core/kernels/filter_dataset_op.cc @@ -51,17 +51,21 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { std::move(other_arguments), &captured_func)); - *output = new Dataset(input, std::move(captured_func)); + *output = new Dataset(ctx, input, func_, std::move(captured_func)); } private: const int graph_def_version_; - class Dataset : public DatasetBase { + class Dataset : public GraphDatasetBase { public: - Dataset(const DatasetBase* input, + Dataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, std::unique_ptr captured_func) - : input_(input), captured_func_(std::move(captured_func)) { + : GraphDatasetBase(ctx), + input_(input), + func_(func), + captured_func_(std::move(captured_func)) { input_->Ref(); } @@ -82,6 +86,35 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { string DebugString() override { return "FilterDatasetOp::Dataset"; } + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); + Node* input_graph_node; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + + DataTypeVector other_arguments_types; + other_arguments_types.reserve(captured_func_->captured_inputs().size()); + std::vector other_arguments; + other_arguments.reserve(captured_func_->captured_inputs().size()); + for (const Tensor& t : captured_func_->captured_inputs()) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + other_arguments.emplace_back(node); + other_arguments_types.emplace_back(t.dtype()); + } + AttrValue f; + b->BuildAttrValue(func_, &f); + AttrValue other_arguments_types_attr; + b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); + + TF_RETURN_IF_ERROR(b->AddDataset( + this, {{0, input_graph_node}}, {{1, other_arguments}}, + {{"predicate", f}, {"Targuments", other_arguments_types_attr}}, + output)); + return Status::OK(); + } + private: class Iterator : public DatasetIterator { public: @@ -98,9 +131,18 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { // non-deterministic order. bool matched; do { - TF_RETURN_IF_ERROR( - input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); + { + tf_shared_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + TF_RETURN_IF_ERROR( + input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); + } if (*end_of_sequence) { + mutex_lock l(mu_); + input_impl_.reset(); return Status::OK(); } @@ -139,11 +181,34 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (input_impl_) + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + else + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impls_empty"), "")); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (reader->Contains(full_name("input_impls_empty"))) + input_impl_.reset(); + else + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + return Status::OK(); + } + private: - const std::unique_ptr input_impl_; + mutex mu_; + std::unique_ptr input_impl_ GUARDED_BY(mu_); }; const DatasetBase* const input_; + const NameAttrList func_; const std::unique_ptr captured_func_; }; -- GitLab From fed333479868935d24ec104b2ce9e9ac5dadf565 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Thu, 23 Nov 2017 16:48:38 -0800 Subject: [PATCH 0821/1801] Some extra checks and warnings in grappler to provide more helpful error messages when inputs are imperfect (cost_graph is empty and it causes graph_properties to be empty). PiperOrigin-RevId: 176796142 --- tensorflow/core/grappler/costs/graph_properties.cc | 3 +++ tensorflow/core/grappler/costs/op_level_cost_estimator.cc | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index c254fbef7a..dd389de636 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -977,6 +977,9 @@ Status GraphProperties::AnnotateOutputShapes(GraphDef* output_graph_def) const { } Status GraphProperties::InferFromCostGraph(const CostGraphDef& cost_graph) { + if (cost_graph.node_size() == 0) { + LOG(WARNING) << "cost_graph is empty: nothing can be inferred!"; + } std::unordered_map name_to_cost; std::unordered_map name_to_node; // Empty for (auto& node : cost_graph.node()) { diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index f7905d7798..b1e04ceec8 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -510,7 +510,12 @@ int64 OpLevelCostEstimator::CountMatMulOperations( bool* found_unknown_shapes) const { double ops = 0; - // first matrix + if (op_features.inputs_size() < 2) { + LOG(ERROR) << "Need 2 inputs but got " << op_features.inputs_size(); + *found_unknown_shapes = true; + return 0; + } + auto& a_matrix = op_features.inputs(0); auto& b_matrix = op_features.inputs(1); -- GitLab From 080e432f2bd5566946887ef383acf0b5d34d150a Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Fri, 24 Nov 2017 10:37:32 -0800 Subject: [PATCH 0822/1801] Make sure that control_dependencies execute any pending dependency in Eager mode. PiperOrigin-RevId: 176855214 --- tensorflow/python/framework/ops.py | 11 ++++++++++- tensorflow/python/framework/ops_test.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index dc4ffb1747..bcc794b9a9 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4475,11 +4475,15 @@ def control_dependencies(control_inputs): See @{tf.Graph.control_dependencies} for more details. + When eager execution is enabled, any callable object in the `control_inputs` + list will be called. + Args: control_inputs: A list of `Operation` or `Tensor` objects which must be executed or computed before running the operations defined in the context. Can also be `None` to clear the control - dependencies. + dependencies. If eager execution is enabled, any callable object in the + `control_inputs` list will be called. Returns: A context manager that specifies control dependencies for all @@ -4488,6 +4492,11 @@ def control_dependencies(control_inputs): if context.in_graph_mode(): return get_default_graph().control_dependencies(control_inputs) else: + if control_inputs: + # Excute any pending callables. + for control in control_inputs: + if callable(control): + control() return _NullContextmanager() diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 7ae7b5cb7f..ac35f6f4f5 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1464,6 +1464,29 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): # e should be dominated by c. self.assertEqual(e.op.control_inputs, []) + @test_util.run_in_graph_and_eager_modes() + def testEager(self): + def future(): + future.calls += 1 + return constant_op.constant(2.0) + future.calls = 0 + + if context.in_graph_mode(): + g = ops.Graph() + with g.as_default(): + a = constant_op.constant(1.0) + b = future() + with g.control_dependencies([a, b]): + c = constant_op.constant(3.0) + self.assertEqual(c.op.control_inputs, [a.op, b.op]) + self.assertEqual(future.calls, 1) + else: + a = constant_op.constant(1.0) + b = future + with ops.control_dependencies([a, b]): + c = constant_op.constant(3.0) + self.assertEqual(future.calls, 1) + def testBasicWithConversion(self): g = ops.Graph() a = _apply_op(g, "FloatOutput", [], [dtypes.float32]) -- GitLab From 93bce00552ac70cc2c9b72e5742f9de87d72985a Mon Sep 17 00:00:00 2001 From: Makoto Uchida Date: Fri, 24 Nov 2017 22:18:53 -0800 Subject: [PATCH 0823/1801] Accept None vocabulary_size to categorical_column_with_vocabulary_file() Defaults to the length of the given vocabulary file. PiperOrigin-RevId: 176881510 --- .../python/feature_column/feature_column.py | 27 ++++++++++++++----- .../feature_column/feature_column_test.py | 22 ++++++++++++--- .../golden/tensorflow.feature_column.pbtxt | 2 +- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 452f84192c..0686480ca4 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -152,6 +152,7 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpoint_utils from tensorflow.python.util import nest @@ -980,9 +981,12 @@ def categorical_column_with_hash_bucket(key, return _HashedCategoricalColumn(key, hash_bucket_size, dtype) -def categorical_column_with_vocabulary_file( - key, vocabulary_file, vocabulary_size, num_oov_buckets=0, - default_value=None, dtype=dtypes.string): +def categorical_column_with_vocabulary_file(key, + vocabulary_file, + vocabulary_size=None, + num_oov_buckets=0, + default_value=None, + dtype=dtypes.string): """A `_CategoricalColumn` with a vocabulary file. Use this when your inputs are in string or integer format, and you have a @@ -1041,7 +1045,7 @@ def categorical_column_with_vocabulary_file( vocabulary_file: The vocabulary file name. vocabulary_size: Number of the elements in the vocabulary. This must be no greater than length of `vocabulary_file`, if less than length, later - values are ignored. + values are ignored. If None, it is set to the length of `vocabulary_file`. num_oov_buckets: Non-negative integer, the number of out-of-vocabulary buckets. All out-of-vocabulary inputs will be assigned IDs in the range `[vocabulary_size, vocabulary_size+num_oov_buckets)` based on a hash of @@ -1056,7 +1060,7 @@ def categorical_column_with_vocabulary_file( A `_CategoricalColumn` with a vocabulary file. Raises: - ValueError: `vocabulary_file` is missing. + ValueError: `vocabulary_file` is missing or cannot be opened. ValueError: `vocabulary_size` is missing or < 1. ValueError: `num_oov_buckets` is a negative integer. ValueError: `num_oov_buckets` and `default_value` are both specified. @@ -1064,8 +1068,19 @@ def categorical_column_with_vocabulary_file( """ if not vocabulary_file: raise ValueError('Missing vocabulary_file in {}.'.format(key)) + + if vocabulary_size is None: + if not gfile.Exists(vocabulary_file): + raise ValueError('vocabulary_file in {} does not exist.'.format(key)) + + with gfile.GFile(vocabulary_file) as f: + vocabulary_size = sum(1 for _ in f) + logging.info( + 'vocabulary_size = %d in %s is inferred from the number of elements ' + 'in the vocabulary_file %s.', vocabulary_size, key, vocabulary_file) + # `vocabulary_size` isn't required for lookup, but it is for `_num_buckets`. - if (vocabulary_size is None) or (vocabulary_size < 1): + if vocabulary_size < 1: raise ValueError('Invalid vocabulary_size in {}.'.format(key)) if num_oov_buckets: if default_value is not None: diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 6ac5ce8757..d974f14b8a 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -2255,10 +2255,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): lookup_ops.tables_initializer().run() def test_invalid_vocabulary_size(self): - with self.assertRaisesRegexp(ValueError, 'Invalid vocabulary_size'): - fc.categorical_column_with_vocabulary_file( - key='aaa', vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=None) with self.assertRaisesRegexp(ValueError, 'Invalid vocabulary_size'): fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file=self._wire_vocabulary_file_name, @@ -2372,6 +2368,24 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), id_weight_pair.id_tensor.eval()) + def test_get_sparse_tensors_none_vocabulary_size(self): + column = fc.categorical_column_with_vocabulary_file( + key='aaa', vocabulary_file=self._wire_vocabulary_file_name) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value(self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array( + (2, -1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) + def test_transform_feature(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', diff --git a/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt b/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt index 9eb4cb8ce9..018e8c909a 100644 --- a/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt @@ -14,7 +14,7 @@ tf_module { } member_method { name: "categorical_column_with_vocabulary_file" - argspec: "args=[\'key\', \'vocabulary_file\', \'vocabulary_size\', \'num_oov_buckets\', \'default_value\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"\"], " + argspec: "args=[\'key\', \'vocabulary_file\', \'vocabulary_size\', \'num_oov_buckets\', \'default_value\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \"\"], " } member_method { name: "categorical_column_with_vocabulary_list" -- GitLab From 758ac9cb907fdd7d9c295ea076e985c9f545667f Mon Sep 17 00:00:00 2001 From: FredZhang <654496915@qq.com> Date: Sat, 25 Nov 2017 22:13:08 +0800 Subject: [PATCH 0824/1801] Some PATH typo : no `train_dir` in tutorial This file uses `train_dir`. But in code file, there is no `train_dir` anymore, it should be replaced with `log_dir` `input_data_dir` and `checkpoint_file` respectively --- tensorflow/docs_src/get_started/mnist/mechanics.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md index 27fae45b5b..a5c784b30d 100644 --- a/tensorflow/docs_src/get_started/mnist/mechanics.md +++ b/tensorflow/docs_src/get_started/mnist/mechanics.md @@ -47,7 +47,7 @@ training folder and then unpack that data to return a dictionary of `DataSet` instances. ```python -data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data) +data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data) ``` **NOTE**: The `fake_data` flag is used for unit-testing purposes and may be @@ -369,7 +369,7 @@ may be instantiated to write the events files, which contain both the graph itself and the values of the summaries. ```python -summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) +summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) ``` Lastly, the events file will be updated with new summary values every time the @@ -403,7 +403,7 @@ method will periodically be called to write a checkpoint file to the training directory with the current values of all the trainable variables. ```python -saver.save(sess, FLAGS.train_dir, global_step=step) +saver.save(sess, checkpoint_file, global_step=step) ``` At some later point in the future, training might be resumed by using the @@ -411,7 +411,7 @@ At some later point in the future, training might be resumed by using the method to reload the model parameters. ```python -saver.restore(sess, FLAGS.train_dir) +saver.restore(sess, checkpoint_file) ``` ## Evaluate the Model -- GitLab From 70bf5374524bc2f19b9196eeb066b883ee504db5 Mon Sep 17 00:00:00 2001 From: concerttttt Date: Mon, 27 Nov 2017 11:58:38 +0800 Subject: [PATCH 0825/1801] Update tf_core_framework.cmake gpu_tracer.cc replaced by device_tracer.cc --- tensorflow/contrib/cmake/tf_core_framework.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index c607546f4a..5ec1a8d04f 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -211,7 +211,7 @@ if (NOT tensorflow_ENABLE_GPU) list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_gpu_srcs}) else() file(GLOB tf_core_platform_srcs_exclude - "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc") + "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc") list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_srcs_exclude}) endif() -- GitLab From a264269f523467ac018708a647eab02c1f1010fe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 04:26:18 -0800 Subject: [PATCH 0826/1801] Fixed a minor typo in FisherEstimator docstring. PiperOrigin-RevId: 176999852 --- tensorflow/contrib/kfac/python/ops/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py index c353f3592f..27ff951f16 100644 --- a/tensorflow/contrib/kfac/python/ops/estimator.py +++ b/tensorflow/contrib/kfac/python/ops/estimator.py @@ -95,7 +95,7 @@ class FisherEstimator(object): blocks, kronecker factors, and losses associated with the graph. estimation_mode: The type of estimator to use for the Fishers. Can be - 'gradients', 'empirical', 'curvature_propagation', or 'exact'. + 'gradients', 'empirical', 'curvature_prop', or 'exact'. (Default: 'gradients'). 'gradients' is the basic estimation approach from the original K-FAC paper. 'empirical' computes the 'empirical' Fisher information matrix (which uses the data's distribution for the -- GitLab From 58970731ba6d899c827ab1ce5c853d9ac8ae1414 Mon Sep 17 00:00:00 2001 From: lanhin Date: Mon, 27 Nov 2017 20:43:30 +0800 Subject: [PATCH 0827/1801] Comment typo fix. --- tensorflow/core/common_runtime/function.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 93bd3a6adb..6fb0dc252e 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -318,7 +318,7 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef, kernel); } - // Try to instantiate this function for the func/attr. Maybe its + // Try to instantiate this function for the func/attr. Maybe it's // cached already. Handle handle; TF_RETURN_IF_ERROR(Instantiate(ndef.op(), AttrSlice(&ndef.attr()), &handle)); -- GitLab From 191825e63f341a4e7777b85254f616e541000d5c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 06:29:45 -0800 Subject: [PATCH 0828/1801] Delete trailing whitespace PiperOrigin-RevId: 177008504 --- RELEASE.md | 2 +- .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 2 +- tensorflow/contrib/android/cmake/README.md | 2 +- .../android/TensorFlowInferenceInterface.java | 6 +-- .../kernels/bigquery_table_accessor_test.cc | 2 +- tensorflow/contrib/cmake/tf_grappler.cmake | 2 +- tensorflow/contrib/cmake/tf_shared_lib.cmake | 2 +- .../contrib/cmake/tf_stream_executor.cmake | 6 +-- ...single_image_random_dot_stereograms_ops.cc | 6 +-- tensorflow/contrib/lite/g3doc/apis.md | 2 +- .../app/src/main/res/values/base-strings.xml | 8 +-- tensorflow/contrib/makefile/README.md | 32 ++++++------ .../contrib/makefile/compile_ios_protobuf.sh | 2 +- tensorflow/contrib/makefile/compile_nsync.sh | 2 +- .../contrib/makefile/rename_protobuf.sh | 4 +- tensorflow/contrib/metrics/README.md | 2 +- tensorflow/contrib/mpi/README.md | 10 ++-- tensorflow/contrib/pi_examples/README.md | 2 +- .../contrib/pi_examples/camera/Makefile | 2 +- .../contrib/pi_examples/label_image/Makefile | 2 +- .../pi_examples/label_image/label_image.cc | 14 +++--- tensorflow/contrib/quantize/README.md | 2 +- .../stochastic_hard_routing_function_op.cc | 2 +- .../g3doc/periodic_multires_derivation.md | 2 +- tensorflow/contrib/tpu/ops/outfeed_ops.cc | 2 +- tensorflow/contrib/verbs/README.md | 2 +- .../common_runtime/accumulate_n_optimizer.cc | 2 +- tensorflow/core/framework/bfloat16.cc | 28 +++++------ tensorflow/core/framework/bfloat16.h | 6 +-- tensorflow/core/kernels/cast_op.h | 8 +-- tensorflow/core/kernels/diag_op.cc | 4 +- tensorflow/core/kernels/diag_op_gpu.cu.cc | 2 +- tensorflow/core/kernels/queue_ops.cc | 2 +- .../core/kernels/sparse_matmul_op_test.cc | 10 ++-- tensorflow/core/kernels/xsmm_conv2d_test.cc | 50 +++++++++---------- tensorflow/core/ops/image_ops.cc | 4 +- tensorflow/core/ops/nn_ops.cc | 4 +- .../platform/default/build_config_root.bzl | 2 +- tensorflow/core/profiler/README.md | 2 +- tensorflow/docs_src/about/uses.md | 2 +- tensorflow/docs_src/api_guides/python/nn.md | 4 +- .../docs_src/community/documentation.md | 6 +-- tensorflow/docs_src/community/style_guide.md | 2 +- tensorflow/docs_src/community/welcome.md | 2 +- tensorflow/docs_src/deploy/hadoop.md | 4 +- tensorflow/docs_src/extend/add_filesys.md | 2 +- tensorflow/docs_src/extend/index.md | 2 +- .../docs_src/get_started/get_started.md | 4 +- tensorflow/docs_src/install/install_linux.md | 8 +-- .../docs_src/install/install_sources.md | 2 +- tensorflow/docs_src/mobile/android_build.md | 4 +- tensorflow/docs_src/mobile/index.md | 2 +- tensorflow/docs_src/mobile/ios_build.md | 2 +- tensorflow/docs_src/mobile/optimizing.md | 4 +- tensorflow/docs_src/mobile/prepare_models.md | 14 +++--- tensorflow/docs_src/mobile/tflite/index.md | 4 +- .../docs_src/programmers_guide/saved_model.md | 2 +- .../docs_src/programmers_guide/tensors.md | 8 +-- .../docs_src/programmers_guide/variables.md | 16 +++--- .../docs_src/tutorials/image_recognition.md | 2 +- .../src/org/tensorflow/demo/Classifier.java | 2 +- tensorflow/examples/ios/README.md | 8 +-- .../examples/tutorials/deepdream/README.md | 8 +-- tensorflow/examples/udacity/README.md | 6 +-- tensorflow/g3doc/README.txt | 2 +- .../java/src/gen/perl/tftypes-runall.pl | 6 +-- tensorflow/java/src/gen/perl/tftypes.pl | 8 +-- .../java/src/gen/resources/Tensors.java.tmpl | 4 +- tensorflow/python/grappler/model_analyzer.i | 2 +- .../stream_executor/cuda/cuda_platform.cc | 2 +- .../stream_executor/lib/static_threadlocal.h | 2 +- tensorflow/tools/ci_build/README.md | 4 +- .../dist_test/scripts/dist_mnist_test.sh | 2 +- tensorflow/tools/docker/README.md | 2 +- tensorflow/tools/graph_transforms/README.md | 6 +-- 75 files changed, 203 insertions(+), 203 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index d8db1f7200..e04bd3fc50 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -494,7 +494,7 @@ answered questions, and were part of inspiring discussions. This release contains contributions from many people at Google, as well as: A. Besir Kurtulmus, Adal Chiriliuc, @akash, Alec-Desouza, Alex Rothberg, Alex -Sergeev, Alexander Heinecke, Allen Guo, Andreas Madsen, Ankesh Anand, Anton +Sergeev, Alexander Heinecke, Allen Guo, Andreas Madsen, Ankesh Anand, Anton Loss, @Aravind, @Arie, Ashutosh Das, AuréLien Geron, Bairen Yi, @bakunyo, Ben Visser, Brady Zhou, Calpa Liu, Changming Sun, Chih Cheng Liang, Christopher Berner, Clark Zinzow, @Conchylicultor, Dan Ellis, Dan J, Dan Jarvis, Daniel diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index a574123d6b..96981534d5 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -77,7 +77,7 @@ static string GetLibdeviceFilename(const string& libdevice_dir_path, // Since CUDA 9.0, all GPU versions are included in a single file const char* unified_libdevice_filename = "libdevice.10.bc"; std::vector unified_libdevice_files; - const tensorflow::Status status = + const tensorflow::Status status = tensorflow::Env::Default()->GetMatchingPaths( tensorflow::io::JoinPath(libdevice_dir_path, unified_libdevice_filename), &unified_libdevice_files); diff --git a/tensorflow/contrib/android/cmake/README.md b/tensorflow/contrib/android/cmake/README.md index 6f19b657fe..934b58c724 100644 --- a/tensorflow/contrib/android/cmake/README.md +++ b/tensorflow/contrib/android/cmake/README.md @@ -14,7 +14,7 @@ Add TensorFlow-Android-Inference as a dependency of your Android application ``` include ':TensorFlow-Android-Inference' -findProject(":TensorFlow-Android-Inference").projectDir = +findProject(":TensorFlow-Android-Inference").projectDir = new File("${/path/to/tensorflow_repo}/contrib/android/cmake") ``` diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java index 1f423a7a5b..dc5b9fb887 100644 --- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java +++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java @@ -160,7 +160,7 @@ public class TensorFlowInferenceInterface { throw new RuntimeException("Failed to load model from the input stream", e); } } - + /* * Construct a TensorFlowInferenceInterface with provided Graph * @@ -168,7 +168,7 @@ public class TensorFlowInferenceInterface { */ public TensorFlowInferenceInterface(Graph g) { prepareNativeRuntime(); - + // modelName is redundant here, here is for // avoiding error in initialization as modelName is marked final. this.modelName = ""; @@ -290,7 +290,7 @@ public class TensorFlowInferenceInterface { */ public void feed(String inputName, boolean[] src, long... dims) { byte[] b = new byte[src.length]; - + for (int i = 0; i < src.length; i++) { b[i] = src[i] ? (byte) 1 : (byte) 0; } diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc index b31b882fa1..e9b79a066d 100644 --- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc +++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc @@ -421,7 +421,7 @@ TEST_F(BigQueryTableAccessorTest, MultiplePagesTest) { TF_EXPECT_OK(accessor_->ReadRow(&row_id, &example)); EXPECT_EQ(3, row_id); EXPECT_TRUE(accessor_->Done()); - + Example expected_example; ASSERT_TRUE(protobuf::TextFormat::ParseFromString(kTestExampleProtoWithNulls, &expected_example)); diff --git a/tensorflow/contrib/cmake/tf_grappler.cmake b/tensorflow/contrib/cmake/tf_grappler.cmake index a7841c98e8..410490531a 100644 --- a/tensorflow/contrib/cmake/tf_grappler.cmake +++ b/tensorflow/contrib/cmake/tf_grappler.cmake @@ -23,7 +23,7 @@ file(GLOB tf_grappler_srcs "${tensorflow_source_dir}/tensorflow/python/grappler/model_analyzer.cc" "${tensorflow_source_dir}/tensorflow/python/grappler/model_analyzer.h" ) - + add_library(tf_grappler OBJECT ${tf_grappler_srcs}) add_dependencies(tf_grappler tf_core_cpu) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 3e3fe0cdfa..dcedabb333 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -45,7 +45,7 @@ if(WIN32) $ $ ) - + set(tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/tensorflow.def") set_source_files_properties(${tensorflow_deffile} PROPERTIES GENERATED TRUE) diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 8d95f0d3e8..91ca33f4c4 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -61,18 +61,18 @@ file(GLOB tf_stream_executor_srcs "${tensorflow_source_dir}/tensorflow/stream_executor/platform/default/*.h" ) -if (tensorflow_ENABLE_GPU) +if (tensorflow_ENABLE_GPU) file(GLOB tf_stream_executor_gpu_srcs "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc" ) list(APPEND tf_stream_executor_srcs ${tf_stream_executor_gpu_srcs}) -endif() +endif() #file(GLOB_RECURSE tf_stream_executor_test_srcs # "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.cc" # "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.h" #) -#list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) +#list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) if (NOT WIN32) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp") diff --git a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc index 2b67992138..f8b56ab1c5 100755 --- a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc +++ b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc @@ -40,7 +40,7 @@ REGISTER_OP("SingleImageRandomDotStereograms") .Doc(R"doc( Outputs a single image random dot stereogram for export via encode_PNG/JPG OP. -Given the 2-D tensor 'depth_values' with encoded Z values, this operation will +Given the 2-D tensor 'depth_values' with encoded Z values, this operation will encode 3-D data into a 2-D image. The output of this Op is suitable for the encode_PNG/JPG ops. Be careful with image compression as this may corrupt the encode 3-D data witin the image. @@ -68,14 +68,14 @@ with open('picture_out.png', 'wb') as f: f.write(png) ``` -depth_values: Z values of data to encode into 'output_data_window' window, +depth_values: Z values of data to encode into 'output_data_window' window, lower values are further away {0.0 floor(far), 1.0 ceiling(near) after normalization}, must be 2-D tensor hidden_surface_removal: Activate hidden surface removal convergence_dots_size: Black dot size in pixels to help view converge image, drawn on bottom of image dots_per_inch: Output device in dots/inch eye_separation: Separation between eyes in inches mu: Depth of field, Fraction of viewing distance (eg. 1/3 = .3333) -normalize: Normalize input data to [0.0, 1.0] +normalize: Normalize input data to [0.0, 1.0] normalize_max: Fix MAX value for Normalization - if < MIN, autoscale normalize_min: Fix MIN value for Normalization - if > MAX, autoscale border_level: Value of border depth 0.0 {far} to 1.0 {near} diff --git a/tensorflow/contrib/lite/g3doc/apis.md b/tensorflow/contrib/lite/g3doc/apis.md index 311fc69696..e8f5566f11 100644 --- a/tensorflow/contrib/lite/g3doc/apis.md +++ b/tensorflow/contrib/lite/g3doc/apis.md @@ -52,7 +52,7 @@ typedef enum { Failures can be easily verified with: ```c++ if (status != kTfLiteOk) { - // ... error handling here ... + // ... error handling here ... } ``` diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml index ab7d3fd496..0a71dbd0e8 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml @@ -19,12 +19,12 @@ TfLiteCameraDemo diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 65bd60c12a..9345303ff1 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -16,17 +16,17 @@ This static library will not contain: - Python or other language bindings - GPU support - + You can target: - iOS - OS X (macOS) - Android - Raspberry-PI - + You will compile tensorflow and protobuf libraries that you can link into other applications. You will also compile the [benchmark](../../tools/benchmark/) application that will let you check your application. - + ## Before you start (all platforms) First, clone this TensorFlow repository. @@ -58,9 +58,9 @@ You should then be able to run the `build_all_linux.sh` script to compile: tensorflow/contrib/makefile/build_all_linux.sh ``` -This should compile a static library in -`tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a`, -and create an example executable at `tensorflow/contrib/makefile/gen/bin/benchmark`. +This should compile a static library in +`tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a`, +and create an example executable at `tensorflow/contrib/makefile/gen/bin/benchmark`. Get the graph file, if you have not already: @@ -201,7 +201,7 @@ library in a simple app. ### Building by hand This section covers each step of building. For all the code in one place, see -[build_all_ios.sh](build_all_ios.sh). +[build_all_ios.sh](build_all_ios.sh). If you have not already, you will need to download dependencies: @@ -232,7 +232,7 @@ make -f tensorflow/contrib/makefile/Makefile \ This creates a library in `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any -xcode project against. +xcode project against. To see TensorFlow running on iOS, the example Xcode project in [tensorflow/examples/ios](../../examples/ios/) shows how to use the static @@ -258,15 +258,15 @@ tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/con In XCode, you will need to use -force_load in the linker flags section of the build settings to pull in the global constructors that are used -to register ops and kernels. +to register ops and kernels. #### Optimization - + The `compile_ios_tensorflow.sh` script can take optional command-line arguments. The first argument will be passed as a C++ optimization flag and defaults to debug mode. If you are concerned about performance or are working on a release build, you would likely want a higher optimization setting, like so: - + ```bash compile_ios_tensorflow.sh -f "-Os" ``` @@ -330,7 +330,7 @@ what you need for your desired system. ## Dependency Management The Makefile loads in a list of dependencies stored in text files. These files -are generated from the main Bazel build by running +are generated from the main Bazel build by running `tensorflow/contrib/makefile/gen_file_lists.sh`. You'll need to re-run this i you make changes to the files that are included in the build. @@ -361,10 +361,10 @@ codebase can sometimes break the makefile build process. If you find that tests relying on this makefile are failing with a change you're involved in, here are some trouble-shooting steps: - - Try to reproduce the issue on your platform. If you're on Linux, running + - Try to reproduce the issue on your platform. If you're on Linux, running `make -f tensorflow/contrib/makefile/Makefile` should be enough to recreate most issues. For other platforms, see the sections earlier in this document. - + - The most common cause of breakages are files that have been added to the Bazel build scripts, but that the makefile isn't aware of. Typical symptoms of this include linker errors mentioning missing symbols or protobuf headers @@ -377,11 +377,11 @@ some trouble-shooting steps: `tensorflow/core/BUILD`, so if you change the wildcards there to include new files you'll need to also update `CORE_CC_ALL_SRCS` and `CORE_CC_EXCLUDE_SRCS` in the makefile. - + - Some of the supported platforms use clang instead of gcc as their compiler, so if you're hitting compile errors you may need to tweak your code to be more friendly to different compilers by avoiding gcc extensions or idioms. - + These are the most common reasons for makefile breakages, but it's also possible you may hit something unusual, like a platform incompatibility. For those, you'll need to see if you can reproduce the issue on that particular diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh index 43e5809dd2..8fa2021363 100755 --- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh +++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh @@ -270,7 +270,7 @@ case "$1" in echo "Unknown ARCH" exit 1 ;; -esac +esac } for build_element in "${build_targets[@]}" diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index 930e6b8dea..7927997678 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -28,7 +28,7 @@ usage="usage: $prog [-t linux|ios|android|macos|native] [-a architecture] [-v android_api_version] A script to build nsync for tensorflow. -This script can be run on Linux or MacOS host platforms, and can target +This script can be run on Linux or MacOS host platforms, and can target Linux, MacOS, iOS, or Android. Options: diff --git a/tensorflow/contrib/makefile/rename_protobuf.sh b/tensorflow/contrib/makefile/rename_protobuf.sh index b3bff2d503..8d52c1a169 100755 --- a/tensorflow/contrib/makefile/rename_protobuf.sh +++ b/tensorflow/contrib/makefile/rename_protobuf.sh @@ -38,7 +38,7 @@ # # Note that this script modifies the source code in-place, so once it's been run # it's no longer suitable for further manual modifications, since the difference -# with the top of tree will already be large. +# with the top of tree will already be large. mv tensorflow/contrib/makefile/downloads/protobuf/src/google/protobuf \ tensorflow/contrib/makefile/downloads/protobuf//src/google/protobuf3 @@ -71,7 +71,7 @@ sed -i '' 's%::google::protobuf;%google::protobuf3;%' \ # Fix up a couple of special build scripts that look for particular files. sed -i '' 's%src/google/protobuf/message.cc%src/google/protobuf3/message.cc%' \ - tensorflow/contrib/makefile/downloads/protobuf/configure.ac + tensorflow/contrib/makefile/downloads/protobuf/configure.ac sed -i '' 's%src/google/protobuf/stubs/common.h%src/google/protobuf3/stubs/common.h%' \ tensorflow/contrib/makefile/downloads/protobuf/autogen.sh diff --git a/tensorflow/contrib/metrics/README.md b/tensorflow/contrib/metrics/README.md index 247ebac5bb..e0f2d74fa3 100644 --- a/tensorflow/contrib/metrics/README.md +++ b/tensorflow/contrib/metrics/README.md @@ -4,7 +4,7 @@ Metrics are used in evaluation to assess the quality of a model. Most are "streaming" ops, meaning they create variables to accumulate a running total, -and return an update tensor to update these variables, and a value tensor to +and return an update tensor to update these variables, and a value tensor to read the accumulated value. Example: value, update_op = metrics.streaming_mean_squared_error( diff --git a/tensorflow/contrib/mpi/README.md b/tensorflow/contrib/mpi/README.md index b0d03d05a2..75cb823048 100644 --- a/tensorflow/contrib/mpi/README.md +++ b/tensorflow/contrib/mpi/README.md @@ -23,7 +23,7 @@ The following environment variables can be set to modify the behavior at runtime **MPI_DISABLED=[0,1]** -This environment variable allows you to disable the MPI path before launch (e.g. for performance or correctness testing). +This environment variable allows you to disable the MPI path before launch (e.g. for performance or correctness testing). **MPI_OPTIMAL_PATH=[0,1]** @@ -34,10 +34,10 @@ This path is disabled by default as it requires that the MPI library can directl ## Known problems -For certain complex neural nets the implementation sometimes crashes inside the MPI libraries. This seems to be related to memory allocations/routines that register the memory for the Infiniband transfers. (The crashes do not happen when all MPI processes are within the same physical machine). +For certain complex neural nets the implementation sometimes crashes inside the MPI libraries. This seems to be related to memory allocations/routines that register the memory for the Infiniband transfers. (The crashes do not happen when all MPI processes are within the same physical machine). **MVAPICH** -- The problem manifests itself with a segmentation fault inside a memory copy routine and during startup you will get the following warning: "WARNING: Error in initializing MVAPICH2 ptmalloc library. Continuing without InfiniBand registration cache support." +- The problem manifests itself with a segmentation fault inside a memory copy routine and during startup you will get the following warning: "WARNING: Error in initializing MVAPICH2 ptmalloc library. Continuing without InfiniBand registration cache support." **OpenMPI** - With OpenMPI corrupt data will be received resulting in an assertion or the MPI library will print an error and exit. The error is "Attempt to free memory that is still in use by an ongoing MPI communication. MPI job will now abort." @@ -58,11 +58,11 @@ Once a request has arrived from a remote process the request is forwarded to the * Receive tensor request The MPI thread will check if there are any incoming tensor request messages on the communication lines using MPI_Iprobe. Once a request has been received it will be passed on to the standard TensorFlow code and eventually will be placed on the sendQueue. -* Receive tensor +* Receive tensor At some point after a request has been sent the remote process will transmit the tensor. This tensor will be received and we look-up the callback that is associated with this tensor in our request table and execute the callback on the received data. -In the implementation all send operations are non-blocking, all probe operations are non-blocking and all receive-operations are blocking. The receive-operations are only executed after the probe has determined that there is something to receive. +In the implementation all send operations are non-blocking, all probe operations are non-blocking and all receive-operations are blocking. The receive-operations are only executed after the probe has determined that there is something to receive. The MPI processes identify each other using an MPI process ID. The TensorFlow gRPC processes identify each other using a name. During launch we create a mapping between the TensorFlow process name and the MPI process ID to allow the processes to communicate with the correct destinations when using MPI operations. diff --git a/tensorflow/contrib/pi_examples/README.md b/tensorflow/contrib/pi_examples/README.md index f550228083..177357bca6 100644 --- a/tensorflow/contrib/pi_examples/README.md +++ b/tensorflow/contrib/pi_examples/README.md @@ -13,7 +13,7 @@ sudo apt-get install -y libjpeg-dev ``` - To download the example model you'll need, run these commands: - + ```bash curl https://storage.googleapis.com/download.tensorflow.org/models/inception_dec_2015_stripped.zip \ -o /tmp/inception_dec_2015_stripped.zip diff --git a/tensorflow/contrib/pi_examples/camera/Makefile b/tensorflow/contrib/pi_examples/camera/Makefile index 578f1336f3..b354c03b6e 100644 --- a/tensorflow/contrib/pi_examples/camera/Makefile +++ b/tensorflow/contrib/pi_examples/camera/Makefile @@ -76,7 +76,7 @@ $(EXECUTABLE_NAME): $(EXECUTABLE_OBJS) $(TFLIBS) $(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS) # Matches on C++ source files. -$(OBJDIR)%.o: %.cc +$(OBJDIR)%.o: %.cc @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ diff --git a/tensorflow/contrib/pi_examples/label_image/Makefile b/tensorflow/contrib/pi_examples/label_image/Makefile index 19652e581d..9d054a3133 100644 --- a/tensorflow/contrib/pi_examples/label_image/Makefile +++ b/tensorflow/contrib/pi_examples/label_image/Makefile @@ -75,7 +75,7 @@ $(EXECUTABLE_NAME): $(EXECUTABLE_OBJS) $(TFLIBS) $(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS) # Matches on C++ source files. -$(OBJDIR)%.o: %.cc +$(OBJDIR)%.o: %.cc @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ diff --git a/tensorflow/contrib/pi_examples/label_image/label_image.cc b/tensorflow/contrib/pi_examples/label_image/label_image.cc index 7817cd0c64..0b18045789 100644 --- a/tensorflow/contrib/pi_examples/label_image/label_image.cc +++ b/tensorflow/contrib/pi_examples/label_image/label_image.cc @@ -89,7 +89,7 @@ Status LoadJpegFile(string file_name, std::vector* data, FILE * infile; JSAMPARRAY buffer; int row_stride; - + if ((infile = fopen(file_name.c_str(), "rb")) == NULL) { LOG(ERROR) << "Can't open " << file_name; return tensorflow::errors::NotFound("JPEG file ", file_name, @@ -105,7 +105,7 @@ Status LoadJpegFile(string file_name, std::vector* data, fclose(infile); return tensorflow::errors::Unknown("JPEG decoding failed"); } - + jpeg_create_decompress(&cinfo); jpeg_stdio_src(&cinfo, infile); jpeg_read_header(&cinfo, TRUE); @@ -119,14 +119,14 @@ Status LoadJpegFile(string file_name, std::vector* data, buffer = (*cinfo.mem->alloc_sarray) ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1); while (cinfo.output_scanline < cinfo.output_height) { - tensorflow::uint8* row_address = &((*data)[cinfo.output_scanline * row_stride]); + tensorflow::uint8* row_address = &((*data)[cinfo.output_scanline * row_stride]); jpeg_read_scanlines(&cinfo, buffer, 1); memcpy(row_address, buffer[0], row_stride); } jpeg_finish_decompress(&cinfo); jpeg_destroy_decompress(&cinfo); - fclose(infile); + fclose(infile); return Status::OK(); } @@ -167,7 +167,7 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height, const int top_y_index = static_cast(floorf(in_y)); const int bottom_y_index = std::min(static_cast(ceilf(in_y)), (image_height - 1)); - const float y_lerp = in_y - top_y_index; + const float y_lerp = in_y - top_y_index; tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen); tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen); float *out_row = out + (y * wanted_width * wanted_channels); @@ -186,7 +186,7 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height, in_bottom_row + (right_x_index * wanted_channels); const float x_lerp = in_x - left_x_index; float *out_pixel = out_row + (x * wanted_channels); - for (int c = 0; c < wanted_channels; ++c) { + for (int c = 0; c < wanted_channels; ++c) { const float top_left((in_top_left_pixel[c] - input_mean) / input_std); const float top_right((in_top_right_pixel[c] - input_mean) / input_std); const float bottom_left((in_bottom_left_pixel[c] - input_mean) / input_std); @@ -198,7 +198,7 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height, } } } - + out_tensors->push_back(image_tensor); return Status::OK(); } diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index 782232e85f..40541729da 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -13,7 +13,7 @@ through estimator [2]. Note that during back propagation, the parameters are updated at high precision as this is needed to ensure sufficient precision in accumulating tiny adjustments to the parameters. However, for the forward pass, the parameters and activations are quantized to the desired lower precision. - + ![drawing](g3doc/drawings/Fake_Quantization.jpg) ###Forward pass diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc index 09b83e2af1..66aa293dc1 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc +++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc @@ -70,7 +70,7 @@ REGISTER_OP("StochasticHardRoutingFunction") return Status::OK(); }) .Doc(R"doc( - Samples a path for each instance in `input_data` and returns the + Samples a path for each instance in `input_data` and returns the probability of the path and the path taken. tree_depth: The depth of the decision tree. diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md index b174bb6af3..872474aee1 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md @@ -66,7 +66,7 @@ def make_eigval_mat_fn(to_power=1): if i == j: number = j // 2 + 1 powersign = ((j + 1) % 2) * 2 - 1 - return root_of_unity(matsize + 1, number=number, + return root_of_unity(matsize + 1, number=number, to_power=powersign*to_power) else: return 0 diff --git a/tensorflow/contrib/tpu/ops/outfeed_ops.cc b/tensorflow/contrib/tpu/ops/outfeed_ops.cc index ed5756cc54..5900c61a38 100644 --- a/tensorflow/contrib/tpu/ops/outfeed_ops.cc +++ b/tensorflow/contrib/tpu/ops/outfeed_ops.cc @@ -39,7 +39,7 @@ REGISTER_OP("OutfeedEnqueueTuple") .Doc(R"doc( An op which emits multiple Tensor values from an XLA computation. -inputs: A list of tensors that will be inserted into the outfeed queue as an +inputs: A list of tensors that will be inserted into the outfeed queue as an XLA tuple. )doc"); diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index dcb390b0a5..7c1c8ea459 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -38,7 +38,7 @@ The following improvements can be made in the future. First, conversion to Tenso * **RDMA channel:** Responsible for RDMA connection to a particular node. It manages multiple buffers. A channel has a callback table which stores all the callbacks for the requested tensors. * **RDMA buffer:** Responsible for sending or receiving data. It has a fixed size memory to store the data. It has a queue to store the pending jobs. There are three types of buffers, message buffer, ACK buffer and tensor buffer. A channel has two message buffers, two ack buffers and many tensor buffers. * **RDMA manager:** Manages the adapter and channels, including channel creation, channel setup via GRPC service, channel lookup, etc. -* **RDMA rendezvous manager:** manages multiple rdma rendezvous. +* **RDMA rendezvous manager:** manages multiple rdma rendezvous. * **RDMA rendezvous:** a derived class of BaseRemoteRendezvous. This class is the back end for "send" and "recv" ops. When the sendrecv_op wants to send or receive a tensor, it calls the rendezvous' "send" and "recv" functions respectively. Rendezvous are identified by "step_id", a random number, so that tensors for different iterations don't get mixed up. ### The SEND operation diff --git a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc index 81cd44870e..a1e3b21e4f 100644 --- a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc +++ b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc @@ -35,7 +35,7 @@ Tensor make_zeros(const DataType& dtype, const TensorShapeProto& shape) { // Replaces occurrences of the "AccumulateNV2" stub operator with a graph of // lower-level ops. The graph is equivalent (modulo certain corner cases) // to the semantics of the original accumulate_n() Python op in math_ops.py. -// Implementing the op with a rewrite allows this new variant of accumulate_n +// Implementing the op with a rewrite allows this new variant of accumulate_n // to be differentiable. // // The binary code that generates AccumulateNV2 stub ops is located in a diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc index a5ac0e1a8d..0efe43fde2 100644 --- a/tensorflow/core/framework/bfloat16.cc +++ b/tensorflow/core/framework/bfloat16.cc @@ -21,13 +21,13 @@ void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) { const uint16_t* p = reinterpret_cast(src); uint16_t* q = reinterpret_cast(dst); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p += 2, q++, size--) { - *q = p[0]; - } + for (; size != 0; p += 2, q++, size--) { + *q = p[0]; + } #else - for (; size != 0; p += 2, q++, size--) { - *q = p[1]; - } + for (; size != 0; p += 2, q++, size--) { + *q = p[1]; + } #endif } @@ -35,15 +35,15 @@ void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) { const uint16_t* p = reinterpret_cast(src); uint16_t* q = reinterpret_cast(dst); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - for (; size != 0; p++, q += 2, size--) { - q[0] = *p; - q[1] = 0; + for (; size != 0; p++, q += 2, size--) { + q[0] = *p; + q[1] = 0; + } +#else + for (; size != 0; p++, q += 2, size--) { + q[0] = 0; + q[1] = *p; } -#else - for (; size != 0; p++, q += 2, size--) { - q[0] = 0; - q[1] = *p; - } #endif } diff --git a/tensorflow/core/framework/bfloat16.h b/tensorflow/core/framework/bfloat16.h index b936e899d4..968c18bdd2 100644 --- a/tensorflow/core/framework/bfloat16.h +++ b/tensorflow/core/framework/bfloat16.h @@ -19,9 +19,9 @@ limitations under the License. #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/platform/types.h" -#if defined(PLATFORM_WINDOWS) -#include "tensorflow/core/platform/windows/cpu_info.h" -#endif +#if defined(PLATFORM_WINDOWS) +#include "tensorflow/core/platform/windows/cpu_info.h" +#endif // Compact 16-bit encoding of floating point numbers. This representation uses // 1 bit for the sign, 8 bits for the exponent and 7 bits for the mantissa. It diff --git a/tensorflow/core/kernels/cast_op.h b/tensorflow/core/kernels/cast_op.h index 7d3e0cbe3d..8fedf2c271 100644 --- a/tensorflow/core/kernels/cast_op.h +++ b/tensorflow/core/kernels/cast_op.h @@ -128,10 +128,10 @@ struct scalar_cast_op<::tensorflow::bfloat16, float> { float ret; uint16_t* p = reinterpret_cast(&ret); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - p[0] = a.value; - p[1] = 0; -#else - static_assert(::tensorflow::port::kLittleEndian, "Not a little endian system!"); + p[0] = a.value; + p[1] = 0; +#else + static_assert(::tensorflow::port::kLittleEndian, "Not a little endian system!"); p[0] = 0; p[1] = a.value; #endif diff --git a/tensorflow/core/kernels/diag_op.cc b/tensorflow/core/kernels/diag_op.cc index be862b82f1..86fa7dce36 100644 --- a/tensorflow/core/kernels/diag_op.cc +++ b/tensorflow/core/kernels/diag_op.cc @@ -108,7 +108,7 @@ class DiagPartOp : public OpKernel { }; // Implementation of the functor specialization for CPU. -// +// // According to the diagonal definition, // `output[i1,..., ik, i1,..., ik] = input[i1,..., ik]`, // @@ -116,7 +116,7 @@ class DiagPartOp : public OpKernel { // pointer can be represent by coordinate [i1,..., ik], // where `index = i1*(s2*...*sk) + i2*(s3*...*sk) +... + ik` // -// Let new_index is the offset of output's pointer with coordinate +// Let new_index is the offset of output's pointer with coordinate // [i1,..., ik, i1,..., ik], then we have // `new_index = i1*(s2*...sk*s1*...*sk) + i2*(s3*...*sk*s1*...*sk) +... + \ // ik*(s1*...*sk) + i1*(s2*...*sk) + i2*(s3*...*sk) +... + ik diff --git a/tensorflow/core/kernels/diag_op_gpu.cu.cc b/tensorflow/core/kernels/diag_op_gpu.cu.cc index 684f00ea61..d3c529d784 100644 --- a/tensorflow/core/kernels/diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/diag_op_gpu.cu.cc @@ -33,7 +33,7 @@ __global__ void DiagCudaKernel(const int num_threads, const T* in, T* out) { CUDA_1D_KERNEL_LOOP(index, num_threads) { - // Fill the diagonal elements or set to zero in other place. + // Fill the diagonal elements or set to zero in other place. if (index % (1 + size) == 0) { out[index] = in[index / (1 + size)]; } else { diff --git a/tensorflow/core/kernels/queue_ops.cc b/tensorflow/core/kernels/queue_ops.cc index d51dc4ecb0..17831b7437 100644 --- a/tensorflow/core/kernels/queue_ops.cc +++ b/tensorflow/core/kernels/queue_ops.cc @@ -429,7 +429,7 @@ class QueueIsClosedOp : public QueueOpKernel { public: explicit QueueIsClosedOp(OpKernelConstruction* context) : QueueOpKernel(context) {} - + protected: void ComputeAsync(OpKernelContext* ctx, QueueInterface* queue, DoneCallback callback) override { diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc index a0c54805e2..f815ca9e34 100644 --- a/tensorflow/core/kernels/sparse_matmul_op_test.cc +++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc @@ -284,12 +284,12 @@ class SparseMatmulOpTest : public ::testing::Test { uint16_t* data3_bfloat16_p = reinterpret_cast(data3_bfloat16) + i; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - data3_p[1] = 0; - data3_bfloat16_p[0] = data3_p[0]; + data3_p[1] = 0; + data3_bfloat16_p[0] = data3_p[0]; #else - data3_p[0] = 0; - data3_bfloat16_p[0] = data3_p[1]; -#endif + data3_p[0] = 0; + data3_bfloat16_p[0] = data3_p[1]; +#endif } } diff --git a/tensorflow/core/kernels/xsmm_conv2d_test.cc b/tensorflow/core/kernels/xsmm_conv2d_test.cc index 381ea39b77..e294701246 100644 --- a/tensorflow/core/kernels/xsmm_conv2d_test.cc +++ b/tensorflow/core/kernels/xsmm_conv2d_test.cc @@ -73,7 +73,7 @@ LIBXSMM_INLINE void naive_copy_KCRS_to_RSCK(const float* kcrs, Tensor &rsck, in LIBXSMM_VLA_DECL(4, const float, input, kcrs, C, R, S); int r, s, c, k; auto output = rsck.flat(); - + for ( r = 0; r < R; r++ ) { for ( s = 0; s < S; s++ ) { for ( c = 0; c < C; c++ ) { @@ -94,14 +94,14 @@ LIBXSMM_INLINE void zero_buf(float* buf, long size) { buf[i] = 0.0f; } } - + LIBXSMM_INLINE void copy_buf(Tensor &dst,float *src,long size) { long i; auto output = dst.flat(); - for (i = 0; i < size; ++i) + for (i = 0; i < size; ++i) output(i) = src[i]; } - + LIBXSMM_INLINE void init_buf(float* buf, long size, int initPos, int initOne) { int i; @@ -110,7 +110,7 @@ LIBXSMM_INLINE void init_buf(float* buf, long size, int initPos, int initOne) buf[i] = (float)((initOne != 0) ? 1.0 : ((initPos != 0) ? drand48() : (0.05 - drand48()/10.0))); } } - + LIBXSMM_INLINE void naive_conv_fp(naive_conv_t* param, const float* input, float* output, const float* filter) @@ -138,11 +138,11 @@ LIBXSMM_INLINE void naive_conv_fp(naive_conv_t* param, const float* input, float int stride_w = param->stride_w; /* loop counters */ int img, ofm, ifm, oj, oi, ij, ii, kj, ki; - + LIBXSMM_VLA_DECL(4, float, output_t, output + (pad_w_out * ofwp + pad_h_out), nOfm, ofhp, ofwp); LIBXSMM_VLA_DECL(4, const float, input_t, input + (pad_w_in * ifwp + pad_h_in), nIfm, ifhp, ifwp); LIBXSMM_VLA_DECL(4, const float, filter_t, filter, nIfm, kh, kw); - + for (img = 0; img < nImg; ++img) { for (ofm = 0; ofm < nOfm; ++ofm) { for (ifm = 0; ifm < nIfm; ++ifm) { @@ -172,7 +172,7 @@ void RunXsmmVsGeneric() {} class XsmmConv2DTest : public OpsTestBase { protected: void MakeOp(int stride) { - + TF_CHECK_OK(NodeDefBuilder("xsmm", "Conv2D") .Input(FakeInput(DT_FLOAT)) .Input(FakeInput(DT_FLOAT)) @@ -184,7 +184,7 @@ class XsmmConv2DTest : public OpsTestBase { TF_ASSERT_OK(InitOp()); } }; - + TEST_F(XsmmConv2DTest, Basic) { MakeOp(1); @@ -206,13 +206,13 @@ TEST_F(XsmmConv2DTest, Basic) { int stride_h = stride; int pad_h = pad; int pad_w = pad; - + int pad_h_in = pad_h; int pad_w_in = pad_w; - + int pad_h_out = 0; int pad_w_out = 0; - + /* deriving some values for naive code */ int ofh = (ifh + 2 * pad_h - kh) / stride_h + 1; int ofw = (ifw + 2 * pad_w - kw) / stride_w + 1; @@ -223,7 +223,7 @@ TEST_F(XsmmConv2DTest, Basic) { //Initialization of Filter and Image - + /* allocate data */ float *naive_input = (float*)libxsmm_aligned_scratch( nImg*nIfm*ifhp*ifwp*sizeof(float), 2097152); float *naive_output = (float*)libxsmm_aligned_scratch( nImg*nOfm*ofhp*ofwp*sizeof(float), 2097152); @@ -232,21 +232,21 @@ TEST_F(XsmmConv2DTest, Basic) { init_buf(naive_input, nImg*nIfm*ifhp*ifwp, 0, 0); zero_buf(naive_output, nImg*nOfm*ofhp*ofwp); init_buf(naive_filter, nOfm*nIfm*kh*kw, 0, 0); - + Tensor image(DT_FLOAT, {nImg, ifhp, ifwp, nIfm}); - - + + Tensor filter(DT_FLOAT, {kh,kw,nIfm,nOfm}); - + naive_copy_NCHW_to_NHWC(naive_input, image, nImg, ifhp, ifwp, nIfm); - naive_copy_KCRS_to_RSCK(naive_filter, filter, kh, kw, nIfm, nOfm); + naive_copy_KCRS_to_RSCK(naive_filter, filter, kh, kw, nIfm, nOfm); //Run naive convolution - + naive_conv_t naive_param; naive_param.nImg = nImg; @@ -274,8 +274,8 @@ TEST_F(XsmmConv2DTest, Basic) { naive_conv_fp(&naive_param, naive_input, naive_output, naive_filter); - - + + AddInputFromArray(image.shape(), image.flat()); AddInputFromArray(filter.shape(), filter.flat()); @@ -283,7 +283,7 @@ TEST_F(XsmmConv2DTest, Basic) { //Run Op (TF) TF_ASSERT_OK(RunOpKernel()); - + // Check the output. Tensor expected(DT_FLOAT, {nImg,ofhp,ofwp, nOfm}); naive_copy_NCHW_to_NHWC(naive_output, expected, nImg, ofhp, ofwp, nOfm); @@ -329,15 +329,15 @@ TEST(XsmmConv2DTest, Basic) { desc.fuse_ops = LIBXSMM_DNN_CONV_FUSE_NONE; desc.options = LIBXSMM_DNN_CONV_OPTION_NONE; desc.datatype = LIBXSMM_DNN_DATATYPE_F32; - + if (!CanUseXsmmConv2D(desc, data_format)) { return false; } - + auto input_ptr = input.template flat().data(); auto filter_ptr = filter.template flat().data(); auto output_ptr = output->template flat().data(); - + bool success = functor::XsmmFwdConv2D()( ctx, desc, input_ptr, filter_ptr, output_ptr); return success; diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index c3f8006415..13fbd2fa51 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -818,8 +818,8 @@ bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and height of the underlying image. -For example, if an image is 100 x 200 pixels (height x width) and the bounding -box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of +For example, if an image is 100 x 200 pixels (height x width) and the bounding +box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of the bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates). Parts of the bounding box may fall outside the image. diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index a242a13878..654e890b57 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -359,7 +359,7 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors. y_backprop: A 4D Tensor for the gradient with respect to y. x: A 4D Tensor for input data. scale: A 1D Tensor for scaling factor, to scale the normalized x. -reserve_space_1: When is_training is True, a 1D Tensor for the computed batch +reserve_space_1: When is_training is True, a 1D Tensor for the computed batch mean to be reused in gradient computation. When is_training is False, a 1D Tensor for the population mean to be reused in both 1st and 2nd order gradient computation. @@ -407,7 +407,7 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors. y_backprop: A 4D Tensor for the gradient with respect to y. x: A 4D Tensor for input data. scale: A 1D Tensor for scaling factor, to scale the normalized x. -reserve_space_1: When is_training is True, a 1D Tensor for the computed batch +reserve_space_1: When is_training is True, a 1D Tensor for the computed batch mean to be reused in gradient computation. When is_training is False, a 1D Tensor for the population mean to be reused in both 1st and 2nd order gradient computation. diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl index caeed0aa4a..c63fb28ff9 100644 --- a/tensorflow/core/platform/default/build_config_root.bzl +++ b/tensorflow/core/platform/default/build_config_root.bzl @@ -28,7 +28,7 @@ def tf_additional_verbs_deps(): "//tensorflow:with_verbs_support": [ "//tensorflow/contrib/verbs:verbs_server_lib", "//tensorflow/contrib/verbs:grpc_verbs_client", - ], + ], "//conditions:default": [], }) diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md index 8ca26fa5dc..9e628b1065 100644 --- a/tensorflow/core/profiler/README.md +++ b/tensorflow/core/profiler/README.md @@ -48,7 +48,7 @@ bazel-bin/tensorflow/python/profiler/profiler_ui \ # Create options to profile the time and memory information. builder = tf.profiler.ProfileOptionBuilder opts = builder(builder.time_and_memory()).order_by('micros').build() -# Create a profiling context, set constructor argument `trace_steps`, +# Create a profiling context, set constructor argument `trace_steps`, # `dump_steps` to empty for explicit control. with tf.contrib.tfprof.ProfileContext('/tmp/train_dir', trace_steps=[], diff --git a/tensorflow/docs_src/about/uses.md b/tensorflow/docs_src/about/uses.md index d41818e10c..8818177a28 100644 --- a/tensorflow/docs_src/about/uses.md +++ b/tensorflow/docs_src/about/uses.md @@ -5,7 +5,7 @@ This page highlights TensorFlow models in real world use. ## Model zoo -Please visit our collection of TensorFlow models in the +Please visit our collection of TensorFlow models in the [TensorFlow Zoo](https://github.com/tensorflow/models). If you have built a model with TensorFlow, please consider publishing it in diff --git a/tensorflow/docs_src/api_guides/python/nn.md b/tensorflow/docs_src/api_guides/python/nn.md index 75dbb04e7d..eb3b251099 100644 --- a/tensorflow/docs_src/api_guides/python/nn.md +++ b/tensorflow/docs_src/api_guides/python/nn.md @@ -73,7 +73,7 @@ The total padding applied along the height and width is computed as: pad_along_width = max(filter_width - strides[2], 0) else: pad_along_width = max(filter_width - (in_width % strides[2]), 0) - + Finally, the padding on the top, bottom, left and right are: pad_top = pad_along_height // 2 @@ -351,7 +351,7 @@ p_i = max(s\cdot (n_o - 1) + k - n_i, 0) \end{equation} Remember that, for `'SAME'` padding, -\\(n_o = \left \lceil{\frac{n_i}{s}}\right \rceil\\), as mentioned above. +\\(n_o = \left \lceil{\frac{n_i}{s}}\right \rceil\\), as mentioned above. We need to analyze in detail two cases: - \\(n_i \text{ mod } s = 0\\) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 77d4e0caec..003e0a25ec 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -10,10 +10,10 @@ particular, this document explains the following: You can view TensorFlow documentation on https://www.tensorflow.org, and you can view and edit the raw files on -[GitHub](https://www.tensorflow.org/code/tensorflow/docs_src/). +[GitHub](https://www.tensorflow.org/code/tensorflow/docs_src/). We're publishing our docs on GitHub so everybody can contribute. Whatever gets checked in to `tensorflow/docs_src` will be published soon after on -https://www.tensorflow.org. +https://www.tensorflow.org. Republishing TensorFlow documentation in different forms is absolutely allowed, but we are unlikely to accept other documentation formats (or the tooling to @@ -237,7 +237,7 @@ If a module is accidentally imported, it typically breaks the doc generator even if the doc generator succeeds, unwanted symbols may show up in the docs. Check the generated docs to make sure that all symbols that are documented are expected. If there are symbols that shouldn’t be there, you have the -following options for dealing with them: +following options for dealing with them: - Private symbols and imports - The `remove_undocumented` filter diff --git a/tensorflow/docs_src/community/style_guide.md b/tensorflow/docs_src/community/style_guide.md index 40a75a4736..a4c4e2674e 100644 --- a/tensorflow/docs_src/community/style_guide.md +++ b/tensorflow/docs_src/community/style_guide.md @@ -162,7 +162,7 @@ operation. it's present in the scope. * Layers that behave differently during training should take: - - `is_training`: `bool` indicator to conditionally choose different + - `is_training`: `bool` indicator to conditionally choose different computation paths (e.g. using `tf.cond`) during execution. Example: diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 33740de5d5..a3abf25507 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -65,5 +65,5 @@ please read the following list carefully: [TensorFlow issues tracker](https://github.com/tensorflow/tensorflow/issues) on GitHub. For example, use the issue tracker to request a new operation in TensorFlow. - + diff --git a/tensorflow/docs_src/deploy/hadoop.md b/tensorflow/docs_src/deploy/hadoop.md index 7592cf828b..c4471562b9 100644 --- a/tensorflow/docs_src/deploy/hadoop.md +++ b/tensorflow/docs_src/deploy/hadoop.md @@ -32,8 +32,8 @@ be set: source ${HADOOP_HOME}/libexec/hadoop-config.sh ``` -* **LD_LIBRARY_PATH**: To include the path to libjvm.so, and optionally the path - to libhdfs.so if your Hadoop distribution does not install libhdfs.so in +* **LD_LIBRARY_PATH**: To include the path to libjvm.so, and optionally the path + to libhdfs.so if your Hadoop distribution does not install libhdfs.so in `$HADOOP_HDFS_HOME/lib/native`. On Linux: ```shell diff --git a/tensorflow/docs_src/extend/add_filesys.md b/tensorflow/docs_src/extend/add_filesys.md index ea3a6fe53a..44ba198998 100644 --- a/tensorflow/docs_src/extend/add_filesys.md +++ b/tensorflow/docs_src/extend/add_filesys.md @@ -32,7 +32,7 @@ Note that TensorFlow already includes many filesystem implementations, such as: Note: NFS filesystems often mount as a POSIX interface, and so standard TensorFlow can work on top of NFS-mounted remote filesystems. - + * HDFS - the Hadoop File System * GCS - Google Cloud Storage filesystem * A "memory-mapped-file" filesystem diff --git a/tensorflow/docs_src/extend/index.md b/tensorflow/docs_src/extend/index.md index 3f30b9a8c2..00b168c6be 100644 --- a/tensorflow/docs_src/extend/index.md +++ b/tensorflow/docs_src/extend/index.md @@ -20,7 +20,7 @@ TensorFlow: Python is currently the only language supported by TensorFlow's API stability promises. However, TensorFlow also provides functionality in C++, Java, and Go, -plus community support for [Haskell](https://github.com/tensorflow/haskell) and +plus community support for [Haskell](https://github.com/tensorflow/haskell) and [Rust](https://github.com/tensorflow/rust). If you'd like to create or develop TensorFlow features in a language other than these languages, read the following guide: diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md index be14ab4026..231108215a 100644 --- a/tensorflow/docs_src/get_started/get_started.md +++ b/tensorflow/docs_src/get_started/get_started.md @@ -330,8 +330,8 @@ When run, it produces W: [-0.9999969] b: [ 0.99999082] loss: 5.69997e-11 ``` -Notice that the loss is a very small number (very close to zero). If you run -this program, your loss may not be exactly the same as the aforementioned loss +Notice that the loss is a very small number (very close to zero). If you run +this program, your loss may not be exactly the same as the aforementioned loss because the model is initialized with pseudorandom values. This more complicated program can still be visualized in TensorBoard diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index f7380bac8a..28b04bab95 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -51,15 +51,15 @@ must be installed on your system:
         $ sudo apt-get install cuda-command-line-tools
         
    - + and add its path to your `LD_LIBRARY_PATH` environment variable: -
     
    -    $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64 
    +    
    +    $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64
         
    For CUDA Toolkit <= 7.5 do: - +
         $ sudo apt-get install libcupti-dev
         
    diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index aa4ae6c876..dbc90e8112 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -143,7 +143,7 @@ The following NVIDIA software must be installed on your system: particularly the description of appending the appropriate pathname to your `LD_LIBRARY_PATH` environment variable. -Finally, you must also install `libcupti` which for Cuda Toolkit >= 8.0 you do via +Finally, you must also install `libcupti` which for Cuda Toolkit >= 8.0 you do via
     $ sudo apt-get install cuda-command-line-tools 
    diff --git a/tensorflow/docs_src/mobile/android_build.md b/tensorflow/docs_src/mobile/android_build.md index 030cd0d051..b5a1d5d7d1 100644 --- a/tensorflow/docs_src/mobile/android_build.md +++ b/tensorflow/docs_src/mobile/android_build.md @@ -66,7 +66,7 @@ them. ## Adding TensorFlow to your apps using Android Studio -To add TensorFlow to your own apps on Android, the simplest way is to add the +To add TensorFlow to your own apps on Android, the simplest way is to add the following lines to your Gradle build file: allprojects { @@ -74,7 +74,7 @@ following lines to your Gradle build file: jcenter() } } - + dependencies { compile 'org.tensorflow:tensorflow-android:+' } diff --git a/tensorflow/docs_src/mobile/index.md b/tensorflow/docs_src/mobile/index.md index 6bcd7d09d9..419ae7094a 100644 --- a/tensorflow/docs_src/mobile/index.md +++ b/tensorflow/docs_src/mobile/index.md @@ -2,7 +2,7 @@ TensorFlow was designed to be a good deep learning solution for mobile platforms. Currently we have two solutions for deploying machine learning -applications on mobile and embedded devices: +applications on mobile and embedded devices: @{$mobile/mobile_intro$TensorFlow for Mobile} and @{$mobile/tflite$TensorFlow Lite}. ## TensorFlow Lite versus TensorFlow Mobile diff --git a/tensorflow/docs_src/mobile/ios_build.md b/tensorflow/docs_src/mobile/ios_build.md index 2e6d3bf90e..a04655052f 100644 --- a/tensorflow/docs_src/mobile/ios_build.md +++ b/tensorflow/docs_src/mobile/ios_build.md @@ -24,7 +24,7 @@ If you'd like to add TensorFlow capabilities to your own app, do the following: - Open `YourProjectName.xcworkspace` and add your code. -- In your app's **Build Settings**, make sure to add `$(inherited)` to the +- In your app's **Build Settings**, make sure to add `$(inherited)` to the **Other Linker Flags**, and **Header Search Paths** sections. ## Running the Samples diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index 1da8be5689..d9e8875c38 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -57,7 +57,7 @@ get one inference every two seconds. Having this estimate helps you plan for what you’ll be able to realistically achieve on a device. If the model is using too many ops, then there are a lot of -opportunities to optimize the architecture to reduce that number. +opportunities to optimize the architecture to reduce that number. Advanced techniques include [SqueezeNet](https://arxiv.org/abs/1602.07360) and [MobileNet](https://arxiv.org/abs/1704.04861), which are architectures @@ -278,7 +278,7 @@ The run above was on your desktop, but the tool also works on Android, which is where it’s most useful for mobile development. Here’s an example command line to run it on a 64-bit ARM device: - bazel build -c opt --config=android_arm64 \ + bazel build -c opt --config=android_arm64 \ tensorflow/tools/benchmark:benchmark_model adb push bazel-bin/tensorflow/tools/benchmark/benchmark_model /data/local/tmp adb push /tmp/tensorflow_inception_graph.pb /data/local/tmp/ diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 8fc65be35a..360ee302aa 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -131,9 +131,9 @@ needs to understand which parts of the graph are actually needed, and which are artifacts of the training process, like summarization ops. Only ops that contribute to calculating the given output nodes will be kept. If you know how your graph is going to be used, these should just be the names of the nodes you -pass into `Session::Run()` as your fetch targets. The easiest way to find the +pass into `Session::Run()` as your fetch targets. The easiest way to find the node names is to inspect the Node objects while building your graph in python. -Inspecting your graph in TensorBoard is another simple way. You can get some +Inspecting your graph in TensorBoard is another simple way. You can get some suggestions on likely outputs by running the [`summarize_graph` tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms/README.md#inspecting-graphs). Because the output format for TensorFlow has changed over time, there are a @@ -164,7 +164,7 @@ The trickiest part of this process is figuring out the names of the nodes you want to use as inputs and outputs during inference. You'll need these anyway once you start to run inference, but you also need them here so that the transform can calculate which nodes are not needed on the inference-only -path. These may not be obvious from the training code. The easiest way to +path. These may not be obvious from the training code. The easiest way to determine the node name is to explore the graph with TensorBoard. Remember that mobile applications typically gather their data from sensors and @@ -187,9 +187,9 @@ output nodes. If you’ve just been given a frozen `GraphDef` file, and are not sure about the contents, try using the `summarize_graph` tool to print out information about the inputs and outputs it finds from the graph structure. Here’s an -example with the original Inception v3 file: +example with the original Inception v3 file: - bazel run tensorflow/tools/graph_transforms:summarize_graph -- + bazel run tensorflow/tools/graph_transforms:summarize_graph -- --in_graph=tensorflow_inception_graph.pb Once you have an idea of what the input and output nodes are, you can feed them @@ -259,7 +259,7 @@ on how to do this, and also see @{$mobile/optimizing#binary_size$Optimizing} for more on reducing your binary size. ### Locate the implementation - + Operations are broken into two parts. The first is the op definition, which declares the signature of the operation, which inputs, outputs, and attributes it has. These take up very little space, and so all are included by default. The @@ -267,7 +267,7 @@ implementations of the op computations are done in kernels, which live in the `tensorflow/core/kernels` folder. You need to compile the C++ file containing the kernel implementation of the op you need into the library. To figure out which file that is, you can search for the operation name in the source -files. +files. [Here’s an example search in github](https://github.com/search?utf8=%E2%9C%93&q=repo%3Atensorflow%2Ftensorflow+extension%3Acc+path%3Atensorflow%2Fcore%2Fkernels+REGISTER+Mul&type=Code&ref=searchresults). diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md index 59daa2fe25..49d93669a2 100644 --- a/tensorflow/docs_src/mobile/tflite/index.md +++ b/tensorflow/docs_src/mobile/tflite/index.md @@ -40,7 +40,7 @@ TensorFlow Lite provides an interface to leverage hardware acceleration, if available on the device. It does so via the Android Neural Networks library, released as part of Android O-MR1. -## Why do we need a new mobile-specific library? +## Why do we need a new mobile-specific library? Machine Learning is changing the computing paradigm, and we see an emerging trend of new use cases on mobile and embedded devices. Consumer expectations are @@ -67,7 +67,7 @@ There are several factors which are fueling interest in this domain: connected to a network. We believe the next wave of machine learning applications will have significant -processing on mobile and embedded devices. +processing on mobile and embedded devices. ## TensorFlow Lite developer preview highlights diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index 8731cae0d7..34e8e5faf5 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -160,7 +160,7 @@ Notes: ### Inspect variables in a checkpoint -We can quickly inspect variables in a checkpoint with the +We can quickly inspect variables in a checkpoint with the [`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) library. Continuing from the save/restore examples shown earlier: diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md index 88eb277e35..47d4db2a56 100644 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ b/tensorflow/docs_src/programmers_guide/tensors.md @@ -43,8 +43,8 @@ generating a random number. The **rank** of a `tf.Tensor` object is its number of dimensions. Synonyms for rank include **order** or **degree** or **n-dimension**. -Note that rank in TensorFlow is not the same as matrix rank in mathematics. -As the following table shows, each rank in TensorFlow corresponds to a +Note that rank in TensorFlow is not the same as matrix rank in mathematics. +As the following table shows, each rank in TensorFlow corresponds to a different mathematical entity: Rank | Math entity @@ -56,7 +56,7 @@ Rank | Math entity n | n-Tensor (you get the idea) -### Rank 0 +### Rank 0 The following snippet demonstrates creating a few rank 0 variables: @@ -108,7 +108,7 @@ my_image = tf.zeros([10, 299, 299, 3]) # batch x height x width x color ### Getting a `tf.Tensor` object's rank To determine the rank of a `tf.Tensor` object, call the `tf.rank` method. -For example, the following method programmatically determines the rank +For example, the following method programmatically determines the rank of the `tf.Tensor` defined in the previous section: ```python diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md index f310b89380..16753c931f 100644 --- a/tensorflow/docs_src/programmers_guide/variables.md +++ b/tensorflow/docs_src/programmers_guide/variables.md @@ -37,7 +37,7 @@ You may optionally specify the `dtype` and initializer to `tf.get_variable`. For example: ``` python -my_int_variable = tf.get_variable("my_int_variable", [1, 2, 3], dtype=tf.int32, +my_int_variable = tf.get_variable("my_int_variable", [1, 2, 3], dtype=tf.int32, initializer=tf.zeros_initializer) ``` @@ -45,7 +45,7 @@ TensorFlow provides many convenient initializers. Alternatively, you may initialize a `tf.Variable` to have the value of a `tf.Tensor`. For example: ``` python -other_variable = tf.get_variable("other_variable", dtype=tf.int32, +other_variable = tf.get_variable("other_variable", dtype=tf.int32, initializer=tf.constant([23, 42])) ``` @@ -66,13 +66,13 @@ By default every `tf.Variable` gets placed in the following two collections: multiple devices, * `tf.GraphKeys.TRAINABLE_VARIABLES`--- variables for which TensorFlow will calculate gradients. - + If you don't want a variable to be trainable, add it to the `tf.GraphKeys.LOCAL_VARIABLES` collection instead. For example, the following snippet demonstrates how to add a variable named `my_local` to this collection: ``` python -my_local = tf.get_variable("my_local", shape=(), +my_local = tf.get_variable("my_local", shape=(), collections=[tf.GraphKeys.LOCAL_VARIABLES]) ``` @@ -80,8 +80,8 @@ Alternatively, you can specify `trainable=False` as an argument to `tf.get_variable`: ``` python -my_non_trainable = tf.get_variable("my_non_trainable", - shape=(), +my_non_trainable = tf.get_variable("my_non_trainable", + shape=(), trainable=False) ``` @@ -126,7 +126,7 @@ cluster_spec = { "ps": ["ps0:2222", "ps1:2222"], "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]} with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)): - v = tf.get_variable("v", shape=[20, 20]) # this variable is placed + v = tf.get_variable("v", shape=[20, 20]) # this variable is placed # in the parameter server # by the replica_device_setter ``` @@ -142,7 +142,7 @@ high-level frameworks such as `tf.contrib.slim`, `tf.estimator.Estimator` and Explicit initialization is otherwise useful because it allows you not to rerun potentially expensive initializers when reloading a model from a checkpoint as well as allowing determinism when randomly-initialized variables are shared in a -distributed setting. +distributed setting. To initialize all trainable variables in one go, before training starts, call `tf.global_variables_initializer()`. This function returns a single operation diff --git a/tensorflow/docs_src/tutorials/image_recognition.md b/tensorflow/docs_src/tutorials/image_recognition.md index ddb771700a..df13eabead 100644 --- a/tensorflow/docs_src/tutorials/image_recognition.md +++ b/tensorflow/docs_src/tutorials/image_recognition.md @@ -42,7 +42,7 @@ For example, here are the results from [AlexNet] classifying some images: To compare models, we examine how often the model fails to predict the correct answer as one of their top 5 guesses -- termed "top-5 error rate". [AlexNet] achieved by setting a top-5 error rate of 15.3% on the 2012 -validation data set; [Inception (GoogLeNet)] achieved 6.67%; +validation data set; [Inception (GoogLeNet)] achieved 6.67%; [BN-Inception-v2] achieved 4.9%; [Inception-v3] reaches 3.46%. > How well do humans do on ImageNet Challenge? There's a [blog post] by diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java b/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java index eabc724f7f..07995febaf 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java @@ -100,7 +100,7 @@ public interface Classifier { List recognizeImage(Bitmap bitmap); void enableStatLogging(final boolean debug); - + String getStatString(); void close(); diff --git a/tensorflow/examples/ios/README.md b/tensorflow/examples/ios/README.md index 7d2eb870be..5bdaeb43ce 100644 --- a/tensorflow/examples/ios/README.md +++ b/tensorflow/examples/ios/README.md @@ -6,7 +6,7 @@ This folder contains examples of how to build applications for iOS devices using - You'll need Xcode 7.3 or later. - There are currently three examples: simple, benchmark, and camera. For now, - you can download the sample code by cloning the main tensorflow repository + you can download the sample code by cloning the main tensorflow repository (we are planning to make the samples available as a separate repository later). @@ -48,8 +48,8 @@ open tf_simple_example.xcworkspace # obs, not the .xcodeproj directory ### Troubleshooting - Make sure you use the TensorFlow-experimental pod (and not TensorFlow). - - - The TensorFlow-experimental pod is current about ~450MB. The reason it is + + - The TensorFlow-experimental pod is current about ~450MB. The reason it is so big is because we are bundling multiple platforms, and the pod includes all TensorFlow functionality (e.g. operations). The final app size after build is substantially smaller though (~25MB). Working with the complete @@ -91,7 +91,7 @@ target 'YourProjectName' open up the Xcode project in the `camera` subfolder. Once you build and run that, you should get a live camera view that you can point at objects to get real-time recognition results. - + ### Troubleshooting If you're hitting problems, here's a checklist of common things to investigate: diff --git a/tensorflow/examples/tutorials/deepdream/README.md b/tensorflow/examples/tutorials/deepdream/README.md index 3a715f6224..403e4b34f9 100644 --- a/tensorflow/examples/tutorials/deepdream/README.md +++ b/tensorflow/examples/tutorials/deepdream/README.md @@ -2,7 +2,7 @@ by [Alexander Mordvintsev](mailto:moralex@google.com) -This directory contains Jupyter notebook that demonstrates a number of Convolutional Neural Network +This directory contains Jupyter notebook that demonstrates a number of Convolutional Neural Network image generation techniques implemented with TensorFlow: - visualizing individual feature channels and their combinations to explore the space of patterns learned by the neural network (see [GoogLeNet](http://storage.googleapis.com/deepdream/visualz/tensorflow_inception/index.html) and [VGG16](http://storage.googleapis.com/deepdream/visualz/vgg16/index.html) galleries) @@ -11,8 +11,8 @@ image generation techniques implemented with TensorFlow: - using Laplacian Pyramid Gradient Normalization to produce smooth and colorful visuals at low cost - generating DeepDream-like images with TensorFlow -You can view "deepdream.ipynb" directly on GitHub. Note that GitHub Jupyter notebook preview removes -embedded graph visualizations. You can still see them online +You can view "deepdream.ipynb" directly on GitHub. Note that GitHub Jupyter notebook preview removes +embedded graph visualizations. You can still see them online [using nbviewer](http://nbviewer.jupyter.org/github/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb) service. @@ -23,5 +23,5 @@ In order to run the notebook locally, the following dependencies must be install - NumPy - Jupyter Notebook -To open the notebook, run `ipython notebook` command in this directory, and +To open the notebook, run `ipython notebook` command in this directory, and select 'deepdream.ipynb' in the opened browser window. diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md index 6faad294c2..f80c56d1c1 100644 --- a/tensorflow/examples/udacity/README.md +++ b/tensorflow/examples/udacity/README.md @@ -43,15 +43,15 @@ In addition, you may need to pass `--memory=8g` as an extra argument to `docker-machine` is a tool to provision and manage docker hosts, it supports multiple platform (ex. aws, gce, azure, virtualbox, ...). To create a new virtual machine locally with built-in docker engine, you can use docker-machine create -d virtualbox --virtualbox-memory 8196 tensorflow - + `-d` means the driver for the cloud platform, supported drivers listed [here](https://docs.docker.com/machine/drivers/). Here we use virtualbox to create a new virtual machine locally. `tensorflow` means the name of the virtual machine, feel free to use whatever you like. You can use docker-machine ip tensorflow - + to get the ip of the new virtual machine. To switch from default virtual machine to a new one (here we use tensorflow), type eval $(docker-machine env tensorflow) - + Note that `docker-machine env tensorflow` outputs some environment variables such like `DOCKER_HOST`. Then your docker client is now connected to the docker host in virtual machine `tensorflow` * **I'm getting a TLS connection error.** diff --git a/tensorflow/g3doc/README.txt b/tensorflow/g3doc/README.txt index 6eaf1e1bda..ed648f8b6b 100644 --- a/tensorflow/g3doc/README.txt +++ b/tensorflow/g3doc/README.txt @@ -7,7 +7,7 @@ Documentation (on Github, tensorflow.org, and anywhere else we decide to serve it from) is now generated from the files in tensorflow/docs_src/ (for tutorials and other guides) and TensorFlow source code (for the API reference pages). If you see a problem with -API reference, edit the code comments in the appropriate language. If you see a +API reference, edit the code comments in the appropriate language. If you see a problem with our other docs, edit the files in docs_src. To preview the results of your changes, or generate an offline copy of diff --git a/tensorflow/java/src/gen/perl/tftypes-runall.pl b/tensorflow/java/src/gen/perl/tftypes-runall.pl index a451ce92aa..65fe3b1506 100644 --- a/tensorflow/java/src/gen/perl/tftypes-runall.pl +++ b/tensorflow/java/src/gen/perl/tftypes-runall.pl @@ -1,13 +1,13 @@ #!/usr/bin/perl # # Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tensorflow/java/src/gen/perl/tftypes.pl b/tensorflow/java/src/gen/perl/tftypes.pl index 115723ac8a..c7c62e916f 100644 --- a/tensorflow/java/src/gen/perl/tftypes.pl +++ b/tensorflow/java/src/gen/perl/tftypes.pl @@ -1,13 +1,13 @@ #!/usr/bin/perl # # Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -156,7 +156,7 @@ for (my $i = 1; $i <= $#info; $i++) { ." * String elements are sequences of bytes from the last array dimension.\n"; } - + my $intro = ($trank > 0) ? "Creates a rank-$trank tensor of {\@code $jtype} elements." : "Creates a scalar tensor containing a single {\@code $jtype} element."; diff --git a/tensorflow/java/src/gen/resources/Tensors.java.tmpl b/tensorflow/java/src/gen/resources/Tensors.java.tmpl index 98e1588559..e615524c8e 100644 --- a/tensorflow/java/src/gen/resources/Tensors.java.tmpl +++ b/tensorflow/java/src/gen/resources/Tensors.java.tmpl @@ -11,7 +11,7 @@ public final class Tensors { private Tensors() {} /** Creates a scalar String tensor using the default, UTF-8 encoding. - * + * * @param data The string to put into the new scalar tensor. */ public static Tensor create(String data) { @@ -19,7 +19,7 @@ public final class Tensors { } /** Creates a scalar String tensor using a specified encoding. - * + * * @param charset The encoding from String to bytes. * @param data The string to put into the new scalar tensor. */ diff --git a/tensorflow/python/grappler/model_analyzer.i b/tensorflow/python/grappler/model_analyzer.i index d74bd37c63..726143a0bb 100644 --- a/tensorflow/python/grappler/model_analyzer.i +++ b/tensorflow/python/grappler/model_analyzer.i @@ -48,7 +48,7 @@ string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph) { if (!item) { return "Error: failed to preprocess metagraph: check your log file for errors"; } - + string suffix; tensorflow::grappler::ModelAnalyzer analyzer(*item); diff --git a/tensorflow/stream_executor/cuda/cuda_platform.cc b/tensorflow/stream_executor/cuda/cuda_platform.cc index 874ac1ab65..3a73846148 100644 --- a/tensorflow/stream_executor/cuda/cuda_platform.cc +++ b/tensorflow/stream_executor/cuda/cuda_platform.cc @@ -197,7 +197,7 @@ void CudaPlatform::UnregisterTraceListener(TraceListener* listener) { static void InitializeCudaPlatform() { // Disabling leak checking, MultiPlatformManager does not destroy its // registered platforms. - + std::unique_ptr platform(new cuda::CudaPlatform); SE_CHECK_OK(MultiPlatformManager::RegisterPlatform(std::move(platform))); } diff --git a/tensorflow/stream_executor/lib/static_threadlocal.h b/tensorflow/stream_executor/lib/static_threadlocal.h index 6e2bd0d455..02720cbd26 100644 --- a/tensorflow/stream_executor/lib/static_threadlocal.h +++ b/tensorflow/stream_executor/lib/static_threadlocal.h @@ -17,7 +17,7 @@ limitations under the License. #define TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_ #ifdef _MSC_VER -#define __thread __declspec(thread) +#define __thread __declspec(thread) #endif // For POD types in TLS mode, s_obj_VAR is the thread-local variable. diff --git a/tensorflow/tools/ci_build/README.md b/tensorflow/tools/ci_build/README.md index 202fcb9101..f2161b700a 100644 --- a/tensorflow/tools/ci_build/README.md +++ b/tensorflow/tools/ci_build/README.md @@ -67,10 +67,10 @@ this UI, to see the logs for a failed build: the build tool divided the target into multiple shards or ran the test multiple times. Each test log is specific to the shard, run, and attempt. To see a specific log: - + 1. Click on the log icon that is on the right next to the shard, run, and attempt number. - + 2. In the grid that appears on the right, click on the specific shard, run, and attempt to view its log. You can also type the desired shard, run, or attempt number in the field above its grid. diff --git a/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh b/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh index ea4906588d..e703e78531 100755 --- a/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh +++ b/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh @@ -43,7 +43,7 @@ # NOTES: # If you have the error "$'\r': command not found" # Please run the command below to remove trailing '\r' character that causes the error: -# sed -i 's/\r$//' dist_mnist_test.sh +# sed -i 's/\r$//' dist_mnist_test.sh # Configurations diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index e35c58ff80..f46c56e11a 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -41,7 +41,7 @@ Note: If you would have a problem running nvidia-docker you may try the old meth we have used. But it is not recommended. If you find a bug in nvidia-docker, please report it there and try using nvidia-docker as described above. - $ # The old, not recommended way to run docker with gpu support: + $ # The old, not recommended way to run docker with gpu support: $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md index c7f7eca257..345d9eadb8 100644 --- a/tensorflow/tools/graph_transforms/README.md +++ b/tensorflow/tools/graph_transforms/README.md @@ -95,9 +95,9 @@ transforms to modify the graph with. The transforms are given as a list of names, and can each have arguments themselves. These transforms define the pipeline of modifications that are applied in order to produce the output. Sometimes you need some transforms to happen before others, and the ordering -within the list lets you specify which happen first. -Note that the optimization -`remove_nodes(op=Identity, op=CheckNumerics)` will break the model with control +within the list lets you specify which happen first. +Note that the optimization +`remove_nodes(op=Identity, op=CheckNumerics)` will break the model with control flow operations, such as `tf.cond`, `tf.map_fn`, and `tf.while`. ## Inspecting Graphs -- GitLab From b924e9f4c380dc85df433106df5f3c6a875318ac Mon Sep 17 00:00:00 2001 From: scott Date: Mon, 27 Nov 2017 23:14:19 +0800 Subject: [PATCH 0829/1801] add extra document to parameter:num_epochs --- .../contrib/slim/python/slim/data/dataset_data_provider.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py index 82c6b5a619..41426a6508 100644 --- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py +++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py @@ -62,7 +62,9 @@ class DatasetDataProvider(data_provider.DataProvider): seed=None, scope=None): """Creates a DatasetDataProvider. - + Note: if `num_epochs` is not `None`, local counter `epochs` will be created + by relevant function. Use `local_variables_initializer()` to initialize + local variables. Args: dataset: An instance of the Dataset class. num_readers: The number of parallel readers to use. -- GitLab From a7c11adad8fa445be1083467ceb76b2d7c98b005 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 07:46:52 -0800 Subject: [PATCH 0830/1801] Update metric library to allow non-literal strings. PiperOrigin-RevId: 177015295 --- .../core/lib/monitoring/collection_registry.h | 4 +- tensorflow/core/lib/monitoring/metric_def.h | 55 +++++++------------ .../core/lib/monitoring/metric_def_test.cc | 18 ++++++ 3 files changed, 39 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h index 030f8e360a..113d37e07d 100644 --- a/tensorflow/core/lib/monitoring/collection_registry.h +++ b/tensorflow/core/lib/monitoring/collection_registry.h @@ -321,13 +321,13 @@ void MetricCollector::CollectValue( const std::array& labels, const Value& value) { point_set_->points.emplace_back(new Point()); auto* const point = point_set_->points.back().get(); - const std::vector label_descriptions = + const std::vector label_descriptions = metric_def_->label_descriptions(); point->labels.reserve(NumLabels); for (int i = 0; i < NumLabels; ++i) { point->labels.push_back({}); auto* const label = &point->labels.back(); - label->name = label_descriptions[i].ToString(); + label->name = label_descriptions[i]; label->value = labels[i]; } internal::CollectValue(value, point); diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h index 3459c2ab82..a7f14f9c94 100644 --- a/tensorflow/core/lib/monitoring/metric_def.h +++ b/tensorflow/core/lib/monitoring/metric_def.h @@ -43,24 +43,6 @@ enum class ValueType : int { kInt64 = 0, kHistogram, kString }; // on this. namespace internal { -// Ensures that the string is a compile-time string literal. -class StringLiteral { - public: - // We allow implicit conversions here on purpose. - template - StringLiteral(const char (&data)[N]) : literal_(data, N - 1) {} - - // This ctor will be called for non-literals, causing compile-time failure. - template - StringLiteral(const NotStringLiteral& not_string_literal) = delete; - - // Implicit conversion to StringPiece. - operator StringPiece() const { return literal_; } - - private: - const StringPiece literal_; -}; - template ValueType GetValueType(); @@ -98,7 +80,7 @@ class AbstractMetricDef { StringPiece description() const { return description_; } - const std::vector label_descriptions() const { + const std::vector& label_descriptions() const { return label_descriptions_; } @@ -106,23 +88,21 @@ class AbstractMetricDef { template friend class MetricDef; - AbstractMetricDef( - const MetricKind kind, const ValueType value_type, - const internal::StringLiteral name, - const internal::StringLiteral description, - const std::vector& label_descriptions) + AbstractMetricDef(const MetricKind kind, const ValueType value_type, + const StringPiece name, const StringPiece description, + const std::vector& label_descriptions) : kind_(kind), value_type_(value_type), - name_(name), - description_(description), - label_descriptions_(std::vector( - label_descriptions.begin(), label_descriptions.end())) {} + name_(name.ToString()), + description_(description.ToString()), + label_descriptions_(std::vector(label_descriptions.begin(), + label_descriptions.end())) {} const MetricKind kind_; const ValueType value_type_; - const StringPiece name_; - const StringPiece description_; - const std::vector label_descriptions_; + const string name_; + const string description_; + const std::vector label_descriptions_; }; // Metric definition. @@ -130,15 +110,18 @@ class AbstractMetricDef { // A metric is defined by its kind, value-type, name, description and the // description of its labels. // -// NOTE: We allow only string literals for the name, description and label -// descriptions because these should be fixed at compile-time and shouldn't be -// dynamic. +// NOTE: Name, description, and label descriptions should be logically static, +// but do not have to live for the lifetime of the MetricDef. +// +// By "logically static", we mean that they should never contain dynamic +// information, but is static for the lifetime of the MetricDef, and +// in-turn the metric; they do not need to be compile-time constants. +// This allows for e.g. prefixed metrics in a CLIF wrapped environment. template class MetricDef : public AbstractMetricDef { public: template - MetricDef(const internal::StringLiteral name, - const internal::StringLiteral description, + MetricDef(const StringPiece name, const StringPiece description, const LabelDesc&... label_descriptions) : AbstractMetricDef(metric_kind, internal::GetValueType(), name, description, {label_descriptions...}) { diff --git a/tensorflow/core/lib/monitoring/metric_def_test.cc b/tensorflow/core/lib/monitoring/metric_def_test.cc index dc07a08e4f..66973b6b5f 100644 --- a/tensorflow/core/lib/monitoring/metric_def_test.cc +++ b/tensorflow/core/lib/monitoring/metric_def_test.cc @@ -41,6 +41,24 @@ TEST(MetricDefTest, Simple) { EXPECT_EQ("LabelName", metric_def1.label_descriptions()[0]); } +TEST(MetricDefTest, StringsPersist) { + // Ensure string attributes of the metric are copied into the metric + string name = "/tensorflow/metric0"; + string description = "test description"; + string label_description = "test label description"; + const MetricDef metric_def( + name, description, label_description); + + // Mutate the strings + name[4] = 'A'; + description[4] = 'B'; + label_description[4] = 'C'; + + EXPECT_NE(name, metric_def.name()); + EXPECT_NE(description, metric_def.description()); + EXPECT_NE(label_description, metric_def.label_descriptions()[0]); +} + } // namespace } // namespace monitoring } // namespace tensorflow -- GitLab From 26f43e6a8e1c234060096f21f1fd57d3cf57cfbc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 08:18:45 -0800 Subject: [PATCH 0831/1801] Delete trailing whitespace in Python code PiperOrigin-RevId: 177018504 --- .../framework/python/ops/accumulate_n_v2.py | 6 +- .../python/ops/accumulate_n_v2_eager_test.py | 8 +- .../python/ops/accumulate_n_v2_test.py | 8 +- .../single_image_random_dot_stereograms.py | 8 +- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 4 +- .../training/sgdr_learning_rate_decay.py | 6 +- .../python/client/session_partial_run_test.py | 4 +- .../inputs/queues/feeding_functions.py | 12 +-- .../python/framework/tensor_util_test.py | 78 +++++++++---------- .../python/kernel_tests/cast_op_test.py | 8 +- .../tools/optimize_for_inference_test.py | 2 +- 11 files changed, 72 insertions(+), 72 deletions(-) diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py index a0667bd489..2375ee4f55 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py +++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py @@ -48,7 +48,7 @@ def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): tf.accumulate_n_v2([a, b, a]) # [[7, 4], [6, 14]] # Explicitly pass shape and type - tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) + tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) # [[7, 4], # [6, 14]] ``` @@ -93,7 +93,7 @@ def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): elif len(inputs) == 1 and name is not None: return array_ops.identity(inputs[0], name=name) elif context.in_eager_mode(): - # TemporaryVariable not currently supported in eager mode; fall back + # TemporaryVariable not currently supported in eager mode; fall back # onto AddN for now. # TODO(frreiss) remove this once the lifetime of eager variables gets # addressed @@ -101,7 +101,7 @@ def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): else: return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) -# The following code should eventually be merged into +# The following code should eventually be merged into # tensorflow/python/ops/math_grad.py @ops.RegisterGradient("AccumulateNV2") def _AddNGrad(op, grad): diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py index c2229bb8ad..8f44698da8 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py +++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into +"""Tests for new version of accumulate_n op that will eventually go into `ops.math_ops`. -These test cases spefically exercise the `eager` APIs. They need to be in a +These test cases spefically exercise the `eager` APIs. They need to be in a separate file from the remaining tests because eager mode is currently something you can turn on but can't turn off for the lifetime of the current process.""" from __future__ import absolute_import @@ -64,7 +64,7 @@ class AccumulateNV2EagerTest(test_util.TensorFlowTestCase): np.random.seed(42) num_inputs = 3 input_vars = [ - resource_variable_ops.ResourceVariable(10.0 * np.random.random(), + resource_variable_ops.ResourceVariable(10.0 * np.random.random(), name="t%d" % i) for i in range(0, num_inputs) ] @@ -72,7 +72,7 @@ class AccumulateNV2EagerTest(test_util.TensorFlowTestCase): def fn(first, second, third): return av2.accumulate_n_v2([first, second, third]) - grad_fn = backprop.gradients_function(fn) + grad_fn = backprop.gradients_function(fn) grad = grad_fn(input_vars[0], input_vars[1], input_vars[2]) self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 [elem.numpy() for elem in grad]) diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py index 3386e849d5..b5e9f8df79 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py +++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into +"""Tests for new version of accumulate_n op that will eventually go into `ops.math_ops`.""" from __future__ import absolute_import from __future__ import division @@ -102,21 +102,21 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): a = variables.Variable(np.array([0.1,0.2])) b = variables.Variable(np.array([[0.3],[0.4]])) - tf_val = av2.accumulate_n_v2([a,b]) + tf_val = av2.accumulate_n_v2([a,b]) def testWrongType(self): with self.test_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) b = variables.Variable(0.1, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a,b], tensor_dtype=np.int32) + tf_val = av2.accumulate_n_v2([a,b], tensor_dtype=np.int32) def testWrongTypeOneInput(self): # Scenario that used to trigger a bug, even when testWrongType() worked with self.test_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) + tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) if __name__ == "__main__": diff --git a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py index 5cccf26028..bb766e59d2 100755 --- a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py +++ b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py @@ -68,7 +68,7 @@ def single_image_random_dot_stereograms( ``` Args: - depth_values: A `Tensor`. Must be one of the following types: + depth_values: A `Tensor`. Must be one of the following types: `float64`, `float32`, `int64`, `int32`. Z values of data to encode into 'output_data_window' window, lower further away {0.0 floor(far), 1.0 ceiling(near) after norm}, must be 2-D tensor @@ -84,17 +84,17 @@ def single_image_random_dot_stereograms( mu: An optional `float`. Defaults to `0.3333`. Depth of field, Fraction of viewing distance (eg. 1/3 = 0.3333) normalize: An optional `bool`. Defaults to `True`. - Normalize input data to [0.0, 1.0] + Normalize input data to [0.0, 1.0] normalize_max: An optional `float`. Defaults to `-100`. Fix MAX value for Normalization (0.0) - if < MIN, autoscale normalize_min: An optional `float`. Defaults to `100`. Fix MIN value for Normalization (0.0) - if > MAX, autoscale border_level: An optional `float`. Defaults to `0`. - Value of bord in depth 0.0 {far} to 1.0 {near} + Value of bord in depth 0.0 {far} to 1.0 {near} number_colors: An optional `int`. Defaults to `256`. 2 (Black & White), 256 (grayscale), and Numbers > 256 (Full Color) are supported - output_image_shape: An optional `tf.TensorShape` or list of `ints`. + output_image_shape: An optional `tf.TensorShape` or list of `ints`. Defaults to shape `[1024, 768, 1]`. Defines output shape of returned image in '[X,Y, Channels]' 1-grayscale, 3 color; channels will be updated to 3 if number_colors > 256 diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 0698d40438..289359e5ec 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2120,7 +2120,7 @@ class Conv3DLSTMCell(ConvLSTMCell): def _conv(args, filter_size, num_features, bias, bias_start=0.0): """convolution: Args: - args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, + args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, batch x n, Tensors. filter_size: int tuple of filter height and width. num_features: int, number of features. @@ -2214,7 +2214,7 @@ class GLSTMCell(rnn_cell_impl.RNNCell): has the given variables, an error is raised. Raises: - ValueError: If `num_units` or `num_proj` is not divisible by + ValueError: If `num_units` or `num_proj` is not divisible by `number_of_groups`. """ super(GLSTMCell, self).__init__(_reuse=reuse) diff --git a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py index 0ef5f111b2..ed0f398e30 100644 --- a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py +++ b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py @@ -28,7 +28,7 @@ from tensorflow.python.ops import math_ops, control_flow_ops def sgdr_decay(learning_rate, global_step, initial_period_steps, t_mul=2.0, m_mul=1.0, name=None): """Implements Stochastic Gradient Descent with Warm Restarts (SGDR). - + As described in "SGDR: Stochastic Gradient Descent with Warm Restarts" by Ilya Loshchilov & Frank Hutter, Proceedings of ICLR'2017, available at https://arxiv.org/pdf/1608.03983.pdf @@ -48,7 +48,7 @@ def sgdr_decay(learning_rate, global_step, initial_period_steps, where `t_0` = `initial_period_steps` is the user-defined number of batch iterations (not epochs as in the paper) to be performed before the first restart is launched. - + Then, we perform the first restart (i=1) by setting the learning rate to `learning_rate*(m_mul^i)`, where `m_mul in [0,1]` (set to 1 by default). The i-th restart runs for `t_i=t_0*(t_mul^i)` steps, i.e., every new @@ -73,7 +73,7 @@ def sgdr_decay(learning_rate, global_step, initial_period_steps, Training dataset size: 10000 If the user wants the first decay period to span across 5 epochs, then `initial_period_steps` = 5 * 10000/100 = 500 - + Train for 10000 batch iterations with the initial learning rate set to 0.1, then restart to run 2 times longer, i.e, for 20000 batch iterations and with the initial learning rate 0.05, then restart again and again, diff --git a/tensorflow/python/client/session_partial_run_test.py b/tensorflow/python/client/session_partial_run_test.py index 6ecf0fc6c7..6a389b078a 100644 --- a/tensorflow/python/client/session_partial_run_test.py +++ b/tensorflow/python/client/session_partial_run_test.py @@ -199,11 +199,11 @@ class PartialRunTestMethods(object): def testPartialRunSetupNoFeedsPassed(self): sess = session.Session() r1 = constant_op.constant([6.0]) - + h = sess.partial_run_setup([r1]) result1 = sess.partial_run(h, r1) self.assertEqual([6.0], result1) - + def testPartialRunDirect(self): self.RunTestPartialRun(session.Session()) diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index c0a287e922..75c0e61d47 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -47,13 +47,13 @@ except ImportError: def _fill_array(arr, seq, fillvalue=0): - """ - Recursively fills padded arr with elements from seq. + """ + Recursively fills padded arr with elements from seq. If length of seq is less than arr padded length, fillvalue used. Args: arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len]. - seq: Non-padded list of data sampels of shape + seq: Non-padded list of data sampels of shape [batch_size, ..., padded_dim(None)] fillvalue: Default fillvalue to use. """ @@ -73,12 +73,12 @@ def _pad_if_needed(batch_key_item, fillvalue=0): """ Returns padded batch. Args: - batch_key_item: List of data samples of any type with shape + batch_key_item: List of data samples of any type with shape [batch_size, ..., padded_dim(None)]. fillvalue: Default fillvalue to use. Returns: - Padded with zeros tensor of same type and shape + Padded with zeros tensor of same type and shape [batch_size, ..., max_padded_dim_len]. Raises: @@ -375,7 +375,7 @@ def _enqueue_data(data, arrays, a numpy `ndarray`, or a generator producing these. NotImplementedError: padding and shuffling data at the same time. NotImplementedError: padding usage with non generator data type. - """ + """ with ops.name_scope(name): if isinstance(data, np.ndarray): types = [dtypes.int64, dtypes.as_dtype(data.dtype)] diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index b4f28cfce0..f2de69e159 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -51,9 +51,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0]) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_FLOAT - tensor_shape { dim { size: 3 } } - tensor_content: "A \000\000A\240\000\000A\360\000\000" + dtype: DT_FLOAT + tensor_shape { dim { size: 3 } } + tensor_content: "A \000\000A\240\000\000A\360\000\000" """, t) else: self.assertProtoEquals(""" @@ -69,9 +69,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0], dtype=dtypes.float32) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_FLOAT - tensor_shape { dim { size: 3 } } - tensor_content: "A \000\000A\240\000\000A\360\000\000" + dtype: DT_FLOAT + tensor_shape { dim { size: 3 } } + tensor_content: "A \000\000A\240\000\000A\360\000\000" """, t) else: self.assertProtoEquals(""" @@ -87,9 +87,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto([10, 20, 30], dtype=dtypes.float32) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_FLOAT - tensor_shape { dim { size: 3 } } - tensor_content: "A \000\000A\240\000\000A\360\000\000" + dtype: DT_FLOAT + tensor_shape { dim { size: 3 } } + tensor_content: "A \000\000A\240\000\000A\360\000\000" """, t) else: self.assertProtoEquals(""" @@ -106,9 +106,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto(arr, dtype=dtypes.float32) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_FLOAT - tensor_shape { dim { size: 3 } } - tensor_content: "A \000\000A\240\000\000A\360\000\000" + dtype: DT_FLOAT + tensor_shape { dim { size: 3 } } + tensor_content: "A \000\000A\240\000\000A\360\000\000" """, t) else: self.assertProtoEquals(""" @@ -124,9 +124,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0], shape=[1, 3]) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_FLOAT - tensor_shape { dim { size: 1 } dim { size: 3 } } - tensor_content: "A \000\000A\240\000\000A\360\000\000" + dtype: DT_FLOAT + tensor_shape { dim { size: 1 } dim { size: 3 } } + tensor_content: "A \000\000A\240\000\000A\360\000\000" """, t) else: self.assertProtoEquals(""" @@ -142,9 +142,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0], shape=[3, 1]) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_FLOAT - tensor_shape { dim { size: 3 } dim { size: 1 } } - tensor_content: "A \000\000A\240\000\000A\360\000\000" + dtype: DT_FLOAT + tensor_shape { dim { size: 3 } dim { size: 1 } } + tensor_content: "A \000\000A\240\000\000A\360\000\000" """, t) else: self.assertProtoEquals(""" @@ -170,9 +170,9 @@ class TensorUtilTest(test.TestCase): np.array([[10.0, 20.0, 30.0]], dtype=np.float64)) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_DOUBLE - tensor_shape { dim { size: 1 } dim { size: 3 } } - tensor_content: "@$\000\000\000\000\000\000@4\000\000\000\000\000\000@>\000\000\000\000\000\000" + dtype: DT_DOUBLE + tensor_shape { dim { size: 1 } dim { size: 3 } } + tensor_content: "@$\000\000\000\000\000\000@4\000\000\000\000\000\000@>\000\000\000\000\000\000" """, t) else: self.assertProtoEquals(""" @@ -261,9 +261,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto([10, 20, 30, 40], shape=[2, 2]) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_INT32 - tensor_shape { dim { size: 2 } dim { size: 2 } } - tensor_content: "\000\000\000\\n\000\000\000\024\000\000\000\036\000\000\000(" + dtype: DT_INT32 + tensor_shape { dim { size: 2 } dim { size: 2 } } + tensor_content: "\000\000\000\\n\000\000\000\024\000\000\000\036\000\000\000(" """, t) else: self.assertProtoEquals(""" @@ -342,9 +342,9 @@ class TensorUtilTest(test.TestCase): [10, 20, 30], shape=[1, 3], dtype=dtypes.int64) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_INT64 - tensor_shape { dim { size: 1 } dim { size: 3 } } - tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036" + dtype: DT_INT64 + tensor_shape { dim { size: 1 } dim { size: 3 } } + tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036" """, t) else: self.assertProtoEquals(""" @@ -360,9 +360,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto(np.array([10, 20, 30])) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_INT64 - tensor_shape { dim { size: 3 } } - tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036" + dtype: DT_INT64 + tensor_shape { dim { size: 3 } } + tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036" """, t) else: self.assertProtoEquals(""" @@ -381,9 +381,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint32) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_QINT32 - tensor_shape { dim { size: 3 } } - tensor_content: "\000\000\000\025\000\000\000\026\000\000\000\027" + dtype: DT_QINT32 + tensor_shape { dim { size: 3 } } + tensor_content: "\000\000\000\025\000\000\000\026\000\000\000\027" """, t) else: self.assertProtoEquals(""" @@ -418,9 +418,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint16) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_QUINT16 - tensor_shape { dim { size: 3 } } - tensor_content: "\000\025\000\026\000\027" + dtype: DT_QUINT16 + tensor_shape { dim { size: 3 } } + tensor_content: "\000\025\000\026\000\027" """, t) else: self.assertProtoEquals(""" @@ -435,9 +435,9 @@ class TensorUtilTest(test.TestCase): t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint16) if sys.byteorder == "big": self.assertProtoEquals(""" - dtype: DT_QINT16 - tensor_shape { dim { size: 3 } } - tensor_content: "\000\025\000\026\000\027" + dtype: DT_QINT16 + tensor_shape { dim { size: 3 } } + tensor_content: "\000\025\000\026\000\027" """, t) else: self.assertProtoEquals(""" diff --git a/tensorflow/python/kernel_tests/cast_op_test.py b/tensorflow/python/kernel_tests/cast_op_test.py index c785f2358d..214d5cb3c0 100644 --- a/tensorflow/python/kernel_tests/cast_op_test.py +++ b/tensorflow/python/kernel_tests/cast_op_test.py @@ -144,9 +144,9 @@ class CastOpTest(test.TestCase): self._compare(np.inf, np.float32, np.inf, False) self._compare(np.inf, np.float64, np.inf, False) - if sys.byteorder == "big": - self._compare(np.inf, np.int32, i4.max, False) - self._compare(np.inf, np.int64, i8.max, False) + if sys.byteorder == "big": + self._compare(np.inf, np.int32, i4.max, False) + self._compare(np.inf, np.int64, i8.max, False) else: # np.float64("np.inf").astype(np.int32) is negative on x86 but positive on ppc64le # Numpy link to relevant discussion - https://github.com/numpy/numpy/issues/9040 @@ -156,7 +156,7 @@ class CastOpTest(test.TestCase): self._compare(-np.inf, np.int64, i8.min, False) else: self._compare(np.inf, np.int32, i4.min, False) - self._compare(np.inf, np.int64, i8.min, False) + self._compare(np.inf, np.int64, i8.min, False) self._compare(-np.inf, np.float32, -np.inf, False) self._compare(-np.inf, np.float64, -np.inf, False) self._compare(-np.inf, np.int32, i4.min, False) diff --git a/tensorflow/python/tools/optimize_for_inference_test.py b/tensorflow/python/tools/optimize_for_inference_test.py index 447057cfe9..6dd24c0dca 100644 --- a/tensorflow/python/tools/optimize_for_inference_test.py +++ b/tensorflow/python/tools/optimize_for_inference_test.py @@ -272,7 +272,7 @@ class OptimizeForInferenceTest(test.TestCase): for node in optimized_graph_def.node: self.assertNotEqual("Conv2D", node.op) self.assertNotEqual("MirrorPad", node.op) - + def testFusePadAndConv(self): with self.test_session() as sess: -- GitLab From 8a27a24e959d3711a3974cf2eb963cecffc9e17d Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 27 Nov 2017 09:20:27 -0800 Subject: [PATCH 0832/1801] update premade_estimators.md PiperOrigin-RevId: 177026849 --- .../get_started/premade_estimators.md | 425 ++++++++++++++++++ 1 file changed, 425 insertions(+) create mode 100644 tensorflow/docs_src/get_started/premade_estimators.md diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md new file mode 100644 index 0000000000..ff839fd040 --- /dev/null +++ b/tensorflow/docs_src/get_started/premade_estimators.md @@ -0,0 +1,425 @@ + +# Getting Started with TensorFlow + +This document introduces the TensorFlow programming environment and shows you +how to write the Iris classification problem in TensorFlow. + +Prior to reading this document, do the following: + +* [Install TensorFlow](install/index.md). +* If you installed TensorFlow with virtualenv or Anaconda, activate your + TensorFlow environment. +* To keep the data import simple, our Iris example uses Pandas. You can + install Pandas with: + + `pip install pandas` + +## Getting the sample code + +Take the following steps to get the sample code for this program: + +1. Clone the TensorFlow Models repository from github by entering the following + command: + + `git clone https://github.com/tensorflow/models` + +1. Change directory within that branch to the location containing the examples + used in this document: + + `cd models/samples/core/get_started/` + +The program described in this document is called `premade_estimator.py`. + +### Running the program + +You run TensorFlow programs as you would run any Python program. For example: + +``` bsh +python premade_estimator.py +``` + +The program should output training logs and some predictions against a test +set. For example, the first line in the following output shows that the model +thinks there is a 99.6% chance that the first example in the test set is a +Sentosa. Since the test set `expected "Setosa"`, this appears to be a good +prediction. + +``` None +... +Prediction is "Sentosa" (99.6%), expected "Setosa" + +Prediction is "Versicolor" (99.8%), expected "Versicolor" + +Prediction is "Virginica" (97.9%), expected "Virginica" +``` + +If the program generates errors instead of answers, ask yourself the following +questions: + +* Did you install TensorFlow properly? +* Are you using the correct version of tensorflow? +* Did you activate the environment you installed TensorFlow in? (This is + only relevant in certain installation environments.) + +## The programming stack + +Before getting into the details of the program itself, let's investigate the +programming environment. As the following illustration shows, TensorFlow +provides a programming stack consisting of multiple API layers: + +
    + +
    +
    +The TensorFlow Programming Environment +
    + +We strongly recommend writing TensorFlow programs with the following APIs: + +* Estimators, which represent a complete model. The Estimator API provides + methods to train the model, to judge the model's accuracy, and to generate + predictions. +* Datasets, which build a data input pipeline. The Dataset API has methods to + load and manipulate data, and feed it into your model. The Datasets API meshes + well with the Estimators API. + +## Classifying irises: an overview + +The sample program in this document builds and tests a model that +classifies Iris flowers into three different species based on the size of their +[sepals](https://en.wikipedia.org/wiki/Sepal) and +[petals](https://en.wikipedia.org/wiki/Petal). + +
    +Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor +
    +**From left to right, +[*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by +[Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), +[*Iris versicolor*](https://commons.wikimedia.org/w/index.php?curid=248095) (by +[Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0), +and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862) +(by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA +2.0).** + +### The data set + +The Iris data set contains four features and one label. The four features +identify the following botanical characteristics of individual Iris flowers: + +* sepal length +* sepal width +* petal length +* petal width + +Our model will represent these features as float32 numerical data. + +The label identifies the Iris species, which must be one of the following: + +* Iris setosa (0) +* Iris versicolor (1) +* Iris virginica (2) + +Our model will represent the label as `int32` categorical data. + +The following table shows three examples in the data set: + +|sepal length | sepal width | petal length | petal width| species (label) | +|------------:|------------:|-------------:|-----------:|:---------------:| +| 5.1 | 3.3 | 1.7 | 0.5 | 0 (Sentosa) | +| 5.0 | 2.3 | 3.3 | 1.0 | 1 (versicolor)| +| 6.4 | 2.8 | 5.6 | 2.2 | 2 (virginica) | + +### The algorithm + +The program trains a Deep Neural Network classifier model having the following +topology: + +* 2 hidden layers. +* Each hidden layer contains 10 nodes. + +The following figure illustrates the features, hidden layers, and predictions +(not all of the nodes in the hidden layers are shown): + + +
    +A diagram of the network architecture: Inputs, 2 hidden layers, and outputs +
    +
    +The Model. +
    + +### Inference + +Running the trained model on an unlabeled example yields three predictions, +namely, the likelihood that this flower is the given Iris species. The sum of +those output predictions will be 1.0. For example, the prediction on an +unlabeled example might be something like the following: + +* 0.03 for Iris Setosa +* 0.95 for Iris Versicolor +* 0.02 for Iris Virginica + +The preceding prediction indicates a 95% probability that the given unlabeled +example is an Iris Versicolor. + +## Overview of programming with Estimators + +An Estimator is TensorFlow's high level representation of a complete model. It +handles the details of initialization, logging, saving and restoring, and many +other features so you can concentrate on your model. For more details see +@{$programmers_guide/estimators}. + +An "Estimator" is any class derived from @{tf.estimator.Estimator}. TensorFlow +provides a collection of +[pre-made Estimators](https://developers.google.com/machine-learning/glossary/#pre-made_Estimator) +(for example, `LinearRegressor`) to implement common ML algorithms. Beyond +those, you may write your own +[custom Estimators](https://developers.google.com/machine-learning/glossary/#custom_Estimator). +We recommend using pre-made Estimators when just getting started with +TensorFlow. After gaining expertise with the pre-made Estimators, we recommend +optimizing your model by creating your own custom Estimators. + +To write a TensorFlow program based on pre-made Estimators, you must perform the +following tasks: + +* Create one or more input functions. +* Define the model's feature columns. +* Instantiate an Estimator, specifying the feature columns and various + hyperparameters. +* Call one or more methods on the Estimator object, passing the appropriate + input function as the source of the data. + +Let's see how those tasks are implemented in Iris. + +## Create input functions + +You must create input functions to supply data for training, +evaluating, and prediction. + +An **input function** is a function that returns the following two-element +tuple: + +* "features" - A Python dictionary in which: + * Each key is the name of a feature. + * Each value is an array containing all of that feature's values. +* "label" - An array containing the values of the label for every example. + +Just to demonstrate the format of the input function here's a simple +implementation: + +```python +def input_evaluation_set(): + features = {'SepalLength': np.array([6.4, 5.0]), + 'SepalWidth': np.array([2.8, 2.3]), + 'PetalLength': np.array([5.6, 3.3]), + 'PetalWidth': np.array([2.2, 1.0])} + labels = np.array([2, 1]) + return features, labels +``` + +Your input function may generate the "features" dictionary and "label" list any +way you like. However, we recommend using TensorFlow's Dataset API, which can +deftly parse all sorts of data. At a high-level, the Datasets API consists of +the following classes: + +
    +A diagram showing subclasses of the Dataset class +
    + + +Where: + +* Dataset: Base class containing methods to create and transform datasets. Also + allows you to initialize a dataset from data in memory, or from a Python + generator. +* TextLineDataset: Reads lines from text files. +* TFRecordDataset: Reads records from TFRecord files. +* FixedLengthRecordDataset: Reads fixed size records from binary files. +* Iterator: Provides a way to access one data set element at a time. + +The Dataset API can handle a lot of common cases for you. For example, +using the Dataset API, you can easily read in records from a large collection +of files in parallel and join them into a single stream. + +To keep things simple in this example we are going to load the data with pandas, and build our input pipeline from this in-memory data. + +Here is the input function used for training in this program: + +``` python +def train_input_fn(features, labels, batch_size): + """An input function for training""" + # Convert the inputs to a Dataset. + dataset = tf.data.Dataset.from_tensor_slices((features, labels)) + + # Shuffle, repeat, and batch the examples. + dataset = dataset.shuffle(1000).repeat().batch(batch_size) + + # Build the Iterator, and return the read end of the pipeline. + return dataset.make_one_shot_iterator().get_next() +``` + +## Define the Feature Columns + +A [**Feature Column**](https://developers.google.com/machine-learning/glossary/#feature_columns) +is an object describing how the model should use raw input features from the +features dictionary. When you build an Estimator model, you pass it a list of +feature columns that describes each of the features you want the model to use. + +These objects are created by functions in the @{tf.feature_column} module. `tf.feature_column` methods provide many different ways to represent data. + +For Iris, the 4 raw features are numeric values, so we'll build a list of +feature columns, to tell the Estimator model to represent each of the four +features as 32-bit floating-point values. Therefore, the code to create the +Feature Column is simply: + +```python +# Feature columns describe how to use the input. +my_feature_columns = [] +for key in train_x.keys(): + my_feature_columns.append(tf.feature_column.numeric_column(key=key)) +``` + +Feature Columns can be far more sophisticated than those we're showing here. + + +Now that we have the description of how we want the model to represent the raw +features, we can build the estimator. + + +## Instantiate an Estimator + +The Iris problem is a classic classifier problem. Fortunately, TensorFlow +provides several pre-made classifier Estimators, including: + +* @{tf.estimator.DNNClassifier}—for deep models that perform multi-class + classification. +* @{tf.estimator.DNNLinearCombinedClassifier}—for wide-n-deep models. +* @{tf.estimator.LinearClassifier}—for linear models that feed results into + binary classifiers. + +For the Iris problem, `tf.estimator.DNNClassifier` seems like the best choice. +Here's how we instantiated this Estimator: + +```python +# Build 2 hidden layer DNN with 10, 10 units respectively. +classifier = tf.estimator.DNNClassifier( + feature_columns=my_feature_columns, + # Two hidden layers of 10 nodes each. + hidden_units=[10, 10], + # The model must choose between 3 classes. + n_classes=3) +``` + +## Train, Evaluate, and Predict + +Now that we have an Estimator object, we can call methods to do the following: + +* Train the model. +* Evaluate the trained model. +* Use the trained model to make predictions. + +### Train the model + +Train the model by calling the Estimator's `train` method as follows: + +```python +# Train the Model. +classifier.train( + input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size), + steps=args.train_steps) +``` + +Here we wrap up our `input_fn` call in a [`lambda`](https://docs.python.org/3/tutorial/controlflow.html) +to allow the Estimator to call it, at the correct time, with no arguments. +The `steps` argument tells the method to stop training after a number of +training steps. + +### Evaluate the trained model + +Now that the model has been trained, we can get some statistics on its +performance. The following code block evaluates the accuracy of the trained +model on the test data: + +```python +# Evaluate the model. +eval_result = classifier.evaluate( + input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size)) + +print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result)) +``` + +Note how unlike our call to the `train` method, we did not pass the `steps` +argument to evaluate. Our `eval_input_fn` doesn't use the `repeat` method on +the dataset, so evaluation just runs to the end of the data. + +Running this code yields the following output (or something similar): + +```none +Test set accuracy: 0.967 +``` + +### Making predictions (inferring) from the trained model + +We now have a trained model that produces good evaluation results. +We can now use the trained model to predict the species of an Iris flower +based on some unlabeled measurments. As with training and evaluation, we make +predictions using a single function call: + +```python +# Generate predictions from the model +expected = ['Setosa', 'Versicolor', 'Virginica'] +predict_x = { + 'SepalLength': [5.1, 5.9, 6.9], + 'SepalWidth': [3.3, 3.0, 3.1], + 'PetalLength': [1.7, 4.2, 5.4], + 'PetalWidth': [0.5, 1.5, 2.1], +} + +predictions = classifier.predict( + input_fn=lambda:eval_input_fn(predict_x, batch_size=args.batch_size)) +``` + +The `predict` method returns a Python iterable, yielding a dictionary of +prediction results for each example. The following code prints a few +predictions and their probabilities: + + +``` python +for pred_dict, expec in zip(predictions, expected): + template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"') + + class_id = pred_dict['class_ids'][0] + probability = pred_dict['probabilities'][class_id] + print(template.format(SPECIES[class_id], 100 * probability, expec)) +``` + +Running the preceding code yields the following output: + +``` None +... +Prediction is "Sentosa" (99.6%), expected "Setosa" + +Prediction is "Versicolor" (99.8%), expected "Versicolor" + +Prediction is "Virginica" (97.9%), expected "Virginica" +``` + +## Next + +Now that you've gotten started writing TensorFlow programs. + +* For more on Datasets, see the + @{$programmers_guide/datasets$Programmer's guide} and + @{tf.data$reference documentation}. +* For more on Estimators, see the + @{$programmers_guide/estimators$Programmer's guide} and + @{tf.estimator$reference documentation}. + + -- GitLab From e424ba4a6d6e2c10f78f7f899de3c5d8dfb2e8c9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 27 Nov 2017 09:47:00 -0800 Subject: [PATCH 0833/1801] Track symbolic shapes through shapeN operations PiperOrigin-RevId: 177029912 --- .../core/common_runtime/shape_refiner.cc | 2 ++ .../grappler/costs/graph_properties_test.cc | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index d66865e45b..c82d57694a 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -707,6 +707,8 @@ Status ShapeRefiner::ConstantPartialShape(InferenceContext* target_context, *result = target_context->Scalar(); } else if (src_op == "Shape") { *result = src_context->input(0); + } else if (src_op == "ShapeN") { + *result = src_context->input(input_edge->src_output()); } else if (src_op == "Pack") { std::vector dims; // Pack is concatenating its input scalars to form the shape tensor vector. diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index 74d48158a9..c11af5777a 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -825,6 +825,32 @@ TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { TF_EXPECT_OK(properties.InferStatically()); } +TEST_F(GraphPropertiesTest, ShapeTracking) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = + ops::Placeholder(s.WithOpName("a"), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({-1, -1}))); + Output b = + ops::Placeholder(s.WithOpName("b"), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({-1}))); + Output zero = ops::Const(s.WithOpName("zero"), 0.0f, {}); + auto shp = ops::ShapeN(s.WithOpName("shapes"), {a, b}); + Output o1 = ops::Fill(s.WithOpName("o1"), shp[0], zero); + Output o2 = ops::Fill(s.WithOpName("o2"), shp[1], zero); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphProperties properties(item); + TF_CHECK_OK(properties.InferStatically()); + const auto shape_a = properties.GetOutputProperties("a").at(0).shape(); + const auto shape_b = properties.GetOutputProperties("b").at(0).shape(); + const auto shape_o1 = properties.GetOutputProperties("o1").at(0).shape(); + const auto shape_o2 = properties.GetOutputProperties("o2").at(0).shape(); + EXPECT_EQ(shape_a.DebugString(), shape_o1.DebugString()); + EXPECT_EQ(shape_b.DebugString(), shape_o2.DebugString()); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 4fbf63a8ba991113a6e35cbb3e4d14f2343dfbe4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 10:05:11 -0800 Subject: [PATCH 0834/1801] Removed deprecated Hasher alias from StringPiece. This will allow StringPiece to be replaced by an alias to absl::string_view. PiperOrigin-RevId: 177033313 --- tensorflow/core/lib/core/stringpiece.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index b2c6842151..89a1e26b81 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -105,8 +105,6 @@ class StringPiece { StringPiece substr(size_t pos, size_t n = npos) const; - using Hasher = ::tensorflow::StringPieceHasher; - // Return a string that contains the copy of the referenced data. std::string ToString() const { return std::string(data_, size_); } -- GitLab From b115a9fc735d08c55235f99a1cdb194e0f7c5d0c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 10:25:14 -0800 Subject: [PATCH 0835/1801] [XLA] Enhancement to source tensor indexing. Change ElementalIrEmitter::ElementwiseSourceIndex to use the target index as a source index for the case where the two tensors have the same shape but different element types. This improves the implementation of fusion kernels by avoiding the calculation of the dimensional indices from the linear index for the source tensors. PiperOrigin-RevId: 177036769 --- .../compiler/xla/service/elemental_ir_emitter.cc | 2 +- tensorflow/compiler/xla/shape_util.cc | 10 ++++++++++ tensorflow/compiler/xla/shape_util.h | 5 +++++ tensorflow/compiler/xla/shape_util_test.cc | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 97ced5dfdc..b9407818cd 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -905,7 +905,7 @@ llvm_ir::IrArray::Index ElementalIrEmitter::ElementwiseSourceIndex( // If no implicit broadcast is needed for this operand, returns the target // index as the source index. - if (ShapeUtil::Compatible(operand_shape, hlo.shape())) { + if (ShapeUtil::CompatibleIgnoringElementType(operand_shape, hlo.shape())) { return target_index; } diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index c0a0e13f07..74fa0b2f2e 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -553,6 +553,16 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return SameDimensions(lhs, rhs) && SameElementType(lhs, rhs); } +/* static */ bool ShapeUtil::CompatibleIgnoringElementType(const Shape& lhs, + const Shape& rhs) { + if (lhs.element_type() == TUPLE) { + return rhs.element_type() == TUPLE && + ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), + CompatibleIgnoringElementType); + } + return SameDimensions(lhs, rhs); +} + /* static */ int64 ShapeUtil::GetDimension(const Shape& shape, int64 dimension_number) { return shape.dimensions(GetDimensionNumber(shape, dimension_number)); diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 82a513a65a..2ea1bd95cb 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -190,6 +190,11 @@ class ShapeUtil { // compatibility. static bool Compatible(const Shape& lhs, const Shape& rhs); + // Returns true if the rank and dimension sizes are identical. Element type + // and layout are ignored. Tuple elements are compared recursively for + // compatibility. + static bool CompatibleIgnoringElementType(const Shape& lhs, const Shape& rhs); + // Returns whether the lhs and rhs shapes are identical protobufs. static bool Equal(const Shape& lhs, const Shape& rhs); diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index 0ba542ad1b..4bce7ca51d 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -145,6 +145,7 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithSwappedElements) { Shape tuple2 = ShapeUtil::MakeTupleShape( {ShapeUtil::MakeShape(F32, {3, 2}), ShapeUtil::MakeShape(PRED, {4, 5})}); EXPECT_FALSE(ShapeUtil::Compatible(tuple1, tuple2)); + EXPECT_FALSE(ShapeUtil::CompatibleIgnoringElementType(tuple1, tuple2)); } TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentPrimitiveType) { @@ -153,6 +154,7 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentPrimitiveType) { Shape tuple2 = ShapeUtil::MakeTupleShape( {ShapeUtil::MakeShape(PRED, {4, 5}), ShapeUtil::MakeShape(S32, {3, 2})}); EXPECT_FALSE(ShapeUtil::Compatible(tuple1, tuple2)); + EXPECT_TRUE(ShapeUtil::CompatibleIgnoringElementType(tuple1, tuple2)); } TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentDimensions) { -- GitLab From bf001790e84e6dc433ac39e7eaba7896b70fa9ef Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 27 Nov 2017 10:38:30 -0800 Subject: [PATCH 0836/1801] [TPU] Change in preparation for supporting model-parallel TPU computations. PiperOrigin-RevId: 177038993 --- tensorflow/BUILD | 1 + tensorflow/compiler/xla/array3d.h | 2 + .../xla/service/computation_placer.cc | 6 + tensorflow/contrib/cmake/tf_python.cmake | 1 + tensorflow/contrib/tpu/BUILD | 3 + tensorflow/contrib/tpu/__init__.py | 6 + tensorflow/contrib/tpu/ops/replication_ops.cc | 17 +- .../contrib/tpu/ops/tpu_configuration_ops.cc | 27 +- tensorflow/contrib/tpu/proto/BUILD | 25 ++ tensorflow/contrib/tpu/proto/topology.proto | 27 ++ .../tpu/python/tpu/device_assignment.py | 299 ++++++++++++++++++ tensorflow/contrib/tpu/python/tpu/topology.py | 137 ++++++++ tensorflow/contrib/tpu/python/tpu/tpu.py | 177 ++++++----- tensorflow/contrib/tpu/python/tpu/tpu_feed.py | 26 +- 14 files changed, 636 insertions(+), 118 deletions(-) create mode 100644 tensorflow/contrib/tpu/proto/BUILD create mode 100644 tensorflow/contrib/tpu/proto/topology.proto create mode 100644 tensorflow/contrib/tpu/python/tpu/device_assignment.py create mode 100644 tensorflow/contrib/tpu/python/tpu/topology.py diff --git a/tensorflow/BUILD b/tensorflow/BUILD index c8f0b6b061..e6dc15a701 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -554,6 +554,7 @@ filegroup( "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:all_files", "//tensorflow/contrib/tpu:all_files", "//tensorflow/contrib/tpu/profiler:all_files", + "//tensorflow/contrib/tpu/proto:all_files", "//tensorflow/contrib/training:all_files", "//tensorflow/contrib/util:all_files", "//tensorflow/contrib/verbs:all_files", diff --git a/tensorflow/compiler/xla/array3d.h b/tensorflow/compiler/xla/array3d.h index e9449f01ad..a1c5840a5f 100644 --- a/tensorflow/compiler/xla/array3d.h +++ b/tensorflow/compiler/xla/array3d.h @@ -36,6 +36,8 @@ namespace xla { template class Array3D : public Array { public: + Array3D() : Array(std::vector{0, 0, 0}) {} + // Creates an array of dimensions n1 x n2 x n3, uninitialized values. Array3D(const int64 n1, const int64 n2, const int64 n3) : Array(std::vector{n1, n2, n3}) {} diff --git a/tensorflow/compiler/xla/service/computation_placer.cc b/tensorflow/compiler/xla/service/computation_placer.cc index 6b7b0d25e8..657fba6b62 100644 --- a/tensorflow/compiler/xla/service/computation_placer.cc +++ b/tensorflow/compiler/xla/service/computation_placer.cc @@ -52,6 +52,12 @@ Status DeviceAssignment::Serialize(DeviceAssignmentProto* proto) const { /* static */ StatusOr> DeviceAssignment::Deserialize(const DeviceAssignmentProto& proto) { TF_RET_CHECK(proto.computation_devices_size() == proto.computation_count()); + if (proto.replica_count() <= 0 || proto.computation_count() <= 0) { + return InvalidArgument( + "Invalid device assignment topology: replica_count=%d, " + "computation_count=%d", + proto.replica_count(), proto.computation_count()); + } auto assignment = MakeUnique(proto.replica_count(), proto.computation_count()); for (int computation = 0; computation < proto.computation_count(); diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 61b3fd715d..0128946e45 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -129,6 +129,7 @@ file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir} "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto" "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/proto/*.proto" "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto" + "${tensorflow_source_dir}/tensorflow/contrib/tpu/proto/*.proto" "${tensorflow_source_dir}/tensorflow/contrib/tpu/profiler/*.proto" "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto" ) diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 64e9d0e765..f542d94139 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -155,6 +155,8 @@ py_library( name = "tpu_lib", srcs = [ "python/tpu/__init__.py", + "python/tpu/device_assignment.py", + "python/tpu/topology.py", "python/tpu/tpu.py", "python/tpu/tpu_feed.py", "python/tpu/tpu_function.py", @@ -166,6 +168,7 @@ py_library( deps = [ ":profiler", ":tpu_py", + "//tensorflow/contrib/tpu/proto:topology_proto_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/contrib/tpu/__init__.py b/tensorflow/contrib/tpu/__init__.py index ec4c4e1be6..ea6e874f2d 100644 --- a/tensorflow/contrib/tpu/__init__.py +++ b/tensorflow/contrib/tpu/__init__.py @@ -23,6 +23,7 @@ @@initialize_system @@shutdown_system +@@device_assignment @@core @@replicate @@shard @@ -33,6 +34,9 @@ @@InfeedQueue +@@DeviceAssignment +@@Topology + @@while_loop @@repeat @@ -49,6 +53,8 @@ from __future__ import print_function # pylint: disable=wildcard-import,unused-import from tensorflow.contrib.tpu.python import profiler from tensorflow.contrib.tpu.python.ops.tpu_ops import * +from tensorflow.contrib.tpu.python.tpu.device_assignment import * +from tensorflow.contrib.tpu.python.tpu.topology import * from tensorflow.contrib.tpu.python.tpu.tpu import * from tensorflow.contrib.tpu.python.tpu.tpu_config import * from tensorflow.contrib.tpu.python.tpu.tpu_estimator import * diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc index b40dac4717..36e865bf3c 100644 --- a/tensorflow/contrib/tpu/ops/replication_ops.cc +++ b/tensorflow/contrib/tpu/ops/replication_ops.cc @@ -24,7 +24,9 @@ using shape_inference::ShapeHandle; REGISTER_OP("TPUReplicateMetadata") .Attr("num_replicas: int >= 0") - .Attr("global_tpu_id: list(int) = []") + .Attr("topology: string = \"\"") + .Attr("device_assignment: list(int) = []") + .Attr("computation_shape: list(int) = []") .SetShapeFn(shape_inference::UnknownShape); REGISTER_OP("TPUReplicatedInput") @@ -64,7 +66,9 @@ REGISTER_OP("TPUReplicatedOutput") REGISTER_OP("TPUReplicate") .Attr("computation: func") .Attr("num_replicas: int >= 1") - .Attr("global_tpu_id: list(int) = []") + .Attr("topology: string = \"\"") + .Attr("device_assignment: list(int) = []") + .Attr("computation_shape: list(int) = []") .Attr("Tinputs: list(type) >= 0") .Attr("Tbroadcast_inputs: list(type) >= 0") .Attr("NumVariables: int >= 0") @@ -79,7 +83,14 @@ Runs replicated computations on a distributed TPU system. computation: a function containing the computation to run. num_replicas: the number of replicas of the computation to run. -global_tpu_id: map from device to global tpu id. +topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU +topology. +computation_shape: a [mesh_dimension] array describing the shape of each + computation replica in numbers of cores in the TPU mesh. +device_assignment: a flattened array with shape + [replica] + computation_shape + [mesh_dimension] that maps the coordinates of + logical cores in each replica of a computation to physical coordinates in + the TPU topology. Tinputs: the types of the arguments to 'computation'. inputs: the inputs to 'computation', flattened, in replica-major order. Tbroadcast_inputs: the types of the additional arguments to broadcast to all diff --git a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc index 8a87a91056..8c4fe5538d 100644 --- a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc +++ b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc @@ -107,7 +107,7 @@ in a host. REGISTER_OP("_WaitForDistributedTPU") .Input("inputs: N * int32") - .Output("global_tpu_array: int32") + .Output("topology: string") .Attr("host_specs: list(string)") .Attr("startup_timeout_sec: int = 20") .Attr("N: int") @@ -118,7 +118,7 @@ REGISTER_OP("_WaitForDistributedTPU") for (int i = 0; i < c->num_inputs(); ++i) { TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &input)); } - c->set_output(0, c->UnknownShapeOfRank(2)); + c->set_output(0, c->Scalar()); return ::tensorflow::Status::OK(); }) .Doc(R"doc( @@ -129,30 +129,26 @@ _InitializeHostForDistributedTPU Ops. inputs: For each initialized host, a vector giving the global TPU id of each TPU on the host. -global_tpu_array: A two-dimensional array. For each host (the outer -dimension) the array lists the global ids of the TPUs on that host. -host_specs: For each initialized host, the partial device specification -indicating job, replica, and task. Combining this spec with -'/device:TPU:k' gives the full device name of the k'th TPU on the -host. +topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU +topology. startup_timeout_sec: The number of seconds to wait for the TPU system to stabilize. )doc"); REGISTER_OP("_SetGlobalTPUArray") - .Input("global_tpu_array: int32") + .Input("topology: string") .SetIsStateful() .SetShapeFn([](InferenceContext* c) { ShapeHandle input; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input)); return ::tensorflow::Status::OK(); }) .Doc(R"doc( An op that informs a host of the global ids of all the of TPUs in the system. -global_tpu_array: A two-dimensional array. For each host (the outer -dimension) the array lists the global ids of the TPUs on that host. +topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU +topology. )doc"); REGISTER_OP("_ShutdownDistributedTPU") @@ -198,7 +194,7 @@ chips on the host. )doc"); REGISTER_OP("ConfigureDistributedTPU") - .Output("global_tpu_array: int32") + .Output("topology: string") .Attr("embedding_config: string = ''") .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape) @@ -206,9 +202,8 @@ REGISTER_OP("ConfigureDistributedTPU") An op that sets up the centralized structures for a distributed TPU system. -global_tpu_array: A two-dimensional array. For each host (the outer -dimension) the array lists the global ids of the TPUs on that host. -embedding_config: Internal use. +topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU +topology. )doc"); REGISTER_OP("ShutdownDistributedTPU") diff --git a/tensorflow/contrib/tpu/proto/BUILD b/tensorflow/contrib/tpu/proto/BUILD new file mode 100644 index 0000000000..79a79efb6b --- /dev/null +++ b/tensorflow/contrib/tpu/proto/BUILD @@ -0,0 +1,25 @@ +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +tf_proto_library( + name = "topology_proto", + srcs = [ + "topology.proto", + ], + cc_api_version = 2, + visibility = ["//visibility:public"], +) diff --git a/tensorflow/contrib/tpu/proto/topology.proto b/tensorflow/contrib/tpu/proto/topology.proto new file mode 100644 index 0000000000..17064ee5a2 --- /dev/null +++ b/tensorflow/contrib/tpu/proto/topology.proto @@ -0,0 +1,27 @@ +syntax = "proto3"; + +option cc_enable_arenas = true; + +package tensorflow.tpu; + +// Describes the geometry of a TPU mesh. +message TopologyProto { + // The dimensions of the TPU topology, in cores. Typically, this is a 3D + // topology [x, y, core], where the major dimensions correspond to TPU chips, + // and the minor dimension describes the number of cores on a multicore chip. + repeated int32 mesh_shape = 1; + + // Number of TensorFlow tasks in the cluster. + int32 num_tasks = 2; + + // Number of TPU devices per task. + int32 num_tpu_devices_per_task = 3; + + // A flattened rank 3 int32 array with shape + // [num_tasks, num_tpu_devices_per_task, len(mesh_shape)]. + // `tasks` is the number of tasks in the TPU cluster, `devices` is the number + // of TPU devices per task, and the minor dimension corresponds to a position + // in the TPU mesh topology. Each entry [task, device, axis] gives the + // `axis`-th coordinate in the topology of a task/device pair. + repeated int32 device_coordinates = 4; +} diff --git a/tensorflow/contrib/tpu/python/tpu/device_assignment.py b/tensorflow/contrib/tpu/python/tpu/device_assignment.py new file mode 100644 index 0000000000..ee202610a8 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/device_assignment.py @@ -0,0 +1,299 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ====================================== +"""Library of TPU helper functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensorflow.contrib.tpu.python.tpu.topology import Topology + + +def _tpu_device_name(job, task, device): + """Returns the device name for the TPU `device` on `task` of `job`.""" + if job is None: + return "/task:%d/device:TPU:%d" % (task, device) + else: + return "/job:%s/task:%d/device:TPU:%d" % (job, task, device) + + +def _tpu_host_device_name(job, task): + """Returns the device name for the CPU device on `task` of `job`.""" + if job is None: + return "/task:%d/device:CPU:0" % task + else: + return "/job:%s/task:%d/device:CPU:0" % (job, task) + + +class DeviceAssignment(object): + """Mapping from logical cores in a computation to the physical TPU topology. + + Prefer to use the `device_assignment()` helper to construct a + `DeviceAssignment`; it is easier if less flexible than constructing a + `DeviceAssignment` directly. + """ + + def __init__(self, topology, core_assignment): + """Constructs a `DeviceAssignment` object. + + Args: + topology: A `Topology` object that describes the physical TPU topology. + core_assignment: A logical to physical core mapping, represented as a + rank 3 numpy array. See the description of the `core_assignment` + property for more details. + + Raises: + ValueError: If `topology` is not `Topology` object. + ValueError: If `core_assignment` is not a rank 3 numpy array. + """ + if not isinstance(topology, Topology): + raise ValueError("topology must be a Topology object, got {}".format( + type(topology))) + core_assignment = np.asarray(core_assignment, dtype=np.int32) + + self._topology = topology + self._topology_tasks, self._topology_devices = ( + self._invert_topology(topology)) + + topology_rank = self._topology_tasks.ndim + if core_assignment.ndim != topology_rank + 2: + raise ValueError("core_assignment must be a rank {} numpy array".format( + topology_rank + 2)) + + self._num_replicas = core_assignment.shape[0] + self._computation_shape = np.array( + core_assignment.shape[1:-1], dtype=np.int32) + + if core_assignment.shape[-1] != topology_rank: + raise ValueError( + "minor dimension of core_assignment must have size equal to topology " + "rank ({}), got shape {}".format(topology_rank, + core_assignment.shape)) + + self._core_assignment = core_assignment + + def _invert_topology(self, topology): + """Inverts a [task,device,axis] topology to [x,y,z] -> task/device maps.""" + mesh_shape = topology.mesh_shape + tasks = np.full(list(mesh_shape), -1, dtype=np.int32) + devices = np.full(list(mesh_shape), -1, dtype=np.int32) + for task in xrange(topology.device_coordinates.shape[0]): + for device in xrange(topology.device_coordinates.shape[1]): + x, y, z = topology.device_coordinates[task, device, :] + tasks[x, y, z] = task + devices[x, y, z] = device + return tasks, devices + + @property + def topology(self): + """A `Topology` that describes the TPU topology.""" + return self._topology + + @property + def computation_shape(self): + """The computation shape. + + Returns: + A rank-1 int32 numpy array with size equal to the TPU topology rank. + Describes the logical shape in numbers of core of each replica of the + computation in the TPU topology. + + Returns: + The computation shape. + """ + return self._computation_shape + + @property + def num_replicas(self): + """The number of replicas of the computation.""" + return self._num_replicas + + @property + def core_assignment(self): + """The logical to physical core mapping. + + Returns: + A numpy array of rank `topology_rank + 2`, with shape + `[num_replicas] + computation_shape + [topology_rank]`. Maps + (replica, logical core coordinates) pairs to physical topology + coordinates. + """ + return self._core_assignment + + def _coordinates(self, replica, logical_core): + """Returns the physical topology coordinates of a logical core.""" + if logical_core is None: + logical_core = np.array([0, 0, 0], np.int32) + + if any(logical_core < 0) or any(logical_core >= self.computation_shape): + raise ValueError("Invalid core {}; computation shape is {}".format( + logical_core, self.computation_shape)) + + logical_offset = tuple([replica] + logical_core.tolist() + [slice(3)]) + return tuple(self.core_assignment[logical_offset]) + + def tpu_ordinal(self, replica=0, logical_core=None): + """Returns the ordinal of the TPU device assigned to a logical core.""" + coordinates = self._coordinates(replica, logical_core) + return self._topology_devices[coordinates] + + def host_device(self, replica=0, logical_core=None, job=None): + """Returns the CPU device attached to a logical core.""" + coordinates = self._coordinates(replica, logical_core) + return _tpu_host_device_name(job, self._topology_tasks[coordinates]) + + def tpu_device(self, replica=0, logical_core=None, job=None): + """Returns the name of the TPU device assigned to a logical core.""" + coordinates = self._coordinates(replica, logical_core) + return _tpu_device_name(job, self._topology_tasks[coordinates], + self._topology_devices[coordinates]) + + +def device_assignment(topology, + computation_shape=None, + computation_stride=None, + num_replicas=1): + """Computes a device_assignment of a computation across a TPU topology. + + Returns a `DeviceAssignment` that describes the cores in the topology assigned + to each core of each replica. + + `computation_shape` and `computation_stride` values should be powers of 2 for + optimal packing. + + Args: + topology: A `Topology` object that describes the TPU cluster topology. + To obtain a TPU topology, evaluate the `Tensor` returned by + `initialize_system` using `Session.run`. Either a serialized + `TopologyProto` or a `Topology` object may be passed. Note: you must + evaluate the `Tensor` first; you cannot pass an unevaluated `Tensor` here. + computation_shape: A rank 1 int32 numpy array of size 3, describing the + shape of the computation's block of cores. If None, the + `computation_shape` is `[1, 1, 1]`. + computation_stride: A rank 1 int32 numpy array of size 3, describing the + inter-core spacing of the `computation_shape` cores in the TPU topology. + If None, the `computation_stride` is `[1, 1, 1]`. + num_replicas: The number of computation replicas to run. The replicas will + be packed into the free spaces of the topology. + + Returns: + A DeviceAssignment object, which describes the mapping between the logical + cores in each computation replica and the physical cores in the TPU + topology. + + Raises: + ValueError: If `topology` is not a valid `Topology` object. + ValueError: If `computation_shape` or `computation_stride` are not 1D int32 + numpy arrays with shape [3] where all values are positive. + ValueError: If computation's replicas cannot fit into the TPU topology. + """ + # Deserialize the Topology proto, if it is a string. + if isinstance(topology, bytes): + topology = Topology(serialized=topology) + + if not isinstance(topology, Topology): + raise ValueError("`topology` is not a Topology object; got {}".format( + type(topology))) + + topology_rank = len(topology.mesh_shape) + mesh_shape = topology.mesh_shape + if computation_shape is None: + computation_shape = np.array([1, 1, 1], dtype=np.int32) + else: + computation_shape = np.asarray(computation_shape, dtype=np.int32) + + if computation_stride is None: + computation_stride = np.array([1, 1, 1], dtype=np.int32) + else: + computation_stride = np.asarray(computation_stride, dtype=np.int32) + + if computation_shape.shape != (3,): + raise ValueError("computation_shape must have shape [3]; got {}".format( + computation_shape.shape)) + if computation_stride.shape != (3,): + raise ValueError("computation_stride must have shape [3]; got {}".format( + computation_stride.shape)) + + if any(computation_shape < 1): + raise ValueError( + "computation_shape must be positive; got computation_shape={}".format( + computation_shape)) + if any(computation_stride < 1): + raise ValueError( + "computation_stride must be positive; got computation_stride={}".format( + computation_stride)) + + # Computes the physical size of one computation instance. + computation_footprint = computation_shape * computation_stride + if any(computation_footprint > mesh_shape): + raise ValueError( + "computation footprint {} does not fit in TPU topology shape {}".format( + computation_footprint, mesh_shape)) + + # Computes how many copies of the computation footprint fit in the mesh. + block_counts = mesh_shape // computation_footprint + + replica_counts = block_counts * computation_stride + max_replicas = np.prod(replica_counts) + if num_replicas > max_replicas: + raise ValueError( + "requested {} replicas but only {} replicas with shape {} and " + "computation_stride {} fit in a TPU mesh of shape {}".format( + num_replicas, max_replicas, computation_shape, computation_stride, + mesh_shape)) + + # Choose a compact layout for the cores. Choose the smaller dimension in the + # topology to be close to the square root of the number of replicas. + num_chips = int(math.ceil(num_replicas / replica_counts[2])) + target_size = int(math.ceil(math.sqrt(num_chips))) + + # Prefer an even size, if possible. Odd numbered rows head back towards the + # first column, so it's best if the last row has an odd index. + if target_size % 2 != 0: + target_size -= 1 + y_size = min(replica_counts[1], target_size) + if y_size * replica_counts[0] < num_chips: + y_size = replica_counts[1] + + # Assigns an offset to each replica such that no two replicas overlap. + replica_offsets = np.full([num_replicas, 3], -1, dtype=np.int32) + for replica in xrange(num_replicas): + # Chooses a replica number in X/Y/Z axes. + z = replica % replica_counts[2] + t = replica // replica_counts[2] + y = t % y_size + x = t // y_size + replica_pos = np.array([x, y, z], dtype=np.int32) + + # Determines where that replica starts in each axis. + outer = replica_pos // computation_stride + inner = replica_pos % computation_stride + replica_offsets[replica, :] = outer * computation_footprint + inner + + # Computes a complete logical core -> physical core mapping for each replica. + indices = [ + np.arange(0, computation_shape[i] * computation_stride[i], + computation_stride[i]) for i in xrange(topology_rank) + ] + indices = np.concatenate( + [i[..., np.newaxis] for i in np.meshgrid(*indices, indexing="ij")], + axis=-1) + assignment = ( + indices + replica_offsets[:, np.newaxis, np.newaxis, np.newaxis, :]) + return DeviceAssignment(topology, core_assignment=assignment) diff --git a/tensorflow/contrib/tpu/python/tpu/topology.py b/tensorflow/contrib/tpu/python/tpu/topology.py new file mode 100644 index 0000000000..cda9a63f20 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/topology.py @@ -0,0 +1,137 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ====================================== +"""Defines the `Topology` class, that describes a TPU fabric topology.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.tpu.proto import topology_pb2 + + +class Topology(object): + """Describes a set of TPU devices. + + Represents both the shape of the physical mesh, and the mapping between + TensorFlow TPU devices to physical mesh coordinates. + """ + + def __init__(self, serialized=None, mesh_shape=None, device_coordinates=None): + """Builds a Topology object. + + If `serialized` is not `None`, the topology is parsed from `serialized` and + the other arguments are ignored. Otherwise, the topology is computed from + `mesh_shape` and `device_coordinates`. + + Args: + serialized: A serialized `TopologyProto`, or `None`. If not `None`, the + serialized proto is parsed to discover the topology. + mesh_shape: A sequence of 3 positive integers, or `None`. If not `None`, + the shape of the TPU topology, in number of cores. Ignored if + `serialized` is not `None`. + device_coordinates: A rank 3 numpy array that describes the mapping from + TensorFlow TPU devices to TPU fabric coordinates, or `None`. Ignored + if `serialized is not `None`. + + Raises: + ValueError: If `serialized` does not describe a well-formed topology. + ValueError: If `serialized` is `None` and `mesh_shape` is not a sequence + of 3 positive integers. + ValueError: If `serialized` is `None` and `device_coordinates` is not a + rank 3 numpy int32 array that describes a valid coordinate mapping. + """ + + if serialized: + self._serialized = serialized + self._parse_topology(serialized) + else: + self._mesh_shape = np.asarray(mesh_shape, dtype=np.int32) + self._device_coordinates = np.asarray(device_coordinates, np.int32) + if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1): + raise ValueError("`mesh_shape` must be a sequence of 3 positive " + "entries; got {}".format(self._mesh_shape)) + + if (len(self._device_coordinates.shape) != 3 or + self._device_coordinates.shape[2] != len(self._mesh_shape)): + raise ValueError("`device_coordinates` must be a rank 3 int32 array " + "with minor dimension equal to the mesh shape rank") + + def _parse_topology(self, serialized): + """Parses a serialized `TopologyProto` into `self`.""" + proto = topology_pb2.TopologyProto() + proto.ParseFromString(serialized) + + self._mesh_shape = np.array(proto.mesh_shape, dtype=np.int32) + if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1): + raise ValueError("`mesh_shape` must be a vector of size 3 with positive " + "entries; got {}".format(self._mesh_shape)) + + if proto.num_tasks < 0: + raise ValueError("`num_tasks` must be >= 0; got {}".format( + proto.num_tasks)) + if proto.num_tpu_devices_per_task < 0: + raise ValueError("`num_tpu_devices_per_task` must be >= 0; got {}".format( + proto.num_tpu_devices_per_task)) + + expected_coordinates_size = ( + proto.num_tasks * proto.num_tpu_devices_per_task * len( + proto.mesh_shape)) + if len(proto.device_coordinates) != expected_coordinates_size: + raise ValueError("`device_coordinates` must have shape num_tasks ({}) * " + "num_tpu_devices_per_task ({}) * len(mesh_shape) ({}); " + "got shape {}".format(proto.num_tasks, + proto.num_tpu_devices_per_task, + proto.mesh_shape, + len(proto.device_coordinates))) + + coords = np.array(proto.device_coordinates, dtype=np.int32) + if any(coords < 0): + raise ValueError("`device_coordinates` must be >= 0") + coords = coords.reshape((proto.num_tasks, proto.num_tpu_devices_per_task, + len(proto.mesh_shape))) + self._device_coordinates = coords + + @property + def mesh_shape(self): + """A rank 1 int32 array describing the shape of the TPU topology.""" + return self._mesh_shape + + @property + def device_coordinates(self): + """Describes the mapping from TPU devices to topology coordinates. + + Returns: + A rank 3 int32 array with shape `[tasks, devices, axis]`. + `tasks` is the number of tasks in the TPU cluster, `devices` is the number + of TPU devices per task, and `axis` is the number of axes in the TPU + cluster topology. Each entry gives the `axis`-th coordinate in the + topology of a task/device pair. TPU topologies are 3-dimensional, with + dimensions `(x, y, core number)`. + """ + return self._device_coordinates + + def serialized(self): + """Returns the serialized form of the topology.""" + if self._serialized is None: + proto = topology_pb2.TopologyProto() + proto.mesh_shape[:] = list(self._mesh_shape) + proto.num_tasks = self._device_coordinates.shape[0] + proto.num_tpu_devices_per_task = self._device_coordinates.shape[1] + proto.device_coordinates = list(self._device_coordinates.flatten()) + self._serialized = proto.SerializeToString() + + return self._serialized diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index 77977b3c94..7fb8a33698 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -53,11 +53,19 @@ _NOT_IMPLEMENTED_OPS = set([ ]) +def _tpu_system_device_name(job): + """Returns the device name for the TPU_SYSTEM device of `job`.""" + if job is None: + return "/device:TPU_SYSTEM:0" + else: + return "/job:%s/device:TPU_SYSTEM:0" % job + + def initialize_system(embedding_config=None, job=None): """Initializes a distributed TPU system for use with TensorFlow. Args: - embedding_config: If not None, an EmbeddingLayerConfiguration proto + embedding_config: If not None, an `EmbeddingLayerConfiguration` proto describing the desired configuration of the hardware embedding lookup tables. If embedding_config is None, no hardware embeddings can be used. job: The job (the XXX in TensorFlow device specification /job:XXX) @@ -65,27 +73,18 @@ def initialize_system(embedding_config=None, job=None): it is assumed there is only one job in the TensorFlow flock, and an error will be returned if this assumption does not hold. Returns: - Op which, when executed, will initialize the system. + A serialized `TopologyProto` that describes the TPU system. Note: + the topology must be evaluated using `Session.run` before it can be used. """ - if job is None: - device_name = "/device:TPU_SYSTEM:0" - else: - device_name = "/job:%s/device:TPU_SYSTEM:0" % job config_string = ("" if embedding_config is None else embedding_config.SerializeToString()) - with ops.device(device_name): - init_distributed_tpu = tpu_ops.configure_distributed_tpu( - embedding_config=config_string) - return init_distributed_tpu + with ops.device(_tpu_system_device_name(job)): + return tpu_ops.configure_distributed_tpu(embedding_config=config_string) def shutdown_system(job=None): """Shuts down a running a distributed TPU system.""" - if job is None: - device_name = "/device:TPU_SYSTEM:0" - else: - device_name = "/job:%s/device:TPU_SYSTEM:0" % job - with ops.device(device_name): + with ops.device(_tpu_system_device_name(job)): shutdown_distributed_tpu = tpu_ops.shutdown_distributed_tpu() return shutdown_distributed_tpu @@ -97,23 +96,24 @@ def core(num): num: the virtual core number within each replica to which operators should be assigned. Returns: - A device name, suitable for passing to tf.device(). + A device name, suitable for passing to `tf.device()`. """ return "device:TPU_REPLICATED_CORE:{}".format(num) class TPUReplicateContext(control_flow_ops.ControlFlowContext): - """A ControlFlowContext for nodes inside a TPU computation. + """A `ControlFlowContext` for nodes inside a TPU computation. - The primary role of TPUReplicateContext is to mark operators inside a + The primary role of `TPUReplicateContext` is to mark operators inside a tpu.replicate() computation with the attribute "_tpu_replicate=XYZ", where XYZ is a unique name. - We use a ControlFlowContext to perform the annotation since it + We use a `ControlFlowContext` to perform the annotation since it integrates with Tensorflow constructs like ResourceVariables. For example, - if a ResourceVariable is constructed inside a tpu.replicate() block, the - ResourceVariable implementation can use "with ops.control_dependencies(None)" - to build the variable's definition outside the replicated computation. + if a `ResourceVariable` is constructed inside a tpu.replicate() block, the + `ResourceVariable` implementation can use + `with ops.control_dependencies(None)` to build the variable's definition + outside the replicated computation. """ def __init__(self, name): @@ -167,37 +167,47 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext): def replicate(computation, inputs=None, infeed_queue=None, - global_tpu_id=None, + device_assignment=None, name=None): """Builds a graph operator that runs a replicated TPU computation. Args: - computation: a Python function that builds the computation to replicate. - inputs: a list of lists of input tensors or None (equivalent to - [[]]), indexed by [replica_num][input_num]. All replicas must + computation: A Python function that builds the computation to replicate. + inputs: A list of lists of input tensors or `None` (equivalent to + `[[]]`), indexed by `[replica_num][input_num]`. All replicas must have the same number of inputs. - infeed_queue: if not None, the InfeedQueue from which to append a tuple + infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple of arguments as inputs to computation. - global_tpu_id: if not None, a Numpy 2D array indicating the global - id of each TPU device in the system. The outer dimension of the - array is host task id, and the inner dimension is device ordinal, - so e.g., global_tpu_id[x][y] indicates the global id of device - /task:x/device:TPU_NODE:y. - name: name of the operator. + device_assignment: If not `None`, a `DeviceAssignment` describing the + mapping between logical cores in the computation with physical cores in + the TPU topology. Uses a default device assignment if `None`. The + `DeviceAssignment` may be omitted if each replica of the computation uses + only one core, and there is either only one replica, or the number of + replicas is equal to the number of cores in the TPU system. + name: The name of the operator. Returns: - A list of lists of output tensors, indexed by [replica_num][output_num]. + A list of lists of output tensors, indexed by `[replica_num][output_num]`. Raises: - ValueError: if all replicas do not have equal numbers of input tensors. - ValueError: if the number of inputs per replica does not match + ValueError: If all replicas do not have equal numbers of input tensors. + ValueError: If the number of inputs per replica does not match the number of formal parameters to `computation`. """ if name is None: name = "TPUReplicate" inputs = [[]] if inputs is None else inputs - if global_tpu_id is not None: - # Turn the Numpy array into a flattened list. - global_tpu_id = global_tpu_id.flatten().tolist() + metadata_kwargs = {} + if device_assignment is not None: + # Turn the Numpy array into a flattened list so we can pass it as an + # operator attribute. + metadata_kwargs = { + "topology": + device_assignment.topology.serialized(), + "device_assignment": + device_assignment.core_assignment.flatten().tolist(), + "computation_shape": + device_assignment.computation_shape.tolist() + } if ((not isinstance(inputs, list)) or any(not isinstance(inp, (list, tuple)) for inp in inputs)): @@ -260,7 +270,7 @@ def replicate(computation, context.Enter() metadata = tpu_ops.tpu_replicate_metadata( - num_replicas=num_replicas, global_tpu_id=global_tpu_id) + num_replicas=num_replicas, **metadata_kwargs) with tpu_function.tpu_shard_context( num_replicas), ops.control_dependencies([metadata]): @@ -367,7 +377,7 @@ def shard(computation, outputs_from_all_shards=True, output_shard_axes=None, infeed_queue=None, - global_tpu_id=None, + device_assignment=None, name=None): """Shards `computation` for parallel execution. @@ -395,39 +405,40 @@ def shard(computation, Inputs and outputs of the computation must be at least rank-1 Tensors. Args: - computation: a Python function that builds a computation to apply to each + computation: A Python function that builds a computation to apply to each shard of the input. - inputs: a list of input tensors or None (equivalent to an empty + inputs: A list of input tensors or None (equivalent to an empty list). Each input tensor has a corresponding shard axes, given by `input_shard_axes`, which must have size divisible by `num_shards`. - num_shards: the number of shards. - input_shard_axes: a list of dimensions along which to shard `inputs`, or + num_shards: The number of shards. + input_shard_axes: A list of dimensions along which to shard `inputs`, or `None`. `None` means "shard all inputs along dimension 0". If not `None`, there must be one dimension per input. - outputs_from_all_shards: boolean or list of boolean. For each output, if + outputs_from_all_shards: Boolean or list of boolean. For each output, if `True`, outputs from all shards are concatenated along the corresponding `output_shard_axes` entry. Otherwise, each output is taken from an arbitrary shard. If the argument is a boolean, the argument's value is used for each output. - output_shard_axes: a list of dimensions along which to concatenate the + output_shard_axes: A list of dimensions along which to concatenate the outputs of `computation`, or `None`. `None` means "concatenate all outputs along dimension 0". If not `None`, there must be one dimension per output. Ignored if `outputs_from_all_shards` is False. - infeed_queue: if not None, the InfeedQueue to use to augment the inputs of - `computation`. - global_tpu_id: if not None, a Numpy 2D array indicating the global - id of each TPU device in the system. The outer dimension of the - array is host task id, and the inner dimension is device ordinal, - so e.g., global_tpu_id[x][y] indicates the global id of device - /task:x/device:TPU_NODE:y. - name: name of the operator. + infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs + of `computation`. + device_assignment: If not `None`, a `DeviceAssignment` describing the + mapping between logical cores in the computation with physical cores in + the TPU topology. Uses a default device assignment if `None`. The + `DeviceAssignment` may be omitted if each shard of the computation uses + only one core, and there is either only one shard, or the number of shards + is equal to the number of cores in the TPU system. + name: The name of the operator. Returns: A list of output tensors. Raises: - ValueError: if num_shards <= 0 - ValueError: if len(input_shard_axes) != len(inputs) - ValueError: if len(output_shard_axes) != len(outputs from `computation`) + ValueError: If num_shards <= 0 + ValueError: If len(input_shard_axes) != len(inputs) + ValueError: If len(output_shard_axes) != len(outputs from `computation`) """ if num_shards <= 0: @@ -458,7 +469,7 @@ def shard(computation, computation, transposed_inputs, infeed_queue=infeed_queue, - global_tpu_id=global_tpu_id, + device_assignment=device_assignment, name=name) # There must be at least one shard since num_shards > 0. @@ -512,7 +523,7 @@ def batch_parallel(computation, inputs=None, num_shards=1, infeed_queue=None, - global_tpu_id=None, + device_assignment=None, name=None): """Shards `computation` along the batch dimension for parallel execution. @@ -536,55 +547,55 @@ def batch_parallel(computation, Inputs and outputs of the computation must be at least rank-1 Tensors. Args: - computation: a Python function that builds a computation to apply to each + computation: A Python function that builds a computation to apply to each shard of the input. - inputs: a list of input tensors or None (equivalent to an empty + inputs: A list of input tensors or None (equivalent to an empty list). The 0-th dimension of each Tensor must have size divisible by `num_shards`. - num_shards: the number of shards. - infeed_queue: if not None, the InfeedQueue from which to append a tuple + num_shards: The number of shards. + infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple of arguments as inputs to `computation`. - global_tpu_id: if not None, a Numpy 2D array indicating the global - id of each TPU device in the system. The outer dimension of the - array is host task id, and the inner dimension is device ordinal, - so e.g., global_tpu_id[x][y] indicates the global id of device - /task:x/device:TPU_NODE:y. - name: name of the operator. + device_assignment: If not `None`, a `DeviceAssignment` describing the + mapping between logical cores in the computation with physical cores in + the TPU topology. Uses a default device assignment if `None`. The + `DeviceAssignment` may be omitted if each shard of the computation uses + only one core, and there is either only one shard, or the number of shards + is equal to the number of cores in the TPU system. + name: The name of the operator. Returns: A list of output tensors. Raises: - ValueError: if num_shards <= 0 + ValueError: If `num_shards <= 0` """ return shard( computation, inputs, num_shards=num_shards, infeed_queue=infeed_queue, - global_tpu_id=global_tpu_id, + device_assignment=device_assignment, name=name) def rewrite(computation, inputs=None, infeed_queue=None, - global_tpu_id=None, + device_assignment=None, name=None): """Rewrites `computation` for execution on a TPU system. Args: - computation: a Python function that builds a computation to apply + computation: A Python function that builds a computation to apply to the input. If the function takes n inputs, 'inputs' should be a list of n tensors. If the function returns m outputs, rewrite will return a list of m tensors. - inputs: a list of input tensors or None (equivalent to an empty list). - infeed_queue: if not None, the InfeedQueue from which to append a tuple + inputs: A list of input tensors or `None` (equivalent to an empty list). + infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple of arguments as inputs to `computation`. - global_tpu_id: if not None, a Numpy 2D array indicating the global - id of each TPU device in the system. The outer dimension of the - array is host task id, and the inner dimension is device ordinal, - so e.g., global_tpu_id[x][y] indicates the global id of device - /task:x/device:TPU_NODE:y. - name: name of the operator. + device_assignment: if not `None`, a `DeviceAssignment` describing the + mapping between logical cores in the computation with physical cores in + the TPU topology. May be omitted for a single-core computation, in which + case the core attached to task 0, TPU device 0 is used. + name: The name of the operator. Returns: A list of output tensors. """ @@ -597,6 +608,6 @@ def rewrite(computation, computation, None if inputs is None else [inputs], infeed_queue=infeed_queue, - global_tpu_id=global_tpu_id, + device_assignment=device_assignment, name=name)[0] # pylint: enable=indexing-exception diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py index 1c8ea63f00..42ac6eb680 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py @@ -513,7 +513,7 @@ class InfeedQueue(object): # for automatic placement of input pipelines. def split_inputs_and_generate_enqueue_ops(self, inputs, - global_tpu_id=None, + device_assignment=None, placement_function=None, tpu_ordinal_function=None): """POORLY-PERFORMING ON MULTI-HOST SYSTEMS. @@ -536,14 +536,12 @@ class InfeedQueue(object): Args: inputs: a list of Tensors which indicates the types and shapes of the queue tuple. - global_tpu_id: if not None, a Numpy 2D array indicating the global - id of each TPU device in the system. The outer dimension of the - array is host task id, and the inner dimension is device ordinal, - so e.g., global_tpu_id[x][y] indicates the global id of device - /task:x/device:TPU_NODE:y. If global_tpu_id is not None, but - placement_function and ordinal_function are None, then global_tpu_id - will be used to place infeed on the TPUs with the first k global ids, - where k is the number of shards in the queue. + device_assignment: if not `None`, a TPU `DeviceAssignment`. If + device_assignment is not `None`, but `placement_function` and + `ordinal_function` are None, then `device_assignment` will be used to + place infeeds on the first k TPU shards, where k is the number of shards + in the queue. If all three are `None`, then default placement and + ordinal functions are used. placement_function: if not None, a function that takes the shard index as input and returns a device string indicating which device the shard's infeed should be placed on. If placement_function @@ -567,22 +565,18 @@ class InfeedQueue(object): types of the elements of inputs are not compatible with the frozen configuration. """ - if global_tpu_id is None: + if device_assignment is None: if placement_function is None: placement_function = self._default_placement_function if tpu_ordinal_function is None: tpu_ordinal_function = self._default_ordinal_function else: - global_id_map = {} - for host, devices in enumerate(global_tpu_id): - for ordinal, global_id in enumerate(devices): - global_id_map[global_id] = (host, ordinal) def _placement_function_from_map(index): - return "/task:%d/device:CPU:0" % global_id_map[index][0] + return device_assignment.host_device(replica=index) def _ordinal_function_from_map(index): - return global_id_map[index][1] + return device_assignment.tpu_ordinal(replica=index) if placement_function is None: placement_function = _placement_function_from_map -- GitLab From 72d72194c1d06e66f7893915a804932b56bef5db Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 10:58:51 -0800 Subject: [PATCH 0837/1801] Simplify Mul(x, x) to Square(x) to cut the number of loads in half. PiperOrigin-RevId: 177042256 --- .../optimizers/arithmetic_optimizer.cc | 21 +++++- .../optimizers/arithmetic_optimizer_test.cc | 66 +++++++++++++------ 2 files changed, 64 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index d9a544d21f..ec5d2abd7a 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -81,7 +81,6 @@ Status SetTensorValue(DataType dtype, int value, Tensor* tensor) { return Status::OK(); } - template bool AreInversePermutations(const std::vector& a, const std::vector& b) { if (a.size() != b.size()) { @@ -170,7 +169,6 @@ bool IsInnerMatrixTransposeNode(const NodeDef& transpose_node, return false; } - bool MaybeAddControlInput(const string& new_input, NodeDef* node, GraphDef* graph, NodeMap* node_map) { bool already_exists = false; @@ -785,6 +783,25 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } + if (node->op() == "Mul" && node->input(0) == node->input(1) && + node_map->GetNode(node->name() + "_square") == nullptr) { + NodeDef* factor = node_map->GetNode(node->input(0)); + VLOG(2) << "Found square : " << node->DebugString(); + if (factor != nullptr) { + NodeDef* new_mul_node = graph_def->add_node(); + *new_mul_node = *node; + new_mul_node->set_op("Square"); + new_mul_node->set_name(strings::StrCat(node->name(), "_square")); + new_nodes->push_back(new_mul_node); + node_map->AddNode(new_mul_node->name(), new_mul_node); + for (int i = 1; i < new_mul_node->input_size(); ++i) { + new_mul_node->set_input(i - 1, new_mul_node->input(i)); + } + new_mul_node->mutable_input()->RemoveLast(); + return new_mul_node->name(); + } + } + if (node->input_size() > 0 && IsAggregate(*node)) { // Discard aggregate nodes with a single input. if (node->input_size() == 1) { diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 8cec4e4255..6bbc64c7a4 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -58,7 +58,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output c1 = ops::Const(s.WithOpName("c1"), {3.14, 2.7}, {1, 2}); Output c2 = ops::Const(s.WithOpName("c2"), {3.14, 2.7}, {1, 2}); - Output mul = ops::Mul(s.WithOpName("mul"), c1, c2); + Output div = ops::Div(s.WithOpName("div"), c1, c2); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); @@ -74,11 +74,11 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { EXPECT_EQ(2, output.node_size()); const NodeDef& new_c1 = output.node(0); EXPECT_EQ("c1", new_c1.name()); - const NodeDef& new_mul = output.node(1); - EXPECT_EQ("mul", new_mul.name()); - EXPECT_EQ(2, new_mul.input_size()); - EXPECT_EQ("c1", new_mul.input(0)); - EXPECT_EQ("c1", new_mul.input(1)); + const NodeDef& new_div = output.node(1); + EXPECT_EQ("div", new_div.name()); + EXPECT_EQ(2, new_div.input_size()); + EXPECT_EQ("c1", new_div.input(0)); + EXPECT_EQ("c1", new_div.input(1)); } TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) { @@ -89,9 +89,9 @@ TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) { auto check2 = ops::CheckNumerics(s.WithOpName("check2"), c, "foo"); auto assert1 = ops::Assert(s.WithOpName("assert1"), p, {c}); auto assert2 = ops::Assert(s.WithOpName("assert2"), p, {c}); - Output mul = ops::Multiply(s.WithOpName("mul").WithControlDependencies( - {assert1.operation, assert2.operation}), - check1, check2); + Output div = ops::Div(s.WithOpName("div").WithControlDependencies( + {assert1.operation, assert2.operation}), + check1, check2); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); @@ -105,12 +105,12 @@ TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) { TF_EXPECT_OK(status); EXPECT_EQ(5, output.node_size()); - const NodeDef& new_mul = output.node(3); - EXPECT_EQ(4, new_mul.input_size()); - EXPECT_EQ("check1", new_mul.input(0)); - EXPECT_EQ("check1", new_mul.input(1)); - EXPECT_EQ("^assert1", new_mul.input(2)); - EXPECT_EQ("^assert1", new_mul.input(3)); + const NodeDef& new_div = output.node(3); + EXPECT_EQ(4, new_div.input_size()); + EXPECT_EQ("check1", new_div.input(0)); + EXPECT_EQ("check1", new_div.input(1)); + EXPECT_EQ("^assert1", new_div.input(2)); + EXPECT_EQ("^assert1", new_div.input(3)); } TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { @@ -119,7 +119,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { Output c2 = ops::Const(s.WithOpName("c2"), {3.0f, 4.0f}, {1, 2}); Output mul1 = ops::Mul(s.WithOpName("mul1"), c1, c2); Output mul2 = ops::Mul(s.WithOpName("mul2"), c2, c1); - Output mul3 = ops::Mul(s.WithOpName("mul3"), mul1, mul2); + Output div1 = ops::Div(s.WithOpName("div1"), mul1, mul2); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); @@ -142,11 +142,35 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { EXPECT_EQ(2, new_mul1.input_size()); EXPECT_EQ("c1", new_mul1.input(0)); EXPECT_EQ("c2", new_mul1.input(1)); - const NodeDef& new_mul3 = output.node(3); - EXPECT_EQ("mul3", new_mul3.name()); - EXPECT_EQ(2, new_mul3.input_size()); - EXPECT_EQ("mul1", new_mul3.input(0)); - EXPECT_EQ("mul1", new_mul3.input(1)); + const NodeDef& new_div1 = output.node(3); + EXPECT_EQ("div1", new_div1.name()); + EXPECT_EQ(2, new_div1.input_size()); + EXPECT_EQ("mul1", new_div1.input(0)); + EXPECT_EQ("mul1", new_div1.input(1)); +} + +TEST_F(ArithmeticOptimizerTest, MulToSquare) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output c = ops::Const(s.WithOpName("c"), {1.0f, 2.0f}, {1, 2}); + Output d = ops::Const(s.WithOpName("d"), {3.0f, 4.0f}, {1, 2}); + Output mul = ops::Mul(s.WithControlDependencies(d).WithOpName("mul"), c, c); + Output id = ops::Identity(s.WithOpName("id"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(5, output.node_size()); + EXPECT_EQ("Square", output.node(4).op()); + EXPECT_EQ("mul_square", output.node(4).name()); + EXPECT_EQ(2, output.node(4).input_size()); + EXPECT_EQ("c", output.node(4).input(0)); + EXPECT_EQ("^d", output.node(4).input(1)); + EXPECT_EQ("id", output.node(3).name()); + EXPECT_EQ("mul_square", output.node(3).input(0)); } TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsReal) { -- GitLab From 00e566cdfb5145d88414dffb847fd303950d18bf Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 27 Nov 2017 11:19:40 -0800 Subject: [PATCH 0838/1801] [XLA:CPU] Enable some dot simplifications Rename enable_dot_simplification to enable_dot_strength_reduction and make some dot simplifications unconditional. PiperOrigin-RevId: 177045451 --- .../xla/service/algebraic_simplifier.cc | 24 ++++++++++--------- .../xla/service/algebraic_simplifier.h | 6 ++--- .../compiler/xla/service/cpu/cpu_compiler.cc | 4 ++-- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 5dcc1318c9..71491218aa 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -180,17 +180,17 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { static bool Run( HloComputation* computation, bool is_layout_sensitive, AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback, - bool enable_dot_simplification, bool enable_conv_simplification); + bool enable_dot_strength_reduction, bool enable_conv_simplification); private: explicit AlgebraicSimplifierVisitor( HloComputation* computation, bool is_layout_sensitive, AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback, - bool enable_dot_simplification, bool enable_conv_simplification) + bool enable_dot_strength_reduction, bool enable_conv_simplification) : computation_(computation), is_layout_sensitive_(is_layout_sensitive), valid_bitcast_callback_(std::move(valid_bitcast_callback)), - enable_dot_simplification_(enable_dot_simplification), + enable_dot_strength_reduction_(enable_dot_strength_reduction), enable_conv_simplification_(enable_conv_simplification) {} // Convenience method for replacing an instruction with a bitcast. @@ -265,8 +265,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { // Callback used to determine if a bitcast is possible. AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback_; - // Disable dot simplication on platforms where it causes a slowdown. - bool enable_dot_simplification_; + // Disable dot strength reduction on platforms where it causes a slowdown. + bool enable_dot_strength_reduction_; // Disable convolution simplication on platforms where it causes a slowdown. bool enable_conv_simplification_; @@ -275,10 +275,10 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { bool AlgebraicSimplifierVisitor::Run( HloComputation* computation, bool is_layout_sensitive, AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback, - bool enable_dot_simplification, bool enable_conv_simplification) { + bool enable_dot_strength_reduction, bool enable_conv_simplification) { AlgebraicSimplifierVisitor visitor( computation, is_layout_sensitive, std::move(valid_bitcast_callback), - enable_dot_simplification, enable_conv_simplification); + enable_dot_strength_reduction, enable_conv_simplification); TF_CHECK_OK(computation->Accept(&visitor)); return visitor.changed_; } @@ -577,9 +577,7 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { auto lhs = dot->mutable_operand(0); auto rhs = dot->mutable_operand(1); - if (!enable_dot_simplification_) { - return Status::OK(); - } + // Only optimize F32 dot operations where the dot, rhs and lhs are rank 2 or // below. if (dot->shape().element_type() != F32 || ShapeUtil::Rank(lhs->shape()) > 2 || @@ -606,6 +604,10 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { dot, HloInstruction::CreateTranspose(dot->shape(), new_dot, {1, 0})); } + if (!enable_dot_strength_reduction_) { + return Status::OK(); + } + // Simplify outer product into multiply with implicit broadcasting. // // A dot(a[M, 1], b[1, N]) = multiply(a [M,1], b [1, N]) @@ -1703,7 +1705,7 @@ StatusOr AlgebraicSimplifier::Run(HloModule* module) { for (auto* comp : module->MakeNonfusionComputations()) { if (AlgebraicSimplifierVisitor::Run( comp, is_layout_sensitive_, valid_bitcast_callback_, - enable_dot_simplification_, enable_conv_simplification_)) { + enable_dot_strength_reduction_, enable_conv_simplification_)) { changed = true; } } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index a9f476178c..43315f5cdc 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -40,11 +40,11 @@ class AlgebraicSimplifier : public HloPassInterface { // bitcasts. AlgebraicSimplifier(bool is_layout_sensitive, ValidBitcastCallback valid_bitcast_callback, - bool enable_dot_simplification = true, + bool enable_dot_strength_reduction = true, bool enable_conv_simplification = true) : is_layout_sensitive_(is_layout_sensitive), valid_bitcast_callback_(std::move(valid_bitcast_callback)), - enable_dot_simplification_(enable_dot_simplification), + enable_dot_strength_reduction_(enable_dot_strength_reduction), enable_conv_simplification_(enable_conv_simplification) {} ~AlgebraicSimplifier() override = default; tensorflow::StringPiece name() const override { return "algsimp"; } @@ -58,7 +58,7 @@ class AlgebraicSimplifier : public HloPassInterface { ValidBitcastCallback valid_bitcast_callback_; // Enable dot simplication on platforms where it is profitable. - bool enable_dot_simplification_; + bool enable_dot_strength_reduction_; // Enable convolution simplication on platforms where it is profitable. bool enable_conv_simplification_; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index ff6042ae19..99dae793ab 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -287,7 +287,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { pass.AddPass( /*is_layout_sensitive=*/false, [](const Shape&, const Shape&) { return false; }, - /*enable_dot_simplification=*/false); + /*enable_dot_strength_reduction=*/false); pass.AddPass(); pass.AddPass(); pass.AddPass(); @@ -316,7 +316,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { pipeline.AddPass>( /*is_layout_sensitive=*/true, [](const Shape&, const Shape&) { return true; }, - /*enable_dot_simplification=*/false); + /*enable_dot_strength_reduction=*/false); pipeline.AddPass(/*is_layout_sensitive=*/true); // Outline ops in the entry computation into calls to subcomputations. const int max_parallelism = -- GitLab From 5b7f4f122a16decfa2d64ffdb2882966981562eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 12:03:24 -0800 Subject: [PATCH 0839/1801] Adds SpaceToDepth and DepthToSpace to list of supported NCHW ops in Grappler layout optimizer. PiperOrigin-RevId: 177051825 --- tensorflow/core/grappler/optimizers/layout_optimizer.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index d25d9d99c5..c760efac70 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -62,7 +62,9 @@ std::set GetOpsFormatSupported() { "FusedBatchNormGrad", "FusedConv2DBiasActivation", "MaxPool", - "MaxPoolGrad"}; + "MaxPoolGrad", + "SpaceToDepth", + "DepthToSpace"}; return ops_format_supported; } -- GitLab From 75350d385533b5aae9c33fc52ca90d359db6cc9d Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Mon, 27 Nov 2017 12:11:33 -0800 Subject: [PATCH 0840/1801] Add the missing GPU configs to avoid the division by zero (gflops) error. PiperOrigin-RevId: 177053156 --- tensorflow/python/grappler/cluster_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py index a71a860a59..f1f02963de 100644 --- a/tensorflow/python/grappler/cluster_test.py +++ b/tensorflow/python/grappler/cluster_test.py @@ -93,7 +93,10 @@ class ClusterTest(test.TestCase): mg = meta_graph.create_meta_graph_def(graph=g) grappler_item = item.Item(mg) device_properties = device_properties_pb2.DeviceProperties( - type='GPU', environment={ + type='GPU', + frequency=1000, + num_cores=60, + environment={ 'architecture': '7' }) named_device = device_properties_pb2.NamedDevice( -- GitLab From d589382b1c17fbaae09e8877d3b8fa784100be70 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 12:33:38 -0800 Subject: [PATCH 0841/1801] Changed default approximation interface in LayerCollection to use setter functions to avoid silent failures when user misspells the property names. PiperOrigin-RevId: 177056030 --- tensorflow/contrib/kfac/python/ops/layer_collection.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index d8781231ed..3a005ee39d 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -184,8 +184,7 @@ class LayerCollection(object): def default_generic_approximation(self): return self._default_generic_approximation - @default_generic_approximation.setter - def default_generic_approximation(self, value): + def set_default_generic_approximation(self, value): if value not in _GENERIC_APPROX_TO_BLOCK_TYPES: raise ValueError( "{} is not a valid approximation for generic variables.".format( @@ -196,8 +195,7 @@ class LayerCollection(object): def default_fully_connected_approximation(self): return self._default_fully_connected_approximation - @default_fully_connected_approximation.setter - def default_fully_connected_approximation(self, value): + def set_default_fully_connected_approximation(self, value): if value not in _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES: raise ValueError( "{} is not a valid approximation for fully connected layers.".format( @@ -208,8 +206,7 @@ class LayerCollection(object): def default_conv2d_approximation(self): return self._default_convolution_2d_approximation - @default_conv2d_approximation.setter - def default_conv2d_approximation(self, value): + def set_default_conv2d_approximation(self, value): if value not in _CONV2D_APPROX_TO_BLOCK_TYPES: raise ValueError( "{} is not a valid approximation for 2d convolutional layers.".format( -- GitLab From f327ae08506cddae3bf15c8ead901269ce2c4bf9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 27 Nov 2017 12:59:02 -0800 Subject: [PATCH 0842/1801] Improved the item_tst.py regression test. PiperOrigin-RevId: 177059258 --- tensorflow/python/grappler/item_test.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py index 69835761bc..71c68d25cd 100644 --- a/tensorflow/python/grappler/item_test.py +++ b/tensorflow/python/grappler/item_test.py @@ -25,6 +25,7 @@ from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.grappler import item +from tensorflow.python.ops import control_flow_ops from tensorflow.python.platform import test @@ -59,6 +60,7 @@ class ItemTest(test.TestCase): a = constant_op.constant(10) b = constant_op.constant(20) c = a + b + z = control_flow_ops.no_op() train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) train_op.append(c) mg = meta_graph.create_meta_graph_def(graph=g) @@ -69,9 +71,12 @@ class ItemTest(test.TestCase): for node in grappler_item.metagraph.graph_def.node: node_prop = op_properties[node.name] - self.assertEqual(1, len(node_prop)) - self.assertEqual(dtypes.int32, node_prop[0].dtype) - self.assertEqual(tensor_shape.scalar(), node_prop[0].shape) + if node.name == z.name: + self.assertEqual(0, len(node_prop)) + else: + self.assertEqual(1, len(node_prop)) + self.assertEqual(dtypes.int32, node_prop[0].dtype) + self.assertEqual(tensor_shape.scalar(), node_prop[0].shape) def testUpdates(self): with ops.Graph().as_default() as g: -- GitLab From c67a98530d525d87cf7f7f20114d22593b930763 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 13:04:12 -0800 Subject: [PATCH 0843/1801] Exposes the table_ref in IdTableWithHashBuckets. PiperOrigin-RevId: 177060029 --- .../python/kernel_tests/lookup_ops_test.py | 18 ++++++++++++++++++ tensorflow/python/ops/lookup_ops.py | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index d4bc71f1c8..e4c799cb1c 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -488,6 +488,20 @@ class IndexTableFromFile(test.TestCase): self.assertRaises(ValueError, table.lookup, constant_op.constant(["salad", "surgery", "tarkus"])) + def test_index_table_from_file_table_ref_with_oov_buckets(self): + vocabulary_file = self._createVocabFile("f2i_vocab9.txt") + with self.test_session(): + table = lookup_ops.index_table_from_file( + vocabulary_file=vocabulary_file, num_oov_buckets=1) + self.assertIsNotNone(table.table_ref) + + def test_index_table_from_file_table_ref_without_oov_buckets(self): + vocabulary_file = self._createVocabFile("f2i_vocab10.txt") + with self.test_session(): + table = lookup_ops.index_table_from_file( + vocabulary_file=vocabulary_file, num_oov_buckets=0) + self.assertIsNotNone(table.table_ref) + class KeyValueTensorInitializerTest(test.TestCase): @@ -1431,6 +1445,10 @@ class IdTableWithHashBucketsTest(test.TestCase): oov_buckets, hasher_spec=lookup_ops.StrongHashSpec([None, 2])) + def testIdTableWithHashBucketsNoInnerTable(self): + with self.test_session(): + table = lookup_ops.IdTableWithHashBuckets(None, num_oov_buckets=1) + self.assertIsNone(table.table_ref) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index 156e415735..8bc0bc7d06 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -789,6 +789,25 @@ class IdTableWithHashBuckets(LookupInterface): with ops.name_scope(None, "init"): return control_flow_ops.no_op() + @property + def table_ref(self): + """Returns the table_ref of the underlying table, if one exists. + + Only use the table_ref directly if you know what you are doing. The + table_ref does not have the "hash bucket" functionality, as that is provided + by this class. + + One possible use of the table_ref is subtokenization, i.e. ops which + dynamically decompose tokens into subtokens based on the contents of the + table_ref. + + Returns: + the underlying table_ref, or None if there is no underlying table + """ + if self._table is not None: + return self._table.table_ref + return None + def size(self, name=None): """Compute the number of elements in this table.""" with ops.name_scope(name, "%s_Size" % self.name) as scope: -- GitLab From 7726333292b9e3d97a033617ee53099f6c4fedd5 Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Mon, 27 Nov 2017 14:08:17 -0700 Subject: [PATCH 0844/1801] =?UTF-8?q?Added=20Ubuntu=2016.04=20Dockerfile?= =?UTF-8?q?=20with=20TF=201.4=20optimized=20for=20CPU=20with=20Inte?= =?UTF-8?q?=E2=80=A6=20(#14468)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added Ubuntu 16.04 Dockerfile with TF 1.4 optimized for CPU with Intel(R) MKL * Edits per comments/code review. Now pulls FROM tensorflow/tensorflow. Removed superfluous build options. Now installs emacs in addition to vim. * More edits per reviewer feedback. Removed dependency on Bazel 0.7.0. --- .../tools/docker/Dockerfile.devel-cpu-mkl | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 tensorflow/tools/docker/Dockerfile.devel-cpu-mkl diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl new file mode 100644 index 0000000000..8180e5e7fb --- /dev/null +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -0,0 +1,85 @@ +FROM tensorflow/tensorflow:latest-devel + +LABEL maintainer="Clayne Robison" + +# These arguments are parameterized. Use --build-args to override. +ARG TF_BRANCH=r1.4 +ARG WHL_DIR=/whl + +RUN apt-get update && apt-get install -y --no-install-recommends \ + golang \ + vim \ + emacs \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN pip --no-cache-dir install --upgrade \ + pip setuptools + +RUN pip --no-cache-dir install wheel + +# Download and build TensorFlow. +WORKDIR / +RUN rm -rf tensorflow && \ + git clone https://github.com/tensorflow/tensorflow.git && \ + cd tensorflow && \ + git checkout ${TF_BRANCH} +WORKDIR /tensorflow + +# Configure the build for CPU with MKL by accepting default build options and +# setting library locations +ENV CI_BUILD_PYTHON=python \ + LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + PYTHON_BIN_PATH=/usr/bin/python \ + PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \ + CC_OPT_FLAGS='-march=native' \ + TF_NEED_JEMALLOC=0 \ + TF_NEED_GCP=0 \ + TF_NEED_CUDA=0 \ + TF_NEED_HDFS=0 \ + TF_NEED_S3=0 \ + TF_NEED_OPENCL=0 \ + TF_NEED_GDR=0 \ + TF_ENABLE_XLA=0 \ + TF_NEED_VERBS=0 \ + TF_NEED_MPI=0 +RUN ./configure + +# Build and Install TensorFlow. +# The 'mkl' option builds with Intel(R) Math Kernel Library (MKL), which detects +# the platform it is currently running on and takes appropriately optimized +# paths. The -march=native option is for code that is not in MKL, and assumes +# this container will be run on the same architecture on which it is built. +RUN LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + bazel build --config=mkl \ + --config="opt" \ + --copt="-march=native" \ + --copt="-O3" \ + //tensorflow/tools/pip_package:build_pip_package && \ + mkdir ${WHL_DIR} && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package ${WHL_DIR} + +# Clean up Bazel cache when done, but leave the whl. +# This will upgrade the default Tensorflow version with the Intel MKL version +RUN pip --no-cache-dir install --upgrade ${WHL_DIR}/tensorflow-*.whl && \ + rm -rf /root/.cache + +WORKDIR /root + +#add welcome message with instructions + +RUN echo '[ ! -z "$TERM" -a -r /etc/motd ] && cat /etc/issue && cat /etc/motd' \ + >> /etc/bash.bashrc \ + ; echo "\ +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\ +| \n\ +| Docker container running Ubuntu \n\ +| with TensorFlow ${TF_BRANCH} optimized for CPU \n\ +| with Intel(R) MKL \n\ +| \n\ +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\ +\n "\ + > /etc/motd + +CMD ["/bin/bash"] -- GitLab From 2110de0059a139ed5f4dd15c3e79102cceecad74 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 13:38:09 -0800 Subject: [PATCH 0845/1801] Fix ASAN failure in tests RELNOTES: None PiperOrigin-RevId: 177064374 --- tensorflow/c/eager/c_api_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 03843fa913..3fe0b7efa1 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -325,7 +325,8 @@ TEST(CAPI, Function) { ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); TF_DeleteFunction(fn); - TF_Tensor* t = TF_AllocateTensor(TF_INT32, nullptr, 0, 1); + TF_Tensor* t = + TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32)); *reinterpret_cast(TF_TensorData(t)) = 42; TFE_TensorHandle* h = TFE_NewTensorHandle(t, status); ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); -- GitLab From db9533e4f5fa940f704996cd6d38f40b13d40dff Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 27 Nov 2017 13:42:53 -0800 Subject: [PATCH 0846/1801] Enable a Session tests using function and set_device These tests were disabled for C API because those features were not implemented. They are now. PiperOrigin-RevId: 177064985 --- tensorflow/python/client/session_test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 6b45a5f313..3e85410a97 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -1583,7 +1583,6 @@ class SessionTest(test_util.TensorFlowTestCase): sess.run(enqueue_op) self.assertEqual(sess.run(q.size()), num_epochs * 2) - @test_util.disable_c_api # set_device does not work with C API def testRegisterFetchAndFeedConversionFunctions(self): class SquaredTensor(object): def __init__(self, tensor): @@ -1733,11 +1732,9 @@ class SessionTest(test_util.TensorFlowTestCase): result = sess.run(f) self.assertEqual(result, 2.0) - @test_util.disable_c_api # functions don't work with C API def testAddFunctionToSession(self): self.runTestAddFunctionToSession() - @test_util.disable_c_api # functions don't work with C API def testAddFunctionToGrpcSession(self): server = server_lib.Server.create_local_server() self.runTestAddFunctionToSession(server.target) -- GitLab From 6cc7e387fc1b642d363b6a18877a411382a82fa5 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 27 Nov 2017 14:06:23 -0800 Subject: [PATCH 0847/1801] [TF:XLA] Implement StatelessRandomUniform and StatelessRandomNormal using the ThreeFry counter-based PRNG. Extend stateless ops to allow 32-bit integer seeds, with a 64-bit default. PiperOrigin-RevId: 177068747 --- tensorflow/compiler/tests/BUILD | 13 + .../tests/stateless_random_ops_test.py | 118 ++++++++ tensorflow/compiler/tf2xla/kernels/BUILD | 2 + .../tf2xla/kernels/stateless_random_ops.cc | 279 ++++++++++++++++++ .../compiler/xla/service/user_computation.cc | 8 + .../kernel_tests/stateless_random_ops_test.py | 21 +- .../core/kernels/stateless_random_ops.cc | 15 +- tensorflow/core/ops/stateless_random_ops.cc | 3 +- 8 files changed, 445 insertions(+), 14 deletions(-) create mode 100644 tensorflow/compiler/tests/stateless_random_ops_test.py create mode 100644 tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 79c4befd36..6cad2b0824 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -457,6 +457,19 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "stateless_random_ops_test", + size = "small", + srcs = ["stateless_random_ops_test.py"], + tags = ["optonly"], + deps = [ + ":xla_test", + "//tensorflow/contrib/stateless", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + ], +) + tf_xla_py_test( name = "tensor_array_ops_test", size = "small", diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py new file mode 100644 index 0000000000..4336ebdbd1 --- /dev/null +++ b/tensorflow/compiler/tests/stateless_random_ops_test.py @@ -0,0 +1,118 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for stateless random-number generation ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.contrib import stateless +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class StatelessRandomOpsTest(XLATestCase): + """Test cases for stateless random-number generator operators.""" + + def _random_types(self): + return [dtypes.float32] + + def testDeterminism(self): + # Stateless values should be equal iff the seeds are equal (roughly) + with self.test_session(), self.test_scope(): + seed_t = array_ops.placeholder(dtypes.int32, shape=[2]) + seeds = [(x, y) for x in range(5) for y in range(5)] * 3 + for stateless_op in [ + stateless.stateless_random_uniform, stateless.stateless_random_normal + ]: + for shape in (), (3,), (2, 5): + for dtype in self._random_types(): + pure = stateless_op(shape, seed=seed_t, dtype=dtype) + values = [(seed, pure.eval(feed_dict={ + seed_t: seed + })) for seed in seeds] + for s0, v0 in values: + for s1, v1 in values: + self.assertEqual(s0 == s1, np.all(v0 == v1)) + + def testRandomUniformIsInRange(self): + with self.test_session() as sess, self.test_scope(): + for dtype in self._random_types(): + seed_t = array_ops.placeholder(dtypes.int32, shape=[2]) + x = stateless.stateless_random_uniform( + shape=[1000], seed=seed_t, dtype=dtype) + y = sess.run(x, {seed_t: [0x12345678, 0xabcdef12]}) + self.assertTrue(np.all(y >= 0)) + self.assertTrue(np.all(y < 1)) + + def _chi_squared(self, x, bins): + """Pearson's Chi-squared test.""" + x = np.ravel(x) + n = len(x) + histogram, _ = np.histogram(x, bins=bins, range=(0, 1)) + expected = n / float(bins) + return np.sum(np.square(histogram - expected) / expected) + + def testDistributionOfStatelessRandomUniform(self): + """Use Pearson's Chi-squared test to test for uniformity.""" + with self.test_session() as sess, self.test_scope(): + for dtype in self._random_types(): + seed_t = array_ops.placeholder(dtypes.int32, shape=[2]) + n = 1000 + x = stateless.stateless_random_uniform( + shape=[n], seed=seed_t, dtype=dtype) + y = sess.run(x, {seed_t: [565656, 121212]}) + # Tests that the values are distributed amongst 10 bins with equal + # probability. 16.92 is the Chi^2 value for 9 degrees of freedom with + # p=0.05. This test is probabilistic and would be flaky if the random + # seed were not fixed. + self.assertTrue(self._chi_squared(y, 10) < 16.92) + + def _normal_cdf(self, x): + """Cumulative distribution function for a standard normal distribution.""" + return 0.5 + 0.5 * np.vectorize(math.erf)(x / math.sqrt(2)) + + def _anderson_darling(self, x): + """Anderson-Darling test for a standard normal distribution.""" + x = np.sort(np.ravel(x)) + n = len(x) + i = np.linspace(1, n, n) + z = np.sum((2 * i - 1) * np.log(self._normal_cdf(x)) + + (2 * (n - i) + 1) * np.log(1 - self._normal_cdf(x))) + return -n - z / n + + def testDistributionOfStatelessRandomNormal(self): + """Use Anderson-Darling test to test distribution appears normal.""" + with self.test_session() as sess, self.test_scope(): + for dtype in self._random_types(): + seed_t = array_ops.placeholder(dtypes.int32, shape=[2]) + n = 1000 + x = stateless.stateless_random_normal( + shape=[n], seed=seed_t, dtype=dtype) + y = sess.run(x, {seed_t: [25252, 314159]}) + # The constant 2.492 is the 5% critical value for the Anderson-Darling + # test where the mean and variance are known. This test is probabilistic + # so to avoid flakiness the seed is fixed. + self.assertTrue(self._anderson_darling(y) < 2.492) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 948d7f0b40..6302fece1f 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -65,6 +65,7 @@ tf_kernel_library( "spacetodepth_op.cc", "split_op.cc", "stack_ops.cc", + "stateless_random_ops.cc", "strided_slice_op.cc", "tensor_array_ops.cc", "tile_ops.cc", @@ -96,6 +97,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:linalg_ops_op_lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:stateless_random_ops_op_lib", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:concat_lib", "//tensorflow/core/kernels:constant_op", diff --git a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc new file mode 100644 index 0000000000..b10880de77 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc @@ -0,0 +1,279 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/lib/core/casts.h" +#include "tensorflow/core/lib/math/math_util.h" + +namespace tensorflow { +namespace { + +// Rotates a 32-bit integer 'v' left by 'distance' bits. +xla::ComputationDataHandle RotateLeftS32(xla::ComputationBuilder* builder, + const xla::ComputationDataHandle& v, + int distance) { + return builder->Or( + builder->ShiftLeft(v, builder->ConstantR0(distance)), + builder->ShiftRightLogical(v, builder->ConstantR0(32 - distance))); +} + +// TODO(b/65209188): add a primitive XOR to XLA and call it here, rather than +// building XOR out of other bitwise operators. +xla::ComputationDataHandle BitwiseXor(xla::ComputationBuilder* builder, + const xla::ComputationDataHandle& x, + const xla::ComputationDataHandle& y) { + return builder->Or(builder->And(x, builder->Not(y)), + builder->And(builder->Not(x), y)); +} + +using ThreeFry2x32State = std::array; + +// Implements the ThreeFry counter-based PRNG algorithm. +// Salmon et al. SC 2011. Parallel random numbers: as easy as 1, 2, 3. +// http://www.thesalmons.org/john/random123/papers/random123sc11.pdf +ThreeFry2x32State ThreeFry2x32(xla::ComputationBuilder* builder, + ThreeFry2x32State input, ThreeFry2x32State key) { + // Rotation distances specified by the Threefry2x32 algorithm. + constexpr std::array rotations = {13, 15, 26, 6, 17, 29, 16, 24}; + ThreeFry2x32State x; + + std::array ks; + // 0x1BD11BDA is a parity constant specified by the ThreeFry2x32 algorithm. + ks[2] = builder->ConstantR0(0x1BD11BDA); + for (int i = 0; i < 2; ++i) { + ks[i] = key[i]; + x[i] = input[i]; + ks[2] = BitwiseXor(builder, ks[2], key[i]); + } + + x[0] = builder->Add(x[0], ks[0]); + x[1] = builder->Add(x[1], ks[1]); + + // Performs a single round of the Threefry2x32 algorithm, with a rotation + // amount 'rotation'. + auto round = [builder](ThreeFry2x32State v, int rotation) { + v[0] = builder->Add(v[0], v[1]); + v[1] = RotateLeftS32(builder, v[1], rotation); + v[1] = BitwiseXor(builder, v[0], v[1]); + return v; + }; + + // There are no known statistical flaws with 13 rounds of Threefry2x32. + // We are conservative and use 20 rounds. + x = round(x, rotations[0]); + x = round(x, rotations[1]); + x = round(x, rotations[2]); + x = round(x, rotations[3]); + x[0] = builder->Add(x[0], ks[1]); + x[1] = builder->Add(builder->Add(x[1], ks[2]), builder->ConstantR0(1)); + + x = round(x, rotations[4]); + x = round(x, rotations[5]); + x = round(x, rotations[6]); + x = round(x, rotations[7]); + x[0] = builder->Add(x[0], ks[2]); + x[1] = builder->Add(builder->Add(x[1], ks[0]), builder->ConstantR0(2)); + + x = round(x, rotations[0]); + x = round(x, rotations[1]); + x = round(x, rotations[2]); + x = round(x, rotations[3]); + x[0] = builder->Add(x[0], ks[0]); + x[1] = builder->Add(builder->Add(x[1], ks[1]), builder->ConstantR0(3)); + + x = round(x, rotations[4]); + x = round(x, rotations[5]); + x = round(x, rotations[6]); + x = round(x, rotations[7]); + x[0] = builder->Add(x[0], ks[1]); + x[1] = builder->Add(builder->Add(x[1], ks[2]), builder->ConstantR0(4)); + + x = round(x, rotations[0]); + x = round(x, rotations[1]); + x = round(x, rotations[2]); + x = round(x, rotations[3]); + x[0] = builder->Add(x[0], ks[2]); + x[1] = builder->Add(builder->Add(x[1], ks[0]), builder->ConstantR0(5)); + + return x; +} + +// Returns a tensor of 'shape' random values uniformly distributed in the range +// [minval, maxval) +xla::ComputationDataHandle RandomUniform(xla::ComputationBuilder* builder, + const xla::ComputationDataHandle& seed, + const TensorShape& shape, + double minval, double maxval) { + // Split the seed into two 32-bit scalars to form a key. + auto seed0 = builder->Reshape(builder->Slice(seed, {0}, {1}, {1}), {}); + auto seed1 = builder->Reshape(builder->Slice(seed, {1}, {2}, {1}), {}); + ThreeFry2x32State key = {seed0, seed1}; + const int64 size = shape.num_elements(); + + const int64 half_size = MathUtil::CeilOfRatio(size, 2); + const bool size_is_odd = (half_size * 2 != size); + + // Fill the generator inputs with unique counter values. + ThreeFry2x32State inputs; + TF_CHECK_OK(XlaHelpers::Iota(builder, DT_INT32, half_size, &inputs[0])); + inputs[1] = builder->Add(inputs[0], builder->ConstantR0(half_size)); + ThreeFry2x32State outputs = ThreeFry2x32(builder, inputs, key); + + if (size_is_odd) { + outputs[1] = builder->Slice(outputs[1], {0}, {half_size - 1}, {1}); + } + + auto bits = + builder->Reshape(builder->ConcatInDim(outputs, 0), shape.dim_sizes()); + + // Form 22 random mantissa bits, with a leading 1 bit. The leading 1 bit + // forces the random bits into the mantissa. + constexpr int kFloatBits = 32; + constexpr int kMantissaBits = 23; + bits = builder->Or( + builder->ShiftRightLogical( + bits, builder->ConstantR0(kFloatBits - kMantissaBits)), + builder->ConstantR0(bit_cast(1.0f))); + auto floats = builder->BitcastConvertType(bits, xla::F32); + + // We have a floating point number in the range [1.0, 2.0). + // Subtract 1.0f to shift to the range [0.0, 1.0) + floats = builder->Sub(floats, builder->ConstantR0(1.0f)); + // Multiply and add to shift to the range [minval, maxval). + floats = builder->Mul(floats, builder->ConstantR0(maxval - minval)); + floats = builder->Add(floats, builder->ConstantR0(minval)); + return floats; +} + +// Approximation for the inverse error function from +// Giles, M., "Approximating the erfinv function". +// The approximation has the form: +// w = -log((1 - x) * (1 + x)) +// if ( w < 5 ) { +// w = w - 2.5 +// p = sum_{i=1}^n lq[i]*w^i +// } else { +// w = sqrt(w) - 3 +// p = sum_{i=1}^n gq[i]*w^i +// } +// return p*x +xla::ComputationDataHandle ErfInvF32(xla::ComputationBuilder* b, + const xla::ComputationDataHandle& x, + const TensorShape& shape) { + constexpr int kDegree = 9; + constexpr std::array w_less_than_5_constants = { + 2.81022636e-08f, 3.43273939e-07f, -3.5233877e-06f, + -4.39150654e-06f, 0.00021858087f, -0.00125372503f, + -0.00417768164f, 0.246640727f, 1.50140941f}; + constexpr std::array w_greater_than_5_constants = { + -0.000200214257f, 0.000100950558f, 0.00134934322f, + -0.00367342844f, 0.00573950773f, -0.0076224613f, + 0.00943887047f, 1.00167406f, 2.83297682f}; + + auto one = b->ConstantR0(1.0); + auto w = b->Neg(b->Log(b->Mul(b->Sub(one, x), b->Add(one, x)))); + + auto lt = b->Lt(w, b->ConstantR0(5.0)); + auto coefficient = [&](int i) { + return b->Select( + lt, + b->Broadcast(b->ConstantR0(w_less_than_5_constants[i]), + shape.dim_sizes()), + b->Broadcast(b->ConstantR0(w_greater_than_5_constants[i]), + shape.dim_sizes())); + }; + w = b->Select(lt, b->Sub(w, b->ConstantR0(2.5f)), + b->Sub(b->SqrtF32(w), b->ConstantR0(3.0f))); + auto p = coefficient(0); + for (int i = 1; i < kDegree; ++i) { + p = b->Add(coefficient(i), b->Mul(p, w)); + } + return b->Mul(p, x); +} + +} // namespace + +class StatelessRandomUniformOp : public XlaOpKernel { + public: + explicit StatelessRandomUniformOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationBuilder* builder = ctx->builder(); + + TensorShape shape; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &shape)); + + TensorShape seed_shape = ctx->InputShape(1); + OP_REQUIRES(ctx, seed_shape.dims() == 1 && seed_shape.dim_size(0) == 2, + errors::InvalidArgument("seed must have shape [2], not ", + seed_shape.DebugString())); + xla::ComputationDataHandle seed = ctx->Input(1); + ctx->SetOutput(0, RandomUniform(builder, seed, shape, 0.0, 1.0)); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(StatelessRandomUniformOp); +}; + +// TODO(phawkins): generalize to non-float, non-int32 seed types. +REGISTER_XLA_OP(Name("StatelessRandomUniform") + .TypeConstraint("dtype", DT_FLOAT) + .TypeConstraint("Tseed", DT_INT32), + StatelessRandomUniformOp); + +class StatelessRandomNormalOp : public XlaOpKernel { + public: + explicit StatelessRandomNormalOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + TensorShape shape; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &shape)); + + TensorShape seed_shape = ctx->InputShape(1); + OP_REQUIRES(ctx, seed_shape == TensorShape({2}), + errors::InvalidArgument("seed must have shape [2], not ", + seed_shape.DebugString())); + xla::ComputationDataHandle seed = ctx->Input(1); + xla::ComputationBuilder* builder = ctx->builder(); + auto uniform = RandomUniform(builder, seed, shape, -1.0, 1.0); + // Convert uniform distribution to normal distribution by computing + // sqrt(2) * erfinv(x) + auto normal = builder->Mul(builder->ConstantR0(std::sqrt(2.0)), + ErfInvF32(builder, uniform, shape)); + ctx->SetOutput(0, normal); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(StatelessRandomNormalOp); +}; + +// TODO(phawkins): generalize to non-float, non-int32 seed types. +REGISTER_XLA_OP(Name("StatelessRandomNormal") + .TypeConstraint("dtype", DT_FLOAT) + .TypeConstraint("Tseed", DT_INT32), + StatelessRandomNormalOp); + +} // namespace tensorflow diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index b0b15bb571..4e90491b55 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -1739,6 +1739,14 @@ void PureFunctionalVisitor(const SessionComputation& session_computation, break; } + case OpRequest::kBitcastConvertRequest: { + const ConvertRequest& convert_request = + request.request().bitcast_convert_request(); + PureFunctionalVisitor(session_computation, convert_request.operand(), + num_parameters, visited, is_functional); + break; + } + case OpRequest::kWhileRequest: { const WhileRequest& while_request = request.request().while_request(); PureFunctionalVisitor(session_computation, while_request.init(), diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py index cd4d46aa07..bea6341cfd 100644 --- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py +++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py @@ -69,16 +69,17 @@ class StatelessOpsTest(test.TestCase): def testDeterminism(self): # Stateless values should be equal iff the seeds are equal (roughly) with self.test_session(use_gpu=True): - seed_t = array_ops.placeholder(dtypes.int64, shape=[2]) - seeds = [(x, y) for x in range(5) for y in range(5)] * 3 - for stateless_op, _ in CASES: - for shape in (), (3,), (2, 5): - pure = stateless_op(shape, seed=seed_t) - values = [(seed, pure.eval(feed_dict={seed_t: seed})) - for seed in seeds] - for s0, v0 in values: - for s1, v1 in values: - self.assertEqual(s0 == s1, np.all(v0 == v1)) + for seed_type in [dtypes.int32, dtypes.int64]: + seed_t = array_ops.placeholder(seed_type, shape=[2]) + seeds = [(x, y) for x in range(5) for y in range(5)] * 3 + for stateless_op, _ in CASES: + for shape in (), (3,), (2, 5): + pure = stateless_op(shape, seed=seed_t) + values = [(seed, pure.eval(feed_dict={seed_t: seed})) + for seed in seeds] + for s0, v0 in values: + for s1, v1 in values: + self.assertEqual(s0 == s1, np.all(v0 == v1)) def testShapeType(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/core/kernels/stateless_random_ops.cc b/tensorflow/core/kernels/stateless_random_ops.cc index f6fb0a121d..88fcf542fb 100644 --- a/tensorflow/core/kernels/stateless_random_ops.cc +++ b/tensorflow/core/kernels/stateless_random_ops.cc @@ -50,9 +50,18 @@ class StatelessRandomOpBase : public OpKernel { if (shape.num_elements() == 0) return; // Grab the two seeds - const auto seed = seed_t.flat(); - const uint64 seed0 = internal::SubtleMustCopy(seed(0)); - const uint64 seed1 = internal::SubtleMustCopy(seed(1)); + uint64 seed0; + uint64 seed1; + if (context->input_dtype(1) == DT_INT32) { + const auto seed = seed_t.flat(); + seed0 = internal::SubtleMustCopy(seed(0)); + seed1 = internal::SubtleMustCopy(seed(1)); + } else { + CHECK_EQ(DT_INT64, context->input_dtype(1)); + const auto seed = seed_t.flat(); + seed0 = internal::SubtleMustCopy(seed(0)); + seed1 = internal::SubtleMustCopy(seed(1)); + } // Scramble the seeds so that the user doesn't need to worry about which // part of the seed needs to be strong. diff --git a/tensorflow/core/ops/stateless_random_ops.cc b/tensorflow/core/ops/stateless_random_ops.cc index 7c00fdb99f..3e1f8781fc 100644 --- a/tensorflow/core/ops/stateless_random_ops.cc +++ b/tensorflow/core/ops/stateless_random_ops.cc @@ -38,10 +38,11 @@ static Status StatelessShape(shape_inference::InferenceContext* context) { #define REGISTER_STATELESS_OP(name) \ REGISTER_OP(name) \ .Input("shape: T") \ - .Input("seed: int64") \ + .Input("seed: Tseed") \ .Output("output: dtype") \ .Attr("dtype: {half,float,double} = DT_FLOAT") \ .Attr("T: {int32, int64} = DT_INT32") \ + .Attr("Tseed: {int32, int64} = DT_INT64") \ .SetShapeFn(StatelessShape) // This op is exposed through contrib/stateless only. The interface may change. -- GitLab From c159fbe82abb7817b9e556c3607af2efe30206da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Csomor?= Date: Mon, 27 Nov 2017 23:40:30 +0100 Subject: [PATCH 0848/1801] TFLite: get closer to build with Bazel on Windows (#14791) Bazel cannot yet build TensorFlow Lite on Windows, but this commit gets us closer. In this commit: - make the -Wno-implicit-fallthrough compiler flag in flatbuffers' BUILD file be conditional to non-Windows builds, because MSVC doesn't know this flag - fix the Bazel build command in README.md by removing single quotes around --cxxflags, because it's not needed on Bash and is harmful on Windows (because cmd.exe doesn't remove the single quotes) - fix non-ASCII quotes and apostrophes, as well as some formatting issues in README.md See https://github.com/bazelbuild/bazel/issues/4148 --- tensorflow/contrib/lite/README.md | 47 +++++++++++++---------- third_party/flatbuffers/flatbuffers.BUILD | 7 +++- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 3665a63097..c7464bcc9d 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -1,5 +1,5 @@ # TensorFlow Lite -TensorFlow Lite is TensorFlow’s lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration. +TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration. TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device. @@ -20,18 +20,18 @@ In the demo app, inference is done using the TensorFlow Lite Java API. The demo The fastest path to trying the demo, is to download the pre-built binary [TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) -Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera’s field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified. +Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera's field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified. ## Building in Android Studio using TensorFlow Lite AAR from JCenter The simplest way to compile the demo app, and try out changes to the project code is to use AndroidStudio. - Install the latest version of Android Studio 3 as specified [here](https://developer.android.com/studio/index.html). - Make sure the Android SDK version is greater than 26 and NDK version is greater than 14 (in the Android Studio Settings). - - Import the tensorflow/contrib/lite/java/demo directory as a new Android Studio project. + - Import the `tensorflow/contrib/lite/java/demo` directory as a new Android Studio project. - Click through installing all the Gradle extensions it requests. - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory: - tensorflow/contrib/lite/java/demo/app/src/main/assets/ + `tensorflow/contrib/lite/java/demo/app/src/main/assets/` - Build and run the demo app ## Building TensorFlow Lite and the demo app from source @@ -43,7 +43,7 @@ The simplest way to compile the demo app, and try out changes to the project cod ### Install Bazel If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html) -NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead. +NOTE: Bazel does not fully support building Android on Windows yet. Full support for Gradle/CMake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead. ### Install Android NDK and SDK Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system. @@ -53,25 +53,30 @@ Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.` ``` - Android_sdk_repository ( - name = "androidsdk", - api_level = 23, - build_tools_version = "23.0.2", - path = "/home/xxxx/android-sdk-linux/", ) +android_sdk_repository ( + name = "androidsdk", + api_level = 23, + build_tools_version = "23.0.2", + path = "/home/xxxx/android-sdk-linux/", +) android_ndk_repository( - name="androidndk", - path="/home/xxxx/android-ndk-r10e/", - api_level=19) - + name = "androidndk", + path = "/home/xxxx/android-ndk-r10e/", + api_level = 19, +) ``` + Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md). ### Build the source code Run bazel with the following command to build the demo. Build the demo app: -bazel build --cxxopt='--std=c++11' //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo + +``` +bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo +``` ### Note @@ -105,7 +110,7 @@ The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tenso ### Train a custom model -A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow’s Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. +A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. TensorFlow Lite currently supports a subset of TensorFlow operators. Please refer to [this document](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for details of supported operators and their usage. This set will continue to expand in future releases of Tensorflow Lite. @@ -129,7 +134,7 @@ Since we employ several formats, the following definitions may be useful: - TensorFlow lite model (.lite) - a serialized flatbuffer, containing TensorFlow lite operators and Tensors for the TensorFlow lite interpreter. This is most analogous to TensorFlow frozen GraphDefs. ### Freeze Graph -To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as “freezing” the graph. +To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as "freezing" the graph. The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)). @@ -156,7 +161,7 @@ Here is a sample command line to convert the frozen Graphdef to '.lite' format f bazel build tensorflow/contrib/lite/toco:toco bazel-bin/tensorflow/contrib/lite/toco/toco -- \ - --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ + --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \ --input_type=FLOAT --input_arrays=input \ @@ -184,7 +189,7 @@ with tf.Session() as sess: ``` For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md). -You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). +You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn't help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). ## Step 3. Use the TensorFlow Lite model for inference in a mobile app @@ -193,9 +198,9 @@ After completion of Step 2 the developer should have a .lite model. ### For Android Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). -The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). +The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it's a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). -Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). +Note that you'd need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). ### For iOS Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app. diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD index e1563103c8..0a76adcf91 100644 --- a/third_party/flatbuffers/flatbuffers.BUILD +++ b/third_party/flatbuffers/flatbuffers.BUILD @@ -6,8 +6,11 @@ licenses(["notice"]) # Apache 2.0 FLATBUFFERS_COPTS = [ "-fexceptions", - "-Wno-implicit-fallthrough", -] +] + select({ + "@bazel_tools//src:windows": [], + "@bazel_tools//src:windows_msvc": [], + "//conditions:default": ["-Wno-implicit-fallthrough"], +}) # Public flatc library to compile flatbuffer files at runtime. cc_library( -- GitLab From 701faa76614021c50975c6e24d0e63dbcb769935 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 27 Nov 2017 15:01:16 -0800 Subject: [PATCH 0849/1801] [XLA] Canonicalize convolutions which are potentially lowered to Eigen calls The canonicalization rules needs to map to what is supported by the Eigen runtime helpers. PiperOrigin-RevId: 177076955 --- .../xla/service/cpu/conv_canonicalization.cc | 6 ++--- .../xla/service/cpu/ir_emission_utils.cc | 23 ++++++++++--------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc index 80760356e3..a3dd13811c 100644 --- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc +++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc @@ -47,10 +47,8 @@ StatusOr ConvCanonicalization::Run(HloModule* module) { // A canonical convolution's dimension numbers need to satisfy the // following conditions (see cs/PotentiallyImplementedAsEigenConvolution). // - // - the input is in NHWC or NWHC order. - // - the kernel is in HWIO or WHIO order. - // - the spatial dimensions are in the same relative order in the input, - // kernel and output. + // - the input is in NHWC order. + // - the kernel is in HWIO order. // // For simplicity, as a first step, we reshape the input and filter to // NHWC and HWIO order, respectively. This may lose precision but won't diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc index cb5cb8a6dd..d2e7f830d1 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc @@ -29,10 +29,8 @@ bool PotentiallyImplementedAsEigenConvolution( // The following conditions are necessary (but not sufficient) for // implementing `convolution` with Eigen convolution: // - the input and kernel have a non-zero number of elements. - // - the input is in NHWC or NWHC order. - // - the kernel is in HWIO or WHIO order. - // - the spatial dimensions are in the same relative order in the input, - // kernel and output. + // - the input is in NHWC order. + // - the kernel is in HWIO order. // // To be sufficient, certain layout constraints need to be satisfied as well. const Shape& input_shape = convolution.operand(0)->shape(); @@ -51,15 +49,19 @@ bool PotentiallyImplementedAsEigenConvolution( convolution.convolution_dimension_numbers(); // Only 1D and 2D convolutions are supported at the moment. // TODO(b/32897908): add an optimized implementation for 3D convolution. - if (dnums.spatial_dimensions_size() > 2) { + const int64 num_spatial_dims = dnums.spatial_dimensions_size(); + if (num_spatial_dims > 2) { return false; } - bool input_spatial_dims_ascending = std::is_sorted( - dnums.spatial_dimensions().begin(), dnums.spatial_dimensions().end()); - bool kernel_spatial_dims_ascending = - std::is_sorted(dnums.kernel_spatial_dimensions().begin(), - dnums.kernel_spatial_dimensions().end()); + for (int64 i = 0; i < num_spatial_dims; ++i) { + if (dnums.spatial_dimensions(i) != i + 1) { + return false; + } + if (dnums.kernel_spatial_dimensions(i) != i) { + return false; + } + } const Shape& output_shape = convolution.shape(); return dnums.input_batch_dimension() == 0 && @@ -67,7 +69,6 @@ bool PotentiallyImplementedAsEigenConvolution( dnums.output_batch_dimension() == 0 && dnums.output_feature_dimension() == output_shape.dimensions_size() - 1 && - input_spatial_dims_ascending == kernel_spatial_dims_ascending && dnums.kernel_input_feature_dimension() == kernel_shape.dimensions_size() - 2 && dnums.kernel_output_feature_dimension() == -- GitLab From 20895ffc3d7049cb80e188d78402d13ca5591996 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 15:13:00 -0800 Subject: [PATCH 0850/1801] Modify static bool variable in OpRegistry::Lookup() while mutex is locked. PiperOrigin-RevId: 177078725 --- tensorflow/core/framework/op.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/framework/op.cc b/tensorflow/core/framework/op.cc index 4f5a1f80a0..fadb60d744 100644 --- a/tensorflow/core/framework/op.cc +++ b/tensorflow/core/framework/op.cc @@ -63,26 +63,32 @@ Status OpRegistry::LookUp(const string& op_type_name, const OpRegistrationData* res = nullptr; bool first_call = false; + bool first_unregistered = false; { // Scope for lock. mutex_lock lock(mu_); first_call = MustCallDeferred(); res = gtl::FindWithDefault(registry_, op_type_name, nullptr); + + static bool unregistered_before = false; + first_unregistered = !unregistered_before && (res == nullptr); + if (first_unregistered) { + unregistered_before = true; + } // Note: Can't hold mu_ while calling Export() below. } if (first_call) { TF_QCHECK_OK(ValidateKernelRegistrations(*this)); } if (res == nullptr) { - static bool first_unregistered = true; if (first_unregistered) { OpList op_list; Export(true, &op_list); if (VLOG_IS_ON(3)) { - LOG(INFO) << "All registered Ops:"; - for (const auto& op : op_list.op()) - LOG(INFO) << SummarizeOpDef(op); + LOG(INFO) << "All registered Ops:"; + for (const auto& op : op_list.op()) { + LOG(INFO) << SummarizeOpDef(op); + } } - first_unregistered = false; } Status status = errors::NotFound("Op type not registered '", op_type_name, -- GitLab From 78d3ece27c08e0cf74ad02965960fed461a2951d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 27 Nov 2017 15:16:38 -0800 Subject: [PATCH 0851/1801] Don't try to feed placeholder with default. Instead we rely on the default value. PiperOrigin-RevId: 177079220 --- tensorflow/core/grappler/grappler_item_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 3f6183b6f1..36c7f92c49 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -297,7 +297,7 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( } for (auto& node : *new_item->graph.mutable_node()) { - if (IsPlaceholder(node)) { + if (IsPlaceholder(node) && node.op() != "PlaceholderWithDefault") { if (node.attr().count("dtype") == 0) { LOG(ERROR) << "Unknown type for placeholder " << node.name() << ", skipping this input"; -- GitLab From 2be93d0d543591ebee31bcddfa4b9c6c53e5c793 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 16:03:16 -0800 Subject: [PATCH 0852/1801] Fixes punctuation in tf.nn.moments comment PiperOrigin-RevId: 177085447 --- tensorflow/python/ops/nn_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 654eb1c118..00e3c7dc0f 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -638,7 +638,7 @@ def moments(x, axes, across `axes`. If `x` is 1-D and `axes = [0]` this is just the mean and variance of a vector. - Note: shift is currently not used, the true mean is computed and used. + Note: shift is currently not used; the true mean is computed and used. When using these moments for batch normalization (see `tf.nn.batch_normalization`): -- GitLab From c17459a0acb5044fa415d11221a45bea619aa349 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 16:12:26 -0800 Subject: [PATCH 0853/1801] [tfgan] Add option to pass MODE to generator_fn, for the purpose of things like prediction. PiperOrigin-RevId: 177086828 --- .../estimator/python/gan_estimator_impl.py | 33 +++++++++++++++---- .../estimator/python/gan_estimator_test.py | 4 +-- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index 0824ecf616..058dc1d1f8 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools import enum from tensorflow.contrib.framework.python.ops import variables as variable_lib @@ -29,6 +30,7 @@ from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.framework import ops from tensorflow.python.ops import variable_scope +from tensorflow.python.util import tf_inspect as inspect __all__ = [ @@ -116,7 +118,10 @@ class GANEstimator(estimator.Estimator): to continue training a previously saved model. generator_fn: A python function that takes a Tensor, Tensor list, or Tensor dictionary as inputs and returns the outputs of the GAN - generator. See `TFGAN` for more details and examples. + generator. See `TFGAN` for more details and examples. Additionally, if + it has an argument called `mode`, the Estimator's `mode` will be passed + in (ex TRAIN, EVAL, PREDICT). This is useful for things like batch + normalization. discriminator_fn: A python function that takes the output of `generator_fn` or real data in the GAN setup, and `generator_inputs`. Outputs a Tensor in the range [-inf, inf]. See `TFGAN` for more details @@ -225,9 +230,12 @@ def _gan_model_fn( labels=None) -def _make_train_gan_model(generator_fn, discriminator_fn, real_data, - generator_inputs, generator_scope, add_summaries): - """Make a `GANModel` for training.""" +def _make_gan_model(generator_fn, discriminator_fn, real_data, + generator_inputs, generator_scope, add_summaries, mode): + """Make a `GANModel`, and optionally pass in `mode`.""" + # If `generator_fn` has an argument `mode`, pass mode to it. + if 'mode' in inspect.getargspec(generator_fn).args: + generator_fn = functools.partial(generator_fn, mode=mode) gan_model = tfgan_train.gan_model( generator_fn, discriminator_fn, @@ -245,15 +253,28 @@ def _make_train_gan_model(generator_fn, discriminator_fn, real_data, return gan_model +def _make_train_gan_model(generator_fn, discriminator_fn, real_data, + generator_inputs, generator_scope, add_summaries): + """Make a `GANModel` for training.""" + return _make_gan_model(generator_fn, discriminator_fn, real_data, + generator_inputs, generator_scope, add_summaries, + model_fn_lib.ModeKeys.TRAIN) + + def _make_eval_gan_model(generator_fn, discriminator_fn, real_data, generator_inputs, generator_scope, add_summaries): """Make a `GANModel` for evaluation.""" - return _make_train_gan_model(generator_fn, discriminator_fn, real_data, - generator_inputs, generator_scope, add_summaries) + return _make_gan_model(generator_fn, discriminator_fn, real_data, + generator_inputs, generator_scope, add_summaries, + model_fn_lib.ModeKeys.EVAL) def _make_prediction_gan_model(generator_inputs, generator_fn, generator_scope): """Make a `GANModel` from just the generator.""" + # If `generator_fn` has an argument `mode`, pass mode to it. + if 'mode' in inspect.getargspec(generator_fn).args: + generator_fn = functools.partial(generator_fn, + mode=model_fn_lib.ModeKeys.PREDICT) with variable_scope.variable_scope(generator_scope) as gen_scope: generator_inputs = tfgan_train._convert_tensor_or_l_or_d(generator_inputs) # pylint:disable=protected-access generated_data = generator_fn(generator_inputs) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py index 1bfdce9ee9..e752f0bccc 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py @@ -48,7 +48,8 @@ from tensorflow.python.training import training from tensorflow.python.training import training_util -def generator_fn(noise_dict): +def generator_fn(noise_dict, mode): + del mode noise = noise_dict['x'] return layers.fully_connected(noise, noise.shape[1].value) @@ -90,7 +91,6 @@ def mock_head(testcase, expected_generator_inputs, expected_real_data, generator_var_names, set([x.name for x in gan_model.generator_variables])) testcase.assertEqual(generator_scope_name, gan_model.generator_scope.name) - testcase.assertEqual(generator_fn, gan_model.generator_fn) testcase.assertEqual(_or_none(expected_real_data), gan_model.real_data) # TODO(joelshor): Add check on `discriminator_real_outputs`. # TODO(joelshor): Add check on `discriminator_gen_outputs`. -- GitLab From c2aa66115d6c2bad6752474acd3bbf9a616b487a Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 27 Nov 2017 16:28:09 -0800 Subject: [PATCH 0854/1801] Fetch shape information from the C API when enabled. This change makes set_shapes_for_outputs fetch the already-computed tensor shapes from the C API, rather than calling the C++ shape function. It also moves the set_shapes_for_outputs call so it works with Operations created from TF_Operations. PiperOrigin-RevId: 177088786 --- tensorflow/python/client/session_test.py | 2 + tensorflow/python/client/tf_session.i | 35 +++++++++++ tensorflow/python/client/tf_session_helper.cc | 27 +++++++++ tensorflow/python/client/tf_session_helper.h | 10 ++++ tensorflow/python/framework/ops.py | 58 ++++++++++++++++--- tensorflow/python/framework/ops_test.py | 57 +++++++++++++++++- tensorflow/python/ops/math_ops_test.py | 15 +++-- 7 files changed, 191 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 3e85410a97..f4b0271195 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -1458,6 +1458,8 @@ class SessionTest(test_util.TensorFlowTestCase): self.assertTrue(run_metadata.HasField('step_stats')) self.assertEquals(len(run_metadata.step_stats.dev_stats), 1) + # TODO(nolivia): C API doesn't yet handle marking nodes as not feedable. + @test_util.disable_c_api def testFeedShapeCompatibility(self): with session.Session() as sess: some_tensor = constant_op.constant([2.0, 2.0, 2.0, 2.0]) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 099a35202c..5fa1a7e8fc 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -497,6 +497,41 @@ def TF_Reset(target, containers=None, config=None): } } +// Typemaps for TF_GraphGetTensorShapeHelper. + +// Convert from C++ integer vector to Python list of ints. +%typemap(out) tensorflow::gtl::InlinedVector + tensorflow::TF_GraphGetTensorShapeHelper { + $result = PyList_New($1.size()); + if (!$result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list"); + } + + for (size_t i = 0; i < $1.size(); ++i) { + PyList_SET_ITEM($result, i, PyInt_FromLong($1[i])); + } +} + +%typemap(in, numinputs=0) bool* unknown_shape (bool temp) { + $1=&temp; +} + +// Returns a (list(int), bool) tuple. +%typemap(argout) bool* unknown_shape { + PyObject* new_result = PyTuple_New(2); + if (!new_result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create tuple"); + } + // Steals $result reference + PyTuple_SET_ITEM(new_result, 0, $result); + PyTuple_SET_ITEM(new_result, 1, PyBool_FromLong(*$1)); + $result = new_result; +} + +%unignore tensorflow; +%unignore TF_GraphGetTensorShapeHelper; +%ignore TF_GraphGetTensorShape; + %include "tensorflow/python/client/tf_session_helper.h" %unignoreall diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index f5472f316d..ad982e5dd8 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -299,6 +299,33 @@ string EqualGraphDefWrapper(const string& actual, const string& expected) { return EqualGraphDef(actual_def, expected_def, &diff) ? "" : diff; } +// Return value set to 6 inlined elements so it fits in a 64-byte cache line. +tensorflow::gtl::InlinedVector TF_GraphGetTensorShapeHelper( + TF_Graph* graph, TF_Output output, TF_Status* out_status, + bool* unknown_shape) { + // Allocate a single variable for holding the result for RVO. + tensorflow::gtl::InlinedVector result; + *unknown_shape = false; + int num_dims = TF_GraphGetTensorNumDims(graph, output, out_status); + if (TF_GetCode(out_status) != TF_OK) { + return result; + } + // If shape is unknown, set boolean and return. + if (num_dims == -1) { + *unknown_shape = true; + return result; + } + + // If shape is a scalar, avoid another C call and just return {}. + if (num_dims == 0) { + return result; + } + + result.resize(num_dims); + TF_GraphGetTensorShape(graph, output, result.data(), num_dims, out_status); + return result; +} + void TF_SessionPRunSetup_wrapper(TF_Session* session, const std::vector& inputs, const std::vector& outputs, diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h index 0aca61a2b6..6ed08d3a58 100644 --- a/tensorflow/python/client/tf_session_helper.h +++ b/tensorflow/python/client/tf_session_helper.h @@ -97,6 +97,16 @@ void TF_Reset_wrapper(const TF_SessionOptions* opt, // for no difference. string EqualGraphDefWrapper(const string& actual, const string& expected); +// Gets shape from C API Graph object. +// +// If shape is known, returns shape vector where -1 means "unknown +// dimension". Sets unknown_shape to false. +// +// If shape is unknown, sets unknown_shape to true. +tensorflow::gtl::InlinedVector TF_GraphGetTensorShapeHelper( + TF_Graph* graph, TF_Output output, TF_Status* out_status, + bool* unknown_shape); + // Runs the graph associated with the session starting with the supplied inputs. // On success, `py_outputs` is populated with a numpy ndarray for each output // (the caller must decref these ndarrays, although this will likely be handled diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index bcc794b9a9..60df8f82f0 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1439,8 +1439,12 @@ def _create_c_op(graph, node_def, inputs, control_inputs): c_api.TF_SetAttrValueProto(op_desc, compat.as_str(name), serialized, status) - with errors.raise_exception_on_not_ok_status() as status: - c_op = c_api.TF_FinishOperation(op_desc, status) + try: + with errors.raise_exception_on_not_ok_status() as status: + c_op = c_api.TF_FinishOperation(op_desc, status) + except errors.InvalidArgumentError as e: + # Convert to ValueError for backwards compatibility. + raise ValueError(str(e)) return c_op @@ -2318,8 +2322,28 @@ class RegisterShape(object): return f -def set_shapes_for_outputs(op): - """Uses the registered shape functions to set the shapes for op's outputs.""" +def _set_shapes_for_outputs_c_api(op): + """set_shapes_for_outputs implementation when C API is enabled.""" + # The C API computes the shapes when the TF_Operation is created. Fetch the + # output shapes from the C object. + for output in op.outputs: + with errors.raise_exception_on_not_ok_status() as status: + # pylint: disable=protected-access + shape_vector, unknown_shape = c_api.TF_GraphGetTensorShapeHelper( + op._graph._c_graph, output._as_tf_output(), status) + # pylint: enable=protected-access + if unknown_shape: + output.set_shape(tensor_shape.unknown_shape()) + elif not shape_vector: + output.set_shape(tensor_shape.scalar()) + else: + shape_vector = [None if d == -1 else d for d in shape_vector] + output.set_shape(tensor_shape.TensorShape(shape_vector)) + + +# TODO(skyewm): remove this when _USE_C_API flag is removed. +def _set_shapes_for_outputs(op): + """set_shapes_for_outputs implementation when C API is disabled.""" try: shape_func = _shape_registry.lookup(op.type) except LookupError: @@ -2350,6 +2374,14 @@ def set_shapes_for_outputs(op): output.set_shape(s) +def set_shapes_for_outputs(op): + """Set the shapes for op's outputs.""" + if op._c_op: # pylint: disable=protected-access + return _set_shapes_for_outputs_c_api(op) + else: + return _set_shapes_for_outputs(op) + + class OpStats(object): """A holder for statistics about an operator. @@ -3067,9 +3099,9 @@ class Graph(object): input_types=input_types, original_op=self._default_original_op, op_def=op_def) - if compute_shapes: - set_shapes_for_outputs(ret) - self._create_op_helper(ret, compute_device=compute_device) + + self._create_op_helper(ret, compute_shapes=compute_shapes, + compute_device=compute_device) return ret def _create_op_from_tf_operation(self, c_op): @@ -3095,8 +3127,18 @@ class Graph(object): self._create_op_helper(ret) return ret - def _create_op_helper(self, op, compute_device=True): + def _create_op_helper(self, op, compute_shapes=True, compute_device=True): """Common logic for creating an op in this graph.""" + # TODO(vrv): Instead of eagerly filling in shape property for every op, only + # populate the shape when requested. + # + # TODO(skyewm): unlike in the original Python implementation, the C API + # always computes shape information (even for function calls, which the + # original Python shape inference code doesn't handle). Deprecate the + # compute_shapes argument. + if op._c_op or compute_shapes: # pylint: disable=protected-access + set_shapes_for_outputs(op) + # Apply any additional attributes requested. Do not overwrite any existing # attributes. for key, value in self._attr_scope_map.items(): diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index ac35f6f4f5..cd296ccdc5 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -80,7 +80,7 @@ class ResourceTest(test_util.TensorFlowTestCase): @test_util.with_c_api -class TensorTest(test_util.TensorFlowTestCase): +class TensorAndShapeTest(test_util.TensorFlowTestCase): def testShape(self): op = ops.Operation( @@ -99,6 +99,44 @@ class TensorTest(test_util.TensorFlowTestCase): for _ in t: pass + def testAddShape(self): + with self.test_session(): + a = array_ops.zeros([2, 3]) + b = array_ops.ones([1, 3]) + c = a + b + self.assertEqual([2, 3], c.shape) + + def testUnknownDim(self): + with self.test_session(): + a = array_ops.placeholder(dtype=dtypes.float32, shape=[2, None, 3]) + b = array_ops.placeholder(dtype=dtypes.float32, shape=[2, None, 3]) + c = a + b + self.assertEqual([2, None, 3], c.shape.as_list()) + + def testUnknownShape(self): + with self.test_session(): + a = array_ops.placeholder(dtype=dtypes.float32, shape=None) + b = array_ops.ones([1, 3]) + c = a + b + self.assertEqual(tensor_shape.unknown_shape(), c.shape) + + def testScalarShape(self): + with self.test_session(): + a = array_ops.placeholder(dtype=dtypes.float32, shape=[]) + b = array_ops.ones([]) + c = a + b + self.assertEqual(tensor_shape.scalar(), c.shape) + + def testShapeFunctionError(self): + with self.test_session(): + a = array_ops.ones([1, 2, 3]) + b = array_ops.ones([4, 5, 6]) + with self.assertRaisesRegexp( + ValueError, + r"Dimensions must be equal, but are 2 and 5 for 'add' \(op: 'Add'\) " + r"with input shapes: \[1,2,3\], \[4,5,6\]."): + _ = a + b + @test_util.with_c_api class IndexedSlicesTest(test_util.TensorFlowTestCase): @@ -671,6 +709,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase): self.assertEqual(op.name, "myop") self.assertEqual(op.type, "IntInputIntOutput") self.assertEqual(len(op.outputs), 1) + self.assertEqual(op.outputs[0].shape, tensor_shape.unknown_shape()) self.assertEqual(list(op.inputs), [x]) self.assertEqual(op.control_inputs, []) self.assertEqual(op.graph, g) @@ -679,6 +718,22 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase): self.assertEqual(g.get_operation_by_name("myop"), op) self.assertEqual(g.get_tensor_by_name("myop:0"), op.outputs[0]) + def testShape(self): + g = ops.Graph() + with g.as_default(): + x = constant_op.constant([[1, 2, 3], [4, 5, 6]]) + if ops._USE_C_API: + c_op = ops._create_c_op(g, ops._NodeDef("Identity", "myop"), [x], []) + op = g._create_op_from_tf_operation(c_op) + else: + # Test pure-Python version to make sure C API has same behavior. + op = array_ops.identity(x, name="myop").op + + self.assertEqual(op.name, "myop") + self.assertEqual(op.type, "Identity") + self.assertEqual(len(op.outputs), 1) + self.assertEqual(op.outputs[0].shape, tensor_shape.matrix(2, 3)) + def testCond(self): g = ops.Graph() with g.as_default(): diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 4642f4c580..81a7cf28bb 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -21,7 +21,6 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import constant_op -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops @@ -31,12 +30,12 @@ from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.platform import googletest -ops._USE_C_API = True exp = np.exp log = np.log +@test_util.with_c_api class ReduceTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() @@ -67,11 +66,11 @@ class ReduceTest(test_util.TensorFlowTestCase): return x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) axis = np.array([[0], [1]]) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "must be at most rank 1"): + with self.assertRaisesRegexp(ValueError, "must be at most rank 1"): math_ops.reduce_sum(x, axis) +@test_util.with_c_api class LogSumExpTest(test_util.TensorFlowTestCase): def testReduceLogSumExp(self): @@ -151,6 +150,7 @@ class LogSumExpTest(test_util.TensorFlowTestCase): self.assertEqual(-np.inf, res) +@test_util.with_c_api class RoundTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() @@ -168,6 +168,7 @@ class RoundTest(test_util.TensorFlowTestCase): self.assertAllClose(y_tf_np, y_np, atol=1e-2) +@test_util.with_c_api class ModTest(test_util.TensorFlowTestCase): def testFloat(self): @@ -197,6 +198,7 @@ class ModTest(test_util.TensorFlowTestCase): self.assertAllClose(y_tf_np, y_np) +@test_util.with_c_api class SquaredDifferenceTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() @@ -210,6 +212,7 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase): self.assertAllClose(z, z_tf) +@test_util.with_c_api class ApproximateEqualTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() @@ -241,6 +244,7 @@ class ApproximateEqualTest(test_util.TensorFlowTestCase): self.assertAllEqual(z, z_tf) +@test_util.with_c_api class ScalarMulTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() @@ -282,6 +286,7 @@ class ScalarMulTest(test_util.TensorFlowTestCase): self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5]) +@test_util.with_c_api class AccumulateNTest(test_util.TensorFlowTestCase): def testFloat(self): @@ -301,6 +306,7 @@ class AccumulateNTest(test_util.TensorFlowTestCase): self.assertAllEqual(x[0] * 6, math_ops.accumulate_n([tf_x[0]] * 6).eval()) +@test_util.with_c_api class AddNTest(test_util.TensorFlowTestCase): def testPartials(self): @@ -354,6 +360,7 @@ class AddNTest(test_util.TensorFlowTestCase): [g.eval() for g in add_n_grad]) +@test_util.with_c_api class DivAndModTest(test_util.TensorFlowTestCase): # TODO(aselle): Test more types before exposing new division operators. -- GitLab From 8c81bde08cf757645f5937b84e9e97f4bfc97374 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 27 Nov 2017 16:33:11 -0800 Subject: [PATCH 0855/1801] Small code cleanup. PiperOrigin-RevId: 177089408 --- tensorflow/core/grappler/costs/virtual_scheduler.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 0bb98d3793..e5e1ee3292 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -752,8 +752,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) { if (metadata != nullptr) { StepStats* stepstats = metadata->mutable_step_stats(); for (const auto& device : device_) { - GraphDef* device_partition_graph = - metadata->mutable_partition_graphs()->Add(); + GraphDef* device_partition_graph = metadata->add_partition_graphs(); DeviceStepStats* device_stepstats = stepstats->add_dev_stats(); device_stepstats->set_device(device.first); for (const auto& node_def : device.second.nodes_executed) { @@ -804,7 +803,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) { mem_stats->set_host_persistent_memory_size(host_persistent_memory_size); mem_stats->set_device_persistent_memory_size( device_persistent_memory_size); - *device_partition_graph->mutable_node()->Add() = *node_def; + *device_partition_graph->add_node() = *node_def; } } } -- GitLab From 6c5ccadd434f2e6ed9634031bac2682c3dfe5216 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Mon, 27 Nov 2017 17:00:17 -0800 Subject: [PATCH 0856/1801] Support op split. PiperOrigin-RevId: 177092757 --- tensorflow/core/grappler/op_types.cc | 2 + tensorflow/core/grappler/op_types.h | 1 + .../grappler/optimizers/layout_optimizer.cc | 92 +++++++++++++++-- .../optimizers/layout_optimizer_test.cc | 99 +++++++++++++++++++ 4 files changed, 187 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 1f18b56238..83188ffc0d 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -133,6 +133,8 @@ bool IsSend(const NodeDef& node) { return node.op() == "_Send"; } bool IsSlice(const NodeDef& node) { return node.op() == "Slice"; } +bool IsSplit(const NodeDef& node) { return node.op() == "Split"; } + bool IsSquaredDifference(const NodeDef& node) { return node.op() == "SquaredDifference"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 66ff7a88c5..b1d81448af 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -55,6 +55,7 @@ bool IsReshape(const NodeDef& node); bool IsRestore(const NodeDef& node); bool IsSend(const NodeDef& node); bool IsSlice(const NodeDef& node); +bool IsSplit(const NodeDef& node); bool IsSquaredDifference(const NodeDef& node); bool IsSqueeze(const NodeDef& node); bool IsStopGradient(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index c760efac70..d5563e9d4c 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -36,6 +36,7 @@ namespace grappler { namespace { const char kConcatConst[] = "LayoutOptimizerConcatConst"; +const char kSplitConst[] = "LayoutOptimizerSplitConst"; const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW"; const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC"; const char kGatherAxisConst[] = "LayoutOptimizerGatherAxisConst"; @@ -69,12 +70,25 @@ std::set GetOpsFormatSupported() { } std::set GetOpsFormatAgnostic() { - std::set ops_format_agnostic = { - "Add", "AddN", "Concat", "ConcatV2", - "Floor", "Identity", "Mul", "Neg", - "Pad", "RealDiv", "Relu", "Relu6", - "ReluGrad", "Sigmoid", "Slice", "SquaredDifference", - "Squeeze", "Sub"}; + std::set ops_format_agnostic = {"Add", + "AddN", + "Concat", + "ConcatV2", + "Floor", + "Identity", + "Mul", + "Neg", + "Pad", + "RealDiv", + "Relu", + "Relu6", + "ReluGrad", + "Sigmoid", + "Slice", + "Split", + "SquaredDifference", + "Squeeze", + "Sub"}; return ops_format_agnostic; } @@ -764,7 +778,7 @@ class AgnosticNodeProcessor : public NodeProcessor { auto node = node_map_->GetNode(node_->name()); while (node->input_size() > 0) { int data_input_pos = 0; - if (IsConcatV1(*node)) { + if (IsConcatV1(*node) || IsSplit(*node)) { data_input_pos = 1; } node = node_map_->GetNode(node->input(data_input_pos)); @@ -1007,6 +1021,68 @@ class PadProcessor : public AgnosticNodeProcessor { } }; +class SplitProcessor : public AgnosticNodeProcessor { + public: + explicit SplitProcessor(const OptimizeContext& opt_cxt) + : AgnosticNodeProcessor(opt_cxt) {} + + protected: + bool ShouldProcess() const override { + return AgnosticNodeProcessor::ShouldProcess() && SplitSupported(); + } + + std::vector GetInputPos() const override { + std::vector input_pos = {1}; + return input_pos; + } + + Status CustomizedProcessing() override { + string split_const_name = AddNodeSplitConst()->name(); + node_map_->AddOutput(split_const_name, node_->name()); + *node_->mutable_input(0) = split_const_name; + return Status::OK(); + } + + private: + bool SplitSupported() const { + auto dim_node = node_map_->GetNode(node_->input(0)); + if (!IsConstant(*dim_node)) { + return false; + } + if (HasAttribute(*dim_node, "value").ok()) { + auto tensor = dim_node->attr().at({"value"}).tensor(); + if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) { + if (tensor.int_val(0) < 4 && tensor.int_val(0) >= -4) { + return true; + } + } + } + return false; + } + + NodeDef* AddNodeSplitConst() { + auto dim_node = node_map_->GetNode(node_->input(0)); + auto tensor = dim_node->attr().at({"value"}).tensor(); + int value = tensor.int_val(0); + value = (value >= 0) ? value : value + 4; + if (value == 1 || value == 2) { + value = value + 1; + } else if (value == 3) { + value = 1; + } + // We created a copy of the node, so that we don't modify the original node, + // which might be used elsewhere. Note that this copy also copies the + // control dependency input in the case this node is inside a loop, + // to ensure added_node is in the same frame with the Split node. + NodeDef* added_node = graph_->add_node(); + *added_node = *dim_node; + added_node->set_name(strings::StrCat(kSplitConst, "-", node_->name())); + added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val( + 0, value); + return added_node; + } +}; + class ReluGradProcessor : public AgnosticNodeProcessor { public: explicit ReluGradProcessor(const OptimizeContext& opt_cxt) @@ -1431,6 +1507,8 @@ class DataLayoutOptimizer : GraphProcessor { } else { node_processor.reset(new SliceProcessor(opt_cxt)); } + } else if (IsSplit(*node)) { + node_processor.reset(new SplitProcessor(opt_cxt)); } else if (IsSqueeze(*node)) { node_processor.reset(new SqueezeProcessor(opt_cxt)); } else if (IsSum(*node)) { diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index 20a971629c..8c89f6744b 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -399,6 +399,105 @@ TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingFalse) { EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC"); } +TEST_F(LayoutOptimizerTest, SplitDimC) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto c = ops::Const(s.WithOpName("c"), 3, {}); + auto split = ops::Split(s.WithOpName("split"), c, conv, 2); + auto i = ops::Identity(s.WithOpName("i"), split[0]); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto split_node = node_map.GetNode("split"); + EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_node->input(1), "Conv2D"); + auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_const->op(), "Const"); + EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 1); +} + +TEST_F(LayoutOptimizerTest, SplitDimH) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto c = ops::Const(s.WithOpName("c"), 1, {}); + auto split = ops::Split(s.WithOpName("split"), c, conv, 2); + auto i = ops::Identity(s.WithOpName("i"), split[0]); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto split_node = node_map.GetNode("split"); + EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_node->input(1), "Conv2D"); + auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_const->op(), "Const"); + EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 2); +} + +TEST_F(LayoutOptimizerTest, SplitDimW) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto c = ops::Const(s.WithOpName("c"), 2, {}); + auto split = ops::Split(s.WithOpName("split"), c, conv, 2); + auto i = ops::Identity(s.WithOpName("i"), split[0]); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto split_node = node_map.GetNode("split"); + EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_node->input(1), "Conv2D"); + auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_const->op(), "Const"); + EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 3); +} + +TEST_F(LayoutOptimizerTest, SplitDimN) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto c = ops::Const(s.WithOpName("c"), 0, {}); + auto split = ops::Split(s.WithOpName("split"), c, conv, 2); + auto i = ops::Identity(s.WithOpName("i"), split[0]); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto split_node = node_map.GetNode("split"); + EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_node->input(1), "Conv2D"); + auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split"); + EXPECT_EQ(split_const->op(), "Const"); + EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 0); +} + +TEST_F(LayoutOptimizerTest, SplitNonConstDim) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto c = ops::Const(s.WithOpName("c"), 0, {}); + auto i1 = ops::Identity(s.WithOpName("i1"), c); + auto split = ops::Split(s.WithOpName("split"), i1, conv, 2); + auto i2 = ops::Identity(s.WithOpName("i"), split[0]); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto split_node = node_map.GetNode("split"); + EXPECT_EQ(split_node->input(0), "i1"); + EXPECT_EQ(split_node->input(1), + "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split"); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 98ac3f5b7b3942eb0ede7cae1b1afab717b3090a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 17:02:58 -0800 Subject: [PATCH 0857/1801] Refactor code in arithmetic and dependency optimizers - get rid of duplicated code for node creation, - make the optimized graph, the NodeMap and FrameMap data members - misc. minor simplifications. Fix a few bugs in NodeMap: Make sure we strip port numbers off inputs before using them as keys to outputs_ or nodes_. PiperOrigin-RevId: 177093144 --- .../optimizers/arithmetic_optimizer.cc | 408 +++++++++--------- .../optimizers/arithmetic_optimizer.h | 42 +- .../optimizers/arithmetic_optimizer_test.cc | 84 ++-- .../optimizers/dependency_optimizer.cc | 47 +- .../optimizers/dependency_optimizer.h | 11 +- tensorflow/core/grappler/utils.cc | 62 ++- tensorflow/core/grappler/utils.h | 7 +- 7 files changed, 350 insertions(+), 311 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index ec5d2abd7a..eaf5f1f5cf 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -38,6 +39,8 @@ limitations under the License. #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/saved_tensor_slice_util.h" +using tensorflow::strings::StrCat; + namespace tensorflow { namespace grappler { namespace { @@ -296,30 +299,6 @@ bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input, return true; } -// Fix frame dependencies by adding control dependencies from old_input to nodes -// in new_nodes_for_control_dep, and update frame_map for all nodes in -// new_nodes. -void AddFrameControlDeps(const NodeDef* old_node, - const std::vector& new_nodes, - const string& source_for_ctrl_dep, - const std::vector& sinks_for_control_dep, - GraphDef* graph, NodeMap* node_map, - FrameMap* frame_map) { - const auto frame_it = frame_map->find(old_node); - if (frame_it != frame_map->end()) { - for (auto node : new_nodes) { - frame_map->emplace(node, frame_it->second); - } - if (!source_for_ctrl_dep.empty() && !sinks_for_control_dep.empty()) { - const string ctrl_dep = ConstantFolding::AddControlDependency( - source_for_ctrl_dep, graph, node_map); - for (auto node : sinks_for_control_dep) { - node->add_input(ctrl_dep); - } - } - } -} - NodeDef* GetTailOfValuePreservingChain( const NodeDef& node, const NodeMap& node_map, const std::unordered_set& nodes_to_preserve) { @@ -437,6 +416,25 @@ bool UniqueNodes::SameNode(const NodeDef& node1, const NodeDef& node2) const { return true; } +NodeDef* ArithmeticOptimizer::AddNode(const string& name, + const NodeDef* node_to_copy) { + NodeDef* new_node = optimized_graph_->add_node(); + const string name_with_prefix = + AddPrefixToNodeName(name, kArithmeticOptimizer); + node_map_->AddNode(NodeName(name_with_prefix), new_node); + if (node_to_copy != nullptr) { + new_node->CopyFrom(*node_to_copy); + } + new_node->set_name(name_with_prefix); + return new_node; +} + +bool ArithmeticOptimizer::OptimizedNodeExists(const string& name) { + const string name_with_prefix = + AddPrefixToNodeName(name, kArithmeticOptimizer); + return node_map_->NodeExists(name_with_prefix); +} + bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { return false; @@ -454,18 +452,17 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { return IsFreeOfSideEffect(node); } -void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { - NodeMap map(optimized_graph); +void ArithmeticOptimizer::DedupComputations() { bool stop = true; std::set duplicates; do { stop = true; UniqueNodes nodes; - for (int i = 0; i < optimized_graph->node_size(); ++i) { + for (int i = 0; i < optimized_graph_->node_size(); ++i) { if (duplicates.find(i) != duplicates.end()) { continue; } - NodeDef* node = optimized_graph->mutable_node(i); + NodeDef* node = optimized_graph_->mutable_node(i); if (!CanDedup(*node)) { continue; } @@ -473,20 +470,21 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { if (rep == node) { continue; } - const std::set& fanouts = map.GetOutputs(node->name()); + const std::set& fanouts = node_map_->GetOutputs(node->name()); for (NodeDef* fanout : fanouts) { for (string& name : *fanout->mutable_input()) { int position; - string nodename = ParseNodeName(name, &position); + const string nodename = ParseNodeName(name, &position); if (nodename == node->name()) { + // Update name in-place. if (position > 0) { - name = strings::StrCat(rep->name(), ":", position); + name = StrCat(rep->name(), ":", position); } else if (position == 0) { name = rep->name(); } else { - name = strings::StrCat("^", rep->name()); + name = StrCat("^", rep->name()); } - map.AddOutput(rep->name(), fanout->name()); + node_map_->AddOutput(rep->name(), fanout->name()); } } } @@ -497,20 +495,40 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { // Delete duplicates if (!duplicates.empty()) { - int last = optimized_graph->node_size() - 1; + int last = optimized_graph_->node_size() - 1; for (auto it = duplicates.rbegin(); it != duplicates.rend(); ++it) { int index = *it; - optimized_graph->mutable_node()->SwapElements(index, last); + optimized_graph_->mutable_node()->SwapElements(index, last); last--; } - optimized_graph->mutable_node()->DeleteSubrange(last + 1, - duplicates.size()); + optimized_graph_->mutable_node()->DeleteSubrange(last + 1, + duplicates.size()); + // Rebuild the NodeMap which was invalidated by the node swapping above. + node_map_.reset(new NodeMap(optimized_graph_)); + } +} + +void ArithmeticOptimizer::AddFrameControlDeps( + const NodeDef* old_node, const std::vector& new_nodes, + const string& source_for_ctrl_dep, + const std::vector& sinks_for_control_dep) { + const auto frame_it = frame_map_.find(old_node); + if (frame_it != frame_map_.end()) { + for (auto node : new_nodes) { + frame_map_.emplace(node, frame_it->second); + } + if (!source_for_ctrl_dep.empty() && !sinks_for_control_dep.empty()) { + const string ctrl_dep = ConstantFolding::AddControlDependency( + source_for_ctrl_dep, optimized_graph_, node_map_.get()); + for (auto node : sinks_for_control_dep) { + MaybeAddControlInput(ctrl_dep, node, optimized_graph_, node_map_.get()); + } + } } } string ArithmeticOptimizer::TrySimplifyAndReplaceUses( - const NodeDef* node, GraphDef* graph_def, NodeMap* node_map, - std::vector* new_nodes, FrameMap* frame_map) const { + const NodeDef* node, SetVector* nodes_to_simplify) { // Remove involutions applied twice. if (IsInvolution(*node)) { // An involution is an element-wise function f(x) that is its own inverse, @@ -520,8 +538,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // the two instances of the involution from the graph, since they cancel // each other. NodeDef* tail = - GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_); - NodeDef* involution = node_map->GetNode(tail->input(0)); + GetTailOfValuePreservingChain(*node, *node_map_, nodes_to_preserve_); + NodeDef* involution = node_map_->GetNode(tail->input(0)); if (involution->op() == node->op()) { // Skip both *node and *involution since they cancel each other. if (tail == node) { @@ -529,8 +547,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( return involution->input(0); } else { tail->set_input(0, involution->input(0)); - node_map->UpdateInput(tail->name(), involution->name(), - involution->input(0)); + node_map_->UpdateInput(tail->name(), involution->name(), + involution->input(0)); return node->input(0); } } @@ -538,10 +556,10 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // Remove inverse transposes. if (node->op() == "Transpose" || node->op() == "ConjugateTranspose") { - NodeDef* input = node_map->GetNode(node->input(0)); + NodeDef* input = node_map_->GetNode(node->input(0)); if (input->op() == node->op()) { - const NodeDef* node_perm = node_map->GetNode(node->input(1)); - const NodeDef* input_perm = node_map->GetNode(input->input(1)); + const NodeDef* node_perm = node_map_->GetNode(node->input(1)); + const NodeDef* input_perm = node_map_->GetNode(input->input(1)); // Try 32-bit indices. std::vector node_perm_values; std::vector input_perm_values; @@ -578,14 +596,14 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // ^ | // | | // input ---+ - NodeDef* reshape = node_map->GetNode(node->name()); + NodeDef* reshape = node_map_->GetNode(node->name()); int output_pos = 0; string input_node_name = ParseNodeName(node->input(0), &output_pos); - const NodeDef* input = node_map->GetNode(input_node_name); + const NodeDef* input = node_map_->GetNode(input_node_name); if (input->op() == "Reshape") { reshape->set_input(0, input->input(0)); - node_map->UpdateInput(reshape->name(), input->name(), input->input(0)); - new_nodes->push_back(reshape); + node_map_->UpdateInput(reshape->name(), input->name(), input->input(0)); + nodes_to_simplify->PushBack(reshape); return reshape->name(); } @@ -625,38 +643,30 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( &device) && (StringPiece(device).contains(DEVICE_CPU) || StringPiece(device).contains(DEVICE_GPU))) { - const NodeDef* cast = node_map->GetNode(transpose->input(0)); + const NodeDef* cast = node_map_->GetNode(transpose->input(0)); if (cast->op() == "Cast") { - const NodeDef* input = node_map->GetNode(cast->input(0)); + const NodeDef* input = node_map_->GetNode(cast->input(0)); const DataType src_type = GetSourceDataType(*cast); const DataType dst_type = GetDestinationDataType(*cast); if (IsNumberType(src_type) && IsNumberType(dst_type) && DataTypeSize(src_type) < DataTypeSize(dst_type)) { - NodeDef* new_transpose = graph_def->add_node(); - *new_transpose = *transpose; - new_transpose->set_name(transpose->name() + "_" + - DataTypeString(src_type)); + NodeDef* new_transpose = + AddNode(StrCat(transpose->name(), "_", DataTypeString(src_type)), + transpose); (*new_transpose->mutable_attr())["T"].set_type(src_type); - node_map->AddNode(new_transpose->name(), new_transpose); - new_transpose->set_input(0, cast->input(0)); - node_map->AddOutput(input->name(), new_transpose->name()); - node_map->AddOutput(NodeName(new_transpose->input(1)), - new_transpose->name()); - - NodeDef* new_cast = graph_def->add_node(); - *new_cast = *cast; - new_cast->set_name(cast->name() + "_new"); - node_map->AddNode(new_cast->name(), new_cast); + node_map_->AddOutput(input->name(), new_transpose->name()); + node_map_->AddOutput(NodeName(new_transpose->input(1)), + new_transpose->name()); + NodeDef* new_cast = AddNode(StrCat(cast->name(), "_new"), cast); new_cast->set_input(0, new_transpose->name()); - node_map->AddOutput(new_transpose->name(), new_cast->name()); + node_map_->AddOutput(new_transpose->name(), new_cast->name()); - new_nodes->push_back(new_transpose); + nodes_to_simplify->PushBack(new_transpose); // Add frame dependencies that the original node might have had. AddFrameControlDeps(node, {new_transpose, new_cast}, - new_transpose->input(0), {new_transpose}, - graph_def, node_map, frame_map); + new_transpose->input(0), {new_transpose}); return new_cast->name(); } @@ -665,20 +675,20 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } if (node->op() == "Bitcast") { - NodeDef* bitcast = node_map->GetNode(node->name()); + NodeDef* bitcast = node_map_->GetNode(node->name()); // Bypass bitcasts whose source type and destination type are equal. if (GetSourceDataType(*bitcast) == GetDestinationDataType(*bitcast)) { return bitcast->input(0); } - const NodeDef* operand = node_map->GetNode(bitcast->input(0)); + const NodeDef* operand = node_map_->GetNode(bitcast->input(0)); if (operand->op() == bitcast->op()) { // Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) bitcast->set_input(0, operand->input(0)); SetSourceDataType(GetSourceDataType(*operand), bitcast); - node_map->UpdateInput(bitcast->name(), bitcast->input(0), - operand->input(0)); - new_nodes->push_back(bitcast); + node_map_->UpdateInput(bitcast->name(), bitcast->input(0), + operand->input(0)); + nodes_to_simplify->PushBack(bitcast); return bitcast->name(); } } @@ -720,22 +730,22 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // Conv?DBackpropInput. if (node->op() == "Conv2D" || node->op() == "Conv3D") { NodeDef* conv = const_cast(node); - const NodeDef* weights = node_map->GetNode(NodeName(conv->input(1))); + const NodeDef* weights = node_map_->GetNode(NodeName(conv->input(1))); // Fold the multiply to conv only when the weights are constant, so the // multiply can be constant-folded. TODO(jingyue): When the weights aren't // constant, this should also help performance a bit and memory usage a lot, // since the weights tend to be smaller than the activations. if (weights->op() == "Const") { - const NodeDef* source = node_map->GetNode( - GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_) + const NodeDef* source = node_map_->GetNode( + GetTailOfValuePreservingChain(*node, *node_map_, nodes_to_preserve_) ->input(0)); if (source->op() == "Mul" && - node_map->GetOutputs(source->name()).size() == 1) { + node_map_->GetOutputs(source->name()).size() == 1) { const NodeDef* mul = source; // `scale` is the scalar multiplier, and `other` is the other operand. // TODO(jingyue): handle the case where `scale` is 0-th operand. - const NodeDef* scale = node_map->GetNode(mul->input(1)); - const NodeDef* other = node_map->GetNode(mul->input(0)); + const NodeDef* scale = node_map_->GetNode(mul->input(1)); + const NodeDef* other = node_map_->GetNode(mul->input(0)); if (scale->op() == "Const" && scale->attr().at("dtype").type() == weights->attr().at("dtype").type()) { const TensorProto& scale_tensor = scale->attr().at("value").tensor(); @@ -743,39 +753,36 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( if (scale_tensor.has_tensor_shape() && scale_tensor.tensor_shape().dim_size() == 0) { // Create new node `scaled_weights`. - NodeDef* scaled_weights = graph_def->add_node(); - scaled_weights->set_name(weights->name() + "_scaled_" + - conv->name()); + NodeDef* scaled_weights = AddNode( + StrCat(weights->name(), "_scaled_", conv->name()), nullptr); scaled_weights->set_op("Mul"); scaled_weights->set_device(weights->device()); (*scaled_weights->mutable_attr())["T"] = weights->attr().at("dtype"); - node_map->AddNode(scaled_weights->name(), scaled_weights); - new_nodes->push_back(scaled_weights); + nodes_to_simplify->PushBack(scaled_weights); // Link in its inputs. scaled_weights->add_input(conv->input(1)); - node_map->AddOutput(weights->name(), scaled_weights->name()); + node_map_->AddOutput(weights->name(), scaled_weights->name()); scaled_weights->add_input(mul->input(1)); - node_map->AddOutput(scale->name(), scaled_weights->name()); - AddFrameControlDeps(node, {scaled_weights}, "", {}, graph_def, - node_map, frame_map); + node_map_->AddOutput(scale->name(), scaled_weights->name()); + AddFrameControlDeps(node, {scaled_weights}, "", {}); // Update `conv`'s weights to `scaled_weights`. conv->set_input(1, scaled_weights->name()); - node_map->UpdateInput(conv->name(), weights->name(), - scaled_weights->name()); - new_nodes->push_back(conv); + node_map_->UpdateInput(conv->name(), weights->name(), + scaled_weights->name()); + nodes_to_simplify->PushBack(conv); // Update `mul`'s consumer to bypass `mul` because it's folded to // the weights. - CHECK_EQ(node_map->GetOutputs(mul->name()).size(), 1); + CHECK_EQ(node_map_->GetOutputs(mul->name()).size(), 1); NodeDef* consumer_of_mul = - *node_map->GetOutputs(mul->name()).begin(); + *node_map_->GetOutputs(mul->name()).begin(); consumer_of_mul->set_input(0, mul->input(0)); - node_map->UpdateInput(consumer_of_mul->name(), mul->name(), - other->name()); - new_nodes->push_back(consumer_of_mul); + node_map_->UpdateInput(consumer_of_mul->name(), mul->name(), + other->name()); + nodes_to_simplify->PushBack(consumer_of_mul); return conv->name(); } } @@ -784,25 +791,18 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } if (node->op() == "Mul" && node->input(0) == node->input(1) && - node_map->GetNode(node->name() + "_square") == nullptr) { - NodeDef* factor = node_map->GetNode(node->input(0)); - VLOG(2) << "Found square : " << node->DebugString(); - if (factor != nullptr) { - NodeDef* new_mul_node = graph_def->add_node(); - *new_mul_node = *node; - new_mul_node->set_op("Square"); - new_mul_node->set_name(strings::StrCat(node->name(), "_square")); - new_nodes->push_back(new_mul_node); - node_map->AddNode(new_mul_node->name(), new_mul_node); - for (int i = 1; i < new_mul_node->input_size(); ++i) { - new_mul_node->set_input(i - 1, new_mul_node->input(i)); - } - new_mul_node->mutable_input()->RemoveLast(); - return new_mul_node->name(); + !OptimizedNodeExists(StrCat(node->name(), "_square"))) { + NodeDef* new_square_node = + AddNode(strings::StrCat(node->name(), "_square"), node); + new_square_node->set_op("Square"); + for (int i = 1; i < new_square_node->input_size(); ++i) { + new_square_node->set_input(i - 1, new_square_node->input(i)); } + new_square_node->mutable_input()->RemoveLast(); + return new_square_node->name(); } - if (node->input_size() > 0 && IsAggregate(*node)) { + if (IsAggregate(*node) && NumNonControlInputs(*node) > 0) { // Discard aggregate nodes with a single input. if (node->input_size() == 1) { return node->input(0); @@ -828,7 +828,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( break; } } - if (all_equal && node_map->GetNode(node->name() + "_const") == nullptr) { + const string mul_node_name = StrCat(node->name(), "_mul"); + if (all_equal && !OptimizedNodeExists(mul_node_name)) { // 1. Create constant node with value N. const auto type = GetDataTypeFromAttr(*node, "T"); Tensor t(type, TensorShape({})); @@ -839,28 +840,26 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( return ""; } TensorValue value(&t); - NodeDef* new_const_node = graph_def->add_node(); + NodeDef* new_const_node = + AddNode(StrCat(node->name(), "_const"), nullptr); *new_const_node = - ConstantFolding::CreateNodeDef(node->name() + "_const", value); + ConstantFolding::CreateNodeDef(new_const_node->name(), value); new_const_node->set_device(node->device()); - node_map->AddNode(new_const_node->name(), new_const_node); - new_nodes->push_back(new_const_node); + nodes_to_simplify->PushBack(new_const_node); // 2. Replace the aggregate node with Mul(Const(N), x). - NodeDef* new_mul_node = graph_def->add_node(); - new_mul_node->set_name(node->name() + "_mul"); + NodeDef* new_mul_node = AddNode(mul_node_name, nullptr); new_mul_node->set_op("Mul"); new_mul_node->set_device(node->device()); SetDataTypeToAttr(type, "T", new_mul_node); - node_map->AddNode(new_mul_node->name(), new_mul_node); new_mul_node->add_input(new_const_node->name()); - node_map->AddOutput(new_const_node->name(), new_mul_node->name()); + node_map_->AddOutput(new_const_node->name(), new_mul_node->name()); new_mul_node->add_input(node->input(0)); - node_map->AddOutput(node->input(0), new_mul_node->name()); + node_map_->AddOutput(node->input(0), new_mul_node->name()); - CopyControlInputs(*node, new_mul_node, graph_def, node_map); + CopyControlInputs(*node, new_mul_node, optimized_graph_, node_map_.get()); AddFrameControlDeps(node, {new_const_node, new_mul_node}, node->input(0), - {new_const_node}, graph_def, node_map, frame_map); + {new_const_node}); return new_mul_node->name(); } } @@ -869,14 +868,18 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // multiplication over addition to hoist common factors out of aggregate nodes // where all the inputs are Mul nodes. This pattern occurs frequently in // regularization terms for the gradients during training. - if (node->input_size() > 1 && IsAggregate(*node) && - node_map->GetNode(node->name() + "_hoist_add") == nullptr) { + if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 && + !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) { // Determine the set of common factors if the input nodes are all Mul nodes. std::set common_factors; - int i = 0; - while (i < node->input_size() && (i == 0 || !common_factors.empty()) && - !IsControlInput(node->input(i))) { - const NodeDef* input = node_map->GetNode(node->input(i)); + for (int i = 0; i < node->input_size(); ++i) { + if (i > 0 && common_factors.empty()) { + break; + } + if (IsControlInput(node->input(i))) { + break; + } + const NodeDef* input = node_map_->GetNode(node->input(i)); if (input->op() == "Mul") { std::set factors_i{input->input(0), input->input(1)}; if (i == 0) { @@ -891,47 +894,42 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } else { common_factors.clear(); - break; } - ++i; } if (common_factors.size() == 1) { + const string& common_factor = *common_factors.begin(); // In this case we have an expression of the form // AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn)) // that can be rewritten as // Mul(x, AddN(y1, y2, y3, ... yn)) - // 1. Hoist non-shared factors up into AddN node. - const string& common_factor = *common_factors.begin(); - NodeDef* new_mul_node = graph_def->add_node(); - NodeDef* new_add_node = graph_def->add_node(); - *new_add_node = *node; - new_add_node->set_name(node->name() + "_hoist_add"); - new_nodes->push_back(new_add_node); - node_map->AddNode(new_add_node->name(), new_add_node); + + // 1. Use a copy of the first Mul node for the outer multiplication. + NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"), + node_map_->GetNode(node->input(0))); + NodeDef* new_add_node = AddNode(StrCat(node->name(), "_hoist_add"), node); + new_mul_node->set_device(node->device()); + new_mul_node->set_input(0, common_factor); + node_map_->AddOutput(common_factor, new_mul_node->name()); + new_mul_node->set_input(1, new_add_node->name()); + node_map_->AddOutput(new_add_node->name(), new_mul_node->name()); + + // 2. Hoist non-shared factors up into the new AddN node. + nodes_to_simplify->PushBack(new_add_node); for (int i = 0; i < node->input_size(); ++i) { const string& input = node->input(i); if (IsControlInput(input)) { - MaybeAddControlInput(input, new_add_node, graph_def, node_map); - continue; + break; } - NodeDef* mul_node = node_map->GetNode(input); - int unique_factor_index = mul_node->input(0) == common_factor ? 1 : 0; + const NodeDef* mul_node = node_map_->GetNode(input); + const int unique_factor_index = + mul_node->input(0) == common_factor ? 1 : 0; const string unique_factor = mul_node->input(unique_factor_index); new_add_node->set_input(i, unique_factor); - // 2. Use a copy of the first Mul node for the outer multiplication. - if (i == 0) { - *new_mul_node = *mul_node; - new_mul_node->set_device(node->device()); - new_mul_node->set_name(node->name() + "_hoist_mul"); - new_mul_node->set_input(0, common_factor); - new_mul_node->set_input(1, new_add_node->name()); - node_map->AddNode(new_mul_node->name(), new_mul_node); - } } - // 3. Add frame dependencies that the original node might have had. + // 4. Add frame dependencies that the original node might have had. AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor, - {new_add_node}, graph_def, node_map, frame_map); + {new_add_node}); return new_mul_node->name(); } @@ -940,9 +938,9 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // Fold Transpose into matrix multiplication. if ((node->op() == "MatMul" || node->op() == "SparseMatMul" || node->op() == "BatchMatMul") && - node_map->GetNode(node->name() + "_fused") == nullptr) { - const NodeDef* a = node_map->GetNode(node->input(0)); - const NodeDef* b = node_map->GetNode(node->input(1)); + !OptimizedNodeExists(StrCat(node->name(), "_fused"))) { + const NodeDef* a = node_map_->GetNode(node->input(0)); + const NodeDef* b = node_map_->GetNode(node->input(1)); bool is_complex = false; if (node->op() != "SparseMatMul") { const DataType type = GetDataTypeFromAttr(*node, "T"); @@ -954,32 +952,27 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( ? std::set{"ConjugateTranspose"} : std::set{"Transpose"}); const bool a_is_foldable = foldable_transpose_ops.count(a->op()) > 0 && - IsInnerMatrixTransposeNode(*a, node_map); + IsInnerMatrixTransposeNode(*a, node_map_.get()); const bool b_is_foldable = foldable_transpose_ops.count(b->op()) > 0 && - IsInnerMatrixTransposeNode(*b, node_map); + IsInnerMatrixTransposeNode(*b, node_map_.get()); if (a_is_foldable || b_is_foldable) { - NodeDef* new_op = graph_def->add_node(); - *new_op = *node; - new_op->set_name(node->name() + "_fused"); - node_map->AddNode(new_op->name(), new_op); + NodeDef* new_op = AddNode(StrCat(node->name(), "_fused"), node); if (a_is_foldable) { const string attr_a = node->op() == "BatchMatMul" ? "adj_x" : "transpose_a"; FlipBooleanAttr(attr_a, new_op); new_op->set_input(0, a->input(0)); - node_map->UpdateInput(new_op->name(), a->name(), a->input(0)); - AddFrameControlDeps(node, {new_op}, a->input(0), {new_op}, graph_def, - node_map, frame_map); + node_map_->UpdateInput(new_op->name(), a->name(), a->input(0)); + AddFrameControlDeps(node, {new_op}, a->input(0), {new_op}); } if (b_is_foldable) { const string attr_b = node->op() == "BatchMatMul" ? "adj_y" : "transpose_b"; FlipBooleanAttr(attr_b, new_op); new_op->set_input(1, b->input(0)); - node_map->UpdateInput(new_op->name(), b->name(), b->input(0)); + node_map_->UpdateInput(new_op->name(), b->name(), b->input(0)); if (!a_is_foldable) { - AddFrameControlDeps(node, {new_op}, b->input(0), {new_op}, graph_def, - node_map, frame_map); + AddFrameControlDeps(node, {new_op}, b->input(0), {new_op}); } } } @@ -988,25 +981,21 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // Fold Conj into Transpose or ConjugateTranspose. if ((node->op() == "Conj" || node->op() == "Transpose" || node->op() == "ConjugateTranspose") && - node_map->GetNode(node->name() + "_fused") == nullptr) { - const NodeDef* input = node_map->GetNode(node->input(0)); + !OptimizedNodeExists(StrCat(node->name(), "_fused"))) { + const NodeDef* input = node_map_->GetNode(node->input(0)); const NodeDef* transpose_op = node->op() == "Conj" ? input : node; const NodeDef* conj_op = node->op() == "Conj" ? node : input; if ((transpose_op->op() == "Transpose" || transpose_op->op() == "ConjugateTranspose") && conj_op->op() == "Conj") { - NodeDef* new_op = graph_def->add_node(); - *new_op = *transpose_op; - new_op->set_name(node->name() + "_fused"); + NodeDef* new_op = AddNode(StrCat(node->name(), "_fused"), transpose_op); // Flip the type of transpose op to absorb the conjugation. new_op->set_op(transpose_op->op() == "Transpose" ? "ConjugateTranspose" : "Transpose"); new_op->set_input(0, input->input(0)); - node_map->AddNode(new_op->name(), new_op); - node_map->UpdateInput(new_op->name(), node->name(), input->input(0)); - AddFrameControlDeps(node, {new_op}, "", {}, graph_def, node_map, - frame_map); + node_map_->UpdateInput(new_op->name(), node->name(), input->input(0)); + AddFrameControlDeps(node, {new_op}, "", {}); return new_op->name(); } } @@ -1014,29 +1003,23 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( return ""; } -Status ArithmeticOptimizer::SimplifyArithmeticOps( - GraphDef* optimized_graph) const { - NodeMap node_map(optimized_graph); - FrameMap frame_map; - int num_frames; - TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph, node_map, - &frame_map, &num_frames)); - SetVector nodes_to_simplify; - for (int i = 0; i < optimized_graph->node_size(); ++i) { - nodes_to_simplify.PushBack(optimized_graph->mutable_node()->Mutable(i)); +Status ArithmeticOptimizer::SimplifyArithmeticOps() { + SetVector nodes_to_simplify; + nodes_to_simplify.Reserve(optimized_graph_->node_size()); + for (int i = 0; i < optimized_graph_->node_size(); ++i) { + nodes_to_simplify.PushBack(optimized_graph_->mutable_node(i)); } while (!nodes_to_simplify.Empty()) { const NodeDef* node = nodes_to_simplify.PopBack(); - std::vector new_nodes; - const string simplified_tensor = TrySimplifyAndReplaceUses( - node, optimized_graph, &node_map, &new_nodes, &frame_map); + const string simplified_tensor = + TrySimplifyAndReplaceUses(node, &nodes_to_simplify); if (simplified_tensor.empty()) { continue; } if (NodeName(simplified_tensor) != node->name()) { // Always consider simplified_tensor for further optimizations. - const NodeDef* simplified_node = node_map.GetNode(simplified_tensor); + NodeDef* simplified_node = node_map_->GetNode(simplified_tensor); if (simplified_node != nullptr) { nodes_to_simplify.PushBack(simplified_node); } @@ -1044,7 +1027,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( // consumers of `node` are already redirected to `simplified_tensor`. // Re-push the consumers into `nodes_to_simplify` for further // optimizations. - std::set consumers = node_map.GetOutputs(node->name()); + std::set consumers = node_map_->GetOutputs(node->name()); for (NodeDef* consumer : consumers) { // Update `consumer`'s use of `node` to `input`'s operand. for (int i = 0; i < consumer->input_size(); ++i) { @@ -1057,16 +1040,12 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( ? AsControlDependency(NodeName(simplified_tensor)) : simplified_tensor); } - VLOG(2) << "Update input " << consumer->input(i) << " of " - << consumer->name() << " to " << simplified_tensor; } - node_map.UpdateInput(consumer->name(), node->name(), simplified_tensor); + node_map_->UpdateInput(consumer->name(), node->name(), + simplified_tensor); nodes_to_simplify.PushBack(consumer); } } - for (const NodeDef* new_node : new_nodes) { - nodes_to_simplify.PushBack(new_node); - } } return Status::OK(); } @@ -1074,22 +1053,31 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps( Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, GraphDef* optimized_graph) { - *optimized_graph = item.graph; - nodes_to_preserve_ = item.NodesToPreserve(); + optimized_graph_ = optimized_graph; + *optimized_graph_ = item.graph; + // Set up helper data structures. + nodes_to_preserve_ = item.NodesToPreserve(); + fetch_nodes_known_ = !item.fetch.empty(); + node_map_.reset(new NodeMap(optimized_graph_)); + int num_frames; + TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_, + &frame_map_, &num_frames)); if (opt_level_ == RewriterConfig::AGGRESSIVE) { + graph_properties_.reset(new GraphProperties(item)); // Shapes are only needed in aggressive mode. - GraphProperties graph_properties(item); - TF_RETURN_IF_ERROR(graph_properties.InferStatically()); - TF_RETURN_IF_ERROR(graph_properties.AnnotateOutputShapes(optimized_graph)); + TF_RETURN_IF_ERROR(graph_properties_->InferStatically()); + TF_RETURN_IF_ERROR( + graph_properties_->AnnotateOutputShapes(optimized_graph_)); } - DedupComputations(optimized_graph); - TF_RETURN_IF_ERROR(SimplifyArithmeticOps(optimized_graph)); + // Perform the optimizations. + DedupComputations(); + TF_RETURN_IF_ERROR(SimplifyArithmeticOps()); // Clear output shapes. for (int i = 0; i < optimized_graph->node_size(); ++i) { - optimized_graph->mutable_node(i)->mutable_attr()->erase(kOutputShapesAttr); + optimized_graph_->mutable_node(i)->mutable_attr()->erase(kOutputShapesAttr); } return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index c22e2d5363..ec26979238 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -17,13 +17,17 @@ limitations under the License. #define TENSORFLOW_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_H_ #include +#include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { namespace grappler { +constexpr char kArithmeticOptimizer[] = "ArithmeticOptimizer"; + // Optimize TF computations by reducing the arithmetic complexity required to // run a model. class ArithmeticOptimizer : public GraphOptimizer { @@ -42,13 +46,32 @@ class ArithmeticOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: + // Returns true is a node with given name and the optimizer prefix already + // exists. + bool OptimizedNodeExists(const string& name); + + // Creates a new node in the graph, prefixed with "ArithmeticOptimizer/", + // updates node_map_, and optionally copies *node_to_copy into the new + // node, if node_to_copy is not nullptr. + NodeDef* AddNode(const string& name, const NodeDef* node_to_copy); + // Returns true if it is safe to dedup node from the graph. bool CanDedup(const NodeDef& node) const; - void DedupComputations(GraphDef* optimized_graph) const; + // Dedup redundant nodes in the graph. + void DedupComputations(); + + // Fix frame dependencies by adding control dependencies from old_input to + // nodes in new_nodes_for_control_dep, and update frame_map for all nodes in + // new_nodes. + void AddFrameControlDeps(const NodeDef* old_node, + const std::vector& new_nodes, + const string& source_for_ctrl_dep, + const std::vector& sinks_for_control_dep); + // Runs peep-hole optimizations on `optimized_graph`, e.g., removing inverse // transposes. - Status SimplifyArithmeticOps(GraphDef* optimized_graph) const; + Status SimplifyArithmeticOps(); // Tries to simplify the expression that roots at `node` and replaces the uses // of `node` to the simplified expression. Returns the name of the simplified // tensor (e.g. "split:1") or an emtpy string if no simplification is @@ -64,14 +87,17 @@ class ArithmeticOptimizer : public GraphOptimizer { // TODO(jingyue): This interface is not suitable for optimizing nodes with // multiple output tensors. We should pass in a tensor name instead of a // NodeDef. - string TrySimplifyAndReplaceUses( - const NodeDef* node, GraphDef* graph_def, NodeMap* node_map, - std::vector* new_nodes, - std::unordered_map>* frame_map) const; - - std::unordered_set nodes_to_preserve_; + string TrySimplifyAndReplaceUses(const NodeDef* node, + SetVector* nodes_to_simplify); RewriterConfig::Toggle opt_level_; + + bool fetch_nodes_known_; + std::unordered_set nodes_to_preserve_; + std::unique_ptr node_map_; + FrameMap frame_map_; + std::unique_ptr graph_properties_; + GraphDef* optimized_graph_; // Not owned. }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 6bbc64c7a4..e8a18ff9d9 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -28,6 +28,10 @@ namespace tensorflow { namespace grappler { namespace { +string OptimizedName(const string& name) { + return AddPrefixToNodeName(name, kArithmeticOptimizer); +} + class ArithmeticOptimizerTest : public ::testing::Test {}; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -164,13 +168,13 @@ TEST_F(ArithmeticOptimizerTest, MulToSquare) { TF_EXPECT_OK(status); EXPECT_EQ(5, output.node_size()); + EXPECT_EQ("id", output.node(3).name()); + EXPECT_EQ(OptimizedName("mul_square"), output.node(3).input(0)); EXPECT_EQ("Square", output.node(4).op()); - EXPECT_EQ("mul_square", output.node(4).name()); + EXPECT_EQ(OptimizedName("mul_square"), output.node(4).name()); EXPECT_EQ(2, output.node(4).input_size()); EXPECT_EQ("c", output.node(4).input(0)); EXPECT_EQ("^d", output.node(4).input(1)); - EXPECT_EQ("id", output.node(3).name()); - EXPECT_EQ("mul_square", output.node(3).input(0)); } TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsReal) { @@ -271,17 +275,17 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { EXPECT_EQ(5, output.node_size()); const NodeDef& new_const = output.node(3); - EXPECT_EQ("add_const", new_const.name()); + EXPECT_EQ(OptimizedName("add_const"), new_const.name()); EXPECT_EQ("^x", new_const.input(0)); EXPECT_EQ(std::string("\0\0\0@", 4), new_const.attr().at("value").tensor().tensor_content()); const NodeDef& new_mul = output.node(4); - EXPECT_EQ("add_mul", new_mul.name()); - EXPECT_EQ("add_const", new_mul.input(0)); + EXPECT_EQ(OptimizedName("add_mul"), new_mul.name()); + EXPECT_EQ(OptimizedName("add_const"), new_mul.input(0)); EXPECT_EQ("x", new_mul.input(1)); const NodeDef& new_id = output.node(2); EXPECT_EQ("id", new_id.name()); - EXPECT_EQ("add_mul", new_id.input(0)); + EXPECT_EQ(OptimizedName("add_mul"), new_id.input(0)); } TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) { @@ -305,18 +309,18 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) { EXPECT_EQ(6, output.node_size()); const NodeDef& new_const = output.node(4); - EXPECT_EQ("add_const", new_const.name()); + EXPECT_EQ(OptimizedName("add_const"), new_const.name()); EXPECT_EQ("^x", new_const.input(0)); EXPECT_EQ(std::string("\0\0\0@", 4), new_const.attr().at("value").tensor().tensor_content()); const NodeDef& new_mul = output.node(5); - EXPECT_EQ("add_mul", new_mul.name()); - EXPECT_EQ("add_const", new_mul.input(0)); + EXPECT_EQ(OptimizedName("add_mul"), new_mul.name()); + EXPECT_EQ(OptimizedName("add_const"), new_mul.input(0)); EXPECT_EQ("x", new_mul.input(1)); EXPECT_EQ("^y", new_mul.input(2)); const NodeDef& new_id = output.node(3); EXPECT_EQ("id", new_id.name()); - EXPECT_EQ("add_mul", new_id.input(0)); + EXPECT_EQ(OptimizedName("add_mul"), new_id.input(0)); } TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { @@ -353,38 +357,39 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { // Mul(p, // Add(Add(Const(2), Const(2)), // Add(Const(2), Const(2)))) + EXPECT_EQ(17, output.node_size()); for (const auto& node : output.node()) { if ("id" == node.name()) { EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("Add_6_hoist_mul", node.input(0)); - } else if ("Add_6_hoist_mul" == node.name()) { + EXPECT_EQ(OptimizedName("Add_6_hoist_mul"), node.input(0)); + } else if (OptimizedName("Add_6_hoist_mul") == node.name()) { EXPECT_EQ("Mul", node.op()); EXPECT_EQ(2, node.input_size()); EXPECT_EQ("Placeholder", node.input(0)); - EXPECT_EQ("Add_6_hoist_add", node.input(1)); - } else if ("Add_6_hoist_add" == node.name()) { + EXPECT_EQ(OptimizedName("Add_6_hoist_add"), node.input(1)); + } else if (OptimizedName("Add_6_hoist_add") == node.name()) { EXPECT_EQ("Add", node.op()); EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("Add_4_hoist_add", node.input(0)); - EXPECT_EQ("Add_5_hoist_add", node.input(1)); + EXPECT_EQ(OptimizedName("Add_4_hoist_add"), node.input(0)); + EXPECT_EQ(OptimizedName("Add_5_hoist_add"), node.input(1)); EXPECT_EQ("^Placeholder", node.input(2)); - } else if ("Add_4_hoist_add" == node.name()) { + } else if (OptimizedName("Add_4_hoist_add") == node.name()) { EXPECT_EQ("Add", node.op()); EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("Add_const", node.input(0)); - EXPECT_EQ("Add_1_const", node.input(1)); + EXPECT_EQ(OptimizedName("Add_const"), node.input(0)); + EXPECT_EQ(OptimizedName("Add_1_const"), node.input(1)); EXPECT_EQ("^Placeholder", node.input(2)); - } else if ("Add_5_hoist_add" == node.name()) { + } else if (OptimizedName("Add_5_hoist_add") == node.name()) { EXPECT_EQ("Add", node.op()); EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("Add_const", node.input(0)); - EXPECT_EQ("Add_1_const", node.input(1)); + EXPECT_EQ(OptimizedName("Add_const"), node.input(0)); + EXPECT_EQ(OptimizedName("Add_1_const"), node.input(1)); EXPECT_EQ("^Placeholder", node.input(2)); - } else if ("Add_const" == node.name()) { + } else if (OptimizedName("Add_const") == node.name()) { EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^Placeholder", node.input(0)); - } else if ("Add_1_const" == node.name()) { + } else if (OptimizedName("Add_1_const") == node.name()) { EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^Placeholder", node.input(0)); @@ -416,16 +421,16 @@ TEST_F(ArithmeticOptimizerTest, HoistFactor) { EXPECT_EQ(9, output.node_size()); const NodeDef& new_add = output.node(8); - EXPECT_EQ("add_hoist_add", new_add.name()); + EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name()); EXPECT_EQ("y1", new_add.input(0)); EXPECT_EQ("y2", new_add.input(1)); const NodeDef& new_mul = output.node(7); - EXPECT_EQ("add_hoist_mul", new_mul.name()); + EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name()); EXPECT_EQ("x", new_mul.input(0)); - EXPECT_EQ("add_hoist_add", new_mul.input(1)); + EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1)); const NodeDef& new_id = output.node(6); EXPECT_EQ("id", new_id.name()); - EXPECT_EQ("add_hoist_mul", new_id.input(0)); + EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0)); } TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) { @@ -449,7 +454,7 @@ TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) { TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); - EXPECT_EQ("trans_fused", output.node(6).name()); + EXPECT_EQ(OptimizedName("trans_fused"), output.node(6).name()); EXPECT_EQ("ConjugateTranspose", output.node(6).op()); EXPECT_EQ("z", output.node(6).input(0)); EXPECT_EQ("perm", output.node(6).input(1)); @@ -473,7 +478,7 @@ TEST_F(ArithmeticOptimizerTest, FuseConjAndConjugateTranspose) { TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); - EXPECT_EQ("conjugate_trans_fused", output.node(6).name()); + EXPECT_EQ(OptimizedName("conjugate_trans_fused"), output.node(6).name()); EXPECT_EQ("Transpose", output.node(6).op()); EXPECT_EQ("z", output.node(6).input(0)); EXPECT_EQ("perm", output.node(6).input(1)); @@ -500,7 +505,7 @@ TEST_F(ArithmeticOptimizerTest, FuseTransposeAndConj) { TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); - EXPECT_EQ("conj_fused", output.node(6).name()); + EXPECT_EQ(OptimizedName("conj_fused"), output.node(6).name()); EXPECT_EQ("ConjugateTranspose", output.node(6).op()); EXPECT_EQ("z", output.node(6).input(0)); EXPECT_EQ("perm", output.node(6).input(1)); @@ -536,7 +541,7 @@ TEST_F(ArithmeticOptimizerTest, FoldTransposeIntoMatMul) { TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); - EXPECT_EQ("matmul_fused", output.node(6).name()); + EXPECT_EQ(OptimizedName("matmul_fused"), output.node(6).name()); EXPECT_EQ("a", output.node(6).input(0)); EXPECT_EQ("b", output.node(6).input(1)); if (matmul_type == "BatchMatMul") { @@ -574,7 +579,7 @@ TEST_F(ArithmeticOptimizerTest, FoldConjugateTransposeIntoBatchMatMul) { TF_EXPECT_OK(status); EXPECT_EQ(11, output.node_size()); - EXPECT_EQ("matmul_fused", output.node(10).name()); + EXPECT_EQ(OptimizedName("matmul_fused"), output.node(10).name()); EXPECT_EQ("a", output.node(10).input(0)); EXPECT_EQ("b", output.node(10).input(1)); EXPECT_TRUE(output.node(10).attr().at("adj_x").b()); @@ -1020,10 +1025,11 @@ TEST_F(ArithmeticOptimizerTest, OptimizeCastMulTransposeConv) { NodeMap node_map(&output); const NodeDef* inputs_node = CHECK_NOTNULL(node_map.GetNode("Placeholder")); const NodeDef* transpose_node = - CHECK_NOTNULL(node_map.GetNode("Transpose_uint8")); - const NodeDef* cast_node = CHECK_NOTNULL(node_map.GetNode("Cast_new")); + CHECK_NOTNULL(node_map.GetNode(OptimizedName("Transpose_uint8"))); + const NodeDef* cast_node = + CHECK_NOTNULL(node_map.GetNode(OptimizedName("Cast_new"))); const NodeDef* weights_node = - CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D")); + CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D"))); const NodeDef* conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D")); EXPECT_EQ(output.node_size(), 7); @@ -1067,11 +1073,11 @@ TEST_F(ArithmeticOptimizerTest, OptimizeMultipleMulTransposeConv) { NodeMap node_map(&output); const NodeDef* weights_node = - CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D")); + CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D"))); const NodeDef* conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D")); const NodeDef* weights_node_1 = - CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D_1")); + CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D_1"))); const NodeDef* conv_node_1 = CHECK_NOTNULL(node_map.GetNode("Conv2D_1")); EXPECT_EQ(conv_node->input(1), weights_node->name()); EXPECT_EQ(conv_node_1->input(1), weights_node_1->name()); diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index 7a9db9bebb..bd8a58d814 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -107,7 +107,7 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) { } string DependencyOptimizer::TryOptimizeDependencies( - NodeDef* node, GraphDef* graph, std::vector* new_nodes) { + NodeDef* node, SetVector* nodes_to_simplify) { // Change ops that only have control dependencies as outputs to NoOps. if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) { VLOG(1) << "***** Replacing " << node->name() << " (" << node->op() @@ -129,18 +129,18 @@ string DependencyOptimizer::TryOptimizeDependencies( continue; } const string ctrl_input = ConstantFolding::AddControlDependency( - old_input, graph, node_map_.get()); + old_input, optimized_graph_, node_map_.get()); if (ctrl_inputs.insert(ctrl_input).second) { node->set_input(pos, ctrl_input); node_map_->UpdateInput(node->name(), old_input, ctrl_input); auto old_input_node = node_map_->GetNode(old_input); - new_nodes->push_back(old_input_node); + nodes_to_simplify->PushBack(old_input_node); } ++pos; } node->set_op("NoOp"); node->clear_attr(); - new_nodes->push_back(node); + nodes_to_simplify->PushBack(node); return ""; } @@ -186,7 +186,7 @@ string DependencyOptimizer::TryOptimizeDependencies( consumer->add_input(input); updated_consumer = true; node_map_->AddOutput(NodeName(input), consumer->name()); - new_nodes->push_back(input_nodes[i]); + nodes_to_simplify->PushBack(input_nodes[i]); } } // Remove dependency on node from consumer. @@ -195,11 +195,11 @@ string DependencyOptimizer::TryOptimizeDependencies( if (updated_consumer) { VLOG(1) << "***** Updated consumer " << consumer->name() << " (" << consumer->op() << ")"; - new_nodes->push_back(consumer); + nodes_to_simplify->PushBack(consumer); } } - // Clear all control inputs to node. + // Clear all (control) inputs to this NoOp node. if (fetch_nodes_known_) { node_map_->RemoveInputs(node->name()); node->clear_input(); @@ -209,12 +209,12 @@ string DependencyOptimizer::TryOptimizeDependencies( return ""; } -Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { - // TODO(rmlarsen,bsteiner): The folloing code is similar to the control loop +Status DependencyOptimizer::OptimizeDependencies() { + // TODO(rmlarsen,bsteiner): The following code is similar to the control loop // in the ArithmeticOptimizer. Dedup this. SetVector nodes_to_simplify; - for (int i = 0; i < optimized_graph->node_size(); ++i) { - NodeDef* node = optimized_graph->mutable_node(i); + for (int i = 0; i < optimized_graph_->node_size(); ++i) { + NodeDef* node = optimized_graph_->mutable_node(i); if (node->op() == "NoOp" || SafeToConvertToNoOp(*node)) { PruneControlInputs(node); nodes_to_simplify.PushBack(node); @@ -222,13 +222,10 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { } while (!nodes_to_simplify.Empty()) { NodeDef* node = nodes_to_simplify.PopBack(); - std::vector new_nodes; const string simplified_tensor = - TryOptimizeDependencies(node, optimized_graph, &new_nodes); - if (simplified_tensor.empty()) { - continue; - } - if (NodeName(simplified_tensor) != node->name()) { + TryOptimizeDependencies(node, &nodes_to_simplify); + if (!simplified_tensor.empty() && + NodeName(simplified_tensor) != node->name()) { // Always consider simplified_tensor for further optimizations. NodeDef* simplified_node = node_map_->GetNode(simplified_tensor); if (simplified_node != nullptr) { @@ -257,12 +254,9 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { nodes_to_simplify.PushBack(consumer); } } - for (auto new_node : new_nodes) { - nodes_to_simplify.PushBack(new_node); - } } - for (int i = 0; i < optimized_graph->node_size(); ++i) { - NodeDef* node = optimized_graph->mutable_node(i); + for (int i = 0; i < optimized_graph_->node_size(); ++i) { + NodeDef* node = optimized_graph_->mutable_node(i); PruneControlInputs(node); } return Status::OK(); @@ -270,13 +264,14 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) { Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { - *optimized_graph = item.graph; + optimized_graph_ = optimized_graph; + *optimized_graph_ = item.graph; nodes_to_preserve_ = item.NodesToPreserve(); node_map_.reset(new NodeMap(optimized_graph)); fetch_nodes_known_ = !item.fetch.empty(); - VLOG(1) << "Graph before optimization:\n" << optimized_graph->DebugString(); - TF_RETURN_IF_ERROR(OptimizeDependencies(optimized_graph)); - VLOG(1) << "Graph after optimization:\n" << optimized_graph->DebugString(); + VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString(); + TF_RETURN_IF_ERROR(OptimizeDependencies()); + VLOG(1) << "Graph after optimization:\n" << optimized_graph_->DebugString(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h index cab9383b94..a9d3322744 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h @@ -43,23 +43,26 @@ class DependencyOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: + Status OptimizeDependencies(); + // Returns true if it is safe to convert node to NoOp. bool SafeToConvertToNoOp(const NodeDef& node); - Status OptimizeDependencies(GraphDef* optimized_graph); // Tries to simplify the expression that roots at `node` and replaces the uses // of `node` to the simplified expression. Returns the name of the simplified // tensor (e.g. "split:1") or an empty string if no simplification is // performed. - string TryOptimizeDependencies(NodeDef* node, GraphDef* graph, - std::vector* new_nodes); + string TryOptimizeDependencies(NodeDef* node, + SetVector* nodes_to_simplify); bool HasOnlyControlOutputs(const NodeDef* node); - bool fetch_nodes_known_; RewriterConfig::Toggle opt_level_; + + bool fetch_nodes_known_; std::unordered_set nodes_to_preserve_; std::unique_ptr node_map_; + GraphDef* optimized_graph_; // Not owned. }; } // end namespace grappler diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 9ab889beb5..07cf2cfc05 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -28,22 +28,29 @@ limitations under the License. namespace tensorflow { namespace grappler { -NodeMap::NodeMap(GraphDef* graph) : graph_(graph) { - for (int i = 0; i < graph_->node_size(); i++) { - auto node = graph_->mutable_node(i); - auto rslt = nodes_.insert(std::make_pair(node->name(), node)); +NodeMap::NodeMap(GraphDef* graph) { + CHECK(graph != nullptr); + for (int i = 0; i < graph->node_size(); i++) { + NodeDef* node = graph->mutable_node(i); + const string& node_name = node->name(); + auto rslt = nodes_.emplace(node_name, node); // Check that the graph doesn't contain multiple nodes with the same name. if (!rslt.second) { - LOG(WARNING) << "Duplicated node in the graph: " << node->name(); + LOG(WARNING) << "Duplicated node in the graph: " << node_name; } for (const auto& input : node->input()) { - outputs_[NodeName(input)].insert(nodes_[node->name()]); + outputs_[NodeName(input)].insert(nodes_[node_name]); } } } +void NodeMap::RemoveNode(const string& name) { + nodes_.erase(NodeName(name)); + outputs_.erase(NodeName(name)); +} + NodeDef* NodeMap::GetNode(const string& name) const { - string node_name = NodeName(name); + const string node_name = NodeName(name); auto it = nodes_.find(node_name); if (it == nodes_.end()) { return nullptr; @@ -51,6 +58,11 @@ NodeDef* NodeMap::GetNode(const string& name) const { return it->second; } +bool NodeMap::NodeExists(const string& name) const { + const string node_name = NodeName(name); + return nodes_.find(node_name) != nodes_.end(); +} + const std::set& NodeMap::GetOutputs(const string& node_name) const { auto it = outputs_.find(node_name); if (it == outputs_.end()) { @@ -59,27 +71,27 @@ const std::set& NodeMap::GetOutputs(const string& node_name) const { return it->second; } -void NodeMap::AddNode(const string& name, NodeDef* node) { - auto ret = nodes_.insert(std::make_pair(name, node)); - CHECK(ret.second) << "Pair (" << name << "," << node +void NodeMap::AddNode(const string& node_name, NodeDef* node) { + auto ret = nodes_.emplace(node_name, CHECK_NOTNULL(node)); + CHECK(ret.second) << "Pair (" << node_name << "," << node << ") is not inserted because the same key already exists."; } void NodeMap::AddOutput(const string& node_name, const string& output_name) { - auto output_node = nodes_[output_name]; + auto output_node = nodes_[NodeName(output_name)]; CHECK(output_node) << "Output node " << output_name << " is missing in NodeMap."; outputs_[node_name].insert(output_node); } void NodeMap::RemoveOutput(const string& node_name, const string& output_name) { - outputs_[node_name].erase(nodes_[output_name]); + outputs_[node_name].erase(nodes_[NodeName(output_name)]); } void NodeMap::UpdateInput(const string& node_name, const string& old_input_name, const string& new_input_name) { - RemoveOutput(old_input_name, node_name); - AddOutput(new_input_name, node_name); + RemoveOutput(NodeName(old_input_name), node_name); + AddOutput(NodeName(new_input_name), node_name); } void NodeMap::RemoveInputs(const string& node_name) { @@ -97,14 +109,14 @@ void NodeMap::UpdateOutput(const string& node_name, const string& old_output_name, const string& new_output_name) { std::set& outputs = outputs_[node_name]; - outputs.erase(nodes_[old_output_name]); - outputs.insert(nodes_[new_output_name]); + outputs.erase(nodes_[NodeName(old_output_name)]); + outputs.insert(nodes_[NodeName(new_output_name)]); } OutputMap::OutputMap(GraphDef* graph) : graph_(graph) { for (int i = 0; i < graph_->node_size(); i++) { auto node = graph_->mutable_node(i); - auto rslt = nodes_.insert(std::make_pair(node->name(), node)); + auto rslt = nodes_.emplace(node->name(), node); // Check that the graph doesn't contain multiple nodes with the same name. CHECK(rslt.second); for (const auto& input : node->input()) { @@ -250,8 +262,8 @@ int NumOutputs(const NodeDef& node) { int NumNonControlInputs(const NodeDef& node) { int num_inputs = node.input_size(); - for (int i = 0; i < node.input_size(); ++i) { - if (IsControlInput(node.input(i))) { + for (const string& input : node.input()) { + if (IsControlInput(input)) { --num_inputs; } } @@ -261,8 +273,11 @@ int NumNonControlInputs(const NodeDef& node) { int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map) { int num_outputs = 0; for (const NodeDef* output : node_map.GetOutputs(node.name())) { - for (const string& input : output->input()) { - if (input == node.name()) { + for (const string& node_as_input : output->input()) { + if (IsControlInput(node_as_input)) { + break; + } + if (NodeName(node_as_input) == node.name()) { ++num_outputs; } } @@ -288,13 +303,16 @@ NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map, const std::function& pred_fn) { const NodeDef* current = &source; const NodeDef* next = current; - while (next == &source || pred_fn(*next)) { + while (next == &source || (next != nullptr && pred_fn(*next))) { current = next; if (current->input_size() == 0 || (!follow_control_input && IsControlInput(current->input(0)))) { break; } next = node_map.GetNode(current->input(0)); + if (next == nullptr) { + LOG(ERROR) << "Node not found: " << current->input(0); + } } return const_cast(current); } diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index b98b8656e2..411e44d487 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -33,12 +33,16 @@ namespace grappler { // A utility class to lookup a node and its outputs by node name. class NodeMap { public: + // Note: The NodeMap will store pointers to nodes in graph, which may become + // invalid if graph is changed. explicit NodeMap(GraphDef* graph); NodeDef* GetNode(const string& name) const; + bool NodeExists(const string& name) const; const std::set& GetOutputs(const string& node_name) const; // This method doesn't record the outputs of the added node; the outputs need // to be explicitly added by the AddOutput method. void AddNode(const string& name, NodeDef* node); + void RemoveNode(const string& name); void UpdateInput(const string& node_name, const string& old_input_name, const string& new_input_name); void AddOutput(const string& node_name, const string& output_name); @@ -49,8 +53,7 @@ class NodeMap { const string& new_output_name); private: - GraphDef* graph_; - std::set empty_set_; + const std::set empty_set_; std::unordered_map nodes_; std::unordered_map> outputs_; }; -- GitLab From 148f157bb89d33db123c9519e94c2781ca3488c9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 17:41:24 -0800 Subject: [PATCH 0858/1801] Fix docs to recommend cuDNN 6.0, rather than the old 5.1 or non-existent 6.1. Also see #14805 PiperOrigin-RevId: 177097162 --- tensorflow/docs_src/install/install_sources.md | 2 +- tensorflow/docs_src/install/install_windows.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index dbc90e8112..c01aa907a3 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -138,7 +138,7 @@ The following NVIDIA software must be installed on your system: `LD_LIBRARY_PATH` environment variable as described in the NVIDIA documentation. * The NVIDIA drivers associated with NVIDIA's Cuda Toolkit. - * cuDNN (>= v3). We recommend version 5.1. For details, see + * cuDNN (>= v3). We recommend version 6.0. For details, see [NVIDIA's documentation](https://developer.nvidia.com/cudnn), particularly the description of appending the appropriate pathname to your `LD_LIBRARY_PATH` environment variable. diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 4098ee5b2e..63742828b0 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -36,7 +36,7 @@ installed on your system: Ensure that you append the relevant Cuda pathnames to the `%PATH%` environment variable as described in the NVIDIA documentation. * The NVIDIA drivers associated with CUDA Toolkit 8.0. - * cuDNN v6.1. For details, see + * cuDNN v6.0. For details, see [NVIDIA's documentation](https://developer.nvidia.com/cudnn). Note that cuDNN is typically installed in a different location from the other CUDA DLLs. Ensure that you add the directory where you installed -- GitLab From 446f8fd6e93199838c087b6320cbb9aa7834fd53 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 18:26:14 -0800 Subject: [PATCH 0859/1801] Fixed code for Adadelta to match correct algorithm and tightened tolerances in test to catch this problem in the future. The previous code was incorrect because it did not account for the lazy evaluation caused by "update" being declared as "const auto" (and thus using the current value of "accum_update_" at each point where it was used in the code). PiperOrigin-RevId: 177101858 --- tensorflow/core/kernels/training_ops.cc | 6 +++--- tensorflow/core/kernels/training_ops_gpu.cu.cc | 2 +- tensorflow/python/training/adadelta_test.py | 11 +++++------ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 76c30c5a46..b8d601389b 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -76,9 +76,9 @@ struct ApplyAdadelta { accum * rho() + grad.square() * (static_cast(1) - rho()); const auto update = (accum_update + epsilon()).sqrt() * (accum + epsilon()).rsqrt() * grad; + var.device(d) -= update * lr(); accum_update.device(d) = accum_update * rho() + update.square() * (static_cast(1) - rho()); - var.device(d) -= update * lr(); } }; @@ -784,11 +784,11 @@ class SparseApplyAdadeltaOp : public OpKernel { const auto update = (accum_update_ + accum_update_.constant(epsilon_scalar)).sqrt() * (accum_ + accum_.constant(epsilon_scalar)).rsqrt() * grad_; + auto v = var_flat.template chip<0>(index); + v -= update * update.constant(lr_scalar); accum_update_ = accum_update_ * accum_update_.constant(rho_scalar) + update.square() * update.constant(static_cast(1) - rho_scalar); - auto v = var_flat.template chip<0>(index); - v -= update * update.constant(lr_scalar); } } if (use_exclusive_lock_) { diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index f501161095..d443a6b3c1 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -70,11 +70,11 @@ struct ApplyAdadelta { const auto update = (accum_update + epsilon.reshape(single).broadcast(bcast)).sqrt() * (accum + epsilon.reshape(single).broadcast(bcast)).rsqrt() * grad; + var.device(d) -= update * lr.reshape(single).broadcast(bcast); accum_update.device(d) = accum_update * rho.reshape(single).broadcast(bcast) + update.square() * (grad.constant(T(1)) - rho.reshape(single).broadcast(bcast)); - var.device(d) -= update * lr.reshape(single).broadcast(bcast); } }; diff --git a/tensorflow/python/training/adadelta_test.py b/tensorflow/python/training/adadelta_test.py index de59768d0b..50f435236b 100644 --- a/tensorflow/python/training/adadelta_test.py +++ b/tensorflow/python/training/adadelta_test.py @@ -112,17 +112,16 @@ class AdadeltaOptimizerTest(test.TestCase): # Check that the accumulators have been updated for slot_idx in range(2): self.assertAllCloseAccordingToType( - np.array( - [accum, accum], dtype=dtype.as_numpy_dtype()), + np.array([accum, accum], dtype=dtype.as_numpy_dtype()), slot[slot_idx].eval(), - rtol=1e-3) + rtol=1e-5) self.assertAllCloseAccordingToType( np.array( [accum_update, accum_update], dtype=dtype.as_numpy_dtype()), slot_update[slot_idx].eval(), - rtol=1e-3) + rtol=1e-5) # Check that the parameters have been updated self.assertAllCloseAccordingToType( @@ -130,14 +129,14 @@ class AdadeltaOptimizerTest(test.TestCase): [var0_init[0] - tot_update, var0_init[1] - tot_update], dtype=dtype.as_numpy_dtype()), var0.eval(), - rtol=1e-3) + rtol=1e-5) self.assertAllCloseAccordingToType( np.array( [var1_init[0] - tot_update, var1_init[1] - tot_update], dtype=dtype.as_numpy_dtype()), var1.eval(), - rtol=1e-3) + rtol=1e-5) def testBasic(self): self.doTestBasic(use_resource=False) -- GitLab From f4a33d1c142475da42ad9812c5f0cab7704cb275 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 18:54:57 -0800 Subject: [PATCH 0860/1801] Remove old pre-C++11 non-portable helper code. With C++11, vector::data is guaranteed to return a valid pointer p regardless of the range size such that [p, p + size) is a valid range, and basic_string always contains a null terminator. Current consuming code never checks for the nullness (which already isn't guaranteed for vector by the current implementation) and passes the size separately to the eventual consumer. PiperOrigin-RevId: 177104032 --- tensorflow/core/lib/gtl/stl_util.h | 49 ++++++++---------------------- 1 file changed, 12 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/lib/gtl/stl_util.h b/tensorflow/core/lib/gtl/stl_util.h index cda72a579d..ffeca4e88a 100644 --- a/tensorflow/core/lib/gtl/stl_util.h +++ b/tensorflow/core/lib/gtl/stl_util.h @@ -29,48 +29,23 @@ limitations under the License. namespace tensorflow { namespace gtl { -// Returns a mutable char* pointing to a string's internal buffer, which may not -// be null-terminated. Returns NULL for an empty string. If not non-null, -// writing through this pointer will modify the string. -// -// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the -// next call to a string method that invalidates iterators. -// -// In C++11 you may simply use &str[0] to get a mutable char*. -// -// Prior to C++11, there was no standard-blessed way of getting a mutable -// reference to a string's internal buffer. The requirement that string be -// contiguous is officially part of the C++11 standard [string.require]/5. -// According to Matt Austern, this should already work on all current C++98 -// implementations. -inline char* string_as_array(string* str) { - return str->empty() ? NULL : &*str->begin(); -} - -// Returns the T* array for the given vector, or NULL if the vector was empty. -// -// Note: If you know the array will never be empty, you can use &*v.begin() -// directly, but that is may dump core if v is empty. This function is the most -// efficient code that will work, taking into account how our STL is actually -// implemented. THIS IS NON-PORTABLE CODE, so use this function instead of -// repeating the nonportable code everywhere. If our STL implementation changes, -// we will need to change this as well. +// Returns a char* pointing to the beginning of a string's internal buffer. +// The result is a valid "null-terminated byte string", even if *str is empty. +// Up to C++14 it is not valid to *write* to the null terminator; as of C++17, +// it is valid to write zero to the null terminator (but not any other value). +inline char* string_as_array(string* str) { return &*str->begin(); } + +// The following vector_as_array functions return raw pointers to the underlying +// data buffer. The return value is unspecified (but valid) if the input range +// is empty. template inline T* vector_as_array(std::vector* v) { -#if defined NDEBUG && !defined _GLIBCXX_DEBUG - return &*v->begin(); -#else - return v->empty() ? NULL : &*v->begin(); -#endif + return v->data(); } -// vector_as_array overload for const std::vector<>. + template inline const T* vector_as_array(const std::vector* v) { -#if defined NDEBUG && !defined _GLIBCXX_DEBUG - return &*v->begin(); -#else - return v->empty() ? NULL : &*v->begin(); -#endif + return v->data(); } // Like str->resize(new_size), except any new characters added to "*str" as a -- GitLab From 28ee7877a9df4e66f81d31d48b067091726c71ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 19:10:43 -0800 Subject: [PATCH 0861/1801] Disable Mul hoisting out of AddN where broadcasting might be needed. PiperOrigin-RevId: 177105161 --- tensorflow/core/grappler/op_types.cc | 4 +++- tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 83188ffc0d..15fcaa857e 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -24,7 +24,9 @@ limitations under the License. namespace tensorflow { namespace grappler { -bool IsAdd(const NodeDef& node) { return node.op() == "Add"; } +bool IsAdd(const NodeDef& node) { + return node.op() == "Add" || node.op() == "AddV2"; +} bool IsAddN(const NodeDef& node) { return node.op() == "AddN"; } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index eaf5f1f5cf..1e39c610a4 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -868,7 +868,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // multiplication over addition to hoist common factors out of aggregate nodes // where all the inputs are Mul nodes. This pattern occurs frequently in // regularization terms for the gradients during training. - if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 && + // TODO(rmlarsen): Check shapes and enable for AddN. + if (IsAdd(*node) && NumNonControlInputs(*node) > 1 && !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) { // Determine the set of common factors if the input nodes are all Mul nodes. std::set common_factors; -- GitLab From e02be0161393d255d8e1ade54c2a885b9695beab Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Mon, 27 Nov 2017 19:40:45 -0800 Subject: [PATCH 0862/1801] Fix the flaky file_block_cache_test. PiperOrigin-RevId: 177106823 --- tensorflow/core/platform/cloud/file_block_cache_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/file_block_cache_test.cc index 2a9eb7d524..081b32af64 100644 --- a/tensorflow/core/platform/cloud/file_block_cache_test.cc +++ b/tensorflow/core/platform/cloud/file_block_cache_test.cc @@ -461,7 +461,7 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) { TF_EXPECT_OK(cache.Read("", 0, block_size / 2, &out)); EXPECT_EQ(out.size(), block_size / 2); })); - EXPECT_TRUE(WaitForNotificationWithTimeout(¬ification, 1000)) + EXPECT_TRUE(WaitForNotificationWithTimeout(¬ification, 10000)) << "Timeout waiting for concurrent thread to start."; std::vector out; TF_EXPECT_OK(cache.Read("", block_size / 2, block_size / 2, &out)); -- GitLab From 587dbb404318039c37d7587b6ac5d044504d0ad1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 19:48:53 -0800 Subject: [PATCH 0863/1801] [XLA] Remove the extra RunHloOptimization in InterpreterCompiler::RunBackend. PiperOrigin-RevId: 177107209 --- tensorflow/compiler/xla/service/interpreter/compiler.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index c9a5285a4f..dc63a2224d 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -83,8 +83,6 @@ StatusOr> InterpreterCompiler::RunBackend( VLOG(1) << "Run backend " << hlo_module->name(); - TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get())); - // Typically you would visit the HLO graph, building up a compiled equivalent // In this case we are using an HloEvaluator at execution time, so we don't // need to compile anything -- GitLab From a8a923b3be645bad6cd08c7d80a148ebbaf47445 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Mon, 27 Nov 2017 20:06:25 -0800 Subject: [PATCH 0864/1801] Add non_trainable_variables property to EagerVariableStore. PiperOrigin-RevId: 177108237 --- tensorflow/python/kernel_tests/variable_scope_test.py | 2 ++ tensorflow/python/ops/variable_scope.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 5396214956..70fe0a4785 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -128,6 +128,8 @@ class VariableScopeTest(test.TestCase): self.assertTrue(w in store.variables()) self.assertTrue(v in store.trainable_variables()) self.assertFalse(w in store.trainable_variables()) + self.assertFalse(v in store.non_trainable_variables()) + self.assertTrue(w in store.non_trainable_variables()) @test_util.run_in_graph_and_eager_modes() def testInitFromNonTensorValue(self): diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index dd435249f4..ae2d46a2b7 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1233,6 +1233,12 @@ class EagerVariableStore(object): key=lambda x: x.name) # pylint: enable=protected-access + def non_trainable_variables(self): + # pylint: disable=protected-access + return sorted([x for x in self._store._vars.values() if not x._trainable], + key=lambda x: x.name) + # pylint: enable=protected-access + def get_variable(name, shape=None, -- GitLab From 119e3a18ce480b7f808638a2821de1d935f2df8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 20:28:58 -0800 Subject: [PATCH 0865/1801] Make ClientLibraryTestBase automatic choose float precision based on a flag. PiperOrigin-RevId: 177109696 --- tensorflow/compiler/xla/reference_util.cc | 133 ---------------- tensorflow/compiler/xla/reference_util.h | 146 ++++++++++++++++-- .../xla/tests/client_library_test_base.cc | 87 ++++++++--- .../xla/tests/client_library_test_base.h | 49 +++++- .../compiler/xla/tests/literal_test_util.cc | 32 ++++ .../compiler/xla/tests/literal_test_util.h | 6 + 6 files changed, 289 insertions(+), 164 deletions(-) diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index 90aa9720a1..5a899d550b 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -703,137 +703,4 @@ ReferenceUtil::ReduceToRowArray2D( return result; } -/* static */ std::unique_ptr> ReferenceUtil::PadArray2D( - const Array2D& operand, const PaddingConfig& padding, - const float pad) { - int64 in0 = operand.n1(); - int64 high_padding0 = padding.dimensions(0).edge_padding_high(); - int64 low_padding0 = padding.dimensions(0).edge_padding_low(); - int64 interior_padding0 = padding.dimensions(0).interior_padding(); - int64 out0 = - in0 + low_padding0 + high_padding0 + (in0 - 1) * interior_padding0; - - int64 in1 = operand.n2(); - int64 high_padding1 = padding.dimensions(1).edge_padding_high(); - int64 low_padding1 = padding.dimensions(1).edge_padding_low(); - int64 interior_padding1 = padding.dimensions(1).interior_padding(); - int64 out1 = - in1 + low_padding1 + high_padding1 + (in1 - 1) * interior_padding1; - - auto result = MakeUnique>(out0, out1); - result->Fill(pad); - int64 o0 = low_padding0; - for (int64 i0 = 0; i0 < in0; ++i0) { - int64 o1 = low_padding1; - for (int64 i1 = 0; i1 < in1; ++i1) { - if (o0 >= 0 && o1 >= 0 && o0 < out0 && o1 < out1) { - (*result)(o0, o1) = operand(i0, i1); - } - o1 += interior_padding1 + 1; - } - o0 += interior_padding0 + 1; - } - return result; -} - -/* static */ Array3D ReferenceUtil::PadArray3D( - const Array3D& operand, const PaddingConfig& padding, - const float pad) { - CHECK_EQ(padding.dimensions_size(), 3); - - const std::vector input_bounds = {operand.n1(), operand.n2(), - operand.n3()}; - std::vector pad_low(3); - std::vector pad_high(3); - std::vector pad_interior(3); - std::vector output_bounds(3); - for (int64 i = 0; i < 3; ++i) { - pad_low[i] = padding.dimensions(i).edge_padding_low(); - pad_high[i] = padding.dimensions(i).edge_padding_high(); - CHECK_LE(0, pad_low[i]); - CHECK_LE(0, pad_high[i]); - CHECK_LE(0, padding.dimensions(i).interior_padding()) << "not implemented"; - pad_interior[i] = padding.dimensions(i).interior_padding(); - - output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] + - (input_bounds[i] - 1) * pad_interior[i]; - } - - Array3D result(output_bounds[0], output_bounds[1], output_bounds[2]); - std::vector indices = {0, 0, 0}; - for (indices[0] = 0; indices[0] < output_bounds[0]; ++indices[0]) { - for (indices[1] = 0; indices[1] < output_bounds[1]; ++indices[1]) { - for (indices[2] = 0; indices[2] < output_bounds[2]; ++indices[2]) { - float* value = &result(indices[0], indices[1], indices[2]); - bool value_padded = false; - for (int i = 0; i < 3; ++i) { - bool in_low_padding = indices[i] < pad_low[i]; - bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i]; - if (in_low_padding || in_high_padding) { - *value = pad; - value_padded = true; - } - if (pad_interior[i] && - (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) { - *value = pad; - value_padded = true; - } - } - if (value_padded) { - continue; - } - *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1), - (indices[1] - pad_low[1]) / (pad_interior[1] + 1), - (indices[2] - pad_low[2]) / (pad_interior[2] + 1)); - } - } - } - return result; -} - -/* static */ Array4D ReferenceUtil::PadArray4D( - const Array4D& operand, const PaddingConfig& padding, - const float pad) { - CHECK_EQ(padding.dimensions_size(), 4); - - const std::vector input_bounds = {operand.n1(), operand.n2(), - operand.n3(), operand.n4()}; - std::vector pad_low(4); - std::vector pad_high(4); - std::vector pad_interior(4); - std::vector output_bounds(4); - for (int64 i = 0; i < 4; ++i) { - pad_low[i] = padding.dimensions(i).edge_padding_low(); - pad_high[i] = padding.dimensions(i).edge_padding_high(); - CHECK_LE(0, padding.dimensions(i).interior_padding()) << "not implemented"; - pad_interior[i] = padding.dimensions(i).interior_padding(); - - output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] + - (input_bounds[i] - 1) * pad_interior[i]; - } - - Array4D result(output_bounds[0], output_bounds[1], output_bounds[2], - output_bounds[3]); - result.Each([&](tensorflow::gtl::ArraySlice indices, float* value) { - for (int i = 0; i < 4; ++i) { - bool in_low_padding = indices[i] < pad_low[i]; - bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i]; - if (in_low_padding || in_high_padding) { - *value = pad; - return; - } - if (pad_interior[i] && - (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) { - *value = pad; - return; - } - } - *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1), - (indices[1] - pad_low[1]) / (pad_interior[1] + 1), - (indices[2] - pad_low[2]) / (pad_interior[2] + 1), - (indices[3] - pad_low[3]) / (pad_interior[3] + 1)); - }); - return result; -} - } // namespace xla diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 2da1730781..62d455d71a 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -486,19 +486,147 @@ class ReferenceUtil { } // Returns the result of a 2D pad on an input matrix. - static std::unique_ptr> PadArray2D( - const Array2D& operand, const PaddingConfig& padding, - const float pad); + template + static std::unique_ptr> PadArray2D( + const Array2D& operand, const PaddingConfig& padding, + const NativeT pad) { + int64 in0 = operand.n1(); + int64 high_padding0 = padding.dimensions(0).edge_padding_high(); + int64 low_padding0 = padding.dimensions(0).edge_padding_low(); + int64 interior_padding0 = padding.dimensions(0).interior_padding(); + int64 out0 = + in0 + low_padding0 + high_padding0 + (in0 - 1) * interior_padding0; + + int64 in1 = operand.n2(); + int64 high_padding1 = padding.dimensions(1).edge_padding_high(); + int64 low_padding1 = padding.dimensions(1).edge_padding_low(); + int64 interior_padding1 = padding.dimensions(1).interior_padding(); + int64 out1 = + in1 + low_padding1 + high_padding1 + (in1 - 1) * interior_padding1; + + auto result = MakeUnique>(out0, out1); + result->Fill(pad); + int64 o0 = low_padding0; + for (int64 i0 = 0; i0 < in0; ++i0) { + int64 o1 = low_padding1; + for (int64 i1 = 0; i1 < in1; ++i1) { + if (o0 >= 0 && o1 >= 0 && o0 < out0 && o1 < out1) { + (*result)(o0, o1) = operand(i0, i1); + } + o1 += interior_padding1 + 1; + } + o0 += interior_padding0 + 1; + } + return result; + } // Returns the result of a 3D pad on an input matrix. - static Array3D PadArray3D(const Array3D& operand, - const PaddingConfig& padding, - const float pad); + template + static Array3D PadArray3D(const Array3D& operand, + const PaddingConfig& padding, + const NativeT pad) { + CHECK_EQ(padding.dimensions_size(), 3); + + const std::vector input_bounds = {operand.n1(), operand.n2(), + operand.n3()}; + std::vector pad_low(3); + std::vector pad_high(3); + std::vector pad_interior(3); + std::vector output_bounds(3); + for (int64 i = 0; i < 3; ++i) { + pad_low[i] = padding.dimensions(i).edge_padding_low(); + pad_high[i] = padding.dimensions(i).edge_padding_high(); + CHECK_LE(0, pad_low[i]); + CHECK_LE(0, pad_high[i]); + CHECK_LE(0, padding.dimensions(i).interior_padding()) + << "not implemented"; + pad_interior[i] = padding.dimensions(i).interior_padding(); + + output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] + + (input_bounds[i] - 1) * pad_interior[i]; + } + + Array3D result(output_bounds[0], output_bounds[1], + output_bounds[2]); + std::vector indices = {0, 0, 0}; + for (indices[0] = 0; indices[0] < output_bounds[0]; ++indices[0]) { + for (indices[1] = 0; indices[1] < output_bounds[1]; ++indices[1]) { + for (indices[2] = 0; indices[2] < output_bounds[2]; ++indices[2]) { + NativeT* value = &result(indices[0], indices[1], indices[2]); + bool value_padded = false; + for (int i = 0; i < 3; ++i) { + bool in_low_padding = indices[i] < pad_low[i]; + bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i]; + if (in_low_padding || in_high_padding) { + *value = pad; + value_padded = true; + } + if (pad_interior[i] && + (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) { + *value = pad; + value_padded = true; + } + } + if (value_padded) { + continue; + } + *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1), + (indices[1] - pad_low[1]) / (pad_interior[1] + 1), + (indices[2] - pad_low[2]) / (pad_interior[2] + 1)); + } + } + } + return result; + } // Returns the result of a 4D pad on an input array. - static Array4D PadArray4D(const Array4D& operand, - const PaddingConfig& padding, - const float pad); + template + static Array4D PadArray4D(const Array4D& operand, + const PaddingConfig& padding, + const NativeT pad) { + CHECK_EQ(padding.dimensions_size(), 4); + + const std::vector input_bounds = {operand.n1(), operand.n2(), + operand.n3(), operand.n4()}; + std::vector pad_low(4); + std::vector pad_high(4); + std::vector pad_interior(4); + std::vector output_bounds(4); + for (int64 i = 0; i < 4; ++i) { + pad_low[i] = padding.dimensions(i).edge_padding_low(); + pad_high[i] = padding.dimensions(i).edge_padding_high(); + CHECK_LE(0, padding.dimensions(i).interior_padding()) + << "not implemented"; + pad_interior[i] = padding.dimensions(i).interior_padding(); + + output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] + + (input_bounds[i] - 1) * pad_interior[i]; + } + + Array4D result(output_bounds[0], output_bounds[1], + output_bounds[2], output_bounds[3]); + result.Each( + [&](tensorflow::gtl::ArraySlice indices, NativeT* value) { + for (int i = 0; i < 4; ++i) { + bool in_low_padding = indices[i] < pad_low[i]; + bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i]; + if (in_low_padding || in_high_padding) { + *value = pad; + return; + } + if (pad_interior[i] && + (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) { + *value = pad; + return; + } + } + *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1), + (indices[1] - pad_low[1]) / (pad_interior[1] + 1), + (indices[2] - pad_low[2]) / (pad_interior[2] + 1), + (indices[3] - pad_low[3]) / (pad_interior[3] + 1)); + }); + return result; + } // ApplyElementwise2D(f, x, y, ...) returns the Array2D formed by running // f(x[i], y[i], ...) for each array element in the Array2Ds x, y, .... diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index ef54714e46..15bd273e9b 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -262,20 +262,34 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( expected.shape().element_type() == PRED) << ShapeUtil::HumanString(expected.shape()); } + // We allow using a float expected literal for a bfloat16 output. In this + // case, we need to convert the expected literal to bfloat16. + const Literal* expected_ptr = &expected; + std::unique_ptr converted_expected; + Shape layout_shape; + if (expected.shape().element_type() == F32 && use_bfloat16_) { + converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected); + expected_ptr = converted_expected.get(); + if (shape_with_layout != nullptr) { + layout_shape = *shape_with_layout; + layout_shape.set_element_type(BF16); + shape_with_layout = &layout_shape; + } + } auto expect_equal = [&](const Literal& actual, const string& error_message) { - LiteralTestUtil::ExpectEqual(expected, actual, error_message); + LiteralTestUtil::ExpectEqual(*expected_ptr, actual, error_message); }; if (execution_options_.debug_options().xla_test_all_output_layouts()) { return ComputeAndCompareLiteralWithAllOutputLayouts( - computation, expected, arguments, expect_equal); + computation, *expected_ptr, arguments, expect_equal); } if (execution_options_.debug_options().xla_test_all_input_layouts()) { return ComputeAndCompareLiteralWithAllInputLayouts( - computation, expected, arguments, expect_equal, shape_with_layout); + computation, *expected_ptr, arguments, expect_equal, shape_with_layout); } TF_ASSIGN_OR_RETURN(auto actual, ExecuteAndTransfer(computation, arguments, shape_with_layout)); - LiteralTestUtil::ExpectEqual(expected, *actual); + LiteralTestUtil::ExpectEqual(*expected_ptr, *actual); return tensorflow::Status::OK(); } @@ -286,20 +300,35 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( TF_RET_CHECK(ShapeUtil::ElementIsFloating(expected.shape()) || ShapeUtil::ElementIsComplex(expected.shape())); TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); + // We allow using a float expected literal for a bfloat16 output. In this + // case, we need to convert the expected literal to bfloat16. + const Literal* expected_ptr = &expected; + std::unique_ptr converted_expected; + Shape layout_shape; + if (expected.shape().element_type() == F32 && use_bfloat16_) { + converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected); + expected_ptr = converted_expected.get(); + layout_shape.set_element_type(BF16); + if (shape_with_layout != nullptr) { + layout_shape = *shape_with_layout; + layout_shape.set_element_type(BF16); + shape_with_layout = &layout_shape; + } + } auto expect_near = [&](const Literal& actual, const string& error_message) { - LiteralTestUtil::ExpectNear(expected, actual, error, error_message); + LiteralTestUtil::ExpectNear(*expected_ptr, actual, error, error_message); }; if (execution_options_.debug_options().xla_test_all_output_layouts()) { - return ComputeAndCompareLiteralWithAllOutputLayouts(computation, expected, - arguments, expect_near); + return ComputeAndCompareLiteralWithAllOutputLayouts( + computation, *expected_ptr, arguments, expect_near); } if (execution_options_.debug_options().xla_test_all_input_layouts()) { return ComputeAndCompareLiteralWithAllInputLayouts( - computation, expected, arguments, expect_near, shape_with_layout); + computation, *expected_ptr, arguments, expect_near, shape_with_layout); } TF_ASSIGN_OR_RETURN(auto actual, ExecuteAndTransfer(computation, arguments, shape_with_layout)); - LiteralTestUtil::ExpectNear(expected, *actual, error); + LiteralTestUtil::ExpectNear(*expected_ptr, *actual, error); return tensorflow::Status::OK(); } @@ -402,8 +431,11 @@ ClientLibraryTestBase::ComputeValueAndReference( Computation ClientLibraryTestBase::CreateScalarRelu() { ComputationBuilder builder(client_, "relu"); - auto z_value = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "z_value"); - auto zero = builder.ConstantR0(0.0); + auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? BF16 : F32, {}); + auto z_value = builder.Parameter(0, shape, "z_value"); + auto zero = use_bfloat16_ + ? builder.ConstantR0(static_cast(0.0f)) + : builder.ConstantR0(0.0f); builder.Max(z_value, zero); auto computation_status = builder.Build(); TF_CHECK_OK(computation_status.status()); @@ -412,8 +444,9 @@ Computation ClientLibraryTestBase::CreateScalarRelu() { Computation ClientLibraryTestBase::CreateScalarMax() { ComputationBuilder builder(client_, "max"); - auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); - auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); + auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? BF16 : F32, {}); + auto x = builder.Parameter(0, shape, "x"); + auto y = builder.Parameter(1, shape, "y"); builder.Max(x, y); auto computation_status = builder.Build(); TF_CHECK_OK(computation_status.status()); @@ -422,11 +455,12 @@ Computation ClientLibraryTestBase::CreateScalarMax() { Computation ClientLibraryTestBase::CreateScalarReluSensitivity() { ComputationBuilder builder(client_, "relu_sensitivity"); - auto activation = - builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "activation"); - auto backprop = - builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "backprop"); - auto zero = builder.ConstantR0(0.0); + auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? BF16 : F32, {}); + auto activation = builder.Parameter(0, shape, "activation"); + auto backprop = builder.Parameter(1, shape, "backprop"); + auto zero = use_bfloat16_ + ? builder.ConstantR0(static_cast(0.0f)) + : builder.ConstantR0(0.0f); auto activation_gtz = builder.Gt(activation, zero); builder.Select(activation_gtz, /*on_true=*/backprop, /*on_false=*/zero); @@ -461,4 +495,21 @@ ClientLibraryTestBase::CreatePatternedMatrixWithZeroPadding(int rows, int cols, return array; } +std::unique_ptr +ClientLibraryTestBase::CreateParameterAndTransferLiteral( + int64 parameter_number, const Literal& literal, const string& name, + ComputationBuilder* builder, ComputationDataHandle* data_handle) { + const Literal* param_literal = &literal; + std::unique_ptr converted_literal; + if (use_bfloat16_ && literal.shape().element_type() == F32) { + converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal); + param_literal = converted_literal.get(); + } + std::unique_ptr data = + client_->TransferToServer(*param_literal).ConsumeValueOrDie(); + *data_handle = + builder->Parameter(parameter_number, param_literal->shape(), name); + return data; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index af22c12684..e8599a5cd3 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -245,51 +245,76 @@ class ClientLibraryTestBase : public ::testing::Test { const int rows, const int cols, const int rows_padded, const int cols_padded); - // Create a parameter instruction that wraps a given value and then stores + // Creates a parameter instruction, transfers the literal for the parameter to + // server, then stores into "data_handle" the global handle for that + // parameter. When the use_bfloat16 flag is set but the literal has F32 + // elements, the literal will be converted to BF16 before being transferred. + std::unique_ptr CreateParameterAndTransferLiteral( + int64 parameter_number, const Literal& literal, const string& name, + ComputationBuilder* builder, ComputationDataHandle* data_handle); + + // Creates a parameter instruction that wraps a given value and then stores // into "data_handle" the global handle for that parameter. // // "parameter_number" is the parameter number. // "name" is the name of the parameter instruction. + // + // When the use_bfloat16 flag is set but NativeT is float, the data will be + // converted to bfloat16. template std::unique_ptr CreateR0Parameter( NativeT value, int64 parameter_number, const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle); - // Create a parameter instruction that wraps the given values and then stores + // Creates a parameter instruction that wraps the given values and then stores // into "data_handle" the global handle for that parameter. // // "parameter_number" is the parameter number. // "name" is the name of the parameter instruction. + // + // When the use_bfloat16 flag is set but NativeT is float, the data will be + // converted to bfloat16. template std::unique_ptr CreateR1Parameter( tensorflow::gtl::ArraySlice values, int64 parameter_number, const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle); - // Create a parameter instruction that wraps the given constant array + // Creates a parameter instruction that wraps the given constant array // "array_2d" and then stores to "data_handle" the global handle for that // parameter. // // "parameter_number" is the parameter number. // "name" is the name of the parameter instruction. + // + // When the use_bfloat16 flag is set but NativeT is float, the data will be + // converted to bfloat16. template std::unique_ptr CreateR2Parameter( const Array2D& array_2d, int64 parameter_number, const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle); - // Create a parameter instruction that wraps the given constant array + // Creates a parameter instruction that wraps the given constant array // "array_3d" and then stores to "data_handle" the global handle for that // parameter. // // "parameter_number" is the parameter number. // "name" is the name of the parameter instruction. + // + // When the use_bfloat16 flag is set but NativeT is float, the data will be + // converted to bfloat16. template std::unique_ptr CreateR3Parameter( const Array3D& array_3d, int64 parameter_number, const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle); + // Getter and setter for the use_bfloat16 flag, which indicates whether to run + // tests with all float-type input/output converted to bfloat16. + bool use_bfloat16() const { return use_bfloat16_; } + void set_use_bfloat16(bool value) { use_bfloat16_ = value; } + Client* client_; ExecutionOptions execution_options_; @@ -315,6 +340,10 @@ class ClientLibraryTestBase : public ::testing::Test { ComputeValueAndReference(ComputationBuilder* builder, const ComputationDataHandle& operand, tensorflow::gtl::ArraySlice arguments); + + // Whether to run tests with all float-type input/output converted to + // bfloat16. + bool use_bfloat16_ = false; }; template @@ -443,6 +472,9 @@ std::unique_ptr ClientLibraryTestBase::CreateR0Parameter( NativeT value, int64 parameter_number, const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle) { std::unique_ptr literal = Literal::CreateR0(value); + if (use_bfloat16_ && literal->shape().element_type() == F32) { + literal = LiteralTestUtil::ConvertF32ToBF16(*literal); + } std::unique_ptr data = client_->TransferToServer(*literal).ConsumeValueOrDie(); *data_handle = builder->Parameter(parameter_number, literal->shape(), name); @@ -455,6 +487,9 @@ std::unique_ptr ClientLibraryTestBase::CreateR1Parameter( const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle) { std::unique_ptr literal = Literal::CreateR1(values); + if (use_bfloat16_ && literal->shape().element_type() == F32) { + literal = LiteralTestUtil::ConvertF32ToBF16(*literal); + } std::unique_ptr data = client_->TransferToServer(*literal).ConsumeValueOrDie(); *data_handle = builder->Parameter(parameter_number, literal->shape(), name); @@ -467,6 +502,9 @@ std::unique_ptr ClientLibraryTestBase::CreateR2Parameter( const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle) { std::unique_ptr literal = Literal::CreateR2FromArray2D(array_2d); + if (use_bfloat16_ && literal->shape().element_type() == F32) { + literal = LiteralTestUtil::ConvertF32ToBF16(*literal); + } std::unique_ptr data = client_->TransferToServer(*literal).ConsumeValueOrDie(); *data_handle = builder->Parameter(parameter_number, literal->shape(), name); @@ -479,6 +517,9 @@ std::unique_ptr ClientLibraryTestBase::CreateR3Parameter( const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle) { std::unique_ptr literal = Literal::CreateR3FromArray3D(array_3d); + if (use_bfloat16_ && literal->shape().element_type() == F32) { + literal = LiteralTestUtil::ConvertF32ToBF16(*literal); + } std::unique_ptr data = client_->TransferToServer(*literal).ConsumeValueOrDie(); *data_handle = builder->Parameter(parameter_number, literal->shape(), name); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 9ae5c7b6f0..6aa27e5470 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -100,6 +100,38 @@ namespace xla { ASSERT_EQ(expected.ShortDebugString(), actual.ShortDebugString()); } +/* static */ std::unique_ptr LiteralTestUtil::ConvertBF16ToF32( + const Literal& bf16_literal) { + CHECK_EQ(bf16_literal.shape().element_type(), BF16); + Shape converted_shape = bf16_literal.shape(); + converted_shape.set_element_type(F32); + auto converted = Literal::CreateFromShape(converted_shape); + if (!ShapeUtil::HasZeroElements(converted_shape)) { + std::vector index(converted_shape.dimensions_size(), 0); + do { + converted->Set( + index, static_cast(bf16_literal.Get(index))); + } while (IndexUtil::BumpIndices(converted_shape, &index)); + } + return converted; +} + +/* static */ std::unique_ptr LiteralTestUtil::ConvertF32ToBF16( + const Literal& f32_literal) { + CHECK_EQ(f32_literal.shape().element_type(), F32); + Shape converted_shape = f32_literal.shape(); + converted_shape.set_element_type(BF16); + auto converted = Literal::CreateFromShape(converted_shape); + if (!ShapeUtil::HasZeroElements(converted_shape)) { + std::vector index(converted_shape.dimensions_size(), 0); + do { + converted->Set( + index, static_cast(f32_literal.Get(index))); + } while (IndexUtil::BumpIndices(converted_shape, &index)); + } + return converted; +} + namespace { string Hostname() { diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h index 467d44b857..6e4add2690 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.h +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -59,6 +59,12 @@ class LiteralTestUtil { static void AssertEqualShapesAndLayouts(const Shape& expected, const Shape& actual); + // Converts a bfloat16 literal to a float literal. + static std::unique_ptr ConvertBF16ToF32(const Literal& bf16_literal); + + // Converts a float literal to a bfloat16 literal. + static std::unique_ptr ConvertF32ToBF16(const Literal& f32_literal); + // Asserts that the expected and actual literals are (bitwise) equal for all // elements in the literal. Also, asserts that the rank, dimensions sizes, and // primitive type are equal. -- GitLab From 8781d69b2e619e64555cb00b13783a7eee524b81 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Nov 2017 21:48:38 -0800 Subject: [PATCH 0866/1801] Allow BF16 to use error spec. PiperOrigin-RevId: 177114689 --- tensorflow/compiler/xla/tests/client_library_test_base.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index e8599a5cd3..1d27880fb1 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -387,6 +387,7 @@ void ClientLibraryTestBase::ComputeAndCompareR1( tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value, "Float or complex type required when specifying an ErrorSpec"); std::unique_ptr expected_literal = @@ -411,6 +412,7 @@ void ClientLibraryTestBase::ComputeAndCompareR2( tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value, "Float or complex type required when specifying an ErrorSpec"); std::unique_ptr expected_literal = @@ -435,6 +437,7 @@ void ClientLibraryTestBase::ComputeAndCompareR3( tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value, "Float or complex type required when specifying an ErrorSpec"); std::unique_ptr expected_literal = @@ -459,6 +462,7 @@ void ClientLibraryTestBase::ComputeAndCompareR4( tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value, "Float or complex type required when specifying an ErrorSpec"); std::unique_ptr expected_literal = -- GitLab From 102bfdfd830f4dab6e00371e63a82561e1246518 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 27 Nov 2017 22:31:25 -0800 Subject: [PATCH 0867/1801] [XLA] Separate input and output spatial dimensions for convolution This lets us reason about input spatial dimensions as distinct from output spatial dimensions. By doing this, it opens up more opportunities for assigning more interesting, different, layouts for the activations and the output. PiperOrigin-RevId: 177117140 --- .../compiler/tf2xla/kernels/conv_ops.cc | 19 ++--- .../xla/client/computation_builder.cc | 45 +++++++----- .../compiler/xla/client/computation_builder.h | 5 +- tensorflow/compiler/xla/reference_util.cc | 11 +-- .../compiler/xla/reference_util_test.cc | 16 +++-- .../xla/service/algebraic_simplifier_test.cc | 13 ++-- .../xla/service/cpu/conv_canonicalization.cc | 23 ++++--- .../service/cpu/conv_canonicalization_test.cc | 12 ++-- .../xla/service/cpu/ir_emission_utils.cc | 7 +- .../compiler/xla/service/cpu/ir_emitter.cc | 69 ++++++++++--------- .../xla/service/gpu/convolution_folding.cc | 34 +++++---- .../service/gpu/convolution_folding_test.cc | 18 +++-- .../xla/service/gpu/convolution_thunk.cc | 9 +-- .../service/gpu/instruction_fusion_test.cc | 6 +- .../xla/service/gpu/ir_emission_utils.cc | 2 +- .../xla/service/gpu/layout_assignment.cc | 12 ++-- .../compiler/xla/service/gpu/pad_insertion.cc | 19 +++-- .../compiler/xla/service/hlo_evaluator.cc | 16 +++-- .../xla/service/hlo_evaluator_test.cc | 15 ++-- .../compiler/xla/service/hlo_instruction.cc | 14 ++-- .../compiler/xla/service/shape_inference.cc | 29 ++++++-- .../xla/service/shape_inference_test.cc | 24 ++++--- .../compiler/xla/service/transpose_folding.cc | 18 +---- .../xla/service/transpose_folding_test.cc | 16 +++-- .../convolution_dimension_numbers_test.cc | 18 +++-- .../compiler/xla/tests/convolution_test.cc | 18 +++-- .../xla/tests/convolution_variants_test.cc | 65 ++++++++++------- .../compiler/xla/tools/parser/hlo_parser.cc | 13 ++-- .../xla/tools/parser/hlo_parser_test.cc | 6 -- tensorflow/compiler/xla/xla_data.proto | 22 +++--- 30 files changed, 353 insertions(+), 241 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc index 885f716afa..c5017704e2 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc @@ -184,10 +184,11 @@ class ConvOp : public XlaOpKernel { dims.set_input_feature_dimension(feature_dim); dims.set_output_feature_dimension(feature_dim); for (int i = 0; i < num_spatial_dims_; ++i) { - int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); - dims.add_spatial_dimensions(input_dim); + int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); + dims.add_input_spatial_dimensions(dim); dims.add_kernel_spatial_dimensions(i); - window_strides.push_back(strides_.at(input_dim)); + dims.add_output_spatial_dimensions(dim); + window_strides.push_back(strides_.at(dim)); } dims.set_kernel_input_feature_dimension(num_spatial_dims_); dims.set_kernel_output_feature_dimension(num_spatial_dims_ + 1); @@ -302,9 +303,10 @@ class ConvBackpropInputOp : public XlaOpKernel { std::vector lhs_dilation(num_spatial_dims_); std::vector ones(num_spatial_dims_, 1); for (int i = 0; i < num_spatial_dims_; ++i) { - dnums.add_spatial_dimensions( - GetTensorSpatialDimIndex(num_dims(), data_format_, i)); + int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); + dnums.add_input_spatial_dimensions(dim); dnums.add_kernel_spatial_dimensions(i); + dnums.add_output_spatial_dimensions(dim); kernel_spatial_dims[i] = i; padding[i] = {dims.spatial_dims[i].pad_before, @@ -439,9 +441,10 @@ class ConvBackpropFilterOp : public XlaOpKernel { std::vector ones(num_spatial_dims_, 1); for (int i = 0; i < num_spatial_dims_; ++i) { - int dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); - dnums.add_spatial_dimensions(dim); + int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); + dnums.add_input_spatial_dimensions(dim); dnums.add_kernel_spatial_dimensions(dim); + dnums.add_output_spatial_dimensions(dim); // We will also need to pad the input with zeros such that after the // convolution, we get the right size for the filter. @@ -506,7 +509,7 @@ class ConvBackpropFilterOp : public XlaOpKernel { std::vector transpose_dims; transpose_dims.reserve(num_dims()); for (int i = 0; i < num_spatial_dims_; ++i) { - transpose_dims.push_back(dnums.spatial_dimensions(i)); + transpose_dims.push_back(dnums.output_spatial_dimensions(i)); } transpose_dims.push_back(c_dim); transpose_dims.push_back(n_dim); diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index b17d221ef5..cce9310003 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -694,11 +694,15 @@ bool ComputationBuilder::VerifyConvolution( } return true; }; - return check_spatial_dimensions("spatial_dimensions", - dimension_numbers.spatial_dimensions()) && + return check_spatial_dimensions( + "input_spatial_dimensions", + dimension_numbers.input_spatial_dimensions()) && check_spatial_dimensions( "kernel_spatial_dimensions", - dimension_numbers.kernel_spatial_dimensions()); + dimension_numbers.kernel_spatial_dimensions()) && + check_spatial_dimensions( + "output_spatial_dimensions", + dimension_numbers.output_spatial_dimensions()); } ComputationDataHandle ComputationBuilder::ConvWithGeneralDimensions( @@ -730,11 +734,11 @@ ComputationDataHandle ComputationBuilder::ConvWithGeneralDimensions( } std::vector base_area_dimensions( - dimension_numbers.spatial_dimensions_size()); + dimension_numbers.input_spatial_dimensions_size()); for (std::vector::size_type i = 0; i < base_area_dimensions.size(); ++i) { base_area_dimensions[i] = - lhs_shape->dimensions(dimension_numbers.spatial_dimensions(i)); + lhs_shape->dimensions(dimension_numbers.input_spatial_dimensions(i)); } std::vector window_dimensions( @@ -1845,25 +1849,27 @@ ComputationBuilder::CreateDefaultConvDimensionNumbers(int num_spatial_dims) { dimension_numbers.set_kernel_input_feature_dimension( kConvKernelInputDimension); for (int i = 0; i < num_spatial_dims; ++i) { - dimension_numbers.add_spatial_dimensions(i + 2); + dimension_numbers.add_input_spatial_dimensions(i + 2); dimension_numbers.add_kernel_spatial_dimensions(i + 2); + dimension_numbers.add_output_spatial_dimensions(i + 2); } return dimension_numbers; } /* static */ StatusOr ComputationBuilder::CreateConvDimensionNumbers( - int64 input_batch, int64 input_feature, int64 output_batch, - int64 output_feature, int64 first_spatial, int64 second_spatial, + int64 input_batch, int64 input_feature, int64 input_first_spatial, + int64 input_second_spatial, int64 output_batch, int64 output_feature, + int64 output_first_spatial, int64 output_second_spatial, int64 kernel_output_feature, int64 kernel_input_feature, int64 kernel_first_spatial, int64 kernel_second_spatial) { - if (std::set( - {input_batch, input_feature, first_spatial, second_spatial}) + if (std::set({input_batch, input_feature, input_first_spatial, + input_second_spatial}) .size() != 4) { return FailedPrecondition( "dimension numbers for the input are not unique: (%lld, %lld, %lld, " "%lld)", - input_batch, input_feature, first_spatial, second_spatial); + input_batch, input_feature, input_first_spatial, input_second_spatial); } if (std::set({kernel_output_feature, kernel_input_feature, kernel_first_spatial, kernel_second_spatial}) @@ -1874,25 +1880,28 @@ ComputationBuilder::CreateConvDimensionNumbers( kernel_output_feature, kernel_input_feature, kernel_first_spatial, kernel_second_spatial); } - if (std::set( - {output_batch, output_feature, first_spatial, second_spatial}) + if (std::set({output_batch, output_feature, output_first_spatial, + output_second_spatial}) .size() != 4) { return FailedPrecondition( "dimension numbers for the output are not unique: (%lld, %lld, %lld, " "%lld)", - output_batch, output_feature, first_spatial, second_spatial); + output_batch, output_feature, output_first_spatial, + output_second_spatial); } ConvolutionDimensionNumbers dimension_numbers; dimension_numbers.set_input_batch_dimension(input_batch); dimension_numbers.set_input_feature_dimension(input_feature); - dimension_numbers.set_output_batch_dimension(output_batch); - dimension_numbers.set_output_feature_dimension(output_feature); - dimension_numbers.add_spatial_dimensions(first_spatial); - dimension_numbers.add_spatial_dimensions(second_spatial); + dimension_numbers.add_input_spatial_dimensions(input_first_spatial); + dimension_numbers.add_input_spatial_dimensions(input_second_spatial); dimension_numbers.set_kernel_output_feature_dimension(kernel_output_feature); dimension_numbers.set_kernel_input_feature_dimension(kernel_input_feature); dimension_numbers.add_kernel_spatial_dimensions(kernel_first_spatial); dimension_numbers.add_kernel_spatial_dimensions(kernel_second_spatial); + dimension_numbers.set_output_batch_dimension(output_batch); + dimension_numbers.set_output_feature_dimension(output_feature); + dimension_numbers.add_output_spatial_dimensions(output_first_spatial); + dimension_numbers.add_output_spatial_dimensions(output_second_spatial); return dimension_numbers; } diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 3a34010e6a..d2dbbbbebb 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -413,8 +413,9 @@ class ComputationBuilder { // Creates a ConvolutionDimensionNumbers with the given arguments. Returns an // error if either the input or the weight dimension numbers have conflicts. static StatusOr CreateConvDimensionNumbers( - int64 input_batch, int64 input_feature, int64 output_batch, - int64 output_feature, int64 first_spatial, int64 second_spatial, + int64 input_batch, int64 input_feature, int64 input_first_spatial, + int64 input_second_spatial, int64 output_batch, int64 output_feature, + int64 output_first_spatial, int64 output_second_spatial, int64 kernel_output_feature, int64 kernel_input_feature, int64 kernel_first_spatial, int64 kernel_second_spatial); diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index 5a899d550b..5bb81b80dd 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -102,7 +102,9 @@ ReferenceUtil::ConvArray3DGeneralDimensionsDilated( const Array3D& lhs, const Array3D& rhs, int64 kernel_stride, Padding padding, int64 lhs_dilation, int64 rhs_dilation, const ConvolutionDimensionNumbers& dnums) { - CHECK_EQ(dnums.spatial_dimensions_size(), 1); + CHECK_EQ(dnums.input_spatial_dimensions_size(), 1); + CHECK_EQ(dnums.kernel_spatial_dimensions_size(), 1); + CHECK_EQ(dnums.output_spatial_dimensions_size(), 1); // Reuse the code for Array4D-convolution by extending the 3D input into a 4D // array by adding a fourth dummy dimension of size 1 without stride, padding // and dilation. @@ -120,8 +122,9 @@ ReferenceUtil::ConvArray3DGeneralDimensionsDilated( }); // Add a second dummy spatial dimensions. ConvolutionDimensionNumbers dnums2d = dnums; - dnums2d.add_spatial_dimensions(3); + dnums2d.add_input_spatial_dimensions(3); dnums2d.add_kernel_spatial_dimensions(3); + dnums2d.add_output_spatial_dimensions(3); std::unique_ptr> convr4 = ConvArray4DGeneralDimensionsDilated( a4dlhs, a4drhs, {kernel_stride, 1}, padding, {lhs_dilation, 1}, {rhs_dilation, 1}, dnums2d); @@ -465,9 +468,9 @@ ReferenceUtil::ConvArray4DGeneralDimensionsDilated( } ordered_input_dimensions[0] = - lhs_literal->shape().dimensions(dnums.spatial_dimensions(0)); + lhs_literal->shape().dimensions(dnums.input_spatial_dimensions(0)); ordered_input_dimensions[1] = - lhs_literal->shape().dimensions(dnums.spatial_dimensions(1)); + lhs_literal->shape().dimensions(dnums.input_spatial_dimensions(1)); ordered_kernel_dimensions[0] = rhs_literal->shape().dimensions(dnums.kernel_spatial_dimensions(0)); ordered_kernel_dimensions[1] = diff --git a/tensorflow/compiler/xla/reference_util_test.cc b/tensorflow/compiler/xla/reference_util_test.cc index eb6a71242f..846ccdc83d 100644 --- a/tensorflow/compiler/xla/reference_util_test.cc +++ b/tensorflow/compiler/xla/reference_util_test.cc @@ -60,7 +60,9 @@ TEST_F(ReferenceUtilTest, TransposeArray2D) { TEST_F(ReferenceUtilTest, MatmulArray2D) { Array2D rhs({ - {7.f, 8.f}, {9.f, 10.f}, {11.f, 12.f}, + {7.f, 8.f}, + {9.f, 10.f}, + {11.f, 12.f}, }); auto result = ReferenceUtil::MatmulArray2D(*matrix_, rhs); auto actual_literal = Literal::CreateR2FromArray2D(*result); @@ -326,8 +328,10 @@ TEST_F(ReferenceUtilTest, ConvGeneralDimensionsWithSamePadding) { dimension_numbers.set_input_feature_dimension(0); dimension_numbers.set_output_batch_dimension(2); dimension_numbers.set_output_feature_dimension(0); - dimension_numbers.add_spatial_dimensions(1); - dimension_numbers.add_spatial_dimensions(3); + dimension_numbers.add_input_spatial_dimensions(1); + dimension_numbers.add_output_spatial_dimensions(1); + dimension_numbers.add_input_spatial_dimensions(3); + dimension_numbers.add_output_spatial_dimensions(3); dimension_numbers.set_kernel_output_feature_dimension(0); dimension_numbers.set_kernel_input_feature_dimension(2); dimension_numbers.add_kernel_spatial_dimensions(1); @@ -380,8 +384,10 @@ TEST_F(ReferenceUtilTest, ConvGeneralDimensionsWithValidPadding) { dimension_numbers.set_input_feature_dimension(0); dimension_numbers.set_output_batch_dimension(2); dimension_numbers.set_output_feature_dimension(0); - dimension_numbers.add_spatial_dimensions(1); - dimension_numbers.add_spatial_dimensions(3); + dimension_numbers.add_input_spatial_dimensions(1); + dimension_numbers.add_output_spatial_dimensions(1); + dimension_numbers.add_input_spatial_dimensions(3); + dimension_numbers.add_output_spatial_dimensions(3); dimension_numbers.set_kernel_output_feature_dimension(0); dimension_numbers.set_kernel_input_feature_dimension(2); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 097f30be32..56dfb1cf0b 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -1624,8 +1624,11 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) { ConvolutionDimensionNumbers dnums; std::vector in_dims; int in_channel_idx = -1; - dnums.add_spatial_dimensions(-1); // filled in later - dnums.add_spatial_dimensions(-1); // filled in later + // filled in later + dnums.add_input_spatial_dimensions(-1); + dnums.add_output_spatial_dimensions(-1); + dnums.add_input_spatial_dimensions(-1); + dnums.add_output_spatial_dimensions(-1); for (int i = 0; i < strlen(options.dim_order); ++i) { char ch = options.dim_order[i]; if (ch == 'N') { @@ -1633,10 +1636,12 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) { dnums.set_output_batch_dimension(i); in_dims.push_back(options.in_batch); } else if (ch == 'H') { - dnums.set_spatial_dimensions(0, i); + dnums.set_input_spatial_dimensions(0, i); + dnums.set_output_spatial_dimensions(0, i); in_dims.push_back(options.in_height); } else if (ch == 'W') { - dnums.set_spatial_dimensions(1, i); + dnums.set_input_spatial_dimensions(1, i); + dnums.set_output_spatial_dimensions(1, i); in_dims.push_back(options.in_width); } else if (ch == 'C') { dnums.set_input_feature_dimension(i); diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc index a3dd13811c..2136aeb387 100644 --- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc +++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc @@ -41,8 +41,8 @@ StatusOr ConvCanonicalization::Run(HloModule* module) { auto kernel_input_feature_dim = dnums.kernel_input_feature_dimension(); auto kernel_output_feature_dim = dnums.kernel_output_feature_dimension(); - int num_spatial_dims = dnums.spatial_dimensions_size(); - int num_dims = num_spatial_dims + 2; + const int64 num_spatial_dims = dnums.output_spatial_dimensions_size(); + const int64 num_dims = num_spatial_dims + 2; // A canonical convolution's dimension numbers need to satisfy the // following conditions (see cs/PotentiallyImplementedAsEigenConvolution). @@ -59,10 +59,10 @@ StatusOr ConvCanonicalization::Run(HloModule* module) { std::vector new_input_dims(num_dims); new_input_dim_order[0] = input_batch_dim; new_input_dims[0] = input->shape().dimensions(input_batch_dim); - for (int i = 0; i < num_spatial_dims; ++i) { - new_input_dim_order[i + 1] = dnums.spatial_dimensions(i); + for (int64 i = 0; i < num_spatial_dims; ++i) { + new_input_dim_order[i + 1] = dnums.input_spatial_dimensions(i); new_input_dims[i + 1] = - input->shape().dimensions(dnums.spatial_dimensions(i)); + input->shape().dimensions(dnums.input_spatial_dimensions(i)); } new_input_dim_order[num_dims - 1] = input_feature_dim; new_input_dims[num_dims - 1] = @@ -78,7 +78,7 @@ StatusOr ConvCanonicalization::Run(HloModule* module) { std::vector new_kernel_dim_order(num_dims); std::vector new_kernel_dims(num_dims); - for (int i = 0; i < num_spatial_dims; ++i) { + for (int64 i = 0; i < num_spatial_dims; ++i) { new_kernel_dim_order[i] = dnums.kernel_spatial_dimensions(i); new_kernel_dims[i] = kernel->shape().dimensions(dnums.kernel_spatial_dimensions(i)); @@ -102,10 +102,10 @@ StatusOr ConvCanonicalization::Run(HloModule* module) { auto output_feature_dim = dnums.output_feature_dimension(); new_output_dim_order[0] = output_batch_dim; new_conv_dims[0] = hlo->shape().dimensions(output_batch_dim); - for (int i = 0; i < num_spatial_dims; ++i) { - new_output_dim_order[i + 1] = dnums.spatial_dimensions(i); + for (int64 i = 0; i < num_spatial_dims; ++i) { + new_output_dim_order[i + 1] = dnums.output_spatial_dimensions(i); new_conv_dims[i + 1] = - hlo->shape().dimensions(dnums.spatial_dimensions(i)); + hlo->shape().dimensions(dnums.output_spatial_dimensions(i)); } new_output_dim_order[num_dims - 1] = output_feature_dim; new_conv_dims[num_dims - 1] = hlo->shape().dimensions(output_feature_dim); @@ -115,9 +115,10 @@ StatusOr ConvCanonicalization::Run(HloModule* module) { ConvolutionDimensionNumbers new_dnums; new_dnums.set_input_batch_dimension(0); new_dnums.set_output_batch_dimension(0); - for (int i = 0; i < num_spatial_dims; ++i) { - new_dnums.add_spatial_dimensions(i + 1); + for (int64 i = 0; i < num_spatial_dims; ++i) { + new_dnums.add_input_spatial_dimensions(i + 1); new_dnums.add_kernel_spatial_dimensions(i); + new_dnums.add_output_spatial_dimensions(i + 1); } new_dnums.set_input_feature_dimension(num_dims - 1); new_dnums.set_output_feature_dimension(num_dims - 1); diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc index d593ba26b6..968f53d5c7 100644 --- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc +++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc @@ -69,8 +69,10 @@ TEST_F(ConvCanonicalizationTest, NonCanonicalToCanonical) { ConvolutionDimensionNumbers dnums; dnums.set_input_batch_dimension(1); dnums.set_output_batch_dimension(1); - dnums.add_spatial_dimensions(2); - dnums.add_spatial_dimensions(3); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(3); + dnums.add_output_spatial_dimensions(3); dnums.set_input_feature_dimension(0); dnums.set_output_feature_dimension(0); dnums.add_kernel_spatial_dimensions(2); @@ -125,8 +127,10 @@ TEST_F(ConvCanonicalizationTest, CanonicalStaysTheSame) { ConvolutionDimensionNumbers dnums; dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); dnums.set_input_feature_dimension(3); dnums.set_output_feature_dimension(3); dnums.add_kernel_spatial_dimensions(0); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc index d2e7f830d1..3993779da6 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc @@ -49,18 +49,21 @@ bool PotentiallyImplementedAsEigenConvolution( convolution.convolution_dimension_numbers(); // Only 1D and 2D convolutions are supported at the moment. // TODO(b/32897908): add an optimized implementation for 3D convolution. - const int64 num_spatial_dims = dnums.spatial_dimensions_size(); + const int64 num_spatial_dims = dnums.output_spatial_dimensions_size(); if (num_spatial_dims > 2) { return false; } for (int64 i = 0; i < num_spatial_dims; ++i) { - if (dnums.spatial_dimensions(i) != i + 1) { + if (dnums.input_spatial_dimensions(i) != i + 1) { return false; } if (dnums.kernel_spatial_dimensions(i) != i) { return false; } + if (dnums.output_spatial_dimensions(i) != i + 1) { + return false; + } } const Shape& output_shape = convolution.shape(); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 49f4782693..502dd2e738 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -822,14 +822,16 @@ Status IrEmitter::HandleSelectAndScatter(HloInstruction* select_and_scatter) { // If the initialized_flag is false, initialize the selected value and index // with the currently visiting operand. SetToFirstInsertPoint(if_initialized.false_block, &ir_builder_); - const auto save_operand_index = [&]( - const llvm_ir::IrArray::Index& operand_index) { - for (int64 i = 0; i < rank; ++i) { - llvm::Value* selected_index_address_slot = ir_builder_.CreateInBoundsGEP( - selected_index_address, {ir_builder_.getInt32(i)}); - ir_builder_.CreateStore(operand_index[i], selected_index_address_slot); - } - }; + const auto save_operand_index = + [&](const llvm_ir::IrArray::Index& operand_index) { + for (int64 i = 0; i < rank; ++i) { + llvm::Value* selected_index_address_slot = + ir_builder_.CreateInBoundsGEP(selected_index_address, + {ir_builder_.getInt32(i)}); + ir_builder_.CreateStore(operand_index[i], + selected_index_address_slot); + } + }; llvm_ir::IrArray operand_array(GetIrArrayFor(operand)); llvm::Value* operand_data = operand_array.EmitReadArrayElement(operand_index, &ir_builder_); @@ -952,11 +954,12 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { // Input tensor. const Shape& input_shape = convolution->operand(0)->shape(); int64 input_batch = input_shape.dimensions(dnums.input_batch_dimension()); - int64 input_rows = input_shape.dimensions(dnums.spatial_dimensions(0)); + int64 input_rows = + input_shape.dimensions(dnums.input_spatial_dimensions(0)); int64 input_cols = one_dim_convolution ? 1 - : input_shape.dimensions(dnums.spatial_dimensions(1)); + : input_shape.dimensions(dnums.input_spatial_dimensions(1)); int64 input_channels = input_shape.dimensions(dnums.input_feature_dimension()); @@ -976,11 +979,11 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { // Output tensor. const Shape& convolution_shape = convolution->shape(); int64 output_rows = - convolution_shape.dimensions(dnums.spatial_dimensions(0)); - int64 output_cols = - one_dim_convolution - ? 1 - : convolution_shape.dimensions(dnums.spatial_dimensions(1)); + convolution_shape.dimensions(dnums.output_spatial_dimensions(0)); + int64 output_cols = one_dim_convolution + ? 1 + : convolution_shape.dimensions( + dnums.output_spatial_dimensions(1)); // Extract the window stride for the convolution. const Window& window = convolution->window(); @@ -1068,10 +1071,10 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { return EmitTargetElementLoop( convolution, [this, convolution, lhs, rhs, window, dnums](const llvm_ir::IrArray::Index& index) { - int num_spatial_dims = dnums.spatial_dimensions_size(); + int num_spatial_dims = dnums.output_spatial_dimensions_size(); std::vector output_spatial(num_spatial_dims); for (int i = 0; i < num_spatial_dims; ++i) { - output_spatial[i] = index[dnums.spatial_dimensions(i)]; + output_spatial[i] = index[dnums.output_spatial_dimensions(i)]; } llvm::Value* output_feature = index[dnums.output_feature_dimension()]; llvm::Value* batch = index[dnums.output_batch_dimension()]; @@ -1091,8 +1094,9 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { for (int i = 0; i < num_spatial_dims; ++i) { kernel_spatial[i] = loops - .AddLoop(0, rhs->shape().dimensions( - dnums.kernel_spatial_dimensions(i)), + .AddLoop(0, + rhs->shape().dimensions( + dnums.kernel_spatial_dimensions(i)), tensorflow::strings::StrCat("k", i)) ->GetIndVarValue(); } @@ -1108,17 +1112,18 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { // Calculate the spatial index in the input array, taking striding, // dilation and padding into account. An index in the padding will be // out of the bounds of the array. - const auto calculate_input_index = [this]( - llvm::Value* output_index, llvm::Value* kernel_index, - const WindowDimension& window_dim) { - llvm::Value* strided_index = ir_builder_.CreateNSWMul( - output_index, ir_builder_.getInt64(window_dim.stride())); - llvm::Value* dilated_kernel_index = ir_builder_.CreateNSWMul( - kernel_index, ir_builder_.getInt64(window_dim.window_dilation())); - return ir_builder_.CreateNSWSub( - ir_builder_.CreateNSWAdd(strided_index, dilated_kernel_index), - ir_builder_.getInt64(window_dim.padding_low())); - }; + const auto calculate_input_index = + [this](llvm::Value* output_index, llvm::Value* kernel_index, + const WindowDimension& window_dim) { + llvm::Value* strided_index = ir_builder_.CreateNSWMul( + output_index, ir_builder_.getInt64(window_dim.stride())); + llvm::Value* dilated_kernel_index = ir_builder_.CreateNSWMul( + kernel_index, + ir_builder_.getInt64(window_dim.window_dilation())); + return ir_builder_.CreateNSWSub( + ir_builder_.CreateNSWAdd(strided_index, dilated_kernel_index), + ir_builder_.getInt64(window_dim.padding_low())); + }; std::vector input_spatial(num_spatial_dims); for (int i = 0; i < num_spatial_dims; ++i) { input_spatial[i] = calculate_input_index( @@ -1144,7 +1149,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { for (int i = 0; i < num_spatial_dims; ++i) { llvm::ConstantInt* input_bound = ir_builder_.getInt64(window_util::DilatedBound( - lhs->shape().dimensions(dnums.spatial_dimensions(i)), + lhs->shape().dimensions(dnums.input_spatial_dimensions(i)), window.dimensions(i).base_dilation())); llvm::Value* dim_in_bound = ir_builder_.CreateICmpULT(input_spatial[i], input_bound); @@ -1176,7 +1181,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { int num_dims = num_spatial_dims + 2; llvm_ir::IrArray::Index input_index(num_dims); for (int i = 0; i < num_spatial_dims; ++i) { - input_index[dnums.spatial_dimensions(i)] = input_spatial[i]; + input_index[dnums.input_spatial_dimensions(i)] = input_spatial[i]; } input_index[dnums.input_feature_dimension()] = input_feature; input_index[dnums.input_batch_dimension()] = batch; diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc index 5aaf072f9d..828ae675d7 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc @@ -74,9 +74,10 @@ MatchBackwardFilter(HloInstruction* conv) { conv->convolution_dimension_numbers(); auto input_batch_dim = conv_dnums.input_batch_dimension(); auto input_feature_dim = conv_dnums.input_feature_dimension(); + auto input_spatial_dims = conv_dnums.input_spatial_dimensions(); auto output_batch_dim = conv_dnums.output_batch_dimension(); auto output_feature_dim = conv_dnums.output_feature_dimension(); - auto spatial_dims = conv_dnums.spatial_dimensions(); + auto output_spatial_dims = conv_dnums.output_spatial_dimensions(); for (const WindowDimension& window_dim : conv->window().dimensions()) { if (window_dim.stride() != 1) { @@ -108,11 +109,11 @@ MatchBackwardFilter(HloInstruction* conv) { // // Compute the window of the backward convolution. Window backward_conv_window; - for (int i = 0; i < spatial_dims.size(); ++i) { + for (int i = 0; i < input_spatial_dims.size(); ++i) { WindowDimension* dim = backward_conv_window.add_dimensions(); // The window size of the backward convolution equals the output size of the // forward convolution. - int64 filter_size = conv->shape().dimensions(spatial_dims[i]); + int64 filter_size = conv->shape().dimensions(output_spatial_dims[i]); dim->set_size(filter_size); // The window stride equals the window dilation of the forward convolution. dim->set_stride(conv->window().dimensions(i).window_dilation()); @@ -120,7 +121,8 @@ MatchBackwardFilter(HloInstruction* conv) { // activations. dim->set_padding_low(conv->window().dimensions(i).padding_low()); - int64 input_size = conv->operand(0)->shape().dimensions(spatial_dims[i]); + int64 input_size = + conv->operand(0)->shape().dimensions(input_spatial_dims[i]); int64 output_size = conv->window().dimensions(i).size(); // Compute the range of the amount of valid high padding. We first compute // min_padding_high, the amount of padding on the right/bottom to ensure the @@ -189,8 +191,11 @@ MatchBackwardFilter(HloInstruction* conv) { backward_conv_dnums.set_input_feature_dimension(input_batch_dim); backward_conv_dnums.set_output_batch_dimension(output_feature_dim); backward_conv_dnums.set_output_feature_dimension(output_batch_dim); - for (int i = 0; i < spatial_dims.size(); ++i) { - backward_conv_dnums.add_spatial_dimensions(spatial_dims[i]); + for (int i = 0; i < input_spatial_dims.size(); ++i) { + backward_conv_dnums.add_input_spatial_dimensions(input_spatial_dims[i]); + } + for (int i = 0; i < output_spatial_dims.size(); ++i) { + backward_conv_dnums.add_output_spatial_dimensions(output_spatial_dims[i]); } // The dimension numbering of the output of the forward convolution (before // transposition) is the same as that of the activations (according to the @@ -205,9 +210,9 @@ MatchBackwardFilter(HloInstruction* conv) { PositionInContainer(transpose->dimensions(), output_batch_dim)); backward_conv_dnums.set_kernel_output_feature_dimension( PositionInContainer(transpose->dimensions(), output_feature_dim)); - for (int i = 0; i < spatial_dims.size(); ++i) { + for (int i = 0; i < output_spatial_dims.size(); ++i) { backward_conv_dnums.add_kernel_spatial_dimensions( - PositionInContainer(transpose->dimensions(), spatial_dims[i])); + PositionInContainer(transpose->dimensions(), output_spatial_dims[i])); } return std::make_tuple(true, std::vector({transpose, conv}), @@ -272,12 +277,14 @@ MatchBackwardInput(HloInstruction* conv) { } } - const auto& spatial_dims = dnums.spatial_dimensions(); - CHECK_EQ(conv->window().dimensions().size(), spatial_dims.size()); + const auto& input_spatial_dims = dnums.input_spatial_dimensions(); + const auto& output_spatial_dims = dnums.output_spatial_dimensions(); + CHECK_EQ(conv->window().dimensions().size(), input_spatial_dims.size()); + CHECK_EQ(output_spatial_dims.size(), input_spatial_dims.size()); const Window& old_window = conv->window(); Window new_window = old_window; - for (size_t i = 0; i < spatial_dims.size(); ++i) { + for (size_t i = 0; i < input_spatial_dims.size(); ++i) { // Restore backward convolution's padding config from the matched pattern. // See the comment in tensorflow/core/kernels/conv_grad_tuple_ops.cc // for how we convert backward input convolution to a variant of forward @@ -310,8 +317,9 @@ MatchBackwardInput(HloInstruction* conv) { // end at the border. The maximum amount (max_padding_high) equals // min_padding_high+stride-1 -- max_padding_high+1 would cause the output // size to change. - auto unpadded_input_size = conv->shape().dimensions(spatial_dims[i]); - auto output_size = conv->operand(0)->shape().dimensions(spatial_dims[i]); + auto unpadded_input_size = conv->shape().dimensions(output_spatial_dims[i]); + auto output_size = + conv->operand(0)->shape().dimensions(input_spatial_dims[i]); auto padded_input_size = kernel_size + dim->stride() * (output_size - 1); auto total_pad_size = padded_input_size - unpadded_input_size; auto min_padding_high = total_pad_size - backward_padding_low; diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc index 19b122ba06..112c496e1f 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc @@ -49,8 +49,10 @@ class ConvolutionFoldingTest : public HloTestBase { tf_default_dnums_for_backward_filter_.set_output_batch_dimension(3); tf_default_dnums_for_backward_filter_.set_input_feature_dimension(0); tf_default_dnums_for_backward_filter_.set_output_feature_dimension(0); - tf_default_dnums_for_backward_filter_.add_spatial_dimensions(1); - tf_default_dnums_for_backward_filter_.add_spatial_dimensions(2); + tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(1); + tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1); + tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(2); + tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(2); tf_default_dnums_for_backward_filter_.set_kernel_input_feature_dimension(0); tf_default_dnums_for_backward_filter_.set_kernel_output_feature_dimension( 3); @@ -61,8 +63,10 @@ class ConvolutionFoldingTest : public HloTestBase { tf_default_dnums_for_backward_input_.set_output_batch_dimension(0); tf_default_dnums_for_backward_input_.set_input_feature_dimension(3); tf_default_dnums_for_backward_input_.set_output_feature_dimension(3); - tf_default_dnums_for_backward_input_.add_spatial_dimensions(1); - tf_default_dnums_for_backward_input_.add_spatial_dimensions(2); + tf_default_dnums_for_backward_input_.add_input_spatial_dimensions(1); + tf_default_dnums_for_backward_input_.add_output_spatial_dimensions(1); + tf_default_dnums_for_backward_input_.add_input_spatial_dimensions(2); + tf_default_dnums_for_backward_input_.add_output_spatial_dimensions(2); tf_default_dnums_for_backward_input_.set_kernel_input_feature_dimension(3); tf_default_dnums_for_backward_input_.set_kernel_output_feature_dimension(2); tf_default_dnums_for_backward_input_.add_kernel_spatial_dimensions(0); @@ -258,8 +262,10 @@ TEST_F(ConvolutionFoldingTest, BackwardInputConvolveEvenPadding) { conv_dnums.set_output_batch_dimension(0); conv_dnums.set_input_feature_dimension(1); conv_dnums.set_output_feature_dimension(1); - conv_dnums.add_spatial_dimensions(2); - conv_dnums.add_spatial_dimensions(3); + conv_dnums.add_input_spatial_dimensions(2); + conv_dnums.add_output_spatial_dimensions(2); + conv_dnums.add_input_spatial_dimensions(3); + conv_dnums.add_output_spatial_dimensions(3); conv_dnums.set_kernel_input_feature_dimension(0); conv_dnums.set_kernel_output_feature_dimension(1); conv_dnums.add_kernel_spatial_dimensions(2); diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 5fe5f55857..037eec8ef5 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -29,12 +29,12 @@ namespace se = ::perftools::gputools; namespace xla { namespace gpu { +using se::dnn::AlgorithmDesc; using se::dnn::BatchDescriptor; using se::dnn::ConvolutionDescriptor; using se::dnn::DataLayout; using se::dnn::FilterDescriptor; using se::dnn::FilterLayout; -using se::dnn::AlgorithmDesc; ConvolveScratchAllocator::ConvolveScratchAllocator( int device_ordinal, DeviceMemoryAllocator* memory_allocator) @@ -131,8 +131,9 @@ tensorflow::Status ConvolutionThunk::ExecuteOnStream( const int effective_num_dimensions = std::max(2, num_dimensions); CHECK_EQ(F32, output_shape_.element_type()); - CHECK_EQ(num_dimensions, dim_nums_.spatial_dimensions_size()); + CHECK_EQ(num_dimensions, dim_nums_.input_spatial_dimensions_size()); CHECK_EQ(num_dimensions, dim_nums_.kernel_spatial_dimensions_size()); + CHECK_EQ(num_dimensions, dim_nums_.output_spatial_dimensions_size()); for (const WindowDimension& dim : window_.dimensions()) { CHECK_EQ(dim.padding_low(), dim.padding_high()); } @@ -148,7 +149,7 @@ tensorflow::Status ConvolutionThunk::ExecuteOnStream( // Note that the dimensions are reversed. The same holds below. input_descriptor.set_spatial_dim( static_cast(effective_num_dimensions - dim - 1), - input_shape_.dimensions(dim_nums_.spatial_dimensions(dim))); + input_shape_.dimensions(dim_nums_.input_spatial_dimensions(dim))); } FilterDescriptor filter_descriptor(effective_num_dimensions); @@ -182,7 +183,7 @@ tensorflow::Status ConvolutionThunk::ExecuteOnStream( for (int dim = 0; dim < num_dimensions; ++dim) { output_descriptor.set_spatial_dim( static_cast(effective_num_dimensions - dim - 1), - output_shape_.dimensions(dim_nums_.spatial_dimensions(dim))); + output_shape_.dimensions(dim_nums_.output_spatial_dimensions(dim))); } // Add a singleton dimension in the 1D convolution case. diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 9a4bfd0905..1d47ffde43 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -156,8 +156,10 @@ TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfConvolutionUnfused) { conv_dnums.set_output_batch_dimension(0); conv_dnums.set_input_feature_dimension(1); conv_dnums.set_output_feature_dimension(1); - conv_dnums.add_spatial_dimensions(2); - conv_dnums.add_spatial_dimensions(3); + conv_dnums.add_input_spatial_dimensions(2); + conv_dnums.add_output_spatial_dimensions(2); + conv_dnums.add_input_spatial_dimensions(3); + conv_dnums.add_output_spatial_dimensions(3); conv_dnums.set_kernel_output_feature_dimension(0); conv_dnums.set_kernel_input_feature_dimension(1); conv_dnums.add_kernel_spatial_dimensions(2); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 8fb7a6adda..658fd05cd4 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -100,7 +100,7 @@ bool ImplementedAsDnnConvolution(const HloInstruction& hlo) { if (hlo.opcode() == HloOpcode::kConvolution) { const ConvolutionDimensionNumbers& dnums = hlo.convolution_dimension_numbers(); - if (dnums.spatial_dimensions_size() > 3) { + if (dnums.input_spatial_dimensions_size() > 3) { return false; } diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/layout_assignment.cc index 0bbd63fb7b..d475c4171b 100644 --- a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/layout_assignment.cc @@ -80,9 +80,9 @@ Status GpuLayoutAssignment::AddBackendConstraints( const ConvolutionDimensionNumbers& dimension_numbers = instruction->convolution_dimension_numbers(); std::vector input_layout; - for (int i = dimension_numbers.spatial_dimensions_size() - 1; i >= 0; - --i) { - input_layout.push_back(dimension_numbers.spatial_dimensions(i)); + for (int i = dimension_numbers.input_spatial_dimensions_size() - 1; + i >= 0; --i) { + input_layout.push_back(dimension_numbers.input_spatial_dimensions(i)); } input_layout.push_back(dimension_numbers.input_feature_dimension()); input_layout.push_back(dimension_numbers.input_batch_dimension()); @@ -102,9 +102,9 @@ Status GpuLayoutAssignment::AddBackendConstraints( *filter_shape.mutable_layout() = LayoutUtil::MakeLayout(filter_layout); std::vector output_layout; - for (int i = dimension_numbers.spatial_dimensions_size() - 1; i >= 0; - --i) { - output_layout.push_back(dimension_numbers.spatial_dimensions(i)); + for (int i = dimension_numbers.output_spatial_dimensions_size() - 1; + i >= 0; --i) { + output_layout.push_back(dimension_numbers.output_spatial_dimensions(i)); } output_layout.push_back(dimension_numbers.output_feature_dimension()); output_layout.push_back(dimension_numbers.output_batch_dimension()); diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc index 9274e16a45..11290eda4f 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc @@ -49,8 +49,8 @@ HloInstruction* MaybePaddedAndSlicedInput( // applies positive padding and dilation. PaddingConfig padding_config = MakeNoPaddingConfig(input->shape().dimensions_size()); - for (size_t i = 0; i < conv_dnums.spatial_dimensions().size(); ++i) { - int64 dim = conv_dnums.spatial_dimensions(i); + for (size_t i = 0; i < conv_dnums.input_spatial_dimensions().size(); ++i) { + int64 dim = conv_dnums.input_spatial_dimensions(i); padding_config.mutable_dimensions(dim)->set_edge_padding_low( std::max(0LL, conv_window.dimensions(i).padding_low())); padding_config.mutable_dimensions(dim)->set_edge_padding_high( @@ -81,8 +81,8 @@ HloInstruction* MaybePaddedAndSlicedInput( std::vector limit_indices(input->shape().dimensions().begin(), input->shape().dimensions().end()); std::vector strides(input->shape().dimensions_size(), 1); - for (size_t i = 0; i < conv_dnums.spatial_dimensions().size(); ++i) { - int64 dim = conv_dnums.spatial_dimensions(i); + for (size_t i = 0; i < conv_dnums.input_spatial_dimensions().size(); ++i) { + int64 dim = conv_dnums.input_spatial_dimensions(i); // If dimension "dim" has negative padding, increase the start index or // decrement the limit index by the amount of negative padding. start_indices[dim] += @@ -117,8 +117,8 @@ HloInstruction* MaybePaddedKernel(const Window& conv_window, for (size_t i = 0; i < kernel->shape().dimensions_size(); ++i) { padding_config.add_dimensions(); } - for (size_t i = 0; i < conv_dnums.spatial_dimensions().size(); ++i) { - int64 dim = conv_dnums.spatial_dimensions(i); + for (size_t i = 0; i < conv_dnums.kernel_spatial_dimensions().size(); ++i) { + int64 dim = conv_dnums.kernel_spatial_dimensions(i); padding_config.mutable_dimensions(dim)->set_interior_padding( conv_window.dimensions(i).window_dilation() - 1); } @@ -229,7 +229,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution( // later. Therefore, the amount of new padding (low or high) is the minimum // of the amount of old padding low and old padding high. int64 new_conv_padding = std::min(padding_low, padding_high); - int64 dim = backward_conv_dnums.spatial_dimensions(i); + int64 dim = backward_conv_dnums.input_spatial_dimensions(i); input_padding_config.mutable_dimensions(dim)->set_edge_padding_low( padding_low - new_conv_padding); input_padding_config.mutable_dimensions(dim)->set_edge_padding_high( @@ -369,12 +369,11 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution( std::vector limit_indices( new_backward_conv->shape().dimensions().begin(), new_backward_conv->shape().dimensions().end()); - std::vector strides(new_backward_conv->shape().dimensions_size(), - 1LL); + std::vector strides(new_backward_conv->shape().dimensions_size(), 1LL); for (size_t i = 0; i < backward_conv->window().dimensions_size(); ++i) { int64 padding_low = backward_conv->window().dimensions(i).padding_low(); int64 padding_high = backward_conv->window().dimensions(i).padding_high(); - int64 dim = backward_conv_dnums.spatial_dimensions(i); + int64 dim = backward_conv_dnums.output_spatial_dimensions(i); if (padding_low > padding_high) { // If the amount of low padding (of the old backward convolution) is // larger, we internally pad the low end of the activations and slice diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 0a1ebe3416..e693d167a1 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -812,7 +812,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { CHECK(ShapeUtil::SameElementType(lhs_shape, result_shape)); const auto& dnums = conv->convolution_dimension_numbers(); - const int64 num_spatial_dims = dnums.spatial_dimensions_size(); + const int64 num_spatial_dims = dnums.output_spatial_dimensions_size(); + CHECK_EQ(num_spatial_dims, dnums.input_spatial_dimensions_size()); CHECK_EQ(num_spatial_dims, dnums.kernel_spatial_dimensions_size()); CHECK_GE(num_spatial_dims, 0); CHECK_EQ(window.dimensions_size(), num_spatial_dims); @@ -877,13 +878,15 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // Find corresponding spatial dimension index for input (lhs). for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { // Spatial dimension number for input (lhs) and output. - const int64 spatial_dim = dnums.spatial_dimensions(ki); + const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); + const int64 output_spatial_dim = + dnums.output_spatial_dimensions(ki); // Calculate lhs (input) index without taking base dilation into // account. const auto& window_dim = window.dimensions(ki); const int64 undilated_index = - out_index[spatial_dim] * window_dim.stride() - + out_index[output_spatial_dim] * window_dim.stride() - window_dim.padding_low() + rhs_spatial_index[ki] * window_dim.window_dilation(); // Skip if the lhs (input) index is to be dilated. @@ -892,12 +895,13 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } // Calculate the actual lhs (input) index after dilation. - lhs_index[spatial_dim] = + lhs_index[input_spatial_dim] = undilated_index / window_dim.base_dilation(); // Skip if input index is not in bound. - if (!(lhs_index[spatial_dim] >= 0 && - lhs_index[spatial_dim] < lhs_shape.dimensions(spatial_dim))) { + if (!(lhs_index[input_spatial_dim] >= 0 && + lhs_index[input_spatial_dim] < + lhs_shape.dimensions(input_spatial_dim))) { goto cnt; } diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index d0d6029d5f..b2c4351896 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -751,7 +751,8 @@ TEST_F(HloEvaluatorTest, SimpleConv1D) { dnums.set_output_batch_dimension(0); dnums.set_input_feature_dimension(1); dnums.set_output_feature_dimension(1); - dnums.add_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); dnums.set_kernel_output_feature_dimension(0); dnums.set_kernel_input_feature_dimension(1); @@ -886,8 +887,10 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) { dnums.set_output_batch_dimension(2); dnums.set_input_feature_dimension(0); dnums.set_output_feature_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(3); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(3); + dnums.add_output_spatial_dimensions(3); dnums.set_kernel_output_feature_dimension(0); dnums.set_kernel_input_feature_dimension(2); @@ -960,8 +963,10 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) { dnums.set_output_batch_dimension(2); dnums.set_input_feature_dimension(0); dnums.set_output_feature_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(3); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(3); + dnums.add_output_spatial_dimensions(3); dnums.set_kernel_output_feature_dimension(0); dnums.set_kernel_input_feature_dimension(2); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 854185af56..c30c432654 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -3021,25 +3021,25 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const { // lhs_dims[i] is the symbol of the logical dimension i for the lhs // operand. E.g. if batch has dimension number 2, then lhs_dims[2] == "b". - std::vector lhs_dims(2 + dnums.spatial_dimensions().size()); + std::vector lhs_dims(2 + dnums.input_spatial_dimensions().size()); lhs_dims[dnums.input_batch_dimension()] = 'b'; lhs_dims[dnums.input_feature_dimension()] = 'f'; - for (int64 i = 0; i < dnums.spatial_dimensions().size(); ++i) { - lhs_dims[dnums.spatial_dimensions(i)] = StrCat(i); + for (int64 i = 0; i < dnums.input_spatial_dimensions().size(); ++i) { + lhs_dims[dnums.input_spatial_dimensions(i)] = StrCat(i); } std::vector rhs_dims(2 + dnums.kernel_spatial_dimensions().size()); rhs_dims[dnums.kernel_input_feature_dimension()] = "i"; rhs_dims[dnums.kernel_output_feature_dimension()] = "o"; - for (int64 i = 0; i < dnums.spatial_dimensions().size(); ++i) { + for (int64 i = 0; i < dnums.kernel_spatial_dimensions().size(); ++i) { rhs_dims[dnums.kernel_spatial_dimensions(i)] = StrCat(i); } - std::vector output_dims(2 + dnums.spatial_dimensions().size()); + std::vector output_dims(2 + dnums.output_spatial_dimensions().size()); output_dims[dnums.output_batch_dimension()] = 'b'; output_dims[dnums.output_feature_dimension()] = 'f'; - for (int64 i = 0; i < dnums.spatial_dimensions().size(); ++i) { - output_dims[dnums.spatial_dimensions(i)] = StrCat(i); + for (int64 i = 0; i < dnums.output_spatial_dimensions().size(); ++i) { + output_dims[dnums.output_spatial_dimensions(i)] = StrCat(i); } result += "dim_labels="; diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 0a2bf939c1..3df1911d07 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1445,7 +1445,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str()); } - if (dnums.spatial_dimensions_size() != + if (dnums.input_spatial_dimensions_size() != dnums.kernel_spatial_dimensions_size()) { return InvalidArgument( "Both arguments to convolution must have same number of dimensions.\n" @@ -1453,7 +1453,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( window.DebugString().c_str()); } - const int num_spatial_dims = dnums.spatial_dimensions_size(); + const int num_spatial_dims = dnums.input_spatial_dimensions_size(); if (window.dimensions_size() != num_spatial_dims) { return InvalidArgument( "Window must have same number of dimensions as dimension numbers.\n" @@ -1482,8 +1482,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( std::vector input_dnums(num_dims); input_dnums[0] = dnums.input_batch_dimension(); input_dnums[1] = dnums.input_feature_dimension(); - std::copy(dnums.spatial_dimensions().begin(), - dnums.spatial_dimensions().end(), input_dnums.begin() + 2); + std::copy(dnums.input_spatial_dimensions().begin(), + dnums.input_spatial_dimensions().end(), input_dnums.begin() + 2); std::sort(input_dnums.begin(), input_dnums.end()); std::vector window_dnums(num_dims); @@ -1493,12 +1493,20 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( dnums.kernel_spatial_dimensions().end(), window_dnums.begin() + 2); std::sort(window_dnums.begin(), window_dnums.end()); + std::vector output_dnums(num_dims); + output_dnums[0] = dnums.output_batch_dimension(); + output_dnums[1] = dnums.output_feature_dimension(); + std::copy(dnums.output_spatial_dimensions().begin(), + dnums.output_spatial_dimensions().end(), output_dnums.begin() + 2); + std::sort(output_dnums.begin(), output_dnums.end()); + std::vector expected_dnums(num_dims); std::iota(expected_dnums.begin(), expected_dnums.end(), 0); const auto in_range = [num_dims](int64 i) { return 0 <= i && i < num_dims; }; if (!std::all_of(input_dnums.begin(), input_dnums.end(), in_range) || - !std::all_of(window_dnums.begin(), window_dnums.end(), in_range)) { + !std::all_of(window_dnums.begin(), window_dnums.end(), in_range) || + !std::all_of(output_dnums.begin(), output_dnums.end(), in_range)) { return InvalidArgument( "A dimension number is out of range in convolution: %s", dnums.DebugString().c_str()); @@ -1516,10 +1524,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "once: %s", dnums.DebugString().c_str()); } + if (output_dnums != expected_dnums) { + return InvalidArgument( + "Output dimensions of convolution must contain each dimension exactly " + "once: %s", + dnums.DebugString().c_str()); + } std::vector input_spatial_dims(num_spatial_dims); for (int i = 0; i < num_spatial_dims; ++i) { - input_spatial_dims[i] = lhs.dimensions(dnums.spatial_dimensions(i)); + input_spatial_dims[i] = lhs.dimensions(dnums.input_spatial_dimensions(i)); } const int64 input_features = lhs.dimensions(dnums.input_feature_dimension()); const int64 input_batch = lhs.dimensions(dnums.input_batch_dimension()); @@ -1567,7 +1581,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( dimensions[dnums.output_batch_dimension()] = input_batch; dimensions[dnums.output_feature_dimension()] = kernel_output_features; for (int i = 0; i < num_spatial_dims; ++i) { - dimensions[dnums.spatial_dimensions(i)] = window_output_shape.dimensions(i); + dimensions[dnums.output_spatial_dimensions(i)] = + window_output_shape.dimensions(i); } return ShapeUtil::MakeShape(lhs.element_type(), dimensions); diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index d12f7bd145..be93c879c0 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -395,8 +395,10 @@ TEST_F(ShapeInferenceTest, Convolve) { dnums.set_output_batch_dimension(0); dnums.set_input_feature_dimension(1); dnums.set_output_feature_dimension(1); - dnums.add_spatial_dimensions(2); - dnums.add_spatial_dimensions(3); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(3); + dnums.add_output_spatial_dimensions(3); // Dimension order: x1, batch, feature, x0 Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 12, 11, 3}); @@ -437,8 +439,10 @@ TEST_F(ShapeInferenceTest, ConvolveWithWindowDilation) { dnums.set_output_batch_dimension(0); dnums.set_input_feature_dimension(1); dnums.set_output_feature_dimension(1); - dnums.add_spatial_dimensions(2); - dnums.add_spatial_dimensions(3); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(3); + dnums.add_output_spatial_dimensions(3); // Dimension order: x1, batch, feature, x0 Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 12, 11, 3}); @@ -480,8 +484,10 @@ TEST_F(ShapeInferenceTest, ConvolveWithBaseDilation) { dnums.set_output_batch_dimension(0); dnums.set_input_feature_dimension(1); dnums.set_output_feature_dimension(1); - dnums.add_spatial_dimensions(2); - dnums.add_spatial_dimensions(3); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(3); + dnums.add_output_spatial_dimensions(3); // Dimension order: x1, batch, feature, x0 Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 12, 11, 4}); @@ -524,8 +530,10 @@ TEST_F(ShapeInferenceTest, ConvolveDimensionNumbersOverlapError) { dnums.set_output_batch_dimension(3); dnums.set_input_feature_dimension(2); dnums.set_output_feature_dimension(2); - dnums.add_spatial_dimensions(0); - dnums.add_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(0); + dnums.add_output_spatial_dimensions(0); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); dnums.set_kernel_input_feature_dimension(0); // duplicated with kernel_x0 dnums.set_kernel_output_feature_dimension(3); dnums.add_kernel_spatial_dimensions(0); diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc index 8c2640adf5..fb55d4e543 100644 --- a/tensorflow/compiler/xla/service/transpose_folding.cc +++ b/tensorflow/compiler/xla/service/transpose_folding.cc @@ -58,27 +58,11 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoConvolution( return {}; } - const ConvolutionDimensionNumbers& dnums = - convolution.convolution_dimension_numbers(); - TransposeFolding::OperandIndices operand_set; for (int64 i = 0; i < convolution.operand_count(); ++i) { auto& operand = *convolution.operand(i); if (operand.opcode() == HloOpcode::kTranspose && operand.user_count() == 1) { - const auto& transpose_dimensions = operand.dimensions(); - // We can transpose the LHS so long as it doesn't move around spatial - // dimensions because ConvolutionDimensionNumbers doesn't have different - // fields for input and output spatial dimensions. - if (i == 0 && - std::any_of(dnums.spatial_dimensions().begin(), - dnums.spatial_dimensions().end(), - [&](const int64 spatial_dimension) { - return transpose_dimensions[spatial_dimension] != - spatial_dimension; - })) { - continue; - } operand_set.push_back(i); } } @@ -137,7 +121,7 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) { transpose_dimensions[dnums.input_batch_dimension()]); new_dnums.set_input_feature_dimension( transpose_dimensions[dnums.input_feature_dimension()]); - for (const auto& spatial_dimension : dnums.spatial_dimensions()) { + for (const auto& spatial_dimension : dnums.input_spatial_dimensions()) { CHECK_EQ(spatial_dimension, transpose_dimensions[spatial_dimension]); } new_lhs = &transpose_operand; diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc index 00462f9be1..6ac32e88f1 100644 --- a/tensorflow/compiler/xla/service/transpose_folding_test.cc +++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc @@ -362,10 +362,18 @@ TEST_F(TransposeFoldingTest, FoldConvTransposeLhs) { EXPECT_EQ( dnums.input_batch_dimension(), new_conv->convolution_dimension_numbers().input_feature_dimension()); - EXPECT_EQ(dnums.spatial_dimensions(0), - new_conv->convolution_dimension_numbers().spatial_dimensions(0)); - EXPECT_EQ(dnums.spatial_dimensions(1), - new_conv->convolution_dimension_numbers().spatial_dimensions(1)); + EXPECT_EQ( + dnums.input_spatial_dimensions(0), + new_conv->convolution_dimension_numbers().input_spatial_dimensions(0)); + EXPECT_EQ( + dnums.input_spatial_dimensions(1), + new_conv->convolution_dimension_numbers().input_spatial_dimensions(1)); + EXPECT_EQ( + dnums.output_spatial_dimensions(0), + new_conv->convolution_dimension_numbers().output_spatial_dimensions(0)); + EXPECT_EQ( + dnums.output_spatial_dimensions(1), + new_conv->convolution_dimension_numbers().output_spatial_dimensions(1)); } } // namespace diff --git a/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc index b0a63bccbb..896b34fb6e 100644 --- a/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc @@ -39,8 +39,8 @@ class ConvolutionDimensionNumbersTest : public ClientLibraryTestBase {}; // Tests the convolution operation with invalid input dimension numbers. TEST_F(ConvolutionDimensionNumbersTest, InvalidInputDimensionNumbers) { auto dimension_numbers_status = - ComputationBuilder::CreateConvDimensionNumbers(0, 2, 0, 2, 2, 3, 0, 1, 2, - 3); + ComputationBuilder::CreateConvDimensionNumbers(0, 2, 2, 3, 0, 1, 2, 3, 0, + 1, 2, 3); ASSERT_FALSE(dimension_numbers_status.ok()); ASSERT_THAT(dimension_numbers_status.status().error_message(), ::testing::HasSubstr("input are not unique")); @@ -49,13 +49,23 @@ TEST_F(ConvolutionDimensionNumbersTest, InvalidInputDimensionNumbers) { // Tests the convolution operation with invalid weight dimension numbers. TEST_F(ConvolutionDimensionNumbersTest, InvalidWeightDimensionNumbers) { auto dimension_numbers_status = - ComputationBuilder::CreateConvDimensionNumbers(0, 1, 0, 1, 2, 3, 2, 3, 2, - 3); + ComputationBuilder::CreateConvDimensionNumbers(0, 1, 2, 3, 0, 1, 2, 3, 0, + 2, 2, 3); ASSERT_FALSE(dimension_numbers_status.ok()); ASSERT_THAT(dimension_numbers_status.status().error_message(), ::testing::HasSubstr("weight are not unique")); } +// Tests the convolution operation with invalid output dimension numbers. +TEST_F(ConvolutionDimensionNumbersTest, InvalidOutputDimensionNumbers) { + auto dimension_numbers_status = + ComputationBuilder::CreateConvDimensionNumbers(0, 1, 2, 3, 0, 2, 2, 3, 0, + 1, 2, 3); + ASSERT_FALSE(dimension_numbers_status.ok()); + ASSERT_THAT(dimension_numbers_status.status().error_message(), + ::testing::HasSubstr("output are not unique")); +} + XLA_TEST_F(ConvolutionDimensionNumbersTest, TwoConvsWithDifferentDimensionNumbers) { auto input_array = MakeUnique>(2, 3, 5, 5); diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 8de7c9ffdc..2924c08615 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -370,9 +370,12 @@ XLA_TEST_F(ConvolutionTest, Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid) { ConvolutionDimensionNumbers dnums; dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(2); - dnums.add_spatial_dimensions(3); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(3); + dnums.add_output_spatial_dimensions(3); dnums.set_input_feature_dimension(4); dnums.set_output_feature_dimension(4); dnums.add_kernel_spatial_dimensions(0); @@ -423,8 +426,10 @@ XLA_TEST_F(ConvolutionTest, Convolve2D_1x3x3x5_3x3x5x5_Valid) { ConvolutionDimensionNumbers dnums; dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); dnums.set_input_feature_dimension(3); dnums.set_output_feature_dimension(3); dnums.add_kernel_spatial_dimensions(0); @@ -538,7 +543,8 @@ XLA_TEST_P(Convolve1D1WindowTest, Convolve1D1Window) { ConvolutionDimensionNumbers dnums; dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); dnums.set_input_feature_dimension(2); dnums.set_output_feature_dimension(2); dnums.add_kernel_spatial_dimensions(0); diff --git a/tensorflow/compiler/xla/tests/convolution_variants_test.cc b/tensorflow/compiler/xla/tests/convolution_variants_test.cc index 9b36e3722b..9c1145def8 100644 --- a/tensorflow/compiler/xla/tests/convolution_variants_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_variants_test.cc @@ -320,9 +320,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter3x3in2x2Padded) { Array4D input_array(1, 1, 2, 2, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); - const Array4D filter_array(1, 1, 3, 3, {10000, 0, 1000, // row 0 - 0, 100, 0, // row 1 - 10, 0, 1}); // row 2 + const Array4D filter_array(1, 1, 3, 3, + {10000, 0, 1000, // row 0 + 0, 100, 0, // row 1 + 10, 0, 1}); // row 2 auto filter = builder.ConstantR4FromArray4D(filter_array); builder.Conv(input, filter, {1, 1}, Padding::kSame); @@ -472,7 +473,9 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input3x1x2x2) { builder.Conv(input, filter, {1, 1}, Padding::kValid); std::vector expected_data = { - 23, 33, 43, + 23, + 33, + 43, }; Array4D expected(bs, 1, 1, 1, expected_data); ComputeAndCompareR4(&builder, expected, {}, error_spec_); @@ -669,10 +672,11 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) { std::iota(input_data.begin(), input_data.end(), 1.0); Array4D input_array(1, 1, 3, 4, input_data); - Array4D filter_array(1, 1, 4, 3, {100, 10, 1, // - 200, 20, 2, // - 300, 30, 3, // - 400, 40, 4}); + Array4D filter_array(1, 1, 4, 3, + {100, 10, 1, // + 200, 20, 2, // + 300, 30, 3, // + 400, 40, 4}); auto input = builder.ConstantR4FromArray4D(input_array); auto filter = builder.ConstantR4FromArray4D(filter_array); builder.ConvGeneralDilated( @@ -681,9 +685,10 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) { /*rhs_dilation=*/{}, ComputationBuilder::CreateDefaultConvDimensionNumbers()); - Array4D expected(1, 1, 3, 5, {204, 40, 406, 60, 608, // - 1518, 180, 1821, 210, 2124, // - 4146, 460, 4651, 510, 5156}); + Array4D expected(1, 1, 3, 5, + {204, 40, 406, 60, 608, // + 1518, 180, 1821, 210, 2124, // + 4146, 460, 4651, 510, 5156}); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } @@ -926,7 +931,8 @@ XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x1x1_Filter16x16x1x1) { ComputeAndCompareR4(&builder, *expected, {}, error_spec_); } -XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x16x16_Filter16x16x16x16) { +XLA_TEST_F(ConvolutionVariantsTest, + RandomData_Input16x16x16x16_Filter16x16x16x16) { constexpr int bs = 16; constexpr int iz = 16; constexpr int oz = 16; @@ -976,8 +982,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x2x1x1Input1x2x3x1GeneralPadding) { // NHWC input format. dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); dnums.set_input_feature_dimension(3); dnums.set_output_feature_dimension(3); @@ -1018,8 +1026,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1GeneralPadding) { // NHWC input format. dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); dnums.set_input_feature_dimension(3); dnums.set_output_feature_dimension(3); @@ -1060,8 +1070,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1NoPadding) { // NHWC input format. dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); dnums.set_input_feature_dimension(3); dnums.set_output_feature_dimension(3); @@ -1099,8 +1111,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x3Input1x2x3x2NoPadding) { // NHWC input format. dnums.set_input_batch_dimension(0); dnums.set_output_batch_dimension(0); - dnums.add_spatial_dimensions(1); - dnums.add_spatial_dimensions(2); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); dnums.set_input_feature_dimension(3); dnums.set_output_feature_dimension(3); @@ -1131,7 +1145,8 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x3Input1x2x3x2NoPadding) { // Conv([1,2,3], Reverse([5,6]), padding_low=1) // into // BackwardInputConv([1,2,3], [5,6], padding_low=0, padding_high=1) -XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingLessThanHighPadding) { +XLA_TEST_F(ConvolutionVariantsTest, + BackwardInputLowPaddingLessThanHighPadding) { ComputationBuilder builder(client_, TestName()); auto gradients = builder.ConstantR4FromArray4D( @@ -1149,7 +1164,8 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingLessThanHighPadding) // Conv([1], Reverse([1,10,100]), padding_high=3, base_dilation=3) // into // BackwardInputConv([1], [1,10,100], stride=3, padding=(2,1)) -XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingGreaterThanHighPadding) { +XLA_TEST_F(ConvolutionVariantsTest, + BackwardInputLowPaddingGreaterThanHighPadding) { ComputationBuilder builder(client_, TestName()); auto gradients = builder.ConstantR4FromArray4D( @@ -1206,7 +1222,8 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputWithNegativePaddingHigh) { ComputeAndCompareR4(&builder, {{{{12, 23, 30, 0}}}}, {}, error_spec_); } -XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterLowPaddingLessThanHighPadding) { +XLA_TEST_F(ConvolutionVariantsTest, + BackwardFilterLowPaddingLessThanHighPadding) { ComputationBuilder builder(client_, TestName()); // activations: 1,2,3,4 ---pad--> 0,1,2,3,4,0,0 @@ -1230,7 +1247,7 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterLowPaddingLessThanHighPadding) } XLA_TEST_F(ConvolutionVariantsTest, - BackwardFilterLowPaddingGreaterThanHighPadding) { + BackwardFilterLowPaddingGreaterThanHighPadding) { ComputationBuilder builder(client_, TestName()); // activations: 1,2,3,4 ---pad--> 0,0,1,2,3,4 diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index a10497665a..47979ec6f3 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -1685,7 +1685,7 @@ bool HloParser::ParseConvolutionDimensionNumbers( StrCat("expects unique lhs dimension numbers, but sees ", lhs)); } for (int i = 0; i < rank - 2; i++) { - dnums->add_spatial_dimensions(-1); + dnums->add_input_spatial_dimensions(-1); } for (int i = 0; i < rank; i++) { char c = lhs[i]; @@ -1694,7 +1694,7 @@ bool HloParser::ParseConvolutionDimensionNumbers( } else if (c == 'f') { dnums->set_input_feature_dimension(i); } else if (c < '0' + rank && c >= '0') { - dnums->set_spatial_dimensions(c - '0', i); + dnums->set_input_spatial_dimensions(c - '0', i); } else { return TokenError( Printf("expects [0-%lldbf] in lhs dimension numbers", rank - 1)); @@ -1732,6 +1732,9 @@ bool HloParser::ParseConvolutionDimensionNumbers( return TokenError( StrCat("expects unique output dimension numbers, but sees ", out)); } + for (int i = 0; i < rank - 2; i++) { + dnums->add_output_spatial_dimensions(-1); + } for (int i = 0; i < rank; i++) { char c = out[i]; if (c == 'b') { @@ -1739,11 +1742,7 @@ bool HloParser::ParseConvolutionDimensionNumbers( } else if (c == 'f') { dnums->set_output_feature_dimension(i); } else if (c < '0' + rank && c >= '0') { - if (dnums->spatial_dimensions(c - '0') != i) { - return TokenError( - "output spatial dimensions should be the same as input spatial " - "dimensions"); - } + dnums->set_output_spatial_dimensions(c - '0', i); } else { return TokenError( Printf("expects [0-%lldbf] in output dimension numbers", rank - 1)); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index e56f120def..90cdb87a1e 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -873,12 +873,6 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 .status() .error_message(), "must have the same rank"); - - ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=0bf_io0->b0f", suffix)) - .status() - .error_message(), - "output spatial dimensions should be the same as input " - "spatial dimensions"); } TEST_F(HloParserTest, UnexpectedAttribute) { diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index d3c5a88807..b560354050 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -417,15 +417,9 @@ message ConvolutionDimensionNumbers { // The number of the dimension that represents features in the input. int64 input_feature_dimension = 8; - // The number of the dimension that represents batch in the output. - int64 output_batch_dimension = 9; - - // The number of the dimension that represents features in the output. - int64 output_feature_dimension = 10; - // The dimension numbers for the spatial dimensions that the window - // moves through in the input (lhs) and output. - repeated int64 spatial_dimensions = 5; + // moves through in the input. + repeated int64 input_spatial_dimensions = 11; // The number of the dimension that represents input features in the // convolutional kernel (rhs). @@ -439,6 +433,18 @@ message ConvolutionDimensionNumbers { // moves through in the kernel (rhs). window.strides(0) is the // stride in the kernel_spatial_dimensions(0) dimension. repeated int64 kernel_spatial_dimensions = 6; + + // The number of the dimension that represents batch in the output. + int64 output_batch_dimension = 9; + + // The number of the dimension that represents features in the output. + int64 output_feature_dimension = 10; + + // The dimension numbers for the spatial dimensions that the window + // moves through in the output. + repeated int64 output_spatial_dimensions = 12; + + // Next = 13 }; message ConvolveRequest { -- GitLab From cf0717bfd701d3a11143e00545ced4019b067a51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20=C5=A0uppa?= Date: Tue, 28 Nov 2017 08:39:52 +0100 Subject: [PATCH 0868/1801] softmax_cross_entropy: Improve docstring Improve docstring of `softmax_cross_entropy`. --- tensorflow/python/ops/losses/losses_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 55a18d28ca..b74971f654 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -652,7 +652,7 @@ def softmax_cross_entropy( Args: onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels. - logits: [batch_size, num_classes] logits outputs of the network . + logits: `[batch_size, num_classes]` logits outputs of the network . weights: Optional `Tensor` whose rank is either 0, or rank 1 and is broadcastable to the loss which is a `Tensor` of shape `[batch_size]`. label_smoothing: If greater than 0 then smooth the labels. -- GitLab From 64e1459ef218263046fe7afd71b02548fc01383a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 03:46:56 -0800 Subject: [PATCH 0869/1801] Add DeviceFactory to list of exported headers. PiperOrigin-RevId: 177140363 --- tensorflow/core/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 30ff4ef358..4ca6fb1631 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -746,6 +746,7 @@ tf_cuda_library( name = "core_cpu", hdrs = [ "common_runtime/device.h", + "common_runtime/device_factory.h", "common_runtime/optimization_registry.h", "common_runtime/shape_refiner.h", "graph/algorithm.h", -- GitLab From b262375fa67d82d84e8cf9304c4c4d63411a0bc3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 04:40:54 -0800 Subject: [PATCH 0870/1801] Fixed two bugs when importing MetaGraphDefs that contain ResourceVariables. 1) In the ResourceVariable implementation, pass import_scope when creating the SaveSliceInfo. This is present in the implementation of plain variables, and was likely a copy-and-paste omission. 2) When importing a MetaGraphDef, restoring the GLOBAL_VARIABLES and TRAINABLE_VARIABLES collections will add ops to the graph for ResourceVariables. Made graph construction deterministic by fixing the order in which collections are restored. PiperOrigin-RevId: 177144138 --- tensorflow/python/framework/meta_graph.py | 2 +- .../python/framework/meta_graph_test.py | 48 ++++++++++++++++--- .../python/ops/resource_variable_ops.py | 3 +- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index a8bc2d2e3f..44ddc013b2 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -663,7 +663,7 @@ def import_scoped_meta_graph(meta_graph_or_file, [part for part in [graph.get_name_scope(), import_scope] if part]) # Restores all the other collections. - for key, col_def in meta_graph_def.collection_def.items(): + for key, col_def in sorted(meta_graph_def.collection_def.items()): # Don't add unbound_inputs to the new graph. if key == unbound_inputs_col_name: continue diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 06cee46bf6..4c22c913b8 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -662,22 +662,36 @@ class MetaGraphWithVariableScopeTest(test.TestCase): class ExportImportAcrossScopesTest(test.TestCase): def testPartionedVariables(self): - def make_graph_with_partitioned_variables(): + + def make_graph_with_partitioned_variables(use_resource): variable_scope.get_variable( name="weights", partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0), - initializer=random_ops.truncated_normal([100, 10])) - self._testExportImportAcrossScopes(make_graph_with_partitioned_variables) + initializer=random_ops.truncated_normal([100, 10]), + use_resource=use_resource) + # The next variable illustrates the necessity of restoring collections + # in a deterministic fashion when using ResourceVariables. + variable_scope.get_variable( + name="another", + shape=[], + collections=["a", "b", "z", "f", "e", "d", "g"], + use_resource=use_resource) + + self._testExportImportAcrossScopes( + make_graph_with_partitioned_variables, use_resource=False) + self._testExportImportAcrossScopes( + make_graph_with_partitioned_variables, use_resource=True) - def _testExportImportAcrossScopes(self, graph_fn): + def _testExportImportAcrossScopes(self, graph_fn, use_resource): """Tests export and importing a graph across scopes. Args: graph_fn: A closure that creates a graph on the current scope. + use_resource: A bool indicating whether or not to use ResourceVariables. """ with ops.Graph().as_default() as original_graph: with variable_scope.variable_scope("dropA/dropB/keepA"): - graph_fn() + graph_fn(use_resource=use_resource) exported_meta_graph_def = meta_graph.export_scoped_meta_graph( graph=original_graph, export_scope="dropA/dropB")[0] @@ -689,10 +703,32 @@ class ExportImportAcrossScopesTest(test.TestCase): with ops.Graph().as_default() as expected_graph: with variable_scope.variable_scope("importA/keepA"): - graph_fn() + graph_fn(use_resource=use_resource) + + if use_resource: + # Bringing in a collection that contains ResourceVariables adds ops + # to the graph, so mimic the same behavior. + for collection_key in sorted([ + ops.GraphKeys.GLOBAL_VARIABLES, + ops.GraphKeys.TRAINABLE_VARIABLES, + ]): + for var in expected_graph.get_collection(collection_key): + var._read_variable_op() result = meta_graph.export_scoped_meta_graph(graph=imported_graph)[0] expected = meta_graph.export_scoped_meta_graph(graph=expected_graph)[0] + + if use_resource: + # Clear all shared_name attributes before comparing, since they are + # supposed to be orthogonal to scopes. + for meta_graph_def in [result, expected]: + for node in meta_graph_def.graph_def.node: + shared_name_attr = "shared_name" + shared_name_value = node.attr.get(shared_name_attr, None) + if shared_name_value and shared_name_value.HasField("s"): + if shared_name_value.s: + node.attr[shared_name_attr].s = b"" + self.assertProtoEquals(expected, result) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index a746735f58..343e38f960 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -513,7 +513,8 @@ class ResourceVariable(variables.Variable): self._cached_value = None if variable_def.HasField("save_slice_info_def"): self._save_slice_info = variables.Variable.SaveSliceInfo( - save_slice_info_def=variable_def.save_slice_info_def) + save_slice_info_def=variable_def.save_slice_info_def, + import_scope=import_scope) else: self._save_slice_info = None self._caching_device = None -- GitLab From 6ec7e7680a8a1c5eaf1054a9eb81c8f608aadb90 Mon Sep 17 00:00:00 2001 From: Daniel Ylitalo Date: Tue, 28 Nov 2017 15:30:47 +0100 Subject: [PATCH 0871/1801] Add FreeBSD compatibility --- tensorflow/contrib/lite/kernels/internal/BUILD | 9 +++++++++ tensorflow/core/platform/env.cc | 14 ++++++++++++++ third_party/flatbuffers/flatbuffers.BUILD | 14 ++++++++++++-- 3 files changed, 35 insertions(+), 2 deletions(-) mode change 100644 => 100755 tensorflow/contrib/lite/kernels/internal/BUILD mode change 100644 => 100755 tensorflow/core/platform/env.cc mode change 100644 => 100755 third_party/flatbuffers/flatbuffers.BUILD diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD old mode 100644 new mode 100755 index 288534099b..a3ecb2ebf6 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -124,6 +124,13 @@ config_setting( }, ) +config_setting( + name = "freebsd", + values = { + "cpu": "freebsd", + }, +) + cc_library( name = "optimized_base", srcs = [], @@ -147,6 +154,7 @@ cc_library( ":x86": tflite_deps_intel, ":x86_64": tflite_deps_intel, ":darwin": tflite_deps_intel, + ":freebsd": tflite_deps_intel, "//conditions:default": [], }), ) @@ -224,6 +232,7 @@ cc_library( ":x86": tflite_deps_intel, ":x86_64": tflite_deps_intel, ":darwin": tflite_deps_intel, + ":freebsd": tflite_deps_intel, "//conditions:default": [], }), ) diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc old mode 100644 new mode 100755 index 12ef55ec26..5118c4cb59 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -20,6 +20,10 @@ limitations under the License. #if defined(__APPLE__) #include #endif +#if defined(__FreeBSD__) +#include +#include +#endif #if defined(PLATFORM_WINDOWS) #include #include "tensorflow/core/platform/windows/windows_file_system.h" @@ -266,6 +270,13 @@ string Env::GetExecutablePath() { char unresolved_path[buffer_size]; _NSGetExecutablePath(unresolved_path, &buffer_size); CHECK(realpath(unresolved_path, exe_path)); +#elif defined(__FreeBSD__) + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; + size_t exe_path_size = PATH_MAX; + + if (sysctl(mib, 4, exe_path, &exe_path_size, NULL, 0) != 0) { + // Not sure what to do if it fails? + } #elif defined(PLATFORM_WINDOWS) HMODULE hModule = GetModuleHandleW(NULL); WCHAR wc_file_path[MAX_PATH] = {0}; @@ -293,6 +304,9 @@ bool Env::LocalTempFilename(string* filename) { pthread_threadid_np(nullptr, &tid64); int32 tid = static_cast(tid64); int32 pid = static_cast(getpid()); +#elif defined(__FreeBSD__) + int32 tid = static_cast((long) pthread_self()); + int32 pid = static_cast(getpid()); #elif defined(PLATFORM_WINDOWS) int32 tid = static_cast(GetCurrentThreadId()); int32 pid = static_cast(GetCurrentProcessId()); diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD old mode 100644 new mode 100755 index 0a76adcf91..c06c269bb2 --- a/third_party/flatbuffers/flatbuffers.BUILD +++ b/third_party/flatbuffers/flatbuffers.BUILD @@ -4,6 +4,12 @@ package( licenses(["notice"]) # Apache 2.0 +config_setting( + name = "freebsd", + values = {"cpu": "freebsd"}, + visibility = ["//visibility:public"], +) + FLATBUFFERS_COPTS = [ "-fexceptions", ] + select({ @@ -107,10 +113,14 @@ cc_binary( "grpc/", "include/", ], - linkopts = [ + linkopts = select({ + ":freebsd": [ "-lm", - "-ldl", ], + "//conditions:default": [ + "-lm", + "-ldl", + ]}), deps = [ ":flatc_library", ], -- GitLab From 1a53e4a82f1d077859214dab4d4fb84479ae70e6 Mon Sep 17 00:00:00 2001 From: Daniel Ylitalo Date: Tue, 28 Nov 2017 15:33:34 +0100 Subject: [PATCH 0872/1801] change back file permissions --- tensorflow/contrib/lite/kernels/internal/BUILD | 0 tensorflow/core/platform/env.cc | 0 third_party/flatbuffers/flatbuffers.BUILD | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tensorflow/contrib/lite/kernels/internal/BUILD mode change 100755 => 100644 tensorflow/core/platform/env.cc mode change 100755 => 100644 third_party/flatbuffers/flatbuffers.BUILD diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD old mode 100755 new mode 100644 diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc old mode 100755 new mode 100644 diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD old mode 100755 new mode 100644 -- GitLab From 92d65fe6d71b5b80c130f9d9fb4474c4587f2855 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 28 Nov 2017 09:42:43 -0800 Subject: [PATCH 0873/1801] Add `ConfigProto.isolate_session_state` option for the distributed runtime. Setting this option to true when creating a session ensures that no stateful resources (variables, queues, iterators, etc.) will be visible to any other session running on the same server, and those resources will be deleted when the session is closed. The default behavior, namely that all `tf.Variable` objects are shared by default and most other resources are shared when their `shared_name` attr is non-empty, is preserved. This change augments the semantics of the WorkerService.CreateWorkerSession RPC. Now, if the server_def in the request is empty, it implies that the worker should use its default ClusterSpec. Note that clusters created using ClusterSpec propagation always have isolated session state, and are unaffected by this change. PiperOrigin-RevId: 177173545 --- tensorflow/core/common_runtime/device.h | 2 +- .../core/common_runtime/renamed_device.cc | 11 ++- .../core/common_runtime/renamed_device.h | 16 +++- tensorflow/core/distributed_runtime/BUILD | 12 +++ .../distributed_runtime/master_session.cc | 32 ++++--- .../core/distributed_runtime/session_mgr.cc | 23 +++-- .../core/distributed_runtime/session_mgr.h | 4 +- .../distributed_runtime/session_mgr_test.cc | 66 ++++++++++++-- tensorflow/core/distributed_runtime/worker.cc | 3 +- .../worker_cache_wrapper.h | 90 +++++++++++++++++++ tensorflow/core/protobuf/config.proto | 6 +- tensorflow/core/protobuf/worker.proto | 4 + .../client/session_clusterspec_prop_test.py | 43 +++++++++ tensorflow/python/training/server_lib_test.py | 89 ++++++++++++++++++ .../api/golden/tensorflow.-config-proto.pbtxt | 4 + 15 files changed, 374 insertions(+), 31 deletions(-) create mode 100644 tensorflow/core/distributed_runtime/worker_cache_wrapper.h diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h index 3912cd177b..d5a452a796 100644 --- a/tensorflow/core/common_runtime/device.h +++ b/tensorflow/core/common_runtime/device.h @@ -131,7 +131,7 @@ class Device : public DeviceBase { OpSegment* op_segment() { return &op_seg_; } // Returns the resource manager associated w/ this device. - ResourceMgr* resource_manager() { return rmgr_; } + virtual ResourceMgr* resource_manager() { return rmgr_; } // Summarizes the status of this Device, for debugging. string DebugString() const { return ProtoDebugString(device_attributes_); } diff --git a/tensorflow/core/common_runtime/renamed_device.cc b/tensorflow/core/common_runtime/renamed_device.cc index fa9713735e..56766a8df4 100644 --- a/tensorflow/core/common_runtime/renamed_device.cc +++ b/tensorflow/core/common_runtime/renamed_device.cc @@ -21,7 +21,8 @@ namespace tensorflow { /* static */ Device* RenamedDevice::NewRenamedDevice(const string& new_base, Device* underlying, - bool owns_underlying) { + bool owns_underlying, + bool isolate_session_state) { DeviceNameUtils::ParsedName parsed_name; CHECK(DeviceNameUtils::ParseFullName(new_base, &parsed_name)); DeviceNameUtils::ParsedName underlying_parsed_name = @@ -35,15 +36,17 @@ Device* RenamedDevice::NewRenamedDevice(const string& new_base, parsed_name.id); DeviceAttributes attributes(underlying->attributes()); attributes.set_name(name); - return new RenamedDevice(underlying, attributes, owns_underlying); + return new RenamedDevice(underlying, attributes, owns_underlying, + isolate_session_state); } RenamedDevice::RenamedDevice(Device* underlying, const DeviceAttributes& attributes, - bool owns_underlying) + bool owns_underlying, bool isolate_session_state) : Device(underlying->env(), attributes), underlying_(underlying), - owns_underlying_(owns_underlying) {} + owns_underlying_(owns_underlying), + isolate_session_state_(isolate_session_state) {} RenamedDevice::~RenamedDevice() { if (owns_underlying_) { diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h index 3103ca0751..c5c204d4fa 100644 --- a/tensorflow/core/common_runtime/renamed_device.h +++ b/tensorflow/core/common_runtime/renamed_device.h @@ -29,7 +29,9 @@ namespace tensorflow { class RenamedDevice : public Device { public: static Device* NewRenamedDevice(const string& new_base, Device* underlying, - bool owns_underlying); + bool owns_underlying, + bool isolate_session_state); + ~RenamedDevice() override; // Below are virtual methods defined on DeviceBase @@ -113,11 +115,21 @@ class RenamedDevice : public Device { return underlying_->FillContextMap(graph, device_context_map); } + // Returns the resource manager associated w/ this device. + ResourceMgr* resource_manager() override { + if (isolate_session_state_) { + return Device::resource_manager(); + } else { + return underlying_->resource_manager(); + } + } + private: RenamedDevice(Device* underlying, const DeviceAttributes& attributes, - bool owns_underlying); + bool owns_underlying, bool isolate_session_state); Device* const underlying_; const bool owns_underlying_; + const bool isolate_session_state_; }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 93adc7ef4f..29164bbffe 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -140,6 +140,7 @@ cc_library( hdrs = ["session_mgr.h"], deps = [ ":graph_mgr", + ":worker_cache_wrapper", ":worker_session", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:lib", @@ -263,6 +264,17 @@ cc_library( ], ) +cc_library( + name = "worker_cache_wrapper", + hdrs = ["worker_cache_wrapper.h"], + deps = [ + ":worker_cache", + ":worker_interface", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], +) + cc_library( name = "remote_device", srcs = ["remote_device.cc"], diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 3379302b9b..03b65d8cba 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -1049,7 +1049,10 @@ Status MasterSession::Create(GraphDef* graph_def, TF_RETURN_IF_ERROR(GraphExecutionState::MakeForBaseGraph( graph_def, execution_options, &execution_state_)); } - if (options.cluster_def != nullptr) { + // TODO(b/36574172): Remove these conditions when ClusterSpec + // propagation is supported in all servers. + if (options.cluster_def != nullptr || + session_opts_.config.isolate_session_state()) { should_delete_worker_sessions_ = true; return CreateWorkerSessions(options); } @@ -1058,10 +1061,9 @@ Status MasterSession::Create(GraphDef* graph_def, Status MasterSession::CreateWorkerSessions( const WorkerCacheFactoryOptions& options) { - CHECK(worker_cache_) << "CreateWorkerSessions should be called only with " - << "dynamic cluster membership."; std::vector worker_names; - worker_cache_->ListWorkers(&worker_names); + WorkerCacheInterface* worker_cache = get_worker_cache(); + worker_cache->ListWorkers(&worker_names); struct WorkerGroup { // The worker name. (Not owned.) @@ -1079,10 +1081,10 @@ Status MasterSession::CreateWorkerSessions( std::vector workers(worker_names.size()); // Release the workers. - auto cleanup = gtl::MakeCleanup([this, &workers] { + auto cleanup = gtl::MakeCleanup([this, &workers, worker_cache] { for (auto&& worker_group : workers) { if (worker_group.worker != nullptr) { - worker_cache_->ReleaseWorker(*worker_group.name, worker_group.worker); + worker_cache->ReleaseWorker(*worker_group.name, worker_group.worker); } } }); @@ -1091,11 +1093,19 @@ Status MasterSession::CreateWorkerSessions( // Create all the workers & kick off the computations. for (size_t i = 0; i < worker_names.size(); ++i) { workers[i].name = &worker_names[i]; - workers[i].worker = worker_cache_->CreateWorker(worker_names[i]); + workers[i].worker = worker_cache->CreateWorker(worker_names[i]); workers[i].request.set_session_handle(handle_); - *workers[i].request.mutable_server_def()->mutable_cluster() = - *options.cluster_def; - workers[i].request.mutable_server_def()->set_protocol(*options.protocol); + if (options.cluster_def) { + *workers[i].request.mutable_server_def()->mutable_cluster() = + *options.cluster_def; + workers[i].request.mutable_server_def()->set_protocol(*options.protocol); + // Session state is always isolated when ClusterSpec propagation + // is in use. + workers[i].request.set_isolate_session_state(true); + } else { + workers[i].request.set_isolate_session_state( + session_opts_.config.isolate_session_state()); + } DeviceNameUtils::ParsedName name; if (!DeviceNameUtils::ParseFullName(worker_names[i], &name)) { @@ -1162,7 +1172,7 @@ Status MasterSession::DeleteWorkerSessions() { // Create all the workers & kick off the computations. for (size_t i = 0; i < worker_names.size(); ++i) { workers[i].name = &worker_names[i]; - workers[i].worker = worker_cache_->CreateWorker(worker_names[i]); + workers[i].worker = worker_cache->CreateWorker(worker_names[i]); workers[i].request.set_session_handle(handle_); } diff --git a/tensorflow/core/distributed_runtime/session_mgr.cc b/tensorflow/core/distributed_runtime/session_mgr.cc index b97749dc41..fabcbd00f5 100644 --- a/tensorflow/core/distributed_runtime/session_mgr.cc +++ b/tensorflow/core/distributed_runtime/session_mgr.cc @@ -20,7 +20,10 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/renamed_device.h" #include "tensorflow/core/distributed_runtime/graph_mgr.h" +#include "tensorflow/core/distributed_runtime/worker_cache_wrapper.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/protobuf/cluster.pb.h" +#include "tensorflow/core/protobuf/tensorflow_server.pb.h" namespace tensorflow { @@ -29,7 +32,10 @@ SessionMgr::SessionMgr( std::unique_ptr default_worker_cache, WorkerCacheFactory worker_cache_factory) : worker_env_(worker_env), - legacy_session_("", default_worker_name, std::move(default_worker_cache), + default_worker_cache_(std::move(default_worker_cache)), + legacy_session_("", default_worker_name, + std::unique_ptr( + new WorkerCacheWrapper(default_worker_cache_.get())), std::unique_ptr(worker_env->device_mgr), std::unique_ptr( new GraphMgr(worker_env, worker_env->device_mgr))), @@ -41,7 +47,8 @@ string SessionMgr::WorkerNameFromServerDef(const ServerDef& server_def) { } Status SessionMgr::CreateSession(const string& session, - const ServerDef& server_def) { + const ServerDef& server_def, + bool isolate_session_state) { mutex_lock l(mu_); if (session.empty()) { return errors::InvalidArgument("Session must be non-empty."); @@ -50,12 +57,18 @@ Status SessionMgr::CreateSession(const string& session, const string worker_name = WorkerNameFromServerDef(server_def); WorkerCacheInterface* worker_cache = nullptr; - TF_RETURN_IF_ERROR(worker_cache_factory_(server_def, &worker_cache)); + if (server_def.cluster().job().empty()) { + worker_cache = new WorkerCacheWrapper(default_worker_cache_.get()); + } else { + TF_RETURN_IF_ERROR(worker_cache_factory_(server_def, &worker_cache)); + } + CHECK(!worker_env_->local_devices.empty()) + << "The WorkerEnv must have at least one device in `local_devices`."; std::vector renamed_devices; for (Device* d : worker_env_->local_devices) { - renamed_devices.push_back( - RenamedDevice::NewRenamedDevice(worker_name, d, false)); + renamed_devices.push_back(RenamedDevice::NewRenamedDevice( + worker_name, d, false, isolate_session_state)); } std::unique_ptr device_mgr(new DeviceMgr(renamed_devices)); diff --git a/tensorflow/core/distributed_runtime/session_mgr.h b/tensorflow/core/distributed_runtime/session_mgr.h index c44bca7b7a..d85b6c3059 100644 --- a/tensorflow/core/distributed_runtime/session_mgr.h +++ b/tensorflow/core/distributed_runtime/session_mgr.h @@ -45,7 +45,8 @@ class SessionMgr { ~SessionMgr() {} // Allocates state for a new session. - Status CreateSession(const string& session, const ServerDef& server_def); + Status CreateSession(const string& session, const ServerDef& server_def, + bool isolate_session_state); // Locates the worker session for a given session handle WorkerSession* WorkerSessionForSession(const string& session); @@ -71,6 +72,7 @@ class SessionMgr { // legacy_session_ is deleted. Further, we must ensure that WorkerSession's // device_mgr is deleted after WorkerSession's graph_mgr. + std::unique_ptr default_worker_cache_; WorkerSession legacy_session_; const WorkerCacheFactory worker_cache_factory_; diff --git a/tensorflow/core/distributed_runtime/session_mgr_test.cc b/tensorflow/core/distributed_runtime/session_mgr_test.cc index 7132f123a5..ffe4809f2b 100644 --- a/tensorflow/core/distributed_runtime/session_mgr_test.cc +++ b/tensorflow/core/distributed_runtime/session_mgr_test.cc @@ -22,14 +22,36 @@ limitations under the License. namespace tensorflow { +class FakeDevice : public Device { + private: + explicit FakeDevice(const DeviceAttributes& device_attributes) + : Device(nullptr, device_attributes) {} + + public: + Status Sync() override { return errors::Unimplemented("FakeDevice::Sync()"); } + + Allocator* GetAllocator(AllocatorAttributes attr) override { return nullptr; } + + static std::unique_ptr MakeCPU(const string& name) { + DeviceAttributes device_attributes; + device_attributes.set_name(name); + device_attributes.set_device_type(DeviceType("FakeCPU").type()); + return std::unique_ptr(new FakeDevice(device_attributes)); + } +}; + class SessionMgrTest : public ::testing::Test { protected: SessionMgrTest() - : mgr_(&env_, "/job:mnist/replica:0/task:0", - std::unique_ptr(), - factory_), - legacy_session_(mgr_.WorkerSessionForSession("novel_session_id")) {} + : device_(FakeDevice::MakeCPU( + "/job:mnist/replica:0/task:0/device:fakecpu:0")), + mgr_(&env_, "/job:mnist/replica:0/task:0", + std::unique_ptr(), factory_), + legacy_session_(mgr_.WorkerSessionForSession("novel_session_id")) { + env_.local_devices = {device_.get()}; + } + std::unique_ptr device_; WorkerEnv env_; SessionMgr::WorkerCacheFactory factory_ = [](const ServerDef& server_def, WorkerCacheInterface** worker_cache) { @@ -42,14 +64,48 @@ class SessionMgrTest : public ::testing::Test { TEST_F(SessionMgrTest, CreateSessionSimple) { ServerDef server_def; + server_def.set_job_name("worker"); + server_def.set_task_index(3); + string session_handle = "test_session_handle"; - TF_EXPECT_OK(mgr_.CreateSession(session_handle, server_def)); + TF_EXPECT_OK(mgr_.CreateSession(session_handle, server_def, true)); WorkerSession* session = mgr_.WorkerSessionForSession(session_handle); EXPECT_NE(nullptr, session) << "Session for " << session_handle << "was null"; EXPECT_NE(mgr_.LegacySession(), session); TF_EXPECT_OK(mgr_.DeleteSession(session_handle)); } +TEST_F(SessionMgrTest, CreateSessionIsolateSessionState) { + ServerDef server_def; + server_def.set_job_name("worker"); + server_def.set_task_index(3); + + TF_EXPECT_OK(mgr_.CreateSession("handle_1", server_def, false)); + WorkerSession* session_1 = mgr_.WorkerSessionForSession("handle_1"); + std::vector devices_1 = session_1->device_mgr->ListDevices(); + EXPECT_EQ(1, devices_1.size()); + + TF_EXPECT_OK(mgr_.CreateSession("handle_2", server_def, false)); + WorkerSession* session_2 = mgr_.WorkerSessionForSession("handle_2"); + std::vector devices_2 = session_2->device_mgr->ListDevices(); + EXPECT_EQ(1, devices_2.size()); + + TF_EXPECT_OK(mgr_.CreateSession("handle_3", server_def, true)); + WorkerSession* session_3 = mgr_.WorkerSessionForSession("handle_3"); + std::vector devices_3 = session_3->device_mgr->ListDevices(); + EXPECT_EQ(1, devices_3.size()); + + TF_EXPECT_OK(mgr_.CreateSession("handle_4", server_def, true)); + WorkerSession* session_4 = mgr_.WorkerSessionForSession("handle_4"); + std::vector devices_4 = session_4->device_mgr->ListDevices(); + EXPECT_EQ(1, devices_4.size()); + + EXPECT_EQ(devices_1[0]->resource_manager(), devices_2[0]->resource_manager()); + EXPECT_NE(devices_1[0]->resource_manager(), devices_3[0]->resource_manager()); + EXPECT_NE(devices_1[0]->resource_manager(), devices_4[0]->resource_manager()); + EXPECT_NE(devices_3[0]->resource_manager(), devices_4[0]->resource_manager()); +} + TEST_F(SessionMgrTest, LegacySession) { ServerDef server_def; string session_handle = ""; diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index 8bf87923ed..6cd92f5fe7 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -44,7 +44,8 @@ void Worker::CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request, CreateWorkerSessionResponse* response, StatusCallback done) { Status s = env_->session_mgr->CreateSession(request->session_handle(), - request->server_def()); + request->server_def(), + request->isolate_session_state()); done(s); } diff --git a/tensorflow/core/distributed_runtime/worker_cache_wrapper.h b/tensorflow/core/distributed_runtime/worker_cache_wrapper.h new file mode 100644 index 0000000000..43c3b6285b --- /dev/null +++ b/tensorflow/core/distributed_runtime/worker_cache_wrapper.h @@ -0,0 +1,90 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_WRAPPER_H_ +#define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_WRAPPER_H_ + +#include +#include + +#include "tensorflow/core/distributed_runtime/worker_cache.h" + +namespace tensorflow { + +class WorkerCacheWrapper : public WorkerCacheInterface { + public: + WorkerCacheWrapper(WorkerCacheInterface* wrapped) : wrapped_(wrapped) {} + + // Updates *workers with strings naming the remote worker tasks to + // which open channels have been established. + virtual void ListWorkers(std::vector* workers) const { + return wrapped_->ListWorkers(workers); + } + + // If "target" names a remote task for which an RPC channel exists + // or can be constructed, returns a pointer to a WorkerInterface object + // wrapping that channel. The returned value must be destroyed by + // calling `this->ReleaseWorker(target, ret)` + // TODO(mrry): rename this to GetOrCreateWorker() or something that + // makes it more obvious that this method returns a potentially + // shared object. + virtual WorkerInterface* CreateWorker(const string& target) { + return wrapped_->CreateWorker(target); + } + + // Release a worker previously returned by this->CreateWorker(target). + // + // TODO(jeff,sanjay): Consider moving target into WorkerInterface. + // TODO(jeff,sanjay): Unify all worker-cache impls and factor out a + // per-rpc-subsystem WorkerInterface creator. + virtual void ReleaseWorker(const string& target, WorkerInterface* worker) { + return wrapped_->ReleaseWorker(target, worker); + } + + // Set *locality with the DeviceLocality of the specified remote device + // within its local environment. Returns true if *locality + // was set, using only locally cached data. Returns false + // if status data for that device was not available. Never blocks. + virtual bool GetDeviceLocalityNonBlocking(const string& device, + DeviceLocality* locality) { + return wrapped_->GetDeviceLocalityNonBlocking(device, locality); + } + + // Set *locality with the DeviceLocality of the specified remote device + // within its local environment. Callback gets Status::OK if *locality + // was set. + virtual void GetDeviceLocalityAsync(const string& device, + DeviceLocality* locality, + StatusCallback done) { + return wrapped_->GetDeviceLocalityAsync(device, locality, std::move(done)); + } + + // Start/stop logging activity. + virtual void SetLogging(bool active) { wrapped_->SetLogging(active); } + + // Discard any saved log data. + virtual void ClearLogs() { wrapped_->ClearLogs(); } + + // Return logs for the identified step in *ss. Any returned data will no + // longer be stored. + virtual bool RetrieveLogs(int64 step_id, StepStats* ss) { + return wrapped_->RetrieveLogs(step_id, ss); + } + + private: + WorkerCacheInterface* wrapped_; // Not owned. +}; +} // namespace tensorflow +#endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_WRAPPER_H_ diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index a956aab3dc..1916316245 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -303,7 +303,11 @@ message ConfigProto { // Optional list of all workers to use in this session. ClusterDef cluster_def = 14; - // Next: 15 + // If true, any resources such as Variables used in the session will not be + // shared with other sessions. + bool isolate_session_state = 15; + + // Next: 16 }; // Options for a single Run() call. diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto index e7b3f36fcc..385e2dd163 100644 --- a/tensorflow/core/protobuf/worker.proto +++ b/tensorflow/core/protobuf/worker.proto @@ -59,6 +59,10 @@ message CreateWorkerSessionRequest { // Defines the configuration of a TensorFlow worker. ServerDef server_def = 2; + + // If true, any resources such as Variables used in the session will not be + // shared with other sessions. + bool isolate_session_state = 3; } message CreateWorkerSessionResponse { diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py index 28a4dd27a7..c85b22eb15 100644 --- a/tensorflow/python/client/session_clusterspec_prop_test.py +++ b/tensorflow/python/client/session_clusterspec_prop_test.py @@ -29,6 +29,7 @@ from tensorflow.python.client import session from tensorflow.python.framework import common_shapes from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops @@ -415,6 +416,48 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase): node_stats.node_name.startswith('Const') ]), run_metadata) + def testClusterSpecPropagationIsolation(self): + """Test that two sessions using ClusterSpec propagation are isolated.""" + server = server_lib.Server.create_local_server() + init_value = array_ops.placeholder(dtypes.int32, shape=[]) + v = variables.Variable(init_value) + + cluster_def = cluster_pb2.ClusterDef() + job = cluster_def.job.add() + job.name = 'worker' + job.tasks[0] = server.target[len('grpc://'):] + config = config_pb2.ConfigProto(cluster_def=cluster_def) + + sess1 = session.Session(server.target, config=config) + sess2 = session.Session(server.target, config=config) + + # Initially, the variable is uninitialized in both sessions. + with self.assertRaises(errors.FailedPreconditionError): + sess1.run(v) + with self.assertRaises(errors.FailedPreconditionError): + sess2.run(v) + + # An update in sess1 should be visible in sess1 only. + sess1.run(v.initializer, feed_dict={init_value: 37}) + self.assertEqual(37, sess1.run(v)) + with self.assertRaises(errors.FailedPreconditionError): + sess2.run(v) + + # An update in sess2 should be visible in sess2 only. + sess2.run(v.initializer, feed_dict={init_value: 86}) + self.assertEqual(37, sess1.run(v)) + self.assertEqual(86, sess2.run(v)) + + # Closing sess2 has no effect on the state of sess1. + sess2.close() + self.assertEqual(37, sess1.run(v)) + + # Subsequent sessions will not see the state of existing sessions. + sess3 = session.Session(server.target, config=config) + self.assertEqual(37, sess1.run(v)) + with self.assertRaises(errors.FailedPreconditionError): + sess3.run(v) + @test_util.disable_c_api # Partial runs don't work with C API def testClusterSpecPropagationPartialRun(self): """Test successful partial run with ClusterSpec propagation.""" diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py index 0a8ec4901c..26aac787ed 100644 --- a/tensorflow/python/training/server_lib_test.py +++ b/tensorflow/python/training/server_lib_test.py @@ -241,6 +241,95 @@ class GrpcServerTest(test.TestCase): queue_runner_impl.start_queue_runners(sess) sess.run(var.assign(3.0)) + def testIsolateSessionState(self): + server = self._cached_server + + init_value = array_ops.placeholder(dtypes.int32) + v = variables.Variable(init_value, validate_shape=False, name="v") + + sharing_config = config_pb2.ConfigProto(isolate_session_state=False) + sharing_sess_0 = session.Session(server.target, config=sharing_config) + sharing_sess_1 = session.Session(server.target, config=sharing_config) + + isolate_config = config_pb2.ConfigProto(isolate_session_state=True) + isolate_sess_0 = session.Session(server.target, config=isolate_config) + isolate_sess_1 = session.Session(server.target, config=isolate_config) + + # Initially all variables are initialized. + for sess in [sharing_sess_0, sharing_sess_1, + isolate_sess_0, isolate_sess_1]: + with self.assertRaises(errors_impl.FailedPreconditionError): + sess.run(v) + + # Shared sessions will see each other's updates, but isolated sessions + # will not. + sharing_sess_0.run(v.initializer, feed_dict={init_value: 86}) + self.assertAllEqual(86, sharing_sess_0.run(v)) + self.assertAllEqual(86, sharing_sess_1.run(v)) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_0.run(v) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_1.run(v) + + # Changing the shape works because `validate_shape` is False. + sharing_sess_1.run(v.initializer, feed_dict={init_value: [86, 99]}) + self.assertAllEqual([86, 99], sharing_sess_0.run(v)) + self.assertAllEqual([86, 99], sharing_sess_1.run(v)) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_0.run(v) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_1.run(v) + + # Initializing in an isolated session will only affect the state in that + # session. + isolate_sess_0.run(v.initializer, feed_dict={init_value: 37}) + self.assertAllEqual([86, 99], sharing_sess_0.run(v)) + self.assertAllEqual([86, 99], sharing_sess_1.run(v)) + self.assertAllEqual(37, isolate_sess_0.run(v)) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_1.run(v) + + # Isolated sessions can have different shapes for the same variable. + isolate_sess_1.run(v.initializer, feed_dict={init_value: [19, 86]}) + self.assertAllEqual([86, 99], sharing_sess_0.run(v)) + self.assertAllEqual([86, 99], sharing_sess_1.run(v)) + self.assertAllEqual(37, isolate_sess_0.run(v)) + self.assertAllEqual([19, 86], isolate_sess_1.run(v)) + + def testShapeChangingIsolateState(self): + server = self._cached_server + sharing_config = config_pb2.ConfigProto(isolate_session_state=False) + isolate_config = config_pb2.ConfigProto(isolate_session_state=True) + + with ops.Graph().as_default(): + w_vector = variables.Variable([1, 2, 3], name="w") + with session.Session(server.target, config=sharing_config) as sess: + with self.assertRaises(errors_impl.FailedPreconditionError): + sess.run(w_vector) + sess.run(w_vector.initializer) + self.assertAllEqual([1, 2, 3], sess.run(w_vector)) + + with ops.Graph().as_default(): + w_vector = variables.Variable([4, 5, 6], name="w") + with session.Session(server.target, config=sharing_config) as sess: + self.assertAllEqual([1, 2, 3], sess.run(w_vector)) + sess.run(w_vector.initializer) + self.assertAllEqual([4, 5, 6], sess.run(w_vector)) + + with ops.Graph().as_default(): + w_scalar = variables.Variable(86, name="w") + with session.Session(server.target, config=sharing_config) as sess: + with self.assertRaises(errors_impl.InvalidArgumentError): + sess.run(w_scalar.initializer) + + with ops.Graph().as_default(): + w_scalar = variables.Variable(37, name="w") + with session.Session(server.target, config=isolate_config) as sess: + with self.assertRaises(errors_impl.FailedPreconditionError): + sess.run(w_scalar) + sess.run(w_scalar.initializer) + self.assertAllEqual(37, sess.run(w_scalar)) + class ServerDefTest(test.TestCase): diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt index da6af3919e..009d64aed0 100644 --- a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt @@ -46,6 +46,10 @@ tf_class { name: "INTRA_OP_PARALLELISM_THREADS_FIELD_NUMBER" mtype: "" } + member { + name: "ISOLATE_SESSION_STATE_FIELD_NUMBER" + mtype: "" + } member { name: "LOG_DEVICE_PLACEMENT_FIELD_NUMBER" mtype: "" -- GitLab From 4e9fa6dcce4912a4797c48f4cb55d3564961bfca Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 09:44:25 -0800 Subject: [PATCH 0874/1801] Adapt upstream API change from r319082. PiperOrigin-RevId: 177173806 --- .../xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 96981534d5..059943d48c 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -34,7 +34,7 @@ limitations under the License. #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/CodeGen/CommandFlags.h" +#include "llvm/CodeGen/CommandFlags.def" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -- GitLab From 82fa1e1ae5b2f8af642979fafb1cab455db1882f Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Tue, 28 Nov 2017 09:53:51 -0800 Subject: [PATCH 0875/1801] Adding support for `tf.SparseTensorValue` and `tf.SparseTensor` as inputs and outputs for various `tf.data` operations. In particular: - adding support for `tf.SparseTensorValue` as output type of `tf.data.Dataset.map()`. - adding support for both `tf.SparseTensor` and `tf.SparseTensorValue` as inputs to `tf.data.from_tensors()`. - adding support for both `tf.SparseTensor` and `tf.SparseTensorValue` as inputs to `tf.data.from_tensor_slices()`. PiperOrigin-RevId: 177175439 --- .../contrib/data/python/kernel_tests/BUILD | 1 + .../kernel_tests/batch_dataset_op_test.py | 36 ++-- .../dataset_constructor_op_test.py | 197 +++++++++++++++++- .../kernel_tests/filter_dataset_op_test.py | 11 +- .../kernel_tests/flat_map_dataset_op_test.py | 2 +- .../interleave_dataset_op_test.py | 3 +- .../kernel_tests/map_dataset_op_test.py | 32 +-- .../contrib/data/python/ops/dataset_ops.py | 1 - tensorflow/contrib/data/python/ops/readers.py | 2 +- tensorflow/python/data/ops/dataset_ops.py | 60 ++++-- tensorflow/python/data/util/nest.py | 17 +- tensorflow/python/data/util/nest_test.py | 16 +- tensorflow/python/framework/sparse_tensor.py | 17 +- .../python/framework/sparse_tensor_test.py | 12 ++ tensorflow/python/framework/tensor_util.py | 2 +- .../kernel_tests/batch_dataset_op_test.py | 26 +-- .../dataset_constructor_op_test.py | 197 +++++++++++++++++- .../kernel_tests/filter_dataset_op_test.py | 11 +- .../kernel_tests/flat_map_dataset_op_test.py | 2 +- .../interleave_dataset_op_test.py | 3 +- .../kernel_tests/map_dataset_op_test.py | 37 ++-- 21 files changed, 571 insertions(+), 114 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 3efe5274f4..0790a4a737 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -110,6 +110,7 @@ py_test( "//tensorflow/python:resource_variable_ops", "//tensorflow/python:session", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python/data/util:nest", "//third_party/py/numpy", ], diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index b0064f8ae7..a939b3c841 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -112,7 +112,7 @@ class BatchDatasetTest(test.TestCase): def testBatchSparse(self): def _sparse(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0]], values=(i * [1]), dense_shape=[1]) iterator = dataset_ops.Dataset.range(10).map(_sparse).batch( @@ -124,19 +124,19 @@ class BatchDatasetTest(test.TestCase): sess.run(init_op) for i in range(2): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( + expected = sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], dense_shape=[5, 1]) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testNestedBatchSparse(self): def _sparse(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0]], values=(i * [1]), dense_shape=[1]) iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch( @@ -147,13 +147,13 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: sess.run(init_op) actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( + expected = sparse_tensor.SparseTensorValue( indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0], [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]], values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dense_shape=[2, 5, 1]) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -294,7 +294,7 @@ class BatchDatasetTest(test.TestCase): def testPaddedBatchSparseError(self): def _map_fn(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i with self.assertRaises(TypeError): @@ -485,7 +485,7 @@ class BatchDatasetTest(test.TestCase): def testBatchAndDropRemainderSparse(self): def _sparse(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0]], values=(i * [1]), dense_shape=[1]) iterator = dataset_ops.Dataset.range(12).map(_sparse).apply( @@ -497,12 +497,12 @@ class BatchDatasetTest(test.TestCase): sess.run(init_op) for i in range(2): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( + expected = sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], dense_shape=[5, 1]) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -545,7 +545,7 @@ class BatchDatasetTest(test.TestCase): def testPaddedBatchAndDropRemainderSparseError(self): def _map_fn(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i with self.assertRaises(TypeError): @@ -643,7 +643,7 @@ class BatchDatasetTest(test.TestCase): def testMapAndBatchSparse(self): def _sparse(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0]], values=(i * [1]), dense_shape=[1]) iterator = dataset_ops.Dataset.range(10).apply( @@ -655,12 +655,12 @@ class BatchDatasetTest(test.TestCase): sess.run(init_op) for i in range(2): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( + expected = sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], dense_shape=[5, 1]) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py index 0f1c8838ca..55a1d3b95b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -39,7 +40,7 @@ from tensorflow.python.platform import test class DatasetConstructorTest(test.TestCase): - def testTensorDataset(self): + def testFromTensors(self): """Test an dataset that represents a single tuple of tensors.""" components = (np.array(1), np.array([1, 2, 3]), np.array(37.0)) @@ -59,7 +60,75 @@ class DatasetConstructorTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testTensorSliceDataset(self): + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testFromTensorsSparse(self): + """Test an dataset that represents a single tuple of tensors.""" + components = (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1]]), + values=np.array([-1, 1]), + dense_shape=np.array([2, 2]))) + + iterator = ( + dataset_ops.Dataset.from_tensors(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual( + [tensor_shape.TensorShape(c.dense_shape) for c in components], + [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + results = sess.run(get_next) + for component, result_component in zip(components, results): + self.assertSparseValuesEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorsMixed(self): + """Test an dataset that represents a single tuple of tensors.""" + components = (np.array(1), np.array([1, 2, 3]), np.array(37.0), + sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1]]), + values=np.array([-1, 1]), + dense_shape=np.array([2, 2]))) + + iterator = ( + dataset_ops.Dataset.from_tensors(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual([ + tensor_shape.TensorShape(c.dense_shape) + if sparse_tensor.is_sparse(c) else c.shape for c in components + ], [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + results = sess.run(get_next) + for component, result_component in zip(components, results): + if sparse_tensor.is_sparse(component): + self.assertSparseValuesEqual(component, result_component) + else: + self.assertAllEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorSlices(self): """Test an dataset that represents the slices from a tuple of tensors.""" components = ( np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile( @@ -84,7 +153,127 @@ class DatasetConstructorTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testTensorSliceDatasetWithDict(self): + def testFromTensorSlicesSparse(self): + """Test an dataset that represents the slices from a tuple of tensors.""" + components = (sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 0], [2, 0]]), + values=np.array([0, 0, 0]), + dense_shape=np.array([3, 1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1], [2, 2]]), + values=np.array([1, 2, 3]), + dense_shape=np.array([3, 3]))) + + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual( + [tensor_shape.TensorShape(c.dense_shape[1:]) for c in components], + [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + expected = [ + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([1]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[1]]), + values=np.array([2]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[2]]), + values=np.array([3]), + dense_shape=np.array([3]))), + ] + for i in range(3): + results = sess.run(get_next) + for component, result_component in zip(expected[i], results): + self.assertSparseValuesEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorSlicesMixed(self): + """Test an dataset that represents the slices from a tuple of tensors.""" + components = (np.tile(np.array([[1], [2], [3]]), 20), + np.tile(np.array([[12], [13], [14]]), 22), + np.array([37.0, 38.0, 39.0]), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 0], [2, 0]]), + values=np.array([0, 0, 0]), + dense_shape=np.array([3, 1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1], [2, 2]]), + values=np.array([1, 2, 3]), + dense_shape=np.array([3, 3]))) + + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual([ + tensor_shape.TensorShape(c.dense_shape[1:]) + if sparse_tensor.is_sparse(c) else c.shape[1:] for c in components + ], [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + expected = [ + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([1]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[1]]), + values=np.array([2]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[2]]), + values=np.array([3]), + dense_shape=np.array([3]))), + ] + for i in range(3): + results = sess.run(get_next) + for component, result_component in zip( + (zip(*components[:3])[i] + expected[i]), results): + if sparse_tensor.is_sparse(component): + self.assertSparseValuesEqual(component, result_component) + else: + self.assertAllEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorSlicesWithDict(self): components = {"foo": [1, 2, 3], "bar": [[4.0], [5.0], [6.0]]} iterator = (dataset_ops.Dataset.from_tensor_slices(components) .make_initializable_iterator()) @@ -105,7 +294,7 @@ class DatasetConstructorTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testSparseTensorSliceDataset(self): + def testFromSparseTensorSlices(self): """Test a dataset based on slices of a `tf.SparseTensor`.""" st = array_ops.sparse_placeholder(dtypes.float64) iterator = (dataset_ops.Dataset.from_sparse_tensor_slices(st) diff --git a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py index 95724241ef..5921be2ae8 100644 --- a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py @@ -132,9 +132,12 @@ class FilterDatasetTest(test.TestCase): self.assertAllEqual(a.dense_shape, b.dense_shape) def testSparse(self): + def _map_fn(i): - return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])), i def _filter_fn(_, i): return math_ops.equal(i % 2, 0) @@ -149,10 +152,8 @@ class FilterDatasetTest(test.TestCase): sess.run(init_op) for i in range(5): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[i*2], dense_shape=[1, 1]) self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertSparseValuesEqual(actual, _map_fn(i * 2)[0]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) diff --git a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py index ddb4bc34f3..d4fbaa5cdc 100644 --- a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py @@ -131,7 +131,7 @@ class FlatMapDatasetTest(test.TestCase): def testSparse(self): def _map_fn(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) def _flat_map_fn(x): diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py index c6e8ed5bdc..e66ed3f7aa 100644 --- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py @@ -187,8 +187,9 @@ class InterleaveDatasetTest(test.TestCase): sess.run(next_element) def testSparse(self): + def _map_fn(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) def _interleave_fn(x): diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py index 514b08b874..e9a07da84a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py @@ -626,9 +626,13 @@ class MapDatasetTest(test.TestCase): self.assertAllEqual(a.dense_shape, b.dense_shape) def testSparse(self): + def _sparse(i): - return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])) + iterator = (dataset_ops.Dataset.range(10) .map(_sparse) .make_initializable_iterator()) @@ -639,24 +643,26 @@ class MapDatasetTest(test.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[i], dense_shape=[1, 1]) self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertSparseValuesEqual(actual, _sparse(i)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testSparseChain(self): + def _sparse(i): - return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])) + def _check(i): - self.assertTrue(isinstance(i, sparse_tensor.SparseTensor)) + self.assertTrue(sparse_tensor.is_sparse(i)) return sparse_ops.sparse_concat(0, [i, i]) - iterator = (dataset_ops.Dataset.range(10) - .map(_sparse).map(_check) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.range(10).map(_sparse).map(_check) + .make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -664,10 +670,8 @@ class MapDatasetTest(test.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( - indices=[[0, 0], [1, 0]], values=[i, i], dense_shape=[2, 1]) self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval()) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 863c94ef9f..626a9e0edc 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -21,7 +21,6 @@ from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import enumerate_ops from tensorflow.contrib.data.python.ops import error_ops from tensorflow.contrib.data.python.ops import grouping - from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.ops import gen_dataset_ops diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index bb47832fe9..acb7a43211 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -164,7 +164,7 @@ def read_batch_features(file_pattern, shuffling but would increase memory usage and startup time. Returns: - A dict from keys in features to Tensor or SparseTensor objects. + A dict from keys in features to `Tensor` or `SparseTensor` objects. """ filenames = _get_file_names(file_pattern, randomize_input) if reader_args: diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 00ac3334b0..dbe29c087a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -40,6 +40,7 @@ from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gen_io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import script_ops +from tensorflow.python.ops import sparse_ops class Dataset(object): @@ -892,11 +893,20 @@ class TensorDataset(Dataset): """See `Dataset.from_tensors()` for details.""" super(TensorDataset, self).__init__() with ops.name_scope("tensors"): - self._tensors = nest.pack_sequence_as(tensors, [ - ops.convert_to_tensor(t, name="component_%d" % i) + tensors = nest.pack_sequence_as(tensors, [ + sparse_tensor_lib.SparseTensor.from_value(t) + if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor( + t, name="component_%d" % i) for i, t in enumerate(nest.flatten(tensors)) ]) + self._tensors = sparse.serialize_sparse_tensors(tensors) + self._output_classes = sparse.get_classes(tensors) + self._output_shapes = nest.pack_sequence_as( + tensors, [t.get_shape() for t in nest.flatten(tensors)]) + self._output_types = nest.pack_sequence_as( + tensors, [t.dtype for t in nest.flatten(tensors)]) + def _as_variant_tensor(self): return gen_dataset_ops.tensor_dataset( nest.flatten(self._tensors), @@ -905,18 +915,15 @@ class TensorDataset(Dataset): @property def output_classes(self): - return nest.pack_sequence_as( - self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)]) + return self._output_classes @property def output_shapes(self): - return nest.pack_sequence_as(self._tensors, - [t.shape for t in nest.flatten(self._tensors)]) + return self._output_shapes @property def output_types(self): - return nest.pack_sequence_as(self._tensors, - [t.dtype for t in nest.flatten(self._tensors)]) + return self._output_types class TensorSliceDataset(Dataset): @@ -926,15 +933,27 @@ class TensorSliceDataset(Dataset): """See `Dataset.from_tensor_slices()` for details.""" super(TensorSliceDataset, self).__init__() with ops.name_scope("tensors"): - flat_tensors = [ - ops.convert_to_tensor(t, name="component_%d" % i) + tensors = nest.pack_sequence_as(tensors, [ + sparse_tensor_lib.SparseTensor.from_value(t) + if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor( + t, name="component_%d" % i) for i, t in enumerate(nest.flatten(tensors)) - ] + ]) + flat_tensors = nest.flatten(tensors) - self._tensors = nest.pack_sequence_as(tensors, flat_tensors) batch_dim = flat_tensors[0].get_shape()[0] for t in flat_tensors[1:]: batch_dim.assert_is_compatible_with(t.get_shape()[0]) + self._tensors = nest.pack_sequence_as(tensors, [ + sparse_ops.serialize_many_sparse(tensor) + if sparse_tensor_lib.is_sparse(tensor) else tensor + for tensor in nest.flatten(tensors) + ]) + self._output_classes = sparse.get_classes(tensors) + self._output_shapes = nest.pack_sequence_as( + tensors, [t.get_shape()[1:] for t in nest.flatten(tensors)]) + self._output_types = nest.pack_sequence_as( + tensors, [t.dtype for t in nest.flatten(tensors)]) def _as_variant_tensor(self): return gen_dataset_ops.tensor_slice_dataset( @@ -944,20 +963,15 @@ class TensorSliceDataset(Dataset): @property def output_classes(self): - return nest.pack_sequence_as( - self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)]) + return self._output_classes @property def output_shapes(self): - return nest.pack_sequence_as(self._tensors, [ - tensor_shape.TensorShape(t.shape[1:]) - for t in nest.flatten(self._tensors) - ]) + return self._output_shapes @property def output_types(self): - return nest.pack_sequence_as(self._tensors, - [t.dtype for t in nest.flatten(self._tensors)]) + return self._output_types class SparseTensorSliceDataset(Dataset): @@ -1513,6 +1527,12 @@ class MapDataset(Dataset): if isinstance(ret, list): ret = tuple(ret) + # Convert any `SparseTensorValue`s to `SparseTensor`s. + ret = nest.pack_sequence_as(ret, [ + sparse_tensor_lib.SparseTensor.from_value(t) + if sparse_tensor_lib.is_sparse(t) else t for t in nest.flatten(ret) + ]) + self._output_classes = sparse.get_classes(ret) self._output_shapes = nest.pack_sequence_as( ret, [t.get_shape() for t in nest.flatten(ret)]) diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py index 421513cafc..2f89c006d2 100644 --- a/tensorflow/python/data/util/nest.py +++ b/tensorflow/python/data/util/nest.py @@ -17,17 +17,22 @@ """## Functions for working with arbitrarily nested sequences of elements. NOTE(mrry): This fork of the `tensorflow.python.util.nest` module -makes two changes: +makes three changes: 1. It adds support for dictionaries as a level of nesting in nested structures. 2. It removes support for lists as a level of nesting in nested structures. +3. It adds support for `SparseTensorValue` as an atomic element. -The motivation for this change is twofold: +The motivation for this change is threefold: 1. Many input-processing functions (e.g. `tf.parse_example()`) return dictionaries, and we would like to support them natively in datasets. 2. It seems more natural for lists to be treated (e.g. in Dataset constructors) as tensors, rather than lists of (lists of...) tensors. +3. This is needed because `SparseTensorValue` is implemented as a `namedtuple` + that would normally be flattened and we want to be able to create sparse + tensor from `SparseTensorValue's similarly to creating tensors from numpy + arrays. """ from __future__ import absolute_import @@ -38,6 +43,7 @@ import collections as _collections import six as _six +from tensorflow.python.framework import sparse_tensor as _sparse_tensor from tensorflow.python.util.all_util import remove_undocumented @@ -87,6 +93,8 @@ def _yield_value(iterable): # corresponding `OrderedDict` to pack it back). for key in _sorted(iterable): yield iterable[key] + elif isinstance(iterable, _sparse_tensor.SparseTensorValue): + yield iterable else: for value in iterable: yield value @@ -116,8 +124,9 @@ def is_sequence(seq): True if the sequence is a not a string or list and is a collections.Sequence. """ - return (isinstance(seq, (_collections.Sequence, dict)) - and not isinstance(seq, (list, _six.string_types))) + return (isinstance(seq, (_collections.Sequence, dict)) and + not isinstance(seq, _sparse_tensor.SparseTensorValue) and + not isinstance(seq, (list, _six.string_types))) def flatten(nest): diff --git a/tensorflow/python/data/util/nest_test.py b/tensorflow/python/data/util/nest_test.py index 6416e2850d..0bd0a5f443 100644 --- a/tensorflow/python/data/util/nest_test.py +++ b/tensorflow/python/data/util/nest_test.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.data.util import nest from tensorflow.python.framework import constant_op +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -86,7 +87,7 @@ class NestTest(test.TestCase): ordered_reconstruction) self.assertEqual({"d": 3, "b": 1, "a": 0, "c": 2}, plain_reconstruction) - def testFlattenAndPack_withDicts(self): + def testFlattenAndPackWithDicts(self): # A nice messy mix of tuples, lists, dicts, and `OrderedDict`s. named_tuple = collections.namedtuple("A", ("b", "c")) mess = ( @@ -132,6 +133,17 @@ class NestTest(test.TestCase): self.assertIsInstance(unflattened_ordered_dict, collections.OrderedDict) self.assertEqual(list(unflattened_ordered_dict.keys()), ["b", "a"]) + def testFlattenSparseValue(self): + st = sparse_tensor.SparseTensorValue([[0]], [0], [1]) + single_value = st + list_of_values = [st, st, st] + nest_of_values = ((st), ((st), (st))) + dict_of_values = {"foo": st, "bar": st, "baz": st} + self.assertEqual([st], nest.flatten(single_value)) + self.assertEqual([[st, st, st]], nest.flatten(list_of_values)) + self.assertEqual([st, st, st], nest.flatten(nest_of_values)) + self.assertEqual([st, st, st], nest.flatten(dict_of_values)) + def testIsSequence(self): self.assertFalse(nest.is_sequence("1234")) self.assertFalse(nest.is_sequence([1, 3, [4, 5]])) @@ -143,6 +155,8 @@ class NestTest(test.TestCase): self.assertFalse(nest.is_sequence(math_ops.tanh(ones))) self.assertFalse(nest.is_sequence(np.ones((4, 5)))) self.assertTrue(nest.is_sequence({"foo": 1, "bar": 2})) + self.assertFalse( + nest.is_sequence(sparse_tensor.SparseTensorValue([[0]], [0], [1]))) def testAssertSameStructure(self): structure1 = (((1, 2), 3), 4, (5, 6)) diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py index 10f5579ae5..6218cc34ca 100644 --- a/tensorflow/python/framework/sparse_tensor.py +++ b/tensorflow/python/framework/sparse_tensor.py @@ -93,8 +93,7 @@ class SparseTensor(_TensorLike): @classmethod def from_value(cls, sparse_tensor_value): - if not (isinstance(sparse_tensor_value, SparseTensor) or - isinstance(sparse_tensor_value, SparseTensorValue)): + if not is_sparse(sparse_tensor_value): raise TypeError("Neither a SparseTensor nor SparseTensorValue: %s." % sparse_tensor_value) return SparseTensor( @@ -253,3 +252,17 @@ def convert_to_tensor_or_sparse_tensor(value, dtype=None, name=None): return value return ops.internal_convert_to_tensor( value, dtype=dtype, name=name) + + +def is_sparse(x): + """Check whether `x` is sparse. + + Check whether an object is a `tf.SparseTensor` or `tf.SparseTensorValue`. + + Args: + x: A python object to check. + + Returns: + `True` iff `x` is a `tf.SparseTensor` or `tf.SparseTensorValue`. + """ + return isinstance(x, (SparseTensor, SparseTensorValue)) diff --git a/tensorflow/python/framework/sparse_tensor_test.py b/tensorflow/python/framework/sparse_tensor_test.py index e709eaeda1..c001fed3b0 100644 --- a/tensorflow/python/framework/sparse_tensor_test.py +++ b/tensorflow/python/framework/sparse_tensor_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util @@ -51,6 +53,16 @@ class SparseTensorTest(test_util.TensorFlowTestCase): self.assertAllEqual(sess_run_value.values, value.values) self.assertAllEqual(sess_run_value.dense_shape, value.dense_shape) + def testIsSparse(self): + self.assertFalse(sparse_tensor.is_sparse(3)) + self.assertFalse(sparse_tensor.is_sparse("foo")) + self.assertFalse(sparse_tensor.is_sparse(np.array(3))) + self.assertTrue( + sparse_tensor.is_sparse(sparse_tensor.SparseTensor([[0]], [0], [1]))) + self.assertTrue( + sparse_tensor.is_sparse( + sparse_tensor.SparseTensorValue([[0]], [0], [1]))) + class ConvertToTensorOrSparseTensorTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index e283542172..9fc0e49463 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -874,7 +874,7 @@ def is_tensor(x): # pylint: disable=invalid-name `isinstance(x, [tf.Tensor, tf.SparseTensor, tf.Variable])`. Args: - x: An python object to check. + x: A python object to check. Returns: `True` if `x` is a tensor, `False` if not. diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py index 660cbef302..0546218601 100644 --- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py @@ -109,7 +109,7 @@ class BatchDatasetTest(test.TestCase): def testBatchSparse(self): def _sparse(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0]], values=(i * [1]), dense_shape=[1]) iterator = dataset_ops.Dataset.range(10).map(_sparse).batch( @@ -121,19 +121,19 @@ class BatchDatasetTest(test.TestCase): sess.run(init_op) for i in range(2): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( + expected = sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], dense_shape=[5, 1]) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testBatchSparseWithDifferentDenseShapes(self): def _sparse(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=array_ops.expand_dims( math_ops.range(i, dtype=dtypes.int64), 1), values=array_ops.fill([math_ops.to_int32(i)], i), @@ -154,19 +154,19 @@ class BatchDatasetTest(test.TestCase): for k in range(i * 5 + j): expected_indices.append([j, k]) expected_values.append(i * 5 + j) - expected = sparse_tensor.SparseTensor( + expected = sparse_tensor.SparseTensorValue( indices=expected_indices, values=expected_values, dense_shape=[5, (i + 1) * 5 - 1]) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testNestedBatchSparse(self): def _sparse(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0]], values=(i * [1]), dense_shape=[1]) iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch( @@ -177,13 +177,13 @@ class BatchDatasetTest(test.TestCase): with self.test_session() as sess: sess.run(init_op) actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( + expected = sparse_tensor.SparseTensorValue( indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0], [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]], values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dense_shape=[2, 5, 1]) - self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -314,7 +314,7 @@ class BatchDatasetTest(test.TestCase): def testPaddedBatchSparseError(self): def _map_fn(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i with self.assertRaises(TypeError): diff --git a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/kernel_tests/dataset_constructor_op_test.py index b51d483b5b..9e2a620550 100644 --- a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py +++ b/tensorflow/python/kernel_tests/dataset_constructor_op_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -35,7 +36,7 @@ from tensorflow.python.platform import test class DatasetConstructorTest(test.TestCase): - def testTensorDataset(self): + def testFromTensors(self): """Test an dataset that represents a single tuple of tensors.""" components = (np.array(1), np.array([1, 2, 3]), np.array(37.0)) @@ -55,7 +56,75 @@ class DatasetConstructorTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testTensorSliceDataset(self): + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testFromTensorsSparse(self): + """Test an dataset that represents a single tuple of tensors.""" + components = (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1]]), + values=np.array([-1, 1]), + dense_shape=np.array([2, 2]))) + + iterator = ( + dataset_ops.Dataset.from_tensors(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual( + [tensor_shape.TensorShape(c.dense_shape) for c in components], + [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + results = sess.run(get_next) + for component, result_component in zip(components, results): + self.assertSparseValuesEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorsMixed(self): + """Test an dataset that represents a single tuple of tensors.""" + components = (np.array(1), np.array([1, 2, 3]), np.array(37.0), + sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1]]), + values=np.array([-1, 1]), + dense_shape=np.array([2, 2]))) + + iterator = ( + dataset_ops.Dataset.from_tensors(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual([ + tensor_shape.TensorShape(c.dense_shape) + if sparse_tensor.is_sparse(c) else c.shape for c in components + ], [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + results = sess.run(get_next) + for component, result_component in zip(components, results): + if sparse_tensor.is_sparse(component): + self.assertSparseValuesEqual(component, result_component) + else: + self.assertAllEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorSlices(self): """Test an dataset that represents the slices from a tuple of tensors.""" components = ( np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile( @@ -80,7 +149,127 @@ class DatasetConstructorTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testTensorSliceDatasetWithDict(self): + def testFromTensorSlicesSparse(self): + """Test an dataset that represents the slices from a tuple of tensors.""" + components = (sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 0], [2, 0]]), + values=np.array([0, 0, 0]), + dense_shape=np.array([3, 1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1], [2, 2]]), + values=np.array([1, 2, 3]), + dense_shape=np.array([3, 3]))) + + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual( + [tensor_shape.TensorShape(c.dense_shape[1:]) for c in components], + [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + expected = [ + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([1]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[1]]), + values=np.array([2]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[2]]), + values=np.array([3]), + dense_shape=np.array([3]))), + ] + for i in range(3): + results = sess.run(get_next) + for component, result_component in zip(expected[i], results): + self.assertSparseValuesEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorSlicesMixed(self): + """Test an dataset that represents the slices from a tuple of tensors.""" + components = (np.tile(np.array([[1], [2], [3]]), 20), + np.tile(np.array([[12], [13], [14]]), 22), + np.array([37.0, 38.0, 39.0]), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 0], [2, 0]]), + values=np.array([0, 0, 0]), + dense_shape=np.array([3, 1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0], [1, 1], [2, 2]]), + values=np.array([1, 2, 3]), + dense_shape=np.array([3, 3]))) + + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual([ + tensor_shape.TensorShape(c.dense_shape[1:]) + if sparse_tensor.is_sparse(c) else c.shape[1:] for c in components + ], [shape for shape in iterator.output_shapes]) + + with self.test_session() as sess: + sess.run(init_op) + expected = [ + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([1]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[1]]), + values=np.array([2]), + dense_shape=np.array([3]))), + (sparse_tensor.SparseTensorValue( + indices=np.array([[0]]), + values=np.array([0]), + dense_shape=np.array([1])), + sparse_tensor.SparseTensorValue( + indices=np.array([[2]]), + values=np.array([3]), + dense_shape=np.array([3]))), + ] + for i in range(3): + results = sess.run(get_next) + for component, result_component in zip( + (zip(*components[:3])[i] + expected[i]), results): + if sparse_tensor.is_sparse(component): + self.assertSparseValuesEqual(component, result_component) + else: + self.assertAllEqual(component, result_component) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testFromTensorSlicesWithDict(self): components = {"foo": [1, 2, 3], "bar": [[4.0], [5.0], [6.0]]} iterator = (dataset_ops.Dataset.from_tensor_slices(components) .make_initializable_iterator()) @@ -101,7 +290,7 @@ class DatasetConstructorTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testSparseTensorSliceDataset(self): + def testFromSparseTensorSlices(self): """Test a dataset based on slices of a `tf.SparseTensor`.""" st = array_ops.sparse_placeholder(dtypes.float64) iterator = (dataset_ops.Dataset.from_sparse_tensor_slices(st) diff --git a/tensorflow/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/kernel_tests/filter_dataset_op_test.py index 6eb445445f..b9258b720e 100644 --- a/tensorflow/python/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/filter_dataset_op_test.py @@ -131,9 +131,12 @@ class FilterDatasetTest(test.TestCase): self.assertAllEqual(a.dense_shape, b.dense_shape) def testSparse(self): + def _map_fn(i): - return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])), i def _filter_fn(_, i): return math_ops.equal(i % 2, 0) @@ -148,10 +151,8 @@ class FilterDatasetTest(test.TestCase): sess.run(init_op) for i in range(5): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[i*2], dense_shape=[1, 1]) self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertSparseValuesEqual(actual, _map_fn(i * 2)[0]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) diff --git a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py index 895f36382a..350234a839 100644 --- a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py @@ -124,7 +124,7 @@ class FlatMapDatasetTest(test.TestCase): def testSparse(self): def _map_fn(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) def _flat_map_fn(x): diff --git a/tensorflow/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/kernel_tests/interleave_dataset_op_test.py index 0a3c4af9e0..28cb50c002 100644 --- a/tensorflow/python/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/interleave_dataset_op_test.py @@ -177,8 +177,9 @@ class InterleaveDatasetTest(test.TestCase): sess.run(next_element) def testSparse(self): + def _map_fn(i): - return sparse_tensor.SparseTensor( + return sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2]) def _interleave_fn(x): diff --git a/tensorflow/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/kernel_tests/map_dataset_op_test.py index 51f43bfd89..ad6bbc043d 100644 --- a/tensorflow/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/kernel_tests/map_dataset_op_test.py @@ -284,9 +284,8 @@ class MapDatasetTest(test.TestCase): with self.test_session() as sess: sess.run(table.init) sess.run(init_op) - - print(sess.run(get_next)) - print(sess.run(get_next)) + sess.run(get_next) + sess.run(get_next) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -553,9 +552,13 @@ class MapDatasetTest(test.TestCase): self.assertAllEqual(a.dense_shape, b.dense_shape) def testSparse(self): + def _sparse(i): - return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])) + iterator = (dataset_ops.Dataset.range(10) .map(_sparse) .make_initializable_iterator()) @@ -566,24 +569,26 @@ class MapDatasetTest(test.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[i], dense_shape=[1, 1]) self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertSparseValuesEqual(actual, _sparse(i)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) def testSparseChain(self): + def _sparse(i): - return sparse_tensor.SparseTensor( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]) + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])) + def _check(i): - self.assertTrue(isinstance(i, sparse_tensor.SparseTensor)) + self.assertTrue(sparse_tensor.is_sparse(i)) return sparse_ops.sparse_concat(0, [i, i]) - iterator = (dataset_ops.Dataset.range(10) - .map(_sparse).map(_check) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.range(10).map(_sparse).map(_check) + .make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() @@ -591,10 +596,8 @@ class MapDatasetTest(test.TestCase): sess.run(init_op) for i in range(10): actual = sess.run(get_next) - expected = sparse_tensor.SparseTensor( - indices=[[0, 0], [1, 0]], values=[i, i], dense_shape=[2, 1]) self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue)) - self.assertSparseValuesEqual(actual, expected.eval()) + self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval()) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) -- GitLab From 7ea0fd6ccadc2922560ee66e3b11ae45324cc946 Mon Sep 17 00:00:00 2001 From: Guenther Schmuelling Date: Tue, 28 Nov 2017 10:46:48 -0800 Subject: [PATCH 0876/1801] add support for quantized ops on windows --- tensorflow/contrib/cmake/README.md | 17 ------ .../contrib/cmake/external/gemmlowp.cmake | 4 +- .../contrib/cmake/tf_core_kernels.cmake | 3 - tensorflow/contrib/cmake/tf_tests.cmake | 2 + tensorflow/core/kernels/quantized_conv_ops.cc | 7 +++ .../python/ops/quantized_conv_ops_test.py | 2 +- tensorflow/python/ops/quantized_ops_test.py | 57 +++++++++++++++++++ 7 files changed, 69 insertions(+), 23 deletions(-) create mode 100644 tensorflow/python/ops/quantized_ops_test.py diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 4ddfec5960..4be733a280 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -19,23 +19,6 @@ for instructions on how to install a pre-built TensorFlow package on Windows. ### Current known limitations * It is not possible to load a custom Op library. * GCS file system is not supported. -* The following Ops are not currently implemented: - - Dequantize - - QuantizeAndDequantize - - QuantizedAvgPool - - QuantizedBatchNomWithGlobalNormalization - - QuantizedBiasAdd - - QuantizedConcat - - QuantizedConv2D - - QuantizedMatmul - - QuantizedMaxPoo - - QuantizeDownAndShrinkRange - - QuantizedRelu - - QuantizedRelu6 - - QuantizedReshape - - QuantizeV2 - - RequantizationRange - - Requantize ## Building with CMake diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake index 3b146657bf..a235442dc5 100644 --- a/tensorflow/contrib/cmake/external/gemmlowp.cmake +++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(gemmlowp_URL https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip) -set(gemmlowp_HASH SHA256=dd2557072bde12141419cb8320a9c25e6ec41a8ae53c2ac78c076a347bb46d9d) +set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip) +set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d) set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 2d015908a8..eb6bf567aa 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -150,9 +150,6 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs}) if(WIN32) file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs # not working on windows yet - "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*" - "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h" - "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc" "${tensorflow_source_dir}/tensorflow/core/kernels/neon/*" # not in core - those are loaded dynamically as dll "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc" diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 18b71d1f9a..46134f4455 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -145,6 +145,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/*_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py" "${tensorflow_source_dir}/tensorflow/python/meta_graph_transform/*_test.py" + "${tensorflow_source_dir}/tensorflow/python/ops/quantized_conv_ops_test.py" + "${tensorflow_source_dir}/tensorflow/python/ops/quantized_ops_test.py" "${tensorflow_source_dir}/tensorflow/python/platform/build_info_test.py" "${tensorflow_source_dir}/tensorflow/python/profiler/*_test.py" "${tensorflow_source_dir}/tensorflow/python/profiler/internal/*_test.py" diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc index 3b0764bb9b..54090bac7e 100644 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ b/tensorflow/core/kernels/quantized_conv_ops.cc @@ -268,6 +268,13 @@ class Im2ColConvFunctor { Im2ColBufferResource* im2col_buffer_resource; std::function**)> creator = [](Im2ColBufferResource** resource) { +#ifdef _MSC_VER + // MSVC complains about the capture of chunk_value_count which oddly + // works fine in conv_ops_using_gemm.cc for example. + // Define chunk_value_count inside the lambda for now. + const int64 chunk_value_count = + (kMaxChunkSize + (sizeof(T1) - 1)) / sizeof(T1); +#endif *resource = new Im2ColBufferResource(); return Status::OK(); }; diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py index 5ea47ea40e..5e9e710027 100644 --- a/tensorflow/python/ops/quantized_conv_ops_test.py +++ b/tensorflow/python/ops/quantized_conv_ops_test.py @@ -93,7 +93,7 @@ class Conv2DTest(test.TestCase): quantized_range = ((quantized_max - quantized_min) * range_adjust) range_scale = (quantized_range / number_of_steps) lowest_quantized = -(1 << (number_of_bits - 1)) - result = np.array([(quantized_min + ((x - lowest_quantized) * range_scale)) + result = np.array([(quantized_min + ((float(x) - lowest_quantized) * range_scale)) for x in quantized.flatten()]) return result diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py new file mode 100644 index 0000000000..4bf3b35e13 --- /dev/null +++ b/tensorflow/python/ops/quantized_ops_test.py @@ -0,0 +1,57 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for quantized operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class QuantizedOpsTest(test.TestCase): + + def __init__(self, method_name="runTest"): + super(QuantizedOpsTest, self).__init__(method_name) + + def testQuantizeOp(self): + expected_output = [1, 1, 2, 127, 255, 255] + with self.test_session(use_gpu=False) as sess: + x = constant_op.constant([1.0, 1.25, 1.75, 127.0, 255.0, 500.0], shape=[6], dtype=dtypes.float32) + x_min = 0.0 + x_max = 255.0 + op = array_ops.quantize(x, x_min, x_max, dtypes.quint8, mode="MIN_FIRST") + value = sess.run(op) + self.assertArrayNear(expected_output, value.output, 0.1) + + def testDequantizeOp(self): + expected_output = [1.0, 2.0, 4.0, 8.0, 16.0, 255.0] + inp = np.array([1, 2, 4, 8, 16, 255]).astype(np.uint8) + with self.test_session(use_gpu=False) as sess: + x = constant_op.constant(inp, shape=[6], dtype=dtypes.quint8) + x_min = 0.0 + x_max = 255.0 + op = array_ops.dequantize(x, x_min, x_max, mode="MIN_FIRST") + value = sess.run(op) + self.assertArrayNear(expected_output, value, 0.1) + + +if __name__ == "__main__": + test.main() -- GitLab From 4cb754e0513262e6d89eacc90eb3673f2b405234 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Tue, 28 Nov 2017 10:48:47 -0800 Subject: [PATCH 0877/1801] Add empty placeholder git/gen files. Projects that depend on TensorFlow as a Bazel external dependency will almost certainly not be modifying any of the TensorFlow source and therefore don't need these git symlinked files (which exist for Bazel to keep track of changes to HEAD). Adding empty files for the files generated by configure so that TensorFlow can be built without running configure for these projects. These placeholders will get overridden for TF checkouts the first time you run configure.py. PiperOrigin-RevId: 177184829 --- tensorflow/tools/git/gen/branch_ref | 1 + tensorflow/tools/git/gen/head | 1 + tensorflow/tools/git/gen/spec.json | 3 +++ 3 files changed, 5 insertions(+) create mode 100644 tensorflow/tools/git/gen/branch_ref create mode 100644 tensorflow/tools/git/gen/head create mode 100644 tensorflow/tools/git/gen/spec.json diff --git a/tensorflow/tools/git/gen/branch_ref b/tensorflow/tools/git/gen/branch_ref new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/tensorflow/tools/git/gen/branch_ref @@ -0,0 +1 @@ + diff --git a/tensorflow/tools/git/gen/head b/tensorflow/tools/git/gen/head new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/tensorflow/tools/git/gen/head @@ -0,0 +1 @@ + diff --git a/tensorflow/tools/git/gen/spec.json b/tensorflow/tools/git/gen/spec.json new file mode 100644 index 0000000000..176bbc21cc --- /dev/null +++ b/tensorflow/tools/git/gen/spec.json @@ -0,0 +1,3 @@ +{ + "git": false +} -- GitLab From c81a8ae591cf43b6d10b887dfb22a780af3beec0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 10:51:26 -0800 Subject: [PATCH 0878/1801] Make sure that additional ops added by Savers to read ResourceVariables are added to the graph in a deterministic way. For ResourceVariables (op "VarHandleOp"), ops.internal_convert_to_tensor will add new ops such as "Read_8/ReadVariableOp". If op_list is cast to a set, as before this change, then adding these new ops made graph construction non-deterministic. PiperOrigin-RevId: 177185279 --- tensorflow/python/training/saver.py | 5 ++++- tensorflow/python/training/saver_test.py | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 5bddde1698..bd47736d4b 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -523,7 +523,10 @@ class BaseSaverBuilder(object): if not isinstance(op_list, (list, tuple, set)): raise TypeError("Variables to save should be passed in a dict or a " "list: %s" % op_list) - op_list = set(op_list) + # When ResourceVariables are converted to Tensors, read ops are added to the + # graph. Sorting the op_list ensures that the resulting graph is always + # constructed in a deterministic way: + op_list = sorted(op_list, key=lambda x: x.name) names_to_saveables = {} # pylint: disable=protected-access for var in op_list: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 744b17dd22..98ac197204 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -164,6 +164,18 @@ class SaverTest(test.TestCase): def testResourceBasic(self): self.basicSaveRestore(resource_variable_ops.ResourceVariable) + def testResourceVariableReadOpsAddedDeterministically(self): + graph_defs = [] + num_graphs = 10 + for _ in range(num_graphs): + with ops_lib.Graph().as_default() as g: + for i in range(20): + resource_variable_ops.ResourceVariable(i, name="var%s" % i) + saver_module.Saver() + graph_defs.append(g.as_graph_def()) + for i in range(num_graphs - 1): + self.assertEqual(graph_defs[i], graph_defs[i + 1]) + def testEagerBasic(self): with context.eager_mode(): ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt") -- GitLab From 723f285b64ace381e4180b342e31a8117b483058 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 28 Nov 2017 10:53:48 -0800 Subject: [PATCH 0879/1801] [XLA] Improvements to replay_computation tool. * Reduce threshold at which we run fake-data generation on the device from 1gb to 1mb. At the old threshold, I observed cases where we'd spend many seconds, and >50% of our runtime, in logf(), used for computing random numbers. * Don't retrieve or print the result when running with fake data. Presumably this is uninteresting, because garbage in, garbage out. Retrieving this data can take as long as running the whole computation, and printing it can take many times longer. * Add a LOG(INFO) indicating how long execution took. * Add a --num_runs flag. This is particularly important on GPUs, where the first run does autotuning, and so isn't interesting from a performance perspective. PiperOrigin-RevId: 177185636 --- tensorflow/compiler/xla/client/lib/testing.cc | 2 +- .../compiler/xla/tools/replay_computation.cc | 57 ++++++++++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index d936bd870b..5f2b55713e 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -51,7 +51,7 @@ std::unique_ptr MakeFakeDataViaDeviceOrDie(const Shape& shape, std::unique_ptr MakeFakeDataOrDie(const Shape& shape, Client* client) { - if (ShapeUtil::ByteSizeOf(shape) < (1LL << 30)) { + if (ShapeUtil::ByteSizeOf(shape) < (1LL << 20)) { StatusOr> literal_status = MakeFakeLiteral(shape); if (!literal_status.ok()) { // If we got an Unimplemented error, fall back to making the fake data via diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index 503e7d456e..ec3f6a0471 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -65,8 +65,9 @@ namespace { // Similarly, infeeds fake data of shape fake_infeed_shape if it is provided; // otherwise, no infeed is performed. StatusOr> ReplayComputation( - const SessionModule& module, tensorflow::StringPiece fake_infeed_shape, - bool use_fake_data, Client* client) { + const SessionModule& module, int num_runs, + tensorflow::StringPiece fake_infeed_shape, bool use_fake_data, + Client* client) { TF_ASSIGN_OR_RETURN(Computation computation, client->LoadSnapshot(module)); std::vector> arguments; @@ -107,10 +108,32 @@ StatusOr> ReplayComputation( for (auto& argument : arguments) { execute_arguments.push_back(argument.get()); } - return client->ExecuteAndTransfer(computation, execute_arguments); + + // Run the computation num_runs times, and return the result from the last + // execution. + std::unique_ptr result; + for (int i = 0; i < num_runs; ++i) { + ExecutionProfile profile; + if (use_fake_data) { + // If using fake data, execute the computation but don't bother retrieving + // the result -- presumably it's uninteresting, since our data is fake. + TF_RETURN_IF_ERROR(client + ->Execute(computation, execute_arguments, + /*execution_options=*/nullptr, &profile) + .status()); + } else { + TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer( + computation, execute_arguments, + /*execution_options=*/nullptr, &profile)); + } + LOG(INFO) << "Execution took " + << static_cast(profile.compute_time_ns()) / 1e9 << "s"; + } + + return std::move(result); } -int RealMain(tensorflow::gtl::ArraySlice args, +int RealMain(tensorflow::gtl::ArraySlice args, int num_runs, tensorflow::StringPiece fake_infeed_shape, bool use_fake_data) { Client* client = ClientLibrary::LocalClientOrDie(); tensorflow::Env* env = tensorflow::Env::Default(); @@ -118,22 +141,25 @@ int RealMain(tensorflow::gtl::ArraySlice args, for (char* arg : args) { SessionModule module; TF_CHECK_OK(tensorflow::ReadBinaryProto(env, arg, &module)); - StatusOr> result_status = - ReplayComputation(module, fake_infeed_shape, use_fake_data, client); + StatusOr> result_status = ReplayComputation( + module, num_runs, fake_infeed_shape, use_fake_data, client); if (!result_status.ok()) { fprintf(stderr, "%s: error: %s\n", arg, result_status.status().ToString().c_str()); exit_status = EXIT_FAILURE; continue; } + std::unique_ptr result = result_status.ConsumeValueOrDie(); - fprintf(stdout, "%s: %s :: %s:%s\n", arg, module.entry().name().c_str(), - ShapeUtil::HumanString(result->shape()).c_str(), - result->ToString().c_str()); - if (module.has_result()) { - fprintf(stdout, "was %s:%s\n", - ShapeUtil::HumanString(module.result().shape()).c_str(), - Literal(module.result()).ToString().c_str()); + if (result != nullptr) { + fprintf(stdout, "%s: %s :: %s:%s\n", arg, module.entry().name().c_str(), + ShapeUtil::HumanString(result->shape()).c_str(), + result->ToString().c_str()); + if (module.has_result()) { + fprintf(stdout, "was %s:%s\n", + ShapeUtil::HumanString(module.result().shape()).c_str(), + Literal(module.result()).ToString().c_str()); + } } } return exit_status; @@ -147,9 +173,12 @@ int main(int argc, char** argv) { // Flags xla::string fake_infeed_shape; bool use_fake_data = false; + int num_runs = 1; const std::vector flag_list = { tensorflow::Flag("use_fake_data", &use_fake_data, "Replay computation using fake data"), + tensorflow::Flag("num_runs", &num_runs, + "Number of times to run each computation"), tensorflow::Flag("fake_infeed_shape", &fake_infeed_shape, "Shape of fake data to construct for (infinite) infeed"), }; @@ -162,5 +191,5 @@ int main(int argc, char** argv) { tensorflow::gtl::ArraySlice args(argv, argc); args.pop_front(); // Pop off the binary name, argv[0] - return xla::tools::RealMain(args, fake_infeed_shape, use_fake_data); + return xla::tools::RealMain(args, num_runs, fake_infeed_shape, use_fake_data); } -- GitLab From 5d3d7fa81b87aa3c1137366f062c4f4ab9681a09 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 10:55:32 -0800 Subject: [PATCH 0880/1801] RevBlock: memory-efficient implementation of a series of reversible residual layers. PiperOrigin-RevId: 177185950 --- tensorflow/contrib/layers/BUILD | 15 + tensorflow/contrib/layers/__init__.py | 3 + .../contrib/layers/python/layers/__init__.py | 1 + .../layers/python/layers/rev_block_lib.py | 538 ++++++++++++++++++ .../python/layers/rev_block_lib_test.py | 331 +++++++++++ 5 files changed, 888 insertions(+) create mode 100644 tensorflow/contrib/layers/python/layers/rev_block_lib.py create mode 100644 tensorflow/contrib/layers/python/layers/rev_block_lib_test.py diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index 2f1f283811..852d06e1e3 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -61,6 +61,7 @@ tf_custom_op_py_library( "python/layers/normalization.py", "python/layers/optimizers.py", "python/layers/regularizers.py", + "python/layers/rev_block_lib.py", "python/layers/summaries.py", "python/layers/target_column.py", "python/layers/utils.py", @@ -376,6 +377,20 @@ py_test( ], ) +py_test( + name = "rev_block_lib_test", + size = "small", + srcs = ["python/layers/rev_block_lib_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":layers_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:init_ops", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py index d309ba958d..6c624929f2 100644 --- a/tensorflow/contrib/layers/__init__.py +++ b/tensorflow/contrib/layers/__init__.py @@ -42,6 +42,9 @@ See the @{$python/contrib.layers} guide. @@relu @@relu6 @@repeat +@@recompute_grad +@@RevBlock +@@rev_block @@safe_embedding_lookup_sparse @@scale_gradient @@separable_conv2d diff --git a/tensorflow/contrib/layers/python/layers/__init__.py b/tensorflow/contrib/layers/python/layers/__init__.py index 03337f9a5d..f1ae2de68b 100644 --- a/tensorflow/contrib/layers/python/layers/__init__.py +++ b/tensorflow/contrib/layers/python/layers/__init__.py @@ -28,6 +28,7 @@ from tensorflow.contrib.layers.python.layers.layers import * from tensorflow.contrib.layers.python.layers.normalization import * from tensorflow.contrib.layers.python.layers.optimizers import * from tensorflow.contrib.layers.python.layers.regularizers import * +from tensorflow.contrib.layers.python.layers.rev_block_lib import * from tensorflow.contrib.layers.python.layers.summaries import * from tensorflow.contrib.layers.python.layers.target_column import * from tensorflow.contrib.layers.python.ops.bucketization_op import * diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py new file mode 100644 index 0000000000..31a1b38bd4 --- /dev/null +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -0,0 +1,538 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Reversible Residual Block. + +From +[The Reversible Residual Network: Backpropagation Without Storing +Activations](https://arxiv.org/abs/1707.04585). + +Also contains the @recompute_grad decorator, which recomputes the forward +function on the backwards pass. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import re + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensorflow.contrib.framework.python import ops as contrib_framework_ops +from tensorflow.python.framework import function +from tensorflow.python.framework import ops as framework_ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import template +from tensorflow.python.ops import variable_scope +from tensorflow.python.util import nest + +__all__ = ["rev_block", "RevBlock", "recompute_grad"] + +LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*") + + +def _acc_grads(*lists_of_grads): + """Accumulates lists of gradients.""" + acc_grads = [] + for grads in zip(*lists_of_grads): + grads = [g for g in grads if g is not None] + if grads: + acc_grads.append(math_ops.add_n(grads)) + else: + acc_grads.append(None) + return acc_grads + + +def _rev_layer_forward(xs, f, g, f_side_input, g_side_input, + gate_outputs=False): + """Forward for 1 reversible layer.""" + x1, x2 = xs + y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2)) + y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1)) + if gate_outputs: + return control_flow_ops.tuple([y1, y2]) + else: + return (y1, y2) + + +def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars, + g_side_input): + """Backprop for 1 layer.""" + y1, y2 = ys + grad_y1, grad_y2 = grad_ys + + # Reconstruct intermediates and inputs (x1, x2) + # stop_gradients required on fn inputs to prevent infinite recursion into this + # grad function on the calls to gradients. + y1_stop = array_ops.stop_gradient(y1) + g_side_input = [array_ops.stop_gradient(t) for t in g_side_input] + gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop) + + x2 = y2 - gy1 + x2_stop = array_ops.stop_gradient(x2) + f_side_input = [array_ops.stop_gradient(t) for t in f_side_input] + fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop) + + x1 = y1 - fx2 + + # Compute gradients wrt to inputs + # dL/dy2 * dG(y1)/y1 + grad_gy1_y2 = gradients_impl.gradients(gy1, y1_stop, grad_y2)[0] + grad_x1 = grad_y1 + grad_gy1_y2 + grad_x2 = ( + gradients_impl.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + + gradients_impl.gradients(fx2, x2_stop, grad_gy1_y2)[0]) + + # Compute gradients wrt to vars and side inputs in f and g + grads1 = gradients_impl.gradients(gy1, g_vars + g_side_input, grad_y2) + grad_g_vars, grad_g_side = grads1[:len(g_vars)], grads1[len(g_vars):] + grads2 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_y1) + grad_f_y1, grad_f_side1 = grads2[:len(f_vars)], grads2[len(f_vars):] + grads3 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_gy1_y2) + grad_f_y2, grad_f_side2 = grads3[:len(f_vars)], grads3[len(f_vars):] + grad_f_vars = _acc_grads(grad_f_y1, grad_f_y2) + + grad_f_side = _acc_grads(grad_f_side1, grad_f_side2) + + # Put returns in a tuple to ensure a constant memory budget (i.e. don't want + # the subsequent layer to start computing and consuming memory based on a + # subset of these values). + outputs = ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side), + (grad_g_vars, grad_g_side)) + tupled = control_flow_ops.tuple(nest.flatten(outputs)) + return nest.pack_sequence_as(outputs, tupled) + + +def _rev_block_forward(x1, + x2, + f, + g, + num_layers=1, + f_side_input=None, + g_side_input=None, + gate_outputs=False): + """Forward for a series of reversible layers.""" + out = (x1, x2) + for i in xrange(num_layers): + out = _rev_layer_forward( + out, f[i], g[i], f_side_input, g_side_input, gate_outputs=gate_outputs) + + y1, y2 = out + return y1, y2 + + +class RevBlock(object): + """Block of reversible layers. See rev_block.""" + + def __init__(self, + f, + g, + num_layers=1, + f_side_input=None, + g_side_input=None, + use_efficient_backprop=True): + + if isinstance(f, list): + assert len(f) == num_layers + else: + f = [f] * num_layers + + if isinstance(g, list): + assert len(g) == num_layers + else: + g = [g] * num_layers + + scope_prefix = "revblock/revlayer_%d/" + f_scope = scope_prefix + "f" + g_scope = scope_prefix + "g" + + f = [ + template.make_template(f_scope % i, fn, create_scope_now_=True) + for i, fn in enumerate(f) + ] + g = [ + template.make_template(g_scope % i, fn, create_scope_now_=True) + for i, fn in enumerate(g) + ] + + self.f = f + self.g = g + + self.num_layers = num_layers + self.f_side_input = f_side_input or [] + self.g_side_input = g_side_input or [] + + self._use_efficient_backprop = use_efficient_backprop + + def _efficient_grad_fn(self, inputs, variables, ys, grad_ys): + """Custom gradient fn for a block of reversible residual layers.""" + side_inputs = inputs[2:] + f_side_idxs = [None] * len(self.f_side_input) + g_side_idxs = [None] * len(self.g_side_input) + assert len(side_inputs) == len(self.f_side_input) + len(self.g_side_input) + + for i, t in enumerate(side_inputs): + if t in self.f_side_input: + f_side_idxs[self.f_side_input.index(t)] = i + elif t in self.g_side_input: + g_side_idxs[self.g_side_input.index(t)] = i + else: + assert False + + f_vars = [[] for _ in range(self.num_layers)] + g_vars = [[] for _ in range(self.num_layers)] + f_vars_idxs = [[] for _ in range(self.num_layers)] + g_vars_idxs = [[] for _ in range(self.num_layers)] + + for i, t in enumerate(variables): + ref = _underlying_variable_ref(t) + + # Use the name to identify the layer number and function (f or g) + regex = LAYER_RE.match(ref.name) + layer_no = int(regex.group(1)) + fn_name = regex.group(2) + if fn_name == "f": + f_vars[layer_no].append(ref) + f_vars_idxs[layer_no].append(i) + else: + assert fn_name == "g" + g_vars[layer_no].append(ref) + g_vars_idxs[layer_no].append(i) + + f_var_grads = [] + g_var_grads = [] + f_side_grads = [] + g_side_grads = [] + + # Reverse variable containers to go backward + f_vars.reverse() + g_vars.reverse() + f = list(self.f) + g = list(self.g) + f.reverse() + g.reverse() + + for i in xrange(self.num_layers): + ys, grad_ys, f_ret, g_ret = _rev_layer_backward( + ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i], + self.g_side_input) + + grad_f_vars, grad_f_side = f_ret + grad_g_vars, grad_g_side = g_ret + f_var_grads.append(grad_f_vars) + g_var_grads.append(grad_g_vars) + f_side_grads.append(grad_f_side) + g_side_grads.append(grad_g_side) + + # Accumulate layer gradients for f_side_input and g_side_input + acc_f_side_grads = _acc_grads(*f_side_grads) + acc_g_side_grads = _acc_grads(*g_side_grads) + + # Use the stored idxs to put gradients in the passed-in order. + side_input_grads = [None] * len(side_inputs) + variable_grads = [None] * len(variables) + + # Variable gradients were collected in reverse layer order. Reverse to match + # idxs. + f_var_grads.reverse() + g_var_grads.reverse() + for idxs, grads in list(zip(f_vars_idxs, f_var_grads)) + list( + zip(g_vars_idxs, g_var_grads)): + for i, grad in zip(idxs, grads): + variable_grads[i] = grad + + for i, grad in zip(f_side_idxs, acc_f_side_grads): + side_input_grads[i] = grad + for i, grad in zip(g_side_idxs, acc_g_side_grads): + side_input_grads[i] = grad + + grad_x1, grad_x2 = grad_ys + return [grad_x1, grad_x2] + side_input_grads, variable_grads + + def forward(self, x1, x2): + """Run forward through the reversible layers.""" + + side_inputs = [self.f_side_input, self.g_side_input] + flat_side_inputs = nest.flatten(side_inputs) + + custom_grad_fn = ( + self._efficient_grad_fn if self._use_efficient_backprop else None) + + @_fn_with_custom_grad(custom_grad_fn) + def _forward(x1_, x2_, *flat_side_inputs): + f_side, g_side = nest.pack_sequence_as(side_inputs, flat_side_inputs) + return _rev_block_forward( + x1_, + x2_, + self.f, + self.g, + num_layers=self.num_layers, + f_side_input=f_side, + g_side_input=g_side, + gate_outputs=self._use_efficient_backprop) + + return _forward(x1, x2, *flat_side_inputs) + + def backward(self, y1, y2): + """Run backward through the reversible layers.""" + + f = list(self.f) + g = list(self.g) + f.reverse() + g.reverse() + + for i in xrange(self.num_layers): + gy1 = g[i](y1, self.g_side_input) if self.g_side_input else g[i](y1) + x2 = y2 - gy1 + fx2 = f[i](x2, self.f_side_input) if self.f_side_input else f[i](x2) + x1 = y1 - fx2 + + y1, y2 = x1, x2 + + return x1, x2 + + +def rev_block(x1, + x2, + f, + g, + num_layers=1, + f_side_input=None, + g_side_input=None, + is_training=True): + """A block of reversible residual layers. + + A reversible residual layer is defined as: + + ``` + y1 = x1 + f(x2, f_side_input) + y2 = x2 + g(y1, g_side_input) + ``` + + A reversible residual block, defined here, is a series of reversible residual + layers. + + Limitations: + * f and g must not close over any Tensors; all side inputs to f and g should + be passed in with f_side_input and g_side_input which will be forwarded to + f and g. + * f and g must not change the dimensionality of their inputs in order for the + addition in the equations above to work. + + Args: + x1: a float Tensor. + x2: a float Tensor. + f: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). + Should not change the shape of the Tensor. Can make calls to get_variable. + See f_side_input if there are side inputs. + g: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). + Should not change the shape of the Tensor. Can make calls to get_variable. + See g_side_input if there are side inputs. + num_layers: int, number of reversible residual layers. Each layer will + apply f and g according to the equations above, with new variables in each + layer. + f_side_input: list of Tensors, side input to f. If not None, signature of f + should be (Tensor, list) -> (Tensor). + g_side_input: list of Tensors, side input to g. If not None, signature of g + should be (Tensor, list) -> (Tensor). + is_training: bool, whether to actually use the efficient backprop codepath. + + Returns: + y1, y2: tuple of float Tensors. + """ + block = RevBlock(f, g, num_layers, f_side_input, g_side_input, is_training) + return block.forward(x1, x2) + + +def recompute_grad(fn): + """Decorator that recomputes the function on the backwards pass. + + Args: + fn: a function that takes Tensors (all as positional arguments) and returns + a tuple of Tensors. + + Returns: + A wrapped fn that is identical to fn when called, but its activations will + be discarded and recomputed on the backwards pass (i.e. on a call to + tf.gradients). + """ + + @functools.wraps(fn) + def wrapped(*args): + return _recompute_grad(fn, args) + + return wrapped + + +def _recompute_grad(fn, args): + """See recompute_grad.""" + + cached_vs = [] + cached_arg_scope = [] + + def grad_fn(inputs, variables, outputs, output_grads): + """Recompute outputs for gradient computation.""" + del outputs + # Recompute outputs + with framework_ops.control_dependencies(output_grads): + with contrib_framework_ops.arg_scope(cached_arg_scope[0]): + with variable_scope.variable_scope(cached_vs[0], reuse=True): + outputs = fn(*inputs) + + if not (isinstance(outputs, list) or isinstance(outputs, tuple)): + outputs = [outputs] + outputs = list(outputs) + grads = gradients_impl.gradients(outputs, inputs + variables, output_grads) + grad_inputs = grads[:len(inputs)] + grad_vars = grads[len(inputs):] + return grad_inputs, grad_vars + + @_fn_with_custom_grad(grad_fn) + def fn_with_recompute(*args): + cached_vs.append(variable_scope.get_variable_scope()) + # TODO(rsepassi): Rm conditional in TF 1.4 + if hasattr(contrib_framework_ops, "current_arg_scope"): + cached_arg_scope.append(contrib_framework_ops.current_arg_scope()) + else: + cached_arg_scope.append({}) + return fn(*args) + + return fn_with_recompute(*args) + + +def _underlying_variable_ref(t): + """Find the underlying variable ref. + + Traverses through Identity, ReadVariableOp, and Enter ops. + Stops when op type has Variable or VarHandle in name. + + Args: + t: a Tensor + + Returns: + a Tensor that is a variable ref, or None on error. + """ + while t.op.type in ["Identity", "ReadVariableOp", "Enter"]: + t = t.op.inputs[0] + + op_type = t.op.type + if "Variable" in op_type or "VarHandle" in op_type: + return t + else: + return None + + +def _fn_with_custom_grad(grad_fn, use_global_vars=False): + """Decorator to create a subgraph with a custom gradient function. + + The subgraph created by the decorated function is NOT put in a Defun and so + does not suffer from the limitations of the Defun (all subgraph ops on the + same device, no summaries). + + Args: + grad_fn: function with signature + (inputs, variables, outputs, output_grads) -> (grad_inputs, grad_vars), + all of which are lists of Tensors. + use_global_vars: if True, variables will be the global variables created. + If False, will be the trainable variables. + + Returns: + Decorator for function such that the gradient is defined by grad_fn. + """ + + def dec(fn): + + @functools.wraps(fn) + def wrapped(*args): + return _fn_with_custom_grad_internal( + fn, args, grad_fn, use_global_vars=use_global_vars) + + return wrapped + + return dec + + +def _fn_with_custom_grad_internal(fn, inputs, grad_fn, use_global_vars=False): + """Create a subgraph with a custom gradient. + + Args: + fn: function that takes inputs as arguments and produces 1 or more Tensors. + inputs: list, will be passed as fn(*inputs). + grad_fn: function with signature + (inputs, vars, outputs, output_grads) -> (grad_inputs, grad_vars), + all of which are lists of Tensors. + use_global_vars: if True, variables will be the global variables created. + If False, will be the trainable variables. + + Returns: + fn(*inputs) + """ + vs = variable_scope.get_variable_scope() + get_vars_fn = ( + vs.global_variables if use_global_vars else vs.trainable_variables) + len_before_vars = len(get_vars_fn()) + inputs = list(inputs) + outputs = fn(*inputs) + train_vars = get_vars_fn()[len_before_vars:] + + if grad_fn is None: + return outputs + + if not (isinstance(outputs, tuple) or isinstance(outputs, list)): + outputs = [outputs] + outputs = list(outputs) + + defun_inputs = [inputs, train_vars, outputs] + + def custom_grad_fn(op, *dys): + """Custom grad fn applying grad_fn for identity Defun.""" + fn_inputs, fn_vars, fn_outputs = nest.pack_sequence_as( + defun_inputs, list(op.inputs)) + dys = list(dys) + assert len(fn_outputs) == len(outputs) + assert len(fn_outputs) == len(dys) + + grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys) + grad_outputs = [None] * len(fn_outputs) + return tuple(grad_inputs + grad_vars + grad_outputs) + + # The Defun takes as input the original inputs, the trainable variables + # created in fn, and the outputs. In the forward it passes through the + # outputs. In the backwards, it produces gradients for the original inputs + # and the trainable variables. + in_types = [t.dtype for t in inputs] + out_types = [t.dtype for t in outputs] + var_types = [t.dtype for t in train_vars] + + # Get a unique name for the Defun + with framework_ops.name_scope("identity_custom_grad") as ns: + defun_name = ns + + @function.Defun( + *(in_types + var_types + out_types), + func_name=defun_name, + python_grad_func=custom_grad_fn, + shape_func=lambda _: [t.get_shape() for t in outputs]) + def identity(*args): + _, _, outs = nest.pack_sequence_as(defun_inputs, args) + return tuple([array_ops.identity(t) for t in outs]) + + flat_inputs = nest.flatten(defun_inputs) + id_out = identity(*flat_inputs) + return id_out diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py new file mode 100644 index 0000000000..a420753fd5 --- /dev/null +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py @@ -0,0 +1,331 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for RevBlock.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.layers.python.layers import layers +from tensorflow.contrib.layers.python.layers import rev_block_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import random_seed +from tensorflow.python.layers import convolutional +from tensorflow.python.layers import core as core_layers +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class RevBlockTest(test.TestCase): + CHANNELS = 8 + NUM_LAYERS = 4 + BATCH_SIZE = 16 + + def testForwardBackward(self): + + def f(x): + return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + + def g(x): + return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + + x = random_ops.random_uniform( + [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32) + x1, x2 = array_ops.split(x, 2, axis=-1) + + block = rev_block_lib.RevBlock(f, g, num_layers=3) + y1, y2 = block.forward(x1, x2) + x1_inv, x2_inv = block.backward(y1, y2) + + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + x1, x2, x1_inv, x2_inv = sess.run([x1, x2, x1_inv, x2_inv]) + + self.assertAllClose(x1, x1_inv) + self.assertAllClose(x2, x2_inv) + + def testBackwardForward(self): + + def f(x): + return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + + def g(x): + return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + + y = random_ops.random_uniform( + [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32) + y1, y2 = array_ops.split(y, 2, axis=-1) + + block = rev_block_lib.RevBlock(f, g, num_layers=3) + x1, x2 = block.backward(y1, y2) + y1_inv, y2_inv = block.forward(x1, x2) + + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + y1, y2, y1_inv, y2_inv = sess.run([y1, y2, y1_inv, y2_inv]) + + self.assertAllClose(y1, y1_inv) + self.assertAllClose(y2, y2_inv) + + def _testRevBlock(self, + x=None, + f=None, + g=None, + f_side_input=None, + g_side_input=None): + random_seed.set_random_seed(1234) + + if f is None: + + def f(x): # pylint: disable=function-redefined + return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + + if g is None: + + def g(x): # pylint: disable=function-redefined + return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + + if f_side_input is None: + f_side_input = [] + + if g_side_input is None: + g_side_input = [] + + if x is None: + x = random_ops.random_uniform( + [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32) + x1, x2 = array_ops.split(x, 2, axis=-1) + + with variable_scope.variable_scope("rev_test") as vs: + y1_rev, y2_rev = rev_block_lib.rev_block( + x1, + x2, + f, + g, + f_side_input=f_side_input, + g_side_input=g_side_input, + num_layers=self.NUM_LAYERS) + y_rev = array_ops.concat([y1_rev, y2_rev], axis=1) + fg_vars = vs.trainable_variables() + + num_vars = len(variables.global_variables()) + with variable_scope.variable_scope(vs, reuse=True): + y1, y2 = rev_block_lib.rev_block( + x1, + x2, + f, + g, + f_side_input=f_side_input, + g_side_input=g_side_input, + num_layers=self.NUM_LAYERS, + is_training=False) + y = array_ops.concat([y1, y2], axis=1) + # Ensure no new vars were created - full reuse + assert len(variables.global_variables()) == num_vars + + loss_rev = math_ops.reduce_mean(y_rev + 10.) + loss = math_ops.reduce_mean(y + 10.) + + wrt = [x] + f_side_input + g_side_input + fg_vars + grads_rev = gradients_impl.gradients(loss_rev, wrt) + grads = gradients_impl.gradients(loss, wrt) + + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + y_val, yd_val, gd_val, g_val = sess.run([y, y_rev, grads_rev, grads]) + self.assertAllClose(y_val, yd_val) + for g1, g2 in zip(gd_val, g_val): + self.assertAllClose(g1, g2) + + def testRevBlock(self): + self._testRevBlock() + + def testSideInput(self): + f_side_input = random_ops.random_uniform( + [self.BATCH_SIZE, self.CHANNELS // 2]) + + def f(x, side_input): + return core_layers.dense( + x, self.CHANNELS // 2, use_bias=True) + side_input[0] + + self._testRevBlock(f=f, f_side_input=[f_side_input]) + + def testMultipleFns(self): + + def f1(x): + return core_layers.dense(x, self.CHANNELS // 2) + + def f2(x): + return core_layers.dense(x, self.CHANNELS // 2, activation=nn_ops.relu) + + self._testRevBlock(f=[f1, f2, f1, f2]) + + # TODO(rsepassi): Recent change to conv seems to have broken this test. Find + # out why. + def _testConvAndBatchNorm(self): + + x = random_ops.random_uniform( + [self.BATCH_SIZE, 10, self.CHANNELS], dtype=dtypes.float32) + + def f(x): + x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same") + x = core_layers.batch_normalization(x, training=True) + x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same") + x = core_layers.batch_normalization(x, training=True) + return x + + self._testRevBlock(x=x, f=f) + + +class RecomputeTest(test.TestCase): + + def testRecompute(self): + + def layer(x, name=None): + with variable_scope.variable_scope(name, default_name="layer"): + x = layers.layer_norm(x) + x = convolutional.conv1d( + x, + 10, + 1, + use_bias=False, + kernel_initializer=init_ops.constant_initializer(42.42)) + x = nn_ops.relu(x) + return x + + def fn(x): + out = x + for _ in range(3): + out = layer(out) + return out + + @rev_block_lib.recompute_grad + def fn_recompute(x): + return fn(x) + + x = random_ops.random_uniform((3, 1, 3)) + recompute_vars = None + with variable_scope.variable_scope("recompute") as vs: + out1 = math_ops.reduce_sum(fn_recompute(x)) + recompute_vars = vs.trainable_variables() + reg_vars = None + with variable_scope.variable_scope("regular") as vs: + out2 = math_ops.reduce_sum(fn(x)) + reg_vars = vs.trainable_variables() + + grad1 = gradients_impl.gradients(out1, recompute_vars) + grad2 = gradients_impl.gradients(out2, reg_vars) + + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + outs = sess.run([out1, out2, grad1, grad2]) + self.assertAllClose(outs[0], outs[1]) + for g1, g2 in zip(outs[2], outs[3]): + self.assertAllClose(g1, g2) + + +class FnWithCustomGradTest(test.TestCase): + + def testCorrectness(self): + + w = random_ops.random_uniform([6, 10]) + + def fn(a, b, c): + return core_layers.dense( + a, + 10, + use_bias=False, + kernel_initializer=lambda shape, dtype, partition_info: w + ) + math_ops.matmul(b, c) + + def grad_fn(inputs, trainable_variables, outputs, grad_outputs): + outputs = outputs[0] + grad_outputs = grad_outputs[0] + grad_inputs = gradients_impl.gradients( + outputs, inputs, grad_ys=grad_outputs) + grad_vars = gradients_impl.gradients( + outputs, trainable_variables, grad_ys=grad_outputs) + return grad_inputs, grad_vars + + custom_fn = rev_block_lib._fn_with_custom_grad(grad_fn)(fn) + + a = random_ops.random_uniform([11, 6]) + b = random_ops.random_uniform([11, 7]) + c = random_ops.random_uniform([7, 10]) + + out = fn(a, b, c) + custom_out = custom_fn(a, b, c) + self.assertEqual(out.get_shape().as_list(), + custom_out.get_shape().as_list()) + + loss = math_ops.reduce_mean(out) + custom_loss = math_ops.reduce_mean(custom_out) + + grads = gradients_impl.gradients( + loss, [a, b, c] + [variables.trainable_variables()[0]]) + custom_grads = gradients_impl.gradients( + custom_loss, [a, b, c] + [variables.trainable_variables()[1]]) + + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + out_val, custom_out_val, grads_val, custom_grads_val = sess.run( + [out, custom_out, grads, custom_grads]) + self.assertAllClose(out_val, custom_out_val) + for g1, g2 in zip(grads_val, custom_grads_val): + self.assertAllClose(g1, g2) + + def testCustomGrad(self): + + def fn(a, b, c): + return core_layers.dense(a, 10, use_bias=False) + math_ops.matmul(b, c) + + def grad_fn(inputs, trainable_variables, unused_outputs, + unused_grad_outputs): + grad_inputs = [ + array_ops.ones_like(t) * (i + 1.) for i, t in enumerate(inputs) + ] + grad_vars = [ + array_ops.ones_like(t) * (i + len(inputs) + 1.) + for i, t in enumerate(trainable_variables) + ] + return grad_inputs, grad_vars + + a = random_ops.random_uniform([11, 6]) + b = random_ops.random_uniform([11, 7]) + c = random_ops.random_uniform([7, 10]) + w = random_ops.random_uniform([6, 10]) + out = rev_block_lib._fn_with_custom_grad(grad_fn)(fn)(a, b, c) + loss = math_ops.reduce_mean(out) + grads = gradients_impl.gradients( + loss, [a, b, c, variables.trainable_variables()[0]]) + expected_grads = [ + array_ops.ones_like(t) * (i + 1.) for i, t in enumerate([a, b, c, w]) + ] + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + g_val, eg_val = sess.run([grads, expected_grads]) + for g1, g2 in zip(g_val, eg_val): + self.assertAllClose(g1, g2) + + +if __name__ == "__main__": + test.main() -- GitLab From 10d3ba2cf54710d0fd43a8c5723101a06f25f915 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Tue, 28 Nov 2017 11:05:24 -0800 Subject: [PATCH 0881/1801] Run replicate_model_fn_test on the multi-gpu testing cluster. PiperOrigin-RevId: 177187767 --- tensorflow/contrib/estimator/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 197cf7e56f..8395e2db5e 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -374,4 +374,5 @@ cuda_py_test( "//tensorflow/python:variables", ":replicate_model_fn", ], + tags = ["multi_gpu"], ) -- GitLab From 3527d477d6af99bc664785fd81d896605ebf4d48 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Tue, 28 Nov 2017 11:28:54 -0800 Subject: [PATCH 0882/1801] Automated g4 rollback of changelist 176737730 PiperOrigin-RevId: 177191521 --- tensorflow/core/kernels/strided_slice_op.cc | 1 - tensorflow/core/kernels/strided_slice_op_gpu.cu.cc | 1 - 2 files changed, 2 deletions(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 73b6d4cf6a..8fc40db3cc 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc index 8ca27e3b92..a8487f49f4 100644 --- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc @@ -53,7 +53,6 @@ typedef Eigen::GpuDevice GPUDevice; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); TF_CALL_complex64(DEFINE_GPU_KERNELS); TF_CALL_complex128(DEFINE_GPU_KERNELS); -TF_CALL_int64(DEFINE_GPU_KERNELS); DEFINE_GPU_KERNELS(int32); #undef DEFINE_GPU_KERNELS -- GitLab From bc8718b090c565c6562dce098d16cdadffc6a213 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 11:30:15 -0800 Subject: [PATCH 0883/1801] Removed unused variables from curl_http_request_test. PiperOrigin-RevId: 177191730 --- tensorflow/core/platform/cloud/curl_http_request_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 6c0f081852..d476a1a4db 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -263,7 +263,6 @@ TEST(CurlHttpRequestTest, GetRequest) { std::vector scratch; scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end()); - StringPiece result; scratch.reserve(100); TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com")); @@ -594,7 +593,6 @@ TEST(CurlHttpRequestTest, ErrorReturnsNoResponse) { std::vector scratch; scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end()); - StringPiece result; scratch.reserve(100); TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com")); -- GitLab From cbd31dd4d30663344d0d15d8897d8ce652cf6294 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 28 Nov 2017 11:47:47 -0800 Subject: [PATCH 0884/1801] Eager function definition no longer adds control dependencies after constructing nodes. PiperOrigin-RevId: 177194441 --- tensorflow/python/eager/function.py | 59 ++++++++++++++--------- tensorflow/python/eager/graph_callable.py | 1 + 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 9bcd9c23c7..2f4b59e938 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -30,7 +30,7 @@ from tensorflow.python.eager import execute from tensorflow.python.eager import tape from tensorflow.python.eager.graph_only_ops import graph_placeholder from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes +from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import graph_to_function_def from tensorflow.python.framework import ops from tensorflow.python.ops import gradients_impl @@ -48,23 +48,7 @@ _scoped_captures.tensors = None def make_function_def(graph, operations, inputs, outputs): - """Makes function def where accesses to resources are serialized.""" - last_op_using_resource_tensor = {} - - # TODO(apassos) probably control flow has to be handled delicately here as in - # if a resource is accessed inside a control flow context we need the control - # dependency to point to something outside the context which is guaranteed to - # happen after the access. - # - # TODO(apassos) this should do some form of alias analysis as ops which - # forward the resources such as Identity and Switch can cause serialization to - # fail. - for op in operations: - for t in op.inputs: - if t.dtype == dtypes.resource: - if t.name in last_op_using_resource_tensor: - op._add_control_input(last_op_using_resource_tensor[t.name]) # pylint: disable=protected-access - last_op_using_resource_tensor[t.name] = op + """Makes function def from the given graph with the operations.""" return graph_to_function_def.graph_to_function_def( graph, operations, inputs, outputs) @@ -85,7 +69,7 @@ def capture_value(tensor_map, value, dtype, name): if captured_value is None: captured_value = graph_placeholder( dtype=dtype or value.dtype, shape=value.shape, name=name) - if captured_value.dtype == dtypes.resource: + if captured_value.dtype == dtypes_module.resource: captured_value._handle_data = value._handle_data # pylint: disable=protected-access tensor_map[ops.tensor_id(value)] = (value, captured_value) else: @@ -120,11 +104,19 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False): class CapturingGraph(ops.Graph): + """Graph used when constructing eager functions.""" def __init__(self, captures): super(CapturingGraph, self).__init__() self._building_function = True self.captures = captures + # Map from resource tensor name to last op (in program order) which uses + # this tensor. Used to enforce that execution order matches program order + # for resource tensors. + self._last_op_using_resource_tensor = {} + + def clear_resource_control_flow_state(self): + self._last_op_using_resource_tensor = {} def create_op( self, @@ -137,12 +129,31 @@ class CapturingGraph(ops.Graph): op_def=None, compute_shapes=True, compute_device=True): + # TODO(apassos) probably control flow has to be handled delicately here as + # in if a resource is accessed inside a control flow context we need the + # control dependency to point to something outside the context which is + # guaranteed to happen after the access. + # + # TODO(apassos) this should do some form of alias analysis as ops which + # forward the resources such as Identity and Switch can cause serialization + # to fail. + resource_inputs = set() + control_inputs = set() for i, inp in enumerate(inputs): if inp.graph is not self: inputs[i] = capture_value(self.captures, inp, inp.dtype, inp.op.name) - return super(CapturingGraph, self).create_op( - op_type, inputs, dtypes, input_types, name, attrs, op_def, - compute_shapes, compute_device) + inp = inputs[i] + if inp.dtype == dtypes_module.resource: + if inp.name in self._last_op_using_resource_tensor: + control_inputs.add(self._last_op_using_resource_tensor[inp.name]) + resource_inputs.add(inp.name) + with self.control_dependencies(list(control_inputs)): + op = super(CapturingGraph, self).create_op( + op_type, inputs, dtypes, input_types, name, attrs, op_def, + compute_shapes, compute_device) + for name in resource_inputs: + self._last_op_using_resource_tensor[name] = op + return op # TODO(apassos): it'd be really nice if we could scope this registration. @@ -314,7 +325,7 @@ class GraphModeFunction(object): return ops.internal_convert_to_tensor(x, ctx=ctx) op = g.create_op( signature.name, [make_tensor(x) for x in all_args], - [dtypes.DType(x.type) for x in signature.output_arg], + [dtypes_module.DType(x.type) for x in signature.output_arg], op_def=signature, name="FunctionCall", compute_shapes=False) @@ -373,7 +384,7 @@ class GraphModeFunction(object): args = list(tensor_inputs) + self._extra_inputs op = g.create_op( signature.name, [ops.convert_to_tensor(x) for x in args], - [dtypes.DType(x.type) for x in signature.output_arg], + [dtypes_module.DType(x.type) for x in signature.output_arg], op_def=signature, name="FunctionCall", compute_shapes=False) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index 837a75c808..faf0ac88bc 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -296,6 +296,7 @@ def _graph_callable_internal(func, shape_and_dtypes): # Call the function again, now replacing usages of variables with # placeholders. This assumes the variable capturing scope created above # knows about all variables. + tmp_graph.clear_resource_control_flow_state() with variable_captures.capturing_scope(), function.capture_tensors( captures): captured_outputs = func(*func_inputs) -- GitLab From 570d2772796bd642dc4808bd869e293f74553620 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 12:09:49 -0800 Subject: [PATCH 0885/1801] Bump LLVM snapshot to r319150. PiperOrigin-RevId: 177197719 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index dd5dc37a87..cb77f96be5 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -578,11 +578,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): temp_workaround_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9ab4c272cb604a7f947865428c4ef2169fee2100.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/9ab4c272cb604a7f947865428c4ef2169fee2100.tar.gz", ], - sha256 = "ff5ddbe5af5e264426c8d489e7fddfc5ad7e0975f19cefe9db8c0a5d0faeb23e", - strip_prefix = "llvm-8d26b8bee4d8e7230870a600bc968c7ee8cf6f67", + sha256 = "1b1b7d3800a94ca2302e3dd670dbe84238749583027883784b55297059d83da8", + strip_prefix = "llvm-9ab4c272cb604a7f947865428c4ef2169fee2100", build_file = str(Label("//third_party/llvm:llvm.BUILD")), repository = tf_repo_name, ) -- GitLab From ba87a8030aa30f24c354cf705e79734658bb0a8b Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Tue, 28 Nov 2017 12:17:52 -0800 Subject: [PATCH 0886/1801] Eager: Better errors for invalid options to enable_eager_execution. Fixes #14739 PiperOrigin-RevId: 177198861 --- tensorflow/python/framework/ops.py | 11 +++++++++++ tensorflow/python/framework/ops_test.py | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 60df8f82f0..cfef5e35f4 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -35,6 +35,7 @@ from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 from tensorflow.core.framework import op_def_pb2 from tensorflow.core.framework import versions_pb2 +from tensorflow.core.protobuf import config_pb2 from tensorflow.python import pywrap_tensorflow as c_api from tensorflow.python.eager import context from tensorflow.python.eager import core @@ -4794,6 +4795,16 @@ def enable_eager_execution(config=None, device_policy=None): or if trying to create a context with nontrivial options which differ from those of the existing context. """ + if config is not None and not isinstance(config, config_pb2.ConfigProto): + raise TypeError( + "config must be a tf.ConfigProto, but got %s" % type(config)) + if device_policy not in (None, context.DEVICE_PLACEMENT_EXPLICIT, + context.DEVICE_PLACEMENT_WARN, + context.DEVICE_PLACEMENT_SILENT): + raise ValueError( + "device_policy must be one of None, tfe.DEVICE_PLACEMENT_EXPLICIT, " + "tfe.DEVICE_PLACEMENT_WARN, tfe.DEVICE_PLACEMENT_SILENT" + ) # pylint: disable=protected-access if context._default_mode == context.GRAPH_MODE: graph_mode_has_been_used = ( diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index cd296ccdc5..e929cc8abf 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2395,6 +2395,13 @@ class InputTypesTest(test_util.TensorFlowTestCase): self.assertEqual([dtypes.double, dtypes.double], z.op._input_dtypes) # pylint: enable=protected-access + def testBadArgumentsToEnableEagerExecution(self): + with self.assertRaisesRegexp(TypeError, "config must be a tf.ConfigProto"): + ops.enable_eager_execution(context.DEVICE_PLACEMENT_SILENT) + with self.assertRaisesRegexp(ValueError, "device_policy must be one of"): + c = config_pb2.ConfigProto() + ops.enable_eager_execution(c, c) + if __name__ == "__main__": googletest.main() -- GitLab From b911049edfbb4a4eb07b3b46ed144da6cd33f9c1 Mon Sep 17 00:00:00 2001 From: Yilei Yang Date: Tue, 28 Nov 2017 12:26:12 -0800 Subject: [PATCH 0887/1801] Continue to allow old argument names specified in tf.flags.DEFINE functions. There are more DEFINE functions in absl.flags, they only accept the absl names. PiperOrigin-RevId: 177199982 --- tensorflow/python/platform/flags.py | 48 ++++++++++++++++++++++++ tensorflow/python/platform/flags_test.py | 41 +++++++++++++++++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/platform/flags.py b/tensorflow/python/platform/flags.py index e9a36ae75d..abd6f3d855 100644 --- a/tensorflow/python/platform/flags.py +++ b/tensorflow/python/platform/flags.py @@ -18,5 +18,53 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import logging as _logging + # go/tf-wildcard-import from absl.flags import * # pylint: disable=wildcard-import +import six as _six + +from tensorflow.python.util import tf_decorator + + +# Since we wrap absl.flags DEFINE functions, we need to declare this module +# does not affect key flags. +disclaim_key_flags() # pylint: disable=undefined-variable + + +_RENAMED_ARGUMENTS = { + 'flag_name': 'name', + 'default_value': 'default', + 'docstring': 'help', +} + + +def _wrap_define_function(original_function): + """Wraps absl.flags's define functions so tf.flags accepts old names.""" + + def wrapper(*args, **kwargs): + """Wrapper function that turns old keyword names to new ones.""" + has_old_names = False + for old_name, new_name in _six.iteritems(_RENAMED_ARGUMENTS): + if old_name in kwargs: + has_old_names = True + value = kwargs.pop(old_name) + kwargs[new_name] = value + if has_old_names: + _logging.warning( + 'Use of the keyword argument names (flag_name, default_value, ' + 'docstring) is deprecated, please use (name, default, help) instead.') + return original_function(*args, **kwargs) + + return tf_decorator.make_decorator(original_function, wrapper) + + +# pylint: disable=invalid-name,used-before-assignment +# absl.flags APIs use `default` as the name of the default value argument. +# Allow the following functions continue to accept `default_value`. +DEFINE_string = _wrap_define_function(DEFINE_string) +DEFINE_boolean = _wrap_define_function(DEFINE_boolean) +DEFINE_bool = DEFINE_boolean +DEFINE_float = _wrap_define_function(DEFINE_float) +DEFINE_integer = _wrap_define_function(DEFINE_integer) +# pylint: enable=invalid-name,used-before-assignment diff --git a/tensorflow/python/platform/flags_test.py b/tensorflow/python/platform/flags_test.py index 23060e17d2..e8200142dd 100644 --- a/tensorflow/python/platform/flags_test.py +++ b/tensorflow/python/platform/flags_test.py @@ -24,11 +24,50 @@ from absl import flags as absl_flags from tensorflow.python.platform import flags +flags.DEFINE_string( + flag_name='old_string', default_value='default', docstring='docstring') +flags.DEFINE_string( + name='new_string', default='default', help='docstring') +flags.DEFINE_integer( + flag_name='old_integer', default_value=1, docstring='docstring') +flags.DEFINE_integer( + name='new_integer', default=1, help='docstring') +flags.DEFINE_float( + flag_name='old_float', default_value=1.5, docstring='docstring') +flags.DEFINE_float( + name='new_float', default=1.5, help='docstring') +flags.DEFINE_bool( + flag_name='old_bool', default_value=True, docstring='docstring') +flags.DEFINE_bool( + name='new_bool', default=True, help='docstring') +flags.DEFINE_boolean( + flag_name='old_boolean', default_value=False, docstring='docstring') +flags.DEFINE_boolean( + name='new_boolean', default=False, help='docstring') + + class FlagsTest(unittest.TestCase): def test_global_flags_object(self): self.assertIs(flags.FLAGS, absl_flags.FLAGS) + def test_keyword_arguments(self): + test_cases = ( + ('old_string', 'default'), + ('new_string', 'default'), + ('old_integer', 1), + ('new_integer', 1), + ('old_float', 1.5), + ('new_float', 1.5), + ('old_bool', True), + ('new_bool', True), + ('old_boolean', False), + ('new_boolean', False), + ) + for flag_name, default_value in test_cases: + self.assertEqual(default_value, absl_flags.FLAGS[flag_name].default) + self.assertEqual('docstring', absl_flags.FLAGS[flag_name].help) + -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() -- GitLab From a86f2d2c1af7ac0e5c36eedb18d74c022737fc25 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 28 Nov 2017 12:49:38 -0800 Subject: [PATCH 0888/1801] Added an option to assume that the shape of fed nodes in unknown since any shape can be actually used. PiperOrigin-RevId: 177203023 --- tensorflow/core/grappler/costs/BUILD | 1 + .../core/grappler/costs/graph_properties.cc | 74 ++++++++++++++--- .../core/grappler/costs/graph_properties.h | 37 ++++++--- .../grappler/costs/graph_properties_test.cc | 82 +++++++++++++++---- .../core/grappler/costs/virtual_scheduler.cc | 2 +- .../optimizers/arithmetic_optimizer.cc | 2 +- .../grappler/optimizers/constant_folding.cc | 2 +- .../grappler/optimizers/layout_optimizer.cc | 2 +- .../grappler/optimizers/memory_optimizer.cc | 2 +- .../grappler/optimizers/static_schedule.cc | 4 +- tensorflow/python/grappler/item.i | 2 +- tensorflow/python/grappler/model_analyzer.cc | 2 +- 12 files changed, 164 insertions(+), 48 deletions(-) diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index f02cb51038..f1edbbb602 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -50,6 +50,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:cluster", ], ) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index dd389de636..fb7e20fca0 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/grappler/costs/utils.h" +#include "tensorflow/core/grappler/utils.h" namespace tensorflow { namespace grappler { @@ -316,7 +317,11 @@ class SymbolicShapeRefiner { shape_inference::ShapeHandle shape) { return shape_refiner_->SetShape(node, output_port, shape); } - + Status SetUnknownShape(const Node* node, int output_port) { + shape_inference::ShapeHandle shape = + GetUnknownOutputShape(node, output_port); + return shape_refiner_->SetShape(node, output_port, shape); + } struct ShapeId { const Node* node; int port_id; @@ -646,6 +651,23 @@ Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, return Status::OK(); } +Status GraphProperties::OverwriteFedPorts( + SymbolicShapeRefiner* shape_refiner, + const std::unordered_map>& fed_ports, + const Node* node, TopoQueue* new_shapes) const { + auto it = fed_ports.find(node->name()); + Status status; + if (it != fed_ports.end()) { + // It is possible to feed node output ports with tensors of any shape: as a + // result, the shape of a fed port is completely unknown. + for (const int output_port : it->second) { + status.Update(shape_refiner->SetUnknownShape(node, output_port)); + } + new_shapes->push(node); + } + return status; +} + // Manually propagate the input shape for Enter nodes and update any Merge node // outputs. Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner, @@ -673,9 +695,10 @@ Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner, return Status::OK(); } -Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner, - bool relax, const Node* n, - TopoQueue* new_shapes) { +Status GraphProperties::UpdateShapes( + SymbolicShapeRefiner* shape_refiner, bool relax, + const std::unordered_map>& fed_ports, + const Node* n, TopoQueue* new_shapes) const { if (n->IsEnter()) { // The Enter shape function always forwards an UnknownShape, so do the right // thing here. @@ -695,7 +718,9 @@ Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner, } } } - return Status::OK(); + // Nodes can be fed with any shape. The TensorFlow shape inference code can't + // handle this properly, so overwrite its behavior here. + return OverwriteFedPorts(shape_refiner, fed_ports, n, new_shapes); } // Propagates the shapes in the transitive fan-out of . @@ -703,6 +728,7 @@ Status GraphProperties::PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, const std::unordered_map>& resources, + const std::unordered_map>& fed_ports, int num_loops) const { // Limit the number of iterations to prevent infinite loops in the presence of // incorrect shape functions. The algoritm should converge in at most @@ -728,8 +754,8 @@ Status GraphProperties::PropagateShapes( for (const Edge* e : n->out_edges()) { if (!e->IsControlEdge()) { const Node* fanout = e->dst(); - TF_RETURN_IF_ERROR( - UpdateShapes(shape_refiner, relax, fanout, new_shapes)); + TF_RETURN_IF_ERROR(UpdateShapes(shape_refiner, relax, fed_ports, + fanout, new_shapes)); } } } @@ -803,7 +829,7 @@ Status GraphProperties::UpdateResource( return Status::OK(); } -Status GraphProperties::InferStatically() { +Status GraphProperties::InferStatically(bool assume_valid_feeds) { Graph graph(OpRegistry::Global()); FunctionLibraryDefinition function_library(graph.op_registry(), item_.graph.library()); @@ -820,11 +846,21 @@ Status GraphProperties::InferStatically() { Status s = ImportGraphDef(options, item_.graph, &graph, &shape_refiner); TF_RETURN_IF_ERROR(s); + std::unordered_map> fed_ports; + if (!assume_valid_feeds) { + for (const auto& feed : item_.feed) { + int port_index = 0; + string node_name = ParseNodeName(feed.first, &port_index); + fed_ports[node_name].insert(port_index); + } + } + // List the resources and the nodes using them. Also collect the Enter and // Merge nodes. std::unordered_map> resources; std::unordered_set enter_nodes; std::unordered_set merge_nodes; + std::unordered_set fed_nodes; int num_loops = 0; for (const Node* const node : graph.nodes()) { for (int i = 0; i < node->num_inputs(); ++i) { @@ -841,6 +877,9 @@ Status GraphProperties::InferStatically() { } else if (node->IsNextIteration()) { ++num_loops; } + if (fed_ports.find(node->name()) != fed_ports.end()) { + fed_nodes.insert(node); + } } SymbolicShapeRefiner refiner(&shape_refiner); @@ -855,15 +894,22 @@ Status GraphProperties::InferStatically() { // Force the propagation of shapes of Enter nodes manually (the Enter shape // function always forwards an UnknownShape). for (const Node* node : enter_nodes) { - TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes)); + TF_RETURN_IF_ERROR( + UpdateShapes(&refiner, relax, fed_ports, node, &new_shapes)); } // Seed the propagation of shapes through merge nodes. for (const Node* node : merge_nodes) { - TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes)); + TF_RETURN_IF_ERROR( + UpdateShapes(&refiner, relax, fed_ports, node, &new_shapes)); + } + // Also seed the propagation of shapes in the fanout of fed nodes. + for (const Node* node : fed_nodes) { + TF_RETURN_IF_ERROR( + OverwriteFedPorts(&refiner, fed_ports, node, &new_shapes)); } // Propagate shapes normally. - TF_RETURN_IF_ERROR( - PropagateShapes(&refiner, relax, &new_shapes, resources, num_loops)); + TF_RETURN_IF_ERROR(PropagateShapes(&refiner, relax, &new_shapes, resources, + fed_ports, num_loops)); } // Track shapes globally across the graph. @@ -874,6 +920,10 @@ Status GraphProperties::InferStatically() { if (!node_ctx) { continue; } + // Skip any information that comes from fed nodes. + if (fed_ports.find(node->name()) != fed_ports.end()) { + continue; + } for (const auto& merged_shapes : node_ctx->MergedShapes()) { if (!shape_manager.Merge(merged_shapes.first, merged_shapes.second) .ok()) { diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 95bc5044d0..6fc53a7f2e 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -34,12 +34,19 @@ class TopoQueue; // nodes, and potentially a set of nodes to feed. class GraphProperties { public: - // Factory method for creating a GrapplerShapes from a MetaGraphDef. - // Returns nullptr if the given meta_graph cannot be converted. explicit GraphProperties(const GrapplerItem& item) : item_(item) {} - Status InferStatically(); + // Infer the shapes through abstract interpretation. Feed information can be + // incorrect so it should be discarded to ensure correctness of the analysis. + // However, it can help infer shapes in the fanout of fed nodes (even though + // the correctness of these shapes can't be guaranteed), so in some cases + // (such as simulation or scheduling) it makes sense of keep these shapes. + Status InferStatically(bool assume_valid_feeds); + // Infer the shape by running the graph on the specified cluster and recording + // the shapes of the processed tensors. Status InferDynamically(Cluster* cluster); + // Extract the properties from a cost graph. For testing only since there is + // no way to ensure that the cost graph match the item. Status InferFromCostGraph(const CostGraphDef& cost_graph); // Stores `item_.graph` with the inferred output shapes to `output_graph_def`. @@ -65,12 +72,6 @@ class GraphProperties { OpInfo::TensorProperties*); private: - // Inputs - GrapplerItem item_; - std::map> input_properties_; - std::map> output_properties_; - const std::vector missing_properties_; - // Merges shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. static Status MergeEnqueueShapesAndTypes( @@ -99,17 +100,31 @@ class GraphProperties { static Status UpdateEnter(SymbolicShapeRefiner* shape_refiner, const Node* node, bool relax, TopoQueue* new_shapes); + // Process a node that is used to feed the model. + Status OverwriteFedPorts( + SymbolicShapeRefiner* shape_refiner, + const std::unordered_map>& fed_ports, + const Node* node, TopoQueue* new_shapes) const; // Update the shapes for node 'n'. If output shapes for n have changed, // enqueue its fanout in 'new_shapes'. - static Status UpdateShapes(SymbolicShapeRefiner* shape_refiner, bool relax, - const Node* n, TopoQueue* new_shapes); + Status UpdateShapes( + SymbolicShapeRefiner* shape_refiner, bool relax, + const std::unordered_map>& fed_ports, + const Node* n, TopoQueue* new_shapes) const; // Propagate the shapes for the nodes enqueued in new_shapes and their // transitive fanout until a fixed point is reached. Status PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, const std::unordered_map>& resources, + const std::unordered_map>& fed_ports, int num_loops) const; + + // Data members + GrapplerItem item_; + std::map> input_properties_; + std::map> output_properties_; + const std::vector missing_properties_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index c11af5777a..ad8e768f1f 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -73,7 +73,7 @@ TEST_F(GraphPropertiesTest, StaticProperties) { CHECK(fake_input.NextItem(&item)); GraphProperties properties(item); - Status s = properties.InferStatically(); + Status s = properties.InferStatically(true); TF_CHECK_OK(s); for (const auto& node : item.graph.node()) { @@ -179,7 +179,7 @@ TEST_F(GraphPropertiesTest, Variables) { { GraphProperties static_properties(item); - TF_CHECK_OK(static_properties.InferStatically()); + TF_CHECK_OK(static_properties.InferStatically(false)); const auto props = static_properties.GetOutputProperties("Var"); EXPECT_EQ(1, props.size()); @@ -219,7 +219,7 @@ TEST_F(GraphPropertiesTest, VarHandles) { .Finalize(item.graph.add_node())); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); const auto props = properties.GetOutputProperties("VarRead"); EXPECT_EQ(1, props.size()); @@ -286,7 +286,7 @@ TEST_F(GraphPropertiesTest, Queues) { TF_CHECK_OK(root.ToGraphDef(&item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); const auto props1 = properties.GetOutputProperties("Dequeue1"); ASSERT_EQ(1, props1.size()); @@ -335,7 +335,7 @@ TEST_F(GraphPropertiesTest, MergeWithoutLoops) { "merge_without_loops.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); std::vector nodes{"cond/Merge", "cond/concat", "cond/concat_1"}; std::vector expected_outputs{"float: [-1,-1,1]", "float: [2,1,1]", @@ -377,7 +377,7 @@ TEST_F(GraphPropertiesTest, WhileLoop) { "while_loop.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); std::vector nodes{"while/Merge_1", "while/NextIteration_1", "while/Exit_1"}; @@ -435,7 +435,7 @@ TEST_F(GraphPropertiesTest, NestedLoop) { "nested_loop.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); std::vector outer_nodes{"while/Merge_1", "while/NextIteration_1", "while/Exit_1"}; @@ -498,7 +498,7 @@ TEST_F(GraphPropertiesTest, LoopsAndQueues) { "loops_and_queues.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); std::vector outer_nodes{"while/Merge_1", "while/NextIteration_1", "while/Exit_1"}; @@ -556,7 +556,7 @@ TEST_F(GraphPropertiesTest, LoopsAndResourceVars) { "loops_and_resource_vars.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); std::vector outer_nodes{"while/Merge_1", "while/NextIteration_1", "while/Exit_1"}; @@ -608,7 +608,7 @@ TEST_F(GraphPropertiesTest, QueuesAndLoops) { "queues_and_loops.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); std::vector nodes{"while/Merge_1", "while/NextIteration_1", "while/Exit_1"}; @@ -657,7 +657,7 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape) { item.fetch.push_back("init_restore"); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); const auto restore_props = properties.GetOutputProperties("restore"); const OpInfo::TensorProperties& restore_prop = restore_props[0]; @@ -704,7 +704,7 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape_WithTwoNodesShareSameOutput) { item.fetch.push_back("init2"); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); const auto props = properties.GetOutputProperties("restore"); const OpInfo::TensorProperties& prop = props[0]; @@ -732,7 +732,7 @@ TEST_F(GraphPropertiesTest, FunctionStaticShapeInference) { "simple_function.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); const auto props = properties.GetOutputProperties("MyAdd_55e046a8_1"); const OpInfo::TensorProperties& prop = props[0]; EXPECT_EQ(DT_FLOAT, prop.dtype()); @@ -766,7 +766,7 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); const auto shape_a = properties.GetOutputProperties("a").at(0).shape(); const auto shape_c = properties.GetOutputProperties("c").at(0).shape(); EXPECT_EQ(2, shape_a.dim_size()); @@ -822,7 +822,7 @@ TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { GraphProperties properties(item); // This function should return OK, since it doesn't validate the colocation // constraints internally. - TF_EXPECT_OK(properties.InferStatically()); + TF_EXPECT_OK(properties.InferStatically(false)); } TEST_F(GraphPropertiesTest, ShapeTracking) { @@ -842,7 +842,7 @@ TEST_F(GraphPropertiesTest, ShapeTracking) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphProperties properties(item); - TF_CHECK_OK(properties.InferStatically()); + TF_CHECK_OK(properties.InferStatically(false)); const auto shape_a = properties.GetOutputProperties("a").at(0).shape(); const auto shape_b = properties.GetOutputProperties("b").at(0).shape(); const auto shape_o1 = properties.GetOutputProperties("o1").at(0).shape(); @@ -851,6 +851,56 @@ TEST_F(GraphPropertiesTest, ShapeTracking) { EXPECT_EQ(shape_b.DebugString(), shape_o2.DebugString()); } +TEST_F(GraphPropertiesTest, FedNodes) { + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, + cluster_->GetDeviceNames()); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + item.feed.emplace_back("AddN", Tensor()); + + { + // Conservative shape analysis: the shape of fed ports should be unknown + GraphProperties properties(item); + Status s = properties.InferStatically(false); + TF_CHECK_OK(s); + for (const auto& node : item.graph.node()) { + if (node.name() == "AddN") { + const auto in_props = properties.GetInputProperties(node.name()); + EXPECT_EQ(1, in_props.size()); + const OpInfo::TensorProperties& in_prop = in_props[0]; + EXPECT_EQ(DT_FLOAT, in_prop.dtype()); + EXPECT_FALSE(in_prop.shape().unknown_rank()); + EXPECT_EQ(2, in_prop.shape().dim_size()); + const auto out_props = properties.GetOutputProperties(node.name()); + EXPECT_EQ(1, out_props.size()); + EXPECT_EQ(DT_FLOAT, in_prop.dtype()); + EXPECT_TRUE(in_prop.shape().unknown_rank()); + } + } + } + { + // Optimistic shape analysis: the shape of fed ports should be derived from + // the shape of the fanin. + GraphProperties properties(item); + Status s = properties.InferStatically(true); + TF_CHECK_OK(s); + for (const auto& node : item.graph.node()) { + if (node.name() == "AddN") { + const auto in_props = properties.GetInputProperties(node.name()); + EXPECT_EQ(1, in_props.size()); + const OpInfo::TensorProperties& in_prop = in_props[0]; + EXPECT_EQ(DT_FLOAT, in_prop.dtype()); + EXPECT_FALSE(in_prop.shape().unknown_rank()); + EXPECT_EQ(2, in_prop.shape().dim_size()); + const auto out_props = properties.GetOutputProperties(node.name()); + EXPECT_EQ(1, out_props.size()); + const OpInfo::TensorProperties& out_prop = out_props[0]; + EXPECT_EQ(in_prop.DebugString(), out_prop.DebugString()); + } + } + } +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index e5e1ee3292..6640de668d 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -122,7 +122,7 @@ Status VirtualScheduler::Init() { // Construct graph properties. Status status; if (use_static_shapes_) { - status = graph_properties_.InferStatically(); + status = graph_properties_.InferStatically(true); } else { status = graph_properties_.InferDynamically(cluster_); } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 1e39c610a4..930d122234 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1067,7 +1067,7 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, if (opt_level_ == RewriterConfig::AGGRESSIVE) { graph_properties_.reset(new GraphProperties(item)); // Shapes are only needed in aggressive mode. - TF_RETURN_IF_ERROR(graph_properties_->InferStatically()); + TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false)); TF_RETURN_IF_ERROR( graph_properties_->AnnotateOutputShapes(optimized_graph_)); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index c77b2badf4..33a9dddba7 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1163,7 +1163,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status s = errors::Unknown( "The graph properties are needed but were not initialized"); if (needs_shapes) { - s = properties.InferStatically(); + s = properties.InferStatically(false); } if (!has_feed && s.ok()) { diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index d5563e9d4c..1b8046b787 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -1620,7 +1620,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, virtual_placer_.reset(new VirtualPlacer(cluster)); nodes_to_preserve_ = item.NodesToPreserve(); GraphProperties graph_properties(item); - auto status = graph_properties.InferStatically(); + auto status = graph_properties.InferStatically(false); if (!status.ok()) { *output = item.graph; return status; diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 7c44ce15c6..a2a2680c4f 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -716,7 +716,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, { // Estimate the size of the data to swap for each node. GraphProperties properties(item); - TF_RETURN_IF_ERROR(properties.InferStatically()); + TF_RETURN_IF_ERROR(properties.InferStatically(true)); for (auto& swap : nodes_to_swap) { const NodeDef* node = swap.first; std::vector props = diff --git a/tensorflow/core/grappler/optimizers/static_schedule.cc b/tensorflow/core/grappler/optimizers/static_schedule.cc index 6ce6deef2c..450e853407 100644 --- a/tensorflow/core/grappler/optimizers/static_schedule.cc +++ b/tensorflow/core/grappler/optimizers/static_schedule.cc @@ -86,7 +86,7 @@ Status EstimateEarliestExecutionTimes( name_map.clear(); GraphProperties properties(item); - TF_RETURN_IF_ERROR(properties.InferStatically()); + TF_RETURN_IF_ERROR(properties.InferStatically(true)); OpLevelCostEstimator estimator; VirtualPlacer placer(cluster); @@ -154,7 +154,7 @@ Status EstimateRequiredTimes( } } GraphProperties properties(item); - TF_RETURN_IF_ERROR(properties.InferStatically()); + TF_RETURN_IF_ERROR(properties.InferStatically(true)); OpLevelCostEstimator estimator; VirtualPlacer placer(cluster); diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i index 7dd79f7c82..8f72a425c3 100644 --- a/tensorflow/python/grappler/item.i +++ b/tensorflow/python/grappler/item.i @@ -120,7 +120,7 @@ static PyObject* TF_GetOpProperties(GItem item) { Py_RETURN_NONE; } tensorflow::grappler::GraphProperties properties(*item); - tensorflow::Status status = properties.InferStatically(); + tensorflow::Status status = properties.InferStatically(false); if (!status.ok()) { Py_RETURN_NONE; } diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc index 7d365c3be9..da5b03234e 100644 --- a/tensorflow/python/grappler/model_analyzer.cc +++ b/tensorflow/python/grappler/model_analyzer.cc @@ -27,7 +27,7 @@ ModelAnalyzer::ModelAnalyzer(const GrapplerItem& item) : item_(item) {} Status ModelAnalyzer::GenerateReport(std::ostream& os) { GraphProperties properties(item_); - TF_RETURN_IF_ERROR(properties.InferStatically()); + TF_RETURN_IF_ERROR(properties.InferStatically(false)); for (const auto& node : item_.MainOpsFanin()) { PrintNodeInfo(node, properties, os); -- GitLab From d96e936fffb8ccd5761c4bf59a8f8ce185f4d50c Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 28 Nov 2017 12:50:21 -0800 Subject: [PATCH 0889/1801] Add custom_estimators PiperOrigin-RevId: 177203120 --- .../docs_src/get_started/custom_estimators.md | 576 ++++++++++++++++++ 1 file changed, 576 insertions(+) create mode 100644 tensorflow/docs_src/get_started/custom_estimators.md diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md new file mode 100644 index 0000000000..e347aa6bd0 --- /dev/null +++ b/tensorflow/docs_src/get_started/custom_estimators.md @@ -0,0 +1,576 @@ + +# Creating Custom Estimators +This document introduces custom Estimators. In particular, this document +demonstrates how to create a custom @{tf.estimator.Estimator$Estimator} that +mimics the behavior of the pre-made Estimator +@{tf.estimator.DNNClassifier$`DNNClassifier`} in solving the Iris problem. See +the @{$get_started/estimator$Pre-Made Estimators chapter} for details. + +If you are feeling impatient, feel free to compare and contrast the following +full programs: + +* Iris implemented with the [pre-made DNNClassifier Estimator](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py). +* Iris implemented with a [custom Estimator](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py). + +## Pre-made vs. custom + +As the following figure shows, pre-made Estimators are subclasses of the +@{tf.estimator.Estimator} base class, while custom Estimators are an instance +of tf.estimator.Estimator: + +
    +Premade estimators are sub-classes of `Estimator`. Custom Estimators are usually (direct) instances of `Estimator` +
    +
    +Pre-made and custom Estimators are all Estimators. +
    + +Pre-made Estimators are fully baked. Sometimes though, you need more control +over an Estimator's behavior. That's where custom Estimators come in. You can +create a custom Estimator to do just about anything. If you want hidden layers +connected in some unusual fashion, write a custom Estimator. If you want to +calculate a unique +[metric](https://developers.google.com/machine-learning/glossary/#metric) +for your model, write a custom Estimator. Basically, if you want an Estimator +optimized for your specific problem, write a custom Estimator. + +A model function (or `model_fn`) implements the ML algorithm. The +only difference between working with pre-made Estimators and custom Estimators +is: + +* With pre-made Estimators, someone already wrote the model function for you. +* With custom Estimators, you must write the model function. + +Your model function could implement a wide range of algorithms, defining all +sorts of hidden layers and metrics. Like input functions, all model functions +must accept a standard group of input parameters and return a standard group of +output values. Just as input functions can leverage the Dataset API, model +functions can leverage the Layers API and the Metrics API. + +Let's see how to solve the Iris problem with a custom Estimator. A quick +reminder--here's the organization of the Iris model that we're trying to mimic: + +
    +A diagram of the network architecture: Inputs, 2 hidden layers, and outputs +
    +
    +Our implementation of Iris contains four features, two hidden layers, +and a logits output layer. +
    + +## Write an Input function + +In our custom Estimator implementation, we'll reuse the input function we used +in the pre-made Estimator implementation. Namely: + +```python +def train_input_fn(features, labels, batch_size): + """An input function for training""" + # Convert the inputs to a Dataset. + dataset = tf.data.Dataset.from_tensor_slices((features, labels)) + + # Shuffle, repeat, and batch the examples. + dataset = dataset.shuffle(1000).repeat().batch(batch_size) + + # Return the read end of the pipeline. + return dataset.make_one_shot_iterator().get_next() +``` + +This input function builds an input pipeline that yields batches of +`(features, labels)` pairs, where `features` is a dictionary features. + +## Create feature columns + + +As detailed in @{$get_started/estimator$Premade Estimators}, you must define +your model's feature columns to specify how the model should use each feature. +Whether working with pre-made Estimators or custom Estimators, you define +feature columns in the same fashion. + +The following code creates a simple `numeric_column` for each input feature, +indicating that the value of the input feature should be used directly as an +input to the model: + +```python +# Feature columns describe how to use the input. +my_feature_columns = [] +for key in train_x.keys(): + my_feature_columns.append(tf.feature_column.numeric_column(key=key)) +``` + +## Write a model function + +The model function we'll use has the following call signature: + +```python +def my_model_fn( + features, # This is batch_features from input_fn + labels, # This is batch_labels from input_fn + mode, # An instance of tf.estimator.ModeKeys + params): # Additional configuration +``` + +The first two arguments are the batches of features and labels returned from +the input function; that is, `features` and `labels` are the handles to the +data your model will use. The `mode` argument indicates whether the caller is +requesting training, predicting, or evaluation. + +The caller may pass `params` to an Estimator's constructor. The `params` passed +to the constructor become the `params` passed to `model_fn`. + +```python + # Build 2 hidden layer DNN with 10, 10 units respectively. + classifier = tf.estimator.Estimator( + model_fn=my_model, + params={ + 'feature_columns': my_feature_columns, + # Two hidden layers of 10 nodes each. + 'hidden_units': [10, 10], + # The model must choose between 3 classes. + 'n_classes': 3, + }) +``` + +To implement a typical model function, you must do the following: + +* (Define the model)[#define_the_model]. +* Specify additional calculations for each of + the [three different modes](#modes): + * [Predict](#predict) + * [Evaluate](#evaluate) + * [Train](#train) + +## Define the model + +The basic deep neural network model must define the following three sections: + +* An [input layer](https://developers.google.com/machine-learning/glossary/#input_layer) +* One or more [hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer) +* An [output layer](https://developers.google.com/machine-learning/glossary/#output_layer) + +### Define the input layer + +Call @{tf.feature_column.input_layer} to convert your feature dictionary and +feature columns into input for your model. For example: + +```python + # Use `input_layer` to apply the feature columns. + net = tf.feature_column.input_layer(features, params['feature_columns']) +``` + +The preceding line applies the transformations defined by your feature columns, +creating the input layer of our model. + +
    +A diagram of the input layer, in this case a 1:1 mapping from raw-inputs to features. +
    + + +### Hidden Layers + +If you are creating a deep neural network, you must define one or more hidden +layers. The Layers API provides a rich set of functions to define all types of +hidden layers, including convolutional, pooling, and dropout layers. For Iris, +we're simply going to call @{tf.layers.dense} to create hidden layers, with +dimensions defined by `params['hidden_layers']`. In a `dense` layer each node +is connected to every node in the preceding layer. Here's the relevant code: + +``` python + # Build the hidden layers, sized according to the 'hidden_units' param. + for units in params['hidden_units']: + net = tf.layers.dense(net, units=units, activation=tf.nn.relu) +``` +* The `units` parameter defines the number of output neurons in a given layer. +* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#a) — + [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this + case. + +The variable `net` here signifies the current top layer of the network. During +the first iteration, `net` signifies the input layer. On each loop iteration +`tf.layers.dense` creates a new layer, which takes the previous layer as its +input. So, the loop uses `net` to pass the previously created layer as input +to the layer being created. + +After creating two hidden layers, our network looks as follows. For +simplicity, the figure only shows four hidden units in each layer. + +
    +The input layer with two hidden layers added. +
    + +Note that @{tf.layers.dense} provides many additional capabilities, including +the ability to set a multitude of regularization parameters. For the sake of +simplicity, though, we're going to simply accept the default values of the +other parameters. + +### Output Layer + +We'll define the output layer by calling @{tf.layers.dense} yet again, this +time without an activation function: + +```python + # Compute logits (1 per class). + logits = tf.layers.dense(net, params['n_classes'], activation=None) +``` + +Here, `net` signifies the final hidden layer. Therefore, the full set of layers +is now connected as follows: + +
    +A logit output layer connected to the top hidden layer +
    +
    +The final hidden layer feeds into the output layer. +
    + +When defining an output layer, the `units` parameter specifies the number of +outputs. So, by setting `units` to `params['n_classes']`, the model produces +one output value per class. Each element of the output vector will contains the +score, or "logit", calculated to the associated class of Iris: Setosa, +Versicolor, or Virginica, respectively. + +Later on, these logits will be transformed into probabilities by the +@{tf.nn.softmax} function. + +## Implement training, evaluation, and prediction {modes} + +The final step in creating a model function is to write branching code that +implements prediction, evaluation, and training. + +The model function gets invoked whenever someone calls the Estimator's `train`, +`evaluate`, or `predict` methods. Recall that the signature for the model +function looks like this: + +``` python +def my_model_fn( + features, # This is batch_features from input_fn + labels, # This is batch_labels from input_fn + mode): # An instance of tf.estimator.ModeKeys, see below +``` + +Focus on that third argument, mode. As the following table shows, when someone +calls train, evaluate, or predict, the Estimator framework invokes your model +function with the mode parameter set as follows: + +| Estimator method | Estimator Mode | +|:---------------------------------|:------------------| +|@{tf.estimator.Estimator.train$`train()`} |@{tf.estimator.ModeKeys.TRAIN$`ModeKeys.TRAIN`} | +|@{tf.estimator.Estimator.evaluate$`evaluate()`} |@{tf.estimator.ModeKeys.EVAL$`ModeKeys.EVAL`} | +|@{tf.estimator.Estimator.predict$`predict()`}|@{tf.estimator.ModeKeys.PREDICT$`ModeKeys.PREDICT`} | + +For example, suppose you instantiate a custom Estimator to generate an object +named `classifier`. Then, you make the following call: + +``` python +classifier = tf.estimator.Estimator(...) +classifier.train(input_fn=lambda: my_input_fn(FILE_TRAIN, True, 500)) +``` +The Estimator framework then calls your model function with mode set to +`ModeKeys.TRAIN`. + +Your model function must provide code to handle all three of the mode values. +For each mode value, your code must return an instance of +`tf.estimator.EstimatorSpec`, which contains the information the caller +requires. Let's examine each mode. + +### Predict + +When the Estimator's `predict` method is called, the `model_fn` receives +`mode = ModeKeys.PREDICT`. In this case, the model function must return a +`tf.estimator.EstimatorSpec` containing the prediction. + +The model must have been trained prior to making a prediction. The trained model +is stored on disk in the `model_dir` directory established when you +instantiated the Estimator. + +The code to generate the prediction for this model looks as follows: + +```python +# Compute predictions. +predicted_classes = tf.argmax(logits, 1) +if mode == tf.estimator.ModeKeys.PREDICT: + predictions = { + 'class_ids': predicted_classes[:, tf.newaxis], + 'probabilities': tf.nn.softmax(logits), + 'logits': logits, + } + return tf.estimator.EstimatorSpec(mode, predictions=predictions) +``` +The prediction dictionary contains everything that your model returns when run +in prediction mode. + +
    +Additional outputs added to the output layer. +
    + +The `predictions` holds the following three key/value pairs: + +* `class_ids` holds the class id (0, 1, or 2) representing the model's + prediction of the most likely species for this example. +* `probabilities` holds the three probabilities (in this example, 0.02, 0.95, + and 0.03) +* `logit` holds the raw logit values (in this example, -1.3, 2.6, and -0.9) + +We return that dictionary to the caller via the `predictions` parameter of the +@{tf.estimator.EstimatorSpec}. The Estimator's +@{tf.estimator.Estimator.predict$`predict`} method will yield these +dictionaries. + +### Calculate the loss + +For both [training](#train) and [evaluation](#evaluate) we need to calculate the +model's loss. This is the +[objective](https://developers.google.com/machine-learning/glossary/#objective) +that will be optimized. + +Before we calculate loss, we we must first convert the labels from a list of +indexes `(0, 1, 2)` to a +[one-hot representation](https://developers.google.com/machine-learning/glossary/#one-hot_encoding) +by calling @{tf.one_hot}. Then, we can calculate the loss by calling +@{tf.losses.softmax_cross_entropy}. Here's the complete code: + + +```python + # Convert the labels to a one-hot tensor of shape (length of features, 3) + # and with a on-value of 1 for each one-hot vector of length 3. + onehot_labels = tf.one_hot(labels, 3, 1, 0) + + # Compute loss. + loss = tf.losses.softmax_cross_entropy( + onehot_labels=onehot_labels, logits=logits) +``` + +### Evaluate + +When the Estimator's `evaluate` method is called, the `model_fn` receives +`mode = ModeKeys.EVAL`. In this case, the model function must return a +`tf.estimator.EstimatorSpec` containing the model's loss and optionally one +or more metrics. + +Although returning metrics is optional, most custom Estimators do return at +least one metric. TensorFlow provides a Metrics module @{tf.metrics} to +calculate common metrics. For brevity's sake, we'll only return accuracy. The +@{tf.metrics.accuracy} function compares our predictions against the +true values, that is, against the labels provided by the input function. The +@{tf.metrics.accuracy} function requires the labels and predictions to have the +same shape. Here's the call to @{tf.metrics.accuracy}: + +``` python + # Compute evaluation metrics. + accuracy = tf.metrics.accuracy(labels=labels, + predictions=predicted_classes, + name='acc_op') +``` + +The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for evaluation +typically contains the following information: + +* `loss`, which is the model's loss +* `eval_metric_ops`, which is an optional dictionary of metrics. + +So, we'll create a dictionary containing our sole metric. If we had calculated +other metrics, we would have added them as additional key/value pairs to that +same dictionary. Then, we'll pass that dictionary in the `eval_metric_ops` +argument of `tf.estimator.EstimatorSpec`. Here's the code: + +```python + metrics = {'accuracy': accuracy} + tf.summary.scalar('accuracy', accuracy[1]) + + if mode == tf.estimator.ModeKeys.EVAL: + return tf.estimator.EstimatorSpec( + mode, loss=loss, eval_metric_ops=metrics) +``` + +The @{tf.summary.scalar} will make accuracy available to TensorBoard (more on +this later). + +### Train + +When the Estimator's `train` method is called, the `model_fn` is called +with `mode = ModeKeys.TRAIN`. In this case, the model function must return an +`EstimatorSpec` that contains the loss and a training operation. + +Building the training operation will require an optimizer. We will use +@{tf.train.AdagradOptimizer} because we're mimicking the `DNNClassifier`, which +also uses `Adagrad` by default. The `tf.train` package provides many other +optimizers—feel free to experiment with them. + +Here is the code that builds the optimizer: + +``` python + # Instantiate an optimizer. + optimizer = tf.train.AdagradOptimizer(learning_rate=0.1) +``` + +Next, we train the model using the optimizer's +@{tf.train.Optimizer.minimize$`minimize`} method on the loss we calculated +earlier. + +The `minimize` method also takes a `global_step` parameter. TensorFlow uses this +parameter to count the number of training steps that have been processed +(to know when to end a training run). Furthermore, the `global_step` is +essential for TensorBoard graphs to work correctly. Simply call +@{tf.train.get_global_step} and pass the result to the `global_step` +argument of `minimize`. + +Here's the code to train the model: + +``` python + # Train the model by establishing an objective, which is to + # minimize loss using that optimizer. + train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) +``` + +The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for training +must have the following fields set: + +* `loss`, which contains the value of the loss function. +* `train_op`, which executes a training step. + +Here's our code to call `EstimatorSpec`: + +```python + # Return training information. + return tf.estimator.EstimatorSpec( + mode=tf.estimator.ModeKeys.TRAIN, + loss=loss, + train_op=train_op) +``` + +The model function is now complete. + +## The custom Estimator + +Instantiate the custom Estimator through the Estimator base class as follows: + +```python + # Build 2 hidden layer DNN with 10, 10 units respectively. + classifier = tf.estimator.Estimator( + model_fn=my_model, + params={ + 'feature_columns': my_feature_columns, + # Two hidden layers of 10 nodes each. + 'hidden_units': [10, 10], + # The model must choose between 3 classes. + 'n_classes': 3, + }) +``` +Here the `params` dictionary serves the same purpose as the key-word +arguments of `DNNClassifier`; that is, the `params` dictionary lets you +configure your Estimator without modifying the code in the `model_fn`. + +The rest of the code to train, evaluate, and generate predictions using our +Estimator is the same as for the pre-made `DNNClassifier`. For example, the +following line will train the model: + +```python + # Train the Model. + classifier.train( + input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size), + steps=args.train_steps) +``` + +## TensorBoard + +You can view training results for your custom Estimator in TensorBoard. To see +this reporting, start TensorBoard from your command line as follows: + +```bsh +# Replace PATH with the actual path passed as model_dir +tensorboard --logdir=PATH +``` + +Then, open TensorBoard by browsing to: [http://localhost:6006](http://localhost:6006) + +All the pre-made Estimators automatically log a lot of information to +TensorBoard. With custom Estimators, however, TensorBoard only provides one +default log (a graph of the loss) plus the information you explicitly tell +TensorBoard to log. For the custom Estimator you just created, TensorBoard +generates the following: + +
    +Accuracy, steps/second, and loss 'scalar' graphs from tensorboard +
    +
    +TensorBoard displays three graphs. +
    + +In brief, here's what the three graphs tell you: + +* global_step/sec: A performance indicator showing how many batches (gradient + updates) we processed per second as the model trains. + +* loss: The loss reported. + +* accuracy: The accuracy is recorded by the following two lines: + + * `eval_metric_ops={'my_accuracy': accuracy})`, during evaluation. + * `tf.summary.scalar('accuracy', accuracy[1])`, during training. + +These tensorboard graphs are one of the main reasons it's important to pass a +`global_step` to your optimizer's `minimize` method. The model can't record +the x-coordinate for these graphs without it. + +Note the following in the `my_accuracy` and `loss` graphs: + +* The orange line represents training. +* The blue dot represents evaluation. + +During training, summaries (the orange line) are recorded periodically as +batches are processed, which is why it becomes a graph spanning x-axis range. + +By contrast, evaluation produces only a single point on the graph for each call +to `evaluate`. This point contains the average over the entire evaluation call. +This has no width on the graph as it is evaluated entirely from the model state +at a particular training step (from a single checkpoint). + +As suggested in the following figure, you may see and also selectively +disable/enable the reporting using the controls on the left side. + +
    +Check-boxes allowing the user to select which runs are shown. +
    +
    +Enable or disable reporting. +
    + + +## Summary + +Although pre-made Estimators can be an effective way to quickly create new +models, you will often need the additional flexibility that custom Estimators +provide. Fortunately, pre-made and custom Estimators follow the same +programming model. The only practical difference is that you must write a model +function for custom Estimators; everything else is the same. + +For more details, be sure to check out: + +* The +[official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/mnist), +which uses a custom estimator. + +* The TensorFlow +[official models repository](https://github.com/tensorflow/models/tree/master/official), +which contains more curated examples using custom estimators. + +* This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces +TensorBoard. + + -- GitLab From c68d35becf59396f86b5d90d236405eafef3349e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 12:57:42 -0800 Subject: [PATCH 0890/1801] Change HLO verifier semantics for bitcasts to: Bitcasts that are not the root of a computation can be any shape byte size. Bitcasts that are the root of a computation must have the same shape byte size as their operand. PiperOrigin-RevId: 177204171 --- tensorflow/compiler/xla/service/hlo_verifier.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 15188c4057..2c09d2defb 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -143,9 +143,13 @@ class ShapeVerifier : public DfsHloVisitor { } Status HandleBitcast(HloInstruction* bitcast) override { - // Bitcasts can be any shape, as long as the size matches the operand size. - TF_RET_CHECK(shape_size_fn_(bitcast->shape()) == - shape_size_fn_(bitcast->operand(0)->shape())); + // Bitcasts that are not the root of a computation can be any shape. + // Bitcasts that are the root of a computation must have the same shape + // byte size as their operand. + if (bitcast->parent()->root_instruction() == bitcast) { + TF_RET_CHECK(shape_size_fn_(bitcast->shape()) == + shape_size_fn_(bitcast->operand(0)->shape())); + } return tensorflow::Status::OK(); } -- GitLab From 9c7fd28542b37e7980f2b0a155996cc1703bd0d7 Mon Sep 17 00:00:00 2001 From: James Keeling Date: Tue, 28 Nov 2017 12:59:35 -0800 Subject: [PATCH 0891/1801] Allow unsorted_segment_sum and unsorted_segment_max to take int64 num_segments Previously this argument could only be an int32. It can now be int32 or int64, defaulting to int32. This fixes bugs that can occur when calling _IndexedSlicesToTensor with int64 arguments. PiperOrigin-RevId: 177204464 --- tensorflow/core/ops/math_ops.cc | 6 ++++-- .../segment_reduction_ops_test.py | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index d30b847696..d7afd02df6 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1811,10 +1811,11 @@ output: Has same shape as data, except for dimension 0 which REGISTER_OP("UnsortedSegmentSum") .Input("data: T") .Input("segment_ids: Tindices") - .Input("num_segments: int32") + .Input("num_segments: Tnumsegments") .Output("output: T") .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") + .Attr("Tnumsegments: {int32,int64} = DT_INT32") .SetShapeFn(UnsortedSegmentReductionShapeFn) .Doc(R"doc( Computes the sum along segments of a tensor. @@ -1849,10 +1850,11 @@ output: Has same shape as data, except for the first `segment_ids.rank` REGISTER_OP("UnsortedSegmentMax") .Input("data: T") .Input("segment_ids: Tindices") - .Input("num_segments: int32") + .Input("num_segments: Tnumsegments") .Output("output: T") .Attr("T: realnumbertype") .Attr("Tindices: {int32,int64}") + .Attr("Tnumsegments: {int32,int64} = DT_INT32") .SetShapeFn(UnsortedSegmentReductionShapeFn) .Doc(R"doc( Computes the Max along segments of a tensor. diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 99f9f09690..fd58cdb170 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -266,6 +266,27 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): self.assertAllClose(np_ans, tf_ans) self.assertShapeEqual(np_ans, s) + def testNumSegmentsTypes(self): + dtypes = [dtypes_lib.int32, dtypes_lib.int64] + indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) + num_segments = 12 + for indices in indices_flat, indices_flat.reshape(5, 2): + shape = indices.shape + (2,) + for dtype in dtypes: + with self.test_session(use_gpu=True): + tf_x, np_x = self._input(shape) + num_segments_constant = constant_op.constant( + num_segments, dtype=dtype) + np_ans = self._segmentReduce( + indices, np_x, np.add, op2=None, num_out_rows=num_segments) + s = math_ops.unsorted_segment_sum( + data=tf_x, + segment_ids=indices, + num_segments=num_segments_constant) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) + self.assertShapeEqual(np_ans, s) + def testGradientSegmentSum(self): num_cols = 2 indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) -- GitLab From f93c8a72154fd22fe1578bf448df156acd54fddf Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 28 Nov 2017 13:40:05 -0800 Subject: [PATCH 0892/1801] [XLA:CPU] Avoid using the untiled lowering for dot when possible We still need the "make rhs column major" layout assignment optimization since we see significant regressions without it. I did not port the logic around single_threaded_eigen from ProfitableToImplementDotInUntiledLlvmIr. That logic became stale after we changed dot-matrix products to always be single threaded, even when calling into the Eigen implementation. PiperOrigin-RevId: 177210146 --- .../xla/service/cpu/dot_op_emitter.cc | 53 +++---------------- .../compiler/xla/service/cpu/dot_op_emitter.h | 16 ++---- .../xla/service/cpu/layout_assignment.cc | 6 +-- 3 files changed, 12 insertions(+), 63 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 4c40dae512..8f7b478cee 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -518,9 +518,7 @@ DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, bool DotOpEmitter::ShapesAreLegalForRuntimeDot() const { return true; } bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { - if (dot_.shape().dimensions_size() != 2 || - ProfitableToImplementDotInUntiledLlvmIr(dot_) == - DotInLlvmIrProfitable::kYes) { + if (dot_.shape().dimensions_size() != 2) { return false; } @@ -977,9 +975,7 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } - if (ProfitableToImplementDotInUntiledLlvmIr(hlo) == - DotInLlvmIrProfitable::kYes || - ProfitableToImplementDotInTiledLlvmIr(hlo)) { + if (ProfitableToImplementDotInTiledLlvmIr(hlo)) { return false; } @@ -1010,46 +1006,11 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { return false; } -DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( - const HloInstruction& dot) { - if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) { - const Shape& result_shape = dot.shape(); - // kReductionDimensionThresholdBytes was chosen to be 1/4 of a typical L1 - // cache line size, so that we can have the reduction dimension of both the - // LHS and RHS matrices and still have some space "left over". This needs - // to be tuned further. - const int64 kReductionDimensionThresholdBytes = 8 * 1024; - const bool single_threaded_eigen = - !dot.GetModule()->config().debug_options().xla_cpu_multi_thread_eigen(); - - // This is the point at which it is better to call into Eigen and shard the - // dot across multiple worker threads. This is a rough estimate by running - // a matmult benchmark on my local machine, and it can be tuned further. - const int64 kMaxSingleThreadedFlops = 16 * 1024; - - const int64 M = result_shape.dimensions(0); - const int64 N = result_shape.dimensions(1); - const int64 K = dot.operand(1)->shape().dimensions(0); - const int64 primitive_type_size = - ShapeUtil::ByteSizeOfPrimitiveType(result_shape.element_type()); - if (M == 1 && - K * primitive_type_size <= kReductionDimensionThresholdBytes && - (single_threaded_eigen || M * K * N <= kMaxSingleThreadedFlops)) { - // Heuristics: - // - // - Look for a configuration where we will likely be able to keep LHS in - // L1 and do a cache-optimal traversal of RHS. - // - // - Bail out on matrices that are large enough that Eigen can profitably - // shard the computation across multiple cores. This only applies when - // multi-threading is enabled. - return LayoutUtil::IsMonotonicWithDim0Major( - dot.operand(1)->shape().layout()) - ? DotInLlvmIrProfitable::kWithColumnMajorRhs - : DotInLlvmIrProfitable::kYes; - } - } - return DotInLlvmIrProfitable::kNo; +// For vector-matrix dot products, it is always profitable to make the Rhs +// column major. +bool ProfitableToMakeDotRhsColumnMajor(const HloInstruction& hlo) { + return hlo.opcode() == HloOpcode::kDot && + hlo.shape().dimensions_size() == 2 && hlo.shape().dimensions(0) == 1; } bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) { diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index c9168ccc0f..2badb26f90 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -32,19 +32,9 @@ namespace cpu { bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo); -enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs }; - -// Returns a value to indicate if (and under what conditions) will lowering -// |dot| as a untiled LLVM IR dot operation be profitable over calling into -// Eigen or emitting a tiled LLVM IR implementation. Possible return values -// are: -// -// * DotInLlvmIrProfitable::kYes - always profitable. -// * DotInLlvmIrProfitable::kNo - never profitable. -// * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make -// the Rhs layout column major. -DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr( - const HloInstruction& dot); +// Returns true to indicate that |hlo| is a dot, and that it is profitable to +// switch the layout of the |hlo|'s RHS operand to column major. +bool ProfitableToMakeDotRhsColumnMajor(const HloInstruction& hlo); // Returns true to indicate that we can generate a tiled LLVM IR implementation // for |dot|. diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc index 3f2d101959..69466fd32e 100644 --- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc @@ -52,8 +52,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( tensorflow::gtl::FlatMap should_make_rhs_col_major_cache; auto should_make_rhs_col_major = [&](const HloInstruction& instruction) { - if (ProfitableToImplementDotInUntiledLlvmIr(instruction) != - DotInLlvmIrProfitable::kWithColumnMajorRhs) { + if (!ProfitableToMakeDotRhsColumnMajor(instruction)) { return false; } @@ -69,8 +68,7 @@ Status CpuLayoutAssignment::AddBackendConstraints( bool result = std::all_of( rhs->users().begin(), rhs->users().end(), [&](HloInstruction* user) { - return ProfitableToImplementDotInUntiledLlvmIr(*user) == - DotInLlvmIrProfitable::kWithColumnMajorRhs && + return ProfitableToMakeDotRhsColumnMajor(*user) && user->operand(0) != rhs; }); -- GitLab From c294fcfd85c03a801d3aad83cfd08055dadbad1a Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Tue, 28 Nov 2017 14:10:24 -0800 Subject: [PATCH 0893/1801] Dataset support within Estimator. With this cl Input_fn can return a Dataset. PiperOrigin-RevId: 177215252 --- tensorflow/python/estimator/BUILD | 1 + tensorflow/python/estimator/estimator.py | 49 +++++++++--- tensorflow/python/estimator/estimator_test.py | 74 +++++++++++++++++++ 3 files changed, 114 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 03f386e9cf..8e6945b0f3 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -433,6 +433,7 @@ py_library( "//tensorflow/python:summary", "//tensorflow/python:training", "//tensorflow/python:util", + "//tensorflow/python/data", "//tensorflow/python/saved_model:builder", "//tensorflow/python/saved_model:tag_constants", "//third_party/py/numpy", diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index f267f4a54e..63103ef4c1 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -30,6 +30,7 @@ from google.protobuf import message from tensorflow.core.framework import summary_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session as tf_session +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import context from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator import run_config @@ -416,7 +417,7 @@ class Estimator(object): with ops.Graph().as_default() as g: random_seed.set_random_seed(self._config.tf_random_seed) self._create_and_assert_global_step(g) - features = self._get_features_from_input_fn( + features, input_hooks = self._get_features_from_input_fn( input_fn, model_fn_lib.ModeKeys.PREDICT) estimator_spec = self._call_model_fn( features, None, model_fn_lib.ModeKeys.PREDICT, self.config) @@ -426,7 +427,7 @@ class Estimator(object): checkpoint_filename_with_path=checkpoint_path, scaffold=estimator_spec.scaffold, config=self._session_config), - hooks=hooks) as mon_sess: + hooks=input_hooks + hooks) as mon_sess: while not mon_sess.should_stop(): preds_evaluated = mon_sess.run(predictions) if not isinstance(predictions, dict): @@ -582,6 +583,11 @@ class Estimator(object): def _get_features_from_input_fn(self, input_fn, mode): """Extracts the `features` from return values of `input_fn`.""" result = self._call_input_fn(input_fn, mode) + input_hooks = [] + if isinstance(result, dataset_ops.Dataset): + iterator = result.make_initializable_iterator() + input_hooks.append(_DatasetInitializerHook(iterator)) + result = iterator.get_next() if isinstance(result, (list, tuple)): # Unconditionally drop the label (the second element of result). result = result[0] @@ -590,16 +596,22 @@ class Estimator(object): logging.warning('Input graph does not use tf.data.Dataset or contain a ' 'QueueRunner. That means predict yields forever. ' 'This is probably a mistake.') - return result + return result, input_hooks def _get_features_and_labels_from_input_fn(self, input_fn, mode): + """Extracts the `features` and labels from return values of `input_fn`.""" result = self._call_input_fn(input_fn, mode) + input_hooks = [] + if isinstance(result, dataset_ops.Dataset): + iterator = result.make_initializable_iterator() + input_hooks.append(_DatasetInitializerHook(iterator)) + result = iterator.get_next() if isinstance(result, (list, tuple)): if len(result) != 2: raise ValueError( 'input_fn should return (feautures, labels) as a len 2 tuple.') - return result - return result, None + return result[0], result[1], input_hooks + return result, None, input_hooks def _extract_batch_length(self, preds_evaluated): """Extracts batch length of predictions.""" @@ -723,8 +735,10 @@ class Estimator(object): random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) training_util._get_or_create_global_step_read() # pylint: disable=protected-access - features, labels = self._get_features_and_labels_from_input_fn( - input_fn, model_fn_lib.ModeKeys.TRAIN) + features, labels, input_hooks = ( + self._get_features_and_labels_from_input_fn( + input_fn, model_fn_lib.ModeKeys.TRAIN)) + worker_hooks.extend(input_hooks) estimator_spec = self._call_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.config) # Check if the user created a loss summary, and add one if they didn't. @@ -822,8 +836,9 @@ class Estimator(object): with ops.Graph().as_default() as g: random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) - features, labels = self._get_features_and_labels_from_input_fn( - input_fn, model_fn_lib.ModeKeys.EVAL) + features, labels, input_hooks = ( + self._get_features_and_labels_from_input_fn( + input_fn, model_fn_lib.ModeKeys.EVAL)) estimator_spec = self._call_model_fn( features, labels, model_fn_lib.ModeKeys.EVAL, self.config) @@ -844,7 +859,8 @@ class Estimator(object): 'already defines a default metric with the same name.') eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor - all_hooks = list(hooks or []) + all_hooks = list(input_hooks) + all_hooks.extend(hooks) all_hooks.extend(list(estimator_spec.evaluation_hooks or [])) eval_results = evaluation._evaluate_once( # pylint: disable=protected-access @@ -1039,3 +1055,16 @@ def _has_dataset_or_queue_runner(maybe_tensor): # Now, check queue. return ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS) + + +class _DatasetInitializerHook(training.SessionRunHook): + + def __init__(self, iterator): + self._iterator = iterator + + def begin(self): + self._initializer = self._iterator.initializer + + def after_create_session(self, session, coord): + del coord + session.run(self._initializer) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index c1b773b8c4..db64fbc9cc 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -913,6 +913,80 @@ class EstimatorGetVariablesTest(test.TestCase): self.assertEqual(3., est.get_variable_value('three')) +class EstimatorDatasetIntegrationTest(test.TestCase): + """Tests dataset integration.""" + + def test_returned_by_input_fn(self): + + def _input_fn(): + return dataset_ops.Dataset.from_tensors(([1.], [2.])) + + def _model_fn(features, labels, mode): + return model_fn_lib.EstimatorSpec( + mode, + loss=features + labels, # 1 + 2 + train_op=state_ops.assign_add(training.get_global_step(), 1)) + + est = estimator.Estimator(model_fn=_model_fn) + est.train(_input_fn, steps=1) + scores = est.evaluate(_input_fn, steps=1) + self.assertEqual(3., scores[model_fn_lib.LOSS_METRIC_KEY]) + + def test_with_none_labels(self): + + def _input_fn(): + return dataset_ops.Dataset.from_tensors([7.]) + + def _model_fn(features, labels, mode): + self.assertIsNone(labels) + return model_fn_lib.EstimatorSpec( + mode, + loss=features, # 7 + train_op=state_ops.assign_add(training.get_global_step(), 1)) + + est = estimator.Estimator(model_fn=_model_fn) + est.train(_input_fn, steps=1) + scores = est.evaluate(_input_fn, steps=1) + self.assertEqual(7., scores[model_fn_lib.LOSS_METRIC_KEY]) + + def test_with_predict(self): + + def _input_fn(): + return dataset_ops.Dataset.from_tensors([10.]) + + def _model_fn(features, labels, mode): + _ = labels + return model_fn_lib.EstimatorSpec( + mode, + predictions=features, # 10 + loss=features, # 10 + train_op=state_ops.assign_add(training.get_global_step(), 1)) + + est = estimator.Estimator(model_fn=_model_fn) + est.train(_input_fn, steps=1) + self.assertEqual([10.], next(est.predict(input_fn=_input_fn))) + + def test_batching(self): + + def _input_fn(): + return dataset_ops.Dataset.from_tensor_slices(([[1.], [2.]], + [[10.], [20.]])).batch(1) + + def _model_fn(features, labels, mode): + return model_fn_lib.EstimatorSpec( + mode, + predictions=features, + loss=features + (0 if labels is None else labels), # 11, 22 + train_op=state_ops.assign_add(training.get_global_step(), 1)) + + est = estimator.Estimator(model_fn=_model_fn) + est.train(_input_fn) + scores = est.evaluate(_input_fn) + # (11 + 22)/2 = 16.5 + self.assertEqual(16.5, scores[model_fn_lib.LOSS_METRIC_KEY]) + self.assertEqual([1., 2.], list(est.predict(_input_fn))) + + class EstimatorEvaluateTest(test.TestCase): def test_input_fn_args(self): -- GitLab From 49bb801e65caf6afeb7cc7f67a168c9a19582ad1 Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Tue, 28 Nov 2017 14:11:13 -0800 Subject: [PATCH 0894/1801] Changed to allow removing side-effect instructions from an HLO computation and moved the condition to the hlo_dce pass. PiperOrigin-RevId: 177215395 --- .../compiler/xla/service/hlo_computation.cc | 7 ++----- .../compiler/xla/service/hlo_computation.h | 16 ++++++++++----- tensorflow/compiler/xla/service/hlo_dce.cc | 3 ++- .../compiler/xla/service/hlo_dce_test.cc | 20 +++++++++++++++++++ .../xla/service/while_loop_simplifier.cc | 4 ++-- 5 files changed, 37 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index c215cc48d6..014a851c96 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -176,10 +176,6 @@ bool HloComputation::IsRemovable(const HloInstruction* instruction) { return false; } - if (instruction->HasSideEffect()) { - return false; - } - return true; } @@ -207,7 +203,8 @@ Status HloComputation::RemoveInstructionAndUnusedOperands( worklist.pop(); if (removed.count(item) != 0 || item->user_count() != 0 || - item == root_instruction() || !IsRemovable(item)) { + item == root_instruction() || !IsRemovable(item) || + item->HasSideEffect()) { continue; } for (int i = 0; i < item->operand_count(); ++i) { diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 353b30bc69..ccedda2a03 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -313,11 +313,17 @@ class HloComputation { replacements, HloModule* module = nullptr, const string& suffix = "clone"); - // Returns true if the given instruction can be removed from the - // computation. Instructions such as parameters and send/receive instructions - // cannot be removed without violating invariants of the HLO computation or - // module with the exception of fusion computation. A parameter instruction - // is removable for a fusion computation. + // Returns true if the given instruction can be removed from the computation. + // Parameter instructions cannot be removed without violating invariants of + // the HLO computation with the exception of fusion computation. A parameter + // instruction is removable for a fusion computation. + // + // Note that IsRemovable() is a necessariy condition to remove an instruction + // rather than a sufficient condition. For example, instructions with + // side-effect (e.g., Send, Infeed) may be removed from a computation, but the + // transformation must guarantee the invariants relevant to the instructions + // still hold (e.g., Send and Recv must be removed together to make each + // channel complete). bool IsRemovable(const HloInstruction* instruction); // Returns true if this computation has a side effect. A computation has a diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc index 40e67c8780..1e5f0f797a 100644 --- a/tensorflow/compiler/xla/service/hlo_dce.cc +++ b/tensorflow/compiler/xla/service/hlo_dce.cc @@ -55,7 +55,8 @@ StatusOr HloDCE::Run(HloModule* module) { for (auto* instruction : computation->instructions()) { if (instruction->user_count() == 0 && live_instructions.count(instruction) == 0 && - computation->IsRemovable(instruction)) { + computation->IsRemovable(instruction) && + !instruction->HasSideEffect()) { dead_roots.push_back(instruction); } } diff --git a/tensorflow/compiler/xla/service/hlo_dce_test.cc b/tensorflow/compiler/xla/service/hlo_dce_test.cc index d54b9a2708..5a56607a66 100644 --- a/tensorflow/compiler/xla/service/hlo_dce_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dce_test.cc @@ -70,6 +70,26 @@ TEST_F(HloDceTest, NoDeadCode) { EXPECT_EQ(3, computation->instruction_count()); } +TEST_F(HloDceTest, InstructionsWithSideEffect) { + // Verify that side-effect instructions (Send in this test) are not removed. + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42.0f))); + builder.AddInstruction( + HloInstruction::CreateSend(constant, /*channel_id=*/0)); + builder.AddInstruction(HloInstruction::CreateTuple({})); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_EQ(3, computation->instruction_count()); + + HloDCE dce; + EXPECT_FALSE(dce.Run(module.get()).ValueOrDie()); + + EXPECT_EQ(3, computation->instruction_count()); +} + TEST_F(HloDceTest, DeadParameters) { // Verify that dead parameters are not removed, but use of the dead parameters // are. diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index b38ee907d7..b2fd64a4d9 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -289,7 +289,7 @@ static StatusOr TryRemoveDeadWhileParams(HloInstruction* while_op) { // Don't try this transformation if the while loop isn't removable, since if // it succeeds ultimately we're going to have to replace the old while loop // with a new one. - if (!while_op->parent()->IsRemovable(while_op)) { + if (!while_op->parent()->IsRemovable(while_op) || while_op->HasSideEffect()) { VLOG(2) << "Can't remove dead parameters from non-removable while op."; return false; } @@ -558,7 +558,7 @@ static StatusOr TryRemoveWhileLoop(HloInstruction* while_op) { // the loop aren't removed, just cloned and added back to the loop. // Nevertheless our infrastructure sees loop simplification as removal of // these nodes and currently doesn't allow it. - if (!while_op->parent()->IsRemovable(while_op)) { + if (!while_op->parent()->IsRemovable(while_op) || while_op->HasSideEffect()) { VLOG(2) << "Not attempting to remove while loop it is not removable: " << while_op->ToShortString(); return false; -- GitLab From b5683d210834fd314410ea4b9c1a756b473fdece Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 14:12:19 -0800 Subject: [PATCH 0895/1801] Change description of sparse_column_with_integerized_feature to make consistent with the _SparseColumn that it creates. Documentation here says that the bucket_size must be an int that is greater than 1. The check performed when creating a _SparseColumn only requires that the bucket_size be at least 1. Hence, bucket_size==1 should be ok. PiperOrigin-RevId: 177215556 --- tensorflow/contrib/layers/python/layers/feature_column.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 226d933d85..092d418c3f 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -521,7 +521,7 @@ def sparse_column_with_integerized_feature(column_name, Args: column_name: A string defining sparse column name. - bucket_size: An int that is > 1. The number of buckets. It should be bigger + bucket_size: An int that is >= 1. The number of buckets. It should be bigger than maximum feature. In other words features in this column should be an int64 in range [0, bucket_size) combiner: A string specifying how to reduce if the sparse column is @@ -539,7 +539,7 @@ def sparse_column_with_integerized_feature(column_name, An integerized _SparseColumn definition. Raises: - ValueError: bucket_size is not greater than 1. + ValueError: bucket_size is less than 1. ValueError: dtype is not integer. """ return _SparseColumnIntegerized( -- GitLab From bb33903b4b34e4ac096908c1a08cf5ffa33b6ccf Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Tue, 28 Nov 2017 14:17:25 -0800 Subject: [PATCH 0896/1801] add parentheses because this test is failing in my current CL PiperOrigin-RevId: 177216384 --- tensorflow/python/framework/ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index e929cc8abf..371eadcd13 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1537,7 +1537,7 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): self.assertEqual(future.calls, 1) else: a = constant_op.constant(1.0) - b = future + b = future() with ops.control_dependencies([a, b]): c = constant_op.constant(3.0) self.assertEqual(future.calls, 1) -- GitLab From e917bf7131b3216f7d09c0251d27a9aafd5b8373 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 14:36:12 -0800 Subject: [PATCH 0897/1801] [TF] Improve LogSoftMax performance In Eigen, eval should be called immediately before a broadcast. Otherwise, broadcast's lazy evaluation causes the broadcasted expression to be evaluated many times. Moving the eval changes the number of calls to log from batch_size * num_classes to batch_size. PiperOrigin-RevId: 177219486 --- tensorflow/core/kernels/softmax_op_functor.h | 33 +++++++++----------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/kernels/softmax_op_functor.h b/tensorflow/core/kernels/softmax_op_functor.h index 1f38bdce8c..d3a267ed87 100644 --- a/tensorflow/core/kernels/softmax_op_functor.h +++ b/tensorflow/core/kernels/softmax_op_functor.h @@ -64,23 +64,21 @@ struct SoftmaxEigenImpl { one_by_class.set(1, num_classes); #endif // shifted_logits = logits - max(logits along classes); - auto shifted_logits = (logits - - logits.maximum(along_class) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); + auto shifted_logits = (logits - logits.maximum(along_class) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); if (log) { // Calculate the log of the softmax // softmax = logits - max(logits along classes); softmax.device(d) = shifted_logits; // softmax = softmax - log(sum(exp(softmax along classes))); - softmax.device(d) = (softmax - - softmax.exp() - .sum(along_class) - .eval() - .reshape(batch_by_one) - .log() - .broadcast(one_by_class)); + softmax.device(d) = (softmax - softmax.exp() + .sum(along_class) + .log() + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); } else { // NOTE(touts): If you modify this implementation please run // the BM_ImageNetSoftmaxFwd benchmark in nn_ops_test.cc. @@ -88,12 +86,11 @@ struct SoftmaxEigenImpl { // softmax = exp(logits - max(logits along classes)); softmax.device(d) = shifted_logits.exp(); // softmax = softmax * (1 / sum(softmax along classes)); - softmax.device(d) = (softmax * - softmax.sum(along_class) - .inverse() - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); + softmax.device(d) = (softmax * softmax.sum(along_class) + .inverse() + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); } } }; -- GitLab From f252ea2d8ac13dd5c558e3862b3885585d3bccfe Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Tue, 28 Nov 2017 14:48:22 -0800 Subject: [PATCH 0898/1801] Deprecating `tf.data.Dataset.from_sparse_tensor_slices`. PiperOrigin-RevId: 177221417 --- tensorflow/python/data/ops/BUILD | 1 + tensorflow/python/data/ops/dataset_ops.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index 05acfe4de7..695d3ef790 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -21,6 +21,7 @@ py_library( "//tensorflow/python:sparse_tensor", "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_util", + "//tensorflow/python:util", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", "//third_party/py/numpy", diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index dbe29c087a..b5a8622306 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -41,6 +41,7 @@ from tensorflow.python.ops import gen_io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import script_ops from tensorflow.python.ops import sparse_ops +from tensorflow.python.util import deprecation class Dataset(object): @@ -219,6 +220,7 @@ class Dataset(object): return TensorSliceDataset(tensors) @staticmethod + @deprecation.deprecated(None, "Use `tf.data.Dataset.from_tensor_slices()`.") def from_sparse_tensor_slices(sparse_tensor): """Splits each rank-N `tf.SparseTensor` in this dataset row-wise. -- GitLab From efe5658aaa6f1666d4967880311430a70bdb23b9 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 28 Nov 2017 14:50:43 -0800 Subject: [PATCH 0899/1801] Make 'input_map' argument to import_graph_def work with C API. PiperOrigin-RevId: 177221757 --- tensorflow/python/framework/importer.py | 88 +++++++++++++++----- tensorflow/python/framework/importer_test.py | 29 ++----- 2 files changed, 74 insertions(+), 43 deletions(-) diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index 434cbda7ad..73c35de578 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -179,12 +179,11 @@ def _ProcessInputMapParam(input_map): def _ProcessReturnElementsParam(return_elements): """Type-checks and possibly canonicalizes `return_elements`.""" - if return_elements is not None: - return_elements = tuple(return_elements) - if not all(isinstance(x, compat.bytes_or_text_types) - for x in return_elements): - raise TypeError('return_elements must be a list of strings.') - return return_elements + if return_elements is None: return None + if not all(isinstance(x, compat.bytes_or_text_types) + for x in return_elements): + raise TypeError('return_elements must be a list of strings.') + return tuple(compat.as_str(x) for x in return_elements) def _FindAttrInOpDef(attr_name, op_def): @@ -194,16 +193,60 @@ def _FindAttrInOpDef(attr_name, op_def): return None -def _PopulateTFImportGraphDefOptions(options, prefix, return_elements): +def _ConvertInputMapValues(name, input_map): + """Ensures all input map values are tensors. + + This should be called from inside the import name scope. + + Args: + name: the `name` argument passed to import_graph_def + input_map: the `input_map` argument passed to import_graph_def. + + Returns: + An possibly-updated version of `input_map`. + + Raises: + ValueError: if input map values cannot be converted due to empty name scope. + """ + if not all(isinstance(v, ops.Tensor) for v in input_map.values()): + if name == '': # pylint: disable=g-explicit-bool-comparison + raise ValueError( + 'tf.import_graph_def() requires a non-empty `name` if `input_map` ' + 'contains non-Tensor values. Try calling tf.convert_to_tensor() on ' + '`input_map` values before calling tf.import_graph_def().') + with ops.name_scope('_inputs'): + input_map = {k: ops.convert_to_tensor(v) for k, v in input_map.items()} + return input_map + + +def _PopulateTFImportGraphDefOptions(options, prefix, input_map, + return_elements): """Populates the TF_ImportGraphDefOptions `options`.""" c_api.TF_ImportGraphDefOptionsSetPrefix(options, prefix) + for input_src, input_dst in input_map.items(): + input_src = compat.as_str(input_src) + if input_src.startswith('^'): + src_name = compat.as_bytes(input_src[1:]) + dst_op = input_dst._as_tf_output().oper # pylint: disable=protected-access + c_api.TF_ImportGraphDefOptionsRemapControlDependency(options, src_name, + dst_op) + else: + src_name, src_idx = _ParseTensorName(input_src) + src_name = compat.as_str(src_name) + dst_output = input_dst._as_tf_output() # pylint: disable=protected-access + c_api.TF_ImportGraphDefOptionsAddInputMapping(options, src_name, + src_idx, dst_output) for name in return_elements or []: if ':' in name: op_name, index = _ParseTensorName(name) + op_name = compat.as_str(op_name) c_api.TF_ImportGraphDefOptionsAddReturnOutput(options, op_name, index) else: - c_api.TF_ImportGraphDefOptionsAddReturnOperation(options, name) + c_api.TF_ImportGraphDefOptionsAddReturnOperation(options, + compat.as_str(name)) + + # TODO(skyewm): control dependencies def _ProcessNewOps(graph): @@ -312,17 +355,27 @@ def import_graph_def(graph_def, input_map=None, return_elements=None, else: prefix = '' + # Generate any input map tensors inside name scope + input_map = _ConvertInputMapValues(name, input_map) + scoped_options = c_api_util.ScopedTFImportGraphDefOptions() options = scoped_options.options - _PopulateTFImportGraphDefOptions(options, prefix, return_elements) + _PopulateTFImportGraphDefOptions(options, prefix, input_map, + return_elements) with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized: - with errors.raise_exception_on_not_ok_status() as status: - results = c_api.TF_GraphImportGraphDefWithResults( - graph._c_graph, serialized, options, status) # pylint: disable=protected-access + try: + with errors.raise_exception_on_not_ok_status() as status: + results = c_api.TF_GraphImportGraphDefWithResults( + graph._c_graph, serialized, options, status) # pylint: disable=protected-access + except errors.InvalidArgumentError as e: + # Convert to ValueError for backwards compatibility. + raise ValueError(str(e)) _ProcessNewOps(graph) + # TODO(skyewm): error if unused input map key + if return_elements is None: return None else: @@ -359,16 +412,7 @@ def import_graph_def(graph_def, input_map=None, return_elements=None, # more nuanced. g.graph_def_versions.CopyFrom(graph_def.versions) - if not all(isinstance(v, ops.Tensor) for v in input_map.values()): - if not scope: - # The caller must have passed `name=''`. - raise ValueError( - 'tf.import_graph_def() requires a non-empty `name` if `input_map`' - ' contains non-Tensor values. Try calling tf.convert_to_tensor() ' - 'on `input_map` values before calling tf.import_graph_def().') - with ops.name_scope('_inputs'): - input_map = {k: ops.convert_to_tensor(v) - for k, v in input_map.items()} + input_map = _ConvertInputMapValues(name, input_map) # NOTE(mrry): We do this in two passes, because there may be a cycle in # `graph_def`. diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 5a6187c8a6..000a88bc09 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -201,8 +201,6 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(outer_inner_c.name, "outer/inner/c_1") def testInputMap(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) feed_b_1 = constant_op.constant(1, dtype=dtypes.int32) @@ -230,8 +228,6 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d.inputs[1], feed_b_1) def testInputMapBytes(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) feed_b_1 = constant_op.constant(1, dtype=dtypes.int32) @@ -259,8 +255,6 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d.inputs[1], feed_b_1) def testInputMapUnicode(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) feed_b_1 = constant_op.constant(1, dtype=dtypes.int32) @@ -299,8 +293,6 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(b.inputs[0], a.outputs[0]) def testInputMapImplicitZerothOutput(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - with ops.Graph().as_default(): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) b, = importer.import_graph_def( @@ -453,8 +445,6 @@ class ImportGraphDefTest(test.TestCase): self.assertTrue("Input tensor 'A:0' not found" in str(e.exception)) def testMissingInputOpInGraphDefButAppearsInInputMap(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - with ops.Graph().as_default(): feed_a_0 = constant_op.constant(5.0) b, = importer.import_graph_def( @@ -589,19 +579,20 @@ class ImportGraphDefTest(test.TestCase): self.assertTrue("not found in graph_def: [A:2]" in str(e.exception)) def testInputMapTypeMismatch(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - + if ops._USE_C_API: + error_msg = ("Input 0 of node import/B was passed float from Const:0 " + "incompatible with expected int32.") + else: + error_msg = ("Cannot convert a tensor of type float32 to an input of " + "type int32.") with ops.Graph().as_default(): - with self.assertRaises(ValueError) as e: + with self.assertRaisesRegexp(ValueError, error_msg): importer.import_graph_def( self._MakeGraphDef(""" node { name: 'A' op: 'IntOutput' } node { name: 'B' op: 'IntInput' input: 'A:0' } """), input_map={"A:0": constant_op.constant(5.0)}) - self.assertTrue( - "Cannot convert a tensor of type float32 to an input of type int32." - in str(e.exception)) def testNoReturns(self): with ops.Graph().as_default() as g: @@ -825,8 +816,6 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual("graph_def must be a GraphDef proto.", str(e.exception)) def testInvalidInputForInputMap(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - with ops.Graph().as_default(): with self.assertRaises(TypeError) as e: importer.import_graph_def( @@ -967,7 +956,7 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(2, len(ops_with_two_inputs)) def testGradient(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API + if ops._USE_C_API: return # TODO(skyewm): get_shape() doesn't work with ops.Graph().as_default() as g: inputs = array_ops.placeholder( @@ -1226,8 +1215,6 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(z_val, -2.0) def testImportGraphWithFunctionTwice(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - g = ops.Graph() with g.as_default(): @function.Defun() -- GitLab From 9306dd922fde7b739c5a4230fdc6d9bd646fb71c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 15:02:11 -0800 Subject: [PATCH 0900/1801] Add bool value type support for gauge metrics. PiperOrigin-RevId: 177223509 --- .../core/lib/monitoring/collected_metrics.h | 1 + .../core/lib/monitoring/collection_registry.h | 6 ++++ tensorflow/core/lib/monitoring/gauge.h | 33 +++++++++++++++++-- tensorflow/core/lib/monitoring/gauge_test.cc | 22 +++++++++++++ tensorflow/core/lib/monitoring/metric_def.h | 13 +++++--- 5 files changed, 69 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/lib/monitoring/collected_metrics.h b/tensorflow/core/lib/monitoring/collected_metrics.h index fbef25619f..acdb0d86ed 100644 --- a/tensorflow/core/lib/monitoring/collected_metrics.h +++ b/tensorflow/core/lib/monitoring/collected_metrics.h @@ -88,6 +88,7 @@ struct Point { ValueType value_type; int64 int64_value; string string_value; + bool bool_value; HistogramProto histogram_value; // start_timestamp and end_timestamp indicate the time period over which this diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h index 113d37e07d..2c8e250c56 100644 --- a/tensorflow/core/lib/monitoring/collection_registry.h +++ b/tensorflow/core/lib/monitoring/collection_registry.h @@ -224,6 +224,12 @@ inline void CollectValue(const string& value, Point* const point) { point->string_value = value; } +template <> +inline void CollectValue(const bool& value, Point* const point) { + point->value_type = ValueType::kBool; + point->bool_value = value; +} + template <> inline void CollectValue(const HistogramProto& value, Point* const point) { point->value_type = ValueType::kHistogram; diff --git a/tensorflow/core/lib/monitoring/gauge.h b/tensorflow/core/lib/monitoring/gauge.h index 75471cfb22..ec978a9193 100644 --- a/tensorflow/core/lib/monitoring/gauge.h +++ b/tensorflow/core/lib/monitoring/gauge.h @@ -86,8 +86,29 @@ class GaugeCell { TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell); }; +// Explicit specialization of GaugeCell. Compared to the primary +// template, it uses atomic values as opposed to mutex. This class is +// thread-safe. +template <> +class GaugeCell { + public: + explicit GaugeCell(bool value) : value_(value) {} + ~GaugeCell() {} + + // Atomically sets the value. + void Set(bool value); + + // Retrieves the current value. + bool value() const; + + private: + std::atomic value_; + + TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell); +}; + // A stateful class for updating a gauge-like metric. Allowed ValueType are -// int64 and string. +// int64, string and bool. // // This class encapsulates a set of values (or a single value for a label-less // metric). Each value is identified by a tuple of labels. The class allows the @@ -117,6 +138,9 @@ class Gauge { // // auto* integer_gauge = Gauge::New("/tensorflow/integer_gauge", // "Integer gauge") + // + // auto* bool_gauge = Gauge::New("/tensorflow/bool_gauge", + // "Bool gauge") template static Gauge* New(MetricDefArgs&&... metric_def_args); @@ -172,12 +196,17 @@ inline void GaugeCell::Set(int64 value) { value_ = value; } inline int64 GaugeCell::value() const { return value_; } +inline void GaugeCell::Set(bool value) { value_ = value; } + +inline bool GaugeCell::value() const { return value_; } + template template Gauge* Gauge::New( MetricDefArgs&&... metric_def_args) { static_assert(std::is_same::value || - std::is_same::value, + std::is_same::value || + std::is_same::value, "Gauge only allows int64 and string types."); return new Gauge( MetricDef( diff --git a/tensorflow/core/lib/monitoring/gauge_test.cc b/tensorflow/core/lib/monitoring/gauge_test.cc index f98cfe2a3b..c8f673db38 100644 --- a/tensorflow/core/lib/monitoring/gauge_test.cc +++ b/tensorflow/core/lib/monitoring/gauge_test.cc @@ -87,6 +87,28 @@ TEST(GaugeOfStringValue, GetCell) { EXPECT_EQ("bar", same_cell->value()); } +auto* bool_gauge = + Gauge::New("/tensorflow/test/bool_gauge", "Gauge of bool value."); + +TEST(GaugeOfBoolValue, InitializedWithFalseValue) { + EXPECT_EQ(false, bool_gauge->GetCell()->value()); +} + +TEST(GaugeOfBoolValue, GetCell) { + auto* cell = bool_gauge->GetCell(); + EXPECT_EQ(false, cell->value()); + + cell->Set(true); + EXPECT_EQ(true, cell->value()); + + auto* same_cell = bool_gauge->GetCell(); + EXPECT_EQ(true, cell->value()); + + same_cell->Set(false); + EXPECT_EQ(false, cell->value()); + EXPECT_EQ(false, same_cell->value()); +} + } // namespace } // namespace monitoring } // namespace tensorflow diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h index a7f14f9c94..f046842618 100644 --- a/tensorflow/core/lib/monitoring/metric_def.h +++ b/tensorflow/core/lib/monitoring/metric_def.h @@ -28,16 +28,16 @@ namespace monitoring { // The different metric kinds available. // // Gauge indicates that the metric's values are instantaneous measurements of a -// (typically) continuously varying quantity or a string value. Examples: a -// process's current heap size, a queue's current length, the name of the binary -// used by a process. +// (typically) continuously varying value. Examples: a process's current heap +// size, a queue's current length, the name of the binary used by a process, +// whether a task is complete. // // Cumulative indicates that the metric's values represent non-negative changes // over specified time periods. Example: the number of rpc calls to a service. enum class MetricKind : int { kGauge = 0, kCumulative }; // The type of the metric values. -enum class ValueType : int { kInt64 = 0, kHistogram, kString }; +enum class ValueType : int { kInt64 = 0, kHistogram, kString, kBool }; // Everything in the internal namespace is implementation details. Do not depend // on this. @@ -61,6 +61,11 @@ inline ValueType GetValueType() { return ValueType::kString; } +template <> +inline ValueType GetValueType() { + return ValueType::kBool; +} + } // namespace internal // Abstract base class for a metric definition. -- GitLab From 8966a794411bd5d17e5ef024a96140f85a9ab500 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Tue, 28 Nov 2017 15:14:30 -0800 Subject: [PATCH 0901/1801] Add a log test for bfloat16. PiperOrigin-RevId: 177225564 --- tensorflow/compiler/xla/tests/bfloat16_test.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc index a1c53ef2aa..ac3f3f4c9d 100644 --- a/tensorflow/compiler/xla/tests/bfloat16_test.cc +++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc @@ -61,6 +61,15 @@ XLA_TEST_F(Bfloat16Test, ScalarOperation) { error_spec_); } +XLA_TEST_F(Bfloat16Test, LogOperation) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR0(static_cast(4.0f)); + builder.Log(x); + + ComputeAndCompareR0(&builder, static_cast(1.387f), {}, + error_spec_); +} + XLA_TEST_F(Bfloat16Test, NegateScalarF16) { ComputationBuilder builder(client_, TestName()); builder.Neg(builder.ConstantR0(static_cast(2.1f))); -- GitLab From d72e2a318c6b15d800aa1468dc2af658ea40dffd Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 28 Nov 2017 15:16:16 -0800 Subject: [PATCH 0902/1801] scatter_nd_update for resource variables PiperOrigin-RevId: 177225812 --- .../api_def_ResourceScatterNdUpdate.pbtxt | 69 ++++++++++++++++++ tensorflow/core/framework/common_shape_fns.cc | 7 +- tensorflow/core/kernels/BUILD | 6 +- tensorflow/core/kernels/scatter_nd_op.cc | 63 ++++++++++++++--- tensorflow/core/ops/state_ops.cc | 56 +++++++++++++++ tensorflow/python/kernel_tests/BUILD | 1 + .../kernel_tests/scatter_nd_ops_test.py | 15 ++++ tensorflow/python/ops/state_ops.py | 70 ++++++++++++++++++- 8 files changed, 274 insertions(+), 13 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt new file mode 100644 index 0000000000..b07ee9fda9 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt @@ -0,0 +1,69 @@ +op { + graph_op_name: "ResourceScatterNdUpdate" + in_arg { + name: "ref" + description: <input(0); + if (c->input_handle_shapes_and_types(0) != nullptr) { + input_shape = (*c->input_handle_shapes_and_types(0))[0].shape; + } ShapeHandle indices_shape; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &indices_shape)); ShapeHandle updates_shape; @@ -1361,7 +1364,9 @@ Status ScatterNdUpdateShape(InferenceContext* c) { } } - c->set_output(0, input_shape); + if (c->input_handle_shapes_and_types(0) == nullptr) { + c->set_output(0, input_shape); + } return Status::OK(); } diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b86739eea7..eff15e809a 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -3918,7 +3918,11 @@ tf_kernel_library( "scatter_nd_op.h", "scatter_nd_op_gpu.cu.cc", ], - deps = STATE_DEPS + [":dense_update_functor"], + deps = STATE_DEPS + [ + ":dense_update_functor", + ":training_op_helpers", + ":variable_ops", + ], ) tf_kernel_library( diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 484932ab01..98c0181afb 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -21,6 +21,7 @@ limitations under the License. #endif // GOOGLE_CUDA #include "tensorflow/core/kernels/scatter_nd_op.h" + #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -28,6 +29,8 @@ limitations under the License. #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/dense_update_functor.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/training_op_helpers.h" +#include "tensorflow/core/kernels/variable_ops.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" @@ -83,7 +86,10 @@ class ScatterNdUpdateOp : public OpKernel { const DataType dt = DataTypeToEnum::v(); const DataType dt_ref = DataTypeToEnum::ref(); const DataType index_t = DataTypeToEnum::v(); - if (IsRefType(c->input_type(0))) { + dtype_ = c->input_type(0); + if (c->input_type(0) == DT_RESOURCE) { + // TODO(apassos): what to validate here? + } else if (IsRefType(c->input_type(0))) { OP_REQUIRES_OK(c, c->MatchSignature({dt_ref, index_t, dt}, {dt_ref})); OP_REQUIRES_OK(c, c->GetAttr("use_locking", &use_exclusive_lock_)); } else { @@ -93,7 +99,16 @@ class ScatterNdUpdateOp : public OpKernel { } void Compute(OpKernelContext* c) override { - if (use_exclusive_lock_) { + if (dtype_ == DT_RESOURCE) { + if (use_exclusive_lock_) { + Var* v; + OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v)); + mutex_lock m(*v->mu()); + DoCompute(c); + } else { + DoCompute(c); + } + } else if (use_exclusive_lock_) { // If we're here, it means the input type is a ref. DCHECK(IsRefType(c->input_dtype(0))); // Hold mutex while we apply updates @@ -105,6 +120,7 @@ class ScatterNdUpdateOp : public OpKernel { } private: + DataType dtype_; bool use_exclusive_lock_; void DoCompute(OpKernelContext* c) { @@ -113,7 +129,20 @@ class ScatterNdUpdateOp : public OpKernel { Tensor params; TensorShape params_shape; - if (IsRefType(c->input_dtype(0))) { + if (dtype_ == DT_RESOURCE) { + Var* v; + OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v)); + Tensor* t = v->tensor(); + if (!use_exclusive_lock_) { + // We're not holding the lock in the outer scope so need it here. + mutex_lock m(*v->mu()); + OP_REQUIRES_OK(c, PrepareToUpdateVariable(c, t)); + } else { + OP_REQUIRES_OK(c, PrepareToUpdateVariable(c, t)); + } + params = *t; + params_shape = params.shape(); + } else if (IsRefType(c->input_dtype(0))) { params = c->mutable_input(0, use_exclusive_lock_); params_shape = params.shape(); c->forward_ref_input_to_ref_output(0, 0); @@ -159,6 +188,16 @@ class ScatterNdUpdateOp : public OpKernel { .TypeConstraint("Tindices"), \ ScatterNdUpdateOp) +#define REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, index_type, \ + dev, name, op) \ + REGISTER_KERNEL_BUILDER( \ + Name(name) \ + .Device(DEVICE_##dev) \ + .TypeConstraint("T") \ + .TypeConstraint("Tindices") \ + .HostMemory("ref"), \ + ScatterNdUpdateOp) + #define REGISTER_SCATTER_ND_KERNEL(type, dev, name) \ REGISTER_SCATTER_ND_KERNEL_INDEX(type, int32, dev, name); \ REGISTER_SCATTER_ND_KERNEL_INDEX(type, int64, dev, name) @@ -167,6 +206,11 @@ class ScatterNdUpdateOp : public OpKernel { REGISTER_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int32, dev, name, op); \ REGISTER_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int64, dev, name, op) +#define REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL(type, dev, name, op) \ + REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int32, dev, name, \ + op); \ + REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int64, dev, name, op) + #define REGISTER_SCATTER_ND_ADD_SUB(type, dev) \ REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdAdd", \ scatter_nd_op::UpdateOp::ADD); \ @@ -178,9 +222,11 @@ class ScatterNdUpdateOp : public OpKernel { #define REGISTER_SCATTER_ND(type, dev) \ REGISTER_SCATTER_ND_KERNEL(type, dev, "ScatterNd"); -#define REGISTER_SCATTER_ND_UPDATE(type, dev) \ - REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdUpdate", \ - scatter_nd_op::UpdateOp::ASSIGN); +#define REGISTER_SCATTER_ND_UPDATE(type, dev) \ + REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdUpdate", \ + scatter_nd_op::UpdateOp::ASSIGN); \ + REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL( \ + type, dev, "ResourceScatterNdUpdate", scatter_nd_op::UpdateOp::ASSIGN); // Registers CPU kernels. #define REGISTER_SCATTER_ND_ADD_SUB_CPU(type) \ @@ -281,8 +327,7 @@ Status ValidateUpdateShape(const TensorShape& params_shape, } template -Status PrepareAndValidateInputs(OpKernelContext* c, - const TensorShape& params_shape, +Status PrepareAndValidateInputs(const TensorShape& params_shape, const Tensor& indices, const Tensor& updates, int64* slice_dim, Index* num_updates, Index* slice_size) { @@ -396,7 +441,7 @@ Status DoScatterNd(OpKernelContext* c, const Tensor& indices, Index num_updates; Index slice_size; TF_RETURN_IF_ERROR(PrepareAndValidateInputs( - c, shape, indices, updates, &slice_dim, &num_updates, &slice_size)); + shape, indices, updates, &slice_dim, &num_updates, &slice_size)); IndexFlattener index_flattener; auto indices_flat = index_flattener(c, indices); diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc index da5f091e9f..5b1f5d2477 100644 --- a/tensorflow/core/ops/state_ops.cc +++ b/tensorflow/core/ops/state_ops.cc @@ -513,6 +513,62 @@ output_ref: Same as ref. Returned as a convenience for operations that want to use the updated values after the update is done. )doc"); +REGISTER_OP("ResourceScatterNdUpdate") + .Input("ref: resource") + .Input("indices: Tindices") + .Input("updates: T") + .Attr("T: type") + .Attr("Tindices: {int32, int64}") + .Attr("use_locking: bool = true") + .SetShapeFn(shape_inference::ScatterNdUpdateShape) + .Doc(R"doc( +Applies sparse `updates` to individual values or slices within a given +variable according to `indices`. + +`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. + +`indices` must be integer tensor, containing indices into `ref`. +It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. + +The innermost dimension of `indices` (with length `K`) corresponds to +indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +dimension of `ref`. + +`updates` is `Tensor` of rank `Q-1+P-K` with shape: + +``` +[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. +``` + +For example, say we want to update 4 scattered elements to a rank-1 tensor to +8 elements. In Python, that update would look like this: + +```python + ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8]) + indices = tf.constant([[4], [3], [1] ,[7]]) + updates = tf.constant([9, 10, 11, 12]) + update = tf.scatter_nd_update(ref, indices, updates) + with tf.Session() as sess: + print sess.run(update) +``` + +The resulting update to ref would look like this: + + [1, 11, 3, 10, 9, 6, 7, 12] + +See @{tf.scatter_nd} for more details about how to make updates to +slices. + +ref: A resource handle. Must be from a VarHandleOp. +indices: A Tensor. Must be one of the following types: int32, int64. + A tensor of indices into ref. +updates: A Tensor. Must have the same type as ref. A tensor of updated + values to add to ref. +use_locking: An optional bool. Defaults to True. If True, the assignment will + be protected by a lock; otherwise the behavior is undefined, + but may exhibit less contention. +)doc"); + REGISTER_OP("ScatterNdAdd") .Input("ref: Ref(T)") .Input("indices: Tindices") diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 4522520ee4..f15b3baabe 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -676,6 +676,7 @@ cuda_py_test( "//tensorflow/python:gradients", "//tensorflow/python:state_ops", "//tensorflow/python:variables", + "//tensorflow/python:resource_variable_ops", ], tags = ["noasan"], # http://b/32635055 ) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index a79d66e988..d7bde04230 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -157,6 +158,20 @@ class StatefulScatterNdTest(test.TestCase): result = sess.run(scatter) self.assertAllClose(result, expected) + def testSimpleResource(self): + indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) + updates = constant_op.constant([9, 10, 11, 12], dtype=dtypes.float32) + ref = resource_variable_ops.ResourceVariable( + [0, 0, 0, 0, 0, 0, 0, 0], dtype=dtypes.float32) + expected = np.array([0, 11, 0, 10, 9, 0, 0, 12]) + scatter = state_ops.scatter_nd_update(ref, indices, updates) + init = variables.global_variables_initializer() + + with self.test_session(use_gpu=True) as sess: + sess.run(init) + sess.run(scatter) + self.assertAllClose(ref.eval(), expected) + def testSimple2(self): indices = constant_op.constant([[1, 0], [1, 1]], dtype=dtypes.int32) updates = constant_op.constant([11., 12.], dtype=dtypes.float32) diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index dfc657893c..dee495f78f 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -347,5 +347,71 @@ def scatter_update(ref, indices, updates, use_locking=True, name=None): if ref.dtype._is_ref_dtype: return gen_state_ops.scatter_update(ref, indices, updates, use_locking=use_locking, name=name) - return gen_resource_variable_ops.resource_scatter_update( - ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), name=name) + with ops.control_dependencies( + [gen_resource_variable_ops.resource_scatter_update( + ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), + name=name)]): + return ref.read_value() + + +def scatter_nd_update(ref, indices, updates, use_locking=True, name=None): + r"""Applies sparse `updates` to individual values or slices in a Variable. + + `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. + + `indices` must be integer tensor, containing indices into `ref`. + It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. + + The innermost dimension of `indices` (with length `K`) corresponds to + indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th + dimension of `ref`. + + `updates` is `Tensor` of rank `Q-1+P-K` with shape: + + ``` + [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. + ``` + + For example, say we want to update 4 scattered elements to a rank-1 tensor to + 8 elements. In Python, that update would look like this: + + ```python + ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8]) + indices = tf.constant([[4], [3], [1] ,[7]]) + updates = tf.constant([9, 10, 11, 12]) + update = tf.scatter_nd_update(ref, indices, updates) + with tf.Session() as sess: + print sess.run(update) + ``` + + The resulting update to ref would look like this: + + [1, 11, 3, 10, 9, 6, 7, 12] + + See @{tf.scatter_nd} for more details about how to make updates to + slices. + + Args: + ref: A Variable. + indices: A `Tensor`. Must be one of the following types: `int32`, `int64`. + A Tensor. Must be one of the following types: int32, int64. + A tensor of indices into ref. + updates: A `Tensor`. Must have the same type as `ref`. + A Tensor. Must have the same type as ref. A tensor of updated + values to add to ref. + use_locking: An optional `bool`. Defaults to `True`. + An optional bool. Defaults to True. If True, the assignment will + be protected by a lock; otherwise the behavior is undefined, + but may exhibit less contention. + name: A name for the operation (optional). + + Returns: + The value of the variable after the update. + """ + if ref.dtype._is_ref_dtype: + return gen_state_ops.scatter_nd_update( + ref, indices, updates, use_locking, name) + with ops.control_dependencies([gen_state_ops.resource_scatter_nd_update( + ref.handle, indices, ops.convert_to_tensor(updates, dtype=ref.dtype), + use_locking, name)]): + return ref.read_value() -- GitLab From a99e9a2c56a4922e76c367b8d3a9c43ea0a4ef61 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 28 Nov 2017 15:27:57 -0800 Subject: [PATCH 0903/1801] Support tfe.Network.losses Supports only variable regularization losses when executing eagerly. They are stored as zero-argument lambdas and executed when the property is requested. PiperOrigin-RevId: 177227550 --- tensorflow/contrib/eager/python/BUILD | 1 + tensorflow/contrib/eager/python/network.py | 24 +++++- .../contrib/eager/python/network_test.py | 29 +++++++ tensorflow/python/layers/base.py | 85 ++++++++++++------- tensorflow/python/layers/base_test.py | 5 ++ 5 files changed, 114 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index bf2e883bc5..55d768044b 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -232,6 +232,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":network", + "//tensorflow/contrib/layers:layers_py", "//tensorflow/python:constant_op", "//tensorflow/python:errors", "//tensorflow/python:framework_test_lib", diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index 0388aaa849..e3c13cbd2e 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -451,8 +451,30 @@ class Network(base.Layer): "at https://github.com/tensorflow/tensorflow/issues/new if this is " "important to you") + def add_loss(self, losses, inputs=None): + raise RuntimeError( + "add_loss is not supported in Network class yet. Please file an issue " + "at https://github.com/tensorflow/tensorflow/issues/new if this is " + "important to you") + + @property + def losses(self): + """Gather losses from `Layer`s in the `Network`. + + Note that when executing eagerly, `Layer.losses` evaluates + regularizers. When using graph execution, variable regularization ops have + already been created and are simply returned here. + + Returns: + A list of tensors. + """ + layer_losses = [] + for layer in self.layers: + layer_losses.extend(layer.losses) + return layer_losses + # TODO(allenl): Support other Layer methods needed for graph mode, such as for - # losses and updates + # updates class Sequential(Network): diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index e7835a63e6..3eb4f5f8b3 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import gc from tensorflow.contrib.eager.python import network +from tensorflow.contrib.layers.python.layers import regularizers from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.eager import test @@ -45,6 +46,22 @@ class MyNetwork(network.Network): return self.l1(x) +class RegularizedNetwork(network.Network): + + def __init__(self): + super(RegularizedNetwork, self).__init__() + self.l1 = self.track_layer(core.Dense( + 1, + bias_regularizer=regularizers.l1_regularizer(2.0), + kernel_regularizer=regularizers.l1_regularizer(2.0))) + self.l2 = self.track_layer(core.Dense( + 1, + bias_regularizer=regularizers.l1_regularizer(2.0))) + + def call(self, values): + return self.l2(self.l1(values)) + + class NetworkTest(test.TestCase): def _save_modify_load_network_built(self, net, global_step=None): @@ -484,6 +501,18 @@ class NetworkTest(test.TestCase): _check_op_prefixes(expected_prefix="my_network_1/dense/", checked_ops=checked_ops) + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testVariableRegularizers(self): + net = RegularizedNetwork() + net(constant_op.constant([[1.]])) + self.evaluate(net.variables[0].assign([[2.]])) + self.evaluate(net.variables[1].assign([3.])) + self.evaluate(net.variables[2].assign([[-2.]])) + self.evaluate(net.variables[3].assign([4.])) + self.assertAllEqual([4., 6., 8.], self.evaluate(net.losses)) + self.evaluate(net.variables[3].assign([5.])) + self.assertAllEqual([4., 6., 10.], self.evaluate(net.losses)) + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testDuplicateNameError(self): one = constant_op.constant([[1.]]) diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 6be2bc3e76..c083f8a5d2 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -103,10 +103,16 @@ class Layer(object): self.built = False self.input_spec = None + if activity_regularizer and context.in_eager_mode(): + raise ValueError( + ('Activity regularization is not supported when executing eagerly. ' + 'Got activity_regularizer=%s') % (activity_regularizer,)) self._activity_regularizer = activity_regularizer self._trainable_weights = [] self._non_trainable_weights = [] self._updates = [] + # When executing eagerly, _losses is a list of zero-argument lambdas which + # return tensors. When using graph execution, _losses is a list of ops. self._losses = [] self._reuse = kwargs.get('_reuse') self._graph = ops.get_default_graph() @@ -287,9 +293,22 @@ class Layer(object): @property def losses(self): + """Losses which are associated with this `Layer`. + + Note that when executing eagerly, getting this property evaluates + regularizers. When using graph execution, variable regularization ops have + already been created and are simply returned here. + + Returns: + A list of tensors. + """ if context.in_eager_mode(): - raise RuntimeError('Layer.losses not supported in Eager mode.') - return self._losses + # _losses may only contain variable regularization losses when executing + # eagerly, and they have been saved as lambdas to be executed when + # requested. + return [regularizer() for regularizer in self._losses] + else: + return self._losses def add_loss(self, losses, inputs=None): """Add loss tensor(s), potentially dependent on layer inputs. @@ -303,6 +322,11 @@ class Layer(object): The `get_losses_for` method allows to retrieve the losses relevant to a specific set of inputs. + Note that `add_loss` is not supported when executing eagerly. Instead, + variable regularizers may be added through `add_variable`. Activity + regularization is not supported directly (but such losses may be returned + from `Layer.call()`). + Arguments: losses: Loss tensor, or list/tuple of tensors. inputs: Optional input tensor(s) that the loss(es) depend on. Must @@ -462,16 +486,8 @@ class Layer(object): Raises: RuntimeError: If called in Eager mode with regularizers. """ - # Note that we currently don't support variable regularization in Eager - # mode. An alternative is for users to directly compute these losses before - # performing a backward pass. if context.in_graph_mode(): existing_variables = set(tf_variables.global_variables()) - else: - existing_variables = [] - if regularizer is not None: - raise RuntimeError('Variable regularization not supported in Eager ' - 'mode.') if dtype is None: dtype = self.dtype or dtypes.float32 @@ -486,28 +502,39 @@ class Layer(object): constraint=constraint, trainable=trainable and self.trainable, partitioner=partitioner) - if (context.in_graph_mode() and trainable and self.trainable - and variable not in tf_variables.trainable_variables()): - # A custom getter / variable scope overrode the trainable flag. - trainable = False - if variable in existing_variables: - return variable - if regularizer: - # To match the behavior of tf.get_variable(), we only - # apply regularization if the variable is newly created. - if isinstance(variable, tf_variables.PartitionedVariable): - for v in variable: - with ops.colocate_with(v.op): + if context.in_graph_mode(): + if (trainable and self.trainable + and variable not in tf_variables.trainable_variables()): + # A custom getter / variable scope overrode the trainable flag. + trainable = False + if variable in existing_variables: + return variable + if regularizer: + # To match the behavior of tf.get_variable(), we only + # apply regularization if the variable is newly created. + if isinstance(variable, tf_variables.PartitionedVariable): + for v in variable: + with ops.colocate_with(v.op): + with ops.name_scope(name + '/Regularizer'): + regularization = regularizer(v) + if regularization is not None: + self.add_loss(regularization) + else: + with ops.colocate_with(variable.op): with ops.name_scope(name + '/Regularizer'): - regularization = regularizer(v) + regularization = regularizer(variable) if regularization is not None: self.add_loss(regularization) - else: - with ops.colocate_with(variable.op): - with ops.name_scope(name + '/Regularizer'): - regularization = regularizer(variable) - if regularization is not None: - self.add_loss(regularization) + elif regularizer: + if isinstance(variable, tf_variables.PartitionedVariable): + raise RuntimeError( + 'Partitioned variable regularization is not yet supported when ' + 'executing eagerly. File a feature request is this is ' + 'important to you.') + # Save a zero-argument lambda which runs the regularizer on the + # variable, to be executed when `Layer.losses` is requested. This + # makes losses responsive to variable updates when executing eagerly. + self._losses.append(lambda: regularizer(variable)) if trainable: self._trainable_weights.append(variable) else: diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 1eea20deef..3e5a51eb62 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -88,6 +88,11 @@ class BaseLayerTest(test.TestCase): regularizer=regularizer) self.assertEqual(len(layer.losses), 1) + def testNoEagerActivityRegularizer(self): + with context.eager_mode(): + with self.assertRaisesRegexp(ValueError, 'activity_regularizer'): + core_layers.Dense(1, activity_regularizer=lambda *args, **kwargs: 0.) + def testGetVariable(self): with self.test_session(): -- GitLab From a6ee905de83834c35e7cf01182270309ec2425f3 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Tue, 28 Nov 2017 15:31:17 -0800 Subject: [PATCH 0904/1801] Add non_trainable_variables to templates. Add aliases for weights, trainable_weights and non_trainable_weights. PiperOrigin-RevId: 177228107 --- .../python/kernel_tests/template_test.py | 37 +++++++++++++++++-- tensorflow/python/ops/template.py | 37 ++++++++++++++++++- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py index 40c0ade62a..f0354374ac 100644 --- a/tensorflow/python/kernel_tests/template_test.py +++ b/tensorflow/python/kernel_tests/template_test.py @@ -34,9 +34,10 @@ from tensorflow.python.platform import test from tensorflow.python.training import gradient_descent -def variable_scoped_function(): +def variable_scoped_function(trainable=True): return variable_scope.get_variable( - "dummy", shape=[1], initializer=init_ops.zeros_initializer()) + "dummy", shape=[1], trainable=trainable, + initializer=init_ops.zeros_initializer()) def internally_variable_scoped_function(scope_name): @@ -413,7 +414,7 @@ class TemplateTest(test.TestCase): self.assertEqual(custom_getter_count[0], 2) # Test that custom getter is called when the variable scope is created - # during construction + # during construction custom_getter_count[0] = 0 tmpl2 = template.make_template( "s2", @@ -539,6 +540,36 @@ class TemplateTest(test.TestCase): # Ensure we can get the scopes before either template is actually called. self.assertEqual(1, len(ta.trainable_variables)) self.assertEqual(1, len(tb.trainable_variables)) + # None non-trainable variable was created. + self.assertEqual([], list(ta.non_trainable_variables)) + self.assertEqual([], list(tb.non_trainable_variables)) + # Ensure variables returns all the variables. + self.assertEqual(1, len(ta.variables)) + self.assertEqual(1, len(tb.variables)) + + @test_util.run_in_graph_and_eager_modes() + def test_non_trainable_variables(self): + # Make sure non_trainable_variables are created. + with variable_scope.variable_scope("foo2"): + ta = template.make_template("a", variable_scoped_function, + trainable=True) + tb = template.make_template("b", variable_scoped_function, + trainable=False) + # Initially there are not variables created. + self.assertEqual([], list(ta.variables)) + self.assertEqual([], list(tb.variables)) + # After calling there are variables created. + ta() + tb() + # Check the trainable and non_trainable variables. + self.assertEqual(1, len(ta.trainable_variables)) + self.assertEqual([], list(ta.non_trainable_variables)) + + self.assertEqual([], list(tb.trainable_variables)) + self.assertEqual(1, len(tb.non_trainable_variables)) + # Ensure variables returns all the variables. + self.assertEqual(1, len(ta.variables)) + self.assertEqual(1, len(tb.variables)) # TODO(apassos) handle local variables in Eager def test_local_variables(self): diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py index 98578b799a..07796b28d9 100644 --- a/tensorflow/python/ops/template.py +++ b/tensorflow/python/ops/template.py @@ -307,6 +307,12 @@ class Template(object): # To prevent partial matches on the scope_name, we add '/' at the end. return name if name[-1] == "/" else name + "/" + @property + def variables(self): + """Returns the list of global and local variables created by the Template. + """ + return self.global_variables + self.local_variables + @property def trainable_variables(self): """Returns the list of trainable variables created by the Template.""" @@ -316,6 +322,14 @@ class Template(object): else: return [] + @property + def non_trainable_variables(self): + """Returns the list of non-trainable variables created by the Template.""" + # TODO(apassos) Make sure it matches Eager when using local variables. + global_variables = self.global_variables + trainable_variables = set(self.trainable_variables) + return [x for x in global_variables if x not in trainable_variables] + @property def global_variables(self): """Returns the list of global variables created by the Template.""" @@ -334,6 +348,21 @@ class Template(object): else: return [] + @property + def weights(self): + """List of weights/variables created by the Template.""" + return self.variables + + @property + def trainable_weights(self): + """List of trainable weights/variables created by the Template.""" + return self.trainable_variables + + @property + def non_trainable_weights(self): + """List of non-trainable weights/variables created by the Template.""" + return self.non_trainable_variables + @property @deprecated( "2017-02-21", "The .var_scope property is deprecated. Please change your " @@ -501,7 +530,7 @@ class EagerTemplate(Template): @property def variables(self): - """Returns the list of trainable variables created by the Template.""" + """Returns the list of variables created by the Template.""" # Currently there is no local variable in Eager mode. return self._eager_variable_store.variables() @@ -511,6 +540,12 @@ class EagerTemplate(Template): # Currently there is no local variable in Eager mode. return self._eager_variable_store.trainable_variables() + @property + def non_trainable_variables(self): + """Returns the list of non-trainable variables created by the Template.""" + # Currently there is no local variable in Eager mode. + return self._eager_variable_store.non_trainable_variables() + @property def global_variables(self): """Returns the list of global variables created by the Template.""" -- GitLab From d8de0d979e9b9dacb20ebf425d54bbc98ed65fad Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 28 Nov 2017 15:31:21 -0800 Subject: [PATCH 0905/1801] Fixing the windows nightly build. PiperOrigin-RevId: 177228112 --- tensorflow/contrib/cmake/tf_core_cpu.cmake | 2 +- tensorflow/contrib/cmake/tf_core_framework.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake index 5c01ca382f..e4213ea2a4 100644 --- a/tensorflow/contrib/cmake/tf_core_cpu.cmake +++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake @@ -63,7 +63,7 @@ if (tensorflow_ENABLE_GPU) file(GLOB_RECURSE tf_core_gpu_srcs "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/*.cc" "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/cupti_wrapper.cc" - "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu_device_factory.cc" "${tensorflow_source_dir}/tensorflow/core/grappler/devices.h" "${tensorflow_source_dir}/tensorflow/core/grappler/devices.cc" diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index c607546f4a..5ec1a8d04f 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -211,7 +211,7 @@ if (NOT tensorflow_ENABLE_GPU) list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_gpu_srcs}) else() file(GLOB tf_core_platform_srcs_exclude - "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc") + "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc") list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_srcs_exclude}) endif() -- GitLab From 5f1b61b5c851409c76015c908d127fbc2f886013 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 28 Nov 2017 15:37:52 -0800 Subject: [PATCH 0906/1801] Check per HLO instruction only at vlog=1 in non-opt build. PiperOrigin-RevId: 177229069 --- tensorflow/compiler/xla/service/hlo_rematerialization.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 017f996bc4..d09de7b528 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -566,7 +566,9 @@ Status MemoryUsageTracker::BeginInstruction(Item* item) { VLOG(3) << " memory usage = " << memory_usage_; VLOG(10) << ToString(); - DCHECK(Check()); + if (VLOG_IS_ON(1)) { + DCHECK(Check()); + } return Status::OK(); } @@ -603,8 +605,9 @@ Status MemoryUsageTracker::EndInstruction() { VLOG(3) << " memory usage = " << memory_usage_; VLOG(10) << ToString(); - DCHECK(Check()); - + if (VLOG_IS_ON(1)) { + DCHECK(Check()); + } return Status::OK(); } -- GitLab From b8969d12f9260a7b1981b8d22788aa1f8c8cbbb6 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Tue, 28 Nov 2017 15:44:48 -0800 Subject: [PATCH 0907/1801] Mark Supervisor deprecated. Please use MonitoredTrainingSession instead. Fixes #6263. PiperOrigin-RevId: 177230053 --- tensorflow/python/training/monitored_session.py | 1 - tensorflow/python/training/supervisor.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index e931555470..f1cb81981a 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -52,7 +52,6 @@ _PREEMPTION_ERRORS = (errors.AbortedError, errors.UnavailableError) USE_DEFAULT = object() -# TODO(touts): Share that with the Supervisor. class Scaffold(object): """Structure to create or gather pieces commonly needed to train a model. diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py index a634a842b6..e4514aaea2 100644 --- a/tensorflow/python/training/supervisor.py +++ b/tensorflow/python/training/supervisor.py @@ -36,11 +36,15 @@ from tensorflow.python.training import coordinator from tensorflow.python.training import saver as saver_mod from tensorflow.python.training import session_manager as session_manager_mod from tensorflow.python.training import training_util +from tensorflow.python.util import deprecation class Supervisor(object): """A training helper that checkpoints models and computes summaries. + This class is deprecated. Please use + ${tf.train.MonitoredTrainingSession} instead. + The Supervisor is a small wrapper around a `Coordinator`, a `Saver`, and a `SessionManager` that takes care of common needs of TensorFlow training programs. @@ -198,6 +202,8 @@ class Supervisor(object): # the default behavior should be used. USE_DEFAULT = 0 + @deprecation.deprecated(None, + "Please switch to tf.train.MonitoredTrainingSession") def __init__(self, graph=None, ready_op=USE_DEFAULT, -- GitLab From 5a1e22b753225a7fa14f4ae60c06cf50bce6b9a6 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Tue, 28 Nov 2017 15:45:09 -0800 Subject: [PATCH 0908/1801] Remove temp_workaround_http_archive. PiperOrigin-RevId: 177230105 --- tensorflow/workspace.bzl | 51 +++++----------------------------- third_party/aws.BUILD | 16 +++++------ third_party/curl.BUILD | 46 +++++++++++++++--------------- third_party/gif.BUILD | 2 +- third_party/jemalloc.BUILD | 10 +++---- third_party/jpeg/jpeg.BUILD | 2 +- third_party/mkl/build_defs.bzl | 1 - third_party/nccl.BUILD | 8 +++--- third_party/snappy.BUILD | 4 +-- 9 files changed, 51 insertions(+), 89 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index cb77f96be5..68d663acfc 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -57,33 +57,6 @@ def check_version(bazel_version): fail("\nCurrent Bazel version is {}, expected at least {}\n".format( native.bazel_version, bazel_version)) -def _repos_are_siblings(): - return Label("@foo//bar").workspace_root.startswith("../") - -# Temporary workaround to support including TensorFlow as a submodule until this -# use-case is supported in the next Bazel release. -def _temp_workaround_http_archive_impl(repo_ctx): - repo_ctx.template("BUILD", repo_ctx.attr.build_file, { - "%prefix%": ".." if _repos_are_siblings() else "external", - "%ws%": repo_ctx.attr.repository - }, False) - repo_ctx.download_and_extract(repo_ctx.attr.urls, "", repo_ctx.attr.sha256, - "", repo_ctx.attr.strip_prefix) - if repo_ctx.attr.patch_file != None: - _apply_patch(repo_ctx, repo_ctx.attr.patch_file) - -temp_workaround_http_archive = repository_rule( - attrs = { - "build_file": attr.label(), - "repository": attr.string(), - "patch_file": attr.label(default = None), - "urls": attr.string_list(default = []), - "sha256": attr.string(default = ""), - "strip_prefix": attr.string(default = ""), - }, - implementation = _temp_workaround_http_archive_impl, -) - # Executes specified command with arguments and calls 'fail' if it exited with # non-zero code def _execute_and_check_ret_code(repo_ctx, cmd_and_args): @@ -121,8 +94,6 @@ def _patched_http_archive_impl(repo_ctx): patched_http_archive = repository_rule( attrs = { "patch_file": attr.label(), - "build_file": attr.label(), - "repository": attr.string(), "urls": attr.string_list(default = []), "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), @@ -157,7 +128,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "57ba56c4c243f403ff78f417ff854ef50b9eddf4a610a917b7c95e7fa8553a4b", strip_prefix = "mklml_lnx_2018.0.20170720", build_file = str(Label("//third_party/mkl:mkl.BUILD")), - repository = tf_repo_name, ) if path_prefix: @@ -292,7 +262,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party:nasm.BUILD")), ) - temp_workaround_http_archive( + native.new_http_archive( name = "jpeg", urls = [ "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", @@ -301,7 +271,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77", strip_prefix = "libjpeg-turbo-1.5.1", build_file = str(Label("//third_party/jpeg:jpeg.BUILD")), - repository = tf_repo_name, ) native.new_http_archive( @@ -502,7 +471,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party:swig.BUILD")), ) - temp_workaround_http_archive( + native.new_http_archive( name = "curl", sha256 = "ff3e80c1ca6a068428726cd7dd19037a47cc538ce58ef61c59587191039b2ca6", urls = [ @@ -511,7 +480,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], strip_prefix = "curl-7.49.1", build_file = str(Label("//third_party:curl.BUILD")), - repository = tf_repo_name ) # grpc expects //external:protobuf_clib and //external:protobuf_compiler @@ -575,7 +543,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror. # Switch to an official source of snapshots if/when possible. - temp_workaround_http_archive( + native.new_http_archive( name = "llvm", urls = [ "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9ab4c272cb604a7f947865428c4ef2169fee2100.tar.gz", @@ -584,7 +552,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "1b1b7d3800a94ca2302e3dd670dbe84238749583027883784b55297059d83da8", strip_prefix = "llvm-9ab4c272cb604a7f947865428c4ef2169fee2100", build_file = str(Label("//third_party/llvm:llvm.BUILD")), - repository = tf_repo_name, ) native.new_http_archive( @@ -650,7 +617,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party/fft2d:fft2d.BUILD")), ) - temp_workaround_http_archive( + native.new_http_archive( name = "snappy", urls = [ "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.4.tar.gz", @@ -659,10 +626,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94", strip_prefix = "snappy-1.1.4", build_file = str(Label("//third_party:snappy.BUILD")), - repository = tf_repo_name, ) - temp_workaround_http_archive( + native.new_http_archive( name = "nccl_archive", urls = [ "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", @@ -671,10 +637,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", build_file = str(Label("//third_party:nccl.BUILD")), - repository = tf_repo_name, ) - temp_workaround_http_archive( + native.new_http_archive( name = "aws", urls = [ "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz", @@ -683,7 +648,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "f599b57aec4f03ad696044dd430b2d201864113937353adc346f53ad47991319", strip_prefix = "aws-sdk-cpp-1.0.90", build_file = str(Label("//third_party:aws.BUILD")), - repository = tf_repo_name ) java_import_external( @@ -711,7 +675,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): testonly_ = True, ) - temp_workaround_http_archive( + native.new_http_archive( name = "jemalloc", urls = [ "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", @@ -720,7 +684,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", strip_prefix = "jemalloc-4.4.0", build_file = str(Label("//third_party:jemalloc.BUILD")), - repository = tf_repo_name, ) java_import_external( diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD index bc9e37ffb3..bf5310aa16 100644 --- a/third_party/aws.BUILD +++ b/third_party/aws.BUILD @@ -7,21 +7,21 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("@%ws%//third_party:common.bzl", "template_rule") +load("@org_tensorflow//third_party:common.bzl", "template_rule") cc_library( name = "aws", srcs = select({ - "@%ws%//tensorflow:linux_x86_64": glob([ + "@org_tensorflow//tensorflow:linux_x86_64": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), - "@%ws%//tensorflow:darwin": glob([ + "@org_tensorflow//tensorflow:darwin": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), - "@%ws%//tensorflow:linux_ppc64le": glob([ + "@org_tensorflow//tensorflow:linux_ppc64le": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), - "@%ws%//tensorflow:raspberry_pi_armeabi": glob([ + "@org_tensorflow//tensorflow:raspberry_pi_armeabi": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), "//conditions:default": [], @@ -53,17 +53,17 @@ cc_library( "aws-cpp-sdk-core/include/aws/core/SDKConfig.h", ], defines = select({ - "@%ws%//tensorflow:linux_x86_64": [ + "@org_tensorflow//tensorflow:linux_x86_64": [ "PLATFORM_LINUX", "ENABLE_CURL_CLIENT", "ENABLE_NO_ENCRYPTION", ], - "@%ws%//tensorflow:darwin": [ + "@org_tensorflow//tensorflow:darwin": [ "PLATFORM_APPLE", "ENABLE_CURL_CLIENT", "ENABLE_NO_ENCRYPTION", ], - "@%ws%//tensorflow:linux_ppc64le": [ + "@org_tensorflow//tensorflow:linux_ppc64le": [ "PLATFORM_LINUX", "ENABLE_CURL_CLIENT", "ENABLE_NO_ENCRYPTION", diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 805a30d262..e311c7e758 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -6,7 +6,7 @@ licenses(["notice"]) # MIT/X derivative license exports_files(["COPYING"]) CURL_WIN_COPTS = [ - "/I%prefix%/curl/lib", + "/Iexternal/curl/lib", "/DHAVE_CONFIG_H", "/DCURL_DISABLE_FTP", "/DCURL_DISABLE_NTLM", @@ -224,14 +224,14 @@ cc_library( "lib/wildcard.h", "lib/x509asn1.h", ] + select({ - "@%ws%//tensorflow:darwin": [ + "@org_tensorflow//tensorflow:darwin": [ "lib/vtls/darwinssl.c", ], - "@%ws%//tensorflow:ios": [ + "@org_tensorflow//tensorflow:ios": [ "lib/vtls/darwinssl.c", ], - "@%ws%//tensorflow:windows": CURL_WIN_SRCS, - "@%ws%//tensorflow:windows_msvc": CURL_WIN_SRCS, + "@org_tensorflow//tensorflow:windows": CURL_WIN_SRCS, + "@org_tensorflow//tensorflow:windows_msvc": CURL_WIN_SRCS, "//conditions:default": [ "lib/vtls/openssl.c", ], @@ -248,10 +248,10 @@ cc_library( "include/curl/typecheck-gcc.h", ], copts = select({ - "@%ws%//tensorflow:windows": CURL_WIN_COPTS, - "@%ws%//tensorflow:windows_msvc": CURL_WIN_COPTS, + "@org_tensorflow//tensorflow:windows": CURL_WIN_COPTS, + "@org_tensorflow//tensorflow:windows_msvc": CURL_WIN_COPTS, "//conditions:default": [ - "-I%prefix%/curl/lib", + "-Iexternal/curl/lib", "-D_GNU_SOURCE", "-DHAVE_CONFIG_H", "-DCURL_DISABLE_FTP", @@ -261,14 +261,14 @@ cc_library( "-Wno-string-plus-int", ], }) + select({ - "@%ws%//tensorflow:darwin": [ + "@org_tensorflow//tensorflow:darwin": [ "-fno-constant-cfstrings", ], - "@%ws%//tensorflow:windows": [ + "@org_tensorflow//tensorflow:windows": [ # See curl.h for discussion of write size and Windows "/DCURL_MAX_WRITE_SIZE=16384", ], - "@%ws%//tensorflow:windows_msvc": [ + "@org_tensorflow//tensorflow:windows_msvc": [ # See curl.h for discussion of write size and Windows "/DCURL_MAX_WRITE_SIZE=16384", ], @@ -278,20 +278,20 @@ cc_library( }), includes = ["include"], linkopts = select({ - "@%ws%//tensorflow:android": [ + "@org_tensorflow//tensorflow:android": [ "-pie", ], - "@%ws%//tensorflow:darwin": [ + "@org_tensorflow//tensorflow:darwin": [ "-Wl,-framework", "-Wl,CoreFoundation", "-Wl,-framework", "-Wl,Security", ], - "@%ws%//tensorflow:ios": [], - "@%ws%//tensorflow:windows": [ + "@org_tensorflow//tensorflow:ios": [], + "@org_tensorflow//tensorflow:windows": [ "-Wl,ws2_32.lib", ], - "@%ws%//tensorflow:windows_msvc": [ + "@org_tensorflow//tensorflow:windows_msvc": [ "-Wl,ws2_32.lib", ], "//conditions:default": [ @@ -302,9 +302,9 @@ cc_library( deps = [ "@zlib_archive//:zlib", ] + select({ - "@%ws%//tensorflow:ios": [], - "@%ws%//tensorflow:windows": [], - "@%ws%//tensorflow:windows_msvc": [], + "@org_tensorflow//tensorflow:ios": [], + "@org_tensorflow//tensorflow:windows": [], + "@org_tensorflow//tensorflow:windows_msvc": [], "//conditions:default": [ "@boringssl//:ssl", ], @@ -312,7 +312,7 @@ cc_library( ) CURL_BIN_WIN_COPTS = [ - "/I%prefix%/curl/lib", + "/Iexternal/curl/lib", "/DHAVE_CONFIG_H", "/DCURL_DISABLE_LIBCURL_OPTION", ] @@ -406,10 +406,10 @@ cc_binary( "src/tool_xattr.h", ], copts = select({ - "@%ws%//tensorflow:windows": CURL_BIN_WIN_COPTS, - "@%ws%//tensorflow:windows_msvc": CURL_BIN_WIN_COPTS, + "@org_tensorflow//tensorflow:windows": CURL_BIN_WIN_COPTS, + "@org_tensorflow//tensorflow:windows_msvc": CURL_BIN_WIN_COPTS, "//conditions:default": [ - "-I%prefix%/curl/lib", + "-Iexternal/curl/lib", "-D_GNU_SOURCE", "-DHAVE_CONFIG_H", "-DCURL_DISABLE_LIBCURL_OPTION", diff --git a/third_party/gif.BUILD b/third_party/gif.BUILD index 27808a9d64..78fbd6c0e0 100644 --- a/third_party/gif.BUILD +++ b/third_party/gif.BUILD @@ -21,7 +21,7 @@ cc_library( ], hdrs = ["lib/gif_lib.h"], defines = select({ - #"@%ws%//tensorflow:android": [ + #"@org_tensorflow//tensorflow:android": [ ":android": [ "S_IREAD=S_IRUSR", "S_IWRITE=S_IWUSR", diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD index a2addf2c66..1b0829b8fe 100644 --- a/third_party/jemalloc.BUILD +++ b/third_party/jemalloc.BUILD @@ -5,7 +5,7 @@ licenses(["notice"]) # BSD exports_files(["COPYING"]) -load("@%ws%//third_party:common.bzl", "template_rule") +load("@org_tensorflow//third_party:common.bzl", "template_rule") cc_library( name = "jemalloc_headers", @@ -97,10 +97,10 @@ cc_library( includes = ["include"], # pthread_atfork() is called for PPC. linkopts = select({ - "@%ws%//tensorflow:linux_ppc64le": [ + "@org_tensorflow//tensorflow:linux_ppc64le": [ "-lpthread", ], - "@%ws%//tensorflow:linux_x86_64": [ + "@org_tensorflow//tensorflow:linux_x86_64": [ "-lpthread", ], "//conditions:default": [ @@ -208,8 +208,8 @@ genrule( name = "size_classes_h", outs = ["include/jemalloc/internal/size_classes.h"], cmd = select({ - "@%ws%//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@", - "@%ws%//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@", + "@org_tensorflow//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@", + "@org_tensorflow//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@", "//conditions:default": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@", }), tools = [":size_classes_sh"], diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index f6078052ec..e431f19382 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -5,7 +5,7 @@ licenses(["notice"]) # custom notice-style license, see LICENSE.md exports_files(["LICENSE.md"]) -load("@%ws%//third_party:common.bzl", "template_rule") +load("@org_tensorflow//third_party:common.bzl", "template_rule") libjpegturbo_nocopts = "-[W]error" diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index 533c0766c7..f637873f14 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -60,7 +60,6 @@ mkl_repository = repository_rule( ], attrs = { "build_file": attr.label(), - "repository": attr.string(), "urls": attr.string_list(default = []), "sha256": attr.string(default = ""), "strip_prefix": attr.string(default = ""), diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD index 06b9b8ff68..3a2a3afe46 100644 --- a/third_party/nccl.BUILD +++ b/third_party/nccl.BUILD @@ -44,17 +44,17 @@ cc_library( "-O3", ] + cuda_default_copts(), linkopts = select({ - "@%ws%//tensorflow:android": [ + "@org_tensorflow//tensorflow:android": [ "-pie", ], - "@%ws%//tensorflow:darwin": [ + "@org_tensorflow//tensorflow:darwin": [ "-Wl,-framework", "-Wl,CoreFoundation", "-Wl,-framework", "-Wl,Security", ], - "@%ws%//tensorflow:ios": [], - "@%ws%//tensorflow:windows": [ + "@org_tensorflow//tensorflow:ios": [], + "@org_tensorflow//tensorflow:windows": [ "ws2_32.lib", ], "//conditions:default": [ diff --git a/third_party/snappy.BUILD b/third_party/snappy.BUILD index 9c00b7068a..fd48ed8941 100644 --- a/third_party/snappy.BUILD +++ b/third_party/snappy.BUILD @@ -50,8 +50,8 @@ genrule( "-e 's/@ac_cv_have_stddef_h@/1/g' " + "-e 's/@ac_cv_have_stdint_h@/1/g' " + select({ - "@%ws%//tensorflow:windows": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ", - "@%ws%//tensorflow:windows_msvc": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ", + "@org_tensorflow//tensorflow:windows": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ", + "@org_tensorflow//tensorflow:windows_msvc": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ", "//conditions:default": "-e 's/@ac_cv_have_sys_uio_h@/1/g' ", }) + "-e 's/@SNAPPY_MAJOR@/1/g' " + -- GitLab From 9049b440df17de47baf16d9e24590c3d0761e2c9 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 28 Nov 2017 16:06:37 -0800 Subject: [PATCH 0909/1801] Fix tensorflow-android jcenter link PiperOrigin-RevId: 177233056 --- tensorflow/contrib/android/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/android/README.md b/tensorflow/contrib/android/README.md index f49e5857fe..c7c128bf14 100644 --- a/tensorflow/contrib/android/README.md +++ b/tensorflow/contrib/android/README.md @@ -15,9 +15,9 @@ For prebuilt libraries, see the page for a recent build. The TensorFlow Inference Interface is also available as a -[JCenter package](https://bintray.com/google/tensorflow/tensorflow-android) and -can be included quite simply in your android project with a couple of lines in -the project's `build.gradle` file: +[JCenter package](https://bintray.com/google/tensorflow/tensorflow) +(see the tensorflow-android directory) and can be included quite simply in your +android project with a couple of lines in the project's `build.gradle` file: ``` allprojects { -- GitLab From a80fd2acf08ceba0c8fc7684c3013e8e7d6bd8d3 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 28 Nov 2017 16:11:48 -0800 Subject: [PATCH 0910/1801] C API: fix bug in ValidateNoCycles(). This change makes ValidateNoCycles() work when the graph has unused node ids (i.e. when Graph::num_nodes() < Graph::num_node_ids()). PiperOrigin-RevId: 177234002 --- tensorflow/c/c_api.cc | 9 ++++----- tensorflow/python/framework/ops_test.py | 8 ++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index bb41f92306..4fb8ec8e4b 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -383,12 +383,11 @@ void TF_Reset_Helper(const TF_SessionOptions* opt, const char** containers, // be less than the total node count. Status ValidateNoCycles(const Graph& g) { // TODO(nolivia): check this on a subset of the graph instead of all of it. - int total_num_nodes = g.num_node_ids(); // A node is ready when all of its inputs have been visited. std::vector ready; - std::vector pending_count(total_num_nodes, 0); + std::vector pending_count(g.num_node_ids(), 0); - for (int i = 0; i < total_num_nodes; ++i) { + for (int i = 0; i < g.num_node_ids(); ++i) { const Node* n = g.FindNodeId(i); if (n == nullptr) continue; pending_count[i] = n->in_edges().size(); @@ -421,7 +420,7 @@ Status ValidateNoCycles(const Graph& g) { } } - if (processed < total_num_nodes) { + if (processed < g.num_nodes()) { std::vector nodes_in_cycle; for (int i = 0; i < pending_count.size() && nodes_in_cycle.size() < 3; ++i) { @@ -430,7 +429,7 @@ Status ValidateNoCycles(const Graph& g) { } } return errors::InvalidArgument( - "Graph is invalid, contains a cycle with ", total_num_nodes - processed, + "Graph is invalid, contains a cycle with ", g.num_nodes() - processed, " nodes, including: ", str_util::Join(nodes_in_cycle, ", ")); } return Status::OK(); diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 371eadcd13..3eae3b5a25 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1876,6 +1876,14 @@ class GraphTest(test_util.TensorFlowTestCase): gc.collect() self.assertIsNone(g_ref()) + def testRunnableAfterInvalidShape(self): + with ops.Graph().as_default(): + with self.assertRaises(ValueError): + math_ops.add([1, 2], [1, 2, 3]) + a = constant_op.constant(1) + with session.Session() as sess: + sess.run(a) + @test_util.with_c_api class AttrScopeTest(test_util.TensorFlowTestCase): -- GitLab From f22261e61c2359483ad17465161918856bb86e65 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 16:22:17 -0800 Subject: [PATCH 0911/1801] Add depthwise ops for NAS cell in nn_ops_test to improve the inference time on the particular depthwise ops. PiperOrigin-RevId: 177235744 --- tensorflow/core/kernels/nn_ops_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc index 0db7c63b8b..a841291ddd 100644 --- a/tensorflow/core/kernels/nn_ops_test.cc +++ b/tensorflow/core/kernels/nn_ops_test.cc @@ -653,6 +653,8 @@ BM_ConvFloatDepthwiseFwd(32, 7, 7, 1024, 1, 1024, 3, 3, 1, SAME, conv6); // Benchmarks with different stride and padding options. BM_ConvFloatDepthwiseFwd(32, 112, 112, 3, 8, 24, 3, 3, 2, SAME, conv7); BM_ConvFloatDepthwiseFwd(32, 112, 112, 3, 8, 24, 3, 3, 2, VALID, conv8); +BM_ConvFloatDepthwiseFwd(1, 100, 100, 72, 1, 72, 3, 3, 1, SAME, conv9); +BM_ConvFloatDepthwiseFwd(1, 100, 100, 72, 1, 72, 5, 5, 1, SAME, conv10); #define BM_ConvFloatDepthwiseBk(BS, R, C, ID, DM, OD, KR, KC, STR, PAD, LABEL) \ static void BM_ConvFloatDepthwiseBkInCPU1_##LABEL(int iters) { \ -- GitLab From b89251c6300b9941d06071543e5c4974d0db1984 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 28 Nov 2017 16:31:39 -0800 Subject: [PATCH 0912/1801] [TF:XLA] Implement Cumsum and Cumprod using the XLA ReduceWindow operator. PiperOrigin-RevId: 177236996 --- tensorflow/compiler/tests/BUILD | 14 ++ tensorflow/compiler/tests/scan_ops_test.py | 229 ++++++++++++++++++ tensorflow/compiler/tf2xla/const_analysis.cc | 2 + tensorflow/compiler/tf2xla/kernels/BUILD | 1 + .../compiler/tf2xla/kernels/scan_ops.cc | 140 +++++++++++ tensorflow/compiler/tf2xla/xla_context.cc | 14 ++ tensorflow/compiler/tf2xla/xla_context.h | 8 + tensorflow/compiler/tf2xla/xla_op_kernel.cc | 5 + tensorflow/compiler/tf2xla/xla_op_kernel.h | 5 + 9 files changed, 418 insertions(+) create mode 100644 tensorflow/compiler/tests/scan_ops_test.py create mode 100644 tensorflow/compiler/tf2xla/kernels/scan_ops.cc diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 6cad2b0824..fff1a7f57b 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -416,6 +416,20 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "scan_ops_test", + size = "small", + srcs = ["scan_ops_test.py"], + tags = ["optonly"], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + tf_xla_py_test( name = "segment_reduction_ops_test", size = "medium", diff --git a/tensorflow/compiler/tests/scan_ops_test.py b/tensorflow/compiler/tests/scan_ops_test.py new file mode 100644 index 0000000000..3260e63b23 --- /dev/null +++ b/tensorflow/compiler/tests/scan_ops_test.py @@ -0,0 +1,229 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for scan ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +def numpy_reverse(x, axis): + length = len(x.shape) + if axis < 0: + axis = length + axis + + ix = [ + slice(None, None, -1) if i == axis else slice(None) for i in range(length) + ] + return x[ix] + + +def handle_options(func, x, axis, exclusive, reverse): + """Adds tf options to numpy scan ops.""" + length = len(x.shape) + if axis < 0: + axis = length + axis + + if reverse: + x = numpy_reverse(x, axis) + + if exclusive: + ix_head = [slice(0, 1) if i == axis else slice(None) for i in range(length)] + ix_init = [ + slice(0, -1) if i == axis else slice(None) for i in range(length) + ] + if func == np.cumsum: + init = np.zeros_like(x[ix_head]) + elif func == np.cumprod: + init = np.ones_like(x[ix_head]) + else: + raise ValueError("Unknown scan function.") + x = np.concatenate([init, func(x[ix_init], axis)], axis=axis) + else: + x = func(x, axis=axis) + + if reverse: + x = numpy_reverse(x, axis) + return x + + +class CumsumTest(XLATestCase): + + valid_dtypes = [np.float32] + + def axis_dtypes(self): + return set(self.int_types).intersection([np.int32, np.int64]) + + def _compare(self, x, axis, exclusive, reverse): + np_out = handle_options(np.cumsum, x, axis, exclusive, reverse) + with self.test_session(), self.test_scope(): + p = array_ops.placeholder(x.dtype) + tf_out = math_ops.cumsum(p, axis, exclusive, reverse).eval( + feed_dict={p: x}) + + self.assertAllClose(np_out, tf_out) + + def _compareAll(self, x, axis): + for exclusive in [True, False]: + for reverse in [True, False]: + self._compare(x, axis, exclusive, reverse) + + def testEmpty(self): + for dtype in self.valid_dtypes: + x = np.zeros([0]).astype(dtype) + for axis in (-1, 0): + self._compareAll(x, axis) + + def testAxisType(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 6).reshape([5]).astype(dtype) + for axis_dtype in self.axis_dtypes(): + with self.test_session(), self.test_scope(): + p = array_ops.placeholder(x.dtype) + axis = constant_op.constant(0, axis_dtype) + math_ops.cumsum(p, axis).eval(feed_dict={p: x}) + + def test1D(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 6).reshape([5]).astype(dtype) + for axis in (-1, 0): + self._compareAll(x, axis) + + def test2D(self): + for dtype in self.valid_dtypes: + x = np.arange(0, 10).reshape([2, 5]).astype(dtype) + for axis in (-2, -1, 0, 1): + self._compareAll(x, axis) + + def test3D(self): + for dtype in self.valid_dtypes: + x = np.arange(0, 20).reshape([2, 2, 5]).astype(dtype) + for axis in (-3, -2, -1, 0, 1, 2): + self._compareAll(x, axis) + + def test6D(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 145).reshape([2, 2, 3, 3, 2, 2]).astype(dtype) + for axis in range(-6, 6, 3): + self._compareAll(x, axis) + + def testInvalidAxis(self): + x = np.arange(0, 10).reshape([2, 5]).astype(np.float32) + with self.test_session(), self.test_scope(): + input_tensor = ops.convert_to_tensor(x) + with self.assertRaisesWithPredicateMatch( + errors_impl.InvalidArgumentError, + lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): + math_ops.cumsum(input_tensor, -3).eval() + with self.assertRaisesWithPredicateMatch( + errors_impl.InvalidArgumentError, + lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): + math_ops.cumsum(input_tensor, 2).eval() + with self.assertRaisesWithPredicateMatch( + errors_impl.InvalidArgumentError, + lambda e: "axis must be a scalar" in str(e)): + math_ops.cumsum(input_tensor, [0]).eval() + + +class CumprodTest(XLATestCase): + + valid_dtypes = [np.float32] + + def axis_dtypes(self): + return set(self.int_types).intersection([np.int32, np.int64]) + + def _compare(self, x, axis, exclusive, reverse): + np_out = handle_options(np.cumprod, x, axis, exclusive, reverse) + with self.test_session(), self.test_scope(): + p = array_ops.placeholder(x.dtype) + prod = math_ops.cumprod(p, axis, exclusive, reverse) + tf_out = prod.eval(feed_dict={p: x}) + + self.assertAllClose(np_out, tf_out) + + def _compareAll(self, x, axis): + for exclusive in [True, False]: + for reverse in [True, False]: + self._compare(x, axis, exclusive, reverse) + + def testEmpty(self): + for dtype in self.valid_dtypes: + x = np.zeros([0]).astype(dtype) + for axis in (-1, 0): + self._compareAll(x, axis) + + def testAxisType(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 6).reshape([5]).astype(dtype) + for axis_dtype in self.axis_dtypes(): + with self.test_session(), self.test_scope(): + p = array_ops.placeholder(x.dtype) + axis = constant_op.constant(0, axis_dtype) + math_ops.cumprod(x, axis).eval(feed_dict={p: x}) + + def test1D(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 6).reshape([5]).astype(dtype) + for axis in (-1, 0): + self._compareAll(x, axis) + + def test2D(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 11).reshape([2, 5]).astype(dtype) + for axis in (-2, -1, 0, 1): + self._compareAll(x, axis) + + def test3D(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 21).reshape([2, 2, 5]).astype(dtype) + for axis in (-3, -2, -1, 0, 1, 2): + self._compareAll(x, axis) + + def test6D(self): + for dtype in self.valid_dtypes: + x = np.arange(1, 145).reshape([2, 2, 3, 3, 2, 2]).astype(dtype) + for axis in range(-6, 6, 3): + self._compareAll(x, axis) + + def testInvalidAxis(self): + x = np.arange(0, 10).reshape([2, 5]).astype(np.float32) + with self.test_session(), self.test_scope(): + input_tensor = ops.convert_to_tensor(x) + with self.assertRaisesWithPredicateMatch( + errors_impl.InvalidArgumentError, + lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): + math_ops.cumprod(input_tensor, -3).eval() + with self.assertRaisesWithPredicateMatch( + errors_impl.InvalidArgumentError, + lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): + math_ops.cumprod(input_tensor, 2).eval() + with self.assertRaisesWithPredicateMatch( + errors_impl.InvalidArgumentError, + lambda e: "axis must be a scalar" in str(e)): + math_ops.cumprod(input_tensor, [0]).eval() + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index d57273d844..6a1a5467e0 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -52,6 +52,8 @@ Status BackwardsConstAnalysis(const Graph& g, {"Conv2DBackpropInput", "input_sizes"}, {"Conv3DBackpropFilterV2", "filter_sizes"}, {"Conv3DBackpropInputV2", "input_sizes"}, + {"Cumprod", "axis"}, + {"Cumsum", "axis"}, {"DepthwiseConv2dNativeBackpropFilter", "filter_sizes"}, {"DepthwiseConv2dNativeBackpropInput", "input_sizes"}, {"DynamicStitch", "indices"}, diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 6302fece1f..a1720ff919 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -54,6 +54,7 @@ tf_kernel_library( "reshape_op.cc", "retval_op.cc", "reverse_op.cc", + "scan_ops.cc", "segment_reduction_ops.cc", "select_op.cc", "sendrecv_ops.cc", diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc new file mode 100644 index 0000000000..3cc9d14411 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc @@ -0,0 +1,140 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/concat_lib.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace { + +class ScanOp : public XlaOpKernel { + public: + ScanOp(OpKernelConstruction* ctx, bool sum) : XlaOpKernel(ctx), sum_(sum) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("reverse", &reverse_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("exclusive", &exclusive_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + const TensorShape input_shape = ctx->InputShape(0); + const TensorShape tensor_axis_shape = ctx->InputShape(1); + + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tensor_axis_shape), + errors::InvalidArgument("ScanOp: axis must be a scalar, not ", + tensor_axis_shape.DebugString())); + + int64 axis; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &axis)); + if (axis < 0) { + axis += input_shape.dims(); + } + OP_REQUIRES( + ctx, FastBoundsCheck(axis, input_shape.dims()), + errors::InvalidArgument("ScanOp: Expected scan axis in the range [", + -input_shape.dims(), ", ", input_shape.dims(), + "), but got ", axis)); + + DataType dtype = ctx->input_type(0); + + if (input_shape.num_elements() == 0) { + // Exit early if there is nothing to compute. + ctx->SetOutput(0, ctx->Input(0)); + return; + } + + xla::ComputationBuilder* builder = ctx->builder(); + + std::vector window_strides(input_shape.dims(), 1); + std::vector window_dims(input_shape.dims(), 1); + window_dims[axis] = input_shape.dim_size(axis); + + std::vector> padding(input_shape.dims(), {0, 0}); + padding[axis].first = input_shape.dim_size(axis) - 1; + // In exclusive mode, add an extra padding element so there is a complete + // window of padding before the data starts. + if (exclusive_) { + ++padding[axis].first; + } + if (reverse_) { + std::swap(padding[axis].first, padding[axis].second); + } + + xla::ComputationDataHandle input = ctx->Input(0); + xla::ComputationDataHandle init; + const xla::Computation* reducer; + if (sum_) { + init = XlaHelpers::Zero(builder, dtype); + reducer = ctx->GetOrCreateAdd(dtype); + } else { + init = XlaHelpers::One(builder, dtype); + reducer = ctx->GetOrCreateMul(dtype); + } + auto output = builder->ReduceWindowWithGeneralPadding( + ctx->Input(0), init, *reducer, window_dims, window_strides, padding); + + // In exclusive mode, we have computed an extra element containing the sum + // of all the input elements. Slice off this extra "last" element. + if (exclusive_) { + if (reverse_) { + output = builder->SliceInDim(output, 1, input_shape.dim_size(axis) + 1, + 1, axis); + + } else { + output = + builder->SliceInDim(output, 0, input_shape.dim_size(axis), 1, axis); + } + } + ctx->SetOutput(0, output); + } + + private: + const bool sum_; // True=cumulative sum. False=cumulative product. + bool reverse_; + bool exclusive_; +}; + +class CumsumOp : public ScanOp { + public: + explicit CumsumOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/true) {} +}; +// TODO(phawkins): implement non-float windowed reductions in XLA and remove the +// type constraint. +REGISTER_XLA_OP(Name("Cumsum").TypeConstraint("T", DT_FLOAT), CumsumOp); + +class CumprodOp : public ScanOp { + public: + explicit CumprodOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/false) {} +}; +// TODO(phawkins): implement non-float windowed reductions in XLA and remove the +// type constraint. +REGISTER_XLA_OP(Name("Cumprod").TypeConstraint("T", DT_FLOAT), CumprodOp); + +} // anonymous namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc index 651bafd6c5..78e770c62b 100644 --- a/tensorflow/compiler/tf2xla/xla_context.cc +++ b/tensorflow/compiler/tf2xla/xla_context.cc @@ -178,6 +178,20 @@ const xla::Computation* XlaContext::GetOrCreateAdd(const DataType type) { }); } +const xla::Computation* XlaContext::GetOrCreateMul(const DataType type) { + return LookupOrCreate(type, &mul_func_, [this, type] { + const string type_string = DataTypeString(type); + VLOG(1) << "Building Mul() for " << type_string; + xla::ComputationBuilder b(builder()->client(), "mul<" + type_string + ">"); + xla::PrimitiveType xla_type; + TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); + auto x = b.Parameter(0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); + auto y = b.Parameter(1, xla::ShapeUtil::MakeShape(xla_type, {}), "y"); + b.Mul(x, y); + return b.Build().ConsumeValueOrDie(); + }); +} + const xla::Computation* XlaContext::LookupOrCreate( DataType type, ComputationMap* out, const std::function& create) { diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h index de8aafa362..55d2995987 100644 --- a/tensorflow/compiler/tf2xla/xla_context.h +++ b/tensorflow/compiler/tf2xla/xla_context.h @@ -102,6 +102,11 @@ class XlaContext : public ResourceBase { // separate specialization of the computation for each DataType. const xla::Computation* GetOrCreateAdd(const DataType type); + // Get an XLA lambda to compute Mul. This is cached in the + // XlaContext since it may be used by multiple Ops. There is a + // separate specialization of the computation for each DataType. + const xla::Computation* GetOrCreateMul(const DataType type); + // The name of the XlaContext resource during symbolic graph execution. static const char kXlaContextResourceName[]; @@ -155,6 +160,9 @@ class XlaContext : public ResourceBase { // Cached computation to compute Sum of two elements, specialized by type. ComputationMap add_func_; + // Cached computation to compute Mul of two elements, specialized by type. + ComputationMap mul_func_; + // Cached computation to compute Sigmoid of an element, specialized by type. ComputationMap sigmoid_func_; diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index a052bb105e..f16472cac8 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -417,6 +417,11 @@ const xla::Computation* XlaOpKernelContext::GetOrCreateAdd( return XlaContext::Get(context_).GetOrCreateAdd(type); } +const xla::Computation* XlaOpKernelContext::GetOrCreateMul( + const DataType type) { + return XlaContext::Get(context_).GetOrCreateMul(type); +} + XlaOpKernel::XlaOpKernel(OpKernelConstruction* context) : OpKernel(context) {} void XlaOpKernel::Compute(OpKernelContext* context) { diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index 76bcf594e6..06845a674e 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -210,6 +210,11 @@ class XlaOpKernelContext { // separate specialization of the computation for each DataType. const xla::Computation* GetOrCreateAdd(const DataType type); + // Gets an XLA lambda to compute Mul. This is cached in the + // XlaContext since it may be used by multiple Ops. There is a + // separate specialization of the computation for each DataType. + const xla::Computation* GetOrCreateMul(const DataType type); + private: OpKernelContext* const context_; }; -- GitLab From f2f6356f15f4c8b5c560ee8aec7bf1dd097bfbfb Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Tue, 28 Nov 2017 16:31:57 -0800 Subject: [PATCH 0913/1801] Automated g4 rollback of changelist 177191521 PiperOrigin-RevId: 177237037 --- tensorflow/core/kernels/strided_slice_op.cc | 1 + tensorflow/core/kernels/strided_slice_op_gpu.cu.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 8fc40db3cc..73b6d4cf6a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc index a8487f49f4..8ca27e3b92 100644 --- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc @@ -53,6 +53,7 @@ typedef Eigen::GpuDevice GPUDevice; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); TF_CALL_complex64(DEFINE_GPU_KERNELS); TF_CALL_complex128(DEFINE_GPU_KERNELS); +TF_CALL_int64(DEFINE_GPU_KERNELS); DEFINE_GPU_KERNELS(int32); #undef DEFINE_GPU_KERNELS -- GitLab From 625ae88377b16705378065576cfd6983bb876435 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Tue, 28 Nov 2017 16:47:47 -0800 Subject: [PATCH 0914/1801] Round-robin variables across local devices with `replicate_model_fn`. When the user specifies `replicate_model_fn(... devices=[d1, d2, ... dN])` all variables are going to be stored on each device an round-robin fashion. They are still going to be shared by all devices. PiperOrigin-RevId: 177239111 --- tensorflow/contrib/estimator/BUILD | 2 +- .../python/estimator/replicate_model_fn.py | 94 +++++++++---- .../estimator/replicate_model_fn_test.py | 130 ++++++++++++++++-- 3 files changed, 188 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 8395e2db5e..e4d51aa148 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -346,7 +346,7 @@ py_library( cuda_py_test( name = "replicate_model_fn_test", - size = "small", + size = "medium", srcs = ["python/estimator/replicate_model_fn_test.py"], additional_deps = [ "//tensorflow/python/estimator", diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index d9c83aa865..6f7f37473f 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -42,10 +42,45 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging +from tensorflow.python.training import device_setter as device_setter_lib from tensorflow.python.training import training_util -def replicate_model_fn(model_fn, optimizer_fn, devices=None): +class Mode(object): + """Modes for variables replication used for forcing a particular mode.""" + + AUTO = 0 + """Use internal heuristics for choosing the best Mode value. + + This mode is supposed to be the most appropriate in most cases given what + is known about the system. + """ + # TODO(isaprykin): Query system configuration to choose modes other than + # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate. + + SHARED_LOCAL_PARAMETER_SERVER = 2 + """Variables are placed on a single device and shared across all devices. + + Two ways to achieve this replication over available GPUs are supported: + 1) If exactly 1 GPU is detected, then variables and operations are placed + onto GPU. + 2) If more than 1 GPU is detected, then variables are going to be placed on + the CPU. Replicas of operations are placed on each individual GPU. + """ + + SHARED_ROUND_ROBIN = 3 + """Variables are placed on all devices in a round-robin fashion. + + Every subsequent variable is placed on the next device. There is only one + copy of each variable that is shared across all devices. + """ + + # TODO(isaprykin): Implement `REPLICATED_ALL_REDUCE`. + REPLICATED_ALL_REDUCE = 3 + """Variables are mirrored on all devices.""" + + +def replicate_model_fn(model_fn, optimizer_fn, devices=None, mode=Mode.AUTO): """Replicate `Estimator.model_fn` over GPUs within a single host. The given `model_fn` specifies a single forward pass of a model. To replicate @@ -58,14 +93,11 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None): optimizer. If `devices` are `None`, then all available GPUs are going to be used for - replication. If no GPUs are available, then the model is going to be - placed on the CPU. + replication: `devices=[]`. If no GPUs are available, + then the model is going to be placed on the CPU: `devices=['/device:CPU:0']`. - Two modes of local replication over available GPUs are supported: - 1) If exactly 1 GPU is detected, then variables and operations are placed - onto GPU. - 2) If more than 1 GPU is detected, then variables are going to be placed on - the CPU. Replicas of operations are placed on each individual GPU. + Varibles are placed on to `devices` according to the given `mode`. Operations + are going for each tower are going to be copied on each device. Here is an example of how one might use their `model_fn` to run over GPUs: ```python @@ -127,6 +159,8 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None): argument can be used to replice only on the subset of available GPUs. If `None`, then all available GPUs are going to be used for replication. If no GPUs are available, then the model is going to be placed on the CPU. + mode: An optional argument that specifies the replication method used for + distributing variables across devices. Returns: A replicated version of the supplied `model_fn`. Returned function that @@ -137,16 +171,21 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None): devices = _get_local_devices('GPU') or _get_local_devices('CPU') is_a_single_gpu_case = len(devices) == 1 and 'GPU' in devices[0] - local_ps_device = '/{}:0'.format('GPU' if is_a_single_gpu_case else 'CPU') + consolidation_device = '/{}:0'.format('GPU' + if is_a_single_gpu_case else 'CPU') - tf_logging.info('Replicating the `model_fn` across {}. Local parameter ' - 'server device is going to be {}.'.format( - devices, local_ps_device)) + ps_devices = [consolidation_device] + if mode == Mode.SHARED_ROUND_ROBIN: + ps_devices = devices + + tf_logging.info('Replicating the `model_fn` across {}. Variables are going ' + 'to be placed on {}. Consolidation device is going to be {}.' + .format(devices, ps_devices, consolidation_device)) def replicated_model_fn(features, labels, mode, params=None, config=None): """Replicated version of `model_fn` to be used instead.""" feature_shards, label_shards = _split_batch( - features, labels, len(devices), device=local_ps_device) + features, labels, len(devices), device=consolidation_device) tower_specs = _get_loss_towers( model_fn=model_fn, mode=mode, @@ -155,17 +194,17 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None): params=params, config=config, devices=devices, - local_ps_device=local_ps_device) + local_ps_devices=ps_devices) if mode == model_fn_lib.ModeKeys.TRAIN: train_op = _minimize_towers(tower_specs, _call_optimizer_fn(optimizer_fn, params)) return _train_spec( - tower_specs, train_op, aggregation_device=local_ps_device) + tower_specs, train_op, aggregation_device=consolidation_device) elif mode == model_fn_lib.ModeKeys.EVAL: - return _eval_spec(tower_specs, aggregation_device=local_ps_device) + return _eval_spec(tower_specs, aggregation_device=consolidation_device) elif mode == model_fn_lib.ModeKeys.PREDICT: - return _predict_spec(tower_specs, aggregation_device=local_ps_device) + return _predict_spec(tower_specs, aggregation_device=consolidation_device) return replicated_model_fn @@ -222,7 +261,7 @@ def _get_loss_towers(model_fn, params, config, devices, - local_ps_device, + local_ps_devices, name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN): """Replicate the loss computation across devices.""" tower_specs = [] @@ -234,15 +273,22 @@ def _get_loss_towers(model_fn, if 'config' in model_fn_args: optional_params['config'] = copy.deepcopy(config) + # pylint: disable=protected-access + round_robin_strategy = device_setter_lib._RoundRobinStrategy( + num_tasks=len(local_ps_devices)) + # pylint: enable=protected-access + for i, device in enumerate(devices): is_the_first_tower = (i == 0) device_setter = _local_device_setter( - worker_device=device, ps_device=local_ps_device) + worker_device=device, + ps_devices=local_ps_devices, + ps_strategy=round_robin_strategy) - # We would like to preserve the names of the variables and ops that a user - # might be relying on. Names with prefix are going to resolve to variables - # and ops of the first tower. + # We would like to preserve the names of the variables and ops that the user + # might be relying on. Names without a prefix are going to resolve to + # variables and ops of the first tower. name_scope = name_scope_pattern if is_the_first_tower: name_scope = '' @@ -263,7 +309,7 @@ def _get_loss_towers(model_fn, return tower_specs -def _local_device_setter(ps_device, worker_device): +def _local_device_setter(worker_device, ps_devices, ps_strategy): """A device setter that puts distributes Var/Ops to PS/workers.""" ps_ops = ['Variable', 'VariableV2', 'VarHandleOp'] @@ -273,7 +319,7 @@ def _local_device_setter(ps_device, worker_device): node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def if node_def.op in ps_ops: ps_device_spec = framework_device.DeviceSpec.from_string( - '{}'.format(ps_device)) + '{}'.format(ps_devices[ps_strategy(op)])) ps_device_spec.merge_from(current_device) return ps_device_spec.to_string() diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index ffe69f89b4..662021853d 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -49,15 +49,29 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.saved_model import signature_constants from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import device_setter from tensorflow.python.training import gradient_descent +# TODO(isaprykin): Parametrize all the tests on replicate_model_fn.Mode when +# it's supported. class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() - def test_complete_flow(self): + def test_complete_flow_with_mode_auto(self): + return self._complete_flow_with_mode(replicate_model_fn.Mode.AUTO) + + def test_complete_flow_with_mode_local_ps_server(self): + return self._complete_flow_with_mode( + replicate_model_fn.Mode.SHARED_LOCAL_PARAMETER_SERVER) + + def test_complete_flow_with_mode_round_robin(self): + return self._complete_flow_with_mode( + replicate_model_fn.Mode.SHARED_ROUND_ROBIN) + + def _complete_flow_with_mode(self, mode): n_classes = 3 input_dimension = 2 batch_size = 12 @@ -109,7 +123,8 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): model_fn=replicate_model_fn.replicate_model_fn( estimator.model_fn, optimizer_fn, - devices=['/gpu:0', '/gpu:1', '/gpu:2']), + devices=['/gpu:0', '/gpu:1', '/gpu:2'], + mode=mode), model_dir=estimator.model_dir, config=estimator.config, params=estimator.params) @@ -359,7 +374,7 @@ class GetLossTowersTest(test_util.TensorFlowTestCase): params=None, config=None, devices=['/gpu:0', '/gpu:1'], - local_ps_device='/gpu:0', + local_ps_devices=['/gpu:0'], name_scope_pattern='test_tower_{}') session.run(variables.global_variables_initializer()) @@ -382,6 +397,54 @@ class GetLossTowersTest(test_util.TensorFlowTestCase): c = variable_scope.get_variable('c', dtype=dtypes.float64) self.assertEqual(0.25, session.run(c)) + def test_variables_are_round_robined_correctly(self): + """Test that creates multiple variables and tests round-robin placement.""" + + def model_fn(mode, features, labels, params): + del params + for variable_name in ['a', 'b', 'c', 'd']: + c = variable_scope.get_variable( + variable_name, + initializer=constant_op.constant(0.25, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c) + labels = np.array([0.1, 0.2, 0.3, labels[0]]) + loss = losses.absolute_difference( + labels=labels, + predictions=predictions, + reduction=losses.Reduction.SUM) + return model_fn_lib.EstimatorSpec( + mode=mode, loss=math_ops.reduce_sum(loss)) + + with self.test_session() as session: + tower_specs = replicate_model_fn._get_loss_towers( + model_fn, + mode=None, + features=[[0.6], [1.6], [2.6]], + labels=[[0.6], [0.6], [2.6]], + params=None, + config=None, + devices=['/gpu:0', '/gpu:1', '/gpu:3'], + local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'], + name_scope_pattern='test_tower_{}') + session.run(variables.global_variables_initializer()) + + self.assertEqual(len(tower_specs), 3) + self.assertEqual('/device:GPU:0', tower_specs[0].loss.device) + self.assertEqual('/device:GPU:1', tower_specs[1].loss.device) + self.assertEqual('/device:GPU:3', tower_specs[2].loss.device) + + with variable_scope.variable_scope('', reuse=True): + a = variable_scope.get_variable('a', dtype=dtypes.float64) + self.assertEqual('/device:GPU:0', a.device) + b = variable_scope.get_variable('b', dtype=dtypes.float64) + self.assertEqual('/device:GPU:1', b.device) + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual('/device:GPU:3', c.device) + d = variable_scope.get_variable('d', dtype=dtypes.float64) + self.assertEqual('/device:GPU:0', d.device) + class SplitBatchTest(test_util.TensorFlowTestCase): @@ -604,7 +667,7 @@ class PredictSpecTest(test_util.TensorFlowTestCase): params=None, config=None, devices=['/gpu:0', '/gpu:1'], - local_ps_device='/gpu:0', + local_ps_devices=['/gpu:0'], ) session.run(variables.global_variables_initializer()) @@ -850,25 +913,66 @@ class GetLocalDevicesTest(test_util.TensorFlowTestCase): class LocalDeviceSetterTest(test_util.TensorFlowTestCase): def test_vars_are_on_ps_but_ops_are_on_workers(self): + ps_devices = ['/device:GPU:3'] + round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices)) + + local_device_setter = replicate_model_fn._local_device_setter( + ps_devices=ps_devices, + ps_strategy=round_robin, + worker_device='/device:GPU:2') + + with ops_lib.device(local_device_setter): + a = variables.Variable(0.01) + self.assertEqual('/device:GPU:3', a.device) + + b = variables.Variable(0.02) + self.assertEqual('/device:GPU:3', b.device) + + c = variables.Variable(0.03) + self.assertEqual('/device:GPU:3', c.device) + + a_op = array_ops.concat(a, axis=0) + self.assertEqual('/device:GPU:2', a_op.device) + + b_op = array_ops.concat(b, axis=0) + self.assertEqual('/device:GPU:2', b_op.device) + + def test_round_robin_placement(self): + ps_devices = [ + '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4' + ] + round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices)) + local_device_setter = replicate_model_fn._local_device_setter( - ps_device='/device:GPU:3', worker_device='/device:GPU:2') + ps_devices=ps_devices, + ps_strategy=round_robin, + worker_device='/device:GPU:2') with ops_lib.device(local_device_setter): - c = variables.Variable(0.01) + a = variables.Variable(0.01) + self.assertEqual('/device:GPU:0', a.device) + + b = variables.Variable(0.02) + self.assertEqual('/device:GPU:1', b.device) + + c = variables.Variable(0.03) self.assertEqual('/device:GPU:3', c.device) - cc = variables.Variable(0.02) - self.assertEqual('/device:GPU:3', cc.device) + a_op = array_ops.concat(a, axis=0) + self.assertEqual('/device:GPU:2', a_op.device) + + b_op = array_ops.concat(b, axis=0) + self.assertEqual('/device:GPU:2', b_op.device) - ccc = variables.Variable(0.03) - self.assertEqual('/device:GPU:3', ccc.device) + c = variables.Variable(0.03) + self.assertEqual('/device:GPU:4', c.device) + + d = variables.Variable(0.03) + self.assertEqual('/device:GPU:0', d.device) c_op = array_ops.concat(c, axis=0) self.assertEqual('/device:GPU:2', c_op.device) - cc_op = array_ops.concat(cc, axis=0) - self.assertEqual('/device:GPU:2', cc_op.device) - class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): -- GitLab From 57839fbf307fb01a280505f1f964d7331104d8f3 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Tue, 28 Nov 2017 17:02:01 -0800 Subject: [PATCH 0915/1801] Re-using (the more general) DeserializeSparse kernel to implement DeserializeSparseMany and improving documentation. PiperOrigin-RevId: 177241063 --- .../base_api/api_def_DeserializeSparse.pbtxt | 43 +++++ .../core/kernels/serialize_sparse_op.cc | 177 +----------------- tensorflow/core/ops/sparse_ops.cc | 42 +++++ .../sparse_serialization_ops_test.py | 4 +- tensorflow/python/ops/sparse_ops.py | 45 ++++- 5 files changed, 129 insertions(+), 182 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt index 00e96c8a15..dfaa531cbc 100644 --- a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt @@ -14,4 +14,47 @@ The `dtype` of the serialized `SparseTensor` objects. END } summary: "Deserialize `SparseTensor` objects." + description: <= 0) correspond to a batch. The ranks of the original +`SparseTensor` objects must all match. When the final `SparseTensor` is +created, its rank is the rank of the incoming `SparseTensor` objects plus N; +the sparse tensors have been concatenated along new dimensions, one for each +batch. + +The output `SparseTensor` object's shape values for the original dimensions +are the max across the input `SparseTensor` objects' shape values for the +corresponding dimensions. The new dimensions match the size of the batch. + +The input `SparseTensor` objects' indices are assumed ordered in +standard lexicographic order. If this is not the case, after this +step run `SparseReorder` to restore index ordering. + +For example, if the serialized input is a `[2 x 3]` matrix representing two +original `SparseTensor` objects: + + index = [ 0] + [10] + [20] + values = [1, 2, 3] + shape = [50] + +and + + index = [ 2] + [10] + values = [4, 5] + shape = [30] + +then the final deserialized `SparseTensor` will be: + + index = [0 0] + [0 10] + [0 20] + [1 2] + [1 10] + values = [1, 2, 3, 4, 5] + shape = [2 50] +END } diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc index cfb86904d5..f4159da229 100644 --- a/tensorflow/core/kernels/serialize_sparse_op.cc +++ b/tensorflow/core/kernels/serialize_sparse_op.cc @@ -409,186 +409,11 @@ class DeserializeSparseOp : public OpKernel { TF_CALL_ALL_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS -template -class DeserializeManySparseOp : public OpKernel { - public: - explicit DeserializeManySparseOp(OpKernelConstruction* context) - : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - const Tensor& serialized_sparse = context->input(0); - OP_REQUIRES(context, TensorShapeUtils::IsMatrix(serialized_sparse.shape()), - errors::InvalidArgument( - "Serialized sparse should be a matrix but received shape ", - serialized_sparse.shape().DebugString())); - OP_REQUIRES( - context, serialized_sparse.shape().dim_size(1) == 3, - errors::InvalidArgument( - "Serialized sparse should have 3 columns but received shape ", - serialized_sparse.shape().DebugString())); - - int num_sparse_tensors = serialized_sparse.shape().dim_size(0); - - OP_REQUIRES( - context, num_sparse_tensors > 0, - errors::InvalidArgument("Must have at least 1 serialized SparseTensor, " - "but input matrix has 0 rows")); - - std::vector indices_to_concat; - std::vector values_to_concat; - std::vector shapes_to_concat; - - const auto& serialized_sparse_t = serialized_sparse.matrix(); - - for (int i = 0; i < num_sparse_tensors; ++i) { - Tensor output_indices(DT_INT64); - Tensor output_values(DataTypeToEnum::value); - Tensor output_shape(DT_INT64); - TensorProto proto_indices; - TensorProto proto_values; - TensorProto proto_shape; - - OP_REQUIRES( - context, - ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)), - errors::InvalidArgument("Could not parse serialized_sparse[", i, - ", 0]")); - OP_REQUIRES(context, - ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)), - errors::InvalidArgument("Could not parse serialized_sparse[", - i, ", 1]")); - OP_REQUIRES(context, - ParseProtoUnlimited(&proto_shape, serialized_sparse_t(i, 2)), - errors::InvalidArgument("Could not parse serialized_sparse[", - i, ", 2]")); - - OP_REQUIRES(context, output_indices.FromProto(proto_indices), - errors::InvalidArgument( - "Could not construct Tensor serialized_sparse[", i, - ", 0] (indices)")); - OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()), - errors::InvalidArgument( - "Expected serialized_sparse[", i, - ", 0] to represent an index matrix but received shape ", - output_indices.shape().DebugString())); - OP_REQUIRES(context, output_values.FromProto(proto_values), - errors::InvalidArgument( - "Could not construct Tensor serialized_sparse[", i, - ", 1] (values)")); - OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()), - errors::InvalidArgument( - "Expected serialized_sparse[", i, - ", 1] to represent a values vector but received shape ", - output_values.shape().DebugString())); - OP_REQUIRES(context, output_shape.FromProto(proto_shape), - errors::InvalidArgument( - "Could not construct Tensor serialized_sparse[", i, - ", 2] (shape)")); - OP_REQUIRES( - context, TensorShapeUtils::IsVector(output_shape.shape()), - errors::InvalidArgument("Expected serialized_sparse[", i, - ", 1] to be a shape vector but its shape is ", - output_shape.shape().DebugString())); - - OP_REQUIRES( - context, DataTypeToEnum::value == output_values.dtype(), - errors::InvalidArgument( - "Requested SparseTensor of type ", - DataTypeString(DataTypeToEnum::value), " but SparseTensor[", i, - "].values.dtype() == ", DataTypeString(output_values.dtype()))); - - int64 num_entries = output_indices.dim_size(0); - OP_REQUIRES(context, num_entries == output_values.dim_size(0), - errors::InvalidArgument( - "Expected row counts of SparseTensor[", i, - "].indices and SparseTensor[", i, - "].values to match but they do not: ", num_entries, - " vs. ", output_values.dim_size(0))); - int rank = output_indices.dim_size(1); - OP_REQUIRES( - context, rank == output_shape.dim_size(0), - errors::InvalidArgument("Expected column counts of SparseTensor[", i, - "].indices to match size of SparseTensor[", i, - "].shape " - "but they do not: ", - rank, " vs. ", output_shape.dim_size(0))); - - // Now we expand each SparseTensors' indices and shape by - // prefixing a dimension - Tensor expanded_indices( - DT_INT64, TensorShape({num_entries, 1 + output_indices.dim_size(1)})); - Tensor expanded_shape(DT_INT64, - TensorShape({1 + output_shape.dim_size(0)})); - const auto& output_indices_t = output_indices.matrix(); - const auto& output_shape_t = output_shape.vec(); - auto expanded_indices_t = expanded_indices.matrix(); - auto expanded_shape_t = expanded_shape.vec(); - expanded_indices_t.chip<1>(0).setZero(); - Eigen::DSizes indices_start(0, 1); - Eigen::DSizes indices_sizes(num_entries, rank); - expanded_indices_t.slice(indices_start, indices_sizes) = output_indices_t; - expanded_shape_t(0) = 1; - std::copy_n(&output_shape_t(0), rank, &expanded_shape_t(1)); - - TensorShape expanded_tensor_shape(expanded_shape.vec()); - - indices_to_concat.push_back(expanded_indices); - values_to_concat.push_back(output_values); - shapes_to_concat.push_back(expanded_tensor_shape); - } - - int rank = -1; - for (int i = 0; i < num_sparse_tensors; ++i) { - if (rank < 0) rank = shapes_to_concat[i].dims(); - OP_REQUIRES(context, rank == shapes_to_concat[i].dims(), - errors::InvalidArgument( - "Inconsistent rank across SparseTensors: rank prior to " - "SparseTensor[", - i, "] was: ", rank, " but rank of SparseTensor[", i, - "] is: ", shapes_to_concat[i].dims())); - } - - // SparseTensor::Concat requires consistent shape for all but the - // primary order dimension (dimension 0 in this case). So we get - // the maximum value across all the input SparseTensors for each - // dimension and use that. - TensorShape preconcat_shape(shapes_to_concat[0]); - for (int i = 0; i < num_sparse_tensors; ++i) { - for (int d = 0; d < rank; ++d) { - preconcat_shape.set_dim(d, std::max(preconcat_shape.dim_size(d), - shapes_to_concat[i].dim_size(d))); - } - } - - // Dimension 0 is the primary dimension. - gtl::InlinedVector std_order(rank); - std::iota(std_order.begin(), std_order.end(), 0); - - std::vector tensors_to_concat; - tensors_to_concat.reserve(num_sparse_tensors); - for (int i = 0; i < num_sparse_tensors; ++i) { - tensors_to_concat.emplace_back(indices_to_concat[i], values_to_concat[i], - preconcat_shape, std_order); - } - - SparseTensor output = SparseTensor::Concat(tensors_to_concat); - - Tensor final_output_shape(DT_INT64, TensorShape({output.dims()})); - - std::copy_n(output.shape().data(), output.dims(), - final_output_shape.vec().data()); - - context->set_output(0, output.indices()); - context->set_output(1, output.values()); - context->set_output(2, final_output_shape); - } -}; - #define REGISTER_KERNELS(type) \ REGISTER_KERNEL_BUILDER(Name("DeserializeManySparse") \ .Device(DEVICE_CPU) \ .TypeConstraint("dtype"), \ - DeserializeManySparseOp) + DeserializeSparseOp) TF_CALL_ALL_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index 8414519f0b..772e2531dc 100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -256,6 +256,48 @@ REGISTER_OP("DeserializeSparse") .Doc(R"doc( Deserialize `SparseTensor` objects. +The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where +the last dimension stores serialized `SparseTensor` objects and the other N +dimensions (N >= 0) correspond to a batch. The ranks of the original +`SparseTensor` objects must all match. When the final `SparseTensor` is +created, its rank is the rank of the incoming `SparseTensor` objects plus N; +the sparse tensors have been concatenated along new dimensions, one for each +batch. + +The output `SparseTensor` object's shape values for the original dimensions +are the max across the input `SparseTensor` objects' shape values for the +corresponding dimensions. The new dimensions match the size of the batch. + +The input `SparseTensor` objects' indices are assumed ordered in +standard lexicographic order. If this is not the case, after this +step run `SparseReorder` to restore index ordering. + +For example, if the serialized input is a `[2 x 3]` matrix representing two +original `SparseTensor` objects: + + index = [ 0] + [10] + [20] + values = [1, 2, 3] + shape = [50] + +and + + index = [ 2] + [10] + values = [4, 5] + shape = [30] + +then the final deserialized `SparseTensor` will be: + + index = [0 0] + [0 10] + [0 20] + [1 2] + [1 10] + values = [1, 2, 3, 4, 5] + shape = [2 50] + serialized_sparse: The serialized `SparseTensor` objects. The last dimension must have 3 columns. dtype: The `dtype` of the serialized `SparseTensor` objects. diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py index 78c113f514..d1a90952c7 100644 --- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py @@ -254,8 +254,8 @@ class SerializeSparseTest(test.TestCase): serialized_concat, dtype=dtypes.int32) with self.assertRaisesOpError( - r"Inconsistent rank across SparseTensors: rank prior to " - r"SparseTensor\[1\] was: 3 but rank of SparseTensor\[1\] is: 4"): + r"Inconsistent shape across SparseTensors: rank prior to " + r"SparseTensor\[1\] was: 2 but rank of SparseTensor\[1\] is: 3"): sess.run(sp_deserialized, {sp_input0: input0_val, sp_input1: input1_val}) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index cdfe9e1c1e..9bdc124c83 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -1437,10 +1437,47 @@ def serialize_many_sparse(sp_input, name=None): def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None): """Deserialize `SparseTensor` objects. - The input is expected to have shape [d_1, ..., d_m, 3], where the last - dimension stores a serialized `SparseTensor`. The method deserializes - all input `SparseTensor`s, concatenates them into a single tensor, and - reshapes the sparse tensor to preserve the structure of the input. + The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where + the last dimension stores serialized `SparseTensor` objects and the other N + dimensions (N >= 0) correspond to a batch. The ranks of the original + `SparseTensor` objects must all match. When the final `SparseTensor` is + created, its rank is the rank of the incoming `SparseTensor` objects plus N; + the sparse tensors have been concatenated along new dimensions, one for each + batch. + + The output `SparseTensor` object's shape values for the original dimensions + are the max across the input `SparseTensor` objects' shape values for the + corresponding dimensions. The new dimensions match the size of the batch. + + The input `SparseTensor` objects' indices are assumed ordered in + standard lexicographic order. If this is not the case, after this + step run `SparseReorder` to restore index ordering. + + For example, if the serialized input is a `[2 x 3]` matrix representing two + original `SparseTensor` objects: + + index = [ 0] + [10] + [20] + values = [1, 2, 3] + shape = [50] + + and + + index = [ 2] + [10] + values = [4, 5] + shape = [30] + + then the final deserialized `SparseTensor` will be: + + index = [0 0] + [0 10] + [0 20] + [1 2] + [1 10] + values = [1, 2, 3, 4, 5] + shape = [2 50] Args: serialized_sparse: The serialized `SparseTensor` objects. -- GitLab From 73a803fb854fc842700a865d4742ae893ed236d3 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Tue, 28 Nov 2017 17:03:33 -0800 Subject: [PATCH 0916/1801] Fix flakiness in map_dataset_op_test. PiperOrigin-RevId: 177241314 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 0790a4a737..4cb69d7c8e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -275,7 +275,7 @@ py_test( py_test( name = "map_dataset_op_test", - size = "small", + size = "medium", srcs = ["map_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ -- GitLab From e2f9107effb0c5c4cee49a71562865d9e919b3d0 Mon Sep 17 00:00:00 2001 From: Tayo Oguntebi Date: Tue, 28 Nov 2017 17:20:39 -0800 Subject: [PATCH 0917/1801] Adds minor-dim pooling tests for cases in which windows exist entirely in padding. Modifies reference util reduce-window 1D implementation to accept general padding. PiperOrigin-RevId: 177243527 --- tensorflow/compiler/xla/reference_util.cc | 18 ++- tensorflow/compiler/xla/reference_util.h | 6 + .../compiler/xla/tests/reduce_window_test.cc | 112 +++++++++++++----- 3 files changed, 102 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index 5bb81b80dd..bdf92eaed1 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -195,14 +195,26 @@ ReferenceUtil::ReduceWindow1DGeneric( const tensorflow::gtl::ArraySlice& window, const tensorflow::gtl::ArraySlice& stride, Padding padding) { std::vector dim_lengths{static_cast(operand.size())}; - auto padding_both = xla::MakePadding(dim_lengths, window, stride, padding); + return ReduceWindow1DGeneric( + operand, init, reduce_func, window, stride, + xla::MakePadding(dim_lengths, window, stride, padding)); +} +/* static */ std::unique_ptr> +ReferenceUtil::ReduceWindow1DGeneric( + const tensorflow::gtl::ArraySlice& operand, float init, + const std::function& reduce_func, + const tensorflow::gtl::ArraySlice& window, + const tensorflow::gtl::ArraySlice& stride, + const tensorflow::gtl::ArraySlice>& padding) { + std::vector dim_lengths{static_cast(operand.size())}; std::vector window_counts(window.size(), 0); std::vector pad_low(window.size(), 0); for (int64 i = 0; i < window.size(); ++i) { + int64 padded_width = padding[i].first + dim_lengths[i] + padding[i].second; window_counts[i] = - WindowCount(dim_lengths[i], window[i], stride[i], padding); - pad_low[i] = padding_both[i].first; + window_util::StridedBound(padded_width, window[i], stride[i]); + pad_low[i] = padding[i].first; } auto result = MakeUnique>(window_counts[0]); diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 62d455d71a..ee244e9a66 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -184,6 +184,12 @@ class ReferenceUtil { const std::function& reduce_func, const tensorflow::gtl::ArraySlice& window, const tensorflow::gtl::ArraySlice& stride, Padding padding); + static std::unique_ptr> ReduceWindow1DGeneric( + const tensorflow::gtl::ArraySlice& operand, float init, + const std::function& reduce_func, + const tensorflow::gtl::ArraySlice& window, + const tensorflow::gtl::ArraySlice& stride, + const tensorflow::gtl::ArraySlice>& padding); static std::unique_ptr> ReduceWindow4DGeneric( const Array4D& operand, float init, const std::function& reduce_func, diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 0601a1466b..aa035f0ba5 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -962,68 +962,114 @@ struct R1ReduceWindowTestData { int64 base_bounds[1]; int64 window_bounds[1]; int64 strides[1]; - Padding padding; + int64 pad_low[1]; + int64 pad_high[1]; Reducer reducer; } kR1TestCases[] = { {/*base_bounds=*/{1}, /*window_bounds=*/{1}, /*strides=*/{1}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/{xla::MakePadding({1}, {1}, {1}, Padding::kValid)[0].first}, + /*pad_high=*/{xla::MakePadding({1}, {1}, {1}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kAdd}, {/*base_bounds=*/{3}, /*window_bounds=*/{3}, /*strides=*/{1}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/{xla::MakePadding({3}, {3}, {1}, Padding::kValid)[0].first}, + /*pad_high=*/{xla::MakePadding({3}, {3}, {1}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kAdd}, {/*base_bounds=*/{3}, /*window_bounds=*/{2}, /*strides=*/{1}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/{xla::MakePadding({3}, {2}, {1}, Padding::kValid)[0].first}, + /*pad_high=*/{xla::MakePadding({3}, {2}, {1}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kAdd}, {/*base_bounds=*/{5}, /*window_bounds=*/{1}, /*strides=*/{1}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kMax}, + /*pad_low=*/{xla::MakePadding({5}, {1}, {1}, Padding::kValid)[0].first}, + /*pad_high=*/{xla::MakePadding({5}, {1}, {1}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kMax}, {/*base_bounds=*/{16}, /*window_bounds=*/{4}, /*strides=*/{4}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kMax}, + /*pad_low=*/{xla::MakePadding({16}, {4}, {4}, Padding::kValid)[0].first}, + /*pad_high=*/{xla::MakePadding({16}, {4}, {4}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kMax}, {/*base_bounds=*/{16}, /*window_bounds=*/{4}, /*strides=*/{3}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/{xla::MakePadding({16}, {4}, {3}, Padding::kValid)[0].first}, + /*pad_high=*/{xla::MakePadding({16}, {4}, {3}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kAdd}, - {/*base_bounds=*/{128 * 2}, /*window_bounds=*/{30}, + {/*base_bounds=*/{128 * 2}, + /*window_bounds=*/{30}, /*strides=*/{27}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd}, - - {/*base_bounds=*/{128 * 17}, /*window_bounds=*/{7}, + /*pad_low=*/ + {xla::MakePadding({128 * 2}, {30}, {27}, Padding::kValid)[0].first}, + /*pad_high=*/ + {xla::MakePadding({128 * 2}, {30}, {27}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kAdd}, + + {/*base_bounds=*/{128 * 17}, + /*window_bounds=*/{7}, /*strides=*/{64}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd}, - - {/*base_bounds=*/{128 * 2}, /*window_bounds=*/{32}, + /*pad_low=*/ + {xla::MakePadding({128 * 17}, {7}, {64}, Padding::kValid)[0].first}, + /*pad_high=*/ + {xla::MakePadding({128 * 17}, {7}, {64}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kAdd}, + + {/*base_bounds=*/{128 * 2}, + /*window_bounds=*/{32}, /*strides=*/{56}, - /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/ + {xla::MakePadding({128 * 2}, {32}, {56}, Padding::kValid)[0].first}, + /*pad_high=*/ + {xla::MakePadding({128 * 2}, {32}, {56}, Padding::kValid)[0].second}, + /*reducer=*/Reducer::kAdd}, {/*base_bounds=*/{3}, /*window_bounds=*/{2}, /*strides=*/{1}, - /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/{xla::MakePadding({3}, {2}, {1}, Padding::kSame)[0].first}, + /*pad_high=*/{xla::MakePadding({3}, {2}, {1}, Padding::kSame)[0].second}, + /*reducer=*/Reducer::kAdd}, {/*base_bounds=*/{5}, /*window_bounds=*/{3}, /*strides=*/{2}, - /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/{xla::MakePadding({5}, {3}, {2}, Padding::kSame)[0].first}, + /*pad_high=*/{xla::MakePadding({5}, {3}, {2}, Padding::kSame)[0].second}, + /*reducer=*/Reducer::kAdd}, {/*base_bounds=*/{16}, /*window_bounds=*/{4}, /*strides=*/{3}, - /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd}, + /*pad_low=*/{xla::MakePadding({16}, {4}, {3}, Padding::kSame)[0].first}, + /*pad_high=*/{xla::MakePadding({16}, {4}, {3}, Padding::kSame)[0].second}, + /*reducer=*/Reducer::kAdd}, + + {/*base_bounds=*/{5}, /*window_bounds=*/{5}, + /*strides=*/{1}, + /*pad_low=*/{0}, + /*pad_high=*/{5}, + /*reducer=*/Reducer::kAdd}, + + {/*base_bounds=*/{5}, /*window_bounds=*/{5}, + /*strides=*/{1}, + /*pad_low=*/{5}, + /*pad_high=*/{0}, + /*reducer=*/Reducer::kAdd}, }; string R1ReduceWindowTestDataToString( const ::testing::TestParamInfo& data) { string str = tensorflow::strings::StrCat( - "base_bounds_", - tensorflow::str_util::Join(data.param.base_bounds, "x"), // + "base_bounds_", tensorflow::str_util::Join(data.param.base_bounds, "x"), "__window_bounds_", - tensorflow::str_util::Join(data.param.window_bounds, "x"), // - "__strides_", tensorflow::str_util::Join(data.param.strides, "x"), // - "__padding_", data.param.padding == Padding::kSame ? "same" : "valid", // - "__reducer_", data.param.reducer == kAdd ? "add" : "max"); + tensorflow::str_util::Join(data.param.window_bounds, "x"), "__strides_", + tensorflow::str_util::Join(data.param.strides, "x"), "__pad_low_", + tensorflow::str_util::Join(data.param.pad_low, "x"), "__pad_high_", + tensorflow::str_util::Join(data.param.pad_high, "x"), "__reducer_", + data.param.reducer == kAdd ? "add" : "max"); return str; } @@ -1044,15 +1090,18 @@ TEST_P(R1ReduceWindowTest, DoIt) { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr input_arg, client_->TransferToServer(*input_literal)); + std::vector> padding(1); + padding[0] = {param.pad_low[0], param.pad_high[0]}; + auto computation = param.reducer == kAdd ? CreateScalarAddComputation(F32, &b) : CreateScalarMaxComputation(F32, &b); - b.ReduceWindow(/*operand=*/ - b.Parameter(0, input_literal->shape(), "p0"), - /*init_value=*/b.ConstantR0(kInitValue), - /*computation=*/computation, - /*window_dimensions=*/param.window_bounds, - /*window_strides=*/param.strides, /*padding=*/param.padding); + b.ReduceWindowWithGeneralPadding( + /*operand=*/b.Parameter(0, input_literal->shape(), "p0"), + /*init_value=*/b.ConstantR0(kInitValue), + /*computation=*/computation, + /*window_dimensions=*/param.window_bounds, + /*window_strides=*/param.strides, /*padding=*/padding); auto reduce_func = param.reducer == kAdd ? +[](float a, float b) { return a + b; } @@ -1062,7 +1111,8 @@ TEST_P(R1ReduceWindowTest, DoIt) { /*init=*/kInitValue, /*reduce_func=*/reduce_func, /*window=*/param.window_bounds, - /*stride=*/param.strides, /*padding=*/param.padding); + /*stride=*/param.strides, + /*padding=*/padding); ComputeAndCompareR1(&b, tensorflow::gtl::ArraySlice(*expected), {input_arg.get()}, ErrorSpec(1e-3, 1e-3)); -- GitLab From 782ec4e186943d69e4ad047a835cbbf2eb477359 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 28 Nov 2017 18:17:01 -0800 Subject: [PATCH 0918/1801] Silenced noisy log PiperOrigin-RevId: 177249675 --- tensorflow/core/grappler/grappler_item_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 36c7f92c49..a186e9a181 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -173,7 +173,7 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( << ", skipping this input."; return nullptr; } - LOG(INFO) << "Will use feed node " << feed_name; + VLOG(1) << "Will use feed node " << feed_name; new_item->feed.emplace_back(feed_name, Tensor()); } -- GitLab From bf05a2d1dce3af9b88dcd5c9253a163353951c99 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Nov 2017 18:39:34 -0800 Subject: [PATCH 0919/1801] Support shape inference (i.e., shapes containing -1) in the Reshape bijector. PiperOrigin-RevId: 177251901 --- .../kernel_tests/bijectors/reshape_test.py | 342 +++++++++++------- .../python/ops/bijectors/reshape_impl.py | 277 +++++++------- 2 files changed, 362 insertions(+), 257 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py index 38b3a23c2d..49451446b5 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py @@ -28,8 +28,19 @@ from tensorflow.python.ops.distributions.bijector_test_util import assert_biject from tensorflow.python.platform import test -class ReshapeBijectorTest(test.TestCase): - """Tests correctness of the reshape transformation.""" +class _ReshapeBijectorTest(object): + """Base class for testing the reshape transformation. + + Methods defined in this class call a method self.build_shapes() that + is implemented by subclasses defined below, returning respectively + ReshapeBijectorTestStatic: static shapes, + ReshapeBijectorTestDynamic: shape placeholders of known ndims, and + ReshapeBijectorTestDynamicNdims: shape placeholders of unspecified ndims, + so that each test in this base class is automatically run over all + three cases. The subclasses also implement assertRaisesError to test + for either Python exceptions (in the case of static shapes) or + TensorFlow op errors (dynamic shapes). + """ def setUp(self): self._rng = np.random.RandomState(42) @@ -40,9 +51,10 @@ class ReshapeBijectorTest(test.TestCase): expected_y = np.reshape(expected_x, [4, 6]) with self.test_session() as sess: + shape_in, shape_out, feed_dict = self.build_shapes([3, 2], [6,]) bijector = Reshape( - event_shape_out=[6,], - event_shape_in=[3, 2], + event_shape_out=shape_out, + event_shape_in=shape_in, validate_args=True) (x_, y_, @@ -52,66 +64,23 @@ class ReshapeBijectorTest(test.TestCase): bijector.forward(expected_x), bijector.forward_log_det_jacobian(expected_x), bijector.inverse_log_det_jacobian(expected_y), - )) + ), feed_dict=feed_dict) self.assertEqual("reshape", bijector.name) self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0) self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0) self.assertAllClose(0., fldj_, rtol=1e-6, atol=0) self.assertAllClose(0., ildj_, rtol=1e-6, atol=0) - def testEventShapeDynamicNdims(self): - """Check forward/inverse shape methods with dynamic ndims.""" - - shape_in = tensor_shape.TensorShape([6,]) - shape_in_ph = array_ops.placeholder(dtype=dtypes.int32) - - shape_out = tensor_shape.TensorShape([2, 3]) - shape_out_ph = array_ops.placeholder(dtype=dtypes.int32) - - bijector = Reshape( - event_shape_out=shape_out_ph, - event_shape_in=shape_in_ph, validate_args=True) - - # using the _tensor methods, we should always get a fully-specified - # result since these are evaluated at graph runtime. - with self.test_session() as sess: - (shape_out_, - shape_in_) = sess.run(( - bijector.forward_event_shape_tensor(shape_in), - bijector.inverse_event_shape_tensor(shape_out), - ), feed_dict={ - shape_in_ph: shape_in, - shape_out_ph: shape_out, - }) - self.assertAllEqual(shape_out, shape_out_) - self.assertAllEqual(shape_in, shape_in_) - - def testEventShapeDynamic(self): - """Check shape methods with static ndims but dynamic shape.""" - - shape_in = tensor_shape.TensorShape([6,]) - shape_in_partial = tensor_shape.TensorShape([None,]) - shape_in_ph = array_ops.placeholder( - shape=[1,], dtype=dtypes.int32) - - shape_out = tensor_shape.TensorShape([2, 3]) - shape_out_partial = tensor_shape.TensorShape([None, None]) - shape_out_ph = array_ops.placeholder( - shape=[2,], dtype=dtypes.int32) + def testEventShapeTensor(self): + """Test event_shape_tensor methods when even ndims may be dynamic.""" + shape_in_static = [2, 3] + shape_out_static = [6,] + shape_in, shape_out, feed_dict = self.build_shapes(shape_in_static, + shape_out_static) bijector = Reshape( - event_shape_out=shape_out_ph, - event_shape_in=shape_in_ph, - validate_args=True) - - # if event shapes are not statically available, should - # return partially-specified TensorShapes. - self.assertAllEqual( - bijector.forward_event_shape(shape_in).as_list(), - shape_out_partial.as_list()) - self.assertAllEqual( - bijector.inverse_event_shape(shape_out).as_list(), - shape_in_partial.as_list()) + event_shape_out=shape_out, + event_shape_in=shape_in, validate_args=True) # using the _tensor methods, we should always get a fully-specified # result since these are evaluated at graph runtime. @@ -120,42 +89,9 @@ class ReshapeBijectorTest(test.TestCase): shape_in_) = sess.run(( bijector.forward_event_shape_tensor(shape_in), bijector.inverse_event_shape_tensor(shape_out), - ), feed_dict={ - shape_in_ph: shape_in, - shape_out_ph: shape_out, - }) - self.assertAllEqual(shape_out, shape_out_) - self.assertAllEqual(shape_in, shape_in_) - - def testEventShapeStatic(self): - """Check shape methods when shape is statically known.""" - - shape_in = tensor_shape.TensorShape([6,]) - shape_out = tensor_shape.TensorShape([2, 3]) - - bijector_static = Reshape( - event_shape_out=shape_out, - event_shape_in=shape_in, - validate_args=True) - - # test that forward_ and inverse_event_shape do sensible things - # when shapes are statically known. - self.assertEqual( - bijector_static.forward_event_shape(shape_in), - shape_out) - self.assertEqual( - bijector_static.inverse_event_shape(shape_out), - shape_in) - - with self.test_session() as sess: - (shape_out_static_, - shape_in_static_, - ) = sess.run(( - bijector_static.forward_event_shape_tensor(shape_in), - bijector_static.inverse_event_shape_tensor(shape_out), - )) - self.assertAllEqual(shape_out, shape_out_static_) - self.assertAllEqual(shape_in, shape_in_static_) + ), feed_dict=feed_dict) + self.assertAllEqual(shape_out_static, shape_out_) + self.assertAllEqual(shape_in_static, shape_in_) def testScalarReshape(self): """Test reshaping to and from a scalar shape ().""" @@ -166,11 +102,11 @@ class ReshapeBijectorTest(test.TestCase): expected_x_scalar = np.random.randn(1,) expected_y_scalar = expected_x_scalar[0] + shape_in, shape_out, feed_dict = self.build_shapes([], [1,]) with self.test_session() as sess: bijector = Reshape( - event_shape_out=[], - event_shape_in=[1,], validate_args=True) - + event_shape_out=shape_in, + event_shape_in=shape_out, validate_args=True) (x_, y_, x_scalar_, @@ -180,53 +116,178 @@ class ReshapeBijectorTest(test.TestCase): bijector.forward(expected_x), bijector.inverse(expected_y_scalar), bijector.forward(expected_x_scalar), - )) + ), feed_dict=feed_dict) self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0) self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0) self.assertAllClose(expected_y_scalar, y_scalar_, rtol=1e-6, atol=0) self.assertAllClose(expected_x_scalar, x_scalar_, rtol=1e-6, atol=0) - def testRaisesOpError(self): - x1 = np.random.randn(4, 2, 3) - x2 = np.random.randn(4, 3, 2) - x3 = np.random.randn(4, 5, 1, 1) + def testMultipleUnspecifiedDimensionsOpError(self): with self.test_session() as sess: - shape_in_ph = array_ops.placeholder(shape=[2,], dtype=dtypes.int32) - shape_out_ph = array_ops.placeholder(shape=[3,], dtype=dtypes.int32) + shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [4, -1, -1,]) bijector = Reshape( - event_shape_out=shape_out_ph, - event_shape_in=shape_in_ph, + event_shape_out=shape_out, + event_shape_in=shape_in, validate_args=True) - with self.assertRaisesOpError( + with self.assertRaisesError( + "elements must have at most one `-1`."): + sess.run(bijector.forward_event_shape_tensor(shape_in), + feed_dict=feed_dict) + + def testInvalidDimensionsOpError(self): + + with self.test_session() as sess: + + shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [1, 2, -2,]) + bijector = Reshape( + event_shape_out=shape_out, + event_shape_in=shape_in, + validate_args=True) + + with self.assertRaisesError( + "elements must be either positive integers or `-1`."): + sess.run(bijector.forward_event_shape_tensor(shape_in), + feed_dict=feed_dict) + + def testValidButNonMatchingInputOpError(self): + x = np.random.randn(4, 3, 2) + + with self.test_session() as sess: + shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [1, 6, 1,]) + bijector = Reshape( + event_shape_out=shape_out, + event_shape_in=shape_in, + validate_args=True) + + # Here we pass in a tensor (x) whose shape is compatible with + # the output shape, so tf.reshape will throw no error, but + # doesn't match the expected input shape. + with self.assertRaisesError( "Input `event_shape` does not match `event_shape_in`."): - sess.run(bijector.forward(x2), - feed_dict={shape_out_ph: [1, 6, 1], - shape_in_ph: [2, 3]}) + sess.run(bijector.forward(x), + feed_dict=feed_dict) - with self.assertRaisesOpError( - "event_shape_out entries must be positive."): - sess.run(bijector.forward(x1), - feed_dict={shape_out_ph: [-1, -1, 6], - shape_in_ph: [2, 3]}) + def testValidButNonMatchingInputPartiallySpecifiedOpError(self): + x = np.random.randn(4, 3, 2) + + with self.test_session() as sess: + shape_in, shape_out, feed_dict = self.build_shapes([2, -1], [1, 6, 1,]) + bijector = Reshape( + event_shape_out=shape_out, + event_shape_in=shape_in, + validate_args=True) + + with self.assertRaisesError( + "Input `event_shape` does not match `event_shape_in`."): + sess.run(bijector.forward(x), + feed_dict=feed_dict) + + def testInputOutputMismatchOpError(self): + x1 = np.random.randn(4, 2, 3) + x2 = np.random.randn(4, 1, 1, 5) + + with self.test_session() as sess: + shape_in, shape_out, fd_mismatched = self.build_shapes([2, 3], + [1, 1, 5]) + bijector = Reshape( + event_shape_out=shape_out, + event_shape_in=shape_in, + validate_args=True) # test that *all* methods check basic assertions - fd_mismatched = {shape_out_ph: [1, 1, 5], shape_in_ph: [2, 3]} - with self.assertRaisesOpError( - "Input/output `event_size`s do not match."): + with self.assertRaisesError( + "Input to reshape is a tensor with"): sess.run(bijector.forward(x1), feed_dict=fd_mismatched) - with self.assertRaisesOpError( - "Input/output `event_size`s do not match."): - sess.run(bijector.inverse(x3), feed_dict=fd_mismatched) - with self.assertRaisesOpError( - "Input/output `event_size`s do not match."): - sess.run(bijector.inverse_log_det_jacobian(x3), - feed_dict=fd_mismatched) - with self.assertRaisesOpError( - "Input/output `event_size`s do not match."): - sess.run(bijector.forward_log_det_jacobian(x1), - feed_dict=fd_mismatched) + with self.assertRaisesError( + "Input to reshape is a tensor with"): + sess.run(bijector.inverse(x2), feed_dict=fd_mismatched) + + def testOneShapePartiallySpecified(self): + expected_x = np.random.randn(4, 6) + expected_y = np.reshape(expected_x, [4, 2, 3]) + + with self.test_session() as sess: + # one of input/output shapes is partially specified + shape_in, shape_out, feed_dict = self.build_shapes([-1,], [2, 3]) + bijector = Reshape( + event_shape_out=shape_out, + event_shape_in=shape_in, + validate_args=True) + (x_, + y_, + ) = sess.run(( + bijector.inverse(expected_y), + bijector.forward(expected_x), + ), feed_dict=feed_dict) + self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0) + self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0) + + def testBothShapesPartiallySpecified(self): + expected_x = np.random.randn(4, 2, 3) + expected_y = np.reshape(expected_x, [4, 3, 2]) + with self.test_session() as sess: + shape_in, shape_out, feed_dict = self.build_shapes([-1, 3], [-1, 2]) + bijector = Reshape( + event_shape_out=shape_out, + event_shape_in=shape_in, + validate_args=True) + (x_, + y_, + ) = sess.run(( + bijector.inverse(expected_y), + bijector.forward(expected_x), + ), feed_dict=feed_dict) + self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0) + self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0) + + def testDefaultVectorShape(self): + expected_x = np.random.randn(4, 4) + expected_y = np.reshape(expected_x, [4, 2, 2]) + with self.test_session() as sess: + _, shape_out, feed_dict = self.build_shapes([-1,], [-1, 2]) + bijector = Reshape(shape_out, + validate_args=True) + (x_, + y_, + ) = sess.run(( + bijector.inverse(expected_y), + bijector.forward(expected_x), + ), feed_dict=feed_dict) + self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0) + self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0) + + def build_shapes(self, *args, **kwargs): + raise NotImplementedError("Subclass failed to implement `build_shapes`.") + + +class ReshapeBijectorTestStatic(test.TestCase, _ReshapeBijectorTest): + + def build_shapes(self, shape_in, shape_out): + shape_in_static = shape_in + shape_out_static = shape_out + feed_dict = {} + return shape_in_static, shape_out_static, feed_dict + + def assertRaisesError(self, msg): + return self.assertRaisesRegexp(Exception, msg) + + def testEventShape(self): + shape_in_static = tensor_shape.TensorShape([2, 3]) + shape_out_static = tensor_shape.TensorShape([6,]) + bijector = Reshape( + event_shape_out=shape_out_static, + event_shape_in=shape_in_static, validate_args=True) + + # test that forward_ and inverse_event_shape do sensible things + # when shapes are statically known. + self.assertEqual( + bijector.forward_event_shape(shape_in_static), + shape_out_static) + self.assertEqual( + bijector.inverse_event_shape(shape_out_static), + shape_in_static) def testBijectiveAndFinite(self): x = np.random.randn(4, 2, 3) @@ -238,5 +299,32 @@ class ReshapeBijectorTest(test.TestCase): validate_args=True) assert_bijective_and_finite(bijector, x, y, rtol=1e-6, atol=0) + +class ReshapeBijectorTestDynamic(test.TestCase, _ReshapeBijectorTest): + + def build_shapes(self, shape_in, shape_out): + shape_in_ph = array_ops.placeholder(shape=(len(shape_in),), + dtype=dtypes.int32) + shape_out_ph = array_ops.placeholder(shape=(len(shape_out),), + dtype=dtypes.int32) + feed_dict = {shape_in_ph: shape_in, shape_out_ph: shape_out} + return shape_in_ph, shape_out_ph, feed_dict + + def assertRaisesError(self, msg): + return self.assertRaisesOpError(msg) + + +class ReshapeBijectorTestDynamicNdims(test.TestCase, _ReshapeBijectorTest): + + def build_shapes(self, shape_in, shape_out): + shape_in_ph = array_ops.placeholder(shape=None, dtype=dtypes.int32) + shape_out_ph = array_ops.placeholder(shape=None, dtype=dtypes.int32) + feed_dict = {shape_in_ph: shape_in, shape_out_ph: shape_out} + return shape_in_ph, shape_out_ph, feed_dict + + def assertRaisesError(self, msg): + return self.assertRaisesOpError(msg) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py index 93682639aa..1eb8e74fda 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py @@ -36,70 +36,77 @@ __all__ = [ ] +def _static_ndims_from_shape(shape): + return shape.shape.with_rank_at_least(1)[0].value + + +def _ndims_from_shape(shape): + return array_ops.shape(shape)[0] + + class Reshape(bijector_lib.Bijector): """Reshapes the `event_shape` of a `Tensor`. The semantics generally follow that of `tf.reshape()`, with a few differences: - * The user must provide both the input and output shape, so that - the transformation can be inverted. - * The `Reshape` bijector automatically broadcasts over the leftmost - dimensions of its input (`sample_shape` and `batch_shape`); only - the rightmost `event_ndims_in` dimensions are reshaped. The - number of dimensions to reshape is inferred from the provided - `event_shape_in` (`event_ndims_in = len(event_shape_in)`). - * The `Reshape` bijector does not currently support - partially-specified shapes, i.e., those with a dimension - implicitly specified by `-1`. + + * The user must provide both the input and output shape, so that + the transformation can be inverted. If an input shape is not + specified, the default assumes a vector-shaped input, i.e., + event_shape_in = (-1,). + * The `Reshape` bijector automatically broadcasts over the leftmost + dimensions of its input (`sample_shape` and `batch_shape`); only + the rightmost `event_ndims_in` dimensions are reshaped. The + number of dimensions to reshape is inferred from the provided + `event_shape_in` (`event_ndims_in = len(event_shape_in)`). Example usage: ```python bs = tf.contrib.distributions.bijectors - reverse = bs.Reshape(event_shape_out=[1,2], - event_shape_in=[2,]) + r = bs.Reshape(event_shape_out=[1, -1]) - reverse.forward([1., 2.]) # shape [2,] - # ==> [[1., 2.]] # shape [1,2] + r.forward([3., 4.]) # shape [2] + # ==> [[3., 4.]] # shape [1, 2] - reverse.forward([[1., 2.], [3., 4.]]) # shape [2, 2] - # ==> [[[1., 2.]], [[3., 4.]]] # shape [2, 1, 2] + r.forward([[1., 2.], [3., 4.]]) # shape [2, 2] + # ==> [[[1., 2.]], + # [[3., 4.]]] # shape [2, 1, 2] - reverse.inverse([[1., 2.]]) # shape [1,2] - # ==> [1., 2.] # shape [2,] + r.inverse([[3., 4.]]) # shape [1,2] + # ==> [3., 4.] # shape [2] - reverse.forward_log_det_jacobian(any_value) + r.forward_log_det_jacobian(any_value) # ==> 0. - reverse.inverse_log_det_jacobian(any_value) + r.inverse_log_det_jacobian(any_value) # ==> 0. ``` """ - def __init__(self, event_shape_out, event_shape_in, + def __init__(self, event_shape_out, event_shape_in=(-1,), validate_args=False, name=None): """Creates a `Reshape` bijector. Args: event_shape_out: An `int`-like vector-shaped `Tensor` - representing the fully specified (no -1's) event shape of the - transformed output. - event_shape_in: An `int`-like vector-shaped `Tensor` - representing the fully specified (no -1's) event shape of the - input. + representing the event shape of the transformed output. + event_shape_in: An optional `int`-like vector-shape `Tensor` + representing the event shape of the input. This is required in + order to define inverse operations; the default of (-1,) + assumes a vector-shaped input. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str`, name given to ops managed by this object. Raises: TypeError: if either `event_shape_in` or `event_shape_out` has - non-vector shape (`rank > 1`), or non-integer `dtype`. - ValueError: if either `event_shape_in` or `event_shape_out` - contains non-positive entries, or if their sizes do not match - (`prod(event_shape_in)` != `prod(event_shape_out)`), or if - their dimensionality(s) cannot be statically inferred. + non-integer `dtype`. + ValueError: if either of `event_shape_in` or `event_shape_out` + has non-vector shape (`rank > 1`), or if their sizes do not + match. """ with ops.name_scope(name, "reshape", values=[event_shape_out, event_shape_in]): @@ -111,105 +118,74 @@ class Reshape(bijector_lib.Bijector): name="event_shape_in", preferred_dtype=dtypes.int32) - # check that input shapes are positive integers assertions = [] - assertions += self._maybe_check_valid_shape( - event_shape_out, "event_shape_out", - validate_args=validate_args) - assertions += self._maybe_check_valid_shape( - event_shape_in, "event_shape_in", validate_args=validate_args) - - # check that prod(event_shape_in) = prod(event_shape_out) - assertions += self._maybe_check_matching_sizes( - event_shape_in, event_shape_out, validate_args=validate_args) + assertions.extend(self._maybe_check_valid_shape( + event_shape_out, validate_args)) + assertions.extend(self._maybe_check_valid_shape( + event_shape_in, validate_args)) self._assertions = assertions self._event_shape_in = event_shape_in self._event_shape_out = event_shape_out - self._event_shape_in_static = tensor_util.constant_value_as_shape( - event_shape_in) - self._event_shape_out_static = tensor_util.constant_value_as_shape( - event_shape_out) super(Reshape, self).__init__(is_constant_jacobian=True, validate_args=validate_args, name=name or "reshape") - def _maybe_check_valid_shape(self, shape_tensor, label, - validate_args=False): - """Check that a shape Tensor is int-type and positive.""" - - assertions = [] - - if not shape_tensor.dtype.is_integer: + def _maybe_check_valid_shape(self, shape, validate_args): + """Check that a shape Tensor is int-type and otherwise sane.""" + if not shape.dtype.is_integer: raise TypeError("{} dtype ({}) should be `int`-like.".format( - label, shape_tensor.dtype.name)) + shape.op.name, shape.dtype.name)) - shape_rank = tensor_util.constant_value(array_ops.rank(shape_tensor)) - if shape_rank is not None and shape_rank > 1: - raise ValueError("{} rank should be <= 1.".format(label)) + assertions = [] - s = tensor_util.constant_value(shape_tensor) - if s is not None: - if (s <= 0).any(): - raise ValueError("{} entries must be positive, but found {}".format( - label, s)) + ndims = array_ops.rank(shape) + ndims_ = tensor_util.constant_value(ndims) + if ndims_ is not None and ndims_ > 1: + raise ValueError("`{}` rank ({}) should be <= 1.".format( + shape.op.name, ndims_)) elif validate_args: - assertions.append(check_ops.assert_positive( - shape_tensor, message="{} entries must be positive".format(label))) - - return assertions - - def _maybe_check_matching_sizes(self, event_shape_in, event_shape_out, - validate_args=False): - """Check that prod(event_shape_in)==prod(event_shape_out).""" + assertions.append(check_ops.assert_less_equal( + ndims, 1, message="`{}` rank should be <= 1.".format(shape.op.name))) - def _get_size_from_shape(shape): - """Computes size from a shape `Tensor`, statically if possible.""" - s = tensor_util.constant_value(shape) - if s is not None: - return [np.int32(np.prod(s))]*2 - return None, math_ops.reduce_prod(shape, name="size") - - # Ensure `event_shape_in` is compatible with `event_shape_out`. - event_size_in_, event_size_in = _get_size_from_shape( # pylint: disable=unbalanced-tuple-unpacking - event_shape_in) - event_size_out_, event_size_out = _get_size_from_shape( # pylint: disable=unbalanced-tuple-unpacking - event_shape_out) - - assertions = [] - if event_size_in_ is not None and event_size_out_ is not None: - if event_size_in_ != event_size_out_: + shape_ = tensor_util.constant_value_as_shape(shape) + if shape_.is_fully_defined(): + es = np.int32(shape_.as_list()) + if sum(es == -1) > 1: + raise ValueError( + "`{}` must have at most one `-1` (given {})" + .format(shape.op.name, es)) + if np.any(es < -1): raise ValueError( - "Input `event_size` ({}) does not match output `event_size` ({}).". - format(event_size_in, event_size_out_)) + "`{}` elements must be either positive integers or `-1`" + "(given {})." + .format(shape.op.name, es)) elif validate_args: - assertions.append(check_ops.assert_equal( - event_size_in, event_size_out, - message="Input/output `event_size`s do not match.")) - + assertions.extend([ + check_ops.assert_less_equal( + math_ops.reduce_sum( + math_ops.cast(math_ops.equal(shape, -1), dtypes.int32)), + 1, + message="`{}` elements must have at most one `-1`." + .format(shape.op.name)), + check_ops.assert_greater_equal( + shape, -1, + message="`{}` elements must be either positive integers or `-1`." + .format(shape.op.name)), + ]) return assertions def _reshape_helper(self, x, event_shape_in, event_shape_out): """Reshape only the event_shape of an input `Tensor`.""" - def _get_rank_from_shape(shape): - """Computes rank from a shape `Tensor`, statically if possible.""" - # Uses fact that rank is "shape of shape". - ndims = shape.shape.with_rank_at_least(1)[0].value - if ndims is not None: - return ndims, ndims - return None, array_ops.shape(shape)[0] - - event_ndims_in_, event_ndims_in = _get_rank_from_shape(event_shape_in) + event_ndims_in_ = _static_ndims_from_shape(event_shape_in) + event_ndims_in = _ndims_from_shape(event_shape_in) + x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x) assertions = [] - # Ensure x.event_shape is compatible with event_shape_in. - if x.shape.ndims is not None: - x_ndims_, x_ndims = [x.shape.ndims]*2 - else: - x_ndims_, x_ndims = None, array_ops.rank(x) + # Ensure x.event_shape is compatible with event_shape_in. if (event_ndims_in_ is not None and x_ndims_ is not None and x.shape.with_rank_at_least(event_ndims_in_)[ @@ -223,13 +199,35 @@ class Reshape(bijector_lib.Bijector): event_shape_in_ = tensor_util.constant_value(event_shape_in) if x_event_shape_ is not None and event_shape_in_ is not None: - if not np.equal(x_event_shape_, event_shape_in_).all(): + # Compare the shape dimensions that are fully specified in the + # input (i.e., for which event_shape_in is not -1). If x_event_shape + # matches along all of these dimensions, it is compatible with + # the desired input shape and any further mismatches (i.e., + # imcompatibility with the desired *output* shape) will be + # caught inside of array_ops.reshape() below. + x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0] + event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0] + if not np.equal(x_event_shape_specified_, + event_shape_in_specified_).all(): raise ValueError( - "Input `event_shape` ({}) does not match `event_shape_in` ({}).". + "Input `event_shape` does not match `event_shape_in` ({} vs {}).". format(x_event_shape_, event_shape_in_)) elif self.validate_args: + # Similarly to the static case, we compare the shape dimensions + # that are fully specified in the input. We extract these + # dimensions using boolean_mask(), which requires that the mask + # have known ndims. We can assume that shape Tensors always have + # ndims==1 (this assumption is verified inside of + # _maybe_check_valid_shape), so the reshape operation is just a + # no-op that formally encodes this fact to make boolean_mask() + # happy. + event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1]) + x_event_shape_specified = array_ops.boolean_mask(x_event_shape, + event_shape_mask) + event_shape_in_specified = array_ops.boolean_mask(event_shape_in, + event_shape_mask) assertions.append(check_ops.assert_equal( - x_event_shape, event_shape_in, + x_event_shape_specified, event_shape_in_specified, message="Input `event_shape` does not match `event_shape_in`.")) if assertions: @@ -243,8 +241,19 @@ class Reshape(bijector_lib.Bijector): sample_and_batch_shape = sample_and_batch_shape[ :(ndims - math_ops.abs(event_ndims_in))] - new_shape = array_ops.concat( - [sample_and_batch_shape, event_shape_out], axis=0) + if (event_ndims_in_ is not None + and x_ndims_ is not None + and event_ndims_in_ == x_ndims_): + # Hack to allow forward/inverse_event_shape to do shape + # inference by calling this helper method with a dummy Tensor of + # shape event_shape_in. In this special case, + # sample_and_batch_shape will be empty so we can preserve static + # shape information by avoiding the concat operation below + # (which would be a no-op). + new_shape = event_shape_out + else: + new_shape = array_ops.concat( + [sample_and_batch_shape, event_shape_out], axis=0) return array_ops.reshape(x, new_shape) @@ -269,29 +278,37 @@ class Reshape(bijector_lib.Bijector): return constant_op.constant(0., dtype=x.dtype) def _forward_event_shape(self, input_shape): - self._event_shape_in_static.assert_is_compatible_with(input_shape) - return self._event_shape_out_static + # NOTE: this method and the other *_event_shape* methods + # compute shape by explicit transformation of a dummy + # variable. This approach is not generally recommended because it + # bloats the graph and could in general trigger side effects. + # + # In this particular case of the Reshape bijector, the + # forward and inverse transforms have no side effects, and we + # believe the reduction in code complexity from delegating the + # heavy lifting to tf.reshape() is worth the added graph ops. + # However, you should think hard before implementing this approach + # in other Bijectors; it is strongly preferred to compute + # shapes explicitly whenever it's feasible to do so. + with ops.control_dependencies(self._assertions): + dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape) + dummy_reshaped = self.forward(dummy) + return dummy_reshaped.shape def _inverse_event_shape(self, output_shape): - self._event_shape_out_static.assert_is_compatible_with(output_shape) - return self._event_shape_in_static + with ops.control_dependencies(self._assertions): + dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape) + dummy_reshaped = self.inverse(dummy) + return dummy_reshaped.shape def _forward_event_shape_tensor(self, input_shape): - input_assertions = self._maybe_check_valid_shape( - input_shape, "input event shape", validate_args=self.validate_args) - input_assertions += self._maybe_check_matching_sizes( - input_shape, self._event_shape_out, - validate_args=self.validate_args) - - return control_flow_ops.with_dependencies( - input_assertions + self._assertions, self._event_shape_out) + with ops.control_dependencies(self._assertions): + dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape) + dummy_reshaped = self.forward(dummy) + return array_ops.shape(dummy_reshaped) def _inverse_event_shape_tensor(self, output_shape): - - output_assertions = self._maybe_check_valid_shape( - output_shape, "output event shape", validate_args=self.validate_args) - output_assertions += self._maybe_check_matching_sizes( - output_shape, self._event_shape_in, validate_args=self.validate_args) - - return control_flow_ops.with_dependencies( - output_assertions + self._assertions, self._event_shape_in) + with ops.control_dependencies(self._assertions): + dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape) + dummy_reshaped = self.inverse(dummy) + return array_ops.shape(dummy_reshaped) -- GitLab From 60d2e51254028df73f650abe07fad024c49688bb Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 28 Nov 2017 19:07:56 -0800 Subject: [PATCH 0920/1801] Introduce tf.contrib.summary.flush This op has been useful while writing benchmarks. PiperOrigin-RevId: 177254316 --- tensorflow/contrib/summary/summary.py | 1 + tensorflow/contrib/summary/summary_ops.py | 21 +++++++++++++++ .../contrib/summary/summary_ops_test.py | 27 +++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index f783179f61..9e6af5232f 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -31,6 +31,7 @@ from tensorflow.contrib.summary.summary_ops import audio from tensorflow.contrib.summary.summary_ops import create_summary_db_writer from tensorflow.contrib.summary.summary_ops import create_summary_file_writer from tensorflow.contrib.summary.summary_ops import eval_dir +from tensorflow.contrib.summary.summary_ops import flush from tensorflow.contrib.summary.summary_ops import generic from tensorflow.contrib.summary.summary_ops import graph from tensorflow.contrib.summary.summary_ops import histogram diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index 8e37987cb7..de6f2cd79f 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -516,6 +516,27 @@ def import_event(tensor, name=None): context.context().summary_writer_resource, tensor, name=name) +def flush(writer=None, name=None): + """Forces summary writer to send any buffered data to storage. + + This operation blocks until that finishes. + + Args: + writer: The @{tf.contrib.summary.SummaryWriter} resource to flush. + The thread default will be used if this parameter is None. + Otherwise a @{tf.no_op} is returned. + name: A name for the operation (optional). + + Returns: + The created @{tf.Operation}. + """ + if writer is None: + writer = context.context().summary_writer_resource + if writer is None: + return control_flow_ops.no_op() + return gen_summary_ops.flush_summary_writer(writer, name=name) + + def eval_dir(model_dir, name=None): """Construct a logdir for an eval summary writer.""" return os.path.join(model_dir, "eval" if not name else "eval_" + name) diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index ad89c0c36a..3fe421a7e9 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -109,6 +109,33 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'scalar') + def testMaxQueue(self): + logs = tempfile.mkdtemp() + with summary_ops.create_summary_file_writer( + logs, max_queue=2, flush_millis=999999, + name='lol').as_default(), summary_ops.always_record_summaries(): + get_total = lambda: len(summary_test_util.events_from_logdir(logs)) + # Note: First tf.Event is always file_version. + self.assertEqual(1, get_total()) + summary_ops.scalar('scalar', 2.0, step=1) + self.assertEqual(1, get_total()) + summary_ops.scalar('scalar', 2.0, step=2) + self.assertEqual(3, get_total()) + + def testFlush(self): + logs = tempfile.mkdtemp() + with summary_ops.create_summary_file_writer( + logs, max_queue=999999, flush_millis=999999, + name='lol').as_default(), summary_ops.always_record_summaries(): + get_total = lambda: len(summary_test_util.events_from_logdir(logs)) + # Note: First tf.Event is always file_version. + self.assertEqual(1, get_total()) + summary_ops.scalar('scalar', 2.0, step=1) + summary_ops.scalar('scalar', 2.0, step=2) + self.assertEqual(1, get_total()) + summary_ops.flush() + self.assertEqual(3, get_total()) + class DbTest(summary_test_internal.SummaryDbTest): -- GitLab From d2e7a2e4bf295a23d6a2e86aa7e0636f00cc2d75 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 28 Nov 2017 19:42:30 -0800 Subject: [PATCH 0921/1801] Add VLOG-ging to gcs_file_system PiperOrigin-RevId: 177256727 --- tensorflow/core/platform/cloud/gcs_file_system.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 54d38fe962..45e9b05092 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -697,6 +697,9 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://", bucket, "/", object); + VLOG(1) << "Successful read of gs://" << bucket << "/" << object << " @ " + << offset << " of size: " << out->size(); + if (out->size() < block_size()) { // Check stat cache to see if we encountered an interrupted read. FileStatistics stat; @@ -706,6 +709,8 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, "File contents are inconsistent for file: %s @ %lu.", filename.c_str(), offset)); } + VLOG(2) << "Successful integrity check for: gs://" << bucket << "/" + << object << " @ " << offset; } } @@ -868,6 +873,11 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket, TF_RETURN_IF_ERROR(GetStringValue(root, "updated", &updated)); TF_RETURN_IF_ERROR(ParseRfc3339Time(updated, &(stat->mtime_nsec))); + VLOG(1) << "Stat of: gs://" << bucket << "/" << object << " -- " + << " length: " << stat->length + << "; mtime_nsec: " << stat->mtime_nsec + << "; updated: " << updated; + stat->is_directory = false; return Status::OK(); }; -- GitLab From bdde4d040cf01ef241ad349cf222c227b9a88814 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 28 Nov 2017 20:41:47 -0800 Subject: [PATCH 0922/1801] [XLA] Support transposing the spatial dimensions of a convolution's activations PiperOrigin-RevId: 177260886 --- .../compiler/xla/service/transpose_folding.cc | 9 ++- .../xla/service/transpose_folding_test.cc | 64 +++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc index fb55d4e543..42b616f4c3 100644 --- a/tensorflow/compiler/xla/service/transpose_folding.cc +++ b/tensorflow/compiler/xla/service/transpose_folding.cc @@ -102,6 +102,10 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) { auto& convolution = *pair.first; auto& operand_indices = pair.second; + if (operand_indices.empty()) { + return false; + } + const ConvolutionDimensionNumbers& dnums = convolution.convolution_dimension_numbers(); ConvolutionDimensionNumbers new_dnums = dnums; @@ -121,8 +125,9 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) { transpose_dimensions[dnums.input_batch_dimension()]); new_dnums.set_input_feature_dimension( transpose_dimensions[dnums.input_feature_dimension()]); - for (const auto& spatial_dimension : dnums.input_spatial_dimensions()) { - CHECK_EQ(spatial_dimension, transpose_dimensions[spatial_dimension]); + for (auto& input_spatial_dimension : + *new_dnums.mutable_input_spatial_dimensions()) { + input_spatial_dimension = transpose_dimensions[input_spatial_dimension]; } new_lhs = &transpose_operand; } else { diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc index 6ac32e88f1..ba99852905 100644 --- a/tensorflow/compiler/xla/service/transpose_folding_test.cc +++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc @@ -376,5 +376,69 @@ TEST_F(TransposeFoldingTest, FoldConvTransposeLhs) { new_conv->convolution_dimension_numbers().output_spatial_dimensions(1)); } +// Test that a transpose of every dimension in the activations gets folded into +// convolution. +TEST_F(TransposeFoldingTest, FoldConvComplexTransposeLhs) { + auto builder = HloComputation::Builder("entry_computation"); + HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, ShapeUtil::MakeShape(F32, {3, 2, 1, 1}), + /*name=*/"x")); + HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/1, ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), + /*name=*/"y")); + HloInstruction* transpose_x = + builder.AddInstruction(HloInstruction::CreateTranspose( + ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), x, {1, 0, 3, 2})); + auto dnums = ComputationBuilder::CreateDefaultConvDimensionNumbers(); + Window window; + for (int i = 0; i < 2; ++i) { + WindowDimension* dim = window.add_dimensions(); + dim->set_padding_low(0); + dim->set_padding_high(0); + dim->set_base_dilation(1); + dim->set_window_dilation(1); + dim->set_stride(1); + dim->set_size(y->shape().dimensions(dnums.kernel_spatial_dimensions(i))); + } + StatusOr conv_shape = ShapeInference::InferConvolveShape( + transpose_x->shape(), y->shape(), window, dnums); + EXPECT_IS_OK(conv_shape); + HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve( + conv_shape.ValueOrDie(), transpose_x, y, window, dnums)); + + HloModule module("test_module"); + HloComputation* entry_computation = + module.AddEntryComputation(builder.Build(conv)); + FoldTranspose(&module); + + // Instructions after folding: x, y, and the convolution. + std::unordered_set instruction_set( + entry_computation->instructions().begin(), + entry_computation->instructions().end()); + EXPECT_EQ(1, instruction_set.erase(x)) << "x is not in entry_computation."; + EXPECT_EQ(1, instruction_set.erase(y)) << "y is not in entry_computation."; + EXPECT_EQ(1, instruction_set.size()) + << "entry_computation should contain exactly 3 instructions."; + HloInstruction* new_conv = *instruction_set.begin(); + EXPECT_EQ(HloOpcode::kConvolution, new_conv->opcode()); + EXPECT_EQ(dnums.input_feature_dimension(), + new_conv->convolution_dimension_numbers().input_batch_dimension()); + EXPECT_EQ( + dnums.input_batch_dimension(), + new_conv->convolution_dimension_numbers().input_feature_dimension()); + EXPECT_EQ( + dnums.input_spatial_dimensions(0), + new_conv->convolution_dimension_numbers().input_spatial_dimensions(1)); + EXPECT_EQ( + dnums.input_spatial_dimensions(1), + new_conv->convolution_dimension_numbers().input_spatial_dimensions(0)); + EXPECT_EQ( + dnums.output_spatial_dimensions(0), + new_conv->convolution_dimension_numbers().output_spatial_dimensions(0)); + EXPECT_EQ( + dnums.output_spatial_dimensions(1), + new_conv->convolution_dimension_numbers().output_spatial_dimensions(1)); +} + } // namespace } // namespace xla -- GitLab From a55ee58c89d5bf6a8cd70b706dc3af90d7d6efc4 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 28 Nov 2017 20:54:54 -0800 Subject: [PATCH 0923/1801] Include the filename when we encounter EOF PiperOrigin-RevId: 177261696 --- tensorflow/core/platform/cloud/file_block_cache.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/platform/cloud/file_block_cache.cc b/tensorflow/core/platform/cloud/file_block_cache.cc index a472ae52fc..e1afc7b308 100644 --- a/tensorflow/core/platform/cloud/file_block_cache.cc +++ b/tensorflow/core/platform/cloud/file_block_cache.cc @@ -181,7 +181,9 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n, // The requested offset is at or beyond the end of the file. This can // happen if `offset` is not block-aligned, and the read returns the last // block in the file, which does not extend all the way out to `offset`. - return errors::OutOfRange("EOF at offset ", offset); + return errors::OutOfRange("EOF at offset ", offset, " in file ", filename, + " at position ", pos, "with data size ", + data.size()); } auto begin = data.begin(); if (offset > pos) { -- GitLab From 05f57851d4657ec6c09a454b157cf17d89d0cfe2 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Tue, 28 Nov 2017 21:10:16 -0800 Subject: [PATCH 0924/1801] Bugfixes: Gather's gradient for 2+ dimensional indices with eager execution. And shape inference function for the VariableShape operation. PiperOrigin-RevId: 177262783 --- tensorflow/core/ops/resource_variable_ops.cc | 5 ++- .../python/ops/resource_variable_ops.py | 21 ++++------ tensorflow/python/training/momentum_test.py | 41 +++++++++++++------ 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc index cdfbec85cf..bf9e673e8e 100644 --- a/tensorflow/core/ops/resource_variable_ops.cc +++ b/tensorflow/core/ops/resource_variable_ops.cc @@ -204,7 +204,10 @@ Status VariableShapeShapeFn(InferenceContext* c) { if (handle_data == nullptr || handle_data->empty()) { return errors::InvalidArgument("Handle doesn't have shape information."); } - c->set_output(0, (*handle_data)[0].shape); + ShapeHandle var_shape = (*handle_data)[0].shape; + int64 rank = c->RankKnown(var_shape) ? c->Rank(var_shape) + : InferenceContext::kUnknownDim; + c->set_output(0, c->Vector(rank)); return Status::OK(); } diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 343e38f960..652bfa1ebc 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -887,26 +887,19 @@ def _ReadGrad(_, grad): def _GatherGrad(op, grad): """Gradient for gather op.""" # Build appropriately shaped IndexedSlices - # Walk graph back until the original handle is found. - # TODO(apassos): more robust way of getting the shape. - # TODO(apassos): implement this for EAGER mode. - if context.in_eager_mode(): - dense_shape = gen_resource_variable_ops.variable_shape(op.inputs[0]) - return (ops.IndexedSlices(grad, - op.inputs[1], - dense_shape=dense_shape), - None) handle = op.inputs[0] - while handle.op.type != "VarHandleOp": - handle = handle.op.inputs[0] - params_shape = ops.convert_to_tensor( - tensor_shape.TensorShape(handle.op.get_attr("shape"))) indices = op.inputs[1] + if context.in_graph_mode(): + # Walk graph back until the original handle is found. + # TODO(apassos): implement this for EAGER mode. + while handle.op.type != "VarHandleOp": + handle = handle.op.inputs[0] + params_shape = gen_resource_variable_ops.variable_shape(handle) size = array_ops.expand_dims(array_ops.size(indices), 0) values_shape = array_ops.concat([size, params_shape[1:]], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, size) - return [ops.IndexedSlices(values, indices, params_shape), None] + return (ops.IndexedSlices(values, indices, params_shape), None) def _to_proto_fn(v, export_scope=None): diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py index 7268b3abc9..6865513b0e 100644 --- a/tensorflow/python/training/momentum_test.py +++ b/tensorflow/python/training/momentum_test.py @@ -234,23 +234,38 @@ class MomentumOptimizerTest(test.TestCase): self.assertAllClose(var0_np, var0.eval()) self.assertAllClose(var1_np, var1.eval()) + @test_util.run_in_graph_and_eager_modes(reset_test=True) def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + + # pylint: disable=cell-var-from-loop + def loss(): x = constant_op.constant([[4.0], [5.0]], dtype=dtype) pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) - loss = pred * pred - sgd_op = momentum_lib.MomentumOptimizer( - learning_rate=1.0, momentum=0.0).minimize(loss) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) - # Run 1 step of sgd - sgd_op.run() - # Validate updated params - self.assertAllCloseAccordingToType( - [[-111, -138]], var0.eval()) + return pred * pred + # pylint: enable=cell-var-from-loop + + opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0) + sgd_op = opt.minimize(loss if context.in_eager_mode() else loss()) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testMinimizeWith2DIndiciesForEmbeddingLookup(self): + var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2])) + + def loss(): + return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]])) + + opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0) + sgd_op = opt.minimize(loss if context.in_eager_mode() else loss()) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(sgd_op) + self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0)) def testTensorLearningRateAndMomentum(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: -- GitLab From 2ab7e9dad284bd15d69779ee0bcf8a2c894c2a2a Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Tue, 28 Nov 2017 21:22:20 -0800 Subject: [PATCH 0925/1801] Go: Add some more detail to an error message. Closes #14806 PiperOrigin-RevId: 177263469 --- tensorflow/go/tensor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 1326a95278..cd05e2aa0a 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -270,7 +270,7 @@ func typeOf(dt DataType, shape []int64) reflect.Type { } } if ret == nil { - panic(bug("DataType %v is not supported", dt)) + panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt)) } for range shape { ret = reflect.SliceOf(ret) -- GitLab From a7c13b33d6df91e25fc793043bf748b30e311c73 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 28 Nov 2017 21:45:51 -0800 Subject: [PATCH 0926/1801] Update TF Android build instructions to warn about NDK 16 See https://github.com/bazelbuild/bazel/issues/4068 PiperOrigin-RevId: 177264756 --- tensorflow/contrib/lite/java/demo/README.md | 8 +++++++- tensorflow/examples/android/README.md | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/java/demo/README.md b/tensorflow/contrib/lite/java/demo/README.md index 71b633c577..5d13a798e2 100644 --- a/tensorflow/contrib/lite/java/demo/README.md +++ b/tensorflow/contrib/lite/java/demo/README.md @@ -8,7 +8,12 @@ It's easiest with Android Studio. - You'll need at least SDK version 23. + - Make sure to install the latest version of Bazel. Some distributions + ship with Bazel 0.5.4, which is too old. - Bazel requires Android Build Tools `26.0.1` or higher. + - **Bazel is incompatible with NDK revisions 15 and above,** with revision + 16 being a compile-breaking change. [Download an older version manually + instead of using the SDK Manager.](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install-bazel-and-android-prerequisites) - You also need to install the Android Support Repository, available through Android Studio under `Android SDK Manager -> SDK Tools -> Android Support Repository`. @@ -19,7 +24,8 @@ - Make sure the `api_level` in `WORKSPACE` is set to an SDK version that you have installed. - By default, Android Studio will install the SDK to `~/Android/Sdk` and - the NDK to `~/Android/Sdk/ndk-bundle`. + the NDK to `~/Android/Sdk/ndk-bundle` (but the NDK should be a manual + download until Bazel supports NDK 16. See bullet points under (1)). 2. Build the app with Bazel. The demo needs C++11: diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md index 79202a38d7..881a975e60 100644 --- a/tensorflow/examples/android/README.md +++ b/tensorflow/examples/android/README.md @@ -126,6 +126,10 @@ the Android NDK and SDK must be installed on your system. 2. The Android NDK is required to build the native (C/C++) TensorFlow code. The current recommended version is 14b, which may be found [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads). + + * NDK 16, the revision released in November 2017, is **incompatible** with + Bazel. See [here](https://github.com/tensorflow/tensorflow/issues/14918). + 3. The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android @@ -133,6 +137,10 @@ the Android NDK and SDK must be installed on your system. 23 is required to build the TF Android demo (though it will run on API >= 21 devices). + - The Android Studio SDK Manager's NDK installer will install the latest + revision of the NDK, which is **incompatible** with Bazel. You'll need + to download an older version manually, as (2) suggests. + ##### Edit WORKSPACE The Android entries in -- GitLab From 9aeb0eef9188a48a02078128d3d1ca6f78f0f438 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 28 Nov 2017 22:12:34 -0800 Subject: [PATCH 0927/1801] Add logging to help differentiate multiple stacktraces PiperOrigin-RevId: 177266569 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index fe17664d7f..84a4208be3 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -514,6 +514,7 @@ class _InfeedThreadController(_InfeedOutfeedThreadBaseController): exc_info=1 ) time.sleep(120) + logging.error('Closing the failed session.') session.close() def join(self): -- GitLab From 4c2ca8b0cbfdbdc9f7525b1d9ad0c057cb513749 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Tue, 28 Nov 2017 22:48:10 -0800 Subject: [PATCH 0928/1801] Fix typo in GradientTape.persistent_ comment. PiperOrigin-RevId: 177268420 --- tensorflow/c/eager/tape.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index f52248e7d5..191e9c3413 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -161,7 +161,7 @@ class GradientTape { // the tape refer to it); to aid in tape garbage collection. std::unordered_map tensor_usage_; - // If true, all activations are deleted in the first call to ComputeGradient. + // If false, all activations are deleted in the first call to ComputeGradient. // Else, only when this is destructed. bool persistent_; }; -- GitLab From bc87c28c60dddc6137b11f8a1fd31fa79bcf0c1f Mon Sep 17 00:00:00 2001 From: James Qin Date: Wed, 29 Nov 2017 00:34:54 -0800 Subject: [PATCH 0929/1801] Register fp16 Reduce min on GPU. PiperOrigin-RevId: 177274800 --- tensorflow/core/kernels/reduction_ops_min.cc | 1 + tensorflow/core/kernels/reduction_ops_test.cc | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/core/kernels/reduction_ops_min.cc b/tensorflow/core/kernels/reduction_ops_min.cc index 807ac0a456..5c537c5b9c 100644 --- a/tensorflow/core/kernels/reduction_ops_min.cc +++ b/tensorflow/core/kernels/reduction_ops_min.cc @@ -50,6 +50,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .TypeConstraint("Tidx") \ .HostMemory("reduction_indices"), \ ReductionOp>); +REGISTER_GPU_KERNELS(Eigen::half); REGISTER_GPU_KERNELS(float); REGISTER_GPU_KERNELS(double); diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc index 9bbe993a2f..fe8ea59f1b 100644 --- a/tensorflow/core/kernels/reduction_ops_test.cc +++ b/tensorflow/core/kernels/reduction_ops_test.cc @@ -174,6 +174,11 @@ static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) { } BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); +static void BM_Min2DToScalarGPUHalf(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Min", num_x, num_y); +} +BENCHMARK(BM_Min2DToScalarGPUHalf)->RangePair(2048, 8192, 2048, 8192); + static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) { ReduceToScalar(iters, "gpu", "All", num_x, num_y); } -- GitLab From 6196d30cf8498c428bdb7fbd4b4ab9cb83853457 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 02:51:49 -0800 Subject: [PATCH 0930/1801] Add alpha support for dealing with shardings to batchnorm rewriter PiperOrigin-RevId: 177285265 --- .../xla/service/batchnorm_rewriter.cc | 328 ++++++++++-------- .../compiler/xla/service/hlo_sharding.cc | 3 +- 2 files changed, 186 insertions(+), 145 deletions(-) diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc index c6193b3fbb..2bbae25aee 100644 --- a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc +++ b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc @@ -149,6 +149,15 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining( if (!rewrite_training_op_) { return Status::OK(); } + + std::vector added_instructions; + auto add = [&](std::unique_ptr inst) { + HloInstruction* added_inst = computation_->AddInstruction(std::move(inst)); + added_instructions.push_back(added_inst); + return added_inst; + }; + int64 instruction_count_before = computation_->instruction_count(); + // Expand batch norm training into smaller HLO ops. HloInstruction* operand = batch_norm->mutable_operand(0); const Shape operand_shape = operand->shape(); @@ -160,7 +169,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining( Literal::CreateR0(size_in_elements / feature_count); TF_ASSIGN_OR_RETURN(elements_per_feature_literal, elements_per_feature_literal->Convert(ptype)); - auto elements_per_feature = computation_->AddInstruction( + auto elements_per_feature = add( HloInstruction::CreateConstant(std::move(elements_per_feature_literal))); HloInstruction* scale = batch_norm->mutable_operand(1); @@ -169,14 +178,12 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining( auto zero_literal = Literal::CreateR0(0.0f); TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype)); - auto zero = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(zero_literal))); + auto zero = add(HloInstruction::CreateConstant(std::move(zero_literal))); auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon()); TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype)); - auto epsilon = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(epsilon_literal))); - + auto epsilon = + add(HloInstruction::CreateConstant(std::move(epsilon_literal))); std::vector dimensions_without_feature; for (int64 i = 0; i < ShapeUtil::Rank(operand_shape); ++i) { @@ -185,105 +192,110 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining( } } - auto scale_broadcasted = computation_->AddInstruction( + auto scale_broadcasted = add( HloInstruction::CreateBroadcast(operand_shape, scale, {feature_index})); - auto offset_broadcasted = computation_->AddInstruction( + auto offset_broadcasted = add( HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index})); HloComputation* add_reduce_computation = GetScalarBinaryComputation(ptype, HloOpcode::kAdd); // X^2. - auto operand_squared = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kMultiply, operand, operand)); + auto operand_squared = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kMultiply, operand, operand)); // Sum[X]. - auto sum = computation_->AddInstruction(HloInstruction::CreateReduce( - feature_shape, operand, zero, dimensions_without_feature, - add_reduce_computation)); + auto sum = add(HloInstruction::CreateReduce(feature_shape, operand, zero, + dimensions_without_feature, + add_reduce_computation)); // Sum[X^2]. - auto squared_sum = computation_->AddInstruction(HloInstruction::CreateReduce( + auto squared_sum = add(HloInstruction::CreateReduce( feature_shape, operand_squared, zero, dimensions_without_feature, add_reduce_computation)); // Fuse two parallel reduces together to improve performance. - if (use_fusion_) { - auto tuple = computation_->AddInstruction( - HloInstruction::CreateTuple({sum, squared_sum})); + if (use_fusion_ && !batch_norm->has_sharding()) { + auto tuple = add(HloInstruction::CreateTuple({sum, squared_sum})); auto fused = computation_->CreateFusionInstruction( {tuple, sum, squared_sum, operand_squared}, HloInstruction::FusionKind::kInput); - sum = computation_->AddInstruction( - HloInstruction::CreateGetTupleElement(feature_shape, fused, 0)); + sum = add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 0)); - squared_sum = computation_->AddInstruction( - HloInstruction::CreateGetTupleElement(feature_shape, fused, 1)); + squared_sum = + add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 1)); } // E[X]. - auto mean = computation_->AddInstruction(HloInstruction::CreateBinary( + auto mean = add(HloInstruction::CreateBinary( feature_shape, HloOpcode::kDivide, sum, elements_per_feature)); - auto mean_broadcasted = computation_->AddInstruction( + auto mean_broadcasted = add( HloInstruction::CreateBroadcast(operand_shape, mean, {feature_index})); // E[X^2]. - auto square_mean = computation_->AddInstruction(HloInstruction::CreateBinary( + auto square_mean = add(HloInstruction::CreateBinary( feature_shape, HloOpcode::kDivide, squared_sum, elements_per_feature)); // E^2[X]. - auto mean_square = computation_->AddInstruction(HloInstruction::CreateBinary( + auto mean_square = add(HloInstruction::CreateBinary( feature_shape, HloOpcode::kMultiply, mean, mean)); // Var[X]. - auto var = computation_->AddInstruction(HloInstruction::CreateBinary( + auto var = add(HloInstruction::CreateBinary( feature_shape, HloOpcode::kSubtract, square_mean, mean_square)); - auto var_broadcasted = computation_->AddInstruction( - HloInstruction::CreateBroadcast(operand_shape, var, {feature_index})); + auto var_broadcasted = + add(HloInstruction::CreateBroadcast(operand_shape, var, {feature_index})); // Var[X] + epsilon. - auto var_add_epsilon = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon)); + auto var_add_epsilon = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon)); auto neg_half_literal = Literal::CreateR0(-0.5f); TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype)); - auto neg_half = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(neg_half_literal))); + auto neg_half = + add(HloInstruction::CreateConstant(std::move(neg_half_literal))); // 1 / Sqrt[Var[X] + epsilon]. - auto rsqrt_var_add_epsilon = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half)); + auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half)); // X - E[X]. - auto operand_minus_mean = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted)); + auto operand_minus_mean = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted)); // (X - E[X]) / Sqrt[Var[X] + epsilon]. - auto normalized = computation_->AddInstruction( + auto normalized = add( HloInstruction::CreateBinary(operand_shape, HloOpcode::kMultiply, operand_minus_mean, rsqrt_var_add_epsilon)); // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale. - auto scaled_normalized = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted)); + auto scaled_normalized = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted)); // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale + offset. - auto shifted_normalized = computation_->AddInstruction( - HloInstruction::CreateBinary(operand_shape, HloOpcode::kAdd, - scaled_normalized, offset_broadcasted)); - - TF_CHECK_OK(ReplaceWithNewInstruction( - batch_norm, - HloInstruction::CreateTuple({shifted_normalized, mean, var}))); + auto shifted_normalized = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kAdd, scaled_normalized, offset_broadcasted)); + + auto tuple = HloInstruction::CreateTuple({shifted_normalized, mean, var}); + + if (batch_norm->has_sharding()) { + int64 instruction_count_after = computation_->instruction_count(); + CHECK_EQ(instruction_count_after, + instruction_count_before + added_instructions.size()); + for (HloInstruction* inst : added_instructions) { + if (ShapeUtil::Equal(inst->shape(), operand_shape)) { + inst->set_sharding(batch_norm->sharding()); + } else { + inst->set_sharding(HloSharding::Replicate()); + } + } + tuple->set_sharding(batch_norm->sharding()); + } + TF_CHECK_OK(ReplaceWithNewInstruction(batch_norm, std::move(tuple))); return Status::OK(); } @@ -317,52 +329,69 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference( } } - auto scale_broadcasted = computation_->AddInstruction( + std::vector added_instructions; + auto add = [&](std::unique_ptr inst) { + HloInstruction* added_inst = computation_->AddInstruction(std::move(inst)); + added_instructions.push_back(added_inst); + return added_inst; + }; + int64 instruction_count_before = computation_->instruction_count(); + + auto scale_broadcasted = add( HloInstruction::CreateBroadcast(operand_shape, scale, {feature_index})); - auto offset_broadcasted = computation_->AddInstruction( + auto offset_broadcasted = add( HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index})); - auto mean_broadcasted = computation_->AddInstruction( + auto mean_broadcasted = add( HloInstruction::CreateBroadcast(operand_shape, mean, {feature_index})); - auto var_broadcasted = computation_->AddInstruction( - HloInstruction::CreateBroadcast(operand_shape, var, {feature_index})); + auto var_broadcasted = + add(HloInstruction::CreateBroadcast(operand_shape, var, {feature_index})); // Var[X] + epsilon. - auto var_add_epsilon = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon)); + auto var_add_epsilon = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon)); auto neg_half_literal = Literal::CreateR0(-0.5f); TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype)); - auto neg_half = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(neg_half_literal))); + auto neg_half = + add(HloInstruction::CreateConstant(std::move(neg_half_literal))); // 1 / Sqrt[Var[X] + epsilon]. - auto rsqrt_var_add_epsilon = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half)); + auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half)); // X - E[X]. - auto operand_minus_mean = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted)); + auto operand_minus_mean = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted)); // (X - E[X]) / Sqrt[Var[X] + epsilon]. - auto normalized = computation_->AddInstruction( + auto normalized = add( HloInstruction::CreateBinary(operand_shape, HloOpcode::kMultiply, operand_minus_mean, rsqrt_var_add_epsilon)); // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale. - auto scaled_normalized = - computation_->AddInstruction(HloInstruction::CreateBinary( - operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted)); + auto scaled_normalized = add(HloInstruction::CreateBinary( + operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted)); // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale + offset. auto shifted_normalized = HloInstruction::CreateBinary( operand_shape, HloOpcode::kAdd, scaled_normalized, offset_broadcasted); + int64 instruction_count_after = computation_->instruction_count(); + CHECK_EQ(instruction_count_after, + instruction_count_before + added_instructions.size()); + if (batch_norm->has_sharding()) { + for (HloInstruction* inst : added_instructions) { + if (ShapeUtil::Equal(inst->shape(), operand_shape)) { + inst->set_sharding(batch_norm->sharding()); + } else { + inst->set_sharding(HloSharding::Replicate()); + } + } + shifted_normalized->set_sharding(batch_norm->sharding()); + } TF_CHECK_OK( ReplaceWithNewInstruction(batch_norm, std::move(shifted_normalized))); return Status::OK(); @@ -385,6 +414,13 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad( if (!rewrite_grad_op_) { return Status::OK(); } + std::vector added_instructions; + auto add = [&](std::unique_ptr inst) { + HloInstruction* added_inst = computation_->AddInstruction(std::move(inst)); + added_instructions.push_back(added_inst); + return added_inst; + }; + int64 instruction_count_before = computation_->instruction_count(); HloInstruction* activation = batch_norm->mutable_operand(0); const Shape activation_shape = activation->shape(); @@ -403,23 +439,22 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad( Literal::CreateR0(size_in_elements / feature_count); TF_ASSIGN_OR_RETURN(elements_per_feature_literal, elements_per_feature_literal->Convert(ptype)); - auto elements_per_feature = computation_->AddInstruction( + auto elements_per_feature = add( HloInstruction::CreateConstant(std::move(elements_per_feature_literal))); auto zero_literal = Literal::CreateR0(0.0f); TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype)); - auto zero = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(zero_literal))); + auto zero = add(HloInstruction::CreateConstant(std::move(zero_literal))); auto neg_half_literal = Literal::CreateR0(-0.5f); TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype)); - auto neg_half = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(neg_half_literal))); + auto neg_half = + add(HloInstruction::CreateConstant(std::move(neg_half_literal))); auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon()); TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype)); - auto epsilon = computation_->AddInstruction( - HloInstruction::CreateConstant(std::move(epsilon_literal))); + auto epsilon = + add(HloInstruction::CreateConstant(std::move(epsilon_literal))); std::vector dimensions_without_feature; @@ -429,126 +464,131 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad( } } - auto scale_broadcasted = - computation_->AddInstruction(HloInstruction::CreateBroadcast( - activation_shape, scale, {feature_index})); - auto variance_broadcasted = - computation_->AddInstruction(HloInstruction::CreateBroadcast( - activation_shape, variance, {feature_index})); + auto scale_broadcasted = add(HloInstruction::CreateBroadcast( + activation_shape, scale, {feature_index})); + auto variance_broadcasted = add(HloInstruction::CreateBroadcast( + activation_shape, variance, {feature_index})); // E[X]. - auto mean_broadcasted = computation_->AddInstruction( + auto mean_broadcasted = add( HloInstruction::CreateBroadcast(activation_shape, mean, {feature_index})); // rsqrt[Var[X] + epsilon]. - auto rsqrt_var_add_epsilon_broadcasted = - computation_->AddInstruction(HloInstruction::CreateBinary( - activation_shape, HloOpcode::kPower, - computation_->AddInstruction( - HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd, - variance_broadcasted, epsilon)), - neg_half)); - - auto rsqrt_var_add_epsilon = - computation_->AddInstruction(HloInstruction::CreateBinary( - feature_shape, HloOpcode::kPower, - computation_->AddInstruction(HloInstruction::CreateBinary( - feature_shape, HloOpcode::kAdd, variance, epsilon)), - neg_half)); + auto rsqrt_var_add_epsilon_broadcasted = add(HloInstruction::CreateBinary( + activation_shape, HloOpcode::kPower, + add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd, + variance_broadcasted, epsilon)), + neg_half)); + + auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary( + feature_shape, HloOpcode::kPower, + add(HloInstruction::CreateBinary(feature_shape, HloOpcode::kAdd, variance, + epsilon)), + neg_half)); // X - E[X]. - auto activation_minus_mean = computation_->AddInstruction( - HloInstruction::CreateBinary(activation_shape, HloOpcode::kSubtract, - activation, mean_broadcasted)); + auto activation_minus_mean = add(HloInstruction::CreateBinary( + activation_shape, HloOpcode::kSubtract, activation, mean_broadcasted)); // Grad[Y] * (X - E[X]). - auto grad_output_times_activiation_minus_mean = computation_->AddInstruction( - HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply, - grad_output, activation_minus_mean)); + auto grad_output_times_activiation_minus_mean = + add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply, + grad_output, activation_minus_mean)); HloComputation* add_reduce_computation = GetScalarBinaryComputation(ptype, HloOpcode::kAdd); // sum(Grad[Y] * (X - E[X])). auto sum_grad_output_times_activiation_minus_mean = - computation_->AddInstruction(HloInstruction::CreateReduce( + add(HloInstruction::CreateReduce( feature_shape, grad_output_times_activiation_minus_mean, zero, dimensions_without_feature, add_reduce_computation)); // Grad[beta] = Sum(Grad[Y]). - auto grad_beta = computation_->AddInstruction(HloInstruction::CreateReduce( + auto grad_beta = add(HloInstruction::CreateReduce( feature_shape, grad_output, zero, dimensions_without_feature, add_reduce_computation)); - if (use_fusion_) { - auto tuple = computation_->AddInstruction(HloInstruction::CreateTuple( + if (use_fusion_ && !batch_norm->has_sharding()) { + auto tuple = add(HloInstruction::CreateTuple( {sum_grad_output_times_activiation_minus_mean, grad_beta})); auto fused = computation_->CreateFusionInstruction( {tuple, sum_grad_output_times_activiation_minus_mean, grad_beta}, HloInstruction::FusionKind::kInput); - sum_grad_output_times_activiation_minus_mean = computation_->AddInstruction( - HloInstruction::CreateGetTupleElement(feature_shape, fused, 0)); + sum_grad_output_times_activiation_minus_mean = + add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 0)); - grad_beta = computation_->AddInstruction( - HloInstruction::CreateGetTupleElement(feature_shape, fused, 1)); + grad_beta = + add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 1)); } // Grad[scale] = Sum(Grad[Y] * (X - E[X]) * rsqrt[Var[X] + epsilon]). - auto grad_scale = computation_->AddInstruction(HloInstruction::CreateBinary( + auto grad_scale = add(HloInstruction::CreateBinary( feature_shape, HloOpcode::kMultiply, sum_grad_output_times_activiation_minus_mean, rsqrt_var_add_epsilon)); // I2 = Sum(Grad[Y]) - auto I2 = computation_->AddInstruction(HloInstruction::CreateBroadcast( - activation_shape, grad_beta, {feature_index})); + auto i2 = add(HloInstruction::CreateBroadcast(activation_shape, grad_beta, + {feature_index})); // I3 = Sum(Grad[Y] * (X - E[X])) - auto I3 = computation_->AddInstruction(HloInstruction::CreateBroadcast( + auto i3 = add(HloInstruction::CreateBroadcast( activation_shape, sum_grad_output_times_activiation_minus_mean, {feature_index})); // I4 = (X - E[X]) * I3 - auto I4 = computation_->AddInstruction(HloInstruction::CreateBinary( - activation_shape, HloOpcode::kMultiply, I3, activation_minus_mean)); + auto i4 = add(HloInstruction::CreateBinary( + activation_shape, HloOpcode::kMultiply, i3, activation_minus_mean)); // I5 = I4 / (Var[X] + epsilon) - auto I5 = computation_->AddInstruction(HloInstruction::CreateBinary( - activation_shape, HloOpcode::kDivide, I4, - computation_->AddInstruction(HloInstruction::CreateBinary( - activation_shape, HloOpcode::kAdd, variance_broadcasted, epsilon)))); + auto i5 = add(HloInstruction::CreateBinary( + activation_shape, HloOpcode::kDivide, i4, + add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd, + variance_broadcasted, epsilon)))); // scale * rsqrt[Var[X] + epsilon] * 1/N - auto scale_times_rsqrt_var_add_epsilon = - computation_->AddInstruction(HloInstruction::CreateBinary( - activation_shape, HloOpcode::kMultiply, scale_broadcasted, - rsqrt_var_add_epsilon_broadcasted)); + auto scale_times_rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary( + activation_shape, HloOpcode::kMultiply, scale_broadcasted, + rsqrt_var_add_epsilon_broadcasted)); - scale_times_rsqrt_var_add_epsilon = - computation_->AddInstruction(HloInstruction::CreateBinary( - activation_shape, HloOpcode::kDivide, - scale_times_rsqrt_var_add_epsilon, elements_per_feature)); + scale_times_rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary( + activation_shape, HloOpcode::kDivide, scale_times_rsqrt_var_add_epsilon, + elements_per_feature)); - auto I1 = computation_->AddInstruction( - HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply, - grad_output, elements_per_feature)); + auto i1 = + add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply, + grad_output, elements_per_feature)); // I6 = I1 - I2 - I5 - auto I6 = computation_->AddInstruction(HloInstruction::CreateBinary( + auto i6 = add(HloInstruction::CreateBinary( activation_shape, HloOpcode::kSubtract, - computation_->AddInstruction(HloInstruction::CreateBinary( - activation_shape, HloOpcode::kSubtract, I1, I2)), - I5)); + add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kSubtract, + i1, i2)), + i5)); // Grad[X] = scale * rsqrt[Var[X] + epsilon] * 1/N * I6. - auto grad_activation = computation_->AddInstruction( - HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply, - scale_times_rsqrt_var_add_epsilon, I6)); + auto grad_activation = + add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply, + scale_times_rsqrt_var_add_epsilon, i6)); + auto tuple = + HloInstruction::CreateTuple({grad_activation, grad_scale, grad_beta}); + if (batch_norm->has_sharding()) { + int64 instruction_count_after = computation_->instruction_count(); + CHECK_EQ(instruction_count_after, + instruction_count_before + added_instructions.size()); + for (HloInstruction* inst : added_instructions) { + if (ShapeUtil::Equal(inst->shape(), activation_shape)) { + inst->set_sharding(batch_norm->sharding()); + } else { + inst->set_sharding(HloSharding::Replicate()); + } + } + tuple->set_sharding(batch_norm->sharding()); + } - TF_CHECK_OK(ReplaceWithNewInstruction( - batch_norm, - HloInstruction::CreateTuple({grad_activation, grad_scale, grad_beta}))); + TF_CHECK_OK(ReplaceWithNewInstruction(batch_norm, std::move(tuple))); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index d1adec31c2..447c244666 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -246,7 +246,8 @@ Status HloSharding::ValidateNonTuple(const Shape& shape, // The tile rank must be the same as the input rank. if (ShapeUtil::Rank(shape) != ShapeUtil::Rank(tile_shape_)) { return tensorflow::errors::InvalidArgument( - "Tile rank is different to the input rank"); + "Tile rank is different to the input rank. sharding=", ToString(), + ", input_shape=", ShapeUtil::HumanString(shape)); } // The tile shape must not be the same as the input shape without maximal_ -- GitLab From 4b60f92986ef1a3e4456aa26911df449a68251a5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 04:40:56 -0800 Subject: [PATCH 0931/1801] Change "safe pointers" to make the deleters stateless (i.e. a type, not a value). This decreases the size of each safe pointer object and allows more inlining of the destructor. PiperOrigin-RevId: 177292948 --- tensorflow/python/lib/core/safe_ptr.cc | 16 ++++------ tensorflow/python/lib/core/safe_ptr.h | 42 +++++++++++++++++--------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/tensorflow/python/lib/core/safe_ptr.cc b/tensorflow/python/lib/core/safe_ptr.cc index 456ea3348b..ce34b6d004 100644 --- a/tensorflow/python/lib/core/safe_ptr.cc +++ b/tensorflow/python/lib/core/safe_ptr.cc @@ -16,25 +16,21 @@ limitations under the License. #include "tensorflow/python/lib/core/safe_ptr.h" namespace tensorflow { -namespace { -inline void Py_DECREF_wrapper(PyObject* o) { Py_DECREF(o); } - -} // namespace - -Safe_PyObjectPtr make_safe(PyObject* o) { - return Safe_PyObjectPtr(o, Py_DECREF_wrapper); +Safe_PyObjectPtr make_safe(PyObject* object) { + return Safe_PyObjectPtr(object); } Safe_TF_TensorPtr make_safe(TF_Tensor* tensor) { - return Safe_TF_TensorPtr(tensor, TF_DeleteTensor); + return Safe_TF_TensorPtr(tensor); } Safe_TFE_TensorHandlePtr make_safe(TFE_TensorHandle* handle) { - return Safe_TFE_TensorHandlePtr(handle, TFE_DeleteTensorHandle); + return Safe_TFE_TensorHandlePtr(handle); } Safe_TF_StatusPtr make_safe(TF_Status* status) { - return Safe_TF_StatusPtr(status, TF_DeleteStatus); + return Safe_TF_StatusPtr(status); } + } // namespace tensorflow diff --git a/tensorflow/python/lib/core/safe_ptr.h b/tensorflow/python/lib/core/safe_ptr.h index 70cd2fdf6c..80db840aeb 100644 --- a/tensorflow/python/lib/core/safe_ptr.h +++ b/tensorflow/python/lib/core/safe_ptr.h @@ -17,39 +17,51 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_PYTHON_LIB_CORE_SAFE_PTR_H_ #include -#include +#include #include "tensorflow/c/c_api.h" #include "tensorflow/c/eager/c_api.h" namespace tensorflow { +namespace detail { + +struct PyDecrefDeleter { + void operator()(PyObject* p) const { Py_DECREF(p); } +}; + +struct TFTensorDeleter { + void operator()(TF_Tensor* p) const { TF_DeleteTensor(p); } +}; + +struct TFETensorHandleDeleter { + void operator()(TFE_TensorHandle* p) const { TFE_DeleteTensorHandle(p); } +}; + +struct TFStatusDeleter { + void operator()(TF_Status* p) const { TF_DeleteStatus(p); } +}; + +} // namespace detail // Safe container for an owned PyObject. On destruction, the reference count of // the contained object will be decremented. -typedef void (*Py_DECREF_wrapper_type)(PyObject*); -typedef std::unique_ptr Safe_PyObjectPtr; +using Safe_PyObjectPtr = std::unique_ptr; Safe_PyObjectPtr make_safe(PyObject* o); // Safe containers for an owned TF_Tensor. On destruction, the tensor will be // deleted by TF_DeleteTensor. -// Note: can't use decltype(&TF_DeleteTensor) due to SWIG -typedef void (*TF_DeleteTensor_type)(TF_Tensor*); -typedef std::unique_ptr Safe_TF_TensorPtr; +using Safe_TF_TensorPtr = std::unique_ptr; Safe_TF_TensorPtr make_safe(TF_Tensor* tensor); // Safe containers for an owned TFE_TensorHandle. On destruction, the handle -// will be deleted by TFE_DeleteTensorHandle. Note: can't use -// decltype(&TFE_DeleteTensorHandle) due to SWIG -typedef void (*TFE_DeleteTensorHandle_type)(TFE_TensorHandle*); -typedef std::unique_ptr - Safe_TFE_TensorHandlePtr; +// will be deleted by TFE_DeleteTensorHandle. +using Safe_TFE_TensorHandlePtr = + std::unique_ptr; Safe_TFE_TensorHandlePtr make_safe(TFE_TensorHandle* handle); // Safe containers for an owned TF_Status. On destruction, the handle -// will be deleted by TF_DeleteStatus. Note: can't use -// decltype(&TF_DeleteStatus) due to SWIG -typedef void (*TF_DeleteStatus_type)(TF_Status*); -typedef std::unique_ptr Safe_TF_StatusPtr; +// will be deleted by TF_DeleteStatus. +using Safe_TF_StatusPtr = std::unique_ptr; Safe_TF_StatusPtr make_safe(TF_Status* status); } // namespace tensorflow -- GitLab From e6d823dd19a5768d0dcd651c14a6ebf4bb023180 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 07:32:31 -0800 Subject: [PATCH 0932/1801] Make NCCL code ready for NVIDIA's NCCL 2. PiperOrigin-RevId: 177306507 --- .../contrib/nccl/kernels/nccl_manager.cc | 32 +++++++++++++++++-- .../contrib/nccl/kernels/nccl_manager_test.cc | 2 ++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.cc b/tensorflow/contrib/nccl/kernels/nccl_manager.cc index 31a35b0d53..913935b382 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.cc @@ -258,9 +258,37 @@ NcclManager::Communicator* NcclManager::GetCommunicator( devices[i] = collective->participants[i]->gpu_device_id; } + int device_count = num_devices; +#if NCCL_MAJOR >= 2 + // NCCL2 prevents InitAll for more communicators than devices (but doesn't + // check that device ids are unique). Work around it by initializing each + // rank individually. + cudaGetDeviceCount(&device_count); +#endif std::vector nccl_comms(num_devices); - auto result = ncclCommInitAll(nccl_comms.data(), num_devices, devices.data()); - CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result); + if (num_devices <= device_count) { + auto result = + ncclCommInitAll(nccl_comms.data(), num_devices, devices.data()); + CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result); + } else { + int savedDevice = 0; + CHECK_EQ(cudaGetDevice(&savedDevice), cudaSuccess); + ncclUniqueId commId; + ncclGetUniqueId(&commId); +#if NCCL_MAJOR >= 2 + CHECK_EQ(ncclGroupStart(), ncclSuccess); +#endif + for (int rank = 0; rank < num_devices; ++rank) { + cudaSetDevice(devices[rank]); + auto result = + ncclCommInitRank(nccl_comms.data() + rank, num_devices, commId, rank); + CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result); + } +#if NCCL_MAJOR >= 2 + CHECK_EQ(ncclGroupEnd(), ncclSuccess); +#endif + cudaSetDevice(savedDevice); + } for (int rank = 0; rank < num_devices; ++rank) { members[rank].nccl_comm = nccl_comms[rank]; } diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc index 505c4b0d71..abafe4b407 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc @@ -30,6 +30,8 @@ namespace tensorflow { static std::vector GetGPUDevices() { std::vector devices; SessionOptions session_options; + session_options.config.mutable_gpu_options() + ->set_per_process_gpu_memory_fraction(0.1); session_options.env = Env::Default(); Status s = DeviceFactory::GetFactory(DEVICE_GPU) ->AddDevices(session_options, "", &devices); -- GitLab From 136ba0a2f80262816434abdde6fcd3a729aa3437 Mon Sep 17 00:00:00 2001 From: Simone Cirillo Date: Wed, 29 Nov 2017 16:51:18 +0100 Subject: [PATCH 0933/1801] Add layer scope to tf.contrib.layers.spatial_softmax (#14287) --- .../contrib/layers/python/layers/layers.py | 91 ++++++++++--------- 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 623f1153b6..6cd586a5f0 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -2651,51 +2651,54 @@ def spatial_softmax(features, ValueError: If unexpected data_format specified. ValueError: If num_channels dimension is unspecified. """ - shape = array_ops.shape(features) - static_shape = features.shape - if data_format == DATA_FORMAT_NHWC: - height, width, num_channels = shape[1], shape[2], static_shape[3] - elif data_format == DATA_FORMAT_NCHW: - num_channels, height, width = static_shape[1], shape[2], shape[3] - else: - raise ValueError('data_format has to be either NCHW or NHWC.') - if num_channels.value is None: - raise ValueError('The num_channels dimension of the inputs to ' - '`spatial_softmax` should be defined. Found `None`.') - - with ops.name_scope(name, 'spatial_softmax', [features]) as name: - # Create tensors for x and y coordinate values, scaled to range [-1, 1]. - pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height), - math_ops.lin_space(-1., 1., num=width), - indexing='ij') - pos_x = array_ops.reshape(pos_x, [height * width]) - pos_y = array_ops.reshape(pos_y, [height * width]) - if temperature is None: - temperature_collections = utils.get_variable_collections( - variables_collections, 'temperature') - temperature = variables.model_variable( - 'temperature', - shape=(), - dtype=dtypes.float32, - initializer=init_ops.ones_initializer(), - collections=temperature_collections, - trainable=trainable) - if data_format == 'NCHW': - features = array_ops.reshape(features, [-1, height * width]) + with variable_scope.variable_scope(name, 'spatial_softmax'): + shape = array_ops.shape(features) + static_shape = features.shape + if data_format == DATA_FORMAT_NHWC: + height, width, num_channels = shape[1], shape[2], static_shape[3] + elif data_format == DATA_FORMAT_NCHW: + num_channels, height, width = static_shape[1], shape[2], shape[3] else: - features = array_ops.reshape( - array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width]) - - softmax_attention = nn.softmax(features/temperature) - expected_x = math_ops.reduce_sum( - pos_x * softmax_attention, [1], keep_dims=True) - expected_y = math_ops.reduce_sum( - pos_y * softmax_attention, [1], keep_dims=True) - expected_xy = array_ops.concat([expected_x, expected_y], 1) - feature_keypoints = array_ops.reshape( - expected_xy, [-1, num_channels.value * 2]) - feature_keypoints.set_shape([None, num_channels.value * 2]) - return feature_keypoints + raise ValueError('data_format has to be either NCHW or NHWC.') + if num_channels.value is None: + raise ValueError('The num_channels dimension of the inputs to ' + '`spatial_softmax` should be defined. Found `None`.') + + with ops.name_scope('spatial_softmax_op', 'spatial_softmax_op', [features]): + # Create tensors for x and y coordinate values, scaled to range [-1, 1]. + pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height), + math_ops.lin_space(-1., 1., num=width), + indexing='ij') + pos_x = array_ops.reshape(pos_x, [height * width]) + pos_y = array_ops.reshape(pos_y, [height * width]) + if temperature is None: + temperature_collections = utils.get_variable_collections( + variables_collections, 'temperature') + temperature = variables.model_variable( + 'temperature', + shape=(), + dtype=dtypes.float32, + initializer=init_ops.ones_initializer(), + collections=temperature_collections, + trainable=trainable) + if data_format == 'NCHW': + features = array_ops.reshape(features, [-1, height * width]) + else: + features = array_ops.reshape( + array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width]) + + softmax_attention = nn.softmax(features/temperature) + expected_x = math_ops.reduce_sum( + pos_x * softmax_attention, [1], keep_dims=True) + expected_y = math_ops.reduce_sum( + pos_y * softmax_attention, [1], keep_dims=True) + expected_xy = array_ops.concat([expected_x, expected_y], 1) + feature_keypoints = array_ops.reshape( + expected_xy, [-1, num_channels.value * 2]) + feature_keypoints.set_shape([None, num_channels.value * 2]) + return feature_keypoints + + def stack(inputs, layer, stack_args, **kwargs): -- GitLab From f7015074d78f930e1e1a5458dd460a9ea181dff2 Mon Sep 17 00:00:00 2001 From: Jimmy Jia Date: Wed, 29 Nov 2017 10:51:45 -0500 Subject: [PATCH 0934/1801] Lazily configure TensorFlow logger (#10657) --- tensorflow/python/platform/tf_logging.py | 90 ++++++++++++++++-------- 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py index 71ee5e365f..85ed4f071c 100644 --- a/tensorflow/python/platform/tf_logging.py +++ b/tensorflow/python/platform/tf_logging.py @@ -30,64 +30,92 @@ from logging import ERROR from logging import FATAL from logging import INFO from logging import WARN +import threading import six from tensorflow.python.util.all_util import remove_undocumented -# Determine whether we are in an interactive environment -_interactive = False -try: - # This is only defined in interactive shells - if _sys.ps1: _interactive = True -except AttributeError: - # Even now, we may be in an interactive shell with `python -i`. - _interactive = _sys.flags.interactive +# Don't use this directly. Use _get_logger() instead. +_logger = None +_logger_lock = threading.Lock() -# Scope the tensorflow logger to not conflict with users' loggers -_logger = _logging.getLogger('tensorflow') -# If we are in an interactive environment (like jupyter), set loglevel to info -# and pipe the output to stdout -if _interactive: - _logger.setLevel(INFO) - _logging_target = _sys.stdout -else: - _logging_target = _sys.stderr +def _get_logger(): + global _logger -# Add the output handler -_handler = _logging.StreamHandler(_logging_target) -_handler.setFormatter(_logging.Formatter(_logging.BASIC_FORMAT, None)) -_logger.addHandler(_handler) + # Use double-checked locking to avoid taking lock unnecessarily. + if _logger: + return _logger + + _logger_lock.acquire() + + try: + if _logger: + return _logger + + # Scope the TensorFlow logger to not conflict with users' loggers. + logger = _logging.getLogger('tensorflow') + + # Don't further configure the TensorFlow logger if the root logger is + # already configured. This prevents double logging in those cases. + if not _logging.getLogger().handlers: + # Determine whether we are in an interactive environment + _interactive = False + try: + # This is only defined in interactive shells. + if _sys.ps1: _interactive = True + except AttributeError: + # Even now, we may be in an interactive shell with `python -i`. + _interactive = _sys.flags.interactive + + # If we are in an interactive environment (like Jupyter), set loglevel + # to INFO and pipe the output to stdout. + if _interactive: + logger.setLevel(INFO) + _logging_target = _sys.stdout + else: + _logging_target = _sys.stderr + + # Add the output handler. + _handler = _logging.StreamHandler(_logging_target) + _handler.setFormatter(_logging.Formatter(_logging.BASIC_FORMAT, None)) + logger.addHandler(_handler) + + _logger = logger + return _logger + + finally: + _logger_lock.release() def log(level, msg, *args, **kwargs): - _logger.log(level, msg, *args, **kwargs) + _get_logger().log(level, msg, *args, **kwargs) def debug(msg, *args, **kwargs): - _logger.debug(msg, *args, **kwargs) + _get_logger().debug(msg, *args, **kwargs) def error(msg, *args, **kwargs): - _logger.error(msg, *args, **kwargs) + _get_logger().error(msg, *args, **kwargs) def fatal(msg, *args, **kwargs): - _logger.fatal(msg, *args, **kwargs) + _get_logger().fatal(msg, *args, **kwargs) def info(msg, *args, **kwargs): - _logger.info(msg, *args, **kwargs) + _get_logger().info(msg, *args, **kwargs) def warn(msg, *args, **kwargs): - _logger.warn(msg, *args, **kwargs) + _get_logger().warn(msg, *args, **kwargs) def warning(msg, *args, **kwargs): - _logger.warning(msg, *args, **kwargs) + _get_logger().warning(msg, *args, **kwargs) _level_names = { @@ -118,7 +146,7 @@ def flush(): # Code below is taken from pyglib/logging def vlog(level, msg, *args, **kwargs): - _logger.log(level, msg, *args, **kwargs) + _get_logger().log(level, msg, *args, **kwargs) def _GetNextLogCountPerToken(token): @@ -225,12 +253,12 @@ def google2_log_prefix(level, timestamp=None, file_and_line=None): def get_verbosity(): """Return how much logging output will be produced.""" - return _logger.getEffectiveLevel() + return _get_logger().getEffectiveLevel() def set_verbosity(v): """Sets the threshold for what messages will be logged.""" - _logger.setLevel(v) + _get_logger().setLevel(v) def _get_thread_id(): -- GitLab From c39544a066eae78e4672467c3883b0158638f1a1 Mon Sep 17 00:00:00 2001 From: Andrei Nigmatulin Date: Wed, 29 Nov 2017 15:56:52 +0000 Subject: [PATCH 0935/1801] golang: ~2x speedup for encodeTensor() (#14427) --- tensorflow/go/tensor.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 1326a95278..bfd8660b92 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -328,6 +328,14 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { } } + // Optimisation: if only one dimension is left we can use binary.Write() directly for this slice + if len(shape) == 1 && v.Len() > 0 { + switch v.Index(0).Kind() { + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return binary.Write(w, nativeEndian, v.Interface()) + } + } + subShape := shape[1:] for i := 0; i < v.Len(); i++ { err := encodeTensor(w, v.Index(i), subShape) -- GitLab From 6bbff8370fa4d00a0001d930f101d4507b6c0ad5 Mon Sep 17 00:00:00 2001 From: Andrei Nigmatulin Date: Wed, 29 Nov 2017 16:02:56 +0000 Subject: [PATCH 0936/1801] golang: ~15x speedup for decodeTensor() (#14912) --- tensorflow/go/tensor.go | 9 +++++++++ tensorflow/go/tensor_test.go | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index bfd8660b92..cd6f4bc1f0 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -368,6 +368,15 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect. case reflect.Slice: val := reflect.Indirect(ptr) val.Set(reflect.MakeSlice(typ, int(shape[0]), int(shape[0]))) + + // Optimization: if only one dimension is left we can use binary.Read() directly for this slice + if len(shape) == 1 && val.Len() > 0 { + switch val.Index(0).Kind() { + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return binary.Read(r, nativeEndian, val.Interface()) + } + } + for i := 0; i < val.Len(); i++ { if err := decodeTensor(r, shape[1:], typ.Elem(), val.Index(i).Addr()); err != nil { return err diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 674a8ce86f..793c36dd4d 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -243,3 +243,23 @@ func BenchmarkNewTensor(b *testing.B) { ) b.Run("[150528]", func(b *testing.B) { benchmarkNewTensor(b, vector) }) } + +func benchmarkDecodeTensor(b *testing.B, t *Tensor) { + for i := 0; i < b.N; i++ { + _ = t.Value() + } +} + +func BenchmarkDecodeTensor(b *testing.B) { + var ( + // Some sample sizes from the Inception image labeling model. + // Where input tensors correspond to a 224x224 RGB image + // flattened into a vector. + vector [224 * 224 * 3]int32 + ) + t, err := NewTensor(vector) + if err != nil { + b.Fatalf("(%v, %v)", t, err) + } + b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) }) +} -- GitLab From 4b7d79b6eaae375559ac32f0c8a8b46cd8825774 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 29 Nov 2017 08:08:44 -0800 Subject: [PATCH 0937/1801] Fix absl flag initialization in cloud_tpu_profiler (#14426) This fixes a regression caused by 2652704b576adc16b4d735f651cea1024e88b72e where the command would not run. See also: tensorflow/tensorboard#716 --- .../profiler/pip_package/cloud_tpu_profiler/main.py | 12 ++++-------- tensorflow/contrib/tpu/profiler/pip_package/setup.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py index 3bdd475fad..7970c20a26 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py @@ -24,22 +24,18 @@ import sys import tensorflow as tf - tf.flags.DEFINE_string('service_addr', '', 'Address of TPU profiler service e.g. localhost:8466') - - tf.flags.DEFINE_string('logdir', '', 'Path of TensorBoard log directory e.g. /tmp/tb_log') - - tf.flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.') - FLAGS = tf.flags.FLAGS +EXECUTABLE = 'data/capture_tpu_profile' -EXECUTABLE = 'data/capture_tpu_profile' +def run_main(): + tf.app.run(main) def main(unused_argv=None): @@ -54,4 +50,4 @@ def main(unused_argv=None): if __name__ == '__main__': - tf.app.run(main) + run_main() diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py index e77cae4695..ee6950699e 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py @@ -23,7 +23,7 @@ from setuptools import setup _VERSION = '1.3.0-a1' CONSOLE_SCRIPTS = [ - 'capture_tpu_profile=cloud_tpu_profiler.main:main', + 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', ] REQUIRED_PACKAGES = [ -- GitLab From 18a36a823141c675658d218fa78ed5e2bf19ea8c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 08:05:58 -0800 Subject: [PATCH 0938/1801] [XLA:CPU] Factor IR function building logic out of IrEmitter into its own file (no functional changes, just code movement). This will enable building parallel IR functions from other emitters, and remove the requirement that parallel IR functions are associated with a sub-computation. PiperOrigin-RevId: 177309875 --- tensorflow/compiler/xla/service/cpu/BUILD | 17 ++ .../compiler/xla/service/cpu/ir_emitter.cc | 180 +++------------- .../compiler/xla/service/cpu/ir_emitter.h | 20 +- .../compiler/xla/service/cpu/ir_function.cc | 195 ++++++++++++++++++ .../compiler/xla/service/cpu/ir_function.h | 109 ++++++++++ 5 files changed, 352 insertions(+), 169 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/ir_function.cc create mode 100644 tensorflow/compiler/xla/service/cpu/ir_function.h diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index e1eed498f6..e64b313ffc 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -250,6 +250,7 @@ cc_library( ":dot_op_emitter", ":external_constant_pool", ":ir_emission_utils", + ":ir_function", ":shape_partition", ":simple_orc_jit", "//tensorflow/compiler/xla:shape_util", @@ -280,6 +281,22 @@ cc_library( ], ) +cc_library( + name = "ir_function", + srcs = ["ir_function.cc"], + hdrs = ["ir_function.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla/service/llvm_ir:ir_array", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/compiler/xla/service/llvm_ir:vector_support_library", + "@llvm//:core", + ], +) + cc_library( name = "dot_op_emitter", srcs = ["dot_op_emitter.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 502dd2e738..f087329c6d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -42,6 +42,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h" #include "tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/cpu/ir_function.h" #include "tensorflow/compiler/xla/service/cpu/shape_partition.h" #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h" #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h" @@ -124,131 +125,27 @@ StatusOr IrEmitter::EmitComputation( } else { TF_RETURN_IF_ERROR(computation->AcceptOrdered(this, *instruction_order)); } - InsertOrDie(&emitted_functions_, computation, compute_function_); - - return compute_function_; -} - -static llvm::Argument* GetArg(llvm::Function* f, int idx) { - llvm::Function::arg_iterator arg_iter = f->arg_begin(); - std::advance(arg_iter, idx); - return &*arg_iter; + llvm::Function* ir_function = compute_function_->function(); + InsertOrDie(&emitted_functions_, computation, ir_function); + // Delete 'compute_function', finalizing 'ir_function' and restoring caller + // IR insert point. + compute_function_.reset(); + return ir_function; } void IrEmitter::InitializeIrFunction(const string& function_name) { - // The function signature is: - // void function(i8* retval, i8* run_options, i8** params, i8** temps, - // i64* dynamic_loop_bounds, i64* prof_counters) - // - // retval: points to the returned value. - // params: address of an array with pointers to parameters. - // temps: address of an array with pointers to temporary buffers. - // - // Therefore, the generated function's signature (FunctionType) is statically - // determined - parameter unpacking is done in code generated into the - // function, rather than by a prologue dictated by the platform ABI. - // - // /--------------\ - // retval ----------> | return value | - // \--------------/ - // - // /-------------------------------\ - // run_options -----> | xla::ExecutableRunOptions | - // \-------------------------------/ - // - // /---------------------------------------------\ - // params --------> | param 0 | param 1 | ..... | param N-1 | - // | addr | addr | | addr | - // \---------------------------------------------/ - // | | | - // | | | - // V V V - // /---------\ /---------\ /-----------\ - // | param 0 | | param 1 | | param N-1 | - // \---------/ \---------/ \-----------/ - // - // /---------------------------------------------\ - // temps ---------> | temp 0 | temp 1 | ..... | temp N-1 | - // | addr | addr | | addr | - // \---------------------------------------------/ - // | | | - // | | | - // V V V - // /---------\ /---------\ /-----------\ - // | temp 0 | | temp 1 | | temp N-1 | - // \---------/ \---------/ \-----------/ - // - // /--------------------------------------------\ - // dynamic loop bounds -> | outer_dim0_start | outer_dim0_limit | .....| - // (elided for aot) \--------------------------------------------/ - // - // /---------------------------------------------\ - // prof counters -> | counter 0 | counter 1 | ..... | counter N-1 | - // (elided for aot) \---------------------------------------------/ - - // Even though the type of params and temps is void** in the host's view, in - // LLVM IR this is represented by i8*, similarly to void*. It's up to the code - // to use GEPs to unravel the indirection layers. - llvm::FunctionType* compute_function_type = llvm::FunctionType::get( - /*Result=*/llvm::Type::getVoidTy(module_->getContext()), - /*Params=*/GetComputeFunctionParams(), - /*isVarArg=*/false); - // Functions with local linkage get an inlining bonus. Because we know // a-priori that embedded functions (non-entry functions) will not have its // name resolved, give it local linkage. llvm::Function::LinkageTypes linkage = is_top_level_computation_ ? llvm::GlobalValue::ExternalLinkage : llvm::GlobalValue::InternalLinkage; - compute_function_ = - llvm::Function::Create(/*Ty=*/compute_function_type, - /*Linkage=*/linkage, - /*Name=*/AsStringRef(function_name), - /*Module=*/module_); - compute_function_->setCallingConv(llvm::CallingConv::C); - - // Set meaningful names for the function's arguments: useful for debugging. - llvm::Function::arg_iterator arg_iter = compute_function_->arg_begin(); - arg_iter->setName("retval"); - (++arg_iter)->setName("run_options"); - (++arg_iter)->setName("params"); - (++arg_iter)->setName("temps"); - if (num_dynamic_loop_bounds_ > 0) { - (++arg_iter)->setName("dynamic_loop_bounds"); - } - (++arg_iter)->setName("prof_counters"); - - // We know a-priori that the function arguments are guaranteed to point to - // disjoint objects. - llvm::Argument* retval = GetResultArgument(); - for (llvm::Argument& argument : compute_function_->args()) { - // However, the return buffer aliases the temporaries and thus cannot be - // marked noalias. - if (&argument == retval) { - continue; - } - compute_function_->addAttribute(argument.getArgNo() + 1, - llvm::Attribute::NoAlias); - } - - // Add the optize attribute to the function if optimizing for size. This - // controls internal behavior of some optimization passes (e.g. loop - // unrolling). - if (options::OptimizeForSizeRequested(hlo_module_config_)) { - compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize); - } - - if (hlo_module_config_.debug_options().xla_enable_fast_math()) { - compute_function_->addFnAttr("unsafe-fp-math", "true"); - compute_function_->addFnAttr("no-infs-fp-math", "true"); - compute_function_->addFnAttr("no-nans-fp-math", "true"); - compute_function_->addFnAttr("no-signed-zeros-fp-math", "true"); - } - - ir_builder_.SetInsertPoint(llvm::BasicBlock::Create( - /*Context=*/module_->getContext(), - /*Name=*/"entry", - /*Parent=*/compute_function_)); + // Create and initialize new IrFunction. + compute_function_.reset( + new IrFunction(function_name, linkage, + options::OptimizeForSizeRequested(hlo_module_config_), + hlo_module_config_.debug_options().xla_enable_fast_math(), + module_, &ir_builder_, num_dynamic_loop_bounds_)); } IrEmitter::~IrEmitter() {} @@ -1452,7 +1349,7 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) { // // Where Param is the actual element type of the underlying buffer (for // example, float for an XLA F32 element type). - llvm::Argument* params = GetArg(compute_function_, 2); + llvm::Argument* params = compute_function_->parameters_arg(); llvm::Value* param_address_offset = llvm_ir::EmitBufferIndexingGEP(params, param_number, &ir_builder_); llvm::LoadInst* param_address_untyped = @@ -1590,7 +1487,7 @@ IrEmitter::ShardedVectorType IrEmitter::CreateShardedVectorType( // Here we assume that the largest register is a vector register. int max_vector_register_size_in_bytes = target_machine_features_.largest_register_size_in_bytes( - compute_function_); + compute_function_->function()); int vector_register_size_in_elements = max_vector_register_size_in_bytes / @@ -2410,7 +2307,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) { // Terminates the current block with a branch to a while header. llvm::BasicBlock* header_bb = llvm::BasicBlock::Create( module_->getContext(), AsStringRef(IrName(xla_while, "header")), - compute_function_); + compute_function_->function()); ir_builder_.CreateBr(header_bb); ir_builder_.SetInsertPoint(header_bb); @@ -2427,7 +2324,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) { // Branches to the body or to the while exit depending on the condition. llvm::BasicBlock* body_bb = llvm::BasicBlock::Create( module_->getContext(), AsStringRef(IrName(xla_while, "body")), - compute_function_); + compute_function_->function()); llvm::BasicBlock* exit_bb = llvm::BasicBlock::Create( module_->getContext(), AsStringRef(IrName(xla_while, "exit"))); ir_builder_.CreateCondBr(while_predicate, body_bb, exit_bb); @@ -2442,7 +2339,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) { ir_builder_.CreateBr(header_bb); // Adds the exit block to the function and sets the insert point there. - compute_function_->getBasicBlockList().push_back(exit_bb); + compute_function_->function()->getBasicBlockList().push_back(exit_bb); ir_builder_.SetInsertPoint(exit_bb); return Status::OK(); @@ -2642,7 +2539,6 @@ Status IrEmitter::FinishVisit(HloInstruction* root) { if (prof_counter) { profiling_state_.RecordCompleteComputation(&ir_builder_, prof_counter); } - ir_builder_.CreateRetVoid(); return Status::OK(); } @@ -2783,43 +2679,16 @@ llvm::Type* IrEmitter::IrShapeType(const Shape& shape) { return llvm_ir::ShapeToIrType(shape, module_); } -std::vector IrEmitter::GetComputeFunctionParams() { - llvm::Type* i8_ptr_type = llvm::Type::getInt8PtrTy(module_->getContext()); - llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo(); - llvm::Type* i64_ptr_type = llvm::Type::getInt64PtrTy(module_->getContext()); - std::vector compute_function_params( - {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type}); - if (num_dynamic_loop_bounds_ > 0) { - compute_function_params.push_back(i64_ptr_type); - } - compute_function_params.push_back(i64_ptr_type); - return compute_function_params; -} - -llvm::Argument* IrEmitter::GetResultArgument() { - return GetArg(compute_function_, 0); -} - llvm::Argument* IrEmitter::GetProfileCountersArgument() { - const int64 arg_index = num_dynamic_loop_bounds_ > 0 ? 5 : 4; - return GetArg(compute_function_, arg_index); + return compute_function_->profile_counters_arg(); } llvm::Value* IrEmitter::GetTempBuffersArgument() { - return GetArg(compute_function_, 3); -} - -llvm::Value* IrEmitter::GetDynamicLoopBound(const int64 offset) { - CHECK_GT(num_dynamic_loop_bounds_, 0); - CHECK_LT(offset, num_dynamic_loop_bounds_ * 2); - llvm::Argument* loop_bounds_arg = GetArg(compute_function_, 4); - string name = tensorflow::strings::StrCat("dynamic_loop_bound_", offset); - return ir_builder_.CreateLoad(ir_builder_.CreateGEP( - loop_bounds_arg, ir_builder_.getInt64(offset), AsStringRef(name))); + return compute_function_->temp_buffers_arg(); } llvm::Value* IrEmitter::GetExecutableRunOptionsArgument() { - return GetArg(compute_function_, 1); + return compute_function_->exec_run_options_arg(); } llvm::Value* IrEmitter::EmitTempBufferPointer( @@ -2965,7 +2834,8 @@ Status IrEmitter::EmitParallelForkJoin( HloInstruction* root = computation->root_instruction(); // Build ParallelForkJoin function type. - std::vector compute_function_params = GetComputeFunctionParams(); + std::vector compute_function_params = + compute_function_->GetComputeFunctionParams(); // Number of parallel compute functions. compute_function_params.push_back(ir_builder_.getInt32Ty()); // Array of partitions. There is an array element for each @@ -3066,7 +2936,7 @@ Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) { if (op == op->parent()->root_instruction()) { // For the root node, we write directly to the output buffer of the // function. - llvm::Argument* retval = GetResultArgument(); + llvm::Argument* retval = compute_function_->result_arg(); if (!ShapeUtil::IsNil(target_shape)) { llvm::AttrBuilder attr_builder; attr_builder.addAlignmentAttr(MinimumAlignmentForShape(target_shape)); @@ -3148,7 +3018,7 @@ Status IrEmitter::EmitParallelTargetElementLoop( // Emit code to read dynamic loop bounds from function argument 4. std::vector dynamic_loop_bounds(2 * num_dynamic_loop_bounds_); for (int i = 0; i < 2 * num_dynamic_loop_bounds_; ++i) { - dynamic_loop_bounds[i] = GetDynamicLoopBound(i); + dynamic_loop_bounds[i] = compute_function_->GetDynamicLoopBound(i); } llvm_ir::ForLoopNest loop_nest(loop_name, &ir_builder_); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 351c95278c..9e5595052f 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include #include #include @@ -30,6 +31,7 @@ limitations under the License. #include "llvm/Target/TargetMachine.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/cpu/external_constant_pool.h" +#include "tensorflow/compiler/xla/service/cpu/ir_function.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -233,13 +235,6 @@ class IrEmitter : public DfsHloVisitorWithDefault { // Convenience function to get the IR type matching the given shape. llvm::Type* IrShapeType(const Shape& shape); - // Returns an array of compute function parameter types. - std::vector GetComputeFunctionParams(); - - // Get the llvm::Value* that represents the "retval" argument of the - // computation function being emitted by this emitter. - llvm::Argument* GetResultArgument(); - // Get the llvm::Value* that represents the "prof_counters" argument of the // computation function being emitted by this emitter. llvm::Argument* GetProfileCountersArgument(); @@ -252,11 +247,6 @@ class IrEmitter : public DfsHloVisitorWithDefault { // computation function being emitted by this emitter. llvm::Value* GetTempBuffersArgument(); - // Emit ir to read and return the ir value for the dynamic loop bound at - // 'offset' from the "dynamic_loop_bounds" argument of the computation - // function being emitted by this emitter. - llvm::Value* GetDynamicLoopBound(const int64 offset); - // Emits code that computes the address of the given temporary buffer to the // function. target_shape is the shape of this temporary buffer. // The returned Value's type is a pointer to element_type. @@ -476,8 +466,10 @@ class IrEmitter : public DfsHloVisitorWithDefault { thread_local_buffers_; // The following fields track the IR emission state. According to LLVM memory - // management rules, their memory is owned by the module. - llvm::Function* compute_function_; + // management rules, their memory is owned by the module (Note that IrFunction + // creates the encapsulated llvm::Function s.t. it is added to the llvm + // module's function list). + std::unique_ptr compute_function_; llvm::IRBuilder<> ir_builder_; // Maps HLOs to their index into the profile counter array. diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc new file mode 100644 index 0000000000..fa88627156 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc @@ -0,0 +1,195 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/cpu/ir_function.h" + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/status_macros.h" + +namespace xla { + +namespace { +using llvm_ir::AsStringRef; +} // namespace + +namespace cpu { + +IrFunction::IrFunction(const string& function_name, + llvm::Function::LinkageTypes linkage, + const bool optimize_for_size_requested, + const bool enable_fast_math, llvm::Module* llvm_module, + llvm::IRBuilder<>* ir_builder, + int64 num_dynamic_loop_bounds) + : ir_builder_(ir_builder), + llvm_module_(llvm_module), + caller_insert_point_guard_(*ir_builder), + num_dynamic_loop_bounds_(num_dynamic_loop_bounds) { + Initialize(function_name, linkage, optimize_for_size_requested, + enable_fast_math); +} + +IrFunction::~IrFunction() { + // Emit function return value. + ir_builder_->CreateRetVoid(); +} + +void IrFunction::Initialize(const string& function_name, + llvm::Function::LinkageTypes linkage, + const bool optimize_for_size_requested, + const bool enable_fast_math) { + // The function signature is: + // void function(i8* retval, i8* run_options, i8** params, i8** temps, + // i64* dynamic_loop_bounds, i64* prof_counters) + // + // retval: points to the returned value. + // params: address of an array with pointers to parameters. + // temps: address of an array with pointers to temporary buffers. + // + // Therefore, the generated function's signature (FunctionType) is statically + // determined - parameter unpacking is done in code generated into the + // function, rather than by a prologue dictated by the platform ABI. + // + // /--------------\ + // retval ----------> | return value | + // \--------------/ + // + // /-------------------------------\ + // run_options -----> | xla::ExecutableRunOptions | + // \-------------------------------/ + // + // /---------------------------------------------\ + // params --------> | param 0 | param 1 | ..... | param N-1 | + // | addr | addr | | addr | + // \---------------------------------------------/ + // | | | + // | | | + // V V V + // /---------\ /---------\ /-----------\ + // | param 0 | | param 1 | | param N-1 | + // \---------/ \---------/ \-----------/ + // + // /---------------------------------------------\ + // temps ---------> | temp 0 | temp 1 | ..... | temp N-1 | + // | addr | addr | | addr | + // \---------------------------------------------/ + // | | | + // | | | + // V V V + // /---------\ /---------\ /-----------\ + // | temp 0 | | temp 1 | | temp N-1 | + // \---------/ \---------/ \-----------/ + // + // /--------------------------------------------\ + // dynamic loop bounds -> | outer_dim0_start | outer_dim0_limit | .....| + // (elided for aot) \--------------------------------------------/ + // + // /---------------------------------------------\ + // prof counters -> | counter 0 | counter 1 | ..... | counter N-1 | + // (elided for aot) \---------------------------------------------/ + + // Even though the type of params and temps is void** in the host's view, in + // LLVM IR this is represented by i8*, similarly to void*. It's up to the code + // to use GEPs to unravel the indirection layers. + llvm::FunctionType* function_type = llvm::FunctionType::get( + /*Result=*/llvm::Type::getVoidTy(llvm_module_->getContext()), + /*Params=*/GetComputeFunctionParams(), + /*isVarArg=*/false); + + // Functions with local linkage get an inlining bonus. Because we know + // a-priori that embedded functions (non-entry functions) will not have its + // name resolved, give it local linkage. + function_ = llvm::Function::Create(/*Ty=*/function_type, + /*Linkage=*/linkage, + /*N=*/AsStringRef(function_name), + /*M=*/llvm_module_); + function_->setCallingConv(llvm::CallingConv::C); + + // Set meaningful names for the function's arguments: useful for debugging. + llvm::Function::arg_iterator arg_iter = function_->arg_begin(); + arg_iter->setName("retval"); + result_arg_ = &*arg_iter; + (++arg_iter)->setName("run_options"); + exec_run_options_arg_ = &*arg_iter; + (++arg_iter)->setName("params"); + parameters_arg_ = &*arg_iter; + (++arg_iter)->setName("temps"); + temp_buffers_arg_ = &*arg_iter; + if (num_dynamic_loop_bounds_ > 0) { + (++arg_iter)->setName("dynamic_loop_bounds"); + dynamic_loop_bounds_arg_ = &*arg_iter; + } + (++arg_iter)->setName("prof_counters"); + profile_counters_arg_ = &*arg_iter; + + // We know a-priori that the function arguments are guaranteed to point to + // disjoint objects. + llvm::Argument* retval = result_arg(); + for (llvm::Argument& argument : function_->args()) { + // However, the return buffer aliases the temporaries and thus cannot be + // marked noalias. + if (&argument == retval) { + continue; + } + function_->addAttribute(argument.getArgNo() + 1, llvm::Attribute::NoAlias); + } + + // Add the optize attribute to the function if optimizing for size. This + // controls internal behavior of some optimization passes (e.g. loop + // unrolling). + if (optimize_for_size_requested) { + function_->addFnAttr(llvm::Attribute::OptimizeForSize); + } + + if (enable_fast_math) { + function_->addFnAttr("unsafe-fp-math", "true"); + function_->addFnAttr("no-infs-fp-math", "true"); + function_->addFnAttr("no-nans-fp-math", "true"); + function_->addFnAttr("no-signed-zeros-fp-math", "true"); + } + + ir_builder_->SetInsertPoint(llvm::BasicBlock::Create( + /*Context=*/llvm_module_->getContext(), + /*Name=*/"entry", + /*Parent=*/function_)); +} + +std::vector IrFunction::GetComputeFunctionParams() { + llvm::Type* i8_ptr_type = + llvm::Type::getInt8PtrTy(llvm_module_->getContext()); + llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo(); + llvm::Type* i64_ptr_type = + llvm::Type::getInt64PtrTy(llvm_module_->getContext()); + std::vector compute_function_params( + {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type}); + if (num_dynamic_loop_bounds_ > 0) { + compute_function_params.push_back(i64_ptr_type); + } + compute_function_params.push_back(i64_ptr_type); + return compute_function_params; +} + +llvm::Value* IrFunction::GetDynamicLoopBound(const int64 offset) { + CHECK_GT(num_dynamic_loop_bounds_, 0); + CHECK_LT(offset, num_dynamic_loop_bounds_ * 2); + string name = tensorflow::strings::StrCat("dynamic_loop_bound_", offset); + return ir_builder_->CreateLoad( + ir_builder_->CreateGEP(CHECK_NOTNULL(dynamic_loop_bounds_arg_), + ir_builder_->getInt64(offset), AsStringRef(name))); +} + +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h new file mode 100644 index 0000000000..b7516b403e --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/ir_function.h @@ -0,0 +1,109 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_ + +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" + +namespace xla { +namespace cpu { + +// IrFunction creates and encapsulates an llvm::Function, exposing methods to +// emitters for function and function argument access. +// The llvm::Function is created with the standard function signature +// used in the XLA CPU backend (see ir_function.cc for argument details). +// In addtion IrFunction saves the callers IR insert point during contruction, +// and restores it after desctruction. +// +// Example usage: +// +// // Create and initialize new IrFunction. +// std::unique_ptr compute_function(new IrFunction(...)); +// // Emit IR for function body using IrFunction helper methods. +// ... +// // Store reference to llvm::Function for future invocation. +// ir_functions.push_back(compute_function.function()); +// // Delete IrFunction (finalizes IR function and restores caller insertion +// // point). +// compute_function.reset(); +// + +class IrFunction { + public: + IrFunction(const string& function_name, llvm::Function::LinkageTypes linkage, + const bool optimize_for_size_requested, + const bool enable_fast_math, llvm::Module* llvm_module, + llvm::IRBuilder<>* ir_builder, int64 num_dynamic_loop_bounds); + ~IrFunction(); + + // Returns an array of compute function parameter types. + std::vector GetComputeFunctionParams(); + + // Emit ir to read and return the ir value for the dynamic loop bound at + // 'offset' from the "dynamic_loop_bounds" argument of this function. + llvm::Value* GetDynamicLoopBound(int64 offset); + + // Returns the encapculated llvm::Function. + llvm::Function* function() { return function_; } + + // Get the llvm::Value* that represents this functions "retval" argument. + llvm::Argument* result_arg() { return result_arg_; } + + // Get the xla::ExecutableRunOptions that represents this functions + // "run_options" argument. + llvm::Value* exec_run_options_arg() { return exec_run_options_arg_; } + + // Get the llvm::Argument that represents this functions parameters argument. + llvm::Argument* parameters_arg() { return parameters_arg_; } + + // Get the llvm::Value* that represents this functions "temps" argument. + llvm::Value* temp_buffers_arg() { return temp_buffers_arg_; } + + // Get the llvm::Value* that represents this functions "prof_counters" + // argument. + llvm::Argument* profile_counters_arg() { return profile_counters_arg_; } + + private: + // Initialize an llvm::Function with standard signature based on arguments. + void Initialize(const string& function_name, + llvm::Function::LinkageTypes linkage, + bool optimize_for_size_requested, bool enable_fast_math); + + llvm::IRBuilder<>* ir_builder_; + llvm::Module* llvm_module_; + llvm::IRBuilder<>::InsertPointGuard caller_insert_point_guard_; + + int64 num_dynamic_loop_bounds_ = 0; + // Encapsulated llvm::Function. + llvm::Function* function_; + // Function argument IR values. + llvm::Argument* result_arg_; + llvm::Value* exec_run_options_arg_; + llvm::Argument* parameters_arg_; + llvm::Value* temp_buffers_arg_; + llvm::Argument* dynamic_loop_bounds_arg_ = nullptr; + llvm::Argument* profile_counters_arg_; +}; + +} // namespace cpu +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_ -- GitLab From e7e1cab9fe66f00716ffaae8a180c5c08a2a050e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 09:13:54 -0800 Subject: [PATCH 0939/1801] [XLA:CPU] Factor out parallel loop emission into its own file so it can be called by other emitters (no functional change, just code movement). PiperOrigin-RevId: 177317764 --- tensorflow/compiler/xla/service/cpu/BUILD | 17 +++++ .../compiler/xla/service/cpu/ir_emitter.cc | 72 ++++-------------- .../compiler/xla/service/cpu/ir_emitter.h | 11 +-- .../xla/service/cpu/parallel_loop_emitter.cc | 76 +++++++++++++++++++ .../xla/service/cpu/parallel_loop_emitter.h | 75 ++++++++++++++++++ 5 files changed, 184 insertions(+), 67 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc create mode 100644 tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index e64b313ffc..bf41d5ce07 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -251,6 +251,7 @@ cc_library( ":external_constant_pool", ":ir_emission_utils", ":ir_function", + ":parallel_loop_emitter", ":shape_partition", ":simple_orc_jit", "//tensorflow/compiler/xla:shape_util", @@ -297,6 +298,22 @@ cc_library( ], ) +cc_library( + name = "parallel_loop_emitter", + srcs = ["parallel_loop_emitter.cc"], + hdrs = ["parallel_loop_emitter.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service/llvm_ir:ir_array", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter", + "//tensorflow/core:lib", + "@llvm//:core", + ], +) + cc_library( name = "dot_op_emitter", srcs = ["dot_op_emitter.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index f087329c6d..3f991c03e9 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/cpu/ir_function.h" +#include "tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h" #include "tensorflow/compiler/xla/service/cpu/shape_partition.h" #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h" #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h" @@ -1892,7 +1893,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) { VLOG(2) << "HandleSlice: " << slice->ToString(); auto operand = slice->operand(0); // The code below emits a sequential loop nest. For the parallel backend, use - // EmitParallelTargetElementLoop() which respects dynamic loop bounds. + // ParallelLoopEmitter which respects dynamic loop bounds. if (ShouldEmitParallelLoopFor(*slice)) { return DefaultAction(slice); } @@ -2997,8 +2998,19 @@ Status IrEmitter::EmitTargetElementLoop( } else { if (ShouldEmitParallelLoopFor(*target_op)) { - TF_RETURN_IF_ERROR(EmitParallelTargetElementLoop( - target_shape, element_generator, IrName(target_op), &target_array)); + // Emit code to read dynamic loop bounds from compute function argument. + ParallelLoopEmitter::LoopBounds dynamic_loop_bounds( + num_dynamic_loop_bounds_); + for (int i = 0; i < num_dynamic_loop_bounds_; ++i) { + dynamic_loop_bounds[i].first = + compute_function_->GetDynamicLoopBound(i * 2 + 0); + dynamic_loop_bounds[i].second = + compute_function_->GetDynamicLoopBound(i * 2 + 1); + } + // Emit parallel loop with dynamic loop bounds for most-major dimensions. + TF_RETURN_IF_ERROR(ParallelLoopEmitter(element_generator, target_array, + &dynamic_loop_bounds, &ir_builder_) + .EmitLoop(IrName(target_op))); } else { TF_RETURN_IF_ERROR( llvm_ir::LoopEmitter(element_generator, target_array, &ir_builder_) @@ -3008,60 +3020,6 @@ Status IrEmitter::EmitTargetElementLoop( return Status::OK(); } -Status IrEmitter::EmitParallelTargetElementLoop( - const Shape& target_shape, - const llvm_ir::ElementGenerator& element_generator, - tensorflow::StringPiece loop_name, llvm_ir::IrArray* target_array) { - CHECK(!ShapeUtil::IsTuple(target_shape)); - CHECK(!ShapeUtil::IsScalar(target_shape)); - - // Emit code to read dynamic loop bounds from function argument 4. - std::vector dynamic_loop_bounds(2 * num_dynamic_loop_bounds_); - for (int i = 0; i < 2 * num_dynamic_loop_bounds_; ++i) { - dynamic_loop_bounds[i] = compute_function_->GetDynamicLoopBound(i); - } - - llvm_ir::ForLoopNest loop_nest(loop_name, &ir_builder_); - const int64 num_dims = target_shape.dimensions_size(); - llvm_ir::IrArray::Index array_index(num_dims); - - // Add loops from outer-most to inner-most dimensions. - for (int i = target_shape.layout().minor_to_major_size() - 1; i >= 0; --i) { - const int64 dimension = target_shape.layout().minor_to_major(i); - const int bounds_index = num_dims - 1 - i; - if (bounds_index < num_dynamic_loop_bounds_) { - // Emit dynamic loop bounds for this dimension. Dynamic loop bounds - // are read from ir function dynamic loop bounds argument. - llvm::Value* start_index = dynamic_loop_bounds[bounds_index * 2 + 0]; - llvm::Value* end_index = dynamic_loop_bounds[bounds_index * 2 + 1]; - - std::unique_ptr loop = loop_nest.AddLoop( - /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension), - start_index, end_index); - array_index[dimension] = loop->GetIndVarValue(); - } else { - // Emit static loop bounds for this dimension. - std::unique_ptr loop = loop_nest.AddLoop( - /*start_index=*/0, - /*end_index=*/target_shape.dimensions(dimension), - /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension)); - array_index[dimension] = loop->GetIndVarValue(); - } - } - // Point IR builder at inner loop BB. - SetToFirstInsertPoint(loop_nest.GetInnerLoopBodyBasicBlock(), &ir_builder_); - - // Emit loop body. - TF_ASSIGN_OR_RETURN(llvm::Value * target_element, - element_generator(array_index)); - target_array->EmitWriteArrayElement(array_index, target_element, - &ir_builder_); - // Point IR builder at outer loop exit BB. - SetToFirstInsertPoint(loop_nest.GetOuterLoopExitBasicBlock(), &ir_builder_); - - return Status::OK(); -} - Status IrEmitter::EmitMemcpy(const HloInstruction& source, const HloInstruction& destination) { llvm::Value* source_value = GetEmittedValueFor(&source); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 9e5595052f..6b576d16bb 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -336,15 +336,6 @@ class IrEmitter : public DfsHloVisitorWithDefault { HloInstruction* target_op, tensorflow::StringPiece desc, const llvm_ir::ElementGenerator& element_generator); - // Emit IR to perform a computation for every element in a partition/slice of - // 'target_shape'. The loop bounds for the outer-dimension partitions are - // passed into the compute function as a runtime argument (accessible from - // GetDynamicLoopBound). - Status EmitParallelTargetElementLoop( - const Shape& target_shape, - const llvm_ir::ElementGenerator& element_generator, - tensorflow::StringPiece loop_name, llvm_ir::IrArray* target_array); - // Emits a memcpy from the source instruction's result value to the // destination's. Both source and destination must have an entry in the // emitted_value_ table. @@ -482,7 +473,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { llvm_ir::AliasAnalysis alias_analysis_; // The number of root instruction outer dimensions used in parallel loop - // emission (EmitParallelTargetElementLoop). + // emission (ParallelLoopEmitter). int64 num_dynamic_loop_bounds_ = 0; // Returns whether the given instruction should be emitted as a parallel loop. diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc new file mode 100644 index 0000000000..91e704e3d0 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc @@ -0,0 +1,76 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h" + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/core/lib/strings/stringprintf.h" + +namespace xla { +namespace cpu { + +ParallelLoopEmitter::ParallelLoopEmitter( + const llvm_ir::ElementGenerator& target_element_generator, + const llvm_ir::IrArray& target_array, const LoopBounds* dynamic_loop_bounds, + llvm::IRBuilder<>* ir_builder) + : LoopEmitter(target_element_generator, target_array, ir_builder), + dynamic_loop_bounds_(dynamic_loop_bounds) {} + +llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock( + tensorflow::StringPiece loop_name) { + CHECK(!ShapeUtil::IsTuple(shape_)); + CHECK(!ShapeUtil::IsScalar(shape_)); + + llvm_ir::ForLoopNest loop_nest(loop_name, ir_builder_); + const int64 num_dims = shape_.dimensions_size(); + llvm_ir::IrArray::Index array_index(num_dims); + + // Add loops from outer-most to inner-most dimensions. + for (int i = shape_.layout().minor_to_major_size() - 1; i >= 0; --i) { + const int64 dimension = shape_.layout().minor_to_major(i); + const int bounds_index = num_dims - 1 - i; + if (bounds_index < dynamic_loop_bounds_->size()) { + // Emit dynamic loop bounds for this dimension. Dynamic loop bounds + // are read from ir function dynamic loop bounds argument. + llvm::Value* start_index = (*dynamic_loop_bounds_)[bounds_index].first; + llvm::Value* end_index = (*dynamic_loop_bounds_)[bounds_index].second; + + std::unique_ptr loop = loop_nest.AddLoop( + /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension), + start_index, end_index); + array_index[dimension] = loop->GetIndVarValue(); + } else { + // Emit static loop bounds for this dimension. + std::unique_ptr loop = loop_nest.AddLoop( + /*start_index=*/0, + /*end_index=*/shape_.dimensions(dimension), + /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension)); + array_index[dimension] = loop->GetIndVarValue(); + } + } + // Point IR builder at inner loop BB. + llvm_ir::SetToFirstInsertPoint(loop_nest.GetInnerLoopBodyBasicBlock(), + ir_builder_); + + // Set exit_bb_ to the exit block of the loop nest. + exit_bb_ = loop_nest.GetOuterLoopExitBasicBlock(); + CHECK(exit_bb_ != nullptr); + + return array_index; +} + +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h new file mode 100644 index 0000000000..492d5953c4 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h @@ -0,0 +1,75 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_ + +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Value.h" +#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" +#include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h" + +namespace xla { +namespace cpu { + +// ParallelLoopEmitter emits a loop nest for the target array shape. +// The outer loop bounds of the loop nest are passed as ir values at runtime +// (specified in 'dynamic_loop_bounds'), and the inner loop bounds are static. +// Dynamic loop bounds are specified as an array of dimension index +// [start, limit) pairs of ir values (one for each partitioned outer dimension). +// +// EX: Let 'shape' = [8, 16, 32], with the loop bounds of the two-most major +// dimensions dynamic. +// Then 'dynamic_loop_bounds' will contain the following ir values for +// the two most-major dimenions: +// [dim0_index_start_ir_value, dim0_index_limit_ir_value] +// [dim1_index_start_ir_value, dim1_index_limit_ir_value] +// +// Code emitted by ParallelLoopEmitter will be called in a multi-threaded +// context where each thread will be assigned a different set of outer dimension +// partitions, and where all threads will collectively iterate over the +// entire target array shape. +// +// Outer dimension partitions can be generated using the ShapePartitionAssigner +// and ShapePartitionIterator utility classes from shape_partition.cc. +// +class ParallelLoopEmitter : public llvm_ir::LoopEmitter { + public: + using LoopBounds = std::vector>; + + // Constructs a ParallelLoopEmitter which uses 'target_element_generator' to + // generate elements, 'dynamic_loop_bounds' to set the loop bounds of the + // most-major dimensions, and 'target_array.' shape to set the static loop + // bounds for the most-minor dimensions. + ParallelLoopEmitter(const llvm_ir::ElementGenerator& target_element_generator, + const llvm_ir::IrArray& target_array, + const LoopBounds* dynamic_loop_bounds, + llvm::IRBuilder<>* ir_builder); + + ParallelLoopEmitter(const ParallelLoopEmitter&) = delete; + ParallelLoopEmitter& operator=(const ParallelLoopEmitter&) = delete; + ~ParallelLoopEmitter() override = default; + + llvm_ir::IrArray::Index EmitIndexAndSetExitBasicBlock( + tensorflow::StringPiece loop_name) override; + + private: + const LoopBounds* dynamic_loop_bounds_; +}; + +} // namespace cpu +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_ -- GitLab From 667282eb0e62bef03bbe527bef88c656532444bb Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 29 Nov 2017 09:30:56 -0800 Subject: [PATCH 0940/1801] [TFXLA] Return nullopt if no merge node found. PiperOrigin-RevId: 177319722 --- tensorflow/compiler/tf2xla/functionalize_control_flow.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 5726d8294a..267268298c 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -1067,6 +1067,10 @@ FunctionalizeCond::CreateCorrespondingMergeCluster(Cluster* switch_cluster) { enqueue_or_update_merge(out); } } + // Return if there are no merge nodes. + if (merges.empty()) { + return gtl::nullopt; + } auto it = merges.begin(); Cluster* merge_cluster = *it; for (++it; it != merges.end(); ++it) { -- GitLab From 537ecc56cf09d5dcb2b328b322d9f8b195abcc6c Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 29 Nov 2017 09:48:08 -0800 Subject: [PATCH 0941/1801] [tf.data] Remove GraphDefBuilder and NodeBuilder dependencies from "dataset.h". This is a step towards making a header-only library on which external op implementations can depend. To do this "dataset.h" cannot depend on any headers in "tensorflow/core/graph/...". PiperOrigin-RevId: 177322011 --- tensorflow/core/kernels/BUILD | 2 +- tensorflow/core/kernels/dataset.cc | 140 ++++++++++++++++ tensorflow/core/kernels/dataset.h | 155 +++--------------- tensorflow/core/kernels/filter_dataset_op.cc | 2 +- .../core/kernels/flat_map_dataset_op.cc | 2 +- .../core/kernels/interleave_dataset_op.cc | 2 +- tensorflow/core/kernels/map_dataset_op.cc | 2 +- .../core/kernels/padded_batch_dataset_op.cc | 4 +- tensorflow/core/kernels/tensor_dataset_op.cc | 2 +- .../core/kernels/tensor_slice_dataset_op.cc | 2 +- tensorflow/core/kernels/zip_dataset_op.cc | 2 +- 11 files changed, 171 insertions(+), 144 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index eff15e809a..fd36e6ca1f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5832,11 +5832,11 @@ cc_library( srcs = ["dataset.cc"], hdrs = ["dataset.h"], deps = [ + "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/util/tensor_bundle", ], ) diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/dataset.cc index fcfa2956f7..0972129787 100644 --- a/tensorflow/core/kernels/dataset.cc +++ b/tensorflow/core/kernels/dataset.cc @@ -15,6 +15,9 @@ limitations under the License. #include "tensorflow/core/kernels/dataset.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/node_builder.h" + namespace tensorflow { namespace { @@ -70,6 +73,143 @@ class DatasetVariantWrapper { } // namespace +Status GraphDefBuilderWrapper::AddDataset( + const GraphDatasetBase* dataset, + const std::vector>& inputs, + const std::vector>>& list_inputs, + const std::vector>& attrs, + Node** output) { + const string& op_type_name = dataset->op_name(); + std::unique_ptr opts( + new GraphDefBuilder::Options(b_->opts())); + // TODO(srbs|mrry): Not all datasets have output_types and output_shapes + // attributes defined. It will be nice to have a consistent pattern. + bool has_output_types_attr = HasAttr(op_type_name, "output_types"); + bool has_output_shapes_attr = HasAttr(op_type_name, "output_shapes"); + if (has_output_shapes_attr) { + opts.reset(new GraphDefBuilder::Options( + opts->WithAttr("output_shapes", dataset->output_shapes()))); + } + if (has_output_types_attr) { + opts.reset(new GraphDefBuilder::Options( + opts->WithAttr("output_types", dataset->output_dtypes()))); + } + for (auto attr : attrs) { + opts.reset( + new GraphDefBuilder::Options(opts->WithAttr(attr.first, attr.second))); + } + if (opts->HaveError()) { + return errors::Internal("AddDataset: Failed to build Options with error ", + opts->StatusToString()); + } + NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name, + opts->op_registry()); + { + size_t total_size = inputs.size() + list_inputs.size(); + auto inputs_iter = inputs.begin(); + auto list_inputs_iter = list_inputs.begin(); + for (int i = 0; i < total_size; i++) { + if (inputs_iter != inputs.end() && inputs_iter->first == i) { + node_builder.Input(NodeBuilder::NodeOut(inputs_iter->second)); + inputs_iter++; + } else if (list_inputs_iter != list_inputs.end() && + list_inputs_iter->first == i) { + std::vector nodeout_inputs; + nodeout_inputs.reserve(list_inputs_iter->second.size()); + for (Node* n : list_inputs_iter->second) { + nodeout_inputs.emplace_back(n); + } + node_builder.Input(nodeout_inputs); + list_inputs_iter++; + } else { + return errors::InvalidArgument("No input found for index ", i); + } + } + } + *output = opts->FinalizeBuilder(&node_builder); + if (*output == nullptr) { + return errors::Internal("AddDataset: Failed to build ", op_type_name, + " op with error ", opts->StatusToString()); + } + return Status::OK(); +} + +Status GraphDefBuilderWrapper::AddFunction(OpKernelContext* ctx, + const string& function_name) { + if (b_->HasFunction(function_name)) { + LOG(INFO) << "Function with name " << function_name << "already exists in" + << " the graph. It will not be added again."; + return Status::OK(); + } + TF_RETURN_IF_ERROR(EnsureFunctionIsStateless(ctx, function_name)); + const FunctionLibraryDefinition* flib_def = + ctx->function_library()->GetFunctionLibraryDefinition(); + const FunctionDef* f_def = flib_def->Find(function_name); + if (f_def == nullptr) { + return errors::InvalidArgument("Unable to find FunctionDef for ", + function_name, " in the registry."); + } + FunctionDefLibrary def; + *def.add_function() = *f_def; + const string gradient_func = flib_def->FindGradient(function_name); + if (!gradient_func.empty()) { + GradientDef* g_def = def.add_gradient(); + g_def->set_function_name(function_name); + g_def->set_gradient_func(gradient_func); + } + TF_RETURN_IF_ERROR(b_->AddFunctionLibrary(def)); + + // Recursively add functions in inputs of function_name. + for (const NodeDef& node_def : f_def->node_def()) { + const OpRegistrationData* op_reg_data = nullptr; + TF_RETURN_IF_ERROR(flib_def->LookUp(node_def.op(), &op_reg_data)); + if (op_reg_data->is_function_op) { + TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name())); + } + // Recursively add functions in attrs of this NodeDef. + for (const auto& pair : node_def.attr()) { + TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx)); + } + } + + // Recursively add functions in attrs of function_name. + for (auto iter = f_def->attr().begin(); iter != f_def->attr().end(); iter++) { + TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx)); + } + return Status::OK(); +} + +void GraphDefBuilderWrapper::AddTensorInternal(const Tensor& val, + Node** output) { + *output = ops::SourceOp( + "Const", + b_->opts().WithAttr("dtype", val.dtype()).WithAttr("value", val)); +} + +bool GraphDefBuilderWrapper::HasAttr(const string& op_type_name, + const string& attr_name) const { + const OpDef* op_def = nullptr; + Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def); + if (!s.ok() || op_def == nullptr) { + return false; + } + return HasAttr(op_def, attr_name); +} + +Status GraphDatasetBase::Serialize(OpKernelContext* ctx, + string* serialized_graph_def, + string* output_node) const { + GraphDefBuilder b; + DatasetGraphDefBuilder db(&b); + Node* node = nullptr; + TF_RETURN_IF_ERROR(AsGraphDefInternal(ctx, &db, &node)); + *output_node = node->name(); + GraphDef graph_def; + TF_RETURN_IF_ERROR(b.ToGraphDef(&graph_def)); + graph_def.SerializeToString(serialized_graph_def); + return Status::OK(); +} + Status GetDatasetFromVariantTensor(const Tensor& tensor, DatasetBase** out_dataset) { if (!(tensor.dtype() == DT_VARIANT || diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index afbebb0692..504a88a309 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -19,12 +19,13 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include "tensorflow/core/framework/variant_tensor_data.h" -#include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/tracing.h" @@ -59,6 +60,12 @@ class IteratorStateWriter { virtual ~IteratorStateWriter() {} }; +// Forward declarations to avoid introducing a dependency on headers in +// "tensorflow/core/graph/...". +class GraphDefBuilder; +class GraphDatasetBase; +class Node; + // Wrapper around GraphDefBuilder. Used to serialize Dataset graph. class GraphDefBuilderWrapper { public: @@ -110,10 +117,8 @@ class GraphDefBuilderWrapper { return Status::OK(); } - template - Status AddDataset(const DatasetType* dataset, - const std::vector& inputs, - Node** output) { + Status AddDataset(const GraphDatasetBase* dataset, + const std::vector& inputs, Node** output) { return AddDataset(dataset, inputs, {}, output); } @@ -125,77 +130,23 @@ class GraphDefBuilderWrapper { // `*output` contains a pointer to the output `Node`. It is guaranteed to be // non-null if the method returns with an OK status. // The returned Node pointer is owned by the backing Graph of GraphDefBuilder. - template - Status AddDataset(const DatasetType* dataset, - const std::vector& inputs, + Status AddDataset(const GraphDatasetBase* dataset, + const std::vector& inputs, const std::vector>& attrs, Node** output) { - std::vector> enumerated_inputs( - inputs.size()); + std::vector> enumerated_inputs(inputs.size()); for (int i = 0; i < inputs.size(); i++) { enumerated_inputs[i] = std::make_pair(i, inputs[i]); } return AddDataset(dataset, enumerated_inputs, {}, attrs, output); } - template Status AddDataset( - const DatasetType* dataset, - const std::vector>& inputs, - const std::vector< - std::pair>>& - list_inputs, + const GraphDatasetBase* dataset, + const std::vector>& inputs, + const std::vector>>& list_inputs, const std::vector>& attrs, - Node** output) { - const string& op_type_name = dataset->op_name(); - std::unique_ptr opts( - new GraphDefBuilder::Options(b_->opts())); - // TODO(srbs|mrry): Not all datasets have output_types and output_shapes - // attributes defined. It will be nice to have a consistent pattern. - bool has_output_types_attr = HasAttr(op_type_name, "output_types"); - bool has_output_shapes_attr = HasAttr(op_type_name, "output_shapes"); - if (has_output_shapes_attr) { - opts.reset(new GraphDefBuilder::Options( - opts->WithAttr("output_shapes", dataset->output_shapes()))); - } - if (has_output_types_attr) { - opts.reset(new GraphDefBuilder::Options( - opts->WithAttr("output_types", dataset->output_dtypes()))); - } - for (auto attr : attrs) { - opts.reset(new GraphDefBuilder::Options( - opts->WithAttr(attr.first, attr.second))); - } - if (opts->HaveError()) { - return errors::Internal("AddDataset: Failed to build Options with error ", - opts->StatusToString()); - } - NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name, - opts->op_registry()); - { - size_t total_size = inputs.size() + list_inputs.size(); - auto inputs_iter = inputs.begin(); - auto list_inputs_iter = list_inputs.begin(); - for (int i = 0; i < total_size; i++) { - if (inputs_iter != inputs.end() && inputs_iter->first == i) { - node_builder.Input(inputs_iter->second); - inputs_iter++; - } else if (list_inputs_iter != list_inputs.end() && - list_inputs_iter->first == i) { - node_builder.Input(list_inputs_iter->second); - list_inputs_iter++; - } else { - return errors::InvalidArgument("No input found for index ", i); - } - } - } - *output = opts->FinalizeBuilder(&node_builder); - if (*output == nullptr) { - return errors::Internal("AddDataset: Failed to build ", op_type_name, - " op with error ", opts->StatusToString()); - } - return Status::OK(); - } + Node** output); // Adds a user-defined function with name `function_name` to the graph and // recursively adds all functions it references. If a function with a matching @@ -203,50 +154,7 @@ class GraphDefBuilderWrapper { // name `function_name` is not found in the FunctionLibraryDefinition, returns // an InvalidArgumentError. If the function with name `function_name` or any // of its dependent functions are stateful, returns an InvalidArgument error. - Status AddFunction(OpKernelContext* ctx, const string& function_name) { - if (b_->HasFunction(function_name)) { - LOG(INFO) << "Function with name " << function_name << "already exists in" - << " the graph. It will not be added again."; - return Status::OK(); - } - TF_RETURN_IF_ERROR(EnsureFunctionIsStateless(ctx, function_name)); - const FunctionLibraryDefinition* flib_def = - ctx->function_library()->GetFunctionLibraryDefinition(); - const FunctionDef* f_def = flib_def->Find(function_name); - if (f_def == nullptr) { - return errors::InvalidArgument("Unable to find FunctionDef for ", - function_name, " in the registry."); - } - FunctionDefLibrary def; - *def.add_function() = *f_def; - const string gradient_func = flib_def->FindGradient(function_name); - if (!gradient_func.empty()) { - GradientDef* g_def = def.add_gradient(); - g_def->set_function_name(function_name); - g_def->set_gradient_func(gradient_func); - } - TF_RETURN_IF_ERROR(b_->AddFunctionLibrary(def)); - - // Recursively add functions in inputs of function_name. - for (const NodeDef& node_def : f_def->node_def()) { - const OpRegistrationData* op_reg_data = nullptr; - TF_RETURN_IF_ERROR(flib_def->LookUp(node_def.op(), &op_reg_data)); - if (op_reg_data->is_function_op) { - TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name())); - } - // Recursively add functions in attrs of this NodeDef. - for (const auto& pair : node_def.attr()) { - TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx)); - } - } - - // Recursively add functions in attrs of function_name. - for (auto iter = f_def->attr().begin(); iter != f_def->attr().end(); - iter++) { - TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx)); - } - return Status::OK(); - } + Status AddFunction(OpKernelContext* ctx, const string& function_name); template void BuildAttrValue(const T& value, AttrValue* attr) { @@ -254,11 +162,7 @@ class GraphDefBuilderWrapper { } private: - void AddTensorInternal(const Tensor& val, Node** output) { - *output = ops::SourceOp( - "Const", - b_->opts().WithAttr("dtype", val.dtype()).WithAttr("value", val)); - } + void AddTensorInternal(const Tensor& val, Node** output); Status EnsureFunctionIsStateless(OpKernelContext* ctx, const string& function_name) const { @@ -294,14 +198,7 @@ class GraphDefBuilderWrapper { HasAttr(op_def, "output_shapes"); } - bool HasAttr(const string& op_type_name, const string& attr_name) const { - const OpDef* op_def = nullptr; - Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def); - if (!s.ok() || op_def == nullptr) { - return false; - } - return HasAttr(op_def, attr_name); - } + bool HasAttr(const string& op_type_name, const string& attr_name) const; bool HasAttr(const OpDef* op_def, const string& attr_name) const { for (auto attr : op_def->attr()) { @@ -548,17 +445,7 @@ class GraphDatasetBase : public DatasetBase { private: Status Serialize(OpKernelContext* ctx, string* serialized_graph_def, - string* output_node) const { - GraphDefBuilder b; - DatasetGraphDefBuilder db(&b); - Node* node = nullptr; - TF_RETURN_IF_ERROR(AsGraphDefInternal(ctx, &db, &node)); - *output_node = node->name(); - GraphDef graph_def; - TF_RETURN_IF_ERROR(b.ToGraphDef(&graph_def)); - graph_def.SerializeToString(serialized_graph_def); - return Status::OK(); - } + string* output_node) const; const string op_name_; }; diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc index e4d80e4ce3..0ac6cd9a98 100644 --- a/tensorflow/core/kernels/filter_dataset_op.cc +++ b/tensorflow/core/kernels/filter_dataset_op.cc @@ -95,7 +95,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { DataTypeVector other_arguments_types; other_arguments_types.reserve(captured_func_->captured_inputs().size()); - std::vector other_arguments; + std::vector other_arguments; other_arguments.reserve(captured_func_->captured_inputs().size()); for (const Tensor& t : captured_func_->captured_inputs()) { Node* node; diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/flat_map_dataset_op.cc index ac1689e5bf..8fe8489371 100644 --- a/tensorflow/core/kernels/flat_map_dataset_op.cc +++ b/tensorflow/core/kernels/flat_map_dataset_op.cc @@ -102,7 +102,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { DataTypeVector other_arguments_types; other_arguments_types.reserve(captured_func_->captured_inputs().size()); - std::vector other_arguments; + std::vector other_arguments; other_arguments.reserve(captured_func_->captured_inputs().size()); for (const Tensor& t : captured_func_->captured_inputs()) { Node* node; diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/interleave_dataset_op.cc index cbee68b2db..833e8cb9c5 100644 --- a/tensorflow/core/kernels/interleave_dataset_op.cc +++ b/tensorflow/core/kernels/interleave_dataset_op.cc @@ -126,7 +126,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { TF_RETURN_IF_ERROR(b->AddScalar(block_length_, &block_length_node)); DataTypeVector other_arguments_types; other_arguments_types.reserve(captured_func_->captured_inputs().size()); - std::vector other_arguments; + std::vector other_arguments; other_arguments.reserve(captured_func_->captured_inputs().size()); for (const Tensor& t : captured_func_->captured_inputs()) { Node* node; diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/map_dataset_op.cc index 4ba09bc335..23148f122d 100644 --- a/tensorflow/core/kernels/map_dataset_op.cc +++ b/tensorflow/core/kernels/map_dataset_op.cc @@ -100,7 +100,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { DataTypeVector other_arguments_types( captured_func_->captured_inputs().size()); - std::vector other_arguments( + std::vector other_arguments( captured_func_->captured_inputs().size()); for (const Tensor& t : captured_func_->captured_inputs()) { Node* node; diff --git a/tensorflow/core/kernels/padded_batch_dataset_op.cc b/tensorflow/core/kernels/padded_batch_dataset_op.cc index 7c28d955e1..cef5bde156 100644 --- a/tensorflow/core/kernels/padded_batch_dataset_op.cc +++ b/tensorflow/core/kernels/padded_batch_dataset_op.cc @@ -242,7 +242,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel { Node* batch_size = nullptr; TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size)); - std::vector padded_shapes; + std::vector padded_shapes; padded_shapes.reserve(padded_shapes_.size()); for (int i = 0; i < padded_shapes_.size(); i++) { Node* node; @@ -254,7 +254,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel { padded_shapes.emplace_back(node); } - std::vector padding_values; + std::vector padding_values; padding_values.reserve(padding_values_.size()); for (const Tensor& t : padding_values_) { Node* node; diff --git a/tensorflow/core/kernels/tensor_dataset_op.cc b/tensorflow/core/kernels/tensor_dataset_op.cc index fe53434d17..5cf9931188 100644 --- a/tensorflow/core/kernels/tensor_dataset_op.cc +++ b/tensorflow/core/kernels/tensor_dataset_op.cc @@ -70,7 +70,7 @@ class TensorDatasetOp : public DatasetOpKernel { protected: Status AsGraphDefInternal(DatasetGraphDefBuilder* b, Node** output) const override { - std::vector components; + std::vector components; components.reserve(tensors_.size()); for (const Tensor& t : tensors_) { Node* node; diff --git a/tensorflow/core/kernels/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/tensor_slice_dataset_op.cc index e85f59b584..19d4816ff3 100644 --- a/tensorflow/core/kernels/tensor_slice_dataset_op.cc +++ b/tensorflow/core/kernels/tensor_slice_dataset_op.cc @@ -86,7 +86,7 @@ class TensorSliceDatasetOp : public DatasetOpKernel { protected: Status AsGraphDefInternal(DatasetGraphDefBuilder* b, Node** output) const override { - std::vector components; + std::vector components; components.reserve(tensors_.size()); for (const Tensor& t : tensors_) { Node* node; diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc index 9381915ae9..31e5737f62 100644 --- a/tensorflow/core/kernels/zip_dataset_op.cc +++ b/tensorflow/core/kernels/zip_dataset_op.cc @@ -80,7 +80,7 @@ class ZipDatasetOp : public DatasetOpKernel { protected: Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { - std::vector input_graph_nodes; + std::vector input_graph_nodes; input_graph_nodes.reserve(inputs_.size()); for (const auto& input : inputs_) { Node* input_node; -- GitLab From fa8bfa89cd2d7b57bb119afcabdf67ce1539081d Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 29 Nov 2017 09:52:17 -0800 Subject: [PATCH 0942/1801] Add feature_columns doc. PiperOrigin-RevId: 177322632 --- .../docs_src/get_started/feature_columns.md | 570 ++++++++++++++++++ 1 file changed, 570 insertions(+) create mode 100644 tensorflow/docs_src/get_started/feature_columns.md diff --git a/tensorflow/docs_src/get_started/feature_columns.md b/tensorflow/docs_src/get_started/feature_columns.md new file mode 100644 index 0000000000..f9537927b7 --- /dev/null +++ b/tensorflow/docs_src/get_started/feature_columns.md @@ -0,0 +1,570 @@ +# Feature Columns + +This document details feature columns. Think of **feature columns** as the +intermediaries between raw data and Estimators. Feature columns are very rich, +enabling you to transform a diverse range of raw data into formats that +Estimators can use, allowing easy experimentation. + +In @{$get_started/estimator$Premade Estimators}, we used the premade Estimator, +@{tf.estimator.DNNClassifier$`DNNClassifier`} to train a model to predict +different types of Iris flowers from four input features. That example created +only numerical feature columns (of type @{tf.feature_column.numeric_column}). +Although numerical feature columns model the lengths of petals and sepals +effectively, real world data sets contain all kinds of features, many of which +are non-numerical. + +
    + +
    +
    +Some real-world features (such as, longitude) are numerical, but many are not. +
    + +## Input to a Deep Neural Network + +What kind of data can a deep neural network operate on? The answer +is, of course, numbers (for example, `tf.float32`). After all, every neuron in +a neural network performs multiplication and addition operations on weights and +input data. Real-life input data, however, often contains non-numerical +(categorical) data. For example, consider a `product_class` feature that can +contain the following three non-numerical values: + +* `kitchenware` +* `electronics` +* `sports` + +ML models generally represent categorical values as simple vectors in which a +1 represents the presence of a value and a 0 represents the absence of a value. +For example, when `product_class` is set to `sports`, an ML model would usually +represent `product_class` as `[0, 0, 1]`, meaning: + +* `0`: `kitchenware` is absent +* `0`: `electronics` is absent +* `1`: `sports` is present + +So, although raw data can be numerical or categorical, an ML model represents +all features as numbers. + +## Feature Columns + +As the following figure suggests, you specify the input to a model through the +`feature_columns` argument of an Estimator (`DNNClassifier` for Iris). +Feature Columns bridge input data (as returned by `input_fn`) with your model. + +
    + +
    +
    +Feature columns bridge raw data with the data your model needs. +
    + +To create feature columns, call functions from the +@{tf.feature_column} module. This document explains nine of the functions in +that module. As the following figure shows, all nine functions return either a +Categorical-Column or a Dense-Column object, except `bucketized_column`, which +inherits from both classes: + +
    + +
    +
    +Feature column methods fall into two main categories and one hybrid category. +
    + +Let's look at these functions in more detail. + +### Numeric column + +The Iris classifier calls the @{tf.feature_column.numeric_column} function for +all input features: + + * `SepalLength` + * `SepalWidth` + * `PetalLength` + * `PetalWidth` + +Although `tf.numeric_column` provides optional arguments, calling +`tf.numeric_column` without any arguments, as follows, is a fine way to specify +a numerical value with the default data type (`tf.float32`) as input to your +model: + +```python +# Defaults to a tf.float32 scalar. +numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength") +``` + +To specify a non-default numerical data type, use the `dtype` argument. For +example: + +``` python +# Represent a tf.float64 scalar. +numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength", + dtype=tf.float64) +``` + +By default, a numeric column creates a single value (scalar). Use the shape +argument to specify another shape. For example: + + +```python +# Represent a 10-element vector in which each cell contains a tf.float32. +vector_feature_column = tf.feature_column.numeric_column(key="Bowling", + shape=10) + +# Represent a 10x5 matrix in which each cell contains a tf.float32. +matrix_feature_column = tf.feature_column.numeric_column(key="MyMatrix", + shape=[10,5]) +``` +### Bucketized column + +Often, you don't want to feed a number directly into the model, but instead +split its value into different categories based on numerical ranges. To do so, +create a @{tf.feature_column.bucketized_column$bucketized column}. For +example, consider raw data that represents the year a house was built. Instead +of representing that year as a scalar numeric column, we could split the year +into the following four buckets: + +
    + +
    +
    +Dividing year data into four buckets. +
    + +The model will represent the buckets as follows: + +|Date Range |Represented as... | +|:----------|:-----------------| +|< 1960 | [1, 0, 0, 0] | +|>= 1960 but < 1980 | [0, 1, 0, 0] | +|>= 1980 but < 2000 | [0, 0, 1, 0] | +|> 2000 | [0, 0, 0, 1] | + +Why would you want to split a number—a perfectly valid input to your +model—into a categorical value? Well, notice that the categorization splits a +single input number into a four-element vector. Therefore, the model now can +learn _four individual weights_ rather than just one; four weights creates a +richer model than one weight. More importantly, bucketizing enables the model +to clearly distinguish between different year categories since only one of the +elements is set (1) and the other three elements are cleared (0). When we just +use a single number (a year) as input, the model can only learn a linear +relationship. So, bucketing provides the model with additional flexibility that +the model can use to learn. + +The following code demonstrates how to create a bucketized feature: + + +```python +# First, convert the raw input to a numeric column. +numeric_feature_column = tf.feature_column.numeric_column("Year") + +# Then, bucketize the numeric column on the years 1960, 1980, and 2000. +bucketized_feature_column = tf.feature_column.bucketized_column( + source_column = numeric_feature_column, + boundaries = [1960, 1980, 2000]) +``` +Note that specifying a _three_-element boundaries vector creates a +_four_-element bucketized vector. + + +### Categorical identity column + +**Categorical identity columns** can be seen as a special case of bucketized +columns. In traditional bucketized columns, each bucket represents a range of +values (for example, from 1960 to 1979). In a categorical identity column, each +bucket represents a single, unique integer. For example, let's say you want to +represent the integer range `[0, 4)`. That is, you want to represent the +integers 0, 1, 2, or 3. In this case, the categorical identity mapping looks +like this: + +
    + +
    +
    +A categorical identity column mapping. Note that this is a one-hot +encoding, not a binary numerical encoding. +
    + +As with bucketized columns, a model can learn a separate weight for each class +in a categorical identity column. For example, instead of using a string to +represent the `product_class`, let's represent each class with a unique integer +value. That is: + +* `0="kitchenware"` +* `1="electronics"` +* `2="sport"` + +Call @{tf.feature_column.categorical_column_with_identity} to implement a +categorical identity column. For example: + +``` python +# Create categorical output for an integer feature named "my_feature_b", +# The values of my_feature_b must be >= 0 and < num_buckets +identity_feature_column = tf.feature_column.categorical_column_with_identity( + key='my_feature_b', + num_buckets=4) # Values [0, 4) + +# In order for the preceding call to work, the input_fn() must return +# a dictionary containing 'my_feature_b' as a key. Furthermore, the values +# assigned to 'my_feature_b' must belong to the set [0, 4). +def input_fn(): + ... + return ({ 'my_feature_a':[7, 9, 5, 2], 'my_feature_b':[3, 1, 2, 2] }, + [Label_values]) +``` + +### Categorical vocabulary column + +We cannot input strings directly to a model. Instead, we must first map strings +to numeric or categorical values. Categorical vocabulary columns provide a good +way to represent strings as a one-hot vector. For example: + +
    + +
    +
    +Mapping string values to vocabulary columns. +
    + +As you can see, categorical vocabulary columns are kind of an enum version of +categorical identity columns. TensorFlow provides two different functions to +create categorical vocabulary columns: + +* @{tf.feature_column.categorical_column_with_vocabulary_list} +* @{tf.feature_column.categorical_column_with_vocabulary_file} + +`categorical_column_with_vocabulary_list` maps each string to an integer based +on an explicit vocabulary list. For example: + +```python +# Given input "feature_name_from_input_fn" which is a string, +# create a categorical feature by mapping the input to one of +# the elements in the vocabulary list. +vocabulary_feature_column = + tf.feature_column.categorical_column_with_vocabulary_list( + key="a feature returned by input_fn()", + vocabulary_list=["kitchenware", "electronics", "sports"]) +``` + +The preceding function is pretty straightforward, but it has a significant +drawback. Namely, there's way too much typing when the vocabulary list is long. +For these cases, call +`tf.feature_column.categorical_column_with_vocabulary_file` instead, which lets +you place the vocabulary words in a separate file. For example: + +```python + +# Given input "feature_name_from_input_fn" which is a string, +# create a categorical feature to our model by mapping the input to one of +# the elements in the vocabulary file +vocabulary_feature_column = + tf.feature_column.categorical_column_with_vocabulary_file( + key="a feature returned by input_fn()", + vocabulary_file="product_class.txt", + vocabulary_size=3) +``` + +`product_class.txt` should contain one line for each vocabulary element. In our +case: + +```None +kitchenware +electronics +sports +``` + +### Hashed Column + +So far, we've worked with a naively small number of categories. For example, +our product_class example has only 3 categories. Often though, the number of +categories can be so big that it's not possible to have individual categories +for each vocabulary word or integer because that would consume too much memory. +For these cases, we can instead turn the question around and ask, "How many +categories am I willing to have for my input?" In fact, the +@{tf.feature_column.categorical_column_with_hash_bucket} function enables you +to specify the number of categories. For this type of feature column the model +calculates a hash value of the input, then puts it into one of +the `hash_bucket_size` categories using the modulo operator, as in the following +pseudocode: + +```python +# pseudocode +feature_id = hash(raw_feature) % hash_buckets_size +``` + +The code to create the `feature_column` might look something like this: + +``` python +hashed_feature_column = + tf.feature_column.categorical_column_with_hash_bucket( + key = "some_feature", + hash_buckets_size = 100) # The number of categories +``` +At this point, you might rightfully think: "This is crazy!" After all, we are +forcing the different input values to a smaller set of categories. This means +that two probably unrelated inputs will be mapped to the same +category, and consequently mean the same thing to the neural network. The +following figure illustrates this dilemma, showing that kitchenware and sports +both get assigned to category (hash bucket) 12: + +
    + +
    +
    +Representing data with hash buckets. +
    + +As with many counterintuitive phenomena in machine learning, it turns out that +hashing often works well in practice. That's because hash categories provide +the model with some separation. The model can use additional features to further +separate kitchenware from sports. + +### Crossed column + +Combining features into a single feature, better known as +[feature crosses](https://developers.google.com/machine-learning/glossary/#feature_cross), +enables the model to learn separate weights for each combination of +features. + +More concretely, suppose we want our model to calculate real estate prices in +Atlanta, GA. Real-estate prices within this city vary greatly depending on +location. Representing latitude and longitude as separate features isn't very +useful in identifying real-estate location dependencies; however, crossing +latitude and longitude into a single feature can pinpoint locations. Suppose we +represent Atlanta as a grid of 100x100 rectangular sections, identifying each +of the 10,000 sections by a feature cross of latitude and longitude. This +feature cross enables the model to train on pricing conditions related to each +individual section, which is a much stronger signal than latitude and longitude +alone. + +The following figure shows our plan, with the latitude & longitude values for +the corners of the city in red text: + +
    + +
    +
    +Map of Atlanta. Imagine this map divided into 10,000 sections of +equal size. +
    + +For the solution, we used a combination of the `bucketized_column` we looked at +earlier, with the @{tf.feature_column.crossed_column} function. + + + +``` python +def make_dataset(latitude, longitude, labels): + assert latitude.shape == longitude.shape == labels.shape + + features = {'latitude': latitude.flatten(), + 'longitude': longitude.flatten()} + labels=labels.flatten() + + return tf.data.Dataset.from_tensor_slices((features, labels)) + + +# Bucketize the latitude and longitude usig the `edges` +latitude_bucket_fc = tf.feature_column.bucketized_column( + tf.feature_column.numeric_column('latitude'), + list(atlanta.latitude.edges)) + +longitude_bucket_fc = tf.feature_column.bucketized_column( + tf.feature_column.numeric_column('longitude'), + list(atlanta.longitude.edges)) + +# Cross the bucketized columns, using 5000 hash bins. +crossed_lat_lon_fc = tf.feature_column.crossed_column( + [latitude_bucket_fc, longitude_bucket_fc], 5000) + +fc = [ + latitude_bucket_fc, + longitude_bucket_fc, + crossed_lat_lon_fc] + +# Build and train the Estimator. +est = tf.estimator.LinearRegressor(fc, ...) +``` + +You may create a feature cross from either of the following: + +* Feature names; that is, names from the `dict` returned from `input_fn`. +* Any categorical column, except `categorical_column_with_hash_bucket` + (since `crossed_column` hashes the input). + +When the feature columns `latitude_bucket_fc` and `longitude_bucket_fc` are +crossed, TensorFlow will create `(latitude_fc, longitude_fc)` pairs for each +example. This would produce a full grid of possibilities as follows: + +``` None + (0,0), (0,1)... (0,99) + (1,0), (1,1)... (1,99) + ... ... ... +(99,0), (99,1)...(99, 99) +``` + +Except that a full grid would only be tractable for inputs with limited +vocabularies. Instead of building this, potentially huge, table of inputs, +the `crossed_column` only builds the number requested by the `hash_bucket_size` +argument. The feature column assigns an example to a index by running a hash +function on the tuple of inputs, followed by a modulo operation with +`hash_bucket_size`. + +As discussed earlier, performing the +hash and modulo function limits the number of categories, but can cause category +collisions; that is, multiple (latitude, longitude) feature crosses will end +up in the same hash bucket. In practice though, performing feature crosses +still adds significant value to the learning capability of your models. + +Somewhat counterintuitively, when creating feature crosses, you typically still +should include the original (uncrossed) features in your model (as in the +preceding code snippet). The independent latitude and longitude features help the +model distinguish between examples where a hash collision has occured in the +crossed feature. + +## Indicator and embedding columns + +Indicator columns and embedding columns never work on features directly, but +instead take categorical columns as input. + +When using an indicator column, we're telling TensorFlow to do exactly what +we've seen in our categorical product_class example. That is, an +**indicator column** treats each category as an element in a one-hot vector, +where the matching category has value 1 and the rest have 0s: + +
    + +
    +
    +Representing data in indicator columns. +
    + +Here's how you create an indicator column by calling +@{tf.feature_column.indicator_column}: + +``` python +categorical_column = ... # Create any type of categorical column. + +# Represent the categorical column as an indicator column. +indicator_column = tf.feature_column.indicator_column(categorical_column) +``` + +Now, suppose instead of having just three possible classes, we have a million. +Or maybe a billion. For a number of reasons, as the number of categories grow +large, it becomes infeasible to train a neural network using indicator columns. + +We can use an embedding column to overcome this limitation. Instead of +representing the data as a one-hot vector of many dimensions, an +**embedding column** represents that data as a lower-dimensional, ordinary +vector in which each cell can contain any number, not just 0 or 1. By +permitting a richer palette of numbers for every cell, an embedding column +contains far fewer cells than an indicator column. + +Let's look at an example comparing indicator and embedding columns. Suppose our +input examples consists of different words from a limited palette of only 81 +words. Further suppose that the data set provides provides the following input +words in 4 separate examples: + +* `"dog"` +* `"spoon"` +* `"scissors"` +* `"guitar"` + +In that case, the following figure illustrates the processing path for +embedding columns or indicator columns. + +
    + +
    +
    +An embedding column stores categorical data in a lower-dimensional +vector than an indicator column. (We just placed random numbers into the +embedding vectors; training determines the actual numbers.) +
    + +When an example is processed, one of the `categorical_column_with...` functions +maps the example string to a numerical categorical value. For example, a +function maps "spoon" to `[32]`. (The 32 comes from our imagination—the actual +values depend on the mapping function.) You may then represent these numerical +categorical values in either of the following two ways: + +* As an indicator column. A function converts each numeric categorical value + into an 81-element vector (because our palette consists of 81 words), placing + a 1 in the index of the categorical value (0, 32, 79, 80) and a 0 in all the + other positions. + +* As an embedding column. A function uses the numerical categorical values + `(0, 32, 79, 80)` as indices to a lookup table. Each slot in that lookup table + contains a 3-element vector. + +How do the values in the embeddings vectors magically get assigned? Actually, +the assignments happen during training. That is, the model learns the best way +to map your input numeric categorical values to the embeddings vector value in +order to solve your problem. Embedding columns increase your model's +capabilities, since an embeddings vector learns new relationships between +categories from the training data. + +Why is the embedding vector size 3 in our example? Well, the following "formula" +provides a general rule of thumb about the number of embedding dimensions: + +```python +embedding_dimensions = number_of_categories**0.25 +``` + +That is, the embedding vector dimension should be the 4th root of the number of +categories. Since our vocabulary size in this example is 81, the recommended +number of dimensions is 3: + +``` python +3 = 81**0.25 +``` +Note that this is just a general guideline; you can set the number of embedding +dimensions as you please. + +Call @{tf.feature_column.embedding_column} to create an `embedding_column` as +suggested by the following snippet: + +``` python +categorical_column = ... # Create any categorical column + +# Represent the categorical column as an embedding column. +# This means creating a one-hot vector with one element for each category. +embedding_column = tf.feature_column.embedding_column( + categorical_column=categorical_column, + dimension=dimension_of_embedding_vector) +``` + +@{$programmers_guide/embedding$Embeddings} is a significant topic within machine +learning. This information was just to get you started using them as feature +columns. + +## Passing feature columns to Estimators + +As the following list indicates, not all Estimators permit all types of +`feature_columns` argument(s): + +* @{tf.estimator.LinearClassifier$`LinearClassifier`} and + @{tf.estimator.LinearRegressor$`LinearRegressor`}: Accept all types of + feature column. +* @{tf.estimator.DNNClassifier$`DNNClassifier`} and + @{tf.estimator.DNNRegressor$`DNNRegressor`}: Only accept dense columns. Other + column types must be wrapped in either an `indicator_column` or + `embedding_column`. +* @{tf.estimator.DNNLinearCombinedClassifier$`DNNLinearCombinedClassifier`} and + @{tf.estimator.DNNLinearCombinedRegressor$`DNNLinearCombinedRegressor`}: + * The `linear_feature_columns` argument accepts any feature column type. + * The `dnn_feature_columns` argument only accepts dense columns. + +## Other Sources + +For more examples on feature columns, view the following: + +* The @{$wide_and_deep$Wide & Deep Tutorial} +* [Examples](https://github.com/tensorflow/models/tree/master/samples/cookbook/regression) + of DNNs and linear models that use feature columns. + +To learn more about embeddings, see the following: + +* [Deep Learning, NLP, and representations](http://colah.github.io/posts/2014-07-NLP-RNNs-Representations/) + (Chris Olah's blog) +* The TensorFlow [Embedding Projector](http://projector.tensorflow.org) -- GitLab From c27a90d2195545c9147ec79094d7bca3176deb44 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 29 Nov 2017 09:59:34 -0800 Subject: [PATCH 0943/1801] [TF:XLA] VariableShape op support. PiperOrigin-RevId: 177323587 --- .../compiler/tf2xla/kernels/variable_ops.cc | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc index b19ea22f50..2346c62ad1 100644 --- a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/no_op.h" namespace tensorflow { @@ -121,5 +122,31 @@ class ResourceGatherOp : public XlaOpKernel { REGISTER_XLA_OP(Name("ResourceGather").TypeConstraint("dtype", kNumericTypes), ResourceGatherOp); +class VariableShapeOp : public XlaOpKernel { + public: + explicit VariableShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + DataType dtype; + TensorShape shape; + OP_REQUIRES_OK(ctx, ctx->GetVariableTypeAndShape(0, &dtype, &shape)); + const int rank = shape.dims(); + Tensor shape_constant(DT_INT32, TensorShape({rank})); + auto vec = shape_constant.vec(); + // TODO(dga): support int64. b/28119922. + for (int i = 0; i < rank; ++i) { + int64 dim_size = shape.dim_size(i); + OP_REQUIRES( + ctx, FastBoundsCheck(dim_size, std::numeric_limits::max()), + errors::InvalidArgument("Shape does not support tensors > int32max", + " but dim ", i, " is ", dim_size)); + vec(i) = static_cast(dim_size); + } + + ctx->SetConstantOutput(0, shape_constant); + } +}; + +REGISTER_XLA_OP(Name("VariableShape"), VariableShapeOp); } // namespace } // namespace tensorflow -- GitLab From 2229a6cbbe27b3c42fbcd4aff0bb3de1925a8768 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 10:04:47 -0800 Subject: [PATCH 0944/1801] Internal Change PiperOrigin-RevId: 177324488 --- tensorflow/contrib/lite/python/lite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 0fd70f842b..982ea90f2b 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -50,7 +50,7 @@ GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT # to protect against crashes. However, it breaks some dependent targets because # it forces us to depend on an external py_binary. The experimental API doesn't # have that drawback. -EXPERIMENTAL_USE_TOCO_API_DIRECTLY = True +EXPERIMENTAL_USE_TOCO_API_DIRECTLY = False # Find the toco_from_protos binary using the resource loader if using from # bazel, otherwise we are in a pip where console_scripts already has -- GitLab From 7921d01ec8fed3e5c62264b99b09440ea09796fe Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 29 Nov 2017 10:06:59 -0800 Subject: [PATCH 0945/1801] Raise an exception when converting lists with invalid lengths to Tensors instead of CHECK failing PiperOrigin-RevId: 177324815 --- tensorflow/contrib/cmake/tf_python.cmake | 2 + tensorflow/python/BUILD | 13 ++++ .../kernel_tests/constant_op_eager_test.py | 33 +++++++++ tensorflow/python/lib/core/py_func.cc | 54 ++------------ tensorflow/python/lib/core/py_seq_tensor.cc | 18 ++++- tensorflow/python/lib/core/py_util.cc | 70 +++++++++++++++++++ tensorflow/python/lib/core/py_util.h | 27 +++++++ tensorflow/tools/ci_build/ci_sanity.sh | 3 +- 8 files changed, 169 insertions(+), 51 deletions(-) create mode 100644 tensorflow/python/lib/core/py_util.cc create mode 100644 tensorflow/python/lib/core/py_util.h diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 0128946e45..819b6213ea 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -899,6 +899,8 @@ set (pywrap_tensorflow_internal_src "${tensorflow_source_dir}/tensorflow/python/lib/core/py_func.cc" "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.h" "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.cc" + "${tensorflow_source_dir}/tensorflow/python/lib/core/py_util.h" + "${tensorflow_source_dir}/tensorflow/python/lib/core/py_util.cc" "${tensorflow_source_dir}/tensorflow/python/lib/core/safe_ptr.h" "${tensorflow_source_dir}/tensorflow/python/lib/core/safe_ptr.cc" "${tensorflow_source_dir}/tensorflow/python/lib/io/py_record_reader.h" diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 9d3974b98e..5e7a6c0b59 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -268,6 +268,7 @@ cc_library( deps = [ ":ndarray_tensor_bridge", ":numpy_lib", + ":py_util", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -309,6 +310,7 @@ cc_library( hdrs = ["lib/core/py_seq_tensor.h"], deps = [ ":numpy_lib", + ":py_util", ":safe_ptr", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -316,6 +318,17 @@ cc_library( ], ) +cc_library( + name = "py_util", + srcs = ["lib/core/py_util.cc"], + hdrs = ["lib/core/py_util.h"], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:script_ops_op_lib", + "//util/python:python_headers", + ], +) + cc_library( name = "py_record_reader_lib", srcs = ["lib/io/py_record_reader.cc"], diff --git a/tensorflow/python/kernel_tests/constant_op_eager_test.py b/tensorflow/python/kernel_tests/constant_op_eager_test.py index 3b71586b55..8e9d75667d 100644 --- a/tensorflow/python/kernel_tests/constant_op_eager_test.py +++ b/tensorflow/python/kernel_tests/constant_op_eager_test.py @@ -237,6 +237,39 @@ class ConstantTest(test.TestCase): self._testAll((1, x)) self._testAll((x, 1)) + def testInvalidLength(self): + + class BadList(list): + + def __init__(self): + super(BadList, self).__init__([1, 2, 3]) # pylint: disable=invalid-length-returned + + def __len__(self): + return -1 + + with self.assertRaisesRegexp(ValueError, "should return >= 0"): + constant_op.constant([BadList()]) + with self.assertRaisesRegexp(ValueError, "mixed types"): + constant_op.constant([1, 2, BadList()]) + with self.assertRaisesRegexp(ValueError, "should return >= 0"): + constant_op.constant(BadList()) + with self.assertRaisesRegexp(ValueError, "should return >= 0"): + constant_op.constant([[BadList(), 2], 3]) + with self.assertRaisesRegexp(ValueError, "should return >= 0"): + constant_op.constant([BadList(), [1, 2, 3]]) + with self.assertRaisesRegexp(ValueError, "should return >= 0"): + constant_op.constant([BadList(), []]) + + # TODO(allenl, josh11b): These cases should return exceptions rather than + # working (currently shape checking only checks the first element of each + # sequence recursively). Maybe the first one is fine, but the second one + # silently truncating is rather bad. + + # with self.assertRaisesRegexp(ValueError, "should return >= 0"): + # constant_op.constant([[3, 2, 1], BadList()]) + # with self.assertRaisesRegexp(ValueError, "should return >= 0"): + # constant_op.constant([[], BadList()]) + def testSparseValuesRaiseErrors(self): with self.assertRaisesRegexp(ValueError, "non-rectangular Python sequence"): constant_op.constant([[1, 2], [3]], dtype=dtypes_lib.int32) diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 8bf831f8ba..a42282b055 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -22,11 +22,11 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h" +#include "tensorflow/python/lib/core/py_util.h" #include namespace tensorflow { @@ -133,48 +133,6 @@ bool IsSingleNone(PyObject* obj) { return item == Py_None; } -// py.__class__.__name__ -const char* ClassName(PyObject* py) { -/* PyPy doesn't have a separate C API for old-style classes. */ -#if PY_MAJOR_VERSION < 3 && !defined(PYPY_VERSION) - if (PyClass_Check(py)) - return PyString_AS_STRING( - CHECK_NOTNULL(reinterpret_cast(py)->cl_name)); - if (PyInstance_Check(py)) - return PyString_AS_STRING(CHECK_NOTNULL( - reinterpret_cast(py)->in_class->cl_name)); -#endif - if (Py_TYPE(py) == &PyType_Type) { - return reinterpret_cast(py)->tp_name; - } - return Py_TYPE(py)->tp_name; -} - -string PyExcFetch() { - CHECK(PyErr_Occurred()) << "Must only call PyExcFetch after an exception."; - PyObject* ptype; - PyObject* pvalue; - PyObject* ptraceback; - PyErr_Fetch(&ptype, &pvalue, &ptraceback); - PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); - string err = ClassName(ptype); - if (pvalue) { - PyObject* str = PyObject_Str(pvalue); - if (str) { -#if PY_MAJOR_VERSION < 3 - strings::StrAppend(&err, ": ", PyString_AS_STRING(str)); -#else - strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str)); -#endif - Py_DECREF(str); - } - Py_DECREF(pvalue); - } - Py_DECREF(ptype); - Py_XDECREF(ptraceback); - return err; -} - // Calls the registered py function through the trampoline. Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { *out_log_on_error = true; @@ -195,18 +153,18 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { if (PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_ValueError) || PyErr_ExceptionMatches(PyExc_TypeError)) { - return errors::InvalidArgument(PyExcFetch()); + return errors::InvalidArgument(PyExceptionFetch()); } else if (PyErr_ExceptionMatches(PyExc_StopIteration)) { *out_log_on_error = false; - return errors::OutOfRange(PyExcFetch()); + return errors::OutOfRange(PyExceptionFetch()); } else if (PyErr_ExceptionMatches(PyExc_MemoryError)) { - return errors::ResourceExhausted(PyExcFetch()); + return errors::ResourceExhausted(PyExceptionFetch()); } else if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) { - return errors::Unimplemented(PyExcFetch()); + return errors::Unimplemented(PyExceptionFetch()); } else { // TODO(ebrevdo): Check if exception is an OpError and use the // OpError.error_code property to map it back in the Status. - return errors::Unknown(PyExcFetch()); + return errors::Unknown(PyExceptionFetch()); } } else { return errors::Internal("Failed to run py callback ", call->token, diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc index 71cb38f8fd..317bdc2e14 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/python/lib/core/numpy.h" +#include "tensorflow/python/lib/core/py_util.h" #include "tensorflow/python/lib/core/safe_ptr.h" namespace tensorflow { @@ -89,12 +90,25 @@ Status InferShapeAndType(PyObject* obj, TensorShape* shape, DataType* dtype) { *dtype = DT_STRING; } else if (PySequence_Check(obj)) { auto length = PySequence_Length(obj); - shape->AddDim(length); if (length > 0) { + shape->AddDim(length); obj = PySequence_GetItem(obj, 0); continue; - } else { + } else if (length == 0) { + shape->AddDim(length); *dtype = DT_INVALID; // Invalid dtype for empty tensors. + } else { + // The sequence does not have a valid length (PySequence_Length < 0). + if (PyErr_Occurred()) { + // PySequence_Length failed and set an exception. Fetch the message + // and convert it to a failed status. + return errors::InvalidArgument(PyExceptionFetch()); + } else { + // This is almost certainly dead code: PySequence_Length failed but + // did not set an exception. + return errors::InvalidArgument( + "Attempted to convert an invalid sequence to a Tensor."); + } } } else if (IsPyFloat(obj)) { *dtype = DT_DOUBLE; diff --git a/tensorflow/python/lib/core/py_util.cc b/tensorflow/python/lib/core/py_util.cc new file mode 100644 index 0000000000..2635694e23 --- /dev/null +++ b/tensorflow/python/lib/core/py_util.cc @@ -0,0 +1,70 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/python/lib/core/py_util.h" + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include + +namespace tensorflow { +namespace { + +// py.__class__.__name__ +const char* ClassName(PyObject* py) { +/* PyPy doesn't have a separate C API for old-style classes. */ +#if PY_MAJOR_VERSION < 3 && !defined(PYPY_VERSION) + if (PyClass_Check(py)) + return PyString_AS_STRING( + CHECK_NOTNULL(reinterpret_cast(py)->cl_name)); + if (PyInstance_Check(py)) + return PyString_AS_STRING(CHECK_NOTNULL( + reinterpret_cast(py)->in_class->cl_name)); +#endif + if (Py_TYPE(py) == &PyType_Type) { + return reinterpret_cast(py)->tp_name; + } + return Py_TYPE(py)->tp_name; +} + +} // end namespace + +string PyExceptionFetch() { + CHECK(PyErr_Occurred()) + << "Must only call PyExceptionFetch after an exception."; + PyObject* ptype; + PyObject* pvalue; + PyObject* ptraceback; + PyErr_Fetch(&ptype, &pvalue, &ptraceback); + PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); + string err = ClassName(ptype); + if (pvalue) { + PyObject* str = PyObject_Str(pvalue); + if (str) { +#if PY_MAJOR_VERSION < 3 + strings::StrAppend(&err, ": ", PyString_AS_STRING(str)); +#else + strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str)); +#endif + Py_DECREF(str); + } + Py_DECREF(pvalue); + } + Py_DECREF(ptype); + Py_XDECREF(ptraceback); + return err; +} + +} // end namespace tensorflow diff --git a/tensorflow/python/lib/core/py_util.h b/tensorflow/python/lib/core/py_util.h new file mode 100644 index 0000000000..44dfe7ba21 --- /dev/null +++ b/tensorflow/python/lib/core/py_util.h @@ -0,0 +1,27 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_ +#define TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_ + +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +// Fetch the exception message as a string. An exception must be set +// (PyErr_Occurred() must be true). +string PyExceptionFetch(); +} // end namespace tensorflow + +#endif // TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_ diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index 404a9a6b62..4021d794b6 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -99,7 +99,8 @@ do_pylint() { "^tensorflow/contrib/eager/python/metrics_impl\.py.*\[E0202.*method-hidden "\ "^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\ "^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable "\ -"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition" +"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition "\ +"^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned" echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\"" -- GitLab From c572bc4fd7c73f4b8014ae43cdf9da5b99592f59 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 29 Nov 2017 10:37:28 -0800 Subject: [PATCH 0946/1801] Outline generated LLVM IR matrix-vector dot kernels This is a code size optimization for cases that dot matrix-vectors of the same shape repeatedly, but is also a slight performance improvment (most likely due to better icache behavior). PiperOrigin-RevId: 177329302 --- .../xla/service/cpu/dot_op_emitter.cc | 62 +++++++++++++------ .../service/llvm_ir/kernel_support_library.cc | 44 +++++++++++++ .../service/llvm_ir/kernel_support_library.h | 32 ++++++++++ 3 files changed, 118 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 8f7b478cee..4ccff756a3 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -522,8 +522,10 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { return false; } - if (!primitive_util::IsFloatingPointType(dot_.shape().element_type()) && - !primitive_util::IsIntegralType(dot_.shape().element_type())) { + PrimitiveType primitive_type = dot_.shape().element_type(); + + if (!primitive_util::IsFloatingPointType(primitive_type) && + !primitive_util::IsIntegralType(primitive_type)) { return false; } @@ -573,30 +575,50 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { int64 tiling_factor = GetGemvTilingFactor(); CHECK_GT(tiling_factor, 0); + llvm::Value* result_op = target_array_.GetBasePointer(); + llvm::Value* lhs_op = + swap_operands ? rhs_array_.GetBasePointer() : lhs_array_.GetBasePointer(); + llvm::Value* rhs_op = + swap_operands ? lhs_array_.GetBasePointer() : rhs_array_.GetBasePointer(); + if (is_column_major_matrix_vector) { VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m << " and k = " << k; - ColumnMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), /*tile_rows=*/8, - /*tile_cols=*/tiling_factor, m, k, - swap_operands ? rhs_array_.GetBasePointer() - : lhs_array_.GetBasePointer(), - swap_operands ? lhs_array_.GetBasePointer() - : rhs_array_.GetBasePointer(), - target_array_.GetBasePointer(), ir_builder_); - emitter.Emit(); + int64 tile_rows = 8; + int64 tile_cols = tiling_factor; + + string kernel_name = tensorflow::strings::StrCat( + "col_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows, + "_", tile_cols, "_", m, "_", k); + + KernelSupportLibrary::EmitAndCallOutlinedKernel( + ir_builder_, kernel_name, lhs_op, rhs_op, result_op, + [this, tile_rows, tile_cols, m, k, primitive_type]( + llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) { + ColumnMajorMatrixVectorProductEmitter emitter( + primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op, + result_op, ir_builder_); + emitter.Emit(); + }); } else { VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m << " and k = " << k; - RowMajorMatrixVectorProductEmitter emitter( - dot_.shape().element_type(), /*tile_rows=*/tiling_factor, - /*tile_cols=*/8, m, k, - swap_operands ? rhs_array_.GetBasePointer() - : lhs_array_.GetBasePointer(), - swap_operands ? lhs_array_.GetBasePointer() - : rhs_array_.GetBasePointer(), - target_array_.GetBasePointer(), ir_builder_); - emitter.Emit(); + int64 tile_rows = tiling_factor; + int64 tile_cols = 8; + + string kernel_name = tensorflow::strings::StrCat( + "row_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows, + "_", tile_cols, "_", m, "_", k); + + KernelSupportLibrary::EmitAndCallOutlinedKernel( + ir_builder_, kernel_name, lhs_op, rhs_op, result_op, + [this, tile_rows, tile_cols, m, k, primitive_type]( + llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) { + RowMajorMatrixVectorProductEmitter emitter( + primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op, + result_op, ir_builder_); + emitter.Emit(); + }); } return true; diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc index 29cc0f81bd..d951a37d5d 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" namespace xla { void KernelSupportLibrary::For( @@ -62,4 +63,47 @@ void KernelSupportLibrary::If( false_block_generator(); llvm_ir::SetToLastInsertPoint(if_data.after_block, ir_builder_); } + +void KernelSupportLibrary::EmitAndCallOutlinedKernel( + llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name, + KernelSupportLibrary::ArgumentVector arguments, + const std::function& + kernel_body_generator) { + llvm::Module* module = ir_builder->GetInsertBlock()->getModule(); + llvm::Function* function = + module->getFunction(llvm_ir::AsStringRef(kernel_name)); + if (!function) { + VLOG(2) << "Generating kernel for " << kernel_name; + std::vector arg_types; + std::transform(arguments.begin(), arguments.end(), + std::back_inserter(arg_types), + [](llvm::Value* arg) { return arg->getType(); }); + + auto* function_type = llvm::FunctionType::get( + ir_builder->getVoidTy(), arg_types, /*isVarArg=*/false); + + function = llvm::Function::Create( + function_type, llvm::GlobalValue::InternalLinkage, + llvm_ir::AsStringRef(kernel_name), module); + + llvm::IRBuilder<>::InsertPointGuard guard(*ir_builder); + + auto* entry_bb = + llvm::BasicBlock::Create(ir_builder->getContext(), "entry", function); + auto* return_inst = llvm::ReturnInst::Create(ir_builder->getContext(), + /*retVal=*/nullptr, entry_bb); + // Set the insert point to before return_inst. + ir_builder->SetInsertPoint(return_inst); + + std::vector arg_values; + std::transform(function->arg_begin(), function->arg_end(), + std::back_inserter(arg_values), std::addressof); + kernel_body_generator(arg_values); + } else { + VLOG(3) << "Re-using kernel for " << kernel_name; + } + + ir_builder->CreateCall(function, llvm_ir::AsArrayRef(arguments)); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h index 9bafb7b577..997b84bb27 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h @@ -118,6 +118,38 @@ class KernelSupportLibrary { const std::function& true_block_generator, const std::function& false_block_generator = []() {}); + using ArgumentVector = tensorflow::gtl::ArraySlice; + + // Generates the following control flow structure: + // + // define @`kernel_name`(arg0, arg1, ... arg`arguments.size()`) { + // kernel_body_generator({arg0, arg1, ... arg`arguments.size()`}); + // } + // + // ... + // call @`kernel_name`(arguments[0], arguments[1] ...) + // ... + // + // If a function called `kernel_name` is already present in the module then + // that function is re-used. In that sense we're using the llvm::Module as a + // cache of outlined kernels, keyed by function name. + static void EmitAndCallOutlinedKernel( + llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name, + ArgumentVector arguments, + const std::function& kernel_body_generator); + + // Thin wrapper around the more general EmitAndCallOutlinedKernel above. + static void EmitAndCallOutlinedKernel( + llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name, + llvm::Value* arg0, llvm::Value* arg1, llvm::Value* arg2, + const std::function& + kernel_body_generator) { + EmitAndCallOutlinedKernel( + ir_builder, kernel_name, {arg0, arg1, arg2}, [&](ArgumentVector args) { + kernel_body_generator(args[0], args[1], args[2]); + }); + } + private: llvm::IRBuilder<>* ir_builder_; bool prevent_unrolling_; -- GitLab From 78a4873cfa4562cf071492636f03e13fcb188bd8 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 29 Nov 2017 11:18:38 -0800 Subject: [PATCH 0947/1801] Go: Bugfix: Make list-of-shape attributes in an operation work. By respecting cgo rules on pointers. Without the change to graph.go, the newly added test would fail with: panic: runtime error: cgo argument has Go pointer to Go pointer in the call to the C function TF_SetAttrShapeList. Fixes #14891 PiperOrigin-RevId: 177336663 --- tensorflow/go/graph.go | 64 +++++++++++++++++++------------- tensorflow/go/op/op_test.go | 73 +++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 25 deletions(-) diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go index 46c600eab1..f200a8e00a 100644 --- a/tensorflow/go/graph.go +++ b/tensorflow/go/graph.go @@ -20,6 +20,24 @@ package tensorflow // // #include // #include +// +// void TF_SetAttrShapeList_Helper(TF_OperationDescription* desc, +// const char* attr_name, +// const int64_t* flat_dims, +// const int* num_dims, +// int num_shapes) { +// const int64_t** dims = +// (const int64_t**)malloc(sizeof(const int64_t*) * num_shapes); +// for (int i = 0; i < num_shapes; i++) { +// dims[i] = flat_dims; +// if (num_dims[i] > 0) { +// // flat_dims will be NULL iff num_shapes is 0 or all elements in num_dims are <= 0. +// flat_dims += num_dims[i]; +// } +// } +// TF_SetAttrShapeList(desc, attr_name, dims, num_dims, num_shapes); +// free(dims); +// } import "C" import ( @@ -289,41 +307,37 @@ func setAttr(cdesc *C.TF_OperationDescription, status *status, name string, valu return fmt.Errorf("bad value for attribute %q: %v", name, err) } case Shape: - ndims, dims := cshape(value) + ndims := C.int(value.NumDimensions()) var dimsp *C.int64_t if ndims > 0 { + dims := make([]C.int64_t, ndims) + for i, d := range value.dims { + dims[i] = C.int64_t(d) + } dimsp = &dims[0] } C.TF_SetAttrShape(cdesc, cAttrName, dimsp, ndims) case []Shape: - ndims := make([]C.int, len(value)) - dims := make([][]C.int64_t, len(value)) - dimsp := make([]*C.int64_t, len(value)) - for i, s := range value { - ndims[i], dims[i] = cshape(s) - if ndims[i] > 0 { - dimsp[i] = &dims[i][0] - } - } - if len(value) > 0 { - C.TF_SetAttrShapeList(cdesc, cAttrName, &dimsp[0], &ndims[0], C.int(len(value))) - } else { + if len(value) == 0 { C.TF_SetAttrShapeList(cdesc, cAttrName, nil, nil, 0) + } else { + var flatDims []C.int64_t + ndims := make([]C.int, len(value)) + for i, s := range value { + nd := s.NumDimensions() + ndims[i] = C.int(nd) + for _, d := range s.dims { + flatDims = append(flatDims, C.int64_t(d)) + } + } + var flatDimsp *C.int64_t + if len(flatDims) > 0 { + flatDimsp = &flatDims[0] + } + C.TF_SetAttrShapeList_Helper(cdesc, cAttrName, flatDimsp, &ndims[0], C.int(len(value))) } default: return fmt.Errorf("attribute %q has a type (%T) which is not valid for operation attributes", name, value) } return nil } - -func cshape(s Shape) (C.int, []C.int64_t) { - ndims := C.int(s.NumDimensions()) - if ndims < 0 { - return -1, nil - } - dims := make([]C.int64_t, ndims) - for i, s := range s.dims { - dims[i] = C.int64_t(s) - } - return ndims, dims -} diff --git a/tensorflow/go/op/op_test.go b/tensorflow/go/op/op_test.go index 2451ba3606..842dee9ffe 100644 --- a/tensorflow/go/op/op_test.go +++ b/tensorflow/go/op/op_test.go @@ -58,3 +58,76 @@ func TestAddOperationFailure(t *testing.T) { _ = resize.Shape() t.Errorf("resize.Shape() should have paniced since the underlying Operation was not created") } + +func TestShapeAttribute(t *testing.T) { + s := NewScope() + x := Placeholder(s.SubScope("x"), tf.Int32, PlaceholderShape(tf.MakeShape(1))) + y := Placeholder(s.SubScope("y"), tf.Int32, PlaceholderShape(tf.Shape{})) + z := Add(s, x, y) + graph, err := s.Finalize() + if err != nil { + t.Fatal(err) + } + sess, err := tf.NewSession(graph, nil) + if err != nil { + t.Fatal(err) + } + + value, err := tf.NewTensor([]int32{7}) + if err != nil { + t.Fatal(err) + } + feeds := map[tf.Output]*tf.Tensor{ + x: value, + y: value, + } + fetched, err := sess.Run(feeds, []tf.Output{z}, nil) + if err != nil { + t.Fatal(err) + } + if got, want := len(fetched), 1; got != want { + t.Fatalf("Fetched %d tensors, expected %d", got, want) + } + if got, want := fetched[0].Value().([]int32), []int32{14}; len(got) != len(want) || len(got) != 1 || got[0] != want[0] { + t.Fatalf("Got %v, want %v", got, want) + } +} + +func TestDataset(t *testing.T) { + var ( + s = NewScope() + + // The use of a non-scalar here is inspired by + // https://github.com/tensorflow/tensorflow/issues/14891 + c = Const(s, []int32{21718, 31415}) + types = []tf.DataType{c.DataType()} + shapes = []tf.Shape{c.Shape()} + dataset = TensorDataset(s, []tf.Output{c}, shapes) + + iterator = Iterator(s, "", "", types, shapes) + next = IteratorGetNext(s, iterator, types, shapes) + init = MakeIterator(s, dataset, iterator) + ) + graph, err := s.Finalize() + if err != nil { + t.Fatal(err) + } + sess, err := tf.NewSession(graph, nil) + if err != nil { + t.Fatal(err) + } + if _, err := sess.Run(nil, nil, []*tf.Operation{init}); err != nil { + t.Fatal(err) + } + results, err := sess.Run(nil, next, nil) + if err != nil { + t.Fatal(err) + } + got := results[0].Value().([]int32) + if len(got) != 2 || got[0] != 21718 || got[1] != 31415 { + t.Errorf("Got %v, want {21718, 31415}", got) + } + if _, err := sess.Run(nil, next, nil); err == nil { + t.Errorf("Expected sess.Run() to fail since the iterator should have reached the end of the dataset") + } +} -- GitLab From 71f22bbab05e25c5f026c4343664091cc117b5ab Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 29 Nov 2017 11:27:08 -0800 Subject: [PATCH 0948/1801] (Temporarily) call Graph._add_op outside of Operation.__init__ again. This change partially undoes my previous commit (https://github.com/tensorflow/tensorflow/commit/f4c18a0eb05e21bae397c9c16527ff8080cae6b8). Without this change, if an op is added that has invalid input shapes and also requires a kernel label, the op will be added to the graph before shape inference is run, but then the shape inference error will prevent the kernel label from being applied. The placer will then complain about the missing label when the graph is run. This is only a problem with the C API disabled. With the C API enabled, shape inference is performed when the TF_Operation is created in Operation.__init__. Thus we can and should move the _add_op call back to Operation.__init__ once the _USE_C_API flag is removed. PiperOrigin-RevId: 177338123 --- .../copy_graph/python/util/copy_elements.py | 1 + tensorflow/contrib/graph_editor/transform.py | 3 +++ tensorflow/python/framework/ops.py | 5 ++--- tensorflow/python/framework/ops_test.py | 13 +++++++++++++ tensorflow/python/framework/test_ops.cc | 18 ++++++++++++++++++ 5 files changed, 37 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py index d060eda0a7..bae66ffd42 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py @@ -225,6 +225,7 @@ def copy_op_to_graph(org_instance, to_graph, variables, new_original_op, op_def) #Use Graph's hidden methods to add the op + to_graph._add_op(new_op) # pylint: disable=protected-access to_graph._record_op_seen_by_control_dependencies(new_op) for device_function in reversed(to_graph._device_function_stack): new_op._set_device(device_function(new_op)) diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index 2a97a79070..14ac529665 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -173,6 +173,9 @@ def copy_op_handler(info, op, copy_shape=True): if op._original_op: op_._original_op = op._original_op + # Add op to the graph + info.graph_._add_op(op_) + return op_, op_.outputs diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index cfef5e35f4..2217513966 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1635,8 +1635,6 @@ class Operation(object): self._id_value = self._graph._next_id() # pylint: disable=protected-access self._recompute_node_def() - self._graph._add_op(self) # pylint: disable=protected-access - def _reconstruct_sequence_inputs(self, op_def, inputs, attrs): """Regroups a flat list of input tensors into scalar and sequence inputs. @@ -3100,7 +3098,6 @@ class Graph(object): input_types=input_types, original_op=self._default_original_op, op_def=op_def) - self._create_op_helper(ret, compute_shapes=compute_shapes, compute_device=compute_device) return ret @@ -3139,6 +3136,8 @@ class Graph(object): # compute_shapes argument. if op._c_op or compute_shapes: # pylint: disable=protected-access set_shapes_for_outputs(op) + # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed. + self._add_op(op) # Apply any additional attributes requested. Do not overwrite any existing # attributes. diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 3eae3b5a25..b1ad6ad744 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -274,6 +274,7 @@ class OperationTest(test_util.TensorFlowTestCase): op1 = ops.Operation( ops._NodeDef("RefOutputFloatOutput", "op1"), g, [], [dtypes.float32_ref, dtypes.float32]) + g._add_op(op1) self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", op1.node_def) self.assertEquals([], list(op1.inputs)) ref_t, nonref_t = op1.values() @@ -282,12 +283,14 @@ class OperationTest(test_util.TensorFlowTestCase): ops._NodeDef("RefInputFloatInput", "op2"), g, [ref_t, nonref_t], [], input_types=[dtypes.float32_ref, dtypes.float32]) + g._add_op(op2) self.assertProtoEquals( "op:'RefInputFloatInput' name:'op2' input:'op1' input:'op1:1'", op2.node_def) self.assertEquals([ref_t, nonref_t], list(op2.inputs)) op3 = ops.Operation( ops._NodeDef("TwoFloatInputs", "op3"), g, [ref_t, nonref_t], []) + g._add_op(op3) self.assertProtoEquals( "op:'TwoFloatInputs' name:'op3' input:'op1' input:'op1:1'", op3.node_def) @@ -1884,6 +1887,16 @@ class GraphTest(test_util.TensorFlowTestCase): with session.Session() as sess: sess.run(a) + def testRunnableAfterInvalidShapeWithKernelLabelMap(self): + g = ops.Graph() + with g.as_default(): + with g._kernel_label_map({"KernelLabelRequired": "overload_1"}): + with self.assertRaises(ValueError): + test_ops.kernel_label_required(1) + a = constant_op.constant(1) + with session.Session() as sess: + sess.run(a) + @test_util.with_c_api class AttrScopeTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc index 25bb7af20c..dbabce0962 100644 --- a/tensorflow/python/framework/test_ops.cc +++ b/tensorflow/python/framework/test_ops.cc @@ -26,6 +26,16 @@ REGISTER_OP("KernelLabel") .Output("result: string") .SetShapeFn(shape_inference::ScalarShape); +REGISTER_OP("KernelLabelRequired") + .Input("input: int32") + .Output("result: string") + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle out; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &out)); + c->set_output(0, c->Scalar()); + return Status::OK(); + }); + REGISTER_OP("GraphDefVersion") .Output("version: int32") .SetIsStateful() @@ -104,6 +114,14 @@ REGISTER_KERNEL_BUILDER(Name("KernelLabel") .Label("overload_2"), KernelLabelOp); +// All "KernelLabelRequired" kernels have labels +REGISTER_KERNEL_BUILDER( + Name("KernelLabelRequired").Device(DEVICE_CPU).Label("overload_1"), + KernelLabelOp); +REGISTER_KERNEL_BUILDER( + Name("KernelLabelRequired").Device(DEVICE_CPU).Label("overload_2"), + KernelLabelOp); + class GraphDefVersionOp : public OpKernel { public: explicit GraphDefVersionOp(OpKernelConstruction* ctx) -- GitLab From ad1310a87caa14c495ad7ab47db7572443b2e7ef Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 29 Nov 2017 11:36:36 -0800 Subject: [PATCH 0949/1801] Add RandomDataset which generates pseudo random number of type int64. Add tf.contrib.data.shuffle_and_repeat which reshuffles its input on each epoch. Going forward, this will replace reshuffle_each_iteration=true. PiperOrigin-RevId: 177339570 --- tensorflow/contrib/data/BUILD | 1 + tensorflow/contrib/data/__init__.py | 1 + .../contrib/data/python/kernel_tests/BUILD | 4 +- .../kernel_tests/shuffle_dataset_op_test.py | 80 +++++++++ tensorflow/contrib/data/python/ops/BUILD | 32 ++++ .../contrib/data/python/ops/random_ops.py | 67 ++++++++ .../contrib/data/python/ops/shuffle_ops.py | 69 ++++++++ .../base_api/api_def_RandomDataset.pbtxt | 18 ++ tensorflow/core/kernels/BUILD | 13 ++ tensorflow/core/kernels/random_dataset_op.cc | 154 ++++++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 18 ++ tensorflow/python/data/ops/dataset_ops.py | 33 +++- 12 files changed, 486 insertions(+), 4 deletions(-) create mode 100644 tensorflow/contrib/data/python/ops/random_ops.py create mode 100644 tensorflow/contrib/data/python/ops/shuffle_ops.py create mode 100644 tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt create mode 100644 tensorflow/core/kernels/random_dataset_op.cc diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index f7d8a084d9..3b1c33063f 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -18,6 +18,7 @@ py_library( "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/contrib/data/python/ops:readers", + "//tensorflow/contrib/data/python/ops:shuffle_ops", "//tensorflow/contrib/data/python/ops:transformation_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:iterator_ops", diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 7c6244f22b..c9ad091bd4 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -66,6 +66,7 @@ from tensorflow.contrib.data.python.ops.readers import TextLineDataset from tensorflow.contrib.data.python.ops.readers import TFRecordDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan +from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat from tensorflow.python.data.ops.iterator_ops import Iterator # pylint: enable=unused-import diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 4cb69d7c8e..43431ca2c5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -414,12 +414,14 @@ py_test( py_test( name = "shuffle_dataset_op_test", - size = "small", + size = "medium", srcs = ["shuffle_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ + ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", "//tensorflow/contrib/data/python/ops:iterator_ops", + "//tensorflow/contrib/data/python/ops:shuffle_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py index 6b5b53cc0f..ba1be0690f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py @@ -22,8 +22,10 @@ import os import numpy as np +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import dataset_ops as contrib_dataset_ops from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops +from tensorflow.contrib.data.python.ops import shuffle_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op @@ -156,6 +158,13 @@ class ShuffleDatasetTest(test.TestCase): for i in range(5): self.assertEqual(10, counts[i]) + def testSeedNoneSeed2NonNone(self): + with self.assertRaises(ValueError): + dataset_ops.ShuffleDataset(dataset_ops.Dataset.range(5), + buffer_size=1, + seed=None, + seed2=10) + class ShuffleDatasetSerializationTest(test.TestCase): @@ -474,5 +483,76 @@ class ShuffleDatasetSerializationTest(test.TestCase): self.assertEqual(expected_outputs_sorted, sorted(actual)) +class ShuffleAndRepeatTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_ds(self, seed, count=5): + return dataset_ops.Dataset.range(20).apply( + shuffle_ops.shuffle_and_repeat(buffer_size=5, count=count, seed=seed)) + + def testCorrectOutput(self): + output = self.gen_outputs(lambda: self._build_ds(10), [], 100) + self.assertSequenceEqual( + sorted(output), sorted( + np.array([range(20) for _ in range(5)]).flatten())) + for i in range(5): + self.assertSequenceEqual(sorted(output[i * 20:(i + 1) * 20]), range(20)) + + def testReshuffling(self): + # Check that the output orders of different epochs are indeed different. + output = self.gen_outputs(lambda: self._build_ds(10), [], 100) + for i in range(4): + epoch1 = output[i * 20:(i + 1) * 20] + epoch2 = output[(i + 1) * 20:(i + 2) * 20] + self.assertNotEqual(epoch1, epoch2) + + def testSameOrderForSameSeeds(self): + output1 = self.gen_outputs(lambda: self._build_ds(10), [], 100) + output2 = self.gen_outputs(lambda: self._build_ds(10), [], 100) + self.assertEqual(output1, output2) + + def testDifferentOrderForDifferentSeeds(self): + output1 = self.gen_outputs(lambda: self._build_ds(10), [], 100) + output2 = self.gen_outputs(lambda: self._build_ds(20), [], 100) + self.assertNotEqual(output1, output2) + self.assertEqual(sorted(output1), sorted(output2)) + + def testCountNone(self): + output1 = self.gen_outputs( + lambda: self._build_ds(10, count=None), [], 100, verify_exhausted=False) + output2 = self.gen_outputs( + lambda: self._build_ds(20, count=None), [], 100, verify_exhausted=False) + self.assertNotEqual(output1, output2) + self.assertEqual(sorted(output1), sorted(output2)) + + def testCountMinusOne(self): + output1 = self.gen_outputs( + lambda: self._build_ds(10, count=-1), [], 100, verify_exhausted=False) + output2 = self.gen_outputs( + lambda: self._build_ds(20, count=-1), [], 100, verify_exhausted=False) + self.assertNotEqual(output1, output2) + self.assertEqual(sorted(output1), sorted(output2)) + + def testInfiniteOutputs(self): + # Asserting that the iterator is exhausted after producing 100 items should + # fail. + with self.assertRaises(AssertionError): + self.gen_outputs(lambda: self._build_ds(10, count=None), [], 100) + with self.assertRaises(AssertionError): + self.gen_outputs(lambda: self._build_ds(10, count=-1), [], 100) + + +class ShuffleAndRepeatSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_ds(self, seed): + return dataset_ops.Dataset.range(20).apply( + shuffle_ops.shuffle_and_repeat(buffer_size=5, count=5, seed=seed)) + + def testCore(self): + self.run_core_tests(lambda: self._build_ds(10), lambda: self._build_ds(20), + 100) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 25ed58cdf5..1f35ee056b 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -40,6 +40,25 @@ py_library( ], ) +py_library( + name = "random_ops", + srcs = [ + "random_ops.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:constant_op", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:random_seed", + "//tensorflow/python:tensor_shape", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + py_library( name = "readers", srcs = [ @@ -62,6 +81,19 @@ py_library( ], ) +py_library( + name = "shuffle_ops", + srcs = [ + "shuffle_ops.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":random_ops", + ":transformation_ops", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + py_library( name = "transformation_ops", srcs = [ diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py new file mode 100644 index 0000000000..7d727165fe --- /dev/null +++ b/tensorflow/contrib/data/python/ops/random_ops.py @@ -0,0 +1,67 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Datasets for random number generators.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_dataset_ops + + +class RandomDataset(dataset_ops.Dataset): + """A `Dataset` of pseudorandom values.""" + + def __init__(self, seed=None): + """A `Dataset` of pseudorandom values.""" + super(RandomDataset, self).__init__() + seed, seed2 = random_seed.get_seed(seed) + if seed is None: + self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") + else: + self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") + if seed2 is None: + self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") + else: + self._seed2 = ops.convert_to_tensor( + seed2, dtype=dtypes.int64, name="seed2") + + def _as_variant_tensor(self): + return gen_dataset_ops.random_dataset( + seed=self._seed, + seed2=self._seed2, + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return ops.Tensor + + @property + def output_shapes(self): + return tensor_shape.scalar() + + @property + def output_types(self): + return dtypes.int64 diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py new file mode 100644 index 0000000000..460732d65e --- /dev/null +++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py @@ -0,0 +1,69 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Experimental shuffle ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import random_ops +from tensorflow.python.data.ops import dataset_ops + + +def shuffle_and_repeat(buffer_size, count=None, seed=None): + """Shuffles and repeats a Dataset returning a new permutation for each epoch. + + `dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size, count))` + + is equivalent to + + `dataset.shuffle(buffer_size, reshuffle_each_iteration=True).repeat(count)` + + The difference is that the latter dataset is not serializable. So, + if you need to checkpoint an input pipeline with reshuffling you must use + this implementation. + + Args: + buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the + maximum number elements that will be buffered when prefetching. + count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + number of times the dataset should be repeated. The default behavior + (if `count` is `None` or `-1`) is for the dataset be repeated + indefinitely. + seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + random seed that will be used to create the distribution. See + @{tf.set_random_seed} for behavior. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.contrib.data.Dataset.apply}. + """ + def _apply_fn(dataset): # pylint: disable=missing-docstring + random_ds = random_ops.RandomDataset(seed).apply( + batching.batch_and_drop_remainder(2)) + if count is not None and count is not -1: + random_ds = random_ds.take(count) + + def map_fn(seeds): + return dataset_ops.ShuffleDataset( + input_dataset=dataset, + buffer_size=buffer_size, + seed=seeds[0], + reshuffle_each_iteration=False, + seed2=seeds[1]) + + return random_ds.flat_map(map_fn) + + return _apply_fn diff --git a/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt new file mode 100644 index 0000000000..0466b40f85 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt @@ -0,0 +1,18 @@ +op { + graph_op_name: "RandomDataset" + in_arg { + name: "seed" + description: <(ctx, "seed", &seed)); + + int64 seed2; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "seed2", &seed2)); + + // By TensorFlow convention, passing 0 for both seeds indicates + // that the shuffling should be seeded non-deterministically. + if (seed == 0 && seed2 == 0) { + seed = random::New64(); + seed2 = random::New64(); + } + + *output = new Dataset(ctx, seed, seed2); + } + + private: + class Dataset : public GraphDatasetBase { + public: + Dataset(OpKernelContext* ctx, int64 seed, int64 seed2) + : GraphDatasetBase(ctx), seed_(seed), seed2_(seed2) {} + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::Random")})); + } + + const DataTypeVector& output_dtypes() const override { + static DataTypeVector* dtypes = new DataTypeVector({DT_INT64}); + return *dtypes; + } + + const std::vector& output_shapes() const override { + static std::vector* shapes = + new std::vector({{}}); + return *shapes; + } + + string DebugString() override { + return strings::StrCat("RandomDatasetOp(", seed_, ", ", seed2_, + ")::Dataset"); + } + + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* seed = nullptr; + Node* seed2 = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(seed_, &seed)); + TF_RETURN_IF_ERROR(b->AddScalar(seed2_, &seed2)); + TF_RETURN_IF_ERROR(b->AddDataset(this, {seed, seed2}, output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + parent_generator_(dataset()->seed_, dataset()->seed2_), + generator_(&parent_generator_) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + Tensor value_tensor(cpu_allocator(), DT_INT64, {}); + value_tensor.scalar()() = Random(); + out_tensors->emplace_back(std::move(value_tensor)); + *end_of_sequence = false; + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("num_random_samples"), + num_random_samples_)); + return Status::OK(); + } + + Status RestoreInternal(OpKernelContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_random_samples"), + &num_random_samples_)); + parent_generator_ = + random::PhiloxRandom(dataset()->seed_, dataset()->seed2_); + generator_ = random::SingleSampleAdapter( + &parent_generator_); + generator_.Skip(num_random_samples_); + return Status::OK(); + } + + private: + random::SingleSampleAdapter::ResultType Random() + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + num_random_samples_++; + auto out = generator_(); + return out; + } + mutex mu_; + random::PhiloxRandom parent_generator_ GUARDED_BY(mu_); + random::SingleSampleAdapter generator_ + GUARDED_BY(mu_); + int64 num_random_samples_ GUARDED_BY(mu_) = 0; + }; + + const int64 seed_; + const int64 seed2_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("RandomDataset").Device(DEVICE_CPU), + RandomDatasetOp); + +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 6bf226e7a5..be41531347 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -469,6 +469,24 @@ stop: corresponds to stop in python's xrange(). step: corresponds to step in python's xrange(). )doc"); +REGISTER_OP("RandomDataset") + .Input("seed: int64") + .Input("seed2: int64") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetIsStateful() // TODO(b/65524810): Source dataset ops must be marked + // stateful to inhibit constant folding. + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a Dataset that returns pseudorandom numbers. + +seed: A scalar seed for the random number generator. If either seed or + seed2 is set to be non-zero, the random number generator is seeded + by the given seed. Otherwise, a random seed is used. +seed2: A second scalar seed to avoid seed collision. +)doc"); + REGISTER_OP("ShuffleDataset") .Input("input_dataset: variant") .Input("buffer_size: int64") diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index b5a8622306..927c6d5c02 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1234,13 +1234,40 @@ class ShuffleDataset(Dataset): input_dataset, buffer_size, seed=None, - reshuffle_each_iteration=None): - """See `Dataset.shuffle()` for details.""" + reshuffle_each_iteration=None, + seed2=None): + """Randomly shuffles the elements of this dataset. + + Args: + input_dataset: The input dataset. + buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the + number of elements from this dataset from which the new + dataset will sample. + seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + random seed that will be used to create the distribution. See + @{tf.set_random_seed} for behavior. + reshuffle_each_iteration: (Optional.) A boolean, which if true indicates + that the dataset should be pseudorandomly reshuffled each time it is + iterated over. (Defaults to `True`.) + seed2: (Optional.) A `tf.int64` scalar `tf.Tensor` used to avoid seed + collision. Users should generally not need to specify this. This is + supposed to be used when both the seeds for the Dataset op need to be + manually specified. If not None, seed must also be non-None. + + Returns: + A `Dataset`. + + Raises: + ValueError: if invalid arguments are provided. + """ super(ShuffleDataset, self).__init__() self._input_dataset = input_dataset self._buffer_size = ops.convert_to_tensor( buffer_size, dtype=dtypes.int64, name="buffer_size") - seed, seed2 = random_seed.get_seed(seed) + if seed2 is None: + seed, seed2 = random_seed.get_seed(seed) + elif seed is None: + raise ValueError("seed must be non-None if seed2 is non-None.") if seed is None: self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") else: -- GitLab From c0bd9dffccb29d4c01a2a18fc23b0ecad41aa4c6 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 29 Nov 2017 11:59:47 -0800 Subject: [PATCH 0950/1801] [tf.data] Fix compiler warnings about unused captures in lambda expressions. PiperOrigin-RevId: 177343020 --- tensorflow/core/kernels/dataset_utils.cc | 2 +- tensorflow/core/kernels/filter_dataset_op.cc | 2 +- tensorflow/core/kernels/group_by_window_dataset_op.cc | 6 +++--- tensorflow/core/kernels/map_and_batch_dataset_op.cc | 4 ++-- tensorflow/core/kernels/map_dataset_op.cc | 2 +- tensorflow/core/kernels/parallel_map_dataset_op.cc | 4 ++-- tensorflow/core/kernels/scan_dataset_op.cc | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/dataset_utils.cc b/tensorflow/core/kernels/dataset_utils.cc index cd58c80912..bd20e20cad 100644 --- a/tensorflow/core/kernels/dataset_utils.cc +++ b/tensorflow/core/kernels/dataset_utils.cc @@ -32,7 +32,7 @@ Status MakeIteratorFromInputElement( // is always 0, so a negative random step ID should suffice. opts.step_id = CapturedFunction::generate_step_id(); ScopedStepContainer step_container( - opts.step_id, [captured_func, ctx](const string& name) { + opts.step_id, [captured_func](const string& name) { captured_func->resource_manager()->Cleanup(name).IgnoreError(); }); opts.step_container = &step_container; diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc index 0ac6cd9a98..67417d467d 100644 --- a/tensorflow/core/kernels/filter_dataset_op.cc +++ b/tensorflow/core/kernels/filter_dataset_op.cc @@ -149,7 +149,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { FunctionLibraryRuntime::Options opts; opts.step_id = CapturedFunction::generate_step_id(); ScopedStepContainer step_container( - opts.step_id, [this, ctx](const string& name) { + opts.step_id, [this](const string& name) { dataset() ->captured_func_->resource_manager() ->Cleanup(name) diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/group_by_window_dataset_op.cc index 8644bcf9b5..604555a560 100644 --- a/tensorflow/core/kernels/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/group_by_window_dataset_op.cc @@ -169,7 +169,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { opts.step_id = CapturedFunction::generate_step_id(); opts.runner = ctx->runner(); ScopedStepContainer step_container( - opts.step_id, [this, ctx](const string& name) { + opts.step_id, [this](const string& name) { dataset() ->captured_key_func_->resource_manager() ->Cleanup(name) @@ -198,7 +198,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { opts2.step_id = CapturedFunction::generate_step_id(); opts2.runner = ctx->runner(); ScopedStepContainer step_container2( - opts2.step_id, [this, ctx](const string& name) { + opts2.step_id, [this](const string& name) { dataset() ->captured_window_size_func_->resource_manager() ->Cleanup(name) @@ -257,7 +257,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { opts.step_id = CapturedFunction::generate_step_id(); opts.runner = ctx->runner(); ScopedStepContainer step_container( - opts.step_id, [this, ctx](const string& name) { + opts.step_id, [this](const string& name) { dataset() ->captured_reduce_func_->resource_manager() ->Cleanup(name) diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/map_and_batch_dataset_op.cc index ad1e356dbd..9bd66e681f 100644 --- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/map_and_batch_dataset_op.cc @@ -239,8 +239,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { // to unblock a consumer. FunctionLibraryRuntime::Options opts; opts.step_id = CapturedFunction::generate_step_id(); - ScopedStepContainer* step_container = new ScopedStepContainer( - opts.step_id, [this, ctx](const string& name) { + ScopedStepContainer* step_container = + new ScopedStepContainer(opts.step_id, [this](const string& name) { dataset() ->captured_func_->resource_manager() ->Cleanup(name) diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/map_dataset_op.cc index 23148f122d..29899a987e 100644 --- a/tensorflow/core/kernels/map_dataset_op.cc +++ b/tensorflow/core/kernels/map_dataset_op.cc @@ -146,7 +146,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { FunctionLibraryRuntime::Options opts; opts.step_id = CapturedFunction::generate_step_id(); ScopedStepContainer step_container( - opts.step_id, [this, ctx](const string& name) { + opts.step_id, [this](const string& name) { dataset() ->captured_func_->resource_manager() ->Cleanup(name) diff --git a/tensorflow/core/kernels/parallel_map_dataset_op.cc b/tensorflow/core/kernels/parallel_map_dataset_op.cc index 2be87f4bde..b9175fe904 100644 --- a/tensorflow/core/kernels/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/parallel_map_dataset_op.cc @@ -195,8 +195,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { FunctionLibraryRuntime::Options opts; opts.step_id = CapturedFunction::generate_step_id(); - ScopedStepContainer* step_container = new ScopedStepContainer( - opts.step_id, [this, ctx](const string& name) { + ScopedStepContainer* step_container = + new ScopedStepContainer(opts.step_id, [this](const string& name) { dataset() ->captured_func_->resource_manager() ->Cleanup(name) diff --git a/tensorflow/core/kernels/scan_dataset_op.cc b/tensorflow/core/kernels/scan_dataset_op.cc index 76c219f1ae..bc52322022 100644 --- a/tensorflow/core/kernels/scan_dataset_op.cc +++ b/tensorflow/core/kernels/scan_dataset_op.cc @@ -132,7 +132,7 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { FunctionLibraryRuntime::Options opts; opts.step_id = CapturedFunction::generate_step_id(); ScopedStepContainer step_container( - opts.step_id, [this, ctx](const string& name) { + opts.step_id, [this](const string& name) { dataset() ->captured_func_->resource_manager() ->Cleanup(name) -- GitLab From dcf9b035a09904322020d87a9324f04bcaf89eec Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 29 Nov 2017 12:06:55 -0800 Subject: [PATCH 0951/1801] Made sure the unknown shapes of placeholders always propagate to their fanouts PiperOrigin-RevId: 177344207 --- .../core/grappler/costs/graph_properties.cc | 12 ++++--- .../grappler/costs/graph_properties_test.cc | 31 ++++++++++++------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index fb7e20fca0..fbc52e9bd1 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -313,15 +313,17 @@ class SymbolicShapeRefiner { Status UpdateNode(const Node* node, bool relax, bool* refined) { return shape_refiner_->UpdateNode(node, relax, refined); } - Status SetShape(const Node* node, int output_port, - shape_inference::ShapeHandle shape) { - return shape_refiner_->SetShape(node, output_port, shape); - } Status SetUnknownShape(const Node* node, int output_port) { shape_inference::ShapeHandle shape = GetUnknownOutputShape(node, output_port); - return shape_refiner_->SetShape(node, output_port, shape); + InferenceContext* ctx = GetContext(node); + if (ctx == nullptr) { + return errors::InvalidArgument("Missing context"); + } + ctx->set_output(output_port, shape); + return Status::OK(); } + struct ShapeId { const Node* node; int port_id; diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index ad8e768f1f..cc40ff2cfc 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -856,7 +856,6 @@ TEST_F(GraphPropertiesTest, FedNodes) { cluster_->GetDeviceNames()); GrapplerItem item; CHECK(fake_input.NextItem(&item)); - item.feed.emplace_back("AddN", Tensor()); { // Conservative shape analysis: the shape of fed ports should be unknown @@ -864,17 +863,27 @@ TEST_F(GraphPropertiesTest, FedNodes) { Status s = properties.InferStatically(false); TF_CHECK_OK(s); for (const auto& node : item.graph.node()) { - if (node.name() == "AddN") { - const auto in_props = properties.GetInputProperties(node.name()); - EXPECT_EQ(1, in_props.size()); - const OpInfo::TensorProperties& in_prop = in_props[0]; - EXPECT_EQ(DT_FLOAT, in_prop.dtype()); + if (node.op() == "Const") { + continue; + } + const auto in_props = properties.GetInputProperties(node.name()); + EXPECT_EQ(1, in_props.size()); + const OpInfo::TensorProperties& in_prop = in_props[0]; + const auto out_props = properties.GetOutputProperties(node.name()); + EXPECT_EQ(1, out_props.size()); + const OpInfo::TensorProperties& out_prop = out_props[0]; + + if (node.name() == "x") { + // x is fed: its input should have a known shape, while its output + // doesn't EXPECT_FALSE(in_prop.shape().unknown_rank()); - EXPECT_EQ(2, in_prop.shape().dim_size()); - const auto out_props = properties.GetOutputProperties(node.name()); - EXPECT_EQ(1, out_props.size()); - EXPECT_EQ(DT_FLOAT, in_prop.dtype()); + EXPECT_EQ(1, in_prop.shape().dim_size()); + EXPECT_EQ(2, in_prop.shape().dim(0).size()); + EXPECT_TRUE(out_prop.shape().unknown_rank()); + } else if (node.op() == "Square" || node.op() == "AddN") { + // These nodes are in the fanout of x: their shapes should be unknown. EXPECT_TRUE(in_prop.shape().unknown_rank()); + EXPECT_TRUE(out_prop.shape().unknown_rank()); } } } @@ -885,7 +894,7 @@ TEST_F(GraphPropertiesTest, FedNodes) { Status s = properties.InferStatically(true); TF_CHECK_OK(s); for (const auto& node : item.graph.node()) { - if (node.name() == "AddN") { + if (node.op() == "Square" || node.op() == "AddN") { const auto in_props = properties.GetInputProperties(node.name()); EXPECT_EQ(1, in_props.size()); const OpInfo::TensorProperties& in_prop = in_props[0]; -- GitLab From 97da160010a47ba37afa1afca914038d3ab0ba55 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 12:31:41 -0800 Subject: [PATCH 0952/1801] Allow the toolchain defaults to be used instead of hard-coding -Os. For example toolchains with clang may set -Oz which is more analogous to gcc's -Os. -Os for clang is closer to -O2. PiperOrigin-RevId: 177347371 --- .../contrib/android/cmake/CMakeLists.txt | 2 ++ tensorflow/core/BUILD | 10 ++++----- tensorflow/tensorflow.bzl | 22 ++++++++++++------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt index aba356d616..a115d1610e 100644 --- a/tensorflow/contrib/android/cmake/CMakeLists.txt +++ b/tensorflow/contrib/android/cmake/CMakeLists.txt @@ -34,6 +34,8 @@ add_library(lib_tf STATIC IMPORTED ) set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION ${PREBUILT_DIR}/lib/libtensorflow-core.a) # Change to compile flags should be replicated into bazel build file +# TODO: Consider options other than -O2 for binary size. +# e.g. -Os for gcc, and -Oz for clang. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \ -std=c++11 -fno-rtti -fno-exceptions \ -O2 -Wno-narrowing -fomit-frame-pointer \ diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 4ca6fb1631..a1d61a7932 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1008,7 +1008,7 @@ filegroup( cc_library( name = "android_tensorflow_lib_lite", srcs = if_android(["//tensorflow/core:android_srcs"]), - copts = tf_copts() + if_not_android_mips_and_mips64(["-Os"]), + copts = tf_copts(android_optimization_level_override = None), linkopts = ["-lz"], tags = [ "manual", @@ -1096,8 +1096,7 @@ cc_library( cc_library( name = "android_tensorflow_lib_selective_registration", srcs = if_android(["//tensorflow/core:android_srcs"]), - copts = tf_copts() + [ - "-Os", + copts = tf_copts(android_optimization_level_override = None) + [ "-DSUPPORT_SELECTIVE_REGISTRATION", ], tags = [ @@ -1118,8 +1117,7 @@ cc_library( cc_library( name = "android_tensorflow_lib_selective_registration_nortti", srcs = if_android(["//tensorflow/core:android_srcs"]), - copts = tf_copts() + tf_opts_nortti_if_android() + [ - "-Os", + copts = tf_copts(android_optimization_level_override = None) + tf_opts_nortti_if_android() + [ "-DSUPPORT_SELECTIVE_REGISTRATION", ], tags = [ @@ -1198,7 +1196,7 @@ cc_library( "framework/tensor_testutil.h", "util/reporter.h", ], - copts = tf_copts() + ["-Os"], + copts = tf_copts(android_optimization_level_override = None), tags = [ "manual", "notap", diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 8d392fb36d..76ef59484f 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -167,7 +167,19 @@ WIN_COPTS = [ ] # LINT.IfChange -def tf_copts(): +def tf_copts(android_optimization_level_override="-O2"): + # For compatibility reasons, android_optimization_level_override + # is currently only being set for Android. + # To clear this value, and allow the CROSSTOOL default + # to be used, pass android_optimization_level_override=None + android_copts = [ + "-std=c++11", + "-DTF_LEAN_BINARY", + "-Wno-narrowing", + "-fomit-frame-pointer", + ] + if android_optimization_level_override: + android_copts.append(android_optimization_level_override) return ( if_not_windows([ "-DEIGEN_AVOID_STL_ARRAY", @@ -180,13 +192,7 @@ def tf_copts(): + if_android_arm(["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) + select({ - clean_dep("//tensorflow:android"): [ - "-std=c++11", - "-DTF_LEAN_BINARY", - "-O2", - "-Wno-narrowing", - "-fomit-frame-pointer", - ], + clean_dep("//tensorflow:android"): android_copts, clean_dep("//tensorflow:darwin"): [], clean_dep("//tensorflow:windows"): WIN_COPTS, clean_dep("//tensorflow:windows_msvc"): WIN_COPTS, -- GitLab From 3bf2f35c7131a0cbea3b05e7aababc23a1cf2d8f Mon Sep 17 00:00:00 2001 From: hyunyoung2 Date: Thu, 30 Nov 2017 05:42:43 +0900 Subject: [PATCH 0953/1801] Update datasets.md (#14941) * Update datasets.md specifically I change Iterator into tf.data.Iterator * Update datasets.md I fixed the indentation according to your requesting. --- tensorflow/docs_src/programmers_guide/datasets.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index f458cbcef2..9ced56f0f5 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -190,8 +190,8 @@ validation_dataset = tf.data.Dataset.range(50) # A reinitializable iterator is defined by its structure. We could use the # `output_types` and `output_shapes` properties of either `training_dataset` # or `validation_dataset` here, because they are compatible. -iterator = Iterator.from_structure(training_dataset.output_types, - training_dataset.output_shapes) +iterator = tf.data.Iterator.from_structure(training_dataset.output_types, + training_dataset.output_shapes) next_element = iterator.get_next() training_init_op = iterator.make_initializer(training_dataset) -- GitLab From 037acadcfc5f2b96a2e9c1653f28131bb91858aa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 29 Nov 2017 12:56:52 -0800 Subject: [PATCH 0954/1801] Deleted unused method arguments PiperOrigin-RevId: 177350575 --- .../grappler/optimizers/constant_folding.cc | 25 +++---------------- .../grappler/optimizers/constant_folding.h | 6 ++--- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 33a9dddba7..03eaa4a84a 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -190,8 +190,7 @@ Status ConvertShapeToConstant(const string& op, const DataType& type, return Status::OK(); } -Status ConstantFolding::MaterializeShapes(const GrapplerItem& item, - const GraphProperties& properties) { +Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We may add some nodes to the graph to encode control dependencies: there is // no need to process these, so only iterate over the nodes of the input // graph. @@ -285,22 +284,6 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item, return Status::OK(); } -bool ShapesEqual(const TensorShapeProto& shape1, - const TensorShapeProto& shape2) { - if (shape1.unknown_rank() || shape2.unknown_rank()) { - return false; - } - if (shape1.dim_size() != shape2.dim_size()) { - return false; - } - for (int i = 0; i < shape1.dim_size(); ++i) { - if (shape1.dim(i).size() != shape2.dim(i).size()) { - return false; - } - } - return true; -} - namespace { bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties, BCast::Vec* shape, int64* min_id) { @@ -504,7 +487,7 @@ Status ConstantFolding::MaterializeReductionIndices( } Status ConstantFolding::MaterializeConstants( - const GrapplerItem& item, const GraphProperties& properties) { + const GraphProperties& properties) { const int node_count = graph_.node_size(); for (int i = 0; i < node_count; ++i) { NodeDef& node = *graph_.mutable_node(i); @@ -1171,10 +1154,10 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, // graph. That's because it's possible to feed a placeholder with a tensor // of any shape, which could make the static information inconsistent with // the shapes actually fed. - TF_RETURN_IF_ERROR(MaterializeShapes(item, properties)); + TF_RETURN_IF_ERROR(MaterializeShapes(properties)); } if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) { - TF_RETURN_IF_ERROR(MaterializeConstants(item, properties)); + TF_RETURN_IF_ERROR(MaterializeConstants(properties)); } TF_RETURN_IF_ERROR(FoldGraph(output)); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index f04f413c10..7c5db2a70f 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -51,16 +51,14 @@ class ConstantFolding : public GraphOptimizer { const GraphDef& optimize_output, double result) override; private: - Status MaterializeShapes(const GrapplerItem& item, - const GraphProperties& properties); + Status MaterializeShapes(const GraphProperties& properties); Status MaterializeBroadcastGradientArgs(const NodeDef& node, const GraphProperties& properties); Status MaterializeReductionIndices(NodeDef* node, const GraphProperties& properties); - Status MaterializeConstants(const GrapplerItem& item, - const GraphProperties& properties); + Status MaterializeConstants(const GraphProperties& properties); bool IsFoldable(const NodeDef& node) const; Status EvaluateNode(const NodeDef& node, -- GitLab From d1aea3be42f6153a970319a298eb55372ab9aa2e Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 29 Nov 2017 13:02:30 -0800 Subject: [PATCH 0955/1801] Clarify the role of replicate_model_fn.Mode better. PiperOrigin-RevId: 177351409 --- .../estimator/python/estimator/replicate_model_fn.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index 6f7f37473f..f5154231da 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -47,7 +47,11 @@ from tensorflow.python.training import training_util class Mode(object): - """Modes for variables replication used for forcing a particular mode.""" + """Modes for variables replication used for forcing a particular mode. + + Forcing a mode is meant for performance experimentation purposes rather than + for general use cases. + """ AUTO = 0 """Use internal heuristics for choosing the best Mode value. -- GitLab From ec4d31e82c2237824276eef302d9edc38c28c3a2 Mon Sep 17 00:00:00 2001 From: Charles Shenton Date: Thu, 30 Nov 2017 08:15:38 +1100 Subject: [PATCH 0956/1801] Half Normal Distribution (and inverse error function) (#14056) * foldednormal docstring * folded __init__ method * prob, log_prob methods * rewrote halfnormal docstring * initial implementation of dist methods * halfnormal unit tests * registered HalfNormal to contrib.distributions * added erfinv function * unit tests for erfinv * registered erfinv symbol * cdf, pdf now deal with x < 0 correctly * pylint fixes * cuda_py test reference in BUILD * erfinv fixes * corrections to scipy reference tests * Added reference to entropy test case. --- tensorflow/contrib/distributions/BUILD | 18 + tensorflow/contrib/distributions/__init__.py | 2 + .../python/kernel_tests/half_normal_test.py | 320 ++++++++++++++++++ .../distributions/python/ops/half_normal.py | 170 ++++++++++ .../distributions/special_math_test.py | 26 ++ .../python/ops/distributions/special_math.py | 24 ++ 6 files changed, 560 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/half_normal.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 145b9495ff..b2c641f8ab 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -204,6 +204,24 @@ cuda_py_test( ], ) +cuda_py_test( + name = "half_normal_test", + size = "medium", + srcs = ["python/kernel_tests/half_normal_test.py"], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:nn_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + cuda_py_test( name = "inverse_gamma_test", srcs = ["python/kernel_tests/inverse_gamma_test.py"], diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 0d12d83893..66827179e9 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -36,6 +36,7 @@ from tensorflow.contrib.distributions.python.ops.distribution_util import softpl from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag from tensorflow.contrib.distributions.python.ops.estimator import * from tensorflow.contrib.distributions.python.ops.geometric import * +from tensorflow.contrib.distributions.python.ops.half_normal import * from tensorflow.contrib.distributions.python.ops.independent import * from tensorflow.contrib.distributions.python.ops.inverse_gamma import * from tensorflow.contrib.distributions.python.ops.logistic import * @@ -107,6 +108,7 @@ _allowed_symbols = [ 'Gamma', 'GammaWithSoftplusConcentrationRate', 'Geometric', + 'HalfNormal', 'Independent', 'InverseGamma', 'InverseGammaWithSoftplusConcentrationRate', diff --git a/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py new file mode 100644 index 0000000000..a7571806f2 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py @@ -0,0 +1,320 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for initializers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import importlib +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import variables +from tensorflow.contrib.distributions.python.ops import half_normal as hn_lib +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + + +def try_import(name): # pylint: disable=invalid-name + module = None + try: + module = importlib.import_module(name) + except ImportError as e: + tf_logging.warning("Could not import %s: %s" % (name, str(e))) + return module + +stats = try_import("scipy.stats") + + +class HalfNormalTest(test.TestCase): + + def setUp(self): + self._rng = np.random.RandomState(123) + + def assertAllFinite(self, tensor): + is_finite = np.isfinite(tensor.eval()) + all_true = np.ones_like(is_finite, dtype=np.bool) + self.assertAllEqual(all_true, is_finite) + + def _testParamShapes(self, sample_shape, expected): + with self.test_session(): + param_shapes = hn_lib.HalfNormal.param_shapes(sample_shape) + scale_shape = param_shapes["scale"] + self.assertAllEqual(expected, scale_shape.eval()) + scale = array_ops.ones(scale_shape) + self.assertAllEqual( + expected, + array_ops.shape(hn_lib.HalfNormal(scale).sample()).eval()) + + def _testParamStaticShapes(self, sample_shape, expected): + param_shapes = hn_lib.HalfNormal.param_static_shapes(sample_shape) + scale_shape = param_shapes["scale"] + self.assertEqual(expected, scale_shape) + + def _testBatchShapes(self, dist, tensor): + self.assertAllEqual(dist.batch_shape_tensor().eval(), tensor.shape) + self.assertAllEqual(dist.batch_shape_tensor().eval(), tensor.eval().shape) + self.assertAllEqual(dist.batch_shape, tensor.shape) + self.assertAllEqual(dist.batch_shape, tensor.eval().shape) + + def testParamShapes(self): + sample_shape = [10, 3, 4] + self._testParamShapes(sample_shape, sample_shape) + self._testParamShapes(constant_op.constant(sample_shape), sample_shape) + + def testParamStaticShapes(self): + sample_shape = [10, 3, 4] + self._testParamStaticShapes(sample_shape, sample_shape) + self._testParamStaticShapes( + tensor_shape.TensorShape(sample_shape), sample_shape) + + def testHalfNormalLogPDF(self): + with self.test_session(): + batch_size = 6 + scale = constant_op.constant([3.0] * batch_size) + x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32) + halfnorm = hn_lib.HalfNormal(scale=scale) + + log_pdf = halfnorm.log_prob(x) + self._testBatchShapes(halfnorm, log_pdf) + + pdf = halfnorm.prob(x) + self._testBatchShapes(halfnorm, pdf) + + if not stats: + return + expected_log_pdf = stats.halfnorm(scale=scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf.eval()) + self.assertAllClose(np.exp(expected_log_pdf), pdf.eval()) + + def testHalfNormalLogPDFMultidimensional(self): + with self.test_session(): + batch_size = 6 + scale = constant_op.constant([[3.0, 1.0]] * batch_size) + x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T + halfnorm = hn_lib.HalfNormal(scale=scale) + + log_pdf = halfnorm.log_prob(x) + self._testBatchShapes(halfnorm, log_pdf) + + pdf = halfnorm.prob(x) + self._testBatchShapes(halfnorm, pdf) + + if not stats: + return + expected_log_pdf = stats.halfnorm(scale=scale.eval()).logpdf(x) + self.assertAllClose(expected_log_pdf, log_pdf.eval()) + self.assertAllClose(np.exp(expected_log_pdf), pdf.eval()) + + def testHalfNormalCDF(self): + with self.test_session(): + batch_size = 50 + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + halfnorm = hn_lib.HalfNormal(scale=scale) + + cdf = halfnorm.cdf(x) + self._testBatchShapes(halfnorm, cdf) + + log_cdf = halfnorm.log_cdf(x) + self._testBatchShapes(halfnorm, log_cdf) + + if not stats: + return + expected_logcdf = stats.halfnorm(scale=scale).logcdf(x) + self.assertAllClose(expected_logcdf, log_cdf.eval(), atol=0) + self.assertAllClose(np.exp(expected_logcdf), cdf.eval(), atol=0) + + def testHalfNormalSurvivalFunction(self): + with self.test_session(): + batch_size = 50 + scale = self._rng.rand(batch_size) + 1.0 + x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64) + halfnorm = hn_lib.HalfNormal(scale=scale) + + sf = halfnorm.survival_function(x) + self._testBatchShapes(halfnorm, sf) + + log_sf = halfnorm.log_survival_function(x) + self._testBatchShapes(halfnorm, log_sf) + + if not stats: + return + expected_logsf = stats.halfnorm(scale=scale).logsf(x) + self.assertAllClose(expected_logsf, log_sf.eval(), atol=0) + self.assertAllClose(np.exp(expected_logsf), sf.eval(), atol=0) + + def testHalfNormalQuantile(self): + with self.test_session(): + batch_size = 50 + scale = self._rng.rand(batch_size) + 1.0 + p = np.linspace(0., 1.0, batch_size).astype(np.float64) + + halfnorm = hn_lib.HalfNormal(scale=scale) + x = halfnorm.quantile(p) + self._testBatchShapes(halfnorm, x) + + if not stats: + return + expected_x = stats.halfnorm(scale=scale).ppf(p) + self.assertAllClose(expected_x, x.eval(), atol=0) + + def testFiniteGradients(self): + for dtype in [np.float32, np.float64]: + g = ops.Graph() + with g.as_default(): + scale = variables.Variable(dtype(3.0)) + dist = hn_lib.HalfNormal(scale=scale) + x = np.array([0.01, 0.1, 1., 5., 10.]).astype(dtype) + for func in [ + dist.cdf, dist.log_cdf, dist.survival_function, + dist.log_prob, dist.prob, dist.log_survival_function, + ]: + print(func.__name__) + value = func(x) + grads = gradients_impl.gradients(value, [scale]) + with self.test_session(graph=g): + variables.global_variables_initializer().run() + self.assertAllFinite(value) + self.assertAllFinite(grads[0]) + + def testHalfNormalEntropy(self): + with self.test_session(): + scale = np.array([[1.0, 2.0, 3.0]]) + halfnorm = hn_lib.HalfNormal(scale=scale) + + # See https://en.wikipedia.org/wiki/Half-normal_distribution for the + # entropy formula used here. + expected_entropy = 0.5 * np.log(np.pi * scale ** 2.0 / 2.0) + 0.5 + + entropy = halfnorm.entropy() + self._testBatchShapes(halfnorm, entropy) + self.assertAllClose(expected_entropy, entropy.eval()) + + def testHalfNormalMeanAndMode(self): + with self.test_session(): + scale = np.array([11., 12., 13.]) + + halfnorm = hn_lib.HalfNormal(scale=scale) + expected_mean = scale * np.sqrt(2.0) / np.sqrt(np.pi) + + self.assertAllEqual((3,), halfnorm.mean().eval().shape) + self.assertAllEqual(expected_mean, halfnorm.mean().eval()) + + self.assertAllEqual((3,), halfnorm.mode().eval().shape) + self.assertAllEqual([0., 0., 0.], halfnorm.mode().eval()) + + def testHalfNormalVariance(self): + with self.test_session(): + scale = np.array([7., 7., 7.]) + halfnorm = hn_lib.HalfNormal(scale=scale) + expected_variance = scale ** 2.0 * (1.0 - 2.0 / np.pi) + + self.assertAllEqual((3,), halfnorm.variance().eval().shape) + self.assertAllEqual(expected_variance, halfnorm.variance().eval()) + + def testHalfNormalStandardDeviation(self): + with self.test_session(): + scale = np.array([7., 7., 7.]) + halfnorm = hn_lib.HalfNormal(scale=scale) + expected_variance = scale ** 2.0 * (1.0 - 2.0 / np.pi) + + self.assertAllEqual((3,), halfnorm.stddev().shape) + self.assertAllEqual(np.sqrt(expected_variance), halfnorm.stddev().eval()) + + def testHalfNormalSample(self): + with self.test_session(): + scale = constant_op.constant(3.0) + n = constant_op.constant(100000) + halfnorm = hn_lib.HalfNormal(scale=scale) + + sample = halfnorm.sample(n) + + self.assertEqual(sample.eval().shape, (100000,)) + self.assertAllClose(sample.eval().mean(), + 3.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(halfnorm.batch_shape_tensor().eval())) + self.assertAllEqual(expected_shape, sample.shape) + self.assertAllEqual(expected_shape, sample.eval().shape) + + expected_shape_static = (tensor_shape.TensorShape( + [n.eval()]).concatenate(halfnorm.batch_shape)) + self.assertAllEqual(expected_shape_static, sample.shape) + self.assertAllEqual(expected_shape_static, sample.eval().shape) + + def testHalfNormalSampleMultiDimensional(self): + with self.test_session(): + batch_size = 2 + scale = constant_op.constant([[2.0, 3.0]] * batch_size) + n = constant_op.constant(100000) + halfnorm = hn_lib.HalfNormal(scale=scale) + + sample = halfnorm.sample(n) + self.assertEqual(sample.shape, (100000, batch_size, 2)) + self.assertAllClose(sample.eval()[:, 0, 0].mean(), + 2.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1) + self.assertAllClose(sample.eval()[:, 0, 1].mean(), + 3.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1) + + expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( + tensor_shape.TensorShape(halfnorm.batch_shape_tensor().eval())) + self.assertAllEqual(expected_shape, sample.shape) + self.assertAllEqual(expected_shape, sample.eval().shape) + + expected_shape_static = (tensor_shape.TensorShape( + [n.eval()]).concatenate(halfnorm.batch_shape)) + self.assertAllEqual(expected_shape_static, sample.shape) + self.assertAllEqual(expected_shape_static, sample.eval().shape) + + def testNegativeSigmaFails(self): + with self.test_session(): + halfnorm = hn_lib.HalfNormal(scale=[-5.], validate_args=True, name="G") + with self.assertRaisesOpError("Condition x > 0 did not hold"): + halfnorm.mean().eval() + + def testHalfNormalShape(self): + with self.test_session(): + scale = constant_op.constant([6.0] * 5) + halfnorm = hn_lib.HalfNormal(scale=scale) + + self.assertEqual(halfnorm.batch_shape_tensor().eval(), [5]) + self.assertEqual(halfnorm.batch_shape, tensor_shape.TensorShape([5])) + self.assertAllEqual(halfnorm.event_shape_tensor().eval(), []) + self.assertEqual(halfnorm.event_shape, tensor_shape.TensorShape([])) + + def testHalfNormalShapeWithPlaceholders(self): + scale = array_ops.placeholder(dtype=dtypes.float32) + halfnorm = hn_lib.HalfNormal(scale=scale) + + with self.test_session() as sess: + # get_batch_shape should return an "" tensor. + self.assertEqual(halfnorm.batch_shape, tensor_shape.TensorShape(None)) + self.assertEqual(halfnorm.event_shape, ()) + self.assertAllEqual(halfnorm.event_shape_tensor().eval(), []) + self.assertAllEqual( + sess.run(halfnorm.batch_shape_tensor(), + feed_dict={scale: [1.0, 2.0]}), [2]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py new file mode 100644 index 0000000000..12059b6a9e --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/half_normal.py @@ -0,0 +1,170 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""The Half Normal distribution class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import random_ops +from tensorflow.python.ops.distributions import distribution +from tensorflow.python.ops.distributions import special_math + + +__all__ = [ + "HalfNormal", +] + + +class HalfNormal(distribution.Distribution): + """The Half Normal distribution with scale `scale`. + + #### Mathematical details + + The half normal is a transformation of a centered normal distribution. + If some random variable `X` has normal distribution, + ```none + X ~ Normal(0.0, scale) + Y = |X| + ``` + Then `Y` will have half normal distribution. The probability density + function (pdf) is: + + ```none + pdf(x; scale, x > 0) = sqrt(2) / (scale * sqrt(pi)) * + exp(- 1/2 * (x / scale) ** 2) + ) + ``` + Where `scale = sigma` is the standard deviation of the underlying normal + distribution. + + #### Examples + + Examples of initialization of one or a batch of distributions. + + ```python + # Define a single scalar HalfNormal distribution. + dist = tf.contrib.distributions.HalfNormal(scale=3.0) + + # Evaluate the cdf at 1, returning a scalar. + dist.cdf(1.) + + # Define a batch of two scalar valued HalfNormals. + # The first has scale 11.0, the second 22.0 + dist = tf.contrib.distributions.HalfNormal(scale=[11.0, 22.0]) + + # Evaluate the pdf of the first distribution on 1.0, and the second on 1.5, + # returning a length two tensor. + dist.prob([1.0, 1.5]) + + # Get 3 samples, returning a 3 x 2 tensor. + dist.sample([3]) + ``` + + """ + def __init__(self, + scale, + validate_args=False, + allow_nan_stats=True, + name="HalfNormal"): + """Construct HalfNormals with scale `scale`. + + Args: + scale: Floating point tensor; the scales of the distribution(s). + Must contain only positive values. + validate_args: Python `bool`, default `False`. When `True` distribution + parameters are checked for validity despite possibly degrading runtime + performance. When `False` invalid inputs may silently render incorrect + outputs. + allow_nan_stats: Python `bool`, default `True`. When `True`, + statistics (e.g., mean, mode, variance) use the value "`NaN`" to + indicate the result is undefined. When `False`, an exception is raised + if one or more of the statistic's batch members are undefined. + name: Python `str` name prefixed to Ops created by this class. + """ + parameters = locals() + with ops.name_scope(name, values=[scale]): + with ops.control_dependencies([check_ops.assert_positive(scale)] if + validate_args else []): + self._scale = array_ops.identity(scale, name="scale") + super(HalfNormal, self).__init__( + dtype=self._scale.dtype, + reparameterization_type=distribution.FULLY_REPARAMETERIZED, + validate_args=validate_args, + allow_nan_stats=allow_nan_stats, + parameters=parameters, + graph_parents=[self._scale], + name=name) + + @staticmethod + def _param_shapes(sample_shape): + return {'scale': ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)} + + @property + def scale(self): + """Distribution parameter for the scale.""" + return self._scale + + def _batch_shape_tensor(self): + return array_ops.shape(self.scale) + + def _batch_shape(self): + return self.scale.shape + + def _event_shape_tensor(self): + return constant_op.constant([], dtype=dtypes.int32) + + def _event_shape(self): + return tensor_shape.scalar() + + def _sample_n(self, n, seed=None): + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) + sampled = random_ops.random_normal( + shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=seed) + return math_ops.abs(sampled * self.scale) + + def _prob(self, x): + coeff = np.sqrt(2) / self.scale / np.sqrt(np.pi) + pdf = coeff * math_ops.exp(- 0.5 * (x / self.scale) ** 2) + return pdf * math_ops.cast(x >= 0, self.dtype) + + def _cdf(self, x): + truncated_x = nn.relu(x) + return math_ops.erf(truncated_x / self.scale / np.sqrt(2.0)) + + def _entropy(self): + return 0.5 * math_ops.log(np.pi * self.scale ** 2.0 / 2.0) + 0.5 + + def _mean(self): + return self.scale * np.sqrt(2.0) / np.sqrt(np.pi) + + def _quantile(self, p): + return np.sqrt(2.0) * self.scale * special_math.erfinv(p) + + def _mode(self): + return array_ops.zeros(self.batch_shape_tensor()) + + def _variance(self): + return self.scale ** 2.0 * (1.0 - 2.0 / np.pi) diff --git a/tensorflow/python/kernel_tests/distributions/special_math_test.py b/tensorflow/python/kernel_tests/distributions/special_math_test.py index 9441cdbe39..2d434a39c2 100644 --- a/tensorflow/python/kernel_tests/distributions/special_math_test.py +++ b/tensorflow/python/kernel_tests/distributions/special_math_test.py @@ -332,6 +332,32 @@ class LogNdtrGradientTest(NdtrGradientTest): _use_log = True +class ErfInvTest(test.TestCase): + + def testErfInvValues(self): + with self.test_session(): + if not special: + return + + x = np.linspace(0., 1.0, 50).astype(np.float64) + + expected_x = special.erfinv(x) + x = special_math.erfinv(x) + self.assertAllClose(expected_x, x.eval(), atol=0.) + + def testErfInvIntegerInput(self): + with self.test_session(): + + with self.assertRaises(TypeError): + x = np.array([1, 2, 3]).astype(np.int32) + special_math.erfinv(x) + + with self.assertRaises(TypeError): + x = np.array([1, 2, 3]).astype(np.int64) + special_math.erfinv(x) + + + class LogCDFLaplaceTest(test.TestCase): # Note that scipy.stats.laplace does not have a stable Log CDF, so we cannot # rely on scipy to cross check the extreme values. diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py index 222a39ad82..bed4cbb2c1 100644 --- a/tensorflow/python/ops/distributions/special_math.py +++ b/tensorflow/python/ops/distributions/special_math.py @@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops __all__ = [ + "erfinv", "ndtr", "ndtri", "log_ndtr", @@ -350,6 +351,29 @@ def _log_ndtr_asymptotic_series(x, series_order): return 1. + even_sum - odd_sum +def erfinv(x, name="erfinv"): + """The inverse function for erf, the error function. + + Args: + x: `Tensor` of type `float32`, `float64`. + name: Python string. A name for the operation (default="erfinv"). + + Returns: + x: `Tensor` with `dtype=x.dtype`. + + Raises: + TypeError: if `x` is not floating-type. + """ + + with ops.name_scope(name, values=[x]): + x = ops.convert_to_tensor(x, name="x") + if x.dtype.as_numpy_dtype not in [np.float32, np.float64]: + raise TypeError( + "x.dtype=%s is not handled, see docstring for supported types." + % x.dtype) + return ndtri((x + 1.0) / 2.0) / np.sqrt(2) + + def _double_factorial(n): """The double factorial function for small Python integer `n`.""" return np.prod(np.arange(n, 1, -2)) -- GitLab From 197850fb12015f8e01a3b9c4d3e3546fc54aaa0b Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Wed, 29 Nov 2017 13:21:31 -0800 Subject: [PATCH 0957/1801] enabling Tensor._set_shape() to work with the C API PiperOrigin-RevId: 177353959 --- tensorflow/python/client/tf_session.i | 43 ++++++++++++++ tensorflow/python/client/tf_session_helper.cc | 19 +++++++ tensorflow/python/client/tf_session_helper.h | 14 +++++ tensorflow/python/framework/ops.py | 57 +++++++++++++------ 4 files changed, 117 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 5fa1a7e8fc..c286d5fe47 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -532,6 +532,49 @@ def TF_Reset(target, containers=None, config=None): %unignore TF_GraphGetTensorShapeHelper; %ignore TF_GraphGetTensorShape; +// We use TF_GraphSetTensorShape_wrapper instead of +// TF_GraphSetTensorShape +%ignore TF_GraphSetTensorShape; +%unignore tensorflow; +%unignore TF_GraphSetTensorShape_wrapper; + +// $input is a Python list of ints to a vector for TF_GraphSetTensorShape_wrapper +%typemap(in) (const std::vector& dims) + (std::vector dims_local){ + if ($input != Py_None) { + if (!PyList_Check($input)) { + SWIG_exception_fail(SWIG_TypeError, tensorflow::strings::Printf( + "$symname: expected list but got %s ", Py_TYPE($input)->tp_name).c_str()); + } + size_t size = PyList_Size($input); + for (int i = 0; i < size; ++i) { + PyObject* item = PyList_GetItem($input, i); + dims_local.push_back(PyInt_AS_LONG(item)); + } + $1 = &dims_local; + } else { + $1 = nullptr; + } +} + +// We use TF_GraphGetTensorShape_wrapper instead of +// TF_GraphGetTensorShape +%ignore TF_GraphGetTensorShape; +%unignore tensorflow; +%unignore TF_GraphGetTensorShape_wrapper; + +// Build a Python list of ints and return it. +%typemap(out) std::vector tensorflow::TF_GraphGetTensorShape_wrapper { + $result = PyList_New($1.size()); + if (!$result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list"); + } + + for (size_t i = 0; i < $1.size(); ++i) { + PyList_SET_ITEM($result, i, PyInt_FromLong($1[i])); + } +} + %include "tensorflow/python/client/tf_session_helper.h" %unignoreall diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index ad982e5dd8..e4bf09a0ca 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -407,4 +407,23 @@ TF_Function* TF_GraphToFunction_wrapper( opts, description, out_status); } +void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, + const std::vector& dims, + bool unknown_shape, TF_Status* status) { + if (unknown_shape) { + TF_GraphSetTensorShape(graph, output, nullptr, -1, status); + return; + } + TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status); +} + +std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, + TF_Output output, + int num_dims, + TF_Status* status) { + std::vector dims(num_dims); + TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status); + return dims; +} + } // namespace tensorflow diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h index 6ed08d3a58..bb7171db31 100644 --- a/tensorflow/python/client/tf_session_helper.h +++ b/tensorflow/python/client/tf_session_helper.h @@ -168,6 +168,20 @@ TF_Function* TF_GraphToFunction_wrapper( const std::vector& inputs, const std::vector& outputs, const NameVector& output_names, const TF_FunctionOptions* opts, const char* description, TF_Status* out_status); + +// Set the shape of output. If unknown is true, `num_dims` must be set to +// -1 and `dims` is set to nullptr. +void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, + const std::vector& dims, + bool unknown_shape, TF_Status* status); + +// Return the shape of output. `num_dims` should be the output of +// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called. +std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, + TF_Output output, + int num_dims, + TF_Status* status); + } // namespace tensorflow #endif // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_ diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 2217513966..975a1c87ec 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -374,6 +374,19 @@ class Tensor(_TensorLike): A `TensorShape` representing the shape of this tensor. """ + if _USE_C_API: + graph = self._op._graph._c_graph # pylint: disable=protected-access + with errors.raise_exception_on_not_ok_status() as status: + num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(), + status) + if num_dims == -1: + dim_list = None + else: + with errors.raise_exception_on_not_ok_status() as status: + dim_list = c_api.TF_GraphGetTensorShape_wrapper( + graph, self._as_tf_output(), num_dims, status) + dim_list = [None if i == -1 else i for i in dim_list] + return tensor_shape.TensorShape(dim_list) return self._shape def __iter__(self): @@ -393,8 +406,8 @@ class Tensor(_TensorLike): yield self[i] def _shape_as_list(self): - if self._shape.ndims is not None: - return [dim.value for dim in self._shape.dims] + if self.shape.ndims is not None: + return [dim.value for dim in self.shape.dims] else: return None @@ -410,7 +423,7 @@ class Tensor(_TensorLike): Returns: Integer rank or None """ - return self._shape.ndims + return self.shape.ndims def get_shape(self): """Alias of Tensor.shape.""" @@ -441,14 +454,35 @@ class Tensor(_TensorLike): ``` Args: - shape: A `TensorShape` representing the shape of this tensor. + shape: A `TensorShape` representing the shape of this tensor, a + `TensorShapeProto`, a list, a tuple, or None. Raises: ValueError: If `shape` is not compatible with the current shape of this tensor. """ - # TODO(skyewm): call C API - self._shape = self._shape.merge_with(shape) + if not _USE_C_API: + self._shape = self._shape.merge_with(shape) # pylint: disable=protected-access + return + if not isinstance(shape, tensor_shape.TensorShape): + shape = tensor_shape.TensorShape(shape) + dim_list = [] + if shape.dims is None: + unknown_shape = True + else: + unknown_shape = False + for dim in shape.dims: + if dim.value is None: + dim_list.append(-1) + else: + dim_list.append(dim.value) + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_GraphSetTensorShape_wrapper( + self._op._graph._c_graph, # pylint: disable=protected-access + self._as_tf_output(), + dim_list, + unknown_shape, + status) @property def value_index(self): @@ -4517,15 +4551,11 @@ def control_dependencies(control_inputs): See @{tf.Graph.control_dependencies} for more details. - When eager execution is enabled, any callable object in the `control_inputs` - list will be called. - Args: control_inputs: A list of `Operation` or `Tensor` objects which must be executed or computed before running the operations defined in the context. Can also be `None` to clear the control - dependencies. If eager execution is enabled, any callable object in the - `control_inputs` list will be called. + dependencies. Returns: A context manager that specifies control dependencies for all @@ -4534,11 +4564,6 @@ def control_dependencies(control_inputs): if context.in_graph_mode(): return get_default_graph().control_dependencies(control_inputs) else: - if control_inputs: - # Excute any pending callables. - for control in control_inputs: - if callable(control): - control() return _NullContextmanager() -- GitLab From e00156b36d91019039c9148dc86b64017154564e Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 29 Nov 2017 13:24:28 -0800 Subject: [PATCH 0958/1801] Proper deallocation in the thread-local tape stack. PiperOrigin-RevId: 177354350 --- tensorflow/python/eager/pywrap_tfe_src.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index ce823cb567..b52d71dc6c 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -531,12 +531,9 @@ static PyTypeObject TFE_Py_Tape_Type = { // xcode 7 doesn't define thread_local, so for compatibility we implement our // own. TODO(apassos) remove once we can deprecate xcode 7. #ifndef __APPLE__ -thread_local std::vector* tape_stack = nullptr; std::vector* GetTapeStack() { - if (tape_stack == nullptr) { - tape_stack = new std::vector; - } - return tape_stack; + thread_local std::vector tape_stack; + return &tape_stack; } #else static tensorflow::mutex stack_mu(tensorflow::LINKER_INITIALIZED); -- GitLab From d3a8bf0783754b8f4bbc24274ecd79d4cc3217f0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 13:28:59 -0800 Subject: [PATCH 0959/1801] Added comment/TODO concerning memory use of extract_images_patches. PiperOrigin-RevId: 177354924 --- tensorflow/contrib/kfac/python/ops/fisher_factors.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index fbc192f1dc..6c1dd0ae40 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -580,6 +580,9 @@ class ConvDiagonalFactor(DiagonalFactor): # the target entry of _outputs_grads changes with idx.) with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs): filter_height, filter_width, _, _ = self._filter_shape + + # TODO(b/64144716): there is potential here for a big savings in terms of + # memory use. patches = array_ops.extract_image_patches( inputs, ksizes=[1, filter_height, filter_width, 1], @@ -739,6 +742,9 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): # TODO(jamesmartens): factor this patches stuff out into a utility function with _maybe_colocate_with(self._inputs, self._colocate_cov_ops_with_inputs): filter_height, filter_width, in_channels, _ = self._filter_shape + + # TODO(b/64144716): there is potential here for a big savings in terms of + # memory use. patches = array_ops.extract_image_patches( self._inputs, ksizes=[1, filter_height, filter_width, 1], -- GitLab From 19f62f62e5dab41b62b60ac66e7d07c09d55e17a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 13:41:03 -0800 Subject: [PATCH 0960/1801] Re-enable Mul hoisting for aggregations other than Add when input shapes match. PiperOrigin-RevId: 177356621 --- .../optimizers/arithmetic_optimizer.cc | 107 ++++++++++++------ .../optimizers/arithmetic_optimizer_test.cc | 106 +++++++++-------- 2 files changed, 135 insertions(+), 78 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 930d122234..6861a51795 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -253,6 +253,30 @@ bool IsNumberType(DataType dtype) { const char kOutputShapesAttr[] = "_output_shapes"; +PartialTensorShape GetInputShape(const string& input, const NodeMap& node_map) { + int output_pos; + string node_name = ParseNodeName(input, &output_pos); + const NodeDef* input_node = node_map.GetNode(node_name); + return input_node->attr().at(kOutputShapesAttr).list().shape(output_pos); +} + +bool ShapesEqual(const string& input_x, const string& input_y, + const NodeMap& node_map) { + PartialTensorShape x_shape = GetInputShape(input_x, node_map); + PartialTensorShape y_shape = GetInputShape(input_y, node_map); + if (x_shape.unknown_rank() || y_shape.unknown_rank() || + x_shape.dims() != y_shape.dims()) { + return false; + } + for (int i = 0; i < x_shape.dims(); ++i) { + if (x_shape.dim_size(i) == -1 || y_shape.dim_size(i) == -1 || + x_shape.dim_size(i) != y_shape.dim_size(i)) { + return false; + } + } + return true; +} + // Returns whether `reshape` is an identity op. The tensor that `reshape` // reshapes is the `output_pos`-th output of node `input`. bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input, @@ -868,8 +892,11 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // multiplication over addition to hoist common factors out of aggregate nodes // where all the inputs are Mul nodes. This pattern occurs frequently in // regularization terms for the gradients during training. - // TODO(rmlarsen): Check shapes and enable for AddN. - if (IsAdd(*node) && NumNonControlInputs(*node) > 1 && + // For example, we can rewrite an expression of the form: + // AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn)) + // to the following: + // Mul(x, AddN(y1, y2, y3, ... yn)) + if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 && !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) { // Determine the set of common factors if the input nodes are all Mul nodes. std::set common_factors; @@ -899,24 +926,15 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } if (common_factors.size() == 1) { const string& common_factor = *common_factors.begin(); - // In this case we have an expression of the form - // AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn)) - // that can be rewritten as - // Mul(x, AddN(y1, y2, y3, ... yn)) - - // 1. Use a copy of the first Mul node for the outer multiplication. - NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"), - node_map_->GetNode(node->input(0))); - NodeDef* new_add_node = AddNode(StrCat(node->name(), "_hoist_add"), node); - new_mul_node->set_device(node->device()); - new_mul_node->set_input(0, common_factor); - node_map_->AddOutput(common_factor, new_mul_node->name()); - new_mul_node->set_input(1, new_add_node->name()); - node_map_->AddOutput(new_add_node->name(), new_mul_node->name()); - - // 2. Hoist non-shared factors up into the new AddN node. - nodes_to_simplify->PushBack(new_add_node); - for (int i = 0; i < node->input_size(); ++i) { + + // Gather up the non-shared factors (the y's in the example). + // Unless the aggregation is Add, we have to make sure that all the y's + // have the same shape since the other aggregation ops do not support + // broadcasting. + std::vector unique_factors; + unique_factors.reserve(node->input_size()); + bool shapes_match = true; + for (int i = 0; i < node->input_size() && shapes_match; ++i) { const string& input = node->input(i); if (IsControlInput(input)) { break; @@ -924,15 +942,41 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* mul_node = node_map_->GetNode(input); const int unique_factor_index = mul_node->input(0) == common_factor ? 1 : 0; - const string unique_factor = mul_node->input(unique_factor_index); - new_add_node->set_input(i, unique_factor); + unique_factors.push_back(mul_node->input(unique_factor_index)); + if (i > 0 && !IsAdd(*node)) { + shapes_match = ShapesEqual(unique_factors.front(), + unique_factors.back(), *node_map_); + } } - // 4. Add frame dependencies that the original node might have had. - AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor, - {new_add_node}); + if (shapes_match) { + // 1. Use a copy of the first Mul node for the outer multiplication. + NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"), + node_map_->GetNode(node->input(0))); + NodeDef* new_add_node = + AddNode(StrCat(node->name(), "_hoist_add"), node); + new_mul_node->set_device(node->device()); + new_mul_node->set_input(0, common_factor); + node_map_->AddOutput(common_factor, new_mul_node->name()); + new_mul_node->set_input(1, new_add_node->name()); + node_map_->AddOutput(new_add_node->name(), new_mul_node->name()); + + // 2. Hoist non-shared factors up into the new AddN node. + nodes_to_simplify->PushBack(new_add_node); + for (int i = 0; i < node->input_size(); ++i) { + const string& input = node->input(i); + if (IsControlInput(input)) { + break; + } + new_add_node->set_input(i, unique_factors[i]); + } - return new_mul_node->name(); + // 3. Add frame dependencies that the original node might have had. + AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor, + {new_add_node}); + + return new_mul_node->name(); + } } } @@ -1064,13 +1108,10 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, int num_frames; TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_, &frame_map_, &num_frames)); - if (opt_level_ == RewriterConfig::AGGRESSIVE) { - graph_properties_.reset(new GraphProperties(item)); - // Shapes are only needed in aggressive mode. - TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false)); - TF_RETURN_IF_ERROR( - graph_properties_->AnnotateOutputShapes(optimized_graph_)); - } + graph_properties_.reset(new GraphProperties(item)); + // Shapes are only needed in aggressive mode. + TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false)); + TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_)); // Perform the optimizations. DedupComputations(); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index e8a18ff9d9..80f42694d9 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -32,6 +32,21 @@ string OptimizedName(const string& name) { return AddPrefixToNodeName(name, kArithmeticOptimizer); } +void VerifyGraphsMatch(const GraphDef& original_graph, + const GraphDef& optimized_graph, int line) { + EXPECT_EQ(original_graph.node_size(), optimized_graph.node_size()) << line; + for (int i = 0; i < original_graph.node_size(); ++i) { + const NodeDef& original = original_graph.node(i); + const NodeDef& optimized = optimized_graph.node(i); + EXPECT_EQ(original.name(), optimized.name()) << line; + EXPECT_EQ(original.op(), optimized.op()) << line; + EXPECT_EQ(original.input_size(), optimized.input_size()) << line; + for (int j = 0; j < original.input_size(); ++j) { + EXPECT_EQ(original.input(j), optimized.input(j)) << line; + } + } +} + class ArithmeticOptimizerTest : public ::testing::Test {}; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -44,18 +59,7 @@ TEST_F(ArithmeticOptimizerTest, NoOp) { GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - - EXPECT_EQ(item.graph.node_size(), output.node_size()); - for (int i = 0; i < item.graph.node_size(); ++i) { - const NodeDef& original = item.graph.node(i); - const NodeDef& optimized = output.node(i); - EXPECT_EQ(original.name(), optimized.name()); - EXPECT_EQ(original.op(), optimized.op()); - EXPECT_EQ(original.input_size(), optimized.input_size()); - for (int j = 0; j < original.input_size(); ++j) { - EXPECT_EQ(original.input(j), optimized.input(j)); - } - } + VerifyGraphsMatch(item.graph, output, __LINE__); } TEST_F(ArithmeticOptimizerTest, OpDedupping) { @@ -398,39 +402,51 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { } TEST_F(ArithmeticOptimizerTest, HoistFactor) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); - Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2}); - Output y2 = ops::Const(s.WithOpName("y2"), {5.0f, 6.0f}, {1, 2}); - Output mul1 = ops::Mul(s.WithOpName("mul1"), x, y1); - Output mul2 = ops::Mul(s.WithOpName("mul2"), y2, x); - Output add = ops::Add(s.WithOpName("add"), mul1, mul2); - Output id = ops::Identity(s.WithOpName("id"), add); - - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - - ArithmeticOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - - EXPECT_EQ(9, output.node_size()); - const NodeDef& new_add = output.node(8); - EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name()); - EXPECT_EQ("y1", new_add.input(0)); - EXPECT_EQ("y2", new_add.input(1)); - const NodeDef& new_mul = output.node(7); - EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name()); - EXPECT_EQ("x", new_mul.input(0)); - EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1)); - const NodeDef& new_id = output.node(6); - EXPECT_EQ("id", new_id.name()); - EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0)); + for (bool matching_shapes : {true, false}) { + for (bool use_addn : {true, false}) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); + Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2}); + Output y2 = matching_shapes + ? ops::Const(s.WithOpName("y2"), {5.0f, 6.0f}, {1, 2}) + : ops::Const(s.WithOpName("y2"), {5.0f}, {1, 1}); + Output mul1 = ops::Mul(s.WithOpName("mul1"), x, y1); + Output mul2 = ops::Mul(s.WithOpName("mul2"), y2, x); + Output id = + use_addn ? ops::Identity(s.WithOpName("id"), + ops::AddN(s.WithOpName("add"), {mul1, mul2})) + : ops::Identity(s.WithOpName("id"), + ops::Add(s.WithOpName("add"), mul1, mul2)); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + if (use_addn && !matching_shapes) { + VerifyGraphsMatch(item.graph, output, __LINE__); + } else { + EXPECT_EQ(9, output.node_size()); + const NodeDef& new_add = output.node(8); + EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name()); + EXPECT_EQ("y1", new_add.input(0)); + EXPECT_EQ("y2", new_add.input(1)); + const NodeDef& new_mul = output.node(7); + EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name()); + EXPECT_EQ("x", new_mul.input(0)); + EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1)); + const NodeDef& new_id = output.node(6); + EXPECT_EQ("id", new_id.name()); + EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0)); + } + } + } } TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) { -- GitLab From 48347ee4105d78d8f36ba8645953b75cb5280c4c Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Wed, 29 Nov 2017 13:46:24 -0800 Subject: [PATCH 0961/1801] Simplify const node creation. PiperOrigin-RevId: 177357416 --- tensorflow/core/grappler/optimizers/BUILD | 5 + .../grappler/optimizers/layout_optimizer.cc | 218 +++++++----------- .../optimizers/layout_optimizer_test.cc | 75 +++++- .../python/grappler/layout_optimizer_test.py | 2 +- 4 files changed, 169 insertions(+), 131 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 5d9eb8e0b1..24e6f8847a 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -332,6 +332,11 @@ tf_cc_test( deps = [ ":layout_optimizer", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/core:all_kernels", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index 1b8046b787..ef4b015295 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -69,6 +69,8 @@ std::set GetOpsFormatSupported() { return ops_format_supported; } +// TODO(yaozhang): enable SumProcessor with auto-tuning. Currently disabled +// because of the worse performance in some cases. std::set GetOpsFormatAgnostic() { std::set ops_format_agnostic = {"Add", "AddN", @@ -88,7 +90,7 @@ std::set GetOpsFormatAgnostic() { "Split", "SquaredDifference", "Squeeze", - "Sub"}; + /*"Sum",*/ "Sub"}; return ops_format_agnostic; } @@ -186,33 +188,6 @@ class GraphProcessor { return node; } - NodeDef* AddNodeReductionConst(const string& name, const string& device) { - NodeDef* node = graph_->add_node(); - node_map_->AddNode(name, node); - node->set_name(name); - node->set_op("Const"); - AttrValue attr_data_type; - attr_data_type.set_type(DT_INT32); - node->mutable_attr()->insert({"dtype", attr_data_type}); - - AttrValue attr_tensor; - Tensor tensor(DT_INT32, TensorShape({3})); - std::vector axis = {0, 2, 3}; - for (int i = 0; static_cast(i) < axis.size(); i++) { - tensor.flat()(i) = axis[i]; - } - tensor.AsProtoTensorContent(attr_tensor.mutable_tensor()); - node->mutable_attr()->insert({"value", attr_tensor}); - string device_name; - if (device.empty()) { - device_name = virtual_placer_.get_canonical_device_name(*node); - } else { - device_name = device; - } - node->set_device(device_name); - return node; - } - const VirtualPlacer& virtual_placer_; const std::unordered_set& nodes_to_preserve_; GraphDef* graph_; @@ -370,10 +345,20 @@ class NodeProcessor : public GraphProcessor { LOG(ERROR) << "Failed to parse TensorProto."; } if (tensor.dims() == 1) { - int c = tensor.flat()(3); - tensor.flat()(3) = tensor.flat()(2); - tensor.flat()(2) = tensor.flat()(1); - tensor.flat()(1) = c; + if (tensor.flat().size() == 4) { + int c = tensor.flat()(3); + tensor.flat()(3) = tensor.flat()(2); + tensor.flat()(2) = tensor.flat()(1); + tensor.flat()(1) = c; + } else if (tensor.flat().size() == 3) { + tensor.flat()(0) = 0; + tensor.flat()(1) = 2; + tensor.flat()(2) = 3; + } else { + return Status(error::INVALID_ARGUMENT, + strings::StrCat("Unsupported tensor size: ", + tensor.flat().size())); + } } else if (tensor.dims() == 2) { for (int i = 0; i < 2; i++) { int c = tensor.matrix()(3, i); @@ -394,7 +379,9 @@ class NodeProcessor : public GraphProcessor { Status UpdateAttrValueOfInput(int input_index) { auto input_node = node_map_->GetNode(node_->input(input_index)); // We created a copy of the node, so that we don't modify the original node, - // which might be used elsewhere. + // which might be used elsewhere. Note that this copy also copies the + // control dependency input in the case this node is inside a loop, + // to ensure added_node is in the same frame with node_. NodeDef* added_node = graph_->add_node(); *added_node = *input_node; string base_name = strings::StrCat(node_->name(), "-", input_node->name()); @@ -411,6 +398,14 @@ class NodeProcessor : public GraphProcessor { return input_pos; } + virtual std::set GetOutputPos() const { + // For most nodes, no need to process control nodes or nodes that use an + // output other than the first output: only the first output is of + // 4D NCHW/NHWC format and thus relevant here. + std::set output_pos = {0}; + return output_pos; + } + NodeDef* AddNodeTranspose(const string& node_name, const string& input_name, const string& const_name, DataType data_type, const TensorShapeProto& input_shape, @@ -476,37 +471,28 @@ class NodeProcessor : public GraphProcessor { auto outputs = node_map_->GetOutputs(node_->name()); string const_name = GetOrAddNodePermNCHWToNHWC(); for (const auto& output : outputs) { - string base_name = strings::StrCat(node_->name(), "-", output->name()); - string node_name = - AddPrefixToNodeName(base_name, kTransposeNCHWToNHWC, "-"); - // TODO(yaozhang): handle the rare case where node A is connected to more - // than one input of node B. - auto it = std::find_if(output->mutable_input()->begin(), - output->mutable_input()->end(), - [this](const string& input) { - string node_name = NodeName(input); - return node_name.compare(node_->name()) == 0; - }); - if (it == output->mutable_input()->end()) { - return Status(error::INVALID_ARGUMENT, - strings::StrCat("Expect ", node_->name(), - " to be an input of ", output->name())); - } - int output_pos = NodePosition(*it); - // No need to process control nodes or nodes that use an output - // other than the first output: only the first output is of 4D NCHW/NHWC - // format and thus relevant here. - if (output_pos != 0) { - continue; + for (int i = 0; i < output->input_size(); i++) { + auto& input = *output->mutable_input(i); + int input_port; + string input_name = ParseNodeName(input, &input_port); + auto output_pos = GetOutputPos(); + if (input_name == node_->name() && + output_pos.find(input_port) != output_pos.end()) { + string base_name = + strings::StrCat(node_->name(), "-", output->name(), "-", i); + string node_name = + AddPrefixToNodeName(base_name, kTransposeNCHWToNHWC, "-"); + TF_RETURN_IF_ERROR(HasAttribute(*node_, "T")); + TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes")); + AddNodeTranspose( + node_name, input, const_name, node_->attr().at("T").type(), + node_->attr().at("_output_shapes").list().shape(0), false); + input = node_name; + node_map_->AddOutput(node_->name(), node_name); + node_map_->AddOutput(node_name, output->name()); + } } - TF_RETURN_IF_ERROR(HasAttribute(*node_, "T")); - TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes")); - AddNodeTranspose( - node_name, node_->name(), const_name, node_->attr().at("T").type(), - node_->attr().at("_output_shapes").list().shape(0), false); - *it = node_name; - node_map_->UpdateOutput(node_->name(), output->name(), node_name); - node_map_->AddOutput(node_name, output->name()); + node_map_->RemoveOutput(node_->name(), output->name()); } return Status::OK(); } @@ -948,7 +934,7 @@ class ConcatProcessor : public AgnosticNodeProcessor { } Status CustomizedProcessing() override { - string concat_const_name = GetOrAddNodeConcatConst(); + string concat_const_name = AddNodeConcatConst()->name(); node_map_->AddOutput(concat_const_name, node_->name()); *node_->mutable_input(axis_node_pos_) = concat_const_name; return Status::OK(); @@ -956,8 +942,14 @@ class ConcatProcessor : public AgnosticNodeProcessor { bool IsAlongDimC() const { auto axis_node = node_map_->GetNode(node_->input(axis_node_pos_)); + if (!IsConstant(*axis_node)) { + return false; + } if (axis_node->attr().find("value") != axis_node->attr().end()) { - return axis_node->attr().at("value").tensor().int_val(0) == 3; + auto tensor = axis_node->attr().at({"value"}).tensor(); + if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) { + return tensor.int_val(0) == 3; + } } return false; } @@ -965,28 +957,18 @@ class ConcatProcessor : public AgnosticNodeProcessor { int axis_node_pos_; private: - NodeDef* AddNodeConcatConst(const string& suffix, const string& depended_node, - const string& device) { - auto const_node = AddNodeConstScalar( - strings::StrCat(kConcatConst, "-", suffix), device, DT_INT32, 1); - // This is to ensure the concat node and the const node are - // in the same frame. - *const_node->add_input() = AsControlDependency(depended_node); - return const_node; - } - - string GetOrAddNodeConcatConst() { - string const_name; - if (is_in_frame_) { - int value_node_pos = (axis_node_pos_ == 0) ? 1 : 0; - auto const_node = AddNodeConcatConst( - node_->name(), NodeName(node_->input(value_node_pos)), - node_->device()); - const_name = const_node->name(); - } else { - const_name = kConcatConst; - } - return const_name; + NodeDef* AddNodeConcatConst() { + auto axis_node = node_map_->GetNode(node_->input(axis_node_pos_)); + // We created a copy of the node, so that we don't modify the original node, + // which might be used elsewhere. Note that this copy also copies the + // control dependency input in the case this node is inside a loop, + // to ensure added_node is in the same frame with node_. + auto added_node = graph_->add_node(); + *added_node = *axis_node; + added_node->set_name(strings::StrCat(kConcatConst, "-", node_->name())); + added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(0, + 1); + return added_node; } }; @@ -1036,6 +1018,16 @@ class SplitProcessor : public AgnosticNodeProcessor { return input_pos; } + std::set GetOutputPos() const override { + std::set output_pos{0}; + if (HasAttribute(*node_, "num_split").ok()) { + for (int i = 1; i < node_->attr().at("num_split").i(); i++) { + output_pos.insert(i); + } + } + return output_pos; + } + Status CustomizedProcessing() override { string split_const_name = AddNodeSplitConst()->name(); node_map_->AddOutput(split_const_name, node_->name()); @@ -1073,7 +1065,7 @@ class SplitProcessor : public AgnosticNodeProcessor { // We created a copy of the node, so that we don't modify the original node, // which might be used elsewhere. Note that this copy also copies the // control dependency input in the case this node is inside a loop, - // to ensure added_node is in the same frame with the Split node. + // to ensure added_node is in the same frame with node_. NodeDef* added_node = graph_->add_node(); *added_node = *dim_node; added_node->set_name(strings::StrCat(kSplitConst, "-", node_->name())); @@ -1329,20 +1321,21 @@ class SumProcessor : public AgnosticNodeProcessor { Status AddLayoutTransposeToOutputs() override { return Status::OK(); } - Status CustomizedProcessing() override { - node_map_->AddOutput(kReductionConst, node_->name()); - *node_->mutable_input(1) = GetOrAddNodeReductionConst(); - return Status::OK(); - } + Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); } private: bool IsAlongDimNHW() const { - NodeDef* node = node_map_->GetNode(node_->input(1)); + NodeDef* reduction_indices = node_map_->GetNode(node_->input(1)); + if (!IsConstant(*reduction_indices)) { + return false; + } Tensor tensor; - if (node->attr().find({"value"}) == node->attr().end()) { + if (reduction_indices->attr().find({"value"}) == + reduction_indices->attr().end()) { return false; } - auto success = tensor.FromProto(node->attr().at({"value"}).tensor()); + auto success = + tensor.FromProto(reduction_indices->attr().at({"value"}).tensor()); if (!success) { LOG(ERROR) << "Failed to parse TensorProto."; return false; @@ -1356,29 +1349,6 @@ class SumProcessor : public AgnosticNodeProcessor { } return false; } - - NodeDef* AddNodeReductionConst(const string& suffix, - const string& depended_node, - const string& device) { - auto const_node = GraphProcessor::AddNodeReductionConst( - strings::StrCat(kReductionConst, "-", suffix), device); - // This is to ensure the Sum node and the const node are in the - // same frame. - *const_node->add_input() = AsControlDependency(depended_node); - return const_node; - } - - string GetOrAddNodeReductionConst() { - string const_name; - if (is_in_frame_) { - auto const_node = AddNodeReductionConst( - node_->name(), NodeName(node_->input(0)), node_->device()); - const_name = const_node->name(); - } else { - const_name = kReductionConst; - } - return const_name; - } }; class DataLayoutOptimizer : GraphProcessor { @@ -1409,18 +1379,10 @@ class DataLayoutOptimizer : GraphProcessor { return AddNodePermConst(kPermNCHWToNHWC, "", {0, 2, 3, 1}); } - NodeDef* AddNodeConcatConst() { - return AddNodeConstScalar(kConcatConst, "", DT_INT32, 1); - } - NodeDef* AddNodeGatherAxisConst() { return AddNodeConstScalar(kGatherAxisConst, "", DT_INT32, 0); } - NodeDef* AddNodeReductionConst() { - return GraphProcessor::AddNodeReductionConst(kReductionConst, ""); - } - // Expand all nodes which is in NHWC, but supports NCHW or is layout agnostic. Status Expand() { int node_size_original = graph_->node_size(); @@ -1474,9 +1436,7 @@ class DataLayoutOptimizer : GraphProcessor { if (graph_->node_size() > node_size_original) { NodeDef* n = AddNodePermNHWCToNCHW(); n = AddNodePermNCHWToNHWC(); - n = AddNodeConcatConst(); n = AddNodeGatherAxisConst(); - n = AddNodeReductionConst(); std::set ops_format_agnostic = GetOpsFormatAgnostic(); for (int i = 0; i < graph_->node_size(); i++) { if (ops_format_agnostic.find(graph_->node(i).op()) != diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index 8c89f6744b..e8f7b8ac3c 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -495,7 +495,80 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) { auto split_node = node_map.GetNode("split"); EXPECT_EQ(split_node->input(0), "i1"); EXPECT_EQ(split_node->input(1), - "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split"); + "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split-1"); +} + +TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto axis = ops::Const(s.WithOpName("axis"), 3); + auto split = ops::Split(s.WithOpName("split"), axis, conv, 2); + auto concat = + ops::Concat(s.WithOpName("concat"), {split[1], split[1], split[1]}, axis); + auto o = ops::Identity(s.WithOpName("o"), concat); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto concat_node = node_map.GetNode("concat"); + EXPECT_EQ(concat_node->input(0), "split:1"); + EXPECT_EQ(concat_node->input(1), "split:1"); + EXPECT_EQ(concat_node->input(2), "split:1"); + EXPECT_EQ(concat_node->input(3), "LayoutOptimizerConcatConst-concat"); + auto concat_dim = node_map.GetNode("LayoutOptimizerConcatConst-concat"); + EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1); +} + +TEST_F(LayoutOptimizerTest, Concat) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto axis = ops::Const(s.WithOpName("axis"), 3); + auto split = ops::Split(s.WithOpName("split"), axis, conv, 2); + auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis); + auto o = ops::Identity(s.WithOpName("o"), concat); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto concat_node = node_map.GetNode("concat"); + EXPECT_EQ(concat_node->input(0), "split"); + EXPECT_EQ(concat_node->input(1), "split:1"); + EXPECT_EQ(concat_node->input(2), "LayoutOptimizerConcatConst-concat"); + auto concat_dim = node_map.GetNode("LayoutOptimizerConcatConst-concat"); + EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1); +} + +TEST_F(LayoutOptimizerTest, Sum) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto reduction_indices = + ops::Const(s.WithOpName("reduction_indices"), {0, 1, 2}, {3}); + auto sum = ops::Sum(s.WithOpName("sum"), conv, reduction_indices); + auto o = ops::Identity(s.WithOpName("o"), sum); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + // TODO(yaozhang): enable SumProcessor with auto-tuning. Currently disabled + // because of the worse performance in some cases. + /* + NodeMap node_map(&output); + auto sum_node = node_map.GetNode("sum"); + EXPECT_EQ(sum_node->input(0), "Conv2D"); + EXPECT_EQ(sum_node->input(1), "LayoutOptimizer-sum-reduction_indices"); + auto sum_const = node_map.GetNode("LayoutOptimizer-sum-reduction_indices"); + Tensor tensor; + EXPECT_TRUE( + tensor.FromProto(sum_const->mutable_attr()->at({"value"}).tensor())); + Tensor tensor_expected(DT_INT32, {3}); + test::FillValues(&tensor_expected, {0, 2, 3}); + test::ExpectTensorEqual(tensor_expected, tensor); + */ } } // namespace diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 626e0502cb..50735fb567 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -190,7 +190,7 @@ class LayoutOptimizerTest(test.TestCase): self.assertEqual(expected_num_transposes, num_transposes) self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0', nodes) - self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1', + self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1-0', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) -- GitLab From 1d0b07351d901334b33565595d4c23607f11cc27 Mon Sep 17 00:00:00 2001 From: Christopher Olston Date: Wed, 29 Nov 2017 13:58:55 -0800 Subject: [PATCH 0962/1801] Add a way to query a batch scheduler to determine the max task size. A layer on top of the batcher could use this interface to pre-split large tasks that exceed the max batch size. PiperOrigin-RevId: 177359263 --- .../contrib/batching/adaptive_shared_batch_scheduler.h | 2 ++ .../contrib/batching/adaptive_shared_batch_scheduler_test.cc | 1 + tensorflow/contrib/batching/basic_batch_scheduler.h | 4 ++++ tensorflow/contrib/batching/basic_batch_scheduler_test.cc | 1 + tensorflow/contrib/batching/batch_scheduler.h | 4 ++++ tensorflow/contrib/batching/shared_batch_scheduler.h | 5 +++++ tensorflow/contrib/batching/shared_batch_scheduler_test.cc | 1 + 7 files changed, 18 insertions(+) diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h index 6ed177e001..9e32bee505 100644 --- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h +++ b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h @@ -208,6 +208,8 @@ class ASBSQueue : public BatchScheduler { // place any more tasks in this batch. void ReleaseBatch(const ASBSBatch* batch); + size_t max_task_size() const override { return options_.max_batch_size; } + private: std::shared_ptr> scheduler_; const QueueOptions options_; diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc index a07cd6d834..e2aac54eeb 100644 --- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc +++ b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc @@ -186,6 +186,7 @@ TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) { queue_options.max_enqueued_batches = 2; TF_ASSERT_OK( scheduler->AddQueue(queue_options, queue_0_callback, &queue_0)); + EXPECT_EQ(10, queue_0->max_task_size()); queue_options.max_batch_size = 0; // Queue must have max_batch_size > 0. EXPECT_FALSE( diff --git a/tensorflow/contrib/batching/basic_batch_scheduler.h b/tensorflow/contrib/batching/basic_batch_scheduler.h index 9d3805fbaf..91065db249 100644 --- a/tensorflow/contrib/batching/basic_batch_scheduler.h +++ b/tensorflow/contrib/batching/basic_batch_scheduler.h @@ -192,6 +192,10 @@ class BasicBatchScheduler : public BatchScheduler { size_t NumEnqueuedTasks() const override; size_t SchedulingCapacity() const override; + size_t max_task_size() const override { + return shared_scheduler_queue_->max_task_size(); + } + private: explicit BasicBatchScheduler( std::unique_ptr> shared_scheduler_queue); diff --git a/tensorflow/contrib/batching/basic_batch_scheduler_test.cc b/tensorflow/contrib/batching/basic_batch_scheduler_test.cc index e020301795..187823151c 100644 --- a/tensorflow/contrib/batching/basic_batch_scheduler_test.cc +++ b/tensorflow/contrib/batching/basic_batch_scheduler_test.cc @@ -73,6 +73,7 @@ TEST(BasicBatchSchedulerTest, Basic) { std::unique_ptr> scheduler; TF_ASSERT_OK( BasicBatchScheduler::Create(options, callback, &scheduler)); + EXPECT_EQ(10, scheduler->max_task_size()); EXPECT_EQ(0, scheduler->NumEnqueuedTasks()); EXPECT_EQ(3 * 10, scheduler->SchedulingCapacity()); TF_ASSERT_OK(ScheduleTask(3, scheduler.get())); diff --git a/tensorflow/contrib/batching/batch_scheduler.h b/tensorflow/contrib/batching/batch_scheduler.h index a5072f439a..e18cf6c350 100644 --- a/tensorflow/contrib/batching/batch_scheduler.h +++ b/tensorflow/contrib/batching/batch_scheduler.h @@ -178,6 +178,10 @@ class BatchScheduler { // This method is useful for monitoring, or for guaranteeing a future slot in // the schedule (but being mindful about the caveats listed above). virtual size_t SchedulingCapacity() const = 0; + + // Returns the maximum allowed size of tasks submitted to the scheduler. (This + // is typically equal to a configured maximum batch size.) + virtual size_t max_task_size() const = 0; }; ////////// diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h index 41a3f99137..1d2158062e 100644 --- a/tensorflow/contrib/batching/shared_batch_scheduler.h +++ b/tensorflow/contrib/batching/shared_batch_scheduler.h @@ -248,6 +248,9 @@ class Queue { // BatchScheduler::SchedulingCapacity(). size_t SchedulingCapacity() const; + // Returns the maximum allowed size of tasks submitted to the queue. + size_t max_task_size() const { return options_.max_batch_size; } + // Called by a thread that is ready to process a batch, to request one from // this queue. Either returns a batch that is ready to be processed, or // nullptr if the queue declines to schedule a batch at this time. If it @@ -338,6 +341,8 @@ class QueueHandle : public BatchScheduler { size_t NumEnqueuedTasks() const override; size_t SchedulingCapacity() const override; + size_t max_task_size() const override { return queue_->max_task_size(); } + private: // The scheduler that owns 'queue_'. std::shared_ptr> scheduler_; diff --git a/tensorflow/contrib/batching/shared_batch_scheduler_test.cc b/tensorflow/contrib/batching/shared_batch_scheduler_test.cc index 3e924ae5f1..3ac79a8fdc 100644 --- a/tensorflow/contrib/batching/shared_batch_scheduler_test.cc +++ b/tensorflow/contrib/batching/shared_batch_scheduler_test.cc @@ -429,6 +429,7 @@ TEST(SharedBatchSchedulerTest, ConstMethods) { queue_options.max_enqueued_batches = max_enqueued_batches; std::unique_ptr> queue; TF_ASSERT_OK(scheduler->AddQueue(queue_options, callback, &queue)); + EXPECT_EQ(2, queue->max_task_size()); EXPECT_EQ(0, queue->NumEnqueuedTasks()); EXPECT_EQ(max_enqueued_batches * 2, queue->SchedulingCapacity()); -- GitLab From cb5a63d8d2b6e049a0a128ba47560f842497db8b Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 29 Nov 2017 14:01:29 -0800 Subject: [PATCH 0963/1801] Check when session cannot run because its graph was modified With current tensorflow code, if user modifies some operation after session.run() was called, this modification will never make it to the C++ runtime and no errors will be raised leading to silent wrong results. This change adds checks for such cases when C API is enabled. We don't change the code path for C API being disabled because C API should be enabled by default soon. PiperOrigin-RevId: 177359630 --- tensorflow/c/c_api.cc | 38 +++++-- tensorflow/c/c_api_internal.h | 21 +++- tensorflow/c/python_api.cc | 10 ++ tensorflow/python/client/session_test.py | 133 +++++++++++++++++++++++ 4 files changed, 190 insertions(+), 12 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 4fb8ec8e4b..c8b4bfffd4 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -624,6 +624,23 @@ Status MessageToBuffer(const tensorflow::protobuf::Message& in, return Status::OK(); } +void RecordMutation(TF_Graph* graph, const TF_Operation& op, + const char* mutation_type) + EXCLUSIVE_LOCKS_REQUIRED(graph->mu) { + // If any session has already run this node_id, mark this session as + // unrunnable. + for (auto it : graph->sessions) { + if (it.first->last_num_graph_nodes > op.node.id()) { + it.second = FailedPrecondition( + "Operation '", op.node.DebugString(), "' was changed by ", + mutation_type, + " after it was run by a session. Nodes can be mutated " + "only before they are executed by a session. Either don't modify " + "nodes after running them or create a new session."); + } + } +} + // Helpers for loading a TensorFlow plugin (a .so file). Status LoadLibrary(const char* library_filename, void** result, const void** buf, size_t* len); @@ -1744,7 +1761,6 @@ void TF_OperationToNodeDef(TF_Operation* oper, TF_Buffer* output_node_def, TF_Graph::TF_Graph() : graph(tensorflow::OpRegistry::Global()), refiner(graph.versions().producer(), graph.op_registry()), - num_sessions(0), delete_requested(false), parent(nullptr), parent_inputs(nullptr) {} @@ -1754,7 +1770,7 @@ TF_Graph* TF_NewGraph() { return new TF_Graph; } void TF_DeleteGraph(TF_Graph* g) { g->mu.lock(); g->delete_requested = true; - const bool del = g->num_sessions == 0; + const bool del = g->sessions.empty(); g->mu.unlock(); if (del) delete g; } @@ -2324,11 +2340,12 @@ TF_Session* TF_NewSession(TF_Graph* graph, const TF_SessionOptions* opt, Session* session; status->status = NewSession(opt->options, &session); if (status->status.ok()) { + TF_Session* new_session = new TF_Session(session, graph); if (graph != nullptr) { mutex_lock l(graph->mu); - graph->num_sessions += 1; + graph->sessions[new_session] = Status::OK(); } - return new TF_Session(session, graph); + return new_session; } else { DCHECK_EQ(nullptr, session); return nullptr; @@ -2392,7 +2409,7 @@ TF_Session* TF_LoadSessionFromSavedModel( TF_Session* session = new TF_Session(bundle.session.release(), graph); - graph->num_sessions += 1; + graph->sessions[session] = Status::OK(); session->last_num_graph_nodes = graph->graph.num_node_ids(); return session; #endif // __ANDROID__ @@ -2407,8 +2424,8 @@ void TF_DeleteSession(TF_Session* s, TF_Status* status) { TF_Graph* const graph = s->graph; if (graph != nullptr) { graph->mu.lock(); - graph->num_sessions -= 1; - const bool del = graph->delete_requested && graph->num_sessions == 0; + graph->sessions.erase(s); + const bool del = graph->delete_requested && graph->sessions.empty(); graph->mu.unlock(); if (del) delete graph; } @@ -2424,6 +2441,13 @@ static bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) { mutex_lock session_lock(session->mu); session->graph->mu.lock(); const Graph& graph = session->graph->graph; + + status->status = session->graph->sessions[session]; + if (!status->status.ok()) { + session->graph->mu.unlock(); + return false; + } + const auto num_nodes = graph.num_node_ids(); if (session->last_num_graph_nodes < num_nodes) { status->status = tensorflow::ValidateNoCycles(session->graph->graph); diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index bb04e01bee..aac333d9e2 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -81,12 +81,20 @@ struct TF_Graph { std::unordered_map name_map GUARDED_BY(mu); - // TF_Graph may only / must be deleted when - // num_sessions == 0 && delete_requested == true - - // num_sessions incremented by TF_NewSession, and decremented by + // The keys of this map are all the active sessions using this graph. + // Each value is the current "runnability" status of the corresponding + // session. Under normal conditions all statuses are Status::OK(), but + // if some operation is mutated after it was run by a session (this + // is detected in RecordMutation function), that session is no longer + // safe to run. Its status will contain the error that will be returned + // to the user, should she try running this session. + // + // Sessions are added to this map in TF_NewSession, and removed in // TF_DeleteSession. - int num_sessions GUARDED_BY(mu); + // TF_Graph may only / must be deleted when + // sessions.size() == 0 && delete_requested == true + tensorflow::gtl::FlatMap sessions + GUARDED_BY(mu); bool delete_requested GUARDED_BY(mu); // set true by TF_DeleteGraph // Used to link graphs contained in TF_WhileParams to the parent graph that @@ -167,6 +175,9 @@ TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status); Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out); +void RecordMutation(TF_Graph* graph, const TF_Operation& op, + const char* mutation_type); + } // end namespace tensorflow #endif // TENSORFLOW_C_C_API_INTERNAL_H_ diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index ba5a9268b4..37629a74ba 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -22,6 +22,7 @@ namespace tensorflow { void AddControlInput(TF_Graph* graph, TF_Operation* op, TF_Operation* input) { mutex_lock l(graph->mu); graph->graph.AddControlEdge(&input->node, &op->node); + RecordMutation(graph, *op, "adding control input"); } void SetAttr(TF_Graph* graph, TF_Operation* op, const char* attr_name, @@ -36,11 +37,13 @@ void SetAttr(TF_Graph* graph, TF_Operation* op, const char* attr_name, mutex_lock l(graph->mu); op->node.AddAttr(attr_name, attr_val); + RecordMutation(graph, *op, "setting attribute"); } void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device) { mutex_lock l(graph->mu); op->node.set_requested_device(device); + RecordMutation(graph, *op, "setting device"); } void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst, @@ -75,6 +78,13 @@ void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst, } status->status = graph->graph.UpdateEdge(&new_src.oper->node, new_src.index, &dst.oper->node, dst.index); + + if (status->status.ok()) { + // This modification only updates the destination node for + // the purposes of running this graph in a session. Thus, we don't + // record the source node as being modified. + RecordMutation(graph, *dst.oper, "updating input tensor"); + } } } // namespace tensorflow diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index f4b0271195..e4545d287b 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -28,6 +28,8 @@ import numpy as np import six from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.core.framework import attr_value_pb2 +from tensorflow.core.framework import types_pb2 from tensorflow.core.lib.core import error_codes_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 @@ -1742,5 +1744,136 @@ class SessionTest(test_util.TensorFlowTestCase): self.runTestAddFunctionToSession(server.target) +class GraphMutationTest(test_util.TensorFlowTestCase): + + def testUpdateInputAfterRunning(self): + with ops.Graph().as_default() as g: + a = constant_op.constant(1.0) + b = constant_op.constant(2.0) + c = a + b + + with session.Session(graph=g) as sess: + self.assertAllEqual(3.0, sess.run(c)) + c.op._update_input(1, a) # pylint: disable=protected-access + with self.assertRaisesRegexp( + errors.FailedPreconditionError, + 'add.*was changed by updating input tensor after it was run'): + sess.run(c) + + # Check that running the graph with a new session is fine + with session.Session(graph=g) as sess2: + self.assertAllEqual(2.0, sess2.run(c)) + + def testSetDeviceAfterRunning(self): + with ops.Graph().as_default() as g: + a = constant_op.constant(1.0) + b = constant_op.constant(2.0) + c = a + b + + with session.Session(graph=g) as sess: + self.assertAllEqual(3.0, sess.run(c)) + c.op._set_device('/cpu:0') # pylint: disable=protected-access + with self.assertRaisesRegexp( + errors.FailedPreconditionError, + 'add.*was changed by setting device after it was run'): + sess.run(c) + + def testSetAttrAfterRunning(self): + with ops.Graph().as_default() as g: + a = constant_op.constant(1.0, dtype=dtypes.float32) + b = math_ops.cast(a, dtypes.float64) + + with session.Session(graph=g) as sess: + self.assertAllEqual(1.0, sess.run(b)) + b.op._set_attr('DstT', + attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT)) + with self.assertRaisesRegexp( + errors.FailedPreconditionError, + 'Cast.*was changed by setting attribute after it was run'): + sess.run(b) + + def testRunModifyRun(self): + with ops.Graph().as_default() as g: + a = constant_op.constant(1.0) + b = constant_op.constant(2.0) + c = a + b + + with session.Session(graph=g) as sess: + self.assertAllEqual(3.0, sess.run(c)) + + d = b + c + d.op._update_input(0, a) # pylint: disable=protected-access + self.assertAllEqual(3.0, sess.run(c)) + self.assertAllEqual(4.0, sess.run(d)) + + def testRunModifyRunTwoSessions(self): + with ops.Graph().as_default() as g: + a = constant_op.constant(1.0) + b = constant_op.constant(2.0) + c = a + b + + with session.Session(graph=g) as sess1: + with session.Session(graph=g) as sess2: + self.assertAllEqual(3.0, sess1.run(c)) + self.assertAllEqual(3.0, sess2.run(c)) + + d = b + c + d.op._update_input(0, a) # pylint: disable=protected-access + self.assertAllEqual(3.0, sess2.run(c)) + self.assertAllEqual(4.0, sess2.run(d)) + + d.op._update_input(0, b) # pylint: disable=protected-access + self.assertAllEqual(3.0, sess1.run(c)) + self.assertAllEqual(5.0, sess1.run(d)) + + with self.assertRaisesRegexp( + errors.FailedPreconditionError, + 'add.*was changed by updating input tensor after it was run'): + sess2.run(c) + + def testTwoSessionsOneRunBeforeModification(self): + with ops.Graph().as_default() as g, ops.device('/cpu:0'): + a = constant_op.constant(1.0) + b = constant_op.constant(2.0) + c = a + b + + with session.Session(graph=g) as sess1: + with session.Session(graph=g) as sess2: + sess1.run(c) + + c.op._set_device('/cpu:0') # pylint: disable=protected-access + + with self.assertRaisesRegexp( + errors.FailedPreconditionError, + 'add.*was changed by setting device after it was run'): + sess1.run(c) + + # sess2 was not run before modification + self.assertAllEqual(3.0, sess2.run(c)) + + def testTwoSessionsBothRunBeforeModification(self): + with ops.Graph().as_default() as g, ops.device('/cpu:0'): + a = constant_op.constant(1.0) + b = constant_op.constant(2.0) + c = a + b + + with session.Session(graph=g) as sess1: + with session.Session(graph=g) as sess2: + sess1.run(c) + sess2.run(c) + + c.op._set_device('/cpu:0') # pylint: disable=protected-access + + with self.assertRaisesRegexp( + errors.FailedPreconditionError, + 'add.*was changed by setting device after it was run'): + sess1.run(c) + + with self.assertRaisesRegexp( + errors.FailedPreconditionError, + 'add.*was changed by setting device after it was run'): + sess2.run(c) + + if __name__ == '__main__': googletest.main() -- GitLab From d0d85965f3dc92a1572bd0853526c657395dff99 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Nov 2017 14:21:53 -0800 Subject: [PATCH 0964/1801] Add R1 slice tests. PiperOrigin-RevId: 177362829 --- tensorflow/compiler/xla/tests/slice_test.cc | 88 +++++++++++++++------ 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index c21124750a..981d075089 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -211,6 +212,13 @@ class SliceR1Test : public ClientLibraryTestBase, } }; +string SliceR1TestDataToString(const ::testing::TestParamInfo& data) { + const R1Spec& spec = data.param; + return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0, + spec.slice_start, spec.slice_limit, + spec.slice_stride); +} + XLA_TEST_P(SliceR1Test, DoIt_F32) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_F64) { Run(GetParam()); } @@ -223,30 +231,64 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_S64) { Run(GetParam()); } -INSTANTIATE_TEST_CASE_P( // - SliceR1TestInstantiation, // - SliceR1Test, // - ::testing::Values( // - R1Spec{10, 0, 0, 1}, // - R1Spec{10, 7, 7, 1}, // - R1Spec{10, 2, 4, 1}, // - R1Spec{10, 2, 4, 2}, // - R1Spec{10, 0, 10, 1}, // - R1Spec{1024, 1024 - 4, 1024, 1}, // - R1Spec{4096, 7, 7 + 1024, 1}, // - R1Spec{10, 0, 10, 2}, // - R1Spec{10, 0, 10, 3}, // - R1Spec{10, 0, 10, 4}, // - R1Spec{10, 0, 10, 5}, // - R1Spec{10, 0, 10, 10}, // - R1Spec{500, 200, 400, 7}, // - R1Spec{4096, 1, 4095, 3}, // - R1Spec{2047, 1024 - 24, 1024 + 160, 31}, // - R1Spec{2047, 1, 2046, 3 * 128}, // - R1Spec{4096, 1024 + 3, 4095, 500}, // - R1Spec{8192, 0, 8192, 1024 * 3 + 400} // - ) // +// Tests for R1 slice ops. +// The format for each testcase is {input size, start, limit, stride}. +// clang-format off +INSTANTIATE_TEST_CASE_P( + SliceR1TestInstantiation, + SliceR1Test, + ::testing::Values( + R1Spec{10, 0, 0, 1}, + R1Spec{10, 7, 7, 1}, + R1Spec{10, 0, 5, 1}, + R1Spec{10, 3, 5, 1}, + R1Spec{10, 0, 10, 1}, + R1Spec{1024, 0, 5, 1}, + R1Spec{1024, 3, 5, 1}, + R1Spec{1024 + 17, 0, 5, 1}, + R1Spec{1024 + 17, 3, 5, 1}, + R1Spec{1024 + 17, 1024, 1024 + 6, 1}, + R1Spec{1024 + 17, 1024 + 1, 1024 + 6, 1}, + R1Spec{1024, 1024 - 4, 1024, 1}, + R1Spec{4 * 1024, 7, 7 + 1024, 1}, + R1Spec{4 * 1024, 0, 4 * 1024, 1}, + R1Spec{4 * 1024, 1, 4 * 1024 - 1, 1}, + R1Spec{4 * 1024, 1024, 3 * 1024, 1}, + R1Spec{4 * 1024, 1024 + 1, 3 * 1024 - 1, 1}, + R1Spec{16 * 1024, 0, 5, 1}, + R1Spec{16 * 1024, 3, 5, 1}, + R1Spec{16 * 1024 + 17, 0, 5, 1}, + R1Spec{16 * 1024 + 17, 3, 5, 1}, + R1Spec{16 * 1024 + 17, 16 * 1024, 16 * 1024 + 6, 1}, + R1Spec{16 * 1024 + 17, 16 * 1024 + 1, 16 * 1024 + 6, 1}, + R1Spec{64 * 1024, 0, 64 * 1024, 1}, + R1Spec{64 * 1024, 1, 64 * 1024 - 1, 1}, + R1Spec{64 * 1024, 1024, 63 * 1024, 1}, + R1Spec{64 * 1024, 1024 + 1, 63 * 1024 - 1, 1}, + R1Spec{64 * 1024, 32 * 1024, 33 * 1024, 1}, + R1Spec{64 * 1024, 32 * 1024 + 1, 33 * 1024 - 1, 1}, +// TODO(b/69425338): This uses too much memory on GPU. +#ifndef XLA_TEST_BACKEND_GPU + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1}, +#endif + R1Spec{10, 2, 4, 2}, + R1Spec{10, 0, 10, 2}, + R1Spec{10, 0, 10, 3}, + R1Spec{10, 0, 10, 4}, + R1Spec{10, 0, 10, 5}, + R1Spec{10, 0, 10, 10}, + R1Spec{500, 200, 400, 7}, + R1Spec{4096, 1, 4095, 3}, + R1Spec{2047, 1024 - 24, 1024 + 160, 31}, + R1Spec{2047, 1, 2046, 3 * 128}, + R1Spec{4096, 1024 + 3, 4095, 500}, + R1Spec{8192, 0, 8192, 1024 * 3 + 400} + ), + SliceR1TestDataToString ); +// clang-format on struct R2Spec { int64 input_dim0; -- GitLab From aeba52380f1b3bdf2ff9bd2256129e209bab08ca Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 29 Nov 2017 14:48:23 -0800 Subject: [PATCH 0965/1801] Updating references to the `tf.data` API to `tf.data` from `Datasets`. PiperOrigin-RevId: 177367024 --- tensorflow/docs_src/programmers_guide/datasets.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index f458cbcef2..073bdb7baa 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -1,16 +1,16 @@ # Importing Data -The @{tf.data.Dataset$`Dataset`} API enables you to build complex input pipelines from +The `tf.data` API enables you to build complex input pipelines from simple, reusable pieces. For example, the pipeline for an image model might aggregate data from files in a distributed file system, apply random perturbations to each image, and merge randomly selected images into a batch for training. The pipeline for a text model might involve extracting symbols from raw text data, converting them to embedding identifiers with a lookup -table, and batching together sequences of different lengths. The `Dataset` API +table, and batching together sequences of different lengths. The `tf.data` API makes it easy to deal with large amounts of data, different data formats, and complicated transformations. -The `Dataset` API introduces two new abstractions to TensorFlow: +The `tf.data` API introduces two new abstractions to TensorFlow: * A `tf.data.Dataset` represents a sequence of elements, in which each element contains one or more `Tensor` objects. For example, in an image @@ -121,7 +121,7 @@ dataset3 = dataset3.filter(lambda x, (y, z): ...) ### Creating an iterator Once you have built a `Dataset` to represent your input data, the next step is to -create an `Iterator` to access elements from that dataset. The `Dataset` API +create an `Iterator` to access elements from that dataset. The `tf.data` API currently supports the following iterators, in increasing level of sophistication: @@ -379,7 +379,7 @@ sess.run(iterator.initializer, feed_dict={features_placeholder: features, ### Consuming TFRecord data -The `Dataset` API supports a variety of file formats so that you can process +The `tf.data` API supports a variety of file formats so that you can process large datasets that do not fit in memory. For example, the TFRecord file format is a simple record-oriented binary format that many TensorFlow applications use for training data. The `tf.data.TFRecordDataset` class enables you to @@ -628,7 +628,7 @@ TODO(mrry): Add this section. ### Processing multiple epochs -The `Dataset` API offers two main ways to process multiple epochs of the same +The `tf.data` API offers two main ways to process multiple epochs of the same data. The simplest way to iterate over a dataset in multiple epochs is to use the @@ -693,7 +693,7 @@ dataset = dataset.repeat() The @{tf.train.MonitoredTrainingSession} API simplifies many aspects of running TensorFlow in a distributed setting. `MonitoredTrainingSession` uses the @{tf.errors.OutOfRangeError} to signal that training has completed, so to use it -with the `Dataset` API, we recommend using +with the `tf.data` API, we recommend using `Dataset.make_one_shot_iterator()`. For example: ```python -- GitLab From 32b861d3d4f920b46954a2e02aee1fbf46a81c63 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 29 Nov 2017 15:17:06 -0800 Subject: [PATCH 0966/1801] Automated g4 rollback of changelist 177353959 PiperOrigin-RevId: 177371177 --- tensorflow/python/client/tf_session.i | 43 -------------- tensorflow/python/client/tf_session_helper.cc | 19 ------- tensorflow/python/client/tf_session_helper.h | 14 ----- tensorflow/python/framework/ops.py | 57 ++++++------------- 4 files changed, 16 insertions(+), 117 deletions(-) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index c286d5fe47..5fa1a7e8fc 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -532,49 +532,6 @@ def TF_Reset(target, containers=None, config=None): %unignore TF_GraphGetTensorShapeHelper; %ignore TF_GraphGetTensorShape; -// We use TF_GraphSetTensorShape_wrapper instead of -// TF_GraphSetTensorShape -%ignore TF_GraphSetTensorShape; -%unignore tensorflow; -%unignore TF_GraphSetTensorShape_wrapper; - -// $input is a Python list of ints to a vector for TF_GraphSetTensorShape_wrapper -%typemap(in) (const std::vector& dims) - (std::vector dims_local){ - if ($input != Py_None) { - if (!PyList_Check($input)) { - SWIG_exception_fail(SWIG_TypeError, tensorflow::strings::Printf( - "$symname: expected list but got %s ", Py_TYPE($input)->tp_name).c_str()); - } - size_t size = PyList_Size($input); - for (int i = 0; i < size; ++i) { - PyObject* item = PyList_GetItem($input, i); - dims_local.push_back(PyInt_AS_LONG(item)); - } - $1 = &dims_local; - } else { - $1 = nullptr; - } -} - -// We use TF_GraphGetTensorShape_wrapper instead of -// TF_GraphGetTensorShape -%ignore TF_GraphGetTensorShape; -%unignore tensorflow; -%unignore TF_GraphGetTensorShape_wrapper; - -// Build a Python list of ints and return it. -%typemap(out) std::vector tensorflow::TF_GraphGetTensorShape_wrapper { - $result = PyList_New($1.size()); - if (!$result) { - SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list"); - } - - for (size_t i = 0; i < $1.size(); ++i) { - PyList_SET_ITEM($result, i, PyInt_FromLong($1[i])); - } -} - %include "tensorflow/python/client/tf_session_helper.h" %unignoreall diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index e4bf09a0ca..ad982e5dd8 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -407,23 +407,4 @@ TF_Function* TF_GraphToFunction_wrapper( opts, description, out_status); } -void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, - const std::vector& dims, - bool unknown_shape, TF_Status* status) { - if (unknown_shape) { - TF_GraphSetTensorShape(graph, output, nullptr, -1, status); - return; - } - TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status); -} - -std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, - TF_Output output, - int num_dims, - TF_Status* status) { - std::vector dims(num_dims); - TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status); - return dims; -} - } // namespace tensorflow diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h index bb7171db31..6ed08d3a58 100644 --- a/tensorflow/python/client/tf_session_helper.h +++ b/tensorflow/python/client/tf_session_helper.h @@ -168,20 +168,6 @@ TF_Function* TF_GraphToFunction_wrapper( const std::vector& inputs, const std::vector& outputs, const NameVector& output_names, const TF_FunctionOptions* opts, const char* description, TF_Status* out_status); - -// Set the shape of output. If unknown is true, `num_dims` must be set to -// -1 and `dims` is set to nullptr. -void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, - const std::vector& dims, - bool unknown_shape, TF_Status* status); - -// Return the shape of output. `num_dims` should be the output of -// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called. -std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, - TF_Output output, - int num_dims, - TF_Status* status); - } // namespace tensorflow #endif // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_ diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 975a1c87ec..2217513966 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -374,19 +374,6 @@ class Tensor(_TensorLike): A `TensorShape` representing the shape of this tensor. """ - if _USE_C_API: - graph = self._op._graph._c_graph # pylint: disable=protected-access - with errors.raise_exception_on_not_ok_status() as status: - num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(), - status) - if num_dims == -1: - dim_list = None - else: - with errors.raise_exception_on_not_ok_status() as status: - dim_list = c_api.TF_GraphGetTensorShape_wrapper( - graph, self._as_tf_output(), num_dims, status) - dim_list = [None if i == -1 else i for i in dim_list] - return tensor_shape.TensorShape(dim_list) return self._shape def __iter__(self): @@ -406,8 +393,8 @@ class Tensor(_TensorLike): yield self[i] def _shape_as_list(self): - if self.shape.ndims is not None: - return [dim.value for dim in self.shape.dims] + if self._shape.ndims is not None: + return [dim.value for dim in self._shape.dims] else: return None @@ -423,7 +410,7 @@ class Tensor(_TensorLike): Returns: Integer rank or None """ - return self.shape.ndims + return self._shape.ndims def get_shape(self): """Alias of Tensor.shape.""" @@ -454,35 +441,14 @@ class Tensor(_TensorLike): ``` Args: - shape: A `TensorShape` representing the shape of this tensor, a - `TensorShapeProto`, a list, a tuple, or None. + shape: A `TensorShape` representing the shape of this tensor. Raises: ValueError: If `shape` is not compatible with the current shape of this tensor. """ - if not _USE_C_API: - self._shape = self._shape.merge_with(shape) # pylint: disable=protected-access - return - if not isinstance(shape, tensor_shape.TensorShape): - shape = tensor_shape.TensorShape(shape) - dim_list = [] - if shape.dims is None: - unknown_shape = True - else: - unknown_shape = False - for dim in shape.dims: - if dim.value is None: - dim_list.append(-1) - else: - dim_list.append(dim.value) - with errors.raise_exception_on_not_ok_status() as status: - c_api.TF_GraphSetTensorShape_wrapper( - self._op._graph._c_graph, # pylint: disable=protected-access - self._as_tf_output(), - dim_list, - unknown_shape, - status) + # TODO(skyewm): call C API + self._shape = self._shape.merge_with(shape) @property def value_index(self): @@ -4551,11 +4517,15 @@ def control_dependencies(control_inputs): See @{tf.Graph.control_dependencies} for more details. + When eager execution is enabled, any callable object in the `control_inputs` + list will be called. + Args: control_inputs: A list of `Operation` or `Tensor` objects which must be executed or computed before running the operations defined in the context. Can also be `None` to clear the control - dependencies. + dependencies. If eager execution is enabled, any callable object in the + `control_inputs` list will be called. Returns: A context manager that specifies control dependencies for all @@ -4564,6 +4534,11 @@ def control_dependencies(control_inputs): if context.in_graph_mode(): return get_default_graph().control_dependencies(control_inputs) else: + if control_inputs: + # Excute any pending callables. + for control in control_inputs: + if callable(control): + control() return _NullContextmanager() -- GitLab From 963a521e255d2a189e349fc5c24ebc2bc032be5b Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 29 Nov 2017 15:46:42 -0800 Subject: [PATCH 0967/1801] Using the C API in eager mode for graph functions. PiperOrigin-RevId: 177375237 --- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/context.py | 15 ++ tensorflow/python/eager/function.py | 144 ++++++++++++------ tensorflow/python/eager/graph_callable.py | 18 ++- .../python/eager/graph_callable_test.py | 1 - tensorflow/python/framework/ops.py | 30 ++-- tensorflow/python/pywrap_tfe.i | 3 +- 7 files changed, 143 insertions(+), 70 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 0144f3b1e5..dc1142705a 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -540,7 +540,7 @@ def _ensure_unique_tensor_objects(parameter_positions, args): if i in parameter_positions: tid = ops.tensor_id(t) if tid in s: - args[i] = args[i]._dup() # pylint: disable=protected-access + args[i] = gen_array_ops.identity(args[i]) else: s.add(tid) return args diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 92f4e15c05..415416cfae 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -288,6 +288,21 @@ class Context(object): self._initialize_handle_and_devices() return self._num_gpus + def add_function(self, fn): + """Add a function definition to the context. + + Once added, the function (identified by its name) can be executed like any + other operation. + + Args: + fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper). + """ + with errors.raise_exception_on_not_ok_status() as status: + pywrap_tensorflow.TFE_ContextAddFunction( + self._handle, # pylint: disable=protected-access + fn, + status) + def add_function_def(self, fdef): """Add a function definition to the context. diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 2f4b59e938..092b36ff20 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -25,15 +25,19 @@ import threading import numpy as np +from tensorflow.core.framework import function_pb2 +from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import execute from tensorflow.python.eager import tape from tensorflow.python.eager.graph_only_ops import graph_placeholder +from tensorflow.python.framework import c_api_util from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module -from tensorflow.python.framework import graph_to_function_def +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import gradients_impl +from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator @@ -47,10 +51,41 @@ _scoped_captures = threading.local() _scoped_captures.tensors = None -def make_function_def(graph, operations, inputs, outputs): - """Makes function def from the given graph with the operations.""" - return graph_to_function_def.graph_to_function_def( - graph, operations, inputs, outputs) +def make_function_def(name, graph, operations, inputs, outputs): + """Makes FunctionDef proto and defined function. + + Args: + name: the function name + graph: the graph from which to build the function + operations: the operations in the function body + inputs: tensors to be used as function arguments + outputs: tensors to be returned from the function + + Returns: + fdef: a FunctionDef protocol buffer for the function + fn: a wrapped TF_Function for the function + """ + with errors.raise_exception_on_not_ok_status() as status: + fn = pywrap_tensorflow.TF_GraphToFunction_wrapper( + graph._c_graph, # pylint: disable=protected-access + compat.as_text(name), + False, + [o._c_op for o in operations], # pylint: disable=protected-access + [t._as_tf_output() for t in inputs], # pylint: disable=protected-access + [t._as_tf_output() for t in outputs], # pylint: disable=protected-access + [compat.as_text("%s" % i) for i in range(len(outputs))], + None, + compat.as_text(""), + status) + # TODO(apassos) avoid creating a FunctionDef (specially to grab the signature, + # but also in general it's nice not to depend on it. + with c_api_util.tf_buffer() as buffer_: + with errors.raise_exception_on_not_ok_status() as status: + pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status) + proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_) + fdef = function_pb2.FunctionDef() + fdef.ParseFromString(compat.as_bytes(proto_data)) + return fdef, fn @contextlib.contextmanager @@ -115,6 +150,10 @@ class CapturingGraph(ops.Graph): # for resource tensors. self._last_op_using_resource_tensor = {} + # TODO(apassos) remove once the C API is used by default. + def _use_c_api_hack(self): + return True + def clear_resource_control_flow_state(self): self._last_op_using_resource_tensor = {} @@ -207,14 +246,20 @@ def _inference_name(n): return "__inference_%s_%s" % (n, ops.uid()) +# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction +# so it doesn't have the definition-generating logic and is just a container for +# an already-defined function. class _DefinedFunction(object): """Mocks the interface of tf _DefinedFunction.""" - def __init__(self, fdef): + def __init__(self, fdef, fn): self.definition = fdef self.name = fdef.signature.name + self.signature = fdef.signature self.grad_func_name = None self.python_grad_func = None + self._c_func = fn + self._grad_func = None def _map_sequence_obj_to_idx(sequence): @@ -250,6 +295,7 @@ class GraphModeFunction(object): input_placeholders, extra_inputs, fdef, + fn, graph, operations, func_outputs, @@ -263,7 +309,7 @@ class GraphModeFunction(object): self._graph = graph self._has_backprop = False self._func_name = fdef.signature.name - self._fdef = _DefinedFunction(fdef) + self._fdef = _DefinedFunction(fdef, fn) self._num_outputs = len(fdef.signature.output_arg) self._ops = operations self._func_outputs = func_outputs @@ -283,38 +329,45 @@ class GraphModeFunction(object): with self._graph.as_default(), context.graph_mode(): c = _CapturingContext() with c: - filtered_outputs = [ - x for x in self._returns if x is not None - ] + filtered_outputs = [x for x in self._returns if x is not None] self._out_grad_placeholders = [ - graph_placeholder(x.dtype, x.shape) for x in filtered_outputs - ] + graph_placeholder(x.dtype, x.shape) for x in filtered_outputs] in_gradients = gradients_impl.gradients( filtered_outputs, self._input_placeholders, grad_ys=self._out_grad_placeholders) - shapes = [x.shape for x in in_gradients if x is not None] + shapes = tuple(x.shape for x in in_gradients if x is not None) captures = list(sorted(c.captured_tensors, key=lambda x: x.name)) - forward_function_def = make_function_def( - self._graph, self._ops, self._input_placeholders, + forward_name = _forward_name(self._func_name) + forward_function_def, forward_fn = make_function_def( + forward_name, self._graph, self._ops, self._input_placeholders, filtered_outputs + captures) - self._forward_fdef = _DefinedFunction(forward_function_def) - _register_with_name(_forward_name(self._func_name), forward_function_def) - backward_outputs = [x for x in in_gradients if x is not None] + self._forward_fdef = _DefinedFunction(forward_function_def, forward_fn) + _register(forward_fn) + backward_outputs = tuple(x for x in in_gradients if x is not None) all_inputs = self._out_grad_placeholders + captures - backward_function_def = make_function_def( - self._graph, [x.op for x in self._out_grad_placeholders - ] + list(sorted(c.known_ops, key=lambda x: x.name)), + # Excluding input ops from the body as we do not intend to execute these + # operations when the function is executed. + all_ignored_ops = frozenset(x.op for x in all_inputs) + # Enforce a deterministic order of operations in the generated graph. This + # means rerunning the function-defining code will always define the same + # function, which is useful if we serialize this etc. + fdef_ops = tuple(x for x in sorted(c.known_ops, key=lambda x: x.name) + if x not in all_ignored_ops) + bname = _backward_name(self._func_name) + backward_function_def, backward_fn = make_function_def( + bname, self._graph, fdef_ops, all_inputs, backward_outputs) - _register_with_name(_backward_name(self._func_name), backward_function_def) + _register(backward_fn) self._backward_function = GraphModeFunction( - all_inputs, [], backward_function_def, self._graph, c.known_ops, - in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes) + all_inputs, [], backward_function_def, backward_fn, self._graph, + c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs), + shapes) def _backprop_call(self, args): """Calls the wrapped function and records the result on a tape.""" all_args = args + self._extra_inputs - signature = self._forward_fdef.definition.signature + signature = self._forward_fdef.signature ctx = context.context() if ctx.in_graph_mode(): g = ops.get_default_graph() @@ -325,7 +378,7 @@ class GraphModeFunction(object): return ops.internal_convert_to_tensor(x, ctx=ctx) op = g.create_op( signature.name, [make_tensor(x) for x in all_args], - [dtypes_module.DType(x.type) for x in signature.output_arg], + tuple(dtypes_module.DType(x.type) for x in signature.output_arg), op_def=signature, name="FunctionCall", compute_shapes=False) @@ -361,11 +414,8 @@ class GraphModeFunction(object): if v._trainable: # pylint: disable=protected-access tape.watch_variable(v) - tensor_inputs = [ - x for x in nest.flatten(args) - if isinstance(x, ops.Tensor) - ] - + tensor_inputs = [x for x in nest.flatten(args) + if isinstance(x, ops.Tensor)] if tape.should_record(tensor_inputs) or tape.should_record( self._extra_inputs): if not self._has_backprop: @@ -384,7 +434,7 @@ class GraphModeFunction(object): args = list(tensor_inputs) + self._extra_inputs op = g.create_op( signature.name, [ops.convert_to_tensor(x) for x in args], - [dtypes_module.DType(x.type) for x in signature.output_arg], + tuple(dtypes_module.DType(x.type) for x in signature.output_arg), op_def=signature, name="FunctionCall", compute_shapes=False) @@ -469,29 +519,32 @@ def _defun_internal(name, func, args, kwds): extra_inputs = [] extra_placeholders = [] outputs_list = nest.flatten(func_outputs) - output_shapes = [x.shape for x in outputs_list if x is not None] + output_shapes = tuple(x.shape for x in outputs_list if x is not None) - flat_inputs = [ - x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor) - ] + flat_inputs = [x for x in nest.flatten(func_inputs) + if isinstance(x, ops.Tensor)] all_inputs = flat_inputs + list(extra_placeholders) - + all_ignored_ops = frozenset(x.op for x in all_inputs) func_def_outputs = [x for x in outputs_list if x is not None] - inference_function_def = make_function_def( - tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs) + fname = _inference_name(name) + operations = tuple(x for x in tmp_graph.get_operations() + if x not in all_ignored_ops) + inference_function_def, fn = make_function_def( + fname, tmp_graph, operations, all_inputs, func_def_outputs) # Register any other functions defined in the graph # TODO(ashankar): Oh lord, forgive me for this lint travesty. for f in tmp_graph._functions.values(): # pylint: disable=protected-access # TODO(ashankar): What about the gradient registry? - _register_with_name(f.name, f.definition) - _register_with_name(_inference_name(name), inference_function_def) + _register(f._c_func) # pylint: disable=protected-access + _register(fn) return GraphModeFunction( all_inputs, extra_inputs, inference_function_def, + fn, tmp_graph, - tmp_graph.get_operations(), + operations, func_outputs, _map_sequence_obj_to_idx(func_def_outputs), output_shapes, @@ -517,10 +570,9 @@ def _cache_key(x): return x -def _register_with_name(name, fdef): - """Registers the function `fdef` with the name `name`.""" - fdef.signature.name = name - context.context().add_function_def(fdef) +def _register(fn): + """Registers the function `fn`.""" + context.context().add_function(fn) # TODO(apassos): better error messages for non-hashable arguments. diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index faf0ac88bc..3da100d800 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -318,7 +318,9 @@ def _graph_callable_internal(func, shape_and_dtypes): placeholder_inputs = flat_inputs+ list(extra_placeholders) func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)] - initializer_function_def = function.make_function_def( + initialization_name = function._inference_name(func.__name__) # pylint: disable=protected-access + initializer_function_def, initializer_fn = function.make_function_def( + initialization_name, tmp_graph, initializing_operations, placeholder_inputs, @@ -327,13 +329,13 @@ def _graph_callable_internal(func, shape_and_dtypes): # Also, what about the gradient registry of these functions? Those need to be # addressed as well. for f in tmp_graph._functions.values(): # pylint: disable=protected-access - function._register_with_name(f.name, f.definition) # pylint: disable=protected-access - function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access - initializer_function_def) + function._register(f._c_func) # pylint: disable=protected-access + function._register(initializer_fn) # pylint: disable=protected-access initializer_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, initializer_function_def, + initializer_fn, tmp_graph, initializing_operations, func_outputs, @@ -342,18 +344,20 @@ def _graph_callable_internal(func, shape_and_dtypes): capture_func_def_outputs = [ x for x in captured_outlist if isinstance(x, tf_ops.Tensor)] - captured_function_def = function.make_function_def( + captured_function_name = function._inference_name(func.__name__) # pylint: disable=protected-access + captured_function_def, capturing_fn = function.make_function_def( + captured_function_name, tmp_graph, capturing_operations, placeholder_inputs, capture_func_def_outputs) - function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access - captured_function_def) + function._register(capturing_fn) # pylint: disable=protected-access captured_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, captured_function_def, + capturing_fn, tmp_graph, capturing_operations, captured_outputs, diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py index 548e16a909..b9e6ca2a93 100644 --- a/tensorflow/python/eager/graph_callable_test.py +++ b/tensorflow/python/eager/graph_callable_test.py @@ -152,7 +152,6 @@ class GraphCallableTest(test.TestCase): self.assertAllEqual(5, f(constant_op.constant(2))) def testNestedFunction(self): - # TensorFlow function (which is what would be used in TensorFlow graph # construction). @function.Defun(dtypes.int32, dtypes.int32) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 2217513966..36daf59647 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -599,11 +599,6 @@ class Tensor(_TensorLike): """ return _eval_using_default_session(self, feed_dict, self.graph, session) - def _dup(self): - ret = copy.copy(self) - ret._id = uid() # pylint: disable=protected-access - return ret - # TODO(agarwal): consider getting rid of this. class _EagerTensorBase(Tensor): @@ -729,9 +724,6 @@ class _EagerTensorBase(Tensor): return new_tensor # pylint: enable=protected-access - def _dup(self): - return self._copy(device_name=self.device) - @property def shape(self): return tensor_shape.TensorShape(self._shape_tuple()) @@ -1794,7 +1786,7 @@ class Operation(object): c_api.SetRequestedDevice( self._graph._c_graph, # pylint: disable=protected-access self._c_op, # pylint: disable=protected-access - _device_string(device)) + compat.as_text(_device_string(device))) else: self._node_def.device = _device_string(device) @@ -2083,7 +2075,7 @@ class Operation(object): def _set_attr(self, attr_name, attr_value): """Private method used to set an attribute in the node_def.""" - if _USE_C_API: + if self._c_op: buf = c_api.TF_NewBufferFromString( compat.as_bytes(attr_value.SerializeToString())) try: @@ -2652,11 +2644,16 @@ class Graph(object): # TODO(skyewm): fold as much of the above as possible into the C # implementation - if _USE_C_API: + if _USE_C_API or self._use_c_api_hack(): self._scoped_c_graph = c_api_util.ScopedTFGraph() else: self._scoped_c_graph = None + # TODO(apassos) remove once the C API is used by default. + def _use_c_api_hack(self): + """Temporary hack; can be overridden to force C API usage.""" + return False + def _convert_stack(self, stack, include_func_start_lineno=False): """Converts a stack extracted using _extract_stack() to a traceback stack. @@ -2985,9 +2982,14 @@ class Graph(object): # Add function to graph # pylint: disable=protected-access if self._c_graph: - assert function._c_func, ( - "Cannot add function created without C API support to graph " - "created with C API support") + # Handle functions created without using the C API. TODO(apassos,skyewm) + # remove this when all functions are generated using the C API by default + # as this will be unnecessary. + if not function._c_func: + with errors.raise_exception_on_not_ok_status() as status: + serialized = function.definition.SerializeToString() + function._c_func = c_api.TF_FunctionImportFunctionDef( + serialized, status) with errors.raise_exception_on_not_ok_status() as status: gradient = function._grad_func._c_func if function._grad_func else None c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient, diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 82b154164e..82750e9e49 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -18,6 +18,7 @@ limitations under the License. %rename("%s") TFE_NewContext; %rename("%s") TFE_DeleteContext; %rename("%s") TFE_ContextListDevices; +%rename("%s") TFE_ContextAddFunction; %rename("%s") TFE_ContextAddFunctionDef; %rename("%s") TFE_OpNameGetAttrType; %rename("%s") TFE_Py_InitEagerTensor; @@ -149,7 +150,7 @@ limitations under the License. } $1 = &temp; $1->resize(PyInt_AsLong($input), nullptr); -} +} // Create new Status object. %typemap(in, numinputs=0) TF_Status *out_status { -- GitLab From 2f56cb1e1764efcfbd8277b77eccd9c4f3e9fc64 Mon Sep 17 00:00:00 2001 From: Robin Richtsfeld Date: Thu, 30 Nov 2017 01:02:05 +0100 Subject: [PATCH 0968/1801] Format AUTHORS file (#14881) --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index a46ae7e616..aa4be5169d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -7,4 +7,4 @@ # The email address is not required for organizations. Google Inc. -Yuan Tang terrytangyuan@gmail.com +Yuan Tang -- GitLab From 5ba21573f6763ac32b1c3dcc10d78d3e9f71c2d5 Mon Sep 17 00:00:00 2001 From: ted chang Date: Wed, 29 Nov 2017 16:02:25 -0800 Subject: [PATCH 0969/1801] I would like to clarify checkpoint is not a file. I also changed some (#14961) wording which may lead readers to use a physical file name in their code. --- .../docs_src/programmers_guide/saved_model.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index 34e8e5faf5..54693f3d4d 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -33,7 +33,7 @@ roughly speaking, map variable names to tensor values. Create a `Saver` with `tf.train.Saver()` to manage all variables in the model. For example, the following snippet demonstrates how to call the -`tf.train.Saver.save` method to save variables to a checkpoint file: +`tf.train.Saver.save` method to save variables to checkpoint files: ```python # Create some variables. @@ -58,7 +58,7 @@ with tf.Session() as sess: dec_v2.op.run() # Save the variables to disk. save_path = saver.save(sess, "/tmp/model.ckpt") - print("Model saved in file: %s" % save_path) + print("Model saved in path: %s" % save_path) ``` @@ -66,10 +66,10 @@ with tf.Session() as sess: ### Restoring variables The `tf.train.Saver` object not only saves variables to checkpoint files, it -also restores variables. Note that when you restore variables from a file you -do not have to initialize them beforehand. For example, the following snippet -demonstrates how to call the `tf.train.Saver.restore` method to restore -variables from a checkpoint file: +also restores variables. Note that when you restore variables you do not have +to initialize them beforehand. For example, the following snippet demonstrates +how to call the `tf.train.Saver.restore` method to restore variables from the +checkpoint files: ```python tf.reset_default_graph() @@ -92,6 +92,12 @@ with tf.Session() as sess: print("v2 : %s" % v2.eval()) ``` +Notes: + +* There is not a physical file called "/tmp/model.ckpt". It is the **prefix** + of filenames created for the checkpoint. Users only interact with the + prefix instead of physical checkpoint files. + ### Choosing which variables to save and restore -- GitLab From 495bb7b9f6b55b0e431fc604ad9dbf5415016d90 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 30 Nov 2017 03:02:49 +0300 Subject: [PATCH 0970/1801] Speed up safe_strtod and safe_strtof functions by using double-conversion library (#12102) * Add double-conversion library to third_party Link: https://github.com/google/double-conversion Version: v3.0.0 With patch (add case insensivity for special values): https://github.com/google/double-conversion/pull/47 * Strnlen function to str_util.h Returns the length of the given null-terminated byte string 'str'. Returns strsz if the null character was not found in the first 'strsz' bytes of 'str'. * Use double-conversion lib for safe_strto{d,f} implementation * Path for double-conversion library is no longer needed, just use new version of this lib * Use more obvious arguments names for Strnlen function * Fixed DoubleToBuffer function in case of [full_precision_needed] use safe_strtod insetead of raw inplementation * add double-conversion compilation in makefile builds - created compilation script: compile_double_conversion.sh - added running compile_double_conversion.sh in {build_all_android.sh, build_all_ios.sh, build_all_linux.sh} - added linking libdouble-conversion.a to build in tensorflow/contrib/makefile/Makefile * Build double-conversion directly inside tensorflow/contrib/makefile/Makefile * Put double_conversion to the rest of targets including number.* as src * Sort external dependencies and make Sanity Checks happy * Add test cases with trailing and leading whitespace characters to //tensorflow/core:lib_strings_numbers_test * Remove octal numbers support from safe_strtod and safe_strtof * Add double-conversion library to the cmake build * Prepend lib/ to cmake lib path as per mrry instructions See https://www.github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/cmake/external/zlib.cmake#L24 * Add to double-conversion win32 build output path: fix up windows CMake build --- tensorflow/contrib/cmake/CMakeLists.txt | 4 + .../cmake/external/double_conversion.cmake | 54 +++++++++ tensorflow/contrib/makefile/Makefile | 9 +- .../contrib/makefile/download_dependencies.sh | 2 + tensorflow/core/BUILD | 9 +- tensorflow/core/lib/strings/numbers.cc | 112 +++++------------- tensorflow/core/lib/strings/numbers_test.cc | 69 +++++++++++ tensorflow/core/lib/strings/str_util.cc | 8 ++ tensorflow/core/lib/strings/str_util.h | 5 + tensorflow/core/lib/strings/str_util_test.cc | 8 ++ tensorflow/workspace.bzl | 9 ++ 11 files changed, 203 insertions(+), 86 deletions(-) create mode 100644 tensorflow/contrib/cmake/external/double_conversion.cmake diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 77a3fc0c83..ba708673b0 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -160,6 +160,7 @@ include(protobuf) include(re2) include(cub) include(sqlite) +include(double_conversion) if (tensorflow_BUILD_CC_TESTS) include(googletest) endif() @@ -178,6 +179,7 @@ set(tensorflow_EXTERNAL_LIBRARIES ${protobuf_STATIC_LIBRARIES} ${re2_STATIC_LIBRARIES} ${sqlite_STATIC_LIBRARIES} + ${double_conversion_STATIC_LIBRARIES} ) set(tensorflow_EXTERNAL_DEPENDENCIES zlib_copy_headers_to_destination @@ -196,6 +198,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES fft2d re2 sqlite_copy_headers_to_destination + double_conversion ) include_directories( @@ -218,6 +221,7 @@ include_directories( ${PROTOBUF_INCLUDE_DIRS} ${re2_INCLUDE_DIR} ${sqlite_INCLUDE_DIR} + ${double_conversion_INCLUDE_DIR} ) if(tensorflow_ENABLE_SSL_SUPPORT) diff --git a/tensorflow/contrib/cmake/external/double_conversion.cmake b/tensorflow/contrib/cmake/external/double_conversion.cmake new file mode 100644 index 0000000000..527ccdc8d8 --- /dev/null +++ b/tensorflow/contrib/cmake/external/double_conversion.cmake @@ -0,0 +1,54 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +include (ExternalProject) + +set(double_conversion_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/double_conversion/src/double_conversion) +set(double_conversion_URL https://github.com/google/double-conversion.git) +set(double_conversion_TAG 5664746) +set(double_conversion_BUILD ${double_conversion_INCLUDE_DIR}) +set(double_conversion_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.so) +set(double_conversion_INCLUDES ${double_conversion_BUILD}) + +if(WIN32) + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/$(Configuration)/double-conversion.lib) +else() + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.a) +endif() + +set(double_conversion_HEADERS + "${double_conversion_INCLUDE_DIR}/double-conversion/bignum-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/cached-powers.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/double-conversion.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/fixed-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/strtod.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/bignum.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/diy-fp.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/fast-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/ieee.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/utils.h" +) + +ExternalProject_Add(double_conversion + PREFIX double_conversion + GIT_REPOSITORY ${double_conversion_URL} + GIT_TAG ${double_conversion_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + INSTALL_COMMAND "" + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON +) diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index e2e6c05591..617ef25fa4 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -89,6 +89,7 @@ HOST_INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(HOST_GENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include @@ -125,7 +126,9 @@ PROTO_TEXT := $(HOST_BINDIR)proto_text # The list of dependencies is derived from the Bazel build file by running # the gen_file_lists.sh script on a system with a working Bazel setup. PROTO_TEXT_CC_FILES := $(shell cat $(MAKEFILE_DIR)/proto_text_cc_files.txt) -PROTO_TEXT_PB_CC_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) +PROTO_TEXT_PB_CC_LIST := \ + $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) \ + $(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) PROTO_TEXT_PB_H_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_h_files.txt) # Locations of the intermediate files proto_text generates. @@ -171,6 +174,7 @@ INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) @@ -326,6 +330,8 @@ $(MARCH_OPTION) \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ +-I$(MAKEFILE_DIR)/gen/protobuf/include \ -I$(MAKEFILE_DIR)/gen/protobuf_android/$(ANDROID_ARCH)/include \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) @@ -543,6 +549,7 @@ $(wildcard tensorflow/core/platform/*/*.cc) \ $(wildcard tensorflow/core/platform/*/*/*.cc) \ $(wildcard tensorflow/core/util/*.cc) \ $(wildcard tensorflow/core/util/*/*.cc) \ +$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) \ tensorflow/core/util/version_info.cc # Remove duplicates (for version_info.cc) CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index a2b444d53a..e8021a53af 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -26,6 +26,7 @@ NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\. PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.zip" "${BZL_FILE_PATH}" | head -n1)" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, @@ -74,6 +75,7 @@ download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync" download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf" download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2" download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d" +download_and_extract "${DOUBLE_CONVERSION_URL}" "${DOWNLOADS_DIR}/double_conversion" download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl" replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 0f2b77e9b4..bd7617fa96 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -276,7 +276,9 @@ cc_library( "platform/types.h", ] + glob(tf_additional_proto_hdrs()) + glob(tf_env_time_hdrs()), copts = tf_copts(), - deps = tf_lib_proto_parsing_deps(), + deps = tf_lib_proto_parsing_deps() + [ + "@double_conversion//:double-conversion", + ], ) # This build rule (along with :lib_internal, :framework, and @@ -1024,6 +1026,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1048,6 +1051,7 @@ cc_library( ":protos_all_cc_impl", "//third_party/eigen3", "//third_party/fft2d:fft2d_headers", + "@double_conversion//:double-conversion", "@fft2d//:fft2d", "@gemmlowp//:gemmlowp", "@nsync//:nsync_cpp", @@ -1114,6 +1118,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@protobuf_archive//:protobuf", ], alwayslink = 1, @@ -1136,6 +1141,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1497,6 +1503,7 @@ cc_library( "//tensorflow/core/platform/default/build_config:platformlib", "@snappy", "@zlib_archive//:zlib", + "@double_conversion//:double-conversion", "@protobuf_archive//:protobuf", ] + tf_protos_all_impl(), ) diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc index 302a6967e3..b3cca504e1 100644 --- a/tensorflow/core/lib/strings/numbers.cc +++ b/tensorflow/core/lib/strings/numbers.cc @@ -23,6 +23,9 @@ limitations under the License. #include #include +#include "double-conversion/double-conversion.h" + +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -32,72 +35,15 @@ namespace tensorflow { namespace { -template -T locale_independent_strtonum(const char* str, const char** endptr) { - static const std::unordered_map special_nums = { - {"inf", std::numeric_limits::infinity()}, - {"+inf", std::numeric_limits::infinity()}, - {"-inf", -std::numeric_limits::infinity()}, - {"infinity", std::numeric_limits::infinity()}, - {"+infinity", std::numeric_limits::infinity()}, - {"-infinity", -std::numeric_limits::infinity()}, - {"nan", std::numeric_limits::quiet_NaN()}, - {"+nan", std::numeric_limits::quiet_NaN()}, - {"-nan", -std::numeric_limits::quiet_NaN()}, - }; - std::stringstream s(str); - - // Check if str is one of the special numbers. - string special_num_str; - s >> special_num_str; - - for (int i = 0; i < special_num_str.length(); ++i) { - special_num_str[i] = - std::tolower(special_num_str[i], std::locale::classic()); - } - - auto entry = special_nums.find(special_num_str); - if (entry != special_nums.end()) { - *endptr = str + (s.eof() ? static_cast(strlen(str)) - : s.tellg()); - return entry->second; - } else { - // Perhaps it's a hex number - if (special_num_str.compare(0, 2, "0x") == 0 || - special_num_str.compare(0, 3, "-0x") == 0) { - return strtol(str, const_cast(endptr), 16); - } - } - // Reset the stream - s.str(str); - s.clear(); - // Use the "C" locale - s.imbue(std::locale::classic()); - - T result; - s >> result; - - // Set to result to what strto{f,d} functions would have returned. If the - // number was outside the range, the stringstream sets the fail flag, but - // returns the +/-max() value, whereas strto{f,d} functions return +/-INF. - if (s.fail()) { - if (result == std::numeric_limits::max()) { - result = std::numeric_limits::infinity(); - s.clear(s.rdstate() & ~std::ios::failbit); - } else if (result == -std::numeric_limits::max()) { - result = -std::numeric_limits::infinity(); - s.clear(s.rdstate() & ~std::ios::failbit); - } - } - - if (endptr) { - *endptr = - str + - (s.fail() ? static_cast(0) - : (s.eof() ? static_cast(strlen(str)) - : s.tellg())); - } - return result; +static inline const double_conversion::StringToDoubleConverter& StringToFloatConverter() { + const static double_conversion::StringToDoubleConverter converter( + double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES + | double_conversion::StringToDoubleConverter::ALLOW_HEX + | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES + | double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY, + 0., 0., "inf", "nan" + ); + return converter; } } // namespace @@ -165,8 +111,8 @@ char* DoubleToBuffer(double value, char* buffer) { // larger than the precision we asked for. DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); - full_precision_needed = - locale_independent_strtonum(buffer, nullptr) != value; + auto parsed_value = double{}; + full_precision_needed = !safe_strtod(buffer, &parsed_value) || parsed_value != value; } if (full_precision_needed) { @@ -302,25 +248,23 @@ bool safe_strtou32(StringPiece str, uint32* value) { } bool safe_strtof(const char* str, float* value) { - const char* endptr; - *value = locale_independent_strtonum(str, &endptr); - while (isspace(*endptr)) ++endptr; - // Ignore range errors from strtod/strtof. - // The values it returns on underflow and - // overflow are the right fallback in a - // robust setting. - return *str != '\0' && *endptr == '\0'; + int processed_characters_count = -1; + auto len = str_util::Strnlen(str, kFastToBufferSize); + *value = StringToFloatConverter().StringToFloat( + str, + len, + &processed_characters_count); + return processed_characters_count > 0; } bool safe_strtod(const char* str, double* value) { - const char* endptr; - *value = locale_independent_strtonum(str, &endptr); - while (isspace(*endptr)) ++endptr; - // Ignore range errors from strtod/strtof. - // The values it returns on underflow and - // overflow are the right fallback in a - // robust setting. - return *str != '\0' && *endptr == '\0'; + int processed_characters_count = -1; + auto len = str_util::Strnlen(str, kFastToBufferSize); + *value = StringToFloatConverter().StringToDouble( + str, + len, + &processed_characters_count); + return processed_characters_count > 0; } char* FloatToBuffer(float value, char* buffer) { diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc index e15161de66..df395c301e 100644 --- a/tensorflow/core/lib/strings/numbers_test.cc +++ b/tensorflow/core/lib/strings/numbers_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include +#include #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -277,7 +278,40 @@ TEST(safe_strtof, Float) { EXPECT_TRUE(safe_strtof("-0x2A", &result)); EXPECT_EQ(-42.0f, result); + EXPECT_TRUE(safe_strtof(" -0x2", &result)); + EXPECT_EQ(-2.0f, result); + + EXPECT_TRUE(safe_strtof("8 \t", &result)); + EXPECT_EQ(8.0f, result); + + EXPECT_TRUE(safe_strtof("\t20.0\t ", &result)); + EXPECT_EQ(20.0f, result); + EXPECT_FALSE(safe_strtof("-infinity is awesome", &result)); + + EXPECT_TRUE(safe_strtof("-inf", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("+inf", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("InF", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("-INF", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtof("-nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtof("-NaN", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtof("+NAN", &result)); + EXPECT_TRUE(std::isnan(result)); } TEST(safe_strtod, Double) { @@ -296,6 +330,41 @@ TEST(safe_strtod, Double) { EXPECT_TRUE(safe_strtod("1e-325", &result)); EXPECT_EQ(0, result); + + EXPECT_TRUE(safe_strtod(" -0x1c", &result)); + EXPECT_EQ(-28.0, result); + + EXPECT_TRUE(safe_strtod("50 \t", &result)); + EXPECT_EQ(50.0, result); + + EXPECT_TRUE(safe_strtod("\t82.0\t ", &result)); + EXPECT_EQ(82.0, result); + + EXPECT_FALSE(safe_strtod("infinity", &result)); + + EXPECT_TRUE(safe_strtod("-inf", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("+inf", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("InF", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("-INF", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtod("-nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtod("-NaN", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtod("+NAN", &result)); + EXPECT_TRUE(std::isnan(result)); } } // namespace strings diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index d28857803d..0ae6c66080 100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -452,5 +452,13 @@ bool SplitAndParseAsFloats(StringPiece text, char delim, result); } +size_t Strnlen(const char* str, const size_t string_max_len) { + size_t len = 0; + while (len < string_max_len && str[len] != '\0') { + ++len; + } + return len; +} + } // namespace str_util } // namespace tensorflow diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h index 8cea0f0718..b0d774a05c 100644 --- a/tensorflow/core/lib/strings/str_util.h +++ b/tensorflow/core/lib/strings/str_util.h @@ -209,6 +209,11 @@ std::vector Split(StringPiece text, char delims, Predicate p) { return Split(text, StringPiece(&delims, 1), p); } +// Returns the length of the given null-terminated byte string 'str'. +// Returns 'string_max_len' if the null character was not found in the first +// 'string_max_len' bytes of 'str'. +size_t Strnlen(const char* str, const size_t string_max_len); + } // namespace str_util } // namespace tensorflow diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc index d5909d17aa..3a8de7c96b 100644 --- a/tensorflow/core/lib/strings/str_util_test.cc +++ b/tensorflow/core/lib/strings/str_util_test.cc @@ -430,4 +430,12 @@ TEST(StringReplace, EmptyStringReplaceAll) { EXPECT_EQ("", str_util::StringReplace("", "a", "X", /*replace_all=*/true)); } +TEST(Strnlen, Basic) { + EXPECT_EQ(0, str_util::Strnlen("ab", 0)); + EXPECT_EQ(1, str_util::Strnlen("a", 1)); + EXPECT_EQ(2, str_util::Strnlen("abcd", 2)); + EXPECT_EQ(3, str_util::Strnlen("abc", 10)); + EXPECT_EQ(4, str_util::Strnlen("a \t\n", 10)); +} + } // namespace tensorflow diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 7d07769a45..6b13271002 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -828,6 +828,15 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], ) + native.http_archive( + name = "double_conversion", + urls = [ + "https://github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.zip", + ], + sha256 = "a0c49fb3cc8d34b2230d278a115f1bb266bcfcaae10400b84dc2a3b7dc2c8bc6", + strip_prefix = "double-conversion-5664746c5e64dc265e7fbc1a890a6698e6ad0ebb", + ) + native.new_http_archive( name = "tflite_mobilenet", build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), -- GitLab From f42fde42beda0b6080f7051238cfb7f5036d861c Mon Sep 17 00:00:00 2001 From: Christoph Boeddeker Date: Thu, 30 Nov 2017 01:03:38 +0100 Subject: [PATCH 0971/1801] improved estimator.export_savedmodel exception (#14979) --- tensorflow/python/estimator/export/export.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index 3b295a7e35..51075731dd 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -191,7 +191,8 @@ def build_all_signature_defs(receiver_tensors, if not isinstance(receiver_tensors, dict): receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors} if export_outputs is None or not isinstance(export_outputs, dict): - raise ValueError('export_outputs must be a dict.') + raise ValueError('export_outputs must be a dict and not' + '{}'.format(type(export_outputs))) signature_def_map = {} excluded_signatures = {} -- GitLab From 8a4d84969130162ee001fa52bac51e730129399b Mon Sep 17 00:00:00 2001 From: Gary Deer Date: Wed, 29 Nov 2017 18:04:23 -0600 Subject: [PATCH 0972/1801] Feature Request: C++ gradient for LRN (#13987) * WIP: added stub method and failing test * test with default bias,alpha,beta,depth_radius passes * Made changes based on code review Swapped out LRN for LRNGrad renamed to LRNGradHelper * Fixed call to LRNGrad with proper namespace * fixed function call to LRNGrad, all tests pass somehow * fixed function call to LRNGrad, tests pass * minor formatting changes based on clang-format suggestions * formatting changes and updated test based on code review * trimmed the tensor shape to prevent test timeout --- tensorflow/cc/gradients/nn_grad.cc | 12 ++++++++++++ tensorflow/cc/gradients/nn_grad_test.cc | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 09fadfcab5..13a3bba5e6 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -196,6 +196,18 @@ Status MaxPoolGradV2Helper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("MaxPoolV2", MaxPoolGradV2Helper); +Status LRNGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs){ + internal::LRNGrad::Attrs grad_attrs; + + auto dx = internal::LRNGrad(scope, grad_inputs[0], op.input(0), op.output(0), + grad_attrs); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("LRN", LRNGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index ac66f51cf0..f9063e8365 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -191,5 +191,12 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) { RunTest(x, x_init_value, y, y_shape); } +TEST_F(NNGradTest, LRN){ + TensorShape x_shape({1, 1, 2, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = LRN(scope_, x); + RunTest(x, x_shape, y, x_shape); +} + } // namespace } // namespace tensorflow -- GitLab From bc7180f02002788a7b57b36b14ceb7a47d6c76f4 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 29 Nov 2017 16:12:31 -0800 Subject: [PATCH 0973/1801] Fix more clang-tidy warnings: - Parameter names consistent in function declarations and definitions - Class members naming PiperOrigin-RevId: 177379085 --- tensorflow/compiler/xla/literal_util.h | 6 +++--- tensorflow/compiler/xla/reference_util.h | 2 +- tensorflow/compiler/xla/shape_layout.cc | 8 ++++---- tensorflow/compiler/xla/shape_layout.h | 13 +++++++------ tensorflow/compiler/xla/shape_util.cc | 4 ++-- tensorflow/compiler/xla/shape_util.h | 2 +- tensorflow/compiler/xla/statusor_test.cc | 14 +++++++------- .../compiler/xla/tests/client_library_test_base.h | 2 +- tensorflow/compiler/xla/util.cc | 6 +++--- 9 files changed, 29 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index f37e529caf..069d1b33ca 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -285,11 +285,11 @@ class Literal { std::unique_ptr Relayout(const Layout& new_layout, const ShapeIndex& shape_index = {}) const; - // Creates a new literal by reshaping this literal to have 'shape'. Both the - // original shape and 'shape' must contain the same number of elements. The + // Creates a new literal by reshaping this literal to have the given + // dimensions. The total number of elements must not change; The // implementation currently only supports monotonic dim0-major layouts. StatusOr> Reshape( - tensorflow::gtl::ArraySlice shape) const; + tensorflow::gtl::ArraySlice dimensions) const; // Creates a new literal by reordering the dimensions of this literal. // The given `permutation` must be a permutation of the dimension numbers diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index ee244e9a66..58e1a84461 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -70,7 +70,7 @@ class ReferenceUtil { // dilation factors. static std::unique_ptr> ConvArray4DGeneralDimensionsDilated( const Array4D& lhs, const Array4D& rhs, - std::pair stride, Padding padding, + std::pair kernel_stride, Padding padding, std::pair lhs_dilation, std::pair rhs_dilation, ConvolutionDimensionNumbers dnums); diff --git a/tensorflow/compiler/xla/shape_layout.cc b/tensorflow/compiler/xla/shape_layout.cc index 5bf9842a6c..789eba5780 100644 --- a/tensorflow/compiler/xla/shape_layout.cc +++ b/tensorflow/compiler/xla/shape_layout.cc @@ -32,13 +32,13 @@ tensorflow::Status ShapeLayout::CopyLayoutFromShape(const Shape& other_shape) { return tensorflow::Status::OK(); } -tensorflow::Status ShapeLayout::AssignLayoutToShape(Shape* other_shape) const { - if (!ShapeUtil::Compatible(*other_shape, shape_)) { +tensorflow::Status ShapeLayout::AssignLayoutToShape(Shape* to_shape) const { + if (!ShapeUtil::Compatible(*to_shape, shape_)) { return InvalidArgument("Shape %s is not compatible with shape %s", - ShapeUtil::HumanString(*other_shape).c_str(), + ShapeUtil::HumanString(*to_shape).c_str(), ShapeUtil::HumanString(shape()).c_str()); } - *other_shape = shape_; + *to_shape = shape_; return tensorflow::Status::OK(); } diff --git a/tensorflow/compiler/xla/shape_layout.h b/tensorflow/compiler/xla/shape_layout.h index 92564660f2..4c83750f3e 100644 --- a/tensorflow/compiler/xla/shape_layout.h +++ b/tensorflow/compiler/xla/shape_layout.h @@ -38,18 +38,19 @@ class ShapeLayout { explicit ShapeLayout(const Shape& shape) : shape_(shape) {} // Assigns the layouts in this ShapeLayout to the Layout fields of the given - // shape. 'shape' and the shape of the ShapeLayout object must be compatible. - tensorflow::Status AssignLayoutToShape(Shape* shape) const; + // shape. 'to_shape' and the shape of the ShapeLayout object must be + // compatible. + tensorflow::Status AssignLayoutToShape(Shape* to_shape) const; // Returns true if the Layouts in this ShapeLayout match the layouts in the // given shape. Returns false otherwise. If the given shape is not compatible // with the ShapeLayout's shape, then false is returned. bool MatchesLayoutInShape(const Shape& shape) const; - // Copies the layout from the given shape into this ShapeLayout. 'shape' must - // be compatible with the ShapeLayout's shape, and 'shape' must have a layout - // (LayoutUtil::HasLayout). - tensorflow::Status CopyLayoutFromShape(const Shape& shape); + // Copies the layout from the given shape into this ShapeLayout. 'other_shape' + // must be compatible with the ShapeLayout's shape, and 'other_shape' must + // have a layout (LayoutUtil::HasLayout). + tensorflow::Status CopyLayoutFromShape(const Shape& other_shape); // Clears (Layout::Clear) all the Layouts stored in this object. void Clear(); diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 74fa0b2f2e..9e3f06e527 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -694,9 +694,9 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return LayoutUtil::ValidateLayoutInShape(shape); } -/* static */ Shape ShapeUtil::ChangeElementType(const Shape& shape, +/* static */ Shape ShapeUtil::ChangeElementType(const Shape& original, PrimitiveType type) { - Shape new_shape = shape; + Shape new_shape = original; new_shape.set_element_type(type); return new_shape; } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 2ea1bd95cb..df5b450438 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -170,7 +170,7 @@ class ShapeUtil { // As above, but for program shapes, returns a string for the form: // // (param_name: f32[42x12], ...) -> f32[24x42] - static string HumanString(const ProgramShape& shape); + static string HumanString(const ProgramShape& program_shape); // Parses a ShapeUtil::HumanString-format shape string back into a shape // object. diff --git a/tensorflow/compiler/xla/statusor_test.cc b/tensorflow/compiler/xla/statusor_test.cc index 5fa2211ac6..f9d25945bc 100644 --- a/tensorflow/compiler/xla/statusor_test.cc +++ b/tensorflow/compiler/xla/statusor_test.cc @@ -32,26 +32,26 @@ namespace { class Base1 { public: virtual ~Base1() {} - int pad; + int pad_; }; class Base2 { public: virtual ~Base2() {} - int yetotherpad; + int yetotherpad_; }; class Derived : public Base1, public Base2 { public: ~Derived() override {} - int evenmorepad; + int evenmorepad_; }; class CopyNoAssign { public: - explicit CopyNoAssign(int value) : foo(value) {} - CopyNoAssign(const CopyNoAssign& other) : foo(other.foo) {} - int foo; + explicit CopyNoAssign(int value) : foo_(value) {} + CopyNoAssign(const CopyNoAssign& other) : foo_(other.foo_) {} + int foo_; private: const CopyNoAssign& operator=(const CopyNoAssign&); @@ -253,7 +253,7 @@ TEST(StatusOr, TestCopyCtorNonAssignable) { StatusOr original(value); StatusOr copy(original); EXPECT_EQ(copy.status(), original.status()); - EXPECT_EQ(original.ValueOrDie().foo, copy.ValueOrDie().foo); + EXPECT_EQ(original.ValueOrDie().foo_, copy.ValueOrDie().foo_); } TEST(StatusOr, TestCopyCtorStatusOKConverting) { diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 1d27880fb1..d8fe12a72d 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -194,7 +194,7 @@ class ClientLibraryTestBase : public ::testing::Test { tensorflow::gtl::ArraySlice arguments); void ComputeAndCompareTuple( ComputationBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, ErrorSpec abs_error); + tensorflow::gtl::ArraySlice arguments, ErrorSpec error); // Convenience method for running a built computation and comparing the result // with the HloEvaluator. diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc index e595df3052..fe5d29a6b6 100644 --- a/tensorflow/compiler/xla/util.cc +++ b/tensorflow/compiler/xla/util.cc @@ -191,9 +191,9 @@ std::vector ComposePermutations(tensorflow::gtl::ArraySlice p1, return output; } -bool IsIdentityPermutation(tensorflow::gtl::ArraySlice p) { - for (int64 i = 0; i < p.size(); ++i) { - if (p[i] != i) { +bool IsIdentityPermutation(tensorflow::gtl::ArraySlice permutation) { + for (int64 i = 0; i < permutation.size(); ++i) { + if (permutation[i] != i) { return false; } } -- GitLab From 4ada275eed7472ae32c67a1ec0b9b1dc8d80d1f0 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Wed, 29 Nov 2017 16:33:56 -0800 Subject: [PATCH 0974/1801] Change `tf.contrib.distributions` docstring examples to use `tfd` alias rather than `ds`, `bs`. PiperOrigin-RevId: 177381853 --- .../ops/bijectors/absolute_value_impl.py | 4 ++- .../bijectors/masked_autoregressive_impl.py | 28 +++++++++---------- .../python/ops/bijectors/permute_impl.py | 4 +-- .../python/ops/bijectors/reshape_impl.py | 4 +-- .../distributions/python/ops/cauchy.py | 22 ++++++++------- .../distributions/python/ops/deterministic.py | 6 ++-- .../distributions/python/ops/gumbel.py | 8 ++++-- .../distributions/python/ops/independent.py | 10 +++---- .../distributions/python/ops/inverse_gamma.py | 5 ++-- .../distributions/python/ops/logistic.py | 13 ++++----- .../distributions/python/ops/mixture.py | 10 +++---- .../python/ops/mixture_same_family.py | 16 +++++------ .../distributions/python/ops/mvn_diag.py | 8 +++--- .../python/ops/mvn_diag_plus_low_rank.py | 6 ++-- .../python/ops/mvn_full_covariance.py | 6 ++-- .../python/ops/mvn_linear_operator.py | 11 ++++---- .../distributions/python/ops/mvn_tril.py | 13 +++++---- .../python/ops/poisson_lognormal.py | 5 ++-- .../distributions/python/ops/sinh_arcsinh.py | 2 +- .../python/ops/vector_diffeomixture.py | 11 ++++---- .../python/ops/vector_exponential_diag.py | 7 ++--- .../ops/vector_exponential_linear_operator.py | 11 ++++---- .../python/ops/vector_laplace_diag.py | 8 +++--- .../ops/vector_laplace_linear_operator.py | 11 ++++---- .../python/ops/vector_sinh_arcsinh_diag.py | 2 +- .../python/ops/vector_student_t.py | 6 ++-- 26 files changed, 121 insertions(+), 116 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py index b84502003a..0fe9f6aa78 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py @@ -48,7 +48,9 @@ class AbsoluteValue(bijector.Bijector): ```python - abs = ds.bijectors.AbsoluteValue() + tfd = tf.contrib.distributions + + abs = tfd.bijectors.AbsoluteValue() abs.forward([-1., 0., 1.]) ==> [1., 0., 1.] diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py index ae14288393..f51c48d2dd 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py @@ -124,17 +124,17 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): #### Example Use ```python - ds = tf.contrib.distributions - bs = tf.contrib.distributions.bijectors + tfd = tf.contrib.distributions + tfb = tfd.bijectors dims = 5 # A common choice for a normalizing flow is to use a Gaussian for the base # distribution. (However, any continuous distribution would work.) E.g., - maf = ds.TransformedDistribution( - distribution=ds.Normal(loc=0., scale=1.), - bijector=bs.MaskedAutoregressiveFlow( - shift_and_log_scale_fn=bs.masked_autoregressive_default_template( + maf = tfd.TransformedDistribution( + distribution=tfd.Normal(loc=0., scale=1.), + bijector=tfb.MaskedAutoregressiveFlow( + shift_and_log_scale_fn=tfb.masked_autoregressive_default_template( hidden_layers=[512, 512])), event_shape=[dims]) @@ -143,10 +143,10 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): maf.log_prob(0.) # Cheap; no `tf.while_loop` despite no Bijector caching. # [1] also describes an "Inverse Autoregressive Flow", e.g., - iaf = ds.TransformedDistribution( - distribution=ds.Normal(loc=0., scale=1.), - bijector=bs.Invert(bs.MaskedAutoregressiveFlow( - shift_and_log_scale_fn=bs.masked_autoregressive_default_template( + iaf = tfd.TransformedDistribution( + distribution=tfd.Normal(loc=0., scale=1.), + bijector=tfb.Invert(tfb.MaskedAutoregressiveFlow( + shift_and_log_scale_fn=tfb.masked_autoregressive_default_template( hidden_layers=[512, 512]))), event_shape=[dims]) @@ -158,10 +158,10 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): # poor choice. Here's an example of using a "shift only" version and with a # different number/depth of hidden layers. shift_only = True - maf_no_scale_hidden2 = ds.TransformedDistribution( - distribution=ds.Normal(loc=0., scale=1.), - bijector=bs.MaskedAutoregressiveFlow( - bs.masked_autoregressive_default_template( + maf_no_scale_hidden2 = tfd.TransformedDistribution( + distribution=tfd.Normal(loc=0., scale=1.), + bijector=tfb.MaskedAutoregressiveFlow( + tfb.masked_autoregressive_default_template( hidden_layers=[32], shift_only=shift_only), is_constant_jacobian=shift_only), diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py index b1d8f2f41b..8654cc39d0 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py @@ -40,9 +40,9 @@ class Permute(bijector_lib.Bijector): """Permutes the rightmost dimension of a `Tensor`. ```python - bs = tf.contrib.distributions.bijectors + tfd = tf.contrib.distributions - reverse = bs.Permute(permutation=[2, 1, 0]) + reverse = tfd.bijectors.Permute(permutation=[2, 1, 0]) reverse.forward([-1., 0., 1.]) # ==> [1., 0., -1] diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py index 1eb8e74fda..55eca06312 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py @@ -63,9 +63,9 @@ class Reshape(bijector_lib.Bijector): Example usage: ```python - bs = tf.contrib.distributions.bijectors + tfd = tf.contrib.distributions - r = bs.Reshape(event_shape_out=[1, -1]) + r = tfd.bijectors.Reshape(event_shape_out=[1, -1]) r.forward([3., 4.]) # shape [2] # ==> [[3., 4.]] # shape [1, 2] diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py index 8d59c1abfb..6f5d724a2a 100644 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -43,16 +43,17 @@ class Cauchy(distribution.Distribution): The probability density function (pdf) is, ```none - pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2)) + pdf(x; loc, scale) = 1 / (pi scale (1 + z**2)) + z = (x - loc) / scale ``` where `loc` is the location, and `scale` is the scale. The Cauchy distribution is a member of the [location-scale family]( https://en.wikipedia.org/wiki/Location-scale_family), i.e. + `Y ~ Cauchy(loc, scale)` is equivalent to, ```none X ~ Cauchy(loc=0, scale=1) - Y ~ Cauchy(loc=loc, scale=scale) Y = loc + scale * X ``` @@ -61,14 +62,16 @@ class Cauchy(distribution.Distribution): Examples of initialization of one or a batch of distributions. ```python + tfd = tf.contrib.distributions + # Define a single scalar Cauchy distribution. - dist = Cauchy(loc=0., scale=3.) + dist = tfd.Cauchy(loc=0., scale=3.) # Evaluate the cdf at 1, returning a scalar. dist.cdf(1.) # Define a batch of two scalar valued Cauchy distributions. - dist = Cauchy(loc=[1, 2.], scale=[11, 22.]) + dist = tfd.Cauchy(loc=[1, 2.], scale=[11, 22.]) # Evaluate the pdf of the first distribution on 0, and the second on 1.5, # returning a length two tensor. @@ -76,18 +79,17 @@ class Cauchy(distribution.Distribution): # Get 3 samples, returning a 3 x 2 tensor. dist.sample([3]) - ``` - - Arguments are broadcast when possible. - ```python + # Arguments are broadcast when possible. # Define a batch of two scalar valued Cauchy distributions. # Both have median 1, but different scales. - dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.]) + dist = tfd.Cauchy(loc=1., scale=[11, 22.]) + # Evaluate the pdf of both distributions on the same point, 3.0, # returning a length 2 tensor. - dist.prob(3.0) + dist.prob(3.) ``` + """ def __init__(self, diff --git a/tensorflow/contrib/distributions/python/ops/deterministic.py b/tensorflow/contrib/distributions/python/ops/deterministic.py index 850d08d1bd..8049522e9f 100644 --- a/tensorflow/contrib/distributions/python/ops/deterministic.py +++ b/tensorflow/contrib/distributions/python/ops/deterministic.py @@ -290,8 +290,10 @@ class VectorDeterministic(_BaseDeterministic): #### Examples ```python + tfd = tf.contrib.distributions + # Initialize a single VectorDeterministic supported at [0., 2.] in R^2. - constant = tf.contrib.distributions.Deterministic([0., 2.]) + constant = tfd.Deterministic([0., 2.]) constant.prob([0., 2.]) ==> 1. constant.prob([0., 3.]) @@ -299,7 +301,7 @@ class VectorDeterministic(_BaseDeterministic): # Initialize a [3] batch of constants on R^2. loc = [[0., 1.], [2., 3.], [4., 5.]] - constant = constant_lib.VectorDeterministic(loc) + constant = tfd.VectorDeterministic(loc) constant.prob([[0., 1.], [1.9, 3.], [3.99, 5.]]) ==> [1., 0., 0.] ``` diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py index ba8d3c639b..d0efaefb8e 100644 --- a/tensorflow/contrib/distributions/python/ops/gumbel.py +++ b/tensorflow/contrib/distributions/python/ops/gumbel.py @@ -62,15 +62,17 @@ class _Gumbel(distribution.Distribution): Examples of initialization of one or a batch of distributions. ```python + tfd = tf.contrib.distributions + # Define a single scalar Gumbel distribution. - dist = tf.contrib.distributions.Gumbel(loc=0., scale=3.) + dist = tfd.Gumbel(loc=0., scale=3.) # Evaluate the cdf at 1, returning a scalar. dist.cdf(1.) # Define a batch of two scalar valued Gumbels. # The first has mean 1 and scale 11, the second 2 and 22. - dist = tf.contrib.distributions.Gumbel(loc=[1, 2.], scale=[11, 22.]) + dist = tfd.Gumbel(loc=[1, 2.], scale=[11, 22.]) # Evaluate the pdf of the first distribution on 0, and the second on 1.5, # returning a length two tensor. @@ -85,7 +87,7 @@ class _Gumbel(distribution.Distribution): ```python # Define a batch of two scalar valued Logistics. # Both have mean 1, but different scales. - dist = tf.contrib.distributions.Gumbel(loc=1., scale=[11, 22.]) + dist = tfd.Gumbel(loc=1., scale=[11, 22.]) # Evaluate the pdf of both distributions on the same point, 3.0, # returning a length 2 tensor. diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py index 6a74ca9a0a..cbce005013 100644 --- a/tensorflow/contrib/distributions/python/ops/independent.py +++ b/tensorflow/contrib/distributions/python/ops/independent.py @@ -68,11 +68,11 @@ class Independent(distribution_lib.Distribution): #### Examples ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions # Make independent distribution from a 2-batch Normal. - ind = ds.Independent( - distribution=ds.Normal(loc=[-1., 1], scale=[0.1, 0.5]), + ind = tfd.Independent( + distribution=tfd.Normal(loc=[-1., 1], scale=[0.1, 0.5]), reinterpreted_batch_ndims=1) # All batch dims have been "absorbed" into event dims. @@ -80,8 +80,8 @@ class Independent(distribution_lib.Distribution): ind.event_shape # ==> [2] # Make independent distribution from a 2-batch bivariate Normal. - ind = ds.Independent( - distribution=ds.MultivariateNormalDiag( + ind = tfd.Independent( + distribution=tfd.MultivariateNormalDiag( loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5]), reinterpreted_batch_ndims=1) diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py index 956dee38a3..ee4d86867d 100644 --- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py +++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py @@ -88,8 +88,9 @@ class InverseGamma(distribution.Distribution): #### Examples ```python - dist = InverseGamma(concentration=3.0, rate=2.0) - dist2 = InverseGamma(concentration=[3.0, 4.0], rate=[2.0, 3.0]) + tfd = tf.contrib.distributions + dist = tfd.InverseGamma(concentration=3.0, rate=2.0) + dist2 = tfd.InverseGamma(concentration=[3.0, 4.0], rate=[2.0, 3.0]) ``` """ diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py index 48794a4882..473677f8d9 100644 --- a/tensorflow/contrib/distributions/python/ops/logistic.py +++ b/tensorflow/contrib/distributions/python/ops/logistic.py @@ -60,15 +60,17 @@ class Logistic(distribution.Distribution): Examples of initialization of one or a batch of distributions. ```python + tfd = tf.contrib.distributions + # Define a single scalar Logistic distribution. - dist = tf.contrib.distributions.Logistic(loc=0., scale=3.) + dist = tfd.Logistic(loc=0., scale=3.) # Evaluate the cdf at 1, returning a scalar. dist.cdf(1.) # Define a batch of two scalar valued Logistics. # The first has mean 1 and scale 11, the second 2 and 22. - dist = tf.contrib.distributions.Logistic(loc=[1, 2.], scale=[11, 22.]) + dist = tfd.Logistic(loc=[1, 2.], scale=[11, 22.]) # Evaluate the pdf of the first distribution on 0, and the second on 1.5, # returning a length two tensor. @@ -76,14 +78,11 @@ class Logistic(distribution.Distribution): # Get 3 samples, returning a 3 x 2 tensor. dist.sample([3]) - ``` - Arguments are broadcast when possible. - - ```python + # Arguments are broadcast when possible. # Define a batch of two scalar valued Logistics. # Both have mean 1, but different scales. - dist = tf.contrib.distributions.Logistic(loc=1., scale=[11, 22.]) + dist = tfd.Logistic(loc=1., scale=[11, 22.]) # Evaluate the pdf of both distributions on the same point, 3.0, # returning a length 2 tensor. diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py index e676931d91..f2d492f548 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture.py +++ b/tensorflow/contrib/distributions/python/ops/mixture.py @@ -49,13 +49,13 @@ class Mixture(distribution.Distribution): ```python # Create a mixture of two Gaussians: - ds = tf.contrib.distributions + tfd = tf.contrib.distributions mix = 0.3 - bimix_gauss = ds.Mixture( - cat=ds.Categorical(probs=[mix, 1.-mix]), + bimix_gauss = tfd.Mixture( + cat=tfd.Categorical(probs=[mix, 1.-mix]), components=[ - ds.Normal(loc=-1., scale=0.1), - ds.Normal(loc=+1., scale=0.5), + tfd.Normal(loc=-1., scale=0.1), + tfd.Normal(loc=+1., scale=0.5), ]) # Plot the PDF. diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py index 5558ef0f25..5448918a50 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py +++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py @@ -43,15 +43,14 @@ class MixtureSameFamily(distribution.Distribution): #### Examples ```python - import matplotlib.pyplot as plt - ds = tf.contrib.distributions + tfd = tf.contrib.distributions ### Create a mixture of two scalar Gaussians: - gm = ds.MixtureSameFamily( - mixture_distribution=ds.Categorical( + gm = tfd.MixtureSameFamily( + mixture_distribution=tfd.Categorical( probs=[0.3, 0.7]), - components_distribution=ds.Normal( + components_distribution=tfd.Normal( loc=[-1., 1], # One for each component. scale=[0.1, 0.5])) # And same here. @@ -63,14 +62,15 @@ class MixtureSameFamily(distribution.Distribution): # Plot PDF. x = np.linspace(-2., 3., int(1e4), dtype=np.float32) + import matplotlib.pyplot as plt plt.plot(x, gm.prob(x).eval()); ### Create a mixture of two Bivariate Gaussians: - gm = ds.MixtureSameFamily( - mixture_distribution=ds.Categorical( + gm = tfd.MixtureSameFamily( + mixture_distribution=tfd.Categorical( probs=[0.3, 0.7]), - components_distribution=ds.MultivariateNormalDiag( + components_distribution=tfd.MultivariateNormalDiag( loc=[[-1., 1], # component 1 [1, -1]], # component 2 scale_identity_multiplier=[.3, .6])) diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py index 163cf75d99..e862552880 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py @@ -84,10 +84,10 @@ class MultivariateNormalDiag( #### Examples ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions # Initialize a single 2-variate Gaussian. - mvn = ds.MultivariateNormalDiag( + mvn = tfd.MultivariateNormalDiag( loc=[1., -1], scale_diag=[1, 2.]) @@ -101,7 +101,7 @@ class MultivariateNormalDiag( mvn.prob([-1., 0]).eval() # shape: [] # Initialize a 3-batch, 2-variate scaled-identity Gaussian. - mvn = ds.MultivariateNormalDiag( + mvn = tfd.MultivariateNormalDiag( loc=[1., -1], scale_identity_multiplier=[1, 2., 3]) @@ -119,7 +119,7 @@ class MultivariateNormalDiag( mvn.prob([-1., 0]).eval() # shape: [3] # Initialize a 2-batch of 3-variate Gaussians. - mvn = ds.MultivariateNormalDiag( + mvn = tfd.MultivariateNormalDiag( loc=[[1., 2, 3], [11, 22, 33]] # shape: [2, 3] scale_diag=[[1., 2, 3], diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py index 040bc23072..413e88f03a 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py @@ -86,7 +86,7 @@ class MultivariateNormalDiagPlusLowRank( #### Examples ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions # Initialize a single 3-variate Gaussian with covariance `cov = S @ S.T`, # `S = diag(d) + U @ diag(m) @ U.T`. The perturbation, `U @ diag(m) @ U.T`, is @@ -97,7 +97,7 @@ class MultivariateNormalDiagPlusLowRank( [-1, 1], [2, -0.5]] # shape: [3, 2] m = [4., 5] # shape: [2] - mvn = ds.MultivariateNormalDiagPlusLowRank( + mvn = tfd.MultivariateNormalDiagPlusLowRank( loc=mu scale_diag=d scale_perturb_factor=U, @@ -118,7 +118,7 @@ class MultivariateNormalDiagPlusLowRank( m = [[0.1, 0.2], [0.4, 0.5]] # shape: [b, r] = [2, 2] - mvn = ds.MultivariateNormalDiagPlusLowRank( + mvn = tfd.MultivariateNormalDiagPlusLowRank( loc=mu, scale_perturb_factor=U, scale_perturb_diag=m) diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py index f9952b2069..8e69dadfb4 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py @@ -73,14 +73,14 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL): #### Examples ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions # Initialize a single 3-variate Gaussian. mu = [1., 2, 3] cov = [[ 0.36, 0.12, 0.06], [ 0.12, 0.29, -0.13], [ 0.06, -0.13, 0.26]] - mvn = ds.MultivariateNormalFullCovariance( + mvn = tfd.MultivariateNormalFullCovariance( loc=mu, covariance_matrix=cov) @@ -100,7 +100,7 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL): mu = [[1., 2, 3], [11, 22, 33]] # shape: [2, 3] covariance_matrix = ... # shape: [2, 3, 3], symmetric, positive definite. - mvn = ds.MultivariateNormalFullCovariance( + mvn = tfd.MultivariateNormalFullCovariance( loc=mu, covariance=covariance_matrix) diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py index 300bdd5f60..a739979289 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py @@ -90,8 +90,7 @@ class MultivariateNormalLinearOperator( #### Examples ```python - ds = tf.contrib.distributions - la = tf.linalg + tfd = tf.contrib.distributions # Initialize a single 3-variate Gaussian. mu = [1., 2, 3] @@ -103,9 +102,9 @@ class MultivariateNormalLinearOperator( # [ 0.2, 0.5, 0. ], # [ 0.1, -0.3, 0.4]]) - mvn = ds.MultivariateNormalLinearOperator( + mvn = tfd.MultivariateNormalLinearOperator( loc=mu, - scale=la.LinearOperatorLowerTriangular(scale)) + scale=tf.linalg.LinearOperatorLowerTriangular(scale)) # Covariance agrees with cholesky(cov) parameterization. mvn.covariance().eval() @@ -122,9 +121,9 @@ class MultivariateNormalLinearOperator( scale_diag = [[1., 2, 3], [0.5, 1, 1.5]] # shape: [2, 3] - mvn = ds.MultivariateNormalLinearOperator( + mvn = tfd.MultivariateNormalLinearOperator( loc=mu, - scale=la.LinearOperatorDiag(scale_diag)) + scale=tf.linalg.LinearOperatorDiag(scale_diag)) # Compute the pdf of two `R^3` observations; return a length-2 vector. x = [[-0.9, 0, 0.1], diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py index 260dcc18f5..6c7dc4ca7a 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py @@ -76,12 +76,13 @@ class MultivariateNormalTriL( ``` Trainable (batch) lower-triangular matrices can be created with - `ds.matrix_diag_transform()` and/or `ds.fill_triangular()` + `tf.contrib.distributions.matrix_diag_transform()` and/or + `tf.contrib.distributions.fill_triangular()` #### Examples ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions # Initialize a single 3-variate Gaussian. mu = [1., 2, 3] @@ -92,7 +93,7 @@ class MultivariateNormalTriL( # ==> [[ 0.6, 0. , 0. ], # [ 0.2, 0.5, 0. ], # [ 0.1, -0.3, 0.4]]) - mvn = ds.MultivariateNormalTriL( + mvn = tfd.MultivariateNormalTriL( loc=mu, scale_tril=scale) @@ -112,7 +113,7 @@ class MultivariateNormalTriL( mu = [[1., 2, 3], [11, 22, 33]] # shape: [2, 3] tril = ... # shape: [2, 3, 3], lower triangular, non-zero diagonal. - mvn = ds.MultivariateNormalTriL( + mvn = tfd.MultivariateNormalTriL( loc=mu, scale_tril=tril) @@ -124,9 +125,9 @@ class MultivariateNormalTriL( # Instantiate a "learnable" MVN. dims = 4 with tf.variable_scope("model"): - mvn = ds.MultivariateNormalTriL( + mvn = tfd.MultivariateNormalTriL( loc=tf.get_variable(shape=[dims], dtype=tf.float32, name="mu"), - scale_tril=ds.fill_triangular( + scale_tril=tfd.fill_triangular( tf.get_variable(shape=[dims * (dims + 1) / 2], dtype=tf.float32, name="chol_Sigma"))) ``` diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py index 8a95038a3c..96dff85665 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py +++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py @@ -107,10 +107,11 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution): #### Examples ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions + # Create two batches of PoissonLogNormalQuadratureCompounds, one with # prior `loc = 0.` and another with `loc = 1.` In both cases `scale = 1.` - pln = ds.PoissonLogNormalQuadratureCompound( + pln = tfd.PoissonLogNormalQuadratureCompound( loc=[0., -0.5], scale=1., quadrature_grid_and_probs=( diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py index b05f15771a..c4b8f055b7 100644 --- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py +++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py @@ -115,7 +115,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): tailweight: Tailweight parameter. Default is `1.0` (unchanged tailweight) distribution: `tf.Distribution`-like instance. Distribution that is transformed to produce this distribution. - Default is `ds.Normal(0., 1.)`. + Default is `tf.distributions.Normal(0., 1.)`. Must be a scalar-batch, scalar-event distribution. Typically `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is a function of non-trainable parameters. WARNING: If you backprop through diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index 92043d6a08..904724af42 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -188,8 +188,7 @@ class VectorDiffeomixture(distribution_lib.Distribution): #### Examples ```python - ds = tf.contrib.distributions - la = tf.linalg + tfd = tf.contrib.distributions # Create two batches of VectorDiffeomixtures, one with mix_loc=[0.] and # another with mix_loc=[1]. In both cases, `K=2` and the affine @@ -197,20 +196,20 @@ class VectorDiffeomixture(distribution_lib.Distribution): # k=0: loc=zeros(dims) scale=LinearOperatorScaledIdentity # k=1: loc=[2.]*dims scale=LinOpDiag dims = 5 - vdm = ds.VectorDiffeomixture( + vdm = tfd.VectorDiffeomixture( mix_loc=[[0.], [1]], mix_scale=[1.], - distribution=ds.Normal(loc=0., scale=1.), + distribution=tfd.Normal(loc=0., scale=1.), loc=[ None, # Equivalent to `np.zeros(dims, dtype=np.float32)`. np.float32([2.]*dims), ], scale=[ - la.LinearOperatorScaledIdentity( + tf.linalg.LinearOperatorScaledIdentity( num_rows=dims, multiplier=np.float32(1.1), is_positive_definite=True), - la.LinearOperatorDiag( + tf.linalg.LinearOperatorDiag( diag=np.linspace(2.5, 3.5, dims, dtype=np.float32), is_positive_definite=True), ], diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py index 356d78b67a..526fe2d39a 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py @@ -89,14 +89,13 @@ class VectorExponentialDiag( #### Examples ```python - ds = tf.contrib.distributions - la = tf.linalg + tfd = tf.contrib.distributions # Initialize a single 2-variate VectorExponential, supported on # {(x, y) in R^2 : x > 0, y > 0}. # The first component has pdf exp{-x}, the second 0.5 exp{-x / 2} - vex = ds.VectorExponentialDiag(scale_diag=[1., 2.]) + vex = tfd.VectorExponentialDiag(scale_diag=[1., 2.]) # Compute the pdf of an`R^2` observation; return a scalar. vex.prob([3., 4.]).eval() # shape: [] @@ -107,7 +106,7 @@ class VectorExponentialDiag( scale_diag = [[1., 2, 3], [0.5, 1, 1.5]] # shape: [2, 3] - vex = ds.VectorExponentialDiag(loc, scale_diag) + vex = tfd.VectorExponentialDiag(loc, scale_diag) # Compute the pdf of two `R^3` observations; return a length-2 vector. x = [[1.9, 2.2, 3.1], diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py index b313a851b3..9d5fd9ac41 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py @@ -107,16 +107,15 @@ class VectorExponentialLinearOperator( #### Examples ```python - ds = tf.contrib.distributions - la = tf.linalg + tfd = tf.contrib.distributions # Initialize a single 2-variate VectorExponential, supported on # {(x, y) in R^2 : x > 0, y > 0}. mat = [[1.0, 0.1], [0.1, 1.0]] - vex = ds.VectorExponentialLinearOperator( - scale=la.LinearOperatorFullMatrix(mat)) + vex = tfd.VectorExponentialLinearOperator( + scale=tf.linalg.LinearOperatorFullMatrix(mat)) # Compute the pdf of an`R^2` observation; return a scalar. vex.prob([1., 2.]).eval() # shape: [] @@ -127,9 +126,9 @@ class VectorExponentialLinearOperator( scale_diag = [[1., 2, 3], [0.5, 1, 1.5]] # shape: [2, 3] - vex = ds.VectorExponentialLinearOperator( + vex = tfd.VectorExponentialLinearOperator( loc=mu, - scale=la.LinearOperatorDiag(scale_diag)) + scale=tf.linalg.LinearOperatorDiag(scale_diag)) # Compute the pdf of two `R^3` observations; return a length-2 vector. x = [[1.9, 2.2, 3.1], diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py index 0e3867809a..8dd983b750 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py @@ -101,10 +101,10 @@ class VectorLaplaceDiag( #### Examples ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions # Initialize a single 2-variate VectorLaplace. - vla = ds.VectorLaplaceDiag( + vla = tfd.VectorLaplaceDiag( loc=[1., -1], scale_diag=[1, 2.]) @@ -118,7 +118,7 @@ class VectorLaplaceDiag( vla.prob([-1., 0]).eval() # shape: [] # Initialize a 3-batch, 2-variate scaled-identity VectorLaplace. - vla = ds.VectorLaplaceDiag( + vla = tfd.VectorLaplaceDiag( loc=[1., -1], scale_identity_multiplier=[1, 2., 3]) @@ -136,7 +136,7 @@ class VectorLaplaceDiag( vla.prob([-1., 0]).eval() # shape: [3] # Initialize a 2-batch of 3-variate VectorLaplace's. - vla = ds.VectorLaplaceDiag( + vla = tfd.VectorLaplaceDiag( loc=[[1., 2, 3], [11, 22, 33]] # shape: [2, 3] scale_diag=[[1., 2, 3], diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py index c7abdbb4ca..ec485c95c1 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py @@ -109,8 +109,7 @@ class VectorLaplaceLinearOperator( #### Examples ```python - ds = tf.contrib.distributions - la = tf.linalg + tfd = tf.contrib.distributions # Initialize a single 3-variate VectorLaplace with some desired covariance. mu = [1., 2, 3] @@ -124,9 +123,9 @@ class VectorLaplaceLinearOperator( # [ 0.1, -0.3, 0.4]]) # Divide scale by sqrt(2) so that the final covariance will be what we want. - vla = ds.VectorLaplaceLinearOperator( + vla = tfd.VectorLaplaceLinearOperator( loc=mu, - scale=la.LinearOperatorLowerTriangular(scale / tf.sqrt(2))) + scale=tf.linalg.LinearOperatorLowerTriangular(scale / tf.sqrt(2.))) # Covariance agrees with cholesky(cov) parameterization. vla.covariance().eval() @@ -143,9 +142,9 @@ class VectorLaplaceLinearOperator( scale_diag = [[1., 2, 3], [0.5, 1, 1.5]] # shape: [2, 3] - vla = ds.VectorLaplaceLinearOperator( + vla = tfd.VectorLaplaceLinearOperator( loc=mu, - scale=la.LinearOperatorDiag(scale_diag)) + scale=tf.linalg.LinearOperatorDiag(scale_diag)) # Compute the pdf of two `R^3` observations; return a length-2 vector. x = [[-0.9, 0, 0.1], diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py index 544a871070..e1ccf11645 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py @@ -143,7 +143,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): broadcastable with `event_shape`. distribution: `tf.Distribution`-like instance. Distribution from which `k` iid samples are used as input to transformation `F`. Default is - `ds.Normal(0., 1.)`. + `tf.distributions.Normal(loc=0., scale=1.)`. Must be a scalar-batch, scalar-event distribution. Typically `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is a function of non-trainable parameters. WARNING: If you backprop through diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py index 29d41ab81c..8c67647a61 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py +++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py @@ -91,14 +91,14 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution): Extra leading dimensions, if provided, allow for batches. ```python - ds = tf.contrib.distributions + tfd = tf.contrib.distributions # Initialize a single 3-variate vector Student's t-distribution. mu = [1., 2, 3] chol = [[1., 0, 0.], [1, 3, 0], [1, 2, 3]] - vt = ds.VectorStudentT(df=2, loc=mu, scale_tril=chol) + vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol) # Evaluate this on an observation in R^3, returning a scalar. vt.prob([-1., 0, 1]) @@ -107,7 +107,7 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution): mu = [[1., 2, 3], [11, 22, 33]] chol = ... # shape 2 x 3 x 3, lower triangular, positive diagonal. - vt = ds.VectorStudentT(loc=mu, scale_tril=chol) + vt = tfd.VectorStudentT(loc=mu, scale_tril=chol) # Evaluate this on a two observations, each in R^3, returning a length two # tensor. -- GitLab From cb4ef362e4a18b3c42a2c90bdad8754d5ead4caf Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Wed, 29 Nov 2017 16:38:32 -0800 Subject: [PATCH 0975/1801] Add native dilated support for conv2d and its gradients in cudnn v>=6. PiperOrigin-RevId: 177382431 --- .../compiler/tf2xla/kernels/conv_ops.cc | 57 +++ .../fused_conv2d_bias_activation_op.cc | 2 + .../fused_conv/kernels/fused_conv_ops_gpu.h | 9 +- .../ops/fused_conv2d_bias_activation_op.cc | 6 + .../api_def/base_api/api_def_Conv2D.pbtxt | 12 +- .../api_def_Conv2DBackpropFilter.pbtxt | 10 + .../api_def_Conv2DBackpropInput.pbtxt | 10 + .../api_def/base_api/api_def_Conv3D.pbtxt | 10 + .../api_def_Conv3DBackpropFilterV2.pbtxt | 10 + .../api_def_Conv3DBackpropInputV2.pbtxt | 10 + .../api_def_DepthwiseConv2dNative.pbtxt | 10 + ..._DepthwiseConv2dNativeBackpropFilter.pbtxt | 10 + ...f_DepthwiseConv2dNativeBackpropInput.pbtxt | 10 + .../base_api/api_def_QuantizedConv2D.pbtxt | 10 + tensorflow/core/framework/common_shape_fns.cc | 23 +- .../core/framework/common_shape_fns_test.cc | 106 ++++- .../core/kernels/conv_grad_filter_ops.cc | 93 +++- .../core/kernels/conv_grad_input_ops.cc | 97 ++++- tensorflow/core/kernels/conv_grad_ops.h | 16 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 4 + tensorflow/core/kernels/conv_ops.cc | 113 +++-- tensorflow/core/kernels/conv_ops.h | 10 +- tensorflow/core/kernels/conv_ops_3d.cc | 3 + tensorflow/core/kernels/conv_ops_gpu.h | 12 +- tensorflow/core/kernels/conv_ops_test.cc | 4 + tensorflow/core/kernels/depthwise_conv_op.cc | 5 +- tensorflow/core/kernels/quantized_conv_ops.cc | 13 + tensorflow/core/ops/nn_ops.cc | 80 +++- .../conv2d_backprop_filter_grad_test.py | 54 ++- .../python/kernel_tests/conv_ops_test.py | 407 +++++++++++++++++- tensorflow/python/ops/nn_grad.py | 90 ++-- tensorflow/python/ops/nn_ops.py | 28 +- .../tools/api/golden/tensorflow.nn.pbtxt | 18 +- 33 files changed, 1181 insertions(+), 171 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc index c5017704e2..c150394c07 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc @@ -121,6 +121,7 @@ class ConvOp : public XlaOpKernel { : XlaOpKernel(ctx), num_spatial_dims_(num_spatial_dims), depthwise_(depthwise) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_)); @@ -144,6 +145,23 @@ class ConvOp : public XlaOpKernel { errors::Unimplemented("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(ctx, dilations_.size() == num_dims(), + errors::InvalidArgument("Dilations field must " + "specify ", + num_dims(), " dimensions")); + OP_REQUIRES( + ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1, + errors::Unimplemented("Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + for (int i = 0; i < num_spatial_dims_; ++i) { + int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); + OP_REQUIRES( + ctx, dilations_[input_dim] == 1, + errors::Unimplemented("Current implementation does not yet support " + "dilations in the ", + i, "th spatial dimension.")); + } + const TensorShape input_shape = ctx->InputShape(0); // Input filter is of the following dimensions: // [ filter_rows, filter_cols, ..., in_depth, out_depth] @@ -204,6 +222,7 @@ class ConvOp : public XlaOpKernel { protected: const int num_spatial_dims_; const bool depthwise_; + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_ = FORMAT_NHWC; @@ -241,6 +260,7 @@ class ConvBackpropInputOp : public XlaOpKernel { : XlaOpKernel(ctx), num_spatial_dims_(num_spatial_dims), depthwise_(depthwise) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_)); string data_format; @@ -263,6 +283,23 @@ class ConvBackpropInputOp : public XlaOpKernel { errors::Unimplemented("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(ctx, dilations_.size() == num_dims(), + errors::InvalidArgument("Dilations field must " + "specify ", + num_dims(), " dimensions")); + OP_REQUIRES( + ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1, + errors::Unimplemented("Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + for (int i = 0; i < num_spatial_dims_; ++i) { + int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); + OP_REQUIRES( + ctx, dilations_[input_dim] == 1, + errors::Unimplemented("Current implementation does not yet support " + "dilations in the ", + i, "th spatial dimension.")); + } + TensorShape input_shape; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &input_shape)); @@ -336,6 +373,7 @@ class ConvBackpropInputOp : public XlaOpKernel { protected: const int num_spatial_dims_; const bool depthwise_; + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_ = FORMAT_NHWC; @@ -373,6 +411,7 @@ class ConvBackpropFilterOp : public XlaOpKernel { : XlaOpKernel(ctx), num_spatial_dims_(num_spatial_dims), depthwise_(depthwise) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_)); string data_format; @@ -392,6 +431,23 @@ class ConvBackpropFilterOp : public XlaOpKernel { errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(ctx, dilations_.size() == num_dims(), + errors::InvalidArgument("Dilations field must " + "specify ", + num_dims(), " dimensions")); + OP_REQUIRES( + ctx, dilations_[n_dim] == 1 && dilations_[c_dim] == 1, + errors::Unimplemented("Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + for (int i = 0; i < num_spatial_dims_; ++i) { + int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); + OP_REQUIRES( + ctx, dilations_[input_dim] == 1, + errors::Unimplemented("Current implementation does not yet support " + "dilations in the ", + i, "th spatial dimension.")); + } + const TensorShape activations_shape = ctx->InputShape(0); TensorShape filter_shape; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(1, &filter_shape)); @@ -526,6 +582,7 @@ class ConvBackpropFilterOp : public XlaOpKernel { protected: const int num_spatial_dims_; const bool depthwise_; + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_ = FORMAT_NHWC; diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 88306094ab..5fec69ea43 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -493,6 +493,8 @@ void LaunchFusedConv2DBiasActivationOp:: {{conv_input_rows, conv_input_cols}}, output_depth, {{filter_rows, filter_cols}}, + // TODO(yangzihao): Add support for arbitrary dilations for fused conv. + {{1, 1}}, // dilation_rows, dilation_cols {{row_stride, col_stride}}, {{padding_rows, padding_cols}}, conv_input->dtype(), diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h index dc43af1158..fa7a3c03aa 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h @@ -30,11 +30,12 @@ class FusedConvParameters : public ConvParameters { public: FusedConvParameters(int64 batch, int64 in_depths, const SpatialArray& in, int64 out_depths, const SpatialArray& filter, - const SpatialArray& stride, const SpatialArray& padding, - DataType dtype, int device_id, bool has_side_input, + const SpatialArray& dilation, const SpatialArray& stride, + const SpatialArray& padding, DataType dtype, + int device_id, bool has_side_input, ActivationMode activation_mode) - : ConvParameters(batch, in_depths, in, out_depths, filter, stride, - padding, dtype, device_id), + : ConvParameters(batch, in_depths, in, out_depths, filter, dilation, + stride, padding, dtype, device_id), activation_mode_(activation_mode), has_side_input_(has_side_input) { hash_code_ = Hash64Combine(hash_code_, has_side_input); diff --git a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc index 887ebc5a6c..6a56237f67 100644 --- a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc @@ -52,6 +52,7 @@ REGISTER_OP("FusedConv2DBiasActivation") .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'") .Attr("filter_format: {'HWIO', 'OIHW', 'OIHW_VECT_I'} = 'HWIO'") .Attr("activation_mode: {'Relu'} = 'Relu'") + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](shape_inference::InferenceContext* c) { using shape_inference::ShapeHandle; using shape_inference::DimensionHandle; @@ -151,6 +152,11 @@ REGISTER_OP("FusedConv2DBiasActivation") kernel_height, kernel_width, input_channels % 4 ]` activation_mode: The activation applied to the output. Currently must be "Relu". + dilations: 1-D tensor of length 4. The dilation factor for each dimension + of `input`. If set to k > 1, there will be k-1 skipped cells between + each filter element on that dimension. The dimension order is determined + by the value of `data_format`, see above for details. Dilations in the + batch and depth dimensions must be 1. )doc"); } // namespace tensorflow diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt index 6522ce976f..070d6adb97 100644 --- a/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt @@ -26,7 +26,7 @@ END description: < 1, there will be k-1 skipped cells between each +filter element on that dimension. The dimension order is determined by the +value of `data_format`, see above for details. Dilations in the batch and +depth dimensions must be 1. END } summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors." diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt index 4ea3374dbb..ff2d9d71db 100644 --- a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt @@ -51,6 +51,16 @@ default format "NHWC", the data is stored in the order of: [batch, in_height, in_width, in_channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, in_channels, in_height, in_width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each filter +element on that dimension. The dimension order is determined by the value of +`data_format`, see above for details. Dilations in the batch and depth +dimensions must be 1. END } summary: "Computes the gradients of convolution with respect to the filter." diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt index 4420073e38..2de38b4263 100644 --- a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt @@ -50,6 +50,16 @@ default format "NHWC", the data is stored in the order of: [batch, in_height, in_width, in_channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, in_channels, in_height, in_width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each filter +element on that dimension. The dimension order is determined by the value of +`data_format`, see above for details. Dilations in the batch and depth +dimensions must be 1. END } summary: "Computes the gradients of convolution with respect to the input." diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt index 8f3cd4493c..d26564097e 100644 --- a/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt @@ -34,6 +34,16 @@ default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels]. Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each +filter element on that dimension. The dimension order is determined by the +value of `data_format`, see above for details. Dilations in the batch and +depth dimensions must be 1. END } summary: "Computes a 3-D convolution given 5-D `input` and `filter` tensors." diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt index 6f9b917237..937c9c8ead 100644 --- a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt @@ -43,6 +43,16 @@ default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels]. Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each +filter element on that dimension. The dimension order is determined by the +value of `data_format`, see above for details. Dilations in the batch and +depth dimensions must be 1. END } summary: "Computes the gradients of 3-D convolution with respect to the filter." diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt index 19aba156d5..414e418dc5 100644 --- a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt @@ -43,6 +43,16 @@ default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels]. Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each +filter element on that dimension. The dimension order is determined by the +value of `data_format`, see above for details. Dilations in the batch and +depth dimensions must be 1. END } summary: "Computes the gradients of 3-D convolution with respect to the input." diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt index cc10ebe923..3c313f7be6 100644 --- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt @@ -21,6 +21,16 @@ default format "NHWC", the data is stored in the order of: [batch, height, width, channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, channels, height, width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each filter +element on that dimension. The dimension order is determined by the value of +`data_format`, see above for details. Dilations in the batch and depth +dimensions must be 1. END } summary: "Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors." diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt index 9126be2afa..e66aa3b707 100644 --- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt @@ -54,6 +54,16 @@ default format "NHWC", the data is stored in the order of: [batch, height, width, channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, channels, height, width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each filter +element on that dimension. The dimension order is determined by the value of +`data_format`, see above for details. Dilations in the batch and depth +dimensions must be 1. END } summary: "Computes the gradients of depthwise convolution with respect to the filter." diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt index f1d16858db..f501ad21b3 100644 --- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt @@ -54,6 +54,16 @@ default format "NHWC", the data is stored in the order of: [batch, height, width, channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, channels, height, width]. +END + } + attr { + name: "dilations" + description: < 1, there will be k-1 skipped cells between each filter +element on that dimension. The dimension order is determined by the value of +`data_format`, see above for details. Dilations in the batch and depth +dimensions must be 1. END } summary: "Computes the gradients of depthwise convolution with respect to the input." diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt index b19bbeab12..d18bafdce9 100644 --- a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt @@ -53,6 +53,16 @@ END name: "padding" description: < 1, there will be k-1 skipped cells between each +filter element on that dimension. The dimension order is determined by the +value of `data_format`, see above for details. Dilations in the batch and +depth dimensions must be 1. END } summary: "Computes a 2D convolution given quantized 4D input and filter tensors." diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index be7f2e2808..036e3473b1 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -397,6 +397,15 @@ Status Conv2DShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR( CheckFormatConstraintsOnShape(data_format, filter_shape, "filter", c)); + std::vector dilations; + TF_RETURN_IF_ERROR(c->GetAttr("dilations", &dilations)); + + if (dilations.size() != 4) { + return errors::InvalidArgument( + "Conv2D requires the dilation attribute to contain 4 values, but got: ", + dilations.size()); + } + std::vector strides; TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides)); @@ -410,6 +419,8 @@ Status Conv2DShape(shape_inference::InferenceContext* c) { const int32 stride_rows = GetTensorDim(strides, data_format, 'H'); const int32 stride_cols = GetTensorDim(strides, data_format, 'W'); + const int32 dilation_rows = GetTensorDim(dilations, data_format, 'H'); + const int32 dilation_cols = GetTensorDim(dilations, data_format, 'W'); DimensionHandle batch_size_dim; DimensionHandle input_depth_dim; @@ -447,12 +458,12 @@ Status Conv2DShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding)); DimensionHandle output_rows, output_cols; - TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(c, input_spatial_dims[0], - filter_rows_dim, stride_rows, - padding, &output_rows)); - TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(c, input_spatial_dims[1], - filter_cols_dim, stride_cols, - padding, &output_cols)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, input_spatial_dims[0], filter_rows_dim, dilation_rows, stride_rows, + padding, &output_rows)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, input_spatial_dims[1], filter_cols_dim, dilation_cols, stride_cols, + padding, &output_cols)); ShapeHandle output_shape; TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc index ec9746b2af..5f3e5ad457 100644 --- a/tensorflow/core/framework/common_shape_fns_test.cc +++ b/tensorflow/core/framework/common_shape_fns_test.cc @@ -423,6 +423,15 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) { .Finalize(&op.node_def)); }; + // Invalid rank for input + INFER_ERROR("must be rank 4", op, "[4,4];[2,1,1,1]"); + // Invalid rank for filter + INFER_ERROR("must be rank 4", op, "[1,4,4,1];[2,1,1]"); + + // Invalid value for strides + set_op({{1, 1, 0, 1}}, "VALID", "NHWC", "HWIO"); + INFER_ERROR("must be > 0", op, "[1,2,2,1];[1,1,1,1]"); + // 1x1 filter set_op({{1, 1, 1, 1}}, "VALID", "NHWC", "HWIO"); INFER_OK(op, "[1,2,2,1];[1,1,1,1]", "[d0_0,2,2,d1_3]"); @@ -443,11 +452,6 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) { set_op({{1, 1, 2, 1}}, "VALID", "NHWC", "HWIO"); INFER_OK(op, "[1,4,4,1];[2,1,1,1]", "[d0_0,3,2,d1_3]"); - // Invalid rank for input - INFER_ERROR("must be rank 4", op, "[4,4];[2,1,1,1]"); - // Invalid rank for filter - INFER_ERROR("must be rank 4", op, "[1,4,4,1];[2,1,1]"); - // Unknown dims in the critical fields lead to partial inference. INFER_OK(op, "[1,4,4,1];[2,1,1,1]", "[d0_0,3,2,d1_3]"); INFER_OK(op, "[1,?,4,1];[2,1,1,1]", "[d0_0,?,2,d1_3]"); @@ -538,6 +542,98 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) { INFER_OK(op, "[1,4,4,?];[?,?,?,?]", "[d0_0,2,2,d1_3]"); } +TEST(CommonShapeFnsTest, Conv2DDilatedShapeTest) { + ShapeInferenceTestOp op("Conv2D"); + auto set_op = [&op](const std::vector& dilations, + const std::vector& strides, const string& padding, + const string& data_format) { + TF_CHECK_OK(NodeDefBuilder("test", "Conv2D") + .Input("input", 0, DT_FLOAT) + .Input("filter", 0, DT_FLOAT) + .Attr("dilations", dilations) + .Attr("strides", strides) + .Attr("padding", padding) + .Attr("data_format", data_format) + .Finalize(&op.node_def)); + }; + + // Invalid rank for dilation + set_op({{1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC"); + INFER_ERROR("contain 4 values", op, "[1,2,2,1];[1,1,1,1]"); + + // Invalid value for dilation + set_op({{1, 0, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC"); + INFER_ERROR("must be >= 1", op, "[1,2,2,1];[1,1,1,1]"); + + // Tests for NHWC + // 1x1 filter, 2x1 dilations, 1x1 strides + set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC"); + INFER_OK(op, "[1,2,2,1];[1,1,1,1]", "[d0_0,2,2,d1_3]"); + + // 1x1 filter, 2x1 dilations, 2x1 strides + set_op({{1, 2, 1, 1}}, {{1, 2, 1, 1}}, "VALID", "NHWC"); + INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,2,4,d1_3]"); + + // 1x1 filter, 2x1 dilations, 2x2 strides + set_op({{1, 2, 1, 1}}, {{1, 2, 2, 1}}, "VALID", "NHWC"); + INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,2,2,d1_3]"); + + // 3x3 filter, 2x1 dilations, 1x1 strides + set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC"); + INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,1,3,d1_3]"); + + // 3x3 filter, 2x1 dilations, 2x1 strides + set_op({{1, 2, 1, 1}}, {{1, 2, 1, 1}}, "VALID", "NHWC"); + INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,1,3,d1_3]"); + + // 3x3 filter, 1x2 dilations, 2x2 strides + set_op({{1, 1, 2, 1}}, {{1, 2, 2, 1}}, "VALID", "NHWC"); + INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,2,1,d1_3]"); + + // Tests for NCHW + // 1x1 filter, 2x1 dilations, 1x1 strides + set_op({{1, 1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NCHW"); + INFER_OK(op, "[1,1,2,2];[1,1,1,1]", "[d0_0,d1_3,2,2]"); + + // 1x1 filter, 2x1 dilations, 2x1 strides + set_op({{1, 1, 2, 1}}, {{1, 1, 2, 1}}, "VALID", "NCHW"); + INFER_OK(op, "[1,1,4,4];[1,1,1,1]", "[d0_0,d1_3,2,4]"); + + // 1x1 filter, 2x1 dilations, 2x2 strides + set_op({{1, 1, 2, 1}}, {{1, 1, 2, 2}}, "VALID", "NCHW"); + INFER_OK(op, "[1,1,4,4];[1,1,1,1]", "[d0_0,d1_3,2,2]"); + + // 3x3 filter, 2x1 dilations, 1x1 strides + set_op({{1, 1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NCHW"); + INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,1,3]"); + + // 3x3 filter, 2x1 dilations, 2x1 strides + set_op({{1, 1, 2, 1}}, {{1, 1, 2, 1}}, "VALID", "NCHW"); + INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,1,3]"); + + // 3x3 filter, 1x2 dilations, 2x2 strides + set_op({{1, 1, 1, 2}}, {{1, 1, 2, 2}}, "VALID", "NCHW"); + INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,2,1]"); + + // Some tests for "SAME" padding + + // 4x4 input, 1x1 filter, 2x1 dilations, 1x1 stride + set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC"); + INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,d0_1,d0_2,d1_3]"); + + // 3x3 input, 2x2 filter, 2x2 dilations, 1x1 stride + set_op({{1, 2, 2, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC"); + INFER_OK(op, "[1,3,3,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]"); + + // 4x4 input, 2x2 filter, 1x2 dilations, 2x2 stride + set_op({{1, 1, 2, 1}}, {{1, 2, 2, 1}}, "SAME", "NHWC"); + INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,2,2,d1_3]"); + + // 4x4 input, 2x2 filter, 2x2 dilations, 1x1 stride + set_op({{1, 2, 2, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC"); + INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]"); +} + TEST(CommonShapeFnsTest, Conv3DShapeTest) { ShapeInferenceTestOp op("Conv3D"); auto set_op = [&op](const std::vector& strides, diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index 3d2bb57aff..1791c51096 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -194,7 +194,23 @@ class Conv2DFastBackpropFilterOp : public OpKernel { context, (strides_[0] == 1 && strides_[3] == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0, + errors::InvalidArgument( + "Row and column strides should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + // TODO(yangzihao): Add a CPU implementation for dilated convolution. + OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1), + errors::InvalidArgument( + "Current Eigen and libxsmm implementations do not " + "yet support dilation rates larger than 1.")); } void Compute(OpKernelContext* context) override { @@ -262,6 +278,7 @@ class Conv2DFastBackpropFilterOp : public OpKernel { } private: + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; @@ -290,7 +307,23 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { context, (strides_[0] == 1 && strides_[3] == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0, + errors::InvalidArgument( + "Row and column strides should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + // TODO(yangzihao): Add a CPU implementation for dilated convolution. + OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1), + errors::InvalidArgument( + "Current libxsmm and customized CPU implementations do " + "not yet support dilation rates larger than 1.")); } void Compute(OpKernelContext* context) override { @@ -459,6 +492,7 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { } private: + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; @@ -510,10 +544,30 @@ class Conv2DSlowBackpropFilterOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); int stride_n = GetTensorDim(strides_, data_format_, 'N'); int stride_c = GetTensorDim(strides_, data_format_, 'C'); + int stride_h = GetTensorDim(strides_, data_format_, 'H'); + int stride_w = GetTensorDim(strides_, data_format_, 'W'); OP_REQUIRES( context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(context, stride_h > 0 && stride_w > 0, + errors::InvalidArgument( + "Row and column strides should be larger than 0.")); + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + int dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1, + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_)); use_cudnn_ &= CanUseCudnn(); cudnn_use_autotune_ = CudnnUseAutotune(); @@ -546,13 +600,16 @@ class Conv2DSlowBackpropFilterOp : public OpKernel { // do not support striding on the batch or depth dimension). const int stride_rows = GetTensorDim(strides_, data_format_, 'H'); const int stride_cols = GetTensorDim(strides_, data_format_, 'W'); + const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H'); + const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W'); launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, input, - stride_rows, stride_cols, padding_, filter_backprop, - data_format_); + dilation_rows, dilation_cols, stride_rows, stride_cols, padding_, + filter_backprop, data_format_); } private: + std::vector dilations_; std::vector strides_; Padding padding_; bool use_cudnn_; @@ -566,38 +623,46 @@ class Conv2DSlowBackpropFilterOp : public OpKernel { template void LaunchConv2DBackpropFilterOp::operator()( OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& out_backprop, const Tensor& input, int row_stride, - int col_stride, const Padding& padding, Tensor* filter_backprop, - TensorFormat data_format) { + const Tensor& out_backprop, const Tensor& input, int row_dilation, + int col_dilation, int row_stride, int col_stride, const Padding& padding, + Tensor* filter_backprop, TensorFormat data_format) { using perftools::gputools::dnn::AlgorithmConfig; using perftools::gputools::dnn::AlgorithmDesc; using perftools::gputools::dnn::ProfileResult; + std::vector dilations(4, 1); + dilations[GetTensorDimIndex(data_format, 'H')] = row_dilation; + dilations[GetTensorDimIndex(data_format, 'W')] = col_dilation; + std::vector strides(4, 1); strides[GetTensorDimIndex(data_format, 'H')] = row_stride; strides[GetTensorDimIndex(data_format, 'W')] = col_stride; TensorShape filter_shape = filter_backprop->shape(); ConvBackpropDimensions dims; - OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions( + OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensionsV2( "Conv2DSlowBackpropFilter", /*num_spatial_dims=*/2, input.shape(), filter_shape, out_backprop.shape(), - strides, padding, data_format, &dims)); + dilations, strides, padding, data_format, &dims)); + // TODO(yangzihao): The padding computations should be done in + // GetWindowedOutputSize() functions. const int padding_rows = (padding == VALID) ? 0 : std::max(0, (dims.spatial_dims[0].output_size - 1) * dims.spatial_dims[0].stride + - dims.spatial_dims[0].filter_size - - dims.spatial_dims[0].input_size); + (dims.spatial_dims[0].filter_size - 1) * + dims.spatial_dims[0].dilation + + 1 - dims.spatial_dims[0].input_size); const int padding_cols = (padding == VALID) ? 0 : std::max(0, (dims.spatial_dims[1].output_size - 1) * dims.spatial_dims[1].stride + - dims.spatial_dims[1].filter_size - - dims.spatial_dims[1].input_size); + (dims.spatial_dims[1].filter_size - 1) * + dims.spatial_dims[1].dilation + + 1 - dims.spatial_dims[1].input_size); // TODO(zhengxq): cuDNN only supports equal padding on both sides, so only // calling it when that is true. Remove this check when (if?) cuDNN starts @@ -730,7 +795,9 @@ void LaunchConv2DBackpropFilterOp::operator()( .set_input_feature_map_count(dims.in_depth) .set_output_feature_map_count(dims.out_depth); perftools::gputools::dnn::ConvolutionDescriptor conv_desc; - conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride) + conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation) + .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation) + .set_vertical_filter_stride(dims.spatial_dims[0].stride) .set_horizontal_filter_stride(dims.spatial_dims[1].stride) .set_zero_padding_height(padding_rows / 2) .set_zero_padding_width(padding_cols / 2); @@ -821,6 +888,8 @@ void LaunchConv2DBackpropFilterOp::operator()( dims.out_depth, // out_depths {{dims.spatial_dims[0].filter_size, // filter_rows dims.spatial_dims[1].filter_size}}, // filter_cols + {{dims.spatial_dims[0].dilation, // dilation_rows + dims.spatial_dims[1].dilation}}, // dilation_cols {{dims.spatial_dims[0].stride, // stride_rows dims.spatial_dims[1].stride}}, // stride_cols {{padding_rows, // padding_rows diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index d28f6b4d10..736241a029 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -198,7 +198,23 @@ class Conv2DFastBackpropInputOp : public OpKernel { context, (strides_[0] == 1 && strides_[3] == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0, + errors::InvalidArgument( + "Row and column strides should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + OP_REQUIRES(context, (dilations_[0] && dilations_[3]), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + // TODO(yangzihao): Add a CPU implementation for dilated convolution. + OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1), + errors::InvalidArgument( + "Current Eigen and libxsmm implementations do not " + "yet support dilation rates larger than 1.")); } void Compute(OpKernelContext* context) override { @@ -268,6 +284,7 @@ class Conv2DFastBackpropInputOp : public OpKernel { } private: + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; @@ -296,7 +313,23 @@ class Conv2DCustomBackpropInputOp : public OpKernel { context, (strides_[0] == 1 && strides_[3] == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0, + errors::InvalidArgument( + "Row and column strides should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + // TODO(yangzihao): Add a CPU implementation for dilated convolution. + OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1), + errors::InvalidArgument( + "Current libxsmm and customized CPU implementations do " + "not yet support dilation rates larger than 1.")); } void Compute(OpKernelContext* context) override { @@ -532,6 +565,7 @@ class Conv2DCustomBackpropInputOp : public OpKernel { } private: + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; @@ -586,10 +620,30 @@ class Conv2DSlowBackpropInputOp : public OpKernel { "specify 4 dimensions")); int stride_n = GetTensorDim(strides_, data_format_, 'N'); int stride_c = GetTensorDim(strides_, data_format_, 'C'); + int stride_h = GetTensorDim(strides_, data_format_, 'H'); + int stride_w = GetTensorDim(strides_, data_format_, 'W'); OP_REQUIRES( context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(context, stride_h > 0 && stride_w > 0, + errors::InvalidArgument( + "Row and column strides should be larger than 0.")); + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + int dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_)); use_cudnn_ &= CanUseCudnn(); cudnn_use_autotune_ = CudnnUseAutotune(); @@ -622,12 +676,16 @@ class Conv2DSlowBackpropInputOp : public OpKernel { // do not support striding on the batch or depth dimension). const int stride_rows = GetTensorDim(strides_, data_format_, 'H'); const int stride_cols = GetTensorDim(strides_, data_format_, 'W'); + const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H'); + const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W'); launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, filter, - stride_rows, stride_cols, padding_, in_backprop, data_format_); + dilation_rows, dilation_cols, stride_rows, stride_cols, padding_, + in_backprop, data_format_); } private: + std::vector dilations_; std::vector strides_; Padding padding_; bool use_cudnn_; @@ -641,39 +699,48 @@ class Conv2DSlowBackpropInputOp : public OpKernel { template void LaunchConv2DBackpropInputOp::operator()( OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& out_backprop, const Tensor& filter, int row_stride, - int col_stride, const Padding& padding, Tensor* in_backprop, - TensorFormat data_format) { + const Tensor& out_backprop, const Tensor& filter, int row_dilation, + int col_dilation, int row_stride, int col_stride, const Padding& padding, + Tensor* in_backprop, TensorFormat data_format) { using perftools::gputools::dnn::AlgorithmConfig; using perftools::gputools::dnn::AlgorithmDesc; using perftools::gputools::dnn::ProfileResult; std::vector strides(4, 1); - strides[GetTensorDimIndex(data_format, 'H')] = row_stride; - strides[GetTensorDimIndex(data_format, 'W')] = col_stride; + std::vector dilations(4, 1); + auto input_h = GetTensorDimIndex(data_format, 'H'); + auto input_w = GetTensorDimIndex(data_format, 'W'); + strides[input_h] = row_stride; + strides[input_w] = col_stride; + dilations[input_h] = row_dilation; + dilations[input_w] = col_dilation; TensorShape input_shape = in_backprop->shape(); const TensorShape& filter_shape = filter.shape(); ConvBackpropDimensions dims; - OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions( + OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensionsV2( "Conv2DSlowBackpropInput", /*num_spatial_dims=*/2, input_shape, filter_shape, out_backprop.shape(), - strides, padding, data_format, &dims)); + dilations, strides, padding, data_format, &dims)); + // TODO(yangzihao): The padding computations should be done in + // GetWindowedOutputSize() functions. const int padding_rows = (padding == VALID) ? 0 : std::max(0, (dims.spatial_dims[0].output_size - 1) * dims.spatial_dims[0].stride + - dims.spatial_dims[0].filter_size - - dims.spatial_dims[0].input_size); + (dims.spatial_dims[0].filter_size - 1) * + dims.spatial_dims[0].dilation + + 1 - dims.spatial_dims[0].input_size); const int padding_cols = (padding == VALID) ? 0 : std::max(0, (dims.spatial_dims[1].output_size - 1) * dims.spatial_dims[1].stride + - dims.spatial_dims[1].filter_size - - dims.spatial_dims[1].input_size); + (dims.spatial_dims[1].filter_size - 1) * + dims.spatial_dims[1].dilation + + 1 - dims.spatial_dims[1].input_size); // TODO(keveman): cuDNN only supports equal padding on both sides, so only // calling it when that is true. Remove this check when (if?) cuDNN starts @@ -789,7 +856,9 @@ void LaunchConv2DBackpropInputOp::operator()( .set_input_feature_map_count(dims.in_depth) .set_output_feature_map_count(dims.out_depth); perftools::gputools::dnn::ConvolutionDescriptor conv_desc; - conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride) + conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation) + .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation) + .set_vertical_filter_stride(dims.spatial_dims[0].stride) .set_horizontal_filter_stride(dims.spatial_dims[1].stride) .set_zero_padding_height(padding_rows / 2) .set_zero_padding_width(padding_cols / 2); @@ -875,6 +944,8 @@ void LaunchConv2DBackpropInputOp::operator()( dims.out_depth, // out_depths {{dims.spatial_dims[0].filter_size, // filter_rows dims.spatial_dims[1].filter_size}}, // filter_cols + {{dims.spatial_dims[0].dilation, // dilation_rows + dims.spatial_dims[1].dilation}}, // dilation_cols {{dims.spatial_dims[0].stride, // stride_rows dims.spatial_dims[1].stride}}, // stride_cols {{padding_rows, // padding_rows diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h index e068fb8684..535586d53a 100644 --- a/tensorflow/core/kernels/conv_grad_ops.h +++ b/tensorflow/core/kernels/conv_grad_ops.h @@ -175,15 +175,17 @@ template struct LaunchConv2DBackpropInputOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, const Tensor& out_backprop, const Tensor& filter, - int row_stride, int col_stride, const Padding& padding, - Tensor* in_backprop, TensorFormat data_format); + int row_dilation, int col_dilation, int row_stride, + int col_stride, const Padding& padding, Tensor* in_backprop, + TensorFormat data_format); }; template struct LaunchConv2DBackpropFilterOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, const Tensor& out_backprop, const Tensor& input, - int row_stride, int col_stride, const Padding& padding, + int row_dilation, int col_dilation, int row_stride, + int col_stride, const Padding& padding, Tensor* filter_backprop, TensorFormat data_format); }; @@ -191,8 +193,9 @@ struct LaunchConv2DBackpropFilterOp { template struct LaunchConv2DBackpropInputOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input, const Tensor& filter, int row_stride, - int col_stride, const Padding& padding, Tensor* output, + const Tensor& input, const Tensor& filter, int row_dilation, + int col_dilation, int row_stride, int col_stride, + const Padding& padding, Tensor* output, TensorFormat data_format); }; @@ -200,7 +203,8 @@ template struct LaunchConv2DBackpropFilterOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, const Tensor& out_backprop, const Tensor& input, - int row_stride, int col_stride, const Padding& padding, + int row_dilation, int col_dilation, int row_stride, + int col_stride, const Padding& padding, Tensor* filter_backprop, TensorFormat data_format); }; #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index c2d24d1f12..4d0f1ab317 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -645,6 +645,9 @@ class Conv3DBackpropInputOp : public OpKernel { {{input_size[0], input_size[1], input_size[2]}}, out_depth, {{filter_size[0], filter_size[1], filter_size[2]}}, + // TODO(yangzihao): Send in arbitrary dilation rates after the dilated + // conv is supported. + /*dilations=*/{{1, 1, 1}}, {{strides[0], strides[1], strides[2]}}, {{padding_planes, padding_rows, padding_cols}}, dtype, @@ -1011,6 +1014,7 @@ class Conv3DBackpropFilterOp : public OpKernel { {{input_size[0], input_size[1], input_size[2]}}, out_depth, {{filter_size[0], filter_size[1], filter_size[2]}}, + {{1, 1, 1}}, {{strides[0], strides[1], strides[2]}}, {{padding_planes, padding_rows, padding_cols}}, dtype, diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index bb67113fb0..ba40c428e4 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -112,7 +112,8 @@ struct LaunchGeneric { template struct LaunchConv2DOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input, const Tensor& filter, int row_stride, + const Tensor& input, const Tensor& filter, + int /*row_dilation*/, int /*col_dilation*/, int row_stride, int col_stride, const Padding& padding, Tensor* output, TensorFormat data_format) { if (data_format != FORMAT_NHWC) { @@ -133,8 +134,10 @@ class LaunchDeepConvOp { const Tensor& filter, int batch, int input_rows, int input_cols, int in_depth, int filter_rows, int filter_cols, int pad_rows, int pad_cols, int out_rows, - int out_cols, int out_depth, int stride_rows, int stride_cols, - Tensor* output, TensorFormat data_format) { + int /*out_cols*/, int /*out_depth*/, int /*dilation_rows*/, + int /*dilation_cols*/, int /*stride_rows*/, + int /*stride_cols*/, Tensor* /*output*/, + TensorFormat /*data_format*/) { return false; } }; @@ -147,9 +150,11 @@ class LaunchDeepConvOp { const Tensor& filter, int batch, int input_rows, int input_cols, int in_depth, int filter_rows, int filter_cols, int pad_rows, int pad_cols, int out_rows, - int out_cols, int out_depth, int stride_rows, int stride_cols, + int out_cols, int out_depth, int dilation_rows, + int dilation_cols, int stride_rows, int stride_cols, Tensor* output, TensorFormat data_format) { - if (data_format != FORMAT_NHWC || + if (data_format != FORMAT_NHWC || dilation_rows != 1 || + dilation_cols != 1 || !CanUseDeepConv2D(stride_rows, stride_cols, filter_rows, filter_cols, in_depth, out_depth, out_rows, out_cols)) { return false; @@ -187,7 +192,8 @@ class LaunchXsmmConvOp { int input_cols, int in_depth, int filter_rows, int filter_cols, int pad_rows, int pad_cols, int out_rows, int out_cols, int out_depth, int stride_rows, int stride_cols, - Tensor* output, TensorFormat data_format) { + int dilation_rows, int dilation_cols, Tensor* output, + TensorFormat data_format) { return false; } }; @@ -199,7 +205,8 @@ class LaunchXsmmConvOp { const Tensor& filter, int batch, int input_rows, int input_cols, int in_depth, int filter_rows, int filter_cols, int pad_rows, int pad_cols, int out_rows, - int out_cols, int out_depth, int stride_rows, int stride_cols, + int out_cols, int out_depth, int dilation_rows, + int dilation_cols, int stride_rows, int stride_cols, Tensor* output, TensorFormat data_format) { auto num_threads = ctx->device()->tensorflow_cpu_worker_threads()->num_threads; @@ -228,11 +235,8 @@ class LaunchXsmmConvOp { desc.options = LIBXSMM_DNN_CONV_OPTION_WU_EXT_FILTER_REDUCE_OVERWRITE; desc.datatype = LIBXSMM_DNN_DATATYPE_F32; - if (!CanUseXsmmConv2D(desc, data_format)) { - return false; - } - - if (!CanUseXsmmConv2D(desc, data_format)) { + if (dilation_rows != 1 || dilation_cols != 1 || + !CanUseXsmmConv2D(desc, data_format)) { return false; } @@ -251,6 +255,7 @@ template class Conv2DOp : public BinaryOp { public: explicit Conv2DOp(OpKernelConstruction* context) : BinaryOp(context) { + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); @@ -259,15 +264,35 @@ class Conv2DOp : public BinaryOp { OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_)); use_cudnn_ &= CanUseCudnn(); cudnn_use_autotune_ = CudnnUseAutotune(); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); OP_REQUIRES(context, strides_.size() == 4, errors::InvalidArgument("Sliding window strides field must " "specify 4 dimensions")); const int64 stride_n = GetTensorDim(strides_, data_format_, 'N'); const int64 stride_c = GetTensorDim(strides_, data_format_, 'C'); + const int64 stride_h = GetTensorDim(strides_, data_format_, 'H'); + const int64 stride_w = GetTensorDim(strides_, data_format_, 'W'); OP_REQUIRES( context, stride_n == 1 && stride_c == 1, errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES(context, stride_h > 0 && stride_w > 0, + errors::InvalidArgument( + "Row and column strides should be larger than 0.")); + + const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1, + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } @@ -334,18 +359,22 @@ class Conv2DOp : public BinaryOp { errors::InvalidArgument("batch is too large")); const int batch = static_cast(batch_raw); - // For now we take the stride from the second and third dimensions only (we - // do not support striding on the batch or depth dimension). + // For now we take the stride and dilation from the second and third + // dimensions only (we do not support striding or dilation on the batch or + // depth dimension). const int stride_rows = GetTensorDim(strides_, data_format_, 'H'); const int stride_cols = GetTensorDim(strides_, data_format_, 'W'); + const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H'); + const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W'); + int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0; - OP_REQUIRES_OK(context, - GetWindowedOutputSize(input_rows, filter_rows, stride_rows, - padding_, &out_rows, &pad_rows)); - OP_REQUIRES_OK(context, - GetWindowedOutputSize(input_cols, filter_cols, stride_cols, - padding_, &out_cols, &pad_cols)); + OP_REQUIRES_OK(context, GetWindowedOutputSizeV2( + input_rows, filter_rows, dilation_rows, + stride_rows, padding_, &out_rows, &pad_rows)); + OP_REQUIRES_OK(context, GetWindowedOutputSizeV2( + input_cols, filter_cols, dilation_cols, + stride_cols, padding_, &out_cols, &pad_cols)); TensorShape out_shape = ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth); @@ -361,6 +390,8 @@ class Conv2DOp : public BinaryOp { << ", filter_rows = " << filter_rows << ", stride_rows = " << stride_rows << ", stride_cols = " << stride_cols + << ", dilation_rows = " << dilation_rows + << ", dilation_cols = " << dilation_cols << ", out_depth = " << out_depth; // If there is nothing to compute, return. @@ -372,7 +403,8 @@ class Conv2DOp : public BinaryOp { if (LaunchXsmmConvOp::Run( context, input, filter, batch, input_rows, input_cols, in_depth, filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols, - out_depth, stride_rows, stride_cols, output, data_format_)) { + out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols, + output, data_format_)) { return; } #endif @@ -380,15 +412,18 @@ class Conv2DOp : public BinaryOp { if (LaunchDeepConvOp::Run( context, input, filter, batch, input_rows, input_cols, in_depth, filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols, - out_depth, stride_rows, stride_cols, output, data_format_)) { + out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols, + output, data_format_)) { return; } launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter, - stride_rows, stride_cols, padding_, output, data_format_); + dilation_rows, dilation_cols, stride_rows, stride_cols, padding_, + output, data_format_); } private: + std::vector dilations_; std::vector strides_; bool use_cudnn_; Padding padding_; @@ -443,9 +478,9 @@ typedef AutoTuneSingleton void LaunchConv2DOp::operator()( OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input_param, const Tensor& filter, int row_stride, - int col_stride, const Padding& padding, Tensor* output, - TensorFormat data_format) { + const Tensor& input_param, const Tensor& filter, int row_dilation, + int col_dilation, int row_stride, int col_stride, const Padding& padding, + Tensor* output, TensorFormat data_format) { using perftools::gputools::dnn::AlgorithmConfig; using perftools::gputools::dnn::AlgorithmDesc; using perftools::gputools::dnn::ProfileResult; @@ -461,8 +496,9 @@ void LaunchConv2DOp::operator()( Tensor input = input_param; - if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_stride == 1 && - col_stride == 1 && data_format == FORMAT_NHWC) { + if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_dilation == 1 && + col_dilation == 1 && row_stride == 1 && col_stride == 1 && + data_format == FORMAT_NHWC) { // 1x1 filter, so call cublas directly. const uint64 m = input.dim_size(0) * input.dim_size(1) * input.dim_size(2); const uint64 k = filter.dim_size(2); @@ -487,7 +523,8 @@ void LaunchConv2DOp::operator()( } return; } else if (filter.dim_size(0) == input.dim_size(1) && - filter.dim_size(1) == input.dim_size(2) && padding == VALID && + filter.dim_size(1) == input.dim_size(2) && row_dilation == 1 && + col_dilation == 1 && padding == VALID && data_format == FORMAT_NHWC) { // The input data and filter have the same height/width, so call cublas // directly. @@ -530,17 +567,19 @@ void LaunchConv2DOp::operator()( const int64 patch_cols = filter.dim_size(1); if (padding == SAME) { // Total padding on rows and cols is - // Pr = (R' - 1) * S + Kr - R - // Pc = (C' - 1) * S + Kc - C + // Pr = (R' - 1) * S + (Kr - 1) * Dr + 1 - R + // Pc = (C' - 1) * S + (Kc - 1) * Dc + 1 - C // where (R', C') are output dimensions, (R, C) are input dimensions, S - // is stride, (Kr, Kc) are filter dimensions. + // is stride, (Dr, Dc) are dilations, (Kr, Kc) are filter dimensions. // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top // and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means // we pad more on the right and bottom than on the top and left. padding_rows = - std::max(0, (out_rows - 1) * row_stride + patch_rows - in_rows); + std::max(0, (out_rows - 1) * row_stride + + (patch_rows - 1) * row_dilation + 1 - in_rows); padding_cols = - std::max(0, (out_cols - 1) * col_stride + patch_cols - in_cols); + std::max(0, (out_cols - 1) * col_stride + + (patch_cols - 1) * col_dilation + 1 - in_cols); const bool rows_odd = (padding_rows % 2 != 0); const bool cols_odd = (padding_cols % 2 != 0); if (rows_odd || cols_odd) { @@ -605,7 +644,9 @@ void LaunchConv2DOp::operator()( .set_input_feature_map_count(filter.dim_size(2)) .set_output_feature_map_count(filter.dim_size(3)); perftools::gputools::dnn::ConvolutionDescriptor conv_desc; - conv_desc.set_vertical_filter_stride(row_stride) + conv_desc.set_vertical_dilation_rate(row_dilation) + .set_horizontal_dilation_rate(col_dilation) + .set_vertical_filter_stride(row_stride) .set_horizontal_filter_stride(col_stride) .set_zero_padding_height(padding_rows / 2) .set_zero_padding_width(padding_cols / 2); @@ -652,6 +693,8 @@ void LaunchConv2DOp::operator()( out_depths, // out_depths {{patch_rows, // filter_rows patch_cols}}, // filter_cols + {{row_dilation, // dilation_rows + col_dilation}}, // dilation_cols {{row_stride, // stride_rows col_stride}}, // stride_cols {{padding_rows, // padding_rows diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h index e29271dff2..09a3b78776 100644 --- a/tensorflow/core/kernels/conv_ops.h +++ b/tensorflow/core/kernels/conv_ops.h @@ -34,8 +34,9 @@ class OpKernelContext; template struct LaunchConv2DOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input, const Tensor& filter, int row_stride, - int col_stride, const Padding& padding, Tensor* output, + const Tensor& input, const Tensor& filter, int row_dilation, + int col_dilation, int row_stride, int col_stride, + const Padding& padding, Tensor* output, TensorFormat data_format); }; @@ -43,8 +44,9 @@ struct LaunchConv2DOp { template struct LaunchConv2DOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input, const Tensor& filter, int row_stride, - int col_stride, const Padding& padding, Tensor* output, + const Tensor& input, const Tensor& filter, int row_dilation, + int col_dilation, int row_stride, int col_stride, + const Padding& padding, Tensor* output, TensorFormat data_format); }; #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 37cb67bc51..39202d7334 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -377,6 +377,9 @@ struct LaunchConvOp { {{in_planes, in_rows, in_cols}}, out_depth, {{filter_planes, filter_rows, filter_cols}}, + // TODO(yangzihao): Send in arbitrary dilation rates after the dilated + // conv is supported. + /*dilations=*/{{1, 1, 1}}, {{strides[0], strides[1], strides[2]}}, {{pad_planes, pad_rows, pad_cols}}, dtype, diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index c852dc9991..6f82698596 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -91,13 +91,14 @@ class ConvParameters { using SpatialArray = gtl::InlinedVector; ConvParameters(int64 batch, int64 in_depths, const SpatialArray& in, int64 out_depths, const SpatialArray& filter, - const SpatialArray& stride, const SpatialArray& padding, - DataType dtype, int device_id) + const SpatialArray& dilation, const SpatialArray& stride, + const SpatialArray& padding, DataType dtype, int device_id) : batch_(batch), in_depths_(in_depths), out_depths_(out_depths), in_(in), filter_(filter), + dilation_(dilation), stride_(stride), padding_(padding), dtype_(dtype), @@ -107,6 +108,7 @@ class ConvParameters { for (int64 val : in) hash_code_ = Hash64Combine(hash_code_, val); hash_code_ = Hash64Combine(hash_code_, out_depths); for (int64 val : filter) hash_code_ = Hash64Combine(hash_code_, val); + for (int64 val : dilation) hash_code_ = Hash64Combine(hash_code_, val); for (int64 val : stride) hash_code_ = Hash64Combine(hash_code_, val); for (int64 val : padding) hash_code_ = Hash64Combine(hash_code_, val); hash_code_ = Hash64Combine(hash_code_, dtype); @@ -128,6 +130,7 @@ class ConvParameters { "(", str_util::Join(in_, ", "), "), ", out_depths_, ", ", "(", str_util::Join(filter_, ", "), "), ", + "(", str_util::Join(dilation_, ", "), "), ", "(", str_util::Join(stride_, ", "), "), ", "(", str_util::Join(padding_, ", "), "), ", dtype_, ", ", @@ -154,11 +157,11 @@ class ConvParameters { protected: using ParameterDataType = std::tuple; + SpatialArray, SpatialArray, DataType, int>; ParameterDataType get_data_as_tuple() const { return std::make_tuple(batch_, in_depths_, in_, out_depths_, filter_, - stride_, padding_, dtype_, device_id_); + dilation_, stride_, padding_, dtype_, device_id_); } uint64 hash_code_; @@ -169,6 +172,7 @@ class ConvParameters { int64 out_depths_; SpatialArray in_; SpatialArray filter_; + SpatialArray dilation_; SpatialArray stride_; SpatialArray padding_; DataType dtype_; diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index ea54d6cf6c..666bca265c 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -43,6 +43,8 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) { 128, // out_depths {{3, // filter_rows 3}}, // filter_cols + {{1, // dilation_rows + 1}}, // dilation_cols {{1, // stride_rows 1}}, // stride_cols {{0, // padding_rows @@ -60,6 +62,8 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) { 768, // out_depths {{3, // filter_rows 3}}, // filter_cols + {{1, // dilation_rows + 1}}, // dilation_cols {{1, // stride_rows 1}}, // stride_cols {{0, // padding_rows diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index 7c43dcb670..02da64ce98 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -373,8 +373,11 @@ class DepthwiseConv2dNativeOp : public BinaryOp { // If in_depth==1, this operation is just a standard convolution, so // invoke that op. if (std::is_same::value && in_depth == 1) { + // TODO(yangzihao): Send in arbitrary dilation rates after the dilated + // conv is supported. launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter, - stride_, stride_, padding_, output, data_format_); + /*row_dilation=*/1, /*col_dilation=*/1, stride_, stride_, + padding_, output, data_format_); return; } diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc index 3b0764bb9b..f83998e0c1 100644 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ b/tensorflow/core/kernels/quantized_conv_ops.cc @@ -457,6 +457,19 @@ class QuantizedConv2DOp : public OpKernel { context, (strides_[0] == 1 && strides_[3] == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + std::vector dilations; + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations)); + OP_REQUIRES(context, dilations.size() == 4, + errors::InvalidArgument("Dilations field must " + "specify 4 dimensions")); + OP_REQUIRES(context, dilations[1] == 1 && dilations[2] == 1, + errors::InvalidArgument( + "Current implementation only supports dilated rate as 1 " + "in the row and column dimensions.")); + OP_REQUIRES(context, (dilations[0] == 1 && dilations[3] == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 654e890b57..59c4642e4d 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -513,6 +513,7 @@ REGISTER_OP("Conv2D") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn(shape_inference::Conv2DShape) .Doc(R"doc( Computes a 2-D convolution given 4-D `input` and `filter` tensors. @@ -546,7 +547,7 @@ filter: A 4-D tensor of shape output: A 4-D tensor. The dimension order is determined by the value of `data_format`, see below for details. strides: 1-D tensor of length 4. The stride of the sliding window for each - dimension of `input`. The dimension order is determined by the value of + dimension of `input`. The dimension order is determined by the value of `data_format`, see below for details. padding: The type of padding algorithm to use. data_format: Specify the data format of the input and output data. With the @@ -554,6 +555,11 @@ data_format: Specify the data format of the input and output data. With the [batch, height, width, channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, channels, height, width]. +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each + filter element on that dimension. The dimension order is determined by the + value of `data_format`, see above for details. Dilations in the batch and + depth dimensions must be 1. )doc"); REGISTER_OP("Conv2DBackpropInput") @@ -566,6 +572,7 @@ REGISTER_OP("Conv2DBackpropInput") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); @@ -589,10 +596,15 @@ padding: The type of padding algorithm to use. output: 4-D with shape `[batch, in_height, in_width, in_channels]`. Gradient w.r.t. the input of the convolution. data_format: Specify the data format of the input and output data. With the - default format "NHWC", the data is stored in the order of: - [batch, in_height, in_width, in_channels]. - Alternatively, the format could be "NCHW", the data storage order of: - [batch, in_channels, in_height, in_width]. + default format "NHWC", the data is stored in the order of: + [batch, in_height, in_width, in_channels]. + Alternatively, the format could be "NCHW", the data storage order of: + [batch, in_channels, in_height, in_width]. +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each filter + element on that dimension. The dimension order is determined by the value of + `data_format`, see above for details. Dilations in the batch and depth + dimensions must be 1. )doc"); // TODO(jeff): Instead of 'use_cudnn_for_gpu', maybe we should have a @@ -608,6 +620,7 @@ REGISTER_OP("Conv2DBackpropFilter") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -632,10 +645,15 @@ output: 4-D with shape `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. the `filter` input of the convolution. data_format: Specify the data format of the input and output data. With the - default format "NHWC", the data is stored in the order of: - [batch, in_height, in_width, in_channels]. - Alternatively, the format could be "NCHW", the data storage order of: - [batch, in_channels, in_height, in_width]. + default format "NHWC", the data is stored in the order of: + [batch, in_height, in_width, in_channels]. + Alternatively, the format could be "NCHW", the data storage order of: + [batch, in_channels, in_height, in_width]. +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each filter + element on that dimension. The dimension order is determined by the value of + `data_format`, see above for details. Dilations in the batch and depth + dimensions must be 1. )doc"); namespace { @@ -823,6 +841,7 @@ REGISTER_OP("DepthwiseConv2dNative") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn(shape_inference::DepthwiseConv2DNativeShape) .Doc(R"doc( Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors. @@ -845,7 +864,6 @@ for k in 0..in_channels-1 Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertices strides, `strides = [1, stride, stride, 1]`. - strides: 1-D of length 4. The stride of the sliding window for each dimension of `input`. padding: The type of padding algorithm to use. @@ -854,6 +872,11 @@ data_format: Specify the data format of the input and output data. With the [batch, height, width, channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, channels, height, width]. +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each filter + element on that dimension. The dimension order is determined by the value of + `data_format`, see above for details. Dilations in the batch and depth + dimensions must be 1. )doc"); REGISTER_OP("DepthwiseConv2dNativeBackpropInput") @@ -865,6 +888,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); @@ -892,6 +916,11 @@ data_format: Specify the data format of the input and output data. With the [batch, height, width, channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, channels, height, width]. +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each filter + element on that dimension. The dimension order is determined by the value of + `data_format`, see above for details. Dilations in the batch and depth + dimensions must be 1. output: 4-D with shape according to `data_format`. For example, if `data_format` is 'NHWC', output shape is `[batch, in_height, in_width, in_channels]`. Gradient w.r.t. the input of the @@ -907,6 +936,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -935,6 +965,11 @@ data_format: Specify the data format of the input and output data. With the [batch, height, width, channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, channels, height, width]. +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each filter + element on that dimension. The dimension order is determined by the value of + `data_format`, see above for details. Dilations in the batch and depth + dimensions must be 1. output: 4-D with shape `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. the `filter` input of the convolution. @@ -949,6 +984,7 @@ REGISTER_OP("Conv3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1, 1]") .SetShapeFn(shape_inference::Conv3DShape) .Doc(R"doc( Computes a 3-D convolution given 5-D `input` and `filter` tensors. @@ -970,6 +1006,11 @@ data_format: The data format of the input and output data. With the [batch, in_depth, in_height, in_width, in_channels]. Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width]. +dilations: 1-D tensor of length 5. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each + filter element on that dimension. The dimension order is determined by the + value of `data_format`, see above for details. Dilations in the batch and + depth dimensions must be 1. )doc"); REGISTER_OP("Conv3DBackpropInput") @@ -1036,6 +1077,7 @@ REGISTER_OP("Conv3DBackpropInputV2") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); @@ -1061,6 +1103,11 @@ data_format: The data format of the input and output data. With the [batch, in_depth, in_height, in_width, in_channels]. Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width]. +dilations: 1-D tensor of length 5. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each + filter element on that dimension. The dimension order is determined by the + value of `data_format`, see above for details. Dilations in the batch and + depth dimensions must be 1. )doc"); @@ -1073,6 +1120,7 @@ REGISTER_OP("Conv3DBackpropFilterV2") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -1098,6 +1146,11 @@ data_format: The data format of the input and output data. With the [batch, in_depth, in_height, in_width, in_channels]. Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width]. +dilations: 1-D tensor of length 5. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each + filter element on that dimension. The dimension order is determined by the + value of `data_format`, see above for details. Dilations in the batch and + depth dimensions must be 1. )doc"); @@ -2613,6 +2666,7 @@ REGISTER_OP("QuantizedConv2D") .Attr("out_type: quantizedtype = DT_QINT32") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c)); ShapeHandle unused; @@ -2641,7 +2695,11 @@ min_filter: The float value that the lowest quantized filter value represents. max_filter: The float value that the highest quantized filter value represents. min_output: The float value that the lowest quantized output value represents. max_output: The float value that the highest quantized output value represents. - +dilations: 1-D tensor of length 4. The dilation factor for each dimension of + `input`. If set to k > 1, there will be k-1 skipped cells between each + filter element on that dimension. The dimension order is determined by the + value of `data_format`, see above for details. Dilations in the batch and + depth dimensions must be 1. )doc"); REGISTER_OP("QuantizedMaxPool") diff --git a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py index 1679857bd5..be299beee4 100644 --- a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py +++ b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py @@ -42,17 +42,21 @@ class Conv2DBackpropFilterGradTest(test.TestCase): filter_shape = [3, 3, 4, 6] # Make a convolution op with the current settings, just to easily get # the shape of the output. - conv_out = nn_ops.conv2d(in_val, - array_ops.zeros(filter_shape), - [1, stride, stride, 1], padding) + conv_out = nn_ops.conv2d( + in_val, + array_ops.zeros(filter_shape), + strides=[1, stride, stride, 1], + padding=padding) out_backprop_shape = conv_out.get_shape().as_list() out_backprop_val = constant_op.constant( 2 * np.random.random_sample(out_backprop_shape) - 1, dtype=dtypes.float32) - output = nn_ops.conv2d_backprop_filter(in_val, filter_shape, - out_backprop_val, - [1, stride, stride, 1], - padding) + output = nn_ops.conv2d_backprop_filter( + in_val, + filter_shape, + out_backprop_val, + strides=[1, stride, stride, 1], + padding=padding) err = gradient_checker.compute_gradient_error( [in_val, out_backprop_val], [in_shape, out_backprop_shape], output, filter_shape) @@ -60,6 +64,42 @@ class Conv2DBackpropFilterGradTest(test.TestCase): err_tolerance = 2e-3 self.assertLess(err, err_tolerance) + def testGradientDilatedConv(self): + if test.is_gpu_available(cuda_only=True): + with self.test_session(use_gpu=True): + for padding in ["SAME", "VALID"]: + for stride in [1, 2]: + np.random.seed(1) + in_shape = [5, 8, 6, 4] + in_val = constant_op.constant( + 2 * np.random.random_sample(in_shape) - 1, dtype=dtypes.float32) + filter_shape = [3, 3, 4, 6] + # Make a convolution op with the current settings, + # just to easily get the shape of the output. + conv_out = nn_ops.conv2d( + in_val, + array_ops.zeros(filter_shape), + dilations=[1, 2, 2, 1], + strides=[1, stride, stride, 1], + padding=padding) + out_backprop_shape = conv_out.get_shape().as_list() + out_backprop_val = constant_op.constant( + 2 * np.random.random_sample(out_backprop_shape) - 1, + dtype=dtypes.float32) + output = nn_ops.conv2d_backprop_filter( + in_val, + filter_shape, + out_backprop_val, + dilations=[1, 2, 2, 1], + strides=[1, stride, stride, 1], + padding=padding) + err = gradient_checker.compute_gradient_error( + [in_val, out_backprop_val], [in_shape, out_backprop_shape], + output, filter_shape) + print("conv2d_backprop_filter gradient err = %g " % err) + err_tolerance = 2e-3 + self.assertLess(err, err_tolerance) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 22e5400c37..bf7245a2ae 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import os import time @@ -32,6 +33,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_impl from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops @@ -240,6 +242,77 @@ class Conv2DTest(test.TestCase): for i in range(1, len(values)): self.assertAllClose(values[0], values[i], rtol=1e-5, atol=1e-5) + def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes, + stride, dilation, padding, data_format, + use_gpu): + total_size_1 = 1 + total_size_2 = 1 + for s in tensor_in_sizes: + total_size_1 *= s + for s in filter_in_sizes: + total_size_2 *= s + + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] + x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] + with test_util.device(use_gpu): + t1 = constant_op.constant(x1, shape=tensor_in_sizes) + t2 = constant_op.constant(x2, shape=filter_in_sizes) + if isinstance(stride, collections.Iterable): + strides = list(stride) + else: + strides = [stride, stride] + if data_format == "NCHW": + t1 = test_util.NHWCToNCHW(t1) + full_strides = [1, 1] + strides + full_dilation = [1, 1] + dilation + else: + full_strides = [1] + strides + [1] + full_dilation = [1] + dilation + [1] + expected = nn_ops.convolution( + t1, + t2, + padding=padding, + strides=strides, + dilation_rate=dilation, + data_format=data_format) + computed = nn_ops.conv2d( + t1, + t2, + strides=full_strides, + dilations=full_dilation, + padding=padding, + data_format=data_format) + if data_format == "NCHW": + expected = test_util.NCHWToNHWC(expected) + computed = test_util.NCHWToNHWC(computed) + return expected, computed + + def _VerifyDilatedConvValues(self, tensor_in_sizes, filter_in_sizes, strides, + padding, dilations): + expected_results = [] + computed_results = [] + default_dilations = (dilations[0] == 1 and dilations[1] == 1) + for data_format, use_gpu in GetTestConfigs(): + # If any dilation rate is larger than 1, only do test on the GPU + # because we currently do not have a CPU implementation for arbitrary + # dilation rates. + if default_dilations or use_gpu: + expected, computed = self._ComputeReferenceDilatedConv( + tensor_in_sizes, filter_in_sizes, strides, dilations, padding, + data_format, use_gpu) + expected_results.append(expected) + computed_results.append(computed) + tolerance = 1e-2 if use_gpu else 1e-5 + expected_values = self.evaluate(expected_results) + computed_values = self.evaluate(computed_results) + for e_value, c_value in zip(expected_values, computed_values): + print("expected = ", e_value) + print("actual = ", c_value) + self.assertAllClose( + e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-6) + def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, strides, padding, expected): tensors = [] @@ -279,6 +352,16 @@ class Conv2DTest(test.TestCase): padding="VALID", expected=expected_output) + @test_util.run_in_graph_and_eager_modes() + def testConv2D2x2Filter2x1Dilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 4, 4, 1], + filter_in_sizes=[2, 2, 1, 1], + strides=[1, 1], + dilations=[2, 1], + padding="VALID") + @test_util.run_in_graph_and_eager_modes() def testConv2DEmpty(self): expected_output = [] @@ -289,6 +372,16 @@ class Conv2DTest(test.TestCase): padding="VALID", expected=expected_output) + @test_util.run_in_graph_and_eager_modes() + def testConv2DEmptyDilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[0, 2, 3, 3], + filter_in_sizes=[1, 1, 3, 3], + strides=[1, 1], + dilations=[2, 1], + padding="VALID") + @test_util.run_in_graph_and_eager_modes() def testConv2D2x2Filter(self): # The outputs are computed using third_party/py/IPython/notebook. @@ -300,6 +393,16 @@ class Conv2DTest(test.TestCase): padding="VALID", expected=expected_output) + @test_util.run_in_graph_and_eager_modes() + def testConv2D2x2FilterDilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + strides=[1, 1], + dilations=[1, 2], + padding="VALID") + @test_util.run_in_graph_and_eager_modes() def testConv2D1x2Filter(self): # The outputs are computed using third_party/py/IPython/notebook. @@ -314,6 +417,16 @@ class Conv2DTest(test.TestCase): padding="VALID", expected=expected_output) + @test_util.run_in_graph_and_eager_modes() + def testConv2D1x2FilterDilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 2, 3, 3], + strides=[1, 1], + dilations=[2, 1], + padding="VALID") + @test_util.run_in_graph_and_eager_modes() def testConv2D2x2FilterStride2(self): expected_output = [2271.0, 2367.0, 2463.0] @@ -386,13 +499,23 @@ class Conv2DTest(test.TestCase): padding="VALID", expected=[50, 60]) - # TODO this currently fails. - # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1], - # filter_in_sizes=[2, 2, 1, 1], - # strides=[4, 4], padding="SAME", - # expected=[72, 112, 392, 432]) + @test_util.run_in_graph_and_eager_modes() + def testConv2DKernelSizeMatchesInputSizeDilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 3, 3, 1], + filter_in_sizes=[2, 2, 1, 2], + strides=[1, 1], + dilations=[2, 2], + padding="VALID") + + # TODO this currently fails. + # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1], + # filter_in_sizes=[2, 2, 1, 1], + # strides=[4, 4], padding="SAME", + # expected=[72, 112, 392, 432]) - # Testing for backprops + # Testing for backprops def _RunAndVerifyBackpropInput(self, input_sizes, filter_sizes, output_sizes, strides, padding, expected, data_format, use_gpu, err): @@ -724,6 +847,255 @@ class Conv2DTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + # Testing for backprops + def _RunAndVerifyBackpropInputDilation(self, input_sizes, filter_sizes, + output_sizes, strides, dilations, + padding, data_format, use_gpu, err): + total_input_size = 1 + total_filter_size = 1 + for s in input_sizes: + total_input_size *= s + for s in filter_sizes: + total_filter_size *= s + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, total_input_size + 1)] + x2 = [f * 1.0 for f in range(1, total_filter_size + 1)] + default_dilations = (dilations[0] == 1 and dilations[1] == 1) + if default_dilations or use_gpu: + with self.test_session(use_gpu=use_gpu) as sess: + if data_format == "NCHW": + input_sizes = test_util.NHWCToNCHW(input_sizes) + t1 = constant_op.constant(x1, shape=input_sizes) + t2 = constant_op.constant(x2, shape=filter_sizes) + full_strides = [1] + strides + [1] + full_dilations = [1] + dilations + [1] + if data_format == "NCHW": + full_strides = test_util.NHWCToNCHW(full_strides) + full_dilations = test_util.NHWCToNCHW(full_dilations) + conv_forward = nn_ops.conv2d( + t1, + t2, + strides=full_strides, + dilations=full_dilations, + padding=padding, + data_format=data_format) + conv_forward_2 = nn_ops.convolution( + t1, + t2, + padding=padding, + strides=strides, + dilation_rate=dilations, + data_format=data_format) + if data_format == "NCHW": + conv_forward = test_util.NCHWToNHWC(conv_forward) + conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2) + conv = gradients_impl.gradients(conv_forward, t1)[0] + conv_2 = gradients_impl.gradients(conv_forward_2, t1)[0] + # "values" consists of two tensors for two backprops + value = sess.run(conv) + value_2 = sess.run(conv_2) + self.assertShapeEqual(value, conv) + self.assertShapeEqual(value_2, conv_2) + print("expected = ", value_2) + print("actual = ", value) + self.assertArrayNear(value_2.flatten(), value.flatten(), err) + + # Testing for backprops + def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes, + output_sizes, strides, dilations, + padding, data_format, use_gpu, err): + total_input_size = 1 + total_filter_size = 1 + for s in input_sizes: + total_input_size *= s + for s in filter_sizes: + total_filter_size *= s + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, total_input_size + 1)] + x2 = [f * 1.0 for f in range(1, total_filter_size + 1)] + default_dilations = (dilations[0] == 1 and dilations[1] == 1) + if default_dilations or use_gpu: + with self.test_session(use_gpu=use_gpu) as sess: + if data_format == "NCHW": + input_sizes = test_util.NHWCToNCHW(input_sizes) + t1 = constant_op.constant(x1, shape=input_sizes) + t2 = constant_op.constant(x2, shape=filter_sizes) + full_strides = [1] + strides + [1] + full_dilations = [1] + dilations + [1] + if data_format == "NCHW": + full_strides = test_util.NHWCToNCHW(full_strides) + full_dilations = test_util.NHWCToNCHW(full_dilations) + conv_forward = nn_ops.conv2d( + t1, + t2, + strides=full_strides, + dilations=full_dilations, + padding=padding, + data_format=data_format) + conv_forward_2 = nn_ops.convolution( + t1, + t2, + padding=padding, + strides=strides, + dilation_rate=dilations, + data_format=data_format) + if data_format == "NCHW": + conv_forward = test_util.NCHWToNHWC(conv_forward) + conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2) + conv = gradients_impl.gradients(conv_forward, t2)[0] + conv_2 = gradients_impl.gradients(conv_forward, t2)[0] + value = sess.run(conv) + value_2 = sess.run(conv_2) + self.assertShapeEqual(value, conv) + self.assertShapeEqual(value_2, conv_2) + print("expected = ", value_2) + print("actual = ", value) + self.assertArrayNear(value_2.flatten(), value.flatten(), err) + + def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropFilterDilation( + input_sizes=[1, 3, 6, 1], + filter_sizes=[2, 2, 1, 1], + output_sizes=[1, 1, 5, 1], + strides=[1, 1], + dilations=[2, 1], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropFilterDilation( + input_sizes=[1, 2, 3, 1], + filter_sizes=[2, 2, 1, 1], + output_sizes=[1, 1, 2, 1], + strides=[1, 1], + dilations=[1, 2], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2DEmptyBackpropFilterDilation1x2(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropFilterDilation( + input_sizes=[1, 2, 3, 1], + filter_sizes=[2, 2, 1, 0], + output_sizes=[1, 1, 2, 0], + strides=[1, 1], + dilations=[1, 2], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropFilterDilation( + input_sizes=[1, 3, 4, 3], + filter_sizes=[2, 2, 3, 3], + output_sizes=[1, 1, 2, 3], + strides=[1, 1], + dilations=[2, 2], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropFilterDilation( + input_sizes=[1, 3, 3, 1], + filter_sizes=[2, 2, 1, 2], + output_sizes=[1, 1, 1, 2], + strides=[1, 1], + dilations=[2, 2], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropInputDilation( + input_sizes=[1, 3, 6, 1], + filter_sizes=[2, 2, 1, 1], + output_sizes=[1, 1, 5, 1], + strides=[1, 1], + dilations=[2, 1], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropInputDilation( + input_sizes=[1, 2, 3, 1], + filter_sizes=[2, 2, 1, 1], + output_sizes=[1, 1, 2, 1], + strides=[1, 1], + dilations=[1, 2], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2DEmptyBackpropInputDilation1x2(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropInputDilation( + input_sizes=[0, 2, 3, 1], + filter_sizes=[2, 2, 1, 1], + output_sizes=[0, 1, 2, 1], + strides=[1, 1], + dilations=[1, 2], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + + def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + # The GPU version of this test is not very stable. So adjusting the + # error threshold to 1e-4. + self._RunAndVerifyBackpropInputDilation( + input_sizes=[1, 3, 2, 3], + filter_sizes=[2, 2, 3, 3], + output_sizes=[1, 1, 2, 3], + strides=[1, 1], + dilations=[2, 1], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-4) + + def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropInputDilation( + input_sizes=[1, 3, 3, 1], + filter_sizes=[2, 2, 1, 2], + output_sizes=[1, 1, 1, 2], + strides=[1, 1], + dilations=[2, 2], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + # Gradient checkers def ConstructAndTestGradient(self, batch, input_rows, input_cols, filter_rows, filter_cols, in_depth, out_depth, stride_rows, @@ -1457,6 +1829,22 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding, return Test +def GetInceptionFwdDilatedConvTest(input_size, filter_size, stride, padding): + + def Test(self): + if test.is_gpu_available(cuda_only=True) and stride == 1: + tf_logging.info("Testing InceptionFwd with dilations %s", + (input_size, filter_size, stride, padding)) + self._VerifyDilatedConvValues( + tensor_in_sizes=input_size, + filter_in_sizes=filter_size, + strides=[stride, stride], + dilations=[2, 2], + padding=padding) + + return Test + + def GetInceptionBackInputTest(input_size, filter_size, output_size, stride, padding, gpu_only=False): @@ -1497,6 +1885,10 @@ if __name__ == "__main__": test_util.run_in_graph_and_eager_modes()( GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_))) + setattr( + Conv2DTest, "testInceptionFwdDilatedConv_" + str(index), + test_util.run_in_graph_and_eager_modes()(GetInceptionFwdDilatedConvTest( + input_size_, filter_size_, stride_, padding_))) setattr(Conv2DTest, "testInceptionBackInput_" + str(index), test_util.run_in_graph_and_eager_modes()( GetInceptionBackInputTest(input_size_, filter_size_, @@ -1519,6 +1911,9 @@ if __name__ == "__main__": setattr(Conv2DTest, "testInceptionFwd_No_Winograd_Nonfused", test_util.run_in_graph_and_eager_modes()( GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True))) + setattr(Conv2DTest, "testInceptionFwdDilatedConv_No_Winograd_Nonfused", + test_util.run_in_graph_and_eager_modes()( + GetInceptionFwdDilatedConvTest(ishape, fshape, 1, "SAME"))) setattr(Conv2DTest, "testInceptionBackInput_No_Winograd_Nonfused", test_util.run_in_graph_and_eager_modes()( GetInceptionBackInputTest(ishape, fshape, oshape, 1, "SAME", diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 4b406ba840..8cd535aa0b 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -41,33 +41,48 @@ def _Conv2DBackpropInputGrad(op, grad): Returns: the gradients w.r.t. the input and the filter """ - return [None, - nn_ops.conv2d_backprop_filter(grad, array_ops.shape(op.inputs[1]), - op.inputs[2], op.get_attr("strides"), - op.get_attr("padding"), - op.get_attr("use_cudnn_on_gpu"), - op.get_attr("data_format")), - nn_ops.conv2d(grad, op.inputs[1], op.get_attr("strides"), - op.get_attr("padding"), op.get_attr("use_cudnn_on_gpu"), - op.get_attr("data_format"))] + return [ + None, + nn_ops.conv2d_backprop_filter( + grad, + array_ops.shape(op.inputs[1]), + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"), + data_format=op.get_attr("data_format")), + nn_ops.conv2d( + grad, + op.inputs[1], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"), + data_format=op.get_attr("data_format")) + ] @ops.RegisterGradient("Conv2DBackpropFilter") def _Conv2DBackpropFilterGrad(op, grad): return [ nn_ops.conv2d_backprop_input( - array_ops.shape(op.inputs[0]), grad, op.inputs[2], - op.get_attr("strides"), - op.get_attr("padding"), - op.get_attr("use_cudnn_on_gpu"), - op.get_attr("data_format")), - None, + array_ops.shape(op.inputs[0]), + grad, + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"), + data_format=op.get_attr("data_format")), None, nn_ops.conv2d( - op.inputs[0], grad, - op.get_attr("strides"), - op.get_attr("padding"), - op.get_attr("use_cudnn_on_gpu"), - op.get_attr("data_format")) + op.inputs[0], + grad, + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"), + data_format=op.get_attr("data_format")) ] @@ -466,25 +481,32 @@ def _SparseSoftmaxCrossEntropyWithLogitsGrad(op, grad_0, _): @ops.RegisterGradient("Conv2D") def _Conv2DGrad(op, grad): + dilations = op.get_attr("dilations") strides = op.get_attr("strides") padding = op.get_attr("padding") use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu") data_format = op.get_attr("data_format") shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]]) - return [nn_ops.conv2d_backprop_input(shape_0, - op.inputs[1], - grad, - strides, - padding, - use_cudnn_on_gpu, - data_format), - nn_ops.conv2d_backprop_filter(op.inputs[0], - shape_1, - grad, - strides, - padding, - use_cudnn_on_gpu, - data_format)] + return [ + nn_ops.conv2d_backprop_input( + shape_0, + op.inputs[1], + grad, + dilations=dilations, + strides=strides, + padding=padding, + use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format), + nn_ops.conv2d_backprop_filter( + op.inputs[0], + shape_1, + grad, + dilations=dilations, + strides=strides, + padding=padding, + use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format) + ] @ops.RegisterGradient("DepthwiseConv2dNative") diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index ec7b9372ca..b3c0a22efc 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1205,13 +1205,14 @@ def conv2d_transpose(value, raise ValueError("padding must be either VALID or SAME:" " {}".format(padding)) - return gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, - filter=filter, - out_backprop=value, - strides=strides, - padding=padding, - data_format=data_format, - name=name) + return gen_nn_ops.conv2d_backprop_input( + input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + data_format=data_format, + name=name) def atrous_conv2d_transpose(value, @@ -1343,12 +1344,13 @@ def atrous_conv2d_transpose(value, (in_width + pad_right_extra) // rate, output_shape[3]] - value = gen_nn_ops.conv2d_backprop_input(input_sizes=input_sizes, - filter=filters, - out_backprop=value, - strides=[1, 1, 1, 1], - padding="VALID", - data_format="NHWC") + value = gen_nn_ops.conv2d_backprop_input( + input_sizes=input_sizes, + filter=filters, + out_backprop=value, + strides=[1, 1, 1, 1], + padding="VALID", + data_format="NHWC") # The crops argument to batch_to_space includes both padding components. batch_to_space_crop = [[pad_top, pad_bottom + pad_bottom_extra], diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index ebd9c079b5..d920fef770 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -54,15 +54,15 @@ tf_module { } member_method { name: "conv2d" - argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], " + argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], " } member_method { name: "conv2d_backprop_filter" - argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], " + argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], " } member_method { name: "conv2d_backprop_input" - argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], " + argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], " } member_method { name: "conv2d_transpose" @@ -70,11 +70,11 @@ tf_module { } member_method { name: "conv3d" - argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\'], " + argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], " } member_method { name: "conv3d_backprop_filter_v2" - argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\'], " + argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], " } member_method { name: "conv3d_transpose" @@ -106,15 +106,15 @@ tf_module { } member_method { name: "depthwise_conv2d_native" - argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], " + argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], " } member_method { name: "depthwise_conv2d_native_backprop_filter" - argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], " + argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], " } member_method { name: "depthwise_conv2d_native_backprop_input" - argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], " + argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], " } member_method { name: "dilation2d" @@ -234,7 +234,7 @@ tf_module { } member_method { name: "quantized_conv2d" - argspec: "args=[\'input\', \'filter\', \'min_input\', \'max_input\', \'min_filter\', \'max_filter\', \'strides\', \'padding\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\'], " + argspec: "args=[\'input\', \'filter\', \'min_input\', \'max_input\', \'min_filter\', \'max_filter\', \'strides\', \'padding\', \'out_type\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'[1, 1, 1, 1]\', \'None\'], " } member_method { name: "quantized_max_pool" -- GitLab From b97585f5d2157b1e0273a4b20a568635fb58ad57 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 29 Nov 2017 17:55:53 -0800 Subject: [PATCH 0976/1801] Always leverage shapes inference now that it can handle fed nodes conservatively. PiperOrigin-RevId: 177391746 --- .../grappler/optimizers/constant_folding.cc | 88 ++++++++++--------- .../grappler/optimizers/constant_folding.h | 6 +- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 03eaa4a84a..b5172a4833 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -190,6 +190,14 @@ Status ConvertShapeToConstant(const string& op, const DataType& type, return Status::OK(); } +bool ConstantFolding::IsReallyConstant(const NodeDef& node) const { + if (!IsConstant(node)) { + return false; + } + // If the node is fed it's not constant anymore. + return feed_nodes_.find(node.name()) == feed_nodes_.end(); +} + Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We may add some nodes to the graph to encode control dependencies: there is // no need to process these, so only iterate over the nodes of the input @@ -327,9 +335,9 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs( const NodeDef* shape_node1 = node_map_->GetNode(node.input(0)); const NodeDef* shape_node2 = node_map_->GetNode(node.input(1)); if (shape_node1 == nullptr || - (shape_node1->op() != "Shape" && shape_node1->op() != "Const") || + (shape_node1->op() != "Shape" && !IsReallyConstant(*shape_node1)) || shape_node2 == nullptr || - (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) { + (shape_node2->op() != "Shape" && !IsReallyConstant(*shape_node2))) { return Status::OK(); } int64 min_id = 0; @@ -409,7 +417,7 @@ Status ConstantFolding::MaterializeReductionIndices( return Status::OK(); } const NodeDef* indices = node_map_->GetNode(node->input(1)); - if (!indices || IsConstant(*indices)) { + if (!indices || IsReallyConstant(*indices)) { // The reduction indices are already constant, there's nothing to do. return Status::OK(); } @@ -506,24 +514,23 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { if (node.input().empty()) { return false; } - // Skips nodes that must be preserved except whitelisted nodes. if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end() && nodes_whitelist_.find(node.name()) == nodes_whitelist_.end()) { return false; } - - // Skips ops that don't benefit from folding. - const string& op = node.op(); - // Skip constants, they're already folded - if (op == "Const") { + // Skip control flow nodes, they can't be folded + if (ModifiesFrameInfo(node)) { return false; } - // Skip constrol flow nodes, they can't be folded - if (op == "Enter" || op == "RefEnter" || op == "Exit" || op == "RefExit" || - op == "NextIteration" || op == "RefNextIteration") { + // Skip constants, they're already folded + if (IsConstant(node)) { return false; } + + // Skips ops that don't benefit from folding. + const string& op = node.op(); + if (op.find("Placeholder") == 0) { return false; } @@ -577,7 +584,7 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { if (!input_node) { return false; } - bool is_const = IsConstant(*input_node); + bool is_const = IsReallyConstant(*input_node); if (!is_const && !is_merge) { return false; } @@ -703,7 +710,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, break; } const NodeDef* input_node = node_map_->GetNode(input); - if (!IsConstant(*input_node)) { + if (!IsReallyConstant(*input_node)) { return Status(error::INVALID_ARGUMENT, strings::StrCat("Can't fold ", node.name(), ", its ", input, " isn't constant")); @@ -757,7 +764,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) { continue; } NodeDef* input_node = node_map_->GetNode(input); - if (!IsConstant(*input_node)) { + if (!IsReallyConstant(*input_node)) { continue; } bool valid_input = true; @@ -999,7 +1006,7 @@ bool ConstantFolding::IsSimplifiableReduction(const NodeDef& node) const { if (IsReduction(node)) { CHECK_LE(2, node.input_size()); const NodeDef* reductions_indices = node_map_->GetNode(node.input(1)); - if (IsConstant(*reductions_indices)) { + if (IsReallyConstant(*reductions_indices)) { TensorVector output; Status s = EvaluateNode(*reductions_indices, TensorVector(), &output); if (!s.ok()) { @@ -1023,7 +1030,7 @@ bool ConstantFolding::IsSimplifiableReshape( } CHECK_LE(2, node.input_size()); const NodeDef* new_shape = node_map_->GetNode(node.input(1)); - if (!IsConstant(*new_shape)) { + if (!IsReallyConstant(*new_shape)) { return false; } TensorVector outputs; @@ -1074,7 +1081,8 @@ bool ConstantFolding::IsSimplifiableReshape( } Status ConstantFolding::SimplifyGraph(GraphDef* output, - const GraphProperties& properties) { + const GraphProperties& properties, + bool use_shape_info) { for (auto& node : *output->mutable_node()) { if (IsSimplifiableReduction(node)) { // Replace the reduction node with an identity node, that can be further @@ -1099,10 +1107,10 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, *node.add_input() = input; } } - // It's possible to feed a placeholder with a tensor that doesn't have the - // proper shape, and reshape this tensor later on. Therefore only remove - // reshapes in graphs that don't have placeholders. - if (IsSimplifiableReshape(node, properties)) { + const bool safe_to_use_shapes = + use_shape_info && + (feed_nodes_.empty() || opt_level_ == RewriterConfig::AGGRESSIVE); + if (safe_to_use_shapes && IsSimplifiableReshape(node, properties)) { const NodeDef* new_shape = node_map_->GetNode(node.input(1)); DataType output_type = node.attr().at("T").type(); node.set_op("Identity"); @@ -1141,36 +1149,34 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, } GraphProperties properties(item); - const bool has_feed = !item.feed.empty(); - bool needs_shapes = !has_feed || opt_level_ == RewriterConfig::AGGRESSIVE; - Status s = errors::Unknown( - "The graph properties are needed but were not initialized"); - if (needs_shapes) { - s = properties.InferStatically(false); - } - - if (!has_feed && s.ok()) { - // Only use static shape information when there is no feed in the - // graph. That's because it's possible to feed a placeholder with a tensor - // of any shape, which could make the static information inconsistent with - // the shapes actually fed. + // It's possible to feed a placeholder with a tensor of any shape: make sure + // that the shape inference deals with this conservatively unless we're in + // aggressive mode. + const bool assume_valid_feeds = opt_level_ == RewriterConfig::AGGRESSIVE; + Status s = properties.InferStatically(assume_valid_feeds); + const bool can_use_shape_info = s.ok(); + + if (can_use_shape_info) { TF_RETURN_IF_ERROR(MaterializeShapes(properties)); - } - if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) { - TF_RETURN_IF_ERROR(MaterializeConstants(properties)); + + if (opt_level_ == RewriterConfig::AGGRESSIVE) { + TF_RETURN_IF_ERROR(MaterializeConstants(properties)); + } } TF_RETURN_IF_ERROR(FoldGraph(output)); - if (!has_feed && s.ok()) { - TF_RETURN_IF_ERROR(SimplifyGraph(output, properties)); - } + TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info)); + return Status::OK(); } Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { nodes_to_preserve_ = item.NodesToPreserve(); + for (const auto& feed : item.feed) { + feed_nodes_.insert(NodeName(feed.first)); + } if (cpu_device_ == nullptr) { owned_device_.reset(new DeviceSimple()); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 7c5db2a70f..8af5b5fbe6 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -51,6 +51,8 @@ class ConstantFolding : public GraphOptimizer { const GraphDef& optimize_output, double result) override; private: + bool IsReallyConstant(const NodeDef& node) const; + Status MaterializeShapes(const GraphProperties& properties); Status MaterializeBroadcastGradientArgs(const NodeDef& node, @@ -75,7 +77,8 @@ class ConstantFolding : public GraphOptimizer { bool IsSimplifiableReduction(const NodeDef& node) const; bool IsSimplifiableReshape(const NodeDef& node, const GraphProperties& properties) const; - Status SimplifyGraph(GraphDef* output, const GraphProperties& properties); + Status SimplifyGraph(GraphDef* output, const GraphProperties& properties, + bool use_shape_info); Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, GraphDef* output); @@ -90,6 +93,7 @@ class ConstantFolding : public GraphOptimizer { std::unique_ptr node_map_; std::unordered_set nodes_to_preserve_; std::unordered_set nodes_whitelist_; + std::unordered_set feed_nodes_; bool has_fetch_; }; -- GitLab From 65778d86a898d2aa73038837ab6c589b0a345d76 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 30 Nov 2017 02:24:29 +0000 Subject: [PATCH 0977/1801] Add `AWS_REGION` env for S3 in TensorFlow This fix tries to address the issue raised in 14951 where the region can only be specified with non-common `S3_REGION` environment variables. This fix adds the support of `AWS_REGION` which takes precedence over `S3_REGION`. This fix fixes 14951. Signed-off-by: Yong Tang --- tensorflow/core/platform/s3/s3_file_system.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 234f3c3aed..682ad97eec 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/core/platform/s3/s3_file_system.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/s3/s3_file_system.h" #include "tensorflow/core/platform/s3/s3_crypto.h" #include @@ -49,9 +49,15 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() { if (endpoint) { cfg.endpointOverride = Aws::String(endpoint); } - const char* region = getenv("S3_REGION"); + const char* region = getenv("AWS_REGION"); if (region) { cfg.region = Aws::String(region); + } else { + // TODO (yongtang): `S3_REGION` should be deprecated after 2.0. + const char* region = getenv("S3_REGION"); + if (region) { + cfg.region = Aws::String(region); + } } const char* use_https = getenv("S3_USE_HTTPS"); if (use_https) { -- GitLab From d8d43898b972a1224db50035a771e82985f60035 Mon Sep 17 00:00:00 2001 From: FredZhang <654496915@qq.com> Date: Thu, 30 Nov 2017 12:40:49 +0800 Subject: [PATCH 0978/1801] wrong code in programmer's guide in Variable Section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Programmer's guide Variable section 
the assignment variable is a `tf.Tensor` and should use `assignment.op.run()` instead of `assignment.run()` Otherwise, this code would produce an error: ``` AttributeError: 'Tensor' object has no attribute 'run' ``` Or we can use sess.run(assignment) to finish this assignment operation --- tensorflow/docs_src/programmers_guide/variables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md index 16753c931f..bac385c02c 100644 --- a/tensorflow/docs_src/programmers_guide/variables.md +++ b/tensorflow/docs_src/programmers_guide/variables.md @@ -205,7 +205,7 @@ methods: v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer()) assignment = v.assign_add(1) tf.global_variables_initializer().run() -assignment.run() +sess.run(assignment) # or assignment.op.run() ``` Most TensorFlow optimizers have specialized ops that efficiently update the -- GitLab From 4422aaa61338c3af8ce80034d92693a1bd33b09d Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 30 Nov 2017 00:18:03 -0800 Subject: [PATCH 0979/1801] Automated g4 rollback of changelist 177375237 PiperOrigin-RevId: 177418947 --- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/context.py | 15 -- tensorflow/python/eager/function.py | 144 ++++++------------ tensorflow/python/eager/graph_callable.py | 18 +-- .../python/eager/graph_callable_test.py | 1 + tensorflow/python/framework/ops.py | 30 ++-- tensorflow/python/pywrap_tfe.i | 3 +- 7 files changed, 70 insertions(+), 143 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index dc1142705a..0144f3b1e5 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -540,7 +540,7 @@ def _ensure_unique_tensor_objects(parameter_positions, args): if i in parameter_positions: tid = ops.tensor_id(t) if tid in s: - args[i] = gen_array_ops.identity(args[i]) + args[i] = args[i]._dup() # pylint: disable=protected-access else: s.add(tid) return args diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 415416cfae..92f4e15c05 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -288,21 +288,6 @@ class Context(object): self._initialize_handle_and_devices() return self._num_gpus - def add_function(self, fn): - """Add a function definition to the context. - - Once added, the function (identified by its name) can be executed like any - other operation. - - Args: - fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper). - """ - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TFE_ContextAddFunction( - self._handle, # pylint: disable=protected-access - fn, - status) - def add_function_def(self, fdef): """Add a function definition to the context. diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 092b36ff20..2f4b59e938 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -25,19 +25,15 @@ import threading import numpy as np -from tensorflow.core.framework import function_pb2 -from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import execute from tensorflow.python.eager import tape from tensorflow.python.eager.graph_only_ops import graph_placeholder -from tensorflow.python.framework import c_api_util from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module -from tensorflow.python.framework import errors +from tensorflow.python.framework import graph_to_function_def from tensorflow.python.framework import ops from tensorflow.python.ops import gradients_impl -from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator @@ -51,41 +47,10 @@ _scoped_captures = threading.local() _scoped_captures.tensors = None -def make_function_def(name, graph, operations, inputs, outputs): - """Makes FunctionDef proto and defined function. - - Args: - name: the function name - graph: the graph from which to build the function - operations: the operations in the function body - inputs: tensors to be used as function arguments - outputs: tensors to be returned from the function - - Returns: - fdef: a FunctionDef protocol buffer for the function - fn: a wrapped TF_Function for the function - """ - with errors.raise_exception_on_not_ok_status() as status: - fn = pywrap_tensorflow.TF_GraphToFunction_wrapper( - graph._c_graph, # pylint: disable=protected-access - compat.as_text(name), - False, - [o._c_op for o in operations], # pylint: disable=protected-access - [t._as_tf_output() for t in inputs], # pylint: disable=protected-access - [t._as_tf_output() for t in outputs], # pylint: disable=protected-access - [compat.as_text("%s" % i) for i in range(len(outputs))], - None, - compat.as_text(""), - status) - # TODO(apassos) avoid creating a FunctionDef (specially to grab the signature, - # but also in general it's nice not to depend on it. - with c_api_util.tf_buffer() as buffer_: - with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status) - proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_) - fdef = function_pb2.FunctionDef() - fdef.ParseFromString(compat.as_bytes(proto_data)) - return fdef, fn +def make_function_def(graph, operations, inputs, outputs): + """Makes function def from the given graph with the operations.""" + return graph_to_function_def.graph_to_function_def( + graph, operations, inputs, outputs) @contextlib.contextmanager @@ -150,10 +115,6 @@ class CapturingGraph(ops.Graph): # for resource tensors. self._last_op_using_resource_tensor = {} - # TODO(apassos) remove once the C API is used by default. - def _use_c_api_hack(self): - return True - def clear_resource_control_flow_state(self): self._last_op_using_resource_tensor = {} @@ -246,20 +207,14 @@ def _inference_name(n): return "__inference_%s_%s" % (n, ops.uid()) -# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction -# so it doesn't have the definition-generating logic and is just a container for -# an already-defined function. class _DefinedFunction(object): """Mocks the interface of tf _DefinedFunction.""" - def __init__(self, fdef, fn): + def __init__(self, fdef): self.definition = fdef self.name = fdef.signature.name - self.signature = fdef.signature self.grad_func_name = None self.python_grad_func = None - self._c_func = fn - self._grad_func = None def _map_sequence_obj_to_idx(sequence): @@ -295,7 +250,6 @@ class GraphModeFunction(object): input_placeholders, extra_inputs, fdef, - fn, graph, operations, func_outputs, @@ -309,7 +263,7 @@ class GraphModeFunction(object): self._graph = graph self._has_backprop = False self._func_name = fdef.signature.name - self._fdef = _DefinedFunction(fdef, fn) + self._fdef = _DefinedFunction(fdef) self._num_outputs = len(fdef.signature.output_arg) self._ops = operations self._func_outputs = func_outputs @@ -329,45 +283,38 @@ class GraphModeFunction(object): with self._graph.as_default(), context.graph_mode(): c = _CapturingContext() with c: - filtered_outputs = [x for x in self._returns if x is not None] + filtered_outputs = [ + x for x in self._returns if x is not None + ] self._out_grad_placeholders = [ - graph_placeholder(x.dtype, x.shape) for x in filtered_outputs] + graph_placeholder(x.dtype, x.shape) for x in filtered_outputs + ] in_gradients = gradients_impl.gradients( filtered_outputs, self._input_placeholders, grad_ys=self._out_grad_placeholders) - shapes = tuple(x.shape for x in in_gradients if x is not None) + shapes = [x.shape for x in in_gradients if x is not None] captures = list(sorted(c.captured_tensors, key=lambda x: x.name)) - forward_name = _forward_name(self._func_name) - forward_function_def, forward_fn = make_function_def( - forward_name, self._graph, self._ops, self._input_placeholders, + forward_function_def = make_function_def( + self._graph, self._ops, self._input_placeholders, filtered_outputs + captures) - self._forward_fdef = _DefinedFunction(forward_function_def, forward_fn) - _register(forward_fn) - backward_outputs = tuple(x for x in in_gradients if x is not None) + self._forward_fdef = _DefinedFunction(forward_function_def) + _register_with_name(_forward_name(self._func_name), forward_function_def) + backward_outputs = [x for x in in_gradients if x is not None] all_inputs = self._out_grad_placeholders + captures - # Excluding input ops from the body as we do not intend to execute these - # operations when the function is executed. - all_ignored_ops = frozenset(x.op for x in all_inputs) - # Enforce a deterministic order of operations in the generated graph. This - # means rerunning the function-defining code will always define the same - # function, which is useful if we serialize this etc. - fdef_ops = tuple(x for x in sorted(c.known_ops, key=lambda x: x.name) - if x not in all_ignored_ops) - bname = _backward_name(self._func_name) - backward_function_def, backward_fn = make_function_def( - bname, self._graph, fdef_ops, + backward_function_def = make_function_def( + self._graph, [x.op for x in self._out_grad_placeholders + ] + list(sorted(c.known_ops, key=lambda x: x.name)), all_inputs, backward_outputs) - _register(backward_fn) + _register_with_name(_backward_name(self._func_name), backward_function_def) self._backward_function = GraphModeFunction( - all_inputs, [], backward_function_def, backward_fn, self._graph, - c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs), - shapes) + all_inputs, [], backward_function_def, self._graph, c.known_ops, + in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes) def _backprop_call(self, args): """Calls the wrapped function and records the result on a tape.""" all_args = args + self._extra_inputs - signature = self._forward_fdef.signature + signature = self._forward_fdef.definition.signature ctx = context.context() if ctx.in_graph_mode(): g = ops.get_default_graph() @@ -378,7 +325,7 @@ class GraphModeFunction(object): return ops.internal_convert_to_tensor(x, ctx=ctx) op = g.create_op( signature.name, [make_tensor(x) for x in all_args], - tuple(dtypes_module.DType(x.type) for x in signature.output_arg), + [dtypes_module.DType(x.type) for x in signature.output_arg], op_def=signature, name="FunctionCall", compute_shapes=False) @@ -414,8 +361,11 @@ class GraphModeFunction(object): if v._trainable: # pylint: disable=protected-access tape.watch_variable(v) - tensor_inputs = [x for x in nest.flatten(args) - if isinstance(x, ops.Tensor)] + tensor_inputs = [ + x for x in nest.flatten(args) + if isinstance(x, ops.Tensor) + ] + if tape.should_record(tensor_inputs) or tape.should_record( self._extra_inputs): if not self._has_backprop: @@ -434,7 +384,7 @@ class GraphModeFunction(object): args = list(tensor_inputs) + self._extra_inputs op = g.create_op( signature.name, [ops.convert_to_tensor(x) for x in args], - tuple(dtypes_module.DType(x.type) for x in signature.output_arg), + [dtypes_module.DType(x.type) for x in signature.output_arg], op_def=signature, name="FunctionCall", compute_shapes=False) @@ -519,32 +469,29 @@ def _defun_internal(name, func, args, kwds): extra_inputs = [] extra_placeholders = [] outputs_list = nest.flatten(func_outputs) - output_shapes = tuple(x.shape for x in outputs_list if x is not None) + output_shapes = [x.shape for x in outputs_list if x is not None] - flat_inputs = [x for x in nest.flatten(func_inputs) - if isinstance(x, ops.Tensor)] + flat_inputs = [ + x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor) + ] all_inputs = flat_inputs + list(extra_placeholders) - all_ignored_ops = frozenset(x.op for x in all_inputs) + func_def_outputs = [x for x in outputs_list if x is not None] - fname = _inference_name(name) - operations = tuple(x for x in tmp_graph.get_operations() - if x not in all_ignored_ops) - inference_function_def, fn = make_function_def( - fname, tmp_graph, operations, all_inputs, func_def_outputs) + inference_function_def = make_function_def( + tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs) # Register any other functions defined in the graph # TODO(ashankar): Oh lord, forgive me for this lint travesty. for f in tmp_graph._functions.values(): # pylint: disable=protected-access # TODO(ashankar): What about the gradient registry? - _register(f._c_func) # pylint: disable=protected-access - _register(fn) + _register_with_name(f.name, f.definition) + _register_with_name(_inference_name(name), inference_function_def) return GraphModeFunction( all_inputs, extra_inputs, inference_function_def, - fn, tmp_graph, - operations, + tmp_graph.get_operations(), func_outputs, _map_sequence_obj_to_idx(func_def_outputs), output_shapes, @@ -570,9 +517,10 @@ def _cache_key(x): return x -def _register(fn): - """Registers the function `fn`.""" - context.context().add_function(fn) +def _register_with_name(name, fdef): + """Registers the function `fdef` with the name `name`.""" + fdef.signature.name = name + context.context().add_function_def(fdef) # TODO(apassos): better error messages for non-hashable arguments. diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index 3da100d800..faf0ac88bc 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -318,9 +318,7 @@ def _graph_callable_internal(func, shape_and_dtypes): placeholder_inputs = flat_inputs+ list(extra_placeholders) func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)] - initialization_name = function._inference_name(func.__name__) # pylint: disable=protected-access - initializer_function_def, initializer_fn = function.make_function_def( - initialization_name, + initializer_function_def = function.make_function_def( tmp_graph, initializing_operations, placeholder_inputs, @@ -329,13 +327,13 @@ def _graph_callable_internal(func, shape_and_dtypes): # Also, what about the gradient registry of these functions? Those need to be # addressed as well. for f in tmp_graph._functions.values(): # pylint: disable=protected-access - function._register(f._c_func) # pylint: disable=protected-access - function._register(initializer_fn) # pylint: disable=protected-access + function._register_with_name(f.name, f.definition) # pylint: disable=protected-access + function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access + initializer_function_def) initializer_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, initializer_function_def, - initializer_fn, tmp_graph, initializing_operations, func_outputs, @@ -344,20 +342,18 @@ def _graph_callable_internal(func, shape_and_dtypes): capture_func_def_outputs = [ x for x in captured_outlist if isinstance(x, tf_ops.Tensor)] - captured_function_name = function._inference_name(func.__name__) # pylint: disable=protected-access - captured_function_def, capturing_fn = function.make_function_def( - captured_function_name, + captured_function_def = function.make_function_def( tmp_graph, capturing_operations, placeholder_inputs, capture_func_def_outputs) - function._register(capturing_fn) # pylint: disable=protected-access + function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access + captured_function_def) captured_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, captured_function_def, - capturing_fn, tmp_graph, capturing_operations, captured_outputs, diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py index b9e6ca2a93..548e16a909 100644 --- a/tensorflow/python/eager/graph_callable_test.py +++ b/tensorflow/python/eager/graph_callable_test.py @@ -152,6 +152,7 @@ class GraphCallableTest(test.TestCase): self.assertAllEqual(5, f(constant_op.constant(2))) def testNestedFunction(self): + # TensorFlow function (which is what would be used in TensorFlow graph # construction). @function.Defun(dtypes.int32, dtypes.int32) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 36daf59647..2217513966 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -599,6 +599,11 @@ class Tensor(_TensorLike): """ return _eval_using_default_session(self, feed_dict, self.graph, session) + def _dup(self): + ret = copy.copy(self) + ret._id = uid() # pylint: disable=protected-access + return ret + # TODO(agarwal): consider getting rid of this. class _EagerTensorBase(Tensor): @@ -724,6 +729,9 @@ class _EagerTensorBase(Tensor): return new_tensor # pylint: enable=protected-access + def _dup(self): + return self._copy(device_name=self.device) + @property def shape(self): return tensor_shape.TensorShape(self._shape_tuple()) @@ -1786,7 +1794,7 @@ class Operation(object): c_api.SetRequestedDevice( self._graph._c_graph, # pylint: disable=protected-access self._c_op, # pylint: disable=protected-access - compat.as_text(_device_string(device))) + _device_string(device)) else: self._node_def.device = _device_string(device) @@ -2075,7 +2083,7 @@ class Operation(object): def _set_attr(self, attr_name, attr_value): """Private method used to set an attribute in the node_def.""" - if self._c_op: + if _USE_C_API: buf = c_api.TF_NewBufferFromString( compat.as_bytes(attr_value.SerializeToString())) try: @@ -2644,16 +2652,11 @@ class Graph(object): # TODO(skyewm): fold as much of the above as possible into the C # implementation - if _USE_C_API or self._use_c_api_hack(): + if _USE_C_API: self._scoped_c_graph = c_api_util.ScopedTFGraph() else: self._scoped_c_graph = None - # TODO(apassos) remove once the C API is used by default. - def _use_c_api_hack(self): - """Temporary hack; can be overridden to force C API usage.""" - return False - def _convert_stack(self, stack, include_func_start_lineno=False): """Converts a stack extracted using _extract_stack() to a traceback stack. @@ -2982,14 +2985,9 @@ class Graph(object): # Add function to graph # pylint: disable=protected-access if self._c_graph: - # Handle functions created without using the C API. TODO(apassos,skyewm) - # remove this when all functions are generated using the C API by default - # as this will be unnecessary. - if not function._c_func: - with errors.raise_exception_on_not_ok_status() as status: - serialized = function.definition.SerializeToString() - function._c_func = c_api.TF_FunctionImportFunctionDef( - serialized, status) + assert function._c_func, ( + "Cannot add function created without C API support to graph " + "created with C API support") with errors.raise_exception_on_not_ok_status() as status: gradient = function._grad_func._c_func if function._grad_func else None c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient, diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 82750e9e49..82b154164e 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -18,7 +18,6 @@ limitations under the License. %rename("%s") TFE_NewContext; %rename("%s") TFE_DeleteContext; %rename("%s") TFE_ContextListDevices; -%rename("%s") TFE_ContextAddFunction; %rename("%s") TFE_ContextAddFunctionDef; %rename("%s") TFE_OpNameGetAttrType; %rename("%s") TFE_Py_InitEagerTensor; @@ -150,7 +149,7 @@ limitations under the License. } $1 = &temp; $1->resize(PyInt_AsLong($input), nullptr); -} +} // Create new Status object. %typemap(in, numinputs=0) TF_Status *out_status { -- GitLab From bec3d96c1f9973c22136f6fd33388edbd78f0824 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 00:31:02 -0800 Subject: [PATCH 0980/1801] Automated g4 rollback of changelist 177362829 PiperOrigin-RevId: 177419730 --- tensorflow/compiler/xla/tests/slice_test.cc | 88 ++++++--------------- 1 file changed, 23 insertions(+), 65 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index 981d075089..c21124750a 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -26,7 +26,6 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/core/lib/gtl/array_slice.h" -#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -212,13 +211,6 @@ class SliceR1Test : public ClientLibraryTestBase, } }; -string SliceR1TestDataToString(const ::testing::TestParamInfo& data) { - const R1Spec& spec = data.param; - return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0, - spec.slice_start, spec.slice_limit, - spec.slice_stride); -} - XLA_TEST_P(SliceR1Test, DoIt_F32) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_F64) { Run(GetParam()); } @@ -231,64 +223,30 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_S64) { Run(GetParam()); } -// Tests for R1 slice ops. -// The format for each testcase is {input size, start, limit, stride}. -// clang-format off -INSTANTIATE_TEST_CASE_P( - SliceR1TestInstantiation, - SliceR1Test, - ::testing::Values( - R1Spec{10, 0, 0, 1}, - R1Spec{10, 7, 7, 1}, - R1Spec{10, 0, 5, 1}, - R1Spec{10, 3, 5, 1}, - R1Spec{10, 0, 10, 1}, - R1Spec{1024, 0, 5, 1}, - R1Spec{1024, 3, 5, 1}, - R1Spec{1024 + 17, 0, 5, 1}, - R1Spec{1024 + 17, 3, 5, 1}, - R1Spec{1024 + 17, 1024, 1024 + 6, 1}, - R1Spec{1024 + 17, 1024 + 1, 1024 + 6, 1}, - R1Spec{1024, 1024 - 4, 1024, 1}, - R1Spec{4 * 1024, 7, 7 + 1024, 1}, - R1Spec{4 * 1024, 0, 4 * 1024, 1}, - R1Spec{4 * 1024, 1, 4 * 1024 - 1, 1}, - R1Spec{4 * 1024, 1024, 3 * 1024, 1}, - R1Spec{4 * 1024, 1024 + 1, 3 * 1024 - 1, 1}, - R1Spec{16 * 1024, 0, 5, 1}, - R1Spec{16 * 1024, 3, 5, 1}, - R1Spec{16 * 1024 + 17, 0, 5, 1}, - R1Spec{16 * 1024 + 17, 3, 5, 1}, - R1Spec{16 * 1024 + 17, 16 * 1024, 16 * 1024 + 6, 1}, - R1Spec{16 * 1024 + 17, 16 * 1024 + 1, 16 * 1024 + 6, 1}, - R1Spec{64 * 1024, 0, 64 * 1024, 1}, - R1Spec{64 * 1024, 1, 64 * 1024 - 1, 1}, - R1Spec{64 * 1024, 1024, 63 * 1024, 1}, - R1Spec{64 * 1024, 1024 + 1, 63 * 1024 - 1, 1}, - R1Spec{64 * 1024, 32 * 1024, 33 * 1024, 1}, - R1Spec{64 * 1024, 32 * 1024 + 1, 33 * 1024 - 1, 1}, -// TODO(b/69425338): This uses too much memory on GPU. -#ifndef XLA_TEST_BACKEND_GPU - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1}, -#endif - R1Spec{10, 2, 4, 2}, - R1Spec{10, 0, 10, 2}, - R1Spec{10, 0, 10, 3}, - R1Spec{10, 0, 10, 4}, - R1Spec{10, 0, 10, 5}, - R1Spec{10, 0, 10, 10}, - R1Spec{500, 200, 400, 7}, - R1Spec{4096, 1, 4095, 3}, - R1Spec{2047, 1024 - 24, 1024 + 160, 31}, - R1Spec{2047, 1, 2046, 3 * 128}, - R1Spec{4096, 1024 + 3, 4095, 500}, - R1Spec{8192, 0, 8192, 1024 * 3 + 400} - ), - SliceR1TestDataToString +INSTANTIATE_TEST_CASE_P( // + SliceR1TestInstantiation, // + SliceR1Test, // + ::testing::Values( // + R1Spec{10, 0, 0, 1}, // + R1Spec{10, 7, 7, 1}, // + R1Spec{10, 2, 4, 1}, // + R1Spec{10, 2, 4, 2}, // + R1Spec{10, 0, 10, 1}, // + R1Spec{1024, 1024 - 4, 1024, 1}, // + R1Spec{4096, 7, 7 + 1024, 1}, // + R1Spec{10, 0, 10, 2}, // + R1Spec{10, 0, 10, 3}, // + R1Spec{10, 0, 10, 4}, // + R1Spec{10, 0, 10, 5}, // + R1Spec{10, 0, 10, 10}, // + R1Spec{500, 200, 400, 7}, // + R1Spec{4096, 1, 4095, 3}, // + R1Spec{2047, 1024 - 24, 1024 + 160, 31}, // + R1Spec{2047, 1, 2046, 3 * 128}, // + R1Spec{4096, 1024 + 3, 4095, 500}, // + R1Spec{8192, 0, 8192, 1024 * 3 + 400} // + ) // ); -// clang-format on struct R2Spec { int64 input_dim0; -- GitLab From ad3213bc53d9905c788509948412ad9703fa976b Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 30 Nov 2017 01:21:49 -0800 Subject: [PATCH 0981/1801] Disable baseline_test in asan. PiperOrigin-RevId: 177423981 --- tensorflow/python/estimator/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 8e6945b0f3..e062e1fbfe 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -215,6 +215,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip", + "noasan", # test flakily times out in asan mode. "notsan", # b/67510291 ], deps = [ -- GitLab From 8a98563eb6d552f0bd0931f83837640481c1f938 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 30 Nov 2017 06:13:53 -0800 Subject: [PATCH 0982/1801] Add support for int32 output types to the Multinomial op. PiperOrigin-RevId: 177444775 --- .../compiler/tests/categorical_op_test.py | 38 ++++++++------ tensorflow/core/kernels/multinomial_op.cc | 51 ++++++++++++------- tensorflow/core/kernels/multinomial_op.h | 2 +- .../core/kernels/multinomial_op_gpu.cu.cc | 30 +++++++---- tensorflow/core/ops/random_ops.cc | 3 +- .../random/multinomial_op_test.py | 14 ++--- tensorflow/python/ops/random_ops.py | 5 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 2 +- 8 files changed, 90 insertions(+), 55 deletions(-) diff --git a/tensorflow/compiler/tests/categorical_op_test.py b/tensorflow/compiler/tests/categorical_op_test.py index 5e06f9a724..035cdea178 100644 --- a/tensorflow/compiler/tests/categorical_op_test.py +++ b/tensorflow/compiler/tests/categorical_op_test.py @@ -35,6 +35,9 @@ from tensorflow.python.platform import googletest class CategoricalTest(XLATestCase): """Test cases for random-number generating operators.""" + def output_dtypes(self): + return set(self.int_types).intersection([np.int32, np.int64]) + def _chi2(self, expected, actual): """Returns Chi2 GOF statistic.""" actual = np.asarray(actual) @@ -55,7 +58,8 @@ class CategoricalTest(XLATestCase): """ with self.test_session() as sess, self.test_scope(): random_seed.set_random_seed(1618) - op = random_ops.multinomial(logits, num_samples) + op = random_ops.multinomial(logits, num_samples, + output_dtype=dtypes.int32) d = sess.run(op) batch_size, num_classes = logits.shape @@ -73,11 +77,11 @@ class CategoricalTest(XLATestCase): return freqs_mat - def _testRngIsNotConstant(self, rng, dtype): + def _testRngIsNotConstant(self, rng, dtype, output_dtype): # Tests that 'rng' does not always return the same value. with self.test_session() as sess: with self.test_scope(): - x = rng(dtype) + x = rng(dtype, output_dtype) # The random-number generator, if working correctly, should produce the # same output multiple times with low probability. @@ -92,21 +96,25 @@ class CategoricalTest(XLATestCase): (not np.array_equal(y, w))) def testCategoricalIsNotConstant(self): - def rng(unused_dtype): - return random_ops.multinomial([[1., 1., 1.]], 10) + def rng(dtype, output_dtype): + return random_ops.multinomial(np.array([[1., 1., 1.]], dtype=dtype), 10, + output_dtype=output_dtype) - dtype = dtypes.float32 - self._testRngIsNotConstant(rng, dtype) + dtype = np.float32 + for output_dtype in self.output_dtypes(): + self._testRngIsNotConstant(rng, dtype, output_dtype) def testCategoricalIsInRange(self): - for dtype in [dtypes.float32, dtypes.float64]: - with self.test_session() as sess: - with self.test_scope(): - x = random_ops.multinomial( - array_ops.ones(shape=[1, 20], dtype=dtype), 1000) - y = sess.run(x) - self.assertTrue((y >= 0).sum() == 1000) - self.assertTrue((y < 20).sum() == 1000) + for dtype in self.float_types: + for output_dtype in self.output_dtypes(): + with self.test_session() as sess: + with self.test_scope(): + x = random_ops.multinomial( + array_ops.ones(shape=[1, 20], dtype=dtype), 1000, + output_dtype=output_dtype) + y = sess.run(x) + self.assertTrue((y >= 0).sum() == 1000) + self.assertTrue((y < 20).sum() == 1000) def testSamplingCorrectness(self): np.random.seed(1618) # Make it reproducible. diff --git a/tensorflow/core/kernels/multinomial_op.cc b/tensorflow/core/kernels/multinomial_op.cc index 8c0109f5c8..d086abb247 100644 --- a/tensorflow/core/kernels/multinomial_op.cc +++ b/tensorflow/core/kernels/multinomial_op.cc @@ -40,7 +40,7 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { -template +template struct MultinomialFunctor { void operator()(OpKernelContext* ctx, const Device& d, typename TTypes::ConstMatrix logits, @@ -49,11 +49,11 @@ struct MultinomialFunctor { typename TTypes::Flat scratch, int batch_size, int num_classes, int num_samples, const random::PhiloxRandom& gen, - typename TTypes::Matrix output); + typename TTypes::Matrix output); }; -template -struct MultinomialFunctor { +template +struct MultinomialFunctor { void operator()(OpKernelContext* ctx, const CPUDevice& d, typename TTypes::ConstMatrix logits, typename TTypes::Flat /* noises */, @@ -61,7 +61,7 @@ struct MultinomialFunctor { typename TTypes::Flat /* scratch */, int batch_size, int num_classes, int num_samples, const random::PhiloxRandom& gen, - typename TTypes::Matrix output) { + typename TTypes::Matrix output) { auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads()); // The implementation only parallelizes by batch. @@ -128,7 +128,7 @@ struct MultinomialFunctor { } // namespace functor // Samples from a multinomial distribution. -template +template class MultinomialOp : public OpKernel { public: explicit MultinomialOp(OpKernelConstruction* context) : OpKernel(context) { @@ -195,11 +195,11 @@ class MultinomialOp : public OpKernel { if (std::is_same::value) num_samples_ceil_4 *= 2; auto rng = generator_.ReserveRandomOutputs(batch_size * num_samples_ceil_4, 256); - functor::MultinomialFunctor()( + functor::MultinomialFunctor()( ctx, ctx->eigen_device(), logits_t.matrix(), noises.flat(), scores.flat(), scratch.flat(), batch_size, num_classes, num_samples, rng, - samples_t->matrix()); + samples_t->matrix()); } } @@ -209,10 +209,17 @@ class MultinomialOp : public OpKernel { TF_DISALLOW_COPY_AND_ASSIGN(MultinomialOp); }; -#define REGISTER(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Multinomial").Device(DEVICE_CPU).TypeConstraint("T"), \ - MultinomialOp); +#define REGISTER(TYPE) \ + REGISTER_KERNEL_BUILDER(Name("Multinomial") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("output_dtype", DT_INT32), \ + MultinomialOp); \ + REGISTER_KERNEL_BUILDER(Name("Multinomial") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("output_dtype", DT_INT64), \ + MultinomialOp); TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); @@ -220,12 +227,20 @@ TF_CALL_double(REGISTER); #undef REGISTER #if GOOGLE_CUDA -#define REGISTER(TYPE) \ - REGISTER_KERNEL_BUILDER(Name("Multinomial") \ - .Device(DEVICE_GPU) \ - .HostMemory("num_samples") \ - .TypeConstraint("T"), \ - MultinomialOp) +#define REGISTER(TYPE) \ + REGISTER_KERNEL_BUILDER(Name("Multinomial") \ + .Device(DEVICE_GPU) \ + .HostMemory("num_samples") \ + .TypeConstraint("T") \ + .TypeConstraint("output_dtype", DT_INT32), \ + MultinomialOp) \ + REGISTER_KERNEL_BUILDER(Name("Multinomial") \ + .Device(DEVICE_GPU) \ + .HostMemory("num_samples") \ + .TypeConstraint("T") \ + .TypeConstraint("output_dtype", DT_INT64), \ + MultinomialOp) + TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); diff --git a/tensorflow/core/kernels/multinomial_op.h b/tensorflow/core/kernels/multinomial_op.h index af5e81f219..6e41060aa4 100644 --- a/tensorflow/core/kernels/multinomial_op.h +++ b/tensorflow/core/kernels/multinomial_op.h @@ -21,7 +21,7 @@ namespace tensorflow { namespace functor { // Generic helper functor for the Multinomial Op. -template +template struct MultinomialFunctor; } // namespace functor diff --git a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc index 19b4f3ca55..5cc5877cce 100644 --- a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc +++ b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc @@ -37,20 +37,22 @@ using GPUDevice = Eigen::GpuDevice; // Kernel for Multinomial op. Data is interpreted to have the following shapes: // scores: [B, S, C]; maxima: [B, S]; output: [B, S]. +template __global__ void MultinomialKernel(int32 nthreads, const int32 num_classes, const int32 num_samples, const float* scores, - const float* maxima, int64* output) { + const float* maxima, OutputType* output) { CUDA_1D_KERNEL_LOOP(index, nthreads) { const int maxima_idx = index / num_classes; if (ldg(maxima + maxima_idx) == ldg(scores + index)) { - CudaAtomicMax(reinterpret_cast(output + maxima_idx), - static_cast(index % num_classes)); + using UnsignedOutputType = typename std::make_unsigned::type; + CudaAtomicMax(reinterpret_cast(output + maxima_idx), + static_cast(index % num_classes)); } } } -template -struct MultinomialFunctor { +template +struct MultinomialFunctor { void operator()(OpKernelContext* ctx, const GPUDevice& d, typename TTypes::ConstMatrix logits, typename TTypes::Flat noises, @@ -58,7 +60,7 @@ struct MultinomialFunctor { typename TTypes::Flat maxima, int batch_size, int num_classes, int num_samples, const random::PhiloxRandom& gen, - typename TTypes::Matrix output) { + typename TTypes::Matrix output) { // Uniform, [0, 1). typedef random::UniformDistribution Dist; functor::FillPhiloxRandom()(ctx, d, gen, noises.data(), @@ -111,11 +113,17 @@ struct MultinomialFunctor { }; // Explicit instantiation of the GPU functors. -template struct MultinomialFunctor; -template struct MultinomialFunctor; -template struct MultinomialFunctor; -template struct MultinomialFunctor; -template struct MultinomialFunctor; +template struct MultinomialFunctor; +template struct MultinomialFunctor; +template struct MultinomialFunctor; +template struct MultinomialFunctor; +template struct MultinomialFunctor; + +template struct MultinomialFunctor; +template struct MultinomialFunctor; +template struct MultinomialFunctor; +template struct MultinomialFunctor; +template struct MultinomialFunctor; } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc index 2429171fa9..5a436fb93e 100644 --- a/tensorflow/core/ops/random_ops.cc +++ b/tensorflow/core/ops/random_ops.cc @@ -201,10 +201,11 @@ REGISTER_OP("Multinomial") .SetIsStateful() .Input("logits: T") .Input("num_samples: int32") - .Output("output: int64") + .Output("output: output_dtype") .Attr("seed: int = 0") .Attr("seed2: int = 0") .Attr("T: realnumbertype") + .Attr("output_dtype: {int32, int64} = DT_INT64") .SetShapeFn([](InferenceContext* c) { ShapeHandle logits_shape; ShapeHandle unused; diff --git a/tensorflow/python/kernel_tests/random/multinomial_op_test.py b/tensorflow/python/kernel_tests/random/multinomial_op_test.py index ca48ba6cad..a9dc7b7de0 100644 --- a/tensorflow/python/kernel_tests/random/multinomial_op_test.py +++ b/tensorflow/python/kernel_tests/random/multinomial_op_test.py @@ -57,12 +57,14 @@ class MultinomialTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testSmallEntropy(self): random_seed.set_random_seed(1618) - with test_util.device(use_gpu=True): - # A logit value of -10 corresponds to a probability of ~5e-5. - logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]]) - num_samples = 1000 - samples = self.evaluate(random_ops.multinomial(logits, num_samples)) - self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples) + for output_dtype in [np.int32, np.int64]: + with test_util.device(use_gpu=True): + # A logit value of -10 corresponds to a probability of ~5e-5. + logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]]) + num_samples = 1000 + samples = self.evaluate(random_ops.multinomial( + logits, num_samples, output_dtype=output_dtype)) + self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples) def testOneOpMultipleStepsIndependent(self): with self.test_session(use_gpu=True) as sess: diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py index 52fb5131cf..afaff8ca41 100644 --- a/tensorflow/python/ops/random_ops.py +++ b/tensorflow/python/ops/random_ops.py @@ -316,7 +316,7 @@ def random_crop(value, size, seed=None, name=None): return array_ops.slice(value, offset, size, name=name) -def multinomial(logits, num_samples, seed=None, name=None): +def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None): """Draws samples from a multinomial distribution. Example: @@ -336,6 +336,7 @@ def multinomial(logits, num_samples, seed=None, name=None): @{tf.set_random_seed} for behavior. name: Optional name for the operation. + output_dtype: integer type to use for the output. Defaults to int64. Returns: The drawn samples of shape `[batch_size, num_samples]`. @@ -344,7 +345,7 @@ def multinomial(logits, num_samples, seed=None, name=None): logits = ops.convert_to_tensor(logits, name="logits") seed1, seed2 = random_seed.get_seed(seed) return gen_random_ops.multinomial( - logits, num_samples, seed=seed1, seed2=seed2) + logits, num_samples, seed=seed1, seed2=seed2, output_dtype=output_dtype) ops.NotDifferentiable("Multinomial") diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 0edd4153d7..57573d5024 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1394,7 +1394,7 @@ tf_module { } member_method { name: "multinomial" - argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "multiply" -- GitLab From 976049bb0bcdebe10d0a67f6c843f2b51eb1348c Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Thu, 30 Nov 2017 09:09:16 -0800 Subject: [PATCH 0983/1801] Implement Python-specific device and colocation logic in import_graph_def with C API enabled. PiperOrigin-RevId: 177462553 --- tensorflow/python/framework/importer.py | 63 ++++++++++- tensorflow/python/framework/importer_test.py | 108 ++++++++----------- tensorflow/python/framework/ops.py | 6 +- 3 files changed, 107 insertions(+), 70 deletions(-) diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index 73c35de578..ada8c30fab 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -251,10 +251,67 @@ def _PopulateTFImportGraphDefOptions(options, prefix, input_map, def _ProcessNewOps(graph): """Processes the newly-added TF_Operations in `graph`.""" - for c_op in c_api_util.new_tf_operations(graph): - graph._create_op_from_tf_operation(c_op) # pylint: disable=protected-access + # Maps from a node to the names of the ops it's colocated with, if colocation + # is specified in the attributes. + colocation_pairs = {} - # TODO(skyewm): colocation logic + for c_op in c_api_util.new_tf_operations(graph): + # pylint: disable=protected-access + new_op = graph._create_op_from_tf_operation(c_op, compute_device=False) + # pylint: enable=protected-access + + colocation_names = _GetColocationNames(new_op) + if colocation_names: + colocation_pairs[new_op] = colocation_names + # Don't apply this op's device function, since colocation constraints + # override device functions. Note that this op's device may still be set + # by the loop below. + else: + with _MaybeDevice(new_op.device): + graph._apply_device_functions(new_op) # pylint: disable=protected-access + + # The following loop populates the device field of ops that are colocated + # with another op. This is implied by the colocation attribute, but we + # propagate the device field for completeness. + for op, coloc_op_list in colocation_pairs.items(): + coloc_device = None + # Find any device in the list of colocated ops that have a device, if it + # exists. We assume that if multiple ops have devices, they refer to the + # same device. Otherwise, a runtime error will occur since the colocation + # property cannot be guaranteed. + # + # One possible improvement is to try to check for compatibility of all + # devices in this list at import time here, which would require + # implementing a compatibility function for device specs in python. + for coloc_op_name in coloc_op_list: + try: + coloc_op = graph._get_operation_by_name_unsafe(coloc_op_name) # pylint: disable=protected-access + except KeyError: + raise ValueError('Specified colocation to an op that ' + 'does not exist during import: %s in %s' % ( + coloc_op_name, op.name)) + if coloc_op.device: + coloc_device = pydev.DeviceSpec.from_string(coloc_op.device) + break + if coloc_device: + op._set_device(coloc_device) # pylint: disable=protected-access + + +def _GetColocationNames(op): + """Returns names of the ops that `op` should be colocated with.""" + colocation_names = [] + try: + class_values = op.get_attr('_class') + except ValueError: + # No _class attr + return + for val in class_values: + val = compat.as_str(val) + if val.startswith('loc:@'): + colocation_node_name = val[len('loc:@'):] + if colocation_node_name != op.name: + colocation_names.append(colocation_node_name) + return colocation_names def _GatherReturnElements(requested_return_elements, graph, results): diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 000a88bc09..4a215abd2e 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -642,8 +642,6 @@ class ImportGraphDefTest(test.TestCase): b.node_def.attr["_class"]) def testColocationWithDeviceFn(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None' attr { key: '_class' @@ -665,23 +663,17 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): with ops.device(CustomDeviceFn): - b, = importer.import_graph_def( - original_graph_def, return_elements=["B"], name="imported_graph") - - self.assertProtoEqualsVersion(""" - node { name: 'imported_graph/A' op: 'None' device: "/device:A:0" - attr { - key: '_class' value { list { s: 'loc:@imported_graph/A' } } - } - } - node { name: 'imported_graph/B' op: 'None' device: "/device:A:0" - attr { - key: '_class' value { list { s: 'loc:@imported_graph/A' } } - } }""", b.graph.as_graph_def()) - - # Test a scenario where 'A' doesn't get a device; 'A' should - # not have a device, but during runtime will get colocated with - # 'B' because of the colocation attribute. + a, b = importer.import_graph_def(original_graph_def, + return_elements=["A", "B"], + name="imported_graph") + self.assertEqual(a.device, "/device:A:0") + self.assertEqual(b.device, "/device:A:0") + self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"]) + self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"]) + + # Test a scenario where 'A' doesn't get a device; 'A' should not have a + # device, but during runtime will get colocated with 'B' because of the + # colocation attribute. B's device function is still overridden by A. def BDeviceFn(op): if "B" in op.name: return "/device:B:0" @@ -689,19 +681,13 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): with ops.device(BDeviceFn): - b, = importer.import_graph_def( - original_graph_def, return_elements=["B"], name="imported_graph") - - self.assertProtoEqualsVersion(""" - node { name: 'imported_graph/A' op: 'None' - attr { - key: '_class' value { list { s: 'loc:@imported_graph/A' } } - } - } - node { name: 'imported_graph/B' op: 'None' - attr { - key: '_class' value { list { s: 'loc:@imported_graph/A' } } - } }""", b.graph.as_graph_def()) + a, b = importer.import_graph_def(original_graph_def, + return_elements=["A", "B"], + name="imported_graph") + self.assertEqual(a.device, "") + self.assertEqual(b.device, "") + self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"]) + self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"]) # Only A gets a device, so B inherits it implicitly. def ADeviceFn(op): @@ -711,23 +697,15 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): with ops.device(ADeviceFn): - b, = importer.import_graph_def( - original_graph_def, return_elements=["B"], name="imported_graph") - - self.assertProtoEqualsVersion(""" - node { name: 'imported_graph/A' op: 'None' device: "/device:A:0" - attr { - key: '_class' value { list { s: 'loc:@imported_graph/A' } } - } - } - node { name: 'imported_graph/B' op: 'None' device: "/device:A:0" - attr { - key: '_class' value { list { s: 'loc:@imported_graph/A' } } - } }""", b.graph.as_graph_def()) + a, b = importer.import_graph_def(original_graph_def, + return_elements=["A", "B"], + name="imported_graph") + self.assertEqual(a.device, "/device:A:0") + self.assertEqual(b.device, "/device:A:0") + self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"]) + self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"]) def testMultipleColocationWithDeviceFn(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None'} node { name: 'B' op: 'None'} @@ -748,23 +726,19 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): with ops.device(CustomDeviceFn): - c, = importer.import_graph_def( - original_graph_def, return_elements=["C"], name="imported_graph") - - self.assertProtoEqualsVersion(""" - node { name: 'imported_graph/A' op: 'None' } - node { name: 'imported_graph/B' op: 'None' device: "/device:B:0" } - node { name: 'imported_graph/C' op: 'None' device: "/device:B:0" - attr { - key: '_class' value { - list { s: 'loc:@imported_graph/A' - s: 'loc:@imported_graph/B' } - } - } - }""", c.graph.as_graph_def()) + a, b, c = importer.import_graph_def(original_graph_def, + return_elements=["A", "B", "C"], + name="imported_graph") + self.assertEqual(a.device, "") + self.assertEqual(b.device, "/device:B:0") + self.assertEqual(c.device, "/device:B:0") + self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"]) + self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/B"]) + self.assertEqual(c.colocation_groups(), + [b"loc:@imported_graph/A", b"loc:@imported_graph/B"]) def testNamePrefixColocationAttrsMultipleImport(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API + if ops._USE_C_API: return # TODO(skyewm): set uniquify_names original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None' } @@ -791,15 +765,19 @@ class ImportGraphDefTest(test.TestCase): } }""", b.graph.as_graph_def()) def testNamePrefixColocationAttrsNotFound(self): - if ops._USE_C_API: return # TODO(skyewm): make this work with C API - original_graph_def = self._MakeGraphDef(""" node { name: 'B' op: 'None' attr { key: '_class' value { list { s: 'loc:@A' } } } }""") + + if ops._USE_C_API: + error_msg = "Node 'B' expects to be colocated with unknown node 'A'" + else: + error_msg = "does not exist during import" + with ops.Graph().as_default(): - with self.assertRaisesRegexp(ValueError, "does not exist during import"): + with self.assertRaisesRegexp(ValueError, error_msg): importer.import_graph_def( original_graph_def, return_elements=["B"], name="imported_graph") diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 2217513966..a616b15cf7 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3102,7 +3102,7 @@ class Graph(object): compute_device=compute_device) return ret - def _create_op_from_tf_operation(self, c_op): + def _create_op_from_tf_operation(self, c_op, compute_device=True): """Creates an `Operation` in this graph from the supplied TF_Operation. This method is like create_op() except the new Operation is constructed @@ -3112,6 +3112,8 @@ class Graph(object): Args: c_op: a wrapped TF_Operation + compute_device: (Optional.) If True, device functions will be executed + to compute the device property of the Operation. Returns: An `Operation` object. @@ -3122,7 +3124,7 @@ class Graph(object): for output in tf_outputs) control_inputs = self._control_dependencies_for_inputs(input_ops) ret = Operation(c_op, self, control_inputs=control_inputs) - self._create_op_helper(ret) + self._create_op_helper(ret, compute_device=compute_device) return ret def _create_op_helper(self, op, compute_shapes=True, compute_device=True): -- GitLab From 5d52b95279be57076a794c2f334c150a26566360 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Abrams Date: Wed, 29 Nov 2017 22:25:37 -0800 Subject: [PATCH 0984/1801] Adds Operations() method to Graph There is currently no way to list all of the operations in a graph from the go api. This patch ads an Operations() method to retrieve the list using the existing TF_GraphNextOperation c api. The graph_test was modified to include testing this new method. Signed-off-by: Vishvananda Ishaya Abrams --- tensorflow/go/graph.go | 14 ++++++++++++++ tensorflow/go/graph_test.go | 22 +++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go index 46c600eab1..a40aded3bf 100644 --- a/tensorflow/go/graph.go +++ b/tensorflow/go/graph.go @@ -114,6 +114,20 @@ func (g *Graph) Operation(name string) *Operation { return &Operation{cop, g} } +// Operations returns a list of all operations in the graph +func (g *Graph) Operations() []Operation { + var pos C.size_t = 0 + ops := []Operation{} + for { + cop := C.TF_GraphNextOperation(g.c, &pos) + if cop == nil { + break + } + ops = append(ops, Operation{cop, g}) + } + return ops +} + // OpSpec is the specification of an Operation to be added to a Graph // (using Graph.AddOperation). type OpSpec struct { diff --git a/tensorflow/go/graph_test.go b/tensorflow/go/graph_test.go index c3120bc720..b8d65c54f6 100644 --- a/tensorflow/go/graph_test.go +++ b/tensorflow/go/graph_test.go @@ -29,10 +29,26 @@ func hasOperations(g *Graph, ops ...string) error { missing = append(missing, op) } } - if len(missing) == 0 { - return nil + if len(missing) != 0 { + return fmt.Errorf("Graph does not have the operations %v", missing) } - return fmt.Errorf("Graph does not have the operations %v", missing) + + inList := map[string]bool{} + for _, op := range g.Operations() { + inList[op.Name()] = true + } + + for _, op := range ops { + if !inList[op] { + missing = append(missing, op) + } + } + + if len(missing) != 0 { + return fmt.Errorf("Operations %v are missing from graph.Operations()", missing) + } + + return nil } func TestGraphWriteToAndImport(t *testing.T) { -- GitLab From 1c4810141e71289d71bfd94a74434bd09ee6b20f Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Thu, 30 Nov 2017 09:27:16 -0800 Subject: [PATCH 0985/1801] Hoist function input placeholders out of any control flow context. Prior to this change, functions that closed over external tensors in a while loop would cause a segfault at runtime. This is because the external tensors are temporarily represented as placeholders in the function body before being replaced by input parameters, and the placeholders would be created directly in the while loop body. This would eventually lead to using the input tensor in the while loop body without an enter node. This wasn't caught by the runtime check because it isn't applied to function bodies. This change adds tests for capturing tensors in a while loop body and in a cond context. Note that the cond test passed without this fix. PiperOrigin-RevId: 177464541 --- tensorflow/python/framework/function.py | 5 ++- tensorflow/python/framework/function_test.py | 32 ++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 29cf223724..366025a0d8 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -692,7 +692,10 @@ class _FuncGraph(ops.Graph): else: # Substitute with a placeholder. self.extra_inputs.append(x) - ph = array_ops.placeholder(x.dtype, shape=x.get_shape()) + # Hoist the new input placeholder out of any control flow context + # we're currently in. + with ops.control_dependencies(None): + ph = array_ops.placeholder(x.dtype, shape=x.get_shape()) # pylint: disable=protected-access ph._handle_data = x._handle_data # pylint: enable=protected-access diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index ba43e9199b..11f343c579 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -724,6 +724,38 @@ class FunctionTest(test.TestCase): # NOTE: We still do not support capturing control deps. _ = Foo(x) + def testCaptureInWhileLoop(self): + g = ops.Graph() + with g.as_default(): + x = constant_op.constant(1) + + @function.Defun() + def Foo(): + return control_flow_ops.while_loop(lambda i: i < 10, + lambda i: i + x, + [0]) + y = Foo() + + with self.test_session(graph=g) as sess: + self.assertEqual(sess.run(y), 10) + + def testCaptureInCond(self): + g = ops.Graph() + with g.as_default(): + x = constant_op.constant(1) + + @function.Defun(dtypes.bool) + def Foo(pred): + return control_flow_ops.cond(pred, + lambda: x, + lambda: x + 1) + y = Foo(True) + z = Foo(False) + + with self.test_session(graph=g) as sess: + self.assertEqual(sess.run(y), 1) + self.assertEqual(sess.run(z), 2) + def testStableName(self): @function.Defun() -- GitLab From 9308470197bcc068dca9fe227d0ab144157950e1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 09:52:28 -0800 Subject: [PATCH 0986/1801] Rename tests. PiperOrigin-RevId: 177467740 --- ...se_am_model_test.cc => speech_asr_am_model_test.cc} | 10 +++++----- ...se_lm_model_test.cc => speech_asr_lm_model_test.cc} | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) rename tensorflow/contrib/lite/models/{speech_terse_am_model_test.cc => speech_asr_am_model_test.cc} (93%) rename tensorflow/contrib/lite/models/{speech_terse_lm_model_test.cc => speech_asr_lm_model_test.cc} (94%) diff --git a/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc b/tensorflow/contrib/lite/models/speech_asr_am_model_test.cc similarity index 93% rename from tensorflow/contrib/lite/models/speech_terse_am_model_test.cc rename to tensorflow/contrib/lite/models/speech_asr_am_model_test.cc index 30d89a1354..bf95b313f3 100644 --- a/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_asr_am_model_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Unit test for speech TERSE AM model using TFLite Ops. +// Unit test for speech ASR AM model using TFLite Ops. #include @@ -45,10 +45,10 @@ constexpr int kLstmLayer5OutputStateTensor = 103; constexpr int kLstmLayer5CellStateTensor = 104; constexpr int kModelOutputTensor = 109; -TEST(SpeechTerseAm, RandomIOTest) { +TEST(SpeechAsrAm, RandomIOTest) { // Read the model. string tflite_file_path = - file::JoinPath(TestDataPath(), "speech_terse_am_model.tflite"); + file::JoinPath(TestDataPath(), "speech_asr_am_model.tflite"); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to mmap model " << tflite_file_path; @@ -62,13 +62,13 @@ TEST(SpeechTerseAm, RandomIOTest) { // Load the input frames. Frames input_frames; const string input_file_path = - file::JoinPath(TestDataPath(), "speech_terse_am_model_in.csv"); + file::JoinPath(TestDataPath(), "speech_asr_am_model_in.csv"); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; const string output_file_path = - file::JoinPath(TestDataPath(), "speech_terse_am_model_out.csv"); + file::JoinPath(TestDataPath(), "speech_asr_am_model_out.csv"); ReadFrames(output_file_path, &output_frames); const int speech_batch_size = diff --git a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc b/tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc similarity index 94% rename from tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc rename to tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc index 04c54ffb22..53f2b66da4 100644 --- a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc +++ b/tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc @@ -59,10 +59,10 @@ static void ClearLstmStates(Interpreter* interpreter) { interpreter->tensor(kLstmLayer3CellStateTensor)->bytes); } -TEST(SpeechTerseLm, EndToEndTest) { +TEST(SpeechAsrLm, EndToEndTest) { // Read the model. string tflite_file_path = - file::JoinPath(TestDataPath(), "speech_terse_lm_model.tflite"); + file::JoinPath(TestDataPath(), "speech_asr_lm_model.tflite"); auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); CHECK(model) << "Failed to mmap model " << tflite_file_path; @@ -76,13 +76,13 @@ TEST(SpeechTerseLm, EndToEndTest) { // Load the input frames. Frames input_frames; const string input_file_path = - file::JoinPath(TestDataPath(), "speech_terse_lm_model_in.csv"); + file::JoinPath(TestDataPath(), "speech_asr_lm_model_in.csv"); ReadFrames(input_file_path, &input_frames); // Load the golden output results. Frames output_frames; const string output_file_path = - file::JoinPath(TestDataPath(), "speech_terse_lm_model_out.csv"); + file::JoinPath(TestDataPath(), "speech_asr_lm_model_out.csv"); ReadFrames(output_file_path, &output_frames); CHECK_EQ(interpreter->tensor(kModelInput1Tensor)->dims->size, 1); -- GitLab From f283173062f3ff9b6f69e8fc8a77421dcfdaa8f2 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 30 Nov 2017 09:56:07 -0800 Subject: [PATCH 0987/1801] [TF:XLA] Add support for the V2 variants of the FusedBatchNorm operators, which support mixed precision training. Until the necessary support for mixed precision fused batch norm is added to XLA, implement by casting to a common type. PiperOrigin-RevId: 177468202 --- .../compiler/tf2xla/kernels/batch_norm_op.cc | 74 ++++++++++++++----- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc index 248e9d111e..468af34aab 100644 --- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc @@ -14,7 +14,7 @@ limitations under the License. ==============================================================================*/ // XLA implementation of BatchNorm operations. -#include "tensorflow/compiler/tf2xla/literal_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" @@ -42,27 +42,44 @@ class FusedBatchNormOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { + xla::PrimitiveType input_type; + OP_REQUIRES_OK(ctx, + DataTypeToPrimitiveType(ctx->input_type(0), &input_type)); + xla::PrimitiveType stats_type; + OP_REQUIRES_OK(ctx, + DataTypeToPrimitiveType(ctx->input_type(1), &stats_type)); + + xla::ComputationBuilder* builder = ctx->builder(); + + xla::ComputationDataHandle input = ctx->Input(0); + + // TODO(b/69928690): support mixed precision in the XLA batch normalization + // operators. As a workaround, cast everything to the statistics type (which + // may be more precise than the input type). + input = builder->ConvertElementType(input, stats_type); + if (is_training_) { - xla::ComputationDataHandle output = ctx->builder()->BatchNormTraining( - ctx->Input(0), ctx->Input(1), ctx->Input(2), epsilon_, - feature_index_); + xla::ComputationDataHandle output = builder->BatchNormTraining( + input, ctx->Input(1), ctx->Input(2), epsilon_, feature_index_); // In training mode, outputs the normalized value as well as the // calculated mean and variance. - for (int i = 0; i < 3; i++) { - ctx->SetOutput(i, ctx->builder()->GetTupleElement(output, i)); - } + ctx->SetOutput(0, builder->ConvertElementType( + builder->GetTupleElement(output, 0), input_type)); + ctx->SetOutput(1, builder->GetTupleElement(output, 1)); + ctx->SetOutput(2, builder->GetTupleElement(output, 2)); + // Output 3 and 4 for "FusedBatchNorm" are currently marked as "reserved // space 1 & 2". They are used to pass the per-batch mean and // variance to the gradient. Here we maintain the same behavior by setting // them to the mean and variance calculated by BatchNormTraining. - ctx->SetOutput(3, ctx->builder()->GetTupleElement(output, 1)); - ctx->SetOutput(4, ctx->builder()->GetTupleElement(output, 2)); + ctx->SetOutput(3, builder->GetTupleElement(output, 1)); + ctx->SetOutput(4, builder->GetTupleElement(output, 2)); } else { - xla::ComputationDataHandle output = ctx->builder()->BatchNormInference( - ctx->Input(0), ctx->Input(1), ctx->Input(2), ctx->Input(3), - ctx->Input(4), epsilon_, feature_index_); - ctx->SetOutput(0, output); + xla::ComputationDataHandle output = builder->BatchNormInference( + input, ctx->Input(1), ctx->Input(2), ctx->Input(3), ctx->Input(4), + epsilon_, feature_index_); + ctx->SetOutput(0, builder->ConvertElementType(output, input_type)); // Directly send input to output as mean and variance in inference mode. ctx->SetOutput(1, ctx->Input(3)); ctx->SetOutput(2, ctx->Input(4)); @@ -78,6 +95,7 @@ class FusedBatchNormOp : public XlaOpKernel { }; REGISTER_XLA_OP(Name("FusedBatchNorm"), FusedBatchNormOp); +REGISTER_XLA_OP(Name("FusedBatchNormV2"), FusedBatchNormOp); class FusedBatchNormGradOp : public XlaOpKernel { public: @@ -101,19 +119,36 @@ class FusedBatchNormGradOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationBuilder* builder = ctx->builder(); + auto grad_output = ctx->Input(0); auto activation = ctx->Input(1); auto scale = ctx->Input(2); auto mean = ctx->Input(3); auto var = ctx->Input(4); - xla::ComputationDataHandle output = ctx->builder()->BatchNormGrad( + + xla::PrimitiveType input_type; + OP_REQUIRES_OK(ctx, + DataTypeToPrimitiveType(ctx->input_type(0), &input_type)); + xla::PrimitiveType stats_type; + OP_REQUIRES_OK(ctx, + DataTypeToPrimitiveType(ctx->input_type(3), &stats_type)); + + // TODO(b/69928690): support mixed precision in the XLA batch normalization + // operators. As a workaround, cast everything to the statistics type (which + // may be more precise than the input type). + grad_output = builder->ConvertElementType(grad_output, stats_type); + activation = builder->ConvertElementType(activation, stats_type); + + xla::ComputationDataHandle output = builder->BatchNormGrad( activation, scale, mean, var, grad_output, epsilon_, feature_index_); - for (int i = 0; i < 3; i++) { - ctx->SetOutput(i, ctx->builder()->GetTupleElement(output, i)); - } - ctx->SetOutput(3, ctx->builder()->GetTupleElement(output, 1)); - ctx->SetOutput(4, ctx->builder()->GetTupleElement(output, 2)); + ctx->SetOutput(0, builder->ConvertElementType( + builder->GetTupleElement(output, 0), input_type)); + ctx->SetOutput(1, builder->GetTupleElement(output, 1)); + ctx->SetOutput(2, builder->GetTupleElement(output, 2)); + ctx->SetOutput(3, builder->GetTupleElement(output, 1)); + ctx->SetOutput(4, builder->GetTupleElement(output, 2)); } private: @@ -122,6 +157,7 @@ class FusedBatchNormGradOp : public XlaOpKernel { }; REGISTER_XLA_OP(Name("FusedBatchNormGrad"), FusedBatchNormGradOp); +REGISTER_XLA_OP(Name("FusedBatchNormGradV2"), FusedBatchNormGradOp); } // namespace } // namespace tensorflow -- GitLab From 12976748822cdb3885f37dbda42ce8674afa6f91 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 30 Nov 2017 10:09:52 -0800 Subject: [PATCH 0988/1801] Uses C API for eager functions. Rolls back the rollback with some swiggery to get python3 to work. PiperOrigin-RevId: 177470328 --- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/context.py | 15 ++ tensorflow/python/eager/function.py | 144 ++++++++++++------ tensorflow/python/eager/graph_callable.py | 18 ++- .../python/eager/graph_callable_test.py | 1 - tensorflow/python/framework/ops.py | 30 ++-- tensorflow/python/pywrap_tfe.i | 3 +- 7 files changed, 143 insertions(+), 70 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 0144f3b1e5..dc1142705a 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -540,7 +540,7 @@ def _ensure_unique_tensor_objects(parameter_positions, args): if i in parameter_positions: tid = ops.tensor_id(t) if tid in s: - args[i] = args[i]._dup() # pylint: disable=protected-access + args[i] = gen_array_ops.identity(args[i]) else: s.add(tid) return args diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 92f4e15c05..415416cfae 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -288,6 +288,21 @@ class Context(object): self._initialize_handle_and_devices() return self._num_gpus + def add_function(self, fn): + """Add a function definition to the context. + + Once added, the function (identified by its name) can be executed like any + other operation. + + Args: + fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper). + """ + with errors.raise_exception_on_not_ok_status() as status: + pywrap_tensorflow.TFE_ContextAddFunction( + self._handle, # pylint: disable=protected-access + fn, + status) + def add_function_def(self, fdef): """Add a function definition to the context. diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 2f4b59e938..cadabb3a24 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -25,15 +25,19 @@ import threading import numpy as np +from tensorflow.core.framework import function_pb2 +from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import execute from tensorflow.python.eager import tape from tensorflow.python.eager.graph_only_ops import graph_placeholder +from tensorflow.python.framework import c_api_util from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module -from tensorflow.python.framework import graph_to_function_def +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import gradients_impl +from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator @@ -47,10 +51,41 @@ _scoped_captures = threading.local() _scoped_captures.tensors = None -def make_function_def(graph, operations, inputs, outputs): - """Makes function def from the given graph with the operations.""" - return graph_to_function_def.graph_to_function_def( - graph, operations, inputs, outputs) +def make_function_def(name, graph, operations, inputs, outputs): + """Makes FunctionDef proto and defined function. + + Args: + name: the function name + graph: the graph from which to build the function + operations: the operations in the function body + inputs: tensors to be used as function arguments + outputs: tensors to be returned from the function + + Returns: + fdef: a FunctionDef protocol buffer for the function + fn: a wrapped TF_Function for the function + """ + with errors.raise_exception_on_not_ok_status() as status: + fn = pywrap_tensorflow.TF_GraphToFunction_wrapper( + graph._c_graph, # pylint: disable=protected-access + compat.as_str(name), + False, + [o._c_op for o in operations], # pylint: disable=protected-access + [t._as_tf_output() for t in inputs], # pylint: disable=protected-access + [t._as_tf_output() for t in outputs], # pylint: disable=protected-access + [], + None, + compat.as_str(""), + status) + # TODO(apassos) avoid creating a FunctionDef (specially to grab the signature, + # but also in general it's nice not to depend on it. + with c_api_util.tf_buffer() as buffer_: + with errors.raise_exception_on_not_ok_status() as status: + pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status) + proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_) + fdef = function_pb2.FunctionDef() + fdef.ParseFromString(compat.as_bytes(proto_data)) + return fdef, fn @contextlib.contextmanager @@ -115,6 +150,10 @@ class CapturingGraph(ops.Graph): # for resource tensors. self._last_op_using_resource_tensor = {} + # TODO(apassos) remove once the C API is used by default. + def _use_c_api_hack(self): + return True + def clear_resource_control_flow_state(self): self._last_op_using_resource_tensor = {} @@ -207,14 +246,20 @@ def _inference_name(n): return "__inference_%s_%s" % (n, ops.uid()) +# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction +# so it doesn't have the definition-generating logic and is just a container for +# an already-defined function. class _DefinedFunction(object): """Mocks the interface of tf _DefinedFunction.""" - def __init__(self, fdef): + def __init__(self, fdef, fn): self.definition = fdef self.name = fdef.signature.name + self.signature = fdef.signature self.grad_func_name = None self.python_grad_func = None + self._c_func = fn + self._grad_func = None def _map_sequence_obj_to_idx(sequence): @@ -250,6 +295,7 @@ class GraphModeFunction(object): input_placeholders, extra_inputs, fdef, + fn, graph, operations, func_outputs, @@ -263,7 +309,7 @@ class GraphModeFunction(object): self._graph = graph self._has_backprop = False self._func_name = fdef.signature.name - self._fdef = _DefinedFunction(fdef) + self._fdef = _DefinedFunction(fdef, fn) self._num_outputs = len(fdef.signature.output_arg) self._ops = operations self._func_outputs = func_outputs @@ -283,38 +329,45 @@ class GraphModeFunction(object): with self._graph.as_default(), context.graph_mode(): c = _CapturingContext() with c: - filtered_outputs = [ - x for x in self._returns if x is not None - ] + filtered_outputs = [x for x in self._returns if x is not None] self._out_grad_placeholders = [ - graph_placeholder(x.dtype, x.shape) for x in filtered_outputs - ] + graph_placeholder(x.dtype, x.shape) for x in filtered_outputs] in_gradients = gradients_impl.gradients( filtered_outputs, self._input_placeholders, grad_ys=self._out_grad_placeholders) - shapes = [x.shape for x in in_gradients if x is not None] + shapes = tuple(x.shape for x in in_gradients if x is not None) captures = list(sorted(c.captured_tensors, key=lambda x: x.name)) - forward_function_def = make_function_def( - self._graph, self._ops, self._input_placeholders, + forward_name = _forward_name(self._func_name) + forward_function_def, forward_fn = make_function_def( + forward_name, self._graph, self._ops, self._input_placeholders, filtered_outputs + captures) - self._forward_fdef = _DefinedFunction(forward_function_def) - _register_with_name(_forward_name(self._func_name), forward_function_def) - backward_outputs = [x for x in in_gradients if x is not None] + self._forward_fdef = _DefinedFunction(forward_function_def, forward_fn) + _register(forward_fn) + backward_outputs = tuple(x for x in in_gradients if x is not None) all_inputs = self._out_grad_placeholders + captures - backward_function_def = make_function_def( - self._graph, [x.op for x in self._out_grad_placeholders - ] + list(sorted(c.known_ops, key=lambda x: x.name)), + # Excluding input ops from the body as we do not intend to execute these + # operations when the function is executed. + all_ignored_ops = frozenset(x.op for x in all_inputs) + # Enforce a deterministic order of operations in the generated graph. This + # means rerunning the function-defining code will always define the same + # function, which is useful if we serialize this etc. + fdef_ops = tuple(x for x in sorted(c.known_ops, key=lambda x: x.name) + if x not in all_ignored_ops) + bname = _backward_name(self._func_name) + backward_function_def, backward_fn = make_function_def( + bname, self._graph, fdef_ops, all_inputs, backward_outputs) - _register_with_name(_backward_name(self._func_name), backward_function_def) + _register(backward_fn) self._backward_function = GraphModeFunction( - all_inputs, [], backward_function_def, self._graph, c.known_ops, - in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes) + all_inputs, [], backward_function_def, backward_fn, self._graph, + c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs), + shapes) def _backprop_call(self, args): """Calls the wrapped function and records the result on a tape.""" all_args = args + self._extra_inputs - signature = self._forward_fdef.definition.signature + signature = self._forward_fdef.signature ctx = context.context() if ctx.in_graph_mode(): g = ops.get_default_graph() @@ -325,7 +378,7 @@ class GraphModeFunction(object): return ops.internal_convert_to_tensor(x, ctx=ctx) op = g.create_op( signature.name, [make_tensor(x) for x in all_args], - [dtypes_module.DType(x.type) for x in signature.output_arg], + tuple(dtypes_module.DType(x.type) for x in signature.output_arg), op_def=signature, name="FunctionCall", compute_shapes=False) @@ -361,11 +414,8 @@ class GraphModeFunction(object): if v._trainable: # pylint: disable=protected-access tape.watch_variable(v) - tensor_inputs = [ - x for x in nest.flatten(args) - if isinstance(x, ops.Tensor) - ] - + tensor_inputs = [x for x in nest.flatten(args) + if isinstance(x, ops.Tensor)] if tape.should_record(tensor_inputs) or tape.should_record( self._extra_inputs): if not self._has_backprop: @@ -384,7 +434,7 @@ class GraphModeFunction(object): args = list(tensor_inputs) + self._extra_inputs op = g.create_op( signature.name, [ops.convert_to_tensor(x) for x in args], - [dtypes_module.DType(x.type) for x in signature.output_arg], + tuple(dtypes_module.DType(x.type) for x in signature.output_arg), op_def=signature, name="FunctionCall", compute_shapes=False) @@ -469,29 +519,32 @@ def _defun_internal(name, func, args, kwds): extra_inputs = [] extra_placeholders = [] outputs_list = nest.flatten(func_outputs) - output_shapes = [x.shape for x in outputs_list if x is not None] + output_shapes = tuple(x.shape for x in outputs_list if x is not None) - flat_inputs = [ - x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor) - ] + flat_inputs = [x for x in nest.flatten(func_inputs) + if isinstance(x, ops.Tensor)] all_inputs = flat_inputs + list(extra_placeholders) - + all_ignored_ops = frozenset(x.op for x in all_inputs) func_def_outputs = [x for x in outputs_list if x is not None] - inference_function_def = make_function_def( - tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs) + fname = _inference_name(name) + operations = tuple(x for x in tmp_graph.get_operations() + if x not in all_ignored_ops) + inference_function_def, fn = make_function_def( + fname, tmp_graph, operations, all_inputs, func_def_outputs) # Register any other functions defined in the graph # TODO(ashankar): Oh lord, forgive me for this lint travesty. for f in tmp_graph._functions.values(): # pylint: disable=protected-access # TODO(ashankar): What about the gradient registry? - _register_with_name(f.name, f.definition) - _register_with_name(_inference_name(name), inference_function_def) + _register(f._c_func) # pylint: disable=protected-access + _register(fn) return GraphModeFunction( all_inputs, extra_inputs, inference_function_def, + fn, tmp_graph, - tmp_graph.get_operations(), + operations, func_outputs, _map_sequence_obj_to_idx(func_def_outputs), output_shapes, @@ -517,10 +570,9 @@ def _cache_key(x): return x -def _register_with_name(name, fdef): - """Registers the function `fdef` with the name `name`.""" - fdef.signature.name = name - context.context().add_function_def(fdef) +def _register(fn): + """Registers the function `fn`.""" + context.context().add_function(fn) # TODO(apassos): better error messages for non-hashable arguments. diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index faf0ac88bc..3da100d800 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -318,7 +318,9 @@ def _graph_callable_internal(func, shape_and_dtypes): placeholder_inputs = flat_inputs+ list(extra_placeholders) func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)] - initializer_function_def = function.make_function_def( + initialization_name = function._inference_name(func.__name__) # pylint: disable=protected-access + initializer_function_def, initializer_fn = function.make_function_def( + initialization_name, tmp_graph, initializing_operations, placeholder_inputs, @@ -327,13 +329,13 @@ def _graph_callable_internal(func, shape_and_dtypes): # Also, what about the gradient registry of these functions? Those need to be # addressed as well. for f in tmp_graph._functions.values(): # pylint: disable=protected-access - function._register_with_name(f.name, f.definition) # pylint: disable=protected-access - function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access - initializer_function_def) + function._register(f._c_func) # pylint: disable=protected-access + function._register(initializer_fn) # pylint: disable=protected-access initializer_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, initializer_function_def, + initializer_fn, tmp_graph, initializing_operations, func_outputs, @@ -342,18 +344,20 @@ def _graph_callable_internal(func, shape_and_dtypes): capture_func_def_outputs = [ x for x in captured_outlist if isinstance(x, tf_ops.Tensor)] - captured_function_def = function.make_function_def( + captured_function_name = function._inference_name(func.__name__) # pylint: disable=protected-access + captured_function_def, capturing_fn = function.make_function_def( + captured_function_name, tmp_graph, capturing_operations, placeholder_inputs, capture_func_def_outputs) - function._register_with_name(function._inference_name(func.__name__), # pylint: disable=protected-access - captured_function_def) + function._register(capturing_fn) # pylint: disable=protected-access captured_function = function.GraphModeFunction( placeholder_inputs, extra_inputs, captured_function_def, + capturing_fn, tmp_graph, capturing_operations, captured_outputs, diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py index 548e16a909..b9e6ca2a93 100644 --- a/tensorflow/python/eager/graph_callable_test.py +++ b/tensorflow/python/eager/graph_callable_test.py @@ -152,7 +152,6 @@ class GraphCallableTest(test.TestCase): self.assertAllEqual(5, f(constant_op.constant(2))) def testNestedFunction(self): - # TensorFlow function (which is what would be used in TensorFlow graph # construction). @function.Defun(dtypes.int32, dtypes.int32) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index a616b15cf7..5f945ac133 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -599,11 +599,6 @@ class Tensor(_TensorLike): """ return _eval_using_default_session(self, feed_dict, self.graph, session) - def _dup(self): - ret = copy.copy(self) - ret._id = uid() # pylint: disable=protected-access - return ret - # TODO(agarwal): consider getting rid of this. class _EagerTensorBase(Tensor): @@ -729,9 +724,6 @@ class _EagerTensorBase(Tensor): return new_tensor # pylint: enable=protected-access - def _dup(self): - return self._copy(device_name=self.device) - @property def shape(self): return tensor_shape.TensorShape(self._shape_tuple()) @@ -1794,7 +1786,7 @@ class Operation(object): c_api.SetRequestedDevice( self._graph._c_graph, # pylint: disable=protected-access self._c_op, # pylint: disable=protected-access - _device_string(device)) + compat.as_str(_device_string(device))) else: self._node_def.device = _device_string(device) @@ -2083,7 +2075,7 @@ class Operation(object): def _set_attr(self, attr_name, attr_value): """Private method used to set an attribute in the node_def.""" - if _USE_C_API: + if self._c_op: buf = c_api.TF_NewBufferFromString( compat.as_bytes(attr_value.SerializeToString())) try: @@ -2652,11 +2644,16 @@ class Graph(object): # TODO(skyewm): fold as much of the above as possible into the C # implementation - if _USE_C_API: + if _USE_C_API or self._use_c_api_hack(): self._scoped_c_graph = c_api_util.ScopedTFGraph() else: self._scoped_c_graph = None + # TODO(apassos) remove once the C API is used by default. + def _use_c_api_hack(self): + """Temporary hack; can be overridden to force C API usage.""" + return False + def _convert_stack(self, stack, include_func_start_lineno=False): """Converts a stack extracted using _extract_stack() to a traceback stack. @@ -2985,9 +2982,14 @@ class Graph(object): # Add function to graph # pylint: disable=protected-access if self._c_graph: - assert function._c_func, ( - "Cannot add function created without C API support to graph " - "created with C API support") + # Handle functions created without using the C API. TODO(apassos,skyewm) + # remove this when all functions are generated using the C API by default + # as this will be unnecessary. + if not function._c_func: + with errors.raise_exception_on_not_ok_status() as status: + serialized = function.definition.SerializeToString() + function._c_func = c_api.TF_FunctionImportFunctionDef( + serialized, status) with errors.raise_exception_on_not_ok_status() as status: gradient = function._grad_func._c_func if function._grad_func else None c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient, diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 82b154164e..82750e9e49 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -18,6 +18,7 @@ limitations under the License. %rename("%s") TFE_NewContext; %rename("%s") TFE_DeleteContext; %rename("%s") TFE_ContextListDevices; +%rename("%s") TFE_ContextAddFunction; %rename("%s") TFE_ContextAddFunctionDef; %rename("%s") TFE_OpNameGetAttrType; %rename("%s") TFE_Py_InitEagerTensor; @@ -149,7 +150,7 @@ limitations under the License. } $1 = &temp; $1->resize(PyInt_AsLong($input), nullptr); -} +} // Create new Status object. %typemap(in, numinputs=0) TF_Status *out_status { -- GitLab From b8c9b75bbb75def92b2cae2406ba88d20630b66a Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 30 Nov 2017 10:33:27 -0800 Subject: [PATCH 0989/1801] Change "Datasets" to "`tf.data`" in the "Reading Data" API guide. PiperOrigin-RevId: 177473833 --- .../api_guides/python/reading_data.md | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md index b3ebaa0f0a..4594887349 100644 --- a/tensorflow/docs_src/api_guides/python/reading_data.md +++ b/tensorflow/docs_src/api_guides/python/reading_data.md @@ -1,11 +1,11 @@ # Reading data Note: The preferred way to feed data into a tensorflow program is using the -@{$datasets$Datasets API}. +@{$datasets$`tf.data` API}. There are four methods of getting data into a TensorFlow program: -* `Dataset` API: Easily construct a complex input pipeline. (preferred method) +* `tf.data` API: Easily construct a complex input pipeline. (preferred method) * Feeding: Python code provides the data when running each step. * `QueueRunner`: a queue-based input pipeline reads the data from files at the beginning of a TensorFlow graph. @@ -14,26 +14,27 @@ There are four methods of getting data into a TensorFlow program: [TOC] -## Dataset API +## `tf.data` API See the @{$datasets$programmer's guide} for an in-depth explanation of -@{tf.data.Dataset}. The `Dataset` API allows you to extract and preprocess data -from different input/file formats, and apply transformations such as batch, -shuffle, and map to the dataset. This is an improved version of the old input -methods, feeding and `QueueRunner`. +@{tf.data.Dataset}. The `tf.data` API enables you to extract and preprocess data +from different input/file formats, and apply transformations such as batching, +shuffling, and mapping functions over the dataset. This is an improved version +of the old input methods---feeding and `QueueRunner`---which are described +below for historical purposes. ## Feeding +Warning: "Feeding" is the least efficient way to feed data into a TensorFlow +program and should only be used for small experiments and debugging. + TensorFlow's feed mechanism lets you inject data into any Tensor in a -computation graph. A python computation can thus feed data directly into the +computation graph. A Python computation can thus feed data directly into the graph. Supply feed data through the `feed_dict` argument to a run() or eval() call that initiates computation. -Warning: "Feeding" is the least efficient way to feed data into a tensorflow -program and should only be used for small experiments and debugging. - ```python with tf.Session(): input = tf.placeholder(tf.float32) @@ -55,6 +56,10 @@ and is described in the @{$mechanics$MNIST tutorial}. ## `QueueRunner` +Warning: This section discusses implementing input pipelines using the +queue-based APIs which can be cleanly replaced by the @{$datasets$`tf.data` +API}. + A typical queue-based pipeline for reading records from files has the following stages: 1. The list of filenames @@ -66,9 +71,6 @@ A typical queue-based pipeline for reading records from files has the following 7. *Optional* preprocessing 8. Example queue -Warning: This section discusses implementing input pipelines using the -queue-based APIs which can be cleanly replaced by the @{$datasets$Datasets API}. - ### Filenames, shuffling, and epoch limits For the list of filenames, use either a constant string Tensor (like @@ -499,7 +501,7 @@ You can have the train and eval in the same graph in the same process, and share their trained variables or layers. See @{$variables$the shared variables tutorial}. To support the single-graph approach -@{$programmers_guide/datasets$Datasets} also supplies +@{$programmers_guide/datasets$`tf.data`} also supplies @{$programmers_guide/datasets#creating_an_iterator$advanced iterator types} that that allow the user to change the input pipeline without rebuilding the graph or session. -- GitLab From 3b9a26d04544ba6e13181a7df07bb693769b7d7c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 30 Nov 2017 10:40:46 -0800 Subject: [PATCH 0990/1801] Turned a verbose log into a vlog PiperOrigin-RevId: 177474943 --- tensorflow/core/grappler/grappler_item_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index a186e9a181..da99777bbc 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -188,7 +188,7 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( << ", skipping this input"; return nullptr; } - LOG(INFO) << "Will use fetch node " << name; + VLOG(1) << "Will use fetch node " << name; new_item->fetch.push_back(name); } } -- GitLab From 0369392dfcb569a2b8c55fb7a5d3dc08b6cb6ef8 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Thu, 30 Nov 2017 11:00:11 -0800 Subject: [PATCH 0991/1801] Internal testing change. PiperOrigin-RevId: 177478003 --- tensorflow/contrib/tpu/BUILD | 15 - .../contrib/tpu/python/tpu/test_util.py | 296 ------------------ 2 files changed, 311 deletions(-) delete mode 100644 tensorflow/contrib/tpu/python/tpu/test_util.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index f542d94139..a34c7f91f2 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -31,21 +31,6 @@ cc_library( ], ) -py_library( - name = "tpu_test_util", - srcs = ["python/tpu/test_util.py"], - srcs_version = "PY2AND3", - deps = [ - ":tpu_lib", - ":tpu_py", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:session", - "//tensorflow/python:variables", - ], -) - py_library( name = "tpu_estimator", srcs = [ diff --git a/tensorflow/contrib/tpu/python/tpu/test_util.py b/tensorflow/contrib/tpu/python/tpu/test_util.py deleted file mode 100644 index a5d4ff9722..0000000000 --- a/tensorflow/contrib/tpu/python/tpu/test_util.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# =================================================================== -"""Utilities to ease testing on TPU devices.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path -import pickle -import tempfile - -import numpy as np - -from tensorflow.contrib.tpu.python.tpu import tpu -from tensorflow.contrib.tpu.python.tpu import tpu_config -from tensorflow.contrib.tpu.python.tpu import tpu_estimator -from tensorflow.core.protobuf import config_pb2 -from tensorflow.python.client import session as tf_session -from tensorflow.python.estimator import model_fn as model_fn_lib -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.framework import test_util -from tensorflow.python.ops import gen_array_ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import gfile -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import saver as tf_saver - - -def has_tpu(): - """Check if a TPU device is available. - - Device enumeration via `device_lib` currently fails for TPU systems. - (http://b/68333779). To work around this, we determine the existence of a - TPU by a successful call to `initialize_system`. - - Returns: - boolean, True if a TPU device is available, otherwise False. - """ - - def _check(): - with tf_session.Session() as sess: - sess.run(tpu.initialize_system()) - sess.run(tpu.shutdown_system()) - - try: - _check() - return True - except errors.OpError as _: - return False - - -def _available_devices(): - devices = ["cpu"] - if not test_util.gpu_device_name(): - devices.append("gpu") - - if has_tpu(): - devices.append("tpu") - - return tuple(devices) - - -def copy_dir(src, tgt): - """Copy src to tgt.""" - gfile.MakeDirs(tgt) - seen_dirs = set() - for dirname, _, files in gfile.Walk(src): - for f in files: - src_f = os.path.join(dirname, f) - tgt_f = src_f.replace(src, tgt) - tgt_d = os.path.dirname(tgt_f) - if tgt_d not in seen_dirs: - gfile.MkDir(tgt_d) - seen_dirs.add(tgt_d) - gfile.Copy(src_f, tgt_f, overwrite=True) - - -def compare_model(model_fn, - input_fn, - params, - master="local", - temp_dir=None, - num_shards=2, - tolerance=1e-4): - """Compare the results of running `model_fn` on the TPU and CPU.""" - if not temp_dir: - temp_dir = tempfile.mkdtemp() - - cpu_model_dir = "%s/cpu-model" % temp_dir - tpu_model_dir = "%s/tpu-model" % temp_dir - initial_model_dir = "%s/initial-model" % temp_dir - - logging.info("Checkpoints and weights will be written to %s", temp_dir) - - num_steps = 1 - - def _model_adapter(features, labels, mode, params): - """Run users model function with random seeds fixed to known values.""" - random_seed.set_random_seed(0) - np.random.seed(0) - return model_fn(features, labels, mode, params) - - def _input_adapter(params): - random_seed.set_random_seed(0) - np.random.seed(0) - return input_fn(params) - - def _make_run_config(model_dir): - return tpu_config.RunConfig( - master=master, - model_dir=model_dir, - save_checkpoints_secs=10000, - session_config=config_pb2.ConfigProto( - allow_soft_placement=True, log_device_placement=False), - tpu_config=tpu_config.TPUConfig( - iterations_per_loop=num_steps, - num_shards=num_shards, - ), - ) - - def _make_estimator(use_tpu, model_dir): - return tpu_estimator.TPUEstimator( - model_fn=_model_adapter, - use_tpu=use_tpu, - config=_make_run_config(model_dir), - train_batch_size=num_shards, - params=dict(params, use_tpu=use_tpu), - ) - - def _extract_weights(checkpoint): - """Extract model weights from the given checkpoint file.""" - weights = {} - graph = ops.Graph() - with graph.as_default(): - features, labels = _input_adapter(dict(params, batch_size=num_shards)) - model_fn( - features, labels, - params=dict(params, use_tpu=False), - mode=model_fn_lib.ModeKeys.TRAIN) - saver = tf_saver.Saver() - with tf_session.Session(graph=graph) as sess: - saver.restore(sess, checkpoint) - all_vars = [] - all_vars.extend(graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) - all_vars.extend(graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) - all_vars.extend(graph.get_collection(ops.GraphKeys.MODEL_VARIABLES)) - - for var in all_vars: - weights[var.name] = sess.run(var) - return weights - - def _run_step(use_tpu, model_dir): - """Create an estimator and run a single step on the given device.""" - tf_session.Session.reset(target=master) - - logging.info("Running step. TPU=%d. model_dir=%s", use_tpu, model_dir) - est = _make_estimator(use_tpu=use_tpu, model_dir=model_dir) - est.train(input_fn=_input_adapter, steps=num_steps) - weights = _extract_weights(est.latest_checkpoint()) - with gfile.Open(os.path.join(temp_dir, "tpu-%d.weights" % use_tpu), - "wb") as f: - f.write(pickle.dumps(weights)) - return weights - - # initialize models to the same weights by running a single step on the CPU - _run_step(use_tpu=False, model_dir=initial_model_dir) - - copy_dir(initial_model_dir, cpu_model_dir) - copy_dir(initial_model_dir, tpu_model_dir) - - cpu_weights = _run_step(use_tpu=False, model_dir=cpu_model_dir) - tpu_weights = _run_step(use_tpu=True, model_dir=tpu_model_dir) - - bad_weights = False - for k in cpu_weights: - if k not in tpu_weights: - raise KeyError("Missing weight %s from TPU checkpoint.", k) - - if not np.allclose( - cpu_weights[k], tpu_weights[k], rtol=tolerance, atol=tolerance): - bad_weights = True - logging.error("Weights for layer %s have diverged.", k) - - if bad_weights: - raise ValueError("Some weights have diverged. Output pickle files have " - "been written to %s for inspection." % temp_dir) - - -class TPUTestCase(test_util.TensorFlowTestCase): - """Adds helpers for testing on TPU devices to `TensorFlowTestCase`. - - Example usage: - - ``` - def model_fn(features): - return tf.reduce_sum(features * 2) - - class ModelTests(test_util.TPUTestCase): - def test_sum(self): - v = np.random.randn(10, 10).astype("float32") - self.assert_device_output(model_fn, [v], (v*2).sum(), - devices=("cpu", "tpu")) - ``` - """ - - def __init__(self, methodName="runTest"): # pylint: disable=invalid-name - super(TPUTestCase, self).__init__(methodName) - self._available_devices = _available_devices() - - def run_on_device(self, model_fn, model_inputs, device): - """Runs `model_fn` on the given device. - - Raises an exception if no such device is available. `model_fn` should - return one or more tensors as a list or tuple. - - Args: - model_fn: Function returning one or more tensors. - model_inputs: An iterable of Numpy arrays or scalars. - These will be passed as arguments to `model_fn`. - device: Device to run on. One of ("tpu", "gpu", "cpu"). - - Returns: - Output from the model function. - """ - - def _make_placeholders(): - return dict([(gen_array_ops.placeholder_with_default(v, v.shape), v) - for v in model_inputs]) - - if device == "tpu": - with self.test_session(graph=ops.Graph()) as sess: - placeholders = _make_placeholders() - tpu_computation = tpu.rewrite(model_fn, placeholders.keys()) - sess.run(tpu.initialize_system()) - sess.run(variables.global_variables_initializer()) - result = sess.run(tpu_computation, placeholders) - sess.run(tpu.shutdown_system()) - # TODO(b/36891278): supports non-flat returns lists in tpu.rewrite(). - if len(result) == 1: - return result[0] - return result - elif device == "gpu": - with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: - placeholders = _make_placeholders() - sess.run(variables.global_variables_initializer()) - return sess.run(model_fn(placeholders.keys()), placeholders) - elif device == "cpu": - # TODO(power) -- will this interact poorly with cached GPU sessions? - with self.test_session(graph=ops.Graph(), use_gpu=False) as sess: - placeholders = _make_placeholders() - sess.run(variables.global_variables_initializer()) - return sess.run(model_fn(placeholders.keys()), placeholders) - - def _compare_values(self, actual_outputs, expected_outputs): - if isinstance(expected_outputs, (list, tuple)): - for a, b in zip(actual_outputs, expected_outputs): - self.assertAllCloseAccordingToType(a, b) - else: - self.assertAllCloseAccordingToType(actual_outputs, expected_outputs) - - def assert_device_output(self, - model_fn, - model_inputs, - expected_outputs, - devices=("cpu", "gpu", "tpu")): - """Run `model_fn` on the given devices. - - Results are compared via `assertAllCloseAccordingToType`. - - Args: - model_fn: Function returning one or more tensors - model_inputs: Numpy arrays or scalars passed as arguments to model_fn - expected_outputs: Numpy arrays or scalars to compare against. - devices: Set of devices to run on. If a device is not available, tests - will be skipped for that device. - """ - devices = set(devices).intersection(self._available_devices) - - for device in devices: - device_out = self.run_on_device(model_fn, model_inputs, device=device) - self._compare_values(device_out, expected_outputs) -- GitLab From fa6a704e4ab891e69642332f991e510a6873aa83 Mon Sep 17 00:00:00 2001 From: Zhengsheng Wei Date: Fri, 1 Dec 2017 03:05:32 +0800 Subject: [PATCH 0992/1801] revise docstring (#14792) --- .../contrib/model_pruning/python/layers/core_layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/model_pruning/python/layers/core_layers.py b/tensorflow/contrib/model_pruning/python/layers/core_layers.py index ae60d8b1e1..95dfd8f421 100644 --- a/tensorflow/contrib/model_pruning/python/layers/core_layers.py +++ b/tensorflow/contrib/model_pruning/python/layers/core_layers.py @@ -72,8 +72,8 @@ class _MaskedConv(base.Layer): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Regularizer function for the output. @@ -279,8 +279,8 @@ class MaskedConv2D(_MaskedConv): linear activation. use_bias: Boolean, whether the layer uses a bias. kernel_initializer: An initializer for the convolution kernel. - bias_initializer: An initializer for the bias vector. If None, no bias will - be applied. + bias_initializer: An initializer for the bias vector. If None, the default + initializer will be used. kernel_regularizer: Optional regularizer for the convolution kernel. bias_regularizer: Optional regularizer for the bias vector. activity_regularizer: Regularizer function for the output. -- GitLab From c9f9f054f7a4414577684cde3087b690877403d9 Mon Sep 17 00:00:00 2001 From: dmaclach Date: Thu, 30 Nov 2017 11:05:58 -0800 Subject: [PATCH 0993/1801] Update CONTRIBUTING.md (#14695) Add Objective-C Style guide to list. --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 43abdaafbf..1b537ca73c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -114,6 +114,7 @@ pylint --rcfile=/tmp/pylintrc myfile.py * [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html) * [Google JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html) * [Google Shell Style Guide](https://google.github.io/styleguide/shell.xml) +* [Google Objective-C Style Guide](http://google.github.io/styleguide/objcguide.html) #### Running sanity check -- GitLab From 4e8301be75a234d53b08bec577ac0069fc40bea3 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 30 Nov 2017 11:00:54 -0800 Subject: [PATCH 0994/1801] Disable dnn_linear_combined_test PiperOrigin-RevId: 177478106 --- tensorflow/contrib/estimator/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index e4d51aa148..706a174efb 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -93,6 +93,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip", + "notap", # b/62204861 "notsan", ], deps = [ -- GitLab From 4cc4d5329122f0f97c3804e6f8d27ed4b5874028 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 30 Nov 2017 11:14:36 -0800 Subject: [PATCH 0995/1801] Fix decode_bmp crash by adding length check before reading the data in buffer (#14967) * Fix decode_bmp crash by adding length check before reading the data in buffer This fix tries to address the issue raised in 14959 where the bmp content length was not checked before reading the buffer. As a result, decode_bmp might trigger a crash if the content of bmp is incomplete. This fix fixes the issue by adding the needed check before reading the data. This fix fixes 14959. Signed-off-by: Yong Tang * Sanitize decode_bmp_op.cc with clang-format -i --style=Google Signed-off-by: Yong Tang * Add additional check to make sure header is safe to access in bmp Signed-off-by: Yong Tang * Not require padding (as paddings are not accessed) Signed-off-by: Yong Tang * Remove duplicated row_size calculation. Signed-off-by: Yong Tang * Add test cases for decoding incomplete bmp Signed-off-by: Yong Tang --- tensorflow/core/kernels/decode_bmp_op.cc | 51 +++++++++++++------ .../python/kernel_tests/decode_bmp_op_test.py | 50 ++++++++++++++++++ 2 files changed, 86 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc index cd7956e1cb..c778278e8f 100644 --- a/tensorflow/core/kernels/decode_bmp_op.cc +++ b/tensorflow/core/kernels/decode_bmp_op.cc @@ -33,10 +33,11 @@ class DecodeBmpOp : public OpKernel { public: explicit DecodeBmpOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("channels", &channels_)); - OP_REQUIRES(context, channels_ == 0 || channels_ == 1 || channels_ == 3 || - channels_ == 4, - errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ", - channels_)); + OP_REQUIRES( + context, + channels_ == 0 || channels_ == 1 || channels_ == 3 || channels_ == 4, + errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ", + channels_)); } void Compute(OpKernelContext* context) override { @@ -48,6 +49,12 @@ class DecodeBmpOp : public OpKernel { // Start decoding image to get shape details const StringPiece input = contents.scalar()(); + OP_REQUIRES(context, (32 <= input.size()), + errors::InvalidArgument("Incomplete bmp content, requires at " + "least 32 bytes to find the header " + "size, width, height, and bpp, got ", + input.size(), " bytes")); + const uint8* img_bytes = reinterpret_cast(input.data()); const int32 header_size = internal::SubtleMustCopy( *(reinterpret_cast(img_bytes + 10))); @@ -73,6 +80,22 @@ class DecodeBmpOp : public OpKernel { errors::InvalidArgument( "Number of channels must be 1, 3 or 4, was ", channels_)); + // there may be padding bytes when the width is not a multiple of 4 bytes + // 8 * channels == bits per pixel + const int row_size = (8 * channels_ * width + 31) / 32 * 4; + + const int last_pixel_offset = + header_size + (abs(height) - 1) * row_size + (width - 1) * channels_; + + // [expected file size] = [last pixel offset] + [last pixel size=channels] + const int expected_file_size = last_pixel_offset + channels_; + + OP_REQUIRES( + context, (expected_file_size <= input.size()), + errors::InvalidArgument("Incomplete bmp content, requires at least ", + expected_file_size, " bytes, got ", + input.size(), " bytes")); + // if height is negative, data layout is top down // otherwise, it's bottom up bool top_down = (height < 0); @@ -85,25 +108,23 @@ class DecodeBmpOp : public OpKernel { const uint8* bmp_pixels = &img_bytes[header_size]; - Decode(bmp_pixels, output->flat().data(), width, abs(height), - channels_, top_down); + Decode(bmp_pixels, row_size, output->flat().data(), width, + abs(height), channels_, top_down); } - uint8* Decode(const uint8* input, uint8* const output, const int width, - const int height, const int channles, bool top_down); + uint8* Decode(const uint8* input, const int row_size, uint8* const output, + const int width, const int height, const int channles, + bool top_down); private: int channels_; }; REGISTER_KERNEL_BUILDER(Name("DecodeBmp").Device(DEVICE_CPU), DecodeBmpOp); -uint8* DecodeBmpOp::Decode(const uint8* input, uint8* const output, - const int width, const int height, - const int channels, bool top_down) { - // there may be padding bytes when the width is not a multiple of 4 bytes - // 8 * channels == bits per pixel - int row_size = (8 * channels * width + 31) / 32 * 4; - +uint8* DecodeBmpOp::Decode(const uint8* input, const int row_size, + uint8* const output, const int width, + const int height, const int channels, + bool top_down) { for (int i = 0; i < height; i++) { int src_pos; int dst_pos; diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py index e7b472240e..c086f46170 100644 --- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py +++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.ops import array_ops from tensorflow.python.ops import image_ops from tensorflow.python.platform import test @@ -99,5 +100,54 @@ class DecodeBmpOpTest(test.TestCase): decoded = decode.eval() self.assertAllEqual(decoded, img_bytes) + def testIncompleteHeader(self): + # Encoded BMP bytes from Wikipedia + encoded_bytes = [ + 0x42, 0x40, + 0x46, 0, 0, 0, + ] + + byte_string = bytes(bytearray(encoded_bytes)) + img_in = constant_op.constant(byte_string, dtype=dtypes.string) + decode = array_ops.squeeze(image_ops.decode_bmp(img_in)) + + with self.test_session(): + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "requires at least 32 bytes to find the header"): + decoded = decode.eval() + + def testIncompleteBody(self): + # Encoded BMP bytes from Wikipedia + encoded_bytes = [ + 0x42, 0x40, + 0x46, 0, 0, 0, + 0, 0, + 0, 0, + 0x36, 0, 0, 0, + 0x28, 0, 0, 0, + 0x2, 0, 0, 0, + 0x2, 0, 0, 0, + 0x1, 0, + 0x18, 0, + 0, 0, 0, 0, + 0x10, 0, 0, 0, + 0x13, 0xb, 0, 0, + 0x13, 0xb, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0xff, + 0xff, 0xff, 0xff, + 0, 0, + ] + + byte_string = bytes(bytearray(encoded_bytes)) + img_in = constant_op.constant(byte_string, dtype=dtypes.string) + decode = array_ops.squeeze(image_ops.decode_bmp(img_in)) + + with self.test_session(): + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "requires at least 68 bytes, got 62 bytes"): + decoded = decode.eval() + if __name__ == "__main__": test.main() -- GitLab From d24199ba2b845f516dbbdb558999e4c402342f42 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Thu, 30 Nov 2017 11:15:19 -0800 Subject: [PATCH 0996/1801] Add user friendly error checking about script run location. (#14949) --- tensorflow/contrib/lite/download_dependencies.sh | 7 +++++++ tensorflow/contrib/makefile/download_dependencies.sh | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index e13df2fa1c..571d857be7 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -19,6 +19,13 @@ set -e DOWNLOADS_DIR=tensorflow/contrib/lite/downloads BZL_FILE_PATH=tensorflow/workspace.bzl +# Ensure it is being run from repo root +if [ ! -f $BZL_FILE_PATH ]; then + echo "Could not find ${BZL_FILE_PATH}": + echo "Likely you are not running this from the root directory of the repository."; + exit 1; +fi + EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index e8021a53af..19e25ad767 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -19,6 +19,13 @@ set -e DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads BZL_FILE_PATH=tensorflow/workspace.bzl +# Ensure it is being run from repo root +if [ ! -f $BZL_FILE_PATH ]; then + echo "Could not find ${BZL_FILE_PATH}": + echo "Likely you are not running this from the root directory of the repository."; + exit 1; +fi + EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" -- GitLab From ea1c29552b01f3404e27999a27a1919b3accc594 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 30 Nov 2017 11:13:19 -0800 Subject: [PATCH 0997/1801] Change depthwise convolution filter expansion and contraction with algebraic manipulation instead of slices and pads that are more difficult to fuse. PiperOrigin-RevId: 177480353 --- .../compiler/tf2xla/kernels/conv_ops.cc | 166 ++++++++++++------ 1 file changed, 112 insertions(+), 54 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc index c150394c07..61f4d1993a 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc @@ -46,72 +46,130 @@ TensorShape ExpandedFilterShapeForDepthwiseConvolution( return expanded_shape; } +// Broadcast zeros to ExpandedFilterShapeForDepthwiseConvolution. +xla::ComputationDataHandle CreateExpandedZero( + const TensorShape& filter_shape, DataType dtype, + xla::ComputationBuilder* builder) { + TensorShape expanded_filter_shape = + ExpandedFilterShapeForDepthwiseConvolution(filter_shape); + return builder->Broadcast(XlaHelpers::Zero(builder, dtype), + expanded_filter_shape.dim_sizes()); +} + +// Create a mask for depthwise convolution that will make a normal convolution +// produce the same results as a depthwise convolution. For a [2, 2, 3, 2] +// depthwise filter this returns a [2, 2, 3, 6] tesnsor +// 1 1 0 0 0 0 1 1 0 0 0 0 +// 0 0 1 1 0 0 0 0 1 1 0 0 +// 0 0 0 0 1 1 0 0 0 0 1 1 +// +// 1 1 0 0 0 0 1 1 0 0 0 0 +// 0 0 1 1 0 0 0 0 1 1 0 0 +// 0 0 0 0 1 1 0 0 0 0 1 1 +// +// The first step is to create a one tensor, A, that is [3] +// 0 1 2 +// +// and another tensor, B, that is [3 * 2] +// 0 1 2 3 4 5 +// +// and divide B it by 2 to get +// 0 0 1 1 2 2 +// +// then we broadcast the B to [2, 2, 3, 3 * 2] +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// +// Finally compare A and broadcasted B in dimension 2 amd return the result at +// the beginning of the comment. +xla::ComputationDataHandle CreateExpandedFilterMask( + const TensorShape& filter_shape, xla::ComputationBuilder* builder) { + TensorShape expanded_filter_shape = + ExpandedFilterShapeForDepthwiseConvolution(filter_shape); + int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1); + int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2); + + // Create a M sized linspace and an M*N sized linspace that will be + // broadcasted into perpendicular dimensions and compared. + xla::ComputationDataHandle input_feature_iota; + // DT_INT32 Iota will always return status::OK(). + TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32, input_feature, + &input_feature_iota)); + xla::ComputationDataHandle expanded_feature_iota; + TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32, + input_feature * depthwise_multiplier, + &expanded_feature_iota)); + + // Divide the M*N sized linspace by the depthwise_multiplier to create + // [0 0 1 1 2 2] in the example in the function comment. + expanded_feature_iota = + builder->Div(expanded_feature_iota, + XlaHelpers::IntegerLiteral(builder, DataType::DT_INT32, + depthwise_multiplier)); + + // Broadcast the N*M linspace to [H, W, ..., M, M*N]. + auto expanded_feature_broadcast_dims = expanded_filter_shape.dim_sizes(); + expanded_feature_broadcast_dims.pop_back(); + auto broadcasted_expanded_feature_iota = builder->Broadcast( + expanded_feature_iota, expanded_feature_broadcast_dims); + + // Compare the broadcasted linspace to the input feature linspace in the + // input feature dimension to create a diagonal predicate. + return builder->Eq(broadcasted_expanded_feature_iota, input_feature_iota, + {expanded_filter_shape.dims() - 2}); +} + // Expands a filter of shape [H, W, ..., M, N] to [H, W, ..., M, M*N] by adding // zeros for the cross-depth filters. Used to build a depthwise convolution. xla::ComputationDataHandle ExpandFilterForDepthwiseConvolution( const TensorShape& filter_shape, DataType dtype, const xla::ComputationDataHandle& filter, xla::ComputationBuilder* builder) { - // Filter has shape [H, W, ..., M, N] - // Dilate to [H, W, ..., M*M, N] using M inter-element padding, and then - // reshape to [H, W, ..., M, M*N]. - int num_spatial_dims = filter_shape.dims() - 2; - const int64 in_depth = filter_shape.dim_size(num_spatial_dims); - xla::PaddingConfig padding = xla::MakeNoPaddingConfig(filter_shape.dims()); - padding.mutable_dimensions(num_spatial_dims)->set_interior_padding(in_depth); - auto dilated_filter = - builder->Pad(filter, XlaHelpers::Zero(builder, dtype), padding); - + int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1); + int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2); TensorShape expanded_filter_shape = ExpandedFilterShapeForDepthwiseConvolution(filter_shape); - return builder->Reshape(dilated_filter, expanded_filter_shape.dim_sizes()); + + // Create a [H, W, ..., 1, N*M] reshape of the filter. + TensorShape implicit_broadcast_filter_shape = expanded_filter_shape; + implicit_broadcast_filter_shape.set_dim( + implicit_broadcast_filter_shape.dims() - 2, 1); + implicit_broadcast_filter_shape.set_dim( + implicit_broadcast_filter_shape.dims() - 1, + depthwise_multiplier * input_feature); + auto implicit_broadcast_filter = + builder->Reshape(filter, implicit_broadcast_filter_shape.dim_sizes()); + + // Broadcast the filter to [H, W, ..., M, M*N]. + auto expanded_zero = CreateExpandedZero(filter_shape, dtype, builder); + auto expanded_filter = builder->Add(implicit_broadcast_filter, expanded_zero); + + // If the filter mask is set, choose the broadcasted filter, othwerwise, + // choose zero. + return builder->Select(CreateExpandedFilterMask(filter_shape, builder), + expanded_filter, expanded_zero); } // Inverse of ExpandFilterForDepthwiseConvolution. xla::ComputationDataHandle ContractFilterForDepthwiseBackprop( - const TensorShape& filter_shape, DataType dtype, + XlaOpKernelContext* ctx, const TensorShape& filter_shape, DataType dtype, const xla::ComputationDataHandle& filter_backprop, xla::ComputationBuilder* builder) { - int num_spatial_dims = filter_shape.dims() - 2; - - // Reshape to [H, W, ..., M*M, N] - TensorShape shape = filter_shape; - int64 in_depth = filter_shape.dim_size(num_spatial_dims); - shape.set_dim(num_spatial_dims, in_depth * in_depth); - auto reshaped = builder->Reshape(filter_backprop, shape.dim_sizes()); - - std::vector zeros(filter_shape.dims()); - std::vector strides(filter_shape.dims(), 1LL); - strides[num_spatial_dims] = in_depth + 1; - return builder->Slice(reshaped, zeros, shape.dim_sizes(), strides); - - // Alternate implementation for backends without strided Slice() support. - // TODO(phawkins): Remove when all backends support strided slice. - // // Pad [..., M * (M + 1), N] - // xla::PaddingConfig config = - // xla::MakeNoPaddingConfig(filter_shape.dims()); - // config.mutable_dimensions(num_spatial_dims) - // ->set_edge_padding_high(in_depth); - // auto zero = XlaHelpers::Zero(builder, dtype); - // auto padded = builder->Pad(reshaped, zero, config); - // - // // Reshape to [..., M, M + 1, N] - // shape = filter_shape; - // shape.set_dim(num_spatial_dims, in_depth); - // shape.set_dim(num_spatial_dims + 1, in_depth + 1); - // int64 out_depth = filter_shape.dim_size(num_spatial_dims + 1); - // shape.AddDim(out_depth); - // reshaped = builder->Reshape(padded, shape.dim_sizes()); - // - // // Slice to [..., M, 1, N] - // std::vector zeros(shape.dims()); - // std::vector strides(shape.dims(), 1LL); - // shape.set_dim(num_spatial_dims + 1, 1); - // auto sliced = builder->Slice(reshaped, zeros, shape.dim_sizes(), - // strides); - // - // // Reshape to [..., M, N] - // return builder->Reshape(sliced, filter_shape.dim_sizes()); + TensorShape expanded_filter_shape = + ExpandedFilterShapeForDepthwiseConvolution(filter_shape); + auto masked_expanded_filter = builder->Select( + CreateExpandedFilterMask(filter_shape, builder), filter_backprop, + CreateExpandedZero(filter_shape, dtype, builder)); + return builder->Reshape( + builder->Reduce(masked_expanded_filter, XlaHelpers::Zero(builder, dtype), + *ctx->GetOrCreateAdd(dtype), + {expanded_filter_shape.dims() - 2}), + filter_shape.dim_sizes()); } class ConvOp : public XlaOpKernel { @@ -202,7 +260,7 @@ class ConvOp : public XlaOpKernel { dims.set_input_feature_dimension(feature_dim); dims.set_output_feature_dimension(feature_dim); for (int i = 0; i < num_spatial_dims_; ++i) { - int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); + const int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); dims.add_input_spatial_dimensions(dim); dims.add_kernel_spatial_dimensions(i); dims.add_output_spatial_dimensions(dim); @@ -574,7 +632,7 @@ class ConvBackpropFilterOp : public XlaOpKernel { if (depthwise_) { filter_backprop_reshaped = ContractFilterForDepthwiseBackprop( - filter_shape, ctx->input_type(0), filter_backprop_reshaped, b); + ctx, filter_shape, ctx->input_type(0), filter_backprop_reshaped, b); } ctx->SetOutput(0, filter_backprop_reshaped); } -- GitLab From 4146ff1259c0b4ada8afbbad11a7b37d8373d1b9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 11:18:54 -0800 Subject: [PATCH 0998/1801] [XLA] Adds Dot with DotDimensionNumbers proto for specifying arbitrary contracting and batch dimensions. PiperOrigin-RevId: 177481231 --- .../xla/client/computation_builder.cc | 36 +++- .../compiler/xla/client/computation_builder.h | 5 + .../xla/service/algebraic_simplifier.cc | 17 +- .../xla/service/algebraic_simplifier_test.cc | 6 +- .../xla/service/buffer_assignment_test.cc | 11 +- .../cpu/cpu_instruction_fusion_test.cc | 36 ++-- .../compiler/xla/service/cpu/ir_emitter.cc | 5 + .../xla/service/gpu/ir_emitter_unnested.cc | 5 + .../compiler/xla/service/graphviz_example.cc | 5 +- .../xla/service/heap_simulator_test.cc | 24 ++- tensorflow/compiler/xla/service/hlo.proto | 3 + .../compiler/xla/service/hlo_cost_analysis.cc | 5 +- .../xla/service/hlo_evaluator_test.cc | 21 ++- .../compiler/xla/service/hlo_instruction.cc | 56 +++++- .../compiler/xla/service/hlo_instruction.h | 18 ++ .../xla/service/hlo_instruction_test.cc | 12 +- .../compiler/xla/service/hlo_verifier.cc | 6 +- .../xla/service/liveness_util_test.cc | 10 +- tensorflow/compiler/xla/service/service.cc | 3 + .../compiler/xla/service/shape_inference.cc | 156 ++++++++++++--- .../compiler/xla/service/shape_inference.h | 6 +- .../xla/service/shape_inference_test.cc | 177 ++++++++++++++++-- .../xla/service/transpose_folding_test.cc | 27 ++- .../compiler/xla/service/user_computation.cc | 57 +++++- .../compiler/xla/service/user_computation.h | 4 + .../xla/service/user_computation_test.cc | 45 ----- .../compiler/xla/tests/dot_operation_test.cc | 20 ++ .../xla/tests/multioutput_fusion_test.cc | 12 +- tensorflow/compiler/xla/xla_data.proto | 23 ++- .../performance/xla/operation_semantics.md | 81 ++++++++ 30 files changed, 736 insertions(+), 156 deletions(-) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index cce9310003..9febea8dcf 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -625,7 +625,41 @@ ComputationDataHandle ComputationBuilder::Lt( ComputationDataHandle ComputationBuilder::Dot( const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) { - return BinaryOp(BINOP_DOT, lhs, rhs, /*broadcast_dimensions=*/{}); + StatusOr> lhs_shape_or_status = GetShape(lhs); + if (!lhs_shape_or_status.ok()) { + NoteError(lhs_shape_or_status.status()); + return ComputationDataHandle(); + } + std::unique_ptr lhs_shape = lhs_shape_or_status.ConsumeValueOrDie(); + + DotDimensionNumbers dimension_numbers; + dimension_numbers.add_lhs_contracting_dimensions( + lhs_shape->dimensions_size() == 1 ? 0 : 1); + dimension_numbers.add_rhs_contracting_dimensions(0); + return DotGeneral(lhs, rhs, dimension_numbers); +} + +ComputationDataHandle ComputationBuilder::DotGeneral( + const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, + const DotDimensionNumbers& dimension_numbers) { + if (!first_error_.ok() || !PrepareComputation().ok()) { + return ComputationDataHandle(); + } + + DotRequest request; + *request.mutable_lhs() = lhs; + *request.mutable_rhs() = rhs; + *request.mutable_dimension_numbers() = dimension_numbers; + + OpRequest op_request; + *op_request.mutable_computation() = computation_.handle(); + *op_request.mutable_dot_request() = request; + AddCommonFieldsToOpRequest(&op_request); + OpResponse response; + + VLOG(2) << "making Dot request"; + Status s = client_->stub()->Op(&op_request, &response); + return ParseOpResponse(s, &response); } ComputationDataHandle ComputationBuilder::Conv( diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index d2dbbbbebb..531b98cfb9 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -393,6 +393,11 @@ class ComputationBuilder { ComputationDataHandle Dot(const ComputationDataHandle& lhs, const ComputationDataHandle& rhs); + // Enqueues a general dot instruction onto the computation. + ComputationDataHandle DotGeneral( + const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, + const DotDimensionNumbers& dimension_numbers); + // Default dimension numbers used for a 2D convolution. static constexpr int64 kConvBatchDimension = 0; static constexpr int64 kConvFeatureDimension = 1; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 71491218aa..b1d0345e70 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -597,9 +597,13 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { // Simplify dot(transpose(a), transpose(b)) to transpose(dot(b,a)). if (lhs->IsRank2Transpose() && rhs->IsRank2Transpose()) { - auto new_dot = computation_->AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::PermuteDimensions({1, 0}, dot->shape()), HloOpcode::kDot, - rhs->mutable_operand(0), lhs->mutable_operand(0))); + DotDimensionNumbers dot_dimension_numbers; + dot_dimension_numbers.add_lhs_contracting_dimensions(1); + dot_dimension_numbers.add_rhs_contracting_dimensions(0); + auto new_dot = computation_->AddInstruction(HloInstruction::CreateDot( + ShapeUtil::PermuteDimensions({1, 0}, dot->shape()), + rhs->mutable_operand(0), lhs->mutable_operand(0), + dot_dimension_numbers)); return ReplaceWithNewInstruction( dot, HloInstruction::CreateTranspose(dot->shape(), new_dot, {1, 0})); } @@ -1616,8 +1620,11 @@ Status AlgebraicSimplifierVisitor::HandleConvolution( auto new_lhs = add_bitcast(new_input_shape, lhs); auto new_rhs = add_bitcast(new_filter_shape, rhs); - auto dot = computation_->AddInstruction(HloInstruction::CreateBinary( - dot_output_shape, HloOpcode::kDot, new_lhs, new_rhs)); + DotDimensionNumbers dot_dimension_numbers; + dot_dimension_numbers.add_lhs_contracting_dimensions(1); + dot_dimension_numbers.add_rhs_contracting_dimensions(0); + auto dot = computation_->AddInstruction(HloInstruction::CreateDot( + dot_output_shape, new_lhs, new_rhs, dot_dimension_numbers)); return ReplaceInstruction(convolution, add_bitcast(convolution_shape, dot)); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 56dfb1cf0b..3d70505f6e 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2138,8 +2138,10 @@ TEST_F(AlgebraicSimplifierTest, IteratorInvalidation) { builder.AddInstruction(HloInstruction::CreateParameter(0, r1f32, "x")); HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter(1, r1f32, "y")); - builder.AddInstruction( - HloInstruction::CreateBinary(r1f32, HloOpcode::kDot, x, y)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + builder.AddInstruction(HloInstruction::CreateDot(r1f32, x, y, dot_dnums)); std::unique_ptr dot_computation(builder.Build()); HloComputation::Builder call_builder(TestName() + ".Call"); diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index 4d4c5b953e..75c71dfeb1 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -1360,10 +1360,13 @@ TEST_F(BufferAssignmentTest, OneTempAllocation) { HloInstruction::CreateParameter(1, shape_3x4, "param_b")); auto param_c = builder.AddInstruction( HloInstruction::CreateParameter(2, shape_4x4, "param_c")); - auto dot_ab = builder.AddInstruction(HloInstruction::CreateBinary( - shape_2x4, HloOpcode::kDot, param_a, param_b)); - auto dot_bc = builder.AddInstruction(HloInstruction::CreateBinary( - shape_3x4, HloOpcode::kDot, param_b, param_c)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto dot_ab = builder.AddInstruction( + HloInstruction::CreateDot(shape_2x4, param_a, param_b, dot_dnums)); + auto dot_bc = builder.AddInstruction( + HloInstruction::CreateDot(shape_3x4, param_b, param_c, dot_dnums)); builder.AddInstruction( HloInstruction::CreateConcatenate(shape_5x4, {dot_ab, dot_bc}, 1)); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc index b9e4d006d7..1c04c9835e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc @@ -31,6 +31,14 @@ namespace { using InstructionFusionTest = HloTestBase; +std::unique_ptr MakeDot(const Shape& shape, HloInstruction* lhs, + HloInstruction* rhs) { + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + return HloInstruction::CreateDot(shape, lhs, rhs, dot_dnums); +} + TEST_F(InstructionFusionTest, DotOperationFusion_Basic_0) { HloComputation::Builder builder(TestName()); HloInstruction* arg0 = builder.AddInstruction(HloInstruction::CreateParameter( @@ -40,8 +48,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_0) { HloInstruction* exp0 = builder.AddInstruction(HloInstruction::CreateUnary( ShapeUtil::MakeShape(S32, {1024, 256}), HloOpcode::kExp, arg0)); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, exp0, arg1)); + HloInstruction* dot = builder.AddInstruction( + MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), exp0, arg1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); @@ -59,8 +67,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_1) { HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary( ShapeUtil::MakeShape(S32, {256, 1024}), HloOpcode::kExp, arg1)); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1, 1024}), HloOpcode::kDot, arg0, exp1)); + HloInstruction* dot = builder.AddInstruction( + MakeDot(ShapeUtil::MakeShape(F32, {1, 1024}), arg0, exp1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); @@ -80,8 +88,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Bitcast) { ShapeUtil::MakeShape(S32, {2, 512, 2, 128}), HloOpcode::kExp, arg0)); HloInstruction* bitcast0 = builder.AddInstruction(HloInstruction::CreateUnary( ShapeUtil::MakeShape(S32, {1024, 256}), HloOpcode::kBitcast, exp0)); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, bitcast0, arg1)); + HloInstruction* dot = builder.AddInstruction( + MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), bitcast0, arg1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); @@ -102,8 +110,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Reshape) { HloInstruction* reshape0 = builder.AddInstruction(HloInstruction::CreateReshape( ShapeUtil::MakeShape(S32, {1024, 256}), exp0)); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, reshape0, arg1)); + HloInstruction* dot = builder.AddInstruction( + MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), reshape0, arg1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); @@ -121,8 +129,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_TooLarge) { HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary( ShapeUtil::MakeShape(S32, {256, 32 * 1024}), HloOpcode::kExp, arg1)); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1, 32 * 1024}), HloOpcode::kDot, arg0, exp1)); + HloInstruction* dot = builder.AddInstruction( + MakeDot(ShapeUtil::MakeShape(F32, {1, 32 * 1024}), arg0, exp1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); @@ -140,8 +148,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_ElementReuse) { HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary( ShapeUtil::MakeShape(S32, {256, 1024}), HloOpcode::kExp, arg1)); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {2, 1024}), HloOpcode::kDot, arg0, exp1)); + HloInstruction* dot = builder.AddInstruction( + MakeDot(ShapeUtil::MakeShape(F32, {2, 1024}), arg0, exp1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); @@ -162,8 +170,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_TransposeFusion) { HloInstruction* transpose1 = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(S32, {256, 1024}), exp1, {1, 0})); - builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1, 1024}), HloOpcode::kDot, arg0, transpose1)); + builder.AddInstruction( + MakeDot(ShapeUtil::MakeShape(F32, {1, 1024}), arg0, transpose1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 3f991c03e9..f242e0acb8 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -796,6 +796,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( /*instruction=*/*dot, /*operands=*/{lhs, rhs}, /*supported_types=*/{F32, F64, C64})); + const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); + if (dnums.lhs_batch_dimensions_size() > 0 || + dnums.rhs_batch_dimensions_size() > 0) { + return Unimplemented("Dot with batch dimensions not implemented."); + } llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 1b863c9e3c..abc739d181 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -246,6 +246,11 @@ Status IrEmitterUnnested::DefaultAction(HloInstruction* hlo) { } Status IrEmitterUnnested::HandleDot(HloInstruction* dot) { + const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); + if (dnums.lhs_batch_dimensions_size() > 0 || + dnums.rhs_batch_dimensions_size() > 0) { + return Unimplemented("Dot with batch dimensions not implemented."); + } if (ImplementedAsGemm(*dot)) { thunk_sequence_->emplace_back(BuildGemmThunk(dot)); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/graphviz_example.cc b/tensorflow/compiler/xla/service/graphviz_example.cc index 049e8d80d8..05017008e2 100644 --- a/tensorflow/compiler/xla/service/graphviz_example.cc +++ b/tensorflow/compiler/xla/service/graphviz_example.cc @@ -108,8 +108,11 @@ std::unique_ptr MakeBigGraph() { HloInstruction::CreateUnary(vshape, HloOpcode::kCopy, param_v0)); auto clamp = builder.AddInstruction(HloInstruction::CreateTernary( vshape, HloOpcode::kClamp, copy, param_v1, param_v2)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto dot = builder.AddInstruction( - HloInstruction::CreateBinary(vshape, HloOpcode::kDot, clamp, param_v0)); + HloInstruction::CreateDot(vshape, clamp, param_v0, dot_dnums)); auto tuple = builder.AddInstruction( HloInstruction::CreateTuple({dot, param_s, clamp})); auto scalar = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc index 17b926c874..387b649a73 100644 --- a/tensorflow/compiler/xla/service/heap_simulator_test.cc +++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc @@ -259,8 +259,11 @@ TEST_F(HeapSimulatorTest, MultiplyDot) { HloInstruction::CreateParameter(2, f32scalar_, "paramY")); auto mul = builder.AddInstruction(HloInstruction::CreateBinary( f32vec4_, HloOpcode::kMultiply, paramA, paramX)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto dot = builder.AddInstruction( - HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY)); + HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums)); // The buffer for dot is the output, and it cannot be shared with the buffer // for mul, since dot isn't elementwise. @@ -292,8 +295,11 @@ TEST_F(HeapSimulatorTest, MultiplyDotAdd) { HloInstruction::CreateParameter(2, f32scalar_, "paramY")); auto mul = builder.AddInstruction(HloInstruction::CreateBinary( f32vec4_, HloOpcode::kMultiply, paramA, paramX)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto dot = builder.AddInstruction( - HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY)); + HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums)); auto add = builder.AddInstruction( HloInstruction::CreateBinary(f32vec4_, HloOpcode::kAdd, dot, paramA)); @@ -327,10 +333,13 @@ TEST_F(HeapSimulatorTest, MultiplyDotDot) { HloInstruction::CreateParameter(2, f32scalar_, "paramY")); auto mul = builder.AddInstruction(HloInstruction::CreateBinary( f32vec4_, HloOpcode::kMultiply, paramA, paramX)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto dot0 = builder.AddInstruction( - HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY)); + HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums)); auto dot1 = builder.AddInstruction( - HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, dot0, paramY)); + HloInstruction::CreateDot(f32vec4_, dot0, paramY, dot_dnums)); // The buffer for dot1 is the output. No buffers can be shared. The buffer // for mul is freed before the end, since it's no longer used after dot0 @@ -365,10 +374,13 @@ TEST_F(HeapSimulatorTest, MultiplyDotDotTuple) { HloInstruction::CreateParameter(2, f32scalar_, "paramY")); auto mul = builder.AddInstruction(HloInstruction::CreateBinary( f32vec4_, HloOpcode::kMultiply, paramA, paramX)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto dot0 = builder.AddInstruction( - HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY)); + HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums)); auto dot1 = builder.AddInstruction( - HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, dot0, paramY)); + HloInstruction::CreateDot(f32vec4_, dot0, paramY, dot_dnums)); auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({dot0, dot1})); diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index e984bdb5f7..5d0cfba1fc 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -118,6 +118,9 @@ message HloInstructionProto { // Shape of outfeed request. xla.Shape outfeed_shape = 29; + + // Describes the dimension numbers used for a dot operation + xla.DotDimensionNumbers dot_dimension_numbers = 30; } // Serialization of HloComputation. diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 6fcc01dd64..0ed64e6779 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -201,10 +201,11 @@ Status HloCostAnalysis::HandleCopy(const HloInstruction*) { Status HloCostAnalysis::HandleDot(const HloInstruction* dot) { const Shape& lhs_shape = dot->operand(0)->shape(); const Shape& rhs_shape = dot->operand(1)->shape(); + const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); // Count of elements along the reduction dimension (last dimension for the // rhs). - int64 reduction_width = lhs_shape.dimensions(ShapeUtil::Rank(lhs_shape) - 1); - + int64 reduction_width = + lhs_shape.dimensions(dnums.lhs_contracting_dimensions(0)); // First divide by reduction width before multiplying by rhs elements to avoid // overflow. int64 fma_count; diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index b2c4351896..a5d39fe086 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -621,8 +621,11 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank1) { b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal))); Shape shape = ShapeUtil::MakeShape(F32, {4, 2}); - b.AddInstruction(HloInstruction::CreateBinary( - shape, HloOpcode::kDot, lhs_instruction, rhs_instruction)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction, + rhs_instruction, dot_dnums)); auto computation = module().AddEntryComputation(b.Build()); std::unique_ptr result = @@ -664,8 +667,11 @@ TEST_F(HloEvaluatorTest, DotRank1AndRank2) { b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal))); Shape shape = ShapeUtil::MakeShape(F32, {2}); - b.AddInstruction(HloInstruction::CreateBinary( - shape, HloOpcode::kDot, lhs_instruction, rhs_instruction)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction, + rhs_instruction, dot_dnums)); auto computation = module().AddEntryComputation(b.Build()); std::unique_ptr result = @@ -705,8 +711,11 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank2) { b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal))); Shape shape = ShapeUtil::MakeShape(F32, {4, 2}); - b.AddInstruction(HloInstruction::CreateBinary( - shape, HloOpcode::kDot, lhs_instruction, rhs_instruction)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction, + rhs_instruction, dot_dnums)); auto computation = module().AddEntryComputation(b.Build()); std::unique_ptr result = diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index c30c432654..b4bac18bcd 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -118,6 +118,10 @@ StatusOr> HloInstruction::CreateFromProto( MakeUnique( proto.convolution_dimension_numbers()); } + if (proto.has_dot_dimension_numbers()) { + instruction->dot_dimension_numbers_ = + MakeUnique(proto.dot_dimension_numbers()); + } for (const HloInstructionProto::SliceDimensions& slice_dimensions : proto.slice_dimensions()) { instruction->slice_starts_.push_back(slice_dimensions.start()); @@ -332,6 +336,17 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, return instruction; } +/* static */ std::unique_ptr HloInstruction::CreateDot( + const Shape& shape, HloInstruction* lhs, HloInstruction* rhs, + const DotDimensionNumbers& dimension_numbers) { + auto instruction = WrapUnique(new HloInstruction(HloOpcode::kDot, shape)); + instruction->AppendOperand(lhs); + instruction->AppendOperand(rhs); + instruction->dot_dimension_numbers_ = + MakeUnique(dimension_numbers); + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateReducePrecision(const Shape& shape, HloInstruction* operand, @@ -1086,7 +1101,6 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kLe: case HloOpcode::kLt: case HloOpcode::kNe: - case HloOpcode::kDot: case HloOpcode::kMaximum: case HloOpcode::kMinimum: case HloOpcode::kPower: @@ -1138,6 +1152,11 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( clone = CreateConvolve(shape, new_operands[0], new_operands[1], *window_, *convolution_dimension_numbers_); break; + case HloOpcode::kDot: + CHECK_EQ(new_operands.size(), 2); + clone = CreateDot(shape, new_operands[0], new_operands[1], + *dot_dimension_numbers_); + break; case HloOpcode::kCrossReplicaSum: CHECK_EQ(new_operands.size(), 1); clone = CreateCrossReplicaSum(shape, new_operands[0]); @@ -1509,7 +1528,6 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kCos: case HloOpcode::kCrossReplicaSum: case HloOpcode::kDivide: - case HloOpcode::kDot: case HloOpcode::kEq: case HloOpcode::kExp: case HloOpcode::kFloor: @@ -1582,6 +1600,10 @@ bool HloInstruction::IdenticalSlowPath( protobuf_util::ProtobufEquals( convolution_dimension_numbers(), other.convolution_dimension_numbers()); + // Check dot dimension numbers. + case HloOpcode::kDot: + return protobuf_util::ProtobufEquals(dot_dimension_numbers(), + other.dot_dimension_numbers()); // Reduction results are determined by the reduction dimension and the // reduction computation. @@ -1990,6 +2012,9 @@ std::vector HloInstruction::ExtraAttributesToString() const { if (convolution_dimension_numbers_ != nullptr) { extra.push_back(ConvolutionDimensionNumbersToString()); } + if (dot_dimension_numbers_ != nullptr) { + extra.push_back(DotDimensionNumbersToString()); + } if (opcode() == HloOpcode::kWhile) { extra.push_back(StrCat("condition=%", while_condition()->name())); @@ -2086,6 +2111,9 @@ HloInstructionProto HloInstruction::ToProto() const { *proto.mutable_convolution_dimension_numbers() = *convolution_dimension_numbers_; } + if (dot_dimension_numbers_ != nullptr) { + *proto.mutable_dot_dimension_numbers() = *dot_dimension_numbers_; + } for (int i = 0; i < slice_starts_.size(); ++i) { auto* slice_dimension = proto.add_slice_dimensions(); slice_dimension->set_start(slice_starts_[i]); @@ -3051,6 +3079,30 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const { return result; } +string HloInstruction::DotDimensionNumbersToString() const { + string result; + if (dot_dimension_numbers_ == nullptr) { + return result; + } + const DotDimensionNumbers& dnums = *dot_dimension_numbers_; + if (!dnums.lhs_batch_dimensions().empty()) { + result += "lhs_batch_dims="; + StrAppend(&result, Join(dnums.lhs_batch_dimensions(), ",")); + } + result += "lhs_contracting_dims="; + StrAppend(&result, Join(dnums.lhs_contracting_dimensions(), ",")); + + result += ","; + if (!dnums.rhs_batch_dimensions().empty()) { + result += "rhs_batch_dims="; + StrAppend(&result, Join(dnums.rhs_batch_dimensions(), ",")); + } + result += "rhs_contracting_dims="; + StrAppend(&result, Join(dnums.rhs_contracting_dimensions(), ",")); + + return result; +} + bool HloInstruction::CouldBeBitcast() const { switch (opcode_) { case HloOpcode::kTranspose: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index cda8b07c61..768c027a42 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -160,6 +160,12 @@ class HloInstruction { const Window& window, const ConvolutionDimensionNumbers& dimension_numbers); + // Creates a dot op with operands 'lhs' and 'rhs' with contracting and batch + // dimensions specified in 'dimension_numbers'. + static std::unique_ptr CreateDot( + const Shape& shape, HloInstruction* lhs, HloInstruction* rhs, + const DotDimensionNumbers& dimension_numbers); + // Creates a reduce-precision op, where operand is the data to reduce in // precision, and exponent_bits and mantissa_bits describe the precision to // reduce it to. @@ -915,6 +921,15 @@ class HloInstruction { // Returns the dump string of the convolution dimension numbers. string ConvolutionDimensionNumbersToString() const; + // Returns data on the dimension numbers used for a dot operation. + const DotDimensionNumbers& dot_dimension_numbers() const { + CHECK(dot_dimension_numbers_ != nullptr); + return *dot_dimension_numbers_; + } + + // Returns the dump string of the dot dimension numbers. + string DotDimensionNumbersToString() const; + // Returns the random distribution for this rng node. // // Precondition: opcode() == HloOpcode::kRng @@ -1173,6 +1188,9 @@ class HloInstruction { // Describes the dimension numbers used for a convolution. std::unique_ptr convolution_dimension_numbers_; + // Describes the dimension numbers used for a dot. + std::unique_ptr dot_dimension_numbers_; + // Describes the [begin, end) index range for a slice. std::vector slice_starts_; std::vector slice_limits_; diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 76b12fc8d3..11420cae63 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1068,8 +1068,11 @@ TEST_F(HloInstructionTest, CloneOfFusionPreservesShape) { builder.AddInstruction(HloInstruction::CreateParameter(1, s2, "y")); HloInstruction* reshape = builder.AddInstruction(HloInstruction::CreateTranspose(s2t, y, {1, 0})); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); HloInstruction* dot = builder.AddInstruction( - HloInstruction::CreateBinary(sout, HloOpcode::kDot, x, reshape)); + HloInstruction::CreateDot(sout, x, reshape, dot_dnums)); HloModule module(TestName()); auto* computation = module.AddEntryComputation(builder.Build()); @@ -1182,12 +1185,15 @@ TEST_F(HloInstructionTest, Stringification) { builder.AddInstruction(HloInstruction::CreateParameter(1, s2, "y")); HloInstruction* reshape = builder.AddInstruction(HloInstruction::CreateTranspose(s2t, y, {1, 0})); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); HloInstruction* dot = builder.AddInstruction( - HloInstruction::CreateBinary(sout, HloOpcode::kDot, x, reshape)); + HloInstruction::CreateDot(sout, x, reshape, dot_dnums)); EXPECT_EQ(dot->ToString(false, false), "%dot = f32[5,20]{1,0} dot(f32[5,10]{1,0} %x, f32[10,20]{1,0} " - "%transpose)"); + "%transpose), lhs_contracting_dims=1,rhs_contracting_dims=0"); HloModule module(TestName()); auto* computation = module.AddEntryComputation(builder.Build()); diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 2c09d2defb..ea7775b18a 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -75,7 +75,11 @@ class ShapeVerifier : public DfsHloVisitor { } Status HandleDot(HloInstruction* dot) override { - return CheckBinaryShape(dot); + TF_ASSIGN_OR_RETURN(const Shape expected, + ShapeInference::InferDotOpShape( + dot->operand(0)->shape(), dot->operand(1)->shape(), + dot->dot_dimension_numbers())); + return CheckShape(dot, expected); } Status HandleConvolution(HloInstruction* convolution) override { diff --git a/tensorflow/compiler/xla/service/liveness_util_test.cc b/tensorflow/compiler/xla/service/liveness_util_test.cc index 476e86fa72..2c2a02f637 100644 --- a/tensorflow/compiler/xla/service/liveness_util_test.cc +++ b/tensorflow/compiler/xla/service/liveness_util_test.cc @@ -277,8 +277,11 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedDotAdd) { auto b = builder.AddInstruction(HloInstruction::CreateConstant( Literal::CreateR2({{2.0, 2.0}, {2.0, 2.0}}))); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto dot = builder.AddInstruction( - HloInstruction::CreateBinary(data_shape, HloOpcode::kDot, a, b)); + HloInstruction::CreateDot(data_shape, a, b, dot_dnums)); auto one = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(1.0))); @@ -312,8 +315,11 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedTransposeDotAdd) { auto b_t = builder.AddInstruction( HloInstruction::CreateTranspose(data_shape, b, {1, 0})); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto dot = builder.AddInstruction( - HloInstruction::CreateBinary(data_shape, HloOpcode::kDot, a, b_t)); + HloInstruction::CreateDot(data_shape, a, b_t, dot_dnums)); auto one = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(1.0))); diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index d997cab83f..fa62080be4 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -1381,6 +1381,9 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) { handle_status = computation->AddCustomCallInstruction(arg->custom_call_request()); break; + case OpRequest::kDotRequest: + handle_status = computation->AddDotInstruction(arg->dot_request()); + break; case OpRequest::kDynamicSliceRequest: handle_status = computation->AddDynamicSliceInstruction(arg->dynamic_slice_request()); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 3df1911d07..7178eb40dd 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -90,8 +91,6 @@ BinaryOperation OpcodeToBinaryOperation(HloOpcode opcode) { return BINOP_ATAN2; case HloOpcode::kComplex: return BINOP_COMPLEX; - case HloOpcode::kDot: - return BINOP_DOT; case HloOpcode::kMultiply: return BINOP_MUL; case HloOpcode::kAdd: @@ -549,8 +548,98 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, return ShapeUtil::MakeShape(operand_shape.element_type(), dimensions); } -/* static */ StatusOr ShapeInference::InferDotOpShape(const Shape& lhs, - const Shape& rhs) { +// Current DotDimensionNumbers Requirements: +// +// Contracting Dimensions: +// *) Exactly one contracting dimension on both lhs and rhs. +// *) Contracting dimension size must be the same on both lhs and rhs. +// *) Contracting dimension numbers do not need to be the same (i.e. transposes +// are passed on to emitter implementations). +// +// Batch Dimensions: +// *) Same number of batch dimensions on both lhs and rhs. +// *) Same batch dimension numbers (and sizes) on both lhs and rhs. +// +// Non-Contracting-Non-Batch Dimensions: +// *) Can be 0 (matrix-vector) or 1 (matrix-matrix). +// + +namespace { + +Status ValidateDotDimensionNumbers( + const Shape& lhs, const Shape& rhs, + const DotDimensionNumbers& dimension_numbers) { + // Check that dimension numbers are in range. + auto dims_in_range = + [](const int64 rank, tensorflow::gtl::ArraySlice contracting_dims, + tensorflow::gtl::ArraySlice batch_dims) -> bool { + auto in_range = [&rank](int64 i) -> bool { return 0 <= i && i < rank; }; + return std::all_of(contracting_dims.begin(), contracting_dims.end(), + in_range) && + std::all_of(batch_dims.begin(), batch_dims.end(), in_range); + }; + + tensorflow::gtl::ArraySlice lhs_contracting_dimensions = + AsInt64Slice(dimension_numbers.lhs_contracting_dimensions()); + tensorflow::gtl::ArraySlice rhs_contracting_dimensions = + AsInt64Slice(dimension_numbers.rhs_contracting_dimensions()); + tensorflow::gtl::ArraySlice lhs_batch_dimensions = + AsInt64Slice(dimension_numbers.lhs_batch_dimensions()); + tensorflow::gtl::ArraySlice rhs_batch_dimensions = + AsInt64Slice(dimension_numbers.rhs_batch_dimensions()); + + if (!dims_in_range(ShapeUtil::Rank(lhs), lhs_contracting_dimensions, + lhs_batch_dimensions) || + !dims_in_range(ShapeUtil::Rank(rhs), rhs_contracting_dimensions, + rhs_batch_dimensions)) { + return InvalidArgument("A dimension number is out of range in dot: %s", + dimension_numbers.DebugString().c_str()); + } + + // Check that dimension numbers are unique. + auto dims_unique = [](tensorflow::gtl::ArraySlice contracting_dims, + tensorflow::gtl::ArraySlice batch_dims) -> bool { + tensorflow::gtl::FlatSet dim_set; + auto is_unique = [&dim_set](int64 i) -> bool { + return dim_set.insert(i).second; + }; + return std::all_of(contracting_dims.begin(), contracting_dims.end(), + is_unique) && + std::all_of(batch_dims.begin(), batch_dims.end(), is_unique); + }; + + if (!dims_unique(lhs_contracting_dimensions, lhs_batch_dimensions) || + !dims_unique(rhs_contracting_dimensions, rhs_batch_dimensions)) { + return InvalidArgument("A dimension number is not unique in dot: %s", + dimension_numbers.DebugString().c_str()); + } + + // Check that the count of non-contracting-non-batch dimensions is in {0, 1}. + const int64 lhs_non_contracting_non_batch_dims = + ShapeUtil::Rank(lhs) - + dimension_numbers.lhs_contracting_dimensions_size() - + dimension_numbers.lhs_batch_dimensions_size(); + const int64 rhs_non_contracting_non_batch_dims = + ShapeUtil::Rank(rhs) - + dimension_numbers.rhs_contracting_dimensions_size() - + dimension_numbers.rhs_batch_dimensions_size(); + if (lhs_non_contracting_non_batch_dims < 0 || + lhs_non_contracting_non_batch_dims > 1 || + rhs_non_contracting_non_batch_dims < 0 || + rhs_non_contracting_non_batch_dims > 1) { + return InvalidArgument( + "batch and contracting dimension number mismatch " + "with rank "); + } + + return Status::OK(); +} + +} // namespace + +/* static */ StatusOr ShapeInference::InferDotOpShape( + const Shape& lhs, const Shape& rhs, + const DotDimensionNumbers& dimension_numbers) { TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of dot")); TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of dot")); @@ -570,37 +659,62 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, return fail("element types do not match"); } - if (ShapeUtil::Rank(lhs) < 1 || ShapeUtil::Rank(lhs) > 2 || - ShapeUtil::Rank(rhs) < 1 || ShapeUtil::Rank(rhs) > 2) { - return fail("dot only supports rank 1 or 2"); + if ((ShapeUtil::Rank(lhs) < 1) || (ShapeUtil::Rank(rhs) < 1)) { + return fail("dot only supports rank 1 or above."); } - // Determine the index of the contracted dimensions for input tensors. - // dimensions -1 of lhs and dimension 0 of rhs are contracted. - int64 lhs_contracted_dimension = ShapeUtil::GetDimensionNumber(lhs, -1); - int64 rhs_contracted_dimension = 0; + // Validate basic properties of dot dimension numbers. + TF_RETURN_IF_ERROR(ValidateDotDimensionNumbers(lhs, rhs, dimension_numbers)); + + // Check that there is only one contracting dimension for both lhs and rhs. + if (dimension_numbers.lhs_contracting_dimensions_size() != + dimension_numbers.rhs_contracting_dimensions_size() || + dimension_numbers.lhs_contracting_dimensions_size() != 1) { + return fail("must specify one contracting dimension for both lhs and rhs."); + } - // Check if the contracted dimension sizes are the same. - if ((lhs_contracted_dimension < ShapeUtil::Rank(lhs) && - rhs_contracted_dimension < ShapeUtil::Rank(rhs)) && - lhs.dimensions(lhs_contracted_dimension) != - rhs.dimensions(rhs_contracted_dimension)) { - return fail("contracted dimensions mismatch"); + // Check that contracting dimension sizes match. + const int64 lhs_contracting_dimension = + dimension_numbers.lhs_contracting_dimensions(0); + const int64 rhs_contracting_dimension = + dimension_numbers.rhs_contracting_dimensions(0); + if (lhs.dimensions(lhs_contracting_dimension) != + rhs.dimensions(rhs_contracting_dimension)) { + return fail("contracting dimension sizes do not match."); + } + + // Check that number of batch dimensions match. + if (dimension_numbers.lhs_batch_dimensions_size() != + dimension_numbers.rhs_batch_dimensions_size()) { + return fail("must the same number of batch dimensions for lhs and rhs."); + } + + // Check that batch dimension numbers and sizes match. + for (int64 i = 0; i < dimension_numbers.lhs_batch_dimensions_size(); ++i) { + if (dimension_numbers.lhs_batch_dimensions(i) != + dimension_numbers.rhs_batch_dimensions(i) || + lhs.dimensions(dimension_numbers.lhs_batch_dimensions(i)) != + rhs.dimensions(dimension_numbers.rhs_batch_dimensions(i))) { + return fail("batch dimension numbers and sizes must match for lhs/rhs."); + } } // The ranks of lhs and rhs are decremented by 1 respectively due to the // contraction, and added for the rank of the result. When an input tensor is // a scalar, its contribution to the rank of the result is 0. // Generate the result dimensions in order, rhs dimensions followed by lhs - // dimensions except the contracted dimensions. + // dimensions except the contracted and batch dimensions. std::vector dimensions; + std::unordered_set rhs_batch_dims( + dimension_numbers.rhs_batch_dimensions().begin(), + dimension_numbers.rhs_batch_dimensions().end()); for (int64 i = 0; i < ShapeUtil::Rank(lhs); i++) { - if (i != lhs_contracted_dimension) { + if (i != lhs_contracting_dimension) { dimensions.push_back(lhs.dimensions(i)); } } for (int64 i = 0; i < ShapeUtil::Rank(rhs); i++) { - if (i != rhs_contracted_dimension) { + if (i != rhs_contracting_dimension && rhs_batch_dims.count(i) == 0) { dimensions.push_back(rhs.dimensions(i)); } } @@ -816,8 +930,6 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( rhs, tensorflow::strings::StrCat("rhs of binary operation ", BinaryOperation_Name(operation)))); switch (operation) { - case BINOP_DOT: - return InferDotOpShape(lhs, rhs); case BINOP_MAX: case BINOP_MIN: case BINOP_SUB: diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 0aadb98a40..382c4f8abc 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -229,11 +229,13 @@ class ShapeInference { tensorflow::gtl::ArraySlice arg_shapes, const ProgramShape& to_apply); - private: // Helper that infers the shape produced by performing a dot operation with // the given LHS and RHS shapes. - static StatusOr InferDotOpShape(const Shape& lhs, const Shape& rhs); + static StatusOr InferDotOpShape( + const Shape& lhs, const Shape& rhs, + const DotDimensionNumbers& dimension_numbers); + private: // Helper that infers the shape produced by performing an element-wise binary // operation with the given LHS and RHS shapes. // Note: By "element-wise" we mean operations that look at a single element in diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index be93c879c0..6e53d2d609 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -898,8 +898,11 @@ TEST_F(ShapeInferenceTest, BroadcastScalar) { // scalar vector: error TEST_F(ShapeInferenceTest, ScalarDotVector) { + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); auto inferred_status = - ShapeInference::InferBinaryOpShape(BINOP_DOT, f32_, vector_32_, {}); + ShapeInference::InferDotOpShape(f32_, vector_32_, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), HasSubstr("dot only supports rank")); @@ -907,61 +910,199 @@ TEST_F(ShapeInferenceTest, ScalarDotVector) { // 3D 2D: error TEST_F(ShapeInferenceTest, DotWithRankHigherThanTwo) { - auto inferred_status = ShapeInference::InferBinaryOpShape( - BINOP_DOT, ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, {}); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto inferred_status = ShapeInference::InferDotOpShape( + ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("dot only supports rank")); + HasSubstr("batch and contracting dimension number mismatch")); } // vector vector -> scalar TEST_F(ShapeInferenceTest, VectorDotVector) { + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); auto inferred_status = - ShapeInference::InferBinaryOpShape(BINOP_DOT, vector_64_, vector_64_, {}); + ShapeInference::InferDotOpShape(vector_64_, vector_64_, dot_dnums); ASSERT_IS_OK(inferred_status.status()); ASSERT_TRUE(ShapeUtil::Equal(f32_, inferred_status.ValueOrDie())); auto inferred_status_mismatch = - ShapeInference::InferBinaryOpShape(BINOP_DOT, vector_64_, vector_32_, {}); + ShapeInference::InferDotOpShape(vector_64_, vector_32_, dot_dnums); ASSERT_FALSE(inferred_status_mismatch.ok()); } // matrix vector -> vector TEST_F(ShapeInferenceTest, MatrixDotVector) { - auto inferred_status = ShapeInference::InferBinaryOpShape( - BinaryOperation::BINOP_DOT, matrix_32_64_, vector_64_, {}); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto inferred_status = + ShapeInference::InferDotOpShape(matrix_32_64_, vector_64_, dot_dnums); ASSERT_IS_OK(inferred_status.status()); ASSERT_TRUE(ShapeUtil::Equal(inferred_status.ValueOrDie(), vector_32_)); - auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape( - BinaryOperation::BINOP_DOT, matrix_32_64_, vector_32_, {}); + auto inferred_status_mismatch = + ShapeInference::InferDotOpShape(matrix_32_64_, vector_32_, dot_dnums); ASSERT_FALSE(inferred_status_mismatch.ok()); } // vector matrix -> vector TEST_F(ShapeInferenceTest, VectorDotMatrix) { - auto inferred_status = ShapeInference::InferBinaryOpShape( - BinaryOperation::BINOP_DOT, vector_32_, matrix_32_64_, {}); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + auto inferred_status = + ShapeInference::InferDotOpShape(vector_32_, matrix_32_64_, dot_dnums); ASSERT_IS_OK(inferred_status.status()); ASSERT_TRUE(ShapeUtil::Equal(inferred_status.ValueOrDie(), vector_64_)); - auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape( - BinaryOperation::BINOP_DOT, vector_64_, matrix_32_64_, {}); + auto inferred_status_mismatch = + ShapeInference::InferDotOpShape(vector_64_, matrix_32_64_, dot_dnums); ASSERT_FALSE(inferred_status_mismatch.ok()); } // matrix matrix -> matrix TEST_F(ShapeInferenceTest, MatrixDotMatrix) { - auto inferred_status_match = ShapeInference::InferBinaryOpShape( - BinaryOperation::BINOP_DOT, matrix_32_64_, matrix_64_48_, {}); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto inferred_status_match = + ShapeInference::InferDotOpShape(matrix_32_64_, matrix_64_48_, dot_dnums); ASSERT_IS_OK(inferred_status_match.status()); ASSERT_TRUE( ShapeUtil::Equal(inferred_status_match.ValueOrDie(), matrix_32_48_)) << "inferred: " << ShapeUtil::HumanString(inferred_status_match.ValueOrDie()) << " expected: " << ShapeUtil::HumanString(matrix_64_48_); - auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape( - BinaryOperation::BINOP_DOT, matrix_32_64_, matrix_32_64_, {}); + auto inferred_status_mismatch = + ShapeInference::InferDotOpShape(matrix_32_64_, matrix_32_64_, dot_dnums); ASSERT_FALSE(inferred_status_mismatch.ok()); } +// BatchMatMul with two batch dimensions and one contracting dimension. +TEST_F(ShapeInferenceTest, DotGeneral) { + Shape lhs_shape = ShapeUtil::MakeShape(F32, {5, 2, 11, 3}); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {5, 2, 3, 14}); + Shape output_shape = ShapeUtil::MakeShape(F32, {5, 2, 11, 14}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(3); + dot_dnums.add_lhs_batch_dimensions(0); + dot_dnums.add_lhs_batch_dimensions(1); + + dot_dnums.add_rhs_contracting_dimensions(2); + dot_dnums.add_rhs_batch_dimensions(0); + dot_dnums.add_rhs_batch_dimensions(1); + + auto inferred_status_match = + ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); + ASSERT_IS_OK(inferred_status_match.status()); + ASSERT_TRUE( + ShapeUtil::Equal(inferred_status_match.ValueOrDie(), output_shape)) + << "inferred: " + << ShapeUtil::HumanString(inferred_status_match.ValueOrDie()) + << " expected: " << ShapeUtil::HumanString(output_shape); +} + +// BatchMatMul with two contracting dimensions fails. +TEST_F(ShapeInferenceTest, DotWithTwoContractingDimsFails) { + Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3, 2}); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14}); + Shape output_shape = ShapeUtil::MakeShape(F32, {2, 11, 14}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(2); + dot_dnums.add_lhs_contracting_dimensions(3); + dot_dnums.add_lhs_batch_dimensions(0); + + dot_dnums.add_rhs_contracting_dimensions(1); + dot_dnums.add_rhs_batch_dimensions(0); + + auto inferred_status = + ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); + ASSERT_FALSE(inferred_status.ok()); + ASSERT_THAT(inferred_status.status().error_message(), + HasSubstr("must specify one contracting dimension for both " + "lhs and rhs")); +} + +// BatchMatMul with different batch dimension sizes fails. +TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) { + Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 3, 14}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(2); + dot_dnums.add_lhs_batch_dimensions(0); + + dot_dnums.add_rhs_contracting_dimensions(1); + dot_dnums.add_rhs_batch_dimensions(0); + + auto inferred_status = + ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); + ASSERT_FALSE(inferred_status.ok()); + ASSERT_THAT(inferred_status.status().error_message(), + HasSubstr("batch dimension numbers and sizes must match")); +} + +// BatchMatMul with different batch dimension numbers fails. +TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersFails) { + Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 2, 14}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(2); + dot_dnums.add_lhs_batch_dimensions(0); + + dot_dnums.add_rhs_contracting_dimensions(0); + dot_dnums.add_rhs_batch_dimensions(1); + + auto inferred_status = + ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); + ASSERT_FALSE(inferred_status.ok()); + ASSERT_THAT(inferred_status.status().error_message(), + HasSubstr("batch dimension numbers and sizes must match")); +} + +// BatchMatMul with out-of-range dimension numbers fails. +TEST_F(ShapeInferenceTest, DotWithContractingDimNumberOutOfRange) { + Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(3); + dot_dnums.add_lhs_batch_dimensions(0); + + dot_dnums.add_rhs_contracting_dimensions(0); + dot_dnums.add_rhs_batch_dimensions(1); + + auto inferred_status = + ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); + ASSERT_FALSE(inferred_status.ok()); + ASSERT_THAT(inferred_status.status().error_message(), + HasSubstr("A dimension number is out of range")); +} + +// BatchMatMul with non-unique dimension numbers fails. +TEST_F(ShapeInferenceTest, DotWithContractingNonUniqueDimNumber) { + Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_lhs_batch_dimensions(0); + + dot_dnums.add_rhs_contracting_dimensions(0); + dot_dnums.add_rhs_batch_dimensions(1); + + auto inferred_status = + ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); + ASSERT_FALSE(inferred_status.ok()); + ASSERT_THAT(inferred_status.status().error_message(), + HasSubstr("A dimension number is not unique")); +} + TEST_F(ShapeInferenceTest, BinOpBroadcastMatrixVector) { // Test variations of broadcasting a vector for a binary add with a // matrix. diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc index ba99852905..caa1a111ad 100644 --- a/tensorflow/compiler/xla/service/transpose_folding_test.cc +++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc @@ -64,9 +64,12 @@ TEST_F(TransposeFoldingTest, FoldDotTranspose) { HloInstruction* transpose_y = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0})); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {2, 2}), /*opcode=*/HloOpcode::kDot, - /*lhs=*/x, /*rhs=*/transpose_y)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + HloInstruction* dot = builder.AddInstruction( + HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x, + /*rhs=*/transpose_y, dot_dnums)); HloModule module("test_module"); HloComputation* entry_computation = @@ -104,9 +107,12 @@ TEST_F(TransposeFoldingTest, FoldDotTransposeConstant) { HloInstruction* transpose1 = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(F32, {2, 3}), const1, {1, 0})); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1, 3}), /*opcode=*/HloOpcode::kDot, - /*lhs=*/transpose0, /*rhs=*/transpose1)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateDot( + ShapeUtil::MakeShape(F32, {1, 3}), + /*lhs=*/transpose0, /*rhs=*/transpose1, dot_dnums)); HloModule module("test_module"); HloComputation* entry_computation = @@ -169,9 +175,12 @@ TEST_F(TransposeFoldingTest, FoldDotTransposeInWhile) { HloInstruction* transpose_y = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0})); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {2, 2}), /*opcode=*/HloOpcode::kDot, - /*lhs=*/x, /*rhs=*/transpose_y)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + HloInstruction* dot = builder.AddInstruction( + HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x, + /*rhs=*/transpose_y, dot_dnums)); HloModule module("test_module"); HloComputation* entry_computation = diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 4e90491b55..6d0d367981 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -88,8 +88,6 @@ HloOpcode BinaryOperationToHloOpcode(BinaryOperation binop) { return HloOpcode::kAtan2; case BINOP_COMPLEX: return HloOpcode::kComplex; - case BINOP_DOT: - return HloOpcode::kDot; case BINOP_MUL: return HloOpcode::kMultiply; case BINOP_ADD: @@ -1207,6 +1205,33 @@ StatusOr UserComputation::AddCustomCallInstruction( return handle; } +StatusOr UserComputation::AddDotInstruction( + const DotRequest& dot_request) { + tensorflow::mutex_lock lock(mutex_); + + TF_ASSIGN_OR_RETURN(const OperationRequest* lhs, + LookUpRequest(dot_request.lhs())); + TF_ASSIGN_OR_RETURN(const OperationRequest* rhs, + LookUpRequest(dot_request.rhs())); + + TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferDotOpShape( + lhs->output_shape(), rhs->output_shape(), + dot_request.dimension_numbers())); + + const ComputationDataHandle handle = CreateComputationDataHandle(); + + OperationRequest& request = + (*session_computation_.mutable_requests())[handle.handle()]; + *request.mutable_output_handle() = handle; + *request.mutable_output_shape() = shape; + *request.mutable_request()->mutable_dot_request() = dot_request; + + VLOG(1) << "AddDotInstruction (" << GetVersionedHandleInternal() + << "), data handle " << handle.handle() << ": " + << dot_request.ShortDebugString(); + return handle; +} + StatusOr UserComputation::AddUnaryInstruction( const UnaryOpRequest& unary_request) { tensorflow::mutex_lock lock(mutex_); @@ -1629,6 +1654,15 @@ void PureFunctionalVisitor(const SessionComputation& session_computation, break; } + case OpRequest::kDotRequest: { + const DotRequest& dot_request = request.request().dot_request(); + PureFunctionalVisitor(session_computation, dot_request.lhs(), + num_parameters, visited, is_functional); + PureFunctionalVisitor(session_computation, dot_request.rhs(), + num_parameters, visited, is_functional); + break; + } + case OpRequest::kSendRequest: { *is_functional = false; break; @@ -2453,6 +2487,13 @@ static void ForEachOperand( break; } + case OpRequest::kDotRequest: { + const DotRequest& dot_request = request.request().dot_request(); + apply(dot_request.rhs()); + apply(dot_request.lhs()); + break; + } + case OpRequest::kUnaryOpRequest: { const UnaryOpRequest& unary_op_request = request.request().unary_op_request(); @@ -2732,6 +2773,15 @@ void ComputationLowerer::Visit( break; } + case OpRequest::kDotRequest: { + const DotRequest& dot_request = request.request().dot_request(); + HloInstruction* lhs = lookup_instruction(dot_request.lhs()); + HloInstruction* rhs = lookup_instruction(dot_request.rhs()); + hlo_instruction = add_instruction(HloInstruction::CreateDot( + request.output_shape(), lhs, rhs, dot_request.dimension_numbers())); + break; + } + case OpRequest::kCrossReplicaSumRequest: { const CrossReplicaSumRequest& cross_replica_sum_request = request.request().cross_replica_sum_request(); @@ -3151,8 +3201,7 @@ void ComputationLowerer::Visit( lhs = (lhs == operand_to_broadcast) ? broadcasted_operand : lhs; rhs = (rhs == operand_to_broadcast) ? broadcasted_operand : rhs; } - if (debug_options_.xla_eliminate_hlo_implicit_broadcast() && - binary_op_request.binop() != BINOP_DOT) { + if (debug_options_.xla_eliminate_hlo_implicit_broadcast()) { if (!ShapeUtil::SameDimensions(request.output_shape(), lhs->shape())) { // lhs side is being implicitly broadcast. Change to explicit. lhs = diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h index 317c631dca..b6686c3f1a 100644 --- a/tensorflow/compiler/xla/service/user_computation.h +++ b/tensorflow/compiler/xla/service/user_computation.h @@ -153,6 +153,10 @@ class UserComputation { StatusOr AddCustomCallInstruction( const CustomCallRequest& custom_call_request); + // Enqueues a dot instruction onto this user computation. + StatusOr AddDotInstruction( + const DotRequest& dot_request); + // Enqueues a broadcast instruction onto this user computation. StatusOr AddBroadcastInstruction( const BroadcastRequest& broadcast_request); diff --git a/tensorflow/compiler/xla/service/user_computation_test.cc b/tensorflow/compiler/xla/service/user_computation_test.cc index 5afaf226ae..e45673300b 100644 --- a/tensorflow/compiler/xla/service/user_computation_test.cc +++ b/tensorflow/compiler/xla/service/user_computation_test.cc @@ -334,50 +334,5 @@ TEST_F(UserComputationTest, EliminateDegenerateBroadcastAfterIndimBroadcast) { operands[1]->opcode() == HloOpcode::kBroadcast); } -TEST_F(UserComputationTest, SkipDotInEliminatingImplicitBroadcast) { - auto debug_options = DebugOptions(); - debug_options.set_xla_eliminate_hlo_implicit_broadcast(true); - - // %a = Param({1, 3}); - // %b = Param({3, 1}); - // %dot = Dot(%a, %b); - ComputationHandle handle; - handle.set_handle(123); - UserComputation computation("TheComputation", handle); - - ParameterRequest a_request; - *a_request.mutable_shape() = ShapeUtil::MakeShape(F32, {1, 3}); - a_request.set_name("a"); - a_request.set_parameter(0); - TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle a_handle, - computation.AddParameterInstruction(a_request)); - - ParameterRequest b_request; - *b_request.mutable_shape() = ShapeUtil::MakeShape(F32, {3, 1}); - b_request.set_name("b"); - b_request.set_parameter(1); - TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle b_handle, - computation.AddParameterInstruction(b_request)); - - BinaryOpRequest dot; - dot.set_binop(BINOP_DOT); - *dot.mutable_lhs() = a_handle; - *dot.mutable_rhs() = b_handle; - TF_ASSERT_OK(computation.AddBinaryInstruction(dot).status()); - - auto hlo_resolver = [](const VersionedComputationHandle& handle) { - return nullptr; - }; - VersionedComputationHandle latest_version = computation.GetVersionedHandle(); - - // Build the HLO computation. - TF_ASSERT_OK_AND_ASSIGN( - std::unique_ptr hlo_computation, - computation.BuildHloComputation(latest_version.version, hlo_resolver, - debug_options)); - - EXPECT_EQ(3, hlo_computation->instruction_count()); -} - } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index bfb04fd9f9..680d790b57 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -561,5 +561,25 @@ TEST_F(DotOperationTest, TransposeFolding) { } } +XLA_TEST_F(DotOperationTest, DotGeneralUnimplemented) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR3FromArray3D( + {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, {7.0, 8.0}}}); + auto rhs = builder.ConstantR3FromArray3D( + {{{1.0, 0.0}, {0.0, 1.0}}, {{0.0, 1.0}, {1.0, 0.0}}}); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(2); + dot_dnums.add_rhs_contracting_dimensions(1); + dot_dnums.add_lhs_batch_dimensions(0); + dot_dnums.add_rhs_batch_dimensions(0); + builder.DotGeneral(lhs, rhs, dot_dnums); + + auto status = Execute(&builder, {}).status(); + EXPECT_FALSE(status.ok()); + EXPECT_THAT( + status.error_message(), + ::testing::HasSubstr("Dot with batch dimensions not implemented.")); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc index 22d2b917a1..89fa6ed9f7 100644 --- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc +++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc @@ -76,8 +76,11 @@ class MultiOutputFusionTest : public HloTestBase { elem_shape2, HloOpcode::kAdd, broadcast, param1)); HloInstruction* sub = builder.AddInstruction(HloInstruction::CreateBinary( elem_shape2, HloOpcode::kSubtract, param1, broadcast)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); HloInstruction* dot = builder.AddInstruction( - HloInstruction::CreateBinary(elem_shape2, HloOpcode::kDot, sub, add2)); + HloInstruction::CreateDot(elem_shape2, sub, add2, dot_dnums)); auto computation = hlo_module->AddEntryComputation(builder.Build(dot)); if (manual_fusion) { @@ -133,8 +136,11 @@ class MultiOutputFusionTest : public HloTestBase { HloInstruction* reshape = builder.AddInstruction(HloInstruction::CreateReshape( ShapeUtil::MakeShape(F32, {size, 1}), add)); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {1}), HloOpcode::kDot, sub, reshape)); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateDot( + ShapeUtil::MakeShape(F32, {1}), sub, reshape, dot_dnums)); auto computation = hlo_module->AddEntryComputation(builder.Build(dot)); if (manual_fusion) { diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index b560354050..7efdf8552e 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -498,6 +498,23 @@ message CustomCallRequest { Shape shape = 4; } +message DotDimensionNumbers { + // The dimension numbers that represent the 'lhs' contracting dimensions. + repeated int64 lhs_contracting_dimensions = 1; + // The dimension numbers that represent the 'rhs' contracting dimensions. + repeated int64 rhs_contracting_dimensions = 2; + // The dimension numbers that represent the 'lhs' batch dimensions. + repeated int64 lhs_batch_dimensions = 3; + // The dimension numbers that represent the 'rhs' batch dimensions. + repeated int64 rhs_batch_dimensions = 4; +}; + +message DotRequest { + ComputationDataHandle lhs = 2; + ComputationDataHandle rhs = 3; + DotDimensionNumbers dimension_numbers = 4; +} + message MapRequest { repeated ComputationDataHandle operands = 2; ComputationHandle to_apply = 3; @@ -732,9 +749,6 @@ enum BinaryOperation { BINOP_LT = 9; BINOP_NE = 10; - // Dot product, matrix multiply. - BINOP_DOT = 12; - // Element-wise maximum. BINOP_MAX = 14; @@ -885,6 +899,7 @@ message OpRequest { ConvolveRequest convolve_request = 8; CrossReplicaSumRequest cross_replica_sum_request = 9; CustomCallRequest custom_call_request = 10; + DotRequest dot_request = 43; DynamicSliceRequest dynamic_slice_request = 11; DynamicUpdateSliceRequest dynamic_update_slice_request = 12; GetTupleElementRequest get_tuple_element_request = 13; @@ -914,7 +929,7 @@ message OpRequest { BatchNormInferenceRequest batch_norm_inference_request = 38; FftRequest fft_request = 41; ConvertRequest bitcast_convert_request = 42; - // Next: 43 + // Next: 44 } } diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 8831b3d0fd..4333f94486 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -511,6 +511,87 @@ contracted dimensions of `lhs` and `rhs` must be of the same size. In practice, it can be used to perform dot products between vectors, vector/matrix multiplications or matrix/matrix multiplications. +## DotGeneral + +See also +[`ComputationBuilder::DotGeneral`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h). + + `DotGeneral(lhs, rhs, dimension_numbers)` + +| Arguments | Type | Semantics +| --------- | ----------------------- | --------------- +| `lhs` | `ComputationDataHandle` | array of type T +| `rhs` | `ComputationDataHandle` | array of type T +| `dimension_numbers` | `DotDimensionNumbers` | array of type T + +As Dot, but allows contracting and batch dimension numbers to be specified for +both the 'lhs' and 'rhs'. + +| DotDimensionNumbers Fields | Type | Semantics +| --------- | ----------------------- | --------------- +| 'lhs_contracting_dimensions' | repeated int64 | 'lhs' contracting dimension numbers | +| 'rhs_contracting_dimensions' | repeated int64 | 'rhs' contracting dimension numbers | +| 'lhs_batch_dimensions' | repeated int64 | 'lhs' batch dimension numbers | +| 'rhs_batch_dimensions' | repeated int64 | 'rhs' batch dimension numbers | + +DotGeneral performs the sum of products over contracting dimensions specified +in 'dimension_numbers'. + +Associated contracting dimension numbers from the 'lhs' and 'rhs' do not need +to be the same, but must be listed in the same order in both +'lhs/rhs_contracting_dimensions' arrays and have the same dimension sizes. + +Example with contracting dimension numbers: + +``` +lhs = { {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0} } + +rhs = { {1.0, 1.0, 1.0}, + {2.0, 2.0, 2.0} } + +DotDimensionNumbers dnums; +dnums.add_lhs_contracting_dimensions(1); +dnums.add_rhs_contracting_dimensions(1); + +DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0}, + {15.0, 30.0} } +``` + +Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same +dimension number, must be listed in the same order in both arrays, and must +have the same dimension sizes. + +Example with batch dimension numbers (batch size 2, 2x2 matrices): + +``` +lhs = { { {1.0, 2.0}, + {3.0, 4.0} }, + { {5.0, 6.0}, + {7.0, 8.0} } } + +rhs = { { {1.0, 0.0}, + {0.0, 1.0} }, + { {1.0, 0.0}, + {0.0, 1.0} } } + +DotDimensionNumbers dnums; +dnums.add_lhs_contracting_dimensions(2); +dnums.add_rhs_contracting_dimensions(1); +dnums.add_lhs_batch_dimensions(0); +dnums.add_rhs_batch_dimensions(0); + +DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0}, + {3.0, 4.0} }, + { {5.0, 6.0}, + {7.0, 8.0} } } +``` + +| Input | Output | Semantics | +| ----------------------------------- | ----------------- | ---------------- | +| [b0, m, k] `dot` [b0, k, n] | [b0, m, n] | batch matmul | +| [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n] | batch matmul | + ## Element-wise binary arithmetic operations See also -- GitLab From eafa8efc55fb9989a679e36b030742c6d87b0310 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 30 Nov 2017 11:23:25 -0800 Subject: [PATCH 0999/1801] [XLA:CPU] Add Hlo profiling support to XlaJitCompiledCpuFunction Some of the functionality has bled into the generic XlaCompiledCpuFunction, but there still remains a fair amount of work to do before the AOT side of things start working. This CL also fixes a bug I introduced in a previous CL -- when I changed IrEmitter::hlo_to_profile_idx_ to a value, I changed the signature of the generated function to always have the "profile_counters" argument when the AOT client code expects the signature to not have that argument. In practice this wasn't an issue for the standard x86 calling convention, but it could easily have been problematic on other architectures and calling conventions. After this change the mismatch is no longer present. PiperOrigin-RevId: 177481998 --- tensorflow/compiler/aot/codegen.cc | 8 +-- tensorflow/compiler/aot/codegen_test_h.golden | 8 +-- .../compiler/aot/tests/tfcompile_test.cc | 4 +- .../tf2xla/xla_compiled_cpu_function.cc | 15 +++++- .../tf2xla/xla_compiled_cpu_function.h | 53 ++++++++++++++----- .../tf2xla/xla_jit_compiled_cpu_function.cc | 25 ++++----- .../compiler/xla/service/cpu/ir_function.cc | 2 +- .../xla/service/hlo_execution_profile.cc | 3 +- .../xla/service/hlo_profile_printer.h | 8 ++- 9 files changed, 82 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc index ae22f7edc4..28ac40df18 100644 --- a/tensorflow/compiler/aot/codegen.cc +++ b/tensorflow/compiler/aot/codegen.cc @@ -418,7 +418,7 @@ namespace xla { class ExecutableRunOptions; } // (Implementation detail) Entry point to the function in the object file. extern "C" void {{ENTRY}}( void* result, const xla::ExecutableRunOptions* run_options, - const void** args, void** temps); + const void** args, void** temps, tensorflow::int64* profile_counters); {{NS_START}} // {{CLASS}} represents a computation previously specified in a @@ -483,7 +483,7 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction { return *kStaticData; } - {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS) + {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS) : XlaCompiledCpuFunction(StaticData(), alloc_mode) {} {{CLASS}}(const {{CLASS}}&) = delete; @@ -496,8 +496,8 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction { // void set_argN_data(void* data) // Sets the buffer of type T for positional argument N. May be called in // any AllocMode. Must be called before Run to have an affect. Must be - // called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument, - // to set the argument buffers. + // called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional + // argument, to set the argument buffers. // // T* argN_data() // Returns the buffer of type T for positional argument N. diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden index 65f342ce27..cf01bee325 100644 --- a/tensorflow/compiler/aot/codegen_test_h.golden +++ b/tensorflow/compiler/aot/codegen_test_h.golden @@ -19,7 +19,7 @@ namespace xla { class ExecutableRunOptions; } // (Implementation detail) Entry point to the function in the object file. extern "C" void entry_point( void* result, const xla::ExecutableRunOptions* run_options, - const void** args, void** temps); + const void** args, void** temps, tensorflow::int64* profile_counters); namespace foo { namespace bar { @@ -86,7 +86,7 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction { return *kStaticData; } - MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS) + MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS) : XlaCompiledCpuFunction(StaticData(), alloc_mode) {} MyClass(const MyClass&) = delete; @@ -99,8 +99,8 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction { // void set_argN_data(void* data) // Sets the buffer of type T for positional argument N. May be called in // any AllocMode. Must be called before Run to have an affect. Must be - // called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument, - // to set the argument buffers. + // called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional + // argument, to set the argument buffers. // // T* argN_data() // Returns the buffer of type T for positional argument N. diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc index 6b037f276a..413efd9cea 100644 --- a/tensorflow/compiler/aot/tests/tfcompile_test.cc +++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc @@ -70,7 +70,7 @@ TEST(TFCompileTest, Add) { // Run tests that use set_argN_data separately, to avoid accidentally re-using // non-existent buffers. TEST(TFCompileTest, Add_SetArg) { - AddComp add(AddComp::AllocMode::RESULTS_AND_TEMPS_ONLY); + AddComp add(AddComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY); int32 arg_x = 10; int32 arg_y = 32; @@ -258,7 +258,7 @@ TEST(TFCompileTest, MatMul2_SetArg) { Eigen::ThreadPoolDevice device(&tp, tp.NumThreads()); foo::bar::MatMulComp matmul( - foo::bar::MatMulComp::AllocMode::RESULTS_AND_TEMPS_ONLY); + foo::bar::MatMulComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY); matmul.set_thread_pool(&device); // Test using the set_argN_data() methods. diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc index b5c17c5273..43d0e17c2c 100644 --- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc +++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc @@ -28,9 +28,10 @@ XlaCompiledCpuFunction::XlaCompiledCpuFunction(const StaticData& static_data, temps_(new void*[static_data.num_temps]), arg_names_(static_data.arg_names), result_names_(static_data.result_names), - program_shape_(static_data.program_shape) { + program_shape_(static_data.program_shape), + hlo_profile_printer_(static_data.hlo_profile_printer) { // Allocate arg and temp buffers. - if (alloc_mode == AllocMode::ARGS_RESULTS_AND_TEMPS) { + if (alloc_mode == AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS) { alloc_args_ = tensorflow::tfcompile::runtime::MallocContiguousBuffers( static_data.arg_sizes, static_data.num_args, args_, /*annotate_initialized=*/false); @@ -43,6 +44,15 @@ XlaCompiledCpuFunction::XlaCompiledCpuFunction(const StaticData& static_data, if (static_data.requires_runtime_context) { args_[static_data.num_args - 1] = &context_; } + + // If Hlo profiling is enabled the generated code expects an appropriately + // sized buffer to be passed in as the last argument. If Hlo profiling is + // disabled the last function argument is still present in the function + // signature, but it is ignored by the generated code and we pass in null for + // it. + if (hlo_profiling_enabled()) { + profile_counters_ = new int64[static_data.profile_counters_size](); + } } XlaCompiledCpuFunction::~XlaCompiledCpuFunction() { @@ -50,6 +60,7 @@ XlaCompiledCpuFunction::~XlaCompiledCpuFunction() { tensorflow::tfcompile::runtime::FreeContiguous(alloc_temps_); delete[] args_; delete[] temps_; + delete[] profile_counters_; } namespace { diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h index f49a788922..3c4314d498 100644 --- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h +++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILED_CPU_FUNCTION_H_ #define TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILED_CPU_FUNCTION_H_ -#include +#include #include #include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h" @@ -27,6 +27,7 @@ limitations under the License. // never use this functionality. namespace xla { class ProgramShape; +class HloProfilePrinter; } namespace tensorflow { @@ -48,12 +49,10 @@ namespace tensorflow { class XlaCompiledCpuFunction { public: // Type of the raw function, produced by either JIT or AOT. - // - // TODO(toddw): Add support for hlo profiling, and replace std::function with - // a raw function pointer, for some codesize savings. - using RawFunction = std::function; + using RawFunction = void (*)(void* result, + const xla::ExecutableRunOptions* run_options, + const void** args, void** temps, + int64* profile_counters); // StaticData represents the state necessary to run an XLA-compiled // function. For JIT this is backed by data in XlaJitCompiledCpuFunction; for @@ -81,21 +80,29 @@ class XlaCompiledCpuFunction { // [Optional] Arg and result shapes. const xla::ProgramShape* program_shape = nullptr; + + // [Optional] Profile printer. Null if profiling is disabled. + const xla::HloProfilePrinter* hlo_profile_printer = nullptr; + + // [Optional] The number of profile counters expected in the profile counter + // buffer by the generated code and hlo_profile_printer. 0 if profiling is + // disabled. + int64 profile_counters_size = 0; }; // AllocMode controls the buffer allocation mode. enum class AllocMode { - // Allocate all buffers - args, results and temps. - ARGS_RESULTS_AND_TEMPS, + // Allocate all buffers - args, results, profile and temps. + ARGS_RESULTS_PROFILES_AND_TEMPS, - // Only allocate result and temp buffers. + // Only allocate result, profile and temp buffers. // Use set_arg_data to set argument buffers before Run is called. - RESULTS_AND_TEMPS_ONLY, + RESULTS_PROFILES_AND_TEMPS_ONLY, }; XlaCompiledCpuFunction( const StaticData& static_data, - AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS); + AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS); virtual ~XlaCompiledCpuFunction(); XlaCompiledCpuFunction(const XlaCompiledCpuFunction&) = delete; @@ -113,7 +120,7 @@ class XlaCompiledCpuFunction { context_.error = false; context_.error_msg.clear(); raw_function_(temps_[result_index_], &run_options_, - const_cast(args_), temps_); + const_cast(args_), temps_, profile_counters_); return !context_.error; } @@ -162,6 +169,16 @@ class XlaCompiledCpuFunction { return static_cast(temps_[result_index_]); } + // Profile counters for this XLA computation. + // + // When Hlo profiling is enabled (`hlo_profiling_enabled()` return true in + // this case) these counters are non-null and are automatically populated by + // `Run`. The counters can then be pretty-printed using + // `hlo_profile_printer()`. + // + // When Hlo profiling is disabled, this accessor returns null. + const int64* profile_counters() const { return profile_counters_; } + // Returns the buffer for the positional result at the given `index`. void* result_data(size_t index) { return results()[index]; } const void* result_data(size_t index) const { return results()[index]; } @@ -195,6 +212,12 @@ class XlaCompiledCpuFunction { // program shape isn't available. const xla::ProgramShape* ProgramShape() const { return program_shape_; } + bool hlo_profiling_enabled() const { return hlo_profile_printer_ != nullptr; } + const xla::HloProfilePrinter& hlo_profile_printer() const { + assert(hlo_profiling_enabled()); + return *hlo_profile_printer_; + } + private: const RawFunction raw_function_; const size_t result_index_; @@ -208,6 +231,9 @@ class XlaCompiledCpuFunction { void* alloc_args_ = nullptr; void* alloc_temps_ = nullptr; + // Backing memory for profiling counters. + int64* profile_counters_ = nullptr; + // Options and context passed to the compiled function. xla::ExecutableRunOptions run_options_; tensorflow::XlaLocalRuntimeContext context_; @@ -216,6 +242,7 @@ class XlaCompiledCpuFunction { const char** arg_names_ = nullptr; const char** result_names_ = nullptr; const xla::ProgramShape* program_shape_ = nullptr; + const xla::HloProfilePrinter* hlo_profile_printer_ = nullptr; }; } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc index 1dd454ea8d..f727f20464 100644 --- a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc +++ b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc @@ -90,21 +90,6 @@ xla::StatusOr ComputeResultIndex( return result_slice.index(); } -// Adapt ComputeFunctionType, which includes a final profile_counters arg, to -// RawFunction, which doesn't include that final arg. -// -// TODO(toddw): Change RawFunction and AOT to also pass the final -// profile_counters arg, and remove this adapter. -XlaCompiledCpuFunction::RawFunction RawFunctionAdapter( - xla::cpu::CpuExecutable::ComputeFunctionType compute_function) { - return [compute_function](void* result, - const xla::ExecutableRunOptions* run_options, - const void** args, void** temps) { - return compute_function(result, run_options, args, temps, - /*profile_counters=*/nullptr); - }; -} - // Collect names from `entries`, where T is one of tf2xla::{Feed,Fetch}. We hold // the actual strings in nonempty_names, and hold arrays of pointers in // name_ptrs, terminated by a nullptr entry. @@ -177,7 +162,7 @@ XlaJitCompiledCpuFunction::Compile( const xla::cpu::CpuExecutable* cpu_executable = static_cast(executable->executable()); XlaCompiledCpuFunction::RawFunction raw_function = - RawFunctionAdapter(cpu_executable->compute_function()); + cpu_executable->compute_function(); const xla::BufferAssignment& buffer_assignment = cpu_executable->buffer_assignment(); @@ -211,6 +196,14 @@ XlaJitCompiledCpuFunction::Compile( jit->static_data_.arg_names = jit->arg_names_.data(); jit->static_data_.result_names = jit->result_names_.data(); jit->static_data_.program_shape = jit->program_shape_.get(); + + if (cpu_executable->hlo_profiling_enabled()) { + jit->static_data_.hlo_profile_printer = + &cpu_executable->hlo_profile_printer(); + jit->static_data_.profile_counters_size = + cpu_executable->hlo_profile_printer().profile_counters_size(); + } + return std::move(jit_unique_ptr); } diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc index fa88627156..701bce2cbf 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_function.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc @@ -99,7 +99,7 @@ void IrFunction::Initialize(const string& function_name, // // /---------------------------------------------\ // prof counters -> | counter 0 | counter 1 | ..... | counter N-1 | - // (elided for aot) \---------------------------------------------/ + // \---------------------------------------------/ // Even though the type of params and temps is void** in the host's view, in // LLVM IR this is represented by i8*, similarly to void*. It's up to the code diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index ba75e2ef1b..0809fe780d 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -109,7 +109,8 @@ std::unique_ptr CreateHloProfilePrinter( }; return MakeUnique( - computation_infos, hlo_profile_index_map.computation_count(), deleter); + computation_infos, hlo_profile_index_map.computation_count(), + /*profile_counters_size=*/max_profile_index, deleter); } HloExecutionProfile::HloExecutionProfile( diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.h b/tensorflow/compiler/xla/service/hlo_profile_printer.h index 316753a82a..2f056490ae 100644 --- a/tensorflow/compiler/xla/service/hlo_profile_printer.h +++ b/tensorflow/compiler/xla/service/hlo_profile_printer.h @@ -65,9 +65,11 @@ class HloProfilePrinter { HloProfilePrinter( HloComputationInfo* computation_infos, int64 computation_infos_size, + int64 profile_counters_size, std::function deleter = nullptr) : computation_infos_(computation_infos), computation_infos_size_(computation_infos_size), + profile_counters_size_(profile_counters_size), deleter_(std::move(deleter)) {} HloProfilePrinter(HloProfilePrinter&& other) { @@ -79,10 +81,13 @@ class HloProfilePrinter { HloProfilePrinter(const HloProfilePrinter&) = delete; HloProfilePrinter& operator=(const HloProfilePrinter&) = delete; - // Convert the profile counter sequence `counters` to a human readable string + // Converts the profile counter sequence `counters` to a human readable string // representation. string ToString(const int64* counters, double clock_rate_ghz) const; + // Returns the size of the profile buffer expected by this printer. + int64 profile_counters_size() const { return profile_counters_size_; } + ~HloProfilePrinter(); private: @@ -90,6 +95,7 @@ class HloProfilePrinter { // is manifested as the deleter_ function. HloComputationInfo* computation_infos_ = nullptr; int64 computation_infos_size_ = 0; + int64 profile_counters_size_ = 0; std::function deleter_; }; } // namespace xla -- GitLab From af36437e3937e6e532579e9c42d7f45353b88990 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 12:29:06 -0800 Subject: [PATCH 1000/1801] Add rules to replace nodes corresponding to operations with the neutral and absorbing elements for addition and multiplication with Identity. Get rid of a gratuitous copy of the entire graph in the main optimizer loop. PiperOrigin-RevId: 177491247 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/constant_folding.cc | 274 ++++++++++++++++-- .../grappler/optimizers/constant_folding.h | 9 +- .../optimizers/constant_folding_test.cc | 96 ++++++ 4 files changed, 359 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 24e6f8847a..7b4ed10e7e 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -96,6 +96,7 @@ cc_library( ":graph_optimizer", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index b5172a4833..cf913d6f48 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -30,13 +30,16 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/bcast.h" +#include "tensorflow/core/util/saved_tensor_slice_util.h" namespace tensorflow { namespace grappler { @@ -95,7 +98,38 @@ class DeviceSimple : public DeviceBase { std::unique_ptr eigen_device_; }; +template +bool AllValuesAre(const TensorProto& tensor, const T& value) { + // TensorProto represents the content of the tensor in either _val or + // tensor_content. + typename checkpoint::SaveTypeTraits::RepeatedField* tensor_values = + checkpoint::MutableTensorProtoData(const_cast(&tensor)); + if (!tensor_values->empty()) { + for (const T& tensor_value : *tensor_values) { + if (tensor_value != value) { + return false; + } + } + return true; + } + const auto tensor_content_size = tensor.tensor_content().size(); + if (tensor_content_size > 0) { + CHECK_EQ(0, tensor_content_size % sizeof(T)); + std::vector raw_values(tensor_content_size / sizeof(T)); + port::CopyToArray(tensor.tensor_content(), + reinterpret_cast(raw_values.data())); + for (int i = 0; i < tensor_content_size / sizeof(T); ++i) { + if (raw_values[i] != value) { + return false; + } + } + return true; + } + return false; +} + } // namespace + ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level, DeviceBase* cpu_device) : opt_level_(opt_level), cpu_device_(cpu_device) { @@ -202,9 +236,9 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We may add some nodes to the graph to encode control dependencies: there is // no need to process these, so only iterate over the nodes of the input // graph. - const int node_count = graph_.node_size(); + const int node_count = graph_->node_size(); for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_.mutable_node(i); + NodeDef& node = *graph_->mutable_node(i); const string op = node.op(); if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") { continue; @@ -248,7 +282,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // cases where the shape/rank/size would have been run in // the original graph. Additional inputs are extra control string ctrl_dep = - AddControlDependency(node.input(0), &graph_, node_map_.get()); + AddControlDependency(node.input(0), graph_, node_map_.get()); node.set_input(0, ctrl_dep); node_map_->AddOutput(NodeName(ctrl_dep), node.name()); } else { @@ -263,7 +297,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { AddPrefixToNodeName(strings::StrCat(node.name(), "-", j), kConstantFoldingConst); if (node_map_->GetNode(const_name) == nullptr) { - NodeDef* added_node = graph_.add_node(); + NodeDef* added_node = graph_->add_node(); added_node->set_name(const_name); added_node->set_op("Const"); added_node->set_device(node.device()); @@ -274,7 +308,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We add a control dependency to the original ShapeN node, // so that the node will only be run if all inputs of the // original ShapeN node are run. - string ctrl_dep = AddControlDependency(node.name(), &graph_, + string ctrl_dep = AddControlDependency(node.name(), graph_, node_map_.get()); *added_node->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); @@ -293,6 +327,25 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { } namespace { +bool ShapesEqual(const TensorShapeProto& shape1, + const TensorShapeProto& shape2) { + if (shape1.unknown_rank() || shape2.unknown_rank()) { + return false; + } + if (shape1.dim_size() != shape2.dim_size()) { + return false; + } + for (int i = 0; i < shape1.dim_size(); ++i) { + if (shape1.dim(i).size() != shape2.dim(i).size()) { + return false; + } + if (shape1.dim(i).size() == -1 || shape2.dim(i).size() == -1) { + return false; + } + } + return true; +} + bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties, BCast::Vec* shape, int64* min_id) { if (shape_node.op() == "Shape") { @@ -383,13 +436,13 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs( strings::StrCat(node.name(), "-", j), kConstantFoldingConst); out[j] = node_map_->GetNode(const_name); if (out[j] == nullptr) { - out[j] = graph_.add_node(); + out[j] = graph_->add_node(); Tensor value(type, TensorShape({0})); *out[j] = CreateNodeDef(const_name, TensorValue(&value)); out[j]->set_device(node.device()); node_map_->AddNode(const_name, out[j]); string ctrl_dep = - AddControlDependency(node.name(), &graph_, node_map_.get()); + AddControlDependency(node.name(), graph_, node_map_.get()); *out[j]->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), const_name); } @@ -470,7 +523,7 @@ Status ConstantFolding::MaterializeReductionIndices( if (node_map_->GetNode(const_name)) { return Status::OK(); } - NodeDef* reduction_indices = graph_.add_node(); + NodeDef* reduction_indices = graph_->add_node(); Tensor value(dtype, TensorShape({rank})); for (int i = 0; i < rank; ++i) { if (dtype == DT_INT32) { @@ -482,7 +535,7 @@ Status ConstantFolding::MaterializeReductionIndices( *reduction_indices = CreateNodeDef(const_name, TensorValue(&value)); reduction_indices->set_device(node->device()); string ctrl_dep = - AddControlDependency(node->input(1), &graph_, node_map_.get()); + AddControlDependency(node->input(1), graph_, node_map_.get()); *reduction_indices->add_input() = ctrl_dep; node_map_->AddNode(const_name, reduction_indices); node_map_->AddOutput(NodeName(ctrl_dep), const_name); @@ -496,9 +549,9 @@ Status ConstantFolding::MaterializeReductionIndices( Status ConstantFolding::MaterializeConstants( const GraphProperties& properties) { - const int node_count = graph_.node_size(); + const int node_count = graph_->node_size(); for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_.mutable_node(i); + NodeDef& node = *graph_->mutable_node(i); const string& op = node.op(); if (op == "BroadcastGradientArgs") { TF_RETURN_IF_ERROR(MaterializeBroadcastGradientArgs(node, properties)); @@ -602,6 +655,32 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return true; } +namespace { + +#define SET_TENSOR_VAL_CASE(DTYPE, TYPE) \ + case DTYPE: \ + t->add_##TYPE##_val(static_cast(value)); \ + break; + +Status CreateConstantTensorAttrValue(DataType type, double value, + const TensorShapeProto& shape, + AttrValue* attr_tensor) { + TensorProto* t = attr_tensor->mutable_tensor(); + *t->mutable_tensor_shape() = shape; + switch (type) { + SET_TENSOR_VAL_CASE(DT_FLOAT, float); + SET_TENSOR_VAL_CASE(DT_DOUBLE, double); + SET_TENSOR_VAL_CASE(DT_INT64, int64); + SET_TENSOR_VAL_CASE(DT_INT32, int); + default: + return errors::InvalidArgument("Unsupported type: ", type); + } + return Status::OK(); +} + +#undef SET_TENSOR_CAL_CASE +} // namespace + // static NodeDef ConstantFolding::CreateNodeDef(const string& name, const TensorValue& tensor) { @@ -945,8 +1024,8 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) { Status ConstantFolding::FoldGraph(GraphDef* output) { std::unordered_set processed_nodes; std::deque queue; - for (int i = 0; i < graph_.node_size(); i++) { - auto node = graph_.mutable_node(i); + for (int i = 0; i < graph_->node_size(); i++) { + auto node = graph_->mutable_node(i); if (IsFoldable(*node)) { queue.push_back(node); } @@ -985,7 +1064,7 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { output->mutable_node()->DeleteSubrange(last + 1, output->node_size() - last - 1); - for (const auto& node : graph_.node()) { + for (const auto& node : graph_->node()) { // If no fetch nodes is provided, we conservatively // keep all nodes in the original graph in case users need to fetch // their values. @@ -1080,6 +1159,104 @@ bool ConstantFolding::IsSimplifiableReshape( return shape.IsCompatibleWith(new_dims); } +#define IS_VALUE_CASE(DTYPE, VALUE) \ + case DTYPE: \ + return AllValuesAre::Type>( \ + node.attr().at("value").tensor(), EnumToDataType::Type(VALUE)) + +#define IS_ONES_CASE(TYPE) IS_VALUE_CASE(TYPE, 1) +#define IS_ZEROS_CASE(TYPE) IS_VALUE_CASE(TYPE, 0) + +bool ConstantFolding::IsOnes(const NodeDef& node) const { + if (feed_nodes_.find(node.name()) != feed_nodes_.end()) { + return false; + } + if (node.op() == "OnesLike") { + return true; + } + if (node.op() != "Const") { + return false; + } + const auto dtype = node.attr().at("dtype").type(); + switch (dtype) { + // IS_ONES_CASE(DT_HALF); + IS_ONES_CASE(DT_FLOAT); + IS_ONES_CASE(DT_DOUBLE); + IS_ONES_CASE(DT_UINT8); + IS_ONES_CASE(DT_INT8); + IS_ONES_CASE(DT_UINT16); + IS_ONES_CASE(DT_INT16); + IS_ONES_CASE(DT_INT32); + IS_ONES_CASE(DT_INT64); + IS_ONES_CASE(DT_COMPLEX64); + IS_ONES_CASE(DT_COMPLEX128); + default: + LOG(ERROR) << "Unexpected type " << DataTypeString(dtype); + return false; + } + return false; +} + +bool ConstantFolding::IsZeros(const NodeDef& node) const { + if (feed_nodes_.find(node.name()) != feed_nodes_.end()) { + return false; + } + if (node.op() == "ZerosLike") { + return true; + } + if (!IsConstant(node)) { + return false; + } + const auto dtype = node.attr().at("dtype").type(); + switch (dtype) { + // IS_ZEROS_CASE(DT_HALF); + IS_ZEROS_CASE(DT_FLOAT); + IS_ZEROS_CASE(DT_DOUBLE); + IS_ZEROS_CASE(DT_UINT8); + IS_ZEROS_CASE(DT_INT8); + IS_ZEROS_CASE(DT_UINT16); + IS_ZEROS_CASE(DT_INT16); + IS_ZEROS_CASE(DT_INT32); + IS_ZEROS_CASE(DT_INT64); + IS_ZEROS_CASE(DT_COMPLEX64); + IS_ZEROS_CASE(DT_COMPLEX128); + default: + LOG(ERROR) << "Unexpected type " << DataTypeString(dtype); + return false; + } + return false; +} + +void ConstantFolding::ReplaceAddOrMulWithIdentity(int input_to_forward, + NodeDef* node) { + node->set_op("Identity"); + // Propagate the designated input through the identity. + node->mutable_input()->SwapElements(0, input_to_forward); + // Add all other inputs as control dependencies. + for (int i = 1; i < node->input_size(); ++i) { + node->set_input(i, AsControlDependency(node->input(i))); + } + graph_modified_ = true; +} + +Status ConstantFolding::ReplaceAddOrMulWithConstant( + double value, const TensorShapeProto& shape, NodeDef* node) { + AttrValue tensor_attr; + TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(node->attr().at("T").type(), + value, shape, &tensor_attr)); + node->mutable_attr()->insert({"value", tensor_attr}); + node->set_op("Const"); + // Convert all inputs to control dependencies. + for (int i = 0; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) { + break; + } + node->set_input(i, AsControlDependency(node->input(i))); + } + graph_modified_ = true; + return Status::OK(); +} + Status ConstantFolding::SimplifyGraph(GraphDef* output, const GraphProperties& properties, bool use_shape_info) { @@ -1125,6 +1302,63 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, *node.add_input() = input; } } + + // Simplify multiplication by ones or zeros, and addition of zeros. + bool is_mul = IsMul(node); + bool is_add = IsAdd(node); + if (opt_level_ == RewriterConfig::AGGRESSIVE && use_shape_info && + (is_mul || is_add) && properties.HasInputProperties(node.name()) && + properties.HasOutputProperties(node.name())) { + const NodeDef* x = node_map_->GetNode(node.input(0)); + const NodeDef* y = node_map_->GetNode(node.input(1)); + if (x == nullptr || y == nullptr) { + return errors::InvalidArgument("Invalid inputs to node: ", + node.DebugString()); + } + const TensorShapeProto& output_shape = + properties.GetOutputProperties(node.name())[0].shape(); + const TensorShapeProto& x_shape = + properties.GetInputProperties(node.name())[0].shape(); + + // Simplify multiplication by or addition of zeros. + const bool x_is_zero = IsZeros(*x); + const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); + if (x_is_zero && x_matches_output_shape) { + // 0 * y = 0 or 0 + y = y. + ReplaceAddOrMulWithIdentity(is_mul ? 0 : 1, &node); + continue; + } + const TensorShapeProto& y_shape = + properties.GetInputProperties(node.name())[1].shape(); + const bool y_is_zero = IsZeros(*y); + const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); + if (y_is_zero && y_matches_output_shape) { + // x * 0 = 0 or x + 0 = x. + ReplaceAddOrMulWithIdentity(is_mul ? 1 : 0, &node); + continue; + } + + if (is_mul) { + // Simplify multiplication by zeros where the output shape does not + // match the shape of the zero input. + if (x_is_zero || y_is_zero) { + TF_RETURN_IF_ERROR( + ReplaceAddOrMulWithConstant(0, output_shape, &node)); + continue; + } + + // Simplify multiplication by ones. + if (IsOnes(*x) && y_matches_output_shape) { + // 1 * y = y. + ReplaceAddOrMulWithIdentity(1, &node); + continue; + } else if (IsOnes(*y) && x_matches_output_shape) { + // x * 1 = x. + ReplaceAddOrMulWithIdentity(0, &node); + continue; + } + } + } } return Status::OK(); } @@ -1132,7 +1366,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, Status ConstantFolding::RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { - node_map_.reset(new NodeMap(&graph_)); + node_map_.reset(new NodeMap(graph_)); nodes_whitelist_.clear(); // Fold fetch nodes iff it has a single fanout. Note that if a fetch node // has a single fanout, it would be rewritten as a constant with the same @@ -1189,13 +1423,13 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, *output = item.graph; int64 node_count; do { - graph_.Swap(output); - item_to_optimize.graph = graph_; + graph_modified_ = false; + item_to_optimize.graph.Swap(output); + graph_ = &item_to_optimize.graph; *output = GraphDef(); - node_count = graph_.node_size(); + node_count = graph_->node_size(); TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output)); - } while (output->node_size() != node_count); - + } while (graph_modified_ || output->node_size() != node_count); *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 8af5b5fbe6..3bb9926338 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -72,6 +72,12 @@ class ConstantFolding : public GraphOptimizer { Status FoldNode(NodeDef* node, GraphDef* output_graph); + bool IsOnes(const NodeDef& node) const; + bool IsZeros(const NodeDef& node) const; + void ReplaceAddOrMulWithIdentity(int input_to_forward, NodeDef* node); + Status ReplaceAddOrMulWithConstant(double value, + const TensorShapeProto& shape, + NodeDef* node); Status FoldGraph(GraphDef* output); bool IsSimplifiableReduction(const NodeDef& node) const; @@ -89,12 +95,13 @@ class ConstantFolding : public GraphOptimizer { std::unique_ptr owned_device_; std::unique_ptr resource_mgr_; - GraphDef graph_; + GraphDef* graph_; std::unique_ptr node_map_; std::unordered_set nodes_to_preserve_; std::unordered_set nodes_whitelist_; std::unordered_set feed_nodes_; bool has_fetch_; + bool graph_modified_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index b2d9b02c68..c72ed96520 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -77,6 +77,102 @@ TEST_F(ConstantFoldingTest, SimpleFolding) { test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } +TEST_F(ConstantFoldingTest, NeutralElement) { + for (bool use_const : {true, false}) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({1, 2}))); + Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({1, 2}))); + Output zeros = + !use_const ? ops::ZerosLike(s.WithOpName("zeros"), x) + : ops::Const(s.WithOpName("zeros"), {0.0f, 0.0f}, {1, 2}); + Output zeros_broadcast = + ops::Const(s.WithOpName("zeros_broadcast"), {0.0f}, {1, 1}); + Output ones = !use_const + ? ops::OnesLike(s.WithOpName("ones"), x) + : ops::Const(s.WithOpName("ones"), {1.0f, 1.0f}, {1, 2}); + Output mul1 = ops::Mul(s.WithOpName("mul1"), x, zeros); + Output mul2 = ops::Mul(s.WithOpName("mul2"), zeros, y); + Output mul3 = ops::Mul(s.WithOpName("mul3"), x, ones); + Output mul4 = ops::Mul(s.WithOpName("mul4"), ones, y); + Output mul5 = ops::Mul(s.WithOpName("mul1"), x, zeros_broadcast); + Output mul6 = ops::Mul(s.WithOpName("mul2"), zeros_broadcast, y); + Output add1 = ops::Add(s.WithOpName("add1"), x, zeros); + Output add2 = ops::Add(s.WithOpName("add2"), zeros, y); + Output addn = ops::AddN(s, {mul1, mul2, mul3, mul4, add1, add2}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, + nullptr /* cpu_device */); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(14, output.node_size()); + for (int i = 0; i < output.node_size(); ++i) { + const NodeDef& node = output.node(i); + const string& name = node.name(); + if (name == "mul1") { + if (use_const) { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ("^x", node.input(0)); + } else { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("zeros", node.input(0)); + EXPECT_EQ("^x", node.input(1)); + } + } else if (name == "mul2") { + if (use_const) { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ("^y", node.input(0)); + } else { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("zeros", node.input(0)); + EXPECT_EQ("^y", node.input(1)); + } + } else if (name == "mul3") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("^ones", node.input(1)); + } else if (name == "mul4") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("^ones", node.input(1)); + } else if (name == "mul5") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ("^x", node.input(0)); + EXPECT_EQ("^ones", node.input(1)); + TensorProto t = node.attr().at("value").tensor(); + EXPECT_EQ(1, t.float_val_size()); + EXPECT_EQ(0, t.float_val(0)); + EXPECT_EQ(2, t.tensor_shape().dim_size()); + EXPECT_EQ(1, t.tensor_shape().dim(0).size()); + EXPECT_EQ(2, t.tensor_shape().dim(1).size()); + } else if (name == "mul6") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ("^y", node.input(0)); + EXPECT_EQ("^ones", node.input(1)); + TensorProto t = node.attr().at("value").tensor(); + EXPECT_EQ(1, t.float_val_size()); + EXPECT_EQ(0, t.float_val(0)); + EXPECT_EQ(2, t.tensor_shape().dim_size()); + EXPECT_EQ(1, t.tensor_shape().dim(0).size()); + EXPECT_EQ(2, t.tensor_shape().dim(1).size()); + } else if (name == "add1") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("^zeros", node.input(1)); + } else if (name == "add2") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("^zeros", node.input(1)); + } + } + } +} + TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { // Build a simple graph with a few trivially prunable ops. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); -- GitLab From 5e54d87f94271ce671ddc874cca8d34c83c180cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 13:14:09 -0800 Subject: [PATCH 1001/1801] Add an option to override maximum number of elements in the quantile accumulator. PiperOrigin-RevId: 177497240 --- tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py index 7e8e15e7d8..294e04002a 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py @@ -45,6 +45,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject): init_stamp_token, epsilon, num_quantiles, + max_elements=None, name=None, container=None): """Creates a QuantileAccumulator object. @@ -53,6 +54,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject): init_stamp_token: The initial value for the stamp token. epsilon: Error bound on the quantile computation. num_quantiles: Number of quantiles to produce from the final summary. + max_elements: Maximum number of elements added to the accumulator. name: the name to save the accumulator under. container: An optional `string`. Defaults to `""` """ @@ -67,6 +69,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject): self._quantile_accumulator_handle, init_stamp_token, epsilon=epsilon, + max_elements=max_elements, num_quantiles=num_quantiles) is_initialized_op = gen_quantile_ops.quantile_accumulator_is_initialized( self._quantile_accumulator_handle) -- GitLab From 62b70f5566768e0fd57013e3042a402830b2c4f0 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Thu, 30 Nov 2017 13:29:12 -0800 Subject: [PATCH 1002/1801] Support binary operations with a scalar and a 4d tensor as input; refactor connectivity check code. PiperOrigin-RevId: 177499365 --- .../grappler/optimizers/layout_optimizer.cc | 87 ++++++++---- .../optimizers/layout_optimizer_test.cc | 128 ++++++++++++++++++ 2 files changed, 189 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index ef4b015295..97c8e6f907 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -761,24 +761,52 @@ class AgnosticNodeProcessor : public NodeProcessor { bool IsNodeAfterNCHWToNHWC() const { std::set ops_format_agnostic = GetOpsFormatAgnostic(); - auto node = node_map_->GetNode(node_->name()); - while (node->input_size() > 0) { - int data_input_pos = 0; - if (IsConcatV1(*node) || IsSplit(*node)) { - data_input_pos = 1; - } - node = node_map_->GetNode(node->input(data_input_pos)); - if (IsNodeNCHWToNHWC(node->name())) { + std::deque queue; + auto first_node_pos = DataInputPos(*node_); + for (const auto& pos : first_node_pos) { + auto input_node = node_map_->GetNode(node_->input(pos)); + queue.push_back(input_node); + } + // The code will exit this while loop in one iteration in most cases, as the + // graph is already topologically sorted. + while (!queue.empty()) { + NodeDef* current_node = queue.front(); + queue.pop_front(); + if (IsNodeNCHWToNHWC(current_node->name())) { return true; } - bool connected = - ops_format_agnostic.find(node->op()) != ops_format_agnostic.end(); - if (!connected) { - return false; + // We only continue searching if the path is connected through + // format-agnostic nodes. + if (ops_format_agnostic.find(current_node->op()) != + ops_format_agnostic.end()) { + auto current_node_pos = DataInputPos(*current_node); + for (const auto& pos : current_node_pos) { + auto input_node = node_map_->GetNode(current_node->input(pos)); + queue.push_back(input_node); + } } } return false; } + + private: + std::vector DataInputPos(const NodeDef& node) const { + std::vector pos; + if (IsSplit(node)) { + return {1}; + } + if (IsConcatV1(node)) { + return {1}; + } + if (IsAdd(node) || IsMul(node) || IsRealDiv(node) || + IsSquaredDifference(node) || IsSub(node)) { + return {0, 1}; + } + if (node.input_size() > 0 && !IsControlInput(node.input(0))) { + return {0}; + } + return {}; + } }; class AddNProcessor : public AgnosticNodeProcessor { @@ -801,42 +829,49 @@ class BinaryOpProcessor : public AgnosticNodeProcessor { public: explicit BinaryOpProcessor(const OptimizeContext& opt_cxt) : AgnosticNodeProcessor(opt_cxt) { - is_4d_with_vector_ = Is4DOperateWithVector(); + is_4d_with_vector_ = IsNDOperateWithMD(4, 1); } protected: bool ShouldProcess() const override { + // TODO(yaozhang): Support IsNDOperateWithMD(1, 4): first input is a vector + // and the second input is a 4D tensor; and update CustomizedProcessing() + // accordingly. return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() && - (Is4DOperateWithND(4) || Is4DOperateWithScalar() || - Is4DOperateWithVector()) && + (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) || + IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4)) && IsOnGPU(); } std::vector GetInputPos() const override { - std::vector input_pos = {0}; - if (Is4DOperateWithND(4)) { + std::vector input_pos; + auto input0 = node_map_->GetNode(node_->input(0)); + auto input1 = node_map_->GetNode(node_->input(1)); + if (IsDimsFour(*input0)) { + input_pos.push_back(0); + } + if (IsDimsFour(*input1)) { input_pos.push_back(1); } return input_pos; } - bool Is4DOperateWithND(int n) const { + bool IsDimsFour(const NodeDef& node) const { + return NodeProcessor::IsDimsFour(node) || IsNodeNCHWToNHWC(node.name()); + } + + bool IsNDOperateWithMD(int n, int m) const { auto input0 = node_map_->GetNode(node_->input(0)); auto input1 = node_map_->GetNode(node_->input(1)); if (input0 && input1) { - return (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) && - ((n == 4) - ? (IsDimsFour(*input1) || IsNodeNCHWToNHWC(input1->name())) - : IsDimsN(*input1, n)); + bool input0_is_n = (n == 4) ? IsDimsFour(*input0) : IsDimsN(*input0, n); + bool input1_is_m = (m == 4) ? IsDimsFour(*input1) : IsDimsN(*input1, m); + return input0_is_n && input1_is_m; } return false; } - bool Is4DOperateWithScalar() const { return Is4DOperateWithND(0); } - - bool Is4DOperateWithVector() const { return Is4DOperateWithND(1); } - NodeDef* AddNodeShapeConst(const string& name, int num_channels) { NodeDef* node = graph_->add_node(); node_map_->AddNode(name, node); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index e8f7b8ac3c..363b4c3fd8 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -298,6 +298,39 @@ TEST_F(LayoutOptimizerTest, Connectivity) { EXPECT_EQ(node_i2_output->input(0), "i1"); } +TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto i1 = ops::Identity(s.WithOpName("i1"), conv); + auto i2 = ops::Identity(s.WithOpName("i2"), i1); + auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {}); + auto sub = ops::Sub(s.WithOpName("sub"), scalar_sub, i2); + auto i3 = ops::Identity(s.WithOpName("i3"), sub); + auto i4 = ops::Identity(s.WithOpName("i4"), i3); + auto i5 = ops::Identity(s.WithOpName("i5"), i4); + auto scalar_mul = ops::Const(s.WithOpName("scalar_mul"), 3.0f, {}); + auto mul = ops::Mul(s.WithOpName("mul"), scalar_mul, i5); + auto i6 = ops::Identity(s.WithOpName("i6"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + // Make the graph not in topological order to test the handling of multi-hop + // connectivity (here we say two nodes are connected if all nodes in the + // middle are layout agnostic). If the graph is already in topological order, + // the problem is easier, where layout optimizer only needs to check + // single-hop connectivity. + NodeMap node_map_original(&item.graph); + auto node_i1 = node_map_original.GetNode("i1"); + auto node_mul = node_map_original.GetNode("mul"); + node_mul->Swap(node_i1); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map_output(&output); + auto mul_node = node_map_output.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "scalar_mul"); + EXPECT_EQ(mul_node->input(1), "i5"); +} + TEST_F(LayoutOptimizerTest, PreserveFetch) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); auto conv = SimpleConv2D(&s, 3, 2, "VALID"); @@ -571,6 +604,101 @@ TEST_F(LayoutOptimizerTest, Sum) { */ } +TEST_F(LayoutOptimizerTest, MulScalarAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {}); + auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "scalar"); + EXPECT_EQ(mul_node->input(1), "Conv2D"); +} + +TEST_F(LayoutOptimizerTest, Mul4DAndScalar) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {}); + auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "Conv2D"); + EXPECT_EQ(mul_node->input(1), "scalar"); +} + +TEST_F(LayoutOptimizerTest, Mul4DAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto i = ops::Identity(s.WithOpName("i"), conv); + auto mul = ops::Mul(s.WithOpName("mul"), conv, i); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "Conv2D"); + EXPECT_EQ(mul_node->input(1), "i"); +} + +TEST_F(LayoutOptimizerTest, Mul4DAndVector) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2}); + auto mul = ops::Mul(s.WithOpName("mul"), conv, vector); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "Conv2D"); + EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector"); + auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector"); + Tensor tensor; + EXPECT_TRUE( + tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor())); + Tensor tensor_expected(DT_INT32, {4}); + test::FillValues(&tensor_expected, {1, 2, 1, 1}); + test::ExpectTensorEqual(tensor_expected, tensor); +} + +TEST_F(LayoutOptimizerTest, MulVectorAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2}); + auto mul = ops::Mul(s.WithOpName("mul"), vector, conv); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + // TODO(yaozhang): Support vector as the first input and 4d tensor as the + // second input for BinaryOpProcessor. + EXPECT_EQ(mul_node->input(0), "vector"); + EXPECT_EQ(mul_node->input(1), + "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1"); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 15b06e060af59a1e30f4a9079679718aaa68dbc7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 13:48:37 -0800 Subject: [PATCH 1003/1801] Add R1 slice tests. PiperOrigin-RevId: 177502259 --- tensorflow/compiler/xla/tests/slice_test.cc | 90 +++++++++++++++------ 1 file changed, 67 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index c21124750a..4db566f784 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -211,6 +212,13 @@ class SliceR1Test : public ClientLibraryTestBase, } }; +string SliceR1TestDataToString(const ::testing::TestParamInfo& data) { + const R1Spec& spec = data.param; + return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0, + spec.slice_start, spec.slice_limit, + spec.slice_stride); +} + XLA_TEST_P(SliceR1Test, DoIt_F32) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_F64) { Run(GetParam()); } @@ -223,30 +231,66 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_S64) { Run(GetParam()); } -INSTANTIATE_TEST_CASE_P( // - SliceR1TestInstantiation, // - SliceR1Test, // - ::testing::Values( // - R1Spec{10, 0, 0, 1}, // - R1Spec{10, 7, 7, 1}, // - R1Spec{10, 2, 4, 1}, // - R1Spec{10, 2, 4, 2}, // - R1Spec{10, 0, 10, 1}, // - R1Spec{1024, 1024 - 4, 1024, 1}, // - R1Spec{4096, 7, 7 + 1024, 1}, // - R1Spec{10, 0, 10, 2}, // - R1Spec{10, 0, 10, 3}, // - R1Spec{10, 0, 10, 4}, // - R1Spec{10, 0, 10, 5}, // - R1Spec{10, 0, 10, 10}, // - R1Spec{500, 200, 400, 7}, // - R1Spec{4096, 1, 4095, 3}, // - R1Spec{2047, 1024 - 24, 1024 + 160, 31}, // - R1Spec{2047, 1, 2046, 3 * 128}, // - R1Spec{4096, 1024 + 3, 4095, 500}, // - R1Spec{8192, 0, 8192, 1024 * 3 + 400} // - ) // +// Tests for R1 slice ops. +// The format for each testcase is {input size, start, limit, stride}. +// clang-format off +INSTANTIATE_TEST_CASE_P( + SliceR1TestInstantiation, + SliceR1Test, + ::testing::Values( + R1Spec{10, 0, 0, 1}, + R1Spec{10, 7, 7, 1}, + R1Spec{10, 0, 5, 1}, + R1Spec{10, 3, 5, 1}, + R1Spec{10, 0, 10, 1}, + R1Spec{1024, 0, 5, 1}, + R1Spec{1024, 3, 5, 1}, + R1Spec{1024 + 17, 0, 5, 1}, + R1Spec{1024 + 17, 3, 5, 1}, + R1Spec{1024 + 17, 1024, 1024 + 6, 1}, + R1Spec{1024 + 17, 1024 + 1, 1024 + 6, 1}, + R1Spec{1024, 1024 - 4, 1024, 1}, + R1Spec{4 * 1024, 7, 7 + 1024, 1}, + R1Spec{4 * 1024, 0, 4 * 1024, 1}, + R1Spec{4 * 1024, 1, 4 * 1024 - 1, 1}, + R1Spec{4 * 1024, 1024, 3 * 1024, 1}, + R1Spec{4 * 1024, 1024 + 1, 3 * 1024 - 1, 1}, + R1Spec{16 * 1024, 0, 5, 1}, + R1Spec{16 * 1024, 3, 5, 1}, + R1Spec{16 * 1024 + 17, 0, 5, 1}, + R1Spec{16 * 1024 + 17, 3, 5, 1}, + R1Spec{16 * 1024 + 17, 16 * 1024, 16 * 1024 + 6, 1}, + R1Spec{16 * 1024 + 17, 16 * 1024 + 1, 16 * 1024 + 6, 1}, + R1Spec{16 * 1024, 4 * 1024 - 17, 8 * 1024 - 18, 1}, + R1Spec{64 * 1024, 0, 64 * 1024, 1}, + R1Spec{64 * 1024, 1, 64 * 1024 - 1, 1}, + R1Spec{64 * 1024, 1024, 63 * 1024, 1}, + R1Spec{64 * 1024, 1024 + 1, 63 * 1024 - 1, 1}, + R1Spec{64 * 1024, 32 * 1024, 33 * 1024, 1}, + R1Spec{64 * 1024, 32 * 1024 + 1, 33 * 1024 - 1, 1}, + R1Spec{64 * 1024, 32 * 1024 - 17, 36 * 1024 - 18, 1}, +// TODO(b/69425338): This uses too much memory on GPU. +#ifndef XLA_TEST_BACKEND_GPU + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1}, +#endif + R1Spec{10, 2, 4, 2}, + R1Spec{10, 0, 10, 2}, + R1Spec{10, 0, 10, 3}, + R1Spec{10, 0, 10, 4}, + R1Spec{10, 0, 10, 5}, + R1Spec{10, 0, 10, 10}, + R1Spec{500, 200, 400, 7}, + R1Spec{4096, 1, 4095, 3}, + R1Spec{2047, 1024 - 24, 1024 + 160, 31}, + R1Spec{2047, 1, 2046, 3 * 128}, + R1Spec{4096, 1024 + 3, 4095, 500}, + R1Spec{8192, 0, 8192, 1024 * 3 + 400} + ), + SliceR1TestDataToString ); +// clang-format on struct R2Spec { int64 input_dim0; -- GitLab From 39cac0519176d1244b0e29d6c28691189ea755ec Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 30 Nov 2017 13:50:13 -0800 Subject: [PATCH 1004/1801] [TF:XLA] Allow bfloat16 types in more places. PiperOrigin-RevId: 177502497 --- .../compiler/tf2xla/kernels/matmul_op.cc | 4 +- tensorflow/compiler/tf2xla/lib/util.cc | 3 ++ tensorflow/compiler/tf2xla/xla_helpers.cc | 7 ++- tensorflow/compiler/xla/literal_util.cc | 6 ++- tensorflow/core/framework/bfloat16_test.cc | 12 ++++++ tensorflow/core/framework/numeric_types.h | 43 ++++++++++++++++--- 6 files changed, 65 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc index fcef497e58..a62d233526 100644 --- a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc @@ -23,8 +23,8 @@ limitations under the License. namespace tensorflow { namespace { -constexpr std::array kMatmulTypes = { - {DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64}}; +constexpr std::array kMatmulTypes = { + {DT_HALF, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64}}; class MatMulOp : public XlaOpKernel { public: diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc index 7ffe0aa6df..943248aedb 100644 --- a/tensorflow/compiler/tf2xla/lib/util.cc +++ b/tensorflow/compiler/tf2xla/lib/util.cc @@ -40,6 +40,9 @@ xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, case xla::F16: return builder->ConstantR0(static_cast(value)); break; + case xla::BF16: + return builder->ConstantR0(static_cast(value)); + break; case xla::F32: return builder->ConstantR0(static_cast(value)); break; diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 9c3e15d2fa..ec9e535b70 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// This file defines helper routines for Tla JIT compilation. +// This file defines helper routines for XLA compilation. #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/lib/util.h" @@ -121,6 +121,8 @@ xla::ComputationDataHandle XlaHelpers::One(xla::ComputationBuilder* b, xla::ComputationDataHandle XlaHelpers::Epsilon(xla::ComputationBuilder* b, DataType data_type) { switch (data_type) { + case DT_BFLOAT16: + return b->ConstantR0(bfloat16::epsilon()); case DT_FLOAT: return b->ConstantR0(std::numeric_limits::epsilon()); case DT_DOUBLE: @@ -169,6 +171,9 @@ xla::ComputationDataHandle XlaHelpers::IntegerLiteral( case xla::S16: case xla::U16: LOG(FATAL) << "u16/s16 literals not yet implemented"; + case xla::BF16: + literal = *xla::Literal::CreateR0(static_cast(value)); + break; case xla::F16: literal = *xla::Literal::CreateR0(static_cast(value)); diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 93d3cd425f..250df5f4d5 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -252,6 +252,10 @@ Status Literal::Copy(const Literal& src_literal, return *Literal::CreateR0(1); case S64: return *Literal::CreateR0(1); + case F16: + return *Literal::CreateR0(static_cast(1.0f)); + case BF16: + return *Literal::CreateR0(static_cast(1.0f)); case F32: return *Literal::CreateR0(1); case F64: @@ -263,8 +267,6 @@ Status Literal::Copy(const Literal& src_literal, case S16: case U16: LOG(FATAL) << "u16/s16 literals not yet implemented"; - case F16: - return *Literal::CreateR0(static_cast(1.0f)); case TUPLE: LOG(FATAL) << "tuple element type cannot take on value of 1"; case OPAQUE: diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index 6e45338751..17e6209f8e 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/bfloat16.h" +#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -104,6 +105,17 @@ TEST(Bfloat16Test, Conversion) { } } +TEST(Bfloat16Test, Epsilon) { + EXPECT_LT(1.0f, static_cast(bfloat16::epsilon() + bfloat16(1.0f))); + EXPECT_EQ(1.0f, static_cast((bfloat16::epsilon() / bfloat16(2.0f)) + + bfloat16(1.0f))); +} + +TEST(Bfloat16Test, Negate) { + EXPECT_EQ(-3.0f, static_cast(-bfloat16(3.0f))); + EXPECT_EQ(4.5f, static_cast(-bfloat16(-4.5f))); +} + static void BM_FloatToBFloat16(int iters) { testing::StopTiming(); static const int N = 32 << 20; diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index 2b080e13fd..29cac26244 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -121,15 +121,48 @@ struct bfloat16 { return static_cast(float(*this)); } + static bfloat16 epsilon() { + bfloat16 x; + x.value = 0x3c00; // 0x1.0p-7 + return x; + } + uint16_t value; }; -inline bool operator==(const bfloat16 a, const bfloat16 b) { - return a.value == b.value; +inline bfloat16 operator+(bfloat16 a, bfloat16 b) { + return bfloat16(static_cast(a) + static_cast(b)); } - -inline bool operator!=(const bfloat16 a, const bfloat16 b) { - return a.value != b.value; +inline bfloat16 operator-(bfloat16 a, bfloat16 b) { + return bfloat16(static_cast(a) - static_cast(b)); +} +inline bfloat16 operator*(bfloat16 a, bfloat16 b) { + return bfloat16(static_cast(a) * static_cast(b)); +} +inline bfloat16 operator/(bfloat16 a, bfloat16 b) { + return bfloat16(static_cast(a) / static_cast(b)); +} +inline bfloat16 operator-(bfloat16 a) { + a.value ^= 0x8000; + return a; +} +inline bool operator<(bfloat16 a, bfloat16 b) { + return static_cast(a) < static_cast(b); +} +inline bool operator<=(bfloat16 a, bfloat16 b) { + return static_cast(a) <= static_cast(b); +} +inline bool operator==(bfloat16 a, bfloat16 b) { + return static_cast(a) == static_cast(b); +} +inline bool operator!=(bfloat16 a, bfloat16 b) { + return static_cast(a) != static_cast(b); +} +inline bool operator>(bfloat16 a, bfloat16 b) { + return static_cast(a) > static_cast(b); +} +inline bool operator>=(bfloat16 a, bfloat16 b) { + return static_cast(a) >= static_cast(b); } } // end namespace tensorflow -- GitLab From bc2b4b0679dc6e4ad6dc543d475d759f3ad6cadf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 13:57:33 -0800 Subject: [PATCH 1005/1801] Enable tests that pass now with the new copy insertion. PiperOrigin-RevId: 177503567 --- tensorflow/compiler/xla/tests/while_test.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 49f673f5f0..f3f10517e3 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -357,8 +357,7 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001)); } -// TODO(b/63003356): 11-06-2017: fails on all back-ends with incorrect result. -TEST_F(WhileTest, DISABLED_WhileWithPermutationAndTupleResult) { +TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { std::vector shape_elements = { ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})}; @@ -411,8 +410,7 @@ TEST_F(WhileTest, DISABLED_WhileWithPermutationAndTupleResult) { ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001)); } -// TODO(b/63003356): 11-06-2017: fails on all back-ends with incorrect result. -TEST_F(WhileTest, DISABLED_WhileWithPermutationAndVectorResult) { +TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { std::vector shape_elements = { ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})}; -- GitLab From 99525c7e1e21b3548eafdc7ae606ac1df2bf06fe Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 30 Nov 2017 14:11:17 -0800 Subject: [PATCH 1006/1801] Automated g4 rollback of changelist 177499365 PiperOrigin-RevId: 177505909 --- .../grappler/optimizers/layout_optimizer.cc | 87 ++++-------- .../optimizers/layout_optimizer_test.cc | 128 ------------------ 2 files changed, 26 insertions(+), 189 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index 97c8e6f907..ef4b015295 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -761,52 +761,24 @@ class AgnosticNodeProcessor : public NodeProcessor { bool IsNodeAfterNCHWToNHWC() const { std::set ops_format_agnostic = GetOpsFormatAgnostic(); - std::deque queue; - auto first_node_pos = DataInputPos(*node_); - for (const auto& pos : first_node_pos) { - auto input_node = node_map_->GetNode(node_->input(pos)); - queue.push_back(input_node); - } - // The code will exit this while loop in one iteration in most cases, as the - // graph is already topologically sorted. - while (!queue.empty()) { - NodeDef* current_node = queue.front(); - queue.pop_front(); - if (IsNodeNCHWToNHWC(current_node->name())) { + auto node = node_map_->GetNode(node_->name()); + while (node->input_size() > 0) { + int data_input_pos = 0; + if (IsConcatV1(*node) || IsSplit(*node)) { + data_input_pos = 1; + } + node = node_map_->GetNode(node->input(data_input_pos)); + if (IsNodeNCHWToNHWC(node->name())) { return true; } - // We only continue searching if the path is connected through - // format-agnostic nodes. - if (ops_format_agnostic.find(current_node->op()) != - ops_format_agnostic.end()) { - auto current_node_pos = DataInputPos(*current_node); - for (const auto& pos : current_node_pos) { - auto input_node = node_map_->GetNode(current_node->input(pos)); - queue.push_back(input_node); - } + bool connected = + ops_format_agnostic.find(node->op()) != ops_format_agnostic.end(); + if (!connected) { + return false; } } return false; } - - private: - std::vector DataInputPos(const NodeDef& node) const { - std::vector pos; - if (IsSplit(node)) { - return {1}; - } - if (IsConcatV1(node)) { - return {1}; - } - if (IsAdd(node) || IsMul(node) || IsRealDiv(node) || - IsSquaredDifference(node) || IsSub(node)) { - return {0, 1}; - } - if (node.input_size() > 0 && !IsControlInput(node.input(0))) { - return {0}; - } - return {}; - } }; class AddNProcessor : public AgnosticNodeProcessor { @@ -829,49 +801,42 @@ class BinaryOpProcessor : public AgnosticNodeProcessor { public: explicit BinaryOpProcessor(const OptimizeContext& opt_cxt) : AgnosticNodeProcessor(opt_cxt) { - is_4d_with_vector_ = IsNDOperateWithMD(4, 1); + is_4d_with_vector_ = Is4DOperateWithVector(); } protected: bool ShouldProcess() const override { - // TODO(yaozhang): Support IsNDOperateWithMD(1, 4): first input is a vector - // and the second input is a 4D tensor; and update CustomizedProcessing() - // accordingly. return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() && - (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) || - IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4)) && + (Is4DOperateWithND(4) || Is4DOperateWithScalar() || + Is4DOperateWithVector()) && IsOnGPU(); } std::vector GetInputPos() const override { - std::vector input_pos; - auto input0 = node_map_->GetNode(node_->input(0)); - auto input1 = node_map_->GetNode(node_->input(1)); - if (IsDimsFour(*input0)) { - input_pos.push_back(0); - } - if (IsDimsFour(*input1)) { + std::vector input_pos = {0}; + if (Is4DOperateWithND(4)) { input_pos.push_back(1); } return input_pos; } - bool IsDimsFour(const NodeDef& node) const { - return NodeProcessor::IsDimsFour(node) || IsNodeNCHWToNHWC(node.name()); - } - - bool IsNDOperateWithMD(int n, int m) const { + bool Is4DOperateWithND(int n) const { auto input0 = node_map_->GetNode(node_->input(0)); auto input1 = node_map_->GetNode(node_->input(1)); if (input0 && input1) { - bool input0_is_n = (n == 4) ? IsDimsFour(*input0) : IsDimsN(*input0, n); - bool input1_is_m = (m == 4) ? IsDimsFour(*input1) : IsDimsN(*input1, m); - return input0_is_n && input1_is_m; + return (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) && + ((n == 4) + ? (IsDimsFour(*input1) || IsNodeNCHWToNHWC(input1->name())) + : IsDimsN(*input1, n)); } return false; } + bool Is4DOperateWithScalar() const { return Is4DOperateWithND(0); } + + bool Is4DOperateWithVector() const { return Is4DOperateWithND(1); } + NodeDef* AddNodeShapeConst(const string& name, int num_channels) { NodeDef* node = graph_->add_node(); node_map_->AddNode(name, node); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index 363b4c3fd8..e8f7b8ac3c 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -298,39 +298,6 @@ TEST_F(LayoutOptimizerTest, Connectivity) { EXPECT_EQ(node_i2_output->input(0), "i1"); } -TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto conv = SimpleConv2D(&s, 3, 2, "VALID"); - auto i1 = ops::Identity(s.WithOpName("i1"), conv); - auto i2 = ops::Identity(s.WithOpName("i2"), i1); - auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {}); - auto sub = ops::Sub(s.WithOpName("sub"), scalar_sub, i2); - auto i3 = ops::Identity(s.WithOpName("i3"), sub); - auto i4 = ops::Identity(s.WithOpName("i4"), i3); - auto i5 = ops::Identity(s.WithOpName("i5"), i4); - auto scalar_mul = ops::Const(s.WithOpName("scalar_mul"), 3.0f, {}); - auto mul = ops::Mul(s.WithOpName("mul"), scalar_mul, i5); - auto i6 = ops::Identity(s.WithOpName("i6"), mul); - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - // Make the graph not in topological order to test the handling of multi-hop - // connectivity (here we say two nodes are connected if all nodes in the - // middle are layout agnostic). If the graph is already in topological order, - // the problem is easier, where layout optimizer only needs to check - // single-hop connectivity. - NodeMap node_map_original(&item.graph); - auto node_i1 = node_map_original.GetNode("i1"); - auto node_mul = node_map_original.GetNode("mul"); - node_mul->Swap(node_i1); - LayoutOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); - NodeMap node_map_output(&output); - auto mul_node = node_map_output.GetNode("mul"); - EXPECT_EQ(mul_node->input(0), "scalar_mul"); - EXPECT_EQ(mul_node->input(1), "i5"); -} - TEST_F(LayoutOptimizerTest, PreserveFetch) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); auto conv = SimpleConv2D(&s, 3, 2, "VALID"); @@ -604,101 +571,6 @@ TEST_F(LayoutOptimizerTest, Sum) { */ } -TEST_F(LayoutOptimizerTest, MulScalarAnd4D) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto conv = SimpleConv2D(&s, 3, 2, "VALID"); - auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {}); - auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv); - auto o = ops::Identity(s.WithOpName("o"), mul); - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - LayoutOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); - NodeMap node_map(&output); - auto mul_node = node_map.GetNode("mul"); - EXPECT_EQ(mul_node->input(0), "scalar"); - EXPECT_EQ(mul_node->input(1), "Conv2D"); -} - -TEST_F(LayoutOptimizerTest, Mul4DAndScalar) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto conv = SimpleConv2D(&s, 3, 2, "VALID"); - auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {}); - auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar); - auto o = ops::Identity(s.WithOpName("o"), mul); - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - LayoutOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); - NodeMap node_map(&output); - auto mul_node = node_map.GetNode("mul"); - EXPECT_EQ(mul_node->input(0), "Conv2D"); - EXPECT_EQ(mul_node->input(1), "scalar"); -} - -TEST_F(LayoutOptimizerTest, Mul4DAnd4D) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto conv = SimpleConv2D(&s, 3, 2, "VALID"); - auto i = ops::Identity(s.WithOpName("i"), conv); - auto mul = ops::Mul(s.WithOpName("mul"), conv, i); - auto o = ops::Identity(s.WithOpName("o"), mul); - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - LayoutOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); - NodeMap node_map(&output); - auto mul_node = node_map.GetNode("mul"); - EXPECT_EQ(mul_node->input(0), "Conv2D"); - EXPECT_EQ(mul_node->input(1), "i"); -} - -TEST_F(LayoutOptimizerTest, Mul4DAndVector) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto conv = SimpleConv2D(&s, 3, 2, "VALID"); - auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2}); - auto mul = ops::Mul(s.WithOpName("mul"), conv, vector); - auto o = ops::Identity(s.WithOpName("o"), mul); - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - LayoutOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); - NodeMap node_map(&output); - auto mul_node = node_map.GetNode("mul"); - EXPECT_EQ(mul_node->input(0), "Conv2D"); - EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector"); - auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector"); - Tensor tensor; - EXPECT_TRUE( - tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor())); - Tensor tensor_expected(DT_INT32, {4}); - test::FillValues(&tensor_expected, {1, 2, 1, 1}); - test::ExpectTensorEqual(tensor_expected, tensor); -} - -TEST_F(LayoutOptimizerTest, MulVectorAnd4D) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto conv = SimpleConv2D(&s, 3, 2, "VALID"); - auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2}); - auto mul = ops::Mul(s.WithOpName("mul"), vector, conv); - auto o = ops::Identity(s.WithOpName("o"), mul); - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - LayoutOptimizer optimizer; - GraphDef output; - Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); - NodeMap node_map(&output); - auto mul_node = node_map.GetNode("mul"); - // TODO(yaozhang): Support vector as the first input and 4d tensor as the - // second input for BinaryOpProcessor. - EXPECT_EQ(mul_node->input(0), "vector"); - EXPECT_EQ(mul_node->input(1), - "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1"); -} - } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From ceeb930773d2af5c5b594c515988399e0d5bdc9a Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj <1517779+sb2nov@users.noreply.github.com> Date: Thu, 30 Nov 2017 14:16:14 -0800 Subject: [PATCH 1007/1801] Fix export_test failure (#15011) --- tensorflow/python/estimator/export/export_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py index 3cbef4707a..8442bf04ac 100644 --- a/tensorflow/python/estimator/export/export_test.py +++ b/tensorflow/python/estimator/export/export_test.py @@ -358,7 +358,8 @@ class ExportTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError) as e: export.build_all_signature_defs(receiver_tensor, None) - self.assertEqual("export_outputs must be a dict.", str(e.exception)) + self.assertTrue(str(e.exception).startswith( + "export_outputs must be a dict")) def test_get_timestamped_export_dir(self): export_dir_base = tempfile.mkdtemp() + "export/" -- GitLab From 06a5cc4acd377815a0a6bbaac6ea0ab972f1c8fb Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj <1517779+sb2nov@users.noreply.github.com> Date: Thu, 30 Nov 2017 14:16:34 -0800 Subject: [PATCH 1008/1801] Fix dataset tests broken on HEAD (#15012) --- tensorflow/contrib/data/python/kernel_tests/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 0f3ed9084d..1d4817fa26 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -143,6 +143,7 @@ py_test( size = "small", srcs = ["filter_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -312,6 +313,7 @@ py_test( size = "small", srcs = ["prefetch_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/python:platform", -- GitLab From 3a011f904112fe8c61017248d343798569b174f0 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 30 Nov 2017 14:12:54 -0800 Subject: [PATCH 1009/1801] Disable state_saving_rnn_estimator_test in asan mode. PiperOrigin-RevId: 177506166 --- tensorflow/contrib/learn/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 94920db574..26bbcab307 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -461,6 +461,7 @@ py_test( size = "medium", srcs = ["python/learn/estimators/state_saving_rnn_estimator_test.py"], srcs_version = "PY2AND3", + tags = ["noasan"], deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", -- GitLab From 6ebb6d6465ddf2380430de7aa287676e9440df7e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 14:14:29 -0800 Subject: [PATCH 1010/1801] TF server should not crash when -v=1 is enabled. These WriteTextProto() calls are purely for diagnostics (and are usually called within IF_VLOG_IS_ON(1) guards), but if they fail to write to a file, they'll take down the entire calling process. Which makes debugging difficult, and seems rather astonishing. PiperOrigin-RevId: 177506379 --- tensorflow/compiler/tf2xla/dump_graph.cc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/dump_graph.cc b/tensorflow/compiler/tf2xla/dump_graph.cc index ddd912b873..03603ee9ba 100644 --- a/tensorflow/compiler/tf2xla/dump_graph.cc +++ b/tensorflow/compiler/tf2xla/dump_graph.cc @@ -63,7 +63,12 @@ string MakeUniquePath(string name) { string DumpGraphDefToFile(const string& name, GraphDef const& graph_def) { string path = MakeUniquePath(name); - TF_CHECK_OK(WriteTextProto(Env::Default(), path, graph_def)); + Status status = WriteTextProto(Env::Default(), path, graph_def); + if (!status.ok()) { + VLOG(1) << "Failed to dump GraphDef to file: " << path << " : " << status; + path.clear(); + path = "(unavailable)"; + } return path; } @@ -79,7 +84,13 @@ string DumpGraphToFile(const string& name, Graph const& graph, string DumpFunctionDefToFile(const string& name, FunctionDef const& fdef) { string path = MakeUniquePath(name); - TF_CHECK_OK(WriteTextProto(Env::Default(), path, fdef)); + Status status = WriteTextProto(Env::Default(), path, fdef); + if (!status.ok()) { + VLOG(1) << "Failed to dump FunctionDef to file: " << path << " : " + << status; + path.clear(); + path = "(unavailable)"; + } return path; } -- GitLab From 6bfc73a0b3c6810725a5eb0020470457cc5cc23e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 30 Nov 2017 14:26:58 -0800 Subject: [PATCH 1011/1801] Extract out a MathUtil::GCD helper This fixes a TODO. PiperOrigin-RevId: 177508258 --- .../compiler/xla/service/cpu/ir_emitter.cc | 22 ++++---------- tensorflow/core/lib/core/arena.cc | 18 ++---------- tensorflow/core/lib/math/math_util.h | 17 +++++++++++ tensorflow/core/lib/math/math_util_test.cc | 29 +++++++++++++++++++ 4 files changed, 54 insertions(+), 32 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index f242e0acb8..bb75d3f49e 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -24,6 +24,7 @@ limitations under the License. #include #include +#include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/platform/logging.h" // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -1651,19 +1652,6 @@ void IrEmitter::EmitShardedVectorStore( } } -namespace { -// TODO(sanjoy): This is duplicated in tensorflow/core/lib/core/arena.cc. -// Extract out a common implementation to tensorflow/core/lib/math/math_util.h -uint32 GCD(uint32 x, uint32 y) { - while (y != 0) { - uint32 r = x % y; - x = y; - y = r; - } - return x; -} -} // namespace - StatusOr IrEmitter::EmitVectorizedReduce( HloInstruction* reduce, HloInstruction* arg, HloInstruction* init_value, tensorflow::gtl::ArraySlice dimensions, HloComputation* function, @@ -1686,9 +1674,9 @@ StatusOr IrEmitter::EmitVectorizedReduce( std::find(dimensions.begin(), dimensions.end(), arg->shape().layout().minor_to_major(0)) != dimensions.end(); - unsigned element_alignment = - GCD(ShapeUtil::ByteSizeOfPrimitiveType(reduce->shape().element_type()), - MinimumAlignmentForPrimitiveType(reduce->shape().element_type())); + unsigned element_alignment = tensorflow::MathUtil::GCD( + ShapeUtil::ByteSizeOfPrimitiveType(reduce->shape().element_type()), + MinimumAlignmentForPrimitiveType(reduce->shape().element_type())); if (is_reduction_over_minor_dimension) { // TODO(sanjoy): Implement vectorized reduction over the minor dimension. @@ -2463,7 +2451,7 @@ void IrEmitter::EmitTransferElements(llvm::Value* target, llvm::Value* source, const llvm_ir::IrArray& source_array) { unsigned primitive_type_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type); - unsigned element_alignment = GCD( + unsigned element_alignment = tensorflow::MathUtil::GCD( primitive_type_size, MinimumAlignmentForPrimitiveType(primitive_type)); llvm::Type* primitive_ptr_type = llvm::PointerType::getUnqual( llvm_ir::PrimitiveTypeToIrType(primitive_type, module_)); diff --git a/tensorflow/core/lib/core/arena.cc b/tensorflow/core/lib/core/arena.cc index 2a04f7bd39..55e481d0e6 100644 --- a/tensorflow/core/lib/core/arena.cc +++ b/tensorflow/core/lib/core/arena.cc @@ -28,6 +28,7 @@ limitations under the License. #include #include +#include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mem.h" @@ -113,24 +114,11 @@ void Arena::MakeNewBlock(const uint32 alignment) { CHECK(SatisfyAlignment(alignment)); } -// The following simple numeric routines also exist in util/math/mathutil.h -// but we don't want to depend on that library. - -// Euclid's algorithm for Greatest Common Denominator. -static uint32 GCD(uint32 x, uint32 y) { - while (y != 0) { - uint32 r = x % y; - x = y; - y = r; - } - return x; -} - static uint32 LeastCommonMultiple(uint32 a, uint32 b) { if (a > b) { - return (a / GCD(a, b)) * b; + return (a / MathUtil::GCD(a, b)) * b; } else if (a < b) { - return (b / GCD(b, a)) * a; + return (b / MathUtil::GCD(b, a)) * a; } else { return a; } diff --git a/tensorflow/core/lib/math/math_util.h b/tensorflow/core/lib/math/math_util.h index 6f279865e7..9e71598622 100644 --- a/tensorflow/core/lib/math/math_util.h +++ b/tensorflow/core/lib/math/math_util.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_LIB_MATH_MATH_UTIL_H_ #define TENSORFLOW_LIB_MATH_MATH_UTIL_H_ +#include + #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -59,6 +61,9 @@ class MathUtil { template static IntegralType CeilOrFloorOfRatio(IntegralType numerator, IntegralType denominator); + + template + static IntegralType GCD(IntegralType x, IntegralType y); }; // ---- CeilOrFloorOfRatio ---- @@ -107,6 +112,18 @@ IntegralType MathUtil::CeilOrFloorOfRatio(IntegralType numerator, } } +template +IntegralType MathUtil::GCD(IntegralType a, IntegralType b) { + static_assert(std::is_unsigned::value, + "signed GCD not supported!"); + while (b != 0) { + IntegralType r = a % b; + a = b; + b = r; + } + return a; +} + } // namespace tensorflow #endif // TENSORFLOW_LIB_MATH_MATH_UTIL_H_ diff --git a/tensorflow/core/lib/math/math_util_test.cc b/tensorflow/core/lib/math/math_util_test.cc index eaf8c31a43..a96e5467c3 100644 --- a/tensorflow/core/lib/math/math_util_test.cc +++ b/tensorflow/core/lib/math/math_util_test.cc @@ -195,4 +195,33 @@ TEST(MathUtil, CeilOfRatio) { #endif } +struct GCDTestCase { + unsigned int x; + unsigned int y; + unsigned int gcd; +}; + +TEST(MathUtil, GCD) { + std::vector testcases({ + {10, 20, 10}, // + {27, 8, 1}, // + {4, 3, 1}, // + {6, 8, 2}, // + {5, 0, 5}, // + {5, 5, 5}, // + {0, 0, 0} // + }); + + for (const auto& tc : testcases) { + EXPECT_EQ(tc.gcd, MathUtil::GCD(tc.x, tc.y)); + EXPECT_EQ(tc.gcd, MathUtil::GCD(tc.y, tc.x)); + EXPECT_EQ(tc.gcd, MathUtil::GCD(tc.x, tc.y)); + EXPECT_EQ(tc.gcd, MathUtil::GCD(tc.y, tc.x)); + } + + const uint64 biggish_prime = 1666666667; + EXPECT_EQ(biggish_prime, + MathUtil::GCD(biggish_prime * 3, biggish_prime * 4)); +} + } // namespace tensorflow -- GitLab From ce4200eae990d7f5efdfb727939d38bf48001ba2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 15:20:30 -0800 Subject: [PATCH 1012/1801] Fix profiler to track some missed persistent bytes. PiperOrigin-RevId: 177516249 --- tensorflow/core/profiler/g3doc/options.md | 11 +- .../core/profiler/internal/tfprof_node.cc | 28 +++++ .../core/profiler/internal/tfprof_node.h | 10 +- .../profiler/internal/tfprof_show_test.cc | 37 +++--- .../profiler/internal/tfprof_stats_test.cc | 105 ++++++++++-------- tensorflow/core/profiler/tfprof_log.proto | 5 +- .../python/profiler/model_analyzer_test.py | 40 ++++++- 7 files changed, 157 insertions(+), 79 deletions(-) diff --git a/tensorflow/core/profiler/g3doc/options.md b/tensorflow/core/profiler/g3doc/options.md index 4c73e372e3..dd12f76d6f 100644 --- a/tensorflow/core/profiler/g3doc/options.md +++ b/tensorflow/core/profiler/g3doc/options.md @@ -60,11 +60,14 @@ Currently, profiler only tracks the allocation of memory. As a result, the accumulated memory request is uaually larger than the peak memory of the overall model. -bytes: The memory allocations requested by the operation. -peak_bytes: The peak requested memory (not de-allocated) by the operation. -residual_bytes: The memory requested by the operation and not de-allocated +It's recommended to generate timeline to see the allocator memory usage over +time. + +`bytes`: The memory allocations requested by the operation. +`peak_bytes`: The peak requested memory (not de-allocated) by the operation. +`residual_bytes`: The memory requested by the operation and not de-allocated when Compute finishes. -output_bytes: The memory output by the operation. It's not necessarily requested +`output_bytes`: The memory output by the operation. It's not necessarily requested by the current operation. For example, it can be a tensor forwarded from input to output, with in-place mutation. diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc index 671b65d708..5cd1050bcc 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.cc +++ b/tensorflow/core/profiler/internal/tfprof_node.cc @@ -139,6 +139,25 @@ void ExecStep::AddMemoryStats(const string& dev, exec_.accelerator_persistent_bytes() + step_stat.memory_stats().device_persistent_memory_size()); } + + // TODO(xpan): Make this more accurate: + // High level: Memory tracking is suspicous and requires large scale + // clean up. + // Investigte the memory usage difference between CPU/GPU with OpViewTest. + // + // 1. OpKernelConstruction::allocate_xxx is not traced. Below, we only + // discuss OpKernelContext-related allocations. + // 2. allocate_output calls allocate_tensor, which is properly tracked in + // 'NodeExecStats.memory'. + // 3. allocate_temp is only tracked through record_xxx_temp. It appears + // in 'NodeExecStats.memory_stats'. + // 4. allocate_persistent calls allocate_tensor, which is properly tracked + // in 'NodeExecStats.memory'. However, there is no way to count it as + // persistent now. + // 5. record_xxx_persistent is called when allocate_persistent + // is not used and hence tracks some complementary bytes. It appears in + // 'NodeExecStats.memory_stats'. It's suspicious. But we should + // use it now since it covers constant op. int64 residual_bytes = 0; int64 requested_bytes = 0; int64 peak_bytes = 0; @@ -147,6 +166,15 @@ void ExecStep::AddMemoryStats(const string& dev, requested_bytes += mem.total_bytes(); peak_bytes += mem.peak_bytes(); } + residual_bytes += + exec_.host_persistent_bytes() + exec_.accelerator_persistent_bytes(); + requested_bytes += exec_.host_persistent_bytes() + + exec_.accelerator_persistent_bytes() + + exec_.host_temp_bytes() + exec_.accelerator_temp_bytes(); + peak_bytes += exec_.host_persistent_bytes() + + exec_.accelerator_persistent_bytes() + exec_.host_temp_bytes() + + exec_.accelerator_temp_bytes(); + exec_.set_requested_bytes(requested_bytes); exec_.set_residual_bytes(residual_bytes); exec_.set_peak_bytes(peak_bytes); diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h index e2d0563a07..77c14cb792 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.h +++ b/tensorflow/core/profiler/internal/tfprof_node.h @@ -593,17 +593,11 @@ class TFGraphNode { int64 accelerator_persistent_bytes() const { int64 persistent_bytes = 0; for (const auto& exec : execs_) { - persistent_bytes += exec.second.accelerator_persistent_bytes(); + persistent_bytes = std::max(persistent_bytes, + exec.second.accelerator_persistent_bytes()); } return persistent_bytes; } - int64 host_persistent_bytes(int64 step) const { - auto exec = execs_.find(step); - if (exec == execs_.end()) { - return 0; - } - return exec->second.host_persistent_bytes(); - } const std::map>& output_memory( int64 step) const { auto exec = execs_.find(step); diff --git a/tensorflow/core/profiler/internal/tfprof_show_test.cc b/tensorflow/core/profiler/internal/tfprof_show_test.cc index 1f19f8c322..98773ae19e 100644 --- a/tensorflow/core/profiler/internal/tfprof_show_test.cc +++ b/tensorflow/core/profiler/internal/tfprof_show_test.cc @@ -105,12 +105,13 @@ TEST_F(TFProfShowTest, DumpScopeMode) { "node name | # parameters | # float_ops | requested bytes | peak bytes | " "residual bytes | output bytes | total execution time | accelerator " "execution time | cpu execution time\n_TFProfRoot (--/451 params, --/0 " - "flops, --/0B, --/0B, --/0B, --/2.56KB, --/13us, --/0us, --/13us)\n DW " - "(3x3x3x6, 162/162 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, " - "1.28KB/1.28KB, 2us/2us, 0us/0us, 2us/2us)\n DW2 (2x2x6x12, 288/288 " - "params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, 1.28KB/1.28KB, 11us/11us, " - "0us/0us, 11us/11us)\n ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, " - "0B/0B, 0B/0B, 0us/0us, 0us/0us, 0us/0us)\n", + "flops, --/2.56KB, --/2.56KB, --/2.56KB, --/2.56KB, --/13us, --/0us, " + "--/13us)\n DW (3x3x3x6, 162/162 params, 0/0 flops, 1.28KB/1.28KB, " + "1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 2us/2us, 0us/0us, " + "2us/2us)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.28KB/1.28KB, " + "1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 11us/11us, 0us/0us, " + "11us/11us)\n ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, " + "0B/0B, 0us/0us, 0us/0us, 0us/0us)\n", dump_str); EXPECT_EQ(dump_str, TestToFromProto("scope", opts)); @@ -178,22 +179,22 @@ TEST_F(TFProfShowTest, DumpOpMode) { EXPECT_EQ( "nodename|requestedbytes|totalexecutiontime|acceleratorexecutiontime|" "cpuexecutiontime|#parameters|#float_ops|opoccurrence(run|defined)|" - "inputshapes\nVariableV20B(0.00%,0.00%),13us(100.00%,0.26%),0us(100.00%," - "0.00%),13us(100.00%,0.29%),451params(100.00%,100.00%),0float_ops(100.00%" - ",0.00%),2|3\n\ninput_type:\t(run*2|defined*3)\texec_time:13us\n\nAdd0B(" - "0.00%,0.00%),0us(99.74%,0.00%),0us(100.00%,0.00%),0us(99.71%,0.00%)," - "0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|3\n\ninput_type:0:1," - "\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12,\t1:1\t(" - "run*0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6,\t1:1\t(run*0|" - "defined*1)\texec_time:0us\n\nAssign0B(0.00%,0.00%),0us(99.74%,0.00%)," - "0us(100.00%,0.00%),0us(99.71%,0.00%),0params(0.00%,0.00%),0float_ops(" - "100.00%,0.00%),0|3\n\ninput_type:0:1,\t1:1\t(run*0|defined*1)\texec_" + "inputshapes\nVariableV22.56KB(100.00%,8.40%),13us(100.00%,0.26%),0us(" + "100.00%,0.00%),13us(100.00%,0.29%),451params(100.00%,100.00%),0float_" + "ops(100.00%,0.00%),2|3\n\ninput_type:\t(run*2|defined*3)\texec_time:" + "13us\n\nAdd0B(0.00%,0.00%),0us(99.74%,0.00%),0us(100.00%,0.00%),0us(99." + "71%,0.00%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|3\n\ninput_" + "type:0:1,\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12," + "\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6,\t1:1\t(" + "run*0|defined*1)\texec_time:0us\n\nAssign0B(0.00%,0.00%),0us(99.74%,0." + "00%),0us(100.00%,0.00%),0us(99.71%,0.00%),0params(0.00%,0.00%),0float_" + "ops(100.00%,0.00%),0|3\n\ninput_type:0:1,\t1:1\t(run*0|defined*1)\texec_" "time:0us\ninput_type:0:2x2x6x12,\t1:2x2x6x12\t(run*0|defined*1)\texec_" "time:0us\ninput_type:0:3x3x3x6,\t1:3x3x3x6\t(run*0|defined*1)\texec_" "time:0us\n\nConst0B(0.00%,0.00%),2us(99.74%,0.04%),0us(100.00%,0.00%)," "2us(99.71%,0.04%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),1|" - "10\n\ninput_type:\t(run*1|defined*10)\texec_time:2us\n\nConv2D14.59KB(" - "100.00%,100.00%),4.89ms(99.70%,98.87%),404us(100.00%,100.00%),4.49ms(99." + "10\n\ninput_type:\t(run*1|defined*10)\texec_time:2us\n\nConv2D27.90KB(" + "91.60%,91.60%),4.89ms(99.70%,98.87%),404us(100.00%,100.00%),4.49ms(99." "67%,98.77%),0params(0.00%,0.00%),10.44kfloat_ops(100.00%,100.00%),2|" "2\n\ninput_type:0:2x3x3x6,\t1:2x2x6x12\t(run*1|defined*1)\texec_time:" "597us\ninput_type:0:2x6x6x3,\t1:3x3x3x6\t(run*1|defined*1)\texec_time:4." diff --git a/tensorflow/core/profiler/internal/tfprof_stats_test.cc b/tensorflow/core/profiler/internal/tfprof_stats_test.cc index 2f2101d76b..b86a83cb1b 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats_test.cc +++ b/tensorflow/core/profiler/internal/tfprof_stats_test.cc @@ -89,21 +89,27 @@ TEST_F(TFProfStatsTest, CustomOpType) { GraphNodeProto expected; CHECK(protobuf::TextFormat::ParseFromString( - "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: " - "451\nchildren {\n name: \"DW\"\n exec_micros: 2\n parameters: 162\n " - "total_exec_micros: 2\n total_parameters: 162\n devices: " + "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_requested_bytes: " + "2560\ntotal_parameters: 451\nchildren {\n name: \"DW\"\n exec_micros: " + "2\n requested_bytes: 1280\n parameters: 162\n total_exec_micros: 2\n " + " total_requested_bytes: 1280\n total_parameters: 162\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n cpu_exec_micros: 2\n " "total_cpu_exec_micros: 2\n run_count: 1\n total_run_count: 1\n " - "total_definition_count: 1\n output_bytes: 1280\n total_output_bytes: " - "1280\n}\nchildren {\n name: \"DW2\"\n exec_micros: 11\n parameters: " - "288\n total_exec_micros: 11\n total_parameters: 288\n devices: " + "total_definition_count: 1\n peak_bytes: 1280\n residual_bytes: 1280\n " + " output_bytes: 1280\n total_peak_bytes: 1280\n total_residual_bytes: " + "1280\n total_output_bytes: 1280\n}\nchildren {\n name: \"DW2\"\n " + "exec_micros: 11\n requested_bytes: 1280\n parameters: 288\n " + "total_exec_micros: 11\n total_requested_bytes: 1280\n " + "total_parameters: 288\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n cpu_exec_micros: 11\n " "total_cpu_exec_micros: 11\n run_count: 1\n total_run_count: 1\n " - "total_definition_count: 1\n output_bytes: 1280\n total_output_bytes: " - "1280\n}\nchildren {\n name: \"ScalarW\"\n parameters: 1\n " - "total_parameters: 1\n total_definition_count: " + "total_definition_count: 1\n peak_bytes: 1280\n residual_bytes: 1280\n " + " output_bytes: 1280\n total_peak_bytes: 1280\n total_residual_bytes: " + "1280\n total_output_bytes: 1280\n}\nchildren {\n name: \"ScalarW\"\n " + "parameters: 1\n total_parameters: 1\n total_definition_count: " "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: " - "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n", + "2\ntotal_definition_count: 3\ntotal_peak_bytes: " + "2560\ntotal_residual_bytes: 2560\ntotal_output_bytes: 2560\n", &expected)); EXPECT_EQ(expected.DebugString(), root.DebugString()); @@ -119,21 +125,27 @@ TEST_F(TFProfStatsTest, CheckPointOpType) { GraphNodeProto expected; CHECK(protobuf::TextFormat::ParseFromString( - "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: " - "451\nchildren {\n name: \"DW\"\n exec_micros: 2\n parameters: 162\n " - "total_exec_micros: 2\n total_parameters: 162\n devices: " + "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_requested_bytes: " + "2560\ntotal_parameters: 451\nchildren {\n name: \"DW\"\n exec_micros: " + "2\n requested_bytes: 1280\n parameters: 162\n total_exec_micros: 2\n " + " total_requested_bytes: 1280\n total_parameters: 162\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n cpu_exec_micros: 2\n " "total_cpu_exec_micros: 2\n run_count: 1\n total_run_count: 1\n " - "total_definition_count: 1\n output_bytes: 1280\n total_output_bytes: " - "1280\n}\nchildren {\n name: \"DW2\"\n exec_micros: 11\n parameters: " - "288\n total_exec_micros: 11\n total_parameters: 288\n devices: " + "total_definition_count: 1\n peak_bytes: 1280\n residual_bytes: 1280\n " + " output_bytes: 1280\n total_peak_bytes: 1280\n total_residual_bytes: " + "1280\n total_output_bytes: 1280\n}\nchildren {\n name: \"DW2\"\n " + "exec_micros: 11\n requested_bytes: 1280\n parameters: 288\n " + "total_exec_micros: 11\n total_requested_bytes: 1280\n " + "total_parameters: 288\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n cpu_exec_micros: 11\n " "total_cpu_exec_micros: 11\n run_count: 1\n total_run_count: 1\n " - "total_definition_count: 1\n output_bytes: 1280\n total_output_bytes: " - "1280\n}\nchildren {\n name: \"ScalarW\"\n parameters: 1\n " - "total_parameters: 1\n total_definition_count: " + "total_definition_count: 1\n peak_bytes: 1280\n residual_bytes: 1280\n " + " output_bytes: 1280\n total_peak_bytes: 1280\n total_residual_bytes: " + "1280\n total_output_bytes: 1280\n}\nchildren {\n name: \"ScalarW\"\n " + "parameters: 1\n total_parameters: 1\n total_definition_count: " "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: " - "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n", + "2\ntotal_definition_count: 3\ntotal_peak_bytes: " + "2560\ntotal_residual_bytes: 2560\ntotal_output_bytes: 2560\n", &expected)); EXPECT_EQ(expected.DebugString(), root.DebugString()); @@ -150,7 +162,7 @@ TEST_F(TFProfStatsTest, TestGraph) { GraphNodeProto expected; CHECK(protobuf::TextFormat::ParseFromString( "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: " - "14592\ntotal_parameters: 451\nchildren {\n name: " + "30464\ntotal_parameters: 451\nchildren {\n name: " "\"DW/Initializer/random_normal/mul\"\n children {\n name: " "\"DW/Initializer/random_normal/RandomStandardNormal\"\n children {\n " " name: \"DW/Initializer/random_normal/shape\"\n " @@ -166,7 +178,7 @@ TEST_F(TFProfStatsTest, TestGraph) { "4\n}\ntotal_float_ops: 10440\ntotal_accelerator_exec_micros: " "404\ntotal_cpu_exec_micros: 4541\ntotal_run_count: " "6\ntotal_definition_count: 32\ntotal_peak_bytes: " - "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n", + "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n", &expected)); EXPECT_EQ(expected.DebugString(), root.DebugString()); @@ -181,9 +193,9 @@ TEST_F(TFProfStatsTest, TestFloatOps) { GraphNodeProto expected; CHECK(protobuf::TextFormat::ParseFromString( "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: " - "14592\ntotal_parameters: 451\nchildren {\n name: \"Conv2D\"\n " - "exec_micros: 4292\n requested_bytes: 9472\n total_exec_micros: 4292\n " - " total_requested_bytes: 9472\n devices: " + "30464\ntotal_parameters: 451\nchildren {\n name: \"Conv2D\"\n " + "exec_micros: 4292\n requested_bytes: 18176\n total_exec_micros: " + "4292\n total_requested_bytes: 18176\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n float_ops: 5832\n " "total_float_ops: 5832\n input_shapes {\n key: 0\n value {\n " "dim {\n size: 2\n }\n dim {\n size: 6\n " @@ -194,11 +206,11 @@ TEST_F(TFProfStatsTest, TestFloatOps) { "6\n }\n }\n }\n accelerator_exec_micros: 226\n " "cpu_exec_micros: 4066\n total_accelerator_exec_micros: 226\n " "total_cpu_exec_micros: 4066\n run_count: 1\n total_run_count: 1\n " - "total_definition_count: 1\n peak_bytes: 5888\n residual_bytes: 768\n " - "output_bytes: 768\n total_peak_bytes: 5888\n total_residual_bytes: " + "total_definition_count: 1\n peak_bytes: 14592\n residual_bytes: 768\n " + " output_bytes: 768\n total_peak_bytes: 14592\n total_residual_bytes: " "768\n total_output_bytes: 768\n}\nchildren {\n name: \"Conv2D_1\"\n " - "exec_micros: 597\n requested_bytes: 5120\n total_exec_micros: 597\n " - "total_requested_bytes: 5120\n devices: " + "exec_micros: 597\n requested_bytes: 9728\n total_exec_micros: 597\n " + "total_requested_bytes: 9728\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n float_ops: 4608\n " "total_float_ops: 4608\n input_shapes {\n key: 0\n value {\n " "dim {\n size: 2\n }\n dim {\n size: 3\n " @@ -209,12 +221,12 @@ TEST_F(TFProfStatsTest, TestFloatOps) { "12\n }\n }\n }\n accelerator_exec_micros: 178\n " "cpu_exec_micros: 419\n total_accelerator_exec_micros: 178\n " "total_cpu_exec_micros: 419\n run_count: 1\n total_run_count: 1\n " - "total_definition_count: 1\n peak_bytes: 4096\n residual_bytes: 512\n " - "output_bytes: 512\n total_peak_bytes: 4096\n total_residual_bytes: " + "total_definition_count: 1\n peak_bytes: 8704\n residual_bytes: 512\n " + "output_bytes: 512\n total_peak_bytes: 8704\n total_residual_bytes: " "512\n total_output_bytes: 512\n}\ntotal_float_ops: " "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: " "4541\ntotal_run_count: 6\ntotal_definition_count: 35\ntotal_peak_bytes: " - "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n", + "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n", &expected)); EXPECT_EQ(expected.DebugString(), root.DebugString()); @@ -231,9 +243,9 @@ TEST_F(TFProfStatsTest, TestAccountShownNameOnly) { GraphNodeProto expected; CHECK(protobuf::TextFormat::ParseFromString( "name: \"_TFProfRoot\"\ntotal_exec_micros: 597\ntotal_requested_bytes: " - "5120\nchildren {\n name: \"Conv2D_1\"\n exec_micros: 597\n " - "requested_bytes: 5120\n total_exec_micros: 597\n " - "total_requested_bytes: 5120\n devices: " + "9728\nchildren {\n name: \"Conv2D_1\"\n exec_micros: 597\n " + "requested_bytes: 9728\n total_exec_micros: 597\n " + "total_requested_bytes: 9728\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n float_ops: 4608\n " "total_float_ops: 4608\n input_shapes {\n key: 0\n value {\n " "dim {\n size: 2\n }\n dim {\n size: 3\n " @@ -244,12 +256,12 @@ TEST_F(TFProfStatsTest, TestAccountShownNameOnly) { "12\n }\n }\n }\n accelerator_exec_micros: 178\n " "cpu_exec_micros: 419\n total_accelerator_exec_micros: 178\n " "total_cpu_exec_micros: 419\n run_count: 1\n total_run_count: 1\n " - "total_definition_count: 1\n peak_bytes: 4096\n residual_bytes: 512\n " - "output_bytes: 512\n total_peak_bytes: 4096\n total_residual_bytes: " + "total_definition_count: 1\n peak_bytes: 8704\n residual_bytes: 512\n " + "output_bytes: 512\n total_peak_bytes: 8704\n total_residual_bytes: " "512\n total_output_bytes: 512\n}\ntotal_float_ops: " "4608\ntotal_accelerator_exec_micros: 178\ntotal_cpu_exec_micros: " "419\ntotal_run_count: 1\ntotal_definition_count: 2\ntotal_peak_bytes: " - "4096\ntotal_residual_bytes: 512\ntotal_output_bytes: 512\n", + "8704\ntotal_residual_bytes: 512\ntotal_output_bytes: 512\n", &expected)); EXPECT_EQ(expected.DebugString(), root.DebugString()); @@ -265,8 +277,9 @@ TEST_F(TFProfStatsTest, TestShowTensorValue) { GraphNodeProto expected; CHECK(protobuf::TextFormat::ParseFromString( "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: " - "14592\ntotal_parameters: 451\nchildren {\n name: \"DW\"\n " - "exec_micros: 2\n parameters: 162\n total_exec_micros: 2\n " + "30464\ntotal_parameters: 451\nchildren {\n name: \"DW\"\n " + "exec_micros: 2\n requested_bytes: 1280\n parameters: 162\n " + "total_exec_micros: 2\n total_requested_bytes: 1280\n " "total_parameters: 162\n devices: " "\"/job:localhost/replica:0/task:0/gpu:0\"\n tensor_value {\n dtype: " "DT_FLOAT\n value_double: -0.000534315\n value_double: " @@ -351,11 +364,13 @@ TEST_F(TFProfStatsTest, TestShowTensorValue) { "value_double: 0.000374641\n value_double: -0.00149603\n " "value_double: -0.000317367\n value_double: -0.000417829\n }\n " "cpu_exec_micros: 2\n total_cpu_exec_micros: 2\n run_count: 1\n " - "total_run_count: 1\n total_definition_count: 10\n output_bytes: " - "1280\n total_output_bytes: 1280\n}\ntotal_float_ops: " - "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: " - "4541\ntotal_run_count: 6\ntotal_definition_count: 35\ntotal_peak_bytes: " - "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n", + "total_run_count: 1\n total_definition_count: 10\n peak_bytes: 1280\n " + "residual_bytes: 1280\n output_bytes: 1280\n total_peak_bytes: 1280\n " + "total_residual_bytes: 1280\n total_output_bytes: " + "1280\n}\ntotal_float_ops: 10440\ntotal_accelerator_exec_micros: " + "404\ntotal_cpu_exec_micros: 4541\ntotal_run_count: " + "6\ntotal_definition_count: 35\ntotal_peak_bytes: " + "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n", &expected)); EXPECT_EQ(expected.DebugString(), root.DebugString()); } diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto index f92301133a..b49bdf64ac 100644 --- a/tensorflow/core/profiler/tfprof_log.proto +++ b/tensorflow/core/profiler/tfprof_log.proto @@ -124,9 +124,10 @@ message ExecProfile { int64 residual_bytes = 9; // Total bytes output by the op (not necessarily requested by the op). int64 output_bytes = 10; - // Total temporary bytes allocated and released by the op. + // NOTE: Please don't depend on the following 4 fields yet. Due to + // TensorFlow internal tracing issues, the numbers can be quite wrong. + // TODO(xpan): Fix the TensorFlow internal tracing. int64 host_temp_bytes = 11; - // Total persistent bytes (e.g. variable) allocated by the op. int64 host_persistent_bytes = 12; int64 accelerator_temp_bytes = 13; int64 accelerator_persistent_bytes = 14; diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index 698f8906d4..c39d0fa5b1 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -23,12 +23,15 @@ import os import random import re +import numpy as np + from tensorflow.core.profiler import profile_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.client import session from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables from tensorflow.python.platform import gfile @@ -346,8 +349,8 @@ class PrintModelAnalysisTest(test.TestCase): with gfile.Open(outfile, 'r') as f: # pylint: disable=line-too-long self.assertEqual( - 'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes\nConst0B(0', - f.read().replace('\t', '').replace(' ', '')[0:180]) + 'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes', + f.read().replace('\t', '').replace(' ', '')[0:170]) # pylint: enable=line-too-long total_children = 0 @@ -694,6 +697,39 @@ class PrintModelAnalysisTest(test.TestCase): exception_str) self.assertTrue(mat is None) + def testTrackPersistentBytes(self): + ops.reset_default_graph() + a = array_ops.constant(np.ones((100, 100))) + b = array_ops.constant(np.ones((100, 100))) + c = a * b + + with session.Session() as sess: + run_options = config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE) + run_metadata = config_pb2.RunMetadata() + sess.run(c, options=run_options, run_metadata=run_metadata) + + options = option_builder.ProfileOptionBuilder.time_and_memory() + options['min_bytes'] = 0 + options['select'] = ('bytes', 'peak_bytes', 'output_bytes', + 'residual_bytes') + ret = model_analyzer.profile( + sess.graph, run_meta=run_metadata, cmd='scope', options=options) + + run_metadata = config_pb2.RunMetadata() + sess.run(c, options=run_options, run_metadata=run_metadata) + ret2 = model_analyzer.profile( + sess.graph, run_meta=run_metadata, cmd='scope', options=options) + + n = lib.SearchTFProfNode(ret, 'mul') + n2 = lib.SearchTFProfNode(ret2, 'mul') + self.assertGreater(n.peak_bytes, 0) + self.assertGreater(n.output_bytes, 0) + self.assertGreater(n.residual_bytes, 0) + self.assertEqual(n.peak_bytes, n2.peak_bytes) + self.assertEqual(n.output_bytes, n2.output_bytes) + self.assertEqual(n.residual_bytes, n2.residual_bytes) + if __name__ == '__main__': test.main() -- GitLab From f69915dc152c5516e6bc88b93515cbb02a1fbfc5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 15:33:39 -0800 Subject: [PATCH 1013/1801] [XLA] Sanitize hlo names to match regexp "[a-zA-Z_][a-zA-Z0-9_.-]*". PiperOrigin-RevId: 177518046 --- tensorflow/compiler/xla/service/hlo_module.cc | 7 ++-- .../compiler/xla/service/name_uniquer.cc | 32 +++++++++++++++++++ .../compiler/xla/service/name_uniquer.h | 13 ++++++-- .../compiler/xla/service/name_uniquer_test.cc | 26 ++++++++++++--- 4 files changed, 68 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index faaf73ea1c..6fe2134466 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -35,14 +35,15 @@ namespace xla { HloModule::HloModule(const string& name, const VersionedComputationHandle& entry_computation_handle, const HloModuleConfig& config) - : name_(name), + : name_(NameUniquer::GetSanitizedName(name)), config_(config), has_entry_computation_handle_(true), entry_computation_handle_(entry_computation_handle) {} -HloModule::HloModule(const string& name) : name_(name) {} +HloModule::HloModule(const string& name) + : name_(NameUniquer::GetSanitizedName(name)) {} HloModule::HloModule(const string& name, const HloModuleConfig& config) - : name_(name), config_(config) {} + : name_(NameUniquer::GetSanitizedName(name)), config_(config) {} HloComputation* HloModule::AddComputationInternal( std::unique_ptr computation, bool is_entry, diff --git a/tensorflow/compiler/xla/service/name_uniquer.cc b/tensorflow/compiler/xla/service/name_uniquer.cc index a0d08c288d..7d8c05fffa 100644 --- a/tensorflow/compiler/xla/service/name_uniquer.cc +++ b/tensorflow/compiler/xla/service/name_uniquer.cc @@ -17,12 +17,44 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" namespace xla { +namespace { + +bool IsAllowed(char character) { + auto c = static_cast(character); + return (isalnum(c) != 0) || c == '_' || c == '.' || c == '-'; +} + +} // namespace + +NameUniquer::NameUniquer(const string& separator) { + CHECK(std::all_of(separator.begin(), separator.end(), IsAllowed)) + << "separator should comprises allowed characters only"; + separator_ = separator; +} + +/*static*/ string NameUniquer::GetSanitizedName(const string& name) { + string result = name; + CHECK(!result.empty()) << "name should not be empty"; + char c = static_cast(result[0]); + if (!isalpha(c) && c != '_') { + result[0] = '_'; + } + for (int i = 1; i < result.length(); i++) { + if (!IsAllowed(result[i])) { + result[i] = '_'; + } + } + return result; +} + string NameUniquer::GetUniqueName(tensorflow::StringPiece prefix) { string root = prefix.empty() ? "name" : prefix.ToString(); + root = GetSanitizedName(root); // Strip away numeric suffix (if any). Only recognize separator if it is in // the middle of the name. diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h index ed379b5225..4139c2700b 100644 --- a/tensorflow/compiler/xla/service/name_uniquer.h +++ b/tensorflow/compiler/xla/service/name_uniquer.h @@ -28,14 +28,21 @@ namespace xla { // Simple stateful class that helps generate "unique" names. To use it, simply // call GetUniqueName as many times as needed. The names returned by // GetUniqueName are guaranteed to be distinct for this instance of the class. +// Note that the names will be sanitized to match regexp +// "[a-zA-Z_][a-zA-Z0-9_.-]*". class NameUniquer { public: - explicit NameUniquer(const string& separator = "__") - : separator_(separator) {} + // The separator must contain allowed characters only: "[a-zA-Z0-9_.-]". + explicit NameUniquer(const string& separator = "__"); - // Get a unique name in a string, with an optional prefix for convenience. + // Get a sanitized unique name in a string, with an optional prefix for + // convenience. string GetUniqueName(tensorflow::StringPiece prefix = ""); + // Sanitizes and returns the name. Unallowed characters will be replaced with + // '_'. The result will match the regexp "[a-zA-Z_][a-zA-Z0-9_.-]*". + static string GetSanitizedName(const string& name); + private: // The string to use to separate the prefix of the name from the uniquing // integer value. diff --git a/tensorflow/compiler/xla/service/name_uniquer_test.cc b/tensorflow/compiler/xla/service/name_uniquer_test.cc index 9f0747a6e2..4258cf1687 100644 --- a/tensorflow/compiler/xla/service/name_uniquer_test.cc +++ b/tensorflow/compiler/xla/service/name_uniquer_test.cc @@ -60,12 +60,30 @@ TEST_F(NameUniquerTest, NumericSuffixes) { EXPECT_EQ("bar", uniquer.GetUniqueName("bar.-1000")); EXPECT_EQ("bar.1", uniquer.GetUniqueName("bar.-2000")); EXPECT_EQ("bar.2", uniquer.GetUniqueName("bar.1")); +} + +TEST_F(NameUniquerTest, Sanitize) { + NameUniquer uniquer("_"); + + EXPECT_EQ("foo", uniquer.GetUniqueName("foo")); + EXPECT_EQ("foo_1", uniquer.GetUniqueName("foo")); + EXPECT_EQ("foo.54", uniquer.GetUniqueName("foo.54")); + EXPECT_EQ("foo_54", uniquer.GetUniqueName("foo_54")); + EXPECT_EQ("foo_54.1", uniquer.GetUniqueName("foo_54.1")); + EXPECT_EQ("foo_55", uniquer.GetUniqueName("foo")); + + // Invalid characters will be replaced with '_'. + EXPECT_EQ("bar", uniquer.GetUniqueName("bar<-1000")); + EXPECT_EQ("bar_1", uniquer.GetUniqueName("bar<-2000")); + EXPECT_EQ("bar_2", uniquer.GetUniqueName("bar_1")); // Separator is only recognized in the middle of the prefix. - EXPECT_EQ(".10", uniquer.GetUniqueName(".10")); - EXPECT_EQ(".10.1", uniquer.GetUniqueName(".10")); - EXPECT_EQ("foobar.", uniquer.GetUniqueName("foobar.")); - EXPECT_EQ("foobar..1", uniquer.GetUniqueName("foobar.")); + EXPECT_EQ("_10", uniquer.GetUniqueName( + ".10")); // the leading '.' is replaced with '_'. + EXPECT_EQ("_10_1", uniquer.GetUniqueName(".10")); + EXPECT_EQ("_10_2", uniquer.GetUniqueName("_10")); + EXPECT_EQ("foobar_", uniquer.GetUniqueName("foobar_")); + EXPECT_EQ("foobar__1", uniquer.GetUniqueName("foobar_")); } } // namespace -- GitLab From 0472116d163eeb77d51cabdc5fc67be917048870 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 15:34:36 -0800 Subject: [PATCH 1014/1801] [TF:XLA] Make tf_cnn_benchmarks run on CPU with XLA. Adds _cpu_jit to tf_cnn_benchmarks_xla BUILD rule and fixes an issue in XLA bridge triggered by XLA CPU compilation of whole graphs. In particular, modifies mark_for_compilation_pass.cc to skip _Retval nodes when looking for compilation candidates in the top level function. _Retval nodes are introduced in the input subgraph as a replacement for fetches. Including _Retval nodes into XLA clusters confuses encapsulate subgraph pass that expects a graph with no pre-existing _Retval nodes. PiperOrigin-RevId: 177518178 --- .../compiler/jit/mark_for_compilation_pass.cc | 7 +++++ .../jit/mark_for_compilation_pass_test.cc | 27 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 74c9791f5e..aceedeb823 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -210,6 +210,13 @@ Status FindCompilationCandidates( !IsCompilableWhile(*node, jit_device_type, 0, lib_runtime)) { continue; } + // _Retval nodes in a top-level function represent fetches. + // Do not compile them. + if (node->type_string() == "_Retval") { + VLOG(2) << "Compilation rejected node: return value " << node->name() + << ": " << node->type_string(); + continue; + } candidates->insert(node); } return Status::OK(); diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index b3d258aea1..454f0aeae9 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -525,5 +525,32 @@ TEST(XlaCompilationTest, IllegalCycle_UsefulErrorMessage) { "+-- c\n")); } +TEST(XlaCompilationTest, Retval) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + GraphDef graphdef; + { + GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); + Node* a = ops::SourceOp("Const", builder.opts() + .WithName("A") + .WithAttr("dtype", DT_FLOAT) + .WithAttr("value", Tensor())); + Node* b = ops::UnaryOp("Relu", a, builder.opts().WithName("B")); + ops::UnaryOp("_Retval", b, + builder.opts() + .WithName("R") + .WithAttr("T", DT_FLOAT) + .WithAttr("index", 0)); + + TF_EXPECT_OK(builder.ToGraph(graph.get())); + } + + TF_ASSERT_OK(MarkForCompilation(&graph)); + auto clusters = GetClusters(*graph); + + EXPECT_EQ(2, clusters.size()); + EXPECT_TRUE(clusters.find("R") == clusters.cend()); + EXPECT_EQ(clusters["A"], clusters["B"]); +} + } // namespace } // namespace tensorflow -- GitLab From 186caed810c0e9a9ee9a3f1e0f8bea50764ce5df Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 15:48:06 -0800 Subject: [PATCH 1015/1801] Add int64 support to XLA Shape op. PiperOrigin-RevId: 177519992 --- .../compiler/tf2xla/kernels/shape_op.cc | 76 +++++++++++-------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/shape_op.cc b/tensorflow/compiler/tf2xla/kernels/shape_op.cc index 24a99f253d..06838d1625 100644 --- a/tensorflow/compiler/tf2xla/kernels/shape_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/shape_op.cc @@ -25,58 +25,72 @@ limitations under the License. namespace tensorflow { namespace { +// Converts a TensorShape to a constant Tensor. +// +// The input TensorShape input_shape is used to populate the elements of +// shape_constant, which is modified in place. +Status TensorShapeToConstant(const TensorShape& input_shape, + Tensor* shape_constant) { + const int dims = input_shape.dims(); + if (shape_constant->dtype() == DT_INT32) { + auto vec = shape_constant->vec(); + for (int i = 0; i < dims; ++i) { + int64 dim_size = input_shape.dim_size(i); + if (!FastBoundsCheck(dim_size, std::numeric_limits::max())) { + return errors::InvalidArgument( + "Shape with out_type=int32 does not support tensors > int32max", + " but dim ", i, " is ", dim_size); + } + vec(i) = static_cast(dim_size); + } + } else { + auto vec = shape_constant->vec(); + for (int i = 0; i < dims; ++i) { + int64 dim_size = input_shape.dim_size(i); + vec(i) = dim_size; + } + } + return Status::OK(); +} + class ShapeOp : public XlaOpKernel { public: - explicit ShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + explicit ShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_)); + } void Compile(XlaOpKernelContext* ctx) override { const TensorShape input_shape = ctx->InputShape(0); - const int rank = input_shape.dims(); - Tensor shape_constant(DT_INT32, TensorShape({rank})); - auto vec = shape_constant.vec(); - // TODO(dga): support int64. b/28119922. - for (int i = 0; i < rank; ++i) { - int64 dim_size = input_shape.dim_size(i); - OP_REQUIRES( - ctx, FastBoundsCheck(dim_size, std::numeric_limits::max()), - errors::InvalidArgument("Shape does not support tensors > int32max", - " but dim ", i, " is ", dim_size)); - vec(i) = static_cast(dim_size); - } - + Tensor shape_constant(out_dtype_, TensorShape({input_shape.dims()})); + OP_REQUIRES_OK(ctx, TensorShapeToConstant(input_shape, &shape_constant)); ctx->SetConstantOutput(0, shape_constant); } + + private: + DataType out_dtype_; }; REGISTER_XLA_OP(Name("Shape"), ShapeOp); class ShapeNOp : public XlaOpKernel { public: - explicit ShapeNOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + explicit ShapeNOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_)); + } void Compile(XlaOpKernelContext* ctx) override { for (int i = 0; i < ctx->num_inputs(); ++i) { - const TensorShape shape = ctx->InputShape(i); - const int dims = shape.dims(); - Tensor shape_constant(DT_INT32, TensorShape({dims})); - auto vec = shape_constant.vec(); - - // TODO(dga): support int64. b/28119922. - for (int j = 0; j < dims; ++j) { - int64 dim_size = shape.dim_size(j); - OP_REQUIRES( - ctx, FastBoundsCheck(dim_size, std::numeric_limits::max()), - errors::InvalidArgument("Shape does not support tensors > int32max", - " but shape ", i, " dim ", j, " is ", - dim_size)); - vec(j) = static_cast(dim_size); - } - + const TensorShape input_shape = ctx->InputShape(i); + Tensor shape_constant(out_dtype_, TensorShape({input_shape.dims()})); + OP_REQUIRES_OK(ctx, TensorShapeToConstant(input_shape, &shape_constant)); ctx->SetConstantOutput(i, shape_constant); } } bool IsExpensive() override { return false; } + + private: + DataType out_dtype_; }; REGISTER_XLA_OP(Name("ShapeN"), ShapeNOp); -- GitLab From 0438ac79bdb503ed267bec2146e7136ac8e99ff9 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 30 Nov 2017 16:07:24 -0800 Subject: [PATCH 1016/1801] [TF:XLA] Use output spatial dimensions instead of a transpose for conv backwards filter PiperOrigin-RevId: 177522710 --- .../compiler/tf2xla/kernels/conv_ops.cc | 31 ++++------- .../xla/service/gpu/convolution_folding.cc | 55 +++++-------------- .../service/gpu/convolution_folding_test.cc | 42 +++++--------- .../compiler/xla/service/gpu/pad_insertion.cc | 16 +----- 4 files changed, 41 insertions(+), 103 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc index 61f4d1993a..aaddbe811c 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc @@ -540,9 +540,7 @@ class ConvBackpropFilterOp : public XlaOpKernel { // Swap n_dim and c_dim in the activations. dnums.set_input_batch_dimension(c_dim); - dnums.set_output_batch_dimension(c_dim); dnums.set_input_feature_dimension(n_dim); - dnums.set_output_feature_dimension(n_dim); // The gradients become the RHS of the convolution. // The gradients have shape [batch, out_rows, out_cols, ..., out_depth] @@ -554,11 +552,17 @@ class ConvBackpropFilterOp : public XlaOpKernel { std::vector rhs_dilation(num_spatial_dims_); std::vector ones(num_spatial_dims_, 1); + // Tensorflow filter shape is [ H, W, ..., inC, outC ]. + for (int i = 0; i < num_spatial_dims_; ++i) { + dnums.add_output_spatial_dimensions(i); + } + dnums.set_output_batch_dimension(num_spatial_dims_); + dnums.set_output_feature_dimension(num_spatial_dims_ + 1); + for (int i = 0; i < num_spatial_dims_; ++i) { int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); dnums.add_input_spatial_dimensions(dim); dnums.add_kernel_spatial_dimensions(dim); - dnums.add_output_spatial_dimensions(dim); // We will also need to pad the input with zeros such that after the // convolution, we get the right size for the filter. @@ -615,26 +619,11 @@ class ConvBackpropFilterOp : public XlaOpKernel { /*window_strides=*/ones, padding, /*lhs_dilation=*/ones, rhs_dilation, dnums); - // The layout of filter_backprop will match the layout of - // padded_activations - // and so will have layout: [out_feature, h, w, ..., in_feature] - // Tensorflow filter shape is [ H, W, ..., inC, outC ], so we transpose the - // output. - std::vector transpose_dims; - transpose_dims.reserve(num_dims()); - for (int i = 0; i < num_spatial_dims_; ++i) { - transpose_dims.push_back(dnums.output_spatial_dimensions(i)); - } - transpose_dims.push_back(c_dim); - transpose_dims.push_back(n_dim); - xla::ComputationDataHandle filter_backprop_reshaped = - b->Transpose(filter_backprop, transpose_dims); - if (depthwise_) { - filter_backprop_reshaped = ContractFilterForDepthwiseBackprop( - ctx, filter_shape, ctx->input_type(0), filter_backprop_reshaped, b); + filter_backprop = ContractFilterForDepthwiseBackprop( + ctx, filter_shape, ctx->input_type(0), filter_backprop, b); } - ctx->SetOutput(0, filter_backprop_reshaped); + ctx->SetOutput(0, filter_backprop); } protected: diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc index 828ae675d7..f198c4c08e 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc @@ -55,19 +55,7 @@ MatchBackwardFilter(HloInstruction* conv) { // v v // Convolution // conv - // | - // v - // Transpose (optional if identity transposition) CHECK_EQ(HloOpcode::kConvolution, conv->opcode()); - // If the forward convolution is followed by a transpose, we can fuse the - // transpose into the backward convolution as well. - HloInstruction* transpose = nullptr; - if (conv->user_count() == 1) { - HloInstruction* single_user = *conv->users().begin(); - if (single_user->opcode() == HloOpcode::kTranspose) { - transpose = single_user; - } - } // Step 2: match paddings and dimension numbers of the forward convolution. const ConvolutionDimensionNumbers& conv_dnums = @@ -75,6 +63,9 @@ MatchBackwardFilter(HloInstruction* conv) { auto input_batch_dim = conv_dnums.input_batch_dimension(); auto input_feature_dim = conv_dnums.input_feature_dimension(); auto input_spatial_dims = conv_dnums.input_spatial_dimensions(); + auto kernel_input_feature_dim = conv_dnums.kernel_input_feature_dimension(); + auto kernel_output_feature_dim = conv_dnums.kernel_output_feature_dimension(); + auto kernel_spatial_dims = conv_dnums.kernel_spatial_dimensions(); auto output_batch_dim = conv_dnums.output_batch_dimension(); auto output_feature_dim = conv_dnums.output_feature_dimension(); auto output_spatial_dims = conv_dnums.output_spatial_dimensions(); @@ -98,7 +89,8 @@ MatchBackwardFilter(HloInstruction* conv) { } // Padding high will be checked in Step 3. } - if (transpose == nullptr && !window_util::HasWindowDilation(conv->window())) { + if (input_batch_dim == output_batch_dim && + !window_util::HasWindowDilation(conv->window())) { VLOG(1) << conv->ToString() << " is a regular forward convolution. No need " "to fold it to a backward filter convolution."; @@ -169,53 +161,32 @@ MatchBackwardFilter(HloInstruction* conv) { } } - // To make future HLO passes easier, we canonicalize the fused expression by - // adding an identity transposition if it's omitted in the pattern. - if (transpose == nullptr) { - // Create an identity transposition with the same rank as the forward - // convolution. - HloComputation* parent_computation = conv->parent(); - std::vector transpose_dimensions(ShapeUtil::Rank(conv->shape())); - std::iota(transpose_dimensions.begin(), transpose_dimensions.end(), 0); - transpose = - parent_computation->AddInstruction(HloInstruction::CreateTranspose( - conv->shape(), conv, transpose_dimensions)); - TF_CHECK_OK(conv->ReplaceAllUsesWith(transpose)); - } - // Restore the dimension numbers of the backward convolution from the forward // convolution. The two activation dimensions are reversed (batch and // feature). ConvolutionDimensionNumbers backward_conv_dnums; backward_conv_dnums.set_input_batch_dimension(input_feature_dim); backward_conv_dnums.set_input_feature_dimension(input_batch_dim); - backward_conv_dnums.set_output_batch_dimension(output_feature_dim); - backward_conv_dnums.set_output_feature_dimension(output_batch_dim); for (int i = 0; i < input_spatial_dims.size(); ++i) { backward_conv_dnums.add_input_spatial_dimensions(input_spatial_dims[i]); } - for (int i = 0; i < output_spatial_dims.size(); ++i) { - backward_conv_dnums.add_output_spatial_dimensions(output_spatial_dims[i]); + backward_conv_dnums.set_output_batch_dimension(kernel_input_feature_dim); + backward_conv_dnums.set_output_feature_dimension(kernel_output_feature_dim); + for (int i = 0; i < kernel_spatial_dims.size(); ++i) { + backward_conv_dnums.add_output_spatial_dimensions(kernel_spatial_dims[i]); } // The dimension numbering of the output of the forward convolution (before // transposition) is the same as that of the activations (according to the // semantics of kConvolution). The batch dimension of the activations should // be treated as the input feature dimension, and the feature dimension should // be treated as the output feature. - // - // The output of the forward convolution needs to be transposed to fit into - // the dimension numbering of the weight gradients. This transposition maps - // dimension i to PositionInContainer(transpose->dimensions(), i). - backward_conv_dnums.set_kernel_input_feature_dimension( - PositionInContainer(transpose->dimensions(), output_batch_dim)); - backward_conv_dnums.set_kernel_output_feature_dimension( - PositionInContainer(transpose->dimensions(), output_feature_dim)); + backward_conv_dnums.set_kernel_input_feature_dimension(output_batch_dim); + backward_conv_dnums.set_kernel_output_feature_dimension(output_feature_dim); for (int i = 0; i < output_spatial_dims.size(); ++i) { - backward_conv_dnums.add_kernel_spatial_dimensions( - PositionInContainer(transpose->dimensions(), output_spatial_dims[i])); + backward_conv_dnums.add_kernel_spatial_dimensions(output_spatial_dims[i]); } - return std::make_tuple(true, std::vector({transpose, conv}), + return std::make_tuple(true, std::vector({conv}), backward_conv_window, backward_conv_dnums); } diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc index 112c496e1f..34e6bdb117 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc @@ -46,18 +46,18 @@ class ConvolutionFoldingTest : public HloTestBase { // // TODO(jingyue): Add more tests on NCHW input order which TF also supports. tf_default_dnums_for_backward_filter_.set_input_batch_dimension(3); - tf_default_dnums_for_backward_filter_.set_output_batch_dimension(3); tf_default_dnums_for_backward_filter_.set_input_feature_dimension(0); - tf_default_dnums_for_backward_filter_.set_output_feature_dimension(0); tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(1); - tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1); tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(2); - tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(2); tf_default_dnums_for_backward_filter_.set_kernel_input_feature_dimension(0); tf_default_dnums_for_backward_filter_.set_kernel_output_feature_dimension( 3); tf_default_dnums_for_backward_filter_.add_kernel_spatial_dimensions(1); tf_default_dnums_for_backward_filter_.add_kernel_spatial_dimensions(2); + tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(0); + tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1); + tf_default_dnums_for_backward_filter_.set_output_batch_dimension(2); + tf_default_dnums_for_backward_filter_.set_output_feature_dimension(3); tf_default_dnums_for_backward_input_.set_input_batch_dimension(0); tf_default_dnums_for_backward_input_.set_output_batch_dimension(0); @@ -86,7 +86,7 @@ class ConvolutionFoldingTest : public HloTestBase { ConvolutionDimensionNumbers tf_default_dnums_for_backward_input_; }; -TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithoutTranspose) { +TEST_F(ConvolutionFoldingTest, BackwardFilterConvolve) { HloComputation::Builder builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -136,7 +136,7 @@ TEST_F(ConvolutionFoldingTest, auto module = CreateNewModule(); module->AddEntryComputation(builder.Build()); - EXPECT_FALSE(FoldConvolution(module.get())); + EXPECT_TRUE(FoldConvolution(module.get())); } // Extracted from block35 training. @@ -155,13 +155,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithPaddedActivations) { conv_window.mutable_dimensions(i)->set_padding_low(1); conv_window.mutable_dimensions(i)->set_padding_high(1); } - HloInstruction* convolution = - builder.AddInstruction(HloInstruction::CreateConvolve( - ShapeUtil::MakeShape(F32, {32, 3, 3, 32}), activations, gradients, - conv_window, tf_default_dnums_for_backward_filter_)); - - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(F32, {3, 3, 32, 32}), convolution, {1, 2, 3, 0})); + builder.AddInstruction(HloInstruction::CreateConvolve( + ShapeUtil::MakeShape(F32, {32, 3, 3, 32}), activations, gradients, + conv_window, tf_default_dnums_for_backward_filter_)); auto module = CreateNewModule(); HloComputation* entry_computation = @@ -189,13 +185,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithPaddedGradients) { conv_window.mutable_dimensions(i)->set_padding_high(-1); conv_window.mutable_dimensions(i)->set_window_dilation(2); } - HloInstruction* convolution = - builder.AddInstruction(HloInstruction::CreateConvolve( - ShapeUtil::MakeShape(F32, {320, 3, 3, 192}), activations, gradients, - conv_window, tf_default_dnums_for_backward_filter_)); - - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(F32, {3, 3, 192, 320}), convolution, {1, 2, 3, 0})); + builder.AddInstruction(HloInstruction::CreateConvolve( + ShapeUtil::MakeShape(F32, {320, 3, 3, 192}), activations, gradients, + conv_window, tf_default_dnums_for_backward_filter_)); auto module = CreateNewModule(); HloComputation* entry_computation = @@ -222,13 +214,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithUnevenPadding) { // Uneven padding: padding_low=0, padding_high=1 conv_window.mutable_dimensions(i)->set_padding_high(1); } - HloInstruction* convolution = - builder.AddInstruction(HloInstruction::CreateConvolve( - ShapeUtil::MakeShape(F32, {32, 2, 2, 32}), activations, gradients, - conv_window, tf_default_dnums_for_backward_filter_)); - - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(F32, {2, 2, 32, 32}), convolution, {1, 2, 3, 0})); + builder.AddInstruction(HloInstruction::CreateConvolve( + ShapeUtil::MakeShape(F32, {32, 2, 2, 32}), activations, gradients, + conv_window, tf_default_dnums_for_backward_filter_)); auto module = CreateNewModule(); HloComputation* entry_computation = diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc index 11290eda4f..c29fee0879 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc @@ -202,8 +202,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution( // ABCD0 = Pad(ABCD, padding_high=1) // BackwardFilterConv(ABCD0, xyz, padding_low=pading_high=1) // We choose the lesser of padding_low and padding_high as the new padding. - HloInstruction* transpose = backward_conv->fused_expression_root(); - HloInstruction* forward_conv = transpose->mutable_operand(0); + HloInstruction* forward_conv = backward_conv->fused_expression_root(); HloInstruction* input = backward_conv->mutable_operand(0); Window new_forward_conv_window = forward_conv->window(); Window new_backward_conv_window = backward_conv->window(); @@ -269,19 +268,10 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution( .ConsumeValueOrDie(), padded_input, output, new_forward_conv_window, forward_conv_dnums)); - HloInstruction* new_transpose = - computation->AddInstruction(HloInstruction::CreateTranspose( - ShapeInference::InferTransposeShape(new_forward_conv->shape(), - transpose->dimensions()) - .ConsumeValueOrDie(), - new_forward_conv, transpose->dimensions())); - - // Fuse the new forward convolution and the new transpose to the new backward - // convolution. + // Fuse the new forward convolution to the new backward convolution. HloInstruction* new_backward_conv = computation->CreateFusionInstructionForBackwardConvolution( - {new_transpose, new_forward_conv}, - HloInstruction::FusionKind::kConvBackwardFilter, + {new_forward_conv}, HloInstruction::FusionKind::kConvBackwardFilter, new_backward_conv_window, backward_conv_dnums); VLOG(1) << "Canonicalizing backward filter conv"; -- GitLab From b2db981a6731e978453862a73dab892bc674db68 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Thu, 30 Nov 2017 16:37:11 -0800 Subject: [PATCH 1017/1801] Merge changes from github. PiperOrigin-RevId: 177526301 --- .gitignore | 5 + tensorflow/compiler/tf2xla/xla_op_kernel.cc | 4 +- tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/ptr_util.h | 47 +- .../xla/service/buffer_assignment_test.cc | 20 +- .../xla/service/buffer_liveness_test.cc | 34 +- .../compiler/xla/service/cpu/cpu_compiler.cc | 25 +- .../xla/service/cpu/simple_orc_jit.cc | 130 ++--- tensorflow/compiler/xla/xla_data.proto | 2 +- .../boosted_trees/lib/utils/batch_features.h | 2 +- .../contrib/boosted_trees/lib/utils/example.h | 10 +- .../lib/utils/sparse_column_iterable.cc | 4 + tensorflow/contrib/cmake/external/re2.cmake | 1 + tensorflow/contrib/cmake/tf_shared_lib.cmake | 10 +- tensorflow/contrib/cmake/tf_tests.cmake | 6 +- tensorflow/contrib/crf/python/ops/crf.py | 2 +- .../contrib/data/python/kernel_tests/BUILD | 3 + .../python/ops/poisson_lognormal.py | 2 +- .../contrib/eager/python/metrics_impl.py | 2 +- .../contrib/factorization/python/ops/wals.py | 2 +- tensorflow/contrib/ffmpeg/BUILD | 47 ++ tensorflow/contrib/ffmpeg/__init__.py | 3 +- tensorflow/contrib/ffmpeg/decode_audio_op.cc | 25 +- tensorflow/contrib/ffmpeg/decode_video_op.cc | 118 ++++ .../contrib/ffmpeg/decode_video_op_test.py | 69 +++ .../contrib/ffmpeg/default/ffmpeg_lib.cc | 247 ++++++--- .../ffmpeg/default/ffmpeg_lib_utility_test.cc | 3 +- tensorflow/contrib/ffmpeg/ffmpeg_lib.h | 25 +- tensorflow/contrib/ffmpeg/ffmpeg_ops.py | 17 + tensorflow/contrib/ffmpeg/testdata/small.mp4 | Bin 0 -> 383631 bytes .../contrib/ffmpeg/testdata/small_100.bmp | Bin 0 -> 537654 bytes .../framework/python/framework/graph_util.py | 28 +- .../python/framework/graph_util_test.py | 14 + tensorflow/contrib/gan/python/train.py | 2 +- .../contrib/layers/python/layers/layers.py | 5 +- .../layers/python/layers/layers_test.py | 22 +- tensorflow/contrib/lite/BUILD | 3 + tensorflow/contrib/lite/Makefile | 147 +++++ tensorflow/contrib/lite/README.md | 66 +-- .../contrib/lite/build_ios_universal_lib.sh | 31 ++ .../contrib/lite/download_dependencies.sh | 99 ++++ .../lite/examples/ios/camera/.gitignore | 2 + .../ios/camera/CameraExampleAppDelegate.h | 21 + .../ios/camera/CameraExampleAppDelegate.m | 44 ++ .../ios/camera/CameraExampleViewController.h | 48 ++ .../ios/camera/CameraExampleViewController.mm | 506 ++++++++++++++++++ .../lite/examples/ios/camera/Info.plist | 44 ++ .../camera/MainStoryboard_iPhone.storyboard | 46 ++ .../contrib/lite/examples/ios/camera/Podfile | 5 + .../lite/examples/ios/camera/data/.gitignore | 0 .../contrib/lite/examples/ios/camera/main.mm | 28 + .../project.pbxproj | 419 +++++++++++++++ .../lite/examples/ios/simple/AppDelegate.h | 21 + .../lite/examples/ios/simple/AppDelegate.mm | 47 ++ .../contrib/lite/examples/ios/simple/Podfile | 5 + .../examples/ios/simple/RunModel-Info.plist | 47 ++ .../ios/simple/RunModelViewController.h | 24 + .../ios/simple/RunModelViewController.mm | 221 ++++++++ .../ios/simple/RunModelViewController.xib | 46 ++ .../examples/ios/simple/data/grace_hopper.jpg | Bin 0 -> 73746 bytes .../lite/examples/ios/simple/ios_image_load.h | 23 + .../examples/ios/simple/ios_image_load.mm | 80 +++ .../contrib/lite/examples/ios/simple/main.mm | 22 + .../simple/simple.xcodeproj/project.pbxproj | 359 +++++++++++++ tensorflow/contrib/lite/g3doc/apis.md | 2 +- .../lite/g3doc/tf_ops_compatibility.md | 2 +- tensorflow/contrib/lite/ios_makefile.inc | 31 ++ .../contrib/lite/java/demo/app/build.gradle | 4 +- .../lite/models/testdata/g3doc/README.md | 29 +- .../contrib/lite/nnapi/NeuralNetworksShim.h | 6 +- .../lite/schema/upgrade_schema_test.py | 2 +- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/parse_testdata.cc | 2 +- tensorflow/contrib/lite/testing/test_runner.h | 2 +- tensorflow/contrib/lite/toco/model.h | 2 +- .../contrib/lite/tools/benchmark_model.cc | 95 ++++ .../contrib/lite/tools/mutable_op_resolver.h | 10 + tensorflow/contrib/mpi/BUILD | 1 + .../contrib/nn/python/ops/cross_entropy.py | 2 +- .../contrib/nn/python/ops/sampling_ops.py | 2 +- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 1 - tensorflow/contrib/slim/README.md | 9 +- .../contrib/slim/python/slim/evaluation.py | 4 +- tensorflow/contrib/summary/BUILD | 14 - .../contrib/summary/summary_ops_graph_test.py | 5 +- .../contrib/summary/summary_ops_test.py | 7 +- .../contrib/summary/summary_test_util.py | 39 +- .../models/decisions_to_data_then_nn_test.py | 6 +- tensorflow/core/BUILD | 6 + tensorflow/core/graph/graph.h | 1 - .../optimizers/arithmetic_optimizer.cc | 2 +- tensorflow/core/kernels/BUILD | 9 +- .../core/kernels/batch_matmul_op_complex.cc | 2 + .../core/kernels/batch_matmul_op_real.cc | 2 + tensorflow/core/kernels/cwise_op_asinh.cc | 2 +- tensorflow/core/kernels/decode_bmp_op.cc | 15 +- .../core/kernels/dynamic_partition_op_test.cc | 58 ++ .../core/kernels/mkl_batch_matmul_op.cc | 238 ++++++++ .../core/kernels/prefetch_dataset_op.cc | 2 + tensorflow/core/kernels/summary_interface.cc | 4 +- tensorflow/core/lib/io/path.cc | 66 ++- tensorflow/core/lib/io/path.h | 3 + tensorflow/core/ops/math_ops.cc | 24 +- tensorflow/core/ops/math_ops_test.cc | 11 + .../platform/default/build_config_root.bzl | 22 +- tensorflow/core/util/ptr_util.h | 80 +++ tensorflow/docs_src/extend/adding_an_op.md | 28 +- tensorflow/docs_src/get_started/input_fn.md | 4 +- .../docs_src/install/install_windows.md | 10 +- tensorflow/docs_src/mobile/ios_build.md | 2 +- tensorflow/docs_src/mobile/mobile_intro.md | 2 +- tensorflow/docs_src/mobile/optimizing.md | 2 +- .../performance/xla/operation_semantics.md | 2 +- .../docs_src/tutorials/image_recognition.md | 2 +- .../reading_data/convert_to_records.py | 24 +- .../examples/speech_commands/input_data.py | 3 +- tensorflow/examples/speech_commands/train.py | 3 +- tensorflow/examples/udacity/1_notmnist.ipynb | 6 +- .../java/org/tensorflow/OperationBuilder.java | 35 +- .../src/main/native/operation_builder_jni.cc | 36 ++ .../src/main/native/operation_builder_jni.h | 8 + .../org/tensorflow/OperationBuilderTest.java | 37 ++ tensorflow/python/BUILD | 2 + tensorflow/python/data/util/nest.py | 10 + tensorflow/python/data/util/nest_test.py | 9 + tensorflow/python/estimator/export/export.py | 2 +- tensorflow/python/estimator/training_test.py | 4 +- tensorflow/python/keras/BUILD | 1 + .../python/keras/_impl/keras/backend.py | 2 +- .../python/keras/_impl/keras/callbacks.py | 2 +- .../keras/_impl/keras/callbacks_test.py | 61 ++- .../keras/_impl/keras/engine/training_test.py | 6 + .../keras/_impl/keras/estimator_test.py | 5 + .../python/keras/_impl/keras/models_test.py | 21 +- .../_impl/keras/utils/data_utils_test.py | 7 + .../keras/_impl/keras/utils/io_utils.py | 11 +- .../keras/_impl/keras/utils/vis_utils.py | 2 +- tensorflow/python/kernel_tests/BUILD | 16 +- .../python/kernel_tests/decode_bmp_op_test.py | 75 +++ .../kernel_tests/prefetch_dataset_op_test.py | 59 ++ tensorflow/python/layers/convolutional.py | 6 +- tensorflow/python/ops/math_ops_test.py | 2 +- tensorflow/python/ops/variable_scope.py | 2 +- tensorflow/python/ops/variables.py | 4 +- .../python/profiler/model_analyzer_test.py | 2 +- tensorflow/python/training/saver_test.py | 24 +- .../training/sync_replicas_optimizer.py | 2 +- tensorflow/python/util/nest.py | 15 +- tensorflow/python/util/nest_test.py | 13 +- ...orflow.keras.callbacks.-tensor-board.pbtxt | 2 +- tensorflow/tools/benchmark/benchmark_model.cc | 2 +- .../tools/ci_build/builds/test_user_ops.sh | 28 +- .../tools/ci_build/ci_parameterized_build.sh | 4 +- .../ci_build/windows/cpu/cmake/run_build.bat | 2 +- .../windows/cpu/pip/build_tf_windows.sh | 4 +- .../ci_build/windows/gpu/cmake/run_build.bat | 2 +- .../windows/gpu/pip/build_tf_windows.sh | 4 +- .../dist_test/python/census_widendeep.py | 3 +- tensorflow/tools/docker/Dockerfile.devel | 1 - .../tools/docker/Dockerfile.devel-cpu-mkl | 85 +++ tensorflow/tools/docker/Dockerfile.devel-gpu | 2 - .../docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 2 - .../docker/notebooks/2_getting_started.ipynb | 12 +- .../notebooks/3_mnist_from_scratch.ipynb | 2 + .../tools/pip_package/pip_smoke_test.py | 3 - tensorflow/tools/pip_package/setup.py | 27 +- tensorflow/workspace.bzl | 2 +- third_party/flatbuffers/flatbuffers.BUILD | 7 +- third_party/mkl/build_defs.bzl | 2 +- third_party/nccl.BUILD | 2 +- third_party/py/python_configure.bzl | 91 ++-- 171 files changed, 4446 insertions(+), 578 deletions(-) create mode 100644 tensorflow/contrib/ffmpeg/decode_video_op.cc create mode 100644 tensorflow/contrib/ffmpeg/decode_video_op_test.py create mode 100644 tensorflow/contrib/ffmpeg/testdata/small.mp4 create mode 100644 tensorflow/contrib/ffmpeg/testdata/small_100.bmp create mode 100644 tensorflow/contrib/lite/Makefile create mode 100755 tensorflow/contrib/lite/build_ios_universal_lib.sh create mode 100755 tensorflow/contrib/lite/download_dependencies.sh create mode 100644 tensorflow/contrib/lite/examples/ios/camera/.gitignore create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm create mode 100644 tensorflow/contrib/lite/examples/ios/camera/Info.plist create mode 100644 tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard create mode 100644 tensorflow/contrib/lite/examples/ios/camera/Podfile create mode 100644 tensorflow/contrib/lite/examples/ios/camera/data/.gitignore create mode 100644 tensorflow/contrib/lite/examples/ios/camera/main.mm create mode 100644 tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj create mode 100644 tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h create mode 100644 tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/Podfile create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib create mode 100644 tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg create mode 100644 tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h create mode 100644 tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/main.mm create mode 100644 tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj create mode 100644 tensorflow/contrib/lite/ios_makefile.inc create mode 100644 tensorflow/contrib/lite/tools/benchmark_model.cc create mode 100644 tensorflow/core/kernels/mkl_batch_matmul_op.cc create mode 100644 tensorflow/core/util/ptr_util.h create mode 100644 tensorflow/python/kernel_tests/prefetch_dataset_op_test.py create mode 100644 tensorflow/tools/docker/Dockerfile.devel-cpu-mkl diff --git a/.gitignore b/.gitignore index 9ae0d9c96f..d11a504bdc 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,8 @@ Pods Podfile.lock *.pbxproj *.xcworkspacedata +/tensorflow/contrib/lite/downloads/** +/tensorflow/contrib/lite/gen/** +/tensorflow/contrib/lite/examples/ios/simple/data/*.txt +/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite +xcuserdata/** \ No newline at end of file diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index f16472cac8..79d501b511 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -346,9 +346,9 @@ void XlaOpKernelContext::SetConstantOutput(int index, const Tensor& constant) { } void XlaOpKernelContext::SetInvalidOutput(int index) { - const TensorShape shape; Tensor* output = nullptr; - OP_REQUIRES_OK(context_, context_->allocate_output(index, shape, &output)); + OP_REQUIRES_OK(context_, + context_->allocate_output(index, TensorShape({}), &output)); XlaExpression* expression = CastExpressionFromUninitializedTensor(output); xla::ComputationDataHandle handle; handle.set_handle(0); diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 515b572b0e..d3f292207f 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -175,6 +175,7 @@ cc_library( ":types", ":xla_data_proto", "//tensorflow/core:lib", + "//tensorflow/core:ptr_util", ], ) diff --git a/tensorflow/compiler/xla/ptr_util.h b/tensorflow/compiler/xla/ptr_util.h index fa67030313..c58c19db2c 100644 --- a/tensorflow/compiler/xla/ptr_util.h +++ b/tensorflow/compiler/xla/ptr_util.h @@ -16,7 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_PTR_UTIL_H_ #define TENSORFLOW_COMPILER_XLA_PTR_UTIL_H_ -// Utility functions for pointers. +// As this was moved to tensorflow/core/util, provide indirections here to +// maintain current functionality of the library. #include @@ -24,55 +25,27 @@ limitations under the License. #include #include -namespace xla { - -namespace internal { - -// Trait to select overloads and return types for MakeUnique. -template -struct MakeUniqueResult { - using scalar = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using array = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using invalid = void; -}; +#include "tensorflow/core/util/ptr_util.h" -} // namespace internal +namespace xla { -// Transfers ownership of a raw pointer to a std::unique_ptr of deduced type. -// Example: -// X* NewX(int, int); -// auto x = WrapUnique(NewX(1, 2)); // 'x' is std::unique_ptr. -// -// WrapUnique is useful for capturing the output of a raw pointer factory. -// However, prefer 'MakeUnique(args...) over 'WrapUnique(new T(args...))'. -// auto x = WrapUnique(new X(1, 2)); // works, but nonideal. -// auto x = MakeUnique(1, 2); // safer, standard, avoids raw 'new'. -// -// Note: Cannot wrap pointers to array of unknown bound (i.e. U(*)[]). template std::unique_ptr WrapUnique(T* ptr) { - static_assert(!std::is_array::value || std::extent::value != 0, - "types T[0] or T[] are unsupported"); - return std::unique_ptr(ptr); + return tensorflow::WrapUnique(ptr); } template -typename internal::MakeUniqueResult::scalar MakeUnique(Args&&... args) { - return std::unique_ptr(new T(std::forward(args)...)); +typename tensorflow::helper::MakeUniqueResult::scalar MakeUnique( + Args&&... args) { + return tensorflow::MakeUnique(std::forward(args)...); } // Overload for array of unknown bound. // The allocation of arrays needs to use the array form of new, // and cannot take element constructor arguments. template -typename internal::MakeUniqueResult::array MakeUnique(size_t n) { - return std::unique_ptr(new typename std::remove_extent::type[n]()); +typename tensorflow::helper::MakeUniqueResult::array MakeUnique(size_t n) { + return tensorflow::MakeUnique(n); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index 75c71dfeb1..09681b34e7 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -85,7 +85,7 @@ class BufferAssignmentTest : public HloTestBase { std::unique_ptr RunBufferAssignment(HloModule* module, int64 alignment = 1) { return BufferAssigner::Run( - module, MakeUnique(module), + module, xla::MakeUnique(module), backend().compiler()->BufferSizeBytesFunction(), [alignment](LogicalBuffer::Color) { return alignment; }) .ConsumeValueOrDie(); @@ -94,7 +94,7 @@ class BufferAssignmentTest : public HloTestBase { std::unique_ptr RunColoredBufferAssignment( HloModule* module, BufferLiveness::Colorer colorer, int64 alignment = 1) { return BufferAssigner::Run( - module, MakeUnique(module), + module, xla::MakeUnique(module), backend().compiler()->BufferSizeBytesFunction(), [alignment](LogicalBuffer::Color) { return alignment; }, false, std::move(colorer)) @@ -1451,7 +1451,7 @@ class WhileBufferAssignmentTest : public HloTestBase { auto sequence = CreateMemoryMinimizingSequence(*module, ByteSizeOf).ConsumeValueOrDie(); return BufferAssigner::Run( - module, MakeUnique(module, sequence), + module, xla::MakeUnique(module, sequence), ByteSizeOf, [alignment](LogicalBuffer::Color) { return alignment; }) .ConsumeValueOrDie(); @@ -1472,7 +1472,7 @@ static void RunCopyInsertion(HloModule* module) { } TEST_F(WhileBufferAssignmentTest, TwoForwardWhileLoops) { - auto module = MakeUnique(TestName()); + auto module = xla::MakeUnique(TestName()); auto builder = HloComputation::Builder("entry"); auto input0 = builder.AddInstruction( @@ -1529,7 +1529,7 @@ TEST_F(WhileBufferAssignmentTest, TwoForwardWhileLoops) { } TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) { - auto module = MakeUnique(TestName()); + auto module = xla::MakeUnique(TestName()); auto builder = HloComputation::Builder("entry"); auto input0 = builder.AddInstruction( @@ -1574,7 +1574,7 @@ TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) { } TEST_F(BufferAssignmentTest, TwoCalls) { - auto module = MakeUnique(TestName()); + auto module = xla::MakeUnique(TestName()); Shape r0f32 = ShapeUtil::MakeShape(xla::F32, {}); HloComputation* sub_computation; { @@ -1639,7 +1639,7 @@ static bool IsPostOrderTraversal( } TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) { - auto module = MakeUnique(TestName()); + auto module = xla::MakeUnique(TestName()); auto builder = HloComputation::Builder(TestName()); auto zero = builder.AddInstruction( @@ -1710,15 +1710,15 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) { auto assignment = BufferAssigner::Run( module.get(), - MakeUnique(module.get(), sequence), ByteSizeOf, - [](LogicalBuffer::Color) { return 1; }) + xla::MakeUnique(module.get(), sequence), + ByteSizeOf, [](LogicalBuffer::Color) { return 1; }) .ConsumeValueOrDie(); EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment)); } TEST_F(WhileBufferAssignmentTest, WhilesDontShareEntryParamIfLiveOut) { - auto module = MakeUnique(TestName()); + auto module = xla::MakeUnique(TestName()); auto builder = HloComputation::Builder("entry"); auto input0 = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc index 56600b5838..13825fe05b 100644 --- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc +++ b/tensorflow/compiler/xla/service/buffer_liveness_test.cc @@ -120,7 +120,7 @@ TEST_F(BufferLivenessTest, ElementwiseChain) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, negate)); @@ -167,10 +167,10 @@ TEST_F(BufferLivenessTest, MultipleEntryParameters_Sequential) { SequentialHloOrdering::HloModuleSequence sequence; sequence.insert({entry, {param0, negate, param1, exp, add}}); - auto liveness = BufferLiveness::Run( - module.get(), - MakeUnique(module.get(), sequence)) - .ConsumeValueOrDie(); + auto liveness = + BufferLiveness::Run(module.get(), xla::MakeUnique( + module.get(), sequence)) + .ConsumeValueOrDie(); // Entry parameters interfere as if they are defined simultaneously at // the very beginning. @@ -216,7 +216,7 @@ TEST_F(BufferLivenessTest, NonElementwiseOperand) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, exp)); @@ -250,7 +250,7 @@ TEST_F(BufferLivenessTest, OverlappedBuffers) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, negate)); @@ -294,7 +294,7 @@ TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) { std::vector order = {param, negate, exp, add}; module_sequence.emplace(computation, order); auto liveness = - BufferLiveness::Run(module.get(), MakeUnique( + BufferLiveness::Run(module.get(), xla::MakeUnique( module.get(), module_sequence)) .ConsumeValueOrDie(); @@ -334,7 +334,7 @@ TEST_F(BufferLivenessTest, TupleLiveOut) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); // All buffers should be live out except the param @@ -370,7 +370,7 @@ TEST_F(BufferLivenessTest, EmbeddedComputation) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); // Buffers in different computations should always interfere. @@ -409,7 +409,7 @@ TEST_F(BufferLivenessTest, TupleConstantLiveOut) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); // Only the element buffers of the tuple constant which are pointed to by @@ -474,7 +474,7 @@ TEST_F(BufferLivenessTest, IndependentTupleElements) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); // We compare tuple element pairs that are input/output to the computation: @@ -536,7 +536,7 @@ TEST_F(BufferLivenessTest, DependentTupleElements) { auto liveness = BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + xla::MakeUnique(module.get())) .ConsumeValueOrDie(); // We compare tuple element pairs that are input/output to the computation: @@ -624,8 +624,8 @@ class FusedDynamicUpdateSliceLivenessTest : public BufferLivenessTest { // Run BufferLiveness on 'module'. auto liveness = - BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + BufferLiveness::Run( + module.get(), xla::MakeUnique(module.get())) .ConsumeValueOrDie(); // Return whether or not buffers interference is detected between // 'tuple_param0' and 'tuple_root' at shape index '{1}'. @@ -736,8 +736,8 @@ class DynamicUpdateSliceLivenessTest : public BufferLivenessTest { module->AddEmbeddedComputation(builder.Build()); // Run BufferLiveness on 'module'. auto liveness = - BufferLiveness::Run(module.get(), - MakeUnique(module.get())) + BufferLiveness::Run( + module.get(), xla::MakeUnique(module.get())) .ConsumeValueOrDie(); // Return whether or not buffers interference is detected between // 'tuple_param0' and 'tuple_root' at shape index '{1}'. diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 99dae793ab..988f632748 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -469,11 +469,11 @@ StatusOr> CpuCompiler::RunBackend( &pre_optimization_ir_hook, &post_optimization_ir_hook)); // Compile must be thread-safe so create a new LLVM context for the module. - auto llvm_context = MakeUnique(); + auto llvm_context = xla::MakeUnique(); auto llvm_module = - MakeUnique("__compute_module", *llvm_context); + xla::MakeUnique("__compute_module", *llvm_context); - auto jit = MakeUnique( + auto jit = xla::MakeUnique( CompilerTargetOptions(module->config()), CodeGenOptLevel(module->config()), options::OptimizeForSizeRequested(module->config()), @@ -528,9 +528,9 @@ StatusOr> CpuCompiler::RunBackend( // uses data dependencies for determining order. TF_ASSIGN_OR_RETURN( std::unique_ptr assignment, - BufferAssigner::Run(module.get(), - MakeUnique(module.get()), - BufferSizeBytesFunction(), memory_alignment)); + BufferAssigner::Run( + module.get(), xla::MakeUnique(module.get()), + BufferSizeBytesFunction(), memory_alignment)); // BufferAssignment::ToString() includes a header, so no need for us to // print one ourselves. XLA_VLOG_LINES(2, assignment->ToString()); @@ -557,7 +557,7 @@ StatusOr> CpuCompiler::RunBackend( const void* data = instruction->literal().InternalData(); int64 size = CpuExecutable::ShapeSizeBytes(instruction->shape()); auto iter = aligned_constants.emplace( - instruction, MakeUnique(size)); + instruction, xla::MakeUnique(size)); CHECK_EQ(iter.second, true); unsigned char* aligned_data = iter.first->second.get(); memcpy(aligned_data, data, size); @@ -642,10 +642,10 @@ StatusOr> CpuCompiler::RunBackend( // temporary buffers are required to run the computation. TF_ASSIGN_OR_RETURN( std::unique_ptr assignment, - BufferAssigner::Run( - module.get(), - MakeUnique(module.get(), module_sequence), - BufferSizeBytesFunction(), memory_alignment)); + BufferAssigner::Run(module.get(), + xla::MakeUnique( + module.get(), module_sequence), + BufferSizeBytesFunction(), memory_alignment)); // BufferAssignment::ToString() includes a header, so no need for us to // print one ourselves. XLA_VLOG_LINES(2, assignment->ToString()); @@ -824,7 +824,8 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, TF_ASSIGN_OR_RETURN( std::unique_ptr assignment, BufferAssigner::Run( - module, MakeUnique(module, module_sequence), + module, + xla::MakeUnique(module, module_sequence), BufferSizeBytesFunction(), memory_alignment)); // BufferAssignment::ToString() includes a header, so no need for us to // print one ourselves. diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index db6c201876..cda2783307 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -213,71 +213,75 @@ bool RegisterKnownJITSymbols() { #undef REGISTER_CPU_RUNTIME_SYMBOL -#define REGISTER_LIBM_SYMBOL(name) \ - do { \ - /* Register both the F32 and F64 variants of the libm symbol. */ \ - registry->Register(#name "f", reinterpret_cast(name##f)); \ - registry->Register(#name, reinterpret_cast(name)); \ +// Register both the f32 (float) and f64 (double) versions of a libm symbol. +// Unfortunately the double versions are overloaded on some systems, e.g. +// Mac so we need an explicit cast. This requires passing the function signature +// for that case. +#define REGISTER_LIBM_SYMBOL(name, double_sig) \ + do { \ + registry->Register(#name "f", reinterpret_cast(name##f)); \ + registry->Register( \ + #name, reinterpret_cast(static_cast(name))); \ } while (false) - REGISTER_LIBM_SYMBOL(acos); - REGISTER_LIBM_SYMBOL(acosh); - REGISTER_LIBM_SYMBOL(asin); - REGISTER_LIBM_SYMBOL(asinh); - REGISTER_LIBM_SYMBOL(atan); - REGISTER_LIBM_SYMBOL(atan2); - REGISTER_LIBM_SYMBOL(atanh); - REGISTER_LIBM_SYMBOL(cbrt); - REGISTER_LIBM_SYMBOL(ceil); - REGISTER_LIBM_SYMBOL(copysign); - REGISTER_LIBM_SYMBOL(cos); - REGISTER_LIBM_SYMBOL(cosh); - REGISTER_LIBM_SYMBOL(erf); - REGISTER_LIBM_SYMBOL(erfc); - REGISTER_LIBM_SYMBOL(exp); - REGISTER_LIBM_SYMBOL(exp2); - REGISTER_LIBM_SYMBOL(expm1); - REGISTER_LIBM_SYMBOL(fabs); - REGISTER_LIBM_SYMBOL(fdim); - REGISTER_LIBM_SYMBOL(floor); - REGISTER_LIBM_SYMBOL(fma); - REGISTER_LIBM_SYMBOL(fmax); - REGISTER_LIBM_SYMBOL(fmin); - REGISTER_LIBM_SYMBOL(fmod); - REGISTER_LIBM_SYMBOL(frexp); - REGISTER_LIBM_SYMBOL(hypot); - REGISTER_LIBM_SYMBOL(ilogb); - REGISTER_LIBM_SYMBOL(ldexp); - REGISTER_LIBM_SYMBOL(lgamma); - REGISTER_LIBM_SYMBOL(llrint); - REGISTER_LIBM_SYMBOL(llround); - REGISTER_LIBM_SYMBOL(log); - REGISTER_LIBM_SYMBOL(log10); - REGISTER_LIBM_SYMBOL(log1p); - REGISTER_LIBM_SYMBOL(log2); - REGISTER_LIBM_SYMBOL(logb); - REGISTER_LIBM_SYMBOL(lrint); - REGISTER_LIBM_SYMBOL(lround); - REGISTER_LIBM_SYMBOL(modf); - REGISTER_LIBM_SYMBOL(nan); - REGISTER_LIBM_SYMBOL(nearbyint); - REGISTER_LIBM_SYMBOL(nextafter); - REGISTER_LIBM_SYMBOL(nexttoward); - REGISTER_LIBM_SYMBOL(pow); - REGISTER_LIBM_SYMBOL(remainder); - REGISTER_LIBM_SYMBOL(remquo); - REGISTER_LIBM_SYMBOL(rint); - REGISTER_LIBM_SYMBOL(round); - REGISTER_LIBM_SYMBOL(scalbln); - REGISTER_LIBM_SYMBOL(scalbn); - REGISTER_LIBM_SYMBOL(sin); - REGISTER_LIBM_SYMBOL(sincos); - REGISTER_LIBM_SYMBOL(sinh); - REGISTER_LIBM_SYMBOL(sqrt); - REGISTER_LIBM_SYMBOL(tan); - REGISTER_LIBM_SYMBOL(tanh); - REGISTER_LIBM_SYMBOL(tgamma); - REGISTER_LIBM_SYMBOL(trunc); + REGISTER_LIBM_SYMBOL(acos, double (*)(double)); + REGISTER_LIBM_SYMBOL(acosh, double (*)(double)); + REGISTER_LIBM_SYMBOL(asin, double (*)(double)); + REGISTER_LIBM_SYMBOL(asinh, double (*)(double)); + REGISTER_LIBM_SYMBOL(atan, double (*)(double)); + REGISTER_LIBM_SYMBOL(atan2, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(atanh, double (*)(double)); + REGISTER_LIBM_SYMBOL(cbrt, double (*)(double)); + REGISTER_LIBM_SYMBOL(ceil, double (*)(double)); + REGISTER_LIBM_SYMBOL(copysign, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(cos, double (*)(double)); + REGISTER_LIBM_SYMBOL(cosh, double (*)(double)); + REGISTER_LIBM_SYMBOL(erf, double (*)(double)); + REGISTER_LIBM_SYMBOL(erfc, double (*)(double)); + REGISTER_LIBM_SYMBOL(exp, double (*)(double)); + REGISTER_LIBM_SYMBOL(exp2, double (*)(double)); + REGISTER_LIBM_SYMBOL(expm1, double (*)(double)); + REGISTER_LIBM_SYMBOL(fabs, double (*)(double)); + REGISTER_LIBM_SYMBOL(fdim, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(floor, double (*)(double)); + REGISTER_LIBM_SYMBOL(fma, double (*)(double, double, double)); + REGISTER_LIBM_SYMBOL(fmax, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(fmin, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(fmod, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(frexp, double (*)(double, int*)); + REGISTER_LIBM_SYMBOL(hypot, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(ilogb, int (*)(double)); + REGISTER_LIBM_SYMBOL(ldexp, double (*)(double, int)); + REGISTER_LIBM_SYMBOL(lgamma, double (*)(double)); + REGISTER_LIBM_SYMBOL(llrint, long long (*)(double)); + REGISTER_LIBM_SYMBOL(llround, long long (*)(double)); + REGISTER_LIBM_SYMBOL(log, double (*)(double)); + REGISTER_LIBM_SYMBOL(log10, double (*)(double)); + REGISTER_LIBM_SYMBOL(log1p, double (*)(double)); + REGISTER_LIBM_SYMBOL(log2, double (*)(double)); + REGISTER_LIBM_SYMBOL(logb, double (*)(double)); + REGISTER_LIBM_SYMBOL(lrint, long (*)(double)); + REGISTER_LIBM_SYMBOL(lround, long (*)(double)); + REGISTER_LIBM_SYMBOL(modf, double (*)(double, double*)); + REGISTER_LIBM_SYMBOL(nan, double (*)(const char*)); + REGISTER_LIBM_SYMBOL(nearbyint, double (*)(double)); + REGISTER_LIBM_SYMBOL(nextafter, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(nexttoward, double (*)(double, long double)); + REGISTER_LIBM_SYMBOL(pow, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(remainder, double (*)(double, double)); + REGISTER_LIBM_SYMBOL(remquo, double (*)(double, double, int*)); + REGISTER_LIBM_SYMBOL(rint, double (*)(double)); + REGISTER_LIBM_SYMBOL(round, double (*)(double)); + REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long)); + REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int)); + REGISTER_LIBM_SYMBOL(sin, double (*)(double)); + REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*)); + REGISTER_LIBM_SYMBOL(sinh, double (*)(double)); + REGISTER_LIBM_SYMBOL(sqrt, double (*)(double)); + REGISTER_LIBM_SYMBOL(tan, double (*)(double)); + REGISTER_LIBM_SYMBOL(tanh, double (*)(double)); + REGISTER_LIBM_SYMBOL(tgamma, double (*)(double)); + REGISTER_LIBM_SYMBOL(trunc, double (*)(double)); #undef REGISTER_LIBM_SYMBOL diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 7efdf8552e..6800c3d7fa 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -450,7 +450,7 @@ message ConvolutionDimensionNumbers { message ConvolveRequest { ComputationDataHandle lhs = 2; ComputationDataHandle rhs = 3; // This is the filter/kernel. - Window window = 4; // Describes the filter/kenel. + Window window = 4; // Describes the filter/kernel. ConvolutionDimensionNumbers dimension_numbers = 5; } diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index 7a550d6f73..badc629a11 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -56,7 +56,7 @@ class BatchFeatures { *num_sparse_int_features = sparse_int_feature_columns_.size(); if (*num_dense_float_features == 0 && *num_sparse_float_features == 0 && *num_sparse_int_features == 0) { - return errors::FailedPrecondition("Not intialized yet."); + return errors::FailedPrecondition("Not initialized yet."); } return Status::OK(); } diff --git a/tensorflow/contrib/boosted_trees/lib/utils/example.h b/tensorflow/contrib/boosted_trees/lib/utils/example.h index e388cf332c..54f60e1dee 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/example.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/example.h @@ -63,7 +63,7 @@ class SparseFloatFeatureColumn { public: void Reserve(const int32 size) { if (!single_dimensional_) { - mutlidimensional_values.Reserve(size); + multidimensional_values.Reserve(size); } } @@ -76,7 +76,7 @@ class SparseFloatFeatureColumn { DCHECK_EQ(0, feature_idx); single_value_ = value; } else { - mutlidimensional_values.Add(feature_idx, value); + multidimensional_values.Add(feature_idx, value); } initialized_ = true; } @@ -84,7 +84,7 @@ class SparseFloatFeatureColumn { void Clear() { single_dimensional_ = false; initialized_ = false; - mutlidimensional_values.Clear(); + multidimensional_values.Clear(); } OptionalValue operator[](int feature_idx) const { @@ -94,7 +94,7 @@ class SparseFloatFeatureColumn { if (single_dimensional_) { return OptionalValue(single_value_); } else { - return mutlidimensional_values[feature_idx]; + return multidimensional_values[feature_idx]; } } @@ -102,7 +102,7 @@ class SparseFloatFeatureColumn { bool single_dimensional_; bool initialized_; T single_value_; - SparseMultidimensionalValues mutlidimensional_values; + SparseMultidimensionalValues multidimensional_values; }; // Holds data for one example and enables lookup by feature column. diff --git a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc index bc0a93db8c..ccee9530b6 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc @@ -96,6 +96,10 @@ class IndicesRowIterator return (row_idx_ != other.row_idx_); } + bool operator<(const IndicesRowIterator& other) const { + return (row_idx_ < other.row_idx_); + } + bool operator==(const IndicesRowIterator& other) const { QCHECK_EQ(iter_, other.iter_); return (row_idx_ == other.row_idx_); diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake index b56f4b0898..d10f5959f7 100644 --- a/tensorflow/contrib/cmake/external/re2.cmake +++ b/tensorflow/contrib/cmake/external/re2.cmake @@ -45,4 +45,5 @@ ExternalProject_Add(re2 endif() -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL} + -DRE2_BUILD_TESTING:BOOL=OFF ) diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index dcedabb333..571d2b0dec 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -95,10 +95,18 @@ if(WIN32) add_dependencies(tensorflow tensorflow_static) endif(WIN32) -install(TARGETS tensorflow +target_include_directories(tensorflow PUBLIC + $ + $) + +install(TARGETS tensorflow EXPORT tensorflow_export RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + +install(EXPORT tensorflow_export + FILE TensorflowConfig.cmake + DESTINATION lib/cmake) # install necessary headers # tensorflow headers diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 5d6ba9ca8d..2e3ee2c96b 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -153,7 +153,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/data/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/factorization/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/image/*_test.py" - "${tensorflow_source_dir}/tensorflow/contrib/keras/python/keras/integration_test.py" + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/python/kernel_tests/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/python/kernel_tests/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/stateless/python/kernel_tests/*_test.py" @@ -171,7 +171,6 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/graph_editor/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/bayesflow/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/framework/*_test.py" - "${tensorflow_source_dir}/tensorflow/contrib/keras/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/distributions/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/learn/*_test.py" ) @@ -225,6 +224,9 @@ if (tensorflow_BUILD_PYTHON_TESTS) # Numerical issues, calculations off. "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py" "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/wals_test.py" + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py" + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/backend_test.py" + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py" # Float division by zero "${tensorflow_source_dir}/tensorflow/python/kernel_tests/benchmark_test.py" # Flaky, for unknown reasons. Cannot reproduce in terminal. Revisit once we can get stack traces. diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index ec395e41d0..7f5ae937b2 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -420,7 +420,7 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell): """Initialize the CrfDecodeBackwardRnnCell. Args: - num_tags: The number of tags. + num_tags: An integer. The number of tags. """ self._num_tags = num_tags diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 43431ca2c5..ffb5655c3e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -161,6 +161,7 @@ py_test( size = "small", srcs = ["flat_map_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -278,6 +279,7 @@ py_test( size = "medium", srcs = ["map_dataset_op_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:dataset_ops", @@ -348,6 +350,7 @@ py_test( size = "medium", srcs = ["reader_dataset_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip"], deps = [ ":dataset_serialization_test", "//tensorflow/contrib/data/python/ops:readers", diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py index 96dff85665..2701c36fb5 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py +++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py @@ -293,7 +293,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution): # where, # # Z|v ~ interpolate_affine[v](distribution) - # V ~ mixture_distrubution + # V ~ mixture_distribution # # thus, # diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index aa359b7a0d..2f8016ede3 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -73,7 +73,7 @@ class Metric(object): * `result()`: Computes and returns a final value for the metric from the variables in `self`. - Decendants may override `aggregate()`, but usually won't need to. It + Descendants may override `aggregate()`, but usually won't need to. It adds in the state from a list of metrics of the same type as `self`. (Default is to sum all the variables.) Note that users should not call `aggregate()`, it is for use by TensorFlow infrastructure. diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py index 2bde3e0dd7..4fe22ea26e 100644 --- a/tensorflow/contrib/factorization/python/ops/wals.py +++ b/tensorflow/contrib/factorization/python/ops/wals.py @@ -183,7 +183,7 @@ def _wals_factorization_model_function(features, labels, mode, params): # TRAIN mode: if mode == model_fn.ModeKeys.TRAIN: - # Training consists of the folowing ops (controlled using a SweepHook). + # Training consists of the following ops (controlled using a SweepHook). # Before a row sweep: # row_update_prep_gramian_op # initialize_row_update_op diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD index 7a5a4cb8c9..eccce99071 100644 --- a/tensorflow/contrib/ffmpeg/BUILD +++ b/tensorflow/contrib/ffmpeg/BUILD @@ -47,10 +47,25 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "decode_video_op_cc", + srcs = ["decode_video_op.cc"], + copts = tf_copts(), + linkstatic = 1, + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/contrib/ffmpeg/default:ffmpeg_lib", + "//tensorflow/core:framework_headers_lib", + "//third_party/eigen3", + ], + alwayslink = 1, +) + tf_custom_op_library( name = "ffmpeg.so", deps = [ ":decode_audio_op_cc", + ":decode_video_op_cc", ":encode_audio_op_cc", ], ) @@ -59,6 +74,7 @@ cc_library( name = "ffmpeg_op_lib", deps = [ ":decode_audio_op_cc", + ":decode_video_op_cc", ":encode_audio_op_cc", ], ) @@ -81,6 +97,15 @@ tf_gen_op_wrapper_py( ], ) +tf_gen_op_wrapper_py( + name = "decode_video_op_py", + require_shape_functions = True, + visibility = ["//visibility:private"], + deps = [ + ":decode_video_op_cc", + ], +) + tf_py_test( name = "decode_audio_op_test", srcs = ["decode_audio_op_test.py"], @@ -115,6 +140,27 @@ tf_py_test( tags = ["manual"], ) +tf_py_test( + name = "decode_video_op_test", + size = "small", + srcs = ["decode_video_op_test.py"], + additional_deps = [ + ":ffmpeg_ops_py", + "@six_archive//:six", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:platform", + "//tensorflow/python:image_ops", + ], + data = [ + ":test_data", + ], + tags = [ + "manual", + "notap", + ], +) + py_library( name = "ffmpeg_ops_py", srcs = [ @@ -126,6 +172,7 @@ py_library( visibility = ["//visibility:public"], deps = [ ":decode_audio_op_py", + ":decode_video_op_py", ":encode_audio_op_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_for_generated_wrappers", diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py index 2bcb7284e1..484ffee3e7 100644 --- a/tensorflow/contrib/ffmpeg/__init__.py +++ b/tensorflow/contrib/ffmpeg/__init__.py @@ -26,9 +26,10 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio +from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['decode_audio', 'encode_audio'] +_allowed_symbols = ['decode_audio', 'encode_audio', 'decode_video'] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/ffmpeg/decode_audio_op.cc b/tensorflow/contrib/ffmpeg/decode_audio_op.cc index 4b1c8a337e..92fad70b1f 100644 --- a/tensorflow/contrib/ffmpeg/decode_audio_op.cc +++ b/tensorflow/contrib/ffmpeg/decode_audio_op.cc @@ -37,29 +37,6 @@ namespace { // https://www.ffmpeg.org/ffmpeg-formats.html const char* kValidFileFormats[] = {"mp3", "mp4", "ogg", "wav"}; -// Writes binary data to a file. -Status WriteFile(const string& filename, tensorflow::StringPiece contents) { - Env& env = *Env::Default(); - std::unique_ptr file; - TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file)); - TF_RETURN_IF_ERROR(file->Append(contents)); - TF_RETURN_IF_ERROR(file->Close()); - return Status::OK(); -} - -// Cleans up a file on destruction. -class FileDeleter { - public: - explicit FileDeleter(const string& filename) : filename_(filename) {} - ~FileDeleter() { - Env& env = *Env::Default(); - env.DeleteFile(filename_).IgnoreError(); - } - - private: - const string filename_; -}; - /* * Decoding implementation, shared across V1 and V2 ops. Creates a new * output in the context. @@ -69,7 +46,7 @@ void Decode(OpKernelContext* context, const string& file_format, const int32 samples_per_second, const int32 channel_count) { // Write the input data to a temp file. - const string temp_filename = GetTempFilename(file_format); + const string temp_filename = io::GetTempFilename(file_format); OP_REQUIRES_OK(context, WriteFile(temp_filename, file_contents)); FileDeleter deleter(temp_filename); diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc new file mode 100644 index 0000000000..d44032968d --- /dev/null +++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc @@ -0,0 +1,118 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#include + +#include +#include + +#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace ffmpeg { + +class DecodeVideoOp : public OpKernel { + public: + explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + OP_REQUIRES( + context, context->num_inputs() == 1, + errors::InvalidArgument("DecodeVideo requires exactly 1 input.")); + const Tensor& contents_tensor = context->input(0); + + OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()), + errors::InvalidArgument( + "contents must be a rank-0 tensor but got shape ", + contents_tensor.shape().DebugString())); + const tensorflow::StringPiece contents = contents_tensor.scalar()(); + + // Write the input data to a temp file. + string extension; + const string temp_filename = io::GetTempFilename(extension); + OP_REQUIRES_OK(context, WriteFile(temp_filename, contents)); + FileDeleter deleter(temp_filename); + + uint32 width = 0; + uint32 height = 0; + uint32 frames = 0; + + // Run FFmpeg on the data and verify results. + std::vector output_data; + const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data, + &width, &height, &frames); + if (result.code() == error::Code::NOT_FOUND) { + OP_REQUIRES( + context, result.ok(), + errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg " + "can be found at http://www.ffmpeg.org.")); + } else if (result.code() == error::UNKNOWN) { + LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message() + << "'. Returning empty tensor."; + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, TensorShape({0, 0}), &output)); + return; + } else { + OP_REQUIRES_OK(context, result); + } + OP_REQUIRES(context, !output_data.empty(), + errors::Unknown("No output created by FFmpeg.")); + OP_REQUIRES( + context, output_data.size() == (frames * height * width * 3), + errors::Unknown("Output created by FFmpeg [", output_data.size(), + "] does not match description [", frames, ", ", height, + ", ", width, ", 3]")); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output( + 0, TensorShape({frames, height, width, 3}), &output)); + auto output_flat = output->flat(); + std::copy_n(output_data.begin(), output_data.size(), &output_flat(0)); + } +}; + +REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp); + +REGISTER_OP("DecodeVideo") + .Input("contents: string") + .Output("output: uint8") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->UnknownShapeOfRank(4)); + return Status::OK(); + }) + .Doc(R"doc( +Processes the contents of an audio file into a tensor using FFmpeg to decode +the file. + +One row of the tensor is created for each channel in the audio file. Each +channel contains audio samples starting at the beginning of the audio and +having `1/samples_per_second` time between them. If the `channel_count` is +different from the contents of the file, channels will be merged or created. + +contents: The binary audio file contents, as a string or rank-0 string + tensor. +)doc"); + +} // namespace ffmpeg +} // namespace tensorflow diff --git a/tensorflow/contrib/ffmpeg/decode_video_op_test.py b/tensorflow/contrib/ffmpeg/decode_video_op_test.py new file mode 100644 index 0000000000..b43b6b8919 --- /dev/null +++ b/tensorflow/contrib/ffmpeg/decode_video_op_test.py @@ -0,0 +1,69 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Tests for third_party.tensorflow.contrib.ffmpeg.decode_video_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path + +import six # pylint: disable=unused-import + +from tensorflow.contrib import ffmpeg +from tensorflow.python.ops import image_ops +from tensorflow.python.platform import resource_loader +from tensorflow.python.platform import test + + +class DecodeVideoOpTest(test.TestCase): + + def _loadFileAndTest(self, filename, width, height, frames, bmp_filename, + index): + """Loads an video file and validates the output tensor. + + Args: + filename: The filename of the input file. + width: The width of the video. + height: The height of the video. + frames: The frames of the video. + bmp_filename: The filename for the bmp file. + index: Index location inside the video. + """ + with self.test_session(): + path = os.path.join(resource_loader.get_data_files_path(), 'testdata', + filename) + with open(path, 'rb') as f: + contents = f.read() + + bmp_path = os.path.join(resource_loader.get_data_files_path(), 'testdata', + bmp_filename) + with open(bmp_path, 'rb') as f: + bmp_contents = f.read() + + image_op = image_ops.decode_bmp(bmp_contents) + image = image_op.eval() + self.assertEqual(image.shape, (height, width, 3)) + video_op = ffmpeg.decode_video(contents) + video = video_op.eval() + self.assertEqual(video.shape, (frames, height, width, 3)) + self.assertAllEqual(video[index, :, :, :], image) + + def testMp4(self): + self._loadFileAndTest('small.mp4', 560, 320, 166, 'small_100.bmp', 99) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 545a4386d0..1245f515fe 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -16,6 +16,7 @@ #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/env.h" @@ -38,28 +40,45 @@ namespace { const char kFfmpegExecutable[] = "ffmpeg"; const int32 kDefaultProbeSize = 5000000; // 5MB -std::vector FfmpegCommandLine(const string& input_filename, - const string& output_filename, - const string& input_format_id, - int32 samples_per_second, - int32 channel_count) { - return { - "-nostats", // No additional progress display. - "-nostdin", // No interactive commands accepted. - "-f", input_format_id, // eg: "mp3" - "-probesize", StrCat(kDefaultProbeSize), - "-i", input_filename, - "-loglevel", "info", // Enable verbose logging to support debugging. - "-map_metadata", "-1", // Copy global metadata from input to output. - "-vn", // No video recording. - "-ac:a:0", StrCat(channel_count), - "-ar:a:0", StrCat(samples_per_second), - // Output set (in several ways) to signed 16-bit little-endian ints. - "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le", - "-sn", // No subtitle recording. - "-y", // Overwrite output file. - StrCat(output_filename) - }; +std::vector FfmpegAudioCommandLine(const string& input_filename, + const string& output_filename, + const string& input_format_id, + int32 samples_per_second, + int32 channel_count) { + return {"-nostats", // No additional progress display. + "-nostdin", // No interactive commands accepted. + "-f", input_format_id, // eg: "mp3" + "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename, + "-loglevel", "info", // Enable verbose logging to support debugging. + "-map_metadata", "-1", // Copy global metadata from input to output. + "-vn", // No video recording. + "-ac:a:0", StrCat(channel_count), "-ar:a:0", + StrCat(samples_per_second), + // Output set (in several ways) to signed 16-bit little-endian ints. + "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le", + "-sn", // No subtitle recording. + "-y", // Overwrite output file. + StrCat(output_filename)}; +} + +std::vector FfmpegVideoCommandLine(const string& input_filename, + const string& output_filename) { + return {"-nostats", // No additional progress display. + "-nostdin", // No interactive commands accepted. + "-i", + input_filename, + "-f", + "image2pipe", + "-probesize", + StrCat(kDefaultProbeSize), + "-loglevel", + "info", // Enable verbose logging to support debugging. + "-vcodec", + "rawvideo", + "-pix_fmt", + "rgb24", + "-y", // Overwrite output file. + StrCat(output_filename)}; } // Is a named binary installed and executable by the current process? @@ -106,7 +125,7 @@ bool IsBinaryInstalled(const string& binary_name) { ::execvp(kFfmpegExecutable, args_chars.data()); // exec only returns on error. const int error = errno; - LOG(ERROR) << "FFmpeg could not be executed: " << error; + LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error); ::_exit(error); } @@ -198,52 +217,101 @@ string BuildWavFile(int32 samples_per_second, int32 channel_count, return data; } -// Returns a unique number every time it is called. -int64 UniqueId() { - static mutex mu(LINKER_INITIALIZED); - static int64 id = 0; - mutex_lock l(mu); - return ++id; -} - -} // namespace - -string GetTempFilename(const string& extension) { - for (const char* dir : std::vector( - {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) { - if (!dir || !dir[0]) { +Status ReadInfoFile(const string& filename, uint32* width, uint32* height, + uint32* frames) { + string data; + TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data)) + << "Could not read FFmpeg file: " << filename; + bool in_output = false; + bool in_mapping = false; + uint32 frames_value = 0; + uint32 height_value = 0; + uint32 width_value = 0; + for (const string& line : str_util::Split(data, '\n')) { + // Output starts with the first line of `Output #..`. + // Further processing output region starts next line so we could continue + // the loop. + if (!in_output && line.find("Output #") == 0) { + in_output = true; + in_mapping = false; continue; } - struct stat statbuf; - if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) { - // UniqueId is added here because mkstemps is not as thread safe as it - // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows - // the problem. - string tmp_filepath = io::JoinPath( - dir, - StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.", extension)); - int fd = mkstemps(&tmp_filepath[0], extension.length() + 1); - if (fd < 0) { - LOG(FATAL) << "Failed to create temp file."; - } else { - close(fd); - return tmp_filepath; + // Stream mapping starts with the first line of `Stream mapping`, it also + // signals the end of Output section. + // Further processing of stream mapping region starts next line so we could + // continue the loop. + if (!in_mapping && line.find("Stream mapping:") == 0) { + in_output = false; + in_mapping = true; + continue; + } + if (in_output) { + // We only look for the first stream in output `Stream #0`. + // Once processed we will not further process output section. + if (line.find(" Stream #") == 0) { + size_t p = line.find(", rgb24, ", 24); + if (p != std::string::npos) { + string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); + rgb24 = rgb24.substr(0, rgb24.find(",")); + string rgb24_width = rgb24.substr(0, rgb24.find("x")); + string rgb24_height = rgb24.substr(rgb24_width.length() + 1); + if (strings::safe_strtou32(rgb24_width, &width_value) && + strings::safe_strtou32(rgb24_height, &height_value)) { + in_output = false; + } + } + } + continue; + } + if (in_mapping) { + // We only look for the first stream mapping to have the number of the + // frames. + // Once processed we will not further process stream mapping section. + if (line.find("frame= ") == 0) { + string number = line.substr(8, line.find(" ", 8)); + number = number.substr(0, number.find(" ")); + if (strings::safe_strtou32(number, &frames_value)) { + in_mapping = false; + } } + continue; } } - LOG(FATAL) << "No temp directory found."; + if (frames_value == 0 || height_value == 0 || width_value == 0) { + return errors::Unknown("Not enough video info returned by FFmpeg [", + frames_value, ", ", height_value, ", ", width_value, + ", 3]"); + } + *width = width_value; + *height = height_value; + *frames = frames_value; + return Status::OK(); } -Status ReadAudioFile(const string& filename, - const string& audio_format_id, - int32 samples_per_second, - int32 channel_count, +} // namespace + +FileDeleter::~FileDeleter() { + Env& env = *Env::Default(); + env.DeleteFile(filename_).IgnoreError(); +} + +Status WriteFile(const string& filename, StringPiece contents) { + Env& env = *Env::Default(); + std::unique_ptr file; + TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file)); + TF_RETURN_IF_ERROR(file->Append(contents)); + TF_RETURN_IF_ERROR(file->Close()); + return Status::OK(); +} + +Status ReadAudioFile(const string& filename, const string& audio_format_id, + int32 samples_per_second, int32 channel_count, std::vector* output_samples) { // Create an argument list. - string output_filename = GetTempFilename("raw"); + string output_filename = io::GetTempFilename("raw"); const std::vector args = - FfmpegCommandLine(filename, output_filename, audio_format_id, - samples_per_second, channel_count); + FfmpegAudioCommandLine(filename, output_filename, audio_format_id, + samples_per_second, channel_count); // Unfortunately, it's impossible to differentiate an exec failure due to the // binary being missing and an error from the binary's execution. Therefore, @@ -256,7 +324,8 @@ Status ReadAudioFile(const string& filename, // Execute ffmpeg and report errors. pid_t child_pid = ::fork(); if (child_pid < 0) { - return Status(error::Code::UNKNOWN, StrCat("fork failed: ", errno)); + return Status(error::Code::UNKNOWN, + StrCat("fork failed: ", strerror(errno))); } if (child_pid == 0) { ExecuteFfmpeg(args); @@ -285,5 +354,63 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, return Status::OK(); } +Status ReadVideoFile(const string& filename, std::vector* output_data, + uint32* width, uint32* height, uint32* frames) { + if (!IsBinaryInstalled(kFfmpegExecutable)) { + return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found.")); + } + + string output_filename = io::GetTempFilename("raw"); + string stderr_filename = io::GetTempFilename("err"); + + // Create an argument list. + const std::vector args = + FfmpegVideoCommandLine(filename, output_filename); + + // Execute ffmpeg and report errors. + pid_t child_pid = ::fork(); + if (child_pid < 0) { + return Status(error::Code::UNKNOWN, + StrCat("fork failed: ", strerror(errno))); + } + if (child_pid == 0) { + const int fd = + open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600); + if (fd < 0) { + const int error = errno; + LOG(ERROR) << "FFmpeg stderr file could not be created: " + << strerror(error); + ::_exit(error); + } + close(STDERR_FILENO); + dup2(fd, STDERR_FILENO); + ExecuteFfmpeg(args); + } else { + int status_code; + if (::waitpid(child_pid, &status_code, 0) < 0) { + return Status(error::Code::UNKNOWN, + StrCat("waitpid failed: ", strerror(errno))); + } + if (status_code) { + return Status(error::Code::UNKNOWN, + StrCat("FFmpeg execution failed: ", status_code)); + } + + TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames)) + << "Could not read FFmpeg stderr file: " << stderr_filename; + + string raw_data; + TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data)) + << "Could not read FFmpeg output file: " << output_filename; + output_data->resize(raw_data.size()); + std::copy_n(raw_data.data(), raw_data.size(), output_data->begin()); + + TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename)) + << output_filename; + TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename)) + << stderr_filename; + return Status::OK(); + } +} } // namespace ffmpeg } // namespace tensorflow diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc index 7176f3b550..d6c885a324 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc @@ -21,6 +21,7 @@ #include #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/test.h" @@ -49,7 +50,7 @@ TEST(FfmpegLibTest, TestTempDirectoryThreading) { pool.Schedule([&mu, &temp_filenames, environment]() { std::array buffer; for (int32 j = 0; j < kStringsPerItem; ++j) { - buffer[j] = GetTempFilename("mp3"); + buffer[j] = io::GetTempFilename("mp3"); TF_QCHECK_OK(environment->DeleteFile(buffer[j])); } mutex_lock l(mu); diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h index f64007c81d..c5ea1432bf 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h +++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h @@ -24,16 +24,24 @@ namespace tensorflow { namespace ffmpeg { -// Gets a temp filename in an appropriate location. -string GetTempFilename(const string& extension); +// Cleans up a file on destruction. +class FileDeleter { + public: + explicit FileDeleter(const string& filename) : filename_(filename) {} + ~FileDeleter(); + + private: + const string filename_; +}; + +// Writes binary data to a file. +Status WriteFile(const string& filename, tensorflow::StringPiece contents); // Reads an audio file using ffmpeg and converts it into an array of samples in // [-1.0, 1.0]. If there are multiple channels in the audio then each frame will // contain a separate sample for each channel. Frames are ordered by time. -Status ReadAudioFile(const string& filename, - const string& audio_format_id, - int32 samples_per_second, - int32 channel_count, +Status ReadAudioFile(const string& filename, const string& audio_format_id, + int32 samples_per_second, int32 channel_count, std::vector* output_samples); // Creates an audio file using ffmpeg in a specific format. The samples are in @@ -45,6 +53,11 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, int32 samples_per_second, int32 channel_count, const std::vector& samples, string* output_data); +// Reads an video file using ffmpeg adn converts it into a RGB24 in uint8 +// [frames, height, width, 3]. The w, h, and frames are obtained from ffmpeg. +Status ReadVideoFile(const string& filename, std::vector* output_data, + uint32* width, uint32* height, uint32* frames); + } // namespace ffmpeg } // namespace tensorflow diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py index 18b0b8b812..5bb011f41c 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py +++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py +from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py from tensorflow.contrib.util import loader from tensorflow.python.framework import ops @@ -89,3 +90,19 @@ def encode_audio(audio, file_format=None, samples_per_second=None): ops.NotDifferentiable('EncodeAudio') + + +def decode_video(contents): + """Create an op that decodes the contents of a video file. + + Args: + contents: The binary contents of the video file to decode. This is a + scalar. + + Returns: + A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output. + """ + return gen_decode_video_op_py.decode_video(contents) + + +ops.NotDifferentiable('DecodeVideo') diff --git a/tensorflow/contrib/ffmpeg/testdata/small.mp4 b/tensorflow/contrib/ffmpeg/testdata/small.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1fc478842f51e7519866f474a02ad605235bc6a6 GIT binary patch literal 383631 zcmZQzV30{GsVvAXFfn3aU|;~znZ^0JiDk)#3=9k{X+^22An^gitfyb(rX-dyFfehl zvHt(hddKzU-e)&GwolW!p>R|A-TMk7GZO_}h2;FAR0VTO1ziOXJtH#{ecu3AcU=<& z$1rDwWCdLX=lp`ooYb@u1tS9kV_hQy1CZ*Bl9B=|ef{$Ca=o(5l+^s3#5}$H zqI7+bv3eOLxj72D3i$;knfZCeRtm|9Nr}m}1`0)~X|_fRDXB?0`N`R~hE@hv1`3IJ zi8+(ATquv#nw>Iz{)_+Kp{7? zA~hu*WFAO;d{JUvdaA9VnL=_#QGRY>d~T|(p+ZSfYEDjOv8{oEMTJF5a*3^hLULiQ zt${*HYGO)NeqO4rk)e*EAxuwUL41B%T5)QLt*((mNk&m>VoI^CnL=I>SVwYZZej_@ z)ZC=Rv^0=W`FX`9MTwbtsVVW9c_l@Owgw7GX+?>-sURE63yM-x3TzD&vQsNF^Ga-u z4Peap+{`>%V*`can_vp{)74@{>T){F>z-{XfmOWYhe@Z)o=;FFzE9>PLL;dGfu>5}gi}i`d z!T)!%o9{K}MYb&NF0$tOG|w>ddFNF7|NpvH9bWQ4!(#e}YYav!?zC(-fAk^Mb(zuqQ5f%m1f8rkAz7?{8yZjr8$wQYq-T%N)oZk-D$Jx{u8* z;biNFH}%Z2iJ~*SwZ24M-oH=X_oDSi>8=k6Ut;|K_)U4@X{J=|vipe0v4qV|>3;Xm zw@bzfHoSN-i6`Khxp2V#E72>~2WNkNRC88rn|;3B%84JI;6fQ{+bzqyBmI11hW+Vg%T;v+An9>}B}3Rzl?f-`-92YfL@!M1=M-H!yzI zOrFrtrN_uHf%8H4S>tt%hgb9O{g5CP$dRU%!t(SzV~dZuSW2(y)Ni>z87`d>*&O)H z{=kut*)tt16`p0}{E=S0@u7`0TNes@i|fFK2L1;n9fj z+?Dcs=U%puU#?sB32@yL5IHtY;oGGJ4_0(!dglsV5xuR#>Sh)2mtV-xlf#~Y?b?R? z13cM@rxRSCfB)zvU;Ff5*#AYRW^P)4`_xp1{92<9Hk11?&;IS4ze7gi(FdIsSpwoh zhZ!BCB5xc}=zrOe&^ae{$>~%BiD`aJ31FOH+Gr#N4;`skTSM~EV$A)8~Rt8}wlFly)=X&(8WRD{Q=gdP4ee>j=-Z^wI zPVUsqh_9T=>YWor1(u07>i#|U_1LZ5M-)3(GN?y9+_ufpg@4QG-R2Lro|7tVUOQ#s zvYPYdWzvn)((YGkI(r+ukjp*nq*E8PZuzGL6&DN`G>$nkFqSW#Q*ho@>yh!3PwBer z8ZY=Z3FMxsP5SvGQdc@_MeO2m;qsGht0(TfIy+cAv@UGV*=m#L|Noz*`nksZJ#iudy7uRnUtF^7@b%7{6Bij+8P0Tb zdwpi>&yQtuUH{upQeLs!Z`I}3r_Ogi4D;Kj8gcjO%|E+)1EVk23vIW`ZvQ1)AX94c zt?q+ixYI+hBQuhgs&U z%&fQRu3e@_7pbT!FD*L!UH?&OVb)^}*HtHH)>-_qx-Y3?Xms13H8X~NX6AGwF&*Ce zW@DZ&^+qdi{oK$Konkv*YM1D>Z`ZtbhU^N+kA1Z=c+K}U;fDhsHoajfVqex7)>rhW z<>^eL*%seTLl}cO&A#3;GI%rN*g1dJyP3zIT@A{b6aJL%zx|KTIbH7sJA>Ay?J{`4 zEW_Er#Ivl%=YR>%s8qq>7X(_p=QUC8+?7eifNrE=gswd8<4YV8aOGZ($O{&DDU|Bw6fUke{dmD>94#~h7y6?4L+O1ZQ?O2`^; zPM7Ws^uM;Ml)H8Exm6dnG{5-io2*)X_4%jyWt~S0{nsUx9@llT5OuXU|Nlc=dz}55 zzwIsw3cVtx_kOS|tnK1TGqt^KbU^FB$mxG~rY0|&v|V5Br>pmm&l`VpaBul@bg%ib z^4DzN=Kq+IJ^yE5Vd>1ND+@&e?=x^5*_zmvE~0L1pu=GC@POxOo;_TgUAt@h-6L{} zW5q8ssPzdbPkU?-{lD&Q?F{`*9qUy3*Kg=jnmExz_TUjF&F+_19*Fk#ZdonHAiGY~ zp)!I&=wr}ytJF6U%c`30PB^{Hjp6KV#f9~I5+i3!s;)4yxYb-OqkeeKiaCX@o0TTj z2>&uKd--SgbkV7)D$YmVeR{M}OFPND=>0dR@*l!`Ppo10XILV8clwb>|CK%$-RnNG zW~Ej4*GDs^#(S-*zdYHaf4|qptejZ;>H3f7Z;m<2Vk2bR;qh++myVh}!<}hP3-qcV z@P1psY{3&}S!}XtJHr9D@RJheCJP!EZZL6a2?(rKGRa^_Gwhpjzwyli$CbP{xRWgy z%gr}mX`R||ae+N&4|8DiBp2@(Mum-ee^}a2*plBH9Pg$?@vj#=61`#TGUGpDeQxz1J73h+2AVxvAbx!1g!{22Mu$Z|?|UaSL4tjm zlV8Mo+sq@0ihmylq)W^y3stbPkea2joYm$oZAlg36#9tqdk9G zW`BYB8Pz6*tHSG87D#zNm}75T+EI7U$3c+gw=F~2{zuOno2NTeJji>T+2vr#Bf!U~ zz|PV8m`|W*(^bz$fd}O_C`)o3tZ9sKUiRR`!50lvm;SIg@FAlAq!9ZXk=@J*Ukb#v z^7b(NpOUhMf%DJIhcmoaf2sc9G2iA2+kx%5+ZZ-{uaMwtc(9t$eb4dMeMSvit2Z(# z?4380{j%Mnl7%ii3olx%>1V2WvZU+Czo{`xx{geGH#NrZjoXTL@#~#O%UxC$PvtmJ zQlc`i^RLjh&d|!jLan_O%WC4+#`qtqJU>~^{^jaJZ0m16V18}f$nj-$iM3xEg8*ko zfzdmLRu9R86Z-pDm%aG!y<^9u35v%~f7ri#3qw)zhMsWG$H|V<*X`f0I$tWNuP*2P zhN8J0+qE3pL|YZU-V0w}Cd8lWQ;>Z4|APq(0!#@81`OX+gcaKEtZWso5o{DGkKE+u za=?W>>dEOw{^OS)%zn0f|L%LMggw;{ty=k_p0VZ%`<_MYXpOx^l}=#hIB|g5(q@%h=#!jR$EvZ*p;);#m*?fY)I`t}K@hIbueIl!R6#m>TdK%!blTh6)0C-0>^ z17p;rme`ke=E>~}oxOWb@Z_(Y*v0s0fk@q>9<7GZ?`K_@UuD+EX>rVB^x3gf^2NFa zV`gEKFY}Cp(l@*=lL%Va{pI4s;+Z9UcLih?-m(*5;N|CG;!CWm(qfQED99+-Y5UfC zN|wlkCnpyOB`C&fFwavsc4e*>Gq1w*t1j6Q4l|u<8vJi1c;2^XxWAhD#Z+6zz3Y_g zjg6=O2o>G3*qiZaCEJs__{AJf2@C?lOe@OXEqKx7G;P5(r}J(FL6tift)8ZmA9pWG zclQ<7kaW3CUX1JNN^;W`7xnKtE7+WPN&n*nv-9%NmJf`t*!_trfA;@-t6bqkjV0!j zkLB}3_@A?nUipmiLYmzYhSjF;7C&m|Jg6lT>GyOV=auz~@*NbW`R!kp)hxyz#_OC_ z`R}q#hGXs49~&a}O_}-s@{2=LWuoUFh}gK`+UpWMW>>>`_8bg?G8qnnPV$n6^u8R} zd@#;XlP~^x#LX=mo(U~6WWCMevGd;Vh-2>-nZ{q`c$6s5y0%X09W%ouKBZTxhZ#;6 zx6RRYG_-AC(o`smzv%LA@rxZ+0r%eoBs0|)|2}eWeyg>%#dM!3k0iNgF)<`ekZ86o zxbl3(as!9ihJ_4FJ1@(ir^A1W zXk{K~U{T`c5EGr_7%arTW7Z9gNfQ_{tX?>9uFXye;oB4|x=lkwd`@j)YI*7ti@0;|6bzv1~UW^sv(O^Z~f&ztd!CA#;}%(}1Hvp+C76mm`vxV~YQ z_{JJ$ku^4-cfa9ncRGKkTJdn;2BXu{S8=d5TyxTiZrJHiHb=^>iAB51tRb&|Uslth z^RYW-WZub;iM*#J+4;@&+K;uLQv3w@Z#MPU=tl;3-Z|8ZhFVk?>mR%_3b>qf@%w{MZ(jiyBZE8s8pSO=fMB$fM;X_ z*Tvh#Z=yfjeBdsM$WoH}*0Am?laR(;`@gRW{~SC2&D)zvdh^u2&4DvY)lYnDIkb6; z026}%2XA8o!^&f63}LDtmNJ~OZfm-5h|96y*QF+pzm0D9q&vCK>`6U(=jySQ;><1& z)$BVI_N?50(0y{^qVGzb`ATnpd|JBkv8f<`T|rpwOkumP`?C~gq%!|MQn`sexcYP? z-_1O+9Rd2TDz{Jf^}gAXrGHbXKXT?cB75*J8#O#sdrvF03{R6CMZ{ zEm`0s(^}=r@Yq3zF(he~m+18^6)cJG4+s7{_Le>9^0J$fza>o`$8BOOV~*0fYq>~9 zJ9m>`(;m6*2v!4L9mk88oZlz@WHeXqZ=0ez$K#RW`8{8|H|oh>t1(Z$a9O_N{^Qx- z&xB6dR;l^K)QnZ8y21Hwc~8&k|Nk#4Rg|^FPh5IUseb>L`%xSW4(T5p4Sd&M9;4ooK@a<~j+> zBZV)Rn_~9x#BnV?!aC)6&b9wJn***mBuL9lv|Yhjz}Uvlz&MSQ`PaL{2Ud9NFv`D( z+O5N$T)^D7W(HdVb0X(jzVP!o9|Ac49#|Uq@9NReQdU8;B4!WiXm_{G``8>)k0p4N z-+TJ(h33H()!BZ&57gZ2_&Fm}HwN&mZ2JEBux6cv;JU+wn+RRw!%9M z|INde?p?6X^xp*s7M2B#Jk9e986;P~O^$d^Zm@1&d*@hVS~=rI^%kEEN^4Kt%#AZVEpzR7fAz*T2B(cI3>-(f zy#yZ}u*piSF=+hz;h2%Y^v!d`6-=3~BryK#w7F4xg^_0#uY~@EW%2(`o?I`*_C?{`lXWzl&syU2{tQzs}8@rT&>|oAX`9U;AGl5xB-%^Fh-!K|Esr4GmQb z8TO_+<|WS#FgVC`CUk#Lu=Te$+9GiF|C8r>dQIA~At7hI7qU3DSe9cevciAjGDlsSUiyPye4_e1Mr_8))XZ*lu zl`m82^gBB|R#$plkG_&qCRO@B=f3GJm##Tay?YO(x?T9SJjuPwZMl(3nMjs_$Hv>D z$*cbOnyS@*ws&OYjxN|4{CQd@vnKPcx36ZqF}l6(>iGYc|JACM+3RlC@8>?VUa&B4 zio-|dmWGvwPgKua#l|R7xj?bg_gw?)zZ9K^`OCK54d0~r|0%-}xwSGY7?>Z1?F?*Q z+bU_|dUb&l1JeS5gNh8UQPP}pw;HtEHar$d&TtT1IQOC9We4W16VBb{_qVaqo^8Bv zajRH?&4h@VGdOe(9C+uN#ITBq(Ll`pVVA+hM=?uJ8{4|RzPT;2yY}sKJ%^pw9RGA> zg-*_VdHG0F$|0$oUrTZv{WmY`Y-G79%~eY7m1tZjs9!dDd)}5S&1?Ssos;Ime6`f@QTP9U z+iPa!Zw<}gpe$?tk^5f2g);HVyEkSU`EP%i{_l)rUD?zv>vEk#4u-4opvv!6BX?BNQ{U%DV-`S&$@#DCA~ zt^alP_L>sI{VWa!77gRWHiS9t!P$SSRQkw5xxkjP<##oFI5?)Z87mn2Imh4AoWGVWVArFcDQZD?Pq(^$EeleRYRWqid%XUVi7Zd< zuj#)hDi(CU_-4mte}ki6Va1#0XAd(lGO97iNw6;M`XK4VyZOk6Jq&717dLY#J6T=` zF0eA2o{}ucAhwG?amAIVOl=zT-`6(YJ!gN!dJDs{uD8;xS`NDwOkRCz+m4cyzYez! z$5|}0zVgE9L7H@gv`yuI%`;U&TLf5qZ4{*Lek%*#cQc&vZ5(qNqkqMp*1bXe4S!^I z*i^g`uwFPxd68LIe&d-d#h2E-ce!*!((Ki`M|+%@oz`|RaCDf&~{F|7tXQLELz)YuKW#Iy@fiJqWxdJz^i#N{RcIs-y%ZE&o z$>$B`ck>E;II*m>;mP$0_9v<>*JNFO^?dSY6B%av&SZr~-lr@K4&^MHY7Rx7m6Dil zb2+x|kgA)edY{K*21W+v2{wlUUQS48WVVex)?Kae(>hHqp)hiW#{MVo=dpe}IFGf* zTFpUa&nNjyN42V!X|}C%UjCH%wpFN4RkB*srgDMn*ncDU9qgrrBDe1^=~{WeMQFl9 z27xfSyu#B>3u+c#w9VdNmcQNTF7N7}>@KYW?VA_=dUb>4qt z&qIMHAMej#HVB!)({FsfS&YwS<(JdejNA7XerRBqacBq(3jE!8AV*H*ds1)Fhmh^St)h}rZq)O`g<`P9n$`Ttj~b(eopC|CTW_}c1F zrE~N1|NmcjDKU6{`i?)k`=XXUa!Qu<$nb^Lewe1_ShG^;$Sd`=l}o#BKFnIQJG1IavZR+%{PWv_yQ(!D zd3cv|>fTx3v|o1ry&JCH7w0Lj6Y1b!Vlr!AEK)Hq|3`TNlT4MksrXh2rL(V@7^E09 zHXLFJxXd6_^)F4QBi-}CF~-HhqQ_=_)>*lV$M!(S{PR5g#pbLE;_d%RcoYO0<2l%GmfJ2+`p$6ga-ghq?5m?I zF9a1x7;U{P+hoUM%`-`VQ`Wx|HL-cOGutL!Q%>6RHAAZ?GFD@UuSbV*xw77U`CUoz ze5=e_PVURznYg7?Fur+1k%pT8KgsL_=KqZx;=O*)mi1cQf1J+nP3U&PhDEyj6&^=? zTDtr4L6&s~5_RS}aR(oq?$N+p{Z8c1Ua{aQ3Ee(imi03GI6|M;c?TP8*T4VnO{K&v z{|{mM%dTa=vR=(G*ZkkZC0zoK5||p}dE5CH zf7)?FW7Qmq#g~@{(GtMRyDUbH|DR_W0N!Af7<_ji5iRZvkb>KTem1Ct=yZ} z&X>42uI_XO!;MY1qJDL~P7bljx!-w2Sf#w{sDf3WYVUj7)3SlCP79xlYFezf=Q7nf z_JWPAgpGY8pW5xe??e?(JLT(IIanIxmHp`s@jQ7|{`j^&IFEaU^en!KKdYvOzVA`84*1s^s>pmsF5}BV zBW<=|qiY<37alY)K2!=&eRRn}izDoYNnLo8@MF=NYfo}YU9&zXdj9a1bMLqpya}A= zxcK1SPkVXppIK^|bT`d$V_3if1_ibS3XvaOthIX*-`LN*b92pw|IZm%nGY|}Ss z8SCMEJ>;*tDZ97q>)9>dYU&SoQXafhKD7Pan#X6gWRutz9AJ}vSR`{GIbsjna|a&A ztu{Q4WlEikzyIeKU4H8Di?6)twU-X)JUS=ZwnFY9^W=Z~kFYe@T6($J1u#U%`=4UZIDg1i|Y5w%Uv|$9wqU-jJ`dcadRf~BUu(k zKF>x5#tV$itWQ=&&tq~k>~G|MCO)yD#3N|xi31-j4Uez!VF|BS70{TFe4_8<-9&-y z><3zx8>GuOy|uVez}Vnk@Sy0IWqOF@jfZz7nRmot|3CdTH0pKK`Y*dWZ$EL33=?$c*V_Mo`Trut_mkJXZru4H zt~ucICkHui1zF{8kuMD8Ia?pky~HDX_kM5~``IKJN#_YRjVmYeO=gig%~PUow&}1e z=NZ%M{E3b$8kLT2<0;kpyC(2^=c63GU%S+q3x59Avku?ja3z_WrAX{vMgGirJxqVt zO)XOX*WQh=J(gDfEk1jC^F7BCeY)=z{p`4-3#ztU4t)9Y)s5x{3Jly92E06L5@n{^ zN`;sHy}U+TeS_wdg2nq8j$S_c(_YTP_J)PV%AW#kd%w!WCQad9YjpRB<5Pt?gRl4c znGT!ehbJ`nsKq`FFRizpt(Elu7L48UFP0TRUe2Rm#swaLxP7{NKXx;f}1= zUS+0hZqH=Ra94Wnd{*L(!ee$jx&EE^t&gui#K6pWQbvM7Ktp1?$hX85m$YwXEQoBA zKRdx#>s{LYjk~A1*1z)qoD(ek+00q-hE_W7^SF*Al=?BnI8V2+tF_xK+O1}A+!|L~&S^TlkTks+O)19|&njD`rnMu7V^C8##C;Ls#UeA_)IqOLLukS2-^VVPGVDa6t z|E~kXcW%GXOoogfC0@-ZKDgSId=O+3*J9+#7)@1{rQ~KJE=OB z@|=`62WIRyF*g@i`ZL#nkta#BH{#stWKjkU#@G)U+!4C#C%nA4h~chSX1PGhL7)0& zk^a9C7WVrOFflcKF=k(=^UN`bPKiQt!zrw`Spj4+jHInY##y)d`d(Eemc{R;<@Ov4wl8Q|ydi{j5_ec5*PyQAprtb@r2G z{ZbHO^ly`b*lWe*zm`q;+xcJINuuzi^U{oMdpLF+^fQ^d>1j@^+S<-3F*ZLdVw%@2 zmiSbmm5}j}(Q0CiXR`kSC$*M0eKG|M%jX{C$-aEmc+c*>t;@`k&J}EPTks|Q#=H}} z5qXY}&B9i^bQ4!dWZ-jT7cfxTQ6(@#;?x}xFXkDF_nDF=U8oUayTZESzw86U#j1t6 z`@eEJx@(x6F|JeKVH7_rz~|^WwZ6$&*HVAyZ`=OO2cH&HZ<_n=_+g%j32a>LEB0}- zrL&)wt_s+&(n3Q$Vlq=_x6Z*61>weaT5`*|qR$)|m3<^pLQVbko3Cg~9WoODh{pn%a$RL>f+4p&|?T@#Rbld!IRV8fu82+aoEO@VdP#>B$Qo29{=p&ITTdo5|Nyl^esf zCSCaRr!w!|7JrqW&F5yY>r}_FziiOd((amcGTf8rq1p+XD#afXhP-V3+RvFZ^esYk zVqDA?d7E)8;Fz|)^u+hVu0@kGvmL#sHqg2E%)xY4Ao;NuU0sT$tWz?YA>#%X_os1}>i;j} zZs*_sy|DU`)u$)C_nCAeuRLy@*RYn|WdmOoH%HwZ!z&uU7{AYNX|U(xo>3FidB42Q zbAx`b)9--l1J8Ja+q9M&u4du&R;cM+yL#Vsb7sy51+QlJ{jFbR_ueNO<#M*xp3aQyeWpF2R^2;xjd+xB7foH?Ug+n)*Y?sYwz6r z+htLpU{;$xn{`kQ>&?vjLcbS(p5iy-kkD?SPx9YxrzZNEv}~6iE&bFQgB_{6Nzz^a*ED}PZ?lrw?-=Mjf{_snM0bg#PlqLjn_ z`%8oBeLCE7e-6G1=X6neV6}eb`U}FPZ@!f99la_(dERc*C)-*xE-p5lCz8KGe_9*U zt0Mv1qZr~|+`6|uM|`5^kKYgSgHD}4VQA31vgqH#J1eZi^Q4wmXl3(CTt2GJ;ke+^ zjknfvzrNq@W@2RP&WpaR|FHhxvD3!fpXzgu@&!qp< z`CCV94cATlwWVhd8(YHpe}>-jJKp}fy{26z`MhCqeqnt9v&4%)uLHKsWpkyCmu{Bv zo@l(&MYe%aU=qXZJPG@>!wVdja-6);oAqzH!;FR`-zL?7J-dGGII_b{hquf@;oY8( z*PqS%TmQ@QzOa@DyENCH?Fsr*-Pn7l?_+LYWN6ROq*u3%-=1wqB?~~AbFmff^~o6y@k3Ayp^vk-*|PnPW>CZv|xTf*){`5 zEq4oxZ55^oUcq0&s%n-CsZ{UZ($f&)_V>MFRRaTqivoiU2g6@B#ch!i$G%RLkTIU~ znX@wpp!9d`=1hqd1V&BTHRh8t|`+$}7Yi&!Is*%Bm< z$e&{f{>iOb$*HE)k>f1!-)YT`@F@p`c8P?y)ZV+l^__dtWMR)w2kf6{v3!5>+l%$I zL!6m!2PCoKX?2Ss*Iq#~##~*N$ z``e+OSuY%ZbJrAU_P4S}8EubWlF#M0A?en+xe8h3x%WPl#m~7g`*8l+7DkOSHpw4m z+SzW-&%{hT#6&KBh`n%o$)#(bjM*=2kWJ56DX>DCxz3UE$9$dkEe!KC8!xnp|6@3- zsJY%HO0>P?>&)k@c|Vk%|6BCn%9@4Vb_}QE)9rc}h=)$ANY1}8>)!{}2#J}>3#Mc! zovk?T*Z%Cl)7f`;L^;2hd~X!p7i%y(x>hY*B127k)|#FRkyc%6CMBNGeH$JAO+Rww zrYBnuyzMeR`)he%XnXV>(+TPJs_AdfnKrQg@D`nxHCy^n-^YNZmK(m>-rOMeskLGE z*ZWVrQJ{>sGhNa98k!d0IRg8R{ojK7vJ9#)T{9Qou+`jsr96A?Ye^x(&h_geaPTp;52e- zU}#vuz`?xB`uD!Hr4MFU_Xabl&I$=qn&idsY+9n@-Ii%qCpq{&FbFW8FOEq!J#*tz z%>kV&RrUvF?zlI7>w)_m47?42F4n7l2l45fGCc@d{@~QI_POm3tw5cenD9%FHd?el z?|kZ6lrV2<*A(_*rk3gckIHtbXX#y#X4rRnAKS$*7bI=k7!ww%w8(9^)A-U!v4d%S ztZS&YqRI*V0>hk&2mK0rpS-`eU!`44^M!wJOvCT)j_2}PCX0HXB_tboAMrb=qG7Rv zhn<6uNlTn9aCU3b@=c7#)wtv(UH+x>9smEQqGYw|?^oOl)%LWkPe0ps_+Rp~s#iPM z9)4NC{BerXasdODHF05^WZpBn-~7bpaj->)uhU90?+L&8g=_KF->cS%MwoO&zmjfp z$eP1__{~OB|DJ~-Mt0F+^*~5Z|9?q){b2e}b2naP?7BafZ ze?wZ)bHnN%t2DFBn;h$;u8VU0d8yTonB zcPYulDTS99XG=@Hhze!@5&R=bF!I^^&xaWUEfu6(V<&bB+Az+&aLBAcbGLK<-v8w; zLb_k4sR-|!m!6)D{W@u$EWYL}tzL6PWIs{|k0}-=(UX*%AKoOLeIAF2Di?2V8miKjVE^Yh6G-KK1i~E01n7q#8_N&`Ex3||ZsjX}_{A|z= zBiP8<$tE)=v6SON@`Rwa&o|cTOV`vKy8ryFt7xdE#ezxhFD_-6n|!R^e5T3SCE$Jb zMvvsM=0`$%S?`qX@A<3CuXvM(K|8hQ2AA@l_>i`L8<=@G4n^2H{gpCs{=2zh#*{<- z3A-Ya{MCAP_5In#^=E$ezMdoRET*12k>~Vp*6IuGT4|P_9&_%q*}nKQmzL(0_m4Cg z8aRLNR?~kyXU&2oCQUYRzO`0I6V?PXYp54p{`pP8gHe3vq@xBBc_w!B55rzH!H@6HZ!i};%k8eZDu-1;;IW$h+Q)K=4d+kJzmd}{z#zcN?9{`s z;1Q!{o{+$Mei^^*KOa7k<=gVkoLSw6(_`WB7jO2mPh{M{5OYQHM?(GIi4&JPr>yz+ zP&IA&yO@6Q$jk-%|EstBpKrZw>RvsreB1x&K0AcV*%$B7+%SpLiF1~?f#;-8(om#!-%L<7H4=7Ao zcDPJoAFt6d$%?kV0`Z#PCK{_FZ@&G((w%f_7DHnF?l98>@`hz@W(6Mk|FauYBz_IXQ8kBSG9MX!d>@c ze%@U8CFx9K)7&rlrEecJGH9?0a4|T@IT-ZoG4bWyU;Xvr!l^Nd2G>>uBzLjBVA)$B zv802AZNaXmapEgf^Jlg{_n32XsnX3pi-uU;=9PhLYR@;`-yOR~=b+jCcMS8kU%%e& z?!n`DA>Ad1p|Rl%vp~ZC6F;AI_jrD0Y+!H@VbDHsfWbpZ>+?*82!Wt~j%^J3KRYf5 zEL{A{Y~y#G0%IN~#?J!xbB#nxjj9wU{K+?1Asf_{BE={3;<~*73+lczqhULjw*V=#H{b9WNVb})2^j*c%J~Hn}4Dk+W&bD^&Vv=EHFl>$% zEUGDbxPD9K-1YbRrYkTAuqVf>vIuNB=E-ZW_Ea{3({a^IV`gUN6;?l7%?x6fxrP4V zvhO-^%d_lP%k(>6k2rWXOf_Kr6Xmlz=?v>@u_sTtl;&r?Q0rjrDfumCx_81a!wr%C zhn^VlSQ*_t`PM^Pi&uYz9gAb*Q5MC7bsnWspWB&DjoS%O|-+|1PHtYsrsYwZJ0#OVu zth)D-12e)!`);L4ePc`fcr{63{b8OntaaCIB(g1}TFlw@9*FLazaaj%VCmhR)AC%F zYdAP9WSD6F#Pfs8R-L+tjokYjxu<+Ot+#H6$c)g|)%8m>*8j9;e5sIi+{ba%wfbaijPC0y}uHgLR11_q+_?91-IfHeZ#cR3OEw}O; z85kMwY-nIfUGe1t=Yb+684kT4H!@814Jr&1?q3z#rM)mHk>HBWBShu){l%O0t~WjVjEtF-ff9^Th}`M z^ct6K3GOTZYQ`AqK8+QhlX&6RGGXU+Q@)B^<PCf`wM4mf+7H9lM@{e9jMkXGI-T`BsgJ0&kQM_c!ei7Bm>sV$0qL+OZ+%JWj)uS zMXMf${w*-=>)yyWYr%?2Lq`7gleJre4rlN@dg#1{-SGOgB?pclfB22L^!2KB@*mc+ z>!h(39{(!(@6h3jre%DQ`tz9t8oeZTBpoSKlFM;c>kIe^?Nwe+T!f)sd&g9 z42hTiP*Ae{`hyjRKjjLBEij7_*xW34@8TP$NKLOND^|{SQ?RkZwyy1hrIUS6=d5GLF0Qegc*{^DUE#-r>8CbKti748 zcv1Gk)mZDBLgH8QKNzt-TE=>HzsJ96P1<*_e0t&EXP0R8qGXf1!FQjDIagMOeJ~Z$ z`mtT&%7-ahyuXqSo^Xf?BI;5G7-Kf(pPuZ z>`;tY59@Ka1oM5oi+->deu&flT-WxdcdDH0^@WwgSzDlT^QKT&7WJ!A?9GvZeGp&PqgXVrC+lXuZnK3f6wFbzA1Wd zN_D8(^A>JKSH?yW_a3GR77Ewi@I|HEY`kzOwBeMUkM4ylTax)JYOXoib6bZsH0=8s zBJ#Cr_kMNlPj)ZrMSgye|E;5$_2|VCUJZuVc3(X1F-T;e|0_FtZJYSFsj=49@s}Eo zR+t>%;b3Ucx^nrEiLENj8u15;^*1(1KC}xv$UdLNP&3}oF7eY@jeku4Qr^8^yr0wH ze#0K;*J^?6$181`Hy_yc-rnliY-x@9?)bH-d@>wNavuyBtQe#PN<<76>dUM!bA9lu zaSK;~{N+EQ%NzT{?}#s8%u$=bF=tbQ=7x)W>;F3IM`=wJv-@*m^TlP1yI!Rjoqf-9 zsHaIFzeqT6ZQ1l?MfXfSwQ~E|zRvb2`h6^S;`wvarnZJY{ro^Kcfg#2TR)p{5M68Ha%TcGin$>QPTJGEzhODD|y{7B8a*5Vy=&=a2B6)3MX%T$ zglm%G>T@RfTc)1i`C|4zGRjujFjC^d92f8FzCO+?!e2-VialDXeJYUAf;IR{_A~_x z0mC_^cAMhjzqppZFGyrB;*jlGHq9t0I#p5KR=#Ni`v02r* zvEllyC-+=*dRF$V6ZF1Uzh&wibCt>LayJG?p-%l%zh=ptQ#_^$2|~5U zc3%(ke*LJ&?4p*?B4%qjafSC{nrZIoD`b*m*)B~lU(k2D_M|rR&xpC^8R`o!Px~t! zuwG`vu`31L4Ts)*`0-*hYyQ7ZF_v^bgM5}-T;FOsXE2^&w%MC+vP?TJLix|Mbxu=) z7`pd4Y?^xJ!=HP_7Pnf}ed5Lbe)v$pl_Ng!D*tTxmCR)hTV6em_w7uh%CTKAv^x*$}+;n^1Z6VS~v!2YVl$l3CUGdzP__e*eugbGyV7 zRIaIeCvU!)XM3xk$AEDy55s{Eu19!|uqI3n788q2VeL{BKDcZFtHz1+z^D$U)m=<& zewQj2-3k?XvSoAU<-ghO5fU?Pp7ThR7ORx3JY_1`Cigkk^4ZjKNreZi|3A=OmvF^E zhD|M@(EGs*cOB*p6*XZxMU8(0=V>jpP<;N{l~bqLvm!R>e7?`~?#%g(2Ur!K8-K9i zDLZB}x4ZdFRL=by)dHp+OM6N?jz737^oVn2Wr|i>qX5gBju#u=Maq8v!uwNDIfa); zZ%gq5r=2oY7uKwlkT>UL+{>O`AOAcpXKgTj@F(I<_fC#r zgUhHLx90ShC9Nj!t2hF$BwV=ct7$n zzHQ0gx#nMK?AKRqhZz{QE)0EE?)XxPaq1y^`{TY3JEs5Ms=B4YN?xgBZGg4Vt;WVa zA<@Kugo9S>X0m(TXEmmTCtv^i{B=a1h47sZ7D@|?Px<$h81j{K{1HEvzH#^WA70n( z?L_1XEKi)dZXmlOI(^1IHV5<6&!UF@|Lk{NahGJf>LZwOz)$jLz0wJpf*A}ik1YB< zZ&&|o{%uO_O^#&;cC_nRC~M!46OH;G&T;MVuEcV~vn<~jr|2*Lk)&|cbwb)5&WT|z zrMIuW{Q5O8Ytme~>)8Q`U)Bj0DjZ%cSUuC^-E*;r!s}M6ia6}gthm(tDlfAxl5N6M z{r_iIUE8-_SRP6&M&eei&Gm{%>RFa_ebQ@6Jl5IeQ@5< zeDC@CMD1Arpz__%e}7;TU@(v{5v)mkx?!RpqwUs!FFXv3xmI$$?&9gcET6N)sKL{v z-h< z+Xb9KexvlUy0C*&@BfeyWH2G;*8CLj6PhEv9NIx{YQFxb#0|hDc}#g!ws#8a!)w%A?S*n(N`ieZdAY z=C7M}+&y>g^M1M82Nqme<)|RL{d93s-q|^&jsZ&H_X1MFS zpG4b^J2hV}WLz?C+kdL;X4#>xD`t02hUPTfe`**g%I%;UwM4#%Q$fO^kw=fMxt2L^ zMb5!FJ3hFEP7^9-ciUu8!N1tqa8}>84$V84-193p-bnE3z0mRH$3dCUDp}QbQ|T`s zzs@eNa8WZzaC4N+pO^jbTL0AaX$}kKBz@LP)4s6$dVK8lJZ8b?MLRSS@0~oIbn$BI z9I<;j&l}qXJa&4S=LhW#>a(1{`d>VD$_dMp2Q=e+gUffU(ONNe*4N#fMK+PU*MH1& zVmu(MYhbhF&?vd{G@+4Kq0rvF7X*r2gb}KVJ*pnM-^D22_THDqu3XL^(%*``88sjJ3 z%_>NI?Vl^AVHe%i2+$1INJq3ypA=SeLL3XDB$EyAoqjS3P%A7u7P zi&)6y%Za-FGk1&mSjpbUe5>Wju8G^78zcA|t7foieLes8xz26bjSf4$KQPYR7_d+K z=js2P!P{;(hyFQKq`$t4_wcj>2^oza>L)U8bPsP0fGys+za5$(}lO{t$}dZhR7`_}6LQ#$Re-A~U*9K~bS7_E!Y+TyL|ftd0|$-@I6Cb< zusnW8QtVg7yZ@vnepz$jKX1EgkA2g=)Xm3adfQz-_wkrWvhOW#TwnT{`9>Q9qg=5= zn0EUIgQ*+7?)Y(jDPKpP!OUWR>J9dG9u13;ZHsFripXe@fj&xw0hwZ7I|p-5<~N(mg^pK zo3eOYj{MKIacoHl4b6G><-ep8Y*_;{H%#S+tSR%9t29bigS47UC2LnnoHg5 zR&v1Qs)H6285kPYKWMwwy!np;ll3VMmQ=f91^vkO%?mHH=rrF@uvz;5(bre|mR3xQ zK4dZ1{=Z+i$gvf3#kW3;;rsOe-+YeLV|)KEn>VX;`M-;^{wcSe{~Z zWk}iKR=&8pt>}r}`7*I9%=TQnzeg@#cmI4jW84l^^&di3C**_W=YEq2@z7Cn2>R$C z?OuDWApGK#&wGzgTr$@&?5orPo<6~VhQ;OkR_Z347rAj^gFM??38y~=3?(n#a#SZ( zUbyBaSZ^+}-ajx^<3*;T!$LkkQ-0o=B4c-AX3`l) z)=a(SUD{{rIbuKXJmJ%j+A+Umwt__wqrvR0dFaOd;V{ui3E z{Qbr97V$CC)wlUd`%&y|G^vTG83_D9thvhx}GK43c_ z%l6`N_cv+Vwa*f>Sryuv&Q1C9vg47S$eAb6Yl^tOeXz_uY@zTrb*8V-fs!*SDI(d+ ze>d10RBp=g=44ts zMev-F=keEyuhlmDo#*>eVd>y9rAapU_=Wom4lFv%#Cmhl;w4undFe5&Tapvdu=q!q z;F)3u1AbA%t?BH(3O;(0SF?gzp9rkDwOm5}ke>7&wSpIO+jJ%shWtGzRO!d+&@i!u zp<$koj(LaFmpzZN6GU4+Jjgq_L12o@x>im-14e0$TcR_#pRJH}SnxhS?aL(1BXQ@$ z{OSx%w|Cp^S`qldl~a^qeP!4F7vfya4skQQU&NQ(X5zeJ;J{GAEW^^1srfvl_k+oS z(|b4yb#@q>FDqhOY@j3I$#Crg6Ic9AVVmX4dVbnm`)1*NadF@hotufAvxU~wi+VST zJ2~e5%r#zoKj(*oz@Z-k4Gayd8yFgvDc`u`Wx%7nYsRMK60>*TuVB{TRDI~T^Mk`@ zNzHc7b5-wuU6*U>P;7fTf7(ti?Z%}|=JPJ;Jmr&#V|8;lAN%P$gOP~j%O)|0$qbAP z4H|wHrVPP*6Q>#Q?8x^^I`Z_umjD)Xo=*aLa-EK6r#8q(Gk@R+_;RDJ#Je#%Emh!2 zvR_^-yrri2fa#Ef4pU+)@f7hJLBek0CVC&S|X>`op3y9-UH84q81nKN6%sOHayNf|un4m(becCFjC zsNjF%>;KN1`!pKoYk#n4z16hf{>MtmC5Pls)o;?^KXUHlH={fJ7y9M}GCwZ)zh>R^ zUn^EH_sM@gd9kT1$$Zg)jLI^JX6dzCP79x7JW$Ma$NTfQ>v?s8+|?gMx?|OD-e@g; z&ug)&>YP>YwTCg`YRr`fHa>67IDXv+X+S0DhcJ60}S8G3bgDyU}6(j!)S7v zV_~WHf4c|$M~w_LUTxtoaQA*+$8>^Ah|OYsAA967745l2lfy+5FY}1je!Wn5RGQJk z{lT3BvW4?!woGhX;r&5hs??yi;lYG1J>4IQP7i-f@BL7|N!Ol5@WAaOXNv3=DR$xN$U&6lFVyZmr4{S-k$3116Tzz+P z`03L9KQAuWw^8oFwMA||UpH$oh;nBcep*vf&HPME)@yd${6iC*npo-_3xDxulznP4 z+_G)qQqRd(WQ!8zHZks5mSC{VX~(`R@{TGS)||+fdcM6`XcW|{I+i&^dr6o%VmyJv`(~0uupRAl1Toen9B4)SwZRS>KDRW7E;KcN5$iHxvF4Iw7k&StyM6LNcC$GExHhIwB|>` zM_IMkzukZLF}iRY+-MILF)({OPjsv9We$fq2N@V2FcfvJGvBJy_&4qNgKOPV>}m&9 z-a8&r&{<~RnsHl{`M}>0wcXsFy>IVV^xaR?PJQcD#&<)#$BZfGjHB7VV+@m+HF#}R zMN3%{4fbS}NLsKkFex%9Bsk^zoDk-G_hiA{&_xeUSVS#c(tZ9RugA2*avCh^zYflw zD01}fy`SgmnrHXuuaa)RuAXZ+%YY$#%H`O`XCmdAXCoF|Rg(C9_~8l%?gn-)4hD`D z50hpIr{7w4ov&4AiH!B3|LrP5F>2Z^`&rH;Gp>DaUTmLq?C7!$8S|Pei`?#~WEcf^ zr1Z^kob~ZW^}Zdp{w#85|LsU;WPG$|_NM*+bDx<{{{JuI{;&I&@3VWozozQY8EUan zu;r_FfjWPiOXM=E!Yc*Ow@2)+oWJz{j$c*&&K_5;+Fy7up*7F+=ik<)hXS(!K%tS1duO1Mn;7ysxCkYQBaxUqkV%bbqa6jsfwkmu7v*$ zP2`uHVtwz{((?yrNI7l@kL=AkR4Cud#U7(_?#~@-?gp2G3S0Icx*YN-&PgD}MNss* z!VXD;f+IT^r>!}W@R#Ru!i}u8zR9AGe|2Tu2%XjXT3hewT2qD-way7DJ{sZAT5=^e zq;5=%(fH21@aWU}iHyfTHa4w)QD5u!bd}&4mnSnCngR_IN~H|A9!N`FW@0yHwt7CB z>EUF)4J%HYJSbPuKC&b3>rA+e>Qov)jV%n?0xUYm5_=5CqMf1a=G>A z!?)$nGc>JOrBm;HE@GPN58bbi^WWdKj@oh1p0)m78dJm1a@qg?kMceEvU>T-rzt#D zJ|{L*gg^aXE9JLu$Eux=s%js-QfvLEEzZY>G|&_uKoc@?S0781?^Ou>3F8s@HMX zH(UsR?V`#zu0nx)QV^J1#5M_2dGRK3o#pLzo}pMJCdk#^wYhuNRI?^{25 z-Xy zwu!br|H8t5yIu)1zbJXJV1}(=$7|;!nmikyS#8TVUe4^W!I5!F=lYr&KBb@bUs z?sY9pb~0&Q!a~JDj%;#Q)~B$POptD1xVNXZG3VHu{YS%Hr&){cE#0zbnbA}gz& z9*m9Kc`nb1IrUpR)D##S-@Uy5 zxBgphKcDXL)rL2I>)YzYY0GNowoJdDztQDGe%P<;SMUE?wlMzMzFo&PH|`1AGsFG$ zdf~5GD^$-M>wUgsU#n}a_`a?Q;Vg!S&cBb17oF1+_VjgVOO5jX)rP0D7i<44?5;^K zes#4o?&B53k1>0EZ{)X4Ie-7+oL{yPOGUS~*7k)@w|dpCYxV5&>KSWIG;7MQtzKJO z+7-XHa^L!|Qw!!CU{JiH@!R9BW!=3MBJZvU%yxb9;wDGNt?XA`?HX$i{JP#5X1Dcg za*Wc}t8uNFh1z;YweN=?uP=?be*Itms+{o0*{i2ttj=3>wvWeEf9(<{u~~I3*DtOP zZ?z2H$g#3jIfZ%u>UZIZuKP4ESZ+x6Pb{yJjQ7gCa@Ojf=DU4qHmBpGeqFP!Oqvq* zu}*E*@<5j#TYY}TEj<19?8<|&*=PSGF9|tTe$>t|S!Ckvc8$9(A6CxJ{I}3A<}t7J z+sN2CWy;s$ZrtK~3F7KX~eW-FH-WO#G^^A@8_x`u~7djgzGY z+fQs0%i)Z^t^ccx<9Cs_{*xcUeW`)&lTw1+CDcPq^gPmDG5r(Zd(t6X@-=72L{qn= zEw3hj{9|;SRsQO|kg|!hUq?LsGB^9o`bRZh`>dY^{u95k^ksG2QC;!p|2}JfkGkJI z-K_mjQRc^lgP#se-T&o&_jw7^pY^v7m1Ny|Y`UhVl*?sHrl9^UgLOK;U-s$Uc68nJ zLg@IDn5J2r*KcQTJ$!u2HKjb;Gog#cW@v4Ax+J_qMV~8@ZP|qX{eihDyuZ2QBp0Ng zIa)dQ%Mst`yDzUFbI-of_w3IK?>QV_&RXyt692{Oyr>}1Pkn{<-LunMe*ORaE6iq1 z@A;O58Gp}-zQ$P#l6Uu@7rgf_er^C>7o#K zlk5HKudkLR)Ek=m;EVsp;Awa?bfoBDcvxa<4My7E#@#mLTY+D9YY^d>b4 z9B8=ul|O@BLj6;lGPnAbr)sH9CSIFgnhDr!-aO&LdZ|XMyvCgFHPTs$%>T?=w*O|C zudS)-n3n!ug8%We#HeKPHM6#*EjV$J%k%iV_$IrYCx@4C+DOoNrQNwJ z%JYJ|)DB;K);5Vv@Q$mFJ+~d-%e^n`g(OeKN5w6e`dUDh?fZnkCBh#WuWj92yHjD2 z>{J0^t<&mN<^Ni>IS-y>mF4CFCeO@vOy@nPSpLtx=gH#3U01eB+MQ~ZU-FSPdX8dR>GRhc=UkO-UnlFp zm$Yz7$J3OvK@YrNPEEMG>u;^<@3JYGx*LutPCqi$m^M!{t?1L>;-$ z79amJ)v9W?uWaV^VwV9u?+4j-D^WY6@ zt_NR~$(5$TC&ucsC>-LVgU zI9V@LG-~bYkl*@$lPl!@9+Y4A_7Ia^$m<6y6qtgPI2(>n^nK`8y0Ls_$fgH8?e>R6 zKUye1t^LLSw@$W0%5lYiX^(~mqo_pb3Nh`MQRn9i^qQZ$`0ZyvbLTP(=l|>ZK8kB9 z)_#w!{*%XM{5a}w%#=127NaNU<7e}oIV`tu<){DtEj#~CFsn7$6#Jkd$0|nPp0?<* z+c#7`9{M|5OG&@xg!e&tHTJ|sf-_a(WSZaH(|=>wvwEY=j5NddRrl17{(L3!H(qwS z>dEMz8@-*UUAr>ZC~4QK^^d=AwwOL4;lTM*2DKNS{@-KG;+rtPgmq6*S&mkITYUAT z_`X$7;|~4Tsn%cgcj*lc)wDBVoV!hpZTsfmIMUH@IaJ2yMrKDQi~6tAf-6^wU(LJC zGAZrHKhExL^WIIZP*rG|Ao$m-?b*%xq)+Qbq80CR{)k?wFD%zhg3{t-5<@ z>C$;0Yg=lwcHb$~3YP6@IKBG;|Fp{bdk5WSB`Z%2KRW$?z!cS(>3()spX~3xAwB89 z@uZR^3J!Unv^ifg@#?RCxIggy`Du@zuRp)}hp_Fg`h}<7FQ0e%|Np3cTce^w9o^^M z{gZxBaowMzMV#f=4VRh)t1H_DH5clIgq>XU|K~bw+j`EJ-&^A*>F0e|C>6SSJ{Q;V zv-c(nNo>EFUdSRLcCn1}q}{}qJ9&=p*8jfrie63TWDZvuzuq||_3Nk4iJi?_qwLOk z!^ZLJtk*LOSZeQxW-kje`g`AD!G#~6IU+nhS!d?#+uu~&UbFc=$DU7B$q%CqS|&be z+fdK6Q+@jVFMhpIotO6IuUH?yo+Y?YZJx|@#ig&cS?}I4T+v|80fck3VjoQ=v9b=Hm0eAKb!N9l6$P zFDk#jO5pSBMSHpa?GK+A{$VL!h*olb%b9l`>y{TP`Tw$(kN3N> z?5fx7M?V;D*zOQ*@Z|g>57)p(#~`F9JWI1){nM;^x;ETpulV1>nn}<0Pj4@8W&S9jSY@L2{>S6~ zr@E6|98UlH#*zHy$4V{7T9%p}Nuu$~Z`V&h8UFp5VNuNwC1JkboA+N?nELn5gwE9| z%(tg=|2?&T@A>}|rrKGpN{&&ln*D0tf+_1IdRX);Sr#$B&Tk5eJM(7o_jP_z`DZuqot+w5*w1|i&rJ`?Tu{N_3P{F;;*mD*UA3ds-AsU`pcThR~EJ=t~n&WGClIu zPuu^C^QNb--;y}#Xo60i+QY5cFM4P59**kz?4=g3X=M58y3pDX-O11YKRcd!c#+1_ zy;U2^e?3+8dQ&u`>0s5>`&sveWBgxFS@K=Ciu=<2@_@_Tw||$j1i9qJ-@LVy)#Z#e)3~*_PK#buI{j6tJmmD}eM{GOgoUo2akOrqYSQ1N-7ejG z!b?xT*!cVB)fwLFuTJ+__c!Tp)!v9h-o@G9U)Rn4x@zC`*^ffI_JoIVD&JqY!PueV zQ&s7cMuk5g|KEQtUUKW|zPyj8w3|!vf8D)jt>^iQ^Xl)2-QoMRc1{ni{i3w@VeIFt zi|jM}VxRn2e@-!P_TT;CaeF_x)NlVA^JV+%ki{HJ6Z4;Z`pM_Wks4uG#f|zr{3Ta{lfs zyey^pgFmrq^V@j-l^hY_8}3!Sx>xh}<)5NQlYWOR+FP}1b(r17*e{!9Yu~?~bmQ0S zecvmZ3)@pov)+D~*HkvQW2JS?(k&IQ&Yn$v7F3n`BW|kJtHo&>4Q8LH^Oygy-QjS- zv|D>`OqqP|czxban@XckwZEUQHgOKxf9v4-1+K^M*A;%Xs@q%OeE*2I->!v`|F6YO zP}zN+XU`O6t@zm58Jn)}{!~-{g)!VM=qszc3At(^X5%gcZ3zi_XyxVgE+aXI^! z*Cl@s@{}1*Y&~}J`Rk}^mYTY-u&4YHW->G0ZOx62__g5X^sGOBSBI~y+w}66^6ss5 zv-e+L|97wbgWhi^6ABM3yfM3E?PUw+@X3w0qn3QX`tP8^?2N9ztu_1P~I<>t*Vkx~Df>VJKMm-FAo0BK3X z-u?GqY&7w>OZd-Cu9AB`a6vchH0zP;UUDtUGKduPK%+qTSSU7~9x z`?E~AZ^FF7l@-+?Qtz~Ch4oMG{JegFTiMpaPlDUOaa0x^xo$q=_pYypzt&fMa8oXu zfBoyBJqppG8f)J#p8h{{YL?K}rSrvKoj+gxa{fiLFX9^U@;=_MZ%3&*6m)0-67Ug&Sb!=19wk67|KIzNi7i)L|8JQ=q7gZmQwW8tkHt6yp@T2@(`-){K0y!+{`(1(-G?%qG` z*5vD7!#L*KKkChS(4c*|Dt6V?gFIi#pHK6@^tb$V{{Mr5SLS<2n{4?V_E)%IGRM`t z?)$5Me{p|RP^+_R&Hmq;i)|}ZbhV1s$NXRG`Jiuh7;o?E&B;Yu_pjge`v29=@E89! z-rC=?L^s|CHtPU#Spzw{@|kZ>2wVP1M5ASgw#u>Ie|KXgT%eEfg%`Tzf}>(=VVt_@#Xw>56-#lNe+T)^cHT!Sy?H%j3)|BV%n?7x8)ZZW#<7I)7#_rxFhM8ekS2!)tP3XGBq^QS# zIB)_(#*OLgCpz-hI$7K-nPzq2R=Ifki-vPE3bI0*Snm6UtbN&--#)k5yY6km$=BO2 zls$;Km{O6|D|jJuxo1a5*fJi~m;5@tj%U9|Vz8)Ny(YTr+pQ!m}xeztne?xULfeyb3sUx~gYyv(`_Iz5hDm{QcG936tlSmKEtA?by-^P> zCUQH8UcLJLenQLQh7aNV?+@=gC~vSLbyl%8|7|1jo4tKGD@y%%FS}jZwg2nkuU#>U ze+5@xUH)ueYTobFYk%K<&Hh^a)zqA)fj#w~n{uDdy>#$?qMhb3p2}~H)g>Gyiw}g@ zaeOP>BcFmM?&R$wyyZ2w)7=LPwZ~f}Jt*6!$ zXWbXRbv^5PV)v#j$*XqT+7kcYc-gz4BXil}nN#Po_HW(f3nKYs~^0*E*iwPFQ_bgzwdPU zzpi8Ng*A36mhZ1!^z^2ug@fVa()h{G|6H5&ZR^dqyK{ek&H8_BcjVRA(`!$@mOA;! z|JCy+r+!Ab{CxF&@2tz|8X^#6Qy?0fw;vU6tl7j^!_W!tZAKDuqm|3%*F_kXOo)+1r^(&|%X<`2J| zgo2H`wk-Mm|8j-i{A0YkUvAiacwS8Svi%<-@0^vAG+&?g;XwHtp+K2m%wM{nO)-eS zb?a!6)Akn|YxTDNI(RSs`jcn>zTqA z6HD%AY+E0;KI}u^Udh#0@9&vk7xd}>Y`4aF@-Y*=fB%2bZua(Y(EiwaZ$DN4<@m?(Qt$fC)$x_JQhFyUj{mq_o7uWCcS#BMSxwCL2wKDH`V7yi6yY4FY;pBwL!44K{7=zOQFg zTxmM>n3rz&?mN?3UKIVmWOu*$i(S&kPw#Ke{`{|h^`6wt)2CR!xWBn>kDcvQ_RX)a zep=UDulatSwMzPYD|ipmMr~Cb{y!SVi_$vO+W&FEuVpBu&{VNl? zoV;re#q7_$@=tBy|9`e2rCPzdf0~)t_w%ng^8XKmTtAz_;+hqj+mAlhS#acOD68gg zlPiKp|6A_n7-bDhCE&s}Y7cmL0Jn>1m$MecKU z!};MKC$KZBh)mHjkrre+;jm$Xj)*nup@j;aA;B?E*6isFa#?YDWn__%6@O#qgDTk7>C?BKS#7iR-bofLZe?^6iii=l=l#ga zvvJ8ggQpv&XbSnYTvvQ|K(pw8`)saTK2NgW-tAs+MRn`Hix+ft7gqnPFO{42&|<6A z>$=dlhdH;2Gt9rf#M#WQx_Z-%qA!+lj0)91E;DMj97}8caID5|Vd|vcQi`^vqJ_89 zt>@elxG{}ut%R55pDSyRCf%R6e>U^C(zKHt%?*rdB0U~%T&zuv4I3}yh-me)94uJj z;w30??dIA0Mxm}x*Q*4iZ0%h5&;HTBYPY)j)@S>Fyk7p}hK>5eSMy?;FNRhbZEBbQ zzHZ*RE3f*JCfApG=AZpOFZ|Sg=`;4+$16`6^mtyV^ZBx`)It51MR=6P`Gd3GRH?}S zSXy`B?!3hHj~ag`*J}1I=Zk(?ckkDeSH*7yLkhyV-OUUBa`|NKy7m0Uw4%Gah(0({lJ!)y6Z}>ADpw*MPlWoow}Khy|!1@ z-S61`vTwlyMir3>XH>X_m<~29i0}}TVr^ z|NeSx>5shTXV>4o+5g}6{>~%$5y}8`n>f=zJhP`n^_BL z9=VpfC0NOFcSp{hz5J<-|DEuSg|+YRvAw?A{&0tW`r$e+wj`~y`nC-ruDfGC{ho8Q z?Cz~yYql1ws(E{Vl>%d^L2ALS1eul2pH?bw_X>0DY7JZ)`tZJj1*4kCgf~}&IS(Cl zSP`Hj&ehto&|yMLzTJ}Y%F1AcEmw;VEe(C=tsQ#c(cg0uuUWmm{C{mlcJ}}9+Q;sx zbFwZkZv9huEcCFwlY#$3yY7wojNr! z>33L~*XB1?*N()^kBiu*S^ITr*krZ{NOIK><6?DcN_3c@Bf-tebg*H=1RWD@PNs#4 z8Ub30nyp%)DSPg1<6NZ`8x*Q{RqKcRqr2wfvNihxe|&#;Y2~ZpRE78l+j`0?8griu zT={9e|4HroZ8c3I#q#?Mc4i(f_#*Xii{9iEskE|38xyOqJk#9x?R?`w(K&WU=B{77 zX;D$)){E?px6~daIp)ADO(Ufo?;)NL~e+Fy_&{5^&IHYW_Z@+HQ)ZN#sR!!fzH08>( zxRa%&>JGc=`Jc!i{B!?b@9t8&@6$K`h`(mF{`>hj>(I3|GP{d?MNXfy@V@Zk)wEMp zz6SrfSNj&TW>+b{+CKGV{-u9em*ttJKa-FCY5uJ%>r3lfk3)a{U)!g_%fq}RB&R(- z-rx^coMr?|Ki5C2&V(P|A~}Q-lbzm&r}K!OxPGTNfA;64>QlHn=QvH?>=-j=<{bvh zsT`fZR!*8<{NUph-^Xw38>dfN#rC`;Y%)XuS5%*sJru%yKAT zU|`+rIE#s)i?t+v{e>C(>aYGY+PVMzj#*lNtKR07g!!M}|MmUVz59OuO?omh`_VPM zsDC{_=6!Ve^Y_d5W8d#*wEbPP*HOLi;qmMCF8??UmuT~4M?L7C);O)?S^VqhuQe)L zf4#bHGDX>3_0`7s)n$7>+SlH13cA3#c%#?VPyzE}@lV!Yx2m3H6Z5@h!?|r2t+zIR zxBXcgfA#K3*FWkXZG~^`2wQVpcb{5(m{pX_zQ55=*2nD*oAmqg7LM#qg&FRgeNvOZ{O)l@_7U;F=i{as(S)$iYRy{o4--TS&eD|%1Rqt5qV zwL)5Mya~Vl`tF+WY|*OS65H3u{jI+!Y4iUtXWYWm_FC6B)z;5n@#cAM{Ygi+b@|=L z>*d#ndzJi+-)j47s`cAyk@q6&+BNHUM^@?mto!J-`uhH`u-D%dQ?3YAyl>Tf)fIBI zswPs$>{kE6hkviRNOWpnx?&bXZ@wq0do@E-l%HFy)cDrVmzS{ck_3P$eP?_ITDv?EUX?$p{7M!{111bd!-s+Tngi~qOj&yn&^|2X=J z+=_ypKU2EC|N8!@zj1rBw;HUzdgI-@o``e5Bg^BqYF%fQxZvQ%dtinDYnga;+xLy@ zH|Xds;FN}Bo(l`(r$5`S?RTxesH)zz zS+VD2*w-gF{io}BTv^UpZZmD`-KwYYPSfI@wf@+d&)d4D?Y;bEbddglD9uGefh`OXUnIV-rK)EIVHiXzG~yos?{5uk1;WI8wl?Do_#g@ z)VK9cQ+94$xkr6bP1^pg*OPlrMfe=sKIPSisVv7n9Jo8bPTI~{SAR~TOG>`^hcnC0 z=g68$+s%xR>o$MX;TUN@S-fqV;^|qZPs?9lTedYkZfE(e{Ts9OuW9X{EwaVGwtT(Q z{Q8V+(FYq^Pjha~RH}=)TB#_`{qXTKJC2`+7&iHaCBF-?W@*X0&1i8WFEeq0>W|Ai z+pISW2+B ztckNS*squWLhyvbvH*v4>kVI*FW9MIwrgSKj^Dx28&-NPt8W*$^p$5VEtDJt#`l0vMN^!qU z=>M!=`}duVNl1xX@A7T`;+RQK{=Q!oTDAJ>`e$|~Hd50!+oWjPwmw*}MLB3z;YI(i zM}99Ze|p;gsdeCt_*ZU!JvRM~f4BYJzLQV?ah{Ocdb_mXoL^O_5VHIof)t=6Y+E@FpFZud@(%YEnS9iyq{Q3XabL+Uj@pF@7MK9da zt=q1$#c%r5*gJE!)~?pRrnObEZcVs&-{~9rQ7a4MwAW4T?+P!dkJ}pb$2aPlVfZua zsn+lIW@dNC?EWJ-b?#r6Z+{(mB4=|sX0q}9XAyp7*SO(WR_46dpWXJm+`N&VsrAdm z^{Dcb=DEMt=KtDnwU8A1{JTGtpKO`AJ~VNc$lBWMJ@f5< zJQcmJwKw+H{#S3pV!rIX%N6}~YM_bnt{r>-?fl3c9_M+4^K6>v7oR=xuXW0&yZqbw z>uKDY^6;>Aq2Jb-U;4Ms^FjCSr@u~h|No->{%O>NvY7v`{{HdWb=m$?-P&LKE0+F^ zU;WieJ8W-Xv@qB2@V5#3qmJsHzqV`P>5BPr>%(Ww<}Cg8Gj84bpL=G0O|5zwuBvtY z)w!BL6OCCRgZ#|9G zcPUw~zWVCyRmaknmoEC|KmU>Mo#$Tjx&55|!+I1}c)tF=|Bp!Qw(Y9#f2gb~liqJ= zmc9RcXiu;FzqLO$U0G?qcv0>DMWyjYzrMeI{_54rxc6VX{_?y(XIkgi)&BX)qscBy z>L0Z1xy}A$qwdy+H=fn7CaCdzzA|&F(PC+Zf1NMCCd`itjKBJPeec5HrPKd?xXL)^ z5|_}FA1^ug|4S-dxMbftyLGX`xAxnuU)}Y=dBf9yyR&EA;D7Q~Bx%p{c%w~EC)PZR zH=U!v|LN(SX`D7wuJ%3kzrOyO*8Ze-QD46=%)UBpZQS1J|30{ve_FlXQZz*?vH5?D zV@%7}|4F`nd!EFbZfgybd~Whr_pFKd8)G-Kz5frJslNJqO<+yvYrS8qLo5HrOxfrE z`rlPEfBR2Uum4h>zefC_ke7m{a>mof#IN;pUTwU{?)~zk+TE9H9Nu5ez7qf6wd(8X zPhmT2|Hqdt_rLOG-mdkweij~!YWAmZQN3^fYx9&=ZE@%C<{6BcHAY@(h1f6du9Ln(A9TO>^;43+PvTEYdI39 z?7x3~^`u`ad#}#A>nJXn_n_cz%yiYo-z;7D!d8b~lP`X~weg3_ z#cQ$uII7l%uD;%+bMNon{L{hz_RRg@p0&1mZT7ya@BT+-O%lBQ`b+EAzY%}qyd$0_ z8CUJ!p>&$}+b{DF1_l;p$63q_r&#M38vc*jI8XnY_S4 zM*I$Gug>~?@^}2deb%nYYoq5zDL7p>*?ypX^5y>_;h!pNIHHulttnR%y}mm1>gs)3 zJHuCZ#n$bO+uNb4{`%^r@8z%C->ekfa!odU#hPX4zIHv6~#>&pe_J_N51E00^^-#PjE?uGaDuO;rg+O>6R zY~<^Y&1TX6UN5a)?f*UK%KG5dDbtqYs~s=PgY{gu$~zrMeo{&&sp-A}`JuTqSA zTdMDUZO6RzyYw&rh+Z5r|EAohMZ1%o-AuP|$E^up6TddLcD4Ab?@@chqXU)qulgTh zAGg}PJxKa%@#i~>cmF>q`f}SdfBXN}c2_1G_`9lf^PMygiO$d8Z(7`qV^L)(oh39q z>Wlh|t*_&f%q3ahpp4^1@xNt$X>X?Gl-+Mq zTAKC0>-5IOt!JwLCamWD@v?7@&4H^m`?yb}YUVqy?-7xk^&vVWZbfB?%0G40($|-& zwDm9U+FP~fX}p7{T4DYAy1l=)Ka9S0?`#0iq^wiv>Ty*ICRydR|Bc@iwttp=|Jyka z&Hr&{FS~hjvsGQs^Vj>I&T46XeRR52`0Cm-Gwm;KTJQR5_t#h7JTu?#+MKrhOW2>= zYpV{PTIN;$t(X0YaLE2}d6ud7R$r8O_g8T4y3iLfNqe3}E?F=2_x@^*myutq9=;CS zeKKKEU)J5p@2{IZJ|x8c!OivZs;}?QzrL0KPio!1If)iVLW|;8uMgusD7x*n*j>-_ zzWeIW-<~X8|8;t{cy@eS@sg>^f9_i@vHt#I{pq`poJ3!*zpnLlRr%u3pWAY+{rqbe z?S1<8|B|S`(t0oE*X`VGzWR0O;j>Y*ri%Xln|f=8Y{|N*`=;r2hJ5l~ulF}5BQ*bq z;=A{SM;`ADUl+Q5x8cYCs|^?!m^V4jVs3cFT6rwtUw*RO+p9nYW(Bh$raQ!>{kJufMuK?6{s`gxbF~ z;p=PMgT80q*RA@pZ*SPzzUS<(UVpI;iC6!BkmKX@YnRML%GQ+ab>8{)*4L;#(|_%) zy1wdEbZmLunuXq7(T~!0Z+*43HzvG(-PiK0qg8vWj@~(_e=Yu`%Rk|LXPI^PfAFl< z`Mdhxy4X)Av-5t1UY-6>{(sb8pIepHVY8>kfAQHpx&Gqk6Ytb^Oz}Hf7_Po1^mY5| z*!ycjUwwV8_3=`V;D?1K)>XIS4;wCA{`!5doxOSJ>O)59({0bs53k!0u5VZr^)+fg z$9MbHfA0QU8?x;_OU%xz(U^pj3U$Se4Hx|;{CxUj#v}3b+|2t~ToqZ2d7eMLk(IN5(@U=ZAEHmI z$ptEz?>7|9`PF21?CXt?CtH{Bi0U+UT` zci~T?6YtKF>-YT8DQ_fd?UHxf(=D#)ZYf5i@e;sLW|NcVXe%IF@Hu}q#gnn*g zI>iP zElAK&k!EE%=x7k2W6H&Hs8O-QH`#c-WT9)U_sYc2(3(HfAKVXm__$7W!v5}c8!~;b=jTN(_?cZ||DbTKX8hCtR;#kw zx-3@A4O9O;Kjh8XSB0-LbS7s=T@9c5R=C;Y+T6^XgSN28I!j2Zf8IuxcE#7?o{{ucov6=sP)y{d$ESjXXY>Pzc zH&vmoq|~oRX1s4O&i@?K!1->Hq`Kwy44aiRj>$|7b~!O&<2%b*d8vHU|EHeqe*5;* z)ys$G+C?Qjv$9vccE+N5`MYI_bELY|TfGC7A|&^$-fMp~=5}}arse>#c%Q3x4w%L& zUEcMoW}4%<@GVi^9$sIAt_YkIzOp$zPkQdkjmw3UTUR;F(47{4FmPg9qfcI5vj4Kw zXIWcL_OgaPebU{T@#b}UobHM9oSnT>{3`Ny6ckzM6t03!_hpu9axo;l7vfJ>> zY#HrYvv|5UUwo?()&4bkMWfuq3m<1aE^m0isB~q*nkmwpPE866PI#ztak?lfboeL< zb4Q-_7oA@pQ!KV>XLw{$YsjNs3pu^3|K?>cn4DFk^{>9~+@1~c#($a87MB0N{b#ZJ zfBq@|)Bm6UU0-=ze94Ph;(X7ZAHH~@*gn!IYR&ekSIr-;bC^Bt?|%1+eP-VT&s6QP zV|V#-@5vty-|qkQn~uLWcUsl7S<*Cm-KGXzo#&G4^1}B$m75=b)%4l*YbElfZ+O4% zd64w(lit~)-wup=dPG~GyG=gnYbzJj0z%C zG}O2_XEX+E4Db>WW_3CkFu_Gdn3Kij*^Xz$)mz_|YOPx?yYy(N)YMWF*;(_y%n1*u z+4OGK*LPR;Zt_3ub9Ygb?fxUPs^c~HHMcF>reA9uY5aM=(?9!HzrX%JIoo~xzGU9F zwoKpo7j(R1JYIi=BX7!nOp(ii`BturIaMFM2|BQ;cILEsZkDHAY#1IeDv3<+5mDx4I_EecMng=BrA0BI zW9F)pGiHi(ww}Fpb6v_g&C*!D_n~I*@4ddO{8y?k=0)zD?qAEK*2mWV5%=4^`n}ES zeQArjcB-ge$`_g~@@P|ed7-oO$LUsQG8X@1xNg#}{(wX3rS0|$70b<5X>WdR`pVU! z{O|qWvrj*p`tse+D`#Z)AK|-vZ|+*p<$>!tkIt$3;rDkdhvFKm=U@L_e_wx#2UQdiqn!W+r4z37rY8hir>Gu zHg-qvy000pw(K}-zb`((f>A}JM?;j0x2Z94!2%r#Zb_y?P8y(*a+X7m0ux+(*G3-i zUVXqTR#$78YgUkz{M=u&AOG~7{g?CVU;UW3&%eiiwBKFKzpUnxY|P=`eVtXFb2*m1 z)u=l@Sz`Ku)FpMlHU@Z~KKnB5+RCez=AG-ltzx$*Ji@y+SU9`7Cx@^2XwiiWa(nkR zhA-GtrFXbE{AcC2ViW7AzsE)Ur+xf%ezjJQdOLSj{`G3cEQYYH2X)y}w!HG6AZhED z?610S!Tqb3Io^Lyyt|Mi_hod9cVF+sBfr+XiCjEK>ch!zM-`G!m=$SE`ChkW`?o&_ z9)KpYa#VylnGQJ|i0}{-Vr^+mSQw!p#?{K?=+NPjCUPrj`u)(Z4O2_iRJCGOO+WBN zO8NPm$%+4&f7Th7Pk&WA>B}_l9n){`+o^oy;P!1swQBQq1J}Cew`@&LNnSVkxBr!V z)!#e4o|f*;o+Is2&(?Ub-3bxw=#_0Jb-J@4nH?@PG(-)Elfy>>QZ-G_jIxUSthQT+OGC4jxUal{dsCPi}bzyuN>+=&Fx=* z&GOg&?BMHyuXVx$)@S)Ai{87oV{ed>R9dQz!ud8ug##aR+viCs&bXQ}!+;~4v;Lqe zdqV1&{al~RW-ZhTeabgiWO4oppNqM_u0OK5e7M6Yx=jN8!nSA6|-t>pEdFki{^@dv3mR#}wuIX%;3d&u`K>ys5%AuE_9xlg4NLQ>#N3Y+vWKb^6ISo6FYg_ins;$X$P5fYICT z6rlwcDz*0cd$`p3efF%Z@12$SB16b>rRK+L2kt8Ami+(wk%NC`*SritCZ70@;j_Zn z?9^3KvyziOUocc?SRb3H{IKcN&qvMMm*l_xzG~YpmZ0RGGfU3}gdgfz_3pilS;_OK zdv#W7oxa4kaIvb_*Ql#^KZpMD`L`kaO`^pG1!KFl@8!*;YO>e)_g@j3cEOc{J;tC` zntNTho|~U+ZIdwjM2(UPd!0^WCWiH?nV0YF)h(~y5HG(#<7dJ9Z_ocPo+$apQNR0% zQ~%H9{p$nf9W_0#|KRYkRFRM*>-FuyJ03)Rjk>dIXUL|g*ITD16^Z}X3(eB5xE6l- zhR4k6SJw_|79LJ!YABarjhDZ%h%e^j!RDCcH%lLu+^jl#oK0_U%pbwiFDkrVG|k?g zzn!z!!8J+B|B$R}o9 z{O{vGi)^gyb65SZci7$W)NtZ7lU0?kKTkV;tT%i0r|jH{{O~I`DsKM!&$3%uNl};C z>BoMqxP{EQaskSpZ`Xa@^=JR}?k7jw&j0M6VdQ4U%`r3mmn_q)8>SntR%$yM|9&C( z@IyesrC;BL73!q~^q$vzu9~+hPW*fL|GOMjUZ+q0(9jMza%Re`zR%Ymy-92oqc-y*Su@K&1Cudvx2mS4LvL>+y>*6q+-?Zr|VhgW?&fYqqW{(lcNGUtE**^R)**d z{TmVRievrs?~i(y8eI0>6;!=%N8iIHpSwS8ub;eMG1I^El6UR@@b}Rx)|+>Tsh{1v z>iyT(`>*d0pCtF>O3NX$Sk2mcMRULX*Pq#0zslIZzOwGu_DS(4_Z(t>^e3Aq_x+j{ zy{O9ieXEZoF8L~Q^sxy`&8NEcug>oKF8@9Hp`Bdlv>!k0Qr}&@bY=b9(pTTN{FU8! z*Frzy(%O=cJui-|t~&XZjJUq*+x{i5idWpew*Ehh8PCN0 ztM^Z>jb6R~-rtDNDnE-q{G0t?`mrdpOC{Hz&X<3+VZZyk$W`I{BX&n-sef4a{{Hp( zTY{JB|95$HcF*2gCGW4@XS}}4$FE>uV9s}(#nPz7T3##E`t-k(ym>|M_ghDQZLHgS z=ezytFzL0sRAc^MU-fg!;duFuNZYE|@ZI~vwXdJr_4SqPuT~xRwc)EhU%&oZe{Fx% z-$c;|?OB$o`&rB~J&N`3d+M)~Uy~FaU+q0LHfm?x+N&Sy*R8&~RPbh<_19NhSFV}o zle|03_twsj_R?kR?;p{wnZIU#Z0z;bld5)QO+C4``s@0uyQ9`cf6dyOFT470%-@*5 zQF~+lR(~!2T68q4z0Q5>`m5Jpi@z4F+NgWFXMOzo^O36S!{gW0l+Rq>{cgK?zoLG) zY5a|}+*h4b()Y{`U3K)=(Z{RKE9R@M3jbTxs{OWDer@Q{3YFt`qeS+zioX4S(u}Ed z{^6P{Ve(6=!*4zObu4_fP`&uAt&g|9`dXfK_0`w-Jp1zR7Ds!uua8x3@h$tNQ-Q`lw~KRa@rm z{dH6IU)7ljY4TrROf0__U;k$R_xp3+)&BbR{avT{>-S#X!P_r=z4bl&ivRkbwpIRN z^0svs1ILYTeK7hgVYa6}QQo`X`E#8~U01Ao>NiKO#v%#c$k&s)Uti-czrOn4)?e43DldBV z^|bo)Crv`jc1!Pzc1_;ueZRC$|I5~@CcXF5U(8yss(7HO_U#v+t?$F@x^=Jq750m} z_kaKTy8lyJ_Fg@G3Q2FkOFC-1YKs=f4oiFNhq*YV!^S5B6E ztz8%TD&=kT*HyAl!>_u2{TsK(^yx8qH>=#t{i|n%v;HaAe7sPo>2yrWIqxm+f5*Lz zytSukd(g_s`(Itt+q&+3=ih9JeY^igtIz*)Z{>d$JC{s@AF4e!{=Vg2Vjo$rx^DOH z@b~+E>Bhf|TX($NEB^oLzgegMAJPteb@Z3;n^>ygtZ_Ss(Fz3Sh)CySmgs*gBcRJn`I==zKG_ttYvQVq76c=2L^f~AX& zs-MX`_eUwQ{Rdx)abMS363(q#^wR3t%c!4MSH)l79kJx=%NmiKuFhQP>xCb@+ozT7 z(~kNh?8dZwS%k^|sS&dS%i|B4nH22oczWLW>-+Aw^|3V*!^3v1k1r1o@!J0S>a&}R znP;5`1&^dd; z0n?Q|QrCqa&M0!<_2=t@u15Y-AM2EM?EHL}+2n<7)5@)@`1KZi`_#s!_o(dG)ivcW z_r!;UBx(!nZ#g+*^V{_oAN&yvJpIbAx#7Tzs*~^k{|Hay>27!A^vPf=`su{v$QZZY zQvBoVIrFdoPnfLK7A-%?`{l3IQ~dANOv#wjZpXE8`SVYanf%S)Pq<9`n|tf)r~4dw zvF5L@XWhBAU%ZS%*DZ;Q_4wIYoB9Qu9(L{darke`!yjB0j1v@O*S5TBJH+S0%VWC# zYw?yc^Lxnw{eP!RrA&M4Dq6L(Zf#uE&xcjh=e^&#W8N#h_xrwm$%(3L#U+XtDxnuv<_ot8f-v0IE!@jj~ zd*gn&PTBs$;qsQwGA8GCbHPu->%;F>*{1$~Q>faWZkKiM#r{8c|E;mBTHt$lyY@fr z|HC$ z$`)c+ugbEf_WShf{;n;j|83JgHd)+k-nHEG^nBO8gXM+S+|NDxFT8taXQtloKa*$7NqD~h z`tEmcWAE2~zh4_0rS#uZE@^Y<>#Ogt?+<(9cJ6)MlP_-i2L+R@|7TvU`V+aCYxVnm z7jNX4+6vvNHoD3#e^Bg_N$d|5!*5e}>wOQuwavf(yI0)X&&mgY#6k=&wHVoVig|wcodIN)$VreIw7_Bl?u*ll@WOZ*s~=c)#Cw z_0^mG-xqzmeJDj~(&pWN>qOSOZ|D3ZC@}qG|Cag*t%mpO_x8Sg^>DTnui1w`U#IOa zy8Oq{m*2{T^AYpcBi>cz@An^6XwSZ08vg3}Dq9cjpQ$g4-k*H`_0W6u{eOSyd9}vI zIxsLWD>=?$Z3JzJ(Kk0!K2p&aBk=qGKbCvH|L_0*zp3trW%%EsU%%ttM*RB!b@j7% z{xR#XzA5iM$eJDFe>|(~>ZPaKC!gOPeBucA${89Uw_?_XbEx7#*c zd*5}guQR%yF8a59Qt7{^N3V8o?f$y@YxT|+E$?edf3rpQuYNQwdVR#Q{nyrCi+DG8 z`qfp>cCHl*nLq7Op7&qDQ~R%L+7?Z|7QdSHb=dsa@>|b6vyS?|)LvD!HSg)xS6{Qf zR&A}cjsM#HHUHXvJ~y{k{?$F5(r)~J{HpAN^#2CQ?@RLh8@*kuWK!Hl&$_Cwcc!fi zU)!~H)t9L6;Y;fuwA{HLzb3TO-u}tgx|i?Q#ftwr^?q*lH1FMi#dUwL6na!2Rvx$M zjq0&Odl>Ux9}0VU`L6cc-Cf&eFYep)dwpQRq|ViouIk-iy&^B|pMU+Y{a62+o7h)} z*1hw`P3mSNm&=~7@2)OetGhV$hKly$n?KV(wJ^SkRBzd|scOU3FV`O% zyTsRZ&5>_P+Wm92?=R*3EL$%fi`~-wTL1JU9<^uxh5aUMLU0~?T*kt?skDcPo4Sg{knx{OK7DT@5og zhnK%A&)F&3UK(ufBTGw2q^jPa&^Dx_`Duj^=;f7w?Pa)&E!A_@Pnc zXSC*9xxJE4_TSjOwrcA=>$Irnt0T+fgakDlM5_J&PPx5c`|G-`%fI~dpZ_VFW6!gT zoA$>|KAYv_arMc+Ka)iJzCP&b)w{p=ijx2Rm|52!wE2H6w_oe|U`B7s*}I+Zzm)yW z@($i!zuI24JZ`=Ej@ueLc4$WI3E$va{Omet`LdP>XfdzDh5#K2Zc(Pgjs_tbpq7(4Xi2aqSxBt)9YW&##ut zUiq=DxBdMu@5TE)#KP-dfBSkvY+vuEqs!kH#ILP!ez5Q(zhGAH#rsAj;r!(<%-N~~ z-e3B)U+$R-!>2{sr?YTAewUY0T2Gt*r{qMe0{aaO@P9}?m%!Y|sOFQ0vs?Fq&{TewXJ#P_2rNAceg!$e(p_re1UkV(z>r^*Y@7$(_I(&YmZ#azE+>vPoEgn z3*Pw?Tvt9fICo)aPr;wEuUCI-@xS@9^VOR6KRQ1R?cT~Iz6|*M)bsDJtriR4M(F)& zR&EH*T))+zWnnzu92dzi<+5zkeb)y+pK--4a(!w+`?NOpTP6yT48a|ByvEb_?c4t( ze%VLq#$NT!Jx#5D-3PrD9rlz#fUc4Hlrm${rEU zPAAZ+`we1ToK1}Z3qv&2xLP_C4LV}3ZC)R4wBz1Gwbd#swYt1#W^X+5?f;!D|H_L0 zYyX-bb)WC|{ML)Txpi|}uTR_feZRx`+HfC>&)iZ6&HkwArrp2LyL`!(1O6J)Y=MRc z=Z3qSFWzK+CXJ2LY|B$mOS79zPcBC8m>=w*d;LY)%D}r-j{9HTxLl(eFZ%1-(^MNh zw)q>*`t9elI?ug#;-8#-E_uwIvlEzm%R=I6xL(}3b6sxAbonf`V2J{8>EF8ytloR| zO|$CcTV_;q^{DghX>IPIvzO12VN?>Cq9G>4+SHt=5XlAFR?wKRVuF`ACy$e(pz-|t zm}32Do4HR<>Uy-&%VSqu^Y+v@&eQ*G-M{3||9|aX{8x*BWT))4ne)$V`YpG6KkurO zOKt=geqFP(!}Wgeq>qj(BA@lot$nw=RA^eB4x$HWemRDFAYYs$MF@BZD=RWlMa zc((ZYkqVc~v8Rg;&f#28Fv%>uKJxsFnJ#-TyxSd?RjB&>gx%Dg`yJ2U&;Iu7_-7|= zm0#;C#XrB@Hbpx>eZkIapZN}k3BQWB*uBatOlaz5?R^V=*Xno9O6f`h3X?K0|;tC^Lf|j&icRupB{;s+Irx)ux zGk2D~+Eve;`cUO{yx69Cm8YAR&i;`mRsFKtTUQopcK#9JFIza-;(LzEeaoD0Pnmws zeja?|gy@1=vU z{$88E`F6Lrt86~s?CI+kw(qX{{8mR?>(;{M>$ZzdR2KYOAJ;N_yBlNrvni+bKYUP{ zu0%O`W^!HadpvlEXm3cvB|_ix)3PfvEp-nnkq zvFDl6ttYP4-P2a(>4zN>z$ha!MWYzB zUm)o~M1~lbL6c&@gciM@*W&yto@qud+g>d6DlE=B^wb0EZSp4yez!gRF8{4=@q-)3 zJ$~}7ypvsXD_cM0PImowuG#hS51Tf>Zrb?z(j~9pzl?g`e5mP@@WpBR3_TB2&kMPqwr|&*B?`+3|+{1jE(l=H2 zl>YLwEZF(-+idsEy$`CFZx%S&Vn02#QuTbu^IpjX3ntbj-n!K9?)N=jFOPS!|!l_;S;}l>XCr{t*Zq$3%@$@RVUK6{#Di1%69$L z-IHD~?P|@d5BnSW>-E>vTUV<#^!=?~ANp_C-&Fmzo>^ayeAT?Y^<}Mx{`r_0_h*EE z{=B+w*W2J5OV_SjdirYA-l}8znyV*YoEL8~^;_4}HPdUgMPD7ge`$Mc)c4ogUz_%p za+K_iTNiqDb?oDvT3=tX>vQTxEcCwhFkIp3#$V60ukP+T`fO{LfB2-UCvCGoxZgPJ z;pzOh^w#O!U%TRCy|Z3@t-q$VwO)Guwf*6{*RvGK*a&Fxo_l{~`)j_xwPAb9Zz*5@ z&T)I)?srP>MJIFQ&sz6aY{|agyyY(2l5e|A{;$4f_xkWZOFWLq*Dikk=Sj75as0o% z;q`HQ%T?Ewzgn-lx_34IqK9!OwuNV`(fc!hfAppJ!$xh9Zi}a1Xi;Q%f5`COH;%_A zUM_KbGd->|{72^{E$h0UReyc;HTkRUuczv(?QNr%8Q$Al z_5W+>NBi}$Cnv`(Wq;9Qcj{LB_1%VgVb4Fu{jC4)6#sc*&AqNQ#;#Qh*A%q>FMiB- zFh?lA>TSf^s&#v_zZ%&7KC1sZer@d8w5RK9o4ntKKU`NnZPGv6E7Rw#40%=mDk1jY z8u5ASSI2*zSm&M8v0wexyLEo-NRyy;gIl<`@;=U8_9#O75U$pZ6vC~#oM0)q$YL}OLqecI%E%B}T z8k^>^CVWx(i$$xyYG~~Lx5NL%1dsi0hZz`{%N=L2HLe0peyx}v|M5=z|L>pbe?9%R z`SJeM(cjj@ua;jlSO20_eAJJvFZBgBRee7y-n>y?VSQKl|7S0!-fgltefrJnAG7up z-P;@Se(Rcf_kX;fUKamv-8D_yxV>?IBmPd=`srug{Rf9R*)>-c&o^ZDcK>qUQy zI_^fr$3$1m`_1wzYmVqkQKj7eJGI9DwN80IkNfL!sp@;}zqPlnR(&n5+iUu7eI3Ww zZI53af6!KuA;L9%wS3#B4S~DkWp@Alw`SF!-s-zqfw7l#&hT|Xd}p0au5F7w8?+O&xbk)QYn}P`|F?ds78JU-+%Mt8NA0Ko_T=|1 z^8PvRZsL!+Kmpa?g3EsIKU6-$c;mJA>sjVW9$5b)W1Y{YAQp)aAB*IFC@Onarv8r% zyIi%sDC+C4FW2-v->3BXzyIJ~6uj<9ywmgV_vW36tluPb)ad9o*CttG(-`lV#C;I6OhWsCXNn$&)yWW4k<`&)k^#2-@itMt|gf8(}f2)m_ z|9bzieBbSVHSt+yQ~yT%`TECr#ns6>eQWDx{k__MeSh4$zcGK~-d4Ril3%;%VfdYS zFGY?v{x9L(o?X9a=P%Xx*_+h<2>!SyQ5&=_n)4y{J$OlJ@@xt_5S=V zInEjXB~1IsM9WKIT`Q|Rx&QvJ->EiX+TKL^FMeE9oh_wL-k>tc5%{Q8=>@3gVt zljGkld;hCG3l1wt7h(3{qnDq{Y6yz7WcY1J3C%J zf4hC__6lqCyJvl^`ucjOcx;5)8|5=aQY$uJ-oN4M^5ln~FBx%Isb=)ZPS_>&_h$XC zt&?B>|B{)1|Lft+nWt_4$p{_$6MU=wkEvwlE9a1t(eLim2~)U=i3zZSc`?xbCP z%B75d9R8c?-U@}*?wzY-)#KOt*SPmx+x@rn*TGxHr?lUHiLcvzhk=3F&2bhx<0;nKg}KtN zWWY5>*O2?mmEi4n5cb|8A+0K*6 zS^KYUJaTVu_SQr35sPe#{wDpc-oLc?YnF)r^ZieEt>+7^tvNMq_Ad47>sSBNjd^rs zn^4fBzx!W(fA#n7zm1+HhTeVg!f~6!SBI|&j?a4cerer5)tDE%_JsY8*k!ghHaz^- z-l&~1Ti&Vf*#A>+-)pYwA6qMHg*4?qRq4jBeSP)y)$@m>S#^!pXB?5zomBkt@~>0> zcI@j4Po5I?W9?tvvNe-lcOK6dR?$rRdZun&{QA&EGeZ33KXYua{j&Ye7vG|A>Hogr zcRw0MUAKb?H^d-_S<`>QY3Uw?awLo~7O*RuB; zyq6S3Zcfbpe8qgJ_wRG-UhiE0b^TQ(*I)nk2)x_!Xx$$5$NJlZeCxLUnE8Iwj<;9l z+XgPm`1JMF|F$)E7fr0Jj&k;2|IIJvwq~btO5v;N-1mybMAg@Cv-tX7QShYn`<=RR zt2=M6e&-$ad-}zs-8`@44|I4xHI>uZ{`>yRwe``f_q$XDeSNXMZq4rECZ&9=CnFyFPesJoRGL{}>V3HBawskE=R< zV!Ougui@*XCjb69_1TxGKR@0Jh4=2&`y+Nw=ip$Ovp?|=r*Q1IHM6h2KH+=u@2BUlPJap7{#x?a z(eQhJBTZ75A3c|`^HCf5 zH@-~zwfn;}1A|FdSGh~rf5};XeShdJt&6YTRw*CkR9$|5a`|7M(APKX@9P$)U#yOP zazZ;}-g?u;7IEuqm3M8~8+vH#tFKe5u14+Jo3KB8efj?K;@GI|8cH+z-v2VK^Q*DE z!N9(JL)U)KbEzh9!6_1`j-&3t|DV%Mn6H+~21 zTNEbW@Aj`}`poa^H!hqh)SPzoX=3~1wN_hvc89;);JVN3Z~WKf@1@z{uf<>e-Z8a$k@esBuj!=*({*;ZJ55`+H(?|9ooQ?8HvD&O55IG*Q}oOF z=%A=C{yNjPuV%4%B4%OFGLwD&&R-92Z48eV+V7k7IsVcq?}$~``LC{8y>EnUsKZ?%%vmaH9RLSOlE_2xqnlKfA zw@3Zdv13`;=SqLg>HdGPm1*{+d3wiB88{nB`X0XY>vxw?beQQIfn%3y)~3vRoVZJt zv2JS}r*Yi7->d(Mi@&-Q^<`SJ*Rj9idcWiT9+&!GX(#`|d+%%Qoq4~$9{F4S_5Hz2 zGk5XCuC-6T>;Anu;q&EN-=?43<@Y!HVWf4~$>0Bfz4CUedw((Z{_~yD*}s>5(|5^x z6PT-V`F-7=K%^^@PGHGUw!s<*}*?TFdrgMK=U$^`JYtSH#LT4f1MMs`NMzh zU)9$6+p8Y2eKoN%e#_>xe9iInuQMxeRQ%dmZoF^uwzKaq?SoK)!K9= zg;)K{=BM)VxyoB_U#`8jT-JO4_Mk^qRYqUdJ~vsnZh!h|KJGSE?!!x_ywTDNI968A zxov$@$eg85H}3qP-tKfHef^t*2S9@?IwG8{ObZhgW)y(B>5WN>5*n)9oJ>v*6MDRE z-8{bc$ldI+Jrh>XZdH19mwm?DANeoL^8?M}*ME&aRpD~UU9SAktt!pe;XI!MPrKEa zmmj;d?EOD03%Ij^$bPV0?+C=xlcDs_wtD;zRj63)uC#I_Aa7{AJ_G+Nve^?gJOcOR% zT{Wi{70ZhLjE%kT{-tMiZiq+inP2^@{ZdVq)`@K2 zY0`C9mcMDVQ$KxR!#}YD&Y5exW&QGdUR_#T^ZZN3>_4}~&DWH>y<29%CpXbc+Rx|e z3ZsDjXug}Sr604FrUgb;9Zy zjvE4cRJd7L7CKC1(T}aZUA0ha)hpN7kQq}a?U*Whpu+R~g1!Cx7ylfO(qH@Uzv8K! zw!3$kcADmhpS$$9Zf~T1C->ke3<#4xZz_%O5|xFDLj$Tkrn*S*73p`*Ezyo0T1D`#!?| zOjtzOjAed0nF{Z`JeP{6`5cjabaI6ZqlUBT;ZW7sJQ#w z&b8&oBxaoq3JN(nWpbBWd;90}>tD#9&DwwRe@Dr(t?#-5>p5rC?9{F(y*i=nw9%cV zs{{RWbAzsLneNc~DM6R>TJA>Pm&v7t<w)|J-<}5kU6nPlS8OuZ0lm9t*W6{_yQ%Bn)Kd#AwKIQ7UQOPf=H#7wBE|Kl zR{!iL((jUG8&4~(UC#OV^Jks&9l;B&4!(K+<)f+Vt#@kg8!uT+UHa`sCr{yDfyNIX z^4LKh)Dm%KWjX>r6vwHRC-qriP zm!o!n+Q02j{7Fl_#PfCWZenjI$IrBtTFPbnWB&i1M$LH6EjLqblDGWY-ctIGQSRBg zyGQ%(JiW~z{$O{zKzDHFMsm%D7#d*Ue{X@!RHo=LS1yd9jw55N}I!!hr}4F)7Zb zg)RmGI-(q{PL6^S(LPh7KX294SDET5krdlL@#p;s+vm&WuiIZTPx{Hv_&fXF?xaJswJ+3!nj;$la!h?WZ>4lIAeKWx&QTQjAPZ+ph2?`dIsk4>$-NX&{##xDZEv~i%B+ft$O-1O+-erY;UT0+xA_X&Hul?-)^;Wze$(l|2gZ`c%?R^2C9GSDD}?1 z>wSNH$=ua{_N#lEX_x!)Ae|F?Q==|`@M z|334i@1OHp?y&gN{TKSCJ-J(@etq|T*VUh9m;cgkzW?On&W{x;T55CWhTc6{bhk=9 zcG0Uni^M&vbXK1aVR_NcAHOy0Yty=TmKJ{3)ql&nwZ3-k4bop*x4FFQXw9tMUn}#! zgsgwiz2tgXP~OwsUt(_m(hr;Zy*xaw$xx-{czxcTboMNvXiYlGU) zrADPm*cleT`oCy@?dtHgfi~BFzA<<5os{d#WI6r$-=sIf$CMQ0dcU5oPw1{H@Xk7% z(DysCyo`IrIR)EH$&2B)_VV5Q8vbkFy;m2N*4uMAw`JIWzZ)C*`lqkd^s4veKh@5% z{87Cq(sU+u|KA@69H)G$x-#R=jrp&yXM7Fo6}|r|Abiu__p|q1-KGBZrH$b1c{lX+ zPQAN$6SNSU^VhOfOM|bzx^%y^s7fug7Svn!aGb@_q{Uj)@%qBFN$;Mls*l=xb$873 z+W1eUrLRR}&ioQQS^9c<>8poT8_M?HnYZq^?e5jC$*UY?f2qXk#wpDDx+ZA<$&>3p zPp$d1$+Pm!Z^PHz?>F2_S?bJi=T92bL9u_*y)Q4UQ`!E?>Zoi>^vdfoEDlHJ>MhEP zk6Q1t{%XS+&D{|z!nEJ5SsiXSJ$_}@*PmRsz8#wKyudQ1fx6%Ln zM-yw&@0E7@Utib!8yVxXZ>xIuO5-1iaZ{==JsW*43ZNGb7Zu%(FPVh=2Nn_VZ7Fo!WDEX8l3l z$Y&hOv+}J)-c|jRSz6n4X+FQ*!q@Bn|5Dk_5@utY^pf-A|65z?XP3Oc9zV@lC{g|I z**2G^@3-{iCrsaWUH{tZE3M}1cJKTj*YhD!oAK821r}~c-5;*#(f+@)w6TAF_K^tA z#yJi-TfW7={QN5Pw)c}=U%UPmyv_1(-wIw&nYDA(yzI!jB*n8^pX!-ZUY{{()0KL@ zFK$gb*QMDx=ePrz2YyZB!dJ;O}5Qw`0Q_TK73N|ogK%A=Y|e9W(&^0 z+J1HQHT^@?`^)1LCVjs>wQJwehet2f6dV5Ry7!;O_TKJ40xuVruhuk~UO#VTUoUgZ zi~3I&_4WVP&vxE_a=K}uUH3}Q*ZaQKFS3eW{kLFyd05}m3Kc`Q;QUL~CqGqaXq~hQ zS<~^ga!soflO?B~ z_OD;Gr}f{TBju-GyubCe?{E3t*q^U@O=kb!y8YRBO3O}l8tyQYJvcvsQdD)poTJK)|UcK)f z=q!VmHi8`d&&ukcs%;;ycH_O&Ty-?Ih=2BtE{S)5E>ti^Zkwd{X= z`J=se{J&i%U;h99fAtr|?+4ds&8+(Wzw`am$=6n2UsL|_ZPKdLb=S^az5Y7%^_uYT zm{sR1ZL{_NpLY2kuD(XLNvTKtJYR)e=jVSOuk}o9|7Coxwf~y1Z>^BF=*jvui)&L= zPrqJ$ZN<8GtdTqBUA&Cv@dN*;-b&aLdeyM+bANsXw!>_OL#lMb*YL|X5b^WzJ`|_{v&(|+- z7X7!G#Z!*%a$eJyfty`m==d%tUQpS^O3c}Ms9h^5t^D{Hpg{mx&n_x0K8f9us3fBmIex4&ZTt)sf@ z!&fDLID0YH&&lNaPur~byT7>AwQl^8*IwoPa$2|CldVnhZhk*@>1~tKAs6{=>wl>4efPcjYx?t5f(rlt+_@6^I_U1}pRQt0l=JV`dD*YO z7_t1-dKH_}_}5oo9e=O-?#Wl9x*f0X-oO9q@5TDR6IHf9+^fERy71Bu?x#D{wGU5S z;>h?hvi!-eeXq^t{nFVVwvsz;cig9xudDW7l=F}Jo8=$3f`NhQu;VPwCeR8g!>u*3 zXJ7qaeX=;VD123kFX#TackAlo>$Zl6uf6^?#rNO3)!`aJ_t$%da=f1QKJ3l<-Jd7c z+5X#`wQl`ZBmdgfU$v*cj$a?X`tDlK*B8ZqzDn%gf23sXMf2>rp=*Mx_RQO|de;{B z4CjCSnz>nOC1I{nJM~wGublksZ^WOwf|LI2_Wif^*HhuFqlLk@?yNqdU-4dK`^}mg ze_E1~W{OPj*dH0b`o??FPe+UX+;Z+cL|i^M~Ss-tT{IrSq_fU+GxzNA8i+U%YBU$f43 ze3)rj`S9*^72_MLx%~a+KNmZ{Ve$HH@;_Yy@@CprZ+-FCdu4fC*Rx$GJRWX}E)W0d zA39_I{!P|#|0DJb_?aYky7<@an|@F*>)-4n{vCRs*6(|DG+)*+Tkh@mj%}wNi+#Si z(L(zD$$5p%fA3E_Wb*gN_wZ9YL)Ywa-x)|JzZUNLBqn!l-|Oq)ucyBb`eiKoI_SyuMOUV6oqYV?TG92_ z^_OOUm0hw^f7kwtQlQ}uI45-lESRAo%Ei*6Qn7l^p6AXg%VMXr`h-mqZixQM ze$;eb^F#jlk9EpntD1AKey}k78yx;$eACxknal3StWBA(tE^eS{jt>I2m32!*$;o~ z_0B)pbg1r2q2tPZ$YdV%^muo!J6wq*|%qtPE=3+oG!cK?X}Fi+TU#dsI68$ zdbs{n{5S6F$IGQcwtd*J_v+f}%zeLfTs}Vdzk~1oiCrf5XZ|lUn4k5};BE7>e@hM7 z1ztt)ZaTvsw8ybEcdK2-L$6&E{|lenC38F3MfpuTLjhWKYO(jZ1uYLwmfin#c=EsO--mKORxMnq zxXj?%r~64x5vLu!mn9b1xH#W(XJk(Lsu^*J6&Vzt2ASR@EWMG2ryG(b!tvn;GrQ3TCSux!zJ&vzOb6x8@*7^kg0}ct5+^u z`bRo%(qH|@_r>r3PQSXhw#p(*_0E&B9ozy(SLW|qe!F+>jk~YdT;@)Hs<->O315WB zwtw#V)}Pw0-Yu#9aR2RhnV_9hl3h60NA_1LR35T=Bk+7(PU!wN(QQ+H`9GiSCcVeG zKk;jLtj;N~$;X%Pc)s^X{O_=-<^Gy4*kCJ6`MAc4dZW z)rsj5bw?N6D@|Fx;nC&mRa2d!XS3~*e&^jVT2ffcA?v2P~MPQ7SOy z(Br+uyJt+boVqkdYpKtal`Fkx$*=R{&v~*Z<>&i4zj<+)7qW$JtDlOS`nq33-}dhO zVB6oDcIRbF28Uf7?;^?LiyEj^lmJ?`LQ4 zUq5B*WmogZzZBjv|+vC>fF*9HF zvT!WZjB?qsS9O1KBiGNIe97_lr4M^K11>C^{@N;khGS{vOJ&Wap||}zgmxDOwI${( zmGBTQnB9{&m*vod=6yXc%O}i9pECF2QjSTU(c6~E_em6-tDJvaYiZ%cwUKMe6ShT7 zPN~{3^TyK78A0c|^Y(vlVr6adIWU3!^uF5C)8=V4OsSqpca|;U^WjX;%!yp4t-f;O zT7f;{HXG)>T7Tf(w{H*2?qzMwJ+tJ2Oiy8A-;%d-;>?czp^MiyzSveYS^v&{Q?1$4 zwk$Y$?%#qs)~lDxzG1P6q=vPS6pNYHeywSg=4x zm1E|6KjYYwXYLiA>kXW+(q*lZ1%Ln3+mAn;UEWzW=hY+GqXixBGHc5p&y(`%Q}xxq z`@Qbk=0}> zrwa|@nDtC9?hecKFf?1ZCB(r{K7N&6N{2Rgl*r;T-`(%r%$JoYCvH+(r)sUKvOPwI zQAMOfBbAG_rLke-L>^EMe5lYF8Ln-)HrP0{-U*KXc>t$FtE>ds1pFE9Uh_e$@@V}GXb zAC9~5VD}Y|EwZ1#C+g?#|FbB-$wlQ+*|a^||F70axoaHxd;Zla!A;M+mm8dGFrED` z`P!qX#mk=T<7b!8N~@Yv`Ltr+svH4%l?G>~#JJo{A$B>&_l}*e-0H5H8p^j$xu0k? zT-=-DopQCHfPsNY%W)PLQx|LP^@Tdzm-ftCAGg(XefawL)l*+jxh?!zTYmL?!B~CY zS1M zcgF3HEb$M2{c6|l9UtSjdiVazn)hRMb6#z6ZKUupze7*ncTZ=6J zrdhw+8~=61+@Fs!c7M(Onk;(y#m4X^Vcp#mLwxW2+bWv9vS#umQ`7p@+8xt({GNQB zd)N1~d4IE%tDn#3{ChELZ`IzS^(^Mna}HRocbWcX>Ak&0e{KdU7loF-`oCtsOXbdY zl_x)m{5=x$Z?C=S-=MWFRYy7Y^#0d7w|r*m*UUdpV~yTVJ{%cwrIP(`tnJl_uYX-# zzhPd6=~}+OMZ4eajobfxpHH~@;@4k1gTJni4ZXhlev_Y7bICp5obBrCmMjhL{JQ7f z2H%oj*ERQi{ctKGAnMJus{K*lquzh*TG0M=MZaz8?)a@u@j5jM_x<*4d%H>R;pC~V zuYddt{qvuhC*aPb_%G@9ag(oWYW?~v`uf_slc9J2aq9i`O<(_T%D4UVRz7TbWB=-G z{QZncf4;v+{kK+3c+;2sO~Doi|0VP9x7#bu{Ut;1obJ-Q_V=F%&Jx>o@m_mwdHHtz zJyY+`kDs)jH~ztdizn}E?fkPT{+Dw6=6B!UtKKoL>$@12tXm(WeYI=Xm$J9Qp*xi3 z^Cf?Oaclb3^PNsvN0YW@ulsm!{q_CfY&-79?AcnWz`(#P5J&8ukW6$ z7rbb{+Vgenzm4&$cdz~%wKwK_xP9=|SIUH6|z4vQ%qc*(JNJU&pU^ z`*qy+$%an<*F5V&Pd?jv`s<6?Z==7~U0-CG{<2D~GwxOR?y&H+p+-l49S!jXZ8Z3@ zrmo{_dHIpkZFN)TTzc}a>r&PB(7Sz4gzNT(*Kw{1{Br)JOU3!C^Vf)a*X{Oy^>VM^ z@~KRL?VWwynzc#m!~Ryi+bgu_?{xh?LPZk&?pb?d>vB$?7O!7q@&2c2WNp&y zS=r$opX4m~tbca&`W*ip@HA=9l$Tmkn=VN1P5poE&rY74`Y@HRT|ahzt+dUmXEAxX z!0*qV{a@VI>m1XceyAkp-kekQPVpbr7ERZ${pN9U^MA90y)$l136?H3ak`}2Dq);?S_!DUCG&HSf7(rb2~{-u^h)kd(^^SWG*~W|LKOZ;sozUzIka}r&~1myZj$p?v&npYJ+`YyI^&CvdHtO|Z(kpBQK`+WIl4ge#!n8D89%wd{t}v*m;ELQMH+SMzkl`oLBIO%&jRz;JH_vNeNbLbYs$>fYQx>U8T5UuE3R$aSHJ%fc>JKlr$n?W|Del;-wU7q+Wy3} zD@xy0Lv{aA@r#GOzdhf4p&zoF-cJI^Qe}qN;?}~~2 zxAxaj_4WQQ+{?pSwjT}sw)gI>@1?J=zZRL-mEam%W&HlC)G7uBCMCyN+)Srfix#h+ zy?ow3{r{7{sQwQ7zyHdC|NH;{{`LE(b-4Bar+>57@4I?>*+%a>P1Bdfy`Q-+HUC%s z%$S!`r@y`?6!rf5>uZ{Mzuzwpn-G3!ZN&Q-f3xbrOOaSLbW>s?!Q z_54Bc@Yi2$^%sRkX_s-Vue<(Y(!A7Fi`M14AKLR-{G37W!cXzfl}_cZ{8V-Q^}VR4 z5zevtPIgtBR|eMKlrJrRu`zt|iU$$paeH6wJ+)UzyW?wWCFilm8rSpOXJ737_517l z>r1{~na8nl;_auPv#2K2b=_YryLdxfuXy?Em3vd|*ZaOIdG|MAb=h_G_1?viAKm_1 z|G%z((K~!``Rj}3>g$B=7yr^<^7YjhkEOf%_q$hJtv+P;$Y%Cs!PgR#?>0`ZJ^i5n z^u5I%S=V2@|8@1W>h%ee?_O`xbde78eI0buvQ2aH)yq})R&1@Fza@Ol)TXA`!|_-D z?Jes#TtBIh(d+E3gOeZaUwyM}Q;hrCx$KXc6-;Y)mXnZ&ARMu#QKPJFUnti?aE#se))6Ev^ABl&R^Yo?a06NzpiH; zxwo}$>#e<2yP}uop00ZxUh4X7ecXEemGcE(Pk%lAb){`;+16jz+g68e)%TA5w)NN1 zQ!CxGW52BnJ^5PX_L?Q1t_H|f?R>YcTxHei=Wnx?zu(9IKm6tObsxDk7G=HsR~N;-_mCKbJ1|tgZf<^!#t!-uOdx z|CHWld3T6xeRcimy6#tBpX{=Yndg^wf1Ui=uuY$m_Uu3T`s!;<-J(U6|L?E$em(tB z8JoT3{=aWOotSg~Yr@|6t=(Vq{>QAo{*|kCb!E}7FV}?jO}&x(N3cA%(e~qqMnk#1 zKVO~Ae}DCd;y=&d`@`NwDW8wmIOw2uTWC}1p+fJN>qS+2Z+!`$)8)QbUwzRg|FUI< ze*{DGwLV|@u>WNDYVXolN8H!P-d`U+E9Q6S)_3pU%WwY^zE`#`nrYtjS4XxBmVWxR zf7M&vNiSpeN#A>Qp!vl<`Tsn(dm67#f6l_#)5Q5V>d%zQm0vZzU&l{B>lE{C*YmJx zH_J<32mR#LKbi4v?L~!Y*Yq#OU7uzj_S9nDm;G*2=5hGlH2$+aZttzHzu!IkIz4G; z%}bMbo*Q3dGX8IuQg{=4`|bXRo%VCPq>gR)|89@i6T^j1moMMBcXD~S>f)o}LD$#B z-aYxCE~DV^;@n4?Y!r&|ENAjP5u4< zLb1;GSJyGhg-p>f;^J&+Y*-NSqLqucX|Y0Q>^txKa$3H<$&r;=Q^O<0#e=!m7~bh5ucr#dB{<_uu_z+sZWmMOleq+dThGRN~t-dBY`Z{yXa!Shl81 zUBA04V}-o@w>!IDJ@@$3zijcR@7XRo{z1`G|4)C`w=3-`=UT2l(dzXF)+NnvZ`R+P z7W|hrdONgzckiflJznn~9yxz=+x0WyE;_fkYHPW=P9C~e z`+M!39`$$gs_o5p6>jtq(wB&ed0tS!C^hq+bV1AMj4tmBs>1opbXXPBX0R(rJ-Kc* zTfD;F#YUaA$@|Kt1lQw{&qFq^*t&jtWP$K`^R)H+Tg|??*VS4Swbf1i?s?)@a@*cl zrBf&8Jy_fF^!0?sW{H?I9r5|u?31;mvQ`zSWC=WKf4m~yCrW1Xa=DX1e6QY1>NKD1 z`}KO&AJId_drQMJ?j}kkdkZX<&$xV0RI==&Rx!^uh9&Q{c)xfTrRHj1WIwAQ#dTN zeBXj z3EZ9+-*7*hk)_OYbG_dL#?YIJ=MJW=t?>PK#?3%@W7~qV7@z-s=NEOe2nyR3-e;b9 zbmr2TyKh2*E}Ohn3eINsm%6suNsUwSzTrs&`?>9<2c=AM z^43ZAtX-xkYPso>*_#`EQ#W*o?(pOfm)ZGLsz6J*Y)4$JL&Ji*e7XF|PKOm9Flt=s z$PxnenF2N>fYyjG9SWG}q9w@L;+7<=mAAZZ&)J7Y@0QP;)O~Z3%C5P5|0I9fTI$y8 zf7xDM82oeV#r{6SUai)Z{mT8S8iZ-OJtf-ym&CiF(wbOGh_~MH`45td{#FcTxH5ckQnN zOIp)IzO&{P3HV**=2rgBcz{t&WQvLi=wPZw0}mY)Zb4=yMU4(8C9W1H$AB3w$Hc3y zS6BU-r>(Z>x%pGkpq^O0pkI7-vnRLrF1a%=`Y*d{{=#{^NB``Xn)III(-MzM>S-@; zt*JUI_TCgrWD7(gja97cZIk?D&G0g;7^zo6DK< z^j7kGHV=robYi){Bgu6(t{T70c_&SMyYO$hwrD_T^~$t}vOMnh;Va7A7|z~dXH*jb zg>D;5B4`DU2{$K`qrrp#6-7Z7Cr5)W>DT*q&fk82_uB3~JCCu7?3wg^S^NC&+Uv`e zC#!f z&GM%YbiMxcJLyqkiZkX=PI|j??+)9QlLB@Iy>^vc7QqftQr95-5czyWW zYkz*%nKLemvY+4Rs-*w_>3*#6tANRd!#Azoh3syPiS^Db@pZm8`J}H*e-@1pOD zh%o&y-JdBk;WX>jze_<~X2utevv`<6YYuYn-rD-<+s^vB_2KHPWlxr0`tdvNZ;<|K zQ5Ku1&0q7bZ~Ff?YJb*|`B#7K{q?kN`=2{EZhfu#T3YqqdAICZ@3#Nz>$a~Ey!w0h zv7+U#!(U&0t$+2T{?{V^7&Y15Wt&z<-EUg8r}h7@eZOZt3ArnL{>O{?;kDXUnsF=o zq$hcA&DxpgAGR*_(E9m1^*n#SjoAMxOzU@;*8hmr*Vnzi9-4hQ_LoZh>d@~Md*e&{ zkA&K<3)X)1{OhSL`=d|4da`e??cIMnW7q9I{eOqw>Zc#eWwcjMz8Sk_U+4YR(_Lb| zXUF+$7k%<%&2-g&U7zn~2h~=cTDLWB&i?w{vSz0569sBB1*;CKZ{jtn^%u^*{=N46 z`?;011rzVb?TlY77<%k?uZ-Ou*DaDv3>PM6d6&mY_3}51M`upWzkBEM>DKIkx8HAF zpRd09zQ;!IS5Llet=sfhSnFdH=S2RtiS>4WHXA0d{+sky{(r`F%dj0^SL%PQ{t}aa z_1Tw9|5r!0|N46O_lx6^_U_YD->ts-^!nDKc(PP=yd#k=yJ&o(Szb1B#Nc{gvPd;B+)O7gIw5|8;vrgJ_z32_t7VaJT zD$)D3cxh49)yePwe@UOmp{c;Y!1&B@7EiMlYv~K`xn47DUQgMsdi7-XMX|lNwpMMu zrCXjPzE)^cP5qaxr%f+B-CK2K-kR0ntN%uS&E8tHwt7*RPVt>-ua;)IX@3v>^|kWU zed~*-GN%7hW_^8seZc$stCRPKuZ~^q@=<dDwuK%~DZsX}iVtUut6up-H{WW_=*N)kdm#V%-ef_fb z))v>3udBYqdcT+&wJx-N-J-uq`-4=wZi_#^ZmBr$>+A6Bm9<&=SD(z=b8SgNlEgn> z_1*i_GTi*5{#M0D++P#8Z)17f&hRzyt53eZZ~ALVWXk@?y{SL9$6uZPcYTW8)W$Xxjg}t->jHwZ0yk_xkGli&5TjvoA`9$8{x^bq3w7QY|;1(<^QX% zyqL5;LiOvXz13e|?=SscbZhh1?5(f9Cao9Q`s&Bhzmd0&s;@6wy7AQPy5(M9-*?Ww z7%}bLf0tLM-&P-zzrJK^{q-d)i*~>NdMaw`N2BTY?{BTYzUm2RF;MlQ(05l4eO(iD z^w*W?s7Oq@8FYkG zVOh7{vrm&14u-$x5w<^G^XqP0et7s=-`5-JIxkggZQt+u?&9vwfYzh4xg{5jnG`TARS z`=Z}}aOG9B__y?X^}oG$f9<)(bDadK)i#_51y;g4+dmfhPaoUtPz*!1&y87H@MFX!O&vZezUB z_lmyk?ys-DzSNCKU!~Pnx2UrI?w`*eMPIMX%MUpl6S_KlP1V=rty7EsZLIMLUmLRH z>HeTMwc4k%{(i6VFL$|mea*T@dab%ud%2G0Ut9HLYUG_i_g7zE>+;()$fIfJU)gup zR$p7KTC_0!_4M+ypX=7Eua;jE`&awA;IAvL$9_GHTPMH1ZbSL=f1Z)sj(_#*vYxg& zvTWD)s7b57zPcKeSw-ze&AYt;*~6)@}M*{k8gP_12ep)$gj;tCG*WuZsG50pjdVMXl^g(-6)Zf=C z;_K_y@YeoYulp}s_3qjFO-py??a@Cwg-^c2INnL`RQq$K<+^t7cOS`ref4B+T=w6j z>Gx)GM%D^(>UBQ-uYY~DTh*)jsQDKoPX7Go^z_sH+SQ?}L)W`h9?_{&U4Qbq)<28p zGEU3ltEIKy<2(}U`LFLRU9VbP_#mrav3r-)g%lsTskurJSw>Q5uuAOnSxl-D=g^NY4R>{bB)_;y)6SkL0y}fC= zxaha^MYoI)2AhSpQ9>(E>^}J5iut}}Kj(>W6gu9xl!$-Z+m za6!Y3Nf%S2zjPQbu$&+@Vco9IDd_Rz2Ex0|)STerVmzH8}evmf2Pta)lR3sR($N)8n;DqNYM zBErcDxOj_}vJom#+mA4z;cSvdQA9)ni)&zXHwA9+h+ap|EIl)y#K9}v-)nUG3%AD zkM~Ii?Ej!CS2APq#~sE3b57j12-&o)ALmnvJO?VL^-zXsWVBQNqJZM5ra; z)YX(fzeB^Mv>v@SHJUy>JZRl6neG*@KW?{E_utJ{FZFoWyQzqUE&H1}( z=e2}~hj*+sWiRw|J^ias^HubtV11?+YjeK)HSCeEOb?$Jqpa(p`FlZptGT+Q$2x{N zixgLSmohgn>WEBn5#{7`df2ccz(<3Nx9OpiK!}nNXG_yUgU&m}*M8pbzrJf#L?N3Yo)R?i}3r&foiY>Gyf}Kj(1laOIu1YW1^OL393#{$AF>=Xj~~)Nzq2 z>s;Fl_LF)y&XfLCqIUk?x#+nbcazPsSS&O%qhfQnS)4rF&t&oGoMPtgri910{G$)Q zSrIUAGhax}6rE@6JHCBVnsqbPW%8sM>?Lbi0xc(NmZUmmn=m&pYKTnHP~vK7I@qva zhK86_E8D|>86H|9934%G0xNv9ggNF~?)mod=kKejp{rgVbBzi)STu3LyL;yA_~wa! z_CNPCZl(Gq>xr{Z^=$H(;xr9Z5j@@1a?qrYxvGG1F=y?y!r{p*FrH(tk1 zy>UAD+SlaP-FnxOUv{xfW_qEqe9`BSy}uT=X{s~KzIi^JcFCP^K>3$}08 z&x|`^p)d2@+TaOq%kxaPKTJbDk_{UZHWOh zTGRv$r)t&BE#0H^YIX13C%aa9?akBM#b@VMXZo^GZOi|gg^w?Ndp$X$%|9l4{$HDX z^97xIkKcU$N8uG~RQ>bJ#r7fMEDP_hbBIZEKjC|Q>WRv2vDc?G&y^^e9w8huT{iC0 z%x?ZmoIky`?hZJi+RQ0``ETdR^(S9G%Z&=!nRBy#e#x2wuUi=_LylaTxAyHr)@gGV zZPXJky1@=wW}qd)$;z}a;lP9(5usL=g$*kLS_C;+og4!?e3V2vrW|@2TKe-(}v6E zUx6tbxZgB)hrVd$KXkh$hC!g(vwB1P;=L`ee|G;h>eicH7dGSK6$XAdqwZWt`M!g8Ee-TZ?5Uc-oqiJdBy485g7&s z#&3?Z_?Wv`OEY}SV?u+I>^s+ae_eO=k!{@judBYUzxqE+dhOL#t3`XG)RujHclGL} zqra|p#b5m$?oze!qv)$w`@6!|hv{F}=zbR;HT`P)bxzxuh4**iz58o|@~ppE zD)Zz1zdCSl@7>s6TVlWe+IVNT$&KE?t*>6+`kSTn`ufs&dsE{!i|n5L_1%-5wtn+o zUq7O4AHMPQ*Q)QQe|=fB^W9%R&b@xCzE01&zuNQl)I0Nb`o6lK_4xIw@BOC#^=Ek> zc|KjiL8R*I;jgd0u6(sWGJNexy;k9vz5llUT)Xve!T#{+*Oxp}`MQ4EI`6;!q2Jd1 zO?qeb`pU}cHKAF5%**eDuL~`Gx;Q<%{MF@1%jJTv?|xhJWA2{(?5S0I;@9lAt$n^W z{+0LUtYuF=2W|Fn6eg0b|7*r*8 z_`&vGN#_l{rnx)AC#_k@HP7GF+diXby=&jJC!ZpeyDtCC&Hc0erqJ`RD{GS`cTB&y zVAA{R`&n8QyG?w)sxJzE5v}^cDK~7{rdKi77wxUuzx3MLsP$n-d>{05b)T}n7W7MJ z_vEYnSAT!qA6d5d&i1&i;bEPuQ7VCV!nD_2nDEvV{_j;Q=3SWgH15~?|NDRc-2MOm z|G(M$|Nl+;|2F^E{=fg%UyR>cer$Ew*0|2=XK!71tz8@YX#cETcH3vyZ`snke`(jY zsjoB3>qFN@tlJyCHhcZnbr)mR1@*71R;k6WyezWm=8DyK|CeaL4PFZxcKyG)_4Ot3 z|1nO-s$PH1J1BRxVAjgLaa!-M1(oczzpyBM|JBvk@2?MEUvu~5>)@&_({`4BzPdi_ z`pH_|T5av6<=NM)|Evss^0n&!)vK=#{<{9Uy?(v-_w2jtR$brSz4i6){nxyh_T{C% zUh=p4(G19rtYSx@-HxR!@8XN^fsfe%#OX*H>Sic(3zn(W$-91iIH<+Z{F8 zHvZ}UxW7fyuN)2k^~yi&-Madyb;?`c>F>Y#>Sy)pdB4}YzRdIIyt%5Xm|x%RrK^6| zr_+VgqA%R=dwo5q>Xi2D>v~uByT13*y3nJ2@oC+zX`lYq3s0|GYx{4n?$f{j_7?4r zSa-^2rujnKTgvm-hOaS=t$J1Ol9~5cWP71`+)nQ;-S@p$nohd5?2E^%uU&s5`gYwF zZTea@%|Gnv@=RUN=j+?2JN|oh_5Nh_C*R&*<8Z&aCj4~qeeabvEgoMs$P$EsGxf#CWu z&{pr)yY^k*AGSX3ZS|LUJMCS&xA-URTetK5*3{S^DvPc~+P(kubXmLp?)7m;YgdQ- zSmat5y+2I5bA52DzF^JOS5KpM{Ms6|_1E|88Gozyy}r6n{blv-^>Oc?c5nR}b>+J0 zYR|h|uS#EkEyz>en!WpL^4FyLu-Vy<)?Ysws>YFh_3Zb$t+$S<{#)n$>T2oaFKbo@ zZ9iWVU!FbxYu0YTyWiHmEt>S|_us78m4CChZiw4zzP4^%xYysitNCYVF5A~}VBgY> zK@+;azSK>4=e<8_((m=*Kc-gNT$z98-g@1&@1AX)GQDnVtkO~G(l98;{dN56 ztEY=AHy!?^C;QJSDlJ*O^tskT{#l#;mp^}1$~&R;Sj~k0udWvPt#P{>Tj{x}ynelR z>8tN8Mpe6vt_8>J+j_V9b70jK->d(+ufD$i`f~h>b#{&-li#V|>wU=Y+P(YJzp6vb-{x|NeG^bv?W?KE%BJV%bgzs$E{g(7<$$qulG50!7 z))sVpdg<%@OS`;ot8O;yRrS@@??1Rcsn$9hazW+#q7vnxZI;t7-Xp zcfDuoa9`)>^~iqt+N)2$_=Ydq|2{Ky-cOa?JJ;{5jd;Ja_TA*~llsC}W$rClAGI^| z)Yq=HNuVTiZGUXo>%CPw1^r9^uAUq8Ysp8=bV`eVpW+iM`cqccZ_)dRf1}FFasfbj1FMWrE-1cKE-T82Vi$ zO1D;L+1WLQlit1hAG4n&#*{18D!gt(_-r{Ac8_U0>o(SH+8?t@F#K=!^yh!8{Bptt zi(_M4ddt>clwZGY_Se;4-y zUAwnt=B=?PnZ0Sp-?IwsH|zG#zRw(4QrrfJuLsy2$hIOqT3eNmveaQTa673a3zdMc&m z^6C5C*!UM~)fd^n*s-W!{zup+PB>(#E)$iv0+O)RlZIoZ=`)ljtC#|%by6S)2 zueddq@7l}DZNKddc$aphyyBha>WdQl!{3)*{kK)MwCGBg%R0@tr2j|i*CvZ!y7lE= z)gPCHr(aV`HpUBVoa=SDYWLUIUr+t5itg+VU+8`Q+uGodCjRSxHOl2j?9yKntX{vi zy!`I3-MhcO+WPL=(*0MvMX#?e1Kq#+?&_1>&wtFn+O_rLFSSL**|S!?^Yb)HD&0N( z>Vb8;kJnB5nQ}GyYu2uPPrk&ydiP{&_SSdTISb}i<%gG-o1QOy9pr!bzgqnNmYP{- zlJZ?%?k!qVzV%e>$~?VS?_Qq^wL6vl$ZpxY-_t{1?6%FhGv!l!p~$D0dz(5VEqAhV z{XPCZylRonc9+lp-|d;jkrS7??_!iy#Qd zbB+0~>wR(FR@2{yj{NvP=~HF>qIuJ=YwQ1CY;Ln}XWe=g1_s7ejv_QuV$ghp} zy3zMH`s=&dU-O^t+JE)es#ojw{g3)|_1yno@soe;iVt7E`)|_z@H$SD()0(D!y>C! z|J$!MU-tLbch|H;udl9KGi%n{h*uMees6t$oqPA!gmvCw*5NOvzTSFh!?N{XS61$N zwl!n-+xUI&q}Bxg+Zz7s>aG1c4om-Lsb78d`nuMC)%dS9<#RuQ7V`*yid|dc{yL8% zWoy;`(uzLieOKE)#_Zczx9?r_de>v_G4+c~Z~az3|GI3W@2is!Chc8XwQ5~<)cUZ$ z9-Qm`#(&OvrF1>^qwPtT`zt4{-@W5}#FDS;uW27m*?P+E{ngXEzE1tMdDo$P*H_h- zt^yr9^*j8lOGUp^>YeMql!gC3(ylX(3|+nP+?y7gt9$&{)~%^q`!`Yc*SF`-c7LtE z+WAiV+S09+deN%)!++n3T)(5~|9Sz<%J4$+F|jZ?ms zq;`GTmoV$DuX%Sif4|pi{^ISeFWWYhi@thZ(EKmE`-_KK{J)md_fPM4%joexx#<6g z-l{9}x-Yp3)~%1*nfhV*&%aeillN|Y2^tPQ;y6o?#f!D5=+D-Qj;V1QcfVV=V{7-{ z8rA#XUx#Kr^;;j~zyJEr`oH4)Bld2cGC!`f(l*NbsQ$vR{clqzKVLuT>gliR9@!R6 zdM2`|YN!5c>&cTv_Z00fy1st(HO`$WJ6UzYS9{m5p1USq{nGBI8}F>Vb8l_k)+BS) zYu)Ty1jRA>4$4Q{af`l;X;}A`uL__ z+h50i|Fz}bn(#HdCtnMF9lvJvwe`E2MDwp~jek}3w`xQG*3-)t-gznb`udBRo-ek( z+Vw5;)vUjfF>`AiX+p;9W{yF{;ES6Q^ZuEjLIsOsoF|GmH0yLK60Uo}hgi_elzIn(xv?|%30 ztCP_rtzGdKwojJt{91B7ZoTBI&oTFUgC-yUr+z*@=X;{+Yx5aSJ!}0Wmj30>=XmyY z{ne#AZT%)yef@P*@OAuZ@9b&XyVup0uJ?Vues$)*qE*)E{lC^G*$ROx#;uOC1Y5E| zxo-OFt7m_G_uH8A=l??43;nM8_4l*>|G%|;`>pS_>uWXNUyJ|#ZR^+Tr@A%UJof(D z{C?lFuhY-0+x>G_YT>fjxDF)mE;kmoe+(cJDX6|LbVn+W1>vW9F^z zdVO{M_5E?{!xnP?+S?Hxyx#R|OxIQUJJa^w`X==LZ@T0!h4NEs^S|bI{9Kj4Yxm+Q zU#l)P{;l5n>goE(y0v$GzwO(-I&PQ#*F(p3e=XJjzw%&T`1jXU`~HWmUN=p9|26Kf zFZ1`U`yJ_N_4Ud6UD5lm?T^?WHv7uSxV>?aH-7(3552!ORB6xDb>XFuaT+@%_Fh{%V&d{p(!)|6;1n*4NjMT(T3`xqGc()~~p? z@y{k-UHoV6t)ErRze*Y&jK%0zM9)mj$=SIK@9*I#d5 zzu49EyJW%q^`51#FU6#qMp>8NeI0JU$oT8(uLaRPJJ%)sf35mGZqwf^fURZBax4YXtYwm@g zPY*BgcA9&kV2}6hHQZjyugy3A^F-0}XCPg-%ckFwv{X8*a;ae`Tk7p;n{cQBH zkNxJgO}k|GUF@ss*VmprdzE>B`ib(Y^LHD|H-?+5m3^;Ur(8edSm}*fPHz_c%3FVH zVZzk(pG=NndsnX2Jk)h6^HV_ULJMy7V3RC9(4nGQB3vy^3mpywSam{R0vTjd$jrXufOJ@qFNTuwBkb7ty?h7V)Jz8Cud%M*lJ;&7k#8M>7sPhT;s$S zjF08}jS{Y&J@ifG@Bfe^@r=K(-+yAc>RsQ8*OO-n1+87b{!WW;4eQ#aoZGmjT#f8H zxbK{x+>r;XLqC1qp}9}yZ{+Ns%(gY(x4#$pROxoU{&i&PHT|dy)AKhbo$On&IB$21 zg#77`w^OF^y*zV1bn7bluFtpDh$nO&`Y>}TkE-CLik}gn14Jiu330KpG$k&G(GcTe zb!kjka7L@pdh4C<-+#?p8Zv3iuBkgK_4R9e(vXgzih?C%zv8e>kE!VK91M_^l6n3i{G7@F}F;<^6_61|G4O{ zoz*2(&*y&m#}u~nPbqx1cTVQ%p6K0MxKC6k3kt@rF|qvo;(>Ulz=GfD<_!G(QXlut z&omO+qGqPe{!+?4vU0)JrC(;OvM%#s37=Kxdhh_FhDe8&5I8nAOwqC6W_3C^VL^x= zXxe|Fp+=9EI>+2&d3kkn{fY$R`@WvbRD1fq@6V-0KVw#eOAHtPgk`rKVLKF zqT-|rwGp4CZU1Y!dbiJ*5x0P|f2ZSJ`Dc$EgqF46t3J3>dwy?b?2ee227x(x=C5WI zY;qD@sI;)r@}3ZT2+ZZ$oNR|2Rzw&HakjEK1fPR}J<;Ai zzpZ&uzRun=>+al&Ej`{6WqjbNeD?mB`&T1XuB(23m=-#F>BeIVgGAL2?6p!YuUh|$ zf7fTGH$PbZ73Az)+!H9eVNygqOPZ3{%FM1-y`b+F9M2mp7#J9P9A^o!bg>qFjQ#rk z!~Fk$W^_zfHGlPYZOQknyJ8zl6wCHrF7K4^)^nUf<>dN+8uiEqf zb*~Pyj&Ci~n0EK%t^Kcm|La@%^y~YpzgOGV%U@SmztgtpUAykb_}6!)-sMo)x#NGB ze8j4itopB~#!Oq&yQAB3`swz0KhwUH+bLGdF45hmmLIeFnt8Uki)Yc@yD^i$>V~~? z@U*(pygz>Rq^hGI?N_fi%?y~Au`*xnZN%H8A2Vy?zZUM^e|_oh{dS#i|Cep8TmS2O zX;IY`-@Av5ey#O=EfT&sB+mX_--o|m=Sxp-om#asZqxd>mcJ2iWA^J?D2Pp89a_3- z_xr7{MfR^gU9jCGe$mrk%8PO%{yfcpm&vLtwe)M%uAOu#CX^HR@y`h{)Q ztGjki?_BE|pE0#ucYR%p&Fhz(y1(`)zSdTsetq)g|6hyuJ=WW8@Z{k3M=sXP!Qofy z+P~hd`g-eo{bI-Py4EY<6BNUz|B}A`>ier7>%TVbt$OG0n)Stv$uedC)praGjQx(Y zgjztSU+N3Kjy?VA^RMf#{{CCC`r7`ex4w5zzYeOp4mxH&>($nkwb@&*zn(I0tN!Zm z_g~F1+v>dOQMl~o?e(kW*S_wIdYj+(_t*98__gug@oU3Ze^=kNZd2Iauyxl~FI{(S zqu*=s)qmrDg}sZ@znXnFE8bZ`?Du}>h>fvTyPy4v+8yivI`q}2%Pzn6d|Tgj{nh)c z(^nl;UlY65|8<`)=U=JS7hm5^nxwb4>c#J!dP~%|p7P%M(X{+^`0LeQU+PwW&HB3T z>eNeL?yaj^TeoTV?)4G&VVl zY}`htdAs(^v`y9fuqQTeZ`jsf+e<^U){DNrzbH}s#a><0s$GAxzh>qM?S7Q3C#`>d z&vf0{XS+|{J^gyhydCa&dq3K*^?a?p|6-ih#nt=%N9}%R^nR+`>g%9gs}t?kyx;fa z>*){Sd%jj}oApZhdhF@W)2~_Xb)4R*XZdCBFKz$VLGh*ai$d>AmtCcuyuIDfw*LLK zCHvzZYeh|BU|{TUoF&Y1inaFiu|M~}f4b5ClC#K9{r~#^@Bd%zzo!59_y7ODqyA=p z37_`+#k^_$E-TH${-(y9y1y>o^#AMZt+Tf#?5$GzrT^@2we0Oh)-Ps#t-ro>Yg7FC z@V^mni&jVdt=`e4ls)_6oVSuszUcm*RH^-5^zqVX9bc!$RxODSpMQPH7sac4pI%pw z&v;W4UKBHJZQRb21;=-Ehim@}eYv(~dgsy7*O#k~{(7pvZjqjc{*!lqm)3Xwt@4gr zJ^yOcuC3i)uS}1s*YWyWrSvFIedkW4*xDHXdHYV;zHwh8`X{zfvi{%S>S-Q3{zr8z zSshj$sV#C{wD{MZ*;oJnoEGGLH|nY29_8$7+S5A2K81zEf4#osYr2(j_~Kt;PhN_y zo>ac;=&yryol|0KkK75pyXZ_*)jR*%kh{{6Y3s7xWmjE!X}`F%>TCS#>o3;c+W!3a z+hw;E<9x$nHTB}%-dzv;m%aJyGntI(+@~4;vfT5I>)H26S7c?}a+cTq@2~ED@#>3r z(*Ell_uaKzqs+_J7BDa{UUQr!-13UGEdJM%E$Zw0e*XXAzW%_1#hLxBmXR{<=P>@f@{p>eNHFrk_*3{!4v7JN}n>y=h;4-Hy=j z-`3xmseS!*p!ny|*nfM!{@Pmgux^d$`u5T{^J`uh$C}uUT?>?#HVAuf?;c%-b5bzHFm+`Rj0f+qS@KxA=}pR)eL8Q>cJnpz>*80>|F`aE zUEHgG`j5jG%BzQ@>;KITuf1QsY`wXP{(04+UthL!9gdkXZ^8fBS0B~fz8n@+@;~b2 z->n~O7hesoUC1{j_}t$opFED=+8I~%r|wtP?k%c)XL7e3JO4}j-`=RTbz22r+$;~j z8(XEkTjSc-TPCqb?7k{2n6dvy#)Adr+*&FZ=QeXlEy=&&`s%U%&b`(5zr?JV_v!2P zg|jyO-nV6&{0W7Ly8j)mR<94&y1Gm2Z)x3n^)>NrMREIz-M9YRDtP+oy71RQ4xqEn zG=#WVni>N(Own=SYIAlBSm2>2A`EKo&T!FE^giiWq+|pm~?|D61f8|v>)vs@5``_t@Z`s%XG}h+KLH>^L#q(c%UcF{P z$ETOGAAb>WmfO0P@$sCk7F(_Mt$exQBs)jfcMZ{M^+`VsE^yt+Wi{;AK?w{*= zTt9zZzN+)0b4BJRzqud(+{T-6aMw%Ok5987RkgQR+eq>?FW!CqYnB!J#q7BE%(LHy zE_i%Du-^M>Oj40k1@nd7l9!qya^CrKe^=U6{?X^WynNr25Y9UDtXZd<7jgg3zq^V2yW{Cyw`DBf zi)^m9m18~~Sy}kHc+ED?sZ+|L%5vR%S+oED|7F*9yqtf%#MgMeNhi-e-+yH9*>^|X zZBokg?=Jdq&NlM>$$5u6K0ioqe$V;f`sS?{EDm^Y&7Po!C7%$B24~_^4~h1qqms%+1}u{QC-8w>?jg*{%5vCYqll3@7faJa zM-3k>VXlrti-mVST+e@g%ll~Wk54||ee88#@@#g?R13a+-x+_Mf1db1)^0zih1~D2 zsxxa=$gkP;W@2AoP0SvtuD{HmHKq$2zIM%v53EnIJHBhpiGaUJzwWO1yf#z6?Qj3} ztwlci9fc2N1j4=V9qo+e)7!W`?I_2DkN5unKNR|H`%DY#?o)ivtEW0o(I}p`Z{i)c z7sp%*0*c%;99>dosw`8q?Dko8h5>xVt(KAy7t=vUg8&^>UN)!3fC(WULZHr(p+JC^ zILFNQOS9|k*YDGxYyD%-=X*O>U%QiTx_WA9mCWij&) z?DW%{uC3>~-+bm5xV!pq+M3PRPVeK}&-q5;?#JVCHXZLgk60zGU)XM)dG(3hznao( zt+)CNi(~l2cCJ<`+;Q{e$<5y)V&;Z7tH<9BHgV?h3Y@pXaMQ&5>CF1|TLKKjCou%- z?Km88aIa>#PSc6AuCbR`!@=t_&X|DiGHKYb#7tC(wP~Tlh5!{2t`?`HKna)gQm<#; z-}`ZE==0dGb+unZWAfWx76w|#IEVcB>-K(+dfJ~#|21suL*+v+6;1czpK*EpV^^`8 z7t8YM&K_`J(TJ7Xc|N{x$);r|#9vMIX}@(!UYh@d(T4|HS`OVhviEn<>Vx+TKg||6 z@})O!ZNxbfp}?|U!^N>+g^yGEx%D!? z_HEpC?On{icTU;mT4SQ_K_aI`>HWhpkYz zxEf)d5cP1Dz^0=8k!)|fPt-bqs?2?kvqV@yBmDX5>qAdo+8?uTgJ-s=@zQtK^tb93 zul9WHQMc4w26$wd&8Sh~2vvp6(LZ`B!kg%X-&Sj^4G}cSB!m$?Wufb@|su-PPf1g3iBN=l%NX zYw_3D>(|%qtlQi5dwpc=kzZf-tyTTD_SfmHdDj-bQ`g&@bp31g{q^3jWA9#HEqpiT zx}I{@wb<2PUr*nfwd?C4t4D@1LEmp}e|4#9_t*8;G|s!d%#}ItH|cND>Sw_-lavq?5kEE|8;7w)Zztgd&PnZjbki}qO|L}N+w^l68ZYF^Yb5{eS4>Wk5j+C z>aNZIj0tW@k;j{+>^2elo20aV{>_A``o9F9UbJ7gKRpSkwG^DFPk7i0E^^=>`=(u?bE(ehgFd!No<=X)5i zWOrDW&_VZ{V%hgkD%NKGdh~08dw6+KOzg?pD+~;*XB=mVv}&=|Ui!J}nfm>|A31#M zcK&}c>ucA#wNZbo_Ew+X`MT>v#Koe?7q!;?jg2{d^=$0-;`&9q@87?-Jwhw=n)R1? zJ6}Eh;`MWNXxH8>|Chh^e%(7sGIPKF{*-?w^*rb6&OGVoS)tY#u3%8PALV z`fg|S+Hn2Q!&9r)?Tvp~w0dRr`luiCelPSdTN}6e>HpvQfA3a3RlUBpZmZM&t6Hk- z%hu|z_S|(;ef8h?E>WdVn$1s*Cw_~rTKYv_>qxlayY=Vxt9;FupZDx*)~DGW5tHBf zzrT3qev6Nf?8?*gE*R#l&N^~Di$7v#XM|nSnfudPcg1Dj6}}4UQ@!5xHT!G!*ZkOb z*SGH7_cvy}jL(mnv?6cYX%p*?1#LRJzb$9IoX*1~e>P6Pz9VVRF85j11r2YT>eI@9&%nm%6@@#5G2E`_qH zDd`MXw=YorS$ZvgeNJ=9xrO5YK6og#$_U6Uf8x4ADXVhA{{0W8tliIY_0IkM>WWJJ zUwI}nw?*+bhn>4Q|G!iGuCG@=oy?e8*8l1*SEgCpt5vI3EqXE8RONWj)ngx8_wB!+ zs2VbVW%WO&SAAbTvX(XUt~qZyd?o_z%jp*T1ON-Bx>Mj`Y%rx0p+RZ!zD`(st_b!Rfza{JxuN-oO83 zXUErdzo#_+ek;GPxrwvm>DJ$cB5~)7pPl;j_^;^o*PzDsySLSkl+0G&>`>_Gbr)Z# z`ZDOt{l1>*6IXY~|NedYb)CYxXwi8O*IA`BURM1u)8M!06ie|x!n_xEN5xKmeXdbp z;f(&D({FwKvt8-$>qJZAz)za$-SRr>PRDsIu9l3JD@d!#lp zF7T~8rI6slrvI|&Rrte)mY-9cxA%(At>@hQ>u*X#nB2S-{7e1z7ghZg-*H;eHUG-G zs%KaJt-t-`mCxE3SJ!uo{^#r8e{)4SukuF|K zj{oQsxUl~HTb`Kzs~Hy>dM4{VJW5n!9nk_D@ew z)4iJYVd-vbb%U7Il6|XpPwg;!*;FjqX;aAV`ZhD{+h=)ciC=GD{+$tKuO)T8cB9>q z_bf>fdtdp@-PE~Q{(JnC)#j!aIqDC6i*(EHmzKqU4dyVlKil85{-yYB^_NYPx-F-F zempz7_|r>yUG>c(-!AXVJ*oY`T;)O zM+KJ)eeHOBXxgf)E;k#Gvh7)@5qk6TuP^S$tDAp)S^Zt`eEi8*f3Nw>)z3ZsZUF-W z`xeJpqO4x5wf|ao{@Q1}{ngXL{eOb4pTD*JWXE}5lX)Ap?!S(`zCQK;!>#XILVsB% zUzb~Fx9@MTW&Q7P>u~G8@jtcJ#dn3Rdb0I>Qnv8N+S+&dYrGA&oz+qO7IFLRlKN;_ z`=xurYu*hm)PaoAh7Fe;s4E({z<7d_KJH=EdOnGP>$0s zPq=G~9mhju|CsYpQ3=7#UHvY5LeSlXJwlOTYh{1i z?@O7p;g?J{VTm`chmnb%r`w<`a1M=?4w=J)>WElIaU1I z6WMt7a3<>UsziH>*@7srR(*pWmWENjazGasZ&$1k6*}XvYB4t z@iyc1GTGc}ibW@4eh()>;39SIjV-CFpc>mE)pVfsao!w$`nw+Z!{r zWc~ZE>qQpT#(#;h=TNb}#9vrGRO})OQNm^(6)1~WH1uA5-2v3sJb8AaF z_4LdWmz=7tf6Q%5%MM*nSz&(6`Sc$7iR@28(&9eb-;JMs%O?EQrK&yG1O9EdVwtr< z=~MnYF-NtlA@SEWlC28`jvXlV=B$2IGmpDYqxGzc5cmJ96<1d=9&+CL)5dfRz=CQpEdWml;iW-}`p?^Ix&>X6ot8`tkPusic4z26Go~TKcKx z^fQHB61Mz`@16Zc+jOMdReDU1ZS49KcE{+fp6PFvr8zB?^>-FVIK8~fK2PMs@6RW+ zWbSVG|1lz^RsX7jflk@;=d0bj{Nw%>&9^`8_IA7e(i|2v<&)tZV|_-t-q_^7(|&$DV4C;2$*ijG+lFQqZ?P~8(U4K<7bi_9JWtY zbm0-IGVzRA|MD`+QkHuaC$>s$Y7KjTaPsMQH^uh8@Cxvs)3&H%LHPvhE1QeTWGz3K zosRnNP&-}k-m!_Scg^?zj?A-lR&%KMKmAln>imr%+O3gcS_=LgkDZ9_Kf+;OJKyr$zK<5R z_FqqBN7nfMy88a^y^C%EGgjGdZ@ZXRcjQ3QJl;!(qYvNwQmDVvKk>}MhGPY5r<_^! zKcQ#6Lx;A(;e?JDzB|vaYZN%MnLd%*lfQ4Ta^(51|09*Qy}rKd!*kVMt$$9!d52S9 z_Vc{`qf|TRdFk6HJi=3Z@=xuZEI;Xl07qSatbaV8($4F$@Bi)-t(vp?d3;LV@3#?q z#CTpmn|^=w-4jdAH$K`LRXi!qfNPqbs7B8Msjj5Tuky1NRNFtfI$M&(Om}ic{Pl0o z7M=QjW5=S;hRHdJY)6t^7gX(t7M{kldh2qx4(=Y=)A^@G+#g-@e)?(aoeA~+ABzrt zxqn4y(NR60njfKZ0SQ4q>*u5{InH!xLEw6eHXQwMDw(q2;ss?&Ph_ zHLtYqe0z|?+5X|VqID*n4FB)7YSx{J;z@DT_}BN#iuI9&%NqX2Nn!hpDtc?rFTM27 z@y1c>xzjsh`GS}V`iz_wOcj66R`hVg(}oukS(Soihd-9oCo;zDvuifqA@%I&`SlMA z6IXunn3sQm=WJf-uiyP{oLZ94-@UQ!Q8{$9WcT8q-uG7h;+~y#!#*J=Wb4tmH=mz> zj?u7tT{F*T-xHAo1#0;swNfGP4z{*^wMf{_v@*Tz>G`kUSKT`K_1?qF8w|`3DnGh; z_ny8$_MQLVBwsXYKYjVPM(yeA#M*ba6}xP#qB7(Tl}G;la5LNRjb%cm!J#b&)x>=M z`RD4E{8&G;?folfEyv3zzi)oM(#XJd*1V|gEJqICVZI`xc-0c;~nJ`D0JF&b0lzN^@P{uXs)76_Is^#J0RRYO}_C zl}b6MeeZ%dm!9jmYD zt>4SCx;6Asb+PdEwvLOFSFQha<;wdjcMX~6vs6~dZ_Ts(C3WR*=>9N^$zl9H;vW@O2xa z$^Sav#@^bVeOLBEmwx#+fh()lyPWclf84qK)z#Xo*Z)j4U!C?g^7tadufJ4QUzqnd z%0F&>-0rx)sc~z6{q2b_Tif@ec72lgPImpH$BmRfrhmB>GXGP`rN83$h4!AmsQt-5 zzWdke*`Zky`#>u^zMj6YYHQZ&Ey_!8hn;>}`i^r}(&7qp_2++Y3E#aYusU>o-OBx8 zZ&TIRm+kF}WYvw@_|bNC^0c3}|9&4TeciI_>EEc!f7i&b4(GTJI`I3b{`%1MeLp{nqt3O;kMCIyYBjHoTsGztT?pb$UOSvGAuvt4&hdmIhDd%=@~!!z}yl^8264 zHDx9wY4h=HH~SnsZO-EW(ewW&pAOkDwK>CJ&y$riz8sQUHK(QaM?+kd=(QI=|{R_xsMM&B^dN`Ec?>Bjd;u#t&Xc zaV`D6dwTCO`|eM*r@S^5i8gaiO*v4rQDpOse_ME_EZoYno%wrELy ziq4i7@An=OOOshejEod?$|qZ#=(yUj?wKD? z&$gXwe3mOXuKl`i=GyF6CZ$c6o~Q*jn_BGV{cw2d>Z{Y=G$mep^DQ>z#mn~f>hRB*_cypnsY!jBP=QQj8K?3?r{4NjW%sy}`>NL$U5(l= z)*tiBbjv-*DKc^Xo%MXl#)V;at8TFvsMs&gQBU5}$WZcb_eE9h-LC8Y^H%&>YL$F% z-3BX_O6hC=4i|1*pLp*64P~Fx+hw2Zd|tPQtp90K`l+&R)6_A)832{ygF47qUn6zqtRg z?ps#tc--{`&s4^R4ZpXsK1xjsv9JC7)84+|p^`+Fh3lskzwaHIBmGyL_ij~LWNh73 z`5T5gn@;|IyDdsWEng+P{ZjbTlb=Dq z(|?WgZ)x0);?HN^btpC+*1fsu=Hk8KU8lmg^!zOQo3SQqkN&R2)Wvf$xULFo37>o@ zcVFHtj^l33xm9tGb&oxHzxUFrYtI+>x$pV^aK&s9^YY5KQ_q@l_vSwoW_?*+J8Qxn z=Wi-KHAe(q<<{5indi@>vVY(2%L`?8XHULx?ftH;UEkA=r8<-vtyFw6%YMUsIS%2j zkIzHmH~8}D%rDz!&B}VprLty?eeJratwp=_|HkXD`}ekdca_+2u`u0|sp5CdR$t#U zPkzNl=hNYA+v1nr-#g{u9OqfT-S1a(f15tL@%z~`OKh%8zAF0Z$;V$EYs!=_ehtmO z#M-2`Y}#AXW!E3{otp4iWm^2b{TqCu!fP-8<23%rmlNJnzVXRF-Y=6sy*{M!q+$A7 z;R|MaJu+8*dNKdjzwK4ktF%9yJ1>59UXIjP)96K-(Rakk3vDhR*wP?1l|ezz?Y}|I z^?jlYA47!it^504_W!Td7ryV*o3_LGYxmcyDc2@)WJ%jC($$%AvirkRm93w@{kn88 z_pa{h%^zHE+&%m2AIFyN*fVjPLgQMdnp=Or$u6(>v*zi^j~Zqg`jZ@muYCwrD6##N z)P8*3mjgxy(`JbZO+96N$<}6GQ*PDjyZ2S5$8FudeC@T@Q+@}k)nObd2@dsyAxelBWXkS>LL_!vTOArGokAN1)$|T zOT>6soenx0X6UGLvoalWSP`Hm$l2nQBs}xDZhdU&uDy5jVy+tn_l7=J;aqs@i?8y{ zSLnV@LZUJ?dUw;d*$u~f{yi~w zKbXf`p0@huB%x2@A}fZXW#Q*RwFMlv#65R;~?M7|5@@wWd_urL;u7 zz3bc7pS!lzZ1F#kcgOP8{eOSwv7OI9)bjc7?n)az9;O?6b!Ygid4K(2`iiH0RkeCu z9uNN)ev`DP*K$rP-kMc$xc$qTt-8hwZ*1THPG?C@GS|Xu5{I6FYI zl_o0OtWHM*7KCJoa<=$sZTj!uR~o%~MeMu4V;fd#g{*!duO7WU^p%}1JO7vbLl?jF zAn#Sz-MR~hB-Pk*SE>~=8?*Hdx_c`v1R>s+#e_t5fb^Bzl6Kt`5 z{kHBUTU4h-yg9?b^5js}1I^uXC85#ktCv|ZR|W{hqPs z^*+_@dv&$-9Q@jzpFV!r|D*2b^;715|1AF6&FHM))%8>BcC}8v6#eb9NtDo%O zU0HY9KV+YOnCyGPwm5BwO&+Q`etuL!I1}s=Z!zktG=0>`S88+U;Cwx{GS&) zSFdWn@;D*pkM^wp%gr^TILc3mD=8(OQn+qYf0^^I^Q~_ZziqQ`pL%^NjkQen!PWCM zdPRo}E0*q?_qXc%KL2j+u6s|v-Cg!4e8taBM+UKr&N2GgybMkso;#)$UcKne?xyqF zvj2LDy12{RZ<;GL*6i1O6*aNwR9!T~0|o}prH-@2SV6mEqs3o`Ub^m9^7s3H?@Q}; zzu&zwIrabEAf>8}zc$9Lf49=gYVKdq8vE<(uWr1vz4X=bZ1H>k_5WU}t3O|RwUle2 z|B(p0&UH3cg{mZz$@k`&vEh$}h^;qXJ^>4-cn^iO))m(n? zGl+d+h{E+>{x#vBYJWbe@;>D@%dRX??EGq{=&x&n?mn6lb~Mi~dQnx}t9R?K{(g5Y zR?*M8$@!t&(fzzNW*kW=^9#SIuDdApsQ+*3{p|SFXKNpctzWu6T>I*)tNqu4oOs&2 z_ic&$%U$=5C(AVUZq)biS65$)m%hIKD&=p{{QqJ483|5B4jb0C9F(6ft0H%8hSsIZ z#UhW4KKu=fcfN7;Q*hbdU*EJvrxe}t51-8SOX+>}Gc_Bhdk+P@S$TF}ocp)x@4Nc$_|AsLSgHH(SNm%14>PLy_1JKV=Bd`+3_B~az`aMWXqyCh3e1Si$hBF- zl=JtO_$|F9Utd~Hm|y?9{%fV4f6V@{Mb*D|6wmA1WpJN=?}>neJFd@g*(3WP>GmOZ zt@QUb=5-b3kL_z5|K{B4f6(?$TlBU1_lp8m--Fg#KlVO&WrDn<;`id$x(7YaIC3Ah zw^WI$^ZoX#P?$UYFcXW^{#^6>8=q7>)+t?Joh!(dQe)V*XI}UhuB~xRfB*B|@;_?q zzx;625-#N@N^=+A-d+)XqN7zH`_$gR7tMxh3v3kT2Km2g`C7F%=-awie{1iD-mJ#tWz-DSf0ncJ$K zv>x;L%Y4dC*6(`3MYq$RFaDn<8MN~F-$R@GW=r_#AMfb$UfP!H|98&Z4B_hg!Nu3q zD~%p)jW6Z0uZ{gb`QEv}jX46bcg#%Pxt6xnB(9tEWkci28HdbPE?NBlAg4@N^wBtR zD~7hpS=V%O9q8&L!RHnlv;TJ!Ux0&96T^&(9Q_Z%Zrr{>pEe(n{9~ZC zX#JFW9aLez|ZvF6v=D{8$~UhR4?5G2#v$~J|xycDey-GjlCFS|I@Gp1BO_zm_jHZX* zzYz6_+3VAfCto-vU$vcdS&-5YuYc_>+m6!_+-tW>i`mDwo%4uHO>1V_HBZrV^6xeC zbAf!yt-AMpjNg3sY|S52p{-9o)m?vB z82WMTfgFyUHwSkyPd}vdr{nSCiiYo87uFq%YF^;f$8v84|a8?*F)dPkJ-|OkDT>(7qUsf_w506MknMnd`fR z$7YMgVZ|=h;|H0gZYI{W@%CxKujSKKJ+`hsp;R#C=7&4-Ph@R4cDVIL z8ELHlC$_Wxp!(TgQ;ovymV9^EfAPeI>`wl#FPV%V{@4&F{o171M(VHm&o4opJ=L#y zIlTQR9kG3_5USbT)AM)hjHfqVD$cq#^>U4#pspg*nw6>^{ySJ-bI$s8A>m?#m&75J z-@lIv=c=Fb-ah$z-PBM27k)l zYgO<0U-4hCa>2jWr5DWEHB;rTsTOlNPk->Ue(i?!@ynOj$fy?GkN@(;$CAZfW|_l} z@98Wx9z_P7zfZpYE4Oj=jC%Du^Co0?sAfgJmuf%zS4nif`qli_HL?!PKZ_dPZfI++DZQCB?eEhs zKMfq+np)QQ?kP1gIpzI~_4+GeL#^m#PdqqH*jTr{*q_S(@QQz@xa2*hV7<(((>F*P zGEAApvok;b$+vmGSVS}?M}K%M7q}sLY1>VcG;<;S?)olND zgy!GucL?3wzxBu3orXW0%nqMR{hU+zZjy3krkk4%mw#sJ7bo6@He%8&%5K%i z4_}=4@sFNQuIGv_&P=6WJcZMJwOE??gR1XZ}N#%ny#+Qg>XN#e4IZ&HXUD ziN^Ost=hj_c>3I^^YNEbi+r6o@dq4Va!eepFDcFAq)-Cx9UFxTpP=j7AVkL9hH;(V; zF7I-_H(g@&+KP+=i)yZ?{tGEdc2r~mJsuXn!q-*4eGqqjc{_U`t*_~U<1p}^g^y;fhl zwttNJQk$>)eriw2o^wHIXZCGW3=>XKeSPBazg4SC4qRq4cr9$!?EGl@cKzk%aZUQ+ zWjpKksxR7_x|*qKd+Eu^$N%k^{WCiI>4Xsdck|oNX}B*Fo@*7B{eXdiOT%%NSnDd* z`i1#5cWtJJhyOag>+9>Ur?>X3y}mzYf4KF>dF!uls`>x@`mJmJ`~P3nWtsZu%ij() z&);$H_g&u|wl z?W;(&c)f~0wK?-)>fgltzTV>16J~ua{;FNNK0N!1W%$ImQEN-@UpgAL{r%VS+VA)6 z%Nm{TzYyrKUE6nIb*&^@&9=ox?|ai$Fa9YIE-5rGPX@$qXUb*qxzjmAT zv^&kdY+L3r2TeP;_>EE%L;053?H7No*m_#@-`-nartJ(}eR|dNX;)vp>sT7&cxE5} zmt#9TC3fTm`2KGZ*(tC0vbE@cK9y}$T>{J*^qYrpJ`+vs2Vdg|T#XSUXDeLel@OyB1gt)E}l zZ@!gl_Vm*G@{<`+u6L`OLhCB^zt&xUdiU4$`n9#Xo?o-YUx$2Jm?mYxrL#-%roGPz z_8h*y><$O-9_dMVki#4(FVv*>?c1|M>F;%ym}hMWx!*e{p_p;!SB43OHX_+PUr{6YD7Cu|__?ZJ&ux*NRzwG4q{eR}o-*^4hlc~Ii-ucIMeC>L<`ghbX zG2ewVcIV`5O8s1|Eut~o&+mh|a>`fXH7C1HdH>p`xa-K_dba!SXJ6DDt2hZ`2G7UgXw$k-kqx%vgWhPX~!L|t9^b*eRVdC5<2x&B-Ui!u2p{5!i!>>tYx0T!{>+w)H#2zbSm*e^7t2#>75J^*?iz4jTW+ za7;UVKX8e2jnOO*$yXY0ULKtF!*FKh+j-aDC^Uf6JPk$YIHS5>~WnNDH<5@*14>CWTP>|F5UcGw{>lDk(e+KJCl4S1A@aUKF`u6(u z&Hi0?AHHQg*7MRr!SZl{U8B2ZKFg~1*Ztcy*(diZDBFCnSJDbu+~S++>v(_ZpZ%db z>bLK!7JbY7&7kx$&$;7&eU;jdod5o-aDl`Aggr}t-wFHvbITexxAjd$LM^5d3YF!r zEpPv1fB4%XvL|u%%YuFV^1d8D?baPJew@x1xxe~-`=+B&$`5*aQWd)TKHZ-t@n~b5 zv3ATdx5)noHT@mG{abuQ*mKt687)tGw#FGAq@y zMSmSUD7F8!)|R**_r2@IFWT$>0IwaoEtPj0^smww7>-S}{eI$vn=hK_}NY_&{fud42J+s8loUVcSL zsPjYShnGPH8j6RFEZ-=`-rKL?&?9|)^6lGeZ%x>~XP(q*hZWBIrDqDsZ8_Y&(`a^h z|K2MCou7Q(AMt10a&(FK{~wLa&;Ly_`o;d?&DZ{$z6EO*xW>JrhuO?l3VbH3TR?Yr^M_Q~ZvtABp~qI&f;t0V8T)j#WEtG-t4d!w;t zfBcvI?QM>7-=Ez}dHP4lZvGbT1*TeUJ*LjHUV6X(q4;3A-nmCz=fjTqTyojBe|=ct zfdj$L>_1xHe0|WKGym^G^L*LQ#h1Qb;e543K;CI;W&Np*b!)2sztiU|sJJ%y^_i(`cL|#|ZwmE4J z>OVZU>^A*(p-f}`oYjw6qkEpP%}qc3^@!)MqE7esmj9i0eLEbNx8d&Y|FI96Ckb9o zo8$6|^|0rw_- z1zj`y%`H1^MKTQ+PnL;qdwA4%*Plbr1E0FR?@(NEcX|4c&!ol)(gv6#l`S<5a z+#hk-Z#Rq{fBmt!AlARAVWs4S{l?y3!#sV;bPvqP5dUytD$FQ9d>pzT_~0oVN3Rrmh0JnapVdvY`=`TK(dFM^yOY@5^4@%-#zkG-P* zBp%HgyZ+W<@Q^&sOueHaLg*>Tm!WA`_En9Y~_S4_pk2(Io z=RCK$wBf|6R`2iW{o&##_d7qHsG`4zW54F%+n*Wxmrbwln-L_mw!S*s=S*Gv@5`&&uE6 z;-U29e3#qgnf7{hErPrT>(=#7UUGHi+}nHRUo2eV67YNX+jsMlO$Fj?*5|!@_v8Ie z+cVYEIbxNby?gg|cdfg#=erMW>#ikNYJcyK>uA{VewCt~nv) zxh?y@@A$s|gIl!6#gwJuZ|lEBrFpx4NwfZAm>Jele>YQ5IrP^@_N{t}Yb4{}^>Roq zvzzk8@A?z7FbVD_H`8CKRNLSCtrq1upT#KY()%O6y(+ubAI)a2Z`iYB%KYp48ZN#O zIdj6kSFBsKNkBQO<>Bt#-$kzo3p=m1Q$5IQoqA&WqAlM8O&@d|-}!5gUGQJN=60jY zE3VIe{9)hJ+9s>$oj>2$^;uMTMBRSSf1d5nk^VXL2Q8BytG3Qv}IVy`<){`7inFtF~TQdoegln|1Q9Q+uX#%ItAJYq}`4IPNBs$eZmA3=CWqjtC<``gynNYt){5TT8aH|6#u{|J2f5 z=F3=GA1GEPTSpiPNNXf0K8~^A2?#CWdwQ@;^WW5eitE|j#5d%nY^j)@(?99L_0{3l zVblIcv+Le(X4Lw>`gD1xT&swD(@OmZQy+c4&;4VC&PBocxopOBw75(}S*65erA*ib zZZ4jw5}#PV;D5Woo*jj}vlx#Qs%|KVH`!ppwNSNvvdSrgKJPAOIEI;S4* zq+bs;#3LUuZ0Hb|F_^fSdExqmM=5P`y0@B>KAgT4aKg6ulHI>`p7ra!wAY7EzOHE% z_O@t!yx8u4hn=`jUx`!C-FCa4Ls7ImbA7vDg8l#HlY?I0&R^!cCen$8D?Rk7r_)cp zhp)0UU#>TKwIPRN#>1uJQ-g1PovD6A{A)-jgG0cBuiZ)2XViEN3X48Phu|KeeHe?`;HZ=VITSngXgI*3XTZb@kL?lk(IL zlWlr@G=vy`@6cI4vumU9tBKN;Q|@_e)HYaon&oVxXAJk5;8umN2fa!nf7e{-sF~0^ znYHPypPGoxWY)%RhUpHmc_|Gnmfq(rQ#L;qIa+dCr+$C${i}}mPhJkax}Y~cJ2b@g z-`;n7ReyiV`1C1IUVqXhryJ@VGV)yss-2P{#Uj zM_yc$P}7G)Mt4j?EAF3X+1+Mgl%llcW@`H58ItY)w0=xpboZ5;!Gv>t8z*;sW8-9d zR%?}Snk;J}88q$6l1qx_L9@~yZ_{mP*c*56?xDE0O+K@p?cV44SK#=}xa8ym4Zm#* z4yj04{(d8T>gdXS!4>TnzwTF$da&a7GX2Az*-Go5PYBx*sH3;?XUgxdN6H250@9aV z3Mk*R?5>b)o8F6)4v$R=O%Cqz{>?g#Pxzy!2=8~v&zcKT7w-I~mc-4TxFX(7)hSWu zdwJLIx3cd&t1B$7H?)X*Kg>-y;ES%Uz4U zGAaBC&=IYu+?^{V`PnOcRx`tNmW_^`?BeSJ<=3yaQesQ_^7^+(1)JRU{rYdz^CC?6 z&Lrz6CM}X?ZdA2cAFzm5OQ58$!|?pe`}wc7a?gAjz3{~D7llX8KV?yJG85ScZN)3P!UV8_V^tln{X?Gc>%RgOpZ{tL^5hsUL_OnJ2a+nVoJ*C$M$)ATcQ?atDB zr}suXeR!m=`f%a5qRSC^-&1Q+HeFfxyYtB3i}__+Z(R+%lX^cuT{Pl;lI7AVvhOD@ z?hI2q{4(jk{_=0ZZtrSmy|hoV+45-fvoAklzUS{=f6G*(Ui|zlkyUnF8L68;g--00 zm{66=d{O&-^dC`y>|?qmyJl~DqP5`otO-&3uf<>8SeSnG!RplJy}!2>Tx?6STi4lU z`~Orm+vzp#;`d9m*x&DqQoVjdr9ExA|GnZt)u(^t$;9L@TO7a9ne*;WtE!tjD-tHl9aOw~?U=vm|9uDR%FL`- zT(3WFei9P$ee&TSlUPLeZ#(L=Tqn}E@sG>rk5^&Dn|L6Mo#br|0SXktX_SVVz$xnX0>!cy~u|~Cd4mU&FDxAs% z1^>*uz5jli%If%skC#g<3vdW}6@8`dh^YM6rfhCz-7azIzxVo1B&Zn`y0fv-bH;xZ3Tui}6)Za|D0oX}71Js;(>4{wJtQjWbw(}Jzjl6KF1&J;ZDji&PvvD=PJAu{bu4SztzaLR(@ zq~zSbEyA*XMlEPkiaT?%?<*Wv|Bj)FVSigm0!`A)ey>;Ig-q72vP@x?1ykG01?b(HzJb&lg{`vp!uTdvI% zQA#c`ozNuU_xh>ZY0n?Eld7Lg{TIa1?;^CiF0cQU(o4O=o4KYw?9$)&$wiUH=D|GA zw!?{ctf#-?+OcC^Qbms7;howk6v##E+H3WHKigsTdBHJ3of$qakNUFhZ*_SdST@;U(_y*nPxov41R`@Ko?QxC9V1du z8oVSaFf5Ye-Ta5%+trV3`>|NSvQ=i+Z;rBSi+Yb9z4Vv6GQ%xDeD&X$TEU+Eznb2g zX}!_h6C3qqnt%WQ!@uvBuT9pTw(!WM2US&y8SLV3!@l>&g}>wUsi~2EBxD$?{6Bxu z@&9Rj*=tjuoPQ|f_${;XoPFWW)AcHXaEr_31}k{0s~P z#L~@*()DL6WE4ygc(7o>r*of!Uh3`TU4LzV+~kX?aVuNybsTWly_+^6An-xK4~_qm z9;~Prm!EuJ`mm8yjr4Oj1_qw>jA`BYU8b)zy3_WzPjUQ_4}t`>!$v@ zruFsp*W#_|ub%pc*Dcu{w>oCk>uXEDu3x&{YwMS-pa1Qw+iknmC{fp|_1^ps4b9Sh zk6R{OF8UMNIg`(%`SbVJpDiB7om9#e49J`q_Rz4`>1L*>I7^EzJA3`RAKuP!8x&qo zj94A@x3nuJyzBnavaP>1mTgrR*!t`GrK|T}$F9v~-7sUnoQj1pOTSN}>yZRUYRf4BU*KDXq=*Z)WU>V|&XUaH$u zynlu2$^C0%t2X{RdS`ps+LW*F^}lGIU*P0A_x#nfi6M$io(7UGb$0s{GF`6(i-(H` z<#)f+)0uYgav__jNDzLDu=;VBH@uR1&#JWu}4DVc_)+tlIe3$bTf%_F! z>!#iQ`}gGOpoOM)i>|H|{+hRR&HVYF>g&zJpGj=}Gx`6TgpSm?_n)RKP4-FuwNLF= z^cKC<*Iru8`~C9iubaQGJ#A%kG;-hM`ZK$3<%D{pnR8;FHoaww4q3@?g72=0w|;AU z3LB%}WHI)K_i{SEIfb_!PX4g+mFJnAa#72d-_O%_I-HSP5`X&i)kFTae|#2CzrWTy zJX?I#^8B-pwG)o~vv99Iq9ewkDYQSJpmI^abK1AVbL9Ry?7YEKt$zLLMxJkr7R>x^ zzM#)5?8?5qvRRLw2XmbJ$QNfBaPQd5yH`TaAB3IGOB|Dw*e- z?X<}5{QS$pdVHH6T;-HFaP77Khovf0PCer;jOF?m^R4C8y6W{|hhnSrgWiWVw|Tm` zcyeyy`k;Q#^7PYB@e>BHr5N8x}uNN<^BaMZh9Ph+lW6sOPue$`1Rt6+U|PYRAKV-emzI z^IhFz*;N7)Lbr3w*dMF(=;NgChrS;E6*hgY)Q1PxB!Z5(E%`R-=l;*%cFCAuo;Y7h zUs{ay)Pn`ua~Hh5V)nsQWNFne)s}?+s`0DYIFIBl`5CodL*V4zTPyBf5~^CB^+wa~ z+HS7t<(|A3u0J;CN}GRBQ8iIQ^v1>5%~urJid6hXt}p-J2AqC2NddeHiMR%_p)60-+W7U#^jTY8eyachQl z_Me)$H;lCSG#+2&_WmEgsW@TPOIP97|G(6puF!3BpR{Uqgfx@N>wfnBgA#N0$1#iC zKC&>eV8L?HJAqxiN0UC(ZZF}}ak~2}VU>oV+o#g-Wp#zjMe@GsViO8JcZYwIQ`Jrp zxA4_owA7S0}+UaY;l4-wk=lyS5 z)E<0ZyRAU;zxkI0{R`_SPI$NX{DZ$z@!i7J?|vS3o7csjYczAQ_OtkRxB2F%XYWZ} zp8xw(cWPWrweZFp{m(Y$OXl@`swsOXdU3Jh_O`Xh<2M}6{QRG*gH!!tZ}IJ4f0IAj z`5#r^a3*Z_@w0KQ8IxZpYkjqox?%aUMSGJgxBoY(XPX2Azhy|?|9@y(&XtOnIw_m) z@PEna{+}+i|Cs*kt53eZX#ce@K7RcP!~1)@rUtLiaQ@3{q4=w0v3SW@j+s*<+t{QZ zv_5^myD(6#WBd0XB@3U;XWLis-*l2}&Yk@$PMr4*I8v~4*)zo_KmYRv1Wi_`j7T-^ zS^D2cV9H_^v4iu!rEXt3;obV5J(oC24Sv-YycLf9oVa;y;elfcfydLOvY$TXy)~=j z`lm@|x56gAtLK=XmdST6?B9*_Ew5w*l0~|!JGa?oONTGf?D<#qZ;9KR|DVJz28kAm z-_riKvu@uL!T0n1IVxFd=Oi<0XK5Um@V~uF-pQ*e$WSNiy1|=J`RLiV*%$ky-2 zEltUOn!nIfbg2pd*?O-PF*9wfCJ8ityR3DU#a`y*86~4j`hJ2>zOUL{erfXUuj)ER zb^jK0Ii*Q#|K`7ER>5D<7Gw3oh*I8P%~ekP&x+|7^{bPRD_xo4ZpVvX$-mCekvhwI=ht)}Z)FU&%u54b2ioD$P~`n<0b zdwaHjXq9@#vVfaj^6g0vN)L-&|DUF3RPitA)9?8#PEz+-gTFa?2@4*yncrpU*;KSl zwdF@do$V9-?F)8nlQG)gW3c-2=7Z6k6&@8Pf2*393#Tn!vt{xhhqlE+ZFL))nbtjY z-hIC}x9rNRfAM}V6>d)YlBWCO;NQAC>3=^Z7Al`#_i@qBuRl81M^t6YD<<#WS!e9~ zKiSs#xWBZm!N0EZ0)bf*2!*D^hlib4sHVSLuQjjqy+R^S>dg)L z-(*xS{F-&EjC0$k`;%XSNdMmNge2`f{hL^z2o9RuU$$Oe**o|Ea~-J(RD{ zJpRxB-n&CjeD=+?@cDP|cHXat`Tws!Ha%1s`C4etq%xNU65SJw7CbJJ+dC70&T-Sa7+^XM&e|;Q}Y&Ji&f3jhn+GkzsZFX9=nQC%RBUhP~x+pK8r)e@z!@j@1#p5?C-PB2Z`LXCHrvDuls`IGo|JLf3ZsiGy->UoX-S2m+C+27U zzN(V`>Gyh5-IexRw={ihJDzrC_UmUHJ5x4n`k{C2jj+6|QM3NUuo>#Rn|ji`QciKa z{?=TUzdSCu*QDFSCW~|bU&*=kLhkdd`3F?i?b;YI=i^$P&^fox zG}+9z{ke7VOQtzy9*=u74eRW-T~7Bm%-?cUu%qP6eD?MG3fy!){FoZ$rSRE?Wp{zd z&c|t`f7stA*Ew{~DK%vly=vXvJ1xKDc>DQ<$C4L%Cvs}dIq3bP>sr}o*88dR&p-Vu zv!`}P%=x)@=i~)@+&j}HpkwxOI`8SPo4Nm9n>|1OTDOouskOUzum0P2&bsQ7 z(dmwtyX8Mj{Nv9bzs39=*CDkzO=rX6zo~Lwespfz#kETL-MiiM#HQ`FdK>4xsBZd? zh5i-^79X^#GW9nuWqJDE`)8&v%gU#x&1xR6T;#X)puAnxc9B@)&=(h%?2P&I&;R`W zD_VZJ3%gGH)@ZU?Y>}CFI%J_-^wQc{{%H>nrJs7tbE3C*e!Qb%i9y?w>aH8N_NAmHW8v#oC|M0wLcz^It5# z?%S<<_q+GgnM#*e3yWUPdz)2r{oeM?c`;8ut|~exXj!adHNer_G!9T6n+n`&;{8uY0YxHg0M7{eq!jHmrfvZaQKhwA|f7Z{x7s|ffzHTct zfAReDCwFfZ?zf#?%2yQq`r$_fDTWVrE7mb5P8GYrXY*|TkNQ>r%ewb8D6YFC-+q_L zp7$o>-TV8V9;ka>X8HPvk3!T5v0Q3uVyk`d%leh>p{He4A0k52w{;$$zR@#u z>iacGCp+0PbIyCNwF+OoU3W`;#50X4)wdZQFe->lQ4->7${W6++CI|mUGlcQpYwzdJ(*X`^EEBX;NZ6MKYNc} zyBfNC(|S9Bz}tWBXq&udDPD2dKXS^}(--zd?EAmsgNeTWo3bRwcsBE+Tgq)0{@nWJ z%&WhCH$O0D@4k4lN1!_@z|EBNlEP(cr$ z;CbvF>pCBbY}@cHeo^r8jMMwHZ=L$K|4Ze82aIAOJvt(wW1t)k1n7uxYcd^l*bv|& z!Og~Wuwh4sk_ab@%g;%_@7FS=Z7*6VI=N`ptktI<)NOAm+Wz<~zyIm)JNK(ytB?69 z{whmzJ>Q`>{+@5N)%>bzbaE_yL_T1hmAz{E%ME;+u9&`iAf6?syS_H?Im6VnJU!z} zU#!w*-qX3!%~3DCwKTEt+ROH0yYF6Mm;dl}@r3iLEq1l^(BRn7`c>Nc$?7!IBu2x; z>9uCOYh7oo?s!-KPgi86ry8f(>`mP3?=;>IICVaF%4Iccp?CTAg`aEP8~gV>e7Ht~ zb?2b^ZoPgtN6NmEZbrlzj(#1*V6WqPkLvIC9l{cH7~z(_l7DJxya8A^`CTm zlx7twFW$vW6suP9GefS{w)tZ_cQ9E zCP&z=zgIS$k$P@$zCKy}*`f#LcVC|B`hWf`%PYfxlEs?Sty5#$!fUMT+RwL}FFh`D z^+LS-POa?c8^T@~BpSqRHS@XGSLS7sViA4g`k|PYSx;Y0{`;M|fl&bz`<$&!i4F&5 zOb`=cZDveb5TT)_-O|)3FyV+^*4Mq?zdzTnELs{HtNLiw)YZ*V^2gum|NVbpPQC4i z{Z0Gbl%K}ds(oyj7CURJ>DIG$+NVF=e|MoV;cNT+>q%eCW^$?>tn0aUK>z4ZQLleu z=iGmBZ(J1coHyrM%bwH)mcGIN)kQ=cyW7wB#+{2?_~*R;;o0x@vab$5Q_1^xlG42n z#`E7lr)DqxeSO6PR`H;qLoz>j|4Hb~c+arc>dhRz`MzRHjoDso&*3%AcF^kCoppS} z?FZNOH!)@fa@nS+7+*a9TxiX_H)WR|-~D%^Zwjl%2Zat{lWvdM^4}jD6oBI0OGKEJ znK9X6j?Ppr@BwI3R0JhioD$2|yx#S7ZGK;|g=gs6(_O(*o79~@-PitNSN+kBd$~<~ z=Xd+-E}3tauK1Hb``Oa(*FQfze|$l^2}j22hhqP~Ge<98dCH$Bd)pSBJ9$vSiSM3StYJI#BZInS+;gTc5ARo={X5P0`=m4f8fKhz*F53CySn@O z&AT0+9!&igY%zT*^Cw%;*I#XPkLDVCwJyy!^V47Sou92(?7MCD zvQGxZukU|L^=GqZoN2GcT6;sV<&}NW^gr>=dpDn9XH*fHpd%vK+RB)CV1 zgB1Z+V^;{`~#zUdz|zS}apVLyx2`cb-(Ff9Id_SE*O}m-f8c@=wF> z*K(uAzk79jKQB_rFMM|4b7J-`dI_7u)PbA~Va(*EmrU)KKlP+9zRm#mCWRp_^>z~?a=XE%JF``afa_e4!( zxP0Kdjp@g>*X++)Uv>8MuJ_roRR?QkzN#pA{PJsQ>&Z!}HqY8!Ei;;*C2U;CrgG-Z zr8yyM?v)+8EO$}+^sfwd1_sU!$5|3=T&$&~;ZZ{GUSHiG^<&=p{r`6V-~an>{Qv*| zzk2Omz5DeW&7uxz;Il zKCOKl60`yiGObC}&28}#O7YA+IaTHJYuEc9MB7F7?wEdI$LecKwpJQA?%t*RY+-Cr ziTlZO8&8Ssx!-!9Y2$K~;! zDr z(5Q6@8p)ULq<>j%XfJpvuQtp7&YzGg36~aa|IpZWynA{FzrKDM_w*^3X3lec$*ID( zb+YUJ8C6`rzB;bk75E^bit&`3w3`;Qd*eRIjT`oOdFX_t9Vk2Q5Nv(ac#m7s{>yIn zAA~tho89of{N4K+n{)Z||NoI$kfbmpDsxjyxPkV}ZK1MfwoJ2%Uv;IgYxk3N`PcUe z#caH>uKv32vK2}aTfcqCpAw(zvuq=KOi4v!-n)12zREHNi3+M!@A~b2ub=DQyYHo+ zX4?JW3+@hNTX47Dyzc+xtMk35%*lPVCiIKfmpNNs-&ndnoa>+3@_6%eJbWUuObOo~ z3oJXd?a21FgS}cy=S_E$3|*9eW#RLkZJ#bUD?|S8y0}Ak(VzYGs(j9c73sge?~VGJwdrrv zyXaNd!~Ex;{IKQNdG|-E;%(2Pmp@XNlX7fhu}19eBP(4xih8Fs_&-u++%$)$Z`0Jq zQ$=0sBL1OK9KZG^ANlp=-riqZHflv*64RdA7qkBA?XMD*0`4{4$}8_Vaj(35FaGNx z;WVy~4?2SLGDO~esSj+k%X=$%^rMa8oWklW!4jU^^e>kgB)jbrTxn(|Wx2=2ZE&{EL_FvU6sg<67|1O(--upYs zOPnM&dv2ShdU*fxMIPCyKlR?NmDeoDu{pXs%HfW}!rlinPBJy#?z*SJW5my~B=lqJ zqn|5$LiUIMh-;kEx%G$Av_oZFO<|7~+;so=pEX3hulcT1qjq_0T?ESY){6WXo`uq>BCu?l|Ts4;j z9X`2A-}mIp?@rSXKM{$#w*BFTc^^;c2r4X%XMEZewQlvdjZe7OCN)arbQ-EY7(Onve8*Z#@x5~WrhRB0*yAnVdJd-vV% zyO+nh-46IQW7i`l>74I>!h1RORx7pq`>Uwx)bKfmo#CbIRsD}R0R*1nKkkL%43g~a^x@816reo?eh ztM~Q6u*9Y!ul8nz&*sN6{9`u#?@yeOyi3_KzVwgz#{b-hgVr^EzZ-K{$4CBu)86F5 zrHel=`&Ihk$&CY1^BOZ0qZ9f6#83Lt?!;;?{`vl*y;WD%MY+|zKP>Dv%{lDKqnu{X zvV<8z#w*rpPcVJ>CHBPjzW)1hT>M!pvI1q#A6B(l8~IQ4&W{4eh^eYI1^%BLf3mqp zRo2UXSowIjss7#GMFu`$e`S?^-?yw5{{R2x?W7ck1Aih{H1N!>P2f7T>6(`Q($tih zbG+jJxEQ2uVq#soeRBOnDQVgNPlS#Y{(sOFEE#ca{`4pNUPpf^vyajO;Ol1ulK9|7qFL1Vc!3O@5!e8$x(f) z|K5u_%>6_Ap{mWKNp`C?*w(2Y`u&gVbU)9p0RLwzPV=1&m_5Dpz{D;=}Fy_yN}xb+Agj?=hQ*>BVC-)o))mQ%RUd?mCHr;2- zFXLI?T6(p*tC#-l?Y~>qKfyhPWxZ#t{$~)3e>(l3Z@OJ# zTzAmVBbjwqp0%IY&!Ti8*==s{WTGU*ifQDqiiW%!t`pep-Khg- zwJWkv#rpM9R+g;T(D?>+?b}~n`nBbruyIcAlHcpqj(vZw-Ep$t;>hNV_J_MAe^vdR zCy>unR&4pBmPbc)z0Ul#f4~1&eM~U#r^Lr)^=m)O`y4!9_4}sc2!mf$&CWqbuHP*C zf3temoffuR^PWvF{LXEa$p2CO_d^B8HT#@)9OCWS6ZUPcLiOvFx62#%_W#g+pS#&W z<5>BroX(kNHZ9B*F0M2dG}Vd|e)oK;%Dy6<Cy{X+2K`po4(+|0`F*9wL(3sLvw)f}LUk4Qzoc|)aQa66<>8plGI_Qvh~^|j;X?pfEhw_4Th3}3(g+R{apb(+#MErXN&k}-(R0J?QivlXNn)0&mHaWtQUW9;;?I;_UQnd(?^?yY)Ze(la4Ye?-;D(C4ls((Z*sx3sgV%!ZuCXi;VORXOF7j6-=Qr5{^_Mr#Qr#{f;&3qZ;A>Am zzP;%U-s}@DPcOQ)@z-}3&AhI+)mLld)?PLJy1u-lYTv6*b@`1V<`3qm>6G496aJgf zqxN8t+?AViB&*nY_4k}u-E!o~Zykkzs)D${jiGH#W)C;#PPukgr9!ZOV|U}bSu+z{ zqEc7<+`Z=N8Y@1g6c+K_ZL6n8AAf)4!aR;1O*fS{`kK;qcRPKf$`ouLoIaf?cqgo_ zj-B7Zz|v9mj~st|4A-vrvfLH*TwOe3{rc5Uc-j_jIqLr4+=TTj4=XXc-rwDCE*@D{ zs1U~x!@9P^_JNa@!SB1?cdxy=zcym&>h)psuWP-`UbZ~^C8vbelhE_>`VWoxO81D( zVAZ2xfi1&Q$5ZDlH%Wbp8L^7hPBRs<`&z zs(05{glUJHul9CXAHAsFWp&(bg=1VS%KJM~Z`S0COC0IaSi+ZZJwA~Hwx~s{JzsiiZ^`P5cI)&0{r)4!V;-{H{~*Wp{c%q=yqz_Doy`@W_Z2pF zXLo*{er=Ebf^%LU+j`k{IbYs?S(5Lz^CvI!hiAMUXC3gDddVZu{fGI8#D2Y;*#?I` z?<(aen=8Bcmi4>6^2`2D{+g+`w`#|fxO?mhVXO=d4`afiuxRIccr{UE#?GOkx>a1hy$MFHqp0pY`b(|D5Lc`E5HR{z`I+l=rq8-JQ}VKz zQQMZV)qktMzQ4ZYz%;RIjGRmk3T#TR^j0l7Kf{RAW7Vpy-+M*ZNVPM!uskb_(-tXz zo8PcDh1yyUZ2lO5c{ERpgN7gpybDKS+@5j7bb3seZpAvll5fW z>r*Zj@29u=_s*)`T-V#gJK^@P-+!YwJk+{cD>!5I2XB_L3BUaQ9rpVs->P)V$Kdoy z-gjl14<`#oIzIgT^vL|${XAF2b9d!SgkN$vIW_D@=fi2zzBSs%q69acd;NF)5{cOQ z^|KGxcO2DU)-C?(>Q%S(;imD2C)T$q z#OBONTDj)C>}me~JO6&Oq?Vt!TKZ_cNYaMtr#*YNSNI4#-MzQk&Ln2qtj{HRdyYO1 zS{-x9;`+9|zqPJKzu($aAzUYQJ#M>GhkM-zJ>CVkua`QdgIQ!YF{+Nd@xPIH-;QR;op6dP2 zHZOa9b@j!FX>m{EKWN$c+y8x~{J@=sq2+zn{e_{w)Z`bPzxzw*>vN;+{3~PIiw;gd z$g%&>BJ(t>AA3W;_50ob6R^A2^ePL-oqh4&4>B74vXHuUKz6r+q203uipmpD_43T{ zGN1fs{~w2qJU^J$JZe9lsc#p5|AbnU{YQ!1X}2#uzP@L>rCt4#T_2oJX4?Kxy7#j@ zj-#JG-YZz@HxVF+Wmxk;diH|DJdt2mH292KN=smSa0f*U#n+FJ`8lJVk~U^8hR_> zyU)&kP5x(_UjNHkvSiiiFPlyotc;rF7RhQi<5yW|M!V$is?MuNsEw5WV0*v zZGJO-@>$O+?bSat=FMW!D!$IU@}q6B-OL^PHn`a{HI-Uuy*jOtY4=y~{q%IDQd;b}|Db_vY8lKv8szcUO`o!0Y*Z#+364@V{8!L-#S^M)W^G@k4 z&KI-nyuTdsP_}SbJ7g&64 zf=4Pmrdi*Rh~#Cf+gK;5`|Rp(#nV^zK{QB+eTK6{s(i*RQG$$?6 zciiJ0FQvK4>sHbGKa(YO?L|4(?5cjhOl$XxWh@(XYm5Zv99X{lf$hqd;xim)*#&kM zguk9%b92pi(W=jnOLkct{>Ue;sah8ksAm!H7vC{IaCS}g={@)Uy|=sjK4PPowe^Gg zbsyvBN;FmWJ&SgJweaVjwi4d|QM0eMpIp23iHYv1$G6m~kNtfzKW^8ac6~c}ZB<9f zk9^VRS_BtAn;_p;WcO*o#P-vzY+>`;Ypy)_^zO*)O@I7^Lv0H#qnH-dRUq zO-?&)pHWk3o27k_=i9Y|l^hj$PL=;BY&M)dRXi-d_;1f0^J#1EY`%U?>Cw&)KbwDL zv$~x~X}`h$i-B>{0yQQE9k#}p1#Xea)!aH)E!@RqC;e4i-54Uu%U<*J%+9-y=WJc2 z9q{h?s;P$YMNj^%{q^zmcyD?y;~JF?a8XWS-YavM@_%BYk$-_#h6Uq>8F24^>MHk-R{UtQ`T4|nz7|TRi)wu zGo}dtwZc`tt#NC3eWYd>IP4Q@zx47^&(AbnE~`G@+A=}SBI zOpAES{(j=D4F5~3KDwpwypUJXvnc&j^}4jD*f!{QuY^r|d*vGGiu?aOw&uS1JH>E% zeuPUZ%Wq%X8S(Ep4m0jtxnqr(#I*SjT68#0SI;>bB_1z*_t*C9@Kw)$)`waDjrtq0 zkn4Z=s(osmuj|)WY*?}I>fX0=-_EI-9_ew;_icg10vCSSa`~m!_Jv}1!nDhm|DST< zejyXvj4;*X+n?Uv{YZ-6iswJ$0*-H;zYG3tP(Gm`;9c5voAuTTu37Kw4*Qi`aCCJ~ zxb1V6ZMt_$NZA1&v6&{i(?5ormdgKSI#-bBA$M(8WZ=>#Cj83r{8Rizg4IQCUb^>Q z$v$Fha`q?Z@I5)~sSQ11tNMZuOj%OxR4ubUsOb8d zA0T09`XWDkz4tonzxr!^Uxyae`Z!r$49yAd;d1+tZB(*l760@P=U)Dcm}emB`lxgr zThPy--iJO#HeIGhLc89u&O3Uhf4bCUqqr*-SqUm<_Wx(wzj{u1NUr$n{}VYu4&JiKQ%sNqFm6sS8wNXiMGv5-S_SAk}Eq` z@t)_>b=63nQ_6hU@#WX}Q^(KRR@z!xu{bf83WmuyE8boBeEYBOD>6ev4n-kz^RcYWXW{fKtxn*C89=I{Rcp1F{z?e53e z`;|NPNgq3KC7!up&i;cE_AF{r8nf0WsED~7_@(`*VppT`1h>fvyEu5)gkIQdEO=(| zb5WM+Md?o;oT*oStXNxf>z8f(8`0l~Q+Fsxd4AuLV|K6YMnPHdqdoKPKU7*AjD?=`?@&WAxtSZhs5+2FKeI@62nn_xsP1BDvdfN~oI9j34?sUqAacM;uuc z{#*Wyppf6eLeGq?C36J6*37-Do1l2myeYuXXwj=xAh2aM>w+GkmL@0u{*xQC*9f*B zxS-^*hDMlGL-~(bM&Xt_5X9DUqHN(|kf_M(>z(;nA&| z3p38kTc3BH@qPc~-{m)_ioU=4;9Sal|CA?NR#&m?ITAG0%U-Xzv+AbB&Bs>%JKsjj z-rx0g{ndYQyRXJ==+}R+fTQ4mhv1L=Jx7?iI9rrtnwPsXzZd3pi1~U!{od(4uih>1(5#)xX1Zkl{Krq1 z+oev{2o#hEc_(>msh;WmlaJzmX*#za{#rgUPBOQD^0%(IwwG5V`C1!}x1VN^o^GX3 z@uA>f+NagwtCoLR)xy~+uls#{%H3ag17Ba+GhtKMG7x5mk)MNaRC3AqcmTC0^=7wEW`~}}6JWQ=_ zImfJXtZMgP9_JE~>9+BtOG#bxcQezE+h?D5y572AnUHvkpZvj9?E3;kr-+#8vVO06 z_#^ySLATFKgKYsqe~fQxrT=va4Ve-DUokwssrLP;#c@yGoUc6ezPx|p?Pe|!t4dk`P{s@-PCXPy4C-SN{Q9&atp z2A7Ts!7$T(ua@(O<;;Jw_n^#5h1!n3_!(;rr}o8Hs!ac|)8y=NVavE?|L>;k@9J6Z z+?0}%Rry|WV)hGPwi8Mz>1hvEyq&y1{jG_kiJifzQxm4!9hJ2;VY0C*=MidVkeU5s zdjG$WV9tv_4<7NH_n4*pwJ!5b#Xs}cmOC~Iue-^x_N%n`E4S`I*Vh40=G5Pa@ibUg z!JM{r{>Jj_56$?_ezWR+F0}8{zkf)yf4Cf1i?w_n}mPmFJQC)LTM^0a-!KC8(74ju2j53*6RN(R!0_HURtSytF? zg;H)3Tc?Pe;p7;e>Z3{*1K%y=xPD%t*`n}{5o7hN*q;Ye`Xgm$d(At||KQi$r2Ahv zB@Q+34*P8P`jBX>(Y)Qy|9+bG>5^mfl!w2T{QWLqQ+ZbC!Ay;&KcV~@&wLL2U4KP0 z{X8T8Hj!oGcm2P+2tIuMv9RCjwZ`f)$Mp$4b?o2gB_*ret;$oL_`Xv3iOAvPgj3m% zO0NmbX1|yJBQA%t`qs_D=SMDdCZ2osu>amYG41?i60aLBXmMQ=TweD2f9IXg@2)?T z4lVv&8d<)xs4Ctz+pPAz!?LpI+m;D$sxv|kJ&*b$JfZ5*#kTTCcKc4g{(rN-;IQ4% zpq;ErO!5<(?!BM>j;q2@L3gcOwBN+1X7f4(t(AP4_cYJF^&JC%Uon?ydt^nN($PvX7%l7cz^p~q1WQ_*OLk- z-fn!(&Sc%|zr^I^?XFixi_X4(-uY!xdA-iv`@gOo?OxLHZEx+`%zsPYUwwZu%m3?F zjh0F8KR8y^Y|biZa8NxKx%}9I25sK>=0E%6+iP~}Ul7^Vb7HNq&T)bLxBF%OKUlET z!+PesKXY5=-+uqfXW~DFrK=|Ar^LqH78Ei6tBV7lzGx z&?0Lv`&v|s&z)pv;e|{|pZzb%eeOQyGU@QOu#3uTOB1injNiT6;B|Q2F^-G#Sc8}S z+4PkCx@c1*cigE(oIjSWPwiSMUy)>U{9v?hNMK~ameV0WuXC)>m@p}F&G&EdtrwRE zPOrOCv@k^VNFmqzH4eMDIM*#X_|0-!w`IREW^XY=arv}^wsOjjwp^X6~$2hS(n ztm+n&-dO_IJTg&q z+gi6?Rb*v(V`k&brpAzUG1H#=-1xDP!FacRzul)R(V~fw;Rerd-du3D@NIHk<0KA# zV*xdXtgVN>mUQSOJAZ$!;`29wRf0Prk0(Dzjj`+Et>y928|OP*9(vW`aa+88ZQO(@3c4SeuO?P@ z_8d(xjGKMCe|cD*GK1Nb%^UBAZ_c`^+jwz>l!orMlkU%Zv(vaG(xpy5S7>Tb{80DU zMs}l|;-RO_pLA1J7y9iz{qRLZ!=44b?XT}WsL3+sf4j_akuTFvJ72TyGW%=}3QkJw z@|C;!_VK+ruEy{GR2)b=-Ek+>M7^i?`TvIj3(DSene3~v*%rKM=GXl{Ty*62-MwF4 z{nv6YSJbNK8&A~!UbSlSs&#uK-udPI;doTuWH8%cR-7oirPlW5;{U%`_dRoscE0p!rsQ<>51sCZCOB0# z2uwDVNDEgI)8;Mzn6O&YzWyPjxB8#?)f*g5pZ`8wU;BSy-S@cF-^@OmcKOF0FJ85` z`swOfPwqx-@ZD53Cn3Y>!pEZ%WU?ch{#@tE7JB}yBgrBA($8D>OqEy-)a;{q{wfAu zc(i3t=6Tt8DZ3l0ZojslmR`o3(Pr>ft-k5b?1k@5w!V&E{cr8Z($ehk*P*Yz?Ae&K zbA6i4wGS6MCiPxBbxq(s$B9Odr|Wk93k&MJ-AB+WKotzP|brzWcB6 z&hkZuj++an>^JPrWARN-kN*)TQ=yYkA!qT!Qd{uJqm5BPf14QBKlv20M_}TOmH9_k zZMewt@uW+kQ))=5($Rh2?i74D@=L6`@q>iD>NAy?twkZ+sk8q#2@4g?zu*5je9|3cG%@ zuvn%2PyQKperT7hawt^kWlp%P@PCW$!S!3*-|a2cU7OGBki+Zx!I+zsNj|e|cLZnm z^exw!K5WWbU7;x7qqS<)s;h;i+>9Xy&0^;?>^Jy0G2+B84+UdC-ON5kZoj8Vzu#}y z;`g&MlHj#WX-~e+dHD|iLSvx|)eFQ{e2rtS-M8S&ck%z+6Am9#=UR{=!&ct#>r$uS z(XZEkE$H2?Jb5Db3E?#nrlA& z|G}kn{jJ|DY`vTAsUEtlD=^Xf@$?UmbzWV6Gl%8&)xE{>Kh@V6C;t|8{i^qTuWCie zQl-FKHM5?m{L$3efBU^uo5Ja1{D)n3SxvdR@%H{B0ox~El)oFl`|Fx{@t<|s7!(vd zRA1>wq%ra_cW4?c7Tt1rKFb^%i&c91w+>p&5t{IwJN-l1aj#{IuJJo79}G}XdocUE zjRzn5!9@N~|K`u=EP7M9&_e&g6cbU62nJrwHH;1&8y3#*l$zOP=izt6)O!DmqaR(j zK6n1l%iS8tT4j8HN2Th-yB75RrUP?$*UcRSMhEg8#ED)!g78e66Yc(T_<#zvkLV^VRWY z_ZXEN&D!Jq;N}Gfizf@1mNvZq#Cf#7>2Sn?vdilKcCvNH&sk+(?6=hZ*Ya48)MFl& zhd-BEZI|%XT~;t(_VCmByPV)XujGis|^( zTDeiyNbvg8i<_nAf1bz2A2I*y(QjMB)=z%^f7h;O>#`1f<1&AsbjVHhdc_j0c z?0+Novpsq7`pO?G*P}ri6{pmAE{b*TdZFKHyz%kBFaeqCZ#fKHZWM-OWI8QXIGAww z`+0){Cqty1FJ=5cRebPgS-tw%zY^D6dDz~6_!fQg0K3wG&yCI-Tf}5 z{osN}^Rs(<_!=FYEVI_G;Z|sM?_iH(-sie%L9r>Hgw}rk;^~JRnrAF!6FOMKdiVE= z2R8rACSQNPYO(g=sCJo2$9WR3L^r?T`({;ot7E|~p_IKJrnf$SeVeavlhdR_J14QX z{Hj}Pc(<{?zp450uhwAsmpdYk8ywRweRb|2%acn-CzRe-dAzI4RUU3ot`M#8_UijnJw6yy|6#RM!oLVY2I6R z+kdHt%qU0{Ul>+??*E3kWApB83HmWZA>Yc``L*MYBLBM1XV)V>W~|}-c;n2{E!x^L z0`g0^IoorV9DQD_#B1sCsP^@PxXX`QavMVuPt1rj+~D)+(dEy2KbL&h3_tY9+n-7O zf&I7j*&o`k-SdCCcEOut?oP)`Sba;y?{57f_O|6i-I4kA>$~o+cJVEL{`BvsgMyzP z@t*Z}T=BgB^7>0=Kjv?^*r<2>Fh|$6B^%4kUS&0zlbN zogP2a@81^3 zzkKzj>59#>>^CIpX|r&$pHR@@GX3dR@Tcv%m;9X@k6x)N7^q)7nY-qs$WQu#Ub zES6B@l)O22!q0_^79HPvqHQCik${bB-C}+LH+!kcJDbiwZZXKvIwiJCC+>h~?o8|Z zU(DCch-6;Nuq0Y~mYBu=r@viNH}3g7(S5o7!_`}g0!_`Ycdupbi4WgmU}q64UbsC( z#hNEy{&gYqvH7xD(r45Dy=geNF3#suri%jmUcqtH2`h8-F zwanx4)Bm4|oPFQ-s%9QXziyMa!I!U<8B4nB&R$!!p-({7&ftCftk;?0*H=XBzLPWk z$3B~5_v;VsEH2yn;X&uuXJ5B{*e|+xv8KVgb*Voj9&s}5kCW^E z%U*x>lA&}`_Qmq;r7q>aMl>)pi9{j4@G|DmdVgl*cr5A!D+ooVj!JNoL26GiVm z7Yp0S9hoek*ZYMf>HUAX&rME|TkKBjhBwuws@m?GTGl4H{6oiyGO;uHQ>$Yl(^acj z^|yFh%s$4~e6l+8UX((IxUKh`cJ=eqw`qy4S@T_J$H&%q8Iud9-Sr`~^MRF1jhMdY49y$woVudV?_Iop%a=d1Pf zFMr-gzcc%~^@BJ}7XR&7%d_Ifw8vdFca9~ie5OX$g*F7XwT2?!qw`%wLyByq-UPr@1&a{;=?9<6*k(m)` zrTKo-p-o@!ZCo2z@wm<;`dktpqnyYL4L2^c^|psS<1(hck%wV%X?<*_*-4>d--$N7u!d5=by#JnMzG)c=$W} z&64EvlXsu(>RGC{`orseKhCT;9bdR4KIQ7%?$b={#iCM9)3)j`=lx&Q5Pxv{U)>pR zl6o1L0#)Yq9f?2B^C8aI!1MHGi}e@M_uN#vwdIFs7iR#_m62jyHsjot{qg{ARC%>lK z*c10IBx~Md@9rN*DmmgOKTZCTahR2#f62LDGxu0*s)_tLE3M{QqyXRfQ$EbqC(ZXi zea+RJdMQ!EQ-QU*PWY^$fa2|aUbgpNxbFGadX2BAoa?`~o7x9OE(tCUy{FbY*1Ff6 zy>or~zStjr$G0cvEn(R8yGs72$m+-29)7Bmex}MjGyVIbaDkJy5hu3fea!Y0Des%H zh5b2~TbOa|*_$D$%Oxy8Ia5nSS+40|!$uJu5pGVdg$^q^l#Dn!&pq3HE;X$5^_s_e z>oyk1+*>fYad~a7^%;v}XXeCTz4Z2N_s8?)?VBHj|J&?db!7dg>82%qf6hPCseO4{ zDZ=gQFZ04jGXvP&!>8r-r(T~l|9Hm#h(CXm13xW!cE=`dZPQIAr&?Zl&1ZJ^(uMx+ zZ<+92u5RB_f2N7c*MISq?K)mEPx$0%bEdx7)vwO$^&FQAv}^c1Rlx4?nKv;eT|K|~ z-iGo|c)V&uv&15mm~E1m{P!eZs|~EtSJm8iW!0*X(zZQ<4mY1OH!#YHOi&Y%W@S3) zXt2OULRgaJP@uvT9dWLfCbe9{bJ^wlY-6q*S$*zn;w9lAEm=G(zEYl&0c`@H4r@>xFI)1E#-UVr;9b?e$-2w+Ns-`ddGFS)|B+u9d~XfPP+1N9h-5% zmJeUgrrDW%*Wayazwr56Z|=}rIz79e@iD51^k@llakerhIn2~h<6`Y-OxWS!BqZ4R z{N8t&fB%z>)`?-H|ze=bJx*v#sjCoLDZ ze!I5y|Ef=4rp^2K;(9P39$;YLFL0bC#n#1I{L%hf%Qxls2O5<3xc!gW zr~Z2XzkmDxzs=tNWMBQNue%?;>Wy0;e>GM-M)7{BfAN#=f4G^>r20RIvQSa@vRoi# z-cIE;Tt`{ec-(Hr?p^MoY0TUcVUVZwy7NVn9oMw@e;4lly~efq(prVh`Wabq`B&rK z?T!Ds{ng#oXG_msG_4g@Rp0&~Y))F2_u2SayLMa+zcMTKu5Xt9^kWPx3W+u{IWWMU-fRw{E0O1zs$VNIlAsn*6#G_PZn@ExUerM5h$>|XOSbT!Lu^z z(>;T%qNdI6QrDag1WguASst-%pYrs}Tr3Rp64uzcZtFB(9{A_~bf$zaf0gy7XU>lD z`1**+@$VVO6E0C{Gr5JPc?EPIK5bUJ>{ZrJcJ|f}iTWa{ubi*(N9xZy8{uv6?~52(&4(ZC9k~S!>ptrvB4$U;LGm{l|#xNk9N^x zZbi`jb*qwc%D*q$nmzfN{rdwwoF@dtg%9r9eQ?J?g{ema-!Aj(hiL{sPZw zvQu@FHH{_1*m5a>iC!K_F5f9Bpj*zP3k!aXI+KK+C6 z!law2-#$e?;Cs8_U*3~<>$Mj#PiG9#iQ=r#ixE2Q9bRVGqT{7ComHsh>#ATkrfYu_ zfA8qd))R?!JZQATNR>4%@!FwRtCnSGsI<;sZ`rS~rTeVdbRKrb@3qJNR;H@w^+q|cDc62V%p3FXIUjLo38vE*Yawe@zoQvKUFN@k?M-cSj2Cb>Sb}obLvBJngA!S#{r|2f^HhdXb(Bn^v%KnJQj-ov(h=o1-GEc?R1# zIbR0;`HY8;^F<$t;aJJ`F0y<@n)Tnc`3X}34Yb~fet-T%cHJE-L5W`~|0hpgI@{x; z-_nb9Zl7;U<(lRGZdqSy|6%1)dv}{D8@xrA|35UhPtv9DnbE;!$BUcyZ(rTI`|^Sb zHud-R^XX4M{ZlWZNX~z{V9I`{WtOVX`&)7y*1hpjJ$%mObnfq`$=eSP({Yl9)nA0EeerVZf%+>HlbXz5&CwN{T!L&xgcYOj_0+nZ-M zb-nKH^!T0WQ4-A0!s-w-|6ohO^myJyuXjlC22}Vw{UcxAzD>H9c`<8-569jNC>_2}B*u7^=q~Ty_rJt`HRU&7 zq2c2H*JS?oI7y9v@$vWUZyU^g)+iXZ@8tVmGv;V5Q~moVNAkoJsSO1hg}yi0@A&I&**^X6BA@K~q8^m?`rwZEIL;e7va@-cV(juyO$jdc zh;2V7wEBpRcfVx~k3x$`+YJScirlIyN5gj;RxR1d>9+Rg-j>IA@9WN5eC$X}mhu0o zOc4uP`&o_ROdY+r9gOd{wH@Xtm)mi^{=-j;G{)dz9+(k7yjM|EhVeShKz6cDe zxBe~VdCA3gt7^Euc1eYMKi8_L)k&eZxix$5-Aws+!mp?Lo`2RvvxACL_&%}BTRJPA zZ)#hDac=tFbG3KRY3zP&QBwcSTtIUxeyX_9TTjZeiiqo<93hV;{GA zb=1?S{SUYzWcTnc=xEFMc}?|Vi~ro5h8h8ZjZ+kYCMyU!X{9sN1*aTkKKP^NfuG%} z{pVuSo}Y=)-M@7HtNIg_9NKXl+)Q2$2a?;nSpRcBwFrnlFvrk1t! ze4?e(|1B(8{L3*=WB&Q+@fRK3-Y$)v;h1w{`^TLRix0QY5fjviU`zH}F1z#K!B<`j zmzK}^e`L1!^yMet-hQ}f58rCx|8G2_u0BuxI&H%Ia5EO^UroYOy3%epUolV)F+ZNP z{^P_);YKg#O{uXKa_)2=7Sdx@m8#H7y8e4n&IG5jRleif6esZ zY8E_i)!o-u%H;g?{~D_ZZoXLSqpzCxn|ME%aDUoQ_WsLXmvDI4sN3I)`_1cVr|!yg z^ke4yzXo0f%Zqm#cm1q;A9pxar8sbEl@y<6kF*_!$2a>r@i(p$BMvV2nZDYH^T_%? z6E4>>6zU<8E=ii*+X1yih zX!Y#S1%KPjn`Y<~7~F2DJ7qDGgZqwSQA(LaQEkxK8iNP(&o5bdX8yjo{d@jjU-cvI zulQ=G->z%sP3YfW{WWQO9dENjNA%wR>jN(CtU6Q4`%tg$@gJQu*_}J~Oyf>W(!RRa z;m2uSha(M(9m7}D?CD`M-{19xb=3r)@A5CVM!K{;?AS7+o{9PCVWaTBYj&t;UOMI{ zU)NPW)2?bcKS%Y?&(2yGk6ck+D)U~-r^X=Q(#*vX?r&mW+2v)m?K-vjuw(qb<=ija zeti*DXxrEpa_-zg#-Ee#mCQI+bs^6}@qZGxxgi23m*HRN2fG}r4DM}l<< zdJOk1`4p=wyzihygZVAq$Oqq))?`19wAb6t-Pvn&N2%bIeEAtYgRkGs!lV{hZrj&X zq1x=nzqk1L`Y#_9B^CO7JAQmA|C{RaasA1w#c%)2yY+SHa@Me;U+y3Gs4ldAd-&KA z*Ty$IEK2Vp*xu_0GWq>n#J)zLY~q9Ntmd$HzHlZOkjP4!T`>y7T&vf+@YKN)JLF&y$N+ z|0>RGV$Q~Y@a`O+X7$zQ&uGlJ!#GvkQZoLs#m(!}?*Eyep~iKvusO3=W!c898fRv) z$FXsQt(YD#rR2${ZXY@3LjkN_;@bS(Y!UCjytVz^vwu#BYsPM$8LStR4WxY@)_mwT zQruBBU*F{3`|@+Sf23aLJ{O+jdRzbHVR*fcuKrvb_ix|7Wrmqge*X?|bXcr2g}@xU4bZzi*H(HKJ=c6}-)PJW*n z6q3QE>?6P8tNPK__tPIrByF6to#*DSUAvSYn@yHmCjL_4?7k(x+HZLGN`&+5lF2Te z`SNqhdh-ibDSLbNo>7+&I1t;hkLzARS<}lEPnK_(Tz`rG0pBVO*#*pb+u3sFub8Yf z?|FzEi}qaS&&g3UGJc)@bx5gD_O$P;kB=N(-__i`aeZawi>YrOwY+}kz2g3bT`z8T zUtQ-v>3zV1j+ZkN_Zg_@ivKm6ws*rxkCKMD@1MRYU#;I(_`&DyEAve{C!{yr^0-^? z#(2NG>VnNKy)%);uQ;;2=RaeuSd_f9t#zLK`S`Z^TNRf75uaSH%RRR!^W@*R7Y^p{ zF__tQ@o%X4yBalLgDdqK2c?a^a9bH&(tfujDKGA!aA8X?+mV9hW)1h;(|FxJ{fN3V z@kWyEW&4Hm9Q|DO-cwXAI@&)W;`7s~AHHkVyss~mzH;M?#7isYi4UGWT(Pq-$>y<_ zh~e593w9>YGRd(#_%G$$tp62zCeGS1J*?lC`Dfo56?y*8wjVh%V{h%^dV1k`SHH&8 zkAGGw*t7rIH`_Z`efq0Mjt5*4Urq0@`SH=UkY%yzwF<+gv%YiHMVE5*9GiSMQmxU)8em)KRnlYkPk;?UxLyEv_yM?>g0$FSVp&ht=y1joM3J*Ghhv zcGEZLC&!DM*PnFsRn;;7e*MX%EbrBdq>|>f;X78G_OWi<`}e1Ph@-jiC7xr>g+-Mt zoclN}q_d*m&QKSAW>_KqRU`Ci)1`Hpw&&YED*j7dr*%YNoA##%)^&e)9vaR$b8l5% zPu}}EPLAKWJ_Tgu9a?Mfm?#Tw_S!rE`sH5Hw>&KG^pd%II(?Sq4RZu?DFFVCnC zefiHe>2K`)x>tuEOldzfp^%rUfA_B@y?4*1{}=R*jEsN$uE}%XYQK2}C%IQ&Kit_r zQ~#jm`M&`d{=R$lw2Jj@+^@eP+ox^%!s$aM#U(9sthy@-_AE}9#021tAOStIh8sF1_2kxSyFAQSW7d!Csl2) zy)U@F_s9MJ@%8`ze)((ue|5O%_g_o-|NqZ^{LqfcLG5qs$Ng)5SuW6ox+>bTmCnCD47SY&)U@_7Po7bl0j$kkvic#?RVTR>=sPn zk2-5^-B@bTs$BV{^M2ror+KqK9G`Akz+=OHo+bPSU&p79@7ret{gZg?TeC;nrr}&q zvD$;`+FAeDgWhp6H6BV`!goR|UsP&p#q+1jyZ>?~FA6>Px;O54$w86*>sQRb`uEq- z5U1_0{;$5ezHVdaw~e8H0@N?}T?%`dUvpr$Lf^us9qYDjeErtM=d>%&Dv9l--0crt zf>{d!5_|hD8cgZ%7Km?Ksic;8a6uTG)rO0cSQu?Y0!|kFTUaf*IH%^2gcWaTom8Ja zn<;-@kD#0W>0ifdR24XcRzH6(x^R2V{Edlhe2fyjg$m6Z8uw{CmLDiEUr`qGG9%gxU7U$DW!hNGeF(_@Y`ErP@r;pX5srl`-+OD+)N1b}uHa0BT*nhkKx1GFgZyImo ztqKP{Cl;}CR{_SP3Ri}rLi>fK;kUjUCC{6^?@?~j+pJYT-^&SGG={u%*}VJ1Zv77y zZBEPFc-eLvAeooJ;0*u8p6-Y{ z3*N53{#+wu!TiHKD$ftzx$)2U>5^COMH=((f8Brne)*xefN38$2rx4}RAjK>dz)3i zy+Y#9`-%vQ2d{usP49x5xgl(p{7+W@`OvXDU}2$oathm?4pHX+3RU&T|C+t?d-P$y z$__R)u_*2Z`m;LL-TGPk*YWXJ*3Ow_jcIe^+m0|B#O7I^{B(Ioz&EY1!W|!eFW7ce zH`iO|-Y>4DA_`ybN2ut@by%}Jk-Js)oAJX-@eY-WHS5%`|1p|Uwdtfsy72uUIlnHc z$DcpE@%jn{kqOp;U)T(_H|j8*SG{s~{prcEsyozfo_-v-;OHi%P(_zpL0xJa?ew<4 zdf&f$;|bsO*Y-!OKRnCr;H~aI>EEXGvYwsJ=eS%(Qe3rf@n6*lg#+(YgLk^@(fqn` z!MvNh*|Hs3%A{Xfa&TShJv^!M-;FZgBcHx&?XSAR>g{Hi@vi>TYXd6}o8%oALPR@w zSPp%>I~$VlOniIZ&-(u(#hqUZ zSfdnWk0=!^a2<6jO>klnFMbQQZ= z3u_qD{|M8T*}QFsbwdlLe%Q&}5g&f(-h(qxo_S<);7*t)}IetGq|;M(W@wtd)F+SbE|kd z4n^I!4SglQrOEg~pU)+k;6GL`em*8A0aZXSH0 z%~~3J-^_Zq*2&O>Id)u(3Ae6KXEgjhLuMLFi|6@26}GP%^R~AAcp+MUNl@IeC@Wy0 zbT`kk4LdGv;)oWr-go%1b48+#{EdQ1y#g7Rd!DS_lGtdz{mxe@m+l?Y)!+Y6xMgtk z@MoTK#cla-UVT;yQq8;Lac}XbTD9vP8s|b<-r9dPeihjC@84mT)(W4+0so}=86G}7 zF7IpXzt8pBH1QWxj(?2z;#=?kYg^M%`H-0bmLW4&IM(J0-v58ruKG{q;+Nm=ZA!VG zwe3z>%D*j1>wEX?IoR;}_Lud`4x6rJwZENpGLOqqZtDM=uYdS{^}lTElj-IDzj&_o zkGk8(FYM7>AGUf`^#1Afe`b9!Y*LB}6<@<*QNntn!cnTb`>N}c&r(b)_Q*5IAM4<~ z6gh)+(l_ZoC&!)yhq((Rr!cTEb}r!BRIJh8d~Sj9!LLlg8#bBMMC}vPVEOuQ{;EIK z6Q+l3uCKcwAubtg%^*)L7&HsdshpF*0o7y$&|IXQxy;rTdEqX&{`x^_@tNZic zf4+VsUinwP_>WG-sFmWYLo3CB+7~?y`moKdUF^iwD$`%lzi-afT@Y@4YeE+RmfYjpsHS(V){Ewe=K2AkJ)9J7K`j3}(bL9JTP0MbZcQm9gbxO(2 z_cH(f-92WrzQ8Z~-ZFLPX8pBQ+x9y09Ldx?{ox_sqEg1t_*D5rg3}_R4L=8~?~LD( z)A8^bQ$f=!_uSbo0zSs}Joy$4eI0#VE1q#M`Tnd|wGnXg{14oNxBSzE_>&gMeT=^BpP$5Z3(c)mIP zBf9P8^>qiYnqNO)*LAOen`iYr)wmqR)vH}ssZZ$)UwYEkKXT3O-`Dt<_4M_>SH0)C z6&rZm=l_=OKUyBOm##DlCAqCwBFDVtHLw2hMSt5?NQYk&co}PHF)8#HaS^Hx*fAYU;)1=cn%o`P$Tct@ghhdp~X!6vWBJ&)QwN{$7|vwU0e#>huA*170Ho%HnQ@7BaRdfCQm z?Y_yD^7Fs+P02FW4TcPgQ&Mtvo{Z#I|FKoH)gwd9`c(WTotI_&OwkXxZY=E0?XjwQ z@>IsMrqL`+&_qMw{*MaPyi1Igr+OS3UoSb>xZ0u2zVcO5KmgzTGug%(3#TjV?p+>x zTZ4;7?5@TdL#`*r@$2muwiJ8H9qay`p1gFP<5O2<6~mgVsk`M_T{b-VuDvxyKH)+F zN2gWG1cr!Lu`J&%)O)1P%)Mw_IJuaqQul z-Gz5Q%)K*Za{f==U7o+3^Y^F}Cgw`^Tz{y%Q)JU+K69@lte>j>O-O&!=6;oJ>ER0s zP4{m%ls^5G?ITc-J^!?r(BtL@$EIJ4oN{c-EUDS+FE}0YTf48b>5chI`|J-p3!`^! z;xs#R&NXk#{@;JZ-~F^Y9C9howbksh!EU1!|Egwh`Zw!fZr_%xg{7@00?wS?vGLU5 zHH(gUT)%D?f3-x!2mmM?f@_@+?TY&Ykuy$^*}%ZPI9TYdBIoQQdRt5+895$k?`^`Fx3y+4mc z?)xJ6`I&O~pSthN+xV*wG4yaAcUEw(z3Z)z%#gl7AdC6V^nLqS=W$5erXQc6+#35Z z!s`+HKk1^~_9qrhd}XrOwxDKGrq7}B>XV-f7BgPla7o~UeL~;VyNa*4*YAG!=&fo> zd41Clm2=O(-`wGHe?~$ti_k%lW7~OTj$cx(jcM)OFhx+LO+@~Vk#MZ_zb3P^b3#e+ zUs_7iTi%_TRu9l-s&}RA>2Y7I@T9 zc!&PqOJ;w6^{o1HC&pjXzHqm|nRPXGS>8N$%R;~O&YZeD{QARB`(@>dn?KE8*q^(2iG9f3`w>px&pKTG zYFM=KuK&;153hd7?c00EF5>Rj^#|o@l5f-mN^PzZeb@QX>xXh{h}17{xr;i2f^&{< zUnuH2Q=ruS%Z8Act?5(WeTY<4k`_+fQRKGgeC#=i{$}-^i#XI-7P;hrOHk{hj=1_4&YF`*-TQ`WO8O zdG9Bz&giL>vh($K7LkOvz8=}K-!t_u>0W)tnd#%M;#%4yaj7XdX+yTGjr_OIH3z$delRcyGC9tYW;?}NTKdBK>V@9fTeBb4{tvoYAMcvH|KIfeum3-q zdPi;V{iuz%RPuhm|IZo{{+khN=C4DuufD$fJtS(`PU#~pMzvo!TbmLWHga|N zhrQM_W~kpC`#fe#%oV)^jt>v^W~~l9b6xKJkBEi)1>|dX+&cP4kmt8^>4Of<n;WVk{)vP=l443kVO0Ld-Eu|yd#lmvIpz-O01NE(we{4=?Ix{y( zD*S!^>*M90{y0=P2t-&WS52+6ex|VAZ`N*4Jtx=5ZGSHL%=~sRrFQWHvxOJr=WEWm zC$ru1(CwPVUb0Our9uqf8}iD2*>wB%?gK|e*4OYxe~o{zeD^H7ckhm${%Em&wffxf zBFBlbe-#Vs_Nsebln#IOv$l(gGqEG#5kmya!Aa8}Ts{4VWygh!LTX8jN&+k^Ds;Ko z%KMrxGR%C)8gPmI1<#rC3k&QX9+~2B!6^P?;v=rY$yQG<2S~;7$!M`=crkEIoV#0j zQ=Yl2!LJ)>D;QOt9Qu~#bGEno?1%S>D<)e-@82qE&!}gUC%5sPj(uZa(4BKDe*JL{ ztJ%jS;Ipqnj6tx4K`_xaXKG6QLSBJ0uw>NJ4cF6`uhdUfR8b>7-i&V__VKH+{b6UaW(sQ6~ zE#qr0gZb>{Mhrh^Ee&&1c(UoIy+fZID+kL0HKu?XyBvvoYbQrzG|JXT;9zNH(Qs<`L`t0=G_D0p6O6yCt zQ*1Q)ydH4i+0}zvHjg=g#W)<`(N?jsXX^3 z_w)VN)+_zs*7Mt!pP|pYb-(`4xT1!Gk6b4+g?=-ilek_$R8BjK`zoh z&iwDK6etjF`{uETnfmtkx>vk^ZI!hWx^iqmU5$8Hnc4jGi|3ZVKl5|K>6c{-7ECR= zESV9duyy*?_xy)-Ho0XiyYxu(`mgT89(@@JOC&0?j;>KZVE-d8wM+fQt(l?|{=V~2 zR^gOi9{RPdO#1nPJKcwQdsFtl@{hV>N8fn^BxU>H{ zU&)ckoW}lNtMg0K%x;;b*EZX~zAE><_FE@=jdIA{wVq82|A?|Y6L|5vK(U`CrF^wy zy_02Yc=g%oja!sC7Nr^e@KMt|Zp6gAHsA>3ft*eJ*1Bp-Ki26?QMt2i2&rU#gN08v=g5m;yRZpZ&1@Gk!o@)BvdN*+W*M9!rH&Dq~F!a7)_bj zba53why2_By{D4yc?a*)~B~uJpUK* zLq9jb;mrPi&TGBmuMX`sSXss<-}WUW?!J%lzOB8p@1!>G_i^FQn{ODr`*-Tiy+&zg z7TpiD-LgITkD6!x!J9=@A|X2Jy0^A#HT9j{b4@P6h3m_l!p$j4+(M`J@!kBP@NCbT z{CjJUOYTjQa%^87T+i!QlaOp{IxR!B_4_~hvr7ci9+qwVu+-7bI`P=l;}K%l17I?8Tq4%aq93OM}uhnk?Q&lrmq%U^NZ~L*m{BWy#Z|9MtpRWIt7F;P_ zub?LwuGQEoe^h4Q|HMrXPpD*Sd{q3llIKp9Wa+<)GxvUY(9?bWwTi(sQH!-MJD(oj z*luw#>*2KjRy*y*s|3~d*S`DEG0Eudr9J69^8Z=H56s(sFXRy}EJn|HOq3HcKu)yrJ@bbK=G6Ph#hP zN->gM|M%F1g^fm+=jA;3Bi?weDeS9=l2OaCsLwvn5_(o-^ff*bvs&vK>V0Q!*#31L zF?+vRJ$;|Fz1IB3>h&8|&TH%}c+Md#H`l1fPVmQz-`mPguHMgLli-wj*i5E*LtH|G zhqT3k$2t#Wch~w){ylrG&y4s-p@xAmm;P-$_oL)NPe|mvg$)m{ERdP&DDR`NcspOn zN4d$2r>|Z}Un)Ie?x#6bA3vRX+J4T*rdVG6dTbMi$n|ft>et-fF-uZFeWgvnYt9?H z4`$YvYJ>+d$7#uZfA{iZBmd8Xj%+s-{a?SH@GX(o;D=knY4vCU{Rc{60lygd1(jyh zroR7?u=b2#^1{OUr&&!34bG18$7arVe%rP1+njsm_Zio^7IsX2@!a%rPW6^!Ro`_| z&rA_ERC|B5F!ciC6|c;`goWSU1=%Go6EezOzG3#w>fN!wpGP^^zUFV`o|5*`t*+;_ zymREteW}S&D>f~EoO-NT_~eo$+V9ubDBJg(y4^VI-SjK_`K;nMH8IV%r8vDVJUDqgbAT6fk()ho7+8`OFvk{_&IFK~tbP-#<`qvket zUd8Oce_1x@{nKB%rs4HvZV!Eb^TTcHUo|c`{Pz4$oB!wIrKf#~UlFru!-`;|*}Q%_ zYm(RfOP#51GmOm+@vZG5olVjhQner4RYUuM?RT9@-EV>-MnPEqc|5 zo2$!LJTv-rH(Bj>?yU`~FVr?1-&A=bV{6IVtsAA+X7S~}{{NRx_*nYWpC<%oIV{t7 z-+mx@S#djy|0em~#D>Ppi<$Bdz7g5- z^qUW7^D&mQ(UVLPH79huX_2_QQ{eq70pA(AuYU&02%g$);G`1c*Hr20da}L9fA?Wd znfSokMZt5tG=t-my2Tma$lUi{R}%JZ_12fB0;#c0OM=37b)TQy*?wekp7^0V zhH5IZ{hzyc@fg(2%`*tScukV{sl~o>kFY^7D zbFM$X)-BoB*e0xLbNZ{OX5X7LX+K>jM%A9=u$>a^cFgh}cZY?E`kUi_r$+>R+n(;7 z&(nRW+G`sJ&kep1SM>*(Jk}aX$E+T$Ivp2PxA=Z^$jWn>mUcI~yjn_jIDQLxxO%eg z={F72R$I2Q_I#RhQ^YU-iqH|>hkwsYo7Se!5k6u#bz0CLQNfkZn&zoDRTTH^-}5Hj zEGRAI`0-hVHfBNlJ}ileXU{R4tYqeG=r2@wbNbzq!uH&sMLDOZoz%J?-`{gA|M=RY zFaGI>lm>nO`XXXuWWgEb$Cjmz=8r;G+do+!vFdl!)tekrN4c{9-)2?$SZ*vgYn$iv ziEmAphyJK8{481fc+Irmwc^JrW9Mnsem^#+w#!o||JUMMKLt4==GjUFD!ClVm(6sW zX}&(-&aSn019}b@?n%2W=UBV+9p~C*FZV9_`XhT*=ud4O>lXd$yD5#ErhfN0JNbEj zNo3EwpqCFj6}NJ3VSD{NT=`R#={jdQxB1fFet5IV`KUbp_bT`HPurhtIhQ8#th(p_ zQbn1=^_tzkCCdXjyEmjxvdNwBZ~lpm+2@XanD~x8)WCXu>C)-P_I{i;D<~l#Yx6YI z`CPo-zqn#=vCmiY&Y%9#g`aEB?+;TFwO;yu7rj{!rkJv==)97oNK}`|g1&jOWje>- zmZ}G*m6&VqtQFIm9do|(`mSkf4{FZ&aA#+ERnzOebKl>6@~u8T;OcGV$MFUYbFxa! zZa?`K{rT&MrFQLqnIfhOZ#7%A?fZWB?qBNX_BlDf_%F#TIZxPBZ;9{!N!)wCiLJr`P_? zP@M8=>*P1`*Gv?21Ya`+ThvKMpLBiC(qzHO=y2eSaQlkT6>FUP4|V=fI@2W5@5sg9 zY4E^}lgT1_oA<``?|R?unDxc<_0^MkKc;5dR?m(39kXib`(hqrz1iPcC$#8a|FI_~ z*7fL_R=Yq)pDSk14(#R0cH8jAOnmxo+196zmsGxF;#_{VD^Aa1-Jwt4f2VoODXMHz zOlWWjX}r|?=&U@SX{KAWzEF+7>z~B@x-G4dSC)%x(elz*#QY)Q-pWVrTu-;L#}=(< zkGeHG`={m+Kb6Hr>01k5o|*73(`w<~u&Q!BU4!Nu4{io22YK`@epBj>v^*3 z^e)UzsOp}X0y-kLUBH)j!$FDrJd=V^J-4K zlzjTbPGe4h9{1&qR=&|K3<7-x4h((Yr-uG4DHKK}pyk{srB zEXpQJ*Ps2zeS)*2LQ3|@+3)3zW!&F)w6!i1l+tjRyu_w>pC8NRJYW_PhF3uOnBSo~o~l_YR6bS-xk*qQ(Fxv*MbH_@_IhFWgwByg#@% z?TVu#`ziT@Y15b(bMyC$H#M(boVrEjm+KYLlTsUMa-OYRUiO3Qn4|dX)jNN`tt;DE zx8`qrqn&?@>el3)x>21^|7|R&{q1yeM|w#@S#~&607I!%mcKu%%)SZNql+u+YUTti z3|9&(-W)UkiqqTgkuIN3X4IFfyn45F{q_s9LMS{MEN!~Ww6JGyrlWJQ?p+g2Q} z*~jbD(4r&2yTHlh+}q?+vr85T+OTezb5MAeneid12C3Hclp^6?0X7wp*~XW*Jl;|l zUml}j+ULo)aCYh<`3_f7m4fBO`3=z|3rP{?P#r*A?8JIiJ&2Fhoho%q5qeOv*sw*Ets;5pUHXMj|a;xE5zTlm8}uzQ2BY~(YMy(47Jn?IsYa$ z#(zA;Q8dZoX#ApY7p}d3_HO0GU3;s3OtroIbbZ_ZLEWmtY>Unv zotYMkbtD|xW_{~x^e>;1&=hwx_|W31wagEH7|an6`@}ZkIS*?-OYeG{`op&$&f2hK z`6lyyEw|35#VfI`U7+uLYR>VNPY>%GOqto1B>b`c#gcVxdyeMHym(Jb&R-(udP*-n z-(l?Ix_^Do!_S?`mJ(`Rn~s{il@{U+nfXO8^Va*2b20Vmu}tSUco(;&{!pl^GM(qG z5dGtZzGBYP-=)V+l)WqDxw-2+$A-0bK631P+Aqu8v*Nt+@Wg}b_B#va%j$_;c6EC+izX3o4TLT%l^P8`$_@Dg*TH#+HAB04P}2FadK2#6sET^ z=82#)A8Sj}H}0M@ZrApH?A^TcRmI=OLCHxy8)S_>ls;RNoFp7&Ve&q~rtNy5RsHAg z7k|(Ht?pf5a7d@&_TfU-2#2dp$D?9wC-B{=xFz6ZnpSxvXHy8bOO8R-%5!H*5_x|2 zR7L%hRWla;%_#Ij;-b`rCWo#4dFr?HpZrn&&Z?@I%;PEQrlY*G)8pUW=YKT3yk&Mq z-u*fG{-&v`Dr@cL7+x-yTIxHuTX4S4{{02pI%i}p+SswwbMD5;=Bp-doOeii_P_t( zS8hgx95j!cmZYbh(HiUhLQ%-JY(hDU^TDgRyH7QKoVxVNWs8RGXFE(U1*{X@kRf?y zMU9(BpWfp80peQ=|2YZXy#LH;$79p8@m{}f+i2hMC|z8rvh_!q{l zuUmgN>i@2<@BO~a6x)4GU7(dQ?PYwjlieaO4Yiq8 zaYAFG-hneuL-psqsGc2S<0rv4QS*4ah~fof*WT){`3w7`H!N{6*vtR8QstQx$6kR4 z!jt}4aquVg)N=p#j>uc$_ayxI4{i2KUC-vb1^<6{aj#SA?o%C7w-}DUd3?_EZ@Di^ zPU+N3Q@rw`Pna97d#``ue(&B}$91-KKld;*-RpJM`c+kpYPrLUF#R9N=bxMB7Hi98 z{O@j$-@!9;NmlHsz4yK=^JFdNNIhP;BKu(9`DD)CnDr;QD{iJ$m`>sA`lQX%FWvTY zZvDFY^z~~dER1ozzG(AB?Iymx_rLx;raMP1>2JNal0wPjLnlrjNWbr#ymdiDU5a?< z-Tz&E^|PvWKiE9|qL4g~mz2xHPLAJeL=1J=3N5e7O(-}Uy>fn?3u{}-`X`@$YdWWv zmy12zlF0hz-;Vdnb1iMVVhi5uMQfGH2;9B#C+B|nyR%h-zT9WkbAPq<&z$j!b+`GS zUwdqFmIyz%Q*HPrxn#qe%c83`D2po1iku&26ts2E{CQ?w9LlS{*p*D1d|&@?kD`qA zTZ71;x38rSNc`x(di2BA7x|CPkFja4=6&ncG0*+Pbk68j9#PHqiA}*1K5?gr{&l&i z{iXRIfBKV?f9BXei}wG|m&G58znaQ>-8B5em9Vuw zK41R+*>WfHPko)2TJ7r9pDJz3zW%Z2@!Xz~pvK?%=%^a|(W!HGv#6{1ai+N}*~I_t zk=jF#$|}=WCQE-DI9kNSvX=i2M~HV!*lO8c9<~J~2c$MQxV0YmV%i^ia_XVjZLF8} zZK^$eTO-fHq=HTH=))}q0Z+7QrS=Rk)o0b3Jox|c{lY)H(^&jdF zrWa^lmee~lHO=s^&~o()ns*-=9((mrVY!k{Ghct~H*txtl2>-z)#aMG=EK}iP9C0b zyC2=ic(i8ToOQ07c@|q$Uj1^|@Xx|hvA@>af3I|Gn7yr`i-+-*`~8+JTx>=v^>4&# zRB!J$Js@#(&ZN?Pf$XxUjb1)J{6+4xZqq`&_k8NJ zo$ndRZrpuaTH`~!_0-qOoBiTAI;@y~KmT@UZH@nm{P};yCj70HIXSK6QR?rq>j{f@ z-}_kk&-2vVm71TbxBgIhzVz?E!)|OAHZ7$&t4|6QY+JjJ^QGm{fZKmxCa!VI72E&w zC_8_>ctYWmRSx3**+tf1Jw|pZX+=YB9QOIl50hM9t*?gQtnTnP*J)JNvR~YhMod zb^4RXF%2V%VCVu*Sl1CxM*jQ)f=9eD-W+xsK?Z{Zl zFJbmuGw#ZDef{a@eBb8W%)GdA_jIGlOQs#FnUu+rRa8CUdPQ7C-=0&oi(lMOaG1HL zO#Ww^GLvKGyd@D4~JPGi+Az(e%*;j8)h|Bs=@ngLkePlKj{N1>bWx?9{ z2U#~Lyo>={i{}q_m(8fy!Y6$ZT|aL=9043 zj&j^rE7!}q?eMyMXUG2n|83fp`G>EVXYs7hZZE(8vQFvK#_b<>#T_zQ%Wo}S^mIwE z^;edrT$5fs|M_``o*fo0kNE%Z!(pC-2mdCs1^sY49>(dWU(UUC?XN#gFVai3U!AWm ztMoBG))rp0R>AJxO4BkUCFTo}PZr82r~eb^3D}!^;k=%>+^%^qUfDjl?_4f+takcO zC)S5c?F1gKte+!)I7ssy*B=ixT~qhWjkBuy)SmuE!PRX zaAK7WEWh)^{nO3tDCb~EoO8t&=E{b4cMN?_x|#}PewC@u1$Xa zi1Vr1k=Uy~*B?D?wr!T`hAaCg zwWx9B{n=7A_vvz5#pe|pduG1MPdRCNt-qsroBYww^%nkJOFeGX>@!gdTm1gw#M!Be6qwq!v03c-FuR$v;7V~ykA+ry&&>%{H6>V5$7>Utp3mt;mityR9F=i@A}hwcFkv3&gGXW_j#WwDn*J)R-~^|`Z+EXMS{!mu!vEa*wf~QX2DGVaoK`(;ADbN?(IhbI)yG=Xjm#rOt$jO6PApT`stJrP8P6v9Ht&wqL!& zaxl>|e3OId?0pBdD)twy3Kr@H+=#c!+eNWZPrfAY~IrhVMZu6MT= zIVbv-%3Gh{XER>oyl2m}=II{IYwv#h__~m*BH>kKf>g)rS5fC%bu3;w=>C_wvcrN= zN@PNg2RAR%!G;YFbY5_?9!hkWa7ORe{q5&0&e=$<4D!9}DlxgUYajc8`u~69{PrC@ zoWF6OVrAY_yZ4_jtADZkomS!X*^awjp7%?8&Nch;rQH`Mi9O&u@wevrRX6U?b-eNE zu43EY+J@ddzv=3RO8;E*V6(=3+zp<-Fa4ITtLmE8+;Al|^p%-$|Eb@eY-#4p7Ed}; z8ZY5>Y0B~qjAhr^*jv|^{W{(7``bRTfITm^3fC3-uQ|-QcHT*?rxn-QOL&&e?rL#l zs1muW@aE_Kr^Ou$$~|n3cg24&O)O5XphLWhTwFt^(FeYRiM{rNFfCpOgM^eX+x z)DQL-|G$5If9b{fzY|ZcmFu%jpVoIjeSOQkg1cwFRqwgszx}X$T6M|IjYX?l{-o@C z@cHkGSwFY!v|TZ0_t|F>Yx`Z>m*}1O`?^7V_orOTl^(U)#TOR3o#xXL$b6MG^X19E z#^;W|d}C7g_iu1~j!e*9{~vyv&KfF4Br&bta*=Ch`emk$KbPZ*@@Bl(T(fU=iRou% zpLc9ugCfrrt|)Roe%W?U7ZIzyuvB9@b{2M283uF(JVY$jK-iEr;&Kh87>y3X99i z)|eV-URjeA`l9A>NL~N0(knwyyEC)%Vvsxw|nlIX!iRi>1uz@$G%clD~cA_krn>!`~P!)x+lM%e^ULo z>;GM^;`j5t{!%}+O?Cg#pZeEM`852=YTH-rzCLfiq2O|>PoV;9noJTUI$a*7et9V| zZ=cNa>bNLZM$`XQcay*6Zz-v}cly}|1{b|KGi=;1elw1m>(-l=DspvlWJ-c$`ad>WF0K8p$hy_M`MR*2?y~N* zHCH}eb&PRiD^*(&vs?GE^s#n{g|D;I^n!msjN1NIX8W&KqLuSAsx9Vz*N8WJ!2b1j z-Mir2e<$8KKA%yN?!a*W`2?o>GO9Y8=DSv1TcvT|fx%uicACZBFXHFtGI5*~n_#$3 z{zyucnfS8ZleY%?CaO!CCNB=YaNRXc! zywbwYcool-vmKZ2H%+i^nLJx=MY7Pmyaj>V3c0l=_ul>g>=pC8MFHFQpSmnxb^HHv zPR(Bqk`j`Z`7@-x&bw^(;N+c$J*T%N@D!yoYNka_xmgvny{wOb-(s zZWxG3v34~k9EkAH;ATDKXfVOWO;qu;)T)!7ieX)=@5W}usxv=-zv91)^pCf9cb&1~ z-x=~XIP`ySs7ilGbDiI8Zz-3Zf4*=pbuth95qWpjnWFhI#k=*S%%93Q#jQFNweB!q z-*>Tle+-RsbWCn6wyosb&am^H%-i=9>Vi9;{)n6r$Z+4Q&P*;#Gt(i7VY#qG=nf0h z;*i3czmuBcN~eiQ{`O_)e7!gKme7w+a-Uy0+^>E8bNh#lSMwFT7wmhk*89*wsX{~a zz0cw4(_=&YP5$g(6dM+!a?P;vh}XTS33|V>_dR@jDM0P_C9hrVr{!4#a&F(e{;{_1 zf&~KuPrTzS8Fnt#@=Ibz@7I1!G7m55`u~4lhHujRYg@0a|F~ai_x}C=e}&hi7e(!A zE17)j#=ApY5~)4yl@odOW^R)xyk`G9_wDu@-a+?YyjUEONCYA4Y-ZiW2_cANDzs_8>{E>4~TXAZ3l`a3aO$&~lw8_}) z`Dk;}w4$FAeN?WPy?%Astj3|hp-?&EW5UODzbTWJ7$vVk*mV(aztl$_+18XG)y z0@S{XajLd9d1l%@Pn2Ra=v24WojrB3;?w4R(tZnOo%WrjF2TI6`17nVsniL_{_dEx zYgWXkTBG%oi`HmtYCQNt-tAt+>otbbJC8?~GDe8b;CSqRRjlQtw9b(wwLYwJ4?Xrj zzfeC%A@GL6H=R_qRn}|ftbTnq{=VFjUtcS$qD`7M8_Z~xtPs0j&A=aZn=yQ!5##BD zPs~p{?q``P5OObst$Yga>!fM#*K)rpE8^)|)w?=of0)X?cfVtn*2Pbp5Y&~W&#_+S zjs78y-lK2-Tl1%dx;s5h;A(C%muV1h73*WSWNbYD>|#%)%|iv1hfOSOH-9g7o8)S~ z=+WyQ1#XdDncI)&C0cc#GVSxZt9vCi{Qf0o4FQqiXRg`@>d;$>~_s{=wcRzc7>-ns!`b#=4 zR{N<7r6Dmwf%5)MYu5Hd=YUc4%f@u}w#|BxZz-KYs-3QbrACGJZN*DDv z*y=v(@@%`Dj7`5iE2r<2WO<&zF?}u1ouh^OB$KZTTF%RzeyXXhCM(NnVaa*6(zeDh z`P7T|(_YrAedy(%{(5ShWnuC@lj)`2Syx|+zrK2AeRzbOda z8&5N9keP{(9Al%YPXm+sWvK(#zAmgwO4zxpy8q9-`1&Z0uYb$--uk_>Ht6bg^H={@ z6@Jt@`fJN;F0R7GJMM4%)*q1kR8)oYPQSvo*9?;Wm1!KMD;LOD?n>^om(fcW^zvEo zwBUokf7&nmo_h|Zj28~@q^6xTjTZfMxw>{^?4M`sZyH!1j3wT_f@if2A1-?%Wxeu}XTmyezYMvT58r*cbhMT4RdN@b!SVRe z`wNZs$F1|{tX^XpHcd5dNtVpMzfphVzh-^#+&f#o_2d?l(}!YtyI!8wpZ>j+C-DhG z(6N*LhTA7<+txa~+x~CC?9f;Cz4x0I^4xuu(z@lG1;4(;ajT%uPK%%SSNw4BGI<#n z>i+j@hrqAk1;-=pj@R5M`v0SPahtuX<83{=t2LHt{nvgtSx>0q&0pDEucq}dW%Z}K zY&Yi0?>qZZw74!q|M}Fr%?Ve^;LUo%hOLTFd( z|6^avW!vtQJKnS1%aqc{dA({*L&Mcm2U%VS8(yw`$nZy};_GRz+0Xy4ia)=rE9GRp z{=9!2oTpb5{wgWnwC|jO*|PZi6E+-of1fYjaCc){vi!;$o_mWD_Z^c~yYx8sYyHL2 z-)e?$Q||uj4lU$6(!9Z6JoM!B_l!ou_f`Bpd^ljud-_9=u2N*sb zU|4g)@xX))oc;m3eLJ3Cn3)``^_=T|Yia{yg_+}pFU(>8`Vukt`bu=J;ghd-ydZHdvQDSZ>2H(Sje4%>x`s3S0~e{V zTx|dK+U3puKPPsl{{0*IlGj(i3y9$`KawW6YWal{RDQ`1NCYey;gH&J|qpE^YVz$u~U~ zF|bJxi40%$;bJUvc|bqgzgyY=7OdIv_th%-^!5J_D*Cr&ZE&dma6EKRslmS2>7TC} z|Noa%RCCdLRjgBh?En3375A^{A1;yjWw%nz*!=&Y?|Ydicq~`9nMeF#vQsNrC9DB$niqS^QP&$Cx^PP=HCAl~ho zI^|gN)_U7tj#EPNg8y@KJwC+TB-hLsU$rqcYR&(@vd6C`)kp95z1ySe zF|#mw+3SOf%e~9F&&N9yYiOOmSpBKu6@OEy`d@8-#2vW1 zRiWYK{%Ohwa*~|BKWI3-%z|a9()JS_-wz*PJ-l{0zgB0^TGcS4nX8g|LVL5eZ94bA zQt}{539vaFWwXyCJ zf^mH^482CiA4ECq=3Lf_yHYAX^L|F|lD^L1c~_q;yC0J}`P&^yzlUk0as9iOvY^=Ee4zQ>$K8dIKJmYKb5+n!}1o7-M7Y(3U0P`U8H&ePvs zUHUV*{>Hi253FDMUiRiK*&DHu=O*9weva!;fBencZu`E=}M|=KeB!~jrF0p_J#(A+=XV}Z||+0xwY!^gM@RU_pAFBq-@O0bxO(Yd+Ycv|_SS_Q{u2AMzjQszld?=5rl8G9 z{{6wV+$IaBh=_l1DpitNH0?8ApDWMqqlek-_ozxO|6Y1wPm_kp?3;YZ>s&Xtg6BIFGQM0q^Sb5S!E%-Zh8kr8mu>gBqzgPO_@SMbcj&4_R&|qMrbO->3e3ujc-% zE>l?c*3D?=?w!aInRi1si{bwT*ZD>|?X$dWs&@ole&PLZ)2lswi?9COX=9OD9CqX6 z7X2SNTgtD_nj7X_CR88hu|LT4N5V`yr|=MsTCacesyh@?=N|vgy!qUA8+YMe^~apt z*S2l@-PPT9L~r4-=xeWo-rLrzPLECfvE=T;zsvL3{-kc$Y~%gu*R4Y`J8qukY}Zx_ z`eb=%^@)RM+1ScZJI6x%bc~vYnOy6s9ro5U`K~kyuS;U$?A2g0I@%Je zr1j)ZTEQXFlDhW2@%8ezv^`(9XRrIuqL%UV=% z&ntNUhtLPt!_-s%UjLJ@LVoE}qeCCEW=56<)II$i;(I8@*-s?LTK_C-jrprhhZ~E# z`lmP67>Ou;xW9P9gDVyXuD{vP@G5)G7A_TTnZ@FBuJbUj&=TZb6tM4&)Ti}6|0Z9E z-5Z#-bYkW4Q}O@Thlfv?y))_6>8tVLlP`bp{NpZ>eN$*cWnH7fLB}vjIf2{_l^-@X zWNpidJh_Nn^@9nUvCW~Yb)3@}7l6&<2xGmay<4lUB zZ;I?w&lys?7M%Nbb?@FR77cfGpSep@v!5^X&uo9r9ChH3DDz3?J^5yr8QrfQdh4Bi zK%@036G{~FxZyf5!@nhMC|SM$wwFk8pSx@vr49HE~s7FXSSPR1EYeWj?0Jd z@3hY{%3rzq^GM4U$FL7mTyGyzvb%ZQ`_$j+tvhZ}pKW9uHmTK3emB z+3F9WYoqLRRy|{CXa3o=Yjx(Dh$H38bdz62E&X8hScRdFkNeje_9x5s@P4q4}8aRx{Jvi%u6ALMP1 zIb1CuhihD!N>w z;=%GZBkk28hZz}`<|5lfU2b%zv0T~X>s;>n|6-m0kw5+~;$v5TzqRrH*dvn(;t6kKmDmb|LUDh zzrM`dShu%4ZtuIjaeJp9KX0NgVa3VuFx|EEKJ(rO+fCkn`XnGaD>yUZ3Tu+qN{@e? zYs?Ho`<$b0WmqL7nB;7{>rjySP(u?d}4IZ}q-#vYI z?p&_<%rX8>YRH$_#%dAX!ZN3RmH2M&@Z+kU_2i4-`|I&q--SQ@TK#C+->AP?zLN3J zPCr~I^}lz9fk5ZsuJp?eX6{Sk8oN8>cAx2AuaYTp;_y5DAD1NE6gb5%H!?CFKF?n- z8YVjLSo#0!r}r(KBs6XL`oB*<`%Ti8@KxGmZG7n32DvrY4&8rR{z}d2Ld~>0=Brtr z&Ak(MVXkN0k?+yvN7q(eU+tOo_wJwg?D%=lJu>J26+CzLdGf=`Psi0?+wiUbSzFJ` zku3P^&nlb#|A(D!7<`Lff7zofzUsl2#mx6_MDNXx(Nef`<#d`vo1tpptVu6QJHGwj zX5V*QCE*JHo5&Bh6PkTCZF>4S)aclcjKi1D9kwjHb>yJ#*UV`yrwe}9sRsR#d{gu2 zjpSxCNlot!)eq(`X5A5_d+B!W?LEFL{ygka&W>zwB3W<<+Wn9hF;8-F_z- zc66cptXZi`_gTa#o@H$3<`ZJdxmsfOO?-j0;YP&^(p**?p$-?<+s=DocAWLx$+Rwq z11I0#O_XNb@aB#CIeTlfe;i)E%}U z^TU?y!aRvOr(W#ek}%!sUzU1IAIBfl+k{?$tF_y04tDi>(BnKpc6a^ZM%V)css6|5Ef3FeJ6IYXll#I-c|G{xN5 zq-;L{8VjrPJ?^ge z+h%1^m~k**=8AgNw{piy&A;4@RJeNi{v_+f$n-61mwx-ll{jOAcbg33t!=J#eJp%N z5%q0{wyH~~EM1$O7#DVvdFzqIC(lW|;{9}M`u{EAD|vn{eH6BD+WwezU+NoqnvQ6# zdbNE*L%7|BbWs7|y(|C7G($rt6oNUwz+9?hh{MwV&$3cf`$Z{#RhZ z{fm#GsAq%Z?S94eW&abE^B(P3`%h!l`A`2R&zir!OlP}ZTut2{&)r)$3!e6l7k;|% z^Ofzt^pkdNUElfb`ugeno7XcZ1k9Lj^*!xsJ|n|NmhvMzRL?)Od{bL_?;3 z9M679?_SRtCEe%Ef9~*q)%(A9-+wr1&r97eD(N?O9}#-xBl~sFRQvu*mRdRL)uKG% zVS=mWdvA(OWqJ4DU(Bo3Ngg?uzw7<7oE zOPQoz2k@5f6l~e0Cw4Z-dB2RbL~NwtBNaTolp9$aMX7RXwZ3j~RbV+By!s39OI0I)BmjN0#|IGdFgXL{}a4<+$r%L|EZUv`vNuy zPCgLNI`Q+d6(Vy?&g`iDq4P#ZXc4o!PxP}OqaD+~U*?#6xM=3>eBM)B4^)&Lws!Sj z*>_91%-}%T7L(%<50Y7)ebSlq#&>DHTtHFLKE}3ZAMYRAH2K7fSwael-}0_J4m#6s zoot!!^K&2jtXa2=xGz8aXZld>&hZ`Jm2M=gdi)^7Q*1)ejZew{kKPNA{`0~proKRa z!Hqehfv>LLblY^|Uz~09+A9CJ{VYL`;^Uhnb@la5x!3+WTGuWA;kbm>0^a*=e+@5} zfAG91>9U_y)_iMO_L?Qt|D?q4UbVUX{fo`z^)bo3+Ke*Cw&@*_&|o&RIN@H}wf)`A zpa!;z3+;%&`Wj~ z)E7HG|GE2L-uz_scPEOcX`T4Nn_HEb79H69HQFVa@8R~ymJPy>_B$P1=;|aZ4`1$cShc{C{m~FoK#)M6yG3v4s_oPYN*WF&k%p_8}eKVUmf5KMb z+4Wb_y+ktqw>;Z@Z|D5@-|FQre3smvd{g%SENemWA4>0zuRkA?A;o?1|M~PMU6=Fk z91Y9)AN2gig#5eD&c*BBt*-t*;f24#-mk1jJQ=4=FPhz{P%3}--BVAdW7hpW4xKX! z&dt8<|NPyVgEd!!W9w(mRlLzPNtk8-eG6u_3yh5dmkutp?S4 zu|)+B?2k{hJ@xp*e8!Du_Jr0bvWVAsH?hsIePDlVe)n(J+uylT+kRFB-w~;3DA-;5 zx0UZ;*VXlh3ROa`v)vWrNPhL!Re*^_L15MK8517N6Q9q#Dr%<-_dDwe^SpBQ1v341 zo3+N}XvSO9lTS?V?Fsy}B4^_2oBF%!UH1Lw`tCAk?)4NIzvaJXT;F&<(%A6y*Z&q_ z=PV)$S9*KT59_SI`$$z!@#cKX$Lfz~dr!XYxjBqa$G^O|N6z2T^CQ5zhv}O!$wsV{% z%kITmR3(1*lyUjbr7x3qX~ykMnzs6D_sh7oNzcFkO`iI?wd%^WtzXv1uAKEfc=_w= zO&ls8`PU|tG#0k8sAwPhT4_-tw#rkNuwCYioTTZ(oL38yc~O9hUtC`-^1@6{9>EqaXT5rzQ+e@AlgkF|X{M9VEA})ia%%4-^EMMve z%Q@z3&so>xD_(az%o?44Hu#R#G7G6Miw{ijJndk-aC5*!uX`_9)#eoa&=nBoWI3An z>ukcjIQM10n%8{{t+wwHy3L^KaaW_{>>f2CHW^O)x%q#WwS3ro`psAA3T=L-{i4nG zyJj`+d#AeDj`Q%rPmkF8_c-6Ho|s_z@KsoF&k|>+o$jtHXTAS={q^+X*jSg zI%`KPKmGsI`iH{o3vPY$FBkHA_p@QOX!aj5$3WJk?d98xZtX9Sn4eVm66R^PpQ_u9c%a*o%z_Py}-d>!%hYiv>I zJ?)ySubxcXwfcwiqyC6s{XhH5f7a{}*i?E=L}2w@&a0sTOXBadXZTmIbb27fcE%<^D`BAfYweuYVX^WP`m)H>^As&2d5tCqi* zWmDVLZ}G`0+#ly|;dI-#z;MpfePz3MKi;UnCVuh-w{=gyuH&fQrxtxgx9a*44}bfU zA1qnBlnbo$+5Q@IG`!<#-*~~twmqw-=V1D3y(K@-<(IOUef_<&mL=zJ{O+&sR+gx> z>#hpDvTy6J{jY9K{ZucutNKI$F0rT(LfOZ&Z^)i~1zD?;2%a`en>bYbqB%?0lu>S?jcvU2f@} z?kTA)9VPoe6|4Qfwvs83Ys#dxi=X_8^2peD`cA$778my2XXdUnTjKIFYt|K$8cmy- ziw~4Z|8+C%+7rZd>56H>j~5vt=N~<3h;#a$@cPr4^##)|3tayuc!YK94}(`xtNL$g zX4t9kt+Fjw72101>8+=Ad(D^_C1&iH7pKRb7G$^fQ{#=i4|%ez2kwN1hUYNnFTC!5 z(6CUQM?pa)&Y|~q;Xkbp(+(+leeut4dKa^H%Jvug{uqR8RX_UulDppN-@96RJm$^k z>DjV>dc_UFd(TXbHrJXntT}XX;~BOr-Ua^^H9c1dY+6#U6Q8}}&5i&EE%Bg?THen~ z|Jr_u*yM0GG5`7NJA0R{yChTL@ZEdEPSJm28KYPxlZ?JxH@o#0Jp5nnv zVejt0W=}MzlHfbi@L;iB`lFdY47g;^a7-u==&M%#+F*A3LiOZtr>8{7wXrx}eh{-^ zTE%-g?t^X<{%`JX{1QGCwx)o)tvVK<|`BL z?!UPYgPavqB^&K0XCB#SGu_{R_53HXC&Dk(3kKcunfTG{;sx9HO*1|8j0Gz{D8`EY z=Ct?zqkYF?%gX%1N0;VS{tk)874`f5)Kz*Z1|NTC@Kc zpD{GL^mAM7Pw(?jc0F6@ZTfu$U*p@WRg*4C++$i%p~KOy+$po;g&Ze?jP2n&>Raz| zIo;@dz!%T>YM)(l`2G4t-}7v>Sl3_7{uo)ms$+WEcIVD>t>F6 zO&06*vKE$X`D6M1me;o$1$|DbKYG@M`FShrN6W3%x5747oQSPyI{P?)O_f z4wuZoz0SYr*I8Nf0yEti+xN_$e?djy>=O=^{L}Nzd0IWIS5+O4t@!_X`hy9cZ+@9c zDcsujWIs<-;>o%GS05f)xFB+M)YDT_uKaH@jLUj+`~QTO!PB3MKK=UVX!+gM_O`im zb?U#qKasxwVbBx@Uv}gBA{jFLs}$Q;vPsl$*gm(mQAX~v*Pf)8Yx%?f9iGG@yrbf% zms#iY;NPa^|Fr$$^*5gUTN@CueUtjPU$5@}<4BnPW1;q4W`isKYrk>|DEdr!89l`) zgmazU&sPDjpPkxpOQqlG$0mjLr&S;Kd%v&KQvSB>Zl1`or3TMrSNq#;bh+imW!4=s zy?LLP_sfaK(_|ISHe5exo)CFWyGCVqzu(+v<*IvrbKiB9TRH!~`i@Ca6^A~2nj9y(*gyQpsQJ0gWOLh^67_Njf@ zdi|l7oUyZ-;rAD(EB~)O-=q|Crsyws;zu`^_fqC10&i`Zf|%6R@0g^z?@CJZ-@W#r z|M%86hXoDSPv3v@x$DQH`*p3148O&S*S>a0e=_$h@5bbc`qigj z$E*@;Ir3ZXVdDA5mJ`2KQad(Y-WP8zA)GS9(Yy6nXUMH*|Mnd%`mtd4k24ZV2CuGr zY0lt#Tm2#Y^jj-Cfg_zf!rZ#Ce>lIswmdttX7iJvyZ;{j-uGx?8x@`ZkX?x8@W8>2Od5{0yZ=5)7 z`jwSe+?IW?i?W%&XGWt~a<@q4x$Jo--)k#6ZEJY;Fplxms`N$Ie{5>E(Nfo0JO5UR zbYXXR^WWD=AD@)+cFfu_tJ~#3#;1OnKNH$bMIJ8rJ}py)w_YTE>5rbW!qU*)sW16r z-tuU)^*+omY`hn!!##J0RM+-~+|?Tj@1(Bzx%p-RPt+@C@12v@u8GXlNZXXfee~|n zfL*hN($w;I! zs^Us-^|JQs=G8lzy-nI5=bO5^^2T0`-Kkxn#$Qv$G`~Z>=ymw1wyU|W_g}nxWZyJPzr~Bw zFYyu6BBtZ#J``1O{a?+RYwkmn7`}G@BjbSy=tn^{-}3)@0pGHBz6|upR}@c|FicWbMK1GEI6_u zKkGJEYvS9)nyb!M;=U3Ro{Z0K%RD*$O?O&vd-e~e1*}P@r{*-j{mkjM|Niacg|S`t zcOAXkbvLtYe;SUifldmG=ASYe7Nn=mAU>g zf9*vsvIe};H=Mk!`s20_4L4V&Cue!Q^Vaxau|wK0=d0r5<-M*k-4f|immALWvf42$ zd}DF`$C4}G=U8x7&p7{-^+W52Lg_wJgFvA?d&+pc$|Q)ln` z{H{OlPwzTRay)ak{C`IEAFT~F+L2uwy-O<=Wp_@#w)&c8ZR*{RQ>RCWH90!8L>Vgw z+%!=9d*?$}jKdPOb-yE&Z#JEnX203sf<%F8R`j7N$%g)_eqOVbQ?EQq-_2WCTy$;E z?|rOm1N@hME&cRwd)d}dy?C$Zt9Jf6_G88>rA;;5jofukO$QGaFf5a0^pRs=k+9$v zaQ!LlV$S!2A;U(EpHD2|yJ&vOkr_2-*x!7K?SJq@;p#POr|I)H)~R={ceD)Gxwgi; z_8TYv@!G;}Rz}|)+RPaxLJWM(2S0VBF;+*i&PjMw^dxM`98ukypI=n(mo_-4$a=tH zpXP2cpR6tl~1qGxYAZ z1nBI1F|qXgwC;(9+l87x-nTlvBr1hBT4ip@YmP_p!uLa8#9WV3iC_0$I3#NCx%GF} zGBhv=*z!le7izd*f8d0D1XrR{n&9p`Y!iM6HFSvxty-lA0{Vf4g^CA4-Q2xxe{AB< zn8cv{j8`PjZrve)N44`392ZQRD7Y+n!879xp>tekUs_OlW7*_z{wF)8Tg}Ph;4j|b zdzCpVi^Eg0?s=k7|7`CEKMUXQ6`NQqZ5RLRU|8nVEjO$T7e3hakhiMu&zy^1e>P8y zn#OMWbzfebNNSB$;eR#v_bNSclYbXBhrKY)|KlhVt-5rPSt{4D(=FNVcaL634l31L zG;8MnFI78Vg;sBU_kP;S{pO)7GD6sY*eR;t*qd~Wu|}rfU~#=rfk*LlZQhNC8g;_$ z+YTiv8jEen66S6?xYheW=(;M0EQj_jHk=IX3|+UGWVqIu#Ld5YAVWf6&8wsvT)Mg~ zcXEwhCd)opA{bGn!64Vjv4qjbWMa7erYcQ~LuR!N4T}=H7=O(<+Pd^pb?p;jr(OB# z_urZCaF9snVPg!mc<_3UQlf|Z<;d@Dtlf2hqpIFIG!ml$@1usTztN6=~?|hw%g}y6Zcm5a_g$>sx4Ca z^S=u{n7%Nzzh{Z_8r5L&7aI#}{ImWFtgZaNHulOP=??;m&BCFRKdF?k{oLrJxAXIw z5B5#!h0*Tk9%k6q%-O?sTJl`OIR@q(LQO7`IS=Nw>UQ(Z?f4hJYL7wyb8q&7|91Uf z8Fwv+|6F^zN9VV#w3Ot%q`U1m^P6(6oW6d;V8`)>sa3^?+i&+x`u+Qtses|kdWR;l zkdMcX-gw2n>F8O>M`!O9PT{z0oBvPc_`j9w{N=yYY^@dl;jBN2b?NSRXZuzy7MgPX z)s9(TuOzGtn_$5xZE*YEl7@~F%bxi&I3Dg;A|iOGY0-@a#>f8(80`MMx)yfwl?3b5 zGP!vSC#Rj5z@X(Bv!r#-F57o&Uklm)7AU;dYTfBlWwU*r;?7<5E#|wQ8wNh&HvUnb z>+4W~#0f<~M{=_CIKQm3Di++V>x7x)&onu3UQQ|0;#E>rt@=4_NTyzc*D#~hC6U)EP{hVDCP zKF#i!vGH%$Z#F8G`Y!}36f@4RX)=prc(?z}#`}*x$678vaxl2Yq?288f1+VX`SJ4m zJJc=C*O~CG-+6^I%i{FJSz57TOTXs5zWTazV&?i)+Vj`fMZKO{@TNtA)hqP<=i=1L z<)6MtZJt_pT*hE;+@25?*}qHAZ`SOq|DmVs-P`EDqq^kkjBD}x3nHH!yw99$^|{3F z__>Odg|SNvTKgxw?E0KAiM=9DG1-|#>MS=$+)6{iZ>oQ`%n?4e_B8jhs?A}k=0`8d z?cHs4%`4f5JMI1kEp3;7nZK=H#CGkP7PS2Kqu}>y@b^P~o$K}a(pPUzq|M+dmx&NP!%%tk+;k^r2@Xlmy`hH@2sT8xat=r=Bi+);p zh`83Ax&4f%c>n3IyV_KGo28ev{4Pmmt5Fr%pP&4TmG`_^UvpAzfplH`m2d0TX6?+p}-&D=trWMb;mz(Mgvb z+(VpVQX-L9gBaurLI+p{~{=D=$SrcHicK7>_A0ygrm+bdc z+sO9GxPOOT-5EjCt~0jB%FC^1n+vD!=b!I%Ox80^;XP*u()fYIW_e$pI)kwsBuPxz`4}bN+ zMNP20q})1XlgZWGQkH-9EXQ8Foqjd;kedFLBRV}()E*cv+Pv&>WcWvG?HwoICUCKS z&lJhHf9*-w-$~#9xIa8LW$%@LR(^%=q&9WloRrD8&3zUppJmPF@_qhySTeLfJ`p~g z@SE@1qEEpO6dE1QskfbSJ3FIXNAixy-((I=RxY2I*g|!d4~`4da#A9arq!H$wNxds zy=QXH^V7#)UM-Yj6}8rKoz#A)Us1|_$E(?MpM6%pGcoM(&$FzR>xCXJ{4`-!2lJxTGC?@j(J z7+SBW_-6Z_P2Zod&%dKzos%biqkH9=3IF1Y?j8!ivn1+e?w|Qa;!l?uzdQKOW?K3= zf6>pLqJOIA@AxsD#a=Z`@xIn&^L6%;dg9)aQ!?*7XR+A%$4u_kr+?}*#G9B|60|Q= znbxo?*j;tmWF~fV#tx>M4X$;2`7^WLiY)kBw0i!Rai@h#? z-d}w)f8IZTra#`CJa&4QUwNyabGB37J?YZDcyUgC8y(JL2fj^7W~}50QTUTmH#{rZ3V=YS>mHV1yH(_ihcd@6hOwq_BV-R_=& z;@cg6i{JdgKkHO$(JQ$r94xnNThe=$)y({8=BZe}=t-U9{)@98KH0M7w075u`}dMu z!VelVWFP-BXU*5g7Ob0pdfnb|WViL__1izaf70n9lc?|9Z?r?f+B)SOBDum3?5aVoZYfM-XQ6MJ5I;k=0XMc$e@*H(Cj z2X)^$w$|s}=4NyY^u>LoA#aj8hk3tt7%Qf zit_jSy}y>&O5{!3eqH0F2Iqro>FU;jD`xVjel=Nh>e`o@wKi|3{ha4AK}zYT*v;R3 zt38*!_fBa$zND4cC9+7iBBT5L#ha$TXVrS_;b)Y4!+iVw*UCR%IrUO`z6zJ^^m?YS z`~J2c*~%st7f$rzoEG_HR{TS!1bh1_2M;ic%=~AO%yxRR?xUyXrWYPRaj87qc1Lcd zh0~{ithmgD!kvCA)%R`sn6sqXY5GLD{raNav39G2)!v=6Gl^?Gv-`@9Hfvf;{8KBAD-xcGR(uUv*}F&^>UJxgX;zuc;- zdS=N3fk*O_uANtkyRCZa`lNRy4-{3V>gLb+v3}O_OG@&g!3Vm16-kkL`qy^S@)h18p$TWFKJ`{MJ$tg$G$Qtpyzs;`_D7Fgp7?l954(*2lOu22FR!Zq zbFiK90Hegrf1(mw5*7wMo3?7@bEh(=ms}AxE_YQL85%8xG8fwPrkVNdDlWJZx}=)j zC@iSV?DrN`QxPv`zc+tQvRs<{K5k)^?K{z9ZvVbosGH6RO$=)ByjAyYdFz&`o^#Zy zRHJ9@H!u+8J*YV8(UX18A2v^AHYz@J$NNeW0=>b=RW7q583#j*EB9L zC`r7^^_g@avq#HJuY$nV4ZaI!KiK&F2)B*}r<&X2shLlgto7zJ(Oi7(m@b3FGOiNt z2}eZ^E#fK*eD3%0$D2Ql3~TJCeR>_1CEi=bobX;wBgSG?1$V&}g`MnoJzb*#F)LKhUD{CXki_uh2~SK@VZxCY;xpJ@IUZnCx-vmcjEmK&G5J8m z2?=f`rb7i2LX?y^I%1wztXMT!-!xEbz0qskqgPg6xE{asPEFcM6I^+r7VbuG631Zm?if3Ym~2%*ERB zFwr4mf|v+vFJrQW$0A|waCx~}=f&P4H0<<{WDXjL<}g>%<_PIOJb`cC6Rufhq}oSDq#D&eKP%ew03YWZ9j zIwclzvf`Y?d!Z#U2VA17oG(nCBJ^RuoSNVR9v7+NzQ{SEJiqParFU(zimKuaO$=?F zCvF-M-Ec|U@rXK8M+lprNX&(gN*gWzdzbs~I~uw_wX;xO%U-S5T6=+GuJRm-h@4K@ za`PpB{GDI?j6MFk{{8>Hwz#XAU)SD!b6PhqyIVn*)hp}y&)2OI3D+)dvv?R`ylsu% zWg`zxW?_Yb4M`vPr!QjYW|v}aU|`_abDSl|-o;u}C3L(n{9^t0Uq{tfUtL?X|NE7l zMXSq}`2T^0La#6>DbMouc+(z1mGPtJ1UH`9;5u4_MkX&t70UH|G0&-0hg+-hcIf)Ze1@ajzymC=rrvJtjYU z+k5k%4>NLDT7NA4=z3=HA&K>I`@^T-iha0Mcnx1`$dMoB&5T_$m(ON!^q23Ob7O5- zXvK_p_ijfW{>MM;zW%gZ+hGoj$hprwASTr(R+@Env~ETV62n0fF`^ZVoV zKlZUQRj=OfDE>3++NybftKO~JAG7M&t^=N|O^q7*HtD=}vJRnnvqKFulPt1++*!kX zvQ^`nvWDG|#h%eEN}O&OG;$jQNGlEKcnLA2zc)iJokjZh2(u=dfjBrh$r4ygT-`3v9lX z>u4YA8~@}JYwBSQTb4uZO-Wu1ZYuZe)z>N+?GJyU^jz!Lu81Yete->w zOXuv}sjo9{U8mr?V`;Bet8g{QZaZ|(Wl7SuC;zxQ?;dJ;)p_TLa=p|VhcD7^*Qfk4 zneMcb>w}CX?@cE4eGjg$`1kR?W=r+k&9AmUKfH)9{#E?9zOERn=qUab0k>9I#?9z; zj(DdaWw=(cV#=oU&a)TxY!M2XwDbH5i<2B4C-=WqVbtnr-~aPcY}$j|_t9V9Z>ih4 z`bg-;yH{UdeRVKU+g`u4_WMV>S6}{q*GgF=i+}%{`Tz%yz{upM zW^Q*aR+WBf{^#`MU%GvO0E5nm1AINRUf<} zWO4Xui}FKZb~DT-2b>qbGUa`Yma_Uy4-dunk*)y`m;af+>ruqn7HdxP)AG$fUszK5GT@@YTb_A!6S8UyD_I{d=3A4+z3#H2 z;zRQXX#x8VE=X1up6?d5H0rW%#^VbT-?)C9KNS$Gk#frHcExGe(UMO5>KJkND%8F8OADT*d!1+WeLA&XVJ- z!RL;aw%7PX?AM%npmk-AfJ5#p2JQM)m1mpZwd`a#%v~-1bQ;=l!ty`1Iex-}eF^i+dP$ zx;&`PvWZ{scIWmC_XX>J1gsIT)BBY$@u%#kJ-U()cU#}t{_vOKZX2^bi}tSGx~JvB zzKVqS-@jR&ypaswcziPJ&XoNZqoTv>Htp>^{kXiZzUd+Vt}dxg<`cJ`Fk8e#vNSgq z#@CsspJtv>8$~ zjMbJ$6m%>{fBE$IqSTde?Ld4CT_l1SJ%7$LCcSNW(UQqK2SU?$5s&fK;4l+Rq=!O z%y)cy)@CzF2s$N9?RvGFHI?PZL=~|UAq+G9MVQP3BvpA@tG5XK{{ClvXM<2}_`A97%7uEWkJW7JxH5$j_e~G2etyPeh4)u>o{0rc$0i6q zPOJYA$=ks0m>{vCo9TdYp_Sc~yXY)j92LE9<}J$!X^QyIb=0 zR)3$Kw!19$pk?mv%B@?LtrK8bV$jg8zIMv=$h9)R*dz5W2k}okmt|A&aq-6_BK=I;OR-{Knkf%jTY+UoSr z!M)Oxzw0lUFy-{TvroRPKfH~t;=F_J&)MG&fBsurldwIbNc?5o^gqfMZU1dZ{r_us z)cTdLpR6lcbus=^`dRyp&(1SF72M3eCcgH`xBn(TH2!b8$n{@z>g>`r`{VywC`mP6 zj{Ki6YxR>mHT!L^2{o$MM^@jGT+6s{-|XM4Y_lSJ?yAWhJ3ZyXK}qE=ENoh( zJu?4)+7)lobUXRnxYmm)K|+Sk8h5?fI|kf4&kw_y3>PxfgOLc7E9M|Hp$f`@Tp2-&iH( zwd;>y#D#f*bw`%{{pGOZ+Jow~ajES|7k#3xItH@)tSb9FmE)?XqVd@qKiMRlPV;Y% zi&&bzc$?D^ZA0%RwMjer zIQuh;i)ISfE|P9f{Mhqnew`whfKKf5J2#@jzh~W8{?4;A>ifwr|2S%%3F?*H z%c(g2WA>g$%zP{|YwKB5(`w}BUwQnX@!$FnC*5vJSeR+O{VDubQ{eIqC4(2Qf6J%5 zvT>RGX8q|H^X+MB%5A-OnI`5=i`{#7{V7|65Vt;qq8ZyS&S=`udi9}jhzWo0qZf@m z*Y-}zeAPH@PyAG^h!;n{T(MQf zE4K%~FXySaet7+5gY3T-iIX$O%yZYj7qx-MNPLF=S zS@7nk|FKOGm#~UJ`k9?y({^l`ekE@2 z7rV=k+!%ic-2Z+v<=n)DJn6Hg^rob5OS$t;S}ae~b~EP!?Iwd|T5l$cPLg&gGxeNN z@nG?r)1P)LET}S>@#KBtHwih%6}PU2$ZmMvl_45s_}8FTKm3=}_1Dv1?5ta}`u&Ob z2L-!a?~B(w{w{lRNB-{pYrZaf{qJDwm*sc+cYK^?yGY&S)4x+f~5l?_+<8!j>G zaVC_pCi=ZNy++WO{jkT;imy`)-ml3`-@w4YRqi-Tu6-42{X*TiHRa-^r8V)d#s7X= z`)hm8>he|duhu;OcD3#Q2mk-mzrMe|Pc6~wcWnRCmp1npU00rY@I%kUZS|^ln+EyG zA{8@zW4|Y;2YOHFY509KyLDOnWd`4Nb}NUtS+NhNRO(tWI2uSRw3w^D;BL@KVd&sJ zrLKJG!Io8GwkdzVt8)J6o3rED93d8y2OR+kWs-8MGp|dT*cV=QHrspp;F}JE{JhTR zzyGyh$BigDUTZ_Ru4X8*@uJ3p_BUtP2M_N_zDg5Gz2e17zJ z|BAOek|dn{m$jw^aSQeF&#=9%yYHX=Cik)h8A5Cg*ZjhS0yzyOxQe$}hJwE^^%P#nR^wGu zT5iN1Ii6KndxEYnv<(*8Yp%-^C+_-h-;e&StH0VtTwJ&?C{E2umsu;))BEGk>+4Iag+JcCxFjX;@a?{aN!|Q;$v#() zpW>hCqNyczjIHgIN87c9J0G4raJt{oc2!6D9tR&oE+NS^ehlAYGt!+L#rJRfd;ehc z%1_~Hs?zU&>&<-dGwZ*j)w}Fz*C$GKPA+|ETU}wSIYC3TRhDU061T4LjE!HS19T3C z8buT)ZM6S+SD(w#(XEN+P{Hh%oQ1`$+gp7?ERv>WSESH3)Oar z178Vw$t^R^Nv|sv=yK^ldMuxN)rvJIw#@wZ`ahRI!KJB>%`Xbo&D#G$rPKZ2Bh7+I zg5Un#uD4#HV0{1h|2eCF`nY@d?2F8b&@q{sdis2FZiEqQNXgUf#amgU!mW7|lRBDq zcZc#;X)~mhn*6)}AeQ~kVRnsG``t^g>^-afU14caaoL74PU~qm{tEWYub5n+(64cw zBj@lL<;l}p_-1WCdp1=dS3qW})+?8egE|wmB45d?{XO|@2jUIY^`2vuTyE(v$93(Rxf{UQ0da|EZ1<cLjO-G0&ZS*t%E zz2wR4aA`{4f2TWLqW?b!eF)LbKG47sICGuw!q~7aJGeS`?`J-K@cZXNJ~qdfb+3>8 zeBQ^_cxhd4vVnpLZ{5vw{%7rilHt~~-nKqdOIiJ`a^|wX6NCOq_U+d^;@I`)w$A#R zST@bMP0r4o3e|tMA6TvS`MK6@`O*XXAEgIvI{5XkaOkH%pbia*SH@~Oqz$_J(j0lHDJ-c7m*eA>oesT9l$wLWG zEkRx;>Fe(0Ee#V}m?Lj*a%W&r88$LVvA6i!5 zHhfSzkzRd|sr0%#%M-kmGdp?oGpwn$5Jm-~k>Cff$X9}lvRDAgHMwrX$D%%|DutTD#e+FfbJz6q>{E|HOuHF@;khCPyUgSy~RdE)kl=vZB1J zZR5A2@%oGQs+jp2uRrx@GiOgi!_4Ds$umt3_N2@Tyt!>s;PV^H)_>OTHF)f1o}utF z_WpgtJ5#5q%>QXyl+rE#ZHMVC#VIpa#vc>sn6*4FCSS5z^!>@Ohy0T#%<3rH-hcG; zu`VyIoQ0oO$%~!R*X8ck%U>P$H-2BR(EHnOx2iw?yZdM$U+c8mMNAJvAAOl|+f<_d zb^KnpnMJ?;%%2q*d4KilbE>sb%RjheoM^53^i{EWpNZ%6+_&E>CUbbs7N0l8>T|Z& ztIMqg%uK(XI=9bF=H7XKZoK+_s}%~(`)v>Ou*rOW{P4)ULazJv;(Al%uNTY+_FS~_ zzFe5>hr8Ie)=xq(or$p!-tT@B`EKg{=S8)_buCwX@3(UXU5?jiWmy(n9~S>U>g)QmXL3#~ z{ae=C%DVeIN5|pF>#`C1eArH3-7u+R(+z`tqUEAu{|kj~9=LR|NcVkMOOO`p|Jm`a zQLA-7c()6A%Ln~5eBH0m*0eon&5cW*r9a%>FF98&>V8UCf5*v>-@OZ?SG>_<6%eN*?!O+E*zlb*I! zuknj_&;0wDgL_Ho+B5b1^&F&S zmFfTXt&@FO`Sl_1dYzW)s&LMr8WSU#>!%+t>+I~CCOhTl9ltpp{(btouXR2hteW;W zkK;(MlFRLb#wUGGJBZ(X$x-p)qo{rN?Z0=Xm$g4tUteCgA@uau+VA(@TdMCma=bff zk>%;ftN0E*h?e8(h>Q3Wnk19I=^qCx^Mno!<{1GqGbZp{cNh7wV>!oq*$EpM7>J`(yy@D_1$9-SJQ((j9%yvYlz|}OH^{?@qzx(&_H%{Wde|+|Ic}9)$ zreuko^-Ye9O|c43E{jgr{UMmv6jbTc#cw##Z;vF0SQ_v1cKKyO$w5pX6f3l*@3G+D zFVKCzD)X1%yR|Vd<^+9S{cop!QGCXv7c+CS+V%9WZTk^(?3hDo$i_=4+xYVHe;V!C zusrJV?&rUbT1-C86L6YY#2~VVwQ!jIzvc-GKFM5v#FYP z;!8H_KW#d}a<*)C>>tmAHabR!g$ktBi%BN-YIv@+_heag;D!N5l2hB!;1B(m)aB*W ztS*{9j$+vKLoBUHBDD92g_i%L^sCDP)toyvY_zGpwUlv7&Q00a`j1QdboS3>T;jo! zWgaNSI#X|F>DsRsc81@%-NGPv?LY&gr0;Ukn?cvovsYWzKhIT8-}>XVJFoMVZ0V4F zJ$EKGUe$6}O=HSF_`uPHQ;Fl%M7i(T+Fw^Ksxyr%h_KLm!?mdT3?w{A}YJ2{Uinu~R#e)qQrw;6Yu)-kmf=vA6Uk8mpABbRi zWc?`iTkF+8X}fo8>q76Y_FVNazOuab%GXN!AdVx8=2W~;O`5l4y5GtNQcO>K5|=$- zIMq{ra;u1Y`JUtxzizSo+4V7{zP)zB{lANXwM_K_PhPybJd?*X*Bs`N*9;`pi>seSqqJ2S+C%rBk~{nviu>-4Mr*VaG#`m^5Z z;F=9PEmW!*u6WI5W-#3FVpqTpP7hViZ&`;G1pnObw1{m_&Nh##NSEcwaEbQd(Vnz`@hUBXd-Kv1R<4CHbdB zM%VV9|2qG?X!li5reE71w*GIe>;FzBkNKQe&!+BBQ0tK0xTqueO3G&$LAy;$H}@^A zFL+*Nvi`Y*+KMY*o#*V|6(YIn`m24sm;Z8ZF=Kn?ES=%}W&wi$+vdxkyfS3W4_fsY zWC{LIEr0lIefRc)IVHSWE4XW>@!h+fHG%)3{xx33+VAx{w2uEdxasw_L-KQ$r|y%Q zGvVzM>61GGe(~JeWv!?tAI~HC`cqw&n#aj3mEQ0ab6MHfpCUC|US0dK`d6=Z@H?*G z+q^c(z5EdM+_x>#NqlwF$%37%=h9V86jlo)Ny|ShnfYAEW#QF|z|j07IU9eMemL!1 z^!nuA-`5wF{@Pl8Kl|>r`i4C$szO`68y4^^l>M-N-Hu0{V(Ou>Pe0t@Hn44DaBf5hZ#1!{~*2L=D9oj?#oYp{hdX^{(#cE$Un7=Z4uMWlL{9$9)7Xa zdxL~Yq2hn;2^JUhqi^Jvye#Z(sZUTl$=g&sEVYGqR8Pbg%rO z^Xuo=^h_VA_x~89#SYr7Yc%|>_tb2<<*n-pi^IN#%=yZivRR|yrf|}J+lnpc+z(fs z%nlbmu9_KGVE=2n+Oc3!y>-{KoE9Xld2})9$@j27j)JM*_p@2638XO@YSuqF5;;Z2 zLtbKbk;&h?*QEH@S8xcbz196~rzl~<84^B}N+RMG_gYd5Dw+>y?Q@!xVH?wC!*)`5a?&KtfDhGzF?(aurq+ZXR?|x$W z$3-E`D`y&ca8Cd9@2|)+4mM-$S4tZaUjEYg#dIrOt^WB><*KGf()KozYk&CMT+eKD z;{T=@H(rE@{%aF%j_RJBwes}U(7e2eUrm!5uYEf_#jZW-Z|GM3V52QfmoM!xa(?^z zt$20JhIPV8EeBU+e~9dC{V@H_{yQ$xUu1q}g}(UesCM9O<>ux3YnO${U6$0V=jHj* zI^|{M(QgND=KIOMnEXPv)8TEP{M{?NL;`O857O42<)aw!fAzP$Azw@4d;T9jEI9j! ztng<3zdu4eKC3+qd%r$=uI~RH=cZFf*DqXj+<%gpb6%)ekpYWn^S!2Gxm7~1)|{Wb zxsq*(fCQIG;W4(=kw5B;a=-fj?6(aKOI;Lf*tOv6zZW}N);gzNy6uu!VS8AamHAjq zZxhr17=;^)mvGjeci~yGzB~SJ?TqbVf17SzIu2PT%*v&;R7T(Oxh9^?#nu6UX$| z#m3njUK=~xRZ`6_P2tV4)7q@;%9WBJuFn$v5ja{qu`|^UCLas9n6D(1%-d zrq^}9+82L){hgoxw%4t7{q|~2+Y!r~dwxu>@E5-%-siJgq;j!---3JR3x9sx{qvqh zW9!nh3HvP4zdHo=>Lxe2zyHW0`%zml{QKP}pI>b(N;tk-VA&$e*d3R|Ed)18)u`WD z#JXsz$Mwi5=PycVlx6-rSRwGE`RT@;_SY2DX50>V|6I#>Gn;{;c26|KU}dQK4DF@n4O5)lb;$i}jvnwUfzw6) zziZk0>w4)UO+8!f|1R&Z>7Trn`AvAkTY;wcZ&cf_ta&ldzx>}G;i-&MjueJJo-)D5 zZh6+%x8>i?)zHC%?Yd2Zn*5nz zPG{VNW$v(NPQSOVMyi7I5aTk(P$|*J`ImXhegD1PzxDr{W{x}FHFeq#Zme3Xn~-F) z_`Z|Vm;F)+_cadP)_wV~@Nm%)ZprE5`*f#V*>}9>tlCk|HGvcB3-9!&^DjOsv3UB0 zS&!IV5B#iH)6tq#+2~qrJ0rwKylFGseDYBl84V_ByE!#X|L)?%#?X`NBCKy=C5)v@t$TP(#61<6aUwcl*Zs zLX!eN8t_GCZmG}Nefno18*gvp{V12U@iOPazRoW?xRcX7|9@mkzjk2G)>$8{Pw_{D zXrx%BhW_P#^-lWphSattF)Kt$dX6!CwOo77fM2He)4%m`-S^kJ{8Fu3WBzMzh00ZC z*U*G+*_proC_fH7`7)sQV8r_mE}046?|6inEAk}m_;07>a{pzFJM-38;%xsN;#npx zXL?yTC;wEl*sSc@$xpBRpZs&x{rzXpx;*`RQ$^oaYWj;MwnxG?atrR9U!%cx;l??= z+}_U*jSB16<|qZO`?bt?hv}88HO?DXzB8_@wN;W4>)@`Gwf$InJtHT$>igBIuTgue zzVF_dIsM|?pZpe%d-hL%b^XQGXV*LS-kbdUpMd0nce~4rZ-4rkr{uA7h0)%P7d?MX zytt%iTh7nNXFoe{DtbOsU{ZV>r%T7&L+3?!WVhY=V!I~n_4Oq?75cn;@7Fg8x*tAn zm3>gdy+YvlME1_fL49d&8ro+#lrT0JvK`QW(RKgy_67z9t~-vitW43h6Q4*BUoOYAPU)`do1;^u9iF_XP_rud1m2|64m%VfC6@hs7?>yb~f58XvVUz;MQu{{l@jwokQwlcB#SwCCMU-5Rgn zzit^TwlX%nV`q$E>wj=>UyMVs*Xpj!yqm2eu_r@rupW@v_f+jl)2^zOHk+;FigP7> z=RH3A`A)Q#?vKykR%k7G!Q>+}k2%WordFAIcEkU_t-HP`$bQPPDEASw&o;fjJ zWP4C!+Ke|3r`^c$@sQAJuXtF(Je!Fjp+bx2#v?JtfJ=3ia?+Ybd;eV*W3&oC&E$3U zx^Uln=cVQQWA=wnzq+z?_1!;r*LrN*BylU|k3*R8lzkryeLb641se1k7aU9NdXSNG zVACFj7UP|cBIgbj&J&0fzkSL3Q5*Xzm5=9jKD=Oh+~UX8)Mci`DX`+2bwz_m3!@hE z+0*VTD@-mO-Ru3DCAYQtY3MU=vCT5F8QOxJDoWR*i}HU?O?qp(j*DB*+%(F_w$R>k zM`Uc#cBh#gd>+!~Z5lrgy4;gG-(C`?&E-(oc!zrl%kpNHFvYFmSX^6svx&x|HE9)3TL(6|YwGyEYw)-gf%mlzCh4+*`8o zZl`{Xi&kRa;@5U@%m*9}rh z>;-cPgZr~@E7MK6JQZvkI!!M<*``?&`Z?Kko0-B&GqITFO20}5{k@a6uQxfilkt<% zeOU&vLo<)x$=#q`U67^cbNIKI@(V2B95<*-lU1@4mF&^}EX6Ti>%qGu&NDT(-xpywJL?NdL*EC;P+&q-7+8M5N3&_Px~c z{k~`F?amM}u3QdDcDV~}d>@`&Wn_E0Wuu!=g`%nRy#*_bR_g6Ls{i^yyH90RMf>q9 zvKj4+8lN6r)8G2+{U3vvB?1qOZk}WOw=(?6Bdyf0!ix_!E;RqL*5Pw1!*mtWsXY_lf8$)gI6*IFzIe*d z_nV?pcHLg^^{XH26-(XB19LVvDIE$n>Jkb*{8e)!gXYBpj&h6t-M#;(-9o`CKFKg@ z-JLp{D4EZO-eH&BHM?S;JpSu1v3*LGjX|we5{^IP9E!G$}MXAidzppe$KBF^66%L9H=(;yfrM4MMf#Glzd1KmeZMf5dGCd@ z`_7eq6G;%T{Gq!$B<_8p*Q31us(%z*rYASQfBLmDC4ITug2ha8(%c>YYUPwI-Tg-3 z^6iJWzU$u7k2|;XwczTgJ?balACy&i_eH@ma@y`gY>V^WAJqE&N3-kXxyl1J8yHd! ztaxD~a?RVTaps=g)#lf|0#C#(j5pO2oyNeDa7$LMah>5$b%Xnl*KIvMYlo|#EHBe~ zv1#YSILd;TOHRLgQT^hDhrh*K*phuiPs%Ua{Kiu|x@hyhvomM^F$!$?Ew8`gqWb!m zzdKZfceYm@{GRZ8`rE^L`l%b{lx%MJ7c9QqWE8H-7(^q|HEI%o~zK3&yY@I>r4_ir|G9ERV`;si|flW34VE_fJaNE;|48_tGQRj-J*n4w3!7-N{Bt zK`3nB)fY3%Z|T;4zp3{7-5$Z|X>0yC3aAxDKX6z5Fz@}T6WiS~64t$4{bKXJbG4dl zCp1|9udmv_dsqFBVBJOgj~A)Oiy!@bze(p=`lT=R>yw7;a_qy?)Rd~ z|M%N=?~YdLuHW~yLe*r0UiJ2A_YS65SXr?Z9o(LObGqV=tP7=&GiQC`mvV7=s1{-V z|G?yL?-mCBWA%{JJvC|hzQ}E*HWsUnrSTP*Ru%wK1R4nzrzZh-y^x9lj6Gfh@EGxzf=9P18Yty9#a-AIG)2C-unN{qxboj-_8BEMX#-5X^(Tm zF)3AX)f+lj&lhTihPK;F72SPx!C#{M;fHN^u1)hYiq;BW_|tu@uSaNekgECVF6S1e zDGjoJF6e!JQ>}F~bz{Pp!>z4qCI_oxQ-6AR9~X3t>g?5$+#Ay9W0#lmDtqt4tL~qE zB>GjO*%nr`ubc)ci$D7dYNAyX?(%g@3Vp{HZAwpZy|SZdm`> zu4ymWyFY&3X}gdo3hypXVwOB;x_U8-`;WF+T>=waj~+90pDwuK$I8V|oK)Qo=Efuk zwz4gJFW=r5vSz`QPL=AG#jmEFo><-UQssR>`JwPT4;P+`+Uq7DAQzEv{zbP4kNj1O z4BMx_(*NsU7fNt4nzVMIsP%~>$A3(IB%@c-Cc5I&cfCaSizN-m)fUL~_x0b4Y-P2y zGVl6(Z^5NA&!c~`3VuK25O%I7o3rTC>GDV3d3WFb*>dMhz1eq{-)j3m_^vv+CGJhz z^y*W$Rc^l44wOCX7qesY91(vxv%evy|9*RTbLEkPA9LgHT$_Ki`@{5yb4++TekQGq zpKf|Y%+7E1=V{-Xeyut8ul?D-G7GDnwp!1Ri5G5FWm~;J#x%^w*(u=3n{^)|a)11u z+qL^>3g@fVJ@fX4?X2t8n(wN-_x{(UIqIvKs!WnMh3%iy{qV@;-*ti>exj@P&sy(Q z;~4j4?n{*iE#<2Jw+L{kh`Imy@gwk^`JDOvPg)Jk?H&gAmb}@1{>S%KwLd=hJzZeZ zzU;=?4o?hK{ zPEhr(^{uj76^t5JCa8&Uvoa+*8YSqs2(unK*687*qR7eObT;Hv_O%|B``eQBK5M#r zul7$|y+8WnJl}uu5C0YaP<`{h_8`t`v2?U(zl zC7iOvk9dXlTusly<-7;%dV@`EZ`R9;GnUu?V_?11yCVHz5_{dVtP=OUx1l%PPT749 zHfvOJh>I6c1^fv$+3dj&8os)x*xx$KI#;eBS>f{^Y%-y2>KX z^M%$fm24^5s>dU{b-fFp!efWCQOymEN+CTOqC%`qiH-&r48&wudqDSBX-RN%F&%Q4 z;L_7ucKVssyUtn9SFJ3aa3||hxbu%$ogZs{C_nyX-|=5_bzfL{^nT8z|BfHO^jv81 z>zBLZ_gK9Ce%{EcWg>4z#qZiX#&2o^7GA2I`CLck=;}?&cijrfTpoLm)n~UZ=c*_B z{}|q4Xc2k;+>%-Tg)5_5;j*tM3J$EB!jOM~x4ZP`>-(SI9Nb^-Q20hZf`P@_X|-Re zZp*>H#~2=2eyy9DcPO6Y{v(c<{j00qbOn9*w%X&~+r3uLY(J$UaC>*k z-%B0L4I5-~xYs7~s5s~+_e8CkoBhl<$o6sC%3Wt}h5ycEZeSFf`A=4@AZDS_d!c73 zpBGw`1bR8ysZQxzp-uO1nW%QRfT~*K(;P6@E zd~oB=l~r>X6|PKh6VVoAW=v6t&=BM5VtN>`VUCv|CyPsB?(we2dryR>t*<(HO!cX| zVNKeq63h8_{=1&7tN+x0vHtty<4?k3?>E2t7yax0tJNRxXU~_rRde^(r<>eQ7yXkH z=DSx@zuzp*UG2P9#+_}?JMX6cy?LxAJFZgt&eAohGopXJuw_^mYU||ca^(wKUvTfy zFaJsv&rg#2l`gaOo#K*{Q4hpcHnwOLef{FKzVz7DYbQ?3v-vlV!{>diy;k~$dmE0f z(=pX=3A&xH!n4T7?y>gY-TA)v3N`rHVy83D4sTw1?9ZCqoE@68uep{iNn3R1!>e1> zyJ8M#_?_-pdr?-xr$-*2tnkPxLgIYySvgHiaqvE=HaMU#2fajo#^T^t^-ul79d z?YC!v-~Z1(x_#x_vQ_mz;5|0gjqPgbkeWXLl-PR zY56ldtLvA#t!$Eiez~6Q!=t_bK6CC>e;FQtuCUV*QEcsXOjd~S64PpJWlY%N;vy(jyfXUKN^L4vgWnS@`lot7ye4B z{7N!#i_$oL$^ZVh1`9@okSQ8rLagmf2?rt!#H?DG79}kRQ4^AEjSLTe^-*MXaH#0@ zf?Jt65LzasI=`@1cdawZ!JvNf~Lo2vIm|7hfu51E`>*tnFh zPTm#G{_T&asod@jD_-8ZB>H2s>MnsE<3GtcpY}IQo)ICva>w&KujQ{7#e8sm-dobJ zOZxGWBz^OjY_;>I_g1`*ooljdqNRey&w~oOZ+pJZoO^R!2J0K2wlfP&78b1gbbu$= zvMT%i&#J(j6*s0h{Fz*-^J(Ylo3G-TTHc9Hxe>#eC~jFGlQ!j6|H*e(w){z1$j#%* z*x{*sRAN!VcFDyL%NY+aFmQ1>&QjpuVy$1O6MJ9$|H=Qg|EK@||NH&dq$u^R-~Y$| z|1Y%u`>);m_g@dnxqGd%M)AH&{QA1?J4{EE55#hOU@;N-p(m2h#QZ#KgD!{WVW#dw z3x0Plkvd>i#_CYJ^?||h09)D37nbZdTdcMJgC-YC>HA%i`|JMldL`}D|KhVhY~$&x zL7I{c^S0Kl?fW0Ux^C~ey)iRwGqm*G?z!EFago2HF!f#A&wvG`m-N0VzM8typ2Lwv z!$r&Ce_i}z_ZgG>>~-cp`WS3e^H}Wu0~QUwrrF+0ObqXMX?9gR%~|5pSz`MnPrZKf|CO;KQBjeW31a5?|VO+q=FhM7#838vwHQeSee4t0yIh@aMZLjR$Gvl@KeWPrC&f-z@mL_i*=?>nzLJtfwx^nCnocnZ3 zc-7auzHs~a@>^T(t-o8f{p@}F6KkT&zZ}e*@^9CgVh&~gt;M%r{$??a(@j1ip(Cet zHqAliZCA;6arsk=R27AtxT2h-v`Z1vkQWNWW9yK=h zNAKae%KUDQc&uM~Ox5a+Yr7vRYrpTUDt;#Z(O4nyGfTHk(hX(PH=GNq&I>;C-QdV@ zvoOMUMajA=@3~~P=FGUKkvT21X~Fe+_uF^&Xn&WUeyaJN)yXHV>Hl8tzQlVe_``}5 zQMx|`Ppo?#;;~-sMNfHY)awg7D{ij*=zadf(q|g#BF{cVcrX8VHnC65UoI|QJhCp+ z+Uf5B^Jez$rZ&cZYv(_>KK)DMOr7LA4C)SFLOD$S$>v4<=>PTb?bWs6s!(PIZQ$BUWW z_u1OIHkO^-_gF?N%F4t@&T2x>DTY(bR+(*HAO zw#!OIwvWH(m-k;WN$`?-ovU)laQS4mPW$_ZCJ96(3W#q$Iwg>na`@@Ew@o=FJWmqm}rq!?e=QOI+uxj~J+ZbFDNr?{l~+ao5<%K~0=wEpwYx-PG0 z(A`ldGf|OEUj5v3=e_KQV`5)Mx9%-A`)kLg_xJt!(y3Kq$7}z@%3Vu9<%zfP?sw@6@PI+YHO9v;~B4>s_#otwWw%!DEx3|UZ{y_ow@bml}%kC9!jVFM?b14 zv}+eKm-+m^RR+p;icU)0_-N~Tf!{}Y z+rB>tl;ieU?e+aVQ|FP;(o1^`8k&p`b~a7qw_tpJ_~4-f3_Sr1$~NrGJFXV?%y+BG z^=P>?x9Ldwhcy8V>Rf!x!G$I7o;F->sn4=wY!F^@Jox{gT)bl}3VBzv+iH z$8mbfn$-AqJe)1xB>DXK&0i0H+LY|5S`bqX3-yGd8 z*M?7AQ+&Xalf|X@xyB=Wm}uMk@r%e=ll7A?eETaM^+)vCnG2qC`MznN z49rXaTN<~a>R-|N53ia38C>6e_wKTF*FT(nAFpt*r}_EUmwHi3PuXm+F3noUCoof@88DGYG?a$@SE0# zCKjfs%ZK|z?!W%j?c^xbRLP}n>(USNd1F27euX5c*f05Omp?a3;l}7%Q%Er64>P#4L|X0i|EQezI?wN4yJuWoaJliljcosc_D9q2o>M#(A}7SH z)%jZCwnX{ur>oR|r=_$?u2tRXvemyfML5x`$=~Ss>EK?~ox;hwFT+bsPe)zQRMlYm zoX+i$uJ-cPf>Yh~U)4X={t8_lqO$gK%}zV}*Vkgd|N1ho^XjXw&-VUaZy73SIwx+= zlHLC{wC?wKwdZKjyjFjmEtTggbpJ}aXK}Ls=_Pl%<-hFXxP7Zv zxNLou+rpPi)?b?aQ0QN_`rcFyLCvy1ub!U17HZ+(Iir8}z8brbu)l|F_AXs|@am`k zKYM)-C!fA}>!{6y)T{rTg{`+#tiF0_^11(inpEZYpERrAS0k4({eQ9NyxoNbt@DbO zbiO{_x7s=5Sst@~e${{0E_}88hm+04f97wu#xF8l9~kO&=IQZ7-6wuN zKPO$d*1wbc|3|qsS9WrVrfTK=UwGGB=abOX2Mi3{>5j7$I<#18uj{{vi?@G$cfD`c z)2OHB>%zld-(Ow3CcJ)q+1|39;cNG=Qhua$b$@iYYSoo{d-uy&Sa^tfO>cOwQ@Q^A z)&~vg`}PPMUH%bdGDGb6gDoE(8D?HK3qH(sw(r1u%?py}oY$UgI~HUfwDOpE&9Am( zHyJkJOul^_o>#uT4?SqeocbXAtkVLMfXllNs~$W1@aE|gkE>GeeyLj+A9VEB(Tg?$ z@9Nd});sEdnxFrEYwG`^*iWZhU1m)7n$OuUwCTzjSijmvJ=J0*sxo-nst_|avx&3x{xZ5fk}G-^-%wq5z{#wpAB zw0>;sZuc^?;h!oTR1nq@w?Ha8*gD6{?p2jRfBnDm0|yQ@w@%#u=Q_7cl<>4I3;*mB z)(bO}_!pfLac{k`D%a$Bo!7(iyvg;m|Nmw8>c3zkGmG`d zbjQng4Z=dcZcAd#TD3KV|M@Mm7wd%_1D6~)v-CO$i6BVZXj>f5iUC@YUYgS6@BvJ^APR$>~RqlpH!_ zcqV^+c$9vuqV3C-?+vCzEVdO|{`Hi9_`ZKv_lK`L#-@6}W zmjvdpT?)0A{Bm)p?d?l8dGXr3VpoC^MSK5FH#lTH}sH~qcgko=fE z+*@_#%9qh4ohy4Lnua+ENq5cq`|k|bb=Sm&e1Z?{niuBO>NQ)gZI+l3C!#QK!?YzD z$zi_@wfI+w{a<9?Vk)-YY5LZx{Z~6}Bi4p%M|ia}^_!Y{@$5It*4Siw`|i`zaPh(fss4i$jUK z*XD}nv(JCN&wpxl!IMneD{z4Y5zX1O`+L`79Eic&077)Ws}xFRpG7YBWoPuiaV@WSw)U7cN1S~;d$y| z<>l7&V}5BmM`R8$&GCFHktDJ|K*`EAmt8z!4dbz*7-+%I_R(%V&s2XT&`uP6eU0;4K zdgXdLZ$fj3TFKYcAGh~7{n~N!%IQ?Gw&a|BGWqP^?GANko_zW?(bS(ZLu*nZ4=^`p!0PtSKu%&*?J@otIz&lYw$6_4oe z57xWfnsZ_5m*;Al9JNowyr(35 zyj=alnI)}SNjvx7O3uHj7iG=QhTpCCoqT)!)mCvu?wAXULd7jzckMsOKl%0NEP`2BwO>xX|u*J{+K?l|*&`m42$B0K8tuRj=HR=I3~&qj8skL*T5 zZf>sHpVXDy&uS{(kSDqVRBb@gISQyL)6-|E_OSKj8i8 zbE(^GvoMv$xrtxY6<00!{bVc01Z9@ESuO6Ktr}*J8=nTT3ZH(KqN!)DCm>w7=4ee# z0e9q`WBsw+J#FEydoC2WZj7ELcIw38X&N_QC$sLjR8qGoN_xtTx6|hxiV;4*61V%u z!u}GmC;QY_8vdH@^ru|4CqKR~`I?u;eU{2?yXSO;{0!|>oxg5Tey;TEZH>B*`hC&hI~K(0h5xmj2B@lyc&1mJ07`RsDK;M`L_T(T^tUz2Dz{ zoqXJ5HiwhVqwfu}Xe zlj7wUDsHendlu@{@NR2Aw`kN%xBvD1>Lqio`t5V$e))Ef6W6*AFIE3m{62VVlHoPOx^v_Ds zzY8Liw>YPG7{8ykCi%>n>c|-lQ<5HU{j!g1`}IjDl;xG!YgfNf3wZ0QQ`9v?7xQAE7sP8 z{gvIXxOlPf+sUhbzuywhb#=Gus&8E8vnH4RPES!R`gQKb%%Hfx-u81Rr`|j1`F+;9 zWaZj-p=Q!`zd25aY=7P=UukpYe&LLMueq+%WmUIym_G|z8oG9Je21FN<*UZ3wf|a@ zJYUxqJ?`(aINANy_VrxXp!u?yd-lDb5XoYl`D50vllvV-%J+9qKbNYu{8!bkx!T8z z|Ha?^mt_0j{q?+cou8IJ-f7Q${mZ6*DW3N>ODCzF`*VzCck90N;~sHQ8)mvr^skpV z<$uz+C`vZ&pI*Q7{jYtKugO++=e_*@?R))&_C2>V`u8v5Rui_Jpt9)i(++o~$-)0; zX{(hkWnkbaaGa&c;l)}Me@XmJ`kp&;b$@Mtb$@m2%1c>O_eboyzE6AK^>hFKMy-3d zH}=={?0Dh0y~#WCuKo|-&(hM`a$?2G*ZONWZIe)Vf93lFw&~|p_HVm%y#C7`JJFn! z`7ami_}u=oV?v3LX6cvDi{JbV<0*Qyk~^OFy41SA5pQdqHpW%{a`EbY<+48Nexr=v zhXe(!!z_zS=4Uodxg7stH{XXj=Qa7;W~Tmo5&HG~rXPO7<_0OF-OVnKeWamz<P&2HZ94}TrYY~w7Cn^^i{$-Hle zQ+b3Ku1NZnnJwF~v*zT+U;liSqK_mA+*=iDd$}Ot|C%__KXKMmluv%Dez(-+RP>MC zUyHxK`yu&#{m(PUKk8jm62JHPxws?$bPg_5qbG6M*%@4l5_~@&x>J^KscgL-d z+r47f*X*6LQzF@+j(>ZPde@*|7wL%cKcert@9Rs$WzLHKiPO% z+}5@Ld6iH$PRVzDM`oJ7I%!+S`Ff|q(^Gqcl>Or$W<6Se^3|((!H#>^P4|gsGoS5a zHT#>yI!|5s6-B@PR;|;%wq&<{>(bb!!-1AF7EJZ&opOCY-_(QlGgVCPznNfqtLo3M zyY*~Bhm8+4vAS%T%DhA%cj2pRS52PXl~@<=dF>#J@r=Ya`pvM6U+t9(WLK$dOZDG-a@xPBBRdV{XMd@ies%t8m#V#a zt&_eQtZCDX+v$1u&eex=X1eWr_3O8ysf@#3Zrkbq1DUwbw=Gufoy3so5EQ&J@y_4mwQ!etR-cNv=*Iyw2-@W&sRe^ZHvYq{c0f>{>k)l z-sildDSmOQtiXJgvf%6u{71e=>i;O%GuOjtV$$3H^Y-fm9{=F#(IRIt{p%NxO?B)4 zJAJM$>-<^QePz>K=Lu)*S)WQ~aM!-BWBpjZVzW z&7Y(`TmRzQyLaxbnar-dr@|*v+`s&dhx)5^y?^((#r~Mq`(CIqPy6WAoIkba=Wg}w zQ~$rvvM%nap?pU_#~B`e;?h$haQnRy}L z$eGPobk*M<)Q`FFDx~<1d7=5Q?=RNgEuH>|`#;C31JnGEbew;1^m1LDn&d(LIRfUU z@^h;H9{$|1Vt42>iFc2-K3e)c`Ekg4@BIfQZ%vm^uGEsa7bL?ud;a{_Gfr-`xUzj= zo%$wT#gxjO#R3WSwl)1zJ1cj#ZoFx}`je4mc&_H4bM3yvFG@`zQfiRuAeZ9 z`?sO>{nK|w%T4NPgg)M>)P3-sW%G=#pP9LFp_9ruT3)KmEoW7m&-e6VShD>ZO_%e2 z`=Vw|*I)Gg_0=Qm0~S@ke^kBRH7{xU?vFRM4c=An`uE%L?%k{3IDXo%KcTHH+hKc> zH)igZA8EBm1Fk;(|0iJ1&6c|k(sfIp?-A|RY+Kc4|I7QIYn%RpQl9G#2_ENO+bsLS zyu{GmCQQaD{ZmC`68UWtu|O*|72{Rnz8ot;A_Xi-S<{;FNnC5 zJpJq+Ume3~(k*{quaZ9?(RO8Sq38>CslOH~RnJ$e|E)|qmGN$}XJ}Aiz>TP=drzl_ z)a?(t6l;CTXaCmIyVUpJbJ{KakE3+*=Fb&pgQ~AT-qOC}ZJ2HE*MDb{HvZ!{w)ax< zhmd@k&EBh6Paf%he_w6;&D!O<2L%nLB+X*>xv(we*xB5~ODgY!URcDjN||y*|GM;j zq4uhObCRy6Z~xV|VCld3n^|qePv1SZI9>EdBaATA;vuM-dPZ!@` zlI#nKiL*4ldcQGUdVi5qhsne<$3Cl`?%$?y)23ub%*~rF!AfHG8^ia-T5R&qh_=yR zAMv%c^g&DK%hO-G-hXhvRAbNiZm#WVO(Mtab**s)PQU7E?dC3Z`0co2%?|eLRY6(x_q{}Cn7z8Q@~6<^c)o49 z$9DSck8U}+YRUBK9U5BeO{%Wn(C*z*`pA3c_TK5wCL2o>MHEi^zIkPlR;KF3rYgUm zi|&4L=2pA-x%r!y_I~&J>#--Fs9s*ZsIt61`|8st|C_qADh^HlQGWW~n$40udFm&> z+?3^@%`+)#~b;U{&}nyv-R%B zx{IG5EOF(1n!oPNdgth`mAYP&s`mU_7dx5HRQ}cXvV8{yPye-l#1?tK{4L9mU8tt9M4^qD=Teo z+kfrP<7A4_SSP{2z+>b%OR*!1wSHk?{Q9!Bcjjsf-8yDiOG<)7y zIoyta@}lG zll0!W=G|-=N2HUtCY;ub%a*^${7insA)UB&tB$?M=%FF*yh_|`mN_NkhddezMDkK(!X`Q>+&V!cH%@n?C(-o5{4uV?kH zG2fIYb%s}2>i;LwQ$w$Ymc}$#Uov}s?dOG>RlT91D${nq<@H`OYtqf!uWuIY|G?_i zW7T1`{pGGZZsDi*A6OHSGV|U3Z{a#B>kpOGsBB$XF?HSb-SeJY`xCJ2?e4t0kMGwD zs~rjVh_BV2C@y*HY4!<+g)5yx@12tI`egS1uJ(n0GwV0~&b-uWVI3eExI(Ul^=6Cj zngxw2TFb+)*?h^!&9$&OUw+NM=j03DxO>`v^8}a*+;W4{!*=1y)N$Z^Z(z7R6e1PHZd#OJL_|o@#tqx zJQ|Vo`Qoya`LECYf9M*Cg|^fa41T(t=!G?%G7Pwzp~Wa z?5^Gs7qjV_O-H0n1!EfhU5VdDRziI=#QxD8i4bbezvMS%6e($%5KY-~H_58LXB^Q&LlU+KL( zc!lzV_rKl*>J$~1=_}iF$i933@m{NH->fft7I8T_|9&SjC!uZmsZSqPw2S^Tl@zg= zude&=@5zK0hBEV9xKtz^9lsv@rts*UmUwVl66cA&iFT^Yyzi&AZvFCDHT=ke_IB~E z(+*d*%4KvhPoA~SKUaSDM(?his=21LrF7$4_Ok8LwJL09 zXUsFIzkYPF%k-*)9Z8$5?kWGt&pdm?RMbgv{ex|rZ-wRl{@~;I^jhw2wNq|>_uooZ zIEZ&FJIMZU(p-kMcaP7R8ocb^&T5DKPBIcdc?#yNnrY-~IlC$Tc2?1ErE4aQxk09x zJ6i0Q_xGrEACEL-_8;dkV3BQ;C_6FN?t8$=1pcb+tqrw1@?N#i zU!i^LZ2jInI-dHqmJz1~chCQqeDA-A@9n;2*}=Asri(0oUkm=xe=gLQ)%^D#wWQC5 zCx5=*GoL$7^l@C&HTfLwgUfHNZH>Pl5oP3J{@JUk^Ls$mHE|;qNz)ap?+D*jT;?+U zzVL1R-(I5c6lOG?UE@%;f6w_9zb(VhDVW(m3$L{Ei~lF~AxG|~%I>dKucK!!cDK=p z4{&m?wfgz`dE)A5{wa)a7zc!nNO*;Z8=Zjj^S|K~taEhke?7A|!bC$ztlq5P{Q8#nyQ)wBUr}_ZCuwdjpG1|$%NYWz z+jVDr(-w@Y(XF1g-Lk(|^+uXd{lvXLyuasv`7li~`|8P8dmmIVvHcOK5zyZC_4Lu} z>u1N6uSu-!Oe_?d5WpuH6!7@j)!uop*^X3fxVQM>+Q^qpYqndt|G#)@ZQ|7F{|_CB zQnzh*sLj6dYn+Nr6Z^lMw+ohh`?g_yMnU?DtnjQJgFFB4y;Ip^7qawX4jbb^sn!@UF8zn_cvet#|ODAi*>g`3_@7hoVD8*ZMiB}|7&)@ z-@WfV0^Ta<-8j3X^O2EVXVI@iu?G8pD1X(J3Jm?XHNx3xnt$Bt+tZ)kTd{Y2z`pI~ zGrzBW%og?kXqTSLj{0rJzwv!_5*{FU6&`t^qo1*Y;p6mQI2e(XNu8jDw761>-5&TW4Gar6G~{XB=) zn>TH6so-|sc%Sq7t@+NgKh@sfF_B~9V-H68%Ri@w&dt1k`P{7dmqFZfY?r^6X05Gb z;$30;e~Fem@2Nd`3-(-pma%>-=dRr|w0SR06aQnC;;m|5tkopAB`*)su?jhsJ9oD`^_DM%JJi4dl z*1o!o&*0j(HCi!BZgsyTUiN)0k-GcJEAi=X`-J9W+D&E$UixJn*wFVWYIcOAiGcai zpSJlQ9Qj^7l92*Za9Q|^6|L+qYeW!@UPWWcd{$@pU$sEfXXJ2m# zzYIpHb zxAqJ7$;3@C{53(=ar1?`l{ZT5f9p=pc%J;%x{3Sl;Xf0$Y<;!GdE)w`eKL7_*%oyA zWYt&&xT&i~)n8w>wl+R&Z(V2jzFURgzHFb87ACy(j=dH0(u*^a8Q)E_F6TJ#;Qozz z^?UO?yk!+tUre1dZ~8T>J5v(A7wUdZJhbFl?S>y^Qx4zAFKa5-*#Gy?OT+vfDjzQ& zi*Hp=c|LdYG?~Q~t^HSTT&{@e;MkRFk?M0U;%VQ9|Jz!Zb6>I&bHC4|{;+=knw5rb zXYQolIh1yLMp}ISvNnN^Wjm`H58mp1^)BV($|H5D?{7(#-#v5g@~PM}yDr{Kf4QsA z$~CD%)AH@HzeV%+#gtBbu-o(b@!!>spE^{ZU15)2^sSHQNW0#zUzOim{)xXn&C-6y zdG7k_ORlZ^&f*&Sx&D5Jd&-~Hp8`J4S@L~vVa-nJryZ_ zXQzl%^Z$IH?4HQ6Mk4%feQ)}v>VFG5cJD89VyXRe{=euYsf*`)Dr@%ZtG^eFNH~7* zTL00~zY8C2PS_Inh$;2&J;Rq>O02te72D=#z2DHetA{r#(!KV4PlSp6qt#ROZ@%x? z^fqg`V&eAuU)HXDCdmFj%1xQ^^y_`1v%2$kc|S_qKS^blSM=|Dhtt)x?6wQsEjzto z^@l`TyXUXG?(<)d_`Lhh&h2^AwrI2LsXu*Pttc-nUc~x|SmvjB+LCK)@18&ZuH~z} zM-#hb$<|-nc7OlLb*uht?Z4N{_g~xob>D}phkEr=W4a{&Pk+CZrF`%0`a6yb>|g3H zI#X@;$l{r|{o8}rZq}byV!f*uEwAVh+p}NOtGe&w`?!SIDJ(UAQ^g*=`*ApEM(g*- zM}AxGuwP_#y7=n0%l_9LwfFV?`YuskmG}Aa-FcCwPgs+rZ-e>_^f12m>jrDuxUq6(*+ePrg;uR;KXte&H=cMBOfPsPYk>e~S zjxN^Ph5B*pZ|%RdPx){5*HvGOqNDa@Z(V3HxzGI8GG z)mQ&t+Hm&tr@kK^*Yv0VQ9LO5)$+~XtR1VrR-cagTlF&(bp7l5t0!ike8=H+@1b($ zoGE;l^-mrCQ~XijYw^C;!o5NZuChv>66QF$!RztsEBb~~1`a24qe}kEKDE!PpMKt2 z=#Fhu=Swbz%g-+zeQJ@pLPx(r`1;L953|Z&YwP5d3M6>S>V03$v+HKzS5vkBiz~KA zd^BeKGG|~s3HmVL{wZ>n^QiGQecl6CLBSib+0I^PAJfBAmvs>)R+cdWD%Lyp&rL`7Y-`x4o* zTqsteS!dOp`R}8+O7FN<{+&r^>cp+ZUO! zwQ1e#uh|#tHr6F2?wKIn$ts!K`?JUBzVEa+rnNcWes`Roe6jlWTF#!Y^=I5Ru2ub8 z(^9Q}Y|gQRqQ<&q*{*r*$s3nH4m{XcGe_ZzdiZZ{6O(ZcP4i4%@;J^cz+_3$Lq&$4fdUAOxeMnJCS2v zXij&{^ma+sbvu8v->TWm9Q5w~F6A#(Z$0J;ep(x@ANBow_^(%gBUT9c?yY!ovi$xH zmFY^GmL{t|UFPo4T<}WvDvR>`IeEd}Qfa2;zh@~+%PI*iWcj=8_&Ft?gZ($Y+A$m! z$T28e!256Cm;1Fl;v@~keVDuE`gY!9nqHM3xqpMQdAK3(>BZean&0HQo`3pXexZEE zG21KMUr(fF9bdR2_OqI8-Pr8h3FTXo`&%e2q7u8DQYhU;5p4xkuXWyw~(=2X0 z;y)NEk#ue!SIW7x1oQZXu0MACJ^!5N*kswCsfSM$R@)woz5k#*%yg&gb3vA_3$p*t z@jU$-&$2muTUE4fu_MP9#k-r&9lm&?=lbP&UXR~*=3BKFJr!9tOQrVz>-)BM6K!vO z-?ZMduKZE_+}E0!9B(rs7j}1jF#Y%}+1E*8%f8pV&1vixUqAULUDKm2+8QsCtbg=- z>X8tup!&tX|1>3Ux7lBJq5RmNkAjo>7s#Z34dp!eSE63w!Gq%khd<2M5zRuSr*4zW#8>Qz!WErmXjG#O8gk4mbbd zGEa7y^>>Y#aq3?p_6sDwNI6w}^oDYf|I&SJ@53%{mU_#S$9G&)dYjqwxHmXzXX2PEFAJsXUi~YvkXz%`nf({scXwX6vd?^PU9j!H zj?e`m|DLD2&A-}zP%k7M=^8Mz=mZpduKlW#JOnA)PFL%G~t=wvLwf@D%O1pjYA0M;c{qtA&{skYM1dpX2 zaZ7($ard-=f#mGHk$rrwhhO;Sc>hrDZ(8oKe*J=(mUdz0;gjFH?r~dum}Qoem(I_R zthT>HGb@yI?w|f7Y$)(8`oXG6AyT`4@BSxmeSBBG8Nb=SnJ3q8*W57i;j{R^&MC)Z zr2ehDA;NzlDW>LO!@hmDwoOX^+jLuzDY?(f!7J?9ZvA^d3_dxm=l$!gsAliB@k8d1 z{QgtdFP^B<504JJ|Ksfch~C(>r$0)krLJGU@Y!)j&U=4sZT9m{Nz0NK_F5t=t(>#} z{srX;x{|DN+rMxBZ*uxM|KuZr$uGBMWZIok$>`Xld13#&og1FpKUfgXcj)3S8`J3< z&3yieP7rWT7JA)sZEL0Y;kWa>|70GN`Cl%jn%!Cxezo>u@!4;jHaDJE-J4kL-=w6m z@BZHVu@8e{efuto#$Uf;@b>$&ISM=n8Ll@u?|#|-?yA}S3C+q@(U0ya)Cs@ooEKTA zXa6vLXLaM}2f1@=G^^@N^zsbeO(H@{-{p6x@Q0XyD_Wx1TJUc@@fzEKmX|6uElHS-@j>a z*`zA?H1F2(f8Xm5I!4z{EPnMl-g5rDQ!Cr;{a=X9FF)-Q->>?3!hMIOKQ0`%m&x!C z4s&G8vURur;N!Z8=}zNiT_N?!zdzOr{N%J@F>+dKlX&;tuds>sxp)7z7(UJ{?)vWe z^+(Ml*HpO$sgJ8#uP&2+a>@Bswb0AnpnuF$xS!qo!FEIReEiw9g{&;?mpmFiOnCIY zxJuWh(k0~X%nutq=Q({^zHeApo@c-HmE^5x)U;gapoLpC&wnuDsn{Xyydaql>e!)im>0fkxj90#1bjN36 zx|aTqQ`~>NPygHZcXR5O_iuda_r1UNPUYRd_lKCL?@_Q27Z%+e{?4rL`{&q-X2;q0 zP3>o!WnWC1_{;6=&F`;@^pAHPK7Lhy){Si6*Q!UphD&Z=6LS52SGT%d()Kk@I)!oX z8YPncNuL!AcUa@G`@EN@{j;Y}PrbkY=%nAuIPduvZfIR!eNjUA`Xx;Dk%U*|n`5&?W)zi1x`}Xxs`ZzDVsOsuRuebNV-ujv*z`;5F>B+AfO#hfG zS$q!M-)6wMYQ_7vNAIzE?+=aAvbdG1lH;IOm+;&D{ndBB%pZji`xqZ1iho;+I~bA`j=jGI*KxlQ{|cfT;>DAwY?pn6WnSo~y4k?S3Shg$_w zSN{FFyQw!wnfdV154Vy}HBB^Aw`%yfaRT4Im#;7CYag~}yu_|$cJ=j#HCk@)8@Ip^*A(Nc;%9+msRVhMX#QHkkO>a_1jw$hU4Y7WmmT?R_xZ~-1PP6 zf8NtgQB`-uC+z=T+STQ{|B=6~UC!zE)Bk%2OqO$*es8DxX`@4XoeTaxk=pX5>aXSc zhk8BrcBb!}!aGUuHZixoFo40&J*OE@pJCC z$9V@Hw)lvi=1}-MXX(`L4Idi69SgsEuYmQY3)f$Ud7aYHALkqv_L#0_o_}Eajn_HH zPyNbr+{XS2xcT$?Pk#g- z8!vr**&?vMRzzsZgX@*bW(|7-Tg zO?w@B{q^;`@84;-RVfwU@G_leeBt?gHtF5dWLjRIUjF9)1W#rrtEmhB*jDp8-ZXi4 zwx%k=uuX++ZM*oh|9>iVU1i;WJQUZ=@BC=W z`dc&Ortg0iHT}5q+aJ8!o|N3bl~MJWzbSHM^hv2D+duqq)y~-bIG#7}>hUGU(*KIq z@69-C^v8Yu-tfJ(n>hCV`@LWH-v7iGc{8>go}^Z0_ggo&BxApt@QGP%YCk6?9rF>N z|3}z2LVV*J>18gPzitq=XJg&+LorRe%Qcx_>b>0`^RJ0NZs;(!8Y;2tuF0B8dFNX-@mxXYNz`9kHHmh9+zHQRwub9rF})Q_kaDr z4^KItjhpq4t>%2x*SbJf$=cxW+v}F88l4kAD0|y_$Fg5v^Q?~7?OONbgG-)4()2oI zhvdiHddWWff_ix7S*T>c5tzi@KchMK;qG_41S;+3|HuxFn#rkb{clhE-s`LFMclZ; zRFlH&or301I1+g?yy-{&Qt!!DC)fV}(0($`W{tQ!2g|j+cQf{{5B+of#oh(`T<__) z%NZV-zF6F527kKJA-g|&>^FOul`!{u)QcY$@_JSM`C8V*%Kx2DPlkJnzc{~tfs2c| zR_}k2I1?kiee3M||1=dx>v#9r{aN-&Nu&L#XeP_`qnv%ysU)#}HF>MwH-}i0v)+zC=pZxGn z*sQ%bIlSNAkNW=NNA^TV{qoa3aUH!^Uw9w+X&C(PZ0?HvEW6VTk2@`8t^4C z>$?4ZzuLVg<*V)9FG$_D^~sXyAEiQ{U48eyR$29(`h_3+BlZdwtPb;uU;k#`{>^#U zo;(VS{}x`WTYddgi>&;;c;~OlOZHZ6)Lp%7-^(Z8Utcs}U|{*;I7^x16!_MUwP9;} z{=5>nS~vZro93^L*Y#J1-p{(fdhWlC;j+Ju{y+Nt|LgBr_t)27TC-}!EdQ;q_fM@_ z(faQC_3W!pO-fV~e)9DTS1jn-o_+n~ugy>5cCFigjq7WYh5a6{M(byX!5z)zWjPu{QjnItyktPn!WGo*O#@` zH)ieor?#&9kKy_k$z`q2R=d3ZQnfo`$=0l|#b4h`My-$9=oRzpss3tV@xMh}Jg5Hu zy1sk2Q;civ>OJO@o_?)dx6bMJ|JScLe(=AGzN-I7>uBk!TYuw^e5$Em?|JlOX~?|b z;>z0Ve?R^E)g-&Y{l&nr+|LpK)C&6uw$$xy1ieS-t!} zLLJY~2^Bpp{r&0EUeAPgMi%=77Vnq#YTT0SV#@e?zq_$Z)Rpb(GhE~LD22_E6R%eM zIJsli)$c!9>NsC6lra#FQJ?fbdBgPI9}?G2Fc4gFe#v5oj(Zj+#`TAeChn3qeqtpt zXaBPPf7$y#crKe9`=h4oEW^VclPf;n3Q{TbvgFa&5^&`0#hP%L7kp{SpB``L+FNh# z_MheHteBrqf4+V@y?(}|jYW617lppq>HWfae&6oggf~Yr#FFF=zmG9dJ6@)`&|m%k zL497u1v`$KZ2EWcW1Yej*6C&^9H;;976|jT{&Kffz4iO%l=BY_&E|do6DV*z*z|+%b9~oI5oV66`zM z7kPZmn!AX};lQ68C;LDB{QoBV{by;(*dJ@E_7pki#(aNz>HXh`^?D|SudiOdb(QOP z)atm2iTZUd-XDU37HyN;Ec5h@%D?^UAC3OlJ=y;HaDngVEBp2fDqf$leX97$sk8pL zElb{CSkQCt-kLv~{(G-zQJmjo^f`I;8P!=n#(OI?MUQ7p6l#CU zdTveg;=L8x*1ic2-z@I@zrAhtdhux&i&jUh{k0~3wb)+Qdrz0W+hdYc@v+z2Y{mZv zU5%4uM4zu@{_)`G*C+OJ0`g5#vTKsJBlSs?Of7~lB6@2`8_@LFo+aLPp zX`Kl9=l*^AuMd8Hvax&<6>YO)u1Ox{I-=i_d3pNT{hw!_+JCfipH<<%ADQ;*9Dlw# zZkBkn-_x&USLBnaSsOa*W0rmr_;NwL==a9=pIvlnzja4nmvD|d77~}RIau(bJ6mm^ zQRC?!D}LOseLw4X@dTGUlTT(;|Kr#=VPV~Vft;Ps&tJXYrEcaVrrH1LTN?vY_Cd*~ zt=muk+y1()CU_cOg8aXeg_HC4a&}KyvOCiDWxf9ECx&$q%D=up-TUOz%c!5PpMBjQ zq3<=_Tzz5s-taa1tJXjA6wS$fvMIgv^w+CDl;8dRd15nTTq&qgU_Of4{Z!zRe=B_xq>cdbgvm?s}+O z$(-I3hmJ^c1{MGF-~W&I--O+(>#zQfd*>hi?0RTvp-i{mrr7Z9C;n~9a99{qW_S91 z%<=v7+RufOOCi?98zdSU5~NBS3Y!^8GUswJmfj6dT(|9$M-8~JhZ zX3;xzCjDJ;At=+<>u%??{%VC!qC2`@>{VZ1w{GP>Wy=_KHoJJZ1ql zQ?ITpzJ50%d$ZKstS_qn72QtmK9?2KWR~n{TIav~HRr4PVyVeLTtcsi`1toduucyC zEO>2UOvsIk9#iVey4<^3bp0fm;`hBaaB^c#c)pGImrZ5fmsg=HYqJ(*pURj&`zxE^ zp%CoZDpID)De5KiB_KyAb-&=OHO|}zqwx?Q?1DKIsNIz8TD5Jsuz_4E-X3z!{gdfDdn&B zI}7f-Y}xyk@mBi!$5Rc~A9PQ)u6X>4XWs80P4hM1oVoedqV}N9>HR^6qqqu}dF?rO zMBwo66B{bd#LVknKP^9M@eXBKX9;fw!!0jk{U;Ll3Gxh%}fvj5|39WuAd|MB;2^H=(p0#`K zhU6#n_F4a%7q7mY)!@c)ub$H{GHwb8$ZGj%U(qi7(Q8#J8y%jtMMsNIs9K(R#~gQ7 zxma%9_1||s{qCw%w^da0sFqH}#eYW*njPT&uGFM|>h~tLZ#wJj-}f+0UC8i8cav{n zc>1c0<30QL2YBdyPnsEx`58CzR5w(;F=gFt@a1ft!Fr81Ydyex&9GRcAs&4R26tHTz2di<~T(*-s;Pre5;6mxu6%#ck!X8Y>j z!+4V}g-52zJHr#<_3!oX?M?@{t~x3YR!&*_v;x~m1+bN zC-L56Uw4>EUY^x6;F)quNWj|<7ye!6U~g(YG;4Nq_8S3J3F5ebCZstDp<*eCvU`TT_f;i%IkFVUsYU>m-q9^E>0dGAmB z?p?Z`zfS(X-@APWZ}g>xM+Ryin-4PGzPZZCNiz8V$!0DScIk5r{oT!99(}){e7L6m zPNL!c%IfSte?Mg9hMoF#kmXlf>C1rLRehTm%YWN^=Kt^W{}r1(oa;YIywkR`T<=^#97B}TX-@cx1|6c8o>)g-Ar$4ot8-&RmeS7+G z=JN8gtjVlxH&T=Dp8R}ReO1(hp4@j+%xtba-1+gFYf_wBQGIzyaE9WGO?CGl`5w2i zNc~#+xZ7=|#rvHzt{+{TH@(+u!u6fi`%^SOuCJYNyjJOO?VnQt;aT_B=lzO(CdxTO z(=Na_Kc>?6an1a`wGU=2n619QE{wl!Q}zGb-~RqrpEW&8^sO{Y@%1Yi7YgS+d!M(6 zZ}!bS0s^m`_ODP;U1@k`o_BV~ok#4|PyR)}NZel2^kDX}+8wg->kk(G`>XvYcgAO( zNej0$HJ+c(J#*7)Py0BIod%CBt1qA4oRu^CYWC!ryqRAwiCxbQiZ6fEoAKegXNCIe zbz$aZk@i1}s*3*KU#k=QqI&Tkw~DHK{r`^D%Z+XJuXE-9xA0Eg?7P=5|5sGVU-IYs z;mZ7^Nk4w;UH|$#k=xpf+cf<5;zw~4o?hQC=EufU+jh_N!xABjcZ=Wr_E+yM{}%Ut z!?gE%*Ih5V6neBuFMhW0i~oy^^)F@jTfLX-+vU$_TeW3E*Y+nu@%6FI;&BQpvnz|g zvy|`GRZ?@9)cfw(UYFSK`-7yuz592yOZL@Y!4tEWercJhv}3RKq^bAC6cp9cw(Z|! z{fbd$=09#JLy3cvR?hY`ns-fRqj&eT`-fRiBnUF*-KjM!elqv|nY%&X|5cxgVmU0v z#mF8N@%i+<$~%k>vuu*quhfdQsjI(#W5F*LwO@~x6koa>_xoxQ%g1{fi=#OWS*EfH zt1R7<6y#?3`})4)%N}s(d9*uCRC4|Ch^KjH^m%1Q7H93LdApD8e#0_*j%j&dyyc7K ztJMzg`j}ttYgRsKrth2^41dk<^lW-{vsR(4dvU{QmBwjP_s;MA$SS&2qe%GmyBU`s zIb68f`05XbtY4K%k;QNJ=m(4Vrk`-S|LWh*MLh@RGMg(IusXLaduQCC{^aHLWz)DB zHkc}eG0d3Y!}#`RiVUOF%zxSvIte?2QqOC9oqX=7#l9p-R^pSpVQTwo=dP2#%FMHM zr!Gt1eQNrPAD@lSPqI95x!2oJ&UdY^Oz!Vv?_BFIuG)OGRPN=!lQVmx96C)Eqc1)9 z8o2&=&i(D@CuRNK^)*j@`hzdY&QH58-PH)KpSSqrHl`Pa3R0RYK3m**bYJ4bl&y0Q z-ab6}`QiC-Hrh}4Zoc4F+i01$#O(fz+f2==epjbEE?ITpbi-BGl5h>FRUafT6*UD< zJ367e=th51^ujrFAMM-!;*EUb{)E>CH!Q5pG~e&~JEdUnj+76k+j#x66W$)#n&zT! z(lzOQz)|DXcSX2mXWVG~TgumB^-D&=JK!xlqtcZLSt7!`Oa~ne5_}@KSUVgG4d&>G zbF?%mDRjhKkJ)ah7V3R6>h!AD`!>y!mbN8g}pC^7l;8&x8^6CJWfXJ)%2mj2iU0v$L zS$0u7Z?)f>R?%6aj7Mz0nFp|asE+!%>e#aNb|Kd%OI`b6FD$7ccfWhvhM0JDo)tXl zTB-#FjJ5X^Y!6*bwY#*^>iRBQm4fW1ed5X66@zA8V2y9xC-+~ak5Rt4?~G^5k)S-m zQy(XteA+??vUfDRt9x`eZE6e{5?gU{ny9p&`b_+18Zk5HUeaiM6RQU_pqE zt8hz`V!#9!zpvBJhR4jh_cXF(YH6vL-%8C3cJlVutRBzG-BSJb>F50&{_Y3Y&kt=j z*z(i;&ff6SYQv|$ChtD4UUTjj*Sz)%X>vO=cb#~u`iI-#d9#OndDM$P;-8{Eekh#d zV}5x-udKH(UJlB}T$|+fM*2gccNA})qJiw?TGGPtq0Lar08v=ApxLP|F7ECze zCdjqEbj{tURmH2OPg<9HWzn*wana758M5rZ?=N|}`C;MV2Qe$W%93u1A6oJ=yJz?5 z-TUoT<2Q?1dVl@>;={S6?3-KV=Z2lVxuEXg&)6kZF=8<{4qwju|LOg@)B^w7@c7!B zAJX;+{PGSx88A0wUsvpZ_JB8UKDW!o8noLlmCHLTDtbI1dqKlf;f6Kkya$gpGtHVj z^A=O3_kBADb1%`jt&EWl89pESYW3>QRy8eeY~9qOU0x?%E4%QN#NrvtCuQ-quqMow zh>ZMr?w620*L9cWvsZs)k2h;5V3ZM=;3Lekg{JooOjkv@_Llr z_;2RCOIx_!KJq{7;rKr0_3m2-{Wbp0o7EKgX7_GiRoN@?Od;NR0zGewCp+Ddy3p}Z z+Ubwwr$y5zZ_7z|z^EoNK}&?ImF0}Xh5#QGX-+Q{g^sz!XC80$*|S{cCa3qBsWMtm zEu;?LxwL-%lRu{K{5D=Znf-jR#@l60SGPYszI&-n-tyo6=56b}PcG}7wl{WHbZ`By zw-0MBZGSya{BFI~{Rh=YF6oKAIDNA;QgJ5Ry7mP&`VX%CGQXYad>~}eo)_Hz=PfK2 zEegNx(E2z?`oyOY<7@`@dev|7@vVE5{&pp4U#q--b4z;jxw?D%=G|}OIm18YQq4Zr z5B|&J?{7XHo#3^u^V^|*i61p15=;?gou zd8@s_%{=s6@UE!m@8?Fm3NPELl3%)Hjc?Zfb@6LLkF2j-=e@Gkc71^RdAFkX`~Gs= zpWt-5M=^WiZl|Kvd#~<}oA%^sxFNUxYL#o9pKHHn1fR@!|7GvT*wT!R#b2hrx_bKl zMxkF{w#G$m4BJ}#Y2n;!ZAV@HUHU)Ki|>cnzUxbO|C3YA{@S&Ek9)b@_PT%8F<)QD zwE4ac|5SA)?a|2%%{uSpCrW2+{;#Pjv8X+5%Aws$oC>D(JYU^)H)*{_U%pd`wrg4W z)dyRaF1q3F_D{Yk>%rAG@5|TQ)=pe7HdcB)`|2nJny~(?G$?5G2@U1>t zeCU6Z^ZD6x{~d`{*{9w1UA;$U<{v|eDX+@bE8bat%;#TAH;Wig;kTd@l93q)*Yz?- zobiv7nm6x|;El=bJZ7`c1#xmpG-n)pXMXYCy2bi2^RGuGD*KAf=Zsu4pL2gR$HR30 z1sZQ{mPsD=$PYaJvG7qf_ZeH|m%k)=*#(%UHnEv5zWOdw(N&nQ|J&Q%7hKG|TR%E8 zzB^hH*nYEi!}{wYFCBbVet3Al{6}q_-P*Hud#{--4@}`%^!mm=tJfP8q{Uh6jKW;s zaeQ8>H23d8wa0e5?oP5;Bak}lYchw{KlwQcduO-r^f2$6RyFC(PwnM5_*@RR|DT}d zEPc=M#^Mgn7hBbGZlwQE(bw-jJ5fbAVL|a0^Ev6Ng)%WJ{pHuoT3@TTKXvR@q`#Yw}Xvvm=$dE3DGLT1s3p5Z1UU*f-JCVB)dsZ;WT_F+@%>;N$-pH1E-> zjjW2@eGF+oUmMi12%md#JzV|!Ef?eWlby0Z30ckTHcFX)>f`LM;gxpv_nf7zzw~l|$~H)=T@osP9er;9$U z+AI5%hxHTl^&YOL9ERoGr;M0SKd|69b%Ud0Ven2fAL;LcJ=0<@bWc9D|Kz9tGj=!} zeWAbKRk2{_SK;079|u-nF}!MYp>2!MQ;+E1pZ@Es2F9+x;r9FU(Rb^2q)$nhuk<7B zt9|*M$?7`O>$<+z?6h6~LQBET|H5+JRcuw-Zx69$e|;z6vamjJ_TKAXB=Yt3UtLq4 z-g==SY3}F5xc9qP{I7rU;%WRt_33}T?sxP}_<4Hrq<0F}ssgGvU;4$ydUEaEyLA=& zUs!6jU)?FBfAZHaKM$`9QsJC>=bELJ1IiBk4l+vLYh1>f)u?bwR?&9ha^BnPyk!5B z{S2&nTl6>kk)7IGF}=E~{YNElme(In^Qz%n#Imp9ghAW1!{rZ~HubdJDwprPdDwO_ zi~a1|D=SLY*w0stuivz9{)M)>DCXqi?~giNr6k#U3SWd>-yYq1WZ|)yW*3&Pe32jT zkm)?pwwfhHETQ}T@;}>44BYjPUHmP%xo=C^q=kifGO2qK7~Ea=n{)2IRkLsUh4Pj* z@w|k0)6W*Dn@tRuyMBd?=-KK2e|bN$x~6HlnE!b#tLrB3m~J;+MPcfK*JnOf zA64GmAECOJefNhC_hZ+4KG^c{gOkzu`&m^Les$lCbg$2tc++md|CU`n`MW31N|$ru zJZ*89PpSOd7cS08i<7y8Hkh%`F*DLwRCu`GSkkU~zxwjHkLzm{jV@g^PuKdzyVGo% zUZcc>=F%#5jk)3bGZdG&3eBteOjr^`_WaHU{otma=v>$WL=U0mS=RfZ| z>khRyKjrr(mL8q_`#r1antk6pXT4c(dba9=N1W-lKjyl&$4`7xeCz#J=E3cI?>5z4 zf8^x+c=~0Q{rcJe*34h`zS3S&YkGA?*dG<^2%qzN4?Q!C{d1&L#P{~fcU9`&?XNG| zzh1vOUPQCu)6+v~%YU;;-+5*{AuE2O_^Y*3+Lo-#|6^`u;(h%?a{W*1U7x2$R9vrJ zKS_|kxcuqgyMbJhHQU3vz2_dgKRr&YZ`$>{4`%e&+}I=UsF-mmPW|1xzfDf5a-0Tx z`Cm5a^KSX|owaR6Uj34eu+9Y~^S504neTU6G?Hsmz2IJpE{@toA3uCNR^vYT<9}Xn zyEVtv>#qoF$3^;Ke_ZwX&LegDGXu;u%r`0e96ObqQ})0h7dD}H7w zc*|q^hUxD^xkYA7GkuryGOdAh!0`ZbT4 z`R(rKEGow(bI)Z|>zGd2e)8MtEZtSE>a)vttb1SnF;?N0BKJqVwZK2-0Hg*R33b9J>%qGch&Xo zzsY}8BKParfA?h7Txo0nw9ZS@aqp204eizMzL&p}RP4UD?`HX{jb|0vTeXs9m)YuN zA9%CZcIEt(^Bu3aT@I>!PgcKMCTzJg^Op48)pa||)|Qw5dMdgq~`YnpM+6UwyUvo*~%N^8fn%CCA-A zmaX~u@zne4G5hVqENkYi*1x{1H1FB8wK2P+CZAF7yWI6-JK+vUsmJGr`$@ z?<-kxPMdGP*xLlNYp*W+ziRdMWmhX5tB(JTt6m?k`BP^0O}mr3FSLEdT$@tgrFQU#C+6BB7Tx5j*0B74d6-%|oD7G+2c5T5ki_dVN{D+zO~c9cDPBKBy#+0A!9yvyU5pSC<#*c-Ej zY45|?ntOJB-{`aGy-=LmGK=<;Hq(P8KUMr+Wfxwom*;o%XR!-UlKO^+%#%&nRvQZ@ z&Dhzd`1Rml$GYUO=9f#WCO2eWE>4py`fzd5ltg(R1G_b?%WJ>M3N`YZKAzJgW40wf zp~dq4mgKn5ADkD7M@3rEV`cpEEV_mtbL%6^Iy{T_xvN9SRa>`^SHUNo<8Uh&;M`D zh5rZbA7{?BH=6Z8TTW?SQGSZc{eL=7lU0AT%cty?UdtJ9_ugU_{S}{G4sM?mq7?0L ziAzcNbC1%q6`zjP=(~k=aOXZ|`6G5te(E*<<#+GT+clpA_Pqb6wGyP8w>j!QeD6%N_C{^?F#m_?e&Y0u$Pcb&ejoxP@gXJJs%ckkc3dKJq4#x*6L4`JFi*Z95J{ZdPr z+$Wt@{X5_2M7%H&uKAR#AJ|o|;UFSdcKt@l`Mr17{4)<)^WlGv_x?3DE?0w@5Rr~Si>*<^#xaxbNzjFjIR8@r?fEos9fX{iIWe^X70FL z*TlrXZ|?gSyZ)wz_*s7~u3Hzj`u_U()dklBT;~1^yVuqwEg+htHf1AoW6j(%1-EpL z30-!QOaHpqrN`g8BUyTI4ER(d{6?^k+P%zrLD%l4eBwn%$X$uyrN z;m2Y(_%>Xc|CeQn?7Duul8B=T)9?wGwnp!uboaT|H^DVb%hW}7} z7I5&})(s(9o?3-Rq$3)CFJEVT<*8Gbw65#B_w_yR9#_`h__RUw_}gm!=wokKva(HC zQ_}g$PU!CRj$ZnqeNM`XC7Ya9JIqg6F+V}4X5XZDIsYer{8!>Rrxud)UUiOUH+&&Y1PXl?s<_5X_}{lBB4t9P4pseS&)n6tJ{cmHZkycpU32)Q=KY`NlYeAd&AIXBuF$GU zm)A>Ae*KH(>G_-U?Kah9R&r?9S8eCd!^N2>%iUgclk zdtmY7d%;<)x-9&grhh0;$eS)+%CNX#?c(3QtA70we#|*r>O^0@=*gGa4-U*Ua#<83 z`YLf%PX|9wbnByTjmD`ytJ2<|u6@#eZ;W|sVIpOuPMJ;SujMc1F|Gz&C6kYmp%)Ut6ufBE%~+iJSe0t+4G z<~&<|no0Pc!h>D?7j&1(r+iA;nId?1$$96CKW$#76tbF>OHO^UdP|`UWB(>k?afnX z?)k$#^_9egrocZn?O*@%Jlj-Xn{@1niO*r@kZ+r&b94QewR_*+>hJH>&u1odT+8J4 zxq2bb{-pZqrw^Za+pF)&Ugq&(y4<&WN-_Iwe(VYTvYu~awSHarzW4r<4x0+>Uzk@X zU~c^F?L%qXe!xKncVJ(Jb(^K9l7 zg}jrwcyL?9x|`)MGrisJL`6+jZ}dsz@6$0W5p@!qX0&MjnWG!`b~*A_fBY2t>!Ey% zlEth?!7NR&chBsbwKppB-9^cF+$n84k8yvydr++HZ_3B~ml2^456$eoe#|p+bI$*J zG1)2)ro~O4tg2t&;;2`&{NawdJjq8t=eDH$<2>@>?!7m8bsU{}@5;0*-$kjOe3`k` zQP?_5JLM8f|Mc1S>)SW2H98nB+rztNeHzQ!qMxeynulsuthD_;>2llcmG-y&^fl+| z`M)VW%AsFA{U%qE)%9ODyA`(0KQ5zj?sv%Cl=~jp=O0XQWb5n^%QC+0w`{ikAy&J9 zZyDE3#opb0ueN+?e6N(u^e2kxRi7$1T@6YUPWrP^{O-H##k=bRLVU_AXS|X-U71~z zVk!NSH~HE7g=aDkl-Jd5w7J%B_0#+{S{ z2(6!gWFG?q`(ejfs+?Y|wP#;=*SOdJzyJ69>HjIafB*mY`)|bC)c^nW-M`FT{r`Wh z?yu7qr@s3AW7faUb)l;kg{{A~FZ4Y}(`PO{@z!|P2ty$gA@(2fp{st(K6W|h_~lhG zzYUM;?wa`T&GQxSQ&yC}yP^Ex(?rWj%Re|e7k#{%^)<=y{_dFFhg`1TzOBC2Lu20S zYdbqvzs)`zEF@hW&*${WBGV@8#I}CTjB96R&9Ae5JZt&sM@|9@R1^{#-&r<%vFg-* zwlnj%;`FOkK|gCIISR3|a@85xOz8i)@9uhKu4X6K1#Ijk4~qM0B9?Z~XKj_%N$t_% zlb^vnyY@=;!|%fHVuc^F^fp-^?!RF7h8R)XqJZCj78>)7TSxk2fjG?7zh<&6W2i zOk1?;X!5`1g*Un7t&}#Y9ctCH%)NUjOuM@4Xx`(V`bk_~$6`fNx9$n-IregmZdH{KIC%BY0X2`sCa)|O zeJwn5Wq0TnYq?sB8K0c{6;^gl|MrJH{>qy$?5yg=w7`4@MUS! z%8H}f_T(&MM{1ffxzsB`r%Ua)Mi>+tOco!rat3Ai+ zkl=eOy*D4-7|U0FO}hK*O<-Ni_u@-Bp~Y2S{>ePu=6!ts_jNjR3Z6|%iRbyaNL`d& zU$<8L_eUqg$Upxat{+smeOKXcy#DN03XW$IyzA^`h1b3^`nzAUV`+`~pD7~Nf`=a- zls5F9y>I=+>Pf4j_5@vBU$<{(=$id)KTKb zr;N18|2rky7iQd_^TCbnW$XXe-Fxrtn{oE6eUm1q#?;>*RthL?{bjQwh-;c<;(xu{ z(SH=HpO~~1Y%Kq?`}WUM8yE4}|2kPOf6md{P<-KGt<>cI|J7H0{k3Pha)E=0>fIk= ztY)02A2hhP9?5P}*f3$js$-8|)qjW;6k+COs40v1I{A+fckS!1P4R|u`N z%RKzo_6I8#rY?v(KP5!SZTSnOr(a*mY&FyRo!qa(_H^mTUGMg2{C*o(qxiC-<%5^< zB72R7t;HMuPRVi3(>aiTP+)uZsXd2g2N(U{`~HJ#n59|#ggu!nYtpt(4GnxaJ0>c= zbfv;RAInw$RIB3j8`eM2{->|rX3w#p|II0%`%RzT{WxIzx?lM5*9VJtyr0_J%znhq zdiu5#R_DVm`lSErU-N#l+T?wE#HNL%y^oC8`B#Ylj87&@c`N6P9rg!PKV52i`~TCi zlK)pf{qtX~|1ti>-Ur9^qQ0N4>t4CX!g$(sqtx|(mToYdaOAb>Lfbf%^LAw}@t!}F zBxjcy{fIu@)~B7bCfgt(K&WMZT=AOS77hQ8-Z0$U`?0|3u5ZnvO*)*?=J`Kck9o`M z|Ia)1V%?#I@?EoDcTK*pSJ(CY_5CUB=l*`*w&%!v`zGPtyTqP9tYvhcy4}$I`_rDH zcL(!4O()i`54ih%eea7K`A=H_f>5zBPwybxoc1`|VpBv+edbSrpHI6MpPw*ZcJ` ztIwt-Hvg^uaI;kLhQVtQr*&qP4tLz2ufOM!I)(B3zU<3?gx};HT$Y{6ylMRh&$H{} z=X6^e@Bh}eGiHe-qyL&xqf~dPSEsM(Bu*;gR^DT~biH&S<6IT<-`Zy`-?ERKKK-i3 zj*0W$Y+Jix`M>I%1L2+6Gw&+6SbiWv>3uVVI{r*$? zbmiL9jq6YE{qaKa?yIcHvpmg;{;4>rIPWQ}y3fIRWPjAE?(XY5oMqmx*d18aL{;#e-H~OOz8St0;)%`U=KVN@ZDIqQOAuxI4*2zZ8U%yhg<(qLl>iUYE zoObqCt8N8~M8B(9yXNAz#?Zh2S?bs{Pv^hNvNXP|S-E**it_sywZ+d5-7hb)5k?{*9y`Qh&q0`CcCH15DtAN@2 zy+2o!r+$r_vvbu&^Hqv1v0qQ@Ix1}5aOv{m@)ydxMZOp7cTB%_QC#c8k?DF-`_(%W zl>>h}3N$o$2sl(L|2-kfD^L^T`YT4djy>%7P0j>equ|a=&?P#@9cQU_X0a9(YIy(u zvfauud*3hhUAwkcZM}N;*7e$N8-KseJ`(yr_W!G&_0iwf#_d|A{dIlMy3&<-@7vE6 zUJCW$c>R2G-iDh_P)NZfA!pzsa-+;*XJE*kYjP$5fF8{LV;V3Yh6@m&zol-{Q5VQ z=HwQp{5r}f&~`9-_3_uT#|_L*iG?jUQrW7qb%mH8Q|gK=L+1?X-Vcj%cigr$JF=N5#GT8K2EA ze-y3s@)1ki+4#ue`$CC7ot=!mI*;(=lCPclkeS$us-mB!KsCrfrpFtReeRfU%9Ggn{zvlxboy9 zCOHl&9o*+F68LAOd^~yFU2?g~($MHHQ7f z*@ja}6fe)c?EhRL!==ccb>XQt`{#RbLnS`kMdwpLy%IUax&U*U{pg+L^uIPk)F%aG1YBO_60jEBFISd5ILYwV()7>kHOuWM?0UOm-qxbm|E!x6^(L)<6cD!Rm-5!F z=k~^2ef58R?7zL?s^5bayZ*9SU8eNK?q7acj((@exb<>d&$zlA=2iM5KYg7B$tve!a6QyTfh6LSNthu%JLJ_I~xM`9JfOIN2rI>l;rf1WH8h zXLICn>b}jLZ8he|@)r!y%_SQPG2MTplJDo<4rpm*#+2a2A%KZA-dHEk6rv6%)`Ty*ar)S^Ce-_)akJ~qNRrUKAfopsAhx>j% z)qTxc|6RwbI{ib9yFNOtPY(HgT_!~Cp-#(Z|CyaLepY-q_-c=6VDNdF@Y;_$nnnL# zzY=|Rv^(y{$ye*^9lEuq>puP8)^9$wY@_J=-CI2F?N)nPW8t_(Y74u`^%sjpR&#Hb z*`h0ydSPLTP)nNb`S(@!hyLWvnyC2o$FKh_3p{_+E^QVwm6vR=|L|XO-H*Z*Zmu^T zIqXkb9`KsEX5QYa-|Kz<1TX$}x>M1-sM0{d+pUHU;myw z-E@7Jf=kxic@`Z0Pkug*Z?Zc3{)_O7_v28~EUQnBBRFFU_0WCx7Do`|t0I z9jcob{P-Zvvi?`p``!1hadU9oU-mxbrijQXzr>!0KR4bu^vmkHqNTR}n|VTWs~Tjs z9KGAWtK($&{^X`MM%T*>PdctyOEKnetc{(%=rN~79KVr^pX&hI~K z7iPC`^POLEer(%=b5$3Y%vCU!DlGc{iTjk>8y@R=*~`l~j(xW;-?lOQ$N%g*we>Id zmcOrSsoJCeaOxjj17nTU$XzD?;@=idHsVc|Wk305f_c5-^#CKG;BbYe?q5F&w)b@( z@1I(D{~uR+%BhowCHAZKl~w0|O`4qg&#zTd@pNml^zyymn;h5Ia4r2BetgFD)a1P% zR&!eZJLG0&K4VVK0fB0sGq%xs|GysV_PQ>S9ava;^zj3>%I`0Gp3REXW_-+{bL0K# zf4iiwxh>8+%Ren(>%CL2!=x_T=XB)j?7M56TiDy9wQ%;6BtftTd|r=<+(;cRGFJt?2O+ zYYwx`y)|Wn@7@~?pY`KvUte!}y?3Ya>9_?gb0(+PcL~pmet&(=H_RKw$H|F!ft=U)P;TWOfS*wENfgTej=Q-_&2G`}1mbL#s z?bkMY|F+5K#qr(y0@*^}bMFl6@4viX;7Pvrr$2GUUD{iterY(&-uV2~R__-|A?w4Y zT}*o48FY2k>#wKQN?v^(S-<>nxnA~`nxA$1tCza`Sbwtc_59sSF24V&81`b<+cPnY>VuH0q5s_k8pdHiLu*N2a>Nbitq z+mofyKBYmbzet8LhEZVc`bTlz`<>(Whc=&|v?R>)>Au}A*XNZ)KHZ%FK_!3Id|cGn*xTW2QmyC0joAt8oxB|-Sqwge4$jL!-fk6 zVmz#UjY$habVMb2oDv1K&YW6Z_()SI?%k3#dwRX}BK|aA{}S)K>gV?lc3ZcHl;8cO zz4iPZp|$dTS^jH&ty6H%{&Jw?goJmzozT~koM8VYYu4?b_jL`!nM41*&hYM6`CFh+ zbHm|0lW|GL%xMQ-7O^(zux!!wQ&AA(PyO!5^5=PycN71on%QBQtL|*gtKD7LZ})oN zU4{0d7)9&wI*yIIPa4kXE&Y1;Uxn<3^|qg(ER9_9;AAg<^b%3i7fPqI zy^4aIuD-vv=IY)bFTO-g^pk#a>h$wpjNk=-Y9h*-OotmbT+k8a=5#s~u)sq}h@$wX}2O)bWNCZZ5_*l3j4!9`Csm@zv*7ydgbHQ;4;6dj=vX|e}1z(&hFXr$ID%w zK7Vgt&R#$3VQ;zW`8oe)O}b*@)#x52s6YQ?$nEt8P0M@UOZq6i{*fm#@m{6Tg6tJ> z8#q@Tx>GB!FU`o=-z{bAC34O1xv(%(h{u%c%Qv?x$vjAOvYyo5ZsFzkRb$)9We@#0 zgcw-Tq}B!W+p%AFy6Rqb+T2LF&4O>qyNzF=0iAn~*@~!|?8PDXXr9&D2||YI;gf%a8eRJm0fR zHCxtSF1d5JZfAbHYE}FHV7_UQe-guO&YtW2t(RAye&peUi;sWI3isW6>5l56nKI`O zKYacF;hC0C_D5=^&*-Jet7lH7n*h5pIWEp zKM9BqYn7bg5vy_G`udHn-{K2|mcM-T>&#Wf%-gfr%k%TUfc-haM?{#Hqh!tW| ztWK?o8WCE8oGe~n*M9g~B07Cq_ugkSP0w_%7gvYU6nQbucT8~g)jX2 zq273fon-pcPj+9QY|XXjntew+^Z(^nGRNmR-k5k{*~;Z}swY*b?|vuzXy!@T`RwM= z8|U9Q&z0slZh9j}-$yy-`L%F<%OgAgoeuB#{N(G0MJ-xlygAGKPc2YUm@p}WE4(T_ z&-(3(>*tTl7pz{yV^_nL!R&CS^^M~BjUQXqE%Pt&I_YxjOh!w-b<~b`4#{%8PbIqno46|PpMBMzGcd{mq{nH~yTuFs23mk#xuqqSt|nbUVqu2E_H7#sHa zLu}lnZ<*`P&OW~8m+*_}Y>V?_ZfEaW^66Li(&*ol{`+&@TX$c}oN=4~m;ZJK)=%Q6 zMRVqS*(z|NVQ*MwPN@+8w-=K;zx-Qv%jmq^^k3{^W=db<)=$$pEhGLU>Mm#YTG76l zKhi3X|2z9%UjIq`?0+BPPNX|aKE3s5!5Pla6)TwTa5ePXZ+#-_!kFvhe!4a~O|P-( z!k4KnJdgS=ZdsiZo%%H~>Z8}Uxi5EV`re3+sh$4)Zv}G$qngMJH!&_&m&SwzGggRk zu{Jd&ESREW&c)i2m@uJ5MNpXg+NU4iuO@v8eYz?pFYoE8mJKzP`#Gwwgbr!s_X|Fwc zr2g0WS8|N`Ey21kw=IA4|6x(}a;drV=C!`5nm)7epQ-%YT@R!dZN4Vbo1DJy@~Wwq z!){uC>b~|?@Sl4R`;Y9~&)$39JU#1aw>z7o=}Wo0UQ^*~E~9PcC2GytHsJ?W z-kN35+`z!Vbr{T+`k4JOyCYUz?R!_c^Op9XQ=jil z4~={F!F}J=$IXlfW3KHi)d>~*)3CtLc>1xhyVeEzZr^pd+mE!a(rsK?Dlb%?$adQ6+5JY92<0Vd?HxQ1Oc*1Uo_zbi ztaZO{*v6V4-l9+byZ_r5zdn5R$$Hseo7@5_#3YhW9ez{$ou^0T&x1(kBhlL@e+W3@ zG1D=<|CDam>Hhqsx4-#2GEZ}4XPxk=Hfw3RXxGQ+BPAz)o$9s7;;>X>miyQg>z>CZ z?m5Y1>#Ze87tj5)?>t!8a3EmGHH%X(dc7A}SiG#LpXYHRh)eO|mtXOx&b;PPP!Y&o z@^x)>wAZJ&pS!yh5AHc|+9BktfqsEQ{`G8! zS3TKj<@_u2^+oxYaodzUCrwM3{+6xPVtt^on(?2i{!NStX}QdD@siUb;~z%vzWeU* zOW~?RPRF|WCGQ?jk`{fm#VE~M$?W-!H@iPuy%M!rcgmshWaEin;Wd1P&y{|hntp5h zhsRvr(&p1rzlz?EYUY=&N!WO_Eh-}C+~(tJgcFlb+9*lSIadAjOZmH9VOIYm{;$5O z8{>X$wdTA_O@-6D-q+4=vcC6T@Ops9uXU|`CV`9>G$(F9WPa&i+>@ufU&ZZ@d-^w` z^35>uWlo=aOCde@G?q0zlm?#WrNaR>1LwL*SF2rR~O~h|9bi{P4DTc4G^0*|lX+ZdoQ zZxZ`trm1`64$SA_TKy`9&1PH3{==&@Yj|xq?}&YNVrja-aVcwx$oleY@qb;@W?3`~ z+NwMU0DeN(wL+!IaVu2pZR8CJv02< zJ`rV(jaQFLda+#&v3R#8(Y)yIsa#>V>USOP)z1zJY~Qb6=vErCbP}(YLrzlHRD+Yr z`72-Szr93iztr`Uh9UDG@kW`PzxnBZXp?NZsMy(;4}%t;wicN%+l6Plm%Ll*fr9)0 z)+P)%99PhVQ_2WzmrqCBV=0Azts6(J@+d5P+HtQ)ikyDkvF7{U4Gv6MXQ}z8|NCOqpPu|Eo89)lr;#!K?BZgNeLNSf zre&yp=eZ`6Ic1B*C*hv?zgN$CddPnI`u)92*LI7%Kh5|2=CSv6f&BTgi8F4reE#~0 zt=;8!@VQ94O?CCHl}U0}+RDRbuJm3q|KIbe^@{QJvm<9r{~vH=N!zRqB~mj^JlX%B zUotpK>>%Gn`QBu0L2kC!(lTdi52ol!>~>$W|5sc2(XTTy8XGpWe_N-2W_R{KpIL|1>sy@x=XSvs>p=)iw8@{NPfQ=br1G z^YEdV`qOXC*H5ID*sNIpV_kNlK*IN*Yxw!q^1DS-8{(Jze)??c>7Vsm3!L=qoZhc~ zRHDD@XjZJ=6MOT!`bRhpJwN`*A}=+zcHh*C_3v3Gbx54-kDu`A@$0i`-}b#RJO1~_ z1oueE{WoG7zyDqK_S~{HLR>n3PhBu*pU7ffvd_fIwocMp|CI5EMx&?I`h88x=cla` zytuJJ?sHVudG`(0OV@b(k34Q-;2^wZd9)9+b;GfXr$rWLw7glXwKV&7P}G#KDekLJ zs2(e>$}hbtUj6Sr)7jSbd*>gyH*MB~RlBeI6x94|a|m&L)cJ4qLDw*a70bM4|J!@~ zOcAs83X@9{#iqfo+mf1wY$83>y@kZF-vyu z+`2j?^!ls{dHWWPEgsju!_4ixWZ_59?n0Z)$HW^y|rvM?9x>KR#pBnU{K#>rB_Si@9eWD=2HrJ^aM~ zwzyCK6R%{M$r*j!YHd#GgPtoNUo@TUC}H<+p5VO2UaMXwtFfjow(?vN^3>?H=c#Mk zHcsa}aL4(}hf_aolkQX+Yvq1Eq(1B1+r!>_f4J}GJXsxmP@rjY%0@^;zpqd?w=De7*ng6565H=@M*}JVPuSxA&e7yMk4xM+g zrmUY6vvOuUi=W`}ULtH`#cW=aGZW+Mf_OBh@9(ZEDG zmuZ@H${#w`{CGFDCvZi&+uG_Mr}oAw^ep#!#FEUSxorR1x}du%Oa8sA3oQP(>+jL( z^+Dpblc!tS)@_>b|Eo%p4Yy9Lz5bIgoJaq-N&igNncuw0GTD9ke{lgn!%u0-{59QT zUwuNu*3V~q7W^}!L;dRU>CfLEnId@a`|_C*i=K77U-EwU$DikPc4i*DI_v3)bgrh& zcb;hc7fIUQA;w}c^Ih%6IXk93y_V{jTA6T8#^v`nohEkamCp`8{F%%0=-oQ+ck92; zfBGdu<%37i?YQ`z#!DyoAN0@qw*Bi0iDn-6&f*<=gj>7%0>b967Z!6!eHz+R^`mWS zr_j^A8~x%fzuLsq-#3llQ<9*=@kmbm>E-D>4Qj4t8h6+DMgQfuDk^@u`szQQ>+koy z3vu69-4wfji3kVNhJ{XxE&ka4o6zaf#mZ#xaH?}#X-7fz;dWz5+Ydbp8E!b_@vUER zx-52cL&20#0pkK_tGgMcBzc4u+&O*ldBf+U$5Pk5 zGrm;$L)dSI#?skA7nyym4yRVW6JDY9hi_(iv-<5XW&7j)Myek9b@bQw+SPKeu5Z}L zB=q`6!0uh4cghq`UlyHqG-?07f@Nkq|MC3*t&U0BbYrn-*8vvs*n=+4&c8QCM0OP2 z44kqrs7aa0L!g~r+WYdw9{7Z3SNwP0N3Yg&fye zd26e~?E}lp_RGgsn!jeP+8Q738FXc}=;TZ0k3AeCGf%%`UBuA!h=nMg0U{YSG`9B^PYJbSx-`*QvEhDc`SCJirHjRc#(nOE0-UYWY{bLYg8 ztIsa-CO`ehp(n*18Xnaqq{Jb7it~i%mkm?TZkW*E@ZaiDwdIfU`i#xA*04DLl17L9TrE?yPw(oL#(*fT+pI=Pc-nwZInU>2L#-!ZX`pEO^hf3Q- z|No8>p)RMhS%d!VwO@TC-qy%vk59xo{+Er*N*WKG2)w})7W`m>`gVrGV8_C_kv04N3#c31 ztuNiLqR7U!FGi`k>Ee5VaPd6X{OK#%EEjlLdv}MF9A+vv+@kd|_He0G z#?@(}|0Yk|aQ$eMrf`qHZ&1c}r-+W3%0j6dlIKk?O5SG4xzzjGs{br=kJ$EBln8O} zzI%7^dqy@X-RFE%Vtg4G2Xv4!mQ_AJ3OcN|~vCy-C!%*PK5NBQjjPpdGD2>m7~^nQ7Ht?JXqJ&WQv z%KP@+iF*~Ccq>&_%}@Hn#iWag|E$-4JigCD`Be(vSH^mNlk2-2BO32)I-|V(@2Oa! zTCSjH_rGFcijAM;ID`U&* z6f3J8KhDMMu@w|K(kP*JaDh1^V~m=5-~N9JdG9{XwbK8e)EU&YUGZRN%)amO7arxk zuipQ`P50iL{5?WaEonC9Ulg{@Y`c?p^Y1CgbpQCKMN;!tKl1$i^x~the5VeyY?>eV zePYG-rRN+IqB$cr?f)-$xoDPM{Xv~4LQh!(tp92KYu<2PbJm7Qzc^Aq=hzjk-{X)| zTko?W?)6pIvrH3hxJw&Z{66S79MjgnUM43I9GuL-uVz}@Gi%bm<<6S(=e&Ni;`i|n zy)v0!MSeaLkaW)4%ssdJ>NAt_!&T}@cNSI}M)Q3$DB2Ko*rfk|y{WuI|LNQDb5{E- zTXlM|aXgQ-%_PZ-pDNDn-*i|>qn^cPi=>X>>3P~0v%j96|4{V$Av>-n(?@JsSEjt( zV^YW$w%oyD|BL*Vdg{y}l}l zN518($C0Ef`&MxrowH-I@s`Jyr~h6`*kmku)ob_PchBB6%48|E-&cRXx_7SXS*{cb zYp$$?U*>JT_Wwuy1QFA(M$G+x`@lX%8sz2lbr8{z5GIKQ9vPkDC!V0zok-3(TzZtwr?CCfJV=P@2O{=hd+ zZ*1g}uTZwLkUM^+<*3cGCq2b_+XG^BPk6VjTc3ETu+?!|SL6L8trLBjayK+MK4@^f zia+_aT=Zx5L6IklInO%VH@thl_xzv0M)?5K=Ya+d|B4)~dM{f~Pcr;nYe3VM(_weMGh&z;z3o@^c7NeB$-BaQ-DOLDFIQR2 zHrHX@WoF~^Ym34Z%xjNsT@$qF+`g(UyS^n(y#JSDR*6%vsG7#B{dsa< za6Dv|n4Z7)Q}~0;25shVY>r%EVa?rfrSR)I*)KiEs;ph4b~7GwjlVwoJp1eEPv1q= za?9@i%JP_Nw_@DMj*53z_J*F9`&kz%G+8FmY2q4YF_#arzl47C6l)%7<2 z{RvUYH13;a(=x;#olwx)6P4u`U8lY4>@4Q&%<#~y4R;D9rpjG>eB&0&j^6d{d!N2Z z&|W5+)svT)-5x)+`svRWNsES{@JB^K2X8-%P%9BK{ZUbxzC?e? zyBIHh17+(S?Yh0yi%#v0Dysc|!hio85vfS2NmrZd3YGOBB^Pjz_D!xNl=gl3H?@J4uT!j5UZmQ4TS+)L<+G7<9 z5oKfh36sC=zcc^#^rqV@gkMWvtzC4*UF%3y!E!@)^;QoK3)Qx-Y@3cXeEiEe`*yyF zg3ydzf+yejHT`~OfgPl_~@rUrL$*EG`v5zLn3@@V}J4F7v-XVckFST5VGc> zO5xKLKaVE4vwcq1@5#)*vGDU>|7k}Plf&Yl%n0CL9A-WD7RRe~`D#Ktc7K0&$XBvT z{k`(TmtW49TW{G_(SBU`jpPr%FSjcru3vxnd+*Obd%yp@d}fK#!D*9^X6>1OEB5*X z7wO3AlPB$F*|tyfm#kTnuD)*aGR{xeXZ<_*>2B5QVw=71G;jJI|9ib?dPfFZ@zR(d zO_uq6dp<}rA8D1EId3mFYoy7J^=hxy$Cu@tm|3?l``Bc?3RCgeWwWK^5}R6h@*EiE zi-~SpGiBOD_bq}I-BBH1{PvxXx6PjX`1jI}<{93>IhW^5vlOl0Tr)p4`#Gn^#ih6Z zE!5EqtyM_pf8s+Vb6wg`yzr`-U@8v;-1mRZMI}Q)?J$ABfvSN*qX>WMoeny5# zoUuXhW>!R(Yp_=LBH_QTVmzYH6WwnvVA>uVa4|FC?vAZ*`OgcKDaSrcblkzfz@y?g zOP%u+YkBDlpNR9*kA&;y?X~w7jVaUr8+G*VN25aRYZ|jV&0F@LJ!p{;ah&^A`1Zsc z=V==z$3b;*V8j|A{O#_T|8wg^B}VPP;v$nYea2? zL$~<#ip0}AKI;}1{8^+~F2Fa3?~HEqihmVZznd;E`EY5CjF1acjl`W*aWgD`8tYH~ z@N^Pm^g&ybZ_2es=>iNa3=Sy@404QD_RDYv2rx0kBzgzBE}0*=vo_&Af^QGm5`p=H)mzl)rNf%mk{eBds{__~GmWyTZ{o=&RG(EjVuN*9@ zTwh)-*`fGo=OM0(zc&7hp1=F+x>xU>$=+DOT9NOO<3DT1|Gwt7-QOGEG@QBO%{5`2 z+`fnicY}Xo4pW-kS^AQC^&{toI7PAS7kE}Rr72cecB}ZG;KK|d7c5x0gydhHmRKWo z=UrU<0e45;vuUcGCW)<+4Dmnckx43*#LuM>@T9&&64-==G6Txp2N_6;7Yw$|I?lJ`s@E* z6)qS4n(=q{Z?*Uj0uTK8T4XH#ZK|kPz-D)-?bE6iGB0?RU6}VFS$|!if)A@r0Y?lw zL;noX?JrGpJ=EnI8IQE8G({(5t+8*M{_tP-(-YdWoC0R;kDB*;ddH9cYis85a2_>M z>gQzq-7l8DNAz#s=`+WaXBtw&z z>#vUYzW!QMZ_Pc1oG_`)mZcBOm<5>*$wj-{8l-N~iaw*RG3Vgbwk^oQmjl_TCJFe}`Gl7)qUN>6hl_o^wR;=*s^Ib0%0e2W(p2@bvxy;o5Ey zvjPSiIUD}t4G*oBu2@uB`@QQOn<2-GXtm2Zzs)DVP!Q;vXJVtZ_?%0DGK0IMf z>HR$q4+?LT+Z!98ZF|4;)8o%sKWneXs6J*^zMyu!$>L@I{#3)6)A!y?`dLsnJM*28 zWsAtn`u0Wh|5p_LF}5qx*|LAu>x)I}^n&XCzur*&v0wkrw)4|}<_O9}JUxBBzoTfw z5?OApxW6GD76OX}Oq16e2ezesP%-=}XgJ}4TZWKNPW-w1mG$3x%mNvuB)mDc>Y1Hb z$yxoAcZ!63BGa-WNsGf;|Nq2pcH)cu|)!9vxdbqW3TFup#9IyKK`;Ohz*|qyi z)cQzYqf z!N`e?N%rENGKP)U<-07tzTh!i`0werww~Sdj(-eZ6MufquVu5(sF&1m9-NY{;`^({ zqi5>nbw|bR=NL%ao$xbIuy>si`%L3h_`l)}zY14Rs;vFr=8}5qp|rd2lz%6mekw7A%seSSX@rYu%DAZ5%Y`s~!hSyLbNFSmEI*7vymF=d|jB!{;4E1e%( zFWlN`^y#PFls`@)y4rXDEm&9JAo$Zm$Nc||d7s#-SGV`wH}}2M`tR$>Z|{~|emJM~ z>C*fO?FZvK|2V2myVAtG{Pn?-iRToQ3wjr_eU#&{`Ow$g@Wt1}$0+ZEwEmK)2}Ral znv6==+FL4ZH8$>=TK%y9MEV*Rw9G zuN6`#Ni%49u2?2hcWiOu-!CeXBz zL(HaYRd2?kV=Ct|1m-=HUR&w$jrZSP?>)jdQq64+`&{stXew~hZ^Ln2?JtfCIW&s` zYEpmN*B3^-cC60$F#V(HntfScpDkBBcRqSn{q-Fxy4ptJPzhT~+?GB3k zYt|i;ob^lj(S_6Bi=Hi+n08z$>m&DR=5MABFQukUNjaqIexqgQ-F=&!l$Cw{ui-x6 z!t7xz`Si-xP}?8#Z*y+_aox-~!fK|P_P>>WAtt5mn=+Oy398ZKV@)b|IHMiC!|qe< z2@#`+*LxYh?OV;2!fYB8@x^A%E#bu*wv?xO3hQ@s#1|IU+_*9`tU_b$4E@5=H}mVx zy6xV5dBvtn%$uX{cyoOSFk1hmY<_fj(wXL*ddU^vmmhJ@skh-fGB5f^#J2gyEIY1$ zNEY}%&FAe)^B-ALbGTA118e5HE`4Z}YvsJYfA4O-sLQb%oOgWvZ!k-Ed)y+A?|ar{ z$0nWpvwDl5X{q!1t@9bSU&Hb-1!7K5fs_kU%a~|8~{s_`jpYX`6kgrnl*`6);vfn4U6@L*w z+qw3Vu3DnsmRFN6`EJNbboG(Rv)Gw4au9Y0YiMs>;;oqvyRcCxW{>jBhJlJYZ=H&G!f6o4C@4ug8;(OI^$vt~>=dX*K zJE3)xUeu!i_wrAd?LT==77AUjB=4ae{`h109=Q)Qem~o>;(ye&X=*3Cil1D6#QpGb zjbG#C%+zzbf30eN97y2csj)wBntvmo*`H$z`)>wDW^kN-{dYlMtk9;vRrztVFG~8? z7CaX(KRvx_wb|`|oC}w)y}7Gu{}Y`I0kPQMP0+clbzxy_6Tled!m3r^~2r?x^h%u(`VoYS`cE6(^cx1!BpBB3` z&)9~4{29l-+53L8)U>j=|Myj$*6ua=IW^xP;9s-TtND_rbv!0qbh=YvI<>xrfq`q9 z<1F>gSFH65bG!sz|J}OKynJif#@O&nU#pHr)c=2Vm~H?1eN*p${~L2}?XNHM)~%24 z<8xb|ufvnj5wN_xoNYDx=L7qsxCHOjl-Hm8_Tzhxz~j$1*4fwbxUzru{qcSx|2qly z%y5;?JgcVl%XPM0%6v9Q-=jKt|8d4XuJEnDo_-2=x=%Cy^}VP~OXFVntH1xX{nn#h zRbOA$W`DhUYpeMG?@e-TwKGGMSG9&u7WkjNx73V>gO`!*w%C!cMjyk9cQB=}F5J9R zcKPI@^UND{4mB-$wV8P(v-<2D*?rmiYY+V3YkPI9Oug;kX3fNHOLugKR4q7QUoW4e z8Sj(*JwnX2hgqZU>#wYyoy!znHA$Ug@G&-xXSx$~p_*0vlTe`4oQ{3PlKBTGY}_?n zdRqVEUhT>q&PN|CW$ru>w2p6<$cAe{=YG~i`~7A#(9k@jUBA1#`I`8L4M&UlxB~AK z1T`;p^x&SOSMb-$uV&MNEeiD{9*C8c04!3m? ztec-U==@*fVWi1)>40&6T4^LZm-YW2OdfpJKe8`R|24tw)Ug%kGIOhbOud@Y%EV^D zakSMmps<6%JGzpG?+ioKep}(Gi~7$@c>YjJ_4uQ`#+!b&Y)tG-Hk`<2wV`Zjp-r)W z?uFOJVx6|F4Qi5EbGV-=WhpPpzTmn#bp88l@vDpet^M`A^tIme#*3{T2Q3)elbKrN zZ%%$-z_w%!_XK6}=1o6$ie?FNx~XJNT397fuXlK%*t`#$c@`R8JMdt3L!|%lQzv*L z)V}||c!YbS;+=@>hduY23}bKBe+`RRxOe)aH(p zOTV6ew>Rl;>UrC>{tsGi@2@jIV#EDxSxd$9gU65Sz2W@uoza8E;BuSVVYMcu*z4c8 z*2(H;{&`nAKcx9Dd&dR_0TpKf1_6N-35`nu7d9j;ir|p;zW9Ps-BG4ENtelS#X=z$ z&VZY7A~WS!Y|Z9>&55@Cc7)^6Z}}HT*U4QrSM@BpwMj^G)>pxYe?J>E*)G(Ne<=Iw zYt`3}a%=(Zjc;sp&eyGu;1FO#t?ob^SkE4X*?Y*e+ryMB7tUEQKj7pEVM zoM3h@tWRM7(YPaRFRH$%Zm+E}HqYa>-r@3np}|c-&H1}i-d%pM=$EnPrHcA?uVO(> z-5FIy|E!!Eu5uVHG5__=g_l_-zfI}&_m3J|73=?>5(xiPQ+dWnyU^X}YPfUA|MKdt zcel5D7cR57ynB1tlixfJ8{`;WnNo88KZ-ASSl-mdF5c+0Lu>EB3$sl3i(MDE+OB{7 zoyKj|TJZ~)Z++FYEPlANkR>Dg$x5A5RbPdRW+nW+|L{+d+vy8e_6oc>{Nc(JQH{s9 z-B#Y)WxJoH?M|lY{Wfnq!`jbEhSR^T{h_Mln!H(c_uO~NI(4F_i+0o-gAF%tJ(44d*AY{U713Oa*2~qDO zEC#+I1$+*>Zd$xrqCC&>e#^P48nb=+pWe0qd}U=fLn21)Oa0cjaY{b|FHL)Y$aQT| z`Xe+euqkNo-T6KnPgmOfkNV)^RIjyjo7%gdwcKyHuV-95 zRjv@}u-(SLQpU4ey9%=`8IGI)9BzvAqw z`MvklhR8c5CV>aU?|c5Qym%XY7GnYTBt^X~PYoi{h#eaaobd+OiRfA98lo}GG{|9$zNX_uaU>l0HI2sBVL z%bTnH;Xpt_|69ew(sS3YN#wY?Nn@SUv_-cbS{Z6R$hOaqR2Lb4|fe> zB`VJr-qtE=I3l3pb=z>pr29p+GP;%>9%`N61NMGv+ETvKfH&JR*vpzs0nyPH)%w^eZWU(!mRI@Avv_T(D&KBc7Y9 z%e{}37G@s@3;v2TtA8Jz zd{4M)L%W(*fMjw*<`G`TzFkFCIhUF{KCS%|o~3#6XM|wFkH;$i*^K^%)C=lGzGW$@ zH`x28wtMMts@2pl}J^8kZ0tHJt^s}+rbyS@n4Utjhm>`xPa|9Z7ERm%2l z<-3-dbsgot-tj!~rMLJ_-kr(n^Y%*d#>KBRYdtLBtVt2Cb{MITh^_MKKpTSV^>h-iO~6vgK%|2PggbtTyge0%>%&9(d2+dobsO5e*ppMFeonG;sv<`+}1bLIXP zAJI9Xr}L`~%FhXYuWMR!`v2G08a=Z!>z}WmcVF=J)p*v`)BIyBI5uwLD6ctl;2gWv zxmAz58-ImcJ~d`!5;=5xWCNa?p%eal`mHCV7H{1I&VoWG$-!ze?qesKe%mPn6| zD3@qcW5R+E9TRRgrh|?f0<=VgIzdM*x2OmzZ4I7hTpsl-&nd`td-3emSA`<}e0)=5 ze($*Z`}_7+%_c0})&Hr~=JvBYPcqH7E1%c1Km3z*cl9<^<)6ZWi6zs6#5yHEuAg1! zxbAk$x8wi!v9auG7hBW+*-c+0TzGGk%cH9s=lo!>Z(ky#pU}Q?)92oUt zxaQ6^JXu@0G;7P+TPu!qx9VS?<9L8kO=KdEiZCD3L5Bko9->^VPR)S|SM&spwniTB zO5Lbgx%Q4eS8{jPWan*<>-}$=+eI$g=e>3Me61^UieGeJe;2*$?Vf+)GfW;PmFK%zQyYUcOol~?T-aM-N#0zn|&I{k|ZZ8)5oBEl5N$fd~%?&et@LY`VC|?$u zxO2siYd_0mBe~{9sopg1z4LnAG!~d5C@z3;4nA(`3vt{@8 z&HAVLUNhNnyH`kG{y)D+PJR8R;7xDi-gSFzUOang`F(kzIcdQ!4qZR5ZqLL&<8Da< z$DKtE9;^R7G8HV2jI^nq6FlqXrTI&fFDO>3)Tgxu?X6hO(ldwQ0i%Y<1RW7&R;Ggv z8v=AxxJ3^&Caefh6X9k#w8&vbh*Ns4YO%rI!&PVF!d7(d3@;2Su*h>h_p|@f_Ll3P zVo(2)-uj^~{+s8ky4U6B{?C4rf4c3w-f8U%V#oG2-VJ{h!Ikw!P3hk!)4EIb=kKmf zoHTv*k=w67JURUA{>PcRA9+qxW?3g&`tRf3dM1chs6b2p#-?)RS4>Nb>m(kUb?z25_-+%L8_^bcjcT(Oxezfj>P{f0cRiA%z3upOm+3K_;An=3ViquF6 zSJj|B*E{_e$S`V&OvqBv=4Cq6upvQDMVOUok;94!T8_r^kDs6Q{k8S(IWt%7n!R(? zl2xVx7aq-Scl%gkHIM(~tNLel_AS{z?S+ls+poM=d*y7t`q_CR?VH7R+?ggDqcZ*W zCzYZ-;iepq1OA79wo~)5HP{+(!y@R)!rQZd{qx%GcBT8o(R#U4{k_#ZJ8y>{Yd^lY z^!}3o-EW7lDtz0R5WglpTUlN9(?0eV{dW&*cZkneSN+THU0=yM2AjXjE^;WAUW(kA zHOJ5^Y{HcFDT4Nqm#)RE(`asB)CigI#)Vsy>9E6w0Iy{3R;NZ`%eC4y`Lla`Urh^r zHT!Dlv(l6ccXvEjdb4dl|F>DproV=Z3Wuy21%+_m7;-*32 z?#JV#A2W;SKiu_2ks)y3e#i6gxc?r0y~q3h%^(F~TPyj(+Vdx0K4kc2w}Ab2kM-R$ zox?KfOBcqv#(FM&*%`XLMEW=A?EIPkj6^j;I*s0~va`PS<3fSM$vaPaIs`R0yi}dh zbh?(yNG104M!QL`&V*-X2~EqM!e%w`wg>YQ#j2^Rrf2xvc6;$dpqaBt!$Cr5`}}|J zK2N*gw}PR1? z@8#vCbpro?ZLeLoKkEJJ{k`#1U$4KuBI&o_)%D?P!&m>?`|F#({jUc1wz8%ja_&rw z;oGaX>D`GqRncRy?qPv-hxD#3@3^?9SqP~ee-yZ1*DSk(f%Tvg1Ea#{1XI4a3U=>? zkJ+yR*w$K@G&3a$=?dK8|F%t3kn!4a>67*c|M&f!qjI$C=$XY$jwx~<9{i5$FMb~J zTx;inzP&N=ukQYcEd9D>*RH2Kv)=3fzj`~|yz%3j3v(u#%Z4;wyj`^N$g;x8oefm##Fcr6KY2mWb}0d)emrtkZMFCV8jn7KcUUo#IjG z=H?I%;@WVuu~|2N!FGw2$Ldxrp8P|kTO*uRQNZyuXMw|o^(kRDg;rl)FY*0vlGBQZ zP7nHhzB`}o3hg$r3|v3?F*9{a+s=_E%l5WH2f zP;`UA3l_O@m&w*&w2Z)$kWbi;&p2Tkbo&VZx5Rgse+IJk)%;tKki~f5UG4h7rdm#~ z87C}K;!K3wJbX?ybULMq8WnO2+Epowerx+$7@`>*eqH=)_A1$56Mx$7=s)>GFv)|- zM*S6&gigT>$G5LTr+;R;)6?2Fe@d0qXTNML1iWgi0pEP-B zaqvDfH*7!#~ovlUt=9IS8q`lt?eI;*bq^yGKFiLU4m&=Ry_z+f%l8(^IT+qy6R%k#S$7=&0>)Su#0Ti-dK^VuO2yLJE6{HODseRzCFlFp3)#$`L0R=hDe zvoR#2F47c^>~? z_%~`@Xm{Jf&xm9*vKc6U5J=Sh!Ljik(=skryPP8*7#F{YyRWh7 z@7AX}7A?Z>cO-RcPtCf>xYna?32*(u@RoZT))Tv5>23Er9;XmFo4>_!l9SCHwem|w zvx8FpUKHn@w?6;jl)q7*g`3XIyd(O-`Tbdw86P~7}Z=E+;io{D5x`x+kZx-58Q_rYshI8M#}@xNq}rTV2k)6%D_ z)8a+ zSqv<)|5=W!_uM_R!|j*5=l!nZMc1`OLf4;D$bEOS=l;%?AS?gJX%}xl{J`nSpmkQS zGr!+1!l3fg!Fv|Y_xFEO>G`K|C;XhxzW=PptzyjhA`4RgzleEhcIT(X@rS3+PyN%@ zuAWjv(?+*ge^Gs-$ld5)7q(8eg7+Gi#(s$ z*L#*d)ShyDfm3m5zSfjI3P&!-ak8x2dqZ7xXN!nUXFy&?%~Ga*^9c`NZvTFb!zx+f;uJkk>$R+YElm*t|CZgo zWOn-N^G=Wk8=IsInhy3JX?u5NV6n({;Y$=|sB2Q8E8Oq#rXQnv>;1-(tO-NJkO zP=0~4%V}-HH|v_*I8VN9iLy-lV)E26^xX+I{z|)FA7YQqdd73U@7GJ$iM=dlcRQ=f ztj|sevzpOdbYs0$*I%q%qh-?1xjx7tZy9qt8RXO=ZuO|99AWt znD#UL-&1@hSM}+XUDFevSqFW5#?f6~nG&s9qo{su@01rOv#-9o?|t>f-d|tlwOFXP zTUR1&3yT1`!-d*%oCbtKdmeNu3BTouC$e&!H-Y%>i&I()!n`8RHRJo7FeW~ z208`&c+q_FXZOye1#d1*`c`n&WZ%p;tE=TgwH6CHPUcVT)Zu#Z%41RT~%4YZNtFL!&=Gnw(T=M7F zx<8JiYEm1D-be*=ZsB-%|>3R2K9xK=XzFS`6yh9~y`KS6dLPFezGiCgj-BGPs z?0CC;0aw1i)_r&h|^kLt$Z+z?uCx5QV;NG!1}(rrtV6Ae^Y+4ABdp4;|pmuQzf<+y52ck=Rc>^!Y2@9SL5&W#Hp+ob>B<@Eb-N2S6wE&JYYO1O3UlB6-ScSsd8Wtv@4nGzZ7r|fQ8-ukxt_hw z$wLBpQdz#|W0tw7)%maR)2V2fAD!&d(zz?*<#hG8*C)&=7y6m-q1<7*chQ?EQf*=$ zGn<=?nI8X4woKffCc9QkFpP)Eq~YqlBqpEb4)SqAp3K}%TLrHOc6TJ)|03A+Eb8&Q zmlb`xzCEg66aMDR`Z)d5p5L#;W&deyyJc7-aLp_2XQ1t>tY5GE9XmLb_}*9f{aKo1 z{`AQw(@GYB>-TQ13{+PBKI37=@2ULIwWmx3T=bOa1F#`;YuyJ#YWFedSmG-i?{OHukT!<^S+F)`@jYxlBdYU3Zt} z#;##`$^5u&w%fWl_XQp_9)A3p@pvMStD!ZKNVz$J6aAp3G@bz(JJ#&JFly-T*icp=MJ7T9?2!Cp) z*OH8Mp}Jw2VJd;_zRb~TtVmj3@L|AFIxm5hh12X8*J zSlkC&yYFHB=i@en#;^4(UG-FW@|o_CTLOYCZcqO{bYe>6FMDgJ{VYjB z&yPK47;C@pQ+Sv0I;!N#8IgnPTQoDb3R#<9Q(nLD_+*p)1&i0P^VD$jz7b*E*Qv06 z8&_LZdY;8=PsN*`b)&xWSu{D#-E=MLV8c~?ooz*@AIz}hJYRe@WVyg+gN(3gwg3Of zFgZ8~olln9Id@InOkM82^3SEKrygDF|2lrHZ=Lk_%f)L8l%-pyZT)C#XDRNBYQ0v?;vZ`KdXJYA(x^vYpa_4`)Q~&;J-7JBxE9+wR>{D`@ z+A0uG;8nR~=_5nu!}c{-*0y9gxwF}O+GgKZ|L8GOOW=4E^K@#^@}cU7zQmj3!$b-msE_3G}oF*>`>8*ZAUGsW*))5WGWg@J39o##nl z-u&*=Y{_%>I-b{K?zq%H{#qJ-(rMbOm!`ET3v1O^pL`wu`lkK<*YyDr1`}Rw+BD1J z(X>B8okqdam2JYV^B;^kJmbP~!#5KT{CGE8dXGrO`)$v4Q&${v3wU+G=vws!BdK|J z_Z}}+)xGq8XTZ!~s@jaMljnKtx#tmkQcLKgT-WtVyQQ0yl)3D@PFz}yz(?>V{%=eS-bUE8s z7&$%o^xw&jl4sHaUY(dC*LWyx-bQWV)3c>h=Y?qXh&*iHW&UZo{QbY2RZFM+(-zqj z|N44EyUg0@2mB9&eP;W?WMrXa^j$opd%9@X56+29CwG64`+B3tN@u44a}&GKEQ=4^ zQxcd{u3ih&diPIxQ&jRv`P0Ad_P$Wg?0s}b#rOo;m=dHi~_)s5Xm$2`~jsJHfxsF;>PE&c@58Jao4jboc}f1{r~3)?7_e0q&@W6G>dJg_TQ?N*{mAJ3l^+s z?`Qt>Uy4aUis6@lkl^n7(Fd;ZGK4Xyxz{NQaD=prCZw<&TBxi2X#4bY0;~*sI2h(| z_^9r%KP0*H)nwL%9~pnhEQ~N?TbRbc?!5i#eF0781u?vr&)l%@nW8aQQsWWNwyJv8 z$EO`$MOL5pdVFm1|JS#qZ?XKZJnrx!VdmqO1DnJi=~tg)J5rJ}`s{BUe~Xy@ zoNt?ViDkJ}@5$h8T}thDTxEI_u5;?GnO737+7!sRD{$wQd)4pm+GxP19gav>C3~f9GGbJHom_BOn?rbmix(ry zqK#~h=EoWx84qk%i4<77{zuaR85X8h_4)UcfBct?V{E$|U|P1tbFOZksK!m5{>7F5 z*6@C<`S^ZCotbSDk442?y_-9p>wgV-(4@lf@Iir8!-@!otm?;G_#TyQc@&%cw^PCH zbMu??-ZhRdwO(caIy(KjzV5{tSF}$bbXQ|vR=e}(_JW7So^tZbZj@=n^KkTXAFtx8 z5*L5GKJ;CyLgV!tIV+Yfdm3f&>2bq>zoq_v??lh8l$jNtEwcSyNB9+{9+4Ft)A?u5 z{#4JjF=4H(qU?{f7}kc?@I-b4y@Vr`ZfwW@7)9JTBCj=h^BKX<({wlgyP13Z%)grj z)5Rmha{V(6zPFrNYp!qNz_Wa>$A!m?8~$%!DEzQs?r;;=(o=JT?TqXe|9*YrkjFkNX|;! z_fNk5RWa_`se@rhCEKzl)lQIS3iGMl{xRiq%vKgTljz=rY%M*eFa`UVU&;0A{gn|L zT~igOui(F$Tv<{2HRa#?fFGOR1-^Xz>CofzVd1Ohf2n)_#cx;W`>Q8SU03NpT{WZi z?DZz4-Mjbgt^U#&&~b71<0OfP?0X+Pz4}x$#QIyZO@e4ujX>v}e_On}Od4lAn4a|i zVC4mc$t`k*{_Bsm=l$c}C3&`HuX&}t{Fm%kFK#Y;BT?|?r`E@wsH;s*K6&w%PCeMr zay8j|-Xe3QJgXSb+0{7*LzkRqUSZO+YerVa3(YM%A}4!KVXAam`o}JXZGGOpOLq@9 z%@gw0&De1`Z0Xh=BJT}8);@d%5m0!}%71M8jET7mA*HPW~KyBLBTaz8t zf8J)^de8r^)3@f2cT)8_PCa{|cQS9Mv(f{}@OM`qDJY81neu;0pvpA$*u^(e9&Vnj zEKvDa!+EK#>z=DSZo5{y*9n!@8BX7sw`!;9|1XNu-g6(bnRoiqLVukOf6>|5U+xr? zdOm%d_&aF&I(aF(*=+y#c0^R%>?(*&lwNZ7R+X=FrsIZY%~$(4+l5`Xb}Zyc%4P)-817 zRa3{`@BMjuTCUIi{!2ez@cg^3CtbPq5*mB&@6MA-ZDv~YHtEj$y9cGVEr0)EV`UyAK$j&(0i+&wcF}hVve{=&j|~k)BW*g#T%ssPhY5c_xc)0{%yKw zmbmxsxnt3>21^tk*U8?tzxVFUU!O~t%r9G)E!cO-Lvho4&4u@~-<(*IJ8AtV!^PbD ztoyd;FHpIEYtr?Tw=YjW^Y^4v+mny$1H}~93KrikQn%s`iL^J1XSEEOd&IuRFZb`g zn7Ott9BCX+A3du4(9kGQnqZw0m8IM?_vt34b?S~bXD3R_e1BWAsGswq(wRr`pQ8BA zI$k})<@JlDsa5j9)sTA=xSH?F?f&%PhMtC+G~4z@+n1v2=WSTR{~(Kn^NybS$&6n* zY~mL_p4hGB?smUvxH>DKN_C^`42FPhUw(v-D72owe6DXx)d#RIXDV zYuW7kqJHXLIsNXBQ2P6gaaI529q&F^(o^~Gvvl%Co$a|CySJp96wX=q`^StXW1scY z8jgM6>vr<6&ZS*upJFCf{q9)u>HGfI`S%vN{8;>B@#~Xv;hvvn*OwnUVc@tnQ@T)f z_q)%0YwA~g@%3%{Y25qYrqK8Ck)~*;81bjSO5U>kH(j*zmdfg-UF%N&-&^UCS-&^F zR>Nbf*W~|ero?FL_ipWc6A)1I|HlsJ^BPC8U;l~z^sgdxnnvjAryK9yZ~P(5cP#v5 zkNF`Tq}jUAubj*OjZLx{9CvsP%Vk_?q~&@q%$%f9Fct3w~FUaKK^e* z+bZ{+OGA}TW-$m>yWLrA>vczEW?1ZOHD<U=SH>PC%BwAV#8y@u5xYfx&Ub^S^4vuUmax>-7BVOTNC2 zY@H$2;u5Dl*EPQ`Lrr<>#K>2Tkw!B_=WMhNP~d#fu{EmH_`m!MQOA`WTU2I>UD>hd z;QITK4|1B~*YL<+@3Visu0(Cg)^|UDPv`hQ>0@l@uRYWAJr1+AF!-I4DD<;3P267) zVHP3SbbOJNgd|ItswIo!d4rAYJ#CsHe?#8QKWn&3e8%C2=EvL5E?OR>zI$_R&L+=k zU9IPq+0~txH&dmeJziydX=(8+oSVooX7%ZdjyUmU%3 zl5Luh?t)dVTKVhd@6m}^lc&5$DrdvL3Ag@whu5!-UoCGk>D%648aTZ+?48~o zvS$AL8%c*AE?15d*E?n#U!&=Osur zKS?(}_eXBN{`U9p1pCY{|Z+qXKSpMWuQANER zC-?5{Nk7_IEISHzZcIDfdTKvIgkz)kl9-ED5iV>SL=HMm4OI<#T)5?6#z)3xty_UH z?B{-L?^Pv5@ z{XqH7rmCg}A4Ua^WAFCdW31h3AajFHw&BVVc4i~_uf0-y(|;C=o*i^ew)BX5+_38iK9=oYLDM$L{yZ7bq+X93C%=@p(r{?c_ZMFF= zVZBAtr{C@o68rA4?t0zsIUPbXZ#Qk9ko)e}yo3A=x97<%+P&jm{GJazf*z@L);sFn z$}ib{YqHMFZ~2jaET1_{W@zhncjbNNsQ=gGroZ>@{Rn-*>I0rrI2LcJWw^v<`(Rer zl4KRnzJvhX#U4dTd+(=P9p-e%I@*8r@50=B*Ec>|uxeW6n!7GHrUq(lU8tiKd})f* zM-PYad*+YhuYbO-aN=OWUlZ?~c+-|&Zy)gZuHXIp|I5Gbk3KD$J89PCli&PTC_cVb zdiT?jU#E73FF(om+VP^%n`rkei|LMI=4SW^K>BMq&`6u6KXKoUf(ry(M^C zo26~R;`j63@164K{Pca3-d{QNv;J_?>`lHt3+J5SnP>Twb?HRkv;Pwp%I#Nl;n=-5 zML_PkHowIGCqLt6^SPZ~`mlw?YR1i}e=794o|zj*eVBgSvMhATkIyg6JJiw}OSM$B z%KP|z{vGXF>ESfxQI2$m`gHx;g*orue+k?DxI22fzJ2PJUtjY+RX^Q7`S5yy%=%?1 zv9jfp5AC|MaQ6g-JHPC<#BbYLcYW8UiqiXOCdK+2t_AnLIw%;k{p-{fm&8mgHdIb1 zdTn)3lg-k`;W9^E%$EC_9mWnP|9+0!z`BNE*_VTPTvNO3wp%19`7z78WKQFcVEy@! z$$=-8yEcI@FmSV+wW`V=jYg(`&|}e)Rzd#f~wlDj?>}L`$FV9WcC%>)ao_9#Z z>m29l4$P1}`Kw2*|}H z6{Y)^eA6DwuAW-Iw)}o%^}Z##vbQ^Ux&#H9gdYBVcJ^%H#>GXITr8@7EgZ@v&ldk{ zS(9ZrJ>-Pno`3AwLe1(=90lKr7=;@tr6-Ck_~Md3@3o1kn5xfYQL6V3vHq2<@dxA1(klz5-)mt zH|YM0_0ccam?@V>9JsNwewFw8-Jd!wPv5jzeZA;~^Q>~G-?um?E|U24{DY^@mir&# zly*IgJiPPK`X@g=g$O1{KHfH6#j!Y9J$J|LFY}usOWNbqcb@wcV)Xa!)5%XvOkyMp z*;1#4bF&|9f5q!^#MRJeJCElIN>uB+hm^B|13!;I(5I94U1xJn{G6`Gd|plNf&VMp$=%a+H%?dnrx(Y!r}OD! zms19Ft%{$76f+?lZLcRwjT z4O=Ga`teO*=8p;OW(ksaJ|0?EvcO)w>lMp{4ws8J_Qm|vj;Qd6@wn*{#Vg5wxJ#-h zfZ_OlgS=mR^WJ>EUA8|&$lc1x^7|f9BYE}mxTejkm#)@-_BS^E#a#9Eo-fuuwLkq! zS?HkYcQ<{{daduTu9uc(i|&aj5$^KXcwaBEb7n(=D%20La&28IWG`~I%J z>i9w8;XB^CnODCYEEjkp`ucf7XL2|YRM(m~}Lhvlp|9gpt3 zD|bD+>)1)bv7n%pD|Bw)k~H>pYpSZkKOQCf8tr++cS*UdRhLw zOD)(c@o8W2aV~{V3y#*VEVFyg9vO7)zY)(1HNEDK%XOV&!uAUB?En2h^6ZTV6BK-z z^HcT9-7Z^ymey#n|I2qPGdg-dZ!ep#(si?z#0@$do=q&#l|3X>qUhDQjDwAVC7+Y~G#ksKk+s1v%Ui`W7kMBaw z|JMg1x%fUOd-+@~C|-D^^NO6vOqj8%Th zPmWeB{jo-+<@)uvRWIwN?G#f9|8@1&{-ghPru;p`a8^RNsH%%eXZ?l-i#^%v9=esr zTzCEdZ{MmfIxp+?iWlXr{U}rV+~B|j)+rtzG91?LOgOb)-OuaCZeGCQq)slzb&#v!Mkp8SMe(;kML^jp6zs_68dnHhVVGNv$#Xp#C<>I`pTMP zYW=;nhaZ0kJ#+8-A@zn8#!LUSP1#(SQ>9a~MXW6$JN@pzo>_4Q|JNQ~$~BQKLg`+S zguP1A)lxgRq95 z<(7T?+VHjUVw~JZY`;A4WD4-Oy?f{T*Ef&e348Zd5-R_udA9b$a)8behvnOt4U~>?=CksFbI`s5-&ava$Z%{3FWQX?8jL-r6mm)Nhsa zK-oUMdifjuf=iDc6?c5N>9t<|PE;4q;ji}>#@ojp-qYs9@zjL>Mp5X6`^L?i}#D?m8(fvRUONFr}nP${ndE&RgQKy6b>|hzVqE< zx3j~koohF{`&^lAA@O1Ex^o+LZ(I^|sfb(fo%|~ceub0Ww>y)Y4tQMmvrYA=_FT3j zcm0nR=?BX$g?h~V7ypr)wJy4lTYt7k;WFD*&m{hAyze?^%Di{0n~bg=kPO|u?&qhI zpQcuF?(De!;85Zh-txJ=y{S2+TDAYx=l?$vuCnLu{e_0*+uE3fVm6+iQ(niEFtdh9 zmGOdKFw2g0A7&mX;1a(q`jM|d(COG+m;N=Lj~Y@RCOpwF+*w-i`B{13X8*5TQvw)N zg&0^?7#OmhXeei8xuJ4m>iiCo^Gl8F=l?&azyI7u?LPvE%J1HmaLJ$f)X!XcEGvhD zYvUU^U!$56E39>Yg~Hi~5Up)Li<+|L4q=aM{>U)z`uDuiu!c9eeis@s0l$;^q4f&zAph%}@4r$dSu^;~u36ag)j+1Z2wfA>UW(?q}h*CJQe)9F-Y(fd4Qy<^`#4RiHCh)KGqEIH5sdC~$ zuil-WUtFPL{&Zzaon6gWp6I>1SPFN~WwOY96Mwg_d``3mh+nCEt=e$t;u5`u5!`F6o^M!>u+>+OSGg z@vdHUbiH)8=4MtILtQJEtE_@dOy9KJOEYW*EMGnR|9xV@x-*~F>#t8Oc?3piHp)WeO`Os6|>a>M& zyAHgVaPQ-iqx&zt=s4`Gwsr33{V!4#ot=`W))=Y1Zp3r)TH-|s?^sQ2=IhU4pZ z-)GttYuI^vBR3aIZ(i-C3I7kW^Vv*f4C5(jeeLEOQ687maBY2vyVS4QhnXZ2<}Ul! z_GfA;$IGgat&z$exo2#?e zU$vPMP+ib?@VAU>#H4xNQR`i&mahKux4P@4miP4tRke5TuZvH9h%Dabm%8{~aS`7yjc<6>I)j$NpA+ z=laY45ArOoSZDaX_T{95p@(y`nF6Qq^V-!Y7nyEcv*fVRCefzV9IvF>gx7Fx{k{EV zmEnvh2bN!|e*HMLU$e$p?uSliXItjKFo{|2!mO5pU*s~E3+0v_SbO?fZ}8pc67|z> z6g!?|>quYu;pVd+T-qfZw=VzP!=>=x+~ze3&kvXsecT#VF77a0LZq?6?#e&^?M7i5 zxwmW|>e>XghnKDUASCrW-R|Me>mD=L)%`#DPI%%ZmBZW*&39H62Rbr++j{%M;;Ffh zA2?k%4q0~X)brWr#VZ1a2dwXH8^r2e1IAN$@y%g9NIi^E=wJBWjG zqOALW(=AV0_O164I%xI7hkyAdwHjZ!=u(CCw(&n-Kh_Ty_}e7?_TbNC-`q{HbN2mB zG+O&7E4F_9zlk6I1oX0fY~B$n@$a!`?8(FWPuclx3e`VIDJSj8oqnFxr07zx?j)Ig zfv?j01fI@m>6!FIJzwX=`-4~6uB3*xUecSo^vkq`5|d7Uu)ROIfMfdpvUNZC_ptgF ziJe=_UF$CR^Re1Wi@na1e$S}t6R;MZ0Prk-^6{4{S$udr^|~Y{TkbwS4*wl&!TAcCGJPp*Y}$HRQJzUG<#(CaU1Y9!!opg0P%M#MuK(>kfqh~c!mrJP|5>eH?(uN;;TsBvRtQ#> zZS#N7Jk5H(e9R*0MVo&abX(2en)u;i;C!w36ZK5zA1V|wy=z@{?S$is{#~c~GZT+|ng7&#({*Se( zKR0jp|3e0;0&Q0Xf3&}S99+3_u zijU`H#WQt$ROOn%bwt7_W?9VaORF~@UpIxx_28Fj{};?JkJ}rsXm9`5Gj-Y7|2H(v zYV0C2{(bkJ{wsImPW|(+w%O@ntzG!rfZ`w3j;q9Z1dda(@Wt+SfzC3p%Z}#*5 zv&-yG-@U%bdbVZkl*FV``;5M7mjkP_--{Yuixa)r+pjM8SjCICCNCy_8n;r8bE40& z8-FJ<$9`An?&9<)F*9+TVHfVc@w^;{f01tz^qJA5|k{Q5W1tn*NE>rK}A zxjWTs{dzX0JTO1cJ&9+1mxz$ff~5!dn4B)1&&eYga>=9Y&5s`Aq_QNr3g)SeQ!-Kt z&UogPzh0%=yzcPge~$zDt=<%~jpbpx)(a^7__6l))eYVP4~uG!<~?7jIpg}3 z+ne8Bo4;#gSnj=(&;S1t)^g*h+1+%=kg19F;Xkv>M)j3y?SC#7rAE}gs!h4}e}6xx zl=$!IW&NmA;&EB%=@3Cfec8P2Hx{Z#PUPsL1bX%43s{Hob?^i_3ICh-%w)=Wg z{d-`I*Tzayom2nS_v=0Xc>7m`=r3kNy&6&y3( zKDODrOXbKSv8c>|ZjD)i+yNDZP7mjutp4rL9JE=ZrzJaM759Q$J8x*pDm-A+5$W&{ zQfg&67_cEgM}(V;>1aX66em}Knfol~R=xfBdEM8!)n{7P+e@CWyBIkqD6T!ZPX!IF*v`LW zko|X9mLcrqv{=Q^iCu}&_mnb{MM7j4bwVbnh;XtxIXP+sXo+#PGA(pi5#b@h&3VXC zU`B+Npk%9)TJguOXM3_%mq)FAbT#~I-OiWmd^rz%u-$8?cW>S~{S%cd|L(r1&GzQ_ z>)7Kz+Pgir#{HT*=U~t6qGd{_zQ+GpSkD=4)!uhP=5xK>CU35U<8{8S^DFn}|9+Kx zV!^G=LM{<68s_}__wT{#o9;=?3t|@4%f8`mjuN}5Kc&L)v%Vcy_{XlvExY*J4>DZ- zb~Q9bb5tOk&B&wH?iK@ z>9y3<{kL!bZ;Gv(@}){MZ@SMnGiG(YD^rTO*Ut=HUz9JD8R}B{_PmKsx2I{ z2O~mVpIeHFR3vry#w7+`@%sEpJ8;vCi3ZBgFS1!@KXJ4AYZV_}@3T+m^{T!buNNGb z^odJ7_K(j`R(AWzmp`tavfcGuw_VR;P1IfWO{azLeLS*Z%NZtdWuwK%EEp9)-tvn$ zn6$)u=h>^D=2bByT^EjuJM@<|KjEfP;k1P8j^5mt?;db zvHKN7oGof%)fR2|u>Nh8jp3ej^GbEQ<{!&dW=}h`&e-g{#(dpXlJA(yUFI@(vR~2n zxZf!oV=-IdTeN%R2}YOgmlsX_=^TH1>aK53OaJ9ZE;@PV5~pUKim#-f*UZPfCxT@S zEVfN|{H*Lb-lX=B9QO3A4XBmnoonqEb`ocF~YI7?C z!@{JKu`ztz7k;ZZ_7*cO+S#+noH;JJ0p!U^Vp)+Zh2BmNJ!|oc?I7>Ht#{mSFo^lb zuXuJ__0t4Kg`0oZYV1G!`#-~<|Hs;=_{?woaeIrkTZhS!k~7X;Hk~YboJZQkez9v@ zE$WWT2%S7vDP@w$j#m@E%#F{yf35su_wkc8ukzaFiuDQfkFmul~SJGJR<%%rP3Uwl&OzFxi5xM5kU;IasH zjs6RQ|L^SnewcszuSp%%xorILhu=2`-)g)ZVEyC3o5#$nJL=vUnI3cB+^<;pgz=1r zvt!{cc1DdW9j}Cp1P(1Wm~cczgqxM+P{WQ8JwfSXJMQLvd)|9ByXxHRMLRzFo;f}J z$MFrHzg-QDv)L^x^P@8P>BQ?h*XPNE_GfyVg>Br|thZ_M!2kcU1w0+tqVF zd1gh6m1I7PDp;ZY?#yg$uW9!t?-fW<+9B4}&ab`x?cp4@ZEgL#WcN03{Qe^=p~1ja zz?8aRhXn%zU!CJDEv_!sT8`5h@l##@ZT%B^Rs87Z|My?refjV2?~|W@e=so5Ix=r< zXsF<=gX*jAuCEYsJ}7v5aqW}Y_n9;j1d1NFPI~&i?qh_?f}rj#C;gZ6a*E%P+&k6U zG0y#tnYz^f1Ve@xZf@m#rlpeG5A;cYSa2ZlvQBYQVA}s9f7`P;=BBqZRJb{nHX1Bj z-c|mFHMg2=)0g`{*R5(^@|y2e)Yb407KfN+Di5eRCCHvxA$d%(=9bveLhh-3e@@0( z=GiT~9`sfG|L=EB|9^cw|7+ttYrWVtKOHR&Jv`jRK8;zxW`m4(Gn@I0YChA~y_?P1 zKdFhHyWM!%(PLJ|V>VTN@!PTw^sRZz5}G(xzdVt=wfgUpDbk+~v5Iku`F*LhR9}62 zg~$xn#J2Bx+UGU6DlV`p7%Bhzd&S;;MyKhn=jNtM^#!gU-IjSx#ejJS*W>D!hntVM z%AT8MHS^rcy4KCdg_yH$yn3q6_UXoEW+p;@aKxzwcG#Gc(^I#Kq{ke@f~0W$GK(tr0)W${M{s@SE((tJ!Hud14Qi zehdEnVbR(b4;dyLZee+}N!sF7L3Quq+l8oifZk2mXpXzAtNBtN-Vb(^AH}Z2{5qc_ZH2{|VCGv$p!( z&a7+tSNBK#t$taRAFi@B`D@nQT2F)i2E)L`{Zow#wh#_qS^LtLv{dZL7b&+Q0Rx@EI-E z#*{)H4FjfUW@}`hGfz0^Au`F6vo3AT28($D-#B=ZmNa_m$u0HH_xg9u^PvwzveZPg z=ifUIOH4WSOuk(AdiMMs{)XDCXFl0?I?U#b^eNb&|CGl`m1iO&hB+pPgXwJ zf8}e1iatZgv7*O!_olKkZ~5R}a!7qk^<3?F%>9)GlLPB76+Kpw*ME9aWnb&p;Lv}| zSF5dh8zc90m;M)4hai>%PC5!3zCWsX)AMdZz0UpzMYg;jEH+GEFSS5#@?qse83~hr zSZ_69GVeJ0ha+Xdk+#N*N<|;+y#h_T6I0VQw4`g2g5`}vnF`HxFZ*GF&P>vS(ZUM8D)X-3Vr4!CE?rg5#n|()MYPEaSAFZT|FQhXhZp@C!jpwzzzi<1I znA3$l&vU0=o5qzPKQl|I=H45QsAcbeE~iPO%5$JJN4=3QXn{`+YRh zRQ#9S?sxBEV$o39qfuFkRi`9Vp-_W1=pD}7TX^Y*8=Ef$#; z>htCZWCkx0+@W)#IB$uJ?7n5^o&6VIDd1E0ThUwICqAS6Nc4guhTe1a1=}}nV`4E> zJM(O=vjNA8smq_R-B4M({r!*2Vh`n{xqj5#tj*K>{`_oBVaMA3n|&82_nf@PzkXrr zS*K^(m9<|svd-U^FS(F4`a#33@H<+Y-dIfHsb|TJ3kdqPzqjG(-|8yfbc)NFpZ zA@QA{ZH}M0l(}PX#9Z4hiRD}3KYYvP-*Q6z@`jfl%8zVU`MmxjxV8U>vdTYskyF?5 zB&Lam-z_{W|HrB<|K|Q_U)`LOQ~STK|6#Dl_RL;B{nMNF%>T8uZ2hnOllGdw=jyZg zuUYgX^+!x)yWXe2r`Qr3K6q&Fo3QJxWzqczFP)6r_ct?TpGuf`V8v7U@D1HEZ%mKO z5&8S>gNe4FxPVVZK|Oh&B$@lzW1B|go=?4f84@b2dmZi#9uWzbY1ST ztvmCnZPiDO$0ry2I^9>F|E@@INC zbLwBGe=&Adznwczmd|?_=dG)n93r6cT=?HVU0K`vN7mMOy{i}cC+2Xfd;N_%V*849X6--!*_w?7A}j29c5)dpak2$$;oT~h#(3($ zyER8M601KnPUC zw*K*lwoRS9*FWB`b@}>-hu^*aW8JEMl6O|Cq|j_Xo1a&h4;=k)UP@=~l~p-wEJYrz z>B@c3-)}tc{l;&0J9cKz`@N{*gThaqrGGQ{etgPX_hG?`8mXJ_ihewB)R`vQ$Mn;u zC|iwz-$s5x`WLCc<&{@*Y*ZICW*mHc&WE#Rr^d2;HkWNLbh00st@*Ivm+05jNy3Nq zuSa_7zpuGoe)r*gJDn%z|N2EOFZ}=Mr&-kN*__|YwS_ju&s*5W=yd~80_Wy2K-ad`_nq}?%s4MY(rwn_;cwc8#e>Y$4`C{wj!yi)A z8`emMWymQn`u}C?@(tI741F)p_ij?Vrux`5P18niW`+dUof?6;TJE|}re3sh|10}W7g#f+B z94xbBRgP%xfjVPpiATKCdN=>y&~WbUv8WUN_S;?s{f#Gnt?QQ+!-*-0r z7kb(l`}n}dSB(4 zL^aDYGZjG|&qFiB_1$tJm7>mxB|0o+>oa&GFx$vl+Q2T7qx{pq z|35NLl9j&yA+-D|M|aGO%OdBG{t~<8_3>Hu9vAKP*S{<^i;ryj$zwSo(Ol|K^z^Ww zHSJ#|6tpe}w7)-@kZrRsNTf{TlWf!RBhsHfhTpIL^rP|Ap&T{AZ3|rtC-_cX|M_~u z#M!pmj;s=qq0Tl>=KkToYoFe8_2gHc%%m)fnL%gvyf^#0Y_V=bqKV1Lg=)^ve=xq) zKGN{QNJiquhrD9HjZfOAtNa#G5aIf8y*&7J{bfh1rJ+AxZ>jwwrCDKr_T~BYM!TYS z?Yr>p{q@*1QI2V{lCMAgxWDi>=M3rJw4!X@5%#hr-*YRTDp6dg5#B{qP_wi`uMr zucBW)RXC@(_}$HQbAD*`?|Alh$HI@w^V$Dh@n06N{?d7lkuy{Ahe`H~RjW>S#{^~v zm~ZG?dVbZ`csZSf;bXT8K*CPXlMwZd(cL0#s5IYU$1$E6n?R; zVqiXCb4YyZ42F71uKWxCX1%*^YcgOMHd{*7PU(l}7ST86||In6*ZLfc+ zh+LIb6l|}Gn<&NeJ#(qg?JcoSKF4^6cxPbje zUA?|IEuJOy~y!ZV-tDf(#fBOHSXpw)oow~^0Z+q+Z-nv?~YjV?sAQ_X?QuF@$ zAJT^oI@^So>+A0CDq3^s_)eC`aeS@44^tP$Z-0Jdo{C9F!aN1R1qZs>8qUQYdj8hJ zzvw(a`Wy2%`m4`$ zujqrXSsuy!+569C$8O_5QHQH>izgIrj6IrvtD@pSgxBQNU(a8w%IIqOFaLb^ZvGF+ zt|nDWqdimn9&bMGwLdH_1GNqL{oKHL9)_OURu zIGAv?ami0tw@7w-pEXUN|4HGsrrMPiALZUAr_JiKmQ83_aFuBv&$gyJoJ+pleLU~R z?Gw!*jo-xYUa-o0G(ooGSmAGsP3)z@~6uWALLN9^N*hRK6w9& z>Ui;w_Ui+du8!Lu_ctnW_Oj{M_ga}auXnk_Atex}`17pLf$8j%rGF-U_gOgIhNF$e zz|Onqn%8=k9~LYudP&qsXU=S2^latIK%K-h?|<*UBx>|Gp?1|&+xJuE2{fg1 z@kWWB<~bH|pY6_uPsXR$FiFTaW=nLhQCMmC^6#hT8tPUJ%Q|eYh}RsxbFXz{Kypab z_u7-$_t(eXUthLYwXMhg;q^JJ4L)3_4!NX)r#?aR6si&t+41E2|kZXR6huKMQm!~t7s$y5{e#rAGlV?@bwhg{A z(^#ZszIM8~y|ugYY^w6!FK0j0^^4bv$DiA4`)#ed(B6>Pt5LI;L@3p+e=z5|jovoD z=Um4>eXLV4TweR0dFAroM+#QT6jl7b5`0nr$1DEzPUq_s&dPaj2yst}zt+=yJ$t`& zrAM>K>52OcK7D)oZ@%OM6Q1o0te40y^HqI$&hgK-Ztr-G8^%J0i#(2>?mn#g$J$Wy zw#w!Uf1bqm9=qt3R{MKP-Hy`goqPS(Uti7a5gj3OhU1~^`|7=48z%Fl2)w^HgX==u zXHI61gVT4v`{OVB_rV+E>rF*B=NQPWaZ+9{$gj{-mZU!)sGz2^*9Q969XTpH?-&k?@ShW>fL*@ zhINA#bHc--gCR4N=jp0E+P!fNZ!;guhJ`FU>#it=)ME2OoqNDtwH7 zS8wq%=34*%Cd<7a>rQl*>^l}Mv3RcM$&dEavV78NWse^fx?}C*x#raW^{WqRs(pSE zp853~S82_`q9>xv(`D|j2s=1&zC+3FPJ?Uvpi^mb`zJ7Xe z`tqOW<@Xl3U%$V)Zdb>|>Dyi}5N^2G%ElpbCEoVvm0-bNTh)6sg!!e|8)lreu2Xzo zy-j23w~evSip15ckH7YCZel+Yv+e2aeam{!e-PnN5WF`l^Y)Lr3|*a!nG=l-Q%`4h zEpFg)Fnbol;l3hKll8>WGs}-1xAlvDE&lM)h2*Dk(U)?(d6SE7Eq{HmVSDZOsNMR% z1XBJ~>KC{36;%98xD)=0wrc2qlJ|^K8?|*VuKHN0L_T;QbuT3-jL)*^CxNPR$n0Vm$jXm1ZTm1L`nRoyHrs)47 z(YMac@a;T)Fur_RM4D(^tyY~*=mRwo7kQa;Z|W_!`gmox^hi{%o<1+y^=d@V&gwh! zI8&XtFC-=;pEvy>P_A;!yZ-b?R!z3!d<#8PGheS$QmQ=_|J94&RX8))wUvR zs_tJUVh_2ytJ=M_v%VS?!2a5eL)`HH<$WACQvvG7$m|q`}q=9qX(iI$HZ!@1>DTE8LfPUBv>|t%J62L z-@jdRjttkoXkCM8YeoBiB#KWLT2py4X8R<=womu=PY&LD>+=1H3vI*JURv$*Vg34) zbB?ub%RObyvL%GX`Tm_Ys8`%F-!jZBQ*+)yKIP?Kzxn&te~NpzS75DwjEMK`a=oaD zcWRo_yLI#I-p%H^sImCcs_&CO7n>YXTIW^ZxWp>LhMy&7ErAzUIfzOzk*TtK`*; zw*}|qud|u3T=$30l7mm1e03z^AKf;ox9QM4Uw1{S)oZK3cY}>}51HL$w7wtw#{4Ja zfPdO@eKzI|k$O5jFa8N$WEBlt-FrdHAkL~|$=*+F%?fjFM3g5z5i^#2XI`$l@NeAS zx&X}V;@3pl+JF6Dd*=S>@9Q{PV(0On&u`?`+CEQn#9S?otJgxy`$}!uK(-h%Jogx=`Ph&{31Rdzp9iO5t@+zD|EJ_x_8$ z-}atXpR)AtojEJ+Dt}juk2-l!U|IkCJZVNDaaI>*>oC=)?{@Frz4NmA&cqGVA|B-& zT=gw#{q6${47?82>byz%1p z3TegP4=?@T_~7lls3Ly#|MIwX>%$Ml2LJO9k6k0bwo3TE{i|;iYz`W5ZaCO#=o9OC z>3ChBe(|L_Q&uz_;W(gtm_?QGwq#chBiE}$_etimI%^**2rcF|dYzhV_U_@0XPjE= z4k+5p)1Cd{1)GCwsH^jxyZQVd!Zs}|IbeB)hs(g|k>E?#dGB=YNUdn;RoVT{M%jp~{I zELg~>pk|)IxBTt5GSx!`x-3&WD%GF%zB4qc-uoc)K~0WkjY)w^z2urTAGX{tFiy5y zE^s0IKm-T>r50{>3DvUWHC^{67Hkvq;EwRIuRkvE_a_&}!;SJnk^KhhtgTnWCTFc$ zdrN%wu3byQ|HiG2$c_Ix{dLUV>#HA$?alsLnLjuFz!atvdWNU-vP(LjI+%F3pO+zUqIBc4>a;tN6=j1&+5gWG(L$UYj2{{cP}+vK5Q>%{as8 z9@YDqoo#7uC;y(^zY>xhk1lP!KUI=}^+AdLJAWBQCH?<1 z-9FA=_)x4A<8M~o!oyosMCPtOw=;xgqPy>#UB8qrYTi(}Q+@e=*Uw$`TkMzY+5i7z z_kE6lb-v*nWADr?ef=uRi<6b}LCiKTN!1A)&vqS5RC}{|$7c1(kN#+wEK#&8+B5NW zA{%pgZHLF4?zg@9tqNAt|FSYR6jUj%bNZjpsK4*{+$Hrz+Y~weS$_I&xzB&`$xtcp zLT;T&y~mR8TZyuBN<1v$OLYzZxW?q{k&H#DOQHk}XYB~OZSD}Y+x^}3)$<-oy{ z_qTe<&myLi-+Jfs*cZy0Mcd%8lr(at4_tgzWrHt^Tm&%U-2thYL5oQvb*kwfqay$8`IT<*`qH-czU~1_N3#vWkET~DD4x*? zdn6MR#h|vJL%fE?!$C;6!F~0mor%Y8SkOmKkH8~yd|^$`HFmwPM4d_KOgzNl6d##@xALGulV11w8Q$| z=7atIO%wzrcNPmgSf{zuxN(KB*lMx(3=UVeh9lBU0*+mG zIR3UM@Ub&Hs-A0e$}Rq8v&&hnYSLDQE_sbA4F(R@1Nw?9SZ8#}i&jbOW)3V&?>s8w z@nLeoD#zXJ=gxm~lC4a=W=TBLFsdVa!@R`jhJror)-#$-Ndquy&5w{;) zJdX|B&wW4tpT%bTef{ara~k&ll(S}ve|=Zs>+uO|&!n%6=#{=Ba3j_Kgnv+y#i!cv z6(^%v1lFIfE@X{<^eB3(GH;do?qkOztIZZ%?~$K4r|IA1&<>yfAH0vvWWQY^v4!hI zRN=d>sg*YB>sRlOn%@1v_g)#3ftlk(nb*t{${NGF?t1^eRr|nELY$51WcY`@sb7A| z*cFt1oTwyQdQViqxx&<9#+(BTu@_mF$Q=|v9&X>DIZMMY_rK$(M!StC|7hgenxFdk zykN?Utc8kgFO%LGs4VO*JS}*ws(%}IQRa#DdJ>QI#n!VFKlwW?`Hgt*2JZR0_l7)d z^#Ao%!9$^R)ykkd-ClX(iU#jED$jA$PM>!3yXxr7sw+TOm=LIHUZc#ovyiKT3aJ9!V%=v11qTpMGGGAlqXP zI~zCA^_!BvHkQ1zIl8@kv(!q@`Ki(s3|vVEW-A_$v0$FL#!yYeV$lupqVSBEmsWjJ z)0027`yK3Cd~8OA*Uvvqz5g5T+x_1lzcu0Hd-wk=*C(hdF!n1{B)raj7|*q;q9*X8 zBBP^#gU159Io^lkny0R<%Sej+CwsZLN!(U>MM|FC;-}?&d+N0mmu2kV*{go}k6>j) zt#-on-3H0k8gJ&$y7S7WmuJuX_8oI)CA~FSAKI5@7n9|GUw(o2xy%I-%l28Y&-r6t zdn&7CcAK_%#P@9z9%!zZa`S)n!<+w#l^3g+{%HIBYWl$+&$_+dI)CT?p0?TVjX>ap z#=lqAK0W{Ex?`Yz-1^mVA1#{xzt!HmDsGwH`?x16)$2}AI#{{w^5X|9*6Ge51i<%9;zp^&XJtP;){b#d+_nn8|?RCt3c6i=&o>=hT!|S4Y=lzR+?c~Jv;Qt}iX-~uFQ#MNLAm7J%4S~Us)+W`P1F+RaR_vE;Y6C``!FM{V#Pf2#Ixiv;BCY&E$PC zP6yt~Km8P2Fkdw%ufN_=k|#+nogrz8lYxN&@7GsNQOj$6DmQ&-nE3kb(WtOsyhF5 z&Vi|4o^Lm?YpVObb^Te^(ktcLLmY>t!@ZpzoOp~^Lec&wKsTJ4=XlTP1%C~SDr z!M-r4T=MJkupL3Q*H>uYE-=m3ZnO3}JEuU7o4@H~f$^-DFJ@X?3Dh{~HnV4bQ+~PD z{eQpimi}0id0vR)T9?@2G7;&SM&9qGcb4n3dD`pa6zarf>uPD-by zU!D8p-^-72`#%5MRTSSIk|-*vKmCu_#zO*S={j6H-KQQc-LTT({CkJFO>d0t*A~u< zOA*tbx^it!^YpaPPi@WZhs>()H}}m2*V-+`+~B zYW53=S!;MCyqmjsZ_@FOx^vv7HOID}Ewo&`rm0=>z++Xm{I*oOWtMX(r%?zh0>%4KE}T#@AbX^XZ_$1gTc4r`MDY| z#U{^-KmDSz{$W#F)zP;}CHQ+ke67AEn z{i=nFZ@+ea@;kir`o4W9f(?BB2`dQI8y|c0e_sF6EenN9d7is=oQb=$@qhdwWA&YF zVV|ta)pV1-%VeKYIym8|XaB3%_lJ~^1Xl>|x~Vi*`#sN_MF;D5wZ!pepv6G^!xq2=~)~thiY$c{Gs-KPHIija@!3(clK`kemAdq z<@MyM<##5{S?`c@+TBTV(xmQf;_oDGt>Tfo(->XmeC%q?t3=MgXBTHqKK#xhBEtUT zt%Pen$z-Nzu$huq0OIzGkz^q+IX{O9}0XO_mi39C`now@RP z?Y;YJvTf^qZeP+YY4?+z;;MS4WyR0qYi21M7JN0!nf7$^g){qY>Tj3wb9MdEuivAU zlepmR>($3<-Unw|Y`wD3Y3;^YJG0r$HZIY0bY~6z!ec$FuCDq-^oxK^DZlq06jxI@ zzR!RE*Y4^Y27C8KuYap=`fn!p>OVqrX8WFbSC#o@Ui{xhNAGGRY6#6OmHEEX(Z5ZQ}X&esI#UHhpl>(lcI3H{@Vf@`IK3o92jD#p|~oY1+)< zCoHAnAlvJ_b7kdo#bCkTf0K3}d-p_r_5Zv6r^D43tgY+3RQ~q!pDRDRKMFD#yx&** zBjZflv*i&2ZpW1j8yFaP6&z=2cfDdQeGyf%?r*{St*dt0#?)q?{=c<*>-zsO|NsBb zjuTxyC;tDpt>>2H{|tgPrv$K_J zKbzEtNUfewFPnWJS*P)O#;1#7jkmuk1o3ceRCw{-t-LGZykBO6wt}zak(IA^i!S_Q z)j#?0|5ZMg|BnT}3~Dr65c)U&{%eu#`)6kF+InS*dt-u-#I25crU^|2h7ZV0Jcev~9b^zUgO?>}9+cH{i4(Eqt5`;8P`XDV%ye)2LZ@uQd7uOHRA*K1`Q{u(d*HGwZg|Ng8`wI?4mR3t}v`CZ9g zx&QJOF0RG%GJo18r8a~&)nB;rb#K)*jd?6zlXq=h_vCBd?^V0>{wD2N(@641I(PklxqR6?orLu6wKoJ72i>(75fwZeE_@(q58Dsk zPuGv>Eq}gYHEOS^O~GaAqHO;UDkK8n3K=tXQ~o-}+Y< zB%}@$9C}*So?95kzB8?$dFvj-y{1Z(CKrm&a{AeRuk0Nxfq%jlIsvo^5s!McIZKuO@7r^!+iH z0h{vaXWEIrd2@t zUU+-j+v0yaUte2#H*0TnjQjtERa?chTiT_6z3fu25I*+x-iO5=Itp^UjR#nF@PAy* z$FQCKgVaSnv$KpZSxvP6d(_5WJ*jSc{&jfiGEP>0op1NUxIHIZ%!`Q+5s^D&w&K-l z?}hWHsIIS{vByzgz}x+yr$ea078CBf`&VdP`fB{MqD%VOw6_uoE_RQi7?s8#X()?`u5 z2~T5;UTjsl{^hZ=cOBiD4#n!RO5e!5;Bf1zK%#zW@$dQC?hGCw+5PZQN4Z@yPjNLV9D09#sghm#zda|k6q(=q z98ED;lW}ZO+QEZnpI>G!{{y$?vBf}T^FOgQ&t5XGUMNPiX%3=PjZEgnpsTg$!9Lg7j~4-5_c(3>G7T) z`@cS}@5j5D+OH!P6$m`L#`MN5Yt7CIBUT0FUb9aU-0wD2GQwv9~{bU-rx8n zT<_)~aURoe&D-znye+(Y*Pji0_T8^gINTZXM{Cn^Jw8*#8CPpIe!19S6Y}oJ(%bdN zCMFjjJEEKZJ)N;_W?r-XqKB{TAFEIISD&%aWy+TgeB!_N8Cv zEV~V#qONbfCjH=a&;ot;8ndN|57t@QZ9Lj*{Qf}b&0W#VI{&zJ?j79y?r-(Kb?d{| zM}IH=y1s5ltkBkVCY%Res6LVr?K+yY>%p?XCZmVN0uS2d{$|IBu(`Bw{N8)l^^%%z z|NjdiQ?~ERSNiu$@Q(AlcYoh6$q+Me2#sJp$s3el6TyBm`%3zkr)->28~-;wddBx z3e9<~^?#d$z6(6fjIr_6et6rr&*-JrqJ4sa3(s!(=}|4ZRfOa9%ezyiA93QhusXWD z{Hv{8!_34Mbs)j+ z-^P1+Ozs{TwJ8V1nk=ik)vw=3Fp!=n!uIZ;mG}B5(k`DgO#GjImC`EsvpJyb>5R{g z_V=#wJy;)>!RxpEk0esye^KZXedok|eenz!FVy&eOtsHGlRtF|=*l+mI__XDH!6Q@uSr=7~ z$M;IcNqylyHJLA2_NLYEDR1UK%I_;UaO(Vv{{5$)p6__p8Psz-e*cN^-EYc6%dgD* z{>OE8*sSdJQ}XO5u{}2Qi?~!B(aLIgE!3cNFOyW$bS)OKQ)LUoCv3Vek$$n|?c5*s zN7)`<-ma_nU-eSbAHgTQ*4g6EwsP)hvOWHHef(O_T{>ab-y0(Ye`F}v?ftgy{)GhR zJ9eGCD+JdsFyYAg82e9W?U{_!uj{%WG-T*oOYxZ~*d|Q;`fOV8KaMB*-_Fe0b#Ch` zz0XfyxCt0leLuOiz`4A-zv~QsZ~9F=Fg#ow=ar*y?U^oZ~5t-zIyZ76U!}B6gF{vdj0j?qtd_P`e(!H z&-jL4+W#=dd58L+ZicfnG$-&|W|V~F_X#G=`+q!tkBfk`#>s;gtm;o+Ep4Bup~>pD zk!jZzfysiW_(OLnoqc>I`GZg48j`gipN6+U#lwCs~zR(i6(>S*gk_FJ}&1y>@c=}S+||6BBpWu>C*H@08=6oxf75<^&`DpbPuI`(~ zw|0KqqHh-DRQWEuXojt1YU8dO&(F^3%-0mq`n5ctKO!#U|Hsl@;z2S|7Y;crhZ#mu&i> zsSpuSE3?tDE<5{d&8@tE&=1^3r)}mOh;VQH`0rQh!s)M=H`e^qV*K;}e5%Mshs>}Y z=W^JCRi>2wHdtp?y~)roe$&C}62*odRc#!XJ}uu-?aVeQ+{<_RVh@q&6K-0c>u2Ws zZZqe^_m6JTfpK*&!#NM7q??(n{xBypw5-Qb&Dbs|YsXT}Lf)UL59)uHhqZ1}ztOy* zzJ9if{+A$TugcfoBo0YWcKI21|DHdKYx_pMd&`wt-WuEg+i7QdODyTZnbjx7)-Shl zyVzS{ExBLEP0f4NhG@f`b%*?qIMlsrGC3h(V>7RF!xtONd8S9?0+l!v4`$Z4*QjY~ z{CId?$SB85=3(j4rSm>rF@LSMp^=G&<7LdYn48_s^K93jx^n7lE8AiN>*rST0s9L> zW8{8+)JXYxRdZL?-lBIuk4^~Jl~!LZ@UtmNTlg*a;i;1*8U{bwV-_<#-rV)tj<~pE zZ*r%0uiv-6b=sQw@hkQ1C+z>rw&`zOhuyaMM<-ir^A#l4|8W!D_~^{qo1b@tZ<6Y` z_K@qu;!C%(|ID8$xMPpq>7sJkSAoSL*Z!t-H+63IV%oY&@PGDEM>)xk7N?E(#dg^k zN_{)?{Lsas*Y)b(=SIwEXNmwh1TRxExrutypWSc5!;uhZ-RXrFh}@KRc_J z)UKRb7G!;Uf{5>r_=Ap*j%j=C@U&&As*em^U$-|rXwKYk{r~6RNbdjtX6pKbnm1mz zi*0^AZMW<6tdi9c^1bd_g_X9PF)0EqyfX@ylw5yU@jHIj{FQ3A;v@EPaO(bn_heC}yW+vxyO1)$xLZx^$PCsKm++}9Jrqrj{XUh85&xGRHPL|Eg zb`!bETUDliJuYw6^I+Bc+g~b5<}auaNfF)e(xA#1#;76!+AzuMv)Xs?q=temDW;T{a)o4cI|8Wu<-r%$Q8AUdaq(G{uS}Q=&@%v>+PeL zFLS@&-}r^+{H>oiyk_10_OUi@%k$p<%=`J%6@Qd05W9W<&niS& zZC$_a@A)v-@4=L(R#H(rkF{TAW7{xekrvzqJv!6t=Es5>lO8nxy3+ z_9&1eoMX*j7&9~6_mE_v_0~9W#qG)oap%te<2Q@U!HIe<-e@nHy$4@g>ch{E(_5X7%`Jpn=K=d+K><2Hs&x*F2*6(=U^wVpf zYAx$G@we7omzO;XI~$R=;-Jj*=GK2pnO~SkCsm!kZ(Ukz8fKXu8j*it*4apvIX})M z%T!5py%F5ouzbx^uJvbk$Szu-p4gg@d*lw^YQ+mr8dW%+$OXy0W4zAzo++ce$y3!p z$(%pNUx4Gz14flA6I4VwS(z3&tq9N(;bvuKOxTg2BhJm@vP57-%e#vA;h(1(KfY}9 zWA5GOlD5ltTU@R9tTgZN&d+5(OKZ;k=dODf`b+Np!DaPjZ(W|h{K2*=ar(A*pO@y8 z1s`qjx%2&mfwdj8pB6j&aqIPK9k}#aO83`B<*~)RI#b4+`}Im}p7`tLV#)VsA{bu% zuKmv!x~{givaI{d1J;VW;tzdiKMU~E%Mm{R_~9-8HnVk=uirLpT63wYCVt6Pj^msu zr-UbXT3-udn%#JSQBP!wimDWAOJl-yaoEcj)(jYdqtgaX**;+vv)^e8sDXX~`D*JvJ+Bxxx{) z_zZIcqn=2ImWog-%R+|@6I8^wSX&+j8iZ&lakM%;3h0=kRGMviU*0C{@9yi({ZiMh z?*8_dj19TB$aLY27X@!E^VMzt+|O<+*jzTja+hOJrtvr5rHv=IR+RlHJ@}{o%Y!TV z79YMeua*pd%W(0wvQW*|S#sZ8w>Yjf-5mhULHJS}&ZxspkLv_2oKSv!~Zy{T{c|v-b11?#KHiqiwEMeev`xeSP=4 z$-?uj>lb|7_js=Pzq%FT(%hC2GKq{kK2CUX=<|kSOL@Zkp9_`!@c8rLx@d(XiDDqK;}0Ozi-;Q`hU}d5RuNhJ)z~bcTcyZ zhi_cap<{e-s>V*+=1&ics)Wwn{ivYQm~c1p?2|s>@Z;T1mv=c`6nHt=d8=jR#);Oy zxtk_t3cZtX%lI6k^W@BB&BaD1W_D&CPF(O)p?+~%w~*L^Z!h+$9C>$g&efoYck+*P zH?P`Q(kU-@zu~QXo2SCH3)eqvy7o%U@6B1w6sfE?&u_jIf5!Gz=Qh`?xeHmYF7R3> z!{oZs^#0=yor+eW26Y{$zD#;6U%{>5QmFE^^XJ3;*TSr?%!=0aLtwpDQL{5wTta-&r z+w_#<@ui*LgI9R*Ozkr~zkIt_*4jIZu4h!==}!B$WpnC=gR>Mu)vHS**Y9{|_~+sD z7x}l22AfZxZMR|0L3ZJ`=kvwc|5X2Ix0Bu);gq0oFhJs|P|Ri(zM5@67Vm!l==V|G zDElz2jp3_>W3GRcUzztiGM-iGSn%@dFFH%g%h&KbYMAhfhcc$S9}HchTYAv0@`C*0 z+$f)WLR0Q2ni;HeIPmVriaqgniz998pI?<yJjz9{2J z;$NeH*?D3Df_Vu$8o7&8B&NB0I%L?21|H^YRr$2u<88JHi@+TvC%x9kZoKOz)m#yC znRLAUYJk+7ovT(J)R>YOx3kVtYTBFCdehBsaa^DMYvae*__b4a?fLY*Zm;U+)#dtA z+(hL0KmW96Sjgnhw z`QJi8k=)4#qO3P&*7^zEVl*n@d^TzAL9+#aY8K5)oL&^@X7=Zeet%v6-Aj*tfmUto zFu1CuzP@Pn`Bz_Ghraqh<2Fx9;=aa!#(5U(k78IS{65(bTFW_gpV3S^S$melq0O_8 z-xizD$A3`M&%<`tU$#a2qOKm{NDIjNv`U4y=)d2Dp462_V%PoUGJZIHE)8`veVVw- zWzWvw6}zX+;@EjVQLaXyiRsOaxQiZd6psd2KfCNa`+ohxig45Tvvr-*OXB}W|FW&@ z-_dv~DZxWrwCnyfZP6_`lP~{?XP?JWEbzeg`6-_rF%N>x8#=k9!}AtM8we}-@<@j4 zxbr~$Zck!`d+&}yubR|^7ZJ*znPb>2nv8QKYNPVcaj-vVb}Kk=lyA{H<&2gZ$!pCG z7uTPOlZj7$xbh%tt40pT_t*2XpUS+NG5F^+$UDgtgH!vn2Na+5aUXZ$aY2 z>kFcmCOq4p`Yb`VuJ@p3emQ4^&Yms%&lPELi~gDK{kLSRT5W8UPMG$6F*!M5)%UwU zToIahf7j~Q%nP(QHbh>Dy)JT;H|S)@ewGVYCUu*J7M4zb=rvn>)!NW(P6bW2gOza) z#E)G$^w6-Pu`*Ha5$k{EEkZ{a7|#f;VqhxJXO*lF_`=Py;8|$zRmLMW`<>3jw`PVU z>X|xt?T^vFv|xJY4dYUmxYb`gRv(Jo(_iE1g{5YHwnzN;bfHlI`xO{8Cx~pnf3bxuX@hc zi{!g19k1AE{X72m-A}*vpN!J2EHAHLIq98e_CdR^OUvTt-2G7%Kd)Y)XWrg!yLWR} z?3=)RMRixBoMg!k5&b4+r84F>9IAQzosBtB#L~t1n&-Q`$|Fo`a=UttyB(sn782e4@}!)U<_nQY8XA{;>t?8KQ_l14KR>TW zY*yxy6L}^Jm46<$TJrvNofG>e{jXK0?k;7E{kVkZn|RLd{}Pom0#h~#B};tUZx$>w zTlYv;&ScMvIzbi94<;8q%41$}Jv?pU|B#TDRo^ZU-ZuNZq-rqHV=dPup|3Y zCN>^+Nck49>h9MU=k`0re-_F#zCPpl#FKv&1KhrxS+*?y9=PD}tcv-b?nmnfZ?0J`;|i(0aVh@ZM1wE3=cZ~)AKZUm`b^1( zfQrR(70-9NR8_IM~ra?AVAR{AlI^MTve)s?k#j>#_6_&zzre#zIaciq4K{yecSB1OX9wx%ZU z)*-Q{pQLI|Y&9{i*%DopfAa4my{Z|P3jgn)+hnh1Td6l!d_#}^^4tJ0Y@a?%TV zqK{|R`ETz!+_Yc|`}c#f!QuiZ{Fi(%@u*y(V7%5ThO4$d{Y6Z;&iA_QtKWZdQh(6B zaj$pA()A(5r`PWn{H$i}G4o8Ls&c&eBaf9e#rGQd)vTot>=S2tQIcN1sdw_K24^3@@Rn@M29 z+HaR1bWEE1;?=nW3=D$H9B1itYq8dw_ z+)?4VUbx=&u2l^gO`GRxJ&1mG$J{GWoWG4FA@{HsV^?O$(|m_%6GHB3F>9*!>*$ET z@O*!Qc};>Svtsva!56cRo>G%7Qhjwn{OteGKVqkS+%{N96c{jjHa%F%aehO9w$nWI z(BlCwHs!xsxAc3xVEM1FeCpS;^{;jsE@X6`lV)GFZL`CisE0Xre=kk@vi^RfLBnSr zy)WflRXa~VW{P2d!!8l#mvM)G{@R+=%nXb_7#SWg6g50AU;q32nLZ|E#@I_IN*A!C z_uc(j_ULc9em3ix($^{>42{&gpxBgTD-|O68Z+6G-nKQ|9iM1e03un+zW0$J| zlQ&#tZ7^%lVwdShuW#2WdHa3u{H}NZKZbvHdC^z@!nEw|#`i{X(rQj9c9kc%G zqZ0KB(`JJXC6)Sndze`G8a3y)pYd*ZedbAA>!e~jfebhK0h*l-Rc0NL(G%%%c6d8Hyp_5(r<1kVq*|>@^!3W?YC&i;&DjJ z|KVU9da$X3k2lbX)h{UMPmJT`sWJN>M2Hk0w^04~|5MGC{9AhAXFr<0F5YQd8n^Xs zaahmRTbtEibTLWJb7+a*;vkyJVsypxRs9?7y`7;~&Z?Rg*IY{a{^GCd_4VGhCp(}1 z2{=4KKtRJncv7{4<)++y98SXD!X)&dD`q$2? z|I(f5ceYy}>tLRs&)TdVr`X`|Wy)5u&smWbDWPV1e&4q|;yz$KNvi(?_p>FgwU7VB zG5%A3pki%w;buUXg_#4>wmb8ttDg)llM}!7({-(hpF`Q4mmZ(4Og7lnIXix;Yr1*J zJ{P}-e1_UFzE6!iXaU)b{W-$04W z4*5P;lhT7!x?PTW@hmGT$a)lg`D}7q)-=iOpDM3aCH}m_zUD^L$wZfb4z{+jXFopt zeCkljZu?KVQ9_?$V+%Rvey&k`SpVv3=atp>jgCf^mag2ByXegd0pa$dD_jD6D?eEu zZ%obIAi*hj_xJQS)^8t4#k3hNiWe?f?;oD((CoCSt?rV>$?Suk-<}GViFO$uJntVA zB>(w9r#wgLf=7;lj(R>3`m@^Ew3e4moLZG|_VGUJ{qGYm9bOz~^G9djv8}p)q>lKk zc)b5#eEbm+>+>sWnf}VeuShxQ7ZIbJsK0#E;r}bDc+X#-mAdJ6>UmxE6O9Wa)AQzq zt?IGq%>0(xo_cp>mQQ@%tN6%u<-g@BPh0)^ug!gZZu`$oimac7Z+R75n|u6=fOEjS zNqYp+RO~oHBzSLxrzE4^A<7 zY1NN8F`gWFW7?bUf}x4Oq}A`t4}TbAI5a{wdqgW{|mlO zdz84LXS=B6Ub%qV4Yyt#vU_JPDEqzqfayW$WA*zUJ2Q6N_xY+}-}}Xa*=%TXu8Yuc#5-ekkt%(eJfn<=|#K#AlBUFWsM?cdkQN>kF6o&v&g<0Jv8YExtQJtb0r^s3uAOC!HzJ=e0 z-exwRIkt~NmKn$TYIJO1ytJ*M2(7BDg+dwRa(`~wc`KaMIa z7vl4u&F}8;X@ zPC@nf#!0;%dcS}D5vi2P4Obk;%nU#ESic;p%SMu=qUH;GM)mjAig*|eMS?fz3O z`?qnh6s(OiQ+*ydbBff>Y1d_#{Q0^qR^JJk@kM&}Hd*H~rIi=Fm+U)YqGv#~xtbbj`9Cyj`nVp+k8qE~kQ`)hLc-+gFoqdCLt zuxXWwp~j=YV>w@4Os}3+pI3k5qWDc21Gyi}xwbPO8=9@(_IKaZb;+0hZHwZS;h*oY z>lX`)Y;pC^DZ2B*nZhRg(fnuiw>j(MZqdoV7TN~%e2+|+)3kl7`0KdnJ-63iOnPn- zrnp@H>GTPww!fc#D*n*N389DdHZHjCuxI8PHnZLHTNNI?wvzE2qw^sQdrxVC#_49? zH{8FQten#1Rwo_0?SlM;{C%MtH`%Ikal8&a7r1m03-@-eB`f7tsf4#$uCiOT?#_yz z+gQIS-#YK;u|;mxmviCMzgtY6WB-yjBW?R9`(?+%w4Vgu&^XTXaW7NHTfe}EjcZ+J zd}P%NyBoaxgXGHnf9LqcY!$6P5~>lO|E{g{*Pf;Se_s`xx-NcQH23$kM)9DM8`6)C z3im3Q^@t>I`Xc&ri8<@ugKj(*G*?b5eV@*L^=QTU>1Wrv7d>YZzql^n_`<=)hV>t| z=Tz`HcKVe4)}H+J&)1)dQa&}>I_CF^V>ipV{#U={w?jprk>lKAr#cDch2P$up88LE z{_-Cx3-5m29pI(Xvp=)<_nS)Vf=e|oK1eNj_=NBG8o{6T`k9UzD;*B+5a&K>v8+LA zy9QI^qII6Pw?CQd`M~qYE3ULh8=}Kc6$XoIu8-QEFLmXR?8+CWzfxXEY z&pKov`BPuPdDcw96NN2N0Zzwtn7aZF% ze?5GpZ?<6pPc`TDX<2_hRWA=_?kHC>Hr;#J&FRZO$-Q^XRNK#NkWHHBthHzMRE0FP zhecvbu3kyM{=cg2&|_=E_XStqPx@(l!%y|fv5d<s!P7->UJB@4jcB zI(vH7d&93W%xie-XV^L0h7Jp`6e6b=hG*oyzyly>I!hF3?rUf2HDDbD3`zlb;tJ66UZ>T_3RB z?NhX#7RRpppQ=CU>AU=%_v?>X=d7fE+c$r&Q&;2gc@TH}>enR~64uu+>h+)gQ+NOE zGUs>R`b$GRbOdMpTxG_#wbg!QL*#_0XY)UweBW`*=j4wG&!U>*8XA*@K28!ab+-Sx zI`LKI;vMU^T|AN_DD%yx?ZtkN@0Yj^3T0gCd#l>D|5m_8F=oblT(z%tO``sUer0@N zQPi~3ct>{ETz1c@$4ecrc1RYV6I);;;}RpS&v)hct(L{fUHs4Q@ZaAV5M2FTdYg6P zhquj(Cgf?$hNVr?|8XiH&iO&vwSf74wg#-hU%l8pv~+fC%GzjVA^(c^hsHjg(xVSO zg7w~BnU|+0`h;uZ>P&}pult_OK8sFPJfGw(7kho7{+&tpCk17*=}*7ub@GVhj?h1E zmoAQeJ^k8tHi=jK=es}lpZfZW%j|a$`w6AdTH?HQ%-{b%I-rk=3 z`k>qAyJ62C9?ErF>^RNmxc!P0DM@ku>1Q%Kj~!TUqxe#_s^b^`=Edc|Bfo9v;+SGzvG&<&oBC^ytp7e; zb3J3SvE0rt%j*6Lhy=Aw4w&WiVS;+G>zZDL=nZ!(cy5aCSez#s=CgkK>nZIfT6g{* z6Ptd1W%u$+9d&g%)2}~bjE%U;)nBskhH|M&(#juES_l7J`quJqZ`}ST>s|f!YVVGX z`kUqNr0{2YdDS1wj>6I}5-*>}A1;aB`E`lzBlg&f+@XT)o)hFMN~HJctDW~-7=Kc2 z=~tE=8~pzAoxAMDw%198r}~XV{@shVg8E`()8yFab z5*=sha(l7XFSLwb>%Z#z_5c5N)~+p3TU~zsBiH-?@Bd$m|Ni~{-_>(CYrn0$e|5=@ zJ~IyvTMJ`eZqdq=CjRs4`Fr>to)L@t_TZ)Z#ize^o&E3rPQ!ZEZRUMZn@(BQsZ4n9 z$85=XSe#Y7Q-b$A+tfpiJ8H~ou2nwf%xF6Mu;I{l8+-_gD*Vh#ay+1Kqowod;3Z*9WQYgZ@#nppZ; zwCbVyx}d*NPan9mS2k3Hb*Tp`JUlP&{fgb-;G|HQM+>`kxPuz5&N!0dx9iY)sgSLf z9oiEAbB}w>TxNQxGx*~+>%-~%u`>^(yiiyvo!oKj>l(2V&0jZC7wrl<5OMf-*>|~_ z>f3&oHrhPM5)%2{yK2$Vr`3Vmz4`RgVwr|&8Cjk{MLFUKYOu0QD3|zE?-lQ`eJRSccdezn6;cu({ChguI zH~s3lt?zpdaoL@$PyNil{_%y3%g?GfzaKdv@|dOc&5x4IKZiD^2DZgnI4}2cxZDxG zhKD&Lah8#`6H8gdvE>1~mux;DF~PKH(eovnFFF3%spj=ra=utr4$JS`x6ag*UNw2> zTJkDQfMJG_6N5TehmdaYSq`Bbj+dX0?_;oQpD0;t^5ydXbMh}H@2|YvUZb%5!0pCg zGSfE2_ny|+SS`e!uWyUi%zN;K zdpUoa#3Mf0V+U=07X=`iI`{a6WhsU@pm{JZt!i~ z$1dZ&aSiv9?{gc%D|7`jjMW-Lt{y+^a?nvyM7FEt^wGn6=YQws=h}DVYx&*ct4psc z<^L*W4L#6gF!{yQm2I~5r&0>5UwpZLXSaT5)s8!*hEX%xEKV8S;QenNklc3M!F0WV zb=0=+N_!l3`Nm9twn^!=vWmTG4e#G`7yrw)e{@`|=wIu-JVZ5X+QMVEFDw@NwsL3P z^{sys{|i^VUugR3$i1C@e-}9JWYdl^IMTz=^5xR?^J^`yT{tjf#o?asZnx{V$f-|j zkiC?prm(@fNknSG0jZlBGmWdAcL}Ccv@bs!XO`HZp*5M;dsRHsj0pMGb9r$5? zYu*95hu_W5J~gR+$Kd+Wdy!w~iQ#n+@v~!17%_RQm)B0+$?|MV|5g9=+pIk~ zmNruplMlC?$?*T|5t~%L{ZJ&y1+liH4J0{2*9^x!4FZ%zDLqRuJe1>xWuh-R2 zGUJ!uyK}eP`i|Efx!9LgdHfBkvv%~D7|5{%>{Ncdh*|%CaxJs`!PUCC>QhBHd!7d- zFG)Q*<^CFnueaF~9alb|ucg>Mz2oJMncG!oslKRNIsK4(#0#(aAKZMNb9n3QU*E(O zc6scg=d5sIRlXGIE0Q#u=VHHH&M%H}whcd6V_t z?&GD`Jp1-7ygZ{$Z^7F4UynBBJ>U9R){T?L#9O1fY|ebGy{w)YsaZBW_4jyuesDY~ zXj?REWn;lV|J|t*6}CpLTr%k#r`xpFdHa8|DnI>nW9h6Xb^BCOZO$76ec!`>P(j#n zfnQ+szDFC~>zNNp3otxrkP%^!OyF^zbZ~z2^2Wro)7PuMTpq3xpJ#bC*scB-%TjZW zpj{JQ9DVUMV*Y-H=vua;51&NTeLh%!)H#RUsr~xDw3MemE%=M4yGZ#@?7aWR@_mhF z3(JAVBD3Pn{A*^MnJ#spsQTqwro%BU<~LIoR95V<+i~{Ouhr8;b*d9h&-x^?Z*lOds&k>gul?s=Ytvihy+vj||mfHF+evK%j(cvdL zk<3|a|Mpy;!T5*av`pD5#Vi{U^V6ex@&4Z@zmHwja(yow-+#F4eKIjKa>u+91;LMusm*7uMY!zGcxlKmVZE(w?;Fuv z6|RYFS|4V4GjcC*W%GY}=Lz3;(ffBZw4yH5@5y812sOR`NIdobn~K|)R6$NbzXB`NUdSeiHPG*VV_5lA^Pd}e? z5H<7bOS-$nHF27LXWY9Qg;z`o?pqIk67X(scpk;Pt7%qhs1qZL4FZylta!Pk>=zkOmpRIwIMsQ%VcaZW2c zZgx)|>$^LF;UPS+y1%;am5Qmmx-GnsG`lOpjHhtHlWu=&ab1T0FaB?QcICUj-0~RN zD=&^KG+x@$Uz`-WQ?Y8-hIemDjA9n+Z?R6v=D%}{?X#zZht9-`dhU-mBUjH^#dSEx zR#aB>-0WuuSwwt%fWK4 zYz=`%?UkPO({&@6Jr!aPV@Lw?=|8!_os%JDrLJZnSwXZGQf+dhgR| z8!r8_f4`mghVT@x>+dBz{$)-uFu%V;MSS<6)BhthMB4?+U!(+lJiS8fd3y-kZQieJ zt&&c!<#$&9JMGRd`)uv|x2KpTS=2vh8a~oZXfN?9Ez7Igod4g#!0xGGZc8S^+4bMQ zKZ~{CdUf&pl0`3h?QNIoB^Yg45dI+|^Tyo92_9zG56)P2VO8XZr!S0)`t75054$ED z|ML2cgqGy*4;>SZ@ZC_+Kie!n{rXuR@i|N_oi!7}?_90^7igLPF>1O+t9sp=OUXaB zzCW~OJHx*P|5G_VXXhra{&d)FV$Hnx?+fNnesL_~kGZTw^cAVB0}oG~e`#6pO_}d& zz{U^hiT9oKqN3KyZCrJ~`M>a4Z~N1~?3;W_*)C?ivh$n&T5}rPZ}Gd|BzrEc30t|E<&sF$F6zAffGvG(l8I|su6|BoeA(Mq|2XHUR~ery z-6oT=eX+0iY5Sv@GV_eLZ#WQqd*S+>Zt}PP*4~^ZT6~#HSSil}h-Ft+v0KHV4MrDYzW$nYxxM`cEFG*&erT=Sq2aC)=KTRlGD* zb$&}-qWv4Ohm4$0GR_uGou+v8(cApK4{}C-Lrd?ii&ek&O6INBe7DCseE&l?{7}(N zP|j<(ZIXQdaYjPx`$HQyv%J;)_o;TNs7%;%xm?)=LFfM0M)F*Ju)FFv%X7^I$#1s2 z{{8#M$92E&Y+WhNR7nf}eyKwZD!=f#d7=M>4M+F_IZ~gP{Z*FyKsmr67-2YpSEoiIV zcDd*H2?zH>p$^`^w(R!}R#J_h;NT{^YN!5fy#=bD`CjeXZpp%9)38$B?}FC1NrkUe zYwFd~r#+m!YW?K50$TDd8{YHJ$WdGMnZ5K`VxC6km4;W>6+W0Hx$|w{+H$aSL-K;p znf3=IQtBJoX4ZunImdnYG2ujvmT!1y%&+^`I3752v>jP0e=$&G<&n4c53CPd()r~O zb8JV*v$dxx-(Qs58yj?e(Q~aL4%f8b*Ta&WnN(~`r*m$Ydyw;wRl%>#>mAj8OZ#4p zcz)x@A$eK-^rxpr5xu+puJ+;wCx3i@IO8Dq>*q`LH#r(Fv)bbLwYBnd z+$ola&xAb8csDLS{*{g8bnCJ9zgvH~Jll8m@Eq%d>tiHmF7K58BiwmVJ1ytkAM-83 zW{O#Re}12D)pY1X?UZ#BrhiQP^T_{wkca@s=bXz1hQft?Sx(^|;=5|%-~3qjLHpmN zKYv@^zT)ySj(Cwh%egh|{IA>=PJ=IKC2rzdca1r5 z)6KfavsgnrB)3bK|2pzXHM71_(Xlah*`BMHIJPK$W4yFL_VlH9C(m>^MqmE7qsMV> zuM7WXna?Zr4@GDw=rnaM)AhHv7c)GH|d$B-mev$G4VwI2k$I9J6_Hvx65Vb zJr_Exik3{A_F;YYgTA%blU@6NN(WtAxXz`jIc(NW#+tq_Hw6@WwygL+d#~-|I}(us z_1-6fBNgs{pX6~$e3{U4LCJ(Wo`v;^I#W2d{Hm4QXXBR(xB&Ygn)uz|+Qx@{QPd}q&w=Mhs zj2-J#idUR^#n@N$#4#wz`?Hb0`ifis18t5i+!?+)Y176<>#wcvT>ZD|@;d*z>#Qu| z*_nZhi{qQ}mUjduiJH}QJo?Y#-rJk|ckS;jF;=^;>`+o<O@L+mCG1Vva_|Y<*E1K7G~d7yGaNySDCC)u+TI^_N$FfB$;_wH;q;BTC9M`YOWj zK7O%m>x6a7{g3c*r~F!9v7~-QvDuUS>A?>e7ZvcgnSMlwaqZrnRFId%&$IT#?t?oP z{MX}pB=O<9TxQ=X^@Arwcnu64bDRHl^aPy@44dM9a7TZL?WX_D6>`oNOFx*j`cH4{ z+qk3j=*qaYuGfQ}mT{D>PM&|Y#prZ=T!+i#sabntPQL#CVE!I!pCfml@vROG6Pt`@9Ci8tvmk~TX*dUe*SCzyknDe743KYZ#;NH!q#Z{4z0&Jk*~_RQgwFP z9Q1fGL8!TK_hNT5sSd~9BK2uATF+0@{Zw6Fq16}=akI$kg|*&}7#p_j%Qv>v-CuQn zdoV*_F@Hhu=gX!|_ny4fdw7pGw)eWnCh6H<*=v}M9bQqW+-%^vLK< zNqt+Tv!c|Z{zmzIEaqnw+`RPE?ci;vHSUGJSaiLqz^&@XBmZ5;`4-jw|Eu`-Zqzr; z-{I4*F4>uXH~Ve;*V+UP#sbb087`8451egS#+&*fJ^nyTg13>9!T0&D`OB^PWrU== z)J1Ym?ljo;Xk+A#cJ@6M%|*}t#O3QB7HT#K;Br)`xopy!_~Giw=m5>>H}95v9k|lu zJEQLRgIljZOxn|2@4b~nV{MV7pz($o;lJPg3!a-~kRPZce6sfY`TM0W-W9H1KW+Wj zm9{av%qnQ9WUx1=VqIJcH#RNgWVrIPnOm`+8SPNZghRsm%A_jb-ayNe{o`oks+@v=d6TF z1~=aMK5|T!)pU@KJ!do3rtqJozgYaW-1Xm%CCq%@+IFpifBiBx8+6zVTn_y^vfxgY(vO+8vgh7Cy4k#;A-P>HIEC3+^x`242H^sh1$hcavlu+s zw@hFVSY(~hz_3E`kP}nTtQjZYu zn92P?!^X<>M#axf88;L>2#_*baPG~FCFzn^;)H{ugJ0-oeD>gDzOP@{JYRlVtxMtq ziI+7u^#!J${n5;lKI7t)?M8{4%hk^+h#vWJ>9_i0^$vqF-Iw|e5_e6UyWc)|H%D;( z{_k(zi~L@+(e~f^5_hYsQF~_9H~E`xwa&;iT-eWNQ(#fux-ZLkPes=9xRx;Im8X4m z-^{Cdx#DH+{RdypR)2E7(k|+JfbmC)Ni=gs;e)^`pZrL#Ofgg6d&f$A&(zIzFTU?^ z@Wr&~d+~KI91n}{Y|txY=Z|}}*T5^=bEBFkv%_q!tv{++R^6~%^Dj_uMnmcMwACf& z*h{}2U*BdR{`yX?C2!b~T{RJ=FLV-piwe%|(-M|%t~he_bA++#gzATVWs+MH&bSs% ziTZF!WAE!GIUkAX;?IS)Jpcb}Vxc_u$-32XbIm_`KPctOX8sZV!K#(LXZPKI`W1`X z9T|S_-uLd?_Qo2)Wl8VyZ-oo}|Hi4j`l)aK!LU;)CI-8>TJmL|#0jt~^3T{WePW`o zk4Htp+DCzV^Xxx&aIaQcnDXJk{|_6Z>z=9Sf1cPcmHkdeQ}z1elV97rYRr75v3Onn z@cifGw&=AwIc+V*40`+L_}{m3WXW7w*xm7BW9eL%^vUbLoSI>@qMw6@^WdD6`%Df? zuJU%)mKXfho{{tYgGHtKk9vhG2N``T*3?Mv2A?6w-;6I!~0!@(su?+-siM~$40K(V;UtIJ=T zTMF_-6c&sA>|j9%bCWqWgBy;F*Bq_Z#g-j2+*mFAoh4Y$ z-+tfM!<%Mn?Mj?>d!7l~i$`ZW$y2I*Y{NZTxjIOPfp)!_I}y^>b~Iph8BxQ*1L~>FF*N!*@S(KfWnM| zhE)oWO{0&nPoE_E)U?dl(t7fqW$PtX^u*nZq9+=J3$$~m^O=UdID0T4VcD}Qmwk5K zeI-9b{YOY+LR+h;V3T~C_DsgBjv0;nPnWjr)q5{K`N*2^@Ab}JEc&YLbtNDFr5xQ7 z$hhaZgM9oe4!2vncbZt8VuWXPFFM%&kH>Tm_r)p4BVMV^zkWk`YJJ??y27P;XDW{W zIW)WH^4^@h_%2`P#zRX?br{l?EYtJgYj1d{l=a8r{37Y=d>=Y$BUgL*=Uu<5ch3Cw z-&Z=@7gtot*fJM+J9_e#l4!Yz z>(4itnQ33xTcLC%xLeTs`(-unst2>H{L7F3@R{_p@{)^=dC-lV=%9jpqax>`$euk< z{0#2B;^5fz?aHja%|8p$jP^Su+cN)1I#jLG-qL>Mx2o{JhjsRb%!_MlC#Yo3cd7s2 zxL5SWhW^Kb)~c&bxBr>9H6~`d{fxd}pE;kMx_wae$EV}wWwIi(cd8e2Sb11FGq>c& zRNg=ivhv|MHyrBcfH?h1*&WM7j6xo&`^H;Q(b++u9baG=>k9hiOlDi zm5_b0HPWH{vX*)QU!-0C-rw<@Ogt+U)-FH#Xx7H%r#Fk7_z};!+i~}q9htk<{_~5! zB-!xbl=83O&1|cq8rqA$-!NeOzEpef=H<6@tj+l|YWf=+B_^dE4mc6;+i>^&#A$JD zbvvguvgWQ^7F4c{i%N|KX3N4%BuZ)rnT5#+-c{( z>c9H^AKvLPAFl0HKU!?~I4WqGTxTq;Bp}|uo`qEKu1tz{G{byRn1}Cdu*gTy0R_3SKyVJqkY4MlC zr3H?ehVsiDzL%r1;Fw;r#~&{K)M)v>S%>+X({yH}zJGJc!|8EHbo~d*!(REHm3zy|T)Av-~`cZahZJYU~UrSUcB)EM2D(aj$`B`gVmCS?-oA)eQ z#ON2L^I2+DTb%IWW0Bb_n?8y$KsUfki9@KmlBY)_2mo-{rkp~oSUv2%s` ztnSH@Q_nW_d{cM&bMw83vi_fgjvU#dpR!fgh1Pz*=O2Gnvj22;Z2#VOze@l7QC@c6 z``?r4zh3Q~^y2%jc1y+ha~WSJ-~0ddX0FVunR@r%2yh&jI`w#y-=`l;Iu4)XI-~B- zzk1oQXujT`eJiqDlfC!KlrAzjYL*eVt0eZ=6HBFEuPmIp^zYn~Q#zX>IziKDo%?(R z%gNsa1;US{J=^`u$?vV6T2)J?aoLY&2R42R{H9~S<54R|WamTUg{gdNj^9vt#ItG9 z|5N><+yA8gGH7HsJTx!MbH?V5=?cGD3cqdB433x<5wOTYZ*#KO#59!$cmByg{8Pri zVV&`v`%Ab|jsD&am@8ym_Otzn<*RHvRyl#0>93Ov+I5!(ehPCqcC7l6jAiY*%*M#_ zvZtqSZ94eR^^Wg9pDouHr`}N1e7W#OZtVSM8{dEW^y{byP%BH8rH5G)sDXeQy7{I&Pj{?ga+ z>znsQ?cAjBHGSEt)oZ->-&-gx^ZK5MdH|z}NQZ|I7jM%-hYc5WRJhrg9tJG9p(V)0 z>X8`8IZr*>e}3Q3Z|~Pvmb5;7^(U_Iyrik8g|xj*e%{gTHMd*lp6C3q-#u_Gcjb%8 zWzY5(RlmC~|4HO(+u8reTa=~PZ*Ti6ld*2!dail@NxZd+^yVW_}iyI>O>;y8D zs#B(KesD>TB{6K9{f6)z+y6|ty0!HCmHb#IyT>>AZr&=nKk3#yulruH+UGtt6qHq2 zZFb#!eqrMA1-FmxZB6@e>yL8p6xleT85-`s_I;mvan-%uyLW|Nn>sn>Ufilz1vYkTk1yZ)YVY|Y zrB$D2|GS)*psYS!xAx^AGP4anU8^`TEY2 z{sq76@%Z~}``@gRKacN^`1>dAE z&E8NqZTZ7>l7;yXzkPkN{>SE+Ti&1d$v@YAdB3+tJSF(z75^0%&h4o_WoPoHN-XbI zwDqZOy~}o<^UKZ^3Z|4#KD+PU>I{Qj&9+|{?jD&}XErIoL4;XL^6^RIEi!7oVHsDy z?z-x&8X3HNVSeP5h1V^b=M>1KFG&t}FIJX#Vmz-^LtInf;Ip02*GXMVQaY@0N}W0N z6d$A3l?f^?T%Aph2{U3e#JJd)4h1U2Xb5w3Iwc9ra8Xk<-nU)!=gafQms>sW4N868 z`z&kgu7{zJaeZPzPR*@qkh`=qu&<9&)Dy`<>l6O%Qp$kxVi7U$GeB8Z%>!7 zt$&_)?*BE>Y1?<*>{H|wdVed(cmE+j$&Md~9Ll0gh`2qLv0GvomUiOi@u0BkF0t!q?09)z;R0ecUgbRlN4zp6}->r_Q{xbc^LKixRuV`QLm0tKt#KKIEe>>RxaChOKh9ycIS47J?1iDz?iJCV&U{ss= zcdCR=%u1)D9?NZCdv1BXKu;{>*ul(S!uKr7^BMl~CZGJh?DoSN({p#ub$*+$x!;O+ zhggQ!o>kA)at`e{>EfYlT6+4FS?`Gj&c5sJ&7D#jTzvAvd5vSqS%!zxjE#G9RyI#N z^L@^vu$Gr}()h^%^v?@3daHFl?hJP^|S9$gBS67d0X=RLK z4Pxd#@c*OxCy&GDk6JqjII%l39-LF%Qs#KLQcLyz!;PArcOU9H2G{TiZr{T-NpkyT z_wT#D3B~2zt`)z&Kkl!a!UOlX-Elwdl()XWx^JTO*Hhk6f2$v*?Am?7<>-T$W1H%n zH#kW#|8Qt*>{?YcmCd|YWwOVab?dI5IWu#^%`CN>*0ZKc?6vuqa;085bxZR%tHZwq zogPj}*kR0mOQXslfcM~A4dxXGq+F#BN*r}AdT&xwrr?oL*J^Q?O{(_xHJ9~&%2zXK zOwd-`HK(cEz1r*jrvQ1OKN~Z`AEczVEGqK;(|2#C@t;j+C;bpT-s+TcO7_F!yD4XP zbWM$)aq{ziG0tPtWd*iAJIZG6`RiHF4$oNMz%cnm+YfsM?YMc4^C0`IhV4HNn#`}Y zVSAtXEbi?y%Tkg5hZ1@W9hOf%X;`(6?{oTZr{7y&U9Iwtuk|goUR$@)F+s14!I}%5SfDE5zL1C-(i+Y1y6cW?PxPXNm~O`xrfMd2wL$ zv^)9%K4(O)uqTGJ1^%#J*lByPm}lh^Rh0#=l6W^}2p?Ee&~Q0UJLgQaWv<;Sx%vAI z{2 z54oK+(hEMirZ2v|yX*UvH%_xMr)~HxaHsZ|_%;sCsI_5R4Z6^hure8hvwfk$P`P-zw@#@$AU;Hh0dj8!6O=oqL2!(6sX8r$lPEJ8EqP@JHZNaiT zGT!YDh8tFgf3z!Kcb=(j!2>nt`1$+ZmHL+WY};A2x^A!Pwbz&K`M-Zzy`MQF{)mKz z6~o@m2ljlHX?f+7zsdwuoe{e4%T zd_Dd3(7LSejVbEg>I~a{TwMM1lZryxW_2l#C)1lJ#cYs#Z^J(C zSXO@Vu8!LJms%TZvb8q!Zg>Bp`zN*f>(%_KHR0#>mi}9_K5Sps*Y(%-M@>FgS)3ey z#K1wj;PgK>%K+N~4~89Q9)=uXS{2K{%ea{M(N$K*ICCXFykE>U+wrz%Xlhq@#sA(vpMB3glF9uR>1o5uz0hFN zw&=Aj2e(-BJxpW&_%mJcpBeuv%?&M#erYNN=j+#`^7&k=sjQ#(*0iUhNx9+JGDZW{ z!WI1QgH)5R?EPUo{O{Z|z0+gidroEBBu3$D1$PUV&ldTk&--HzfA{G(ODC+AUOGWE zto!4JWaEN%NyEII-}Rr?nN_6D3ibY{aVPvFqnMWY#g+E|iFE(G;fkxbR8N}L z#;M9y8?v{q_kZlQU`yP^iu{$Y98xnr-}gHz*y`FUsrQ%U=Kc@dbkN-CiQ&(B-cp{W{SOqU-r2a=A$RhF6DJnu)n%$2Tu@kl zlDGJhwxV2OVDtU^ZjKVKmF(^}B?`Z`m{@dHrKUqTY||q}u@cGUj8Eh1x-Z|Y+VsJo z?}L5EJtMY_Q3t%bCPXZ~U%rO5o=4tv$-4!LJL2wo9}_5;cy@)LGV31ht=D9o<~{qj zr$@ratI*iepED)Z2vDX?zY7yIj}tGDKA1btKcB1C6p((i?65+ZW3p_YH0hOOa@Q zJB3wX?Mv?UoOUmMxZPgZYUQf%I==2<(D_P_HmT2R-DmxesCvQwB5;Mcrod;h-@E?B z_9gvQSd+Te>9)Hcd&$#9a$6TJip^0@`*!jB{}&&t7kfAUnEHNtP57_PcdPg7@0|Z& z!XM{pb_Ib;_~rQ7r!=0B;}h7K9LJ&%k~lfZ{XbJeMWK+lr}j0UH4*}%OdB^=A5Chw z!+7`O?i~%x4S7186HHtDD;#{>ESCjN4!h|ZE6;vVV9f^x0nP_;J_i_{NiFz!=cfX{ zPIuFek_oam^tW05Xs`dHW%^!9=uC{ph1H3l^(@y|+Sh$>Y%rMG_-pRW72j^U3CMrE zqQk8f$Q*9}*0)>hfcuX6pKtH&-ecwSL`QAf83Qk#0+TIbhl1X_b-p-#Y{&D>EF!y( z9nG^#d~@9GGI zyVWDv9>v$a?DOq`!N`|&B;1Z|5m zdXKo6Z<)!zR&uXjOZT&?Wv#V|FTOkY)wxWoTDxw~+7{mJ4wtWY%a~qSAu0RAbxl>> zqAS*tMGsu(1t|nhd2Xj1b}b}i+k3t_E<%&#-pHMJzCZhv^E}u0_mqCF=*XOE!`V;DHp`bl& zK115;Tw6Ee=ayxA|IhYtem{#(xIDh+kH8koU$IKtbH3Mfe7KMic>7;E_sy68j^8@I zQP9A}XXzi--G9^THl*pZZ1`bYZZ2Y6eBiU$2hAB$?JNQwcAJiU-l6e&@)Z^ff&E;k z1^Ai%d`jStej3a8lb461E%FkxTk4wMjM0yF>}H&*@vynSldFnX#Cqlg>u-}PdZgZN zo)vsJ-u~o|UrjF3Z3z;|J2D=2EuH@G%gW07FGX4(6_@TWb)MbAqWa;{)Ay;hZxiGB zelES6By9i2RsHSVUmkn!t#fcbn%o(q7<1|Fe}28i@vi^>FMXNkv%kK?XEn<~k#f;R54tsC zHx=JV`MWpAE={sz76zw0-ngXjGVj$=B@#nx0yg*D!Ry`~AK4>$lb>-m02;D(XAR(xf*WDwCYN z&hvAp#MCCfr5~0peA9C2-50IOQ~Re2u8y1RALqJ8wLZWjV&AW?pNnd*<`sLb6@1t( zbNS!y74z-|KRj2kxr_&=@93%Bb3yOGWQu;GkkP2OML^}qgz9s3l=Usr$o z!`T;W*41!sU;E`@-F}wvn#WK7{o)kda_2!qrL^zEn*j@^q~7~`x2`eqC-03U!wH>h z_=A3*j=iL^VYb^((_r_N*-Sg_*7S8XJrQ=UGJ4f$xWVmWTl0gK4Xnb#*El?{o|0+W z@?o0M{`slr6DBsA{Ez2g;!Bcn;uJdBkhFiJnE1}q58Rg&eX`n6E&8`_v&-wt@rvt@ z<$k=sRb^j?!6c0v^L=xtpINfxuU`9x%9wN(fwMw(v+U1DCp_61xzV#HOXTFOl#wa4gWMWM|n7!XHX6Rjat=hzb7Jg!5VU72>> zs&}lp`26e{1NZqpa(>n)qH1?dyvjDiqIc)ld7qJ`)9uB?#T z_+?_SO;b+q3#Nta=YLKAyhHqcb?a^u=NoqatBZRSbHbNdGkN}aBrh{VZ0+I+1#d!K zW~FI=`eLx{eA_{roNXVO%uZUq&COg8rW?Cdf5HhDHxW&ta)BqM_gO#9l~~ZJ@OZ{t zmXEzJFWmEsW(awn)h8{?zCt1S+ljpmVN#eXOx*&q(q8`mqs62VsZA1v&4>e7EAqKg4gcs>%Y3|+yAHc-?#;f z71i1=VtvlR)+aq(n8W&abBo08u8mv&KkP}fOmb)|y--^!AYZg(-hxwq9ws%Y)UB{6 ztlv0UQQf1)?cSc-Wy+Nt7A@iyc|A?6`uhu}v|X;>P`fy-!HSjyEzqyDE0?+Rt{`K@}N|KT{#O%yW(tb^U*Uc=xeviNW z!O$;fOsBj*tM|4-S!?IL_)|wdII`_;mc3zG-*WQNq5cK7lik;^yzjNyo6Bj7- zwQW*eJ~8rNF8-C8BgMASAn|O0nfD{1DWA=L@CFoT$9F&QP#Ds4)8#31{!U zve^Iq#opj6d5b=>RzE*}+4W@7j>4Drg~1_$NhKGnmR+%ae&(aZdD%%!Uch`(XkBfV%@Rf9r$056>`a8FgqLs2k1tnWnB#rc&!px3S1-NSC4SFCmS26e z`o7iP#Pg-6{fp}bG-NBE{?nAX=`lSsDfy(tr#FX{%nBCYV_*=z;y6pMdlhS;xbEGB zwJ}$-&Ry-l+P&|8nD*p<{bAB!|NmF*+NtNg`rQZnmKq)jk2ETzv$Do!e0W=m~){K@YR9&BRM6gR#sdN`(4bB;mbmjcs2_2MdjJu;RZuX&?- zYW^Fq(yFgDDoaoOyXyVx)!w4r?;nZpQ}W&;&&u4Ed6;39?KA%m^`$SH`JNuUWN;(q z6sx9JdO{DofR*#K&f|$oY8nn|=5GBT^YLMThk3~nrUQ#b(mPHRl>C^Y%2)Bje)>h> zgEx;aj(@1NJ63bk>=Z3i_T-Fq!@{+S0xAvDSROvMyWjqy?4)4rxp=0J89a=)!k%fZ z<=O94nSF`tb}|`M-v`tl!#^!oqF; z`Ns2W8=kpIJ#gV+I~B7$w(sbPppQBgJ=K3X*I&~P|GirK`rY;GuWPMx?5?e^GJk!Q zMPu#Sf*p_N9F+ zA|x1B2#LfAwcp@oSbF1uz10IP(b+PEhbBh;SgiZTxpia2nje33l6tmjOC75#2#QqK ztB-2xvTHh~Es_+!O!~Hp@ud}4#J#$vd^O=aP(Cy4z*LX0uH&yYEe+$HB9`wD|C?G~ za_Z>5ReOux#vYizK>m-y%DwAKeKU@92(+@9uI^JtFE5nK74aMrod0(>n_}q8{cEQGd8=tHBVM1^ zIc>|lG`DcIbnj2cw`%U5w#D@7t4~q;z2mmtx~4H}^^WN?+h!ymSobnv#=B?Qb?ILu zB(948E#dNI4;HXEHT|ws@e0RJr(eI*D*f52aMk7ad-eJ2f7yf{S^fQfw$@?Mb%JZ| zy}G9Lb$@y7sZYiR$J3Thy|YsyHB4rH{5v@{(}^_)H5(wv((ebcMe;XlQ8M@{?V{y#KR zJ8a$6)mQ&bTdi~3BClrY51*r^2U*`s>lo-Y&N#^O%31pKSEFaJgvT z!q=S=vCpsG`L^~EW9iWxzu@&;C9n71mUcfjC#GHfhL_3hTcIiMtlnQ z>XU?~&zx=L>q=x-xW4_7I?MgikMISzKArz~&ZExz)I_sH0H;qJElTPO6-hd11d+nVKXfBXxtR%HK%M)hOOKWko| zHDVPoU774QD|*qyS@z5B#GCrQew6@aR}dR)%(Ml@d`-)z%f9N5<080lDH@|C}S=W9pV zfL(!R3%Aw2d>|5fyk6&cM`r#!1$+Lm9x0X=opA^D9^}s!RQdJKDg4%so((#;8eAXp z?Y|iRzP@Qri(sXVwYtE0`3rBX%#$8?KA5I^e_r5!^+wrP-VeNo_~m|B{3^(`n;^|O zEhIl6(9!1djyU(EvqyrvPkXO;c$Z`Q@t(b>c=Hl#8)MYY_m@*y9jvf z$nZCR{o$jWhuK+&o=~?^pNh777c44em4aidrhO4Ey8lPuORu{;tH_kD)U5gWr+Et1 z1#B#guH64oH1+Ae162+p8wBrp?R#bLO&fWMsiv;mx`_ z_C-F|UtcXhn|)w{1WRgp+5DITXPdfrEEXwGGtYkgT*O#UTsw}3iFMV@YaBdW%*kFp zf(QA;j$YZ>Vc~ovP(hygfjYC?2flZ|?gwn;zsEH$6PB_`qQ)$F;=tP5y1Ia}0T$H@F?U zS5O^0g(q*acIeKa%kd8yX0Zs)DdOOHV(}t1N-TU;`Mb+5UGh(UUthR2cjmmUQ3|R8 zDkgijEL3TmA-m)CXa4r&psNkV<&{zsCtQoZeCek?PoLzU^rR;{5f=xyuYBtle|*jBq#hff z^6tAle|_lTeKV(VEGXQuPuORAq2kkl|0^w``gGTSSgRAG7&x=gIk)-u(v@ZNpZ@8LV%c6%RlB^+ z!#&=X-ig=)J8ybES>L1|^j%66-jRD>3L^db9LPBk$qw zOTO!9JzleDZn2WLzt)-KUR_V})Gw5Aue+}^|NZ{P9j7WgNaH-53W{I|K$m_1UI zcjn~40{q{WEnHaj!C`{j{x=>%y+WTfV$S_;m*ee>dcR2|u*IJtINtWx>DCbYCXN{9 zz=X68Pcg>D^Ih#;_5~h1{~N`->#+M>!hC^xb!&5`NuY!{B@mj!OzR)?@ZQz`lH~Cribd;fv9v z`sjf_)(-P~UpTzxHM>^wGEUp^{Q|YQZ%a)dscyR=wsBV6tH+gl)7&S2Z}$AKefI?A zhTkn44$Q9+{+TrY%@d7ht#KvU79x8+ijk{NPK8F|MebI?Y&#N zQx^Q@$kUFKyw`MM*0TCJP0C^abZU3zgx|?d{Iqvfbk}d`v%b!k&!25KT6eN*QeEe+ zgSXF}X$+fid{VmWrdvNHyuN>1bVowKdfGRmHBW-8_PuZpXOBM3rg8MtoTmnpF3PLF z?@(&@T=%e3>94}X&$C|~{{7fYdRN~R`CI&+yDtiUIN}`k_0*K4_^03As?d&beWX!(XK+hkm=-)t|etn<3MI9F`z z>bA~=^9BC{w|3O8x2*s4x8z1mA73u_hTC#RdyIE}zFug-p?vh2_Rrc=@ocPzr+qvj zz*2MLE>m3NG6}!889T%@-JF9emP$@BwTPE_dhq6^H}!uScQO~)wU|EqaOn0ud2jdc zCK+-Y(%)3iDe+dae&a2bHs$_#<_mG{+LA&SQ(i95-V||!WxiD9^2cJ2e zpvN{NE)43*Rt_KKaxo%zR0h??c00 zZWa5zmnR&V^ds%esqeL#+r$rks7ts#&DA-A|7ZEX!%JC>FF!utQ?k>{qF%qcY?Frg z{mYtthYj3*UP-OH&-0y!<*8Og*{^$XcX!^}{X_Z}pTG;b_WP9&r{CDRCa*rzX~#4> z3snILJCA_&#hDQ*d@S=P#QuA}D*MSFj_otP{#UfPp_Bh2_kLkt$9f-62Fd2;Aj9(k z;frK57QDZ-$R&AGW0UIU{Mt1UCj`Bp@0nW0VY+WdMwjPy{o|g3&S_^q8co^xv+u7N z%dIM|)|H2Mcq!|iNjv}2j{XQMC@ip_a7r@T`B%t- zmOA4l`=%}wW?$REB)LrE;mxEw<>C*fURId+Wv`6cj`KXr{q~>ywoKhQ<3h4gr{%h9 zW;sy`=kL9A?%Q$r-BnZl>ig3~9{<}r#ojEzPxOv@#XspM>>&>8vqVmdebevip0?J* zn6+l5-FKJ6$#G9y>fe6b8v1?KhZp*S~ILd;#keGx2I)%^S?PCT(!XkGlYdk0xK&R1D|@>{<6VPLa)&x|zJ=1o0Q ztUAhTd6Sc7F!S+F=euuK zJUZ3A|55+C*qfyhFAj(8ToZrFVBVkl#av~lcst&8Pn&S<(w1}YTJ`UinKXT|dmoat z-=A=iDFIA}s&ac@sVIpVvPyOFpBfRxrhws05wNybv{Uadyc`9XE3?ZRT}w-D+g%NkLWZ+#3WeOBM|w{?**eu#@ZJlx zg6)lw0o&LWe9ZcaJ5y&HKmPb;oA!gB+w&itP(0G#x+k#9D*f~kiDxcyYbvlb7xs@--hGXx@Ia@B5UDSN88uEmP}0`Eu`{1#PGP zi`_aXqWYue@XNUUaSuAyiFvQTI$zI3!n<@$>J~r8zURl}ej8cVe{0$$5_9$O-Pr;E zs{U^?af}zQa9DKzkD%+JUuh=pm29PMnP2o)|L6Ib=pOxGntA$yYsG8+eeg~DmbEXg zkF#{%`bZvM>IBTq7!wpIo}{;o}(* zvh4lF&&4*@h`!^u$zx9rK6p-y@xlc8?67~DJenWA8%8Dvo_t_`u;)jijYnZngMG@X zZ8Hw^-S$6!A>Pb1a&rG9`>3t8sh)irU;7{Bs65*Dlc!nbtk)aod4IOPc>6wL{zmRg z5lSB#C9JCQ#r&UnnhNBzW%}kVj+tLKm5F<^sg6I>UW?U!b7vYV)IN6OH|Ke^X34QR zWqx~iO9sZ=5}DAOHP>^}`_TLMcJVE2`r>=j#`=zS?Wx1%owG~=PE9I0-yHO*;(Zb8 z)^!tuROEMN8kLw&`gZ#1|J$~;{wlGxV%q!q4ho8$^eW!qKmEAg&&uE{0_6$^d4lcB zOI5V`f4|o+F#fXQ|M3sgV@}Wa&l4)>kF9^O$y-HwnP$(iU2_lnedu5dc~kdjOVHuJ zuGY6gWAAD$ndJF5^TA@qs8`xc*__ro$SxMtEB{#Gw0>{SlE?Y0uDZ*9eDJ!pUtdqTbo_bs zuYUVozqRTelWn&ATOYUUvHnG=b?>{s{%KM@KmBRH%FM|ubrW>6q8~6Y2)a7X(&s+K zTEy}G!nAkuKYfhn{G)hl)3aT#udiPEerslJ{QiqU`s=%&b*?(9`{@T)f%D`$JOV0j z1QiavW4rhvphPTYdr^$VuaB+M-?L0`l)myLuDWmT!*5POpXO{zG}9+y6WB4&f1ujwd)TzS7={0n&A+*!TtZi z1rHbcSenlo_AJpiOemV_InGA7!B_0RKzWEy!GGi<5!_To;!;U z-k8U8f?#a z9^|l^UgPNMGHGpl#Z+4HH*w9Bvew9zodpNkFXpYwW4rCq8*$p>?)+shO3kAbTzfye zNZgpOejxeRH056_zIY3NnY(FMXU=UE=TCRDV&86G-+VARDW1J<8pD@qZ#=E1A3l&W zjlrRieTtCgbRh$?_3;-BUhQghpX9aiN&BB}{`#hc4?BB8R-c~ZuwstB`0ecZe$#vA zH*zZeFR#_zc-s5w-Mp1jU)*GWul^e`>DfVtCeEFm;p>hnym)kc8b4<~A8%avzZ(BY z%P-D*4{eZtZu;xoziQj9Q;&QpQq?X=i#hk>{}<`?+5a9CC!|mIJ?SMCoV(gck>PYr zVG6%s*4Ds6BW+2=2V6Q&ZMI3B_mw#Iq0woDX1x>d-`M6bO*^_7P@~fGNO3o7wf*-UH)3U{_c@mSI_*~w_$1aA)SEJ1!bQr_ddz-P`o4kvFP^irw285 z{NM4u|6J5NF|J8lLs$L$^7BXY$rC(R&o{mMZ}+oC>Ef^VdxHK%?RU{Hy1M#gX~@?w zY4JU6$G94-u9~Dbo$~345&gdG2ev~ zZPy>@y}Khnx9507>i5uJ9J_D5tzN&i_@Uj>|FQKfKW+X$vd#LNu-4Wjgv)Y6UViPA z$uF;;|Fpr!^P5r*)4dhyQnP#aTh(!CK1g2W8`k5mz3=tKsP}t~>W=Ku{HXduQ zvD5oy_cFp@+5?mQf18@iL~5tbbG~Qy$wgsd@(J}Te7Cf^4b9k%Jj1t5h*bJ&)V|Bf zbc;%41#*3+aHOjvS&+3@dQv6R)kRkI%{yw~6N^Jli`{;0Ch{tUDc2s*I%b9~`+Wy|ZW*5JA6_?ACt&+A;5Bs0Jw~i}v zx4U+EYf$%sjMFb;{~ofu!SB?pv-mN`(jPBXKm5xUS;DeDRw?q+!gWjU?pI&5C%vtQF5>6PUHy^9cU~MQN6{7@$`Y2WqU6j>|J&7 z!*><|_4QBwmHh2n&;DEd!B*}yDhaNir0vAI_shF;9BpL&aq;`*Ne5eQ|1=Gqu()pS z)F+CuI_cssI7A;`PgR(3=0|C@l+%R}8@0%phs>H~7JLw0%(y{^0Z&j|+>QK4fT^WPM-syDR!hyui(V%q!PnS$Q{E|~KYEb`hkCL<+-ewqB2ie|6jQp##}t58XJdh5@Mw&)d+ zhbH*dBnnBa^p?!(U$E};>i0~cTk6Zjzx8va?lL~C_@{LCcFXRF^JSuHuMOE|h5gyk zcjxB0u%B*RS$b7|q0Q5F?p_~%e@&ds_sPHYdq3CcerP{9qi28YW6zb+3Hx4iR-aDJ z44KO0t=n>i)67EhKy_Rj|C>m!Y2CI9+$VW{loUE~_xPqCb<8)-H@^RU$;{kghs@#I zuf0WUv^QFr94_^-y5JBYF-5@f>76*gD7RBR4|)3L3I~^Je%zEAGQ&t>>G7L2@-55? zUP)gT9=%lBocvGvqKJOF_UhLy&u%9wA3DE;`{?EU_K}lp?Z2}+3Hr0h?4L5>teLFa zBeq|$C$F5Ve3-QOmR?3)#3pAO-d*M5KI%E*|DXQnWZ&k#CT@2s_tj&!`?~UVj9vdH zCVfd-R{G1jb@B#H-TwhkKkoVPSMkNaJ5%54>8`H45wcY2=jZDlSANYoVc^AOkt@8w z?$||xa|?9rGi(K|V$S3%cCIY5`~399$;0BQN*qsC=p4GX?qlnuo+l!4@1K2*tXunk z{-^Jk7H?bNr+!5?i<5WN)J?yxyQ#ka5Gr!>YGcD&bBzrLYz`JRe@F-xll-OMW65`L zeaQBXx7x`^65oV_=^lU=3AP2OF&KYo0pcE`z9pZC-GnD{z_}ez2IPB3L@q=nruR8~BC@n2QDmy~Crh-j{G=IpBqKRsv3EQ#XaoAfTR z`8QXugnH7Px(4qB2Nh;EpO4;CJK^xRb(7Rne(n8m(?fuVFGJKksD&pg{{PXq3g^N~ zBeD0t{a>v4W0zhpXOm+&)mZCj%z0TMsn=8LdE8vsgDfVk<@I$?*Qeh!gdgoW`t7>GUY+A?Hc=5b9G`qIdv%aq`s~x23j%n| zPZ%y#SofT9L(0m9$DS0;U-x05M4VCm*%ussqCYJc{LOw?%PzJkrqnUpc43TJ_~!L< ztR%|frsmk13KfToPM5oPVeW&@uqpeW+t#-!PH?|H`}*#`RVRP0pWOZG%GZaN8UMxC zCgmw@?RfNKeva+z58FzW*cxM^SQ%e8>h5%kn`snj-K=|6R;9-JZohiN5jC@wMsM0B z@)l+W|Nq5%_@$qN)Q^t*pNARy_?y4;36#X;@ozA_)Skco`{e7>GxQ?cQlG{&KUw_k z%%Otm^DCamdx=avVLIhB>-FN7d!y>>IM>~EvRkp9x%YM6yPr0f`VQT z?U&CMEmao%?X2?C$H+$aye)^tq1ThYGBa7dS+6H;OY#C-zI?@dUT4KfbS;#b~_w@x#*&GrsZf z5WbOGqj)TOW(&(fQ;Vkp=8vu}+I4i2=@z%Ql_rrJ*JL&9zL?O}x4X4MaUVmG(O(<0 zMFslP4$6oImhHH*TFdlr(fY`mOl^V*nB`B*>wnX~HfmHy=n zY{8BdOA4!&FAYu2h?Z;06u7e__1JllZs$NLm8Q?->vIe}k~;m3xv$85a9{GwVfQx) zqmAWrfAz>p6z;edytP=7H}&Ygi*oxs|NfYqe6RY)zHjC3=e}-UA9OzbT>Q6>ffp}* zI$oI^c%EVZN`39SMQ#h6jwbd@yZ$8ZcG0Ax7KS%|-@d>8(Uu&m3+)na z3H#4qIVIW7BN)BZz2$50M8EGb`vQg3Te)_Mrz~DA#cg7iJpc8Y9HRYh&rS^u9i0CVwD#{_z^7nl6-X}-0bhPgL`1Y&EZT}xB zN?hv4%iRlV`}bVvaM`}!Od_UA=-2lBldrA%{dSi3w=CNyU;l2{_pYAbWx8+T>JKs2 zpDMiT)UAb|URDtNssH*$8sq)-;ZARAPlhhAZ?Vo&JTAq*>H0>#;D?9ana>pdIa%vJ zOU$ubcYmmE+ADnD@+U`Db(Pxlzfx^RLN#qOKWZLx4Ar*%+ud{b?t9_I+aIfm-Myi1 zJ+U)f*|xs?{ag0VN#WkrXLG*F&p-aZsp)RZhkKE4f3zm)2;IlGz`?l*J z`JdV(C=@04f4Z{!eme{M(&M&=?;K6qnRwvEvgozDE}7lhW2drl_jJAQD}Ee0Vxp5- z{885IQLI(;6$QuLcaJ%o zS^Q7yOPQ^b?&pAycWVCZlZjV!G>RzA6{uzmU3-`?4;%mJi^0CY6C7rh>Bk`9gVCrumeI>t&lxJXtFL#(I{|^x{ep-tb$&`S;5nO_gbT z_gLqfRC=50k^1=}yQcpr5t#sZv;H=64@uTB4vty`767p zyJvQ+-ZsD9;EnsAU2n`XZEN;(8VavCsPyCy_w=u3%Qj=A#btn=>e$2Tn2@$fk-Zx_$Nz!&N`OTYUSYxyPd z_%%WCS>J=MmTdGauU~uT)$#h+)%E|s?pnsbwru^aiB_wx?KA&cCAmI)UHsa*t-r4O ztO;LRxAoTEU;CRks)=S;d=h+h@aLfx9)YQB>hJw*-p@LwGutV4kL1*Q?|DyptSG;6 zi&fi4|IM!VTkKX}J+yWC<=nMpJL|S4@8(}ywrPF%?scl!PyhPXt*Kl8>w0!{!3Gb@ zJ8xY7nO?t z>8n+e%v*Rj$0=nPH)`m`%LN={bZB_keC+Ugr?mVZiARD$4wM935}Ex?H_F@m^w*{i z6CwGBH(x0Kd3W!A0sq!KhC6=GBmGyWAMSYg;k?NH9xLEBYm+=5cLyHVg`5C^%b!!V6OA?yS z{kX(<@Mw=iNY$e3PS?|iLhG*Wv)@{t9kW`@^5eN{;YX?kojTXQ`mp}i#DBhhW!h_& z+11b4^&x+9htKs>MY7X3d+z-5TmNU^O`l1zk7xL{s@rVS7VWwdrkx@CsU_lQS({;N zkz1#pH=D*wj)Lc`Y#K*)R6I=9No^L9cvBNS-+S%j)fG)03CkwPSKhQZrTzZEZ^o9R9J_)Z`XkXFj zXw2G{+LEq+@5P)6;jw?26xXs&eY>ViKpCaJ}K}g6vnKcR#&$ z`H{Rof6mK8M>T)H(*JS$>#D7pdihcN!{54!RvlFpzIt)K$oek^Dcg9mSa<>wEfz+e z{8Knf@aSu|dnFyQF>Ez^eBC%c+$&+~G+rDcQN?;tpSSYT_0LkzPZg!yUobbu*K5u6 z-nhS2)$60)JwE!aH~ix7(5Ff)mj7>aoH^Rx+x9fs^ZJq2oeO7e3BMfsYvbMEy0xM& zHrA~?{9wVHz3eNTj%~Tz*4Zv~ZRNCS@&0`}#!LEF3VIkwzPr96O2WaMDNoAIwl{BY z*o*!1Bm_GOq})!+%wx_!WF)Y4di25F{nq-oA{ROzHmYpTWqxBmtzSZ#P3PYZsqJUD zc7ADazdc`g)uCUvW-YlP5?mnrXThFJ=L$MFdH*lcn}5w^zs=S{>x|%kJEFhyNb9y& zJxXV&F8>wBv9C;j`rR*$^(K>i8NZxeDdw6JwnE?*mkita$E&SBA6EZkYg-oQvhZs1 ztgQcCTeLXs#W_W6Gw00|%Y9{MvPM#-w){#^&HFN*o!4Hc)>q9^>Rr8io&oEJkH(Y5 za=gSMFHi40o#)tiNPKx2pO8h~za^KvCj8s$BvfX&?3A5ryxzN)ywf>P2GmPGFq_0G z7d5jdcR`|7teIDJ&f#r)vTo10b-+>a>#@3py?Zv*T-Uo{9@ixEzaUbN??+Sfn=Qdo z1zM9PzAv1*ucNka&)S+Frn38{uuPs5_inbx=0GC{%fk#4qMts`O$cmd?-tp!GxHDU zQyytOj)Gqs84{AF99SwR@P3_aU`^)f)`QIL%&o_mycAg~4?9X*{je_){bJt1yD0v} zWp&-Uhi`iqUfNuc^q|Ggw(^p|+{#Nn$F;-GYuWErfx%d@`Bk-QvoS(ti!@@k-7|gLp0kr0lY% zUwNip4qf56c-f=+R~)-_Y^tsv41M?bN!FQtA7|e>{p+5*+Jo8E{r6u8A2grdso1an zW3h`bd+p=2>FeX0yiYBjzCwBGqz7d)B0l`tvqa#?^J_ms7Iw~@x@(pr!~JUZnn+8b z<3DS|3*K(s`aP`3-EZw;q3AagGTo!DPqU9(_$Z*}OS#Xv&M8McS5EB^SvKp;f7S0H zK92PtK4b`YK0Utj)9DXu45Sh>KBpe!%ez*;hD)*YlzZOC4u=AHy944p`4J8_-0iKb zUFS-EczwJh^1)-P&4I72DlH{bF4R5pIm`7SAfL7Qn_EYOy!;XwJ9=9zV7#hr$4!m=+uY`o0@nEZDoC>oBhE(QnI{`Q}0h?eLTzN zP>1$s&3c7v?$n67X8G=0W#aM8er9vvD#Q1m=gafo*PB@BmiqONc9=`^>!zJ16X&k) zIkq5l{|;5PoeR_Fu30m6_vfiK6*)dX?X&yV%TDLMFC374{O-j%wRrXNpFZ#2zuGYS zt5iViIZcj<=0TB*+$?XuWO>};(kA-K_0XReI(w3|)7zHyF#g&9m;HgyZ|1-04i9bL z%{_hTd;iIg8?&DHxn0<{m7}Dx$~nDn`h)oyjZ>9=Q{lD9| z{~cu5zVU~O*sn~-Je$yW{_CVyDla|#Pw?8;xc48Nw|$Jtzx&Ry|K6UbF&(}8_E<_w zK5n?w{Y~&)oqF%qigIVOyI)?NKDqk+v<>fa^n}A7D9(T6e#x8thR<8AYv+W7I6Ajn z?(aR)+$W!7nmt)*C+Cb8bC*PY(!X-*DC3kg!ABp3ICx~Q)VM27J8>=cf%*DHhq$&^VAl||JrnzeTPw{ z2wSz!pY`GL&6|w)YbJ~K&dfgH`JiOh*8V&5wuT4(GY`l<|4VrN#nt-n|Ng9>=W^}a z{vRn*Bo8qh+aG-RhnkK+t#b30yRY?StRsW}L@6-Pe1ujmPaXk3NrAc}H z^g}IwGXGxE-M#l-oK?>6e@pi@l)O4!*z^CxgDK}Yq*NcOHP(OVZ`;*z;jzN0khR$h zrP}O&)Ld$NARIJNjz4R|{G!uRTTWh|ACPvt%fw)MPY#FuAAza&yFdInR?g$OO=!_^ z{pH>pjpYvVihek>W$W(9*zYe+2d%E2V{G=-D|bunlfU0j8ZdY$ve?UCtJ(kM;h$=q z(>>A0kFb56bNO(|sdY_1)b?0(FU)sdDNbw|<%d7GwzDsIYjXUL&~-ICJMP^* zzxT)3^WOU!7VP}bBVj_T^sP7gVi~nu>$#pTKV-%#A?OfuW6F)+zeQhisB#!G39=ti zpLcUfMd+MFH{MTId*vDLD^84<-Oh4BSLB8E0j~!3r#^||m=gYOG2P>4EipR~KYB zh-I~G+)%;S;=;AcBE{oOylsT=KklvRS2iSnoo=aXym|FOu@r88zS17^AKDA23Pw!1 z>==9N^+y+D!Bg1+_0PrrNA8N3R%GL`IV*ax=j)kk^&1aP^2!uByfm!Sc3Sac7PeoEN%Mms3`KP_j5W z;qLkJr{W^p%2)H={WWXH_oO>2^_PF$-LS0A`_M1(&AU0iZdluP=H|u(MSaf$e#R5_ zCuIM%*z<9nwNzlC8MpDDJu!P659A-Tm{XW#ePmnr%D`z}seh_3UMm!APD$WBc12j? zSkTFzfq&+d`(1dZ`aDyzD*I2e%)XL$a_ycCfAh5&P1-n*sz1%Go!`G?-_acIuyg-^ zxK_BYFFjm&qQ&2}u65qb(g}exUapr3aQY$jDC@*zKCNqM5{2!TAFhiE{?G5U`c+<- z?SH|!zwgH_-~N2%M2Bx2=9=thXDV)fIAhwMSGzluPj$Vow6~4fy?3pR!`yr4e#g3Y z%(ju*FUh@gb?~uiiu+F`pZY%ee=wu`?-L6D4V%l>I9=xcuMsxWSY*#@rmk!Gu8+GF ze>QRbT`AP3Uv9tO!;wesD&LW9j8TU_w;f)8l9$XPHon=;EGpMJ zfBdXH_AHTM$(27l_&W8ok1S`p=oj_E#6ZZ6)N zKmFOCC9Imp0p))t&F0KnRyV0&-Hg0S`_rq8b&u^o`Ec+1NzS?{KXoPsb_l;vc>L{v z{lAHR7k_4Fco*9ieSWe}uyV(q+DTmN*gb#l?A2}EzFfe?-DHN}-Zv55o@?K{skiv! zyrKNb)Op{kZX9t~uu(?mDp}*T288ZO#0pUF%N&{{Q04AEA_|`^{hUX?|T+4ttV=9 zr?Yz>xH8xCxPrZ4`EBXQGYX7KpyQ9SA`TX*oUYz!KDDld;a2G0eU9EyvzyCjf6m$Z zd5Y!vbku66n7_ZvZ2S^H(VLq zDsCRG4NOVgtKD|YIF#qHr2K!&zxk`yul}?7{{Nupo;S}6ew|xAFHLoE>EEkG*MFD) zI#VOlc4wBy{)%T7tWLu7wB|YbToin$R>QEAY4UrvkN{&jRfEEtSDrgw$zWf8o4LGA zYIm``^b3!xw`Aw{GTqqK@AN15e1u`dT#G*$3d%hX#XH|D@>1t`z^E|u?<@(OkV8e9 z=VIf&&AY`IaIUB*?J3)I-HWBA*?V=%-^@y=VP7v6`}3rH_42OwzJVX>PXB#k_Wx5O zPnzJJP1l0HFtqvxxm{KI*pjk`~u+-*(~{4=bN9pd;PBBbnLJaM7j%qbc-oSvUZ z3H>lbXO?DQ;)YLkJ-qf+a@$(^-B-xI=VNc(#$1tEc1dii-`y=sqo-h{5(6Mxs1{KdibkZ{oa*!|4-J~pUQl*PtP#+TT`jYVzwzy5BQ0< ziEmDF=)Tc?H^b^Pn;ZM#f_Rxj@A(*0UcG$7apqrX>HqD$YuCNl5?C~yb*1L*#LPFl z-p$LbtF7l!fA{j^Q(wQ7?Xl)%eOJ#_xn=*o@YyQmyVKeHMlOpvmU8n8s?R7?zj;%f zap1~jj#KV(wmW^}#`3>~;$ZNVIu~$}nnNnV_N~)Y{UNup%HwgqxMg z$zZ_=4L2^<7WJUY?sN`2qI>j%x(Z9aJZXt+b>0;kIL<(SRDbSEf?(d7MpFO#TKcT)VOuQ{RoLN9vJf(MK$A``TP zgjkvmI&Pe!BO=Jk;^<@$pd}*Aa!9Rs?LNEPSJPz=d2N_nEEv2h){kAe;Pk(z^V=SN z>{C9gGC7oQ`QPf2vjHbh->S-;qWUj5@VVYi#~;P>u2;Lwc1`^IPW-#J)%#`x2l?OI zPIB{n-T&ckS;^Oypv2D)cIj)@2A})Lx6U{F%9IUP-Xu5uxvo9=@2mbFdtyI5i4Bh5 z-d>(3H6`qH_s5(~pZvSlX@9ttv06p;=vr2eOeQ)0_uC}iw@ut|pl#-n2Zq|uPwu#< zT<9a>;hk3fARO-yt(GD7yVyTS7A~Ze=GMNr|#|h|0}qz zus@8f?M*%QFXL`~++uYn>r?JqkL|wy+{|UN=DyVIb%DJl)k#UY$-jSBN=|6mC~;MN z&qbv+Io1D{JF=raf_o!#m#@8ea)rO((+?*e z{Cu|8^sCx-(GM~w*KE9c*Dg3;uSaX2z^3Ji51HP*s(&B<^47bZ;Ya6di#)nqpHlG3 z_}c$~FZ1uuIQqKkb5z>PIqs0EA8$7JN^FpM)CFE&4sm+XI{szt*!qko*nyd)9R>KAur}Fd;IEoQEXrlUwrJp zb)l=v@7`zJW4?gJ@?e1~4@a1Qys6eF#-(#Mg!jvOT;7mn&J-!Gu)ZbP%_u`_v7b$b zsJ>Y|6UR*Ewj6=N=&5cieV#{JKC8C)9uh0n=rcWP;uc{e)6ZL$`$gn^zO(-0ynikG zEOX26XWd^JdVkl~`1&>RtIOloOuD{%_4}>K5d|6(V)`>L3q2}}{*x_ZBO>9xXG?+h z$9LuRJev-jZ9bEyKSf0&U*^+VqtxlLOBmReo-tsMx^!KJfn!p`0|u8V<YEp3(ld z+PTK0JGRC5aVa&Jww!#3l4v#oy{#=18B-q^YN zvy+*?gcJ@fr^5$&71r9xWq90QRGqoGv5NcD)@>ULR&Y32NzY+O>Z#Q5>IBo30q+J>$$Z{ZnlX_m7&KnjD+QD;xRzY7UKJzoYR&#v zk6j$~?mjc*QgpW!iF+aPueHQ?UGC+(HoG?$#jE^TTNk_B$kj>WN+iGN4HvFmF7k$U z@dxIx205s0mGoZmSt>xccK#3f=KC9jZ=PO%Vg1Eb(~s0Un(uMj8ymK^{MMG(Ys+R$ z6Y|pjv}wZg$7<5kFSQf}n_Sz=-ub|HZc*CB(+}K#JH6WO_AdMK+tPBuil*5!CC$CUfRaEXjwy7lQJ9R94(sjkwUlzV;_q~*(VD`)C8Ux;p@-iQ#7u{ZQNs2AE=RDy}rC|uf@91MmKr>rsh?bLOnJ1 zNO$vb#W2q}Gu7xp>-s#7@ACdF+cnp?|1WQ1dOT%!cmSX7S<`P4iJJsO**7?b3wR2f zvle9TJP!|DesSXzIS>0ixrDM++}m(%(&(I z$Fk8t{t(-}5OZ}0v-WNJ=4bMAlN5k>BZ5OHROj*Dw8cKa9bu73XBK18IYqhXt_;k~>5 zhJVs`ADrONYRJIg%p}3Iv*S=KThqZKCo)%OzdX&t&bop5^PDJsyBvu^an8TO`#DY} zU-dD%YNh>0-9N?i)5hB8>-#3%+p3>@>V0bUS@nOMoSn}i>-Gy8b{xC#_22rF@8df* zCrmImi9CG!;mf=g##L;E%M31wdr!I)pV8vKUy19Sm~gDujMby(fD1w?rB-m z|2v=MWwuLYog^jOqqVl;MMAVz(N>W!I#Jo_H-Z*R^{slb z?`jHf?THK4&NyhGVEex3o^|)_{LN-er%bubXBoKMPbf2szpsCN>DTwycU}*&(o$&( z+xvOph497uU*)XVDcQ6jLs+&#ao>-bcJ*f+Hhew!C1yX%vRU#hzT!_O)ouM&q`#;C zPurcJ>X&@Ix@3j*-ghnAs2KQ2?C|W>33t>VOtn~QcUXG2?%l}T|9>)1oo$qHJe>Ku z(eB-H?&C)f_0+^K@{}}x&{V9)@yC8*UD>rg_qa-YP5xNVuD&wqgx40SE*mdr@2CGy zb4l*qsW5%jgURpjOPM8o!@dzFz&V*=jnaT=6*rtcCwL* z_33B+$j1F^v=K~KRq^+iF}cIM|AxV;&yzY;1(P2($Qf(i3u^kjC&R4%*rcaBN*-_S zdTL`J-`i!ih3Bc?g9g9p|2PyY<~AQ-^87G=qP=Lxs}!3(eIJgwakGX_e0F8)pM8bx zA}e_{_~gv>jvOicY#zLH$J1|Z4O)-7nmJMq-2AXSmWPG^LZHJJhE01ISlOj|ST-LK zKbhEa;9he$$6U!4IhKuE)@42pxc>C(R7K^I5DA5(wT~Uym@6LDzrJg}drO1U68^30 z+U>s|kCHqsW|{qL$)2uh{NnBH28MUv3bAiZX4GEUnVh4!UENE@-}Xb&N7Iaj;jRMa zN0j+1pp2 z3Y_;R^6j+0e^9P{)xxeeMO6{w<@$eSSd$xwf(_>Jm$8MWV2VmNF6>A|Jf`b$bG1qDawDlvm(!$Pq(m_|8>y9&8sGR8=F}Fy11tLz#Wy{JJWL| z6x9;v%ulh}k@?=kcG-(HK~vRv&y~x~7qH@rUb<&pcRWLxQQOHK3fy;}ORI%4aNM`3 zov84mzfu43R^umb9VX|l>f~EST)Wz_1w|IQKa+}Ruok#{DTBF$Z~G>W zq}vvc1e?A;_q&Ti_tnJ6t_z2l461_Alq`J5B{q4J5{O6Ao;#sCY6A-_9_bpraCr=L1zg#a) zIkuRekF;<(H)rjdl0%xy9<fe z?XsFyG0&PsX5Q(C1`QvNHmv3CIrjH-UY1jhsQ2>oM_ZVtm$m(Q7cF|Pf|=vWwl!}! z#Mk``ns;05$F9ratM|{O?&tI zJ^N!X%zE|q_VoL!SbzJ~tzQ3l&OxbrhebEO`}%jr$9adt_bTvBaFqR`y2bA}KW|*q zwV}k84^}IDhTQ`1>6_ z`)>KaIdHeOzBS|7tf}UObA1`tq<%ZSH|xPBVbjQ+J*D3dhl|ge|K3K_A^S8xmqq1! zUCI06E$0O9zI*rX-Pcxy4)@N)KHI}pZw!~LS6CP8ZMHQ`vgy-)qgi$DoNgOV&%ZiN zq4m*6tHPXr{#R6Hvbc%H&UnAe>t@NjTf0v`Gxc(65WSz6O z6^G=6-k+R^BOYq7VZavuKP^zOaS66ubN;)&;XVSe*P$Nnz^EmQEP`|!Nh;Fw>#e4?V@1tM!MaH_mAtrm(G88 zZtLBtTJ`?3k#LZ|H@8i0TgalbvEOVI1e9TOUSGVieeH~fNGc|X%y1qT;&wXq6D&gdBZ!`J?-b6p;d2cQ8!=h+o zOXIQT`~MX$t>2OQGyQYRY2I5$1vc(sJ>PfDXN^0X)sM}NwpUg}NuOG?!~5ou4yD8i zih`|udlfd2{jp^3R`}NZWlfTv-oGcZ`!x+NOAT?WoO?QFIJic5Nvm-4R ztPg|-C6uh=+jCnutM4ze z_Z6(Yx-Ry~=l}2bK3Ulo_w`Voqea|nACh0$y!-=J-zHeX84Bo$EW`+T;{`W(Af@kFtrX}3DqH(f; z$J9nysEdvLfB)U(58j79n-dsY8r1MHHli$=L1X>VV$QGc`Lh(S)v$=Q>FDMB53s*y zk#J;j#MgwPC0e_fy*RyFBL1zcDbjm?{)fMSifDjfoY;z%_0BiM9VF&2v0z!dg#U!{ z39G*mtN%vt`ug&1_Sd2#MS=3`JtuKlJgGW#UUehOw{=t2vV;hk{khq`{8QbIn1ziO zSN279{eRlV<5t)yaeUom3)4-t?W_K%t3F=(-LrV*tFL*PRgU);Y0UasFkQ1t;^Wj) z_uK6fkDYBTl)snN`@u{xls#~5eg*I#A}9(yyi+4v&2ndv(!W`$LCphlye09HOe@bneYBlo%$7LTM`>fTn zjZ^36brPR-ApFARx%<)=tT8ZESGZdC|Hm3XQ``R=H%r&AWsc^nzWd!UC#U70TT57e z$(f$t5AS|8I?0sv-=SRe>eia=&(+gxtZsSOD$EhnnwXlcSe^B3Luyw(SHGQm?vG=t z%#TBQUo2g`X0fN>?^kUyHfKj44I4!$En z>i2hwx_ErzWG}K4IPrWhhwZ}C7xSK|WglEEeSG$tCpRQH{A-^mJfE5LdR1D_rK-4f z8t+To1Fs)Tgi`mc$!vLo^PjM+5!h;B%lG~N<8*NiZMj^BjoPcEUYIEe%|33o z=e(U=&IJLMuO>hBO?DQ@zb*QF|9-{I4|A8Pgg*Yi;`R~GTGd*ScX}KD%bs;*{r2~Y z=#8|+#tZ(GwlmI_KKpV0-_Y~TC+Znv0uO46@$A3fdnIoZ2j7}WnNHGsw^oxEKa#awwN<#9v)-SW;ew{`~q zDEhwsMZr1C$yYzc2!!hV{n>1g%)wjX@%H$Wk43NfZ)i9j&{q%Gci=WV$4&!|xa*r+ zJ~h6WV4BRCa4xX-n2V{M=sU^KtYs#f-m}er@?GymHctX~tg4i`(vio0v%{>P1nqEs z)pg;+wLOX1=aqOojGAg&=PbV$*gf%1?Yzqh>*s#BRXhLK6s!4UiIYo z^!HWVldQEG@A;hiBKG5VysYoWV-r#Z&CfqwU2&jl#`VvRv)RHLzg@5Re70>@`n^9+ zT!+~7KA!GzzY)Kql{rT1?IorAr7xz}{EOdzc;^cDS5l{QHrUIwh?)x=z0_#pESKTx zdqbrvK3(QVZ1wZMtEYee@Qs;y>#pYx(LJ)q6jGn%u}osyx~z6J^Yw%II+qMygzhs7 z?da9#oS5)1{3ibi)#WQ?>w_;lSl;)WW_N~9d)>MJtreD?%%8ZD8@|3=c>c7(&WArN zQolS%`~F>S!>JALuXcVY68>AX`s0h%;`>K7bhO64FTY+_`s1FUe1o%=&kYHQ6+8ip zl$lxfH<(ynsjq*2_7vZy1xpsb>h!;6YA2tN+frvH_`rS|hw>z$**C%)lA{7@vp?Ng zQ{$A!!xz2TP1$$0vB3V976zK1RkQq$O%rTlYn5!Dz!C1?!nUqrx>WK~*GZ}0F5Y|7 z+wn~MQkzicf&Xh0BmCdpwF&eLZ2qu?|B-5rxqiv2Yvv2`Ef-$69edCJsgAndqPT}Y zew=*w`*d>KMlGwB*mwVS9iAKVXyMZI+C{&=mH6EkbXmg}yY+)lSjL7=_1o`${3kPQ zHmk(HfP;4TpKmxGuY1R|;oxoUxlbPUMKP*>{#N`&>B$s(ez|F_^Y*e^RJM;hf#ePj7TN8DGc#f8p!41G57>Z|=@pW8-!9P`0sv-e!czwZJ&(Vnx%XbI8L9$CrFoBnK>tqm(U%QytzUJo}B#gQliem^E=;uF&6te z>0jHao6Nh5ZkMl5mGaOyeqryTSH&iRF4~4Q{z*UN+Gh9%xPO-OjkuNkq(5*`-r=n0 z$G+W?4V3)%IO?InBukSGI+n|0s^{KfnS1${_uJER4trK9z2%>0p)7lPMp4Z6P)SP- zm4hZGY1huXsjqwFX;r$D%WQS|bLY0hA!aJS;+p(d+Rh1&=ChxqAAi4@egBQ}iyt?J zIo+K1IsB9K(uW-%0xr&3eB{urBCYf*lb`>Od-y-JNj#coioC+EkEgn}bW~*6PZ4T} zz8?Ma+Vtx}GnSk%j5cYU{b-wC?W-sj9w{H$wmZLT&xP*w*Ztmiv^GZlqix3C&=lpudw<=jpHS{L>dVgfGf)@|cj|bK;|V;o*bq&G~0KH}mJaZgjD=$(CQl)U;Zh zc`bXxHtng#UsIcEvUMG%ZcO`Jn=P@wc+s|RYXk(QdCM7;=@dAopI)yKu^{t&i{7n+ zbJhy{JG>|CSX-LGrsHjgr$6wS&35>O+otLbuDhCUO*tMl<#_ed(~kwidv(e^J{^=O z7muiaxW&KQk@L^N&yJ^EWTkC*m)4{u{dPFKX6iSW>Gkh?_J7#

    a)C59@)6yE|+X zvl}%)47lT;m0zw7eD{7@BdnAd1d>b z`Q=|Yc#>}%n0#RUcCNW$Ke=R=pSM{r9rxQL=8nhg#I~TBUCR$W=j~j@UbT9DrSFwb zjc={~Gc$+Wp8vPbpGj!tv9lHJy!^JaTjV)wKi&Qowa2&6&^-5q)GX#nZx$u2v3*~k zVk!Ts+GW1?lzHz%;+9yoY^{FpKJWi_y{B*H>7O|E{OGOBP|Xh?PWSD*@T6sZhqC(G zn#M=r9NgPAc#B{iy)930X#nar*jy zflX`3v<(khqS9i1@c*5V6FL7;*Xg-^Jl69I9@MOl{K0-UuXD|@_g%N-=C?ikzEif{ zaT2@oIk%VzU!~m_{&~Ut=x%mGq^tMK)w$=J6c;>ucQpFK*%k3JrdrcZNQE7}`dh(t zp=n6b?Ec_?>BbxN`!DI9_;OP2)2~ohgR0Foisf$m^7(i+8o#bTsAzIz?c%m+x&Nbs zdUyR%H9qL5{l7Au?|I4U?-RGJ=}z@rmF=@Tdsc0xMtunDomn@Ee{-LE7OQ^d+3u5} zQXdyTDB%)0u=+~magVTrU8YxTtxlaS%{J$ny?f`M?h~udrPa)uuf8mL<@(K&Ute4y z5TV0UIj_^lz%9yR=cO};1sCuNV=gsw4U>GL-ISx~j*@8Rc~Ys{Ng zh<(qfdXXo7t}FJxyR4b_r&|?=e0)nT@*R4;P^WIW^pPb(kCesg7C-&@T=lryk$u+Q zQIA_WmHRbJcys!;P1cIt)U<7CyVXC3L$=P``mee*Zf{$xZt$&QU*4m|mziRI*S7rP z{byOYbLFp#hEuh*JGFe?e+~1laaz1jw6#tB6Vs+s87+olQ(fZI*k$~Hvt+=G(leF`Z7;aT6Ce`%-Qf6XMt`pD7uPHEthxo^MMZ2E7_ z$;rv4vMKNEayz|R&H3w%#3Cz_>i6EOUd|j)5S!Pyd`WV6;{nOiS6_8QI8LYijGZi( z^?mREmD5EO7TSC=ylDUNT>T&OFYQU(Sx0+1Wb0p8v*e1;{`M)&x%s5oepZ$^i7f;=_i*D-IM7`&Wvs!ia=}rG0Hd%wY?YB>Uzp_o# z=6k;E`GY@$;y3Sb^lK6oi`P}zwn4Od{>I=-bB?k|p3seaUZ(AoBv{#T>+j;+OE&_J z)~u8ByfE>+gLTx|UoCqh=T3Ief8H1tahL5`_|{sPlW#jWU(j6qBFJ5Q&H1%$`oICR6`+ouL(eF>7u8K}`e0U6UWQc7Y=|MoJ4jq9$6-uGSb+O*cPw{hOG!)3dKx$@rcPQGV1!|J5@?YH5JXV%SV znUi?$dS{|LOT()J<)?g(Iqi6z-MB?dZswjXbpGMno_&vdz4O1Vb-aDm zV9BygZL=hNem;&bnPT;nrSzH3n)B1%?cDP(?$yQDPZawb0>7{S%j&tYbK3lvFOwFn ze|Y*sW&I_klfR!lnDKnQ?38?`32*GUpS`?rJ9Bj>8_%qeyfvZuv2yz?+8& z&r@`zWbN6-3G0_|ah^GP`{oxZy={KG{PuqlReSl}UHtg5J7(6CF9hB*X4x{yE_;o_ znX~e1=DYWqwmgjbWEgK!UHIc%j z>b%wMCe1o)LD5T6+ zdlXdS+l&sct-tn;ySm=Y-RRHDRwso6&Hu9XJHMzGz57?NeE)|N7cUh?r(N9>cgkS% zrj4~BYt%CK=0#@qnpATLMO<0K7qaHgpT^WBN;wvjm5+vB(yrUJd(z9wx)oD@Z=TOT z>31kcA|9J{>Y}u91NZXrBe-rwg?Z15E#;ZH;zFD{X z?xP=zx0$+i@$KH=6|~`d(agQIO%m*d&PfJ0{#$1zO3c((U$vj(b-ezr|B4J7PUgK$ z&gsiPZd5bH_s8B#X`e5rRg_&0ocHPR4Fi!+@vmYROv;MAXOP+6)LAm?Tl;RQ0}FIE zINtWn+3~Q;g=!Ph+p1AN;od z+Hd1myVUQuyY%e+=J#{f+t}}FqGdlA7&tdL&NAfjVy$0jS+;KW^`)o(@0X1G|NsB{ zd2f^cR=3vd|NqkZZ~XhO+52Wo{;H3A=eyuO#~-#Q3x8~wkth85VCww`^AG+u4F1@; z@ARXb)yXy7_kPLkK6fF+%cAylLH6`3+Z5P4b!`g&IQ%*<*K#{w`1Mr5EmO1Rg~>lX zRuF%jrDV^3>9-7lkHSJdIqt3~)7CNmxncT^)Dn&P*YvJF*}6YFsA{9=UcILJ4UW2v zJ6|vwYyY?Se)jk6H72ib7VP={mwoALhJaj=`}$|6>9PFv6&90Wk*@o3`bWFezWv8J z`OdyGKi^m0qW3@f2>ba-m4a=nq}J44S-4u~rR0?DyY|;iF$>+sl=bo5qtD6A_nqwj z{uX-aFum^Af<5jQz0=%|NUKM#D`-%&c(QeA6HlAQ)rWW9ZaVJP=W#VTpzPV@SsH#8 zt3PgdHrqMk7O%t>zll+t@|)`~Ip2_vQMTQ^Y>RH?enXonk2n8QdL|O;u(EJrQ@rUD zh5K#YdqeBDb1Egr99$Km{%g1ABZc4QpA09g4Ac`++m(|1qm$#J%K!B`=5ORasCF`3 zma}!>sJ?$)VfF3P;taxux_>^GoZWKz>h8GJ!OOF6yr282W~=V6J<~(IkKA`$y?>c} zTjclCPyb0N@3!jo&Rn-P=?|ZW_U`g`e@?Py{7QB+@lE>>aZx$jmGueh((S&_;@Sd^ zJ~e#!_)fI-ciHdt(z(a=FYRitI^womv~I=td^jS2hXQu{nt^8T^8Oan$?n4e%iOZ>y)32a@PBNf0nb_{}-D#erRa* z*|+xB_Sbi|zrLbb{UYq$@w(M9OLu*(d)*Uu`uBsWf0Q_m1uO|Tv;XN1RqMUCYx@^G z{gD>!eYSaZMSH#bik1snEBNP|c+C9u+g&&O*4NNqU#8!Ujg1Y~^?ZHxwP=uj>i>Ts zeh;Ez_KQsPn4r}zXY~DoWSPK1=6QeahPoAPfBKO%{_3QLklsBjo=a922?Vd-nJhmc z#)bKXAM?%`HUbr8|FYCNO?RpNTmCw%<7w}NDdC?t9G|v}O+xtf{j*Dq_iJCz(rG?> zx6f(O zhL!rye#}h0AYi@l*IR?i#A&}LR-G(wD=v;-w72SD-1;919}gXyt+t`PAfe#f9kb)T z?*vMCEY2JKUnkwy)^x>AYHR`&P+=dt{rCW&O-Pj2yY{U6IRq+B20diD3CL8MgL zCEtlJ<*I^}%m078>}%cgKxZ2cgZj2p!9fyB*-wA! z2`K*(yS(mUC8z$Z#;tMu$GQy<96BEET4RGKi6!i{YP-p ztxta=_DB8OTl73w<@=r+lSJRYWO6cmHo5EeAMNYy)8cREix+IV7-Sf*>ePwoqzNTI z&h?}#e1GvPIq~paW5xW!b3o@1i_@8^NPzCW0C zRi$Xg-lISMSz0azRx$1`Wn%)IBvSz+~|`JIdQ zExrA=^`dwGtyfLk`r?oKTKFP^R|GK{I1`=9s9O}E`U@BeQSl9x}~V-dbQ$iO1=-yV?? zCTYc-Bbo8n<5#Tu8}T>dZ`C^gxP7W+KW3``xOH2feyx1$V*m5~FCm`Hr2Pf1X6Xy(|5lf;)KBf_WP1K#>aJ4v zKmUGQIG9yip)!sC(Pg{MXQRGLi$0h&?fbs!uc!8Wd${|1_rI+-RCMQD>i)1(-&^w8 z()crrECPgY8uljsE&Xlx)NGGQUWQ&=|36NCDcO4a2koZiTJL4w3TfJk?T=b-7oBzg zk8$kP9q;$ot^3nb{%`%yuPrjxQ_g((Tlu4J>)O{Pd0#v! zkKV27H_!I>rSJ>M@-f@a$7?FT|M2y2N3ZH3H(`TMGH#!??dLVs|9mk^<5Q2#syuda zy^EKtzTSPcE`0U%rqhexP5Jr$=ZD8{9{t;+ysT%9kzEA4g@cs*`LGib%9ZQc>wm<{ ziN;S_-QxFcHTORG`zs7S_x2uq_utTO;r<_4O=>~$%WnM<%4;~8^GB>w)%W!7eW4F` zTe(hcf6yH~annza-+#<>@7GUt{m){PBELk-)J4HX!w|}_nYWCO7EAHNZ zeb4L4y<*otr?(s1^Zc9<^5*-2_+NVF&n_?jeEpn3@SMK?yzjn#k~~|_vRnJ5)P%Ys zjVrqs%*xsATcKk2zWu?j_qQ?%%=W*we|T7-{lwhpZ6*6Z_|B=UzLfkWvaaLa>S_Dc zmhNA5s&cn=|b_6hb10){r@ey-}A+; z-4pizYWf%O=Z(FanY|ql2ZM^X; zS3V0?-`D^CXtl1^{)2H_vpx4FU5`+>_lwQv~__KdWgx#In|M~LXPv$9# zj{hsHnJatz>gsE|qjo=cJ=OXC|A+aL*2ZnDUOg#q>+86lC)PuqTVP^pe=jV`XYD|GxL#^&av6|5jhE+8eWX>ASTCoslP=vnPAlPk#RNPhpLc{b3gI$J0#TpKo2z zyjXYj0;O3!UG4lwjb6Om_P=#i(aYcOzr-DiKKt_5p{1+ip8veQ{+iyjzx8z+>-O7+ z+sD1%TGaV^y<>G^g+t=I^Xv< z*@t_tAyeC{)~gHqOjhat-DWLR!*@Hp|2wNuRkljZp6~|?RLrK#FPIqYF36_v*>C>e z+8t_J%*zb?Gf%Ic{OHiMH3AB~hp$#O2c7!#qe4Y#hxSjsCjQ_EURDlUvF^SH%MPg= zSLlhl9NWLv%%tphb@s>29cIFF3+L7EEI$wuuyBG<)0$~V`fpo2zEWFPq%?Kj3DdX# zf9-m|B{E=Xan#k30G6`-{1IFIkG*3XUu(i+BCE)1Gyg`Q5#HotujKt^ImB zbN^pG`j@w#|B>9e_p;ufo_y$Y%X&4d!0jQ+uNz*9eTuc!)0a$peO+_MPOW)c*X2*G zo?5k=rD=U_9XI#f>U&4kcYp1v>bN##ukI~<&!}Hn#T|-aM-saZRX4O*P@>O`=fcbCnQYgjkSly z!u8kAJYWA>ee3B*S`%YKCjI(;>7n`BnoB=lx;sh4OuL)kUbEvYZ;aw2)|j(GFRS(c zaTd&65IS#@n$C?cU(a9v(mZ?m!J6EU`+l!XIOK7x{=-zq(qGJWQD3i2EG>@unzb(W zi~p;;!nd}+Ze3fP{4ZJY=k>#}!h4e)F04}cf6#8~x9*ew<=t~`d{0eHmA@AIPjcSP z?3XKA|FcA%l*rTiqrN0b)soTQsnveodc(pO{XbT-bXYB$Uzhbq;O6I(*1wL%e(7GV z|9|xf>+QN#Uw?gnb#DJb#n8I%EHgJoa7gTDNeN>)_@4iKyfo|4Wz{YA+{X?(@3U|W zk!-%awXN`B!)qS@lnagjAKrNIDum(g!G>GBLOOfSCax@y(XD^Xd~?OV9d%WIMe0f` zP15xLSULm-hQBkGe6#u3@f#|Sn`KMmxEDJuzolanzq9@ET5gYPqTlb{*1A4q`zhA#I`ftI{?5nS?ukN1p`lisGdD@vp9D&>IBAISXJpFISrL~(E zq+Dv-=)_p^{>BUCuqLMEdKTd_I*b1|?OwF_-eL{*cnL|>_QS62(VGuioT&1j{QsFs z<%AFC#ZT96y!-R@lHixADd%tUK_4RsQw^Cs)Re(zwZ|6t0_Klis*U4QlJV@g#<`JB)S?Uy@WI#fJ3{Dp5fZ+hVR3(^Al z`hWCyUD|Rq=rarB|Ib&ZwZ{M6Gym(=>w@Ql!k_LuVSoMZ_0GF?6VpF9-%ySZU$|u7i{7e;tk65xzKEeb)b9R<3cc?oa4*@(}%d@+FDxv`uam>Yk5tUmF802uXKNeFWUHXeQw>x zx^?mM^`FH|)}PW>#Qi#8x+d#gWx@Mv*6d%tlY7-)!BdZ3U(AZx612R|XI}56%D-|Z zX7m5;tP|2VUAq5BkVI8=L0)*=uJ@;(sO`~muil*XCffA>hm#hY15aL?^-m@IZk5o- z$^RbD`+qKU&z1ixgLm9u)Dr3N2;*vPRctf}(G%lhb#g4!Xi-z;>~KmFUboqQ-p-l( z^Xqg2k7pK6clEgT_DSy5qt^Rwm;dEE{8nAxP04$I{<~+Kr+e2r{<m)mWt+x04 zCvz1&ejj=n)b-KKZ1%r@Ev0pG0){L0xtw2P@$mYI>lK&Y`ukse{8b{{UScbQ)2H_B z_x*RWMz_U>RUB)pdwuh}dbR3ZIT@jy*6I~Pk$w`6+FnYnVY5tmUfeQdXVekt&=S(( zb$Xa^A;3$CtHo(y!3rNIVUCt2ZNFbDKUdu=|M7R%mX*(+tzVlKdNFEYmF(Z%*f|?N zy!k5^|7E>e-RtiYb365F^q*SAeOmCnTKdxGDHq>0e7ds#PU)i!e|cQ~{x2@mo%&X1 zPTWQY}r*0XtGOgt=OdIR@+qQ5N&PTlIP8eY?6_pG&4cS{|Ej^fGekRX;XA8Ag=9y+aNZeY|C>Bth{WMy&;STI9N zlAHC=LWdO{8sc0nhm;IC&L|0U%vins_T#JX_uHH6J;^O!v+~%>m{iplDJNfE`t<2z zyx#O_)g7~nUrpQl`6qAGE7_NKPfTB0|8((pUbzLUZiRk((_sAbgNy&ci?ccX-S?|r znN~j8#p%=4FIAhT%kL0faw$HBBVbvLQr=5hSz}?7*7|}BUxkE8M<=pf5n=i=WeT5y z1!$+Nhma7fle58&2on+RR;NZog%Ay8ftDuS;`K7|x7VKialI%zSnYFc+@~L_&VF5c z!dY^UgoNg&b30LcH3K~r& z4WWt_3M;z4y2voQD5^7~y=qM1PC(6Ux^e|yXfSQPKtCOQ4hmVpfXOCB6_CB@h zztv~o_q{4qDh@vTzUtBLna`HGA1L@xwZ-zsxpk#iUi@nfd3-;-xWPJe-NrxD{#|rh zy0Ez85BHbf=D(l*u#dAl5Htv}~n{TR13midN7 zRqgr3m);kN%jpBkyv& z%cmLP{i+K$rp5);>+vOCSz`TWPZ6uk3KrQX77uOmUVd-0SQEMM2@_YxwZ=&GK$8ct zL7pv31DZIP8yFZ^Upvk+;^|_oUsxFK{i!Yc{q_BEf3vr~`uh57{`GZFcdEy|jau~k zZS~6NukUuRijV7f61QsoRimFzCBF!}c-w2TJ*~={ShZH%T7CYn?NQrbzk9tkZ|VCh z3*J_F$Ngyi_-o^>@2`&6USBQyPi9xtv4G3_r~jK`%29o&inlM~h|2P7)8-!*n|0Fh zSl_-$M)CUp9?o2z%yO%KYt0v@rTVWQ^d$xMOffaC6TBo>>lD}!wc7YN$K97TqQ?*1 zFF2hvvOz* z=)SApvx}v@{PSPlqkhFPTkhUI8`^xTN%Ca#zdx!h7hctzDti8q+vohsS5IGtJxw<4 zI{P}bs8}~F{`IP@_1BE%|9Q~TqrPtMIj4B(u=gi?Z`sDK^{rpGH#~OD?x^=qC+lWC zpDrK%N8O=VT8(Y_^)&83yL-G`lRj?G4$q#xIJAD9*4CQvU&oKvhyIyVbyV=x|3wFH zsqAOnX)GfY!gp`K@U6Wz=ECu3FL}O>QQH{0TD@^2;K`%$~?&e4R06{-d)| z@9rP^bIFl!Ua_2L>vWldJDvX}eI~WcPu=z{Vi)K7*T?uhW`5lIP_5v8UZPsv{S8_t z7F1;HQU1D)bNQFT=dE|+I86ed$o|mY>|$2zxMDfCl*|fK_M@T|%5xI#Pk(*4oqOA~ z5N3soU3P!I^{#uj^rfr*6TSCWgyVC+uD{T=<^NNi58tFLALO1$Y;by2y`OLWrPe>9 zMLo_hO)T9f{M#x1&s1xo@P~&I9?ffSY$*^47Fg{X{m=LL`GmvVsa0pMpO5{wrjFCh zwB*nlftxo~Lm$SPEj23CoS9%_@yF!8{U@(z%k@XgE-M_ow>M(<>UB!W7p>2)k@_#I zti!6Mb${0WGLe}-RCH>N7Buc$z0sQ0ET;0o{F3jkKMc0~xKn=p+@dbm>8-EKthAnN zT+()9$&O=_FK+$+waJ^Myk6qh!JnUsAFuvtzi;#X_tG}if0HI%5A_$WU;QmfGoW74 zKX&@nn>PQC37m;AY+>K~Y5&404)qRwI~y6Q%GdvRbn0}K=0v-4(+}=xyZ>#htM1oi zD`@CrI`#Ex)%Wa6Mc?l@*4Bsp5e$ygKWL~qQLTL{+kV5amg^@R4li!GoA#h*nLXd4 zKYSC+e`~-0;$a_F*JA%#wi6 zzW$)Np54xR{qnfH)gPSPYd`&;yjT0r9p|YJZ5j6-)^$;4=h|EswC&&PgPhVy)w1*U znW_Crsr&Qw3+I(7^#|Qd?lfMCnEJ<_P3U3fZl?5jz1N>^l$ig%@_f~;*WZr^{GamJ zXwTE`v*C~GrS?Y5`d(@K|3v!!hjH(xYd_oaf7;ZawXd)KS1+%hW*&Zh_8f&>J8eYe zwce#I*vr;)zW$)3;hEd)f8#%U?)~AFoGM|vY2)|8OVJ*8V=v0z_tn_6Ra3Iq^-W~) z^O7A)pZu}@ zZT1Gw30Lp@5fr?;`l7A<>wu5nWm{|$#is66{~fs5r#|hxq}=P>@RgN)GSO;p+?%hTq{Q}4n3?fE zUcA1|zxc4lek135FRS;nnD5uFdU5yH*UDFOUDPVJf3CctQyXO;d-m1W*Y{V?`~G_U z)n5_)EFsfA?st+{!N9=U<~Yl!XBBJhg}!x8HUIy$>@UxH&T9YpYt8@vNB;i*w*Kn< z|M6e-D>c^C-_QC~So)$r+;(k6+V)S5?s9T1XKm(pPhBSK9oHmh1fg zC2zm2@8Tr6Ab-itq2@y3_u4lXX+;oO8VESIes_-WA#U>-($o*H?W!-Tn35 zq$;JqF}sxZ)}LCl_Ui7i@CiN1hVx$?b<`~iH<>cE;$`fs>-!^izgib{aer*_zvW*0 zuCFeAcYXE2mW_&H-uDcSAI{z5_v1{yeN$g!Ps)dtPKUwCcd3N`np!>q-)_K3a_SR8eZWZ&p>$`WyO?r2IRsPoGxmD}7sI}IuzMg%l zYGa-LEY2v8n;)F--uMZ+zIb7-Vq_|1fL3=BHifbD5`A6*M@-WnvnHKaV zqkHFay;!XXdlr|N({et4{$}>M)PI;5yZ3hH{Hy_aNn+1^Fz9)KU%+M!@U({@ojAPO8lbCqE4?o{J-gK>Z#;3 zNp^YHtCc7A&pVlS*mdvHpUNh7PQEXq{|LI@-BI^9nSXWvzL+xq{a;J-HiWMBe0BVY z=H*@QpDumS;xo@|ncbRwJ6X?MRFD#u)m*`*C8Ro|k!yvrPHB~L{=|=){Ui@D{DiihvMeuq45&yJh%j6oni{~%>-hHK^p7mbMrrrr# z@3)@Xdt`l_{qryPgxBnF-`^RtK5c)J?DwZyp8s7>{So|LChzL8uk(J}`|`j?+!y!N z@3s_g)2t6me%|_P$B&)YFYuhQt)Cw1eJ}q1dKTMz;jbRW*Oq68KfHN!M9JhflvulwTe`9xRvU-ZqoC9BVPGJf0l(?fOkM-%k~_xDr$_6SDZTYr7|?=Mjk z_KE(lbMi{oT;K95@1m+heR-Asj(gVYS>{C^yfOP`+1&dty!q!Xwmf*#d7n2+imSnq zjI!|SH#uH>Z8@^f=Gr_l6+?}mzArXTTw1R-JMKIzU#}a|e zh}y^>S?iQnPyF^bHa50OFMh`4^wqMuzyHQeufNiHw^}pd+Ft2ZM?O9Mwy)~PPp>)g zZ9iVSJ~ip+{`y4s-P)?D|5u-UB3Rz#E}XQ#^RN7puhmanr^Z&DTEF{Osp_0xhiw)ke_~iPu#osFum)mr>%<*pM5^wZR@YU>bu|W zk63pt^wq)3*Vl)y4;3yi+w0wVv3k>|Xa0NtsQ9QRdVb^B@NbQs%#Q!NdYb149RL5I zZ;e6^@4r9a?W)(;_nkU6Df-Rz>AT;5ZCac9BfG?EURv;ijc0^q{)gF3)6}gN@qQY2 zH1lnB_`kjC(_RSHe{j8EcsXXjtHJSyR!r44+Y|b{v+^4Nj%Q_*Pf?mLN5G& zu%)B?-W~bXqNT6dpReDbIO(X?zP-P`##VjLo?7+&_5JmG!x!z%{%Drw5V+?ttNh2n zx!T<&vH!B!dNL#9TXrUgEil@9A03z7~I-8Z9*YKg;wD z=EWbJa+l_vkJn9Zjn>?7WFbSExshL`zuendUbi;f<=_9wN8F<9c9*{n z<9O?%p!54fBFj9ce}3!{%Rn*hnyFI1 zYFm1F>4QnL|Nc7qL*>b2%er-c%px;aTI=6kw>#>`e8KM#>o0V@&Dt9m8F*vTlP^I_ zWAA5&7eD^HIQvn?zWp3085meN9cLN$ykafQ@N)l}{Ot9S()!i%Yl9!}@BVTBpVi*f zxOJMpcWpiW_1(qTpqB@ElRk65pYSv}e7)zVLeUpfV^#->#<#t>n^U;))kf~SzgL$h z@17L(<-^)rTdTHAzxlB)N_BnsqT2e|QTroQ_7q)SX6?C8AS^WW^`&!LooyD^W`A8- z74`9J(cY|9{ntVJc5PjMZTGwEw-Il%;_c#PO@sBXw45xutfTk*YxmZ!b>8cv)?M3| zy6S^#q>F~k>e*k{&3YRfv^q>bdHak0aP6yo*R-Z)ZF+zD;gL`KS;}=HdIt@dR7 ztXJ1|N5;L5eYEdt-|Lx0ldlDH#a%sqQ0{U_|D!EanFQ2eF0L zVV(Nle(YI(!EBM8?9N8^OUvq6ukZVw{$O(Pir-&fe=1K~|FwJ9*LQK(W3Q~bdv@1R zeRD1Sx+PM3#3xURTD7$O(f0pc(>^u^x?C0I>`|Tm$2hSk=Ih7VRYet@F^PBk|pnukBiOeMxQBk$HM|TZ)A){5Z(i^yA-~thp?|?H5h7m%eG`{D5bN zqv0-wUb7$sUR{D0hWDf|)FfwRtb-+VH9C;wBr*lQ4`c=gpE;e4-eesx{X zU(6^Et7`kq)%wrB{`&5%1<}tVJzw2lDHU`3V{i+r=$j21=h}H#m2ztYuQj+|JnG&4 zVBWzQ&OBMUCim{o+wW!3zTUR(Y2crY6K`xX(vbe(dfG?Q_T-=G_2s3nj>HSAUcEkH zDzi1?hDl$-%X*$xyf@JmHasll2ml(@!op`kz`;n!o$clGIh7^V7esTw_u5Z{NqJ{a?J+$9-u3 zI%VJ1$)D7JeaeiAtNw4d^ox(ye{RtWjyoJDNWToWYMb80lv@8|m5aoNIbH`&7$p5# z%Db=Tk4Tw$dKuH<%~zfWO36<@sF>+1_vT5wR?H9onpeWNUU{2ke@l9}Y~^~lcRylf zuXnC@smST+ESdhqQ0K;y?dps6r>|Rj|A+4L#j!W@n8S*Wc0))znpq4Su1h!mbU#rRQ9>~^vs`k{DKji#Mk-r3TsZs zENEt}Ql8(W&$nPo(wiso(&_S1CtquQ2@CzUF?{v?MVqpt{wC~ntku|kw|4RSFMe<4 zUW)tj#N>~V=v~&t?{|KFUHAL)tLra%_S@FXnjPOH9n8P+V4Pl3RLtiX-_TcGS6_>1 zzP_lnZk_k^i>2mkR=a8!25zD57tLUmizwvc5J0{RJ&bN_E_PN$jwY=mYac79Tk^hW z=HO$L6X{qX%Ej6AFkwN0jyg9hQ)0k^7%gM24yVL`1tCi1Yj@qR%f4@X`?HUJxk0h2IU@D+{M|_pe8df=#qaoY=8^I7 zL*HXgHOdBVPW$uly`=T#t9KYw_Ie#mRJy=$FmtWJoC_Rlz$2|HqMW=gjtM)am`HH3 zIz4pM2+(ro>Hr1v-FxbN=5?#jpMHPacC}HV(dyer!`4R%FL*eA<#PYY%a@*7CwFnu zpV+O>4n+PqBy{_qo+0}sJ$>o-|MutWy!xlh_WGJd#@YQZ_VU+SKV6@2!g^bA$sY#S z+%0l*-|duY&VK6piI2I{bdgHA{M7r6v&FmLFee{hx$*9zii($oh6=`4wEQ@Q6$>V{ zs`h9o_%VHBXEYJ%=n|1?ZF=NzAjU|Po0aWRV26j6AZM#nVj$;SzhC!Dt^Vxz^5f*> znCE_$E00@h_iEbo&MwI*t*UXacpZK7t-5G+r|BQwe*y`Q@@n$~+uj`yQ1GAHP*VEv z-|U;KC+~GhzAkTkZqeSFr}}yu8}hgPRBk%RUmxZpDL&6UWb>C~#RD!aR)N9qtSzd$ z^s{>--l^U3KUOcg^7z5h{tU+J4;b`WMP~|qbeVg#Q7B6I z%nzw}_2oOR{V>g%^z`@5f8R})Z8*PC<^2EImo96BC>hxqu%_8K@u%OPe<#6uPXdQ% zK%!$PW62}6unw;hg}MaLxtSeaN?fd<6cG_1#?#7lIN(5xmIzlzQ)0l305ws8neUHz zzJGdNxBU4&74527XQpp`wq87j=f8Wnn(xVvAM>|g{H|>z{&B zOcoMYa#8tA5X<%48+@P>Sye>2Segwio9JG-Uh(&c&) z*5lJX8e{ISfBCJ9<#0{BJ*$B6wPQ*_wWk-$H2$3aH0zPmC;_!JLwDNt@L| z_B5$1a^z|}z`($~-Eo!)FBfa=LQCJ*_t)24_dU1w|C$Z{ueRAo)?chu`u_jh`v2Y^ zxt?b||My~D>HS6KMNz-j?XB8WRhb)8(31m<*De26GM-v{w<|ozzV@sC z*{gSd?fmui%Z9GGvF7s@`zHVMO|lH%r?=O)ZfE?;%j*08?Au$mGk$&eYW``z;`II| z{ol3df4Il~)e-AMSMMo5{4)CMslC-(D{Zqs&F_6xvul6nnzFr;=l&>$+f_Z=qr7|i zHLb53;yyPqoUSiic=~hHsl8c0E9VwXzIHNC`Rf0O{XG8s5ie(5);;?At~GP~&KFLR zH9|qPtL`#;?T#&1U0=TW_uK5P^`Tkn&%cKFR$Yy)dirz6lf(AM-)@qs-{3R7CyF_2 z^8cRcRlXaw#h&d~`ojO}=+D=WSN5;hwJNP&x8HoKb4UEORo8dFUw^e~ZAk1> z_Fv~ez4u?&lX)G-EA95*eg9+TxyARszIyz8{Z9VNzjnllAAfoG*Rj|Czb1Zt|LW?i z_t*X&`7x)YU=7#D3k>sF3{Rf8(%pZ*^^4bm9K#1e8?~kfB`)|g@7w#&g74$H-k+T= zb$!~;ja3itmwtU6?)z%1{_0q-tGkZ=@afv%bfcu``9Dr>wN%lH&2c|c-~K$cI9G1| ztlyKb#|Q3P_ebER_1B*U)7#yCy_ep?A{m>ysfTCw5$@&}&!wj}>~;wFz4)T-?~lQ$ zRTY|@U7L^JFE3ADzbMbnV*Z3JOXS1v7gcSUerfIfjuQf`a+jI;Jkr*d{#@+P zeZlx-w|(hF2Cj12v@$&WtMRHzsU9Gk@fwL+W&E+3glLQ{SnuB ze~tWFkM))j?;>mGGHGuux+>usXD#{NxAcipvclqjCl~dl?UmE|{Pmwf=hyQ0yZ=V5 z+aoaZF4NbDejB4pe{Oy*>t3}vD7}8xZ~Jy_!!FK?vEmojt^X41ovrt5OV<9lzoG~A z-T&?So4sTA>Zr+Af6w|Kwcqvd(Z8H;CEs`MkGsd$7Nk|;!N9DuZ@?q|G54Cd3DKscY(Zgy@P5p{_BsaeBb{{^ljFP*;|uVEcM&AUe!DH>vyNA zkEb1tuV3x?dU}25i{pKo$G@I_x4y~R{m-V#e|y_@Myy|UGV;~m_^+#E{;mA|`*q0Q z$5S7z%iGzavt97@-6l!K{Dar)*3|fC9gV$zZhhVQSIh6XtdH@%UtVmlBG|E9HL|W~ zLE#rS#lH@H9hA5B?&+fYYXg?9ewj7Vs(iTK}t{bJ+#+K8-4i=Te?+s_xW@yfN( z{l6ytnH2ozs|+jGmpFg5fPxxjhdXlzTdv;4PTictm z>L=ESmkA`YOqR0w{>L~tyWslii~qyw*3~U%f4+Bful;{EIVsN_2@k*iv+rMD_wL_+ zTW3L$)&Bpyp9{#_+_`7}r0V4Zl7vc@-O~iyJx1)p<6ny|4&$PGDQAEeCx#Z zyBQ69^=BVHv8?C2At`v4_99o-{fU}L+m>LHLa~HHTRXi4_z^z z`-SJd!06Isoe9zt>JPGfs_+UHTA*iOkhtKBzu2Zr>^*|fUd;a@&)Uavbnla|OG>;k zqh|m8HK7mJG@Bp(csGCf_1gS-yHfYx+FSLt`d(#i*1EM>@BXHm?4KORH_IErV%x$jep#PqH%1frluUmI*)wOl$ z)4kqi`J26a`M1gYtb5g_4?aJb{a@exw!{13%x6CPEDp@7>VJLu?zbY%dHer74>$Z4 zKkfHh;ibF3PM!Yu*XQfU*$);xudA&{yH{IvdV^EU|M=DWdGqGhz1;O#C_=XX>D~LO z#}BfUPpDg0r@ri3%-ajn&JIie%r@dX{4?i%-1@-PvEIQ^`Ysh2t*(pWm#eL}t)I*u zCBH@_{@T{&?a5Z&tKSI8UEM9e$o&0rH}QkDCYhIRtv%k@r~hlR{Pby`{#JGDm@EBs zdG)`qUFJO^-Qp5~dB3hkm+RO5TmNl|*Y~~mCpa9iDyUss*F8Ob{Z!HAagtj57Q2M? znC3j5EPqhG{PY)}{So$j-kr9^#cN;8@BaTfW{LK$f5GDV_v<^hHL3&(uG#-N_RD|M zlw*694@Rzz$?SN)`fuLki&goWy}i|zA+Izhf6sbcX>b1fi_iM=@tTv~?NMD?GU@kp zUGrye?f%y;+sov%ujYK*TGi99SACsw{pag9O<}xG`_q-?@x1+eXZkInqLtP6emvw@ zzxVvV(zVzAu0Hu-<1ABN zFV><$dn2)@u|@04|Nqsx{^zxL{r|9a6?JRM4;;S#|Eq4{{4IOGtusIJx4QoOuj{K` z@7neC{kw_TtJdrPYpD{f+sA#iY3r+9&vw5Gd%yl=)!vxbD?9#1fBkstn}7WOrSDdK z{boJ$_=&xHTH_ANeVhOFvVD`>2}`c(-mb^}7E?~fGXD{J!jpVhxK4Hb!Mf#=Zw=hH ze!G$8a<+sk4YM00D{kHFK(dyIxzn}iKQ+;*VtGi!yVuLC^<(kai81X#t zm$vM`9WC?se_xw8)zF#*qq$}ij z$?IJ^Uc_t-T_gVL^~=&&eeVa`w`9$%-*NNxrx(BcGPmY;ynm#m|Mewnd9BU79p&ch zJ*U^quAl2Q_jLW5K-;h>`_}#0;&W1p{p+f)tGCv1{>#2w^EdU^mDH6&{t>Is2kv0! zep2;ljrL3-VJD`0djtZ^*bW^2Fgr0iJ2|&BC#UuQ(r0N_g_z$w=vtkcW+H-e0l!7n-+?* zUSHQV%#W;}Z@+k=);U*=otLJ3W&Zk_{Z(SQ()HE0?(6c}FaO({H0%4-`17^@I!>Hk&Fr`}$Dee>Plf2FwhpMGBV z-!|@YoqyQ7jrXp93e1jQed*VqS*!lU-27^$A1-ej+&A<1>$8dC|5bhGUkk4KljqO% z(jawan;9=}cWOpn`me9)Pv4q;&MqylUn3~@q&dFQHsNnE>+_x8T&~rhoL7JK%6!4S zBKvP|oT)4PC0zSN+Lq}}pPug5m3b*%docL>K|@vJ_lMHoz28!?I=&>fY)9CW^Rqs9 zXnem>vuEc0jO9Fk6L#+}y)v)+{$j)FfB*fDn{+W|_21KTZ(Z?UoEX>VwKw(G-2GwU zbGkn-P0f0LFDmlgp6mBtuly_fCoE3=>(zG~%GN$TQu&&1$@f3=*4J&#uW8cz`uhHw z@Kr*urmtF9THh75KkVJ!=&$cvLQj8+Ff0Fkse1DDLt0;3OzW&eUi~$HF(HP5fmz3K zmT7MmYw<7f*!6W=>-PToo;^ML($%1Un^uSYjo#P!fBM(cTMHNVU)SFGwo3on>H^;@ zi?*>zJ9a%-A3kaJ>(#QS|A+s$)e*JRHuBZFl<)ulCQpjmU-NrC%k12*H`IHl+9#Tp zv3!2&Fj>C;|EsI7{;w+eo3#4<(a?Wp-f{1|<0e(@+V|wsgdf+p#FlML`TPHG_18?j zP2sElhqLV7y87#P{nuBsLtp1zo%`KUn~)=o^w=)?QmIL!0*EQ|HD=4{(k?x!TrVG zbwSx7r-QsNg`4YEU75GT^R;Mf^~-I0BUAscsTDk%ZesQ;VbzJu`G@ab)!8ho8qM3x zCg*CK@W4JUTDrVkJ$q`^mGCK(>R+>je&aa3skV;OdT*Wa?spdP+hUepnf|Cc!yiuvpE~U0sGS+$U$Fl5?Y~uTtG~WJo)xF( zA78gKev#K&v1OlxBHbL1M%nCE*I&;PG@+gzd-4}O%sjz36eV^;=>V0Q&?(FVjdDVZOL>#>)%KrN5vFqHQ<4*0B=Rb65>B7sYDba8G-|XY) zRnD0I>DWQ0zokv5bIR81oYP%D)%nyPp^ptA=UWwieUJZ`rm{r;)B5bF7k)A+`n|E= z%d@`E+p9mrnRCkY?5}UuRGfMD>fLqz^Uwaa*tzb$zp1u<#-__Zd#2fIKUtDwn?GS~ z(7QKBo~@aG>yO)tujWlkKF9UmUs+imw==xS&ck$SrLJ;HRXvP9JWZS9Y> zzfR9_{j&W3L2(-{|LUFZAXbDaJz%HOrS z{OaBBrW{Yc{on6&Zc^1$tvB=je~SGjQu0T28u#6M3=B-S9A}yFcCi*!iQixE`}%67 z-TwF2m#V-2wejxVU$@>yyx(aW*RTKo->$bYxsg9KZVL4%oZKgx9lySA>#hB-j&?=w zYstQ*^>lrB?7yv1@m5=ZYCI13e@*XN{G_Wa*Uz5zzrLI0PEB~$0qKi(EU)bkTfINF z{MXg0qnFEnZ9gr%>gcU6|33ZudfobK;aAg#{0ohuzWO`wWVf2y|LS$3{Uiv&7~^gVG>eW4Y8PT=#o%cKMS+B1LRee7#T6DMSYy6Y7 zv!hG+F1v3{`db=vx$27hqU%}DAMPvvniBL>)sy8-f6Bfi>C0A33~e`itG~o-^7&U^ zJN|VBRq2MVmS5=qOZCOP`ycHTDyMBvcq=pS+Wwf`f4<2jJ~rC3|Mlcq`xhAszk2oc z)t9ipN%J1r-q0*rL;zm)QsFX9eE;o9>d(UCY*^wOKNI>aV?pf^wX^$KBnu zK6sX2T2#F(=HHRs{|$|uKe$UtDy9k;)K#5Y7gcla*O8jg>_sU@o^9>^n)UV6-qgF5 zOX5y8{{0>lx$^z4PZJil#iZ3w`#tHmyO+GJTm6BYzrUW|e_}RixBI*Qwj~l(vHf*t zcg5?qMP*;T7XSUw(!^N7Kzl!CwUYmnUtQPew>^J+`n3OHyZ@HPRn3)N_J6%g;r=sf zhb6!LS*BNeJAPtkQEYO?o4U2asZ$J7{hz$ zT<=-j_u404{_l6`TDQ@n_0PS%zn=bj{rQ(g^Z%WB?zL6l6Shx|dzG|zkewE@#4@f(eGxfzP~y$J#HO;R!aS$kI$bk zUiS6pm*sMo3ypqlymfzHWk&bb;)`MX!?mxix_b51zw^o`pDkNw{q^+ecW>jn|Hjxy zb$qSsF@h-p^w(@or~s*3b9X4b@jqfA#fs=&Pqy z%FI7jX7oS3+c~Xuzi0fSd(W=!UEOSdE%f!pdF#XDw%$_a-888_d-g|`)3wGrd#-r~ z{ao|#`&;{-hEIrIStnDf|h0 zKmDThrnoEjHth}%ZK^R}x^PKVj$PG;WaA6I%m3`!{l~rd!k7Qv_g=I9`u_Fgt-V!8 zqgQ`$|Mg`b$EzZ~qQ4hpdiX^(X5LSf4+X~`}M&i zJb9tT{TEk%iOp93{)(d`Sp3u9bGseksr@4nk&V5K7JcR1JRgZtstYraOW z3H?6ZeDTqrJ->HH?GIZW*75mP$XfgBlaCz}KPGf#_WqAQSGMTd${$wmO}~CR_dkou z`8h%HHv78nulG&T+?Tggce1SYKi}2)=fdU6r#L3<-&a+)>racRP9N{@b^6DzWuGiQ zb#}(89~zf3zFBkpty8kLxTTk|>_u2mLcDxBU_Q#Mz2RcPJaIXROiZa@A;?y zyT>~$Sf7&ovm^Gtck8dG;omkN{rSqvn*YD8@4Eiqf0mSA?^c*|-aaeK=n-1f$GuD@vg^~$`igH^9TmH6!u{<$ehWZ&9@B0KNsv$Plrp0B(2 z-@bO*&yTnC%P0TZ>)V@}CVa8_YnPATU+_b>kYZ?~86O}bS=_ZMY<${^a*up?`nxQT{Qj!7IXrP(6WRjx*TnYZ=V&R<9G7yUZD|7z3MR|iXz zUab6Eoqm6PU5-U}@%-st@BUW3d$%`ca_2hrt?AkEYwOmno|?U8yH%6Mqwj?uWqUSn z=&!%u-@W_mAIlz-uRo43J(jEwt-n`&_upRC@A~WO*4M2R-uY+ROOIax@y2pr9{d#X z+`9gq{LBS=UyHAAoBD6+l3SCn%T{gK;x$$O+PYW&<92q&tryaoYgzmBZ;4KO-u*qZ zg}N`*hK6{3z5DBGY|-`Ap0Dq(i4*>}>2J~>^_nXiGPS;Do&HvxZGUz5 z?)ttf4?12(y-nR1_eVIrchkwwdFAF=0>=X~mh?w2Z{@e8KZ~m{HKQxxa%j{me;3X;&ioRQn|Bn`{dX$v>)=zq{r0`;-Q?}B&Pj%xS^cBm zbzSKCf{9wD5}QIY4(rALJ6O_YRbV>X*!yHz*sQpXH-?jZ`TPl_pPS5zUN$`v9@gHZE`Zw-BD6?mY(7)YLe_B0kXVo9c z`0@DlGNbtW7xWb0XR7dp>95|oG-}$#s_Cn)Om6yp_tUH}$xD28_UfO%H(d(z39LFA z$JNy||AW_$|4jTJHyv7LwVxw_qu{V!VfWL$EygxSR6X)LVu>F+meCyNtDS~d9hkvvkHu5{S&ox=$;{Oybiy8X`dxY&-`qd}rc6wwz(XY5zW$SJJj(v7kAGfiH!WJl@!*9PJCYZlcACfW*bx)%RwV!`x0 zQ#P+<9`{zaPm2D{;#Rx2EVwO<&1P8GX`-jK2-X1|Hi4*QR6y8`0hD0;X4 z+im#y|DlwuU6T9Ma{Fw&;65n*Hm0Gt%Lobv|4HYqm7#m z#Vu_Kx!m zyruNyOVm2oqWCbaYl}GF{{MB#gMoo1!*P~*?<>|qq5BI>f6djt#`im7>DMCPYwNpT zitc_JynfNnU(5Jc@87+8^**(&F0Y^bjoLB6BTlaF{Xt37eK&-!nZJJhwfJq!uejY| z9s94o`W<()h%Y?r>+KKcioMtGRQz*m_pKfC`;Tp{+5ea8^Q9=`f6M&B&)OdpnfO=# z>iV*^zx3<&t&jWbbMaosfqgr|SJ$oEy?*vJ{%d)U^uH!qU0=U+{Uk?kjdj(3Q*T{8 zTlzh2*ZU7s-+bTxp_hH~rDc^L@2{=f81`D~Z~WJc&M-~es<_a)m7U*z{aeGG-F~F9 zUNj_Wa{j(qqpE{%hS(Z-(Bo?w(@nz`S~A^ z5>&bL@TyvkVy3kA$89G00ur1R>&%1JynJ)kzvIUX^_zY_!W?6(w%2QYnQr>&_jC)# z$$!K?|4GSsAe;JskE5klh0(Hv>qmY{Xv^)DJ@p~M$6Bb>^76t}83Mm*>$jhXmD+!? z;BVV(Q^(^x_BG!wu6wu6?_$jCBNH+N;uvmglsl^Hzka`yuV&)1 z_#a9(88Z7~XUvGSNcr0Q_0*pDN!-ihV)nj&vir+=!Jt16U+=$qxoSgPk^ZUMfA5tA z{q%~NU>uhe?D=n-_1~mFty6vbU!A|%UVEYR;o|my$$z3}G(Fk>A&~2&VAbO952n4j zVdmB*zn^86M0I7HVh8^zZ9RX{W zf=4soKD>BbU|&VN{`VtHn!nS3C4UmT@Uh)JZ}mS;KA*Yj@ogFcFDk_NGZIRse>tYO zFIaNYqJ8oImfhG{E4KVoX!%3iIsbJ+r+?Z1sUm%kK8xi|1^?@>{< zkChiNS+RAMUVOal=!1?QUHNt2rhl}&!NYYriI>0bYexSK4ej&#b{lQfC;$HcC1&Yo zj=t{I*&prdmlf9^G<0X{U-o_ZAK#mQ?#90k`pG@xu6>h)a_#x{{+pIQ@eh^F<>n~t zm0tMPd~LbMmiOUxLbujUiZ^`z_THiLJGbAz+FSj#KD6Z8);Nw%fsGI2zL?yvpDiQ! z>(k52aXacbetA0nSo2}}jXPzN{;qG?Q*z>e&{IX_p56NUR~-$%q5SEY=!2g1o{iU^ zEcyG(ZS&shW#%sT|E;~Hec$VP=h>9qPqt*NJ}5t{=TGZ?^GaL$R~)4uHMXiRm6;%OP_S;AmG(TAkIT#4GwxY zb;hoVOUx?H_!#WkbK}o+#+xDWDt9*a$ArKBBDB$KqrAZ7d6#VGS}A^2i}SPJep#R; zLvpLc`*#U8wl(&47fQn4yv#hY!L#mPf`Oml6ye7y54G=lIjov9;ZHa_qfW>~12rRF zr?!LzAvz&~EpDst?tZm9x_r9kwM*;E&t47nE>&xLd+?K9`iqMBt{3F2s^))T*u(l_ zqIAm4FDG_$g?{oD<_-P)*Ob4pm|xkwPJ2IV-{DDn|L#zK7teimCTpDt^A+c^?LEJu za*mumm~SYeRmjvZPvj+o_7eXJzZcd^>Vs8ho!=(!&ig*$(RW`4-tf&QRT)d})vamk znrp48`#r8}OQ@KiaENa4x*u!OPvs~|`94p1wkfDuc-fDR%=MYg|0gVxyZ!yYaB2B( zYk7k!JoWZ(PaHh#^Z9D(xb*tzjXR9dH&R_z?#^vR@tlS_lHL5*niA>wPNYg zrS~%Gm^c2Iyk~K(dTFRY=LsPx3C1;3W=UmjKk@$Sz2@t7aW2=NXityb6EJ0STLI%$ zS5?sju{|dO9L%4IU(v`5+0k-yrR+z=UsAWU%vMWZ4N|yY*sf*RZ!-IF?qSIa;Wih; zrdJE?ZT#v|gCCc6{|L%F%J=BSbgpbC;q8S2oO%{QhIgiTrWtSjYb2idr^@;2&-TT) z^uKxf7Vy10c%b@t6Z@8^=X(@#W*%VFxiUpVl#A2pV8enL25KU#tc(s5LR5q~1)UTF z4g~0kaLinn`u^@Z>$N}c#XkKi()GNb%ilC}!PDb+XUSKU?f-w-_RHIHg%6uGR@}4o zme}aDw>2yER_~LoQr?g5^dGsX#-*R`Ao`^1;abb%ds%!<^JbqfE^IrU7sTssB(5zk z@mldgooIEw>XdykvhzN)osZyo{POL;d-5i2C)F-9tg+5=C|Vf3`;ye{*I~7{LS18y zW^r846XImBxWD*w_*>ntFVYEf`^8Mpd^?+1vvtqu%u*AJww~wKZrjV^@AEtVVcsKt z_1KYn^Pf&jh*JLi_5DV5{%3U(cjbNm-?$J}8un;fg6fgPSOrHNGk!(cV}BxUFIiRg zkpKI`huY%o**oUVdg7lm=RmQaU2rv}CeEW^BFMMmE+lgDNV;<{o+T8U0*QK)e za_qfRR!%<6P{61XGU0-XbZbXr!h#45BQB0Z3msNW$PzR@de!q+>2;m6Ue`RbUgcIV z$yzeaqFwE(%;f3+?!~7sJ@fzj?^@Y!u33NQ`&9qW*Qt#!IGgm^H~zZG2m6HUGMkhB zIlq+>2;cm^!E)OQv*r_*ioWIr-u+oW!)My&8zsvNTkn57oVq$~-9$a@a51$%%k&RD z&{(^&_TDNH@41^#t+J{V&9d2lA}&8c@5~7XYngkYr&B9@b8w~<&&D6*H0*U7xGe7UvEZ|CSS^2UlYtj1P z%c5D2L)ovMR4wA)E*rIS_v&qPviIy4YvXiM&g!nHzoo0tz#wSGbb!%3;cM+*h6fA` ztf`K(Ecm!s%l4Un(+&T$zW#sxzrFYWfARWm`v2d5fxVyCmS27S>-%f@t?#FAegFUc zzw6Jl-(Npj|8$c3TGiyQQ#UKD`&9K`y~FDzrJavj0zd|DE;vl(@i^E`g`kMt|M->FVR1!TYZEO^ko?(lGkPj`_1AZILQroTngukV?)_5H-yyO%>x9{XOm zH}(Hl-H&!>_I}x0e*V_ek0$&7sLv5#{k@89Z^G`^6T*)aMeY5l_4mHc_47gTUPq>> zCms(LUD@|z*6v-qXUF}m-hX|6#H#n#cK`MFeD(H3CzJQPuv@RVO=NOzR?ghh@<(rwd(J+rB7F{p0Tg<8<%=pA){SJ za^TAL2VFJy{otUqAK!w@ zKkL38G(4LAzT~uMoAPYyKcN@6lFb%s9A!T`VgApWe0h_3{y%dHuSo5?oBm_X|LxY= zlb=lTUOoTGtM#tkUsvuCDPdOYHA?Jdto@;*GN)idYGakA{i17z{zVfu&ba?kuHz7s z8CSM@+LKBD_9dBJzP`FH=-q$$yn8Bn-YS)5_OZRI|9rJFs{hp0-@*U%*B8H~uXe4w znDjAnZ_%&tQf<%IH*M{oeq17Fp;F?Nc2H;UoBy+HDnkV1WG5Z5^7?owIXtZVo9vnt zk5``!v)9~Fuyv|8?qZ)he{sPx;i3}XN`uwMjvfDFzWADlR%x^Iuix%ndjuxME*Fi| zda~u)#=5xpy;a{$mrj4SbbZ)6pRez)tc+V{SES3Nd&cy?c>11lyFETNH+LwA&l7RU zpS*iN=NY>quE;;${|)Av|4=e!ntc0H(TYF2rLW#snYY*W^n-5xBkDQ(%RS}x-MA!W zF85=WG~+5smz)**C$uKZ3rK&}yuoLF?|lCGO=~)Tf1NgG(%;m&POCk~ZG0G*{|Mdv zqx{t9!^!vYQyg#1oPS15F!MoetB1L&UC#FM2l@5aYE zY;3;kH#@y^-8$Xsr{IS_dY=2Ge9tF`+n;>@y6Ms;NtxX}Q*YGV%l~M)HvEr@^Fkvo zW`ndFKa}jAUwGnJ|DkU}skq`x_nX#V)z|+~&Ykk0Vd2JeNh?Bn8|C}k#Df_#WEPs- zkPu>jIenx1p;^qUJ#HL+mAroL)8ut&+TlU9F%#CrPqOCt@J97Mhlhe1x0%Q!*E)lo zpDJqKPQP>7S$DPm?)`V(cdySr&EN5Mi|P7@Qj#mon(y9!ef_u4_ggzX?)_I(3Aq0K z@qxy*PCwjIHs+l#`EtEe?{-e^+*$vEM7yt=D|0Wd??1Ktx8dh!-A}$={dMqAQN>1? zol)_EzHh7}>mRIH*lK({QQEn9j(@@>;i$*j2NjJvILq}9O3eIx`m5${bxZ)}o95xk^4o30;33bZ56vmyZAMutIyI>#ytn z$Itz?{??~&8><$Dzm8v9xAoWctgo+Q*9G~lzqR+))u`qCE2r6Qs9XPQ|Nqt3S4-Ob z{~vwQZ~OX~eQ_(Lzh1gpvgvKQ*0udnOS7teFVB88#pwMtf4whzzc^j9%nTB}zj|`j zm4EBSUrdN^N}e#=fx*&viFobmiAI{<1B9t7aVkU%zg@Q`TRh z{co$@e>L(idbNhz<<9G=wy}@JMEzHNnA)CJUvcVf+}^6UMH}y~68>9zYj4%zm8pV9 zel3r8z5b&AhT^$1Z%%&*-|O<~adRX8^P~g+pRN?zXwRK>_1wGXUy~f~9t-*+5V&OC z|EIf^{=c_Vs0)~*TEM0ue^pBT{JPI=Z1o>}HQA*clX~~ZEsW24JpJ675dWg6jWMS_ zO0NxHA3rThL0k37)D~-*d9VNUUzsCu*s1B){wIgG>)L%Sf5Rdhb#|rnuBY#JT3$W5 zH!JD?t6ism?bGfP$Su6rD4_YHqVd_j#!I}Gl5N*o6BpcY>InC1{OTwh(`EmxZ<*QB z(&yIoEj{u_e`FQ6NbUMt@MYHYYx)O8L)$g>TZ)=3O}<`qXJ5B?{hH7RlcwFCRHa(S zWwYNS;dbaF&6iJ8zdpGaA74LV)4#8WFT43qbV*d!oT=sdeD#f)Pd>8r&95=uqt}1( zN&i7{jYHK_g&)7R48Obiu&S{B>g(%6Hdk!&n`C0Q&`fh*@6Hb`57s0wnbgSHg)>@4>RSAT8pYZwrMOW#^WVf^XHl3`x{?K~6^XXFxb)07J9-c108?&!}`iq{4=9A@F z)WTcz0y;Mw-C;6cYNF~3;cMcCijR+3{V|zd{zKVb-pu9D-|X{-vw3Gc@>M)sqd!e5 z%vbqK{67Uj9{c{wH-9WNIx5`4a^dSU%d`D9K5YML#Pf_Y|Mv?X?>_wh@y@iL`CrO5 z{o9wlb$|HZs_83~l#V&vZ~l~K*c&MR{`=9d54w*{7N|RSZ%cgfd1>bPuWG0NUnJP8 z{^#ZqmVJ{xte@)W%=lD3+HiCHKW!tX89SY&6|Tr1lH~Q1;EcF7k$`#Blom z3f+ag=YG2hWawB{{%4&ny(+c6*rSu{)^**zEW!4wxBtcbk1JlkJ^Y7?+L1`B=_@{& zHmhrK?KC^X=jmyt8i|j$uFHM@!SkXVkJaw?)vLbj>-zfY@uQwiObo~5=N;5{ zy>fl>>3Bo$>DT=nrI+}-SK7wCpW5M2B(eGwn+ds#~9?`cCon=@;IHw?CDy?p*zT-}|dizhwQ{viMO6PS$Vs|9!x~z&6)$mL;DTYxyN%?Ok7AeO-5L$=#y)*R{S{ z*91NNwR`>l>$}vqzWSQHd)KaI)35E0p1W?{)g@c&uEh3zdzBY;m2+*0`q!k+g?D?( z?>f)){u+MK{Kc$aaepK1{C>at%3^z;FaDbTwbIHI7QTwFQLk1Bt$s11`)^hDq^w#m z&&Y*eU8nEb>fxlnd!1Tz*m2Ld7j2_9>aUMq8~VNcZq?QJ9ZE_?^|5zPXKZ>t@9EYR z-EWir%$0a~ShGB6{pnTz!=L}Hk6x?(>U+(8;k&=??2ns&jce=mS9kV)dSC3GY_hZB zl|aE6oBOXd*T!XCTlCI9Zau5SiH4~{x3VKArQT58ap3d=zW3{E@Bc{WkU9P9=ic=P z&CJd{Q%js|?j^lCHgxsetE;D<+Vr++`TD5NU3JSg={2fn6s1p=4rdi|c4UdrsDqW!Z4 zi$Wg>BY{>X;iEN4D-QHx?5Ls?>kb-QRh*P5E+{ZRbm>j&}GQI(#FXbJ|)*m4h3$3%H(XniM--+(z^M=3-~{=RXv$`JDFp z>$0jH9Y6SoMxnRZ3>02+Hr>_3WTp{C>@yPxf$jpc z>-(kuCt6AW-gi*3an_85p7ZNkziTvyGv8mh<^Q~$Y@FG@Rnm-qv&>eK_$&DO>g!8) zR_5Qk%eU+>Q?Kzp-tI&9?Jj{>P)EGScS_!|%x!u9EB< zo%}n#^ID%huJyssp_}#1vr^#|J5;Y8v5(p{eZx9|X(FAao0!zkaGQMF8FWVdr}yOJ z2g9HC-8``~JAUCTb-ts=7T=qA@`iGod*u7D1@nuAG@AKl-0piYWz`Y(`iC8}ZRa+< zb8~ap{CI`z55;wRey=o$aFJY`P;lxO+u8lH&vUCCb_w3}N#Fh9LzYj7-K`%kbGDrQ z2qHDt!CKC!x!kAJVv``~T7X2z)_r~lhHC$w)i{4TF~@(XiZ@1-Q8DqYXD+iUl7 zsq8#A>C~!#_LSv3oW7P-+pd26oPH>DOH^lF%J0p=?_z|)JvUX?yPmvN929&!n>|ft z-O~5<-{d|Q==bb8=9+ZbL}})oo8QmujceJk@ye0o=d?2Ki25aUCr%Oz-}rt~;7TSL z-j(7!dbA-7s` z_gAsW*Y%&h+xs(o_w855*T1-!74G!t^sC+9s-9f?=f6K(u|q=Ag{7qP?i0g#PwT_I zBfme`Y(D$ul;i!uyWLU1Fwx3SVR@e(d_Y>$_dO?EHHlR)2AqckSBi#kphl2hZ}Sb&mdf z7#Nrh9cNkgWwDmmZ`Au+bNYJut0#Mc_RL!!^rQO!|L_0*#(%&6>iyQDb&lM1%h+Fs zZVg)-?%#Xm`s>)${A-N$|3CT8VjEkus51PI^jW64w{(IFO*fjpnDt|7QoOod-PW)_ zNyd8dSO1qL-}yR8MX~w6%=){(y7EeKCEtK#9P)=#|T^C(hik z`^u^-?*Atr|M&50(%)sUKijRAzeoenKykmED%fGW-SHgwwzh-~U{;F(x>HE?y z|HvAzJ#+kz++8QK|C>^N#OluQw=ui-hpX(KT6K5*Q!;)`!G2Z`Z1emy_bZV|(W}%il>MVMc|f#dq&!x{LpxwKrkE=*?Z8wz|I#|F62x zwPU?&$-Js#GnfBAKE027W zvAV3|slOsmRfRu;jvM*BK74&=jc~=8r&1I9)}QrNVf>osPv<-C3WwYci}jvr|{|0n&JaWnOc-{cE(!zbLdnHTr|>#Df@YqLuKChhpC zzx%HE(w9Y(Px+x!&eeZJeL;=-d}hKrp)$NrzKHU0XWF8`GMTl80-N;mrT$(wJh zzW(m%DU;QETZU27uws(L2)>$roePPwgdOm)irx(QbwS}yne9rZ^#?OuWU-RtT zoAqaV{hE0j!kg@lmCX8M-q$=m=-&SQ*LC*)TJ*K*`Xf*4x{N-L0975#d8dU>VIlX&O0 zcky2f{KLX$+*h4 z>(3jfI!Jvu`@Z~m>+|v#le%|(aEe~fA6kDl^y=#A#lN(Jrq91qGx(x|e*Uk&DEmv?>Ts?1_sjkMUku)QR!uuRpi5ByqQ57j!OVX>Rt7UQ zol>>dEnOGERrb4reQT>@L)pa4RqkckN5uI>c(azN$9IQcS#)p09^+4Mjm?huZP8og z7b|>iQ|q?B)6&aVU3w8ebI;DKG?!;;$_9tB)w?V_&Q6$A#?Cxb(n|dCyn^D#et$L0 zx^ki&wPt7Tn$T3PQ_!O+zHZA>-gQ2ggE%)FNaYY{I=N)hm9FqzlQ>Tpya-5JzCyx5 z!_8~0bxXyWug`*RnA$a|EM2g=E%S$j<-E4J4XQi%C%8+lXbEwu+AZOKPE_>$t*^1M zn+0A!WeKvpbaU!7vllNGF3l||i_f%)w4H0cqp(+Ee)$7~D-Vv86`F3C!)SD+M@x*0 zv!gNbKtzU^m59?qL(ZFbuHE@l^=kXxIPX(i6qh|MIazw(&+*BV|I~Z`p6q+;pWW^H zl~>Bk3nv$*|NPt{*Y@&Pwf#)4;J340TNLWA`lqqjhghaGMo%4#zWW)~~I`qq|Gk$XBT=vLEv!;iJa~V7gTfXm1 zV+NgQzn8>TVKKh$1%qW5XVxkZyYEpGIA-H~B5 z5Sig2Cf&x=m~dc*ffx_#gw{X>9}Pi47LRA2`^w`gXFXF>nOU?{ZPnBzYK^ab>#voV zWFM|6SNVDWckP+H^pz59XMTIew{2Z+)`#ef4*h_u^+1<0AxK6$ISXz9PyVdH?9IU4p4U_MYp!@TFQrRO0d8DHf?$ zu3DFdEix7rf2zH4s&?tgiHatB?riv8UEim=+QR)iZ_qVai|dV^2`{rxW-k)C2y{2yK4hu#Jkr^6dT&=Cl4m%ko}cSAZKj}cu$GSg!m#`Qs@S)l_`WF4KkRm#ZqfbSJAdu|k(Kk&??s=P z*w(L`F+a9S{G_vZi44BZvmxlgrc-fxP3(%P`MSYU=<&La7m z{eH|-PTudmZe`nNYqB@=exGZ}S*17noGKdp*x_a!q-E=I*4qlLh{D+}?j7p;hOe)c!#6|C#%z-u;zOpBd}Z z&M-OmqExortNm;mQ>AAyd}3u!K6UZ#+qGHMJ;uz}7cKT&;lgYbu;q{b`ehgHEZzHh z_C%)!!^Sx}roy)?WV;_9`d_&4u6=x!o$$#QXPyVXd_5zbQDE`c=@kbYzNYPL_1}H0R3M#y z=U<`h7TILeT#vf{dRv}dDV=_o>F(cCj{18mFa3=3eEZvP&4exS^?x_<1mrL<*WOb& za{kC4{c;h%*Gn&6%CKLrze8HV`})?4r537gIT7u{B(iJ!^xE~Ump}g6`t17W z{{M4+Z>ZcK>ApdYt31BqR)f-q!q-n#(oQU^*j%6+2$A7MX!>*^ls ze|qCLQ?7OTk^_5vPl|qyulE(P{(hqJR$SY&8wx>%mwvgFc~*+I^cBzf`z@Z?@2^w-aqGk0mU^xgOFr<0WA_i$;N>so-hX{Xv3lL~{SmwWR&RZE`ft_Nc=`23 z7wz}Idr|ajd)!g|*VmhG{r;L$7I*T;)b#8Re@j2z7yh-$zxK}bTc0*@g~zS!`gpo- z&Ak7wt{?fc>3&G)_g}BSo<8Zi`ir~u>i1t?i|k)Le|`PiSly~$*=w)vk9!*%=cLo~ zYjx|3r@KQIc>n17nps=)rE1^rs9AOK*Oz?#=3jp;?rPW8RdcK2!|V1uXCp!`5YOt@_%%Xn%ytPTiz;f0N_ePWtj*tGNDe)AmY5bgt{;tm zeO1SIwfBR`yN>P;&gZ>$edVRS;^9x!tY(~keZAEF?DZq-Yc~D*@{d#b5%U54I<-qa zybSOCAM}OUhtJsaGwR`4zK~6YkHc*D$L#<0&*zo(^}?OuG57YGRb78Avp#0^LDk!L zrXQUsd_m@5+tOVhBQD+AWn^{%n_qXQy9caki{irQwK%1QRS}>haOkEuc=CD&W_XWlPPW1 z|EDr(#c63tXM4W%hf8K`Ww|A{`i1=sWtEDwU(44e^>lCx8EW}`{2085bIxaWDZ}}* zdu7V@@A1=)FY(*@diwu>hsh1ATh?${F8?p+*Bn!D{A-u%tEluZcDw#;F{;~Dus?kM zy(MYe{(n)mx_RRGSK0lagk?JR3AcpBIp0ta>fpJ*JD_Ll!TTqFA3wx&zsWLUf1r5P z;;&U-W4$MTssHH2F?qT1ulIkBZl2dNfA6kY@87Kv5}W%ueD$X4`d1$UeDA$~cXfYc zWc~X60^xPG!yxy?5hpEOO%Q zJ1*sO>aJ!KOX2!uwKrl1YbP;+Wqg|s^%?hS(3v0K9A{bef%l*M57(ak@9K^7zrJRF&3c&^zb5>(XkhKXb^m9@{j`rQ zzyEht;niQCl?t_HR=)0>EgiM1w8?f$0OW6M4t6490McPJfjakR9X7t_VSNz{vAKmZM?*|1|dCw`Ip}p~vecj%2 zQ>pd8Ht+lW{)2~8rR4SOm4C%{AJy02wL$f+_^<7+FBP9Xdu#63N9O&KW&iaPj)nbG zfAY3^$&d3~DU0mGR==FSerf*oB|G!4me%c>eB{}lr&&p>cw1Nhw2%88x6V_0b=>Ox zF-v#q?b(;F|N6n^Nj=4fEKA>9U3mSyeDD8)BMS3gALRSA!eIL=b%UNv!(uU}__)35 zYirl-42#+H>uRC){q?E;fAn(~yH!Nlh~HRv({`(F_AcGnAi_i-gF{|J3|oo@C1OVyv*_aC(M%9(dfztea9OVsIKKa@}0-(TBgop~v0vijG( zRjSo%yS7&D71REb#xXDZ|HE^Vt`qJ^IGot{`stUDrai~(|2y62_~JhKn%39#*8=4> z)>R$uD*pdenV-kAze#fc>Cee(_NPK0vtOv!FaOzn_Ii^MZ%+ygZ;yUny2!WXkMY7& zr_LPSV_+Xwys^sj_eUnZfb&Kj2m7a8uKGMRc-#JOUP_16n~WY`Gpk$kH#JDOJZxj# zUi~T84H($#iZ*zs+83t3-@Wg}{$JO3OJv8bSs(X)>pbuEQB(Hc z`ud`Wvwq$ClaDuby^UG4`_0$Cn_hpqbU)?Q`Pb}E?mc_-_5Gb`PEX(caf>Q`HhLV>PrkSsHM@G+?@7@&X6ALi{vR&*VypRt+nX8~7}zWw zXIb-|Vy$18d%{1wc5V6g|L+C=ZM_>c=|TJd|FQLxKmY$9dieF;`1`Nd@1MN%{mY2j z?0s8Tum4(UoBj3lSEKONcUmgk*TxDxnc*Jc`gnirx9*^_^?g5={(ADwb!yckt?6H{ z?As6~IO$DC_Cv{6OFlN&q&x_jfA#*f-SJ^sS6^S>ANRLGY+Q>QU_M7aqUlaOzr7cg@ z7uDASQD34ay_cGL(4hZD|KzQwcmA#Z+WmF)*6S?w6J|b`$obv;j_)<`e>+1q#`wg2 z_k1W=^jdY%O#N$%Y=fCUrYu*A`K#Zt;>RI}KaI>$U)Qr(m)(slt^G4~dS&USzn`wJ zuiL9WJ7cfj$`9^!72(hJ{uh-$^4aLV{<>FA=N$JJxbJ=X+xXs|IcaOAUh#FDTXg-Z z$KJS2f3tS}yS}sY-LBLBR~EkR;j0rr<5D4ImD2Zw?f8~?3nE;OE~?r8y7&l`ccXagUEhhm+p+6z)sDWp`p$LNbc@1| zl!PA*Z*o#@`zxIK^*yU#anP2FJWR{;_n1UF@g&Gsf4F0#SLc1PbN+EzTiY2kO7c#~ zpWJKxTR68_v}((-J*KOC0)xfB&ztZ1F8(Hau>4OWm39>;hn6V4prY%mubzB$|DNxU z!@>)fz4h&VB^{(0?9I5l^UpVfQ~QrOrHL-)&|3O_`g)l+Cwm(k7TDSQZCPzZmGuG z^9ey1E@u-5im;RrWInMn#RX|{~ z+K-vfCwG2Z8vE+kmezX@nm48@{+%vJz%P;cL|zxvP4of|LgU!{L- z9@od6(e}sMVloYvzRCZYc+~j(>xV7r*N--FSpP76dHTJ3RczAl>DP}<&K0!3_-o#d zbtyh=E^p<3JaYE)-2N}xMg7BO|NKA4o75-&`Okg$L*w?!eXl<*aoblsr_a7^^2$5P z;W7Jlt8Bye?X3S1KQ;fiYWUu=3H{uEtP_$Hl1yFpt#xLOyP@=O$5yGwJa!zdTqbUzS)>(_eH#NGA*r`tn?szkEZvhxYFo%ZHzmfDtJ%k2+IBM zVo@&7ve(w+rfnaWsX{iI3-_r)w-cjD}quk}$=!W6&nHe2O; z@#(#(*Ldx8?u3O&|6aW>a_i~uuf!IsJw3P2#4cp&l>eQ#w@)`+r}aH){`Z%1`(B82 z3D*B+J$jm7z5dYMvlqT?Z2qP2_3){ln>N>89!)t{X4~Dr;MdYyyHEFj4Xe6;Z&UR7 z=TGNf-tSaB=bql)e8c+dX?Op<&)X^x{GYGj@4we2QGce!znHW((BO|m;i1>%*KeD? zK51hYdTML5XZ=DwmR~!pUU%&J;BzgMrX-stuI z`l_XGWq(BNb>12FD{k^XudOTfHq868`^Nhi<0W6e+Pi-Z6W`Twfr~l1`GNX>?V=~& zzy6Nh?^1f-cJk-{U*E>x6PiAC|L^d>)%&h5+4_2?ZdUZ4Wsg0~e_2~kda{4%D^A_D z^?hmuPxfzJucj2oT3UYTtlA>igr#v>*75;v(f4;|yRCj#wQSdxsEyv`xAuNCfBGW- z!*u;Rf$d+vuB?rU4&NQsvGvNn^{Q)jN37WY`=7vRcdL(ql_FuF1F52R)@|*I{IT`L z(pUP2*7?tkYTB>goAh4#&iBVJ6lNY=B_{3DKU>o0nV)WAD(jV3=ii-QF>lpB?oE1D zGXuT1?)zG3De4w1nqj?l`s@3v^{y{_qhAmf`1LB8>EUfU6&DX0 z|8g^PliiV|o!w$8#ZtNQ`@7%L_V-L?zOE{_-!!do`=sW>x)~k3su_>m^MB1-+|WAz z_n+F?<#*r4C#kRInlH(|zHagEMaLMK?q1-OkNh6>@xS=r=(}0#y8nN=TanMQp21|z zqF1H=z7`x~u9u6DRl2upj#R~eos+KLxK2pkUHS3J9)X{0gzWRrA6)h3#QVhsYUVd? zIx}9o7%jKL<6-)C|2N84KVC~{F0OR{XGwamd5-R|;En8@S^56!x~IFouC&=B zEEo5Pb&8l%nCk1l%fdfCaLLzju>QyCoh=u_!L{qV>^TXMFA=J{4*Y!{z$F)^^zMU) zN8!;udk^M)fzxXs;B+8+uuGTcxi3+q78c5r^CZ_1iw%Jf41DE=AM83 zhnedZit{y`vHy5=TcVlGnl{6^Do)3Xrm{2DKfb^J?7yT;TU)-H*4dw0k1x0Xnta{A zOSdmgzh-|^vCY3bQeR$m{(jxBWBt}sXlDukEcto+ZkO&3@=~oo7<+ry#k+zB81xsH zt@Ermt$G@+=Vf16p3(b5>CC|m`|oe^zWg@!dtjCN{#QZI|13FOyJSbfrfsIK|M(x? z3Ea3?R%T(NQPdx?_eC35ym@di_rW(ZS~hL`@CN22F84kd(&2ay=~0|rP?~1UHTo%cX6hs zR;+z{=k)*T-6He$hR%C`tmMDzh9Cac8vF9~zrK2Y%S^WA@iO_jEfxvpC;uK4mz_NQ zp=1Ett&fd`JUj+hbx%m}A5A{MaiZ`f`}%{dZ4!}{vVu}Bd*-ss+32iafBN;Sw`c!x zEcP&0IT%-{{p06j{YB+gUylB*-{Ni`zCO&d>}QR_qh|fp|6e_tdN@jT{p}!~J*`t+ z?OQkNrHRauuD^bo-C@)J_{q=z?U{c45ogK0r#e-!IU85F7Nu5b&*Plhv*y70WmQI( zf_@z}UwkyK^HR0otkBJV^+StEd;zFu^$OW9P#wU9e|@nt_ItRXX6No{&%TuJo*w%8>#0qa@7{k?`XtrB zs3$U^M}(WLnK4-*B2Pq_mF=M8h5#2qPL3n1C+&Ry?&O_`#k2L6rW{!=%3EMp{x|!p z{@EAv9qq#&{alc0w(mt%cf<#Mzq)j@vi<>!#RuH#DVJA@-(<`6>dr1Kk)hS)Ax%Gw-(G< z9CK-HKvd7oq$_v3;#8xK$8>s!>Ly%V``*fCa(%Egc)+(uLrjRX*)i$B3@cz=1C_DZism!E3qcWhJJaG7=k9->^V%}fb1K*#BFwlXFvMyQE$@ir|KnBk%1_Ik~=>c7P{QQ1@0 zZ8SY@J^j@BfOv+gKgz%IFI`%>q*rgX`Ca`F8;ufG3k|E(}#o=|;x;$H=G?yE{nNvLs&mQJl{0Y&psw)`}_a%zTDsKGDli!NmE#I)0DtZOAl>XFLh|?_KVJ^mnA%4 z)DoGZqarQEa;R}Zh>nO*E7Kx}2?1In60J@ul%n z?T&`u&)@t{h1ErOT;25O+V$n_t}%KX6Q0iL+vFp2?%me?=c4*IPl}LZ-|xTY#EE4W z9&$&CbsK)4XX++byCqyHC^?gz(I8~PJ24^FR>s7D90L(sX>!+`;t-X9;`O{NzXRX#n zJId~^Z+g-VkZy1;12P*zJ|4H-TOU0mx?{&U3Z$f zY2281BuysA`BC`2+P|#P$)~5z(z?v$e(CzzhdEid#H(IKe{;FIJ6hwVi29D_ZLMeL zxi_uiteW$v&~I9#-t;LhL4Gc#pvk0}|3o!(?g|FoH$OIY>ahl$R>fNjD*C4|?lk=L zy=<28{#B<=uyP(XyJoVbmL*TipI0nXb)U)e83jJo*L9d3ibEnJjrXj-B&(r&@WUlO zrO*9}K|a4HMvEHQMWvNl_iuBmI#+bh$5@K*^tvGL8pr7!jAt}Xcw9T6mn9gbA{gU* z;>;}vFCUK!0%!9!G4?K&o>pusT%9@R&SBAXi~MOCEDSi{D`6|S$ z?VQmLKL!St6vtUM{9LTH3-#+Z)~yR&w?FRPy7-7CTi;*(9n`%4+WsK(*Wy{;Ti?Cz z|MuVYJ!iODe7NeW&Y%B(sr$aZ{wieo($!__%-H93E1V(}_3yOo zQqA{YHJ*6d*WWYC|F61j`>W@PJI}=5`K58lfAgn*<=b}bST%L-+S2RmReynHh8cW+C#@v_}ox7r*{*AnQ>s9pM?7ZvQ@vHT(o_~FHhWFcl zOTH%U%~HO)^y~gNQ+M=-Pkuj5>5;8y@2`0~b&l27uMMpGugE5BQxyL?*qiIu%~O8w zs%O_Z+f98`E4FGye{7j^;o}qcT~Doxo_{4g^?N*$ZuWY?{qg?Eym*Tn}{Bh6d z@qDrO*VbQ8>w2C)TIP36rj2_&%jG50-^^un)Ss{^IPUD}e}&nHJ)hKHoUz~J*d@0B z2YcJPBQh=qR~C0Z{;ztyJ)<;d=dQ<0g#|};i|GH@&Frg^($>?c`tUX2BPya@3UG)E+VEfea4td1zw){`&e4=dMP6oK^oizKxUT`>e;k#V@s0j!$r~kINGC`Qg7Ut?=vx>u^zb z>7MjMG3?9gI7_-eF0nrQH05{n|5K_N>c5o#?ftd??45mE)tiL#z6M=yJ@q1&r&lC1 zCf09R!EgPa1v5A9JISNTdsg;eMC{bRPycUalfE5MsP;O(kH0yg;nF(kZL{aL{t7KO zeWYD&Yk$IX)xQf@9)9rf<&E1TG^26mA*Sw`9j321R$jjGXKBfe$l7}0%80o$XZ#Tk zO1AQS_g;N%Rg=-ZV40oS*SPnG)?ZtFa`%ulWj{qLT> zJM@0dPMdx6H(KBA^M6Pm*C{_}o!YtQDpEyh3ptjP^} zEBd^D_xs&Z{iXFyMVWj4Cw{(uSZ{H{;k&nl*Sc9lfmQ1SN%hOudiEM@ay}lyMhM}|N7>y z|9btuU(@&Pj+f1Tzi;X5pWCLdpSEsO=#C5bWxwt3@DuYV*cNe|Y*O!azi%>eXuzhzoXT1Kx`|0aXEVS6ikz>B-)t#7!41pQK43n(X z*RTKkYx}F`*n9Y=*QdB%Qn5=vHNfI*LBZ!Km8pqo>emW8mDdc*QCAKU)MeV=EfFk#3TD} z>#wh=_v=erN>3jCZ~LG{{qI}-x~+BVMf~Ug+S>kWZ{_PRoxb|7w_Yvw&e(MQzv!2( z>VhBTP3Cn@nbsY@$o78P8_Bo7zQ2w?nf3ACrZ*~)9{cCTH_4T$tUmPjR?*+U&|3BX zmZFkzAO1hl`TVbHHb4LUNiV*>=zI5Z_n|tDw?7QIPnHUo$(-YJu(~QFm>&P<>n9Bb z!`_2ipS+s6_j_@CQStj<`>wC%Z^`<-N7=Q@pJ#&Alqamr*N^Fyy*}sq?U;u-_vDMI z=BsyCUs70CA2xa|ZsYDFNxHIqPrhZc znVo#S{$dR0{u4eqF85E`nz3!lH~Y9K@AZkwm;0Tx)xzWn6Mr`sjiAe{6cWZC^a+5A_f8{{$`l+O=+N(T`tS-?@hWNR7Pz zTJ&?>roUM}Q@gJHuU~C%o2>F}f8f(oQni(DeEKH;J0G92yl2vnsp~$v+?(|N{F~JH z?7LU5t-cyEYjxe)xK5FIzume&Uq8#duhTy~H1?;)rdj{vj_Ul|TdEztzHHNaw}QL> zj9SE4;U_n_jqYZ^WK?@%GC5AJr~>sUG*Y$~$5w=Q_0=J5EJ(f3Cc~J7WEn zl{J6w>xb70?Y$^_e|6aEsTIk-Qs$RJ0&h=PoZc3)`ijfZyNq{IwCmSKe|=lE*FXHv z>Fc+?M}2>N_131pSud|PZ+(Aleem^mLEHM`T+5!ixV8cY{*y%=l`b=%@)t7$PZuz@t!tzS66|rv7BfrXLlTHrBX*sNxec`?G6e%YKu7{RguS z7%cJs7VOU^ux#$nWr9`_CtF{9_5Y|Meez|w>)v12*LuF#x<6>roX)zK_+@(WdhwHl z|7IzjacE{$I`Z^J?DRt)U)7$`UdiLuKL3BJ#UTNhr>D>od!8& z-Jkr|iLL33%haF*{vuwU37^yAuNTWnPWu1R;z{@m*)9L7{%J|E_|9AZ^+i+FzisiK z3RxCUmc5mt*fC+r5Et3vUt?=G( zyj?j_n4SHPb+&U+8!LP8vE#0yrZXJeUEBY6y<~e7X*5m2=tJzGIeE%)3b#tkZd)DR zID4iaN8hZS>8q+9t`bS)jyU@M^(j@Ig2cU+e{Keaft8m<0tIjeGjqhR-frJ^Nso* zllS8LULQ0y+hFYLmAp4}@@&g$megY>?2EsfFFJZ_u4?QvNfuxJ*E){7KFiwv%f0r# zZfouL$+!RQxijVO)AO&6uG_yQZolE&`MWO6zxZmSWlft*ZjJT#Nj#$uZc}dq0|V1l$62=hS**2x6?5wL-nxEkqJ8-8 zw^@I)*2Ra{tqc9Pt!`)BYW=IP{;7J*dp-UC>Z{_P|LqLduhqITZ|kd_o)0EZmycar zwzg0X-3`BWwdmivy1n|XWh<{u z+plu}hu*uZ7w-hGP}w$LJbiin@h?B-URrJEulmpX0S3erXY+D@Kqo0YGh5G z{xV8P|Nr%s!C$XuovjwWdi{Ldx}Vuz^>O=y)K_gui+c5b>&ka~qnv8~Utir<8|z({ zB&+;C=A{0k|51Ol*0)&nJxRZNU0l5O{^~E^Pae^IER;qIs3{<%&+@AB>0ldi9;Hn_wYBzApp-kN*R_UGsNCL!Ce zUy6>`eAu)7^xeDpS082Hi}Afa#r5@PvE#`)fV4*UXRpy4t_V$-Pfbv&O7+Ug^))<45m#>)kmS z-(xCrL1OabV`fWtZ{YKv|NHhN;prED{fRBv9JsXhep`;eHCGt($$GD~8SQ^x{Xd=g z^|k)dcmKk8A3vUGZsM%Sy1rAEkFg-LY5qkCecL*l8UMrVor>1De@T0PSZn2S);vo=4Zz66H}*N?_Rvv>s!6;f8K~&Cts}d>{zq(gHzJ~xB1uC zh5pym{OI9T8@hXb=&G-WqGOZ{rH-UFFltGnu3anlX+4YEx%%LJQ{yY zi#mT~`H$(l_Qrib)Espwre>vH`l&>{Lk)3HzuNU#)tFl6FS+$f-TnMmQR!uTiyw3K z$Q!ri22NzY^3vri`wH!tiCy3FHZPd%c%u5b^tS9RDISjN+GBHSj}$*mdAqFBuuuQt zoj)bof%RvM_pB=vHrnEQae=U_HGAWt#e3iQshyrFFkrC?BH3?dtVnVdEd5bUD~Sr z$m!v#o(Hb|Kk(PSJ5cSdXZ!v|6}9){L8V$a>@5UyQe%~tbEd5KgC9R&Lxf;M}?0DM|&^$Y_n1J_C{@E z?~@;IPkQ6M?f9o9y6Fku&q?mz7XIV)9k1r!Q?BpP*N{)Vb^qk+^)BB}JZF@Za9)`3 zlJmZ`P4%TWb3%IaH)PM_x-nZ+N8yL?ZFWX2ksdD*VJ?lE@y?(3AN;ZZ)1%~F@1FdMvQzo~?|kmcL-%V; z-v6KZ>;9vPs-oCgJEht9AHU?9YoY1NC*!d1|FSK*lJ(nawj{3p)%WXM)BfheRnfMO z|98$1I>`1tK50q(?BkWoH_ixN|NhS1jWfDp%hd#5UvstI{~+I6tbTdC*4^BjNrg8n zp06+8&KJT`bS1>cWzU&MweM1oS$+(9Y!?{UFS*HP(Z1B$R{s_i_dx5&H`7%2Z&l23 zytg7|^5&NEn-wctCrxsaFS(N44X=pV)JK>*c)x zpYDLx+fP^~!Y#{maKeR%JTWegL(PE-Q=HPvqt46ReH8cHR`uOeeXUQcy;U|IvA@3R z$GL5P>yK}Lu6F9@(|VrQU(JIq?(g55!hiMuk8dv)&Mf*r@!zZku0?y5iyynyZ4eZ1 zUff~uFHbCE;o2Qnj{P+;Em<(@@Lu_yQ~qwB^Pfp1cKesCe_6qgQetDy=6v*)dp9$4 ztLW?AeKF5XE!Mf)27SCA`0L%Lo|2fw|NEoA$Uc9h`!wTEeM)ajQV(Z%8ue+t~ zZSmYM`{$ZIYt^~eHa?78Y<+GHTSoatyAQ|s7&SzCG{m@gK^r3?49rA1TN)DFP4hjwUw!Tve^NVV#Rk8ghLa?F0Cy)S2a;?8a+)eWzD8sBVLIGy85XZ7SaM|~zAkJ=g#_3u5Iu* zCH{K+-&_X?wvJsZ+r{^XSawfW{7|7De^zt>`@b*y_i-M}Qs(&Zp0;^l*16fk7`oD<4mP*f0(Yu(j1^F{x^7X7vJmu}r!*Z==#t@C$z znYUrv-c;}I>(T$0$Ndc|i`ShH^d&aFIQG}(MgODrN0#j@*S%>||Ecc(8SSG8mqS2&G*UJ<=bDhy7Rl!{Ua$au=WI*tqqt*8Po({2#7$_4L*~(^b#D)?DWw_k4Z$ zTJF`WcDP1G|BdMU`m#3Z{qyOiySJ{o8ymK^Y^&hAR{R=(QYvESp8ZcnqfX#K*qr2bsE&P{&a{ z@ACd))!yn)lT%(j{rYNi=le;k&Ux=sm0t2C-224^@8VnH61kGQIRm~vJwIV)_Wi_3 z7m7+h?%qA+`;VCqQhT;leTuvPDf{Zm>_uCvzU*tYTDNC;LPoK}wi=1*oaO&7D((K$ zRCZ?i3zzsc!MB8C|2fPwhM{Qvn^`%h)xf76v5e?H!B z`vC?9CQrv%cKxeZiwgJu4{Nz@TlM|b-K%TM!*9KQ6t-*s|NsB4{=U7x_TP4^saF3P zIfQCH6(>)>zuxuNuC+;TlP>GrH$U_4-_=XEHa|J}{(tsYzZ&IGy`ZXBd#e&-EA($~ zKmGCU-1-&WEqPw^k4+FdDBgGE;LqfQ2V1WHI{RzS^!<;rCSThh{(jfjm3rPRDSL1I z6+O1^+0M#WrS;eTudUl^`tx;|f8PGDtF{^i)_-*?@P0Y*(K^lStyN!(qE|)jzxt4S z_ui|M!%zR(U%zg#ZE4-sTUWjY*8YDRv-8=7sfQ;`zP3N=op;Rsxcw2UFKT(a*8Tpe zeQots-ON8*vo%)FzxK3BMd+92>Lu&EMX&4Ly8b$R*HNW#*1vZTmpop7#v|`quRVwG zC)&JK2dRn%oYg5Yp2W^*X zgsPs#Ir&azW0}@J_5bPcCu_sk*G$!MpK>g@`;*xByH$t9!m55gtSO7JiVaHnY4mE% zn!3G3?{}!+*?yZ-6y=vFz>$h!f@83Ogw?gIJbJof;+j~X-#;5Jc zamu^+_4k6ieerIm;yFYv&bU?*X87#wr@u0tuR4xQ39s7cAE9I`ynL7c)!SeHw3nY; z((`0{Sj$!KUq@B1-d|){KKb&WIY-=W)-0QC;%{4b`rn`HlP|xXd||qh-e1YPr(diS zdi_@LeSQ4twCdB@SL@fT*#Gs~3jgxBjp1wSHg$YaUVrh!e%I!m@%y&NSM7{nvbH>K zWBj^6bMYbv1_q`|$65CLr&vpmEX=E||6lw6|9#)PRqx7Gh3{Pdb;>(x)~nwyPOZ&; zwm)v#tC`-ldZ8I}!@9M$7VrCbvtZ?~qxN51Z@t>7eLa5l^w-m0U#(xwRMBUudtiIn ztbav1YvMmYp7z`}wq!%V%jh#=^8YrLIDJ1ErFy^i&i7XjuKCsc4YJw$G3#;dANN;R zFMX}ry*2CllhsS>Uww|P+8h7+uCUFkX**_C+$o>pUnpg=J#Jan)#O!IcYnINRQBKc znDtB7U)`5!8#C?o)!%=s@?-t`|E)>dzx4Iaye$3A6N}GW_!trs`^UBF>t+9x# zv&3Ir?^s{qthVaq-`M*v_UbP#FVEPk^Q!9oz8As&HZA&p`N*%QKa`i7#n`Ll*PSoV zH?#k`>TBh@<$Yh*pUphHlJk$>)~(?Wx@XOv^7Y#4YrFsavFkkcNI}K=^y$gh_U`_= zQZKsuOSsYhSBc_vzyGLT6nk@m!&E!g<=4?)+M%oMT~9?%x;SgE=!x$2W*qa3zkjco z^Ka{~t-nlNDl|^>Ym3Xf)*lqq^*OTJ_4@O5%lrRk?NLjv*Wr@j+q2{OzyER5{<~H* zdh4&Rdlz$k^5NohcPndlevbRL^+$(8<=huU;|8W$Y(7b0-{7K2~@6RUG{crNkxRx zZr}asdtJ-F&gp4~%RPU;Tenv9b@%$Hzj1rxx7J_N-uLutwal(}olpO5|MS&`t+W2$ z$-^gh-&?q8lGv&e-37P)z38|3TKZyr*U{>5-+EzFt^K7hT>b9;*>~~QyLIcoR@TP7 zTbVNPom16Dm+PIP)%Rw7QL8`UUhCIy{dN7d7^$*jj#2 zwrW$>{eJKf!|cOMifS4*A5Y%7VDE=(J3TAw%M}u$?zemo{w-6qewXpKV@FHHtJjAwdaAQ3 z>z#l2{_x%R*FRCtTz>z>`uw;(0uhQAkA1nIovP|^#&Ov(Q_b~H|HlYCFyW~_pLzRh z{k8aY>#qexU4QjoXV-e~r^nj7j>vOP3_2Y3yYcD&J(U(G+mmV&`Xt0T?EY)axwu|* z!}Au~{8v3S3O-fqo_OwmD9+Wjkufgf_F|7^`PcQ&&OiTM?Zxs;y}fboa$P5LtDV{~ z`NzN9lo$K!J?Dg< zx3>$v4y`@;!h6X_re(imGxpd2TeGREe)IOcHJ`Sh=ic!0^@oRx(|D%G$$G}WG2Uc< z{fqLCr+;h8PNfB$;F%+OUKK<_4F+Yjt z?8^Lidj#EHhL+W}9(DbHZBM|FC+WNFH}96d-Z|OFcxsode^~aude^FV)sqihmzT2d zdiwFN1@{+^75n~uIqTUL&iyyoZhwn!znECh=~+$k|2Vu3tx^8e@mcnM+}^NdtHoE( zKao}bbMxQ$XJ#;VYlE@St5pGtO zLk=4sn2B<6g4*yQ8m8Q=PK^c=T+{@W%~#bfIh&Mn_jSasb4F7Qj|g5Ukz31t(9YfQ zS8eD2{Ygh}Z8bBkj`Fq(Nv>-&)vi-Tu6eGM(GVZQw3|H&HpN74^WuGLE4Pw(Ft{dU{Ce|L=SdF)re-M)Ii zvd_UMPapOFu{2?4GhHJ5dfVB~jJswv0Xi9c7FBLChc7St^gMIzdp!nC#?o54r2swibj-zxuv> z_tvD8`2TimkJ-dM(9Tqq>kMdJ>%e^B{QhlOVpC5}YyP2WcrxnR z!=NN$E%3*nF)drXvzW0V^N`Ad3A6Sd@OT?2;h`RPU{0nf({Igp>)XwCKb~{=ciE%e z=AX9wr<;<-1_%Nvi3_>cj)Pt{okH^f8Y0Ke}4Zw;yb(a_5O@a zPZSq+FL?fG|J$q2X0aZ8$z6Y5{fpf6#S1?D=BSfZ34DxdA~V!fq@|b+Hf+c+6H{VsS?IVS zKu3hTrAe*g$5(U9SLx=Fl`BiSt|jGpE?O5MXTSc>x&9yWe>cCLy#BsR?xcx--mg2q z_M`3D&)@y?i_Nrj-D5v<`M;a9fO+qZ)bUE7 zNbhohh{#7&vdm_Uh*^b-#bJV%^by4-9;BU;aIMXsdwv=Xl-6DTS0!kV4^0(dgzFwL4uA6SBsOQzyufH)7SN7 zgT=#lU70jhA}}s#$%E*{OYSedT(ke#|4E1M{nP)u_o~cEhwt&ZldE@?ddBY-n;%@K zX{l(#>{u3Lf|Ja=6I;U{d(B4+4@0 zJr}F|5%>MfzjBA(^BasRPuZ?{T-LBIUBKMr`(499Gx)x8GILXQliMomr4rUn1wO}I zHZS4*{z9Qqrz2?wD_{4f86UUs&ksKP`B%`My*5JUcpVK}+I)pgM4B8|U-`MFg?++` z&mMCspDE{l+h74&m%B=ghqdWp;({3_(%dbF8WR?PPoi)-=x8uSLs03{x$19cPL`U! zzBO~zO0U_WuUzKv-~99cMb&rzU)>+7&3{k4`Q-1yb8AI@zdSzwRc+q=&xd{oForL# zj<2@qYEAjRs%zf#*ZYl*PyK)6&yBTn7V2Az-~T$*u{CGroZpF0&&B)PEDn#|dPRsK zZc27~$e(Y@oLlnWil4O7PyVhN74I2WU9|c(GnLa%ZQ2$=7=-^%hf#x7~_d+OkBo zx$@4GKMx)-FfcJW&T zUtOBK-}}|^?9d}RyTi1v{$3yUH|qV@LwojB*@tPr>)f^Kf4J(tr^gQhre_B|2OLU>+7#B#r}F2x54v~#$(>v8Mout zH~v4awR_s%h)D+>RVPOq1$V8R{59*q^tiou@7#Yl>GbK-rY$e8uZ~_{R{mDz!Mkji zcdxH5KmPCflCLjzzyBvBGZ1_{3}}j;LS4s&q;bpzvn&Oe~_>GUi{zc zBY%|FyJqHpzHxTj-dloGtM%%`f9;uG=cHD9_j_Do#fjFVzossW`@c}Y`^nwkVY|c1 zS|?7Z>)ol|WHYa8)pg18Ut9Kdi`V|j4tf{p_y6<-=Sx#hzWDVn^_`@?di`df-<$qF zGP-GaSOOtDjB2rholv(Vgvg`tR*oB>U-yvewq8Ve8cwz1mw9v(ER){*zO6 zy_d&H*(_rEbu`vqec@GIVdq7;b!&eejI25-p{7~4w|4a%ZS{3a8yFZE|2occn4kr^ zAJMo?f1G!md;~e?tUuGZqNEO`S<^q(O=86gYML?J9;)e z>(jd(b?c57#jXo~b#>Jo`PIU|zW(~Muf;1%y>(@~%V`(!f9k8($L$Z(zS{LQ!9Q;I z`)84!R^N|=o__VQ{MOOBt?K$)1xt?{z4bJ%<7w1U&|OER=la=;s;25))6Cy#vZyv@ z_3rhWA~z>rEcXqq+Tyxe@7n4!PWz|5{XKhk*O$o9>n6Ka?~YmWF=fi@s!Q+py*`;A z`r=*aw~evKqPAzumHW5;NJ;jrNmaVxQ)aFdESNv-_kOq2Tg>+-DIS@<>(izDpRXUU ztW}fxI<-nCuv~rh-K*cc-~El)RA2vZ(|Yy9dEetYHCDT?zqU7JXRXlg)k{mm<8&^5 zi$CAFzC$B-TYW;hK=x0y{U1fsQf4il;-1iEXI!~5_UJENw_hqj_cPY&uZjJ9<U`k#LH`f8@1$24B?|9iUob7}U24qv_}o;hJtb?@xk5kKSHuW9Qd_C@Vnw`Z^U zucP-k@1Au=x%XL|#2Q!5Q@g_d zR*P;izqbDB^4;GwW}FXTU|_FyoaHFs#adJ)cI#KT_I15yTVLHxn)FZo_5c4T|Ng&! zYE9YN|Je`!{o38}^<{0U>2<-+A~W)Xu3!0jtXyIFUr}wFy{o6cI&!b$|0Mr^+&MWs zKd0TgA9r!{<)39s_V1l?Z1ozuxWX@823H*4t4(RzXOi4t{Y!Q_Z~3Qy_cM;`tT@^C z(Q5MRUt3?e?Adzs{;HDG|6jO=Tr!>?ws)D%I{wss7vuI;f6cqTdQyAc`?{$TOp;=g%H+&5?LXGuNt=SyM9 zs^?Ed5zKIWi{)ok{fKYum7xo=SNpLb@`-t{TJ?;n)kUabH8nz7V*9TlUCH%_ZbwLf^8 zV0XT=yZD7dy6tgxXLi*tqY(F#zjQtQ)z#f~cE2sXf23}&-K&4Qe{oDMnowETeD|E; z+S1&)LO%Z|&Dr?v$W+HW)8<&57TsSZm{DhcBeUdF|LMOK&8$2tUO)J^V)dQUS?*__ zOgFtOd--3>9oHjOY%#At9Wi0wC3gPMln*^iT2vZl{rq{R_iam5sdKLWi3k6l+D~!U zb}IcfZQ93E6Q`@M6RNs?X64?F^+C-a)24R6`ns2EQKn&p-H+-uPk+6Z^W$45%lXRS z_!j=9>-Jp|&62c!{q>u`pFa{)1*+?hO_u+$(em5(`>Xc(ui7$2z&|l``olL{>YjE@ zRuN`jDxY%cQhj>0qp{j)o0LnrXW((B)vvOm1Nj%iN6qC&mDeoy&VZ{PjFEBa2c3UBa@D|cHe zR7$*dF|Mr)?b~#Fc3zv1?2M_4C6-)Ow%YvdU}b&n!JK20)!x@^ebIk(SKWm*|2Ll6 zv-<56%VeYZiL0yEA6|R@?&z_p+I@S|M<);sVkPhwsN=|8pE_|t$zGBwJicV zeDAh=me*ajTW)E1&pgS8O?{_#eqJImS@f9J*XjFq1&b=)XI#r~|4m@UyqEv2=Bq6K zAAQ%;KDO}1%e(LX&OVsNwL0Hc<6rRO$!p8o4Axc~>xlnwI9RcD<3xD1{{%QYv{pD}FblA^>rh6(CyFD;y&ig&>>#8|E+>hjBk#boS6fYU^x=S|579Ra>axC1+kQ}X)nneKiq$FpW&5IS9@os*n?KFKj{oiJ z&Hjw{>(|$H-~J^(S8+x*Q~ZwU|9)FPXR(WnR^R`!>6Eg>58llE(^~E9+4eQ1Y?F5A z{kc#5Y=o+APKs;y&Dxhiny!7n*H%P7Roq_l@7hhN)Bn}_uU^{p{`{)<*55h09MAqq z>Etb)TYEjyM)!|VK`R@BA;dLDEt9LJb9+;%hHl4F0nOb(+5X$d;N_U>`(MbE=>Fu{nOyeFYDC5 zHB-tb<==KaJZJa*@>id|e|?SHTP>1(`E<|w7@6Xy<-$!COb{*=}p`f8Kk4^7R;Ry{H=RkNVT6@0ubozq~8^tLv+;FV{@Z{>hW% zUHkKf>X&KXX0ES4pZ;!Q&+5H_)jw@zw%W#j>S@lLzO-P(h47tBh_q6Y? zN2{;Ludmx1ufND^t!%&FLe8kln*HiqjCUVkU|@aiILmQD7Hd&(veMsQJ8ylh+M2ib z>#wi3zP{QUxAxc8s4tUN*OtWpi4WTTf4%4FxVr&jHZ z+Z#M#O4}o?cTx?Kw)Nuw|Ht>4WUsw5=hNLCVODo?&PQuDzLHnd@10p$sK2!MLs%SV zUHykwY)e~z?q6KvcJ}_;S68DphJV&CTl?#KrOp5MS65%(A2xaEOTBNa{|2<3d>j8& z+46pEli<#kJ67H?=H2dn=xdn4m(T&uqN-+J9W?fSa-Yg)U$7OkKB zv*h33tL?$FHvD_}Kh*Q6r@PB9oBdJK-e2AQI$S>L*Zog<>-Ju~d-dL{Q#<;@Rp;$Z zeg8`@_Fo{g?!EpbgStKP>spgOKb+%wO}l!v{Gx;E!eQ&e{{Q3fw6WP=CT+gI-0tw)$(;&f2WK)zj}B*~;+m>B1=9d*><~k5&KLee;6e*7Dn$)APdrw9Xf> z+gtR19Vbt=RQBuYn=2H*?c4V0?WbS1;p<|(ZNt05Z@qey((-%a`d7C^yx7iskP+It z7?qO?{=MJer1HP&ZFYT2{`7hLwiR9)PvdxarE3*_ z&XP)9Fva}+zXIp;%{AwP4sUW2uzzw^|DfmN$|^R#&ElUqxaLH~9ya)S>B|&D`>FQ- z`BKu{l(tw+)ra+j<*4}^R zn73Zw?vK0DXJ6aBW|8&#eXlP5{#vbm|A(I7${_hozB_6=mNXncdg!sMX=m%EsSkb_ zHS9YgsB*vVca49@$@zC2bG2S=sw;~v>w2`Pz{`E-3DphBl|TM8fBZGU{F%(m9k$1W zt-K#~qP zqLrIXFYa((aA@NVql+IWTEFysxqL^$(FwnlIc9IoUh{9q-SbbreLpHQV~<+WyuY_Q z8o%tj_(kB(e7o;2_N`v>|JAeJFL}-y`}U-tgxpXT$ksJ`ymt;TtB3*67=SXWIbw%r^s{EQ)E}i;& z`kR)6-=rp0OKku2X zSQKSl`*q1J{U)dM^vOT{>K*HUX?xc%dU`MZ|8JKo zlRp2nc_NViD*WWfCDp&y#LoD=FYfor-~U6)f9iY>->%;BZRJws{Vnfa{i)@={R?tY`bj!9!4FJ86LviqKA%6xR3xE_UAb0 zura_ul)I%xsrc=_&2@VohDsjtS{WFu>8pC&d1~$1kL%Cdv){ij4p+LSADf4CNVUYNj>V8-j}cAjB}&%x`C z&Cl=sjfnUCs=K#A%JPx!yLXHl$2=y4xY_Ti`hIiASNV3^JFowiu8^pPEM*7Arw<8*i5|EbqHmx`QNC|t8=1y8`M#kub;K4)$KdtQu-wUsepMnr*_ zNL$lEhYb^aRD}7M4o$RhQBmf4T6pj0qt}yOsm$aINr((&)lSl%ub8*)gO$+|q5skwKi&D}uW`GBCp|6y z?Y?#AZ~cBJU3Yo6zxs^AkL%tZePI}W{4+~psfx?{SEhFMs}=hF*F4zUpto*PhkWl6 z!(YpufCiK&=%{eBG97GK5D_3I#o7!x@)k6*+^}JRpOQe!q327Jug|^r)Hmm~R#$elr{;f~Q^T&v!;+}!x>(!6bn*(@Gg}Ql-DSwIa_iydIo{pBFznxX561}; zyYKz9(Y-Ngq3%iTig=3!1{=cP>a4QS_|$j+v|3L`L|KgKNWq2xKQS)WwuKHGLNp|V zS&uAKwA8F?(C~RkJhEVubQ(gn@?qOJbs;d@Xe{VQ?tan z1@^zBz!PJe!XGw0#{2e;Mawzbt3#a}8g|5<+JU;Tyvu`}Px zJPn?!Zv1>I^!Cl=PnO1+zG609eADQcMM!P*cE?LGulDc%y!ZFJ*t*G1*7rC0G++Ot z|Mhjh-u>u}zmEU-<-axb$NUXTju;>C<$7=Bcu?}xVVNHx_xipu_dRSC(Nl`i{r2vl z{*|%^!6gN|Z0nd`!96m<}~;$S@F7Vr^wiQi#wH@;RUXTIh&))q&Z^7-qJ9SxI;Km8U7b!t&MEnAb` z=B2b$_{yW-g}f)y_7o{R6R zu{wFSrs!JX?9;Akn^Mndy($q-f4yJtg-vl&M)Y2<%X=E)Yv-FRD4Dszrel8Q=U>aG zzG(H6>IxD4{&iv&kEWyJbkAdX@8{brFTbbt^L6!!4YTu_bEf@!nJ%5)|7i6RDZYP( z2k*Vq<_ipwz0)^q|5Ve!6;E$Asw!Uo_Urso28Laec1Hd-lv%%Xe&ErU@r}Y3pS83R!ca^poESeP-Q#8`p?;tE@Q84chiDG9gDqScvIh!-(-N(1 z4+9psw2Aj-&p!9_^X_$ZyH{_y7vtl5YWfmCwt3fW9-3tMRYkvhEPwI05ce;u%NMMY z9mL*G_S)kWu>odA$GmcE1O&**l*z z&8s+E_jc*+6$--B{oXn?aG&7cxTSn&Ls-ndyK%4Q_n$Z<_@>f8{LhZ4@1mEFPgo{j zTy!Ahy4&xs$2RX=emHbu*`{aTAIfDYzx(*2xq*R!b&KOHCxI@|);&*;HRYey*7*DE z4ZmOdI`;0rO>e84Cm)*oM&N~p!jF6Br(eIalz;iUb?bM3&3dx>U3+CC07k z@~n5yTN55|pgp5khcD}BhGN*>`&x&8YW=;g-?4gq#QyNBd+u?hF8AxM`Tly{)k(#7 z>zYclW6w^1wWjFqlkfjoRC>29c$72G^qbJ)DesF`1m9h>H#RWL`pdrcHSVRww>I9` z7i+oqpwZ4}uYY}i-#Kx<`u#7$HCmUArk(F@xhFk-r)8?PBy&Tr=1K4O*Ir02y3-J`v1ZH1_(L;8 z0v78^n9V=VGdV_MdxcZ$G^cYrXEx28)O5O7sC@nWLo@Yu>R-&OS{twQ<=)!+X=*|n zs=40PM*Hu5{bBCBZ_nd*lq{Bue7TF$`~5nLtGcK6zp7cBcI)x~BNEd(((}%=sIRK} zKg;Cy&-!?lu+)aK*se`~e?EA(`t|D7?)6uHP2(3{GrwcU?AjEyB`0lf{Si2LBmL`- znBK(N%U7FQEjF}XlXjV$WdCDVgY(q|8y>0L^Y8zt6lovAtNV}ht?7v`xe}#6)C^gl zC2er2BEthe@Q*_wk}To zrM>E)WRqFZc_R+(DNP;AT=hSRZT+EVxnjD%H(zn|?l*CEzv{k~XIb2Cn`9x7vWLl= z=e7Qx?O!6ow&lqP^Bry6z5ehS=`XvQKJj(0|K#@mhHGNC^PTVEzwD#FFU^zs9CbD_ zu?YxSps<#oR;mOgBGWuFoMU+wOTtMPN14Enh*hI;L7={x&p z=l$9z|K99V4vCxoH*D3Hr&B^EJo)th^w)pu-z+V=zxn#dg&z`g)T>_JPggoNp?>AZ z$a<-z_wK)$bol+IqFJ%C>`p$ZyE{Ade%6;wMbq6RGIxK!eS7=%$tO>~SbOW#UeW6{ z%X@AY$G_j&z1Q_?PV&vYQBQt-`qA%nf6u)6U)IdK(K6{nt5-lSFf#}mZzGuJIwlx?Wf=!JCx`%j@#c+mCo z(xTX?hxcE{y*hr@zIDX}X~PR4c0qUjLi8W$X*qvU*0h9w+b3|67?R>2`2VQ@-naJ^zTEY0st~ov^>RS2p26`VV!r1IKdYitA*AE%Q49C`MiF+SC6}jo+R#;rq{xT)%dGIM){Cvd@xh$&UJ}>FYHw zmsvhvr?j{FYv#${{|tf!>p6_4y$iGOd8(UzGsVo=`8|`_UTNXl$Me3{*FO35w(67U z#uIbp4)U+^%l)-$$LyH>uCKqo{+ji*>rLAxqmp+Y!bN}n`}Eze!upMR_1nGGpDr%j z;1hdaSI7Kj&z=9fH&0F5_JaSL#OJ`KS@A1>vY#_3xN?5eN$pS3uCL!l@4vqEYx>%% zZL5D9?szF&+qU#?;+Ng~-|VS5HvRhZbx->*Kh=zI&Cj^?Mo8HHwfg+fRK2H7`wqE3 z-*jP_ZJf$Ai^2=7(Hp)mzZjFZxpVUMC6Dxevna)WGktx(_WS)r!>F4|`*Xg8-{d{} z{eD%|<-fDcGUhyHPrX#TPiu`icYydNVW(TiGPIl}+w+D0U!Q0DI`;dm$K|^pt55p< zM(|9|me^LS$-0)ddwo}{o9uoQnD_48-)X@Ds~UcW9TiyP|2V_?)-+DGyD!5ocKko# z8R)KVrqox*Y>yZ_qn_m`+Y5q;WA{xAM|XWx>neZLjDHotL9 z`+ND#tn8ba`}h6+qpxD5GJ8Lpon(HXnW+Ef%DSUBzIZL3R9e3&)Ba@juXRhezPfV! zuB_Mp)pu71&KjX)=jB)9 z>&3gmu3Hr}P0y-wuvuF%xfFaE`sled1;jB#HV zzb-Ior@^r=(}OOWW__yKXumF~+R$xv-Jidi;fqe+nDw9c@9M8FO~YqjU-duk-JeMI zV_xh5FD|TGefDJUdKT66-sybnFK$@9-u3m>s<=qos11F4#e=Hm>Yvc6aqwkf{%9s~ zoIN2W@ckFvWjkx5_sYb~T2zw0DRr_^rI*k5vRBjgCjG76yK9$P+4Ol!kNp1;KSzM| zl7%**-Vc7VHoA zd@=Qr-I3F}=fg!y;_pA%;>PrTr{(UyV%qw@{!ivry>KPp@O#PpN&iK1YM*l^Ys5dB z5Z04ttA2gaTk+*P=DS=k`lTXdeLmi2x5JX}e=qFWUZw6@Wp{du@v7@lb$_eg23@LC zJ{4neZl~S<*LPKR?|UmAcK_YCiCORO{fRh}eoOmE{tcDqXT)E4?-x^2uPiQ#y;~)I z^>yg0PgMu&_WtDMo3Z@#>aP`P#ntQkf?wZ_|M{vj`^nd-@2i{MT-*8C?CFBv-KX!JerNyF z_w#qm{#zPu9>03Zx+PyT*R2C><6)L`oaH>>6>I6yKXVtF-mm|D|L@=bzm9(W|Nr}~ z>#xQCypCU6fB*mQ@7M47umAr)eEpjJ*LFwk-IupCN;MZN0MoP0*F;CEcM>*UxNySrdNt*WPz~r~gwoegA8F!mgK*d*?^hw0M_i ztTC%PX7c=6@Ur-D&s%$AtG3rGeSdwg<*MngtuNyRQ~&?+pX~Mf{f_RiRlCDhg|2=U zwR(NjmoN&6XuEXD|F+W#3MQ;n*czq+|&o3oPkL&i{ zzrW~e?ETs!Uy_u|R_w2z@KUVDZtn??A19ywuT8l3_uES6gqG#{UL|j<_O$i9czHSh z-7Wi{x|ym!kF0-eZWJ4Q_22rdDxZEV3`*;1WQua}%sLeu7<)d=`sUrgi%RBfs`~P$ z?Xebed+${{X)8LUmCgYJ^xK1EXI~^@xR<%`~GE?Ec;>>6s3IjMc@6s zQKwG-y&Jzabl!fx&ChQs^ZtoH;{W19)YtWz`Fh&>lXOG+-}#F_b`w7TOvR>X-}6fv zSLE!?&$^`CTd%rMJ$swwYt`%1S808J{&c-%NJU@$-=zEZQ^c1&`{0v3wf9Aucl5WJ z>lXc7_xr8r?zdTc#h2{AzCVoX$rt7IXQHhB7VW(n1X*P?87r;2_OU0fvUfgdy6@qQ z|1sq?e^j~mg;!*JyrNUsUmE@E{<_cadusG;g>~L>-?^@Tb4kM4cMn>te07=`zU$9o zKEL(t&EWP096CA=gFAok_&D`t&gVyazl+V-CVKeNyZ&2M{&z0^m1jL{m=}99=tl$F zMdk@P|L3KgnrnVx&dccz0%yWB@7{c8vGePnM|GEe-cU5Ys4l>@+k4ZysL7X4KKyzy zu9|u61JU)-cp9|v!`K0{3Bg5K$!%RL{lzq?-_vHL~alxAPmIUFzMuy4BF+<0GsfA!Q4TNghT zI{rf{>;3EB=j1%C(|qxzJ|+2Gt#D<>6~)_+(jT6--G1Uu zzyF4~KNDEC#1-3oPF|J!XMysaUl+n-9S<;SiA?Ykk!EE&Xt3dpftXP5p@Rh*0<=^( zTb)2_UeyG(tgqay{+)mO^SqN$Gb8mw&pyrbT&cH1W_Re%9rq2NJwF`ZU;gacZ8LMZ z+WV_)b2{a9wtby)WnB;Nf=)Y*S0D9CziDrLw`ty<+YSGfg;cBFa zYrm5N9p&P=S#Juz=)J74>zCutRrTK|R2~y7TEBnwxt=78^*_v}vOYaAse^GrbmQ40 zrKX#-bjs!4|GIy@-uU}o_y6bSzgzqD=InU)&2qnAO;Y(`a`oT+RaNPSGG!mkcxh^; z(qvRid>QCKcY=axH1jUEdhLwk;2=`5K(A_xm<-eIil!|=-TrhFSAL4ES@3Yp zf9vA7k25x}irW+yQ>^<&X8rDog16p0K6TvkPv1L%>pyJoo}YW?@B6gkwa0_fTCYZ) zRBM!2Z?Jm->x|9?lB>4wn0RvCCxgO=vlWW}`nfDvWpZ9^{$qpAaBUrhMRy-R;Xila z0i&A8gf>v59^SAaM8i~=)#;$aiU2nitrn+;g7aj`!{2|sJ@;#A`I={2cCAX~GYxxD zUaxX@yZwd2v-cgAFJC7+U3R%ZvR+g_}*21 zF`M`;b&W6elkOF$B5=ifZOVpA0H@ zuzZ#H>GtN$ddAQ%R+lTzU)j6pU{(Ik`d@dyy!I`$`&pjw^Jl|4eu?-;%%^1-1zL@p zcW+=(ys}fvwNh?3BWONgLXIdG$DxM_2VxAQxFtc!G(^LdTaf9Yzz&ym%j?E9zbp2Y zA9__?JXK|3mqmAh#eRu<72?IQ8S;shwU)0%1#pzj4Sq~iJz`5 zXRzDq!1I6qe$F4c@l_{G0|M*VF8F%ijyf1OF|{pe(!J|De>V4SW^Q0q5t*VPD$L4s z7<7rF6t^JL!9s%=9TBNcmO}+AT(ktGkDj~xyZV*i=k8V2`C6~E(u{+{3Kx8@_pa49 zyZb5Q&Q|sb zuSHGtOXZed`xaj~Rd{szi#YcCcDK5xF#Wi#GVLZyZ`G>oYP}PA?+;)13Si&lz@*Fb z@#4N;CMzzPezn%%`n4_N_PM9~Ty;O)`XHKlZ6?R{soI5c6WJLU*bh3+auMWWEn2?* z!oIgrYpdRGJ@sP!zW;H5tEWGG8&UuN|NE`UTMPeR-Tk*(a{8XRdf}zDYs0C*m>kuOhZze)8eh^?2d+>vO&b_MHAdd!1Ul-*cBZ^{?+cpT4dC^{ZXW zXWw*9tuX7b((+%g_w9?`;4QD@P{U!``0viU{PpQCdh9rgnr=>XOA-0MTDB>B%flt- zjFTIF|BGPT^Xc>dwB7HncDm~R(iSbhbv6Fh*Qzh~))|HUcb@MbY5Vuh;eytazpp=N zFUwzl-&Lvl)zpn528}Xvw`-txi<7bO!76qt1 zyt9A9eb4GoOEXON$9-pY^BXItuycKzw=Svp!ttUGEZQKr$g3-my7@H{mm2e zq*6b=U2^%AA9>4`IsNwV$#RZeu(Culjs17Ug!~7c-0!8|?{8aFE|-zgJ?s6Z2~YPj z%V&M^@?QT_fAW{`qqSG#*H^Czl=vI``p%QcNwefnekuKtb*|;&S&bvtTO#)Nf8#tM z^l_G=9zkQ+btG@r`^Ye+u@4nZsnfL#Uq|9lkfZ=Mz8pNK1KEQhsV~-U+UXN_{#sEV|?KCE#7<2;xm4J zp1%97{66ok>67nl&8yl}``2P2Px9h}9|T_?`20&_`pMt1j~AIQ{C)IK5gY)xJK%lE6f?(=+AI2h!X;kohC z|1y`Fc+FLY;g!PE&P{wd$Nb?AInh72t{-kc{jt97gWA$FX=|i*Jztx(%Js^T53j#- z)rQCk?7Cr@vD)bM$Hm7QZ`W3&$rn|xZ2jc? zPpaX2(SdJsHh1VLtgx2P{-oZi@b29_ujYhxFs8jD34D6z2ZVy&_PNnXRL^ zt%G~U#NsGn*Za#Cbnf`^@V2FxfMnr-NZ{qw< zvI?im>f5qg&nZ>E__MES-_)BtJi#ukn|}CZRC~@)J^AD7L!)G!EjC{!-+N#5IJ`N> z`+c=Q;MUd!>oW?=dzJ|sd#}(F|DUKS@^epw!6!CmDM7i7Vr{c*CQXsQBXRG6c_kL59YWfIjW2Al%Dy!`oX%H`_n6`-4|%qb}j5R ziEx~eupvBs-oDFQZ@zx4ANt;Y^6x)imbdssKmGG?_mP}*(;w$^+xGQ^es1Z0@#@3W zw|6GWcQ5;)CiH#r-c#`tLT09_KMmdSmdWApnw>5up8sW%TEACL>(0M}=X>OjyeQw< zX(rE-u5A82x#ynRk(!pJi;q4_KK7?JI%ek@G0Cuxf!>pMR>$nxmG|WQ^GC0Ky#MIL z&iY)?rb5)tW6uroFQ@Dr~e%XVLSu=8$ZZG(IcgdbhhlR~6x3UyYe}DVIgjrjjKW00> zr({R<{qmZH)l2iYF(5q8~wQT_9ll{~xe9PRRAzWZ(7`6)_O#-mR&> zs@PfF-@88=!c|$<--Q};<_nxkQeRuWr(p&pq>;3*CD0%;V z@yt4zH+$=sADQx9Z|ViJrT^l??Kb_t|8?@a-RI-aI`p%eechX>;T^gA{C{y)qxARn zlX^`nwy%8{;B3f#FLs~di+DZnZ+`FdCzr1Y65qY!W=oRPnGce)( zKI!*)Z(NeX-*Hs$mbA?;f6jUD=lA;Xm*1m*UEh26_t`R`&hPW)t&My4J$$nD`iOng zmwa*Gy-shzB-c0dnmE^WirU98FtDz0oaHi6i?#fc&?TO43+0b$pL{)K^7mWc+ikt4 zXW#$)|Ne`O)nC6~j7eKpkpAn+)Vj_~A-g91QvSN?bjYH`&UMz$>|2Ze$p6S&JNf(n z<~84SH1acY{@>cDd!?m3X{vhp+0%Eqe($^?n&#H5Ed3VuT{rLZMb+@Hg|Es>9d~UyUjJE5ky~3+{-UZeE z|F>R!RefkveB9*g`&6U9zPr9BeD|Dxf3(j1U0m_x)y1dhqrY@tu`KP1`=j|YZGY!< z@6#WDf4y4ab7h^)_5Yl1?df;wf3duBub;lZrItU|bK2Gtul?ELXVyzz+JE+>-geu% z?&%p5Lpw~lLeDI={i=NRZsM%(|9pS#xt{uS|D@mVwz9N5J0`jQScJ2gfpkVx$ijtp znR{DUL)ogYaAe$WIHRD(He<>`$>QjZSAR?v<}5coxb<$Jr0xA*`BzI%hV{LA8@F`r zwIy5YuI-MNUHvnC-Nfy$H@VJKepc+$`$EraqW00a?sAFfuL5E1->-@LsD69+@=wP9 zLbl8~TaM1Y|6t#w;7O7h5!bIDmN5TmzxT!5d;U|Um+Lk!j&9a43Wx|u&swo%L2J$H zq6vA&>%2Wu*7h98nWdor@_K96qwxDS8|Ef_-u~(DKW^T3 zt$PW3MT%_C?6Xdrb&v5p&%Av<7r*-XSWT-+XN}!&mPc>*em8EN72j$4?%Yqi>UDeO z#r)jHdu3{!A; z&B@UB|LJ6&r0jnJg2z{;ZVEeX_&xZ?LdBjK#wy9;)yrJ2<#Xmo{L+sX)OezHWzFBX zs(-rwlKrDE`k((1al;4eT&g8VOe|k%;US1~UmEDys(gvp%eP6$Qhk;NF^ZQLc z%i4c9Y}XY1Y5Vb$`12-5z3<&SdP7eBy6(C6X888{wb}c>SGpECojoKXd4#oL($lwU z`7{1?#s58d@&41^Y4vOWpY^Eey!3lfZT;ku*JonhJ6{nzw@5TyPUz>ZHD*`YW<9K8 zQMu_>S(nA-lc?{$_SYs8%lCojKluDESo!tbA4%bB{g?i1*vX>0^}4U+#?w9G9|AAx zeA+zq@ay}lS)W+Ht-ZMa^Ys(cZm!MVy8eSxRQ~&ddEfGPy4c(HKVJQ(S)y>$Kwq_=!K)C%uf%`c(D(n!o+OSnZeDtJhEZa;;_SkH#6Vw$1n{?0x-{ z?3Z`1H{E-aeRkQDc_$JdHO+rr_5N+*2bx_?iM_kVCLzw_0l_}Z#H_nwMgd+}>hNXY^h7pMJ;4X^&P5xN_7@O)Uu zo&W2?85mef9A~)-da;&j3E#cG`rp=D|6c76*YDhA{=X}3^7Xyp{8!Ik*T25xYts6! z)rYLER{ixZTeEK3(od7c>ipKn{T1KUJhM}zdB@koQf4|cUM6O06td6%^;gzNb!W^$E>ulM@u~FLHACS|6FUE`P;=#DwFK@og$eQAMVv z{Ig$wxKp~iv#R&>^3Wxjg86MJ`g-iep=M{2rConW{n;S(Puptmf89kt>(b+Ms z>DI6Oz4PRA*MgrU*3z>lR?mu$ zbd8#`uiWLH z8mfA-^wrb2>1ULta73ND{pj_ZCY!x>YZTqr-mBktT+Sr^>sb-`8%EEL`1>_@8o0>c zXx2QLA=CUq_T<<1(<=Xdi#VA2_uJF6YZscHQ?`87Kb< zo^?3++g!1$dbeobGQBvpz5KP;J8t$)E6sVo!S&fZ*{%f@zk?3HH|KSB*7;T*)_%7B z&-3_x-aDsX%)RyX*7xi{vCiB3H*D3MbYf45&!NI+b2@gLJPSMU{g!y7{px?Glw!XB zP}=*>&Tq}X`l2PezrWP%ww(U)(a*2H-~C&9>*K%QKPP6ZpZ|4A>C6AbVC8wvF6@bX z3rm%Q7t2W4U;Tc+(x&#;*29|wt!kSLpPurxTEG9o8GF}LS{MCe*O$M_zWZJE>V$xg z{p+2o{zm-#73j>d%vEo$)xLuYJKr44(^7x_)5lG4(;q?3c{cHjEcEoBU*2d^HtBba z^>_Eg4{qtTeXm}fau{|_hm+OFBZBvxeJezouKww}J9D$=i(zwftg^foi~`tsWEahdk({GaZAv-IMe zeRpkbU)E3G^;x|9*3&coul}o^zZ<_d%hBJ@-#y{aB=0}sZ+rgE+`XDdAS1~Cf5tp( zKHoVjkH>2@so$UDJt?|=zsu>e-*ObJ`@&gY-=?%#?jr{7l9OnHb`nC#lSuWEY9pYtzeRsMBPHGW&9^|Z3?h4ata zdHb4je!u^G{e|c3x^=%&tB+JqnD%}3_oq+4*G6jG)wH+&Uwb!7=wWfdnjfpeK0iC6 z^|fnKe6;DWJxx=8IIBHToxQ&-W%9r1{R|8oxsJ13CuXr06~>B}D%#fV4fWl6J#1t6 z`q0?V(*IveKi=neO^>rmd)K4gyLYb+R=KJ6p?z<@`}zleu4{dNFe&hP!UUVW@2>9C ztrmT}-9me*QQ6)Qbw|C!Z+~j*Uw=LQY?NSlxMqFz`S{-7*M9UQzMo*1w*0l0jnL`T zofq_XOW*d;H#nE!K0i9t>TG>{lkTzT+buftOaDLoZBdZ6t0bf=zLsn5Z4dcgq4dCK z{6Dqq!tcNOZytWMB)#Uy_p&wX<3LxpCH?3={a0H5T$)|1-s}8Z&js|7!&dL#_H1JL z+r_V+tao|OVZF7q>i45M;h(GC1lq56c{4rg>&;g&+^PSA?v|fCUEZbsXn)*_@@F%P zi(bt0`ksAnmgVIY&4!=UKbGr!{MP^T_s@5;_R8*$n-Csz>gwzGwK2W-v-Muj`nhNS znH-DL7X8_$-`vao_GgvokH6vP!}?0rOr}kV?|6cBJ z#q?WZW%uoXsQ#&z3hPdDY%Oo@x#5&@+2Y}J=FTkFiuoZge;til8*chhcA*-p@OS-; z2}G4qWNj(Bee&)7>vbG|T0#W_!krg+|MKzZ z`#tqtNKo9TTFcNI^~rl1PJ1~wd|l5fo9pg={p8184~wO?X&oqEZX?;q{j=ccj~6zq zr~aPZQU0mCY~$s9%MuOLMKX`=KeyZE$HzVf4mruWUBzR~` zn%(F45Nk8<=DmkTE_U{ZPnOt4xLpYI>VKTf#r$}m{ZISS#iz{`1((R%Y~hz#YRBy_ z@%@lksYwpU>HH)^yP28^2X9S#Cv)=26VV0FmKXmQa4$Z%{;b|rd#?@g*PBix^xZzz z@nKzU)GH&ix_GUu57%raFUTtC*{t%vt?%I$TU)01jqDX)PyYV0RCz(*^C$DpE%oA2 zO-`G!<`J8znUfj!+5JyGDK9?~yChZOXl-p-hSQ`;IWDkd^c1?sHkVEc=~z8k6`g=YNs4e_wgO?61nZqwQ;Kzue7x zap3jSy!ER-3SDe^yzidJ)mM_!xm#mbpEQ45RX^ur?Yf?r_g}j971$Bj6ADmIK^Y@Au)MWdd_w8NK6a}6Xv4nWbDK#ENCYJyHhU@+2 znmRjVYbgJY{l8;&+ty1@yHd|}Fs?RO{m;QyOBb>I7Z239ytHQhPy4FcNjHpQ;w}Ez zP33HnNw~J)!omi}zbP}F{k;Dt^1jNdnfXWi+2yGjpEK8_8~4Q4>+jy>u=uRQC&M@? z{l7oo*sfT-zP`Zy@xJ#L=6$-_xw3lq)Abs?_upqt`hSs+ck4%%I%z9Dfd@}#e{i^T zZvV1%oUHq9CD*6R>R9 zwr*$l9X$QFBI1sLq4-*bgE7YOfA+;zsEf(`X{y@Ca=ZQFpZ%6oO@bVrIZWJtZsTR~ zck9IB#qXw^S#(}8_HEFTsCviXm$zBW>$j}`5z~>mif86i9pS&}r6=qC+g1uVlpf=i zv3L3U>-<%Z^ZqPe*I#)@is*~BJ@J(}Q~o1l16S!Y6JeXfb_=fF`Txgvr|s!^`*xpv zc}sB4DYv9kAzrSzO?mLHM_2R#u59qd6j_UrUzV4^Oet8yle>X$T3e}vr zV@rS4s9uSg`BwcmU&u|)l56{AJl058^L~-cm^w%KV9^I5Puc66rv$0_T>Sr3>8$?Y z+R!sw7Js+D;giXn|K!$&2`4H&Ypx}(@4Q|VQo8xSPB~X(^{2?To7bOym|v5hCcOLV zHK8fCq0^qU-Q2%w<+j?lS01s+SFV5iv&er>lPUM}FIEX1sqE_vRCLAvw>@TVQ*z;B zcv--6E&0&q|C7I09X6cSzxkiyf}Cl|?|+`!Kh+`2zdl^#`uDxsyiq-_5x){2>d)jp zvAZ_+iR!ICF;{+he+bcw-EDOEQr_N3t^MonIw^7AI5oNT{kr1jeYWerW}WJ2_HGv(3ww)CPaVTt$2R=Z-eD_89{d!R=vNQ^knt5-HIE$@^e3Tn&;p9 z^3&X3{k!Y5?n~4EZB9S_dd<{p{w1&0{5#7M+^7Cuz(>AbPT|cAkJBGtU%4La*;#wi z;rUm)NB8#5yZrm=jK1GL)%;JG-oJFEbIsJtg4g~%5v~5t5`5%XW%k!({ol{mFXD{; z^xyV)^M9Y##aAS?-|bsy|MHXgdV`IzQ4H3^?uY9(1sZ|5uw&r#)KUaL1IFzO$!qpLUcr=S)G~^ z7DVWYa!h&l^Yz=WpFh6Jw^^U-7pUeF7Pse}TD$1||K`U^(>v=ef{VEhJXC(WsRmX2%TYgA9;JZQ$xi(|EpQW=QR#myNPZs zlwEi=>C%=p5l@;pPS15b0Gjl@AjaL=%;<0ev_puqr7>W^6&=utYzG4^T>3>#=l#il zeD&FXIjwhFPX&DoR{fou*LHNpzvS<}hCd#@nB2YN-wKNdZ~i;}(2dVH(73PoVTI(y zf=^!#9S(o!6BfVPW2?;ala&>|SN0!gUO%60)~2i#9LvAIie$cU_gKKp*M0ICQhfEV znrjd5+`|8D^=ZF5>4_Kj`ui};#P#b%h=?D3VsPJpz2uqB_k}Bt>hU>FXzre9*TlXo z)Bb1S`B%I7uLUf)w%z+d?A5S_YggD9KdpTEX#(H6_ve;$oSNk_sp2E+-KBby4r=%F zX||>)Yk%%rPyq6=jtXaMOJl==4A8WGM`MyjfQ~5WsKg|VIZEyZ`?tP-|9)L|!!CVY zpQ*it)59UwpZ@X}R{M z&iS#oq#su;zB6^tq9>1U&S0tAe{-^R#fK?JKA!t(IzM?*`oiok&5GQkK0RlYa!m#4oj5PD0xe|w7+ZP{}WGYGQ znwfFM(9>A9S~q_m4`h}O%?~tY-fu@K5le&|G_ub#H@7?CJ*FWU1_f9eVw`fXb+N^2I|6W;F zS+}q4kZ9E5oUflQw=6u@k$wNI%6X}VnavL$-+uV^hgJelj@O@`Q43u5-9Iwv%+7@= zuOl~Zy0^;KC@eqZ0OTqL5pI@4hYLL-45UO@TN)Eqr0A${buuk<+%P4{ef6ndf4837 zUKx|`dd*|{)`01S7SgjHFSgMC*gog>_mJ;*pWcrC(=9(|)1Qe48ou8-KYM0g(f${5 z0{3qQ-U8>B|RBKtQix=-VOnN20W0k)0;+ob+rVr}7WPNQ9HkFq3xK*xS;-#&~6P3K& zsL@x|^oZV>I}Y|raqnKfde|PkkJ+x{){4)KN;CiNoszLzr}gR7RQvP_uMKImkvDkN0 zLyLSv9uz*ge6D=2^{##Eck!1$I5PR>zlBbluRYW1Ju&YdpStf~(o=GwT)V}sc4AprZcNj^;}uJ88Ldo-e6p|BfBCkx zes!yJ9J2OF&O5Sh!@Momt6mCE`O5iWoyO_{0sY&BANH|LSz@bQDR21W-~+LFHe6!C zELz?<5e|nn&($Bah)LK0I{tIQn%HlF^-#_02P+yD2Ux?a4fIVLRiSo24{ z#q4?2&V?s^cs-!7kR)OX9Js zr@j{D_`Q35-2RxO=eB;*p1-#K*TIYX*Vcc%|2lTH{Qvv4D@DS#W>1;w{aX8U_@{4= zYhqKiRAPdf>;xa~w#YpvbNBU+rzdK;UM_j!y6%6>>Qb+%I#cVm#%&c`we4fA@Qx2X zVivQuG@tK3S$Fib~CZFKgr8a<08!xAiX9ZRcHe>bt&n>Ay;idmH_=?nF*a z=bMY)_Ap%)$#1r@33vIc{ozvi^}r(G)BoP-{jBuzJ?6ExG<8Z-qnE**nU88rmL+u9 z`|Rr$nOc7S+*;kUuP;?S?Th|Ar^n)u{mXg#%;ta5ZZ(?yp0V}wsR-9q%oT~9X6O4) z@y-6!w9B>sKq}k2OU$PoXNIoy)GUwlmznOPkQc^TAl`9rzj)s=p@tuql2ul`)BTwq zw=3>{)ZgsgQ{(6Dt@>zu-75J0<3rCb=|#2hy;_~y%wcg{ev0A8$AYnni~HW+$Sa&$ z{(0R`d-2b&Iu_525yYIfK%@=-`-FI@=+J5dZ{1C2dklztxu%cAZbU~f6;D&0J z?ScizntxB1cY5^rP5Z8b#&?00w(=i3la(LIC(M&87P@R^{b~6j(Z-!=tJ=5oOv%+s zSoicx-0Ice!#};hzUZd_tAvW=l;`R+ z{eCmkvi*jmL2%;cyNhSXw_ksJJ@t)F_1&BKc~gEW-zw6w5$w36mOA<1$qxw*lVcUr zc!jRL)KNZ^SUAQ1MU46-Q@;I^4}DkJab$_!QS&J(WeHaIMStYZx;mxS^HBf0DBGWh z_DsKft&8`&-I8NrIG#W$A?XMx6Czl3l}L(Jowx8dBExTYf?5W(Vzc6 z|C0S#Pp1xiKeXcFb;^RKP|GVJBecwaBEB)Wdy2b3Yyo`w0mn?oiR*9=6IWG~l+jZ@F;6~APUl^6Tu#=eVC4lWRR`{Q@;e+}qvpW>w@kBIcZ%lY{iaC zM+=%PXkWH-*B<7?NA<2*Ar~L?eXrSB%{f){bGm!?X1Q;dU)A5Q68ic5k?ke!fTy~r z;^(v!z2Xbyp0|%9Dyi+<*S`x(d8WQ8vn%~(zbb0FzWUeVyG75}hwTpg8$UbT`pqNN zMKbamzO=Ni_%UJA25*~f(#DCC4DPchZt7vGUG}Qr3+Md4e~164n=QC5Bz17ZisIiz zHuLJA+)}IQS3mSObCH*N`BE(bw}j)_PqL3MpQGhF5VZEM(Mz3<5@vBSyqW0gkn>_wcSjhC{@#pI=c_vED%lP`IdhgdOp||u) zCf~194!qVg`I^AL_vtV5+uw2=F0@G4#bWd;HJW$b837-Ixwjg-jvP4D8P3|H@g!F0 zPW_kGIfbPREA!r6(=u(eWcK6XTi~Jl)?kzSh6jiBuOD}5w!XL0@XdzLg z;k}&nN#E(s(={ICAR6qr|`P?L%LgkT|TsI^D|Y>iuW%BUiug(xqE)Td(?iX{S&WEs7^31 zIOVV~?~{v5)&E-G;5q}9w{zKr6S@Ui*zX>%Z~1+j-{r|V7WSWA{!czd-Ddg4)Vyoj z-8XKVY6X9_E;Q`k^ZRdp$;98Ye$W3V*`K=4;(g2B>+>S2fAdP1OncIDbFY8kC4G-| z|J%)F1;jUOiR0Yzx&Fo$t{v8io2NUdepiqBALHb&*Kz5VpB-aaVcFW1mvebbSseGb zn|%>pKc_tN;X;w_$64;>|JU!Abo|Xy;{W|d))CX>HAVmbgm85~?iIh27k($v;AZOC znv^wHRBFoicfPGYqZA#t`~AKz&!${{9;lkwSEH+(yq@{cIlE>F9y=TRHQ_uLPT$bt z^=NH8elR5E!^c@WZL(hUm>zfHUlx$2=9FJ+cV)NwvAULuKsf6Pn+UPm4bCLmv;TmQT()hV@#yv_VW7LjXK>i z^RD>+6Ev0;co5&=RwlSmruo8!jurn`{Ve>b(6MCl$BJzp*Y`4~{)>q=)SC2*C4I-O zzy3$KIH#R`?|=R4szY|a{zukrt*nXU?V7HBQRwyL!=){;9P2`LUIAga!V6oN~McJm$#xeQ(v;!r$dCmAg8Q)C4``cKc=X&Hj?w$zS>Z ze%4-ov%LDr?|ZAS?|XM+_Wc*(Irh(4Qu=@IJo4mYNoW4vL*l>0{;w}DEwAfrnb!4- z$?k8^Hz#Ae3g4th_K|sii?&UFe|4s>_nm(X3>;4#XSq#W#adqdf7;GnRgbJ*y?gij z-THlhFKpAgt}(A}YWBWs`=kD5?Tz@cKl|zXuSs#I5BqHPth}~kt&r~Rq#r_i``67q z?jHaB&J@4zeeeE??GFE&weQFL>-?RQol^GduA9$w-1~n4%a0?Q_{#6C?A?;#%f8ut z&s5eY+w87;e*JY)@z2B956ogew)`(B;<7&`cj2L`oxlBw#&kKkxF;$P#ow>BvDW3E zH2tEDdG@!@+GSGpMSfy`avnF$nAmJC`1tIixrIQMqmp0K^5r(|zG%Dbjn z6{MQ$8=70UHg50PUz$gXl(V->2fnY}*Yi~TPuE|kMw}|{7cF*f~{k+A#;okqx6>D#`zrXnF<_a2Gx2L&Ae+49dJDJGzZ{aM*IF+8CHA$HiNDnLmuiU6^)+!V*X>`2zt;L% z^ls`673KNnSv4YQ&(7*Ty0)^XZPuslJ1gJJoSG`Nr*cii3Tv(_>L%&;FSs9^BE6V1 zinY3(e{p5KhtHLHyubPdjb^26O21=wWSiQI(}G`h{=VXt;F-Mm^w;++rmxz&-o1*S z>or64dy#OB*Z;3cd!xS|uA8A^i^_~+r`3GN7M}a{{^IVIynk6=v$nom^ljow&XQ`qqXEGiEqgcnoojj~tFu)} zM(0OT*O4dYpN_xgS=6~+vu3{SE6#mlkNP>Td7OWDSSj+}KKX4MxBZLrUwl%qz_CTD zY$}v;e|?SmuJzsZ!>KK^K7GF}@%??+ zbE{`7h5oB(U-J97GaxOXWyi$-oN~@bIoKLw#r7U_WIOIAciya8{o8L1g9WJ`*KK>e zK7CHetNs7Mdr?e4(YLQWr~lT@$yDC0eez-L3WX^9Gv5Z2fhY@WJw#nylR`o8DmGYbEI&Tqb`C+vReZ}r!e ztir#3Z~b2We{o5dH-GM#gW{~2k6IZX@c4&ctvht5NuNWZHjnLUsqJ~08_~=S^Gn#j zt6%Ir-PROvGOfggeTLbi`p^f5qS8;!SfH`C_w+^8>0c|~m`K_j|Le#x>2T`(#KXze z@lPMNUaZUcYWm9RzSrIA;Ie&LpW`F#zOA{Md^IBfj-_3$#V?+U-3A*DR6n^H_tm!Q zd)F8LT}OXy{Jb*g-Tqy#=c{j9GfiAD(Xr3Xj>985-OJc-e|b}+s^7vBi#P4!J9N6> zrvGDQvpx2@>|Z{&`E<=>iu#k_QFb)#ca8Sbd&*W@YUi`G={_5~`;Y;7`4Zmd`yeld8 z-%J({3CTV4%wNo&6wp5V_`3IpCdJLa*q!oim8!vwIXmMIZw^|0;xDU~yU_o~;W{yo zE$u=xPh2>1BWGz+!|VRnTQZmVxo`1%^`*HkLzk&%o|>er>wq~^Z&TeeD`qX zMG>YoPaiK$|Hfgn#{Ky5wXFUQ*WR0ozcJJ53gVjSzV`V4w$>%4Yuh6J@&5f*r1pn# zriJz4nKG+?@L#Ld(}{75*>zSmeEFx^*-IDiU$H3eO8m;4mc1|P)xSP}I4Mx;R84y7 z%N1Qgk8*PNNC-|e=Kb_JO>K(Z-pIQ9cOPxEXERC)oGdx{*7U>bk~aA-Ce@rV75Z&k z=kjaU{9C)jCmgsh;Bo5D&24P&{alP^wVgG5mD;}T$?j4;vqu71^<{c?7Duz+K3QNU zkWnHnJ;Oli>O;Ro#ePPu*IV{0|JQBd**;NuXZzjv)8-cnJ^Q>N({_Gqmq?OT+hd)* zbqtqyFFRZ*R&c8MFT62q??qenI^Psmv-e#Gj6xnGo)m08W% zBYv49efb>uu{M`B_y!6SR_=7z4^65`^fBOGQ>gl@;uWRpE|K4&?)9AvMyN~Yp zaev>veO-#Pim$;Xf1`@9{abXrcfVJZwBP=9?&Le)ufGuE=>NTEMpoLnE&pEqURkor z<4xK7S|`m3OZK095f-I?*Kex*>$@*68Jzf;_;pvi&5y$ur2b0OX3yF8kApg#|Fa7boOL;T7)7M{a+dZjETmRJgc%Lp=>+OgBDSrCV{&%f( z&YZr9YZ-F+Z*HQ%*PMimy~MPKh_wefGU7WxD2bf3L4QJgqBh``44- zU+wc`lG%CvWW?@w@tdmuUtPGNeDd@EU)E&k}rE|Eu!`A z^K-}ujJx0|*3yjly1jAh%gaqyi@uI~b$_L>{@<@wU;WirPk;3^Z{_5#uXgRbruXV! z)YexgANp;1EovzFc)j;$f4iC$XBRiDEUKQcQ>*ge@kvtig!WD4nb-YFWbeO?A$z}n z)V_XdXIRw1TN{5}*|#=s@3|taX*I?1>aYE8^w@U4X4!v#CXiL_=kJEeuUkiGe+sX$^$*}(aFQ0eeWbxi9NAm9oUP!LFcl_bLXh-XbkMsAx zx>$Sn?w6hGUd@UQ+ZS5&Zqx7A7R&6+*4~f5K3%DA-QlFXuUmEh`F#C&^5)kHMTM8e zwZ}NJF1~t~AG53eTAwC%+T_pYIW}_gv}c*Y)RIzqI-M-G57{R(*Zh-dO$Q z&Rs8;?mj*F`p)px*RR%b|0+m)BRG@UE8sw^+L3?VSF@^*{QerZI!x>OdX6hEZLfY; zySRh@R}|Ca_m^9|YIelD+w|V2ms8TJb9$$9+)UYb*Y#(ujhIxa8>zbb?!T=%ySBR= z_Pe0{R(*E#v?-_5WSds9_%Aiz(6nUF<35ha6Q+}|f0^qj#&Rk4rIq$K88gQRf4+og znq1W`{iY^obmq(|4KLBQPM1%2zyGogT@w_wE2*yj+t!PvLThr*@<`9t6)ioy@t33W zp0wGnuHFxK-%v39+0p;1Q@rlV&aXEoAND@<>EEOK2_{EwZ_av=*?aTw7n4U@c>cTj z>g~KSf3~qu#Wt_ZfA_g=*d?x6rn6jSg8$+h3;*9&zyHhlWjNQhEf(nycXS=de%Z4B z%bEF0Uhd50HTs%4NhWD^)UsB+o$5`Q)gMD_i;M2p>ulFwsJCbSA+^me)oacq$u%EI zJlQOv_@=(+YVr-1r$>J9ZQI2hQWd0{a_HG!!;jsf%<}qwV$FT0{5xNwttPdrbpKM% zfb-SgB!#4Yzy5f3#@#m-;k(ygcGg=M=lV1Gi0$q*H@?n3(IMjG&L)&uY?Rf2t`+{!D)$_Svl}zT>=NTEeP326?mI2+zszTHYId_vYkt+YhVU zn`(MC#&*#s`ET4`W+>c}(DNw zu6z8~qO0$q;JdrMM;{i-rKu%H#>@E4{JK4(@L1HoABW@4`JP~o%?A4pTk^^zyb&;X#`pN|ob^SQ=IY)5ZMm>z^^2k`m$@tp zasqALH-C5|P-!=Rxz_vBZyrVd*_BkyvZP4R;f3L~)lsi+aS3mp9PiR8D1VxlKl9zW zWaqa1>eH_k`qdrcUAF$_N3Y&ZOYchxzxknO!(qezH|xRf;~JkGlJ)+4{gTQFKg}vuKUOp0>EAhFYrNJy|IN#7;Yi6zJYT-wA-=!CASkwFUr)%Dyy;iNBJW9?C)smMA z!Y54c-Ft}nQ$=m@=Npj#pf;Z#mjAM=UjF3G}Mx`yU&~V)wgDTyHJI^_t%?+ADKQ?gg@Mq_{ZS) zIn9Nav?7167UaD7R^;VjpY4n1Ykj||@OGwtVw}O%urHpABZ{Z~yK9gTwpChh_d}OA zHI0e~Kjen%&-^{bTgP^~>f#Xh3Ga8E*!BDM8y)TP^X)AQeiZPGHJz!S^ISa&df?)_k6>C^~s-~g=W6A{o>oL7kab1pu79h z{e6dyMJJTc?w_|TD5L&V>nWFQzxjOAi=WyBIUX_I`z?qsc)od)>>-sp&%U#BKlEqc zS$%)e{*AW6zqX%#RQvsjx@6|U91A1!;&mzKSN@-VZvFa$Jjw^A)F-Ro``UI|O42j; zbxYrK`;%|$=j%TSF|pUb^ycaNd0i$|-#ET|9oy1%*yVh4UE(X>>`%3Vfx%5x?|Pf7 zbMG9{^Y5SU-Yxe2?xh>byYFg$<6?F8oc-}X*SeBlMrW&Ieut;>U8}EOpL^%X`MC)P z{NDY_s?h(lXCCMFZQWw+i@B!$dt`s!x!}q)M;AG^ee>h?oDhG$On}GG`2LSiX9_MJ z?@Z#)U$*@3k+0jQbap0tmfh|1-+uq|g24HE^Q(V1-F>~y7vBa zCzGz^gNy$D6nkHNe3ns5Nm{a^@}%n~Hq*Dhj@xSIKR>v7(eKw);qUZc3G8rsA6zT` z=Way#{TmUU_SNTXQVrI0{!jX=AFldKzo2jF^!wGTMNhxIUo?-?dfMF6U!-sD-tcGE zylZ+FrhQqqb=~W$zp7Izj2BJQGyk&I``6c`?VnS=u3NR$DtP->0R!*pyI(Odu;x3? za-aALbg@p+_m8&!@Bja|_x}IbzjZsy|7%?juNPYG|NC$D{{Qd)|KxuEkTv;hf|vj7 zXRA`Hp1nT#@c$>BO*Q|os@aR+Uk6gJQWCs^_I)(B5uHL4!S3G`g*}7>h zqVv7J_@DoOZQZ)-tFHE4Tl%&B+U~bPa{lt0PqN(dfBxqQ@9`)($5-acFaP=Xt~PXQq(Td&+4AF%3l*>i%(e=&At}#c8`jB|KHR_ z+jp7mcc1#BF)*oHP<&&Zy8|`?|ic>+9~V9{-+;BPyLQt8M^c7-KwZ*yhlSg zdEJCIRExJKEUG9z$Xl?f(EIfEL!S+vIZmA|EGO7Fvr272_OI`M1m;w(|2^fAmEA@D zf3^Gk(xi?(`SPE8y1YS;ShL?(^u6qKwd$Qe4@xfk+O58O zzvGEYp?eQ*F*-gJIQr|Kf4ZKJp2y<_3&M}j+8zEku_d%7SX-(8;qMFDEfQ+;oc(w0 zVt%ajQQPRvSX!}!g+6bPA9LM z^Y5U*m2@7qnDj&?2kU=N_w0Xj-LU+9xbC}8Rr~+ePL|!1$=cng@iBFK^}PP_r|V+= zJmPixp;S>Kd1KzeCGTt9pS(F=t)72B<752#&~-tDq9?=em)0%}1s$_h`~80H6hAAM z)wSQaYn&%eG`M`&r%deWBEI!4KW^{4YGo(zrQ&+J%>IZMKdj#@&ok4nv{SNwzhUBb z%>}Dc|9-!DWXIo)N7jBzK{dBo~hX3DhDm+kNz`m)g?e3$! z*0oa}UfKG8)4g}zTidIwudWSUe0^$JYq^!s-EV=?`|1K&+Jzo$y`U7~_weHUeZNhY zTcqEz+xE2eq(XvmTe9c8A8GUMtpCRCZDV`xxOm=ywllT!4qIRU)*Cz9Ww!2gULCu0 zP2ZU3R^(Of+k9qa{f%O){1snz?SH@T_Zz`(JL3(NzOS{;9=F;4I_`SyLdoLq_wAl- zzOu1*SN+YEi64d1e%sc`6n=ca?^DIUOWM3sT0Q*s9S*!ABRlinlz+cjT6Yv4-)8-$ zaE7qr6$`Gr7uJbC`7pU=bI*IXN8XhY%l^K(Q+B*~>%XwiKPKfJ<#?*Ax$=I7pYr_u zrcv)U{ykZ{(Er|gGhu_EDoxKliu-=El<%zOU38`_+xq*tFZbsN-F-h(*+--Fx#7I` z>TSO!#L3Qo|9AsiWFTJFhU$bl9-nh?MZ$rON ze!0G89{>M@j&sM}jLv!9 zo3ADxPX4=R=Eq(8^taYu-|=U@T=d&t`|e(z{FXCIWOJ3y>d>Y|SHAhn|NH&?;`%hv zY5&XX(>0@vCae|yyZX0#^H$sNK%2i4l8q)+FD|VY4zu_*mGjibNd2$tuEpLlP+t7% z$A9T?%jMFk^|dFj#$MU`(qGqiO_W?*oY4EsuI|OW(7#pMiAveof7hP=TAnp`q1Rv4 z>1WH<2CYA}C+KU;*Nh7}HNsP_e7^U)l>6Ml-aAv~Y5uBGT=%JR>le{E6O_~9a*lpF zcYnEDCa;kGoz%lYn`%5Sf8LaO@a~>d-2QHXm}iswJxjFkNU0dy?(kY%g)**B3#GU zt>rxRVr$m!)mw{Y)=d?9_UQVOubFawI(NiBuSwec#O=>%`wagtHz#a#eEnB9 zPCfHGYzKZdH9xqWvFC}%;+eMHe-f7j$s|2?JN-p)`6{j!d0l{l3%9D=z+PhoH1+CSQ}~reH;V$Kaexx)!%hd~DWz`?uF=TV`m& z@mXIJ-4?Ad<}i``=fB#@*O7O7xcG{5?{}|UsUyPwdv#HuqlS4;)?sH6voOKTvqe3I zulM{Zzk03ZYSs60|6fN1S6Sw(|Cv8mG(bf{^yS~Yk1otLB0U^gZd{UIHW=vleiOTQ zH}%w?c%zrg>MuLRPdfQ@$=oKV)P)b3R>?KCEPgJ@T&Jp|Dz_s#&}p5hmxfsXYi$*Q zFFf1MF7NDaOB9e_emms%{vAo5Cz(A~ab3&(L-%ptv<(v@`z`FAxSq6~`=|7<`0RaI zQvLM-*FCz#jF+ydd$dyP_095{iyZIY@7KR_B%LKKMf_=GfX2Jq+x4>Nn3UU?_SOHH z_wClCBK<2VOOBrWGwV@++wbG2{?Frmlagg7{o5yU>+F5+^S)dZND~WtypA_<{?EG3 zr>@Bx{4!o8*(d3gm_)ymv|RA$m*45~qw+Gv_UAY=W@W8;zGbJq&(=NjOmj6t&R@K> zW7eJNXX~OP|F7t^%4Ay+STrm9Qpm!H*YDqoy}9zNxyt_TlTda?``!**6{B*Ea6?oxUymq_D+5qutYQf$7|MY1UdbU-|a!&2c`4x*MZ5NY#Y!x== zhwGv6eLpo;KH2(GJ#KZ}nlNwo?xUp~zjHT)Se^R#$>q!BZOn@v`E z9?-j{LL)woN3qlJXKE>*{-ma?UEJ?KUvC#)`M)*y&dg9}jZHmFyd6J}^~`WO@^8kk z@Y=igy)zGbUYe@s5@r${^gJ=hAXTNe`9S>^_Rryy7JTJl^pUyi%mC3m23)H zw$yh1`S=CW0_S&`^@aY?TlDnnt4C_;=f6I={AXGD+rRu1cFvbOGS56x^6Y(s1eGv5 z73&o%o`wV*s(pQ={p90YbJU7;q#ilXUs2Yl&fliHzCCSe>{qb|6Z{?@T6uiIjnnU? zdam~HG^fmXDzg8}*L1Ueoq`F+uT09=boHlA_lfK3Z`S>LvmtycZ(QSix!X(FfHqe{pj?(pY8+eX7a-rOlE$=V*V;yg1h<-+!G8 z{kW&@`(d}b(3?{~?%vuLyIXJ1)9{|}pK9_$U&)x>c;)%<$UMGn26nt_1ByKOw*E+s zoXzz2dXt?|3*VkkC*S_%=*g31JH&eD(D5s?xD5%)W6)4yDzfu{mvQl z@g@75bMg0U^CqM$JGUd(>)$TRq#v0#ZVRv4?=^Sp=B$mbtY7u-zkbNSz55rvFMV?CUGm$iCM^l|PU|}_ z4_b5^AD@|EJ7K1W@1)vX`=|MOuC@Qe??sws{GELK#r5*quc>?Q$LqgX_bYx<+M;Po zYhA*57nRq}IsMEi_my|ej_m38i>`0_oAO|H+0UXwyS5g-5u1AJ^nB@6n}61%ZK{Zk zuiRx`xA)igZF|qHsVsfr*;QG+HjIDu%YvUa{Y^#5JO4ZBpF8!__Wz^YqPOv1Uu{v= z`@q1!Vd*%_W0DqY>5KTfy}!QxdcAtASGUtfK7_4T^0*L&Ap-Ph+{EG0ARw6bqWoV|WX!{Tbm!sa(G8Qqy` zAJ^>QGm9;^-tGGCV(vEo=Lze1N}8GG-tK#zcd&i>bl#(>4`%PUt#x%v`YsI;L>qC|*n!oG1 zSzpiV|GmJ}Y|X|0pC^8BnX_SDY0UgZ-IXM^%thCj{EXItZc9Onjp`j@A~U@|E=01Fe_MEJ}fuNK74)Yznw)TdsP1DhE2Zm z(tLer_L}+oU$YlQo&FO1zc9&2%8-vecVV{vl%Iz;s+H6CY<>Il;o)SbQt602LVusx?YAx2FlU*kW}Cm_Bdyi54vN0FlWMlw z8eH6Yv1b48w$ptkwUxaR+pP7hr=GOS5xal-)$i8MVxbnR_yf`FdrV5RuZR^M$f-Dz zvuU-%%Yed5@u`3O1CE_Lo?^=5`fqywwh1qnF5m3Zv(>x${N$r+Z*wG=2=|;)HJjHl z^QTqym%2~?{3h+KTDf|be|X5BYyNCS;hO6!?Y7K2Y&6UA6jNv_XXN+)>P@>IteBG~ zbh*I)t(x3*?Z5AGUfAEx^l0?=<(szuUSLP z@AcJ`SKNQuGcBn0d7Xfg!DEw96ZwBKt{0PwyfzBPTer5%h7KlW3}XBw}ti`KV3Jvq%Ic~0R( z=7cwu+f!dpNN96C^8eVksk|4`jeDex=UmBqG%2l>w{Y{TtFjt3l?G2Y{Bc$ocyv2{|zCLmtbkh^t_bpN0{lWi&FV7*htjq!qRk2TTQWMWN7?_9s-B5p{^|*>!^1=Hu_f9@m zy)4`IOXZ9A$I8!ZN;3YKu6_Aq|C`U<%`eXA*r8jcu==mp<0O~6 zCH?)DcDp2M_Ti5A>R)-M>$3YyT7M*HV*Evq`AwhmKD_!TzGeS$t|=O`N?CV2sf$A8_PI;Z>3O14>b(fNPl)fZo${L_Dl;q)WYlaANC{;t)p+*f^d^2x^?mbN9U zK2-?6{&{ZOscCio??1Tm9^(1^PiE277A8OD)boKs^#%9sB9cAk|9KjJc%wtP_0lSH-`)Fl)(@NRHKu{-(ZIe}!ZBe3{cv-cLBSPrs){$hLQn>Z}J(=bpOt z`p!~$;aNBG(+m$6Zcg7b-}Y&jzJ94^{|cj^YTNs#6pVkF7bh%xzh6f{!fJlp&D`C^ z>)KmNIxnUEHcMGJ@3{KjI?<;}fopH3*KWIQIMXJ4xvc%C6$QU$-khu%dZ9`p!8vnI zdRt?Mv*zAvq4shM{cn?d9xJrfiTGG_Jic=*-6gy8=M29W6R${pWP0-J`eD`BD@yB(^uI?bQ2?pI?>U zP&s^ZChO)6cY|XC3vb+P`jz=V@b%;6^WCr5cJpn2d}+PIj>-_b z-y3eSsMgvpDx0>^dW)HikXhpwb=MZ%!zL6DkZ{LFn|NrMZmEKU-(YM%l^`FG#Tp!O@8jc3Zcb}f_UT%Lw)zZHD z_LS@Mf2Zl*{i^6w*Ro~8`PzSbbL|!7QW8@Bo=Cd5&;P7<&HTFPCtv>a$!@v#_{d`Y z=KSE5k&|!gZN7eEv1Z?V+j0}mV=SWFi;M3lUy?Knc*`eoOS!tf>F!(k8`Jiy9MiJ> z{r)t|?EBj|yQ1fOls$dQaq^bE7wtYo>i+m^(f?HM-TJ*#7azUf^W&~l?fS4)QHv|? zdd22WaDBS8eA3M`@ypMrUr5}3ODw=S_V%3baz>N6f4SC&m;dX&yT01{{^}2&>$ZIQ zX)W%wTm9z#>1riUpM3iN{qg4V`i1&x#=rJ#zZ(}?*x^S%FH3w5B zYr<>e!=T&v- z^7~BvHFEbCea+hY9U%gKb5}Nwmn`mZPTNmnbO8@8Qw-bxuLf> z#WF1-h2FF;-!3+<+F7^b=;c#M+wCOgCF}mXAD<`bn)&rdg1?jPzpkT`um9h* z_04?wzyJ33{j6J4svW+%+|>L1_1$rArSvwRn{srI@ti;N%9r|E>}9qQtu8{Z$w0yT9J|XR21r-hX@VPrfMo z{nypI7e(t=|G)oScT<)9ivi|#{9Vnz9{+b=EwfD}{M*j^YeGA(3dVo_w)fqd(0@Ct=e+qZb?%YTh8ycVxTSMG zJKp~5xx!%I_a4E7_8oQmGqXfy)G;o}6RIs16gU%nOWx^TyQ(NlqmH7>BX{9~yboLM z&cAYTPU3L})7Y^lp2-6dd)XezyrRm2y{jvmk zpZUcPvpF?&)+M;CC`_faIy8yce4uJSMu+ZHLLi(<4mUSN<1=`v`w*U zO|&>&^=G#JrOy{8+D55G7V91{xN!e_^|WS#gi4#=N1T?q_A@NbN%E4kkFcj2x7 zMo6`%FOUE1Te(j_R9L%&If)u0N!2TesjSvz?dMgwrZMueb|$-ryBx+;RE0 zf3e%NgVxQT971;$PVSrEyE5Qy^Sv5Nb;FsV_KP*)BfMJzQ7^%%{=qZ{0-^VGmTv@b4I*on)XuU)WP!%tFM?XE(xkh_A(5p z;;6Ujc5In{>C>~N$GzKAnKat64=65kyJ7icHKSdbe9xQG@(Z@5pK53C)`)77{hFnk zr#0oo{^iZfHCMcv{33cMQ$c`>LSkIq-It+HckE(|Uf#2%t;lZM<(TIaRaeyhZGFY{>dH&eYW3L6`)WN?vz_D*Om~XU zm{OwmT%vVyo7`z*j@@yJ) zTjAxkjn^@^dBS7HfByGBB<6mepRkZmbaQlCGFxqG;8e{i-}Z@3`DOShdDcVC8)ipk z><;v&cH6~2_UsQ()_ylP{K1?X=3*N1nRA5aIT4}R8qF)~8b7?#t%-bkEB#s1j@;=^r&BXNek`#1)Op~_(&YBTyS7Vy)oRb~ zm@!L3t!KuL=1I$c{oeaww#|>jF==J?S8U}k*vx;-C#5(o`nh1-m1Fbga2QD3`|Zel zaQ-XNhsz>l|3xo2Q}gzB{CfBOZjJ}uulX}$-Cpsfn}j`e_Qr(lI{AB7mdVuHi7O8$ zK4w~fgE>9w`s1}%|IA!wFk$EXduuzJ^cidKUFbga&;Cl)J-sLFK|$YM57(lTDvXz$X(edpuP zE?&0xoN8Zg*pGb1lh3nP9?`#$CmSwYck%T@yZd(R->YI$R``4P+y6HVOIpRp!+)fu zBFRTroGnrDQ|;;>6|0YNpUpTfoKYYbW-32#(u2G87t8NY|F6V*UoY|86y=R`?N_L{ z^s=9vw^uF5yS`m%`|m$FN9WEq=5~)hSfJXPdMWeY(iU<41zEPjzgkWv9G1*)Zs$UZ{*Z-flVE>Lvjc*gY z7#Pc^9}WyyY1$RiyZPkT-_4BTmk*_hEIU2*)mizg5&BYGAsgG~vA%cftqs=~XYY#- zxz3{Y@Yw1+gO@K4-!pKzCUjv@%9rqc`QEMTr9bP;pFOS2Ykk<>R^J4-vkf0V6qL$7 zPS;uQaQ1NTyl=ASk}L8P!lzH&b@0@U<^R*QdN}7gZvItrn60Ntdfs`dCO!#}5QP)X4W z;*(09vDda%O7-#v`~GgWk`zN*?~cX&OMLF}|0{3tF|Cpkyi$|TV*lI1K=!%!{*?#i zR||bMQ-3_iKzg0glvC6HxSL9d1wGxj{m{m{D}}{(-(F)g*ZxPrf|I^pzROh7mVS9v zqtorh5+>W!w0!9_^YFEy*VlW#zRp=!{>!{D>reUQ^GgqhAJ)0@V2Y!xk$Zi5%kM@; zZk_&^hnjXz-a3j2TjXEf_gwwD`lb9GaVOd{E+|h-``B8+_v+NYhdVN-Eq?vs?bm-> zH&5fdv{wGcJC#1ZGgDiq_}LwGWsy=oF#qn!OLyy4wAH)5CQQhSSsHK9Q&XT;-uHOj zZl(;Q^1ugq1T=-)Qgk?>@X%;>GhLrQtUw8r_o&wm;EWxmZ`` zl;W$jr2(_`uc&{xcW=iE!Ku&pmcLhgkPt=DqU2B(=ap?fvPRsrR$)t-d}zJoL@_O+309x|KH+cC*d%s%h!$e!4AF zRekf#A1Pa`*DsnC7aVsm?Nyl3<9n00g{J(z|MhR>iT%7PpPzjDy>wdq^bgbf`M>Y` zTJ9(=Kkv8b%_)1|`%hYb#sAR_<=uZHjzr~ew_UtFep#~h&uZt$w^#38%h%WXp7lRY zbs_)r$)#ETRY%TW_fz`4_cwRR{mk`=M>QX3y4_oE8>X=T>%{8|OHbcAHA`vVPg{Pm ze~W&<4~c6lH&tI)`)g~}^{u|5-|I8x{<6LQbMLp6zb&ml>iygOR%Yeh>oxvXCb7S= z?>zrj8~^g4_#OrZE+fZTo|Cdz%g_ElQu4yzH~iMqQtg^or}kf6ecd){|Ld!-*DcMo z+-EC#@4so4dhOqJv5%(SUngsHYUgyVa@pXkUccjQ&p#3l-JjT$I5Tv)Z2F_$DwkEZ z$8}x4Z?j)t@Z+cWRZrizO}S%wIM;kLhugLKl`l-)YOeDx;`y(>cj_|Jwq>Q-QY~Au zf7*oamQFaZ_^|bM_Nkrv{dT(!nQ=~UlUZM%*cWsBEWu z=*&a+n4_2PSKb!2B>eO{-M%I<;fhqzP`&u9tF@<({P`zl@3$uQd(5iuuZ~Q*x_VB_ zybafCKU^;PckTbHlP}BvckTX}xH`0ahuXZuIoo8){N_|I%d7w zdU$JQ@ae1PBzL|4#^aQ}df!UhtiRd&en-7qx&E5g?rF>8Pkx>LxVF7hqBeg{)6)a* zx8z2)P5i%Ot-{hfPoyn8Wv_0j?%z^-YJbPPi&4?LCtY24Ez~sYZ_5r9HRerEd+g3m zyQj0L;MJ{f`@czxSj5&AmH)PX%l%^Rq;>08UyYd`;vT=c^z!Lk!PfIlH9J4Adpx!G zM5Ow~rT>^EQ;Lfj7khfvRoF~Arn*%8(hj{D8{?*$l;u7Xd#cW8a4OE~;^`ktZudM^ z{>A0<;gU2rcj+Y=#jvQR46~Vh-ge2)%M$Z`zwWrP?qbpN=W)~C&+?eMX5P$5O>T3( z#UHNd_p&_bEB0%(+qY|**+g@fIrF5q-O$wHusdCKRh&P{aHgN$2WQ^A*=x_QKeXho zq@RJS!j@@s*>5KMiS7S0p)Z;z*hl5I+-DR}Yu{-XJY4Z^(~Fm0QuS# zN;w%DBQ8kw33DjaB%T%D*R5p1&m^9q9mURAlb=vA>ykp+4wr*B8oJ-c`tnRu51JJ4 zQTpZSgr@w%Xn6>)j8h-tBXAV}J6wyzbrGi8^=YajU&pzw0EAw4FhX=o8m3 zkJtP8-~D5+aH6VyLF=|$kL6+UZXXX+$!<;9RZ%u+;Q^C~n}WALgddYyq1^mVOY{1* zH){kZ?2#9e@_DYOE&k+3V8%}S`hXynC-)p5u84|1FmcEG&qY7ZWt1u!3q;<$pmoD6 zX}RO6qkLgn-alq$>R&u@<>WdGLf56^skA9PUn z;r)+0zC4KR4`z;^Zq_X-e0}}$KXb0?7GKx9H&OfDw%=R=jSa0yWlvT-eO5istl~$m zSgNXv`Wsz_!1W)@_s{RU$M(9eCY=3*gt!Lp%*hD{WiRfVbNivs3i(a)%JDzf7m2Yx z;o(?z!PUw9-pBY4PP&udoqY5AnV=rug1BFkPikL@$>_A66t8}5&)klEpY1o7XB|?D znEd@n8}}h`(YP%>nbY6$&&=i%m$H0Q`2PGiE?(Ka>z!8xE>vf+3zI!3trzq9NlpLG z-%;yhR_y<`HEwHo+}gPAyCJ7M^sMLK`=JwI%*ns^jZwgh)}0|m`-7V*C(cTH_hjK7 zfvO98r!^FGHs8w-JDk$r;lJX?grX|(6&*9?DnI_TeZxwpcYjMm`4|6x#S*<r({7u z|FcWf+EGwt8})5bK4C$%ECGg

  • n|w{dDR073Ful7b=k|&N9)~arf=>7fvsJ<+3;Ncu7Ujl?|tqXU$w;`hM;y^W6#Gc(|7o z3%+;}+ut{}ec>8|uESfxxvGx5zP4sHe!Cam-G_CD?(~}>&u1suPS}6A_^zqJ&Q*u6O{Eu)lX-+JCpi7NZzTc=+yz5Y<^y88RDKG&`(@Bck=Qi=O~@Ndfz?X$b`5XO zj5?_F^`m~lltXVGX-}WL?dJXsjoWy*m>d$XIQZrL{P=fvdBT%_d7G*^IAeYYInpLQeT_n|fV^l`B4&DIWafa3G3v?;ZQSXQ!tl) zj$qjIZQo?n<;=7H6nreSXk1^ixL$AhvDj0|g1Pf7KJ8rh=Jd(e`;-h4i=PQ8G`!>f zG2L%o{K*eLv%EQ`e`Q(Z{+F@fv2Jbe&Lj?joxb*Lx1Edseu}TQ-x6ECY{Sghd+$#^ ztzG|mdi~l;i*+GkOM&?w0zMk^#-r?E1{lCs? zkA8T1Q;kHGYFqiIib;Rlw%gtM`{B|2Pt~XEZyKnsd-!jEN8j(uyNq~luH>wKUXs6G zT<*)i8qZI1U&5~C-hK1hb=gU6apBUs7t!C=yv;uQsq*L7`gD=S9rnK!lNYO>zNMS{ z>!Doe^u10Q&XP8^|LbbIq8_e(x_keHNkQ@JPjo%pZGYBf=~EV?w2y@zUD5N_M%q64 z_5Y)MAya?y|F5PuSDsjM<$nX{x+yI&F3H1;6D}n9i1KhYJxtg!K}$qvV$(v0&eoc` zto8eh%VKr+aYm{NT?&1XrEm25N59kG|L^VmzvgGjC&q;zf11}@If-G0{OPCnRa)Ns zj#~e^Z_NbB4(m7jR_VVxtIb(kb8B|`kw$5q9|xB{_Q^6Sv2eWcW_k5L^UNoYZ@nux zYH{}OHP(I254SnV{fXu7Xxn^K@Y??M&As8wHlZii=RG|qWMf>jsQ7F8s_%>c=0E3t z%6@*yNlQ)%kyqmPm;CR!r@!O<1NG1~D*fE?%A$>n@LjCosRv8_thn>H_g0js&e&X)@%-!iKl{)B zi<3@$Jy9a}opyBD-}Ixu4qqtbE#By*+s=Lev|f2^Yi=WJm2Kd)^0McT|Lp#Gd;XpO%zF-%2=Pu(ygN&=^|!i#^9r3t<~i%H2*1_hdiv_s z{zYX9M^5yHY?aYjzvS$eh6ZCLr=|Ikt-NO2YHC6)M^vk{itkI#so1qQDO>DurG(d&&V{>P`x);1c>UOZ{hF)t(lPcS_1*bW z?p1gGP0la){L5f<>(&6p>3c1Hgv|WI|5*Oe=XvY5SooP-UtJsh-}JG~m%4d;@)y5- z*|))>Qe*FRt+Y9e(~lTGQdq}-`Ay3C1@rYZ9F}cxa6h3c@%QBazTLm=KkRG&k@3}d z(pq7K0IpSEJeosYe(zFBZki=0HGk8t`j;0g>#Sb>e}1Ir*}+H;SHI~?wQb*) zdmr2NwcvC@0qE{KHxcPJrb7)I5^_p}SzQ_nR;1{-a!fsIx37HP_PDyz>1uDaEY>bf zv|1_4SN!?^)r@tyKiLnwx1Fvh)+0T|_=3IWXPqq}uU74-FZ%LAJ#I5k=)B&d_zRY&QkK|e|9e$^!E1YCEbk*B z>5}7SS97NPv;27LW60;eSC_t@s)wjj4xb}i zE{X0s)UmDh#?4;yNxhW^)ITw>U*$|&W7D2nrB=MaX0wKP<_m)h-tQcGK9}}}w?2P= zfxl#u_f?kkjq6t)mu)=2s30<-$C;ax>9E6w1P?JT)~43v9Ue;VJ#+FauYHcaa#e3> ztm(7B7# zUD@#><_T)`(Ki{hs!z|AF@ITCU0f3wC?j@b-wWop->LcczwUhBboarBW7Y0|njiMR z`@8Q=W$o{;m)|UUvqQ3(LsE)mg+tlN_+|OeDo^Mh(b{+P%lYWfht|g)dAQG4?7Mh^ z_k$loc3vMA_Z=P+;g(`L>bN05OGKKBoDsa+{2Uq)?S8iK>{_pX|f8y77YP_;P&%Ud7-~0CtN9HZLy}O0u^S<2;->Nh3 zrmb>XsdZb`v~s3TVlicE;^FmMWjr!NlcW+B6=K+S@=lu1APhNHnWA^S5BYr zbt2_`!#ieU)@GX&(N3GQhNTfLOCrmk+CN^`?RdI0_;#;pajyrXUiqn{gL@} zz0~TO?{Qm}eJc55rTr-O)-A7>2Mr1s7}&Ns&hirKVy$0T80#H-eLZO3sOg%R?S8xd z@Bjb*-~Zo1_v1fjo%Fj?HNb`ai?; z=DvW>Th_JzQY*A^axDB7|Iy-QbY^YQ{xJKvUFKg`EwYb{{kE=dxqoTV((o_pS1i1= z*B=kC+bUSwyOOuMt)R2*)E=oNhwJ|`d0XFzS*KL^ragXX)vwx!lvO>Qvf5MqukHUE zmAyY|b<~foRe$B9o_zcIskZvTv>VU&+t^Nin_uR_`+lzck}cjRZ(YwW-dZ18fA#$2 zhZ;LW`*~8k;^RZ#SM5|^bya`YmN`DtZGW!?t-0CXWRw@mA7oqpOW!w4Gwb&pr!DUn zrC$3h72khqZ?DCn^n$p+x>?`;27dTk{bm%i9qGAZ=#lTWr)TXy}LF~?xjlYjRM?tIwA*y2&H?>41m z{okO^I|Wz8?R#$3cW#^E{|_R7De81kBFO}tb zbN0B3|KFeG`8!@PB`unZ09eq{m{dxhr1I$e`mdR@&8H> z_D>uN{~X!x%-;WSp)SL*hswT*GY=g4zw^Wwnf?C{{l4%tSlV&TX>Wer(vzQSP6vzs zuk-u;rFq%}qoW7ETTaoF3=H^ZGH1gSQ_c@zo7$MGPQ@Ir3l&yaxQlg>*G|XJ?dE%a z+Pz%QJxScn@3+?(t7mb}|5@*v&0M&}!07${;7^++JkCElbba~L#E3;d>aTyCes5LZ zQO^BZGfqCbcKyRef9_As)e++V-gFiE|IuzsztvcAvF-B7r#0zn6BqtBFQ5MRifHn_ zzsqgyw|I+m<5ew;s7_VvY_yhYU~-~MH1;<7e=`ll!E z-I4P=R#WCb{dKBDBTaQ$i#3l_Ma7RfS1k6qtN&MTUtQ+^DBSwmy1rj`!}$-jMV~C% zSbJD2jeoE4^%Vs*AFfY2w`hv3{r4NoR>r@+X?y#tV0GAB@rPL~-|sBbe!OJgGUlJJ z-#T);aow8s-?jR7+)u6R>#r|aDZlH_tquNm)qC$o+dQ6<^O4cQ|EJCB7=h{2Z(iTF z>BB3%r+*?VukbS8T6lbgo&Ob!#92As_QrvCCZrU6@`!mSt?_1lyp`0I{!7ZQCboR4 z-1dmSZTl3J#KhSdGBI~~{@zV=OD(bbsAhPYTmODR_tfl1JDc7#vFo!t|7n`%aQ(T| z{uUnL`UgD+zs$0G_)$cA>5af{0=go4=_Hheq-@PxdIeK<|twd%^{$&=Qg>on+anbF{qBj~T-qX0(o<(yzwy*dI`(4L8)cbg=kK0;mwdI?_x}vH^r`bF z^Onm$;+^7G^x5FfTCW(>Yh0pD2Iu~QN=iPTX z{q=QyJ6D)S=|_p0_=Gj0_ki+P6c`4_6NsXqUS@eTQm|18UD zjb}VPUNgTsYt=D-+rOfH<#*q0e;@vjUlqCEn|kl+{i!uo>-M^SinrCiU#`DU&of&zZ12;zwq^To zZCn5K>)ML-7nzgRz143EJN~Wv`m}z%(EnL~zd7yw{e8vczR0f{|JUuGe_{K(kj?dr z$}d^wbG%*hZ;8p5{dE`5ai0%a89!Ok_Wz&IR22pW)~}AUye6$;tzVe`>#O+x*VjO& zO-6O?+S>ee^`p?ASO13gubX~<{i?NDanrRg%+$*dxBYu@jo#<$xB7T4E)VPbnms-H z&Zn5|WlQ%*towWQ{ngba?kOMY)$h!`pY_x3dw7g_h4S9_lMmMZHuwAzvv_~#qNQ)E zx8B}b%$c`8KWtfx{buP;X()L;Frtz46tnM|2lb?R6dcUZ;^XvCl|E9k>{#x%;PTrws_hKX5ch8k#tzL3&*&WS=t{t!c z3NkV-v#_w@z7$_4{L?gJ{pIYhtF~r+E&5xPAFjXj^x`s?z|Lc}8~oda1Y>vFpDX;= z(pVJZU1s36D=YiO6@fDor<&OqRs7yRjqAf|Pi=?O;lDVRX_WRDB~B4ee0Flt4M&ya zf9(^sr6>LTF>lgN>sRZY)|-o~eoGE8+LU@ZSS0FjS#0#D^YMIkf4iRQB_veZAK$4s zWz%2gA58+S+fL1X>Z!idzA4IVkxEwkn}KRsXdk(I~W(RoW%f0h5@!oA1kW9{PJcg&s?y|q4}x%{-Cj#;llvWkZH&kT#q z8y<7sXzfpGxhNIF_P)|qWdECESw8*=+a|rQSu>wg@4ad>6)Fn-u^Pbv&W8ibnkubCm>hIs8}|0`rW(7XLN4=p}1JO zXB$t_^XD<^1)uIweX;xLr0aWprFSoR+P?1g0*&NOekdaqjMD*y7s){2vRj($4$ zg*`|kp`D+{^!PJjBVciz9aHS@2&I$8GK`R}`5Tl6wW z^77^O4cn*oR4$H}dB@USoVBrTXUK}Zk)ro&7hSJ?^CIBI_4Oy5=Y(Hhyx`_Bf$XA% zyW7G(|64G#jWPWAk!N-s^-1sdJvF@1{Z{wY?+ZVTp6_+4$f%2cx>K+9_gk)aCwZ2; zS9Kq<-urQzBVV)1uiQhjPOJVd)K^otH$Shwx-ZJGTYYN$l+!a5u7tOop4cV*IH@p% z<0%KTg6GqFP5%y;&viZD$9>*dsAl@pCaZmiYri?gsxUAARFNNfUi_Gw*DI%c{_MV2 zJo@#UazxV8a7y0Vq?XOp+`JSD3ana{f>g6|7PW$$~yvNZh zU^XFBkI$j}rdOExrp*!s0d{7cm-ANGHH=CA&J^7&IMOdJ<> zym>wMY5nb~a(gVc%db(qbM1w|vQnd!?>F=mxU8`-Fy1)J{cPR5ui{QhzK_0LNPi=x z!lRgY%xIEaa^T-dP8^T^w&tCg@y%_+d-0z(wn3@qo}WHs=QFMQ>g%g@QJcKqt>1UZ zZIbjcw^UuB>fNvJC(Nr{z2o@l;$DMW^KN~=k*{K5-?peGDfV08n%b3HZNE7NZ+ttk z;bTi|OZw-Z1v~xwe|idM?7gL5CTc2aJm0kP;h*^0Pu;rbXPJKs+H3ynRGk0CgrC*H z%gc_Ru{KNjnf-oS=+gV&Vy^7_ee%`Uhec1nZ=e3*&8y3&vTwZ+6kMtK*8T7IU;1Th z?AaMeec+-7j5f*fBJ2) zM$0Q%iRs46i~AE7y*qZw@bnD*`?cQF^Q+$&@4f!n=gZ{htLlzS<_R{_*mmjsYTG*N z>HEt|*5C6s{9gP0?p>}aVSB~3e%c28p7iG4d)Hl((`&!0ertVyJ?i7u^>+U3SAYL4 zK3)1cZnaqa|8@2AKIfgBGx`2?y_f%E{L4%EJ~-yco5I}TU)m`Ze#qK^w%QSb-tIs*1x{K`(@Ssu*ui-ujyZ%{$g&RY0av`mTzoV zDK6Qb@%H}fRa?8ap4ywLd)?@2{Hx=q>(+#?TXn|S`_hsoS?L?kS>}A%JoWLR)fayk zISCuj@z`H}wtBu!&i-oo+EZ51Yr^k~w@kTPJJEK(dCbb3pcoy#nrt(#gF`3w%_g0>HpuOY~#I-XWcb_z0$Pj>-6mStn1H}j!*yf zC2Cq}x9q1i+ToMttxcMJo&W0j>#Kghjo-KQaM_C%?@woK*LQV`ayhaU@uqz14*Ie& z<;(w{?@KFflinunkNcbY>uc4{xD7My_kVpdW9AIAl=HUfCcOSPRE|5j&5QoZV%YQO zlmz#+1&7rqzm+%Dk?^_v=jqPy3re?iV}q)We%X8f)!qB6_lM0ofBYfeoZp=x8M|4E zn;6dSFsk{#`~8ORv!>^lm`pz$9L8<(t4V10{-~)Hn}p}O{kHwjA;)xK+Zyvb_kY)Rz{mscF|da;~oWXXcu#^6P5p^~55h!x#0ltIkJi{^mW;V(Zq~Td_Oi`TLEJ z?!9;U8S&)bSAF%y*MFNH+0C?1-aN7Jj?J~asonE-Tyd>9+n@3G_FN|QlTUVS6W>r7 z5_fFnv`HJEe0^8BHtuMR>)E?+;y2IY=8atcrv2`dAN2?8%=j$2y3Vh*Tr~aU)7Rgm zc3sl_aYJ)KOv>*I_gC%_o@6y+x=h51*c&OY*;Lt=Uf$ru|K;DEdquOSfAaLyuWj;u zH(UKo{>3B9_q^|0l)hp0i>W2^Hq@>Cb^eyJc<+ z7rteM()NalN9L#heip@CK52}vJh^$Uak#!o5T(&yY+?_A7M^XSK)HFY7T3mohM67Syq zCf6KrsZe91aguSB&^_Z{#uksvp1k-hUw-q5>yJzMo0s_JyfF$>xH3PxVAI*gS~Clk zVx1a~?B6CmYVTj)JU7Q?M@YeT-KLBFRfPxjbLM5I?8sT1F#mP`(&H9Myf5W0Zk_yO z>k((S^w**BH~C$D?XgfdoE~tx@c-Jv-NKs}DYCcCdw+`Ynn}ijgyVM~Z5Ha|GjC1z z^E{fBTDW`u9q;zGy&t?+nTSX_p8Wbp>9<&3z`^a?O6B%PNZbn6*f_b=+vNX3!x%q* zp-(P8iueAQs0ehI-&f!D)2eQb5RYdOw|iTa^ug~vo%RvRT70fDCSfwhyBNRf=yYxW z%bn)GZs%dQq84VQ*4~&){|*W!&a9h%df#N%6a$xCGoIUJXzK*O`zj;w&q~EU(J$xW zZs#EDpSK@Q{%Lob<5;V5tNz4*+j=db%+LC!Z~Po|F=Gk0;i2w=+@90dA8QycRy`yv zyIFRzv)`BRS=}=8_1B-Xi}~bu`~39NOQvdU%aA@<6#G!t`-Y;(*YDM(m!3swx@M@& znqZ)NXI|)}g;|YV@0YyZaIwY5zkl-CtXnDx5ADuuaOYj~zrOb5ilUjt@iQ|;f@Zb7 zXN~_`vn6fWbM=q2-^tiUJL`UK>GpL0S7b9mW<%?u+Rv?z7b%^utFK$v^t$`6<^Cq8 zt;Vy9W0Q5>x0mbsy1Zq3_c*1$!|0>p%uhd0URw30S~mLg<~M!2${zI0c$D_(>H9SQ z+0Q=ozu5Xq-?Oy(Q}v=+?q^R#0`?{RPw3yaG3C$oLl+wNpTD%czQ2EICO0SlQrot! z_p3z=?whl-#?AiwtL^u$e=XActM2_53Kgu2%AXs*dD1@VcenbrYSzyZbAF!p({|gQ zrU(7y_K*Hu{dtUQ_cKe)9?{IaH@PQEQr5{Vu`K+(cjEqURg;7NPkLXz|7hO(^7@53 ze+1uGJ=&Xgv&Gr#`7d$No7=1QUET3@o&EYv96Vb~_61zj&z#?`JALw_@F4HaaS*@L@e(HDkt0^-tpQ?0~eY#ZctH76cJaaT{_Nq17 z_Xa&n_MFuA<)Z3MgWd1XY^r&3`gP7`_dj!|{n`8Jy?e!7-Qw#P67OBU{h)vMj`K15R26qEt-YZnDs}vHVSUZ2Kf7f* zgucyvUoZHVGt=-PKga*@w+YWze0`O*w&?TZ>J|Tj1e;~!%R)93E7|P5vNBNQdid!Z ziV}Q#{+{D}-*|8L;+>Zn7}%aW&hno0ina7bY&t&=m@(cC{qn(MM_K-kb#M0G{d2YM z=;G^_uGSXHe%pF(>#vRKCim8sN2RC*3!nPEVU2fZ%)SF^yNVavzx}E9KeRYhL-B|? z%UbbJtNj_5c*W-Xh~{g2i<>HaUwx%{7I)r{%2~%&$<6-d_x^2MVEi#JtH_Iiwpsq+ z9n<~gTFw0V|HfvmnZ3V0#D6@b?bi}RawTtz0|kfwa7nw-xC?9ZCA6!Utj)M^jx_7^VNTXf_>Ffn#+X@_2S)A@9$b3A2s>f zuK3P*zn@1~>DR9>ty}x)r&Xbfm~V^O^^j9#EvNS#lvvC^?Ptu9@46>nybSw0>BYXf ztFOD??^?b7(e)McYo=Y;ly~y2v>lga&YC;3C+wG85NP_;MegNVBl@+np3A4L~^ z{kD8>_>%n{>m&VU%9uYr_b=B_((a4mDzWq$yK|p<6l`_V7VdG|GV%GF%ewYAUrFp~ zuDffn>~7`PS3(;6*G<^hEK2?efHq5+g6jrr+@mtBcMMep&`MQhczp5 z@%Cpg7kMxIAHB&*PSASA@oK{}f7@DizkdAt!qeSFUnaMH?UPoky%wgvb)uO}%u$Kg z;fwa(NfcTl7QVau-hYh*!VyOC(z8@XnQn1{B#q46W;Uk4|@ z`~P0OzpO-i(ah?y_aA%;PX<_+``$0rFH8A){yXOzo-N02mj6hL66+B0in;%5(j_aa zaGs}|;=sd*cR35UZv1{eZtwcvPv3v>TBr8s?w#4KXPp%sbU(B<|Nb?T$MR0`!Iyt@ zHKf_raP3h}-Zo`I()!b{zeqh%dUYdpX-4ALN3Qq2*Pm0Y$n~##|2{L%JOBUpyBF(> zKc-%Ae$aA_Pd2ZU{f~d`qA%Mv)~$WFHe7en*CS~X799Rd7X_Z%QK>WkjgyDMvnDG$ zo`W(ajSao?Ou~L`IC6`#r2OjoX9BK|Ip;svqV%qaulKA4?<@7l!;4jtdg zYg|9(sPd*>F5b8Q(Ov)S{z%sBMo#Nuy0H8*#Ohu_?(y=T7PueL4z#h-Om&DK~i z+PVMugx6aP#H_2&|9zj-Ir-$DdGDjYC0abaws}LJTJrNI_5JhCZ1Hp4yfoyLciC6F z)4OH%pM0pa`|a%;uT(DHZvAF8C{|NqAMUei)v{y+Qf z!pMq!@6LEPC9bmTUfS(fd1~K5Ii_UR4E-zh=cM)C?eCc~g?I1zqU-Y?G;z$(je6An zp}+R0&5QNtI9y}z#a`Kc?EdGtKaZ^UZohD;owet3wwP;Gz529A^|cdR@A)XdKl1zI z?~I+-t`^^!sO|sLrsw;Yti0&A_iO)E{ixMDUO#8kZ=*IQ12fI%ymx>2e{pW>tLt8T ziv7d_o9*>)L}uH+>wHsLe){zN>GzA`YrnN_jW2o|ySA`qpI(dg^yKyWiL>(eznUcc zE%v^D+v(j;e;lfJzVqL<=s zW#sC-m^GDe&K~tM)vA>f`u(~Z@i^NM(d!dLr>oq1{a&Kk;&fNcn-7ht+xcq!Or{)vy=v+|W1B@1#&=l! z(|&t4ef_6d@~~#+T|LpWyWM|2%xf%Kyy@QlbL&o-Mowkk=Nt2W-W*MX;l&Q<$;HU9qPAlUP1B3x}OO{oh*bVRr%kEr;Sp4++ldey4a zUfr{&M;z6>w(6ey+lu}d|6PCVZ;*;>-2b4qET?Mvp=W+|uTvJUT=wc>>Mir$+Pzbm zQ(7N7db(@oP5bwF%K3d67Vm%N>#MV$s`=z>%K!S-jtd;FyxF(^-uLflU7f~#c>n6_ zJwk$B8Nxgzo954!bPN74CwZHJn8bqjyM7x^4^O}I_}U!tcKiCdJsYS0-R`Pd_S*E? zl)aNP&tHsC%Afe`!kT-Me8;b;o|kOP6RD|;)tc8keeIm`{UMFD*0%ZjS}$fx@9=8+ z_;s(WL)5g*?52WU|NTCm`}#e~^Pey~qk%||Mwn1*J7dC*038i(PL_jC8xnLhxLJ>= zc)gze<;v4i&+xFG$J%_Q{;Jm}{_2%nUO8>S&#U(KH}ve#aG#={j{Z!bRm>lErF z9OO2;_wCd2j=EdSx#=YupBHLcIlPo+ZeY|CnXpWPyOn8Sib9A6XsZ69qrn3obAgUS ziayi6$8TR5(sDIq=CPMutFCkEJNsnk-zuJG{YUs)yyvg=d-qSBB7Z<#@6px#`bllG zSAU$UxG(R(BD>$OJoou-%w7ApGE=R0lF+J#U$a;9ZSPUN^h#Yl`~B7Xk)oxQFCNGE zsoq_mHh25^I~knXYqZ!msP3-&+2%GYb=&g<_Cf|Wu72I=$`fjH#}}c zd;MnZJ!Ly-MmO^XPu-14>)xaCPxyb|eU|I; zVn5PiEYIx9-u;Vh@|O#fY%;bTj+mWcZgB1BUc-}h%QrGk`T09!tN-?oyFRT^eUOx4 z68H60`N7(siDyiw$W|UT*W$=tmB?}N<^IJXsVfquU48#$Z>{~$Zv`sb*CeX&*+)P7 zS@>ba(qm4((jT-Lt{5sgrg$(guH9PB`7`OWzWWXrmRBZSCd=;~Ui#y&?V3F~`kYMv z=U;FTw&<9@X6Cs=lP#T>s_50Nof>whM6jLD;Q^zLNDnBc`yM)M63`J*W<7K?U`L3O zYvw+?(pmdL_Ds?eT9r51^oV}rA786EJ32qj?%5yTTB~? zEuT7l)zRa}>v=yd4GoOn5phN3xyiztjSU*-x7T|fl@&Gxwaay1PH|J~K* z6qp%)^^{uo>PeGdP0cLZlCypHZId4%`)m~+Fsg)1IOD-B$aL7rAi$@co8^#7)wH|$ zJ!>mo>jlLZ%=9Ps%dXw2|I+;7(}k+c*Um>xRu@hFcK*~a+y5Dp{y1;{ zC9he})U^KJ>Z?9W^F9Bs`Eu+3ywA4-JAS1<`JGeOf5_9~rBG3&55tms`u{Gd?T^T@ zaNtc3@BF%8qHIgaqsv;qgj*hb-qI}4ZMV>B<|@k==ZCG2uJCH9N{Ov|JG1YU#<7>L zxK01<;rq42kGGod#2&{AuJWG-_eEd46FzmVyh+x-Rxk7Q*00|}9CqveJjI|8pvda} z^5yk^jDa8bJ-xSNpGa zfAyGpb#>XU-BIhqcTc{iy>(aT-=DU>zK7q?Ff)oRcp~mEUKF44X)y^l}#-RHZ%l_$p8#|a)X7kD4{KK=XlbFQDIQfn6-y|sJy z&w^#kcfXh|dwj3ee&U7v(wyro3FeII2o(_HR;)@ zgPigP5#Lj0YJ)b>z7K!$<-g^08~Im%XLrxr_P)OEkbA$(UrCj9-qWvd`hQE^W_cwWEeG;C2 zt>>A$;Wo?pDZldX*M94~`)%*lU*GF>pVZybkIV7x7x=CG;nT%Cs^zKap>vt$aM`gk z-mh8C_atZjth;i1Os2)Zxqbgl^!JVh-tXVV-|B7q@BO~A`Rb*||E1T* zy^9T9v;Tis&#_Yn6BUhy~5RrAzRD^&EdI6TY4rK}t=HDc zy$^3PEWX&j<9?-WZ2cylqcy+QyC!c5TOT(mv&#MBrudR~9a&7}laHEVE&3d#d`nZeW+*dJzvwH%y!yISYud*|*{l;;gS1IskSs?Fw_jhUr zLbA^T&!3gs-#cB@eWG3e^!mjI&$X7XG_NaB-BP#SY4>&Y)<+e6%T8pK`t$k6X7k0~ zdd(6q-~T_!d#>_YA*+vy5uhz0#kCV3_TGJc< z@jRt|wovAF}X!DDhji=6~vjd%v$tf5~?Agx#+n z=L`62-#^vN#I)la%VPG|_6Vh|@GrhH@dS$TODwSzS#PY+ZaPMcU6dTN^0 zZ&|yw8kx-tqTdHvzCK)ZwN`X`k@Y=)?*DuHc3nN#7~5uNX}&>mjpe4o$=f0}z3Vuh zq{hP*R{V29%+9}mX3hKSRXD5edZ3#5Y?aT3uOq9jpM3s%p-$|-wdId8w*H#9H)mc_ z>CPs>2O-{z)<)jawr07$z1o_4dg|V{C*SXx_iO$Bu$(P5%a#Y#E-WnLNc{5KdgnX8 zv)?$52JYTBXU=^6fA`mx=LdI-pIo#1{T9WkFW#G9iQmb|-+kt9$*Yr|Uw)*ln%mjm zqmc7OWS-nl)#Nvq>eXM>r|!<*w0%|WJO_h+9lmNy+)}<>p8h6&R`2)w_IBshYCeB{ zaqs=UZ%_XC?VoHn|3zN7=(OKI!}t7+TkrC*=>N&*@3$Gf`zxSUUVf=wOT9d<|0>tY zpsi1LZ0afh{7vu0r)NL*UAVC~eAn`Mn`S+_zRR}oL)_X^>z=<6IO~1qlii1|YgT*sxqRa+kIpVK7!yZ&qO zr}x!U&!`(q*8jGxS#j+*=OKag*r@a8Jl}r_yZZI$7ylP?1+@Q0t;}AUe_iYA>aQzx zvqCG)cUJv)`hVB=qLP)TzOLLG)3wu9ij%c|@zuLAcUDi?^Zj*e)YX`In|i)heNC#5 zSbeqY>#DCwe|@;VWO_c;yWF{7HA`Ln>x%gzx%Y$JzP~T2v0n7=Uy$+sM?0_QthBzq z@B4p~<`-LC7r%PGvh!Vj)Q{G#zg2tXLmzIOzN`21MgLFr)&IK`{WF(|JpbGFa^0$) z%ME+;0;~6($;;pHOr|JmV$lkVhJW9F=16}2z5DCx37hNpKk7XBpDH2F&{y zAA0@C?yp}u9!%;sJv04S_{uwO-}@8}h<{o2FHg4f^W^8>-)~Vp_TTp6_g;Sg-@D)E z#jj*>cX8rbz5XC)?hCv9wtg9T@xQkId0IcmA*HtJ`>Q*Gw)bDR@A@)tZ)C>Iin$KX z--L}NZm8r*Wq#gwC+DWa-|Byfd-f-_#J!u?_xo+>?%!{|9uL&0`(L;G?;oqV?cuA6 z-ij?0wc}`sG`4vferw0x|EDG5m+nj2rJr^0^}qj4H)7wUO#FV&U%uvo(S%UD^M~)3 z>)sN+`s&H_U$%9vCDW?@*cQj98*NmUa{QmzG{xl&ZwuSSX{@p0VU{bpzpn^C`SUNg zzEwki@6lTR^2OiNCC>LuDc0am)lAG;ReqC|cfr*K-()m88ovH`=$2ZX^!HnG3tt@n z$(O&I0%i6j>oHbu*l{B?wMgRQwY}&2j_tOUWfD$)(^Fi&LwWBznNPR0>$_w%QyK2B{r_o<@l zor%%)C%?b#J8j8cJ7=fo^8arvqj|POiSXpi{CBr+r>)ukHHPKytCnpF{=WRe^pn4L zJu@q_ep5I@Os<>H(rw1sw|kHNo$!9&>0Pm#)UABisqe1)S^InU{<~jmzu)XM$2{drao|-+$qgv_`BWmrc4=;NpzOaesf-?EUop zmeAWT7n*k8+57g}_kI6kmi(W-w)$@T-0H8__oqq9*1pgCez|kbUU|p=_fzivKKbGQ z_xrWo%4HLLm)Cy3pMAGu-}}0HeYbdf{o3#Md9Mp!oL*Jo*Zt3Dp0DxG-xv4Se!si( z|AkM>%cmdt{Qce?zu9r~_mqEa+AF;JT2S`#->ct7Z%y73e0TMZSpV2-FXqO~{r;%B zz9nMWmIVw9tVxcue1*MO%a1J7y%_s z_C2rdudim^U4QQS?)UqzFIijVJ?YBJ$5LO_=T>dXkLnGxm^4}E$E>^Ji^}8PMof-; z8!_!S%Z|5J)8p^F-*>`y`Mve2XSZfwJ$hqN)sn9-x-zZy$8Gt^^ZxF4`%0Vo-<@C7 z)?d@RIw@*v)YnJ*R~KJ1+8BEBiKxf_cYP)AUy2?H->1KN^40TKcgO9Ho2KSBb=CD< z*SQ}3ULUdj{n{&wEX+BXJ$z;_-dc4u!fETTjd%WSTJ-&Lt)&FaY-iRhM)t4AOY10h+{@_q$gdN`%Hbd?`CNqBNKiwyM zcIzdz8sFn?ZQN>gEeBQC`-A4%ufD%J>DSXpHP!oP4Od9>SZtLSPmG>Dk7G_}o$%Hb zFOTm^d&Kvg?*q%}3jRkt20!ap%{?2o^hd|*psOO=4uwo#E#FiTSUdG!V)xshC+#2Q z?S4~d@ulJA)pOd%T`t*sU#}I6+qGx*XAT?tV=tb2O?Tq#tv#Rj*sZVXr!(W!GWo;% zBtEoU-%-e6&;I*q#fA31`4{8%h($IP&S^hCMz2>zw7K={$b)Jw)Ep+!9o-2lnyrS_mEw&b9Kk`>-(d+;y7OP)alFW%)VQA zt^eWbP0O!`DD%kwC|K7YxvFITpChMR+mD`R5jV-*^i$>hM=O7A@juVK0x~%`hUk_8~NM*?>YoE3L>_>l)PM$5j%8b9~^7`?UqqGEpjm7^zr%bIhY`Fk(Q z{{70hdw%P8#&2*^d2;FX$0Wri6E4Q>X1ROc)Ld#}zHN3;)mQya*T03eYs+7+)@jz1 zP~hJjW|!icMi;o8c{`=YkLjOP0+`D7-M7}RuzpnFI zx5iDzyXpI7c~hlX6!Hr$Gl{>drX5_kO2b~a{*j##dGE8os5Qu3$&?Nq((`6~RY z;l1DgymjBlMc-R1wmhh%?0e4PHEc}VBmV7QwE1h|`!696wtxNp_0CW3wo=NYP3+>otFy!VweOc1KYbtmy>L65>Fd~kj*{y8#it5? z_x^nSw4sj9s?&@9?VtYZ^PSvz^SfP&)xSS=*OqKyy~^AF=hny4mDamH6uqMroL)*{n_#U@$bK<`C>Vpem#`hcTMkF=#kT}?}uCI zP0cO(Y5Qc?k!rWoXa5%MU|?X;b)4lpIg7RQh4v9H^Sh`*7EGT*9Fa+3@@#m z>aBO8dVhJw&zhjGxiwQ`V`Ee9tqwo(UEMamMSGh6`9J&q&pG{c<@$*1`sbf@OZxTa zM(@ji8Mn9S)z6IZ_!6(9;nl0}FSfT5jul$}{l!%$yfhZ%dah& zn)>edD#f^shb#{{$$nqzd%gPNx&1rLPv75PK3Q$*=YU1oUEwcwYR%%%mr=fAk#Zs6 z?U~(gH>ZWa-}JsbWzUPhwSUaxzW+bFFK>(b`l&^;roE1ObZz&8?dt8%e>|#7WInLo zqnnxgSr7C1e*&9iqAgNI51qev|HZnK&%f+@FnJMY#KX0Ii%!`4fB2g^f8F;uo$33Y zpDW$=`fvT$?f0WgZT59*e*bf5b@cW9B@_Iv{#p9;?fSUyEXU=2vM) zl`yy1XbUJuZQAolto+FO-|-rM zo~bJQY3ft3e!4oGo0EfgdVApd$M^o%bx-~E^s~-7m$;+4?^C|>zb?3_98`W`>8=0Y zvbXwVpC;|^6)oF%h{q3`~F$#C*Qq(b-ecbyEUQH zzQ5nQt4XHL{f2O*trYhc)!*U!Bdn$_-FyC9vGwC6o2QvYzu#Yc{gZ>a)k&NA*Ejz* zQEPwmB(QPg-ug+uSxRp1-&tn=YtN)_-xuz8`nCV9n0}~LdClykMHWxq7-z~}fBfm= z)AxI&zwiFNJF=2RFXq+VPiuD8zKXr|E$~wO)QG>lP3yL(?Ty-7H2I>$^Q*kObieF> z{?UH*r%z`~v+w!yyomE)RGIltRKn+>rqMIcoRjMnixn>G2XW7kVwveT_1fzqt=|S8 zgx@{76%l>>i>axXPm9*0OR=YGT9a>id;G0DnBE)2@}auKT58>v#|fM*1q>Ns1<(C@ zR_@&_7Rq>OnNZ`+!qs0N1YXx|+Bczj-)f&5tM&YR_wQ^BIa8OL~ItRlK*+>{jq|t}=K~B(v$ErK{cwR?ADPd{UJ^u_>MS(LCkW{X^HvKFrwl zZc%){vhI!!Ddse_W5!A9509&J9$-|O`A<1rEi_y;)%1IQ!U-%S;WlhYua%a=_!y(Zw|LRtonjgCN z^J4AO^w>GI_WJH&WyhD-PY%5I-FJ=Z;{Ke)nNvPl@6!ekOI_V+D*VS07^di>w|j2Epn z@GRM{>=bAHy+7#i)yvK=&R;R^x_*O|$ujQr#V4H4=AJnCU~YTnosb7*j#VGJ3-6lo zCF$Ou{q^mP*R4NwCD}htzH)i?nHv4_%ze8zE%KgLJc;#4U(3y2Spj2mP}>*UijtT zi}u}FjF$I88|zMO%m@uvyX?nMz$hm&K}&?Im6a)BK}3NHS1ZdwSAzvwVq6^_KOYr` zzFx9l+;3;hs;O+DuJIEW9*eb!?=Jsv_1ZahwHIx-k6iMbVLye_T>Z<}6WsqdJbiB$ z{J1Fg@#5S{>h=+JD7NH}QV9eRKUZzvDS6daMHLp3mLPrmLQD(^b@FM$fnHjt$E_bq)sd zzfogU6cVpnSGRxt%m1Bc79=oDSoZOGpLs~t-!%t*EwR}vcfL`$Z#~;Gqq4Yl6W1>E zxVz88S!mha0Cv!R(>EeqtzC_Y3O6*|xLTQx1#-%5oj&{H(f4tt=azZThe~&$K)f?xVuD)dbTmD?d{VS*3 zf4O9Q^ZZ)EW#`xbBkkZCp0CrgSVEllrLW)Iaw6@&#w-!G4_hbogtOPTDHY$pa_8Cd ze|5nhu3T_WV`@;6mGWD>QnokuRNn0g5xPmsZf=_>ver~=t=qGY$=}Sfx9?A24`9}d z>)iN+`37?&hw!%KaQpp-WnR8rLKbY#}+8+?p9 zB2zSsgn5|`I&4^=VYI`B_m%LvN1cMb;Cbi{4Oeb9rlx`g5dmUCyiN}t zb_A>fO;9$1W~m_VUd>YyH<vkKcDUy;|}|WPjuDQ~M8Wf3^Gg!|&Ps>7v)>#m!ZnA8;Y$m95(8L(eTa zUK{a{~Y)D*XhrCffECjEH1v&Dm^~ed14M*==!=%eD-|%qisKxztvEXGjDwLyRJ)~ zlQCvRS<^RPmU~+`R>$ArV_;xwbDZTT+{Id&5nH#;GduRK_w;S+d|$`@+Zz6BNp;lU z?3LO3uId)v7ru2p>+#>ULAB{`v%gl_M(i@pioN=0|Fu7YFC()>$nUVS~T z{q_2y4R>#Cynk-%;s3>Vznd<)`mr=y zqx)}G^wRayfA9K~vhMlSHuKW#Yu3Noi&wp@kDIh^cemjd9`m`St8bR5J9Ue!vJv>M zdh(>2@VCCJXT$H8zP==U`v29KsK|r~cl=d%*39qz@n@nXyGqHmnH=h;7A^OF_SpK# z=TF`z->1YcT*~dZ<;4Gkw{P^%I?wF>Pyf#MTlO)3_TFD#w(gzpHLcTT@4lxd9%MhZ zklR?Fb3=Lgyy|0!jQ&6;(*_S@@P@s%ITbp@ZO{Ple( zxk%9VDx2N+-I5Ou7l-}WE(coS#_XFP#HGG#-|rtY_j0~&dC9u2bz_pz#Z7GD3+xWx zbX9!$uG7szK=Iy_xb-{abNcdUc~yR3m)ER2{Oh>*VS=V^w zou;Fl^n3N&D|hG~v%UN6I=a>5!r+4$yf-uqooRrlC!mpW^=SC;kT z6VVgemo0Wh3C^7UXM*Bp!GCY3PD$N8`QqR2mPx$R`W-B8Oj`L-a9Z2tX)XI#s&2Yj zotgGtw`0<$IbJ_rl^wsjzHF`j@3lXle0>@GU`5dMjjsYUPVeB9Utm5z|K8zAVvC%A z@G+M^owxbOnFopcxS!3-3j48cqM*SAmu`lY|6=B8-(R-x*YBJA-pnp<<2L`Xj{oV5 zgS(b~`X&E2uU}LDWXz`8|6jGgp7amvelCCiiOgFCJ)MbLzs5N6}Ac&^q5p%ezLgf^Ul5HWp!OPeNzLx(#k70-Y?7!WfRM}{_Es}-`6kl zso&)NJiGk;M_F~z{Y4KZ*=xzIGt}6zboWa+{p;bSk~8*%pLVkT;y3wFb$-v;wqM(4 zR@UGDoBA_x-EN2OiA&zU?D}}H<;Pq7wUsSAY8ATqRhw^I&wiEp`EF{km7&vONnwWTpP?kIm`g?BKiKz5La~R{T@!F`Vr72;t9r*P2<;v*macU-2Io`DYiamkYdOpML#q(dqv; zkDRMXIo9;2a{KBR^F!DD%(}4LqBDNay!pRt=GpG7TQ_C%fy1o(;(r$<%>1sq{N&xA z^-YiNJ$-Qh)mq7K9OnBC%+5@@Yd!6C&$lS&b>X2~zi_U-wjz0_lYU+2P21Z1k4_Qq z(>ZQL_dirpHo9kZ*DHSiVVf1sp06yH2V~T%FRF5_tbhOekI18vN0!r19sP0dzQ@kA z_-DFvzjZw?l-pQ-qtSog`|WqL@7`^a zJH_tCD{u467uw(U{&Rk_`hNGbL&^rHa-N>`);mAdWTN4oz0W3b^u^R~jq}{}WY3@f zX{tH@S~eGL+k5oIQdTDU=zCmWbnia-^?&#Kudn6=ZfKpW_x`>5)1Mk=vOb*5J%9b= z&Hrk1ZJR9rAJ+f+_t)_vHKU_RE&@cM9JW>kX_w;ZZ4n?4xzj zW_L-8{wMpzEBcyygneVg^A zYEu8doW%3*PtU$|sdiuX{;OiEbj7v5T4)JBeX=J>c>pS|}dtuoubB3Xo!it|IIq!j->3LqF>4-A(_~xu#qmZhC*_MS>8#)NC1&04b?RR&@2&}db>yd| zP1T=y@_#*wzY6T!oV&{|s=m-7KkZwMx!|-G`CZQ@6zOZNmwg_v{{Pg7f8HVc*My%g zcit@&*Jn57PyT)Hb^r9^*VnCc)zc4uF|Wg5QU9F!yIs$}sE6Mz4G-^9|N82s&-B-^ zCnGi&F5ANvvpK*`zx;iP+Z&Je+{cIiHUI8>y5Fh#-PZkaldiA6dQ$bqx8C-&u2U}m zcNTn`B$8{@D-k=v@T2&0OWoRqKkM!^$p)X#lW_=r^6`kx^a+!%<#{S)y}$VWYVe62 z>wdXb)y*hK>{WS_eeCz=l)Jyx%WwR={WLLS?L7NKuFPe%%#Y#?j_gcc5oVjq`Xke> zZ;My`zJrA_yB+5wHBOk~f4{bE#UXZqiXRgmu79}lNwvA}45QXP>azm8GiU(wP(w?(aWZ_tyQmdQ1QOSJ%h3 zZr@lB@rpSwDPQq`XR?pkuaX^hFWETR*)PA?pXAkfzxsW66X%=U`M3Ggc3o?76FPrE z;oNgsmN$2-HdWv5aOf*+a&oxu`?J!ccoJj3)4}}p0iTl2$X_o0eQJ4zciqMih0-Kb zec_-V-bU}Q*GFVH-@Pv9()iy;b)(#h28YlYj$djK(xL=!9XC9j* zVfp0qFCVeV76)sd&DWXFt3NsW(zJJ%U+xl9dtd#lOE7C?Qm2UFr;3*qiq>r&Hv8t6 zNwr_vy>DNK?Dy@(GtYPU{5I!}shX}4@y!0sEc53oQ|1Whi0wP4dcRWs*U^CJxV>t! z|2EbOEDc=LC28gHvd!R-M*j-o`AqB?|DSLEtI+?Dlhc0xg12oS)V{HN?mIPk-~TI3 z%+6^R@qe4@o=O<8u}M!B%v!}_skfuJOT2pT_dJbr|K5E2{6DVkitGEmZ#}w|{7)5|i-dG3Wf+Z(V=e*W?%T zn0Y;yu-a|hWB7MW*(|fC9A2{X=J$yGmf;`QzQ0)Yw`h<5?w7J!?^P>p z@-MyO;Ohxy2{CI_$SRq}GG|)VV~zhCGmbhQ?-Spmx|_de@74a?Eo*+8Tou21d+E1* zLIOu$MBnS{3BP~Q<x} zAdceDj}za2OSZk=a{OM*9QU*D<60i>_TS7={d3p;Nl&(_widJ9J13U1NA5dc)eZTl zEYX~&zfS)C>y2SA^Sa9?r|odbW_TR4#Ouqf!pJ=tM^5(DHz{o}yx>r`^PbsXhrT;K zm!Ew9y|Lx)*|W3ri&mW5|Ip%W{SKq+x6~r-Y(GWt-2G`|{_&|n!;T%lUD(Wi=PvSf zYfH#}zR=e9R?{hkKk>V7G=yv{GLKTy4SH_zU&e5;UirR*x>{@(cmGuve9JxS?620& z3-i{@`npnmmD=v7kIG(3xV_(N6P0lF4)rk^X~hd z>%*4yXFu5;sz&J=>{s@7~W7|9*9?`0v;1yVhOMXw#hIpZhLPrv6NeQ|TJ^ z!x3#y@809|toR(eMK+`EP;IvAK1=0EPd-8xzGfW$;$P^K zMHQuP^*VDtoct92%U8V-3s+vRQZHPwe801GZguXT?($Ec zel0Bcz3`UZ+xWY2f0NeE|5~+Bv`(Xcva5vAn&S7TUA%AY{8||@=Xz`w#}bpQ`}>nv zc7Cs8U|{>@ILlx76l>{`za{(akJhz*|GQfL|Nr|(b>g!{tIpId-yg9*V%_WO`@`4A zEV&!~wYYiy>;G|oQ{(m)*S)^G-h11w(;xRc^vv0kKUF+##y;!!dLiBK#Lc3V3%B|2 z`0ZgF?Cso9AHRD3`rr@W*4EZ-z4f&y_TSd9<(?K(->3g_d!I33&S&oW3%WC!8f}jB zIUdj17n|E!sLG(Ey;t>LUDRp*c=_+&WO}E^y?#04w4dvS^DR98pILr+75+1!Iq2{S z`#pD7yo-DN(zg2l?Ah;}D*vBc{yD_W+3wr+Io*#pJ^Ii!r(-7L%-+-dcecO&a?EA2 z$-WMklb>#%{MPzi>-QX8mhNRqYSVaYcCMKoaK}fgu-eMHyldh)?U&;B6!FiNxpiudhD&Ue@xx^GCGvt}vf@(feNQ-1$%GZ&c@l?pNF`ZgKuo zYASX)d_VbMYnZ3H<}!ixPN_P7*M(?i1pV3i>iLOvvG?^JO*Bk-tXuH>)|TnJZhsf@ z3w;{1Wcv&6`#Rh9-p{&JX#3MQX71TdWgKVE9$mS7;otjZSEk?bGc$9Fi^yuYvb8{O znNXtkx!CMapLlCG{;a>?C$vL8IdLHy!ORw(7&ay6L;a*Z#`Xe-KiIANP+-?PRJRoQdV{jNi=jDC*jtnzl*(?{^6*e4Avo zTy;udhnJ?gj{IC(o?UD|wyUP6D?B~@IdNum*6l}E`%IcYIrQ0`xCExY~Am zrN`%+FUz$bt?*X9C4Tj^{(4`%d%41Dfr3?+<}&fh%;Zx&lD=TFEYFAd)6I|70(LT` z)rZ{+a^#a0=KZtr*TX4S|L;E}r}}Gk{h1^4KX(>B50rEI7qsuw|49!81&(pV-aqGY zulLvs!(|QX*ER=7IfSn7wZ4>`R3n|$>v`~T<;t3`f8_*T%1nE|Y@7P_K-)V8i_eHB zJ>9(^U*Vv<-pb-bOZ$JX))QR3`A==;vfTfUQ7YRcxa)#A>=(7E7t8;zy}Af=J>aXo z)zhM1FZtObGO@?W_{RNPEO%Ab))u|GU;9|^R*X6KqZJydUR58~pI8`KxHQq}a@36L zEIGYh_P5nG{gv$tbSu1BG3$%*#y0{}AD>WPzd7IU;G9km+j}P}d#=7#Ybl$x!>de3 zWx|>*vKa}-W4sg2zVdzfm#^~m+ja@=8*^JL3~%jgo;9VA@vG+K%m3U@e*Lc}_$9t> zh5!EGP97}#aU$g09@&2;PPqRPO{GRI|FZm6cWYJ2r`5Xo zyZA3nQvYAO`pxT2vEqBXZl2n|nNO{LcJ-~uN zPQJE!@2w7PzL>U)|1a!aF1w*-Y0HM-+SKRk7b_`FvVW=y}yp$y8c@0 z(ci3fKWn`1Ec={s_EVkoG+`zt^EaD&JWO_YpRls3QuT@S%l~5i_~E9ux9PXPB(Idx z|IaZY&?agAuG9+)D=zEJ{V#f@K$qo)W&Tgw-CIs&&pMt}r7$Vsv-@vl{j){)!``cx zpMJC=`jzodz1ZlDxAz|r`S8NIXP?{mAB%Q;c=7jsM7qSy_UqTbU!QPv!I||R<}Grb zCg*YK{weM^Y^LrvuC%NTolt0d|LsR9o+o`?%zA-7!OMStQ4X)M-f7Ff?ECSS;~#~d zeV3Oo`Bb&t>1*9y!ItmB_iJ`nb8@LZZ+-bkJ?~@5tP=`Gm6`WfeL6pXzrL>4<&&>p z$4;r9ZdJA=FED!FO$m`w78UL+9#y><@DbD4m;NUegCk0^{OwWlP_^Jyrs6fL-A%!Sj;z$_0^x=DIHXM zsr_P4Q-0Z76UC=HJ)hTX|KfD~_Mds4`(K9iygHEEWG+dEeW*Gwu8ax7bVF-gi&l6>9x{(&qokyVruReixYk^#B6{%Vft{{*zy^mcQ`sU4MSo zx^;WAJof)y{cdmZ{k7$`r)#gCEc)N^Q++>svd@3r8!DR*yfrOfp`mIJm@c<`}?x3cl&(BbH(rBuJ=!V zPLE$HdSdqM>wB-Rx^mX#)Bm{daqpBqRn+bCy>9M3W#4k|!`3@a$KCVp6U?f3wkLIQ zQAOPCa4Vs&(iZDi#wlIX-nG@rds0$C{j3Xf>pJgDm?*xpH(q^e%HMA-+w0fIuAZk| zz4}Pho@rZk*Ozgz6sMcHo?0rtQ@MHZOMTb!M?FuMPv)Gar)F_Ve|CVmP3g`0fA4qg zeX=&TO7n8||2bV|8BrPk!**0n>#y-xQ8{bo&)3_<_l3*f^`DV(dtt4hcZ~lL&#Zf% zE^1;fZoiv6ZiVe||7XW}BD?ZX;mP>$hZFPO$4~A4b8L^*vL}D;AB@>j@JelFocjH$ zxx$j2{YOf8E}x$J`_+}!Y4Nq|)^xn<^=p~A{BCN1yY2T=IxffB8jmg97VVkxIlef# zp<(|=SL1fq^LJlgS6{z>>#IK|J~G`qe>^?EVPcNVm)+NA%-*}JwXj=tdAm^Kz1EtP z<8o_H+D_?PrQ_R<&K@1{RG=Au)0(EjB6-%Zlz7K=aZ zcs|{vX1RrD?Vf+V&S9(LkDZr`I}>xUYP-3yyxZP;dpCyfYk3-9T6*ovNxiwBn{0b_ z?!0BaKho`U?DxsHu7CS2P$}1Aztd|&eXz7u8}m;uk<0_;7iBeB&GXkS|8wYR@(nfP_8_C@ zic|Iq%-*;AdB+#i50kDh{~fU|F3Rhd>8cy&mYt1XynD7tT7T==`}ZFf9kTO3nlSBE zu!Q-kpYdPwR95MIy#IfN;+~rR=Gv|6CkRT^|7*BE*~~|trQYh{&Vt~oeR*5I=<_#uJZx^pJ(uJJRzzN@IX=7o`L+Ge%{M-M%In;fw&H)pvHAb1{y8@D zs5k!-`(9YQarUlzb5>8j`gP5Gk=@_V-&l0x-kX09W_^FS@>7>y==&)?Z(65r?F=a^ z;!>A(TGfuuzeHVji4~exM%>~l$|`7#uRi;}{Ka0+diPh? zmVbS!(S6U~H9mNG?f26+ZSE(v-us<*`hR4r?SJQ@PWA17y;puQ?2F}ZQd+p&_wY{6 z$3m;V+S}XpKYzd9e){+7koShN-`!5E&3b*|-&EQdbXE2HJw^?YDH@`jtSpBdH6jW`!259| zK)bJ6k0=|gu6p|RukHHsGj}?pW*Ubq=2QC>KjHbN52fG!e6-J>8|9o|^z*avgW{7{ zi(ghf{Z+{QgKbUUt4SM+-`+5C{BCa}Z|&l*za!s2qf2n9TvXqs_BVnKRa2WEZRNXj zKQ{j0tvBzbzi8T=?Rnkjaxi_<{^_;qOBJ{{-5gfN-fFrZe*WHcm8G|?Jbt*xfA3AF z_cbMLi?^InQaZGK){3czH>Lg(5LGD4;rS!p8MdaSVQ>7S4}0!)7&17C2HAILmuoJ%-H1BH zP^K^`^}A<9rvabHd5e4NDyn|WdRw}(mFHOPojEBJ`)c=vY~$KDUOFhS2N`BaUm|2mW7ERkMXp!GCFPuNE0z; zb$KW>y?x!ke=X16tKK`k>!@LIpt|#cg3phwYvOHwyqxdHdyD_gU*SuCF38O7pX{4e zuYdZ&0V$UkN4n#_&M`Ns`MFwUa;b;fodVW%#pl1=IdQqLnojr; zx;OH5@e&T}cEm|7ztfnP-NpWUPuZ2r4|v0mh6imq@ksD!?P=n;zZe-}=$i|NrNTW9v^a{ryIgrXLCrEGo8v7|cRKx9jrBJ;D#ty}-T zzQgJHQSUfYc|qy!It85_`)*9GKW)VBzIg(p_oWFND|arpRK4P7vcMbcrzt8XGOR2Q z9X13QhzN7C9CTdaqan)0+SVAz8F`-f^W#2f#LY~RBmRl>)`{p5w@-}i4t zr0Eo{bbjs2Vs_Ec_)Rl3=IJmb>4Yg=;xfu` zGGfy8_Tqi`Y?6oJ9+nByizhu{`~PnBnvlN_3+p%7R<2dGT>gnsb=fq@cHKk58jtjX zy1qonFlvc(XasSyvN9$dh%gW_YGrc_1npE6?C_bsH@#mr)c4eD-Vjd98D6h+lONYU z+RN|u!~Xw{&-Tk@Y98*$TPPoMdc7=fdx>av-}T6ke%5~l=lXxYeZOMO>aulyzu(@v z=Tsu-ZFcOPbH(4dt)lr7J9xT-Gx}6C^OTeXH@zyf{bjAQ@$Twq+qvEo#NIzB-bL&j=I4DrucGGpy7+5`hrZ?Zyh>fsdeSTW%8n(1 zM^;!cYKU}b7;&|>G!=l(dFNtvI_M}7&?d^o>7||@8NFZDziv-<)t*^-H%l!a7n{5a zDlz-m*SzETEA{l$nCy3N{wwpxzdieLiCp*6hj+HDznj0|YkBRz*1a76qjv9)S(Rk}AvCuB>-69M>;J#{|L<>o*xT%{ zb^HJS%@VjVk43r4Z)>f^l%D5Jg?5}rrXAe<-+b}rx211=b@uI5T|IwI{JQlQ7yZq% z<0!g&XZb#X^WKUNQrn*{|8z{I`ft)((cfYAaqFTFTCeUn{``=np6~10h~Ah#VcNf9 zy+baYd{xr%C9UfG3;%HS*B6%Ad)G8`^1bDKnOn6sX@A)9q|K&PN8|SDy}G|$Wv-(QPX?FnBK6kByLLg_>D-Me0`S$~#a-1SlIX;PSJ?ZUlx7H!#){$)+wnwqJv z#TA9Bw*J~^9dB^>`|s;r&sPhjDZleQyZ35F+>%ErPvYLjn%>$K_u=za*#sf8Y6I)?L}x!hPklcSh(h?^3i^Ny@g{dYfKi{G|Y2CFy&HA-x3+j%E%-0z3$LGru~ovNRrj(ddw-7o{OR9DUj_!| zdycaLrf9JiT~%LbtG{k{#JVSQlU6O;UVeA=^wQU_md(DlYxu{XWe{XG*=8Z(tMD??EN(X0bF?-D6@i%`>)T_hc z`xif7Uw<=UZe_E1a^l}8^^3pO?M$xC@^+r_wrbU?iTB=Rt-CNSe6`T8U3-)MW|?1D zH#sR$Hek;F@V8m{al1BE{lBrUdeXK1Vb;^HYi&&yJ^fm|-c|76k5gqYqC9J}_C`fk zN7wC*+w?b1IrP;bxvQ)GIx3t!VyVCGpy+8AKIXdrme;?ur@jwQa#8X5P}^zfeM_1( z>foL2<>u={@9m9^&j^XM{rB(ty>ka!=FYwHw`noA(Y=og7haP1%=PG+nj*{Z#gcX9 zn-}!7Ofc7#eX5-K~1la-}o$PyLtUf)#KugRj<~)^YP2xZy5Wv&805m zvy0Ono&7HFey`VE9p5qKmUFV*`+eKrZk{aoZ~uBj`O*t%vZqx~m+cEyPUc?yE>3wR z=enIHtA$sXKT!GCW0m-A;osw@&z`JjnU}BiiMu5JOj_e@@#+6Qoo+q<_1&~ro9=zR zCA>EC`jt=YykpPbx#gXL=fgg> zv)4T|EPl>%?|Ag;D<5NpR$qI)zFz0L=-0KOw|*J4#$SKbU46JKHS3pLP<>kbyO-}@ zuKsh#xPJMunbzVReO?Rhxt%k*D3KfZ>9gU!|0k>eG6}t1l{`sfN>}Xnv-*psYDMWc zUB9<)&ALtdwNJhe_g#4DtX)Y*@u;zSFK;a;aFwq zwbM(&RZe|>9s2#pMn2v*JN-_5ewtOOBVk;9r|MIO=B0a4b|=5yk1c%>E&Xlx?ukFb zpMQ^xTYJXvuF9(H%lE^>W3&DKpKN@x_rv_(UrlCA_nKZgeg6Be>-=9S{=dcX`uX3i z_ve?~pM3YtrmwGGpH^R6@krly{`1-&)9cp0j99wgWy;*eJAduo>Lnk1clFL!Go$xk z*R)=9|7DGm_kR6p|L;05Ft9K=&I%OqVy*S?{VN$ATi^f3n*aaf|I@Dicl`fb%c4ZyitpnH-^=}T_owUTFXpP-%rBVKM}Fi?W%u0p+M)j2t_ekV{#MI=I$j^{b@%(Lz4x<@t$)$_>ZWMt+th;> zZT`ev)j!Rm*0M)>HB;^FBM)sK{_X4Z-XCTkHUDb=((&r{^f~y|J?fNnQn%;Z;|Hu1Qnv`9V zda7{qDd%zTdB2Tkd0moy604Mc_g207(R%%bwMqLYFZocUxBvP2xYg5YXScJeO!*Ki z`Txx(j@upEcK!M4QYvKs_M}iydX)L(2m8OIZWgwgH+`RH$Be!`UoS~#{@(Djoc)xn z_OuJGC+6HQe^LGCp7hVk`hPFx>8*>|D?KOa!`rWa{hQYFnRK~vGEZw&Zeeeo<|O`b z^6@{qjh{8V?+3lgFtWPwD)B?Z^v5OFb%dYnPg|q%JLr!@P+v#)a%qRfCt~a$Ir7_y z_1vC)bo$9Rp^Baj%^|fNr|Yt$x@J9DpY&_uuZ{ORC#U?hTb~iJX5Y7%=c_lZe=EMJ zqRz9j`o@lonLe?>$2{gwnRWJC)kK#MzovxQH0@+PWi9@9-sRZ0*VXT@+Qhpl$mMU7 zR>bd!m4D~F|M+qH+~|2SqIvh!b_*|%F*L&{ENo22>aC*S`% zy8OJ;sllbo?X5L$-OT-^<@yOd(f3Y2>)ZG02$%N~uiBO230=QmZCxg|XP5oeQyzOorc|NAI6&%(*5}oxjelqORLC*EiFpz~PiW)5R|1)~+?h#FoTneZ zXlM8D*ZRY5nu}Yv%J2NY^6uIB>$f=ZY%vjFciD3!^joarpItUCTmGC$eZP@4+jOqm zr|NxA)(6_|SpUxbOWW6&33DzlypgwGz3q0&oxP!@dj9PH_bq?EYwhklb(N1S-~WHK z^nAH=UCVs+XZ!zJpQ_&W)BbGr_mx#!!p&Y!{{8>2^yly8Z{z;{>${j4dh+?@zjd3I z?fd&@-k-TUx3AdHa9Mxa#h?tb|E>+4^Clm5ou+Mab^z5dC!|A!<`7-s(aTfM(yeaw=t>Di&JTbcgt z5&cwEq1ZIhYR!J$AELcC+r1B7I$i4D_rCA+T1VBV_g}qw)%iE8`~UaX(~q9iw-Pz` zUHJR$PseL(zqRaY)$NO0`ZBNR`>msQKWk5|y?gcZ^@YBxzm?vv{noZOURYOX&Blne zQ9D)FRq9XQyZ>pqqIvDrg)>91W<;yp&9c9~zH8R+z4?;@?|Ezw{(d4hW_xAn{?}J# zTq@U#lWu$Ze&73jyT87lymN2d->T?GSNr1CHaOpTfBt^$_xroP#ztL<7Y@I*`D^9o zihr*_`!Sq%pM3DM!YIbl>}Eqe;YS^RKTrSJt*ozHgtS z@a;eEbTQT#^`xVJTXl0TO+P5Iq|5te@s&rGoBZEidj75^l>gNGODpEZ3oftg{`S+! zGk*2D&X@1v{A2d&wt8N-(%W}VIW=eci91~r*X!GQxJ>BT*Yx#C)c>P@Ui<1sGWm4# z>#q72yZ`5@_w)Lf*b1Gw`Rb2FW`6Xt`@3(4?hK!=zUcph7N2QN)!#$zUbBz=8Jq7@ zwe$Dulds=TDK&~){od_f{C~s3+q_9@PcGcED=uFA{)9Q|RaVmXS6^4!e`mg5wTOsC zg8AKbzu!EMKW6;(W$Cw7MMdd?xu^bCuY6XR_GOOi`Mch6O*$QuPX1nJ`g-Zb>!ISW z_rG|^`X%CFi@$ZryzGbT_Y15xKee-bY3;(dpTpOd3r~M~ZoApGADXVVi}ziRviO=_ z7ap_4DUobnYzJ8t1uV2qU&i9S0nzH4lY0&{jC6OsAB7&?=je!OUS`yrB zObZ<~1++wjS`IY^E(lR7UuRTYx8?3<!ahv0?s+HL>km(P9`A9;JL z*xu^%c{(<;4`f*_i~lLKUo88(QRchjYqr%G*BL!{WG?IbCHU#I>WFofb9a8>dVF@$ zN^!HlZ(apA^Bqq=vuEC}dfAt*x&Jg5+?abr;3>Pk{QQU)ah8SKGA*CaV~)FbL#uz? zk3+f}Cz`!q^v-4(LrB`8V|jr}7gbHximVL!Ryp;Y&YOmt0R@a&B2zS!q*@L&CMu+8 zh;y+VYBpGKMMH?QmC4azMSzx|)heSsmG^$U`@AYD@5q{|Mn9SgWGv+iA76d<{onq= zYpUfRrrUAb`fu6K_+$C+lKk%>_VJHi#_eDIGxf#4XGUi2&-GqTR@eT%{iNEKwyM`L zes5LJuble#Vx!WLiY4>Py?^u1Iv!&l@HuLS#-Db5gE_r-QgRimrfhvQohQD{Z_)Z! z+pAtPufBbn_g&os7SrVkP4g2?^j2;c5cBWgn-J(`0P?_u77=0CNP%W+Q&ZrADH_Hc zbETfz{&;TnaqY*TsL86&ob@|Btz_bxAKA(C-CyzE&-J2A|BKK>|9SuTT~>U$skqbp z@TuD7df)Jr|8BS2+drn5nc8ofn$o%N!H2@PE9#gI+&Xil*5s(~opT*o~?nnEZRRyYN zE4#OA83h?T^eP&xnm&W)&G}6S9x!TzOh^%tW@S3uupvQ1O^CCpB~UR$seFFrx;yuZ z-@fjvST5?(>3hmgDCEN1KYYIbp5MLi@}yesqwW7+q2Bw7O~s!6(a(Nxqj3GgN?Etn z;Ty`sH*3#nKD2~!P2rwI&Rp3EN8}ePpU?c_!#(Sn3eSo|vU}fe4OqG>y~F%&;gy4W z-D`7p$9^n0ICcKo6^E_`?sLz3cSTHZzwKA`UqARBc3m;osSN#c&T*c@ES^NkAeZby zN!Is;th;q@cHHF3&5dhq zgMJ!)^T|9MG3~I=B&+RV_cv%qW%PV0lP~Mr_V)eNu7jc1`VVe+&t0u2{d6YlCe^ih z5n{#{iWo$=Z4PdmK3CId(xh1jT+VqnF+S8Q3Q3e$ez_$0&w>eBHx8?QJ~m_0!n4O* zB=((td-qhZxoK%rRm|i$1>&5mH$J$#hrN=!GpzLHjNYZO_D8ckk&gUPyZ^5f2Pek7Zdo13+Xx0Tc`?I-u z_AkFR=pPO#-<~z?s!f!2L4DSojIHeJR9z>n40)=u*mRA^UB@8Ln4<|6j0Pc7G*m(3 zPX?edLT*;@Sha^L7fVY^!h$PKX`4Lnms?&ppA(oG>w7X(IDESH!n4Qs_#Ia$(2HMl zYtPYxcLls3^}e~gIQ~y%eBQ@-X5u1$J5Tw34-7t?%p&!hD{+on$?w%Af7gAE>Z~cd zu=}@IpR66Tn8a~Khus^0$Is*ND`hdw$gwTYebS&2m8TNgqGYc8Ux}yIZrTz>k+k5j94 zM7mh(J^aF7$Nt+}x3z3z*sJ$fCs*w+x^Ew@wl)0L-p}5z@0+hWu6KRc>T97VKUMUf zjG6sQIz=46+J!CbBX(Xh5A%=z`hNEQ&iPl*PmQUKSvB?PpZ)hIBq;v3+r?sG z*mS-4*Qet}zqja~e0Q|Grnltk>q|LzL%%+k*f+6!|Ne_|KC`|EzWVxZ^~#9X$A!-w z*I8*ATqwWV^W>-hmA2PjzgijjWv|(5@mI&QBc6XPuG<@P>b{`hYpuIgewAj6oV<&F zU47j7C12^_CyBD^jD#2emtA_dqwh())mQHr*Eds|@^`5p%f3JDkjs&{CyNC9^saMS z?o--u&ilputM^*23w{(el(F#VRJ$I%Yu`)J*x0Lg1@9mI99ncS=I{0QPa-SpcGjMZ zFWYHfzgvB6b{a>~uX_Kx-zVQqc^8-W?oZubmeiW*fAyZLd+ja1Z+rIC-JgGNIBdUb zUh`j|{cG%>NvDOiO|O1B^8Nm@f~NoSze?tY*EQMJhhL7hy|1$_|NZp&`+mPyZ{K}q zs`q`HeS6DQ_BmZWd)MmUuiq?eg-j z^)znnyRCKW!q>X|+HvdE--5r{J8iR9uT6UYY}fVO5!Tbsr5@ZD|2|T0{TJ0$|HJo( zPY&(g`g-e+JF`~XUemrJ(9>)Be)40N|54Y!R(;W3vNiwu?#Sd_nYKymQdfOlw|i^d z)0NeC z_|*S>O@#lo%r03TE z`TFyOztZxKuNwPRu1$M6!S`UT*F5XGo#AU;Qr^t^TfXi0zdr}HWbVJdx_)xi*^S0A z6U963`Ruyibz#v_tuMN&L2~&%Mz1<&t<&}~4SJPkGo~Bis6jB$l-c+3=KkNU+jQLgn{Oi5HJ=#~kEAIKa z$IIud{Z&lPm5pC?de;)CdrHE~pL~k*y=A-p>h4vZa<5gcuRN~5e75`j(pA>;_9_WI z4dnW}-qSNOHud+_D_>2Q>Xq-TtpB(E_PbZp|3~cEcYVh^z4gNsb$Q7LsUwCR_5bsOPi3uK=Baz!=nSu& z{hXVgElDD3YD%SVYx2#XzpSvjqZ=zLu{(Kl-}zd>k|0&@KfG$%J}7# z>+7of^QO(8t8(3`@6DPXt_k(uPNg)@$v3q1U<`cqc(T*t1a?M=ng611x`k|7qP2Wp z``+3o4E%=wJs9}E#Fa4ccmAKiATRx2fx-0e_dm4?3=9ml`yC(unZUrn@LT`w|KmLj z3=Fd2Z||Bvz58B-0R;XFKQQ?g0b()ydwc1B2?K-NopLajfq}V!fq}O zw=5$CBys%IyXD7Ey<5S+7(ao5eQ5&&BLfJ5q#&3PLcvKiz5`qWD}$MVfk7fOKcyHX zE6~u;z|P?R|382Ie+C9Nu9Bj}Yz78~n38O;TNvSPfjNPFW&lVZ3P!UZ)v*i=AonmC zFfcHJ7!0h-a#J!B85kH8a#O%|Fu?5q`*&tQ=n=5#iWw<6MPL>K14CJ6N-B&ErCDQh zGxO3I7#Kv#a=`{6J3}P}A}5nllnPP<(gTv`DJ{wY#Tp|61FLLtNm3321H)_xJz`(%FAi%)D%zWZw(mO^5){Jv(r$LSs zO3u$I%1h2kW?%$E1_p`Zk`i#dL7ldYfq{Dm0|V2V;*#PjBwLp;FodZyFmNj{FtF}n zU|@a3z`%Ntfq})Gfq{jcfq{7&0|T2Y0|Rq00|ToN0|T=f0|V1b1_tJ81_ov?1_tJA z1_q{W3=GUZ3=B+F3=B-i85o%K7#Ntv7#Ns9YU>#o7~e23Fur17VEoF!!1$Vhf$AKz`(eZfq`)X0|Vn^1_s6+1_s7k3=E8i7#LVD zFfg$1U|?XYVqoB0$-uy+!@$5*!@$6^nSp_`hk=1Jh=GBJg@J+hAOiz`2?K+G2Lpp3 z8v}ztA_D_YG6MsTAp--q69WSeF9QR=5d#BPH3I`LKLZ2T0|o{zJ_ZKvYz7975(Wky zGX@6EXABH%y$lR2KNuL;yBHYQ4>B-ter8}`KhD6wb((>J>kkJGmybKJ?yBQc*vKbhd z9T*sx4Hy`h?lCYhuVY|fJoo=jmJ9|4CJ?=qfr05J0|Qe%0|S#U0|Qen0|Rpp0|OHq0|OHy0|R?K)NasOG+Yb} zOfwl6m}MCl*pD(Wu&!ZXV5wkW;3#BZ;F!t4!0{47vlc_bg~OJCfjg3cfy<16fo&H9 z1M5!)26jD&e^`z&FtC8c(ij+63>g^Mx)>PP&NDEu{bOKYna;q#}xr z3=B-5G*!yLz@SlFQk)D;t4z>z%mSsg7#J8}VmpdUlJlYJK;kPH7>b`UFm!)mU|7Pz zz_2}qf#Jkb28Qc57#N-`Wng#{&%p2_o`I3Yk%3YEFau*?D+6Oo9RuT;WeiNr2N;-a zf*6?6N*S2CWEhyXEMs7Lp2one%*enTyM}>z-gE}$S1AlEYFrE~c9{$;KK~h5l5aAw z)HyS-%$UuAmf{L9@#VI8PAiuD<3xpJ7$q1OQ$H{; zx@5C9G&C>?cCa-xG%$ivHd+bJ$jZO~O8hLKT!B=AgTxuWGcYjRW?*1=$iTqxl!1Za zB?AM)3kC*;4-5z`$?~YW^Njg~`Ccu$h5@VJiaz1Bic) zfq`KQ0|Uc0s5pq;&cML1pMik^M4x3~U^vLYz_5#ffnhmR?M4O$hJ6eS3?Q>X;vjQC z_5Lvi28PoN3=AOlVFm_<6ATOtm!KHrhOZ0^48Ir{7``zufWz|-0|NsHgZu;X2gvUr z3<{@v3=9nSp<(llfq~&a0|O(d^#jt&0cHPXU;u?R$ShDSGm0>PB9DRLFQ`Swz`!U7 zm1kvOVB}(8U<4`p%)kJSo9_@lNH4<=1_m$&#mQR+28PEB3=D4=7#JQwp};r$aDk03XK-}%d7mI@101IyH-Y>G3M)`L0@(#}D=065nsK0b z2Bkkx`T&W6+zhez=00Iy*r>?B zaA-LL!=*k3hWmXC44*?782-IrV3cBIVDvi9z*wijz<6{a1LL1g1}4)O2BzjL2BvjM z3``H#F)+*KGB8K5Ffh;bWng}?k%2{VCIgFQIs;4WF$R_j2?mx)Sqv=8(-~NH)G@GJ z$YEf4&cndUevE-NIEI0>WDf&t-%JM9b&?FM2VOI>AHq@5x<_zSy#6s9m2P|rM`fFy!O)AcO$b!J`6X6V zvE>y6Rc?I^MlUP3zO35z3O}mZ@dk$8)$Bl^+MVxlp}Jik!0285?)Qyg)U@XV8p0<2 zsc8=gHShh56}9gF3PEiLzQIxZ!SBeZS0NAOvA|ocZ5z2CA~{)c@9#2sT8j z^X&hwbN{=~|L+8&_6z^pFZ}N!5%pa7-*e%A|9LPv`@iqZ|K5`b)N|s0&x!wi$N%>p zLq~nbLE><-|HS|P6aV`l2qJate;+o~bNoLVLQ~d%{67?d&4idd@g!7m;>rII)PL$f z2%Y@j4?_@fI0-QXWH`iCC;s;y2O+2jNCs?|ctObvxeSMB6{qZU0cW z^&K45Zg~qw)mvXzZ+%_0CK!K_cE8> z&RB9Ieerd8Wt6k*4!knTU49o*8Rf0Gho?S*)JK|(4ftd$hFN|L;bsjCxOkDkJ*WM}5bjRS$AG2B~ErRSc93 zE=%E+Q9rDng2=+_uc1;OO}PTCk0xLFKj9*%c|gVb2un=_shhxIh*=-?!K!AgwGU<` zg|$AKa0*|21gVUAj{bu%pycttJ;(k)s-&J{f4YwRLPn4(36b4Un}v{E2`6#YL=bU$ z)<-o+l@X*?8aee5sA9s=Kf+oc6|8vxsfi#Y*7^wEQ7T^l7^$O#=o}$;l(1GzkjkiH z%PZuX2*M_*KEhKaVb({G-VlU@bd-?U5D`c#sA(Q^1l>ye+Whkv&n`2#_n$3f+M_v!!Lr$L<~c>f6AIf5`C{iCMSupSe* z4+ZH-wVnj^kI=eEooD|;x=0=8KwTvA`bXX8K{Z<6x&J+9K%F>HR}NeyL3&0#C;oTC zJ4Z+V_aY%sIgi}^f%J)xNn}|hHd^PX=Q#Fm6v%ue)B0h`AXQTTaZrB&)DeJnj?n5O zxH6DAu$~UF{UeCI5M9V@hzL>_2}}P7saotm`5#m!p8}I-LH(mi7qRz`CSCnM@yh@H zs~|Mt%KzSr|9j8>?>PtRTana1g1841EZ~X;&VclfAS9%2g0jKNkn1CO=LOOjAgN;N zKLP5d!TU!OPC>g}a5LcyXzg?Cf6vi>Cp#T__;bdGYD-_BWfD-Vpoougani?63Gx`y08 z!qr8}S#byJcpaoZ%3pmSt&0TV(zHG*-~19*CB1-DLvRvaAK~aCq4bX+V|S2V5@y{5 zu9Du>?Rp2PcaTX)?Sm`=VZ+lQt||#@l15JaZ3n)=t0X-2QP+w89Vh;Go%-K#5`-W% z3#1x?lf=|V5RGl8Ktp*QXQ1^FCF6CF`l#m&sIG+ckKi>Cgh5h$1TSYmCC@QL{ROHN zU{w+o>m$$*)^X@~UGH(|xEZ`Af-pcL zPz)mVlJInt&}uG-8$gv2co-034m@GN8SrYT>+l~0!qy=|tA;=W5lB5Fw9JS#!?y1K zid+q0)Lc`;2vRp;tB*?8KQ3AKsA$cDqBW0-);xj?))lUPh;6X$0c5ZaUK!=BxPx3DLB{Lg z^-;=#D`@o*gbN?I19gs8+=JIgkXk8k)jfDkgvNl>L=Y0KJ}O%G2)SDXXT$qPr5m3k zBci)VFDkZx5WGHuFefLbe|aUqpav! z^%0znmUNr(F>VH>N@wIy#5iSCIU@Ao&Dc^28>Sr?*dO4L24pM=cxPm z|L$Xi5u}y@DL($c`veHVOP*d>O$4eMj)7)FAd2B6L>vnV>zaYeG$cbH6%Dj}22Tk= zIzAIm{GWK@fB!Kg1nE^l>JeCd1W^nkkAbJakW9wn28b-IR_Z+tn$G}L?Jf8Wjjeb@hYU->`b5~z#R zcao_3s1IBXK`R2J>Jm~J^_>J&M%`et=M<11q1ge|B)f~K9flL>I!W`mjq*@6g z2vLbfBKM>~QYZiSo%jba3OOb8BK3(7)eyW&0@p{73J94*>k~n899lMnaM7|VW?qJf zz%x690k3`FlSB{^cx8mOK7v#_Xe6i(+V%#Stls*%Zrhu>ZEtF~zD7fk+6P|mRBd^M zRue&_(E3NP%IJCJ#^;dPB4iRabM&-i-4n=oSMl11g{$vF5RPi-0i^CISaAmpLHb0H ziY9yM?W`rYvXow~G6T6XLSy5pn;?}DTJ2N0^(C4JxF&+Ec>q;LRomX6)kJVEa%BYXAJy-C+py~$ z8iLnB$P7qoL?a<77MVm#fK*A4Y6zKxbd(@nr1rC*DhV=P*Lfb_k_Jdms^i@McJNpqWW=ux zRzG%uU5KcLj)Mm4x{m$tI*J8hucW|Q(CQ;lAqFYc!6dvIf-`W{0?6f9AFQ_pSBzO7 zq0~f>T4`|9N1$%Vng0{dfqFd?FGK62?yK1Aqy8KJd$0YUe3hE@5vbBR4XueFY{;k` zgoM<3pi1Nf`l=O36o6t7TsJ|~U?nl@BhWMuY#9u^Hw0(EE2GXszu^dLy#uLz;3Ook z!AVFqgp-hLi)*+8t!D(ObdX8p3J8sj+$Tb-gCJbwItb2&)HrC>5S$CCZXi7%NM!_3 z4-o-ZLoZ?E^9t}tUHSUQ$n{a>#;3@Y5zlwFi(&3bgzHTp#5xzYRf< z$|!Hyt?Z?@VU^LX%*EGJ=U+~lcPVMk1$YI7#z5{OK`J9yee@s?wBqhT{_2OQ2(LW-m2~r<*T>Rg05lh7csgFQisx#1;EXaDl zu9N@a2&F#4Q5kg|#nEGe)JI4psB;9V6u^~H-?9Hamd3%4N>CB-G*)0U@n@=yE%7`8?q`s7E&O_pqszE^g8jSEk8^MG3 zgL+RORscc<=pZCy?h=`VtoDT0N1$0eu#cc23<)135>h2WV-u_rA_5_?=mM{l!>oNE z)excrI{2#t86En8wcddU!ZR9#0m*%E5|TF|H4dDO%s|fL@Qe>(AXhz*dIur`VWW|d z+6O|y>m6i9_14!AE~Ij)hINZ-w!Es@@~U?0YY3wTwyX|bE5X&ndr7d$2sD3GvFTaG zrf1;p&(qTNPs=wxFW(62B2^-h-meX5o8b# zJfj5dA3^FPG!ouRLSnqB-TAg21HmgJGzKI!qLIky7HdNbo&_KbNbYDm@C{NE!AZz; z5~MzAJ@ON>_PgcCzxLz*+fRZJq&{jn1*($Zl@WpgUJ8!fKO$Hk!K);!^%0~p0;xF* zo_+$)WI<}Wu9N>ePk>Oz@&8@NK;0t9$Q#z45mrG+cL~&)I)*Tz3p6+bZgPWb5Xf?f z?xX*^k3wrDC;1ywW<^^o!qG9m|ILr8Ev0_k22czrbC`2PvVp!E@`Rss*%A=iw( zCqT=#k?W)GtN**N{_nW*zw;^xL29M$>;JoN{OVoXx|XVMy`+G zRT88of{>Us5n6o&awNEO1g}A`R575~L|G1pzB&#ZJ5UqAD-(K-{)1FN5E5PubsqW+ zuZG$W{6rw!6GQM^24O(59-Kt3cd%AT@Jx-I*&&q;YNCowFCYv^7YWiif{0XZep#{U1-MoM^^ef%BajrVGJ;e?rE4EU zYNC=gk4ix`(Iap-1XLM8dnn-g2v%Q!)^g`9yOqEEHl!;AuZ9Yi-O5{X1BAe97(kWL zvKxgO2n`(}RrEb)NxEMZr5nkje;B6LlT?-+2Vf=p3jHI`Y2@jxa{>p!E?%99$)J z9sdtOpy4}6j|nmlg;XD5_LaJif^3192B{OFBt$Qa1QpJx{VYhu1R}wuB8YMP|HNbe zCmj6`sxpp)NJupS;vNH4A`_5Ec;~412(+$5b{kw4!stDM)UkrEPKPb10BuA%37ty< ztpfzFxad9ef6~SOJ(vIYUH#vMiaM|S@4NyM?6~&7;~J=Q)N}oR-xW}N^+D#mVD%1K zHH6HCjG;k9AaxKj*>wuEUZwLiDAhsw=G|xicY`;?^_~TFsUY=H-*JouZ?N@mkm?Ci z*TC6u21FcE`#{)`3J9_~4o(uB5`tDt;93b%@1T+J3JAi0WJb)&2wwYOGKj2?(5fd$ zH3T8yRSy~iUi%<3Ak_+T6$2N62!iVx=%5^23R$pX(+dbHN1d`S-S85Eiq}0aUjMXc z-IJ2F&x+SPEn52wgw{O)_lzDDz}7H82IU|t79f*E`QT~@(ldh8M-Z}L*{%GgH;a~m zQ1S9xB`a}K)O8(DMC7tKDA)V(91WUf_QJ6bga=hCx2!nM=^UQfX*CI|z$ zdV*9*xT+zzI9h!KseLfX0j!T2_I!fmg~q*~A+=KT{;!Z)sRh%lK zc_oN~cJS(j4x|A(mx{I)PD?AF(KDP5H_Tu={x$LjQVKe$^Vm1fzagBpe?bJ zPW_((Mv$FQkX7G(Xa7&W1R90w1y?=DRT88=>bwG~j5@FX?*XH}E1)&}`0FF&Q9E4q z5oETg8@wGGQU~>%1Fa;7kdS?6@cAR;`l##hA4tuDNn%PtYaj5kgw8|15nS;41W0{E zxH3BU9bO;7t073egGoXLE+BajxjuqaK=2K%5H2zaua6)-AP9+E`#>rkxCn%cOd?l9 z$W;%V4e92<$K2qx3?>6o@4%~@icK#-LvY|e5kwFo0wK#cg6f^(^-qh}KLrg*ZFmW) ziPk-b)IJ4k9u=*AQndO>;hLu)1nwLatRYYz<%6e+kh@5E;K90*RrktQ->X=2zhuSj z%()jb=A4i2KjdAz!8&(=MPi?lcZH~B5|>^$yaIwVvX|b5uyetkBUl&dUJhts1C+$p zMS_mk;p!g|>?^@*A4~?M8iJFQ*GE`;NtIh)L#B`*B&==%9qd5-cwNJ;5AgH}XCS9u zI2)4YA?Y8D#H^1X6%*RphStM>(dr{eAE)UwXtoH_KSHjRh^dd7!6!Vlf$JkkFRAVP z|F-l0TQ5LS8+alKbZ)~r&|wacnWOGgpxGiwHPn3q+KcHr{=XAa1;DEnI0LyR!o`Lw zh8S`TVJxVZge-dmG+qZ8wS(3>NV1UrQOD8$9dL7S)JG5#kb5ybN5Nx*kp2;v>^}yY z6@rWt!b!w@5M+i3Y1*jg7|3mCX7(T{099Cr|MwjM84It%AS>&pp8XF&Q_lRK21bxt zX~L=hlg@xnW`Ng6klqk-mDF<;w1%PU#{bUip!Icq*Fn=vUFV?D1nOXdmnAwrz7z z*#f7u31*@7%1$|oR`Fu`A%aS-Y~of-JUR?4Dv)&!a1v4zK}h8K2(wm#I1)}msv%?& zxdK9CLq_c&B(70AbyF^b#_M`vqjTNY zKx1@}TB-LMs6Ohv{=e(S{|Pt$_ul~3N1($uPJt%%dQSd_Ab5S$1>QRVseK@9lIkN! zWz+#aBnmQK2kL4a``>jKG2Me!(ID4BXl#@U2x+k#NF22eqGEm2xc4)p0)mtEyFWru z1Na0mv}y>xkQ<)OaaBghH4daELKdmn_6E5h1n&yLYae*k17SdVK$s-F#(^`cQ0pVm z=-hh98o4K+vAA_lO4ozN;tIi)5vWfD9+xXz^E7|eV+hJy`6y@kgPaxjz>_kdeWsB5 zsASCpWCW_A);=s*_o#Hkqw-BpO4r=WS$s8p=9%QFC!)J{dsnS?&Yx=>->c+ZA!MDw zZyd|49>^f(z`$?Gz^TW;q|U&k%D|w&z#z%MAPKLDAPh)N1R+VPk05I;kn1A|8?7dS za3M7jgoM{e5C(EJ1ZP94Av6+RD?x_r;3T9Pf^Z>hI0+uLqe*=Psg=OX@W`o;;8_4t zA7LG`gOA!ls-)I~-_3Ho0*Z)rfA6VCa9aQ!7fd}P!u7egZ zbYA=4c@4zqxbeROTpz*vM`$yBkR}9(JPBH*38{}DBxphnatsK#(**A#K{g!qodX>S zh1oyqIR{$j*?khU^0WKc{|==32(8jV=7K63w0Z}L3ty-JV!)1eM6QV-Y{-&1(5WU! zCqsfdN8tVuq$`A#g!GT_)JOH;1L)A|BX~6gXAn~#q18mll@Xo_2vQS4`a$rT2vYAL ztxo{;j39!g>z_jUN0s1?5`1*76kHz_uXzfY6#{jRHasm@|0I9iQE?s;v zcjnQwN&BK2R=MR&vyN`k^DEVGOp`K>5Yq7Al(%P=FkukXW#Cd`V3A>9mSkWMWnkcE zVBlk5;AUXpVqoB4U|@w*LvRvO6G2F-)JKqB9;6zAlaQJSPC{xTG!kAJp)rtaC9G8v zr1lx2^%10E!c{B5Ya$2(QXhea>|nD%>SlSAk=)~ ze=~SV1EfBJuWEqIA3^FP2nnf`poW0ENG<1}^-=qI&`A!ErQ@w9|F<9e-v+KeKz)!S z|2q!-Z#xX_l7I%vAa%`QXw3rY)PSldq}~w0`l#bDtd4^WyMgNz=qhn=C4yY?L)p0N zBlw&VgaIjgK@||{EE1#!K_>f;{s$rOs2#laflnks81TxdA3Ved(GKZnK@Px~a`yl9 z%OEuK8mNAmedGW1E1;UF_vHWn(@1L(kNxjE1*)y4UH(7o+W+~t|1Z4vf5F}Vvv2>O zb?g81+n_3G^6meVZ~vco3)Be$ZG8nV@a_WFN+9wUs6Ohy0UEOFzYIFZspl+cSw4K_ zN#_aB2p*_=OG`Uup?!ciZgb(HW_MxY7^dGYqaUy!N? zd2|k~8iJp@1sS7j+yA3=-*-IK5Tr6{#@#uBbdfN7L-1+{!oXD-)$MqPRtG_PLufS- zL=ax@z!}K34>B84??5Ue$jBRQoQzY3Aj&Gvi2EhVrcEN zg4Iv*);G9D%TrtysGfeg}43__L+JcbM$x(uwU z3`|N4jI!WPkT3&-Ft{dyR7S}45qs{^+h{0n*_}M_q2G`t4R8{?(C%LL^4qZGcF;qF zAUz{Y5>ms`zdnMD)WOeUg&iw|b}Bfumjpfm56Xs&-9d)zAahAzHp+M%eBiDQyeSFZ zErK&3i4%=PYgs|KkWo8G1%yUo)<=*LJoGNofuHS%ezzV5A>>|C^KsD11n6oy@Y?U@ zv;UjV{%<%1LQSVYy`-k&;9k=4|IOeoRXezbY6UNNfK)~h5>h3BYPU20Th9D%Jqzv` zo&Dc=2GpDeZG8gof$BU8?%+VGq(jj9sO>PUR)W+=U=rHX0rz@9RTV_zk^dcskq}rV zq=rIP-+2_=&w`L(6S^QhBSQ62H>~1=)IMk=s4_yWhCquMkg5_$g@Q~%`c@!ikfr0` z-AvF00LP$bto0rTEvo|^j&$;W-)T@kXv#%Uy)*sl|C!hS&%FG9>UmHN+qmz4*~b4_ zOaCX%`X4j(f8>P!5&i!oC;m^K{XcKz|H>`@8}|KgJ_uUOHtEv;c{l$ry8C~@!~csO z|DXHl|IB;;r-ISsyZnkiEZ9(npcQxB z;Bz^^{UgvqP{?x7j>CT-Te*=*%*qH$9fVvZA?+uE*F+Eoqyi#XAHgdi2m_K0;Unje zoCv9d;A9=rDcs0e8@Ucbn=nGIgCKPc8VRX!;DQh?G6}7HHodOg^tx>0tI`cGOE$eK z-3(=yZ+Ka;@m2YTm*pE?macnVvi4c&y65E^UKX!^Tm(LUyA*sVciB47>D*=O9u=>B zShVIr@#^~}YaW!Xc~rjkamBhv__@ney5 zU=THB5HMii)L>vyW?+7no#YV1OW21_l-+#LmEguRcOPr2*1c z%2`3b`UuyeJLLWm(%Imk72uHW5~ODYua6)Mv|0(73#o?SB)ATGgIphhrjWM1s@e7m zxiZ4Vh9^I~&wuiN_LKj!9{- z(7I{jz5f&M{qMOALcO>D_ud9=iR}Wf_wKp?I-;TH-2c9_p!%pAsaAsQUxSR)LFyf} z`Un))Cqd^W!dG{Ls+@?n*&~WP`dVE3Fxf32SwmXpR#q2 zsy9Bb-Tbm{%ZuubPs_HxDBkq6VD;Uc48pbyBDM@-_6%aS3=&{c*or~O9Ml71QDb0IVPKGBV31~DkO3o61_luZ z20;b}0dO@0sf-}wavb2%ISvK}P6kGpIF}@!lqOo|2*QQ*kI?EPNJlAq`EBGb5~Qz$ z**SuUfI3HO?-#5EO@!eZtSemm5N)Im&P5xkgK&whk2bw7M?w{w-#`)i^+|}%QPnn3 zHH1_Xq3oQ6Ew_X2OM;|22nm-$J=g(svl1jRqml5W4qC&r`%41`YTWY`iuQkRL`6*p zzd=y*A<$7mpjrvsKZ2b84XKY>j{R>t0UEV~^lsY0og?J>sOc1F$gb%)h=h#YffnYS z_}>a!@(tpG`$`}aPXBL%q4qN%Hl);ooVoz%mvtZc-*FJsE$RSwj@l3YLt}vEZVvwk z&Ep*U-*xDJ`w;|!^n+0PN07eKk^k+7p~~7IjHCbC!3bpOQIHf=?-6jn30%`bN9rI` zHsJBPo}-|7CCI7dVZ$0upaO^uJ=u|AICD6KDJn>iq9g{?97+ zi*53M!|)%9UVo)szDwBu7qc5!9e<_>)a_;}NL;ky0{EzMXpR@db$@>2_ z+x|E00S$_Fodg|5*LVH@59$|65Ois!mYH>iB=iarA9R5C@zRtbMu;{(_^9gTLAj{Om+R z(DmEMJ6^%;=6&BGsAd0;mi@n)_x*%$oA-PL)kol3sd>*=1OlJMjoc1I&Xve)L%l7+B;PIMf)VEjiQ!MT}!*oy+w?Iv^DgCJC8B!mN@Y^%0R(5@^V7 z%{{dG2vRY@Cy(F^cr^rPKq??K60JVM*+oLCjLJ8?hE_(K-@toD@D33s1JXr8BjJ?} zgaJ=)pz&eY1%Hs_icBIWW;h!;wPRLB4ZFWUE2BMM>-T~Xa+L(BjG7L9gV#qbhkmsj z`UUPH{cb+|xB2kj#>0PGj{R?hb&)XZqsEianyBgc|HfmW&J?I~1g?)DyJg|^5vYrF z@_!2qLFS&iPJm{sA#3M4j)Ce3P-O(JkJ=CZZ$J1C4MFOrj)RE$2r`rhsg>FefyeqF zbr2E>s*E7j&=JtMA4oB{GJ;e|ATIK--cjh}6t(IjZS{~4D+-IiHb|IfY#VoW*<8hfnR{6A~yr|4;K{5tR3RoyhqJ)@I+Q$FmBi2FGK z=TqXg_rz`Q@tMEkGk?Wn{Ef%>8=ujCe&hdw#{VU({!2Lim-qN@koe!F^nduI|7iv!DK-`Q-nM z$NwkY|3Bd_s6Ohw`M)20WYC05|0i7fKM{5v5~MN$RWY!`Q6ZHMa%I$c9CS<~Xg&x$ zpw|iRB|$VIkJ>@jTXvp<)<>XD7I-WdQYFDxFA!fJb%HAzQtKmdHT1K2-%m&;f>%bZ z`@SP8qdi|C-6D8>gq$VO*pO@s&(t+L-hr;egItQY{R5;9!c`wZCW#;4Le`g z?R;Im?M2zlu+>JbH#BH*e=2uuosdq6^9QUF{N@xy8*9`Hg1UIqpT5@2AJ zWaUxd5z~`Vvy-t1)^<-a3oi6ZZw)S*liai>p>0=u>rU|Ts<+@I+QNpcWp|Jfc-1$k ze?(G!l)nag9yp{Xf|Kx?2$=z|jF1_~RT5@>1QCG@)*+G5+ms;Vb#M}1AHipi;0)x7 z39T~1-K!XM3Bk|PC{#?17DG= zq~=54TMmORNP<^Mt%pIi5@?g`anP9w%_l);2Q{4m%^cO80xfK4I`h9CywI-kBxnk$ z@ffH|f=nmD$;MOv8&Caj0(Y_?f-T2EBYVxqK=Vx<;JyiD$O}4ibmV{Aq5rK=^sn_0 z0<|3c+j0<8p|l+WscAb1Dy!R${BJt~s>nc<3uG}iQcVQuD?#g{!=PRgRPiy;!ijc> zdaxVdYC6E3BhVTN$T|nugb}D;1{Q&i5W?0}bb=?IdX9m*MSbwSKSw}o6($}79l`*q zgL;obFY<$ILM)LpRwqF{>K01JO7s+{a<%Lh9sAdE1UicZuZ-{y%0Xn&xvD>m!u=Ai!sF!zYH|j4JT* z07!iVxqApkLiQhRhQcu76&z?ipm4Q2yGdd22xX zRg2ewj-G>IkDU%MwHm~)_n_(Hzr0kRQv2W`VO0|0`UrpBgjok6i$Lll zh)BiuHx=7KgL{w^i(Dn)s)-=t^}9b}Rzt`l@X83n098o`zBac$_QK^ftJRB*Un8k{(sVOP*(`l8-mqG6Hb86Ntg-l3Qax>x^1EL@c)`E|MQps zOq%*Ju;qbE$vyM*i`o(AR05Anxt|tyIU#I)MbPpRugN1`lSdrJpLxu`^O*eSHT^GO z1nL9{8~+zE`7dhrU)`@fjMe_>;ghy=Jw(hmP`oB7|n^?%fq z|0&BrRZ`(WP<>Q;>3{tdP<_;K^Z$f9|EE0oKl{=Dc~Ac@cmb-67QF%u<;{KZf6jA| z_>BAir{4KL>E{0l*FmWN3aCEnz5qIX7j(=N>@p)rEdyH80PfmA4(#nX_OIjEzqTWP zAl&xjptTLq+6r7Zb({jN@rK;<*l`wWX4?tSNx$u=^%10(1h17Kht6SEF_2+8G;w%+ z1gU_~NVNK>dEa+Pl|*EHRJZE`q*j7gMm4)XKoDAegw{W*+xDgbe788HN`e#%5E5P! z!5Gl-I&g;wdR=(sX3)k}NIil^Lh7U9wXY!cQR%vu#cQ6HtbJCv{(1gd(7qw~3Wti# zZ>qMwtKRxLXW5;MMK>ZQo$zno=2*JIEMum2Y@du*1-of7t9B@ZtUGAS534={vns6b zBL$x5kp}mAM8WllAh?eM8C!$T^{_E8ure^PgDW5|aFxUjo)Y2!uT0dopj%se*ZD z3Kv|?nSCyG+UfKeXCc)PoFrHuWi7jvwd@vr)DFUc)I^vhXclSpT~L*@8hUyIybi)- zAor4RO)9}VO33vQ=8<>cDhXO2LC#`@_m7Y+@`J9pL#~yu_LY$PO~~x3ZJ=rhQW-%= zNJpt^C;Bz$klF{6Bv>Co>Ly5K1R)#2^%1;Ef(+b&dPzw2QS;%yZQxNj(5eRTRwKv> zLXcfZkh-bi6sVU}fAW9b2@tA>^_Ut?{0AXeebfl<7D1|^rlbE`A?V2eRxoNg44P+e z0e5zq5B_gC23?!YR;dgqaur z&$Kk_|LBo>iY;7|K~OM&j&{QhX45u{|lJ@7c>J^I$~!370myO>;30e z`NS;qibwgEnC^cuqyM6Y|3ytf9VKap|C)aP&9nZySN-?u`5!*>fBcsJxrhE2T>f8t z?SKCD{{=VxSKs_!b>n~YjsNX;Kx1^%pZuTm;{Uu?pzhM*SN|8i_`l%U{{@f#&%XD6 z=I#H}Zv3Bk?SDV)Mx^eupxOj<Y< zNJwP_?;pXrAQ7bg5u^?RWy4(`AlVaA2f@keU7%Uu+MVwqO-*P9-|?Yh+q=B)eSQP10xtQGcdr} za1kc38pw(`4se~t52}C|gh7V$XfR4xi|Gc)*`*o;lv^cq`4-O&t6!7YwLQB3K+?41 zsdLVy&%2N^?^4RVO9^v8D1FxXl<8+6D0$}DPZsKXs7TUUP{|i_CNt*FBsN<<)$!+7bvznn76?{&JxSZy@nSTs?cNo~OGKjqu)c7x~52}v@!Bvu|)qh#1|5~yCt#bd{H~#nN`tQHwf6T`J zNk{+ZU;3YW^?%;A{}o`=bRASB_1px_8BKi%8o`_Y3^a(h^!5KGufPi)9{-ph%-GFS&5nXlOPu6*m;if!*ewG#MpPf!K1?G=2*0;Dnm_k%!VaggP6 zRhynwZ+>33=|$ofc<^V_pk?Ywf?ss0wHK03TccEqQ2`eXzd(iZ3Bdah{J0iP`3y? ze*`)G2DB6byytY{S;LCm{6FLD|A~kHw`}`gw)B6>TK|M}{`2Yn7dHAYW%^&j_`itJe<6eag8CpNVEA9a z5F{mP@?XOAzmz$M6gB`=E+PgXDM6$Ef`*_TlaMiZu+H*7kKTU{#d{3gCm1;T8Q6Lm z#g2+;-x4?YFJ%leP{ar{peJMdUp?@@dE$TT(*KUt|E;_JJ5T!Wy7GV2zWD_2(*y_TYZGJ8bThFgRs%6A!IIe z1rhkDF3kQBa&CpQAvqe}{zNVikk2B6jI=J!hs%#zd2dMg@md ze*F+OB^L$}GX`!gaJ?e}9(IE)E#L!JI`9EE$iNz8*o_6&(_sZOIKTsO5H>^#v=0cj zEeKNkaDyj|Aj=Yj7#O4&_zZ+}{B#4W{EB8L^=!+Vaj0C(mL^A?`ZU2qPxZFc^} z)VUW^=3Gdebvbd?xq1#2FlRY?#od|nBuRwA=L!a8sV7oYCj5EhXAQfkn5x71JL@Y=>SqS1gSodM%6%-1$Y>(9kOH&Z+!%@1yVz` z9|o_%gBS=N!2=m}1hinU8(cX;4TO#ObsPfqZXj2IL-wxrodhkto_Y~f4NblHfBNPB z(=Pp=couZnY4z6sS@Sm$_+VbCY=6|ns|NT#bs-%Rg|KqRx&$;|R_X?;!s<`~W^5XxB z3;!F>{ck!CUchkW|BM^|=imRoeDHtX-T$+0f~uq$SN?<0CD8cfq;sGHBKl8( zE{uTOiw6$9e~^7cklF-7Lh2(3yW{Y`j>G?u&k1P*-+|I{G-G2BZQ)CedmiI2TeA)$jTYVvtxLAwW#E z=fz8ICQLl$U9(Xyeu{{F0fR~?gSZQWpdACh1p|j61EU%PgA#bZj~uuMBn}>ngU|kO zfma|v>KaJ>0El!>xgfNgkXO5K9O zsrzado~l`Pp?u-#vLzP^=AFu(cP4w@ne=%V(&nB|nR5YNA0^JXm^kBN!i+NsGtR_M zKOGP54Z-Um$Vgqnv{P|YPGl~=2}huA(c+s~OQ4s@W`ny(kd6|Zgv=kIk?_h0je#_y z1UeWTUMs;F@X83n0C$ugKq@9k1%yO`?j3u+oeMn6KCtJWXM-`tg|1V+qU&`pef(59Gk+c3UZ3F7Ih?#(TMxy$l zMF{*l|G72(^J)DT)cr4L@L$pbv}{}4^1p=ke-7#Y3_@QSMSpN=f_g@L#{YSZ{&SoD z7qR;pzFsE(V5K3=F0WtX2$sWkSl`LQ1zJH6L=QeiPCMEdm!d|1ahAU)}$| zcG7>N%>Rak|4mE(>o@(kpY-2s+kelK|J|;DP{^hKVORdAUHPAS`G3~M|Cty5mz@1y za1z`UISM)*eFFFt)*079=MF7@@_*T55M$mAP$v;GN;&oX|0(A{X!6p+MM>#Sf764{$b!0Uqvy)I<=n=kT9y@FjYkhkmz%QQN^^ za0FS>0O=*c7tx{393ijehOm)qCFBL(jeEX=>LB>Cy4_zOxeT8B;9VgI1KyH^R6`K5 zax0dhlFBV_D>lD@Zgt!Y8h(LH4ZvoC-juF`T`IH@bie}SR3Fgc2^*hQY<}9X`)$kK z_pSRr)NOxWx#>~C;)@a8JH1Ng8Ai7%xa4x^g!0O}GVmCIjynJizDa=RfJDF(H<0Dl zkUk4!#8?19!OUqx{tmA(auf{t@>22wpM48IZcEaKqEW4NoDJ5rl+PNyya@G8_11e(BVKMdLr+kG4~*3@_E|AaH3F(}9Zag)x0 zdO)??|K~6NA3ODfSHlz2?DL8tM}%Cra$D|TG1|zWzK==kAfwu4PQBZFdLIQ0K8qUu z6F2@ZZuno?6f`0yWBy;p^uN5te?`mxDmMQWZ2!xHDY_Rkjg&} zm0w(He|WV2i<^P!BT@7J!fO8+g#I&dzF`pl#;pxnQzvNlpWo!afYpCV=l{x%|G6~& zGxBd{V3^LpV9dZ^!oZNkz*)$^x|f0H2m}8Ec9pMOdjEyZ|4TUhSMm9;8uMQ#`M*}y zf8E^w%4Pozn*XaU`|o)0zw^caE|>lXT>znkbN`dUl~K;+|K;E+so~iF>VyBA_Wx}? z^uPJg*S4cS`cD6ydg=fCJO7tF{J-qM|3!EHFTDAG{&i5rGy~jGg4ajg$NzR6`vbYz z0bVP09r*>VeZWTmLC&LtEO-D_J>d0ta0YTustf7C8tWK7Wb`A*OR26%l`v*R74GJwdFo+2iE#?4amg6U8Mx}WCOSoz`{nkeG%w2N-CQ>NV9nA?m5a_7 zF1}W{_*&M2D_IM!q|LjO241KDUaoKjQXj?7231DUv(CrQ1R+R$6hHlJ+_WZ9UK&mmP(>6VwcP|4;OkO~MF2~Ub7)kpBu4q>3w zN050{$QS}r`>k%zH#n-_`yB~^cF5wdk6^u|KP`v;G#~ukeDHU}A@Fn(5tsa4j8&5UaI8+QI5Oz&$L`8gKBZ z9pvcIiDyB_kWM)VIty;<+5c0{fUd^tJ@UVC>;Jq(|HE4TJLZ4W341T)a#z&y6|dnV zHno?`%Fmb;-?OQFVpsXgqxqjl<3F$Fe;(ccf=2%Z^g$CtLWci^3_-(XlHhGYpha%x z|K)7|tJ;FGESo5_D6z5l{m|9MsZb1A=MlYh!CdzW4IE|2mLDZ~GgW*{V` z^`AxLCj-wzHt{zCx}X^)F;h@&C1v(s&g#FC)qf6!*KC3{3=G+f?DC*xE?t##fyZ`6k04?X3diMW>Q~&!;{Odjbw+DPtIJ`1~oM;NU@dUCu z4!M5>seK?M%9@8mzuRDQLy!@L6qbc+nlC-huRqAS6Tt zGGYKBk^4uGx(T^Hssr!%11)I)XTZ9hpCEY?(t5_nKn_ojv z$tKX*K4lwUm23i`;*BpMTZBNngjPQ+Ui-Lc^Rv=zFB^8gtlab{sc)Bc`eX&?OeR%t zMqy*np`)OacBH_aAo%#30C*q{GK0ebp4Nd^E0C%OJ}U&NRv_bY5Gi=A1gW;*Bqw;z z2(oci8npJCS&MB$PboDrDLye#XVbnddn2DI z1tGH*-GCr?t%Oz|!7HYWCATs_>l~oZ*=ND@xS-f|Nb-oCtmakH{$>!l#lUxkf%hx}|9J+{SNv+f zMZi^(tnPmfiN6edk6FcD^J@JUHv!dpJlddokX7+No7_tVp6g7!6$}j7EL=(qY;p_? zI$-3_z!1W~(9bHjfm`Fhgehoytg7pO^^pJC5&v~!{_Dm4S5En_k@;V;=fCy3|3>FQ z^^x_(|5g|NyIug*N1mttyB_-=e)zxt?*B;}z9(<|QL*iN@y1V(J8`PEKCj#Px_QsL zmc8%04}F<_?*GE8p!#Up{r}7EgZfHyu7H*|OgQzw2i!S=R7Q}R2tq;z)*v+tq&|Yw zP>`AkLL%2k$m$_|AINM``+=Vw2Y>`))y&DZ}_xsRf+B8wM=D@c4y!+U|>*#orC}xkrRWRL@Erf zjQGG4I-u(qVD%9@QdbC4A3>@i$btk28!|lvmxV~NgJyy_#JPo)xTTCll&ytygA{EN zb$p8L;#z~t7bdrF%$a(me8Ktrwv@>Y+5o8cAcIs(J zWrR#-EV=<7yTh!TAl)UT;XBYWJO~$aF&^YR2XK`Ht&i|@kswn@pd*IBo0Y(I5OgjH z%qFKk!a8IJ=_Nr(cm;&UDBB8M3J$55;MGw1w%3q42tq>kBUJ2sR{>rK3gJS?>Rs=V z5u^@+kX5@sz!7AZU{gTaZu_czqpciQWGH zEeD_oGM5BdBi9PPgaT6UKxTy?b3Kr?bC8+{LUtVf54uR=IOyhu38(%~1mD>`^Wy&n z*Z$AF{D1oC|C5jZ@7VsoXyO0Z&j0Sk@AX4pOWI#$)wsr>c%DJ(HiP&L2C>JC@^9Fc ze{!k)5!C)KX$&26Q?U3iXZByl7=*+P|BLGX7t#MOYVcnGjD(H_)|D-2xg7+6;_ur6ib zSivB4mQVG8q|tvxtN+q^;Q1rL2MmG_*yUgFtAF5E{>m=>mRD1uKzUy{_BPQ*AM$|5dL31 z^1oKhfAO0C)(iftp8KzL`M=(^|GHN}Nc-}CqYM9aPyaVP@?U$`f3sEJ)n~tmn)4uI z)w|*~&r8<6C|UEMeEs9{O;4(~y{Oy%vSH`z_I)2F9RE4>-2b`P{x7@tfAQ`Av#x-; zNWI7ZLI&&F4}OQ#L=dtKT!}#HBk0Z`@K9Sb^1cHkHl#j+=z^$+?{9^#>qe`L@br%$ zyI&zS5rk|8pI-;5i6E;_A@i;93gPziZv}v~KB@tceH0tCkyuHc1*qf{r9*Q3bDy1KmRh9)*J(ae#aa1EgET z1+Iozz)P|rn}gsar0Rjp9I=7dFhJJSLC#EooMkEs9_5o|;L~MLauc?SQwSd8-~|ExVJqD}%NSbpgY0jnOxtAe~q&b(MbrWPY-K@(A(=WtNKN~;&EU1eFnm>XrW{97D zI%e9bm}#e?r=CPZ$Tbm!jlVvEbeF&#B}7dGsgE!h<7Ih)kSQeikR7Blf{7UrwUvd zVb(`gJKqsgA3^d3bj=Tr@j7^YRKNE-besY8TsyQb5~MzYEVqN#M@>im)kDx>@PT#U zJNuf?g6=bFJomr(0thvo|KD={Kj@0{lc4#d2H1FA{lWi@2T%~GdIDEK@G1#jAE8x7 zpepI$|2F7WS=b zLfgLoWo!P&PyOIhaYrlekht3x4)di9Ix84d7BEPzXOQ2@pm2^!^)jc%O9A~4!iN7O z%|W|LB~1P+TK`vb_^)F7U)ct{Ai?CnnC^ceP4J|T+J7GP|9qPNxwJr4lDPSQIlKQ- zR{xom|1*e#5WCuc1^fRdLH`Z?|7$schU^q<{wv!4SG4&rY4TrK9kft^f%6gr!!ZVi zMGOoJ85kBYuqmGsbn+D%9P!76i;A!=yC-vK2H12xSyyso_;qRcq&vVfF2sB%C2zuo= z;uvl4`Zq|Q1ya92Y7K07B0&;xj_RI?3K8C8K#W-i_EvS`yAFnU$I`E}vu*CpHD)$IDzxc__m!S4;bKGbb{ zQ@r|N`mFPQjoYl!XDRtN@tLMDCvXH4_*{As%i79Fo# zeW7;U^}6*ps@C4FT6?=>?fsIqpox;~<)BhMW67=D#W!;n-N;&SEekd%2dR?a^%0~) zlsFr-VgXL3%(;{@=MrRfcjAnT@zc&j`a!W%PsdC-6*mopVyB*rg;hgQQ%*oo#H3>o zl(yg+8p>F3Eo;$@3@}PtbR&K7O=JWawS%m|gI7av23i*hTnCX>A3@fK!)qm^fjdzD z2)qFAQ6X&94$eTEOCqU0!mO1bbr7UJf{>67JMx?n=zt)MJ`uczf-@i;BKY44?3#d`yC<+$#IZ&VEz8@4X`D4$ZNkL^$}!KQsd#j2z2OQ z%Tdq;NgZeYx1Rgoe&K%$7&V;-RY}bk|2LioZASuaN;>wx7JNWZ;}OuZfu=*KRT8*9 zf>uKZLH#C3-PCsQf6IPQ{nE7mfAc;NyJg>h5Q21?AcKD38meR8|L%kTdk%vx?}psb zFyZ9?sptMrKLa{Ju4DWEisk>)CjR%W_+c3JUfSg*yY6)cg;NZ|XBqfUGVoks5WdYI z`J7SlJ-6n6eqB&EN6hfQq!DPGO~T~Av^i)&f}-_*RT~gewgJ@+k|v-P+L8vKDoIf5 zKcD)4E|nh~O5b?Z|B4&_m$UeimC|oB!2r{#U*6 zU+u^#MLc2kIFe_}0GvYby-39ry;R7a(;+>%Q;sm2XW4el#Mj z`GmKW;3`3t2)F`*tm1}T>;$|G0?<%*vt=jqyr9RsH zvU>B&+O4naw!Nv@@(MKIx#d;Owl}pq-#737T7TecXlY6!w$2iH3+;7K3k z<9ay3%i*}e%k9J&SkxE-AqZUHLo1{CH!|m4hoFpkS2E^ZNt}Bz zaqh+BxffIBT}qmBArZ8t=wj;J%b;p#_QkY0mr`e6OrCi@X~wyv8RrtFor#@#I%?91 zm`TT?CmxNNbS!SliMXl9u0i&qNN24YmjhTEndeWiDNrxgP9z?5*(!jG+pt&kS z^-&gdItf&t!s{al1G!d$tpF#se?&}uRImoUN+P~Kf_Ic43`k{!Iimz0uftUz!D}33 z2BbrTECQ)?h^dd@6;t)@&)}@|t$O!YY*iAd?MG^T1lkyTtSf$2RgyDA5tC5=EmBPlab&q&q&zq;LjWh>AEIaxDMWh7zzU(x_{K98`@e{Plk%(5RD zB;T?rd=u3EFKZ6cBCPYDf&U`|^J50Vk75S@wO#)k`~KJS_^Y85ou^F!nPrO<-W@1=U9!>lvjE@@W5&wfQe@`5#mt37utN*vY`q z!@$r1-e*+9z!1d1;LpIO%fM;C#HYftzu=uq{Hb8E)}f24Y`$|VC8KPd)2)X(4fuJ^35;n zz}F}=?fKNa_jCLHugLXL>wakO19F=KWVHaKnt+U$VUm#A1VTcp5-1y~_5oEx5bl2H zIjFR#k03px>YX2}c6@-MbgIYL)h!X>c83Usp1A{gLgF3h$q{6@eStbYR z5J`bIuA-e62OpZ_0(WqbD<*F6KpZD{oi;?A6)eKfz$V2hq9LyBr0*PM9a|?)%|BYU;AG?KE46EH*Q~i!vHpJ1%A46sZ)PpIku>j0+M*lT%b*BS8D%cH zmbUn6+TyFp^RK3WyG2 z09iqoI{PA|K1!T^Hg3wP=!wT8CmxTPaw>l6NeGIYaw2-tkr;3_1gVT7Cmo5Jco0$_ zg-EK*YmA&c#oPkV29iqu7dw74MYylTe>2p5w0v6Ao#2-dXw0ucdMM$ldoq~1X$tM`6~quTvH z;i!K9&xQlP;HdH7Z^#;krbB-q8 z<^T2P|2Limb&)^^*`4^`aP)sYxJqgSZ}w?{^_5x<{BH(VNsaqqdteWN&RYN-iEsqE zlnyk|cNlbhbnp596V8DSrI~&SbScA}i~pyb{NK6#f6c1@S(E+;RlT)|f2ZVng-`1O zgXDe&?qgtdlR@+`qvSU(rT>EJ|3$Sy$H9pi{TDU{AzlN}2|WBp|M?96^XY^3nM#|2 z7HNy?{}Z2wTd>vQ|T?`;RZL3&1z`U2F!*#qh;HShi03PF3mw!*F!YTWa^anJW=7;4@3 z6I$Krv8AQQ5UZokB zl^9r67)DI1=PbFAx#U{rl4}`@KnOBDlsq3)F{RGCo-+4Z z%G_(obFL=OxtcWl3S_D%an3n7g6wjIkjXPaC(*(CM@cizB~Ck^F!fyA3f>tcF9Ri&Z)N}xJd=O|U0%RZ1QSd$8C;#`I0bR8- z>B@fyntTy-!9>mWze#f+yH%glPS`B!yp&OYB7^J%2FV@w|FTy9#f<;+fc6CaVBomUz;uy;a}NW@ZU*jSY_d0a zHUG<5{g*cS&nWSWf%6520?29|m;aEJb!zsYDoND|RH<>v{AXZ(#=yKDJO|Xuz}yP0 zk661v^%2`b2C1Xm5Udvi? zBWKyI{B@5@x4eYZN09yzsOo|BTp;yP^WM)OL89v;&_SlKSsq9o)C{^Z4|;1Jyc&Wt zAax%^CDcZ6eFQmu4l*kQAt5ypgsj>99j!j9-S@e6A9xEnc-I~%e}M<RY_Yv z)NK7wx#fM;RuH#g_m}4VU)v7-XgTn$6?_qG{m%Ers~^TrJ?&YuQ73+ih;tc(ULu2H z5QBgt1Cu@jgC=;3kR}6@CU{Cn5p-q(s|slQDvK0&RtU0p6|$}Xvd>f)TqOyBkCKDj z=>Q=iw*^5uN1)T;Kz%3}8?&9mYi?8P_zM8Z6Y8Dt}FS(Wl=0XJF;u#CBWGuLn zy5JHB&A$xULzFV_N)l*j?sD>+t0?@F|qWD>7;0SX5Pr{5d$+OR=&OV_Eh%?6*gUE!0C#LYYn>LN`(8aL@M zxIO~82jVI$B)G|BV;^*PjC&*8n=Y;W%imUBeO3 za=XT(;EvUC&<%YZC;xYz`rm!_fA9JKz2`uv=feN~%l{`_1fi)HKevdX<>k^jM~0XlU@*bsChgQ)p` z8QcGI_MnvmGUlLhHbEWG)=>_%|D5U|#Gwm5<-qvAfCXsDft2NcL5=^65`P#Z{_t!3 zSFro9@B81-=f9dGXb4Wp26Uo=jO~AUv;X{R|5?QzFtA@_U|j}YfiRVUVHyMLN@j^Y z94f!$to|#SgU90dzVNDoCZ2ShK|L&GoBvAopxRB|`aie&e+JRppfwDvGZ+}BFt9d* z5OWs;TQ37!I|F+M1N%}2iESL}pksw3&Hu9~{b%5Pz`$^sk@qN@&?yGd>kLwNSf!t_ zNIhkf*w4VahJneIfx()AK?1Y|iK&`TWv-C!KM7;d4HOb)AiwB%{WtXeujBDw!wocG zsOt7#)$PBc$A2Zy{|r_CwO9R@y!l_|&VQM^Af$Tdzv`X;a$qEX`@aI1A$8-w&Xxb% zoB!~8^)VQhDq5#1+onr93&53N^nlbxA*~)tbtDlr^cv-vSBc#BG z4#dHBl|n}6Q0gOatppKAQw-U}jV1`=euv+h2dReY4}6B8+I=69_kBYykAOG^vKj$G zA~))4_I#@a4;|F*|5*h_)qB5I@BLb_?{nFnj}^N=l<#_9x$|QM=xFW_727}ubXV{E z*tqL+^PaCA`+syF`aR|7p9u$lbnbXvv-)1qtTPb}n_N;S>H1ZR8%DD$dNYXGF|g=> zmmkQ2NjdOjkSwSN#3I4KBmzQ=0t|2jx~dye^+1l5gI}TwS?vbd-zpF7@~|j_7~EP6 z?3xU``V4YT0>d9+~Z<&u@xtJdAGUjLwQ&Ar?ew=$RB zOkaFGb;-@tB{$O--A-S0J9F{%j78VdAZP*T*aXOo3}{IJX!r|M+JhHtgO1uxnRf}& z8%mi6+Q*s*8jb@knFFnwgY<@GUx9XuU`x26_0i0;ap1#$5@()G22CNIPndBga^lg* z{=<>|hogEAMfDyE>Dm|AzT2&Oi)+Y+W@JI3P4BOL3bfx)<^J)39YXLsf<8l zci8$zplz_5UO?_lyWuM)3LwGHq176Es6wwd+$Acct`m35L zyZBvKGU!ZTknLv>Y-Zq`!XPx4L1H(f!eJiuo1zAfBn>~ynf;J41@!_z(>f;q#ZCW9 zfKN7%wFlKl5|*H4Y@!CBLwi80;57bosDX-iM*06t>i;=(|8pDv7q|bf;SSm~#31~g zf%PQ=|7St1|7tG(jRXIi2mROg`mbsaLUQ(?Q{aR^OAG!nu%2gNILg2iplp@4vA> zs6GNMv2*#a;tH*gRDJ%lxBWNX1geZ=@BWttS4p_)BfV??r4Rp?3Z2QIkt1aiCvTZ7 zW}PK&lPzVRC*xQk=Tl{tGAXoUZ|#PskkbY*s}e|!0I7GH_kM2L^Qi?1VfI`g>XAuU zO$3>K0*&lJL?E?K?Y@t2RJZpN2!RI;kWHxH^%=5AyM7mFSpt0I0W#TI4bBc#`+t`2 z`Bt&(Q~A!1CA;1i?|fIb^IgU64>fzf)bIb^eDHV6p+ArnfSreax9yz61@ z`)qQTDEK#VS>`aPhBF8_F)$l2Fz7NcYJ-s`1G6>*n;ICYg2v5Q6d9Nm7?{Ku7)8OA zkuaz-ViIIv5(F_omv=*!D+q#?x-kedu*$LsYjet&39C7XXuC@q_^8-KtJ=n>J01yaTQeQfX_iqoOKyAMhC8p;%8h0p_!ob7UE}~jh}fIau{9Qv{O-& zjs&;R{Z{+Z)rRp?WW4I7 zJ?qp$I!%(M+vO~@&0l1ZHPZ45X0*v&ZhQlAsewkm0+6HII<1AvhZ{WCtO!)<^Kp5qv!Z ze9j21e+20nA)inOU$g+NjKHn0%AFq}2()wu+)IMz7c>T>4niZ5XO19jls95G4O6+;9keTb(lfqEVJ|zE~VE(n*SsX{tIaR7uE&M1PSYd?gkPt_|K#L zpGy;T=9{PqXqS+p)qizcP^BPm{$JV{bO3^&F6jOR5rhAdhW|x%|MP2nXOn-&F872- z`K5p^sN14w|DR3eKLg8s28LS%Xe&e--Ed%1)r+J8ifBf`0$yqW`lj{I7QSzw})YlDP9<;?94$yC5WW z=RXMD{V#dvzu3M1Lbv`a-TW_h_P=ueHU{-PVciH3y-0DBXeskJ8Ou01>jW9cJPqF} zv*c;1la99U`PzNpXXE~#^?ScJ?*0Kn;8`I^y#T3qApN8Ey`S5`ouigLpCIB85|XeX z>Kk@{fpll!Tv$y9t&c$5y`LNRey#^~i$GOU&E5|+dq36egA^Haml zPjx##f!1m709`j$zw0w-vI*SEsy_@Gj;=lUw|f8Y>I1)O4}q?y>^%8@;;H}rC;s;z z{N1tZOUsgq9yRjxOSovh$g!l@n0AnwG#ZUpWGX@I7ISkxITdn!XO$Zsm9KkHw*Fz!>bp71 z?&L1No4xclX#Lot8=&ohscZqNk90i#YdM2fAQ*Uo z)@;jI{AX4C&%kq=f#D3OGGdqos*IRB7+BgE*gHY>5#Is^iATaF|5R;3N2jvL{^!^D zuVe$Nk3buaY(b-TvS$BzmH#tve`a7^$H2IXfqe=C+hhjzi3}W*K(|G3^fPd_Fz~c9 z2<+g~x+G%$U(WHrnjOd&tWrN1gg)>n{F2ZCEyLi^2Mspzn}F7+vnl>(7GJ}_G@XGn zhFvU^QSzv$?oC01|5BEq%1GAXzpBT7E#LpDuK$&tK_ioj4*xZrL5R=$zkbetiLL+j zPJ=2VDR5~!=6v@`lxo_M|gb%X~==Lje}27fOL*(w!W(aFSh{gW;*nv{qRrFhP4Zz z>oX>t2km8OJqhZnx19oYE0Hf3Nn0M$pVx(uwk3~ZVp#G%2!p#iFl*pxv>XRygJut|aHBWRTb**7E#>ezzn zBQXXBDFzND22m4XO%DynG`sMc;G#Ksz1yqio@!ljwQcR4?oAInwmjVys6K)pumGu#QeoR#Q)XXD0k2y~oP9BA&ZX$7 zr^EV=1a}_{>N*hAbs)I=U_i${pO#&ojoV$TH#$|UvnyF;nZML5XMsW5EThy}2FWuF zQ)WVtaoTK?v;`JfOKgi*I#;fDFI%Y<*&}3{#319zDB;E`MP1cbx+r)YE?Cf5D31&eiASeO57OcQWwgF|cPcurx5RHZt&R;7~Xs zr1M?Y9CRWax6XeKFk;pD&!YXGMdv@O?tfOD|7_r;2=Mz-d9*>N#t9mLPIZ&B0Zo@E zS%E4e&|(IQ|I%h4B&GkKTj4*0&;z8gfh=m->GOp4SZQ z&$v}U9T{ymP$xpo4zx5*&-uTq73iD=qLuR z$qd|6Kzcc+f~q9;i40ub41E0zA{T}9-$>d0mvj2B;RM5{~2Z8G6+6m5WmbIf16YFr>Nn7VN=j?tWtLWrS1PKI{jC2294S& zIe^A6W$peexqvRJU`qRMKKsA?MbL&L@jL&;@BA0N@n8HFbkt7b_J4`npq`P~z5jxD z|MT7YFMi{{=(YbcMLQVeQw3DKgw%Y5G(1GKyu@@p#r3>|4Z@|&<3z18rR?)`B6`af zUGLiet7-eE#@#;}c73nk^Q(T(ue#me>vlue&Vj3vZ;;+lF-V0m@VkQQAZ9ZLb}I%B z69!IG1}-BI;?`#1)@I<;03mi2P&EYU6LBjsa4Imc%P=sBgOBkM2h}?a(x6!+7G(xb z69yq$MwMVui*)_aPM55Cq4nFcr=BTYe4~Etqxy}n8#lkL-}1I*s|7F~l>J&-+~pi34O!>(XhbUS0=tt>Et zR6y`<5r{GWM(VulkU9u*nq1nvD{0^k5$KEs6vm~Dd6yt{Q~KP?5C*&hl|J`!(u{Md z;Kd1%{fGTpc6d~8aI0MBT)Eb%VvS4r8mH1#wgpQqvKLq(A&bm;CaJT`(`K5d%`{D( zVUjf6Flo9;>MY~b`Bpj0oJv=@R;+c&ou%Me#xCc`AYjbEug1Wy#wwt~!mrH4Db2tp z!N@Gez#xoV4Z+!%^$}7>3B7-WwLZ#Nas$>yf^JZPFKmD_ApIju5>hciNJxDIUiA%H zUk9m@FiA*#ghs*#?vUq_Ae9l?L{iD-7e!!Hf^oDRq&|X^kSeJXeDXZxlm<9SVSNOz zn6NS+%Ot9Ie}>dYkSYnYK7w?VAY|44pVhE|yL#~QZ>(h9fnQYz{!}0QTYC&tCDokx zUv=z%*-;RxJo3Nt2xzdb?JTIZnZ5q2Tk{2}@Ffho4GfYw4E(tad^rq!1q_144E){f zGBYF$uBh5Rlr;V)X7HOw7gQxd>LV6ya5bd&pG^-`8F3l@7cu`YX8E7b;6H=>e+H3X z3^M;kO+bx*H5br^Q+adHvN|Pe(5#J&Ip|0qUeJ<&w+sw>!S@8s1>erFh=FYtgXn!h zy&sZh|Ah_ybBljt;(N=l0qPcMx&Bvm0QF5Y9RBM#{#P*l&m;ezf$bRs!y52*p4KQgf1XW+QSDES&P^TlWU zpT`hXAMxt^7tsF0t@fQ)_ocA$dvW9cQf8oCw9>Z!A@z~09e7=bJ*cLZvi>h?_g~89 zzpVX##n%5JJ3w2F)NlTmxC^R}AaxLA774-@y9cV0_-;Y#BcaRx#d9|^$R_hBxbZ7{ z@~gV?tGaS4*>Ne_va5LWX$SEdCGr|23fUF=R<7$k^moe1|1JA|w;lM~eBfWx{=bm= zsA>1N<~`pb^%1D*+4Z?$=cmS9pPP1mY6i1GgK6M;q<-gn5CYdnHQQd*?|cj5?gZ6J zkfqd+`UpJq_py2J$A�>UO;YS4MB^c73P^S5;j{e)S*!+kgD;q!a(9p8nr=fJ{qnm4zV?Uxiiw+HWp1iQoG=M+nNVGo1S%Vebc-BUE7wI z%^ROpuYF#*`f16^M}^Do=Pv;vXuY%KZZ-_%F1-yA%w2jLvOfrU4KwH<>%})ArxRo> zyaht@Z)Aa|ia>P`a-V44bx3^#sf<#pZGA*rZP4HA@Cn zMywhPY_cH4DhZl^1FeULEHC7N*Fne(to0GBuXHVA(Ty}Hf-U3#FXw<)OvrsD;`>L) z6%(W~!aVE_vnGOwpw&bWE=l!K3HTaW$fzAw64G4)tpmq#V=UzUVt5w`(oq6&x4o;{ z@d3hxuJGCM0c)jGvFj5qR0$f&`&td|96_oiNEfMU&$o)b-yx_9ywDE5z6i3)sA}IY zI06l)@Ba-Y5B#bI_n|-o?0bGR9sFB)@PFmO|7C~%mmd6Ia};zDQs-&V+PRK{{}X0D zQwUtkpxMeGRscdGg$%+448m~?Lb0IE5pO<&OBMNEztZC$3_PJ8K5eOcQOOdBnIw&5aQUw zAby%l=fAe^e~X0wIwAkH?Lb#fun0e6V7kh{xSxUV46F1LA&rkbhM@Y0Pyat3_ykuz zUC?Zruraizmb3;{I&#*a1?qA(pu_8wZ9#*Fau%RNBpBTOi^u=xU-w_{?0?x?|K(u) zAV}>aeg{-NLFSUg?)?|N`(NnBe}QZNC9nVIJ^f!ab{T_Y5|4rduc9Njq63eTBaebP zlb9~ExH+$)qkv|Bh;9h4ZoImCWpu;->3e?9I{kmb!T$|AzSnL0TC?M4J*!{K%2{rF})36=1yB2)t!JGP>ZyR>J11*Hu{iSio z$EIEHYPUiUKxo?WrgiuGzJuSU9RD-r_@BwgeosC0wSUk1?v0NdmffhCdnUYMxkYG) zs#7ViN)Us%D+8YyXlxC1Eu9hryFG()G>3f`uS-9J^AraA2@G~U3>Iw+X3Y%dEez)M z3}$tVh9wMIxeOX9po&SvlY!rbf!BtC(*o4T;WT03He}$?1MMLK83-Q(6Gh8zR%mfXr+3R)uvUdfDD z&kWi7nZ4+C7Nkl7lWFsBAR|y^H1B!}_LW-;3Aq}AvvJi&xH?L?YaZsVc?hY7;3WR~2(60*pG$%>Fl!}vm4wWIA8iLY z>9=(2+tRIX%eK9PFiN++h1XM{x(QY>RbZ=!YfD zzo3;7c%HZVz%NLhRlDa$?VcZH`~H>f`&YdGfAw+DAwo^3|7WfFU{SS?$8{2edI^JA z5rc3(gGe3-3FR~JW-;(ZGO!0RFsHJ}H*y%<6105Grw7^`Bw-Ai7U9wQ&mi}OLF^Tq z!WS;B|2*J3z6JFEi|T>uBYC_3Vy56@c^K|7FzjIvy(wh@T195+3#v^F9R8cQfVu;6 zHvgry|1%2yXJ9$bz_1Hk8MT5dqe=#bW(MXC2F|_QDmNs}Ks$Z-6#lb{gQ^T|@aPuk zR3E$lT8{tKE&lV${b%4g&%khifpI#hK4R@*;GDp~GZj>u@b@$D^)YZwV-TLtrtn44 z8dM)CTmR=*{m&r>x=TXE8gzV|toeUg)Boc7{{u{y;MxpAg0mQeW-|!Q08N?k zOkfb4${>1@SL>dH)qkUq|K{=kH3I%Cn*3*%`p>{~pMl{Ls6OI8$slrLVF5&}LrV(Ek<<|K<1nH#q-a`UVJz z-UZb=5K`hUqCSF%2;YR(M@$F)Gq_J+5Q>J>M;vmt+=>poiWZEbIt)Ta?9$fUD&BnR ze(dUz%*vq*D#_9g^}coc+O~h6apC{Olm8p{{;S&dy>iF5s-53!c7Cnd`L$uk*ZS>W z>UVs`S|2s;degM~ZS$@-&AZ;z?RW?76Md-J_6mCZ&enI3`lxyL`>sPjCZ7C1>BOH2 z$9}c%dS9~qPGrwPtK3Bzp&g=5rM$+e3`!vkVy+Av<_yec3{0jB%!UkXCZKb8xQsvx zr1=aO1U(tm;i6C91oF%uAQ8s9F4pI|ANYG$i)}kBPi*G_k>L7Jc#==`^kO4YK zw+Kw8%)JH~uY=b|SqrXZ%)0^~uS=bMK5_c#xQWLidk*^4Z*eSHX;-q!zH~K&v@cz0 zQ?$%7XCY`fE@!@V{v!LrC6?LqOfqI0rOkv?M#d>KjZ@te`+}uTd5c{OmpRw0bFNuun$W_a>IvTM%LT56 zSQ!`?!3b6xF)+aEBVB8ODam4BBZB&=dW?jONBN+1Sok;6?01F|UzdCx4Q z0>VndYa(O@yjFrUAhC>0Li$Gp>m%eLyMpyk;G=e!wGw<@3BrK%n~=xxAeB+c)>m)@ z5rI@pkZP!8%bSudZy;>QI35}a=`NwwN|^OgC1`-|GlZ<(jov>h-}4Q$t`*ils@VG- zQomH}|5*Xvf>E{qC!{{A*!K&LAZjWP{Hi?gt78Ap$^*Yj5Bx3J|EJ;r=yagTr~mgJ z{a-lurA5jHZv6%Zp=1WWXa>&R_->Uj`;`2JU4-dMCLI z|Fi4;XV>}9XABxTV~~Bvz`uxrVG;wwBvzSqJZis$b^mkf{^!#B&!-3K2*_Ce7cuWdiCu@T!1LKo!#eujc$;6+G#qY71K4reOG=RqQVV z$6nA02`m#qE8;kM8Myip^^xF22KJQ02tH%rImp1ZpFwyX6bY?m;5)z|ev(D;m#EQyS)2drZXjRCTm2VT`OhNw zi-GG31M^u1?%hm6hZw}pv&vp$QTZzXK2%)L5W2Qd%p9~LUCR2ul=XiR3lM_bwWMhN zU&-RXq7`UYFN4E>HlP0t-TxIg{1?0QU-~*|R!IEDe}S7IBz)(;@SXo+cmIpt`7d-A zge30#7r*^q^d^YZy!oGD%})lSdp#ps`G5M+|DF5(H*Nn}v-Nw;*6&r@zgF+~ zRvaD5SK0k zw;t%IY5{!)2;$dd;MZgjuxC*XVYBVvcbmxIF$;t|XEC@t--* zV$dvNP|RWwiDKaNXJB?=;PPhR_FxbTW>AXaG0s-x^R>7?j>f znh3&x#5g9Iv-&~qnup-z@c?W82;NsBxV!;U4WW^kwNmkxmk?9}Tl|1rA3_ncx6qkV67o8A(`zDkFG(BxMe|zfa!uzr5*xX%o;nv1~s7wF>^rulsMX|G)U<|B_ch zOB6(}{pY^{LV~yc3*P!Kd<(rk62AUl`u=~_tN$4$-eXWsU=R)BQ?%ugw`P?wW0N)K zmNH@!)nyUY=a4k!lCuMi<0*LaDf@9Mhq1^7GKdB+FxfINIB@9Zxs`0MUHxv-@&A)g z{BPU;zi#`_@-3e#wtlYM`MF}pC+HfUoo_0(zpB~xs&2=d>a8!Ux4x|3`M!DY=eGSn zIu8EoI{2r3-_N?u@A8-2&zX5Sy5o>l`Yg?`PGQSz7OfZtVFw0QBk+N5s-TnKm~k~R?=kP(q!P)0wFFH25wabK3hnAB;?$^|!?*52#g@v?8< zo8G;zJ9ocn+wrn~+l!jbPb=0vDqVfQ2t4ct>F;E%yqB~3LDtGUkopKjt^nPt1Ch;J zaSvV@LDtAYCW+80qpZbuKvP2CB?_RiI9R0vsdvCvEr9m7!cT95h=Z;LpMNz8Tp7XV ziBiG$5JBprv^kgJrkskIbRx3qJXQ5g891zM} zV4ODBBz+EKskU+2Y=h((7TF8DYc>Wo@9?Tw?@+YVI(LB;xZ`7!JrB|evPhq0m@wHa zb%tB%ijd}QL5*9yD%Us_F0s#!n={`!XQ6H0V#mCN?j_5;8aKPut~ZNs2KSF3 zw+dpZj2IXgQs-WTAb4ejJaz}^86lI9g$*eSu3;j0#e{q+IA$*iULQdi7~Q2?kc~^= zgIMpPEw_WzN|3QxRKSCqn6%+E(J9vGB)=R?QQNpZ`;G=nv&Jmo1*F+Eoa%BYX z6IEd>2#2ithSW-!H4#h%w6d-YTn!=DMDQvJUMrRE`39+yK;)hu72wWM<^JDQ2mVwY z_)~M>Pwm0K5O(GMzf}kRmG1jraqxfo>W})xI~gA$r8f6&bf+)t(TK|5HL9YO2a;$S2RBS-&B1F~yGw^<3;Ml>yw26UpItX!3V&I+xS_mgN2{cm2IhBFu9Jl&QdAt9* zKA^6dzSn;h@IiL`YM}ERg!Do65ueU~Zq5JPYX4c}f3nEFVU&8wDD#L#{wcfC3to+P z+*+U|8IWF{x-GZ{(f-dN@t=wJ1q1sd2A;D_0v8!XPO?axWt6_lsr-~v0~8uUhM;jv z8LR)GK{?C+a+aWNu%Mfjz_%=kS%3!a#4P>`8vkdo{mJ5qB{4WSb!gv0Q-2E?j<-gS3|7M5&Go-Es^*f|o zI20UMWNcWZ&6%al_#{oZL=D(PbvY#r*rm+4WG$gc&V^Ueom0V&TPc7+!k2-=mVw=# zMLF6$dPdEHyK|2JUvT<==kD(<+dfrpdRw{aZQb@yjk`bxUe)gfAHKTx^Q04hrk?&k z`S`ECLti_$J}y~sF0gE|reC$NQ4Gi#0_F^Cx}a5R3{tSeb9lkm&xtUw$uO`hGq5Ux zmKJboFz{$HaH)gthvHNL9XZRT#K5f#VsI-lKo-AoD>3leGpdF&*tT+eP6Sm$zVjHo zW-&NTVzBRLueJblVQN1Y33|2aG_whhW_zc+Utj@|Fhf^}tEU0tPS_wBrVHx&pLT0v)pe>J)&s z7r_=Sq|OB$b_!W6mo(#CWdBjGhHdV(8(pi{+m)=aE?jC`yxcr@p;g{O`{HFT6{{Ue zmYZkKGtZu9nKj=kdjY7U1l}}cm@-o@eu_oryuhaI5uN+|8aBI>t+dTwV3RlBK5xEV z?mS4}$1HV*QQ{P{)EVw&D??kh2RChXDp_uwyTCGgo+WJ5&L($}ZQf#s{6%i1Dnkt%N*$hgLDchv^{o5n2}sQYArKk~==2 zRZNij2+~V}kg%EvGGs?leN+akm>{!ARR{i*?*3k~>qq5&(8{{(E#K9%H!^7EGl<4A zNF{+PBcVtJp+FE4^kv|3V-WHMA^rdckx&Ma2nNYm2I*c=pLqm47^hq_$Po6#}o#E zIgGORrOkip`~5cy{cqy`U(fTuq6O$+RRQ(?{A&OCwEqj}{^!;H&!hEUKpWgk()iA& z{!LKllZgITaibry7XKtIKsGAagH{R3nSc&}Wf%X?!2Oef?I8o}0|vhHEJByrr7!R) zUFT7IA*}meKp)gC5;p_wGXhmcHlWH#-U>t_*GFO&|AkEci<>vFkzE4Ws@{zmonp!apF~U=Ti;lQ4VBR4C7D=XOInL;B^HZB_SH5 z=+luk_2Q%>|0f*!*L4_#dXE0@J@&ur1PDzy@vnXV*NSzIvKCzNYuaj%GF`|nhe6UE zeCC@v_!2r9@QzVg@X|F&@D?612GGHwVhoJp44~8elo;5+$A_}3ftFsgt1z%Dfe>U3 zHk6bHt;FV0X5dq0;CF=7N1X2cpkX?<=?tDT8T{t4MXeIc+ND>2-hIZ4$YozrHvTWz z`M>fI=q&n{L;qX%{BPL)zj^z=hAqFVHhnB#|E_fX)6(@%i`G3VTnoBhym;+n2r6Cs zxNO~%vUN{N!IczfVrUiU@aL>mk1|#~098gv^%1kY5^n&nz%<>Mj3Og^B1{Ptah(n=Tx@RGIxPh-a@PF1=irX9_yTiW|{L0 zQ)cQXOts9K52=p=nzp)CtadD3;!w2MxoELd;UcHPB~FD)EHh@CBu|6%k0AAtOW8{M z!X>u(i)`{1Lh39~WmLG#xoD|-*-G!GE$($2K>Z_4U+{Px1Nd%1to0G-wpoaeASA?( zY4fh6gS`y)ICKgLQY8^nA0c;lFA3f=f~>Er01wlF`b1y`w3oE!N9peGn5cN?*V@DXJI;W1 z6}27y?^J)7$Elq`DV;$zmq9U;K{}a1B!)pKhCw)%K{1~}uZBUZmO;CYLAQa)xQ#)l zmBFx!N$U*1`AsIh|11We;#yGezqH8@4#lTTLYWKNHffP0c28L(`;e1x{ zRjlHxSY-BdDIH~&*~1{TiGgz-XdkH1T@?$^ayU7g|1wsf%1G7vzpU|pPQ^D2%;&&s z>be-1%0Y;^f`P4?fvpmXSSuK~b_nU+k+B5L8Oa#@XA=9*CJWkit7r!r5|p<99rz%u z_Md_GHE8~bc?SdU5(d7dpshyyvq2=^EC$xi3{ocqbwD+mMdW|0=>JB3|214dC*6ta z{O8vMApvbrH6*C}Ur_fypAPttJzdZ#6B7EM1$c61pkr4-%d>4jXWvN~{1;UG&n*0( zf%ho`=R*eG3k-Z08HBH}N#5dAcqX9!R!|ePE<)7gzoZ3dbWYmlzmzQq$=HDIK!@Cf zE^i4Mx8$?>&u#vn!R0?+)PIJq|3XXt^X~gEa`ZpnDG=hh@SpF}f9`YtdCr3v0$2Wv zT=_2w9>Ej4{$KFge<5&pN$~oAt=s>l&-|CI-oqdh%fPS6DPzVcW6daI%_3#RB4x!X zWx_0?$0BOTB5KGWYRD*N#4e`8DWS(9Z7!(dAgSvssp~7C8Nj3J!zS;`BI^yBSK@a9 z9ZSydD(_Sk-?XcD^Sfz#zPGM=l-7MXu70aa>TK(vP7V7q5%p*mac>4z69xta1_oa6 zWj&B9dmwklaWXJ)!tPdpT_?xL4pPa)4?cuPjDcAiwBnjo7Q7u*hJhW-V3%j$kO%dQ zxKtQ;)EIa)KuE})K`olWvW~&Ng~73hDP+D}!EVd$Ye7rjCT{tYf9QYdiT{O8GLd=X!m}niZxcbiy+7{XQ2gn zwKil;f_?EapN7rB?YrEn)>`K;vIbWlw)vorjBVaxi_Ce(;QoQ^z$6x)exM4TpvMtNszfD+7E zyG1v<ybjbCQu;zaTv4^1Bg&6x77&;l4E1(EoAF)<| zY9*lq(&kSUY(e#rviW~*`TxvPpu1lsOhKn`OPheMLlRa4ZyMq|462g&mNM`!V-TDR zMZ)tL#E**T-I2EfEk7^_{BIHpI=NKM33Nx2s1E3KBLQ%qNKgk<8A0kJAp_8T77|9F z%1G7(G$bf*{$I@EKffWkTO|9Rf&VW9>t_b``wScp7z8dei(F@yy3MEfNJ#U&q`@~y zL(pztNej?vtJ2_uUg7l-yh?)9N8INBh3)?f#r@Z<{Lef8zxc}koV)(>?f=hp41_q& z{O3ITpYzOruCt)}i0{&Wp-caTFM|g0M6UklyZm3^Dri}q@Xh}!SN}82eac{42b#MV z1RbFUsgGEtty!h5xui|mC5%|bj6n5~hyjDBA)BZ+o2WLcgbA0drI4DNgpQAxewesH zxPWFjr&0i@J`(d}5buVqnk%uR~A+lNt=5 zn}7tsoge{l1;h?sUIiJnVPjxm2bWGv;9F2xLDd#BM6oafqa+A1OEa*@fICex;EG8O zQ6I4>Ft8{vFv~OWIx%P`Ft~K`hs{>2IP5m#apLBGg@^x_90#GoBmZ*`{4dz|KY#E4 zqCNi$cKt8i@xOT6zsjwDOE&x{1Eb1KU&=RpD&6oJYkdT{a}BbV8?uHf54?&SUKwSr zc#ys9J_s$p3#xiB>LW>;RNseQpxtLz0P zsk4ldXISSf3~b&O*}X5Yc^ha7sbr~r@e=#uC3Z!N?F$##7cPPf%~@p4b*oq%)UrLa zeV1qTI;WE5kX7OiCCfpmc!g8xDzA#Q0d<>wTekVNZg(k}F6EWWBBBP4aYk^Y!`3mt zXh@$3jf7N2Y4bpfH^2+pk^4uWej=Xw2o#1$s~upQlAw$3AW@LM7<4UE){+|#l)V&m zQB?NQn>ov%lSi=n=vMY}$n~gj?j8Wpy?#=tx2UDUY2fnS-265Ha#zbb&ep%fI~>gURvaiQt{^3#hYJ4`bQ;O zphq{9K{g_Rk8yyk{f2jokQr6GK7qC=Ve2S?7k-0A?J9SDtK9XieE0Wq*ljNr-~|kj z9#i4oZ-slmJzxP}Ip6~g4zE|w|Rde89_lf^gF8-f#;(x)C$IjWS1uQBU1Vb5E zJQ#R_8N^~4q!Jk9QWylH8ARe4oaQiuu4i!H!r;7t!DKOm#&ibF9tO2e2Bl60wO$7O z`HYq;Idq@$8od%U0FAXz`$I{z@-WrSAmT1bILNXOM`YCaLcj_Xmbkb zGB7DIfaVLhz-dwt#OB+%+~%@e$|Ox6xG;KnNVpF!XV1H)zpmKFx)X4n{AJp)@k z1A93Lv9vI7buoxNkp?dzPp(Awy`k>8I ze7gU6bwG$m`@fJ5=xz%U?f;@W|0Q+)i>UwSSN+c_{hwLn3j@y^2F^PS9Jd+xE-(mQ zW)Qi{D*KE}>5GWYKVe-^MJ8?XU)BuNlTxyPc8DbG{)^i}Paub^^yahv&tv(YA>cn( z=6{B%|5=v&XIS%}aovB0&Hov;|7Y0ypJCsBhQt4vkNsyp^`GVRe~xqiInVzWJpW(l z!hf;L|HZHT=erIUd#25w6RK?4>Eb50oxE?G-XX)7)nYaS_c zZb>s%2_t4P113>@MiD(`F?AMk4OS5?W+4qGQ4LNReL-a_No^-t10NZEZ(bD#1`z`W z0RskJT?RpO1}*~z$cdv2dZ4Zlvpxfp9s`R$1DhcOvleJs0;qE&54(H;(o14z0M$Y8 zT8RbRU}gi=L<~IOxg;Ub*d4P31G5Y>n>+)PG!v^F1FHfEu_=HWIjkz6i6=%i24)Qg zZc_$nR|e|>#e}Jrtw%!_K8)Y=Bk$nQLye=!MbPpYoF$? zeVV)WdCr<=d262Ku6~-o>S^A}CwVKLf)QvfckZ(Lxy$b7Eq?%Epa^??=eJ5r9t+a(8GG+d)l=-(Z7F>fYqRRj;oXc5sBX7x# z{G~SwmfgyjcPVkoN&lwpE~RU1^OicmkbUkFhrFeBIg24D`8bxW4C~w%)wkcLVWVT| zGH@-k4Ahq?T54YmTIp?^Hd`-chEc|xfTpdH-TMMsw!2rab1q%xSiHokWT`{ZVz3ra zC&(&qp+(Momx|Q^E!%_JcX`)uaw=QtP_o>qY^8nK3J@w?=}@-HqjH09-B#DuZQk8` zJnNUsh175>*?h9$BwBrxw&)tTN+PyC z%3OLAg3ziYJoOQLUI|hYK}bk130@z;t0YqEqvCbXk?W&E@bTcFGle$2fK*0hn-FI^ zK>A9hTV6uyBg~Ex@?NB}ZSRn)AqX2Xg@kp~4%S`zQnBj`q&_O!^{s5zw@UCP*)rHV zhqB#Y^LKyA+x0nP+xwhdpGyz@tvUI>^X&hY6aNd>y|Js_$?Mp_AQ8^M?#{sL3z`X% zier#XVUSJ&RYpP)45A4pnOlq}yb>uq%@%c>!Eqgf-dqqen#y1_mBC;-gV9U|vt=wc zYdCbD@|(O7Hv`p241AYBCnvC%GB6b|aI1mJ4bY&90t2Tk1BVO)yCfrz94ntDGmkoW z)<7KG5@rCGJR%H?t5mF?$k>7guf!okSSFz3Od0qOgH~v>wlc7_F>o}35N8tuXA=Wg z6{tR9ZDkOh%A@*O)$YH#3#fmj;RqU?<5mJyN&H&>Mf5=%odh)h^D6)2R{X;y|CmAK z27~w^2FW8V3dcBAPxENp7dL(-Z}DH<;lH-qe;xP#TAu&aTtNruNg0Cp`Q$V-x#<)fsTM+f6l;l7qsC>_$q_M zEjGF5{OTV>bpA;j|CccXT|p~v0kT5L03#yWYFZ>s|_@C?Yf8J~Vr7rzv z*!Z6zY&ip8Jm{!uUTX$ET}Dw84rx%8#3^mXBWKGaWzHjI&MsxbDq+MbVZV}DgbYHg;Ybh>LW-^l)2#*V4)C$gTWnvj+%|8i zQ^9fvFzHmV+^Jx>eeM!?eH7VqFsg6AZ{sGHij@u}plZmzcp0cWQvzCdXO=lnFJ*># z?n3{@EfHON16sCw*KcwuTkcrA#JO~tbLldNlBHm@+^%S;b^apP$~D35yTZHo`8RKK zuU_X;vD%|%y>rzX=c+YM6>D6o)_GKJ@UGeH(z4a7YqxjfO5NmcDMN2i3^Fi-mwZEN zAp{9IwH1?uR7|k?2-;DCaKWn$FDK7OL68&9QORrIx(RfKL+X<2sY|ZIJ4a~!qpT%2 zvSD2$NM)3{^d`JM%2<92jvzfFIGM8oI-rNyKfmKK>dkh(@gLjVdz_-ex zb&-nKJuhDO9Mnc0|{t=uFS_cm9A7R!<$YXb9+dg1L6+1qa zZTnEN?OoZ94`n+(K*sJMB)CfYQoi$Z#m=u4JHM7f(5^3~Fa%Lnx$|T7?$2#U{!Kjp zf6AHv6&v3AHyn`k>SB~fumnv#FxWGIZdlL;i?nK( zACb2DFKzW-)a<{Q`F|P1|EyB~8TgJdFzy7MRth>7u8o1Kje)NYgt%%L*ee-WRx!vO zf$XBy_W!Tq0Xl+P)bKx#>VIw(&=o~|TF~=axwZatY5wO@{l_BzgF*fSlj0{fRnTe* zF#}NT02%AkaR*gNS`PnJtp3Xy{1?{(?b{X61#Ok$*9R?x(5wYxQ5o@;{^Pe>SK849Wkw>i;t={m--UKgXv3 ztn2?XuLYxZ|C!hQXIlTCb<=;2ZU4D;{Ab(spMCd#mIMD;5B_I5@}K3{f1cC-15T?-_y@F$h?LD+LJ#J~akjb!HJgHc3-f2@`fH6E0bEP8l-}X;ThqQ#MIs zR!Kc(aUE7sb!K5zCLU=9RuKkf0R|?15Q9^KiC2+BSe;i&PfXcdLElx$&|O%`f`L~V z-1iY>U{Pe?R0GvPpqo;`{UbIb24;N*Mjg zf7PG-UwPtx=@C#Jly~5N{{H`k2Vv;Y|3YwmRCwTj@&5mXdqLxMMO*)tZ2Sj8>;II1 z(AVO1Um&wcMQdL`>ZAO%PYc&PEm-{o(lg3k{WNFQll+y>z&#^S{|Hhaxc0YGH=zwm>Oho$9yATAahSHYYNdcpzC3g}R-%eV1H)-MBltp(E=ih{!w2(3X zR?gykIg9V)EWT5)2!tw^-^rhQDSOJ{*tV?!rHfp%raLBd83t4-yBEki=g4|hn zwll12Z*coA-^MNO)$81=*SS@#b*ownVS84s_paIK(X`FCeYbDx2Ah)k%C0F4LWbbJ zFdL#Cg0V1bBDe@>nhZ8%2dRnRBwA&ZJpVFkO$54qBV_^3`Y2<`Eyze6gamE)g4IVE zu!HZg)<Hx2gAPf)*-mQk*KO(z6DgY1bJR?xI`zN% z^#7_IpS(H_iTZXiD8?~xdo!@PfjUHDVGLs7AS4mVAQ=Th(lHF;aSVbH4BSzwK@07V z|M$A|U;oB`gWgZXL(tL+SSyCL-vkKun-t^c4! zYYd(2!sbkTMxZm0StJ-Zg+PcyoPk3eR2eY|GO(z#2%8B=#tTTs^GW52D3tI^goE#) ztCLXQB4!F&X(ns}t&f-`{xk3%1=UBKlNmTCGw^mY@O3c=HZlk_FmTs0@HH@q?iMt> zuIl{XBTeFY4@?rT zz?IQG2L5Z%`iT1igWx45;VUfSx47jVaw)&$*LWwY|6ke^bR?auC1{_loGs`~R2f@P zWh83#U&022l&wMgc4aI@RtY0ENn=n=1ZFUc>oAFFGYhLS2q-adN`O1C+zgC-pgVn7ML_o~ zuuFqFygUjF{HnY%Mv@wKQkwSSYIdNS4m1oa3SK*>%)q1z8n5Fp0UcP%q7UM-sDq9^ zV3G%4#~=(kF@^y$G{+6@6S081%xvH}1tHM71ra@N1uH%^HyM))+x^~s&zpC>YT5m|cF)I(?QcrytTH=1$A7f?sC2_ONPSei=3UX+7e#AdKt}2cRzHE)M>(sW z%bl%kF0_y_dc0KJrQicx40`vP)fjI~k0UmfT5Nawlc+ z{glP`Qy1S&TLL;wD0kWYg5{607u`*tcRhaE`RLx`{*Alsa~J8yPErr3mvkzWcF5&5 zif2>xXOQ<~kaK5{bLX|nHcpu6QnVaWAGsEYk$my z!vQVZ-Ky3)m92!=N02JUp=7yb!6NITC4LQ?BfIy7cJA?O+Ui!d+O={OGIFU{?OM6U zscfZt^}2wT?cv?~!n^nRHEs2%S&v*FdDN_j)JJX&TYTDf1$Aw8tzBUeRmq{{$;2ni zz%0bTzyaLhsWgULvpdpQ}5;MEYEku>iTT9t&YKDq+ArVu<{cO6VZN9{6D z>mx|51R+tbt5|j;bJ-0@=O}Z2RJ`>q z8iMqdN_KuO+4;G6I|!BU_)@n0bLrOil{-JT?Elqw^8bvp|J(O`&6s@FFlHK?Rw@I( z7Xzmo1CJ+zkUwY#x_AhKXb^)~AcJ@igLnuCNk%Zp$1zAJGYCX57}R*K{w@waDVpy* zXgG-J@PDSm|M@rkXPEn6uJSvB|5*l;4GhNX8H{!?nD1rK`N?JkI*FQH^FIUM31h9l^D3ptbj5_TbEvo~q_tyBx zrSzIz?lz0`4F<8348q44gbp!?9%T?c$sm53LHr7{%uPBM3Tu zlu4O^QwvlvF+--7j6hRMjJlwcd6+c7Gl{a`zLhZO3L$<~0Zms)BR@&|1f8H_r@~p$ zU7Hgo9Zs8ZGIzzT@=Z@__Pwv#^R|B1o2sqPE4RI<-1eek`b@+fC1=%~89rT6kc=hQvOS^6Mj83?5=y`Q@De#)X-3G=QcEx3`g2(<7x zYuTNG7555O+$&jeJ8#j|oY`lRCml}e+!0!{+A(daVOXoSSE;N`x~y5OkaiHisw<0> z6}yBHldvx6N+woWP#b_#gF`REGQQKTc$rJ#66eBYuEi^yidQ)nt+X##YMZy%Bz+F3 zJ}O%o-n}no!r|cdU66{yrFyk<)hegT)ehw=A;_k5xlQSE&)N-o zwPuZ5)oS}nsea=)R{|bI1Uq%&s1|B7FM~NG>sFH!3fk6PX8iI@9sgH1Uh(O&UNF9Vm zf-57?cpYS637+~W9ekk@bkq)9AEht5nX&v9uKEbMe?(z@gloJGtv<@%02;D`)JG5! zGMWcxgQ_Hq`lxvGYqUxVPi0iP^*wTBgxpao-tnn;$ET7VU&?lUtK0v(_1OQub0E~R z|5w87Yo=L?nJo$#1icxUY#7*`7zDf-gaa5v0zuV~Xb_AH0ByRL34FBt zBrX1{yZyKD{jX;IUqIz8lh}P0<^RG)pc}e*v_R_+_|*TiOWkGQKF7eZ5QGFLG6+p# z5T48+F@s5Audv}cDYO5oj-WGzKvj}6=!7?M-T$2Opvs6r_%nmZT?WBh3?kPUM6NMP zJY$o6&8_%fKn--{f`I0K0UZz$(E%NHCk?&?P|5&Q8A<4YuGGFo+*#l0MHYbAeUvHn++{VeM}c2EQeYL7Q8p z%|Kg#nz&$tp)B{8f4En{F<1+If2A}hd^ z(JIiW9mmH192>z>i~lps{>RYwk)iG;L*@~NfcXsCRSc}b3=9t73W5vN?gU|B25tpV zeFQq{Uz34PgMmi@)Y*Z|@o`Esfa(`n25xx<9%&F_7X)i(2KQN*z$6E#++Y%7WD{fL zkYeGM1!-Xt0atna3@m&MjNIVjg#%3Tg6kR?&|n?29s`>x2(cJ2up2XQ>VuIc1G6%y zCSnw4;8Eh0F|r8EE$&&-zxCSmz4s>Uf6>41dH=yreFr|a9{AL_=UwIQHznJiS8RS- zy5UjjrYGfFo|SHSS-$Oc@s3ZWyS^0e{RXLziVprOK<*rYN9qa={wp~64^$a}>!Y0A z{|mSO%ijvBj0!+28U7Tl{ROIwFzO>nWt6w(3A{eaS^fxuvQ|98tdAg5MOom=2(-ch zTpwjGxu3n{e%g|IDU0qTEVz}l=uZ05yP);qs~+X9ew@GhVfx~m$+Itp_aF7H-(;P? z$S84=QecyebCIxBCZ}F3gK7waqzi+f4TFd^1HT#Q9CB7w2GFuq$i6KBP-l}#kzF_3 zBCZ2oAAveb#jBi(S2+|e104;Ny}%@WjuW^(ik)~Qq+_>7&3aI^QUj@C*1A-$h0Oan zRIYZYTJ1^) zN6-qZVm)LEDX?j0IHbc>y~HMSl5tp}vRk5*rafqml1+-8Pl-oVQ%2PiUi)A&@YF{{ z_K!e!<{};a4X=!_)<=-D*5TuIpekt@sDA{hk08Ay$ao#nVBKxx`UtX;0dnRlq<;iq zLwZS&6?fTd9zyy@Icpywcab3dBa|v>{qw^0&+|7tg><06<8@CVB&Y%cuegKkOM#^4qE>Rtv&)zExjw*f`W>+yeo#)L9k>eFf2v{ws#*9|{xgdG=MuliE^?bo z^caWm5mupHO#Is!`3|!P9%B%>&m{JcLHHvG3I1Xh`_C-?pHucfhdk(zI)1hPBHEz! z58?(OH;5bk7c&CgCo5n8Iw4lX@ISBCe@?aUEQ+5PB(ir>`Oo0His)fze8Ey|Wxlt_F3BSl9h$TmN5l!+(+W|JB$2ms|2* zYt{$h=9|(vTiC+qbGbA!=;tuV1TgSAGB9d_)}Db*ssvR+NEkd0#-#w7@!?Yj^_4g! z7}!KX^#}_elw=lQU=s$7uX0L&qmK(boC2A8W`y;X*g-uYCSE2sK^9JN7EW^7t~i`RAgXOgx~T@XQe}1FN*Mkwr=9pY>B!fb&2P%qJ}X`KvV6nq@(r&mH@&Ue{H|*AyUHzZE4P47Ew0-2sdV@E zvOPbF_Wvr{|10;v|NKM$a}R=0-l6|_hhTl8L;v#*g19*c{^#ubpS9tZ#l506pgm2XjZ4dJWzN5xJoN-<4O`7Br@T2fiG5oB zH8M8IA|}y7x;`ArPMne!jDiNN0=kSmnhYFrpfOC)y$syo88^swEqL6rGB619>IT`y zHn|opbtzl|9;{pGR=&=?V!eCCT9;DLjYXC@3tcK!hjr~um~uR`#?ONxWbxzf5ovPP5R;}@C+8W(|C~nfxsJ?@cfjCH|<5~l%lH97+!e^Zzqjr#r z3DW0sty}{#8KS0qjcdhPuj)BXh#A!Zt) z;GAv{T#i`{!A0Qp5$32JW_<)7zC#LbYf5x6D-owkZr8I^2#SGf6Y;pVr+Ti+IKc>}453bwv21WzQDY z5kyeX9V`{hz~jvz8tp&%ro*lOPIvwbT>j5;{6FV~{{q+l%U%T`*17)~LJl%$uV+wS z&7iV~L3J^M(rO0v4Gc=}SoMF48h~mLZk2273YQd(W^qb2fp*h_R#I^>F!C@ks4(-H z3n(5{x46Tn2~sI-3OX)ZH{icX_isc@HsgUt{3C!NB*1QREY=#D6YXP-Vm^3tAw@sqkMw z9ai?|kz+-gN|1q?^Wyn3t5V3>7eky}v zIfGy%1G_&1XqAWo1A`)H>K}5-0lch2Fa$wccsS%hgCx943?L)}nk3>BX5bKFU=v_q z1}}bN;s*`vvx`CNBhZZi@ZF1$EkY2I6;vxR@G>w7Fmj2o@k_Gsi7;>pFt7@N`cMpP z;PnZRc>zvv2TGcOK^DBEK?yXz#2^FiJqds}BEg*qnN1L8;M0-zt&8c~-@NN{*TG-) zJ3g1JeNnXf8Kgd{-txX`>${rmAF6kMuHXN?a{up|L;s5Q|0+H7H}4>LDc#}!`G-NZ zPd<2z4!*uF_uzj>eU!QTfBw$@nVWy*Z~c?K@kidq-}#&VlyCb}yy-{L#`k$^U+1lR zmbdO1q%z80{Wu3SeD@48d6cv2N$%>W@G1#1UY8A?FiKx~H*@*@jAi#SmxDGUWi7o2 z8{&IZu=+{v^1G??uZB%H=G(s4u4IK~{ABsS79p2%HlqXvjW8zl5C$1f25~zEVJpyS zu{_$Ko0u5n89)IAnaJY<*V-(wz=E%rW(0?!D34B{ZETZU@iMpKW$vY`yerqam4lYI zc~x)ls9Xm+%DZrxYvr1tww+1SPC}M6c-L?8Y}f$m`_!#-uUqF*yWXXCJ)~ChuHO{g zzALnIPe{jZ-^MMF%E+T;J-j}0ty}}ClAOv`Lb^h(m1~gakDz_1inXp4Ydxzr`qgjq zYupypx+}DOS3uKN_lD(W`O^%ds+E27^!;)zBg$PfdVMj*;ep?Kxm|hodehpfxx8-Hc_7Clceor|5 zzvuA3`VH^G+K;LQ^f9O;G4OgZusSiY+cWUEFbH@u2>UPyc`^ulF^G7A5TpWv)IN|t z5wZwGT*8lm*^(h_oy)HOlGp#s-1^US;y?3A&{AQ(>;KiR{pa2LpCkD_gZW_wt*s22 zTNqRpFeuJtke$zYM>i_w)KuA>cznIp493y0oPjT# zTlJlY-ggdl(5f~eR2kWVhQdVj{|l=9=aBu+B>IYh{|STW5eAXN zEaJPFM0c_Xf)E4$Ar{dS4AL*zRX_8og9Zb|4E~E5{O1NEF+I@IZfV{BvihKd9i)x^ z%Nv4_v@xj9C8i6ir-Ze?@u<9Km%q;_c@xw#l03^Gd744|B%|m>Cb3J*QujFIAM+{y z7u5nC+8}1|U)1=&q&a8^PSWnbm;(q&Is6y51udl$xBf3;`CrWXKdTK0F@*o;D)`Sf z^S|Vp|AO283vK_exb?sM=Kosj{wpo}uf5>ERR0(8x_j(-=XfL5GC0p*&?{$lWm+jKvN`W0#M&YNq8Q4WYEo~kp20ldw0cA!$1qNPuc0oB-ei?3~AvIlN$al@4O$~7HpAJ0AXqkH$)=FM-**S;!W`>Jf) z_o`jLi}(L8J@~(9KL`~b04?V(I0W8pbOgMb?l6d52ww7CbPyz4eBgiaf&T@&|L5)e zm%HO{?vB6tTmNOQ|B=7xPws|adF#LDt^Z!Q=|ld8clqm|=dOJU8?1YlvFri7GJ@1Z znadw$E&=s@AS)2kmfTBUdOv6Rqx@A*E7v`*So^GM?UTwikE@p7%Aa#SrF&Ok#d5cd zNtV&AhJh6-F4=MxQ36^%Jj(VQ^46RZ226sQ9Q=xm98!#|0u0QMdWRL9gCIRqc;Mh+ z@CoYryCyc-pfyMq}{)oWdA*16S!2IU}KA#i=P z(xqahOWAVI>NSp~D_kqrc-C%!m;mV?fmBwm@~mFtRkPN+cAa&u{B9ojpk3tY zq`Xy-!{Hy0R3AZBhC>#IBUecvHn{gxu}IH z&;?1bOOfC+N}wfnJ3fFG%N_XIbofWx!Cw`d-vqQDk_(-{pq0VE>&?LAz`*9h!0!W^ z9uo8fjm`;pGVpsa2)Kigpc{j*2ZIP0LFyercLqUs6cJ%hkcfaAi)ys;%#T4QK=qOI z&Hrp?{_|Y=&vO-YUbOuA{|pnKGuWSIFxtnUx069_BZJaB2Km_x(sLPP<}t`#=G1w_ ztqQ6#1av_4k(kzhRm=af>I)eda$q^fno~T1PvfJo?l*2N&?+?v%l~Tb|BZwH8w7yr zBL&OUX#tc*$$=QI$<)rLEm65pPe`&}65_bQ^tUy(g z1bE1f$N9fv#D9gh|Ay25%WwKGzUn{2;{ObjelRq>V<>sRkanISa2JE~3I?O;463aR zQdJB>=?rYq3=AHi#UTuu;C-f&;9WWF;Q9g<-uP&w`ba>TL0FAVSd~*ugHJ-0PePST zM3I?S3Oe$}E(Thq&nXNV!~;zU!Ou^?G#xUp1X-=k#=ywU!YRqYE6X7u%g!&u!Y&H# z89^3Buz_Zym?c?xl{p19S$I^yJCfjYTX2&Z!My-(22M>)>u5fWXa-gb29+p_*vUC_ zuC;If*s+T=r+ke#U`(JY4f928t?mlZcgeB;CCC1k9{XQ%6jT|3cETP7 zE&oQVk8*eX&D;4eebe`>4L=LE{m)(p>LQhH`&Yc>Pucdbg_}MUZhV;w-UbU=@4fzI z-kN6x>t5undXl;PVaBot=}YftEPIf*@=4*E=lN?vs9@cT+|^G(+*MDr=HH0uJ?7i6 z-8yfneoVh|K(mZ<5wA%yn??|WqBnz_D`*->z=(m*fPq5=*8LF!FJZk{`9vxpJ*X&H8|r?XeS& zBuqIT+OyBEbvvX!@@m-PS-%kW1New~CeSm8(3eS9(-~Dl*5? zWo~6lTuT?bl`U~ATjE~1%DZ-5^6ZOngjq2q&AF5`=MubU1ZP0%BdjER;10q-u9d*s zkw8^a8t7Q{YZ;562v#@U%vy2_S|5RHqV(lA(w9Tm*FowdWD>T*0Z|`8Y9i3^>?+Vv zhwwTGGM5A)A+t!Bm-j(dgCmdNLD-Of6Ea!22DBw1f9=x(@ZN0@2_CzH*GCXWG3vk_ za(x6jbG3NeYXkz1(Uop}U$Ns;?XGXl2mW*&`rmWpfAjV)2~#dwrY+(!F9t0_U~>d@ zXSf^~M7=;r*o{HhjX}^6goGRzK*)(f2!;e5L2LnM&@DECE)4umP==rj0|>c+?#JW~ zl1o`FviHBj$^W9a|MP>-nGwA9pZD^AfouO|R{dv)yU3uoi$QA}gZe54rKJoC(-~x^ zFi1~jkeb3EvzkkH53lZT0fT?6TA)?moZ8^!cJlW@=PEFSF)&0iFwNwU+sdHI(4tM{MR_`i?^sH&1M0u37qX#eL|`^ql& zmP7g;o8%p4u?tM1=NUyWGl^YclDGvzQcv0CUvaDa6VV0jSCTRYU1%!@UP>ox2O6W3 zcm6N!3PQ3@pz2A+=D)1%e@UDFiZ1_^3;r9{{WqQXU!~!bY|e4ch|O%ylNj`B8Pv)c zq_P-g4Rj0@PT_S zkRdoUlObGYaLvfYz{JbMF3Q3w&c-dt&LhRiFU8C$09TMmt*6T1sB2) zy_h7kfVie#OiN^Xzno1DgMb5rq>qAkT}=PQ_FaGKPW-Pq_CM$N|BS=`vyT2RI{Ux) zGzuy_`M==A|GZ=WvycDJIPyQ^00?Cr{GYKOI-^v5=zrPX|K&UX*X;XWyYGL=*1v@t zzGbiakhthUa#m>bG9SY{z=g+mv zooAmn-ywg2OYsty@-=Ri(49(<+6`h6yfVVdfYe725?&dB4o#YSF%bzNS4@y<2#ti* zN{Caz!B;7PkFEt@I0fz;-NITQK?dvKH4&s5f^(s>Mc@um?((~k$_TVcZ560Wg499C zB<1x{(b{K4Ye5HzKyMVnrMM(4J_DcSy}Z0EbO zo$tU00e!CB`@QAJzmB8-E4F@2nsw8@e4Ds;2ZKr?Xkv)dm4VL_v{YNdk3q}_)cp}~ z1y?IBpvp+VfkD6yOMN8Z0IG)goe-p;3#e8CiMW7{{L(ClS@cq5?|+%2|3z+rsv+(R z|M{o5~}BV31nLr1XJb|1YQB ze^xEf;of`({{=N)GKgIhQf-se=@r*KqGWNNTN`v=xv0^91zXTfd8$tTHC;eQ^vIcl zE@0)?01ePd7=q546W0fw(!&ovGL8>?To1n{=wu#2-T%V+|Hbq`2y&8-pyq!*wg0?I z;E_794-EV-8TihF7C#8?1=UC5`Z;PPL_4O$;b+5eZZ`7dGdpWo;|kN$rK z=T{8o_ZZApFsRRDkgH@6D`en}Vc?Ep;EG~k3u9pQ2iFRK!t%%iG^R0nNI`_g}~!%hzq8XN=ZnQo)J9I!~yCJG4L?32(j^qa|_C_@k_AqiZO7C zf!x3>1X@SIAr9Ks1Ugz1$qWP&bW0AqsI5y-m2dWJm!wJ3Zj}sLu?&XAx(SQ?X525@ z^S}Dq|N2}1E3f=7Isd=x-2aLTAXIklf9cu(C1?H@p8TJG98@1=9|u)Tg(v?Pfl=Op z|M45YW^ekQyX9x$=AUUR-o?&)=F@xCrTL_N-4WZmJua<>!>67NnRLpsWUWs60=vpB z(bFy#u6kOr@lDm{cctrImacl5v+zz#|1p>Hb>>+MHKHepyH>H8WP+}6QV3#@^JI{8 zVHCG#5VU0A)nx#!g;Qi;Rsi=0MZm#Ao-H{ zBcG;i-i@IBLEiP7d>TMqAjQc@W zK7bb5RjzWYS`8Vmb1GR1IZ(*6dYxaxX7Ab!UN!5TOIO$zF0sp-W0yC_I(w!~&Mey; zXqDt%w%o09oofYX&#ZU-Cf~*_puMzp8(eEp`bTJ0QsT_>keUdMB%?lpRZmwzcN&7v zUxn94X$!7k_Kz|kGe^ro{UgX29i#$+R7ROAZfC8$13}P{I&ggiuZ)PPkKh#(gn`*V zf{4H`s(^QqK!bJbpFl?HAXQR6xJrW5LC7TL+6H*71g@msmTmh`v+GOa{-2$P{`MXJ z-@X4&(Xz)OEeF)XCNb-zF^Kvz@VSA;;>3L!B>Wh}y+BC9jX}(XLEH(1#2greY(NvB zBGwGTwqRt(AZ*7VWQ#z8_MobY-xfp)*nvn;W#j}hna718|CA+oD<$7)(A7Cy7yfgc z{m*j|w5*O{%YTOOmkeen81y$Vs4Ztuo6DdylR<6@gY-lOsZIv5RtBMJ2Hr9T=`9Ru z``NU9bLjl#(E1O$0!iy1hvr{y&2L;9U%9nG7sB%B{^!>DuW0sP*9mkw9B2Z_0(1z2 zsK$RmmH&bopi|%kb^r5ggSt_mo6U7V$7u6t{o~R4$EW@ugw#N_59Br?0oDHks{i@G zh+pMDoAgg6@w*HH*BJOtG4P&b5ID>rc!WXZJd^AVE-lajagejVK}WWlgRUi#Gx;xJ z0@^&Q=m1_}uKSHc={2kD3pTlzq6YsJZ9tbhaOr?fMrD=$#wzig6;>aC7I=$aVGzH~ zEOn1r<~5h{CvG)RH%in9bc2S3)qioz|6;cPr5yjuI{lZj|1atIU(W5nwA+6v*Z<-U z|AlQp3+?y~|MTlzWRTd+Adt$y6wbhI145jZ44jq>?A8ptj*Nm1pi_7FY{8@z1D_QG zuO$PwIfH-&1E(Pavm9t*1+-d#j7BvlXrTh91dD(?tB?v4zak62G7G;lE1v=zzamIb zP=Q@ofmKkBkxz+LNDZ_Q7qm1&lz~Bnkwb=!SCLObk6%)sLs*r8U6KLRyk`TixWhlk z!3gR!F>*6=ig5BvafvAMh$(Z5X>o{Za)@ZL3u`j)DSi>!>|68wvP}7C~XdfqzUPS326Fp zD!FsWSThJ4G4N?JaH}zK%7T4_c5*Dlb7aICL?aO-yR4e2w{^zEkj7QsONk7WfvP0Ph@3~wddMUZcqPLo7x1hQq%v}^+u#OXYv*3Q#-j$h;?ARHtxv3k zaFjUnJbINh`(onki)i&x(ws}kog*|hT3-pmMXrw^RT6T21e-s?K3)f}k06y1q#A;g zRH=_39jILJQU}N|9c0BFq)LLww-2bzB=jXiTk9^w>szgj>FfL$_2w~v$WDxLT5D#P!_h%3TcZkG%7(_il zNX!LPABj4EYaAQUbv8nlpejiS%oel(?Nfk|0@ffk&^pK#)WC++O1uuBjupGTka4#9 zlK&k3qDB zL9&B^tC&HcQaR`~pY1gU;U~=MKe%;4T_i5;|6JPt`E`E@=>6u=0aY>FI{*3fLG_V_ zE$FCHevSW}a{t++|FcW|=a2?1w-(X_RYv^U|GB}44_qJdfwuf0>LXro1tg&UUkHo@ z)Ip2lAoUT0=xqkRE1>#_?-aB?62HW+^;E**zpCqh6-UtB6G~?PmCgSvTY`|B6=?KN z!W>#3F-kvS;J?Knc!x#)r<5tETH)3I&!hC0LG%N&*i#nqXRH#pStV{WNnT@>xydT? zfJgZ`ui94;-9KW6|D{a-OIv`>`H`{vuk89?C-}c{)PEJv|I!ZsrJeuFx=V zX8(Eg{xeAYXW&`Sz_5UUF^qvBn1RijfytbK*_eUdoPo!IfzOEndJ3Ek1FsFJGU7G| z9S6r{#=vLBz+u3^A`d!&7H6LVf2qX@>W{$cBW3|5Rsj`eJ|zZDX$DSdMqU|aemQnw z1yGeFq{b$!&cvqz+StOt2W{B1NI;h7aR{q&il}jmDKPU%fR-^Z5ZT&iV_@KBU=?EF zlV%oB;S|&2lQIyLH5ODf6;w237m@*Y95CH4#Kf&8Z5S?Z8ZBy>q~cMi9W}!ueW^zN z7WuT*j4`YEGItpE-HqM#zx3+=+PhFxclUqoo&WW>L8$g7sA9_4^(S)mTaRhi^lJ8~ zqUo2|;W+g9%KYC9e^<5I@52g!5J>7-6)H%VtPOyM=n zlyE4KcP*E+FXS>vVo(WVkoRMda0fO2_$?W@^ch&SKx^Tc6u~P7#lTZ$Jm9fS4sd;q z=_wkCh)QbMX-71Amd%HZeR3hS#lrd(3+l3$gN|r`*F+FT=8C(Zi-VE+N03!^$n_MQ4Vh7bbci5z5Hbn6RBJVK!9(_% zhmgt$UOmAXkX_q_YahcAWG)F(JwZr#l?1PqAY*ip^Hdvmfv#_E-}$+G<>T1S111U6 zMXd4|r2QGVEkKCh5>(3wT0=P|9RH`XV~&z=p<;c4m3u0@;}p=|DrqpGqiqWaJa`{ za*4rUEraSJ29=o%3R4+mdKo0U7{uxsWZM`x6B%?{wKo5EIQgF?;}?U~83xUt44OX$ zwEhX{{O1BA9vx5>!=Vitp%OR!FK!CD%#K0g4+Hl{2F~{kLLV8VesU=O;!^&@qw!5Z z=a-P~KOxn}C)s zNZ9?CwEr*R^j|&jzgqZz#i0K(p8tg%K)ob+m;Vy3AS7n}pHb}}tI9?OuIUWiE(~n; z4BQ%^eI?v#pleTARYCi4cr6%sEf~0+L5Rx>bi@<4KLc+d1AiEUP$Yvy3aAr&5RO@0|Y5k+H3Wg`J8O$H`j3=eQJFpBakI*4m}2paec8wK)M=7>6#Duzz6 z$XRV!cgCdhxJ2$j@tgz39asGpe+XUiA#ll??A^c1j{VQx@il7s6R!#9Y?}`n7j4kZ zS!IyD%rIxURpBa!@(tF-AmmWK!LeeaL**8S$}P@yJN-J3WG%Uox%j$w$mnWhZWVI}RCZc4;e484Fek69zsF(7j8n(%_mJas(e_tP;cdbQNId5>u71Pj$-d z_pDyyUAqoaA3@eLK!)a!>m$#GO+L+AgW7ibG=NstfyoWt^&sR?vlcQI=Tx@brDCOP zgXr&03%8HC~l# z+)7t?R;>1^UgurA!Mk>YU+o4^Wz?|QuL0C$gA62k)@(#Z@zc-35oX;KKl3~sL24yP zcL}*NLSrM>O^I_aB3De1TPh$Va{ma<&RBFUZShrjl?1Jrz$boV_L3moCCI&3kQxUv zUWcbX%2@%vRv2=*FqkBxK7ydK)qiuV{$y4KjotC+fDSd~(*4h+3pxl$Ko4}W9h>5R2JycPg5Md0 zJ~D`YVUh+R7KL}5s-Jn(fADJjQf8pdO(3h) zA1MX>SFQbTG2y@I%`1O`Ge{sX$Mfn#BK4PLFEqv z*CPh@DWLnFIGq?coj{cl==4!VP<_Ov2A*ZmXW%kr;BjFR@nH}TXP1j(SIy+o$l=m1 z;?XVP(kWn&O9IzNO5jxi#20{&2_1IOrc?$I23AQ%9tB1|B}P6a&@mcJg5Vvg;A`@D zq}j!_*~PV4MDW)tGeo0S$Q2(q>Ab64ag;x0qd@X%vCNHfdFzylHYwz; zlFMG9k_AHASu4y+H#<~sv#Z=3k zIiG>wnnBQvLE3>))C9C4h+7MEc{+;%=(;5aad2QlcK*UQ1R*|#us{-!Za zP9;l0tE?M9%NU@=K6thWQXfGM^YL!l65Itn!^g80)OCUn$3YeH6D_U%wzreO| zk$v$J*UD8MHEW$qm)Ydax6GbnojcDucb1rmcS9Iw-JVb0C-juaCUzHX~O?o;4ferk#nKb_Q#egw|t%_MV_Urt^uj z&nL~f07nomgiM-qA$jh_S0pX zKD)G8%5J61YN4Rc3#SPKk2wRk83U&Ys4U_(WZ*Gk;4=Y{5H_z7103<2f$AL|GX@Yc zVc;==O7Vc%kjjYPiUEYceI-8d>;r`4H(}s60bPH?Y0ki4!ypsQGVQ(Q?*H6-|FiD| z9ZJKt^*`U4|GcL`_q%bN{x7`#KSTK&2FEK5=BF66_cEw#V^E&QAU}&iu8Tpwn?a(A zfjO1IYL3Bt&^gi)C;#)d|7Qq)%wY9`LH{m;@m&V1Ck&b|8C0J#sD5P7`5|ibU)TUz zj|jqUS>@6C&!PUGS?NEI!Y>Y)Z!FRun5Eux$-d#1d&@5OnMe5tzvf?1jRUTjK+EmG zwGyNz64eJC4JV-epG)a2gZLu`kuwZJrx*l}F$f%G5WK^y_(sa;zn&ZD=5Zrm(7{An zp8t(v|C=TL*NOfw>hhmK<0*sHcZT|(;tRp&?W_coi~qAF{b8`WB4zwv!x1#4#U%Nc zf$t3i-y3Gh&s++>c-6rxEA&Czi^MHKhb@R%f!6FvIDqDfSuvyJuy9Pv>bTLR3Fo4d=Rw24RVg}9KfF_%G7(ksMAyEH_ zMT&__fr($4fmackcL$Qv;&t%gB5m`jDfNCXX&=y)o z9yw+{1#W&Rj4l!zXm>5M61$QkgQ6FMya#BkOf`f>FICjOOvtTOA!M>b++!bbpYYeiNSrn}g>Ny$Kf5NAApMB+etCCgr6`*R! zscM6B^+uQKE$(&O13C}IO}~({{HA}`9!?E+1`c%=UPU&3WmaA(&>`obt8DSCEgX;v z2+dM(6Ghz0$TP<=XKqmKI=@EHVs9^Stpr)u?NI~TKm=K{4Vg;{?cC$vyv?(A17sP4 zW9bUpe9(1!Ryp%6vgTT6&9%v%XPGhEHfKKQ+VQHj?q$oJKv%>rvM*fXQnu2gYOQzG zS|9K-hJdE6$Oy95&cAUB2sLc=2P41wEs(m&uVJ%q{U+bK&E7Q|A*5H$das)GkopK$ zl?1JWW}Zhdkm{qv*@#*xarXK6nP=hk5u%;~FT;aWPmsN|uv+O7xP}6e@SA5LwNmEd z>)_2wpo=Qf7F|nUd=tDr{6^;Dn~>A(Ag4XRD=c`$l(h_c{Y3WiyU1+hHF(HtA+Xg- z(E2E6HLOZn{Wxz8;@A+-PHpgE4)FRYe>HSu53)@OQWI5ddsn^lW5do*4Ld)Ttb7#N zcig3DrJ{culU5vqkQ)P+B?E^!=zN~K;6Ll(|J*14GamWRe(Jy2rvD7JpBTLEGT5AF&^y4Oxtl?A34_Xf2E|?m>2?Nz zA~xk#$&OdfoBoR)`_HiOGegil&^9IWrwry#xx@df6#SQr`_JO}pF#gOgW_8T>96c6 z{{*%Fi|T`x$qDO&&L#!*mGnX5W@1|ZMKu1i%Y9}Le*;-Rz##UTMfwAq!WT~E?_6qs z__Y2*M(u>ai?oGxLFR}WfVxYd`bgq2v_29(!615yN$jDp`X?o;|AwCbjeJ2!*XO^U z?|;n@P<^Bs@t@E3KZEyw&65A3^Z!dN0iBY~u=YR4mj7Ja{+mtu&k*uX+yFGtCu#7X zS?V97&<7^rkLa#C`uud4rL| ze0KHlxE|V=ate^HFA?PaOIRX;FPpr6*b|~ zP4&oLY?im#Jb#Nz=~lP$?T$&a_zf}`cpX4{f<#={RYL_eyctByLAQ!Ch%m58@ri2k z@XNAtKrW<)IEEm}3Yu495o2JNWCbk?$J~)5D5UFS8&Pf?*`(%Es_0R!7usTxG}9q# zp>5#?^XyeRS*vxjR_o@i(ac(5Ub^0_WSx1@T8rX!7RBpK3s##IuF=n0YFoY~dg`Uv zY3DtgcH393cdXv%RK3lqdYenlCeS9G`W@a)yMudFi)$L!k+OKgNWabF8umQYg8&VTNj#F@}0&Pcv>=<&d zUguP{(!O|^S=L;$jM)}hb1k#yS>?>P&s%7px6rw0saxp^@2a(awHv&vR)bb7RIc%? zUgulC2|fx3uZaSix53!~O!aU5%2rP0BJOBt< zzXz$3vcNM+nQI;*q5GL@?&qunqqPsR*W3pk_^|GAG5EOe>aDNqcfN1h`l@`@{j6zc z{i`<_M|Sg@rZdR8Gw_--u_?E%q3WLU72EE4&+W#4JL02Ym8-OO2xWJdL^6CB;)&U*DAfWn(Q|>FP%xh+; zm(0>{nWf*d$baBe`O2gAOHdnBmx&qu7d8AZZVW;KI{!J9UNA`BXBI!jD0+fH>=c9K zS!T)CVmkknY(ZB@=(+#bb^EX7@n6U5znuGjNvHn|w!hhY{!2~$Z?XBm-roP3JN}#Q z`meSAzvZHzD!orMqTce@U6eNXuW9#RRQEr#^nV7C@2rx4xK%*|hR{Xjw%L*A_i`M2A%*00XNVd zWKMeq4m;3lI(}yc=_p3ER0fSE2CFFy?t2-6PB4UD0ilTN3^BJE;vX?2KV!&vz>xct zJz_6|Od|t>DtP}92Y5|2xIGLl0%2Qw7{F)#Fz|!sgBbW37zMx#K~R+hIub#IfkBvs zLz;zOkri~KgcxW?600}^hm@F_YgAUJe?k?flnI}N0l%ant44xp#00&>RT|Mtl!Io8 zy7Wj`S8-@(F^Ksya5yqBS}<@ra4AMf8DUHvB}+X^*Laq$aV}YH zle@&VdZSO%4v&UyHYKZUOIO;ItgtO!X;-q!u4I)>(F*hY#oEc!-RrkzExM65|6*{@ zK9`zJuC<$;Yd1U9Y;vvH>Rz|qyckdcvxk&y>{ zj1nu%or8gf>_LW*EZ~#?=?L?KLX5*mKtIwjrq{b{xqH=m(6Z|4^{&;>eXK5JDLcVTDQ?;+&xC>B6ok+!CWuDJtP{UIgSfkZ zL5iMVlV{=L+^NU&mOQN92&#-+Hh@Owq;LKgy8d7M&VP|J;A?bNf)+M_mU*xE&$bj) zAF-x?VDP=hV7i|{V-thMY6it63~F=uop(ytJ@?!ApK}}NRyh8x|9Lk3m)-bZdHa9a zP5D+%j^Y9&6c z{~U_X86@wsNu6aDKgA$%hDG)wm%=AGlmDs?pk)kN&i}QX|EoHKswDM*|C~nu8H_(F zr2SW4^j~h~e}?u~40#tBg10l6&Sa1-V&F+;VCZL%-5{y=U)2g!A8{!B=aBi&uM8S# zlrRDvjw%DLjAZTqL+T?bd(gUW3CI5uj{ilRKvj|qxO2qk_@6=VKZD9m2FY^_%2OHS zCo(9fGbpCAsYWx%1TpYBGqBq-@Hp@)gzy>_idk1N*v}LUUdxkkO(^XSQ|2?K%x4Ta zubB(pGZcPdDE+}u^@+3XpF-g^2IUU$Vg_aK-cmO3s2!x7WP){t_`tOfA83q=fd@Qv z#tH6Bae}q*LTV)@Hc4ha1$H47CO&BfPBGA69h(HVw3W7Xl%kOjqmT}#ur`mF9)n_} zyi=8+OD~Ig1A}HEgH#fOXb6K~0O%MHUN;6dTLuO*1_lcTUN2!I~*(5J5;Q(D_iAIw#K1sjZ?)s z`_k108FN%(Cb-vc%~^6QW8vkX?!Ayxbew87Il@Nj+#7a!Hti1TIsmDUyj!-2YCAE2 z>U40pfmXId!kGwv&{hfLlv)VM0A9QY9*O0U5s=iC({eJf2{8(+c2Aq&Rl3Zta+OQ@ z8mH1#_N6QBN>Tpz(JCgi#aQW?R?gc)bzUyT%%D`d7z@o>%q6I?i>Y#oQj|ONim|Kehf_StUcyz!BToYl|N1)+2Lk1p0 z=u{EBK7v$60v4b>caW8M{NO5&+Z430LE4j9KUUhW*`atvNdJlKW%tWAzN*~vHg)^& zs5Kw>*Z)^M_+Reff5j*NWgh%je*WL&DQFsqdDef<1>igQX8dQUd(TkynxXk8L*;vh zq&o~jXBgZLGWeZj$i6Q;^*_s=|4h5UmDdi?S_qE)|5=XwXFd9#^%xi({?C5&Kld>( zI{u&U)PMdX|9KAnSK9esVEKQRqW=tj_ZjpaGN|5QQ2WHB^Mf0*?MU}Ok1l9&IB5To z-hY1m|2*2DdtJHJf3qupXP5uTD*K*Q;UkCg7f#jB-0EMs)qiuU{^V46#w_)KP3k<0 z1n68-Ho5COia(UhLC3VJIsDhK2O(9b|2m%k>ip->I?kkWkV7Mxf!Bk9K?6KT zF9a^bLG$Je3}u3fvjlWNhkFa^gZ4lPX@L$D5;6QQVftUf3bcVp!v4Rc1L&{?N$}+n z(st0gNyzEHu=9U@um9X`{~6qVGT44#us_aVf1KHFC4<3i27_h>n|@)p4hEGp20m8? zZYKtzXaVCIPWQQj{!1CckFsQ3V<`B{Q1YFj{3k=@Z-$zG3=NISy?lz|Ie9dLlFCr&U4X+J|MFAlIz`59S6SombQM3g~y zqH#&G@X9mu$T9G0Fz~1|u!=LVh_P`?3y7++NcoCr#WE;mg02r1iDnQ8XW;V&U8lw2 z$iVKv!0ODv;>5sc!oXm}z;4FCXUinx&!rO1DsBrt)|CS^z$M8iA`d>IK^W{d z=tGa))&+X)N;)-jBIwyoa|h-#It6VXXSFIq6H2mOKl4mSrjcZ&0lO@xYV+E zxlPduyOLE_xl7Em7J@GHDP5I0|8mB>O935woU7M6Rc&yp+Td8Z-m!AMbM;1tigiX= z3zVWJcr{7dPXacjH!rv{}-gbxa1S3=%#JoVELZz~2#XLj9i6|WM<+}UwmJF@0ps#yJ?V)L8Y9UrT< zzN^~$E`Hp z{}~!TG1PrwsQJWG_JObBlVa@;gP#A2i~sAa`_HiTKf|W~%)9?{9Qe<%|3BBk|4fHK zi185U{yx^jps6UXqyO0s{%1P`M!Wy>9{R6+=)cDP|4tkJ8!!B?p8KD{<~@V@X9lHr zEGqxl)Ib|%`Sd}XXZiF%wGyA+e*t~))vn-G-$Gh{_|<=Ms(fWr{KTsGo>lQZliVjJ zxlgPzkC;IHqw{Q1=b5A~v&-M$Q~sr7{$Ilhgw*Z+YdQW`cL5y=Az=QWLFqpO?m$3)kXC!V18oLv6`Y-7CpVQ?(hx301_g@Uo-F063{Bu_sQo`f*MEjS@YLSa z|3WMOyDa?=I?E0;d87g^*ttN{E}-clE?AWWsfO6WTsVnVA3-W90Z4ts$ScFdC(ACN z#LB0@z@^Fny1Y*a6ckKCyh2Kh;$Dp6UZ84-Czt_(c)dVJ_;R~2a5^!tyMhp>4FiWY zXg-wL2sEk0W6v%LLJVAxd)64hj^SqE66F$*;ue%<;}8Nbv4EG@tlCa-L7A=5WwYZd z=lPc}^eLHVTQJWOyou7QYDH-0F3;L^mc`30ik3l;ZShL`($&^^OHI<}sYUj=SFO)m zawB)~_2BOPPF3rit2esVg3fJ&R62HLYmBoODn|BuHf}9ibuW9#wcwt8?)6)pt2a7U zZ*;ER>|C?et$qh&ye_EgKu%2g3peNat6Wg;$YB%2Zz0L(4AK zE~wBor8&50VnqGo#Ev!by_@2DH^udCOX%Mb*S9CGZ%=g3!Lar{KJ{DNDnTcE+ZQc& zC|==M0=k;qqh`HV^#;Fs&@zLd7Eq-V*t9*Uc}Gz5j)3MJL9M$2APlfbQ1cE%O$4cx znzsdlDda(pOmk8 zTE6CK_3G!POCJ`_x)Rg6-!)^NwqJ{sMFxYcFKF2tt3LQrW>wH+D1#ih`7Ou5qQJna z$iSumLL5pATq>X{iAMum8R;WZx|3b#Bj-t?|` z(>sebKTTJDW83y$_UwPzgZ~*Oe_<&9&rtuLZ}NYJ+V2cu?-{(GFeE*boBUsS>wl>= z{}~qjXP6H{j0^rVF8|ND{y*c!|4f_yGw=J)bNoN&DbTfjT*v=&pZL#p420MZgK8g! z{h+(E+4lTr+4Y}gI|xDQFVL-~i~h6C{m(J~zk26?hNK5f)<+mLHZmyhU{Kw~sri&g z=LL_>FFw6Le0t!DNe8^_!2q;k9JD;o0MudO(fZG>_MJ)b6NBt$2Fdpfl8+d~A23T@ zWtO_iAbFET;Q@~d=!`pMd(drts&=5`Ak>^eTUL41{xb-^V&u+XVDJQ&hLC0_18km} z8C-HUiD;}6)%-854>~dkbPPE77Ig_*=(wDu(|<|V|KiS|^HHT7{zH(UBd8`~0CkK0 zGX(r(2>itm@suI*8AIYNhJ>38!6z8J4@jr%v~IcS+^~fSQ6{AXzY&(H&|gC_oGnDU=tD!5@V`#;Zy|3(Y{GZ;()PaZ+`A3-`h zko$=s&3G1YhX`I1LHbIN`Upa@f$IWB1_n+>UKuViRnWx~JWA|BYApQ941CfIeA0|; z+~B}r5by_Sab@6j10fz)1`cP?&5r_Z3_MN@ z+_nsC77VJM4C>wtY_g!*4K#(s3hG?2^Ko#Au&@b$gPMVXK}bN^-l=erUG6-y-1#7s zv%oZazG?PCgS0u?iBm(m_9f3g@71&oG$mBJ(zbM^RnansvQ=3c+myLqQi(+;AL_G>>7(sw9%?!~O7*X=4-vr6iL9SON;0~{nYBp|!PAUz;9 zu)PAHnIKkGMp09CZ667nSY@9g^Mp2+;yFQ$s{&d#hIVa>>)VqsVPEXTLvfQ1M@~2t z+OsdT_h4x6!Ei8&>OB(PbugrTpMTR%pZYC+ji9?8Ak|H9+wS1D-9fFpLRxo)g4tk^ zy}@mJgIf0lG(+nnNUapmyu-g~yMNP8|E8UOjXV4rcLaj#qrj%^pv4JIppiQ0tWe_? z(8&+5{t=`;@~+tkse|CP54`H}1~1I>sswFQ@~T|pS+N>|;PnwQ1G#R3)JO11rP%4G zAqZX{#o_KK#m_t&JM(Njco`m~hC(JGwGw=BIBZQgXvH12DhZ`Nf>%k9*(C@GJ^5h~ zv{r)DM`&c~lIswZwiJ4|MaGgFkVz=`Iz4!OlmouHqF~j7;#H4}S3NFVc0X+>!N!S=vC4nj?Q)nkh&>UJT37COSj1d5Lg+ONn*)d2s^Qnib z+2>lvbcd8KNu6-8a>>=|b&u;dJ}ck+vUKC~{EaX3H@?i>@HltFL>Cj{owv{;NOwFL()5CGnm9&w1=W=TT67#C;r5AF=NK&%E;31}`pu^Di%H=dtNaUQ*=Ou>H`!!wFi78KReZ>+ z3OWW{+5W$h-G3EZ(EO3Q3uv^BMfMv5`vV4+L|A=i(B80G%r? z4X%vj?f%O+{+D<8FXQ@O*6qKvI|#|SK&vEa$N!?P|Ha(?Gy42z_WREe{huN3KSTN# zhU_0)W$)RFUNc0VV~9LwPx_i>Uo^Dz1s2X74wd(7M*6PdMVic z4O$aT0L|PnO#9C;0~}m)KuaG5w*1$h_n$$3A~;<^=8qtg{+yt4o`DHGm;gCCMeG%FM6I#HYe3qQNPm!N4!WARx=c&I1k?W;PyiMj=~9 z(7{vwpv&%sf*FKE7=!{r^G5>SpxTGu12mDu53ZQ_JV7F2J}kvN~= z5;JfI*GG`m7hr=yP6gLZ42+yI7GANHcDeJ+)28dE&9KN{Xqms*B5#pl`drQUsoIHC zlV+UBS#ra_eYb7tO30Y%SQXd6(?)3zpXzkg! z-KA!ePs^^r_PySX+pP>{j}MOW_j^`LylvY~JbJwA&k84f%H*^ljT8 z+;cR1;?dOkm$Q~$_wCpvrezNvuS2enh}>EQ@g^B0I|G9t3$LQ2l9`rifUQTeS6p3G zUSCYZ!i=7^*;95E%-o+b{XoX_196l0$4uNC-w#4jefy*O_DA#|4DUM-+`T`zdq1Q; zitITY*>gC&>tJx(UU)SW(!K|dg4=e7f)Thf+8xxo2ZH>YcEW2SSY?E$k9-@pL)OcI zx>pUL%E$+{p9oYL)o+6Ij}S}dz^mrql@X*X`c%)aEJ&p39pYJ46MDRlzErn{iD?RS0M|+A+--C z38{}Dog?^=9ddn?vE&w{GD=-?9o$R0mAVAFhCFTQjr3(V!51olCX%vN-pN}5LM6-Z z6)n9}Joi#^@1fwbrB<;$n$Ed`+M!J1R-iR|GfBL>>y0lJM&lz~y4 zfl(5KnB_pnyl^Wq@F;^iM_eilpw5vd19ZGjhe1G}fnOVhgbWyj4M80xVKW91V+K(Z z22oQ65fcUxGX_Cp1|DMuAxj1sPbT#cKI=T~h;IARl@Wc1lILH}UwyxL{o{%)FG{z( zEZzj#jSJd`3%>uNa?|V5bQAw6{Li-GzwEOAeAEBSRlQ;G zKgggrgF&T_LA#wnt%*S_l|eLxK_m;bo>`=gL86O6bTg0A0Y3F7LfS9bH9=<(vFm`B zZSy`OgsbpCSD}L*{>mqQ4Af{~4-&Gt~YAtu1W$ z#gTZDJLQZ^=heu0uRpvNH_z?|A$Ef;vRg!5yL* z;H8NR{xd8DRYnYpL5mm-*8XQOo&sCX0691gGWZ5*T|;Jw!1M2*(}Y-HwGyNm94t+m)>{&0Q!L*5y*OA!pgG)cKeFJNG)*ZnUr7=uorCwQ;*w+a8~e zeXfn$9l%pao{ih{mfgu+b|Z4q5udg_ejWR~+V*(2@AdCG?B8`bu=j9K-;vmjAmm~&8d2pRh=sfQCLpGcenTKpU{e65Ki3 z1tB4QB3QQw)V+eNmxIg`LFStvBxL#tIz0qgzYRW571A?;kdP_~(m8_ni69JkP2^dz z+O2d2avg-srm#MO58uUtyGY2@5S$IEhLA}}m6QzKJd5ZkfxAmrQs-ZV)J>Q*5kv&D zK1y431Al##w)7Tcxj4K&%3pmyZ|wunB8SD-6K9_f?>gvIy2d1Kvb1Xzmu?K`+$t^; z24-E*&C3i*;KMDY!8H-+*h$b9574Sw@LmrFF>qxh4LSppSq?OI!6wVVDi1>J$_!j; z3|yKB#H-1`tI5Ep!@#c#T4Kke4;szm(FQFD2Q7rqWe_%HP;eJDjnVL}a!i{XP`e>@ z($S20mkO8PF5URFV#|y2&Ckm>KQG<#vSjlM$eFH?10qV-Jug}N%xBGepEd8fH~tsc z_Mc(@f2O1Vd5`|*JN94d(0~5z|CKiWSKRR5Xx)F+h5s31-ZS{zX9#%05c!rNI6 zC;sP2`_EAHpSkouL*0Ld#{Zy^x3>SRz5f~7{&T0C5y-sgJ>gE=;`h!5yO6{~6|hE2D+~8J2)Agk21}zFc72f1~yP8Eoc) z$Lo|}eIm%(Iml`&$S@sdeFW(fL24ofNL>S}j~Ip71(cYDG`J-7Ii*Zk#SEaQq=I&R zf(lGFULghnGiG5+29aO}iEvP51i9K?IEXfDQYEA9k>Cx*P5ws|*i_h{JST)WY!W`lF}daI&k3K2c7 zwHtGm-O5~i-M4+WYyD=IhHb8m+ufUYdbjWO?cDF)ywkOIvsKXw_xi25OK;~bzZo_8 zs9(pv;NHW&o%{W|4g~fb4eU7@)ORGf|7gsV6RC4AR&IP+wCbKuW;=s~8F8z09IA~1-o+^SanFH5HXfs5RYRIE% zEppEYxf*gWTj^H10)mk1AY?YYK7ugdy`-3FryvMd7YWi`LaUG9TzF*!VIWsV$OCuS zsw6~x1YhI;sfHjVSnyg3`0#Kz3F#$4dQ7R{k-D^H*VC3=Pg;5-Y3YrWrMFU+-b!0` zFMav_%oTStmfrzgP`l__^31a#UHbwmR@tUbH}I4$ z*e3s1-tgaK+kd8({|sSI7$ToD#Q$N4{=pFP0knar z{wqBEul(e{>a+h!&;H9l{x5v{KgWgtjK}{o90m7}w*6;a_8&C#RP>4=^elt5 z3`#v9q}t4&(!`=y#-N_hAnDB@<^h@$XESBs&}LxOU|>-JPrw;6u-GuL6|##rams8L zQa!+{_DWRmlYlO0_>NQiKd1J89s>~KHvlb%;7|vx#}m;1FK+f<#`eFo&3|!l2S>*1 zzp&$f28(wL@>dx+^BEZ8K^L`42QtV-gYNQB%wmwwU=UitrnZMo<3FFle{Q4y0_LEl zbc)XZ6+QoJ2md!n{BM}_Up?->Lim5VfdAs2{~3J#vxfXk?w&Gptk_Rr$`}tfZG3eKd`pq@Y*{xS~#EK!$(kEI3u8EfYXIS~4VHN1~46Ysjb$9${@Lk6MTF;;is!%|aGLV%Fkhvj9eFPyv z6$*G>4A&477pJ%yue26}kUEoyCYz83yRarZzZ~fFZ;-Lf3=CXM+@cJSb0qlPKsOGH z1v7|;fYy5p1v2mjFmQTmE~P=AEjlX$(3NoRNc_OOQoKSJyhsE^EF^!BVHfMNWl_Ec2II<}bC*TWp=T zST|w1xL2b=+N_*K*9unL3F|xLUcc3=X@^(iF3*OY&NW+|YqmPoY_h9ZryM)kzH)uu zvO76TZiV!LIzC=4J0QrXZI@5$KA+Zop3OTQsy4VcZckfqt$fpyf0<=muW!-9@WyS)y@wJeo=Bf@K6%#V#2FXkW}J(eb|!l2$%skEA|{;x zp$W&}br7T`3hz4}4(=93^d5`oJr>>r?iO_(4(m7sLg3y|P{*F2jy;f?2vQjZw}Iw{ z0^9ZnwC;nVwp{^jXw?vCp2)X(yKnP$NbLjf4f!^JR&;|;w*%cC3pyCQ7PKS}bf^Ms z_mEf3ddP|e&+2tvHR~Z25Ts88se>RST89WSOov<Ct)=LmY%5Tt*E zyhjO{4X=jK7|>d2@wJr2*HV{WL#vNcmfT5QdN*z9t@wGDgL@A;RIM{en(OrSS3J+O_G6Kih)fUgjnT4J2=?YK&!z;ta;UZWo^>T!y4RjXT>yc$eMYo za>dQ^b@wXP-!EGCsA%1zy!B7u2y!|Eq$Yxr;0rD@*S=0){Zek-f302rg)aV=y7^!5 z3g`p|rlbEE4*zG^`JZ72xX_#NpP~3AL*;w575~k4{1@r^&ye_@A^ZzN@COFJR}4x2 z7?%7O+VAknFwx@((~&5~My-eEMJF!GD1p|2fZtZain$3$E$r{by+U z$dGi4A^0?d-Bt#(wG4){L4$Sry$sr&pvp)lg@Mb4fk7WU5(T*6BeT^^vMC=!jJ1kpD8l|Hb|OOZohl^#0Eg{$C{dKWp)So~r*W z&Hvd!tFr$y^!{g<07Dc1v(5a^FzLTY&MBFaOW`x`XRiL3vHY=b&k0`Vi3~bb;sNu` z3-%k4fp(Kh}zA-pa~8r zR`75flDm{YLqR>6up;r)j_8@GEjY=g}BxHs$sA@G_6owPajmFshs z+{#&UD`L_K|IYnBZM%Hicl);Q_UqW=-*GUY^N?T1KG*uKZVlU#=3Hvt_n~mjz38c@ zBPN{)n{X_&|0r^O6yARrw0?N*rTle|Gp8SxbT4LLmt$ZMWdR)(3ken?cOh}G@QMpb z87ONznz<9OQK=l!%4uX@Qg$m%x z$gg>iU-KSNrPHzlJ}w7dsIbky2~-*RG=YZZ{2I3**GHi3Mc^}6A(KR)OUY|CdR2pt zNQG279#v~049E--q&|Xlh#+i8eFW(fxtFa(Mv!h1oQ#=t3=PFiIT1JI1cmhxS}!Sa z=GjDyDhW~z!AVGc1R>#-5fbAv_@E)sARgpIRu~DoJRMdmU4;+XL1ZBn6Qn+ZjNPRz zy^*>EbV+*V;_JDKZ{{qzkv`*WWY_+n+I5aOa}7f3r0g;Tv;!E#tQj~p7+91Um}J2x z{6h*!NR7h{UTO=egCL7+xxig}hzPts0#`|(ItWs8fwsnjCe0Z{KvPT1k_;@;AjBxm z07CK%EJ_T#dJK|Q3~K(OwrQ$Ct=1W{1Dbb$y6tl>XDzz}Ibb|%?W62<%^U8kp7yH#;IQIURyZm4D&VQcE|5*hAvx9e)|(elmoAVhDf9)&8GwgSZ7Z~)9GU)U&sJ4mvFH%q3!eDTKL2Vne+AluC|9pm^%kl(FLFyG;{wult zS91Ta;QAlbQE~b@RIBv*sAOa{~0ENmm)wG#m)TBz3@Nt z%>SZ!CzPtM#?HP|wBcj+vWJmV&I`NsGiVgb1kSN5-e=x$#f%fM&otp-;^ ztD&bDF>LtHun~k68zZ1dCP1()JFHiBv* z$hb6UXD&AbXjTg{nZ?1tAT6WgZJD#sDtD1}{sIsxTxwmo)H)A@OtTlthjtrf%*$JH zD}VXz@czT@joUq%cK9^!@oC=U-n7@PagRsy9=FC_W`(P4D>kOgyHd2~QS9`yejWS# zy7qar?C|Z}>({j}u=`+O_d&m|1K_Em-C>iCWiP*-w&+^y^s^C@PK5R!4e2`))^{Yl z|0o27_8*R&emZXM1<-P)C09%f7BC2zGcao~b1Q=eXYcRZ~BL|Fd`2pd`lfgKsvdn~l)CZ9nXC*fTr zto%s)<0S3_-3{BY#@2uTX0 z47o}I9fb$3kCNwJjGlTjwELh}!#3-jg=*nFk}jnz`U#*@efTXIShc_d^-|!PNDMqK z2RV0z3%p(kQu{#G)56)@44~#EA8fD=QY#6;xm( zSmZ#99E1$mRh-1FV{`%vZBzTg>Xs)@I9xFILjI!bMN4nxt$L8Z`eF8(hdFB>Wq}c- zZi1Zb3OUmi!iJJ-p5(20lDX`@>NDk)h}%(o><@PW&7rXyo@CL{t-b?@aF8>!e@Sl74f3g1G450@Z ztYlVf<_?3r~jYZ0EC!q{`J@B5oFV0%+cUfn}f-Akro0RBErs&AD5&;bY#)Co$76DFn}9P|KGNnC(z{ zz`Ws%Q}=bD>3yH0ANB%QR`vJNDi$RWo zk%x&xn1M|KbmuteusC>q1PL@oa9~32!bYp&8NpQ*Hv@+h=z?;7MJ9el2IxJlLd-mJ ze8S3H{PHY((ky(^(E3Qql|j~nQ8|=TJC;o=j!i3$S2KxUJ4Hk{T39DaSSN~0IZ(p6 zET#WY!kkOBJKqsVAdnoy}N&t8~MI=xN8y!)pu!%e;fLB@`^!xI`I1 z*I}@O8%_)i44f={vXaV1N~Zo+zUkg+t+5r;)7#e-PTWyE<7oMulLfQS<<2;rH1$m4 zl+!U&PQ^?)6*lEe@Whjm6OTtuJRUXSSQM-W1gQ-}dXGV>AqW`)?h1iAM14m>dk-Vm zM8RE$gTV7dAzg^&3XsYOJV3V_QW-(2Ay7>O-cRJ$vctD|yFY00_I7YJw8OUn#DLU0 zpvnlmUJle1g4IFrYRD6OC4@)y1`wlm184^m`0P{6`pCU<4WwJ-UbzN*==U0zvXw5l zJ4BE=$fbB097Rt&3PI=<6KtO>WYi8aixfNcBq+*|NO-LjGwl?fiV0pHL8>P>2_L(I zFd&^HWHK3i94qMFiaD2(=72`+z_-(0PMUWGhAu%WBS^)R1gfyEq|UzzsfHjVc;nKw z^!Zm)=3Gjhb~?J}U`X9&=iEh>G2I&8RWfD?YzkhWRc&mV;NduNa8q0mJov^3?g~K$ zM%ZC(Jaz^K$i8?81F}0FQZYelC0@iJi23d;u*0lO@?&`-`s~%;88M&(;=dVIStMBK- zP~OT%c`F}fuYQ`n`f2Qn*Rd;Ji*Ed_d-A`;qyHlJ{xg8@HsZPPpZ_xG23V0Z|AnUh zXGnR@5cG^84ZvVG_^I!JLf0@hw zg|C8;;PwB)H~)*>1=UL85B`hZ`!90mzvx}i8AJkC|1(|zA-x;_r4DNBhbWx$guzRWz)3__E66c#edUEr_5Td&|Cx3EGa7tn(E7-rd6UoPjg0FLDd%qzCZOG~qI&;@!FOAT z=>Hco1D%p8>jpaVPQe{?u!Dm0e|e|>eD43n{Qoml{O9fj4a6}_f~|#vtU!P)MVJp> z5w{35_{P8bKlk$gvQ?Lq>aWMozEiT{W5K#-;Zx7*Brj#qFO>3`=u)=Zq4~7$gc~w5 ze`ziM&$__g{!QJm)e%C zur6I~QMAIcaJfy%Dyza(MwyEYGUg@BI9I&(Vf56KzMcC)eVn#~KJACR+YUmISL*@S zhTRU;+oC3)DP8xxao6X#=@K+ z0#!q>E>g&ZV-ZtMCC)jMx#UXmh6mXzZbbI&3hvkv-MC8DIfsEo8+5mef*XsvuZB;V zQ`&^^>b1%3J7c={rA|4XGW$yEoU8G(u0~J07&{eI0fkLE5ego53!88(dgAfuiN|C5 zkHz#u*CastK9HISROx`bKM)4;h+KFN%Bs2G?t{VI2ZOo}1c7@-NOcfsh6u8O2vQ9N zv}_M(*$%0LAhSZg&D-H)ao&wvy&JdsG;Q-~LK&9>ZC?d%AOh7p;NB2~;ZeN-g50at zLy$-1I?pQTp>rOU>ktUGXaQ0mxq){TA$N!%T_nip9HcUGEm;n)j9iMBK`J8%iCiT` zLDzL4W|1HhNs#&|YRU-+16O?nshi;S4zBtL-eZDyl#nYU2s;s6B|-W}$#X9w*GKUF z5u`E#FKM^}t$o0!f~PIIk+R@g^4!aDGtNa!IOg5F)4pJ-PV6KFuX;Xv*|1Q~yWjLt!3iXp9ZNHqizK_0JzR7pJG1|fLd12hT32RhS| zSqQYAfmNJ=T^4i=w1^(Jf|aaksG(=3O=LrG!StBMby@v;^5>kdTz0c)`Gcb65AwhW zdOg#sM>(q=WvzOY0Xn(pLCz{rr30yYK;)`>d2j@-kFr)h$y)UUQXi=t_-}jjzu1HS zTvz@xocPai>A%30|Drel3!es0npgf|NPoo;^^PI(Ekocn2KO@z0cRPKA27}SFS-ug z?4S9cee-|8z5h+0{Wp62U-AN|N)oyPt&aq+gT?^G?*13K`(OADXg!1A&HsYeK#1Ye ze}?n_jqm$i#mw{CmbXo=+nI85n997&b77onlk^Af)$)L-#+I{(mk#5E9e@9S_c={hvYYKd3U&{={PRh~4xl zpZP0Er?2wX|E0}Al@Xu%e;#$vbxXqLp!!J06;vO|yZx7U`>*KoU%?r)v|B#nKU>Rx zp$Xtc8q;BG;UJ3;AT<$WQQUISG?3`V{{n0NE7o08th*LH{YJ@#kA>@>r!2bV*>H%# zyi(A)!>MGabKALq$+u-^{nlFkpKa5Bo*n<$cKv7F3EH~FvJ1T0WH)G8KJ)JX^2h!w z9RIHrJq=W+vrEdUTMEc%GH{7AfKH!ggLQ}?=@W945Er;91({KTq*cfq6GRq#;Ssn> z0@X)6(hO`eEIjZVV`UjQWVqyPmCQmV>@q~HQ@KqNIgR4kjgt8+v&5{6rR>Y4919dZ z%H>@OnbbqXol5;G*SJ<~O`mhUe9N<#nWq^ftr-~PS-9mvQ>CDT;2?WoA;XCb3=F)A zhJg;H>+Fix*px1Js9bGVvCgJ!ja}Iq(1Aadn{0~LnCCBzpMJJ{fd$Luk(m+$6=rLLmtihTpMmx`tM<7c0Tpp-cmQoz?LfiAFs9elgOb~z##6j{eWX%$Ugv<^@*bowu>mjugq&|X>kopKha)384 z@PoTrBA^jJW<>^GZ3bxve*F*`mpuLOM$63U-qkCE`wk?|IhVHR8t76d@D+ieYid{C zgPaZlsgJV2l~K;BC!jNnR^H25{UC1@2tm${$Xk6EjBe+xzFn~DUcst+Ijf%Jta@Uz z>a*+SKO&d^D?R+rbK^hr`Twj}{&QUaFMI{Gl~rufe}>fi41Nz70v<8=J!bHJzz}dB zgxoJML_B2Z`YyiWKg0U}jGO-p9{jI$>%ZE=|B_e!^IrSUcl|%#wg3Fr|MT7gA^to6 z`C&DYw55?B9w9{SHU|D%@A~b59IVDRt@kvI%NjX zEzc6*!AxT7Ayjpct)T4SwZu-`^0^G^ci4)<=p>^xcwJ) z{m&HiUm^q4Ct{oh?gTA_t%zF+uGyFW2OU7M0!*$1twZ47_@8(Ef3@!WQuUV;=iVz` z{~>qvDzuWYRWzRMV~F#{AJzopM4kTY(SPh|5^5d(H>A` z#I^4~`|khZC;uy7{cqHNg@N6Nfk8@0%#2+`7M#W)M>Ig{8psMy5pc5#w&{RD4AdWI zkOJ@2Rs?q{6~I-wEHi^38>0v#moO`zI0KI)=y+8wanOn1Jlc$+CafCak~Z1=Ho3f3 z*_>AS+%|Hdqy`&6sz)e9N=+CD()u z0~i=27+EF2gR%VJ78*Nvs1UN`Pf*^-&!%{dL-_`WsturXd+N8kG;H%|+~v`@%d2sh zOZ67J(sjwRE;sG@RJ#6YVD~})&iw(MM*_Q!2KF2e>^UCPdm^ysIHY3oYd_%Eejv2x zC}=Ho|IzT?!;lg?sOvy5s8-q+(6uk9YhQ5p{=n}2UM)L zLZ*kn)b2WLJ(wsE84mRx6&2P zrOP15sbndS(!`+q zh|Tx~5BPWo&_X-Y|1zfkB@99Jk+{Ks3FH6LX8)y4|I3>Hm$w8l1l0dCN&aWx`NY8c znMEE{TS=Jzm$&<`;0{7^?x0?hD7Z>u3HdLR^Ph3nf2P^6`e?y_hQ2t}ZR~6gdweJ06lCzrGyv90zi*NhI z=&ASZH~qKW06G#7ULP^-`OgHdjM(=5=i2w5W6yt~BmdeUkzry|U}91L*F-8{QW>21MHm?PSQvyD zxrEsTBpEnFnfRoc`DGY5L>ag=C3Ssd?DOQE3S~U2g$+aTwA?^6fYbIejj$^~aWe-u<1^&NrsjKH-LWX(3TCW3T@ zz`I!?>*c`J5NNpqcw6gUNPPs^Llo2j+EoN!J{N$rhsd{iCvts+G-m{=k08gK`ZjC@ ztxTu`oqY;rfNLUn1%$N@g49Qlp*eVc1X{5Gu8iEu*SM9hL0-a*JT&K2vJ{RW)ewY4 zu8AOQNPPq+;Z+i(K7x$eftSRCDkf0f1RKYLNWm*6NRJ6JhzB9jW|U&5pN>XC@X83n zfLBS$bIv8tIhP1V@pI0_&p8)6=X~rOQ11zRmD1(Jxloim=W5E_YYFqNCCW017q()19qPSgx2Gl=VQEuQb!v^lD8 zf5Obu=?gAF`$R~02xc$44@Zz%DSgG=^c8nAm*3A^ejj|y_yb6t1=_B=;x>d07YA#( zle79>;>wqC%btmD_^*HJzr^GJ!uS3&U;NK?<3I1s|NOWAD z8LY1`SeyYNvl9$9=NPR9n!|Ic#`RJCzm z2a#OY{I{tP49v=)twyX0;08SM*(}7@N8r;2K+}0F4E!N7ev=t&o->#|pHbus1IG&n*82=R&o~wTOB()%*GEd8 z|E0Y^NZ9AUkoSLv@c&YU|2byd{DEA8b$t9O``KzF8t+G)}( zdJ&qzwPIlt#KR}bMfAx>_A5kBQ%#tyoxDINWuZytB9qKTIvI0J3zwS2^f9Q0^B8BE zL{HRCo|(J!cK-SY>C12UWOsu5Xzbt|0U2|Etgm1OPh)Y&Xt@Tp>w{5>) z=b^x!qrp8#Lwb*e_ni#wIS#2`A}5?opLacO>bZ#i6OfWTuLA!j(w0BmSAwf9^QM{zwe-L&wfZv6x@5*zjJ@&)RP%YZp6)IXGwL7S5dqBrlpQeM}jR!oMcKEjM4es0<(RV0z!tv<7qY-^aAk|9Dq!UpSPr>^{ zkkb;P!N(?q_Z<%dxgWGk2)RCjY!|{L!+Q^h_d-YHK!b9yRSK}0XkS<-s9O}$z6V|n zK@L?2Y=`!Yz{deWPgDqK2CbU&ZQcp0lE4SnA+MB!tXqKeh9IMJ5E8jQf(*)eRBwQf z%fT6-3aDzWTh&?+1Kc@st$^-tb+1?psf-}?5oB}@J~Rgzor70Kj>Su`)rCRTGx4*|B+fY(JL^pBtTWNG&c@9J zU800pA0^Gc5;fyO*pxFNy~n(o_c)cVGD)AK6WJx>S;c9Z$eYR`gc5%o)>JGW*`JCrr=eBPqVS&Oe`FS!OeG!OZ1rmUs+;3#AHos8vo(w5&z zU3NQT*}aTq_aLLcs(3}&YoEWpTmH-pO&hUSlQ>p>@Yi=F##_2R$! z)&G3wLG=;$RZ!K#1+J1fum0z}3aXFTu7W0~xUT#cxb$D>%72Cn{~0cV>LZ5IeW3es z7;Kn1jhHwLL6s4cDyTAIR%2k&03lXY26lA@P8HA*7NA`okj674B8Vg*fe${YO(oMH zX*rkMM+WmpoLZoj-ZIvp%7tI)KLgKa2G&;$obMR8-!brhVBq=6!2bh;c)l`ly<%W{ z#=!P~f&V49GJ1U^d7q^#HW)fZ(AknKgBS4g;vdRk6BNB zw*HSj_CN5@f1SA>3}R+6h=qVRG^v4B*RZLvOInz?rN(z|_AOs18#`SmXPHIO3agS; zM!73Y3s#%wuQASEVNC$jd8N%fsS`*mc;ec@OA<*RI1+lTJiUIuYJ~G`Ra(~>}1*&KQdk-egy_mK5a_qEY{_VTHo41E_>&QT?( zTLh_(TuN7=RYtC`<=jpsOC5?9I~Ff-C|V4ucO3E;AtSgTq#A;gMAk=;&Jpq+B}o4W zGH?f(SAthV$PCCj2Q(61AH~c7RYnOjPsf9+q}W+!KwTv8)Dq+#rNr46K?@sZo==>9 zHgU?y=)S|gEn6JQm)qsc(v9xa^e&aMNfXlXW0J9B;8$f}l?JbogESx^>x&q`%`-^8 zKwAn65rmWQDhX2Qz)48A2+|>f9DT+E8ua6kWf0V7RA! z2dj$d{pXPQ%fR-Uf$=^A%OeJ!=M2K{SY`gP%KT%I`OPZ(hgJG3gV+ZKvDfT!pLsPw zHy=UzM~WW*l{`U51Ic;+7Y8HGu>ayIp!Id26M_1{$NMY?FU$t7f&-m&vEx7UcF=+Z zhFzd_aZG#ua~=7wci=zAtXB?Q7gN`Nn{fVr@uuf3yWh^g_CK=!j8s6cQ^nqhzRP}d zo+hsUA-fHrjvFf19q)z|ms653WkO;0v zG+>i5;C?!zC_A4LgMc-MtOtXBjcC?(hUUj&vwm@|{4cl>v{*uD-+!S?{}mqm=Q{tN z>G*%K-TxUH?lQz|XRw~eVAH_sGr_WWdBLiCWm{i4HEvT(U94BM!K{3%amgn0vR!7S zJ57qWo0aagtk_~xz1^mElSAV+%j|g!@<9x`NopaTmPKpR=HE(Lc0FX;ac&JaaQ_HW zS2BV}6d_CdAR9}C8MqW2vgT(`Jq4+5K!?bIDx>3pT}Ok#2;44>2vQS4*pNC1LPGjPnDr5~4gzm$&*h4M8nrK=$8 zy5Zd-cufQug>x=i3_;-IIG|l2p9W-LkA;m1NBcfp|Z4qYRn{5--f=HAqv{laF=Po|^)#cuu=zx$u>3TXa_=f;2GOW?+Q+9w9@>kQ7f z8SJisDkIBFpvnj|WOtgu^aQ9fa^BA}?Z48N|KgYbt33i$Mp9Qm-6BYR#0~2c@qmZv zAmeqQ%INBU{u`hwiT~Pv;amS1(l;=01hNacG4VPwNZ2z-Ixt8%GRe9y$-02pLRJiX zW}uV$Kv%SDFtDqDDij9t>LYM_fPsg>Z?;L>WzNL^46e`M^^u_Fetpq>%$YZlSZ zd`f@Cwf{>RfRKa%=&n66o&Um`pd}4ry8i`rQR*WVZ$y10>HD8E?7u+le}>}U47K1x z80LWbN1&0q-Tzq*{O33fsxr9tf$Ag3Y6P|epnAmO)PI?E|D1cTgwB7~dE#&5fv@H3 z9&{c2oV)meLE2)yl$B8vu6j(nAF}AR`p*rnjF=9CDkD&pbO>Y)JGe^X zIPzcg;(zxY{~1D-GBD{gFsQIFa4~}por7#T5CWY;#h}f=YoS_oJbcYhi8+6im;Yy3 z@}C{D++q#rfI*Qfpsmmh2mY&``Y(Opf7qV?(v5c%6P7qN9!*S{qH?=cO-By)*&C7P#*X^>c+2Pi{$FXU_X#-5p;b*FSr&1kH|r4 zB1km^+B5{Ni9r1z@L6-9%BbTYqz4q(zB{M`Q5nH^5Bin)ZMY=wb-aNF97L z5xhPEt>s4DLj=(Ef>|Hg=Pg98j3WCFqt!>qH4%gj zsi7d15nBHUt&4t&Bz2 zQm0>x={sRobJ)E02*d23Vw?Xn9RJUK>%ZhJ(9{pxCD7!N?CJju)BiC9zh`iN%;0#1 z!R9>ZyaMyH4CW^otWGmn9b(Ym&S1Ght>U@h+W%^YKx^t0?|_Ejgsy;Qi#TEZBgl9i z?^V!3JD#imd9H$X$Z}o%FL3og&&B`xH~%v%d&i(#${?G{X$V5RCdq;p>B1JN{3eM! zx>3w3flM-Ppo^RYO&E9$82GdpxYa>5JE&ZR%raoc7o>TBDGu48EDqA1xY2mZ6W;v) z3|`MUwf_s5fG$E3()!OR{+~hgFSp`5AOt!A)O6~@+|3`W5B{&-{IX=t zqx5CBy*l^lq%8DlIT$kgUiiF+Ml1g~Z2vEI3}gb!A8D@UsPv4`{ax-?t(}dM8($+n;Z{K57xYoLAn?=n5 zi<$$*l?N>9j@q`IG_N~s-MHVe{itK>LD$Zs!Ci+X13DQrQvI6td$phT@41k&_*(qD zi;hVx46N#)o6wm=VAETW8Cyu*1l|oH>h6^_Cu+j6ppJdM9ft#ZjzgAngStpPppH^N z=Mmr5Lq07BLwk-#O*|7h;dCfyWgY0mxuD)-kfTujyAJzw9rA47?^?IrwQhU#l=B%2 z@5D?#=iheFyJ@dS{jR{eUG60tw6oXfCJ7U5y$oUDNL*&3mn}P=JP`gFY z%BTml%n!L5f>cJK;88?Sp9tI=g498fDWQq^V^&U0r-K*ERSFeNYY;}Y5 zfGXFxR<3cWSPeqpdIwSmK?T8gpu2!cXVAjA6)vT#oJ&?Zl`MBgTF4EliCl`8!TU$B z$_QE?VXchp^A<+*9g65Xgj_2@s-%bsM?zZnS|6t5E5&B z6gBM>q&|YL43C|8HhTK$$f+lT`wx4x>~N{tV3xB;Gon+{qmtJ=kwHC>LCh931Oz&P zQx-IC2HNriS*3tkb`dFpSq(u%Ae9jlxX;DTz#zgdpdzSZr)C+f?UQF6Q|FX3&8KWZ zP{a1<&I6GXPRCEX7(3%)(%fsw^RGexGFz0f4AkYxUUolg=>r5>b|+)Wt+WMKlIC29oqjrE(lNh|J|ChQAt&g~`{}Exm4)<=-pB1oSIvY3JI8lpbpz6Khv<2(o2d8Bgg zKSTCr2D3Ur@5$=^%?1%YI>AlaL5-Sz)v`|cBBn{)I+36a$YKtlYX|v_7=-i~1R-^`^_|KvCpIh}mr}BSJ)&D#i|2eh(3mEWmTl*+s{uS5SUFP{~e5YKG zT<}-E>ilI^o^D6fJaewsR=K6n7WmNl^p#yxj-W1SkK(0goB`<=`cxTuJs*HG#{#U;6 zpJCQ#;p_u?ZMVZ0-H%`OJYmIM&xyxus<%5g?Xzz_ZdG^4y!MEF>lsjG)O6gbgv_{-z38?_{SF4zB+J}Yfqj>}+E3@LeUP#IR&vWSMkN=}HRznu;4TuR`^E)} zM;`FrC~bB3N=SX=kF=0G1iX$e7*Zv59QAEI6gBZo?vne-v#y5qp78HF9Nc$2sP~wE z*J0m|gFfvCJlprXH0`pl*l1m{)}wJx_=M9@6VC*99rdW+ZiD$yWJBdKI zJb>?dfb3lj?K=)RIU%C|1Z2fRMDH>9NFAaw0@p|IItapm>>+|zM!~S#A0YJ+s4_yW zk08|$goI2IAu6NB-LRV=KzHO}t&gzQM9>}(xDIlwS_`R+++jDgLVG~q`Uqb8K)OPp z3aAWpy{uE|Dma4FN6sZHA@z|HY_9(>r}YNA%6j`n#ewHp?%)MsNO@!D7x=3 z9L4m5mV!g7CpZbIry#WwG6|`eaIJHI)Jl+=2ts15kD_6##1m$mNuGTkbiCoTQxOx6 z1a|KAtl0=UYcYO`l3%lgO&W(z7?Z3G1HT^l2rqtcIY!28IC7zf%tot#Am@O=>m$e% zk|>*y2EUqAl=#_j8xs&j+L2CHJ!y-btT#GiLI+ z;I6~|ZTr1zx7!u0vQD0^8``PpRViYdBdiKu4syKVh)H%wT;UG+_jqGdj*-bBaN4HG}yU*`5~`%l|8#`fvaI zzv4a69wJWg+z_Nbg7%EStL*r$g3bjJ0C$UcuYpGDxG(${zw_UA^%sVu)kc}CtqM2W zm#%gwTVtEQ$Rus1PEd=KLy@3aDu-?iyRt8XgcAe55eNzCfR5Z`5d#-o%&@h=Ot9s_ zkYI=G`GVI+knR!_c$5!vv>-o2(rUYIsA(F-nZP)|qN@V~SnXor!q)qi!{|Ekvi z6-@uj8UL3u_%EUNUsMM)Ovk7BpHt;8r}9rOm0x`7pq)yP`bft8zmhNLG;g_p|5E<{ zr33!+hk_78&OfF~Q2&T!4(OCQmaU+L2kb}w3!ejBQ)uw`zs7A)|3&!Bf3DO2*-wB- zzVrVL_x~4P`onwftEe?UYqor@*zi7k?Y;Dsw_>JWa%kNuXt(cbaj zm2;Ad=J!$5WVqV^u~YTrT-Z+E-@rOVaR>QQ2U8t;(y+Q|0U0Yjv->$_Mi3Sf9B)= z)vo?$+xnle@Pb&|)5yhllGi@V-}E$O`YD^nLoS`C>^lxSbRKc+J?lF0V%+?@2@CHR zEPs%*_)g}`t8sm&;_5b7M07C-xEX|YM@_r#)pk60?bE7VpAskT=e5m-?s^6_5h3R@ zaDW#%Ko;HcF)+yTC^$s)9*gQf;oW=Or|*P+|4F~T6Jfm|6w-Sl7`(6nGINwQ|5nc8 zdyx}P`*$4lY261ozs?K1SqU;u70|NPr(vUU@fMSkt(py|^;$0|*B=rq+REH|UuD^U z=_CK84*!=q_Fw$qf9<~i3`zg_EdDcS{pSTE7R~=Gn*Z5#{|lJ?w=cOG)czuT!pU&h zS_Q~%Bgo=9(6Vsw5krvb3ABV8wxABQvJQTn0{DD6cy9=@UJf!t1X`+%+Bt&uhFbT7 zCWgTG;=$H$gU_7IY@j9BO^!R<#y# z;v8gs0)zy0jw)6oDx=a>PGu`W2wWdQ`b5y#qLP)a#n3ITNF5^Rt|G|joJ-*%Cs0ka z08$@8svbCLm%G3&cY$rreA}G)$dyr4?;&_)6y0~2lKKczJt5C7#h~u1h15zAGIHuk z$dFysv{R5>O9?a1#7{pRJN0Db#G?Tndt7QZSY$8Iik+n7*DPdL%&8mBAnysfv5rF> zbnXEt!9z9*A=fFCu(8%h5JAWZV631mu3QRClBPnMt}-@>nx1)v5v?}K6Kr$lyOu0> ztJ&n+ywksZcX0Q9&^GN!C!(iaOq_E)W$_JA&uB5IK7v$0kSYmO4K2M5VP`D9lfLM7 z=AzrF^R6e)x*RhZ)Y$WG+Ur=c*(h^~dfarm&^|GbCT`nuE~7LCwI~KDPX=Kp1{OUA zDHrL!Ywo+iOOChvXWIQ=;3Q~65dV450&Ip8{}~qkXDE8b5crP4b~`2HhH{8iA?HY zjI!{Ff`l`?0)i|cf$Tem6l;+EEZpEKi5IjS zB6Ob1>MsH{{~7#VaqIjSGX5`T@?Xi~zq%a=sak{jM+&B(DoNfHBqe3|Ur6&mx7vRu z*>4QuZy7{ib1427(gt0AByIa&$`w%`Ne6)HBfb!n`iONdXrB@5Hc)-Re&oNCBStWR2lJ{1@%vDPW{)~@jqbE`{1RYsyBbC+VnAh630JN|HS|uF;UC1kS;=l66|0ZYu zYaaSoh~^Glr`740HZVocS+! z6ufD7+kduGp!!JZ;(w9D|Jlp0Np`*nnRh*H!^^C-4-yt%bLl+g)qf#+$^G~hj}z9s ziCFR^X!1p?>Ydt2v!#Q&8FVul#DW-D%orH785ne=>`P;3-tz4{mA33&#f}dN6ZTnW z&jhX6V~_?f`4$9|T;O&iJGjZg1zP0KrDa{PGJVd~@R^srdyo6|o$%{B0a;%c(0K&3 z0k-3?SK}V{n(Yz2C(`ELNS<}oul;~q{SL>Ptq#>&e46*T)@*gESZ80l+9YG9VrUz$ zZ=X-k*?{GrJQuvSo_@=5?gP66|84jGmpBMQg1f<4AmKlQ^J@l;{|uV{dB6x#A93h` z?)>$ry%*B?3Ua7vWZ&_q{u7WTUdZ(k{FEU`eFRxh2im>_*+T>-;k_Zygc0~sJmd}$ zWTXxAS{3EfWws(`@t5r_em$&Hot}em61jSc?mLV;jt5~w`bWsC z9U#3VNCgx=@hF}>O33Ftz)u{Co_aD8hQRgF+4$)o6gw4k@}U9U`o&?cM2K8Nf%o+V4YtJVfKYz}DK6WqB!9CYE=DbNDK zxffIBUrbwgIb-qF%q7>d7TwQWcrRno{ftHT)8^kwntd&C+Idhe=r|NqztgjLg>CvA zie%>CUk7 zKiB^M!WaMZf*b0fDTCSn8HztJgnwpmdBEU&ox%1ZgV}KqvN*(Gd6+?G5nsYJ@iqU= zPXD)k`(OP!=s;BdE1>mZBkAVYRM*Z%Wf1tG!9paDI$EB`sK|2MhvUtq&e z@6O9nGakiEyd2hj*1Ku9bN5(|7Vo^#vpi{fn^s1-!caN z-zS-x2&aYtI4y}*) z&Vj0ah1>rXZvU6O2CB#TF8t>?_n+?qXc@fnrT@~W{u^!jufF_C^qMaT8-HeRdX>87 zNy5_m;d8EePdx88_6p<&%~1neTRLkH|Pa7v1vzv4*CXNc%cHCt71_G@1c+YRVxglT=L!t(=NwNxfnR{ zbjp%@F*DBicJAf3&1BFBVUx3CV320zRR*0Q44NW@ba;6|Q+^DhBHEF0rHeDBo{#GW z9R%dle#o<9uV=?zpO(X(jR!pI_qf;Y@~qqLRlg&7($V5YSF`7yi|E=NP`Apva;bG( zx2i{}kYO~ZMi8&23xlv8gHmSb!WRm=|BGz|U5_Gr@IT`&(2XiwJ3)Owi3$H1QvS1; z|7S4#&!zpJ16&#LX#Qta2O&X+hgy-p0^81qbzgzhK9KqJ$O(rcCmf3EJrdOmKH9YR zaB%lQ$id*ClTN!1Ll$gw2q#A;dcHo{7yfU)QnGdgtAPj=_QF#C1@czT_S_#g8)JkFCDJS@-9i)t(6P_6VUtbqQlpgV8d1F}{tc2YrP8)(0!GmS8XoNO_UvLh zprzeh;B!tGAvcsk5Opsa$QctmhOHM)130>T(45IqX3iblJK4R7xO70~Z!7avd z{pM*i9r71BRjhNb-R#q{3v_@<|KW%!Ct_!uiUY4{Pn>fue)^S|DHp>goDJ$Z>C<|^ zv1+SL;VPr_1$xm_mHay;oXUC4b2xP37*xVQXO9b5F|cVfFsU=Fw24v=tc}4 z20^dXIag(l{nxqnU-km{0D%dh)yN#j{&O4y4IDA-`_DN4KSR}bhL|4=J}($tZ-Tl= zX2%&UPBPdWW6<5sV7}I<<(KZE{~mY$8$bRJng_Y^A5tH&gSQ>QYa$2(GJgcGk9cqY z*E#c_Y0>NGnU6y!-wp0K(A1PV=SF!o8X!~Em=D&jNe_@0F%+fy?gg-FIUY4}_ z;F|VN+vf?3{9|^dXM%?RLV$C&|sYexJnWX2G>WK|1j$#)wn=N%WP<#1d>z#E>fasHwJj*Ci-B7ktezLt zz~GQ#V3!6lK*JjB3=CSVGG6K-eUWVk0(*{ow;l9o-{aQ03sf039`J75=UKnUyJ4qu z#YU&XC7u#s4|i z{^wo~x~+p@>wo4={~2cgWr+XJ;PQk)3p7&4rv9Hz{Xdube@-<}WyGrfU(WZPecu0& z&PySkm*DkLWdGra{)6Ft2O@h8NA`efBFLepp#8SsiV3oB2(p_MvSbc2Ljb|q(gn!q9OQ%_NY4l|D@0^{1n(cgYa-{OrI0=myfOlnf%%IZLDNI?A@vcY z8nVrq52=Z4vgg53c-Q{$uKke82u{LlCCt?hVSR_f`VK*ArEt`WDH66U98x!-)k>hc z33bR0YxNX6^)v*%+inRR|{=Z^{bY4DU`BJ7Bq?C z)AnLja%7b;V-VG6;8A2?mjSQz<_DE#pl$n*vI{Z+1S!GDCLxs(oP?MInM>kkU>0ED zkYW~4VwbTH)^L$B2~)94(DW`b4R0_{=yl4T;a=&&Xp zJq9ivP|t{2iGf`ibW1z03InGC1DhzgaD$w`1epqi)cBBdK_O?PbA#Ie@TL2ZC}04O z0wEmyaR!aosk{F-4E(Pi_+Q%fKbO&e2E{K7 zN{7{gzl8TcOqlVidehtZX%}T&&$8-m6Eyg*VE8(UT>LL|{=dwn{|eXsD_!|7aq7SB=Km_I zenxKiox10L)1hCD2YDNO|3&xyXFc+t1-t^B z5mF`X`_Hrww8EU{C}=!Z;tXg$i0|}&&SM~D{OA75U->V60kjK@_X=nPRqQrM0mIt= z43mE{bi89|c+D{J5BJXh(kDS~7P$Cd=-PjUC;x?4{AK8UKVBpp=@<fyD}A$PTSauJJLicT$8jmRamcv{X?e<;hDhpput*v+@N06(nv1H~ zun222^2o6YC^1N-Sv8+$m=EdzGA;hkvGTvz;{SZJ{|goWX9)YxqW_;kO2Ih zhQKGCLh2yo86wE-c#u=(AS8622z*;Bgbf*~0}svZgpiQh2ewuLR0n~sXNBF42dRVL zq<7s`2!agCK~DRD*F^Bn5wuD|8l!`(TY%I=kb}6P^-<|6$ixt2iGnjy=Lpg*f{^w_ z%N+`rITS8)1|yfE(k zB6|)-^@5h(L8>G;iKjk_m~bQvj3D(9@|Yf|GJ>6k7Xf3S)klyKJV<2(A!DYT0^L;A zvDd$8yJy2Dhw_yU`3o%4XPCwJ=!P}x29&9J=Bqj-N?U|W8F~w8I&;YxGm2_4@F_5G zNHH)$&IRIt4e4MOVo2G61SVv>4z2-mdIKA{Y5)%_u_}TVFpFEV$~&@a1__zON;wy( z2ha63~}R?SO&$v2Oq{}t~2m%I)t041)12J4_Fd~O5H zbTBOW&oBeLaVGaKL)<$Czk3XJ=NL>6GguyH2)bt2_Fru4f7K)ZO&|U@d-PxY+JDI# zpjjav@S+7i@KFq$XuYJXp!$gQGU)IJ(JTL%5B=Bfy)IpQ+`DkQb>echkZuXLS{};#=lo}g`oL-YU()%%p%yEd{*>(A5i|lOEf2IS_`iT3;f8JyNMNa$|J^7#S^ndOXAZ0wK|I1wbFMR=YB_e!wPvOdc zu~Yx~HbO6E;#v1!$%`{(8@0r+diZ(y`Q}DPT=%Y;)XGx z)((RVGrzI8qLYSEuxmhNLiUvO=5;kQ4wo;vTDs_R(%fqav#*$BZc>g|=-+uFa`L6{ zsb@2n--pykaZ~oVR4ww(no>XSM9r$R)ho}Ww9W=M7vY(R0TjQiDvTO&N}lx^{_SG6 zrJykZ2`>h2Yw-M(DyVnHqyg$ygT@Jkz}Zcdfm@YPL{D1FiA&Oukw=b0Sd~FG)2#k9 z!*uY)h*Q8fB$j<;$av1+{(-^z34=PQJ`z+0)khpk|JfD)Gb{XORRY~>ZyfR8tLd&^ z+r#jl?!t^<&s5tI$tNrbdy4$>il)H@IoUi-iqkkL6v zZwOKoK}g8WM3CLAkn^`8B%~&S)Io64vt~1-KJo&eeF~|Mpmk8ydbi4Tpl(qmV!{Yg z8M(lY;C3ln3BMJ>xnw0;ePmy>92waaF0(IMZU?T6;PsJn;W8)Cz4Iu$Tp`sEgtSAd zh8%JiK&l}KiCh!eWzVzCnj6*$IwdI_+)IL!@LDOn_h5MML2%SS>n2Ej6o%AAf-Jp* z^q3;RtHg2DN03?xPQu6WAPiW2bP{>xL&T(GkrR)G^&Rr>23=p`(Xhp_a;<&w67%di zMoAO2qB>MVYGu4jq@6NEtP=$cgW1$QS>$aQB+M9vwLqgltm5GA5@gZ=vLg?(C_~N{ z1lgF{;UbVdv+SUDK8qLww=9EzDw~u(pO&k*v7d@#nwEQxs!N`VbFQLehKxC69`cR?$xw4VNVc>r3r&Cv6ochP@=^`L{P8QOm{^#2E~{%!cjQ1po*>lH)6 zD~49kBro44&^g{3H~t$w{jYo-bfkm8HRy^3-YXymq&EZ^ql2unE@#9hp`I>;IZ$QUdTf{x374Tt|a4*!kZ{+k5-w+#PplKI~v_rFH|fBCfUHf;~Qr@qhM z{3w6(qnwquGnQVD?0u#hxrtNjzk=g`Iaknmsp158XIS;0c?+mMVmthw=Pc+vCE=U@g>U{BxdB2#-~*Hd&w;8W$xHtwFM$>|aGnAU zgbE${&*nRcfx!_v?h5w@hyn4qC4n)4F9_Fv}ie~X9z8Kys0p7hju)!(Z9KXW#|PFwvnaoGcxwgdWw zt8FT`dbJ-;oOdU8)r*q#pYvC}3!nciWX|K**>?i_FL<;c_3J(D*l}93V6#j8VgIgk z&TYs2CtVJiawc}}RmfCn^7K3Cm;!@nxn=rE zmc;)Ik^h;3|1-G%XE6BBp!1(mc)p{byGE&!q65L;JCqA>X>K-Zh)y$g6sjSM?_En$3_p2+|?) zgk1*V0p86Ds*k{TsN5x8>% zt&Cvx5oC-GQVl^!c!vnWfK)^FIrEWgBHOIFa1;ivk02zxK7ueJdq9;DB!b|z5^_xh zse>T1evq07jf7N85Hbq&AXW$$QWHT)to2d!)RU04;t^9$giJUR+`ZqweYaoJX3yGn zZe>gC^XFP-Of-n^FpQ|v@Gn(%Nt3pTlQaq9*YXflw&Id8<`7k91a+50LE~wR+~65= z$Xq!jFHo0+4CR5d4`^&$0Mz1Tm15x2WDqlCl6PTIbYqb708Il)IfAxL2^xcrlHk#1 zU{zrN4S-04*6cBWPF)7~dRW1E7E%*IOoq4?BEkap3ooc&pc#|1_JQEh|Ef3t%U}M_ zbKpPsG0<8sgERjb=l*BNe8SN5mwVNJ{&nEnODFti=mGVTMEm|rO#Cl0=fBFD|H9k< z^KS-i!?b(%U+>X>rEC8M!BrAuMK}KXi2pk18a~bS-x(5CXar7?bMF$jE91~lXH}16 zQVCw=W+Zm?D5~Z>wnV9|85grc}#kfy7ET;=0}C=9^|dQn>6jcUeazJU1)tI z@AhBD15_W$`GM*qkst!~k<2)L`B@Ku zswAevpejk=)PJEfpu?azj{j#n1FDQT&x32FbN{){f$B=$bKuJ85~wl~Irm@eGHCIn z*tP$XH$iOAh!e9)+JkAB%sv%;;8H3zKg_M2AhbZOZW zGUZI_f=d~TuO?4F<=3<|b=EPT#?DF6$N$Ed_{C@`NzicZ16`X&YM*R1#e;nBQGNk7uygmx;-Va95 zDk-%4U}(qwuujk=MDPj-!a!a%M@&tGRvAG$L~s&P6G2GW2|u7gIdCTkQ4>L`BuIVa zS+x<;8-g6F;9jv7as)SKeFPbxgV#im*&;lZk!$e^cxB`W8>4e5TxMUe6x1OqSn80s z*dcE*gpD~;2bm;-cZlq==RqnXTs0A-N($-Phm63n0Er7Q*?TY)31N1QAXO6DL=vP{ zf>c9TNl0Y`CQm>v^95ZiKjl;u2!XC!ikNZ&g5XQev}a#X=WhSz zZQk{p+$&ev<}a|!m}!_WQ758JC7@2xy-?gXUBo1sTRVtb)tyz|ky+dfG(g0o%)knn zT@nCyJ=noh;b~|PC$brI8v_5v{~R0tb8Z5i-evL);4f~k78iG_Ma72XDQ$MFMa91Ow3#cgF+RvI4PYF z7DYD}MK@MuSAJc8VeDS*2|=JJFzQYmTtIFy5UCLv=?e| z8~FA9D?9yHbpEgG`d`80zmnH~dB6XP0sn=9K!_prKT82a&jn?U^|_9LJb zZo(HqcjF3O2X&4_Zu}R#23lRfc@|V7h+h6Lb_KMsf&JuvmSg|Hq9UK#1!Q=&T0T1OHj}|7X|-I`#*$){gJwf59{VdCvUjI`yCJ z^nZ@C|9QcP{|x9AKIrLR){Li5Qnj7L$|Ie%jLaa)lp*be`{~)CFhe7@)x5ZhNz-M-4C%v1mh4$?W zpKu_e?@)N}!N}fyk-ht%^F*D8K!b9SeozPal(~+BLG1^isAC^wv0PyLUI+tn)?7$C z;=mvLJtKHE1ZHf5R72oC5$FIq*oM}v;L2ztd<7k3-2!BE4njgl7 z4!QH}bLOJfL38Y~5y&=cHYP%@j6%Bh;jfRd)=iK~3Q{paY9i3V4zQ&T#MDQSQ+XpM zorstOs*Iw*l@aKs`$@;bCLM!RNs!4V&`{o_V=0#$-o4^BMef` zW6c6s3J4N~Ae9%Sc@Jsk@qwx*&{zayDIlaXh1~LoT%Z8Yr;thtGH?MQ!BR{D44}1V z;!Z(@YHR)*Z2Ql=`9IU9|DZ{tNq<0p{CC1yB6QLj zis#aQ-6#JUCfs0;D{eA2?QT5{@!;F<^$6p-;NxZff1g*F=q z31>mjYD3M<$}!LQZT|~7fT|%iyZ?~-NY3^@mnLW#gIx4~h3)?hj{o<+^j~`3bEm#@ zp$i`tt-V^b_GuOID_gVB^S_4le|0V$_Y8&{{jL31p`3~hMDvKGj#t4 z)kiD-GjIIQum`jdj_(X;X}uu$Zd~E(|AnuEPEX>y462NHF8oKUkN7V9=Z~5XI&78! zK2(J@WeI^U(es#Vz8y3}#C`-+8F3u~4b!nf7TST=fwLY4^@#aTf$Agf)1dl@{mg&v z(;x(|k9bf07dQ>tw2NFHiCp_Hat%7qC3FjP*Sy|NP?Z$E_;LK|*J&#rrLA}rzw~k7 z^sDX@u6j+r;W_D^_ml@7Qy+UweQeox)2jcLRsUn#NzV-Dezafx-}cb|pzHs2FaOs( z|KIb%fB9t}wFG#?m4XH5%+jWS?~Rp$owN^$Ur3`1G6M^#oiQ^bc#N8f zhf_#~M^qg&pv}O?z%IrntjZ;>$;cxI8XITkV_@QCU`-d*>EqM;&!q*bk2t}V5tAxt zl89LWTp7v#XHogbt^Hp$=(&B)f1j4?J}uXSd-i~;B+vlee#qSuX!Q}OD+I2OpfyqZ zfxx!?@MUsvMsVA1I0|aTx0@AQ0f7eSAX7zt%TP^5C&R(gj@|lmn*&9?y*89k*lQ8-h+_JD75z=q&~tV;WZTU z@&aWOFRF))fSaLKYs8Z(LNGZ+*(O}oLe=|9UR z=(<*h?Vts9TwDLMZTv61{6Fi={|q@F7`)FjxIbX!!ZpkX7BCl=5z%B+G%z>2HkS-FW8iI5WF~gq|v}%iY zvXbvYW`+WSG2NB($Bz-pX==@i5`mX{;@{XX2Nx|j6k{jp>BvHTr3<>|4v;Q-6{^tZA z2gke~wC|Yf@PEFu|Aj7s#yJJSm60&`oF#!vpacDQFM;YK@vG4O5#uqCAb;!<@H%Qt zUtowZTiF$^&_4NJ{U~TV68jO*z@6Y>(7-I$K2S%A?cjfoBcQ|BA$1UB7K!iFe?IV} zlJMF85F~gGbZIa5`TyMKK@JeShL|PfzxH4B)_<|v{}rzMXIS}7Wy&k_<$p6)eMnyV zI(E^m@Htn4C!F)`I^ooPE@0*zy_xq-=0D|~^-*g1KmMiv`Ir7@SoEKH<$osdfq2Y^ zLCqV*lm8j|J~HsRv1%nr8AmZmJAt;Cfo^MJ0C$cceq;f6ksy1VA+0sYEFok~EJO`> zr3&b_K~@GvZcY(7Idx0WF+>dPjBFz8f(l$B%FO(7ECTWj9DEFn>R2xY)aoaRewsmpEHbo;$C&uzv*gF*8vdf*%{QcGo*WO zXwSZo?!6!c9yJ10L!J8}=bb`GXnoXyvTPf&jR?GWdl%$NR=<`VzRla=Y`?~B;7eLT zJ6j>ANkB+%@ZAuQ6S=(_ws;7|W15G7xAN$XD6m-Oy@V@`N zJN`?q{V%uazsvUj3X}gc#C`+SM`5oRqTVq?JYopC!w__rA?_u^%>O)FKqqQCzWc9p z_rJ>3|NPfL2y#j)W_=`b1-yOoDrotZ=(YdKPyVwP?qXoD11|2qWA!D&m+UCEC>wi@%P<^E7@?YES zzl_a)7M1@De%}ozfa)WLo&UuT|2Mq*Kk(>(jX4kPdk#4C9yClj&F{2Q$Ou#)DLejG zcKk07MzW6o6Zx9Mk_ZEc*|skGPM3>LY>kIO`*^EB_^~fu@t# zPl67$1JzK;s~8x};jYKa&|;7bV3_(`?+mCT#(D^JSFFe}Xnn*4S=ew0R3)(=2aVTp zpZd>r5>y%Sp8PL(8dMpHock|w?!Um<|NLh_{WVB^#D4`;AMsrQMJ4YQP;XA~8fXeq z;>>@YIiJ*~y-8mBI&<~M#AWxQ7u<-Sb1Qb%4cESlX7z{UyDqqI`oYj~pKtajrWya4 zW`Gt~gO2{#0y+?n@gV4U5QP)}8TvngC&)CxbE7h#`9V;91nKWXZk>P+4zPmP%W;CM zCrBQIw80=NRoKAdtPBiX%zP4{{t;-k4<92NXbhZ{Uyg}S7E~W`@`L--0fH)}+=~CX z75}pcxzg>>%??%EU7xf@a# zLh2)E9Rwbx19gkQJtJ`Kvj>etR6yYR$hUbr1i@A*KuS1?v-oc!*P(B$h~Z(J8Wkwo;t{}co{OnRSm)WL0|@GPb;JXf-F&h zR6r0Ctpc*onG3IpAhnNe)@*pyW1TV6Dt!h7S*A^gAXvyldr1WABgjqkkRB7fuLP-& zu#(7?6mpdWVZ%F0a0YxeID`Qiri+>as(s*<5rh#nnPSJ_%(F$%-^s1C|$rZCn6*CFr*7W95wr7*KW)Rf@HR-v* z8+<^sIgn{qcyR}*HIPZXg$I=-IYH~qcof*gw0Kn9ocB+9 z2WTsb;MxB|SN{uK2HhsWbm%|xPH=Z=_J4-XA563U>mB>Au;o8P?JI`R&ma``pCRNg zL+~qx4A2aS_<{d6FaMiA`7eAGgm}Rh7(q{^1GD+B{^tiT?dHA$nrINZ@}K|ue~t_P zT^|0I*!q(}+MfZmXI~i9Hvz5U5Mp4JWZ;x#m(=4^Fy&RU5mI;L*Kpxecji=cVN-Tw zl(YaH^u({tz@-M7&SR1UO$UJPmE&b#;8!(r;g!;15|HPTP-kG~0f!}g0RyC?ggmLl z3@fQ1u>&dJ%9KqusM`Nma{z5SQnmT7~WTiNJ6iv=LQ2YZC*5 z71)FDlm_NuA;A3ww&b{G01Ugcc|KNY#{h>pp9LLx$A9`i@0tJn=Rgx%e3wBr5!XfV!Go~#pxCed=eh|x z;)7xSC;15vwdcHzS^O+`*4>~fH+=doN!IR@u0LRQ`oHg^{|x=_89HAwbpB^({>RY! zpJ6KKvW&e1gVB9*Lhd1^{xWl&^TnEbL_KbLy&F8 zOgOShp8-MEY182dtv-Tq;k8mw*FHFc)IrGgQE>NuVyYxaJw;^2gnX7bq)GxEK!l?{ zf(+t;x=vG0MZi$_#N(m;M}xZd`LyitYux5pv(dd|xkK&(yYy*h@qGqi^_u<_+Md}e zPASSZ5t2rJB3h0-N|wA*ddvbU44e`S%tGLnHe}-xq@Y71k+KwRnT!lfJWSl8TvB>6 zIu2^Kk;YzWCgIg?nZ1UED?_KA72Wh-3~J~9JHGp`e+N_!2kKko9$t`iS>3s6G-nKN*_;f-0tE;1xC7L8pl8 z9Qe=B{0KZPCWVo`rOa_`$2Yg+R>`kTS?FGj8x2OAe4;(2j3zP?lsD zW963v)kmOB91tl!HcoLyPBD-yH2mE7#%>v!NAV_AOu^A09ugHxjU$HH;4hMlJ*3)??$eM;58Ar_Spr!793O)?ec5c1*w31 zo45NkZG#~1#;x#yILO2hVA21IWNtv-TmBSNl^Ak`3r1oehsm63bJTCd7AUa++a zkSYm6LTVq#d=R7#g0PWmA2c?kGO`Cx41q`F7J>Ug2ogL!1Uh^hbY&j?5jj{jWScz` zf*{opgtSSYVVg12CVd9H_Q7O;n*or<0F;DONf0uyb1xb~tB;VYB=~d^qyj=C@z+O? z?h`IQHP2WN~47|$VyJZEygA9-{ zZO9lAq^%B3`n0DQ8JPH4g%x;J9OTV{R9$n-!fH)pI$d*SGSr;YS^1me6sQF*aTT<~ zh36FLOf&WapiA6DxBll?@n2%rf9~G@3~3)3{BAN#{bO+aztpDx4E0|aBL6e^|6(Y9 zBeL?p__6;M&p{U@$X^2$O1$9NZP3OUaD60r8Pp+ykdV5G=h}acOQ7zO?BoBT%N{c@ z*fM|)!QcavkY+8U*x>~&uxAqntt4a>0T(sGu(i!1;2mM`bsdmvaUhLhPEe7>Ar4xL z$SJ|XBMF)>15KPkf}I&`3m2G#6i1MzHDu}mVlNK^L$;L094WK^N_L>r>g28eOS*v8 zeluA9XVCx8ko8+(DQE=(!!FR3;tZSr^Y8z!cJ05`^#2Uz+Zg0O30eJDbo{U4@?XvQ zzcRRUr0M{=4oTJ?v{qce71Tds%KFdH{)>GosNiK>4LTl;y9!oIDnV*6L|}vC9 z7rzKPzLlx}KilO0(#!s4_Ig!h#|U8G4T;B!n-;93dRIXVHUi6A6o6BGQ5)~G2b z!X_Mv=sOhJy+5dJr%%IX-xCv5fD~3#rxdFIIBRRI-hcGzk{fa^X|9 z6_7V&6V+zsQvj{RV}z8PkOC93{s%mM!T_1tplPMV$iOPZCZfo#WG7|ltKg7s7+7Hx z*=8QsEiv`E$5GI^G!nN!lhka7LC2SI9{wRQb#~8%i{ree<1Y*FSsCrTpJ2mVF;=2Il#pWD|onupMg;T)JtU( z232a%K_1939lSJy6ljp)9Z0Rj2^-`|7gCraV(?$y`oEmje;JGaq7IHh{7{u^HW zZ+hv!`IY}l*Z(VA1vNo?=@|L*12{p;@scI^oO&kaHD1P3k5Zbj5bkjug0)eyW6f-|sHMmtHWkC2xu zKq?@3ZwOu!c~ozJ>?eX$M$m2%cxVo?iW^=Td4kVafDFn(CW#;>)2uV6gKDMB>9(2EA)|AU zaXAPH=@VI{PQ$E>EK{ap)<>WVb770_Ahi!BiCG^(M)Tkm6Py9Bn~=Lp@cIbOz*-+c zPG^PGN)fPAhTsgy-G8yuPQ*?-5jFLA^t2ODH1#BS7wu`tvIo$u_Y;qT&NAvd0-C<; z*aKSgP_-U(&2Hfm+pM|P$x{s@yFmxXxtGa0W(%7q^6Q7OtNXDjIxt9Cf)4*-Rb*h6 z1dl8U!g@@Q%MKuGk7-&Z@iB5rFo>!1s+kMx`YYSUGdNT!q|TSv_uu&DfBwh+8Ls?i zIQpOA#DC7SpmkI7hyQ~P?@Il{;PaTl`5uGQT?X%a48hMBVy=qx{FmMfI*Ngz?jJ+{ zf9CC=b7p+s|F^yi8nNNO0b0hu5AG%Lz-EgeeIf|Se)T{5RnVn-%;)}lKL5`!`wjzx zDhPqbSRj2Ic5r0YFQlS@R7nEh`Up~af#-%8 zxfno4=5m79XhCMiA*}+qIm`?Uc3fid+$x`xtp7`y{g*KLFJb*(*5SW^%YO!w{|qs& z)n@+ZJ@B7p`+ue_pjkGFIsX|7pE6i{V6gbWZSr5z>A#u_6sbD@*Kh*OL&-S&mvQ*d z?f##~<3B^fd#1d<46VP}CWFde=GCClQLY1^?WmHM|4ZBgb&eqQkq~&g4Y_~BclAHV zd2s*e(toYa3k(b{;1ZRKlmsL#X_O$f=$FkAaIJdaKhr_bNllCg|1%y0-Bb&@B>f0z z7K!u3f9_MD^9*@UgVu@*oQ2j$GFShLo&)Wc;yd$S^b%-+1n9_)@^1_UZx~V^GL$}H zX!*=A@jKIE&@wr;{r`DSf>!RapZd>n`ajE=|6G?qcL8%=`7eJH)a_zu{=krMmZ9{G z?5h8=d;T-b1D{E?2sB*Guho}O_+s8nuSN2fnAP) zO%{Y$Wf)i`K!_O}<4lmA5oEv|lEvW5dwAG|l^8fBcqG)tlwg|zSVcgGO>uHD@$xaS zMF?nCGpK%LQ2QoidqF4sl|$JPuZDB}?Rx{jqj15Udqdj~hO`}koSzWXx+fUhfpU-= zhyvQ6lSJ^E2-Y)#E>wV5MvyT&$b=ERTLf7$hrDVIQXj!)=Bz%Sl(m8_EN01JYL(x+BxE!Pcf|CyUi)_INQUO6o$a$xbn~5MK zq&|YLQh+d^l@WM95oozW=5(u!X`spowLY>=n{Jgl4PGB%t&+gQq`Q$x~4CWQ!vu>2ZB`Tzk136fa9oEx<)DaL85=xMM zC8SD%48cJ}AaxUbmlnKA;sb9JgS3+&hC?blcm)U14hc}mY6lhu21^DOUj~7DV)}nY z4E_u2{}(j*FJk_m!{$GW#east2O{nN1rCD{`%dt7t-_}a-uoF0UozNz6R?EVM@kO= zRh>ag8q^%1^^vS2s50Vq|IZNnh9T!SL-S9TiP-BSiCh0AZ-e_sS3#qx@cKvueC!hE z1!#Sw-*=UP(F435gA?Kbyd*nl7LA3IfkBOdQJ*1rwdThEtVco9g-i!QRT5}b+W}C0 z#JV4}s)7CRfA(Xb`bgm1fBv(em34yWK=qO270|IoB4?rXkas|6k=OXkHA|^*aozk~mKP=REVD{XA$L3uM)$_{IOS*Z-^R`VXp-8h=PE z1$VvMzB0`E&$#kG!zR!$rs)3v%4h#ub)07qiezArW#!YBRMcZ&6K3O+W#yHD)<>Yq zNQQw~jDblM6y4CuhzmTD36Ec9@C+KbO~NF|z#$G^P$vOymM}B0i*XApaf^ww2?{Z= zMF{KEF{pp%HTe&!jI#fEHJtNqJRi`p4^kh2dPZ#rK$nBJ?S*evMLx*~exy%8GiYK6 zxh8_J!NYW*`UqYJ!5Q$H2tIQJnIVE#L-0-zq)LKwA$=kU3F#cU!KR8lD%ZJJAl5Cw z&zghG3L!7yhF3%I=^;o>1nC*!s*m6uBKy3BHhBwe^B39YErj=m;0#D*1Uiu$wxJch z4uWkU0mI-QE#5U7T}oHkPapyndrT;8fLC2;EUH-3n>p$nd{|qJn8T{`uxIbiYxyN90o5A8b zgWVMdn@bEXr`XcoFf9Dfw*hn-1M418!`0yx=rC}p8~=H)gAn9&AP5Psk9ewNcbKrNNW~Sm_TYG$PzM0m4RGC!NnoV6_~-{ z2_D&F6aZDkpw1VhwT#>w!WD=R2MB`3g*d0n>F(y&{4c2epIhrcm*#&)t^aKL{}~)# zh{gZcT=d^=`+ubw{~0pAFqr*dF#ODE^k2a8zm&y)1-t(W4xsgQO7@^@k`x{OD?0p_ zaQrXr@}JZ9Kda|IhNwGCc^??szOzjNbw$}${%79ypJzYl_&D*4;Ny30;;fJOuYp!F zFdPT+b%~yX(K;zW>bo!INTpz~uh_j0gTRANkLI0<=Sx7kn&(@Hxlf+id#u7ZxIV3_iqq4oho(IbYgUkoe%vv2&*xEoYCavTGl?rXaCH-k+V z1BV`?pt-5NA85%Cg8&1Q5NMc=NenbF%OuFa4C(X2PjQE=afJj68VMf3;}&J$6$fon zV-{lIQs5BM;1pJ3<(FgNjFYwOlkq;QANRng?x#WM zas>oAV*x_CR;&e`J6EyRt$Ym>frsWGtL7jz5rl+vh@4ATKlPrrA^bIweGceEZb&s`pEVmY5C{Nt>LUmjQY%5~BM1qp ziI7RmS}AhUk;q9$AXQT2#G|mT5NJ~pm<`=R3ta*+2~-)yOga_=Vw{MZcp|#*XmroP zh|Yb1P1}9zHhWgA2E|m)EVHBu1`$n~fz@grg^JEOiZ+SjM&Ux5u3U2BHGM+ zN}y|8m;^z^CSsL1WWy#}4x*gP3Ld8t;+D75T=*>Y?tkg~pc4QX&i&`O3965TulyH3 z^PgeHD~7Nq4DMGM>~AvIUT3hr1*(#4FEiL)W^g>tknoIQ&VOF;jW8Vh|J%LzZ}SXP zTu594%}~Sddxeb7q18v+S3m_R@0I^j5B_ss{Ldg6#K52nE=wWx5xmlYFd#J%q>4c! zAqvn0Azbi$Gy^-hO5z2DDw7~+IRt3ABR>OZMAV?L4CrJ6hSM79gNiJuU{?D!Y zpG6t81BF597lZ0w2D7^i4$nac$rgTO2!G7rc$UHPH-pVTKJ)(~w*O_VLG_WmJ*YlX zw1d`1%1)q8lc?Q)M%P~q4qq7}ZZYP*XK4A#IuSHH#kLYO^vAsqG)E(L0aPDJ;qD(n z>LbRJ|G6*!H=XsAK_D7D2rLZq3}_;ZM^uwtK!t%_9)!3Q7y+gGU}9hh-tJ#Dj$ph-L&O1{OXRL1`u)5Mtm`VCGe3 z6Oae3zEdo7h}me7{?fVVOHj+{z~)o_9lQKHb_KWZ03q=C3E&E77pO97+ZoWj1B`aT zyG7vc&raC59Awb~V$~d^Cc^9ofh(hJklqlSgskF*bcH~*Qq4xM>J7;C5u_^wC*fx- zxK^wMRY~y5sC*4%k_dV29CCdGsfHk=18mWP1NZ_6_zV%~5+d*lI(UZ&&VW=#kggEC zA7q;|AATnhgaJ8s4r_g6i?)*pRv%fVO@Y)$SgRq3AfzUOlhD`!pKbwRgO;6uBQCgY zH@sqkGvL(|q)LK!m*5>GI0If2!5NUs2$_V`M93thCc-3sym0AEu**@186axI%vlYsJjH2*2mMd17#<2F(U(mvTDjq zvE%=B@BQb$|DXBtfA+Kg`L6tzx&}gA3;#3Z++%Qk$l!b*bkUyuWzcoxHW#3z(;0^7 zrwsG|3vB_NbfLx08UTl)>XFkopKhLh2f1HY5NbA`lXy22vkEW|6o+OCXr|!ShFuP=%}{ zf#@YjGBGe{u?ssfNNy6;KEk2)i&OnKtMY$V<^OETf7w<3F=%{c*8js{0J;c?&+xyn z*?$=;5RwO9)hB2BU)~OMbDyH^e?>dcss>T#|NOT98QgC&xLjk1y1NAR*`)_vdKf{cd3<53; z3?{5R>Y&~$17tW3GVY2Tn~*6;WD#UGr2h*Vxdffsz{$ z4xQ>>xig^oXh8E(PzBV!Bd}vvV8^atlu07cNj^cXyTCOOs6K+%K5z!)`~=88RyYaS zLj)lqi?_k#Hu%*KFb1eM1gUo*l@WvuC*c(koZ(iv4paewNtg095ad?A1~kWAx(dVq zZ*GOuL2jihAPW^-VMi-KdP9!I%ixufec=-LJQ1YcaRi^G-~_%G55k7rNd#dd&l5pv zA50Q*9|UN62)u^~vN!=wLS}^^Gen?jC~b;m>SU`l#Bdy>_JNaTus#to!!%(cqz*zO z;Z+j68iF(Y+jqmOBsc?7H$g~veFR~^DhcN3Sz=`i@sllhI^rgQ;MQ>tei=(gr2Xch7+%X6{naM6TdQOAOh4~;sfu7 zfQ(ro=O#+n;BJ>nWbT@W%2&XrD1hrD&hw!9Na-GEu`xr-H-@M)po_>I?l9P22Hiz& zbAiG7JcHFa5ZmbtQ|xnw`TzMgfXH3Cx{um3Kvq>pw&B&ss}EJ95RsECCF?M zq@4^|*$XilStWk90{GsX84P0U8Du_isQqG7`Ol&DpGWgQpZ0%d{XeXR|G16+^PB${ zG6l^pNt%NO>*TEe%US=IwfQdxxklCo#Fer9&!GE@LE|ff-3126%M6j{8FC&mG<|^7 zN9-%0^%2jp|H5ZM^^p{8NdsiPH{_Ccczq;#6?DL<@P+?|bKf&a=Yr0);LrzM*TpUg zx^!kN{|mrJ>P~^GB(bZY!@wEVfa{}G|5q}l=R5je@C4{a z4Bq|!+4lYCIRZKzLg*Z*KH@(5U+2ny>BIk7tnwHbO*sX$!JCq}z-a@L0FdJmSE~cE zhz>HEhqN4#g9lV42}?3?3o~=g*tyH$ssbo2%K7v<9X!Q}iGJ-5x zfY(Q$0Uz*n;mDN{5#hs-YtSuL)KYyv7n6E))})w(?hU*tdPnGUK2qpqtt2e z`p7bMGQkSSG+`n#LaT`&TzGvH(6$R1p$+Q=wxf5TAZz=PNl2}PfA|hkJt5aekXi|| zVuJLHkjaRNha)B)4x4y5e9{p}j|sZk0X(S$u9A)+lTcak-!8W}QY1QCI^Z{bQIwG~7bA_x(IlvEHlM4UX*jDg3E zL0~SA%0_m@hl1L#d9?rY>ilQZ`Oj_mU%>Fcu*rWx)Bi$dpe~Y#`F}}M5Rx(fFK_i< z$@;&d)qfdlka`BWBMeF>7@W2-xb0$yJjRfFi=pWQ$Ao_j^Zs+L_|LKh)IZ`s`Cse; zv_6u&4O(RfxwC`!>VIDF!Uo~1pxsHr;2wwI#s3CNelaMNF)->faF{T#DS)PjST#Un zaeO8WJVp#0dZ2C*i!uX~BDnUKVqnn+)kj>m44gI$tm+I5@(c{ZTpVf)nv+>w*E4us zV2HfWQ1zX+_dm~~{}Su|3vBx@wg+@6FxLUlay!0LpgAMnbN_|Tf{^%mP_4nR8oXY8 z6X=`=-h=;HcY@aJu^;-+bs999$A1}AH}PHh&vgM*sj(gT&wlbh2ly}|$PsyMVIFCW+tprZ}=R5JA55YX$~i28Iv@h8_m)8SKKRxy8>5DLfQbe8eC)iGd*-yrD@4d=e4(a%2W|QS1DW zs&%2=`~6yW1h?)FZr$(Gw#NswKn`4jHSYo~S^#nP1hj#UQ-EAQ1i5<%xf((~=@ecQ zAy-3iHmJ!8nixXd2LY>(HhENoY9&aA2vQ|MCWhQ9*SS=I5OPiAQohEuVlBKTg49PK zvTUVe=?cgU5qzxzoPpUVf)CB%nk0gBlpy1B$Rt{2WRpG58g?O(HMkCflUC`oAaxL= zR>D#fK~{06P6pLPDU&QxCRrs-u}T6>8-e;kNfXRq$TYDZ6PYCRfe-rI4krVecffnH zAO>oE6wtN{txAITn;;D2nh3&%*GI?<m@PZ|BkQzo7@FeMp9Qnm5~s*GU5YQKs?|T-JDlIEo2_Zme^~c`iSw|f4STL z8PALL1c*` zkxkPA4{bFuuufuNJ<1?_nL+v&o9cfCwf`(yp!!JM;=hQ+e^JZ-q86YkNz4eeK3v-D zzqHwZd5iy2#{caVvxCPKL;1%msHD8s8(=N4$GMi=QF&5$HxC zSbf9~u8erW2Qk6xBc3z=1uymxoB22Op@QaUzO zaD5~R>Qlh$BVGpvZe0cjMbJ4I3=#|+MGSf~80?RNrU-pcF+^QwsCo|CWwz))|JMJU z`$3595NORd_nH5E=Rvz-1<(8!JP8_AV^{@h$*}DJo%6u8546XYaS!MqM3$qV$_r8- z@m>bqqr`m@bPy2BvHz^cL0A5<9R=Nu&U6s80SZ(ff!9&6ffx9Jj(Y_U;(-o#JqlU| z&v^Vl>*@bNyZ$q{_k!vpWuT z*E&^zDj;~31nLUGxoB^+lAPh*)2)RCjR6TGKGD8F* z;WZJw8iFt&JtIhE1gVeU)ewYXl|Bo(KC*-z@&j2c2O*)$%vkJQ_XmD5Rc>n0z#R(h*3l1R*0P9fQJOMg)sC%z}$1d-tEnbbA zy=ynPRjhU`S!$az7qSvhBcw^erwXzXPt+og&oEqA*PmO}olDLZbm%Qo?vi1#Nd8{L2jC0=0mY#|8?&CH+uG8@hV8Kz;)0}0r!>v zJYd8Hnd|^pMtooepbJK?f+niCZ~srZ`k%qN8N8-W1l%)X1XoGi;88pH5DG77JBffh zgK`vuQ3iud5Ce-g=*9%lDh0S2WWO_l=bJEnjVX>Pf>js1rv@1qXA5wm5T|9KAm7drW0^bF{H2BFjcd5`>O+5I0hf3)d8+n)dYM?ojx zG4BDL>BoHVKjX3g>}UQ9T>{N^iJk`Cw9bDHv}TX>5a=Ey?&F{(<&1|w_oqW{Vq)0~ zI^>*j7wDi`j$@#EHzZDh>NuXQ|HTi0PJb0V{9p0Re}<{w8C0?u80?rpH^7Qv#vcPi z2$y7yfb35(WzZo%Tx$P0)&Fy8{O3{q&#U&IR|SN4)c^CS|L50v&ZqrM$@PF$?2Ul> zqtRU#f;&MMKm@e!3~1j8IYbUZLTVyNs}fSy!Fxc+4Dh`V&>M*0m5yKi7G&gGw;2^d zZfo5LK8SlW_+Gq?9@U%Nt2TnsdU&4*vW*BbNd&2V+%VQEz_+qu?GQo6=peNZ8VRY4 z>~XABz`CauK3imyJrA-<0WvoPxe*VvB@@;yg4IE@Y(PCD5NVw@6OL?BX26ko%5(@a zOP*$yJjE<|ib>)mlSEJ*WR^6+B5{IwLceh$2pJ{x871_Y#Y2&4TrV<$#1Vvq)I<;x zYkdT%p5WCGoPkyoLAZG8BRs=*$Qzf!`VNJ{uC##HO=u$U`UvZtfRKs_QY9hRN1(yI zNk@?TP4M~%>6{|aeN2&)j)e6e4C&q*)VVvPZKr?Z7Qfnc?q$neOP1T^F0f3UY!uf6 zS&66NT`2FAp==v3V-_J};4h@+Dx_e|Eos6bq`}Co2s-Kkx_1e_aS2OL2{yb|9{o1{|z64wjF_PfV~0Qa0Hnxf{ZD_@0H-W3X;%4qDa)DoI3Pl?dcq7x)}Bo1m(VflIW4PqRiyw_3t9<=DyE9{H?Fo-8cl;5ju& zO#~+)RSblS=52@+njls#L?wiT^u-{`Afttl)9oa{LtbVK3>FLw$qWp|4BXooB#$tN zK4Xx5$E5L}#|Tsz30i>aBVPUg+?xMcl>Rfyer6DS&LFmifo}_gI$MeVDrkldxjy2)^xt&je+G?O za5qPpfklCVLxq9e7&LarWx&91%D`a&>L0PFGcYJIuqcCW5azOE;IRhPN8C;fd{zw1 z`V5THpnemB9s{E>gLDsr-FgO_6QKIYU=M@QJ_hd-3>i-uy8kmR`_HuFKkM%QJcs{_ zoc=Fy5wssx@C4`_c!tfOvng11{TDg`s*hOq{%1Z2I_QJpDCiO;0r2pn+MWN>7eVVE z1kQt&c7v|=1J_5)M?m!v3wXSac@Jo_67wF=DX+Yz|Fa(j??0OHjiLE1L(ezR2JKb< zdA9%O-SOYL>HvdW3V0D6wp~{&=@RN)B8q>-RY3I-o6>(a6%gW70#`}k`iN8IKZo*v z5yLmi4!^DAZhGgv^s7DWUw6c}d1pY|9>3P@Ak?x0QW^QR?C@{e3BHmQIs^n>oB+Cb z2woW>_lDp#5rlzU8Tr+3fmcT03TPwjlGY92BTqMaRBmvu+UQof!41rSS4NPP+wR~c zbMQeq(3B8(dI-e8xQz&YzyiEO1X;O_Rue%MFhG_r*yb;S_l6)0kfjcA z5_#;7___&FB_Y>G;r)jpg2>lfK~LytDO19sWXh@d$qz^GyHGFxL zoY-V+m_!XhM?J91f{%LO1s`RATz*1IQB0DXMcR&g>U*7Spq0IhyFru7B3J&iU;EE^ z;XmKS|EwoKXH7H2JY?{F!r*=nG;?Hq5i~|;eVV~!H-q&d4x0lEe3Ka5mpbkHumAYJ z?n7{+_bRAH5b;5E>Bs$5t9GoAf!eD}Zj!T*ff37`^^L5Kl# z@F`?j9poYa$V7l5ld3nj`$UqVqvq9(@Xo($Yq1|=RJ?4Tp{|nss&vFhl@n^XCKZABFc<@u3 zfk}mdQ=5Uqj)B{ef!~fnz@CB6j)C8vf!~RN&yhjE8HD&;7A>$Q8(;tS};J(-f(8Lx) z^DPFwZ1A~us^IRSCHNG?90sOF2EGTvs^7U4{_`sRXI1~tq6VsrKvk0Je|GSFMqJAO zIdxwOn0!zTI%%19*R%Q%c$3jCzXs5hkZ<$$fEEz)Yu)bKvJFDQ%QtXM1UhOP&VY=? z!AVFRgiIpWLB4gHVbu_*Lj>7O1g?qJ!w>d>55XZbAoY=3`5Jfdej<34801q8bfVgZB!sd^yy zhJ)Flt6CxJw_)U5jLs40J_zJ_BKV>O%ap0s$y2S8rdTI~7?3^@gfvNgojH19yGk04x7tpuA_f_IT1H4%h_ z^p7B9aOd8j4(KMPz|Oq^NNflfxsrm+B|%8gToTgx;b{FPcr^rP5YthDS5ol02AKh= ziQsh;vQ)%`BM|e$`VIwk?elNn?boskv<|0!3urA()ml(5JAbZC#x$FhKI7}4 z&mwiFba|_IdDBn{JwHJe2TmCac418hE(HcQQPBPx&`G3_q7q&{W0u2W0qI?uoByk> z2MyLSZ2!-C6m$a-+tvRJ=l*k@{m*dtKSSGphJ>dKeovwGk<|rIePnTh!SVou-BAYp z%?y%rMJBupx$xiM$$zz5pc~0WVf7KDN`llx5E4=!K~^$wUir^*{=eCS{|uD}K}Q3D z_P`2)R$_y0HDv%-sj#zdq#bnw$|OSONrf%ojNKuSvY)y5wm`!R){b`~)Bk$Rf2CY? zRMk8jbU_lxi*Wx!_T7VosDUB%J7iZWq*j7VTCszt@K{0DNHL0nr(MiIrMh4)43(c5=75fK=Qs*lnj(DZzrcCW$*T<8 zK~qR9d;hcS_z#*wItZ$gm=A(FO8jR)lZL8S|BD?1%{TEK19gJ9kNg)s^`HL;XgH30 zFKDk5^MU_t2fzme&Hm5O{Ei|2Aw%YEhU5pJvlAoh~B6Qlxy)Jh0)GIYubw6J^vXowHeR|@Jq=HGSLuVbHA%PxWNRU&RrNP5)kQ27y%dq(vL=72S`$VD^Gx+ad2;9Yx zbe1{qCPUo|rtYr{)BZ3m{?D@NKl|GMvh)8lgl=VE(+ADwfTpuqVJi(h(pep1~lR#vqi&AehP^ z6c0ke;S54y41!S%LeUJO2@K*%43fDFnzan>i&^|uF@zsw&AtimAF-|Y&$#tJq<_SJ z7SunIy!~JLE~q{dzwux68fc_W5ZpO})JFn0pr`I=Ec?%(KZAkYg+aiVLEMf(z?^~8 zmVv{Dfy0u4&76VRh=J9Ffz^zG*#yL9&}U#)W?)hRcPLcAq#9^JJEIP0MFg`x=(r3< zBL>+bal6S3CMy}V7cgk7VbEH~puUAcV=IIHAqLwE3}N?0%R#k+^e)iAo!Hs`VrM~< zO2P+0=W;Wy`OmcWKi{tZLi<3=1^M=Y2J{3^{1>_OpYu3qLjm`3&`cBaPS9dJz61aH z5BwK8{9pRWe}SFRsZv-fp#%NZgk~P293_K zss3kD{Lg7{SK8^lLHud^yjwo?2Ynk31vYIDXxtXsv^BJ8t55T0pXSYeOmW#<2z2r`_+THQxa_}W{kSYl> zTLc=K1Fz+V_lzLjA_xiZ86np}_JvCz6%d4k56!{rBisB%HsEu9Z1SM@LBJ~`yW9o# zIrAOBB)Ct6xEc>}{3+_~c#yqCkhODG=`$b~UMCP{jlrp@mdQ5HhfRH-v#)D+P5xZV~~F?;+Pm zSgRy>^#mEjgOJE|6NC+~kKhbQeFPz~)<=-aC>+~iL=Y)RWrU@31U~-(vI+t&8#U=z z*u+zz6HbQq9}Vd{6415JzjJR;>n_lJTQ%$5E7y3IEOpAAXPY+FG_Kb;yiv=qLfs`> z(JocqJWNc_S6IV|N70&7QkOwM1+@1BbgVIawF6}M3sNF;%2n=5+5cbW{C~BZ{~0&^ zXJ~%RF#R{nuK&!Z|1%u_&vE!a!><3J+t6~}feuaey362nnZf2PgY8++GH?Cu3|gBQ zVy?L@`fq;gzuU|I@^?W8GC=Ahj_WA(5fAK?2F|OXDhYJ3>gE4@*Zxa9_|FhFoq@rO zfmN1)1#+S?2Lq@!fy{z2Fz{*VxhIRqF5?fK#}KfSA@&$^-c5n(XH4B+`DgrRTKu1D zHK;z~Soh!Y@PG52|D6kFf(NM>UiVf5WV&-Dt6d8R3C*M0o6x!A3%3`%=^!_0@QEi*aIrGA>(xtw?LJV z)Sdt0H$X_}256*C_~w5Qx((_gv7i6Xc=EsGbXa}F|&ZL%j)9CGj5vEo_iH{$Ktq_>_hT9~hSYXIu;FF)@MnF>xLIFLnjA zUy1W5XiE{>9?*Of$1c!TBax&3Id*~@NFARTvhOfNon{C=4XTfdUkUd7=U?()e#?J} zz5j)G{1@K=s*ia0|Cc)RU+U0*hS^^k0{CCK|?N<5_xjqVM+ybwUpmh*Ztpw>GL24p+?SssK zR6yQ!n;~@&GD)x+g7l0al@VyyDtId^WcLtcnH;1(g49G164W6oS?N%`0>nTXqjM-( zZjW?ZE2IK~kXWlBNR6(E(7Nzw%5nh0JE!5NVH$RG|hRfN_vf^?A}G3DC;>e#}2Nr;Y8^ES-- z2sAbetB*iyDPd!G(8ZQWTuAkV+<^ksSsl>fJLKvKUK2q&N)Qq<%>=1~FiCi21Ytlb zBM6Bpf>udE>L5%q5_}Rkylw(9km@7IsYKwVchLGMr2j-n|B0Z9#{wrD4emMW*M7ji zeYaN==n_De@-=pa%PeyiTBOf1h@Yev-liT!Sw=z-AM-1y$pss87ww2#9n7u^Iz)3fAd%Wjh_A&ybU_1 zj`jL~wj0p-Bc5xZu_MSx9pvOX#%uo>!0W-dF8p_V^q*<*a|RY81_mWYHeLoM_>Iz# zH4MTG+^P%~brMkv7(#Y11Z-i5J;#u97jzMA*LQ|#e?WIwuL7NB#<1-_M1{OO84sFmm44`WbHNo4vRKVe*!2qg;v_K?iZ>$yrvo>e}9` zINxE&{vbQ)zx1~Mayvmw;1o{&S6=a-A@e*#$uow!4-EC+m|FiZO#Uyt>_6mg3*KG- zMGyTK-SwYmWt@$HE#$AT!3k)$A7~(H8d)ZX~v#Eg)Xc3(n zXz2pC3aE?3sr;Wq^&f}oKVh32D!wnxviI8-9rJG3>D#nBxM_1p^Oo?Yt>H~uLmPI5 zHtYg*g<#bXq*j8JY@qeqKK0wc2)dOOQXgThi6DIRohQk?<`UuiFg3lI#Y9jD;Mexc9#7G3yM9B3Kp2`Sb6B)$y7{sBB z(HTZ{!VxUipeyZ>t0WK`9L11s5enH1TEp+#ydA!d1(N}(jF9_EkctWG2@mk@5~L1- zkdRslQWHT$AZ$1Zsdtb`|#31paUT#n0aMDQ!|3rITLn?9{8_u8FU1V=+*xcC;y8c`_HfvG>y)% z=|AJP|DeM#=ly4&^`D{dKSSv&`Pcc{>V(`4kQ}WJu_J6)T|23}tw|MmUk09f6@NNJ@^FA_F~!GAMb%mId+yPTD;7`nc*O$D7b%Ch`F+lK$V+d)$jd?)`4p95`v z@xJ)q`rv=_#BOkr%LrZr&jwyV4z6uL1iV&)GayIAKn?)s1g$e?;s@2;9O4XIl8n4E ztbz(W66*Xidcq1u!fNIc+BQGaFJ{nM#GtkoR3$;`BaK}Qn!6cvk1-gWWNITT!n zgU-_sHTtg;`q3isuS4lc=ZZ7l4LiLXb_O?X4sO~EsgFYHcZSsO^ljXNh9H#=xEcag zM&Kh9K%F3XH3VV6J4BG}tB?u^PQoi7_|P1@CW2H(kg+(o@-@!LTZdqEkW=vrNHqi} z9l@O=cr|2SybOYnYa&SP1DP8_W<&Z!$YXSnn#ekL0lYqfteu0LJ_o6cAWP;T{Udm- zWR*SxvjRe{i7eBmTOb|84Vf?k9k!4HUBV4n#SN*9AZzE06Z;|kAj9}xIKwc$*C4J3 zQW-(mkopK-0ULUn=wN^szB|+Fh zUHc#m2#K{;!aB7Csk0ze5`=_QN$~mzlYv|xK^8E8MUF*IJPtviW9}v#10UvaJbL2M zs0l}+CLV^M$O)jO>7X?jy@!K)4~K$LP}c#!HqdoT?zJ0T%GcNyEVav?V-!C@H>_Db zpjO?hT-G61#yVBVFq%)tk3-FaNzxRA#GOE!@xzw8Zu>8D_P^$>|8fuii{AJzedoXC ztN+S}|1-3H;F|hhZsmXGdH)&O-ZC`3XXyRK&<$GHz*P5}q2en;)d!BrprIAU?f+FT z{kMDlU;Y7TE`$Brf3~Zz-A3T4B6xiSVqEiF_LE^MxW-%oasb0F(4KwX zQ{XEP&Vp_(5IOr_^3s3pE&oA_?ih@~b4if9&IQ36Y9JT(A)j##7GV-&V3T6tkY(Uj zVBk|_5Y=IlF=kURXI8f1&~W6|b`v!85j2SqwTO{%$W(SOQ1mWU3#d{KuGfrgRg3P_ zPUtg9n{Jge*FI;yS@JYB&w4K1WCk&R23|MNiZzh{28mP#@e~G;GzP(B(6o_2B(y#f zie?awVGvDWkk4aqU(6P;h9PVZWA0^!y60>?Ul|tuXI=$bgwJ~LKil#DyyrmCBzgn1 z1xf4%h%Is*)DPvp4w_dIx%OZB#((iE|78w?hPedOuP}%lWa>AqWFr4MA!m$mUk$T}AMk2(21|)IpHhA_xiT8Cj%G1D(_j z?h`>OBgoo0vlP%&kx|kFBiPU!WQ!|gMK^p`ky(77aeOZtBBnlq$0J(B1nL|$ZiDoK z(JCVl7ko`TyjFrR{91Nk_L$HH@gS8^VEb-=FhU-(gRlcJZb*lVKq@0-5>n~FNl3i| z;UdpHLBx^QKVViy$Rgls=m>-i?>`Db5fhF<7-18Rg-tjHs*ENaiJkQhb`O``PFRO!GZv;6#j1AmB5*O8Q5(}QL5WA2e zcneeHE{4n-tQF50TE4SP0QHPmmxI<*vF`ZKe(*o&uF`X$HCFZ}J5OnNIKpyJie5TA(Tk zQXg@efNCYs_99*I5S$ixW2^>f-z$SAcz{k5EUpbY;EvmjLBxSUzy`D(i9rL@!vblv zVPJP+5Upo0n8l#I4pblMY+}&a1X|djv6Vr4D}(+{P<>>2n89QhgVg~B&%F$BmpF=U zF@)`7uv!VKkGu{s_#S2OKLYCh_#9*ap(CIg%6T1w^Ew9iouEEc+-=^9uYz;`Yi|87 zvH!o&&i`yXK*MxA2mcEm`!BTnKjW(ZJX2pY6rEr&-_M}AkwNwqgZvvveZ(gJAJhYa zE$3!e{m-ces**q(U{ygAMr?}zMU6rCPZ=gWvCMemT6x^P`lNsTPUQN?uVFI;1=eo~ z1YZaaseq7Yf^OJbTeFq__4ohR*zuoz%YTlopba1#t3Zd6 zF|Ys6v=y`v!r|3_!`uHkul?r%@1o_t4%$V_2VS(mbq#dl5D#n@66E|ko?D=MV1usx zXUN|NsuVyQjv(u(A){yDj*@JWl+Q$l@cj(IyBQ*nFr-~%sCdcH{)1uie}=iB5kVG!|Tkn&_u4&u}f6)=hsF^iY9PLZ-tm3PTf^3GQeDA5eA z(Tix*k7+eY>@rE2V4gnNJZF}5!93fNCH7^@oT}Ej*Kc-j-0sn|19C>0SNTSx_?c36 z)eH);pe;NcE}$xj!;yh2m_a;&K{As;G!-;qBoxaa5D8vl7senQ4qELXp3UH}fZJ~! zL&SEjs+*$Iei?24Z@v#SEn;=(zv0gR>f8RSZ2qsl+Jupap=F=f&UVR zK-Z+Ip8l_Q<-f({|E3rIYp(szQ2kas;Ut4%7ie7^n;8SEF6cl8W-ZXY-k@0|ZE(Ly z8&vy%PWd(f4bX9zfO<@9=AcD)EG7(WW(>^647|1sQoall-k^owj3yxUpeZM9&>{y` zS3c!r2Aw$!`tuodH-M1NItCEh#Gto@!C)t7d4tg|2BTdJR$Cd|4l+a?VF=mB;Jk^! zWix~8UIx#D3|>bVypMvaArN~ngU4o{YMh-F@6=zgcN@xSoV|2+Gkw;hS@ z2kld{KKx%{#dnL|Gg|fgyeiMwWbc!-e9oqJg<0$ytHcjB$^U$cpsI&c4b&~-0F&(C z?M0j#AaOS3{{n`e#4UbmMIN`xyzW+W(4+3Kf8$=?`rZBw+rgb3@R=`POgp z1|!ef4G`o}vmP=K2U(K5bzzW zh^vT-S2z|ecPLy2sfHl+kt3{UWM90@u4pL)L24z)xE#D1f=mxt=PrQshLB0j%E&%v zzFqb_NS_FP;1B3PRFq>DKr6Q)br6JvS30;D=CIBYXqXPXoEvoH0<2pEADx5LM;7sY z7T_ZnAeE76TrZ>!LM9lS+`j5}pw}TwNzfrGrccckhQ(QV>CSeFR}()=Cf& zNUa1XA$_ID{=<>|hr|1iVpc{G6OKXZAY>A`J_?_BJZ!=-NKF*dcPI>0uYqRGkky=fF+kSWabJh_kI?EP*316|@BcU0@}Ge_3cR{o2sEXG z!0cR-`jXxgbmNzRn$uB-7}73+ra~b10nG*7Jj=KdbmTSDzW*FY{_~sxZNHVg4yuws z*Lhz8ZT1&A^`GT9==5pLL;v-T|F>NHPIK}-{l&ixmi)0_@ZNIPOYfdjnprCp{F(*r zOE|QnICY|hOycAmvUL3`%pw|1BI`|~8Y~i9ZPU8!Gbg#^O?N4rVlB zm7aBLe3~~0v~P#BXZ$<&2Y`C12mD%gIpwcZ3-0AL%wrG_0dLcYI1|}m0 zE-TP%5t9}Jv_8@T4bX9#f@&X569z8u5FD>1=u8G)OVDx!Hgg6}O9pOh26l5$eI)M1 zAnpa)Hp}Y(8t7v;0{4})85nFBI6WDZCNUT;!+aRNDzV(~X z5N3S@IcFPI>1^_<-sD-e9>xV7ssLeQt%e|TLs)8}a?k~NkU9uLLTVxi>4ba%1h_*4 zuZG|Z$VeTeCbBD9Y74#?0&8UinH$1i6G7@ANIwX|2314g)445CDx+DDJ`rRr4y_Y} zSs$6FOobpwO$6#3fe-%yZFGfoiy$*ZMhVbyIrF$)bEK1hOkzPDBE#5j!`N>9=uRjj zwp%y46N1ocB7?{d{fKt`h;~qy7IaE2X#baY-DdP!sbRBE!)8P^L_vLoRx3dU?jSuT z2pdu>1$6F4M;(auc!8b!kx@|B0Z69_LPF{w2pQbDH>7JH1ci3*N1)z=$aN5ejXabG z={JRgE2Buz`42}R!*mcbtnWxzA4mi;Mi({lFoY2a?i@u-1l3>gDhyJuLF%Kh2}eTu z4h44~4D8$=)PBIPd5=&1Hn++RHl@q0ikCVSEwIj>DHAtCCT<2p!v(eF-$CmZR)G2% z3~T;#Z22#||G&wF|9YqXD=hiXkbQ;0mx78~^36|L48&pYP^>Xnk}Yv@4VMI<#}d4L+g))K>zJ*YTeR z9a1gi+z4u8gQ_7G*e)UPh9e#iS>I0P$i0l=yBVU-Fl63hD1FS({1H@F&i>D^0<=_^ zVJGOAO_t;Txz7FvT@-i&WT5C((2$+LSdHFy4>Hkc}{tF)c zFLvs`=9&L0$N$T0{m;JOKhLcHER%i-Rh?moUC-ds!(iXe=iRRmyU-$cg-hiY@5*(q z#VZ_&7JF2!_O4y$*RavAb$eh3XrDf43wGCm(B8x0y@wTV0UF;a$;a}V-SgDkWOI`OJfjB1YMTLAH^UT0lI5jIE6tr zkHLHzujdK|pH&PV%NR^%F{pMk=uKg;UCa=%i=p5WL)S}|8Q)nJ|KwlypKA*Uac={~ z0^`bG3=2Oo%>BeL@fkzWb%v<@4AvVM>^Ac_t!0pCW?-^rV6g?&N(|bddW6diGzY|I z1*%^lhcoawGVnTpxV&}@ymky+*3d-@oK_56_Mnp)xSbfdoj`0Zd(hF}oHn41w49dU zMR!IFBH03VQyEOxF&M05Fj&iAv>t>^HZquRXRz48V6~gUW-o*DZU(PI41wDjLU)5| zCBGdEKKnq({|H0qDF)x&44&H=LUu6(?O<@3&0sr)!KRPFs)r$CxAms~3WxtI9{F#1 z{J-M*|F%=_Dpw!&tUlmfdnmN^U`WdW)5P1Nj=LG8o-;^2XOjo*X64lc%@DDvfG3aC z{m$ffMUW~9P9iUUfOMLIy7qzAA;8Xp zfb^2!^%0~R3hzG(sfplZME}8v{)3Sd4#COLz5`+X2O;VqT`EX@1i!fnbbSVRokM8X z;gHTl!EJkkTKBltZE&hy>s7VFsc3;(#v-elZIZKIDKGiLumrq(ZsmW5wg1^S{}(^} z-|p6bpBMjwum9(p^qa%?0fY592IE5vx_cP(S2D=WV3423ptyiRaV|sVQ|5L5MfU$U zxbxrS0jNGwx&^9?cyEB}Bc2niEqux}( z%ibWz9WXL5u&@fK2sqT}$IoU6-3h9avhVO!ykuy6&)oNiVa9)!#h}WFWizNU;yC=D z^UQz#OaDc#f{^G{P%laF+<(5)prf8yPlGOr6FB#u>&kz&3;$V9{%6<^I(3J6`+wnW z|GC%w=Unt(WbJ?bz5fmN{rB7U-*Vw+)3$R$$*Z|NdicEijnWr+)$a;#-4)!pEud+0 zX#0+^j-BD1yFz>Rh4&qZ=sOhAcL)*-QJ`tQW6={%Mo%~y)PBIeWUYG01XiOw2C+Z} zPVmeTyECXVVs~X=abaL~X5a_}O&&>SGe~4W>m#8^2H_|M;S|s|C8LQPE{hrL<}f%f zWC+_MQgBUe(htj}|BZKo7O@*0`)_sbztv3;GQ15MBQ<^S-}C|KHaeqQ|4pugko(2| zax4A|6g_7MKEa^e2fjDK5>y{C8ZvNLfTn@?tr_^O!CfH-&=4Htb_ot^1`cZmb_)<< zGY9RKWi^2N+)(osRAjD=3W?L|D*)oW_GD>(b@H;aw=rb@zaGLiq*lcAmTFqd* zfx&zWgT+<`%WVu+yBMr@GdLY)a6Ja<0r?$c@ZSomlKgiv`0Zfu-4CjdLQXP-oo4Xa z#o)CAR38Oy2h~Sb6B*1q84Q~l{1$32`l@mIKktVBOcNe6G+t4vI;32E*t_;nK-2M% zmIE#&8x(wwahR+F)ko4V*cJW@Y5(Wb`p=^Q>KU;p{by1Eoe0FD{GUw~R2lK<|5tST ztRM2p3${Uv6JXasz`I3|+6S#Zg51yw zsgEGng*z85Me7Pdri37)bC5km$QxSWwGX6Lg3J@4)I{K}kTtgY$UJSPB^W`vKag4p z!nR1AW}Y$?LPF{tcx43Pg64_99U?Pu9b}p?(IkEXgoGdXV~Dg|!6*(?4M8d+NPT1+ z1L_$;DkFVZpGZHtQ!lCmf{^PVINKn+4Xr*xo(=S@+2~!r3DQ6EY6NwZd>UXCQv>Lx zc&~;nkSYm6`ZR5W_oN`BcJPBcFxNR?)<=**Ja~PCsFG0XBS>EfR4YO1Cio~HXq9#6 zUU;`CxN|S0PZZL%55jU6l7&Wc<;do(3y3IgS!s~cOQf-nS+zz zeFs3LU;jY}BeZ89q&$RISfRbpv-KeL5$vcvQ1>dd>u`9_(Qt4B-mhhccjH#~`pq^a zE5u878}(mfTK1o5*?&-dv;@3MaTRF4ywaur&QCx{a_N7D#48N$cR^Q6=AC-y_vyj z7K__dv54i46}y7F&V=Qtz7ne&1CK9gVS`*YsFx&^1RA&#j$;r^Vi3<@FzVy= zU(FDEKp_7TU&niudH*F={a4!ZpLf@Po}JLs41_L#X39jbf(~U8zW!h2#(&XU|Hbe8 zm%Q^|=Kg=RlmD5P{^xD_&QS1_Blrx1^%e&CdKC2H%|w{=1o?wlM^)V{o3uU^#)osFOjrg+XVHT+$<@~8qX{~)n?ns$4S|N~l>hT; z{FijOt`+*ix?rbM`2o+`b$(5o{pxlgP~&>wN2>m63Pt2Ctg+$OuvgK}ff% zwQvNfeK1K}6%eE{g7$;JN1j5~Cm@sXaXARX5!MNU)JG7~xoD{q_{?nx3E?{AFM`hy zq18mlYqpWAA>=v;QWGK15P`Zy;Bh&t^jVOjek{{w!n#E>k$Xds3J6{YK`J14?E_(e zj^l>S6TxdD(}anTwF;1VB1mrtGD&0{kC-GfiRm+m=`)V*g&@PoZV1wk>eh?wf+ARD zq#Fqunu83`=|{Bdg}1>GWY!NSS-Sz$5pLMzUI$`8*q#lWys*_rpjrtwpa-vypj8sl z`<49KcOq9wAU1d<9;EjK=?y_@A_xhok09f9@X83%8-i3oSnDH5O#~+))e{;S-g_{# zdq1Qyf{>w*-VK<9R6`LM^$|oRgoGFfxyT8^23Jx?!h4QJ^d5uMM2~gDQHzN!(vdCBzEk-)vNzOpZ}{J|Id(ojlpI;gVq5CjeQJ8`&e8r zf)-FIFXPo(#jdi1A>^pX%Kzf0{_8*bulopEAIaSNFL(#kMMAER_-_2?y8#-q;|AZ+ z!+8-@8Hqgj&)#u^fx#Q;>{D>1#KtdSU1$|MnImEsN5n3Mq)W`jkC~g_@b!IXnDL)= z(SL^3|5-PI)-bRg1l@2gZ~;^qiGV93&|-$mpv$s3Pr~XW@P*x?m;MV}{Lgb1G|j|* z_&?{W{|p5W8JxB;IB#bNKgp1Fhau|{L*hw>?6G36|LN(E2Fk z07JwPwzypk{;L_BW-?e!WYB8`A@$`9TB{gT=Q9|sQ3ze>SavF`<49=x;lT3!_Ni-S z-8S)>En<|r%^-E1LF65zKH}HLcXZ2bm42eL!1T z!PkOA7?8>c)DMDmh)P#L>K&{kq-O-_{=kp-u`gT#semANvqDyDLr8di1n(0;Y9a^; z8I;3X2f?c$$eA(Nox3gB*$aRT(tR+IP% zCh-#>^%0~df{>Rk84q`=+1r?C0 z5K;$)^+A?`gU#$Y8rFLZ)Hwq8k0SbxK#)uKIs3MgjD1gq=6_>Y{GSDaD-2GayC{FglcU-0;U z2GAOBW$>bIa7_dvl=<{Rc|$e{Mr>mUIm(cFjiK~8L(>VN4g|D`U2j_%|=16sqteC$8(DNx-caQ?sWrT@I=|8s(`Cty4B zU*X_?hADp-tamVIuV66S#9*_7!E7~y!BPgJRSYI;7%bN^*luKS+sI(Mg28GEgV_W& z`$nbcxgky4lO`MupKvH*;$g@Ne9#mgXzl%x@cyHa+CQZ0fNS+e!^GJF4pj_l2@KMq z4E$aUJnm4$?a9FF&A{i!AQ;HN7sSBr4;r=Ok6=>BW{^r{5J>?c{&WWMd=~fRGV%Kv zVh(d;Ut*|x&eQjualwC{)&E(xf=Bc{w}Y#Db!+kEVnGsv|w@JBMR`ZI9Yf)JMx z1Gg~)mk|hY8ZvMiGH~fKaO!|=>tol2c99rO7(grQ?7*jF2QV-M@)^$wseBaH{NKCc zmxRw-4x78|4*xlw{&PG2=XL%s;QXK834{ck|MR%~=X3in!UjG?v|1g+7V6c3` z;C+KJ=sxHOps?eN(fgTV4zNXSX7F0f;5?PVx|czIH-p?R29f&=0=F4tcC)D*mbPAE z5U|rKc9&V?HWk+c;uf3vRIhR?U1AXZ!XW$!gd~5kD*or!{?D)fU%=qMuNRx>kX~Y7d9oE9V>T0?zRIJGPN5ZMUYoDhyiIWL)OT_ODf31Irs_NkUPOa+t0xV z%E3EC$h8k-(-34`95U%%w$i<9C1ia9goLczMn2;dzJnFcfUHbFKIRm%QVw1dL3&1* zRTAc%M34#yxiW%mBeKq#YneUI5`1B+b=F*qthr_xv&}MQn}V;#vr3-@seK^z5rl-) zMDTGrI0I4>L8>A6Dg_7wd3p%BTLh|%!1v`rdP5Kr`5aV8pU5z#*D$6Ra@r4k6}Leo zbX-m+szV3dEkf%P8Ah}lgtuWLc>H2As8t_Vavg-i#9y~K z)I<;xtv*8TB0(wz%>EHteFR_30GUMsS4q$@I!Jv4>21L)BZ#r#UHc*RQFzyW2#V}E z7}mKDQXfIIfQ>pB4C-XT`dM9vgS!qx=8ryL ze69Y^F!MXpivJ9Y{xB5pXK?Ig5RGHt3}WEQW)LZ5;BDh~KcKt(zxUDq7EAv#q}*aK zJ1m&=o@dj4r4#>c-~2ay0xAq;ZvPj$145#=|AP?t)Eeaai0#UM=8K@}gXheDtq1=Z z8qR{(46B1@jzCor=oD`af2*iQ!H8`_k=q#(FR|x8U}*fz+54Yi)_;y=pgAM9t)LsL zSq}W?JPEq_6f#=`sgI;CgKpb_TyM>C{6EKO(7>JOrT;<~L6sEGIZ%DXdKh$BgZhd8 z43mF@=G>L$Gw7^f&|3vM6jXfZ8a5>{%BX>YnrV|6p1CpLY$YKH}Q_ zAGtmfy#^YJl(-J6k|b|}x=VtG|1&K5#!&fzA^Qs!XApE@ z5Dj6Hk7HJ@Gz(kfSA54I^_))RZ6*JUg09bm+@ABe{^$4jFW~lH$m745CjvogA`zee zB0m2GeE)O0{bzRn#OV5g!Q(1}-))A-Qw&k3nPU$!gzshuSjpfzkHKjQgUd7qwL=Ul z2N=X2G6>#b5ZlVYyM}?MltH|KLAilZrIA5)27}CW2LAmFB1af_-!SmMV-WhtApVU> z?k}Gfs4@~T_%Estu9CF>3+nt=^trDW`NpAQyHnLp$Q7&5zE3q`2^?f)f@jr6uj);h z#T0ZVsA@f=G6GE-RjhTdSPNMr2k!xa7!|9Lt073$<5ITLt#pMO(v%Qr&2}mDfK#;E z2T}(ilaM+HPTC`l(Lw4XNVf=52SI8a$P5vDN(ep*2WQyk%(sS241wz)3nVtAJ~GRg z4XJ$~RT8WYnqiR&MUdJDGCBugnkJN?>sLfE+KBgira zI2qEl56%t-Z?=V>`2eXeV7-_lknO8MT?fF+_CO2yAiG&1l?$XA!cRg}f`;I__Cc0a zppl{72Sd6Jgmxbc?%WSyK&(^mxNbhWE+MuHw7*rS}GW41*F|7fe4kxQ&mdjS;JQLAWuAW_#KAG`wUSJ7-AnX#64n2c*GF*fHCSUL+~*Mr$r1}T?}GL z3>+a0tey<)-VAKujvH$-1A8h1V+8|8Jp=c22I2V(!lxJ{&NE8g<57Jntp8WXeB)BGmS3r>MtIptR$hBlSq&|X>@FfcH(K*P79HbuvVMA&k_!u30 zjhsXNB6uAHStbXmj3Ctz%DoS{3m|}( z%(O|L0YTPj(=pd7K&l=Hi98}_kvs)7ACx!=UL_$j;FS?*=>oWC1RtG4u8fRhx=q0d z(kFtDhEeTsq!-nu7uBW{)utWU3PCy%Ejke`df^~s5Z+=C-l8AYjEwX`n;;0Y4nh|3 z0Iyns)6%)ADI1mV4OdbGU z!UrcIV}lTLh#}#%B-|ooMsVkT2ny=h2SI_I`+Zt=Y4_YVo%WPr&QJag|C!sLGnh_g z;D}^k@MU1}1C8FWTQjg(fsT}6*JToS0!?0f&h(!Do?+>KrZxZh*ZpT+^q+abf3eN~ zmCpQke*0hh`G47)(E5n~I_Q{g)+_&+FM)Q&a)A$06SxkpjKDi&*)IKOx%!{?-hYAm z!wd{o-~;7gw;D;<=m(VvL~Y{?UeAzlku&c;L+ux?ZqSXi91H(5t^d!t?LWiL|E&8# zl@ZT5P(Mrb>VK*0|0S=1&S4Nc4?4Jj`#7jF;yn4E=lp-cEB{3<|K~sVpX>C0&QqYv z1X+&%XFK|z}^nG1iBWy{SQO&3x>=m4C#*;5}q=|K4OTx2da-E?lHtZW{A4S z5P6p&>JCHXZHCA*3_(Zv0ypybuNQUhXESMFRn26OjAf8YW>U;zQ)=MVZ0FHjAf&%c z$Z)T?*&zvo=aPmmMD;$28+;Kr{x78WM?e=eN5iiVs*i+B|BG3IDkDipXniE>462nB zo&L)^fNp${2UkY$`bgN}zp%r9KIi|uPXF26|1&!NXK=p7V15o%8QCvmuwAa0e3n=1 zDFf#b28IF#hE&j|07e}KUJKBbwE75Ixm1B}{dNcUeIWfHh`3MHdLQuGIY=i6-Wx)zj36^XAQCnl z2Va%|scu|Lmb(-$gOHGgaF7}Yjf7M{Xe4A@4tYKZQU^i0Kae^InS@;ZU=Kd#6jx;g zse>SML!b>rumL)V2&5)5%K+6%Ht93q*W@ABN62*$WONQvADJXh0x`hr<w zoro6Qux4Ex)exjI(hF^Z&j4aFAe9js38{~et0c5q35^S>ong0tZr zB@hFAu_JPQ1e#QWof+=ew#%nw2c&L-kb&)c1KRckb?k$b^T;Gv5VYbBQl~&jNbLYy z7qJIC+_%@i9fbVa_Q23Czcwg>*|Hl_(GVn&T@KNUi-fGi@N7Tm)p3Zs=bp&a=lsk5 zi>&?++T6jP$iS`5z^V$m;(nJ!EqXI$V!H=gA7S$8FKG4)PDq> zk~bA>?<&we3!pip1EBkx2Imb7 zzPmsNi3e<9h+E5;v)iTNpnvz7sNO@-eTO0Yk%RjV1orF$4Yc+i4sPA=SiDvnAW_C3Rl(rC zQaJ7aw0~6doTu+6%cB1R>;7}^0Ig@>Jp7;U1ZWUa_|kvLtN-P0{a3&BU;5mCw&njB z+P*X7ePu{{%aHsCgyJ7EM8j&IhS+2!Fuf^_aosE`!rs z2FEvS)}I)RUNR^=WK(~^rSXkd`zNm^XyG)k=6_zz|2#S%#HRyVB*3TfUr_tMpw@p; zz5jxG|3wTzr|!sD{g=1>uW0{Y#TkUuT>h&%|5tPPui^Ax6^xXfK;nvy|K;ufOWXgK zariF_M&h>rCGG!<+Wr@?_|Iwio5|>lpxYbykYB7C2N)Q-7#O4(7$g{&d0DtbSOi2E zxcM141VM;dl7Ug2flYvcQ<#C>oI~E9N$e80{B0Jw|9l$%g$(}-8~qnB{m*X#>Lm#p z{Z|jVW0v^Zxq7{GHE7e2Th&_cs@2|At0DCcdL5rD2trzB%!DI&hsX>pXpuexf{;(%hTI1Msfm#5AW#Ja zKIg{-b(I3B`x6gZr~sNF0y97r5V#rw)kjD*5%LN;NIwWt9~nn=8iDGdPWb4YL1Y^Q zK{`RYVa<>}k#=~qb~vaSg4aawsz)!hNhi1g4PjP8a1pec2*Sl$A9;co;Nj~ufh!|; zt>oRf6dpB?QY1xH=pcZ*!OnZT9Ox%K?74N8CL-r;b+q-$YSJO88+U;H) zheW16S6uW(c-4P~vMUTqB@A3K47|Fa`iM=AfmIfi!5O%;^<4KKy2+)Jz^$98GXKBo z0`LO7)u0P&*f;-Y-UYhR#`XDst2_UN@B9~njnQ%5`Y-(CzwiUlN^g!!pd-G8Zh+42 z7P#JetYwHl#R|G#?*m82 zFNP`q+2(=HBVyYG>KU=@{m*{%Kkw=PVpl;^d*7aYu?t^sAJMpIX%6-4prvKKP|KD=SXVnS!Jz9=>x1WH_j45C2{pwT%`1|ClkgU=h(MFQ1L;L1oS5L6in2SMv2$yf%- zGzOVWQ0GXcl0mVaLAIK~X^BAeUWV{}p#D+yGoHSm%nMQKBi`ent{LW;%6mgFs?g>N2Cx+~=44K~;Qa&=oJ_FT99uFDp zZ!p-tV6cD1Zu6Pl=CiQLS7DQ{qQ?J)4gU-3f)J!W;?e>QeDP?37Ewd$BT>Eo!aATX zlCS}&K9V$t)<=+fN6iIvE*x5Yr0n!x*%?$H$v7bDBT4)JQV!6*m8k1~0f+x0p6_{` zUN9)`WMG}kz@^5(AO&i=F!3<3aDXQqIT%=Y85jf^KuL%T+|kx%5O81+I>)1M2f04t zHv!d0LdO4vjQ^_z-Zo8m9r-UGtk$c5jFEE1CL=e&q zGA;+(G2~pb(ye4Aq&|Xlh0sVyO#~s4Ya+B>54`(>#(=MmgH%Hh5?%qJF(7r2eGceU zAIO>8kje;N6Tzw>&~8?6S7;`r(y>UNVTy#z)271_T76^*J5d3x4uaQ2#^7sPO~KPc z@M;KBF&V}7LFyx;*gg;&Tpt<5K{vF*8Ibx2+9!%=htx;#a~1R>THy#%CFw-8Kq@0@ z)kjz>Cb!!45CpH7APjhAgvNl5Z-;b1u=ZRK z7q2&hu8H(**y`E1)eAbMwavS6t5?Go*V>IPH5)wZH+zA%Q+YLP@vPtMUc1q)W`kSJ z21p=4atbaIv>Ci+y=UzPP?cS^+Ov9%XZ0Gdnzi_mTkU$6>a{v$>m9ldbIkcHw(7s~ zn*R)8o57c{+A%XrfN$331zm5-z|O!X%)lzbEw0AEpde+NsIu_0#!}FD5&H^I?ZC7i zbXbtd!~g0JK{W#3P0;FY>0AG0ZvR(3_h0!I5C*Dibt+s2;RmJcaS0PE@Sm8hR)v%6aOrH|E!1pa~=jE z*2Di<4})!3xy*sFTZ$Rhn(C&kQ?R#x=mn#Hx z@t7Ag$VGvw8y;^4ZZ8H7cLpv`5QEo;fe+R>5)NVz31$!t0|mZBJcCps=m1r@Tn4!^ z2Gv>ytxgbfUdk4+iy>+sL*`kAnnyhSU)dJ^7hdmyZXP?e*v_EJNZIMXf;|W+I{b$qIfwrcr0D!#(dECK z<9{jp|00(E#f<;+>-=YsImN)Sje#MDfgv7z;;S430}H5n0!=+a7@Q0YBFy~iOrkr4 zR8O$T{O42qFK7Vj9|@WK7c~AaVDewk^uMz2ZIie+&eiK3E7y8dt#+?mOsE#8&U^(RjlHSQ9;^)b=p1By0%YqDygqWsoo5d^Rer904$AecHraEmvu48)yf*}|h9LEhMfwZ~ z145dmPJ=TbeIl#0DOPDyAm?vG`bUuZ2!1;r+68%#3J6jkLFyn#56Bo+2f=F}$W~Sq z5_U_G0r+SI&;mK|A*isG+tB(5QW@!mgC>m7>LdI;BkiDi?Vx(BWh|r+#!5meBe&Z1 zkZK5xM5~P8TrY5a1S#_&Y)Ijc++%{Up&OTARR+AOf^ieZC&m8(eBY$hCUCYxR1!nhmbtjNnkQ#;IbBbLCp+%C!z z-+zYCO$^K~;NuZQK=lz5KNFWYyQmTavj8Kn3U1^5;SDT zvGu?5h5t4$|I6R}FLMpF^;P6Ps50U@`JZ9lUxvyX3<+l#B9AhJon#0-!4Q3hA@?3b z_Y;N%KUt1}E=OQ2I>TVnC+*rT=h3I=UMcHbDj2*-EMhrB)Lw?PlMEHl`8$6wO#IId z=^1VK&#>)3^G?vD5BDihWh8MGG(shD<-gq3|1y_BmmCS60c|ycY>XAU1geif`(Drd zhtx;hCqbn**AY-P#IO-O?zbDXE0+H>xH7uXL9u{AzMMg$fx&nZ ztK$OJ@SW_DyBQ+)GUi-hsCvNB{gHLSf4-IfIktf=nc_bBpXb8tk{+Yq#KZ6OV zGD5D8Al)A>jsKWc61+a*1Mf5C)&#Ai76y;k3F(76QxazXdLMf)J!WlD7j@Nsx+3&f&kTJqXD;fa)X2Op}b`e+}3FO7{OXT|diNy^uCp zz#`kpAm{|1Hev@~^$WR3ksDk$i8Ax6Gm2~%P(H>i{hwR;zkoipJ`ytd&u~QG9QFYLV7^RH-p<{&w+65vgaVzN4P2@NY!JWHr*_B8YY5XkO#S~HF*kXSpw3~ z99lJmIQtYnJrvgu=?%g65WyKJbrARfI;aS^K7w?JAgBGnY9h#nR?u1n$P5vLgjYsd zVNE)rAOxw8kO$}>br3R%wLXFfqSZ&PHS1hqYhd8D5}binF`-Q`dDd@6tBi2fN6>|N z;MxRU8NnIIwU2+(cK;^Og@6HVpi|3yo59@1ZIC0%AvF=`;9u}oZjY+9kjqLSI}0F3 zPC-V4q18wJ0;j@7PKAr?^XA*;%(cy#3u(RBXV0|FoK6tgXU~Kno75?`X;UG{E`1sv zWR*U{JY}kM<|4bEqYSfu@~--?yX`+i>@EgwUj_zy21Y&xRv~r)DOoiGaYb_m1~Dcs zNe%&d@J>~J2H_~f@_md;{iJZCVN#-P#1U_FV!W-^1@B1Z4! z4E`G!!gnyFUtueK#L)Z^bRrO>?%VpGaVKcHhv5)t@J;mEf9dO>o{_?p|MHhXMX}%+ z&`Ck;r$E=Kvz`U@zl5*;=e+Qr<2+QH{rG>56QFCd88(CWF0BULi@~t#Klic!0;m5A zp8YR$4uk~GfCltf4*h323R?WYeHwH$E61t-%t!z8ANkL)>A!N%ZPB{3ZmkEsI*&qD zut0i7{%!mH+xGi4>@m+=s^HVbWmdwVl*Aws0@_=|69C#?#O24p>C3?72V(OCF!1_- zsw!ddNS#xp5b&4L<3EGje>SK8%=Z7; zZT_=Z{%1G(&t~|a!{9%w?teCI5aQ7N&#v>IT^EEnb^deefCl2Yv_M@RZdhFd8J&Z6 zjTknwsI$_|K~j;)14-v_a!_Li+#3P5w)n|ChA^b$L`={;RtFS9Jj) z6-YJY@E;dso4$6r9RdI+d*i zUH=WfcNLPs9g#M%qAgBa zfh(gqHn0i^a!WWe2`O9PHIZcoVt@`_4MA2ZKn+2+}8lS3q!vVQe?Ne*`%M6>{7h4igAEWqm{t*|Bt(g|(Sf=v}6*FF%oR$v_*xmK>hM3DLjjdZDA>r%ZIQb|GTCU|88 zXW*=r!1WQNR)Uc5UJ`o61m08&ses_qOvnuH#;wp=2|NS`R|)D~)q(DRgzQ#;tjTe& zTmxC#=~%oBvMkd&f01SG0?X`q7Flym(`T8a%`{4xYM4C51cnR~CK|*~Fo>U^AJ?xR z*N-di;E@Pnz$02eq8+^21T@nJ9cu-*D=^vmF}>Omo&2%W9Quwk%>KzZ?~ml7|Eztl zMZJ0$*gQb>5xWS7ptQJx7Q3K21A{07yBGt9I0KUiD4R3bs%CHE+X$+UxOV+ly7b@V z$$zzLpvp-4{(r#-;I2pS7tn#ykq;Om?lT0xVF-H75cC{WANfCI2z$Yh_KYFzpVefywlc3Ip;8{?WBzP9IFpvExXmpO_BxofbL>KGv|4c_f*S#^!`5{7zV&i{-L;uB2fa)X0Eug!f7`i?%n8@scMP`QK*;hBgXs?j zQElPc)0v@0Pb&J45bC4=Y(H>DBftKDmfEVDw z>LXAW$r&`YB;)X39?TH82USUsPL`qPe@(~#+D@k>^mhm=mNRh1gYU~w2F)e$sxSy| z=Tkh&Ed8Hf{lB38e<6eae8!;qNXYoVsM&udpF8@IFC8n_J5;Q5t61q)u@Y1nm92zy zc_1~BQ|T)B4d9ULzd@I=7DJCsa41>sSPHu5&Y^e(h*1I>)37gI2CsL}81QPyp=c@Q zxEy3A2u|98uU>`MK@f&j&V0yyte`Qv?0JwWA-n8(kWH-M%4nVqxD$k22SM1@88bmw zwPwx6Um2N!DSRLg+pm>>*jucrZeyE=&5u-T_!Gw2{ASUmck%>)OTHb_#JIt!*I;a3#^Pl_h ze~kzK74L&iPE&XQI!qgM_D!izt0eSg(3P5L)I&Xq(=-v zml*;tF?bweFj>c7w2;AY0fXgY2D=puW=k1t*D$zlW$@n35POcN>^b|y|Ki|^Tfb>d`mZU|6C^jIgS6b>4ADc+}fbd4iC7q1DO?qR3dPaPXkn^ z@PlWFAj){O5H%6JK7vSrDkHuB0$>E0EfUlLO(F4Xfo6_`wf~Fif=Yg2@M1Pe3($C- zg57^5m;cJ({*j6^sCrU#fDX+;W{VUZ|0{qejN~2vBO}NZlAHrb5ZZNe0j+b8vHve+ z`(Mfq)M=7(0EtN1gC>~7tp9Tx{O8kn$iR1;fuWXxp^Ab10E^632Fd?CY9KcY8T=P8 z0?i)@8vPeG`LF18UoZTPL&X}0iZw3fi(SeWdzP*AC|!YBAHfgsaf07-2fp6{vSk%{ zk1B)>sfHl64`ewUgbgPl4OB>#WCJQI7C^c@$Rwo8W0$)CQU_Us8CE&-t#al=rNH$O ztPYw7XCSXtut=W)uYI8Rv?A3mW#12vQU2fsXfS)s2KANY6+gTn*u>k3iKB zxa!dis@DihYx=3&m-bF%V!0RK> z+5m8^A0V|4a@Psc5Ato;3ON84a)b!v#8=N+(5eGhSXaoUc$r=9 z0*mxnCdt!GlBXCYPBKpHH;C(j?jw%r)s5)X4Qtg3Zqf*>Q}eG;@vBhuE>-d>QS>ZU z@F-IBELQX=Rd6qnbt#l}DO7ODlXJ=;h~%Af<(+fooN}aX(`4+@AxO?46AzMf%oelD zU~sCo=sL_i_Xo?opWO5R^Dg`^oUxffA)A3+k^!_%i4%O?0}FVy5<3F}gQIKI{KY#M z%$q`&e`471pK%N5Itk_5psklu;A7Jmw)|r#dCZV{g(2o9L)c}8@SC8GE%DD8q8>5C z-Itp1UuNomhTOLdrQgJQ{+rDCZ#3(_Y5O;hsFMsf%NPu2GZ-%dT@vcFiNSq4L%>0X zxbqBU&p9T6W~q30fJW-rkN)Sm0IH85tGgAhfO|%l{_~#&%^9&C1yx2u7ye6M11)S| zKMSgl*v^3JBc@}Z6Lr|O{AZZ*o1ynF!+h{0%SKRr#C{N58G-8~k#qkA&VZ(qSPnz$ zBc6-@`ObhA@-b}t&oK87L+KTU=v|^|>vZdmLF%KRmaRc8TZ7wn2et0?#rp1KnpN5CW>3_`?|Z!$36=pFgxp;tyjGiewN;0M$nt zbqpq53@!_KVs{DF-cggI7k#3`jKusdtb`mv#C|1wtpA@z|GWU$T|JU|B?rc;Cu**Sw|kz`>LM(`>L zGK&PQmFz*CCTT}d?IUCVU)l~-A4%E$mw+JK|Kc{FDoM)zzn;f`Rr?>x_Irf&*Dy(4 zW|OebWO?X@M3q7=gAR2^s%a@Vtk$K7z~yK}X?AK*Mo}Y6z)50*!kVgYLG2 z-*5+KK?qpo><)6%eEvf|HQm5TtViuZC=3XZt|vBk*E5 zl&%n@K7v$F@R|sD-2$WzGE1EX=?6hb$XW$RO#~UqL9UNX!K>!rBxtz;xPJtxgCHcl zCNhrig;Yau65a`dFd*kDKuCCHq#FrZt^nx`>4r5!_O!zLL@3=NaDAi|46TgRgX`6U zL6s4_KEkYnv;%74$faU6Cc>Ogw4Y?FAaV}cyQn<(|e}QApT$}V6R!LLL;wG5Jbb}5v z4{O#7Zh-V+wEZj9yh>Ev3sl_;l-+WaT(gy&(&g-uWo?sWY!aoc;-#$lQ2o+P+>h^VLe|FeLsRo$RLPE%ZEWD!LE5X>)an~^L}!# z`7gBjKU?$)29ZS203Cw}0|R7^7_y$0k%56p+A`ffwws~yvf@h6N&u!K|FxchcE9pm z`Y(3pKmVrx3>6O;!X7Y$KV%5F#o&JvG<6>Pj3NFpL&6h=DgRAR{|8koOaJrl`Y(Fm zKl@G)65I(MIxBw#I#knc1B1gR2D6o*Q(HavGXx%I2s_1)affljKiM_^#gBu=20@n@ zT>;fNQddEjy~TK$juQtjw}Y&*V?Ont;;W1#gCtVcjcH}ISWomR@U^FROW{|q_T z80=>=IM25#SntzxFtlxVXxr|fmhC|;+e6y-1vKw+DcfL_GEds8i`l4%K{kOwBpg)5 z2uFdcA)zn^L1YvGy52}I6x3bf4r1U5X5b2C5R73E&18@%V9@Shu$;yaw2344EPLA< zmBs&s8a^;YoM3a=!Ch{pU9N&!qXES?NEw)_*ah|Ki5~MNIw+8A10V$$Q+@3x8u*w#Ke( zjdR%|m-5AK;A0aYwU0wFXdn)B!8iD_Zzu4js~{3Q90#d!kXISm7cN1b4??bV(9Y9_ z)Io3(GX4gygCNV~Y{8u%NPPsUeITdFK}bj)1YtwQ{~;u#CPH1pJrhy^S!RGP4Yx*} zErQGw!K)$AB05-y$Q(902VFG>o>MSNoMZ%>ErPCF0I%f+RYUO2MDe|lE)t|G1h0(X zbr57k4%9P3noWRAG(b-Kfovv1?h`?JMvyuPavHF4b$DtJXlOA!HI!2f3_4r=`5%!S0YMd~z@_z9q{P-M4WSci5{qq<+Ms#m#^ zdx?Tep1gCej6;@~Rf>pNys&AUkZClZVK|>&Ft2V9w{`#oacKCmtNXC2d2^_Gv8#Bp zE4#8PIuk@}N-i8qF6@fV4ANGNGS-YT)=aWC_z{Dg1A~MWgRr++_F~q#KNzNe;$8RO z^xA*XtnCa8UZ4&UyS%)L2@9trXbCM8b1^U|s5liUuKcgG3Ur<}|Hc2x_d$ya1h4;R zI`N;O{XIj%35L-7p!&$`3aCnof5DLOk|FUack_RZegE~3{^#5IpJOwqRsxN`Z2(Qj zb8Ps}vEjeQ{QtZy-^ELxGbdhV@Y&B`v6{hfIfL~^2De=do_iVM&M>rpW!?H;>ePSP zyZ>eGfQI9wuYne3OP&WUoZ~zNni~?l2wH0=ei=L&bml+nng5I@Kqsm4od3_g_di3& zGls%@42|y?rv7JI0KSTS8)zLJ?z@A% z%lZ;z1F4UAwV}N#cufRZ%)kvE`r`u+)(L=nMv%%#L?3ie zHi*>wFQNxp$p+edYWiQy0#qN#fCttdgoM{VmazN5A?NwPNi*<`c(85}bUzWK9|T#b0J*0XxkCgw zg&R^IA@_!m>mWEAQW+V6k6AEHm}m^Stus?> zL6F)9KMARloU7J=Dj@Ls2M8Oo!U35?u9zTfP-RpLU0Ds;(E}mjl@S^PQXfHTA9#HP zudux9HoJqnMQ-JwNg~k7OmIzPkv0XirZKKdFRD#1tU)87O2e;01>6skcg#|>OO>}t zma&YJG>aB82;$T6<573xR&(W1a^zNY;8L_@m9b!!GJ_x{NmC|CV+Jt;22p(mQGF&+ zeMS*IqL8Q#gNQb0K@qE|NNK#%RKlma{a&5-T#W` z{xi(|3#xj;AArW_!fr4`U1zBL#xU(4!_>bFbN}=21s%e`vhhFj22e%7wBbMVM$l=f zpwnz1$BC~1PsexuW2pYXTKAcu{0&3cSq94u47y7h^p`W3Z(#C2!H{=TWBPxU{r?S4 z|5v>VLPF<3M|N`@2kmkdyZm4L%74Mj|9Q@V`b3;({<9wk9r`VG>c7a5{|rs98RCvJ z6uw~Z_|E`2MJ7U`oM0V^9Z`&T;wllJ0cVOdgx3UeoadSjmn;Fz|8N_2jJtNSF z9C*SAGHoOf#2^#`LZacI871*(&}j_9;SBr{p!>@Cqd|jp5;>syNUwvzX%0iu5uwJ% zLeu_>&HB%g|CGUE9fR~@2BlRD@~4;-&#=nB;#U2@p!$zV{XehTe;(C;Tq^&$RQ_|S zf^MCI)H}Qy|KaruuQq7097G&K3TXZp&;%*qhxL!Rwf^&fl|dKLX@j}i|GB`3SLZ)3 zc-e}bKd%93?2F&vKLiOGf+{1(`E`)}v%<#z#mqp9*@R90i<*KEpqQ7Fs!2 zRV8T)8oq+1MUXXe zkh4s&u9Smxh#>V5goM;bHqhz@v_=lHGQl!?4jfrz&4wd*ZwTBWLiB?mOXeW84}?Un zk05Mw@LF!j%|wuz2tp#Cu>cvJgUmZXHn)N<4F?aEZzI=7paCV+!?*Rq+jPTPAxJ;0Sr1kpLHa=uQYW|p(g}i-T0u2%q!U!5 z6I7!eSdEFa0;-S^WVIw3iM4uis$A_Sf$aK$Z2W=LN^lZU zD?!f31Fy$}3&Q7~JgPUiRjzX>UFA>+nk2H!m~D|h!z3A085zX&Xooke1=XqfRVaCt z$h+ptIOWLLrHfl8i5W+U8bu1}hw$qJa;SMQD>^gD+Av64F-e#+ikmTr8Zrp#GVp7I z5T6#Pj^I{h;8tbeQh_24B?e5yrGhTVsldP?&%h?jz%EA|Qf6RPWMI%_Pz`2izQ{G_ z6~nyW40C@e7ad~Yjb&h9WnmE)leS=BRxrt)Tfgl^)Us!GU8kIue{x^*oo(xX{(Ybu zcEoRk>La;_|M_9=13I;v13XLzxq(9L z*nfu3rwqQ^7}75=RDI;>1s}({2xL3suK%pZp!Jc^IZ$OJa0*l(aUA>4egcGePyFXO z4!U=Pq3jWZVs2yNLK+Oj>Wb5B6SF56ttG6qJY0tSgV z2EGu`a2#(418*Scd{oF{Z*E@(E+5c}dcFWqw@5S+R3C{)GYEl^Kn&;zQ^{OVeWcyW z=scGpOX$)ifdkV zl=_GdT-`usf*@54=&A+S{1I|}1R1R3)&h-dK~zFWQ1t|^jJUKxl@TxafFOQ-(A^Du zdjFB@BS;A^Yy?uqtqrQJ_;tZEO?v-@jsJ_7f+{Rd&HwD`pp_C3DGs&&?Arf%4gd3- z{ui?NFKh>@k3{VL3)_ND3W6-66SoH)90VGs1Fu+s*GKa9DD{!FJ)%Amw*}Ql!WRF9 zE&j_}fsnWbs6LV~|1WNaQXfe<-`5CyYgx1!QXe^#E`+R4fSg}ySF{weI03Z3uW*?| zA*eFKtdAg-5u`q{&0ho>FacLa@MUt4U8|7Qamb_{_*_#+O$6!tz-l7M$^`WK2wn}r z8Q^MYHiU!^%0Vh4i}V?gng~Kd>LBFa5Ts88pBsWt4?%iEkV{(O9U{;?ICxyn7_&D7 z?h}EkA@Hsu!x+RhdGMMDd2R?&`@l)$ZjnCt_Hg($5Rk*SAvF<%gzO=LkdT^4E4TqZ zHw3ASAiW`YWu%F%8iH3$$d!?XUj_1+w{SM{sqC0+cW@WUwGw*12b=*J%7c)QWq9t@ zpe;*SswePPCTQ0Qc~}o=n=O3HEu_wZ)I@F-s~wA%+2${@2CZ0_VVE${D6U5@szWCf zyf^_|6Dhjp$vS39+oecYCrX+{3+e?6==kwycyTE?F-u#s$(l1unJ|m%f%-B6stkO} z3_J=9oU#lYG9bh$&A=hWz$U@KA_hXN;vhDsGy|tJXn_H%I9O1GfmsNIm;@P^_|Xxg z0BF}LqYwjOB+9@b%)}tf$u7kZG0k??4Th;-8D{?$nfHyM>>Pt$Hu!W?QwFzA(M7){ zmi}W{@}F}fs0?RV_n&DWXn;%T#(%ym|0ThQsrNra^hJiiYYbtxL5t`TA2H1PCx7O@ z@}2*RxBhb<0#`a~{&Q^k&$;P8+j>xC1nnQK|Ie@%jMn@I^_A9xml@3b&(QUSq3$I^ z&v%9i{~0FyXKnq-5O9RSU^#=udIs-94Dr_(%AWE}{LjAnzvSNkG8g`<-2%-h34<>s z=LOd(tcO9HScT5~7wP-V;IWp$eXDT6J%+{~j6MGuW`h=xGi>?Kw&y?p@&Cf$dnkm? z{}(y~(!zcObS4AyanO1^&eNbhM?zEoGelovQ14>k&f_yHam!m1(7eO9VXI%$=Absv z9yH(P?M@}@)FLJc*wiqnrZR{|GYCb1hUo;t8TbPjczhXnyg*m)akwzBJAksRv(xxfb|@M(f87x3Y7yx?&*$Y>jg z1lK#rs}~@fjCeIbE9C?r!*}5AMFQHOEv}HEJV?C;SziY^k5tG2v_gR2^go};e?GJS z+{U1VeB9cgh2k6<|Jl?*NI(~KsDiZFe<{=dQfB`pOhBveIMx5ND*k7X{LdiwpIPHS zgV}#}hyScj2*hRgpUdvQsNH`td(fDh6nGhf6y!i5N6>lTGLE2w!KEFb>lq}$)s~zs zXuFfN?SCno|5BF!B`yAoTY->>#eWeC(BPq{`G3gy3_^yWDI^7#$2tM;?F&~s7O#cu z3Un!4?OL=3QXe7rgCKiXosh1&gAB?+NJupV?+`iUFLKOV2uF}zLXh4NgoKDdwhkfp ziLAlLCP4Z`)>*UR42U=y39f*kgL06zF=#1W8uY+z%e3iM8PhD&r$T0kAf#F9471c3 z<|)&`cSB4=n<0YKM3CMPq$Yy&i6E!_7{yO8il1N@*KY``cR&*p;OmD#45YoQ(6fEO zBKpytka;3V1q7~rK&PHUcBX*2?fT)J`r)01;hhFy9q`J?AiNDyA3;dH&=xcVseK@% zR$!fWP(5TsPA9NVJD?UgV(S4Lc42;5{{S*uW;O#$rpas|ryr5Yg1|IN{K0IJHH&~nxrdznomB#4tVWuA%`S#%-x#KU7n%1>dgf4vzhx%1GkMe}QYD1=Uhl!PnvCJYons!Qg*|A@mkQ%w2}kZ#+9ehc+nP{?C2@ zG(g9&`9Iqx5Mtl(pJO9vu#Ra1c)V^sxT=D%H-L;{-}hf|ALtrxksbfJ*8S&O`d@Ix zf5VOcEqDI6oBdxf?j6v^$ZS68KO>r>Z6A5tP?;3gX}B*Gwt}#bpTW!37rCUtc1>k`bYdHL6s59 zLC}pt?5F>8oB}N}XK47&;JAZ9q@01bK+dhjzkI8I({|s6t^Unh0$M=((cEh{nxrid z^Jrqy%VUs^W)KVqA^tGXif$fX1}<;VrR40c3@i>{UNz7? zr#$LV64J-v10R(D8DWF;bU@V*yjB9&N01Rb2nnf=_%uQF5p?)Y6SN2%QXfHjPrTZo zszk`(zlafN2u{KRG~fmrbF%?e3Zl0E+4cW3EC1!t0M$pJT1o3ak1l9$thfoNlOk{R zUk;3f4M8`wg6bow{|t)%x%B_D*n;ltV{`t`?gFZkSRDUz+JX?IJ_1!nj-XzWB>2Ep z&@p$g%1GJ)Bo19-X9wErBx?hju>w^|NcEAZ<$qywXniDX`d`o(R2c~y{g-oksO|sG zI(NB45oj*hsbslJ;cDlCRme3FWQhXgDhD_T8mk9i|7~Bm#3pYcWU2_#4}uKPL24Yw zyoHeC;~a9K%ef)7k7M3KNM&RXTeJXK8i#yx0$S|@=Yp#t(9j&H_5lsi%rZ}#2}aXF zBCz@hv}q`9ibdKK5CgWK$TVfTY07kSD4J>pUI1(YJ6Zu+?|@emUdNA`V$N;Zm{M0lahpR2hNC=%78Lum<(OYBj%d1&=~mmn?C+ z6mhFK0poCfgCJhL06uLmZVh)%6(=@Ddp22fMhPPZ0aXyB=}5PJU*zC_spJ3E&iq$D`(JI^ zR|b#e3`SEK+*UHgon$Gz!_fMhbqeT&aOTzj8F&5X2CrD)ISm@_<39)5PRo7lKg$6S zVn6<$`!u*`x9UH0`W*(%c?`U14B`bk;S+;uxBJy^325Br-wZ-NP222CS87Mi;I*k@ zP|9EsiDKXlVc-vA;PnI5L>z9Q4iTFZxF)h?V6kOjv1VYk0dEl@O={^`u{}@{>wN%Q}_93mA%rwV4Yj(GS`x&_65uB3zkFbBgn`br0)ai z2SIv3Xs2pJ#^oT@kRABI1bAHoXF%#5$N-%!_|gY^@Bya~(hfXf1fC2+94m)h0YTXC znh3Ib$|`*ZmYD_1UYLCHZBLMk4#ghn(Uz zR$~xWV-iwh7FK5yR_74aWDvAr5U^%&>Jn``$GqS_>!SY*bN({S{Kc^7KkG`+-9SvM zK&Qs>9{JC87F=bV10CkZavW42F)aPh5OajV>Hve?5eAnd44x+$ik@ok1g(M-xdK`~ z$FTA*<2umdYPL0?g#n!F{LE)+*dKgo?tJz&(QVq5UHjP>%*O%LA#2 zASC3NQ%J3ZwLap8t)SxoS2vv6|2eckRT8K6e;&R6y!xPJ36KLwL2CfaL8D%v;WDTH zQqCYG=?GqI0A6=3;P9W%?!SO7==LHhYtTFqi}rs`ga42)Mac5MkmY|iYY<|z{?A|zIzSJ4jGpO#7L)(%=7=gu)b+nO7)d&T>O@I! zl>}P6XA7#8(CQ;uYiNB08oRRrRYu~L|KatKupVeZpp^X+Rj>C}*()83Hn^28b17Z| zs*DO(IG`S$0O|gKDyF=JkWLV!4uTB6K|~;I+no8xbr6JYmpuSR*SSL=Bbk*XDL8dDS$UcPBQ^j zMxd>c_$wnweFPbxGekWr20k$aVSwtO*lx(CA-$*$NbQ46!fGOvwR5_l3yDAr72y3K z2m`qqg0LYq5rhQyj2g9r8bO<0k&gP&45)=7SbYRqR~AsEfwW=)tpY;kYWP*C`IJG{ zu{f11N3MpD*~m2!ob6P$5_!!7ygou^KsrjeY9g%SXcd!l)f$)TwQf~w?TVIK=PxqP zo@bdc+bng4al%C1sBYb`R?VPBwSX!WzY0aqVi}h_3A=O=t7H-LXd%-`A)`nEgK%Em zAPx;*J}n<^bx(FB2SynyRw*-f84Fe^6J`k`&?7ASNk8CTUZ41uH_xhE3j@P0p50&X!%lhC|VoQ^}4|%8Wt6UC26%y?Bq_ ztjAJo{!6X~^^f>B{}IX{@VYl=l=`u1&xd`EdI~9;y?2$(BVRyYe1C|=Q_}BdMq3NGj9FQvg1Gd zLC`>%$kqQMS3#E|@m&G+R>ZIUSGxOO;Wp?l389Psd9MEFzX3V|g8%G)<|ClPy+LRG zto<*%>_0=x7Y6SW4Ek#r%=a>c-C->G#8dy5A?FT*;X(%K8V2ni29v35E=w6=PO_Ff zWN81vJ{45MvaSEmv=>zSaGVAmZO4BGbf7oy@&6o${zK{`zLTKtk_hO^hOZ2+n;C=~ z83beGY}#FmR|Pd}hipmnY1-jgztyf_jatMM9`h;&`6LFB7zUvzQ0Iszn1RQSffKDh zvIE^=%4!4ZA~9Gpa5ypu2QbJ*Gbm(&#_PnRL3@$7+!#QeDL0T_E-walcLweN1_nn4 z!!FazL(uw2@-&P5MM!p4YYc} z;=iETe=b8%&BdYjpF`3t~k5%Rtv_%0#Hm>cN09@r=xyfT9H zeIOMOa^DBCjR;a1*?SQzUVLs3!eZZ$ESb)amrkNy8fgq#ANrvFl$sj!=c%KNF0k4ce zb4GDJke(5w4ubcFAY*ipqZJ@^5ZZaC@X81>Mh98N4X%vfHxq&H4G(SC3u!|^p`bY< zcr~OG+@u@Ss0$jY1Jy@}4pBfYT89W;6G0eS0aeIV51g&xSAmR>>mzu@gr{zT)Kf0t z0}LTc@{nsK%Lb&X=|=Gr zv?DroLR&P08bOtjcd5L4p_EgOgk8F*HE6s})FMXAELPMsM#w0fUq3{|AV^TxUqIWF zN8OcI$&OFio=4G|L)M%{(wI%!ltb2>L)M&2-jYk+l3URNffTH`!3a&9N70%`(VAP? zibn-X@~But5Wl*efTpvko(Cc1C93Bos_P@F>m#P`Evn}wqU$NB<;I{M#vtX%pq9@N zG@GI6Hvht3yc_=uZTm017gQf{>;QFVKxY)}{LiohG^-+T96S~mdzQgw4TJUy2K6Nj z#%n+~s|FooD0#uK0#xiX%m(juT>@gWEdS585>X$qZ3b0F9D6{Okp%b@Zoy0cxi9|b zzY6NJFzf==7XskBjX*cLUj8p|^S|VEP?aQb@xQ=DP%Xx=^S|=8|7tt`yRP}K(f^+z z@;-y}Ne1_$49O1}!jCiP&SlW)WiXz?U^t1vWGaLA4uXIXC@h*^j7? z_)deaOoG%$tOx&d9Qn_C0(8hK+lv37m38Jz8F;D~#FKQrr}=o!lYZkARPyq`Vou-jn{$pxB7ri0%UW6_K(<{7+CE=hz&es$70JM96zcFa(HDs_$#qPhV z{eLC9{|Yw$WvxL6{zzJY)=2XjfClbZwf-}y|7TDIA!fz@9BThL4MFvVuoL1B_{#Wt=F$Ast z3t9aa0WTVtasX9EQnvqPzz8&v1g?)Htp1C_>LY#w(8+s(+MxPK)$_Au)(ZQAb)d>9 ze}O~ZQir^yHhJ?Q^%3NV9ta6Pr3YRgL8>GO7cv$HuWOL|KJaP?(g}jAfp8%;k#)vw z5XzhmsgFRl54a98PoD}wAQA8d5Rf%;kP$ifOc1CVLaL9b_(1JXHy)JKpt+wf`#!hlsqZIEq5@cIb6p9nld z1gng6f?9NfT0j+3XbWU?4l+Qe4X%uIgBrC08^EXz+986~N08bFnN;_$RL7VkLau}0 zY*p`4T=fyWZh}-mXe4qk39X}qStTJ?Lj>87DhWb5RYDJra4K7AmAB9;XTD|jJoB`f zMu}65V*7L>I<>-D)PoyUe5;haD-^xTW!;LUopU7}vnA}(#ck8Xty9IUQiROn`Hf>B zYa4`&BKh^hg!O_2bpi!-{UMzu9t|%69Y0WQr57NmA1GuPfItR8LIyz~L4zQKpkaWp zVX&}au#icxuxSW{6gCZpAQ7_=QS&fy%Lx2P(ke#MDn`~SQO-I^)H(%(EaHSrqXjL( zc#H!XH2oP=y%}^9S#1m10;kDkuH&e>Alv_xYx*C)+5Z_9fjT=3t3l&z^6UOHVi zn#G_wkwK$_L8XO3a~gy30tTyH3=ub&Cj4ia^q*lm_#}kIpeY-Mh5uQX|7TtEpM4!@ zB?HSAP)|hQ3}~iF_%i4eAf5~Vxi0+Yy$q5CEvxGS>)rRC@7#Z$%OE6h^*{fW|NQ6w zi(L6He*M4bRnYz*wu7MSCm1&W=UDKcq4OU@_DhD?+YFIc7~BprSgd3)UB+OrfI)2< zgT{OYm)#8ckHq`_bFTa^vIW$?<39!(C*(Z|T2{w@@;}#M(CQ18ga6r&{1-hA+Cs}S z;Xgyn1qP)q2DUs#wN#7PIgs<)y&AT8HEeUQ+i9M+Qa)%3yHyQ?awce5oj@dLGZB9n z15YrhgTv+l8n0t_1(BeN$q7`~u-P-P+A;8YFe=6{Yo;-4rZLDTFi1o)i1;#ayD+dD zGq4yiFq(o8vpxfp9s`>x=wfcY2BW+K3H84M7t(TzdaGb)jdaa%e%TA$HCGEE@k=HUG2f{Abhs&t(8wZY^O28uC)M z{jY5MU&HagvCn@4-~XC!|K%+}NY?bfxc+}p-T%Vc{{__lbIAW^k^0ZT_l<%5H3Qc> z2G#$<_W!x<|8v;<=e7FJY4)Gf5rmkWJ~KOiVQ~M>;P{cj?k|Jme@0u-G2?u;{~2`t zGbn?OGlFcGl>?szq~!2l$>G16<9{vJ|0;I>6|F&5%bEX|GzHxN$ffb0UG+bkG6*q9 z|7DQ)#i0FP*!90i@PEO8|Gb|6*`5D$+kudf4XEn`n(Bfqw6g&n-5_Q4AF>NczyNf_ z1mu=m$atN3=3?9Ym5}<#K6i24`CzmVzC0UJ^+2u@f}Fq)sfN%xKJcms&OqMN z3aNmsAzh)Yxe%^x)?BOfSyt&YtTSdpR>Xm-o{VXbY6zLMOrHu`p8y$!1J4bCsv+2M z3T7#jK>Z`so)PH0Q^+ZEU=p%20ZJzJ!;j!LO6Y?ed5T6tIz$i>Pqzp@MhB^Skn1C) zYN!pilL*=^(g|&XtlZWKZh$b5?`%arYYueiTyU*!Fm%-%WZi;pKrIM?PwzxK-m|g`7HwM#AeOtPFVd1nD9nlW5%~mkQ99C3sKA zxoQnE8*B9hsf-}~BZz5^r7Pg|ky+{tqr@r3as7IcUE1NTp!uV~I`x2B&@ptrRZ3nZ z3LZuBu6c4Uprhnu9kV5E)5NTjrR`FrY?EbdKuFOhQNcPv&MHR6B1+mkQrbLH#v%&D zwTh9oj+L{GK_HtrS?gGkpiLY?&^AWSCQi;KPR1@)#x7RcHU`Xyk+B0IS^HQyhj=BI zWCF-JRoNv?-6dPYHAmhxPu?|8(IrdXDMQ{ZMaDT%!ZBUKF`dCUok2aCK{boPsGPx~ zkHK#VL;4wp+D8oi-x+59XP5`BUz(mWSk7UPY-A9xXOOI9kgjG>>|;=!#9+Q%vg8Z% z#QzN4{}}pyf~uss;F@Lre@5`w9m{IanmV>^pt=Bbmcm8QQK($!{&SoGS4PJ`D=0v^ z7JzzH3 zAGE%X^VolpWB*yU{b#88z~H)*LAaWMvrxda#5HTBU*k5P`Ym1!+q@gMJCtwIh@U0s z(#D`yz#yLrTFD>~$sia7-Fd|A%fRjlTFk)V2C9$PUBQ)+69c;gsNRu_6gJM|H!Wh* z$z)JS1YO3*>BPWb4!&JZje$WAj5I+T#Fz{j7!(;aszCLT{CWoI)eKUXS>pjCXn{9mK8Rf#ROzs2{AUDrWSG=Jb3>q-NbNtPGN@O=rUg1nN6_NG zu;qUdE6})>wC#T-8&E}{?(kpF{lAXqe`SaNvS$CKP5w(8gDN9IO;F#5PxU{$+cB-2Phx|F?_$ukQ(}85OPmL+T?TJN4*fEJ}g?ze^1M;2L2>|d+oSEKAz zuIN#!2tFKD#wAzQHBa8XP}VhH-Zfvzqe#iCM9Hf}*{fX1vrOK-5Po{9l6#4gdx?r? znX*Tzl6#4wTQLO5yA`7$$T_P@-c?H8Rf=AfaHQ;22|-Gp6{>#KkjXxcz&czA+~Ean zC4;C`^RH3$t5))^Q1&WUa4%7GFO+r8m2=3Fv`!T?O&2!LWVWpm^zLAYT`p3zN38Cq zSlvyIfHe%Vl?*(I41#G4{4or|2@JAz4CXU9%AT1n{?E|+ouTOsL(dO}{@&nqFK`KTB?QOC|IBAV2cI$=`_I1TKf{7Q3|;>jW`X)*tULZo-}*0n z3AD(8@8W;S%b*D?_AQ|6Xaz2S`c1s&L8mbYoc}L!`9El>_I}W^K!(}}41ot3{PrJ#?ZI}AR@8JzYqgq~xo`!2Tezs6xuKS<=*f3f5L1rGja+Xq6d zNB?u3`Y*osKf{v$+!^;7j8-!6r!oknsrk10m#y=z-R@nx-J^PwNA)Jd%oP#={oGbf z3~G4{GARt=@t~SWI0|%nD&#b8o&W|;PY~j81C83TIf3Se*qs~Foq<6)%f4_2gVb&Y(QOPO*O+APu_*rH z(frS$0Y;kt*)>4x*%(zo{SF4De++8B8MOX0=>BIg{Lf+z>NSYlfaYuXHU6_JfX;K{ z(FQGnmbCp3K~lE=K_`@g_ngYwfoEPULAytVO#XuwNE`j<)B;XFL-VFFSz_a!<7FF zx&IlWKQXxdXK??|ZVlRDByI6u#`3?i^?xO+|7te>HSPcFdw^;~4cq?;rvD`jK}bLk zROfPNfcDaIDF0`Y{m-HD3sj+)fo73}JpW7h{}&7SFB|<|C*{9h+JBAc|1$pn#hw0( zI)FCGO2GO@(pDfJ@#uqY(}1kCP;kDj<@?kkdx3S{V(m5i}2*R~Un*w1#>mbOu9GC=EL!eoqDIf+?eFQpo8(a;UB~5_TL=Y0P zW*bUk>k~n$Avg)|6Cu|`kf|c9H4&r&g6tuJuwi{7(7FY1Wz>pX6B&dy=><1H5TqJ{ zoU{Py5TPB!4L)E&C#YIGunJNKLHb0H8V8fq^{>=H?GtJFRcQE>Y50_>dzY$vmui4J zN06FG#j8Zwvsl@)7_zR?p=c?*4#H&Ms+HjN5rl!?DYd~g} zEC-$93$BuEz)KoH*McNZF-e?c6h8qHHM)`Qx{>WV5p5bFObW!z!8+3!}4MLj?LYs8K8@qIa>h*#f;Bkb; zfV4y4F|HTh21x@@GP2DusvSQvj_L%Vm`>xEPJ_rc{fJiGux2gru^S-w2UMwem8g3b zD>~=dYW-}NUFc{@C7}YRXHZyoGVenYYpxVg5k;1?j#=soGz!S+J7{?%2&*ivC ztmTVr_iu*U=b-wi`#VGTcZQyy&?;#*sAI*r0yJ|Zb?(2!El`!je&Ij+1yFs&diuZ6 z@&61{J}^}NXXpUcTx>i43!De7eB(L`S_dw0>OaG>-wZu(7-s!q*bQEfeHL^;kkCbt z8j*AV#V-6;JMo`y!+(yZPYiMA7{X67q+aJpy2}uFhQa@wX!a}h=6~|l-xyNwF+^Ws zD7?WiQTMPp?H-_%xqrQ78c`b2IXwfQf+vBBpL}i)*EuBH=iE^FL*yJmj?rf z3#dNgac5JBm$0i=@oJZKXynk&VUToTV9^I}H{%6YK+Fsbj9>&gH;@%X$t2rD>Lckr z3{rR46drM?{^Qm9&!Y05N$Ee6@_$Ap&|V$}rT@&DpdmPJ%l~37|HXX%O9g-mXBmIc z9#R2Q(B@ed+5b$6py?#gmK-b4w2>sZG6EeF=Lk9!PTB@E`66fgU&i{sh#44}{uea* z&#MbM)K1j!Kbz8j2D#S^A`cmOw=yuTVqj#djmSJ?$ZhHL-J&;GAi^`A8kw4$5A0JK$B)(To5sapS6wf?Vc^Iy^GzmoZX zWeZT(Oxyr;PXxa%$aY>`&?0TnQf3;^B|3Y5>rGx*gg#T9v z{;%WLa_HS#~+IY_mXB5@enTQXe5#J;-d!4EPZWplS%bcXbv>5LO?7m&t+GCxA!f zAhi#qD}+WOS4p6*5WF%cw@Y)B?(2wng zAUxF&_&x|wxd$nWAS9$jG5{AxklF{C)Cp|?qb5+51TNelH4%h_6mgLKM3DLjGA;)p zVcUo*;q{SDKow>+1nK^0`BmtE5u6LDi6Er9cd43JiJDi53JgIiBjoxBUNPaXn;^9n zq=tgrgaxULAS9^z0QZ&P42SZSmSOYFh3NT3r7 zK?_qd8iP*gW)gqd1OM0V(fw`qsBss}ehdra47`pE4EhWV zx(r;F3?j}9^2w~KnGCvZ;%WOCnx8Y&KW1op&(Qjbq4PUK*AIrie+*MWEAALqfEH9s zU;i(08?=C&`S5?ii~mJ0{g=4a0f5}sz{t?@O|7=G;r-r`WX$slu)nBQaui&_SaA_nx0v7p7?B2f$y zF;FBG#~>NOAR59T9K;|L$iU~#zzf=Q#H*hp>)ask+9YLN%pe=ez@x#yEYH9U+2G8A zR1<+JA_fK)5G9%Hn7@-j>NtbkX$HAx4D!zz%;Ud&;D3Y2|7!mKwLC#Lx~khfl{UI5skwc(+)fx8xXFd8~ z=)ix@?f)5O{AZ~A%MkgW!RsS~{eK4M|AKD+g+2aDdH$F4{V(GJS}&(+^IzEt)KO9} z{jX#OLSi~!nIs-E2y`$o6oPIz;+Vo9a)UwkA%oNl2Fd3Pa@QH;Z!oCcW6*fOp!SME z`#pokHwL{w3}*isEdKMDfR+LZ7=n)DW7PwNfOgmsllZe%dGjrE=h@`Vx6Yksoj1oi zZw_P=E4ceJ7gGB`NO*l@n+>|y4syvIgoIpm2fFPpW2SWmXum4jB5gPqykHx&i4|N8 z&4l!fEK;X|PE<&nVg*Cc)!J#GJ*~*q5QGh?j8Z0nvN>p#0+<2L?~tAmybgjeKs8Z9 zA83*&4s^N#q|7ml?}b-Da0axng4IWmwF+<&-Y0^zz#$}ZO@!GSg4aignh5E(R!BP> zTKj-&C`f$-se>RS^wd-Eu|HY?p!x_hG>2RtLFyfC|4MjG1g(#J%Cx{8B1laH?;Jtu zBUR60&}|Ez#i}rbTn%AmLntdcM*BU~czE|p8gYWK=D5ad#}(yep_^bkex zF+n!4jn&p!b1l+mL00uc&U!LSnqZMK$s}n4G^v1-he^@|Ncyo%ooJal5wsluykitsgX&@4g&CYY{;f;CLF&Y(m21g!J;EOSIm zGB`D3c_iE!*v%Li4M6i9Y+ek^?hL{?VyXKW%5E`~-)5+K4mwY}`6EN?XNGR@z}+m+ zk_OJB|3xqU7rgchSJzvyMq-9}tzK{Xe{ za`4pBED*_i?7#FC(5@(k86O#DeP-JHUj%#r9mC%LjQc?qitPFSI%odN@Bc5o+JgU*lm9c+|7Ixv%+T@*&iP6mTg2Gx8}eI${{ARGg# zk3_+zq)NszNXIkCB{9gxFvvtRNJlY9MS@Oa5D#IHixxC4khHH6wk+e+N@C!31sCZu z;QEM@fq@e|UI(gzV3--ifYe9I7Z~(zbL)TS*8k3=^_|1uFQ37G(7qw1|C}oSMf5;R zwMDJ|3z>tiBWKY3&!GN^LF7II`(+01>kLBA7?i(r>4R=Z)KKNgK z|9_=j|4o+sm+AShp7ozU=)Z{Pe{rAxiUI#sL;fpy{g-g~uVDUPP~$&`?0;s7|6Fqa z8AQG_FrQ&y*v7z63UZ9Z5;3b|A};^c!u~5I{MXL>Z(Q}?yzakw^M8w$|3=mSwM+i1 zr2UtS{?Ft6pTX`wv-W=m70?kz9Gd@G_5O34{MU^*W}0x$GIyRu&RomvIaWDyt#W5u z<<5pJ)t+OQJqJ|vWY2`uN7mWXZ9v_nSs=lz*^rtDPC{-Mf{>6yrj-1}exADRQ#N8sje zAG{7Si0eVChIAp#X7Euz$dwVK0zxApeIf{{8`i25+5)eGAY5=w1nL=S1%vKsg^-xz za*#8Bkn1C8O$5H5NDI1+s8TDS5>g*QY9B~N1F4TRe99p8krucbf|HN|Iyecbh9EVO zibs*Md!e#>p{i%GvU?$9*E2GSwN}Cu1RXF{vK(^wA%uikENiwI__$p&q!y7iOay-Hp+y#G+qF&h9GJ*li>$ej6BMCj z)_kk%1(5ku$XY&2h)B*ttLz1s$Tn{=8gj^63`b5#r)()>N1>v7ft*X8ltY%3O{$bls+@I#m~jM` zx*LO-DFc@osF)IoHO$z^P;!$Y{|ZCNU54r>4An0g8s0HcW5C!=Szu!#q%(C3y0` zq2e~K0Tmz(%swBtWR(|?B2zYLAvSmykf-uz!^)qk_kbB;N(RMj2GCJIQ4AuXpbL-0B0-%a zP|pZlCCMa!Y9)n42E`=Mx&`SN29;!PgM2~L0&e{*W;q`Q79-Hi5tA6WKH_3vDdb7`3|? zq^lTsiy1ic8CVM$xXKu~${EC4q$mFfI00H<&vX30%!&WP`~U0i|F5>=zv=4#sWJDRZ$Dj$>9vrUm3WbFtA+$A)&(zQl}a84>LGlXGnX(Q}au3 z+JDtm|Han+m)`VW?7)ASWB)}?|Cc!bU*Z~QYXIc@D8)`+tdP{~2;# zGMIj6F!?5C`CifGms#p+tIYL~*&>^qSzrX3A+pPw4?^ILLXhglK5Mo;xB>!IH{ey- z5D^Fo=@vnD20>eu=`%p5_kb@Of}CXvAt5bK2piHCg^*UM(=1b_TBS{aR7r3WUK2qW z=%XC4&XIA#M0i&SUI)P$pj+@DqjN~>x8cwn0aZwAwktqgB=GV%NKFJG z;q?)yNX0COx_?f^bm z7>#rUpMH-_x)d!(pyFkaYsCnXke(*wPBF;oC^mWXt#amCWX(2Cod!9}*C?(BlpwKmK3f#D9i}y$p7X848~;wEbi#d&yAzjG^P5_@4jL zS3y_4GHm?Maq>U^ssD_7{~Ug3#dL)$O0XODiX>d9D=BiAVYSLE|Ov@gGwrsdIqCfI)h>ovqn0bb{4B<3WIVS zgQzp8UkNJhMHrZPK$Q^_4|oa*TF`?jaQV+49@u@EJN_?&%OggopPXL*d7S=p+W+UX z0M$nvYX7-3K%0ytOuHJ^7>pzqCe?~9R;268t zf1%+2qGA8#WB)5B{?|(Xsh9oNEaRD4#5GQXnG6gu3=9UK@izu#1_mYYh@1!m=*nPj zu#?aTP<_Ouo?+5B;T3;0_Xk z2jbxMku6vRLPF{wNPPr8#%DUD#(|Tdd+yRef{^+M`D|^-cnG9Ef>%Ru2D~c-XCPNe z@bz+#`Up}18N;r_gL94HLv!GTSo$%&AcR``=tOluY9gJe4mg5O66r>Qj#fafjL_;M z?eJFZaL}R!cy9=6eFUk7Acy=QpMa_ZK4}5Ip9oY9K`JBv8V%I?NYlRpGHnE@iQq$U zkR=Mp^^vA`sit?S26(CnxekJl&Z&45!RsSPtpqKN!H2X%N@GkCQa+=Rpfv{Idr2Uj zBTN#xqeLVdQ6+(w)xql{m$H@cD#;OiJu##J2VKHdvc$GziGA@B+rmY5MT;RMsD>(B z1Yz43FR?3HY*)0{z8ItoqQRq%0t5H=KSKi;HT`Gk{LZ`{bi@$L z;r|R9|Fa+Z&v)cM>+b);C;!V_0-gB)I-&0Df02v-6)%IXixr;xpCRKFgW)p~sO|iOhB_ z42BI1>SYY_>EM-h0iYQokx&M)aL{HV$tVV?Xa?Ch2E`Nxm2?J;37&5!Bc;+Hp>flV7UY{^+#WH6equ=b&s_eWr{OYLosmcpcFW*=`np$S!xDP4*lIFtW~>3PRa4twDz*%(Kp% zXANGD4HRAL2e^52A{=k6x#C-siICTCL#iRjh#Z83^o$^E2nnf=KwDr!nxIt@cs>ZTPXwuqAg3#62GnW= z)`~5^~*!N7Y)->U9vdXZ1P=f-Kd5usy5SxmB%& zEa8AGCWKT>_QlIQikG?MFLKG5=ae%{{=7oXNWz? zAXv+wv0E|m3#dM7{?E`3UjH!hKkLT-;@AGO9|Nr?XFK>`^z?tB6QD!9Irf3Bc@;Ph zIuuCk@_)Gt|E13Sw>>i-Pxe;D+hF<5_Q2>Zkk{~J`=+n-~QoyNe?$iNWE zzz!O46Ev+9^JtOv@0R!P6m_jm$&B7#nK$g9)^x zfz!pR{gBLp|8k4|>&*YJIOV@=^?#9~|Dx&txnuuxh5zRa`!5vrUo`eVj}K_28N2;| z78_`N#BB40!T2?U#$^VX;|z-18KgEam@d%Gxa?SV(mZFsebE-X!p)9FE1im0xs}fI zty$`mGm(K)4_w#qfGbB%FbP?R&H`JA&IBH~V+M`XF@WnM24@C=5C-8~5aQ1TRY`m~ z47}M4JQ)n!=?pAcpdORndIpoN3>q82Xgh=M9tPv1T#lCoLmtV*zm?DXZ&3eVr|Z9N z_kY#y|4JSIh1&iLwESmj1tEqu@T$umaAnjFUJkJYbaXu9j{nTNKzpmX!IcsB>Hj=u zL5TnSe}N02%1Gz}XyK*k`Tw$K|BD^_uQBC6L-1LH$URoc#~|lvL&n@dl~K-AP-T=o z(=vSy2tlt~1uafMu7e=`AV|*$vpzBh_k$qy5egf;aTQ)GAy-DAOYxGYSf)%xu8Ghp zAP5&!2Z47ILDtJbDji5ogt(qHt{)w+XVhD7RXjB`71Pz8@>kUEb z9S8}pkKj{9@cKwAtVJuhNeguRDRg--yiWvSKx!g530lnyzMfS(pcYaMY5G+o*F>6r z)f&E_8%`k`S2e(uk%~_l zIF+q*tz6?!z7oDI7jjrDWWhOPA1?f~A_y09dLHN?zRGoOXs8NwupUSXl>w=J&`6J( z^=Kj>Zsi(}s)k5XIajVlMh<1G>`Is0mMnvImvR?aX3sNCpJkjn!#H)CVe%Bi z#D4wwUftMkjfghQphoq8dJX?N74IrFk3uDvTp{}mG3Oj^mo|QnE{2c|tns@UVoot9 zU0^7A#?bVIdEPmNbC6PV+!L zBZiIty>9$x=z7h-zLJ4^ErZKh-ojs?nyC9fLpQjjn*N`48|dP(e_CL?|{|pKL8BFgmX#Z!>2c6*wV#r=% zVA;vQ+sm!COw6`Z*s6`iFq_3No7J$8SwEjat$;xxi$OM%K{6F|vmU1}1D_j%fCp#= zop1o6N|K3XP)uY{PG(R{2lb4UGC-t44oH_sDCqQWMs)_zfEe-xBHZBdI`Etkv|3^Z zFLE&TYu(4Y{y)pI|E%Dn*SWX-=i2(8ck6$iE&ql0fY%4^|F3ZAzuwaS4Ax5-R8}(R zY-G^c#Gt>G!F&q{sV!lUo5P?nmqB?3gHfw#)?U!j#APep$`-kmEpjef;#{`Gu6UtK z<#N~j2@Je?Aj_H9z%>!1e*~$In8E!cMvyE5bAXhw+H*?=GYHp%kXS2&P$Pq20|Q4n z15+UbLkx|hp=`+v}q=rHpsWXG!@&KuWAQcd_ACxlHDs`G!@)U?5X!j6!<+fGQ z6sx2u773Fe$UJ@`1et>SK@bvN`(Ukr(CQ#aw+Q)S2>qyTC;~56(2ME-BY0)hid-KV zg0E_Y*FoCht&o}sUJXGQ&<;^(3#c+8u|5KIk-+s4q#uMrg6|}P^o$^tks7!fQt>H+ z)JO0t2~zt&>LW-U1eqIB@hE~Yz{?dtlSIfpBL$aS2vT&-Q*g$aM;&3j-k`2RL9Q;acFbkZuTksX3$?f{^gq2a^Hm zzo1t}m7snNM9LL*FCnOkf%S&a*qHT^Th&^3r25FE3Z+VNs#s%Rw!*Fyv|Pj1ViR+hPn@ITmDO*{LgU#R3C|6{m*$4Jmj(rJU}-Oe7?|BSOa(h zxE`4fokHR{{@-x_e+IpN28Q{J3Wpey-$>Vki>ZEak<|-20)lHds6Jvj0O}tJ9Qn_^ z`#;09FQ8*0rh#l>-48k|Pw32l;q(6m&iv;%`=9;Hf2pJYRnGr+Tn?_1)Il3aSxo*j z=>2EXf5M=8OU!ViuEz~ftG6UZu`%* z=fC*L|MuJdGlcGEFx^yHG^b_kntjwz~dIFx8*87829~Gp9MnFGyluZ{?9-8 zKUd#>(4p&)cU|1zI2RodX@m!GG+(&hq~ZdB=?s)>x!%f~-}5oFE5TlK`)ekQc0? zv2D_4Sf@=#Mvz${$VxfL=p2NER5!>Zygsr_0bM@?U78E7j7$=t)sRKPBuF&`Cz0zQ z2pdw-!23ZE21F2z#9kSJ`$u}v%BVv(vK0w|wo*dsAjnQ42wNutv<(D8LaHGM39pQF zLR&Pz=K~=3k05;_I0^3-K~^k4CXC?SA`RavoI|DlmM@MfDHE`S4I#vyncZ*AQcU+iV3a^&Oob=oGMpC zDkEePQXe^0uZ1IsF5>GW%pMb{>H&|-L8RQP*MYi4um$aKlU*wi19zaYyUNw}Wh)?l zhuk;@S^H;^GuJ$O4rC3Uaq2Yv#7PDTeFh1AkiL>maD!$*t*m#sv}Y-|T`_}BB7;gA zvsDd;_e$P?bqrCbI4WK+ZT>HD5;Tb;c<#U0+5c>N!NYOOK>LXp)`4!65IhMwM+kIe z>taw916tuQ7c@s8dG^0d>~#jlY6iijf^N4!86W_sU-j~Tr3;|TVMWgTS3CWmX*p<^i^1TVurX-Y zC%+MB)-CycxJWL6=8>?tXP- z;B;c(_GIAqWe^Eu5DSJ)CxO@6$;C0q#xh7Ig3bWviDY06Vqi#NV8~)%nZqcwidF0+ zm-IPK`6~iyw}rKzi0VG$(Rn6d@Jd+i5(CE;28IqfpIfq9{tK@H_5K((|A*Z94!J{$ zaSN!Mz_j~6-|_z*H~up;-(fJ_!r*#@-Tf$|<9-I$Jq!-pp*4}-VsL%bWSY4X(s*+z zUg8QGY6dqy@>jZ)tP3hzz#wc39**Mz&l#~ZFtCEg48he8D!~j^CJA2n?ajcS&Y&@e z!F)AC@Nv+&e>o3%T7U6O`!Bxmzu>a}A}jxME%?tg1KiA*3hn|=0Z#)>MIxtx+a%M$ zyUeD7Rl+Ny3E=34D4qdY*}||I6bP(4|8wmF9e&Gy3Uss-AEa}19@P2eyZm493V0_q z_%si})1Xaiy6gTilpeK8U1*oN6kZeALYB`V%@D!6JZQZh(1LAn#|Kigz;73_NSy{< zlK`FxLau}0Y_sGkklP<1B=Tx)2-_@ik||PG2*QR`KoAmA?|{g7(54K-*lx&?KFB2e zPzCsi9E1VC9S>3sq1@0K(F(aH4{|3FqymDkSb*#*f{)0dty|Cu1r5+aPT|%52QX)Myia^>LX?MLPghn z2!fuC=9;GnLnPHl_Ru2@AqVY3j(>-e@cPIWTrt@fE<&!79Euh@qSi`~wFI^$%WR65 z+LkOsL&)7FNPUDxLW*TH5?;{|!+=y?pn<+h#F2Vft0d>DHI7IKq7hVCRIEiIVVAbU z`$v%K28|05gphD4$kr+d2{F^9d<}?ExyGdedZiR}P!T+#=3Kf0w0fj)31|Uc;UX*W z0z6Attz?=y*(3#2E9u5|8%MS4hc#=4H!BC$O8YenyHzu2W-}kmuWNpGt2}RVw*u1*9x2iE%s*E3o4pG%k%m{QcP?9a~}FH zaPmL@>Hl2E{)=4%t-E8|@L%}gf5DUg`HuaUKk{ELXEe z&|RY9HZLV?Ux_*HVA5T}pp?fTmIOM8n^99vM9N@DT*c}+y>_JBZakw#X zdob{Lf-WoP^#e`xOT{tB#DH#(6^~~Sj%E;wXOK!~SLhPdnj)orSk>&Dn)xeDtGAl= zU$h*)YB>ByH1-d;YWR{qJ?_zwFxo3^uzN%(gPv>|!w8%wV;V!F(Nq)_exF*$nziAoY=X_HM|y zw{y{AXV8+HrJ%lW?sE6?jlty$8N?kybr5LrIb<0FJC6E@0n|C-G80wulX95hlyShK z@vBSkf0McY9ajF=Ujag53;y#?|Ia)FgczrTqih|DG6NJr3=nZh^#qZf0*YTeO z)knr#{xj5_vrb!No3X?;Yc6Aoqsg zwUUN^jk;eoq&|XFMw))rYTgx)Dhb2@-$?}N6G3Vs$nidqQ`aDs5#*>J_>wvN^^qKG zfDY0dLL=pzvf*o_?Q-Yg>L}U3DkeAs(p|F7UjSOkmcIZ}J;6z|x(UvOtZK6|^6%%|V1H5j6tac!tgfBIR58NR$Abll=%GD0AY71H)A=N&R9+PY3 zDm3I)wc4!;TARSstbx}-5E1AY9k_D@5rLB~70~_~yh?)fl0eP_FS>xNc!$i{*%d9e zhOJ$<$eL}EHXSlLVUaSyIKEpys#_<#Lp`KcEx1M_uuj+}6ST%mEtA1!ra;V2&ei`l zPlFad@tyn6a_~RHjGv&nfF+;{EI7~n7rF7D?-FP-TIlkB#f$%W_JH>st@!V9?mt7y zaR!D829?zeiMIsWz{S)wa0hDse}*NXsRV|7|7C9dXE+X8?=5>4w3~^!|GymgCPALV z|M|}R=RN(O;S30Q?D@}NGl{`wzI4@HhGn362hJ1!dC&Y8tN+hn^F&bZKQH)ZI$nLy z(QW*O|9SO5_inSRJY?WK#lT(7z*54%70AHh59$Z;crkE$GjMr<&P`=^Wni;oV6kCf zb^z_%VY3I_aK!7zARfRV70DnU#~_i+AeIOk$l|MI;GNGPwt_+QIbwk3MMGwxG#>2BrTDT<00M&Wld?Z?qP4t2@J%{|uW!)eysu z|BO39US`_!pJ^ZHDiEQg|CO%(XGp%rpf#7lauWzyuV=7Y2bwtojnpk+P@BPO)@hlw z*R^P=OW_i?q6Kb63tWpAx)v|AE}rL7vplqV4ug_2c%cF>Xm1pBxdkT!XcUp3fx#Gb zE0b6@gLw}__$rq2Q(S%DMQ8r!S@>UQ`G1z>;12IXaOZR;c-6r)aQA0AxE?_xXM)=j zGr-Cq6%(Z21nC4#0r!s}6Gr{u7>6vYn+dMS7J;?^Fm3|ftH^QiKhMeke5XMpbpjVb zM@R`?23=eNx=ZUSm~r_(_l5u5CqYPm+kb|JGgfJHZ8H}@&Xj}HK9GJ8WE2ik<3LEr z$^=LXhmer@AV|FfAtAL7gbiAj0PY8Y?{}DF3^^Kw1%$>nN$3Mz!-}*Z0a^{k z^}wqhc#VUH0qGFINk|<8A>lQVVMHqg8HBeWBgmzA@Ub{ZO$4173PYJ8f>cJJW!vC$ z#%0{b?kn1L7 zHoO`_X4rx&BS?K@i%})P>n3D|ZQ&Bz!X?&4AOxwKAoUTXhJujr`UsiffV#^G!i7{x z@O5`^2BdC+bKwk#IJ`c>$^aGqm7vN9R3(8=I)cwpKZF%`#>}+KpCe zlPppum?cgE9f2O*q7w-|s4BEc+Ovm2uaH5xfx&Vb!<4U@r$LKydCq_e5Qe>=o)N>^ z|4c{!^IiZ~Mi)Sn(fk+wvu*`(0>Nq2@FCrjpLsQw*MFH22UbDrj{0h zDkJ9Y|AjAtZbxF+4=S!TZv1Cxd%~bNiJ{@6$m##`*FYOmK=skl|9rRpvs9d8VDMqk z>XoU!$FSr-=w$EXpatPl&Howfp7Ck_=hpboq6WGnmERC_dYiDxe-6$64AM6k825oH zBZeFXW-kUNPiQ}g-5pduLk{s~wPj$oW?--dAr@OueI(+`t{lUtlFXo(0IH9K<3RP1 zP$PrDA_j?73}QD}6do}t|727D%b*H6Kmc?BgyDaFLuh@(qYt`Nh+FSJlfqL5mXi#H zFKo7hY9ax^k4PHf4#;38SFQKR;}A?Ww6}{8bec^1?m|Y zEMicZ#%0}SowMJyXsJ{FB99Ufaw%HiRzY~Mx(2x# zf{e&P>L5sk1z}GIS6lFI(G>9L9HeIiAtCdLQ^5w#gpSu?t&fDRfT|>ZNKJI*zrZz6 zUy18H=-?^tlmGR${%5E?X_YqDI(?o+`Ya3ZT@R3|#}YOx1mQyJ9n+MlXb5sD3uMd< zLPDp35+^~quznC)b%TowIabanegblRgfa~Tu8$xqw2`YJ{pe0KF62Z_2npXk1ZTk4 zDu7SCM5%@#Rg!*Kvwj%pc09;23y`@Xc((|$oEvmXCwK*&Cg_sZM)=$iygq`Q;S8;V zAh)%GFK>m^L=aNluNsap>mv>C3P^nfuY=&dAr-F@&@t3Vm5~y78xg!`1h0u;l~KMT zc)0?+K9YCIg4aioY6wokZW5Vmn====4nk&I=gvbzkctU3a0Ko;LApzD5>ifLl92ic zlZ4btcu3^B37PFsvK+FI4_@g&7!W~Z5+d$girAcswLZcWgbW5kY6nyjR3BkhNst;1 zjRakE3*LnU8Zbn?S{GgifuvxaGDtTKVhD15Ub5~1ZR+Pc`d{+Gf7!183=S{&wElCegLaY%>wr%321W1@ z=z?(!JUtA86B&3nGl=eHka)x(|B^xR7mFI`fNpLb&}|69#{b1lKnQX>D!cK2Zu9@r z`u`a?9x>RQ^#k|#7&e3U2!ldtH>hjLya%*$o_qg)-h==74*!?E^xyNue}GeQ{;2<3(2pd@(K3E5-caSHHAZzL%D;Xw%NARY>>Z3*fL8nV?0PV45*$+Ae zm-FO*&a>c)v@ZYWx(G^LTvz^cUHZ>+Lz#(2$=!t2SG?k1q3G{tK(qH-s**s*N>@SZAY>9!8L9bJse#upX!usC zc~^kO=)g6RhIfU!S2^-}IryqMNLL7%gmj1?B)BqyuA>93TX4-&a><1wMdutvmn=Af z?C6J+@{U=^^${8yQYG2u%tfn=;9Pip1gV}NB)nF_WLW1f!mNzYM6lLMkg^vm37HW_ zu6nStG3z79ay~Q?QXe4;Lh2)Ilx6!d+dk)GtB=7niX0CT6f33 z=fBv*zYKY|_*(z-wSw1QO#l~7v%#Zzi$O=HDqZ=naPdFyK2Yf-cK5%;gnJB30Sru* z;6nzi7!+a{q82jrUSY`Ez+hIxte(QaV8s)=E*X3)pTP0|d`JIFUHmWI0qP$K=z!`Y z4#oe%I{$gJ{xb-FXW+QQzm1g07R`@?zk22O}3y zp9nOCWDlB1Vs~KR_h66;231Bf;S2(?AjI1Xt&ha_GDtoK)kpF_m{tF=Yy5}QN1~?x zCCxxc$n?LE>3>es|6FGOmCgV2Dg9xPTCLRv+Ug6rsF(G?f40LQ#C-&`l0oPgXeNbW zBWNItulF;9&n5<=^`Lv?b(b@!F9g*`S_>J}W-zEu*Ns}_T(H9_Z;pNTOvk+WcG+`` zvlm(yuQV>+Vqbs2sr!QGr0d~}?q_WN;I;9;`?~+SOaJRE`OmlLKie#Dm_xRMfv5IB zCpgUocZ(q8Y;ZLM>G(_s*By{*2r>i*sf-|9BshC2I5sDOV-_+lHyJ!qHxb+m?gx|b zeiKA$8n`L46122~W&eMU!=ULx-n0L?&;RGY{GadYf8Hzqxvzo{-!;(Y0=~=txz2-5 zsbW9(YAh!=dDkI1OIq>2H zILNb2vie!m4iswLELDS5v1yYlaLj3kVOj$E})?~Mc9f3Wu*EDR2jkQ zBL(M75CWetqu`V+@0bNakn=ntBwBrBn={uYdk*~U9mokh5E5P`LFynIq(yoT`3rC% z$ZjU<{6(OO3F$-#t0X7bITaAE!B0MSDqP}}zrZPffn)w` zyPO%;ki+`YXIQ09gRH^RkMGrw@70d&(~j*^kL*#8?2(xH$p6BB^_!rh5|~c>X9aJl z=RO0vAC~{>e~x|s8D{(e)kiBpE9>}=|7V;3pP}(HLkoD1Qrmyf*2rG)pwMzq1tfR_ z)E)+P8CL(dI``l3=znhAL{JZiPmzH~UdldJ-6spY%|{bVN-{9;b8x9~7Vit%^I!7P zf1%U=1&{uhKL1~$@;`(5D>1GAd`kZX6#h$V|L2hZ#=v%$fn_NJ^AZN;RtAP@28L(` zhFAuccn0<)2F^Ip^-p{W415WoiiST1w7h}aA2fW&>dL_C2wHT<=L*G2~MeGF3fSQVbIDE;72`@^a6pI7I1?zX=G5 zn*L{y|H;6$n%Dk;$=3h&2mVX!`!99izt+zGiW~mRF8$9j;U8ngTZZHt48A8A>~}Gk zYycs{^$fbJ8MKx%D9>Y1nZ=+mjX|Q3L9JgPVXaiwc8>II66MDP>uyN2-skFmFER5k z-@^YQ%l~sM|IfA*JRk*GG`yJ4mersc0aJy?c?WB*@}PNMC6Zcz;F zFMsC0&V~Pam;P&C{;z-izurv{(!Kp(@6LbSTmKF3{g=G{U;5^M>C2$yD+W9NGc=vm zOIc`?vD7SehAF5@nhNO(K?dUBl@Vm{%_4blWGDFm2#-`J(+d=gaXn6k$XeOHH?0)U zKSTLThN3SFwf{k=5xiM)2559n^x}W1hu~!lOF^^wE;s%&WS#=exwBa@FpEiO+fChd zBe-H3sCr`103Svz2j0;m!Qj|qx%i#X>HqvE{)?RWFM9GnW72;HtGD9X|GDKs^^v6J ze+Kb)3=EeT80IrDECAO>jSQ^m4D6WKVi_PLk;)*J%pep7UIgv~ zTHyd1zHB-ULUc@KSr*P#0)_7k+=n@KH@k1&ujdj z*XTdD!GBiO{|vm(8Q7LHu&-b!ek42ZKSS?3hT3}!8CMx1jxu;|XRuktV6>dUd<}!` z4p4n$yAxC&X)I$A7h<>7nC-_lEO+m@fLG zu<*aa!vA~=|MSd8>g>z~jh8Y&dP9pql@Ze-a6JX7iQp>dplX5FN01&6q$YxpkeUe6 zZ-NZdO$HCWLdNC#VfE2u@B%!D7DzQT9XuX559A)kh5y+Xfe>hM=A!=$OaFn;lJCsR ze=*E{!!YkX!@{==i{CRW|G==~Bh&idT-*P%90ILo5jg`&OD22&Gjv?mOIfIwvJg@q z!7Cs%2BbcM)I<;xQZYf+znLaXgfL9vCqQ~V5H7N~F?dxrXo*5R=n@&oMGv6kwc|h+ zul1Ru_0X{2np&C!MZ|Fc0iSyUxk`)mAYTGiZ_U%=3AxWU7_k#uIg2; zhQv_wECcn42-Qc()ewAhE2J_)o+nar$yIdDfsl|YN!d9IQVqdLw8}`qAste7VI?uE zCrlB@m>wDluaeLhxc1&c*6Be=%uzmD=wb=bYG?RXT*yLMT(uHh9L|7OPw+uMcpZev zK^Yz{;aQ;b&mpIvn`O<0>|HWVpJA9dTRFH_IC-&Q<3W`bZyb;QSG)dS=`yH3 zVmtGn<@|pR@PHogng7yfL1)d$9Q@BP^$SDMBZkJ042|CyI{z^&293^1-1;wZ>p$qg zy3L@gUFEL*S6cg@!E`YLgA)UTB?F6!g;!Sfgf%jTzTkCYA`GnjppyhZdrmnSq!JCP z4l=C#&$17+jg({8e@0Kxh1ycO|9KVub1MJm*Zj{S^%!))0s8_5_5}<)6B)QCFbK4O zkZ=uyNG*e86$nXHGDuZ2$doWh6flUUFbKpm2!u26hk^FV@&tf-OngBgE^i0}dk6z} z1Os0?gK!T6-#P}#oeZ)!85Qp`Dt}>B|G};STE!<~^k2dZbYX;$`G0<65E3@}&#LmD zLF6n0*I@?M`3wxx8Q3PWYj0yP*v6o~mO*7LgT_V%wT;Z0TNw1WF`DdVu-wI9e}KX5 z0E72Y2H(RB-Uk_ik29p-W~h28-1|pj!GF1B|G5`}5Zh8vR|qr>1Rr^u4;}}Dbav)| z+4DhF0JJx>0L-2b?iN8*LdNJI9Vp0RImp`k8L$cka$nUHFc(riLAphdiV4DosDV^c z5D|!hu#?`oR{j@Q|6g|4K-Gq~cuxsgE>#t5m(pRXoc;$g4yZd|NBzfCb3GK9JEl2&v)$?HQ?g z6so!xsJa&@!^YwuT_MQa5Tq-FT>Buim7Q{wopRvSkdjj-W=*8vkdD7T!dgio3u0DJ zcos!~cQY-t&Ru|6D?#R=D5;O&^$souW_<*&n2;He+5~w&6uh?uVIYx9ol3AEhzZCf zq<%poAqqgJ)qz*>Ih3!o2k(A`ETw}hhF3#y269aVua6+U1N$9%=`(2cBlvUA z{wp2-uYBS^?;_A17S5&r#W(+#Is0Gl{(q@E{}~Sb2hD5m1nq3)J^G(*;(vy!7X|^d z83bb(7!();4H-lX7+4j-eIg-7c0o{;#3&3p5<#=TwEh^wd{BKPeHOfE+#XaPiKzb< zRQWHg`=3YcKeOZ`29Apie9IYxR!spex$tYC+dh$dxfj z7c)p^Fo>ryh(t38MS^ZEhg=aW7|OsK!oU-ZQXlcIh1W+6il5;1k&wZE5u^Wn#vsIR z{GVGNbhkJI&r1f5gA5Ei7#L=P=CD}$8Mvl0Xs!nx2c)!`L3SB~)KUiNWekcd7!0;E zc%R{mxyzaVQnLQ1TJL|8+5e3f{Wn?sUuy~I$|tdT|3&A5_NB5TS0#`R&TLo(1gUW# z_0c?V?E|Taz#S#V1^+?Uc0lSZWJBQf5xm}kbZ{Uf^1vNL!Ax)^1=)o(9W)BZvfw}0 zvj410!2^S{Kzz6)OXEqv*}=#}r{m;Os%`!9VRvg(7^@;4q!Ul}iYp+E15{Iq+L6K)vJdnDd+ zLO*qpPSSix?E~413Lzo=AUFvbcEcngl@6SQ^p8-s3xRt^$g2?`^$uEn1gVDLq+x6) z_~On^(84*?%E%xRaJkUkNpGV&~gteS%?PJqt`!5NSxb7~%i5Cj>5 z!&)CfY9-`42+}7~bk0$5%2sj&BaC4>NF9VsLgpHnkC1;g=Rc z81RY-R7mE{hmi0IDU6QNBFOk2au*5C1{cN9J_3F=*7Xu_K}g*M-w$P13c4yCQrbgE zGU_9E)dR0(K-CatD0;|IN<- zm%j*FH3zzBV(WkIl;WT>Gzb{lEUB|8lp%%f)wrX0(}i|Chb`U;G$&aaaF; z_7(pncY#Q@^fe5EVc@Ic#BTrq=e5rb4ZgG4HWcpPZXNH_}AT@nZZb(Ahz)1)B{=! z9%lpJatJ*V5K`$tDjN968>D`LkdPV&JVeH@08~dXE(DYC`Uo<+1X)K1U&a6#g@cgr z%|w&IYq=rS5TpW{3#$7-_1Z$vgcQT#|4hsNGq3v3vKn+iAH(c_0<-_{&-$&^{(?8_ zG=u8`2ID>k{Zi+R-Zwb2iym9q?>B;@fp1YU+jLc{ei`-G-pzHBYOyysFyr zs(kayvQ01Y*FMc%{kUM|liXzw6BgbLo^rvd^RQm&daJa#W=S(GlBd9{9%Kgcv89lc zOCd8s5E9bmfv(Mt?}gtk1gVc8vdA?NgblBB^kci><8qM6AV`-7%x;I*N60-OI2&B= zK+aMCT>t^8kC0FGF$iyiEZBySdZ8_F1UW(hPC_;cK}bkN10lg(p+>F11~96FFIRxb zs>Aw3kSQVPBoVAz1nLmMY9igj7b5g$nTHa|+Ix@=h7ZNX{`G8OhkEVy%hr2qITZkP#usng_I632R3Q za^el7GD0RH4R}bI39pZk8Q{7JS|34bB}iclua5{a;H5OY>cPrDtB-6;mO%=6Y~&KG z{UeA8$RxZ%L1w_uKS%D1!RsJo23!|JCD!@~;yZ{V;Uk$425h$%=x`@+FhWn*aLk+U zl)u0wZ?PNb)~D6>g{x(A*GgutWJp}ekiMRw?T*M=&?T)xNB*<#1>MBQeH?V5fY6Em z66gNQUidF@8nnn6R8?&T^=>3C{O3Ii9!i=5T9wDX;=kJQ|GMY?XP^1+zu~KnXC7z= z4FfaygaH;XDWqZ^DBSYQaM6Fhga1WO{O4c$pP~2#pVfZ`o&O9H|2b6ti|YPoU^vad zFp)uQ3!BX)cH7I`R!12$HZv$JV33~8AlDB<3SA5eT?~qy3<@0#3JnZ$bqw+)3^IkF z%1Af?)I}1E098i(;7KLkKyYOg&cNNwAk@Xcvx-4t8-v^}X65?~@}HU2ez9u&=h6jL zMnd3CPkaX8%830T1LJ-MhCSfxStm0vv@kHVGjR0?Yp#~_J#Ul!)~fBl$CUr3^Z(l{ z`>(P5zv6=beDnVE&i&60MxYzpA(acH?tqYxE)RSl4pK2emLfnD&j!y2!RsTiN@!)Y z5Y#yW)gzE93^K3=uZ$qWbdWJO__Dgm;FTwHz!N}_j@bPFObh>Wt^kd~32gk&zu`aI zDo}j{I#ze;UxrzK*;f2#Xnw-rx{N`gk%7N}L9UX)pqs(0hu34CX5@0Sv@MP`hy1%P zgipB?J>y!!>}zQY@8m4Km$&>u#l}~SyFT}y{NH!tU)`<`RomW_ZFy6;{(145=S8cZ zr7eFPH~+Rr|4H4Fb>_*lFzX{kKM3gzZFn67uYlkTNY#T}AAvRv#q~fcAhg8^`mxyW zT?JP|pbn89=rlRVxu&2_5ac``w0j@G6%eRCLhb>PQ6GWF;y|ml(dr{`byKeq*q|9u zuNeSc$PJkr0!x7g=pcO}NYw)&A+-{;Lj+#Jtp;8-2U@}nz8C^t8G*LAg0IGd49!7$ zMv#6Ga!mwb^kwBeDfBzR{EULQdikW~{-g^LIx$N~xYT0X4x5vCxd zkcSlb4n<2LTri2{m_EniCD>GAuhbxQ7QDKFFpz5^csB|n1@8?ZGr%qZnGbgx+yM{< zWS24AU-0{$Tnm@E7A}K?hGXdp+rlNzg)3Z(S2-3hvCN*MoxDgZX`yiGHp|&}w0Hft zJp)=91*&nj|7Tba-aoqfC?Hv;W2G{xkT377}nMfiAX_H2*KC zzJ-CImVv86(PtM^{7Z&_TMQ0o7!3C^D6L_TUdo_29kfngr?!v3r9~Y~XMMk=&jPyxt%L>Kwu9Bii5SLUj>OqFlgTHZ^~{O_{!zw565%4Js>R9hH$(;37Id96D6th+=#x-?_v*%z+$ zuH6~dzAvu-Sn`yUsnbp;O*@%B`&{Py%Q=g#Wi7s%GWUGU^pkNjPp2)unzj69-pbo~ zD{tp4znQcAX7=LiS&ObE&N=5)yB>0kwn@?iV_4M#=@UU}A7m0z>6pa!n8x**#P*oP zf~JJvH4X{`X`QxlOt*0iq8|k55W%Y+2*VI#L=N5$LaT-#=lMWLNKFK(gV0EDpQstU zr?puxw8;RB@ZZo1Ib{yg34+u+5E60^5!xsm{7?l*H3T6c{U9~iIX`eNF0mBM#ovin4@T5wy8APr)S*(k+72L=Y0KK9YCLQglEd6m}iR*GG_A2~Ii{E`qZm3@6w*dyolv89KV28>= zDhP+7rI1C{^)D=DhS_{Xz=Re1O z(4h+aCqd_#g!_8WuP zD+aE63?la!Us_{yQ%I@3iE<;)?$g^Zzr=`NuTpAH#f5 zhXu6W08-{dsu;-nw*~(h=YtTa-vOzKAS9%IK~@Rr>_B=?u={byMSt`u`lD^WlVwYdTp5WGlLCw3u z+V@3t?u+R?lrZIZ%8b()^DgBrxlywAe)X1TRa>8zZ+eow;#TIe8(B-QXDq&&y5LgM z-1CVu&&EzY88!8|Yr`h%Z9jAcA0M$g0yIBoFoAijd6c4#F z((tc=*GF2|=7u135S)bcgCI*3AXO5mPXylCid+*x>LWE+&j_+s0kbl~sEHs)p`z4B z;LZ`KK7!Oia1yODf(+J?R3FLOrl3_rXk17g1V3F0%mCd#0zLw9wsqEAtjH#N9uk@Z zuc2TJgsg4Od@ur?4gs&W;8RY>)f4LRdJFCI;0XPEa!3V)i-go-$RvTWzJjHod&~=# zBHveT59Y$fAzX+8&w?V}Vmw_%%hxO5*g5Z^P zaEsuAa1qFAa0m&Jg|HzcXyX#-I@$%cuoH0|!3W`j4sXw2;8486rF@NH{&J19c~ZG+ z%sNjPuK3`8?7!uW|ArU-^PU17%F2HBKj-=X!YBSStozRZ9;{>8{$K1Q=tw&Llc4D* ziBtcD{bw>TI503+F&btwtzgU+?%jb&3WXOQk^5Ncx(oXa4(5VXop zcr$~{K?aqR3}zP@obED&-Dk`B%Gdi}cFupPr644*{6EiP@Mt8YmVwLxL24iH0t3)` zYzP<9%>l1NK-5HVDI^AV*?G`D5%W2⁣*J}C~=OM=ZJF)acOwJ|LO_d_99 zCBjIiMgN%>|L0o{TAa_l_&@u6@N{0^Cx)4y+1CB%KJZ`i+<)0y{}t~3SGf6K`ObgM z^Zyx^{A0*^$l$gYbT5YCb_U%Q3@Y;(q*@uon;7Kl7?hhBG|Cv%N`x%3^g`Nfk~`gU zCqvHJ^Q~AN-n2EceNS}fzUaQAiBnFe&c2wp>Leb0>fK zmGt>%QsDgm0i-vCNkV!+klF`Q6M+ZiIt(Jw5pwMV+sE1pzXQTBqSYY01#;dgWQGV*AL)jH zt_X*adZ8`)VXcsw2ucPw!0RLAQ+*&65Hbm=gOEw+jd=c*S^-s>Fj5nCArYJbx|~saxK2fon7YISN6hTN8*iItM#oN$%B2b42dQlO$G6J2q0I!B1D;D6D z5qw+@vVu;*J`)+q*`^~SS(`L81UdH%jYO-Jz||1wI$$tk7UW7SYw*>LSnDH*AgoHl zS06#tz-uB1gQWThwVpz+o8Sx1;nfpz-wLx)s(&K#YQu@VX4tO)Fdi zsdSuR8sS_x!xnsX2)v628Q8QhTx6ZM*sgGydFfhhNL(~k-D3ue=3EBOS_Y;*P<>={Ny_JqV#1gV%H^$Wb41M8KX_M>KQ?l z!AY=Q#`*u5zzj46%-}IPt{Eux5$MVf$QU5Z5Kw)z>_5|r|BP!uh+!q@DkApf|Jj%S z=UV)qeF3O4VxIkw>E!9oV`B9q`enkXyoagX(pI>LDX? zkopK-2Z3(E1E25%?+`)8;vl3t?3QrI{ooK1vMd2kLMtHf^bmp#?ioS$7NOQeCE(r= zXlEq8G1Ysz`R&;}h=1^)PNPUE>GJ^DpAS)IWA)|A+>m#(92*QP&TZoGUS3on6 zWsxt&vPho=Ii43%0pTEL;~LO|b(cUD7NjOZBa!<@$ZWgZ1rTJPw-8wz+pN=k@K_%* zi9YTJ*`*7qeejd;ngUZ9M9LmE)(5E?;DY!Wn5H3%K=j(?FM?Db5H6$=gp%O9dLb1P zRJIVM<s|RTbRN`QVm|>Io#Q|IpZny0#$%uZh~y9bXGlBBpf{ht;34~>|595)n<6=n|7Si8 zI@3)3_GwA&kQ2#He0h+iG*8R^Q{g8p-D1**A$&_adRbLruzB7QTr1uO7 zcNx4-Fj#G4&|eMe7O75W(CT4OX=9K~Vqo)UU^Zl6&;@mf7!<&Jodm%v%q789k}7C_ zE|VF9PMt~B83wau3_?u|`in)vPO;a&lb8VNiSWz?54(ZK$`}`cXJ^1{(1L^o;H7bp zt`Ma5fz=Dpm2$9}2(5B~a*<^r+95R&oDE^X&7Ak23A#SvFT>2g40HZ6%mWR(aW4BW zy#Bw~rvH*#|I6(7FTLl#>^=|zF?Rk}-v3{A|9^!;pxQ@t?|)DYwHS29o#MIwYIpz3 zUimL@5rjC;gZi^P=l}DZ|IdH+Ki7%>oLm1hO#R4^dWFG#8-w{`29pI0ma|x#7PC0d zW3ZUaq*2Mh6U)F5%)k%;IxUbXh(Xw!LBxkaGMqsvfk7vO-Mm=9rBTACQ!aFZQsfl1 z_!*i>v$fM_>u1k5&Rbw!w8Xk(iCy_}*P3-M)oUH{7n!C^H%XjgoG=-(bOCa39Aw!x zWObZDY@cCVKV%;(a%BXmi6He3RuZ!sGK}hk)IOM`L1Y^Qfv1E(qjQE4?S>KUun{>> zR|s-!f>uO}Rs@Kv8`ca#I-#IyNH4TSCm4E=0_eI25VrwR>A(-%)(xuH39N(ENAQ{m z!a(i{LD=xABFI=Afa#bswRProS_9};r&Oz34Lx$$y zogg?v*}VXn4L@T6zK9OKr3f-l1SgRzBec#DuKEbMPXuQ}Dj*0cW1R{?kYl5eN#xoG z8+#_~2*jD@X&?lzl^}H!oPjWW@D?-kN;;q{GapUfBx&B#ymsfSqAk_ z3_9P1H2;g~{uk8#&m#AqLE;{R_+19?v&wlN7%IOo)c#fHfidkdL+}L#$GxES z)oL>s6eck!G%<)(Fz`n(Ft{-=XfiOUGcd@5Z>AFf?QdcbW?&FwU{C^gmrNM=oEV&@ zxOU$ah&sh!wv63#uXOB1hLXp;tzQ@>{AGb0TMa)Q4pKWT01wk4Pxrvt5Csqt(r<#W zv62vTkV%L+kU1boO}Fqr=p;JuDhRgupgs=g;{RMr{f_?G{Y+y0Ag{Li=Q zKhFvf;#~EgVG|4fY^c!KtU>Lau14B9;mO0^7P zg$(=&p!!HCn@PHaO+KGTwUk*cj!89^K{=6GJCnnrM98U5+P_CBY_fLJT;r^zHl^#F zs<*f`ZF8*M=v2Gjv1XlP)he6PrG^=^G!rIigm)AR-2lZQz;+GAjhCk09f3kP$fu38{}X!<#k3 zp*4|iSTlInDwg_4E4T?>8NnGkfpt28hy~k_nh4MFK9DL2QXfGkgEU|_5TTFD!R`%L z_AZ6oMg%3n^%0~df{>7PbnvPN&VbZMkZK4*DuTO3@M;KoEjMz#1F4VT19T7uXqXOm z1|6h-1X;QOuYDj4NbLhBA$1TkiChz5vLW>mTpY|rsgEGX#iNn%DhbS(38{b}Q&A8S zB7#XGS4=k9bI@uc2p3!*%>$3=fqF?0K{$z7A0dl?4=z{$ThxbTXD+;Af-^`h&q-BJ zd?o0RzsMmUebGCq))7?iP%saIoQf9ML(x3jf;lcl^IVGNITp-xESTw#H^(7wj&0sz ztLz1m2{Y95mPz%VvEA@q=hT1Wi=e~0g}~=LGavuYbnHLtNzk3Mpwmb9{^vXas;vah z{Wm%FpJx&HHdwVU{A&OCRH5g1vnl*#5dOd*{D58k3q#a9j)MOTmES<9;sZm$3x;PvE;;9FC985jgWdvY0+8Q7gz zRN@(;wktN>X9zyZV7Gz6XB%tGF@};`%pLC-rv71`1zuteS&as%ARtTT(CQkrIt5bu z%mc3(K<)=YCXyhskg5l+5pFm{1hP_Y(SOFJpeZ4~mH$Q7|JT~`Uvc$+jfLN&r@Rzt zzaiFnORe>aQsV{1+LMa4Cyn}Um`}cIGwX@%?59qPU)j!kYB~F<#jGbfv!3eAda5|@ zo$jh%;=BLL9s19)8?-x&VJ(>44!XCO?Kr5-AaNda1Bmn$(7>JSng0^U|Jxq;ufFQP zM&2(5vqKCb8yUD)FfcD>U|h(+y@Ww>6NBhs27!GH;>#H1*Rh(m^4aw%h0NBCUuIFT z-mPwLK<~-OX_pe`-%ML_FL%wug7r`G*FDTz`yg-K{oJ+p@>btXUve#Z_W6hjM}k^+ zfkuGSW*R0;hTMe*sgEF}K^*7|A9#HPsfOSrr0PMdcaXX8ItZBonF)f7!okQ^z3>)D zWdyn60hxrsUe31w zLgKHOupT7=ucy!$Sj%}dH578~^A?g(ACY9KZT=#Mf_e7&b0Nqke=Zo!w#lCj8nP>x z1F4Uk3g$T#%(Kl~>`=H&EqAG0>Rg7z1q@j$89Fa>ul&q?=)dS0&>_#<=l?Sw2O-|0 z|M`!DuG`}}`CsJHf6>d})1*DOG041P(Ecf*|6g41Key(82AO{h0&f{OpD~C$6fpS9 zkoXl;AC-M(DE-7x_>v*}F+;*FhQQMdPP-XQ)-q^J1RYGu7s@tJvb_VOU48GghVvjSF-(~Cm$T;Ia`#jJ( z0ni8=q?&+`^TBvDM^?ZZX)LVAVXvtbT-D zb}xh2HU^>9ASAGlLFfRp+EyO@eNtYtG$U79=WcN=-{;eEIHc!v_>>Frb8e(8x|6o# zUiz}zxoaNguDPGH`d-eOyLs#G7p{9yyx~#Vx<{pJAC|4WmofW7Xv=o{yoDx-lZ?Rq zBWN`Qsf^-!^kYDmq`<2oI0I{agk1H&*|2&C)a$`oA3-W3eeeSr9h<`Uq0>z)9pfNX9x9QU@WE<|&Aib>VEhbrYyQvPgxJkctV7gw#{8 z(YzV(nh3H?0?x%`pw&%i-6b?GWMq%X`Ut6>g6@HWS4Q|5kWoQQlB9B;DvBXX3L(8D z2ni8^kZ4vw>PIxMVfq3h0#OEG+vUx%&7BSD0omovfg?zT4poyo&jEa#IOI@qr~C!> znF}nEW@;qQ(@9?>)p*!^-UHQL|IN?**F5)M8+-w}#D)KwXaB38{;$3CKSTZ-2K9do z3g7wE{|jn?=8Pna{xhijXApeP!1aWI|0RR!TULkv3|T+fivBZ{y<;eO!;tfYA@wdp z#3csL!=StEjFy46#t0QMa5;e&D)509lf&+>L(qbtgCH2RKu5U>M2Xn8Gh|)jEWXX) zw1dH79fR*qro^)h<@Xu8J~7Pr&kV10AXN;c%7C!p)e5{$Sp>RRb{+lNcIbW0 zj;Ey?9u%*;SGeY0?uuKv%WkC2znC=TsAu&WlcWg-aXor*U2tR&*QFoZ2{}ap^AuCa zngn>IgT_FvgOJ&J;ca^1ZIHPk$YrdM$_UaAf>%cRVW36Y`ryh4JR%1=Sq@eofv#l5 zJYxa#d^zL}5xg3L_lC3sYPI}p;0SUB9vTVVIs~tbyg{8KNCgBYOO(A!zzp~p9jH&F z>{*0_iXj)aDti{g_7i~);)c{g5E4=oA(QYiI!KiS?-Ripkg5kxLhB&-0G(r&oI@rE zfjdXgkvh9nIlEMNO(biRMzAuHvP_28LAV%@x(O}|uZbWGw3-OQHBXyqmO2AcADO33 zw@3%=Z-O%{(r1{bO~obLN8wOXu0=%yr0} z=a@Gibksx6Vwb$7#(66>lIJlbEn&@B$53;eVbWcWwZFx7|7Y0)ItH9|nSD^YR z@g{@c2?odA45lj?H0Lr%S23`sfh!{|@V(xwh*}8BVg*ekG0QP9XoKn_`AjA6sT@VO zgle8JxbBA3N6bm*7^)tD>Z9rZQ0gOi6$7b&AQcUyZi3W7ka-^n8?G3x3?j1RKkF*! z2|S$Z{&TJW&$IqN-^Tx(>;5w>2aoUde_`nR2tuXT7=pJj7r($PcPF!#`ecA2&bq|X-J+0gOq2u_k_7lGw4}GoO`Mz@N z>!P*K3RXYKT=gt-)ic+{Z!M-hV3_oPeeP?9X>S=?ZZo7`VTe1;;BuM4_z;84CI->f z45GUjgm*9qTwoBp#vpK)LF_Su*gFQ%w+tfh7s^7wPTjr=56?Q6Rkc1S4N=vNH-dES^}g3f{>6& zAH67)McT;qj($Wtr0PK?;ewF4AxP~5sfi$@L3j(KCW3c=;FXb1DCp)pNR^}!(gg1l zK^TzANIR$z`FuI#`UtW_0n!_Sl77{Y$_T9vLgqs1BT%;peDSXiO$rfo-z-W?1+9Zp#DVFI|!CZ*o6!Y|{ zaAcM`4S6v+q;A3_;gu0G1G5f77J<}9kUhD$>LWC9E6`D*^DMKW2)RnaWTTYQuoZpK zLOT~R2?bF`(%>GXvVf2fjmWhOel~1*256Na+-hqK=Rpjy%UxgzM#w6$vaP@y!N9(Q z%+1Y1zK0QX(;xVbVAwe4Jgc0!HhJ@H^Fh_LZO(j1kXmQWwauAtmA%L^YoT7+9KEzT z3OP$v3RbEVZ%`^+rF%@|IcawLcIF_dG!8sYyTHC z{LgInpU?9@L-JSV^q&lQ&l$3xFeKe%2tC8#zKg+PHG{!q2IV#e(Ma&7S2+d-A!z-B zLUA#$DKN-HF(@Z8NaQexWijaXvxV(osC>ZR`GFx|KZDgu2JgLGiI*9wo-lNNW|;a9 zw4@DQp}>1L$P3{hbO04%i#G!4YC7|dSypAo!SiD%w_o_YVJ7XKHT z{aH}M&N_~!pt+4|r1*ngd^|Cu}&Fz}`@Fobg}lylhhaM<*S_;zR|&vUO@AJV=j zy6g!K>b!G_vrZ+?Ig>Q!Lh{^;S<7x^FTb6$>`u4NGqgvL*wr`ASSQuVCC$yl?H>1rZtHrOR&!>2TWkRh(O0QSmbeG&Yo<$4Yt5$f{ ztnzJG7umBbb?U*;){Ryf(+uKzjAJ^DV>*rEx{Ttwj3Mi{!Dl8IM|B!Sc3>jNqBsZ% z>FL00CFGh2d>eQhxEg}q39cX345^PGq;7a41nGn|z$+urK?>ltb2_0dTCkf~H9|lY z6Qmk~kg&=KbeIq1OgVT>q!~~PIadKv2SLW=G{H3ya=imxI|p0T4PCkbKj8LVGubU1?4N3ymlAOzlFgw`_xjnr8uNh2XCt3(Kb)IoSiNUa1{ zU=FU2VB|zdeS};YS)@%dOGB!X(xxNVM-VngI<(=O18vSR}{?DNLpGg-~83`Kym$dvZVF9X-xZVCU#J*=p{LGN~ zh#~y}L-ZvEpTnR71WcAQX!kM5HZTZX(!klUvRa&2GvLQ>#)~HkWLVUgj70^P7rL*DQJui()|HfOiW8a$G)+y{Li}b zKg$YmO|;-Y!^}SnQ-6RKiZA=my$pOqYvXH%^otBJM;OBQGeqoRh&jNJdx@dpDMR~h zhMWTo8ubjqSqv=EJc<>vfeYn>7we=hu&>zc*SIaHbyrCHzQ~@#;k^e#y7om*Iutkk zc+AwJY4gu#FTI(!;!ghR`+2MGWi7dtw%}UI;%kWuE{DxJ9Wdpn&!po)GtR0coCMv< z!LgM=_yB|a3r4Na3@U#aWWIvVpb&Zn>K2JzW0gC~t#Vqx@PLTv5m|@b%5DcW{CDXD z@6(UiU=+PsKWe2;_%d;~776!O-Gu2D*^A6FW}BwZGESRqSGY_gsD?!^f0T}7-TFNxHTA9bQw4dnY5#1?H)f^+Bg7 zfa)X2soJ396u^~{A!=m=8*PKG$A(oykfm|0kopL%GJ@AfkaY;iq+VzfXk0G5Q75bc zg3#(C_!0&6;6_Mg1nD2asv*!d5b%>u;q?)`LjcJ3t`LNT*GKT( ztSaDTa>`!i;7(8pa!sV<1?m$a*GJ%Ls1Qa%>mzVYgs6|Oc8lQsBL$aSV(KFqcs~ft zKvX~unQ*qWZ5ncYgj^FL&l5>oCqXJ8WKz;H0dmGWG6^{W9$o>VGg2m*rA&g?N9L&$ z;oT)T!wg&{nL%o$wCSL|Z>iHDB&0sVBq8+?8i`ybVX`3=lUc@0T$K^BxMenImy=EQ z9E+^E7FlyKt0A}utjL89>%mHC(Ao)PHkvYc{{p$j!Kwz?5K`H8xeH*Uj_~0}C<8K^ zhAay)99|z;f)CAr2*OEhE=1Hv@C$+=hNF>o*>mi3=Gy1XwF8AM=*$g^tl4k}K%~G8 zfI0Sg3vIIJIc3gw%A9YXH{BtBhHcg|i`0eM5qnjGwu{uK$PM{J(%H2r*dv zXR-ay5OJS5<}pLkH4utA!Qiut!EQN&;VcHtRtBju23}9_aX_-*O-9g42tlzqu^Hwv zIIm>z-OS*>mm%r|L(WB}y2oOz&l$4MGB~Yda9qLQznc{_UiXBd;}gTwf1tzMARU|~ z;A#b40WASvgs}`vE&<w9O<2>0sLox{UAtrY4kk=GnzP_?{pM#APW+jE z{{MuNzgzcxsM!3pVD;Ukd8gxNpNN`zG`MeHKN8CQ9m)+Ei$jo6-FlH-1`$055k0V;QJYaDeD(*uK7x$7!D}MO zzEn5~Ilu=}*Ffqc2pd8|Y9c5JJ<CtPTQIMv!$2NYxN@^#XED zgxoEHOch~PL%4?KApIa@61fhNu}hb>O@kmAn`8t6Z!AKoiNH(gkgFjnt3*l51PDT| zi6CsV#7Uq-;FG7I*F+Eza5^-Dk`v5QCcxP6N@}uc>SRcjWSTnJG-ax3%2Y^Ygq0++ zZbGiAkn1Kh@D-?V61fgSV?*jMNPUFAN`ll)kg^(HbfYmKvdAPv0kQ}t8={615?Vsjz?fx@tv&V%m39b{x`hy-{{bPhMsQ>A?Fy3*D`2MWl-)1 z4LQoxGe{OO$X77Pl`$w)F{r0A2nRDryD%tuv*`x%n?*^x=4%Gm8pd|pWK44{S?X1} z%DZ+$c;}wPsV6e$UdUQ>EqCe7eDI`9$@+&Co1f;axSF~2QqtVB;gb%z*X&b`n!~BL zh*fKzsO}+g{Uf4A`{b;TE4m+4^Esv-vQs@|r(VQby@<8uQ47tZ78=D(HH@7Cy}d4> z(>S3MvIbi}y3-_~53{j0aOz~L|}CgXoIRAbU7U2#2!dx1mCX;K0*PyC=OB$!AWRM z6xN^>QU^Ip0WuZ`sgEG*H4&s+1eqj4tB+*Cvqg}B zIC=Og1#oW&YfS{Hk|5O(oJ3ScHp!6w5%KjA%{0^_3;S5Nv z4KWSE#zWd<&w(vFnG3FuKz&b0ePoe6(;|DOZT3v-%;~mZ22^FvRNK7i_IcCoa;Mql z&9cdvVUarDB6YrI#5UECO(M>Bgq?1)n?GmJzt3R4lfmH-L%=SEz}*Z%J3u`hm(>iG z3mEkKKv64Z)fI+pI-Dw7c-$sVSBMjB|*}L8`O#8(!^Do2f-wg9W_ueur{|~xhaQ%PQ z&HvfA{^#2EpJ&H^zU}|HHvebe@SkPvf2NiHxi^6j!_xl@Gk-Bm`O3ZZzxLJtlGpwV zp8L;r9)Wny{pUXWUt&Ax*lnitn+%q#8B|&s6q^_n3m7!Y8BFTgY&+P^x)`+U8DtU| z*t{4Rv=~4edo>t1j2I-H85F%4)FU|!lLf5v1a0yKY|F)6>ZE-;W&FCN0(#{_CaNaP zHq2RWRl32xYO8boRUZhH&K7lRQSq6i8#>!Ga;{m_JoDID=CQMkw+x6Hj3>tjOjIx>$ObiH;?NzitI3m z=rWG(wN9TJ(y*>@?#YHVH@dbx>Dclpt?xio)0UdW7bfj|*1GC?`J9u*GmaNbJ(4nE zUr56```p<^34M@y#4xhiC|kJ0Z0XxH4)5PXj?F zgW4eGz)A4(Z0z+BtX9HaAAzQdz}+J4pn4rB12hl^sfi$~6jTFiA(fF@P#wJ9L0&JX z4ZfQd(iPG`s*E)Ksv$Lzx__lQ?8G^EH3X@8K=(etj{1SrLEzd4bXzM@WmH0BWdx~` zz+|C3aWxU7K0;p04X=ja4CFdU4m>Ca+13iFh9D%o8j`X}g&;ijk(6aJ6u~BpkUK|` zmI;uW2tq>YBT#2NaT26fLavFR?4$`MN$@@qN__;bp(dF_sv$TD?jJ#FCHx&Fm#ss&_EUy z8OU`ItgZpwOAfIenS|6w(Aoh!N{Q)NNHt`ZG20B(y8+cEcxoSLg#xSDAu261W?H1r zfYkDknFZ(wYW7UC%<0x(gjOF}=S&4vM!C~$b7$J-&a}x~Y?HazC~?1j+#WgKXObQd zxvgI_=-p$`+rXf`mcecXgWCpBW#qSo!F?Tr;|f;0xeO+g85I&4_yZW&^%xiw!L<<+ z1A`C?w=9FOkBC7gL&y%Pyh{ufZx|}yGE~1}?fS_-_a8&$Ee7)r27yEd)oKpsnG9ZQ z8R8By)IMZxd&My8ALn9F$A^8>f9{?C1^53KIPjly=YNiE|G9R6>Ld2e|GBsP7vA$< z?)ZO=i~n`6fROsR|Kj`pv#$niW#!-bU-metG7`V?U-Z_0!E68dE`!kp(EYjm>;5xL z`@@z0kimT?gKi&#atni2C4*Tjt4%wDc@u+51%q%ps4`;nV&Jr7;Id*6wiVC_;?fCY zQt@Mu4`fgXV$g_Y(NE&GC=z$66Z2@1@#|6ypQ08sQz3eqavX@1jhHMG(I*|=D;3xy zm$%X+2H!fEVjoYzRxtK+b|M@^rJg*9BKB_sE@Nl{UpJX@YKar%qG{XtPgbn^9B?a(x88O9-(`NH?MrQXlDs zb-?N%$Z7<{4R_!Q2yt2hmhj5YEXe;#KRTaGc!dfvQ_n07T>x`Lb$Tn*>K?Esvv69GzKN1^M z0oi5GBNAC>%_Xw7g48C+br3QeQu{zih-2)s=h%WT1cCPrAtpdbcnt!njLZntN02at zS4p612yD1n#!PSnV3uk6EXZzXaP>6P4BQ@o1~|A1Gs>K8l0DZxYqo6$w9{mrJ<~dS zrfuc|5XzctpEK1cWr0D`JdLPT3PFnmU8gdbv@n>oGnjWVIL&47UdG_PnjvTtL)322 z>S)Ue3<~88ykX#zf(#g#Ef_d`8Tf)21acVUs~H?OF(h4PsD8=N^ns!KJ;$Q|Qq$ft zgsx$bj$_~rV9?0tckW{FoX4HKgT3Xh?5xj9%l@mZ_^+_wyUd)=V*O88YA>->o?~W1tH_}phJG-`o1w_U*_@N%wRf~L3t{JY(InO33rhM zYsqu(WGs4^w)lS1{M$+MZ^zHM5jEpN*pxHA9fxfzHXCIwQH`0d9Wz-wX0l;izd>vt zq~6hw>(`6zgY6A*Jn7KnONrByEebw+PGV9IP6Gc8)BQ;WZJ2 z0jZ3{&Ep_75rl-CY6z)`AS9$FLM9N85}15Y8rdrY9!4v>x#gfvN>f~zJ% z7B@?shFk|BvmrGUW~~ITkKi>FoPk+GfeKQL`UqNDqKc!{P*`gsyn>)RIBc`#5{V$S z1MwuJ4ubW1KvOmlHnOuJ&Vz_RWFaEfS)frhNKFJG;gu1b0k3y38IXzwR1<+ZM9>-s zYkg#qKEot!x@kJ-OnA^N61a0}lsVfd6I2;N>LcsS=@uE&EHb89rOmZUn`@If$tH7> zaq2>ov_%FXN%pf;`L18hI+DcXh5aN{H%_0U~bj`bk zfqNx`;Cf!|X`;sSbV8MZ4 zJ~EE!HjVBzjqZi){Q*}-osiig$Wc7TQJrvP5Y=KB-3l3DGl&EaokcY2MKqd#k$Gg3 zVQ`IRRI^586Xc{F-7ru!WEj~DsgFR@J&;w_;BBXnxgJP;1S3I%au9LI3IrI5xL^o& zf*feSDy&K(P#QW?oR=fG8=LMkI!TX1D$mxA0a zg3J>^NVNJ0UI#%Kk`_rI1Y5u$X_)}8jKt03AaxKXiK{*`OzcNPMoAMOA|@%5Oj0Hp zB~5@M6X>!B^hydore~5e2{F?I9^!-4L|92k1%yUgq)tNM97z_kzLKzJ+Amgkuk>4>XIpciIk%!V*5)8|;E&4z_MND!jYCVd8|w#t|ZimdcG zmZ`H8L%PL08@Md0S&T{1mC`Z4hNGiVgadrV??>0)>3VsLEb z^zIi4pP`Vv#H4VoUG;YVuA@;?FN9COn6Thh=JNYlD<0&oew?@FN$T>4X)7M3t$C5S z{BiV>=RvdY*-gG@-g8}i(tV4~|BY_^*SYjx@AQB9J^$5ae`Uxx#9-3RAXUh~mj*(t z=?q*s48k1@qFoFUtJoDbu}GZdmcPQMa+6!}8jIXPM(KTw%1hWZR&pB8<+E7C?>JG) zXO3*>B%PFn<^?O=>UX#|>kR@;iksaU@vq3j{!}bTYfx9;CkmYb-L68V!pDAR_4YE8NA_A*l zn&AxafLk+UD-eXO6WRiD{A^3fH@LP(YwGz@D;&R~4t)RmeAX|#yq)nQ%RSKjIf|J-QBcv5{ zkPeZgWt_Nq3>p$Mi-w=9i_CymPmq2SGHH;|2dRBTwnuowYV zTM)K&+H~u*>CiSt>MVgQf`cD$HZ(=ZMV{n_n(0E&Y{ePvM|D`tm*O>QT zxbuxj!waRpZ!+`#t1SC3x#_>+uK!9q{_}yBz5QpH_?Khxe};*_7`*2&NM?a5BR+oy zg)DZ1DhAbT2Av`X$5xTBS(-U(O)GZV*X(d>-tXUaG-C3(@aY$0=3L8Ib}whuqk?tM z>i2%F*!87w^V_^FA5vF8k6-aRe#L9Q#qXSFJQkgBS9;oG=9Rx#R{aKD_LOso!Fwr# zNJQ1FMflaji4LZsu$H}9Mx$W(`_2lX%OCG7y+t}AS)1H zJsr@w2l`>4r3jF<2k;6AjRC2HNUDz@OW<(TN8kks^+VY6wn3>LWGZDtHA1VL*CAYOqrj;I$9r;vonZ&W6-PXhU<#;L1qR6MDQ4 zq(cND!G|g!DkJce5NJ>iTn)h)`Eo9K^3J)C`Uq0vK&m7N8&V(1I%dHckje;o1)ZD& z%4KXJJ)e5Xmw9S|a=@DQ`VHF|OMr6Mb z!-kwa2&qjlNwd@$W~nn^^$Tb?4tn}i`cz9alr|k6@^A(u3arv+*dQUeAgHO3KGhn0 z)`w;KRNFKV($AQnpE1EGeu7S9m#}XapGPZ0)@IYiZzK+aF7;#B^Pgeof6(FKJN`55 z`Ok6ybaOiIzW+R{{xeMd&oJRP!_5EuOaC*pykIbFXW&a=5C~=94q%XoVbRE8(28fb z%9jgjHBO)9P`TQtb$d|PzK97&;%A&nn0Y#J&bgHN7c-V#%~^S~bkn2e10R|WeXQB@ zx_I-Gymj~U);`Kx`zUky!<0pLe5YPDuir0~vrag5xmeUZ{^02g4}#o(2dQfyq!#$fAxIsh71*F11f3*; ztdRp9UA_u97ASApp0@X)O8IXw~ImdL! zS#uB)-Vc(qONG}*pm`$jY!Rpef>lW}Hp!5+3ZRn~z$+HuRg#otGH5u?B1r~};B}Cg zS*)aG9IkGWm{~LgA>T6sncsxmQxB=3AbSvyN&WaZ^iOj%NNkL|sFiA-7$tY<8 zq&|X^$Q2MSHgbK0rUqFGnGG)z(aK~<2MUcuQ$q}w$Z7(!iosPMA(zr{HheAyGKqty zZpG?m_!>nt{~~jd4j!BdJAn{!8X-g$a!Mg2NFjRBE2H$O5LsL#q#pz!k=0mZ)JF!H z6ZJDd+c3?Nr>MlumI<585VKUW{Q~=j{}KoPvmE-*bl^YdG0^$dtOx&d9{n$L_&?kB z|DX$1+dnb%{bZQ^UvT+L<=MNSbFEH^m}$vRTY@^Vk_?@iWchXBx-O zHIAKY7`4POYKckQJd?P2hHjk1a0LXa zl5`{6A-AULM6~EcK$aOGNJxbOI`;rv4e5k}7Cb=uK5!{mb%UsAu!`seHzC(Ypb7}J zKGF^Xb$>KrBmWEAbjAW~ZU|H>!N%f{D;nmVj0tL8gbq%wi#b zX^shAF+muZBYS8fu%jVB$LN9B*v=<6Pnn8b86mUbt0N!`NO6irA{XXpQkYz1#RS># zngz~)^c3J+Tnu=jjcf(HenA#QV@a*njS0|Je_LF85(P_@D0(ctyed{|uAh2C<3Pwbq0=b712Pt2 zlF+9c*{U|rga8tO zjIil}FA0LwK5!Cp8IX23&R!2>dAXiL~eW`F+ zP<0brtBnn52G?o?)o4Kw_}V+jnF+A7wLyI!a3`o1`7}95eFPyPwGX^21mQxKC_pBL z)KJfqgI7c7br9@C1++mq$PAI9XR(4u5d=Z%Bgo0ykeUcy4T08ayCZfFLHa(ZWFB~Q z4ob>9=fV(lwg^%s$vI_1)+fO0AP~bTL)Ixn#xWg%>_MF%jGW05$) zDtVGoTE9_RziH}3Fq&+XG(jn4j#|b7hWwr49hVpue&XHupJDfZmi_;k4uKHQf&Z*K zLAzF&R{v*N`Ja0OxIU`9!C*RxL86pFD3XCEm_a;*Nhw~$Dn-@1R4b^)Afm}AvCArR zl6}!UkLu;#b*uau*GBYgO`d+BaM9_e&3C%?J?}sKrsvS>rX3H9R$b2ppMd~6mZxW* zN8@J8{Ka}HGxSoYYb8$Bi|x~o>o*3UZ(x$xX_DBfo6rZYgL>f%wED;}t{sxsjN^Ot zqB_(9s zL<_t!!mN+r6$)~F1gVukl@ZdJr8>dj>M5)NUgKaen!v+upm`r~2d55F8A0nENbLhA zHG*m&^^s;^HKg8wpP2w!fdCzdtATZepbSV&1epecR6vluLJ&5*8bUC@r<+1*B6#-) zodF)5gVZ&Ug>w)#q&|YwL=X~E2f^zjcx9vru8F`?Mfqssa-bD)(DUWs6%dpm=ad6R z+3=bOvS#AW5@mNF9VsLf6=UhnFF%Zj58P(dr{etpp+U;(Fj+B+M!a ztuiveHUkBjjWWborw0*1tBEkVkje;7LUu)=)k<(Ka$yKz!)qc419{CkoQ+%`L6qSk z;q?NfzJQRp)I-Z&c&7kXEJJDvOj$G$OKgYJ!X1E>fouZ2+Xx8?I2Tn7v>Gx`nT{WU zs-cvr7RggU4AjcVGIcWC3W5wsutQoGmZ_61U~4k1k|$XtPOwRuW|K4xVu(q~1XK8V ziHWmq(-vqHZL(}WEI;j@-qN??TmLH^_%C$mKmY#!Y`Z{+d*^?aP5-&){$Z%U$KbJ? zL93HNDuqELjzQRufyowhZ!C*CgMcxMv>m6WpR8??qE|U+pecTWY5EMu(q-QD8+@8K zdpB+JYTg{&vnOHZ>4cf5<7b|YpLsT7(uvUiqi&7646>IPp06Ak`3rl(b0zO&EbIBdkZyLFywJ%LFO& zI0y-;kHFOstR^yxkN_h|^GFB-nG`n-7dH)uC|l^`V6S_v))seLd>NbQ43 z!aGoC49toNDv~(K1T;c21zrasGa%&{G6^aD@RMe#(+DcStZ|S_bxc`^2(lUo8`4c6 zvOdD>-$9JULqcqUknjo!IY2Pj_^KhZd1H)J>*FHkjm?Tdz zNuB~}V1N$ugw;oo{*g`6G^>OukXDCbVlQ~V+a%E7UgB)C_?cn}3uMxkG30M&tT@2X z_kelfXNI-^S+;;GBaS`)1^54F+wz}t_AiFU2kgO{7)&NJ$Y(MLMl%R_GjO^vFsp&i z8sgAq5HV+vc3@HQWYh|0)r)2@N@g}qjsOBzS!UU8)LRDP$Ns3A9!rc7jpt1jtR?df*%A z%wqaLyMDl{tdZ*@tJofs@HR!eWFAdFCNXW$5F7(LsG^VJrJ~)J3a^axLYpAf zkXA^YW^k=$a4o2>2dRBRpxqpJeS}&EA=gTfQ1=J9CW5db)ewY)48%d^gCPAQcr^r>DnhP?lp)tWz-u2c19a>*r0PMdeISC^ z7R$lwBjm0Sa(x6Df>U%wU%3ripMa=|(CQ!v7hDr%gGuP!5TF?%@HQgkItbDyf{-%y zsgTM@+75gr1ZZ^~O@v9p zt07DVWcUtFLe33FCQTFjO%waksv$TRTKiy*=7H-YNO^@!BG*dDZ1_SxA{me}6ODuv z?^Ge-^$~W{z@u1@z60d=bVywZserI+LF#;2q=I@D5bvXTfo!f>@)TsfX5ho#;nfhR zXOs$>+JlI|T@GhJf*rzz*FK>6#KcLUS}7S+^_V4103q<9RTjxpAV*i3CQO9vJAfn! zvy=%&2|b31y@rXsy73eA5++I~%+kzTA=7+Rf7%U|#c%Z1ep6iiOK!z?iMbyc+io$& zY+Bb-zz^)$2svXOrm%wh8DPUhD=2fd0+MyFa%_M!E zRnbPLs+}(NJ6!5_*fnmkuG?r_x>h%Txkma@x!Bpl0aKhCj<~lS(au_-6E{&WVKV3f z?zn!)+>mKpw@EDY>>cEDcp$YB{!`@o$TF_iCUJsO#zg1LN%rAo47@tvDu)H! zAz}f~6LEmMOUz(#H~~3fn~Q;gPf*jtEw0uewA&yQbTBFSY|>W9^pFAQUT#2{}apvM3HtBKLQ2 zu_4DPU@lZZu7==i=iq%JST&RfIc*!#GlJDbIgs5$NMmuJt`MXef|KBy2vi?II!97= z>G1jpQu`p2kX3W=%1GKSMZp$?@lP_1N@h*k~BTE)v+#miX6fe~n) z2+|=!?h`@kBT2JZFhUxX19y(#HIal_1iTu8FvLv4AbSvyNkhn;voeBBn}0tE3)K(=N8pAhr)u6G2FLePkNbX%X9F5!+)N*<=yd?vYT>AfOMfgLuFs zq&|WS-?6}}A{c`eG_}OQ!>8uz7FTN+)?*mf13N1lv`PUoItQcJy@-~Tb)(MC%l2trp6b`vh1g?f4l@VxM z4y_uJG>e6FizG~Az?D%f^4t)lCW2Hwn539V7`Q$H?L>g=N5DpcFAk3FhSW#;u*wKh zB|%7dHAFl^A3T%+SyGNh!s{b62Cj+;R6r$6gw#ryBxc}_> z;SAwI%56L(L>Vz8{I)6V)db9)5NDf%kFYRJMa<3+V+BDq_-hzf+79xV&BQ{K!XqYe&SsWsZOd2Kh8OHZQL?HE%abmAgLXR=H>tviT*(4Fvy)ufQ zpboiH7 z&;l%~qe5T>%~Y!uyZLO*<1e<6PW~a|v@Uq%63Sx%6hnk{c;YZzM0d9=rHP z$gK0pi*Nh)ANOtFYgxQfFMf)4%tVv;380yy*dFlK2~e#BsgEF|4Nx+s*DwZtJ`Z$T z5af6Sy{HcTux2ChQ9Sw)tIO1T1ZILp=O7F1AY3@f2p-mB;Zd=6iz_t> zZvmq=qwqH4h<0Pp(Qt^lA;_c;goKW^f!Dl2+2FNs&}t|QG(e{j(tw6QRZmd8E)s&Q zZNpymfGZ=dpc)-y6j-N&R1JaViTrCoCuT!thyp6e_&C!#4gWew zjRV;e1R0Totk6aohyzzZkRA|(g!GIc^$}#A2)<4m&VW=mm?T;Sq~uWqS(5;%jF9Uf zWHzKeQgX>dLZF%ma;6-Fgs;|yEKYz-34!(v!6t^lM<`^;IiR6TusGssJmhgXcufQu zqk}A#leSKRFyyS0;0RJf!RsJN%LGWR1X(hNTpvl9$4Hrj>K)V$QLMCCtdwbtlxdU{ zc*z{3N)k7Tgx5i0CSedZqz*zRAv+P!NTiAhR3GU^cOnqBDhX0CK}ce1B|P;JWJVZ` zM5~)nxbYJ(t0A}u{_+#KEJbFUrA$I_VMQ^)Qk^^n@cIb5cGUU^R)TYhVjzv6LTVFSB*FT~0M_?`)Ho0lx$g`~6%Y}u^%1yA0?j9C#Lv)( zpCOSrPda5GU(zy;m<8-1Ga1~v84L>;q#_yE92o?>1+^0ttn$U2%T$9}^%JJr7cBK| z*cse$Fueb0!i;m-i*M#Hzgx8CQR(`p6s{Thcct527H)c)x&BG|+DGZj z??=wO7~Ff4q!H2pxfKY~AtI(ef-gVN46N2hLXi3kxlaVCeIV0A zpe_>Z?r+Tc2r?f8BWumymmEXXJvq)I})kO-wRlCjGG z9cl``G#qlQoV0D4lx;F{H6(490_heZuav{AkL0YAWU*Za0k4lB)exM7^n;K|wE75C z6PZMcn?&NNhQv(5M2$m5jYH9DA_x~>eFUqUAQ=r0327PPs-7TgT%q@WA$6A^WeysN zQURf@*^38l-iORA;O{FzDk(%ogH({>6CAku2vi+lIud+BAV>=4FjCnB`a7t`8YfJ| zG#@g01_@7GwGV!ANL^!|&~Kj553OIo@d#nVNz+7_EG*Iu!N-P}B=i}_cN@odnLTcEE+*vdJ&w4$^7P- za=r~_sWUuk)`oQK3~1dL*|#fs^8U1Ghtj4WNS(GnX~LeUjxABm8xp&=XHVLnKkabl z)Wf+mk0(w&mN4aL_WVovi?8L*IUCZv#Uf*xZY<16v0ad35DcT*AsGbGoQv+%kLm^? z@CgZ!vwFaX^Rz)~B?ud`PTL`_fkDCwyu*ka-1lJxuj>X?I$#E*R)XBh$_-xR0G5)p zvk9-!k7x(o<^Vpi$0)qTFs#`iw8>Ed!tNL2v`Av;t7h=Yh-z!D^psI0Ldi z0bCi?K>9(DRoRff4+KzCPsdsRZkeUdxdkD@( zu7l)U^56(kAAzf(Jj^->QWHTA+=kadkj2}Oz7H}98j-V02Td4(t0BmJ5Rg6*WC=I) zR3Gqi1xUREsfplyA1SLuSulchjv)OYv;$7z=lsZ6CV+?IKz$;l$_RZkk%TFzTLkGG zNts5$D=;$h)s1D=|g`W+{fN&DhX+k6Q;-LGR z;I$H*fmRb?a?$E0!?=EUWrWFqmYK){dbpQNz>83n>It=c#w?sE6oJf=;4}oIK7wr2 z!l@pcYaxMvM+-y&)~W|Cgi(&essH0T$gbS=okb@ zl?2Hw$TdY6w#M$b%0)m2t>K?Gt6f>mx|*BZW~PN!z7Bb``;o zQ-IV#kYx!nHpx<`OB5jWkqmg#5d3T($fhA=5_H-Rczpu$?jhuTtdK!D2q|S6C20}~ zLGb!W%s2uvE(fWDBuv8Kbr3Q`HxhIj6&i_DJ%Q>YNKS#1kcON-tfK_UacCs|`UrA* z31kohUK2qWkX{uUiCHVbMIdDkoP-o#ct}XWNCJsmml0y)Df4kj;i->|6DC6X5fBpM zW>V`TNQHvMc1TT^I0>QvR2jujGEJBa=fWmsKy!2OiXJo=6+Z!)gwzzIlE@JRk4i(N zZZbSVAq==0czpy}y#`syW*pxQss*6hLG_eTTo)Xf#dScCd3>iuLZ@+bi*8u6oO_A9 zcO_`P23#)~$F})bE-IbARm>s`yefbdeEI7+1-XxgOoul28* zVU*aZ9o1wI*JY75$s%O}sD1(85CmP!7702J0iJ90qB?ZI2qFT>anN2&L@W5ZI`D}- z5nYCn-J0P&S`ob_ksS=mK@6-048nE{N?|-kY0_>DI^h$|(iYg~uXHQh;90rZziFpm z;|}lI?Y{LpT}n2Xq%6>gn4}okE$!VZ?cJ&x(5MmI0_ojAYof3Q(4HWq+kzlHCg}P% z#3mr<0&Ga_gGOqAr+$ztBgjx2eD()2<_1}Q0PhgNYa&qh$G;M}8bV?FSK_IU)ch(S z2sHKMU7-v+t_Qk28@w_B(gVUImAp!npqGG`Kx!OFl?3Sop^@-&wjq00mE8;B3}x66 zbI1(H#1Nc>^n(<@N1nnr4S^Y;vlK9UM(}C~!hqL15C-x|r=X2RNF5^h=p3YH1R0cr z_l6)0NM(dZN`VLEWG$0rEt4Vj5uAj~2Z2dwZwONLfOfE&MM#=QLXenwq!{jra^5ekuoBm+o{ z!xylb#KNl{NT(alg)&TI+f8HJ&Eh(Yqg#!lnvJ7dOyj!M{A-l_YE%R3RRe2H5_&v} zW_y>;leLd!U=Rb(*TB~gK(_oKZT4Xjb&tuQ*{mPg z3_b1uqdwA(Y}W=KPpS(dLG3`S^^rkDmu7g6Mp(D1SEYbyikefddvc#o(XxoTO>v!v z(fpZKaONRdt)38_;sNn*^0yB3dPh`%sNNL7Mf8G%=_fs6&u+CbDm>KCX=n5RH$ zU~@M{-~$KHtT2r4#){xI3myifW`RU5gbg7LW4j>?v|0%+2sx9@C=RlW4N?t3NyFGq zaL)*IXd7fF9h?Lm6b!l|q1!yJ+bFV4)vs2`yGqfk(jc-UxOz!)_ZG9*Y6eMj@L_R+ z;0_T#1A_nq10T4@#K*uO%qXL85Kw9yT%qh0t`L^gK&$BOC?^L_qy=jYA$ARF9 zCnKkykDPu!e(u%yxmOF;KPlP#B5TEi)Wx?UC}Yu$j72vx=Uzyjek!DKlUec%V0BsHe9~=h}L9U6Qb3@?yAP5_ITn^ITK_=C_O5h06Cqk}zR6UDT zJ&RR5iXfy4c(pd9Cc;_=L24Xi5>oqMlBhKis80l`gP^2yrabsSIa#L+c`(9QEC*Vy zAY+#Ssfi#Y=I$ZLas||(xddrzD3Y^IfFQ`GAutJ=FoIV`@cKx~ECGaI8(P7O6QC1A zpaD8?hX`I1K^WrT$_P>)q1Qp+mD`}o2*!Yn&Pf`FN*afX8wP`rQJ^?B1mBMUXF&21 z8VRY43_v%bwCRE`K!H_8ZIJN?WRhStgscEk4Z%rxO$1>;3M)J$T1|w>h1Wz72Bdg| zw71bnw6d2}E}qK57`)XNvogZ7{}ev;RBl5|ep6stxo2FyTVhFIPQ7btmqkRaesHa6Bxqqa{DuYSNeIv?26WFU zBnLriA23gGE~-<(vx3Dig~c$1->Fg~dXi<{O0W7|9?e_* zyY@gTqo^4dqGw)An0GC4{`JficS|S$9S8}zmjP1mAd`^l20}vWAWgq& z2m{nV@~?r<@1WI0pjrk}*FZ?58V6GEKuHW)NIwYDCqnM{K-u7v6QFAnkcQ(RwGzBC zQiUC!0I824vqJD{2r?FjwLU_wec;%AF#-VUFoFNKdu3#99t3DDl41gebHH5~1 zc`xCBYGItY^sDS_eQ z5H95;yn;Yu;5QjfF=*Z&T#sPtLQ@H^0w5lNs)tlJ;9WcraR>=f45^i%Dv@kKRs&Iq zOd{7u;4PnBxJYof1+?bP2)vpFQe_Y%A&Ma+vSLUbL<|Yl71ar;ydW3Mf$r&!=`@aN zH;QV}3v183^er=5tLa4325LBF~+`qAy`!Jx_rPkn@3<6yEOwGV_;Lq4Skd`<#rJF0&rE)r5J zAy-CfK4r*UWHzMBgGNHuCqM*ojmSZ2A25yJpcqd*7)PNG#qc({eMFD}uZi$82-Z*#1&~sgBoede#v_7OU%n8-}(Ug}3PjHJV1Wnntu5hqao5H<=now1bD*+9Bg_klcYx zVy%YYf{@)nphH~4n)SmQ4I`Tsy-Qj3BW1mc9rLDpR;=`^UFXxh)w^knbL~d=#;wjx zTV0yB`Su(PnS3H*`l;wyXXEBxh?;pOde+&fnP=kWUWlA=I&#vH@CkE(f`c6+Su#se>RpTajxb$O1W}Y6yBaD`wRL z5rM4MM&8g0?-@ZmL`a97f(}ox%aB6q7DCk(?Vf7Kb8j`e0f=m@j+9W|H zhNP{NzyonOt072zBo5ldikKvV%@f5!DkBNg7zywqZAg72X&Mfxk05hH$Ru)o1n(9> zsvcxg*f1E5Ae9lk>cPrDIyMAycnFAu*GjM(xY{5L&|(JA5m+tw>mxkfCFGh2D;u%~ z36sQCJ>jp6Fl8a-GcFQR#zNK=L)0Lb$&|2>OLAm3elw9(LfE*}LuAoN_-Gi!FStmE zuSq45YYJ@aPFMwm#sKXJ#aflX1tF_tAY6EzLXZI&QpQvU5r?#g6dR*>osEfO)_R%7B6?L-yAaGNW_#A(KAlP&N>r6=Un2v3kh@2Lr~1j(-Hj# zBPSdR@7`luxX2`Ff@*N1dO$5?k_c=62zFZ#=#n6`S_x7Cpkq~3vde9-D6NWBBEhTsh3ItVgShg=6i z*vLb3(D@+Pl|zt92OLl2aLuqADhAoUTrCQ1a8p!x_}^;pG;TVc79RnjaPynzUG-YKLSf{f0In?yo( z4@sItN`ZSulBVHU5oVtVvN8dgge+HpS4P5y!B{IG5yJps1AjQeS|7m$wZmJr!&@Ph z5rotY17%4_)`K_m&=|<|5iT~?%1AG&4YMY~C4#GlLKDZVlCX+E%4$fN3}Hi*LD*PH zTvY>BakQEO&P7&?X$!N^Cf zk78z?hSW!i^DZRKzX&0NyY~1rZ%vwZTsE*t+Ou3MqD{@eMm?ZbBM{t0LRvwGRuf^? zK3Hoa$m#+}Ed!~KAZ!SUToXar*s3AqAvk#L17W~x9|!|d2SM0ql@4-!1UbD2t^iUQ z!FxcEP7rdh2f02%o(zK5M{+KpW91+f5PaFToKp^@GJ;JE<-jW)JPhb!Ii&gsUKzpb zBT3tIDR37_!X{1HHeCubOlO@6sfi#xAV?jA%$BfDl(2?WLvS)4QW=462FFn!fe-nK zhSfx%o)Ki<5QK#89s<`y5+-42^$}#vHs;VAq$Yx_k%N;W20`%pNKhX!G$(A}53h!V z^!?zKk+6Y3_-bIth4oMpa`-%igjY;R48-Y+$hi!yO45mFf#h7cIFS_-WKFDgWIL{E z2riCR?~u)f7tZ<^!$(+4UAQ2mxQ3IEDh5(JV3KfU#53R)K^RyyLZtAJ5Z~ca0I3}y zvijh=4mW$K9zsIu5hw|%k3fg&fYuclgtusgcW6g+YK3=bhJo(s)(!`a8|a3$Leev&RR*tF zkQwm$1tO>&-mDb{JqZCWg_U6t(FD09NZGrTLDidq*O)=VmR;RP%r;pouv|ZJibK&- z$LjUI?YklMQS6LUaWhZH&pMMd_k7a4OG)!CB`yS^fX>~TNmKlrwr+5<;FEk_2D< z02`MBRY0iq5uzpnRYvA9ko&d-162p3Wf zA(Nm5+u##^Kn(EKA$Tme25B16nnKSrb7-(3BCw#k3St7p59C5v=}#)FuSyYv2mt42VYJNR;{r zUNJ#x983~Y*FeM}Y=cP9j1OvULSSQpegw2nq#FsUL?EXeKumxQK|`BKdZ2}Hh~5pj zbJPK;oAkq)4Z>SA!`d}M+cd-4HN)Dq!dl?2I&w%rhXt*4}?UkYqY@U zk)jDAb0M36G=u6PJ5qH*>I@^BZIe2T!fVCNW4N^gBGDZNE6oj2wwZZ8Ssh; zx;xdsQp2wTQsZcX+3@NHD+9U4fvkmtkjV85u2pc5`Uq0dK-drxQsY3x(MTm&9fY+S zf{(vJY9df&9=A3-}o;Mxb74cRV)Ai?z!WQ`o; zHgHINBx#!_fm97asw7G46i9CfLP9Pi0uRc8DkCW?Q0GX(IuWv14$&Z`M`UrBC0;B?xG!2Jzjv#dq{A?dMSKK%h()WSaKFAD6H3TOi^$~J4 z1ZNBB`ynGp?SqTd32lKRNEZo`l^|IUxqpPrhV+bxB=sXe8w26B5~K`)v}}<{OmSG* z1=~D-uc9Zm%{oF%7u4&*Kg2@ z>sJn}R|{!U52yuAAq9YDh~TvkG6OUv1YRHquYDj4;_D;i$_R3g17snbl2-|GeFUjx zAS7g8Duj(pLc}3GBS?LOOu`4^AY*Y5DYQNjyfOl9S#>Xj49$TD;_~EO^JI~bylbAE zOD;0PRUaYOK@c`%#RBp5k(5oUv~8NCT?!b1xX9DK1WK;X*pWO;{16 zhJsf{NDRc_1ElQ-A(87Nv&-5;RqKVT)1MAwLUnB<+hiCzv9SS~m2bPKui3-w@K}(amVa@QW2blp$_WEJ1(DWS9 zs1*)AU?&W84G^XwU~{n7N02H8Qb|GBct}u91Uah*d`^OPV3nqSg@9QCn@*UpO}18W zlWEFSr@}=pH5&pt_JmD10tD$_@%n_*ek#WhDamkf-&XIP`k#)|2R7SF3wyZPa1|mqM16d#kXG6G< zng~Kd>LcV15rhrt5J3)4fYd|~Hn=iMk+e&autmKS9NamA)I<f&|w` zkQpL439paf^$wgNtRD!PInoaV)kF{yykY^;4-(P$6V~%ZtB-{Bd?5%jT!2aHfRF!$ z)HNt1xWx!*H=>Ax$0Z;l5WNr*YZlfEZ-L}@%+do=0b!D8)exKunG3)}qE%JkA{0xJ zNK845Uk#*g!moyIvPj;9)E8L1jHHHGSxC19nbeB{?T<#TbTHXE5iLl9h#@t2BL2-@ob-@>35)&RL>TR*%J z*L)DAjl9LgbQIqNJxDIsdSJ@cm;&UK(2b=Y)I9EMnZ0a zfYd%{XUah;BM1qphTtUZ4u~WvThPtmkjw8t_pw5HLy$ViG8wHVg7l9d7qg0+$3TYW z#LZ(wE#g46l6f5X>{IB3k(e2DTu#gkbW1p7h6qA}t072b1R@~=bRZIP><{eZZAeW7 zs(MU<#o-9PtrdLK4_YS(UKyd+MBw@eUK62JLy)=&QWFX5`3mX!Kyv{+JHQx_8Vd9F z6OGV12m)0}NOcgTCW4TVng~K77YoR2NPVOo-i%QtVd)>CRZpnp7-*D+7WEONxTaeT zLS2g>jt7&7`UuauH8d$m2!pkwR8^3g2+}2itTaHXXi&yo(Zu1g3^5rkLUkfg`aZg0 zt>8KcS&-QJ2sw?x*_ic_PG}Rj8fu1-p!x_hPynfqATP*F2~J0Ef?ACRI=1GeTIT}4d_T!f6$F}kopKx^}tE^!UG5c zxdMW)F{>W92xR*YybeNUV6BhPcBg_C8^HFfLh2(32^o=t^nK7s0prJW%l?1AZQ0pV)EAS-2vqF-VagcrxT787v8y!(haxH?6VO?6kR{yk0XpP52+l_IiNG}xgo{}p!RsJohLElgWUdpF zgzTQftdF!p>fi_>3#qw~^Cng{yl{Xp;Pnv(1GK^%vd$b%VisR;5j-^!Rw+n%h)H6W zy|_fETX7(phRa6E#3A(&h(z@@qWXXbltE-0ysiN=2-NkExj%SaLp%f0ok3O)5rL5K zfm}#Z!B4`67z^E+@>t5 zM@5TmvgasySE++KN1!u&AgdJMy&-k~N=O|9A>p+Tg5gu9?p=zEAe|aC5q0opRpd?( z8XMBJ!Ahc4H;|eLLPDglovK}+?2dvIUGpGF!6g^MP;^BsOMuikaxS?L1~Mt*lr8I= z1F44KB)l4eR658cqymDIkeUe636il(N3MF1*^ntA1PMOx6tXe_a+ZR$bpmK04ps+2 zCWF9ZbcvuVhb%!Gh#(yz8Ou2Ez9G<=+mQMQLc*${STVC$2pe7jiJC@1M&zJp`#`E8 zaBm1&6M@e=h15Zi!8&M71nCXII!B$Q2Fp8F8S?xKTI$VD9}<3#ifvs?i7n)ko?fwJL#C zLRKk|`bagfUMFFqMb2D@s&$^t+x@%uhfFvUJ?%u?%+s+m&nAHSN0;O0U5=f1F>d~) z*cqoJL)z@~=G*1YQ}V6?9nj}nr3O1J4s<~(?8*i;$a-uD7q&|Xvp!PuDpB_?g;YjZ zNwf+G&V}@M&}t$`MT3=u)I^Zw2ap{@knuNU5?B^SOZ4!HtCX2UBW zGzPo^g4a9949I;$5E5P!K^Vw&5HcHH6G3K$AUjwg^^ufSJfupJvWl0qNCNeYtl*2~ z5~R%I(CQ<2Wh4b#K?kXQU?iwMLavDrm61s#ygmY5{Q$l$9K--uMxsWbrF58;5vU&o zJKhIZeS}>5z}e6`2woZK1qkT+2*PHF;B^p$0jY-IBxLOkG6}7rf@(nZPH+SM0X<0e z#6u#N1ITR1$R1{uq#52yB+`m#(*lk25U7tJ1s$A(mw{*lN~m0Tq1{fO+6S_!3VE$N zCL6UjLAa2RIHKM`RSDfn0}l{L5P`cz@G1#WA30~!1_nvY6#p30`+K6>LcAC(CMR~ln-uwLE2!D zDhZN1;9VPSaD4>NJ`i!uz-rLl4M8=Ufz_Bu!@ma7A%d(f0FmHR;2<|Os0UU_X0=-1g~@;T;#40 za>WF%kI)$K?hhIRUJXIkCm;@0z*xTxpDL2INduj63f?q?Tmd1o!9#PP3m{7s z2per5tE6QD{CZYN%LMRz5NK8i(p{3V1XWM)`UtW(LDDQ5QWHULg+QJg5(RB6ih=fw zz$+FYY-k+>z8oBW#scEJQ%Ii(OoA#Sqz(~$wg@6FW)LK15G1M}h>1k>0#FcmDV>n6 z52W@%CgF7ul%eG%q~!(AN^k}w$AOz_HJ~gB&T+&r@YG7kQjpREULT<`h^&m@${^(% z@g%(Xg)>lVB1q|tue7Ib4F#zqFl!%75r|4sNw_z`4CJ{v#0oU96zG;Za4-^{wSiX( z$U|gsLGl>T^#h@x4hy8#fTRrh%sL49emYPEgmfKJoK~9HL53GjNN1VE0JfHO#-B+10m6BBD6XPnG5L| zK}g7;oT6(UoB^qTAoUT1M5~O@svZazt=>WA$~tCYT`vb2h(jhJJs`LUWONR*K7y>H zgLH_bZPTP|QXy3j8VOmdja(l=DjhfpuZ-XfDdz(ZJwZUR9r@%06yCjwEAS^Hp$KvY6>A(N1)e@wq1i-0Q{l#V2_ zEb{4UFqM#62~y&8N&zJu#T`nDj*06t$N@$(*;&*!+JxId*~qj9XK1rK&yseTy_6S zkZeE|xZ0{z_pgAgf|IaG5tbD^!4S{++keUTXB5zZL&IG}vP`H?Nkg{u@qH~UtORgf) zI&FDp&^_Ra&N+}fSmm5E5y&YUQU_sHL&*Ijc)bJf3PI{4I0@+iLFyw238{l1B=Wc% zWFM;(_$F4!X+QAF2vQ%x>mXzXyc&WtAoUTXHv}OeHIb-U1O!3uArdzYhujhlAtAMo zgmDA}i5rDO5x55gse?qp+gh=nv4C6+!PzJ^5%Ojta0LXflHfIwpq?)T3F!J@MSR*` z0!Rqbu0tjv?LA1F5VJmlWJb*SS$GDAGawZca!~+h>xDKUS2VcT8X*l>r66?*=_F*O zL^DJqT7itrMXr?yRzPS9AhiZDB&1G(sD#u$kP%2+B&0rt=z`Qi$Rx7+G1;(M22mwJ z)Ifp)nS|9NkmGR(51PT$Lsdd65%8oxL`ny#%LA!0FiCjAKxRPF54IGukhp3exH!DtLGBqr1d;nbFgA494XMTAd9ylw?e?HAvgoACPJ={Ag7)}Y9dgF2)vaQR3%}YbShz;C~lPi zsgIEN4Z&+7c<#T2ckclC*`Up}1A=gLHng~)QA=O8? z&RBp~K#h+t)eTp#gkc|yhqFiFS(j2eM;@Tv+@ zPr)-L8UwTDf>cb%`5nTBlogP*5bzoY&V}fLkf81rxMV{v<L4YLLPhri2!af?p^->65vcov*6Tr&g^b84x#YqTygq_gKyU_pEDn0SHteP$$OWu$ z5`NtSNW?J<-W!5fI+zSl1!R{ZW1k8+=Ld2p5u|4XsgEFgS3%W~bqZv34*6Pe(DFGe z(BX2B9Yc`H2x;FCXr2hUGJ@1W@G42n40PnFq**MqPXwunz$Cmf5&>Th4!I9P%p^?8 zI6?}uTLil?9C&qzq89+El0aP{@Z1ozRsz>YplS$G8R_~%2Ia8! zi6DY}+FlSOpzX!4qQi?+MGa{>mE5OQtmH&vI1%44c^#YptaQA@Mo`LEh`1*D{42bQJ zvzw4fG{fQE#1uypfd>Ud9KJ{ZvOEB5m_wFKqe;OV50J=0CLuKrrU*nFjYMv=fY`yc z+QGGu?WhnEQ~`knArm?v61H>=>u4LiWd%=@$RhCc2~XJIK{L?M8HfR{Qy}#STmgiO zPGamch1NT;x(2kq8obU8az3`2Zxuv=nr{{4oHxj7H)Zcq6`wNDAe&!>oI@6iS^%S3 zu$Xm*if0+mV7sba<}^QXfGoBWb%7 zNPQ$@oeJMH1YyAIBgj}BWS9=TN&&Q82(o4yQW-%>=ss3ZeI#ZU3#pQjt06dB(hPC- zDdws<5z}yC@CG9I5(P+ogwiJh_l%HhA_yB^AHgdi2m@04V3MF}2we4m7~uK{Ix7UL zh9Fgvkd7Z(eFUj`FiBomO$4cru#75zTY6~K5S$Com1qpO0(h>6FyQ$ejiDA)2S>8=M%E?Q*-S&9I!j9@1!q)6MPK-SAiTcyCeMUa{Z(mz5z zc^g~}!I#V-_7K5pr8rRww9^$3V|0*eNCI>CJB*+kjSNkYGAEuU@g2#f-@ks7^DRYuYm9{kn1w6Y|Q!yE&^e|i(;${Ncm4Z ziB|HH#Kr0WG(kxJML(<+v~x2Qv5ON;nO0a6F}}mp1*v5qH4&ulAc@3UA3^FDfhJCG^~Q3t`qK_Y>5kZB!=7C0M{>L3Ga(3%Lmni`TyA;}w_p5cRX=#^1G z6^a_aYDi@S*D0LrheFRySfLRm4=70*pYqOz)a*(bN zygov!gU~7$OfG2ofeYx$Z>%*ArXZx=K_em65S)b6N0{}FoMXB)c-Rem*F!3TMBUAb zsENR5%0cQN$bGDkD_P-n5TqIc?HjU825le$uUmk0i{P_D;KkdZ6Mi6jS0N<0G6K~{ zBIck;B1i>m&^&kM&uyfBDDGlttR5rbjMX6fkwZECZI7O^$}*3gj^p%*tlkwlIDQ7C zWTrKVRxjXk60Vzc@KqV$^KH<~hjTG4f>$VzK{;F|Bd@fC)B>253z`Tdz91xgU=5-d zPU-~K!4XstX%`Qu>Otay=6AqzJ&=`bkZK4t$Oc}h2A72}P-+>Z{thG!L#|qYR5V%v zRaya6kopC3&jM83zXo3W!0QxHt%P(J1E@lQEvSZ6KxiTmt_HY8 zL8xrZm2#l@AjpglX1#+Zf?3hv7eT9lAY6C_1ZSY^8$#+6!K)s$`bf?$Rn|5If*?2F z;UXa`<)9?^U>``W1gVUWdqWU5tUdyb)IsVXOcJ>Qg0La`h9G?+ag%5WL&7u$az`Fy zdPp2RRRkZJgD~KGh#)l)sB>f(4DAgeZ6gvh41gnet%Q6#9{eC~$O%8lB%&HZtm5_& z*7Fh3@dhIw5gjOkS4Qyq2r@$i=@ubZLvS{vGUC&8hfJYjl8_#gs$Vs7<)Vg>ks;Yx z14m~Ht{$y2(hLA~j^I@igaPRyLE5em5?&uc7?7e2jf7M##E=k;aFv)vF;)>;*F+HW z;S~ggfz>4tL5O-Z5?8GOsed6FiL8hrp#&k}#zMGoQVYCG8eZK%D-J?O;y@x1LPE4) zrV4G?@@XCL5FAJWxW<9hDFjGJZyh|^22qJp2Z85zpd#3kvASP1sB!`Kbl~?aKMVG*LMj8Yzp^Km*zZNF4<497$Rv z!D=7q)eqw4ao}zddSwJzAP1?Kz-*Mx5u`GL_l(3%qTvX#ejBqUf{w+(?!^-acZ(pE zkvM971R0BiS4Lun0g(C#UI8I9kgFj$8!|BjCBb8KLb~2Ux{%7q8(tZS=zum8!MZ|T z@SYKPdI)87j$g|IUJbz+m^Bgd30JEAm2d>9q2TRCWCr1tuHfDlv@`&-RRXJ20;^Pm zs?iY2xC(f51-kM)s0JE_T*XRb-V?~gT45VfuNJ2U>7Ns#m#2Sc^&01hcd$*4Z;1()ta; z>dpQ0WlTbNqzxGu#Ti(HSUANwc*Hq*B{;YxIk+VmxWrlbqy^A4p9Et8S22W#@x0umcUUq4jzczzg9Z3$U@0 zkd<%9Qn>0JG;w(CBjcD3L6BA4GDw{u!~h+*mVwkYD04$7cj1BNgCIR4S=$syWrV8& zf=>*|StrR_B_bo_nh2>H0xeMhuTKD-xed7*4_pC3PgH=gA*;AyognB85qMkMV zK84grkggDfM6QXTY;ZqF+%Oo9M2!MP3PZ7UbOsqfBa(#dB!w6PPjA>6Nc9n< zu7S+;!0Q@h2B<3W22cHf>n0EvHZF(EhSx8U>PF3{3{3>9IHWRykg`6d(q6^?^&A_V4z`)PIz{&suj0_Bn3=B+QE*k>_ zI|Bm;0}~IkfE1^M0gtpXgQ~NlTMBCJ0~&1uRYtG_5FoV=GKty`BDOw4u4^#akopLj z#9Hsb1>w~YoB^qfkV!~21n>CB*n_$}kjhBTE)`NAffmTwrok7;!5Q$H2>DuY%vuRD zB?LL+6uB~zwSpcu2d;YHbr6&RUNVPrDIR3OHl#8F-x3aAt^k@Lf>b?V5>y|F8ApLp zgt$p0bUzVz5goMl0r!a@yIJ9DgcLULhg3Rn z281i5?+4jB1Sdh&kgh*y;T&x39Hc%H(((i&=m86m%1B7d3pN%9UC52UKH}8?RYv^k zZjiO6Xe7LnQt_*RBZwe!CPijLWRXeig#fIoQVytuS4PMT$ZB&m61f_Jvmvc($jBZZ z60JUhEWtu9LNVFMCP|z2vY8Z_gzxCVWI)a%#T18#K-OJD7PF~&l_+@?D|i%&8ijLf z_%ce{N|}WDXEb>x)z}54i)h#~aELQ7aDl2IVlfK?12==Xfx2s)rgy2bYd)k_f{@Ct zd2j^l2Voh7!|Vh>`azI`5ilzqxCnB+gNrTelmXwe3hDAdNaPv^A_A>45^Q#t&@=JBY5qD$&j@Iow+SznE*$SlM^uO zBS@tKAtlTrB+Vlw%pyR@6x0ubio+HyfXC$`!8H+-1aD=9)<m$g51jtraQBy>>2-FXP)I<;xTpvL?N)o1^8(P7YQM8ybs2>Ebh9Y1jN_`}56b`S6 zARQuP5^c>ka(x7?i6H08>HEPeBS`HdsOJms5TV_U2d##{RS#(49Hb_KkYGWq^%1-# zf-vycN4y$t@SYK$nk%20D_YM8jjQBS21gJnNPa{kLG4EHI#sM$9;+avGD0RH6%+CZ zA*2q16irx2aN!22bciR>icqZMONhc5TwFt@;Nb;6eg?eoCz65HaEKsQ64O|SYq2WU zfK?w*HK6MZYCzYIf+yxc)p|fJypD#34C?5aW?(gZjR95$WHmLu>INK5_!NLw8X(sr z$ZSN_1MAD6h`_2L=md~oHD+Ri?C8N1hloIG8BhX7AAc)V^MN8ryamB7{DND!NjY-+)GtFiWFQB9U{=-Z{VAQlw5M*brW)}1gVU0Ejhqi4MC2KL#}Rc zvE>{xAySZ9243mlVL()(RYp=6Q$mm`3DPZslaSg+(k4L)d`mdwEfY(G22BaF2GLMrqisdkFBC`m{>NqGH zat{%hgw;L~qDJ9Z5vZzy>}Ca@HHTac2^$7ut%)FqZ^P$YI;CwA7oNM!$UyB1Kch0;MH&kGe9f4 zA$=kU39pYJl@XtsE953eG!jzvs328FNGQ>LbV~86+g(BxK<>$@c$ZSZhr0iLw;#I8TUn=R6#i`=Vz#zxKAi%)DNpd9w(;zLbYUNdA>szYhUaH|- z2wsE$t&bol9zbdw2pd8|L?9&Ang}8&=afxSCkRsYppl>@a1NP}S_W4&gk1YTR>UET zNZO@H+JTlOK$_TkW5=OlM z0#Xe@NaQ*QnT=c@3F&&{ua98W5c~vGP<;fhj8N($NKFJG!POA7R)W+)e46f%Y6wp9 ztGmJLBjj8LVM9(V!Ig_4YS2hESZ#%=3!dL04N*83Qr94pke(5;2&8C&6jPWaL_K6V zx~g9_E`(NLL5f5)QWLh!7_$oisk4Z$@^IA)5OGp#6NoMdiAyhJMhv0`ZEzAY76P=a)R8X0A6nZ6+!RpltIMdeH;iEl5Qc5F(t1O2m`XN0GWhTF_5YU(u;u&yA`Q= z7D1{ZNSy+yW#DxXoC^_waFJC)R$oK*`#{8%yo!~*OH^HRL`8X8FPcPv2r0*4}^p-Fn}|V>m#&^24xcv z>_8r9?SsUHtTcd(wjtLt_}TDE2R#0k3?bna5S$@t17EETx)vNVE(foSAPfoXcnCt> zF$7w(4e1BLZXJq)R6}qQQu`p2kQ*N$cR)bc5K`PU9NHm*T*(R}Ar+9AF-D&Vwn_m$ zH-vl&H*|W)AXG#@1d6~%D?oZf&~Z5fe-WfXIZ-edGBk%qLi$9=B=VYV2pf4P5v(!- z)j_a6kq4|YLNEk0+>sHa4ieDt;8%CYh4|Fm_|)8Z)IkW=8Z3oZF_2t|i-apfV<4+X zu9e^!AB_R(a3L+0K(2=1Y+~vo72hfq-zr>{5oG@qvaE(b=cGBfU72gYz_rg zJ>V(^odK?oATbCbAypEj4kD&Lf}|o`H4(}P8>~K3^R9p-OGsjauRG#sgEF438Zp?R5~P)N*+au?u8I73Lb?D9) zvIcnn2(9YDteDUgKx!puO=Oo2Vt@zS@Yg|7_NkDr5L#~tUi-iqD5ofZHxNP2GKHLO z3aN%5ce6rjA_xiT`yi8$`UqYDAv2JxA;__E;04>DRSJ+g2)tlB5<)_IL$DpJ5U!X> zn7BzKa`y+phSf*#t`MXS5;6#atXKeTBZ5~(`XP|BeV{cFq;mwWfJmx_AXSo(E_635 zv?cw-e#2oX7R8kOSkdh5TVv0b-Ng|QE16bMc4h4dNZ3{43 z`46cnkV$wor;R#Dh+NA+`a8(Fkl7HGkmc66OhZ-=Wuwjtq5Bc3mjta=z_kfvWj>ye zgakg3B$k>5a%>LrDWi~@1*vy~ymA1O4ehgdm8-%KLBgxoXZCI-YrkE(r`g8sPef z2~yV(LkcL{T6m|byXPr8XURI{gOGDBs8({xl1D?}Y6w!};3DD6t>Fwv)dQ(tkV%L* z8VOeb=?x*1keUcy-N0)cOc6+>BVn5iM-UNY5>LWB1 zf-I|m)E)TiBZwO0nh4T=f%a~YY8Dg`urDB9!&1wDJ2(*a@cIZ>P?IVSk4MPv9!Spx zLV~7yyem|Y5a>uZZl5wC)knP;-Pd!CY0rmR!Gj3a2s4zoUzbIydu6-c2jD&R}L=avdA@_P9r)q;Pdaz0mHwT?52baZKA3 zT@AtNAVjwaUI8I91VLL_5$7rptbicZ4M7sE8iI2nISxX?vm^$i7&O!B1tOKaiopo` z$Oc3sd?+4&eS{_pDY&o-LZq;gka`NM6tW;>kvZthf27$8JXIB>=!P#vhRjvrCn4oL zq*RBje1@~}Fd!R0kx97Km<(iPa5kng{33XK4Us~waWFfo$g;?6m@R z*vN{}8aArlrI4Bhq8`GAlaQ(hK|*R7@Ja(ndWNWkD1g*N${wZ49;I+Lh~ZTNu3U2l8L3eK4zSr9i9xyC_e!!N3X3*xMgz(*y(S0+F@K9Kcq$Q2MW8#EbYn+Dyi zipBshRDkq-AcrSFNJw=9ziSmTPlP!E1mC*~uW`T(&^g=U=AgyckbV%j8iHC)B;v~(W3+EvHAmr^r5H_SH5;utit%w7!Sb(4KBWfH9Jy!u-AHjP@C>0Q_ zN)pl!5jF^goOB8y1@%D(aYIJu@Yh7bdcKg#2s{c0Su6)Sb{ov~MP4a~Rv&>k5P??8 zAy-C_c_P@(R#2^krw4?*jR>(TiM!Vr4_hWH<>?0~syEPeQ9aziK?S z52h5nT7fh0*Cx=jLy=Y;z@3Vf0d*C4{0-t4tjgemkXv{lDv{Zc(8H{r;Nn;r$gK{{L}N8T+5vUXb8BTv~QPth$0guv^oA>ybc==1|Q=K?wB0vVS)8J9fB z3IrvmbanR}^6Dc75d&SPI3?F?P>q9HAA$Gyq{%v^L2hIKb2DTevmg}>oRqRlm$FNj zama))AS8q?wc+&!%bOp#s3m_8S4?^o7;i``?DQPg76|27Xwm6LuB#SL=ahc z$xpCCfp@zgR;Ysy2~-cPhV;L%j19sE2~mgLAle}=#7`2h5>f|Yu0w2L-_C6?O9s{~TfAV`l=RgY50d?-TDvlzn0tcf6%i=t<-f=3Z@9fZn; z)Hqm3MR#bo2)Pb|)I<kVxPQb9 zCW#~XHFX`L6kW35^^vq=mb7D*tWyT~%(ql|SbYSkm1G>#CGAoq?9!ziGLfq!NPh?3 z*?}cXsCt6W1WB2LIzHe!2uuG6yi^<1{ee_Tl4jA!JtJf`as>ooL#Bk# zNO*k&KY1InQcljyznVFeb1s|#$-MANO39;08L2XYR7sd5N%avP#aN}VmWRaDP!Kg( z3wB72gP+8zk)R-?rhwE$1WC|Qe&C)CL>Gi4Xa&5YL{o{`*@09>AaU@)YnZhUvIwNk z!W!CeLF5R6S1~H!AvHDFN;XV2$Soqwng~+AV3I2CC6M|EvbY*e0i-@saxXz3P&HJf z-~mFAWDFr8^$UyytucV)2yg`iS_r4;4iW^D&^v(SUGpGy5Hbns5J9RTIEmChg5Ue2 z;F1gJ4S|*%IOU`DkCdG=^xcveL=3={5GnN$pN5Woq@oMxS~?l~92xr@DTho@T?0OE zN6s-7jMAaoe_)3mh}))tsw8mF2+|>f)h{TgBB0er@G3?IW)8eElCn?6Qy)p$q(W*U z$lVRdY~J z^>Uyk3TX8acq^-ifQCC-&j?Zt!AW@k2wnj}7?4^9lZ4ktm{kwAvJ)mk88?7kd=JZv z1>j5vt&AYK6+a2dvyeOvCo$_IWD(@z2sDC+I*gBX-U>|%+8SL*3mg{-FDlU(kjjW| zB)n>X)F}jOB8UPN@Z=Xn3));5+(0x2sB*zL0u;Vj8qPqjXdrA{0f;6J8F$5u5lj)x z`Uq0jAd5rTSnDHr{Q~LB!?|zIoE7?w;To6Yy@&aoQaKEALYt| z7rDthgBE7Xf)~QcI_D}nW+*vj>bs^eh?ym=398 zWE?@manMS~4%9i4v`c}ktcD-*CT*7lsaa$|)k-q5xQtCA9Ld@y$=bq?hJ$QSg>#c2 zXJ|v}EEyZnk#Ugod?2-tlufFnbqb_Df{;>{$smSx5>f>e4_me!59tlT>Z3$)s|0vu z1nK+0Nk~lu=Zc%hz;3|cHj60&ay(g}k0j0}Q6RgYdUv>MV27SIg>vGoE#wU53Z^7d5(8!RiR>yKO? zq4k5%xUkB|3$=d)nls|lbm!Od;MMZr(RAkpGvE~vpC+_JB&6Xkq~R{8?j{I6h#P-3 z#0Tyc38=a9tGZxCyvj~k5q0Y$NKS{>L=XmY?L&|a?`pvrkSYnhh@u#?l!J&s$}v17 zyc9*NhTvSf)kF}B;I#&XtKwGyKaT?53xiCJA(N2$2(A|w18OGHCS^z!13!WQ&c$Rv zmQX5t7Q+J*9&}h4knkslgc}I&AHnM*nsRE&LtO80b!De%E&bv*ZxyE7ifJX=adbp zj38?d6dcl(owE$wQW?d~NUo1Kl(cNak?SL9)dSnh1FCv#(1gU)>E9D@hq$OzT2f5yXup#vkysm-QMzBASd}Ctx|x@2SG?k9fVB6>mz7oq#pvRjPwIR19adjNkBIUflhA z6BQse5rhuZ(nmns3sMas)jo*bLl8m8`UK2=5Raxi9AVZ+keUcWLMkKVa}{v) zjIdTl{HiWkDr8?C`5aVOujt^S>f@vK{J<{rEK(fbDZ30w+jL3WbZMJRX`4(1n>0n+bP2n3aoaS=(rbC!czN4+ zHT!5a`)C!1NEL@jRr@ei`!IDdQnrs$w2hFri;=gBk+hAIw2hOpj+M5FgRDJ(^n)Nh zCP;5c!8S$S2DK&vb0H&gkeUcm2Z65qwoHcAM4+w^wqtxC<8q+6AxM3M-0Oj}Ma-ii zwGw1y0;E=g)I<;xUiF|c;A;{fRT8}R!DK+r@IfXaH59Za0-vG)sfpm*g+vX5A@vc2 zgw{mhbqfM|0fPF0keUcW!s{bw1q7ZR0iP>n4mgF80$M&o;QJsTRSy~o zJKYqtQjVDV2(n}jQU{@tXtP3?!*N(@A~h#|Fyd8lz>0X3?UCDW$fMs-HnLW-Eg=`4c0u!zPQh#BR1gj_NDuCBD@cmYx zx(2C716P2~Afi4}_9}-X7gFNGjjSafl2T~WYG`xNxo&m3kAPmg< z2vXNz)hpKL|3K2dakTZIb0t7bie#BFOm(mdTLH zNXjx9ULQf$!hyR&pn3<)g)$(u3}l_Qs6`wYfyUn;H4&t)fsl|oNWwGjgs)yguUB_2<|1 z7tjlU)I<;x()SV2^?_DB81<2Wu8)9@A98&ppydNa+UR{E$c^D>^$~nB2+|wk(RAn5 zaD#M_AcxCA`ax(l5rhk=k1(qtxCo>^Lg@z~RYN?=_E-_}W^H)W4$gpQPe`^!CgD=> zfex@NIGZEYN8llP^!^c~feInfDk<>bKV--SN`jAMfh;n|(m%ph5`xzF;Sz_`UuYy| z^+cWsd36?OiU;XL0_2_xCL3OVLAD8FwGr9znDq#x*8>qpu8Gk4L6881D8ouZ6u?Qe zc*I({kRb@!H;S#^0o5;%JLBN8;Eko&>m!ItNM(ddf~JZfiCNAi7mWeo%Dd*lr63}* zpot#%Tu%;+3$0%udwd`#yg~98Ln%M!3p+VMKE7+ze*rq7irYPH` zs@Wu|SS1+P1R2-_Iomrq**OL}nFTtT#W?H7IP0gmX{Nht<$5XQ`>2%qE0zT)RR$?m z1uInrsnmq1mG~JJ`RJ#6nx%P|`Z?SAINBK5`WxE?DBH#>gS%Jqw(*d;AvxrCcQ8pv&j>OT1h0LN4?BfaJqXe? zTm*)odxgM@v>{uEFzq()E$-H%fW(M9A!0*FR561hr3lY(%G3`}HYXz@r!)dSH2OGwaR zVOUB4jkdw-9S8$b#bA(#8|C2FD1h%)0PpbuZ|wmkQOFW&7nI5f($j$?X*dZHL?dAp z5aM&t z)WXu#!qZC6$JQ{yUOU=JFV$58xjrfnREE??H6dy>A!=m-#>Kt{8D8dT9;W^-_CAg_ zhBkhNHhxMraY{CEa$qEBog!hCj8-3kdP7!;kjhBPDpA5ZUfc>Y1cyd~Iz*6zhg<$m&=atU&!Qf`-5qU=)&AdRUMu2~JX2A3@4m<#8MwAxxgl9!1WPmvyTgChzvAJ25wd%kE%h^Hhi5AvJ_;fO#!_808tG& zXUc$&H~`f(NDSoD4^X7Q^^vS&wyYy`o(Qsr0n}-N?froqh9GU1BJY%;;$@k z9CRZ&!FeON0x?AcH)V%-Wruh<`$Sp01XYJPb*BVlrzk^*2y?pt19MM3ZBsR6U1L>Q z6E(SD9iDIlfjmq0B3tfyXU;}f?rtxxULT$gf9|e8-U-26lR|l>hI3C1=bjwKJ2irT zMmXp6FpmCUzTQBdMlbO?cacN~{X_?S9qV8n>tI=%Xjz+RF{@-TE6{upWEu!m2Z2ZD zAXk3NSjNeKPLKmtLlCx=r$q)e@JBn zsfHjVXsI@Q%{F*t0(5->r0;`T0YPdaNDl}^!UyQId{9T^Al)L+JQ2T^C;ZX}czwjL z?gp6~g49HCl26SQ!iKC*z^sV`RbAi+UK1f#G`uPf{78sT*`6OfOov$kaVyzjBIKMR z>ym|zz~d5VjY9|*k{@NAb5MseaMnkVMk$y#nyl%e39 z2|fM5DFeX(kIO;oBPj>y;oG1p34B5VNuoH&S~ye^`z;TUd+xy9BIr78c-4cI0jZCmRSdW$g7ko(q;Z6h zQ8)xa>LUn=+z&!N(G*e>LC)}jjK9H2VLj+D9jF2VcY+WM1AoXeInd?c;C0%Nu{cO? z2wwX@DkkWH1W08BUYP)?gM@TJH4dl_LY^A}*GK%C-XNp}s)P7I$IU?*kXa#kWyGfm zx*!jH^@AIv(m^I66%ZPUTpvN^gW#2spsEXz^${PosUpmp2qFRhgwqq3`z7m0`gct&06C@#u@sN;h%UBhl1s6ds?(ipE6G387*}VV~ z(L|7tY6y}-@*!0W5{amZAn6d2G9fE=AZw~+ok3|BE(_lTgk0A^*bu!CHk^cK7I+2$ zop=DPUqG`zkaMjORgY5!Lo0`z7d ztTo+I1$2GDw;;j$K147W1r+t|G#$e<971&*Lp1FI_03&04efMQH54SIRmFrA1ocY1anOeWuF|% zJ|&D}N*KqCFxKgzEK@?5CI&Ke__DNmF{V0*r#ngNTlpB;_(A$d64r^3{XtSz@h~=M z6b@1oK}g8@1k{=+2FwNB!72&8@D6c+53D`{^^BkukXeK%5<=_yfG&4{9j^_ogTNzl z!p0H8#-N+QA&cTfjl!XOg)qkDAhi!@X&iV;2)PpkXNwsG!4aYw(hr1O4h}g`4njgE zhD7uNAhi!@EgY=sfmBS8!8aitKWN1SzV`ugqz`&!r0oTt7=kk}>myj72viv%S3nRp zd~pJD)q}}~R7S`oXrThQsi52C!25t8>#jiu_}C_ExTXl|`7$u@5m5;tE8$nvx6^ixP_YS= zHgFc!uojck7LZVt7nhP1l~5GoR}|uh)JHZl><$XtK^ma?D8+~&)10xy7E~X#da|^6 zv9|hvs-(U^j{YFdNg-^I`eu39R*pB4e0^F*wFe2 zT;qW19klugvIqf8#!8sRV6Bg!)e!77IYb2nTa5q}F^L4#I3{6`1qou1Y6wO``$S0l zh9LD3a?c3DhSW#!nIK3V1mVJ~9!O;bIz&z{0C_|XQU@WE@IDb{eFUkSAoUTPgw#Y3 z5?&v{YacWQa#sk<2Hg)1?hv^{NJt$7At7BM%&G?>f?V|=vq9Amd@Cy{^$}(T#G`17 ziQrj`BnC8xf^#|~zrzRmErJjNp zznBaK@cn4Slcd%TkTuK%Nyvg{Rp{ZL_!bbNIRMi~a1pGv2}BmF6q+EUcY|Dakj;j4 zMzE5Q(KZMPQ2?n~FiEH^Y!i^IOO`YeLQ@Yq4*`;Tok4wk$h~#=N$AKn(ntd2L^w&O z3`ua8N6s-Fauyu4Pvn>-2b&LqthI)b;L{Qy_b)&c*r&p4C@H&iO!l5M0t}e>1CBbDT z!)&F%9-_q%p~sMJ!jNapRA$dumZuzAgXKWp>OR4ua876K+{9uDhX5*K~^R}XNAlX zq|6c^BX8ge2-FFJEI+_Uf@gvt^Fb135s=|HOcK#8Lb}#L$T$>)zyoyfZV_lv9C&>K zr1pW6pnJd#14Lj5QuUzRdh2XW&AjqkkxUxNJu{jTn%}^M(RNI5v2A(BO#R$pPH)>4um`|hs?%RAMvX=K&FZy z49w0ET786B6G22ss)pc-@zh7ME?JP00Wuf@pIt&L5O7sY)T)n=>nTO|LO2(#P=#ALk(io&Bf{+lEXbB0< zg{X(Li)5TLk?SKF*KA1t9hro*qvV})*qsR}*2=6z4RRX0niD z4bni=N2PWQ)h=wUUd*jt%$+{0oj$CP`e;fpE2KV}5z0P2m}`14*TfKJT=kKZWrVm@ z0_d_K=s~9N1#+-~IMaAZ(|F|i2(uazHvwI=3OO|nDvP@gf?w_cnJt2Ji{SN+s8Kkm zGBOT@_l!U_5oA#uYJCLh5W(vs=u!mmln`cpB%&7px(E+^3m&8gK{Bk+$1zpqb-C0uY!P-5AfoGNCwh$0CXq@ zGVZ50FSsJx40m;j1U(H3G&DY@_-m1E~l(8my8gnj1Z@iC(-=rignYj$ zyw^jphTtSpeFUkDL<|EU)exKknF)fE@Qx2M15y)- zAl>_bRvAGmAP}kL1+RuMYa;x05Y}o4QYG=LyYp&55MnbCa(x7;gCHbi^g|Kmx&_N7vmWO0~5DA`#g4Adz zWd*pN!X*MJTS%&p6kM|*x-dyd$qCuX2rs+9g3tj#{5>XkF)j~2V3T+ftvVpcg>0oE zo`gG$NCw=SNDR_yaB!fZXn}+_yvBhuAaw_%UkshQ0gr~kMIZ_wByt6Tiw&R0fiR#c z$|Vb)#*pW6pxa9!vd)>(P8kp+17^!QW?q zkg+&fr!;s?1YuBKA3;vqk+MletB)jX(!{J%lm`vFxkvZEWBLYf_(hq zg1q8_ykY`e!o2L#f}Ao!T+#yEl6;(!{OnQ!9MXaukjh9#h*MF7Ls5hSxjxD?1Jy@m zwhVQy?A?Csoj$DnJ`5B6n5G6W%nV_H)JL;ISs?Y%jBut&VW9e`(~qIm3tAuPS-28e zABmdAh?>SA*GI^;3>q7`x`9hUdO*lh$eV^B^%0^IWDqJ~7z{!Nq44?$dWf7s5TqJ{ zlaOi%Qole*gD?3qGAHgdo%%M57-VmfpLaUE3Ya)mUN=<}Z2O+b;^${#Tf>y4= z+3@-ZnSq@5Atec}0tZtZzGMQO0dDakHtNEr2_R)7CW)oibIAf}2k-EO^tm7;QbCPg zRe_~Yiek*d8QNKZ9rOp*g4G=Cg2dNE5S65w4^xlN7x2)-^c_S5vN#Q{3{s(>k!Vqg zRv$rRu?j+@;3T}(fG{B95H^GaRYuO4@R|tDfQ+6&>L5%Kp3dP36517V%0{k_;I)r} zbEZ5v*~>a*$T?@qIHp5a%|TXWL+)+>&;CGGEI|4}P&RmO2z*!^=sr7$6oiBbf);fn zsev473hoxAiGhypNfWh77qvCrwgBZa3W4T#aML0Mld3ogo z`Q!xoWcayc__?I`xg_~GB?P#ng?S|fA&6T-fJ;)4TT+l)T98v#m|IPRLtT{9Qj*0+ zhCNu7AzGI;%bX$KhPm9Hq1Kh9-J7k$hrP>(y~mGpVi5P#F#Z{lLNlX;=fsN4iWZs? z!#*W~c|sUtUkF2&A497rLy0q2i8Gg>m5Y(JD`c0DhT1^D0hTtT;bA;9p!Zi&9uZEC&Mvxi@lZ4knkTE(wMQaG+Rj|T^AlU?k z1mBPX6@jq9BxHCTk}Ki$5rm6ILdp%aS_vYEMnXy(WD>Gh9SaHWf?cKE`Ur9- z9i&QvaOOygm}OPKDG*qE_jU`bfeo zf`LmItv+I6VB}z77vkm-7Z8-@=LaD^ZW(?a8GbHV0d5&V9#FSPkXuTKM@onfTqS{O zA_)O5P$dQKH>rtmsEKeu*L$mh>Z43EhHMLl5*vmJ2Zjb0hGuuB4lmYDZ?;LnJhP%j zmZZupNtK%yFE%xdcXBw>QnY#TM+r1geTzN{IxeP3w4Xs=ttFlGRV@1rN z19afZD28Bt1h0S~40tsJXF#eUOcGubL8~Fy5FE4z1iog7Uq6H&%s>c&>m&F%a&QJ{ zqY&&oZCt$}c+~@EAgUYWt`N3;L-4sFv_27}CW2H<{OX`Zai}#BXw5d(Y6vnY$EOCW zfOynhxxk1AqdtOHLy(JycvW3+)kj!+J$%ackX=IDN_O!22(uc3i@@t62m@Cgge(rp z7bqn7-eIT+gpEW(Y9%-cshHq77g{la`&JMsXnBHdV<2*u3eLt-6M;|kf|NoO)<+=i z(Ao;J4+V5FG^E5vCXvg0%+eUY2(n&c*!XK7YRKZ8sexB3@Hzr6g2{lZ!ODQdGFB;w zAVdKk5>oqMl8}UfoUTyV;FJc@h0;F)--Rmam?Z)3$VfTB_N9W=%Y#?GLFR)L!1WR6 zs04>BP{rg3B0++n>r>Md9YF{(A_u9JKuZxEvSdID5;Ei*vfv1O!4PQSoD}TzY{>kP zvSYHMeWId$qOxPMvVEemeWH?mtdf1Kx?2teBT5E3$s17Sl+@Q5Fj4c%lh0}+A9Vx8r|Kf;E!eu1o5!zzVG5SmWF^#uwWIn6+1 zAtDeq5(%k~q#V`guf?WdS`UZL1cqO|8 zMca5qyBI~g7&ZH7Rl6u-0bUtFK6yb- zS$=k9K~^;p4l`+XYXu%pWj23Jo@gD0NG*m$U4~?Rh71FS9216e8-`jZmU2h-N@va{ zFM-ZL@%~Wh?m+R*0Ff4N{&p{}CRf%9d*)&*hCFkITr-9wQ^rgi4g(8UV`~q{ec#A6 zk%&n&7=iW#2^&Yi59)!}N61wV8XHm*K}bmLgGQp&M35~)5E63TD(HGUjQR*#8R-W? zY9a_JpzkMu+B3ph4PosHAy-Cx+FpDZl@VmS5U%$O;EQ>RCCNwbIj0jOwo2oHgHNbbWXBxjx%?PHFJzJbBwcc4!3X&g^*?r zVHQpi?oJ+Vj_!U=PQH$gzV`OM_V#`b4*rgg{*I1*4h|vq_96E6p^kQ;j&>0amQhaD zu`ZUeE|v+-W(m$_$aFMfo zhNWntF;BEEo2$BzhnA?Ssi%g84`ijBuvwIlX`~3+HdWJbVbc&1vrvdQ+9GY_ng~(> zp^@;K2(1o+b0O6boP<<5(1AEu-v?6lz)8qF5gLhWWdfvn!df3e>L6qi*A7|~8#YhGr)Z7bKY~;}SV{8gBN?Q+3C_k-Nr4Iz z@FWy616HJf7DzzO`vQ&ofzO$wU3~#&RbAp9) zf|YZ)wM&GxON5nkxS2zknL`+IeFUk7{2d(woSYyi+{p=qoE^fQ9b#N;V_a+#+-(xv zZBpE=Qa!BG-L2EytTNm!GCeHwyo~a^j0(N=3cd9Td^8JuH1oaH3VhV7y(O!>Bx-$x zYr&Nfq&{j65H0hR$+i=8RpBubWKiV+RY|hEEV8^T()?`F{A`e(5u_%P7T}f^;09Gj z{Ok}BNPVOs%%&^B?I6$Us=^hm&y!*ynqeiJW+51B%rE2!rSuaB@+J!pcEdIz~Cg0LYSB1{rbH3U0I0kl{SvSbcSLaQXKbr3`l zQsY2sA_xf)fsowFPFzZk@ZCc^N{$f3rEJed%K38ON(WRQ;b1^(B1n}4uaEEy&>`1K zT(aiUwrS|d4t!e*-u@B14nk%~+od3LWx%`6A(a%Y^hk#@5S!V-Cr?7UQQ-OrbPgC= zVFluXTf&g?1V(}uK7pi=25k^raDNJ|%7a!$&Y6&M5~3KwhLdQeF>;@a7&b&Z+%$p= ztd$NPL4pS2QUGxnCW%!&L>ZbCdVK_2fd(Cv`vHT@qyGz(oSj8PHE~6sTz)HhW1fFQWo4ivlkTWL8KD+$Rzj;t&_&fYw2REE<9==3)#s(u|%84E|~i z5jqU9hK#8u>{*sPu_inL>I_yg42HrC>Rb#;tPI**43-k?eo73Xnr!)Ij3w4=H4Y5T z?x2lG?Ox2C-V8l{OcTMV$CtO=gCo&eF4kPiR87%TO;JI|TFuB&+$31iEEGB-huJfN z9M@wSB5WE0S(^>-@1QY|57Wk9A3-Z1_(2Nb*&le{2Yz}FsA58rf>%9o2BZ!`CgBr9 zXtfV$st98&4pJpS4%vo~nAH$?vnuG!1jsE9Xe9FB8z!3%cF_amJReLFQtv=WczpyJ zlmpEi!8$~|%1)4)2ufnBjELzU;jfR7Ya&R`2)W+jk+{wMAgx?538|+rN#t?~g$*8Wfz?EyQ5tAngS9ep%!10o3Q2S>k}kAT z7mZ6y9fYg~Ye|k~CNdXUFEMOnz0_r6t(_rOK%_vo#W`fc*^pWW60?wA3?z^tBFH2> zmgSu?P~sH1enDb`sv&SU2gCr^N09SHL0ok4WEsa)Df?s)f=Qv(M^cWdQjV$W4yg*3 zaSD2#lIl*{I)>Uhh7NiPZYFAR7ShSK3I$G5#jdgy?ozcr@~yrypeo5%s?Arb(_b7^ z6Zwg?`G^p#kJ^2CT9Htv4^NjbF9c2S6P(~DIN48dq7VN>AHm6f!jt_3r}zuOY9ek( zebnd2395`>_0eR1wyA*}eSus({v7c7$WjLR^Z% z+}a}Sdg5G;3T&<_Tp`-b;d*TG#>`1(Y#HWUS(ZGZIxH^I40-|#n%oR(oS-U6hnvAf zn8{z6AzYgy-;A-so~yxyvDuxa)q}Clk)hd@q1}@KvO~7hi>uX*Ezw#o&O+K&Pt(Cr zN5jNf+uTjUG(^%YRKx^4PXwubFiF!8A(LQ8KM1WF5;qQojJzS&K@c`%H3GEu0k1|t zW+N&fq)G?0Ob&LQHt2E(*u)UL4iWvbr7s`1mdFBN031|Op;5{ z7J@LVA;`WV7-_|=07Z~K5u`GLu#u}Itd$X$thtPJDmnt+ZZB(-212$evbYhJ8Vb_e z!ssX=S6gs4yqH2^fajN?GtS6t=!ul6a$tr$X!#T9NPqa6C=dgws|6_=ags>&5n8h! zQUb$C$SgTzHi{PY1uMfw~5j;wvs^kU~^UDiPmFIGUY0@VF0fd zV{G+c?DApg3t*WR%sMTYbwVIVpFdk)08?K8(}W-v2%|56X<`7=1b;?M)a%dK>(AKj z&)DV1&<$pE`!n|huuKGNfsnoa%zXhYeE}>G#)Kd?kQM%Hef}JM{-86od;HlTsLP)X zfr7ZYg1BnDg^Haxebm`4LGwop8vG2ZybLNl49dJ9q{7Fb!pEQ{$e=F7pef3rCB|SV z$zUSG;I7Evt;!Uu&623kk!H$~X~CUq!&qR?Qe@9t=_Zn3Cg>r@U@ppFz{jA;4nn4U z431Kav6>8tx=h7Zpi>%Jy_nm)S*AqDZLPH5-x)YPQL`_YtHYP2&yTa!l_AttDa=k; z)!a|n)JNPn6toq{BwEZkO41}!!Z-r5IS6ytD&*WAto0G3GJ=!HH4aWTxMze}A3-Y} z-1QN>>VY%h3lfN_eIS()WWhGP-a%u4DLa)~y!IiM0U0MmB|+5_xEg{~Mlcd|Tt7$(UTq<* ztiUW4AtKNk3cRZkxunI#hSx_#GBC??{36J84KZw7?xdEu3{npW(@eOmj6)`*mVtyS zeiEV*N`fN`TGt@g9Y}1*{!mF<@D?6$or2syLKO%1jF9Rhh%AJZvQL$?OHs6o5j6@B zmQ`lu7nK%ZQ4r;{k!5yP;fmH{PB7*uwqmGuVr%taZ1-aB@?q%pXPz3wGBt<=xjyO( zV1iUPpt=dQGV1qZ==Woo0IrN6^%1NxV(j*3?)GPf)I<;xQW?SPBM=u<8KKul-F~c{ zFx2JG))Bzb?$2KBC0OLh;ibxICCZ>B$e_*#s*hB`l@X*$QsxEKM_Qtc`qE5Ba;(d9OZ7@)gEGX-jYQg%Gr)Gp~gZUYOHoLoMs}7 z)*=kfGR*PX45@}}g%%9ub_{jS3=OUfkostQqvwPur4C<~386fbf_OVUSwd}l zwef5nf>cb9+6P`438oR1Y0skYJJ43U<0prFd2~A z2bqM|IB*81j2R}9v`NB>pl6uaCQI2SL+)WgCXr?Fu;H~5gn_k6f=D68`cP{k#BvM! zWEn7mRYuT`610>rE?SQOt^f}MQv2XhKxZk4Pw`j^@2bc- zWXhv<8zH(N3P48+z=qTywFxAgkx5AWf{v=jw%i3Wcni^si-gxia0aAKf!@Dhn+6iM zO969}B<)ZTBq2dsP|#6myCfO=WOe&6VQnW#5h+0)Ar%1z4G|_s1qKgw<|ti;1Y@=$ zE2c6#wni70Ru7JDZ;l=x&WU~?)Dyr0L6B}vw?8waN}Axu+V9KK@5j^+nnq!mh*lp3 zFm%HZL_MS?njFA18P>7t_hXuXsHa#a_%lO#TM+SHf5sk=nIQAK{Fu7^nY;WsI{nzI zJOv>2k+m3uwh*X3(&T5*@h9TFODbIvC&73F6m?KV)Ax4KG#gH-6jLpeZ)7?r>!ZbwOBv{HM zOv)ro*f>hqI0`Zf2dRC~W`f{?PzJ{O1V}XmxxoQKLTemw?E^hQ4t%OMlx^TIYTysq zwF;?mAZ+NSA*8EUAxq=XNC9o=QF4%;j*u2Q7b1>aB_Y>7kopKh!Yd$N4R;6|ZQ&ev zWddZO0(5->ay100jF9_5$n_B~($E~dCW7{TzzgKi)@j{B&0M$ zR6uAI6Sy*hR7o=S&@Cb0x(VD%qM$y4l-Q6`mvRzPhSQED*@fWQI%0G|>LZ9si2EUI z2nnx`;8h8vCW6KqjtU4<7Oox6097%NiUunQt$o0C5JUk~1f?>Pv`c~{Ge~^|ISWo2 zB$%XT7b>LXh+H2zD>1mMF-7X2)JM&3Y+YXLJwBWhd^slga&-AIK@g;y)8)t1JSDf$F0^Ur-kbQjfqZqwWBZ;$DA7NPRTPpK%hbV+E;qCP1Y?^%10# z)#uCD54HufKI-!4sPYsjuxIm9WwjIm)kj)_3_3y#y21?lLJS7N45s1?7E%nZiVQv) ztU;Qbff^j1N=(i&3^pPRwxSHdstmFE?Ahkbkou_Ai?7W`s5e-$Gf=$6N2nuEv?EZo zz(F!vo5f#|AxwiY*OIl=k+;r`wb`4i#gn_vnYF}%J0`5=@U2SLZ&} zDVtP?I5G)t#HK*BL&^%OkeEdrX(EvF*)d(pJ{4LMfkhzuC?TYTeX0aZ7G*pTJk~^A z5lCrII}&u|fn7SLo6&5Pf~i3c69^kRR)#!ohQuZ<@a;iIhC(|pkfB{{q!c(sL6u>w zrIE5th9o4=b>23~kO~O0`4rBUw@H=LWD)1}za5XL$w>6^3YC zhD2k=JZq*BM~+%omO3}qHZRsrUyfcF>hWQRAc#o6FI&GaTaPbGw-56KP-Vn4!H0Pw zxUU56<1qL8G4+DFQOr=%4|Gifq~4hjzyL>(TCCq6D$?%{IT)M1Fw23^Tl?#kcnDOm5y+vv_)@5)`}$X8*{6K5_Ss>f%pDrqP$tgWDLqG=(c z=d57l4jPm*43;(ymN5wtHwqUuKv|O@ZWs*e<`@Qu8wJ9Vm|*}EA>FJhsuv7GkZK62 z*8{I^5DdKl(B*fqBNQOwDAf?gSqeg+!{vO0^?Zc&y@d2UA@vdTkZo;05p6#qO&<`_ z@`BVvpejZidb$E~1%$>H)btR9-2;wX6G6_|#;lJ7)!hZv-38R#kdQ0#03Do-T>C)S zkZT|WRU8Dt9U@3IghoPY9840q4#LBRS3PJ9cumBwU;{>0{II=6yowe)3g(bb5Ka=@ zGlGxQK^T~Wb(}J0SSurGYwUw~k~T@=U<9wNKn&{?N$V7dxVTLca+M?j9`1t|An;-Y zQc0na5LqHgcri$%dbl!NrK<$E4nig&DzWz0AcZ_GDa^tjj|f`5Kz%N_n~CWFW#JJa1HRrA!i9%676!HoNZK|HQU_s@a1+o{3xo@)hTx>EZ3=`9 zsgGoA5;5x|O<`t8eH5hws*eh6SxTKaYh0LXT|w8OcKUMk`EmBb$Q~bdNM+RP%Q3-^ z9j!i^=*u$6j}_M6VV(f1j36^ay|DVI-;V)O`}F%W^!YJN@CBiMe~=)Aoao0i!I!be zo1w>>q1OjQc6u{(`7m_*GWPf}wfQi#`7l(tu@~4u>mwr(P;baun!!ef!A6e3PJzKi znZZ#3)KOC90No?Y4!*rllz~B=9aJ9$DKkRqqY7(|W+$d*C#Fs(h8|aj9#4iYcZPNk zhE6ZWY8U=0XTEeR-e^6>Bm;(YQ-)G&=5kxsG8?8+YsMl=wtO?@C}ZJBBOxaP6=y?L zOFdg_1ABQxH$`I)$h?oFVW6~eFsL%ZsEZy{Z8P-TQ#AAzbNNM)pnxP(Yh(-W=g!ODd#P5_;#z^CZ}sfOSr zW_=`}=7y_2f>b&JYOW9l77~0R5n6qOwvQEF6A@YIpeaMEhTvR0^%1BtqF#L@WtE5@ zp;cItHVGIARC$TpB*2k`Z6X{&$_-Gp1)HdXh+rk9?NZRHGYD4_qmhiin8Io}q*{cR zrD)|U(lRaNDe`1UDNREXHYr2>nh0VtxIU7xONVwhVATPn4nig&euRe)G6T}p#U!P` zbr3u*h+@DT0I7rEuEMN|pdzp)4c0h?)IN|TDr1u@ZJi_sMheym3f2iSw(&By@yfRG zlE$HuVhRGh;wl0R>cWgpa-jZElrBS}2}6+$ONA3xtqWV7D|?$KXQvNOpC5OxFV_S= zc5ns6GQp1(j{1FBdiPEWSAVxG%=8&4~!-Ru}lbJ z=?!4$4FD~B08JJ7u}%c7=;od1$35AfW0D{H1bKWdmK>2RY`B zk%57WfkBOn!9;>1ScNHChpWVdq1u|g-I<}&m7&gsxze7Y%7LN5jkVpAq1}_A(Sxhr zjibm(Ak&gP(~2#_k~PnUBhQAT(2Bj#iapbuKhvDw-&EP(R9VZ&UdO~y+Q?7J&{xPX zK-efy*dSckAY9xqSR6bVgf{pF;X->u;K~Rx?*kD5U1kTVT;L?Q*CVFq2S?DEAXt|N zQ7eH}BCkn+uwk{24+aBKA3^4WAYCC+>myz*PhKrhZ3XTSf!1$>87P$zW=#Y+X#uHQ zG2@gq!HPI!jNu4>Wh7;l2+8bl5~DIofYdec z$_R}Ckwqr47A$Z5IiSxBi)Jc(Q*(1Hzj7#ag})&`;gUg<#UBRCh%kg!dLvq5z_Z1fDR zGQt@h;ITMpU4z7y1oyI$NvIm6`Up7{LD=wQ31^_yM^e_YkorhUT#=tgOo6-T;A#fqZ>_-0=Dcw0gl8R4aihkw6a6qBzhhZkDMb z9MeNMW`uHrE=vtzn-Rt`BaCHE6z9xvwkaXZ)5F=OM{!OLV+Ab{4`!Vl#yTY&blLZm zVAd%?oFEj)H#vZJk{|mdKlT~n+|$FjriSrO2;}JYW9<*(oEXB}AH?1023p(J=)uz9 z&Rp)unPJ2bs?HFkf?gka%Ys%iScx$xaWhCUGq8XwBvu9n0VW1@ZU$3H&MXdSL{ zbLJWwjtM@j{oXA7p(0zWt#;Pi%}!L962>(lh^@t&x5b;c#6_skk-x~9Ki`2T-g+fp#wQZUFuEyPMg&&*lR%vshXK-MHc)FfEcB$&wh2vQB9llp<6kvDi{ zq!$3GZbWr`;Yb)(6JcdQ>LB=1ZCv|?z}+QKeI%gc4np9uIL!J8J|85g=?ST2Al)Je z8$$AEdhlpMHWNW_28VZu;0$(h3p=Ju2=xi7C~-n z6;QSpfGyI7S3__H+HwVWO@yoV!4$`=i6A17`iM{78jLK_>LbVy93}}_!vI;aK(I36 zkTE7$AE7Cbu#U%y#H{1t2)RmvlpDm5QntzP@jysr1ShFgA4$R13QB@IV#wvHq#a6q z1gV?gv*c)nt+X9zUJO%^G!bx50o4uY6(y!Dyw1X`P_T+XY8gxtQ!zvhvW@Vn2blqH zAwU?AdIu69k~W}41rQOOBvKs&t#QC2$TblP8`iCcCm{*zLz*~LrP#L8PGN*G5;ODOU4h^YxMXp1ttDl&MgGQ=7%B%3ki+cK0mu~xgX z)O&KY`*C#z@^<)e_5|?u`SW-C@bm`=^alv^`0|2|BkRhoXz&uM_Y`dK z5NPlaX!7Q7^x~`c;;ZxIt@YrkcH^pWWCb00?aW^7#8&G9It05Fd>VGM3(M3nvAMBI zot}J64j@!v%T;Q{o@v68pv@4b#t^8;;49DIBgfz)3qpRf41Tf<9?}f15)5v#Aml8^ z;Hto2A;VxO%Ag_0pvKRjz{8-(%b+dBZ6Gb6BPXUUD=I0(Dager#l)b<2|EABLxDL# zn<34Rq1>FI$_jK!YPADHl|5H)kZgaLR)f2At(RbvzgSD4M4h)#sUvs3HEW?gQ;`F6 zkt0KaJwu@lLy;{*sU1_P9aDilTY)`Wyp2eby||5yzNMv(qKT`rnY*Y_peSr^2+|u8 zF$hAe$JP%7txo{Y2ccI+2LAB#6NK~vgduAWAhi!N8zQ3TE2QfKseLd>0bOr664C|L zJD8^^2x)nOkhZ&!wmbaB2T%p1;VlS9NY@Xct&S7WfY(7F222*TmK$FCpfO-I5n|ak zzp9IXnk!@`2(ot-rB4K}j9@cF$g30}JtMUG2(lCbP72BA z!y>}Qz{d{iB1tiUHYhnsF?cJprRX!H8-XgLDl3LcJBBKIhC(yOG;M}tErvu5&`LRf z8HNZ|hGYYVY;(qZYt{lg#zK3hLI;LI2T)}MuaEL0K;i`|2huzR>BBX8+qCOJP_7uB=EY2HOsC52+-iR41N< z)IN~O7f20)pTx4?8GIrKE?szB0`IOOhasdIg3R3@&*s1f(9juHDblD&$}(BXGFj3h z3Bo`lk+nl=9|$R7orqZ<$$%SI;^uMUW(gnzZIeaJW96;lC5$5!r4&SXh4sZ4EaW-8 z)EIp=S;F-hql{P}=jY^DvE`Dt+bs&jd%uzM=AxvR3cDl_?LatG=O1ZnXFY4OGCamDI! zWf?ORS+lizaI|}Ib$GK(3gw#-B|0gDe`2Umf3QG{k4Tf3P@#iNwv||fu1KI7ueUOr zyF8PtJcFw|gNq!4vn+$N41*htZ7j`XEX^du&cqBJl!FsY970?IQgALa1A`GigRK;( zY70?gC~@Ga_ZIK;7j5+xEO+BBb>XaY=Bsz%FS6oH)@E>%W^fW`aFSr~R%FOD22H4y z*fN&bGM3sil-h%;q(TRVe0zpGJBDHh5Gt}|D7Irrv*k*&I1KO&>6h2 zaXI)*5WlJmv_1ll!a?dCOcF8#2bm9okdVrVU&)Ri+a^{>O$3nzAD#d@KLK(dE5U;l z;Jb%#E|7!I3V~P3L3%yVnh0DaVI7O(1b2(L!Icr7`Up}JK}b;MHIKtZN?InMNr_s- zKoDB31h0(H7~)n5$Ox^vA;?85^D(&uO+Y9BH^Pw%RXAJ1HW{rlf^!F3eFW*z;Hph9 z#o_f4dMyAOC4*>@v`&FAFzX|DC}CwlVg$L?z|TgjZs1%{^#QJ!;37~4j`cRU>LX}! zHA@gPjhC=a6gG>Ivy786jZ&6Tk`NR(m0`11;_*^r^wD4r&|vUWV+d9U9Wk0@z>s6b zQtrTA<;>UOF4W;I)*C3iMJk(gd zG}xULm~13Lqhxks3_;2a;TkMi#tg+a?2RrgZJwO19*jNy920~2CI<8M2MhEC@plGF zw)u&bILqhRN`~tQ2deRUDzdo9FgU@GqcnpHxJq(YWC+&fuvh0(;bCB9U|@vTJs<`n zvVa5+i?O=Aiz2h59BAiJq&9Pr9ap(Ce~lBzlqiKQ%^uU!v}>Jso7{!+&DoN*8A7!f zBlTD#^cW)b8FS1S3aps0)<^mF3`JlB8No}n;Y_jSFfcPTFf-&c^x-q~5!d$@*GCzD zL#~_P)exKkscRs8B6xiS;)2KJgmnBs2((Ds7x`RM%=(B=+mjEYGJ^Dvz*9dS@cIa{ zHXBkOL2d(w_J-8G1mFnPIl@u%leFUj)aMedVpfh}IVU-c0K7v;_1R3PjN4P2@V(KGEH3TQc&0|PHQsyyI zU@elCal|0BYD?TI4%dhuOdNfD4^g3D4E>QNg}Xiim$ZoWf$+*m9I==Xw1yB;_Ck6D za5fr))cz5qPeD8h>9tAOCK8FHtrM^!SUrorS`r)-&{h3tZ1^}TvX^nyK2lck5F}|C z2VvkRG1C$r^>D?QsSCM261PkeHjR_Fj+HWt&{dRFkr1#|W4BRdGnVJokzmsnVAbMh zah78AR^m+3W6m_?s&e3{bKz4SGY^17_<4Ruy`pldMPn@DKWS!fcivk@*sw* z9D}PIgRdN@f{WDSi#8CjkzmkbV^HN_(BK9gUZ>0p8k$q)W-t?G@Ymrhag#4~S4cGA z2~uPTQvsngebD$}hCV})C0kdRQnkBqq7g%)5krzO2qhXbr~H&kquJ`7?s&DmDw=mTeId{v*y|`=h`sa8SB{@>%poRUlDy@7~2n% z0j+>wlR;3fuD`IZKcqf_kkHx(Rv#gEj*!@p86s~;R|uKp*8){QU=lQB2WCL~M}k^z zVB{j8=?qz?Eu`TNMjk@2Y6#bnKG3yrsEZT8l@4Sq&J9-exME{~wzWbwu|jGh2#HxA z!K)!S15y(~Rw+O#BS9rQK_xrnc^?=Xbk_r}4iT=322w-el7)*yCWDaI%fZL%@Ki>S z`iM)$7=qxVb8rT{8iF%Os*J?Jl@X*qLe>Z`A21o1B?hE>1tHkJjW#tua9t5C}`qXDg1>xyO+$s)|qQuN%LC7>-+$=%d zGD*ZN9#S9aDM@L`iJ8eW=!i0iGcyP=Fvv17sB$qlh%@*ob3y8(3Olws7oHAp&_cWZ zAnr+_eEmV3{Xv|)fqWf4oK4;mRn7tdI-Fi=%$_PNUaG9#T5RrWOjhy?`XUTsEDY=n zpq`5W1A_)HgP|CcrxHV;7E_KPs6J|NVD9kX?(t-4cVTF^XXtfhm>$Y9DS%<3FUtfU z<|0Rtcs)jUIR<+%1_ucca+YCml4fv_WU!Y2Atz}D4@m|uX$Ds*25$uhS4DO!F$M!+ z24gV>U2z5jDF#zn784m}e;vMb7qv=nm0Ev|d^@RlJ+2fZ?o?x*3`2%&6UIUd)_gPO zG$V#0JFa{Ou525&LMOg#8;%q+wpe|J6hnq|6NWqsh9VnKeN<}8P+$i_MAk>(O+cXf z2wKaaRXW(X(8>i|@4$LJp!>d&#i49nAMnT<59wA$1V9vNME%Msh2IjyZ+Y zK9HISLP8Ih!?9QnvP1zg3I`z}^^t(0t$-rR89tcx5u}zuCgC*;eg?eW!Nq`#)Ztk# zhg>~zz-l5+X(LY93=zEQL1wT?8A6blSu}oxTD|j6NT53gHHw|2ZfmUw73`Cd)u~)(-7CQp3C&X;4gAD}r5YeRI1vQZj zY$Z8p*%&TyNZ(D;CIJ=**q1xPWzn1m=fV}BF_5citZY%F$_TV>0mpb6#3FboK`Iw1 zt9WD)WHu;ikr0*W(Fd=OM9ibXC|1-W4pcFj#VXjwNSTMJ8@cg_YlCYH&&g#$yqJ6oGKONTF8lPg1uJ425j1LzFZ5YX{K_3mO74*V`E zY}V2Y=2A=+(#*y(YzC6dBJ60RXY5Lx44MMWK5`5ZT1+`6%mo(gjSlQxo&qh-JRR-= zQzBJ5d?jWEvdj!*>Gx&m_GHL+;z=-OP*o69RS=U@5R*}slu?pJLkbFV^7683GTcfM z>=sg>{jd%apu6)xm%7R_ILR?M$}%{~GC0dI1gbJc>T<>E3&rROW|<4;+elQ}a8_7z zG&*tj_=@!hOZEjxbO(vFfi7|aEepTYt4$>KN8gQ5!CZRtB>Fn3Qh*NK0=iR_j8f2V5Dsf;vZ<&X5{MP~8oT+y&Lq#@}4QvqGS{ z2DMf~)JKqm6kwxpp!M6R^%1B~1gV3NNj@bjuX zt%s4aN+c8)psE~M4X*S9Q3I*1;3P;ftV4v>)B#sU(IV!tBIclulDK)CyiK%}d6=$+ zkE(%-yt1y4m;wVMHv=QLG&_qDAGe(}qCTp2W@`54YV&64^keVw=jiffn*hFPq9=f* z)0eTzNwD0W&ry-tMux#kmeod{-Ata-Qi(@JoLg3ag_}=?lShh|ON4`!UzUYIjTf|1 zE=q?b-<++?j<>~`tIt=Y!(CuyhT;E9bGKEyEr{S=7|Am$f^BLrW1b^-x)qDPm4UsL zfvJs=p@p7-nW2$|v5A$bnT@%rwHb(EW@2h;Y;US!q9tjkz-%D}+7Ih24O;NvDi5xV z&OGfNg5Cb29X>*x0YYuQ{B8ci?ExZH z?xK}$BBhRE`PKrNCQMnT%=s1!g;orCHXsD4i6G=4sgIB=9r)Z3To6(Lp^>E4N061+ z@R|t11(S&3I9N>txq--)SJ{~x1tI!G$XDRODBKGlHBf2dRl5 zByxQOTP6p+cnJ9}JgmJTR@&fNT!EP9f|OC@k+8Z9qk(N5BaRJW zwFPTCA5!KM*)77`vjFvv;Omyr7`W;YT;f>WFKLP4eXBT#N=Pk(hZMDdw*)}-J6cTy z;fk9>O@o+;OhRHASp>p{q!u(1Qg%XZz!3M9EK!kewDM z(i_TM>M5S%z^7wmqi13#XXGhs=pkj~DQoH@ZW73=Mv2DMnnWMoifj95s&2jcy#tjta5XQbHy{VisW{ zrr`pH!NSH7!p0FoM&Uw6;i5*7U=%KD6fS8LF02zMui-4C>Y$^bq#_}uC&8gF$!Vd$ z>ZHYQBFhfik;KNJz{X%B#_FcPAF0U|uP;z&##nB{-s#TOaK|~;Z9LOG3@MI8VaRPJ&0=NPKcXPl;CBV6; zH4&&jg7knO<8lxZA_5_i`$6#eAP@uGSAx_;JUEtcL&oSJbr6iS=7rsP2k#Jp88+NV z=ba)~G!Qn{Dux(A`iMiy5Dnp}j382wOf3c;rh{i~TnxBk zF|%k$iGfxhp>fgbBd`|2^%1D>fsO5ewIHf0!dx8d)v;Da;?^;c(h`$|TZATsrzA&{ zf)w4jNEyontO#E3;H+3tJ&5K!I2ThpL<-H3m|P<3BY5~wg@IfXLD*=MLy*l~ks&~fA=waw@=QV7g^H~hiYytL zT}A301#y%dbhu-!rG!m`g-n7VcLNC+gu;<9_%t42gHR#;5HbA_af46|Lmw?;KYL?SV|7g% zH8C><0Yga!JuwCuRtA0s1~CQ(ITi*Z9tL|!jxaU$C@t_^IAgA4W9Bq7wj4{&Tq~|TE0#PfmV8Twd`pI6 zONJ6FhCFMgJZmP%dIo~^5w1!Hk2rWyoF~@G2z;g-=tu-ey@QK{)JKq-2tq?eMIE?ZFuJh(m#UtgCGpF z%7{bCP{K5dBqU`Tg@z#7;e`Mi15yE@35uCPyGsyR5pz(#39W>o5f`}zL}sJaN1~Qd z5CnGyq_~EYaB&C&A&$Df9kV=#i$H23td)+8Wr7?Af~dr-M{tQioCNU~gpEvM>Vn9^ zorlH{wTMGQ(3NahHVcr_JOFop#If~jAZZ2~yco?Ph%6=vNsVw4(r^+ti4*}NQOhVn z(=a8=P%cGVB>^Ta2`*1%248iCG((0QbI|tHVk?FcYql&yhDap_A5jKZAqFRY20IZ3 zA0-AGH4#r^MQP(8Ny7jkqi|v42*_zUkb6JB=rwauNO)s3OffwjtpG0Q?Q!;C-0 zR5-;{IN6dn%|;;GhCkbeKhK&w*NUsak}cPaIoF&a*PJ2Kf+5G6Da(dA%ZAz3SliZE zTU6IaRM$s9&r<+)PATTu2)HUB$gB{A#8n4D#9=G6(XU&DtVw{^P0*@G9kwh%!v%6W z9HcUWut6m1RdW|}2&F!PY+}V#9}!a-amkv)>m6JSE>N8%=t zcn~~a!x?Cmkrb>t!z>vfBEn`-!e)peK{2yvNUIfI6Tuk}^+b}8Vu}nBxeh{M!zv(1 zH3TQ&RzQktI0+YrFc9J-)<;t2aafUzc^n#oD1+A{D2|713PTozI1J(u2pgG1RtaH4 zWFeJ~h;ffX^f*LN`lH6JnT!GpQna-ju{=(I6Tuq+5^=@2Mjy$C{?73$A@fwW&G7LVF z3=U!pj^Yev3VhzC%F@O`(#Amo2BDylG?QR4(-6q5+&pIC+@@jj=6>S(ZZZ4Q>h9pCVWK)Jza|TO& zbx3_AqT>y(k8o8zXyW+$LC~E+h-KK&a}wb74x9n0kI<_jaD4=-Wxz+3f+`?1h8lrE zIe3)>sd11=NKFJGA>vf2k6_CZpljzKH4d(=tPoi^$t7=zr#>QB8R4pZ;Nl$8rVxZ& z`#{)Os~$8#NWFuLgjYsflKRMb7C#%3$1zFBupWej)IJarSp>p{)I<;xOL>Ai(*&=l zureS;C}<%BtbvVO#DUlrQQ~L_s*1Nq9JTZoh5A^DPsQN=B%q4Zmtw#qaJIm7Hh2* zWv&)(qU@}#Vy~`Xpsa4Bs;QxAuBB}$Z|I?5coyu`YL#5w~+dc62sojG#ALvTe7 zT*Z#u)h>ed9-<9iLJeL*jh=$_?)-HwJasNSg*IGSrp$p_T;3`yzG@7CT8xHTiWY_% zl6oFu+O9&-Q{x~zgFs8;AiW+mQV=#1gr5O9ApsXDq~QU+NC>hz4xNP8O`!S++z;YW zcST0rYA#Tu?!*m4kaOgS8H{W`HJ%lb%2qszpuP{J z0)p=tLS{e)<#-jW_!TYS2wwX@RxF?m$3eK@iV0dbamib9$eLqC>@sE$1g~+hGN5&k zv?-UgDWvvclQCw4o&3WtWr9FR%ef(AaS#$$9Rv~Ql+Z&%kUWJ<;;)r3Wg(>jgoM;Y z5E5Ag!WK1+f)ogFF1&=n9Kpj{4Iv9cN??cySV;3oF*F2Sral1m5uREAQvPF-khRK~ zBupjrlnh89;2|Ns3`_?ghZwRfd}SI&y#oq2O!pw017Snz4oJ;{Ov0-h$b=4ri(K_U z*l<}07oJoIGLY*dNs~xP@PrZA5S02z#3T|_ahQZDnguDE1^HPRc$?}58_NY7%Y~aN zM3^Z?nyH4HD0>=ec^PTj8kjg3o10s>nOnH2nE5Iidm-0HqDJAu2BD&01icXz%mxV> zg^L;ng9O31FvuDP$bu^)c^D~e7$60zR{Y`h5%dgF)Y?Z#-w(Maf^Z>c!9nik7S!_* z0_{Te71r~S)^`Ifv`|#$;Ni2DWpGpmozD=e&XA%Bnzza}0IjA-RcDBnWeAjIh)`jO zQ~(`eS8mEsVaAYQ&X8^fI;5i1nYYSWpw3;S(ObCDTe#Umx2@doHA5I>-bNoP>?VA;#-?lq~VrNAQ{m!hlys z{E8OH^%11@!6Z3k&B0ZYHHR#EWdx~!&`7M+5L^&feFWV)1g?+RB#l7`wt)yz4Z%rB zy@N)wOBz5BT4ls3p(hGjzyn&k17{%DO>j1R1YX2694itx4M#%~W)Tu*5txca%_2n2 zB81JMNXRT4&K9wVM6Pj&VMEG5EF@ZG1m}WlB`oSm5l1e-DP%*c65@OcTEGmck05;z zOkW_I1C@n%oM_)6*GJfDACMbR#nJb#K`IV#MSx{$64IW5EHHrdSuj;X)IdmxX}Cy8 z9fVAZn?#BkM?mx*PFw)`JXy+aX>-mU)yGVk1KD=6nxE@%%CT8)Fuh14=|68Gqw2U<-8;esn8cktFBXy*vtCxY)Af~-}5^m-t* z5;6&?ec+W5rz)tM1M2BO&XhwUA!`!2lpL`lcy9<^2O%@yJtJ_32r>`{=@}uBN|roI zmS6=SLF7sY)DME(1`Z}6QsC;w5?&Lbjm2@unSu~Tw+O8of^#8N4~MiVguy9e2C0LP zNl@nq)<1&OM3CV)v|0wt)#s3cB6dkVc1b-hNka(YlrVrZI3)~V2)%y0uE=;wq^iwH_u3Ev1PrpNUrw zshEgY4^fF+gWzXFY5`b%fnEz>ng;2oh?z%cigBo@aj3XSu!L!dq)9NON)k5;ls5{MH3*gjS4ooKh1L*_5EG!8 z1$_M+a!mvpaD%NnfQXBOZ|j!O4+0_ZiK7w*0pj}pkTEyV#E@YCqyoZKA3^#-@M;Kr z5(45h9(a8upzAGc;4fhsqNHUmBBN<8$>65JUEw0oBQ<`yr`W_Ok*H|9oHraC8v zTsy8bb7mK9Q3rKFeN`DF4S6Lka}^y+P-Ud$DWc^Gua96mRZ*77!D}C47?64g7YVy! z)fH0d2!IFWARQt&3Atz$YkdS7Y6Fk65v-3Ol@Sk)%7{zJk(l}jR@WfvBS_7{tpFOh z<5si)A?!=Itsu97Lr*RRO%hpBw?0Cycd)YArA#;>2?SsBr8&VlTY9a^; zsdu;}4IvB+Qjc9s2N{Wi^CL1TW)g-6K`J9;1>&aRaIU0jI2wXfOt|VJNM!^kh0P<7 z%QTAE;9eMJeMFI&u)SK4qMJezGM0tQ5KMPM3Ug%jm}-z~aHuS}iwb4K)iJqd*8-6uRpNy!Qv3N5oBn#X$-JM2(}zBaauX1w4#G-8sv#lp6{zsaNJQUH2zJCMWY-Tw3ZfWtLV}d8gSeKh zhNhmHx{ig8ij9FrpqWyzg-W=ELb!#3tDdrhrks|NjJ&v@f*`BB0E?*@gQF5hlpaI8 zF>{eELxnR#xdTIuD?_CNL#+!#vnx}xD^r&TLx&qfu04O61&5KAu923msHUB$rk$X= zn~;XPh=v~sX?clgc?oNJih#Bbc|f)WA(QYG2&fF`LIqq^4@4ZcIS6_@0=yc6GvL(_ ze2pA*HQNU4(gv9I4y5)$CRxRGSjBbVBXaCw2sW}Ha%IFWrUS`S$Ru*z z1ZTr*9L)Mi%p?r13^Fp0OhPn5bU{cF({N#vFhNrgg4aZF2E4Q($N&|9rl1Ka>?(1L zD?)S)1`=KiAUg))7i1AQ8=@X<0uC+A+JkdXRFP|p{v%U4j(N7leuM&HT6(9FQl%*H^& zQdiZ-P}bW(##c|wQ%lHIPuW>VQD0qAQ&C!mmr+5G)m(zXMwU5RpCQSNt;C+8#+|v| zgQ?kvZC$D1!VHzZ0N!q2u09{;HWyHRlx4$Xre|oPqc5syC#Gc&sgH!!eL)CZ6A5d2 z3WHWGps&hCt%Ka*!)pQ>*hl2hDj+m2yc&Wt;MEX(P!3WbK@QDEu7)6Nv}y>>MXP|g z6&(m5*wqj4O+%1X3NR8{AA#x}1j%jl@Wvl zO&B5FJ;W|+0z$Cm3ivA{Ht?!B$eILD#RRU8;MEYE0k4c8^${EDh#b5QLaU@8^%11H zfmcAB5_%93TqJT$1YyG~7c>TRr=DRjgaIdoO~T-8NbLh5A+-{O4XKHcNl1N!ObVNT zHm8G%HE@9jVL*x+WD+6{Cy}L4*cjuA5ZmDziDaOapAasj497}B6yPDzbU{jcXsrS6 z%V72;AtKP8CU{jdR2R5fK{gOs98!Z|m4*0S7&gy>>{^Hvy!Qby1nXFth)J{vc-g8k3er3yByHd)A+aW89139|lkilB%m7tAu=yR#`bZR9D?wHu zKuPeb1IXAKE>gfK5QGea1wcE4K$AYuwb|gIHb|ccLc%K-2p2*k&;CHVMQ{=_LjQ}M_AuWSl>$!j38_=T~Bd6F9BU|9!+-vZBHRx zZ^$koF@0}QJud-mJ6;WINj^?-9(D&w(D~tEx(ty93>lUTdG-uBHlW*%dLoqCgJo;o z`O6$Qs-2mtoEYP*L}M(3RrMTH^&EIK-NDF%R||T75M=!}ghZ=-AY6DIgq4BZ!6C?o zjKx7}95j+w*$E;dsOH44;sB|SAS9$F;#P5h*FKmG7%@8BZ26|9ge8ZLQDNbQ46LdNTO6cAV7@yJ{9Dp)~!Mp(``l`(_Q2SFI1>IU9F z05H>^{ULHaiC{+x2ngAjWCovU3MR3$dP+721 z#IQkyEx3TC76ZA0kbw8^AUzAbB&eu24ack?kR1jO7C3`=S%^xwMtI=BMKBmx>LXC) zg1ibEymA0ESp!}SAYmK@_k%Mabr3QMQ3)Yo zXYY7}D;>}^K;Y^L%7D~ILb_mCSbZd9;4Na{Eu`lurVB!_Gkm<^=e!AOxryj{3h6rV zY1+sN@k;S?IZ1TmJ`zy(g49IdBehXFM_6kgOhJNm5V8VDjf0hh47h=6B^3wE z`UtZELKC5SeFR#QpkM{BjG%oV`1N*3Q$o zH~4^4ZW%M^8KR%Q40Y`D-m~<1TP}sMG7WE7;FgWoGx6o2GQd15{y^|xU__f z=MY0eN@)^Er1n3g2Ek84`aUF(;Lam1_h3{Rpv}(ku`*0q%s_+pQsMOxCIe~?xb}pu z96+vOz#$Cn4T0AWAoq_jZG6VtGj(6Wb|mCdj2CZOTYuLU~82lFtF#fMK7y>xhAV(@kxA745$IwF%sL271X3A+DkjwB3TU+wT4h91eFVw%{02dg zh9ab5!bJ*!r+~2*BM@^S8sU`}gaPRykwjv(0*@dubr3-{xT*tC;f!1|V`HOLPnb0k ze(yu-0=NUvoQhm`KvW_tK$eBDF>42??ZosS!EFYp`N%$n)klzO2${s%KY~OSBvrtx zAvgmr3ui!7LRv=( zfUz=QLB~)>e}-)KQYqvX|6$ zfYe8j8V6nz@oBoFoEHZ^Bmq+I2&%im>l$PRq}PLI{0&|QAv2I`BFJ4rplS%dNSjc7 zgjN$lxR~`3kAgixL`;2zwE{vD1XVOB)eyMq0f`V*A3>L7OPj(gAT$QNx`8v$>LcU| z2#t+aAF+aV4}sQlqt!vkTw>}Y%*u$Q`UsNKA?-ut5&#byQihH-`jq-KHC zAdq#}q6U8G2wWc_*Gdpw(7Fjymm!V3LFyn(60$rSQokVAJCOQFK*vu|*B=={>K90D z0%1dHB^c=~q60^u+6tmZNXrYX050V%q#Yon9RR6`AUApoYPkq%x$tYd@oBm8Xt{D} zI16dm^Q+p(Dj6uK8XIb98fj~pYAYLSDjI7j8>uU4tEp?Lswt^zDXVEqshP@ZS_o;_ z3TxW&XgTv}IrC|`@@ct3Y99!RIRpo(iI8g_JZ$9ZhA>;%2~-V%N96cb96;Bng6kmU z-Vo$$1Y{978&dUfqjr%Xf{^|Z!D zBqU#Bl8~GZ5l1T@$lyZiBQz3T6QPNN#_e&85F&~}+&wb-s3Ez`2bbrV^$so(i1`p% z2wRNe`Uq>tfEG-G>lA1;hqQhY646+7K?Fex!0Xu{^$~QM1~x|nshH4eA4s}F6NGa? z27;GW!x@kih>L`nhDkzd6JhWW9Fz^NQy^n&BH$x;L=F7l4A5YmUVsP~3F!sE5hSNT zM&U4PA50Nq>LURiKL`@k^@r9kNVn91D;ghojRW0qs^tk0ha3tAIg`r8CQwTRQb!PP8uI!>h+h08#AGxz5JBuDq)wq~H4P75>^6ee zMZ?!ugDN9fO$4c5ApIkFS|Wx4I_w&Jlqfu1;b%b1grsW72|N%IB7#DK>m%6e0#N@* z1bpxglmypAkorhOFF;t=A5tYjO#q*q0I7W-H4cOgCm|goI0>m`AY7DQ60CD1paa@i z1W|@eg2w2yydc#LnDm6#M_3smI-tc20y^#jnttH=$dgygLsS!lAUDP#*GDK75Ylo5kT}x1IINWpyvD&?x_~SNuZhs=Ajrvbkei0!H4YjBQn_G~@V*ZR z(t$tV5jp6QKFF03ygp);)MFOcK}N`R5UaQjiefve!9TpUt3!xxniX&|INB1$n?^$}#98}jT7#4bG1fvgy$4BS5gRYnG( zkZK6AT@=Rv8l)1zO5(45;Ifcziy%_73R-bs6M@t$keMA=4FWp<4O$aHY9*xlNLbfj z7+i5cHR=Vx^+Ku`NDTsEVmxo*cjWqrThkl4KH^mO zV_3G9A^L_k;ReOm^BfkR>D;u;i-u*D zAm_`0j+=wjN|1U7QU@`M>%kGLxE>fGdPb1`5vQm&C+ZF($k+wE zu7NYCTOWZ6G*ZTANL7zkep1Lqt82*MV)jSyI|*9Xz-lpkiXr_J)Dbgu7b4X}kh%#z zJ_a`#62ed}YJCK`JPs-aZbg6;6I~y{(;Hg#K?zq-KTv?U^bG0az^WKfO@yaDf^0#B zR5T*`eh>yGiM$R0A`7pNKvfcI9R#n8Ablb{^%0~J0h2`4N8suSQt5zh_lER|AV>E= z&W?lBGW;4Y0-CNu8lVaYvp#~1!okE{`P5v6zzA}21ElW*se_QuMj*95f>%bM9Yc^& zIAteDrGtkARYppVcq$`)=r&d8J$H~Q2}fmQ3){PD3pzXjHV_A?ZU~aF3J7w*DGBuv z{JwAGItZB!KiU*>f*hyLz09 zBS;;DOp;n3L24PINbpfwLV5wh`hk#HUyTDjhKC z4(aB=N$5-vcwsi=XgJifvfW`^P$ea(?gkmD6Hs##P;=u^cjZ=tUbhPA8Nmmsdtb`NF4-U1&8Z;JMba|8&GFQ-i}w^4#a>i zQ~(Pq*ud%?P>sWj!bQ|XT(XuJ$Q;r$;+8Rk^pD^hg%Au`6D}EJ1Q)z$0aW`)n?kA^ z5F1<*aZ4G2kpZ`q0hnRLEos6nX~HF83}+y(mqTSkkKN{w(#Kp52dQfy^$v`LS4Pki zejukSK>A0hB&cV^E~dvWriXuY4tao%LqrQJBB~~WjHQEfE~Gv}&a=?$kI^Uv6-xSn z_-h}^Wr-;s>8XZb1%##mQmudn?g-U%5S3IW4MHK8Uqd=aq6VQt`XPdP!N|*6An}MC zh7b`5i7XChgQT!kF_7qnCK&K=87?;Bj2+mzYe+&xR7?i`$cYei&?wkINL@n=iB=y$ zxM(D#mO&;_IyFdD5@bgzwDy72N}4`m5QerNWXuiJMFKC(M&d$de<0ICAhv)uXb28U zYCvlfaJ>WV86mU5wG8MyQb^5$jf5_HgLHQI)j@R%q>2Hrhy&dy1gUEv{T&z^RQo_g zV7(!b2;>$atS4r}>myJ%2Rt7HshH4e96Veeq~SPtrGw0X*GHh52)q{+x!&Q0UG@!U zKx!fgNlbmjBX13=hRCgtFlr)5-Govfp;bCiF1S7dRXX6oH)IBKeZ(mZsf>)kl@X#o z;*u~%MyPcV^7-4)`UtHuf*hoPt3HApf(lua03jhY5u^tMTeqOgBBskG3PC7`{~%XS zXhU;ol@YtJCIms|YKbACMTcGhXjLjW??XkvYyrIhv~dzpp+txJ2x%uH3B4*xbm8rh zfd;Hd8E*t#3<^Fd0CEKn9un$1ytcz@1acVQTd<+AMoe!PtOj{qG)k2L?!Q3$I50L^ z4MGf8OwSL5bOXd-NL0sPRL37A2$6!3klqcXCKA^3MefKTS0c!4wAu%m3$KGO+bUXrNEgbabZlntuAoUJb5;VD^>H@E8AoUS4373K}xFL(N!SxZS zVp4O092|$0gfGknF_avMtdF3T5lYVpdfOdr*v%SJ2Vs)nP7wOSY`7rgL^x2Dgw!(v zcZ(1`ATSryMFKM*;-Eu&;FCmH8Qd~vT+*iS+6OWy2O%NzL7dV?5C)ut47lG7o3_mFEM2ph9L zg3b)|02>jX(+PCtYA4(bxTwv$>7A@_qss8k<8 zYbA{S3o*41rW#1S0I!V1bORuL5QsQ338_s$i`I~;4CLAeD;uH%>*XM|40!4XR3-6gVyTHBbr7Vp17Q=aj|9{} zm64#j8)Sry4)qa_k|VSq1m2?xsf>7(?coei1%$R^2v#3Ks~br72bJVkbl``JAk|0k zY6x89J6Ic1^}tseU}Zq+BXD(tsCU4%45Ytf36TQ#hCqEEuoQL% zr;Hiod^u=M1RjWkS3__HybeNUz-uB11707&sv&(C1ABc0>F>Zu^s^M;H4&Tv8Eu1+ zNEZ^pDLX~~gftot6$dYi z<=6G+*Y$_iP&BTOh_B-yr8*5saDRbd1%#%6`1%OyaYzm_MTCTK_vJevbyKr0$Z1q7*9;3Ry02Nwf8 z&W0F;L#>aXRS)vvJZS5#p`&f!D}f-L8e|eO$c9GpDm#HLutOuk_u0Wu%!V*PH+O?; z7D#;rsdq4!WJAP}NzefcN_KcEBVI)Z$O<}MMF-H%AaGp+u6iI#<3J?%P6zM}ci?;O zcoeN6B4849*8|qN23{+HCVD_C;^5T~lmV-8p!E^BCW5fR)ez`NANbim2nI?W1gVK2 zq^t=yWPlDlC4^iDAurnoS3?Gz(oh7ck6?8WyfV^<+_(xQ!M!AQ2_p~!9q9vKEQeAb z!RsJo2BhPILPBdI4K^_iQ0GWY3yowI)kK~TViwh67S&=A)&x~UqMEEQ#38B)LLyr1 z!kQdl5?TR?Xs}~LB-KZtIb3j!Lri@HsgOetLT5oDz_gyhxofFnrdf=NPFvLRJQC}&^8t06c8S2Y9?hfMuI z`bQ9w*7XsjW+9_K0v(hM?gSC6kGSET9eDQ#8v{}wfhr>fd)QbUs2YOTO5l12>0Tj} z8&?U}N1QTd;JOJ~4Po?yAaxKj39XMHOSRFq5uw#bkXa$nz#Wci2(2;#PXj?`d^n_# zDgTp39i!FooJDhX5{A=f@=Y!)%lqz_7EB&NeIt_`k$u-8Y> z>IUhYA4q)!s*FUmpt7QxEbtl!Ha!HbjD$6@Rz~b18j$=*JP8>OL9S(>MGCmAK<*_$ z*%a3}$aZ0}p|&HPicDP*+~u>ckUkm$?PtNQ9$i*I-%k)$(TGxMbPKKI2lX$&Rv?Q* z=2;*jkog@58$uHAeuzqt71#nFmpBz-N7oLy$EIu$l-mA_uCE;MEY9yd|8C#=tpJ2R_M%Pr;l|0k%I#9#kK}IziwX zN7fv?707~9#)4bMf=AYpN7j-X#5P0R?|`v=719ZU>|KShk-I|NQbs(|h|NUc-jES8 ziK9LOkIqSgNNh7cpt=cC6M?6J40t3!h)YZljC8ofuprF!*lc22u*yga+&O|&NgD84 z2F?I=d_+OL9u9cL1nw5Gh^VoMsG-z2$n_DtlETKo%%ONhKpQ2%yUQW95`=`529Q=O zv<`ywp&;ZCua7WmK$wUR5w3<0l35{RF^AKttA&L8=2{NRorRN!R^5H^B@j4D%HA3^#!)T)o*^$3Ik zuZ-Xo3Pc=E!fPVP6|0adt)X&X5`hMq;UtFe@Xp`UtJp!wXx7 z0Ga5491RCyL+ToWBqyw=1Fw}p^$Su3#G_~ptA^mUk{!H?0V}qH_JGK$k04bNghamQ z4&3p9ErH>iTQtioy{NIwWvAAu_)Gfo+3eFUzFKot|D2L$OHVUm#A z2XZ_D=5hrtNkeWalnMyBGUAjr!e1Xjsv9H{vH}57A3GSWk?j39LoG6|m; z0x?jA<}_GE)nN!R3J0l@FzX{05e+aRTpzIstFsBKqtsd8>IpU{3%VhN7&h#zdv6Q| z;`Dn+Jp~~_O<70*LL5nCc{OO1fo`IK?dS)wA!RkB+$N3$b+V9a1pI9BJPt7vt6qo{ zoJ3wBKnxqJnfL_}H4Bb91+G9?&sS8}2l<##FdMYg4YLP?yq{D^%L~4Q0KAP9w5|YB z0iltQ`UtZhgi8due*{?t2O;703mOA!ox%@35&_b|K_hXghlqnJBQ;k6b!Q~xD4_0$ z#Dz-ngZnuA;1zBVHe}?DSJ@eioM2oS18MC6qz;1AN02?I5E5R?pfMn_0?PKtyHK(2 z27=T$FcMTH@hjQzDOzLe`#^d?2ojY6!ka8+p7A)F(o!fWTD`Xum3m0Uo%6?pg&Ok^tHY#18Eiq0~pnbr6IN z9+X3$8-fc$>K!Nv=>&m$K+u{9Ha7%cG6$}VG}u9%BWV8!Yo!CJjIff#)IrEEyI_w&2eI%p>y`>aV2O*Pa)ewXWUipS)s7*k_9a0~` z84#|ZCg`A1$W#s*iB=5}?C+peLvSvn-hr;oMp~5(sfi#Ya_z&f<^V=ckV*%Qgw!lh z61*52Tp2l|S4PTM7G^{4ae&t~0AWOBu^GAl5l@X*91STOX;y~39p85z< z0ilubst1h$t%Ja|50p))K4KRJO&EdajMTtY5@N*y)~W}qAfC}V98#XhTgc&TTqTTv zmJdk?mpMa49MUT!SQEh&z|ZhRuGH|dNvo>h+i-}i+Te8!@)9*T8zKwmf|d;s>f=xv zaj=s@Asrbc61w0Gtzv@oU*OdToBCLt9HTmgK94L+oXl>yO(M)GQa7Pi5L&Y;sb zkSYdJgTPP2hF3QbS;#^-2pggut{xWyG^z&c`+#d7ob?g38iI9vpeMqC&%}oGebBl& z{3;HDDh}{^2U!+UD?wMm!3O1cQBRG7Z$yPxKxhm|bpuxhS%(0xkNCioL_CV%LwjH= zwBeNzH`1OUaD9YQ6QNBH@yT2AfmX^P2IwGl6NH4U!G_i~;CUZtj|RL&2v!q;M(3bv zz^9l(Yag&0(6R*h-c_Xa3CQ&kqymDR)dTPT!0RK(cpbNt5tk&qH-yPR9Hf9&8F5O0 zM&#H;b-|Sp^!x+}8?!#*6x9VI&^dBwl@aJm&Fo1LMbihvDG8s@iAY_N(Z?DLSvKR z4D9Jd8%z2D84f$Y8mT4%?H)z$`@q@Y{t;;N4y1B{)HRT51+4~wbCK0RRN^P$8X*kG zf;VF7BeePjQu|<%Xv)yIkhR(1Y6#vT!d@T2Dm9fVgn_H? z0}+RiNEHoYkPT8LL24pMX;TP-*GK3Ky!8>gv=OVM0eln=xjup}(#BmMK{`he61hHt)H|po zdjAM=S^~Hlf>cJjm^BeZ1i4#8ph_aQKEhh>;1VRhCW5HMsD|L(B`oC$sN@-u^%10( zM34lp2Y^;OxCcVvBcc!n4XY&3(mR|dt$}83&?+NH{|FSz7_|v#=?jJ+R+ab#(W)T` z7hH)Tj%$P0M}nHr6Rk1p6iD@fNkUZetGhuM5E9OXGa%vupz9c*N4i03A|7?<9jTZ# z3q%C1LVDGO>sV{7W;62~+FzX&dKaETMFgU}Sf`$vPJJ`w^g9)Q+6XyafIE~Gw! zupuP=(4$-yQUSrYmx5~|$czv6`UtYR0A78-8JIN@L( z!*pm>47`3pW5AW6?^i`Sp9gDY1Ui!kHUtN*dT`c9yebaJRT3HX5v2BkkITWiunNT% zYrVsxh*KP{5zGMXSA|q1B-KaYDhae!0iqX9LMj~)SJoU-A3;RWNc{B?xEg{~M&^*7 z5rjmmh9D=)fk_n;cufRhAlFCmItZBoR{*PqAe)8^AT<$$g!GTFRz?F@ zA3?_AD6fxLh1B4P_?if&5;PuyJPr?I<5fs?kus(v+LC7RrFYydmLc)cI4>aLiJ zt06TGq)tJrjNmucVd{dY$3;R`XoIUEc!!Am`UqaXKp1?myM!QIWRhC_Bgj}BxH7VZ zY!QMjhXW175!pWica+fQhL9^A$VuABBFJnC>myJd1i9Y= 23 is required to build the TensorFlow Android demo (though it will run on API >= 21 devices). - - The Android NDK is required to build the native (C/C++) TensorFlow Lite code. The current recommended version is 14b, which can be found [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads). - - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TF Android demo (though it will run on API >= 21 devices). - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.` ``` - Android_sdk_repository ( - name = "androidsdk", - api_level = 23, - build_tools_version = "23.0.2", - path = "/home/xxxx/android-sdk-linux/", ) +android_sdk_repository ( + name = "androidsdk", + api_level = 23, + build_tools_version = "23.0.2", + path = "/home/xxxx/android-sdk-linux/", +) android_ndk_repository( - name="androidndk", - path="/home/xxxx/android-ndk-r10e/", - api_level=19) - + name = "androidndk", + path = "/home/xxxx/android-ndk-r10e/", + api_level = 19, +) ``` -Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md) + +Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md). ### Build the source code Run bazel with the following command to build the demo. Build the demo app: -bazel build --cxxopt='--std=c++11' //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo + +``` +bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo +``` + +### Note + +Currently, we only support building the Android demo app within a Python 2 +environment (due to a Bazel bug). ### More about the demo The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app. @@ -95,7 +101,7 @@ The demo is resizing each camera image frame to (224 width * 224 height) to matc [On Device Smart Reply](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) is an on-device model which provides one-touch replies for an incoming text message by suggesting contextually relevant messages. The model is built specifically for memory constrained devices such as watches & phones and it has been successfully used to surface [Smart Replies on Android Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html). Note that this model only works on Android as of now. -These pre-trained models can be downloaded from [here](models.md). +These pre-trained models can be downloaded from [here](g3doc/models.md). ### Retrain Inception-V3 or MobileNet for a custom data set The above pre-trained models have been trained on the ImageNet data set, which consists of 1000 predefined classes. A model will need to be re-trained if these classes are not relevant or useful for a given use case. This technique is called transfer learning, which starts with a model that has been already trained on a problem and will then be retrained on a similar problem. Deep learning from scratch can take days, but transfer learning can be done fairly quickly. In order to do this, a developer will need to generate their custom data set labeled with the relevant classes. @@ -104,7 +110,7 @@ The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tenso ### Train a custom model -A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow’s Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. +A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. TensorFlow Lite currently supports a subset of TensorFlow operators. Please refer to [this document](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for details of supported operators and their usage. This set will continue to expand in future releases of Tensorflow Lite. @@ -128,9 +134,9 @@ Since we employ several formats, the following definitions may be useful: - TensorFlow lite model (.lite) - a serialized flatbuffer, containing TensorFlow lite operators and Tensors for the TensorFlow lite interpreter. This is most analogous to TensorFlow frozen GraphDefs. ### Freeze Graph -To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as “freezing” the graph. +To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as "freezing" the graph. -The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md) +The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)). Graph freezing can be done using the command below (and modifying the arguments appropriately) @@ -155,7 +161,7 @@ Here is a sample command line to convert the frozen Graphdef to '.lite' format f bazel build tensorflow/contrib/lite/toco:toco bazel-bin/tensorflow/contrib/lite/toco/toco -- \ - --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ + --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \ --input_type=FLOAT --input_arrays=input \ @@ -183,18 +189,18 @@ with tf.Session() as sess: ``` For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md). -You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). +You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn't help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). ## Step 3. Use the TensorFlow Lite model for inference in a mobile app After completion of Step 2 the developer should have a .lite model. ### For Android -Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). +Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). -The [demo app] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). +The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it's a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). -Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). +Note that you'd need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). ### For iOS -Follow the documentation [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app. +Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app. diff --git a/tensorflow/contrib/lite/build_ios_universal_lib.sh b/tensorflow/contrib/lite/build_ios_universal_lib.sh new file mode 100755 index 0000000000..cbc96e6edd --- /dev/null +++ b/tensorflow/contrib/lite/build_ios_universal_lib.sh @@ -0,0 +1,31 @@ +#!/bin/bash -x +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=x86_64 -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=i386 -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7 -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7s -j 8 +make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=arm64 -j 8 + +lipo \ +tensorflow/contrib/lite/gen/lib/ios_x86_64/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_i386/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_armv7/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_armv7s/libtensorflow-lite.a \ +tensorflow/contrib/lite/gen/lib/ios_arm64/libtensorflow-lite.a \ +-create \ +-output tensorflow/contrib/lite/gen/lib/libtensorflow-lite.a diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh new file mode 100755 index 0000000000..778d618361 --- /dev/null +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +DOWNLOADS_DIR=tensorflow/contrib/lite/downloads +BZL_FILE_PATH=tensorflow/workspace.bzl + +EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" +ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" +NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" +FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz" +FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/master.zip" +MODELS_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_224_ios_lite_float_2017_11_08.zip" +QUANTIZED_MODELS_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip" + +# TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, +# so work around it by patching the source. +replace_by_sed() { + local regex="${1}" + shift + # Detect the version of sed by the return value of "--version" flag. GNU-sed + # supports "--version" while BSD-sed doesn't. + if ! sed --version >/dev/null 2>&1; then + # BSD-sed. + sed -i '' -e "${regex}" "$@" + else + # GNU-sed. + sed -i -e "${regex}" "$@" + fi +} + +download_and_extract() { + local usage="Usage: download_and_extract URL DIR" + local url="${1:?${usage}}" + local dir="${2:?${usage}}" + echo "downloading ${url}" >&2 + mkdir -p "${dir}" + if [[ "${url}" == *gz ]]; then + curl -Ls "${url}" | tar -C "${dir}" --strip-components=1 -xz + elif [[ "${url}" == *zip ]]; then + tempdir=$(mktemp -d) + tempdir2=$(mktemp -d) + + curl -L ${url} > ${tempdir}/zipped.zip + unzip ${tempdir}/zipped.zip -d ${tempdir2} + + # If the zip file contains nested directories, extract the files from the + # inner directory. + if ls ${tempdir2}/*/* 1> /dev/null 2>&1; then + # unzip has no strip components, so unzip to a temp dir, and move the + # files we want from the tempdir to destination. + cp -R ${tempdir2}/*/* ${dir}/ + else + cp -R ${tempdir2}/* ${dir}/ + fi + rm -rf ${tempdir2} ${tempdir} + fi + + # Delete any potential BUILD files, which would interfere with Bazel builds. + find "${dir}" -type f -name '*BUILD' -delete +} + +download_and_extract "${EIGEN_URL}" "${DOWNLOADS_DIR}/eigen" +download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp" +download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest" +download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl" +download_and_extract "${NEON_2_SSE_URL}" "${DOWNLOADS_DIR}/neon_2_sse" +download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash" +download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers" +download_and_extract "${MODELS_URL}" "${DOWNLOADS_DIR}/models" +download_and_extract "${QUANTIZED_MODELS_URL}" "${DOWNLOADS_DIR}/quantized_models" + +replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ + "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" +replace_by_sed 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \ + "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" +replace_by_sed 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \ + "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" + +cp ${DOWNLOADS_DIR}/models/models/* tensorflow/contrib/lite/examples/ios/simple/data/ +cp ${DOWNLOADS_DIR}/quantized_models/* tensorflow/contrib/lite/examples/ios/camera/data/ + +echo "download_dependencies.sh completed successfully." >&2 diff --git a/tensorflow/contrib/lite/examples/ios/camera/.gitignore b/tensorflow/contrib/lite/examples/ios/camera/.gitignore new file mode 100644 index 0000000000..9e8962f4c6 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/.gitignore @@ -0,0 +1,2 @@ +/data/*.txt +/data/*.tflite diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h new file mode 100644 index 0000000000..55891c3ee1 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h @@ -0,0 +1,21 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@interface CameraExampleAppDelegate : UIResponder + +@property(strong, nonatomic) UIWindow* window; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m new file mode 100644 index 0000000000..128266d53f --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m @@ -0,0 +1,44 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "CameraExampleAppDelegate.h" + +@implementation CameraExampleAppDelegate + +@synthesize window = _window; + +- (BOOL)application:(UIApplication *)application + didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + [self.window makeKeyAndVisible]; + return YES; +} + +- (void)applicationWillResignActive:(UIApplication *)application { + [[UIApplication sharedApplication] setIdleTimerDisabled:NO]; +} + +- (void)applicationDidEnterBackground:(UIApplication *)application { +} + +- (void)applicationWillEnterForeground:(UIApplication *)application { +} + +- (void)applicationDidBecomeActive:(UIApplication *)application { + [[UIApplication sharedApplication] setIdleTimerDisabled:YES]; +} + +- (void)applicationWillTerminate:(UIApplication *)application { +} + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h new file mode 100644 index 0000000000..fb5800e86d --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h @@ -0,0 +1,48 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import + +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" + +@interface CameraExampleViewController + : UIViewController { + IBOutlet UIView* previewView; + AVCaptureVideoPreviewLayer* previewLayer; + AVCaptureVideoDataOutput* videoDataOutput; + dispatch_queue_t videoDataOutputQueue; + UIView* flashView; + BOOL isUsingFrontFacingCamera; + NSMutableDictionary* oldPredictionValues; + NSMutableArray* labelLayers; + AVCaptureSession* session; + + std::vector labels; + std::unique_ptr model; + tflite::ops::builtin::BuiltinOpResolver resolver; + std::unique_ptr interpreter; + + double total_latency; + int total_count; +} +@property(strong, nonatomic) CATextLayer* predictionTextLayer; + +- (IBAction)takePicture:(id)sender; +- (IBAction)switchCameras:(id)sender; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm new file mode 100644 index 0000000000..ea398ad14e --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm @@ -0,0 +1,506 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "CameraExampleViewController.h" +#import +#import +#import +#import + +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +#define LOG(x) std::cerr + +// If you have your own model, modify this to the file name, and make sure +// you've added the file to your app resources too. +static NSString* model_file_name = @"mobilenet_quant_v1_224"; +static NSString* model_file_type = @"tflite"; + +// If you have your own model, point this to the labels file. +static NSString* labels_file_name = @"labels"; +static NSString* labels_file_type = @"txt"; + +// These dimensions need to match those the model was trained with. +static const int wanted_input_width = 224; +static const int wanted_input_height = 224; +static const int wanted_input_channels = 3; + +static NSString* FilePathForResourceName(NSString* name, NSString* extension) { + NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension]; + if (file_path == NULL) { + LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String] + << "' in bundle."; + } + return file_path; +} + +static void LoadLabels(NSString* file_name, NSString* file_type, + std::vector* label_strings) { + NSString* labels_path = FilePathForResourceName(file_name, file_type); + if (!labels_path) { + LOG(ERROR) << "Failed to find model proto at" << [file_name UTF8String] + << [file_type UTF8String]; + } + std::ifstream t; + t.open([labels_path UTF8String]); + std::string line; + while (t) { + std::getline(t, line); + label_strings->push_back(line); + } + t.close(); +} + +// Returns the top N confidence values over threshold in the provided vector, +// sorted by confidence in descending order. +static void GetTopN(const uint8_t* prediction, const int prediction_size, const int num_results, + const float threshold, std::vector>* top_results) { + // Will contain top N results in ascending order. + std::priority_queue, std::vector>, + std::greater>> + top_result_pq; + + const long count = prediction_size; + for (int i = 0; i < count; ++i) { + const float value = prediction[i] / 255.0; + // Only add it if it beats the threshold and has a chance at being in + // the top N. + if (value < threshold) { + continue; + } + + top_result_pq.push(std::pair(value, i)); + + // If at capacity, kick the smallest value out. + if (top_result_pq.size() > num_results) { + top_result_pq.pop(); + } + } + + // Copy to output vector and reverse into descending order. + while (!top_result_pq.empty()) { + top_results->push_back(top_result_pq.top()); + top_result_pq.pop(); + } + std::reverse(top_results->begin(), top_results->end()); +} + +@interface CameraExampleViewController (InternalMethods) +- (void)setupAVCapture; +- (void)teardownAVCapture; +@end + +@implementation CameraExampleViewController + +- (void)setupAVCapture { + NSError* error = nil; + + session = [AVCaptureSession new]; + if ([[UIDevice currentDevice] userInterfaceIdiom] == UIUserInterfaceIdiomPhone) + [session setSessionPreset:AVCaptureSessionPreset640x480]; + else + [session setSessionPreset:AVCaptureSessionPresetPhoto]; + + AVCaptureDevice* device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + AVCaptureDeviceInput* deviceInput = + [AVCaptureDeviceInput deviceInputWithDevice:device error:&error]; + assert(error == nil); + + if ([session canAddInput:deviceInput]) [session addInput:deviceInput]; + + videoDataOutput = [AVCaptureVideoDataOutput new]; + + NSDictionary* rgbOutputSettings = + [NSDictionary dictionaryWithObject:[NSNumber numberWithInt:kCMPixelFormat_32BGRA] + forKey:(id)kCVPixelBufferPixelFormatTypeKey]; + [videoDataOutput setVideoSettings:rgbOutputSettings]; + [videoDataOutput setAlwaysDiscardsLateVideoFrames:YES]; + videoDataOutputQueue = dispatch_queue_create("VideoDataOutputQueue", DISPATCH_QUEUE_SERIAL); + [videoDataOutput setSampleBufferDelegate:self queue:videoDataOutputQueue]; + + if ([session canAddOutput:videoDataOutput]) [session addOutput:videoDataOutput]; + [[videoDataOutput connectionWithMediaType:AVMediaTypeVideo] setEnabled:YES]; + + previewLayer = [[AVCaptureVideoPreviewLayer alloc] initWithSession:session]; + [previewLayer setBackgroundColor:[[UIColor blackColor] CGColor]]; + [previewLayer setVideoGravity:AVLayerVideoGravityResizeAspect]; + CALayer* rootLayer = [previewView layer]; + [rootLayer setMasksToBounds:YES]; + [previewLayer setFrame:[rootLayer bounds]]; + [rootLayer addSublayer:previewLayer]; + [session startRunning]; + + if (error) { + NSString* title = [NSString stringWithFormat:@"Failed with error %d", (int)[error code]]; + UIAlertController* alertController = + [UIAlertController alertControllerWithTitle:title + message:[error localizedDescription] + preferredStyle:UIAlertControllerStyleAlert]; + UIAlertAction* dismiss = + [UIAlertAction actionWithTitle:@"Dismiss" style:UIAlertActionStyleDefault handler:nil]; + [alertController addAction:dismiss]; + [self presentViewController:alertController animated:YES completion:nil]; + [self teardownAVCapture]; + } +} + +- (void)teardownAVCapture { + [previewLayer removeFromSuperlayer]; +} + +- (AVCaptureVideoOrientation)avOrientationForDeviceOrientation: + (UIDeviceOrientation)deviceOrientation { + AVCaptureVideoOrientation result = (AVCaptureVideoOrientation)(deviceOrientation); + if (deviceOrientation == UIDeviceOrientationLandscapeLeft) + result = AVCaptureVideoOrientationLandscapeRight; + else if (deviceOrientation == UIDeviceOrientationLandscapeRight) + result = AVCaptureVideoOrientationLandscapeLeft; + return result; +} + +- (IBAction)takePicture:(id)sender { + if ([session isRunning]) { + [session stopRunning]; + [sender setTitle:@"Continue" forState:UIControlStateNormal]; + + flashView = [[UIView alloc] initWithFrame:[previewView frame]]; + [flashView setBackgroundColor:[UIColor whiteColor]]; + [flashView setAlpha:0.f]; + [[[self view] window] addSubview:flashView]; + + [UIView animateWithDuration:.2f + animations:^{ + [flashView setAlpha:1.f]; + } + completion:^(BOOL finished) { + [UIView animateWithDuration:.2f + animations:^{ + [flashView setAlpha:0.f]; + } + completion:^(BOOL finished) { + [flashView removeFromSuperview]; + flashView = nil; + }]; + }]; + + } else { + [session startRunning]; + [sender setTitle:@"Freeze Frame" forState:UIControlStateNormal]; + } +} + +- (void)captureOutput:(AVCaptureOutput*)captureOutput + didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer + fromConnection:(AVCaptureConnection*)connection { + CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer); + CFRetain(pixelBuffer); + [self runModelOnFrame:pixelBuffer]; + CFRelease(pixelBuffer); +} + +- (void)runModelOnFrame:(CVPixelBufferRef)pixelBuffer { + assert(pixelBuffer != NULL); + + OSType sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer); + int doReverseChannels; + if (kCVPixelFormatType_32ARGB == sourcePixelFormat) { + doReverseChannels = 1; + } else if (kCVPixelFormatType_32BGRA == sourcePixelFormat) { + doReverseChannels = 0; + } else { + assert(false); // Unknown source format + } + + const int sourceRowBytes = (int)CVPixelBufferGetBytesPerRow(pixelBuffer); + const int image_width = (int)CVPixelBufferGetWidth(pixelBuffer); + const int fullHeight = (int)CVPixelBufferGetHeight(pixelBuffer); + + CVPixelBufferLockFlags unlockFlags = kNilOptions; + CVPixelBufferLockBaseAddress(pixelBuffer, unlockFlags); + + unsigned char* sourceBaseAddr = (unsigned char*)(CVPixelBufferGetBaseAddress(pixelBuffer)); + int image_height; + unsigned char* sourceStartAddr; + if (fullHeight <= image_width) { + image_height = fullHeight; + sourceStartAddr = sourceBaseAddr; + } else { + image_height = image_width; + const int marginY = ((fullHeight - image_width) / 2); + sourceStartAddr = (sourceBaseAddr + (marginY * sourceRowBytes)); + } + const int image_channels = 4; + assert(image_channels >= wanted_input_channels); + uint8_t* in = sourceStartAddr; + + int input = interpreter->inputs()[0]; + + uint8_t* out = interpreter->typed_tensor(input); + for (int y = 0; y < wanted_input_height; ++y) { + uint8_t* out_row = out + (y * wanted_input_width * wanted_input_channels); + for (int x = 0; x < wanted_input_width; ++x) { + const int in_x = (y * image_width) / wanted_input_width; + const int in_y = (x * image_height) / wanted_input_height; + uint8_t* in_pixel = in + (in_y * image_width * image_channels) + (in_x * image_channels); + uint8_t* out_pixel = out_row + (x * wanted_input_channels); + for (int c = 0; c < wanted_input_channels; ++c) { + out_pixel[c] = in_pixel[c]; + } + } + } + + double startTimestamp = [[NSDate new] timeIntervalSince1970]; + if (interpreter->Invoke() != kTfLiteOk) { + LOG(FATAL) << "Failed to invoke!"; + } + double endTimestamp = [[NSDate new] timeIntervalSince1970]; + total_latency += (endTimestamp - startTimestamp); + total_count += 1; + NSLog(@"Time: %.4lf, avg: %.4lf, count: %d", endTimestamp - startTimestamp, + total_latency / total_count, total_count); + + const int output_size = 1000; + const int kNumResults = 5; + const float kThreshold = 0.1f; + + std::vector> top_results; + + uint8_t* output = interpreter->typed_output_tensor(0); + GetTopN(output, output_size, kNumResults, kThreshold, &top_results); + + NSMutableDictionary* newValues = [NSMutableDictionary dictionary]; + for (const auto& result : top_results) { + const float confidence = result.first; + const int index = result.second; + NSString* labelObject = [NSString stringWithUTF8String:labels[index].c_str()]; + NSNumber* valueObject = [NSNumber numberWithFloat:confidence]; + [newValues setObject:valueObject forKey:labelObject]; + } + dispatch_async(dispatch_get_main_queue(), ^(void) { + [self setPredictionValues:newValues]; + }); + + CVPixelBufferUnlockBaseAddress(pixelBuffer, unlockFlags); + + CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); +} + +- (void)dealloc { + [self teardownAVCapture]; +} + +- (void)didReceiveMemoryWarning { + [super didReceiveMemoryWarning]; +} + +- (void)viewDidLoad { + [super viewDidLoad]; + labelLayers = [[NSMutableArray alloc] init]; + oldPredictionValues = [[NSMutableDictionary alloc] init]; + + NSString* graph_path = FilePathForResourceName(model_file_name, @"tflite"); + model = tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]); + if (!model) { + LOG(FATAL) << "Failed to mmap model " << graph_path; + } + LOG(INFO) << "Loaded model " << graph_path; + model->error_reporter(); + LOG(INFO) << "resolved reporter"; + + tflite::ops::builtin::BuiltinOpResolver resolver; + LoadLabels(labels_file_name, labels_file_type, &labels); + + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + LOG(FATAL) << "Failed to construct interpreter"; + } + if (interpreter->AllocateTensors() != kTfLiteOk) { + LOG(FATAL) << "Failed to allocate tensors!"; + } + + [self setupAVCapture]; +} + +- (void)viewDidUnload { + [super viewDidUnload]; +} + +- (void)viewWillAppear:(BOOL)animated { + [super viewWillAppear:animated]; +} + +- (void)viewDidAppear:(BOOL)animated { + [super viewDidAppear:animated]; +} + +- (void)viewWillDisappear:(BOOL)animated { + [super viewWillDisappear:animated]; +} + +- (void)viewDidDisappear:(BOOL)animated { + [super viewDidDisappear:animated]; +} + +- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation { + return (interfaceOrientation == UIInterfaceOrientationPortrait); +} + +- (BOOL)prefersStatusBarHidden { + return YES; +} + +- (void)setPredictionValues:(NSDictionary*)newValues { + const float decayValue = 0.75f; + const float updateValue = 0.25f; + const float minimumThreshold = 0.01f; + + NSMutableDictionary* decayedPredictionValues = [[NSMutableDictionary alloc] init]; + for (NSString* label in oldPredictionValues) { + NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label]; + const float oldPredictionValue = [oldPredictionValueObject floatValue]; + const float decayedPredictionValue = (oldPredictionValue * decayValue); + if (decayedPredictionValue > minimumThreshold) { + NSNumber* decayedPredictionValueObject = [NSNumber numberWithFloat:decayedPredictionValue]; + [decayedPredictionValues setObject:decayedPredictionValueObject forKey:label]; + } + } + oldPredictionValues = decayedPredictionValues; + + for (NSString* label in newValues) { + NSNumber* newPredictionValueObject = [newValues objectForKey:label]; + NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label]; + if (!oldPredictionValueObject) { + oldPredictionValueObject = [NSNumber numberWithFloat:0.0f]; + } + const float newPredictionValue = [newPredictionValueObject floatValue]; + const float oldPredictionValue = [oldPredictionValueObject floatValue]; + const float updatedPredictionValue = (oldPredictionValue + (newPredictionValue * updateValue)); + NSNumber* updatedPredictionValueObject = [NSNumber numberWithFloat:updatedPredictionValue]; + [oldPredictionValues setObject:updatedPredictionValueObject forKey:label]; + } + NSArray* candidateLabels = [NSMutableArray array]; + for (NSString* label in oldPredictionValues) { + NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label]; + const float oldPredictionValue = [oldPredictionValueObject floatValue]; + if (oldPredictionValue > 0.05f) { + NSDictionary* entry = @{@"label" : label, @"value" : oldPredictionValueObject}; + candidateLabels = [candidateLabels arrayByAddingObject:entry]; + } + } + NSSortDescriptor* sort = [NSSortDescriptor sortDescriptorWithKey:@"value" ascending:NO]; + NSArray* sortedLabels = + [candidateLabels sortedArrayUsingDescriptors:[NSArray arrayWithObject:sort]]; + + const float leftMargin = 10.0f; + const float topMargin = 10.0f; + + const float valueWidth = 48.0f; + const float valueHeight = 18.0f; + + const float labelWidth = 246.0f; + const float labelHeight = 18.0f; + + const float labelMarginX = 5.0f; + const float labelMarginY = 5.0f; + + [self removeAllLabelLayers]; + + int labelCount = 0; + for (NSDictionary* entry in sortedLabels) { + NSString* label = [entry objectForKey:@"label"]; + NSNumber* valueObject = [entry objectForKey:@"value"]; + const float value = [valueObject floatValue]; + const float originY = topMargin + ((labelHeight + labelMarginY) * labelCount); + const int valuePercentage = (int)roundf(value * 100.0f); + + const float valueOriginX = leftMargin; + NSString* valueText = [NSString stringWithFormat:@"%d%%", valuePercentage]; + + [self addLabelLayerWithText:valueText + originX:valueOriginX + originY:originY + width:valueWidth + height:valueHeight + alignment:kCAAlignmentRight]; + + const float labelOriginX = (leftMargin + valueWidth + labelMarginX); + + [self addLabelLayerWithText:[label capitalizedString] + originX:labelOriginX + originY:originY + width:labelWidth + height:labelHeight + alignment:kCAAlignmentLeft]; + + labelCount += 1; + if (labelCount > 4) { + break; + } + } +} + +- (void)removeAllLabelLayers { + for (CATextLayer* layer in labelLayers) { + [layer removeFromSuperlayer]; + } + [labelLayers removeAllObjects]; +} + +- (void)addLabelLayerWithText:(NSString*)text + originX:(float)originX + originY:(float)originY + width:(float)width + height:(float)height + alignment:(NSString*)alignment { + CFTypeRef font = (CFTypeRef) @"Menlo-Regular"; + const float fontSize = 12.0; + const float marginSizeX = 5.0f; + const float marginSizeY = 2.0f; + + const CGRect backgroundBounds = CGRectMake(originX, originY, width, height); + const CGRect textBounds = CGRectMake((originX + marginSizeX), (originY + marginSizeY), + (width - (marginSizeX * 2)), (height - (marginSizeY * 2))); + + CATextLayer* background = [CATextLayer layer]; + [background setBackgroundColor:[UIColor blackColor].CGColor]; + [background setOpacity:0.5f]; + [background setFrame:backgroundBounds]; + background.cornerRadius = 5.0f; + + [[self.view layer] addSublayer:background]; + [labelLayers addObject:background]; + + CATextLayer* layer = [CATextLayer layer]; + [layer setForegroundColor:[UIColor whiteColor].CGColor]; + [layer setFrame:textBounds]; + [layer setAlignmentMode:alignment]; + [layer setWrapped:YES]; + [layer setFont:font]; + [layer setFontSize:fontSize]; + layer.contentsScale = [[UIScreen mainScreen] scale]; + [layer setString:text]; + + [[self.view layer] addSublayer:layer]; + [labelLayers addObject:layer]; +} + +@end diff --git a/tensorflow/contrib/lite/examples/ios/camera/Info.plist b/tensorflow/contrib/lite/examples/ios/camera/Info.plist new file mode 100644 index 0000000000..f3d96bab16 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/Info.plist @@ -0,0 +1,44 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleDisplayName + tflite_camera_example + CFBundleExecutable + ${EXECUTABLE_NAME} + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + ${PRODUCT_NAME} + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1.0 + LSRequiresIPhoneOS + + NSCameraUsageDescription + Capture images to detect object + UIMainStoryboardFile + MainStoryboard_iPhone + UIRequiresFullScreen + + UIStatusBarHidden + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + + + diff --git a/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard b/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard new file mode 100644 index 0000000000..0f10a22e41 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/examples/ios/camera/Podfile b/tensorflow/contrib/lite/examples/ios/camera/Podfile new file mode 100644 index 0000000000..4ae6fb6b94 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/Podfile @@ -0,0 +1,5 @@ +platform :ios, '8.0' +inhibit_all_warnings! + +target 'tflite_camera_example' + pod 'TensorFlow-experimental' diff --git a/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore b/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/contrib/lite/examples/ios/camera/main.mm b/tensorflow/contrib/lite/examples/ios/camera/main.mm new file mode 100644 index 0000000000..1a9e542f7c --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/main.mm @@ -0,0 +1,28 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "CameraExampleAppDelegate.h" + +int main(int argc, char* argv[]) { + int retVal = 0; + + @autoreleasepool { + retVal = + UIApplicationMain(argc, argv, nil, NSStringFromClass([CameraExampleAppDelegate class])); + } + return retVal; +} diff --git a/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj new file mode 100644 index 0000000000..c98183276b --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj @@ -0,0 +1,419 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 1C3C9DCC1ED3AB4200B8B5FA /* main.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1C3C9DCA1ED3AB4200B8B5FA /* main.mm */; }; + 1C99111C1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */; }; + 1CA5EB931ED3ABFB00247A34 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */; }; + 1CB47D491ED3AD1700DF7666 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */; }; + 1CDB2D491ED3A9CD007929E9 /* CameraExampleAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */; }; + 1CDB2D4A1ED3A9CD007929E9 /* CameraExampleViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */; }; + 1CDB2D4E1ED3AA35007929E9 /* Info.plist in Resources */ = {isa = PBXBuildFile; fileRef = 1CDB2D4D1ED3AA35007929E9 /* Info.plist */; }; + 54DC6C3C5F734F3A58069F0C /* libPods-tflite_camera_example.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */; }; + AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */ = {isa = PBXBuildFile; fileRef = AC1F82641FBA3CBD0052BA77 /* labels.txt */; }; + AC1F82691FBA3F930052BA77 /* libtensorflow-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */; }; + ACA1A4CA1FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; }; + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; }; + 1C3C9DCA1ED3AB4200B8B5FA /* main.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = main.mm; sourceTree = ""; }; + 1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tflite_camera_example.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; path = MainStoryboard_iPhone.storyboard; sourceTree = ""; }; + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; }; + 1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreMedia.framework; path = System/Library/Frameworks/CoreMedia.framework; sourceTree = SDKROOT; }; + 1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; }; + 1CDB2D421ED3A9CD007929E9 /* CameraExampleAppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleAppDelegate.h; sourceTree = ""; }; + 1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CameraExampleAppDelegate.m; sourceTree = ""; }; + 1CDB2D441ED3A9CD007929E9 /* CameraExampleViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleViewController.h; sourceTree = ""; }; + 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CameraExampleViewController.mm; sourceTree = ""; }; + 1CDB2D4D1ED3AA35007929E9 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-tflite_camera_example.a"; sourceTree = BUILT_PRODUCTS_DIR; }; + 3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.debug.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.debug.xcconfig"; sourceTree = ""; }; + 55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.release.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.release.xcconfig"; sourceTree = ""; }; + AC1F82641FBA3CBD0052BA77 /* labels.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = labels.txt; sourceTree = ""; }; + AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libtensorflow-lite.a"; path = "../../../gen/lib/libtensorflow-lite.a"; sourceTree = ""; }; + ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_quant_v1_224.tflite; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 1C564C0A1ED3A92E00087306 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + AC1F82691FBA3F930052BA77 /* libtensorflow-lite.a in Frameworks */, + 1CB47D491ED3AD1700DF7666 /* AVFoundation.framework in Frameworks */, + 1CA5EB931ED3ABFB00247A34 /* CoreMedia.framework in Frameworks */, + 54DC6C3C5F734F3A58069F0C /* libPods-tflite_camera_example.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 24D7686C331131624F4454A0 /* Frameworks */ = { + isa = PBXGroup; + children = ( + AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */, + 1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */, + 1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */, + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */, + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */, + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */, + 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */, + ); + name = Frameworks; + sourceTree = ""; + }; + 3E9FC355632FB928EA23BEED /* Pods */ = { + isa = PBXGroup; + children = ( + 3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */, + 55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */, + ); + name = Pods; + sourceTree = ""; + }; + 591157921CF4011C00C31E3A = { + isa = PBXGroup; + children = ( + 1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */, + 1C3C9DCA1ED3AB4200B8B5FA /* main.mm */, + 1CDB2D4D1ED3AA35007929E9 /* Info.plist */, + 1CDB2D421ED3A9CD007929E9 /* CameraExampleAppDelegate.h */, + 1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */, + 1CDB2D441ED3A9CD007929E9 /* CameraExampleViewController.h */, + 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */, + 59A3CFF31CF4E68100C4259F /* data */, + 5911579C1CF4011C00C31E3A /* Products */, + 3E9FC355632FB928EA23BEED /* Pods */, + 24D7686C331131624F4454A0 /* Frameworks */, + ); + sourceTree = ""; + }; + 5911579C1CF4011C00C31E3A /* Products */ = { + isa = PBXGroup; + children = ( + 1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */, + ); + name = Products; + sourceTree = ""; + }; + 59A3CFF31CF4E68100C4259F /* data */ = { + isa = PBXGroup; + children = ( + ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */, + AC1F82641FBA3CBD0052BA77 /* labels.txt */, + ); + path = data; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 1C564C0C1ED3A92E00087306 /* tflite_camera_example */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1C564C351ED3A92E00087306 /* Build configuration list for PBXNativeTarget "tflite_camera_example" */; + buildPhases = ( + 66DAEAAEE9EF6550C3A061E0 /* [CP] Check Pods Manifest.lock */, + 1C564C091ED3A92E00087306 /* Sources */, + 1C564C0A1ED3A92E00087306 /* Frameworks */, + 1C564C0B1ED3A92E00087306 /* Resources */, + 00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */, + 5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = tflite_camera_example; + productName = tflite_camera_example; + productReference = 1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 591157931CF4011C00C31E3A /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 0830; + LastUpgradeCheck = 0830; + ORGANIZATIONNAME = Google; + TargetAttributes = { + 1C564C0C1ED3A92E00087306 = { + CreatedOnToolsVersion = 8.3.2; + DevelopmentTeam = EQHXZ8M8AV; + ProvisioningStyle = Automatic; + }; + }; + }; + buildConfigurationList = 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "tflite_camera_example" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 591157921CF4011C00C31E3A; + productRefGroup = 5911579C1CF4011C00C31E3A /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 1C564C0C1ED3A92E00087306 /* tflite_camera_example */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 1C564C0B1ED3A92E00087306 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ACA1A4CA1FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite in Resources */, + 1C99111C1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard in Resources */, + 1CDB2D4E1ED3AA35007929E9 /* Info.plist in Resources */, + AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXShellScriptBuildPhase section */ + 00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputPaths = ( + ); + name = "[CP] Embed Pods Frameworks"; + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-frameworks.sh\"\n"; + showEnvVarsInLog = 0; + }; + 5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputPaths = ( + ); + name = "[CP] Copy Pods Resources"; + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-resources.sh\"\n"; + showEnvVarsInLog = 0; + }; + 66DAEAAEE9EF6550C3A061E0 /* [CP] Check Pods Manifest.lock */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputPaths = ( + "${PODS_PODFILE_DIR_PATH}/Podfile.lock", + "${PODS_ROOT}/Manifest.lock", + ); + name = "[CP] Check Pods Manifest.lock"; + outputPaths = ( + "$(DERIVED_FILE_DIR)/Pods-tflite_camera_example-checkManifestLockResult.txt", + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; + showEnvVarsInLog = 0; + }; +/* End PBXShellScriptBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 1C564C091ED3A92E00087306 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1CDB2D4A1ED3A9CD007929E9 /* CameraExampleViewController.mm in Sources */, + 1CDB2D491ED3A9CD007929E9 /* CameraExampleAppDelegate.m in Sources */, + 1C3C9DCC1ED3AB4200B8B5FA /* main.mm in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1C564C361ED3A92E00087306 /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + DEVELOPMENT_TEAM = EQHXZ8M8AV; + INFOPLIST_FILE = Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 10.3; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = "com.pf.tf-camera-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 3.0; + }; + name = Debug; + }; + 1C564C371ED3A92E00087306 /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + DEVELOPMENT_TEAM = EQHXZ8M8AV; + INFOPLIST_FILE = Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 10.3; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = "com.pf.tf-camera-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; + SWIFT_VERSION = 3.0; + }; + name = Release; + }; + 591157B01CF4011D00C31E3A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 591157B11CF4011D00C31E3A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1C564C351ED3A92E00087306 /* Build configuration list for PBXNativeTarget "tflite_camera_example" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1C564C361ED3A92E00087306 /* Debug */, + 1C564C371ED3A92E00087306 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "tflite_camera_example" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 591157B01CF4011D00C31E3A /* Debug */, + 591157B11CF4011D00C31E3A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 591157931CF4011C00C31E3A /* Project object */; +} diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h new file mode 100644 index 0000000000..94046d9728 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h @@ -0,0 +1,21 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@interface AppDelegate : UIResponder + +@property(strong, nonatomic) UIWindow *window; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm new file mode 100644 index 0000000000..fe26ceec42 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm @@ -0,0 +1,47 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "AppDelegate.h" + +#import "RunModelViewController.h" + +@implementation AppDelegate + +- (BOOL)application:(UIApplication *)application + didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + UITabBarController *bar = [[UITabBarController alloc] init]; + [bar setViewControllers:@[ [[RunModelViewController alloc] init] ]]; + bar.selectedIndex = 0; + self.window = [[UIWindow alloc] initWithFrame:[[UIScreen mainScreen] bounds]]; + self.window.rootViewController = bar; + [self.window makeKeyAndVisible]; + return YES; +} + +- (void)applicationWillResignActive:(UIApplication *)application { +} + +- (void)applicationDidEnterBackground:(UIApplication *)application { +} + +- (void)applicationWillEnterForeground:(UIApplication *)application { +} + +- (void)applicationDidBecomeActive:(UIApplication *)application { +} + +- (void)applicationWillTerminate:(UIApplication *)application { +} + +@end diff --git a/tensorflow/contrib/lite/examples/ios/simple/Podfile b/tensorflow/contrib/lite/examples/ios/simple/Podfile new file mode 100644 index 0000000000..1740ad6457 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/Podfile @@ -0,0 +1,5 @@ +platform :ios, '8.0' +inhibit_all_warnings! + +target 'tf_simple_example' + pod 'TensorFlow-experimental' diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist b/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist new file mode 100644 index 0000000000..1a3eaa8a2c --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist @@ -0,0 +1,47 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleDisplayName + tflite-simple-example + CFBundleExecutable + tf_simple_example + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + ios-app + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1.0 + LSRequiresIPhoneOS + + UILaunchStoryboardName + RunModelViewController + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h new file mode 100644 index 0000000000..a4b358b4eb --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h @@ -0,0 +1,24 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +@interface RunModelViewController : UIViewController + +- (IBAction)getUrl:(id)sender; + +@property(weak, nonatomic) IBOutlet UITextView *urlContentTextView; +@property(weak, nonatomic) IBOutlet UITextField *urlTextField; + +@end diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm new file mode 100644 index 0000000000..0dafb1f61e --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm @@ -0,0 +1,221 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "RunModelViewController.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +#include "ios_image_load.h" + +#define LOG(x) std::cerr +#define CHECK(x) \ + if (!(x)) { \ + LOG(ERROR) << #x << "failed"; \ + exit(1); \ + } + +NSString* RunInferenceOnImage(); + +@interface RunModelViewController () +@end + +@implementation RunModelViewController { +} + +- (IBAction)getUrl:(id)sender { + NSString* inference_result = RunInferenceOnImage(); + self.urlContentTextView.text = inference_result; +} + +@end + +// Returns the top N confidence values over threshold in the provided vector, +// sorted by confidence in descending order. +static void GetTopN(const float* prediction, const int prediction_size, const int num_results, + const float threshold, std::vector >* top_results) { + // Will contain top N results in ascending order. + std::priority_queue, std::vector >, + std::greater > > + top_result_pq; + + const long count = prediction_size; + for (int i = 0; i < count; ++i) { + const float value = prediction[i]; + + // Only add it if it beats the threshold and has a chance at being in + // the top N. + if (value < threshold) { + continue; + } + + top_result_pq.push(std::pair(value, i)); + + // If at capacity, kick the smallest value out. + if (top_result_pq.size() > num_results) { + top_result_pq.pop(); + } + } + + // Copy to output vector and reverse into descending order. + while (!top_result_pq.empty()) { + top_results->push_back(top_result_pq.top()); + top_result_pq.pop(); + } + std::reverse(top_results->begin(), top_results->end()); +} + +NSString* FilePathForResourceName(NSString* name, NSString* extension) { + NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension]; + if (file_path == NULL) { + LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String] + << "' in bundle."; + } + return file_path; +} + +NSString* RunInferenceOnImage() { + std::string graph; + const int num_threads = 1; + std::string input_layer_type = "float"; + std::vector sizes = {1, 224, 224, 3}; + + NSString* graph_path = FilePathForResourceName(@"mobilenet_v1_1.0_224", @"tflite"); + + std::unique_ptr model( + tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String])); + if (!model) { + LOG(FATAL) << "Failed to mmap model " << graph; + } + LOG(INFO) << "Loaded model " << graph; + model->error_reporter(); + LOG(INFO) << "resolved reporter"; + +#ifdef TFLITE_CUSTOM_OPS_HEADER + tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); +#else + tflite::ops::builtin::BuiltinOpResolver resolver; +#endif + + std::unique_ptr interpreter; + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + LOG(FATAL) << "Failed to construct interpreter"; + } + + if (num_threads != -1) { + interpreter->SetNumThreads(num_threads); + } + + int input = interpreter->inputs()[0]; + + if (input_layer_type != "string") { + interpreter->ResizeInputTensor(input, sizes); + } + + if (interpreter->AllocateTensors() != kTfLiteOk) { + LOG(FATAL) << "Failed to allocate tensors!"; + } + + // Read the label list + NSString* labels_path = FilePathForResourceName(@"labels", @"txt"); + std::vector label_strings; + std::ifstream t; + t.open([labels_path UTF8String]); + std::string line; + while (t) { + std::getline(t, line); + label_strings.push_back(line); + } + t.close(); + + // Read the Grace Hopper image. + NSString* image_path = FilePathForResourceName(@"grace_hopper", @"jpg"); + int image_width; + int image_height; + int image_channels; + std::vector image_data = + LoadImageFromFile([image_path UTF8String], &image_width, &image_height, &image_channels); + const int wanted_width = 224; + const int wanted_height = 224; + const int wanted_channels = 3; + const float input_mean = 127.5f; + const float input_std = 127.5f; + assert(image_channels >= wanted_channels); + uint8_t* in = image_data.data(); + float* out = interpreter->typed_tensor(input); + for (int y = 0; y < wanted_height; ++y) { + const int in_y = (y * image_height) / wanted_height; + uint8_t* in_row = in + (in_y * image_width * image_channels); + float* out_row = out + (y * wanted_width * wanted_channels); + for (int x = 0; x < wanted_width; ++x) { + const int in_x = (x * image_width) / wanted_width; + uint8_t* in_pixel = in_row + (in_x * image_channels); + float* out_pixel = out_row + (x * wanted_channels); + for (int c = 0; c < wanted_channels; ++c) { + out_pixel[c] = (in_pixel[c] - input_mean) / input_std; + } + } + } + + if (interpreter->Invoke() != kTfLiteOk) { + LOG(FATAL) << "Failed to invoke!"; + } + + float* output = interpreter->typed_output_tensor(0); + const int output_size = 1000; + const int kNumResults = 5; + const float kThreshold = 0.1f; + std::vector > top_results; + GetTopN(output, output_size, kNumResults, kThreshold, &top_results); + + std::stringstream ss; + ss.precision(3); + for (const auto& result : top_results) { + const float confidence = result.first; + const int index = result.second; + + ss << index << " " << confidence << " "; + + // Write out the result as a string + if (index < label_strings.size()) { + // just for safety: theoretically, the output is under 1000 unless there + // is some numerical issues leading to a wrong prediction. + ss << label_strings[index]; + } else { + ss << "Prediction: " << index; + } + + ss << "\n"; + } + + LOG(INFO) << "Predictions: " << ss.str(); + + std::string predictions = ss.str(); + NSString* result = @""; + result = [NSString stringWithFormat:@"%@ - %s", result, predictions.c_str()]; + + return result; +} diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib new file mode 100644 index 0000000000..93f334b985 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg b/tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d2a427810f679db537236c5430873a81a62ef412 GIT binary patch literal 73746 zcmex=zKMNGjWx(E@lO+TPE zg%l;`6{n>ZA=^~I!0;a{V15Cz7pv5NO5{K;V&PzOFU?Fz1$mW00PJQ4CI<#EW&npH ziyUugkdH!sS!z)cR3Rt^A#ze|!8w`95Z#q zjL?)KBJPt|nOdaa>7wACnwMIXSdw3);FMSlN^l_mFoMnC5%o(gR|wC{OUW-URtU)` z%}vTn%*5YF1;xxwsC9t_RE|9ZCl4PK`pH0#{JGoxAelq;`~InJWYyx znUcK2YPN#xIvMNi!?XW0xR%_H+x5>rGEH{sg+dw1>e^NJL;tm2R>}Ux&SAS) zSwk&F(9%F|<3#x%cdolkl%TTdesx5;x!AU~7|9tq@ z+^JLk?Y{c<)sfvCbjg}){UX?mJcV zpP~NpuC3YPN6aNzFRs(r`ux4-z27yv+jD=}f03JX{#AXL z@vnN1^{bah`Imb=$6h@Tt#&{1U*gM5*Y1`&ezBPE zS!8s#43Ep!H@ekflk2wqP03}KDAk$o(7SBIU$e%Szp6dHUEO^9rSTWcz%?YybM4$V@g^RhThdK<2L9-&0#QANm)k_3FG_ zR7&J@{s0cFZvts z%4A1)Xy`oYWVuthng5pkXZW@1o7$WU>JfT2TMk^RZ+~0yu5tG4=~w5yS<`K#vu!&2 zzU#Y=3&!2|znfToA?u}URhF0-yW8$%wW;f6_Fr8So_#ana@@Z zD%um3TV+{u#^a=W)m8Ro^8~+%9NU_9BmV!z=T+~o=iJ+P>Enmb*{cn=UO670y5@HJ zE!Mu;X@O~$t1qYS_%}^={)w-m=hxRq6jUAl`10V^HxlQ2<|JIus@*p``*&>l_PY~b znC?&foBUt6nJ3nLF{)NBfpZyJAn>X24%y`Mo z1iei?)qU0sbFb~z{BnKwuYJqDo_qZG{^SX#3|Ez2zm>G>hU$_34C^lyeL2^CA>(C4 z$aV#uo;y`hwqYL*=RW#=-DB>$FZ2J2-an@`=jMtdam$LP|1+ODf7)Urk4uZ}pC3H< z+dlO8`s=%H8*6W{2<(sD78)EElWn)z@V@VU?-&0j|1-=eshfSZAnx@$ld36OuSaNa zJKpcZ`Q`1(cPAEKt-p|McKQ8(2Cpr#-J$uvZF8#&zx8?;@0@=wIu!gbJ#N3+bj`2XWgxO&03Jkrl~ho$M>KE|sTBWrUD ze}(K%&0F;9-_-p3zw|%f*d?=m&5oa@QXjNW*7@>FZQAkWO-A{P_h0{-7KMsziqWKG;!y?=`a4?{?G6_>&7~#jk!AOzceilz9Stwdp2M3>KPmF-z>Y9`=5cS zU8+Xx`eC0wp_IHQ*{AhNl=Yg?y9T}umQTCwYL`06Qt)|ZC;n)-I}_5Td4-|C%Z$}z4|rzz~R#w6LMCr$)0^euiEYX zoAov?Z?1cJ^Xro$zmuyxXa}ZtQlKS^enoa#<%Ne!VEAL*r*?l#;*5?a%X7&Ny;#=jBrDE5vZMq-Z ze*fM5H{rML^qAi{;~tb%daicb{hd$V@s?j$WoFuQ%Zs!TDS03dECS3A_Mts_AN%cJ=J$_w>ov} z{G#?3j~+gLnsOlby7=C=IdAlCe_4I^-^_Ra8Ek{rzkbJ;eAMS~>9zfC+Y=h)=X|PN zwJ$#CtgO2Bs;8GOow^^nUP4$ed2MoR^1S=G_8+Y4?O!hM@HnT?C#{hEXj^2Gq9$*K z{Pe?%SLyD0Q&XN2`RYkk!mSSs)8$I?H~%?up?blWJAeF^_ZS(>-MjebF?gX~chq|Cb+BrHbCa zW>8opZ5ll_G~D2&!XCpnZOmi*0XDU&a6GVfiL2?_b16X}-F;`O4AV3TapET_TS2^z?++=ij;iLp$8K^xBiyG}m|c zOW&7H+w;Bp-1psoo+}y{2~-9Aj@Wcpitk$1T7I$Yqakx{z)8BmU*O+f+ zYvxRsiu|IPzG>rv{LeZ4OSgZXDm1|%xnaG><-AoV&pbQ%`OL%{RptL)U#agtV{N=^ zT^%dG$la-z-=><)UvzxaotmyWfB9d0zx+OWu7kPaw5W(zGLOZA-pU+j&)qg#>iwcu zYhtEfwOjLU;@5s=^ZMn%dAXH*Z(rP~)jqs`!`su>>UQsc^^N=0=~tG%M|Z1#$zKun zIyNKy;*Ma?>|1~8zx-#IAO9lfKLg_?n;&OZ1^#UM7j2SQIQ_KV-tL2slM<5+ub+-` zR(;PNUwHFBLtV6eY<9lquL)C8Czn!IAiZjnhZe^7Il6o&VrK;>dL*D*hkDf=JtqJ+hptYUzl(kgmDZ^-_qqgq;{V>DNEE6Spzic>aqrj9@_ToRo|10bN z8G5_Bud#kvyRdP-?0WtFHybPOtb6?@CHmJs*7*Io>-%4A?ETMhadp>`CGpc;`3jPA zxAqjDyv0!59P`}zbJG!K6rBP7dLbF%?Y;|YUW=3 zHT7a?z59#$2lF3XmD~7FPkXV@>0M#2+wMl(|5kn2_AFD}wL1snZcE)S-`VA6RZ+p_ z_~6AZy}9kz>h{zouUj*x-R^Z1>rK^7w=&k$<@(+__MhS4=0B?Mzlz)Nbjn^|RNvLN zzxVRpD}_ee4Cl=}^EH8e+BLH~$L&Ag4lav~t~ahdSrU-_H}^k-*p2#dE7iIF-)67d z+P`ksi(21|jpxfQ9oxD7&h@Rc-~DG;tEYIb`cB-ZnD0W*EsuGsoSySse#vva<$n@t zQ%}E^jefI!>vCz~u*<>Q4YyfT&-z{~t-o;oqV0cW_wCJ|re><9C}83kkvGpSbI!vx z1=|biR+Y;1R23CTyvjVCeYcY@?RnSSmrwGN@9x^~doKO*udMRF0eg}xf2=c$yI-;D zc-V!^>|FCZ^1EMN`}Hb)`9I++UN7Rd-e0W0(N0+3xB2kFqw;0%+kdWqE?0Tf)_lLb z(weR_k>0nH`Db%fhpyQ-@rt3L_SwBXBGHK}`}ni_9+(SmTwWcjaM@~!3a>!j!oGqj zoIMA7+9vh13jAmIfA!gahGT5O`L85=*B<=R6CbplkAKDY-?RVDny&vbY}e$|n;g`f zC8XFJRL?j5-uL`nsDl_? zm+x+Vxlohi$jltO)r*BS8D;N0aJ24TGS`2m#*Ld|kMEu7`u1z;4%^nVd<@MymnZ*S zVK+1T&h=x@CS7K8X_gZC<=1&jpuxk;MDa@bHN6e5^LJm**sS4tPBcutf6?7}=NJF% zvb2w|I_5p+^(N--d%1;QUI~3STNU#@H~Ej8sr@sZpNy{Oe<~;oXfUu{jne;KRLJ@9 zbWf;v$45T-X-j4+TkHL54UgQn_u$8^i|;VcxSp3C^?t$EICm|*!Xj4%h8qmqisw8@ zwUj*ZUQRKOW=W+&W?ou8Xp|!qG&Tbo za^h!jWJqDiXGj7MQw1<&Fyu3oFyu27gINU(3=F~qR46c*G3YTEfDDA#&Bo&u;Oedr zSegj35@Z`Y0~2VJlo9*r5C>7Is!vHWpT9Rz^W421RBTLq{RjKqa=s!bT(E zi3^?BHy#X1DpD5taIwi)^x?-zi<~zd3QkrLyHwn4^61m##T+iGYKKEoricqNFflT* zGP1I;v9qy(ox;c@$gF7SD8v$&SlFnob>#zM$<-(UXB>1t-+k%Toy7&%4r4KCrf;ldE};b^Lwg7 z@@8|7@~H{|CbyO@_2lqUV&XhA*Xy0&uGM>ELvFl$QQ?g-QO~w6{#&-nCZNJ#2o}!d@4qmYy1yt6BWr`ToAgzY<( znYz2@Fw8nL0xnf0UFQsXpy=myF}LXUWU|%-}5#x@&pLW5FE*u9qFp zOqK`FnEb8UBV5#RmHvI5@7=Z2ei!W9p>feg(Q?`HxEYC{dw`q zy3%UjNzFSm>q-wC?#M1YW+^=H`_8K}>omjzgWK$0oWK9w>i5yF#~#Q0OFe!1!jtnY zau1~rJX|uZ$BEgvza>2D)OL^8+t&6yxii;9J#hkyXQ7~`&4NZ7|LYQ8zG@zGmuYy{ zJ~eAu?Tl8zO_HL<)+r0O-FX+XGB31OJ6&+ir0yFVU)-C$>E&r{tMrVe9;+Fy8EVe# zH(0Kzr*%E`mUZ2mJhORA?`<7sG@+5uneOh}zW841S_H!c_p39if^Y-Jd^%<8lSvL6Y{rIfb z&*agab(^~WCNDnv&!k%Ty9ocRzuu)=-xpTal+@%o1+x+l-U<%XKI-LX zYkGaJt#jRtRZ+HUzuq`oqQ#=|?V-iz;FZ_5tq;4k;bLHavD^_x8NMRfjrYF%-EFqM zGiaIi*Sp~pzP~IrGT?C1f2)yq@44BxJBlkVhVF{COHrOu*d8Jk8Mudum0i+BjPXCi z>P4^Iz5ixa@v~=5{A0N7pGa-6X8V!d*Y~`&&AS^b9T8kMZJHtDQPK2=r)6AQ*hM%D zl_KPRyxp4{9PSxfJgZM9Z(C1SPc>u2!zaawEuCkoT?+m)+Wa z`(ApmqQhYwqsNk_QgPfKiVre%EqBNW91~RxeV46ia;136)-|k0Q%#s>>D^{OeQxEw zS+>^pmo6z}u8wo9=gR7Lcz3(&^Qm@A^+kH|%FVs9XOEn@D*Wb>?MD4`cGFgdPO0j; zxqL!Zc;jL*A(OpK#fBI2f0keP9xI<6H}!7V)!M^nT@w@-8rT+g*Ls;<7m3d=^$A&0 zn90RcB>P;>W!=%M;V(=sZ3whkyyZ*V@4eTS{*w0EB_bSpRZOMEI<@Gum#uhz)pzaG)pz}7X9g|1({RW_?D_tnd*AmSc4V7tE*7k-pphA>Q8(ko zhn?YiMKT#vVzT`%>#JP~E6B)mCD%J`M7*~k=@n{jg`L?|&_I^iD_6bxBjmB*qyJ`51p#B-yN!UCnx+$)}N3z?1#$F}?}Ta|}6_Z+W1=V6f0CWEqQy z9XG2?6th^?%!V2BPAGORa}MO#@$kS?K3z#>&6SV1IKLfG61u1$vs6S;z}eql`z{7=&;0UtW6n z?kqj89fxOK%{rN?JkRJHSHls{^lt$^)e9Cf=sC~UxxT~s&rVe8LIKQzrm}))ZbXsA%fon(V7oLE>3Bp&{~a3I+uzop~*0lg4)|B@*_2c6Ouy7Mdxf7O)UuBj}n z+@Hh1DUn=D6 zIdZ1qT@%-|`yKbvOwF18g)(cle(yM+7?tqMP~rSNp6r)r52m#4ulB+E8n2U=9nF|k-*qd_7G|}} z@mj`ek$ku(MSF&~#KIGzYZW8*oesDggWqf({S4o;I9`a*?&Z0tO) z7}B+SR)o}h{i73|3Iin`OkZ&H9AkyXw8F_DybQixuN*{#EmdYq99ZqCvU1M3EHUk! zfBadb_>-8N7}DF*B0qFa5fFaMI=}sA65FJL2TrZuM0%Q@vL0x-B-6|0;CkWo5tmRV zi}{*%$#>Z0-f!jSUg9q)dPK+Y1k;q@nNvEZSnx*q{G4;`jMgG%8ztV$#j1agFZosV zuiy7es`jgOJgHJvJNt};KK9!Cz7L;$b!zRc4-3}^zFY8MmBgO4e!3pd@1|b$*?rsW zZP&?@ZgUNO_ZqWod6`_;UGBJTuH6j&Wp8uKHa(l)di3&#b2i)8+`4@rs9xl~tNxN# zAEav9b~(=4Rd1G`v+kX4xpn=OsBN29UaYQ+xxiJ-o-A6o$=dE==*ZClI^TlJ6?w8_R-f`3@dByWZ#Akh)#{cjeTQspdWmJIc4T_FdmlbLeI7&7Rb~ zw=?%N*oqff$Z-VOY`G*@nsvsa#rTMuvnT61mUgCHjm|gPtc4yq<({bh!734(O2wmc=9(7f zI~T3foV8Snf1hPg|IEXSv-uAFDtT{OZzR7b_~q=6evw6BD=HI-( zOGBrse6^SqRBZUN_wd?Zrn4jW?7N$LGdHr`*Vgf?R$0)Zd*%Nb+HB76iM??rZ+dvO zuho=X*?@g_!Y{8~RdcZSX-@9LyFa71t2eLu&+sa=iZ5`HUPho4*CK@p$A2eTU0u7v zIJ9a_hULx!X^dJsRRUMVhO86Yn$^!c$@0!B$AioD4@*j?UEi6vop>0BG`^Aw zd;68kkbTAbO~+nk=$9Y)aaTXE>y+GOor=J0sXa*8>ch% z7`86tKeE_!zSyzOyWH2UBt!pNm(>OZu3N?#cJ{E>Za0~E%X;HkvbAr<xx7V|K&hD%mBmK#p8kW^FdFB8zsd%IpDHa8$7OM7N>%f9nH zUfyXQS{aQ?85ScBhk&Y4r*w@B-%>nsW1$G~9a+}&_x zM%fxBuIZdT{~1yYl{_yw8Z^lKUMP_Aa`_aIhy8DtN$^~7IVPbH`S2#cu0kL4Lu2JV zoKHDSURl&~ziI9|v5rAMhrv*N#RL`3pmv!DN*-rVE6;E$YHwaLML28ql){$QClgJK z<8K9U^h$ZDaWC_X<+6#MgpkX^*GkeYJ^g!Bm>QVaZyt@kw}B^1QQ_H=Nj|@= z6}F1(kbJ@@QKw_nyi~wslG99$ha4+pmU=DXNxf0{p@D6I>luD|Aw|9E8b<7lD$@)) ze6Bo8=(X~ilg4!Zm_+L-ktGLOdS{7fl~$B<6v`Y73U=X2zt6bY;icli+MgZ=R~%vB z<7Y^5v#2=0uAmaqqtbau$Wr7<^}?jZFP%FZm;_B`W=)!Sl84uq=fq#J54H-IO~(F zoYp%n)xM-Ozs9pnAd-Jk!<19<;Xhe+&-4;lY*4buVae6RG6g}K>Z0b@1Hmk!Gqehu z|1+FF*c~i3(Q4LYBb6qh1h-nlWfNIUvRGb4J~osPnHpBadAKDlxKQMTw&}kU>$^Yv z=yOqF2y*FZoG!B{D(hEqkT|17=K*m=t+#HD{F{4H4A*_}-pJkIvFZMiBcEJ+jaeBq zk2;=GbJ*J}k+ib&lwu-7>*+3rnJy2HTnS-TYi^pv^618s-6u9#cN%Qj=CPvM>AA$& zO9u+3G!?|1IJs(<$|GUZ_Yp_4TH{n6PwxpW{?WaoO331@pT+Wg$-gV+F4?(CuH-?3 zUSN;F?rDyxcCsI>JsDmK9jLq1o!i+{m*{zAW|G618IC7?I-8X)>J&tnI;i<-6mTp# z9QxuCm-#*~VSR~~5BWQi8zQmI?=EUE;N}XxQge_jvL7`W^onmTdpeP-XX0*52!%&3}fs`>QP8JzV~^ zTq;J6H+*qlg2g$-WU;*2DLXo9e-`fDAyu;bYQ-h?sWAc04jLRYIhLHhDSNw8@98_s z&u72)79PFP`q}@!X)nB_doLf>+%$c~ zcOfR8u)-r%hgW2m>U}QW6;krG$lPwk(+JL1;1=!>;66JP!7?c1?q=jyiYAsM;qGyg@``r7ka z^enku@6EDdT`hab{S*79-+rq(ZxR39_ov-g|Lk9!?|Wk@S4pq>;<~%x#kJEalcV?j zS#y6^(x3PDb@XB#-(}_=yYqd+9+MrmXBP)Ou-uwy?_3(VW95f>!+&m$dupD(n!E2! z+`JiU(>J!WT{i!+I&fu(@UySpwO0AMpN!|}l*wEDO4~bC`|PoE{~7X@hfLeFHoM%a zd5(B+W_Z%Jh1uyX;k)!jRbH=kTUeOvb=^7j)9vEdzQ@iN%Z&kz4zJM-)?=)^?j?B(aW>Nxi2?u{3`WpI_DA- z!+T-HpZ@N1uj0?VpP6s6GUj$v%;haRmT5=Fh9@jLwMN4++q*X{D|@T?)n#XW+v304 zUEF%}PM-PumHCOefmgKe+4Va5uFuRiyu9+Psq9O$#Zs^0mOp)URcQ6aZrSsvLblzX zbXPcg`=;9k+g8?BE~(nSer?$7?#ogoFKv64UHi|l>|Xw!??=^F)p@^@4$jJxHI7^M zJmYGuH`ju?6{ZU;_jTVg33vWCf7z+|$yd9>1OB|Z@m5#Mdb#fEcUPxwxml2LWqHP} z{!iPMy0=}q`d!+8mipq)@~d9l4%vUFO6})1hSgd@x?jIYZjYVaxyJsPozhc|=c<>* zJe{7EoOi3dJ9WO_hV^cXU&fg1Jh!65xl!5q)(-i;*^BOPc{l$`#ItJ|OG|xyy^>je z`~FsbYo=2h82|eo z%{RUIoq2nFbl#!vJn8(fxz1VNdQF{Huf21f`E=l{*{A)d|5%(aKiS%U)z$h$-`8Bv z_}RAVt>6-!e<*l5hwr5>mg#yIWpnomUW)bJ7M-`-aAmsRr^(uZD;m5Q9sAOhZZtYdhmaS0lB$e*&B_*DX)Tr+9L97G!97DNK@J5w&0l z@l{FGP!Vvs>JTTXqq(iJMM36aM}jfSlstt(%k&oE6%*tFRl@oHJg}&2I<2$0p|?n5 zW{9HEqYXR-MtR#kQZD{3Iuumt5h8T__Pf@l>x8t552UOTW6PM?Yc(;XbA1UDpMp1M z-_55t89B}}&Qs=GBG91ZbJD1(cisdJhNeS~arS=<7_OU&C}d{3ynSad$MB=Ws?~=* zTF)3c-LLE1+@~}+ka+vLudY7dy)@z~94sQ*YCYepf z&*GC*449@XT0RmtWER(CSTkkG`4pwc&7D?Giyqh{G&LSvBDp}by`*0vlwm>|V^D{i zyo;);)DkV0_>2-m8BdD^6Wawu3)NC4e|44!n4V~NSaap0#LF5Btdbm7`U~=eYFN(V z^yxXyQ6$7zB({pZaGIpkvBhrA>*8KHP5N}A?a5}Y^zOvQC$9om2p(7vw9_TU$hAT6 zAcv9dgr~|gorGHVaK?!1b9iZ{@LrjH&Y2ez*6Kene(&OuV zQqG>1lQuaOx=gW8+xj=|Wv0VV`Ow~~iOq95?S6Fal+Z9b{;Fs0gtm9vN~NErR;5h- zag5EXJT(1*qqzQzT7jyR3wyR^&)#4z}0zN1mWvqKu9CodRIV2~_xU%x$f(&?2} zg}+N5o0_eai&I$AC(-e#?e*5Ux3`zZPFt%S^s4)=_oZy_IUl$T-{#MrxvOU5yrX}= z-BFztqN>^(xH9Q==F!67rWxn*CLB$gC|&iJfB((gspj3c3-g_3TwVH9{!OoTO$)=~ z?}gnz&IVqV5}c$^ZhSFdj-$^Hy}}vSwp`>``e0J0!i=-&5+2^qcCKLXGP|AHd+lkV z-=&lDmbx=NYhgdF6l{}py2!h_LNJXX$RYHBe~lN<1uJ8H< z*Y>3=xxB6mJovWzt!Dhk%GolBEz1>RHFXM%jei7Lt2^#m|97+Z6<=L;=i>}sS-%f6 z%hmrqvHzmue+DxAi!2QFkK*LWB)Wd-f=S;eTws_v zvv<0lrMc!F@r{x~7qzZ0ROfh5|Ig{3#k<|CJ>8iBJJv|e1qck=QVcV1@42j1G7wdG22*izpG=5Ndv@8bv*I*mt0W5}55CVXDYjhq zpW(`NvENee`I{G~Z;3Wry42vop@BbQRa&{&@VfPjOr6_r?>tbsT=HOf z(=Wb`mv_G^?pf})>}K9=_uwnP9=$z!X2m{#{|i#HzkhXiK7G57mAh0kw9WF1X|3X= z{+Dv|mu`v)U%q2wf34TnT0sw?@3UrY5f2SFnQ-6yW>0swRG8wqTecB>#j&Vd#Ms~+4IYN zyI4bibFsUsn_{Bx2i^VNyZGvA53{9WcdjkF@V%<#>axhdE8gdyURis7`Qsj&6@hQN z7thVOSGcbJ;or%wt4>}wn}2xmzt1hDSNCps_4nP>sXMnVIlX*K0IyqnChv+5XQZ@3 zqUIkLeJ&E4u=7V|fui-kFS);#uadq}{q@_A)A^UUr*AR4AGCulSmdka347Cf^KG)u z-c^_N&)U-9F{5l*OZa+$7cXxAP>#72sy+S4@AGbd`tQuIy%@i21%q|z-Rv~=8+Eg{ z{by+ZYGvPK<=E%_pW*ReFZE^HqjD5%cXt-gULlgbb_LsfS-rk(i}=p92LG7Zy5nti z-D~GBw_7*sX{N}DZ27y)`pog#XA;l$x_-L%?&SKNH@x3#B>i@u_}YZ?lg&%fdhY$^ z{~3&|HnyZ|?7I4U+T27PE1Rz6dBzdzh0mU2?UK19cPQe)SJriBTbBynFO#~soLT3x z=tJ4ftE|ip*Dv~YW!CN36a1%lXV{$l5SHSE7J=T3&YNRo4B|zuOu|l zVe(T&)ujhM?cB5OKZCN>ojRYq<^DR;+O)ULF|1fD`&q-o_=uc*n^fcCX|95tA)zs0 z+_M@v=CLkwO=P;Marma?Ia85d)dilxZ0o%5OyG2_Pf}XGjUk}mnXsZ?ok3%eqvRs3 z0}4V?0WXyej|SUs=2K`5T$UP?u<%FXt_c%XcxZ@6{CMlsP&J0I$r@*F=H%<=uMFO};CI)V701Ocvn=2{7P#}}y@~r? zWpB7<5x9l(Rmh|XcIPwuT}%4?b8m~z*8O(cEO^3KEsJ8|nGrWnJXztpwEKuM+tm{D z+xaWZb|;7K%G%m{MfQfe1as0&zNf*}tJ8h2pUaycvL(yo?$Tg&h5VG{>&r5OiyN*i zik*1=*p7T5sT51rHewz!wU*FdGYf3wYxqlcfBuKn?<>$+&@ z6*-0_X%6a=M;#aLin-2Y?f>?stQ^zQZLg!YxqbeucZfmed8R}X|9*=X@tQ9;Ef2e- zmsxeG@*S7T#PiK=>k3Vsc$VImRe$nY=H1$5f%dAWESQ{J-UUC6d8a6&GC$X+ezrBk zEvw~Ur`(9n-eI!SVZw^bXKY)nGY}5Rfd|BRbSLk2=En(ZYbJpg(kGgqt zy7|L@YO9myf0=)B?Xj)VW@AF+bMGvJteILtk`rTe9pDjEsVDp~c zh0C*juf{Oy@vz=1nCU37^~IHiu~%O6>Sro!m-(Qq@g+TYd&1B9%Ujv*=K5|Im411F zh2gpFAy>nGtJsj*=;>i=Lf;n_KmTZzE#~%m$M&F38|f=StwP7e7czIMw>+79(NyTs z2Pv-gGh9?wRqs#f)_07!bk}OyrAr(5mOc3EZ8Nzga;J02ysjldtG!n*T)2Y6_(&38 z#9^jJ6Bmc4iq@)P_h-ztE93pQRjf((9m86O2Nt%AZ;EO%HTv{rcC_#Hb<2L6#A|Y^ zN=QN9hS6fq4)^4r`GNbteQqe*m;Ng8N6dJ=WId*%!t<BiSmVA?~mIb z`TL+MQ}=XRwshZwm0{BZBdj{5zAj`bnh@of_3hZKg{(Y>Gw zEqk-Yb#`vn6F*+M@#g$@x9*fBnh^n6%5(1KtN6yu56QCnz2?Q2ZTEjw@n@Gy&&o5I z^U|!hKm>Fzt9D^-^dO3n?u_AOe% zY}&0nbM;IwS6{2BIoDpT`=+I!`fql<;r-`*kym0fom$16tN}2POuXdllHPv;ld+_pr9oKi1e|~LwWV-D;nT0;#ceI3J z&wP3=Hoe$7cyr#8XXVoZQ+|Bczj%MziA;`X{}}{LtIoD=J5zo>a^K$8N3Wli{W{%S zEg|N+JZ$CSs0E_0*{(e0DO>B~m2-Ld^bbel3-6WNK70PPdh1i~T}SGTR!?c0yy8WM z{Iu1py`!ZLl?u7f#SGzX6{%UIX!e`^xccA1Gh$9-#68(&(x&h zRcU5uTD$T4qZhU(9d1eF-90Z`KHK=*)6-k0%+2&)EIfbf+faomKc)s~^>IqxPQANz z@6)YEzi!yOX<^{hJYAVb=R?;Rr>|)Dj#>G!PU5<`v!hDi(T9ist~imL^0A>nK{LQ| zvqQ4;(ZGibS?&hg8eXyW-@f_jTJGK5m1!Wb?=I=wp!CgWPVpUY%reARp)X4 z%Yi$uE^WKJF3YNQ?bFunE4N-xOnwowaru=3b-!tQ`?9l4&-`cbGWn~xujKVF=j61< zwduZJay7&@Jw1~){o?Wk`+c46>)*=BTd#ZbN=e?E{|qZ`U%jpWR@QO;lKSV`d%i}7 zSiZS*@#&=#k5{h~3hK&T?iH@GMZfUiv0klhPyQ`BBP?MUdE-s_-o`bz*R4DEe!Hn; zc5$F&xo1;Up3RnZQb*+H-1po5XZx{Sy=6D@zFfY&-P(F}*V^KjL8(XW5)Lt+*DT($ z{n69DMLWgC^=2+M-gbFO&DE>Y+H>=kMS7m)SKXJtX>pKRRn111n^x2I&X0H}Yq%ro z&aB^a??$iPw==`+(Y3jIb@j9F)T~?S`)AG7GcV3Bz9jPE{eOnymf8EsdOwuc?(!==%sOR@&EwC!)9k;uy>z>FYwqgI&`aM8n66ti zd=LHYd;L{q(u!XqbE2NLWodZGv^j5>wr0`o&$=tG{obiQ^{d1;Ue#5yUh@sEFr9zY zHL-k#h^m9sj@rQO{ePshV^`ahh@S79W;f||^tt}ryV~AXZzETHdi|fFOyOPp=06^PZq)ur7riZ5GsE}N?c3^8Qdi#o z+~s~oZ;tiU{;$<)YyWnHZ%Gd7m_5s4-~4&=KOXy@_$u1=+gyi(FYaAr{%W@5m9F}m zJw>7U_m)eRX6@})k>qjH;_Nlssih~LYqfrLNMO<7&(X6Mzc_EZ+Jm*tgG1usQvo5@ zfCEt8RzqPtHbfkDVG^Wh^>C$2# z+@Tt-vmjHC*??)4pNfp5mfBUd)XE(Pj_z^k)P8s@fMI9q@s}UkCJ5|uwchaExg$eC z^1R?ond*g{Q>QsBY42f@T03V7t5$V|bK4{jl@`Vrk12}A2A*8u8?6l*TrPWe6`rb5 z>`|C(pn9X3MO*KO)+L`4nMYO$-V>7YnJ`t6(@%CnSUjhSh$P=l2ZiapULI;L%Vphp z6ohZ3%(F9SSMHlUX*%l>;Ug?9X^b(&no|}?IC?%lA|m&56;9Px;$l{w(B#n!j=rd6j7&YS=0Xc>$A4ACR}dxYW-i_C5>7Fe=H zc>T@J>&iqq{uSs!l)s&kzv)%C>9T(-M2r$1}Ma9GxX) zov%rU9W8b{_iK0NtOqZz1O`j2{4;Y0?@L>Mt!q{b-aRusVfCM(rtCk%Rr9Tfx8A-` zxYV`aiBybxbxZQroMY-cP6cePYZDR~BW*vc0r4;Ym?icUpdx0@98wl6UM`-i*jve~zO=hvYeUJrWT z^?T>}Eq!rydE~+~1vAQX7R}xF{@P#LO;>mGGOm%zxF{4np=ZLAn69t|lXEYYiPvm- zt(nrNf7P1p{^e55n`RI0#0#vqXgs7ZugiURb6Bc_tzqE5*@qH#Sbx}M8T$3@xu=y; zzuw+j9bQ>|X9ttN#&5&Rbzv60``(z|y8lYtYRZz5rUg9n#Eh%Hr2c2K}E7hF^46_!r{65Q( z9PDwTAwd74Y@W>Ow9Efu>{BJyuZrEV-z)Cpd0(YuR+Vp0lGQVIF3j8!edo@(d%bSE zR{foEY~4q{S(BJnY01U}pFL&}^T2D;I%XT&ll+q{E*`f%x%`fHg~>0gd7UdiA77lK zY1X<|`}MCaFSoE?UUaLZyV-$PZ*AaIspV78eOcU5svh)C)_+InzCD$EJBzOJ1}rt% zxX0GYmp57ZTe8om<-u&z_A_vvw0_vITd@B^xWwU2nr^EfMu^oMf3{3|^Xof)x2wwy z-<6(bzH!!P*|(i>A0F7My$stFHUE|MH0$f9-u}9`-!t;OVu&`qg2xv&_S9^f9b6)~IIrt*$X?s@vALD~_fXIM4d_Y3CAAzRWm#kh6i%p`nM`mzL|by|}gIdsv>Vh0a<%Ewh)uSJX=RE%AtkJ_Jq$rOEvyZuV1X8(-sdQWQIP3Np#e&TIZ=4aP(-TU3Er_7m^ zd)f7#;8Lx*Q3s`YFE6)R*tco>i|=oLmtD_vxL7*%t$t|hudi8)u5CP@artA_p8Ls< z`|_{q|J;1}YHqpi>C1oZZhgPHeb0u)9=A>3Wv`gEb;dQb1%Gzks<)aQ`K@x%iT@1q zVs%cRUba1V_ja4$N{qNQ1@WQ+6*7dD*T=d{YW{k7u zw96+quKKDuO>9=~B_~ng8NN|HpHAtS+9gE9Z7?o)DciSV+O91bQl?ygd{-QLZup-; zB;0KG+Etee%*$^V+{wKmlMxlR>e;31|K_iH_W4S(`00-W4fuS&k{U7oXLW7g!%T-Ph5 zdg@oM1|B{B;ppA@wq=VGUQIPy-B`E4Y%;gJ%lVo2Hnp3smH+hT;uX!cCZqY0Lpl5qeCQB#W>dO@~o6Q_0yHxIxbFI61Tu0u2hELr~ z%CqhL^EOMHo|d`Jz1xjdH`Cql%9X9ZW`DHEy80(@viGxemv_sPGp^nG>Kr2WdZml? zmFoWtm)6vqd)w*T1ZKv@h24`ZJ>csVdgt5LN!iR+ub-ZHnY-m}wcgLn#Q4h9FXQum zxBB}%ymp^=?pg&~`|jv3`)0q||HCAITl}Cx$&#lUfp(n-=E{bTT2Zd#-Dw$satdJ*O}}6mwlg`^ZU-mxSLj|t6sdW zn)X}u@!qHd%{~TV8|LqN5V_XmLW|E74|nX}lt|Ezpl+>g^6 z-tITjl`8u!{d}?S)?}X@3#_!Z?6%`uDs%m-*}{X1cAM^)wk3E1 zdRA`0D-TCw=NbMbChRO>3t}veZD0Of{#A6Y`IUmwEcZ*R-l$zX^@;sf{?Dy$=RG4C zO%8djw0rT|Evqmn(lmH3sHy+rfPkQ)C4-UE<`#xa9wA32Y&yhj()XzA>RJC50Y<9; zNe!XzGm-=~IJ9k+&rwv@4rJ3xQJ-~?gR|gGWI}a2gkA(NKFiAcuzOX_yq~H|KEYTyH6F?*6 zp-;nhXlew1o5RGRZL}~|CGt7Lfp(!(qv)f;5-gcAsZ*YI{cvee;7D7@G>5;ugi*l!~d&~MGvw}R9IB^E>BQ*=R_8kj8EtL zCE7M{EjVe-^;@aBMC7wjh@_yx%i}@X-}lKq;(r+19{kd7+0qR)JNFgKxL&>+xFuSt z)b?o4ujCm_&*e(>oz^M%?4Bi@r!~ud*;lcy<^_UF9j{IiIIO_5ra|IE{9V@B)iLw$ zP4)D=&djvp&l6+YIW|gZS-J|3d@dXRP87Jta%V!r(-jNeo>NQS86x7@`J*~Xpf2VF2` zE=D~Ij%9D`Ew{xUcHQ<)Ha*~vKiAbiPMY!Nzb4+^_Vce^bN|aV47n*rrRhobt&1#o zF+94ocWzUWVeC4Y)J)5(=Fo7=WySMaMaK8-FPtb z%5t~e@^SN5`YxHi;qe6J{qK%lJ6v|hY+rWNSBc|ltFli-%~*KKG<@p(;;5zN4cD(| zJupyHmk!#_&i37HYk!Dakzb#M(cev9)BRqjn&0LUWt_B~?~(Q9zSEayZ@XWf$GbLc zufZip!>8=5!bY>E9AqhtpLA^X_t}jrb~Pkzn>9(wpY^fC`-nil!pdN~OE>P`_1OBx zy6&^r+HCvKW}f8sn(M1;cw3eKGi;dMRe!{BiLmVYRlE9gUY@op{m!*$uGKo8nP-o; zgjG#W+3;#_+veOkw|ZBd4{CK2)wO8nSas`WnYm`@O7$L@BAW*bb~HCW?$SE$_cJzd z|MES{mwRtrw|-ZYp#R;1{2z;F-nqDA%lfcu55%r5@U`l-iK^Rn-0Nq+Bj2y$%a?FC z2_6wT*`NN~pz8D8m)q~B#J^Umo87J@d;ZR;`pDH)v;H1gsh20I?XuYKkkp|V8HQ&n zPc9!+uD!lR+I{7FiPpGVd6A2E7Ihs>U(GDCP%*fnH^zF(!<+LzE%f`l)TZ_K@9r;8 z-%AEQTl`gjHiM|c$F>G8U&mNa<(7qBS$#%NPs~%k&9JcC(rEfO*W7KdKBs6pp1l;@ zw?M>af_t4dM=RsPI}5~FIa(g=DQLVU{z9OC8QX-exyxG=1Xy=XczQsx%!wt--7{VD zufUeh2`@dDJZQZa&li!mXoq*h-{lNK;=*Sxg)p^Nlso0RH+aYND!j^?qT-S+lD#DD zanr#NEl$fxojr_kZI}KtlWR85lY~4|yi1FXYQtMg4hpfe#He`{Hpo0xnCqfdcSymU zt8xZ|Fayi+m(8y>9+|2u$@=z9o2Zf0b)}{yUcobd>|U@XSbC;#Ff9wX#`MbF{M63Y@6NLeLs*24xjfZ4>~Lw{>fN4pCmnNxj%%#& zJkm7zee(v9-9DGgnrc=nIm}dl(V%tNl=+~^oo8njF;3ckT3;u5+p{^RS2hJqou@Z_ zTg--oMHeik+Ib8cmv|SSTAuV`_P?dC=1zONEqbn3^>MFXq1(Po_3P}}Ubnq&(zjV# zLNC9_NX@r@=I*-f{x0X0Rvz*1&TlQd8`xL(aC=-x_FT7J^8ZAY07tX zmswe}O4UD46)HWm?W^C(4Y}J(=WcjE#hlyydilakhtTSk7i`XyM!ieUv^@Li#g!M= za{fB+D7kz4y_xs)sO!bPR;??(?OfUXa&PAR9gn4qo?GZGZd&rd=eI>cFBf;(&7YHB zIo>(8F~nH!yY}u=>()jstc_}!e#N*_YwyXklatu4>EumQVEJ6A-1tyXkGp5Grqe06 z8a~fkiEBiKmRboE6_w@`7Iy}P2+TP1VS0+1`9qHI>FxWNPADxCcw?Y3DdssZ$L5mM zbn}AsIh=M&d3`fZo++Bx@-tRkR7LS%s^xcA@g-jGeXltEGA*rUJf?p(L#x%;L8U8Q zH70rUq0$YzrQ;*dKh?iizUr@YsgRS$3Sl9JgOSQzZBrg}RU1YWH*kmu_VmTLENEP6 z>ciOi_I+OlYmV>sQD9iEt{CIL zDR_Pk69?xKEv=d9_xb8L0$5x;zl#d+O;z#qIUv-)&|v9sSn>Dg2Iq4s>p13bm#7FA z2=sP0VAfAzQri@Fr}1$^o8r<10-Fz|Z~4!VBETdruwzckg+k#b=M+|7(v$tvOU0b~M$!&wtdZ!64z>FgxH> zqWb(2v&Gz93`-seObRG-T)M>cq@mqmS&xT>`wp>8xl^h+LDR1n!dZkC^tx~|zK=pcH6@PtNuUg@ECi6g*Z;*lRNQ?)Eb-8h626g)P%dfi#9v-GRMYG0$O@22Zk zU*Em6Y}MCSQd7l^-qg*=V*0jofyFZRqYe?t4v*5((zqV&IDBupzocL7Yq2#G#b?YB zX32JHSWxh;$LdL1sDYxo-HFqx9CqP6VioH*8D@)2oZt{>`$&6li(7|JCp$x^_3z|O zf4Y|)>pk}JhUtT9>9!S*RUI9bI{a&*cU7-i^~Tn|HaSzfd%H-*@fjI<|HO`29tsz` zu_ik1(+QKg@y_X=y;s@4aYJnPjK-HJ3CeG4%e9Lj}94bKF+E=>*(dJ5(gbut#RGEJ-GJD?6u2&*8laoHF06k z_J0o-^G43tRI_$f^v;-uN7pTTRXdgJu0N80?{3c?+$PECXT^T%`LuVX2bTDn8!o=$ zuf1IHp76v@uubknZY1JQ?Rq(V_puwkCPg`VpvCU4G z4tCBJ7L0v@B|m4~m34R&yYg7GhoZ~WgDXXrCoUG?c;@-=Xp)V29KUG2u940DS9$-` zR8_;h=ldVFqEd04We2VC$FsVuN zoV?ovxSC}AA`dD2d!fM@WKpIkAeuI5&Yr|p7ADR^LI<1}GT%z7s4zLEPtQ>hP-cGD zvdO{V{60ow&Gr;c>kXWT887*yzCTv*AnMVB3w!rYI3w`*_ruE!Q>S15JK?~FlAP_& zmTO&H(~@S=v41MVvxUZT2VOk4)e2sbb4#H473-TX_v%)>bP!s~F`>-x>I18r%RTK2 zyQl2=&+zcqF~yKL36F9wv8js1^JLPG$vMqoTEC&zz*%%h;w<5Lft!|P@$#rlv|~BM z(xlQgceBu}!>0t;=A5cfd8o)ct3{HJwYZHz_?gSm!WnPcGD}YRC}}XcsT{GGu;7ov z=}pXTib39Y-%eq03lUl%;u_fguAN6f=97%SAfJvC1E-OvtA+CxGk43%P60-lCkv*$ zi924)#K_0cG;@`o{8GLtyweRNW`2{bkaoQn;g@YEee>E=Ce?ErA)&Pzl1rd3_Q-yPm6qL<-71y(IQnx z(V6-G^~a$dGbQ}*a?M+CSkr^&$ZwYgSMw%>b52_P==-6Bu&k5`EguE1{;p%;%;5E$ zaVa9FKv#Y6_a zA5E*e9&{>cyzPu|2za*AsNkfRTSTJew@H->MjmcjZk9`Y0|dA?1o-To$n6jpu)w#b z&|~H+hosLU%)2GFNqYEHGsG$$^I-a+W9`+nS#w_O1?eS89$F!NpIHRD)gGAi+?kgz zu`{KkMVqN9KtR^e;^3-xnWr3Wl64saih>_YlrHV```VRXqwJF5d7@loj=wY0!w$u; znk1%%a}{3oF}^OVnmXTi?fB0iaZ^)fg-}rZ!lDj2#-c@fGVBs_FL4?yHuB$Pc<-uH zs~>w$bcKh*B!?LfCUrkQEj5+xePmDBtkho9%%-%K#s3*v>(7b^bTJ6a<*9Gd->>6p zyyvQltkaAO?+#VY?)hjSLpaNqb-2HS{QW`qqZ)s0`OX|Gs(CF6yO!6|WcA zDzGr6ZvVT6U`4}*cP)mK)-fLwIkhXx<@uaUW9}^;6YXy*Xsui1$vmZ^!(D)9a({V> z#^al$-`fB5uKrx9 z=~i9KmQ8WCSXI7|=We`9#`dE-wQMGyTY3BHD$6KOCg+|{J=1ch=g(jEXvs;DiX~;M z^ZGVz;a#S=LEmgTqXD1cro|g>W)yERT`Qa&#isC7a=z)cPHE+X+g*2VD`?A^zSX4d zlH#haR(81;lYjAkYk26h@ZhOt%Ng%Ywq*;icJ@BNqAay!%7G7U+mn`kEsbLAVT|I^ z^p`yJ=gMhuzC-TJb>~Y2GS&G`&Xc>rf9u_hdua~>zNx?O`&f86ET8d#$;z1wN@{X~ z!oQq4lU`iav=ry4pFQi+l6v*jZ!!uut}!qiV%|Jqdj2Ny7q4fE?O1Z?w84ItGWRcw zxxd&=$WUQ$S$dn}%TxuC4NY9OYbVYZ^{}`2CG@9v-E#SVvtw9vP6#+X)~Gh?Zr7VO zRYK`(Vc!9(ux`e(Ngov^O^Ca3V)^Be!;L0~J(ayvE&VyVpLwga|J2qto_P7?>h^}W zMh^@kPUd^Y>|r`+{Nb_e#Jjd$Q*Qe%=yB&v7v`PRo!P!`vj0?Be^Hr4JJ+*pJH82> zc(ned^nse?O^o)85>t1j{hPdHn&?Rbk9YnXwKDcEw%jo7c`` z-Q!xcPJPDGV_U8ts{FU_Zn1-W)a=>Ig(iJJraUW3{mAhJ@4iV}y{XpP=$LQlu;aPN z-p^0}nrGF{;Hl$Fs#ufJZfK>sUfA>;*W$AK&6aiCI#0X)Dv2d`B_-Pz-pc!SYuVJY zL<0xS!$K?xVmnu_P<+rSb!GBVs~PH?u9v);EY}&u89Zxn)M~Z;JApy_@T(@~J!f+c zXjNN$bLp6^+w|bVeI0EP`#Hx%H3XkuQWtEFmG#(Sl4+MVQEAhh&&MqOGCT~M$?iSt z!L#W?916{rW$vjf?CpaR9xLAnILT77*Y;=HE|dQZXB#I^+t~eT);GZkD=rCa4VYxV ziAyAisiJJMBA=6k(}J@V3k^CHd{4YQp=P+_6ce{wQ|M{6HG33`CK*mRXnaoY+DV_5 zsdv1*gx&PC7ATzYsw_XC*uruseO9o4`okth_eEz!W=!(q%;e(U%@}L(;V}b)W8pK- zIR*kwD?5Ydzcpi$+faNZ-~{t6^|u1=;u#IPzaE;Q zFj=Y4V$w_<1~<>RAdXN+)klx-v#bm`Kdbh0j=+VN%{u)(mTR?CWQ_IHRh_yj4lwh4 zVo?(LRWD*{v!KQ2Qr($H4hy#I-aebrj-ew%rRjm%sz<^W3C7`C*=NN!cO7b1*{IRI z{69mdq67Q?3r~ONT`k@hFYRr%*4Juj6aO;Puez_It(J1Vy&N@Dcg1m!CRIc6`BQXq z{Z{L$Prs=0(DGqV#J;=|=QVTgExf&Qt<)0kTghFYu1@&RAd*}D@?>G9#b4#y5uxe9 zleR8V(70rMu-!yx!3lxQ4|bemF1=E))Te4|tl7qeFfPJiQ0$0HH7kMw_oL1 z##zW9I$z^WlSlCZ8;{_LhjiNyKXP|g_|(9s@K9%xqhVVsL$Rm(`<4gnQ*@p`NO5ty zo@G44h5eAuS{84Wr^k7OvZ{R&uZGTP&UCsZonYb)a z?#gnQxi0n~i@$%A_7sawaXks|kYrz`W>W{A4?VMjjU*mUIP+Yz@Spk}^NGrB(;8bE z*%WG~^jJK!IBCQue0V?KjgIY17QXd)H!c?@xC)r`OQy>*KdsyU%3NW?&eopiXV*#X zztzR$aguS`VvYSZYt?_PJ&^s6ZMEIykTZ3{GVu}dzyA3@{PigJ%YTMfYs>fFOyfUu z>EachOK;OJX3aae+9${>O_;O5W6z%pS?^0YvY35NGp#oKy{Nb4@=+028TL5Y{@)oj zNBvyyA7P&IW0}FPkocg*nU@o5!zbJdxnbEoud$ceHfrGv?am2D`fnf8yKBEa=vIc+ zw{({kZYR?-OpGSvG|sRPsxQf#={n`MkEQ}kUhKMv zzV3ruOer%GC4H>!NiR)tU{*>tnYU}xmsP(7+jLij7p{t7RFz`W{OabN(IOQU%wl}F zCDrJTc+r9^CB2%&w`tRrZ#jtuHo1rt>J(qv$<;hd!ueLqdFSB!Z+qG};a?96AM`L4-)CN4iq?hgO?6lxe z?8avQ5}t)?IWBoM{|l5bwVJ@ISAC#(L(0Sjg%PE_nyW=sKDp&an_n(EQS7sjgJ-G4 zR}GUbTOS@c!E^9q!?~qfWTwV(R=q6b5Cm$9`iMlLq9`?ngD`{<1$*qp&nTux{es+&v)%w18$&z_j*evx9 zovprhi~VS+<2gx%isHRz`?fEc;HUk5PHC-erP{AuGVh{~X>Ul=VA*jpWv27N^8p#2 z3_KstI5=@|>siRp5xa!L?WjbemurE?;P{pS%-`tF`Rfb zfrCXqb0VjPtJi`6i}OdEm;+cvbo%T!FFdL~sjR84gCS$`lqtbF1yeYEOg$pmMFYIb z9A~O}CJHI@-8@u~Ij7@L_RTpArl-16(iZcC>mmlzf-4~W5dt98dZ|cwVO(kp1!dKkC7H|DGszLaM)DG9;XWxU(?wym_#w>)s2hUhBGP@9L!GI~qr{79B{R1LFS^2Y5<<#!>gx}S?wk5E zdT;*IsqZCnayeOJ{~X_z#q2*l=&_XIiS3_~Ew^kqzLL}NTE`N}E&V1_B)zvOvW76L z$lmZxUiIhTEvx#)+g4fU-F3fuM^B(?$x0r1Njsa3r(UuKEOVAS>a#Iomw4!rxiY~A zLxS4wXouPMU8tC_cWsCEib%nh*!x1Vsgqe=^VpNGqaQ&P{{ zMMGk)I!*3b#6Icq(se2mI2}?|%WRZhPZx^lROq#uWBPOR_oMHhR;GWgI zMbGXD(#Y)7a5+`|==Y}`q5oDEJzLhR%f~M3<>9LS-d*|X{eSx>+`sdmVWxDgZT;%4 z_x>I;KdWYwf8|!Zy+_eH7Hj!OY#MzW)rNKwM_)h7erdfzY|iNnQ-9IIOY?p#wrE&q z%pj}X`2IhGN#KEyhadklyj}3DKCpb7+u>aMWvfna_P?@||8lxZE7STv;mzt*ak=b| zyIrTQ_4=9-SnKrdQsIhHed7z013o^}yZBGzTFKvuJMT&yGUc|4x)bDj{@I&n&knyk zdhy$pwX6nLzIFvIYfY&+cgb@48>hX6?*gN=rrm4`%w6Qo9p$&;!iASVS$^*OJgF-6 z)Up2zA`zKQ&zX4cs4izHO}+ooZQ8w!+gH4DjSli#_aX4$Iyu+l!4vmPOx~QfElW{z z3)|&9hKkx1CyR}9_Xy5*nYQuQteEGM9ON>SYCmeOEP8lCWcjCCvvyx~PUh>qtg-Ry z9Pat8W(G|wd{TnVMH$Mr2N%xFp2LyxHsg?X<)Y|ViBtV|UbI=R;B{@MGa`&!f`2 zl3p$Lo?jvtt8jDWJ4I!?b6cxodRMOP>d#&_C*WvM@41Ig3w6`xYRMcp%FuI=$yoE= zy{)GlGhC#mB|Nv#extr|Pl0}k9Jkb^;wc{BN4x9z)~qklFsEuJ-L)-&bU zl+yi|3RJdb?T}-va69^7=FV436kMkFm}>Q|O32>%*8A39Q>L}%Wx^7n>wF!2W_@}T z<0j>#UXr8dDCOO;>|1lk#CdEc)~zR&Jd$X=!DMjIqr>c#XJv;u*EUofRM66s|| z6VhUCdJ(E&mbqP@JzX}M7iFxf_0p@VWbU(gBv9DdxmLsOeCZ9a|W6HmzJH>Z)KPu%al@74&&175=k>+lw zZRFz|adp{s@#RY%gw&|=&3msC{H^!&`u7>#clRCsDza)i+cgun>C=9;8a+3Tn_fF# z$9-q%^uWk)>%8~dHeK6lbTMl7%4t{fcCO4c3VqDu)J{NILG969fZv&!9gzE?H{iy_M@Y7J>G_UD}y@-{hCw*RxB#RxA&hX|_ON@*#aQ z^Q5^;Ikl$jZ{=oMu>X`(Lruv%A-J|DCpMQPq^nS?&+Im5=YvihjJz zqPpXnKHE8qTc&#JX8O(M^_uE8D{-OUnwBc}r}2L8tl5sdvhFfknl9D2n1QSCoJDi~ zw(htMOLyGhHS=4&#^8)`piJ_m-);|`i?%Kg>veeclEp$MRpe8i*|(>=&v@M_$hS(! zU8fm+C+fw{gcTAGqGSyg99vv|H#1kvbp7k4)>S%slX#4KeqC8-Yacr2%V)u}|MG6V z+b+#&^(~Zh%c2LbS0%{0HYj<&nX%~B{W8PJi`H`P5zZFdXmLI0eR=e|=kbNRRI@EJ zw@hM=;JhULP->P_Li6f_4h%=8^FPfLVoC1ld^p9`<8#@A{ZZ~!ot~~K6MJ_(zEYrC z*>a98SWqy3W!PLZh2jps&l)op82{LCK;%Qm&ekx&?ieni$#FM&-zfcVOtUV!z0PcR z|K&R@PqS}TG$|Ke{wu%d{rQZAE|(X`o{d_dF;lE#*Sjq`(6Y{;sdtKP#xTn@vKIcwOwH99&r#%L~gRB7pzWUW_O zI}dCL{joj-c-`@EelzwmGN%1u+fC)HXd2t*xX z+WT(a8k4R2v&_EtFU^nL`f{h1f$6k2B^TOSW29ov#m4h3&-`6D)Ay>`@3fmQMOIDN z%E+ZT@hOi_ip_qWFmvr)c{?(aIr-VW7JSUVd(JqoXx_7ofWoc=xz|3N-8ECsZt=|J zuIhp--|781)zT39pP~F=j@X}r_wJm(91(HJDc}lkt-Da?v)}Pq+P9~2O%K(~SP>t2 z#N<-(ti5l3q-|{ZefCr<|I1~Mp7m<^ub6i5T|)N#rCaTFD`WoZUR`Axy_M57Sh8&9 zyV#xVvjg9_y?egas;m3H+v=m6Z>b%>ta!L({`4H#eUI1unzVSu+xT_+;-7J+P1{g# zJs>l=zVo8l!ELemhvq%sAG*5S>C)ufoRwGfiZ*q1<^E?lwIt@PZeM@mp7^)R=H1-B zbJ`Y7#;|v*D`TW;wv=p*I&Nz(HqEtYXO{4-MH|jecRKrf!o6pgD%ApB*X)1&D>pUv z*zC8rzGp^#cMKJ0vp7=!O+DCf;iA39ftMA+6(1DNxp3_Die2x1{o2gFF5+}>;FdcE zibs-vB&{%)J@=oXVBO(LuXD@VrLMlu`jvQWM!e_#+LKFr8&5r1a&=emR^gv!YmZuA zp7#9W=A5eV^*N5U{1FYI`=;hPCGyXhk$7*{?u<)!58b_bU&HO4W{&)GRwcCH?6e!*Mx_H*Tb)rlY zPd&2#sU$r0#Gitm=}8N^Al6 zhFtHgP03qc=oTF9&u~#Px-%zhsl??wnY}wNO;-(FyiE1g((cUA+%Taf3$)TUpEA1h z%tT4lB|!D9VMOXxt$9;d=q_=Xn6`9UvS*KVyWy>s`*yBe@{;jfq2GtcI!V({_C$N> zOxmoO_OH2m=bf9E?K)T8&C5{Tb!h4v86i#SwhyUtIK2OEe>VKwORJk#7>=iYF3EvUbeS^LAqCGm6zO) z$Pc+&T>bKGbcWWeK7}dY{Y1C>ed)XDxAfh!&xKBtJFbSDTfD<`!DqMACHLOt<(}BL zTSUXqBiSXaNDSrr><8@itrx${PlP1(xXq=uikpOcdgjk`sJ%8bLcO9_3}w= z$iI~Ao~4B=j=t?WH$x<5Gk0L|tov%ePX}Aw&)Bvqa{JAIrTz~S6IdFL9uCRP`1-cD z=yK5f1zxPnDx)^3%y`;&cZGSidD-n!DYLkJyQ-tNhAg?|ecJWC>Q*iOZU15eCweVk zck5EN{OV{E4~8W>TZA6XPfZGbbTm2qQF!`{B`>$?8dxo@^2s%i%Qfw0KEFDw)yQOz z{$~l9wYmPgKOWuI@BPI*;?1G%sLLhC)L+|AFFHP9*J~4{&-E{l3*6j(>y1+1q9~un z6YFFzZr^_A{n~Z0<=d{6t}R#`?KdgP-B?!iR=r-Xr`r7X4ZA1iKX=TXDs{BJa@ni7 zlgnP-J^kzlY{Zi{{Av*MScW-!CvcN&b! z(`FR@IP8*VE@i#$X=2o)4_12d_x8$_aG$yMtYS&dci&Z8 z&ss^(RZ&{NzuJc{+4oW2qr35`doC{9w0LLq)zr%xS5_T*bltEet86Lj*^sAxv3%Xz zPHzv(jVjff5;8l|V=7N}KwwH|{-GcG_qz*bMJ#pqbx-eksqya6*5l6Mm&~TU-E~Ks z>3LUXmW=16SfAHXsRyN3nD$Ovu{CtXb@3~!j*Av8+Op&WqtwsZbMf0(nO)J0x?Z?6 za`C0f4y;R68sGBHGi=;(J9yKMm3i0pdkbH#tSH+y<i5W8)DmouCFfLf3ZjkzrFL;F{ zuSW0=_3Ipl2NDECrYyhD!n~}a#j3c0t3X!V}*&sW}{P?tFYmfXhLHN#*1bX9>?DMa|W!55Fyt z^<(i_$-02o^C)|kqndk=x&6%q78apK5vJhnJq#vn^B7LwXV}ubRJ_Gz9>TBcd6%T9l?_V3xK z>1Cy>c7A!;HjQbinCJUGleBlj-#T9ZXHeO4dG(I#g${QWeyhry^SYatAF6R-*XboI zV=i9(es1^8Us;dtoiAJ^{o1oat#Hcgxy$P}R&HJEw|8%uz^{a+1y!Cdl+Em64w<+pJj;+qSSMED)b$0G`|Aot{qP%B5y4rcX>F!pQm-Y+l&9oxc z@bqoEvFgjWpK%8{7xAymi8WksR{dMDw+oAew#x?A%aJDvd+!I8`d$w22qs8#ss37(#lm#qBorbCN)g@#??d|_cBHbs}e zA2}8Fu9%nDIrETu^@EJAAAL)nu|-Bo^%RMj2pM&~j|;ss>vdMDYX{@OgTX;_6}}(4 zdN(#PCwI;2hF^9wt`#!rB-meIQnFJudUNGZM?Aid3R6b zQO+ytCdSzrTo7D%prKR0X0dv~#`W7{uWsIv6|(4y%C{4OHzl5~;F!{!j>8&t_DpTQ0vWVz6*U_iCoS8!LY%ddGc_T2MCWT0((B z(%!Xw<*S1~$#nfazvlXZf2*!ne;4}tTVwZ%h->u+ufIyU_Sx{)ezvuKKW0m>Yq|Ym zI$){m+8um3QSl<+FdBuyChlH)KDt)u;`UTZ9yu5ma zL2IARjJxN5RN#HI>)ERfdse!1|4w_c%42Snuk-qkm-Bxny-fC-n!T*5B}C;@6W>Hr zyMP(Tu9t1S=s5KjcddH+teG!u`m^`Fn=DkmRBY+0l|fr2j~-le{Zi2ExL2pnPMh}Y zWmm{e-=*s=U3zqFuKQb7J%e@nzU!{Mwo<(+AA024n$Y-wMLTwXyDWKo?ZmIVwW$Rn zhdm1%q}IKel5yAJYRVm#3zkQXJJgoEew5X;{xy!hbq<4<+i$FAI`oq^l7-&-k|Tk3P?azmXpK77wQH(0ahGG&HPKYFNi3JA?kei>+Fi?aMr{5PzU@m&HwdiQu_VS^DsO4PwVnJI4)Lw` znWpq0=A7Z=oZCWD&w^^JrZN_LYfiY7Zt$Gt+tH1a*YRu$nG!T5_`uW9s_Hz^InlYH zCvEi#84XS;FW-D=(Uq->+!mEGt#f{)yq%jnedDsV8<#CjOG|IKEKz*urY~@!1CcX=~=PGb$RWB~~&s=q7$v2a2E1$7zo@s80 zczJt6ShmTw8I^@RMghr*@mYLZ^<23YY&pBaOZ`^YQcLdxJ9ixG)|xf#?Uk32TnT%< z`xKN+vZm#=^RBybBgj;fl_z`7n^UXz?Ko2_ZZvu3(skG8syf}wnttfXXSd*9tsc1v zPjr1P7AT*7$TV?Qw%w(LaaXsz;+nQf%ylV$=B#7UT6gYR`6(@)vsKpDfcL`9*5Bva zLKX&oXPewvEi!>Gs=%{TVq@gnHJg*29V;)b==1eF{%)7mjvs1!glRl_~_p1S-fFab+4p6 z@cHn~?bW?|OSewtu4AbFyE6x_j-6)u06*93)P9Sx~6pgt*{WTa8o@$9$70Cz+v2;3_3Z)rO zzv)rkz!el6V)$9&5**KBDy6`}KR!RG|OiHe6JU8c;6 zDOtd(ZsKZLUK1$s$BBDJ-^8$wTpm-BLsX3>Nft)TTV#1;o3}y1k>?WcXJ~Ll_WWna zQ8M3@k!{#tJde3EMDje}jdQolYftB_I5jQwt}p-8lR}uG+OC(~jorO>>N|;RcTTRWuKwIxs4Ex1+%}2}_lEFl%X<)V&_tVy3g9a=nd_tj-m+;Z1if6RKg`tORxivwDphP>rG zq;Iby{Ohbk?9yiw9409Ts{7x|Dc&14f8vumeppL^_vfUDv)?Aqd7E$2A;X~N_b1?Q zUQS7-8;6~ zFWixB>7gY&{fhr)i{t5MUxw{kadbUfUdy*%v-8zn{AW1jeQ{gK{L-kkZtU!QX)l8( z2~5n`b?erJ=YgWOvKk%tLRiG+x(3fEc2oDbvG3l}G_jY8QrDR$a`#tsmj5kzVR8A& zsU@O{N3>2A_MUye%4%{;s+H8*bz9x9>Dm>cz5)J*zi$bd+`emTD)H%o&ZOkO;l#jJ9qW7D1DH?HDocUFDQTc)d(w~X5(AT9RzhNE6btaI*Zzxi@-x!03f z-#VWj4)rR2EN=djahBwVnZ0Jew{6?1b|;u6Yzu_8;mw6% zfzxjvKAMt}`)jWA{z3TkTW5A_UNhHap4t#Dlg5>H9hXafmdeHOu{>-)Dl}EIv2=%9+3kw0 zjhkOt7nkmRJ7r?Vj;rg6xR{xjORAnNWT}kdc{XF>ob=esOCy4JGuXaa@bMt`oD20g zd!kBgogSU)apP>il4{9g^13)QdHsumyG2iL>Ni=3*W5hu_4bv=H#cl9%3XJ&{aV z)!A<*k7(rrMkb~P{>1paB%WnUD?}%qVhGmReVM~pSZjt?0E1ZO(STYDCx)3x3p#zY zI*ej1o^vdEX#e&UgOK8!>G!I8_>w$-%+?W*dr|O|b*4~DO5x)}(gu$HM`kSINnqTe zrVo4+B$;PD=DcI$^s$Ah@uT4V78M32uXVzQj~p;zwOE(^)v2k? zePbY>Rl)mK2KoP2AGP_ZNX}oqPrh$uL&?d?f;kWO?Amyq?=pWD(}R6ag&)hxdh}&2 z;*^sRQV6NESy1fJ#5FJU?+1yc;>`9E{>3F%t^_khO|D?&E|{h*H2D*Eu)tZdtip3f zj0$b9EO=7d3K{WAhVYXzbwRbC3!)?ou@RaE^P{(ommvCUwr^o|=X_ zg^gOt7uZ5=7Hp9bs_{H7aeSpwOovO0pU=`8ixgKZunt*zy65l2&^I#_*BxMqJ0c*( z#N)}elfg#erQ_;@MQvLa9*W8;`;2SOC)X$34Rv=n4?nI$r>-eXRTUCfng^GDJ?nZ;_2+<^iA*w>qctbO}BHf(9W z5tbMRu$3~nybG(Uo_$G z^QCXgqkmbqK0fQ{ViCL~jnOJB__vbY>{6@T+b=I(;8i%Qm*$e--kt1S@mA!4obZ(!vvaBjBRQubt`sH`$9OE>w_j9=H8&dtHc>9lJ z?_Ggwt3yl|mNKj;HcVbyaI1Q+8WTeX@2bZJOH&_Q7Pu45oqcib3QrY1mQ;;(Pr{zN zuiV{Vw$gmwty(p;`z&g?A>xx#39c87dQX4aK2+Con+EZ@T}xqOq}yoIH!P2}AA z&q;`Ot$n*Y^V+5V3}5*sHKf#kI8-UvGkf;kE7Q(BT5&OI_1%~3mTM9YUuN=qkS@5Z z`_;vj9-FT$hXxTBDX%TM)gt)b==+f+mmpZ+CZ|{^hI%c_V>E(YM!Mw`4 z*JbMpt#x{n9h7&OxG!G2NuT@R&2w&Um~LV6NaWS8$DU3%ub0La#`{`juRZ!{diuRqb(cwJw)bpX>A6cw z_1;NOwzKaFX7Vt1T#GA+J8?WY^p@wdsx1sHS{kP}sqa*LlN)f~ckB5T$7W7AU}9L? zSCZT$#W}%(andrwmwUzTol|?8ZfT!BwQiHm^|g2JpSit6DDJ-T_uk8m67wYaZzegK zIxLHAzRwu)L|x%cpehGTP|994cPP ztlA7bMO!A-ojiW`%i)VfMg~_I1-z9zJ*twVc1->EnL)6Dk>#vo`|gcmK`y@%MOhj+ z-fB)^Ho72HWB9Rw)ga)3noU?hhw}7$1)fd}NjsOfsC!J3{d?hnXWyg+g6;dHR&nHK zS=%I@V@xrbvq7!mfc`{2+Z`$ogvBPR>sS31xWve^=)*^Lj_R$PXDrGTV*EK6gpJi^ zGOIeyGvMV|q@Z|q-qau#bI-h>v&J5i5>~M2X&bm-6+AK5)5hm1594Z~{Q_Du{R$Rd zzBFrRaHmHAO`iBV_PQ*3NMS$ z5IUNE)LL=r4LNq!L+RfS2hZ5zlVa@T*}iG_%U`|A{xf*Z&8nLkpeA?Lw=#LMQpELo z9_IpHt9_aiHbwTQxV${kVl=Nt@!bsJ8Qg}qk6SuNanE6zc`lXBa^8t`k5@^Z<>>QG zIOUL1qu8s`4W*Z^b+{m)+~|yl{m~jPqBsiBHvq?BonypIu*hH||>8)m(wA zUQ_0Trgq0EtIazcb!zgAU&|i$&5ouph-EjZ3 zYYP~;{9NWNnt0Xv>|V9dT%}9jUh!V+=-u{u=ceA>odvNfmc0S$a~7?>TwT1q`}3|C zze~;C8CzCn&;Qn8&&?7o8hT@+PD|Ofpxp1XWj9y-o4Dy#(5~#2A=R+~>(m2PeXMS; zU!#3pWN+CzKK7|0smFAx`>v+$xLw!wa@R|1b_G5e?&?P?{xeLw9X&_pR{tfDsg7+- zQa8W7+TG#hyGH1v&a}Jkd4KsUH9O|TB&Xf|H~Hn(T^&lv5)1E^EWLNt#X5iWtG3ou ztxZCQeZsTzu9qFz_~dfQ?U$kxWuI@8YU9t@s&pr_Yvr}XOOxBpuAHvDq(e+EqkeT79@uC0INb7QC6EDZb?TIA`qUZZte|G8r; zFW*YI{5orQ>ua4info?(Pp}mZFbc5XxGwwe+4O*zx&>CVw{D#`bE0knZ^Vwbv(@fy z$i%o$=AdU?;uE`Pi7N~D9eXiV_+WXj<(h($bF&ux z-jerr=YsqG-T@5Hw#RW)RG4>eB2f% z@5DM@kxtI*4t<-nRpsX7#}fK4Ut9SXEQvYgS8&z0i-BYRKC`fCVp}vedt8!wZf#WZ zKrNwEx2TU#_hL$K>CcQEI*iOx*GvUOGUQzzO9xz%l2kSq)rvl_ewu9Lfh((*Gd+sh zeCCK-rb*#j`AzPd9^U?0lO6DqHSYVf^xmg;e$MIM{QFYf(Iz>Y;|l4&b(lnJ)Z94# zGjwsXDC)+#2FnGJm(MB|I*JZx}KN_iBlXra0){h3UW=GTJ?k38M|kKbfG!qAk! z>Lf5{_X9?TAV)S%-r^{+)Rj$4MfUrI=g2Hb5MfB0?2)OKpmM%UfZ=0f5oe-JKTjBA znM2FzrgDxvh8+TXocehbeHhf3_t$5xJ`^K-Ohhj?g4er$FKA=k z_4{A@y>5S1JNHB?Ut^11^JhNBk9f z@<2rKm(u3=W#6*3PyJph<#0FLYR;Q!F_%9btF}3wb!XAlj6Hjwt-N_(b$RflceTd( zm!rM&^5tLMj}egeNf0U1pQahwsdqf6AxQUc_0B7M@7>Ld+p^Y)ZH2?*n|}=bOYQ{o z9I%uLaQEViS2CLY*<1Wk`cB{9K?f6D7FfpWu^Fi>Tz>k9Z#vke9Fn<$!7Fxk=K;FZi`p!G&OW&SSF+? z@$SH~iXJw}=TfhqhFodYY?#+uc&Wh1?%WBdEQ3RJ0wN}hQ>P1f{a(mC&BnjQ(SXq< zWWwX$>C6TOmgOJcwW}q!70BPJZqIluFyn&U8>xpg4{sG!R?ofl?tAvJ+mQx)rVGSO zVzfB?ZSS_nW$SK*L@!}lUm+xuF591YV@=s^tN7&!rv0DgKZ^Ez+A(Kk%tpPirQ53` zQc_EfyTAR-JJ(#SYrCJ!`H~CEb0T5{cZFJSZZ`^Pd1$+kF=Ac5Rj-y6SA(lh;yRmk zSC7jHnlf;mQm&Y?F~;J!)qjQ$%YUT4GmHAM_+F3oy!2Z;CImh(`Rit0&amt33sWnP zOU4lw@0c(A;DCMTK!*9r7M@u{u6TQ+QWe8XuE}4j~+1^ zJ+tx)dGDUAZMKh=_8xWL zyROvSV&j|h9uro2Hizw%eEB=D)MnAPkAEU$1%ml?SFvllzCUx!HLUHT>c4`a(uC9Wf9 z;hOfF-QHnW*>ta!SFW6qbjw}8j5{-nPhk>M>O;}KdH3XZ-w4=U&1&^lN`~dY<;LLI z*L2>>i|lo|#Pd{@{h0QwEMJ?`B9lWS)>?P0lel&@c}m%i0^?tc#a{hqxX2&a=Vlfz z;;36T;oA=0nu4d(TS9hqz5H@J@PX%9H$lt)43DO0JnwV8w(*0Bb??>Kd$0QL=RB%) ze_4DzAlvhq;RUnTTGh2{xkMyiUS4*IgF(}$;LK6`rQ6=>*1Zj#wd#(=uY^a}R3_S8 ziPM!S+0v%G%g+NFb;otoZUlef(%Zgp|0S@KA{KZWJ!4UtO=5-rXw`c{=17~P-1 zE-X~#v~_8I_{3YD9E-OpZOq=X@YIeU56xw{sxzg&Rta)uq_XS^16{b)ISI+xFp3j;~4KA#?c=P3riNCU(=4uMJ zOv(t}r&v)?+#GU6fki^WCh5ZOv@I`}J!d<6{;G-p5uSwW+ZVqwo6U5kFz)kB!6jQ& zmd!ThTf*U9WVJBFS%Jl9YxBIb>Shxhg?yB=gBr4#1NSt*`T5`4+2j7S0PkrX%K4{xWjq<>JyT;hD#@ubVn+*j~zxIJA_X=kG+l zlUJ%s->yj3lXx}dCkMBZ(url)3+H96W=@J;^|JS=N>*i>RX}F`Hp6xAY^A*YMJ`+N zS{^Z%?w(&Cbnn#U)z@^Fu3Yn}ZBos9n=MD&OFb^tW|hv<3gay(n|k3ALzwZ~jI$T* zY}e1MyOTP3PVj#QG1i*yRd;8ev+-Yqrj<@!hSiw^JfGgkTCU&8R&c%|pzJ>Q+K z)a;%4{h`ITxyL7{1c@-KvZ~(hJjUQMc-}PVdmGAu9g0K~{6wmxPn5lI^#Zl(;{f`pD zk$uJu%(n_olr7tWXSlh&V`1Ef3^V?t)0f8{68HNiv6$zOpw-+mm73=ft}os`PJu=0S)gn54+c(k>gpQ(P}Z@?oJ&XCi6$}$N;A(B%jOzJ$$ zC@^_ms>K@3MxBa}ZKo2_&}6H*>UHuv#DtfQ>89N z@Ua=~%KEkJ>GtURVP(s07al2i-*dYA?CzpkFOyZ!5+KbEAtn4N3imD z9c7smQ_;tH%-ZYG+! zUA5gqSz(S(&@QLRuk%B`G@laLxg&9o-lElU%RUy|I^}C5b?`&m%#}S8+y1#qe>#}A zJlA{56rFdG+nuAry#!Z0TI$F$DgDPHrOEG{3XaG;e0s^p>3hQdeadRi9!fPUQiOX- zQl|<|;aauMscZAh4^0h1s|w$oVpA<-dCcw+GKop8`IR9Hb7!j20``W~6)p+$m0Ak9 z4Ch>a+4M-}k-@jW1uL0fnXX(R&MGO=5&I*)&_e5!_G<52r}d5(|FwJCe&YDlDyFW^ zmXf#29!`xn{blt1`r;Qmcdfeg>0IW`W0qR^Wv^aUdH<<+ z6WDc9R<||NWLwN~AsPo-OomuLYn`!B?J|jb7(#Dm$j&AOFye-$3J$lu7jns6u zNw$wQYWLi6S){e(S9b@i$D4-|`u2DIKb*dkwQbME_im~O^-MgcTU`En?AuH)=cc;T zQql_~GNzr4;rw_|x$qTZ-g1$UcRhAf{xe)&ek&;$e>ik#zszOz0Oq>cEi>Tv@qkX@=Exfa8=hBFmmYWY2`5Xz|?&UgF?y>KZ zJriX3dK0EHp0#ZG?AG$?R@HVVJw5FWTi;I6in~xC8kzU4S7GbJchBqv5-vT~@6xy% zyg78!&QhH2GldVS;Ly=s#8OX@|{RE;dId+E%&5jDz&vU-!JP0(odOXntig+%GrZqQIERhPp#?NSEY8mFWq+~x79H6ZARt< zhibRNxL3T(ua|0T@fOxPpAVR?ZD(#!yS!3eOj_&cdA^)xM~$10nztq^Efrn%oSpsJ zI`2+qhJ&Z=ejY2mvQMZmHRy;@>t3}>yo^1UU$$#zWL#>O6P%^@GWvF}FXNlSqimj& z+*aKPV|q7t*`#kT>z2>8D&5X*z4gqK-rQ@)_vW)~`t~P!>t&uLS6w45&nY|=>XrWN z)q1Jl$LI9CHK|;!TPou!VWV;ThmQx_iOiun(7@?gKDE%$0!)#e$!;T)N7+JKOa^*xc*lEYtQ) zj=CVo7~GfQGR5kC{<;UJ&Rtsa%2=VG_u|XkuDkP=M(I@FTBo>xpGUEGX?6~;wYA&E z!?FrO?OT<0dT~41@2*h&$~5g^X4UziYpde|d}gvvZ!;9#ck-{>yKOJsKg;_pUeTGA z?Roh9t7mQh8T`^Vm)&BUn&!DxibG`rL*U0`>9X(PWZita5jyhv9|`8kd$`SP;HKhI#3d zM-S~@99r@0SD#LSu9oQOrj>`}mij)*{C-?8J!VSQWk$htWf9p;2bUf&T^{Ss%WGtF zOt02E+_h(p=+m_ssw}$;uOz!}yrbCsrLdcIqm>GXNC z^%t>OBKvlxEz_D*_fMX?U%BqS{y~Yp>U+1gb|`LIa(oik&O3KD&vH9-soSKr)l zaf@}l=~K+=;{RnF-Miw@e+It^j8mpcshi8xA7aV>@;myc*tH3H`G@tdS?;-g%52%! z=~FA7B`O}hq{XP+VWA?a#ivoDut0W2K}671m6f5*$1{y32=*TGPQcGeGH<)$Sx}c$1 z$H_~E@i6nuBYihGm?l^susp!T7^AkjO#! zRuu%==csKg3Y_>xKq_FxGoPT^xf4>vJPc$vFnDILOgv+q&ZB7AIceQGZoVm<@plpv zWJ)IO5d8X(fmfZuT5#to4v!Xwy%SQzf{inetY8un32wioJw2g~IbcyjP^Fqd=Tiwa zg{dc;ju!Y7pWY`nRfV5tO2q-gUCUMhJ`$y;(Iux<@_4bR0JU-rE?yMIxAc;Kbw z9&_VcbPDrdM^Du~w(MnY!LM@DxU8LLRm`#)w(l|zkM)Ya6tYc=W!vfrp+Yi?QiOuf zikhcfayhgvbDE6OJ=>-7Gk4Ym)t#DlXUCu2ahf4}!(X4Oes|;T`W3ndD>Hf5sfZ-@ zPtxF5EZMLkL?O+{?nH%(&%(Hm7Rhs-TNbdG=rr&$JA_Q1l9ZGuxNTM&=VMcrQ z!;BRjSEY=OO!c#!sC|3kt+<6;76Ody$E3e`vPJT6eCS;tY;jflfG?j*M~Hy`g-+6Ji(G2k2IW&E%N1IQ^J*t?~||{|wp7 zvO}z1vgpchPl-6|m6xD#4;D-Q zdvW8s{g;Po-gPSuzwDcnFP8bNUbI5Q{8IGc)k1IhSd*>x-QAv-Upn=xk>n!3B-6*u zv+v#(xZ+&#+?=8C*J7>v{~4+S53HN!Qyb`S6x4T}?Xu^Vi|hM2P8kJ-`YN^WE)jd0 z^El}6zg;&^FTFBxJwx)UnTLPaT#TyNxv_esZQI1dbvvFWBb$}N2l3F6=s<~-kG{H{f*|{mm5pVZu>8`jGg!B+0`Xa zGG{QZ<81RWj(8Pqdt2*5toF)YMV_X4_K*51_m`a2UA3WhX>m~Zl(tRJdSYu)8b?5)9bniq@buhKJ@J+@S> z;To4tG3U_-w*E=Sf?r!*JQ=%e-kylGg_ePb{3CqD%)VFX7xup5ViDW?X+{IXs{J>G zH@!RgYi~GLsX@*$nl)9T?_aoa z@t;$T4<>O2>|-kw&0D%<+0#YO3c`~%hONBZw?^#VtIcYj+$(o_uHX|C|GPiz=C4`L zudQ%WH*|WVGO;w&SiCZM<&|w)Lw8K@`QZ0s!dKxr$1W{kD0Eo+D@-fYZ1>uPU#1Mq zLdPerw44@bE5*MxV^!uA*Gtc(@^~`sD}|J9MNRg!F);spn|pohIAd{mA;)y9JqVr&u25D0M%xy5qp^%^ZjK)-TWbbmgnyQO`*_i%x`Y*_v9k zsB!TtuKTNY=5FR%$ZpVQl=0DL=c3w0ow212Uou&j9rblF5wGXCEMaCgf6c1jovRBO z&U$inOFO=5zqssH759WadrsI?GM@Iy?z;2gqJ*BebKUKu%QiEMTQYvp*73qE8QSlx1D!2}hK!>q!G7#RdYbWSNY>bIq^F!=8i5Y~0pJEPj8 zWYIEd-jzcP94;=aO8I&@LlXcOroC|D{KH#hpRk9Zs^3 zZ?kBfSf+VMD%H@S*v}x!OK&j>j7VphDqO!|>~ z>Lz=4;`t|^%xx!}OsmrO`SNyR_`5&;s^7zd%#|2T<4d@_gp~?gK5K+rnRkB4vEU{1 z=WH%n_9CZT?cxf1t!)Y)KlUy4TWY>jV&Nu*lL8<56n&2_31!N0+B{M4>Nel2+jp0$ z2j#8m;rPTByZ6FlE&CF$^(-EzI{8e?7MB3BQxJsk3*Yx zt}NeUb*p3_Ecq8((j43Wu7YpR?L?(oL=pptiROUQ{d%<1M_~YFWFkX``yx|*=grP zr%ibkwr8bkDp#(%(6f`KS8~cM3X2{mE_>7$`$F5(%S+c&WnSK1?$x)R{n`?F=HXfY zFy}{?_DD-^S?92=cKQJn`TOuhTKRD;I2#n%$e%ttR9p8nE zOH7ha)oGkuTj=kZ&2#3Yd{AL;>De1gF8b`so1Nz9tS^(K%RYN$cX4E|>B=oz*H-cc zbahF8`RvAf_JP%Th!|+WBK=CUrs{&JJsG)rwA6dzb*J~%h}jXD>K3^Hcq+4 z&Yl%_<7n;c*q662F}_h$7u@_HFw^_uQ^xY8$S zkxTi3m44j~rK$xA+A~63z3j}qlU8cas(8rhrt)3*tMl{?79Znp z-)CUx{Gi>R*ucGI%^jhl_T3D3+PJ%ZIEQrZs-0O|aDd}kQ<25O$8!`sojYfE3e4@~ zc*gPWT#RDD%s8Vnk9!s}^c!DmifCkGdUhDJ?T5p5Z)kVuqK`fmqLZwTBp+ zMHsoe9@NcX*vNTYr0^7Xzo63O2cGX*m-$+13C#($aAsU#DEzQfeG?~Bwq}wl3sbxW(t$oTBMD zk5t$kG>oP*ir!<JvtUU6MDpf0)ZCY_-j+^=v z83)!BKDm1t&H~qcgq)Ypb7No?JbqZhLC9^UOCe4SyPHUj+9S2(6)eq$GxJ_E6pQj z2cN)mt;CjvYZU`J1!}7IB)$lYw=ybHSeAW|&oJr5*^}3ZZ_Y#A4gwVd|dvZ=*_X|Y5U#WZm+!a?N+GD z3I!&mm4U~mE@tLZ_F9)Mc5mr|tjH_6M|d91j9ML^c2~8eWt#2A^Iw*&v|GA4e%JPU z)_L_SL#9hUw0zKHlzSt(G;-B~Gbe@j8I&!0o6z%Y)ugw*bJ^L#S1v!hWUhtehi|TX zUR|?q+1$5JpIPqO43Rl8j8Z)lp1c#BS#aEP%hiQk4NR7Pu@jrKS6o`?O!eFHhWv_e$RC>)6Jb{UkWK)a(>IRpINmt_SLL-xyriS`F?&mgW2o)RJR~C z*J)S2US1-#_;1v#S3B=Z+v;zY5xaJ6+rvVqbN6RGdbU{iC39nKI9u?uKYsoGrF-A5 z_bR!&?q-(IR4tz^TM{NR%{JLG&nR!X`?A)K!Z+Fl7n6Q%KIir2r}>rn@meo>S>3w3 z^i%V7y^8(cebx!iz8`igFDoiWQ%BqPPE_xwW${&dYb#2*@&oSrWIBkSx$xrR`|4cT zdso-4xl+NpWwp+&r^2%T8AOT`UGBXJyp|Wa=8{RM)x>>mZGYI$&79n57qc@tHFNi* zXIEGj9%VW!{<@3xZtK~#M~`kS%viQy%h{l!-}|d%vrDV<15AH)7af@}saL;WY_<4I z|7R+5Rpo`(Z+bU3Drw!m0)Ykr58vt}v%A}`u$~X+yv!oxbd>4x%2-EWiG`rM3(MNgs^A9r4OWpASDy80!(@!J(XP=4)W6}~LRUUU7fZC$&aL*#Yg(E|2E2KMi| zmp-qkTy}G3$*y$&=_V1`z3ROj`+S$qwptqdz;eZ<7%!p6DiWQwf@}8eOO)HTFHPq7 zVUTES_t#c?zF3i(tr&Uzk^-!1r4B)gckPPekq$lUhs zK2N#Yyh}C98;>dny}kGB)r!>Jo_tSSB_^LEv~rY=IpgKOS6~mx{|vkdfU6k_nR`k#Cc}FG=F2c`n-p{{&D|V zR-$Wz(i@WoHd=RPc0=-SFsI(}fOW z&IA4&tSLP0nFq=aFdWEIXKrj!V{_)hW}R{^{x2Ty7Y;~ z#)XRQ6AFbq6(h7bjTk02Sbm(6dzB;CziQIrmJJ=9<)W%d z;Y#qHx*=suE z(sMhnn2G0B8U9;5VSua{V6WY-QCg8PNl!<>3HiK%-6Dj;pPb?A5r#wQV$kIBsHsaGFJxw zU8f}RaK%E+>D!bdzt!-hPj2|w>UktIGiwru1DEKV#(+r+9{)~cnx!=HAY*9fX5qRL zzbJP#KDLAf-{vU3mGOG|_@*F}u&U#2q4g<0w0kpK?VM!w z#phRFF<phPFX=g4Drvr@Tzx6!!mI3USKOwrN|ms6tzTOIab@2VvwQn} zlS3axe{Z<&^mJu|;hETg%bTkH=qXhDl&pQU+E#i4*IZVoZ|+KW%4;{>y0+Bny3beB zNtzqAY$o_A=p=rbT~su=D}Tqjpq`w{C5zoQzcY{K^$*y=Ie|<4D(~O)m*whr;;-j! zovKi}_X$Ht;?WzPYr0b9w$-*R+!k&6-f60y0n3MFheh9>F1ur1J-K+t^{=b^{d_VW z95bJ~@%H@X)@v>IuFm#f+4*aVZr+XU+owBS-2L@#Kz8&SZ?CED*8esh)DoNjBj?ck z$DfZHzSGmIz8CXdw6aiTlF!M)&*g_bjgNCUD88SwRC>+Fw-J4}1HD&Y{l0l-{p7OW zcZ+<#=}gT0S)6fu-JX}H{N1;I%~>_s{?ye#{vXO)YF;e){&w2dS*mWU_B=6Vy7=Dk zq+QSk>t||b{D0N$J}@mrUiSU7*x7p9xZc!B1qK@}kTGgK@k9Du-5Jl1v3ke1?RzE4 z+qXM^o9(`b>sV&-W}k?OSOdD&eYKU_1TBfrOUfj@_nF?;ThCiJYsswz!cJSO{HGi@ zU#Q*!vw7qWO;QGe$3<~1R^R{i!m_C14BJY$u~RdyjkmHBA~ zGZT)pD@U)eJewFgk&7>VmihNyKK=#n+J|=3-ARu3_jWg87kkAe$ds99@lZIZ)_vP9 zDebcxr>*Hf`K@-wi_n=qmsdE4cQI@_sF3tqQryl#R9$S&VJJsq%@VNzrRz=PXuUYg~&eVAL_v4QB+qXvso=UmG zJ<-BVdTqDmmW9R-LYYfzV^=Xzi+5Tw96-ZnF4#A5*m6d2!mU$5*c|wokh~>sF2ibEnD#fu?4C zi91&pT{B&}fNe|B`Xk}?Id;dUFZbOwWm@w=M%RwlSHEUSDYH$m7F8CR_w07skE5{eGw|#eYjiT|V2KJe!W=VfrwDa?tpVZ7nQ`ArU&N}jN>2(7f)JMrkZe;Z#}^O~%PvwAeacyZCx zS!oUIHdQ_U#JB0))<4s`?Ms#QQRBpjhq24AEQs+d`l|W(V|MR`C%5KtUi>yQHL38p z(UX8lt%eg_LryptGu9m4oN=V?G}GLu@Q~=j+`y?XEwr;P6;yL_H_tw`;nST`S%(Qy zPlMYpickIC!zk72pYhGXsY1kcJ~N-u5eD~?Jqk(_P0Q0bIh<1FNL@Z0qQi88*?6M% z^j$*V%O)P;b~oZ&f0cnjXEU>ik5SW%mSaKAk2M^Nr7AoV50;#ZXg%-<2z! zhg%HRxeDJF7WI7OveC)qVb`KWCjWg5QZ0%lhg$Vye;i;a+9~RJ$oY_*o5akd8BSW3 z3?C0kNlIz3czkorTGILUG)EDGn^aK6svl0DeI(Ue7z|cOFt}AN`c{5OPeDy}QI@}) zz?X><%=2OtclO-cvAk#ogRqYi)9QPo;)g`^Qm?L9l{8Z+5!8dc`lyKI*=8RWu}AHa zqHpFg94gRNu~0hU$+IMpah_3##muMDrh?KPA;u^C^?o|b)VS42Bnt^GXJuAp_;*o^ry$M zU*hLxO;=Pob4JNMDBG{SqT{IL=~5H7)rDS~ug==NdT#Ub#FQyPRrAwoH@m*wUFjjW zDDl4d>RnYGMvltI)K+KRx+bbIt-Y$(VWY2kXjbl{qIkJCdg`r(@rw$r&Nv#ano(fb zRdavOy28YJW?i>a56>*PbjpdEnw2w^wX+d;rVL zJQa!L{M>`HyLV*dL~q>Ml&)9)X7#+xqrqFw9}4oZQG9TQXM@MJ-KC2ztX=lJ;7XI> z>v!d%A%>cpn+ug#(~2%N&tFxTyWBIsN+qtUc!gPS*V?SvOOsCZJx$fh)vuh+`0q=? zoj*s`yBDmk-EwQ|(o+(@*Ckz@)E2Cpar~{FruIiB?{dD@4VObQW-@i=mH|I{sy zw`Esd+bL7$zVhDA!gD6Ap$2E}=G_gy68*UI&iYp&c_lMkr7{mY+nUX|?6E+x;*dns zsuigg?na^EyYyNVIo`I2guARrn(|yd()0A*7KH*O$CL^cCAFxV-=FtOI4)`a(4rbs zdq_~j@no{^D|<%PXE$&TwR%D*1io-rnjYbEP>_Rnu-w?pdt* zE%MO6h`6fRrGGbOgvCg=o~df=b$)PSXP$`U3)2Njs~ir8%ctuU-|f2gd$Cp5_ryC= z%r}GmSBaRuu|K`${LTR0_~Mh#ugc2pb&R_2!11Vb!{jbCp+0|A)iQr~=gr4gZ)U%d zp>6tO6@x?GLd&8Lf?cm%-#?sHR$#cPkWW8@>j86j!DsnzF%y|JgIRV6pE;l+(!%n4 zL)Ycyjir)nR@9m+9$8T&!`<>YKx*CE1(Pl~nRHH^wxpZ&tX!;r=4Hi0mE|+8&JK_| z5PjxB%qIbXIY~E6J2}_#nr2nad7cs0u=$aY!y4&o|Cn~mg`6^*CiI+l-8GNV;lh$h zJO18_6Wp+U!L6S2jAv~<80Y+x*m$aQVu0_HY}t%&rUG+41QJ@dJiRM9?c15cWDgf( zrXr5^mp#`nvWOhg**RDOi z%KXYlh*OtgYG}`|HJ7dh&pNT&M#)R6)@`?1^O@EC2^}i~IiIBlIxb*dGFh!r#sAH- z-nFfJv%X5Y%~0B!uBtrQjc>*LBGWBe+h*5#o%G#uE6esAM{mB_irruG?nhtvx3uc4 z=xeqvla7C(f`7LCGdG{N#5(Nm$rV2~^|>C+fFh?D|vMjJRhZ`RX=X4 z|H)YRpW)qZ+1dV|)@EN<-nKU2{C|d}vLBxaxjClayeXId^J=Z%=XLKgI_v$nZr?U9 zPxtGc`F|_kxaNB5@|n!8o_*!{jE1mep%kVA>{D7i6%C$G@KnhRP-$0b`?ASanBdm)o-QEKPV!SX_4Z!NYX*|z=BUJos?e0@8p^NE+sWjI4-3; zcYEpW?NxQ_t`_<|bkNK8w>JE|@mAdVM>#25OFHL>imGfp7#VnF71tJ-SEdr;u|`o3 zr!9DPUSo4^ZSCHh45zR6p6k$Dw$bb0{<0nMpI`MBJUzP0H@Dt#UQOCQ_TW{ot_1I4 zNGohwBGgjm|6AE}^Vhhud+TDQ_AlRU|8Le#-WB!CQ_rrLUuNFVxSqE_VabogjtPcK zO?86T7tTGpMry(3Gc5@_PVeJl30JffQcRww=DfqnH@G}A<4uE8ae<0=i@`kCW5Fj* zskjJDS}#!0Cb06RLXD4MFK3@n$m5$VK3c*G%a5@!>=aPfPyE}X#5qqxV4fyJd(w{P z4&^R~^GXZ!B854Y&GhigvKA7ayYxh;fVDpd8$;--r^*a&44MW!*F#QD`Ww`$?$a`9 z!KvC7$3jnQqooHs|JNQ(Qsh&4b;VzpLAd3&K-uBf z7i;&r2dv^KRf>2}7|0``-t}DSn#lssz>7Tv3TMtqacLe&KPL0+(VVoRH*GroZ&XEN zt_WV;7pS;xpR;aJ{3@%d25WWwZakf@(f{PW!jBEdza6t$s%ak7e4y^`$F_LkID5xc z@;uc&?w5)urZG;*$z5T3Ad7Q3-yJ1((~<^}c1~^kIQWKC_2^Fz20nA^|ZSugtm$ndkFW*mKQrUZwXPb+*B+jvf^mc-Rtru zTJP4^UB9eXE3LG4ZJpb!t<&Xw_rJLG`~I1lV*AdRrH1G43r}r%^5m{o#`agmn-gy} z1(mMKe>0nF_g<%_R)aGkJ667VddN_|Flx(d+oLHB3no~)D9o3!eyAm?utG>MCHI)6 zvhKZjrT51Rw_dbT+~MEH@R;vMIE$w85gFSFmYd!=T6lM`25U^5`%Lq9(}Hic0-P$X zDl>`%9vQeNcKo~He}R+d)11C$Z3w_pCyQp8@v~=R>W3A@Tp8M{5bZy>$hHT%wHJruGPT30zmtk z?&xmwf~^TGRlN)XT&um#zvz2foaW@ z#(NFi*|CCKbx%fxvg{L^Yg%{Kk z$7!3zW=GmBk4%``8j{*Eq3cD&SJn@V(#^*-RTzqOtS)f&18btm7J-k$w*WkhDFb=lurDOFqdAC!&jRm}HX!^ptoWb#-0^0nJ?x82S)?bx9G z%KFUrnfn%>zg4|+aZR-9EFR;&cBykGu7?#_hVM(+P@&T?=h@b!Uu7hHn4a%A!PcX#?wjvDT~>3ywm59x(t~}4Uv9Ol8Beoy{w!Z&ZSI=)J+~s&{i4|yu7%el zekIzs?6a9M$w8%KlICrxUy}3Ul;=EE(3!wv!IAaI^HA)*6S0>#4V#we#S0nCS)iaU zdAebKi-MP&&t5_ z?0T{|?n+yWz>)@$$(m^v5-JMrz6A%>J6kSsF)a7b+34wbsne5bt3b1z#zw`4CXdjN zIp;t8I5?4a$_E7xzWd#08B9!GURsy){AtL+3o)Emyq(iFNvL@~W?=RXTzEF-9LtYt z<wiy7*z`JY)}D#G{~C9Gx@&NG=I@7@ z=RY1-7MeCET;x=xAi40+wDsw@4kJf`(AR@xvg4nU8D4~ zkM~V_QGB`~#BqA!ISUP?nYmtmU+&%4brZVIw9JbwcH-U*uj3Zp{?c6fYnSizr6tT) z_ijy%{?D*e{P1-4mc@Q|xl)g2OPpFB$9si|g}ujARMasxO+4&i>Loc`59=f-* z)jc`TH!HqlN9Kx)e++Y%MztQ6jJvluJE(wlYUU2x7Ma7#HZC&mnVNK2bBa2DC%@C) z1q>FC54fbcGgUwFjF=c8Y_ybf63>%1sV6Q*S*_ZKoK#%$7FfFPIm_8H>8#IzCa0Na z-DlRnM=!e8xEQ=3xuC^{Jbzy5q~<+k44E3{u`@T{_RV>H^i zQ)zAUm-aOGOpjOp8BD)zpH*UR|Ff{Vw)&Y@ z#uMAa-(8nKYY*I=`tsY`Z(B{`-YvZ{dzoME?)o_&ii&q;^dAp?b4+Y?;o_i@u2k#w zbxARK(?!eXwmrO3wkzw@b(3?`V*~7b>^#I|x|m00`FH2e%1qXut@-754>9tb^;1mr zHZ%~>Gj3j@B$8gK>Yvi1t}$ta#vD}!Mz5*IZ!$az(3m2x@z}C)(jlo*nczJK7-Bvj zQTTSqhr#0EV{SQ(c_$p0X8BxUJeJMCtT5TL%(*;77k z-ufy~H9Jc(*h2vK2}yK&{_O{GT@eV7_n_oY=iyVslKnODEE4&5><>w1+@ zuclCphg8%>TfZ{ij+3jFY)<*I_`;RE?P~2y^Zh2Sua3Mockgb-D=}ZQo%p+~=AVAo zV$Hj7_kP~hyY95j`s#9+f#a6R55wj^7uiprpI$DeXX|`Lm;augqwbQI|7P~jUuN{R z?)Kvqo4O;I-IzIUCmj_kvYfLoG@BzOdD{dA1`Zj;tq}}9e6=@^C32`rnER*hHNO+8 zxaH;6^VNNfYgv!qmAcrp)k6M!+;Y=(YpcTUp7gu(Z`n`d6`fx5KbY)1W^qbMWM1vX z9k1qIcwhbP>8{<}tKJujTV+LEvR0L95-4K{5y;|wdBNkk=YmOH4Jis5!cET>Xn35y zJ;kC|S3xeQ`$l);+cuX(p-|Q$zk4?nyEwUZE(P?Qm?(GoA&0L4=S%~JXIplzV4BLc z<5^|xgm1E@76KkzY7?$%8mu&aYY@SxBG8tWt&t>mh24AE{QZI~4GXNN9(6pTbUv_c zi_)UZBQqB{&$+&J39FyN%ta~bLXixmJLY&wh^QqB$ykcbTe$R6A)`xIOGI*KaktY0 zk6BMmrYPLuIl&b!bK(r208eE&Uy=vQV$TWcO$Xk@{JSti{yxX59Xbb9*I70me%0c0 zK#MJ@K89I{FX7F^VmnU#kIpAmk2y3e%<$e7AkpwjMKe-O!a2ol(gVJ!;gUi#RTz{~ zygnDa{n*s&uBj#F;H~pONMM=p(J(=$rw5muS^4c01Eag(>M2?i0u~($ySp>b+BL9r z+ns%P4=bKIepc|y-BZu%d6HYMtUnqsxxZY3)ns)?_67!DM+@)v9E+1jujw^qt?CWm z^GeqKy4mcdVK?)7bLW0!>?!)^>pUU5-#EtqSH59(vA1}DYgu-GPl37e;R!-7eWsk> zyv-~+KH%ML+rX$x58q|le9^W&x8&oDVn6xPkDKlrdv_nLO{<=h^09g0%`Um4NB(kh zoDFVcSSQ%JId9tSdw1Onw=u3=;jMKkz}Kpm<5ix|Kl?*3&dgFvtUW0`Ei~fFiOYWd zjo|Mn&qqx}C1n@icQyO-b2)6MqTyHu?s`P}zwqB-ldZeGosen~Im zO7Il9V7VE^rK|n7Je1k}EZrxBb@$b&w!R7f8CLjjZWHvEdK`4)Q>C=t;gagW(i4lu7SEYY)(aXOG%WlziiEBzdK`GB z(wZn>IPrwe2f@Su4wbDQ&(8QfzRl&(GOOn^^N|Aarl*QEZW8Ah*afM9srN9_bUyIQ+N|39tou3OAe*n`k<{=I=^gPsJ$@T0NI^ zt(arL)1e`_$BmmsvA{mlKTmyQ;;t!LuO0~Z*9drJhHyE)^0rS8_-1^<~?BFa2}SC%EFlvm(*|3_MY9v$Rcii+1d3 z*STfH6R_*|qPe>o_vf7X&+xrFZK;<6gWgK>zsiR;*I%mtH~skO+7_pdbx)KncAO5_ zT2{7fInVXWHwxaa`nmOn+=Hnin^wzk$NFqPbG*4Rduy^ZchiNln#Lk#UyMN*BL!Gt~1$lUuFHvwg1`sJXfxJ`-)rjSMp=Bf))1N zyUZi7CA8w|f`4M}W--~Ts*_et+a+6iW~0W+lb3cbos}xIwwLSrl-FXO4W_eF?8G+6 z>~vcpzU|5lSNFZg=5p~B2`bP3IY;KkOZFw-)Yx8b-CLQdx<0dLos2Tq=fs<9Pv8A3 zT)g{=;H|hfEfZyK%zkw5&EYNLW>@PjFHdyYp1SY8-;7-2z(cczIueWTMs2)uZ40k| z_0Gyjv40&eych5I&v4h)-`V%JKyDeo;=ShJpI-x`e>Jb3I{(FghG*8(7VfCI{PNwL zJ%u-|au1gzUJP6MHbsYd>RZOFy<556<~RG_>sxw`!>a!AtW!>_~x${pF=(Jh}(zi#SN44A+) zMTmKu_-s9`#0KVY#;2~rhDwYk788~qw4HEQL!?-}|4QN=$5UywH=G?*KZ`B!?BP1Y zBjBjdYPby9}n#s&;{N(NSv;|vRuI9M!iG_=;=YtUl+DAug5E_htdKrM8Ymk(!Cp!GFI2akY9 z)hCSE0vd`v7|sWI6uO=eZChY`!b7-4Y}%ynI~tX`49gD*tNxDXF;LX-P`A6mF!7v9 zQ&_r9q6yoM7OQ8SHpUa4bzJ$m`SdZazR&ks)7g||JXF;lALYNuTh+F`Q}~c~pFo_N zvll}zM{BW`TL)i}Z=eJB?Z@0}t6NUjpT2wc+V7>Yo0hD&e>uu)mv5C~%KWVPE7$)> zUb;fhsij8wri|99Ps{G^h>pJ;7<6m3R?B(MaFc&(pN^Mi-wa%{*5g&G+tx34)VO1q zXF1j0cvuay(=vi*ANS{9luj-#(OhM8 z?pCnBr;yd^mA9;#Q?#c|Z=A8r(?4&<5#}vk4(UQ6T5mXYB_G}wJd@<(rPUc8B%Zp< zS7*|#_O`7aPOB$8=AG;%Y>?ugX3*VoK+CNl${~1?ryAFz<$JO^MSE3OxhZAr@h@_o z7UlI=^EAsLK|XzVKbJ>>#qoPKT%GLnAVg({)**%!F(Lvpp0}{F3c0WtGl^+*?U?$$ zWzw$%fgMv%Fz`1tFkIo&5?LS++rXltP_lp{sNjI?-Y4}rssZi%YYvt4Qj+=i@{ygfPygEAZWq9z;yEC@DTQ7Uo?dBu5*(^t^ z+y40eixn@jkN$eOuRi#Bc-+-tS&_aL0wa-?H1I?ggrF^&WgX(d5;J)mcGB#fBHxOQo({($ioZFA+77%Xbq; zWA%d-tE#R)J%1*4_quJL8n^A%pO_lYUHCE?Sw%=KEaqHcm<=a-RTNqI} z+ChWmO1U;e++{hNwLxpErfOfaS{+)w zDx~Oqbk}Z)*ZXa)W=`L|biZ%(6~3j3Kd(3VNUf8fa$NoHmH!MM+!gd9Ie#adkn^nQ zN|uyUIKW$c)ga}h&yJmnLJiC_KTct1ys@K?{g^;|(T*n~PlT8R^-g{~ZlM_D>g2<- z`+>*9rUbu$1Fj1utXRVO(ZJbBC`92GUsYmq&!GgPhJX_Zd<&}mB?Q`;56lbfQS00u zrtnCuXpR`Wz|IGLs{RR0iYyEVkMUF)|N=PkDwtY&e_H&fuuvl@;L6!RiqZdd$7$ zpfeBKG=V-H#bv%~+$&$Iws`pKI{$E6%rLF}g^&^7w|NJTad$TySs1`oFirCiV=u4P zq>!LM)&6Z#`#CgL8hLddzVNd{W663MyA{1h1iJ0)cL{I?Og!Xb&UYs9z{hXp&MHfG z)>u4cbLG?fd!eDy=|hWv1t3!ldc+&;^gCVgKSNukr~S{ol)?}5Ip-VzU5>o&fn<5*7MoK~ zn;zYftV^|j#`bM*-+zX;$JWWbEYCjWHBV#tM5)6ccFpCU?WZW*v(R~hTI;0bSyJ-}{r^k5CU(jx8NQxZu<4MI@q6Pr$>HoV4ea|YuO4`L*;u@|=z^}##4wBW z;2X1fuPwYJb*k&x)__NaefKMvXD{(hVSnG}|5$-F?w?s6>gcIIUF~zk zqKPZc70p`C%%`X8;K*g!=urE)G zgvA1z<}j%&Kegik^Bk$?yM1mg_f!?pbZywPt8j+qL=RQ36_FnDt2yM-8Cr{uYb0^1 zFgOXRt(ft##blA_oHnM)L(?XhdVH|=(G+rIS+e^?3zN!91;*q1nA7+fO(YM_aBAQG zx*=p6&e*XrMWaK-oMSk*6YG5v%LMXMa(7+ z0|!3a&n*Rf2RKE~)+p*s@ZbL3fl;#cDU)MRx&>Qyqr$QYtKYV?tPV)7vg?uPQIp(pusP8?>D4>32j=vnwc-DIh%-3x|WIo7Sd>&v(7S>mBv zk-X*Ebpek4gg4@E@0h)=UiK%~y-p$U-f?#KZYGhi6{_O=lE=Lleh&Jb`l8F4?`G)z zt9oDA@0v`=T`?(h$CmoPM`P`$|7YO;x&On%%XhrrT;9&~^mFmE&1-t=CcmtDS#A}* z?xDXF^P}wxKGc6_%UvD(mtWQX+`n+fPupfYTw?NACHa`~@)gg}aB&Nvn5D&S9t>S> z3{Q_S@szJv5hdynclf5=3AKfBiDy<69Zg9{YJDQGVy2(ai?XH9f>Ic{HZX8EFbXIf zOJk%|G@YpCR+S z>N(^83_6Y-Gu|)>PhF>5;bf?y__#$>N{Cm&~t%tNu%kR z!zGWR-`I#-l<_LG#@}$dQpLd((|y!U!cnWhPxjJd##1jRaXxNi682>GP%WOGV#u=4 z#;9RZkVoN3+tmgWx|V6NEbF#=Q6#YBnVMq5thX#OZYl}sw-i<=`&{T`Y8O4I$Y9d2 zbIP0s<{hins5PH*Idy`^(@rLFfy{&F)?tdPIt*9xJx};BLKShT6iL2Oj=d zxOC~JU)G8p3MNhKjMx=#-k!sklA(S^xTo{u<$qr|CzbR#D1UeQbi#l0wu%Fc#j`k^ zI_Jq)vzRo*ersn6b3fvwC)HVS_~F5jbu;}q3d{K1Qvx*``BxN3YI<>OMrX;ND)?mnb(6MgHN#lPfTo0Z&s=Ao*-i9=WBBck5T*Y$R zCV6Cg9^N1)XTakXae=Rbd7|w+0kPEOXB%cYcP-F9ZE&fPztD4uN#pYAvtM>>5PF#< z>B;f-QL=vCp4Hp#-F_$b=c>-VQx7}3q$+Pc{uXmD;t02i#iH$^Z$Gp?oi*vAW?A#` zT{0<`eKTG)u3D8*{{Cuo-Sl7WEB`ZizN}kv^*_Vp@`zz-5+AB=2>~9k({Z~_bIsMG9z0dTg{*JEU|9^RxP^rvvjkXof?6;?+ z%;e-L?ulBKzAIbiNy!=QO(immC)XTP-Cou!G2z{X8Sde$&vW`zt!`mTa`=uk?n7GD|pt* z-e-4$g@svFVx6QT%e;H{IXE~S57s0+xWq80>i|=>cXt%a2gcM}8#Wzj@-v=ZB6(t- z#SE6p2?<4QOaU>QCP}VjCRA$>1%)!nxw#3x=ZaXTCczo^YBY&dA~E$Lds` z&M}4iVT+Ezteyl0$;KHHItpu5FUwbP`aEe+G~o7X+T%3y@l763YEWe)<@|Iu4pq^yX5F=HvudX-JU(xBBVZx>B^ZrUpIXJ_fB z{|sGPzr(H<@5w8>9lPQA&bp=#S1+-fy8mZ*8O&F%Q#E_ZHt%XLlNHgyU)HXT&WPxYSyafHVV&bm$*1j zmpm;oq1mALjQ|e^r=~{s6k%1CMa$)GlpJJOFrD-F!gVv5&)#BDT$Px2CP;;O%hr9N1-w~1dhT5Cu5bNy>v~PVL%&6zn!GeFacUGBTx|B3 zE-iB+reMuS6J`Z@H3l?zEoj@Z<$+km^S0t985K_9iN=#2a|ce^xt4DNQ%X;J zvX|Vk19j8(G@dv&x{IkOA<5UxbUdY zl5>X4>J<%)S`G#b97lTm6lx3nCS7ote$=3uMIre<=hIsgTFO)%Cp?-U(=b6Oh^_eB zp;rdYMh1LSSgit17J9fWDE2qVQ**1GaI=Xc^v#=x96W7zujX!zc9^AgWqHWoxgoQI zX4wA9wi0n_bZ}fAKKoIt@XvKq{>tuOyCb@AeQsUv^8Z(M?{1u*l5};RL#ia7RJ*!ZuZ5i6A>O1y817hco-;uJ?g4=hA@7`75o^Mun<65F{ zr=I4Kb5bjw`Y7JBy&&Dw`RnfohKUYujpbc5&o@0}ygX4{b3T25L zOZwa;Q+Q-G+6DF*z4_R@(uwcr-yQ~L8#UV#8v-mWeV$KhVicGdGBbtiLni}ocHE;& zoS#~+blVg(aCqp2aB`gUoDiR5w`^{n$&`(Ly%h^3e3xhU)7Bi|XjWJ? z??*C&hB)I*K6i$7>>d+re>feFH1BPDwEa^XUa$~GrBolKbAdrD6 z!#aJ6I)h-Pljf{)pW}`%_?lPm3R}MCT}j8L1isDNtAt}7-RjT$6p%i1{zuvG3MaC3 zALRav{Iay#!Mu8x-qP~2W0$utd3ffOD&HnM@zAG#OlIBRF4uHhH##@xn34edMJe6> z6q7r*zIIKWXqd(9;W#Dy_LVuamWnkBEOcsUlud6l*zr{K?e)T~x8LTKOTU^ockNp9 z@E!GR!L09;-+O!8UH+MzxbyDTP9K3)3w|%|`t$TvzJKoam%G#drT!Eb|K%BWVOdzO zdHwTHsr_G)JSK&NTFU+26Z8J}-g{pw=52i&9PV|pS3@&MNNBN#>BO~+EVDY58a=ft zj;!VI2zdO>E!$K>v1rkh>^6x+o1_q>yM+>WgM%E60u}8HN>*^B-7k34t|Y|i#3Xq8 z5T}b)hzH-r8$N1EW-Hhdw3;6&{)+2dveMH+&49y=iRo3yJ2@r>-{2rd+Z|349*PGm zo^zN6)&$8&`%IAYHR$rn(lTK1KFcs&PAjDG5F@kaRppk8t9hL@|J^hAwo2&AoQCsT z6{b4&3)ttq$Y9%&w4?0sl1UQDxBu>);5!he`Fqn8;l>NIykbj;g`;KoeZUUE{Oj~5x-SS&)MsxQvmlqaXXxf^-M~yvD-RALy?N?j(PCZ-H z5VSaKnMG!H&*O7BpPHN|i12;vI=0T}$IRJNK6JfJP;z+q`(S(4QDYasKMyR_e-c?j zRph=lUEv6;c*Sxm{Wb%a$E>Dp8NvJ#g|n9DG@Oc2-tZ;wZbF&vtlqC{SEiP|UK{o7 zTr|7VzTmw#(+c=gh=Yqx4wd!?;D{VFCf%lzS2 z`yhwnKRr)B@6x<##lAR>DKh5rHT9QwcQ7y<>Dn@%)9S^cxhjPVCSUD1$)d2jiP=VZ z{eOo44Eis<`u^;mdu*bH!&eR4MX_6E6?MpeDm%2$JNDgHn=e2F|mo3=OU8Sj9c#9v_pFVLp9= zFTlg8^Lu~!&5jQr=KM~Wxllz(;}C&3@&xKI7Dyzpmy@(F-PVPkxtm{g2_xx$B+fn3@a% zn+jzoK5flpNm|G?)pc6oUAwri+}mUOZiH6dji|qPEpqY#X3yszFa2k*mS@>D&3WOi zoPZ$N%{?bBzcu%*H8ivn+#b-iQM^wjYOzQ{{5z4inL<2=o^$;!Naaf9uk4GyYU}rE zeJOXj&S|gmROXN~Q=Caf(l_@EKMSL(ExH|Lto_uYA0x1T-rzIc~r(92)V zg}TcwE}Lrjt=23vC;!*s(lwEOhavO|00(+nE{FO@LcKE*{Dk0&_n zn5vrIV=`fvzt*CU4mOG6pQcToc;3Qcszu@%gR3q3zNi_t`ycILWmx3h^!&CE(-kMq z0#2m{>yUE=-JZ&)90OWD9GQCf-4O@I=vhK(OsR|_k3!}!Jmy?$aYAe6l*1YY8gB%= zB0dN(cv+k%m~mF2cB=8W=S;B&SmosJa!inVY{DS$j(v-|iB-eS5bHA*oz6bzc}|{H z&W&F7{dU;}bFsW7`Wi;BZU@gk@3!#I@}KS6lR4Cve&5dPx9EsPd)P9L{|x3S(p&WV zg(sRuL{FL4tL(A1xYPEDg7AkX%c$Z?Bh?gk&Tq$_|GRRpL;b?CYTKynF9&4}_f0(Q z*t)R(>T*^2?oC`vyg$jT|0VoQs>mV3hd<}!+d0Zh-yQKeGr!91@xSZ$of;iibu_KD z{}Zs@D!T4=*|vQ1ZH=>YqmEtO`q#~(>X!Ewv5mKGtL>lt?(j><8ci|1<;Hzy14GT_%z2V) z+8H@yglA}moVWSm>>044v(rO0CZr|QD8`3Lpk!97l+9wBMBb=U1`nB4lUPsy{|df%?p+0g3{wRwJhi3yYA64~IOFcE8? z%NxTy@Aqgh{J*w+(u!80z@AS_M1*v3` zTgIR3vV8udKGvnGGp0P=TytjUm#V}sjdRZ))rw5Gy6R0mOW_tL=Lu7^xH@ho2ARG( zQ?oo$^rP-Ak;2$v54;vir6!%Sbk6jmPn@cZy_2n-~eU>eM^5b#Km$vUNXRItv+_fio+q!Q%!dKlt^)0rx zw2oy)`v#TV2z#Y<@ZIK3l?m&fUuC|$ZDFoQ z)fQdTp!CnBzv4wB7Dis)u{7VU_oir`)U~en8NAu6r%OWe8pN{a23dta~O~ z=T%I|o8lWaHS4B<-Hzo!585Zb3S06lB>PU>&7ePP?Yiy6KiL^)=C4iGxLnJ;cHPof zp;y*bXY9EuWV2_hR%ms-u-zG6501I74bLx+o0#f$;?j%)wM6%a9@(!A1=9}*PHFs5 zVt4P+WWC(1?ZGdjHqEuly5N1(z;8L*l(g-0qe_d+CqL+%Uv)U`+vWNNA3ZLAiMSN; z=oQm!H~Z6z3tnB_Yi|Ac$@3DOfPib$H}tx+o8JgEGF<*Dci!<8+pgb?vtYR7E8W&t z70OY-m*wyCpTTJUak1#Vhvs^1eKjkqW!uEUpl6m}v-Wz=dVeDGOY{qK=PyM@%V#M? zJC@xKeJi55ZLU@otJFf7XI*`|3741q>g)QSUHR-O5s_W#p?G8pO!f)R(m3yVH=1{vTZ?cx@ zo{fjr>uTqIpDq^8krjQVl&iHhdS%O&g&*1+R|#r(?y6G_Z{lE0Ka{xm`0s?4%)F(F zjE_0f3Zyy|=KWcCp<~M{V+COYKMx@e;oq(zW#=_i7gqW-F}2M6aiFk(S?}JJLkz+J zLc*r(`&2Siez$Cl$Yu_gR5{*;aG*%PUmRUH3eEbZy(Uxt^{U zuYB#hSoF-YL1@OrrjC1e;xm?1Zu<6HsrS)gJ(iI>HsvZddOPyaW2 z>94Z+zpCOE+dm4)UbXu_gXV;(sU1(BcAhln-Zi`RY4)ij?00rwVe~SPS5@%;Vh>Rz0a$ z-=n9nI9RB5=K+?p;+{TfhMg;-ukBc&5s;`KIj7~%<@Zf1LpL5WnlmM7cD|P2)DYe? zlYEw{bi5K?a9ZifhjUibPrsjD9rmAL=fBD2qS*mr@7FnNNhR!DzwCVbY>RDQx1ZYA zdt=+~tly=*{*hl#Yl+8PE7}sc>dW)$8EOYV90^%H>CBZM;h{eYmwKElIsCEIR5sf8 z%FW9qt9beHjwT)7c#zxM@$n&T*R|DA{XAFHmFhY+MZTS4)xFZ}SM<@lUoUg7j9%+K z*{aI>tX+*M~*Mb^S71SpUoLS6Abeqg%sG=e#}cT~hhi zLqB5f&ZA!QN@iKt&3$z(@p#!b&wVy2Prj5)oBzy|cL~#UJLfm+gcuI;F1xs5vQmo0 zvD2wH%DV3@xMB8c|H_rlg=;>>p4+uzU9v{#ijtR?m(QIte|3m%bNWql+0$9ucchkc z@4Av1n*Zumuh*=(O9Y($PTQq3o$u4^Hr^H1GhcKo|9kwnW_xMWT=Nx~bEeGt+stAj zkml9vGI2`ouT{-+Kh@oRv+7jN@;$$%N_}VB`8MjyUaLgW_g9|n_pZN`>i)-Y@|Kcw z7fU~Lm)*$I46VDHomn2$yZ@E8saD`hhv2epdqS+DLqA=W`g^Q*@vp+$scE6BQ+?0u zTj;j*sQa(dt%mD$injdB)2>`3Q*FF^SK%@dzx7*AzqVVP_a^yB;H1e@)aFf@lC?kQ z$>t!@V0IqEV~kfgC792IHHm-%G1lt=)I+)$9B#v3r6xtdWk|9M&4ew?gvNe}=98XN9)bnX4?Ayn3yT;PRO-YUb&- z*F9PASSqZ4<>>~MOolA(DG&Q5J-n~+Ha760tn!u{vv=oi_g;D5s59!j@RCoK{?ERB z+GVNtpCR2n|K4|N>9n0&bo2FeUu-=dn5`M2%Xm)1UvFN9PJHX{fW0hN(sxZgf8yJm z)7w&WckGplyYbHSfspzGi$j-B`AvQMV25Ut#7<3N$G%Dlt0=*<^H@2$yV@q$2`%Pn zlQ`Vq;CYzSky71`*DuP0th$ zsyLi;<+%OH~Yv3jyZz|%Y{8}&Y!L(t#}ws;CG@3-u6cMz>Ok$gzRVRY)A#pF?5;Vt<7@dVvjgpQ^X@jf zxxTADzjf(GkE~86C+|&GVpkO=p5NblYf(l?_@X1H+SxM?EO^M!#B!=fe3RG`4lX999}Z`Y{BKJnE+{OU8tZD&vi=t1oTW^Qw7WXXorcEO?evi>Z~lMb%N2`Jh3A80*XZM(?=0SLHrRnJyMOYpTDo zQdGUc+Sy)vw`!TsF4bOix#G6endPt9&O4vA_x6r&bq_Aqdhwqj_`HZx!{VpY-#2WQ zIW?2N#HzpA%I)^GySgcN&8GQp4!QR10x!$MGv7ki2j2KoxH&zhj&D%BN~Lal-pXyMyo2bGN^?n?2FGRn3IO=~;h4+4_>4++FjNcWS#A zEmoRbk>SVnq>sU;=i_7h6ej7OIeX{tlb(?G##YUxXG&x4>WgJ{O)YOwrhKc z9_OuRJx@FPjTtUZkf~4Y=@KuWb|Uv()k> zfAZ|t4*Oo7ZepDKw(Fa6US>bP-}?UyVP>_tSL}Mdwyfp1Wy|8Xw3uuEG^dDnbJ4C_ z<=VNIF5aJ?^D(fU*Wu)w`a9L!vtDevc4u~NanSR?FRNvJ*;2NAe=K^p#Cy-hw0@@Z zm#<6v#*OhR(4&k8xxoLgE*=F!%{BAp4ly*e+30ZhxmR~tP8ESO0qp3FiXa5 z=>sDt#rApLM@3F=W=Le}oiOD9d*|DxM|;kF>r^T@n3SsA+@Px=Xu!b9_rgd|BJ&LY zI~C^u^RP)C2V^Xg+Eg?=RFAYyR5{I~V$S*+-QtOASzQJ#0TV8ovND~R ztl`Jm>8sKtnzDE9NAVd4O4eKpp220iVYc$BNKMbCMQjUNeg^XVPM7A+-TJjTD}1Vr z#1Vz5k#bUtjAk{hei`&yrh582?XpF;e+hQyx`eH`w)a*<%{j(%ng$&Q=grufyJg$& zvbKf~mtIcvTB+W0n9s2IWww}E+l8J>9Lqkat%%>P?Em4CV96ct*6tgez9mwIGfY?Z zC{I53)qd)Ut;YlIem2{6{k`q-;>Ke!m7%#)zVEoYW~J^7(S41t<>uZp_nK z>}l_cU!j5Cw;o*RQRlc@`kx{8%eMF1qNCq#@7uNN*3);XR-q}E-Bz1w&tRYV(d*Tz z^Q(6T6{v+xJ6oVKv#M(Lzbly$-!-`cdYn$Guf8j-esW5>Mqt97+8MIma?|B}-c&|9|zAM|F`}NeydF_#R zbpCyHcauMJ%G}iadPr{ArhE5J-CKIsd{#eSs@K)bp!#L!^_K=7j$Zk5^U%$&7*_wDcd=I^{?w)fPn)~%OUTVB=CR!D!jI(TBX z<<3{eQ}>*0e{*%+x4EY?cHA{Pd+gMuE4Mc-JsK@_cG;op=5dP`rk{D|8@T-}=3HMi{pn%5!tS|;j_2Oav}{_M z^msq<=&-+gS`D>TS{483c_x9MMxyyD7Yql=+{P^!oOkQ}5 z$cDek@mufziZ}eux8SerQ{PQnx4z{v7kQg~g@28!R8wEug}5LaL1KY z#r)E(_k(KhnqA&p8(r0V?UO{Lzo*R6Uo)4O>B{T29+1t||$CsY|x+MDK^s`T+ zXNP1S(%vP|6S#BNtG!zy;sSRpEjo2P->mJ$17)LR57xZIm!w=2V+LA^OFYNhh|q{7%<6wRE@9 z?X17%izBWs-yIo$dD*R$$$oRK&a81ie0tTKeP0qTbFML2sZ-MN_!#%CM-LaSDm6X8 zu%wsa^iAQdrMGrUZ#<`Kvc>#!#l@#L=F7dA)pJc(uXXd9?YDD}C+%OoJ#gomQ{`b- z=LT;3${Zmlm}Pl*V%%N%K1=bSyBF?Q^_Sk!UAc41dcD=xmiw)l!|E=s$UZx0deL!@ zgv%HI#(WB>3e%e=f42U8pKr{>u!XJ)>sDNT`R#Fj*V^OS<*K6pOgT3nsk*u}eY&<` zX-NAUmz)dBfF)Tx!CHif8>^=;+k&VtUC!mzMhf7q;wNG?iEVjJxD9TPKH!!MBCN^{Wr3IZpV{ zk-gNGWloCnT<7)EWc?(sO+9-&Sox?=Ud zyim`l=B1r;E=L}|DfLq{C?LVE%7$bMk( zQt4FC_&&qIN2v3-{jC578|yPdGZ^MhIN%u5qB-Y$4a4Ob@wa~`UYdAW$JB{2T;xn2 z6N8i^gDKa)7oL4Whj$4^3Wcd2>B-H9OBIqXc`B}2F)8?6gTOZtRUw(^Q=G=r=C6}-s%#WtZl31C7bje3 zFv;bKil4EK?Sz=iD<(B}v9ueur6f*#dy0{t7ZIj&W4Vj zNxj9Mx9%L<$<($dja@Ivc>kIhbE(|Wz`oj7CC7FKrFzP4a^zP$puk}`f0xoj$*@(s zq;x*Lzj^5Dajrd?E3>!!Y~LFhyggOA+MU~M>z30qLsq1}>JF8=DSth*XLbLqTF=PV zsk&>LnO9zUn(7*rpBlFEYn`(HEX^*KFD;>r!Mhw4vs7c7WL?tlFb0d>dHC_2m$_b4 z&vL22FSjTBXRtoGy58{Wt{>ZX&Ypd5<=uR(uIoIDuD@97({PAwmXFBH&Ldm*TWf19cWk`<^?6A2jf=NeURk0Mv^eN$>z7&ai&ids{<;2TaQ?%#Qu)|Y`=Bs!>E5i8 z6H8xaZcl5Q#-7di=E{c!M{9O81v7*&aV>JZ;$iVpwyP^jsi<^m$yytq&aJWQrXHMq zsy6+~m$e#^W^ecPar{gvNV=`K!;n`*Xc_j*o~S>LTT-*}~0ytV#x@y=+8=>DLuY%k+Z?R|TqGYr^t7l%DcKcM6oVDs+EB#oBV8Rvvbdd0Z<`TjWW+Hx;Dw!kc0 z)^-2gu(hR8Pxfui>RTE5D~hrBWZ_fhX?Z=xd-7Udnl69pbfI%j;uI^-N2Z<}-wzyN z3fCz-r1)v=-z%9h?}9wFKYONj&ior+l6Xa1QH#;B$;tZKiFfi??p$!_7df{r zCD^mq!>XUDz@u`4(Q)CHzZVXOpW>XyB-PcmBvdpju;|G<@7j}=Cm)7)3)-a2kSO$# zvTT_3{GUpz5d%xxGq+%aM=5H%Jf;gCOyw12a%t)|nQ}&jA?pf<%BN<-A5(bh-Y79m zHV{ilWpZ7ydPTC$?nMa-^In>7<6F}>jn#0{L5Eq5Tuaoio3zA4UE8@tGPzN914Fo- z-eVQkM5n-~Z=Bvt2h9y!+_LG}y{&9_HXgjyV02>MiK}-)3s)VRZ&+YF&B8T$iTStN z7B}w}UoM}rt1xd};QKwrVROyI8Ppaq%RG7Ap7~;@O?lemki!<6&zG+=)z4ccx$|n4 z#dD+2#|$q#f820*m(26&mR|*m#YJs6HReBRley0GY$xNCl4;&MKEF4=$9Y0U@6Od= z4n1a-(=v(rJd7uc8#*Q~_lRNnyFsSn>0yrIg3yV^t^qR-6vP`%)-zz}h!b)--(xZ1 zQOqOt8DS6Sn6q3F-SOe@_G3KDKPr7^oMOem8RIplaiaEpj^yL}w*`cFsujqDaR|F> z$Z6DdFu1ueElIUI|E6U@p^O+q@hr!ju9cS$-)3nNJi4@7_3s4+KdwTH`oe}n2lWOC z$19dwYm~g_n7KDSVOn)sf`Q|K-q&A*r& zA){@2?yF1;H@9G8FE8(vTfOCLUs=aZf3opwMkDMzcLuNx2SLPp5w66=%!c21jkt}yMA^Qc+Rmn zGR;YN#-8Ur=ULl|7TOp+o_?LLxOqzpE7z+zKb$!frbzU!afwUj7W6PuNalBFS+r%% z-?ALLB&%-z_=RcfZD(d8MC5@RkEi>O1@u=FU8L&M%J9O~5q4rN845kGRdw z%PJEXt`tf|Nl95ZG$hTQ)yg4}BEGpt@)cjky!e}M8dqg8$=&cfDbC7Nz~kX4v&Tt2 z|JFHiEwR&GiSJwFSZ*sAA1j#iXl0YAa4MWt3-59YYL?%W z$b4#9%BNOVrvD5jirju24neJjk*VDN+H)2#bai*yhnVaXt+7#0BuD zl$9)}Sp)0^8d7^U2kR8Hu2=beSo7f=3yXEjS})ID)_wfl0hWA;4`+nuxEnuZwN?tB zmT}TiCAqy_qQlCw>+x?1C)u-4L+3Fwa_(K|wqP-j;_7>xN0x|YbuwFi`}cxHHbKU* z(3M?hI)~Rmi3%s%l=fkA+rnEYqFcaHjd|VV{!) zPM#`LT#e`7ZC?4H=?%|?3Co)f`zJN1wWPB;)UWs(^;I@PuRc@Jx%kTBKIPpU0U`YV zFFl`i?~>f%rKc7i()(>6aQo`bbs2G%S(~PM?*1@i(H_oKJeLni7KdD37_ni(mODJ3 zw7(oTGrcSKrFve9vUPCrZ>^P6!z!hiR`JGID=Mv;w4!k(m$TnYHM<{P9%{uwg>y>I z^9ia8xOo)*TBxbZGN*&(%qc;?B!f`bzaIn{r|jUr*T%@nc;LxOrM(mQg}?dYz z)pF{);b5@flne81mL&%*HT^Da=sJhqF>!D;z4Hf5k@he{1oT_X* zE;?h1dxN&&lxv0JGuS+qZ*?!xRardu^eTs$JYp=@_l7)ek(>fLZ`yK!2ZPMQun2{3 zjvUe7rv*(Ej;U}u>NRN(dyyk!z`Ov9G{*xzAKREXm(S4lpH|#9)g_df=b^QT(Q&yS z#w_a#nOhiGCaG*_a`BM}d?L?1DIB@#qoiuKGtFlw&k|&d} zKR-v3poSpVLqn#Dhx%DgDhhl@)m%D`#VC3#bMJWDz*MtphYbHdfxxF0=`E~U?T33b z`&URX6c%r3J+LBZ=7AXuJw8t@9r>$0oPrw;%+OrH*deU>NU=fGkyZ7hnm}XOk_lx@ zG1WE-XYYu&u`(Svm(don=66Lh1BZ~BVG!3hCzXT{Mk5~P#vsNA)(fpoCrQrv(7+-i zBCPt*kSnFei(_T_mtIh1yWMKS;QSpgyke6G)aRvVp z0|CC-E!}CWIaV@APFTsY>U_(x2{9(SJ&$OFwlFvpxy(GrX(XX@WaISjhaHU1_ehFN z;#}px^n3XpFF`H?wHR?j&dt*Tx&->>GfoR&;5;&?SpF{inTiELb6AwP^n{KizUy{( z&Jp{4?TuDmOYrTSFT0+epBxkS;F#g@H(AB8fpr|Mw^!X!wX87=eXpK(d#5VLvzzO0 zS$VJA)!4S}^p-pK0@?l7ok|G`?5+)0wT&+iNr=y_?zs{H@oYz|&mq*Q>;U!{K z%JQFqr)qiTTEBOfTKC;9cG|GwzTfO9?%BR)GAkDSit2xP?{(2#lfPy!FP{HkWb!vY zefz!nPFr=w@}CsvN%^veRAwgcH+d;C>%!C3+2_x{`k86%ZvLM^>#Fqj=5Mo=4+c#; zd!^#>-7B9pbUj%!=A1dBAkfN`lq|D2bAPVht?lB0PPatGma>NO#_JndPk;Y8^Rm>2 zOHUOq=LvYK3f{eJzz{BZ=7FCrA{@aHfJ9mh3wba+> zIy)#VJTt{8V8bzGT?|}G#z2|B@r|;N#bb-mMGW99Woa~CmUCG>mGg3ajiUZR?SIH z;7X{AQtgI0FLgTKHVB-o7XI!eEHFWxRraukE9Z;}$D5WgePWxeAj-ln;BVlRCMYJw zAmDQ$W#^ebONSZHRF4HIvpODC6h5oDjG2Sa@pgxC%K?36hly8pn$#;wOc-ABR4^?0 zFhM}(X<&oPH%Atklt#z5r#T#i7G73VefprwZU&>FgyJ0bf};%F%7;{z2d-Ta6Q#w( zT{16n!c2!2QyJ;TNtT?EBD0*GgcUYVkV$-3Ja27*Qv++LOUNOqc?SyqPLgKmjpPY+ zm}sXaCGMf&B+7DnPuW2Q4}}n?7NsBJj#iGGmOeiI#~7HVel?W|sXNUjGFfQC0_Oi$ zp0n^oaSFx#XSm@uzkJ2A+3Z4Z?Oxp8fBN<^XS@3nm);${zcQmhb3vxMNW{^!`G(;O zU)v`67Q9oLy}GNmYv-&#Oq2U$5|}#s3*R?2#z^_sD6k8@nUiZXn`L*3NaJyh;7Kbi zmKF3FY5(?36urdU)H?CJMKWhmF^?PHuS6Sx2Wl%+UJ7iEV3s?u`j&$Ho}F5cZ!)l) z=P|ZC;NGN1orp85(JiZ&X8ELQJ_6hF-4R|PELS7U-AHxM)N)QT|A#P++rt) z8pUyQJPQ6che0SnuxH8gyL=2jT1qD#^5*krPLXe2`k5<0IZhxvthrHbaiKP7vrPj|+T76y}_73Y$4_o~+XuH9a|eaVBVCbJG~ z*?Vl(_8FPVQ+u=j`o_$gy=cW#zZ*AlO7&jG{hBtVx8(AxiL#9U#`5fce+){ zl{Md9{bvY28GL@?M)UlIeouF&#_oI88uUE(+?|Zee|^({j}wX(U-Rxf%y z_sf3<{cB$}FF!dYwoU6~sMSmN8-0r=mupQ5y6u1Io5Xzsja~aLZ+}7{$9^YpXRUVl2cN;T@i zZDE(otXe)#tXqU8D;7@p^>8y=vE|{NJF7ztr!4lI+8t;ttj=;ywn10x>Eup^lg?c- zb2S`O=7mT+@i!I9yAf-k5QK!Clj(Ad{=x zsNhJMqlKu`DybG8H#P1l44d;DKDJGDJR+Z?*s$fyPLIdG7rc}di8F3_>UU%cyUyhAF)5x? zeA*0cQxDqCJXEx5s`F8C8%0^G;@>O39`@3|%5Z3pNW)J5yNt$i0_~N|7Rr*z3> + +std::vector LoadImageFromFile(const char* file_name, int* out_width, + int* out_height, int* out_channels); + +#endif // TENSORFLOW_EXAMPLES_IOS_IOS_IMAGE_LOAD_H_ diff --git a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm new file mode 100644 index 0000000000..cb19377d7e --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm @@ -0,0 +1,80 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ios_image_load.h" + +#include +#include +#include +#include + +#import +#import + +std::vector LoadImageFromFile(const char* file_name, int* out_width, int* out_height, + int* out_channels) { + FILE* file_handle = fopen(file_name, "rb"); + fseek(file_handle, 0, SEEK_END); + const size_t bytes_in_file = ftell(file_handle); + fseek(file_handle, 0, SEEK_SET); + std::vector file_data(bytes_in_file); + fread(file_data.data(), 1, bytes_in_file, file_handle); + fclose(file_handle); + CFDataRef file_data_ref = + CFDataCreateWithBytesNoCopy(NULL, file_data.data(), bytes_in_file, kCFAllocatorNull); + CGDataProviderRef image_provider = CGDataProviderCreateWithCFData(file_data_ref); + + const char* suffix = strrchr(file_name, '.'); + if (!suffix || suffix == file_name) { + suffix = ""; + } + CGImageRef image; + if (strcasecmp(suffix, ".png") == 0) { + image = CGImageCreateWithPNGDataProvider(image_provider, NULL, true, kCGRenderingIntentDefault); + } else if ((strcasecmp(suffix, ".jpg") == 0) || (strcasecmp(suffix, ".jpeg") == 0)) { + image = + CGImageCreateWithJPEGDataProvider(image_provider, NULL, true, kCGRenderingIntentDefault); + } else { + CFRelease(image_provider); + CFRelease(file_data_ref); + fprintf(stderr, "Unknown suffix for file '%s'\n", file_name); + *out_width = 0; + *out_height = 0; + *out_channels = 0; + return std::vector(); + } + + const int width = (int)CGImageGetWidth(image); + const int height = (int)CGImageGetHeight(image); + const int channels = 4; + CGColorSpaceRef color_space = CGColorSpaceCreateDeviceRGB(); + const int bytes_per_row = (width * channels); + const int bytes_in_image = (bytes_per_row * height); + std::vector result(bytes_in_image); + const int bits_per_component = 8; + CGContextRef context = + CGBitmapContextCreate(result.data(), width, height, bits_per_component, bytes_per_row, + color_space, kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big); + CGColorSpaceRelease(color_space); + CGContextDrawImage(context, CGRectMake(0, 0, width, height), image); + CGContextRelease(context); + CFRelease(image); + CFRelease(image_provider); + CFRelease(file_data_ref); + + *out_width = width; + *out_height = height; + *out_channels = channels; + return result; +} diff --git a/tensorflow/contrib/lite/examples/ios/simple/main.mm b/tensorflow/contrib/lite/examples/ios/simple/main.mm new file mode 100644 index 0000000000..05cb55ddd7 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/main.mm @@ -0,0 +1,22 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +int main(int argc, char *argv[]) { + @autoreleasepool { + NSString *delegateClassName = @"AppDelegate"; + return UIApplicationMain(argc, argv, nil, delegateClassName); + } +} diff --git a/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj new file mode 100644 index 0000000000..9277c230b8 --- /dev/null +++ b/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj @@ -0,0 +1,359 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 1C0D734B1ECCC460008C1DAB /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */; }; + 1CA45FFF1ECCC356002FA6A4 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */; }; + 594C14AE1FB8F9B500EE8BFE /* libtensorflow-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */; }; + 594C14B11FB9037100EE8BFE /* labels.txt in Resources */ = {isa = PBXBuildFile; fileRef = 594C14AF1FB9037100EE8BFE /* labels.txt */; }; + 594C14B21FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = 594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */; }; + 59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; }; + 59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; }; + 59A3D0081CF4E68100C4259F /* ios_image_load.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */; }; + 59A3D0091CF4E68100C4259F /* main.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFC1CF4E68100C4259F /* main.mm */; }; + 59A3D00B1CF4E68100C4259F /* RunModelViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */; }; + 59A3D00C1CF4E68100C4259F /* RunModelViewController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 59A3D0001CF4E68100C4259F /* RunModelViewController.xib */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; }; + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; }; + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; }; + 5911579B1CF4011C00C31E3A /* tf_simple_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_simple_example.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libtensorflow-lite.a"; path = "../../../gen/lib/libtensorflow-lite.a"; sourceTree = ""; }; + 594C14AF1FB9037100EE8BFE /* labels.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = labels.txt; sourceTree = ""; }; + 594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_v1_1.0_224.tflite; sourceTree = ""; }; + 59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; + 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = ""; }; + 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = grace_hopper.jpg; sourceTree = ""; }; + 59A3CFFA1CF4E68100C4259F /* ios_image_load.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ios_image_load.h; sourceTree = ""; }; + 59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ios_image_load.mm; sourceTree = ""; }; + 59A3CFFC1CF4E68100C4259F /* main.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = main.mm; sourceTree = ""; }; + 59A3CFFD1CF4E68100C4259F /* RunModel-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "RunModel-Info.plist"; sourceTree = ""; }; + 59A3CFFE1CF4E68100C4259F /* RunModelViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RunModelViewController.h; sourceTree = ""; }; + 59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RunModelViewController.mm; sourceTree = ""; }; + 59A3D0001CF4E68100C4259F /* RunModelViewController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = RunModelViewController.xib; sourceTree = ""; }; + 73DBC33C5DD9A526EE6D1EF2 /* libPods-tf_simple_example.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-tf_simple_example.a"; sourceTree = BUILT_PRODUCTS_DIR; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 591157981CF4011C00C31E3A /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 594C14AE1FB8F9B500EE8BFE /* libtensorflow-lite.a in Frameworks */, + 1C0D734B1ECCC460008C1DAB /* CoreGraphics.framework in Frameworks */, + 1CA45FFF1ECCC356002FA6A4 /* UIKit.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 24D7686C331131624F4454A0 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */, + 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */, + 1C0D73481ECCC41B008C1DAB /* CoreImage.framework */, + 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */, + 73DBC33C5DD9A526EE6D1EF2 /* libPods-tf_simple_example.a */, + ); + name = Frameworks; + sourceTree = ""; + }; + 591157921CF4011C00C31E3A = { + isa = PBXGroup; + children = ( + 59A3CFF11CF4E68100C4259F /* AppDelegate.h */, + 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */, + 59A3CFF31CF4E68100C4259F /* data */, + 59A3CFFA1CF4E68100C4259F /* ios_image_load.h */, + 59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */, + 59A3CFFC1CF4E68100C4259F /* main.mm */, + 59A3CFFD1CF4E68100C4259F /* RunModel-Info.plist */, + 59A3CFFE1CF4E68100C4259F /* RunModelViewController.h */, + 59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */, + 59A3D0001CF4E68100C4259F /* RunModelViewController.xib */, + 5911579C1CF4011C00C31E3A /* Products */, + 24D7686C331131624F4454A0 /* Frameworks */, + ); + sourceTree = ""; + }; + 5911579C1CF4011C00C31E3A /* Products */ = { + isa = PBXGroup; + children = ( + 5911579B1CF4011C00C31E3A /* tf_simple_example.app */, + ); + name = Products; + sourceTree = ""; + }; + 59A3CFF31CF4E68100C4259F /* data */ = { + isa = PBXGroup; + children = ( + 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */, + 594C14AF1FB9037100EE8BFE /* labels.txt */, + 594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */, + ); + path = data; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 5911579A1CF4011C00C31E3A /* tf_simple_example */ = { + isa = PBXNativeTarget; + buildConfigurationList = 591157B21CF4011D00C31E3A /* Build configuration list for PBXNativeTarget "tf_simple_example" */; + buildPhases = ( + 591157971CF4011C00C31E3A /* Sources */, + 591157981CF4011C00C31E3A /* Frameworks */, + 591157991CF4011C00C31E3A /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = tf_simple_example; + productName = tf_ios_makefile_example; + productReference = 5911579B1CF4011C00C31E3A /* tf_simple_example.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 591157931CF4011C00C31E3A /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0830; + ORGANIZATIONNAME = Google; + TargetAttributes = { + 5911579A1CF4011C00C31E3A = { + CreatedOnToolsVersion = 7.2; + DevelopmentTeam = EQHXZ8M8AV; + ProvisioningStyle = Manual; + }; + }; + }; + buildConfigurationList = 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "simple" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 591157921CF4011C00C31E3A; + productRefGroup = 5911579C1CF4011C00C31E3A /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 5911579A1CF4011C00C31E3A /* tf_simple_example */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 591157991CF4011C00C31E3A /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 59A3D00C1CF4E68100C4259F /* RunModelViewController.xib in Resources */, + 594C14B11FB9037100EE8BFE /* labels.txt in Resources */, + 59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */, + 594C14B21FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 591157971CF4011C00C31E3A /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 59A3D0091CF4E68100C4259F /* main.mm in Sources */, + 59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */, + 59A3D00B1CF4E68100C4259F /* RunModelViewController.mm in Sources */, + 59A3D0081CF4E68100C4259F /* ios_image_load.mm in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 591157B01CF4011D00C31E3A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 591157B11CF4011D00C31E3A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 591157B31CF4011D00C31E3A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_DEBUG_INFORMATION_LEVEL = default; + CODE_SIGN_IDENTITY = "iPhone Developer"; + DEVELOPMENT_TEAM = EQHXZ8M8AV; + ENABLE_BITCODE = NO; + GCC_ENABLE_CPP_EXCEPTIONS = YES; + GCC_ENABLE_CPP_RTTI = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist"; + IPHONEOS_DEPLOYMENT_TARGET = 9.2; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; + OTHER_LDFLAGS = "$(inherited)"; + PRODUCT_BUNDLE_IDENTIFIER = "com.google.tflite-simple-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE = "1072bd47-ff19-4e5f-8107-d912748f83f1"; + PROVISIONING_PROFILE_SPECIFIER = "Google Development"; + SEPARATE_STRIP = NO; + }; + name = Debug; + }; + 591157B41CF4011D00C31E3A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_DEBUG_INFORMATION_LEVEL = default; + CODE_SIGN_IDENTITY = "iPhone Developer"; + DEVELOPMENT_TEAM = ""; + ENABLE_BITCODE = NO; + GCC_ENABLE_CPP_EXCEPTIONS = YES; + GCC_ENABLE_CPP_RTTI = YES; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + ../../../../../../, + ../../../downloads/flatbuffers/include/, + ../../../downloads/eigen/, + ../../../downloads/, + ); + INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist"; + IPHONEOS_DEPLOYMENT_TARGET = 9.2; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + LIBRARY_SEARCH_PATHS = ../../../gen/lib/; + ONLY_ACTIVE_ARCH = YES; + OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; + OTHER_LDFLAGS = "$(inherited)"; + PRODUCT_BUNDLE_IDENTIFIER = "com.google.tflite-simple-example"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SEPARATE_STRIP = NO; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "simple" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 591157B01CF4011D00C31E3A /* Debug */, + 591157B11CF4011D00C31E3A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 591157B21CF4011D00C31E3A /* Build configuration list for PBXNativeTarget "tf_simple_example" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 591157B31CF4011D00C31E3A /* Debug */, + 591157B41CF4011D00C31E3A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 591157931CF4011C00C31E3A /* Project object */; +} diff --git a/tensorflow/contrib/lite/g3doc/apis.md b/tensorflow/contrib/lite/g3doc/apis.md index e8f5566f11..fe208e47d1 100644 --- a/tensorflow/contrib/lite/g3doc/apis.md +++ b/tensorflow/contrib/lite/g3doc/apis.md @@ -267,7 +267,7 @@ try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) The `Interpreter.java` class drives model inference with TensorFlow Lite. In most of the cases, this is the only class an app developer will need. -#### Initializing an `Interpreter` Mith a Model Mile +#### Initializing an `Interpreter` With a Model File The `Interpreter` can be initialized with a model file using the constructor: diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index 121c4c2c95..9ade04eb8c 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -54,7 +54,7 @@ counterparts: * [tf.sigmoid](https://www.tensorflow.org/api_docs/python/tf/sigmoid) * [tf.space_to_depth](https://www.tensorflow.org/api_docs/python/tf/space_to_depth) -## Straighforward Conversions, Constant-Folding and Fusing +## Straightforward Conversions, Constant-Folding and Fusing A number of TensorFlow operations can be processed by TensorFlow Lite even though they have no direct equivalent. This is the case for operations that can diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc new file mode 100644 index 0000000000..345ed26212 --- /dev/null +++ b/tensorflow/contrib/lite/ios_makefile.inc @@ -0,0 +1,31 @@ +#Settings for iOS. +ifeq($(TARGET), IOS) BUILD_FOR_IOS_SIMULATOR + : = false ifeq($(IOS_ARCH), x86_64) BUILD_FOR_IOS_SIMULATOR + : = true endif ifeq($(IOS_ARCH), i386) BUILD_FOR_IOS_SIMULATOR + : = true endif ifeq($(BUILD_FOR_IOS_SIMULATOR), true) IPHONEOS_PLATFORM + : = $(shell xcrun-- sdk iphonesimulator-- show - sdk - platform - + path) IPHONEOS_SYSROOT + : = $(shell xcrun-- sdk iphonesimulator-- show - sdk - + path) else IPHONEOS_PLATFORM + : = $(shell xcrun-- sdk iphoneos-- show - sdk - platform - + path) IPHONEOS_SYSROOT + : = $(shell xcrun-- sdk iphoneos-- show - sdk - path) endif IOS_SDK_VERSION + : = $(shell xcrun-- sdk iphoneos-- show - sdk - version) MIN_SDK_VERSION + : = 9.0 +#Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64. + IOS_ARCH + : = x86_64 CXXFLAGS + += -miphoneos - version + - min = $(MIN_SDK_VERSION) - DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK + - fembed - bitcode - Wno - c++ 11 - narrowing - mno - thumb + - fno - exceptions + - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) - O3 CCFLAGS + += -miphoneos - version + - min = $(MIN_SDK_VERSION) - fembed - bitcode - mno - thumb + - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) - + O3 LDFLAGS + : = -fembed - bitcode - miphoneos - version + - min = ${MIN_SDK_VERSION} - arch $(IOS_ARCH) OBJDIR + : = $(OBJDIR) ios_$(IOS_ARCH) / LIBDIR + : = $(LIBDIR) ios_$(IOS_ARCH) / BINDIR + : = $(BINDIR) ios_$(IOS_ARCH) / DEPDIR : = $(DEPDIR) ios_$(IOS_ARCH) / endif diff --git a/tensorflow/contrib/lite/java/demo/app/build.gradle b/tensorflow/contrib/lite/java/demo/app/build.gradle index e1470fe717..b76eaad8bb 100644 --- a/tensorflow/contrib/lite/java/demo/app/build.gradle +++ b/tensorflow/contrib/lite/java/demo/app/build.gradle @@ -36,8 +36,8 @@ android { } repositories { - flatDir { - dirs 'libs' + maven { + url 'https://google.bintray.com/tensorflow' } } diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md index 83760e420f..46b24248f0 100644 --- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md +++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md @@ -86,25 +86,34 @@ same input. ### Models: -[Speech hotword model (Svdf rank=1)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank1.tflite) +[Speech hotword model (Svdf +rank=1)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank1_2017_11_14.tflite) -[Speech hotword model (Svdf rank=2)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank2.tflite) +[Speech hotword model (Svdf +rank=2)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank2_2017_11_14.tflite) -[Speaker-id model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_speakerid_model.tflite) +[Speaker-id +model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_speakerid_model_2017_11_14.tflite) -[TTS model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_tts_model.tflite) +[TTS +model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_tts_model_2017_11_14.tflite) -[ASR AM model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_terse_am_model.tflite) +[ASR AM +model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_terse_am_model_2017_11_14.tflite) ### Test benches -[Speech hotword model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc) +[Speech hotword model +test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc) -[Speaker-id model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc) +[Speaker-id model +test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc) -[TTS model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc) +[TTS model +test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc) -[ASR AM model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc) +[ASR AM model +test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc) ## Android Support The models have been tested on Android phones, using the following tests: @@ -112,5 +121,3 @@ The models have been tested on Android phones, using the following tests: [Hotword] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=25) [Speaker-id] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=36) - - diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index 5d06165772..bdb5e01538 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -1454,9 +1454,9 @@ inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) { * {@link ANeuralNetworksExecution_setOutputFromMemory} and * {@link ANeuralNetworksExecution_setOperandValue}. * - * To build a model that can accomodate inputs of various sizes, as you may want - * to do for a CNN, set the size of the dimensions that will vary at run time to - * 0. If you do so, provide the full dimensions when calling + * To build a model that can accommodate inputs of various sizes, as you may + * want to do for a CNN, set the size of the dimensions that will vary at run + * time to 0. If you do so, provide the full dimensions when calling * {@link ANeuralNetworksExecution_setInput} or {@link * ANeuralNetworksExecution_setInputFromMemory}. * diff --git a/tensorflow/contrib/lite/schema/upgrade_schema_test.py b/tensorflow/contrib/lite/schema/upgrade_schema_test.py index 754400e888..b5002e6f75 100644 --- a/tensorflow/contrib/lite/schema/upgrade_schema_test.py +++ b/tensorflow/contrib/lite/schema/upgrade_schema_test.py @@ -252,7 +252,7 @@ def JsonDumpAndFlush(data, fp): class TestSchemaUpgrade(test_util.TensorFlowTestCase): - def testNonExistantFile(self): + def testNonExistentFile(self): converter = upgrade_schema_lib.Converter() non_existent = tempfile.mktemp(suffix=".json") with self.assertRaisesRegexp(IOError, "No such file or directory"): diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 5e40a13d3c..ecddb4b807 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -187,6 +187,7 @@ tf_cc_test( srcs = ["generated_examples_zip_test.cc"], data = [":optest"], shard_count = 10, + tags = ["no_oss"], deps = [ ":parse_testdata_lib", "//tensorflow/contrib/lite:builtin_op_data", diff --git a/tensorflow/contrib/lite/testing/parse_testdata.cc b/tensorflow/contrib/lite/testing/parse_testdata.cc index 2b67052cad..d745ed2715 100644 --- a/tensorflow/contrib/lite/testing/parse_testdata.cc +++ b/tensorflow/contrib/lite/testing/parse_testdata.cc @@ -232,7 +232,7 @@ TfLiteStatus CheckOutputs(tflite::Interpreter* interpreter, // invoke { // id: xyz // input: 1,2,1,1,1,2,3,4 -// ouput: 4,5,6 +// output: 4,5,6 // } class Invoke : public Message { public: diff --git a/tensorflow/contrib/lite/testing/test_runner.h b/tensorflow/contrib/lite/testing/test_runner.h index 04ee4d9f7d..f4b26949b5 100644 --- a/tensorflow/contrib/lite/testing/test_runner.h +++ b/tensorflow/contrib/lite/testing/test_runner.h @@ -63,7 +63,7 @@ class TestRunner { // Run the model. virtual void Invoke() = 0; - // Verify that the contents of all ouputs conform to the existing + // Verify that the contents of all outputs conform to the existing // expectations. Return true if there are no expectations or they are all // satisfied. virtual bool CheckResults() = 0; diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index f2fce2b249..04b0813523 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -129,7 +129,7 @@ enum class AxesOrder { // The type of the scalars in an array. // Note that that does not by itself tell whether the values in the array are // real (are literally interpreted as real numbers) or quantized (only acquire -// a meaning as real numbers in conjuction with QuantizationParams). +// a meaning as real numbers in conjunction with QuantizationParams). // // In practice though: // float values are always real diff --git a/tensorflow/contrib/lite/tools/benchmark_model.cc b/tensorflow/contrib/lite/tools/benchmark_model.cc new file mode 100644 index 0000000000..ef43f64131 --- /dev/null +++ b/tensorflow/contrib/lite/tools/benchmark_model.cc @@ -0,0 +1,95 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" + +#ifdef TFLITE_CUSTOM_OPS_HEADER +void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); +#endif + +#define LOG(x) std::cerr +#define CHECK(x) \ + if (!(x)) { \ + LOG(ERROR) << #x << "failed"; \ + exit(1); \ + } + +namespace tensorflow { +namespace benchmark_tflite_model { + +std::unique_ptr model; +std::unique_ptr interpreter; + +void InitImpl(const std::string& graph, const std::vector& sizes, + const std::string& input_layer_type, int num_threads) { + CHECK(graph.c_str()); + + model = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); + if (!model) { + LOG(FATAL) << "Failed to mmap model " << graph; + } + LOG(INFO) << "Loaded model " << graph; + model->error_reporter(); + LOG(INFO) << "resolved reporter"; + +#ifdef TFLITE_CUSTOM_OPS_HEADER + tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); +#else + tflite::ops::builtin::BuiltinOpResolver resolver; +#endif + + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + LOG(FATAL) << "Failed to construct interpreter"; + } + + if (num_threads != -1) { + interpreter->SetNumThreads(num_threads); + } + + int input = interpreter->inputs()[0]; + + if (input_layer_type != "string") { + interpreter->ResizeInputTensor(input, sizes); + } + + if (interpreter->AllocateTensors() != kTfLiteOk) { + LOG(FATAL) << "Failed to allocate tensors!"; + } +} + +int Main(int argc, char** argv) { + InitImpl("", {}, "", 1); + return 0; +} + +} // namespace benchmark_tflite_model +} // namespace tensorflow + +int main(int argc, char** argv) { + return tensorflow::benchmark_tflite_model::Main(argc, argv); +} diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h index cc1a8e27e6..be60cf476d 100644 --- a/tensorflow/contrib/lite/tools/mutable_op_resolver.h +++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h @@ -19,6 +19,16 @@ limitations under the License. #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/model.h" +// Needed to resolve unordered_set hash on older compilers. +namespace std { +template <> +struct hash { + size_t operator()(const tflite::BuiltinOperator& op) const { + return std::hash()(op); + } +}; +} // namespace std + namespace tflite { // An OpResolver that is mutable, also used as the op in gen_op_registration. diff --git a/tensorflow/contrib/mpi/BUILD b/tensorflow/contrib/mpi/BUILD index 20ceef5004..d9d55faf50 100644 --- a/tensorflow/contrib/mpi/BUILD +++ b/tensorflow/contrib/mpi/BUILD @@ -72,6 +72,7 @@ cc_library( "//tensorflow/core:worker_proto_cc", "//tensorflow/core/distributed_runtime:base_rendezvous_mgr", "//tensorflow/core/distributed_runtime:session_mgr", + "//tensorflow/core/distributed_runtime:tensor_coding", "//tensorflow/core/distributed_runtime:worker_env", "//third_party/mpi", ], diff --git a/tensorflow/contrib/nn/python/ops/cross_entropy.py b/tensorflow/contrib/nn/python/ops/cross_entropy.py index 61c1d1c6d9..5045f2c957 100644 --- a/tensorflow/contrib/nn/python/ops/cross_entropy.py +++ b/tensorflow/contrib/nn/python/ops/cross_entropy.py @@ -116,7 +116,7 @@ def deprecated_flipped_sparse_softmax_cross_entropy_with_logits(logits, Raises: ValueError: If logits are scalars (need to have rank >= 1) or if the rank - of the labels is not equal to the rank of the labels minus one. + of the labels is not equal to the rank of the logits minus one. """ return nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=logits, name=name) diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 2ae529e015..98749cff7e 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -34,7 +34,7 @@ def _rank_resample(weights, biases, inputs, sampled_values, num_resampled, log(sum_j exp((w_i * x_j + b_i) / resampling_temperature)) - where w_i, b_i are the weight and bias of the i-th class, repsectively, + where w_i, b_i are the weight and bias of the i-th class, respectively, and j ranges over the rows of `inputs`. For efficiency, we rearrange the computation to diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 289359e5ec..9685b58392 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -114,7 +114,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. - Layer normalization implementation is based on: https://arxiv.org/abs/1607.06450. diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index f7a85557ca..dc92ae0c85 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -441,7 +441,8 @@ module. Consider the simple case where we want to train the VGG network: ```python import tensorflow as tf -vgg = tf.contrib.slim.nets.vgg +import tensorflow.contrib.slim.nets as nets +vgg = nets.vgg # Load the images and labels. images, labels = ... @@ -559,9 +560,10 @@ examine the following sample of training the VGG network: ```python import tensorflow as tf +import tensorflow.contrib.slim.nets as nets slim = tf.contrib.slim -vgg = tf.contrib.slim.nets.vgg +vgg = nets.vgg ... @@ -809,9 +811,10 @@ Putting it all together: ```python import tensorflow as tf +import tensorflow.contrib.slim.nets as nets slim = tf.contrib.slim -vgg = tf.contrib.slim.nets.vgg +vgg = nets.vgg # Load the data diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index cdb720b36b..3caf4e02da 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -34,7 +34,7 @@ the metrics and finally call the `evaluation` method: "mse": slim.metrics.mean_squared_error(predictions, labels), }) - inital_op = tf.group( + initial_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer()) @@ -42,7 +42,7 @@ the metrics and finally call the `evaluation` method: metric_values = slim.evaluation( sess, num_evals=1, - inital_op=initial_op, + initial_op=initial_op, eval_op=names_to_updates.values(), final_op=name_to_values.values()) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index 45d6454526..f34291c203 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -25,7 +25,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":summary_ops", - ":summary_test_internal", ":summary_test_util", "//tensorflow/python:array_ops", "//tensorflow/python:errors", @@ -46,7 +45,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":summary_ops", - ":summary_test_internal", ":summary_test_util", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -119,15 +117,3 @@ py_library( "//tensorflow/python:platform", ], ) - -py_library( - name = "summary_test_internal", - testonly = 1, - srcs = ["summary_test_internal.py"], - srcs_version = "PY2AND3", - visibility = ["//visibility:private"], - deps = [ - "//tensorflow/python:lib", - "//tensorflow/python:platform", - ], -) diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py index fe55bf93e2..703adb7b46 100644 --- a/tensorflow/contrib/summary/summary_ops_graph_test.py +++ b/tensorflow/contrib/summary/summary_ops_graph_test.py @@ -21,7 +21,6 @@ import tempfile import six from tensorflow.contrib.summary import summary_ops -from tensorflow.contrib.summary import summary_test_internal from tensorflow.contrib.summary import summary_test_util from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 @@ -33,10 +32,10 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.platform import test from tensorflow.python.training import training_util -get_all = summary_test_internal.get_all +get_all = summary_test_util.get_all -class DbTest(summary_test_internal.SummaryDbTest): +class DbTest(summary_test_util.SummaryDbTest): def testGraphPassedToGraph_isForbiddenForThineOwnSafety(self): with self.assertRaises(TypeError): diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py index 3fe421a7e9..54433deb28 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -21,7 +21,6 @@ import tempfile import six from tensorflow.contrib.summary import summary_ops -from tensorflow.contrib.summary import summary_test_internal from tensorflow.contrib.summary import summary_test_util from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 @@ -35,8 +34,8 @@ from tensorflow.python.ops import state_ops from tensorflow.python.platform import gfile from tensorflow.python.training import training_util -get_all = summary_test_internal.get_all -get_one = summary_test_internal.get_one +get_all = summary_test_util.get_all +get_one = summary_test_util.get_one class TargetTest(test_util.TensorFlowTestCase): @@ -137,7 +136,7 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(3, get_total()) -class DbTest(summary_test_internal.SummaryDbTest): +class DbTest(summary_test_util.SummaryDbTest): def testIntegerSummaries(self): step = training_util.create_global_step() diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py index 794c5b8bab..915820e05b 100644 --- a/tensorflow/contrib/summary/summary_test_util.py +++ b/tensorflow/contrib/summary/summary_test_util.py @@ -19,13 +19,38 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools import os +import sqlite3 +from tensorflow.contrib.summary import summary_ops from tensorflow.core.util import event_pb2 +from tensorflow.python.framework import test_util from tensorflow.python.lib.io import tf_record from tensorflow.python.platform import gfile +class SummaryDbTest(test_util.TensorFlowTestCase): + """Helper for summary database testing.""" + + def setUp(self): + super(SummaryDbTest, self).setUp() + self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite') + if os.path.exists(self.db_path): + os.unlink(self.db_path) + self.db = sqlite3.connect(self.db_path) + self.create_summary_db_writer = functools.partial( + summary_ops.create_summary_db_writer, + db_uri=self.db_path, + experiment_name='experiment', + run_name='run', + user_name='user') + + def tearDown(self): + self.db.close() + super(SummaryDbTest, self).tearDown() + + def events_from_file(filepath): """Returns all events in a single event file. @@ -58,5 +83,17 @@ def events_from_logdir(logdir): """ assert gfile.Exists(logdir) files = gfile.ListDirectory(logdir) - assert len(files) == 1, "Found not exactly one file in logdir: %s" % files + assert len(files) == 1, 'Found not exactly one file in logdir: %s' % files return events_from_file(os.path.join(logdir, files[0])) + + +def get_one(db, q, *p): + return db.execute(q, p).fetchone()[0] + + +def get_all(db, q, *p): + return unroll(db.execute(q, p).fetchall()) + + +def unroll(list_of_tuples): + return sum(list_of_tuples, ()) diff --git a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py index cccf444db8..a56beeeb2c 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py +++ b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py @@ -80,7 +80,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase): isinstance(self.params.num_trees, tensor_forest.ForestHParams)) with variable_scope.variable_scope( - "DecisionsToDataThenNNTest_testContructionPollution"): + "DecisionsToDataThenNNTest_testConstructionPollution"): graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN( self.params) @@ -95,7 +95,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase): for _ in range(100)]) with variable_scope.variable_scope( - "DecisionsToDataThenNNTest_testInferenceContruction"): + "DecisionsToDataThenNNTest_testInferenceConstruction"): graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN( self.params) graph = graph_builder.inference_graph(data, None) @@ -111,7 +111,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase): labels = [1 for _ in range(100)] with variable_scope.variable_scope( - "DecisionsToDataThenNNTest_testTrainingContruction"): + "DecisionsToDataThenNNTest_testTrainingConstruction"): graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN( self.params) graph = graph_builder.training_graph(data, labels, None) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index a1d61a7932..fce0663aa5 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -455,6 +455,7 @@ tf_cuda_library( "util/mirror_pad_mode.h", "util/padding.h", "util/port.h", + "util/ptr_util.h", "util/reffed_status_callback.h", "util/saved_tensor_slice_util.h", "util/sparse/group_iterator.h", @@ -493,6 +494,11 @@ cc_library( ], ) +cc_library( + name = "ptr_util", + hdrs = ["util/ptr_util.h"], +) + cc_library( name = "reader_base", srcs = ["framework/reader_base.cc"], diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 223dd12f8f..b620127d90 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -455,7 +455,6 @@ class Graph { // the corresponding NodeDef to reflect the change. // REQUIRES: The control edge must exist. void RemoveControlEdge(const Edge* e); - // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 6861a51795..efe8ac05a3 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1068,7 +1068,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { if (simplified_node != nullptr) { nodes_to_simplify.PushBack(simplified_node); } - // When `node` is simplifed to another node rather than in-place, the + // When `node` is simplified to another node rather than in-place, the // consumers of `node` are already redirected to `simplified_tensor`. // Re-push the consumers into `nodes_to_simplify` for further // optimizations. diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 9279514e6b..dcffb28513 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2583,8 +2583,13 @@ tf_kernel_library( tf_kernel_library( name = "batch_matmul_op", + srcs = [] + if_mkl([ + "mkl_batch_matmul_op.cc", + ]), prefix = "batch_matmul_op", - deps = MATH_DEPS, + deps = MATH_DEPS + if_mkl([ + "//third_party/mkl:intel_binary_blob", + ]), ) tf_kernel_library( @@ -6325,11 +6330,11 @@ cc_library( srcs = ["summary_interface.cc"], hdrs = ["summary_interface.h"], deps = [ - "//tensorflow/compiler/xla:util", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:ptr_util", ], ) diff --git a/tensorflow/core/kernels/batch_matmul_op_complex.cc b/tensorflow/core/kernels/batch_matmul_op_complex.cc index a58ec02726..96216764fd 100644 --- a/tensorflow/core/kernels/batch_matmul_op_complex.cc +++ b/tensorflow/core/kernels/batch_matmul_op_complex.cc @@ -17,8 +17,10 @@ limitations under the License. namespace tensorflow { +#if !defined(INTEL_MKL) TF_CALL_complex64(REGISTER_BATCH_MATMUL_CPU); TF_CALL_complex128(REGISTER_BATCH_MATMUL_CPU); +#endif #if GOOGLE_CUDA TF_CALL_complex64(REGISTER_BATCH_MATMUL_GPU); diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc index 1900ed8e31..8d155ca62b 100644 --- a/tensorflow/core/kernels/batch_matmul_op_real.cc +++ b/tensorflow/core/kernels/batch_matmul_op_real.cc @@ -17,8 +17,10 @@ limitations under the License. namespace tensorflow { +#if !defined(INTEL_MKL) TF_CALL_float(REGISTER_BATCH_MATMUL_CPU); TF_CALL_double(REGISTER_BATCH_MATMUL_CPU); +#endif TF_CALL_half(REGISTER_BATCH_MATMUL_CPU); TF_CALL_int32(REGISTER_BATCH_MATMUL_CPU); diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index a7673afd0b..822d72e068 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc index 086369a9f1..6d9fdfcf33 100644 --- a/tensorflow/core/kernels/decode_bmp_op.cc +++ b/tensorflow/core/kernels/decode_bmp_op.cc @@ -34,8 +34,10 @@ class DecodeBmpOp : public OpKernel { explicit DecodeBmpOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("channels", &channels_)); OP_REQUIRES( - context, channels_ == 0 || channels_ == 3 || channels_ == 4, - errors::InvalidArgument("channels must be 0, 3 or 4, got ", channels_)); + context, + channels_ == 0 || channels_ == 1 || channels_ == 3 || channels_ == 4, + errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ", + channels_)); } void Compute(OpKernelContext* context) override { @@ -66,11 +68,11 @@ class DecodeBmpOp : public OpKernel { channels_ = bpp / 8; } - // Current implementation only supports 3 or 4 channel + // Current implementation only supports 1, 3 or 4 channel // bitmaps. - OP_REQUIRES(context, (channels_ == 3 || channels_ == 4), + OP_REQUIRES(context, (channels_ == 1 || channels_ == 3 || channels_ == 4), errors::InvalidArgument( - "Number of channels must be 3 or 4, was ", channels_)); + "Number of channels must be 1, 3 or 4, was ", channels_)); // if height is negative, data layout is top down // otherwise, it's bottom up @@ -117,6 +119,9 @@ uint8* DecodeBmpOp::Decode(const uint8* input, uint8* const output, dst_pos = (i * width + j) * channels; switch (channels) { + case 1: + output[dst_pos] = input[src_pos]; + break; case 3: // BGR -> RGB output[dst_pos] = input[src_pos + 2]; diff --git a/tensorflow/core/kernels/dynamic_partition_op_test.cc b/tensorflow/core/kernels/dynamic_partition_op_test.cc index 0e8fbc0a67..9a7ed0af21 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_test.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -23,10 +24,14 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/testlib.h" #include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/random/simple_philox.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" namespace tensorflow { namespace { @@ -153,5 +158,58 @@ TEST_F(DynamicPartitionOpTest, Error_IndexOutOfRange) { << s; } +Node* DynamicPartitionNode(Graph* g, Node* in0, Node* in1, int num_partitions) { + Node* ret; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "DynamicPartition") + .Input(in0) + .Input(in1) + .Attr("num_partitions", num_partitions) + .Finalize(g, &ret)); + return ret; +} + +template +static Graph* DynamicPartition(int num_partitions, int dim) { + Graph* g = new Graph(OpRegistry::Global()); + // Always use a 128MB buffer. + const int kRows = ((128 << 20) / sizeof(T)) / dim; + Tensor data(DataTypeToEnum::value, TensorShape({kRows, dim})); + data.flat().setRandom(); + + random::PhiloxRandom philox(301, 17); + random::SimplePhilox rnd(&philox); + Tensor partitions(DT_INT32, TensorShape({kRows})); + for (int i = 0; i < kRows; i++) { + partitions.flat()(i) = rnd.Uniform(num_partitions); + } + DynamicPartitionNode(g, test::graph::Constant(g, data), + test::graph::Constant(g, partitions), num_partitions); + return g; +} + +#define BM_DYNAMIC_PARTITION(DEVICE, T, num) \ + static void BM_##DEVICE##_dynpart_##T##_##num(int iters, int dim) { \ + const int64 items = ((128 << 20) / sizeof(T)); \ + const int64 tot = static_cast(iters) * items; \ + testing::ItemsProcessed(tot); \ + testing::UseRealTime(); \ + test::Benchmark(#DEVICE, DynamicPartition(num, dim)).Run(iters); \ + } \ + BENCHMARK(BM_##DEVICE##_dynpart_##T##_##num)->Arg(1)->Arg(256) + +BM_DYNAMIC_PARTITION(cpu, float, 2); +BM_DYNAMIC_PARTITION(cpu, float, 100); +BM_DYNAMIC_PARTITION(cpu, double, 2); +BM_DYNAMIC_PARTITION(cpu, double, 100); +BM_DYNAMIC_PARTITION(cpu, complex64, 2); +BM_DYNAMIC_PARTITION(cpu, complex64, 100); + +BM_DYNAMIC_PARTITION(gpu, float, 2); +BM_DYNAMIC_PARTITION(gpu, float, 100); +BM_DYNAMIC_PARTITION(gpu, double, 2); +BM_DYNAMIC_PARTITION(gpu, double, 100); +BM_DYNAMIC_PARTITION(gpu, complex64, 2); +BM_DYNAMIC_PARTITION(gpu, complex64, 100); + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc new file mode 100644 index 0000000000..d9713075be --- /dev/null +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -0,0 +1,238 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/math_ops.cc. + +// This file uses MKL CBLAS batched xGEMM for acceleration of TF Batch +// Matrix-Matrix Multiplication (MatMul) operations. +// We currently register this kernel only for MKL supported data +// types (float, double, complex64, complex128). The macro INTEL_MKL is defined +// by the build system only when MKL is chosen as an option at configure stage +// and when it is undefined at build time, this file becomes an empty +// compilation unit + +#define EIGEN_USE_THREADS + +#if defined(INTEL_MKL) +#include +#include "mkl_cblas.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/numeric_types.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +#define MKL_Complex8 tensorflow::complex64 +#define MKL_Complex16 tensorflow::complex128 + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class BatchMatMulMkl : public OpKernel { + public: + explicit BatchMatMulMkl(OpKernelConstruction *context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("adj_x", &adj_x_)); + OP_REQUIRES_OK(context, context->GetAttr("adj_y", &adj_y_)); + } + + virtual ~BatchMatMulMkl() {} + + void Compute(OpKernelContext *ctx) override { + const Tensor &lhs = ctx->input(0); + const Tensor &rhs = ctx->input(1); + OP_REQUIRES(ctx, lhs.dims() == rhs.dims(), + errors::InvalidArgument("lhs and rhs has different ndims: ", + lhs.shape().DebugString(), " vs. ", + rhs.shape().DebugString())); + const int ndims = lhs.dims(); + OP_REQUIRES( + ctx, ndims >= 2, + errors::InvalidArgument("lhs and rhs ndims must be >= 2: ", ndims)); + TensorShape out_shape; + for (int i = 0; i < ndims - 2; ++i) { + OP_REQUIRES(ctx, lhs.dim_size(i) == rhs.dim_size(i), + errors::InvalidArgument( + "lhs.dim(", i, ") and rhs.dim(", i, + ") must be the same: ", lhs.shape().DebugString(), " vs ", + rhs.shape().DebugString())); + out_shape.AddDim(lhs.dim_size(i)); + } + auto batch_size = (ndims == 2) ? 1 : out_shape.num_elements(); + auto lhs_rows = lhs.dim_size(ndims - 2); + auto lhs_cols = lhs.dim_size(ndims - 1); + auto rhs_rows = rhs.dim_size(ndims - 2); + auto rhs_cols = rhs.dim_size(ndims - 1); + if (adj_x_) std::swap(lhs_rows, lhs_cols); + if (adj_y_) std::swap(rhs_rows, rhs_cols); + OP_REQUIRES(ctx, lhs_cols == rhs_rows, + errors::InvalidArgument( + "lhs mismatch rhs shape: ", lhs_cols, " vs. ", rhs_rows, + ": ", lhs.shape().DebugString(), " ", + rhs.shape().DebugString(), " ", adj_x_, " ", adj_y_)); + out_shape.AddDim(lhs_rows); + out_shape.AddDim(rhs_cols); + Tensor *out = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, out_shape, &out)); + if (out->NumElements() == 0) { + return; + } + if (lhs.NumElements() == 0 || rhs.NumElements() == 0) { + functor::SetZeroFunctor f; + f(ctx->eigen_device(), out->flat()); + return; + } + + auto rhs_reshaped = rhs.template flat_inner_dims(); + auto lhs_reshaped = lhs.template flat_inner_dims(); + auto out_reshaped = out->template flat_inner_dims(); + const uint64 M = lhs_reshaped.dimension(adj_x_ ? 2 : 1); + const uint64 K = lhs_reshaped.dimension(adj_x_ ? 1 : 2); + const uint64 N = rhs_reshaped.dimension(adj_y_ ? 1 : 2); + + std::vector m_array(batch_size, M); + std::vector n_array(batch_size, N); + std::vector k_array(batch_size, K); + std::vector lda_array(batch_size, adj_x_ ? M : K); + std::vector ldb_array(batch_size, adj_y_ ? K : N); + std::vector ldc_array(batch_size, N); + std::vector group_size(1, batch_size); + std::vector a_array; + std::vector b_array; + std::vector c_array; + a_array.reserve(batch_size); + b_array.reserve(batch_size); + c_array.reserve(batch_size); + for (int64 i = 0; i < batch_size; i++) { + a_array.push_back(&lhs_reshaped(i, 0, 0)); + b_array.push_back(&rhs_reshaped(i, 0, 0)); + c_array.push_back(&out_reshaped(i, 0, 0)); + } + + MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, &m_array[0], &n_array[0], + &k_array[0], &a_array[0], &lda_array[0], &b_array[0], + &ldb_array[0], &c_array[0], &ldc_array[0], 1, + &group_size[0]); + } + + private: + bool adj_x_; + bool adj_y_; + + void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, + const bool TransB, const MKL_INT *M_Array, + const MKL_INT *N_Array, const MKL_INT *K_Array, + const float **A_Array, const MKL_INT *lda_Array, + const float **B_Array, const MKL_INT *ldb_Array, + float **C_Array, const MKL_INT *ldc_Array, + const MKL_INT group_count, const MKL_INT *group_size) { + std::vector TransA_Array( + group_size[0], TransA ? CblasTrans : CblasNoTrans); + std::vector TransB_Array( + group_size[0], TransB ? CblasTrans : CblasNoTrans); + std::vector alpha_Array(group_size[0], 1.0); + std::vector beta_Array(group_size[0], 0.0); + cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], M_Array, + N_Array, K_Array, &alpha_Array[0], A_Array, lda_Array, + B_Array, ldb_Array, &beta_Array[0], C_Array, ldc_Array, + group_count, group_size); + } + + void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, + const bool TransB, const MKL_INT *M_Array, + const MKL_INT *N_Array, const MKL_INT *K_Array, + const double **A_Array, const MKL_INT *lda_Array, + const double **B_Array, const MKL_INT *ldb_Array, + double **C_Array, const MKL_INT *ldc_Array, + const MKL_INT group_count, const MKL_INT *group_size) { + std::vector TransA_array( + group_size[0], TransA ? CblasTrans : CblasNoTrans); + std::vector TransB_array( + group_size[0], TransB ? CblasTrans : CblasNoTrans); + std::vector alpha_Array(group_size[0], 1.0); + std::vector beta_Array(group_size[0], 0.0); + cblas_dgemm_batch(Layout, &TransA_array[0], &TransB_array[0], M_Array, + N_Array, K_Array, &alpha_Array[0], A_Array, lda_Array, + B_Array, ldb_Array, &beta_Array[0], C_Array, ldc_Array, + group_count, group_size); + } + + void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, + const bool TransB, const MKL_INT *M_Array, + const MKL_INT *N_Array, const MKL_INT *K_Array, + const MKL_Complex8 **A_Array, const MKL_INT *lda_Array, + const MKL_Complex8 **B_Array, const MKL_INT *ldb_Array, + MKL_Complex8 **C_Array, const MKL_INT *ldc_Array, + const MKL_INT group_count, const MKL_INT *group_size) { + std::vector TransA_array( + group_size[0], TransA ? CblasConjTrans : CblasNoTrans); + std::vector TransB_array( + group_size[0], TransB ? CblasConjTrans : CblasNoTrans); + std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); + std::vector beta_Array(group_size[0], {0.0f, 0.0f}); + cblas_cgemm_batch( + Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array, + static_cast(&alpha_Array[0]), + reinterpret_cast(A_Array), lda_Array, + reinterpret_cast(B_Array), ldb_Array, + static_cast(&beta_Array[0]), + reinterpret_cast(C_Array), ldc_Array, group_count, group_size); + } + + void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, + const bool TransB, const MKL_INT *M_Array, + const MKL_INT *N_Array, const MKL_INT *K_Array, + const MKL_Complex16 **A_Array, + const MKL_INT *lda_Array, + const MKL_Complex16 **B_Array, + const MKL_INT *ldb_Array, MKL_Complex16 **C_Array, + const MKL_INT *ldc_Array, const MKL_INT group_count, + const MKL_INT *group_size) { + std::vector TransA_array( + group_size[0], TransA ? CblasConjTrans : CblasNoTrans); + std::vector TransB_array( + group_size[0], TransB ? CblasConjTrans : CblasNoTrans); + std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); + std::vector beta_Array(group_size[0], {0.0f, 0.0f}); + cblas_zgemm_batch( + Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array, + static_cast(&alpha_Array[0]), + reinterpret_cast(A_Array), lda_Array, + reinterpret_cast(B_Array), ldb_Array, + static_cast(&beta_Array[0]), + reinterpret_cast(C_Array), ldc_Array, group_count, group_size); + } +}; + +#define REGISTER_BATCH_MATMUL_MKL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("BatchMatMul").Device(DEVICE_CPU).TypeConstraint("T"), \ + BatchMatMulMkl) + +TF_CALL_float(REGISTER_BATCH_MATMUL_MKL); +TF_CALL_double(REGISTER_BATCH_MATMUL_MKL); +TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL); +TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL); + +} // end namespace tensorflow +#endif diff --git a/tensorflow/core/kernels/prefetch_dataset_op.cc b/tensorflow/core/kernels/prefetch_dataset_op.cc index 1a6b7e078e..b02269f525 100644 --- a/tensorflow/core/kernels/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/prefetch_dataset_op.cc @@ -37,6 +37,8 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { int64 buffer_size; OP_REQUIRES_OK( ctx, ParseScalarArgument(ctx, "buffer_size", &buffer_size)); + OP_REQUIRES(ctx, buffer_size > 0, + errors::InvalidArgument("buffer_size must be > 0")); *output = new Dataset(ctx, input, buffer_size); } diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc index ad28d77ffd..97c0c2c099 100644 --- a/tensorflow/core/kernels/summary_interface.cc +++ b/tensorflow/core/kernels/summary_interface.cc @@ -16,7 +16,6 @@ limitations under the License. #include -#include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -28,6 +27,7 @@ limitations under the License. #include "tensorflow/core/lib/png/png_io.h" #include "tensorflow/core/lib/wav/wav_io.h" #include "tensorflow/core/util/events_writer.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace { @@ -229,7 +229,7 @@ class SummaryWriterImpl : public SummaryWriterInterface { } mutex_lock ml(mu_); events_writer_ = - xla::MakeUnique(io::JoinPath(logdir, "events")); + tensorflow::MakeUnique(io::JoinPath(logdir, "events")); if (!events_writer_->InitWithSuffix(filename_suffix)) { return errors::Unknown("Could not initialize events writer."); } diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc index d93dd0296e..83f15e134d 100644 --- a/tensorflow/core/lib/io/path.cc +++ b/tensorflow/core/lib/io/path.cc @@ -14,8 +14,22 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/lib/io/path.h" + +#include +#include +#include +#include +#include +#if !defined(PLATFORM_WINDOWS) +#include +#endif + +#include + #include "tensorflow/core/lib/strings/scanner.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" namespace tensorflow { namespace io { @@ -60,8 +74,7 @@ std::pair SplitPath(StringPiece uri) { auto pos = path.rfind('/'); #ifdef PLATFORM_WINDOWS - if (pos == StringPiece::npos) - pos = path.rfind('\\'); + if (pos == StringPiece::npos) pos = path.rfind('\\'); #endif // Handle the case with no '/' in 'path'. if (pos == StringPiece::npos) @@ -112,7 +125,7 @@ StringPiece Extension(StringPiece path) { string CleanPath(StringPiece unclean_path) { string path = unclean_path.ToString(); - const char *src = path.c_str(); + const char* src = path.c_str(); string::iterator dst = path.begin(); // Check for absolute path and determine initial backtrack limit. @@ -229,5 +242,52 @@ string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) { return strings::StrCat(scheme, "://", host, path); } +// Returns a unique number every time it is called. +int64 UniqueId() { + static mutex mu(LINKER_INITIALIZED); + static int64 id = 0; + mutex_lock l(mu); + return ++id; +} + +string GetTempFilename(const string& extension) { +#if defined(PLATFORM_WINDOWS) || defined(__ANDROID__) + LOG(FATAL) << "GetTempFilename is not implemented in this platform."; +#else + for (const char* dir : std::vector( + {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) { + if (!dir || !dir[0]) { + continue; + } + struct stat statbuf; + if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) { + // UniqueId is added here because mkstemps is not as thread safe as it + // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows + // the problem. + string tmp_filepath; + int fd; + if (extension.length()) { + tmp_filepath = io::JoinPath( + dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.", + extension)); + fd = mkstemps(&tmp_filepath[0], extension.length() + 1); + } else { + tmp_filepath = io::JoinPath( + dir, + strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX")); + fd = mkstemp(&tmp_filepath[0]); + } + if (fd < 0) { + LOG(FATAL) << "Failed to create temp file."; + } else { + close(fd); + return tmp_filepath; + } + } + } + LOG(FATAL) << "No temp directory found."; +#endif +} + } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h index 8d02baa5bb..47bb2b998d 100644 --- a/tensorflow/core/lib/io/path.h +++ b/tensorflow/core/lib/io/path.h @@ -89,6 +89,9 @@ void ParseURI(tensorflow::StringPiece uri, tensorflow::StringPiece* scheme, string CreateURI(tensorflow::StringPiece scheme, tensorflow::StringPiece host, tensorflow::StringPiece path); +// Creates a temporary file name with an extension. +string GetTempFilename(const string& extension); + } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index d7afd02df6..ceda11663a 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -2333,11 +2333,25 @@ REGISTER_OP("Cross") .Input("b: T") .Output("product: T") .Attr("T: realnumbertype") - // TODO(cwhipkey): implement these shape inference constraints here: - // * Both inputs have the same shape. - // * Input rank >= 1. - // * input_shape[-1] == 3. - .SetShapeFn(shape_inference::UnchangedShape) + .SetShapeFn([](InferenceContext* c) { + ShapeHandle a_shape; + ShapeHandle b_shape; + // * Input rank >= 1. + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &a_shape)); + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &b_shape)); + + // * Both inputs have the same shape. + TF_RETURN_IF_ERROR(c->Merge(a_shape, b_shape, &a_shape)); + + // * input_shape[-1] == 3. + if (c->RankKnown(a_shape)) { + int rank = c->Rank(a_shape); + auto dim = c->Dim(a_shape, rank - 1); + TF_RETURN_IF_ERROR(c->WithValue(dim, 3, &dim)); + } + c->set_output(0, a_shape); + return Status::OK(); + }) .Doc(R"doc( Compute the pairwise cross product. diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc index 28f9969de5..3dfa776d26 100644 --- a/tensorflow/core/ops/math_ops_test.cc +++ b/tensorflow/core/ops/math_ops_test.cc @@ -515,4 +515,15 @@ TEST(MathOpstest, RequantizationRange_ShapeFn) { INFER_ERROR("must be rank 0", op, "?;?;[2]"); } +TEST(MathOpsTest, Cross_ShapeFn) { + ShapeInferenceTestOp op("Cross"); + + INFER_ERROR("Shape must be at least rank 1 but is rank 0", op, "[];[]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but", op, "[3];[5]"); + INFER_ERROR("Dimension must be 3 but", op, "[3,5];[3,5]"); + + INFER_OK(op, "?;?", "?"); + INFER_OK(op, "[?];[?]", "in0"); + INFER_OK(op, "[1,?,3];[?,?,?]", "in0"); +} } // end namespace tensorflow diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl index c63fb28ff9..6e98f12114 100644 --- a/tensorflow/core/platform/default/build_config_root.bzl +++ b/tensorflow/core/platform/default/build_config_root.bzl @@ -10,7 +10,9 @@ def tf_sycl_tests_tags(): def tf_additional_plugin_deps(): return select({ - "//tensorflow:with_xla_support": ["//tensorflow/compiler/jit"], + str(Label("//tensorflow:with_xla_support")): [ + str(Label("//tensorflow/compiler/jit")) + ], "//conditions:default": [], }) @@ -19,37 +21,37 @@ def tf_additional_xla_deps_py(): def tf_additional_license_deps(): return select({ - "//tensorflow:with_xla_support": ["@llvm//:LICENSE.TXT"], + str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"], "//conditions:default": [], }) def tf_additional_verbs_deps(): return select({ - "//tensorflow:with_verbs_support": [ - "//tensorflow/contrib/verbs:verbs_server_lib", - "//tensorflow/contrib/verbs:grpc_verbs_client", + str(Label("//tensorflow:with_verbs_support")): [ + str(Label("//tensorflow/contrib/verbs:verbs_server_lib")), + str(Label("//tensorflow/contrib/verbs:grpc_verbs_client")), ], "//conditions:default": [], }) def tf_additional_mpi_deps(): return select({ - "//tensorflow:with_mpi_support": [ - "//tensorflow/contrib/mpi:mpi_server_lib", + str(Label("//tensorflow:with_mpi_support")): [ + str(Label("//tensorflow/contrib/mpi:mpi_server_lib")), ], "//conditions:default": [], }) def tf_additional_gdr_deps(): return select({ - "//tensorflow:with_gdr_support": [ - "//tensorflow/contrib/gdr:gdr_server_lib", + str(Label("//tensorflow:with_gdr_support")): [ + str(Label("//tensorflow/contrib/gdr:gdr_server_lib")), ], "//conditions:default": [], }) def if_static(extra_deps, otherwise=[]): return select({ - "//tensorflow:framework_shared_object": otherwise, + str(Label("//tensorflow:framework_shared_object")): otherwise, "//conditions:default": extra_deps, }) diff --git a/tensorflow/core/util/ptr_util.h b/tensorflow/core/util/ptr_util.h new file mode 100644 index 0000000000..f902b3ffa1 --- /dev/null +++ b/tensorflow/core/util/ptr_util.h @@ -0,0 +1,80 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_PTR_UTIL_H_ +#define TENSORFLOW_CORE_UTIL_PTR_UTIL_H_ + +// Utility functions for pointers. + +#include + +#include +#include +#include + +namespace tensorflow { + +namespace helper { + +// Trait to select overloads and return types for MakeUnique. +template +struct MakeUniqueResult { + using scalar = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using array = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using invalid = void; +}; + +} // namespace helper + +// Transfers ownership of a raw pointer to a std::unique_ptr of deduced type. +// Example: +// X* NewX(int, int); +// auto x = WrapUnique(NewX(1, 2)); // 'x' is std::unique_ptr. +// +// WrapUnique is useful for capturing the output of a raw pointer factory. +// However, prefer 'MakeUnique(args...) over 'WrapUnique(new T(args...))'. +// auto x = WrapUnique(new X(1, 2)); // works, but nonideal. +// auto x = MakeUnique(1, 2); // safer, standard, avoids raw 'new'. +// +// Note: Cannot wrap pointers to array of unknown bound (i.e. U(*)[]). +template +std::unique_ptr WrapUnique(T* ptr) { + static_assert(!std::is_array::value || std::extent::value != 0, + "types T[0] or T[] are unsupported"); + return std::unique_ptr(ptr); +} + +template +typename helper::MakeUniqueResult::scalar MakeUnique(Args&&... args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + +// Overload for array of unknown bound. +// The allocation of arrays needs to use the array form of new, +// and cannot take element constructor arguments. +template +typename helper::MakeUniqueResult::array MakeUnique(size_t n) { + return std::unique_ptr(new typename std::remove_extent::type[n]()); +} + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_PTR_UTIL_H_ diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md index 15d6d77f5e..c52279b212 100644 --- a/tensorflow/docs_src/extend/adding_an_op.md +++ b/tensorflow/docs_src/extend/adding_an_op.md @@ -341,9 +341,9 @@ Assuming you have `g++` installed, here is the sequence of commands you can use to compile your op into a dynamic library. ```bash -TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') -TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') -g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2 +TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) +TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) +g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 ``` On Mac OS X, the additional flag "-undefined dynamic_lookup" is required when @@ -451,17 +451,17 @@ Now that you know how to build a basic (and somewhat restricted) op and implementation, we'll look at some of the more complicated things you will typically need to build into your op. This includes: -* [Conditional checks and validation](#conditional_checks_and_validation) -* [Op registration](#op_registration) +* [Conditional checks and validation](#conditional-checks-and-validation) +* [Op registration](#op-registration) * [Attrs](#attrs) - * [Attr types](#attr_types) + * [Attr types](#attr-types) * [Polymorphism](#polymorphism) - * [Inputs and outputs](#inputs_and_outputs) - * [Backwards compatibility](#backwards_compatibility) -* [GPU support](#gpu_support) - * [Compiling the kernel for the GPU device](#compiling_the_kernel_for_the_gpu_device) -* [Implement the gradient in Python](#implement_the_gradient_in_python) -* [Shape functions in C++](#shape_functions_in_c) + * [Inputs and outputs](#inputs-and-outputs) + * [Backwards compatibility](#backwards-compatibility) +* [GPU support](#gpu-support) + * [Compiling the kernel for the GPU device](#compiling-the-kernel-for-the-gpu-device) +* [Implement the gradient in Python](#implement-the-gradient-in-python) +* [Shape functions in C++](#shape-functions-in-c) ### Conditional checks and validation @@ -1228,10 +1228,10 @@ into a single dynamically loadable library: ```bash nvcc -std=c++11 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \ --I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC + ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC g++ -std=c++11 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \ -cuda_op_kernel.cu.o -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB -ltensorflow_framework + cuda_op_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]} ``` `cuda_op_kernel.so` produced above can be loaded as usual in Python, using the diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 0db5c6143a..f0dcdc47ff 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -211,8 +211,8 @@ def get_input_fn_from_numpy(data_set, num_epochs=None, shuffle=True): ### A Neural Network Model for Boston House Values In the remainder of this tutorial, you'll write an input function for -preprocessing a subset of Boston housing data pulled from the [UCI Housing Data -Set](https://archive.ics.uci.edu/ml/datasets/Housing) and use it to feed data to +preprocessing a subset of Boston housing data pulled from the UCI Housing Data +Set and use it to feed data to a neural network regressor for predicting median house values. The [Boston CSV data sets](#setup) you'll use to train your neural network diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 63742828b0..8d0eb7966f 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -84,7 +84,7 @@ install it now: * [Python 3.5.x 64-bit from python.org](https://www.python.org/downloads/release/python-352/) * [Python 3.6.x 64-bit from python.org](https://www.python.org/downloads/release/python-362/) --TensorFlow supports Python 3.5.x and 3.6.x on Windows. +TensorFlow supports Python 3.5.x and 3.6.x on Windows. Note that Python 3 comes with the pip3 package manager, which is the program you'll use to install TensorFlow. @@ -98,7 +98,6 @@ To install the GPU version of TensorFlow, enter the following command:
    C:\> pip3 install --upgrade tensorflow-gpu
    - ## Installing with Anaconda **The Anaconda installation is community supported, not officially supported.** @@ -219,6 +218,11 @@ ImportError: cannot import name 'descriptor'
  • +
    + + +
    Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.4.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.4.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.4.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.3.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.3.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
    tensorflow-1.2.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.2.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.35.18
    tensorflow-1.1.0CPU3.5MSVC 2015 update 3Cmake v3.6.3N/AN/A
    38896424 +
    Could not find a version that satisfies the requirement tensorflow
    +
    - diff --git a/tensorflow/docs_src/mobile/ios_build.md b/tensorflow/docs_src/mobile/ios_build.md index a04655052f..4c84a1214a 100644 --- a/tensorflow/docs_src/mobile/ios_build.md +++ b/tensorflow/docs_src/mobile/ios_build.md @@ -98,7 +98,7 @@ There are three demo applications for iOS, all defined in Xcode projects inside ## Building the TensorFlow iOS libraries from source -While Cocapods is the quickest and easiest way of getting started, you sometimes +While Cocoapods is the quickest and easiest way of getting started, you sometimes need more flexibility to determine which parts of TensorFlow your app should be shipped with. For such cases, you can build the iOS libraries from the sources. [This diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md index 3a002c4da2..17dbf1c3e6 100644 --- a/tensorflow/docs_src/mobile/mobile_intro.md +++ b/tensorflow/docs_src/mobile/mobile_intro.md @@ -156,7 +156,7 @@ easy cases on device. Doing on-device computation can also signal when it's time to switch to working on the cloud. A good example of this is hotword detection in speech. Since devices are able to constantly listen out for the keywords, this then triggers a -lot of traffic to cloud-based speech recognition once one is recognised. Without +lot of traffic to cloud-based speech recognition once one is recognized. Without the on-device component, the whole application wouldn’t be feasible, and this pattern exists across several other applications as well. Recognizing that some sensor input is interesting enough for further processing makes a lot of diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index d9e8875c38..44cacff5db 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -115,7 +115,7 @@ If you look at the resulting file size, you should see that it’s about a quart of the original at 23MB. Another transform is `round_weights`, which doesn't make the file smaller, but it -makes the file compressable to about the same size as when `quantize_weights` is +makes the file compressible to about the same size as when `quantize_weights` is used. This is particularly useful for mobile development, taking advantage of the fact that app bundles are compressed before they’re downloaded by consumers. diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 4333f94486..a49973d550 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -776,7 +776,7 @@ The output type is a tuple of three ComputationDataHandles: | `batch_var` | `ComputationDataHandle` | 1 dimensional array (\\(\sigma^2\\)) | The `batch_mean` and `batch_var` are moments calculated across the batch and -spatial dimensions using the formulars above. +spatial dimensions using the formulas above. ## BatchNormInference diff --git a/tensorflow/docs_src/tutorials/image_recognition.md b/tensorflow/docs_src/tutorials/image_recognition.md index df13eabead..32257f87d6 100644 --- a/tensorflow/docs_src/tutorials/image_recognition.md +++ b/tensorflow/docs_src/tutorials/image_recognition.md @@ -5,7 +5,7 @@ tell apart a lion and a jaguar, read a sign, or recognize a human's face. But these are actually hard problems to solve with a computer: they only seem easy because our brains are incredibly good at understanding images. -In the last few years the field of machine learning has made tremendous +In the last few years, the field of machine learning has made tremendous progress on addressing these difficult problems. In particular, we've found that a kind of model called a deep [convolutional neural network](https://colah.github.io/posts/2014-07-Conv-Nets-Modular/) diff --git a/tensorflow/examples/how_tos/reading_data/convert_to_records.py b/tensorflow/examples/how_tos/reading_data/convert_to_records.py index d14c1f7c86..c89e839563 100644 --- a/tensorflow/examples/how_tos/reading_data/convert_to_records.py +++ b/tensorflow/examples/how_tos/reading_data/convert_to_records.py @@ -52,17 +52,19 @@ def convert_to(data_set, name): filename = os.path.join(FLAGS.directory, name + '.tfrecords') print('Writing', filename) - writer = tf.python_io.TFRecordWriter(filename) - for index in range(num_examples): - image_raw = images[index].tostring() - example = tf.train.Example(features=tf.train.Features(feature={ - 'height': _int64_feature(rows), - 'width': _int64_feature(cols), - 'depth': _int64_feature(depth), - 'label': _int64_feature(int(labels[index])), - 'image_raw': _bytes_feature(image_raw)})) - writer.write(example.SerializeToString()) - writer.close() + with tf.python_io.TFRecordWriter(filename) as writer: + for index in range(num_examples): + image_raw = images[index].tostring() + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'height': _int64_feature(rows), + 'width': _int64_feature(cols), + 'depth': _int64_feature(depth), + 'label': _int64_feature(int(labels[index])), + 'image_raw': _bytes_feature(image_raw) + })) + writer.write(example.SerializeToString()) def main(unused_argv): diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py index 6d75fbb92b..751652b330 100644 --- a/tensorflow/examples/speech_commands/input_data.py +++ b/tensorflow/examples/speech_commands/input_data.py @@ -240,7 +240,8 @@ class AudioProcessor(object): # Look through all the subfolders to find audio samples search_path = os.path.join(self.data_dir, '*', '*.wav') for wav_path in gfile.Glob(search_path): - word = re.search('.*/([^/]+)/.*.wav', wav_path).group(1).lower() + _, word = os.path.split(os.path.dirname(wav_path)) + word = word.lower() # Treat the '_background_noise_' folder as a special case, since we expect # it to contain long audio samples we mix in to improve training. if word == BACKGROUND_NOISE_DIR_NAME: diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py index a54bcbdb32..f5bf04305a 100644 --- a/tensorflow/examples/speech_commands/train.py +++ b/tensorflow/examples/speech_commands/train.py @@ -156,7 +156,8 @@ def main(_): predicted_indices = tf.argmax(logits, 1) expected_indices = tf.argmax(ground_truth_input, 1) correct_prediction = tf.equal(predicted_indices, expected_indices) - confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices) + confusion_matrix = tf.confusion_matrix( + expected_indices, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.scalar('accuracy', evaluation_step) diff --git a/tensorflow/examples/udacity/1_notmnist.ipynb b/tensorflow/examples/udacity/1_notmnist.ipynb index 39674e1aa4..dffe5d37c6 100644 --- a/tensorflow/examples/udacity/1_notmnist.ipynb +++ b/tensorflow/examples/udacity/1_notmnist.ipynb @@ -46,13 +46,13 @@ "# These are all the modules we'll be using later. Make sure you can import them\n", "# before proceeding further.\n", "from __future__ import print_function\n", + "import imageio\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import os\n", "import sys\n", "import tarfile\n", "from IPython.display import display, Image\n", - "from scipy import ndimage\n", "from sklearn.linear_model import LogisticRegression\n", "from six.moves.urllib.request import urlretrieve\n", "from six.moves import cPickle as pickle\n", @@ -325,13 +325,13 @@ " for image in image_files:\n", " image_file = os.path.join(folder, image)\n", " try:\n", - " image_data = (ndimage.imread(image_file).astype(float) - \n", + " image_data = (imageio.imread(image_file).astype(float) - \n", " pixel_depth / 2) / pixel_depth\n", " if image_data.shape != (image_size, image_size):\n", " raise Exception('Unexpected image shape: %s' % str(image_data.shape))\n", " dataset[num_images, :, :] = image_data\n", " num_images = num_images + 1\n", - " except IOError as e:\n", + " except (IOError, ValueError) as e:\n", " print('Could not read:', image_file, ':', e, '- it\\'s ok, skipping.')\n", " \n", " dataset = dataset[0:num_images, :, :]\n", diff --git a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java index 9a1b7592b3..a24150484e 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java +++ b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java @@ -265,6 +265,36 @@ public final class OperationBuilder { return this; } + public OperationBuilder setAttr(String name, Shape[] value) { + int[] numDimensions = new int[value.length]; + int totalNumDimensions = 0; + for (int idx = 0; idx < value.length; ++idx) { + int n = value[idx].numDimensions(); + numDimensions[idx] = n; + if (n > 0) { + totalNumDimensions += n; + } + } + // Flatten the shapes into a single array to avoid too much overhead in the + // native part + long[] shapes = new long[totalNumDimensions]; + int shapeIdx = 0; + for (Shape shape : value) { + if (shape.numDimensions() > 0) { + for (long dim : shape.asArray()) { + shapes[shapeIdx++] = dim; + } + } + } + Graph.Reference r = graph.ref(); + try { + setAttrShapeList(unsafeNativeHandle, name, shapes, numDimensions); + } finally { + r.close(); + } + return this; + } + public OperationBuilder setAttr(String name, String[] value) { Charset utf8 = Charset.forName("UTF-8"); Object[] objects = new Object[value.length]; @@ -297,8 +327,6 @@ public final class OperationBuilder { // The names of all the setAttr* family functions below correspond to the C library types, not the // Java library types. Roughly, setAttrFoo calls the TensorFlow C library function: TF_SetAttrFoo. - // TODO(ashankar): - // - setAttrShapeList: Which would take in a long[][] private static native void setAttrString(long handle, String name, byte[] value); @@ -324,5 +352,8 @@ public final class OperationBuilder { private static native void setAttrShape(long handle, String name, long[] shape, int numDims); + private static native void setAttrShapeList( + long handle, String name, long[] shapes, int[] numDims); + private static native void setAttrStringList(long handle, String name, Object[] value); } diff --git a/tensorflow/java/src/main/native/operation_builder_jni.cc b/tensorflow/java/src/main/native/operation_builder_jni.cc index e03be7b110..55d214a7c4 100644 --- a/tensorflow/java/src/main/native/operation_builder_jni.cc +++ b/tensorflow/java/src/main/native/operation_builder_jni.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/java/src/main/native/operation_builder_jni.h" +#include #include #include "tensorflow/c/c_api.h" #include "tensorflow/java/src/main/native/exception_jni.h" @@ -262,6 +263,41 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShape( env->ReleaseStringUTFChars(name, cname); } +JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShapeList( + JNIEnv* env, jclass clazz, jlong handle, jstring name, jlongArray shapes, + jintArray num_dims) { + TF_OperationDescription* d = requireHandle(env, handle); + if (d == nullptr) return; + std::unique_ptr cshapes; + std::unique_ptr cdims; + std::unique_ptr cnum_dims; + const int num_dims_length = env->GetArrayLength(num_dims); + if (num_dims_length > 0) { + const int shapes_length = env->GetArrayLength(shapes); + cshapes.reset(new int64_t[shapes_length]); + cdims.reset(new int64_t*[num_dims_length]); + cnum_dims.reset(new int[num_dims_length]); + jlong* shapes_elems = + static_cast(env->GetPrimitiveArrayCritical(shapes, nullptr)); + std::memcpy(cshapes.get(), shapes_elems, shapes_length << 3); + env->ReleasePrimitiveArrayCritical(shapes, shapes_elems, JNI_ABORT); + int64_t* cshapes_ptr = cshapes.get(); + jint* num_dims_elems = + static_cast(env->GetPrimitiveArrayCritical(num_dims, nullptr)); + for (int i = 0; i < num_dims_length; ++i) { + cnum_dims[i] = static_cast(num_dims_elems[i]); + cdims[i] = cshapes_ptr; + if (cnum_dims[i] > 0) { + cshapes_ptr += cnum_dims[i]; + } + } + env->ReleasePrimitiveArrayCritical(num_dims, num_dims_elems, JNI_ABORT); + } + const char* cname = env->GetStringUTFChars(name, nullptr); + TF_SetAttrShapeList(d, cname, cdims.get(), cnum_dims.get(), num_dims_length); + env->ReleaseStringUTFChars(name, cname); +} + JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrStringList( JNIEnv* env, jclass object, jlong handle, jstring name, jobjectArray values) { diff --git a/tensorflow/java/src/main/native/operation_builder_jni.h b/tensorflow/java/src/main/native/operation_builder_jni.h index 2e72bd68da..cf0abe4829 100644 --- a/tensorflow/java/src/main/native/operation_builder_jni.h +++ b/tensorflow/java/src/main/native/operation_builder_jni.h @@ -169,6 +169,14 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrTensorList( JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShape( JNIEnv *, jclass, jlong, jstring, jlongArray, jint); +/* + * Class: org_tensorflow_OperationBuilder + * Method: setAttrShapeList + * Signature: (JLjava/lang/String;[J[I)V + */ +JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShapeList( + JNIEnv *, jclass, jlong, jstring, jlongArray, jintArray); + /* * Class: org_tensorflow_OperationBuilder * Method: setAttrStringList diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java index 6dc233987b..0a4a8cf4e3 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java @@ -148,6 +148,19 @@ public class OperationBuilderTest { } } + @Test + public void setAttrShapeList() { + // Those shapes match tensors ones, so no exception is thrown + testSetAttrShapeList(new Shape[] {Shape.make(2, 2), Shape.make(2, 2, 2)}); + try { + // Those shapes do not match tensors ones, exception is thrown + testSetAttrShapeList(new Shape[] {Shape.make(2, 2), Shape.make(2, 2, 2, 2)}); + fail("Shapes are incompatible and an exception was expected"); + } catch (IllegalArgumentException e) { + // expected + } + } + @Test public void addControlInput() { try (Graph g = new Graph(); @@ -175,6 +188,30 @@ public class OperationBuilderTest { } } + private static void testSetAttrShapeList(Shape[] shapes) { + try (Graph g = new Graph(); + Session s = new Session(g)) { + int[][] matrix = new int[][] {{0, 0}, {0, 0}}; + Output queue = + g.opBuilder("FIFOQueue", "queue") + .setAttr("component_types", new DataType[] {DataType.INT32, DataType.INT32}) + .setAttr("shapes", shapes) + .build() + .output(0); + assertTrue(hasNode(g, "queue")); + Output c1 = TestUtil.constant(g, "const1", matrix); + Output c2 = TestUtil.constant(g, "const2", new int[][][] {matrix, matrix}); + Operation enqueue = + g.opBuilder("QueueEnqueue", "enqueue") + .addInput(queue) + .addInputList(new Output[] {c1, c2}) + .build(); + assertTrue(hasNode(g, "enqueue")); + + s.runner().addTarget(enqueue).run(); + } + } + private static boolean hasNode(Graph g, String name) { return g.operation(name) != null; } diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 5e7a6c0b59..12d81c4383 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3341,6 +3341,7 @@ py_test( tags = [ "no_gpu", "no_oss", + "no_pip", "no_pip_gpu", "notap", ], @@ -3387,6 +3388,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_gpu", + "no_windows", ], deps = [ ":array_ops", diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py index 2f89c006d2..2455395635 100644 --- a/tensorflow/python/data/util/nest.py +++ b/tensorflow/python/data/util/nest.py @@ -376,6 +376,16 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True): "structure has length %s, while shallow structure has length %s." % (len(input_tree), len(shallow_tree))) + if check_types and isinstance(shallow_tree, dict): + if set(input_tree) != set(shallow_tree): + raise ValueError( + "The two structures don't have the same keys. Input " + "structure has keys %s, while shallow structure has keys %s." % + (list(_six.iterkeys(input_tree)), + list(_six.iterkeys(shallow_tree)))) + input_tree = list(_six.iteritems(input_tree)) + shallow_tree = list(_six.iteritems(shallow_tree)) + for shallow_branch, input_branch in zip(shallow_tree, input_tree): assert_shallow_structure(shallow_branch, input_branch, check_types=check_types) diff --git a/tensorflow/python/data/util/nest_test.py b/tensorflow/python/data/util/nest_test.py index 0bd0a5f443..90dd7dfe77 100644 --- a/tensorflow/python/data/util/nest_test.py +++ b/tensorflow/python/data/util/nest_test.py @@ -268,6 +268,15 @@ class NestTest(test.TestCase): nest.assert_shallow_structure(inp_ab2, inp_ab1) nest.assert_shallow_structure(inp_ab2, inp_ab1, check_types=False) + inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}} + inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}} + expected_message = ( + r"The two structures don't have the same keys. Input " + r"structure has keys \['c'\], while shallow structure has " + r"keys \['d'\].") + with self.assertRaisesRegexp(ValueError, expected_message): + nest.assert_shallow_structure(inp_ab2, inp_ab1) + def testFlattenUpTo(self): input_tree = (((2, 2), (3, 3)), ((4, 9), (5, 5))) shallow_tree = ((True, True), (False, True)) diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index 31e9933c6f..3b295a7e35 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -57,7 +57,7 @@ class ServingInputReceiver(collections.namedtuple( groups of receiver tensors, each of which may be a `Tensor` or a dict of string to `Tensor`. These named receiver tensor alternatives generate additional serving signatures, which may be used to feed inputs at - different points within the input reciever subgraph. A typical usage is + different points within the input receiver subgraph. A typical usage is to allow feeding raw feature `Tensor`s *downstream* of the tf.parse_example() op. Defaults to None. """ diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index 1862e325e2..17d018aa88 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -1016,7 +1016,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): is_the_final_export): del export_path, checkpoint_path, eval_result estimator.times_export_was_called += 1 - # final_export is happend at the end. + # final_export is happened at the end. self.assertEqual(0, estimator.times_final_export_was_called) if is_the_final_export: estimator.times_final_export_was_called += 1 @@ -1361,7 +1361,7 @@ class TrainingExecutorRunLocalTest(test.TestCase): is_the_final_export): del export_path, checkpoint_path, eval_result estimator.times_export_was_called += 1 - # final_export is happend at the end. + # final_export is happened at the end. self.assertEqual(0, estimator.times_final_export_was_called) if is_the_final_export: estimator.times_final_export_was_called += 1 diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index e4992afbca..d9391dd6c5 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -556,6 +556,7 @@ py_test( srcs = ["_impl/keras/utils/data_utils_test.py"], srcs_version = "PY2AND3", tags = [ + "no_windows", "noasan", # times out "notsan", ], diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index b029e5161f..ec7a5dcffd 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -2487,7 +2487,7 @@ class Function(object): """Runs a computation graph. It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`. - In particular additonal operations via `fetches` argument and additional + In particular additional operations via `fetches` argument and additional tensor substitutions via `feed_dict` arguments. Note that given substitutions are merged with substitutions from `inputs`. Even though `feed_dict` is passed once in the constructor (called in `model.compile()`) diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py index 40a996a03f..16109b52b3 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -768,7 +768,7 @@ class TensorBoard(Callback): self.writer.add_summary(summary, epoch) self.writer.flush() - def on_train_end(self, _): + def on_train_end(self, logs=None): self.writer.close() diff --git a/tensorflow/python/keras/_impl/keras/callbacks_test.py b/tensorflow/python/keras/_impl/keras/callbacks_test.py index 97a650a992..79dfcd1bb6 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks_test.py +++ b/tensorflow/python/keras/_impl/keras/callbacks_test.py @@ -19,16 +19,18 @@ from __future__ import division from __future__ import print_function import csv -import multiprocessing import os import re import shutil +import threading +import unittest import numpy as np from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils from tensorflow.python.platform import test +from tensorflow.python.summary.writer import writer_cache try: import h5py # pylint:disable=g-import-not-at-top @@ -498,7 +500,10 @@ class KerasCallbacksTest(test.TestCase): values = [] with open(fp) as f: for x in csv.reader(f): - values.append(x) + # In windows, due to \r\n line ends we may end up reading empty lines + # after each line. Skip empty lines. + if x: + values.append(x) assert 'nan' in values[-1], 'The last epoch was not logged.' def test_TerminateOnNaN(self): @@ -678,23 +683,41 @@ class KerasCallbacksTest(test.TestCase): batch_size=5)] # fit w/o validation data should raise ValueError if histogram_freq > 0 + cbs = callbacks_factory(histogram_freq=1) with self.assertRaises(ValueError): - model.fit(x_train, y_train, batch_size=BATCH_SIZE, - callbacks=callbacks_factory(histogram_freq=1), epochs=3) + model.fit( + x_train, y_train, batch_size=BATCH_SIZE, callbacks=cbs, epochs=3) + + for cb in cbs: + cb.on_train_end() # fit generator without validation data should raise ValueError if # histogram_freq > 0 + cbs = callbacks_factory(histogram_freq=1) with self.assertRaises(ValueError): - model.fit_generator(data_generator(True), len(x_train), epochs=2, - callbacks=callbacks_factory(histogram_freq=1)) + model.fit_generator( + data_generator(True), len(x_train), epochs=2, callbacks=cbs) + + for cb in cbs: + cb.on_train_end() # fit generator with validation data generator should raise ValueError if # histogram_freq > 0 + cbs = callbacks_factory(histogram_freq=1) with self.assertRaises(ValueError): - model.fit_generator(data_generator(True), len(x_train), epochs=2, - validation_data=data_generator(False), - validation_steps=1, - callbacks=callbacks_factory(histogram_freq=1)) + model.fit_generator( + data_generator(True), + len(x_train), + epochs=2, + validation_data=data_generator(False), + validation_steps=1, + callbacks=cbs) + + for cb in cbs: + cb.on_train_end() + + # Make sure file writer cache is clear to avoid failures during cleanup. + writer_cache.FileWriterCache.clear() def test_TensorBoard_multi_input_output(self): np.random.seed(1337) @@ -767,6 +790,9 @@ class KerasCallbacksTest(test.TestCase): callbacks=callbacks_factory(histogram_freq=1)) assert os.path.isdir(filepath) + @unittest.skipIf( + os.name == 'nt', + 'use_multiprocessing=True does not work on windows properly.') def test_LambdaCallback(self): with self.test_session(): np.random.seed(1337) @@ -789,14 +815,15 @@ class KerasCallbacksTest(test.TestCase): # Start an arbitrary process that should run during model # training and be terminated after training has completed. + e = threading.Event() + def target(): - while True: - pass + e.wait() - p = multiprocessing.Process(target=target) - p.start() + t = threading.Thread(target=target) + t.start() cleanup_callback = keras.callbacks.LambdaCallback( - on_train_end=lambda logs: p.terminate()) + on_train_end=lambda logs: e.set()) cbks = [cleanup_callback] model.fit( @@ -807,8 +834,8 @@ class KerasCallbacksTest(test.TestCase): callbacks=cbks, epochs=5, verbose=0) - p.join() - assert not p.is_alive() + t.join() + assert not t.is_alive() def test_TensorBoard_with_ReduceLROnPlateau(self): with self.test_session(): diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index e2a06e8e77..17a26f978e 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -18,6 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os +import unittest + import numpy as np from tensorflow.python.keras._impl import keras @@ -783,6 +786,9 @@ class TestDynamicTrainability(test.TestCase): class TestGeneratorMethods(test.TestCase): + @unittest.skipIf( + os.name == 'nt', + 'use_multiprocessing=True does not work on windows properly.') def test_generator_methods(self): arr_data = np.random.random((50, 2)) arr_labels = np.random.random((50,)) diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py index 1144aa3152..a7ea3b48a3 100644 --- a/tensorflow/python/keras/_impl/keras/estimator_test.py +++ b/tensorflow/python/keras/_impl/keras/estimator_test.py @@ -33,6 +33,7 @@ from tensorflow.python.keras._impl.keras import testing_utils from tensorflow.python.keras._impl.keras.applications import mobilenet from tensorflow.python.platform import gfile from tensorflow.python.platform import test +from tensorflow.python.summary.writer import writer_cache try: @@ -132,6 +133,8 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir) def tearDown(self): + # Make sure nothing is stuck in limbo. + writer_cache.FileWriterCache.clear() if os.path.isdir(self._base_dir): gfile.DeleteRecursively(self._base_dir) @@ -153,6 +156,8 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1) self.assertLess(after_eval_results['loss'], before_eval_results['loss']) + + writer_cache.FileWriterCache.clear() gfile.DeleteRecursively(self._config.model_dir) def test_evaluate(self): diff --git a/tensorflow/python/keras/_impl/keras/models_test.py b/tensorflow/python/keras/_impl/keras/models_test.py index 86acac4604..61938066b9 100644 --- a/tensorflow/python/keras/_impl/keras/models_test.py +++ b/tensorflow/python/keras/_impl/keras/models_test.py @@ -54,10 +54,11 @@ class TestModelSaving(test.TestCase): model.train_on_batch(x, y) out = model.predict(x) - _, fname = tempfile.mkstemp('.h5') + fd, fname = tempfile.mkstemp('.h5') keras.models.save_model(model, fname) new_model = keras.models.load_model(fname) + os.close(fd) os.remove(fname) out2 = new_model.predict(x) @@ -95,13 +96,14 @@ class TestModelSaving(test.TestCase): model.train_on_batch(x, y) out = model.predict(x) - _, fname = tempfile.mkstemp('.h5') + fd, fname = tempfile.mkstemp('.h5') keras.models.save_model(model, fname) model = keras.models.load_model( fname, custom_objects={'CustomOp': CustomOp, 'custom_loss': custom_loss}) + os.close(fd) os.remove(fname) out2 = model.predict(x) @@ -125,10 +127,11 @@ class TestModelSaving(test.TestCase): model.train_on_batch(x, y) out = model.predict(x) - _, fname = tempfile.mkstemp('.h5') + fd, fname = tempfile.mkstemp('.h5') keras.models.save_model(model, fname) model = keras.models.load_model(fname) + os.close(fd) os.remove(fname) out2 = model.predict(x) @@ -144,9 +147,10 @@ class TestModelSaving(test.TestCase): model.add(keras.layers.Dense(3)) model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - _, fname = tempfile.mkstemp('.h5') + fd, fname = tempfile.mkstemp('.h5') keras.models.save_model(model, fname) model = keras.models.load_model(fname) + os.close(fd) os.remove(fname) def test_saving_with_tf_optimizer(self): @@ -161,9 +165,10 @@ class TestModelSaving(test.TestCase): optimizer=training_module.AdadeltaOptimizer(0.1), metrics=['acc']) - _, fname = tempfile.mkstemp('.h5') + fd, fname = tempfile.mkstemp('.h5') keras.models.save_model(model, fname) model = keras.models.load_model(fname) + os.close(fd) os.remove(fname) def test_saving_right_after_compilation(self): @@ -177,9 +182,10 @@ class TestModelSaving(test.TestCase): model.compile(loss='mse', optimizer='sgd', metrics=['acc']) model.model._make_train_function() - _, fname = tempfile.mkstemp('.h5') + fd, fname = tempfile.mkstemp('.h5') keras.models.save_model(model, fname) model = keras.models.load_model(fname) + os.close(fd) os.remove(fname) def test_saving_lambda_numpy_array_arguments(self): @@ -194,10 +200,11 @@ class TestModelSaving(test.TestCase): model = keras.models.Model(inputs, output) model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - _, fname = tempfile.mkstemp('.h5') + fd, fname = tempfile.mkstemp('.h5') keras.models.save_model(model, fname) model = keras.models.load_model(fname) + os.close(fd) os.remove(fname) self.assertAllClose(mean, model.layers[1].arguments['mu']) diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py index 14b2f08442..47c5b4cff0 100644 --- a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py +++ b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py @@ -22,6 +22,7 @@ from itertools import cycle import os import tarfile import threading +import unittest import zipfile import numpy as np @@ -164,6 +165,9 @@ class TestEnqueuers(test.TestCase): self.assertEqual(len(set(acc) - set(range(100))), 0) enqueuer.stop() + @unittest.skipIf( + os.name == 'nt', + 'use_multiprocessing=True does not work on windows properly.') def test_generator_enqueuer_processes(self): enqueuer = keras.utils.data_utils.GeneratorEnqueuer( create_generator_from_sequence_pcs(TestSequence([3, 200, 200, 3])), @@ -185,6 +189,9 @@ class TestEnqueuers(test.TestCase): with self.assertRaises(StopIteration): next(gen_output) + @unittest.skipIf( + os.name == 'nt', + 'use_multiprocessing=True does not work on windows properly.') def test_generator_enqueuer_fail_processes(self): enqueuer = keras.utils.data_utils.GeneratorEnqueuer( create_generator_from_sequence_pcs(FaultSequence()), diff --git a/tensorflow/python/keras/_impl/keras/utils/io_utils.py b/tensorflow/python/keras/_impl/keras/utils/io_utils.py index 1c8299c27d..a8fc18c17a 100644 --- a/tensorflow/python/keras/_impl/keras/utils/io_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/io_utils.py @@ -63,11 +63,11 @@ class HDF5Matrix(object): 'HDF5 and h5py installed.') if datapath not in list(self.refs.keys()): - f = h5py.File(datapath) - self.refs[datapath] = f + self._f = h5py.File(datapath) + self.refs[datapath] = self._f else: - f = self.refs[datapath] - self.data = f[dataset] + self._f = self.refs[datapath] + self.data = self._f[dataset] self.start = start if end is None: self.end = self.data.shape[0] @@ -78,6 +78,9 @@ class HDF5Matrix(object): def __len__(self): return self.end - self.start + def __del__(self): + self._f.close() + def __getitem__(self, key): if isinstance(key, slice): start, stop = key.start, key.stop diff --git a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py index ce2faf2d96..d56c4484ce 100644 --- a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py @@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'): layer_id = str(id(layer)) for i, node in enumerate(layer._inbound_nodes): # pylint: disable=protected-access node_key = layer.name + '_ib-' + str(i) - if node_key in model.container_nodes: + if node_key in model._network_nodes: # pylint: disable=protected-access for inbound_layer in node.inbound_layers: inbound_layer_id = str(id(inbound_layer)) layer_id = str(id(layer)) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index f15b3baabe..f6721de32a 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2362,7 +2362,7 @@ cuda_py_test( cuda_py_test( name = "slice_op_test", - size = "medium", + size = "large", srcs = ["slice_op_test.py"], additional_deps = [ "//third_party/py/numpy", @@ -2942,6 +2942,20 @@ tf_py_test( ], ) +tf_py_test( + name = "prefetch_dataset_op_test", + size = "small", + srcs = ["prefetch_dataset_op_test.py"], + additional_deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + tf_py_test( name = "range_dataset_op_test", size = "small", diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py index 783492a6f2..35f8f76991 100644 --- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py +++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py @@ -64,6 +64,81 @@ class DecodeBmpOpTest(test.TestCase): decoded = decode.eval() self.assertAllEqual(decoded, img_bytes) + def testGrayscale(self): + img_bytes = [[[255], [0]], [[255], [0]]] + encoded_bytes = [ + 0x42, + 0x40, + 0x3d, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0x36, + 0, + 0, + 0, + 0x28, + 0, + 0, + 0, + 0x2, + 0, + 0, + 0, + 0x2, + 0, + 0, + 0, + 0x1, + 0, + 0x8, + 0, + 0, + 0, + 0, + 0, + 0x10, + 0, + 0, + 0, + 0x13, + 0xb, + 0, + 0, + 0x13, + 0xb, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0xff, + 0, + 0, + 0, + 0xff, + 0, + 0, + 0, + ] + + byte_string = bytes(bytearray(encoded_bytes)) + img_in = constant_op.constant(byte_string, dtype=dtypes.string) + decode = image_ops.decode_bmp(img_in) + + with self.test_session(): + decoded = decode.eval() + self.assertAllEqual(decoded, img_bytes) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py new file mode 100644 index 0000000000..646324cb95 --- /dev/null +++ b/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py @@ -0,0 +1,59 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Test PrefetchDataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class PrefetchDatasetTest(test.TestCase): + + def testBufferSize(self): + buffer_size = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = dataset_ops.Dataset.range(10).prefetch( + buffer_size=buffer_size).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op, feed_dict={buffer_size: 5}) + for m in range(10): + self.assertEqual(m, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testInvalidBufferSize(self): + buffer_size = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = dataset_ops.Dataset.range(10).prefetch( + buffer_size=buffer_size).make_initializable_iterator() + init_op = iterator.initializer + + with self.assertRaisesRegexp(errors.InvalidArgumentError, "buffer_size"): + with self.test_session() as sess: + sess.run(init_op, feed_dict={buffer_size: 0}) + + with self.assertRaisesRegexp(errors.InvalidArgumentError, "buffer_size"): + with self.test_session() as sess: + sess.run(init_op, feed_dict={buffer_size: -5}) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 8c327d7e27..fbb13bb72c 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -920,6 +920,7 @@ class SeparableConv2D(Conv2D): trainable=trainable, name=name, **kwargs) + self.data_format = data_format self.depth_multiplier = depth_multiplier self.depthwise_initializer = depthwise_initializer self.pointwise_initializer = pointwise_initializer @@ -1231,9 +1232,8 @@ class Conv2DTranspose(Conv2D): def build(self, input_shape): if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - 'Received input shape:', str(input_shape)) + raise ValueError('Inputs should have rank 4. Received input shape: ' + + str(input_shape)) if self.data_format == 'channels_first': channel_axis = 1 else: diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 81a7cf28bb..bd26ff6696 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -61,7 +61,7 @@ class ReduceTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testReduceInvalidAxis(self): if context.in_eager_mode(): - # The shape check is in run a graph contruction time. In eager mode, + # The shape check is in run a graph construction time. In eager mode, # it misses the check, magically return result given wrong shape. return x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index ae2d46a2b7..3643861a16 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1697,7 +1697,7 @@ class variable_scope(object): # pylint: disable=invalid-name v1 = foo() # Creates v. v2 = foo() # Gets the same, existing v. assert v1 == v2 - + ``` Basic example of sharing a variable with reuse=True: diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index a1e4305de1..e0748d87e2 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -200,7 +200,7 @@ class Variable(object): @compatibility(eager) `tf.Variable` is not compatible with eager execution. Use - `tfe.Variable` instead which is compatable with both eager execution + `tfe.Variable` instead which is compatible with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. @@ -1064,7 +1064,7 @@ class PartitionedVariable(object): """A container for partitioned `Variable` objects. @compatibility(eager) `tf.PartitionedVariable` is not compatible with - eager execution. Use `tfe.Variable` instead which is compatable + eager execution. Use `tfe.Variable` instead which is compatible with both eager execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index c39d0fa5b1..ccfb9aac53 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -65,7 +65,7 @@ class PrintModelAnalysisTest(test.TestCase): ' ScalarW (1, 1/1 params)\n', f.read()) - def testSelectEverthingDetail(self): + def testSelectEverythingDetail(self): ops.reset_default_graph() dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0' outfile = os.path.join(test.get_temp_dir(), 'dump') diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 98ac197204..74ee1e5fa8 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -726,6 +726,8 @@ class SaverTest(test.TestCase): class SaveRestoreShardedTest(test.TestCase): + _WRITE_VERSION = saver_pb2.SaverDef.V1 + def _get_test_dir(self, dirname): test_dir = os.path.join(self.get_temp_dir(), dirname) gfile.MakeDirs(test_dir) @@ -751,6 +753,7 @@ class SaveRestoreShardedTest(test.TestCase): "t0": t0.saveable, "t1": t1.saveable }, + write_version=self._WRITE_VERSION, sharded=True) variables.global_variables_initializer().run() t0.insert("k1", 30.0).run() @@ -771,7 +774,13 @@ class SaveRestoreShardedTest(test.TestCase): with sess.graph.device("/cpu:0"): v0 = variables.Variable(111, name="v0") t0 = saver_test_utils.CheckpointedOp(name="t0") - save = saver_module.Saver({"v0": v0, "t0": t0.saveable}, sharded=True) + save = saver_module.Saver( + { + "v0": v0, + "t0": t0.saveable + }, + write_version=self._WRITE_VERSION, + sharded=True) variables.global_variables_initializer().run() t0.insert("k11", 33.0).run() self.assertEqual(111, v0.eval()) @@ -789,7 +798,13 @@ class SaveRestoreShardedTest(test.TestCase): with sess.graph.device("/cpu:0"): v1 = variables.Variable(222) t1 = saver_test_utils.CheckpointedOp(name="t1") - save = saver_module.Saver({"v1": v1, "t1": t1.saveable}, sharded=True) + save = saver_module.Saver( + { + "v1": v1, + "t1": t1.saveable + }, + write_version=self._WRITE_VERSION, + sharded=True) variables.global_variables_initializer().run() t1.insert("k22", 44.0).run() self.assertEqual(222, v1.eval()) @@ -817,6 +832,7 @@ class SaveRestoreShardedTest(test.TestCase): "t0": t0.saveable, "t1": t1.saveable }, + write_version=self._WRITE_VERSION, sharded=True) variables.global_variables_initializer().run() t0.insert("k11", 33.0).run() @@ -982,6 +998,10 @@ class SaveRestoreShardedTest(test.TestCase): self._testPartitionedVariables(use_resource=True) +class SaveRestoreShardedTestV2(SaveRestoreShardedTest): + _WRITE_VERSION = saver_pb2.SaverDef.V2 + + class MaxToKeepTest(test.TestCase): def _get_test_dir(self, dirname): diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py index 2a97d45daa..b52d101a21 100644 --- a/tensorflow/python/training/sync_replicas_optimizer.py +++ b/tensorflow/python/training/sync_replicas_optimizer.py @@ -99,7 +99,7 @@ class SyncReplicasOptimizer(optimizer.Optimizer): # Note that if you want to have 2 backup replicas, you can change # total_num_replicas=52 and make sure this number matches how many physical # replicas you started in your job. - opt = tf.SyncReplicasOptimizer(opt, replicas_to_aggregate=50, + opt = tf.train.SyncReplicasOptimizer(opt, replicas_to_aggregate=50, total_num_replicas=50) # Some models have startup_delays to help stabilize the model but when using diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index 75f482e5a8..5c066e2bef 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -116,7 +116,7 @@ def flatten(nest): used instead. The same convention is followed in `pack_sequence_as`. This correctly repacks dicts and `OrderedDict`s after they have been flattened, and also allows flattening an `OrderedDict` and then repacking it back using - a correponding plain dict, or vice-versa. + a corresponding plain dict, or vice-versa. Dictionaries with non-sortable keys cannot be flattened. Users must not modify any collections used in `nest` while this function is @@ -296,7 +296,7 @@ def pack_sequence_as(structure, flat_sequence): keys is used instead. The same convention is followed in `flatten`. This correctly repacks dicts and `OrderedDict`s after they have been flattened, and also allows flattening an `OrderedDict` and then repacking it - back using a correponding plain dict, or vice-versa. + back using a corresponding plain dict, or vice-versa. Dictionaries with non-sortable keys cannot be flattened. Args: @@ -452,6 +452,17 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True): "structure has length %s, while shallow structure has length %s." % (len(input_tree), len(shallow_tree))) + if check_types and isinstance(shallow_tree, dict): + if set(input_tree) != set(shallow_tree): + raise ValueError( + "The two structures don't have the same keys. Input " + "structure has keys %s, while shallow structure has keys %s." % + (list(_six.iterkeys(input_tree)), + list(_six.iterkeys(shallow_tree)))) + + input_tree = list(_six.iteritems(input_tree)) + shallow_tree = list(_six.iteritems(shallow_tree)) + for shallow_branch, input_branch in zip(shallow_tree, input_tree): assert_shallow_structure(shallow_branch, input_branch, check_types=check_types) diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py index c4020f4f3c..3d9e9f9684 100644 --- a/tensorflow/python/util/nest_test.py +++ b/tensorflow/python/util/nest_test.py @@ -385,6 +385,16 @@ class NestTest(test.TestCase): nest.assert_shallow_structure(inp_ab2, inp_ab1) nest.assert_shallow_structure(inp_ab2, inp_ab1, check_types=False) + inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}} + inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}} + expected_message = ( + r"The two structures don't have the same keys. Input " + r"structure has keys \['c'\], while shallow structure has " + r"keys \['d'\].") + + with self.assertRaisesRegexp(ValueError, expected_message): + nest.assert_shallow_structure(inp_ab2, inp_ab1) + def testFlattenUpTo(self): # Shallow tree ends at scalar. input_tree = [[[2, 2], [3, 3]], [[4, 9], [5, 5]]] @@ -429,8 +439,7 @@ class NestTest(test.TestCase): input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree, input_tree) self.assertEqual(input_tree_flattened_as_shallow_tree, [0, 1, 2, 3, 4]) - shallow_tree = collections.OrderedDict([("a", 0), - ("b", {"d": 3, "e": 1})]) + shallow_tree = collections.OrderedDict([("a", 0), ("c", {"d": 3, "e": 1})]) input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree, input_tree) self.assertEqual(input_tree_flattened_as_shallow_tree, diff --git a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt index 6620a9d308..7de4008c45 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt @@ -29,7 +29,7 @@ tf_class { } member_method { name: "on_train_end" - argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "set_model" diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc index 2d59299da4..9809ad52de 100644 --- a/tensorflow/tools/benchmark/benchmark_model.cc +++ b/tensorflow/tools/benchmark/benchmark_model.cc @@ -622,7 +622,7 @@ int Main(int argc, char** argv) { RecordBenchmarkEntry(output_prefix, benchmark_name, "meta-first-inference", warmup_runs, warmup_time_us / 1000000.0); - // Time from starting to intialize TF to getting the first result back. + // Time from starting to initialize TF to getting the first result back. // This also assumes that only one warmup run is performed. RecordBenchmarkEntry( output_prefix, benchmark_name, "meta-init-plus-first-inference", 1, diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index 4f1c61b8e9..358f82ac5d 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -76,17 +76,17 @@ echo "PYTHON_BIN_PATH: ${PYTHON_BIN_PATH}" pushd "${TMP_DIR}" -# Obtain paths include and lib paths to the TensorFlow installation -TF_INC=$("${PYTHON_BIN_PATH}" \ - -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') -TF_LIB=$("${PYTHON_BIN_PATH}" \ - -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') - -if [[ -z "${TF_INC}" ]]; then - die "FAILED to determine TensorFlow include path" +# Obtain compilation and linking flags +TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \ + -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) +TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \ + -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) + +if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then + die "FAILED to determine TensorFlow compilation or linking flags" else - echo "TensorFlow include path: ${TF_INC}" - TF_INCLUDE_PATH="-I${TF_INC} -I${TF_INC}/external/nsync/public" + echo "TensorFlow compile flags: ${TF_CFLAGS[@]}" + echo "TensorFlow link flags: ${TF_LFLAGS[@]}" fi # Check g++ availability @@ -145,7 +145,7 @@ if [[ ${IS_GPU} == "0" ]]; then "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \ -shared "${SRC_FILE}" -o "${USER_OP_SO}" \ - -fPIC ${TF_INCLUDE_PATH} -L "${TF_LIB}" -ltensorflow_framework || \ + -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} || \ die "g++ compilation of ${SRC_FILE} FAILED" else @@ -184,7 +184,7 @@ else OP_KERNEL_O=$(echo "${OP_KERNEL_CC}" | sed -e 's/\.cc/\.o/') "${NVCC_BIN}" -std=c++11 \ -c -o "${OP_KERNEL_O}" "${OP_KERNEL_CU}" \ - ${TF_INCLUDE_PATH} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \ + ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \ die "nvcc compilation of ${OP_KERNEL_CC} FAILED" CUDA_LIB_DIR="/usr/local/cuda/lib64" @@ -203,8 +203,8 @@ else USER_OP_SO="add_one.so" "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \ -shared -o "${USER_OP_SO}" "${OP_KERNEL_CC}" \ - "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" -L "${TF_LIB}" \ - -fPIC -lcudart -ltensorflow_framework || \ + "${OP_KERNEL_O}" ${TF_CFLAGS[@]} -L "${CUDA_LIB_DIR}" ${TF_LFLAGS[@]} \ + -fPIC -lcudart || \ die "g++ compilation of ${OP_KERNEL_CC}" FAILED fi diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index c27f4953e3..2217b110e3 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -546,8 +546,8 @@ echo "" TMP_DIR="" DOCKERFILE_FLAG="" -if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ] || - ["${TF_BUILD_PYTHON_VERSION}" == "python3.6" ]]; then +if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ]] || + [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.6" ]]; then # Modify Dockerfile for Python3.5 | Python3.6 build TMP_DIR=$(mktemp -d) echo "Docker build will occur in temporary directory: ${TMP_DIR}" diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat index 6e600e2dcf..56bff07774 100644 --- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat @@ -37,4 +37,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% :: Run msbuild in the resulting VS project files to build a pip package. -%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj \ No newline at end of file +%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index f6e3d2e6c7..8520ca898f 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -64,7 +64,7 @@ reinstall_tensorflow_pip ${PIP_NAME} # https://github.com/tensorflow/tensorflow/issues/12844 is fixed. bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ - --test_tag_filters=-no_pip,-no_windows \ - --build_tag_filters=-no_pip,-no_windows --build_tests_only \ + --test_tag_filters=-no_pip,-no_windows,-no_oss \ + --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ --test_env=TF_SAVER_LENIENT_NAMES=True \ //${PY_TEST_DIR}/tensorflow/python/... diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index 44d8252a7a..832943ad6c 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -38,4 +38,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% :: Run msbuild in the resulting VS project files to build a pip package. -%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj +%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 25d327c818..47ca42d642 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -65,7 +65,7 @@ reinstall_tensorflow_pip ${PIP_NAME} # https://github.com/tensorflow/tensorflow/issues/12844 is fixed. bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ - --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \ - --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \ + --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \ + --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \ --test_env=TF_SAVER_LENIENT_NAMES=True \ --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/... diff --git a/tensorflow/tools/dist_test/python/census_widendeep.py b/tensorflow/tools/dist_test/python/census_widendeep.py index 3a55781496..8feb5386e9 100644 --- a/tensorflow/tools/dist_test/python/census_widendeep.py +++ b/tensorflow/tools/dist_test/python/census_widendeep.py @@ -263,8 +263,7 @@ if __name__ == "__main__": "--data_dir", type=str, default="/tmp/census-data", - help="Directory for storing the cesnsus data" - ) + help="Directory for storing the census data") parser.add_argument( "--model_dir", type=str, diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 1a0145b078..3525c7524f 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -101,4 +101,3 @@ EXPOSE 6006 EXPOSE 8888 WORKDIR /root -CMD ["/bin/bash"] diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl new file mode 100644 index 0000000000..8180e5e7fb --- /dev/null +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -0,0 +1,85 @@ +FROM tensorflow/tensorflow:latest-devel + +LABEL maintainer="Clayne Robison" + +# These arguments are parameterized. Use --build-args to override. +ARG TF_BRANCH=r1.4 +ARG WHL_DIR=/whl + +RUN apt-get update && apt-get install -y --no-install-recommends \ + golang \ + vim \ + emacs \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN pip --no-cache-dir install --upgrade \ + pip setuptools + +RUN pip --no-cache-dir install wheel + +# Download and build TensorFlow. +WORKDIR / +RUN rm -rf tensorflow && \ + git clone https://github.com/tensorflow/tensorflow.git && \ + cd tensorflow && \ + git checkout ${TF_BRANCH} +WORKDIR /tensorflow + +# Configure the build for CPU with MKL by accepting default build options and +# setting library locations +ENV CI_BUILD_PYTHON=python \ + LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + PYTHON_BIN_PATH=/usr/bin/python \ + PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \ + CC_OPT_FLAGS='-march=native' \ + TF_NEED_JEMALLOC=0 \ + TF_NEED_GCP=0 \ + TF_NEED_CUDA=0 \ + TF_NEED_HDFS=0 \ + TF_NEED_S3=0 \ + TF_NEED_OPENCL=0 \ + TF_NEED_GDR=0 \ + TF_ENABLE_XLA=0 \ + TF_NEED_VERBS=0 \ + TF_NEED_MPI=0 +RUN ./configure + +# Build and Install TensorFlow. +# The 'mkl' option builds with Intel(R) Math Kernel Library (MKL), which detects +# the platform it is currently running on and takes appropriately optimized +# paths. The -march=native option is for code that is not in MKL, and assumes +# this container will be run on the same architecture on which it is built. +RUN LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \ + bazel build --config=mkl \ + --config="opt" \ + --copt="-march=native" \ + --copt="-O3" \ + //tensorflow/tools/pip_package:build_pip_package && \ + mkdir ${WHL_DIR} && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package ${WHL_DIR} + +# Clean up Bazel cache when done, but leave the whl. +# This will upgrade the default Tensorflow version with the Intel MKL version +RUN pip --no-cache-dir install --upgrade ${WHL_DIR}/tensorflow-*.whl && \ + rm -rf /root/.cache + +WORKDIR /root + +#add welcome message with instructions + +RUN echo '[ ! -z "$TERM" -a -r /etc/motd ] && cat /etc/issue && cat /etc/motd' \ + >> /etc/bash.bashrc \ + ; echo "\ +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\ +| \n\ +| Docker container running Ubuntu \n\ +| with TensorFlow ${TF_BRANCH} optimized for CPU \n\ +| with Intel(R) MKL \n\ +| \n\ +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\ +\n "\ + > /etc/motd + +CMD ["/bin/bash"] diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 21a44ee404..041f45971b 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -102,5 +102,3 @@ WORKDIR /root EXPOSE 6006 # IPython EXPOSE 8888 - -RUN ["/bin/bash"] diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 index 9bcc3925a8..3bedc8cf34 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 @@ -113,5 +113,3 @@ WORKDIR /root EXPOSE 6006 # IPython EXPOSE 8888 - -RUN ["/bin/bash"] diff --git a/tensorflow/tools/docker/notebooks/2_getting_started.ipynb b/tensorflow/tools/docker/notebooks/2_getting_started.ipynb index e171b439fe..b0963ebc3f 100644 --- a/tensorflow/tools/docker/notebooks/2_getting_started.ipynb +++ b/tensorflow/tools/docker/notebooks/2_getting_started.ipynb @@ -159,7 +159,7 @@ "X = np.array([np.linspace(-2, 4, num_examples), np.linspace(-6, 6, num_examples)])\n", "X += np.random.randn(2, num_examples)\n", "x, y = X\n", - "x_with_bias = np.array([(1., a) for a in x]).astype(np.float32)\n", + "bias_with_x = np.array([(1., a) for a in x]).astype(np.float32)\n", "\n", "losses = []\n", "training_steps = 50\n", @@ -167,7 +167,7 @@ "\n", "with tf.Session() as sess:\n", " # Set up all the tensors, variables, and operations.\n", - " input = tf.constant(x_with_bias)\n", + " input = tf.constant(bias_with_x)\n", " target = tf.constant(np.transpose([y]).astype(np.float32))\n", " weights = tf.Variable(tf.random_normal([2, 1], 0, 0.1))\n", "\n", @@ -583,7 +583,7 @@ "# Split into x and y\n", "x, y = X\n", "# Add the bias node which always has a value of 1\n", - "x_with_bias = np.array([(1., a) for a in x]).astype(np.float32)\n", + "bias_with_x = np.array([(1., a) for a in x]).astype(np.float32)\n", "\n", "# Keep track of the loss at each iteration so we can chart it later\n", "losses = []\n", @@ -598,7 +598,7 @@ "with tf.Session() as sess:\n", " # Set up all the tensors.\n", " # Our input layer is the x value and the bias node.\n", - " input = tf.constant(x_with_bias)\n", + " input = tf.constant(bias_with_x)\n", " # Our target is the y values. They need to be massaged to the right shape.\n", " target = tf.constant(np.transpose([y]).astype(np.float32))\n", " # Weights are a variable. They change every time through the loop.\n", @@ -621,7 +621,7 @@ " loss = tf.nn.l2_loss(yerror)\n", "\n", " # Perform gradient descent. \n", - " # This essentially just updates weights, like weights += grads * learning_rate\n", + " # This essentially just updates weights, like weights -= grads * learning_rate\n", " # using the partial derivative of the loss with respect to the\n", " # weights. It's the direction we want to go to move toward lower error.\n", " update_weights = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)\n", @@ -743,7 +743,7 @@ "with tf.Session() as sess:\n", " # Set up all the tensors.\n", " # The input is the x values with the bias appended on to each x.\n", - " input = tf.constant(x_with_bias)\n", + " input = tf.constant(bias_with_x)\n", " # We're trying to find the best fit for the target y values.\n", " target = tf.constant(np.transpose([y]).astype(np.float32))\n", " # Let's set up the weights randomly\n", diff --git a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb index 614a19c178..5585ebdcd3 100644 --- a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb +++ b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb @@ -135,6 +135,8 @@ "from six.moves.urllib.request import urlretrieve\n", "\n", "SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/'\n", + "#SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'\n", + "# for those who have no access to google storage, use lecun's repo please\n", "WORK_DIRECTORY = \"/tmp/mnist-data\"\n", "\n", "def maybe_download(filename):\n", diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index 3677aaa886..cc46dd5162 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -66,9 +66,6 @@ BLACKLIST = [ "//tensorflow/contrib/timeseries/examples:data/period_trend.csv", # pylint:disable=line-too-long "//tensorflow/contrib/timeseries/python/timeseries:test_utils", "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils", # pylint:disable=line-too-long - - # TODO(yifeif): Remove when py_library(testonly=1) is ignored. - "//tensorflow/contrib/summary:summary_test_internal", ] diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index a493c6f2aa..3852b251d9 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -33,11 +33,16 @@ _VERSION = '1.4.0' REQUIRED_PACKAGES = [ 'absl-py', - 'enum34 >= 1.1.6', + # weakref.finalize introduced in Python 3.4 + 'backports.weakref >= 1.0rc1; python_version < "3.4"', + # enum module introduced in Python 3.4 + 'enum34 >= 1.1.6; python_version < "3.4"', + # Needed for unittest.mock in Python 2 + 'mock >= 2.0.0; python_version < "3.0"', 'numpy >= 1.12.1', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorflow-tensorboard >= 0.4.0rc1, < 0.5.0', + 'tensorflow-tensorboard', ] project_name = 'tensorflow' @@ -52,20 +57,14 @@ if sys.version_info.major == 3: REQUIRED_PACKAGES.append('wheel >= 0.26') else: REQUIRED_PACKAGES.append('wheel') - # mock comes with unittest.mock for python3, need to install for python2 - REQUIRED_PACKAGES.append('mock >= 2.0.0') -# remove tensorboard from tf-nightly packages +# tf-nightly should depend on tb-nightly if 'tf_nightly' in project_name: - for package in REQUIRED_PACKAGES: - if 'tensorflow-tensorboard' in package: - REQUIRED_PACKAGES.remove(package) + for i, pkg in enumerate(REQUIRED_PACKAGES): + if 'tensorboard' in pkg: + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0' break -# weakref.finalize was introduced in Python 3.4 -if sys.version_info < (3, 4): - REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1') - # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ 'freeze_graph = tensorflow.python.tools.freeze_graph:main', @@ -76,13 +75,13 @@ CONSOLE_SCRIPTS = [ # is now declared by the tensorboard pip package. If we remove the # TensorBoard command, pip will inappropriately remove it during install, # even though the command is not removed, just moved to a different wheel. - 'tensorboard = tensorboard.main:main', + 'tensorboard = tensorboard.main:run_main', ] # pylint: enable=line-too-long # remove the tensorboard console script if building tf_nightly if 'tf_nightly' in project_name: - CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:main') + CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main') TEST_PACKAGES = [ 'scipy >= 0.15.1', diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 68d663acfc..b61012f71e 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -80,7 +80,7 @@ def _apply_patch(repo_ctx, patch_file): bazel_sh = _get_env_var(repo_ctx, "BAZEL_SH") if not bazel_sh: fail("BAZEL_SH environment variable is not set") - cmd = [bazel_sh, "-c", " ".join(cmd)] + cmd = [bazel_sh, "-l", "-c", " ".join(cmd)] _execute_and_check_ret_code(repo_ctx, cmd) # Download the repository and apply a patch to its root diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD index e1563103c8..0a76adcf91 100644 --- a/third_party/flatbuffers/flatbuffers.BUILD +++ b/third_party/flatbuffers/flatbuffers.BUILD @@ -6,8 +6,11 @@ licenses(["notice"]) # Apache 2.0 FLATBUFFERS_COPTS = [ "-fexceptions", - "-Wno-implicit-fallthrough", -] +] + select({ + "@bazel_tools//src:windows": [], + "@bazel_tools//src:windows_msvc": [], + "//conditions:default": ["-Wno-implicit-fallthrough"], +}) # Public flatc library to compile flatbuffer files at runtime. cc_library( diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index f637873f14..8b73ddabdd 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -20,7 +20,7 @@ def if_mkl(if_true, if_false = []): """ return select({ - "//third_party/mkl:using_mkl": if_true, + str(Label("//third_party/mkl:using_mkl")): if_true, "//conditions:default": if_false }) diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD index 3a2a3afe46..b2b8e18824 100644 --- a/third_party/nccl.BUILD +++ b/third_party/nccl.BUILD @@ -55,7 +55,7 @@ cc_library( ], "@org_tensorflow//tensorflow:ios": [], "@org_tensorflow//tensorflow:windows": [ - "ws2_32.lib", + "-DEFAULTLIB:ws2_32.lib", ], "//conditions:default": [ "-lrt", diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl index bbc07905fc..c16eb3a12a 100644 --- a/third_party/py/python_configure.bzl +++ b/third_party/py/python_configure.bzl @@ -1,11 +1,8 @@ -# -*- Python -*- """Repository rule for Python autoconfiguration. `python_configure` depends on the following environment variables: - * `NUMPY_INCLUDE_PATH`: Location of Numpy libraries. * `PYTHON_BIN_PATH`: location of python binary. - * `PYTHON_INCLUDE_PATH`: Location of python binaries. * `PYTHON_LIB_PATH`: Location of python libraries. """ @@ -23,32 +20,13 @@ def _tpl(repository_ctx, tpl, substitutions={}, out=None): substitutions) -def _python_configure_warning(msg): - """Output warning message during auto configuration.""" - yellow = "\033[1;33m" - no_color = "\033[0m" - print("%sPython Configuration Warning:%s %s" % (yellow, no_color, msg)) - - -def _python_configure_fail(msg): +def _fail(msg): """Output failure message when auto configuration fails.""" red = "\033[0;31m" no_color = "\033[0m" fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg)) -def _get_env_var(repository_ctx, name, default = None, enable_warning = True): - """Find an environment variable in system path.""" - if name in repository_ctx.os.environ: - return repository_ctx.os.environ[name] - if default != None: - if enable_warning: - _python_configure_warning( - "'%s' environment variable is not set, using '%s' as default" % (name, default)) - return default - _python_configure_fail("'%s' environment variable is not set" % name) - - def _is_windows(repository_ctx): """Returns true if the host operating system is windows.""" os_name = repository_ctx.os.name.lower() @@ -73,11 +51,10 @@ def _execute(repository_ctx, cmdline, error_msg=None, error_details=None, """ result = repository_ctx.execute(cmdline) if result.stderr or not (empty_stdout_fine or result.stdout): - _python_configure_fail( - "\n".join([ - error_msg.strip() if error_msg else "Repository command failed", - result.stderr.strip(), - error_details if error_details else ""])) + _fail("\n".join([ + error_msg.strip() if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else ""])) return result @@ -163,21 +140,23 @@ def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name, def _get_python_bin(repository_ctx): """Gets the python bin path.""" - python_bin = _get_env_var(repository_ctx, _PYTHON_BIN_PATH, - None, False) + python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) if python_bin != None: return python_bin python_bin_path = repository_ctx.which("python") if python_bin_path != None: return str(python_bin_path) - path = _get_env_var(repository_ctx, "PATH") - _python_configure_fail("Cannot find python in PATH, please make sure " + - "python is installed and add its directory in PATH, or set the " + - "environment variable PYTHON_BIN_PATH.\nPATH=%s" % (path)) + _fail("Cannot find python in PATH, please make sure " + + "python is installed and add its directory in PATH, or --define " + + "%s='/something/else'.\nPATH=%s" % ( + _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", ""))) def _get_python_lib(repository_ctx, python_bin): """Gets the python lib path.""" + python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH) + if python_lib != None: + return python_lib print_lib = ("< Date: Fri, 1 Dec 2017 08:58:17 +0800 Subject: [PATCH 1018/1801] Update input_fn.md (#14992) --- tensorflow/docs_src/get_started/input_fn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index f0dcdc47ff..24bfdbdd2e 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -292,7 +292,7 @@ prediction_set = pd.read_csv("boston_predict.csv", skipinitialspace=True, Next, create a list of `FeatureColumn`s for the input data, which formally specify the set of features to use for training. Because all features in the housing data set contain continuous values, you can create their -`FeatureColumn`s using the `tf.contrib.layers.real_valued_column()` function: +`FeatureColumn`s using the `tf.feature_column.numeric_column()` function: ```python feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES] -- GitLab From 7ab54c4c48f35a4107e6170cefe5c93245595601 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 17:37:21 -0800 Subject: [PATCH 1019/1801] Support compressed TensorProto format in constant folding for types iny16, int8, uint8, and bool, in addition to float ,double, int32, and int64, which were already supported. Add unit test for all types. PiperOrigin-RevId: 177533200 --- .../grappler/optimizers/constant_folding.cc | 24 +++++-- .../optimizers/constant_folding_test.cc | 63 ++++++++++++++++++- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index cf913d6f48..e0f39c2931 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -657,9 +657,9 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { namespace { -#define SET_TENSOR_VAL_CASE(DTYPE, TYPE) \ +#define SET_TENSOR_VAL_CASE(DTYPE, TYPE, NAME) \ case DTYPE: \ - t->add_##TYPE##_val(static_cast(value)); \ + t->add_##NAME##_val(static_cast(value)); \ break; Status CreateConstantTensorAttrValue(DataType type, double value, @@ -668,10 +668,14 @@ Status CreateConstantTensorAttrValue(DataType type, double value, TensorProto* t = attr_tensor->mutable_tensor(); *t->mutable_tensor_shape() = shape; switch (type) { - SET_TENSOR_VAL_CASE(DT_FLOAT, float); - SET_TENSOR_VAL_CASE(DT_DOUBLE, double); - SET_TENSOR_VAL_CASE(DT_INT64, int64); - SET_TENSOR_VAL_CASE(DT_INT32, int); + SET_TENSOR_VAL_CASE(DT_FLOAT, float, float); + SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double); + SET_TENSOR_VAL_CASE(DT_INT64, int64, int64); + SET_TENSOR_VAL_CASE(DT_INT32, int32, int); + SET_TENSOR_VAL_CASE(DT_INT16, int32, int); + SET_TENSOR_VAL_CASE(DT_INT8, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); + SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); default: return errors::InvalidArgument("Unsupported type: ", type); } @@ -721,6 +725,14 @@ NodeDef ConstantFolding::CreateNodeDef(const string& name, POPULATE_TENSOR_PROTO(tensor, t, int64, int64) } else if (tensor->dtype() == DT_INT32) { POPULATE_TENSOR_PROTO(tensor, t, int32, int) + } else if (tensor->dtype() == DT_INT16) { + POPULATE_TENSOR_PROTO(tensor, t, int16, int) + } else if (tensor->dtype() == DT_INT8) { + POPULATE_TENSOR_PROTO(tensor, t, int8, int) + } else if (tensor->dtype() == DT_UINT8) { + POPULATE_TENSOR_PROTO(tensor, t, uint8, int) + } else if (tensor->dtype() == DT_BOOL) { + POPULATE_TENSOR_PROTO(tensor, t, bool, bool) } } if (optimized) { diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index c72ed96520..32a691d3ee 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -173,11 +173,70 @@ TEST_F(ConstantFoldingTest, NeutralElement) { } } +TEST_F(ConstantFoldingTest, CreateConstNodes) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + +#define MAKE_TEST_GRAPH(TYPE) \ + Output TYPE##_const = \ + ops::Const(s.WithOpName(#TYPE "_const"), static_cast(10), {5}); \ + Output TYPE##_mul = \ + ops::Mul(s.WithOpName(#TYPE "_mul"), TYPE##_const, TYPE##_const); \ + Output TYPE##_id = ops::Identity(s.WithOpName(#TYPE "_id"), TYPE##_mul) + + MAKE_TEST_GRAPH(float); + MAKE_TEST_GRAPH(double); + MAKE_TEST_GRAPH(int64); + MAKE_TEST_GRAPH(int32); + MAKE_TEST_GRAPH(int16); + MAKE_TEST_GRAPH(int8); + MAKE_TEST_GRAPH(uint8); +#undef MAKE_TEST_GRAPH + + Output bool_const = ops::Const(s.WithOpName("bool_const"), true, {5}); + Output bool_and = + ops::LogicalAnd(s.WithOpName("bool_and"), bool_const, bool_const); + Output bool_id = ops::Identity(s.WithOpName("bool_id"), bool_and); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(24, output.node_size()); + for (const NodeDef& node : output.node()) { +#define CHECK_RESULT(TYPE, FIELD) \ + if (node.name() == #TYPE "_mul") { \ + EXPECT_EQ(5, \ + node.attr().at("value").tensor().tensor_shape().dim(0).size()); \ + EXPECT_EQ(1, node.attr().at("value").tensor().FIELD##_val_size()); \ + EXPECT_EQ(10 * 10, node.attr().at("value").tensor().FIELD##_val(0)); \ + } + + CHECK_RESULT(float, float); + CHECK_RESULT(double, double); + CHECK_RESULT(int64, int64); + CHECK_RESULT(int32, int); + CHECK_RESULT(int16, int); + CHECK_RESULT(int8, int); + CHECK_RESULT(uint8, int); +#undef CHECK_RESULT + + if (node.name() == "bool_and") { + EXPECT_EQ(5, + node.attr().at("value").tensor().tensor_shape().dim(0).size()); + EXPECT_EQ(1, node.attr().at("value").tensor().bool_val_size()); + EXPECT_EQ(true && true, node.attr().at("value").tensor().bool_val(0)); + } + } +} + TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { // Build a simple graph with a few trivially prunable ops. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = ops::Const(s.WithOpName("a"), 10, {3}); + Output a = ops::Const(s.WithOpName("a"), 10, {5}); auto b = ops::Unique(s.WithOpName("b"), {a}); Output c = ops::Identity(s.WithOpName("c"), {b.y}); Output d = ops::Identity(s.WithOpName("d"), {b.idx}); @@ -1059,3 +1118,5 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { } // namespace } // namespace grappler } // namespace tensorflow + +// LocalWords: NewRootScope -- GitLab From 1a89cf58c021ef176c624b4070ee8422303e29a2 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Thu, 30 Nov 2017 18:01:03 -0800 Subject: [PATCH 1020/1801] Output unknown dimension root nodes with --vmodule=graph_properties=2 PiperOrigin-RevId: 177535370 --- .../core/grappler/costs/graph_properties.cc | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index fbc52e9bd1..ec44d11bdd 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -265,6 +265,79 @@ bool IsEnterWithQueue(const Node& node) { return false; } +bool HasAnyUnknownDimensions(const TensorShapeProto& proto) { + if (proto.unknown_rank()) { + return true; + } + for (const auto& dim : proto.dim()) { + if (dim.size() < 0) { + return true; + } + } + return false; +} + +void VerboseLogUnknownDimensionSources( + const Graph& graph, + const std::map>& + input_properties_map, + const std::map>& + output_properties_map) { + if (!VLOG_IS_ON(2)) { + return; + } + + VLOG(2) << "Nodes with known inputs, but with unknown output dimensions:"; + + // Find all nodes in the graph for which we + // do not have any unknown dimensions in their inputs, but + // we have some unknown dimensions in their outputs. + for (const Node* const node : graph.nodes()) { + if (node->num_outputs() == 0) { + continue; + } + + const auto& input_properties = input_properties_map.at(node->name()); + const auto& output_properties = output_properties_map.at(node->name()); + + bool has_unknown_inputs = false; + for (int i = 0; i < node->num_inputs(); ++i) { + if (HasAnyUnknownDimensions(input_properties[i].shape())) { + has_unknown_inputs = true; + break; + } + } + + if (has_unknown_inputs) { + continue; + } + + for (int i = 0; i < node->num_outputs(); ++i) { + if (HasAnyUnknownDimensions(output_properties[i].shape())) { + string inputs = "input_shapes=["; + for (int i = 0; i < node->num_inputs(); ++i) { + inputs += + PartialTensorShape::DebugString(input_properties[i].shape()); + } + inputs += "]"; + + string outputs = "output_shapes=["; + for (int i = 0; i < node->num_outputs(); ++i) { + outputs += + PartialTensorShape::DebugString(output_properties[i].shape()); + } + outputs += "]"; + + VLOG(2) << "Node: " << node->name() << ", Op: " << node->def().op() + << ", " << inputs << ", " << outputs; + + // don't log again for this node + break; + } + } + } +} + } // namespace // Queue of nodes to process. Nodes can be enqueued in any order, but will be @@ -1000,6 +1073,10 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) { } } + // Help trace the unknown dimensions to their origins. + VerboseLogUnknownDimensionSources(graph, input_properties_, + output_properties_); + return Status::OK(); } -- GitLab From 6e16af86658cd27b466c7c3ba270338b8f95f184 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 30 Nov 2017 18:24:27 -0800 Subject: [PATCH 1021/1801] Register more ops with bfloat16 types. PiperOrigin-RevId: 177537667 --- .../compiler/tf2xla/kernels/matmul_op.cc | 5 +- .../compiler/tf2xla/kernels/scan_ops.cc | 13 +- .../contrib/tpu/ops/cross_replica_ops.cc | 2 +- tensorflow/core/framework/numeric_types.h | 6 +- .../core/framework/op_def_builder_test.cc | 15 ++- tensorflow/core/framework/types.cc | 24 ++-- tensorflow/core/ops/array_ops.cc | 32 +++-- tensorflow/core/ops/math_ops.cc | 119 +++++++++--------- tensorflow/core/ops/nn_ops.cc | 66 +++++----- tensorflow/core/ops/random_ops.cc | 8 +- 10 files changed, 150 insertions(+), 140 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc index a62d233526..644abd5905 100644 --- a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc @@ -85,10 +85,7 @@ class SparseMatMulOp : public MatMulOp { ~SparseMatMulOp() override = default; }; -REGISTER_XLA_OP(Name("SparseMatMul") - .TypeConstraint("Ta", kFloatTypes) - .TypeConstraint("Tb", kFloatTypes), - SparseMatMulOp); +REGISTER_XLA_OP(Name("SparseMatMul"), SparseMatMulOp); } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc index 3cc9d14411..650f8c7dc8 100644 --- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc @@ -35,6 +35,11 @@ limitations under the License. namespace tensorflow { namespace { +// TODO(phawkins): implement double-sized windowed reductions in XLA and remove +// the type constraint. +constexpr std::array kScanOpTypes = { + {DT_HALF, DT_BFLOAT16, DT_FLOAT}}; + class ScanOp : public XlaOpKernel { public: ScanOp(OpKernelConstruction* ctx, bool sum) : XlaOpKernel(ctx), sum_(sum) { @@ -124,17 +129,13 @@ class CumsumOp : public ScanOp { public: explicit CumsumOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/true) {} }; -// TODO(phawkins): implement non-float windowed reductions in XLA and remove the -// type constraint. -REGISTER_XLA_OP(Name("Cumsum").TypeConstraint("T", DT_FLOAT), CumsumOp); +REGISTER_XLA_OP(Name("Cumsum").TypeConstraint("T", kScanOpTypes), CumsumOp); class CumprodOp : public ScanOp { public: explicit CumprodOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/false) {} }; -// TODO(phawkins): implement non-float windowed reductions in XLA and remove the -// type constraint. -REGISTER_XLA_OP(Name("Cumprod").TypeConstraint("T", DT_FLOAT), CumprodOp); +REGISTER_XLA_OP(Name("Cumprod").TypeConstraint("T", kScanOpTypes), CumprodOp); } // anonymous namespace } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc index cbbd19800e..d389050e67 100644 --- a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc +++ b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc @@ -22,7 +22,7 @@ namespace tensorflow { REGISTER_OP("CrossReplicaSum") .Input("input: T") .Output("output: T") - .Attr("T: {float}") + .Attr("T: {bfloat16, float}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( An Op to sum inputs across replicated TPU instances. Each diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index 29cac26244..bdd5af064b 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -58,7 +58,7 @@ struct bfloat16 { explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) : bfloat16(static_cast(val)) {} - EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { + EIGEN_DEVICE_FUNC explicit operator float() const { float result; uint16_t* q = reinterpret_cast(&result); @@ -89,6 +89,10 @@ struct bfloat16 { return static_cast(float(*this)); } + EIGEN_DEVICE_FUNC explicit operator long() const { + return static_cast(float(*this)); + } + EIGEN_DEVICE_FUNC explicit operator char() const { return static_cast(float(*this)); } diff --git a/tensorflow/core/framework/op_def_builder_test.cc b/tensorflow/core/framework/op_def_builder_test.cc index c1511ebe34..9b24e3aa00 100644 --- a/tensorflow/core/framework/op_def_builder_test.cc +++ b/tensorflow/core/framework/op_def_builder_test.cc @@ -124,22 +124,23 @@ TEST_F(OpDefBuilderTest, AttrWithRestrictions) { "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, " - "DT_QINT32, DT_UINT32, DT_UINT64] } } }"); + "DT_QINT32, DT_UINT32, DT_UINT64, DT_BFLOAT16] } } }"); ExpectSuccess( b().Attr("a:{numbertype, variant}"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, " - "DT_QINT32, DT_UINT32, DT_UINT64, DT_VARIANT] } } }"); + "DT_QINT32, DT_UINT32, DT_UINT64, DT_BFLOAT16, DT_VARIANT] } } }"); ExpectSuccess(b().Attr("a:realnumbertype"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, " - "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64] } } }"); + "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64, " + "DT_BFLOAT16] } } }"); ExpectSuccess(b().Attr("a:{realnumbertype, variant , string, }"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, " "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64, " - "DT_VARIANT, DT_STRING] } } }"); + "DT_BFLOAT16, DT_VARIANT, DT_STRING] } } }"); ExpectSuccess(b().Attr("a:quantizedtype"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16]} } }"); @@ -216,12 +217,14 @@ TEST_F(OpDefBuilderTest, AttrListOfRestricted) { b().Attr("a:list(realnumbertype)"), "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: " "[DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " - "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64] } } }"); + "DT_UINT16, DT_INT8, DT_HALF, DT_BFLOAT16, DT_UINT32, DT_UINT64" + "] } } }"); ExpectSuccess( b().Attr("a:list({realnumbertype, variant})"), "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: " "[DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " - "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64, DT_VARIANT] } } }"); + "DT_UINT16, DT_INT8, DT_HALF, DT_BFLOAT16, DT_UINT32, DT_UINT64, " + "DT_VARIANT] } } }"); ExpectSuccess( b().Attr("a:list(quantizedtype)"), "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: " diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc index faae19585d..48849f9dda 100644 --- a/tensorflow/core/framework/types.cc +++ b/tensorflow/core/framework/types.cc @@ -206,18 +206,18 @@ string DataTypeSliceString(const DataTypeSlice types) { } DataTypeVector AllTypes() { - return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, - DT_UINT16, DT_INT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, - DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT16, - DT_QUINT16, DT_QINT32, DT_HALF, DT_RESOURCE, DT_VARIANT, - DT_UINT32, DT_UINT64}; + return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, + DT_UINT16, DT_INT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, + DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT16, + DT_QUINT16, DT_QINT32, DT_HALF, DT_RESOURCE, DT_VARIANT, + DT_UINT32, DT_UINT64, DT_BFLOAT16}; } #if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION) DataTypeVector RealNumberTypes() { - return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, - DT_INT8, DT_UINT16, DT_HALF, DT_UINT32, DT_UINT64}; + return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, + DT_INT8, DT_UINT16, DT_HALF, DT_UINT32, DT_UINT64, DT_BFLOAT16}; } DataTypeVector QuantizedTypes() { @@ -227,14 +227,14 @@ DataTypeVector QuantizedTypes() { DataTypeVector RealAndQuantizedTypes() { return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT16, DT_INT8, DT_QINT8, DT_QUINT8, - DT_QINT16, DT_QUINT16, DT_QINT32, DT_HALF}; + DT_QINT16, DT_QUINT16, DT_QINT32, DT_HALF, DT_BFLOAT16}; } DataTypeVector NumberTypes() { - return {DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, - DT_UINT8, DT_UINT16, DT_INT16, DT_INT8, - DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, - DT_QINT32, DT_HALF, DT_UINT32, DT_UINT64}; + return {DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, + DT_UINT16, DT_INT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, + DT_QINT8, DT_QUINT8, DT_QINT32, DT_HALF, DT_UINT32, + DT_UINT64, DT_BFLOAT16}; } #elif defined(__ANDROID_TYPES_FULL__) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 9fa6423d59..6f4ea09206 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -724,8 +724,8 @@ REGISTER_OP("OnesLike") .Input("x: T") .Output("y: T") .Attr( - "T: {float, double, int8, uint8, int16, uint16, int32, int64, " - "complex64, complex128, bool}") + "T: {bfloat16, float, double, int8, uint8, int16, uint16, int32, " + "int64, complex64, complex128, bool}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns a tensor of ones with the same shape and type as x. @@ -738,7 +738,7 @@ y: a tensor of the same shape and type as x but filled with ones. REGISTER_OP("Diag") .Input("diagonal: T") .Output("output: T") - .Attr("T: {float, double, int32, int64, complex64, complex128}") + .Attr("T: {bfloat16, float, double, int32, int64, complex64, complex128}") .SetShapeFn([](InferenceContext* c) { ShapeHandle in = c->input(0); TF_RETURN_IF_ERROR(c->WithRankAtLeast(in, 1, &in)); @@ -776,7 +776,7 @@ diagonal: Rank k tensor where k is at most 1. REGISTER_OP("DiagPart") .Input("input: T") .Output("diagonal: T") - .Attr("T: {float, double, int32, int64, complex64, complex128}") + .Attr("T: {bfloat16, float, double, int32, int64, complex64, complex128}") .SetShapeFn([](InferenceContext* c) { ShapeHandle in = c->input(0); if (!c->RankKnown(in)) { @@ -1059,9 +1059,8 @@ REGISTER_OP("Reverse") .Input("dims: bool") .Output("output: T") .Attr( - "T: {uint8, int8, uint16, int16, int32, int64, bool, half, float, " - "double, complex64, " - "complex128, string}") + "T: {uint8, int8, uint16, int16, int32, int64, bool, half, " + "float, double, complex64, complex128, string}") .SetShapeFn([](InferenceContext* c) { ShapeHandle input = c->input(0); ShapeHandle dims; @@ -1137,9 +1136,8 @@ REGISTER_OP("ReverseV2") .Output("output: T") .Attr("Tidx: {int32, int64} = DT_INT32") .Attr( - "T: {uint8, int8, uint16, int16, int32, int64, bool, half, float, " - "double, complex64, " - "complex128, string}") + "T: {uint8, int8, uint16, int16, int32, int64, bool, half, bfloat16, " + "float, double, complex64, complex128, string}") .SetShapeFn([](InferenceContext* c) { ShapeHandle input = c->input(0); ShapeHandle axis; @@ -1834,7 +1832,7 @@ this operation. REGISTER_OP("CheckNumerics") .Input("tensor: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("message: string") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( @@ -4565,12 +4563,12 @@ REGISTER_OP("Bitcast") .Output("output: type") // All supported dtypes are listed here to include qint16 and quint16. .Attr( - "T: {float, double, int64, int32, uint8, uint16, int8, int16," + "T: {bfloat16, float, double, int64, int32, uint8, uint16, int8, int16," " complex64, complex128, qint8, quint8, qint16, quint16, qint32," " half}") .Attr( - "type: {float, double, int64, int32, uint8, uint16, int8, int16," - " complex64, complex128, qint8, quint8, qint16, quint16, qint32," + "type: {bfloat16, float, double, int64, int32, uint8, uint16, int8, " + "int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32," " half}") .SetShapeFn([](InferenceContext* c) { ShapeHandle input = c->input(0); @@ -4782,7 +4780,7 @@ REGISTER_OP("QuantizeAndDequantize") .Attr("input_min: float = 0") .Attr("input_max: float = 0") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Deprecated(22, "Replaced by QuantizeAndDequantizeV2") .Doc(R"doc( @@ -4798,7 +4796,7 @@ REGISTER_OP("QuantizeAndDequantizeV2") .Attr("num_bits: int = 8") .Attr("range_given: bool = false") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); @@ -4877,7 +4875,7 @@ REGISTER_OP("QuantizeAndDequantizeV3") .Attr("signed_input: bool = true") .Attr("range_given: bool = true") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index ceda11663a..45ebfa203b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -85,7 +85,7 @@ REGISTER_OP("BatchMatMul") .Input("x: T") .Input("y: T") .Output("output: T") - .Attr("T: {half, float, double, int32, complex64, complex128}") + .Attr("T: {half, bfloat16, float, double, int32, complex64, complex128}") .Attr("adj_x: bool = false") .Attr("adj_y: bool = false") .SetShapeFn([](InferenceContext* c) { @@ -184,7 +184,7 @@ _HostCast requires its input and produces its output in host memory. REGISTER_OP("Abs") .Input("x: T") .Output("y: T") - .Attr("T: {half, float, double, int32, int64}") + .Attr("T: {half, bfloat16, float, double, int32, int64}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Computes the absolute value of a tensor. @@ -210,29 +210,31 @@ value is computed as \\( \sqrt{a^2 + b^2}\\). )doc"); // Declares cwise unary operations signature: 't -> 't -#define UNARY() \ - Input("x: T") \ - .Output("y: T") \ - .Attr("T: {half, float, double, int32, int64, complex64, complex128}") \ +#define UNARY() \ + Input("x: T") \ + .Output("y: T") \ + .Attr( \ + "T: {half, bfloat16, float, double, int32, int64, complex64, " \ + "complex128}") \ .SetShapeFn(shape_inference::UnchangedShape) -#define UNARY_REAL() \ - Input("x: T") \ - .Output("y: T") \ - .Attr("T: {half, float, double}") \ +#define UNARY_REAL() \ + Input("x: T") \ + .Output("y: T") \ + .Attr("T: {half, bfloat16, float, double}") \ .SetShapeFn(shape_inference::UnchangedShape) -#define UNARY_COMPLEX() \ - Input("x: T") \ - .Output("y: T") \ - .Attr("T: {half, float, double, complex64, complex128}") \ +#define UNARY_COMPLEX() \ + Input("x: T") \ + .Output("y: T") \ + .Attr("T: {half, bfloat16, float, double, complex64, complex128}") \ .SetShapeFn(shape_inference::UnchangedShape) -#define UNARY_GRADIENT_COMPLEX() \ - Input("y: T") \ - .Input("dy: T") \ - .Output("z: T") \ - .Attr("T: {half, float, double, complex64, complex128}") \ +#define UNARY_GRADIENT_COMPLEX() \ + Input("y: T") \ + .Input("dy: T") \ + .Output("z: T") \ + .Attr("T: {half, bfloat16, float, double, complex64, complex128}") \ .SetShapeFn(shape_inference::UnchangedShape) REGISTER_OP("Neg") @@ -481,7 +483,7 @@ Computes atan of x element-wise. REGISTER_OP("IsNan") .Input("x: T") .Output("y: bool") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns which elements of x are NaN. @@ -494,7 +496,7 @@ Equivalent to np.isnan REGISTER_OP("IsInf") .Input("x: T") .Output("y: bool") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns which elements of x are Inf. @@ -507,7 +509,7 @@ Equivalent to np.isinf REGISTER_OP("IsFinite") .Input("x: T") .Output("y: bool") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns which elements of x are finite. @@ -520,7 +522,9 @@ Equivalent to np.isfinite REGISTER_OP("Sign") .Input("x: T") .Output("y: T") - .Attr("T: {half, float, double, int32, int64, complex64, complex128}") + .Attr( + "T: {half, bfloat16, float, double, int32, int64, complex64, " + "complex128}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns an element-wise indication of the sign of a number. @@ -533,7 +537,7 @@ For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`. REGISTER_OP("Floor") .Input("x: T") .Output("y: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns element-wise largest integer not greater than x. @@ -542,7 +546,7 @@ Returns element-wise largest integer not greater than x. REGISTER_OP("Ceil") .Input("x: T") .Output("y: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns element-wise smallest integer in not less than x. @@ -551,7 +555,7 @@ Returns element-wise smallest integer in not less than x. REGISTER_OP("Rint") .Input("x: T") .Output("y: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Returns element-wise integer closest to x. @@ -569,22 +573,23 @@ rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] // Declares cwise binary operations signature: 't, 't -> 't. -#define BINARY_MORE() \ - Input("x: T").Input("y: T").Output("z: T").Attr( \ - "T: {half, float, double, uint8, int8, uint16, int16, int32, int64, " \ - "complex64, complex128}") +#define BINARY_MORE() \ + Input("x: T").Input("y: T").Output("z: T").Attr( \ + "T: {half, bfloat16, float, double, uint8, int8, uint16, int16, int32, " \ + "int64, complex64, complex128}") -#define BINARY_FEWER() \ - Input("x: T").Input("y: T").Output("z: T").Attr( \ - "T: {half, float, double, int32, int64, complex64, complex128}") +#define BINARY_FEWER() \ + Input("x: T").Input("y: T").Output("z: T").Attr( \ + "T: {half, bfloat16, float, double, int32, int64, complex64, " \ + "complex128}") REGISTER_OP("Add") .Input("x: T") .Input("y: T") .Output("z: T") .Attr( - "T: {half, float, double, uint8, int8, int16, int32, int64, complex64, " - "complex128, string}") + "T: {half, bfloat16, float, double, uint8, int8, int16, int32, int64, " + "complex64, complex128, string}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns x + y element-wise. @@ -600,8 +605,8 @@ REGISTER_OP("AddV2") .Input("y: T") .Output("z: T") .Attr( - "T: {half, float, double, uint8, int8, int16, int32, int64, complex64, " - "complex128}") + "T: {half, bfloat16, float, double, uint8, int8, int16, int32, int64, " + "complex64, complex128}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .SetIsAggregate() .SetIsCommutative() @@ -757,7 +762,7 @@ REGISTER_OP("Maximum") .Input("x: T") .Input("y: T") .Output("z: T") - .Attr("T: {half, float, double, int32, int64}") + .Attr("T: {half, bfloat16, float, double, int32, int64}") .SetIsCommutative() .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( @@ -788,7 +793,7 @@ REGISTER_OP("Minimum") .Input("x: T") .Input("y: T") .Output("z: T") - .Attr("T: {half, float, double, int32, int64}") + .Attr("T: {half, bfloat16, float, double, int32, int64}") .SetIsCommutative() .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( @@ -802,7 +807,7 @@ REGISTER_OP("Mod") .Input("x: T") .Input("y: T") .Output("z: T") - .Attr("T: {int32, int64, float, double}") + .Attr("T: {int32, int64, bfloat16, float, double}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns element-wise remainder of division. This emulates C semantics in that @@ -817,7 +822,7 @@ REGISTER_OP("FloorMod") .Input("x: T") .Input("y: T") .Output("z: T") - .Attr("T: {int32, int64, float, double}") + .Attr("T: {int32, int64, bfloat16, float, double}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns element-wise remainder of division. When `x < 0` xor `y < 0` is @@ -832,7 +837,7 @@ REGISTER_OP("TruncateMod") .Input("x: T") .Input("y: T") .Output("z: T") - .Attr("T: {int32, int64, float, double}") + .Attr("T: {int32, int64, bfloat16, float, double}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns element-wise remainder of division. This emulates C semantics in that @@ -847,7 +852,9 @@ REGISTER_OP("Pow") .Input("x: T") .Input("y: T") .Output("z: T") - .Attr("T: {half, float, double, int32, int64, complex64, complex128}") + .Attr( + "T: {half, bfloat16, float, double, int32, int64, complex64, " + "complex128}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Computes the power of one value to another. @@ -946,7 +953,7 @@ REGISTER_OP("Atan2") .Input("y: T") .Input("x: T") .Output("z: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Computes arctangent of `y/x` element-wise, respecting signs of the arguments. @@ -1064,15 +1071,15 @@ Returns the truth value of (x >= y) element-wise. // -------------------------------------------------------------------------- -#define EQUALITY_COMPARISON() \ - Input("x: T") \ - .Input("y: T") \ - .Output("z: bool") \ - .SetIsCommutative() \ - .Attr( \ - "T: {half, float, double, uint8, int8, int16, int32, int64, " \ - "complex64, " \ - "quint8, qint8, qint32, string, bool, complex128}") \ +#define EQUALITY_COMPARISON() \ + Input("x: T") \ + .Input("y: T") \ + .Output("z: bool") \ + .SetIsCommutative() \ + .Attr( \ + "T: {half, bfloat16, float, double, uint8, int8, int16, int32, " \ + "int64, complex64, quint8, qint8, qint32, string, bool, " \ + "complex128}") \ .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) REGISTER_OP("Equal") @@ -1291,7 +1298,7 @@ REGISTER_OP("MatMul") .Output("product: T") .Attr("transpose_a: bool = false") .Attr("transpose_b: bool = false") - .Attr("T: {half, float, double, int32, complex64, complex128}") + .Attr("T: {half, bfloat16, float, double, int32, complex64, complex128}") .SetShapeFn(shape_inference::MatMulShape) .Doc(R"doc( Multiply the matrix "a" by the matrix "b". @@ -2105,7 +2112,7 @@ REGISTER_OP("Range") .Input("limit: Tidx") .Input("delta: Tidx") .Output("output: Tidx") - .Attr("Tidx: {float, double, int32, int64} = DT_INT32") + .Attr("Tidx: {bfloat16, float, double, int32, int64} = DT_INT32") .SetShapeFn([](InferenceContext* c) { ShapeHandle unused; TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(c->input(0), 0, &unused), @@ -2160,7 +2167,7 @@ REGISTER_OP("LinSpace") .Input("stop: T") .Input("num: Tidx") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .Attr("Tidx: {int32, int64} = DT_INT32") .SetShapeFn([](InferenceContext* c) { ShapeHandle unused; diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 59c4642e4d..102de94787 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -73,7 +73,7 @@ REGISTER_OP("AvgPool") .Attr("strides: list(int) >= 4") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::AvgPoolShape) .Doc(R"doc( Performs average pooling on the input. @@ -101,7 +101,7 @@ REGISTER_OP("AvgPoolGrad") .Attr("strides: list(int) >= 4") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); @@ -300,7 +300,7 @@ REGISTER_OP("FusedBatchNormV2") .Output("batch_variance: U") .Output("reserve_space_1: U") .Output("reserve_space_2: U") - .Attr("T: {half, float}") + .Attr("T: {half, bfloat16, float}") .Attr("U: {float}") .Attr("epsilon: float = 0.0001") .Attr("data_format: string = 'NHWC'") @@ -393,7 +393,7 @@ REGISTER_OP("FusedBatchNormGradV2") .Output("offset_backprop: U") .Output("reserve_space_3: U") .Output("reserve_space_4: U") - .Attr("T: {half, float}") + .Attr("T: {half, bfloat16, float}") .Attr("U: {float}") .Attr("epsilon: float = 0.0001") .Attr("data_format: string = 'NHWC'") @@ -508,7 +508,7 @@ REGISTER_OP("Conv2D") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {half, float}") + .Attr("T: {half, bfloat16, float}") .Attr("strides: list(int)") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) @@ -567,7 +567,7 @@ REGISTER_OP("Conv2DBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float}") + .Attr("T: {half, bfloat16, float}") .Attr("strides: list(int)") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) @@ -615,7 +615,7 @@ REGISTER_OP("Conv2DBackpropFilter") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float}") + .Attr("T: {half, bfloat16, float}") .Attr("strides: list(int)") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) @@ -837,7 +837,7 @@ REGISTER_OP("DepthwiseConv2dNative") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -884,7 +884,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -932,7 +932,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -980,7 +980,7 @@ REGISTER_OP("Conv3D") .Input("input: T") .Input("filter: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -1073,7 +1073,7 @@ REGISTER_OP("Conv3DBackpropInputV2") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -1116,7 +1116,7 @@ REGISTER_OP("Conv3DBackpropFilterV2") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) @@ -1163,7 +1163,7 @@ REGISTER_OP("AvgPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .SetShapeFn(shape_inference::Pool3DShape) .Doc(R"doc( Performs 3D average pooling on the input. @@ -1190,7 +1190,7 @@ REGISTER_OP("AvgPool3DGrad") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {float, double}") + .Attr("T: {bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); @@ -1225,7 +1225,7 @@ REGISTER_OP("MaxPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {float}") + .Attr("T: {bfloat16, float}") .SetShapeFn(shape_inference::Pool3DShape) .Doc(R"doc( Performs 3D max pooling on the input. @@ -1253,8 +1253,8 @@ REGISTER_OP("MaxPool3DGrad") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {float} = DT_FLOAT") - .Attr("TInput: {float} = DT_FLOAT") + .Attr("T: {bfloat16, float} = DT_FLOAT") + .Attr("TInput: {bfloat16, float} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { return UnchangedShapeWithRank(c, 5); }) @@ -1319,7 +1319,7 @@ data_format: The data format of the input and output data. With the REGISTER_OP("L2Loss") .Input("t: T") .Output("output: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::ScalarShape) .Doc(R"doc( L2 Loss. @@ -1341,7 +1341,7 @@ REGISTER_OP("LRN") .Attr("bias: float = 1.0") .Attr("alpha: float = 1.0") .Attr("beta: float = 0.5") - .Attr("T: {float, half} = DT_FLOAT") + .Attr("T: {half, bfloat16, float} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { return UnchangedShapeWithRank(c, 4); }) @@ -1376,7 +1376,7 @@ REGISTER_OP("LRNGrad") .Attr("bias: float = 1.0") .Attr("alpha: float = 1.0") .Attr("beta: float = 0.5") - .Attr("T: {float, half} = DT_FLOAT") + .Attr("T: {half, bfloat16, float} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &s)); // input_grads @@ -1402,8 +1402,8 @@ output: The gradients for LRN. REGISTER_OP("MaxPool") .Attr( - "T: {float, double, int32, int64, uint8, int16, int8, uint16, " - "half, qint8} = DT_FLOAT") + "T: {half, bfloat16, float, double, int32, int64, uint8, int16, int8, " + "uint16, qint8} = DT_FLOAT") .Attr("ksize: list(int) >= 4") .Attr("strides: list(int) >= 4") .Attr(GetPaddingAttrString()) @@ -1429,8 +1429,8 @@ output: The max pooled output tensor. REGISTER_OP("MaxPoolV2") .Attr( - "T: {float, double, int32, int64, uint8, int16, int8, uint16, " - "half, qint8} = DT_FLOAT") + "T: {half, bfloat16, float, double, int32, int64, uint8, int16, int8, " + "uint16, qint8} = DT_FLOAT") .Attr(GetPaddingAttrString()) .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'") .Input("input: T") @@ -1913,7 +1913,7 @@ backprops: The gradients: REGISTER_OP("Elu") .Input("features: T") .Output("activations: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. @@ -1926,7 +1926,7 @@ REGISTER_OP("EluGrad") .Input("gradients: T") .Input("outputs: T") .Output("backprops: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::MergeBothInputsShapeFn) .Doc(R"doc( Computes gradients for the exponential linear (Elu) operation. @@ -1940,7 +1940,7 @@ backprops: The gradients: `gradients * (outputs + 1)` if outputs < 0, REGISTER_OP("Selu") .Input("features: T") .Output("activations: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` @@ -1953,7 +1953,7 @@ REGISTER_OP("SeluGrad") .Input("gradients: T") .Input("outputs: T") .Output("backprops: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::MergeBothInputsShapeFn) .Doc(R"doc( Computes gradients for the scaled exponential linear (Selu) operation. @@ -2015,7 +2015,7 @@ backprops: The gradients: `gradients / (1 + abs(features)) ** 2`. REGISTER_OP("Softmax") .Input("logits: T") .Output("softmax: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 1); }) @@ -2035,7 +2035,7 @@ softmax: Same shape as `logits`. REGISTER_OP("LogSoftmax") .Input("logits: T") .Output("logsoftmax: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { return shape_inference::UnchangedShapeWithRankAtLeast(c, 1); }) @@ -2057,7 +2057,7 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits") .Input("labels: T") .Output("loss: T") .Output("backprop: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); @@ -2086,7 +2086,7 @@ REGISTER_OP("SparseSoftmaxCrossEntropyWithLogits") .Input("labels: Tlabels") .Output("loss: T") .Output("backprop: T") - .Attr("T: {half, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("Tlabels: {int32, int64} = DT_INT64") .SetShapeFn([](InferenceContext* c) { ShapeHandle features; diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc index 5a436fb93e..31d9c82e53 100644 --- a/tensorflow/core/ops/random_ops.cc +++ b/tensorflow/core/ops/random_ops.cc @@ -29,7 +29,7 @@ REGISTER_OP("RandomUniform") .Output("output: dtype") .Attr("seed: int = 0") .Attr("seed2: int = 0") - .Attr("dtype: {half,float,double}") + .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") .SetShapeFn(shape_inference::RandomShape) .Doc(R"doc( @@ -87,7 +87,7 @@ REGISTER_OP("RandomStandardNormal") .Output("output: dtype") .Attr("seed: int = 0") .Attr("seed2: int = 0") - .Attr("dtype: {half,float,double}") + .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") .SetShapeFn(shape_inference::RandomShape) .Doc(R"doc( @@ -115,7 +115,7 @@ REGISTER_OP("ParameterizedTruncatedNormal") .Output("output: dtype") .Attr("seed: int = 0") .Attr("seed2: int = 0") - .Attr("dtype: {half,float,double}") + .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") .SetShapeFn(shape_inference::RandomShape) .Doc(R"doc( @@ -145,7 +145,7 @@ REGISTER_OP("TruncatedNormal") .Output("output: dtype") .Attr("seed: int = 0") .Attr("seed2: int = 0") - .Attr("dtype: {half,float,double}") + .Attr("dtype: {half,bfloat16,float,double}") .Attr("T: {int32, int64}") .SetShapeFn(shape_inference::RandomShape) .Doc(R"doc( -- GitLab From e361bf18a3c71a1ec9985a478c419c04852a61d3 Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan Date: Fri, 1 Dec 2017 10:31:22 +0800 Subject: [PATCH 1022/1801] add link to decode_bmp --- tensorflow/docs_src/api_guides/python/image.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/docs_src/api_guides/python/image.md b/tensorflow/docs_src/api_guides/python/image.md index a2c8c3c3c9..051e4547ee 100644 --- a/tensorflow/docs_src/api_guides/python/image.md +++ b/tensorflow/docs_src/api_guides/python/image.md @@ -19,6 +19,7 @@ Note: The PNG encode and decode Ops support RGBA, but the conversions Ops presently only support RGB, HSV, and GrayScale. Presently, the alpha channel has to be stripped from the image and re-attached using slicing ops. +* @{tf.image.decode_bmp} * @{tf.image.decode_gif} * @{tf.image.decode_jpeg} * @{tf.image.encode_jpeg} -- GitLab From 87e2f20c8b4f2ece313584c7c3c5588ee6ae5ece Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Thu, 30 Nov 2017 18:56:57 -0800 Subject: [PATCH 1023/1801] Automated g4 rollback of changelist 177505909 PiperOrigin-RevId: 177540002 --- .../grappler/optimizers/layout_optimizer.cc | 88 ++++++++---- .../optimizers/layout_optimizer_test.cc | 128 ++++++++++++++++++ 2 files changed, 190 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index ef4b015295..cb8411ba5e 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include "tensorflow/core/framework/attr_value.pb.h" @@ -761,24 +762,52 @@ class AgnosticNodeProcessor : public NodeProcessor { bool IsNodeAfterNCHWToNHWC() const { std::set ops_format_agnostic = GetOpsFormatAgnostic(); - auto node = node_map_->GetNode(node_->name()); - while (node->input_size() > 0) { - int data_input_pos = 0; - if (IsConcatV1(*node) || IsSplit(*node)) { - data_input_pos = 1; - } - node = node_map_->GetNode(node->input(data_input_pos)); - if (IsNodeNCHWToNHWC(node->name())) { + std::deque queue; + auto first_node_pos = DataInputPos(*node_); + for (const auto& pos : first_node_pos) { + auto input_node = node_map_->GetNode(node_->input(pos)); + queue.push_back(input_node); + } + // The code will exit this while loop in one iteration in most cases, as the + // graph is already topologically sorted. + while (!queue.empty()) { + NodeDef* current_node = queue.front(); + queue.pop_front(); + if (IsNodeNCHWToNHWC(current_node->name())) { return true; } - bool connected = - ops_format_agnostic.find(node->op()) != ops_format_agnostic.end(); - if (!connected) { - return false; + // We only continue searching if the path is connected through + // format-agnostic nodes. + if (ops_format_agnostic.find(current_node->op()) != + ops_format_agnostic.end()) { + auto current_node_pos = DataInputPos(*current_node); + for (const auto& pos : current_node_pos) { + auto input_node = node_map_->GetNode(current_node->input(pos)); + queue.push_back(input_node); + } } } return false; } + + private: + std::vector DataInputPos(const NodeDef& node) const { + std::vector pos; + if (IsSplit(node)) { + return {1}; + } + if (IsConcatV1(node)) { + return {1}; + } + if (IsAdd(node) || IsMul(node) || IsRealDiv(node) || + IsSquaredDifference(node) || IsSub(node)) { + return {0, 1}; + } + if (node.input_size() > 0 && !IsControlInput(node.input(0))) { + return {0}; + } + return {}; + } }; class AddNProcessor : public AgnosticNodeProcessor { @@ -801,42 +830,49 @@ class BinaryOpProcessor : public AgnosticNodeProcessor { public: explicit BinaryOpProcessor(const OptimizeContext& opt_cxt) : AgnosticNodeProcessor(opt_cxt) { - is_4d_with_vector_ = Is4DOperateWithVector(); + is_4d_with_vector_ = IsNDOperateWithMD(4, 1); } protected: bool ShouldProcess() const override { + // TODO(yaozhang): Support IsNDOperateWithMD(1, 4): first input is a vector + // and the second input is a 4D tensor; and update CustomizedProcessing() + // accordingly. return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() && - (Is4DOperateWithND(4) || Is4DOperateWithScalar() || - Is4DOperateWithVector()) && + (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) || + IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4)) && IsOnGPU(); } std::vector GetInputPos() const override { - std::vector input_pos = {0}; - if (Is4DOperateWithND(4)) { + std::vector input_pos; + auto input0 = node_map_->GetNode(node_->input(0)); + auto input1 = node_map_->GetNode(node_->input(1)); + if (IsDimsFour(*input0)) { + input_pos.push_back(0); + } + if (IsDimsFour(*input1)) { input_pos.push_back(1); } return input_pos; } - bool Is4DOperateWithND(int n) const { + bool IsDimsFour(const NodeDef& node) const { + return NodeProcessor::IsDimsFour(node) || IsNodeNCHWToNHWC(node.name()); + } + + bool IsNDOperateWithMD(int n, int m) const { auto input0 = node_map_->GetNode(node_->input(0)); auto input1 = node_map_->GetNode(node_->input(1)); if (input0 && input1) { - return (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) && - ((n == 4) - ? (IsDimsFour(*input1) || IsNodeNCHWToNHWC(input1->name())) - : IsDimsN(*input1, n)); + bool input0_is_n = (n == 4) ? IsDimsFour(*input0) : IsDimsN(*input0, n); + bool input1_is_m = (m == 4) ? IsDimsFour(*input1) : IsDimsN(*input1, m); + return input0_is_n && input1_is_m; } return false; } - bool Is4DOperateWithScalar() const { return Is4DOperateWithND(0); } - - bool Is4DOperateWithVector() const { return Is4DOperateWithND(1); } - NodeDef* AddNodeShapeConst(const string& name, int num_channels) { NodeDef* node = graph_->add_node(); node_map_->AddNode(name, node); diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc index e8f7b8ac3c..363b4c3fd8 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc @@ -298,6 +298,39 @@ TEST_F(LayoutOptimizerTest, Connectivity) { EXPECT_EQ(node_i2_output->input(0), "i1"); } +TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto i1 = ops::Identity(s.WithOpName("i1"), conv); + auto i2 = ops::Identity(s.WithOpName("i2"), i1); + auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {}); + auto sub = ops::Sub(s.WithOpName("sub"), scalar_sub, i2); + auto i3 = ops::Identity(s.WithOpName("i3"), sub); + auto i4 = ops::Identity(s.WithOpName("i4"), i3); + auto i5 = ops::Identity(s.WithOpName("i5"), i4); + auto scalar_mul = ops::Const(s.WithOpName("scalar_mul"), 3.0f, {}); + auto mul = ops::Mul(s.WithOpName("mul"), scalar_mul, i5); + auto i6 = ops::Identity(s.WithOpName("i6"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + // Make the graph not in topological order to test the handling of multi-hop + // connectivity (here we say two nodes are connected if all nodes in the + // middle are layout agnostic). If the graph is already in topological order, + // the problem is easier, where layout optimizer only needs to check + // single-hop connectivity. + NodeMap node_map_original(&item.graph); + auto node_i1 = node_map_original.GetNode("i1"); + auto node_mul = node_map_original.GetNode("mul"); + node_mul->Swap(node_i1); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map_output(&output); + auto mul_node = node_map_output.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "scalar_mul"); + EXPECT_EQ(mul_node->input(1), "i5"); +} + TEST_F(LayoutOptimizerTest, PreserveFetch) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); auto conv = SimpleConv2D(&s, 3, 2, "VALID"); @@ -571,6 +604,101 @@ TEST_F(LayoutOptimizerTest, Sum) { */ } +TEST_F(LayoutOptimizerTest, MulScalarAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {}); + auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "scalar"); + EXPECT_EQ(mul_node->input(1), "Conv2D"); +} + +TEST_F(LayoutOptimizerTest, Mul4DAndScalar) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {}); + auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "Conv2D"); + EXPECT_EQ(mul_node->input(1), "scalar"); +} + +TEST_F(LayoutOptimizerTest, Mul4DAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto i = ops::Identity(s.WithOpName("i"), conv); + auto mul = ops::Mul(s.WithOpName("mul"), conv, i); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "Conv2D"); + EXPECT_EQ(mul_node->input(1), "i"); +} + +TEST_F(LayoutOptimizerTest, Mul4DAndVector) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2}); + auto mul = ops::Mul(s.WithOpName("mul"), conv, vector); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + EXPECT_EQ(mul_node->input(0), "Conv2D"); + EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector"); + auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector"); + Tensor tensor; + EXPECT_TRUE( + tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor())); + Tensor tensor_expected(DT_INT32, {4}); + test::FillValues(&tensor_expected, {1, 2, 1, 1}); + test::ExpectTensorEqual(tensor_expected, tensor); +} + +TEST_F(LayoutOptimizerTest, MulVectorAnd4D) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto conv = SimpleConv2D(&s, 3, 2, "VALID"); + auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2}); + auto mul = ops::Mul(s.WithOpName("mul"), vector, conv); + auto o = ops::Identity(s.WithOpName("o"), mul); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + LayoutOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output); + NodeMap node_map(&output); + auto mul_node = node_map.GetNode("mul"); + // TODO(yaozhang): Support vector as the first input and 4d tensor as the + // second input for BinaryOpProcessor. + EXPECT_EQ(mul_node->input(0), "vector"); + EXPECT_EQ(mul_node->input(1), + "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1"); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 79ad4a423f3e9031eb841a164372cc7476cc112a Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Thu, 30 Nov 2017 19:46:05 -0800 Subject: [PATCH 1024/1801] enabling Tensor._set_shape() to work with the C API PiperOrigin-RevId: 177543170 --- tensorflow/python/client/tf_session.i | 43 ++++++++++++++ tensorflow/python/client/tf_session_helper.cc | 19 +++++++ tensorflow/python/client/tf_session_helper.h | 14 +++++ tensorflow/python/framework/ops.py | 57 +++++++++++++------ 4 files changed, 117 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 5fa1a7e8fc..d471a39b69 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -532,6 +532,49 @@ def TF_Reset(target, containers=None, config=None): %unignore TF_GraphGetTensorShapeHelper; %ignore TF_GraphGetTensorShape; +// We use TF_GraphSetTensorShape_wrapper instead of +// TF_GraphSetTensorShape +%ignore TF_GraphSetTensorShape; +%unignore tensorflow; +%unignore TF_GraphSetTensorShape_wrapper; + +// $input is a Python list of ints to a vector for TF_GraphSetTensorShape_wrapper +%typemap(in) (const std::vector& dims) + (std::vector dims_local){ + if ($input != Py_None) { + if (!PyList_Check($input)) { + SWIG_exception_fail(SWIG_TypeError, tensorflow::strings::Printf( + "$symname: expected list but got %s ", Py_TYPE($input)->tp_name).c_str()); + } + size_t size = PyList_Size($input); + for (int i = 0; i < size; ++i) { + PyObject* item = PyList_GetItem($input, i); + dims_local.push_back(PyInt_AsLong(item)); + } + $1 = &dims_local; + } else { + $1 = nullptr; + } +} + +// We use TF_GraphGetTensorShape_wrapper instead of +// TF_GraphGetTensorShape +%ignore TF_GraphGetTensorShape; +%unignore tensorflow; +%unignore TF_GraphGetTensorShape_wrapper; + +// Build a Python list of ints and return it. +%typemap(out) std::vector tensorflow::TF_GraphGetTensorShape_wrapper { + $result = PyList_New($1.size()); + if (!$result) { + SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list"); + } + + for (size_t i = 0; i < $1.size(); ++i) { + PyList_SET_ITEM($result, i, PyInt_FromLong($1[i])); + } +} + %include "tensorflow/python/client/tf_session_helper.h" %unignoreall diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index ad982e5dd8..e4bf09a0ca 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -407,4 +407,23 @@ TF_Function* TF_GraphToFunction_wrapper( opts, description, out_status); } +void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, + const std::vector& dims, + bool unknown_shape, TF_Status* status) { + if (unknown_shape) { + TF_GraphSetTensorShape(graph, output, nullptr, -1, status); + return; + } + TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status); +} + +std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, + TF_Output output, + int num_dims, + TF_Status* status) { + std::vector dims(num_dims); + TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status); + return dims; +} + } // namespace tensorflow diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h index 6ed08d3a58..bb7171db31 100644 --- a/tensorflow/python/client/tf_session_helper.h +++ b/tensorflow/python/client/tf_session_helper.h @@ -168,6 +168,20 @@ TF_Function* TF_GraphToFunction_wrapper( const std::vector& inputs, const std::vector& outputs, const NameVector& output_names, const TF_FunctionOptions* opts, const char* description, TF_Status* out_status); + +// Set the shape of output. If unknown is true, `num_dims` must be set to +// -1 and `dims` is set to nullptr. +void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output, + const std::vector& dims, + bool unknown_shape, TF_Status* status); + +// Return the shape of output. `num_dims` should be the output of +// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called. +std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, + TF_Output output, + int num_dims, + TF_Status* status); + } // namespace tensorflow #endif // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_ diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 5f945ac133..13e6426447 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -374,6 +374,19 @@ class Tensor(_TensorLike): A `TensorShape` representing the shape of this tensor. """ + if _USE_C_API: + graph = self._op._graph._c_graph # pylint: disable=protected-access + with errors.raise_exception_on_not_ok_status() as status: + num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(), + status) + if num_dims == -1: + dim_list = None + else: + with errors.raise_exception_on_not_ok_status() as status: + dim_list = c_api.TF_GraphGetTensorShape_wrapper( + graph, self._as_tf_output(), num_dims, status) + dim_list = [None if i == -1 else i for i in dim_list] + return tensor_shape.TensorShape(dim_list) return self._shape def __iter__(self): @@ -393,8 +406,8 @@ class Tensor(_TensorLike): yield self[i] def _shape_as_list(self): - if self._shape.ndims is not None: - return [dim.value for dim in self._shape.dims] + if self.shape.ndims is not None: + return [dim.value for dim in self.shape.dims] else: return None @@ -410,7 +423,7 @@ class Tensor(_TensorLike): Returns: Integer rank or None """ - return self._shape.ndims + return self.shape.ndims def get_shape(self): """Alias of Tensor.shape.""" @@ -441,14 +454,35 @@ class Tensor(_TensorLike): ``` Args: - shape: A `TensorShape` representing the shape of this tensor. + shape: A `TensorShape` representing the shape of this tensor, a + `TensorShapeProto`, a list, a tuple, or None. Raises: ValueError: If `shape` is not compatible with the current shape of this tensor. """ - # TODO(skyewm): call C API - self._shape = self._shape.merge_with(shape) + if not _USE_C_API: + self._shape = self._shape.merge_with(shape) # pylint: disable=protected-access + return + if not isinstance(shape, tensor_shape.TensorShape): + shape = tensor_shape.TensorShape(shape) + dim_list = [] + if shape.dims is None: + unknown_shape = True + else: + unknown_shape = False + for dim in shape.dims: + if dim.value is None: + dim_list.append(-1) + else: + dim_list.append(dim.value) + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_GraphSetTensorShape_wrapper( + self._op._graph._c_graph, # pylint: disable=protected-access + self._as_tf_output(), + dim_list, + unknown_shape, + status) @property def value_index(self): @@ -4521,15 +4555,11 @@ def control_dependencies(control_inputs): See @{tf.Graph.control_dependencies} for more details. - When eager execution is enabled, any callable object in the `control_inputs` - list will be called. - Args: control_inputs: A list of `Operation` or `Tensor` objects which must be executed or computed before running the operations defined in the context. Can also be `None` to clear the control - dependencies. If eager execution is enabled, any callable object in the - `control_inputs` list will be called. + dependencies. Returns: A context manager that specifies control dependencies for all @@ -4538,11 +4568,6 @@ def control_dependencies(control_inputs): if context.in_graph_mode(): return get_default_graph().control_dependencies(control_inputs) else: - if control_inputs: - # Excute any pending callables. - for control in control_inputs: - if callable(control): - control() return _NullContextmanager() -- GitLab From 6968ff07225ad88928922bc83e5522d4515cf963 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Thu, 30 Nov 2017 20:21:19 -0800 Subject: [PATCH 1025/1801] Disable tuning for now. Re-enable when measurement-based estimator is ready. PiperOrigin-RevId: 177545499 --- .../core/grappler/optimizers/layout_optimizer.cc | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index cb8411ba5e..e9436638f0 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -1623,20 +1623,13 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } TuningConfig config; - config.no_gemm = false; + config.no_gemm = true; + // TODO(yaozhang): Enable tuning with various TuningConfig choices wtih + // the measurement-based estimator. status = Tune(item, graph_properties, config, output); - // This is based on an empirical observation that if the introduced Transpose - // nodes is more than 30, not using GEMM implementation would result in better - // performance. - if (status.ok() && GetNumTranspose(*output) > 30) { - config.no_gemm = true; - status = Tune(item, graph_properties, config, output); - } - if (!status.ok()) { *output = item.graph; } - return status; } -- GitLab From 1ec61fafe13e5edce6e45d5a67e960efb9df618a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 20:30:18 -0800 Subject: [PATCH 1026/1801] Use latest nsync in tensorflow. Latest nsync builds with bazel on FreeBSD. PiperOrigin-RevId: 177545934 --- tensorflow/contrib/cmake/external/nsync.cmake | 2 +- tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt | 9 +++++++++ tensorflow/workspace.bzl | 8 ++++---- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/external/nsync.cmake b/tensorflow/contrib/cmake/external/nsync.cmake index 155c91cb97..0508006047 100644 --- a/tensorflow/contrib/cmake/external/nsync.cmake +++ b/tensorflow/contrib/cmake/external/nsync.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public) set(nsync_URL https://github.com/google/nsync) -set(nsync_TAG 93815892dddafe9146a5f7e7042281d59d0f4323) +set(nsync_TAG 8502189abfa44c249c01c2cad64e6ed660a9a668) set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync) set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install) diff --git a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt index 594c2492d4..aaae18a313 100644 --- a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt +++ b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt @@ -158,12 +158,21 @@ if (NOT "${NSYNC_LANGUAGE}X" STREQUAL "c++11X") elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "NetBSDX") include_directories ("${PROJECT_SOURCE_DIR}/platform/netbsd") set (NSYNC_POSIX ON) + set (NSYNC_OS_EXTRA_SRC + "platform/posix/src/nsync_semaphore_mutex.c" + ) elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "FreeBSDX") include_directories ("${PROJECT_SOURCE_DIR}/platform/freebsd") set (NSYNC_POSIX ON) + set (NSYNC_OS_EXTRA_SRC + "platform/posix/src/nsync_semaphore_mutex.c" + ) elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "OpenBSDX") include_directories ("${PROJECT_SOURCE_DIR}/platform/openbsd") set (NSYNC_POSIX ON) + set (NSYNC_OS_EXTRA_SRC + "platform/posix/src/nsync_semaphore_mutex.c" + ) endif () endif () diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b61012f71e..25e036e24c 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -416,11 +416,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "nsync", urls = [ - "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", - "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz", + "https://mirror.bazel.build/github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz", + "https://github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz", ], - sha256 = "e3bd4555415ace511338fc27e595351738eea4e9006f1612b76c82914770716b", - strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323", + sha256 = "51f81ff4202bbb820cdbedc061bd2eb6765f2b5c06489e7a8694bedac329e8f8", + strip_prefix = "nsync-8502189abfa44c249c01c2cad64e6ed660a9a668", ) native.http_archive( -- GitLab From 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 21:08:05 -0800 Subject: [PATCH 1027/1801] [XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. PiperOrigin-RevId: 177548483 --- .../compiler/xla/service/hlo_instruction.cc | 30 +++++++ .../compiler/xla/service/hlo_instruction.h | 5 +- tensorflow/compiler/xla/tools/parser/BUILD | 2 +- .../compiler/xla/tools/parser/hlo_lexer.cc | 32 ++------ .../compiler/xla/tools/parser/hlo_lexer.h | 14 +--- .../compiler/xla/tools/parser/hlo_parser.cc | 81 +++++++++++++++++-- .../xla/tools/parser/hlo_parser_test.cc | 25 ++++++ .../compiler/xla/tools/parser/hlo_token.h | 6 +- 8 files changed, 149 insertions(+), 46 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index b4bac18bcd..45825c7c76 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2060,6 +2060,14 @@ std::vector HloInstruction::ExtraAttributesToString() const { extra.push_back( StrCat("outfeed_config=\"", CEscape(outfeed_config_), "\"")); } + if (opcode() == HloOpcode::kRng) { + extra.push_back( + StrCat("distribution=", RandomDistributionToString(distribution_))); + } + if (opcode() == HloOpcode::kReducePrecision) { + extra.push_back(StrCat("exponent_bits=", exponent_bits_)); + extra.push_back(StrCat("mantissa_bits=", mantissa_bits_)); + } return extra; } @@ -3029,6 +3037,28 @@ string OpMetadataToString(const OpMetadata& metadata) { return Join(result, " "); } +string RandomDistributionToString(const RandomDistribution& distribution) { + return tensorflow::str_util::Lowercase(RandomDistribution_Name(distribution)); +} + +StatusOr StringToRandomDistribution(const string& name) { + static std::unordered_map* map = [] { + static auto* map = new std::unordered_map; + for (int i = 0; i < RandomDistribution_ARRAYSIZE; i++) { + if (RandomDistribution_IsValid(i)) { + auto value = static_cast(i); + (*map)[RandomDistributionToString(value)] = value; + } + } + return map; + }(); + auto found = map->find(tensorflow::str_util::Lowercase(name)); + if (found == map->end()) { + return InvalidArgument("Unknown distribution"); + } + return found->second; +} + std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) { return os << ToString(kind); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 768c027a42..088902e2a7 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1285,9 +1285,12 @@ string ToString(HloInstruction::FusionKind kind); StatusOr StringToFusionKind( const string& kind_name); -// Custom stringification functions for protos that live inside HloInstruction. +// Custom (de)stringification functions for protos that live inside +// HloInstruction. string PaddingConfigToString(const PaddingConfig& padding); string OpMetadataToString(const OpMetadata& metadata); +string RandomDistributionToString(const RandomDistribution& distribution); +StatusOr StringToRandomDistribution(const string& name); std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind); diff --git a/tensorflow/compiler/xla/tools/parser/BUILD b/tensorflow/compiler/xla/tools/parser/BUILD index ce936af6c3..97aacf6b39 100644 --- a/tensorflow/compiler/xla/tools/parser/BUILD +++ b/tensorflow/compiler/xla/tools/parser/BUILD @@ -34,9 +34,9 @@ cc_library( deps = [ "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/service:hlo", "//tensorflow/core:lib", "//tensorflow/core:regexp_internal", ], diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index 56744440db..04247594ed 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -17,7 +17,6 @@ limitations under the License. #include -#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/util.h" @@ -153,15 +152,15 @@ TokKind HloLexer::LexToken() { } } -// Lex a shape, name, keyword, opcode, attribute name, or the dim labels -// pattern. +// Lex a shape, name, keyword, attribute name, the dim labels pattern, and +// other identifiers. // // shape ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})? // name ::= [a-zA-Z_][a-zA-Z0-9_.-]*: // keyword ::= HloModule, ENTRY, ... -// opcode ::= add, greater-than, ... // attribute_name ::= condition, body, dimensions, ... // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} +// identifiers ::= other cases that match [a-zA-Z_][a-zA-Z0-9_.-]* TokKind HloLexer::LexIdentifier() { { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); @@ -220,20 +219,6 @@ TokKind HloLexer::LexIdentifier() { #undef KEYWORD - // See if this is an opcode. - auto opcode = StringToHloOpcode(identifier.ToString()); - if (opcode.ok()) { - opcode_val_ = opcode.ValueOrDie(); - return TokKind::kOpcode; - } - - // See if this is an fusion kind. - auto kind = xla::StringToFusionKind(identifier.ToString()); - if (kind.ok()) { - fusion_kind_val_ = kind.ValueOrDie(); - return TokKind::kFusionKind; - } - { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); static LazyRE2 dim_labels_pattern = { @@ -244,8 +229,9 @@ TokKind HloLexer::LexIdentifier() { return TokKind::kDimLabels; } } - current_ptr_ = token_start_ + 1; - return TokKind::kError; + + str_val_ = identifier.ToString(); + return TokKind::kIdent; } // Lex names after a % character. @@ -428,14 +414,12 @@ string TokKindToString(TokKind kind) { return "kDxD"; case TokKind::kPad: return "kPad"; + case TokKind::kIdent: + return "kIdent"; case TokKind::kString: return "kString"; case TokKind::kShape: return "kShape"; - case TokKind::kOpcode: - return "kOpcode"; - case TokKind::kFusionKind: - return "kFusionKind"; case TokKind::kInt: return "kInt"; case TokKind::kDecimal: diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h index 5c9d1bf391..9daf6a11d3 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h @@ -18,9 +18,8 @@ limitations under the License. #include -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/tools/parser/hlo_token.h" +#include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/logging.h" @@ -48,6 +47,7 @@ class HloLexer { case TokKind::kDxD: case TokKind::kPad: case TokKind::kString: + case TokKind::kIdent: return str_val_; default: LOG(FATAL) << "This token does not have string value"; @@ -57,14 +57,6 @@ class HloLexer { CHECK(GetKind() == TokKind::kShape); return shape_val_; } - HloOpcode GetOpcodeVal() const { - CHECK(GetKind() == TokKind::kOpcode); - return opcode_val_; - } - HloInstruction::FusionKind GetFusionKindVal() const { - CHECK(GetKind() == TokKind::kFusionKind); - return fusion_kind_val_; - } int64 GetInt64Val() const { CHECK(GetKind() == TokKind::kInt); return int64_val_; @@ -114,8 +106,6 @@ class HloLexer { TokKind current_kind_; string str_val_; Shape shape_val_; - HloOpcode opcode_val_; - HloInstruction::FusionKind fusion_kind_val_; int64 int64_val_; double decimal_val_; }; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 47979ec6f3..ddc1e69951 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -104,6 +105,7 @@ class HloParser { kPaddingConfig, kMetadata, kFusionKind, + kDistribution, }; struct AttrConfig { @@ -174,6 +176,7 @@ class HloParser { bool ParseShape(Shape* result); bool ParseOpcode(HloOpcode* result); bool ParseFusionKind(HloInstruction::FusionKind* result); + bool ParseRandomDistribution(RandomDistribution* result); bool ParseInt64(int64* result); bool ParseDouble(double* result); bool ParseBool(bool* result); @@ -816,10 +819,36 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, shape, operands[0], config ? *config : "")); break; } + case HloOpcode::kRng: { + optional distribution; + attrs["distribution"] = {/*required=*/true, AttrTy::kDistribution, + &distribution}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateRng(shape, *distribution, operands)); + break; + } + case HloOpcode::kReducePrecision: { + optional exponent_bits; + optional mantissa_bits; + attrs["exponent_bits"] = {/*required=*/true, AttrTy::kInt64, + &exponent_bits}; + attrs["mantissa_bits"] = {/*required=*/true, AttrTy::kInt64, + &mantissa_bits}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateReducePrecision( + shape, operands[0], static_cast(*exponent_bits), + static_cast(*mantissa_bits))); + break; + } case HloOpcode::kConditional: case HloOpcode::kCustomCall: - case HloOpcode::kReducePrecision: - case HloOpcode::kRng: case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); @@ -1548,6 +1577,15 @@ bool HloParser::ParseAttributeHelper( static_cast*>(attr_out_ptr)->emplace(result); return true; } + case AttrTy::kDistribution: { + RandomDistribution result; + if (!ParseRandomDistribution(&result)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } } }(); if (!success) { @@ -2024,20 +2062,51 @@ bool HloParser::ParseMetadata(OpMetadata* metadata) { bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << "ParseOpcode"; - if (lexer_.GetKind() != TokKind::kOpcode) { + if (lexer_.GetKind() != TokKind::kIdent) { return TokenError("expects opcode"); } - *result = lexer_.GetOpcodeVal(); + string val = lexer_.GetStrVal(); + auto status_or_result = StringToHloOpcode(val); + if (!status_or_result.ok()) { + return TokenError( + Printf("expects opcode but sees: %s, error: %s", val.c_str(), + status_or_result.status().error_message().c_str())); + } + *result = status_or_result.ValueOrDie(); lexer_.Lex(); return true; } bool HloParser::ParseFusionKind(HloInstruction::FusionKind* result) { VLOG(1) << "ParseFusionKind"; - if (lexer_.GetKind() != TokKind::kFusionKind) { + if (lexer_.GetKind() != TokKind::kIdent) { return TokenError("expects fusion kind"); } - *result = lexer_.GetFusionKindVal(); + string val = lexer_.GetStrVal(); + auto status_or_result = StringToFusionKind(val); + if (!status_or_result.ok()) { + return TokenError( + Printf("expects fusion kind but sees: %s, error: %s", val.c_str(), + status_or_result.status().error_message().c_str())); + } + *result = status_or_result.ValueOrDie(); + lexer_.Lex(); + return true; +} + +bool HloParser::ParseRandomDistribution(RandomDistribution* result) { + VLOG(1) << "ParseRandomDistribution"; + if (lexer_.GetKind() != TokKind::kIdent) { + return TokenError("expects random distribution"); + } + string val = lexer_.GetStrVal(); + auto status_or_result = StringToRandomDistribution(val); + if (!status_or_result.ok()) { + return TokenError( + Printf("expects random distribution but sees: %s, error: %s", + val.c_str(), status_or_result.status().error_message().c_str())); + } + *result = status_or_result.ValueOrDie(); lexer_.Lex(); return true; } diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 90cdb87a1e..69d48d65bc 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -654,6 +654,31 @@ ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) { %outfeed.1 = () outfeed((u32[3]{0}, pred[]) %infeed.1) } +)" +}, +// Rng +{ +"Rng", +R"(HloModule rng_module: + +ENTRY %Rng () -> f32[8] { + %constant = f32[] constant(0) + %constant.1 = f32[] constant(1) + ROOT %rng = f32[8]{0} rng(f32[] %constant, f32[] %constant.1), distribution=rng_uniform +} + +)" +}, +// Reduce precision +{ +"ReducePrevison", +R"(HloModule reduce_precision: + +ENTRY %ReducePrecision () -> f32[1] { + %constant = f32[1]{0} constant({3.14159}) + ROOT %reduce-precision = f32[1]{0} reduce-precision(f32[1]{0} %constant), exponent_bits=8, mantissa_bits=10 +} + )" } }); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h index 07e48804d0..7928bee5c2 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h @@ -18,6 +18,9 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/platform/types.h" + namespace xla { namespace tools { @@ -60,10 +63,9 @@ enum class TokKind { kDimLabels, // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} kDxD, // [0-9]+(x[0-9]+)+ kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* + kIdent, // other identifiers kString, // "abcd\"\n" kShape, // f32[2,3]{1,0} - kOpcode, // add - kFusionKind, // kLoop, kOutput, ... kInt, // 42 kDecimal, // 4.2 }; -- GitLab From cae852a32ee8ef86d4a58512c1177359c5bfd465 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 30 Nov 2017 21:40:48 -0800 Subject: [PATCH 1028/1801] Change bazel-mirror to mirror.bazel (#15007) --- tensorflow/contrib/makefile/download_dependencies.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index 19e25ad767..904118e2d9 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -26,13 +26,13 @@ if [ ! -f $BZL_FILE_PATH ]; then exit 1; fi -EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" -FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.zip" "${BZL_FILE_PATH}" | head -n1)" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" -- GitLab From e747fc911f0dc6f1bf0b9c0ac0b57ad1a704c542 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 23:20:35 -0800 Subject: [PATCH 1029/1801] Add additional linkopts argument to tf_custom_op_library. PiperOrigin-RevId: 177555877 --- tensorflow/tensorflow.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 76ef59484f..709a2d46e1 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1197,7 +1197,7 @@ check_deps = rule( # Helper to build a dynamic library (.so) from the sources containing # implementations of custom ops and kernels. -def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]): +def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): cuda_deps = [ clean_dep("//tensorflow/core:stream_executor_headers_lib"), "@local_config_cuda//cuda:cuda_headers", @@ -1226,7 +1226,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]): deps=deps + if_cuda(cuda_deps), data=[name + "_check_deps"], copts=tf_copts(), - linkopts=select({ + linkopts=linkopts + select({ "//conditions:default": [ "-lm", ], -- GitLab From 370e521762f3cbd558a7e56992e3b062236b626f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 23:46:38 -0800 Subject: [PATCH 1030/1801] Adds a fisher block for fully connected recurrent layers. `FullyConnectedSeriesFB` uses an approximation to the Fisher information matrix designed for RNNs. This CL also adds support for dtypes other than `float32` to `fisher_factors`. PiperOrigin-RevId: 177558080 --- .../python/kernel_tests/fisher_blocks_test.py | 54 ++- .../kernel_tests/fisher_factors_test.py | 117 +++++- tensorflow/contrib/kfac/python/ops/BUILD | 1 + .../contrib/kfac/python/ops/fisher_blocks.py | 210 +++++++++- .../contrib/kfac/python/ops/fisher_factors.py | 383 ++++++++++++++++-- tensorflow/contrib/kfac/python/ops/utils.py | 31 +- 6 files changed, 751 insertions(+), 45 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py index 5f2b5c6cac..bdc950a4e6 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py @@ -301,8 +301,7 @@ class FullyConnectedDiagonalFB(test.TestCase): multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps( self.w, [self.inputs], [self.outputs], [self.output_grads]) multiply_result_small, multiply_inverse_result_small = ( - self.runFisherBlockOps(self.w, - np.split(self.inputs, 2), + self.runFisherBlockOps(self.w, np.split(self.inputs, 2), np.split(self.outputs, 2), np.split(self.output_grads, 2))) @@ -584,8 +583,7 @@ class ConvDiagonalFBTest(test.TestCase): multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps( self.w, [self.inputs], [self.outputs], [self.output_grads]) multiply_result_small, multiply_inverse_result_small = ( - self.runFisherBlockOps(self.w, - np.split(self.inputs, 2), + self.runFisherBlockOps(self.w, np.split(self.inputs, 2), np.split(self.outputs, 2), np.split(self.output_grads, 2))) @@ -608,8 +606,9 @@ class ConvDiagonalFBTest(test.TestCase): self.kernel_size, self.kernel_size, self.input_channels + 1, self.output_channels ]) - expected_result = (expected_result[:, :, 0:-1, :], np.reshape( - expected_result[:, :, -1, :], [self.output_channels])) + expected_result = (expected_result[:, :, 0:-1, :], + np.reshape(expected_result[:, :, -1, :], + [self.output_channels])) self.assertEqual(len(result), 2) self.assertAllClose(expected_result[0], result[0]) @@ -692,8 +691,8 @@ class ConvKFCBasicFBTest(test.TestCase): sess.run(block._input_factor.make_inverse_update_ops()) sess.run(block._output_factor.make_inverse_update_ops()) - vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32), np.arange( - 2, 4).reshape(2, 1).astype(np.float32)) + vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32), + np.arange(2, 4).reshape(2, 1).astype(np.float32)) output = block.multiply_inverse((array_ops.constant(vector[0]), array_ops.constant(vector[1]))) @@ -776,11 +775,50 @@ class ConvKFCBasicFBTest(test.TestCase): self.assertAllClose(output_flat, explicit) +class FullyConnectedSeriesFBTest(test.TestCase): + + def testFullyConnectedSeriesFBInit(self): + with ops.Graph().as_default(): + random_seed.set_random_seed(200) + inputs = array_ops.constant([1., 2.]) + outputs = array_ops.constant([3., 4.]) + block = fb.FullyConnectedSeriesFB( + lc.LayerCollection(), inputs=[inputs], outputs=[outputs]) + self.assertAllEqual([outputs], block.tensors_to_compute_grads()) + + def testInstantiateFactorsHasBias(self): + with ops.Graph().as_default(): + random_seed.set_random_seed(200) + inputs = array_ops.constant([[1., 2.], [3., 4.]]) + outputs = array_ops.constant([[3., 4.], [5., 6.]]) + block = fb.FullyConnectedSeriesFB( + lc.LayerCollection(), + inputs=[inputs], + outputs=[outputs], + has_bias=True) + grads = outputs**2 + block.instantiate_factors(((grads,),), 0.5) + + def testInstantiateFactorsNoBias(self): + with ops.Graph().as_default(): + random_seed.set_random_seed(200) + inputs = array_ops.constant([[1., 2.], [3., 4.]]) + outputs = array_ops.constant([[3., 4.], [5., 6.]]) + block = fb.FullyConnectedSeriesFB( + lc.LayerCollection(), + inputs=[inputs], + outputs=[outputs], + has_bias=False) + grads = outputs**2 + block.instantiate_factors(((grads,),), 0.5) + + def as_tensors(tensor_or_tuple): """Converts a potentially nested tuple of np.array to Tensors.""" if isinstance(tensor_or_tuple, (tuple, list)): return tuple(as_tensors(t) for t in tensor_or_tuple) return ops.convert_to_tensor(tensor_or_tuple) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index 5e2ce5a309..f4a017fc77 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -67,6 +67,10 @@ class FisherFactorTestingDummy(ff.FisherFactor): def _num_sources(self): return 1 + @property + def _dtype(self): + return dtypes.float32 + def _compute_new_cov(self): raise NotImplementedError @@ -94,6 +98,10 @@ class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor): def _num_sources(self): return 1 + @property + def _dtype(self): + return dtypes.float32 + def _compute_new_cov(self): raise NotImplementedError @@ -121,7 +129,7 @@ class NumericalUtilsTest(test.TestCase): normalizer = 10. x = npr.randn(100, 3) - cov = ff._compute_cov(array_ops.constant(x), normalizer) + cov = ff._compute_cov(array_ops.constant(x), normalizer=normalizer) np_cov = np.dot(x.T, x) / normalizer self.assertAllClose(sess.run(cov), np_cov) @@ -267,13 +275,13 @@ class InverseProvidingFactorTest(test.TestCase): for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1): factor.register_damped_inverse(1. / i) ops = factor.make_inverse_update_ops() - self.assertEqual(ff.EIGENVALUE_DECOMPOSITION_THRESHOLD, len(ops)) + self.assertEqual(1, len(ops)) sess.run(tf_variables.global_variables_initializer()) new_invs = [] + sess.run(ops) for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1): # The inverse op will assign the damped inverse of cov to the inv var. - sess.run(ops[i - 1]) new_invs.append(sess.run(factor._inverses_by_damping[1. / i])) # We want to see that the new invs are all different from each other. for i in range(len(new_invs)): @@ -331,6 +339,16 @@ class FullFactorTest(test.TestCase): factor = ff.FullFactor((tensor,), 32) self.assertEqual([6, 6], factor.get_cov().get_shape().as_list()) + def testFullFactorInitFloat64(self): + with tf_ops.Graph().as_default(): + dtype = dtypes.float64_ref + random_seed.set_random_seed(200) + tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') + factor = ff.FullFactor((tensor,), 32) + cov = factor.get_cov() + self.assertEqual(cov.dtype, dtype) + self.assertEqual([6, 6], cov.get_shape().as_list()) + def testMakeCovarianceUpdateOp(self): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) @@ -351,6 +369,16 @@ class NaiveDiagonalFactorTest(test.TestCase): factor = ff.NaiveDiagonalFactor((tensor,), 32) self.assertEqual([6, 1], factor.get_cov().get_shape().as_list()) + def testNaiveDiagonalFactorInitFloat64(self): + with tf_ops.Graph().as_default(): + dtype = dtypes.float64_ref + random_seed.set_random_seed(200) + tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') + factor = ff.NaiveDiagonalFactor((tensor,), 32) + cov = factor.get_cov() + self.assertEqual(cov.dtype, dtype) + self.assertEqual([6, 1], cov.get_shape().as_list()) + def testMakeCovarianceUpdateOp(self): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) @@ -364,18 +392,25 @@ class NaiveDiagonalFactorTest(test.TestCase): class FullyConnectedKroneckerFactorTest(test.TestCase): - def _testFullyConnectedKroneckerFactorInit(self, has_bias, final_shape): + def _testFullyConnectedKroneckerFactorInit(self, + has_bias, + final_shape, + dtype=dtypes.float32_ref): with tf_ops.Graph().as_default(): random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), name='a/b/c') + tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') factor = ff.FullyConnectedKroneckerFactor((tensor,), has_bias=has_bias) - self.assertEqual(final_shape, factor.get_cov().get_shape().as_list()) + cov = factor.get_cov() + self.assertEqual(cov.dtype, dtype) + self.assertEqual(final_shape, cov.get_shape().as_list()) def testFullyConnectedKroneckerFactorInitNoBias(self): - self._testFullyConnectedKroneckerFactorInit(False, [3, 3]) + for dtype in (dtypes.float32_ref, dtypes.float64_ref): + self._testFullyConnectedKroneckerFactorInit(False, [3, 3], dtype=dtype) def testFullyConnectedKroneckerFactorInitWithBias(self): - self._testFullyConnectedKroneckerFactorInit(True, [4, 4]) + for dtype in (dtypes.float32_ref, dtypes.float64_ref): + self._testFullyConnectedKroneckerFactorInit(True, [4, 4], dtype=dtype) def testMakeCovarianceUpdateOpWithBias(self): with tf_ops.Graph().as_default(), self.test_session() as sess: @@ -418,6 +453,18 @@ class ConvInputKroneckerFactorTest(test.TestCase): self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], factor.get_cov().get_shape().as_list()) + def testConvInputKroneckerFactorInitFloat64(self): + with tf_ops.Graph().as_default(): + dtype = dtypes.float64_ref + random_seed.set_random_seed(200) + tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') + factor = ff.ConvInputKroneckerFactor( + tensor, (1, 2, 3, 4), 3, 2, has_bias=True) + cov = factor.get_cov() + self.assertEqual(cov.dtype, dtype) + self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], + cov.get_shape().as_list()) + def testMakeCovarianceUpdateOpWithBias(self): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) @@ -453,6 +500,16 @@ class ConvOutputKroneckerFactorTest(test.TestCase): factor = ff.ConvOutputKroneckerFactor((tensor,)) self.assertEqual([5, 5], factor.get_cov().get_shape().as_list()) + def testConvOutputKroneckerFactorInitFloat64(self): + with tf_ops.Graph().as_default(): + dtype = dtypes.float64_ref + random_seed.set_random_seed(200) + tensor = array_ops.ones((2, 3, 4, 5), dtype=dtype, name='a/b/c') + factor = ff.ConvOutputKroneckerFactor((tensor,)) + cov = factor.get_cov() + self.assertEqual(cov.dtype, dtype) + self.assertEqual([5, 5], cov.get_shape().as_list()) + def testConvOutputKroneckerFactorInitNotEnoughDims(self): with tf_ops.Graph().as_default(): random_seed.set_random_seed(200) @@ -471,5 +528,49 @@ class ConvOutputKroneckerFactorTest(test.TestCase): self.assertAllClose([[43, 46.5], [46.5, 51.5]], new_cov) +class FullyConnectedMultiKFTest(test.TestCase): + + def testFullyConnectedMultiKFInit(self): + with tf_ops.Graph().as_default(): + random_seed.set_random_seed(200) + tensor = array_ops.ones((2, 3), name='a/b/c') + tensor_list = [tensor] + factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False) + self.assertEqual([3, 3], factor.get_cov().get_shape().as_list()) + + def testFullyConnectedMultiKFInitFloat64(self): + with tf_ops.Graph().as_default(): + dtype = dtypes.float64_ref + random_seed.set_random_seed(200) + tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') + tensor_list = [tensor] + factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False) + cov = factor.get_cov() + self.assertEqual(cov.dtype, dtype) + self.assertEqual([3, 3], cov.get_shape().as_list()) + + def testMakeCovarianceUpdateOpWithBias(self): + with tf_ops.Graph().as_default(), self.test_session() as sess: + random_seed.set_random_seed(200) + tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') + tensor_list = [tensor] + factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=True) + + sess.run(tf_variables.global_variables_initializer()) + new_cov = sess.run(factor.make_covariance_update_op(.5)) + self.assertAllClose([[3, 3.5, 1], [3.5, 5.5, 1.5], [1, 1.5, 1]], new_cov) + + def testMakeCovarianceUpdateOpNoBias(self): + with tf_ops.Graph().as_default(), self.test_session() as sess: + random_seed.set_random_seed(200) + tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') + tensor_list = [tensor] + factor = ff.FullyConnectedMultiKF((tensor_list,)) + + sess.run(tf_variables.global_variables_initializer()) + new_cov = sess.run(factor.make_covariance_update_op(.5)) + self.assertAllClose([[3, 3.5], [3.5, 5.5]], new_cov) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/kfac/python/ops/BUILD b/tensorflow/contrib/kfac/python/ops/BUILD index b2272a4cee..3d731c7bc2 100644 --- a/tensorflow/contrib/kfac/python/ops/BUILD +++ b/tensorflow/contrib/kfac/python/ops/BUILD @@ -38,6 +38,7 @@ py_library( ":utils", "//tensorflow/python:array_ops", "//tensorflow/python:framework_ops", + "//tensorflow/python:init_ops", "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", "//tensorflow/python:special_math_ops", diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index e822a1213a..cf734d56ad 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -38,6 +38,7 @@ from __future__ import division from __future__ import print_function import abc +import enum # pylint: disable=g-bad-import-order import six @@ -153,7 +154,7 @@ class FullFB(FisherBlock): self._factor.register_damped_inverse(damping) def multiply_inverse(self, vector): - inverse = self._factor.get_inverse(self._damping) + inverse = self._factor.get_damped_inverse(self._damping) out_flat = math_ops.matmul(inverse, utils.tensors_to_column(vector)) return utils.column_to_tensors(vector, out_flat) @@ -411,7 +412,7 @@ class ConvDiagonalFB(FisherBlock): (self._strides[1] * self._strides[2])) if NORMALIZE_DAMPING_POWER: - damping /= self._num_locations ** NORMALIZE_DAMPING_POWER + damping /= self._num_locations**NORMALIZE_DAMPING_POWER self._damping = damping self._factor = self._layer_collection.make_or_get_factor( @@ -487,8 +488,9 @@ class KroneckerProductFB(FisherBlock): return 1.0 def multiply_inverse(self, vector): - left_factor_inv = self._input_factor.get_inverse(self._input_damping) - right_factor_inv = self._output_factor.get_inverse(self._output_damping) + left_factor_inv = self._input_factor.get_damped_inverse(self._input_damping) + right_factor_inv = self._output_factor.get_damped_inverse( + self._output_damping) reshaped_vector = utils.layer_params_to_mat2d(vector) reshaped_out = math_ops.matmul(left_factor_inv, math_ops.matmul(reshaped_vector, @@ -720,3 +722,203 @@ def _concat_along_batch_dim(tensor_list): def _num_conv_locations(input_shape, strides): """Returns the number of locations a Conv kernel is applied to.""" return input_shape[1] * input_shape[2] // (strides[1] * strides[2]) + + +class SeriesFBApproximation(enum.IntEnum): + """See FullyConnectedSeriesFB.__init__ for description and usage.""" + option1 = 1 + option2 = 2 + + +class FullyConnectedSeriesFB(FisherBlock): + """FisherBlock for fully-connected RNN cells. + + See the following preprint for details: + https://openreview.net/pdf?id=HyMTkQZAb + + See the end of the appendix of the paper for a pseudo-code of the + algorithm being implemented by multiply_inverse here. Note that we are + using pre-computed versions of certain matrix-matrix products to speed + things up. This is explicitly explained wherever it is done. + """ + + def __init__(self, + layer_collection, + inputs, + outputs, + has_bias=False, + option=SeriesFBApproximation.option2): + """Constructs a new `FullyConnectedSeriesFB`. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + inputs: List of tensors of shape [batch_size, input_size]. + Inputs to the layer. + outputs: List of tensors of shape [batch_size, input_size]. + Outputs of the layer (before activations). + has_bias: Whether the layer includes a bias parameter. + option: A `SeriesFBApproximation` specifying the simplifying assumption + to be used in this block. `option1` approximates the cross-covariance + over time as a symmetric matrix, while `option2` makes + the assumption that training sequences are infinitely long. See section + 3.5 of the paper for more details. + """ + + assert len(inputs) == len(outputs) + # We need to make sure inputs and outputs are tuples and not lists so that + # they get hashed by layer_collection.make_or_get_factor properly. + self._inputs = tuple(inputs) + self._outputs = tuple(outputs) + self._has_bias = has_bias + self._num_timesteps = len(inputs) + self._option = option + + super(FullyConnectedSeriesFB, self).__init__(layer_collection) + + @property + def num_registered_minibatches(self): + # TODO(b/69411207): Add support for registering additional minibatches. + return 1 + + def instantiate_factors(self, grads_list, damping): + + self._input_factor = self._layer_collection.make_or_get_factor( + fisher_factors.FullyConnectedMultiKF, ((self._inputs,), self._has_bias)) + + self._output_factor = self._layer_collection.make_or_get_factor( + fisher_factors.FullyConnectedMultiKF, (grads_list,)) + + damping /= self._num_timesteps**NORMALIZE_DAMPING_POWER + + pi = utils.compute_pi(self._input_factor.get_cov(), + self._output_factor.get_cov()) + + self._damping_input = (damping**0.5) * pi + self._damping_output = (damping**0.5) / pi + + if self._option == SeriesFBApproximation.option1: + self._input_factor.register_option1quants(self._damping_input) + self._output_factor.register_option1quants(self._damping_output) + elif self._option == SeriesFBApproximation.option2: + self._input_factor.register_option2quants(self._damping_input) + self._output_factor.register_option2quants(self._damping_output) + else: + raise ValueError( + "Unrecognized FullyConnectedSeriesFB approximation: {}".format( + self._option)) + + def multiply_inverse(self, vector): + # pylint: disable=invalid-name + + Z = utils.layer_params_to_mat2d(vector) + + # Derivations were done for "batch_dim==1" case so we need to convert to + # that orientation: + Z = array_ops.transpose(Z) + + if self._option == SeriesFBApproximation.option1: + + # Note that L_A = A0^(-1/2) * U_A and L_G = G0^(-1/2) * U_G. + L_A, psi_A = self._input_factor.get_option1quants(self._damping_input) + L_G, psi_G = self._output_factor.get_option1quants(self._damping_output) + + def gamma(x): + # We are assuming that each case has the same number of time-steps. + # If this stops being the case one shouldn't simply replace this T + # with its average value. Instead, one needs to go back to the + # definition of the gamma function from the paper. + T = self._num_timesteps + return (1 - x)**2 / (T * (1 - x**2) - 2 * x * (1 - x**T)) + + # Y = gamma( psi_G*psi_A^T ) (computed element-wise) + # Even though Y is Z-independent we are recomputing it from the psi's + # each since Y depends on both A and G quantities, and it is relatively + # cheap to compute. + Y = gamma(array_ops.reshape(psi_G, [int(psi_G.shape[0]), -1]) * psi_A) + + # Z = L_G^T * Z * L_A + # This is equivalent to the following computation from the original + # pseudo-code: + # Z = G0^(-1/2) * Z * A0^(-1/2) + # Z = U_G^T * Z * U_A + Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A), transpose_a=True) + + # Z = Z .* Y + Z *= Y + + # Z = L_G * Z * L_A^T + # This is equivalent to the following computation from the original + # pseudo-code: + # Z = U_G * Z * U_A^T + # Z = G0^(-1/2) * Z * A0^(-1/2) + Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A, transpose_b=True)) + + elif self._option == SeriesFBApproximation.option2: + + # Note that P_A = A_1^T * A_0^(-1) and P_G = G_1^T * G_0^(-1), + # and K_A = A_0^(-1/2) * E_A and K_G = G_0^(-1/2) * E_G. + P_A, K_A, mu_A = self._input_factor.get_option2quants(self._damping_input) + P_G, K_G, mu_G = self._output_factor.get_option2quants( + self._damping_output) + + # Our approach differs superficially from the pseudo-code in the paper + # in order to reduce the total number of matrix-matrix multiplies. + # In particular, the first three computations in the pseudo code are + # Z = G0^(-1/2) * Z * A0^(-1/2) + # Z = Z - hPsi_G^T * Z * hPsi_A + # Z = E_G^T * Z * E_A + # Noting that hPsi = C0^(-1/2) * C1 * C0^(-1/2), so that + # C0^(-1/2) * hPsi = C0^(-1) * C1 * C0^(-1/2) = P^T * C0^(-1/2) + # the entire computation can be written as + # Z = E_G^T * (G0^(-1/2) * Z * A0^(-1/2) + # - hPsi_G^T * G0^(-1/2) * Z * A0^(-1/2) * hPsi_A) * E_A + # = E_G^T * (G0^(-1/2) * Z * A0^(-1/2) + # - G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2)) * E_A + # = E_G^T * G0^(-1/2) * Z * A0^(-1/2) * E_A + # - E_G^T* G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2) * E_A + # = K_G^T * Z * K_A - K_G^T * P_G * Z * P_A^T * K_A + # This final expression is computed by the following two lines: + # Z = Z - P_G * Z * P_A^T + Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A, transpose_b=True)) + # Z = K_G^T * Z * K_A + Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A), transpose_a=True) + + # Z = Z ./ (1*1^T - mu_G*mu_A^T) + # Be careful with the outer product. We don't want to accidentally + # make it an inner-product instead. + tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A + # Prevent some numerical issues by setting 0 eigs to 1.0 + tmp += 1.0 * array_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype) + Z /= tmp + + # We now perform the transpose/reverse version of the operations + # derived above, whose derivation from the original pseudo-code is + # analgous. + # Z = K_G * Z * K_A^T + Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A, transpose_b=True)) + + # Z = Z - P_G^T * Z * P_A + Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A), transpose_a=True) + + # Z = normalize (1/E[T]) * Z + # Note that this normalization is done because we compute the statistics + # by averaging, not summing, over time. (And the gradient is presumably + # summed over time, not averaged, and thus their scales are different.) + Z /= array_ops.cast(self._num_timesteps, Z.dtype) + + # Convert back to the "batch_dim==0" orientation. + Z = array_ops.transpose(Z) + + return utils.mat2d_to_layer_params(vector, Z) + + # pylint: enable=invalid-name + + def multiply(self, vector): + raise NotImplementedError + + def tensors_to_compute_grads(self): + return self._outputs + + def num_inputs(self): + return len(self._inputs) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index 6c1dd0ae40..ff8636785a 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -27,6 +27,8 @@ import six from tensorflow.contrib.kfac.python.ops import utils from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import special_math_ops @@ -101,7 +103,7 @@ def diagonal_covariance_initializer(shape, dtype, partition_info): # pylint: di return array_ops.ones(shape, dtype) -def _compute_cov(tensor, normalizer=None): +def _compute_cov(tensor, tensor_right=None, normalizer=None): """Compute the empirical second moment of the rows of a 2D Tensor. This function is meant to be applied to random matrices for which the true row @@ -109,6 +111,8 @@ def _compute_cov(tensor, normalizer=None): Args: tensor: A 2D Tensor. + tensor_right: An optional 2D Tensor. If provided, this function computes + the matrix product tensor^T * tensor_right instead of tensor^T * tensor. normalizer: optional scalar for the estimator (by default, the normalizer is the number of rows of tensor). @@ -117,9 +121,14 @@ def _compute_cov(tensor, normalizer=None): """ if normalizer is None: normalizer = array_ops.shape(tensor)[0] - cov = (math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast( - normalizer, tensor.dtype)) - return (cov + array_ops.transpose(cov)) / math_ops.cast(2, cov.dtype) + if tensor_right is None: + cov = ( + math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast( + normalizer, tensor.dtype)) + return (cov + array_ops.transpose(cov)) / math_ops.cast(2.0, cov.dtype) + else: + return (math_ops.matmul(tensor, tensor_right, transpose_a=True) / + math_ops.cast(normalizer, tensor.dtype)) def _append_homog(tensor): @@ -135,7 +144,7 @@ def _append_homog(tensor): rank = len(tensor.shape.as_list()) shape = array_ops.concat([array_ops.shape(tensor)[:-1], [1]], axis=0) ones = array_ops.ones(shape, dtype=tensor.dtype) - return array_ops.concat([tensor, ones], axis=rank-1) + return array_ops.concat([tensor, ones], axis=rank - 1) def scope_string_from_params(params): @@ -173,8 +182,8 @@ def scope_string_from_params(params): elif isinstance(param, (tf_ops.Tensor, variables.Variable)): name_parts.append(scope_string_from_name(param)) else: - raise ValueError( - "Encountered an unsupported param type {}".format(type(param))) + raise ValueError("Encountered an unsupported param type {}".format( + type(param))) return "_".join(name_parts) @@ -225,6 +234,10 @@ class FisherFactor(object): """ pass + @abc.abstractproperty + def _dtype(self): + pass + @property def _cov_initializer(self): return covariance_initializer @@ -236,7 +249,8 @@ class FisherFactor(object): "cov", initializer=self._cov_initializer, shape=self._cov_shape, - trainable=False) + trainable=False, + dtype=self._dtype) @abc.abstractmethod def _compute_new_cov(self, idx=0): @@ -273,6 +287,13 @@ class InverseProvidingFactor(FisherFactor): _cov_shape properties. """ + # TODO(b/69108481): This class (and its subclasses) should be refactored to + # serve the matrix quantities it computes as both (potentially stale) + # variables, updated by the inverse update ops, and fresh values stored in + # tensors that recomputed once every session.run() call. Currently matpower + # and damp_inverse have the former behavior, while eigendecomposition has + # the latter. + def __init__(self): self._inverses_by_damping = {} self._matpower_by_exp_and_damping = {} @@ -293,7 +314,8 @@ class InverseProvidingFactor(FisherFactor): "inv_damp{}".format(damping_string), initializer=inverse_initializer, shape=self._cov_shape, - trainable=False) + trainable=False, + dtype=self._dtype) self._inverses_by_damping[damping] = inv def register_matpower(self, exp, damping): @@ -311,7 +333,8 @@ class InverseProvidingFactor(FisherFactor): "matpower_exp{}_damp{}".format(exp_string, damping_string), initializer=inverse_initializer, shape=self._cov_shape, - trainable=False) + trainable=False, + dtype=self._dtype) self._matpower_by_exp_and_damping[(exp, damping)] = matpower def register_eigendecomp(self): @@ -325,8 +348,9 @@ class InverseProvidingFactor(FisherFactor): num_inverses = len(self._inverses_by_damping) matrix_power_registered = bool(self._matpower_by_exp_and_damping) - use_eig = (self._eigendecomp or matrix_power_registered or - num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) + use_eig = ( + self._eigendecomp or matrix_power_registered or + num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) if use_eig: self.register_eigendecomp() # ensures self._eigendecomp is set @@ -347,21 +371,30 @@ class InverseProvidingFactor(FisherFactor): for (exp, damping), matpower in self._matpower_by_exp_and_damping.items(): ops.append( matpower.assign( - math_ops.matmul(eigenvectors * (clipped_eigenvalues + damping)** - exp, array_ops.transpose(eigenvectors)))) + math_ops.matmul(eigenvectors * + (clipped_eigenvalues + damping)**exp, + array_ops.transpose(eigenvectors)))) + # These ops share computation and should be run on a single device. + ops = [control_flow_ops.group(ops)] else: for damping, inv in self._inverses_by_damping.items(): ops.append(inv.assign(utils.posdef_inv(self._cov, damping))) return ops - def get_inverse(self, damping): + def get_damped_inverse(self, damping): return self._inverses_by_damping[damping] def get_matpower(self, exp, damping): + # Note that this function returns a variable which gets updated by the + # inverse ops. It may be stale / inconsistent with the latest value of + # get_cov(). return self._matpower_by_exp_and_damping[(exp, damping)] def get_eigendecomp(self): + # Unlike get_inverse and get_matpower this doesn't retrieve a stored + # variable, but instead always computes a fresh version from the current + # value of get_cov(). return self._eigendecomp @@ -402,6 +435,10 @@ class FullFactor(InverseProvidingFactor): def _num_sources(self): return len(self._params_grads_flat) + @property + def _dtype(self): + return self._params_grads_flat[0].dtype + def _compute_new_cov(self, idx=0): # This will be a very basic rank 1 estimate with _maybe_colocate_with(self._params_grads_flat[idx], @@ -458,6 +495,10 @@ class NaiveDiagonalFactor(DiagonalFactor): def _num_sources(self): return len(self._params_grads) + @property + def _dtype(self): + return self._params_grads[0].dtype + def _compute_new_cov(self, idx=0): with _maybe_colocate_with(self._params_grads[idx], self._colocate_cov_ops_with_inputs): @@ -497,8 +538,8 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): self._outputs_grads = outputs_grads self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs self._batch_size = array_ops.shape(inputs)[0] - self._orig_tensors_name = scope_string_from_params((inputs,) + - tuple(outputs_grads)) + self._orig_tensors_name = scope_string_from_params( + (inputs,) + tuple(outputs_grads)) # Note that we precompute the required operations on the inputs since the # inputs don't change with the 'idx' argument to _compute_new_cov. (Only @@ -522,6 +563,10 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): def _num_sources(self): return len(self._outputs_grads) + @property + def _dtype(self): + return self._outputs_grads[0].dtype + def _compute_new_cov(self, idx=0): # The well-known special formula that uses the fact that the entry-wise # square of an outer product is the outer-product of the entry-wise squares. @@ -572,8 +617,8 @@ class ConvDiagonalFactor(DiagonalFactor): self._outputs_grads = outputs_grads self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs - self._orig_tensors_name = scope_string_from_name((inputs,) - + tuple(outputs_grads)) + self._orig_tensors_name = scope_string_from_name( + (inputs,) + tuple(outputs_grads)) # Note that we precompute the required operations on the inputs since the # inputs don't change with the 'idx' argument to _compute_new_cov. (Only @@ -604,13 +649,19 @@ class ConvDiagonalFactor(DiagonalFactor): @property def _cov_shape(self): filter_height, filter_width, in_channels, out_channels = self._filter_shape - return [filter_height * filter_width * in_channels + self._has_bias, - out_channels] + return [ + filter_height * filter_width * in_channels + self._has_bias, + out_channels + ] @property def _num_sources(self): return len(self._outputs_grads) + @property + def _dtype(self): + return self._outputs_grads[0].dtype + def _compute_new_cov(self, idx=0): with _maybe_colocate_with(self._outputs_grads[idx], self._colocate_cov_ops_with_inputs): @@ -644,8 +695,7 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor): Args: tensors: List of Tensors of shape [batch_size, n]. Represents either a layer's inputs or its output's gradients. - has_bias: bool. If True, assume this factor is for the layer's inputs and - append '1' to each row. + has_bias: bool. If True, append '1' to each row. colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with their inputs. """ @@ -670,6 +720,10 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor): def _num_sources(self): return len(self._tensors) + @property + def _dtype(self): + return self._tensors[0].dtype + def _compute_new_cov(self, idx=0): with _maybe_colocate_with(self._tensors[idx], self._colocate_cov_ops_with_inputs): @@ -735,6 +789,10 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): def _num_sources(self): return 1 + @property + def _dtype(self): + return self._inputs.dtype + def _compute_new_cov(self, idx=0): if idx != 0: raise ValueError("ConvInputKroneckerFactor only supports idx = 0") @@ -799,9 +857,288 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): def _num_sources(self): return len(self._outputs_grads) + @property + def _dtype(self): + return self._outputs_grads[0].dtype + def _compute_new_cov(self, idx=0): with _maybe_colocate_with(self._outputs_grads[idx], self._colocate_cov_ops_with_inputs): reshaped_tensor = array_ops.reshape(self._outputs_grads[idx], [-1, self._out_channels]) return _compute_cov(reshaped_tensor) + + +class FullyConnectedMultiKF(InverseProvidingFactor): + """Kronecker factor for a fully connected recurrent layer.""" + + def __init__(self, + tensor_lists, + has_bias=False, + colocate_cov_ops_with_inputs=False): + """Constructs a new `FullyConnectedMultiKF`. + + Args: + tensor_lists: List of lists of Tensors of shape [batch_size, n]. Layer + inputs at each timestep. + has_bias: bool. If True, assume this factor is for the layer's inputs and + append '1' to each row. + colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with + their inputs. + """ + + self._orig_tensors_name = scope_string_from_params(tensor_lists) + self._batch_size = array_ops.shape(tensor_lists[0][0])[0] + self._num_timesteps = len(tensor_lists[0]) + + tensors = tuple( + array_ops.concat(tensor_list, 0) for tensor_list in tensor_lists) + if has_bias: + tensors = tuple(_append_homog(tensor) for tensor in tensors) + self._tensors = tensors + + self._cov_dt1 = None + self._option1quants_by_damping = {} + self._option2quants_by_damping = {} + self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs + + super(FullyConnectedMultiKF, self).__init__() + + @property + def _var_scope(self): + return "ff_fc_multi/" + self._orig_tensors_name + + @property + def _num_sources(self): + return len(self._tensors) + + @property + def _dtype(self): + return self._tensors[0].dtype + + def make_covariance_update_op(self, ema_decay): + with _maybe_colocate_with(self._tensors, + self._colocate_cov_ops_with_inputs): + op = super(FullyConnectedMultiKF, + self).make_covariance_update_op(ema_decay) + + if self._cov_dt1 is not None: + new_cov_dt1 = math_ops.add_n( + tuple( + self._compute_new_cov_dt1(idx) + for idx in range(self._num_sources))) + op2 = moving_averages.assign_moving_average( + self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS) + + # TODO(b/69112164): + # It's important that _cov and _cov_dt1 remain consistent with each + # other while the inverse ops are happening. How can we ensure this? + # We will need to add explicit synchronization for this to + # work with asynchronous training. + op = control_flow_ops.group(op, op2) + + return op + + def _compute_new_cov(self, idx=0): + tensor = self._tensors[idx] + normalizer = self._num_timesteps * self._batch_size + return _compute_cov(tensor, normalizer=normalizer) + + def _compute_new_cov_dt1(self, idx=0): + tensor = self._tensors[idx] + normalizer = self._num_timesteps * self._batch_size + tensor_present = tensor[:-self._batch_size, :] + tensor_future = tensor[self._batch_size:, :] + return _compute_cov( + tensor_future, tensor_right=tensor_present, normalizer=normalizer) + + @property + def _cov_shape(self): + size = self._tensors[0].shape[1] + return [size, size] + + @property + def _vec_shape(self): + size = self._tensors[0].shape[1] + return [size] + + def get_option1quants(self, damping): + return self._option1quants_by_damping[damping] + + def get_option2quants(self, damping): + return self._option2quants_by_damping[damping] + + def get_cov_dt1(self): + assert self._cov_dt1 is not None + return self._cov_dt1 + + def register_cov_dt1(self): + """Create a variable representing temporal cross-covariance. + + This is technically the second moment, not covariance, since it's + not mean subtracted. + """ + if self._cov_dt1 is None: + with variable_scope.variable_scope(self._var_scope): + self._cov_dt1 = variable_scope.get_variable( + "cov_dt1", + initializer=self._cov_initializer, + shape=self._cov_shape, + trainable=False, + dtype=self._dtype) + + def register_option1quants(self, damping): + + self.register_eigendecomp() + self.register_cov_dt1() + + if damping not in self._option1quants_by_damping: + # It's questionable as to whether we should initialize with stuff like + # this at all. Ideally these values should never be used until they are + # updated at least once. + damping_string = scalar_or_tensor_to_string(damping) + with variable_scope.variable_scope(self._var_scope): + Lmat = variable_scope.get_variable( # pylint: disable=invalid-name + "Lmat_damp{}".format(damping_string), + initializer=inverse_initializer, + shape=self._cov_shape, + trainable=False, + dtype=self._dtype) + psi = variable_scope.get_variable( + "psi_damp{}".format(damping_string), + initializer=init_ops.ones_initializer, + shape=self._vec_shape, + trainable=False, + dtype=self._dtype) + + self._option1quants_by_damping[damping] = (Lmat, psi) + + def register_option2quants(self, damping): + + self.register_eigendecomp() + self.register_cov_dt1() + + if damping not in self._option2quants_by_damping: + # It's questionable as to whether we should initialize with stuff like + # this at all. Ideally these values should never be used until they are + # updated at least once. + damping_string = scalar_or_tensor_to_string(damping) + with variable_scope.variable_scope(self._var_scope): + Pmat = variable_scope.get_variable( # pylint: disable=invalid-name + "Lmat_damp{}".format(damping_string), + initializer=inverse_initializer, + shape=self._cov_shape, + trainable=False, + dtype=self._dtype) + Kmat = variable_scope.get_variable( # pylint: disable=invalid-name + "Kmat_damp{}".format(damping_string), + initializer=inverse_initializer, + shape=self._cov_shape, + trainable=False, + dtype=self._dtype) + mu = variable_scope.get_variable( + "mu_damp{}".format(damping_string), + initializer=init_ops.ones_initializer, + shape=self._vec_shape, + trainable=False, + dtype=self._dtype) + + self._option2quants_by_damping[damping] = (Pmat, Kmat, mu) + + def make_inverse_updates_ops(self): + """Create and return update ops corresponding to registered computations.""" + # TODO(b/69918258): Add correctness tests for this method. + # pylint: disable=invalid-name + + ops = super(FullyConnectedMultiKF, self).make_inverse_update_ops() + + if (len(self._option1quants_by_damping) + + len(self._option2quants_by_damping)): + + # Note that C0 and C1 are stand-ins for A0 and A1, or G0 and G1, from + # the pseudo-code in the original paper. Because the computations for + # the A and G case are essentially the same they can both be performed by + # the same class (this one). + + C1 = self.get_cov_dt1() + + # Get the eigendecomposition of C0 (= self.get_cov()) + eigen_e, eigen_V = self.get_eigendecomp() + + # TODO(b/69678661): Note, there is an implicit assumption here that C1 + # and C0 (as represented here by its eigen-decomp) are consistent. This + # could fail to be the case if self._cov and self._cov_dt1 are not updated + # consistently, or are somehow read between or during the cov updates. + # Can this possibly happen? Is there a way to prevent it? + + for damping, (Lmat_var, + psi_var) in self._option1quants_by_damping.items(): + + invsqrtC0 = math_ops.matmul( + eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True) + + # Might need to enforce symmetry lost due to numerical issues. + invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0 + + # The following line imposses the symmetry assumed by "Option 1" on C1. + # Stangely the code can work okay with this line commented out, + # depending on how psd_eig is defined. I'm not sure why. + C1 = (C1 + array_ops.transpose(C1)) / 2.0 + + # hPsi = C0^(-1/2) * C1 * C0^(-1/2) (hPsi means \hat{Psi}) + hPsi = math_ops.matmul(math_ops.matmul(invsqrtC0, C1), invsqrtC0) + + # Compute the decomposition U*diag(psi)*U^T = hPsi + psi, U = utils.psd_eig(hPsi) + + # L = C0^(-1/2) * U + Lmat = math_ops.matmul(invsqrtC0, U) + + ops.append(Lmat_var.assign(Lmat)) + ops.append(psi_var.assign(psi)) + + for damping, (Pmat_var, Kmat_var, + mu_var) in self._option2quants_by_damping.items(): + + # compute C0^(-1/2) + invsqrtC0 = math_ops.matmul( + eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True) + + # Might need to enforce symmetry lost due to numerical issues. + invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0 + + # Compute the product C0^(-1/2) * C1 + invsqrtC0C1 = math_ops.matmul(invsqrtC0, C1) + + # hPsi = C0^(-1/2) * C1 * C0^(-1/2) (hPsi means \hat{Psi}) + hPsi = math_ops.matmul(invsqrtC0C1, invsqrtC0) + + # Compute the decomposition E*diag(mu)*E^T = hPsi^T * hPsi + # Note that we using the notation mu instead of "m" for the eigenvalues. + # Instead of computing the product hPsi^T * hPsi and then doing an + # eigen-decomposition of this we just compute the SVD of hPsi and then + # square the singular values to get the eigenvalues. For a justification + # of this approach, see: + # https://en.wikipedia.org/wiki/Singular-value_decomposition#Relation_to_eigenvalue_decomposition + sqrtmu, _, E = linalg_ops.svd(hPsi) + mu = math_ops.square(sqrtmu) + + # Mathematically, the eigenvalues should not should not exceed 1.0, but + # due to numerical issues, or possible issues with inconsistent + # values of C1 and (the eigen-decomposition of) C0 they might. So + # we enforce this condition. + mu = math_ops.minimum(mu, 1.0) + + # P = (C0^(-1/2) * C1)^T * C0^(-1/2) = C_1^T * C_0^(-1) + Pmat = math_ops.matmul(invsqrtC0C1, invsqrtC0, transpose_a=True) + + # K = C_0^(-1/2) * E + Kmat = math_ops.matmul(invsqrtC0, E) + + ops.append(Pmat_var.assign(Pmat)) + ops.append(Kmat_var.assign(Kmat)) + ops.append(mu_var.assign(mu)) + + return [control_flow_ops.group(ops)] + + # pylint: enable=invalid-name diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index d5461c9f2e..035f080fdb 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import random_ops # Method used for inverting matrices. POSDEF_INV_METHOD = "cholesky" +POSDEF_EIG_METHOD = "self_adjoint" def set_global_constants(posdef_inv_method=None): @@ -187,7 +188,7 @@ def posdef_inv(tensor, damping): """Computes the inverse of tensor + damping * identity.""" identity = linalg_ops.eye(tensor.shape.as_list()[0], dtype=tensor.dtype) damping = math_ops.cast(damping, dtype=tensor.dtype) - return posdef_inv_funcs[POSDEF_INV_METHOD](tensor, identity, damping) + return posdef_inv_functions[POSDEF_INV_METHOD](tensor, identity, damping) def posdef_inv_matrix_inverse(tensor, identity, damping): @@ -209,13 +210,39 @@ def posdef_inv_eig(tensor, identity, damping): eigenvectors / eigenvalues, eigenvectors, transpose_b=True) -posdef_inv_funcs = { +posdef_inv_functions = { "matrix_inverse": posdef_inv_matrix_inverse, "cholesky": posdef_inv_cholesky, "eig": posdef_inv_eig, } +def posdef_eig(mat): + """Computes the eigendecomposition of a positive semidefinite matrix.""" + return posdef_eig_functions[POSDEF_EIG_METHOD](mat) + + +def posdef_eig_svd(mat): + """Computes the singular values and left singular vectors of a matrix.""" + evals, evecs, _ = linalg_ops.svd(mat) + + return evals, evecs + + +def posdef_eig_self_adjoint(mat): + """Computes eigendecomposition using self_adjoint_eig.""" + evals, evecs = linalg_ops.self_adjoint_eig(mat) + evals = math_ops.abs(evals) # Should be equivalent to svd approach. + + return evals, evecs + + +posdef_eig_functions = { + "self_adjoint": posdef_eig_self_adjoint, + "svd": posdef_eig_svd, +} + + class SubGraph(object): """Defines a subgraph given by all the dependencies of a given set of outputs. """ -- GitLab From 6b6244c40197b34f49bb50aa52efb082380d4637 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Nov 2017 23:58:26 -0800 Subject: [PATCH 1031/1801] Build demo app for SmartReply PiperOrigin-RevId: 177559103 --- tensorflow/contrib/lite/build_def.bzl | 5 +- .../contrib/lite/models/smartreply/BUILD | 85 ++++++++++++ .../demo/app/src/main/AndroidManifest.xml | 38 ++++++ .../models/smartreply/demo/app/src/main/BUILD | 65 +++++++++ .../smartreply/demo/app/src/main/assets/BUILD | 15 ++ .../app/src/main/assets/backoff_response.txt | 16 +++ .../android/smartreply/MainActivity.java | 99 ++++++++++++++ .../android/smartreply/SmartReply.java | 44 ++++++ .../android/smartreply/SmartReplyClient.java | 129 ++++++++++++++++++ .../app/src/main/res/layout/main_activity.xml | 44 ++++++ .../demo/app/src/main/smartreply_jni.cc | 129 ++++++++++++++++++ .../models/smartreply/ops/extract_feature.cc | 9 +- .../lite/models/smartreply/ops/normalize.cc | 7 +- .../lite/models/smartreply/predictor.cc | 21 +-- .../lite/models/smartreply/predictor.h | 12 +- .../lite/models/smartreply/predictor_test.cc | 9 +- tensorflow/contrib/lite/tools/BUILD | 1 + .../lite/tools/gen_op_registration_main.cc | 48 +++++-- .../contrib/lite/tools/mutable_op_resolver.h | 2 +- tensorflow/workspace.bzl | 18 ++- third_party/tflite_smartreply.BUILD | 13 ++ 21 files changed, 758 insertions(+), 51 deletions(-) create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc create mode 100644 third_party/tflite_smartreply.BUILD diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index e3c9cdd99b..5813b3de4d 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -223,11 +223,12 @@ def gen_selected_ops(name, model): """ out = name + "_registration.cc" tool = "//tensorflow/contrib/lite/tools:generate_op_registrations" + tflite_path = "//tensorflow/contrib/lite" native.genrule( name = name, srcs = [model], outs = [out], - cmd = ("$(location %s) --input_model=$(location %s) --output_registration=$(location %s)") - % (tool, model, out), + cmd = ("$(location %s) --input_model=$(location %s) --output_registration=$(location %s) --tflite_path=%s") + % (tool, model, out, tflite_path[2:]), tools = [tool], ) diff --git a/tensorflow/contrib/lite/models/smartreply/BUILD b/tensorflow/contrib/lite/models/smartreply/BUILD index fbdf19f205..733c3f4c7f 100644 --- a/tensorflow/contrib/lite/models/smartreply/BUILD +++ b/tensorflow/contrib/lite/models/smartreply/BUILD @@ -1,7 +1,92 @@ package(default_visibility = ["//visibility:public"]) +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops") + licenses(["notice"]) # Apache 2.0 +gen_selected_ops( + name = "smartreply_ops", + model = "@tflite_smartreply//:smartreply.tflite", +) + +cc_library( + name = "custom_ops", + srcs = [ + "ops/extract_feature.cc", + "ops/normalize.cc", + "ops/predict.cc", + ":smartreply_ops", + ], + copts = tflite_copts(), + deps = [ + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/tools:mutable_op_resolver", + "@com_google_absl//absl/strings", + "@com_googlesource_code_re2//:re2", + "@farmhash_archive//:farmhash", + ], +) + +cc_library( + name = "predictor_lib", + srcs = ["predictor.cc"], + hdrs = ["predictor.h"], + copts = tflite_copts(), + deps = [ + ":custom_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/tools:mutable_op_resolver", + "@com_google_absl//absl/strings", + "@com_googlesource_code_re2//:re2", + ], +) + +cc_test( + name = "extract_feature_op_test", + size = "small", + srcs = ["ops/extract_feature_test.cc"], + deps = [ + ":custom_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + "@farmhash_archive//:farmhash", + ], +) + +cc_test( + name = "normalize_op_test", + size = "small", + srcs = ["ops/normalize_test.cc"], + deps = [ + ":custom_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "predict_op_test", + size = "small", + srcs = ["ops/predict_test.cc"], + deps = [ + ":custom_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000000..75ed9432c8 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD new file mode 100644 index 0000000000..f8767b443a --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD @@ -0,0 +1,65 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow/contrib/lite:build_def.bzl", + "tflite_copts", + "tflite_jni_binary", +) + +filegroup( + name = "assets", + srcs = [ + "@tflite_smartreply//:model_files", + ], +) + +android_binary( + name = "SmartReplyDemo", + srcs = glob(["java/**/*.java"]), + assets = [":assets"], + assets_dir = "", + custom_package = "com.example.android.smartreply", + manifest = "AndroidManifest.xml", + nocompress_extensions = [ + ".tflite", + ], + resource_files = glob(["res/**"]), + tags = ["manual"], + deps = [ + ":smartreply_runtime", + "@androidsdk//com.android.support:support-v13-25.2.0", + "@androidsdk//com.android.support:support-v4-25.2.0", + ], +) + +cc_library( + name = "smartreply_runtime", + srcs = ["libsmartreply_jni.so"], + visibility = ["//visibility:public"], +) + +tflite_jni_binary( + name = "libsmartreply_jni.so", + deps = [ + ":smartreply_jni_lib", + ], +) + +cc_library( + name = "smartreply_jni_lib", + srcs = [ + "smartreply_jni.cc", + ], + copts = tflite_copts(), + linkopts = [ + "-lm", + "-ldl", + ], + deps = [ + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/models/smartreply:predictor_lib", + ], + alwayslink = 1, +) diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD new file mode 100644 index 0000000000..3c882ffc43 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD @@ -0,0 +1,15 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(glob(["*"])) + +filegroup( + name = "assets_files", + srcs = glob( + ["**/*"], + exclude = [ + "BUILD", + ], + ), +) diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt new file mode 100644 index 0000000000..a0a5b46b5f --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt @@ -0,0 +1,16 @@ +Ok +Yes +No +👍 +☺ +😟 +❤️ +Lol +Thanks +Got it +Done +Nice +I don't know +What? +Why? +What's up? diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java new file mode 100644 index 0000000000..02fec9ae5e --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java @@ -0,0 +1,99 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.smartreply; + +import android.app.Activity; +import android.os.Bundle; +import android.os.Handler; +import android.util.Log; +import android.view.View; +import android.widget.Button; +import android.widget.EditText; +import android.widget.TextView; + +/** + * The main (and only) activity of this demo app. Displays a text box which updates as messages are + * received. + */ +public class MainActivity extends Activity { + private static final String TAG = "SmartReplyDemo"; + private SmartReplyClient client; + + private Button sendButton; + private TextView messageTextView; + private EditText messageInput; + + private Handler handler; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + Log.v(TAG, "onCreate"); + setContentView(R.layout.main_activity); + + client = new SmartReplyClient(getApplicationContext()); + handler = new Handler(); + + sendButton = (Button) findViewById(R.id.send_button); + sendButton.setOnClickListener( + (View v) -> { + send(messageInput.getText().toString()); + }); + + messageTextView = (TextView) findViewById(R.id.message_text); + messageInput = (EditText) findViewById(R.id.message_input); + } + + @Override + protected void onStart() { + super.onStart(); + Log.v(TAG, "onStart"); + handler.post( + () -> { + client.loadModel(); + }); + } + + @Override + protected void onStop() { + super.onStop(); + Log.v(TAG, "onStop"); + handler.post( + () -> { + client.unloadModel(); + }); + } + + private void send(final String message) { + handler.post( + () -> { + messageTextView.append("Input: " + message + "\n"); + + SmartReply[] ans = client.predict(new String[] {message}); + for (SmartReply reply : ans) { + appendMessage("Reply: " + reply.getText()); + } + appendMessage("------"); + }); + } + + private void appendMessage(final String message) { + handler.post( + () -> { + messageTextView.append(message + "\n"); + }); + } +} diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java new file mode 100644 index 0000000000..3357fd17c1 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.smartreply; + +import android.support.annotation.Keep; + +/** + * SmartReply contains predicted message, and confidence. + * + *

    NOTE: this class used by JNI, class name and constructor should not be obfuscated. + */ +@Keep +public class SmartReply { + + private final String text; + private final float score; + + @Keep + public SmartReply(String text, float score) { + this.text = text; + this.score = score; + } + + public String getText() { + return text; + } + + public float getScore() { + return score; + } +} diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java new file mode 100644 index 0000000000..d5b1ac0ffb --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java @@ -0,0 +1,129 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.smartreply; + +import android.content.Context; +import android.content.res.AssetFileDescriptor; +import android.support.annotation.Keep; +import android.support.annotation.WorkerThread; +import android.util.Log; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.List; + +/** Interface to load TfLite model and provide predictions. */ +public class SmartReplyClient implements AutoCloseable { + private static final String TAG = "SmartReplyDemo"; + private static final String MODEL_PATH = "smartreply.tflite"; + private static final String BACKOFF_PATH = "backoff_response.txt"; + private static final String JNI_LIB = "smartreply_jni"; + + private final Context context; + private long storage; + private MappedByteBuffer model; + + private volatile boolean isLibraryLoaded; + + public SmartReplyClient(Context context) { + this.context = context; + } + + public boolean isLoaded() { + return storage != 0; + } + + @WorkerThread + public synchronized void loadModel() { + if (!isLibraryLoaded) { + System.loadLibrary(JNI_LIB); + isLibraryLoaded = true; + } + + try { + model = loadModelFile(); + String[] backoff = loadBackoffList(); + storage = loadJNI(model, backoff); + } catch (IOException e) { + Log.e(TAG, "Fail to load model", e); + return; + } + } + + @WorkerThread + public synchronized SmartReply[] predict(String[] input) { + if (storage != 0) { + return predictJNI(storage, input); + } else { + return new SmartReply[] {}; + } + } + + @WorkerThread + public synchronized void unloadModel() { + close(); + } + + @Override + public synchronized void close() { + if (storage != 0) { + unloadJNI(storage); + storage = 0; + } + } + + private MappedByteBuffer loadModelFile() throws IOException { + AssetFileDescriptor fileDescriptor = context.getAssets().openFd(MODEL_PATH); + FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor()); + try { + FileChannel fileChannel = inputStream.getChannel(); + long startOffset = fileDescriptor.getStartOffset(); + long declaredLength = fileDescriptor.getDeclaredLength(); + return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength); + } finally { + inputStream.close(); + } + } + + private String[] loadBackoffList() throws IOException { + List labelList = new ArrayList(); + BufferedReader reader = + new BufferedReader(new InputStreamReader(context.getAssets().open(BACKOFF_PATH))); + String line; + while ((line = reader.readLine()) != null) { + if (!line.isEmpty()) { + labelList.add(line); + } + } + reader.close(); + String[] ans = new String[labelList.size()]; + labelList.toArray(ans); + return ans; + } + + @Keep + private native long loadJNI(MappedByteBuffer buffer, String[] backoff); + + @Keep + private native SmartReply[] predictJNI(long storage, String[] text); + + @Keep + private native void unloadJNI(long storage); +} diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml new file mode 100644 index 0000000000..23b4cadc00 --- /dev/null +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml @@ -0,0 +1,44 @@ + + + + + + + + + + +